paligemma-cpu-gguf

Running

abetlen committed on Oct 3, 2024

Commit

f78095a

1 Parent(s): a9dabe2

Update

Files changed (1) hide show

models.py CHANGED Viewed

@@ -9,6 +9,8 @@ import PIL.Image
 # pylint: disable=g-bad-import-order
 import gradio_helpers
 ORGANIZATION = 'abetlen'
 BASE_MODELS = [
@@ -69,10 +71,16 @@ def generate(
     model_name: str, sampler: str, image: PIL.Image.Image, prompt: str
 ) -> str:
   """Generates output with specified `model_name`, `sampler`."""
-  model, params_cpu = get_cached_model(model_name)
-  batch = model.shard_batch(model.prepare_batch([image], [prompt]))
-  with gradio_helpers.timed('sharding'):
-    params = model.shard_params(params_cpu)
-  with gradio_helpers.timed('computation', start_message=True):
-    tokens = model.predict(params, batch, sampler=sampler)
-  return model.tokenizer.to_str(tokens[0])

 # pylint: disable=g-bad-import-order
 import gradio_helpers
+import llama_cpp
 ORGANIZATION = 'abetlen'
 BASE_MODELS = [
     model_name: str, sampler: str, image: PIL.Image.Image, prompt: str
 ) -> str:
   """Generates output with specified `model_name`, `sampler`."""
+  # model, params_cpu = get_cached_model(model_name)
+  # batch = model.shard_batch(model.prepare_batch([image], [prompt]))
+  # with gradio_helpers.timed('sharding'):
+  #   params = model.shard_params(params_cpu)
+  # with gradio_helpers.timed('computation', start_message=True):
+  #   tokens = model.predict(params, batch, sampler=sampler)
+  model_path = gradio_helpers.get_paths()[model_name]
+  model = llama_cpp.Llama(
+      model_path,
+  )
+  # return model.tokenizer.to_str(tokens[0])
+  return "output"