Spaces:

sflindrs
/

vlm_comparer

Running

sflindrs committed on Feb 11

Commit

e329bce

verified ·

1 Parent(s): 084f1ce

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -44,10 +44,28 @@ def compare_image_to_text_models(image, prompt, model1_choice, model1_custom, mo
     # These models should support a call signature of (image, prompt)
     pipe1 = pipeline(task="image-text-to-text", model=model1_name, device=device)
     pipe2 = pipeline(task="image-text-to-text", model=model2_name, device=device)
     # Run inference on the image with the provided prompt.
-    output1 = pipe1(image, query=prompt)
-    output2 = pipe2(image, query=prompt)
     # Extract the generated text.
     def extract_text(output):

     # These models should support a call signature of (image, prompt)
     pipe1 = pipeline(task="image-text-to-text", model=model1_name, device=device)
     pipe2 = pipeline(task="image-text-to-text", model=model2_name, device=device)
+    messages = [
+         {
+             "role": "user",
+             "content": [
+                 {
+                     "type": "image",
+                     "url": image,
+                 },
+                 {"type": "text", "text": prompt},
+             ],
+         },
+         {
+             "role": "assistant",
+             "content": [
+                 {"type": "text", "text": ""},
+             ],
+         },
+    ]
     # Run inference on the image with the provided prompt.
+    output1 = pipe1(text=messages, max_new_tokens=1024)
+    output2 = pipe2(text=messages, max_new_tokens=1024)
     # Extract the generated text.
     def extract_text(output):