Spaces:

CohereLabsCommunity
/

cohere-ui

Sleeping

App Files Files Community

Spestly committed on May 2

Commit

2763883

verified ·

1 Parent(s): 93928ee

Update app.py

Browse files

Files changed (1) hide show

app.py +59 -42

app.py CHANGED Viewed

@@ -4,11 +4,6 @@ import base64
 from PIL import Image
 import io
-client = InferenceClient(
-    provider="cohere",
-    api_key="HF_TOKEN", # Will add soon as soon as I get the auth working
-)
 def image_to_data_url(image_path):
     if image_path is None:
         return None
@@ -18,7 +13,12 @@ def image_to_data_url(image_path):
         img_str = base64.b64encode(buffered.getvalue()).decode()
         return f"data:image/{img.format.lower()};base64,{img_str}"
-def process_input(image, image_url, prompt, model):
     image_data = None
     if image is not None:
         image_data = image_to_data_url(image)
@@ -27,29 +27,30 @@ def process_input(image, image_url, prompt, model):
     if not image_data:
         raise gr.Error("Please provide either an image upload or image URL")
-    messages = [
-        {
-            "role": "user",
-            "content": [
-                {"type": "text", "text": prompt},
-                {"type": "image_url", "image_url": {"url": image_data}}
-            ]
-        }
-    ]
-    stream = client.chat.completions.create(
-        model=model,
-        messages=messages,
-        max_tokens=512,
-        stream=True,
-    )
-    full_response = ""
-    for chunk in stream:
-        content = chunk.choices[0].delta.content or ""
-        full_response += content
-        yield full_response
 models = [
     "CohereLabs/aya-vision-32b",
@@ -57,15 +58,30 @@ models = [
 ]
 with gr.Blocks() as demo:
-    gr.Markdown("# Cohere Aya Vision model UI")
     with gr.Row():
         with gr.Column():
             model_choice = gr.Dropdown(
-                label="Select Model",
                 choices=models,
-                value=models[0],
-                interactive=True
             )
             with gr.Tab("Upload Image"):
@@ -77,14 +93,13 @@ with gr.Blocks() as demo:
             with gr.Tab("Image URL"):
                 image_url = gr.Textbox(
                     label="Image URL",
-                    placeholder="Paste image URL here...",
-                    value=""
                 )
             prompt = gr.Textbox(
                 label="Prompt",
                 value="Describe this image in one sentence.",
-                interactive=True
             )
             submit_btn = gr.Button("Generate", variant="primary")
@@ -98,7 +113,7 @@ with gr.Blocks() as demo:
     submit_btn.click(
         fn=process_input,
-        inputs=[image_input, image_url, prompt, model_choice],
         outputs=output,
         concurrency_limit=None
     )
@@ -109,17 +124,19 @@ with gr.Blocks() as demo:
                 None,
                 "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg",
                 "Describe this image in one sentence.",
-                models[0]
             ],
             [
                 None,
-                "https://upload.wikimedia.org/wikipedia/commons/thumb/4/4d/Cat_November_2010-1a.jpg/1200px-Cat_November_2010-1a.jpg",
-                "What is the main subject of this image?",
-                models[1]
             ]
         ],
-        inputs=[image_input, image_url, prompt, model_choice],
-        label="Example Inputs"
     )
 if __name__ == "__main__":

 from PIL import Image
 import io
 def image_to_data_url(image_path):
     if image_path is None:
         return None
         img_str = base64.b64encode(buffered.getvalue()).decode()
         return f"data:image/{img.format.lower()};base64,{img_str}"
+def process_input(image, image_url, prompt, model, hf_token):
+    if not hf_token.startswith("hf_"):
+        raise gr.Error("Invalid Hugging Face token. It should start with 'hf_'")
+    client = InferenceClient(provider="cohere", api_key=hf_token)
     image_data = None
     if image is not None:
         image_data = image_to_data_url(image)
     if not image_data:
         raise gr.Error("Please provide either an image upload or image URL")
+    messages = [{
+        "role": "user",
+        "content": [
+            {"type": "text", "text": prompt},
+            {"type": "image_url", "image_url": {"url": image_data}}
+        ]
+    }]
+    try:
+        stream = client.chat.completions.create(
+            model=model,
+            messages=messages,
+            max_tokens=512,
+            stream=True,
+        )
+        full_response = ""
+        for chunk in stream:
+            content = chunk.choices[0].delta.content or ""
+            full_response += content
+            yield full_response
+    except Exception as e:
+        raise gr.Error(f"API Error: {str(e)}")
 models = [
     "CohereLabs/aya-vision-32b",
 ]
 with gr.Blocks() as demo:
+    gr.Markdown("""
+    # 🔍 Aya-Vision Model Interface
+    *Explore state-of-the-art vision-language models by Cohere through this interface.
+    Supports image inputs via upload or URL, with streaming responses.*
+    Read more about Aya Vision [here](https://cohere.com/research/aya)
+    **Get your HF token:** [Hugging Face Settings](https://huggingface.co/settings/tokens)
+    """)
     with gr.Row():
         with gr.Column():
+            hf_token = gr.Textbox(
+                label="Hugging Face Token",
+                type="password",
+                placeholder="hf_XXXXXXXXXXXXXX",
+                info="Token is used temporarily for the request"
+            )
             model_choice = gr.Dropdown(
+                label="Model Selection",
                 choices=models,
+                value=models[0]
             )
             with gr.Tab("Upload Image"):
             with gr.Tab("Image URL"):
                 image_url = gr.Textbox(
                     label="Image URL",
+                    placeholder="https://example.com/image.jpg",
                 )
             prompt = gr.Textbox(
                 label="Prompt",
                 value="Describe this image in one sentence.",
+                lines=3
             )
             submit_btn = gr.Button("Generate", variant="primary")
     submit_btn.click(
         fn=process_input,
+        inputs=[image_input, image_url, prompt, model_choice, hf_token],
         outputs=output,
         concurrency_limit=None
     )
                 None,
                 "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg",
                 "Describe this image in one sentence.",
+                models[0],
+                ""
             ],
             [
                 None,
+                "https://upload.wikimedia.org/wikipedia/commons/4/47/PNG_transparency_demonstration_1.png",
+                "What is unique about this image format?",
+                models[1],
+                ""
             ]
         ],
+        inputs=[image_input, image_url, prompt, model_choice, hf_token],
+        label="Try these examples:"
     )
 if __name__ == "__main__":