"""Gradio demo for Cohere's Aya-Vision models served through the
Hugging Face Inference API (Cohere provider).

Accepts an image (file upload or URL) plus a text prompt and streams the
model's response back into the UI.
"""

import base64
import io

import gradio as gr
from huggingface_hub import InferenceClient
from PIL import Image


def image_to_data_url(image_path):
    """Encode the image at *image_path* as a base64 ``data:`` URL.

    Args:
        image_path: Filesystem path to the image, or ``None``.

    Returns:
        A ``data:image/<fmt>;base64,...`` string, or ``None`` when no
        path was given.
    """
    if image_path is None:
        return None
    with Image.open(image_path) as img:
        buffered = io.BytesIO()
        # Pillow loses the format for some processed images; fall back to JPEG.
        img_format = img.format if img.format else "JPEG"
        # JPEG cannot store alpha/palette modes — convert first, otherwise
        # Image.save raises OSError for RGBA/P images hitting the fallback.
        if img_format == "JPEG" and img.mode not in ("RGB", "L"):
            img = img.convert("RGB")
        img.save(buffered, format=img_format)
    img_str = base64.b64encode(buffered.getvalue()).decode()
    return f"data:image/{img_format.lower()};base64,{img_str}"


def process_input(image, image_url, prompt, model, hf_token):
    """Send the image + prompt to the selected model and stream the reply.

    Args:
        image: Path of an uploaded image file, or ``None``.
        image_url: URL of a remote image; used when *image* is absent.
        prompt: User's text prompt.
        model: Model repo id (e.g. ``CohereLabs/aya-vision-32b``).
        hf_token: Hugging Face access token (``hf_...``).

    Yields:
        The accumulated response text, updated chunk by chunk.

    Raises:
        gr.Error: On an invalid token, missing image, or API failure.
    """
    # Guard against None / stray whitespace before the prefix check
    # (hf_token.startswith would crash on None).
    hf_token = (hf_token or "").strip()
    if not hf_token.startswith("hf_"):
        raise gr.Error("Invalid Hugging Face token. It should start with 'hf_'")

    # Validate inputs before constructing the client: upload wins over URL.
    image_data = image_to_data_url(image) if image is not None else (image_url or None)
    if not image_data:
        raise gr.Error("Please provide either an image upload or image URL")

    client = InferenceClient(
        api_key=hf_token,
        provider="cohere"
    )

    messages = [{
        "role": "user",
        "content": [
            {"type": "text", "text": prompt},
            {"type": "image_url", "image_url": {"url": image_data}}
        ]
    }]

    try:
        stream = client.chat.completions.create(
            model=model,
            messages=messages,
            max_tokens=8000,
            stream=True,
        )
        full_response = ""
        for chunk in stream:
            # Providers differ in chunk shape: OpenAI-style delta vs. a
            # flat .content attribute — handle both.
            if hasattr(chunk.choices[0], 'delta') and hasattr(chunk.choices[0].delta, 'content'):
                full_response += chunk.choices[0].delta.content or ""
                yield full_response
            elif hasattr(chunk, 'content'):
                full_response += chunk.content or ""
                yield full_response
    except Exception as e:
        # Surface any API/transport failure to the UI as a Gradio error.
        raise gr.Error(f"API Error: {str(e)}")


# Available Aya-Vision checkpoints (largest first, used as default).
models = [
    "CohereLabs/aya-vision-32b",
    "CohereLabs/aya-vision-8b",
]

with gr.Blocks() as demo:
    gr.Markdown("""
    # 🔍 Aya-Vision Model Interface
    *Explore state-of-the-art vision-language models by Cohere through this interface. 
    Supports image inputs via upload or URL, with streaming responses.*
    Read more about Aya Vision [here](https://cohere.com/research/aya)
    **Get your HF token:** [Hugging Face Settings](https://huggingface.co/settings/tokens)
    """)

    with gr.Row():
        with gr.Column():
            hf_token = gr.Textbox(
                label="Hugging Face Token",
                type="password",
                placeholder="hf_XXXXXXXXXXXXXX",
                info="Token is used temporarily for the request"
            )
            model_choice = gr.Dropdown(
                label="Model Selection",
                choices=models,
                value=models[0]
            )
            with gr.Tab("Upload Image"):
                image_input = gr.Image(
                    label="Upload Image",
                    type="filepath",
                    sources=["upload"]
                )
            with gr.Tab("Image URL"):
                image_url = gr.Textbox(
                    label="Image URL",
                    placeholder="https://example.com/image.jpg",
                )
            prompt = gr.Textbox(
                label="Prompt",
                value="Describe this image in one sentence.",
                lines=3
            )
            submit_btn = gr.Button("Generate", variant="primary")
        with gr.Column():
            output = gr.Textbox(
                label="Model Response",
                interactive=False,
                lines=10,
                autoscroll=True
            )

    # process_input is a generator, so the output textbox updates as
    # chunks stream in.
    submit_btn.click(
        fn=process_input,
        inputs=[image_input, image_url, prompt, model_choice, hf_token],
        outputs=output,
        concurrency_limit=None
    )

    gr.Examples(
        examples=[
            [
                None,
                "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg",
                "Describe this image in one sentence.",
                models[0],
                ""
            ],
            [
                None,
                "https://upload.wikimedia.org/wikipedia/commons/4/47/PNG_transparency_demonstration_1.png",
                "What is unique about this image format?",
                models[1],
                ""
            ]
        ],
        inputs=[image_input, image_url, prompt, model_choice, hf_token],
        label="Try these examples:"
    )

if __name__ == "__main__":
    demo.launch()