# cohere-ui / app.py
# NOTE(review): the lines below were HuggingFace Space page chrome
# ("Spestly's picture / Update app.py / 7af49e8 verified / raw /
# history blame / 4.72 kB") captured by the scrape; commented out so
# the module parses as Python.
import base64
import io
import mimetypes

import gradio as gr
from huggingface_hub import InferenceClient
from PIL import Image
def image_to_data_url(image_path):
    """Convert an image file on disk into a base64 ``data:`` URL.

    Args:
        image_path: Filesystem path to the image, or ``None``.

    Returns:
        A ``data:image/<subtype>;base64,...`` string, or ``None`` when
        no path was given.
    """
    if image_path is None:
        return None
    # Guess the MIME type from the file extension; fall back to JPEG,
    # matching the original default for unrecognized files.
    mime_type, _ = mimetypes.guess_type(image_path)
    if not mime_type or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"
    # Read the raw bytes instead of round-tripping through PIL: this
    # avoids lossy JPEG re-compression and save() failures for modes
    # the target format cannot represent (e.g. RGBA -> JPEG).
    with open(image_path, "rb") as fh:
        encoded = base64.b64encode(fh.read()).decode()
    return f"data:{mime_type};base64,{encoded}"
def process_input(image, image_url, prompt, model, hf_token):
    """Stream a vision-model response for an image plus a text prompt.

    Args:
        image: Filepath of an uploaded image, or ``None``.
        image_url: URL of an image (used only when no upload is given).
        prompt: Text prompt sent alongside the image.
        model: Model repo id to query.
        hf_token: Hugging Face API token; must start with ``hf_``.

    Yields:
        The accumulated response text after each streamed chunk.

    Raises:
        gr.Error: On a missing/invalid token, missing image, or API failure.
    """
    # Tolerate a None/empty textbox value and pasted whitespace instead of
    # crashing with AttributeError on .startswith().
    hf_token = (hf_token or "").strip()
    if not hf_token.startswith("hf_"):
        raise gr.Error("Invalid Hugging Face token. It should start with 'hf_'")
    # The uploaded file takes precedence over the URL tab.
    if image is not None:
        image_data = image_to_data_url(image)
    else:
        image_data = image_url or None
    if not image_data:
        raise gr.Error("Please provide either an image upload or image URL")
    client = InferenceClient(
        api_key=hf_token,
        provider="cohere"
    )
    messages = [{
        "role": "user",
        "content": [
            {"type": "text", "text": prompt},
            {"type": "image_url", "image_url": {"url": image_data}},
        ],
    }]
    try:
        stream = client.chat.completions.create(
            model=model,
            messages=messages,
            max_tokens=8000,
            stream=True,
        )
        full_response = ""
        for chunk in stream:
            # OpenAI-style streaming payload: text lives on choices[0].delta.
            if hasattr(chunk.choices[0], 'delta') and hasattr(chunk.choices[0].delta, 'content'):
                full_response += chunk.choices[0].delta.content or ""
                yield full_response
            # Fallback for providers that put the text directly on the chunk.
            elif hasattr(chunk, 'content'):
                full_response += chunk.content or ""
                yield full_response
    except Exception as e:
        # Surface any API/transport failure as a user-visible Gradio error.
        raise gr.Error(f"API Error: {str(e)}") from e
# Vision-language model repo ids served through the Cohere provider.
models = [
    "CohereLabs/aya-vision-32b",
    "CohereLabs/aya-vision-8b",
]
# Gradio UI: token + model selection on the left, image via an upload tab
# or a URL tab, a prompt box, and a streaming text output on the right.
with gr.Blocks() as demo:
    gr.Markdown("""
    # πŸ” Aya-Vision Model Interface
    *Explore state-of-the-art vision-language models by Cohere through this interface.
    Supports image inputs via upload or URL, with streaming responses.*
    Read more about Aya Vision [here](https://cohere.com/research/aya)
    **Get your HF token:** [Hugging Face Settings](https://huggingface.co/settings/tokens)
    """)
    with gr.Row():
        with gr.Column():
            # Token is entered per session and passed straight to the
            # request; it is not persisted server-side.
            hf_token = gr.Textbox(
                label="Hugging Face Token",
                type="password",
                placeholder="hf_XXXXXXXXXXXXXX",
                info="Token is used temporarily for the request"
            )
            model_choice = gr.Dropdown(
                label="Model Selection",
                choices=models,
                value=models[0]
            )
            # Two alternative image sources; process_input prefers the
            # upload when both are filled in.
            with gr.Tab("Upload Image"):
                image_input = gr.Image(
                    label="Upload Image",
                    type="filepath",
                    sources=["upload"]
                )
            with gr.Tab("Image URL"):
                image_url = gr.Textbox(
                    label="Image URL",
                    placeholder="https://example.com/image.jpg",
                )
            prompt = gr.Textbox(
                label="Prompt",
                value="Describe this image in one sentence.",
                lines=3
            )
            submit_btn = gr.Button("Generate", variant="primary")
        with gr.Column():
            # process_input is a generator, so this box updates as chunks
            # stream in.
            output = gr.Textbox(
                label="Model Response",
                interactive=False,
                lines=10,
                autoscroll=True
            )
    submit_btn.click(
        fn=process_input,
        inputs=[image_input, image_url, prompt, model_choice, hf_token],
        outputs=output,
        concurrency_limit=None
    )
    # Pre-filled URL examples; the token field is left empty so users must
    # supply their own.
    gr.Examples(
        examples=[
            [
                None,
                "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg",
                "Describe this image in one sentence.",
                models[0],
                ""
            ],
            [
                None,
                "https://upload.wikimedia.org/wikipedia/commons/4/47/PNG_transparency_demonstration_1.png",
                "What is unique about this image format?",
                models[1],
                ""
            ]
        ],
        inputs=[image_input, image_url, prompt, model_choice, hf_token],
        label="Try these examples:"
    )
# Launch the app only when run as a script (not when imported).
if __name__ == "__main__":
    demo.launch()