File size: 3,628 Bytes
a05fede
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import base64
import io
import os

import gradio as gr
from huggingface_hub import InferenceClient
from PIL import Image

# Hugging Face Inference API client routed through the Cohere provider.
# The token is read from the HF_TOKEN environment variable rather than being
# hard-coded: the original passed the literal string "HF_TOKEN" as the key,
# which can never authenticate, and real tokens must not live in source.
client = InferenceClient(
    provider="cohere",
    api_key=os.environ.get("HF_TOKEN"),
)

def image_to_data_url(image_path):
    """Encode the image file at *image_path* as a base64 ``data:`` URL.

    Returns ``None`` when no path is given, so the caller can fall through
    to an alternative image source (e.g. a pasted URL).
    """
    if image_path is None:
        return None
    # Use PIL only to sniff the format. Fall back to PNG when the file
    # carries no format metadata (img.format is None), which previously
    # crashed img.save(format=None).
    with Image.open(image_path) as img:
        fmt = (img.format or "PNG").lower()
    # Embed the original file bytes directly instead of round-tripping the
    # pixels through PIL: faster, and avoids a lossy second JPEG encode.
    with open(image_path, "rb") as fh:
        encoded = base64.b64encode(fh.read()).decode()
    return f"data:image/{fmt};base64,{encoded}"

def process_input(image, image_url, prompt, model):
    """Stream a vision-model answer for the given image and prompt.

    An uploaded file takes precedence over a pasted URL; when neither is
    supplied a gr.Error is raised so the UI shows a validation message.
    Yields the accumulated response text after every streamed delta so the
    output box updates incrementally.
    """
    # Resolve the image source: upload wins, otherwise a non-empty URL.
    image_data = image_to_data_url(image) if image is not None else (image_url or None)

    if not image_data:
        raise gr.Error("Please provide either an image upload or image URL")

    request_messages = [{
        "role": "user",
        "content": [
            {"type": "text", "text": prompt},
            {"type": "image_url", "image_url": {"url": image_data}},
        ],
    }]

    stream = client.chat.completions.create(
        model=model,
        messages=request_messages,
        max_tokens=512,
        stream=True,
    )

    # Re-yield the full text each time a delta arrives.
    answer_so_far = ""
    for event in stream:
        answer_so_far += event.choices[0].delta.content or ""
        yield answer_so_far

# Vision models selectable in the UI dropdown; the first entry is the default.
models = [
    "CohereLabs/aya-vision-32b",
    "CohereLabs/aya-vision-8b",
]

# UI layout. Statement order inside the Blocks context defines the layout:
# left column holds the inputs, right column holds the streamed output.
with gr.Blocks() as demo:
    gr.Markdown("# Cohere Aya Vision model UI")
    
    with gr.Row():
        with gr.Column():
            # Model picker, defaulting to the first entry of `models`.
            model_choice = gr.Dropdown(
                label="Select Model",
                choices=models,
                value=models[0],
                interactive=True
            )
            
            # Two mutually-exclusive image sources, one per tab. The handler
            # prefers the uploaded file over the URL when both are filled in.
            with gr.Tab("Upload Image"):
                image_input = gr.Image(
                    label="Upload Image",
                    type="filepath",  # handler receives a path, not an array
                    sources=["upload"]
                )
            with gr.Tab("Image URL"):
                image_url = gr.Textbox(
                    label="Image URL",
                    placeholder="Paste image URL here...",
                    value=""
                )
            
            prompt = gr.Textbox(
                label="Prompt",
                value="Describe this image in one sentence.",
                interactive=True
            )
            submit_btn = gr.Button("Generate", variant="primary")
        
        with gr.Column():
            # Streaming target: process_input yields growing text into here.
            output = gr.Textbox(
                label="Model Response",
                interactive=False,
                lines=10,
                autoscroll=True
            )

    # Wire the button to the generator handler; unlimited concurrent runs.
    submit_btn.click(
        fn=process_input,
        inputs=[image_input, image_url, prompt, model_choice],
        outputs=output,
        concurrency_limit=None
    )

    # Clickable examples that pre-fill the inputs (URL source, no upload).
    gr.Examples(
        examples=[
            [
                None,
                "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg",
                "Describe this image in one sentence.",
                models[0]
            ],
            [
                None,
                "https://upload.wikimedia.org/wikipedia/commons/thumb/4/4d/Cat_November_2010-1a.jpg/1200px-Cat_November_2010-1a.jpg",
                "What is the main subject of this image?",
                models[1]
            ]
        ],
        inputs=[image_input, image_url, prompt, model_choice],
        label="Example Inputs"
    )

# queue() is required for generator handlers (streaming output) to work.
if __name__ == "__main__":
    demo.queue().launch()