Spestly committed on
Commit
2763883
·
verified ·
1 Parent(s): 93928ee

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -42
app.py CHANGED
@@ -4,11 +4,6 @@ import base64
4
  from PIL import Image
5
  import io
6
 
7
- client = InferenceClient(
8
- provider="cohere",
9
- api_key="HF_TOKEN", # Will add soon as soon as I get the auth working
10
- )
11
-
12
  def image_to_data_url(image_path):
13
  if image_path is None:
14
  return None
@@ -18,7 +13,12 @@ def image_to_data_url(image_path):
18
  img_str = base64.b64encode(buffered.getvalue()).decode()
19
  return f"data:image/{img.format.lower()};base64,{img_str}"
20
 
21
- def process_input(image, image_url, prompt, model):
 
 
 
 
 
22
  image_data = None
23
  if image is not None:
24
  image_data = image_to_data_url(image)
@@ -27,29 +27,30 @@ def process_input(image, image_url, prompt, model):
27
 
28
  if not image_data:
29
  raise gr.Error("Please provide either an image upload or image URL")
 
 
 
 
 
 
 
 
30
 
31
- messages = [
32
- {
33
- "role": "user",
34
- "content": [
35
- {"type": "text", "text": prompt},
36
- {"type": "image_url", "image_url": {"url": image_data}}
37
- ]
38
- }
39
- ]
40
-
41
- stream = client.chat.completions.create(
42
- model=model,
43
- messages=messages,
44
- max_tokens=512,
45
- stream=True,
46
- )
47
-
48
- full_response = ""
49
- for chunk in stream:
50
- content = chunk.choices[0].delta.content or ""
51
- full_response += content
52
- yield full_response
53
 
54
  models = [
55
  "CohereLabs/aya-vision-32b",
@@ -57,15 +58,30 @@ models = [
57
  ]
58
 
59
  with gr.Blocks() as demo:
60
- gr.Markdown("# Cohere Aya Vision model UI")
 
 
 
 
 
 
61
 
 
 
 
62
  with gr.Row():
63
  with gr.Column():
 
 
 
 
 
 
 
64
  model_choice = gr.Dropdown(
65
- label="Select Model",
66
  choices=models,
67
- value=models[0],
68
- interactive=True
69
  )
70
 
71
  with gr.Tab("Upload Image"):
@@ -77,14 +93,13 @@ with gr.Blocks() as demo:
77
  with gr.Tab("Image URL"):
78
  image_url = gr.Textbox(
79
  label="Image URL",
80
- placeholder="Paste image URL here...",
81
- value=""
82
  )
83
 
84
  prompt = gr.Textbox(
85
  label="Prompt",
86
  value="Describe this image in one sentence.",
87
- interactive=True
88
  )
89
  submit_btn = gr.Button("Generate", variant="primary")
90
 
@@ -98,7 +113,7 @@ with gr.Blocks() as demo:
98
 
99
  submit_btn.click(
100
  fn=process_input,
101
- inputs=[image_input, image_url, prompt, model_choice],
102
  outputs=output,
103
  concurrency_limit=None
104
  )
@@ -109,17 +124,19 @@ with gr.Blocks() as demo:
109
  None,
110
  "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg",
111
  "Describe this image in one sentence.",
112
- models[0]
 
113
  ],
114
  [
115
  None,
116
- "https://upload.wikimedia.org/wikipedia/commons/thumb/4/4d/Cat_November_2010-1a.jpg/1200px-Cat_November_2010-1a.jpg",
117
- "What is the main subject of this image?",
118
- models[1]
 
119
  ]
120
  ],
121
- inputs=[image_input, image_url, prompt, model_choice],
122
- label="Example Inputs"
123
  )
124
 
125
  if __name__ == "__main__":
 
4
  from PIL import Image
5
  import io
6
 
 
 
 
 
 
7
  def image_to_data_url(image_path):
8
  if image_path is None:
9
  return None
 
13
  img_str = base64.b64encode(buffered.getvalue()).decode()
14
  return f"data:image/{img.format.lower()};base64,{img_str}"
15
 
16
+ def process_input(image, image_url, prompt, model, hf_token):
17
+ if not hf_token.startswith("hf_"):
18
+ raise gr.Error("Invalid Hugging Face token. It should start with 'hf_'")
19
+
20
+ client = InferenceClient(provider="cohere", api_key=hf_token)
21
+
22
  image_data = None
23
  if image is not None:
24
  image_data = image_to_data_url(image)
 
27
 
28
  if not image_data:
29
  raise gr.Error("Please provide either an image upload or image URL")
30
+
31
+ messages = [{
32
+ "role": "user",
33
+ "content": [
34
+ {"type": "text", "text": prompt},
35
+ {"type": "image_url", "image_url": {"url": image_data}}
36
+ ]
37
+ }]
38
 
39
+ try:
40
+ stream = client.chat.completions.create(
41
+ model=model,
42
+ messages=messages,
43
+ max_tokens=512,
44
+ stream=True,
45
+ )
46
+
47
+ full_response = ""
48
+ for chunk in stream:
49
+ content = chunk.choices[0].delta.content or ""
50
+ full_response += content
51
+ yield full_response
52
+ except Exception as e:
53
+ raise gr.Error(f"API Error: {str(e)}")
 
 
 
 
 
 
 
54
 
55
  models = [
56
  "CohereLabs/aya-vision-32b",
 
58
  ]
59
 
60
  with gr.Blocks() as demo:
61
+ gr.Markdown("""
62
+ # 🔍 Aya-Vision Model Interface
63
+
64
+ *Explore state-of-the-art vision-language models by Cohere through this interface.
65
+ Supports image inputs via upload or URL, with streaming responses.*
66
+
67
+ Read more about Aya Vision [here](https://cohere.com/research/aya)
68
 
69
+ **Get your HF token:** [Hugging Face Settings](https://huggingface.co/settings/tokens)
70
+ """)
71
+
72
  with gr.Row():
73
  with gr.Column():
74
+ hf_token = gr.Textbox(
75
+ label="Hugging Face Token",
76
+ type="password",
77
+ placeholder="hf_XXXXXXXXXXXXXX",
78
+ info="Token is used temporarily for the request"
79
+ )
80
+
81
  model_choice = gr.Dropdown(
82
+ label="Model Selection",
83
  choices=models,
84
+ value=models[0]
 
85
  )
86
 
87
  with gr.Tab("Upload Image"):
 
93
  with gr.Tab("Image URL"):
94
  image_url = gr.Textbox(
95
  label="Image URL",
96
+ placeholder="https://example.com/image.jpg",
 
97
  )
98
 
99
  prompt = gr.Textbox(
100
  label="Prompt",
101
  value="Describe this image in one sentence.",
102
+ lines=3
103
  )
104
  submit_btn = gr.Button("Generate", variant="primary")
105
 
 
113
 
114
  submit_btn.click(
115
  fn=process_input,
116
+ inputs=[image_input, image_url, prompt, model_choice, hf_token],
117
  outputs=output,
118
  concurrency_limit=None
119
  )
 
124
  None,
125
  "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg",
126
  "Describe this image in one sentence.",
127
+ models[0],
128
+ ""
129
  ],
130
  [
131
  None,
132
+ "https://upload.wikimedia.org/wikipedia/commons/4/47/PNG_transparency_demonstration_1.png",
133
+ "What is unique about this image format?",
134
+ models[1],
135
+ ""
136
  ]
137
  ],
138
+ inputs=[image_input, image_url, prompt, model_choice, hf_token],
139
+ label="Try these examples:"
140
  )
141
 
142
  if __name__ == "__main__":