# cohere-ui / app.py
# NOTE(review): the lines below were HuggingFace Space page chrome
# ("Spestly's picture / Update app.py / 7af49e8 verified / raw /
# history blame / 4.72 kB") captured by the scrape; commented out so
# the module parses as Python.
import base64
import io
import mimetypes

import gradio as gr
from huggingface_hub import InferenceClient
from PIL import Image
def image_to_data_url(image_path):
    """Convert an image file on disk into a base64 ``data:`` URL.

    Args:
        image_path: Filesystem path to the image, or ``None``.

    Returns:
        A ``data:image/<subtype>;base64,...`` string, or ``None`` when
        no path was given.
    """
    if image_path is None:
        return None
    # Guess the MIME type from the file extension; fall back to JPEG,
    # matching the original default for unrecognized files.
    mime_type, _ = mimetypes.guess_type(image_path)
    if not mime_type or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"
    # Read the raw bytes instead of round-tripping through PIL: this
    # avoids lossy JPEG re-compression and save() failures for modes
    # the target format cannot represent (e.g. RGBA -> JPEG).
    with open(image_path, "rb") as fh:
        encoded = base64.b64encode(fh.read()).decode()
    return f"data:{mime_type};base64,{encoded}"
def process_input(image, image_url, prompt, model, hf_token):
    """Stream a vision-model response for an image plus a text prompt.

    Args:
        image: Filepath of an uploaded image, or ``None``.
        image_url: URL of an image (used only when no upload is given).
        prompt: Text prompt sent alongside the image.
        model: Model repo id to query.
        hf_token: Hugging Face API token; must start with ``hf_``.

    Yields:
        The accumulated response text after each streamed chunk.

    Raises:
        gr.Error: On a missing/invalid token, missing image, or API failure.
    """
    # Tolerate a None/empty textbox value and pasted whitespace instead of
    # crashing with AttributeError on .startswith().
    hf_token = (hf_token or "").strip()
    if not hf_token.startswith("hf_"):
        raise gr.Error("Invalid Hugging Face token. It should start with 'hf_'")
    # The uploaded file takes precedence over the URL tab.
    if image is not None:
        image_data = image_to_data_url(image)
    else:
        image_data = image_url or None
    if not image_data:
        raise gr.Error("Please provide either an image upload or image URL")
    client = InferenceClient(
        api_key=hf_token,
        provider="cohere"
    )
    messages = [{
        "role": "user",
        "content": [
            {"type": "text", "text": prompt},
            {"type": "image_url", "image_url": {"url": image_data}},
        ],
    }]
    try:
        stream = client.chat.completions.create(
            model=model,
            messages=messages,
            max_tokens=8000,
            stream=True,
        )
        full_response = ""
        for chunk in stream:
            # OpenAI-style streaming payload: text lives on choices[0].delta.
            if hasattr(chunk.choices[0], 'delta') and hasattr(chunk.choices[0].delta, 'content'):
                full_response += chunk.choices[0].delta.content or ""
                yield full_response
            # Fallback for providers that put the text directly on the chunk.
            elif hasattr(chunk, 'content'):
                full_response += chunk.content or ""
                yield full_response
    except Exception as e:
        # Surface any API/transport failure as a user-visible Gradio error.
        raise gr.Error(f"API Error: {str(e)}") from e
# Vision-language model repo ids served through the Cohere provider.
models = [
    "CohereLabs/aya-vision-32b",
    "CohereLabs/aya-vision-8b",
]
# Gradio UI: token + model selection on the left, image via an upload tab
# or a URL tab, a prompt box, and a streaming text output on the right.
with gr.Blocks() as demo:
    gr.Markdown("""
    # πŸ” Aya-Vision Model Interface
    *Explore state-of-the-art vision-language models by Cohere through this interface.
    Supports image inputs via upload or URL, with streaming responses.*
    Read more about Aya Vision [here](https://cohere.com/research/aya)
    **Get your HF token:** [Hugging Face Settings](https://huggingface.co/settings/tokens)
    """)
    with gr.Row():
        with gr.Column():
            # Token is entered per session and passed straight to the
            # request; it is not persisted server-side.
            hf_token = gr.Textbox(
                label="Hugging Face Token",
                type="password",
                placeholder="hf_XXXXXXXXXXXXXX",
                info="Token is used temporarily for the request"
            )
            model_choice = gr.Dropdown(
                label="Model Selection",
                choices=models,
                value=models[0]
            )
            # Two alternative image sources; process_input prefers the
            # upload when both are filled in.
            with gr.Tab("Upload Image"):
                image_input = gr.Image(
                    label="Upload Image",
                    type="filepath",
                    sources=["upload"]
                )
            with gr.Tab("Image URL"):
                image_url = gr.Textbox(
                    label="Image URL",
                    placeholder="https://example.com/image.jpg",
                )
            prompt = gr.Textbox(
                label="Prompt",
                value="Describe this image in one sentence.",
                lines=3
            )
            submit_btn = gr.Button("Generate", variant="primary")
        with gr.Column():
            # process_input is a generator, so this box updates as chunks
            # stream in.
            output = gr.Textbox(
                label="Model Response",
                interactive=False,
                lines=10,
                autoscroll=True
            )
    submit_btn.click(
        fn=process_input,
        inputs=[image_input, image_url, prompt, model_choice, hf_token],
        outputs=output,
        concurrency_limit=None
    )
    # Pre-filled URL examples; the token field is left empty so users must
    # supply their own.
    gr.Examples(
        examples=[
            [
                None,
                "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg",
                "Describe this image in one sentence.",
                models[0],
                ""
            ],
            [
                None,
                "https://upload.wikimedia.org/wikipedia/commons/4/47/PNG_transparency_demonstration_1.png",
                "What is unique about this image format?",
                models[1],
                ""
            ]
        ],
        inputs=[image_input, image_url, prompt, model_choice, hf_token],
        label="Try these examples:"
    )
# Launch the app only when run as a script (not when imported).
if __name__ == "__main__":
    demo.launch()