import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import os
from PIL import Image

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device}")
def _load_model():
    # Pin both the remote code and the weights to a known snapshot of moondream2.
    tokenizer = AutoTokenizer.from_pretrained("vikhyatk/moondream2", trust_remote_code=True, revision="2024-05-08")
    model = AutoModelForCausalLM.from_pretrained("vikhyatk/moondream2", device_map=device, trust_remote_code=True, revision="2024-05-08")
    return model, tokenizer
class MoonDream():
    def __init__(self, model=None, tokenizer=None):
        self.model, self.tokenizer = model, tokenizer
        if not model or not tokenizer:
            self.model, self.tokenizer = _load_model()
        self.device = device
        # device_map already places the weights; this keeps externally supplied models on the same device.
        self.model.to(self.device)
    def __call__(self, question, imgs):
        # Answer the question once per image, yielding each answer as it is produced.
        for img in imgs:
            embeds = self.model.encode_image(img)
            res = self.model.answer_question(question=question, image_embeds=embeds, tokenizer=self.tokenizer)
            yield res
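# A minimal usage sketch (hypothetical file name; the encode_image/answer_question
# calls follow the moondream2 model card):
#   md = MoonDream()
#   for answer in md("What is in this image?", [Image.open("example.jpg")]):
#       print(answer)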
def _respond_one(question, img):
    # Stream the progressively growing answer for a single image.
    txt = ""
    for chunk in MoonDream()(question, [img]):
        yield (txt := txt + chunk)
def respond_batch(question, *img_batches):
    # Answer the question for every image in each batch, then emit a sentinel
    # string so callers can tell where one batch ends.
    md = MoonDream()
    for batch in img_batches:
        for answer in md(question, batch):
            yield answer
        yield "\n\n\n\n\n\n"
# Quick smoke test: ask the same question about three solid-color images.
red = Image.new("RGB", (192, 192), (255, 0, 0))
green = Image.new("RGB", (192, 192), (0, 255, 0))
blue = Image.new("RGB", (192, 192), (0, 0, 255))
res = respond_batch("What color is this? Elaborate upon what emotion registers most strongly with you upon viewing. ", [red, green, blue])
for r in res:
    print(r)
    if "\n\n\n\n\n\n" in r:
        break
def dual_images(img1: Image.Image):
    # Run once per image, streaming a detailed description to its respective output.
    md = MoonDream()
    res = md("Describe the image in plain english ", [img1])
    txt = ""
    for r in res:
        yield (txt := txt + r)
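# Streaming usage sketch for dual_images (hypothetical file name):
#   for partial in dual_images(Image.open("example.jpg")):
#       print(partial)  # each yield is the description so far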
# The Together API key is read from a local file and exported to the environment.
with open("together_key.txt", "r") as f:
    os.environ["TOGETHER_KEY"] = f.read().strip()
print("Set together key")
def merge_descriptions_to_prompt(mi, d1, d2):
    # Imported lazily, only when a merge is actually requested.
    from together import Together
    tog = Together(api_key=os.getenv("TOGETHER_KEY"))
    res = tog.completions.create(prompt=f"""Describe what would result if the following two descriptions were describing one thing.
### Description 1:
```text
{d1}
```
### Description 2:
```text
{d2}
```
Merge-Specific Instructions:
```text
{mi}
```
Ensure you end your output with ```\\n
---
Complete Description:
```text""", model="meta-llama/Meta-Llama-3-70B", stop=["```"], max_tokens=1024)
    return res.choices[0].text.split("```")[0]
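# Example call (hypothetical inputs, sketch only):
#   combined = merge_descriptions_to_prompt(
#       "Treat them as one object.",
#       "A red cube on a table.",
#       "A green sphere on a table.")
#   print(combined)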
def xform_image_description(img, inst):
    from together import Together
    # dual_images yields progressively longer text; keep only the final, complete description.
    desc = ""
    for desc in dual_images(img):
        pass
    tog = Together(api_key=os.getenv("TOGETHER_KEY"))
    prompt = f"""Describe the image in aggressively verbose detail. I must know every freckle upon a man's brow and each blade of the grass intimately.\nDescription: ```text\n{desc}\n```\nInstructions:\n```text\n{inst}\n```\n\n\n---\nDetailed Description:\n```text"""
    res = tog.completions.create(prompt=prompt, model="meta-llama/Meta-Llama-3-70B", stop=["```"], max_tokens=1024)
    # The completion does not echo the prompt, so take the text up to the closing fence.
    return res.choices[0].text.split("```")[0]
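# Example call (hypothetical inputs, sketch only; note this helper is not wired
# into the UI below):
#   detailed = xform_image_description(Image.open("example.jpg"), "Focus on texture.")
#   print(detailed)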
with gr.Blocks() as demo:
    # Single-image description panel.
    with gr.Row(visible=True):
        with gr.Column():
            with gr.Row():
                img = gr.Image(label="images", type='pil')
            with gr.Row():
                btn = gr.Button("submit")
            with gr.Row():
                otpt = gr.Textbox(label="output", lines=3, interactive=True)
    # Two-image batch panel with a merge step.
    with gr.Row():
        with gr.Column():
            im1 = gr.Image(label="image 1", type='pil')
        with gr.Column():
            im2 = gr.Image(label="image 2", type='pil')
    with gr.Row():
        btn2 = gr.Button("submit batch")
    with gr.Row():
        with gr.Column():
            otp2 = gr.Textbox(label="individual batch output (left)", interactive=True)
        with gr.Column():
            otp3 = gr.Textbox(label="individual batch output (right)", interactive=True)
    with gr.Row():
        minst = gr.Textbox(label="Merge Instructions")
    with gr.Row():
        btn_scd = gr.Button("Merge Descriptions to Single Combined Description")
    with gr.Row():
        otp4 = gr.Textbox(label="batch output ( combined )", interactive=True, lines=4)
    # Both click handlers on btn2 fire, describing each image independently.
    btn2.click(dual_images, inputs=[im1], outputs=[otp2])
    btn2.click(dual_images, inputs=[im2], outputs=[otp3])
    btn.click(dual_images, inputs=[img], outputs=[otpt])
    btn_scd.click(merge_descriptions_to_prompt, inputs=[minst, otp2, otp3], outputs=[otp4])

demo.launch(debug=True, share=True)