Duplicate from Salesforce/BLIP2

Co-authored-by: Dongxu Li <dxli@users.noreply.huggingface.co>
- .gitattributes +36 -0
- README.md +14 -0
- app.py +282 -0
- flower.jpg +0 -0
- forbidden_city.webp +0 -0
- house.png +3 -0
- pizza.jpg +0 -0
- sunset.jpg +0 -0
- utils.py +27 -0
.gitattributes
ADDED
@@ -0,0 +1,36 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text
+house.png filter=lfs diff=lfs merge=lfs -text
README.md
ADDED
@@ -0,0 +1,14 @@
+---
+title: BLIP2
+emoji: 🌖
+colorFrom: blue
+colorTo: pink
+sdk: gradio
+sdk_version: 3.17.0
+app_file: app.py
+pinned: false
+license: bsd-3-clause
+duplicated_from: Salesforce/BLIP2
+---
+
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py
ADDED
@@ -0,0 +1,282 @@
+from io import BytesIO
+
+import string
+import gradio as gr
+import requests
+from utils import Endpoint, get_token
+
+
+def encode_image(image):
+    buffered = BytesIO()
+    image.save(buffered, format="JPEG")
+    buffered.seek(0)
+
+    return buffered
+
+
+def query_chat_api(
+    image, prompt, decoding_method, temperature, len_penalty, repetition_penalty
+):
+
+    url = endpoint.url
+    url = url + "/api/generate"
+
+    headers = {
+        "User-Agent": "BLIP-2 HuggingFace Space",
+        "Auth-Token": get_token(),
+    }
+
+    data = {
+        "prompt": prompt,
+        "use_nucleus_sampling": decoding_method == "Nucleus sampling",
+        "temperature": temperature,
+        "length_penalty": len_penalty,
+        "repetition_penalty": repetition_penalty,
+    }
+
+    image = encode_image(image)
+    files = {"image": image}
+
+    response = requests.post(url, data=data, files=files, headers=headers)
+
+    if response.status_code == 200:
+        return response.json()
+    else:
+        return "Error: " + response.text
+
+
+def query_caption_api(
+    image, decoding_method, temperature, len_penalty, repetition_penalty
+):
+
+    url = endpoint.url
+    url = url + "/api/caption"
+
+    headers = {
+        "User-Agent": "BLIP-2 HuggingFace Space",
+        "Auth-Token": get_token(),
+    }
+
+    data = {
+        "use_nucleus_sampling": decoding_method == "Nucleus sampling",
+        "temperature": temperature,
+        "length_penalty": len_penalty,
+        "repetition_penalty": repetition_penalty,
+    }
+
+    image = encode_image(image)
+    files = {"image": image}
+
+    response = requests.post(url, data=data, files=files, headers=headers)
+
+    if response.status_code == 200:
+        return response.json()
+    else:
+        return "Error: " + response.text
+
+
+def postprocess_output(output):
+    # if the last character is not punctuation, add a full stop
+    if not output[0][-1] in string.punctuation:
+        output[0] += "."
+
+    return output
+
+
+def inference_chat(
+    image,
+    text_input,
+    decoding_method,
+    temperature,
+    length_penalty,
+    repetition_penalty,
+    history=[],
+):
+    text_input = text_input
+    history.append(text_input)
+
+    prompt = " ".join(history)
+
+    output = query_chat_api(
+        image, prompt, decoding_method, temperature, length_penalty, repetition_penalty
+    )
+    output = postprocess_output(output)
+    history += output
+
+    chat = [
+        (history[i], history[i + 1]) for i in range(0, len(history) - 1, 2)
+    ]  # convert history into a list of (user, bot) tuples
+
+    return {chatbot: chat, state: history}
+
+
+def inference_caption(
+    image,
+    decoding_method,
+    temperature,
+    length_penalty,
+    repetition_penalty,
+):
+    output = query_caption_api(
+        image, decoding_method, temperature, length_penalty, repetition_penalty
+    )
+
+    return output[0]
+
+
+title = """<h1 align="center">BLIP-2</h1>"""
+description = """Gradio demo for BLIP-2, image-to-text generation from Salesforce Research. To use it, simply upload your image, or click one of the examples to load them.
+<br> <strong>Disclaimer</strong>: This is a research prototype and is not intended for production use. No data, including but not limited to text and images, is collected."""
+article = """<strong>Paper</strong>: <a href='https://arxiv.org/abs/2301.12597' target='_blank'>BLIP-2: Bootstrapping Language-Image Pre-training with Frozen Image Encoders and Large Language Models</a>
+<br> <strong>Code</strong>: BLIP-2 is now integrated into the GitHub repo <a href='https://github.com/salesforce/LAVIS' target='_blank'>LAVIS: a One-stop Library for Language and Vision</a>
+<br> <strong>🤗 `transformers` integration</strong>: You can now run our BLIP-2 models with `transformers`! Check out the <a href='https://huggingface.co/docs/transformers/main/en/model_doc/blip-2' target='_blank'>official docs</a>
+<p> <strong>Project Page</strong>: <a href='https://github.com/salesforce/LAVIS/tree/main/projects/blip2' target='_blank'>BLIP2 on LAVIS</a>
+<br> <strong>Description</strong>: Captioning results from <strong>BLIP2_OPT_6.7B</strong>. Chat results from <strong>BLIP2_FlanT5xxl</strong>.
+"""
+
+endpoint = Endpoint()
+
+examples = [
+    ["house.png", "How could someone get out of the house?"],
["flower.jpg", "Question: What is this flower and where is it's origin? Answer:"],
+    ["pizza.jpg", "What are steps to cook it?"],
+    ["sunset.jpg", "Here is a romantic message going along the photo:"],
+    ["forbidden_city.webp", "In what dynasties was this place built?"],
+]
+
+with gr.Blocks(
+    css="""
+    .message.svelte-w6rprc.svelte-w6rprc.svelte-w6rprc {font-size: 20px; margin-top: 20px}
+    #component-21 > div.wrap.svelte-w6rprc {height: 600px;}
+    """
+) as iface:
+    state = gr.State([])
+
+    gr.Markdown(title)
+    gr.Markdown(description)
+    gr.Markdown(article)
+
+    with gr.Row():
+        with gr.Column(scale=1):
+            image_input = gr.Image(type="pil")
+
+            # with gr.Row():
+            sampling = gr.Radio(
+                choices=["Beam search", "Nucleus sampling"],
+                value="Beam search",
+                label="Text Decoding Method",
+                interactive=True,
+            )
+
+            temperature = gr.Slider(
+                minimum=0.5,
+                maximum=1.0,
+                value=1.0,
+                step=0.1,
+                interactive=True,
+                label="Temperature (used with nucleus sampling)",
+            )
+
+            len_penalty = gr.Slider(
+                minimum=-1.0,
+                maximum=2.0,
+                value=1.0,
+                step=0.2,
+                interactive=True,
+                label="Length Penalty (set to larger for longer sequence, used with beam search)",
+            )
+
+            rep_penalty = gr.Slider(
+                minimum=1.0,
+                maximum=5.0,
+                value=1.5,
+                step=0.5,
+                interactive=True,
+                label="Repeat Penalty (larger value prevents repetition)",
+            )
+
+        with gr.Column(scale=1.8):
+
+            with gr.Column():
+                caption_output = gr.Textbox(lines=1, label="Caption Output")
+                caption_button = gr.Button(
+                    value="Caption it!", interactive=True, variant="primary"
+                )
+                caption_button.click(
+                    inference_caption,
+                    [
+                        image_input,
+                        sampling,
+                        temperature,
+                        len_penalty,
+                        rep_penalty,
+                    ],
+                    [caption_output],
+                )
+
gr.Markdown("""Trying prompting your input for chat; e.g. example prompt for QA, \"Question: {} Answer:\" Use proper punctuation (e.g., question mark).""")
+            with gr.Row():
+                with gr.Column(
+                    scale=1.5,
+                ):
+                    chatbot = gr.Chatbot(
+                        label="Chat Output (from FlanT5)",
+                    )
+
+                # with gr.Row():
+                with gr.Column(scale=1):
+                    chat_input = gr.Textbox(lines=1, label="Chat Input")
+                    chat_input.submit(
+                        inference_chat,
+                        [
+                            image_input,
+                            chat_input,
+                            sampling,
+                            temperature,
+                            len_penalty,
+                            rep_penalty,
+                            state,
+                        ],
+                        [chatbot, state],
+                    )
+
+                    with gr.Row():
+                        clear_button = gr.Button(value="Clear", interactive=True)
+                        clear_button.click(
+                            lambda: ("", [], []),
+                            [],
+                            [chat_input, chatbot, state],
+                            queue=False,
+                        )
+
+                        submit_button = gr.Button(
+                            value="Submit", interactive=True, variant="primary"
+                        )
+                        submit_button.click(
+                            inference_chat,
+                            [
+                                image_input,
+                                chat_input,
+                                sampling,
+                                temperature,
+                                len_penalty,
+                                rep_penalty,
+                                state,
+                            ],
+                            [chatbot, state],
+                        )
+
+    image_input.change(
+        lambda: ("", "", []),
+        [],
+        [chatbot, caption_output, state],
+        queue=False,
+    )
+
+    examples = gr.Examples(
+        examples=examples,
+        inputs=[image_input, chat_input],
+    )
+
+iface.queue(concurrency_count=1, api_open=False, max_size=10)
+iface.launch(enable_queue=True)
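The article text in app.py points readers to the Hugging Face `transformers` integration of BLIP-2. For reference, a minimal sketch of that usage, assuming the publicly released blip2-opt-2.7b checkpoint purely for illustration (the Space itself serves the larger BLIP2_OPT_6.7B and BLIP2_FlanT5xxl models through its own private backend):

# Sketch: BLIP-2 captioning and VQA via the transformers library, independent of this Space's backend.
from PIL import Image
from transformers import Blip2Processor, Blip2ForConditionalGeneration

checkpoint = "Salesforce/blip2-opt-2.7b"  # illustrative checkpoint, not the one this Space serves
processor = Blip2Processor.from_pretrained(checkpoint)
model = Blip2ForConditionalGeneration.from_pretrained(checkpoint)

image = Image.open("house.png").convert("RGB")  # one of the example images in this repo

# Captioning: pass only the image.
inputs = processor(images=image, return_tensors="pt")
ids = model.generate(**inputs, max_new_tokens=30)
print(processor.batch_decode(ids, skip_special_tokens=True)[0].strip())

# VQA-style prompting, using the same prompt format as the chat examples in app.py.
prompt = "Question: How could someone get out of the house? Answer:"
inputs = processor(images=image, text=prompt, return_tensors="pt")
ids = model.generate(**inputs, max_new_tokens=30)
print(processor.batch_decode(ids, skip_special_tokens=True)[0].strip())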
flower.jpg
ADDED

forbidden_city.webp
ADDED

house.png
ADDED
Git LFS Details

pizza.jpg
ADDED

sunset.jpg
ADDED
utils.py
ADDED
@@ -0,0 +1,27 @@
+import os
+
+
+class Endpoint:
+    def __init__(self):
+        self._url = None
+
+    @property
+    def url(self):
+        if self._url is None:
+            self._url = self.get_url()
+
+        return self._url
+
+    def get_url(self):
+        endpoint = os.environ.get("endpoint")
+
+        return endpoint
+
+
+def get_token():
+    token = os.environ.get("auth_token")
+
+    if token is None:
+        raise ValueError("auth_token not found in environment variables")
+
+    return token