Spaces:

nomadicsynth
/

openclip-embed

Runtime error

App Files Files Community

RoboApocalypse committed on May 4, 2024

Commit

1f9e30b

1 Parent(s): 435181d

Refactor generate_embedding function to remove unneeded variables

Browse files

Files changed (1) hide show

app.py +8 -45

app.py CHANGED Viewed

@@ -1,10 +1,9 @@
 import gradio as gr
 from numpy import empty
 import open_clip
 import torch
 import PIL.Image as Image
-from io import BytesIO
-import base64
 # Set device to GPU if available
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
@@ -12,8 +11,8 @@ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 # Load the OpenCLIP model and the necessary preprocessors
 # openclip_model = 'laion/CLIP-ViT-B-32-laion2B-s34B-b79K'
 # openclip_model = 'laion/CLIP-ViT-B-16-laion2B-s34B-b88K'
-openclip_model = 'laion/CLIP-ViT-L-14-laion2B-s32B-b82K'
-openclip_model = 'hf-hub:' + openclip_model
 model, preprocess_train, preprocess_val = open_clip.create_model_and_transforms(
     model_name=openclip_model,
     device=device
@@ -21,7 +20,7 @@ model, preprocess_train, preprocess_val = open_clip.create_model_and_transforms(
 # Define function to generate text embeddings
-def generate_text_embedding(text_data):
     """
     Generate embeddings for text data using the OpenCLIP model.
@@ -76,7 +75,7 @@ def generate_text_embedding(text_data):
     return text_embeddings
 # Define function to generate image embeddings
-def generate_image_embedding(image_data):
     """
     Generate embeddings for image data using the OpenCLIP model.
@@ -129,7 +128,7 @@ def generate_image_embedding(image_data):
 # Define function to generate embeddings
-def generate_embedding(text_data, image_data, image_data_base64):
     """
     Generate embeddings for text and image data using the OpenCLIP model.
@@ -139,8 +138,6 @@ def generate_embedding(text_data, image_data, image_data_base64):
         Text data to embed.
     image_data : PIL.Image.Image or tuple of PIL.Image.Image
         Image data to embed.
-    image_data_base64 : str or tuple of str
-        Base64 encoded image data to embed.
     Returns
     -------
@@ -150,8 +147,6 @@ def generate_embedding(text_data, image_data, image_data_base64):
         List of image embeddings.
     similarity : list of str
         List of cosine similarity between text and image embeddings.
-    image_data_base64_embeddings : str or tuple of str
-        List of image embeddings for base64 encoded image data.
     """
     # Embed text data
@@ -193,38 +188,7 @@ def generate_embedding(text_data, image_data, image_data_base64):
         for i in empty_data_indices:
             similarity.insert(i, "")
-    # Embed base64 encoded image data
-    decoded_image_data = []
-    if image_data_base64:
-        # If image_data_base64 is a string, convert to list of strings
-        if isinstance(image_data_base64, str):
-            image_data_base64 = [image_data_base64]
-        # If image_data_base64 is a tuple of strings, convert to list of strings
-        if isinstance(image_data_base64, tuple):
-            image_data_base64 = list(image_data_base64)
-        # If image_data_base64 is not a list of strings, raise error
-        if not isinstance(image_data_base64, list):
-            raise TypeError("image_data_base64 must be a string or a tuple of strings.")
-        # Keep track of indices of empty image strings
-        empty_data_indices = [i for i, img in enumerate(image_data_base64) if img == ""]
-        # Remove empty image strings
-        image_data_base64 = [img for img in image_data_base64 if img != ""]
-        if image_data_base64:
-            # Decode base64 encoded image data
-            decoded_image_data = [Image.open(BytesIO(base64.b64decode(img))) for img in image_data_base64]
-        # Insert empty strings at indices of empty image strings
-        for i in empty_data_indices:
-            decoded_image_data.insert(i, None)
-    image_data_base64_embeddings = generate_image_embedding(tuple(decoded_image_data))
-    return (text_embeddings, image_embeddings, similarity, image_data_base64_embeddings)
 # Define Gradio interface
@@ -233,13 +197,12 @@ demo = gr.Interface(
     inputs=[
         gr.Textbox(lines=5, max_lines=5, placeholder="Enter Text Here...", label="Text to Embed"),
         gr.Image(height=512, type="pil", label="Image to Embed"),
-        gr.Textbox(lines=5, max_lines=5, label="Base64 Encoded Image", autoscroll=False)
     ],
     outputs=[
         gr.Textbox(lines=5, max_lines=5, label="Text Embedding", autoscroll=False),
         gr.Textbox(lines=5, max_lines=5, label="Image Embedding", autoscroll=False),
         gr.Textbox(label="Cosine Similarity"),
-        gr.Textbox(lines=5, max_lines=5, label="Embedding of Base64 Encoded Images", autoscroll=False)
     ],
     title="OpenCLIP Embedding Generator",
     description="Generate embeddings using OpenCLIP model for text and images.",

+from typing import Union
 import gradio as gr
 from numpy import empty
 import open_clip
 import torch
 import PIL.Image as Image
 # Set device to GPU if available
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 # Load the OpenCLIP model and the necessary preprocessors
 # openclip_model = 'laion/CLIP-ViT-B-32-laion2B-s34B-b79K'
 # openclip_model = 'laion/CLIP-ViT-B-16-laion2B-s34B-b88K'
+openclip_model_name = 'laion/CLIP-ViT-L-14-laion2B-s32B-b82K'
+openclip_model = "hf-hub:" + openclip_model_name
 model, preprocess_train, preprocess_val = open_clip.create_model_and_transforms(
     model_name=openclip_model,
     device=device
 # Define function to generate text embeddings
+def generate_text_embedding(text_data: Union[str, tuple[str]]) -> list[str]:
     """
     Generate embeddings for text data using the OpenCLIP model.
     return text_embeddings
 # Define function to generate image embeddings
+def generate_image_embedding(image_data: Union[Image.Image, tuple[Image.Image]]) -> list[str]:
     """
     Generate embeddings for image data using the OpenCLIP model.
 # Define function to generate embeddings
+def generate_embedding(text_data: Union[str, tuple[str]], image_data: Union[Image.Image, tuple[Image.Image]]) -> tuple[list[str], list[str], list[str]]:
     """
     Generate embeddings for text and image data using the OpenCLIP model.
         Text data to embed.
     image_data : PIL.Image.Image or tuple of PIL.Image.Image
         Image data to embed.
     Returns
     -------
         List of image embeddings.
     similarity : list of str
         List of cosine similarity between text and image embeddings.
     """
     # Embed text data
         for i in empty_data_indices:
             similarity.insert(i, "")
+    return (text_embeddings, image_embeddings, similarity, openclip_model_name)
 # Define Gradio interface
     inputs=[
         gr.Textbox(lines=5, max_lines=5, placeholder="Enter Text Here...", label="Text to Embed"),
         gr.Image(height=512, type="pil", label="Image to Embed"),
     ],
     outputs=[
         gr.Textbox(lines=5, max_lines=5, label="Text Embedding", autoscroll=False),
         gr.Textbox(lines=5, max_lines=5, label="Image Embedding", autoscroll=False),
         gr.Textbox(label="Cosine Similarity"),
+        gr.Textbox(label="Embedding Model"),
     ],
     title="OpenCLIP Embedding Generator",
     description="Generate embeddings using OpenCLIP model for text and images.",