ModernBert-Similarity

Running

App Files Files Community

eswardivi committed on Dec 29, 2024

Commit

3592f5f

verified ·

1 Parent(s): 5a94d47

Updated to ModernBERT for similarity comparison

Browse files

Files changed (1) hide show

app.py +84 -69

app.py CHANGED Viewed

@@ -1,26 +1,14 @@
 import gradio as gr
 import torch
 from transformers import (
-    AutoModelForCausalLM,
     AutoTokenizer,
-    TextIteratorStreamer,
 )
 import os
 from threading import Thread
 import spaces
 import time
-token = os.environ["HF_TOKEN"]
-model = AutoModelForCausalLM.from_pretrained(
-    "microsoft/Phi-3-mini-4k-instruct", token=token,trust_remote_code=True
-)
-tok = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct", token=token)
-terminators = [
-    tok.eos_token_id,
-]
 if torch.cuda.is_available():
     device = torch.device("cuda")
     print(f"Using GPU: {torch.cuda.get_device_name(device)}")
@@ -28,70 +16,97 @@ else:
     device = torch.device("cpu")
     print("Using CPU")
-model = model.to(device)
-# Dispatch Errors
-@spaces.GPU(duration=60)
-def chat(message, history, temperature,do_sample, max_tokens):
-    chat = []
-    for item in history:
-        chat.append({"role": "user", "content": item[0]})
-        if item[1] is not None:
-            chat.append({"role": "assistant", "content": item[1]})
-    chat.append({"role": "user", "content": message})
-    messages = tok.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
-    model_inputs = tok([messages], return_tensors="pt").to(device)
-    streamer = TextIteratorStreamer(
-        tok, timeout=20.0, skip_prompt=True, skip_special_tokens=True
     )
-    generate_kwargs = dict(
-        model_inputs,
-        streamer=streamer,
-        max_new_tokens=max_tokens,
-        do_sample=True,
-        temperature=temperature,
-        eos_token_id=terminators,
     )
-    if temperature == 0:
-        generate_kwargs['do_sample'] = False
-    t = Thread(target=model.generate, kwargs=generate_kwargs)
-    t.start()
-    partial_text = ""
-    for new_text in streamer:
-        partial_text += new_text
-        yield partial_text
-    yield partial_text
-demo = gr.ChatInterface(
-    fn=chat,
-    examples=[["Write me a poem about Machine Learning."]],
-    # multimodal=False,
-    additional_inputs_accordion=gr.Accordion(
-        label="⚙️ Parameters", open=False, render=False
-    ),
-    additional_inputs=[
-        gr.Slider(
-            minimum=0, maximum=1, step=0.1, value=0.9, label="Temperature", render=False
         ),
-        gr.Checkbox(label="Sampling",value=True),
-        gr.Slider(
-            minimum=128,
-            maximum=4096,
-            step=1,
-            value=512,
-            label="Max new tokens",
-            render=False,
         ),
     ],
-    stop_btn="Stop Generation",
-    title="Chat With LLMs",
-    description="Now Running [microsoft/Phi-3-mini-4k-instruct](https://huggingface.com/microsoft/Phi-3-mini-4k-instruct)"
-)
-demo.launch()

 import gradio as gr
 import torch
 from transformers import (
+    AutoModel,
     AutoTokenizer,
 )
 import os
 from threading import Thread
 import spaces
 import time
 if torch.cuda.is_available():
     device = torch.device("cuda")
     print(f"Using GPU: {torch.cuda.get_device_name(device)}")
     device = torch.device("cpu")
     print("Using CPU")
def mean_pooling(model_output, attention_mask):
    """Return the attention-mask-weighted mean of the token embeddings.

    Args:
        model_output: Indexable model result; element 0 is used as the
            token embeddings (assumed shape (batch, seq, hidden) -- TODO
            confirm against the model in use).
        attention_mask: Tensor with one fewer trailing dimension than the
            token embeddings; it is expanded to their shape and cast to
            float before being used as per-token weights.

    Returns:
        Tensor obtained by summing the weighted embeddings over dimension 1
        and dividing by the summed weights.
    """
    hidden_states = model_output[0]
    weights = attention_mask.unsqueeze(-1).expand(hidden_states.size()).float()
    weighted_sum = torch.sum(hidden_states * weights, 1)
    # Clamp the denominator so a fully masked row divides by 1e-9, not zero.
    weight_total = torch.clamp(weights.sum(1), min=1e-9)
    return weighted_sum / weight_total
def cls_pooling(model_output):
    """Return the embedding at position 0 of every sequence.

    Args:
        model_output: Indexable model result; element 0 is used as the
            token embeddings.

    Returns:
        The slice at index 0 along dimension 1 of the token embeddings
        (the first token of each sequence, which the UI text describes as
        the [CLS] token).
    """
    token_embeddings = model_output[0]
    return token_embeddings[:, 0]
# NOTE(review): `spaces.GPU` presumably requests GPU hardware for the duration
# of each call on Hugging Face Spaces -- confirm against the `spaces` docs.
@spaces.GPU
def get_embedding(text, use_mean_pooling, model_id):
    """Embed `text` with the model identified by `model_id`.

    The tokenizer and the model are loaded from `model_id` on every call,
    the model in float16, and the model is moved to the module-level
    `device`.

    Args:
        text: Input passed straight to the tokenizer.
        use_mean_pooling: When truthy, pool with `mean_pooling`; otherwise
            pool with `cls_pooling`.
        model_id: Identifier handed to `from_pretrained` for both the
            tokenizer and the model.

    Returns:
        The pooled embedding tensor produced by the selected pooling helper.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModel.from_pretrained(model_id, torch_dtype=torch.float16).to(device)

    # Inputs longer than 512 tokens are truncated by the tokenizer.
    encoded = tokenizer(
        text,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=512,
    )
    encoded = {key: value.to(device) for key, value in encoded.items()}

    # Inference only: no gradients are needed.
    with torch.no_grad():
        outputs = model(**encoded)

    if not use_mean_pooling:
        return cls_pooling(outputs)
    return mean_pooling(outputs, encoded["attention_mask"])
def get_similarity(text1, text2, pooling_method, model_id):
    """Return the cosine similarity between the embeddings of two texts.

    Args:
        text1: First text to embed.
        text2: Second text to embed.
        pooling_method: "Use Mean Pooling" selects mean pooling; any other
            value selects CLS pooling.
        model_id: Model identifier forwarded to `get_embedding`.

    Returns:
        The similarity as a Python number. `.item()` is called on the
        result, so the comparison must yield a single-element tensor.
    """
    mean_pool = pooling_method == "Use Mean Pooling"
    # Embed the two texts in order, each with its own get_embedding call.
    first, second = (
        get_embedding(text, mean_pool, model_id) for text in (text1, text2)
    )
    score = torch.nn.functional.cosine_similarity(first, second)
    return score.item()
# Build the Gradio UI and launch it: two text inputs, a pooling-method
# selector and a model selector feed `get_similarity`; the score is shown in
# a single text box. Every example row uses mean pooling with the default
# model, so the rows are generated from the text pairs alone.
gr.Interface(
    fn=get_similarity,
    inputs=[
        gr.Textbox(lines=7, label="Text 1"),
        gr.Textbox(lines=7, label="Text 2"),
        gr.Dropdown(
            label="Pooling Method",
            choices=["Use Mean Pooling", "Use CLS"],
            value="Use Mean Pooling",
            info="Mean Pooling: Averages all token embeddings (better for semantic similarity)\nCLS Pooling: Uses only the [CLS] token embedding (faster, might miss context)",
        ),
        gr.Dropdown(
            label="Model",
            choices=[
                "tasksource/ModernBERT-base-embed",
                "tasksource/ModernBERT-base-nli",
                "joe32140/ModernBERT-large-msmarco",
                "answerdotai/ModernBERT-large",
                "answerdotai/ModernBERT-base",
            ],
            value="answerdotai/ModernBERT-large",
            info="Choose between the variants of ModernBERT \nMight take a few seconds to load the model",
        ),
    ],
    outputs=gr.Textbox(label="Similarity"),
    title="ModernBERT Similarity Demo",
    description="Compute the similarity between two texts using ModernBERT. Choose between different pooling strategies for embedding generation.",
    examples=[
        [first_text, second_text, "Use Mean Pooling", "answerdotai/ModernBERT-large"]
        for first_text, second_text in (
            (
                "The quick brown fox jumps over the lazy dog",
                "A swift brown fox leaps above a sleeping canine",
            ),
            (
                "I love programming in Python",
                "I hate coding with Python",
            ),
            (
                "The weather is beautiful today",
                "Machine learning models are improving rapidly",
            ),
            (
                "def calculate_sum(a, b):\n    return a + b",
                "def add_numbers(x, y):\n    result = x + y\n    return result",
            ),
        )
    ],
).launch(share=True)