nomic-embeddings

Running

App Files Community

Patryk Ptasiński committed on Jul 14

Commit

3726350

1 Parent(s): 1be0f7d

Add cuda

Browse files

Files changed (2) hide show

app.py +16 -1
test_models.sh +0 -45

app.py CHANGED Viewed

@@ -1,11 +1,23 @@
 from typing import List, Dict, Any
 import json
 import gradio as gr
 from fastapi import FastAPI
 from fastapi.responses import JSONResponse
 from sentence_transformers import SentenceTransformer
 # Available models
 MODELS = {
     "nomic-ai/nomic-embed-text-v1.5": {"trust_remote_code": True},
@@ -58,12 +70,14 @@ def load_model(model_name: str):
     # Load the new model
     trust_remote_code = MODELS.get(model_name, {}).get("trust_remote_code", False)
     try:
         current_model = SentenceTransformer(
             model_name,
             trust_remote_code=trust_remote_code,
-            device='cpu'
         )
         current_model_name = model_name
     except Exception as e:
         raise ValueError(f"Failed to load model '{model_name}': {str(e)}")
@@ -148,6 +162,7 @@ with gr.Blocks(title="Multi-Model Text Embeddings", css="""
 """) as app:
     gr.Markdown("# Multi-Model Text Embeddings")
     gr.Markdown("Generate embeddings for your text using 28+ state-of-the-art embedding models including top MTEB performers like NV-Embed-v2, gte-Qwen2-7B-instruct, Nomic, BGE, Snowflake, IBM Granite, Qwen3, Stella, and more.")
     # Model selector dropdown (allows custom input)
     model_dropdown = gr.Dropdown(

 from typing import List, Dict, Any
 import json
+import torch
 import gradio as gr
 from fastapi import FastAPI
 from fastapi.responses import JSONResponse
 from sentence_transformers import SentenceTransformer
+# Device detection - use GPU if available, otherwise CPU
+def get_device():
+    if torch.cuda.is_available():
+        print("🚀 GPU detected - using CUDA for acceleration")
+        return 'cuda'
+    else:
+        print("💻 Using CPU for inference")
+        return 'cpu'
+DEVICE = get_device()
 # Available models
 MODELS = {
     "nomic-ai/nomic-embed-text-v1.5": {"trust_remote_code": True},
     # Load the new model
     trust_remote_code = MODELS.get(model_name, {}).get("trust_remote_code", False)
     try:
+        print(f"Loading model '{model_name}' on {DEVICE}")
         current_model = SentenceTransformer(
             model_name,
             trust_remote_code=trust_remote_code,
+            device=DEVICE
         )
         current_model_name = model_name
+        print(f"✅ Model '{model_name}' loaded successfully on {DEVICE}")
     except Exception as e:
         raise ValueError(f"Failed to load model '{model_name}': {str(e)}")
 """) as app:
     gr.Markdown("# Multi-Model Text Embeddings")
     gr.Markdown("Generate embeddings for your text using 28+ state-of-the-art embedding models including top MTEB performers like NV-Embed-v2, gte-Qwen2-7B-instruct, Nomic, BGE, Snowflake, IBM Granite, Qwen3, Stella, and more.")
+    gr.Markdown(f"**Device**: {DEVICE.upper()} {'🚀' if DEVICE == 'cuda' else '💻'}")
     # Model selector dropdown (allows custom input)
     model_dropdown = gr.Dropdown(

test_models.sh DELETED Viewed

@@ -1,45 +0,0 @@
-#!/bin/bash
-# Test script for all embedding models
-BASE_URL="https://ipepe-nomic-embeddings.hf.space"
-TEST_TEXT="Hello world test"
-echo "Testing all embedding models..."
-echo "================================="
-# Get list of models
-MODELS=$(curl -s "${BASE_URL}/models" | grep -o '"[^"]*"' | grep -E "(nomic|BAAI|sentence|Snowflake|granite|Qwen|stella|nvidia|Alibaba|intfloat)" | tr -d '"')
-# Test each model
-for model in $MODELS; do
-    echo "Testing: $model"
-    # Test with 30 second timeout
-    response=$(timeout 30 curl -X POST "${BASE_URL}/embed" \
-        -H "Content-Type: application/json" \
-        -d "{\"text\": \"$TEST_TEXT\", \"model\": \"$model\"}" \
-        -w "\nHTTP_STATUS:%{http_code}" \
-        -s 2>/dev/null)
-    if [ $? -eq 124 ]; then
-        echo "  ❌ TIMEOUT (>30s)"
-    else
-        status=$(echo "$response" | grep "HTTP_STATUS" | cut -d: -f2)
-        if [ "$status" = "200" ]; then
-            # Check if response contains embedding
-            if echo "$response" | grep -q '"embedding":\['; then
-                echo "  ✅ SUCCESS"
-            else
-                echo "  ⚠️  PARTIAL - No embedding in response"
-            fi
-        else
-            # Extract error message
-            error_msg=$(echo "$response" | grep -o '"error":"[^"]*"' | cut -d'"' -f4)
-            echo "  ❌ ERROR ($status): $error_msg"
-        fi
-    fi
-    echo ""
-done
-echo "Testing complete!"