Added OpenRouter + env file; basic functionality working
Browse files- .env.example +2 -0
- .gitignore +1 -0
- README.md +56 -1
- TestQuesitons.txt +3 -0
- app.py +47 -14
- watch.py +36 -0
.env.example
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
OPENROUTER_API_KEY=your_api_key_here
|
| 2 |
+
OPENROUTER_BASE_URL=https://openrouter.ai/api/v1/chat/completions
|
.gitignore
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
.env
|
README.md
CHANGED
|
@@ -1,6 +1,61 @@
|
|
| 1 |
# Vibes Benchmark v0.1
|
| 2 |
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
|
| 5 |
Run it with
|
| 6 |
`python app.py`
|
|
|
|
| 1 |
# Vibes Benchmark v0.1
|
| 2 |
|
| 3 |
+
A tool for benchmarking different AI models by comparing their responses to custom questions.
|
| 4 |
+
|
| 5 |
+
## Prerequisites
|
| 6 |
+
|
| 7 |
+
- Python 3.8 or higher
|
| 8 |
+
- An OpenRouter API key ([Get one here](https://openrouter.ai/))
|
| 9 |
+
|
| 10 |
+
## Setup
|
| 11 |
+
|
| 12 |
+
1. Clone the repository:
|
| 13 |
+
```bash
|
| 14 |
+
git clone [repository-url]
|
| 15 |
+
cd vibes-benchmark
|
| 16 |
+
```
|
| 17 |
+
|
| 18 |
+
2. Install dependencies:
|
| 19 |
+
```bash
|
| 20 |
+
pip install -r requirements.txt
|
| 21 |
+
```
|
| 22 |
+
|
| 23 |
+
3. Configure environment variables:
|
| 24 |
+
```bash
|
| 25 |
+
cp .env.example .env
|
| 26 |
+
```
|
| 27 |
+
Then edit `.env` and add your OpenRouter API key
|
| 28 |
+
|
| 29 |
+
## Usage
|
| 30 |
+
|
| 31 |
+
1. Prepare a text file with your questions (one per line)
|
| 32 |
+
2. Run the application:
|
| 33 |
+
```bash
|
| 34 |
+
python app.py
|
| 35 |
+
```
|
| 36 |
+
3. Upload your questions file through the web interface
|
| 37 |
+
4. Click "Run Benchmark" to start comparing model responses
|
| 38 |
+
|
| 39 |
+
## Features
|
| 40 |
+
|
| 41 |
+
- Compare responses from different AI models side by side
|
| 42 |
+
- Supports up to 10 questions per benchmark
|
| 43 |
+
- Randomly selects different models for comparison
|
| 44 |
+
- Real-time response generation
|
| 45 |
+
|
| 46 |
+
## Supported Models
|
| 47 |
+
|
| 48 |
+
- Claude 3 Opus
|
| 49 |
+
- Claude 3 Sonnet
|
| 50 |
+
- Gemini Pro
|
| 51 |
+
- Mistral Medium
|
| 52 |
+
- Claude 2.1
|
| 53 |
+
- GPT-4 Turbo
|
| 54 |
+
- GPT-3.5 Turbo
|
| 55 |
+
|
| 56 |
+
## License
|
| 57 |
+
|
| 58 |
+
[Your chosen license]
|
| 59 |
|
| 60 |
Run it with
|
| 61 |
`python app.py`
|
TestQuesitons.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
How many states are in America?
|
| 2 |
+
|
| 3 |
+
How much wood could a woodchuck chuck if a woodchuck could chuck wood?
|
app.py
CHANGED
|
@@ -1,6 +1,12 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
import random
|
| 3 |
import time
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
|
| 5 |
MAX_QUESTIONS = 10 # Maximum number of questions to support
|
| 6 |
|
|
@@ -8,26 +14,53 @@ MAX_QUESTIONS = 10 # Maximum number of questions to support
|
|
| 8 |
# Fix the models
|
| 9 |
#
|
| 10 |
MODELS = [
|
| 11 |
-
"anthropic/claude-3-opus",
|
| 12 |
-
"anthropic/claude-3-sonnet",
|
| 13 |
"google/gemini-pro",
|
| 14 |
-
"
|
| 15 |
-
"
|
| 16 |
-
"
|
| 17 |
-
"
|
| 18 |
]
|
| 19 |
#
|
| 20 |
######
|
| 21 |
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
def get_response(question, model):
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
#
|
| 30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
|
| 32 |
def read_questions(file_obj):
|
| 33 |
"""Read questions from uploaded file and return as list"""
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import random
|
| 3 |
import time
|
| 4 |
+
import os
|
| 5 |
+
import requests
|
| 6 |
+
from dotenv import load_dotenv
|
| 7 |
+
|
| 8 |
+
# Load environment variables
|
| 9 |
+
load_dotenv()
|
| 10 |
|
| 11 |
MAX_QUESTIONS = 10 # Maximum number of questions to support
|
| 12 |
|
|
|
|
| 14 |
# Fix the models
|
| 15 |
#
|
| 16 |
# OpenRouter model identifiers the benchmark samples from.
MODELS = [
    "anthropic/claude-3-opus-20240229",
    "anthropic/claude-3-sonnet-20240229",
    "google/gemini-pro",
    "mistralai/mistral-medium",  # Updated from mistral-7b-instruct
    "anthropic/claude-2.1",
    "openai/gpt-4-turbo-preview",
    "openai/gpt-3.5-turbo",
]
|
| 25 |
#
|
| 26 |
######
|
| 27 |
|
| 28 |
+
# Pull API configuration from the environment (populated by load_dotenv()).
# OPENROUTER_API_KEY: bearer token for OpenRouter.
# OPENROUTER_BASE_URL: full chat-completions endpoint URL.
OPENROUTER_API_KEY = os.getenv('OPENROUTER_API_KEY')
OPENROUTER_BASE_URL = os.getenv('OPENROUTER_BASE_URL')

# Fail fast at startup instead of erroring on the first request.
if not (OPENROUTER_API_KEY and OPENROUTER_BASE_URL):
    raise ValueError("Missing required environment variables. Please check your .env file.")
|
| 34 |
+
|
| 35 |
def get_response(question, model):
    """Get response from OpenRouter API for the given question and model.

    Args:
        question: The user prompt to send as a single user message.
        model: OpenRouter model identifier (e.g. "openai/gpt-3.5-turbo").

    Returns:
        The model's reply text, or an "Error: ..." string on failure.
        This function never raises: callers render whatever string comes
        back directly in the UI.
    """
    headers = {
        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
        "HTTP-Referer": "http://localhost:7860",  # Replace with your actual domain
        "Content-Type": "application/json"
    }

    data = {
        "model": model,
        "messages": [
            {"role": "user", "content": question}
        ]
    }

    try:
        response = requests.post(
            OPENROUTER_BASE_URL,
            headers=headers,
            json=data,
            timeout=30  # 30 second timeout
        )
        response.raise_for_status()

        result = response.json()
        # A 200 response can still carry an error payload or an unexpected
        # shape; keep the subscripting inside the try so a malformed body
        # is reported instead of crashing the Gradio handler.
        return result['choices'][0]['message']['content']

    except requests.exceptions.RequestException as e:
        return f"Error: Failed to get response from {model}: {str(e)}"
    except (KeyError, IndexError, TypeError, ValueError) as e:
        # Non-JSON body, or JSON without the expected choices/message keys.
        return f"Error: Unexpected response from {model}: {str(e)}"
|
| 64 |
|
| 65 |
def read_questions(file_obj):
|
| 66 |
"""Read questions from uploaded file and return as list"""
|
watch.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from watchdog.observers import Observer
|
| 2 |
+
from watchdog.events import FileSystemEventHandler
|
| 3 |
+
import subprocess
|
| 4 |
+
import time
|
| 5 |
+
import sys
|
| 6 |
+
|
| 7 |
+
class AppReloader(FileSystemEventHandler):
    """Watchdog handler that launches app.py and restarts it on change."""

    def __init__(self):
        # Handle of the currently running app.py subprocess (None until started).
        self.process = None
        self.start_app()

    def start_app(self):
        """Terminate any running instance, then launch a fresh app.py."""
        if self.process:
            self.process.terminate()
            self.process.wait()  # reap the old process so it can't linger as a zombie
        print("\n--- Restarting app.py ---\n")
        # Use the same interpreter that is running this watcher.
        self.process = subprocess.Popen([sys.executable, "app.py"])

    def on_modified(self, event):
        # Compare the basename, not a suffix: endswith('app.py') would also
        # match unrelated files such as 'myapp.py'.
        filename = event.src_path.replace("\\", "/").rsplit("/", 1)[-1]
        if filename == "app.py":
            self.start_app()
|
| 22 |
+
|
| 23 |
+
if __name__ == "__main__":
    # Launch app.py right away, then watch the current directory
    # (non-recursively) and restart the app whenever it is modified.
    reloader = AppReloader()
    watcher = Observer()
    watcher.schedule(reloader, path='.', recursive=False)
    watcher.start()

    try:
        # Idle loop; the observer thread does the actual watching.
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        # Ctrl-C: stop watching and tear down the child process.
        watcher.stop()
        if reloader.process:
            reloader.process.terminate()
    watcher.join()
|