Added streaming of model responses (only one response streams at a time)
Browse files
app.py
CHANGED
|
@@ -3,6 +3,7 @@ import random
|
|
| 3 |
import time
|
| 4 |
import os
|
| 5 |
import requests
|
|
|
|
| 6 |
from dotenv import load_dotenv
|
| 7 |
|
| 8 |
# Load environment variables
|
|
@@ -44,7 +45,8 @@ def get_response(question, model):
|
|
| 44 |
"model": model,
|
| 45 |
"messages": [
|
| 46 |
{"role": "user", "content": question}
|
| 47 |
-
]
|
|
|
|
| 48 |
}
|
| 49 |
|
| 50 |
try:
|
|
@@ -52,12 +54,29 @@ def get_response(question, model):
|
|
| 52 |
OPENROUTER_BASE_URL,
|
| 53 |
headers=headers,
|
| 54 |
json=data,
|
| 55 |
-
timeout=30 # 30 second timeout
|
|
|
|
| 56 |
)
|
| 57 |
response.raise_for_status()
|
| 58 |
|
| 59 |
-
|
| 60 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
|
| 62 |
except requests.exceptions.RequestException as e:
|
| 63 |
return f"Error: Failed to get response from {model}: {str(e)}"
|
|
@@ -120,8 +139,6 @@ with gr.Blocks() as demo:
|
|
| 120 |
questions = read_questions(file)
|
| 121 |
|
| 122 |
# Initialize all update values as blank
|
| 123 |
-
# We have 4 fields per question (model1, response1, model2, response2)
|
| 124 |
-
# => total of MAX_QUESTIONS * 4 output components
|
| 125 |
updates = [gr.update(value="")] * (MAX_QUESTIONS * 4)
|
| 126 |
|
| 127 |
# Process each question, 2 models per question
|
|
@@ -132,9 +149,9 @@ with gr.Blocks() as demo:
|
|
| 132 |
yield updates # partial update (reveal model_1 accordion)
|
| 133 |
|
| 134 |
# 2) Get response from model_1
|
| 135 |
-
response_1
|
| 136 |
-
|
| 137 |
-
|
| 138 |
|
| 139 |
# 3) Pick second model (ensure different from first), yield it
|
| 140 |
remaining_models = [m for m in MODELS if m != model_1]
|
|
@@ -143,9 +160,9 @@ with gr.Blocks() as demo:
|
|
| 143 |
yield updates
|
| 144 |
|
| 145 |
# 4) Get response from model_2
|
| 146 |
-
response_2
|
| 147 |
-
|
| 148 |
-
|
| 149 |
|
| 150 |
# The outputs we update after each yield
|
| 151 |
update_targets = []
|
|
|
|
| 3 |
import time
|
| 4 |
import os
|
| 5 |
import requests
|
| 6 |
+
import json
|
| 7 |
from dotenv import load_dotenv
|
| 8 |
|
| 9 |
# Load environment variables
|
|
|
|
| 45 |
"model": model,
|
| 46 |
"messages": [
|
| 47 |
{"role": "user", "content": question}
|
| 48 |
+
],
|
| 49 |
+
"stream": True
|
| 50 |
}
|
| 51 |
|
| 52 |
try:
|
|
|
|
| 54 |
OPENROUTER_BASE_URL,
|
| 55 |
headers=headers,
|
| 56 |
json=data,
|
| 57 |
+
timeout=30, # 30 second timeout
|
| 58 |
+
stream=True
|
| 59 |
)
|
| 60 |
response.raise_for_status()
|
| 61 |
|
| 62 |
+
full_response = ""
|
| 63 |
+
for line in response.iter_lines():
|
| 64 |
+
if line:
|
| 65 |
+
line = line.decode('utf-8')
|
| 66 |
+
if line.startswith('data: '):
|
| 67 |
+
json_str = line[6:] # Remove 'data: ' prefix
|
| 68 |
+
if json_str.strip() == '[DONE]':
|
| 69 |
+
break
|
| 70 |
+
try:
|
| 71 |
+
chunk = json.loads(json_str)
|
| 72 |
+
if chunk['choices'][0]['delta'].get('content'):
|
| 73 |
+
content = chunk['choices'][0]['delta']['content']
|
| 74 |
+
full_response += content
|
| 75 |
+
yield full_response
|
| 76 |
+
except json.JSONDecodeError:
|
| 77 |
+
continue
|
| 78 |
+
|
| 79 |
+
return full_response
|
| 80 |
|
| 81 |
except requests.exceptions.RequestException as e:
|
| 82 |
return f"Error: Failed to get response from {model}: {str(e)}"
|
|
|
|
| 139 |
questions = read_questions(file)
|
| 140 |
|
| 141 |
# Initialize all update values as blank
|
|
|
|
|
|
|
| 142 |
updates = [gr.update(value="")] * (MAX_QUESTIONS * 4)
|
| 143 |
|
| 144 |
# Process each question, 2 models per question
|
|
|
|
| 149 |
yield updates # partial update (reveal model_1 accordion)
|
| 150 |
|
| 151 |
# 2) Get response from model_1
|
| 152 |
+
for response_1 in get_response(question, model_1):
|
| 153 |
+
updates[i*4 + 1] = gr.update(value=response_1) # response1
|
| 154 |
+
yield updates
|
| 155 |
|
| 156 |
# 3) Pick second model (ensure different from first), yield it
|
| 157 |
remaining_models = [m for m in MODELS if m != model_1]
|
|
|
|
| 160 |
yield updates
|
| 161 |
|
| 162 |
# 4) Get response from model_2
|
| 163 |
+
for response_2 in get_response(question, model_2):
|
| 164 |
+
updates[i*4 + 3] = gr.update(value=response_2) # response2
|
| 165 |
+
yield updates
|
| 166 |
|
| 167 |
# The outputs we update after each yield
|
| 168 |
update_targets = []
|