|
|
import os |
|
|
os.environ["TOKENIZERS_PARALLELISM"] = "false"  # silence the HuggingFace tokenizers fork/parallelism warning
|
|
|
|
|
|
|
|
|
|
|
MISTRAL_API_KEY = os.environ.get("MISTRAL_API_KEY", "") |
|
|
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "") |
|
|
NEBIUS_API_KEY = os.environ.get("NEBIUS_API_KEY", "") |
|
|
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "") |
|
|
ANTHROPIC_API_KEY = os.environ.get("ANTHROPIC_API_KEY", "") |
|
|
GROK_API_KEY = os.environ.get("GROK_API_KEY", "") |
|
|
HF_API_TOKEN = os.environ.get("HF_API_TOKEN", "") |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import datetime |
|
|
import functools |
|
|
import traceback |
|
|
from typing import List, Optional, Any, Dict |
|
|
|
|
|
import torch |
|
|
import transformers |
|
|
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline |
|
|
from langchain_community.llms import HuggingFacePipeline |
|
|
|
|
|
|
|
|
from langchain_community.document_loaders import OnlinePDFLoader |
|
|
from langchain.text_splitter import RecursiveCharacterTextSplitter |
|
|
from langchain_community.vectorstores import FAISS |
|
|
from langchain_community.embeddings import HuggingFaceEmbeddings
|
|
from langchain_community.retrievers import BM25Retriever |
|
|
from langchain.retrievers import EnsembleRetriever |
|
|
from langchain.prompts import ChatPromptTemplate |
|
|
from langchain.schema import StrOutputParser, Document |
|
|
from langchain_core.runnables import RunnableParallel, RunnableLambda |
|
|
from transformers.quantizers.auto import AutoQuantizationConfig |
|
|
import gradio as gr |
|
|
import requests |
|
|
from pydantic import PrivateAttr |
|
|
import pydantic |
|
|
import zipfile |
|
|
import mimetypes |
|
|
|
|
|
from langchain.llms.base import LLM |
|
|
|
|
import time |
|
|
import sys |
|
|
import csv |
|
|
import statistics |
|
|
import re |
|
|
|
|
|
|
|
|
try: |
|
|
import openai |
|
|
from importlib.metadata import version as pkg_version |
|
|
openai_version = pkg_version("openai") |
|
|
print(f"OpenAI import success, version: {openai_version}") |
|
|
    # Compare only the leading numeric components so pre-release suffixes (e.g. "1.0.0rc1") do not break the check.
    if tuple(int(p) for p in re.findall(r"\d+", openai_version)[:3]) < (1, 0, 0):
|
|
print("ERROR: openai version must be >= 1.0.0 for NEBIUS support. Please upgrade with: pip install --upgrade openai") |
|
|
sys.exit(1) |
|
|
from openai import OpenAI |
|
|
OPENAI_AVAILABLE = True |
|
|
except ImportError as e: |
|
|
OPENAI_AVAILABLE = False |
|
|
print("OpenAI import failed:", e) |
|
|
except Exception as e: |
|
|
print("OpenAI version check failed:", e) |
|
|
sys.exit(1) |
|
|
|
|
|
|
|
|
try: |
|
|
from mistralai import Mistral |
|
|
MISTRAL_AVAILABLE = True |
|
|
print("Mistral import success") |
|
|
except ImportError as e: |
|
|
MISTRAL_AVAILABLE = False |
|
|
print("Mistral import failed:", e) |
|
|
print("Please install mistralai package with: pip install mistralai") |
|
|
except Exception as e: |
|
|
MISTRAL_AVAILABLE = False |
|
|
print("Mistral import error:", e) |
|
|
|
|
|
def debug_print(message: str): |
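    """Print a timestamped log message, flushed immediately so output appears in order."""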
|
|
print(f"[{datetime.datetime.now().isoformat()}] {message}", flush=True) |
|
|
|
|
|
def word_count(text: str) -> int: |
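    """Return a simple whitespace-based word count."""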
|
|
return len(text.split()) |
|
|
|
|
|
|
|
|
def initialize_tokenizer(): |
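    """Load a small GPT-2 tokenizer used only for token counting; returns None if loading fails."""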
|
|
try: |
|
|
return AutoTokenizer.from_pretrained("gpt2") |
|
|
except Exception as e: |
|
|
debug_print("Failed to initialize tokenizer: " + str(e)) |
|
|
return None |
|
|
|
|
|
global_tokenizer = initialize_tokenizer() |
|
|
|
|
|
def count_tokens(text: str) -> int: |
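    """Count tokens with the shared GPT-2 tokenizer, falling back to a word count if it is unavailable."""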
|
|
if global_tokenizer: |
|
|
try: |
|
|
return len(global_tokenizer.encode(text)) |
|
|
except Exception as e: |
|
|
return len(text.split()) |
|
|
return len(text.split()) |
|
|
|
|
|
|
|
|
|
|
|
import uuid |
|
|
import threading |
|
|
import queue |
|
|
from typing import Dict, Any, Tuple, Optional |
|
|
import time |
|
|
|
|
|
|
|
|
# In-memory job tracking shared across the Gradio handlers: job metadata keyed by
# job id, a queue for results coming back from worker threads, and a lock for
# coordinating access to shared state.
jobs = {}
|
|
results_queue = queue.Queue() |
|
|
processing_lock = threading.Lock() |
|
|
|
|
|
|
|
|
# Most recently submitted job id, used to prefill the status-check UI.
last_job_id = None
|
|
|
|
|
|
|
|
|
|
|
def process_in_background(job_id, function, args): |
|
|
"""Process a function in the background and store results""" |
|
|
try: |
|
|
debug_print(f"Processing job {job_id} in background") |
|
|
result = function(*args) |
|
|
results_queue.put((job_id, result)) |
|
|
debug_print(f"Job {job_id} completed and added to results queue") |
|
|
except Exception as e: |
|
|
debug_print(f"Error in background job {job_id}: {str(e)}") |
|
|
error_result = (f"Error processing job: {str(e)}", "", "", "") |
|
|
results_queue.put((job_id, error_result)) |
|
|
|
|
|
def load_pdfs_async(file_links, model_choice, prompt_template, bm25_weight, temperature, top_p): |
|
|
"""Asynchronous version of load_pdfs_updated to prevent timeouts""" |
|
|
global last_job_id |
|
|
if not file_links: |
|
|
return "Please enter non-empty URLs", "", "Model used: N/A", "", "", get_job_list() |
|
|
|
|
|
job_id = str(uuid.uuid4()) |
|
|
debug_print(f"Starting async job {job_id} for file loading") |
|
|
|
|
|
|
|
|
threading.Thread( |
|
|
target=process_in_background, |
|
|
args=(job_id, load_pdfs_updated, [file_links, model_choice, prompt_template, bm25_weight, temperature, top_p]) |
|
|
).start() |
|
|
|
|
|
job_query = f"Loading files: {file_links.split()[0]}..." if file_links else "No files" |
|
|
jobs[job_id] = { |
|
|
"status": "processing", |
|
|
"type": "load_files", |
|
|
"start_time": time.time(), |
|
|
"query": job_query |
|
|
} |
|
|
|
|
|
last_job_id = job_id |
|
|
|
|
|
return ( |
|
|
f"Files submitted and processing in the background (Job ID: {job_id}).\n\n" |
|
|
f"Use 'Check Job Status' tab with this ID to get results.", |
|
|
f"Job ID: {job_id}", |
|
|
f"Model requested: {model_choice}", |
|
|
job_id, |
|
|
job_query, |
|
|
get_job_list() |
|
|
) |
|
|
|
|
|
def submit_query_async(query, model1, model2, temperature, top_p, top_k, max_tokens): |
|
|
"""Asynchronous version of submit_query_updated to prevent timeouts""" |
|
|
global last_job_id |
|
|
if not query: |
|
|
return ("Please enter a non-empty query", "Input/Output tokens: 0/0", |
|
|
"Please enter a non-empty query", "Input/Output tokens: 0/0", |
|
|
"", "", get_job_list()) |
|
|
if not (model1 or model2): |
|
|
return ("Please select at least one model", "Input/Output tokens: 0/0", |
|
|
"Please select at least one model", "Input/Output tokens: 0/0", |
|
|
"", "", get_job_list()) |
|
|
responses = {"model1": None, "model2": None} |
|
|
job_ids = [] |
|
|
if model1: |
|
|
model1_job_id = str(uuid.uuid4()) |
|
|
debug_print(f"Starting async job {model1_job_id} for Model 1: {model1}") |
|
|
threading.Thread( |
|
|
target=process_in_background, |
|
|
args=(model1_job_id, submit_query_updated, [query, model1, temperature, top_p, top_k, max_tokens]) |
|
|
).start() |
|
|
jobs[model1_job_id] = { |
|
|
"status": "processing", |
|
|
"type": "query", |
|
|
"start_time": time.time(), |
|
|
"query": query, |
|
|
"model": model1, |
|
|
"model_position": "model1" |
|
|
} |
|
|
job_ids.append(model1_job_id) |
|
|
responses["model1"] = f"Processing (Job ID: {model1_job_id})" |
|
|
if model2: |
|
|
model2_job_id = str(uuid.uuid4()) |
|
|
debug_print(f"Starting async job {model2_job_id} for Model 2: {model2}") |
|
|
threading.Thread( |
|
|
target=process_in_background, |
|
|
args=(model2_job_id, submit_query_updated, [query, model2, temperature, top_p, top_k, max_tokens]) |
|
|
).start() |
|
|
jobs[model2_job_id] = { |
|
|
"status": "processing", |
|
|
"type": "query", |
|
|
"start_time": time.time(), |
|
|
"query": query, |
|
|
"model": model2, |
|
|
"model_position": "model2" |
|
|
} |
|
|
job_ids.append(model2_job_id) |
|
|
responses["model2"] = f"Processing (Job ID: {model2_job_id})" |
|
|
last_job_id = job_ids[0] if job_ids else None |
|
|
return ( |
|
|
responses.get("model1", "Not selected"), |
|
|
"Input tokens: " + str(count_tokens(query)) if model1 else "Not selected", |
|
|
responses.get("model2", "Not selected"), |
|
|
"Input tokens: " + str(count_tokens(query)) if model2 else "Not selected", |
|
|
last_job_id, |
|
|
query, |
|
|
get_job_list() |
|
|
) |
|
|
|
|
|
def update_ui_with_last_job_id(): |
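    """Placeholder hook for pushing the most recent job id into the UI; currently a no-op."""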
|
|
|
|
|
|
|
|
pass |
|
|
|
|
|
|
|
|
def get_job_list(): |
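    """Render the submitted jobs as a Markdown list, newest first, with status icons."""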
|
|
job_list_md = "### Submitted Jobs\n\n" |
|
|
|
|
|
if not jobs: |
|
|
return "No jobs found. Submit a query or load files to create jobs." |
|
|
|
|
|
|
|
|
sorted_jobs = sorted( |
|
|
[(job_id, job_info) for job_id, job_info in jobs.items()], |
|
|
key=lambda x: x[1].get("start_time", 0), |
|
|
reverse=True |
|
|
) |
|
|
|
|
|
for job_id, job_info in sorted_jobs: |
|
|
status = job_info.get("status", "unknown") |
|
|
job_type = job_info.get("type", "unknown") |
|
|
query = job_info.get("query", "") |
|
|
model = job_info.get("model", "") |
|
|
start_time = job_info.get("start_time", 0) |
|
|
time_str = datetime.datetime.fromtimestamp(start_time).strftime("%Y-%m-%d %H:%M:%S") |
|
|
|
|
|
|
|
|
query_preview = query[:30] + "..." if query and len(query) > 30 else query or "N/A" |
|
|
|
|
|
|
|
|
if status == "processing": |
|
|
status_formatted = f"<span style='color: red'>⏳ {status}</span>" |
|
|
elif status == "completed": |
|
|
status_formatted = f"<span style='color: green'>✅ {status}</span>" |
|
|
else: |
|
|
status_formatted = f"<span style='color: orange'>❓ {status}</span>" |
|
|
|
|
|
|
|
|
        model_icon = "🇺🇸" if "Llama" in model else "🇪🇺" if "Mistral" in model else ""
|
|
model_prefix = f"{model_icon} {model} " if model else "" |
|
|
|
|
|
|
|
|
if job_type == "query": |
|
|
job_list_md += f"- [{job_id}](javascript:void) - {time_str} - {status_formatted} - {model_prefix}Query: {query_preview}\n" |
|
|
else: |
|
|
job_list_md += f"- [{job_id}](javascript:void) - {time_str} - {status_formatted} - File Load Job\n" |
|
|
|
|
|
return job_list_md |
|
|
|
|
|
|
|
|
def job_selected(job_id): |
|
|
if job_id in jobs: |
|
|
return job_id, jobs[job_id].get("query", "No query for this job") |
|
|
return job_id, "Job not found" |
|
|
|
|
|
|
|
|
def refresh_job_list(): |
|
|
return get_job_list() |
|
|
|
|
|
|
|
|
def sync_model_dropdown(value): |
|
|
return value |
|
|
|
|
|
|
|
|
def check_job_status(job_id): |
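    """Check a job's status: drain any finished results from the queue, then return a
    status report plus the fields needed to refresh the query/response UI."""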
|
|
if not job_id: |
|
|
|
|
|
return "Please enter a job ID", "", "", "", "", "", "", "", "" |
|
|
|
|
|
|
|
|
try: |
|
|
while not results_queue.empty(): |
|
|
completed_id, result = results_queue.get_nowait() |
|
|
if completed_id in jobs: |
|
|
jobs[completed_id]["status"] = "completed" |
|
|
jobs[completed_id]["result"] = result |
|
|
jobs[completed_id]["end_time"] = time.time() |
|
|
debug_print(f"Job {completed_id} completed and stored in jobs dictionary") |
|
|
except queue.Empty: |
|
|
pass |
|
|
|
|
|
|
|
|
if job_id not in jobs: |
|
|
return "Job not found. Please check the ID and try again.", "", "", "", "", "", "", "", "" |
|
|
|
|
|
job = jobs[job_id] |
|
|
job_query = job.get("query", "No query available for this job") |
|
|
|
|
|
|
|
|
model1_resp, model1_tok, model2_resp, model2_tok = update_model_responses_from_jobs() |
|
|
|
|
|
|
|
|
status_report = generate_detailed_job_status(job_id, job) |
|
|
|
|
|
|
|
|
if job["status"] == "processing": |
|
|
elapsed_time = time.time() - job["start_time"] |
|
|
job_type = job.get("type", "unknown") |
|
|
|
|
|
if job_type == "load_files": |
|
|
return ( |
|
|
status_report, |
|
|
f"Job ID: {job_id}", |
|
|
f"Status: Processing", |
|
|
"", |
|
|
job_query, |
|
|
model1_resp, |
|
|
model1_tok, |
|
|
model2_resp, |
|
|
model2_tok |
|
|
) |
|
|
else: |
|
|
return ( |
|
|
status_report, |
|
|
f"Job ID: {job_id}", |
|
|
f"Input tokens: {count_tokens(job.get('query', ''))}", |
|
|
"Output tokens: pending", |
|
|
job_query, |
|
|
model1_resp, |
|
|
model1_tok, |
|
|
model2_resp, |
|
|
model2_tok |
|
|
) |
|
|
|
|
|
|
|
|
if job["status"] == "completed": |
|
|
result = job["result"] |
|
|
processing_time = job["end_time"] - job["start_time"] |
|
|
|
|
|
if job.get("type") == "load_files": |
|
|
return ( |
|
|
status_report, |
|
|
result[1], |
|
|
result[2], |
|
|
"", |
|
|
job_query, |
|
|
model1_resp, |
|
|
model1_tok, |
|
|
model2_resp, |
|
|
model2_tok |
|
|
) |
|
|
else: |
|
|
|
|
|
r = list(result) if isinstance(result, (list, tuple)) else [result] |
|
|
while len(r) < 4: |
|
|
r.append("") |
|
|
return ( |
|
|
status_report, |
|
|
r[1], |
|
|
r[2], |
|
|
r[3], |
|
|
job_query, |
|
|
model1_resp, |
|
|
model1_tok, |
|
|
model2_resp, |
|
|
model2_tok |
|
|
) |
|
|
|
|
|
|
|
|
return status_report, "", "", "", job_query, model1_resp, model1_tok, model2_resp, model2_tok |
|
|
|
|
|
def generate_detailed_job_status(job_id, job): |
|
|
"""Generate detailed status report for a job showing model processing information""" |
|
|
if not job: |
|
|
return "Job not found" |
|
|
|
|
|
job_type = job.get("type", "unknown") |
|
|
status = job.get("status", "unknown") |
|
|
query = job.get("query", "") |
|
|
model = job.get("model", "") |
|
|
start_time = job.get("start_time", 0) |
|
|
end_time = job.get("end_time", 0) |
|
|
|
|
|
report = f"## Job Status Report\n\n" |
|
|
report += f"**Job ID:** {job_id}\n" |
|
|
report += f"**Type:** {job_type}\n" |
|
|
report += f"**Status:** {status}\n" |
|
|
report += f"**Query:** {query[:100]}{'...' if len(query) > 100 else ''}\n\n" |
|
|
|
|
|
if job_type == "query": |
|
|
|
|
|
related_jobs = [(jid, jinfo) for jid, jinfo in jobs.items() |
|
|
if jinfo.get("query") == query and jinfo.get("type") == "query"] |
|
|
|
|
|
report += f"## Model Processing Status\n\n" |
|
|
|
|
|
for jid, jinfo in related_jobs: |
|
|
jmodel = jinfo.get("model", "Unknown") |
|
|
jstatus = jinfo.get("status", "unknown") |
|
|
jstart = jinfo.get("start_time", 0) |
|
|
jend = jinfo.get("end_time", 0) |
|
|
|
|
|
if jstatus == "processing": |
|
|
elapsed = time.time() - jstart |
|
|
report += f"**{jmodel}:** ⏳ Processing (elapsed: {elapsed:.1f}s)\n" |
|
|
elif jstatus == "completed": |
|
|
elapsed = jend - jstart |
|
|
result = jinfo.get("result", ("", "", "", "")) |
|
|
input_tokens = result[1] if len(result) > 1 else "N/A" |
|
|
output_tokens = result[2] if len(result) > 2 else "N/A" |
|
|
report += f"**{jmodel}:** ✅ Completed (time: {elapsed:.1f}s, {input_tokens}, {output_tokens})\n" |
|
|
else: |
|
|
report += f"**{jmodel}:** ❓ {jstatus}\n" |
|
|
|
|
|
|
|
|
completed_jobs = [j for j in related_jobs if j[1].get("status") == "completed"] |
|
|
processing_jobs = [j for j in related_jobs if j[1].get("status") == "processing"] |
|
|
|
|
|
report += f"\n## Summary\n" |
|
|
report += f"- **Total models:** {len(related_jobs)}\n" |
|
|
report += f"- **Completed:** {len(completed_jobs)}\n" |
|
|
report += f"- **Processing:** {len(processing_jobs)}\n" |
|
|
|
|
|
if completed_jobs: |
|
|
total_time = sum(j[1].get("end_time", 0) - j[1].get("start_time", 0) for j in completed_jobs) |
|
|
report += f"- **Total processing time:** {total_time:.1f}s\n" |
|
|
|
|
|
elif job_type == "load_files": |
|
|
if status == "processing": |
|
|
elapsed = time.time() - start_time |
|
|
report += f"**File loading in progress** (elapsed: {elapsed:.1f}s)\n" |
|
|
elif status == "completed": |
|
|
elapsed = end_time - start_time |
|
|
report += f"**File loading completed** (time: {elapsed:.1f}s)\n" |
|
|
|
|
|
return report |
|
|
|
|
|
def update_model_responses_from_jobs(): |
|
|
"""Update Model 1 and Model 2 response fields based on completed jobs""" |
|
|
global last_job_id |
|
|
|
|
|
|
|
|
try: |
|
|
while not results_queue.empty(): |
|
|
completed_id, result = results_queue.get_nowait() |
|
|
if completed_id in jobs: |
|
|
jobs[completed_id]["status"] = "completed" |
|
|
jobs[completed_id]["result"] = result |
|
|
jobs[completed_id]["end_time"] = time.time() |
|
|
debug_print(f"Job {completed_id} completed and stored in jobs dictionary") |
|
|
except queue.Empty: |
|
|
pass |
|
|
|
|
|
|
|
|
model1_jobs = [(job_id, job_info) for job_id, job_info in jobs.items() |
|
|
if job_info.get("type") == "query" and job_info.get("status") == "completed" |
|
|
and job_info.get("model_position") == "model1"] |
|
|
model2_jobs = [(job_id, job_info) for job_id, job_info in jobs.items() |
|
|
if job_info.get("type") == "query" and job_info.get("status") == "completed" |
|
|
and job_info.get("model_position") == "model2"] |
|
|
|
|
|
|
|
|
model1_jobs.sort(key=lambda x: x[1].get("end_time", 0), reverse=True) |
|
|
model2_jobs.sort(key=lambda x: x[1].get("end_time", 0), reverse=True) |
|
|
|
|
|
model1_response = "No completed jobs found" |
|
|
model1_tokens = "Input/Output tokens: 0/0" |
|
|
model2_response = "No completed jobs found" |
|
|
model2_tokens = "Input/Output tokens: 0/0" |
|
|
|
|
|
if model1_jobs: |
|
|
|
|
|
job_id, job_info = model1_jobs[0] |
|
|
result = job_info.get("result", ("", "", "", "")) |
|
|
model_name = job_info.get("model", "Unknown Model") |
|
|
response_text = result[0] if len(result) > 0 else "No response" |
|
|
input_tokens = result[1] if len(result) > 1 else "Input tokens: 0" |
|
|
output_tokens = result[2] if len(result) > 2 else "Output tokens: 0" |
|
|
|
|
|
model1_response = f"Model: {model_name}\n{input_tokens} | {output_tokens}\n\n{response_text}" |
|
|
model1_tokens = f"{input_tokens} | {output_tokens}" |
|
|
|
|
|
if model2_jobs: |
|
|
|
|
|
job_id, job_info = model2_jobs[0] |
|
|
result = job_info.get("result", ("", "", "", "")) |
|
|
model_name = job_info.get("model", "Unknown Model") |
|
|
response_text = result[0] if len(result) > 0 else "No response" |
|
|
input_tokens = result[1] if len(result) > 1 else "Input tokens: 0" |
|
|
output_tokens = result[2] if len(result) > 2 else "Output tokens: 0" |
|
|
|
|
|
model2_response = f"Model: {model_name}\n{input_tokens} | {output_tokens}\n\n{response_text}" |
|
|
model2_tokens = f"{input_tokens} | {output_tokens}" |
|
|
|
|
|
return model1_response, model1_tokens, model2_response, model2_tokens |
|
|
|
|
|
|
|
|
def cleanup_old_jobs(): |
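    """Drop completed jobs older than 24 hours and jobs stuck in 'processing' for over 48 hours."""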
|
|
current_time = time.time() |
|
|
to_delete = [] |
|
|
|
|
|
for job_id, job in jobs.items(): |
|
|
|
|
|
if job["status"] == "completed" and (current_time - job.get("end_time", 0)) > 86400: |
|
|
to_delete.append(job_id) |
|
|
elif job["status"] == "processing" and (current_time - job.get("start_time", 0)) > 172800: |
|
|
to_delete.append(job_id) |
|
|
|
|
|
for job_id in to_delete: |
|
|
del jobs[job_id] |
|
|
|
|
|
debug_print(f"Cleaned up {len(to_delete)} old jobs. {len(jobs)} jobs remaining.") |
|
|
return f"Cleaned up {len(to_delete)} old jobs", "", "" |
|
|
|
|
|
|
|
|
def truncate_prompt(prompt: str, max_tokens: int = 4096) -> str: |
|
|
"""Truncate prompt to fit within token limit, preserving the most recent/relevant parts.""" |
|
|
if not prompt: |
|
|
return "" |
|
|
|
|
|
if global_tokenizer: |
|
|
try: |
|
|
tokens = global_tokenizer.encode(prompt) |
|
|
if len(tokens) > max_tokens: |
|
|
|
|
|
|
|
|
beginning_tokens = int(max_tokens * 0.2) |
|
|
ending_tokens = max_tokens - beginning_tokens |
|
|
|
|
|
new_tokens = tokens[:beginning_tokens] + tokens[-(ending_tokens):] |
|
|
return global_tokenizer.decode(new_tokens) |
|
|
except Exception as e: |
|
|
debug_print(f"Truncation error: {str(e)}") |
|
|
|
|
|
|
|
|
words = prompt.split() |
|
|
if len(words) > max_tokens: |
|
|
beginning_words = int(max_tokens * 0.2) |
|
|
ending_words = max_tokens - beginning_words |
|
|
|
|
|
return " ".join(words[:beginning_words] + words[-(ending_words):]) |
|
|
|
|
|
return prompt |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
default_prompt = """\ |
|
|
{conversation_history} |
|
|
Use the following context to provide a detailed technical answer to the user's question. |
|
|
Do not include an introduction like "Based on the provided documents, ...". Just answer the question. |
|
|
If you don't know the answer, please respond with "I don't know". |
|
|
|
|
|
Context: |
|
|
{context} |
|
|
|
|
|
User's question: |
|
|
{question} |
|
|
""" |
|
|
|
|
|
def load_txt_from_url(url: str) -> Document: |
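    """Fetch a plain-text file over HTTP and wrap it in a Document; raises if the file is empty or the request fails."""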
|
|
response = requests.get(url) |
|
|
if response.status_code == 200: |
|
|
text = response.text.strip() |
|
|
if not text: |
|
|
raise ValueError(f"TXT file at {url} is empty.") |
|
|
return Document(page_content=text, metadata={"source": url}) |
|
|
else: |
|
|
raise Exception(f"Failed to load {url} with status {response.status_code}") |
|
|
|
|
|
|
|
|
|
|
|
# Model registry: "display" is the label shown in the UI dropdowns, "backend" is the
# provider-specific model id, and "provider" selects which LLM wrapper class to use.
models = [
|
|
|
|
|
{"display": "🟦 GPT OSS 120b (Nebius)", "backend": "openai/gpt-oss-120b", "provider": "nebius"}, |
|
|
{"display": "🟦 GPT OSS 20b (Nebius)", "backend": "openai/gpt-oss-20b", "provider": "nebius"}, |
|
|
{"display": "🟦 Google Gemma 3 27b-Instruct (Nebius)", "backend": "google/gemma-3-27b-it", "provider": "nebius"}, |
|
|
{"display": "🟦 Kimi K2", "backend": "moonshotai/Kimi-K2-Instruct", "provider": "nebius"}, |
|
|
{"display": "🟦 DeepSeek-R1-0528 (Nebius)", "backend": "deepseek-ai/DeepSeek-R1-0528", "provider": "nebius"}, |
|
|
{"display": "🟦 DeepSeek-V3-0324 (Nebius)", "backend": "deepseek-ai/DeepSeek-V3-0324", "provider": "nebius"}, |
|
|
{"display": "🟦 DeepSeek-V3 (Nebius)", "backend": "deepseek-ai/DeepSeek-V3", "provider": "nebius"}, |
|
|
{"display": "🟦 DeepSeek-R1-Distill-Llama-70B (Nebius)", "backend": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", "provider": "nebius"}, |
|
|
{"display": "🟦 Meta-Llama-3.3-70B-Instruct (Nebius)", "backend": "meta-llama/Llama-3.3-70B-Instruct", "provider": "nebius"}, |
|
|
{"display": "🟦 Meta-Llama-3.1-8B-Instruct (Nebius)", "backend": "meta-llama/Meta-Llama-3.1-8B-Instruct", "provider": "nebius"}, |
|
|
{"display": "🟦 Meta-Llama-3.1-70B-Instruct (Nebius)", "backend": "meta-llama/Meta-Llama-3.1-70B-Instruct", "provider": "nebius"}, |
|
|
{"display": "🟦 Meta-Llama-3.1-405B-Instruct (Nebius)", "backend": "meta-llama/Meta-Llama-3.1-405B-Instruct", "provider": "nebius"}, |
|
|
{"display": "🟦 NVIDIA Llama-3_1-Nemotron-Ultra-253B-v1 (Nebius)", "backend": "nvidia/Llama-3_1-Nemotron-Ultra-253B-v1", "provider": "nebius"}, |
|
|
{"display": "🟦 NVIDIA Llama-3_3-Nemotron-Super-49B-v1 (Nebius)", "backend": "nvidia/Llama-3_3-Nemotron-Super-49B-v1", "provider": "nebius"}, |
|
|
{"display": "🟦 Mistral-Nemo-Instruct-2407 (Nebius)", "backend": "mistralai/Mistral-Nemo-Instruct-2407", "provider": "nebius"}, |
|
|
{"display": "🟦 Hermes 4 405B (Nebius)", "backend": "NousResearch/Hermes-4-405B", "provider": "nebius"}, |
|
|
{"display": "🟦 Hermes 4 70B (Nebius)", "backend": "NousResearch/Hermes-4-70B", "provider": "nebius"}, |
|
|
{"display": "🟦 GLM-4.5 (Nebius)", "backend": "zai-org/GLM-4.5", "provider": "nebius"}, |
|
|
{"display": "🟦 GLM-4.5 AIR (Nebius)", "backend": "zai-org/GLM-4.5-Air", "provider": "nebius"}, |
|
|
{"display": "🟦 Qwen3-235B-A22B (Nebius)", "backend": "Qwen/Qwen3-235B-A22B", "provider": "nebius"}, |
|
|
{"display": "🟦 Qwen3-30B-A3B (Nebius)", "backend": "Qwen/Qwen3-30B-A3B", "provider": "nebius"}, |
|
|
{"display": "🟦 Qwen3-32B (Nebius)", "backend": "Qwen/Qwen3-32B", "provider": "nebius"}, |
|
|
{"display": "🟦 Qwen3-14B (Nebius)", "backend": "Qwen/Qwen3-14B", "provider": "nebius"}, |
|
|
{"display": "🟦 Qwen3-4B-fast (Nebius)", "backend": "Qwen/Qwen3-4B-fast", "provider": "nebius"}, |
|
|
{"display": "🟦 QwQ-32B (Nebius)", "backend": "Qwen/QwQ-32B", "provider": "nebius"}, |
|
|
{"display": "🟦 Google Gemma-2-2b-it (Nebius)", "backend": "google/gemma-2-2b-it", "provider": "nebius"}, |
|
|
{"display": "🟦 Google Gemma-2-9b-it (Nebius)", "backend": "google/gemma-2-9b-it", "provider": "nebius"}, |
|
|
{"display": "🟦 Hermes-3-Llama-405B (Nebius)", "backend": "NousResearch/Hermes-3-Llama-405B", "provider": "nebius"}, |
|
|
{"display": "🟦 Llama3-OpenBioLLM-70B (Nebius, Medical)", "backend": "aaditya/Llama3-OpenBioLLM-70B", "provider": "nebius"}, |
|
|
{"display": "🟦 Qwen2.5-72B-Instruct (Nebius, Code)", "backend": "Qwen/Qwen2.5-72B-Instruct", "provider": "nebius"}, |
|
|
{"display": "🟦 Qwen2.5-Coder-7B (Nebius, Code)", "backend": "Qwen/Qwen2.5-Coder-7B", "provider": "nebius"}, |
|
|
{"display": "🟦 Qwen2.5-Coder-32B-Instruct (Nebius, Code)", "backend": "Qwen/Qwen2.5-Coder-32B-Instruct", "provider": "nebius"}, |
|
|
|
|
|
{"display": "🤗 Remote Meta-Llama-3 (HuggingFace)", "backend": "meta-llama/Meta-Llama-3-8B-Instruct", "provider": "hf_inference"}, |
|
|
{"display": "🤗 SciFive PubMed Classifier", "backend": "razent/SciFive-base-Pubmed_PMC", "provider": "hf_inference"}, |
|
|
{"display": "🤗 Tiny GPT-2 Classifier", "backend": "ydshieh/tiny-random-GPT2ForSequenceClassification", "provider": "hf_inference"}, |
|
|
{"display": "🤗 ArabianGPT QA (0.4B)", "backend": "gp-tar4/QA_FineTuned_ArabianGPT-03B", "provider": "hf_inference"}, |
|
|
{"display": "🤗 Tiny Mistral Classifier", "backend": "xshubhamx/tiny-mistral", "provider": "hf_inference"}, |
|
|
{"display": "🤗 Hallucination Scorer", "backend": "tcapelle/hallu_scorer", "provider": "hf_inference"}, |
|
|
{"display": "🇪🇺 Mistral-API (Mistral)", "backend": "mistral-small-latest", "provider": "mistral"}, |
|
|
|
|
|
{"display": "🇺🇸 GPT-3.5 (OpenAI)", "backend": "gpt-3.5-turbo", "provider": "openai"}, |
|
|
{"display": "🇺🇸 GPT-4o (OpenAI)", "backend": "gpt-4o", "provider": "openai"}, |
|
|
{"display": "🇺🇸 GPT-4o mini (OpenAI)", "backend": "gpt-4o-mini", "provider": "openai"}, |
|
|
{"display": "🇺🇸 o1-mini (OpenAI)", "backend": "o1-mini", "provider": "openai"}, |
|
|
{"display": "🇺🇸 o3-mini (OpenAI)", "backend": "o3-mini", "provider": "openai"}, |
|
|
|
|
|
{"display": "🦾 Grok 2 (xAI)", "backend": "grok-2", "provider": "grok"}, |
|
|
{"display": "🦾 Grok 3 (xAI)", "backend": "grok-3", "provider": "grok"}, |
|
|
|
|
|
{"display": "🟧 Sonnet 4 (Anthropic)", "backend": "sonnet-4", "provider": "anthropic"}, |
|
|
{"display": "🟧 Sonnet 3.7 (Anthropic)", "backend": "sonnet-3.7", "provider": "anthropic"}, |
|
|
|
|
|
{"display": "🔷 Gemini 2.5 Pro (Google)", "backend": "gemini-2.5-pro", "provider": "gemini"}, |
|
|
{"display": "🔷 Gemini 2.5 Flash (Google)", "backend": "gemini-2.5-flash", "provider": "gemini"}, |
|
|
{"display": "🔷 Gemini 2.5 Flash Lite Preview (Google)", "backend": "gemini-2.5-flash-lite-preview-06-17", "provider": "gemini"}, |
|
|
{"display": "🔷 Gemini 2.0 Flash (Google)", "backend": "gemini-2.0-flash", "provider": "gemini"}, |
|
|
{"display": "🔷 Gemini 2.0 Flash Preview Image Gen (Text+Image) (Google)", "backend": "gemini-2.0-flash-preview-image-generation", "provider": "gemini"}, |
|
|
{"display": "🔷 Gemini 2.0 Flash Lite (Google)", "backend": "gemini-2.0-flash-lite", "provider": "gemini"}, |
|
|
] |
|
|
|
|
|
|
|
|
model_display_options = [m["display"] for m in models] |
|
|
|
|
|
|
|
|
class ErrorLLM(LLM): |
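    """Fallback LLM returned when a real pipeline cannot be created; every call yields an error message."""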
|
|
@property |
|
|
def _llm_type(self) -> str: |
|
|
return "error_llm" |
|
|
def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str: |
|
|
return "Error: LLM pipeline could not be created. Please check your configuration and try again." |
|
|
@property |
|
|
def _identifying_params(self) -> dict: |
|
|
return {} |
|
|
|
|
|
class LocalLLM(LLM): |
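    """Minimal local fallback LLM that returns a fixed placeholder response."""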
|
|
@property |
|
|
def _llm_type(self) -> str: |
|
|
return "local_llm" |
|
|
def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str: |
|
|
return "Local LLM Fallback Response" |
|
|
@property |
|
|
def _identifying_params(self) -> dict: |
|
|
return {} |
|
|
|
|
|
|
|
|
class NebiusLLM(LLM): |
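    """LangChain wrapper for Nebius AI Studio models via the OpenAI-compatible chat API."""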
|
|
temperature: float = 0.5 |
|
|
top_p: float = 0.95 |
|
|
top_k: int = 50 |
|
|
max_tokens: int = 3000 |
|
|
model: str = "meta-llama/Meta-Llama-3.1-70B-Instruct" |
|
|
|
|
|
def __init__(self, model: str, temperature: float = 0.5, top_p: float = 0.95, top_k: int = 50, max_tokens: int = 3000, **kwargs: Any): |
|
|
try: |
|
|
from openai import OpenAI |
|
|
except ImportError: |
|
|
raise ImportError("openai package is required for NEBIUS models.") |
|
|
super().__init__(**kwargs) |
|
|
api_key = NEBIUS_API_KEY or os.environ.get("NEBIUS_API_KEY") |
|
|
if not api_key: |
|
|
raise ValueError("Please set the NEBIUS_API_KEY either in the code or as an environment variable.") |
|
|
self.model = model |
|
|
self.temperature = temperature |
|
|
self.top_p = top_p |
|
|
self.top_k = top_k |
|
|
self.max_tokens = max_tokens |
|
|
|
|
|
object.__setattr__(self, "_client", OpenAI(base_url="https://api.studio.nebius.com/v1/", api_key=api_key)) |
|
|
|
|
|
@property |
|
|
def _llm_type(self) -> str: |
|
|
return "nebius_llm" |
|
|
|
|
|
def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str: |
|
|
try: |
|
|
completion = self._client.chat.completions.create( |
|
|
model=self.model, |
|
|
messages=[{"role": "user", "content": prompt}], |
|
|
temperature=self.temperature, |
|
|
top_p=self.top_p, |
|
|
max_tokens=self.max_tokens |
|
|
) |
|
|
return completion.choices[0].message.content if hasattr(completion.choices[0].message, 'content') else str(completion.choices[0].message) |
|
|
except Exception as e: |
|
|
return f"Error from NEBIUS: {str(e)}" |
|
|
|
|
|
@property |
|
|
def _identifying_params(self) -> dict: |
|
|
return {"model": self.model, "temperature": self.temperature, "top_p": self.top_p} |
|
|
|
|
|
|
|
|
class OpenAILLM(LLM): |
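    """LangChain wrapper for OpenAI chat models, handling the parameter differences of the o1/o3 models."""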
|
|
temperature: float = 0.7 |
|
|
top_p: float = 0.95 |
|
|
top_k: int = 50 |
|
|
max_tokens: int = 3000 |
|
|
model: str = "gpt-3.5-turbo" |
|
|
|
|
|
def __init__(self, model: str, temperature: float = 0.7, top_p: float = 0.95, top_k: int = 50, max_tokens: int = 3000, **kwargs: Any): |
|
|
import openai |
|
|
super().__init__(**kwargs) |
|
|
self.model = model |
|
|
self.temperature = temperature |
|
|
self.top_p = top_p |
|
|
self.top_k = top_k |
|
|
self.max_tokens = max_tokens |
|
|
api_key = OPENAI_API_KEY or os.environ.get("OPENAI_API_KEY") |
|
|
if not api_key: |
|
|
raise ValueError("Please set the OPENAI_API_KEY either in the code or as an environment variable.") |
|
|
openai.api_key = api_key |
|
|
object.__setattr__(self, "_client", openai) |
|
|
|
|
|
@property |
|
|
def _llm_type(self) -> str: |
|
|
return "openai_llm" |
|
|
|
|
|
def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str: |
|
|
try: |
|
|
|
|
|
models_with_max_completion_tokens = ["o1-mini", "o3-mini", "gpt-4o", "gpt-4o-mini"] |
|
|
o1o3_models = ["o1-mini", "o3-mini"] |
|
|
|
|
|
model_param = {} |
|
|
if any(m in self.model for m in models_with_max_completion_tokens): |
|
|
model_param["max_completion_tokens"] = self.max_tokens |
|
|
else: |
|
|
model_param["max_tokens"] = self.max_tokens |
|
|
|
|
|
kwargs = { |
|
|
"model": self.model, |
|
|
"messages": [{"role": "user", "content": prompt}], |
|
|
**model_param |
|
|
} |
|
|
if any(m in self.model for m in o1o3_models): |
|
|
kwargs["temperature"] = 1 |
|
|
kwargs["top_p"] = 1 |
|
|
else: |
|
|
kwargs["temperature"] = self.temperature |
|
|
kwargs["top_p"] = self.top_p |
|
|
|
|
|
completion = self._client.chat.completions.create(**kwargs) |
|
|
return completion.choices[0].message.content if hasattr(completion.choices[0].message, 'content') else str(completion.choices[0].message) |
|
|
except Exception as e: |
|
|
return f"Error from OpenAI: {str(e)}" |
|
|
|
|
|
@property |
|
|
def _identifying_params(self) -> dict: |
|
|
return {"model": self.model, "temperature": self.temperature, "top_p": self.top_p} |
|
|
|
|
|
|
|
|
class HuggingFaceLLM(LLM): |
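    """LangChain wrapper for text generation through the Hugging Face Inference API."""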
|
|
temperature: float = 0.5 |
|
|
top_p: float = 0.95 |
|
|
top_k: int = 50 |
|
|
max_tokens: int = 3000 |
|
|
model: str = "meta-llama/Meta-Llama-3-8B-Instruct" |
|
|
|
|
|
def __init__(self, model: str, temperature: float = 0.5, top_p: float = 0.95, top_k: int = 50, max_tokens: int = 3000, **kwargs: Any): |
|
|
from huggingface_hub import InferenceClient |
|
|
super().__init__(**kwargs) |
|
|
self.model = model |
|
|
self.temperature = temperature |
|
|
self.top_p = top_p |
|
|
self.top_k = top_k |
|
|
self.max_tokens = max_tokens |
|
|
hf_api_token = HF_API_TOKEN or os.environ.get("HF_API_TOKEN") |
|
|
if not hf_api_token: |
|
|
raise ValueError("Please set the HF_API_TOKEN either in the code or as an environment variable to use HuggingFace inference.") |
|
|
|
|
|
object.__setattr__(self, "_client", InferenceClient(token=hf_api_token, timeout=120)) |
|
|
|
|
|
@property |
|
|
def _llm_type(self) -> str: |
|
|
return "hf_llm" |
|
|
|
|
|
def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str: |
|
|
try: |
|
|
response = self._client.text_generation( |
|
|
prompt, |
|
|
model=self.model, |
|
|
temperature=self.temperature, |
|
|
top_p=self.top_p, |
|
|
max_new_tokens=self.max_tokens |
|
|
) |
|
|
return response |
|
|
except Exception as e: |
|
|
return f"Error from HuggingFace: {str(e)}" |
|
|
|
|
|
@property |
|
|
def _identifying_params(self) -> dict: |
|
|
return {"model": self.model, "temperature": self.temperature, "top_p": self.top_p} |
|
|
|
|
|
|
|
|
class MistralLLM(LLM): |
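    """LangChain wrapper for the Mistral chat API via the official mistralai client."""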
|
|
temperature: float = 0.7 |
|
|
top_p: float = 0.95 |
|
|
top_k: int = 50 |
|
|
max_tokens: int = 3000 |
|
|
model: str = "mistral-small-latest" |
|
|
client: Any = None |
|
|
|
|
|
def __init__(self, model: str, temperature: float = 0.7, top_p: float = 0.95, top_k: int = 50, max_tokens: int = 3000, **kwargs: Any): |
|
|
try: |
|
|
from mistralai import Mistral |
|
|
except ImportError as e: |
|
|
raise ImportError(f"mistralai package is required for Mistral models. Please install with: pip install mistralai. Error: {e}") |
|
|
except Exception as e: |
|
|
raise ImportError(f"Unexpected error importing mistralai: {e}") |
|
|
super().__init__(**kwargs) |
|
|
|
|
|
|
|
|
api_key = MISTRAL_API_KEY or os.environ.get("MISTRAL_API_KEY") |
|
|
if not api_key: |
|
|
debug_print("MISTRAL_API_KEY not found in code or environment variables") |
|
|
raise ValueError("Please set the MISTRAL_API_KEY either in the code or as an environment variable.") |
|
|
|
|
|
debug_print(f"Initializing MistralLLM with model: {model}, API key: {api_key[:8]}...") |
|
|
|
|
|
self.model = model |
|
|
self.temperature = temperature |
|
|
self.top_p = top_p |
|
|
self.top_k = top_k |
|
|
self.max_tokens = max_tokens |
|
|
|
|
|
try: |
|
|
|
|
|
self.client = Mistral(api_key=api_key) |
|
|
debug_print("Mistral client created successfully") |
|
|
except Exception as e: |
|
|
debug_print(f"Error creating Mistral client: {str(e)}") |
|
|
raise RuntimeError(f"Failed to create Mistral client: {str(e)}") |
|
|
|
|
|
@property |
|
|
def _llm_type(self) -> str: |
|
|
return "mistral_llm" |
|
|
|
|
|
def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str: |
|
|
try: |
|
|
debug_print(f"Mistral API call: model={self.model}, temp={self.temperature}, top_p={self.top_p}, top_k={self.top_k}, max_tokens={self.max_tokens}") |
|
|
response = self.client.chat.complete( |
|
|
model=self.model, |
|
|
messages=[{"role": "user", "content": prompt}], |
|
|
temperature=self.temperature, |
|
|
top_p=self.top_p, |
|
|
max_tokens=self.max_tokens |
|
|
) |
|
|
debug_print(f"Mistral API response received successfully") |
|
|
return response.choices[0].message.content |
|
|
except Exception as e: |
|
|
debug_print(f"Mistral API error: {str(e)}") |
|
|
return f"Error from Mistral: {str(e)}" |
|
|
|
|
|
@property |
|
|
def _identifying_params(self) -> dict: |
|
|
return {"model": self.model, "temperature": self.temperature, "top_p": self.top_p, "top_k": self.top_k, "max_tokens": self.max_tokens} |
|
|
|
|
|
|
|
|
# Module-level rate-limit trackers used by GeminiLLM._call: last request timestamp per model
# (for RPM spacing) and a (date string, request count) pair per model (for daily caps).
GEMINI_LAST_REQUEST_TIME = {}
GEMINI_DAILY_REQUESTS = {}


class GeminiLLM(LLM):
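    """LangChain wrapper for Google Gemini models with simple per-model RPM and daily rate limiting."""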
|
|
temperature: float = 0.7 |
|
|
top_p: float = 0.95 |
|
|
max_tokens: int = 3000 |
|
|
model: str = "gemini-2.5-flash" |
|
|
|
|
|
|
|
|
GEMINI_LIMITS = { |
|
|
"gemini-2.5-pro": {"rpm": 5, "rpd": 100}, |
|
|
"gemini-2.5-flash": {"rpm": 10, "rpd": 250}, |
|
|
"gemini-2.5-flash-lite-preview-06-17": {"rpm": 15, "rpd": 1000}, |
|
|
"gemini-2.0-flash": {"rpm": 15, "rpd": 200}, |
|
|
"gemini-2.0-flash-preview-image-generation": {"rpm": 15, "rpd": 200}, |
|
|
"gemini-2.0-flash-lite": {"rpm": 30, "rpd": 200}, |
|
|
} |
|
|
|
|
|
def __init__(self, model: str, temperature: float = 0.7, top_p: float = 0.95, max_tokens: int = 3000, **kwargs: Any): |
|
|
try: |
|
|
import google.generativeai as genai |
|
|
except ImportError: |
|
|
raise ImportError("google-generativeai package is required for Gemini models.") |
|
|
super().__init__(**kwargs) |
|
|
api_key = GEMINI_API_KEY or os.environ.get("GEMINI_API_KEY") |
|
|
if not api_key: |
|
|
raise ValueError("Please set the GEMINI_API_KEY either in the code or as an environment variable.") |
|
|
self.model = model |
|
|
self.temperature = temperature |
|
|
self.top_p = top_p |
|
|
self.max_tokens = max_tokens |
|
|
genai.configure(api_key=api_key) |
|
|
object.__setattr__(self, "_client", genai) |
|
|
object.__setattr__(self, "_rpm_limit", self.GEMINI_LIMITS.get(model, {}).get("rpm", None)) |
|
|
object.__setattr__(self, "_rpd_limit", self.GEMINI_LIMITS.get(model, {}).get("rpd", None)) |
|
|
object.__setattr__(self, "_last_request_time", 0) |
|
|
|
|
|
@property |
|
|
def _llm_type(self) -> str: |
|
|
return "gemini_llm" |
|
|
|
|
|
def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str: |
|
|
import time |
|
|
import re |
|
|
global GEMINI_LAST_REQUEST_TIME, GEMINI_DAILY_REQUESTS |
|
|
model = self._client.GenerativeModel(self.model) |
|
|
retries = 5 |
|
|
|
|
|
today_str = datetime.datetime.now().strftime('%Y-%m-%d') |
|
|
rpd_limit = object.__getattribute__(self, "_rpd_limit") if hasattr(self, "_rpd_limit") else None |
|
|
count_info = GEMINI_DAILY_REQUESTS.get(self.model, (today_str, 0)) |
|
|
if count_info[0] != today_str: |
|
|
|
|
|
GEMINI_DAILY_REQUESTS[self.model] = (today_str, 0) |
|
|
count_info = (today_str, 0) |
|
|
if rpd_limit is not None and count_info[1] >= rpd_limit: |
|
|
debug_print(f"Gemini: DAILY LIMIT REACHED for {self.model}: {count_info[1]}/{rpd_limit}") |
|
|
return f"Error from Gemini: Daily request limit reached for {self.model} ({rpd_limit} per day)" |
|
|
for attempt in range(retries): |
|
|
|
|
|
rpm_limit = object.__getattribute__(self, "_rpm_limit") if hasattr(self, "_rpm_limit") else None |
|
|
if rpm_limit: |
|
|
now = time.time() |
|
|
min_interval = 60.0 / rpm_limit |
|
|
last_time = GEMINI_LAST_REQUEST_TIME.get(self.model, 0) |
|
|
elapsed = now - last_time |
|
|
if elapsed < min_interval: |
|
|
sleep_time = min_interval - elapsed |
|
|
debug_print(f"Gemini: Sleeping {sleep_time:.2f}s to respect RPM limit for {self.model}") |
|
|
time.sleep(sleep_time) |
|
|
try: |
|
|
response = model.generate_content(prompt, generation_config={ |
|
|
"temperature": self.temperature, |
|
|
"top_p": self.top_p, |
|
|
"max_output_tokens": self.max_tokens |
|
|
}) |
|
|
now = time.time() |
|
|
GEMINI_LAST_REQUEST_TIME[self.model] = now |
|
|
object.__setattr__(self, "_last_request_time", now) |
|
|
|
|
|
count_info = GEMINI_DAILY_REQUESTS.get(self.model, (today_str, 0)) |
|
|
GEMINI_DAILY_REQUESTS[self.model] = (today_str, count_info[1] + 1) |
|
|
rpd_limit = object.__getattribute__(self, "_rpd_limit") if hasattr(self, "_rpd_limit") else None |
|
|
debug_print(f"Gemini: {self.model} daily usage: {GEMINI_DAILY_REQUESTS[self.model][1]}/{rpd_limit}") |
|
|
return response.text if hasattr(response, 'text') else str(response) |
|
|
except Exception as e: |
|
|
msg = str(e) |
|
|
debug_print(f"Gemini error: {msg}") |
|
|
|
|
|
if "429" in msg: |
|
|
retry_delay = None |
|
|
match = re.search(r'retry_delay\s*{\s*seconds:\s*(\d+)', msg) |
|
|
if match: |
|
|
retry_delay = int(match.group(1)) |
|
|
sleep_time = retry_delay + 2 |
|
|
debug_print(f"Gemini: 429 received, sleeping for retry_delay {retry_delay}s + 2s buffer (total {sleep_time}s)") |
|
|
time.sleep(sleep_time) |
|
|
continue |
|
|
|
|
|
elif 'retry_delay' in msg: |
|
|
debug_print(f"Gemini: 429 received, empty retry_delay, sleeping for 3s and retrying") |
|
|
time.sleep(3) |
|
|
continue |
|
|
else: |
|
|
debug_print(f"Gemini: 429 received, but no retry_delay found. Returning error.") |
|
|
return f"Error from Gemini: {msg}" |
|
|
|
|
|
return f"Error from Gemini: {msg}" |
|
|
|
|
|
@property |
|
|
def _identifying_params(self) -> dict: |
|
|
return {"model": self.model, "temperature": self.temperature, "top_p": self.top_p} |
|
|
|
|
|
|
|
|
class GrokLLM(LLM): |
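    """LangChain wrapper for xAI Grok models via the x.ai chat completions REST endpoint."""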
|
|
temperature: float = 0.7 |
|
|
top_p: float = 0.95 |
|
|
max_tokens: int = 3000 |
|
|
model: str = "grok-2" |
|
|
|
|
|
def __init__(self, model: str, temperature: float = 0.7, top_p: float = 0.95, max_tokens: int = 3000, **kwargs: Any): |
|
|
import requests |
|
|
super().__init__(**kwargs) |
|
|
api_key = GROK_API_KEY or os.environ.get("GROK_API_KEY") |
|
|
if not api_key: |
|
|
raise ValueError("Please set the GROK_API_KEY either in the code or as an environment variable.") |
|
|
self.model = model |
|
|
self.temperature = temperature |
|
|
self.top_p = top_p |
|
|
self.max_tokens = max_tokens |
|
|
object.__setattr__(self, "_api_key", api_key) |
|
|
|
|
|
@property |
|
|
def _llm_type(self) -> str: |
|
|
return "grok_llm" |
|
|
|
|
|
def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str: |
|
|
import requests |
|
|
headers = { |
|
|
"Authorization": f"Bearer {self._api_key}", |
|
|
"Content-Type": "application/json" |
|
|
} |
|
|
data = { |
|
|
"model": self.model, |
|
|
"messages": [{"role": "user", "content": prompt}], |
|
|
"temperature": self.temperature, |
|
|
"top_p": self.top_p, |
|
|
"max_tokens": self.max_tokens |
|
|
} |
|
|
try: |
|
|
response = requests.post("https://api.x.ai/v1/chat/completions", headers=headers, json=data, timeout=60) |
|
|
response.raise_for_status() |
|
|
result = response.json() |
|
|
return result["choices"][0]["message"]["content"] |
|
|
except Exception as e: |
|
|
return f"Error from Grok: {str(e)}" |
|
|
|
|
|
@property |
|
|
def _identifying_params(self) -> dict: |
|
|
return {"model": self.model, "temperature": self.temperature, "top_p": self.top_p} |
|
|
|
|
|
|
|
|
class AnthropicLLM(LLM): |
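    """LangChain wrapper for Anthropic Claude models, mapping short display names to full model ids."""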
|
|
temperature: float = 0.7 |
|
|
top_p: float = 0.95 |
|
|
max_tokens: int = 3000 |
|
|
model: str = "claude-sonnet-4-20250514" |
|
|
|
|
|
def __init__(self, model: str, temperature: float = 0.7, top_p: float = 0.95, max_tokens: int = 3000, **kwargs: Any): |
|
|
try: |
|
|
import anthropic |
|
|
except ImportError: |
|
|
raise ImportError("anthropic package is required for Anthropic models.") |
|
|
|
|
|
super().__init__(**kwargs) |
|
|
|
|
|
api_key = ANTHROPIC_API_KEY or os.environ.get("ANTHROPIC_API_KEY") |
|
|
if not api_key: |
|
|
raise ValueError("Please set the ANTHROPIC_API_KEY either in the code or as an environment variable.") |
|
|
|
|
|
|
|
|
model_map = { |
|
|
"sonnet-4": "claude-sonnet-4-20250514", |
|
|
"sonnet-3.7": "claude-3-7-sonnet-20250219", |
|
|
} |
|
|
self.model = model_map.get(model, model) |
|
|
self.temperature = temperature |
|
|
self.top_p = top_p |
|
|
self.max_tokens = max_tokens |
|
|
|
|
|
|
|
|
object.__setattr__(self, "_client", anthropic.Anthropic(api_key=api_key)) |
|
|
|
|
|
@property |
|
|
def _llm_type(self) -> str: |
|
|
return "anthropic_llm" |
|
|
|
|
|
def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str: |
|
|
try: |
|
|
response = self._client.messages.create( |
|
|
model=self.model, |
|
|
max_tokens=self.max_tokens, |
|
|
messages=[{"role": "user", "content": prompt}], |
|
|
temperature=self.temperature, |
|
|
top_p=self.top_p |
|
|
) |
|
|
|
|
|
|
|
|
if hasattr(response, 'content') and response.content: |
|
|
if isinstance(response.content, list): |
|
|
|
|
|
text_content = "" |
|
|
for content_block in response.content: |
|
|
if hasattr(content_block, 'text'): |
|
|
text_content += content_block.text |
|
|
elif isinstance(content_block, dict) and 'text' in content_block: |
|
|
text_content += content_block['text'] |
|
|
return text_content |
|
|
else: |
|
|
return str(response.content) |
|
|
|
|
|
return str(response) |
|
|
|
|
|
except Exception as e: |
|
|
return f"Error from Anthropic: {str(e)}" |
|
|
|
|
|
@property |
|
|
def _identifying_params(self) -> dict: |
|
|
return {"model": self.model, "temperature": self.temperature, "top_p": self.top_p} |
|
|
|
|
|
|
|
|
class SimpleLLMChain: |
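    """Thin wrapper that instantiates the provider-specific LLM for the chosen model and
    keeps a simple conversation history."""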
|
|
def __init__(self, llm_choice: str = model_display_options[0], temperature: float = 0.5, top_p: float = 0.95, top_k: int = 50, max_tokens: int = 3000): |
|
|
self.llm_choice = llm_choice |
|
|
self.temperature = temperature |
|
|
self.top_p = top_p |
|
|
self.top_k = top_k |
|
|
self.max_tokens = max_tokens |
|
|
self.llm = self.create_llm_pipeline() |
|
|
self.conversation_history = [] |
|
|
|
|
|
def create_llm_pipeline(self): |
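        """Look up the selected display name in the model registry and build the matching provider LLM."""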
|
|
|
|
|
model_entry = next((m for m in models if m["display"] == self.llm_choice), None) |
|
|
if not model_entry: |
|
|
return ErrorLLM() |
|
|
provider = model_entry["provider"] |
|
|
backend = model_entry["backend"] |
|
|
try: |
|
|
if provider == "nebius": |
|
|
return NebiusLLM(model=backend, temperature=self.temperature, top_p=self.top_p, top_k=self.top_k, max_tokens=self.max_tokens) |
|
|
elif provider == "openai": |
|
|
return OpenAILLM(model=backend, temperature=self.temperature, top_p=self.top_p, top_k=self.top_k, max_tokens=self.max_tokens) |
|
|
elif provider == "hf_inference": |
|
|
return HuggingFaceLLM(model=backend, temperature=self.temperature, top_p=self.top_p, top_k=self.top_k, max_tokens=self.max_tokens) |
|
|
elif provider == "mistral": |
|
|
return MistralLLM(model=backend, temperature=self.temperature, top_p=self.top_p, top_k=self.top_k, max_tokens=self.max_tokens) |
|
|
elif provider == "gemini": |
|
|
return GeminiLLM(model=backend, temperature=self.temperature, top_p=self.top_p, max_tokens=self.max_tokens) |
|
|
elif provider == "grok": |
|
|
return GrokLLM(model=backend, temperature=self.temperature, top_p=self.top_p, max_tokens=self.max_tokens) |
|
|
elif provider == "anthropic": |
|
|
return AnthropicLLM(model=backend, temperature=self.temperature, top_p=self.top_p, max_tokens=self.max_tokens) |
|
|
else: |
|
|
return LocalLLM() |
|
|
except Exception as e: |
|
|
debug_print(f"Error creating LLM pipeline: {str(e)}") |
|
|
return ErrorLLM() |
|
|
|
|
|
def update_llm_pipeline(self, new_model_choice: str, temperature: float, top_p: float, top_k: int, max_tokens: int): |
|
|
self.llm_choice = new_model_choice |
|
|
self.temperature = temperature |
|
|
self.top_p = top_p |
|
|
self.top_k = top_k |
|
|
self.max_tokens = max_tokens |
|
|
self.llm = self.create_llm_pipeline() |
|
|
|
|
|
def submit_query(self, query: str) -> tuple: |
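        """Run the query through the current LLM and return (response, input-token label, output-token label)."""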
|
|
try: |
|
|
response = self.llm(query) |
|
|
|
|
|
self.conversation_history.append({"query": query, "response": response}) |
|
|
input_tokens = count_tokens(query) |
|
|
output_tokens = count_tokens(response) |
|
|
return (response, f"Input tokens: {input_tokens}", f"Output tokens: {output_tokens}") |
|
|
except Exception as e: |
|
|
return (f"Error processing query: {str(e)}", "Input tokens: 0", "Output tokens: 0") |
|
|
|
|
|
|
|
|
# Global chain instance for plain queries; created lazily on the first call below.
llm_chain = None


def submit_query_updated(query: str, model_choice: str = None, temperature: float = 0.5, top_p: float = 0.95, top_k: int = 50, max_tokens: int = 3000):
|
|
"""Process a query with the specified model and parameters.""" |
|
|
debug_print(f"Processing query: {query}") |
|
|
if not query: |
|
|
debug_print("Empty query received") |
|
|
return "Please enter a non-empty query", "", "Input tokens: 0", "Output tokens: 0" |
|
|
|
|
|
try: |
|
|
global llm_chain |
|
|
if llm_chain is None: |
|
|
llm_chain = SimpleLLMChain( |
|
|
llm_choice=model_choice, |
|
|
temperature=temperature, |
|
|
top_p=top_p, |
|
|
top_k=top_k, |
|
|
max_tokens=max_tokens |
|
|
) |
|
|
elif llm_chain.llm_choice != model_choice: |
|
|
llm_chain.update_llm_pipeline(model_choice, temperature, top_p, top_k, max_tokens) |
|
|
|
|
|
response, input_tokens, output_tokens = llm_chain.submit_query(query) |
|
|
return response, "", input_tokens, output_tokens |
|
|
except Exception as e: |
|
|
debug_print(f"Error in submit_query_updated: {str(e)}") |
|
|
return f"Error: {str(e)}", "", "Input tokens: 0", "Output tokens: 0" |
|
|
|
|
|
|
|
|
def reset_app_updated(): |
|
|
global llm_chain |
|
|
llm_chain = None |
|
|
return "Application reset successfully" |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Module-level chain instance used by the file-loading helpers below.
rag_chain = SimpleLLMChain()
|
|
|
|
|
def load_pdfs_updated(file_links, model_choice, prompt_template, bm25_weight, temperature, top_p): |
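    """Load documents from newline-separated URLs and build or update the retrieval chain.
    Expects the chain to expose raw_data, context, get_current_context and add_pdfs_to_vectore_store."""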
|
|
debug_print("Inside load_pdfs function.") |
|
|
if not file_links: |
|
|
debug_print("Please enter non-empty URLs") |
|
|
return "Please enter non-empty URLs", "Word count: N/A", "Model used: N/A", "Context: N/A" |
|
|
try: |
|
|
links = [link.strip() for link in file_links.split("\n") if link.strip()] |
|
|
global rag_chain |
|
|
if rag_chain.raw_data: |
|
|
rag_chain.update_llm_pipeline(model_choice, temperature, top_p, prompt_template, bm25_weight) |
|
|
context_display = rag_chain.get_current_context() |
|
|
response_msg = f"Files already loaded. Chain updated with model: {model_choice}" |
|
|
return ( |
|
|
response_msg, |
|
|
f"Word count: {word_count(rag_chain.context)}", |
|
|
f"Model used: {rag_chain.llm_choice}", |
|
|
f"Context:\n{context_display}" |
|
|
) |
|
|
else: |
|
|
rag_chain = SimpleLLMChain( |
|
|
llm_choice=model_choice, |
|
|
temperature=temperature, |
|
|
top_p=top_p |
|
|
) |
|
|
rag_chain.add_pdfs_to_vectore_store(links) |
|
|
context_display = rag_chain.get_current_context() |
|
|
response_msg = f"Files loaded successfully. Using model: {model_choice}" |
|
|
return ( |
|
|
response_msg, |
|
|
f"Word count: {word_count(rag_chain.context)}", |
|
|
f"Model used: {rag_chain.llm_choice}", |
|
|
f"Context:\n{context_display}" |
|
|
) |
|
|
except Exception as e: |
|
|
error_msg = traceback.format_exc() |
|
|
debug_print("Could not load files. Error: " + error_msg) |
|
|
return ( |
|
|
"Error loading files: " + str(e), |
|
|
f"Word count: {word_count('')}", |
|
|
f"Model used: {rag_chain.llm_choice}", |
|
|
"Context: N/A" |
|
|
) |
|
|
|
|
|
def update_model(new_model: str): |
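    """Switch the loaded retrieval chain to a new model, keeping its current parameters."""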
|
|
global rag_chain |
|
|
if rag_chain and rag_chain.raw_data: |
|
|
rag_chain.update_llm_pipeline(new_model, rag_chain.temperature, rag_chain.top_p, |
|
|
rag_chain.prompt_template, rag_chain.bm25_weight) |
|
|
debug_print(f"Model updated to {rag_chain.llm_choice}") |
|
|
return f"Model updated to: {rag_chain.llm_choice}" |
|
|
else: |
|
|
return "No files loaded; please load files first." |
|
|
|
|
|
|
|
|
|
|
|
def reset_app_updated():
    """Reset both chain instances so new files and models can be loaded from scratch."""
    global rag_chain, llm_chain
    rag_chain = SimpleLLMChain()
    llm_chain = None
    debug_print("App reset successfully.")
    return (
        "App reset successfully. You can now load new files",
        "",
        "Model used: Not selected"
    )
|
|
|
|
|
|
|
|
|
|
|
# Substrings (matched case-insensitively) that indicate a transient provider or network
# error; batch runs retry with backoff when a response matches one of these patterns.
error_patterns = [
|
|
r"error generating response:", |
|
|
r"api error occurred:", |
|
|
r"bad gateway", |
|
|
r"cloudflare", |
|
|
r"server disconnected without sending a response", |
|
|
r"getaddrinfo failed" |
|
|
] |
|
|
|
|
|
|
|
|
|
|
|
def run_batch_query(query, model1, temperature, top_p, top_k, max_tokens, num_runs, delay_ms, prefix=None): |
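    """Run the same query num_runs times against one model, retrying transient errors with
    backoff, write the outputs to a CSV file, and return (combined output, CSV path, token stats)."""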
|
|
import re |
|
|
num_runs = int(num_runs) |
|
|
delay_ms = int(delay_ms) |
|
|
results = [] |
|
|
error_count = 0 |
|
|
token_counts = [] |
|
|
outputs = [] |
|
|
model_name = model1 |
|
|
|
|
|
def sanitize(s): |
|
|
return re.sub(r'[^A-Za-z0-9_-]+', '', str(s).replace(' ', '_')) |
|
|
safe_prefix = sanitize(prefix) if prefix else '' |
|
|
safe_model = sanitize(model_name) |
|
|
date_str = datetime.datetime.now().strftime('%Y%m%d_%H%M%S') |
|
|
for i in range(num_runs): |
|
|
attempt = 0 |
|
|
max_attempts = 5 |
|
|
while attempt < max_attempts: |
|
|
response, _, input_tokens, output_tokens = submit_query_updated(query, model1, temperature, top_p, top_k, max_tokens) |
|
|
output = response if isinstance(response, str) else str(response) |
|
|
if any(re.search(pat, output, re.IGNORECASE) for pat in error_patterns): |
|
|
error_count += 1 |
|
|
attempt += 1 |
|
|
time.sleep((delay_ms/1000.0) * (attempt+1)) |
|
|
continue |
|
|
else: |
|
|
break |
|
|
try: |
|
|
token_num = 0 |
|
|
if output_tokens is not None: |
|
|
try: |
|
|
last_token = output_tokens.split()[-1] if isinstance(output_tokens, str) else str(output_tokens) |
|
|
if last_token.isdigit(): |
|
|
token_num = int(last_token) |
|
|
except Exception as e: |
|
|
debug_print(f"Token count conversion failed for output_tokens={output_tokens}: {e}") |
|
|
else: |
|
|
token_num = 0 |
|
|
except Exception as e: |
|
|
debug_print(f"Token count conversion outer exception for output_tokens={output_tokens}: {e}") |
|
|
token_num = 0 |
|
|
token_counts.append(token_num) |
|
|
results.append({ |
|
|
'run': i+1, |
|
|
'output': output, |
|
|
'input_tokens': input_tokens, |
|
|
'output_tokens': output_tokens, |
|
|
'tokens': token_num, |
|
|
'error': attempt if attempt > 0 else 0 |
|
|
}) |
|
|
outputs.append(f"=== Query {i+1}/{num_runs} ===\nTokens: {token_num}\n{output}") |
|
|
time.sleep(delay_ms/1000.0) |
|
|
|
|
|
filename = f"{safe_prefix + '-' if safe_prefix else ''}{num_runs}_{safe_model}_{date_str}.csv" |
|
|
abs_csv_path = os.path.abspath(filename) |
|
|
with open(abs_csv_path, 'w', newline='', encoding='utf-8') as csvfile: |
|
|
writer = csv.writer(csvfile) |
|
|
writer.writerow(['Run', 'Output', 'Input Tokens', 'Output Tokens', 'Tokens', 'Error Retries']) |
|
|
for r in results: |
|
|
writer.writerow([r['run'], r['output'], r['input_tokens'], r['output_tokens'], r['tokens'], r['error']]) |
|
|
|
|
|
total_tokens = sum(token_counts) |
|
|
avg_tokens = statistics.mean(token_counts) if token_counts else 0 |
|
|
stdev_tokens = statistics.stdev(token_counts) if len(token_counts) > 1 else 0 |
|
|
stats = f"Total queries: {num_runs}\nTotal tokens: {total_tokens}\nAverage tokens: {avg_tokens:.2f}\nSTDEV tokens: {stdev_tokens:.2f}\nErrors encountered: {error_count}" |
|
|
output_text = f"Model: {model_name}\n\n" + '\n\n'.join(outputs) |
|
|
return output_text, abs_csv_path, stats |
|
|
|
|
|
|
|
|
|
|
|
def submit_batch_query_async(prefix, query, prompt_mode, model, temperature, top_p, top_k, max_tokens, num_runs, delay_ms): |
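    """Submit a batch query as a background job and return immediately with the job id and refreshed job list."""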
|
|
global last_job_id |
|
|
if not query: |
|
|
return ("Please enter a non-empty query", "", "", get_job_list()) |
|
|
job_id = str(uuid.uuid4()) |
|
|
debug_print(f"Starting async batch job {job_id} for batch query") |
|
|
threading.Thread( |
|
|
target=process_in_background, |
|
|
        args=(job_id, process_batch_query_job, [job_id, prefix, query, prompt_mode, model, temperature, top_p, top_k, max_tokens, num_runs, delay_ms])
|
|
).start() |
|
|
jobs[job_id] = { |
|
|
"status": "processing", |
|
|
"type": "batch_query", |
|
|
"start_time": time.time(), |
|
|
"query": query, |
|
|
"model": model, |
|
|
"params": { |
|
|
"prefix": prefix, |
|
|
"prompt_mode": prompt_mode, |
|
|
"temperature": temperature, |
|
|
"top_p": top_p, |
|
|
"top_k": top_k, |
|
|
"max_tokens": max_tokens, |
|
|
"num_runs": num_runs, |
|
|
"delay_ms": delay_ms |
|
|
} |
|
|
} |
|
|
last_job_id = job_id |
|
|
return ( |
|
|
f"Batch job submitted and processing in the background (Job ID: {job_id}).\n\nUse 'Check Job Status' tab with this ID to get results.", |
|
|
job_id, |
|
|
query, |
|
|
get_job_list() |
|
|
) |
|
|
|
|
|
def process_batch_query_job(job_id, prefix, query, prompt_mode, model, temperature, top_p, top_k, max_tokens, num_runs, delay_ms): |
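    """Worker for batch jobs. In 'Sequential Prompts' mode the first line of the query is an
    instruction applied to each following prompt line; otherwise the whole query is repeated
    num_runs times. Partial progress and token statistics are stored on the job entry as it runs."""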
|
|
import statistics |
|
|
import os |
|
|
num_runs = int(num_runs) |
|
|
delay_ms = int(delay_ms) |
|
|
results = [] |
|
|
error_count = 0 |
|
|
token_counts = [] |
|
|
outputs = [] |
|
|
model_name = model |
|
|
query_times = [] |
|
|
batch_start = time.time() |
|
|
|
|
|
def sanitize(s): |
|
|
import re |
|
|
return re.sub(r'[^A-Za-z0-9_-]+', '', str(s).replace(' ', '_')) |
|
|
safe_prefix = sanitize(prefix) if prefix else '' |
|
|
safe_model = sanitize(model_name) |
|
|
date_str = datetime.datetime.now().strftime('%Y%m%d_%H%M%S') |
|
|
if prompt_mode == "Sequential Prompts": |
|
|
|
|
|
lines = query.strip().split('\n') |
|
|
if len(lines) < 2: |
|
|
debug_print("Sequential mode requires at least 2 lines: instruction + at least one prompt") |
|
|
return "Error: Sequential mode requires at least 2 lines (instruction + prompts)", "", "", "" |
|
|
|
|
|
instruction = lines[0].strip() |
|
|
individual_prompts = [line.strip() for line in lines[1:] if line.strip()] |
|
|
|
|
|
debug_print(f"Sequential mode: instruction='{instruction}', {len(individual_prompts)} prompts") |
|
|
|
|
|
for i, prompt in enumerate(individual_prompts): |
|
|
|
|
|
for run_num in range(num_runs): |
|
|
|
|
|
full_prompt = f"{instruction}\n\n{prompt}" |
|
|
|
|
|
attempt = 0 |
|
|
max_attempts = 5 |
|
|
start = time.time() |
|
|
while attempt < max_attempts: |
|
|
response, _, input_tokens, output_tokens = submit_query_updated(full_prompt, model, temperature, top_p, top_k, max_tokens) |
|
|
output = response if isinstance(response, str) else str(response) |
|
|
if any(re.search(pat, output, re.IGNORECASE) for pat in error_patterns): |
|
|
error_count += 1 |
|
|
attempt += 1 |
|
|
time.sleep((delay_ms/1000.0) * (attempt+1)) |
|
|
continue |
|
|
else: |
|
|
break |
|
|
end = time.time() |
|
|
elapsed = end - start |
|
|
query_times.append(elapsed) |
|
|
|
|
|
try: |
|
|
token_num = 0 |
|
|
if output_tokens is not None: |
|
|
try: |
|
|
last_token = output_tokens.split()[-1] if isinstance(output_tokens, str) else str(output_tokens) |
|
|
if last_token.isdigit(): |
|
|
token_num = int(last_token) |
|
|
except Exception as e: |
|
|
debug_print(f"Token count conversion failed for output_tokens={output_tokens}: {e}") |
|
|
else: |
|
|
token_num = 0 |
|
|
except Exception as e: |
|
|
debug_print(f"Token count conversion outer exception for output_tokens={output_tokens}: {e}") |
|
|
token_num = 0 |
|
|
|
|
|
token_counts.append(token_num) |
|
|
results.append({ |
|
|
'prompt_number': i+1, |
|
|
'run': run_num+1, |
|
|
'input_prompt': prompt, |
|
|
'full_prompt': full_prompt, |
|
|
'output': output, |
|
|
'input_tokens': input_tokens, |
|
|
'output_tokens': output_tokens, |
|
|
'tokens': token_num, |
|
|
'error': attempt if attempt > 0 else 0, |
|
|
'time': elapsed |
|
|
}) |
|
|
outputs.append(f"=== Prompt {i+1}/{len(individual_prompts)} - Run {run_num+1}/{num_runs} ===\nInput: {prompt}\nTokens: {token_num}\nOutput: {output}") |
|
|
|
|
|
|
|
|
total_processed = i * num_runs + run_num + 1 |
|
|
total_to_process = len(individual_prompts) * num_runs |
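# Publish rolling statistics under jobs[job_id]["partial_results"] so the status tab can
# show progress (runs done, average/STDEV time, token totals, errors) while the job is live.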
|
|
jobs[job_id]["partial_results"] = { |
|
|
"num_done": total_processed, |
|
|
"total": total_to_process, |
|
|
"avg_time": statistics.mean(query_times) if query_times else 0, |
|
|
"stdev_time": statistics.stdev(query_times) if len(query_times) > 1 else 0, |
|
|
"total_tokens": sum(token_counts), |
|
|
"avg_tokens": statistics.mean(token_counts) if token_counts else 0, |
|
|
"stdev_tokens": statistics.stdev(token_counts) if len(token_counts) > 1 else 0, |
|
|
"errors": error_count, |
|
|
} |
|
|
time.sleep(delay_ms/1000.0) |
|
|
else: |
|
|
|
|
|
for i in range(num_runs): |
|
|
attempt = 0 |
|
|
max_attempts = 5 |
|
|
start = time.time() |
|
|
while attempt < max_attempts: |
|
|
response, _, input_tokens, output_tokens = submit_query_updated(query, model, temperature, top_p, top_k, max_tokens) |
|
|
output = response if isinstance(response, str) else str(response) |
|
|
if any(re.search(pat, output, re.IGNORECASE) for pat in error_patterns): |
|
|
error_count += 1 |
|
|
attempt += 1 |
|
|
time.sleep((delay_ms/1000.0) * (attempt+1)) |
|
|
continue |
|
|
else: |
|
|
break |
|
|
end = time.time() |
|
|
elapsed = end - start |
|
|
query_times.append(elapsed) |
|
|
try: |
|
|
token_num = 0 |
|
|
if output_tokens is not None: |
|
|
try: |
|
|
last_token = output_tokens.split()[-1] if isinstance(output_tokens, str) else str(output_tokens) |
|
|
if last_token.isdigit(): |
|
|
token_num = int(last_token) |
|
|
except Exception as e: |
|
|
debug_print(f"Token count conversion failed for output_tokens={output_tokens}: {e}") |
|
|
else: |
|
|
token_num = 0 |
|
|
except Exception as e: |
|
|
debug_print(f"Token count conversion outer exception for output_tokens={output_tokens}: {e}") |
|
|
token_num = 0 |
|
|
token_counts.append(token_num) |
|
|
results.append({ |
|
|
'run': i+1, |
|
|
'output': output, |
|
|
'input_tokens': input_tokens, |
|
|
'output_tokens': output_tokens, |
|
|
'tokens': token_num, |
|
|
'error': attempt if attempt > 0 else 0, |
|
|
'time': elapsed |
|
|
}) |
|
|
outputs.append(f"=== Query {i+1}/{num_runs} ===\nTokens: {token_num}\n{output}") |
|
|
|
|
|
jobs[job_id]["partial_results"] = { |
|
|
"num_done": i+1, |
|
|
"total": num_runs, |
|
|
"avg_time": statistics.mean(query_times) if query_times else 0, |
|
|
"stdev_time": statistics.stdev(query_times) if len(query_times) > 1 else 0, |
|
|
"total_tokens": sum(token_counts), |
|
|
"avg_tokens": statistics.mean(token_counts) if token_counts else 0, |
|
|
"stdev_tokens": statistics.stdev(token_counts) if len(token_counts) > 1 else 0, |
|
|
"errors": error_count, |
|
|
} |
|
|
time.sleep(delay_ms/1000.0) |
|
|
batch_end = time.time() |
|
|
total_time = batch_end - batch_start |
|
|
avg_time = statistics.mean(query_times) if query_times else 0 |
|
|
stdev_time = statistics.stdev(query_times) if len(query_times) > 1 else 0 |
|
|
|
|
|
if prompt_mode == "Sequential Prompts": |
|
|
filename = f"{safe_prefix + '-' if safe_prefix else ''}sequential-{safe_model}_{date_str}.csv" |
|
|
abs_csv_path = os.path.abspath(filename) |
|
|
with open(abs_csv_path, 'w', newline='', encoding='utf-8') as csvfile: |
|
|
writer = csv.writer(csvfile) |
|
|
writer.writerow(['Prompt Number', 'Run', 'Input Prompt', 'Full Prompt', 'Output', 'Input Tokens', 'Output Tokens', 'Tokens', 'Error Retries', 'Time (s)']) |
|
|
for r in results: |
|
|
writer.writerow([ |
|
|
r['prompt_number'], |
|
|
r['run'], |
|
|
r['input_prompt'], |
|
|
r['full_prompt'], |
|
|
r['output'], |
|
|
r['input_tokens'], |
|
|
r['output_tokens'], |
|
|
r['tokens'], |
|
|
r['error'], |
|
|
f"{r['time']:.3f}" |
|
|
]) |
|
|
else: |
|
|
filename = f"{safe_prefix + '-' if safe_prefix else ''}{num_runs}-{safe_model}_{date_str}.csv" |
|
|
abs_csv_path = os.path.abspath(filename) |
|
|
with open(abs_csv_path, 'w', newline='', encoding='utf-8') as csvfile: |
|
|
writer = csv.writer(csvfile) |
|
|
writer.writerow(['Run', 'Output', 'Input Tokens', 'Output Tokens', 'Tokens', 'Error Retries', 'Time (s)']) |
|
|
for r in results: |
|
|
writer.writerow([r['run'], r['output'], r['input_tokens'], r['output_tokens'], r['tokens'], r['error'], f"{r['time']:.3f}"]) |
|
|
|
|
|
txt_filename = f"{safe_prefix}-{num_runs}-{1}_LLMs_prompt_{date_str}.TXT" |
|
|
abs_txt_path = os.path.abspath(txt_filename) |
|
|
with open(abs_txt_path, 'w', encoding='utf-8') as txtfile: |
|
|
txtfile.write(query) |
|
|
|
|
|
total_tokens = sum(token_counts) |
|
|
avg_tokens = statistics.mean(token_counts) if token_counts else 0 |
|
|
stdev_tokens = statistics.stdev(token_counts) if len(token_counts) > 1 else 0 |
|
|
|
|
|
if prompt_mode == "Sequential Prompts": |
|
|
total_prompts = len(individual_prompts) |
|
|
total_runs = total_prompts * num_runs |
|
|
stats = ( |
|
|
f"Prompt mode: {prompt_mode}\n" |
|
|
f"Total prompts: {total_prompts}\n" |
|
|
f"Runs per prompt: {num_runs}\n" |
|
|
f"Total runs: {total_runs}\n" |
|
|
f"Total tokens: {total_tokens}\n" |
|
|
f"Average tokens: {avg_tokens:.2f}\n" |
|
|
f"STDEV tokens: {stdev_tokens:.2f}\n" |
|
|
f"Errors encountered: {error_count}\n" |
|
|
f"Total time elapsed: {total_time:.2f} s\n" |
|
|
f"Average time per run: {avg_time:.2f} s\n" |
|
|
f"STD time per run: {stdev_time:.2f} s" |
|
|
) |
|
|
else: |
|
|
stats = ( |
|
|
f"Prompt mode: {prompt_mode}\n" |
|
|
f"Total queries: {num_runs}\n" |
|
|
f"Total tokens: {total_tokens}\n" |
|
|
f"Average tokens: {avg_tokens:.2f}\n" |
|
|
f"STDEV tokens: {stdev_tokens:.2f}\n" |
|
|
f"Errors encountered: {error_count}\n" |
|
|
f"Total time elapsed: {total_time:.2f} s\n" |
|
|
f"Average time per query: {avg_time:.2f} s\n" |
|
|
f"STD time per query: {stdev_time:.2f} s" |
|
|
) |
|
|
|
|
|
output_text = f"Model: {model_name}\n\n" + '\n\n'.join(outputs) |
|
|
return output_text, abs_csv_path, stats, abs_txt_path |
|
|
|
|
|
def check_batch_job_status(job_id): |
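# Status poller for batch jobs. It first drains results_queue so finished background jobs are
# marked completed, then returns a 9-tuple shaped for the Gradio outputs: status/output text,
# CSV path, stats text, the job's query, and five padding fields. ZIP batches (see below) are
# aggregated here once all of their member jobs have completed.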
|
|
|
|
|
try: |
|
|
while not results_queue.empty(): |
|
|
completed_id, result = results_queue.get_nowait() |
|
|
if completed_id in jobs: |
|
|
jobs[completed_id]["status"] = "completed" |
|
|
jobs[completed_id]["result"] = result |
|
|
jobs[completed_id]["end_time"] = time.time() |
|
|
debug_print(f"Job {completed_id} completed and stored in jobs dictionary") |
|
|
except queue.Empty: |
|
|
pass |
|
|
if job_id not in jobs: |
|
|
|
|
|
return ("Job not found. Please check the ID and try again.", "", "", "", "", "", "", "", "") |
|
|
job = jobs[job_id] |
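# ZIP batches: when several models were submitted with output_format == "ZIP", the first job
# carries the full list of member job ids in "zip_job_ids". Once every member is completed,
# their CSV and TXT files are bundled into a single archive and the path is cached on the job.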
|
|
|
|
|
if job.get("output_format") == "ZIP" and job.get("zip_job_ids"): |
|
|
all_done = all(jobs[jid]["status"] == "completed" for jid in job["zip_job_ids"]) |
|
|
if all_done and not job.get("zip_created"): |
|
|
|
|
|
csv_paths = [] |
|
|
txt_paths = [] |
|
|
for jid in job["zip_job_ids"]: |
|
|
result = jobs[jid]["result"] |
|
|
if isinstance(result, (list, tuple)) and len(result) > 1: |
|
|
csv_paths.append(result[1]) |
|
|
if isinstance(result, (list, tuple)) and len(result) > 3: |
|
|
txt_paths.append(result[3]) |
|
|
|
|
|
prefix = job.get("params", {}).get("prefix", "batch") |
|
|
num_runs = job.get("params", {}).get("num_runs", len(job["zip_job_ids"])) |
|
|
num_llms = len(job["zip_job_ids"]) |
|
|
date_str = datetime.datetime.now().strftime('%Y%m%d_%H%M%S') |
|
|
def sanitize(s): |
|
|
import re |
|
|
return re.sub(r'[^A-Za-z0-9_-]+', '', str(s).replace(' ', '_')) |
|
|
safe_prefix = sanitize(prefix) if prefix else 'batch' |
|
|
zip_name = f"{safe_prefix}-{num_runs}_{num_llms}_LLMs_{date_str}.zip" |
|
|
with zipfile.ZipFile(zip_name, 'w') as zipf: |
|
|
for csv_path in csv_paths: |
|
|
zipf.write(csv_path, os.path.basename(csv_path)) |
|
|
for txt_path in txt_paths: |
|
|
zipf.write(txt_path, os.path.basename(txt_path)) |
|
|
job["zip_created"] = True |
|
|
job["zip_path"] = os.path.abspath(zip_name) |
|
|
if job.get("zip_created"): |
|
|
return (f"ZIP archive created: {os.path.basename(job['zip_path'])}", job["zip_path"], "ZIP archive ready.", job.get("query", ""), "", "", "", "", "") |
|
|
else: |
|
|
|
|
|
|
|
|
num_total = len(job["zip_job_ids"]) |
|
|
num_done = sum(1 for jid in job["zip_job_ids"] if jobs[jid]["status"] == "completed") |
|
|
|
|
|
total_tokens = 0 |
|
|
errors = 0 |
|
|
for jid in job["zip_job_ids"]: |
|
|
j = jobs[jid] |
|
|
if j["status"] == "completed": |
|
|
result = j.get("result", ("", "", "")) |
|
|
stats = result[2] if len(result) > 2 else "" |
|
|
if stats: |
|
|
for line in stats.split('\n'): |
|
|
if line.lower().startswith("total tokens"): |
|
|
try: |
|
|
total_tokens += int(line.split(":",1)[1].strip()) |
|
|
except (ValueError, IndexError): pass  # ignore malformed stats lines
|
|
if line.lower().startswith("errors encountered"): |
|
|
try: |
|
|
errors += int(line.split(":",1)[1].strip()) |
|
|
except (ValueError, IndexError): pass  # ignore malformed stats lines
|
|
temp_stats = f"Batch ZIP job is being processed.\nJobs completed: {num_done} out of {num_total}\nTotal tokens so far: {total_tokens}\nErrors encountered: {errors}\n\nZIP will be created when all jobs are done." |
|
|
return (temp_stats, "", "", job.get("query", ""), "", "", "", "", "") |
|
|
if job["status"] == "processing": |
|
|
elapsed_time = time.time() - job["start_time"] |
|
|
|
|
|
temp_stats = f"Batch job is still being processed (elapsed: {elapsed_time:.1f}s).\n" |
|
|
|
|
|
if "partial_results" in job: |
|
|
partial = job["partial_results"] |
|
|
num_done = partial.get("num_done", 0) |
|
|
total = partial.get("total", "?") |
|
|
|
|
|
|
|
|
job_params = job.get("params", {}) |
|
|
prompt_mode = job_params.get("prompt_mode", "All at Once") |
|
|
num_runs = job_params.get("num_runs", "?") |
|
|
|
|
|
|
|
|
if prompt_mode == "Sequential Prompts" and total != "?" and num_runs != "?": |
|
|
|
|
|
num_prompts = total // num_runs
|
|
temp_stats += f"Progress: {num_done} out of {total} total runs\n" |
|
|
temp_stats += f"({num_prompts} prompts × {num_runs} runs each)\n" |
|
|
else: |
|
|
temp_stats += f"Queries run: {num_done} out of {total}\n" |
|
|
|
|
|
avg_time = partial.get("avg_time", None) |
|
|
stdev_time = partial.get("stdev_time", None) |
|
|
total_tokens = partial.get("total_tokens", None) |
|
|
avg_tokens = partial.get("avg_tokens", None) |
|
|
stdev_tokens = partial.get("stdev_tokens", None) |
|
|
errors = partial.get("errors", None) |
|
|
if avg_time is not None and stdev_time is not None: |
|
|
temp_stats += f"Average time per query: {avg_time}\nSTDEV time: {stdev_time}\n" |
|
|
if total_tokens is not None: |
|
|
temp_stats += f"Total tokens: {total_tokens}\n" |
|
|
if avg_tokens is not None: |
|
|
temp_stats += f"Average tokens: {avg_tokens}\n" |
|
|
if stdev_tokens is not None: |
|
|
temp_stats += f"STDEV tokens: {stdev_tokens}\n" |
|
|
if errors is not None: |
|
|
temp_stats += f"Errors encountered: {errors}\n" |
|
|
else: |
|
|
|
|
|
job_params = job.get("params", {}) |
|
|
prompt_mode = job_params.get("prompt_mode", "All at Once") |
|
|
num_runs = job_params.get("num_runs", "?") |
|
|
|
|
|
if prompt_mode == "Sequential Prompts": |
|
|
|
|
|
|
|
|
temp_stats += f"Starting sequential prompts processing...\n" |
|
|
temp_stats += f"Will run {num_runs} times per prompt\n" |
|
|
else: |
|
|
temp_stats += f"Starting batch processing...\n" |
|
|
temp_stats += f"Will run {num_runs} times\n" |
|
|
temp_stats += "\nTry checking again in a few seconds." |
|
|
return ( |
|
|
temp_stats, |
|
|
"", |
|
|
"", |
|
|
job.get("query", ""), |
|
|
"", |
|
|
"", |
|
|
"", |
|
|
"", |
|
|
"" |
|
|
) |
|
|
if job["status"] == "completed": |
|
|
result = job["result"] |
|
|
|
|
|
if isinstance(result, (list, tuple)): |
|
|
output_text, abs_csv_path, stats, abs_txt_path = (tuple(result) + ("",) * 4)[:4]
|
|
else: |
|
|
output_text, abs_csv_path, stats, abs_txt_path = result, "", "", "" |
|
|
|
|
|
stats_dict = {} |
|
|
stats_lines = stats.split('\n') if stats else [] |
|
|
for line in stats_lines: |
|
|
if ':' in line: |
|
|
k, v = line.split(':', 1) |
|
|
stats_dict[k.strip().lower()] = v.strip() |
|
|
|
|
|
elapsed = job.get("end_time", 0) - job.get("start_time", 0) |
|
|
|
|
|
total_queries = stats_dict.get("total queries", "?") |
|
|
|
|
|
avg_time = stats_dict.get("average time per query", None) |
|
|
stdev_time = stats_dict.get("std time per query", None) |
|
|
|
|
|
header = f"Elapsed time: {elapsed:.2f}s\n" |
|
|
header += f"Queries run: {total_queries} out of {total_queries}\n" if total_queries != "?" else "" |
|
|
if avg_time and stdev_time: |
|
|
header += f"Average time per query: {avg_time}\nSTDEV time: {stdev_time}\n" |
|
|
|
|
|
for k in ["total tokens", "average tokens", "stdev tokens", "errors encountered"]: |
|
|
if k in stats_dict: |
|
|
header += f"{k.title()}: {stats_dict[k]}\n" |
|
|
|
|
|
header += "\n---\n" |
|
|
|
|
|
return header + output_text, abs_csv_path, header + output_text, job.get("query", ""), "", "", "", "", "" |
|
|
|
|
|
return (f"Job status: {job['status']}", "", "", job.get("query", ""), "", "", "", "", "") |
|
|
|
|
|
|
|
|
|
|
|
def download_csv(csv_path): |
|
|
with open(csv_path, 'rb') as f: |
|
|
return f.read(), csv_path |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
custom_css = """ |
|
|
textarea { |
|
|
overflow-y: scroll !important; |
|
|
max-height: 200px; |
|
|
} |
|
|
""" |
|
|
|
|
|
|
|
|
def add_dots_and_reset(): |
|
|
if not hasattr(add_dots_and_reset, "dots"): |
|
|
add_dots_and_reset.dots = "" |
|
|
|
|
|
|
|
|
add_dots_and_reset.dots += "." |
|
|
|
|
|
|
|
|
if len(add_dots_and_reset.dots) > 5: |
|
|
add_dots_and_reset.dots = "" |
|
|
|
|
|
print(f"Current dots: {add_dots_and_reset.dots}") |
|
|
return add_dots_and_reset.dots |
|
|
|
|
|
|
|
|
def run_query(max_value): |
|
|
|
|
|
return [[i, i**2] for i in range(1, max_value + 1)] |
|
|
|
|
|
|
|
|
def periodic_update(is_checked): |
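# Driven by the auto-refresh checkbox (wired with every=2 further down); must return exactly
# one value per component listed in that .change() call, hence the eight return values.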
|
|
interval = 2 if is_checked else None |
|
|
debug_print(f"Auto-refresh checkbox is {'checked' if is_checked else 'unchecked'}, every={interval}") |
|
|
if is_checked: |
|
|
global last_job_id |
|
|
job_list_md = refresh_job_list() |
|
|
job_status = check_job_status(last_job_id) if last_job_id else ("No job ID available", "", "", "", "", "", "", "", "") |
|
|
query_results = run_query(10) |
|
|
|
|
|
model1_resp, model1_tok, model2_resp, model2_tok = update_model_responses_from_jobs() |
|
|
return job_list_md, job_status[0], query_results, "", model1_resp, model1_tok, model2_resp, model2_tok
|
|
else: |
|
|
|
|
|
return "", "", [], "", "", "", "", "", "", "", "" |
|
|
|
|
|
|
|
|
def get_interval(is_checked): |
|
|
return 2 if is_checked else None |
|
|
|
|
|
|
|
|
import glob |
|
|
|
|
|
def list_all_csv_files(): |
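# Collects every *.csv and *.zip in the working directory (newest first) and returns:
# an HTML table of download links (served through Gradio's /file= route), the file list,
# their absolute paths, and a plain table for the gr.DataFrame component.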
|
|
csv_files = sorted(glob.glob("*.csv"), key=os.path.getmtime, reverse=True) |
|
|
zip_files = sorted(glob.glob("*.zip"), key=os.path.getmtime, reverse=True) |
|
|
all_files = csv_files + zip_files |
|
|
if not all_files: |
|
|
return "No CSV or ZIP files found.", [], [] |
|
|
|
|
|
file_infos = [] |
|
|
for f in all_files: |
|
|
stat = os.stat(f) |
|
|
dt = datetime.datetime.fromtimestamp(stat.st_mtime).strftime('%Y-%m-%d %H:%M:%S') |
|
|
size_kb = stat.st_size / 1024 |
|
|
file_infos.append({ |
|
|
"name": os.path.basename(f), |
|
|
"path": os.path.abspath(f), |
|
|
"datetime": dt, |
|
|
"size_kb": f"{size_kb:.1f} KB" |
|
|
}) |
|
|
|
|
|
html_links = '<table><thead><tr><th>File</th><th>Date/Time</th><th>Size</th></tr></thead><tbody>' |
|
|
for info in file_infos: |
|
|
html_links += f'<tr><td><a href="/file={info["path"]}" download target="_blank">{info["name"]}</a></td>' \ |
|
|
f'<td>{info["datetime"]}</td><td>{info["size_kb"]}</td></tr>' |
|
|
html_links += '</tbody></table>' |
|
|
|
|
|
gradio_table = [[info["name"], info["datetime"], info["size_kb"]] for info in file_infos] |
|
|
return html_links, all_files, [os.path.abspath(f) for f in all_files], gradio_table |
|
|
|
|
|
|
|
|
with gr.Blocks(css=custom_css, js=""" |
|
|
document.addEventListener('DOMContentLoaded', function() { |
|
|
// Add event listener for job list clicks |
|
|
const jobListInterval = setInterval(() => { |
|
|
const jobLinks = document.querySelectorAll('.job-list-container a'); |
|
|
if (jobLinks.length > 0) { |
|
|
jobLinks.forEach(link => { |
|
|
link.addEventListener('click', function(e) { |
|
|
e.preventDefault(); |
|
|
const jobId = this.textContent.split(' ')[0]; |
|
|
// Find the job ID input textbox and set its value |
|
|
const jobIdInput = document.querySelector('.job-id-input input'); |
|
|
if (jobIdInput) { |
|
|
jobIdInput.value = jobId; |
|
|
// Trigger the input event to update Gradio's state |
|
|
jobIdInput.dispatchEvent(new Event('input', { bubbles: true })); |
|
|
} |
|
|
}); |
|
|
}); |
|
|
clearInterval(jobListInterval); |
|
|
} |
|
|
}, 500); |
|
|
}); |
|
|
""") as app: |
|
|
gr.Markdown('''# PsyLLM Interface |
|
|
**Model Selection & Parameters:** Choose from the following options: |
|
|
- 🟦 NEBIUS Models (DeepSeek, Llama, Mistral, Qwen, etc.) - various context windows |
|
|
- 🇺🇸 Remote Meta-Llama-3 (HuggingFace) - has a context window of 8,000 tokens
- 🇪🇺 Mistral-API (Mistral) - has a context window of 32,000 tokens
- 🇺🇸 OpenAI Models (GPT-3.5, GPT-4o, o1-mini, o3-mini) - various context windows
|
|
|
|
|
**🔥 Randomness (Temperature):** Adjusts output predictability. |
|
|
- Example: 0.2 makes the output very deterministic (less creative), while 0.8 introduces more variety and spontaneity. |
|
|
|
|
|
**🎯 Word Variety (Top‑p):** Nucleus sampling - restricts generation to the smallest set of tokens whose cumulative probability reaches the chosen threshold.
|
|
- Example: 0.5 restricts output to the most likely 50% of token choices for a focused answer; 0.95 allows almost all possibilities for more diverse responses. |
|
|
|
|
|
**📏 Max Tokens:** Maximum response length (up to 8192 tokens, default 3000) |
|
|
|
|
|
**⚠️ IMPORTANT: This app uses asynchronous processing to avoid timeout issues** |
|
|
- When you submit a query, you'll receive a Job ID |
|
|
- Use the "Check Job Status" tab to monitor and retrieve your results |
|
|
''') |
|
|
|
|
|
with gr.Tabs() as tabs: |
|
|
with gr.TabItem("Submit Query"): |
|
|
with gr.Row(): |
|
|
with gr.Column(scale=1): |
|
|
model1_dropdown = gr.Dropdown( |
|
|
choices=model_display_options, |
|
|
value=model_display_options[0], |
|
|
label="Model 1", |
|
|
interactive=True |
|
|
) |
|
|
model2_dropdown = gr.Dropdown( |
|
|
choices=model_display_options, |
|
|
value=model_display_options[1], |
|
|
label="Model 2", |
|
|
interactive=True |
|
|
) |
|
|
with gr.Column(scale=2): |
|
|
temperature_slider = gr.Slider( |
|
|
minimum=0.1, maximum=1.0, value=0.5, step=0.1, |
|
|
label="Randomness (Temperature)" |
|
|
) |
|
|
top_p_slider = gr.Slider( |
|
|
minimum=0.1, maximum=0.99, value=0.95, step=0.05, |
|
|
label="Word Variety (Top-p)" |
|
|
) |
|
|
top_k_slider = gr.Slider( |
|
|
minimum=1, maximum=100, value=50, step=1, |
|
|
label="Top-k (Number of tokens to consider)" |
|
|
) |
|
|
max_tokens_slider = gr.Slider( |
|
|
minimum=64, maximum=8192, value=3000, step=64, |
|
|
label="Max Tokens (Response length)" |
|
|
) |
|
|
with gr.Row(): |
|
|
query_input = gr.Textbox( |
|
|
label="Enter your query here", |
|
|
placeholder="Type your query", |
|
|
lines=4 |
|
|
) |
|
|
submit_button = gr.Button("Submit Query to Selected Models") |
|
|
with gr.Row(): |
|
|
with gr.Column(scale=1): |
|
|
gr.Markdown("### Model 1 Results") |
|
|
model1_response = gr.Textbox( |
|
|
label="Model 1 Response", |
|
|
placeholder="Response will appear here", |
|
|
lines=8 |
|
|
) |
|
|
model1_tokens = gr.Markdown("Input/Output tokens: 0/0") |
|
|
with gr.Column(scale=1): |
|
|
gr.Markdown("### Model 2 Results") |
|
|
model2_response = gr.Textbox( |
|
|
label="Model 2 Response", |
|
|
placeholder="Response will appear here", |
|
|
lines=8 |
|
|
) |
|
|
model2_tokens = gr.Markdown("Input/Output tokens: 0/0") |
|
|
|
|
|
with gr.TabItem("Check Job Status"): |
|
|
with gr.Row(): |
|
|
with gr.Column(scale=1): |
|
|
job_list = gr.Markdown( |
|
|
value="No jobs yet", |
|
|
label="Job List (Click to select)" |
|
|
) |
|
|
|
|
|
refresh_button = gr.Button("Refresh Job List") |
|
|
|
|
|
|
|
|
auto_refresh_checkbox = gr.Checkbox( |
|
|
label="Enable Auto Refresh", |
|
|
value=False |
|
|
) |
|
|
|
|
|
|
|
|
df = gr.DataFrame( |
|
|
value=run_query(10), |
|
|
headers=["Number", "Square"], |
|
|
label="Query Results", |
|
|
visible=False |
|
|
) |
|
|
|
|
|
refresh_csv_button = gr.Button("Refresh CSV Files") |
|
|
csv_download_html = gr.HTML(label="All CSV Download Links") |
|
|
csv_download_file = gr.File(label="All CSV Files", file_types=[".csv"], interactive=True, file_count="multiple") |
|
|
|
|
|
with gr.Column(scale=2): |
|
|
job_id_input = gr.Textbox( |
|
|
label="Job ID", |
|
|
placeholder="Job ID will appear here when selected from the list", |
|
|
lines=1 |
|
|
) |
|
|
job_query_display = gr.Textbox( |
|
|
label="Job Query", |
|
|
placeholder="The query associated with this job will appear here", |
|
|
lines=2, |
|
|
interactive=False |
|
|
) |
|
|
check_button = gr.Button("Check Status") |
|
|
cleanup_button = gr.Button("Cleanup Old Jobs") |
|
|
|
|
|
with gr.Row(): |
|
|
status_response = gr.Textbox( |
|
|
label="Job Result", |
|
|
placeholder="Job result will appear here", |
|
|
lines=8 |
|
|
) |
|
|
status_context = gr.Textbox( |
|
|
label="Context Information", |
|
|
placeholder="Context information will appear here", |
|
|
lines=6 |
|
|
) |
|
|
|
|
|
with gr.Row(): |
|
|
status_tokens1 = gr.Markdown("") |
|
|
status_tokens2 = gr.Markdown("") |
|
|
|
|
|
with gr.TabItem("Batch Query"): |
|
|
with gr.Row(): |
|
|
batch_prefix_input = gr.Textbox( |
|
|
label="CSV Filename Prefix (optional)", |
|
|
placeholder="Enter prefix for CSV filename (optional)", |
|
|
lines=1 |
|
|
) |
|
|
with gr.Row(): |
|
|
batch_query_input = gr.Textbox( |
|
|
label="Enter your query here", |
|
|
placeholder="For Sequential Prompts: First line = instruction, rest = individual prompts\nFor All at Once: Single query to repeat", |
|
|
lines=4 |
|
|
) |
|
|
with gr.Row(): |
|
|
batch_prompt_mode = gr.Radio( |
|
|
choices=["All at Once", "Sequential Prompts"], |
|
|
value="All at Once", |
|
|
label="Prompt Mode", |
|
|
interactive=True |
|
|
) |
|
|
with gr.Row(): |
|
|
batch_mode_help = gr.Markdown( |
|
|
"**All at Once**: Repeats the same query multiple times\n" |
|
|
"**Sequential Prompts**: First line is the instruction, subsequent lines are individual prompts to process sequentially" |
|
|
) |
|
|
with gr.Row(): |
|
|
batch_output_format = gr.Radio( |
|
|
choices=["CSV", "ZIP"], |
|
|
value="CSV", |
|
|
label="Batch Output Format (CSV or ZIP)", |
|
|
interactive=True |
|
|
) |
|
|
with gr.Row(): |
|
|
mistral_hf_checkbox = gr.CheckboxGroup( |
|
|
choices=[m["display"] for m in [m for m in models if m["provider"] in ("mistral", "hf_inference")]], |
|
|
label="Mistral & HuggingFace Models" |
|
|
) |
|
|
nebius_checkbox = gr.CheckboxGroup( |
|
|
choices=[m["display"] for m in [m for m in models if m["provider"] == "nebius"]], |
|
|
label="Nebius Models" |
|
|
) |
|
|
openai_checkbox = gr.CheckboxGroup( |
|
|
choices=[m["display"] for m in [m for m in models if m["provider"] in ("openai", "gemini", "grok", "anthropic")]], |
|
|
label="OpenAI / Gemini / Grok / Anthropic Models" |
|
|
) |
|
|
with gr.Row(): |
|
|
batch_temperature_slider = gr.Slider( |
|
|
minimum=0.1, maximum=1.0, value=0.5, step=0.1, |
|
|
label="Randomness (Temperature)" |
|
|
) |
|
|
batch_top_p_slider = gr.Slider( |
|
|
minimum=0.1, maximum=0.99, value=0.95, step=0.05, |
|
|
label="Word Variety (Top-p)" |
|
|
) |
|
|
batch_top_k_slider = gr.Slider( |
|
|
minimum=1, maximum=100, value=50, step=1, |
|
|
label="Top-k (Number of tokens to consider)" |
|
|
) |
|
|
batch_max_tokens_slider = gr.Slider( |
|
|
minimum=64, maximum=8192, value=3000, step=64, |
|
|
label="Max Tokens (Response length)" |
|
|
) |
|
|
with gr.Row(): |
|
|
batch_num_runs = gr.Dropdown( |
|
|
choices=[5, 25, 50, 75, 100], |
|
|
value=5, |
|
|
label="Number of runs" |
|
|
) |
|
|
batch_delay = gr.Dropdown( |
|
|
choices=[100, 200, 300, 400, 500], |
|
|
value=100, |
|
|
label="Delay between queries (ms)" |
|
|
) |
|
|
with gr.Row(): |
|
|
batch_submit_button = gr.Button("Run Batch Query") |
|
|
with gr.Row(): |
|
|
batch_outputs = gr.Textbox( |
|
|
label="Batch Outputs", |
|
|
lines=10 |
|
|
) |
|
|
|
|
|
with gr.Row(): |
|
|
batch_check_button = gr.Button("Check Batch Status") |
|
|
batch_auto_refresh = gr.Checkbox(label="Enable Auto Refresh", value=False) |
|
|
with gr.Row(): |
|
|
batch_stats = gr.Textbox( |
|
|
label="Job Results (Stats)", |
|
|
lines=3 |
|
|
) |
|
|
|
|
|
|
|
|
def serve_csv_links(path): |
|
|
import mimetypes |
|
|
if os.path.isfile(path): |
|
|
rel_path = os.path.relpath(path, ".") |
|
|
href = f"/file={rel_path}" |
|
|
|
|
|
mime = mimetypes.guess_type(path)[0] or 'text/csv' |
|
|
html_link = f'<ul><li><a href="{href}" download target="_blank" type="{mime}">{os.path.basename(path)}</a></li></ul>' |
|
|
return html_link, path |
|
|
else: |
|
|
return "❌ File not found", None |
|
|
|
|
|
|
|
|
with gr.Row(): |
|
|
batch_download_html = gr.HTML(label="Download Link") |
|
|
batch_download_file = gr.File(label="Download CSV", file_types=[".csv"]) |
|
|
batch_csv_path = gr.Textbox(label="CSV File Path", interactive=False) |
|
|
batch_job_id = gr.Textbox(label="Batch Job ID", interactive=False) |
|
|
batch_job_query = gr.Textbox(label="Batch Job Query", interactive=False) |
|
|
|
|
|
batch_job_list_headline = gr.Markdown("### Submitted Jobs", elem_id="batch-job-list-headline") |
|
|
batch_job_list = gr.Markdown(label="Batch Job List", value=get_job_list(), elem_id="batch-job-list") |
|
|
|
|
|
batch_refresh_job_list_button = gr.Button("Refresh Job List", elem_id="batch-refresh-job-list-btn") |
|
|
|
|
|
|
|
|
def batch_submit_multi_model(prefix, query, prompt_mode, output_format, mistral_hf, nebius, openai, temperature, top_p, top_k, max_tokens, num_runs, delay_ms): |
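# Fans the batch out across every checked model: one background job per model, all sharing
# the same query and sampling parameters. With output_format == "ZIP" the first job id is
# tagged with the whole group (zip_job_ids) so check_batch_job_status can build the archive.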
|
|
selected_models = (mistral_hf or []) + (nebius or []) + (openai or []) |
|
|
if not query: |
|
|
return ("Please enter a non-empty query", "", "", get_job_list()) |
|
|
if not selected_models: |
|
|
return ("Please select at least one model", "", "", get_job_list()) |
|
|
job_ids = [] |
|
|
csv_paths = [] |
|
|
for model_display in selected_models: |
|
|
|
|
|
model_entry = next((m for m in models if m["display"] == model_display), None) |
|
|
if not model_entry: |
|
|
continue |
|
|
model_backend = model_entry["display"] |
|
|
|
|
|
job_id = str(uuid.uuid4()) |
|
|
threading.Thread( |
|
|
target=process_in_background, |
|
|
args=(job_id, process_batch_query_job, [job_id, prefix, query, prompt_mode, model_backend, temperature, top_p, top_k, max_tokens, num_runs, delay_ms]) |
|
|
).start() |
|
|
jobs[job_id] = { |
|
|
"status": "processing", |
|
|
"type": "batch_query", |
|
|
"start_time": time.time(), |
|
|
"query": query, |
|
|
"model": model_display, |
|
|
"params": { |
|
|
"prefix": prefix, |
|
|
"prompt_mode": prompt_mode, |
|
|
"temperature": temperature, |
|
|
"top_p": top_p, |
|
|
"top_k": top_k, |
|
|
"max_tokens": max_tokens, |
|
|
"num_runs": num_runs, |
|
|
"delay_ms": delay_ms |
|
|
} |
|
|
} |
|
|
job_ids.append(job_id) |
|
|
global last_job_id  # update the module-level tracker used by auto-refresh
last_job_id = job_ids[0] if job_ids else None
|
|
|
|
|
if output_format == "ZIP" and job_ids: |
|
|
|
|
|
jobs[job_ids[0]]["zip_job_ids"] = job_ids |
|
|
jobs[job_ids[0]]["output_format"] = "ZIP" |
|
|
return ( |
|
|
f"Batch jobs submitted for {len(job_ids)} model(s). First Job ID: {last_job_id}.\nUse 'Check Job Status' tab to monitor results.", |
|
|
last_job_id, |
|
|
query, |
|
|
get_job_list() |
|
|
) |
|
|
|
|
|
batch_submit_button.click( |
|
|
batch_submit_multi_model, |
|
|
inputs=[ |
|
|
batch_prefix_input, |
|
|
batch_query_input, |
|
|
batch_prompt_mode, |
|
|
batch_output_format, |
|
|
mistral_hf_checkbox, |
|
|
nebius_checkbox, |
|
|
openai_checkbox, |
|
|
batch_temperature_slider, |
|
|
batch_top_p_slider, |
|
|
batch_top_k_slider, |
|
|
batch_max_tokens_slider, |
|
|
batch_num_runs, |
|
|
batch_delay |
|
|
], |
|
|
outputs=[ |
|
|
batch_outputs, |
|
|
batch_job_id, |
|
|
batch_job_query, |
|
|
batch_job_list |
|
|
] |
|
|
) |
|
|
|
|
|
|
|
|
def batch_check_status_and_advance(job_id): |
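# Checks the currently selected batch job and, if it has finished, advances the Job ID box
# to the next batch job that is still processing, so repeated clicks walk through the queue.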
|
|
|
|
|
if not job_id or job_id not in jobs: |
|
|
|
|
|
return ("No valid job selected.", "", "", "", "", "", "", "", "") |
|
|
job = jobs[job_id] |
|
|
model_name = job.get("model", "Unknown Model") |
|
|
|
|
|
out_tuple = check_batch_job_status(job_id) |
|
|
|
|
|
if not isinstance(out_tuple, (list, tuple)): |
|
|
out_tuple = (out_tuple,) |
|
|
out_tuple = tuple(out_tuple) + ("",) * (9 - len(out_tuple)) |
|
|
out, csv_path, stats, query, *rest = out_tuple[:9] |
|
|
|
|
|
out = f"### Model: {model_name}\n\n" + out |
|
|
|
|
|
if job["status"] == "completed": |
|
|
|
|
|
running_jobs = [jid for jid, jinfo in jobs.items() |
|
|
if jinfo.get("type") == "batch_query" and jinfo.get("status") == "processing"] |
|
|
if running_jobs: |
|
|
next_id = running_jobs[0] |
|
|
next_job = jobs[next_id] |
|
|
return ( |
|
|
out, |
|
|
csv_path, |
|
|
stats, |
|
|
next_id, |
|
|
next_job.get("query", ""), "", "", "", "" |
|
|
) |
|
|
return (out, csv_path, stats, job_id, query, "", "", "", "") |
|
|
|
|
|
batch_check_button.click( |
|
|
batch_check_status_and_advance, |
|
|
inputs=[batch_job_id], |
|
|
outputs=[ |
|
|
batch_outputs, |
|
|
batch_csv_path, |
|
|
batch_stats, |
|
|
batch_job_id, |
|
|
batch_job_query |
|
|
] |
|
|
) |
|
|
|
|
|
|
|
|
batch_csv_path.change( |
|
|
fn=serve_csv_links, |
|
|
inputs=[batch_csv_path], |
|
|
outputs=[batch_download_html, batch_download_file] |
|
|
) |
|
|
|
|
|
|
|
|
def batch_periodic_update(is_checked): |
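# Auto-refresh callback for the Batch Query tab; returns one value per component in the
# batch_auto_refresh.change() outputs list (job list, outputs, CSV path, stats, query).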
|
|
interval = 2 if is_checked else None |
|
|
if is_checked: |
|
|
global last_job_id |
|
|
job_list_md = refresh_job_list() |
|
|
job_status = check_batch_job_status(last_job_id) if last_job_id else ("No job ID available", "", "", "", "", "", "", "", "") |
|
|
return job_list_md, job_status[0], job_status[1], job_status[2], job_status[3]
|
|
else: |
|
|
return "", "", "", "", "", "", "", "", "" |
|
|
|
|
|
batch_auto_refresh.change( |
|
|
fn=batch_periodic_update, |
|
|
inputs=[batch_auto_refresh], |
|
|
outputs=[batch_job_list, batch_outputs, batch_csv_path, batch_stats, batch_job_query], |
|
|
every=2 |
|
|
) |
|
|
|
|
|
|
|
|
refresh_csv_button_batch = gr.Button("Refresh CSV Files") |
|
|
csv_download_html_batch = gr.HTML(label="All CSV Download Links") |
|
|
csv_download_file_batch = gr.File(label="All CSV Files", file_types=[".csv"], interactive=True, file_count="multiple") |
|
|
|
|
|
with gr.TabItem("App Management"): |
|
|
with gr.Row(): |
|
|
reset_button = gr.Button("Reset App") |
|
|
|
|
|
with gr.Row(): |
|
|
reset_response = gr.Textbox( |
|
|
label="Reset Response", |
|
|
placeholder="Reset confirmation will appear here", |
|
|
lines=2 |
|
|
) |
|
|
reset_context = gr.Textbox( |
|
|
label="", |
|
|
placeholder="", |
|
|
lines=2, |
|
|
visible=False |
|
|
) |
|
|
|
|
|
with gr.Row(): |
|
|
reset_model = gr.Markdown("") |
|
|
|
|
|
|
|
|
submit_button.click( |
|
|
submit_query_async, |
|
|
inputs=[ |
|
|
query_input, |
|
|
model1_dropdown, |
|
|
model2_dropdown, |
|
|
temperature_slider, |
|
|
top_p_slider, |
|
|
top_k_slider, |
|
|
max_tokens_slider |
|
|
], |
|
|
outputs=[ |
|
|
model1_response, |
|
|
model1_tokens, |
|
|
model2_response, |
|
|
model2_tokens, |
|
|
job_id_input, |
|
|
job_query_display, |
|
|
job_list |
|
|
] |
|
|
) |
|
|
|
|
|
|
|
|
|
|
|
check_button.click( |
|
|
check_job_status, |
|
|
inputs=[job_id_input], |
|
|
outputs=[status_response, status_context, status_tokens1, status_tokens2, job_query_display, model1_response, model1_tokens, model2_response, model2_tokens] |
|
|
) |
|
|
|
|
|
refresh_button.click( |
|
|
refresh_job_list, |
|
|
inputs=[], |
|
|
outputs=[job_list] |
|
|
) |
|
|
|
|
|
job_id_input.change( |
|
|
job_selected, |
|
|
inputs=[job_id_input], |
|
|
outputs=[job_id_input, job_query_display] |
|
|
) |
|
|
|
|
|
cleanup_button.click( |
|
|
cleanup_old_jobs, |
|
|
inputs=[], |
|
|
outputs=[status_response, status_context, status_tokens1] |
|
|
) |
|
|
|
|
|
reset_button.click( |
|
|
reset_app_updated, |
|
|
inputs=[], |
|
|
outputs=[reset_response, reset_context, reset_model] |
|
|
) |
|
|
|
|
|
app.load( |
|
|
fn=refresh_job_list, |
|
|
inputs=None, |
|
|
outputs=job_list |
|
|
) |
|
|
|
|
|
auto_refresh_checkbox.change( |
|
|
fn=periodic_update, |
|
|
inputs=[auto_refresh_checkbox], |
|
|
outputs=[job_list, status_response, df, status_context, model1_response, model1_tokens, model2_response, model2_tokens], |
|
|
every=2 |
|
|
) |
|
|
|
|
|
|
|
|
def refresh_csv_files(): |
|
|
html_links, csv_files, abs_paths, gradio_table = list_all_csv_files() |
|
|
return html_links, abs_paths, gradio_table |
|
|
|
|
|
|
|
|
csv_file_info_df = gr.DataFrame(headers=["File Name", "Date/Time", "Size"], label="CSV File Info", interactive=False) |
|
|
csv_file_info_df_batch = gr.DataFrame(headers=["File Name", "Date/Time", "Size"], label="CSV File Info", interactive=False) |
|
|
|
|
|
refresh_csv_button.click( |
|
|
fn=refresh_csv_files, |
|
|
inputs=[], |
|
|
outputs=[csv_download_html, csv_download_file, csv_file_info_df] |
|
|
) |
|
|
refresh_csv_button_batch.click( |
|
|
fn=refresh_csv_files, |
|
|
inputs=[], |
|
|
outputs=[csv_download_html_batch, csv_download_file_batch, csv_file_info_df_batch] |
|
|
) |
|
|
|
|
|
|
|
|
batch_refresh_job_list_button.click( |
|
|
refresh_job_list, |
|
|
inputs=[], |
|
|
outputs=[batch_job_list] |
|
|
) |
|
|
|
|
|
|
|
|
# 'global' is a no-op at module scope, so a plain assignment is enough here.
llm_chain = None
|
|
|
|
|
|
|
|
GEMINI_LAST_REQUEST_TIME = {} |
|
|
|
|
|
GEMINI_DAILY_REQUESTS = {} |
|
|
|
|
|
if __name__ == "__main__": |
|
|
debug_print("Launching Gradio interface.") |
|
|
app.queue().launch(share=False) |
|
|
|