import gradio as gr
import subprocess
import os
import re
from datetime import datetime


def run_evaluation(model_name):
    results = []

    # Use the secret OpenRouter API key from the Hugging Face space
    if "OPENROUTER_API_KEY" not in os.environ:
        return "Error: OPENROUTER_API_KEY not found in environment variables."

    try:
        # Set up environment
        env = os.environ.copy()
        env["OPENROUTER_API_KEY"] = os.environ["OPENROUTER_API_KEY"]

        # Run inference
        current_date = datetime.now().strftime("%Y%m%d")
        inference_cmd = f"""
        cd duckdb-nsql/ &&
        python eval/predict.py \
            predict \
            eval/data/dev.json \
            eval/data/tables.json \
            --output-dir output/ \
            --stop-tokens ';' \
            --max-tokens 30000 \
            --overwrite-manifest \
            --manifest-client openrouter \
            --manifest-engine {model_name} \
            --prompt-format duckdbinstgraniteshort
        """
        inference_result = subprocess.run(inference_cmd, shell=True, check=True, capture_output=True, text=True, env=env)
        results.append("Inference completed.")

        # Extract JSON file path from inference output
        json_path_match = re.search(r'(.*\.json)', inference_result.stdout)
        if not json_path_match:
            raise ValueError("Could not find JSON file path in inference output")
        json_file = os.path.basename(json_path_match.group(1))
        results.append(f"Generated JSON file: {json_file}")

        # Run evaluation
        eval_cmd = f"""
        cd duckdb-nsql/ &&
        python eval/evaluate.py evaluate \
            --gold eval/data/dev.json \
            --db eval/data/databases/ \
            --tables eval/data/tables.json \
            --output-dir output/ \
            --pred output/{json_file}
        """
        eval_result = subprocess.run(eval_cmd, shell=True, check=True, capture_output=True, text=True)

        # Extract and format metrics from eval output
        metrics = eval_result.stdout
        if metrics:
            results.append(f"Evaluation completed:\n{metrics}")
        else:
            results.append("Evaluation completed, but couldn't get metrics.")
    except subprocess.CalledProcessError as e:
        results.append(f"Error occurred: {str(e)}")
        results.append(f"Command output: {e.output}")
        results.append(f"Command stderr: {e.stderr}")
    except Exception as e:
        results.append(f"An unexpected error occurred: {str(e)}")

    return "\n\n".join(results)

with gr.Blocks() as demo:
    gr.Markdown("# DuckDB SQL Evaluation App (OpenRouter)")
    model_name = gr.Textbox(label="Model Name (e.g., qwen/qwen-2.5-72b-instruct)")
    start_btn = gr.Button("Start Evaluation")
    output = gr.Textbox(label="Output", lines=20)

    start_btn.click(fn=run_evaluation, inputs=[model_name], outputs=output)

demo.launch()