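"""Premium Model Palindrome Benchmark (Hugging Face Space).

Runs six premium text-generation models across five languages, asks each to
produce a palindrome, scores the outputs with two grammar-correction models,
and saves the results to a CSV file exposed through a Gradio interface.
"""
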
import os
import re

import gradio as gr
import numpy as np
import pandas as pd
import torch
from transformers import pipeline, set_seed

# Check GPU availability (for debugging)
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("GPU Name:", torch.cuda.get_device_name(0))
else:
    print("No GPU detected. Running on CPU.")

# Set seed for reproducibility
set_seed(42)

# Define the six premium generation models:
premium_models = [
    "Qwen/Qwen2.5-Omni-7B",
    "Qwen/Qwen2.5-VL-7B-Instruct",
    "deepseek-ai/Janus-Pro-7B",
    "meta-llama/Llama-2-7b-hf",
    "Alibaba-NLP/gte-Qwen2-7B-instruct",
    "HuggingFaceH4/zephyr-7b-beta"
]
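# Note (assumption): several of these checkpoints are not plain text-generation
# models (e.g. Qwen2.5-Omni and Qwen2.5-VL are multimodal, gte-Qwen2 is an
# embedding model), so they may fail to load with the "text-generation"
# pipeline below; any model that fails to load is skipped by run_benchmark_all().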

# Define five languages: English, German, Spanish, French, Portuguese.
languages = {
    "en": "English",
    "de": "German",
    "es": "Spanish",
    "fr": "French",
    "pt": "Portuguese"
}

# Define two cost-effective grammar evaluation models:
grammar_model_names = [
    "vennify/t5-base-grammar-correction",
    "hassaanik/grammar-correction-model"
]

# Determine device: use GPU (0) if available, otherwise CPU (-1).
device = 0 if torch.cuda.is_available() else -1

# Function to load generation pipelines with the appropriate device setting.
def load_generation_pipeline(model_name):
    try:
        return pipeline("text-generation", model=model_name, device=device)
    except Exception as e:
        print(f"Error loading generation model {model_name}: {e}")
        return None

# Function to load grammar evaluation pipelines with the appropriate device setting.
def load_grammar_pipeline(model_name):
    try:
        return pipeline("text2text-generation", model=model_name, device=device)
    except Exception as e:
        print(f"Error loading grammar model {model_name}: {e}")
        return None

# Pre-load grammar evaluators.
rater_models = []
for model_name in grammar_model_names:
    p = load_grammar_pipeline(model_name)
    if p is not None:
        rater_models.append(p)

def clean_text(text):
    # Lowercase and keep only ASCII letters and digits; note that accented
    # characters (common in the non-English languages above) are stripped
    # rather than normalized.
    return re.sub(r'[^a-zA-Z0-9]', '', text.lower())

def is_palindrome(text):
    cleaned = clean_text(text)
    return cleaned == cleaned[::-1]
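# For example, is_palindrome("A man, a plan, a canal: Panama") is True because
# clean_text() reduces it to "amanaplanacanalpanama".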

# Updated prompt instructs the model to output only the palindrome.
def build_prompt(lang):
    return (
        f"Instruction: Generate a single original palindrome in {lang}.\n"
        "Output only the palindrome. The palindrome should be a continuous text that reads the same forward and backward.\n"
        "Do not output any additional text or commentary.\n"
        "Palindrome: "
    )

def grammar_prompt(pal, lang):
    return (
        f"Rate from 0 to 100 how grammatically correct this palindrome is in {lang}. "
        "Return only a number with no explanation.\n\n"
        f'"{pal}"\n'
    )

def extract_score(text):
    match = re.search(r"\d{1,3}", text)
    if match:
        score = int(match.group())
        return min(max(score, 0), 100)
    return 0
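# For example, extract_score("Score: 85 out of 100") returns 85 (the first
# 1-3 digit number found); values above 100 are clamped to 100.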

# Main benchmark function that runs the tests and saves CSV results.
def run_benchmark_all():
    results = []
    for model_name in premium_models:
        gen_pipeline = load_generation_pipeline(model_name)
        if gen_pipeline is None:
            continue
        for code, lang in languages.items():
            prompt = build_prompt(lang)
            try:
                # return_full_text=False keeps only the newly generated text,
                # so the prompt itself is not counted as part of the palindrome.
                gen_output = gen_pipeline(
                    prompt, max_new_tokens=100, do_sample=True, return_full_text=False
                )[0]['generated_text'].strip()
            except Exception as e:
                gen_output = f"Error generating text: {e}"
            valid = is_palindrome(gen_output)
            cleaned_len = len(clean_text(gen_output))
            scores = []
            for rater in rater_models:
                rprompt = grammar_prompt(gen_output, lang)
                try:
                    rtext = rater(rprompt, max_new_tokens=10)[0]['generated_text']
                    score = extract_score(rtext)
                    scores.append(score)
                except Exception:
                    scores.append(0)
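            # Final score = cleaned length x (average grammar score / 100),
            # halved when the output is not a valid palindrome.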
            avg_score = np.mean(scores) if scores else 0
            penalty = (avg_score / 100) if valid else (avg_score / 100) * 0.5
            final_score = round(cleaned_len * penalty, 2)
            results.append({
                "Model": model_name,
                "Language": lang,
                "Palindrome": gen_output,
                "Valid": "✅" if valid else "❌",
                "Length": cleaned_len,
                "Grammar Score": avg_score,
                "Final Score": final_score
            })
    df = pd.DataFrame(results).sort_values(by="Final Score", ascending=False).reset_index(drop=True)
    csv_path = "benchmark_results.csv"
    df.to_csv(csv_path, index=False)
    print(f"CSV saved to {os.path.abspath(csv_path)}")
    return gr.Dataframe(df), csv_path

# Build the Gradio UI using Blocks for a canvas layout.
with gr.Blocks(title="Premium Model Palindrome Benchmark") as demo:
    gr.Markdown("# Premium Model Palindrome Benchmark")
    gr.Markdown(
        "This benchmark runs automatically over 6 premium text-generation models across 5 languages and saves the results "
        "to a CSV file upon completion."
    )
    with gr.Row():
        run_button = gr.Button("Run All Benchmarks")
    output_table = gr.Dataframe(label="Benchmark Results")
    output_file = gr.File(label="Download CSV Results")
    run_button.click(fn=run_benchmark_all, inputs=[], outputs=[output_table, output_file])

demo.launch()