Spaces:

NAMAA-Space
/

QARI-Competition

Running

App Files Files Community

QARI-Competition / app.py

oddadmix

Update app.py

a7ac470 verified 11 days ago

raw

history blame contribute delete

8.46 kB

	import gradio as gr
	import os
	import json
	from huggingface_hub import upload_file
	import pandas as pd
	from datasets import load_dataset

	HF_TOKEN = os.getenv("HF_TOKEN")
	SUBMISSIONS_REPO = "NAMAA-Space/ocr-competition-submissions"
	RESULTS_REPO = "NAMAA-Space/ocr-competition-results"

	def validate_fields(team_name, email, model_name, hf_model_id, hf_token, code):
	    """Reject incomplete submissions before handing them to ``submit``.

	    A field counts as missing when it is falsy (``None`` or ``""``) or, for
	    strings, when it contains nothing but whitespace.

	    Args:
	        team_name: Value of the "Team Name" input.
	        email: Value of the "Email" input.
	        model_name: Value of the "Model Name" input.
	        hf_model_id: Value of the "Hugging Face Model ID" input.
	        hf_token: Value of the "Hugging Face Access Token" input.
	        code: Value of the run-instructions / Colab-link input.

	    Returns:
	        The fixed "all fields are required" message when any field is
	        missing; otherwise whatever ``submit`` returns for the same,
	        unmodified arguments.
	    """
	    fields = (team_name, email, model_name, hf_model_id, hf_token, code)
	    # A whitespace-only string is truthy, so a bare ``not value`` check would
	    # let a form filled with spaces through; strip strings before judging.
	    if any(
	        not value or (isinstance(value, str) and not value.strip())
	        for value in fields
	    ):
	        return "All fields are required. Please fill in all fields."
	    return submit(team_name, email, model_name, hf_model_id, hf_token, code)

	def submit(team_name, email, model_name, hf_model_id, hf_token, code):
	    """Report that the competition no longer accepts submissions.

	    All six arguments are accepted so the signature stays compatible with
	    ``validate_fields``; none of them is read.

	    Returns:
	        The literal status message ``"Submission is closed"``.
	    """
	    # Disabled upload flow, kept for reference only. When active it dumped the
	    # six fields to a timestamped JSON file and uploaded that file to
	    # SUBMISSIONS_REPO as a dataset file.
	    # NOTE(review): this flow would store the participant's HF token in plain
	    # text inside the uploaded JSON -- reconsider before re-enabling.
	    #
	    # entry = {
	    #     "team_name": team_name,
	    #     "email": email,
	    #     "model_name": model_name,
	    #     "hf_model_id": hf_model_id,
	    #     "hf_token": hf_token,
	    #     "code": code
	    # }
	    # filename = f"{team_name}_{model_name}_{pd.Timestamp.now().strftime('%Y%m%d%H%M%S')}.json"
	    # filename = filename.replace("/", "-")
	    # with open(filename, "w") as f:
	    #     json.dump(entry, f)
	    # upload_file(path_or_fileobj=filename,
	    #             path_in_repo=filename,
	    #             repo_id=SUBMISSIONS_REPO,
	    #             repo_type="dataset",
	    #             token=HF_TOKEN)
	    closed_message = "Submission is closed"
	    return closed_message

	def show_results():
	    """Load the published results and return them as a ranked leaderboard.

	    Reads the ``train`` split of RESULTS_REPO, keeps the team/model names and
	    the three metrics, and adds a composite score

	        Score = (100 - WER) * 0.35 + (100 - CER) * 0.35 + BLEU * 0.30

	    rounded to two decimals. Rows are ordered by Score, highest first, and a
	    1-based ``Rank`` column is inserted in front.

	    Returns:
	        pandas.DataFrame: columns ``Rank, Score, team_name, model_name, WER,
	        CER, BLEU`` on success. On any failure, a one-row frame with a single
	        ``Error`` column holding the message. A DataFrame is always returned
	        because the result feeds a ``gr.Dataframe`` output, which would treat
	        a bare string as a path to a CSV file rather than as text to display.
	    """
	    import logging  # local import: keeps the block self-contained

	    try:
	        ds = load_dataset(RESULTS_REPO, split="train")
	        # Explicit copy so the column assignment below acts on an independent
	        # frame rather than on a selection of the converted dataset.
	        df = ds.to_pandas()[["team_name", "model_name", "WER", "CER", "BLEU"]].copy()

	        # WER and CER are subtracted from 100 so that, like BLEU, a larger
	        # contribution means a better result (highest Score is best).
	        composite = (100 - df["WER"]) * 0.35 + (100 - df["CER"]) * 0.35 + df["BLEU"] * 0.30
	        df["Score"] = composite.round(2)

	        # Show Score first, ahead of the identifying columns and raw metrics.
	        df = df[["Score", "team_name", "model_name", "WER", "CER", "BLEU"]]

	        # Stable sort: teams with equal scores keep their dataset order, so
	        # the ranking does not shuffle between page loads.
	        df = df.sort_values(by="Score", ascending=False, kind="mergesort")
	        df = df.reset_index(drop=True)

	        df.insert(0, "Rank", range(1, len(df) + 1))
	        return df

	    except Exception as e:
	        # Top-level UI boundary: keep the page alive, record the traceback in
	        # the server log, and surface the message inside the table itself.
	        logging.getLogger(__name__).exception("Failed to load leaderboard results")
	        return pd.DataFrame(
	            {"Error": [f"An error occurred while loading the results: {e}"]}
	        )

	# Competition UI: a welcome banner followed by four tabs (rules, prizes,
	# evaluation details, and the submission form with the leaderboard).
	# NOTE(review): the nesting below was reconstructed from blank-line grouping
	# because the original indentation was lost -- confirm against the deployed app.
	with gr.Blocks() as demo:
	    # Welcome message
	    gr.Markdown("""
	<h2 style="font-size:28px;">👋 Welcome to the VLM OCR Competition!</h2>
	<p style="font-size:18px;">
	This competition aims to improve open-source Arabic OCR models.
	It's part of the NAMAA Community mission to strengthen the Arabic presence in the ML space.
	This competition is designed to push the boundaries of OCR performance on diverse Arabic documents.
	</p>
	""")

	    with gr.Tabs():
	        with gr.Tab("📜 Rules"):
	            # Text instructions.
	            # The allowed-tags list is written with &lt;/&gt; entities: raw tags
	            # inside this HTML would be parsed as markup (unclosed <b>, <table>,
	            # <img>, ...) instead of being shown to the reader as text.
	            gr.Markdown("""
	<h3 style="font-size:22px;">QARI OCR Competition Rules</h3>
	<p style="font-size:18px;">
	Welcome to the <b>QARI OCR Competition</b> organized by the <b>NAMAA Community</b> and sponsored by <b>KANDCA</b>!
	The competition runs from <b>September 15 to October 15</b>.
	Join the <a href="https://discord.gg/GDTpeHZt" target="_blank">Discord server</a> for support and discussion.
	Full rules and submission portal: <a href="https://huggingface.co/spaces/NAMAA-Space/QARI-Competition" target="_blank">Hugging Face Space</a>.
	</p>

	<h4 style="font-size:20px;">📜 Submission Rules</h4>
	<ul style="font-size:18px;">
	<li>Each team can submit <b>one model evaluation per week</b>.</li>
	<li>Provide with your submission:
	<ol>
	<li>Team name (must stay consistent across submissions)</li>
	<li>Model name & Hugging Face Model ID</li>
	<li>A valid Hugging Face token with access</li>
	<li><b>The inference code</b> and any <b>dependency installation instructions</b></li>
	<li><b>The OCR output must be a single-page structured HTML</b> using the following tags:
	<ul>
	<li>&lt;header&gt;, &lt;footer&gt;, &lt;main&gt;, &lt;section id="1"&gt;, &lt;section id="2"&gt;</li>
	<li>&lt;p&gt;, &lt;h1&gt;-&lt;h5&gt;, &lt;b&gt;, &lt;i&gt;, &lt;u&gt;</li>
	<li>&lt;img&gt;, &lt;table&gt;, &lt;hr&gt;, &lt;ul&gt;, &lt;ol&gt;</li>
	</ul>
	</li>
	<li><b>Submitting only unstructured output will result in a 5-point deduction</b> from your final score.</li>
	</ol>
	</li>
	<li><b>The submitted code is the responsibility of the submitting team.</b></li>
	<li>Ideally, provide a working <b>Google Colab link</b> with all details and dependencies.</li>
	</ul>

	<h4 style="font-size:20px;">📆 Evaluation Schedule</h4>
	<ul style="font-size:18px;">
	<li>Submissions received by <strong>Sunday at midnight</strong> will be evaluated on <strong>Monday</strong>.</li>
	<li>The leaderboard will be updated by <strong>Wednesday or Thursday</strong> of the same week.</li>
	</ul>
	""")

	        with gr.Tab("🎁 Prizes"):
	            gr.Markdown("""
	<h3 style="font-size:22px;">Prize Distribution (bank transfer or API credits)</h3>
	<ul style="font-size:18px;">
	<li>1st Place: 🥇 250 USD</li>
	<li>2nd Place: 🥈 125 USD</li>
	<li>3rd Place: 🥉 75 USD</li>
	<li>4th Place: 🎖️ 50 USD</li>
	<li>5th Place: 🎖️ 25 USD</li>
	</ul>
	""")

	        with gr.Tab("📊 Evaluation"):
	            gr.Markdown("""
	<h3 style="font-size:22px;">Evaluation Details</h3>
	<ul style="font-size:18px;">
	<li>The evaluation dataset will remain <b>private</b> and is not shared with participants.</li>
	<li>It will include:
	<ul>
	<li>Historical documents</li>
	<li>Scanned pages</li>
	<li>Different layouts</li>
	<li>Handwritten pages</li>
	</ul>
	</li>
	<li>Models will be evaluated on <b>accuracy metrics</b> such as:
	<ul>
	<li>Word Error Rate (WER)</li>
	<li>Character Error Rate (CER)</li>
	<li>BLEU score</li>
	</ul>
	</li>
	<li><b>Evaluation schedule:</b>
	<ul>
	<li>Submissions received by <b>Sunday at midnight</b> will be evaluated on <b>Monday</b>.</li>
	<li>The leaderboard will be updated by <b>Wednesday or Thursday</b> of the same week.</li>
	</ul>
	</li>
	</ul>
	""")

	        with gr.Tab("🚀 Submit & Leaderboard"):
	            gr.Markdown("<h3 style='font-size:22px;'>Submit Your Model</h3>")
	            # NOTE(review): all six inputs are assumed to sit inside the Row,
	            # since no blank line separates them in the source -- confirm.
	            with gr.Row():
	                team = gr.Textbox(label="Team Name", placeholder="Enter your team name")
	                email = gr.Textbox(label="Email", placeholder="Enter your email")
	                model = gr.Textbox(label="Model Name", placeholder="Enter your model name")
	                hf_model = gr.Textbox(label="Hugging Face Model ID", placeholder="Enter your HF Model ID")
	                hf_token = gr.Textbox(label="Hugging Face Access Token", type="password", placeholder="Enter your HF token")
	                code = gr.Textbox(label="Code (instructions to run your model) or colab link", lines=6, placeholder="Paste your run code here...")

	            submit_btn = gr.Button("Submit")
	            status = gr.Textbox(label="Status")

	            # The form is validated first; validate_fields forwards complete
	            # input to submit and its return value is shown in the status box.
	            submit_btn.click(fn=validate_fields,
	                             inputs=[team, email, model, hf_model, hf_token, code],
	                             outputs=status)

	            gr.Markdown("<h3 style='font-size:22px;'>Leaderboard Results</h3>")
	            # Placeholder headers match the columns show_results returns, so
	            # the table does not change shape once the data arrives.
	            results = gr.Dataframe(headers=["Rank", "Score", "team_name", "model_name", "WER", "CER", "BLEU"])
	            # Populate the leaderboard each time the page is loaded.
	            demo.load(fn=show_results, outputs=results)

	demo.launch()