import gradio as gr
import os
import json
from huggingface_hub import upload_file
import pandas as pd
from datasets import load_dataset
# Organizer token read from the environment; used by the (currently disabled)
# upload path to push submissions to SUBMISSIONS_REPO.
HF_TOKEN = os.getenv("HF_TOKEN")
# HF dataset repo that receives raw submission JSON files.
SUBMISSIONS_REPO = "NAMAA-Space/ocr-competition-submissions"
# HF dataset repo the public leaderboard is loaded from.
RESULTS_REPO = "NAMAA-Space/ocr-competition-results"
def validate_fields(team_name, email, model_name, hf_model_id, hf_token, code):
    """Validate that every submission field is filled in, then forward to submit().

    Returns a status string for the UI: an error prompt when any field is
    missing or blank, otherwise whatever submit() reports.
    """
    fields = (team_name, email, model_name, hf_model_id, hf_token, code)
    # strip() rejects whitespace-only entries, which a plain truthiness
    # check would have accepted as filled in.
    if not all(field and field.strip() for field in fields):
        return "All fields are required. Please fill in all fields."
    return submit(team_name, email, model_name, hf_model_id, hf_token, code)
def submit(team_name, email, model_name, hf_model_id, hf_token, code):
    """Record a competition entry.

    Submissions are currently closed, so this is a stub that only reports
    the closed status. The previous implementation serialized the entry to
    a timestamped JSON file and uploaded it to SUBMISSIONS_REPO via
    huggingface_hub.upload_file; that dead code has been removed — recover
    it from history if the competition reopens.

    Returns a human-readable status string shown in the UI.
    """
    return "Submission is closed"
def show_results():
    """Load the public results dataset and build the ranked leaderboard.

    Composite score = (100 - WER) * 0.35 + (100 - CER) * 0.35 + BLEU * 0.30,
    rounded to two decimals. Rows are ordered best-first and given a 1-based
    Rank column. On any failure a human-readable error string is returned
    instead of a DataFrame.
    """
    try:
        table = load_dataset(RESULTS_REPO, split="train").to_pandas()
        table = table[["team_name", "model_name", "WER", "CER", "BLEU"]]
        # Composite: (100 - WER) × 0.35 + (100 - CER) × 0.35 + BLEU × 0.30
        composite = (
            (100 - table["WER"]) * 0.35
            + (100 - table["CER"]) * 0.35
            + table["BLEU"] * 0.30
        )
        table["Score"] = composite.round(2)
        # Score first, then team/model identity, then the raw metrics.
        table = table[["Score", "team_name", "model_name", "WER", "CER", "BLEU"]]
        # Highest (rounded) composite score ranks first.
        table = table.sort_values("Score", ascending=False).reset_index(drop=True)
        table.insert(0, "Rank", range(1, len(table) + 1))
        return table
    except Exception as e:
        return f"An error occurred while loading the results: {e}"
with gr.Blocks() as demo:
    # Welcome banner shown above all tabs.
    gr.Markdown("""
👋 Welcome to the VLM OCR Competition!
This competition aims to improve **open-source Arabic OCR models**.
It's part of the NAMAA Community mission to strengthen the Arabic presence in the ML space.
This competition is designed to **push the boundaries** of OCR performance on diverse Arabic documents.
""")
    with gr.Tabs():
        with gr.Tab("📜 Rules"):
            # HTML tag names are wrapped in backticks so Markdown renders
            # them literally instead of swallowing them as raw HTML.
            gr.Markdown("""
QARI OCR Competition Rules
Welcome to the QARI OCR Competition organized by the NAMAA Community and sponsored by KANDCA!
The competition runs from September 15 to October 15.
Join the Discord server for support and discussion.
Full rules and submission portal: Hugging Face Space.
📜 Submission Rules
- Each team can submit one model evaluation per week.
- Provide with your submission:
- Team name (must stay consistent across submissions)
- Model name & Hugging Face Model ID
- A valid Hugging Face token with access
- The inference code and any dependency installation instructions
- The OCR output must be a single-page structured HTML using the following tags:
- `<header>`, `<footer>`, `<main>`, `<section id="1">`, `<section id="2">`
- `<p>`, `<h1>`-`<h5>`, `<b>`, `<i>`, `<u>`
- `<img>`, `<table>`, `<hr>`, `<ul>`, `<ol>`
- Submitting only unstructured output will result in a 5-point deduction from your final score.
- The submitted code is the responsibility of the submitting team.
- Ideally, provide a working Google Colab link with all details and dependencies.
📆 Evaluation Schedule
- Submissions received by Sunday at midnight will be evaluated on Monday.
- The leaderboard will be updated by Wednesday or Thursday of the same week.
""")
        with gr.Tab("🎁 Prizes"):
            gr.Markdown("""
Prize Distribution ((bank transfer or API credits))
- 1st Place: 🥇 250 USD
- 2nd Place: 🥈 125 USD
- 3rd Place: 🥉 75 USD
- 4th Place: 🎖️ 50 USD
- 5th Place: 🎖️ 25 USD
""")
        with gr.Tab("📊 Evaluation"):
            gr.Markdown("""
Evaluation Details
- The evaluation dataset will remain private and is not shared with participants.
- It will include:
- Historical documents
- Scanned pages
- Different layouts
- Handwritten pages
- Models will be evaluated on accuracy metrics such as:
- Word Error Rate (WER)
- Character Error Rate (CER)
- BLEU score
- Evaluation schedule:
- Submissions received by Sunday at midnight will be evaluated on Monday.
- The leaderboard will be updated by Wednesday or Thursday of the same week.
""")
        with gr.Tab("🚀 Submit & Leaderboard"):
            gr.Markdown("Submit Your Model")
            # NOTE(review): the original indentation was lost, so the exact
            # grouping of inputs inside the Row could not be recovered —
            # team/email side by side is assumed; confirm against the live Space.
            with gr.Row():
                team = gr.Textbox(label="Team Name", placeholder="Enter your team name")
                email = gr.Textbox(label="Email", placeholder="Enter your email")
            model = gr.Textbox(label="Model Name", placeholder="Enter your model name")
            hf_model = gr.Textbox(label="Hugging Face Model ID", placeholder="Enter your HF Model ID")
            # type="password" keeps the participant's token masked in the UI.
            hf_token = gr.Textbox(label="Hugging Face Access Token", type="password", placeholder="Enter your HF token")
            code = gr.Textbox(label="Code (instructions to run your model) or colab link", lines=6, placeholder="Paste your run code here...")
            submit_btn = gr.Button("Submit")
            status = gr.Textbox(label="Status")
            submit_btn.click(fn=validate_fields,
                             inputs=[team, email, model, hf_model, hf_token, code],
                             outputs=status)
            gr.Markdown("Leaderboard Results")
            # Headers mirror the column order produced by show_results();
            # the original header list was missing Rank/Score and misordered.
            results = gr.Dataframe(headers=["Rank", "Score", "team_name", "model_name", "WER", "CER", "BLEU"])
            # Populate the leaderboard when the page loads.
            demo.load(fn=show_results, outputs=results)

demo.launch()