import gradio as gr
import os
import json
from huggingface_hub import upload_file
import pandas as pd
from datasets import load_dataset
# Organizer token read from the environment; used by the (currently disabled)
# upload path to push submissions to SUBMISSIONS_REPO.
HF_TOKEN = os.getenv("HF_TOKEN")
# HF dataset repo that receives raw submission JSON files.
SUBMISSIONS_REPO = "NAMAA-Space/ocr-competition-submissions"
# HF dataset repo the public leaderboard is loaded from.
RESULTS_REPO = "NAMAA-Space/ocr-competition-results"
def validate_fields(team_name, email, model_name, hf_model_id, hf_token, code):
    """Validate that every submission field is filled in, then forward to submit().

    Returns a status string for the UI: an error prompt when any field is
    missing or blank, otherwise whatever submit() reports.
    """
    fields = (team_name, email, model_name, hf_model_id, hf_token, code)
    # strip() rejects whitespace-only entries, which a plain truthiness
    # check would have accepted as filled in.
    if not all(field and field.strip() for field in fields):
        return "All fields are required. Please fill in all fields."
    return submit(team_name, email, model_name, hf_model_id, hf_token, code)
def submit(team_name, email, model_name, hf_model_id, hf_token, code):
    """Record a competition entry.

    Submissions are currently closed, so this is a stub that only reports
    the closed status. The previous implementation serialized the entry to
    a timestamped JSON file and uploaded it to SUBMISSIONS_REPO via
    huggingface_hub.upload_file; that dead code has been removed — recover
    it from history if the competition reopens.

    Returns a human-readable status string shown in the UI.
    """
    return "Submission is closed"
def show_results():
    """Load the public results dataset and build the ranked leaderboard.

    Composite score = (100 - WER) * 0.35 + (100 - CER) * 0.35 + BLEU * 0.30,
    rounded to two decimals. Rows are ordered best-first and given a 1-based
    Rank column. On any failure a human-readable error string is returned
    instead of a DataFrame.
    """
    try:
        table = load_dataset(RESULTS_REPO, split="train").to_pandas()
        table = table[["team_name", "model_name", "WER", "CER", "BLEU"]]
        # Composite: (100 - WER) × 0.35 + (100 - CER) × 0.35 + BLEU × 0.30
        composite = (
            (100 - table["WER"]) * 0.35
            + (100 - table["CER"]) * 0.35
            + table["BLEU"] * 0.30
        )
        table["Score"] = composite.round(2)
        # Score first, then team/model identity, then the raw metrics.
        table = table[["Score", "team_name", "model_name", "WER", "CER", "BLEU"]]
        # Highest (rounded) composite score ranks first.
        table = table.sort_values("Score", ascending=False).reset_index(drop=True)
        table.insert(0, "Rank", range(1, len(table) + 1))
        return table
    except Exception as e:
        return f"An error occurred while loading the results: {e}"
with gr.Blocks() as demo:
    # Welcome banner shown above all tabs.
    gr.Markdown("""
👋 Welcome to the VLM OCR Competition!
This competition aims to improve **open-source Arabic OCR models**.
It's part of the NAMAA Community mission to strengthen the Arabic presence in the ML space.
This competition is designed to **push the boundaries** of OCR performance on diverse Arabic documents.
""")
    with gr.Tabs():
        with gr.Tab("📜 Rules"):
            # HTML tag names are wrapped in backticks so Markdown renders
            # them literally instead of swallowing them as raw HTML.
            gr.Markdown("""
QARI OCR Competition Rules
Welcome to the QARI OCR Competition organized by the NAMAA Community and sponsored by KANDCA!
The competition runs from September 15 to October 15.
Join the Discord server for support and discussion.
Full rules and submission portal: Hugging Face Space.
📜 Submission Rules
- Each team can submit one model evaluation per week.
- Provide with your submission:
- Team name (must stay consistent across submissions)
- Model name & Hugging Face Model ID
- A valid Hugging Face token with access
- The inference code and any dependency installation instructions
- The OCR output must be a single-page structured HTML using the following tags:
- `<header>`, `<footer>`, `<main>`, `<section id="1">`, `<section id="2">`
- `<p>`, `<h1>`-`<h5>`, `<b>`, `<i>`, `<u>`
- `<img>`, `<table>`, `<hr>`, `<ul>`, `<ol>`
- Submitting only unstructured output will result in a 5-point deduction from your final score.
- The submitted code is the responsibility of the submitting team.
- Ideally, provide a working Google Colab link with all details and dependencies.
📆 Evaluation Schedule
- Submissions received by Sunday at midnight will be evaluated on Monday.
- The leaderboard will be updated by Wednesday or Thursday of the same week.
""")
        with gr.Tab("🎁 Prizes"):
            gr.Markdown("""
Prize Distribution ((bank transfer or API credits))
- 1st Place: 🥇 250 USD
- 2nd Place: 🥈 125 USD
- 3rd Place: 🥉 75 USD
- 4th Place: 🎖️ 50 USD
- 5th Place: 🎖️ 25 USD
""")
        with gr.Tab("📊 Evaluation"):
            gr.Markdown("""
Evaluation Details
- The evaluation dataset will remain private and is not shared with participants.
- It will include:
- Historical documents
- Scanned pages
- Different layouts
- Handwritten pages
- Models will be evaluated on accuracy metrics such as:
- Word Error Rate (WER)
- Character Error Rate (CER)
- BLEU score
- Evaluation schedule:
- Submissions received by Sunday at midnight will be evaluated on Monday.
- The leaderboard will be updated by Wednesday or Thursday of the same week.
""")
        with gr.Tab("🚀 Submit & Leaderboard"):
            gr.Markdown("Submit Your Model")
            # NOTE(review): the original indentation was lost, so the exact
            # grouping of inputs inside the Row could not be recovered —
            # team/email side by side is assumed; confirm against the live Space.
            with gr.Row():
                team = gr.Textbox(label="Team Name", placeholder="Enter your team name")
                email = gr.Textbox(label="Email", placeholder="Enter your email")
            model = gr.Textbox(label="Model Name", placeholder="Enter your model name")
            hf_model = gr.Textbox(label="Hugging Face Model ID", placeholder="Enter your HF Model ID")
            # type="password" keeps the participant's token masked in the UI.
            hf_token = gr.Textbox(label="Hugging Face Access Token", type="password", placeholder="Enter your HF token")
            code = gr.Textbox(label="Code (instructions to run your model) or colab link", lines=6, placeholder="Paste your run code here...")
            submit_btn = gr.Button("Submit")
            status = gr.Textbox(label="Status")
            submit_btn.click(fn=validate_fields,
                             inputs=[team, email, model, hf_model, hf_token, code],
                             outputs=status)
            gr.Markdown("Leaderboard Results")
            # Headers mirror the column order produced by show_results();
            # the original header list was missing Rank/Score and misordered.
            results = gr.Dataframe(headers=["Rank", "Score", "team_name", "model_name", "WER", "CER", "BLEU"])
            # Populate the leaderboard when the page loads.
            demo.load(fn=show_results, outputs=results)

demo.launch()