"""Gradio app for the OCR competition: rules, submission form and leaderboard."""

import json
import os

import gradio as gr
import pandas as pd
from datasets import load_dataset
from huggingface_hub import upload_file

HF_TOKEN = os.getenv("HF_TOKEN")
SUBMISSIONS_REPO = "NAMAA-Space/ocr-competition-submissions"
RESULTS_REPO = "NAMAA-Space/ocr-competition-results"

# Leaderboard columns in display order; shared by show_results() and the
# Dataframe widget so the initial headers match the data that is loaded.
LEADERBOARD_COLUMNS = ["Rank", "Score", "team_name", "model_name", "WER", "CER", "BLEU"]


def validate_fields(team_name, email, model_name, hf_model_id, hf_token, code):
    """Check that every form field is filled in, then forward to ``submit``.

    Returns the status message to display: either the "all fields required"
    notice or whatever ``submit`` returns.
    """
    fields = (team_name, email, model_name, hf_model_id, hf_token, code)
    # Whitespace-only input counts as empty.
    if not all(value and value.strip() for value in fields):
        return "All fields are required. Please fill in all fields."
    return submit(team_name, email, model_name, hf_model_id, hf_token, code)


def submit(team_name, email, model_name, hf_model_id, hf_token, code):
    """Handle a submission; currently always reports that submission is closed.

    The upload logic below is disabled (kept as comments for reference).
    """
    # entry = {
    #     "team_name": team_name,
    #     "email": email,
    #     "model_name": model_name,
    #     "hf_model_id": hf_model_id,
    #     "hf_token": hf_token,
    #     "code": code
    # }
    # filename = f"{team_name}_{model_name}_{pd.Timestamp.now().strftime('%Y%m%d%H%M%S')}.json"
    # filename = filename.replace("/", "-")
    # with open(filename, "w") as f:
    #     json.dump(entry, f)
    # upload_file(path_or_fileobj=filename,
    #             path_in_repo=filename,
    #             repo_id=SUBMISSIONS_REPO,
    #             repo_type="dataset",
    #             token=HF_TOKEN)
    return "Submission is closed"


def show_results() -> pd.DataFrame:
    """Load the results dataset and return the ranked leaderboard table.

    Composite score: (100 - WER) * 0.35 + (100 - CER) * 0.35 + BLEU * 0.30,
    rounded to 2 decimals; rows are sorted by score, highest first, and a
    1-based ``Rank`` column is added.

    Raises:
        gr.Error: if the results cannot be loaded or processed. The output
            component is a Dataframe, so returning an error string would not
            be shown as a message; raising surfaces it in the UI instead.
    """
    try:
        ds = load_dataset(RESULTS_REPO, split="train")
        df = ds.to_pandas()[["team_name", "model_name", "WER", "CER", "BLEU"]]

        score = (100 - df["WER"]) * 0.35 + (100 - df["CER"]) * 0.35 + df["BLEU"] * 0.30
        df["Score"] = score.round(2)

        # Highest score is best.
        df = df.sort_values(by="Score", ascending=False).reset_index(drop=True)
        df.insert(0, "Rank", range(1, len(df) + 1))

        return df[LEADERBOARD_COLUMNS]
    except Exception as e:
        raise gr.Error(f"An error occurred while loading the results: {e}") from e


with gr.Blocks() as demo:
    # Welcome message
    gr.Markdown("""

👋 Welcome to the VLM OCR Competition!

This competition aims to improve **open-source Arabic OCR models**. It's part of the NAMAA Community mission to strengthen the Arabic presence in the ML space. This competition is designed to **push the boundaries** of OCR performance on diverse Arabic documents.

""")

    with gr.Tabs():
        with gr.Tab("📜 Rules"):
            # Text instructions
            gr.Markdown("""

QARI OCR Competition Rules

Welcome to the QARI OCR Competition organized by the NAMAA Community and sponsored by KANDCA! The competition runs from September 15 to October 15. Join the Discord server for support and discussion. Full rules and submission portal: Hugging Face Space.

📜 Submission Rules

📆 Evaluation Schedule

""")

        with gr.Tab("🎁 Prizes"):
            gr.Markdown("""

Prize Distribution ((bank transfer or API credits))

""")

        with gr.Tab("📊 Evaluation"):
            gr.Markdown("""

Evaluation Details

""")

        with gr.Tab("🚀 Submit & Leaderboard"):
            gr.Markdown("""

Submit Your Model

""")

            # NOTE(review): the original layout was lost; which widgets belong
            # inside this Row is a reconstruction - confirm against the UI.
            with gr.Row():
                team = gr.Textbox(label="Team Name", placeholder="Enter your team name")
                email = gr.Textbox(label="Email", placeholder="Enter your email")

            model = gr.Textbox(label="Model Name", placeholder="Enter your model name")
            hf_model = gr.Textbox(
                label="Hugging Face Model ID",
                placeholder="Enter your HF Model ID",
            )
            hf_token = gr.Textbox(
                label="Hugging Face Access Token",
                type="password",
                placeholder="Enter your HF token",
            )
            code = gr.Textbox(
                label="Code (instructions to run your model) or colab link",
                lines=6,
                placeholder="Paste your run code here...",
            )

            submit_btn = gr.Button("Submit")
            status = gr.Textbox(label="Status")

            submit_btn.click(
                fn=validate_fields,
                inputs=[team, email, model, hf_model, hf_token, code],
                outputs=status,
            )

            gr.Markdown("""

Leaderboard Results

""")

            results = gr.Dataframe(headers=LEADERBOARD_COLUMNS)

    # Populate the leaderboard whenever the page is loaded.
    demo.load(fn=show_results, outputs=results)


if __name__ == "__main__":
    demo.launch()