# Hugging Face Space (status at capture time: Running)
| import gradio as gr | |
| import os | |
| import json | |
| from huggingface_hub import upload_file | |
| import pandas as pd | |
| from datasets import load_dataset | |
# Hub access token taken from the environment; None when HF_TOKEN is unset.
HF_TOKEN = os.environ.get("HF_TOKEN")

# Dataset repo targeted by the (currently disabled) upload in submit().
SUBMISSIONS_REPO = "NAMAA-Space/ocr-competition-submissions"
# Dataset repo that show_results() loads the leaderboard from.
RESULTS_REPO = "NAMAA-Space/ocr-competition-results"
def validate_fields(team_name, email, model_name, hf_model_id, hf_token, code):
    """Check that every form field is filled in, then hand off to ``submit``.

    A field counts as missing when it is falsy (empty string, ``None``) or
    when it contains only whitespace.

    Args:
        team_name: Team name entered in the form.
        email: Contact e-mail entered in the form.
        model_name: Display name of the submitted model.
        hf_model_id: Hugging Face model id of the submitted model.
        hf_token: Hugging Face access token supplied by the participant.
        code: Run instructions or a Colab link.

    Returns:
        str: The "all fields are required" message when any field is missing,
        otherwise whatever ``submit`` returns for the same arguments.
    """
    fields = (team_name, email, model_name, hf_model_id, hf_token, code)
    # A value such as "   " is truthy, so a plain truthiness test would let
    # blank input through; strip before deciding.
    if any(not value or not str(value).strip() for value in fields):
        return "All fields are required. Please fill in all fields."
    # Values are forwarded untouched (not stripped) so the token and code
    # reach submit() exactly as entered.
    return submit(team_name, email, model_name, hf_model_id, hf_token, code)
def submit(team_name, email, model_name, hf_model_id, hf_token, code):
    """Handle a competition submission; intake is currently switched off.

    All six arguments are accepted so the signature matches the form wiring,
    but none of them is used: every call returns the same fixed notice.

    Returns:
        str: The literal message "Submission is closed".
    """
    # Disabled upload flow, kept for reference in case submissions reopen.
    # NOTE(review): as written it serialises the participant's hf_token in
    # plaintext into the uploaded JSON and never removes the local file after
    # the upload - revisit both before re-enabling.
    #
    # entry = {
    #     "team_name": team_name,
    #     "email": email,
    #     "model_name": model_name,
    #     "hf_model_id": hf_model_id,
    #     "hf_token": hf_token,
    #     "code": code
    # }
    # filename = f"{team_name}_{model_name}_{pd.Timestamp.now().strftime('%Y%m%d%H%M%S')}.json"
    # filename = filename.replace("/", "-")
    # with open(filename, "w") as f:
    #     json.dump(entry, f)
    # upload_file(path_or_fileobj=filename,
    #             path_in_repo=filename,
    #             repo_id=SUBMISSIONS_REPO,
    #             repo_type="dataset",
    #             token=HF_TOKEN)
    closed_notice = "Submission is closed"
    return closed_notice
def show_results():
    """Load the results dataset and build the ranked leaderboard table.

    Reads the ``train`` split of ``RESULTS_REPO``, keeps the team, model and
    metric columns, derives a composite ``Score`` and ranks rows by it.

    Returns:
        pandas.DataFrame: On success, columns ``Rank``, ``Score``,
        ``team_name``, ``model_name``, ``WER``, ``CER``, ``BLEU`` sorted by
        descending ``Score``. On failure, a single-row frame whose only
        column ``Error`` carries the error message.
    """
    try:
        ds = load_dataset(RESULTS_REPO, split="train")
        df = ds.to_pandas()[["team_name", "model_name", "WER", "CER", "BLEU"]]

        # Composite score (higher is better):
        #   (100 - WER) × 0.35 + (100 - CER) × 0.35 + BLEU × 0.30
        df["Score"] = (
            (100 - df["WER"]) * 0.35
            + (100 - df["CER"]) * 0.35
            + df["BLEU"] * 0.30
        ).round(2)

        # Show the score first, best entry on top.
        df = df[["Score", "team_name", "model_name", "WER", "CER", "BLEU"]]
        df = df.sort_values(by="Score", ascending=False).reset_index(drop=True)

        # 1-based rank following the sorted order.
        df.insert(0, "Rank", range(1, len(df) + 1))
        return df
    except Exception as e:
        # The result is rendered by a gr.Dataframe, which interprets a plain
        # string as a path to a CSV file; wrap the message in a one-cell table
        # so it is actually displayed instead of triggering a second error.
        return pd.DataFrame(
            {"Error": [f"An error occurred while loading the results: {e}"]}
        )
# Gradio UI: a welcome banner followed by four tabs (rules, prizes, evaluation
# details, and the submission form with the leaderboard).
#
# NOTE(review): the reviewed copy of this file had lost its indentation and had
# mis-decoded emoji. Nesting below is reconstructed; in particular, which
# textboxes belong inside gr.Row() is an assumption - confirm against the
# deployed layout. The three medal emoji were recoverable; the remaining icons
# are best guesses - confirm.
with gr.Blocks() as demo:
    # Welcome message
    gr.Markdown("""
    <h2 style="font-size:28px;">🏆 Welcome to the VLM OCR Competition!</h2>
    <p style="font-size:18px;">
    This competition aims to improve **open-source Arabic OCR models**.
    It's part of the NAMAA Community mission to strengthen the Arabic presence in the ML space.
    This competition is designed to **push the boundaries** of OCR performance on diverse Arabic documents.
    </p>
    """)

    with gr.Tabs():
        with gr.Tab("📜 Rules"):
            # Text instructions. Tag names that are meant to be *shown* to the
            # reader are written with &lt; / &gt; so they are not consumed as
            # real markup.
            gr.Markdown("""
            <h3 style="font-size:22px;">QARI OCR Competition Rules</h3>
            <p style="font-size:18px;">
            Welcome to the <b>QARI OCR Competition</b> organized by the <b>NAMAA Community</b> and sponsored by <b>KANDCA</b>!
            The competition runs from <b>September 15 to October 15</b>.
            Join the <a href="https://discord.gg/GDTpeHZt" target="_blank">Discord server</a> for support and discussion.
            Full rules and submission portal: <a href="https://huggingface.co/spaces/NAMAA-Space/QARI-Competition" target="_blank">Hugging Face Space</a>.
            </p>
            <h4 style="font-size:20px;">📝 Submission Rules</h4>
            <ul style="font-size:18px;">
            <li>Each team can submit <b>one model evaluation per week</b>.</li>
            <li>Provide with your submission:
            <ol>
            <li>Team name (must stay consistent across submissions)</li>
            <li>Model name & Hugging Face Model ID</li>
            <li>A valid Hugging Face token with access</li>
            <li><b>The inference code</b> and any <b>dependency installation instructions</b></li>
            <li><b>The OCR output must be a single-page structured HTML</b> using the following tags:
            <ul>
            <li>&lt;header&gt;, &lt;footer&gt;, &lt;main&gt;, &lt;section id="1"&gt;, &lt;section id="2"&gt;</li>
            <li>&lt;p&gt;, &lt;h1&gt;-&lt;h5&gt;, &lt;b&gt;, &lt;i&gt;, &lt;u&gt;</li>
            <li>&lt;img&gt;, &lt;table&gt;, &lt;hr&gt;, &lt;ul&gt;, &lt;ol&gt;</li>
            </ul>
            </li>
            <li><b>Submitting only unstructured output will result in a 5-point deduction</b> from your final score.</li>
            </ol>
            </li>
            <li><b>The submitted code is the responsibility of the submitting team.</b></li>
            <li>Ideally, provide a working <b>Google Colab link</b> with all details and dependencies.</li>
            </ul>
            <h4 style="font-size:20px;">📅 Evaluation Schedule</h4>
            <ul style="font-size:18px;">
            <li>Submissions received by <strong>Sunday at midnight</strong> will be evaluated on <strong>Monday</strong>.</li>
            <li>The leaderboard will be updated by <strong>Wednesday or Thursday</strong> of the same week.</li>
            </ul>
            """)

        with gr.Tab("🏅 Prizes"):
            gr.Markdown("""
            <h3 style="font-size:22px;">Prize Distribution ((bank transfer or API credits))</h3>
            <ul style="font-size:18px;">
            <li>1st Place: 🥇 250 USD</li>
            <li>2nd Place: 🥈 125 USD</li>
            <li>3rd Place: 🥉 75 USD</li>
            <li>4th Place: 🎖️ 50 USD</li>
            <li>5th Place: 🎖️ 25 USD</li>
            </ul>
            """)

        with gr.Tab("📊 Evaluation"):
            gr.Markdown("""
            <h3 style="font-size:22px;">Evaluation Details</h3>
            <ul style="font-size:18px;">
            <li>The evaluation dataset will remain <b>private</b> and is not shared with participants.</li>
            <li>It will include:
            <ul>
            <li>Historical documents</li>
            <li>Scanned pages</li>
            <li>Different layouts</li>
            <li>Handwritten pages</li>
            </ul>
            </li>
            <li>Models will be evaluated on <b>accuracy metrics</b> such as:
            <ul>
            <li>Word Error Rate (WER)</li>
            <li>Character Error Rate (CER)</li>
            <li>BLEU score</li>
            </ul>
            </li>
            <li><b>Evaluation schedule:</b>
            <ul>
            <li>Submissions received by <b>Sunday at midnight</b> will be evaluated on <b>Monday</b>.</li>
            <li>The leaderboard will be updated by <b>Wednesday or Thursday</b> of the same week.</li>
            </ul>
            </li>
            </ul>
            """)

        with gr.Tab("🚀 Submit & Leaderboard"):
            gr.Markdown("<h3 style='font-size:22px;'>Submit Your Model</h3>")
            with gr.Row():
                team = gr.Textbox(label="Team Name", placeholder="Enter your team name")
                email = gr.Textbox(label="Email", placeholder="Enter your email")
            model = gr.Textbox(label="Model Name", placeholder="Enter your model name")
            hf_model = gr.Textbox(label="Hugging Face Model ID", placeholder="Enter your HF Model ID")
            hf_token = gr.Textbox(label="Hugging Face Access Token", type="password", placeholder="Enter your HF token")
            code = gr.Textbox(label="Code (instructions to run your model) or colab link", lines=6, placeholder="Paste your run code here...")
            submit_btn = gr.Button("Submit")
            status = gr.Textbox(label="Status")
            # validate_fields rejects incomplete forms and otherwise forwards
            # the values to submit(); its return string lands in `status`.
            submit_btn.click(
                fn=validate_fields,
                inputs=[team, email, model, hf_model, hf_token, code],
                outputs=status,
            )

            gr.Markdown("<h3 style='font-size:22px;'>Leaderboard Results</h3>")
            # Headers mirror the columns produced by show_results() so the
            # empty table shown before the first load has the same layout.
            results = gr.Dataframe(
                headers=["Rank", "Score", "team_name", "model_name", "WER", "CER", "BLEU"]
            )

    # Populate the leaderboard every time the page is opened.
    demo.load(fn=show_results, outputs=results)

demo.launch()