Spaces:
Runtime error
Runtime error
File size: 8,459 Bytes
6cfd8cb 06bca27 8dac542 fc57e2d 8dac542 fc57e2d 6cfd8cb fc57e2d a7ac470 6cfd8cb 6f2f386 a3b1e6b d17c334 12880bb c5eb6d6 12880bb c5eb6d6 12880bb e1094c7 6cfd8cb 023a239 fc57e2d 5c54a4c fc57e2d 5c54a4c 50ea9c6 5c54a4c 50ea9c6 5c54a4c 50ea9c6 fc57e2d 09d6c19 023a239 09d6c19 023a239 fc57e2d 023a239 8574af6 023a239 8574af6 023a239 fc57e2d 023a239 fc57e2d 023a239 8574af6 fc57e2d 023a239 fc57e2d 6cfd8cb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 |
import gradio as gr
import os
import json
from huggingface_hub import upload_file
import pandas as pd
from datasets import load_dataset
HF_TOKEN = os.getenv("HF_TOKEN")
SUBMISSIONS_REPO = "NAMAA-Space/ocr-competition-submissions"
RESULTS_REPO = "NAMAA-Space/ocr-competition-results"
def validate_fields(team_name, email, model_name, hf_model_id, hf_token, code):
    """Reject the submission if any field is empty; otherwise forward to submit().

    Returns a status string suitable for display in the UI.
    """
    fields = (team_name, email, model_name, hf_model_id, hf_token, code)
    if not all(fields):
        return "All fields are required. Please fill in all fields."
    return submit(team_name, email, model_name, hf_model_id, hf_token, code)
def submit(team_name, email, model_name, hf_model_id, hf_token, code):
    """Record a competition submission.

    Submissions are currently closed, so this is a stub that returns a
    status message without persisting anything. The earlier flow
    (serialize the entry to a timestamped JSON file and upload it to
    SUBMISSIONS_REPO) has been removed rather than left commented out.

    Parameters mirror the Gradio form inputs and are kept unchanged so
    the click-handler signature still matches when submissions reopen.

    Returns:
        str: Status message shown in the UI.
    """
    return "Submission is closed"
def show_results():
    """Load leaderboard results and return them as a ranked DataFrame.

    Pulls the results dataset from RESULTS_REPO, computes a composite
    score, and sorts teams best-first.

    Returns:
        pandas.DataFrame with columns
        [Rank, Score, team_name, model_name, WER, CER, BLEU], sorted by
        Score descending — or an error-message string if loading fails
        (the Gradio output component displays either form).
    """
    try:
        ds = load_dataset(RESULTS_REPO, split="train")
        df = ds.to_pandas()[["team_name", "model_name", "WER", "CER", "BLEU"]]
        # Composite score: (100 - WER) * 0.35 + (100 - CER) * 0.35 + BLEU * 0.30
        # Lower error rates and higher BLEU both raise the score.
        df["Score"] = (
            (100 - df["WER"]) * 0.35
            + (100 - df["CER"]) * 0.35
            + df["BLEU"] * 0.30
        ).round(2)
        # Score first, then rank 1..n after sorting best-first.
        df = df[["Score", "team_name", "model_name", "WER", "CER", "BLEU"]]
        df = df.sort_values(by="Score", ascending=False).reset_index(drop=True)
        df.insert(0, "Rank", range(1, len(df) + 1))
        return df
    except Exception as e:
        # Broad catch is deliberate: any load/parse failure is surfaced
        # as a message in the UI instead of crashing the Space.
        return f"An error occurred while loading the results: {e}"
# --- UI layout -------------------------------------------------------------
# NOTE(review): several characters in the markdown below (e.g. "π", "π₯")
# look like mojibake of original emoji — preserved as-is; confirm against
# the deployed Space before editing.
with gr.Blocks() as demo:
    # Welcome message
    gr.Markdown("""
<h2 style="font-size:28px;">π Welcome to the VLM OCR Competition!</h2>
<p style="font-size:18px;">
This competition aims to improve **open-source Arabic OCR models**.
It's part of the NAMAA Community mission to strengthen the Arabic presence in the ML space.
This competition is designed to **push the boundaries** of OCR performance on diverse Arabic documents.
</p>
""")
    with gr.Tabs():
        with gr.Tab("π Rules"):
            # Text instructions
            gr.Markdown("""
<h3 style="font-size:22px;">QARI OCR Competition Rules</h3>
<p style="font-size:18px;">
Welcome to the <b>QARI OCR Competition</b> organized by the <b>NAMAA Community</b> and sponsored by <b>KANDCA</b>!
The competition runs from <b>September 15 to October 15</b>.
Join the <a href="https://discord.gg/GDTpeHZt" target="_blank">Discord server</a> for support and discussion.
Full rules and submission portal: <a href="https://huggingface.co/spaces/NAMAA-Space/QARI-Competition" target="_blank">Hugging Face Space</a>.
</p>
<h4 style="font-size:20px;">π Submission Rules</h4>
<ul style="font-size:18px;">
<li>Each team can submit <b>one model evaluation per week</b>.</li>
<li>Provide with your submission:
<ol>
<li>Team name (must stay consistent across submissions)</li>
<li>Model name & Hugging Face Model ID</li>
<li>A valid Hugging Face token with access</li>
<li><b>The inference code</b> and any <b>dependency installation instructions</b></li>
<li><b>The OCR output must be a single-page structured HTML</b> using the following tags:
<ul>
<li><header>, <footer>, <main>, <section id="1">, <section id="2"></li>
<li><p>, <h1>-<h5>, <b>, <i>, <u></li>
<li><img>, <table>, <hr>, <ul>, <ol></li>
</ul>
</li>
<li><b>Submitting only unstructured output will result in a 5-point deduction</b> from your final score.</li>
</ol>
</li>
<li><b>The submitted code is the responsibility of the submitting team.</b></li>
<li>Ideally, provide a working <b>Google Colab link</b> with all details and dependencies.</li>
</ul>
<h4 style="font-size:20px;">π Evaluation Schedule</h4>
<ul style="font-size:18px;">
<li>Submissions received by <strong>Sunday at midnight</strong> will be evaluated on <strong>Monday</strong>.</li>
<li>The leaderboard will be updated by <strong>Wednesday or Thursday</strong> of the same week.</li>
</ul>
""")
        with gr.Tab("π Prizes"):
            gr.Markdown("""
<h3 style="font-size:22px;">Prize Distribution ((bank transfer or API credits))</h3>
<ul style="font-size:18px;">
<li>1st Place: π₯ 250 USD</li>
<li>2nd Place: π₯ 125 USD</li>
<li>3rd Place: π₯ 75 USD</li>
<li>4th Place: ποΈ 50 USD</li>
<li>5th Place: ποΈ 25 USD</li>
</ul>
""")
        with gr.Tab("π Evaluation"):
            gr.Markdown("""
<h3 style="font-size:22px;">Evaluation Details</h3>
<ul style="font-size:18px;">
<li>The evaluation dataset will remain <b>private</b> and is not shared with participants.</li>
<li>It will include:
<ul>
<li>Historical documents</li>
<li>Scanned pages</li>
<li>Different layouts</li>
<li>Handwritten pages</li>
</ul>
</li>
<li>Models will be evaluated on <b>accuracy metrics</b> such as:
<ul>
<li>Word Error Rate (WER)</li>
<li>Character Error Rate (CER)</li>
<li>BLEU score</li>
</ul>
</li>
<li><b>Evaluation schedule:</b>
<ul>
<li>Submissions received by <b>Sunday at midnight</b> will be evaluated on <b>Monday</b>.</li>
<li>The leaderboard will be updated by <b>Wednesday or Thursday</b> of the same week.</li>
</ul>
</li>
</ul>
""")
        with gr.Tab("π Submit & Leaderboard"):
            gr.Markdown("<h3 style='font-size:22px;'>Submit Your Model</h3>")
            # Submission form: all six inputs are required by validate_fields.
            with gr.Row():
                team = gr.Textbox(label="Team Name", placeholder="Enter your team name")
                email = gr.Textbox(label="Email", placeholder="Enter your email")
                model = gr.Textbox(label="Model Name", placeholder="Enter your model name")
                hf_model = gr.Textbox(label="Hugging Face Model ID", placeholder="Enter your HF Model ID")
                hf_token = gr.Textbox(label="Hugging Face Access Token", type="password", placeholder="Enter your HF token")
                code = gr.Textbox(label="Code (instructions to run your model) or colab link", lines=6, placeholder="Paste your run code here...")
            submit_btn = gr.Button("Submit")
            status = gr.Textbox(label="Status")
            submit_btn.click(fn=validate_fields,
                             inputs=[team, email, model, hf_model, hf_token, code],
                             outputs=status)
            gr.Markdown("<h3 style='font-size:22px;'>Leaderboard Results</h3>")
            # Headers match the column order produced by show_results
            # (previously listed 5 columns in a different order).
            results = gr.Dataframe(headers=["Rank", "Score", "team_name", "model_name", "WER", "CER", "BLEU"])
            # Populate the leaderboard on page load.
            demo.load(fn=show_results, outputs=results)
demo.launch()
|