File size: 8,459 Bytes
6cfd8cb
 
 
 
 
 
 
 
06bca27
 
8dac542
fc57e2d
 
8dac542
fc57e2d
6cfd8cb
fc57e2d
a7ac470
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6cfd8cb
 
6f2f386
a3b1e6b
d17c334
12880bb
c5eb6d6
 
 
12880bb
c5eb6d6
 
 
 
 
 
 
 
 
 
 
 
 
12880bb
e1094c7
 
6cfd8cb
 
023a239
 
 
 
 
 
 
 
 
fc57e2d
 
 
5c54a4c
fc57e2d
5c54a4c
 
 
 
 
 
 
50ea9c6
5c54a4c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50ea9c6
5c54a4c
 
 
 
 
 
50ea9c6
fc57e2d
 
 
09d6c19
023a239
09d6c19
023a239
 
 
 
 
fc57e2d
 
 
 
023a239
 
8574af6
023a239
 
 
 
 
 
 
 
8574af6
 
 
 
 
 
 
 
 
 
 
 
 
023a239
fc57e2d
 
 
023a239
fc57e2d
023a239
 
 
 
 
8574af6
fc57e2d
 
 
 
 
 
 
 
023a239
fc57e2d
 
 
6cfd8cb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
import gradio as gr
import os
import json
from huggingface_hub import upload_file
import pandas as pd
from datasets import load_dataset

# Hugging Face access token taken from the environment; None when the
# HF_TOKEN variable is not set.
HF_TOKEN = os.environ.get("HF_TOKEN")

# Hub repositories used by the competition: the one show_results() reads the
# leaderboard from, and the one the submission upload targets.
RESULTS_REPO = "NAMAA-Space/ocr-competition-results"
SUBMISSIONS_REPO = "NAMAA-Space/ocr-competition-submissions"

def validate_fields(team_name, email, model_name, hf_model_id, hf_token, code):
    """Reject incomplete submission forms, otherwise hand off to ``submit``.

    A field counts as missing when it is ``None``, empty, or consists only of
    whitespace, so a form filled with spaces is not accepted as complete.

    Args:
        team_name: Team name entered in the form.
        email: Contact email entered in the form.
        model_name: Display name of the submitted model.
        hf_model_id: Hugging Face model identifier.
        hf_token: Hugging Face access token supplied by the team.
        code: Run instructions or a notebook link.

    Returns:
        The "all fields are required" message when any field is missing;
        otherwise whatever ``submit`` returns for the same (unmodified)
        arguments.
    """
    fields = (team_name, email, model_name, hf_model_id, hf_token, code)
    # str() keeps the whitespace check safe if a non-string value ever arrives.
    if any(not value or not str(value).strip() for value in fields):
        return "All fields are required. Please fill in all fields."
    return submit(team_name, email, model_name, hf_model_id, hf_token, code)

def submit(team_name, email, model_name, hf_model_id, hf_token, code):
    """Answer a submission request; intake is currently switched off.

    The six form values are accepted so the signature keeps matching the
    submission form, but none of them is read.

    Returns:
        The fixed status text ``"Submission is closed"``.
    """
    # NOTE: the disabled implementation dumped the six fields as JSON into
    # "<team_name>_<model_name>_<YYYYmmddHHMMSS>.json" (with "/" replaced by
    # "-") and pushed that file via upload_file() to SUBMISSIONS_REPO
    # (repo_type="dataset") using HF_TOKEN. Restore it from version control
    # if submissions are reopened.
    closed_message = "Submission is closed"
    return closed_message

def show_results():
    """Load the results dataset and build the ranked leaderboard table.

    The composite score is ``(100 - WER) × 0.35 + (100 - CER) × 0.35 +
    BLEU × 0.30``, rounded to two decimals. Rows are sorted by that score in
    descending order (highest is best) and numbered from 1.

    Returns:
        pandas.DataFrame: On success, the columns ``Rank``, ``Score``,
        ``team_name``, ``model_name``, ``WER``, ``CER``, ``BLEU``. On any
        failure, a one-row frame whose single ``Error`` column holds the
        error message. The return value is therefore always tabular: a bare
        string handed to a table output is not guaranteed to be displayed
        as text.
    """
    import logging

    source_columns = ["team_name", "model_name", "WER", "CER", "BLEU"]
    try:
        ds = load_dataset(RESULTS_REPO, split="train")
        # Work on an explicit copy so adding the Score column cannot trigger
        # pandas' chained-assignment warning on the column subset.
        df = ds.to_pandas()[source_columns].copy()

        # Composite score: (100 - WER) × 0.35 + (100 - CER) × 0.35 + BLEU × 0.30
        composite = (100 - df['WER']) * 0.35 + (100 - df['CER']) * 0.35 + df['BLEU'] * 0.30
        df['Score'] = composite.round(2)

        # Score first, then best-to-worst ordering with a fresh 0..n-1 index.
        df = df[['Score', 'team_name', 'model_name', 'WER', 'CER', 'BLEU']]
        df = df.sort_values(by='Score', ascending=False).reset_index(drop=True)

        # 1-based rank as the leading column.
        df.insert(0, 'Rank', range(1, len(df) + 1))
        return df

    except Exception as e:
        # UI boundary: keep the page alive, but leave a traceback in the logs
        # and show the message inside the table itself.
        logging.getLogger(__name__).exception("Failed to build the leaderboard")
        return pd.DataFrame(
            {"Error": [f"An error occurred while loading the results: {e}"]}
        )

# Build the competition page: a welcome banner followed by four tabs
# (rules, prizes, evaluation details, submission form + leaderboard).
with gr.Blocks() as demo:
    # Welcome message. Bold text uses <b> because Markdown emphasis is not
    # parsed inside a raw HTML block and would show up as literal asterisks.
    gr.Markdown("""
    <h2 style="font-size:28px;">👋 Welcome to the VLM OCR Competition!</h2>
    <p style="font-size:18px;">
    This competition aims to improve <b>open-source Arabic OCR models</b>.  
    It's part of the NAMAA Community mission to strengthen the Arabic presence in the ML space.  
    This competition is designed to <b>push the boundaries</b> of OCR performance on diverse Arabic documents.
    </p>
    """)

    with gr.Tabs():
        with gr.Tab("📜 Rules"):
            # Text instructions
            gr.Markdown("""
        <h3 style="font-size:22px;">QARI OCR Competition Rules</h3>
        <p style="font-size:18px;">
        Welcome to the <b>QARI OCR Competition</b> organized by the <b>NAMAA Community</b> and sponsored by <b>KANDCA</b>!  
        The competition runs from <b>September 15 to October 15</b>.  
        Join the <a href="https://discord.gg/GDTpeHZt" target="_blank">Discord server</a> for support and discussion.  
        Full rules and submission portal: <a href="https://huggingface.co/spaces/NAMAA-Space/QARI-Competition" target="_blank">Hugging Face Space</a>.
        </p>
        
        <h4 style="font-size:20px;">📜 Submission Rules</h4>
        <ul style="font-size:18px;">
            <li>Each team can submit <b>one model evaluation per week</b>.</li>
            <li>Provide with your submission:
                <ol>
                    <li>Team name (must stay consistent across submissions)</li>
                    <li>Model name & Hugging Face Model ID</li>
                    <li>A valid Hugging Face token with access</li>
                    <li><b>The inference code</b> and any <b>dependency installation instructions</b></li>
                    <li><b>The OCR output must be a single-page structured HTML</b> using the following tags:
                        <ul>
                            <li>&lt;header&gt;, &lt;footer&gt;, &lt;main&gt;, &lt;section id="1"&gt;, &lt;section id="2"&gt;</li>
                            <li>&lt;p&gt;, &lt;h1&gt;-&lt;h5&gt;, &lt;b&gt;, &lt;i&gt;, &lt;u&gt;</li>
                            <li>&lt;img&gt;, &lt;table&gt;, &lt;hr&gt;, &lt;ul&gt;, &lt;ol&gt;</li>
                        </ul>
                    </li>
                    <li><b>Submitting only unstructured output will result in a 5-point deduction</b> from your final score.</li>
                </ol>
            </li>
            <li><b>The submitted code is the responsibility of the submitting team.</b></li>
            <li>Ideally, provide a working <b>Google Colab link</b> with all details and dependencies.</li>
        </ul>
        
        <h4 style="font-size:20px;">📆 Evaluation Schedule</h4>
        <ul style="font-size:18px;">
            <li>Submissions received by <strong>Sunday at midnight</strong> will be evaluated on <strong>Monday</strong>.</li>
            <li>The leaderboard will be updated by <strong>Wednesday or Thursday</strong> of the same week.</li>
        </ul>
        """)


        with gr.Tab("🎁 Prizes"):
            gr.Markdown("""
            <h3 style="font-size:22px;">Prize Distribution ((bank transfer or API credits))</h3>
            <ul style="font-size:18px;">
                <li>1st Place: 🥇 250 USD</li>
                <li>2nd Place: 🥈 125 USD</li>
                <li>3rd Place: 🥉 75 USD</li>
                <li>4th Place: 🎖️ 50 USD</li>
                <li>5th Place: 🎖️ 25 USD</li>
            </ul>
            """)

        with gr.Tab("📊 Evaluation"):
            gr.Markdown("""
            <h3 style="font-size:22px;">Evaluation Details</h3>
            <ul style="font-size:18px;">
                <li>The evaluation dataset will remain <b>private</b> and is not shared with participants.</li>
                <li>It will include:
                    <ul>
                        <li>Historical documents</li>
                        <li>Scanned pages</li>
                        <li>Different layouts</li>
                        <li>Handwritten pages</li>
                    </ul>
                </li>
                <li>Models will be evaluated on <b>accuracy metrics</b> such as:
                    <ul>
                        <li>Word Error Rate (WER)</li>
                        <li>Character Error Rate (CER)</li>
                        <li>BLEU score</li>
                    </ul>
                </li>
                <li><b>Evaluation schedule:</b>
                    <ul>
                        <li>Submissions received by <b>Sunday at midnight</b> will be evaluated on <b>Monday</b>.</li>
                        <li>The leaderboard will be updated by <b>Wednesday or Thursday</b> of the same week.</li>
                    </ul>
                </li>
            </ul>
            """)

        with gr.Tab("🚀 Submit & Leaderboard"):
            gr.Markdown("<h3 style='font-size:22px;'>Submit Your Model</h3>")
            with gr.Row():
                team = gr.Textbox(label="Team Name", placeholder="Enter your team name")
                email = gr.Textbox(label="Email", placeholder="Enter your email")
            model = gr.Textbox(label="Model Name", placeholder="Enter your model name")
            hf_model = gr.Textbox(label="Hugging Face Model ID", placeholder="Enter your HF Model ID")
            # type="password" masks the token while it is being typed.
            hf_token = gr.Textbox(label="Hugging Face Access Token", type="password", placeholder="Enter your HF token")
            code = gr.Textbox(label="Code (instructions to run your model) or colab link", lines=6, placeholder="Paste your run code here...")

            submit_btn = gr.Button("Submit")
            status = gr.Textbox(label="Status")

            # The form is validated first; validate_fields forwards complete
            # forms to submit() and its return value is shown in `status`.
            submit_btn.click(fn=validate_fields,
                             inputs=[team, email, model, hf_model, hf_token, code],
                             outputs=status)

            gr.Markdown("<h3 style='font-size:22px;'>Leaderboard Results</h3>")
            # Placeholder headers mirror the columns show_results() returns,
            # so the empty table matches the populated one.
            results = gr.Dataframe(
                headers=["Rank", "Score", "team_name", "model_name", "WER", "CER", "BLEU"]
            )
            # Populate the leaderboard every time the page is loaded.
            demo.load(fn=show_results, outputs=results)

# Launch after the Blocks context has been closed, i.e. once the layout is
# fully defined.
demo.launch()