Spaces:

r-three
/

quick-tokenizer-accuracy

Running

App Files Community

Gül Sena Altıntaş committed on Aug 19

Commit

15729bc

1 Parent(s): 279fdab

Now accepts multiline!

Browse files

Files changed (2) hide show

app.py +48 -87
serve_on_killarney.sh +2 -2

app.py CHANGED Viewed

@@ -63,8 +63,18 @@ PREDEFINED_MODELS = [
 model_cache = {}
-def parse_dataset(text):
     """Parse the input dataset text into structured questions"""
     def clean_cell(s: str) -> str:
         return s.strip().replace("\r", "").replace("\n", " ").strip('"').strip()
@@ -75,12 +85,6 @@ def parse_dataset(text):
     # Normalize line endings
     text = text.replace("\r\n", "\n").replace("\r", "\n")
-    # Detect delimiter from first non-empty line
-    for line in text.splitlines():
-        if line.strip():
-            delimiter = "\t" if "\t" in line else ","
-            break
     # Use csv.reader to handle quoted multi-line cells
     reader = csv.reader(io.StringIO(text), delimiter=delimiter, quotechar='"')
@@ -112,67 +116,6 @@ def parse_dataset(text):
     return questions, error_msg
-def parse_datasetold(text):
-    """Parse the input dataset text into structured questions"""
-    if not text.strip():
-        return [], "Please enter your dataset"
-    # Detect delimiter
-    sample_line = text.splitlines()[0]
-    delimiter = "\t" if "\t" in sample_line else ","
-    # Use csv.reader to correctly parse quotes & newlines
-    reader = csv.reader(io.StringIO(text), delimiter=delimiter)
-    questions = []
-    errors = []
-    for i, row in enumerate(reader, 1):
-        parts = [clean_cell(p) for p in row if p.strip()]
-        if len(parts) < 5:
-            errors.append(f"Line {i}: Not enough columns (need 5, got {len(parts)})")
-            continue
-        question = {
-            "question": parts[0],
-            "correct_answer": parts[1],
-            "choices": [parts[2], parts[3], parts[4]],
-        }
-        if question["correct_answer"] not in question["choices"]:
-            question["choices"].append(question["correct_answer"])
-        questions.append(question)
-    error_msg = "\n".join(errors) if errors else ""
-    return questions, error_msg
-    for i, line in enumerate(reader, 1):
-        # for i, line in enumerate(lines[1:], 2):  # Start from line 2 (after header)
-        line = line.strip()
-        if not line:
-            continue
-        parts = [clean_text(part) for part in line.split(delimiter)]
-        if len(parts) < 5:
-            errors.append(f"Line {i}: Not enough columns (need 5, got {len(parts)})")
-            continue
-        question = {
-            "question": parts[0],
-            "correct_answer": parts[1],
-            "choices": [parts[2], parts[3], parts[4]],
-        }
-        # Ensure correct answer is in choices
-        if question["correct_answer"] not in question["choices"]:
-            question["choices"].append(question["correct_answer"])
-        questions.append(question)
-    error_msg = "\n".join(errors) if errors else ""
-    return questions, error_msg
 def setup_tokenizer(model_path):
     tokenizer_name = model_path
     if "supertoken" in model_path:
@@ -403,7 +346,11 @@ def evaluate_model_on_questions(model_path, questions, progress_callback=None):
 def run_evaluation(
-    dataset_text, selected_predefined, custom_models_text="", progress=gr.Progress()
 ):
     """Main evaluation function"""
     if not dataset_text.strip():
@@ -447,7 +394,7 @@ def run_evaluation(
         )
     # Parse dataset
-    questions, parse_error = parse_dataset(dataset_text)
     if parse_error:
         return (
@@ -976,22 +923,18 @@ def generate_csv_summary(questions, results, summary_stats):
 # Sample datasets for quick testing
 SAMPLE_DATASETS = {
     "Custom (enter below)": "",
-    "LP": """Question,Correct Answer,Choice1,Choice2,Choice3
-    In which country is Llanfairpwllgwyngyllgogerychwyrndrobwllllantysiliogogogoch located?	Wales	Germany	France	Scotland
 In which country is Llanfair pwllgwyngyll located?	Wales	Germany	France	Scotland
 In which country is Llanfair PG located?	Wales	Germany	France	Scotland""",
-    "Simple Math": """Question,Correct Answer,Choice1,Choice2,Choice3
-What is 2+2?,4,3,2,5
-What is 5*3?,15,12,16,18
-What is 10-7?,3,7,4,2
-What is 8/2?,4,3,2,5""",
-    "World Capitals": """Question,Correct Answer,Choice1,Choice2,Choice3
-What is the capital of France?,Paris,London,Berlin,Rome
-What is the capital of Japan?,Tokyo,Seoul,Beijing,Bangkok
-What is the capital of Brazil?,Brasília,Rio de Janeiro,São Paulo,Salvador
-What is the capital of Australia?,Canberra,Sydney,Melbourne,Perth""",
-    "Science Quiz": """Question,Correct Answer,Choice1,Choice2,Choice3
-What is the chemical symbol for gold?,Au,Ag,Ca,K
 Which planet is closest to the Sun?,Mercury,Venus,Earth,Mars
 What is the speed of light?,299792458 m/s,300000000 m/s,2992458 m/s,299000000 m/s
 What gas do plants absorb from the atmosphere?,Carbon dioxide,Oxygen,Nitrogen,Hydrogen""",
@@ -1035,11 +978,14 @@ css = """
 # }
 """
 # Create Gradio interface
 with gr.Blocks(
     title="🤖 Model Performance Comparison", theme=gr.themes.Soft(), css=css
 ) as demo:
-    gr.Markdown("""
     # 🤖 Model Performance Comparison Tool
     Compare LLM performance on multiple-choice questions using Hugging Face models.
@@ -1052,7 +998,17 @@ with gr.Blocks(
     - Detailed question-by-question results
     - Performance charts and statistics
     """)
     with gr.Row():
         with gr.Column(scale=2):
             # Sample dataset selector
@@ -1178,7 +1134,12 @@ bigscience/bloom-560m""",
     evaluate_btn.click(
         fn=run_evaluation,
-        inputs=[dataset_input, predefined_selector, custom_models_input],
         outputs=[
             summary_output,
             detailed_results,

 model_cache = {}
def normalize_delimiter(delim: str) -> str:
    """Return the single-character delimiter described by *delim*.

    Surrounding whitespace is ignored, so ``" , "`` is read as ``","``.
    The two-character text ``\\t`` (a backslash followed by ``t``, as a
    user would type it into a text field) is translated to a real tab.

    A delimiter made only of whitespace (a real tab or a single space) is
    kept as given: stripping it would leave an empty string and wrongly
    reject it.

    Args:
        delim: Delimiter text, possibly padded with whitespace.

    Returns:
        The delimiter as exactly one character.

    Raises:
        ValueError: If the delimiter is not exactly one character.
    """
    stripped = delim.strip()
    if stripped == "\\t":  # user typed literal \t
        return "\t"
    # An all-whitespace delimiter strips to "", so fall back to the raw value
    # instead of rejecting a legitimate tab or space.
    candidate = stripped or delim
    if len(candidate) != 1:
        raise ValueError(
            f"Delimiter must be a single character, got {repr(candidate)}"
        )
    return candidate
+def parse_dataset(text, delimiter: str = "\t"):
     """Parse the input dataset text into structured questions"""
+    delimiter = normalize_delimiter(delimiter)
     def clean_cell(s: str) -> str:
         return s.strip().replace("\r", "").replace("\n", " ").strip('"').strip()
     # Normalize line endings
     text = text.replace("\r\n", "\n").replace("\r", "\n")
     # Use csv.reader to handle quoted multi-line cells
     reader = csv.reader(io.StringIO(text), delimiter=delimiter, quotechar='"')
     return questions, error_msg
 def setup_tokenizer(model_path):
     tokenizer_name = model_path
     if "supertoken" in model_path:
 def run_evaluation(
+    dataset_text,
+    selected_predefined,
+    custom_models_text="",
+    delimiter: str = "\t",
+    progress=gr.Progress(),
 ):
     """Main evaluation function"""
     if not dataset_text.strip():
         )
     # Parse dataset
+    questions, parse_error = parse_dataset(dataset_text, delimiter=delimiter)
     if parse_error:
         return (
 # Sample datasets for quick testing
 SAMPLE_DATASETS = {
     "Custom (enter below)": "",
+    "LP": """In which country is Llanfairpwllgwyngyllgogerychwyrndrobwllllantysiliogogogoch located?	Wales	Germany	France	Scotland
 In which country is Llanfair pwllgwyngyll located?	Wales	Germany	France	Scotland
 In which country is Llanfair PG located?	Wales	Germany	France	Scotland""",
+    "Simple Math": """What is 2+2?  4    3  2    5
+What is 5*3?    15 12  16   18
+What is 10-7?   3 7   4 2
+What is 8/2?    4  3    2  5""",
+    "World Capitals": """What is the capital of France? Paris   London    Berlin Rome
+What is the capital of Japan?   Tokyo Seoul   Beijing   Bangkok
+What is the capital of Brazil?  Brasília Rio de Janeiro  São Paulo    Salvador
+What is the capital of Australia?   Canberra  Sydney   Melbourne Perth""",
+    "Science Quiz": """What is the chemical symbol for gold?,Au,Ag,Ca,K
 Which planet is closest to the Sun?,Mercury,Venus,Earth,Mars
 What is the speed of light?,299792458 m/s,300000000 m/s,2992458 m/s,299000000 m/s
 What gas do plants absorb from the atmosphere?,Carbon dioxide,Oxygen,Nitrogen,Hydrogen""",
 # }
 """
 # Create Gradio interface
 with gr.Blocks(
     title="🤖 Model Performance Comparison", theme=gr.themes.Soft(), css=css
 ) as demo:
+    with gr.Row():
+        with gr.Column(scale=2):
+            gr.Markdown("""
     # 🤖 Model Performance Comparison Tool
     Compare LLM performance on multiple-choice questions using Hugging Face models.
     - Detailed question-by-question results
     - Performance charts and statistics
     """)
+        with gr.Column(scale=1):
+            # with gr.Accordion("Delimiter Options"):
+            gr.Markdown("""
+            Enter the delimiter used in your dataset:
+            """)
+            delimiter_selector = gr.Textbox(
+                label="Delimiter",
+                placeholder="Enter a delimiter, e.g., , or \\t",
+                value="\\t",  # default
+                lines=1,
+            )
     with gr.Row():
         with gr.Column(scale=2):
             # Sample dataset selector
     evaluate_btn.click(
         fn=run_evaluation,
+        inputs=[
+            dataset_input,
+            predefined_selector,
+            custom_models_input,
+            delimiter_selector,
+        ],
         outputs=[
             summary_output,
             detailed_results,

serve_on_killarney.sh CHANGED Viewed

@@ -16,8 +16,8 @@ NODES=1
 NTASKS_PER_NODE=1
 CPUS_PER_TASK=4
 ### request more memory to run on more models
-MEM="16G"
-TIME="02:00:00"
 GRADIO_PORT=7861
 script_location="$APP_DIR/$SCRIPT_NAME"

 NTASKS_PER_NODE=1
 CPUS_PER_TASK=4
 ### request more memory to run on more models
+MEM="64G"
+TIME="06:00:00"
 GRADIO_PORT=7861
 script_location="$APP_DIR/$SCRIPT_NAME"