Spaces:
				
			
			
	
			
			
					
		Running
		
	
	
	
			
			
	
	
	
	
		
		
					
		Running
		
	add truth data viewer
Browse files
- app.py +4 -3
- data_utils.py +4 -1
- eval_utils.py +1 -1
    	
        app.py
    CHANGED
    
    | @@ -104,7 +104,8 @@ def sample_explore_item(model_name, size_H, size_W): | |
| 104 | 
             
                puzzle_solved = explore_item['correct_cells'] == explore_item['total_cells']
         | 
| 105 | 
             
                cell_acc = explore_item["correct_cells"] / explore_item["total_cells"] * 100
         | 
| 106 | 
             
                model_eval_md = f"### π Evaluation:\n\n  **Total Cells**: {explore_item['total_cells']} | **Correct Cells**: {explore_item['correct_cells']} | **Puzzle solved**: {puzzle_solved} | **Cell Acc**: {cell_acc:.2f}%"
         | 
| 107 | 
            -
                 | 
|  | |
| 108 |  | 
| 109 |  | 
| 110 | 
             
            def _tab_explore():
         | 
| @@ -124,11 +125,11 @@ def _tab_explore(): | |
| 124 | 
             
                puzzle_md = gr.Markdown("### π¦ Puzzle: \n\nTo be loaded", elem_id="puzzle-md", elem_classes="box_md")
         | 
| 125 | 
             
                model_reasoning_md = gr.Markdown("### π€ Reasoning: \n\nTo be loaded", elem_id="model-reasoning-md", elem_classes="box_md")
         | 
| 126 | 
             
                model_prediction_md = gr.Markdown("### π¬ Answer: \n\nTo be loaded", elem_id="model-prediction-md", elem_classes="box_md")
         | 
|  | |
| 127 | 
             
                model_eval_md = gr.Markdown("### π Evaluation: \n\nTo be loaded", elem_id="model-eval-md", elem_classes="box_md")
         | 
| 128 | 
            -
                
         | 
| 129 | 
             
                explore_button.click(fn=sample_explore_item, 
         | 
| 130 | 
             
                                     inputs=[model_selection, size_H_selection, size_W_selection], 
         | 
| 131 | 
            -
                                     outputs=[puzzle_md, model_reasoning_md, model_prediction_md, model_eval_md])
         | 
| 132 |  | 
| 133 |  | 
| 134 |  | 
|  | |
| 104 | 
             
                puzzle_solved = explore_item['correct_cells'] == explore_item['total_cells']
         | 
| 105 | 
             
                cell_acc = explore_item["correct_cells"] / explore_item["total_cells"] * 100
         | 
| 106 | 
             
                model_eval_md = f"### π Evaluation:\n\n  **Total Cells**: {explore_item['total_cells']} | **Correct Cells**: {explore_item['correct_cells']} | **Puzzle solved**: {puzzle_solved} | **Cell Acc**: {cell_acc:.2f}%"
         | 
| 107 | 
            +
                turht_solution_md = f"### ✅ Truth Solution:\n\n{explore_item['truth_solution_table']}"
         | 
| 108 | 
            +
                return puzzle_md, model_reasoning_md, model_prediction_md, model_eval_md, turht_solution_md
         | 
| 109 |  | 
| 110 |  | 
| 111 | 
             
            def _tab_explore():
         | 
|  | |
| 125 | 
             
                puzzle_md = gr.Markdown("### π¦ Puzzle: \n\nTo be loaded", elem_id="puzzle-md", elem_classes="box_md")
         | 
| 126 | 
             
                model_reasoning_md = gr.Markdown("### π€ Reasoning: \n\nTo be loaded", elem_id="model-reasoning-md", elem_classes="box_md")
         | 
| 127 | 
             
                model_prediction_md = gr.Markdown("### π¬ Answer: \n\nTo be loaded", elem_id="model-prediction-md", elem_classes="box_md")
         | 
| 128 | 
            +
                turht_solution_md = gr.Markdown("### ✅ Truth Solution: \n\nTo be loaded", elem_id="truth-solution-md", elem_classes="box_md")
         | 
| 129 | 
             
                model_eval_md = gr.Markdown("### π Evaluation: \n\nTo be loaded", elem_id="model-eval-md", elem_classes="box_md")
         | 
|  | |
| 130 | 
             
                explore_button.click(fn=sample_explore_item, 
         | 
| 131 | 
             
                                     inputs=[model_selection, size_H_selection, size_W_selection], 
         | 
| 132 | 
            +
                                     outputs=[puzzle_md, model_reasoning_md, model_prediction_md, model_eval_md, turht_solution_md])
         | 
| 133 |  | 
| 134 |  | 
| 135 |  | 
    	
        data_utils.py
    CHANGED
    
    | @@ -92,6 +92,8 @@ def get_random_item(model_name="random", size_H="random", size_W="random"): | |
| 92 | 
             
                        continue 
         | 
| 93 | 
             
                    if "child" in item["puzzle"].lower() or "mother" in item["puzzle"].lower():
         | 
| 94 | 
             
                        continue
         | 
|  | |
|  | |
| 95 | 
             
                    prediction_reasoning = prediction_json.get("reasoning", "")
         | 
| 96 | 
             
                    prediction_table = prediction_json["solution"]
         | 
| 97 | 
             
                    if prediction_table is not None:
         | 
| @@ -120,10 +122,11 @@ def get_random_item(model_name="random", size_H="random", size_W="random"): | |
| 120 | 
             
                table_md = tabulate(rows, headers=headers, tablefmt="github")
         | 
| 121 | 
             
                explore_item["solution_table_md"] = table_md
         | 
| 122 |  | 
| 123 | 
            -
                this_total_cells, this_correct_cells = eval_each_puzzle(explore_item["id"], prediction_table)
         | 
| 124 | 
             
                # print(table_md)
         | 
| 125 | 
             
                explore_item["correct_cells"] = this_correct_cells
         | 
| 126 | 
             
                explore_item["total_cells"] = this_total_cells
         | 
|  | |
| 127 | 
             
                return explore_item
         | 
| 128 |  | 
| 129 |  | 
|  | |
| 92 | 
             
                        continue 
         | 
| 93 | 
             
                    if "child" in item["puzzle"].lower() or "mother" in item["puzzle"].lower():
         | 
| 94 | 
             
                        continue
         | 
| 95 | 
            +
                    if "loves the spaghetti eater" in item["puzzle"].lower():
         | 
| 96 | 
            +
                        continue 
         | 
| 97 | 
             
                    prediction_reasoning = prediction_json.get("reasoning", "")
         | 
| 98 | 
             
                    prediction_table = prediction_json["solution"]
         | 
| 99 | 
             
                    if prediction_table is not None:
         | 
|  | |
| 122 | 
             
                table_md = tabulate(rows, headers=headers, tablefmt="github")
         | 
| 123 | 
             
                explore_item["solution_table_md"] = table_md
         | 
| 124 |  | 
| 125 | 
            +
                this_total_cells, this_correct_cells, truth_solution_table = eval_each_puzzle(explore_item["id"], prediction_table)
         | 
| 126 | 
             
                # print(table_md)
         | 
| 127 | 
             
                explore_item["correct_cells"] = this_correct_cells
         | 
| 128 | 
             
                explore_item["total_cells"] = this_total_cells
         | 
| 129 | 
            +
                explore_item["truth_solution_table"]  = tabulate(truth_solution_table["rows"], headers=truth_solution_table["header"], tablefmt="github")
         | 
| 130 | 
             
                return explore_item
         | 
| 131 |  | 
| 132 |  | 
    	
        eval_utils.py
    CHANGED
    
    | @@ -83,7 +83,7 @@ def eval_each_puzzle(id, prediction_table): | |
| 83 | 
             
                                predicted_cell = prediction_table[house][column].lower().strip()
         | 
| 84 | 
             
                            if truth_cell == predicted_cell:
         | 
| 85 | 
             
                                this_correct_cells += 1  
         | 
| 86 | 
            -
                return this_total_cells, this_correct_cells
         | 
| 87 |  | 
| 88 | 
             
            def eval_model(model, filepath):
         | 
| 89 | 
             
                global private_solutions
         | 
|  | |
| 83 | 
             
                                predicted_cell = prediction_table[house][column].lower().strip()
         | 
| 84 | 
             
                            if truth_cell == predicted_cell:
         | 
| 85 | 
             
                                this_correct_cells += 1  
         | 
| 86 | 
            +
                return this_total_cells, this_correct_cells, private_solutions[id]
         | 
| 87 |  | 
| 88 | 
             
            def eval_model(model, filepath):
         | 
| 89 | 
             
                global private_solutions
         | 

