import gradio as gr
import random
import pandas as pd
import os
import threading
import time
import numpy as np

from utils.data_loader import get_random_example
from utils.models import generate_summaries, model_names
from utils.ui_helpers import toggle_context_display, update_feedback, get_context_html, toggle_reference_answer
from utils.leaderboard import load_leaderboard_data, submit_vote_with_elo, generate_leaderboard_html
from utils.vote_logger import save_vote_details
from utils.shared import generation_interrupt


feedback_options = {
    "left": [
        "Model A: Answers the question completely",
        "Model A: Information is accurate and correct",
        "Model A: Stays on topic and relevant",
        "Model A: Clear and well-written response",
        "Model A: Appropriately says 'I don't know' without enough info",
        "Model A: Asks helpful follow-up questions when unclear"
    ],
    "right": [
        "Model B: Answers the question completely",
        "Model B: Information is accurate and correct",
        "Model B: Stays on topic and relevant",
        "Model B: Clear and well-written response",
        "Model B: Appropriately says 'I don't know' without enough info",
        "Model B: Asks helpful follow-up questions when unclear"
    ],
    "tie": [
        "Model A: Answers the question completely",
        "Model A: Information is accurate and correct",
        "Model A: Stays on topic and relevant",
        "Model A: Clear and well-written response",
        "Model A: Appropriately says 'I don't know' without enough info",
        "Model A: Asks helpful follow-up questions when unclear",
        "Model B: Answers the question completely",
        "Model B: Information is accurate and correct",
        "Model B: Stays on topic and relevant",
        "Model B: Clear and well-written response",
        "Model B: Appropriately says 'I don't know' without enough info",
        "Model B: Asks helpful follow-up questions when unclear"
    ],
    "neither": [
        "Model A: Incomplete or missing key information",
        "Model A: Contains incorrect or made-up information",
        "Model A: Goes off-topic or irrelevant",
        "Model A: Poorly written or confusing",
        "Model A: Should have admitted uncertainty without enough info",
        "Model A: Should have asked clarifying questions but didn't",
        "Model B: Incomplete or missing key information",
        "Model B: Contains incorrect or made-up information",
        "Model B: Goes off-topic or irrelevant",
        "Model B: Poorly written or confusing",
        "Model B: Should have admitted uncertainty without enough info",
        "Model B: Should have asked clarifying questions but didn't"
    ]
}


def weighted_sample_without_replacement(population, weights, k=2):
    """
    Perform weighted random sampling without replacement.

    Args:
        population: The list of items to sample from
        weights: The weight for each item
        k: Number of items to sample

    Returns:
        A list of k sampled items
    """
    if len(population) <= k:
        return population

    # Convert weights to a numpy array for efficient operations
    weights = np.array(weights)

    # Work on copies so the caller's population list is not mutated
    remaining_population = population.copy()
    remaining_weights = weights.copy()

    selected = []
    for _ in range(k):
        # Normalize weights so they sum to 1
        normalized_weights = remaining_weights / remaining_weights.sum()

        # Randomly select one item based on weights
        selected_idx = np.random.choice(len(remaining_population), p=normalized_weights)

        # Add the selected item to the result
        selected.append(remaining_population[selected_idx])

        # Remove the selected item from the pool
        remaining_population.pop(selected_idx)
        remaining_weights = np.delete(remaining_weights, selected_idx)

    return selected
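

# Example (hypothetical model names and weights): with weights [10, 1, 1],
# "m1" appears in roughly 98% of sampled pairs, so heavily-weighted models
# dominate matchups without ever being drawn twice in one pair:
#
#   counts = {"m1": 0, "m2": 0, "m3": 0}
#   for _ in range(1000):
#       for m in weighted_sample_without_replacement(["m1", "m2", "m3"], [10, 1, 1]):
#           counts[m] += 1
#   # counts["m1"] ≈ 985; counts["m2"] and counts["m3"] land near 510 each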


def load_context(set_interrupt=False):
    if set_interrupt:
        generation_interrupt.set()
        time.sleep(0.2)
        generation_interrupt.clear()
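        # This set/sleep/clear pulse tells any in-flight generation to stop:
        # generate_model_summaries polls generation_interrupt.is_set(), and the
        # short sleep gives it a window to observe the flag before it is reset.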

    example = get_random_example()

    context_desc = example.get('processed_context_desc', '')
    if context_desc:
        context_desc = f"<div class='context-topic'><span class='topic-label'>The question and context are about:</span> {context_desc}</div>"

    show_full = False
    context_html = get_context_html(example, show_full=show_full)

    return [
        example,
        gr.update(value=example['question'], elem_classes="query-text"),  # Regular query styles
        gr.update(value=context_desc, visible=bool(context_desc)),
        gr.update(value=context_html),
        gr.update(value="Show Full Context", elem_classes=["context-toggle-button"], visible=True),  # Ensure toggle is visible
        show_full
    ]


def toggle_faq(expanded):
    """Toggle FAQ visibility with proper arrow icons"""
    new_state = not expanded
    question = "Why can't I upload a file or ask my own question?"
    button_text = f"▼ {question}" if new_state else f"▶ {question}"
    return new_state, gr.update(visible=new_state), gr.update(value=button_text)


# Explicit function to hide the FAQ section completely
def hide_faq_section():
    """Completely hide the FAQ section and its content"""
    return gr.update(visible=False), gr.update(visible=False)


def load_leaderboard():
    results = load_leaderboard_data()
    leaderboard_html = generate_leaderboard_html(results)
    return leaderboard_html


def generate_model_summaries(example):
    result = {
        "model_a": "",
        "model_b": "",
        "summary_a": "",
        "summary_b": "",
        "completed": False
    }

    if generation_interrupt.is_set():
        return result

    try:
        # Get current leaderboard data to determine model usage counts
        leaderboard_data = load_leaderboard_data()

        # Calculate weights using inverse weighting:
        # weight = K / (games_played + C)
        K = 100  # Scaling factor
        C = 5    # Smoothing constant
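        # For example (hypothetical counts): a model with 0 games gets weight
        # 100 / (0 + 5) = 20, while one with 195 games gets 100 / (195 + 5) = 0.5,
        # so rarely-played models are sampled far more often until counts even out.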
        weights = []
        model_list = []

        for model in model_names:
            # Get games played for the model, default to 0 if not found
            games_played = leaderboard_data["games_played"].get(model, 0)

            # Calculate weight using inverse formula
            weight = K / (games_played + C)
            weights.append(weight)
            model_list.append(model)

        # Select two models using weighted sampling without replacement
        selected_models = weighted_sample_without_replacement(model_list, weights, k=2)
        m_a_name, m_b_name = selected_models

        result["model_a"] = m_a_name
        result["model_b"] = m_b_name

        s_a, s_b = generate_summaries(example, m_a_name, m_b_name)

        if not generation_interrupt.is_set():
            result["summary_a"] = s_a
            result["summary_b"] = s_b
            result["completed"] = bool(s_a and s_b)
    except Exception as e:
        print(f"Error in generation: {e}")

    return result


def process_generation_result(result):
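    # The 20 return values line up positionally with the `outputs=` list wired
    # to process_example below: model_a_name, model_b_name, summary_a_text,
    # summary_b_text, selected_winner, feedback_list, show_results_state,
    # results_agg, summary_a_display, summary_b_display, the four vote buttons,
    # feedback_checkboxes, feedback_section, submit_button, results_reveal_area,
    # random_question_btn, and main_interface_area.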
    if not result["completed"] or not result["summary_a"] or not result["summary_b"]:
        return [
            result.get("model_a", ""),
            result.get("model_b", ""),
            result.get("summary_a", ""),
            result.get("summary_b", ""),
            None, [], False, load_leaderboard_data(),
            gr.update(value=result.get("summary_a", "Generation was interrupted or failed.")),
            gr.update(value=result.get("summary_b", "Generation was interrupted or failed.")),
            gr.update(interactive=False, elem_classes=["vote-button"]),
            gr.update(interactive=False, elem_classes=["vote-button"]),
            gr.update(interactive=False, elem_classes=["vote-button"]),
            gr.update(interactive=False, elem_classes=["vote-button", "vote-button-neither"]),
            gr.update(choices=[], value=[], interactive=False, visible=False),
            gr.update(visible=False),
            gr.update(interactive=False, visible=True),
            gr.update(visible=False),
            gr.update(interactive=True),
            gr.update(elem_classes=[])
        ]

    buttons_interactive = bool(result["summary_a"] and result["summary_b"])
    agg_results = load_leaderboard_data()

    return [
        result["model_a"], result["model_b"],
        result["summary_a"], result["summary_b"],
        None, [], False, agg_results,
        gr.update(value=result["summary_a"]),
        gr.update(value=result["summary_b"]),
        gr.update(interactive=buttons_interactive, elem_classes=["vote-button"]),
        gr.update(interactive=buttons_interactive, elem_classes=["vote-button"]),
        gr.update(interactive=buttons_interactive, elem_classes=["vote-button"]),
        gr.update(interactive=buttons_interactive, elem_classes=["vote-button", "vote-button-neither"]),
        gr.update(choices=[], value=[], interactive=False, visible=False),
        gr.update(visible=False),
        gr.update(interactive=False, visible=True),
        gr.update(visible=False),
        gr.update(interactive=True),
        gr.update(elem_classes=[])
    ]


def process_example(example):
    result = generate_model_summaries(example)
    return process_generation_result(result)


def select_vote_improved(winner_choice):
    feedback_choices = feedback_options.get(winner_choice, [])

    btn_a_classes = ["vote-button"]
    btn_b_classes = ["vote-button"]
    btn_tie_classes = ["vote-button"]
    btn_neither_classes = ["vote-button", "vote-button-neither"]

    if winner_choice == 'left':
        btn_a_classes.append("selected")
    elif winner_choice == 'right':
        btn_b_classes.append("selected")
    elif winner_choice == 'tie':
        btn_tie_classes.append("selected")
    elif winner_choice == 'neither':
        btn_neither_classes.append("selected")

    return [
        winner_choice,
        gr.update(choices=feedback_choices, value=[], interactive=True, visible=True),
        gr.update(visible=True),
        gr.update(interactive=True),
        gr.update(elem_classes=btn_a_classes),
        gr.update(elem_classes=btn_b_classes),
        gr.update(elem_classes=btn_tie_classes),
        gr.update(elem_classes=btn_neither_classes)
    ]


def handle_vote_submission(example, m_a, m_b, winner, feedback, summary_a, summary_b, current_results):
    if winner is None:
        print("Warning: Submit called without a winner selected.")
        return {}

    save_vote_details(example, m_a, m_b, winner, feedback, summary_a, summary_b)
    return submit_vote_with_elo(m_a, m_b, winner, feedback, current_results)


def show_loading_state():
    """Show loading state while fetching new content and reset UI elements"""
    return [
        gr.update(value="Loading new question and summaries...", interactive=False),
        gr.update(value="Loading new question and summaries...", interactive=False),
        gr.update(interactive=False, elem_classes=["vote-button"]),  # Reset styling
        gr.update(interactive=False, elem_classes=["vote-button"]),
        gr.update(interactive=False, elem_classes=["vote-button"]),
        gr.update(interactive=False, elem_classes=["vote-button", "vote-button-neither"]),
        gr.update(visible=False),      # feedback_section
        gr.update(interactive=False),  # submit_button
        gr.update(visible=False),      # results_reveal_area
        gr.update(interactive=True),   # random_question_btn - keep active during inference
        None                           # Reset selected_winner
    ]


def handle_new_example_click():
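    # load_context returns [example, *ui_updates]; only the example (index 0)
    # is needed here, because update_ui_for_new_context rebuilds the visible
    # UI from it in a separate .then() step.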
    return load_context(set_interrupt=True)[0]


def update_ui_for_new_context(example):
    context_desc = example.get('processed_context_desc', '')
    if context_desc:
        context_desc = f"<div class='context-topic'><span class='topic-label'>The question and context are about:</span> {context_desc}</div>"

    return [
        gr.update(value=example['question'], elem_classes="query-text"),  # Regular query styles
        gr.update(value=context_desc, visible=bool(context_desc)),
        gr.update(value=get_context_html(example, False)),
        gr.update(value="Show Full Context", elem_classes=["context-toggle-button"], visible=True),  # Ensure toggle is visible
        False
    ]


def reset_reference_section():
    """Reset the reference answer section to its hidden state when loading a new question."""
    return [
        False,                                       # Reset show_reference_answer state
        gr.update(visible=False),                    # Hide reference content (like FAQ)
        gr.update(value="▶ Show Reference Answer"),  # Reset button text (like FAQ)
        gr.update(value="")                          # Clear reference content
    ]


def cleanup_on_disconnect():
    print("Browser disconnected. Cleaning up resources...")
    generation_interrupt.set()


# Helper functions for showing/hiding UI elements
def initialize_empty_app():
    return [
        gr.update(visible=False),  # context_section
        gr.update(visible=False),  # model_section
        gr.update(visible=False),  # voting_section
        gr.update(visible=False)   # submit_button
    ]


def show_all_after_loading():
    return [
        gr.update(visible=True),  # context_section
        gr.update(visible=True),  # model_section
        gr.update(visible=True),  # voting_section
        gr.update(visible=True),  # submit_button
        gr.update(value="🔄 Try a New Question", elem_classes=["query-button"])  # Update button text
    ]


with gr.Blocks(theme=gr.themes.Default(
    primary_hue=gr.themes.colors.orange,
    secondary_hue=gr.themes.colors.slate
)) as demo:
    css_path = os.path.join(os.getcwd(), 'static', 'styles.css')
    with open(css_path, 'r') as f:
        css_content = f.read()
    gr.HTML(f"<style>{css_content}</style>")

    unload_js = """
    <script>
    window.addEventListener('beforeunload', function(e) {
        navigator.sendBeacon('/cleanup?session_id=' + window.gradioClientState.session_hash);
    });
    </script>
    """
    gr.HTML(unload_js)
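
    # Note: the '/cleanup' endpoint pinged by this beacon is assumed to be
    # registered on the underlying server elsewhere; it is not defined in this
    # file. demo.unload(cleanup_on_disconnect) below covers the same
    # disconnect case through Gradio's own event system.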

    # State variables
    current_example = gr.State({})
    model_a_name = gr.State("")
    model_b_name = gr.State("")
    summary_a_text = gr.State("")
    summary_b_text = gr.State("")
    selected_winner = gr.State(None)
    feedback_list = gr.State([])
    show_results_state = gr.State(False)
    results_agg = gr.State(load_leaderboard_data())
    show_full_context = gr.State(False)
    show_reference_answer = gr.State(False)  # State for the reference answer toggle
    faq_expanded = gr.State(False)           # State for the FAQ toggle

    with gr.Tabs() as tabs:
        with gr.TabItem("Arena", id="arena-tab"):
            gr.Markdown("# SLM RAG Arena - Compare and Find the Best Sub-5B Models for RAG")
            gr.Markdown("""
            🏟️ This arena evaluates how well small language models (under 5B parameters) answer questions based on document contexts.

            📝 Instructions:

            - **Click the "Get a Question" button** to load a random question with its context
            - **Review the query and context** to understand the information provided to the models
            - **Compare the answers** generated by two different models on answer quality or appropriate refusal
            - **Cast your vote** for the better response, or select 'Tie' if both are equally good or 'Neither' if both are inadequate
            """)
            gr.Markdown("---")
            with gr.Column(elem_id="main-interface-area") as main_interface_area:
                with gr.Row(elem_id="query-title-row"):
                    gr.Markdown("### 💬 Query - Question About Document Content", elem_classes="section-heading")

                with gr.Row(elem_id="query-container"):
                    with gr.Row(elem_classes="query-box-row"):
                        query_display = gr.Markdown(value="Click \"Get a Question\" to start", elem_classes=["query-text", "empty-query"], elem_id="query-section")
                        random_question_btn = gr.Button("💡 Get a Question", elem_classes=["query-button", "initial-button"])

                # FAQ toggle and content
                with gr.Row(visible=True, elem_id="faq-container") as faq_container:
                    faq_toggle_btn = gr.Button("▶ Why can't I upload a file or ask my own question?", elem_classes=["faq-toggle-button"])

                # FAQ content - initially hidden
                with gr.Row(visible=False, elem_id="faq-content") as faq_content:
                    gr.Markdown("""
                    This arena tests how well different AI models summarize information using standardized questions and contexts. All models see exactly the same inputs, so comparisons are fair.

                    We don't allow file uploads here because that would change what we're measuring. Instead, check our leaderboard to find top-performing models for your needs. We'll soon launch a separate playground where you can test models with your own files.
                    """, elem_classes="faq-text")

                context_description = gr.Markdown("", elem_classes="context-description")

                # Container for all context-related elements - initially hidden
                with gr.Column(visible=False, elem_id="context-section") as context_section:
                    context_divider = gr.HTML("<hr>", elem_id="context-divider")
                    with gr.Row(elem_id="context-header-row"):
                        gr.Markdown("### 📋 Context - Retrieved Content from the Document", elem_classes="context-title")
                        context_toggle_btn = gr.Button("Show Full Context", elem_classes=["context-toggle-button"])
                    context_display = gr.HTML(value="", label="Context Chunks")

                # Model comparison section - initially hidden
                with gr.Column(visible=False, elem_id="model-section") as model_section:
                    gr.Markdown("---")
                    gr.Markdown("### 🔍 Compare Models - Are These Grounded, Complete Answers or Correct Rejections?", elem_classes="section-heading")

                    with gr.Row(elem_id="summary-containers"):
                        with gr.Column(scale=1):
                            with gr.Group(elem_classes=["summary-card", "summary-card-a"]):
                                summary_a_display = gr.Textbox(
                                    label="Model A",
                                    lines=10,
                                    interactive=False,
                                    show_copy_button=True,
                                    autoscroll=False,
                                    elem_id="summary-a-display"
                                )
                        with gr.Column(scale=1):
                            with gr.Group(elem_classes=["summary-card", "summary-card-b"]):
                                summary_b_display = gr.Textbox(
                                    label="Model B",
                                    lines=10,
                                    interactive=False,
                                    show_copy_button=True,
                                    autoscroll=False,
                                    elem_id="summary-b-display"
                                )

                    # Reference answer toggle (same pattern as the FAQ)
                    with gr.Row(elem_id="reference-toggle-row"):
                        reference_toggle_btn = gr.Button("▶ Show Reference Answer", elem_classes=["faq-toggle-button"])

                    # Reference answer content - initially hidden (same pattern as the FAQ)
                    with gr.Row(visible=False, elem_id="reference-content") as reference_content:
                        reference_answer_display = gr.Markdown("", elem_classes="faq-text")

                # Voting section - initially hidden
                with gr.Column(visible=False, elem_id="voting-section") as voting_section:
                    gr.HTML("<hr>")
                    gr.Markdown("### 🏅 Cast Your Vote", elem_classes="section-heading")
                    with gr.Row():
                        vote_button_a = gr.Button("⬅️ Summary A is Better", elem_classes=["vote-button"], interactive=False)
                        vote_button_tie = gr.Button("🤝 Tie / Equally Good", elem_classes=["vote-button"], interactive=False)
                        vote_button_b = gr.Button("➡️ Summary B is Better", elem_classes=["vote-button"], interactive=False)
                        vote_button_neither = gr.Button("❌ Neither is Good", elem_classes=["vote-button", "vote-button-neither"], interactive=False)

                    with gr.Group(elem_classes=["feedback-section"], visible=False) as feedback_section:
                        feedback_checkboxes = gr.CheckboxGroup(label="Feedback (optional)", choices=[], interactive=False)

                    # Submit button - initially hidden
                    submit_button = gr.Button("Submit Your Vote", variant="primary", interactive=False, elem_id="submit-button", visible=False)

                with gr.Column(visible=False) as results_reveal_area:
                    gr.Markdown("---")
                    gr.Markdown("### ✅ Vote Submitted!", elem_classes="section-heading")
                    with gr.Row():
                        with gr.Column(scale=1):
                            gr.Markdown("### Model A was:", elem_classes="section-heading")
                            model_a_reveal = gr.Markdown("", elem_classes="model-reveal model-a-reveal")
                        with gr.Column(scale=1):
                            gr.Markdown("### Model B was:", elem_classes="section-heading")
                            model_b_reveal = gr.Markdown("", elem_classes="model-reveal model-b-reveal")
                    gr.HTML("<hr>")
                    with gr.Row(elem_classes=["control-buttons"]):
                        try_another_btn = gr.Button("🔄 Try Another Question", elem_id="try-another-btn")
| with gr.TabItem("Leaderboard", id="leaderboard-tab"): | |
| gr.Markdown("# SLM RAG Leaderboard", elem_classes="orange-title") | |
| gr.HTML('View performance statistics for all models ranked by Elo rating. <br><br><a href="https://docs.google.com/forms/d/e/1FAIpQLSeUZoy43MlpK8-tJS4a6n5Q8PAKf-8Twdui5ybU18t0e2UuVA/viewform" class="form-link" target="_blank" rel="noopener noreferrer">Submit a new model request</a>') | |
| with gr.Group(elem_id="leaderboard-info"): | |
| gr.Markdown("""### About Elo Ratings | |
| The Elo rating system provides a more accurate ranking than simple win rates: | |
| - All models start at 1500 points | |
| - Points are exchanged after each comparison based on the expected outcome | |
| - Beating a stronger model earns more points than beating a weaker one | |
| - The ± value shows the statistical confidence interval (95%) | |
| """) | |

            results_table_display = gr.HTML(label="Model Performance")

    # FAQ toggle functionality with icon change
    faq_toggle_btn.click(
        fn=toggle_faq,
        inputs=[faq_expanded],
        outputs=[faq_expanded, faq_content, faq_toggle_btn]
    )

    # Context toggle functionality
    context_toggle_btn.click(
        fn=toggle_context_display,
        inputs=[current_example, show_full_context],
        outputs=[show_full_context, context_display, context_toggle_btn]
    )

    # Reference answer toggle functionality (same pattern as the FAQ)
    reference_toggle_btn.click(
        fn=toggle_reference_answer,
        inputs=[show_reference_answer, current_example],
        outputs=[show_reference_answer, reference_content, reference_toggle_btn, reference_answer_display]
    )

    # Initialize UI to empty state on load
    demo.load(
        fn=initialize_empty_app,
        inputs=[],
        outputs=[
            context_section,
            model_section,
            voting_section,
            submit_button
        ]
    )

    # Load leaderboard on start
    demo.load(
        fn=load_leaderboard,
        inputs=[],
        outputs=[results_table_display]
    )

    # Getting a new question
    random_question_btn.click(
        fn=show_loading_state,
        inputs=[],
        outputs=[
            summary_a_display, summary_b_display,
            vote_button_a, vote_button_b, vote_button_tie, vote_button_neither,
            feedback_section, submit_button, results_reveal_area, random_question_btn,
            selected_winner
        ]
    ).then(
        fn=handle_new_example_click,
        inputs=[],
        outputs=[current_example]
    ).then(
        fn=update_ui_for_new_context,
        inputs=[current_example],
        outputs=[query_display, context_description, context_display,
                 context_toggle_btn, show_full_context]
    ).then(
        # Reset the reference section when loading a new question
        fn=reset_reference_section,
        inputs=[],
        outputs=[show_reference_answer, reference_content, reference_toggle_btn, reference_answer_display]
    ).then(
        # Explicitly hide the FAQ once a question is loaded
        fn=hide_faq_section,
        inputs=[],
        outputs=[faq_container, faq_content]
    ).then(
        fn=show_all_after_loading,
        inputs=[],
        outputs=[
            context_section,
            model_section,
            voting_section,
            submit_button,
            random_question_btn
        ]
    ).then(
        fn=process_example,
        inputs=[current_example],
        outputs=[model_a_name, model_b_name, summary_a_text, summary_b_text,
                 selected_winner, feedback_list, show_results_state, results_agg,
                 summary_a_display, summary_b_display, vote_button_a, vote_button_b,
                 vote_button_tie, vote_button_neither, feedback_checkboxes, feedback_section,
                 submit_button, results_reveal_area, random_question_btn, main_interface_area]
    )

    # Try another question
    try_another_btn.click(
        fn=show_loading_state,
        inputs=[],
        outputs=[
            summary_a_display, summary_b_display,
            vote_button_a, vote_button_b, vote_button_tie, vote_button_neither,
            feedback_section, submit_button, results_reveal_area, random_question_btn,
            selected_winner
        ]
    ).then(
        fn=handle_new_example_click,
        inputs=[],
        outputs=[current_example]
    ).then(
        fn=update_ui_for_new_context,
        inputs=[current_example],
        outputs=[query_display, context_description, context_display,
                 context_toggle_btn, show_full_context]
    ).then(
        # Reset the reference section when trying another question
        fn=reset_reference_section,
        inputs=[],
        outputs=[show_reference_answer, reference_content, reference_toggle_btn, reference_answer_display]
    ).then(
        # Explicitly hide the FAQ here too
        fn=hide_faq_section,
        inputs=[],
        outputs=[faq_container, faq_content]
    ).then(
        fn=show_all_after_loading,
        inputs=[],
        outputs=[
            context_section,
            model_section,
            voting_section,
            submit_button,
            random_question_btn
        ]
    ).then(
        fn=process_example,
        inputs=[current_example],
        outputs=[model_a_name, model_b_name, summary_a_text, summary_b_text,
                 selected_winner, feedback_list, show_results_state, results_agg,
                 summary_a_display, summary_b_display, vote_button_a, vote_button_b,
                 vote_button_tie, vote_button_neither, feedback_checkboxes, feedback_section,
                 submit_button, results_reveal_area, random_question_btn, main_interface_area]
    )

    # Vote button handling
    for btn, choice in zip(
        [vote_button_a, vote_button_b, vote_button_tie, vote_button_neither],
        ['left', 'right', 'tie', 'neither']
    ):
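        # Bind `choice` as a default argument: Python closures late-bind loop
        # variables, so a bare `lambda: select_vote_improved(choice)` would
        # make every button submit the last value ('neither').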
        btn.click(
            fn=lambda choice=choice: select_vote_improved(choice),
            inputs=None,
            outputs=[selected_winner, feedback_checkboxes, feedback_section, submit_button,
                     vote_button_a, vote_button_b, vote_button_tie, vote_button_neither]
        )

    feedback_checkboxes.change(
        fn=update_feedback,
        inputs=[feedback_checkboxes],
        outputs=[feedback_list]
    )

    submit_button.click(
        fn=handle_vote_submission,
        inputs=[current_example, model_a_name, model_b_name, selected_winner, feedback_list, summary_a_text, summary_b_text, results_agg],
        outputs=[show_results_state, results_agg, vote_button_a, vote_button_b,
                 vote_button_tie, vote_button_neither, feedback_checkboxes,
                 feedback_section, submit_button, results_reveal_area,
                 random_question_btn, results_table_display, main_interface_area,
                 context_toggle_btn, model_a_reveal, model_b_reveal]
    )

    tabs.select(
        fn=load_leaderboard,
        inputs=[],
        outputs=[results_table_display],
        api_name="refresh_leaderboard"
    )

    demo.unload(cleanup_on_disconnect)


if __name__ == "__main__":
    demo.launch(debug=True)