Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -201,6 +201,7 @@ def vote(
|
|
| 201 |
gr.update(value=f"*Model: {model_a}*"), # model_name_a
|
| 202 |
gr.update(value=f"*Model: {model_b}*"), # model_name_b
|
| 203 |
gr.update(interactive=True, value="Run the evaluators", variant="primary"), # send_btn
|
|
|
|
| 204 |
]
|
| 205 |
|
| 206 |
|
|
@@ -434,61 +435,59 @@ with gr.Blocks(theme="default", css=CSS_STYLES) as demo:
|
|
| 434 |
|
| 435 |
with gr.Tabs():
|
| 436 |
with gr.TabItem("Judge Arena"):
|
|
|
|
| 437 |
with gr.Row():
|
| 438 |
# Left side - Input section
|
| 439 |
with gr.Column(scale=1):
|
| 440 |
-
random_btn = gr.Button("🎲", scale=0)
|
| 441 |
with gr.Group():
|
| 442 |
human_input = gr.TextArea(
|
| 443 |
label="👩 Human Input",
|
| 444 |
-
lines=
|
| 445 |
placeholder="Enter the human message here..."
|
| 446 |
)
|
| 447 |
|
| 448 |
ai_response = gr.TextArea(
|
| 449 |
label="🤖 AI Response",
|
| 450 |
-
lines=
|
| 451 |
placeholder="Enter the AI response here..."
|
| 452 |
)
|
| 453 |
|
| 454 |
-
|
| 455 |
-
|
| 456 |
-
|
| 457 |
-
|
| 458 |
-
|
| 459 |
-
)
|
| 460 |
|
| 461 |
# Right side - Model outputs
|
| 462 |
with gr.Column(scale=1):
|
| 463 |
-
gr.Markdown("
|
| 464 |
-
gr.Markdown("\n### 👩‍⚖️ Judge A")
|
| 465 |
with gr.Group():
|
|
|
|
| 466 |
with gr.Row():
|
| 467 |
with gr.Column(scale=1, min_width=100): # Fixed narrow width for score
|
| 468 |
-
score_a = gr.Textbox(label="Score", interactive=False)
|
| 469 |
vote_a = gr.Button("Vote A", variant="primary", visible=False)
|
| 470 |
with gr.Column(scale=9, min_width=400): # Wider width for critique
|
| 471 |
-
critique_a = gr.TextArea(label="Critique", lines=
|
| 472 |
-
model_name_a = gr.Markdown("*Model: Hidden*")
|
| 473 |
|
| 474 |
-
#
|
| 475 |
-
gr.
|
| 476 |
|
| 477 |
-
#
|
| 478 |
with gr.Row(visible=False) as tie_button_row:
|
| 479 |
with gr.Column():
|
| 480 |
vote_tie = gr.Button("Tie", variant="secondary")
|
| 481 |
-
|
| 482 |
|
| 483 |
-
gr.Markdown("###
|
| 484 |
with gr.Group():
|
|
|
|
| 485 |
with gr.Row():
|
| 486 |
with gr.Column(scale=1, min_width=100): # Fixed narrow width for score
|
| 487 |
-
score_b = gr.Textbox(label="Score", interactive=False)
|
| 488 |
vote_b = gr.Button("Vote B", variant="primary", visible=False)
|
| 489 |
with gr.Column(scale=9, min_width=400): # Wider width for critique
|
| 490 |
-
critique_b = gr.TextArea(label="Critique", lines=
|
| 491 |
-
model_name_b = gr.Markdown("*Model: Hidden*")
|
| 492 |
# Place Vote B button directly under Judge B
|
| 493 |
|
| 494 |
gr.Markdown("<br>")
|
|
@@ -636,6 +635,7 @@ with gr.Blocks(theme="default", css=CSS_STYLES) as demo:
|
|
| 636 |
model_name_a,
|
| 637 |
model_name_b,
|
| 638 |
send_btn,
|
|
|
|
| 639 |
],
|
| 640 |
)
|
| 641 |
|
|
@@ -658,6 +658,7 @@ with gr.Blocks(theme="default", css=CSS_STYLES) as demo:
|
|
| 658 |
model_name_a,
|
| 659 |
model_name_b,
|
| 660 |
send_btn,
|
|
|
|
| 661 |
],
|
| 662 |
)
|
| 663 |
|
|
@@ -680,6 +681,7 @@ with gr.Blocks(theme="default", css=CSS_STYLES) as demo:
|
|
| 680 |
model_name_a,
|
| 681 |
model_name_b,
|
| 682 |
send_btn,
|
|
|
|
| 683 |
],
|
| 684 |
)
|
| 685 |
|
|
@@ -703,6 +705,10 @@ with gr.Blocks(theme="default", css=CSS_STYLES) as demo:
|
|
| 703 |
score_a, critique_a = parse_model_response(response_a)
|
| 704 |
score_b, critique_b = parse_model_response(response_b)
|
| 705 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 706 |
# Update the last_submission state with the current values
|
| 707 |
last_submission.value = current_submission
|
| 708 |
|
|
@@ -725,6 +731,7 @@ with gr.Blocks(theme="default", css=CSS_STYLES) as demo:
|
|
| 725 |
variant="secondary",
|
| 726 |
interactive=True
|
| 727 |
),
|
|
|
|
| 728 |
)
|
| 729 |
|
| 730 |
send_btn.click(
|
|
@@ -744,6 +751,7 @@ with gr.Blocks(theme="default", css=CSS_STYLES) as demo:
|
|
| 744 |
model_name_a,
|
| 745 |
model_name_b,
|
| 746 |
send_btn,
|
|
|
|
| 747 |
],
|
| 748 |
)
|
| 749 |
|
|
@@ -825,5 +833,12 @@ with gr.Blocks(theme="default", css=CSS_STYLES) as demo:
|
|
| 825 |
outputs=[send_btn]
|
| 826 |
)
|
| 827 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 828 |
if __name__ == "__main__":
|
| 829 |
demo.launch()
|
|
|
|
| 201 |
gr.update(value=f"*Model: {model_a}*"), # model_name_a
|
| 202 |
gr.update(value=f"*Model: {model_b}*"), # model_name_b
|
| 203 |
gr.update(interactive=True, value="Run the evaluators", variant="primary"), # send_btn
|
| 204 |
+
gr.update(visible=True), # spacing_div
|
| 205 |
]
|
| 206 |
|
| 207 |
|
|
|
|
| 435 |
|
| 436 |
with gr.Tabs():
|
| 437 |
with gr.TabItem("Judge Arena"):
|
| 438 |
+
random_btn = gr.Button("🎲", scale=0)
|
| 439 |
with gr.Row():
|
| 440 |
# Left side - Input section
|
| 441 |
with gr.Column(scale=1):
|
|
|
|
| 442 |
with gr.Group():
|
| 443 |
human_input = gr.TextArea(
|
| 444 |
label="👩 Human Input",
|
| 445 |
+
lines=12,
|
| 446 |
placeholder="Enter the human message here..."
|
| 447 |
)
|
| 448 |
|
| 449 |
ai_response = gr.TextArea(
|
| 450 |
label="🤖 AI Response",
|
| 451 |
+
lines=12,
|
| 452 |
placeholder="Enter the AI response here..."
|
| 453 |
)
|
| 454 |
|
| 455 |
+
send_btn = gr.Button(
|
| 456 |
+
value="Run the evaluators",
|
| 457 |
+
variant="primary",
|
| 458 |
+
size="lg"
|
| 459 |
+
)
|
|
|
|
| 460 |
|
| 461 |
# Right side - Model outputs
|
| 462 |
with gr.Column(scale=1):
|
| 463 |
+
gr.Markdown("### 👩‍⚖️ Judge A")
|
|
|
|
| 464 |
with gr.Group():
|
| 465 |
+
model_name_a = gr.Markdown("*Model: Hidden*")
|
| 466 |
with gr.Row():
|
| 467 |
with gr.Column(scale=1, min_width=100): # Fixed narrow width for score
|
| 468 |
+
score_a = gr.Textbox(label="Score", lines=5, interactive=False)
|
| 469 |
vote_a = gr.Button("Vote A", variant="primary", visible=False)
|
| 470 |
with gr.Column(scale=9, min_width=400): # Wider width for critique
|
| 471 |
+
critique_a = gr.TextArea(label="Critique", lines=7, interactive=False)
|
|
|
|
| 472 |
|
| 473 |
+
# Spacing div that's visible only when tie button is hidden
|
| 474 |
+
spacing_div = gr.HTML('<div style="height: 42px;"></div>', visible=True, elem_id="spacing-div")
|
| 475 |
|
| 476 |
+
# Tie button row
|
| 477 |
with gr.Row(visible=False) as tie_button_row:
|
| 478 |
with gr.Column():
|
| 479 |
vote_tie = gr.Button("Tie", variant="secondary")
|
| 480 |
+
|
| 481 |
|
| 482 |
+
gr.Markdown("### 🧑‍⚖️ Judge B")
|
| 483 |
with gr.Group():
|
| 484 |
+
model_name_b = gr.Markdown("*Model: Hidden*")
|
| 485 |
with gr.Row():
|
| 486 |
with gr.Column(scale=1, min_width=100): # Fixed narrow width for score
|
| 487 |
+
score_b = gr.Textbox(label="Score", lines=5, interactive=False)
|
| 488 |
vote_b = gr.Button("Vote B", variant="primary", visible=False)
|
| 489 |
with gr.Column(scale=9, min_width=400): # Wider width for critique
|
| 490 |
+
critique_b = gr.TextArea(label="Critique", lines=7, interactive=False)
|
|
|
|
| 491 |
# Place Vote B button directly under Judge B
|
| 492 |
|
| 493 |
gr.Markdown("<br>")
|
|
|
|
| 635 |
model_name_a,
|
| 636 |
model_name_b,
|
| 637 |
send_btn,
|
| 638 |
+
spacing_div,
|
| 639 |
],
|
| 640 |
)
|
| 641 |
|
|
|
|
| 658 |
model_name_a,
|
| 659 |
model_name_b,
|
| 660 |
send_btn,
|
| 661 |
+
spacing_div,
|
| 662 |
],
|
| 663 |
)
|
| 664 |
|
|
|
|
| 681 |
model_name_a,
|
| 682 |
model_name_b,
|
| 683 |
send_btn,
|
| 684 |
+
spacing_div,
|
| 685 |
],
|
| 686 |
)
|
| 687 |
|
|
|
|
| 705 |
score_a, critique_a = parse_model_response(response_a)
|
| 706 |
score_b, critique_b = parse_model_response(response_b)
|
| 707 |
|
| 708 |
+
# Format scores with "/ 5"
|
| 709 |
+
score_a = f"{score_a} / 5"
|
| 710 |
+
score_b = f"{score_b} / 5"
|
| 711 |
+
|
| 712 |
# Update the last_submission state with the current values
|
| 713 |
last_submission.value = current_submission
|
| 714 |
|
|
|
|
| 731 |
variant="secondary",
|
| 732 |
interactive=True
|
| 733 |
),
|
| 734 |
+
gr.update(visible=False), # spacing_div
|
| 735 |
)
|
| 736 |
|
| 737 |
send_btn.click(
|
|
|
|
| 751 |
model_name_a,
|
| 752 |
model_name_b,
|
| 753 |
send_btn,
|
| 754 |
+
spacing_div,
|
| 755 |
],
|
| 756 |
)
|
| 757 |
|
|
|
|
| 833 |
outputs=[send_btn]
|
| 834 |
)
|
| 835 |
|
| 836 |
+
# Update the demo.load to include the random example population
|
| 837 |
+
demo.load(
|
| 838 |
+
fn=populate_random_example,
|
| 839 |
+
inputs=[],
|
| 840 |
+
outputs=[human_input, ai_response]
|
| 841 |
+
)
|
| 842 |
+
|
| 843 |
if __name__ == "__main__":
|
| 844 |
demo.launch()
|