Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -37,6 +37,84 @@ def sort_categories(categories):
|
|
| 37 |
# ))
|
| 38 |
# return templates
|
| 39 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
def get_modality_icon(modality):
|
| 41 |
"""Return an emoji icon for each modality type."""
|
| 42 |
icons = {
|
|
@@ -220,6 +298,9 @@ def update_detailed_scorecard(model, selected_categories):
|
|
| 220 |
if category_name in models[model]['scores']:
|
| 221 |
category_data = models[model]['scores'][category_name]
|
| 222 |
card_content = f"<div class='card'><div class='card-title'>{category_name}</div>"
|
|
|
|
|
|
|
|
|
|
| 223 |
|
| 224 |
# Sort sections within each category
|
| 225 |
sorted_sections = sorted(category_data.items(),
|
|
@@ -595,6 +676,153 @@ css = """
|
|
| 595 |
border-color: #2c3e50;
|
| 596 |
color: #99ccff;
|
| 597 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 598 |
"""
|
| 599 |
|
| 600 |
first_model = next(iter(models.values()))
|
|
|
|
| 37 |
# ))
|
| 38 |
# return templates
|
| 39 |
|
| 40 |
+
def create_category_summary(category_data):
    """Build the HTML summary card shown at the top of a category card.

    Args:
        category_data: Mapping of section name to a section dict. Every
            section must carry a 'status' entry (compared against 'Yes' and
            'N/A' here) and may carry a 'questions' mapping whose keys are
            question texts and whose values are truthy when answered.

    Returns:
        An HTML string containing completion metrics, evaluation-coverage
        badges and a status breakdown for the category.

    Raises:
        KeyError: If a section has no 'status' entry.
    """
    sections = list(category_data.values())

    # Section-level statistics (N/A sections still count towards the total).
    total_sections = len(sections)
    completed_sections = sum(1 for section in sections if section['status'] == 'Yes')

    # Question-level statistics and coverage flags.
    total_questions = 0
    completed_questions = 0
    has_human_eval = False
    has_quantitative = False
    has_documentation = False
    quantitative_terms = ('quantitative', 'metric', 'benchmark')

    for section in sections:
        # Sections marked N/A are left out of the question statistics.
        if section['status'] == 'N/A':
            continue

        questions = section.get('questions', {})
        total_questions += len(questions)
        completed_questions += sum(1 for answered in questions.values() if answered)

        # Coverage is detected from keywords in the question text itself,
        # regardless of whether the question was answered.
        for question in questions:
            text = question.lower()
            if 'human' in text:
                has_human_eval = True
            if any(term in text for term in quantitative_terms):
                has_quantitative = True
            if 'documentation' in text:
                has_documentation = True

    # Guard against division by zero when there are no applicable questions.
    completion_rate = (completed_questions / total_questions * 100) if total_questions > 0 else 0

    # NOTE(review): the bare "π" prefixes below look like emoji whose UTF-8
    # bytes were mis-decoded; the original characters cannot be recovered from
    # what is left, so they are kept as-is. Confirm against the intended icons.
    parts = [
        "<div class='summary-card'>",
        "<div class='summary-title'>π Section Summary</div>",
        # Completion metrics
        "<div class='summary-section'>",
        "<div class='summary-subtitle'>π Completion Metrics</div>",
        f"<div class='metric-row'><span class='metric-label'>Overall Completion Rate:</span> <span class='metric-value'>{completion_rate:.1f}%</span></div>",
        f"<div class='metric-row'><span class='metric-label'>Sections Completed:</span> <span class='metric-value'>{completed_sections}/{total_sections}</span></div>",
        "</div>",
        # Evaluation coverage
        "<div class='summary-section'>",
        "<div class='summary-subtitle'>🎯 Evaluation Coverage</div>",
        "<div class='coverage-grid'>",
        f"<div class='coverage-item {get_coverage_class(has_human_eval)}'>👥 Human Evaluation</div>",
        f"<div class='coverage-item {get_coverage_class(has_quantitative)}'>π Quantitative Analysis</div>",
        f"<div class='coverage-item {get_coverage_class(has_documentation)}'>π Documentation</div>",
        "</div>",
        "</div>",
        # Status breakdown
        "<div class='summary-section'>",
        "<div class='summary-subtitle'>π Status Breakdown</div>",
        create_status_pills(category_data),
        "</div>",
        "</div>",
    ]
    return "".join(parts)
|
| 101 |
+
|
| 102 |
+
def get_coverage_class(has_feature):
    """Map a feature-presence flag to its coverage CSS class name.

    Returns 'covered' when ``has_feature`` is truthy and 'not-covered'
    otherwise.
    """
    if has_feature:
        return 'covered'
    return 'not-covered'
|
| 105 |
+
|
| 106 |
+
def create_status_pills(category_data):
    """Render one "pill" per section status with the number of sections in it.

    Args:
        category_data: Mapping of section name to a section dict; each
            section must have a 'status' entry.

    Returns:
        An HTML string: a 'status-pills' container holding one
        'status-pill' div per status. 'Yes', 'No' and 'N/A' are always
        present (even with a count of 0) and come first, in that order;
        any other status value found in the data follows in first-seen
        order.

    Raises:
        KeyError: If a section has no 'status' entry.
    """
    # Local import: the file's import block is not visible from this block.
    from html import escape

    status_counts = {'Yes': 0, 'No': 0, 'N/A': 0}
    for section in category_data.values():
        status = section['status']
        # Tally unexpected statuses under their own label instead of raising
        # KeyError and breaking the whole card.
        status_counts[status] = status_counts.get(status, 0) + 1

    # The lowercased status doubles as a CSS class (e.g. 'n/a'); escaping is a
    # no-op for the three standard values and keeps stray values from
    # injecting markup.
    pills = [
        f"<div class='status-pill {escape(str(status).lower())}'>{escape(str(status))}: {count}</div>"
        for status, count in status_counts.items()
    ]
    return "<div class='status-pills'>" + "".join(pills) + "</div>"
|
| 117 |
+
|
| 118 |
def get_modality_icon(modality):
|
| 119 |
"""Return an emoji icon for each modality type."""
|
| 120 |
icons = {
|
|
|
|
| 298 |
if category_name in models[model]['scores']:
|
| 299 |
category_data = models[model]['scores'][category_name]
|
| 300 |
card_content = f"<div class='card'><div class='card-title'>{category_name}</div>"
|
| 301 |
+
|
| 302 |
+
# Add summary section at the top of each card
|
| 303 |
+
card_content += create_category_summary(category_data)
|
| 304 |
|
| 305 |
# Sort sections within each category
|
| 306 |
sorted_sections = sorted(category_data.items(),
|
|
|
|
| 676 |
border-color: #2c3e50;
|
| 677 |
color: #99ccff;
|
| 678 |
}
|
| 679 |
+
|
| 680 |
+
.summary-card {
|
| 681 |
+
background-color: #f8f9fa;
|
| 682 |
+
border: 1px solid #e0e0e0;
|
| 683 |
+
border-radius: 8px;
|
| 684 |
+
padding: 16px;
|
| 685 |
+
margin-bottom: 20px;
|
| 686 |
+
}
|
| 687 |
+
|
| 688 |
+
.summary-title {
|
| 689 |
+
font-size: 1.2em;
|
| 690 |
+
font-weight: bold;
|
| 691 |
+
margin-bottom: 12px;
|
| 692 |
+
color: #333;
|
| 693 |
+
}
|
| 694 |
+
|
| 695 |
+
.summary-section {
|
| 696 |
+
margin-bottom: 16px;
|
| 697 |
+
}
|
| 698 |
+
|
| 699 |
+
.summary-subtitle {
|
| 700 |
+
font-size: 1em;
|
| 701 |
+
font-weight: 600;
|
| 702 |
+
color: #555;
|
| 703 |
+
margin-bottom: 8px;
|
| 704 |
+
}
|
| 705 |
+
|
| 706 |
+
.metric-row {
|
| 707 |
+
display: flex;
|
| 708 |
+
justify-content: space-between;
|
| 709 |
+
align-items: center;
|
| 710 |
+
margin-bottom: 4px;
|
| 711 |
+
}
|
| 712 |
+
|
| 713 |
+
.metric-label {
|
| 714 |
+
color: #666;
|
| 715 |
+
}
|
| 716 |
+
|
| 717 |
+
.metric-value {
|
| 718 |
+
font-weight: 600;
|
| 719 |
+
color: #333;
|
| 720 |
+
}
|
| 721 |
+
|
| 722 |
+
.coverage-grid {
|
| 723 |
+
display: grid;
|
| 724 |
+
grid-template-columns: repeat(auto-fit, minmax(150px, 1fr));
|
| 725 |
+
gap: 8px;
|
| 726 |
+
margin-top: 8px;
|
| 727 |
+
}
|
| 728 |
+
|
| 729 |
+
.coverage-item {
|
| 730 |
+
padding: 8px;
|
| 731 |
+
border-radius: 6px;
|
| 732 |
+
text-align: center;
|
| 733 |
+
font-size: 0.9em;
|
| 734 |
+
}
|
| 735 |
+
|
| 736 |
+
.coverage-item.covered {
|
| 737 |
+
background-color: #e6ffe6;
|
| 738 |
+
color: #006600;
|
| 739 |
+
border: 1px solid #b3ffb3;
|
| 740 |
+
}
|
| 741 |
+
|
| 742 |
+
.coverage-item.not-covered {
|
| 743 |
+
background-color: #f5f5f5;
|
| 744 |
+
color: #666;
|
| 745 |
+
border: 1px solid #ddd;
|
| 746 |
+
}
|
| 747 |
+
|
| 748 |
+
.status-pills {
|
| 749 |
+
display: flex;
|
| 750 |
+
gap: 8px;
|
| 751 |
+
flex-wrap: wrap;
|
| 752 |
+
}
|
| 753 |
+
|
| 754 |
+
.status-pill {
|
| 755 |
+
padding: 4px 12px;
|
| 756 |
+
border-radius: 16px;
|
| 757 |
+
font-size: 0.9em;
|
| 758 |
+
font-weight: 500;
|
| 759 |
+
}
|
| 760 |
+
|
| 761 |
+
.status-pill.yes {
|
| 762 |
+
background-color: #e6ffe6;
|
| 763 |
+
color: #006600;
|
| 764 |
+
border: 1px solid #b3ffb3;
|
| 765 |
+
}
|
| 766 |
+
|
| 767 |
+
.status-pill.no {
|
| 768 |
+
background-color: #ffe6e6;
|
| 769 |
+
color: #990000;
|
| 770 |
+
border: 1px solid #ffb3b3;
|
| 771 |
+
}
|
| 772 |
+
|
| 773 |
+
.status-pill.n\\/a {
|
| 774 |
+
background-color: #f5f5f5;
|
| 775 |
+
color: #666;
|
| 776 |
+
border: 1px solid #ddd;
|
| 777 |
+
}
|
| 778 |
+
|
| 779 |
+
.dark .summary-card {
|
| 780 |
+
background-color: #2a2a2a;
|
| 781 |
+
border-color: #444;
|
| 782 |
+
}
|
| 783 |
+
|
| 784 |
+
.dark .summary-title,
|
| 785 |
+
.dark .summary-subtitle {
|
| 786 |
+
color: #e0e0e0;
|
| 787 |
+
}
|
| 788 |
+
|
| 789 |
+
.dark .metric-label {
|
| 790 |
+
color: #999;
|
| 791 |
+
}
|
| 792 |
+
|
| 793 |
+
.dark .metric-value {
|
| 794 |
+
color: #fff;
|
| 795 |
+
}
|
| 796 |
+
|
| 797 |
+
.dark .coverage-item.covered {
|
| 798 |
+
background-color: #1a3a1a;
|
| 799 |
+
color: #90EE90;
|
| 800 |
+
border-color: #2d5a2d;
|
| 801 |
+
}
|
| 802 |
+
|
| 803 |
+
.dark .coverage-item.not-covered {
|
| 804 |
+
background-color: #333;
|
| 805 |
+
color: #999;
|
| 806 |
+
border-color: #444;
|
| 807 |
+
}
|
| 808 |
+
|
| 809 |
+
.dark .status-pill.yes {
|
| 810 |
+
background-color: #1a3a1a;
|
| 811 |
+
color: #90EE90;
|
| 812 |
+
border-color: #2d5a2d;
|
| 813 |
+
}
|
| 814 |
+
|
| 815 |
+
.dark .status-pill.no {
|
| 816 |
+
background-color: #3a1a1a;
|
| 817 |
+
color: #FFB6B6;
|
| 818 |
+
border-color: #5a2d2d;
|
| 819 |
+
}
|
| 820 |
+
|
| 821 |
+
.dark .status-pill.n\\/a {
|
| 822 |
+
background-color: #333;
|
| 823 |
+
color: #999;
|
| 824 |
+
border-color: #444;
|
| 825 |
+
}
|
| 826 |
"""
|
| 827 |
|
| 828 |
first_model = next(iter(models.values()))
|