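# Shared constants for the HardcoreLogic leaderboard page: banner HTML,
# column layouts, and the JavaScript / CSS snippets embedded as strings.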
from collections import OrderedDict
# HTML banner: a centered "HardcoreLogic" heading whose fill is an animated
# blue gradient clipped to the glyphs, followed by the <style> block that
# defines the `gradientFlow` keyframes referenced by the heading.
#
# The gradient-text effect needs `background-clip: text`. The vendor-prefixed
# form is kept for older WebKit/Blink builds and the standard property is
# declared right after it so engines that honour only the unprefixed name
# still clip the background to the text instead of showing a transparent title.
BANNER = '''
<div style="
display: flex;
justify-content: center;
align-items: center;
height: 120px;
background: #ffffff;
border-bottom: 2px solid #ced4da;
">
<h1 style="
font-size: 5em;
font-family: 'Poppins', 'Segoe UI', sans-serif;
background: linear-gradient(90deg, #007bff, #00c6ff, #0096c7, #0077b6);
-webkit-background-clip: text;
background-clip: text;
color: transparent;
animation: gradientFlow 4s ease-in-out infinite;
background-size: 300%;
letter-spacing: 2px;
">
HardcoreLogic
</h1>
</div>
<style>
@keyframes gradientFlow {
0% { background-position: 0% 50%; }
50% { background-position: 100% 50%; }
100% { background-position: 0% 50%; }
}
</style>
'''
# Citation text exposed to the rest of the app. It currently holds only a
# newline, i.e. no citation is shown.
CITATION_TEXT = "\n"

# Draft BibTeX entry. It is NOT part of CITATION_TEXT: it was a free-standing
# string expression with no effect at runtime, so it is kept here as a comment.
# NOTE(review): `author` is empty and `url` is a bare arxiv.org link, so this
# looks like a placeholder awaiting the real reference; confirm before wiring
# it into CITATION_TEXT.
#
# @article{hardcorelogic2025,
# title={HardcoreLogic: Challenging Large Reasoning Models with Long-tail Logic Puzzle Games},
# author={},
# year={2025},
# url={https://arxiv.org/},
# }
# Ordered mapping for the main table. Keys match entries of the column-order
# lists in this file; values look like the human-readable headers shown for
# them (presumably used as a rename map; verify against the consumer).
column_names = OrderedDict(
    [
        ("model", "Model"),
        ("open-source", "Open Source"),
        ("total accuracy", "Total Acc"),
        ("unsolvable puzzle", "Unsolvable Puzzle ACC"),
    ]
)
# Ordered mapping for the per-puzzle table. The first two entries carry
# distinct labels; every puzzle column maps to itself, so those pairs are
# generated from the tuple of puzzle names (order is preserved).
column_names_puzzle = OrderedDict(
    [
        ("model", "Model"),
        ("total accuracy", "Total Acc"),
    ]
    + [
        (puzzle, puzzle)
        for puzzle in (
            "Zebra",
            "Binario",
            "Crypto",
            "Hanoi",
            "Hitpri",
            "Kakurasu",
            "Minesweeper",
            "Navigation",
            "Skyscraper",
            "Sudoku",
        )
    ]
)
# Markdown remark explaining a pairwise "WB Reward" scoring scheme.
# NOTE(review): no column in this file is named after a reward, so this text
# may be left over from another leaderboard; confirm it is still wanted.
LEADERBOARD_REMARKS = (
    "**WB Reward**: for each comparison (A vs B), a reward for A is **+/-1** "
    "if A is **much better/worse** than B, and **+/-0.5** if A is "
    "**slightly better/worse** than B; when there is a **Tie**, "
    "the reward is **0**.\n"
)

# Remark for the main view; currently just a newline (no visible text).
LEADERBOARD_REMARKS_MAIN = "\n"
# Name of the column used for ranking; it also appears in both column-order
# lists in this file.
RANKING_COLUMN = "total accuracy"

# Column order for the main table, assembled from three groups so the
# structure is visible at a glance: identity columns, accuracy columns, and
# run-configuration columns.
ORDERED_COLUMN_NAMES = (
    ["model", "mode", "open-source"]
    + [
        "total accuracy",
        "increased complexity",
        "uncommon elements",
        "unsolvable puzzle",
    ]
    + ["temperature", "n_sampling", "n"]
)
# Column order for the per-puzzle table: identity columns, the overall
# accuracy, one column per puzzle, then run-configuration columns.
ORDERED_COLUMN_NAMES_PUZZLE = (
    ["model", "mode", "open-source", "total accuracy"]
    + [
        "Zebra",
        "Binario",
        "Crypto",
        "Hanoi",
        "Hitpri",
        "Kakurasu",
        "Minesweeper",
        "Navigation",
        "Skyscraper",
        "Sudoku",
    ]
    + ["temperature", "n_sampling", "n"]
)
# JavaScript source (kept as a string) defining `refresh()`:
#   1. If the `__theme` query parameter is not "light", set it and navigate to
#      the updated URL. The function now returns right away in that case,
#      because the page is about to be replaced and any DOM work is discarded.
#   2. Otherwise look up the element with id "rank-column-radio", and wrap its
#      existing children, preceded by a bold "Decoding Mode: " label, in a
#      flex container appended back into that element.
# The lookup is guarded: `getElementById` returns null when no such element
# exists, and the previous code then threw a TypeError on `fieldset.firstChild`.
js_light = """
function refresh() {
    const url = new URL(window.location);
    if (url.searchParams.get('__theme') !== 'light') {
        url.searchParams.set('__theme', 'light');
        window.location.href = url.href;
        // Navigation has been triggered; skip DOM work on the outgoing page.
        return;
    }

    // Find the fieldset with the given id
    const fieldset = document.getElementById("rank-column-radio");
    // The radio group may not be rendered; nothing to decorate in that case.
    if (!fieldset) {
        return;
    }

    // Create a new span element with the text "Decoding Mode:"
    const rankBySpan = document.createElement("span");
    rankBySpan.textContent = "Decoding Mode: ";
    rankBySpan.style.fontWeight = "bold"; // Optional: make the text bold
    rankBySpan.style.fontSize = "19px"; // Larger font size
    rankBySpan.style.paddingRight = "18px"; // Add padding on the right

    // Wrap the span and the labels in a flex container
    const flexContainer = document.createElement("div");
    flexContainer.style.display = "flex";
    flexContainer.style.alignItems = "center";

    // Insert the rankBySpan at the beginning of the flex container
    flexContainer.appendChild(rankBySpan);

    // Move all existing labels into the flex container
    while (fieldset.firstChild) {
        flexContainer.appendChild(fieldset.firstChild);
    }

    // Append the flex container back to the fieldset
    fieldset.appendChild(flexContainer);
}
"""
# JavaScript source (kept as a string) defining `scroll_top()`: it logs a
# greeting to the console, selects every element with class `bubble-wrap`,
# and resets each one's `scrollTop` to 0 from a timer delayed by
# `index * 100` ms, so the elements are reset one after another.
# NOTE(review): the console.log line looks like leftover debugging output;
# confirm whether it is still wanted.
js_code = """
function scroll_top() {
console.log("Hello from Gradio!");
const bubbles = document.querySelectorAll('.bubble-wrap');
bubbles.forEach((bubble, index) => {
setTimeout(() => {
bubble.scrollTop = 0;
}, index * 100); // Delay of 100ms between each iteration
});
}
"""
# Markdown legend mapping task categories to their abbreviations.
# NOTE(review): these categories do not correspond to any puzzle column
# defined in this file; confirm the legend is still used.
TASK_TYPE_STR = (
    "**Tasks**: Info seeking (**InfoSek**), Creative Writing (**CrtWrt**), "
    "Coding&Debugging (**Code**), Reasoning (**Reason**), Editing (**Edit**), "
    "**Math**, Planning (**Plan**), Brainstorming (**Brnstrm**), "
    "Role playing (**RolPly**), Advice seeking (**AdvSek**), "
    "Data Analysis (**DataAna**)"
)
# Stylesheet (kept as a string) for the leaderboard page. Sections, in order:
#   - global typography: `code` size, hidden `footer`, three markdown text sizes
#   - `--fudan-*` custom properties on :root that the rules below reference
#   - header/cell/hover/container rules for #leaderboard-table and
#     #leaderboard-puzzle-table
#   - tab buttons, accordion labels, `.btn_boderline`, `.box_md`,
#     `.markdown-text-details`, `.plotly-plot`, `.sample_button`
#   - a final rule making both table containers block-level with a capped
#     height and vertical scrolling
# Note: the two table ids are styled by two separate container rules. The
# earlier one sets `overflow: hidden`, the last one sets `overflow-y: auto`.
# `.btn_boderline` is spelled as-is; it is a runtime class name, so it must
# match whatever markup references it.
css = """
/* ========== 🌟 Global Typography ========== */
code {
font-size: large;
}
footer {visibility: hidden}
.markdown-text{font-size: 14pt}
.markdown-text-small{font-size: 13pt}
.markdown-text-tiny{font-size: 12pt}
/* ========== 🎓 Fudan Blue Theme Colors ========== */
:root {
--fudan-blue: #002D72;
--fudan-blue-light: #E6EEF8;
--fudan-gray: #f7f7f7;
--fudan-border: #c8d6e5;
--fudan-highlight: #1E56A0;
}
/* ========== 🏅 Leaderboard Table ========== */
#leaderboard-table th,
#leaderboard-puzzle-table th {
background-color: var(--fudan-blue);
color: white;
text-align: center;
padding: 10px;
font-size: 15px;
border-bottom: 2px solid var(--fudan-border);
}
#leaderboard-table td,
#leaderboard-puzzle-table td {
text-align: center;
font-size: 14px;
padding: 8px;
background-color: white;
border-bottom: 1px solid var(--fudan-border);
}
/* Hover 行高亮 */
#leaderboard-table tr:hover,
#leaderboard-puzzle-table tr:hover {
background-color: var(--fudan-blue-light);
transition: background-color 0.2s ease-in-out;
}
/* 表格整体外观 */
#leaderboard-table,
#leaderboard-puzzle-table {
border-collapse: collapse;
border-radius: 10px;
overflow: hidden;
box-shadow: 0 0 10px rgba(0, 45, 114, 0.15);
}
/* ========== 📊 Tabs ========== */
.tab-buttons button[role="tab"] {
font-size: 15px;
font-weight: 600;
color: var(--fudan-blue);
border: 1px solid var(--fudan-border);
border-radius: 8px;
background-color: white;
padding: 8px 16px;
margin-right: 5px;
transition: all 0.2s ease-in-out;
}
.tab-buttons button[role="tab"]:hover {
background-color: var(--fudan-blue-light);
}
button.selected[role="tab"][aria-selected="true"] {
background-color: var(--fudan-blue);
color: white;
font-weight: bold;
font-size: 16px;
}
/* ========== 📦 Accordion & Buttons ========== */
.accordion-label button span{
font-size: 14pt;
font-weight: bold;
color: var(--fudan-blue);
}
.btn_boderline{
border: 1px solid var(--fudan-blue);
border-radius: 5px;
padding: 6px 12px;
margin: 5px;
font-size: 14pt;
font-weight: bold;
background-color: var(--fudan-blue-light);
color: var(--fudan-blue);
transition: background-color 0.3s;
}
.btn_boderline:hover{
background-color: var(--fudan-blue);
color: white;
}
/* ========== 🧩 Box & Card ========== */
.box_md{
border: 1px solid var(--fudan-border);
border-radius: 10px;
padding: 10px;
font-size: 12pt;
margin: 8px;
background-color: white;
box-shadow: 0 0 6px rgba(0, 45, 114, 0.1);
}
/* ========== 💬 Markdown Text Enhancements ========== */
.markdown-text-details{
margin: 10px;
padding: 10px;
background-color: var(--fudan-gray);
border-left: 4px solid var(--fudan-blue);
border-radius: 6px;
}
/* ========== 📈 Plot & Visualization ========== */
.plotly-plot{
height: auto;
max-height: 600px;
min-height: 600px;
border: 1px solid var(--fudan-border);
border-radius: 10px;
}
/* ========== 🧷 Misc Components ========== */
.sample_button{
border: 2px solid var(--fudan-blue);
border-radius: 10px;
padding: 10px;
font-size: 17pt;
font-weight: bold;
margin: 5px;
background-color: var(--fudan-blue-light);
color: var(--fudan-blue);
transition: all 0.3s ease-in-out;
}
.sample_button:hover {
background-color: var(--fudan-blue);
color: white;
}
/* Scrollable Table Containers */
#leaderboard-table,
#leaderboard-puzzle-table {
display: block;
max-height: 800px;
overflow-y: auto;
}
"""
