JunsWan committed on
Commit
291845b
·
verified ·
1 Parent(s): 63eb23b

Delete hardcorelogic.puzzle.json

Browse files
Files changed (1) hide show
  1. hardcorelogic.puzzle.json +0 -401
hardcorelogic.puzzle.json DELETED
@@ -1,401 +0,0 @@
1
- [
2
- {
3
- "model": "Qwen3-8B",
4
- "mode": "sampling (Temp=0.6)",
5
- "open-source": true,
6
- "total accuracy": 12.37,
7
- "Zebra": 26.92,
8
- "Sudoku": 2.33,
9
- "Skyscraper": 0.29,
10
- "Kakurasu": 36.00,
11
- "Crypto": 14.93,
12
- "Minesweeper": 0.25,
13
- "Navigation": 67.88,
14
- "Binario": 7.83,
15
- "Hanoi": 9.0,
16
- "Hitori": 16.83,
17
- "temperature": 0.6,
18
- "n_sampling": 4,
19
- "n": 50
20
- },
21
- {
22
- "model": "Qwen3-30B-A3B-Thinking-2507",
23
- "mode": "sampling (Temp=0.6)",
24
- "open-source": true,
25
- "total accuracy": 37.10,
26
- "Zebra": 64.25,
27
- "Sudoku": 24.72,
28
- "Skyscraper": 1.25,
29
- "Kakurasu": 87.88,
30
- "Crypto": 55.86,
31
- "Minesweeper": 38.13,
32
- "Navigation": 93.75,
33
- "Binario": 19.17,
34
- "Hanoi": 28.38,
35
- "Hitori": 40.00,
36
- "temperature": 0.6,
37
- "n_sampling": 4,
38
- "n": 50
39
- },
40
- {
41
- "model": "Qwen3-32B",
42
- "mode": "sampling (Temp=0.6)",
43
- "open-source": true,
44
- "total accuracy": 20.97,
45
- "Zebra": 44.00,
46
- "Sudoku": 6.89,
47
- "Skyscraper": 1.63,
48
- "Kakurasu": "51.13",
49
- "Crypto": "43.57",
50
- "Minesweeper": "0.54",
51
- "Navigation": "83.88",
52
- "Binario": "13.83",
53
- "Hanoi": "18.96",
54
- "Hitori": "21.42",
55
- "temperature": 0.6,
56
- "n_sampling": 4,
57
- "n": 50
58
- },
59
- {
60
- "model": "Qwen3-Next-80B-A3B-Thinking",
61
- "mode": "sampling (Temp=0.6)",
62
- "open-source": true,
63
- "total accuracy": 36.30,
64
- "Zebra": "71.0",
65
- "Sudoku": "21.78",
66
- "Skyscraper": "4.25",
67
- "Kakurasu": "83.13",
68
- "Crypto": "57.71",
69
- "Minesweeper": "27.38",
70
- "Navigation": "95.0",
71
- "Binario": "27.67",
72
- "Hanoi": "27.38",
73
- "Hitori": "36.5",
74
- "temperature": 0.6,
75
- "n_sampling": 4,
76
- "n": 50
77
- },
78
- {
79
- "model": "Qwen3-235B-A22B-Thinking-2507",
80
- "mode": "sampling (Temp=0.6)",
81
- "open-source": true,
82
- "total accuracy": 43.11,
83
- "Zebra": "61.0",
84
- "Sudoku": "28.94",
85
- "Skyscraper": "4.21",
86
- "Kakurasu": "89.5",
87
- "Crypto": "75.79",
88
- "Minesweeper": "38.75",
89
- "Navigation": "97.25",
90
- "Binario": "35.0",
91
- "Hanoi": "40.83",
92
- "Hitori": "40.5",
93
- "temperature": 0.6,
94
- "n_sampling": 4,
95
- "n": 50
96
- },
97
- {
98
- "model": "MiniMax-M1-40k",
99
- "mode": "sampling (Temp=0.6)",
100
- "open-source": true,
101
- "total accuracy": 6.44,
102
- "Zebra": "16.08",
103
- "Sudoku": "0.0",
104
- "Skyscraper": "0.13",
105
- "Kakurasu": "25.5",
106
- "Crypto": "1.5",
107
- "Minesweeper": "0.17",
108
- "Navigation": "9.5",
109
- "Binario": "4.58",
110
- "Hanoi": "13.75",
111
- "Hitori": "9.92",
112
- "temperature": 0.6,
113
- "n_sampling": 4,
114
- "n": 50
115
- },
116
- {
117
- "model": "DeepSeek-R1-0528-Qwen3-8B",
118
- "mode": "sampling (Temp=0.6)",
119
- "open-source": true,
120
- "total accuracy": 13.83,
121
- "Zebra": "39.33",
122
- "Sudoku": "0.28",
123
- "Skyscraper": "0.04",
124
- "Kakurasu": "39.38",
125
- "Crypto": "12.00",
126
- "Minesweeper": "1.75",
127
- "Navigation": "69.88",
128
- "Binario": "6.25",
129
- "Hanoi": "17.71",
130
- "Hitori": "8.00",
131
- "temperature": 0.6,
132
- "n_sampling": 4,
133
- "n": 50
134
- },
135
- {
136
- "model": "DeepSeek-V3.1",
137
- "mode": "sampling (Temp=0.6)",
138
- "open-source": true,
139
- "total accuracy": 41.08,
140
- "Zebra": "62.67",
141
- "Sudoku": "18.61",
142
- "Skyscraper": "1.38",
143
- "Kakurasu": "92.0",
144
- "Crypto": "75.64",
145
- "Minesweeper": "33.46",
146
- "Navigation": "92.75",
147
- "Binario": "23.42",
148
- "Hanoi": "46.63",
149
- "Hitori": "45.75",
150
- "temperature": 0.6,
151
- "n_sampling": 4,
152
- "n": 50
153
- },
154
- {
155
- "model": "DeepSeek-R1-0528",
156
- "mode": "sampling (Temp=0.6)",
157
- "open-source": true,
158
- "total accuracy": 41.10,
159
- "Zebra": "59.08",
160
- "Sudoku": "19.39",
161
- "Skyscraper": "1.25",
162
- "Kakurasu": "89.75",
163
- "Crypto": "80.93",
164
- "Minesweeper": "36.63",
165
- "Navigation": "97.0",
166
- "Binario": "35.83",
167
- "Hanoi": "43.58",
168
- "Hitori": "28.42",
169
- "temperature": 0.6,
170
- "n_sampling": 4,
171
- "n": 50
172
- },
173
- {
174
- "model": "GLM-4.5",
175
- "mode": "sampling (Temp=0.6)",
176
- "open-source": true,
177
- "total accuracy": 21.65,
178
- "Zebra": "29.58",
179
- "Sudoku": "4.56",
180
- "Skyscraper": "1.92",
181
- "Kakurasu": "44.25",
182
- "Crypto": "24.14",
183
- "Minesweeper": "9.33",
184
- "Navigation": "93.63",
185
- "Binario": "16.92",
186
- "Hanoi": "31.17",
187
- "Hitori": "23.25",
188
- "temperature": 0.6,
189
- "n_sampling": 4,
190
- "n": 50
191
- },
192
- {
193
- "model": "Kimi-K2-Instruct",
194
- "mode": "sampling (Temp=0.6)",
195
- "open-source": true,
196
- "total accuracy": 15.18,
197
- "Zebra": "19.42",
198
- "Sudoku": "1.89",
199
- "Skyscraper": "0.08",
200
- "Kakurasu": "50.75",
201
- "Crypto": "20.21",
202
- "Minesweeper": "7.00",
203
- "Navigation": "63.13",
204
- "Binario": "7.58",
205
- "Hanoi": "21.08",
206
- "Hitori": "11.67",
207
- "temperature": 0.6,
208
- "n_sampling": 4,
209
- "n": 50
210
- },
211
- {
212
- "model": "Seed-OSS-36B-Instruct",
213
- "mode": "sampling (Temp=0.6)",
214
- "open-source": true,
215
- "total accuracy": 38.87,
216
- "Zebra": "53.0",
217
- "Sudoku": "24.17",
218
- "Skyscraper": "4.71",
219
- "Kakurasu": "91.38",
220
- "Crypto": "52.43",
221
- "Minesweeper": "24.67",
222
- "Navigation": "96.5",
223
- "Binario": "31.67",
224
- "Hanoi": "45.17",
225
- "Hitori": "48.92",
226
- "temperature": 0.6,
227
- "n_sampling": 4,
228
- "n": 50
229
- },
230
- {
231
- "model": "gpt-oss-120b",
232
- "mode": "sampling (Temp=0.6)",
233
- "open-source": true,
234
- "total accuracy": 51.33,
235
- "Zebra": "56.67",
236
- "Sudoku": "58.22",
237
- "Skyscraper": "9.04",
238
- "Kakurasu": "88.5",
239
- "Crypto": "79.71",
240
- "Minesweeper": "56.67",
241
- "Navigation": "95.88",
242
- "Binario": "42.67",
243
- "Hanoi": "36.13",
244
- "Hitori": "61.08",
245
- "temperature": 0.6,
246
- "n_sampling": 4,
247
- "n": 50
248
- },
249
- {
250
- "model": "gpt-5",
251
- "mode": "sampling (Temp=0.6)",
252
- "open-source": false,
253
- "total accuracy": 67.37,
254
- "Zebra": "76.67",
255
- "Sudoku": "60.56",
256
- "Skyscraper": "22.92",
257
- "Kakurasu": "100.0",
258
- "Crypto": "77.86",
259
- "Minesweeper": "77.5",
260
- "Navigation": "98.75",
261
- "Binario": "85.0",
262
- "Hanoi": "65.83",
263
- "Hitori": "67.5",
264
- "temperature": 0.6,
265
- "n_sampling": 4,
266
- "n": 5
267
- },
268
- {
269
- "model": "gpt-5-mini",
270
- "mode": "sampling (Temp=0.6)",
271
- "open-source": false,
272
- "total accuracy": 53.40,
273
- "Zebra": "67.5",
274
- "Sudoku": "49.44",
275
- "Skyscraper": "15.0",
276
- "Kakurasu": "90.0",
277
- "Crypto": "92.86",
278
- "Minesweeper": "45.42",
279
- "Navigation": "100.0",
280
- "Binario": "47.5",
281
- "Hanoi": "47.92",
282
- "Hitori": "53.33",
283
- "temperature": 0.6,
284
- "n_sampling": 4,
285
- "n": 5
286
- },
287
- {
288
- "model": "o4-mini",
289
- "mode": "sampling (Temp=0.6)",
290
- "open-source": false,
291
- "total accuracy": 49.81,
292
- "Zebra": "71.67",
293
- "Sudoku": "48.89",
294
- "Skyscraper": "8.75",
295
- "Kakurasu": "87.5",
296
- "Crypto": "81.43",
297
- "Minesweeper": "46.67",
298
- "Navigation": "98.75",
299
- "Binario": "49.17",
300
- "Hanoi": "36.25",
301
- "Hitori": "50.83",
302
- "temperature": 0.6,
303
- "n_sampling": 4,
304
- "n": 5
305
- },
306
- {
307
- "model": "grok-4",
308
- "mode": "sampling (Temp=0.6)",
309
- "open-source": false,
310
- "total accuracy": 58.27,
311
- "Zebra": "87.5",
312
- "Sudoku": "35.56",
313
- "Skyscraper": "14.17",
314
- "Kakurasu": "98.75",
315
- "Crypto": "83.57",
316
- "Minesweeper": "42.08",
317
- "Navigation": "100.0",
318
- "Binario": "65.0",
319
- "Hanoi": "67.92",
320
- "Hitori": "73.33",
321
- "temperature": 0.6,
322
- "n_sampling": 4,
323
- "n": 5
324
- },
325
- {
326
- "model": "gemini-2.5-pro",
327
- "mode": "sampling (Temp=0.6)",
328
- "open-source": false,
329
- "total accuracy": 40.19,
330
- "Zebra": "47.5",
331
- "Sudoku": "12.22",
332
- "Skyscraper": "10.0",
333
- "Kakurasu": "90.0",
334
- "Crypto": "50.71",
335
- "Minesweeper": "35.0",
336
- "Navigation": "100.0",
337
- "Binario": "42.5",
338
- "Hanoi": "46.67",
339
- "Hitori": "45.0",
340
- "temperature": 0.6,
341
- "n_sampling": 4,
342
- "n": 5
343
- },
344
- {
345
- "model": "grok-3-mini",
346
- "mode": "sampling (Temp=0.6)",
347
- "open-source": false,
348
- "total accuracy": 42.50,
349
- "Zebra": "74.17",
350
- "Sudoku": "10.0",
351
- "Skyscraper": "0.42",
352
- "Kakurasu": "96.25",
353
- "Crypto": "59.29",
354
- "Minesweeper": "36.67",
355
- "Navigation": "97.5",
356
- "Binario": "40.83",
357
- "Hanoi": "45.0",
358
- "Hitori": "60.0",
359
- "temperature": 0.6,
360
- "n_sampling": 4,
361
- "n": 5
362
- },
363
- {
364
- "model": "claude-sonnet-4-thinking",
365
- "mode": "sampling (Temp=0.6)",
366
- "open-source": false,
367
- "total accuracy": 30.51,
368
- "Zebra": "30.83",
369
- "Sudoku": "19.44",
370
- "Skyscraper": "1.67",
371
- "Kakurasu": "88.75",
372
- "Crypto": "54.29",
373
- "Minesweeper": "15.83",
374
- "Navigation": "93.75",
375
- "Binario": "24.17",
376
- "Hanoi": "26.25",
377
- "Hitori": "40.0",
378
- "temperature": 0.6,
379
- "n_sampling": 4,
380
- "n": 5
381
- },
382
- {
383
- "model": "gemini-2.5-flash",
384
- "mode": "sampling (Temp=0.6)",
385
- "open-source": false,
386
- "total accuracy": 19.49,
387
- "Zebra": "20.0",
388
- "Sudoku": "0.56",
389
- "Skyscraper": "2.08",
390
- "Kakurasu": "43.75",
391
- "Crypto": "17.14",
392
- "Minesweeper": "12.92",
393
- "Navigation": "97.5",
394
- "Binario": "29.17",
395
- "Hanoi": "18.33",
396
- "Hitori": "22.5",
397
- "temperature": 0.6,
398
- "n_sampling": 4,
399
- "n": 5
400
- }
401
- ]