[ { "model": "Qwen3-8B", "mode": "sampling (Temp=0.6)", "open-source": true, "total accuracy": 12.37, "increased complexity": 15.08, "uncommon elements": 10.58, "unsolvable puzzle": 69.54, "temperature": 0.6, "n_sampling": 4, "n": 50 }, { "model": "Qwen3-30B-A3B-Thinking-2507", "mode": "sampling (Temp=0.6)", "open-source": true, "total accuracy": 37.33, "increased complexity": "", "uncommon elements": "" , "unsolvable puzzle": 86.09, "temperature": 0.6, "n_sampling": 4, "n": 50 }, { "model": "Qwen3-32B", "mode": "sampling (Temp=0.6)", "open-source": true, "total accuracy": 20.97, "increased complexity": 25.38 , "uncommon elements": 16.93 , "unsolvable puzzle": 65.48, "temperature": 0.6, "n_sampling": 4, "n": 50 }, { "model": "Qwen3-Next-80B-A3B-Thinking", "mode": "sampling (Temp=0.6)", "open-source": true, "total accuracy": 36.35, "increased complexity": 41.97, "uncommon elements": 32.13 , "unsolvable puzzle": 83.11, "temperature": 0.6, "n_sampling": 4, "n": 50 }, { "model": "Qwen3-235B-A22B-Thinking-2507", "mode": "sampling (Temp=0.6)", "open-source": true, "total accuracy": 43.33, "increased complexity": 46.93, "uncommon elements": 40.94 , "unsolvable puzzle": 84.41, "temperature": 0.6, "n_sampling": 4, "n": 50 }, { "model": "MiniMax-M1-40k", "mode": "sampling (Temp=0.6)", "open-source": true, "total accuracy": 6.44, "increased complexity": 5.27, "uncommon elements": 6.88 , "unsolvable puzzle": 51.39, "temperature": 0.6, "n_sampling": 4, "n": 50 }, { "model": "DeepSeek-R1-0528-Qwen3-8B", "mode": "sampling (Temp=0.6)", "open-source": true, "total accuracy": 13.83, "increased complexity": "", "uncommon elements": "" , "unsolvable puzzle": 95.19, "temperature": 0.6, "n_sampling": 4, "n": 50 }, { "model": "DeepSeek-V3.1", "mode": "sampling (Temp=0.6)", "open-source": true, "total accuracy": 41.43, "increased complexity": 44.61, "uncommon elements": 39.09 , "unsolvable puzzle": 88.76, "temperature": 0.6, "n_sampling": 4, "n": 50 }, { "model": "DeepSeek-R1-0528", 
"mode": "sampling (Temp=0.6)", "open-source": true, "total accuracy": 41.37, "increased complexity": 45.87, "uncommon elements": 37.28 , "unsolvable puzzle": 93.50, "temperature": 0.6, "n_sampling": 4, "n": 50 }, { "model": "GLM-4.5", "mode": "sampling (Temp=0.6)", "open-source": true, "total accuracy": 21.67, "increased complexity": 24.17, "uncommon elements": 21.49, "unsolvable puzzle": 93.26, "temperature": 0.6, "n_sampling": 4, "n": 50 }, { "model": "Kimi-K2-Instruct", "mode": "sampling (Temp=0.6)", "open-source": true, "total accuracy": 15.18, "increased complexity": 17.33, "uncommon elements": 14.71, "unsolvable puzzle": 87.46, "temperature": 0.6, "n_sampling": 4, "n": 50 }, { "model": "Seed-OSS-36B-Instruct", "mode": "sampling (Temp=0.6)", "open-source": true, "total accuracy": 38.96, "increased complexity": 41.01, "uncommon elements": 38.79 , "unsolvable puzzle": 85.76, "temperature": 0.6, "n_sampling": 4, "n": 50 }, { "model": "gpt-oss-120b", "mode": "sampling (Temp=0.6)", "open-source": true, "total accuracy": 51.97, "increased complexity": 54.08, "uncommon elements": 51.11, "unsolvable puzzle": 93.35, "temperature": 0.6, "n_sampling": 4, "n": 50 }, { "model": "gpt-5", "mode": "sampling (Temp=0.6)", "open-source": false, "total accuracy": 69.10, "increased complexity": 69.89, "uncommon elements": 67.88, "unsolvable puzzle": 97.78, "temperature": 0.6, "n_sampling": 4, "n": 5 }, { "model": "gpt-5-mini", "mode": "sampling (Temp=0.6)", "open-source": false, "total accuracy": 54.49, "increased complexity": 55.76, "uncommon elements": 52.13 , "unsolvable puzzle": 98.52, "temperature": 0.6, "n_sampling": 4, "n": 5 }, { "model": "o4-mini", "mode": "sampling (Temp=0.6)", "open-source": false, "total accuracy": 50.13, "increased complexity": 55.11, "uncommon elements": 47.13 , "unsolvable puzzle": 95.00, "temperature": 0.6, "n_sampling": 4, "n": 5 }, { "model": "grok-4", "mode": "sampling (Temp=0.6)", "open-source": false, "total accuracy": 59.55,
"increased complexity": 58.26 , "uncommon elements": 59.62 , "unsolvable puzzle": 97.59, "temperature": 0.6, "n_sampling": 4, "n": 5 }, { "model": "gemini-2.5-pro", "mode": "sampling (Temp=0.6)", "open-source": false, "total accuracy": 40.58, "increased complexity": 43.80, "uncommon elements": 39.38 , "unsolvable puzzle": 91.48, "temperature": 0.6, "n_sampling": 4, "n": 5 }, { "model": "grok-3-mini", "mode": "sampling (Temp=0.6)", "open-source": false, "total accuracy": 42.56, "increased complexity": 48.48, "uncommon elements": 39.5, "unsolvable puzzle": 94.63, "temperature": 0.6, "n_sampling": 4, "n": 5 }, { "model": "claude-sonnet-4-thinking", "mode": "sampling (Temp=0.6)", "open-source": false, "total accuracy": 30.51, "increased complexity": 34.67, "uncommon elements": 28.25 , "unsolvable puzzle": 57.96, "temperature": 0.6, "n_sampling": 4, "n": 5 }, { "model": "gemini-2.5-flash", "mode": "sampling (Temp=0.6)", "open-source": false, "total accuracy": 19.49, "increased complexity": 25.11, "uncommon elements": 16.00, "unsolvable puzzle": 57.78, "temperature": 0.6, "n_sampling": 4, "n": 5 } ]