CodeGoat24 committed on
Commit
4a37102
·
verified ·
1 Parent(s): 2f58c14

Update leaderboard_data.json

Browse files
Files changed (1) hide show
  1. leaderboard_data.json +33 -1613
leaderboard_data.json CHANGED
@@ -1,1222 +1,5 @@
1
  {
2
  "leaderboard": [
3
- {
4
- "model": "Runway-Gen4-Image",
5
- "link": "https://docs.dev.runwayml.com/api/#tag/Start-generating/paths/~1v1~1text_to_image/post",
6
- "hf": "-",
7
- "open_source": false,
8
- "release_date": "2025-07",
9
-
10
- "Overall": 69.75,
11
-
12
- "Style": 93.44,
13
-
14
- "World Knowledge": 90.36,
15
-
16
- "Attribute-Overall": 74.03,
17
- "Quantity": 72.86,
18
- "Expression": 51.97,
19
- "Material": 89.42,
20
- "Size": 68.06,
21
- "Shape": 65.62,
22
- "Color": 95.00,
23
-
24
- "Action-Overall": 70.21,
25
- "Hand": 62.18,
26
- "Full body": 79.35,
27
- "Animal": 82.35,
28
- "Non Contact": 66.15,
29
- "Contact": 60.37,
30
- "State": 71.70,
31
-
32
- "Relationship-Overall": 72.56,
33
- "Composition": 74.32,
34
- "Similarity": 62.22,
35
- "Inclusion": 77.84,
36
- "Comparison": 75.78,
37
-
38
- "Compound-Overall": 67.76,
39
- "Imagination": 71.65,
40
- "Feature matching": 63.71,
41
-
42
- "Grammar-Overall": 70.08,
43
- "Pronoun Reference": 71.21,
44
- "Consistency": 67.59,
45
- "Negation": 71.03,
46
-
47
- "Layout-Overall": 76.33,
48
- "2D": 77.61,
49
- "3D": 75.00,
50
-
51
- "Logical Reasoning": 49.31,
52
-
53
- "Text": 33.43
54
- },
55
- {
56
- "model": "Nano Banana",
57
- "link": "https://ainanobanana.io/",
58
- "hf": "-",
59
- "open_source": false,
60
- "release_date": "2025-08",
61
-
62
- "Overall": 87.45,
63
-
64
- "Style": 98.87,
65
-
66
- "World Knowledge": 96.32,
67
-
68
- "Attribute-Overall": 87.84,
69
- "Quantity": 85.00,
70
- "Expression": 83.33,
71
- "Material": 88.50,
72
- "Size": 95.74,
73
- "Shape": 78.21,
74
- "Color": 99.17,
75
-
76
- "Action-Overall": 86.83,
77
- "Hand": 82.05,
78
- "Full body": 93.41,
79
- "Animal": 86.03,
80
- "Non Contact": 82.47,
81
- "Contact": 83.33,
82
- "State": 91.98,
83
-
84
- "Relationship-Overall": 92.00,
85
- "Composition": 94.76,
86
- "Similarity": 86.52,
87
- "Inclusion": 91.26,
88
- "Comparison": 94.53,
89
-
90
- "Compound-Overall": 87.83,
91
- "Imagination": 89.66,
92
- "Feature matching": 86.02,
93
-
94
- "Grammar-Overall": 83.36,
95
- "Pronoun Reference": 90.71,
96
- "Consistency": 82.08,
97
- "Negation": 76.59,
98
-
99
- "Layout-Overall": 91.96,
100
- "2D": 92.65,
101
- "3D": 91.25,
102
-
103
- "Logical Reasoning": 74.26,
104
-
105
- "Text": 75.22
106
- },
107
- {
108
- "model": "Stability-AI-stable-image-ultra",
109
- "link": "https://platform.stability.ai/docs/api-reference#tag/Generate/paths/~1v2beta~1stable-image~1generate~1ultra/post",
110
- "hf": "-",
111
- "open_source": false,
112
- "release_date": "2025-04",
113
-
114
- "Overall": 61.96,
115
-
116
- "Style": 87.20,
117
-
118
- "World Knowledge": 87.18,
119
-
120
- "Attribute-Overall": 66.35,
121
- "Quantity": 67.36,
122
- "Expression": 48.08,
123
- "Material": 64.15,
124
- "Size": 69.44,
125
- "Shape": 64.38,
126
- "Color": 91.67,
127
-
128
- "Action-Overall": 59.22,
129
- "Hand": 55.77,
130
- "Full body": 58.15,
131
- "Animal": 63.24,
132
- "Non Contact": 61.22,
133
- "Contact": 51.79,
134
- "State": 64.15,
135
-
136
- "Relationship-Overall": 69.04,
137
- "Composition": 72.64,
138
- "Similarity": 66.67,
139
- "Inclusion": 70.11,
140
- "Comparison": 62.50,
141
-
142
- "Compound-Overall": 54.25,
143
- "Imagination": 60.97,
144
- "Feature matching": 47.40,
145
-
146
- "Grammar-Overall": 61.10,
147
- "Pronoun Reference": 78.68,
148
- "Consistency": 58.33,
149
- "Negation": 45.00,
150
-
151
- "Layout-Overall": 64.55,
152
- "2D": 67.28,
153
- "3D": 61.74,
154
-
155
- "Logical Reasoning": 31.59,
156
-
157
- "Text": 39.08
158
- },
159
- {
160
- "model": "HiDream_v2L",
161
- "link": "https://hidreamai.com/doc/txt2img/request",
162
- "hf": "-",
163
- "open_source": false,
164
- "release_date": "2025-07",
165
-
166
- "Overall": 61.64,
167
-
168
- "Style": 87.99,
169
-
170
- "World Knowledge": 89.62,
171
-
172
- "Attribute-Overall": 64.38,
173
- "Quantity": 65.71,
174
- "Expression": 44.87,
175
- "Material": 57.82,
176
- "Size": 74.26,
177
- "Shape": 59.87,
178
- "Color": 94.92,
179
-
180
- "Action-Overall": 59.50,
181
- "Hand": 51.28,
182
- "Full body": 58.56,
183
- "Animal": 67.65,
184
- "Non Contact": 61.98,
185
- "Contact": 51.52,
186
- "State": 65.09,
187
-
188
- "Relationship-Overall": 66.62,
189
- "Composition": 71.23,
190
- "Similarity": 64.20,
191
- "Inclusion": 65.93,
192
- "Comparison": 60.32,
193
-
194
- "Compound-Overall": 49.28,
195
- "Imagination": 53.75,
196
- "Feature matching": 44.76,
197
-
198
- "Grammar-Overall": 58.86,
199
- "Pronoun Reference": 72.35,
200
- "Consistency": 60.00,
201
- "Negation": 44.23,
202
-
203
- "Layout-Overall": 69.06,
204
- "2D": 70.41,
205
- "3D": 67.68,
206
-
207
- "Logical Reasoning": 26.73,
208
-
209
- "Text": 44.31
210
- },
211
- {
212
- "model": "Imagen-4.0-Fast-preview-06-06",
213
- "link": "https://deepmind.google/models/imagen/",
214
- "hf": "-",
215
- "open_source": false,
216
- "release_date": "2025-06",
217
-
218
- "Overall": 77.75,
219
-
220
- "Style": 92.00,
221
-
222
- "World Knowledge": 94.78,
223
-
224
- "Attribute-Overall": 83.65,
225
- "Quantity": 77.08,
226
- "Expression": 75.00,
227
- "Material": 85.85,
228
- "Size": 89.58,
229
- "Shape": 78.75,
230
- "Color": 98.33,
231
-
232
- "Action-Overall": 79.85,
233
- "Hand": 73.72,
234
- "Full body": 84.24,
235
- "Animal": 81.62,
236
- "Non Contact": 76.53,
237
- "Contact": 76.79,
238
- "State": 84.91,
239
-
240
- "Relationship-Overall": 82.36,
241
- "Composition": 83.45,
242
- "Similarity": 73.89,
243
- "Inclusion": 89.13,
244
- "Comparison": 82.03,
245
-
246
- "Compound-Overall": 74.10,
247
- "Imagination": 80.10,
248
- "Feature matching": 67.97,
249
-
250
- "Grammar-Overall": 76.74,
251
- "Pronoun Reference": 86.03,
252
- "Consistency": 75.00,
253
- "Negation": 68.46,
254
-
255
- "Layout-Overall": 86.19,
256
- "2D": 88.24,
257
- "3D": 84.09,
258
-
259
- "Logical Reasoning": 56.36,
260
-
261
- "Text": 51.44
262
- },
263
- {
264
- "model": "Recraft",
265
- "link": "https://www.recraft.ai/docs#generate-image",
266
- "hf": "-",
267
- "open_source": false,
268
- "release_date": "2024-12",
269
-
270
- "Overall": 62.63,
271
- "Style": 87.20,
272
- "World Knowledge": 90.19,
273
-
274
- "Attribute-Overall": 68.16,
275
- "Quantity": 68.06,
276
- "Expression": 56.41,
277
- "Material": 70.75,
278
- "Size": 65.97,
279
- "Shape": 57.50,
280
- "Color": 95.83,
281
-
282
- "Action-Overall": 60.55,
283
- "Hand": 50.00,
284
- "Full body": 70.65,
285
- "Animal": 76.47,
286
- "Non Contact": 55.61,
287
- "Contact": 48.81,
288
- "State": 63.21,
289
-
290
- "Relationship-Overall": 62.56,
291
- "Composition": 64.53,
292
- "Similarity": 59.44,
293
- "Inclusion": 59.24,
294
- "Comparison": 67.19,
295
-
296
- "Compound-Overall": 44.85,
297
- "Imagination": 43.37,
298
- "Feature matching": 46.35,
299
-
300
- "Grammar-Overall": 63.64,
301
- "Pronoun Reference": 73.16,
302
- "Consistency": 58.33,
303
- "Negation": 58.08,
304
-
305
- "Layout-Overall": 57.84,
306
- "2D": 58.82,
307
- "3D": 56.82,
308
-
309
- "Logical Reasoning": 29.55,
310
-
311
- "Text": 61.78
312
- },
313
- {
314
- "model": "FLUX-kontext-max",
315
- "link": "https://bfl.ai/models/flux-kontext",
316
- "hf": "-",
317
- "open_source": false,
318
- "release_date": "2025-06",
319
-
320
- "Overall": 80.00,
321
- "Style": 96.59,
322
- "World Knowledge": 94.19,
323
-
324
- "Attribute-Overall": 80.93,
325
- "Quantity": 75.69,
326
- "Expression": 74.32,
327
- "Material": 82.55,
328
- "Size": 86.81,
329
- "Shape": 74.38,
330
- "Color": 94.17,
331
-
332
- "Action-Overall": 77.38,
333
- "Hand": 67.95,
334
- "Full body": 83.15,
335
- "Animal": 77.94,
336
- "Non Contact": 77.04,
337
- "Contact": 70.83,
338
- "State": 84.43,
339
-
340
- "Relationship-Overall": 85.08,
341
- "Composition": 87.50,
342
- "Similarity": 78.89,
343
- "Inclusion": 90.00,
344
- "Comparison": 81.25,
345
-
346
- "Compound-Overall": 78.99,
347
- "Imagination": 83.93,
348
- "Feature matching": 73.96,
349
-
350
- "Grammar-Overall": 78.53,
351
- "Pronoun Reference": 84.23,
352
- "Consistency": 78.70,
353
- "Negation": 72.69,
354
-
355
- "Layout-Overall": 85.04,
356
- "2D": 86.74,
357
- "3D": 88.33,
358
-
359
- "Logical Reasoning": 61.36,
360
-
361
- "Text": 61.92
362
- },
363
- {
364
- "model": "FLUX-kontext-pro",
365
- "link": "https://bfl.ai/models/flux-kontext",
366
- "hf": "-",
367
- "open_source": false,
368
- "release_date": "2025-06",
369
-
370
- "Overall": 75.84,
371
- "Style": 94.78,
372
- "World Knowledge": 91.61,
373
-
374
- "Attribute-Overall": 79.20,
375
- "Quantity": 75.00,
376
- "Expression": 71.62,
377
- "Material": 76.89,
378
- "Size": 84.72,
379
- "Shape": 74.38,
380
- "Color": 97.50,
381
-
382
- "Action-Overall": 77.66,
383
- "Hand": 75.00,
384
- "Full body": 79.35,
385
- "Animal": 80.88,
386
- "Non Contact": 71.94,
387
- "Contact": 73.21,
388
- "State": 84.91,
389
-
390
- "Relationship-Overall": 79.34,
391
- "Composition": 81.42,
392
- "Similarity": 75.56,
393
- "Inclusion": 83.33,
394
- "Comparison": 74.22,
395
-
396
- "Compound-Overall": 72.68,
397
- "Imagination": 75.00,
398
- "Feature matching": 70.31,
399
-
400
- "Grammar-Overall": 72.69,
401
- "Pronoun Reference": 84.23,
402
- "Consistency": 76.85,
403
- "Negation": 57.69,
404
-
405
- "Layout-Overall": 84.47,
406
- "2D": 85.98,
407
- "3D": 82.95,
408
-
409
- "Logical Reasoning": 55.68,
410
-
411
- "Text": 50.29
412
- },
413
- {
414
- "model": "wan2.2-t2i-plus",
415
- "link": "https://help.aliyun.com/zh/model-studio/text-to-image-v2-api-reference",
416
- "hf": "-",
417
- "open_source": false,
418
- "release_date": "2025-07",
419
-
420
- "Overall": 64.82,
421
- "Style": 91.10,
422
- "World Knowledge": 87.34,
423
-
424
- "Attribute-Overall": 70.19,
425
- "Quantity": 76.39,
426
- "Expression": 55.77,
427
- "Material": 66.51,
428
- "Size": 71.53,
429
- "Shape": 64.38,
430
- "Color": 94.17,
431
-
432
- "Action-Overall": 68.00,
433
- "Hand": 58.33,
434
- "Full body": 75.82,
435
- "Animal": 69.12,
436
- "Non Contact": 68.88,
437
- "Contact": 57.74,
438
- "State": 75.00,
439
-
440
- "Relationship-Overall": 73.03,
441
- "Composition": 70.27,
442
- "Similarity": 67.98,
443
- "Inclusion": 77.72,
444
- "Comparison": 76.69,
445
-
446
- "Compound-Overall": 61.37,
447
- "Imagination": 66.92,
448
- "Feature matching": 55.73,
449
-
450
- "Grammar-Overall": 66.53,
451
- "Pronoun Reference": 73.90,
452
- "Consistency": 56.74,
453
- "Negation": 66.92,
454
-
455
- "Layout-Overall": 74.77,
456
- "2D": 77.49,
457
- "3D": 71.97,
458
-
459
- "Logical Reasoning": 42.05,
460
-
461
- "Text": 13.83
462
- },
463
- {
464
- "model": "FLUX.1-dev",
465
- "link": "https://bfl.ai/blog/24-08-01-bfl",
466
- "hf": "https://huggingface.co/black-forest-labs/FLUX.1-dev",
467
- "open_source": true,
468
- "release_date": "2024-08",
469
-
470
- "Overall": 61.30,
471
- "Style": 83.90,
472
- "World Knowledge": 88.92,
473
-
474
- "Attribute-Overall": 67.84,
475
- "Quantity": 72.22,
476
- "Expression": 53.85,
477
- "Material": 58.96,
478
- "Size": 75.00,
479
- "Shape": 65.00,
480
- "Color": 91.67,
481
-
482
- "Action-Overall": 62.17,
483
- "Hand": 51.28,
484
- "Full body": 67.39,
485
- "Animal": 69.85,
486
- "Non Contact": 59.69,
487
- "Contact": 58.93,
488
- "State": 65.57,
489
-
490
- "Relationship-Overall": 67.26,
491
- "Composition": 62.50,
492
- "Similarity": 66.67,
493
- "Inclusion": 72.83,
494
- "Comparison": 62.50,
495
-
496
- "Compound-Overall": 47.04,
497
- "Imagination": 47.96,
498
- "Feature matching": 46.09,
499
-
500
- "Grammar-Overall": 60.96,
501
- "Pronoun Reference": 73.16,
502
- "Consistency": 63.43,
503
- "Negation": 46.15,
504
-
505
- "Layout-Overall": 71.83,
506
- "2D": 74.26,
507
- "3D": 69.32,
508
-
509
- "Logical Reasoning": 30.91,
510
-
511
- "Text": 32.18
512
- },
513
- {
514
- "model": "HiDream-I1-Full",
515
- "link": "https://arxiv.org/pdf/2505.22705",
516
- "hf": "https://huggingface.co/HiDream-ai/HiDream-I1-Full",
517
- "open_source": true,
518
- "release_date": "2025-05",
519
-
520
- "Overall": 71.81,
521
- "Style": 92.50,
522
- "World Knowledge": 94.15,
523
-
524
- "Attribute-Overall": 72.97,
525
- "Quantity": 73.61,
526
- "Expression": 59.62,
527
- "Material": 72.17,
528
- "Size": 79.17,
529
- "Shape": 61.88,
530
- "Color": 98.33,
531
-
532
- "Action-Overall": 73.00,
533
- "Hand": 62.18,
534
- "Full body": 76.09,
535
- "Animal": 73.53,
536
- "Non Contact": 74.49,
537
- "Contact": 70.24,
538
- "State": 78.77,
539
-
540
- "Relationship-Overall": 75.38,
541
- "Composition": 79.05,
542
- "Similarity": 68.33,
543
- "Inclusion": 78.26,
544
- "Comparison": 72.66,
545
-
546
- "Compound-Overall": 62.63,
547
- "Imagination": 64.29,
548
- "Feature matching": 60.94,
549
-
550
- "Grammar-Overall": 63.24,
551
- "Pronoun Reference": 83.09,
552
- "Consistency": 65.74,
553
- "Negation": 40.38,
554
-
555
- "Layout-Overall": 78.17,
556
- "2D": 82.72,
557
- "3D": 73.48,
558
-
559
- "Logical Reasoning": 41.14,
560
-
561
- "Text": 64.94
562
- },
563
- {
564
- "model": "Pref-GRPO",
565
- "link": "https://github.com/CodeGoat24/UnifiedReward",
566
- "hf": "https://huggingface.co/CodeGoat24/FLUX.1-dev-PrefGRPO",
567
- "open_source": true,
568
- "release_date": "2025-08",
569
-
570
- "Overall": 69.46,
571
- "Style": 88.40,
572
- "World Knowledge": 90.35,
573
-
574
- "Attribute-Overall": 75.00,
575
- "Quantity": 71.53,
576
- "Expression": 60.90,
577
- "Material": 73.11,
578
- "Size": 77.08,
579
- "Shape": 74.38,
580
- "Color": 99.17,
581
-
582
- "Action-Overall": 69.77,
583
- "Hand": 60.90,
584
- "Full body": 72.28,
585
- "Animal": 77.21,
586
- "Non Contact": 68.37,
587
- "Contact": 64.88,
588
- "State": 74.53,
589
-
590
- "Relationship-Overall": 76.52,
591
- "Composition": 81.42,
592
- "Similarity": 76.67,
593
- "Inclusion": 76.09,
594
- "Comparison": 65.62,
595
-
596
- "Compound-Overall": 63.27,
597
- "Imagination": 65.56,
598
- "Feature matching": 60.94,
599
-
600
- "Grammar-Overall": 62.43,
601
- "Pronoun Reference": 79.04,
602
- "Consistency": 66.20,
603
- "Negation": 41.92,
604
-
605
- "Layout-Overall": 77.61,
606
- "2D": 82.35,
607
- "3D": 72.73,
608
-
609
- "Logical Reasoning": 47.13,
610
-
611
- "Text": 47.13
612
- },
613
- {
614
- "model": "SD-3.5-Large",
615
- "link": "https://stability.ai/news/introducing-stable-diffusion-3-5",
616
- "hf": "https://huggingface.co/stabilityai/stable-diffusion-3.5-large",
617
- "open_source": true,
618
- "release_date": "2024-10",
619
-
620
- "Overall": 62.99,
621
- "Style": 88.60,
622
- "World Knowledge": 88.92,
623
-
624
- "Attribute-Overall": 68.59,
625
- "Quantity": 71.53,
626
- "Expression": 51.92,
627
- "Material": 68.87,
628
- "Size": 68.06,
629
- "Shape": 65.62,
630
- "Color": 90.83,
631
-
632
- "Action-Overall": 62.17,
633
- "Hand": 57.05,
634
- "Full body": 61.96,
635
- "Animal": 63.24,
636
- "Non Contact": 62.24,
637
- "Contact": 59.52,
638
- "State": 67.45,
639
-
640
- "Relationship-Overall": 69.80,
641
- "Composition": 75.34,
642
- "Similarity": 68.33,
643
- "Inclusion": 68.48,
644
- "Comparison": 60.94,
645
-
646
- "Compound-Overall": 58.76,
647
- "Imagination": 64.80,
648
- "Feature matching": 52.60,
649
-
650
- "Grammar-Overall": 58.96,
651
- "Pronoun Reference": 74.63,
652
- "Consistency": 61.11,
653
- "Negation": 40.77,
654
-
655
- "Layout-Overall": 69.03,
656
- "2D": 70.96,
657
- "3D": 67.05,
658
-
659
- "Logical Reasoning": 32.27,
660
-
661
- "Text": 32.76
662
- },
663
- {
664
- "model": "Janus-Pro",
665
- "link": "https://arxiv.org/pdf/2501.17811",
666
- "hf": "https://huggingface.co/deepseek-ai/Janus-Pro-7B",
667
- "open_source": true,
668
- "release_date": "2025-01",
669
-
670
- "Overall": 61.61,
671
- "Style": 90.80,
672
- "World Knowledge": 86.71,
673
- "Attribute-Overall": 67.74,
674
- "Quantity": 56.25,
675
- "Expression": 55.77,
676
- "Material": 71.70,
677
- "Size": 73.61,
678
- "Shape": 61.88,
679
- "Color": 90.83,
680
-
681
- "Action-Overall": 64.26,
682
- "Hand": 50.64,
683
- "Full body": 63.04,
684
- "Animal": 75.00,
685
- "Non Contact": 62.24,
686
- "Contact": 56.55,
687
- "State": 76.42,
688
-
689
- "Relationship-Overall": 68.40,
690
- "Composition": 76.01,
691
- "Similarity": 56.11,
692
- "Inclusion": 75.00,
693
- "Comparison": 58.59,
694
-
695
- "Compound-Overall": 62.11,
696
- "Imagination": 69.64,
697
- "Feature matching": 54.43,
698
-
699
- "Grammar-Overall": 64.44,
700
- "Pronoun Reference": 75.37,
701
- "Consistency": 66.20,
702
- "Negation": 51.54,
703
-
704
- "Layout-Overall": 72.01,
705
- "2D": 74.63,
706
- "3D": 69.32,
707
-
708
- "Logical Reasoning": 37.05,
709
-
710
- "Text": 2.59
711
- },
712
- {
713
- "model": "Show-o2",
714
- "link": "https://arxiv.org/pdf/2506.15564",
715
- "hf": "https://huggingface.co/showlab/show-o2-7B",
716
- "open_source": true,
717
- "release_date": "2025-06",
718
-
719
- "Overall": 62.73,
720
- "Style": 87.20,
721
- "World Knowledge": 86.08,
722
- "Attribute-Overall": 70.51,
723
- "Quantity": 59.03,
724
- "Expression": 63.46,
725
- "Material": 73.58,
726
- "Size": 72.92,
727
- "Shape": 63.12,
728
- "Color": 95.00,
729
-
730
- "Action-Overall": 69.58,
731
- "Hand": 56.41,
732
- "Full body": 77.72,
733
- "Animal": 72.79,
734
- "Non Contact": 70.41,
735
- "Contact": 52.38,
736
- "State": 83.02,
737
-
738
- "Relationship-Overall": 70.18,
739
- "Composition": 79.05,
740
- "Similarity": 61.11,
741
- "Inclusion": 70.11,
742
- "Comparison": 62.50,
743
-
744
- "Compound-Overall": 64.69,
745
- "Imagination": 69.90,
746
- "Feature matching": 59.38,
747
-
748
- "Grammar-Overall": 61.63,
749
- "Pronoun Reference": 75.37,
750
- "Consistency": 65.28,
751
- "Negation": 44.23,
752
-
753
- "Layout-Overall": 75.37,
754
- "2D": 77.94,
755
- "3D": 72.73,
756
-
757
- "Logical Reasoning": 40.91,
758
-
759
- "Text": 1.15
760
- },
761
- {
762
- "model": "Bagel",
763
- "link": "https://arxiv.org/pdf/2505.14683",
764
- "hf": "https://huggingface.co/ByteDance-Seed/BAGEL-7B-MoT",
765
- "open_source": true,
766
- "release_date": "2025-05",
767
-
768
- "Overall": 61.53,
769
- "Style": 90.20,
770
- "World Knowledge": 85.60,
771
- "Attribute-Overall": 67.74,
772
- "Quantity": 59.03,
773
- "Expression": 50.00,
774
- "Material": 72.64,
775
- "Size": 76.39,
776
- "Shape": 59.38,
777
- "Color": 93.33,
778
-
779
- "Action-Overall": 61.98,
780
- "Hand": 52.56,
781
- "Full body": 60.87,
782
- "Animal": 69.12,
783
- "Non Contact": 62.24,
784
- "Contact": 58.93,
785
- "State": 67.45,
786
-
787
- "Relationship-Overall": 70.69,
788
- "Composition": 76.35,
789
- "Similarity": 70.56,
790
- "Inclusion": 69.57,
791
- "Comparison": 59.38,
792
-
793
- "Compound-Overall": 58.12,
794
- "Imagination": 67.35,
795
- "Feature matching": 48.70,
796
-
797
- "Grammar-Overall": 66.44,
798
- "Pronoun Reference": 71.69,
799
- "Consistency": 68.52,
800
- "Negation": 59.23,
801
-
802
- "Layout-Overall": 76.49,
803
- "2D": 79.04,
804
- "3D": 73.86,
805
-
806
- "Logical Reasoning": 30.23,
807
-
808
- "Text": 7.76
809
- },
810
- {
811
- "model": "BLIP3-o",
812
- "link": "https://arxiv.org/pdf/2505.09568",
813
- "hf": "https://huggingface.co/BLIP3o/BLIP3o-Model-8B",
814
- "open_source": true,
815
- "release_date": "2025-05",
816
-
817
- "Overall": 59.87,
818
- "Style": 92.80,
819
- "World Knowledge": 80.22,
820
- "Attribute-Overall": 63.89,
821
- "Quantity": 51.39,
822
- "Expression": 60.26,
823
- "Material": 64.62,
824
- "Size": 75.00,
825
- "Shape": 54.37,
826
- "Color": 81.67,
827
-
828
- "Action-Overall": 63.97,
829
- "Hand": 58.33,
830
- "Full body": 70.11,
831
- "Animal": 70.59,
832
- "Non Contact": 60.20,
833
- "Contact": 51.79,
834
- "State": 71.70,
835
-
836
- "Relationship-Overall": 66.50,
837
- "Composition": 70.61,
838
- "Similarity": 60.00,
839
- "Inclusion": 67.39,
840
- "Comparison": 64.84,
841
-
842
- "Compound-Overall": 53.74,
843
- "Imagination": 61.73,
844
- "Feature matching": 45.57,
845
-
846
- "Grammar-Overall": 68.58,
847
- "Pronoun Reference": 79.04,
848
- "Consistency": 61.11,
849
- "Negation": 63.85,
850
-
851
- "Layout-Overall": 68.47,
852
- "2D": 72.79,
853
- "3D": 64.02,
854
-
855
- "Logical Reasoning": 39.55,
856
-
857
- "Text": 1.15
858
- },
859
- {
860
- "model": "CogVew4",
861
- "link": "https://arxiv.org/pdf/2403.05121",
862
- "hf": "https://huggingface.co/zai-org/CogView4-6B",
863
- "open_source": true,
864
- "release_date": "2024-03",
865
-
866
- "Overall": 56.30,
867
- "Style": 82.00,
868
- "World Knowledge": 83.07,
869
- "Attribute-Overall": 63.25,
870
- "Quantity": 71.53,
871
- "Expression": 44.23,
872
- "Material": 55.19,
873
- "Size": 72.22,
874
- "Shape": 57.50,
875
- "Color": 89.17,
876
-
877
- "Action-Overall": 57.51,
878
- "Hand": 53.85,
879
- "Full body": 59.78,
880
- "Animal": 68.38,
881
- "Non Contact": 50.51,
882
- "Contact": 51.19,
883
- "State": 62.74,
884
-
885
- "Relationship-Overall": 62.44,
886
- "Composition": 60.47,
887
- "Similarity": 60.00,
888
- "Inclusion": 69.57,
889
- "Comparison": 60.16,
890
-
891
- "Compound-Overall": 44.72,
892
- "Imagination": 47.19,
893
- "Feature matching": 42.19,
894
-
895
- "Grammar-Overall": 54.81,
896
- "Pronoun Reference": 69.49,
897
- "Consistency": 56.02,
898
- "Negation": 38.46,
899
-
900
- "Layout-Overall": 69.22,
901
- "2D": 77.21,
902
- "3D": 60.98,
903
-
904
- "Logical Reasoning": 28.18,
905
-
906
- "Text": 17.82
907
- },
908
- {
909
- "model": "Hunyuan-DiT",
910
- "link": "https://arxiv.org/pdf/2405.08748",
911
- "hf": "https://huggingface.co/Tencent-Hunyuan/HunyuanDiT",
912
- "open_source": true,
913
- "release_date": "2024-05",
914
-
915
- "Overall": 51.38,
916
-
917
- "Style": 94.10,
918
-
919
- "World Knowledge": 80.70,
920
-
921
- "Attribute-Overall": 62.71,
922
- "Quantity": 67.36,
923
- "Expression": 44.23,
924
- "Material": 71.70,
925
- "Size": 61.81,
926
- "Shape": 47.50,
927
- "Color": 86.67,
928
-
929
- "Action-Overall": 49.05,
930
- "Hand": 35.90,
931
- "Full body": 54.89,
932
- "Animal": 54.41,
933
- "Non Contact": 46.94,
934
- "Contact": 35.71,
935
- "State": 62.74,
936
-
937
- "Relationship-Overall": 59.64,
938
- "Composition": 60.14,
939
- "Similarity": 64.44,
940
- "Inclusion": 60.33,
941
- "Comparison": 50.78,
942
-
943
- "Compound-Overall": 41.62,
944
- "Imagination": 46.68,
945
- "Feature matching": 36.46,
946
-
947
- "Grammar-Overall": 55.48,
948
- "Pronoun Reference": 62.87,
949
- "Consistency": 57.87,
950
- "Negation": 45.77,
951
-
952
- "Layout-Overall": 44.78,
953
- "2D": 39.34,
954
- "3D": 50.38,
955
-
956
- "Logical Reasoning": 24.55,
957
-
958
- "Text": 1.15
959
- },
960
- {
961
- "model": "Janus",
962
- "link": "https://arxiv.org/pdf/2410.13848",
963
- "hf": "https://huggingface.co/deepseek-ai/Janus-1.3B",
964
- "open_source": true,
965
- "release_date": "2024-10",
966
-
967
- "Overall": 51.23,
968
-
969
- "Style": 89.90,
970
-
971
- "World Knowledge": 73.58,
972
-
973
- "Attribute-Overall": 54.81,
974
- "Quantity": 37.50,
975
- "Expression": 37.82,
976
- "Material": 58.96,
977
- "Size": 65.97,
978
- "Shape": 47.50,
979
- "Color": 86.67,
980
-
981
- "Action-Overall": 50.38,
982
- "Hand": 32.69,
983
- "Full body": 51.63,
984
- "Animal": 61.76,
985
- "Non Contact": 48.47,
986
- "Contact": 38.10,
987
- "State": 66.51,
988
-
989
- "Relationship-Overall": 55.08,
990
- "Composition": 56.76,
991
- "Similarity": 53.89,
992
- "Inclusion": 59.24,
993
- "Comparison": 46.88,
994
-
995
- "Compound-Overall": 46.65,
996
- "Imagination": 58.16,
997
- "Feature matching": 34.90,
998
-
999
- "Grammar-Overall": 59.09,
1000
- "Pronoun Reference": 66.18,
1001
- "Consistency": 51.39,
1002
- "Negation": 58.08,
1003
-
1004
- "Layout-Overall": 54.85,
1005
- "2D": 57.72,
1006
- "3D": 51.89,
1007
-
1008
- "Logical Reasoning": 26.82,
1009
-
1010
- "Text": 1.15
1011
- },
1012
- {
1013
- "model": "Janus-flow",
1014
- "link": "https://arxiv.org/pdf/2411.07975",
1015
- "hf": "https://huggingface.co/deepseek-ai/JanusFlow-1.3B",
1016
- "open_source": true,
1017
- "release_date": "2024-11",
1018
-
1019
- "Overall": 46.39,
1020
-
1021
- "Style": 86.20,
1022
-
1023
- "World Knowledge": 62.50,
1024
-
1025
- "Attribute-Overall": 47.97,
1026
- "Quantity": 43.06,
1027
- "Expression": 30.77,
1028
- "Material": 55.19,
1029
- "Size": 55.56,
1030
- "Shape": 30.00,
1031
- "Color": 78.33,
1032
-
1033
- "Action-Overall": 43.35,
1034
- "Hand": 23.08,
1035
- "Full body": 48.37,
1036
- "Animal": 58.82,
1037
- "Non Contact": 36.73,
1038
- "Contact": 36.31,
1039
- "State": 55.66,
1040
-
1041
- "Relationship-Overall": 50.00,
1042
- "Composition": 59.80,
1043
- "Similarity": 38.89,
1044
- "Inclusion": 51.63,
1045
- "Comparison": 40.62,
1046
-
1047
- "Compound-Overall": 45.10,
1048
- "Imagination": 57.65,
1049
- "Feature matching": 32.29,
1050
-
1051
- "Grammar-Overall": 60.29,
1052
- "Pronoun Reference": 66.18,
1053
- "Consistency": 48.61,
1054
- "Negation": 63.85,
1055
-
1056
- "Layout-Overall": 46.46,
1057
- "2D": 49.26,
1058
- "3D": 43.56,
1059
-
1060
- "Logical Reasoning": 21.14,
1061
-
1062
- "Text": 0.86
1063
- },
1064
- {
1065
- "model": "Emu3",
1066
- "link": "https://arxiv.org/pdf/2409.18869",
1067
- "hf": "https://huggingface.co/BAAI/Emu3-Gen",
1068
- "open_source": true,
1069
- "release_date": "2024-09",
1070
-
1071
- "Overall": 46.02,
1072
-
1073
- "Style": 86.80,
1074
-
1075
- "World Knowledge": 77.06,
1076
-
1077
- "Attribute-Overall": 51.39,
1078
- "Quantity": 44.44,
1079
- "Expression": 45.51,
1080
- "Material": 53.77,
1081
- "Size": 43.06,
1082
- "Shape": 46.25,
1083
- "Color": 80.00,
1084
-
1085
- "Action-Overall": 40.11,
1086
- "Hand": 25.00,
1087
- "Full body": 47.28,
1088
- "Animal": 50.74,
1089
- "Non Contact": 35.20,
1090
- "Contact": 27.98,
1091
- "State": 52.36,
1092
-
1093
- "Relationship-Overall": 49.75,
1094
- "Composition": 56.76,
1095
- "Similarity": 46.67,
1096
- "Inclusion": 48.37,
1097
- "Comparison": 39.84,
1098
-
1099
- "Compound-Overall": 36.86,
1100
- "Imagination": 41.33,
1101
- "Feature matching": 32.29,
1102
-
1103
- "Grammar-Overall": 52.94,
1104
- "Pronoun Reference": 59.56,
1105
- "Consistency": 53.70,
1106
- "Negation": 45.38,
1107
-
1108
- "Layout-Overall": 44.78,
1109
- "2D": 45.22,
1110
- "3D": 44.32,
1111
-
1112
- "Logical Reasoning": 19.32,
1113
-
1114
- "Text": 1.15
1115
- },
1116
- {
1117
- "model": "Playground2.5",
1118
- "link": "https://arxiv.org/pdf/2402.17245",
1119
- "hf": "https://huggingface.co/playgroundai/playground-v2.5-1024px-aesthetic",
1120
- "open_source": true,
1121
- "release_date": "2024-02",
1122
-
1123
- "Overall": 45.61,
1124
-
1125
- "Style": 89.50,
1126
-
1127
- "World Knowledge": 76.11,
1128
-
1129
- "Attribute-Overall": 52.78,
1130
- "Quantity": 58.33,
1131
- "Expression": 43.59,
1132
- "Material": 57.08,
1133
- "Size": 44.44,
1134
- "Shape": 41.25,
1135
- "Color": 75.83,
1136
-
1137
- "Action-Overall": 42.68,
1138
- "Hand": 28.85,
1139
- "Full body": 50.00,
1140
- "Animal": 52.21,
1141
- "Non Contact": 35.20,
1142
- "Contact": 29.17,
1143
- "State": 58.02,
1144
-
1145
- "Relationship-Overall": 51.52,
1146
- "Composition": 60.14,
1147
- "Similarity": 49.44,
1148
- "Inclusion": 48.37,
1149
- "Comparison": 39.06,
1150
-
1151
- "Compound-Overall": 35.44,
1152
- "Imagination": 43.88,
1153
- "Feature matching": 26.82,
1154
-
1155
- "Grammar-Overall": 53.21,
1156
- "Pronoun Reference": 58.82,
1157
- "Consistency": 50.00,
1158
- "Negation": 50.00,
1159
-
1160
- "Layout-Overall": 37.13,
1161
- "2D": 34.56,
1162
- "3D": 39.77,
1163
-
1164
- "Logical Reasoning": 16.59,
1165
-
1166
- "Text": 1.15
1167
- },
1168
- {
1169
- "model": "SDXL",
1170
- "link": "https://arxiv.org/pdf/2307.01952",
1171
- "hf": "https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0",
1172
- "open_source": true,
1173
- "release_date": "2023-07",
1174
-
1175
- "Overall": 39.75,
1176
-
1177
- "Style": 87.40,
1178
-
1179
- "World Knowledge": 72.63,
1180
-
1181
- "Attribute-Overall": 44.34,
1182
- "Quantity": 44.44,
1183
- "Expression": 25.00,
1184
- "Material": 52.83,
1185
- "Size": 44.44,
1186
- "Shape": 33.75,
1187
- "Color": 68.33,
1188
-
1189
- "Action-Overall": 34.22,
1190
- "Hand": 19.23,
1191
- "Full body": 35.33,
1192
- "Animal": 43.38,
1193
- "Non Contact": 26.53,
1194
- "Contact": 24.40,
1195
- "State": 53.30,
1196
-
1197
- "Relationship-Overall": 44.92,
1198
- "Composition": 53.72,
1199
- "Similarity": 38.33,
1200
- "Inclusion": 39.67,
1201
- "Comparison": 41.41,
1202
-
1203
- "Compound-Overall": 26.68,
1204
- "Imagination": 33.93,
1205
- "Feature matching": 19.27,
1206
-
1207
- "Grammar-Overall": 47.33,
1208
- "Pronoun Reference": 50.37,
1209
- "Consistency": 42.59,
1210
- "Negation": 48.08,
1211
-
1212
- "Layout-Overall": 29.85,
1213
- "2D": 26.47,
1214
- "3D": 33.33,
1215
-
1216
- "Logical Reasoning": 9.55,
1217
-
1218
- "Text": 1.15
1219
- },
1220
  {
1221
  "model": "GPT-4o",
1222
  "link": "https://platform.openai.com/docs/guides/image-generation",
@@ -1224,414 +7,51 @@
1224
  "open_source": false,
1225
  "release_date": "2025-04",
1226
 
1227
- "Overall": 92.77,
1228
-
1229
- "Style": 98.57,
1230
-
1231
- "World Knowledge": 98.87,
1232
-
1233
- "Attribute-Overall": 93.59,
1234
- "Quantity": 90.00,
1235
- "Expression": 94.70,
1236
- "Material": 94.20,
1237
- "Size": 91.61,
1238
- "Shape": 92.50,
1239
- "Color": 99.17,
1240
-
1241
- "Action-Overall": 90.79,
1242
- "Hand": 89.74,
1243
- "Full body": 92.22,
1244
- "Animal": 87.12,
1245
- "Non Contact": 90.43,
1246
- "Contact": 89.82,
1247
- "State": 93.75,
1248
-
1249
- "Relationship-Overall": 94.97,
1250
- "Composition": 96.23,
1251
- "Similarity": 95.00,
1252
- "Inclusion": 94.89,
1253
- "Comparison": 92.19,
1254
-
1255
- "Compound-Overall": 93.55,
1256
- "Imagination": 95.64,
1257
- "Feature matching": 91.40,
1258
-
1259
- "Grammar-Overall": 91.76,
1260
- "Pronoun Reference": 92.91,
1261
- "Consistency": 91.67,
1262
- "Negation": 90.57,
1263
 
1264
- "Layout-Overall": 91.35,
1265
- "2D": 91.04,
1266
- "3D": 91.67,
1267
 
1268
- "Logical Reasoning": 84.97,
1269
 
1270
- "Text": 89.24
1271
- },
1272
- {
1273
- "model": "Imagen-4.0-Ultra-preview-06-06",
1274
- "link": "https://deepmind.google/models/imagen/",
1275
- "hf": "-",
1276
- "open_source": false,
1277
- "release_date": "2025-06",
1278
-
1279
- "Overall": 91.54,
1280
-
1281
- "Style": 99.20,
1282
-
1283
- "World Knowledge": 97.47,
1284
-
1285
- "Attribute-Overall": 92.52,
1286
- "Quantity": 93.06,
1287
- "Expression": 81.41,
1288
- "Material": 94.34,
1289
- "Size": 95.83,
1290
- "Shape": 91.88,
1291
  "Color": 100.0,
1292
 
1293
- "Action-Overall": 92.20,
1294
- "Hand": 90.38,
1295
- "Full body": 93.44,
1296
- "Animal": 91.91,
1297
  "Non Contact": 90.31,
1298
- "Contact": 89.29,
1299
- "State": 96.70,
1300
-
1301
- "Relationship-Overall": 93.02,
1302
- "Composition": 95.27,
1303
- "Similarity": 84.44,
1304
- "Inclusion": 98.37,
1305
- "Comparison": 92.19,
1306
-
1307
- "Compound-Overall": 91.37,
1308
- "Imagination": 92.86,
1309
- "Feature matching": 89.84,
1310
-
1311
- "Grammar-Overall": 87.97,
1312
- "Pronoun Reference": 94.12,
1313
- "Consistency": 87.04,
1314
- "Negation": 82.31,
1315
-
1316
- "Layout-Overall": 93.10,
1317
- "2D": 92.65,
1318
- "3D": 93.56,
1319
-
1320
- "Logical Reasoning": 79.55,
1321
-
1322
- "Text": 89.08
1323
- },
1324
- {
1325
- "model": "Seedream-3.0",
1326
- "link": "https://www.byteplus.com/en/product/Seedream",
1327
- "hf": "-",
1328
- "open_source": false,
1329
- "release_date": "2025-05",
1330
-
1331
- "Overall": 78.95,
1332
-
1333
- "Style": 98.10,
1334
-
1335
- "World Knowledge": 95.25,
1336
-
1337
- "Attribute-Overall": 85.58,
1338
- "Quantity": 80.56,
1339
- "Expression": 82.05,
1340
- "Material": 90.57,
1341
- "Size": 85.42,
1342
- "Shape": 78.12,
1343
- "Color": 97.50,
1344
-
1345
- "Action-Overall": 82.98,
1346
- "Hand": 75.00,
1347
- "Full body": 89.67,
1348
- "Animal": 85.29,
1349
- "Non Contact": 75.51,
1350
- "Contact": 80.95,
1351
- "State": 90.09,
1352
-
1353
- "Relationship-Overall": 80.84,
1354
- "Composition": 82.77,
1355
- "Similarity": 73.89,
1356
- "Inclusion": 84.24,
1357
- "Comparison": 81.25,
1358
-
1359
- "Compound-Overall": 73.84,
1360
- "Imagination": 78.57,
1361
- "Feature matching": 69.01,
1362
-
1363
- "Grammar-Overall": 61.36,
1364
- "Pronoun Reference": 79.78,
1365
- "Consistency": 69.91,
1366
- "Negation": 35.00,
1367
-
1368
- "Layout-Overall": 87.31,
1369
- "2D": 86.76,
1370
- "3D": 87.88,
1371
-
1372
- "Logical Reasoning": 52.73,
1373
-
1374
- "Text": 71.55
1375
- },
1376
- {
1377
- "model": "Imagen-3.0",
1378
- "link": "https://arxiv.org/pdf/2408.07009",
1379
- "hf": "-",
1380
- "open_source": false,
1381
- "release_date": "2025-02",
1382
-
1383
- "Overall": 71.85,
1384
-
1385
- "Style": 89.25,
1386
-
1387
- "World Knowledge": 94.75,
1388
-
1389
- "Attribute-Overall": 77.33,
1390
- "Quantity": 75.78,
1391
- "Expression": 64.67,
1392
- "Material": 80.66,
1393
- "Size": 82.84,
1394
- "Shape": 70.00,
1395
- "Color": 93.10,
1396
-
1397
- "Action-Overall": 81.46,
1398
- "Hand": 80.00,
1399
- "Full body": 83.89,
1400
- "Animal": 85.29,
1401
- "Non Contact": 77.37,
1402
- "Contact": 74.40,
1403
- "State": 87.38,
1404
-
1405
- "Relationship-Overall": 82.86,
1406
- "Composition": 83.90,
1407
- "Similarity": 73.33,
1408
- "Inclusion": 88.64,
1409
- "Comparison": 83.90,
1410
-
1411
- "Compound-Overall": 71.71,
1412
- "Imagination": 79.23,
1413
- "Feature matching": 64.06,
1414
-
1415
- "Grammar-Overall": 69.84,
1416
- "Pronoun Reference": 79.04,
1417
- "Consistency": 70.75,
1418
- "Negation": 59.13,
1419
-
1420
- "Layout-Overall": 81.34,
1421
- "2D": 82.72,
1422
- "3D": 79.92,
1423
-
1424
- "Logical Reasoning": 48.36,
1425
-
1426
- "Text": 21.55
1427
- },
1428
- {
1429
- "model": "DALL-E-3",
1430
- "link": "https://openai.com/zh-Hans-CN/index/dall-e-3/",
1431
- "hf": "-",
1432
- "open_source": false,
1433
- "release_date": "2024-09",
1434
-
1435
- "Overall": 69.18,
1436
 
1437
- "Style": 95.06,
1438
-
1439
- "World Knowledge": 93.51,
1440
-
1441
- "Attribute-Overall": 75.97,
1442
- "Quantity": 62.14,
1443
- "Expression": 59.87,
1444
- "Material": 87.74,
1445
- "Size": 87.50,
1446
- "Shape": 65.00,
1447
- "Color": 92.50,
1448
-
1449
- "Action-Overall": 69.83,
1450
- "Hand": 60.90,
1451
- "Full body": 75.00,
1452
- "Animal": 76.47,
1453
- "Non Contact": 66.84,
1454
- "Contact": 63.41,
1455
- "State": 75.47,
1456
-
1457
- "Relationship-Overall": 78.06,
1458
- "Composition": 82.43,
1459
- "Similarity": 69.44,
1460
- "Inclusion": 87.78,
1461
- "Comparison": 66.41,
1462
-
1463
- "Compound-Overall": 70.60,
1464
- "Imagination": 76.79,
1465
- "Feature matching": 64.21,
1466
-
1467
- "Grammar-Overall": 68.07,
1468
- "Pronoun Reference": 74.24,
1469
- "Consistency": 74.07,
1470
- "Negation": 56.64,
1471
-
1472
- "Layout-Overall": 66.67,
1473
- "2D": 57.72,
1474
- "3D": 76.17,
1475
-
1476
- "Logical Reasoning": 48.18,
1477
-
1478
- "Text": 25.86
1479
- },
1480
- {
1481
- "model": "FLUX-pro-1.1-Ultra",
1482
- "link": "https://bfl.ai/",
1483
- "hf": "-",
1484
- "open_source": false,
1485
- "release_date": "2025-03",
1486
-
1487
- "Overall": 70.67,
1488
-
1489
- "Style": 90.60,
1490
-
1491
- "World Knowledge": 91.61,
1492
-
1493
- "Attribute-Overall": 76.50,
1494
- "Quantity": 75.69,
1495
- "Expression": 59.62,
1496
- "Material": 78.77,
1497
- "Size": 77.78,
1498
- "Shape": 74.38,
1499
- "Color": 96.67,
1500
-
1501
- "Action-Overall": 76.50,
1502
- "Hand": 57.69,
1503
- "Full body": 68.48,
1504
- "Animal": 77.21,
1505
- "Non Contact": 76.53,
1506
- "Contact": 64.29,
1507
- "State": 76.89,
1508
-
1509
- "Relationship-Overall": 77.54,
1510
- "Composition": 80.41,
1511
- "Similarity": 72.78,
1512
- "Inclusion": 82.07,
1513
- "Comparison": 71.09,
1514
-
1515
- "Compound-Overall": 67.78,
1516
- "Imagination": 74.74,
1517
- "Feature matching": 60.68,
1518
-
1519
- "Grammar-Overall": 70.05,
1520
- "Pronoun Reference": 84.56,
1521
- "Consistency": 68.98,
1522
- "Negation": 55.77,
1523
-
1524
- "Layout-Overall": 81.53,
1525
- "2D": 80.15,
1526
- "3D": 82.95,
1527
-
1528
- "Logical Reasoning": 43.18,
1529
-
1530
- "Text": 37.36
1531
- },
1532
- {
1533
- "model": "Keling-Ketu",
1534
- "link": "https://kolors.kuaishou.com/",
1535
- "hf": "-",
1536
- "open_source": false,
1537
- "release_date": "2025-04",
1538
-
1539
- "Overall": 65.93,
1540
-
1541
- "Style": 92.27,
1542
-
1543
- "World Knowledge": 86.62,
1544
-
1545
- "Attribute-Overall": 71.66,
1546
- "Quantity": 75.00,
1547
- "Expression": 56.41,
1548
- "Material": 78.77,
1549
- "Size": 79.17,
1550
- "Shape": 53.12,
1551
- "Color": 91.38,
1552
-
1553
- "Action-Overall": 68.73,
1554
- "Hand": 54.49,
1555
- "Full body": 76.09,
1556
- "Animal": 72.79,
1557
- "Non Contact": 69.90,
1558
- "Contact": 58.93,
1559
- "State": 76.89,
1560
-
1561
- "Relationship-Overall": 70.94,
1562
- "Composition": 68.92,
1563
- "Similarity": 70.56,
1564
- "Inclusion": 74.46,
1565
- "Comparison": 71.09,
1566
 
1567
- "Compound-Overall": 60.81,
1568
- "Imagination": 66.24,
1569
- "Feature matching": 55.26,
1570
 
1571
- "Grammar-Overall": 71.26,
1572
- "Pronoun Reference": 77.21,
1573
- "Consistency": 67.59,
1574
- "Negation": 68.08,
1575
 
1576
- "Layout-Overall": 77.23,
1577
- "2D": 80.97,
1578
- "3D": 73.36,
1579
 
1580
- "Logical Reasoning": 43.75,
1581
 
1582
- "Text": 16.03
1583
  },
1584
- {
1585
- "model": "Qwen-Image",
1586
- "link": "https://arxiv.org/pdf/2508.02324",
1587
- "hf": "https://huggingface.co/Qwen/Qwen-Image",
1588
- "open_source": true,
1589
- "release_date": "2025-08",
1590
-
1591
- "Overall": 78.81,
1592
-
1593
- "Style": 95.10,
1594
-
1595
- "World Knowledge": 94.30,
1596
-
1597
- "Attribute-Overall": 87.61,
1598
- "Quantity": 81.94,
1599
- "Expression": 84.62,
1600
- "Material": 91.98,
1601
- "Size": 84.03,
1602
- "Shape": 84.38,
1603
- "Color": 99.17,
1604
-
1605
- "Action-Overall": 84.13,
1606
- "Hand": 82.05,
1607
- "Full body": 88.59,
1608
- "Animal": 88.24,
1609
- "Non Contact": 80.61,
1610
- "Contact": 77.38,
1611
- "State": 87.74,
1612
-
1613
- "Relationship-Overall": 79.70,
1614
- "Composition": 81.76,
1615
- "Similarity": 67.78,
1616
- "Inclusion": 86.96,
1617
- "Comparison": 81.25,
1618
-
1619
- "Compound-Overall": 73.32,
1620
- "Imagination": 73.21,
1621
- "Feature matching": 73.44,
1622
-
1623
- "Grammar-Overall": 60.29,
1624
- "Pronoun Reference": 83.82,
1625
- "Consistency": 70.37,
1626
- "Negation": 27.31,
1627
-
1628
- "Layout-Overall": 85.52,
1629
- "2D": 86.40,
1630
- "3D": 85.23,
1631
-
1632
- "Logical Reasoning": 53.64,
1633
-
1634
- "Text": 76.14
1635
- }
1636
  ]
1637
  }
 
1
  {
2
  "leaderboard": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  {
4
  "model": "GPT-4o",
5
  "link": "https://platform.openai.com/docs/guides/image-generation",
 
7
  "open_source": false,
8
  "release_date": "2025-04",
9
 
10
+ "Overall": 91.02,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
+ "Style": 99.39,
 
 
13
 
14
+ "World Knowledge": 98.72,
15
 
16
+ "Attribute-Overall": 94.99,
17
+ "Quantity": 93.62,
18
+ "Expression": 94.59,
19
+ "Material": 96.19,
20
+ "Size": 93.06,
21
+ "Shape": 92.95,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  "Color": 100.0,
23
 
24
+ "Action-Overall": 92.34,
25
+ "Hand": 94.08,
26
+ "Full body": 97.28,
27
+ "Animal": 90.91,
28
  "Non Contact": 90.31,
29
+ "Contact": 88.34,
30
+ "State": 92.65,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
 
32
+ "Relationship-Overall": 95.77,
33
+ "Composition": 97.30,
34
+ "Similarity": 93.18,
35
+ "Inclusion": 96.69,
36
+ "Comparison": 94.53,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
+ "Compound-Overall": 93.91,
39
+ "Imagination": 95.92,
40
+ "Feature matching": 91.74,
41
 
42
+ "Grammar-Overall": 91.02,
43
+ "Pronoun Reference": 95.15,
44
+ "Consistency": 89.35,
45
+ "Negation": 88.05,
46
 
47
+ "Layout-Overall": 89.27,
48
+ "2D": 89.18,
49
+ "3D": 89.35,
50
 
51
+ "Logical Reasoning": 91.44,
52
 
53
+ "Text": 63.37
54
  },
55
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  ]
57
  }