Spaces:

LanguageBind
/

Video-Bench

Running

App Files Community

LanguageBind committed on Dec 31, 2023

Commit

6ca2788

1 Parent(s): 84f5285

Update src/compute.py

Browse files

Files changed (1) hide show

src/compute.py +125 -4

src/compute.py CHANGED Viewed

@@ -1,3 +1,123 @@
 import json
 import os
 import glob
@@ -23,7 +143,8 @@ def chatgpt_json(merge_file):
             if sub_item['output_chatgpt_choice'] == correct_answer_data[dataset_name][id]['answer']:
                 correct += 1
-        dataset_scores_dict[dataset_name] = round(correct / total_nums * 100, 2)
     return dataset_scores_dict
@@ -63,21 +184,21 @@ def compute_scores(merge_file):
     exclusive_understanding_score = 0
     # import ipdb; ipdb.set_trace()
     for dataset_name, weight in exclusive_understanding_weight.items():
-        exclusive_understanding_score += weight * dataset_score_dict[dataset_name] / weights_sum
     # Prior Knowledge-based Question-answer
     prior_QA_weight = dataset_weight[2]
     weights_sum = sum(prior_QA_weight.values())
     prior_QA_score = 0
     for dataset_name, weight in prior_QA_weight.items():
-        prior_QA_score += weight * dataset_score_dict[dataset_name] / weights_sum
     # Comprehension and Decision-making
     com_and_dec_QA_weight = dataset_weight[3]
     weights_sum = sum(com_and_dec_QA_weight.values())
     com_and_dec_QA_score = 0
     for dataset_name, weight in com_and_dec_QA_weight.items():
-        com_and_dec_QA_score += weight * dataset_score_dict[dataset_name] / weights_sum
     dataset_score_dict['Exclusive_understanding'] = exclusive_understanding_score
     dataset_score_dict['Prior_Knowledge'] = prior_QA_score

+# import json
+# import os
+# import glob
+# import argparse
+# import csv
+#
+#
+# def chatgpt_json(merge_file):
+#     # chat results
+#     merge_data = merge_file.decode("utf-8")
+#     merge_data = eval(merge_data)
+#     correct_answer_file = 'file/ANSWER.json'
+#     with open(correct_answer_file, 'r', encoding='utf-8') as f:
+#         correct_answer_data = json.load(f)
+#
+#     dataset_scores_dict = {}
+#     for dataset_name, item in merge_data.items():
+#
+#         total_nums = len(item)
+#         correct = 0
+#         # assert len(item) >= len(correct_answer_data[dataset_name]), f'Video-Bench-Input.json---{dataset_name}---is incomplete!'
+#         for id, sub_item in item.items():
+#             if sub_item['output_chatgpt_choice'] == correct_answer_data[dataset_name][id]['answer']:
+#                 correct += 1
+#
+#         dataset_scores_dict[dataset_name] = round(correct / total_nums * 100, 2)
+#     return dataset_scores_dict
+#
+#
+# def compute_scores(merge_file):
+#     dataset_score_dict = chatgpt_json(merge_file)
+#     dataset_weight = {
+#         1:
+#             {
+#                 "ActivityNet": 1,
+#                 "MSVD": 1,
+#                 "MSRVTT": 1,
+#                 "TGIF": 1,
+#                 "Youcook2": 1,
+#                 "Ucfcrime": 1,
+#                 "MOT": 0.5,
+#             },
+#
+#         2:
+#             {
+#                 "TVQA": 1,
+#                 "MV": 1,
+#                 "NBA": 1,
+#             },
+#
+#         3:
+#             {
+#                 "Driving-exam": 0.5,
+#                 "Driving-decision-making": 1,
+#                 "SQA3D": 1,
+#             }
+#
+#     }
+#
+#     # Video-exclusive Understanding score
+#     exclusive_understanding_weight = dataset_weight[1]
+#     weights_sum = sum(exclusive_understanding_weight.values())
+#     exclusive_understanding_score = 0
+#     # import ipdb; ipdb.set_trace()
+#     for dataset_name, weight in exclusive_understanding_weight.items():
+#         exclusive_understanding_score += weight * dataset_score_dict[dataset_name] / weights_sum
+#
+#     # Prior Knowledge-based Question-answer
+#     prior_QA_weight = dataset_weight[2]
+#     weights_sum = sum(prior_QA_weight.values())
+#     prior_QA_score = 0
+#     for dataset_name, weight in prior_QA_weight.items():
+#         prior_QA_score += weight * dataset_score_dict[dataset_name] / weights_sum
+#
+#     # Comprehension and Decision-making
+#     com_and_dec_QA_weight = dataset_weight[3]
+#     weights_sum = sum(com_and_dec_QA_weight.values())
+#     com_and_dec_QA_score = 0
+#     for dataset_name, weight in com_and_dec_QA_weight.items():
+#         com_and_dec_QA_score += weight * dataset_score_dict[dataset_name] / weights_sum
+#
+#     dataset_score_dict['Exclusive_understanding'] = exclusive_understanding_score
+#     dataset_score_dict['Prior_Knowledge'] = prior_QA_score
+#     dataset_score_dict['Comprehension_and_Decision-making'] = com_and_dec_QA_score
+#
+#     # final score
+#     final_score = sum([exclusive_understanding_score, prior_QA_score, com_and_dec_QA_score]) / 3
+#     dataset_score_dict['final_score'] = final_score
+#
+#     # print(dataset_score_dict)
+#     # with open(args.score_output_file, 'w', encoding='utf-8') as f:
+#     #   json.dump(dataset_score_dict, f, indent=2)
+#     # print(f'{args.score_output_file} is saved!')
+#     # ========================
+#     data = [
+#
+#         ["Avg. All", "Avg. Video-Exclusive", "Avg. Prior-Knowledge QA", "Avg. Decision-Making",
+#          "ActivityNet", "MSVD", "MSRVTT", "TGIF", "Youcook2", "Ucfcrime",
+#          "MOT", "TVQA", "MV", "NBA", "Driving-exam", "Driving-decision-making", "SQA3D"],
+#
+#         [final_score, exclusive_understanding_score, prior_QA_score, com_and_dec_QA_score,
+#          dataset_score_dict['ActivityNet'],
+#          dataset_score_dict["MSVD"],
+#          dataset_score_dict['MSRVTT'],
+#          dataset_score_dict['TGIF'],
+#          dataset_score_dict['Youcook2'],
+#          dataset_score_dict['Ucfcrime'],
+#          dataset_score_dict['MOT'],
+#          dataset_score_dict['TVQA'],
+#          dataset_score_dict['MV'],
+#          dataset_score_dict['NBA'],
+#          dataset_score_dict['Driving-exam'],
+#          dataset_score_dict['Driving-decision-making'],
+#          dataset_score_dict['SQA3D'],
+#          ],
+#     ]
+#
+#     return data
+#
 import json
 import os
 import glob
             if sub_item['output_chatgpt_choice'] == correct_answer_data[dataset_name][id]['answer']:
                 correct += 1
+        # dataset_scores_dict[dataset_name] = round(correct / total_nums * 100, 2)
+        dataset_scores_dict[dataset_name] = round(correct / total_nums , 4)
     return dataset_scores_dict
     exclusive_understanding_score = 0
     # import ipdb; ipdb.set_trace()
     for dataset_name, weight in exclusive_understanding_weight.items():
+        exclusive_understanding_score += weight * dataset_score_dict[dataset_name] / weights_sum * 100
     # Prior Knowledge-based Question-answer
     prior_QA_weight = dataset_weight[2]
     weights_sum = sum(prior_QA_weight.values())
     prior_QA_score = 0
     for dataset_name, weight in prior_QA_weight.items():
+        prior_QA_score += weight * dataset_score_dict[dataset_name] / weights_sum *100
     # Comprehension and Decision-making
     com_and_dec_QA_weight = dataset_weight[3]
     weights_sum = sum(com_and_dec_QA_weight.values())
     com_and_dec_QA_score = 0
     for dataset_name, weight in com_and_dec_QA_weight.items():
+        com_and_dec_QA_score += weight * dataset_score_dict[dataset_name] / weights_sum *100
     dataset_score_dict['Exclusive_understanding'] = exclusive_understanding_score
     dataset_score_dict['Prior_Knowledge'] = prior_QA_score