Spaces:

OpenHandsCommunity
/

evaluation

Running

App Files Files Community

xingyaoww committed on Nov 5, 2024

Commit

bc761ca

1 Parent(s): bc4c0d0

stop processing history

Browse files

Files changed (1) hide show

utils/swe_bench.py +0 -55

utils/swe_bench.py CHANGED Viewed

@@ -9,49 +9,6 @@ def clean_git_patch(git_patch):
         git_patch = git_patch[git_patch.index('diff'):]
     return git_patch
-def reformat_history(history):
-    new_history = []
-    cur_turn = []
-    for i, (action, observation) in enumerate(history):
-        # Compatibility mode: old format before refractor
-        if 'source' not in action:
-            return history
-        if i == 0:
-            assert action['action'] == 'message'
-            assert action['source'] == 'user'
-            # skip the initial instruction
-            continue
-        if action['source'] == 'agent':
-            # cleanup all previous turns
-            if len(cur_turn) == 1:
-                new_history.append(cur_turn[0])
-            elif len(cur_turn) == 2:
-                # one action from user, one action from agent
-                agent_msg_action, agent_msg_obs = cur_turn[0]
-                assert agent_msg_obs['observation'] == 'null'
-                user_msg_action, user_msg_obs = cur_turn[1]
-                assert user_msg_obs['observation'] == 'null'
-                # re-write user message to be a observation message
-                user_msg_action_as_obs = {
-                    'observation': 'message',
-                    'source': 'user',
-                    'content': user_msg_action['args']['content'],
-                }
-                new_history.append((agent_msg_action, user_msg_action_as_obs))
-            elif len(cur_turn) == 0:
-                pass
-            else:
-                st.write(f'Unsupported #interactions per iteration: {len(cur_turn)}')
-                st.json(cur_turn)
-                raise ValueError(f'Unsupported #interactions per iteration: {len(cur_turn)}')
-            # reset new turn
-            cur_turn = []
-        cur_turn.append((action, observation))
-    return new_history
 def _load_report_legacy(instance_id_to_status, report):
      # instance_id to status
@@ -103,7 +60,6 @@ def load_df_from_selected_filepaths(select_filepaths):
                 # clear out git patch
                 if 'git_patch' in d:
                     d['git_patch'] = clean_git_patch(d['git_patch'])
-                d['history'] = reformat_history(d['history'])
                 if d['instance_id'] in instance_id_to_status:
                     d['fine_grained_report'] = dict(instance_id_to_status[d['instance_id']])
                 data.append(d)
@@ -139,13 +95,6 @@ def agg_stats(df):
             test_result['test_errored'] = bool(entry['report'].get('test_errored', False))
             test_result['patch_applied'] = bool(entry['report'].get('apply_test_patch_success', False))
-        # avg,std obs length
-        obs_lengths = []
-        for _, (_, obs) in enumerate(history):
-            if 'content' in obs:
-                obs_lengths.append(len(obs['content']))
-        obs_lengths = pd.Series(obs_lengths)
         metrics = entry.get('metrics', {})
         cost = metrics.get('accumulated_cost', None)
@@ -154,14 +103,10 @@ def agg_stats(df):
             'instance_id': entry['instance_id'],
             'agent_class': entry['metadata']['agent_class'],
             'model_name': entry['metadata']['llm_config']['model'] if 'llm_config' in entry['metadata'] else entry['metadata']['model_name'],
-            'n_turns': len(history),
             **test_result,
             'agent_stuck_in_loop': agent_stuck_in_loop,
             'contains_error': contains_error,
             'cost': cost,
-            'obs_len_avg': round(obs_lengths.mean(), 0),
-            'obs_len_std': round(obs_lengths.std(), 0),
-            'obs_len_max': round(obs_lengths.max(), 0),
         }
         if 'swe_instance' in entry:
             d.update(

         git_patch = git_patch[git_patch.index('diff'):]
     return git_patch
 def _load_report_legacy(instance_id_to_status, report):
      # instance_id to status
                 # clear out git patch
                 if 'git_patch' in d:
                     d['git_patch'] = clean_git_patch(d['git_patch'])
                 if d['instance_id'] in instance_id_to_status:
                     d['fine_grained_report'] = dict(instance_id_to_status[d['instance_id']])
                 data.append(d)
             test_result['test_errored'] = bool(entry['report'].get('test_errored', False))
             test_result['patch_applied'] = bool(entry['report'].get('apply_test_patch_success', False))
         metrics = entry.get('metrics', {})
         cost = metrics.get('accumulated_cost', None)
             'instance_id': entry['instance_id'],
             'agent_class': entry['metadata']['agent_class'],
             'model_name': entry['metadata']['llm_config']['model'] if 'llm_config' in entry['metadata'] else entry['metadata']['model_name'],
             **test_result,
             'agent_stuck_in_loop': agent_stuck_in_loop,
             'contains_error': contains_error,
             'cost': cost,
         }
         if 'swe_instance' in entry:
             d.update(