# NOTE: hosting-page header captured with this file ("Spaces: Build error"); not part of the script.
import argparse
from typing import Any, Dict, Iterable, List, Mapping, Optional, Sequence

import pandas as pd
from datasets import load_dataset
def parse_args(argv: Optional[Sequence[str]] = None) -> argparse.Namespace:
    """Parse the command-line arguments of the script.

    Args:
        argv: Argument strings to parse. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``.

    Returns:
        A namespace with ``output_filepath`` (positional), ``dataset_name``
        (default ``'princeton-nlp/SWE-bench_Verified'``) and ``split``
        (default ``'test'``).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('output_filepath', type=str, help='Path to save the output file')
    parser.add_argument(
        '--dataset_name',
        type=str,
        help='Name of the dataset to download',
        default='princeton-nlp/SWE-bench_Verified',
    )
    parser.add_argument('--split', type=str, help='Split to download', default='test')
    return parser.parse_args(argv)


def build_gold_patches(rows: Iterable[Mapping[str, Any]]) -> List[Dict[str, Any]]:
    """Convert dataset rows into gold-patch records.

    Args:
        rows: Iterable of row mappings; each must provide the keys
            ``'instance_id'`` and ``'patch'``. Other keys are ignored.

    Returns:
        One dict per row with ``'instance_id'``, ``'model_patch'`` (the row's
        ``'patch'`` value) and ``'model_name_or_path'`` fixed to ``'gold'``.

    Raises:
        KeyError: If a row lacks ``'instance_id'`` or ``'patch'``.
    """
    return [
        {
            'instance_id': row['instance_id'],
            'model_patch': row['patch'],
            'model_name_or_path': 'gold',
        }
        for row in rows
    ]


def main(argv: Optional[Sequence[str]] = None) -> None:
    """Load a dataset split and save its gold patches as JSON Lines.

    Args:
        argv: Optional argument list forwarded to ``parse_args``; ``None``
            means the process command line.
    """
    args = parse_args(argv)
    output_filepath = args.output_filepath

    # Announce the download *before* starting it: loading the dataset is the
    # slow step, so the message must not appear only after it has finished.
    print(
        f'Downloading gold patches from {args.dataset_name} (split: {args.split}) to {output_filepath}'
    )
    dataset = load_dataset(args.dataset_name, split=args.split)

    patches = build_gold_patches(dataset)
    print(f'{len(patches)} gold patches loaded')

    # orient='records' with lines=True writes one JSON object per line.
    pd.DataFrame(patches).to_json(output_filepath, lines=True, orient='records')
    print(f'Patches saved to {output_filepath}')


# Guard the entry point so importing this module neither parses sys.argv nor
# triggers a dataset download.
if __name__ == '__main__':
    main()