Corey Morris
committed on
Commit
·
83a34f0
1
Parent(s):
513e813
added mostly hardcoded generate url method and test
Browse files
- details_data_processor.py +30 -2
- test_details_data_processing.py +9 -1
details_data_processor.py
CHANGED
|
@@ -8,7 +8,7 @@ import requests
|
|
| 8 |
|
| 9 |
class DetailsDataProcessor:
|
| 10 |
# Download
|
| 11 |
-
#url example
|
| 12 |
|
| 13 |
def __init__(self, directory='results', pattern='results*.json'):
|
| 14 |
self.directory = directory
|
|
@@ -18,10 +18,38 @@ class DetailsDataProcessor:
|
|
| 18 |
|
| 19 |
# download a file from a single url and save it to a local directory
|
| 20 |
@staticmethod
|
| 21 |
-
def
|
| 22 |
r = requests.get(url, allow_redirects=True)
|
| 23 |
open(filename, 'wb').write(r.content)
|
| 24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
# @staticmethod
|
| 26 |
# def _find_files(directory, pattern):
|
| 27 |
# for root, dirs, files in os.walk(directory):
|
|
|
|
| 8 |
|
| 9 |
class DetailsDataProcessor:
|
| 10 |
# Download
|
| 11 |
+
#url example https://huggingface.co/datasets/open-llm-leaderboard/details/resolve/main/64bits/LexPodLM-13B/details_harness%7ChendrycksTest-moral_scenarios%7C5_2023-07-25T13%3A41%3A51.227672.json
|
| 12 |
|
| 13 |
def __init__(self, directory='results', pattern='results*.json'):
|
| 14 |
self.directory = directory
|
|
|
|
| 18 |
|
| 19 |
# download a file from a single url and save it to a local directory
|
| 20 |
@staticmethod
|
| 21 |
+
def download_file(url, filename, timeout=None):
    """Download ``url`` and save the response body to ``filename``.

    Args:
        url: Address to fetch; redirects are followed.
        filename: Local path the raw response bytes are written to.
        timeout: Optional timeout in seconds handed to ``requests.get``.
            The default ``None`` applies no timeout, matching the
            previous behaviour of this method.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status,
            so an error page is never saved as if it were the data.
    """
    response = requests.get(url, allow_redirects=True, timeout=timeout)
    # Fail loudly instead of writing an HTTP error body to disk.
    response.raise_for_status()
    # The context manager closes the handle even if the write fails.
    with open(filename, 'wb') as out_file:
        out_file.write(response.content)
|
| 24 |
|
| 25 |
+
@staticmethod
|
| 26 |
+
def single_file_pipeline(url, filename):
    """Download one JSON file and load it into a DataFrame.

    Args:
        url: Address of the JSON file to fetch.
        filename: Local path the download is saved to and then read from.

    Returns:
        The ``pd.DataFrame`` built from the parsed JSON content.
    """
    DetailsDataProcessor.download_file(url, filename)
    # JSON text is UTF-8; do not depend on the platform's locale encoding.
    with open(filename, encoding='utf-8') as f:
        data = json.load(f)
    return pd.DataFrame(data)
|
| 34 |
+
|
| 35 |
+
@staticmethod
|
| 36 |
+
def generate_url(file_path):
    """Build the details-file download URL for a results file path.

    The organization, model and timestamp are taken from ``file_path``
    rather than being hardcoded. The task chunk
    (``hendrycksTest-moral_scenarios``, 5-shot) is still fixed.

    Args:
        file_path: Path ending in
            ``<organization>/<model>/results<suffix>``, e.g.
            ``64bits/LexPodLM-13B/results_2023-07-25T13:41:51.227672.json``.
            Leading directories are ignored.

    Returns:
        The URL of the matching details file, with the suffix
        percent-encoded (``:`` becomes ``%3A``).

    Raises:
        ValueError: If ``file_path`` does not end in
            ``<organization>/<model>/results...``.
    """
    from urllib.parse import quote, unquote

    base_url = 'https://huggingface.co/datasets/open-llm-leaderboard/details/resolve/main/'
    other_chunk = 'details_harness%7ChendrycksTest-moral_scenarios%7C5'
    results_prefix = 'results'

    # Accept Windows-style separators and stray leading/trailing slashes.
    parts = file_path.replace('\\', '/').strip('/').split('/')
    if len(parts) < 3:
        raise ValueError(
            'expected <organization>/<model>/results*.json, got: %r' % (file_path,)
        )
    organization, model, results_name = parts[-3:]
    if not results_name.startswith(results_prefix):
        raise ValueError(
            'expected a file name starting with %r, got: %r'
            % (results_prefix, results_name)
        )

    # Everything after "results" (the "_<timestamp>.json" part) is shared
    # between the results file and the details file.
    suffix = results_name[len(results_prefix):]
    # unquote first so an already percent-encoded path is not encoded twice.
    filename = quote(unquote(suffix), safe='')

    return (
        base_url
        + quote(organization, safe='') + '/'
        + quote(model, safe='') + '/'
        + other_chunk + filename
    )
|
| 51 |
+
|
| 52 |
+
|
| 53 |
# @staticmethod
|
| 54 |
# def _find_files(directory, pattern):
|
| 55 |
# for root, dirs, files in os.walk(directory):
|
test_details_data_processing.py
CHANGED
|
@@ -16,10 +16,18 @@ class TestDetailsDataProcessor(unittest.TestCase):
|
|
| 16 |
# self.assertIsInstance(data, pd.DataFrame)
|
| 17 |
|
| 18 |
def test_download_file(self):
|
| 19 |
-
DetailsDataProcessor.
|
| 20 |
self.assertTrue(os.path.exists('test.html'))
|
| 21 |
os.remove('test.html')
|
| 22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
if __name__ == '__main__':
|
| 25 |
unittest.main()
|
|
|
|
| 16 |
# self.assertIsInstance(data, pd.DataFrame)
|
| 17 |
|
| 18 |
def test_download_file(self):
    """download_file should create the target file on disk."""
    target = 'test.html'
    try:
        DetailsDataProcessor.download_file('https://www.google.com', target)
        self.assertTrue(os.path.exists(target))
    finally:
        # Remove the artifact even when the download or the assertion
        # fails, so a failed run does not leave test.html behind.
        if os.path.exists(target):
            os.remove(target)
|
| 22 |
|
| 23 |
+
def test_generate_url(self):
    """generate_url should map a results file path to its details URL."""
    expected_url = 'https://huggingface.co/datasets/open-llm-leaderboard/details/resolve/main/64bits/LexPodLM-13B/details_harness%7ChendrycksTest-moral_scenarios%7C5_2023-07-25T13%3A41%3A51.227672.json'
    results_file_path = "64bits/LexPodLM-13B/results_2023-07-25T13:41:51.227672.json"
    # Expected value goes first, matching the argument order used so far.
    self.assertEqual(expected_url, self.processor.generate_url(results_file_path))
|
| 30 |
+
|
| 31 |
|
| 32 |
if __name__ == '__main__':
|
| 33 |
unittest.main()
|