Corey Morris
committed on
Commit
·
abac22e
1
Parent(s):
6d41115
truthfulqa data added to dataframe
Browse files
- result_data_processor.py +20 -0
- test_data_processing.py +5 -5
result_data_processor.py
CHANGED
|
@@ -34,6 +34,22 @@ class ResultDataProcessor:
|
|
| 34 |
.str.replace('\|5', '', regex=True))
|
| 35 |
return df[[model_name]]
|
| 36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
@staticmethod
|
| 38 |
def _extract_parameters(model_name):
|
| 39 |
"""
|
|
@@ -66,6 +82,10 @@ class ResultDataProcessor:
|
|
| 66 |
raw_data = self._read_and_transform_data(filename)
|
| 67 |
model_name = filename.split('/')[2]
|
| 68 |
cleaned_data = self._cleanup_dataframe(raw_data, model_name)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
dataframes.append(cleaned_data)
|
| 70 |
|
| 71 |
|
|
|
|
| 34 |
.str.replace('\|5', '', regex=True))
|
| 35 |
return df[[model_name]]
|
| 36 |
|
| 37 |
+
def _extract_mc1(self, df, model_name):
|
| 38 |
+
df = df.rename(columns={'mc1': model_name})
|
| 39 |
+
# rename row harness|truthfulqa:mc|0 to truthfulqa:mc1
|
| 40 |
+
df.index = (df.index.str.replace('mc\|0', 'mc1', regex=True))
|
| 41 |
+
# just return the harness|truthfulqa:mc1 row
|
| 42 |
+
df = df.loc[['harness|truthfulqa:mc1']]
|
| 43 |
+
return df[[model_name]]
|
| 44 |
+
|
| 45 |
+
def _extract_mc2(self, df, model_name):
|
| 46 |
+
# rename row harness|truthfulqa:mc|0 to truthfulqa:mc2
|
| 47 |
+
df = df.rename(columns={'mc2': model_name})
|
| 48 |
+
df.index = (df.index.str.replace('mc\|0', 'mc2', regex=True))
|
| 49 |
+
df = df.loc[['harness|truthfulqa:mc2']]
|
| 50 |
+
return df[[model_name]]
|
| 51 |
+
|
| 52 |
+
|
| 53 |
@staticmethod
|
| 54 |
def _extract_parameters(model_name):
|
| 55 |
"""
|
|
|
|
| 82 |
raw_data = self._read_and_transform_data(filename)
|
| 83 |
model_name = filename.split('/')[2]
|
| 84 |
cleaned_data = self._cleanup_dataframe(raw_data, model_name)
|
| 85 |
+
mc1 = self._extract_mc1(raw_data, model_name)
|
| 86 |
+
mc2 = self._extract_mc2(raw_data, model_name)
|
| 87 |
+
cleaned_data = pd.concat([cleaned_data, mc1])
|
| 88 |
+
cleaned_data = pd.concat([cleaned_data, mc2])
|
| 89 |
dataframes.append(cleaned_data)
|
| 90 |
|
| 91 |
|
test_data_processing.py
CHANGED
|
@@ -18,17 +18,17 @@ class TestResultDataProcessor(unittest.TestCase):
|
|
| 18 |
self.assertIn('Parameters', data.columns)
|
| 19 |
self.assertIn('MMLU_average', data.columns)
|
| 20 |
# check number of columns
|
| 21 |
-
self.assertEqual(len(data.columns),
|
| 22 |
|
| 23 |
# check that the number of rows is correct
|
| 24 |
def test_rows(self):
|
| 25 |
data = self.processor.data
|
| 26 |
self.assertEqual(len(data), 992)
|
| 27 |
|
| 28 |
-
#
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
|
| 33 |
# test that a column that contains truthfulqa:mc does not exist
|
| 34 |
def test_truthfulqa_mc(self):
|
|
|
|
| 18 |
self.assertIn('Parameters', data.columns)
|
| 19 |
self.assertIn('MMLU_average', data.columns)
|
| 20 |
# check number of columns
|
| 21 |
+
self.assertEqual(len(data.columns), 63)
|
| 22 |
|
| 23 |
# check that the number of rows is correct
|
| 24 |
def test_rows(self):
|
| 25 |
data = self.processor.data
|
| 26 |
self.assertEqual(len(data), 992)
|
| 27 |
|
| 28 |
+
# check that mc1 column exists
|
| 29 |
+
def test_mc1(self):
|
| 30 |
+
data = self.processor.data
|
| 31 |
+
self.assertIn('harness|truthfulqa:mc1', data.columns)
|
| 32 |
|
| 33 |
# test that a column that contains truthfulqa:mc does not exist
|
| 34 |
def test_truthfulqa_mc(self):
|