Update all_datasets.py
Browse files- all_datasets.py +19 -18
all_datasets.py
CHANGED
|
@@ -1,18 +1,19 @@
|
|
| 1 |
-
#%%
|
| 2 |
-
from datasets import load_dataset
|
| 3 |
-
import pandas as pd
|
| 4 |
-
import
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
|
|
|
|
|
| 1 |
+
#%%
|
| 2 |
+
from datasets import load_dataset
|
| 3 |
+
import pandas as pd
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
# Locate the data file next to this script rather than relative to the
# caller's working directory.
# `__file__` may be undefined when the `#%%` cells are executed in an
# interactive kernel; in that case fall back to the current working directory
# instead of failing with NameError.
try:
    BASE_DIR = Path(__file__).resolve().parent
except NameError:
    BASE_DIR = Path.cwd()
JSONL_PATH = BASE_DIR / "deepmind_math.jsonl"
|
| 7 |
+
# Read the local JSON Lines file (one JSON record per line) into a DataFrame.
clean_math = pd.read_json(JSONL_PATH, orient="records", lines=True)
|
| 12 |
+
# GSM8K, 'main' configuration; training split only.
GSM8k = load_dataset('openai/gsm8k', name='main', split='train')
|
| 13 |
+
# 'college_mathematics' configuration of cais/mmlu; the test and validation
# splits are requested together as a single dataset.
# NOTE(review): the variable is spelled MMMLU while the dataset id is
# 'cais/mmlu' — confirm the name is intentional.
MMMLU = load_dataset(
    'cais/mmlu',
    name='college_mathematics',
    split='test+validation',
)
|
| 14 |
+
# 'Math' configuration of MMMU/MMMU; test and validation splits requested
# together as a single dataset.
MMMU = load_dataset(
    'MMMU/MMMU',
    name='Math',
    split='test+validation',
)
|
| 15 |
+
# OlympiadBench, 'TP_TO_maths_en_COMP' configuration; training split.
Olympiad_math = load_dataset(
    'Hothan/OlympiadBench',
    name='TP_TO_maths_en_COMP',
    split='train',
)
|
| 16 |
+
# OlympiadBench, 'OE_TO_maths_en_COMP' configuration; training split.
Olympiad_math2 = load_dataset(
    'Hothan/OlympiadBench',
    name='OE_TO_maths_en_COMP',
    split='train',
)
|
| 17 |
+
# ScienceQA (default configuration); training split.
ScienceQA = load_dataset('derek-thomas/ScienceQA', split='train')
|
| 18 |
+
# PubMedQA, 'pqa_unlabeled' configuration; training split.
PubmedQA = load_dataset(
    'qiaojin/PubMedQA',
    name='pqa_unlabeled',
    split='train',
)
|
| 19 |
+
# %%
|