Spaces:
Runtime error
Runtime error
| from datasets import load_dataset | |
def load_data_for_training(
    tokenizer,
    loader_path,
    dataset_dir,
    max_input_length=256,
):
    """Load a dataset and tokenize its ``document`` column.

    Args:
        tokenizer: Callable invoked on each batch as
            ``tokenizer(texts, max_length=..., truncation=True)``.
        loader_path: Forwarded to ``load_dataset`` as ``path``.
        dataset_dir: Forwarded to ``load_dataset`` as ``data_dir``.
        max_input_length: Forwarded to the tokenizer as ``max_length``.

    Returns:
        Whatever ``.map(..., batched=True)`` returns for the loaded
        dataset object after applying the tokenization step.
    """

    def tokenize_batch(batch):
        # Hand the tokenizer a fresh plain list of the batch's documents.
        documents = list(batch["document"])
        return tokenizer(
            documents,
            max_length=max_input_length,
            truncation=True,
        )

    # Load the raw data, then tokenize it batch by batch.
    raw_datasets = load_dataset(path=loader_path, data_dir=dataset_dir)
    return raw_datasets.map(tokenize_batch, batched=True)