Spaces:

sakaltcommunity
/

Traningafri

Sleeping

Sakalti committed on Dec 21, 2024

Commit

1567b41

verified ·

1 Parent(s): e3d279c

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -33,11 +33,11 @@ if st.button("トレーニング開始"):
         # データセットの準備（スワヒリ語）
         st.write("データセットのロード中...")
         from datasets import load_dataset
-        dataset = load_dataset(dataset_name, 'swahili', split="train")  # 言語を指定
         # トレーニング用のデータセットの準備
         def tokenize_function(examples):
-            return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=128)
         tokenized_dataset = dataset.map(tokenize_function, batched=True)

         # データセットの準備（スワヒリ語）
         st.write("データセットのロード中...")
         from datasets import load_dataset
+        dataset = load_dataset(dataset_name, split="train")  # 言語を指定
         # トレーニング用のデータセットの準備
         def tokenize_function(examples):
+            return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=168)
         tokenized_dataset = dataset.map(tokenize_function, batched=True)