Spaces:
Running
Running
Count lines instead of using dataloader.dataset
Browse files- translate.py +3 -4
translate.py
CHANGED
|
@@ -116,7 +116,7 @@ def main(
|
|
| 116 |
"top_p": top_p,
|
| 117 |
}
|
| 118 |
|
| 119 |
-
|
| 120 |
|
| 121 |
if accelerator.is_main_process:
|
| 122 |
print(
|
|
@@ -156,7 +156,7 @@ def main(
|
|
| 156 |
samples_seen: int = 0
|
| 157 |
|
| 158 |
with tqdm(
|
| 159 |
-
total=len(data_loader.dataset),
|
| 160 |
desc="Dataset translation",
|
| 161 |
leave=True,
|
| 162 |
ascii=True,
|
|
@@ -185,8 +185,7 @@ def main(
|
|
| 185 |
if accelerator.is_main_process:
|
| 186 |
if step == len(data_loader) - 1:
|
| 187 |
tgt_text = tgt_text[
|
| 188 |
-
: (len(data_loader.dataset) * num_return_sequences)
|
| 189 |
-
- samples_seen
|
| 190 |
]
|
| 191 |
else:
|
| 192 |
samples_seen += len(tgt_text)
|
|
|
|
| 116 |
"top_p": top_p,
|
| 117 |
}
|
| 118 |
|
| 119 |
+
total_lines: int = count_lines(sentences_path)
|
| 120 |
|
| 121 |
if accelerator.is_main_process:
|
| 122 |
print(
|
|
|
|
| 156 |
samples_seen: int = 0
|
| 157 |
|
| 158 |
with tqdm(
|
| 159 |
+
total=total_lines,
|
| 160 |
desc="Dataset translation",
|
| 161 |
leave=True,
|
| 162 |
ascii=True,
|
|
|
|
| 185 |
if accelerator.is_main_process:
|
| 186 |
if step == len(data_loader) - 1:
|
| 187 |
tgt_text = tgt_text[
|
| 188 |
+
: (total_lines * num_return_sequences) - samples_seen
|
|
|
|
| 189 |
]
|
| 190 |
else:
|
| 191 |
samples_seen += len(tgt_text)
|