Spaces:
Running
on
Zero
Running
on
Zero
Fix MASK token noising
Browse files
app.py
CHANGED
|
@@ -115,7 +115,7 @@ def noisify_answer(input_ids, answer_start, threshold=1.0, eot_weight=1.0, clust
|
|
| 115 |
noise = rng.choice(np.arange(vocab_size), size=len(noised_indices), p=mixed_probs)
|
| 116 |
for idx, val in zip(noised_indices, noise):
|
| 117 |
noised[idx] = val
|
| 118 |
-
noised[idx] = tokenizer.encode('MASK', add_special_tokens = False)
|
| 119 |
|
| 120 |
return noised, noised_indices
|
| 121 |
|
|
|
|
| 115 |
noise = rng.choice(np.arange(vocab_size), size=len(noised_indices), p=mixed_probs)
|
| 116 |
for idx, val in zip(noised_indices, noise):
|
| 117 |
noised[idx] = val
|
| 118 |
+
noised[idx] = tokenizer.encode('MASK', add_special_tokens = False)[0]
|
| 119 |
|
| 120 |
return noised, noised_indices
|
| 121 |
|