Spaces:
Running
on
Zero
Running
on
Zero
Fix cluster to clustering
Browse files
app.py
CHANGED
|
@@ -56,7 +56,7 @@ def get_noising_schedule(i, max_it, sharpness=5.0):
|
|
| 56 |
x = i / max_it
|
| 57 |
return (np.exp(-sharpness * x) - np.exp(-sharpness)) / (1 - np.exp(-sharpness))
|
| 58 |
|
| 59 |
-
def noisify_answer(input_ids, answer_start, threshold=1.0, eot_weight=1.0, cluster=0.5):
|
| 60 |
noised = input_ids.copy()
|
| 61 |
answer_len = len(noised) - answer_start
|
| 62 |
num_to_noise = int(threshold * answer_len)
|
|
@@ -69,7 +69,7 @@ def noisify_answer(input_ids, answer_start, threshold=1.0, eot_weight=1.0, clust
|
|
| 69 |
mixed_probs /= mixed_probs.sum()
|
| 70 |
|
| 71 |
# Determine number of clusters and average cluster size
|
| 72 |
-
num_clusters = max(1, int((1 - cluster) * num_to_noise)) # fewer clusters if more intensity
|
| 73 |
cluster_size = max(1, int(num_to_noise / num_clusters))
|
| 74 |
|
| 75 |
noised_indices = set()
|
|
|
|
| 56 |
x = i / max_it
|
| 57 |
return (np.exp(-sharpness * x) - np.exp(-sharpness)) / (1 - np.exp(-sharpness))
|
| 58 |
|
| 59 |
+
def noisify_answer(input_ids, answer_start, threshold=1.0, eot_weight=1.0, clustering=0.5):
|
| 60 |
noised = input_ids.copy()
|
| 61 |
answer_len = len(noised) - answer_start
|
| 62 |
num_to_noise = int(threshold * answer_len)
|
|
|
|
| 69 |
mixed_probs /= mixed_probs.sum()
|
| 70 |
|
| 71 |
# Determine number of clusters and average cluster size
|
| 72 |
+
num_clusters = max(1, int((1 - clustering) * num_to_noise)) # fewer clusters if more intensity
|
| 73 |
cluster_size = max(1, int(num_to_noise / num_clusters))
|
| 74 |
|
| 75 |
noised_indices = set()
|