Spaces: Running on Zero

Add clustering of noise
Browse files

app.py CHANGED
@@ -56,31 +56,40 @@ def get_noising_schedule(i, max_it, sharpness=5.0):
     x = i / max_it
     return (np.exp(-sharpness * x) - np.exp(-sharpness)) / (1 - np.exp(-sharpness))
 
-def noisify_answer(input_ids, answer_start, threshold=1.0, eot_weight=1.0,
+def noisify_answer(input_ids, answer_start, threshold=1.0, eot_weight=1.0, cluster=0.5):
     noised = input_ids.copy()
     answer_len = len(noised) - answer_start
     num_to_noise = int(threshold * answer_len)
 
-    insert_token = rng.choice(np.arange(vocab_size), p=token_probabilities)
-    noised = np.concatenate([noised[:insert_idx], [insert_token], noised[insert_idx:]])
-    noised = noised[:len(input_ids)]
+    if num_to_noise == 0:
+        return noised
+
+    mixed_probs = token_probabilities.copy()
+    mixed_probs[eot_token_id] *= eot_weight
+    mixed_probs /= mixed_probs.sum()
+
+    # Determine number of clusters and average cluster size
+    num_clusters = max(1, int((1 - cluster) * num_to_noise))  # fewer clusters if more intensity
+    cluster_size = max(1, int(num_to_noise / num_clusters))
+
+    noised_indices = set()
+    for _ in range(num_clusters):
+        center = rng.integers(answer_start, len(noised))
+        span_start = max(answer_start, center - cluster_size // 2)
+        span_end = min(len(noised), span_start + cluster_size)
+        noised_indices.update(range(span_start, span_end))
+
+    # Trim in case we overshot due to overlapping clusters
+    noised_indices = sorted(list(noised_indices))[:num_to_noise]
+
+    noise = rng.choice(np.arange(vocab_size), size=len(noised_indices), p=mixed_probs)
+    for idx, val in zip(noised_indices, noise):
+        noised[idx] = val
 
     return noised
 
 
+
 # Add new noising function
 def confidence_guided_noising(input_ids, answer_start, confidences, threshold, eot_weight, noise_clipping):
     noised = input_ids.copy()
@@ -137,7 +146,7 @@ def generate_diffusion_text(input_ids, answer_start):
     return sampled, conf
 
 # --- Inference Wrapper ---
-def diffusion_chat(question, eot_weight, max_it, sharpness, noise_clipping, use_confidence_noising,
+def diffusion_chat(question, eot_weight, max_it, sharpness, noise_clipping, use_confidence_noising, clustering):
     placeholder = "What do you know about the city of New York?"
     if question.strip() == "":
         question = placeholder
@@ -194,7 +203,7 @@ def diffusion_chat(question, eot_weight, max_it, sharpness, noise_clipping, use_
         if use_confidence_noising:
             current_tokens = confidence_guided_noising(generated_tokens, answer_start, confidences, threshold, eot_weight, noise_clipping)
         else:
-            current_tokens = noisify_answer(generated_tokens, answer_start, threshold=threshold, eot_weight=eot_weight,
+            current_tokens = noisify_answer(generated_tokens, answer_start, threshold=threshold, eot_weight=eot_weight, cluster=clustering)
 
         time.sleep(0.01)
 
@@ -218,7 +227,7 @@ demo = gr.Interface(
         gr.Slider(1.0, 20.0, value=5.0, step=0.5, label="↓ = more noising (sharpness)"),
         gr.Slider(0.01, 1.0, value=0.05, step=0.01, label="↓ = more confidence guidance (noise clipping)"),
         gr.Checkbox(value=False, label="Use confidence-guided noising"),
-        gr.Slider(0,
+        gr.Slider(0.0, 1.0, value=0.5, step=0.05, label="↑ = more clustered noising (fewer, larger edits)")
     ],
     outputs=[gr.HTML(label="Diffusion Output")],
     title="Diffusion Language Model Chat",
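For reference, below is a minimal standalone sketch of the clustered noising this change introduces. It assumes a toy 100-token vocabulary, uniform token_probabilities, eot_token_id = 0, and a local rng; the demo_noisify_answer name and the example values are illustrative only and are not part of app.py. It shows how the cluster setting trades many scattered single-token edits (cluster near 0) for a few contiguous spans (cluster near 1).

# Standalone sketch of the clustered noising, under the toy assumptions above.
import numpy as np

rng = np.random.default_rng(0)
vocab_size = 100
token_probabilities = np.full(vocab_size, 1.0 / vocab_size)  # assumed uniform
eot_token_id = 0

def demo_noisify_answer(input_ids, answer_start, threshold=1.0, eot_weight=1.0, cluster=0.5):
    noised = np.array(input_ids).copy()
    answer_len = len(noised) - answer_start
    num_to_noise = int(threshold * answer_len)
    if num_to_noise == 0:
        return noised

    # Re-weight the end-of-text token, then renormalise.
    mixed_probs = token_probabilities.copy()
    mixed_probs[eot_token_id] *= eot_weight
    mixed_probs /= mixed_probs.sum()

    # cluster near 1.0 -> one long span; cluster near 0.0 -> many single-token edits.
    num_clusters = max(1, int((1 - cluster) * num_to_noise))
    cluster_size = max(1, int(num_to_noise / num_clusters))

    noised_indices = set()
    for _ in range(num_clusters):
        center = rng.integers(answer_start, len(noised))
        span_start = max(answer_start, center - cluster_size // 2)
        span_end = min(len(noised), span_start + cluster_size)
        noised_indices.update(range(span_start, span_end))

    # Overlapping spans can overshoot num_to_noise; trim back.
    noised_indices = sorted(noised_indices)[:num_to_noise]
    noise = rng.choice(np.arange(vocab_size), size=len(noised_indices), p=mixed_probs)
    noised[noised_indices] = noise
    return noised

tokens = list(range(40))  # pretend the answer starts at position 20
print(demo_noisify_answer(tokens, answer_start=20, threshold=0.5, cluster=0.0))  # scattered edits
print(demo_noisify_answer(tokens, answer_start=20, threshold=0.5, cluster=1.0))  # one contiguous span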