Spaces:

Ruurd
/

lad

Running on Zero

App Files Files

Ruurd committed on Apr 11

Commit

a494446

1 Parent(s): 9756472

Fix generation

Browse files

Files changed (1) hide show

app.py +25 -16

app.py CHANGED Viewed

@@ -130,7 +130,7 @@ def confidence_guided_noising(input_ids, answer_start, confidences, threshold, e
 @spaces.GPU
-def generate_diffusion_text(input_ids, answer_start):
     with torch.no_grad():
         input_tensor = torch.tensor([input_ids], dtype=torch.long).to(model.device)
         logits = model(input_ids=input_tensor)["logits"]
@@ -170,15 +170,24 @@ def diffusion_chat(question, eot_weight, max_it, sharpness, noise_clipping, use_
     for i in range(max_it):
         print('Generating output')
-        generated_tokens, confidences = generate_diffusion_text(current_tokens, answer_start)
-        current_tokens = generated_tokens
-        # --- Decode and highlight changed tokens in GREEN ---
-        decoded_ids = current_tokens[answer_start:]
-        decoded_tokens = tokenizer.convert_ids_to_tokens(decoded_ids)
         highlighted = []
         for j, tok in enumerate(decoded_tokens):
             token_str = tokenizer.convert_tokens_to_string([tok])
             if prev_decoded_tokens and j < len(prev_decoded_tokens) and tok != prev_decoded_tokens[j]:
                 highlighted.append(f'<span style="color:green">{token_str}</span>')
@@ -189,27 +198,29 @@ def diffusion_chat(question, eot_weight, max_it, sharpness, noise_clipping, use_
         yield f"<b>Iteration {i+1}/{max_it} (after generation):</b><br>" + "".join(highlighted).replace('\n', '<br>')
         time.sleep(0.1)
-        # --- Apply noising and highlight RED tokens ---
         threshold = get_noising_schedule(i, max_it, sharpness=sharpness)
         if use_confidence_noising:
-            current_tokens = confidence_guided_noising(
                 generated_tokens, answer_start, confidences, threshold, eot_weight, noise_clipping
             )
-            just_noised_indices = []  # Optional: could extract from confidence scores
         else:
-            current_tokens, just_noised_indices = noisify_answer(
                 generated_tokens, answer_start, threshold=threshold, eot_weight=eot_weight, clustering=clustering
             )
-        decoded_ids = current_tokens[answer_start:]
-        decoded_tokens = tokenizer.convert_ids_to_tokens(decoded_ids)
         highlighted = []
         for j, tok in enumerate(decoded_tokens):
             tok_id = tokenizer.convert_tokens_to_ids(tok)
             if tok_id == eot_token_id:
-                continue  # Skip EOT tokens in display
             token_str = tokenizer.convert_tokens_to_string([tok])
             abs_idx = answer_start + j
             if abs_idx in just_noised_indices:
@@ -228,8 +239,6 @@ def diffusion_chat(question, eot_weight, max_it, sharpness, noise_clipping, use_
             yield f"<b>Stopped early after {i+1} iterations.</b>"
             break
     final_tokens = tokenizer.convert_ids_to_tokens(current_tokens[answer_start:])
     final_tokens = [tok for tok in final_tokens if tokenizer.convert_tokens_to_ids(tok) != eot_token_id]
     final_output = tokenizer.convert_tokens_to_string(final_tokens)

 @spaces.GPU
+def generate_diffusion_text(input_ids):
     with torch.no_grad():
         input_tensor = torch.tensor([input_ids], dtype=torch.long).to(model.device)
         logits = model(input_ids=input_tensor)["logits"]
     for i in range(max_it):
         print('Generating output')
+        # Compose full input: original prompt + current answer
+        full_input_tokens = ori_input_tokens[:answer_start] + current_tokens[answer_start:]
+        full_input_tokens = full_input_tokens[:256] + [pad_token] * max(0, 256 - len(full_input_tokens))
+        # Model step
+        generated_tokens, confidences = generate_diffusion_text(full_input_tokens)
+        # Save full output for noising step
+        current_tokens = generated_tokens
+        # --- GREEN HIGHLIGHT ---
+        decoded_tokens = tokenizer.convert_ids_to_tokens(current_tokens[answer_start:])
         highlighted = []
         for j, tok in enumerate(decoded_tokens):
+            tok_id = tokenizer.convert_tokens_to_ids(tok)
+            if tok_id == eot_token_id:
+                continue
             token_str = tokenizer.convert_tokens_to_string([tok])
             if prev_decoded_tokens and j < len(prev_decoded_tokens) and tok != prev_decoded_tokens[j]:
                 highlighted.append(f'<span style="color:green">{token_str}</span>')
         yield f"<b>Iteration {i+1}/{max_it} (after generation):</b><br>" + "".join(highlighted).replace('\n', '<br>')
         time.sleep(0.1)
+        # --- NOISING STEP ---
         threshold = get_noising_schedule(i, max_it, sharpness=sharpness)
         if use_confidence_noising:
+            noised_answer = confidence_guided_noising(
                 generated_tokens, answer_start, confidences, threshold, eot_weight, noise_clipping
             )
+            just_noised_indices = []
         else:
+            noised_answer, just_noised_indices = noisify_answer(
                 generated_tokens, answer_start, threshold=threshold, eot_weight=eot_weight, clustering=clustering
             )
+        # Compose full input again: prompt + noised answer
+        current_tokens = ori_input_tokens[:answer_start] + noised_answer[answer_start:]
+        current_tokens = current_tokens[:256] + [pad_token] * max(0, 256 - len(current_tokens))
+        # --- RED HIGHLIGHT ---
+        decoded_tokens = tokenizer.convert_ids_to_tokens(current_tokens[answer_start:])
         highlighted = []
         for j, tok in enumerate(decoded_tokens):
             tok_id = tokenizer.convert_tokens_to_ids(tok)
             if tok_id == eot_token_id:
+                continue
             token_str = tokenizer.convert_tokens_to_string([tok])
             abs_idx = answer_start + j
             if abs_idx in just_noised_indices:
             yield f"<b>Stopped early after {i+1} iterations.</b>"
             break
     final_tokens = tokenizer.convert_ids_to_tokens(current_tokens[answer_start:])
     final_tokens = [tok for tok in final_tokens if tokenizer.convert_tokens_to_ids(tok) != eot_token_id]
     final_output = tokenizer.convert_tokens_to_string(final_tokens)