Spaces:
Running
on
Zero
Running
on
Zero
Fix red highlighting
Browse files
app.py
CHANGED
|
@@ -166,7 +166,7 @@ def diffusion_chat(question, eot_weight, max_it, sharpness, noise_clipping, use_
|
|
| 166 |
ori_input_tokens, answer_start, threshold=1.0, eot_weight=eot_weight, clustering=clustering
|
| 167 |
)
|
| 168 |
last_tokens = []
|
| 169 |
-
|
| 170 |
|
| 171 |
for i in range(max_it):
|
| 172 |
print('Generating output')
|
|
@@ -176,24 +176,26 @@ def diffusion_chat(question, eot_weight, max_it, sharpness, noise_clipping, use_
|
|
| 176 |
# --- Decode and highlight changed tokens in GREEN ---
|
| 177 |
decoded_ids = current_tokens[answer_start:]
|
| 178 |
decoded_tokens = tokenizer.convert_ids_to_tokens(decoded_ids)
|
| 179 |
-
filtered_tokens = [tok for tok in decoded_tokens if tokenizer.convert_tokens_to_ids(tok) != eot_token_id]
|
| 180 |
|
| 181 |
highlighted = []
|
| 182 |
-
for
|
| 183 |
token_str = tokenizer.convert_tokens_to_string([tok])
|
| 184 |
-
if
|
| 185 |
highlighted.append(f'<span style="color:green">{token_str}</span>')
|
| 186 |
else:
|
| 187 |
highlighted.append(token_str)
|
| 188 |
|
|
|
|
| 189 |
yield f"<b>Iteration {i+1}/{max_it} (after generation):</b><br>" + "".join(highlighted).replace('\n', '<br>')
|
| 190 |
time.sleep(0.1)
|
| 191 |
|
| 192 |
# --- Apply noising and highlight RED tokens ---
|
| 193 |
threshold = get_noising_schedule(i, max_it, sharpness=sharpness)
|
| 194 |
if use_confidence_noising:
|
| 195 |
-
current_tokens = confidence_guided_noising(
|
| 196 |
-
|
|
|
|
|
|
|
| 197 |
else:
|
| 198 |
current_tokens, just_noised_indices = noisify_answer(
|
| 199 |
generated_tokens, answer_start, threshold=threshold, eot_weight=eot_weight, clustering=clustering
|
|
@@ -201,12 +203,15 @@ def diffusion_chat(question, eot_weight, max_it, sharpness, noise_clipping, use_
|
|
| 201 |
|
| 202 |
decoded_ids = current_tokens[answer_start:]
|
| 203 |
decoded_tokens = tokenizer.convert_ids_to_tokens(decoded_ids)
|
| 204 |
-
filtered_tokens = [tok for tok in decoded_tokens if tokenizer.convert_tokens_to_ids(tok) != eot_token_id]
|
| 205 |
|
| 206 |
highlighted = []
|
| 207 |
-
for
|
|
|
|
|
|
|
|
|
|
|
|
|
| 208 |
token_str = tokenizer.convert_tokens_to_string([tok])
|
| 209 |
-
abs_idx = answer_start +
|
| 210 |
if abs_idx in just_noised_indices:
|
| 211 |
highlighted.append(f'<span style="color:red">{token_str}</span>')
|
| 212 |
else:
|
|
@@ -224,6 +229,7 @@ def diffusion_chat(question, eot_weight, max_it, sharpness, noise_clipping, use_
|
|
| 224 |
break
|
| 225 |
|
| 226 |
|
|
|
|
| 227 |
final_tokens = tokenizer.convert_ids_to_tokens(current_tokens[answer_start:])
|
| 228 |
final_tokens = [tok for tok in final_tokens if tokenizer.convert_tokens_to_ids(tok) != eot_token_id]
|
| 229 |
final_output = tokenizer.convert_tokens_to_string(final_tokens)
|
|
|
|
| 166 |
ori_input_tokens, answer_start, threshold=1.0, eot_weight=eot_weight, clustering=clustering
|
| 167 |
)
|
| 168 |
last_tokens = []
|
| 169 |
+
prev_decoded_tokens = []
|
| 170 |
|
| 171 |
for i in range(max_it):
|
| 172 |
print('Generating output')
|
|
|
|
| 176 |
# --- Decode and highlight changed tokens in GREEN ---
|
| 177 |
decoded_ids = current_tokens[answer_start:]
|
| 178 |
decoded_tokens = tokenizer.convert_ids_to_tokens(decoded_ids)
|
|
|
|
| 179 |
|
| 180 |
highlighted = []
|
| 181 |
+
for j, tok in enumerate(decoded_tokens):
|
| 182 |
token_str = tokenizer.convert_tokens_to_string([tok])
|
| 183 |
+
if prev_decoded_tokens and j < len(prev_decoded_tokens) and tok != prev_decoded_tokens[j]:
|
| 184 |
highlighted.append(f'<span style="color:green">{token_str}</span>')
|
| 185 |
else:
|
| 186 |
highlighted.append(token_str)
|
| 187 |
|
| 188 |
+
prev_decoded_tokens = decoded_tokens
|
| 189 |
yield f"<b>Iteration {i+1}/{max_it} (after generation):</b><br>" + "".join(highlighted).replace('\n', '<br>')
|
| 190 |
time.sleep(0.1)
|
| 191 |
|
| 192 |
# --- Apply noising and highlight RED tokens ---
|
| 193 |
threshold = get_noising_schedule(i, max_it, sharpness=sharpness)
|
| 194 |
if use_confidence_noising:
|
| 195 |
+
current_tokens = confidence_guided_noising(
|
| 196 |
+
generated_tokens, answer_start, confidences, threshold, eot_weight, noise_clipping
|
| 197 |
+
)
|
| 198 |
+
just_noised_indices = [] # Optional: could extract from confidence scores
|
| 199 |
else:
|
| 200 |
current_tokens, just_noised_indices = noisify_answer(
|
| 201 |
generated_tokens, answer_start, threshold=threshold, eot_weight=eot_weight, clustering=clustering
|
|
|
|
| 203 |
|
| 204 |
decoded_ids = current_tokens[answer_start:]
|
| 205 |
decoded_tokens = tokenizer.convert_ids_to_tokens(decoded_ids)
|
|
|
|
| 206 |
|
| 207 |
highlighted = []
|
| 208 |
+
for j, tok in enumerate(decoded_tokens):
|
| 209 |
+
tok_id = tokenizer.convert_tokens_to_ids(tok)
|
| 210 |
+
if tok_id == eot_token_id:
|
| 211 |
+
continue # Skip EOT tokens in display
|
| 212 |
+
|
| 213 |
token_str = tokenizer.convert_tokens_to_string([tok])
|
| 214 |
+
abs_idx = answer_start + j
|
| 215 |
if abs_idx in just_noised_indices:
|
| 216 |
highlighted.append(f'<span style="color:red">{token_str}</span>')
|
| 217 |
else:
|
|
|
|
| 229 |
break
|
| 230 |
|
| 231 |
|
| 232 |
+
|
| 233 |
final_tokens = tokenizer.convert_ids_to_tokens(current_tokens[answer_start:])
|
| 234 |
final_tokens = [tok for tok in final_tokens if tokenizer.convert_tokens_to_ids(tok) != eot_token_id]
|
| 235 |
final_output = tokenizer.convert_tokens_to_string(final_tokens)
|