Fix short generations
app.py
CHANGED
@@ -6,9 +6,16 @@ import gradio as gr
 import numpy as np
 import spaces
 from scipy.signal import convolve2d
+from huggingface_hub import login
+import os
+from dotenv import load_dotenv
 
-
-
+load_dotenv()
+
+login(os.getenv("HF_TOKEN"))
+
+model = LlamaForCausalLM.from_pretrained("meta-llama/Llama-3.2-3B-Instruct", torch_dtype=torch.bfloat16, device_map="cuda")
+tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-3B-Instruct")
 attnlrp.register(model)
 
 def really_clean_tokens(tokens):

@@ -54,6 +61,7 @@ def generate_and_visualize(prompt, num_tokens=10):
     return input_tokens, all_relevances, generated_tokens
 
 def process_relevances(input_tokens, all_relevances, generated_tokens):
+
     attention_matrix = np.array([el[:len(all_relevances[0])] for el in all_relevances])
 
     ### FIND ZONES OF INTEREST

@@ -61,6 +69,9 @@ def process_relevances(input_tokens, all_relevances, generated_tokens):
     kernel_width = 6
     context_width = 20 # Number of tokens to include as context on each side
     kernel = np.ones((kernel_width, kernel_width))
+
+    if len(generated_tokens) < kernel_width:
+        return [(token, None, None) for token in generated_tokens]
 
     # Compute the rolling sum using 2D convolution
     rolled_sum = convolve2d(attention_matrix, kernel, mode='valid')
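
On the authentication side, the new imports wire up token-based login for the gated Llama checkpoint: `load_dotenv()` copies key=value pairs from a local `.env` file into the process environment, and the token is then read back and passed to `huggingface_hub.login`. Note that the lookup has to go through `os.getenv` (or `os.environ.get`); the `os` module itself has no `get` function. A minimal sketch of the pattern, assuming a `.env` file next to `app.py` with a placeholder token:

    # .env (kept out of version control)
    # HF_TOKEN=hf_xxxxxxxxxxxxxxxx

    import os
    from dotenv import load_dotenv
    from huggingface_hub import login

    load_dotenv()                  # merge .env entries into os.environ
    token = os.getenv("HF_TOKEN")  # None if the variable is missing
    if token is None:
        raise RuntimeError("HF_TOKEN is not set")
    login(token)                   # authenticate subsequent hub downloads

On a deployed Space the same variable would normally come from the Space's secrets settings rather than a checked-in file, and `load_dotenv()` is simply a no-op when no `.env` file exists.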
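
The early return is the actual fix for short generations: `scipy.signal.convolve2d` with `mode='valid'` requires one input to be at least as large as the other in every dimension, so an attention matrix with fewer rows than `kernel_width` (i.e. fewer than 6 generated tokens) raises a `ValueError` before any zones of interest can be computed. A small sketch reproducing the failure and the working case, with illustrative shapes:

    import numpy as np
    from scipy.signal import convolve2d

    kernel = np.ones((6, 6))

    # 3 generated tokens attending over 50 input tokens:
    # fewer rows than the 6x6 kernel, so 'valid' mode has no output.
    short_attention = np.random.rand(3, 50)
    try:
        convolve2d(short_attention, kernel, mode='valid')
    except ValueError as err:
        print("short generation:", err)

    # With at least kernel_width rows the rolling sum works:
    long_attention = np.random.rand(10, 50)
    rolled = convolve2d(long_attention, kernel, mode='valid')
    print(rolled.shape)  # (5, 45) == (10 - 6 + 1, 50 - 6 + 1)

Returning `(token, None, None)` for every generated token presumably signals the downstream rendering code to display the tokens with no highlighted relevance zones, which is a reasonable degenerate result when the generation is too short to convolve over.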