adding application

app.py CHANGED
|
@@ -242,14 +242,14 @@ def find_last_token_index(full_ids, word_ids):
 @GPU # this block runs on a job GPU
 def analyse_word(model_name: str, word: str, patchscopes_template: str, context:str = ""):
     try:
-        text = context+ " " + word
+        # text = context+ " " + word
         model, tokenizer = get_model_and_tokenizer(model_name)
 
         # Build extraction prompt (where hidden states will be collected)
         extraction_prompt ="X"
 
         # Identify last token position of the *word* inside the prompt IDs
-        word_token_ids = tokenizer.encode(
+        word_token_ids = tokenizer.encode(word, add_special_tokens=False)
 
         # Instantiate Patchscopes retriever
         patch_retriever = PatchscopesRetriever(
@@ -262,7 +262,7 @@ def analyse_word(model_name: str, word: str, patchscopes_template: str, context:
 
         # Run retrieval for the word across all layers (one pass)
         retrieved_words = patch_retriever.get_hidden_states_and_retrieve_word(
-
+            word,
             num_tokens_to_generate=len(tokenizer.tokenize(word)),
         )[0]
 
@@ -309,14 +309,14 @@ with gr.Blocks(theme="soft") as demo:
         label="Patchscopes prompt (use X as placeholder)",
         value="repeat the following word X twice: 1)X 2)",
     )
-    context_box = gr.Textbox(label="context", value="")
+    # context_box = gr.Textbox(label="context", value="")
     word_box = gr.Textbox(label="Word to test", value="interpretable")
     run_btn = gr.Button("Analyse")
    out_html = gr.HTML()
 
     run_btn.click(
         analyse_word,
-        inputs=[model_name, word_box, patchscopes_template, context_box],
+        inputs=[model_name, word_box, patchscopes_template], #, context_box],
         outputs=out_html,
     )
 
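A note on the change at line 252: encoding the probed word with add_special_tokens=False is what allows its token IDs to be found verbatim inside the prompt IDs (the job of find_last_token_index(full_ids, word_ids) in the hunk context above), because many tokenizers otherwise wrap the encoding in BOS/CLS/SEP markers. The snippet below is a minimal, standalone sketch of that behaviour; the tokenizer name and the local last_token_index helper are illustrative assumptions, not the app's actual implementation.

from transformers import AutoTokenizer

def last_token_index(full_ids, word_ids):
    # Index of the final token of the last occurrence of word_ids inside full_ids, or -1.
    n, m = len(full_ids), len(word_ids)
    for start in range(n - m, -1, -1):
        if full_ids[start:start + m] == word_ids:
            return start + m - 1
    return -1

# Illustrative tokenizer choice; any Hugging Face tokenizer that inserts special tokens works.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

word = "interpretable"
prompt = f"repeat the following word {word} twice: 1) {word} 2)"
full_ids = tokenizer.encode(prompt)  # prompt IDs as the model would receive them

wrapped = tokenizer.encode(word)                           # [CLS] ... [SEP] around the word
plain = tokenizer.encode(word, add_special_tokens=False)   # just the word's sub-tokens

print(last_token_index(full_ids, wrapped))  # -1: the wrapped sequence never occurs in the prompt
print(last_token_index(full_ids, plain))    # index of the word's final sub-token (last occurrence)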