shftan committed
Commit 0f93598 · 1 Parent(s): 64ea65c
Files changed (2):
  1. app.py +16 -18
  2. utils.py +5 -3
app.py CHANGED
@@ -1,25 +1,22 @@
 import gradio as gr
 import spaces
-from huggingface_hub import hf_hub_download
+from huggingface_hub import HfApi, hf_hub_download
 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 import pyvene as pv
 from utils import get_tokens, select_concepts, get_concepts_dictionary, get_response, plot_tokens_with_highlights
+import os
 
-#zero = torch.Tensor([0]).cuda()
-#print(zero.device) # <-- 'cpu'
+hf_token = os.getenv("HF_TOKEN")
 
-#@spaces.GPU
-#def greet(n):
-#    print(zero.device) # <-- 'cuda:0'
-#    return f"Hello {zero + n} Tensor"
 
 @spaces.GPU
 def launch_app():
-
+
     @spaces.GPU
-    # Function to process user input to the app
     def process_user_input(prompt, concept):
+        yield "Processing..."
+
         # Check if prompt or concept are empty
         if not prompt or not concept:
             return f"<h3>Please provide both a prompt and a concept</h3>"
@@ -66,10 +63,10 @@ def launch_app():
         # Combine HTMLs
         output_html = highlighted_tokens_html + concepts_html + "<p>&nbsp;</p>" + response_html + "<p>&nbsp;</p>" + documentation_html
 
-        return output_html
+        yield output_html
 
     # Set model, interpreter, dictionary choices
-    model_name = "google/gemma-2-2b-it"
+    model_name = "google/gemma-3-270m-it" #"google/gemma-2-2b-it"
     interpreter_name = "pyvene/gemma-reft-r1-2b-it-res"
     interpreter_path = "l20/weight.pt"
     interpreter_component = "model.layers[20].output"
@@ -85,14 +82,15 @@ def launch_app():
             return torch.relu(self.proj(base))
 
     # Load tokenizer and model
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
-    model = AutoModelForCausalLM.from_pretrained(model_name, device_map='auto').to('cuda')
+    tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
+    model = AutoModelForCausalLM.from_pretrained(model_name, device_map='auto', token=hf_token).to("cuda" if torch.cuda.is_available() else "cpu")
 
     # Load fast model inference pipeline
     pipe = pipeline(
         task="text-generation",
         model=model_name,
-        use_fast=True
+        use_fast=True,
+        token=hf_token
     )
 
     path_to_params = hf_hub_download(
@@ -100,12 +98,12 @@ def launch_app():
         filename=interpreter_path,
         force_download=False,
     )
-    params = torch.load(path_to_params)
-    encoder = Encoder(embed_dim=params.shape[0], latent_dim=params.shape[1]).cuda()
+    params = torch.load(path_to_params, map_location="cuda" if torch.cuda.is_available() else "cpu")
+    encoder = Encoder(embed_dim=params.shape[0], latent_dim=params.shape[1]).to("cuda" if torch.cuda.is_available() else "cpu")
     encoder.proj.weight.data = params.float()
     pv_model = pv.IntervenableModel({
         "component": interpreter_component,
-        "intervention": encoder}, model=model).cuda()
+        "intervention": encoder}, model=model).to("cuda" if torch.cuda.is_available() else "cpu")
 
     # Load dictionary
     all_concepts = get_concepts_dictionary(dictionary_url)
@@ -132,7 +130,7 @@ def launch_app():
         outputs=output_html
     )
 
-    demo.launch()
+    demo.launch(debug=True)
 
 if __name__ == "__main__":
     launch_app()
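Note: the rewritten app.py threads `token=hf_token` (read from the `HF_TOKEN` secret) through every Hub call, repeats the `"cuda" if torch.cuda.is_available() else "cpu"` fallback at each placement site, and turns `process_user_input` into a generator that yields an interim "Processing..." message before the final HTML. Below is a minimal sketch of the same pattern with the device decision factored into a single variable; the `load_model` helper and the placeholder HTML are illustrative, not part of this commit.

```python
# Sketch only, not code from this commit: the token/device pattern factored out.
import os

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

hf_token = os.getenv("HF_TOKEN")  # gated models need an access token on Spaces
device = "cuda" if torch.cuda.is_available() else "cpu"  # decide the fallback once

def load_model(model_name: str):
    """Hypothetical helper: load tokenizer and model with the token, then move to `device`."""
    tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
    model = AutoModelForCausalLM.from_pretrained(model_name, token=hf_token).to(device)
    return tokenizer, model

def process_user_input(prompt, concept):
    # Generator-style Gradio handler: yield an interim message, then the final HTML.
    yield "Processing..."
    output_html = f"<h3>{prompt} / {concept}</h3>"  # placeholder for the real rendering
    yield output_html
```

Gradio renders each value a generator handler yields in turn, so the interim message shows up while the heavier work runs; deciding the device once also keeps `torch.load`, the encoder, and the pyvene model on the same placement.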
utils.py CHANGED
@@ -5,23 +5,25 @@ import requests
 import json
 import pandas as pd
 import torch
-
+import spaces
 
 # Function to get tokens given text
+@spaces.GPU
 def get_tokens(tokenizer, text):
-    token_ids = tokenizer.encode(text, return_tensors="pt", add_special_tokens=False).to("cuda")
+    token_ids = tokenizer.encode(text, return_tensors="pt", add_special_tokens=False).to("cuda" if torch.cuda.is_available() else "cpu")
     tokens = tokenizer.convert_ids_to_tokens(token_ids[0])
 
     return tokens, token_ids
 
 # Function to apply chat template to prompt
+@spaces.GPU
 def decorate_prompt(tokenizer, prompt):
     chat = [
         {"role": "user", "content": prompt},
         {"role": "assistant", "content": ""},
     ]
     text = tokenizer.apply_chat_template(chat, tokenize=False)
-    token_ids = tokenizer.encode(text, return_tensors="pt", add_special_tokens=False).to("cuda")
+    token_ids = tokenizer.encode(text, return_tensors="pt", add_special_tokens=False).to("cuda" if torch.cuda.is_available() else "cpu")
 
     return token_ids
 
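Note: utils.py now marks both tokenization helpers with `@spaces.GPU` and gives their `.to(...)` calls a CPU fallback. A minimal sketch of how that decorator is typically applied on a ZeroGPU Space follows; `encode_text` is an illustrative name, not a function from this repo.

```python
# Minimal sketch of the @spaces.GPU pattern; assumes a Hugging Face ZeroGPU Space.
import spaces
import torch

@spaces.GPU  # requests a GPU for the duration of this call on ZeroGPU hardware
def encode_text(tokenizer, text):
    device = "cuda" if torch.cuda.is_available() else "cpu"  # CPU fallback off-GPU
    token_ids = tokenizer.encode(
        text,
        return_tensors="pt",
        add_special_tokens=False,
    ).to(device)
    return tokenizer.convert_ids_to_tokens(token_ids[0]), token_ids
```

Outside a ZeroGPU Space the decorator is effectively a no-op, which is why the explicit CPU fallback in `.to(...)` still matters.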