Update model to Llama 3.2 3B-Instruct based, changed prompt format
app.py CHANGED
@@ -18,7 +18,7 @@ tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-3B", use_fast=Tr
 vocab_size = len(tokenizer)
 eos_token_id = tokenizer.eos_token_id
 mask_token_id = tokenizer.encode('MASK', add_special_tokens=False)[0]
-assistant_marker_ids = tokenizer.encode("
+assistant_marker_ids = tokenizer.encode("<|start_header_id|>assistant<|end_header_id|>", add_special_tokens=False)
 
 # def load_model():
 #     ckpt_path = hf_hub_download(
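Note: the new assistant marker encodes to a multi-token ID sequence, and find_answer_start (used in the last hunk below) has to locate that sequence inside the encoded prompt. Its implementation is not part of this diff; a minimal sketch, assuming a plain subsequence scan:

def find_answer_start(input_ids, marker_ids):
    # Slide over the prompt IDs looking for the assistant-header ID sequence.
    n = len(marker_ids)
    for i in range(len(input_ids) - n + 1):
        if input_ids[i:i + n] == marker_ids:
            return i + n  # generation would start right after the marker
    return None  # caller treats None as "marker not found"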
@@ -195,6 +195,17 @@ def generate_diffusion_text(input_ids, top_p, top_k):
     conf = probs[range(len(sampled)), sampled].cpu().numpy()
     return sampled, conf
 
+def format_chat_prompt(question):
+    return (
+        "<|begin_of_text|>\n"
+        "<|start_header_id|>system<|end_header_id|>\n"
+        "You are a helpful assistant.\n"
+        "<|start_header_id|>user<|end_header_id|>\n"
+        f"{question}\n"
+        "<|start_header_id|>assistant<|end_header_id|>\n"
+    )
+
+
 # --- Inference Wrapper ---
 def diffusion_chat(question, max_it, pause_length, sharpness,
                    clustering, noise_start, use_confidence_noising,
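The hand-rolled template above follows the Llama 3 header layout but leaves out the <|eot_id|> turn terminators used by the official Llama 3.2 Instruct format. If the loaded tokenizer ships a chat template, the canonical prompt string could instead be produced with the standard transformers API; a sketch, with question standing in for the user input:

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": question},
]
prompt = tokenizer.apply_chat_template(
    messages,
    tokenize=False,              # return the formatted string, not token IDs
    add_generation_prompt=True,  # end with the assistant header so decoding starts there
)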
@@ -204,7 +215,7 @@ def diffusion_chat(question, max_it, pause_length, sharpness,
         question = placeholder
 
     print('started generation')
-    prompt =
+    prompt = format_chat_prompt(question)
     input_ids = tokenizer.encode(prompt, add_special_tokens=False)
     answer_start = find_answer_start(input_ids, assistant_marker_ids)
     if answer_start is None:
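Taken together, the pieces in this commit give diffusion_chat the following flow (a sketch; the question text is illustrative):

prompt = format_chat_prompt("What is diffusion-based text generation?")
input_ids = tokenizer.encode(prompt, add_special_tokens=False)
answer_start = find_answer_start(input_ids, assistant_marker_ids)
# Tokens from answer_start onward form the answer region, which the
# diffusion loop presumably fills in from MASK placeholders.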