NewJapaneseLLM

Running on Zero

vilarin committed on May 23, 2024

Commit

639e063

verified ·

1 Parent(s): 997f90e

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -64,7 +64,8 @@ def stream_chat(message: str, history: list, temperature: float, max_new_tokens:
     conversation.append({"role": "user", "content": message})
     input_ids = tokenizer.apply_chat_template(conversation, tokenize=True, add_generation_prompt=True, return_tensors="pt").to(model.device)
     gen_tokens= model.generate(
         input_ids,
         max_new_tokens=max_new_tokens,

     conversation.append({"role": "user", "content": message})
     input_ids = tokenizer.apply_chat_template(conversation, tokenize=True, add_generation_prompt=True, return_tensors="pt").to(model.device)
+    prompt_padded_len = len(input_ids[0])
     gen_tokens= model.generate(
         input_ids,
         max_new_tokens=max_new_tokens,