File size: 1,780 Bytes
d5beaef
87577d6
 
d5beaef
 
 
 
 
 
 
 
 
 
 
 
 
d0c796a
d5beaef
 
 
 
 
 
 
 
 
87577d6
 
749e9d7
 
 
 
 
 
 
 
d5beaef
 
87577d6
d5e03be
 
459cf0f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import torch
import random
import gradio as gr
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Hub id of the base checkpoint; the tokenizer below is loaded from the same id.
checkpoint_path = "microsoft/Phi-3-mini-4k-instruct"

# Keyword arguments forwarded verbatim to AutoModelForCausalLM.from_pretrained.
model_kwargs = {
    # NOTE(review): the KV cache is switched off here; presumably carried over
    # from a training setup -- confirm it is really wanted for inference.
    "use_cache": False,
    "trust_remote_code": True,
    # "eager" selects the plain attention implementation (not flash-attention).
    "attn_implementation": "eager",
    "torch_dtype": torch.bfloat16,
    "device_map": None,
}
base_model = AutoModelForCausalLM.from_pretrained(checkpoint_path, **model_kwargs)

new_model = "checkpoint_dir/checkpoint-100"  # change to the path where your model is saved

# Attach the PEFT adapter stored at `new_model` to the base model, then call
# merge_and_unload() so the pipeline is handed the model that call returns.
model = PeftModel.from_pretrained(base_model, new_model).merge_and_unload()

# The tokenizer comes from the base checkpoint; EOS doubles as the padding
# token and padding is applied on the right.
tokenizer = AutoTokenizer.from_pretrained(checkpoint_path, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# Module-level text-generation pipeline shared by every chat request.
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer)

def _history_to_messages(history):
    """Convert a chat history into a list of ``{"role", "content"}`` dicts.

    Two entry shapes are accepted:

    * a pair ``[user_text, assistant_text]`` -- expanded into a user message
      followed by an assistant message;
    * a dict that already carries ``"role"`` and ``"content"`` keys -- copied
      through as-is.

    Entries (or halves of a pair) whose content is ``None`` are skipped so that
    no ``None`` content reaches the chat template. ``history`` itself may be
    ``None`` or empty, in which case an empty list is returned.
    """
    messages = []
    for turn in history or []:
        if isinstance(turn, dict):
            # Already in role/content form; keep only those two keys.
            if turn.get("content") is not None:
                messages.append({"role": turn["role"], "content": turn["content"]})
            continue
        user_text, assistant_text = turn[0], turn[1]
        if user_text is not None:
            messages.append({"role": "user", "content": user_text})
        if assistant_text is not None:
            messages.append({"role": "assistant", "content": assistant_text})
    return messages


def infer(message, history):
    """Generate the assistant reply for ``message`` given the prior ``history``.

    Args:
        message: The new user message (text).
        history: Earlier conversation turns, either as ``[user, assistant]``
            pairs or as role/content dicts; may be empty or ``None``.

    Returns:
        The newly generated text with surrounding whitespace stripped.
    """
    chat_list = _history_to_messages(history)
    chat_list.append({"role": "user", "content": message})

    # Render the conversation to a single prompt string using the tokenizer's
    # chat template, ending with the generation prompt.
    prompt = pipe.tokenizer.apply_chat_template(
        chat_list, tokenize=False, add_generation_prompt=True
    )

    # return_full_text=False makes the pipeline return only the completion, so
    # the prompt does not have to be sliced off the output by character count.
    outputs = pipe(
        prompt,
        max_new_tokens=256,
        do_sample=True,
        num_beams=1,
        temperature=0.3,
        top_k=50,
        top_p=0.95,
        max_time=180,
        return_full_text=False,
    )
    return outputs[0]["generated_text"].strip()

# Build the two UI components first, then wire them to `infer` in a
# ChatInterface and start the app.
chat_window = gr.Chatbot(height=300)
prompt_box = gr.Textbox(
    placeholder="How can I help you today",
    container=False,
    scale=7,
)
demo = gr.ChatInterface(
    infer,
    chatbot=chat_window,
    textbox=prompt_box,
    theme="soft",
    title="Phi-3 Assistant",
)
demo.launch()