Spaces:

AkashDataScience
/

Phi-3_assistant

Sleeping

Phi-3_assistant / app.py

Updated file path

d0c796a about 1 year ago

1.29 kB

	import torch
	import random
	import gradio as gr
	from peft import PeftModel
	from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

	# Hub id of the base model; the tokenizer below is loaded from the same id.
	checkpoint_path = "microsoft/Phi-3-mini-4k-instruct"

	# Keyword arguments forwarded to `from_pretrained` when loading the base model.
	model_kwargs = {
	    "use_cache": False,
	    "trust_remote_code": True,
	    # 'eager' selects the plain attention implementation (not flash-attention).
	    "attn_implementation": 'eager',
	    "torch_dtype": torch.bfloat16,
	    "device_map": None,
	}
	base_model = AutoModelForCausalLM.from_pretrained(checkpoint_path, **model_kwargs)

	# Location of the fine-tuned adapter checkpoint; change to the path where
	# your model is saved.
	new_model = "checkpoint_dir/checkpoint-100"

	# Wrap the base model with the saved adapter, then call merge_and_unload()
	# so the rest of the script works with the resulting plain model object.
	model = PeftModel.from_pretrained(base_model, new_model).merge_and_unload()

	tokenizer = AutoTokenizer.from_pretrained(checkpoint_path, trust_remote_code=True)
	# Use the end-of-sequence token for padding, and pad on the right.
	tokenizer.pad_token = tokenizer.eos_token
	tokenizer.padding_side = "right"

	# Text-generation pipeline used by the chat handler.
	pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

	def infer(message, history):
	    """Generate the assistant's reply to a single chat message.

	    Args:
	        message: Text of the latest user turn.
	        history: Earlier turns supplied by the chat UI. Not used: the
	            reply is generated from ``message`` alone.

	    Returns:
	        The generated text with the prompt prefix removed and surrounding
	        whitespace stripped.
	    """
	    # Build the prompt from the incoming message. The previous code read the
	    # not-yet-assigned local `prompt` here, raising UnboundLocalError on
	    # every call.
	    chat = [{"role": "user", "content": message}]
	    prompt = pipe.tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
	    outputs = pipe(prompt, max_new_tokens=256, do_sample=True, num_beams=1, temperature=0.3, top_k=50, top_p=0.95, max_time= 180)
	    # The slice assumes `generated_text` begins with the prompt itself, so
	    # dropping len(prompt) characters leaves only the newly generated reply.
	    return outputs[0]['generated_text'][len(prompt):].strip()

	# Build the chat UI around `infer` and start serving it.
	demo = gr.ChatInterface(infer)
	demo.launch()