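# Gradio chat demo: Qwen3-4B-Instruct-2507 base model with the
# help2opensource mental-health-therapy LoRA adapter applied on top.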
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import gradio as gr
# -------------------------
# Base + Adapter configuration
# -------------------------
base_model_name = "Qwen/Qwen3-4B-Instruct-2507"
adapter_model_name = "help2opensource/Qwen3-4B-Instruct-2507_mental_health_therapy"
device = "cuda" if torch.cuda.is_available() else "cpu"
# -------------------------
# Load base model and tokenizer
# -------------------------
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
).to(device)
# -------------------------
# Load LoRA adapter
# -------------------------
model = PeftModel.from_pretrained(base_model, adapter_model_name)
# Optional: merge LoRA weights for faster inference
model = model.merge_and_unload()
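# merge_and_unload() folds the LoRA deltas into the base weights and returns a
# plain transformers model, so generation no longer routes through adapter layers.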
def predict(message, history):
    # With type="messages", history already arrives as a list of
    # {"role": ..., "content": ...} dicts; append the new user turn.
    messages = history + [{"role": "user", "content": message}]
    # Build the prompt with the model's chat template
    try:
        input_text = tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
    except TypeError:
        # Fallback for older tokenizers that don't support add_generation_prompt
        input_text = tokenizer.apply_chat_template(messages, tokenize=False)
    inputs = tokenizer(input_text, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=1024,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
        )
    decoded = tokenizer.decode(outputs[0], skip_special_tokens=False)
    # Extract only the assistant's final response between the chat markers
    if "<|im_start|>assistant" in decoded:
        response = (
            decoded.split("<|im_start|>assistant")[-1].split("<|im_end|>")[0].strip()
        )
    else:
        response = decoded
    return response
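# ChatInterface with type="messages" passes history as OpenAI-style role/content
# dicts (matching apply_chat_template above) and accepts a plain string return value.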
demo = gr.ChatInterface(predict, type="messages")
demo.launch()
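# On a Hugging Face Space this file typically runs as app.py; run locally with
# `python app.py`, which serves the UI on Gradio's default port (7860).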