import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Model configuration
MODEL_NAME = "DarwinAnim8or/TinyRP"

# Load model
print("Loading model...")
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        torch_dtype=torch.float32,
        device_map="cpu",
        trust_remote_code=True
    )
    print("✅ Model loaded successfully")
except Exception as e:
    print(f"❌ Model loading failed: {e}")
    tokenizer = None
    model = None
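
# Note: float32 on CPU is the safe default for a free Space; on GPU hardware,
# torch_dtype=torch.float16 with device_map="auto" would be the usual swap.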

# Character presets
CHARACTERS = {
    "Knight": "You are Sir Gareth, a brave knight on a quest to save the kingdom. You speak with honor and courage.",
    "Wizard": "You are Eldara, an ancient wizard who speaks in riddles and knows mystical secrets.",
    "Tavern Keeper": "You are Bram, a cheerful tavern keeper who loves stories and meeting travelers.",
    "Scientist": "You are Dr. Maya Chen, a brilliant scientist who loves discovery and explaining concepts simply.",
    "Space Explorer": "You are Captain Nova, a fearless space explorer who has traveled distant galaxies."
}
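
# New presets are just additional entries, e.g. (hypothetical):
# CHARACTERS["Pirate"] = "You are Captain Flint, a cunning pirate who loves treasure and tall tales."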

def respond(message, history, character, max_tokens, temperature, top_p, repetition_penalty):
    """Generate a response using the ChatML prompt format."""
    if not message.strip():
        yield "Please enter a message."
        return

    if model is None:
        yield "❌ Model not loaded properly."
        return

    try:
        # Build the ChatML conversation
        conversation = ""

        # Add the character preset as the system message
        if character != "None" and character in CHARACTERS:
            conversation += f"<|im_start|>system\n{CHARACTERS[character]}<|im_end|>\n"

        # Add conversation history
        for user_msg, assistant_msg in history:
            conversation += f"<|im_start|>user\n{user_msg}<|im_end|>\n"
            conversation += f"<|im_start|>assistant\n{assistant_msg}<|im_end|>\n"

        # Add the current message and cue the assistant turn
        conversation += f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
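
        # For illustration, with the "Knight" preset and no history, the
        # assembled prompt looks like:
        #   <|im_start|>system
        #   You are Sir Gareth, a brave knight...<|im_end|>
        #   <|im_start|>user
        #   Hello!<|im_end|>
        #   <|im_start|>assistant
        # The trailing open assistant tag is what prompts the model to reply
        # in character.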

        # Tokenize
        inputs = tokenizer.encode(
            conversation,
            return_tensors="pt",
            max_length=900,
            truncation=True
        )
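        # max_length=900 leaves headroom for max_new_tokens within the model's
        # context window (assumed to be about 1k tokens for a model this size).
        # The tokenizer truncates from the right by default, so a very long chat
        # could lose the trailing assistant cue; setting truncation_side="left"
        # on the tokenizer is the usual fix if that becomes a problem.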

        # Generate
        with torch.no_grad():
            outputs = model.generate(
                inputs,
                max_new_tokens=int(max_tokens),
                temperature=float(temperature),
                top_p=float(top_p),
                repetition_penalty=float(repetition_penalty),
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
                eos_token_id=tokenizer.eos_token_id
            )
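        # Only input_ids are passed, so transformers may warn about a missing
        # attention mask; for a single unpadded sequence this is harmless, and
        # attention_mask=torch.ones_like(inputs) would silence it.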

        # Decode the full sequence, keeping special tokens so we can split on them
        full_text = tokenizer.decode(outputs[0], skip_special_tokens=False)

        # Extract the assistant response
        if "<|im_start|>assistant\n" in full_text:
            response = full_text.split("<|im_start|>assistant\n")[-1]
            response = response.replace("<|im_end|>", "").strip()
        else:
            response = "Could not generate response."

        # Strip any leftover ChatML markers
        response = response.replace("<|im_start|>", "").replace("<|im_end|>", "").strip()
        if not response:
            response = "No response generated."

        yield response

    except Exception as e:
        yield f"Generation error: {str(e)}"

# Create a simple ChatInterface
demo = gr.ChatInterface(
    fn=respond,
    title="🎭 TinyRP Character Chat",
    description="Chat with AI characters using local CPU inference! Select a character and start chatting.",
    additional_inputs=[
        gr.Dropdown(
            choices=["None"] + list(CHARACTERS.keys()),
            value="Knight",
            label="Character"
        ),
        gr.Slider(minimum=16, maximum=256, value=48, step=16, label="Max tokens"),
        gr.Slider(minimum=0.1, maximum=2.0, value=0.9, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.85, step=0.05, label="Top-p"),
        gr.Slider(minimum=1.0, maximum=1.5, value=1.1, step=0.05, label="Repetition penalty")
    ],
    examples=[
        ["Hello! What's your name?"],
        ["Tell me about your adventures."],
        ["What's your favorite thing to do?"],
        ["Can you help me with something?"]
    ],
    cache_examples=False
)

if __name__ == "__main__":
    demo.launch()