import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Model configuration
MODEL_NAME = "DarwinAnim8or/TinyRP"

# Load model
print("Loading model...")
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        torch_dtype=torch.float32,
        device_map="cpu",
        trust_remote_code=True
    )
    print("✅ Model loaded successfully")
except Exception as e:
    print(f"❌ Model loading failed: {e}")
    tokenizer = None
    model = None
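
# Note: float32 on CPU is the safe default for a free Space; on GPU hardware,
# torch_dtype=torch.float16 with device_map="auto" would be the usual swap.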

# Character presets
CHARACTERS = {
    "Knight": "You are Sir Gareth, a brave knight on a quest to save the kingdom. You speak with honor and courage.",
    "Wizard": "You are Eldara, an ancient wizard who speaks in riddles and knows mystical secrets.",
    "Tavern Keeper": "You are Bram, a cheerful tavern keeper who loves stories and meeting travelers.",
    "Scientist": "You are Dr. Maya Chen, a brilliant scientist who loves discovery and explaining concepts simply.",
    "Space Explorer": "You are Captain Nova, a fearless space explorer who has traveled distant galaxies."
}
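
# New presets are just additional entries, e.g. (hypothetical):
# CHARACTERS["Pirate"] = "You are Captain Flint, a cunning pirate who loves treasure and tall tales."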

def respond(message, history, character, max_tokens, temperature, top_p, repetition_penalty):
    """Generate a response using the ChatML prompt format."""
    if not message.strip():
        yield "Please enter a message."
        return

    if model is None:
        yield "❌ Model not loaded properly."
        return

    try:
        # Build the ChatML conversation
        conversation = ""

        # Add the character preset as the system message
        if character != "None" and character in CHARACTERS:
            conversation += f"<|im_start|>system\n{CHARACTERS[character]}<|im_end|>\n"

        # Add conversation history
        for user_msg, assistant_msg in history:
            conversation += f"<|im_start|>user\n{user_msg}<|im_end|>\n"
            conversation += f"<|im_start|>assistant\n{assistant_msg}<|im_end|>\n"

        # Add the current message and cue the assistant turn
        conversation += f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
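
        # For illustration, with the "Knight" preset and no history, the
        # assembled prompt looks like:
        #   <|im_start|>system
        #   You are Sir Gareth, a brave knight...<|im_end|>
        #   <|im_start|>user
        #   Hello!<|im_end|>
        #   <|im_start|>assistant
        # The trailing open assistant tag is what prompts the model to reply
        # in character.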

        # Tokenize
        inputs = tokenizer.encode(
            conversation,
            return_tensors="pt",
            max_length=900,
            truncation=True
        )
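        # max_length=900 leaves headroom for max_new_tokens within the model's
        # context window (assumed to be about 1k tokens for a model this size).
        # The tokenizer truncates from the right by default, so a very long chat
        # could lose the trailing assistant cue; setting truncation_side="left"
        # on the tokenizer is the usual fix if that becomes a problem.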

        # Generate
        with torch.no_grad():
            outputs = model.generate(
                inputs,
                max_new_tokens=int(max_tokens),
                temperature=float(temperature),
                top_p=float(top_p),
                repetition_penalty=float(repetition_penalty),
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
                eos_token_id=tokenizer.eos_token_id
            )
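        # Only input_ids are passed, so transformers may warn about a missing
        # attention mask; for a single unpadded sequence this is harmless, and
        # attention_mask=torch.ones_like(inputs) would silence it.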

        # Decode the full sequence, keeping special tokens so we can split on them
        full_text = tokenizer.decode(outputs[0], skip_special_tokens=False)

        # Extract the assistant response
        if "<|im_start|>assistant\n" in full_text:
            response = full_text.split("<|im_start|>assistant\n")[-1]
            response = response.replace("<|im_end|>", "").strip()
        else:
            response = "Could not generate response."

        # Strip any leftover ChatML markers
        response = response.replace("<|im_start|>", "").replace("<|im_end|>", "").strip()
        if not response:
            response = "No response generated."

        yield response

    except Exception as e:
        yield f"Generation error: {str(e)}"

# Create a simple ChatInterface
demo = gr.ChatInterface(
    fn=respond,
    title="🎭 TinyRP Character Chat",
    description="Chat with AI characters using local CPU inference! Select a character and start chatting.",
    additional_inputs=[
        gr.Dropdown(
            choices=["None"] + list(CHARACTERS.keys()),
            value="Knight",
            label="Character"
        ),
        gr.Slider(minimum=16, maximum=256, value=48, step=16, label="Max tokens"),
        gr.Slider(minimum=0.1, maximum=2.0, value=0.9, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.85, step=0.05, label="Top-p"),
        gr.Slider(minimum=1.0, maximum=1.5, value=1.1, step=0.05, label="Repetition penalty")
    ],
    examples=[
        ["Hello! What's your name?"],
        ["Tell me about your adventures."],
        ["What's your favorite thing to do?"],
        ["Can you help me with something?"]
    ],
    cache_examples=False
)

if __name__ == "__main__":
    demo.launch()