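"""REACT Ethical AI Lab: a Gradio chat Space that wraps a small instruct
model with a lightweight keyword filter for student AI projects."""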
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
from threading import Thread
import random
# Configuration 🛠️
model_name = "microsoft/Phi-3-mini-4k-instruct"
device = "cuda" if torch.cuda.is_available() else "cpu"
# Load model with memory optimizations
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
    device_map="auto",
    low_cpu_mem_usage=True,
)
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Safety tools 🛡️ (simplified)
BLOCKED_WORDS = ["violence", "hate", "gun", "personal"]
SAFE_IDEAS = ["Design a robot to clean parks 🌳", "Code a recycling game ♻️"]
def is_safe(text):
    """Very rough keyword filter; real moderation needs more than substring checks."""
    text = text.lower()
    return not any(bad_word in text for bad_word in BLOCKED_WORDS)
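# e.g. is_safe("how to build a gun") -> False, is_safe("recycling game idea") -> True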
def respond(message, history, system_message, max_tokens, temperature, top_p):
    if not is_safe(message):
        # yield (not return) so the refusal is emitted by this generator function
        yield f"🚫 Let's focus on positive projects! Try: {random.choice(SAFE_IDEAS)}"
        return
    # Build a plain-text prompt from a truncated history to bound memory use
    # (Phi-3's chat template via tokenizer.apply_chat_template would be more robust)
    prompt = f"System: {system_message}\n"
    for user, bot in history[-2:]:  # keep only the last 2 exchanges
        prompt += f"User: {user}\nAssistant: {bot}\n"
    prompt += f"User: {message}\nAssistant:"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    # Stream tokens back as they are produced
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        **inputs,  # input_ids and attention_mask
        streamer=streamer,
        max_new_tokens=min(max_tokens, 256),
        temperature=min(temperature, 0.7),
        top_p=top_p,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
    )
    # model.generate blocks, so run it on a worker thread while we consume the streamer
    Thread(target=model.generate, kwargs=generation_kwargs).start()
    response = ""
    for new_text in streamer:
        response += new_text
        yield response
with gr.Blocks() as demo:
    gr.Markdown("# 🤖 REACT Ethical AI Lab")
    gr.ChatInterface(
        respond,  # generator function, so replies stream into the chat window
        additional_inputs=[
            gr.Textbox("Help students create ethical AI projects", label="Guidelines"),
            gr.Slider(64, 256, value=128, label="Max Length"),
            gr.Slider(0.1, 0.7, value=0.3, label="Creativity"),
            gr.Slider(0.5, 1.0, value=0.9, label="Focus"),
        ],
        examples=[
            ["How to make a solar-powered robot?"],
            ["Simple air quality sensor code"],
        ],
    )
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0")
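# Run locally with `python app.py`, then open http://localhost:7860 (Gradio's default port).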