import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import random

# Configuration 🛠️
model_name = "microsoft/phi-3-mini-4k-instruct"  # Smaller model for memory constraints
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load model with optimizations
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
    device_map="auto",
    low_cpu_mem_usage=True
)
tokenizer = AutoTokenizer.from_pretrained(model_name)
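
# Note: this load path assumes enough memory for the full-precision weights
# (roughly 7-8 GB in fp16 for the ~3.8B-parameter Phi-3-mini). On tighter
# hardware, 4-bit loading via bitsandbytes (load_in_4bit=True) is a common
# fallback, though it is not configured here.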

# Safety tools 🛡️
SAFE_RESPONSES = [
    "Let's focus on positive tech projects! 🌱",
    "How about designing an eco-friendly robot? 🤖",
    "Let's explore renewable energy solutions! ☀️"
]

def generate_response(message, history):
    # Simple keyword-based safety check (history is not used in this minimal demo)
    if any(word in message.lower() for word in ["violence", "hate", "gun"]):
        return random.choice(SAFE_RESPONSES)
    
    # Format prompt
    prompt = f"<|user|>\n{message}<|end|>\n<|assistant|>"
    
    # Tokenize input
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    
    # Generate response (passing **inputs forwards the attention mask as well)
    outputs = model.generate(
        **inputs,
        max_new_tokens=256,
        temperature=0.7,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id
    )
    
    # Decode and return
    return tokenizer.decode(outputs[0][inputs.input_ids.shape[-1]:], skip_special_tokens=True)
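
# A quick smoke test of the handler outside the UI (hypothetical input;
# ChatInterface passes the running chat history as the second argument):
#   print(generate_response("Ideas for a recycling sorter project?", []))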

# Create Gradio interface
demo = gr.ChatInterface(
    fn=generate_response,
    examples=[
        "How to make a solar-powered robot?",
        "Python code for air quality sensor"
    ],
    title="🤖 REACT Ethical AI Lab",
    description="Safe AI project assistant for students"
)

# Optional: explicit API setup by mounting on an existing FastAPI app.
# This is an alternative to demo.launch() below, so it is left commented out
# (gr.mount_gradio_app expects the FastAPI app, the Blocks object, and a path):
# from fastapi import FastAPI
# app = FastAPI()
# app = gr.mount_gradio_app(app=app, blocks=demo, path="/api")

if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        enable_queue=True,
        share=False
    )
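
# Sketch of a programmatic client call once the server is up, assuming the
# default local address and the gradio_client package (ChatInterface exposes
# its handler under the /chat API route):
#   from gradio_client import Client
#   client = Client("http://127.0.0.1:7860/")
#   print(client.predict("How to make a solar-powered robot?", api_name="/chat"))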