Spaces:

reactallegany
/

promptlab

Runtime error

File size: 3,491 Bytes

dafbfa4
c31d961
 
 
 
 
 
 
 
b717fcf
dafbfa4
981a2dd
c31d961
b717fcf
c31d961
 
 
 
 
 
 
 
b717fcf
981a2dd
c31d961
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dafbfa4
 
 
 
 
 
 
c31d961
 
 
 
 
981a2dd
dafbfa4
 
 
 
 
ff54bae
dafbfa4
 
 
 
 
b717fcf
981a2dd
c31d961
ff54bae
 
 
 
981a2dd
ff54bae
 
b717fcf
ff54bae
b717fcf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ff54bae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b717fcf

import gradio as gr
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TextIteratorStreamer,
    pipeline,
    BitsAndBytesConfig
)
from threading import Thread
import random

# Configuration 🛠️
model_name = "HuggingFaceH4/zephyr-7b-beta"
# Every later device decision in this file keys off this one string.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Quantization setup: 4-bit NF4 weights, double quantization, float16 compute.
# The config object is always built, but only handed to the loader on CUDA.
quantization_config = BitsAndBytesConfig(
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    load_in_4bit=True,
)

# Model loading with fallback: first attempt uses automatic device placement
# (quantized float16 on CUDA, plain float32 otherwise); any failure is printed
# and the model is reloaded unquantized on the CPU in float32.
try:
    if device == "cuda":
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            quantization_config=quantization_config,
            device_map="auto",
            torch_dtype=torch.float16,
        )
    else:
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            quantization_config=None,
            device_map="auto",
            torch_dtype=torch.float32,
        )
except Exception as e:
    print(f"Error loading model with GPU: {e}")
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float32,
        device_map="cpu",
    )

# The tokenizer comes from the same checkpoint as the model.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Safety tools 🛡️
# Suggestions offered in place of a reply when a message is rejected.
SAFE_IDEAS = [
    "Design a robot to clean parks 🌳",
    "Code a game about recycling ♻️",
    "Plan an AI tool for school safety 🚸",
]
# Substrings that cause a message to be rejected outright.
BLOCKED_WORDS = ["violence", "hate", "gun", "personal"]
# Toxicity classifier; device 0 is the first CUDA device, -1 means CPU.
safety_checker = pipeline(
    task="text-classification",
    model="unitary/toxic-bert",
    device=-1 if device != "cuda" else 0,
)

def is_safe(text):
    """Return True when *text* passes both the keyword and toxicity checks.

    The text is lower-cased, then rejected if it contains any entry of
    ``BLOCKED_WORDS`` as a substring. Otherwise it is scored by
    ``safety_checker`` and rejected only when the top label is ``"toxic"``
    with a score above 0.7.

    Args:
        text: The user message to screen. ``None`` is treated as empty.

    Returns:
        bool: False if the message should be blocked, True otherwise.
    """
    text = (text or "").lower()
    if any(bad_word in text for bad_word in BLOCKED_WORDS):
        return False
    if not text.strip():
        # Nothing to classify; skip the model call for blank input.
        return True
    # Truncate to the classifier's maximum length so a very long message is
    # scored on its leading portion instead of raising inside the model.
    result = safety_checker(text, truncation=True)[0]
    return not (result["label"] == "toxic" and result["score"] > 0.7)

def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Stream a chat reply for *message*, or a redirect if it is unsafe.

    This is a generator used as the chat callback: every yielded value is the
    full reply text accumulated so far, so the caller can display it
    incrementally.

    Args:
        message: The latest user message.
        history: Earlier conversation. Accepts either ``(user, assistant)``
            pairs or ``{"role": ..., "content": ...}`` dicts; only the last
            five entries are used. ``None`` is treated as empty.
        system_message: Text placed in the leading system turn.
        max_tokens: Upper bound on newly generated tokens (coerced to int).
        temperature: Sampling temperature; a value that is falsy or not
            positive disables sampling.
        top_p: Nucleus-sampling cutoff, used only when sampling is enabled.

    Yields:
        str: The reply so far, or a single redirect message when
        ``is_safe(message)`` is false.
    """
    if not is_safe(message):
        # Inside a generator, ``return <value>`` only sets StopIteration.value
        # and nothing is ever displayed, so the notice must be yielded.
        yield f"🚫 Let's focus on positive projects! Try: {random.choice(SAFE_IDEAS)}"
        return

    messages = [{"role": "system", "content": system_message}]

    for turn in (history or [])[-5:]:
        if isinstance(turn, dict):
            # Role/content dict history: keep plain-text chat turns only.
            role = turn.get("role")
            content = turn.get("content")
            if role in ("user", "assistant") and isinstance(content, str) and content:
                messages.append({"role": role, "content": content})
            continue
        user_msg, bot_msg = turn
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if bot_msg:
            messages.append({"role": "assistant", "content": bot_msg})

    messages.append({"role": "user", "content": message})

    # add_generation_prompt appends the assistant-turn header, so the model
    # starts an answer instead of continuing the user's text.
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(model.device)

    # skip_special_tokens keeps end-of-sequence markers out of the reply.
    streamer = TextIteratorStreamer(
        tokenizer,
        skip_prompt=True,
        skip_special_tokens=True,
    )
    generation_kwargs = {
        "inputs": inputs,
        "max_new_tokens": int(max_tokens),
        "streamer": streamer,
    }
    if temperature and temperature > 0:
        # temperature/top_p only take effect when sampling is switched on.
        generation_kwargs.update(
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
        )
    else:
        generation_kwargs["do_sample"] = False

    # generate() blocks until finished, so it runs on a worker thread while
    # this generator drains the streamer.
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    partial_message = ""
    for new_token in streamer:
        partial_message += new_token
        yield partial_message

    # The streamer is exhausted once generation ends; reap the worker.
    thread.join()

# Build the page: a title followed by a chat interface backed by respond().
with gr.Blocks() as demo:
    gr.Markdown(value="# 🤖 REACT Ethical AI Lab")

    # Extra controls, listed in the order of respond()'s trailing parameters
    # (system_message, max_tokens, temperature, top_p).
    _extra_controls = [
        gr.Textbox(
            value="You help students create ethical AI projects.",
            label="Guidelines",
        ),
        gr.Slider(minimum=128, maximum=1024, value=512, label="Max Response Length"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.3, label="Creativity Level"),
        gr.Slider(minimum=0.7, maximum=1.0, value=0.85, label="Focus Level"),
    ]

    # Sample prompts; each inner list holds only the message text.
    _starter_prompts = [
        ["How to build a robot that plants trees?"],
        ["Python code for a pollution sensor"],
    ]

    gr.ChatInterface(
        fn=respond,
        additional_inputs=_extra_controls,
        examples=_starter_prompts,
    )

# Bind to all interfaces only when run as a script.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0")