Spaces:

reactallegany
/

promptlab

Runtime error

App Files Files Community

promptlab / app.py

bditto

Update app.py

c31d961 verified 8 months ago

raw

history blame

3.49 kB

	import gradio as gr
	import torch
	from transformers import (
	AutoModelForCausalLM,
	AutoTokenizer,
	TextIteratorStreamer,
	pipeline,
	BitsAndBytesConfig
	)
	from threading import Thread
	import random

	# Configuration 🛠️
	# Checkpoint served by this app, and the device chosen from CUDA availability.
	model_name = "HuggingFaceH4/zephyr-7b-beta"
	if torch.cuda.is_available():
	    device = "cuda"
	else:
	    device = "cpu"

	# Quantization setup: 4-bit NF4 weights with double quantization,
	# computing in float16.
	quantization_config = BitsAndBytesConfig(
	    bnb_4bit_compute_dtype=torch.float16,
	    bnb_4bit_quant_type="nf4",
	    bnb_4bit_use_double_quant=True,
	    load_in_4bit=True,
	)

	# Model loading with fallback
	# First attempt uses automatic device placement: quantized float16 when CUDA
	# was detected, unquantized float32 otherwise. If that attempt raises, the
	# error is printed and the model is reloaded in float32 pinned to the CPU.
	try:
	    if device == "cuda":
	        model = AutoModelForCausalLM.from_pretrained(
	            model_name,
	            quantization_config=quantization_config,
	            device_map="auto",
	            torch_dtype=torch.float16,
	        )
	    else:
	        model = AutoModelForCausalLM.from_pretrained(
	            model_name,
	            quantization_config=None,
	            device_map="auto",
	            torch_dtype=torch.float32,
	        )
	except Exception as load_error:
	    print(f"Error loading model with GPU: {load_error}")
	    model = AutoModelForCausalLM.from_pretrained(
	        model_name,
	        device_map="cpu",
	        torch_dtype=torch.float32,
	    )

	# The tokenizer comes from the same checkpoint as the model.
	tokenizer = AutoTokenizer.from_pretrained(model_name)

	# Safety tools 🛡️
	# Keywords that make is_safe() reject a message outright.
	BLOCKED_WORDS = [
	    "violence",
	    "hate",
	    "gun",
	    "personal",
	]

	# Alternative project ideas offered when a message is rejected.
	SAFE_IDEAS = [
	    "Design a robot to clean parks 🌳",
	    "Code a game about recycling ♻️",
	    "Plan an AI tool for school safety 🚸",
	]

	# Toxicity classifier; device index 0 when CUDA was detected, -1 otherwise.
	safety_checker = pipeline(
	    task="text-classification",
	    model="unitary/toxic-bert",
	    device=-1 if device != "cuda" else 0,
	)

	def is_safe(text):
	    """Return True when *text* passes the keyword filter and the toxicity check.

	    The text is lower-cased, then rejected if any entry of ``BLOCKED_WORDS``
	    occurs at the start of a word. Otherwise the top prediction of
	    ``safety_checker`` decides: the text is unsafe only when that prediction
	    is labelled ``"toxic"`` with a score above 0.7.

	    Args:
	        text: The user message to screen.

	    Returns:
	        bool: False if the message should be blocked, True otherwise.
	    """
	    # Local import so this function does not depend on the file's import block.
	    import re

	    text = text.lower()

	    # Match blocked words only at a word start. A plain substring test also
	    # rejected harmless words such as "whatever" (contains "hate") or "begun"
	    # (contains "gun"); anchoring at the word start still catches derived
	    # forms like "guns" or "hateful".
	    if any(
	        re.search(rf"\b{re.escape(bad_word)}", text)
	        for bad_word in BLOCKED_WORDS
	    ):
	        return False

	    # truncation=True keeps long messages within the classifier's maximum
	    # input length instead of raising on over-long input.
	    result = safety_checker(text, truncation=True)[0]
	    return not (result["label"] == "toxic" and result["score"] > 0.7)

	def respond(message, history, system_message, max_tokens, temperature, top_p):
	    """Stream a chat reply for the Gradio chat interface.

	    This is a generator: each yielded value is the reply text accumulated so
	    far, so the UI can update while tokens are still being produced.

	    Args:
	        message: The new user message.
	        history: Earlier turns as ``(user_msg, bot_msg)`` pairs; only the last
	            five pairs are included in the prompt.
	        system_message: Text used as the system prompt.
	        max_tokens: Upper bound on newly generated tokens (coerced to int).
	        temperature: Sampling temperature passed to ``model.generate``.
	        top_p: Nucleus-sampling threshold passed to ``model.generate``.

	    Yields:
	        str: The partial reply, growing with each streamed chunk, or a single
	        refusal message when ``is_safe(message)`` is false.
	    """
	    if not is_safe(message):
	        # In a generator, `return <value>` only ends iteration and the value
	        # is never delivered to the consumer, so the refusal must be yielded.
	        yield f"🚫 Let's focus on positive projects! Try: {random.choice(SAFE_IDEAS)}"
	        return

	    messages = [{"role": "system", "content": system_message}]

	    # Keep the prompt short: replay only the five most recent exchanges.
	    for user_msg, bot_msg in history[-5:]:
	        if user_msg:
	            messages.append({"role": "user", "content": user_msg})
	        if bot_msg:
	            messages.append({"role": "assistant", "content": bot_msg})

	    messages.append({"role": "user", "content": message})

	    # add_generation_prompt=True ends the prompt with the assistant header so
	    # the model starts a new assistant turn rather than continuing the
	    # user's text.
	    inputs = tokenizer.apply_chat_template(
	        messages,
	        add_generation_prompt=True,
	        return_tensors="pt",
	    ).to(model.device)

	    # skip_special_tokens keeps markers such as the end-of-sequence token out
	    # of the text shown to the user.
	    streamer = TextIteratorStreamer(
	        tokenizer,
	        skip_prompt=True,
	        skip_special_tokens=True,
	    )
	    generation_kwargs = {
	        "inputs": inputs,
	        # UI sliders may deliver floats; generate() needs an integer count.
	        "max_new_tokens": int(max_tokens),
	        # temperature/top_p only take effect when sampling is enabled.
	        "do_sample": True,
	        "temperature": temperature,
	        "top_p": top_p,
	        "streamer": streamer,
	    }

	    # generate() blocks until finished, so it runs in a worker thread while
	    # this generator drains the streamer.
	    thread = Thread(target=model.generate, kwargs=generation_kwargs)
	    thread.start()

	    partial_message = ""
	    for new_token in streamer:
	        partial_message += new_token
	        yield partial_message

	    # The streamer is exhausted once generation ends; reap the worker thread.
	    thread.join()

	# Assemble the UI: a title plus a chat interface driven by respond().
	with gr.Blocks() as demo:
	    gr.Markdown("# 🤖 REACT Ethical AI Lab")

	    # Extra controls, listed in the order of respond()'s parameters after
	    # (message, history): system_message, max_tokens, temperature, top_p.
	    guidelines_box = gr.Textbox(
	        value="You help students create ethical AI projects.",
	        label="Guidelines",
	    )
	    length_slider = gr.Slider(
	        minimum=128, maximum=1024, value=512, label="Max Response Length"
	    )
	    creativity_slider = gr.Slider(
	        minimum=0.1, maximum=1.0, value=0.3, label="Creativity Level"
	    )
	    focus_slider = gr.Slider(
	        minimum=0.7, maximum=1.0, value=0.85, label="Focus Level"
	    )

	    gr.ChatInterface(
	        fn=respond,
	        additional_inputs=[
	            guidelines_box,
	            length_slider,
	            creativity_slider,
	            focus_slider,
	        ],
	        examples=[
	            ["How to build a robot that plants trees?"],
	            ["Python code for a pollution sensor"],
	        ],
	    )

	# Start the server on all network interfaces when run as a script.
	if __name__ == "__main__":
	    demo.launch(server_name="0.0.0.0")