bditto committed
Commit 2f24c08 · verified · 1 Parent(s): 00c908c

Update app.py

Files changed (1): app.py +45 -45
app.py CHANGED
@@ -1,14 +1,13 @@
 import gradio as gr
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
-from threading import Thread
+from transformers import AutoModelForCausalLM, AutoTokenizer
 import random
 
 # Configuration 🛠️
-model_name = "microsoft/phi-3-mini-4k-instruct"
+model_name = "microsoft/phi-3-mini-4k-instruct"  # Smaller model for memory constraints
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
-# Load model with memory optimizations
+# Load model with optimizations
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
     torch_dtype=torch.float16 if device == "cuda" else torch.float32,
@@ -17,57 +16,58 @@ model = AutoModelForCausalLM.from_pretrained(
 )
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 
-# Safety tools 🛡️ (simplified)
-BLOCKED_WORDS = ["violence", "hate", "gun", "personal"]
-SAFE_IDEAS = ["Design a robot to clean parks 🌳", "Code a recycling game ♻️"]
+# Safety tools 🛡️
+SAFE_RESPONSES = [
+    "Let's focus on positive tech projects! 🌱",
+    "How about designing an eco-friendly robot? 🤖",
+    "Let's explore renewable energy solutions! ☀️"
+]
 
-def is_safe(text):
-    text = text.lower()
-    return not any(bad_word in text for bad_word in BLOCKED_WORDS)
-
-def respond(message, history, system_message, max_tokens, temperature, top_p):
-    if not is_safe(message):
-        return f"🚫 Let's focus on positive projects! Try: {random.choice(SAFE_IDEAS)}"
+def generate_response(message, history):
+    # Simple safety check
+    if any(word in message.lower() for word in ["violence", "hate", "gun"]):
+        return random.choice(SAFE_RESPONSES)
 
-    # Create prompt with limited history
-    prompt = f"System: {system_message}\n"
-    for user, bot in history[-2:]:  # Keep only last 2 exchanges
-        prompt += f"User: {user}\nAssistant: {bot}\n"
-    prompt += f"User: {message}\nAssistant:"
+    # Format prompt
+    prompt = f"<|user|>\n{message}<|end|>\n<|assistant|>"
 
+    # Tokenize input
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
 
-    # Generation settings
-    generation_kwargs = dict(
+    # Generate response
+    outputs = model.generate(
         inputs.input_ids,
-        max_new_tokens=min(max_tokens, 256),
-        temperature=min(temperature, 0.7),
-        top_p=top_p,
+        max_new_tokens=256,
+        temperature=0.7,
         do_sample=True,
         pad_token_id=tokenizer.eos_token_id
     )
 
-    # Generate response
-    outputs = model.generate(**generation_kwargs)
-    response = tokenizer.decode(outputs[0][inputs.input_ids.shape[-1]:], skip_special_tokens=True)
-
-    yield response
+    # Decode and return
+    return tokenizer.decode(outputs[0][inputs.input_ids.shape[-1]:], skip_special_tokens=True)
 
-with gr.Blocks() as demo:
-    gr.Markdown("# 🤖 REACT Ethical AI Lab")
-    gr.ChatInterface(
-        respond,
-        additional_inputs=[
-            gr.Textbox("Help students create ethical AI projects", label="Guidelines"),
-            gr.Slider(64, 256, value=128, label="Max Length"),
-            gr.Slider(0.1, 0.7, value=0.3, label="Creativity"),
-            gr.Slider(0.5, 1.0, value=0.9, label="Focus")
-        ],
-        examples=[
-            ["How to make a solar-powered robot?"],
-            ["Simple air quality sensor code"]
-        ]
-    )
+# Create Gradio interface
+demo = gr.ChatInterface(
+    fn=generate_response,
+    examples=[
+        "How to make a solar-powered robot?",
+        "Python code for air quality sensor"
+    ],
+    title="🤖 REACT Ethical AI Lab",
+    description="Safe AI project assistant for students"
+)
+
+# Explicit API setup
+api = gr.mount_gradio_app(
+    app=demo.app,
+    blocks=demo,
+    path="/api"
+)
 
 if __name__ == "__main__":
-    demo.launch(server_name="0.0.0.0")
+    demo.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        enable_queue=True,
+        share=False
+    )
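
A note on the new prompt handling: generate_response hard-codes Phi-3's <|user|> / <|end|> / <|assistant|> markers and drops the conversation history entirely. Since the microsoft/phi-3-mini-4k-instruct tokenizer ships with a chat template, tokenizer.apply_chat_template can build the same prompt, history included, without hand-maintained special tokens. A minimal sketch, assuming the tokenizer and model loaded above and the (user, assistant) pair history that ChatInterface passes; the helper name build_inputs is illustrative, not part of the commit:

def build_inputs(message, history):
    # Hypothetical helper: rebuild the conversation as role/content dicts
    messages = []
    for user_msg, bot_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})
    # add_generation_prompt=True appends the assistant header so the model
    # continues with its own reply instead of a new user turn
    return tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)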
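
The "# Explicit API setup" block looks fragile: gr.mount_gradio_app takes a FastAPI application as its first argument, and demo.app is only populated once launch() has run, so app=demo.app at module import time is unlikely to work. The documented pattern creates the FastAPI app explicitly, mounts the Blocks on it, and serves it with uvicorn instead of demo.launch(). A sketch under that assumption, keeping the port from the diff:

from fastapi import FastAPI
import uvicorn

app = FastAPI()
# Mount the chat UI under /api on a FastAPI app we own
app = gr.mount_gradio_app(app, demo, path="/api")

if __name__ == "__main__":
    # uvicorn serves the combined app; demo.launch() is not called
    uvicorn.run(app, host="0.0.0.0", port=7860)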
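
Separately, enable_queue is a Gradio 3.x launch flag that was removed in Gradio 4.x, where queuing is configured by calling .queue() on the Blocks before launching. If the Space runs a current Gradio release, the launch block would need to look roughly like:

if __name__ == "__main__":
    # Gradio 4.x: configure the request queue on the Blocks object,
    # then launch without the removed enable_queue flag
    demo.queue().launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False
    )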