bditto committed
Commit 2f24c08 · verified · 1 Parent(s): 00c908c

Update app.py

Files changed (1): app.py +45 -45
app.py CHANGED
@@ -1,14 +1,13 @@
 import gradio as gr
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
-from threading import Thread
+from transformers import AutoModelForCausalLM, AutoTokenizer
 import random
 
 # Configuration 🛠️
-model_name = "microsoft/phi-3-mini-4k-instruct"
+model_name = "microsoft/phi-3-mini-4k-instruct"  # Smaller model for memory constraints
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
-# Load model with memory optimizations
+# Load model with optimizations
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
     torch_dtype=torch.float16 if device == "cuda" else torch.float32,
@@ -17,57 +16,58 @@ model = AutoModelForCausalLM.from_pretrained(
 )
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 
-# Safety tools 🛡️ (simplified)
-BLOCKED_WORDS = ["violence", "hate", "gun", "personal"]
-SAFE_IDEAS = ["Design a robot to clean parks 🌳", "Code a recycling game ♻️"]
+# Safety tools 🛡️
+SAFE_RESPONSES = [
+    "Let's focus on positive tech projects! 🌱",
+    "How about designing an eco-friendly robot? 🤖",
+    "Let's explore renewable energy solutions! ☀️"
+]
 
-def is_safe(text):
-    text = text.lower()
-    return not any(bad_word in text for bad_word in BLOCKED_WORDS)
-
-def respond(message, history, system_message, max_tokens, temperature, top_p):
-    if not is_safe(message):
-        return f"🚫 Let's focus on positive projects! Try: {random.choice(SAFE_IDEAS)}"
+def generate_response(message, history):
+    # Simple safety check
+    if any(word in message.lower() for word in ["violence", "hate", "gun"]):
+        return random.choice(SAFE_RESPONSES)
 
-    # Create prompt with limited history
-    prompt = f"System: {system_message}\n"
-    for user, bot in history[-2:]:  # Keep only last 2 exchanges
-        prompt += f"User: {user}\nAssistant: {bot}\n"
-    prompt += f"User: {message}\nAssistant:"
+    # Format prompt
+    prompt = f"<|user|>\n{message}<|end|>\n<|assistant|>"
 
+    # Tokenize input
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
 
-    # Generation settings
-    generation_kwargs = dict(
+    # Generate response
+    outputs = model.generate(
         inputs.input_ids,
-        max_new_tokens=min(max_tokens, 256),
-        temperature=min(temperature, 0.7),
-        top_p=top_p,
+        max_new_tokens=256,
+        temperature=0.7,
         do_sample=True,
         pad_token_id=tokenizer.eos_token_id
     )
 
-    # Generate response
-    outputs = model.generate(**generation_kwargs)
-    response = tokenizer.decode(outputs[0][inputs.input_ids.shape[-1]:], skip_special_tokens=True)
-
-    yield response
+    # Decode and return
+    return tokenizer.decode(outputs[0][inputs.input_ids.shape[-1]:], skip_special_tokens=True)
 
-with gr.Blocks() as demo:
-    gr.Markdown("# 🤖 REACT Ethical AI Lab")
-    gr.ChatInterface(
-        respond,
-        additional_inputs=[
-            gr.Textbox("Help students create ethical AI projects", label="Guidelines"),
-            gr.Slider(64, 256, value=128, label="Max Length"),
-            gr.Slider(0.1, 0.7, value=0.3, label="Creativity"),
-            gr.Slider(0.5, 1.0, value=0.9, label="Focus")
-        ],
-        examples=[
-            ["How to make a solar-powered robot?"],
-            ["Simple air quality sensor code"]
-        ]
-    )
+# Create Gradio interface
+demo = gr.ChatInterface(
+    fn=generate_response,
+    examples=[
+        "How to make a solar-powered robot?",
+        "Python code for air quality sensor"
+    ],
+    title="🤖 REACT Ethical AI Lab",
+    description="Safe AI project assistant for students"
+)
+
+# Explicit API setup
+api = gr.mount_gradio_app(
+    app=demo.app,
+    blocks=demo,
+    path="/api"
+)
 
 if __name__ == "__main__":
-    demo.launch(server_name="0.0.0.0")
+    demo.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        enable_queue=True,
+        share=False
+    )
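
A note on the new prompt handling: generate_response hard-codes Phi-3's <|user|> / <|end|> / <|assistant|> markers and drops the conversation history entirely. Since the microsoft/phi-3-mini-4k-instruct tokenizer ships with a chat template, tokenizer.apply_chat_template can build the same prompt, history included, without hand-maintained special tokens. A minimal sketch, assuming the tokenizer and model loaded above and the (user, assistant) pair history that ChatInterface passes; the helper name build_inputs is illustrative, not part of the commit:

def build_inputs(message, history):
    # Hypothetical helper: rebuild the conversation as role/content dicts
    messages = []
    for user_msg, bot_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})
    # add_generation_prompt=True appends the assistant header so the model
    # continues with its own reply instead of a new user turn
    return tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)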
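
The "# Explicit API setup" block looks fragile: gr.mount_gradio_app takes a FastAPI application as its first argument, and demo.app is only populated once launch() has run, so app=demo.app at module import time is unlikely to work. The documented pattern creates the FastAPI app explicitly, mounts the Blocks on it, and serves it with uvicorn instead of demo.launch(). A sketch under that assumption, keeping the port from the diff:

from fastapi import FastAPI
import uvicorn

app = FastAPI()
# Mount the chat UI under /api on a FastAPI app we own
app = gr.mount_gradio_app(app, demo, path="/api")

if __name__ == "__main__":
    # uvicorn serves the combined app; demo.launch() is not called
    uvicorn.run(app, host="0.0.0.0", port=7860)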
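
Separately, enable_queue is a Gradio 3.x launch flag that was removed in Gradio 4.x, where queuing is configured by calling .queue() on the Blocks before launching. If the Space runs a current Gradio release, the launch block would need to look roughly like:

if __name__ == "__main__":
    # Gradio 4.x: configure the request queue on the Blocks object,
    # then launch without the removed enable_queue flag
    demo.queue().launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False
    )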