bditto committed (verified)
Commit b717fcf · 1 Parent(s): 8e7c954

Update app.py

Files changed (1):
  app.py +38 -25
app.py CHANGED
@@ -1,11 +1,17 @@
 import gradio as gr
 import os
-from huggingface_hub import InferenceClient, login
-from transformers import pipeline
+from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer, pipeline
+from threading import Thread
 import random
 
-# Authenticate using secret environment variable 🔒
-login(token=os.environ.get("HF_TOKEN"))
+# Local model setup 🤖
+model_name = "HuggingFaceH4/zephyr-7b-beta"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    device_map="auto",
+    load_in_4bit=True  # Reduces VRAM usage
+)
 
 # Safety tools 🛡️
 BLOCKED_WORDS = ["violence", "hate", "gun", "personal"]
@@ -14,12 +20,8 @@ SAFE_IDEAS = [
     "Code a game about recycling ♻️",
     "Plan an AI tool for school safety 🚸"
 ]
-
-# Safety model
 safety_checker = pipeline("text-classification", model="unitary/toxic-bert")
 
-client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-
 def is_safe(text):
     text = text.lower()
     if any(bad_word in text for bad_word in BLOCKED_WORDS):
@@ -28,16 +30,14 @@ def is_safe(text):
     return not (result["label"] == "toxic" and result["score"] > 0.7)
 
 def respond(message, history, system_message, max_tokens, temperature, top_p):
+    # Safety check first 🔒
     if not is_safe(message):
         return f"🚫 Let's focus on positive projects! Try: {random.choice(SAFE_IDEAS)}"
 
-    messages = [{
-        "role": "system",
-        "content": f"{system_message}\nYou are a friendly STEM mentor for kids. Never discuss unsafe topics!"
-    }]
+    # Prepare chat history
+    messages = [{"role": "system", "content": system_message}]
 
-    # Rest of chat implementation
-    for user_msg, bot_msg in history:
+    for user_msg, bot_msg in history[-5:]:  # Keep last 5 exchanges
         if user_msg:
             messages.append({"role": "user", "content": user_msg})
         if bot_msg:
@@ -45,17 +45,30 @@ def respond(message, history, system_message, max_tokens, temperature, top_p):
 
     messages.append({"role": "user", "content": message})
 
-    response = ""
-    for chunk in client.chat_completion(
+    # Tokenize and prepare streaming
+    inputs = tokenizer.apply_chat_template(
         messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p
-    ):
-        token = chunk.choices[0].delta.content
-        response += token
-        yield response
+        return_tensors="pt"
+    ).to(model.device)
+
+    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True)
+    generation_kwargs = {
+        "inputs": inputs,
+        "max_new_tokens": max_tokens,
+        "temperature": temperature,
+        "top_p": top_p,
+        "streamer": streamer
+    }
+
+    # Start generation in thread
+    thread = Thread(target=model.generate, kwargs=generation_kwargs)
+    thread.start()
+
+    # Stream output
+    partial_message = ""
+    for new_token in streamer:
+        partial_message += new_token
+        yield partial_message
 
 with gr.Blocks() as demo:
     gr.Markdown("# 🤖 REACT Ethical AI Lab")
@@ -74,4 +87,4 @@ with gr.Blocks() as demo:
     )
 
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch(server_name="0.0.0.0")
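
Note on the new startup block: it loads Zephyr-7B locally in 4-bit. Passing load_in_4bit=True straight to from_pretrained works on transformers versions that still accept the bare kwarg (and requires bitsandbytes plus a CUDA GPU); recent releases route quantization through BitsAndBytesConfig instead. A minimal sketch of the equivalent load, assuming bitsandbytes and accelerate are installed:

from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_name = "HuggingFaceH4/zephyr-7b-beta"

# Equivalent 4-bit setup via an explicit quantization config
# (assumed environment: CUDA GPU, bitsandbytes, accelerate).
bnb_config = BitsAndBytesConfig(load_in_4bit=True)
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",               # place layers on available devices
    quantization_config=bnb_config,  # replaces the bare load_in_4bit kwarg
)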
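
The rewritten respond() follows the standard transformers streaming recipe: generate() blocks, so it runs in a worker thread while the caller drains a TextIteratorStreamer. Two flags the committed code omits are worth knowing about: add_generation_prompt=True in apply_chat_template, without which Zephyr's chat template does not append the assistant-turn marker, and skip_special_tokens=True, which keeps tokens like </s> out of the visible reply. A self-contained sketch of the pattern with those two flags added (tokenizer and model as loaded above):

from threading import Thread
from transformers import TextIteratorStreamer

def stream_reply(messages, max_new_tokens=256):
    # Render the chat template and open the assistant turn.
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(model.device)

    # skip_prompt drops the echoed input; skip_special_tokens hides </s> etc.
    streamer = TextIteratorStreamer(
        tokenizer, skip_prompt=True, skip_special_tokens=True
    )

    # generate() blocks, so run it in a worker thread and consume
    # the streamer here as tokens arrive.
    thread = Thread(
        target=model.generate,
        kwargs={
            "inputs": inputs,
            "max_new_tokens": max_new_tokens,
            "streamer": streamer,
        },
    )
    thread.start()

    partial = ""
    for new_text in streamer:
        partial += new_text
        yield partial
    thread.join()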
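
The Blocks UI is mostly collapsed in this view, but respond()'s signature (message, history, then extra controls) matches what gr.ChatInterface passes when given additional_inputs, and ChatInterface streams whatever the function yields. A hypothetical wiring in that style, with control ranges invented for illustration rather than taken from the Space:

import gradio as gr

demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox("You are a friendly STEM mentor for kids.", label="System message"),
        gr.Slider(1, 2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(0.1, 2.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p"),
    ],
)

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0")  # bind all interfaces, as in the commit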