Spaces:
Sleeping
Sleeping
Update ai_chatbot.py
Browse files- ai_chatbot.py +150 -99
ai_chatbot.py
CHANGED
|
@@ -1,114 +1,165 @@
|
|
| 1 |
-
import
|
|
|
|
|
|
|
| 2 |
import re
|
| 3 |
-
|
| 4 |
|
| 5 |
class AIChatbot:
|
| 6 |
def __init__(self):
|
| 7 |
-
|
| 8 |
-
self.
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
self.
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
]
|
| 18 |
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
"Good day! How may I assist you?",
|
| 25 |
-
"Hello! I'm here to help. What would you like to know?"
|
| 26 |
-
],
|
| 27 |
-
"farewell": [
|
| 28 |
-
"Goodbye! Have a great day!",
|
| 29 |
-
"See you later! Take care!",
|
| 30 |
-
"Bye! Feel free to come back anytime!",
|
| 31 |
-
"Farewell! I hope I was helpful!"
|
| 32 |
-
],
|
| 33 |
-
"thanks": [
|
| 34 |
-
"You're welcome! Is there anything else I can help with?",
|
| 35 |
-
"My pleasure! Happy to help!",
|
| 36 |
-
"No problem! Feel free to ask if you need anything else!",
|
| 37 |
-
"You're very welcome! Anything else I can assist you with?"
|
| 38 |
-
],
|
| 39 |
-
"default": [
|
| 40 |
-
"That's an interesting question! I'm still learning, but I'd be happy to help with basic interactions.",
|
| 41 |
-
"I understand you're asking about that. While I'm designed for basic conversations, I'm here to chat!",
|
| 42 |
-
"Thanks for sharing that with me! I'm always happy to engage in conversation.",
|
| 43 |
-
"I appreciate your message! I'm here to have friendly conversations with you.",
|
| 44 |
-
"That's a thoughtful question! I'm designed to be helpful and engaging in our chat."
|
| 45 |
-
]
|
| 46 |
-
}
|
| 47 |
|
| 48 |
-
def
|
| 49 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
if not text:
|
| 51 |
-
return
|
| 52 |
-
return re.
|
| 53 |
-
|
| 54 |
-
def
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
|
| 68 |
-
#
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
|
|
|
|
|
|
|
|
|
| 72 |
|
| 73 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
|
| 75 |
-
def
|
| 76 |
-
"""
|
| 77 |
-
if not
|
| 78 |
-
return
|
| 79 |
-
|
| 80 |
-
# Add to conversation history
|
| 81 |
-
self.conversation_history.append({"user": user_message, "bot": ""})
|
| 82 |
-
|
| 83 |
-
# Detect intent
|
| 84 |
-
intent = self._detect_intent(user_message)
|
| 85 |
|
| 86 |
-
#
|
| 87 |
-
|
| 88 |
-
response = random.choice(self.responses[intent])
|
| 89 |
-
else:
|
| 90 |
-
response = random.choice(self.responses["default"])
|
| 91 |
|
| 92 |
-
#
|
| 93 |
-
self.
|
| 94 |
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
"""Get the conversation history"""
|
| 99 |
-
return self.conversation_history
|
| 100 |
-
|
| 101 |
-
def clear_history(self):
|
| 102 |
-
"""Clear the conversation history"""
|
| 103 |
-
self.conversation_history = []
|
| 104 |
-
return "Conversation history cleared!"
|
| 105 |
|
| 106 |
-
def
|
| 107 |
-
"""
|
| 108 |
-
|
| 109 |
-
"
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from sentence_transformers import SentenceTransformer
|
| 2 |
+
import numpy as np
|
| 3 |
+
from typing import List, Dict, Tuple
|
| 4 |
import re
|
| 5 |
+
import random
|
| 6 |
|
| 7 |
class AIChatbot:
|
| 8 |
def __init__(self):
|
| 9 |
+
# Load the pre-trained model (can use a smaller model for more speed)
|
| 10 |
+
self.model = SentenceTransformer('all-MiniLM-L6-v2')
|
| 11 |
+
# Warm up the model to avoid first-request slowness
|
| 12 |
+
_ = self.model.encode(["Hello, world!"])
|
| 13 |
+
self.conversation_embeddings = None
|
| 14 |
+
self.conversation_responses = None
|
| 15 |
+
self.load_conversation_responses()
|
| 16 |
+
|
| 17 |
+
    def load_conversation_responses(self):
        """Load basic conversation patterns and compute their normalized embeddings.

        Populates:
            self.conversation_responses: list of dicts, each with
                'pattern'  - space-separated trigger phrases for the intent,
                'response' - the canned reply to return on a match,
                'category' - intent label (used only as metadata here).
            self.conversation_embeddings: NumPy array of shape (N, D) holding
                one L2-normalized embedding per pattern, so a plain dot
                product against a normalized query equals cosine similarity.
        """
        # Basic conversation patterns for general interaction
        self.conversation_responses = [
            {"pattern": "hello hi hey greetings", "response": "Hello! How can I help you today?", "category": "greeting"},
            {"pattern": "how are you how do you do", "response": "I'm doing well, thank you for asking! How can I assist you?", "category": "greeting"},
            {"pattern": "goodbye bye see you later", "response": "Goodbye! Have a great day!", "category": "farewell"},
            {"pattern": "thank you thanks", "response": "You're welcome! Is there anything else I can help you with?", "category": "gratitude"},
            {"pattern": "what can you do what are your capabilities", "response": "I'm here to chat and help answer your questions! Feel free to ask me anything.", "category": "capabilities"},
            {"pattern": "who are you what is your name", "response": "I'm an AI chatbot designed to have conversations and help answer questions. You can call me your AI assistant!", "category": "identity"},
            {"pattern": "help me assist me", "response": "I'd be happy to help! What would you like to know or discuss?", "category": "assistance"},
            {"pattern": "tell me about yourself", "response": "I'm an AI chatbot that uses natural language processing to understand and respond to your messages. I'm here to chat and be helpful!", "category": "identity"},
            {"pattern": "how does this work how do you work", "response": "I use machine learning models to understand your messages and generate appropriate responses. It's like having a conversation with an AI!", "category": "technical"},
            {"pattern": "what time is it what's the date", "response": "I don't have access to real-time information, but I can help you with other questions!", "category": "time"},
            {"pattern": "weather forecast temperature", "response": "I don't have access to weather data, but I'd be happy to chat about other topics!", "category": "weather"},
            {"pattern": "joke funny humor", "response": "Why don't scientists trust atoms? Because they make up everything! 😄", "category": "humor"},
            {"pattern": "compliment nice great awesome", "response": "Thank you! That's very kind of you to say.", "category": "compliment"},
            {"pattern": "sorry apologize", "response": "No worries at all! Is there anything I can help you with?", "category": "apology"},
            {"pattern": "yes no maybe", "response": "I understand! Feel free to ask me anything else.", "category": "confirmation"}
        ]

        # Guard is defensive: the list above is hard-coded non-empty.
        if self.conversation_responses:
            # Compute and normalize embeddings for all patterns
            patterns = [resp['pattern'] for resp in self.conversation_responses]
            embeddings = self.model.encode(patterns, normalize_embeddings=True)
            self.conversation_embeddings = np.array(embeddings)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
|
| 44 |
+
def get_general_response(self, message):
|
| 45 |
+
"""Generate a general conversational response when no specific pattern matches"""
|
| 46 |
+
general_responses = [
|
| 47 |
+
"That's interesting! Tell me more about that.",
|
| 48 |
+
"I see what you mean. What else would you like to discuss?",
|
| 49 |
+
"That's a great point! Is there anything specific you'd like to know?",
|
| 50 |
+
"I understand. How can I help you further?",
|
| 51 |
+
"That sounds fascinating! What made you think about that?",
|
| 52 |
+
"I appreciate you sharing that with me. What else is on your mind?",
|
| 53 |
+
"That's a thoughtful question. Let me think about that...",
|
| 54 |
+
"I'm here to chat! What would you like to talk about?",
|
| 55 |
+
"That's worth discussing! What's your perspective on this?",
|
| 56 |
+
"I'm listening! Feel free to share more details."
|
| 57 |
+
]
|
| 58 |
+
return random.choice(general_responses)
|
| 59 |
+
|
| 60 |
+
def _tokenize(self, text: str):
|
| 61 |
if not text:
|
| 62 |
+
return []
|
| 63 |
+
return [t for t in re.findall(r"[a-z0-9]+", text.lower()) if len(t) > 2]
|
| 64 |
+
|
| 65 |
+
def _overlap_ratio(self, q_tokens, faq_tokens):
|
| 66 |
+
if not q_tokens or not faq_tokens:
|
| 67 |
+
return 0.0
|
| 68 |
+
q_set = set(q_tokens)
|
| 69 |
+
f_set = set(faq_tokens)
|
| 70 |
+
inter = len(q_set & f_set)
|
| 71 |
+
denom = max(len(q_set), 1)
|
| 72 |
+
return inter / denom
|
| 73 |
+
|
| 74 |
+
def _wh_class(self, text: str) -> str:
|
| 75 |
+
if not text:
|
| 76 |
+
return ''
|
| 77 |
+
s = text.strip().lower()
|
| 78 |
+
# simple heuristic classification by leading wh-word
|
| 79 |
+
for key in ['who', 'where', 'when', 'what', 'how', 'why', 'which']:
|
| 80 |
+
if s.startswith(key + ' ') or s.startswith(key + "?"):
|
| 81 |
+
return key
|
| 82 |
+
# also check presence if not leading
|
| 83 |
+
for key in ['who', 'where', 'when', 'what', 'how', 'why', 'which']:
|
| 84 |
+
if f' {key} ' in f' {s} ':
|
| 85 |
+
return key
|
| 86 |
+
return ''
|
| 87 |
+
|
| 88 |
+
def find_best_match(self, message: str, threshold: float = 0.6) -> Tuple[str, float]:
|
| 89 |
+
print(f"find_best_match called with: {message}") # Debug print
|
| 90 |
+
if not self.conversation_responses or self.conversation_embeddings is None:
|
| 91 |
+
return self.get_general_response(message), 0.0
|
| 92 |
+
|
| 93 |
+
# Compute and normalize embedding for the input message
|
| 94 |
+
message_embedding = self.model.encode([message], normalize_embeddings=True)[0]
|
| 95 |
+
similarities = np.dot(self.conversation_embeddings, message_embedding)
|
| 96 |
+
|
| 97 |
+
# Compute keyword overlap with each conversation pattern
|
| 98 |
+
msg_tokens = self._tokenize(message)
|
| 99 |
+
overlap_scores = []
|
| 100 |
+
for resp in self.conversation_responses:
|
| 101 |
+
pattern_tokens = self._tokenize(resp['pattern'])
|
| 102 |
+
overlap_scores.append(self._overlap_ratio(msg_tokens, pattern_tokens))
|
| 103 |
+
|
| 104 |
+
similarities = np.array(similarities)
|
| 105 |
+
overlap_scores = np.array(overlap_scores)
|
| 106 |
+
|
| 107 |
+
# Combined score to reduce false positives
|
| 108 |
+
combined = 0.7 * similarities + 0.3 * overlap_scores
|
| 109 |
|
| 110 |
+
# Apply WH-word intent consistency penalty
|
| 111 |
+
msg_wh = self._wh_class(message)
|
| 112 |
+
if msg_wh:
|
| 113 |
+
for i, resp in enumerate(self.conversation_responses):
|
| 114 |
+
pattern_wh = self._wh_class(resp['pattern'])
|
| 115 |
+
if pattern_wh and pattern_wh != msg_wh:
|
| 116 |
+
combined[i] *= 0.6 # penalize mismatched intent significantly
|
| 117 |
|
| 118 |
+
best_idx = int(np.argmax(combined))
|
| 119 |
+
best_semantic = float(similarities[best_idx])
|
| 120 |
+
best_overlap = float(overlap_scores[best_idx])
|
| 121 |
+
best_combined = float(combined[best_idx])
|
| 122 |
+
|
| 123 |
+
# Acceptance criteria: require good semantic OR strong combined with overlap
|
| 124 |
+
accept = (
|
| 125 |
+
best_semantic >= max(0.6, threshold)
|
| 126 |
+
or (best_combined >= threshold and best_overlap >= 0.2)
|
| 127 |
+
)
|
| 128 |
+
|
| 129 |
+
if accept:
|
| 130 |
+
return self.conversation_responses[best_idx]['response'], best_combined
|
| 131 |
+
else:
|
| 132 |
+
# Return a general conversational response
|
| 133 |
+
return self.get_general_response(message), best_combined
|
| 134 |
|
| 135 |
+
def get_suggested_topics(self, message: str, num_suggestions: int = 3) -> List[str]:
|
| 136 |
+
"""Get suggested conversation topics based on the input message"""
|
| 137 |
+
if not self.conversation_responses or self.conversation_embeddings is None:
|
| 138 |
+
return []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 139 |
|
| 140 |
+
# Compute and normalize embedding for the input message
|
| 141 |
+
message_embedding = self.model.encode([message], normalize_embeddings=True)[0]
|
|
|
|
|
|
|
|
|
|
| 142 |
|
| 143 |
+
# Calculate cosine similarity
|
| 144 |
+
similarities = np.dot(self.conversation_embeddings, message_embedding)
|
| 145 |
|
| 146 |
+
# Get top N similar conversation topics
|
| 147 |
+
top_indices = np.argsort(similarities)[-num_suggestions:][::-1]
|
| 148 |
+
return [self.conversation_responses[idx]['pattern'] for idx in top_indices if similarities[idx] > 0.3]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 149 |
|
| 150 |
+
def add_conversation_pattern(self, pattern: str, response: str, category: str = "general") -> bool:
|
| 151 |
+
"""Add a new conversation pattern to the static list (for demonstration purposes)"""
|
| 152 |
+
try:
|
| 153 |
+
new_pattern = {"pattern": pattern, "response": response, "category": category}
|
| 154 |
+
self.conversation_responses.append(new_pattern)
|
| 155 |
+
|
| 156 |
+
# Recompute embeddings
|
| 157 |
+
patterns = [resp['pattern'] for resp in self.conversation_responses]
|
| 158 |
+
embeddings = self.model.encode(patterns, normalize_embeddings=True)
|
| 159 |
+
self.conversation_embeddings = np.array(embeddings)
|
| 160 |
+
|
| 161 |
+
print(f"Conversation pattern added: {pattern}")
|
| 162 |
+
return True
|
| 163 |
+
except Exception as e:
|
| 164 |
+
print(f"Error adding conversation pattern: {e}")
|
| 165 |
+
return False
|