Spaces:
Running
Running
Update ai_chatbot.py
Browse files- ai_chatbot.py +152 -110
ai_chatbot.py
CHANGED
|
@@ -1,61 +1,62 @@
|
|
| 1 |
from sentence_transformers import SentenceTransformer
|
| 2 |
import numpy as np
|
| 3 |
from typing import List, Dict, Tuple
|
|
|
|
|
|
|
| 4 |
import re
|
| 5 |
-
import random
|
| 6 |
|
| 7 |
class AIChatbot:
|
| 8 |
-
def __init__(self):
|
|
|
|
| 9 |
# Load the pre-trained model (can use a smaller model for more speed)
|
| 10 |
self.model = SentenceTransformer('all-MiniLM-L6-v2')
|
| 11 |
# Warm up the model to avoid first-request slowness
|
| 12 |
_ = self.model.encode(["Hello, world!"])
|
| 13 |
-
self.
|
| 14 |
-
self.
|
| 15 |
-
self.
|
| 16 |
|
| 17 |
-
def
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
|
|
|
| 43 |
|
| 44 |
-
def
|
| 45 |
-
"
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
"
|
| 57 |
-
]
|
| 58 |
-
return random.choice(general_responses)
|
| 59 |
|
| 60 |
def _tokenize(self, text: str):
|
| 61 |
if not text:
|
|
@@ -85,81 +86,122 @@ class AIChatbot:
|
|
| 85 |
return key
|
| 86 |
return ''
|
| 87 |
|
| 88 |
-
def find_best_match(self,
|
| 89 |
-
print(f"find_best_match called with: {
|
| 90 |
-
|
| 91 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
similarities = np.dot(self.conversation_embeddings, message_embedding)
|
| 96 |
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
|
| 104 |
-
|
| 105 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
|
| 107 |
-
|
| 108 |
-
|
| 109 |
|
| 110 |
-
#
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
combined[i] *= 0.6 # penalize mismatched intent significantly
|
| 117 |
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
)
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
|
| 135 |
-
def
|
| 136 |
-
"""Get suggested
|
| 137 |
-
if not self.
|
| 138 |
return []
|
| 139 |
|
| 140 |
-
# Compute and normalize embedding for the input
|
| 141 |
-
|
| 142 |
|
| 143 |
# Calculate cosine similarity
|
| 144 |
-
similarities = np.dot(self.
|
| 145 |
|
| 146 |
-
# Get top N similar
|
| 147 |
top_indices = np.argsort(similarities)[-num_suggestions:][::-1]
|
| 148 |
-
return [self.
|
| 149 |
|
| 150 |
-
def
|
| 151 |
-
"""Add a new
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from sentence_transformers import SentenceTransformer
|
| 2 |
import numpy as np
|
| 3 |
from typing import List, Dict, Tuple
|
| 4 |
+
import mysql.connector
|
| 5 |
+
from mysql.connector import Error
|
| 6 |
import re
|
|
|
|
| 7 |
|
| 8 |
class AIChatbot:
    """FAQ chatbot backed by sentence embeddings and a MySQL FAQ store."""

    def __init__(self, db_config: Dict[str, str]):
        """Initialize the embedding model and prime the FAQ cache.

        Args:
            db_config: Connection keyword arguments passed straight to
                mysql.connector.connect.
        """
        self.db_config = db_config
        # Pre-trained sentence encoder; a smaller model would trade accuracy for speed.
        self.model = SentenceTransformer('all-MiniLM-L6-v2')
        # One throwaway encode so the first real request is not slowed by lazy init.
        _ = self.model.encode(["Hello, world!"])
        # FAQ cache; populated from the database by load_faqs() below.
        self.faq_embeddings = None
        self.faqs = None
        self.load_faqs()
|
| 18 |
|
| 19 |
+
def get_db_connection(self):
    """Open a MySQL connection from self.db_config; return None on failure."""
    try:
        return mysql.connector.connect(**self.db_config)
    except Error as e:
        # Log and degrade gracefully so callers can simply test the return value.
        print(f"Error connecting to database: {e}")
        return None
|
| 26 |
+
|
| 27 |
+
def load_faqs(self):
    """Fetch active FAQs from MySQL and cache unit-norm question embeddings."""
    connection = self.get_db_connection()
    if not connection:
        return
    try:
        cursor = connection.cursor(dictionary=True)
        cursor.execute("SELECT id, question, answer FROM faqs WHERE is_active = 1 ORDER BY sort_order, id")
        self.faqs = cursor.fetchall()
        cursor.close()

        if self.faqs:
            # Normalized embeddings let cosine similarity be a plain dot product later.
            texts = [row['question'] for row in self.faqs]
            self.faq_embeddings = np.array(
                self.model.encode(texts, normalize_embeddings=True)
            )
    except Error as e:
        print(f"Error loading FAQs: {e}")
    finally:
        connection.close()
|
| 46 |
|
| 47 |
+
def save_unanswered_question(self, question):
    """Persist a question that had no FAQ match so staff can review it later.

    Args:
        question: Raw user question text to record.
    """
    print(f"Saving unanswered question: {question}")  # Debug print
    connection = self.get_db_connection()
    if not connection:
        return
    try:
        cursor = connection.cursor()
        # Parameterized query keeps user-supplied text out of the SQL string.
        query = "INSERT INTO unanswered_questions (question) VALUES (%s)"
        cursor.execute(query, (question,))
        connection.commit()
        cursor.close()
    except Error as e:
        print(f"Error saving unanswered question: {e}")
    finally:
        # BUG FIX: the connection previously leaked when execute/commit raised
        # (close() was only on the success path); close it on every path.
        connection.close()
|
|
|
|
|
|
|
| 60 |
|
| 61 |
def _tokenize(self, text: str):
|
| 62 |
if not text:
|
|
|
|
| 86 |
return key
|
| 87 |
return ''
|
| 88 |
|
| 89 |
+
def find_best_match(self, question: str, threshold: float = 0.7) -> Tuple[str, float]:
    """Match a user question against the cached FAQs.

    Blends cosine similarity of normalized sentence embeddings with a
    keyword-overlap score, penalizes WH-intent mismatches, and accepts a
    FAQ answer only when the criteria below hold; otherwise falls back to
    a general conversational reply.

    Args:
        question: Raw user question text.
        threshold: Minimum combined score for an overlap-backed match.

    Returns:
        Tuple of (answer_or_general_reply, confidence_score).
    """
    # Try FAQ matching first; the cache may be empty if the DB load failed.
    if self.faqs and self.faq_embeddings is not None:
        # Unit-norm query embedding: the dot product with the cached FAQ
        # matrix is exactly cosine similarity.
        question_embedding = self.model.encode([question], normalize_embeddings=True)[0]
        similarities = np.dot(self.faq_embeddings, question_embedding)

        # Keyword overlap of the query with each FAQ question.
        # NOTE(review): _overlap_ratio/_wh_class live elsewhere in this class;
        # assumed to return a [0, 1] ratio and a WH-word class label — confirm.
        q_tokens = self._tokenize(question)
        overlap_scores = np.array(
            [self._overlap_ratio(q_tokens, self._tokenize(faq['question']))
             for faq in self.faqs]
        )

        # Combined score to reduce false positives.
        combined = 0.7 * similarities + 0.3 * overlap_scores

        # Apply WH-word intent consistency penalty.
        q_wh = self._wh_class(question)
        if q_wh:
            for i, faq in enumerate(self.faqs):
                f_wh = self._wh_class(faq['question'])
                if f_wh and f_wh != q_wh:
                    combined[i] *= 0.6  # penalize mismatched intent significantly

        best_idx = int(np.argmax(combined))
        best_semantic = float(similarities[best_idx])
        best_overlap = float(overlap_scores[best_idx])
        best_combined = float(combined[best_idx])
        best_wh = self._wh_class(self.faqs[best_idx]['question'])

        # Acceptance criteria: strong semantic match alone, OR a good combined
        # score backed by real keyword overlap.
        accept = (
            best_semantic >= max(0.7, threshold)
            or (best_combined >= threshold and best_overlap >= 0.3)
        )
        # Even an accepted match is rejected when WH intents disagree.
        if accept and q_wh and best_wh and q_wh != best_wh:
            accept = False

        if accept:
            return self.faqs[best_idx]['answer'], best_combined

    # No acceptable FAQ match: provide a general conversation response.
    return self._generate_general_response(question)
|
| 137 |
+
|
| 138 |
+
def _generate_general_response(self, question: str) -> Tuple[str, float]:
|
| 139 |
+
"""Generate general conversation responses for non-FAQ questions"""
|
| 140 |
+
question_lower = question.lower().strip()
|
|
|
|
| 141 |
|
| 142 |
+
# Greeting responses
|
| 143 |
+
if any(greeting in question_lower for greeting in ['hello', 'hi', 'hey', 'good morning', 'good afternoon', 'good evening']):
|
| 144 |
+
return "Hello! I'm the PSAU AI assistant. I'm here to help you with questions about university admissions, courses, and general information about Pangasinan State University. How can I assist you today?", 0.8
|
| 145 |
+
|
| 146 |
+
# Thank you responses
|
| 147 |
+
if any(thanks in question_lower for thanks in ['thank you', 'thanks', 'thank', 'appreciate']):
|
| 148 |
+
return "You're very welcome! I'm happy to help. Is there anything else you'd like to know about PSAU or university admissions?", 0.9
|
| 149 |
+
|
| 150 |
+
# Goodbye responses
|
| 151 |
+
if any(goodbye in question_lower for goodbye in ['bye', 'goodbye', 'see you', 'farewell']):
|
| 152 |
+
return "Goodbye! It was nice chatting with you. Feel free to come back anytime if you have more questions about PSAU. Good luck with your academic journey!", 0.9
|
| 153 |
+
|
| 154 |
+
# How are you responses
|
| 155 |
+
if any(how in question_lower for how in ['how are you', 'how do you do', 'how is it going']):
|
| 156 |
+
return "I'm doing great, thank you for asking! I'm here and ready to help you with any questions about PSAU admissions, courses, or university life. What would you like to know?", 0.8
|
| 157 |
+
|
| 158 |
+
# What can you do responses
|
| 159 |
+
if any(what in question_lower for what in ['what can you do', 'what do you do', 'what are your capabilities']):
|
| 160 |
+
return "I can help you with:\n• University admission requirements and procedures\n• Course information and recommendations\n• General questions about PSAU\n• Academic guidance and support\n• Information about campus life\n\nWhat specific information are you looking for?", 0.9
|
| 161 |
+
|
| 162 |
+
# About PSAU responses
|
| 163 |
+
if any(about in question_lower for about in ['about psa', 'about psu', 'about pangasinan state', 'tell me about']):
|
| 164 |
+
return "Pangasinan State University (PSAU) is a premier state university in the Philippines offering quality education across various fields. We provide undergraduate and graduate programs in areas like Computer Science, Business, Education, Nursing, and more. We're committed to academic excellence and student success. What would you like to know more about?", 0.8
|
| 165 |
+
|
| 166 |
+
# Help responses
|
| 167 |
+
if any(help in question_lower for help in ['help', 'assist', 'support']):
|
| 168 |
+
return "I'm here to help! I can assist you with:\n• Admission requirements and deadlines\n• Course information and recommendations\n• Academic programs and majors\n• Campus facilities and services\n• General university information\n\nJust ask me any question and I'll do my best to help you!", 0.9
|
| 169 |
+
|
| 170 |
+
# Default general response
|
| 171 |
+
return "I understand you're asking about something, but I'm specifically designed to help with PSAU-related questions like admissions, courses, and university information. Could you rephrase your question to be more specific about what you'd like to know about Pangasinan State University? I'm here to help with academic guidance and university-related inquiries!", 0.6
|
| 172 |
|
| 173 |
+
def get_suggested_questions(self, question: str, num_suggestions: int = 3) -> List[str]:
    """Return up to `num_suggestions` stored FAQ questions ranked by similarity."""
    if not self.faqs or self.faq_embeddings is None:
        return []

    # Unit-norm query embedding: the dot product against the cached FAQ
    # vectors is then exactly cosine similarity.
    query_vec = self.model.encode([question], normalize_embeddings=True)[0]
    scores = np.dot(self.faq_embeddings, query_vec)

    # Indices of the highest scores, best first (same slicing as before).
    ranked = np.argsort(scores)[-num_suggestions:][::-1]

    suggestions = []
    for idx in ranked:
        # Drop weak matches so unrelated questions are not suggested.
        if scores[idx] > 0.3:
            suggestions.append(self.faqs[idx]['question'])
    return suggestions
|
| 187 |
|
| 188 |
+
def add_faq(self, question: str, answer: str) -> bool:
    """Insert a FAQ row and refresh the in-memory embedding cache.

    Returns:
        True on success, False when the connection or the insert fails.
    """
    connection = self.get_db_connection()
    if not connection:
        return False
    try:
        cursor = connection.cursor()
        # Parameterized insert keeps user-provided text out of the SQL string.
        query = "INSERT INTO faqs (question, answer) VALUES (%s, %s)"
        cursor.execute(query, (question, answer))
        connection.commit()
        cursor.close()

        # Reload so the new question participates in matching immediately.
        self.load_faqs()
        return True
    except Error as e:
        print(f"Error adding FAQ: {e}")
        return False
    finally:
        connection.close()
|