Spaces:
Sleeping
Sleeping
Update ai_chatbot.py
Browse files — ai_chatbot.py (+124 −139)
ai_chatbot.py
CHANGED
|
@@ -1,155 +1,140 @@
|
|
|
|
|
|
|
|
| 1 |
from sentence_transformers import SentenceTransformer
|
| 2 |
import numpy as np
|
| 3 |
-
from
|
| 4 |
-
import
|
| 5 |
-
import os
|
| 6 |
-
import requests
|
| 7 |
|
| 8 |
-
class
|
| 9 |
def __init__(self):
|
| 10 |
-
|
|
|
|
| 11 |
self.model = SentenceTransformer('all-MiniLM-L6-v2')
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
self.
|
| 15 |
-
self.faqs = None
|
| 16 |
-
self.load_faqs()
|
| 17 |
|
| 18 |
-
def
|
| 19 |
-
"""
|
| 20 |
-
|
| 21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
|
| 23 |
-
def
|
| 24 |
-
"""
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
return 0.0
|
| 36 |
-
q_set = set(q_tokens)
|
| 37 |
-
f_set = set(faq_tokens)
|
| 38 |
-
inter = len(q_set & f_set)
|
| 39 |
-
denom = max(len(q_set), 1)
|
| 40 |
-
return inter / denom
|
| 41 |
-
|
| 42 |
-
def _wh_class(self, text: str) -> str:
|
| 43 |
-
if not text:
|
| 44 |
-
return ''
|
| 45 |
-
s = text.strip().lower()
|
| 46 |
-
# simple heuristic classification by leading wh-word
|
| 47 |
-
for key in ['who', 'where', 'when', 'what', 'how', 'why', 'which']:
|
| 48 |
-
if s.startswith(key + ' ') or s.startswith(key + "?"):
|
| 49 |
-
return key
|
| 50 |
-
# also check presence if not leading
|
| 51 |
-
for key in ['who', 'where', 'when', 'what', 'how', 'why', 'which']:
|
| 52 |
-
if f' {key} ' in f' {s} ':
|
| 53 |
-
return key
|
| 54 |
-
return ''
|
| 55 |
-
|
| 56 |
-
def find_best_match(self, question: str, threshold: float = 0.7) -> Tuple[str, float]:
|
| 57 |
-
print(f"find_best_match called with: {question}") # Debug print
|
| 58 |
-
# Always act as a general-conversation bot
|
| 59 |
-
return self._generate_general_response(question)
|
| 60 |
|
| 61 |
-
def
|
| 62 |
-
"""
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
# Greeting responses
|
| 66 |
-
if any(greeting in question_lower for greeting in ['hello', 'hi', 'hey', 'good morning', 'good afternoon', 'good evening']):
|
| 67 |
-
return "Hello! I'm the PSAU AI assistant. I'm here to help you with questions about university admissions, courses, and general information about Pangasinan State University. How can I assist you today?", 0.8
|
| 68 |
|
| 69 |
-
#
|
| 70 |
-
|
| 71 |
-
return "You're very welcome! I'm happy to help. Is there anything else you'd like to know about PSAU or university admissions?", 0.9
|
| 72 |
|
| 73 |
-
#
|
| 74 |
-
|
| 75 |
-
return "Goodbye! It was nice chatting with you. Feel free to come back anytime if you have more questions about PSAU. Good luck with your academic journey!", 0.9
|
| 76 |
|
| 77 |
-
#
|
| 78 |
-
|
| 79 |
-
|
| 80 |
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
|
|
|
|
|
|
| 88 |
|
| 89 |
-
|
| 90 |
-
if any(help in question_lower for help in ['help', 'assist', 'support']):
|
| 91 |
-
return "I'm here to help! I can assist you with:\nβ’ Admission requirements and deadlines\nβ’ Course information and recommendations\nβ’ Academic programs and majors\nβ’ Campus facilities and services\nβ’ General university information\n\nJust ask me any question and I'll do my best to help you!", 0.9
|
| 92 |
|
| 93 |
-
|
| 94 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
|
| 96 |
-
def
|
| 97 |
-
"""
|
| 98 |
-
return
|
| 99 |
|
| 100 |
-
def
|
| 101 |
-
"""
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
"
|
| 139 |
-
"How to apply?",
|
| 140 |
-
"Where is the campus located?"
|
| 141 |
-
]
|
| 142 |
-
items: List[Dict[str, str]] = []
|
| 143 |
-
for q in sample_questions:
|
| 144 |
-
r = requests.get(f"{base_url}/faqs", params={"question": q}, timeout=10)
|
| 145 |
-
if r.ok and r.json().get('answer'):
|
| 146 |
-
items.append({"question": q, "answer": r.json()['answer']})
|
| 147 |
-
if not items:
|
| 148 |
-
return "No FAQs available at the moment."
|
| 149 |
-
faq_text = "## π Frequently Asked Questions\n\n"
|
| 150 |
-
for i, faq in enumerate(items, 1):
|
| 151 |
-
faq_text += f"**{i}. {faq['question']}**\n"
|
| 152 |
-
faq_text += f"{faq['answer']}\n\n"
|
| 153 |
-
return faq_text
|
| 154 |
-
except Exception as e:
|
| 155 |
-
return f"Could not load FAQs from gateway: {e}"
|
|
|
|
| 1 |
+
import requests
|
| 2 |
+
import json
|
| 3 |
from sentence_transformers import SentenceTransformer
|
| 4 |
import numpy as np
|
| 5 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
| 6 |
+
from recommender import CourseRecommender
|
|
|
|
|
|
|
| 7 |
|
| 8 |
+
class Chatbot:
    """FAQ chatbot backed by sentence-embedding similarity search."""

    def __init__(self):
        # Embedding model used to vectorise both stored FAQ questions
        # and incoming user input.
        self.model = SentenceTransformer('all-MiniLM-L6-v2')
        # Remote service that serves the FAQ table.
        self.database_url = "https://database-46m3.onrender.com"
        # Separate engine used by get_course_recommendations().
        self.recommender = CourseRecommender()
        # Q&A pairs and their precomputed question embeddings;
        # both are populated by load_qa_data() below.
        self.qa_pairs = []
        self.question_embeddings = []
        self.load_qa_data()
|
|
|
|
|
|
|
| 16 |
|
| 17 |
+
def load_qa_data(self):
    """Fetch Q&A pairs from the remote faqs table and embed the questions.

    On any failure (non-200 status, network error, bad JSON) falls back
    to a small built-in Q&A set via _load_fallback_data(), so the bot
    always has something to answer with.
    """
    try:
        faqs_url = f"{self.database_url}/faqs"
        # Fix: a timeout so a stalled database service cannot hang the
        # constructor forever (requests.get blocks indefinitely without one).
        response = requests.get(faqs_url, timeout=10)
        if response.status_code == 200:
            data = response.json()
            # The endpoint may return a single object instead of a list;
            # normalise to a list either way.
            if isinstance(data, list):
                self.qa_pairs = data
            else:
                self.qa_pairs = [data]

            # Pre-compute embeddings for every stored question so that
            # find_best_match() only has to encode the user input.
            questions = [item.get('question', '') for item in self.qa_pairs]
            self.question_embeddings = self.model.encode(questions)
            print(f"Loaded {len(self.qa_pairs)} FAQ pairs from database")
        else:
            print(f"Failed to load data from faqs table. Status code: {response.status_code}")
            self._load_fallback_data()
    except Exception as e:
        # Timeouts raised above land here as well and trigger the fallback.
        print(f"Error loading FAQ data: {str(e)}")
        self._load_fallback_data()
|
| 42 |
|
| 43 |
+
def _load_fallback_data(self):
    """Populate qa_pairs with a small built-in AI FAQ set.

    Used whenever the remote faqs table cannot be reached, so the bot
    degrades gracefully instead of starting with no data at all.
    """
    fallback_pairs = [
        {"question": "What is artificial intelligence?", "answer": "Artificial Intelligence (AI) is a branch of computer science that aims to create machines capable of intelligent behavior."},
        {"question": "How does machine learning work?", "answer": "Machine learning is a subset of AI that enables computers to learn and improve from experience without being explicitly programmed."},
        {"question": "What is deep learning?", "answer": "Deep learning is a subset of machine learning that uses neural networks with multiple layers to model and understand complex patterns in data."},
        {"question": "What is natural language processing?", "answer": "Natural Language Processing (NLP) is a field of AI that focuses on the interaction between computers and humans through natural language."},
        {"question": "What is a neural network?", "answer": "A neural network is a computing system inspired by biological neural networks that constitute animal brains. It consists of interconnected nodes (neurons) that process information."}
    ]
    self.qa_pairs = fallback_pairs
    # Embed the fallback questions just like the database-loaded ones.
    self.question_embeddings = self.model.encode([p['question'] for p in fallback_pairs])
    print("Loaded fallback Q&A data")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
|
| 56 |
+
def find_best_match(self, user_input, threshold=0.7):
    """Return (faq_dict, similarity) for the stored question closest to user_input.

    Embeds the input with the sentence-transformer model and compares it
    against the precomputed question embeddings by cosine similarity.

    Returns:
        (dict, float) when the best cosine similarity clears `threshold`;
        (None, float) with the best similarity when nothing clears it;
        (None, 0.0) when no FAQ data is loaded at all.
    """
    # Fix: the no-data path previously returned int 0 while every other
    # path returned a float similarity; also guard against embeddings
    # being empty so cosine_similarity never sees an empty matrix.
    if not self.qa_pairs or len(self.question_embeddings) == 0:
        return None, 0.0

    # Encode the user input (list-wrapped: the model expects a batch).
    user_embedding = self.model.encode([user_input])

    # Cosine similarity of the input against every stored question.
    similarities = cosine_similarity(user_embedding, self.question_embeddings)[0]

    best_match_idx = int(np.argmax(similarities))
    # Cast to plain float so callers get a JSON-friendly value
    # (backward compatible: the caller already applies float()).
    best_similarity = float(similarities[best_match_idx])

    if best_similarity >= threshold:
        return self.qa_pairs[best_match_idx], best_similarity
    else:
        return None, best_similarity
|
| 75 |
+
|
| 76 |
+
def get_response(self, user_input):
|
| 77 |
+
"""Get response for user input"""
|
| 78 |
+
if not user_input.strip():
|
| 79 |
+
return "Please enter a message."
|
| 80 |
|
| 81 |
+
best_match, similarity = self.find_best_match(user_input)
|
|
|
|
|
|
|
| 82 |
|
| 83 |
+
if best_match:
|
| 84 |
+
return {
|
| 85 |
+
'answer': best_match.get('answer', 'No answer found'),
|
| 86 |
+
'confidence': float(similarity),
|
| 87 |
+
'matched_question': best_match.get('question', ''),
|
| 88 |
+
'status': 'success'
|
| 89 |
+
}
|
| 90 |
+
else:
|
| 91 |
+
return {
|
| 92 |
+
'answer': "I'm sorry, I couldn't find a relevant answer to your question. Could you please rephrase it or ask something else?",
|
| 93 |
+
'confidence': float(similarity),
|
| 94 |
+
'matched_question': '',
|
| 95 |
+
'status': 'no_match'
|
| 96 |
+
}
|
| 97 |
|
| 98 |
+
def get_qa_count(self):
|
| 99 |
+
"""Get the number of loaded Q&A pairs"""
|
| 100 |
+
return len(self.qa_pairs)
|
| 101 |
|
| 102 |
+
def get_course_recommendations(self, stanine, gwa, strand, hobbies):
|
| 103 |
+
"""Get course recommendations using the recommender system"""
|
| 104 |
+
try:
|
| 105 |
+
# Validate inputs
|
| 106 |
+
stanine = int(stanine) if isinstance(stanine, str) else stanine
|
| 107 |
+
gwa = float(gwa) if isinstance(gwa, str) else gwa
|
| 108 |
+
|
| 109 |
+
if not (1 <= stanine <= 9):
|
| 110 |
+
return "β Stanine score must be between 1 and 9"
|
| 111 |
+
if not (75 <= gwa <= 100):
|
| 112 |
+
return "β GWA must be between 75 and 100"
|
| 113 |
+
if not strand:
|
| 114 |
+
return "β Please select a strand"
|
| 115 |
+
if not hobbies or not str(hobbies).strip():
|
| 116 |
+
return "β Please enter your hobbies/interests"
|
| 117 |
+
|
| 118 |
+
# Get recommendations
|
| 119 |
+
recommendations = self.recommender.recommend_courses(
|
| 120 |
+
stanine=stanine,
|
| 121 |
+
gwa=gwa,
|
| 122 |
+
strand=strand,
|
| 123 |
+
hobbies=str(hobbies)
|
| 124 |
+
)
|
| 125 |
+
|
| 126 |
+
if not recommendations:
|
| 127 |
+
return "No recommendations available at the moment."
|
| 128 |
+
|
| 129 |
+
# Format response (without confidence scores)
|
| 130 |
+
response = f"## π― Course Recommendations for You\n\n"
|
| 131 |
+
response += f"**Profile:** Stanine {stanine}, GWA {gwa}, {strand} Strand\n"
|
| 132 |
+
response += f"**Interests:** {hobbies}\n\n"
|
| 133 |
+
|
| 134 |
+
for i, rec in enumerate(recommendations, 1):
|
| 135 |
+
response += f"### {i}. {rec['code']} - {rec['name']}\n\n"
|
| 136 |
+
|
| 137 |
+
return response
|
| 138 |
+
|
| 139 |
+
except Exception as e:
|
| 140 |
+
return f"β Error getting recommendations: {str(e)}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|