markobinario committed
Commit d1e5d7d · verified · 1 Parent(s): ef8a26d

Upload 3 files

Files changed (3)
  1. ai_chatbot.py +160 -0
  2. database_recommender.py +293 -0
  3. requirements.txt +7 -1
ai_chatbot.py ADDED
@@ -0,0 +1,160 @@
+ from sentence_transformers import SentenceTransformer
+ import numpy as np
+ from typing import List, Dict, Tuple
+ import re
+
+ class AIChatbot:
+     def __init__(self):
+         # Load the pre-trained model (can use a smaller model for more speed)
+         self.model = SentenceTransformer('all-MiniLM-L6-v2')
+         # Warm up the model to avoid first-request slowness
+         _ = self.model.encode(["Hello, world!"])
+         self.faq_embeddings = None
+         self.faqs = None
+         self.load_faqs()
+
+     def load_faqs(self):
+         """Load static FAQs and compute their normalized embeddings"""
+         # Static FAQ data
+         self.faqs = [
+             {"id": 1, "question": "What are the admission requirements?", "answer": "To apply for admission, you need to submit your high school diploma, transcript of records, 2x2 ID photo, and completed application form. You also need to take the entrance examination."},
+             {"id": 2, "question": "When is the application deadline?", "answer": "The application deadline is usually in March for the first semester and October for the second semester. Please check our website for the exact dates."},
+             {"id": 3, "question": "What courses are available?", "answer": "We offer various courses including BS Computer Science, BS Information Technology, BS Business Administration, BS Education, BS Nursing, BS Architecture, and more. Check our course catalog for the complete list."},
+             {"id": 4, "question": "How much is the tuition fee?", "answer": "Tuition fees vary by program. For undergraduate programs, it ranges from ₱15,000 to ₱25,000 per semester. Please contact the registrar's office for specific program fees."},
+             {"id": 5, "question": "Do you offer scholarships?", "answer": "Yes, we offer various scholarships including academic scholarships, athletic scholarships, and need-based financial aid. Applications are available at the student affairs office."},
+             {"id": 6, "question": "What is the minimum GWA requirement?", "answer": "The minimum GWA requirement is 80% for most programs. Some programs may have higher requirements. Please check the specific requirements for your chosen program."},
+             {"id": 7, "question": "How can I contact the admissions office?", "answer": "You can contact the admissions office at (02) 123-4567 or email admissions@psau.edu.ph. Office hours are Monday to Friday, 8:00 AM to 5:00 PM."},
+             {"id": 8, "question": "Is there a dormitory available?", "answer": "Yes, we have dormitory facilities for both male and female students. Dormitory fees are separate from tuition. Please contact the housing office for availability and rates."},
+             {"id": 9, "question": "What documents do I need for enrollment?", "answer": "For enrollment, you need your admission letter, original and photocopy of birth certificate, original and photocopy of high school diploma, 2x2 ID photos, and medical certificate."},
+             {"id": 10, "question": "Can I transfer from another school?", "answer": "Yes, we accept transferees. You need to submit your transcript of records, honorable dismissal, and other required documents. Some credits may be credited depending on the program."}
+         ]
+
+         if self.faqs:
+             # Compute and normalize embeddings for all questions
+             questions = [faq['question'] for faq in self.faqs]
+             embeddings = self.model.encode(questions, normalize_embeddings=True)
+             self.faq_embeddings = np.array(embeddings)
+
+     def save_unanswered_question(self, question):
+         """Log unanswered questions to console (can be extended to save to file)"""
+         print(f"Unanswered question logged: {question}")
+         # In a real implementation, you could save this to a file or send to an admin
+
+     def _tokenize(self, text: str):
+         if not text:
+             return []
+         return [t for t in re.findall(r"[a-z0-9]+", text.lower()) if len(t) > 2]
+
+     def _overlap_ratio(self, q_tokens, faq_tokens):
+         if not q_tokens or not faq_tokens:
+             return 0.0
+         q_set = set(q_tokens)
+         f_set = set(faq_tokens)
+         inter = len(q_set & f_set)
+         denom = max(len(q_set), 1)
+         return inter / denom
+
+     def _wh_class(self, text: str) -> str:
+         if not text:
+             return ''
+         s = text.strip().lower()
+         # Simple heuristic classification by leading wh-word
+         for key in ['who', 'where', 'when', 'what', 'how', 'why', 'which']:
+             if s.startswith(key + ' ') or s.startswith(key + "?"):
+                 return key
+         # Also check presence if not leading
+         for key in ['who', 'where', 'when', 'what', 'how', 'why', 'which']:
+             if f' {key} ' in f' {s} ':
+                 return key
+         return ''
+
+     def find_best_match(self, question: str, threshold: float = 0.7) -> Tuple[str, float]:
+         print(f"find_best_match called with: {question}")  # Debug print
+         if not self.faqs or self.faq_embeddings is None:
+             return "I'm sorry, I couldn't find any FAQs in the database.", 0.0
+
+         # Compute and normalize embedding for the input question
+         question_embedding = self.model.encode([question], normalize_embeddings=True)[0]
+         similarities = np.dot(self.faq_embeddings, question_embedding)
+
+         # Compute keyword overlap with each FAQ question
+         q_tokens = self._tokenize(question)
+         overlap_scores = []
+         for faq in self.faqs:
+             overlap_scores.append(self._overlap_ratio(q_tokens, self._tokenize(faq['question'])))
+
+         similarities = np.array(similarities)
+         overlap_scores = np.array(overlap_scores)
+
+         # Combined score to reduce false positives
+         combined = 0.7 * similarities + 0.3 * overlap_scores
+
+         # Apply WH-word intent consistency penalty
+         q_wh = self._wh_class(question)
+         if q_wh:
+             for i, faq in enumerate(self.faqs):
+                 f_wh = self._wh_class(faq['question'])
+                 if f_wh and f_wh != q_wh:
+                     combined[i] *= 0.6  # Penalize mismatched intent significantly
+         best_idx = int(np.argmax(combined))
+         best_semantic = float(similarities[best_idx])
+         best_overlap = float(overlap_scores[best_idx])
+         best_combined = float(combined[best_idx])
+         best_wh = self._wh_class(self.faqs[best_idx]['question'])
+
+         # Acceptance criteria: require good semantic OR strong combined with overlap
+         accept = (
+             best_semantic >= max(0.7, threshold)
+             or (best_combined >= threshold and best_overlap >= 0.3)
+         )
+         # Enforce WH intent match when present
+         if accept and q_wh and best_wh and q_wh != best_wh:
+             accept = False
+
+         if accept:
+             return self.faqs[best_idx]['answer'], best_combined
+         else:
+             # Log as unanswered so admins can curate (ignore errors)
+             try:
+                 self.save_unanswered_question(question)
+             except Exception:
+                 pass
+             fallback = (
+                 "Sorry, I don’t have the knowledge to answer that yet.\n"
+                 "I’ll notify an admin about your question and we’ll add the answer soon.\n"
+                 "Please come back in a while."
+             )
+             return (fallback, best_combined)
+
+     def get_suggested_questions(self, question: str, num_suggestions: int = 3) -> List[str]:
+         """Get suggested questions based on the input question"""
+         if not self.faqs or self.faq_embeddings is None:
+             return []
+
+         # Compute and normalize embedding for the input question
+         question_embedding = self.model.encode([question], normalize_embeddings=True)[0]
+
+         # Calculate cosine similarity
+         similarities = np.dot(self.faq_embeddings, question_embedding)
+
+         # Get top N similar questions
+         top_indices = np.argsort(similarities)[-num_suggestions:][::-1]
+         return [self.faqs[idx]['question'] for idx in top_indices if similarities[idx] > 0.3]
+
+     def add_faq(self, question: str, answer: str) -> bool:
+         """Add a new FAQ to the static list (for demonstration purposes)"""
+         try:
+             new_id = max([faq['id'] for faq in self.faqs]) + 1 if self.faqs else 1
+             new_faq = {"id": new_id, "question": question, "answer": answer}
+             self.faqs.append(new_faq)
+
+             # Recompute embeddings
+             questions = [faq['question'] for faq in self.faqs]
+             embeddings = self.model.encode(questions, normalize_embeddings=True)
+             self.faq_embeddings = np.array(embeddings)
+
+             print(f"FAQ added: {question}")
+             return True
+         except Exception as e:
+             print(f"Error adding FAQ: {e}")
+             return False
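
Note (not part of the commit): a query is matched against the FAQ list with a combined score of 0.7 × cosine similarity + 0.3 × keyword overlap, plus a WH-word intent check. A minimal sketch of exercising the new class from a Python shell might look like the following; the import path ai_chatbot and the sample question are assumptions based on the file name.

    # Illustrative only: exercising the class added in ai_chatbot.py
    from ai_chatbot import AIChatbot

    bot = AIChatbot()  # loads the MiniLM model and precomputes FAQ embeddings

    # find_best_match returns (answer_or_fallback, combined_score)
    answer, score = bot.find_best_match("How do I apply for admission?", threshold=0.7)
    print(score, answer)

    # get_suggested_questions returns up to N FAQ questions above a 0.3 similarity floor
    print(bot.get_suggested_questions("scholarship options", num_suggestions=3))
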
database_recommender.py ADDED
@@ -0,0 +1,293 @@
+ import pandas as pd
+ import numpy as np
+ from sklearn.neighbors import KNeighborsClassifier
+ from sklearn.preprocessing import LabelEncoder, StandardScaler
+ import joblib
+ import json
+
+ class CourseRecommender:
+     def __init__(self):
+         self.model = None
+         self.label_encoders = {}
+         self.scaler = StandardScaler()
+         self.courses = self.get_courses()
+         self.training_data = self.get_training_data()
+         self.train_model()
+
+     def get_courses(self):
+         """Get static course data"""
+         return {
+             'BSCS': 'Bachelor of Science in Computer Science',
+             'BSIT': 'Bachelor of Science in Information Technology',
+             'BSBA': 'Bachelor of Science in Business Administration',
+             'BSED': 'Bachelor of Science in Education',
+             'BSN': 'Bachelor of Science in Nursing',
+             'BSArch': 'Bachelor of Science in Architecture',
+             'BSIE': 'Bachelor of Science in Industrial Engineering',
+             'BSHM': 'Bachelor of Science in Hospitality Management',
+             'BSA': 'Bachelor of Science in Accountancy',
+             'BSPsych': 'Bachelor of Science in Psychology',
+             'BSAgri': 'Bachelor of Science in Agriculture'
+         }
+
+     def save_student_data(self, stanine, gwa, strand, course, rating, hobbies=None):
+         """Save student feedback to in-memory storage (for demonstration purposes)"""
+         try:
+             # In a real implementation, you could save this to a file or external storage
+             print(f"Student feedback saved: Stanine={stanine}, GWA={gwa}, Strand={strand}, Course={course}, Rating={rating}, Hobbies={hobbies}")
+             return True
+         except Exception as e:
+             print(f"Error saving student feedback: {e}")
+             return False
+
+     def get_training_data(self):
+         """Get static training data for demonstration purposes"""
+         # Sample training data to demonstrate the recommender system
+         training_data = [
+             # STEM students
+             (8, 95, 'STEM', 'BSCS', 5, 'programming, gaming, technology'),
+             (7, 90, 'STEM', 'BSIT', 4, 'computers, software, coding'),
+             (9, 98, 'STEM', 'BSCS', 5, 'programming, algorithms, math'),
+             (6, 85, 'STEM', 'BSIT', 3, 'technology, computers'),
+             (8, 92, 'STEM', 'BSArch', 4, 'design, drawing, creativity'),
+             (7, 88, 'STEM', 'BSIE', 4, 'engineering, problem solving'),
+
+             # ABM students
+             (8, 90, 'ABM', 'BSBA', 5, 'business, management, leadership'),
+             (7, 85, 'ABM', 'BSA', 4, 'accounting, numbers, finance'),
+             (6, 82, 'ABM', 'BSBA', 3, 'business, marketing'),
+             (9, 95, 'ABM', 'BSA', 5, 'accounting, finance, analysis'),
+
+             # HUMSS students
+             (8, 88, 'HUMSS', 'BSED', 5, 'teaching, helping, education'),
+             (7, 85, 'HUMSS', 'BSPsych', 4, 'psychology, helping, people'),
+             (6, 80, 'HUMSS', 'BSED', 3, 'teaching, children'),
+             (9, 92, 'HUMSS', 'BSPsych', 5, 'psychology, counseling, people'),
+
+             # General interests
+             (7, 87, 'STEM', 'BSN', 4, 'helping, healthcare, caring'),
+             (8, 89, 'ABM', 'BSHM', 4, 'hospitality, service, management'),
+             (6, 83, 'HUMSS', 'BSAgri', 3, 'agriculture, environment, nature'),
+         ]
+
+         return pd.DataFrame(training_data, columns=['stanine', 'gwa', 'strand', 'course', 'rating', 'hobbies'])
+
+     def train_model(self):
+         """Train the recommendation model using the training data"""
+         try:
+             training_data = self.get_training_data()
+
+             if training_data.empty:
+                 print("No training data available - using default recommendations")
+                 return
+
+             # Prepare features (hobbies required)
+             feature_columns = ['stanine', 'gwa', 'strand', 'hobbies']
+
+             # Create feature matrix
+             X = training_data[feature_columns].copy()
+             y = training_data['course']
+
+             # Handle categorical variables
+             categorical_columns = ['strand', 'hobbies']
+
+             # Refit encoders every training to incorporate new categories
+             for col in categorical_columns:
+                 if col in X.columns:
+                     X[col] = X[col].fillna('unknown')
+                     self.label_encoders[col] = LabelEncoder()
+                     X[col] = self.label_encoders[col].fit_transform(X[col])
+
+             # Scale numerical features
+             numerical_columns = ['stanine', 'gwa']
+             if not X[numerical_columns].empty:
+                 X[numerical_columns] = self.scaler.fit_transform(X[numerical_columns])
+
+             # Train KNN model
+             self.model = KNeighborsClassifier(n_neighbors=3, weights='distance')
+             self.model.fit(X, y)
+
+             print("✅ Model trained successfully (hobbies required and encoded)")
+
+         except Exception as e:
+             print(f"Error training model: {e}")
+             self.model = None
+
+     def get_default_recommendations(self, stanine, gwa, strand):
+         """Provide default recommendations based on basic rules when no training data is available"""
+         courses = self.courses
+         recommendations = []
+
+         # Basic rules for recommendations
+         if strand == 'STEM':
+             if stanine >= 8 and gwa >= 90:
+                 priority_courses = ['BSCS', 'BSIT']
+             else:
+                 priority_courses = ['BSIT', 'BSCS']
+         elif strand == 'ABM':
+             priority_courses = ['BSBA']
+         elif strand == 'HUMSS':
+             priority_courses = ['BSED']
+         else:
+             priority_courses = list(courses.keys())
+
+         # Add courses with default probabilities
+         for i, course in enumerate(priority_courses[:2]):  # Only take top 2
+             if course in courses:
+                 recommendations.append({
+                     'code': course,
+                     'name': courses[course],
+                     'probability': 1.0 - (i * 0.2)  # Decreasing probability for each course
+                 })
+
+         return recommendations
+
+     def recommend_courses(self, stanine, gwa, strand, hobbies=None, top_n=5):
+         """Recommend courses based on student profile (hobbies required)"""
+         try:
+             if self.model is None:
+                 return self.get_default_recommendations(stanine, gwa, strand)
+
+             # Prepare input features
+             input_data = pd.DataFrame([{
+                 'stanine': stanine,
+                 'gwa': gwa,
+                 'strand': strand,
+                 'hobbies': (hobbies or '').strip()
+             }])
+             # Validate hobbies
+             if not input_data['hobbies'].iloc[0]:
+                 raise ValueError('hobbies is required for recommendations')
+
+             # Encode categorical variables
+             for col in ['strand', 'hobbies']:
+                 if col in input_data.columns and col in self.label_encoders:
+                     value = input_data[col].iloc[0]
+                     if value not in self.label_encoders[col].classes_:
+                         # Extend encoder classes to include unseen value at inference
+                         self.label_encoders[col].classes_ = np.append(self.label_encoders[col].classes_, value)
+                     input_data[col] = self.label_encoders[col].transform(input_data[col])
+
+             # Scale numerical features
+             numerical_columns = ['stanine', 'gwa']
+             if not input_data[numerical_columns].empty:
+                 input_data[numerical_columns] = self.scaler.transform(input_data[numerical_columns])
+
+             # Get predictions
+             predictions = self.model.predict_proba(input_data)
+             courses = self.model.classes_
+
+             # Get top recommendations
+             top_indices = np.argsort(predictions[0])[-top_n:][::-1]
+             recommendations = []
+
+             course_map = self.courses
+             for idx in top_indices:
+                 code = courses[idx]
+                 confidence = predictions[0][idx]
+                 recommendations.append({
+                     'code': code,
+                     'name': course_map.get(code, code),
+                     'rating': round(confidence * 100, 1)
+                 })
+
+             return recommendations
+
+         except Exception as e:
+             print(f"Error recommending courses: {e}")
+             return self.get_default_recommendations(stanine, gwa, strand)
+
+     def _get_recommendation_reason(self, course, stanine, gwa, strand, hobbies, interests, personality_type, learning_style, career_goals):
+         """Generate personalized reason for recommendation"""
+         reasons = []
+
+         # Academic performance reasons
+         if stanine >= 8:
+             reasons.append("Excellent academic performance")
+         elif stanine >= 6:
+             reasons.append("Good academic foundation")
+
+         if gwa >= 85:
+             reasons.append("High academic achievement")
+         elif gwa >= 80:
+             reasons.append("Strong academic record")
+
+         # Strand alignment
+         if strand == "STEM" and course in ["BSCS", "BSIT", "BSArch", "BSIE", "BSN"]:
+             reasons.append("Perfect match with your STEM background")
+         elif strand == "ABM" and course in ["BSBA", "BSA"]:
+             reasons.append("Excellent alignment with your ABM strand")
+         elif strand == "HUMSS" and course in ["BSED", "BSPsych"]:
+             reasons.append("Great fit with your HUMSS background")
+
+         # Hobbies and interests alignment
+         if hobbies and any(hobby in hobbies.lower() for hobby in ["gaming", "programming", "technology", "computers"]):
+             if course in ["BSCS", "BSIT"]:
+                 reasons.append("Matches your technology interests")
+
+         if hobbies and any(hobby in hobbies.lower() for hobby in ["business", "leadership", "management"]):
+             if course in ["BSBA", "BSA"]:
+                 reasons.append("Aligns with your business interests")
+
+         if hobbies and any(hobby in hobbies.lower() for hobby in ["helping", "teaching", "caring"]):
+             if course in ["BSED", "BSN", "BSPsych"]:
+                 reasons.append("Perfect for your helping nature")
+
+         # Personality type alignment
+         if personality_type == "introvert" and course in ["BSCS", "BSA", "BSArch"]:
+             reasons.append("Suits your introverted personality")
+         elif personality_type == "extrovert" and course in ["BSBA", "BSED", "BSHM"]:
+             reasons.append("Great for your outgoing personality")
+
+         # Learning style alignment
+         if learning_style == "hands-on" and course in ["BSIT", "BSHM", "BSAgri"]:
+             reasons.append("Matches your hands-on learning preference")
+         elif learning_style == "visual" and course in ["BSArch", "BSCS"]:
+             reasons.append("Perfect for your visual learning style")
+
+         # Career goals alignment
+         if career_goals and any(goal in career_goals.lower() for goal in ["developer", "programmer", "software"]):
+             if course in ["BSCS", "BSIT"]:
+                 reasons.append("Direct path to your career goals")
+
+         if career_goals and any(goal in career_goals.lower() for goal in ["business", "entrepreneur", "manager"]):
+             if course in ["BSBA", "BSA"]:
+                 reasons.append("Direct path to your business goals")
+
+         # Default reason if no specific matches
+         if not reasons:
+             reasons.append("Good academic and personal fit")
+
+         return " • ".join(reasons[:3])  # Limit to top 3 reasons
+
+     def save_model(self, model_path='course_recommender_model.joblib'):
+         """Save the trained model"""
+         if self.model is None:
+             raise Exception("No model to save!")
+
+         model_data = {
+             'model': self.model,
+             'scaler': self.scaler,
+             'label_encoders': self.label_encoders
+         }
+         joblib.dump(model_data, model_path)
+
+     def load_model(self, model_path='course_recommender_model.joblib'):
+         """Load a trained model"""
+         model_data = joblib.load(model_path)
+         self.model = model_data['model']
+         self.scaler = model_data['scaler']
+         self.label_encoders = model_data['label_encoders']
+
+ # Example usage
+ if __name__ == "__main__":
+     recommender = CourseRecommender()
+
+     # Example recommendation
+     recommendations = recommender.recommend_courses(
+         stanine=8,
+         gwa=95,
+         strand='STEM',
+         hobbies='programming, gaming, technology'
+     )
+     print("Recommended courses:", json.dumps(recommendations, indent=2))
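
Note (not part of the commit): the file already ends with an example recommendation; as a complementary sketch, save_model and load_model can persist and restore the fitted KNN, scaler, and label encoders between runs. The file name below is just the method default, and the input values are illustrative.

    # Illustrative only: persisting and reloading the recommender
    from database_recommender import CourseRecommender

    rec = CourseRecommender()  # trains on the static sample data in __init__
    rec.save_model('course_recommender_model.joblib')

    fresh = CourseRecommender()  # retrains, then overwrite with the saved artifacts
    fresh.load_model('course_recommender_model.joblib')
    print(fresh.recommend_courses(stanine=7, gwa=88, strand='ABM', hobbies='business, finance'))
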
requirements.txt CHANGED
@@ -1 +1,7 @@
- gradio>=4.42.0
+ gradio
+ numpy
+ pandas
+ scikit-learn
+ joblib
+ sentence-transformers
+ torch
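
Note (not part of the commit): the new entries are unpinned and the previous gradio>=4.42.0 floor is dropped, so `pip install -r requirements.txt` will resolve to whatever releases of numpy, pandas, scikit-learn, joblib, sentence-transformers, and torch are current at install time. Pinning exact versions is the usual way to keep the environment reproducible.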