File size: 26,933 Bytes
42f2789
 
 
 
 
 
a45fe55
 
 
 
 
 
 
 
7b249d7
a45fe55
 
 
 
 
 
 
 
 
 
 
7b249d7
 
a45fe55
 
7b249d7
75e3fea
a45fe55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ef21a5c
a45fe55
 
ef21a5c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42f2789
a45fe55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42f2789
a45fe55
42f2789
a45fe55
 
 
 
 
 
7b249d7
 
a45fe55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7b249d7
a45fe55
7b249d7
a45fe55
 
 
7b249d7
a45fe55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7b249d7
a45fe55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7b249d7
a45fe55
 
 
 
 
 
 
 
50fd2f5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
# --- Set Cache Folders for Hugging Face Environment ---
# THIS IS THE CRUCIAL FIX for the PermissionError. It must be at the top.
import os
os.environ["TRANSFORMERS_CACHE"] = "/tmp/transformers"
os.environ["HF_HOME"] = "/tmp/huggingface"

import json
import re
import logging
import math
import docx
import fitz  # PyMuPDF
from dotenv import load_dotenv
import google.generativeai as genai
from flask import Flask, render_template, request, jsonify
from sentence_transformers import SentenceTransformer, util
from rapidfuzz import fuzz, process
from urllib.parse import quote_plus

# --- Load environment variables and configure Gemini API ---
# load_dotenv() is called before os.getenv so the key lookup below runs after
# any .env loading has happened.
load_dotenv()
# GEMINI_API_KEY is None when the variable is not set;
# analyze_resume_and_suggest_jobs checks for that before calling the API.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
genai.configure(api_key=GEMINI_API_KEY)

# The Flask application object that the @app.route decorators below attach to.
app = Flask(__name__)

# --- Setup Loggers ---
# Root logger at INFO level; each record is "<timestamp> - <message>".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S')

# --- Load Model & Data ---
# Sentence-embedding model, instantiated once at import time and reused by
# get_recommendations for every request.
model = SentenceTransformer("sentence-transformers/paraphrase-albert-small-v2")

def load_json(filename):
    """Load and return the parsed JSON content of ``filename``.

    The file is read as UTF-8. Any failure (missing file, unreadable file,
    invalid JSON, ...) is logged and replaced by an empty fallback so that
    the caller can keep running without data.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The decoded JSON value, or on failure ``{}`` when the file is named
        ``pincodes.json`` and ``[]`` for every other file.
    """
    try:
        with open(filename, "r", encoding="utf-8") as f:
            return json.load(f)
    except Exception as e:
        logging.error(f"Error loading {filename}: {e}")
        # Compare the base name rather than the whole argument so that a
        # path-qualified "…/pincodes.json" still gets the mapping fallback.
        is_pincode_file = os.path.basename(str(filename)) == "pincodes.json"
        return {} if is_pincode_file else []

# Static datasets, read once when the module is imported.
COURSE_DATA = load_json("careers.json")
COLLEGE_DATA = load_json("colleges.json")
PINCODE_DATA = load_json("pincodes.json")

# Every tag found in any category of any course, plus three fixed
# learning-style words. normalize_word fuzzy-matches user input against it.
ALL_TAGS = set()
for course in COURSE_DATA:
    tags_obj = course.get("tags", {})
    if not isinstance(tags_obj, dict):
        # Only dict-shaped tag collections contribute tags.
        continue
    for category_tags in tags_obj.values():
        ALL_TAGS.update(category_tags)
ALL_TAGS |= {"theory", "research", "practical"}

# --- Constants & Dictionaries ---
# Questionnaire prompts keyed by the answer field they fill in. next_question
# walks this dict in insertion order, so the order here is the asking order.
QUESTIONS = {
    "stream": "What was your academic stream after 10th?",
    "subject_strengths": "Which subjects do you feel strongest in?",
    "subject_weaknesses": "Which subjects do you find most difficult?",
    "learning_style": "Do you learn better through practical work or theory/research?",
    "work_environment": "What kind of work environment do you prefer? (e.g., an office, a lab, outdoors, a workshop)",
    "team_preference": "Do you prefer working alone or collaboratively?",
    "interest_activities": "Outside academics, what hobbies do you enjoy?",
    "general_interests": "What topics or fields are you generally curious about?",
    "primary_driver": "What motivates your future most? (e.g., money, creativity, helping people, innovation, stability)",
}

# Words dropped by parse_input before keyword normalisation.
STOP_WORDS = {
    "a", "an", "and", "the", "in",
    "on", "for", "with", "i", "my",
    "is", "are", "like", "to", "of",
}

# Persona trait -> keywords; build_user_profile assigns a trait when any of
# its keywords appears among the user's parsed answers.
TRAIT_KEYWORDS = {
    "analytical": ["math", "physics", "science", "data", "logic", "puzzles", "engineering", "theory", "research"],
    "creative": ["art", "design", "music", "writing", "media", "film", "painting"],
    "social": ["helping", "teaching", "volunteering", "communication", "people", "society", "healthcare", "environment"],
    "structured": ["commerce", "law", "management", "finance", "corporate", "office"],
    "hands_on": ["practical", "projects", "sports", "repair", "construction", "biology"],
    "collaborative": ["team", "teamwork", "collaboration", "people", "social"],
    "independent": ["alone", "independent", "self-directed", "focus", "quiet"],
    "field_work": ["outdoors", "on-the-move", "travel", "construction", "farming"],
    "lab_work": ["lab", "research", "science", "biotech", "forensic"],
}

# --- Helper Functions ---
def preprocess_text(text):
    """Return a simplified, lower-case form of ``text`` for fuzzy matching.

    The characters ``.``, ``&`` and ``,`` are removed, the standalone words
    "in" and "and" are dropped, and runs of whitespace are collapsed to a
    single space with the ends trimmed.
    """
    lowered = text.lower()
    without_punctuation = re.sub(r'[.&,]', '', lowered)
    without_fillers = re.sub(r'\b(in|and)\b', '', without_punctuation)
    collapsed = re.sub(r'\s+', ' ', without_fillers)
    return collapsed.strip()

def normalize_word(word):
    """Map ``word`` onto the closest known tag when the match is strong.

    The word is fuzzy-matched against ``ALL_TAGS`` with ``fuzz.ratio``; the
    best tag is returned when its score is at least 85, otherwise (or when
    ``ALL_TAGS`` is empty) the word is returned unchanged.
    """
    if ALL_TAGS:
        match = process.extractOne(word, ALL_TAGS, scorer=fuzz.ratio)
        closest_tag, similarity, _ = match
        if similarity >= 85:
            return closest_tag
    return word

def parse_input(text):
    """Split free text into a set of normalised keywords.

    The text is lower-cased and split on commas and whitespace; empty pieces
    and stop words are skipped, and each remaining word is passed through
    ``normalize_word``.
    """
    keywords = set()
    for token in re.split(r"[,\s]+", text.lower()):
        if not token or token in STOP_WORDS:
            continue
        keywords.add(normalize_word(token))
    return keywords

def build_user_profile(answers):
    """Turn questionnaire answers into keyword sets and persona traits.

    Args:
        answers: Mapping of question key to the user's raw answer text.

    Returns:
        A ``(profile, persona)`` tuple. ``profile`` maps each answered key
        (falsy answers are skipped) to the keyword set from ``parse_input``.
        ``persona`` is the set of trait names from ``TRAIT_KEYWORDS`` that
        share at least one keyword with the combined answers.
    """
    profile = {}
    for key, value in answers.items():
        if value:
            profile[key] = parse_input(value)

    # Pool the keywords of every answer into one set.
    all_keywords = set()
    for keyword_set in profile.values():
        all_keywords |= keyword_set

    persona = set()
    for trait, trait_words in TRAIT_KEYWORDS.items():
        if any(word in all_keywords for word in trait_words):
            persona.add(trait)
    return profile, persona

def calculate_distance(pin1, pin2, pincode_data):
    """Return the haversine distance between two PIN codes.

    Args:
        pin1: Key of the first location in ``pincode_data``.
        pin2: Key of the second location in ``pincode_data``.
        pincode_data: Mapping of PIN code to a dict with ``'lat'`` and
            ``'lon'`` entries, read here as degrees.

    Returns:
        The distance computed with a sphere radius of 6371 (so the result is
        in kilometres), or ``float('inf')`` when either PIN code is missing
        from ``pincode_data``.
    """
    if not (pin1 in pincode_data and pin2 in pincode_data):
        return float('inf')

    origin = pincode_data[pin1]
    lat1, lon1 = origin['lat'], origin['lon']
    target = pincode_data[pin2]
    lat2, lon2 = target['lat'], target['lon']

    sphere_radius = 6371
    delta_lat = math.radians(lat2 - lat1)
    delta_lon = math.radians(lon2 - lon1)

    # Haversine term, then the central angle between the two points.
    a = math.sin(delta_lat / 2)**2 + math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) * math.sin(delta_lon / 2)**2
    central_angle = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
    return sphere_radius * central_angle

def find_nearby_colleges(course_name, user_pincode, max_distance_km=150):
    """Build an HTML card listing colleges near ``user_pincode`` offering a course.

    Args:
        course_name: Course title, compared case-insensitively against each
            college's ``courses_offered`` entries.
        user_pincode: PIN code of the user; must be a key of ``PINCODE_DATA``.
        max_distance_km: Largest distance (as returned by
            ``calculate_distance``) for a college to be listed. Defaults to
            the previously hard-coded 150.

    Returns:
        A ``(html, colleges)`` tuple. On success ``colleges`` is the full list
        of ``(college, distance)`` pairs sorted by distance (including those
        beyond the radius) and ``html`` lists only the ones within it. When
        the PIN code is unknown, no college offers the course, or none is
        within range, ``html`` is an error card and ``colleges`` is ``[]``.
    """
    import html  # local import: keeps this block independent of the file header

    if user_pincode not in PINCODE_DATA:
        return "<div class='college-card error-card'>Sorry, I don't have location data for that PIN code.</div>", []

    # The course name may originate from client-held conversation state, so it
    # is escaped before being placed into markup.
    safe_course = html.escape(str(course_name))
    wanted = str(course_name).lower()
    colleges_with_course = [
        college for college in COLLEGE_DATA
        if any(wanted == str(offered).lower() for offered in college.get('courses_offered', []))
    ]
    if not colleges_with_course:
        return f"<div class='college-card error-card'>I couldn't find any colleges in my database offering <b>{safe_course}</b>.</div>", []

    # PINCODE_DATA keys come from JSON object keys (always strings), so the
    # college PIN is coerced to str; a missing PIN yields an infinite distance.
    nearby_colleges = sorted(
        ((college, calculate_distance(user_pincode, str(college.get('pincode', '')), PINCODE_DATA))
         for college in colleges_with_course),
        key=lambda pair: pair[1],
    )

    items = []
    for college, dist in nearby_colleges:
        if dist <= max_distance_km:
            name = str(college.get('name', ''))
            pincode = html.escape(str(college.get('pincode', '')))
            items.append(
                f"<li><a href='https://www.google.com/search?q={quote_plus(name)}' target='_blank'>"
                f"<b>{html.escape(name)}</b></a> ({pincode})<br>"
                f"<small>Approx. {dist:.0f} km away</small></li>"
            )
    if not items:
        return f"<div class='college-card error-card'>I couldn't find any colleges offering <b>{safe_course}</b> within a {max_distance_km}km radius of your PIN code.</div>", []

    response_html = (
        "<div class='college-card primary-card'>"
        f"<h4>🎯 Top Matches for {safe_course} near {html.escape(str(user_pincode))}</h4><ul>"
        + "".join(items)
        + "</ul></div>"
    )
    return response_html, nearby_colleges

def format_course_details(course):
    """Render one course record as an HTML "details" card.

    Args:
        course: Course dict. The keys read are ``course``, ``description``,
            ``possible_careers``, ``required_education`` and
            ``related_courses``; the three optional sections are omitted when
            their value is empty or missing.

    Returns:
        The HTML string for the card.
    """
    # The emoji below replace mis-encoded byte sequences (UTF-8 decoded with
    # the wrong codec) that were previously shown to users as garbage.
    parts = [
        "<div class='details-card'>",
        f"<h3>🎓 {course.get('course', 'N/A')}</h3><p>{course.get('description', '')}</p>",
    ]
    careers = course.get('possible_careers', [])
    if careers:
        parts.append("<b>💼 Potential Career Paths:</b><ul>" + "".join(f"<li>{c}</li>" for c in careers) + "</ul>")
    education = course.get('required_education', '')
    if education:
        parts.append(f"<p>✅ <b>Entry Requirements:</b> {education}</p>")
    related = course.get('related_courses', [])
    if related:
        parts.append("<b>📚 Key Subjects You'll Study:</b><ul>" + "".join(f"<li>{s}</li>" for s in related) + "</ul>")
    parts.append("</div>")
    return "".join(parts)

def format_comparison(courses):
    """Render a side-by-side HTML comparison table for ``courses``.

    Args:
        courses: List of course dicts. The keys read are ``course``,
            ``possible_careers``, ``required_education``, ``related_courses``
            and ``tags['skills']``.

    Returns:
        The HTML of a table with one column per course and four feature rows
        (careers, requirements, key subjects, core skills), or a short notice
        card when ``courses`` is empty.
    """
    if not courses:
        return "<div class='details-card'><p>I couldn't find any valid courses to compare. Please check the names and try again.</p></div>"

    table_style = "width:100%;border-collapse:collapse;text-align:left;"
    th_style = "border-bottom:2px solid #dee2e6;padding:12px;font-size:1rem;"
    td_style = "border-bottom:1px solid #dee2e6;padding:12px;vertical-align:top;"

    def row(label, cells):
        # Every cell, including the label cell, is formatted here. Previously
        # three label cells lacked the f-string prefix and emitted the literal
        # text "{td_style}" as their style attribute.
        body = "".join(f"<td style='{td_style}'>{cell}</td>" for cell in cells)
        return f"<tr><td style='{td_style}'><b>{label}</b></td>{body}</tr>"

    def skills_of(course):
        tags = course.get('tags', {})
        # Tolerate a non-dict ``tags`` value instead of raising.
        skills = tags.get('skills', ['N/A']) if isinstance(tags, dict) else ['N/A']
        return ', '.join(s.capitalize() for s in skills)

    header_cells = []
    for course in courses:
        course_name = course.get('course')
        header_cells.append(
            f"<th style='{th_style}'><div class='clickable-card' data-action='quick_reply' data-value='{course_name}' style='padding:0; margin:0; text-align:left;'>{course_name}</div></th>"
        )

    # Emoji labels replace previously mis-encoded (mojibake) byte sequences.
    rows = [
        row("💼 Careers", (', '.join(c.get('possible_careers', ['N/A'])) for c in courses)),
        row("✅ Requirements", (c.get('required_education', 'N/A') for c in courses)),
        row("📚 Key Subjects", (', '.join(c.get('related_courses', ['N/A'])) for c in courses)),
        row("🛠️ Core Skills", (skills_of(c) for c in courses)),
    ]

    return (
        f"<div class='details-card'><table style='{table_style}'><thead><tr><th style='{th_style}'>Feature</th>"
        + "".join(header_cells)
        + "</tr></thead><tbody>"
        + "".join(rows)
        + "</tbody></table>"
        + "<p style='font-size:0.85rem; text-align:center; margin-top:1rem; opacity:0.8;'>You can click on a course title in the table above for a detailed view.</p></div>"
    )

def _course_tag_set(course):
    """Flatten every tag category of ``course`` into one set (empty if ``tags`` is not a dict)."""
    tags_obj = course.get("tags", {})
    return set().union(*tags_obj.values()) if isinstance(tags_obj, dict) else set()


def _render_recommendation_card(rank, course_data):
    """Return the clickable HTML card for one recommended course at 1-based ``rank``."""
    course_name = course_data.get('course')
    description = course_data.get('description', '')
    tags_obj = course_data.get('tags', {})
    skills = tags_obj.get('skills', [])[:3] if isinstance(tags_obj, dict) else []
    careers = course_data.get('possible_careers', [])[:3]

    card = f"<div class='recommendation-card clickable-card' data-action='details' data-value='{rank}'>"
    card += f"<p style='margin-bottom: 1rem;'><b>{rank}. {course_name}</b><br>{description}</p>"
    card += "<div style='font-size: 0.9rem; display: flex; flex-direction: column; gap: 0.5rem;'>"
    if skills:
        card += f"<div><b>🛠️ Key Skills:</b> {', '.join(s.capitalize() for s in skills)}</div>"
    if careers:
        card += f"<div><b>💼 Potential Careers:</b> {', '.join(careers)}</div>"
    card += "</div></div>"
    return card


def get_recommendations(answers, courses):
    """Rank ``courses`` against the user's answers and render the top three.

    Each course gets a score of ``(semantic + heuristic - penalty) * 100``:

    * semantic: cosine similarity between the embedding of the user's pooled
      keywords and the embedding of the course's name, description, careers
      and tags;
    * heuristic: fixed bonuses when the course tags overlap the user's
      general interests (0.3), subject strengths (0.15), stream (0.1) or
      persona traits (0.15);
    * penalty: 0.2 per weak subject found among the course tags, plus 0.15
      when a persona trait appears in the course's ``anti_tags``.

    Only courses scoring above 20 are kept.

    Args:
        answers: Mapping of question key to raw answer text.
        courses: Iterable of course dicts.

    Returns:
        ``(html, recommendations)``. ``("", [])`` when the answers yield no
        keywords; a "no strong match" message with ``[]`` when nothing scores
        above the threshold; otherwise the HTML of up to three recommendation
        cards plus a compare card, and the matching course dicts.
    """
    profile, persona = build_user_profile(answers)
    if not any(profile.values()):
        return "", []
    user_profile_text = " ".join(set().union(*profile.values()))
    if not user_profile_text.strip():
        return "", []

    no_match_message = "🤔 I couldn’t find a strong match. Would you like to try again?"
    courses = list(courses)
    if not courses:
        return no_match_message, []

    tag_sets = [_course_tag_set(course) for course in courses]
    course_texts = [
        f"{course.get('course', '')} {course.get('description', '')} {' '.join(course.get('possible_careers', []))} {' '.join(tags)}"
        for course, tags in zip(courses, tag_sets)
    ]

    # Encode all course texts in one batched call instead of one model
    # forward pass per course; cos_sim then yields a 1 x N score matrix.
    user_emb = model.encode(user_profile_text, convert_to_tensor=True)
    course_embs = model.encode(course_texts, convert_to_tensor=True)
    semantic_scores = util.cos_sim(user_emb, course_embs)[0]

    interests = profile.get("general_interests", set())
    strengths = profile.get("subject_strengths", set())
    stream = profile.get("stream", set())
    weaknesses = profile.get("subject_weaknesses", set())

    scored_courses = []
    for course, course_tags, raw_score in zip(courses, tag_sets, semantic_scores):
        semantic_score = float(raw_score)
        heuristic_score = sum([
            0.3 if interests.intersection(course_tags) else 0,
            0.15 if strengths.intersection(course_tags) else 0,
            0.1 if stream.intersection(course_tags) else 0,
            0.15 if persona.intersection(course_tags) else 0,
        ])
        penalty_score = sum([
            len(weaknesses.intersection(course_tags)) * 0.2,
            0.15 if persona.intersection(course.get("anti_tags", [])) else 0,
        ])
        final_score = (semantic_score + heuristic_score - penalty_score) * 100
        if final_score > 20:
            scored_courses.append((final_score, course))

    # Stable sort: equally scored courses keep their dataset order.
    scored_courses.sort(key=lambda pair: pair[0], reverse=True)
    raw_recs = [course for _, course in scored_courses[:3]]
    if not raw_recs:
        return no_match_message, []

    # Emoji literals replace previously mis-encoded (mojibake) byte sequences.
    response_html = "<div class='recommendation-container'><h4>🚀 Here are my top recommendations for you:</h4>"
    response_html += "".join(
        _render_recommendation_card(rank, course_data)
        for rank, course_data in enumerate(raw_recs, start=1)
    )
    response_html += "<div class='recommendation-card clickable-card compare-card' data-action='compare' data-value='compare'><p><b>⚖️ Compare Courses</b></p></div></div>"
    return response_html, raw_recs

def next_question(answers):
    """Return the first ``(key, question)`` of ``QUESTIONS`` not yet answered.

    Questions are considered in the dict's insertion order. When every key
    is already present in ``answers`` the result is ``(None, None)``.
    """
    unanswered = ((key, q) for key, q in QUESTIONS.items() if key not in answers)
    return next(unanswered, (None, None))

def extract_text_from_file(file):
    """Extract plain text from an uploaded PDF or DOCX file.

    Args:
        file: Uploaded file object exposing ``filename`` and, for PDFs,
            ``read()``. The format is chosen from the file-name extension,
            compared case-insensitively.

    Returns:
        The concatenated page text for ``.pdf`` files, the paragraphs joined
        with a trailing newline each for ``.docx`` files, and ``""`` for any
        other (or missing) file name.
    """
    # A missing file name is treated as "unsupported" rather than raising.
    filename = (file.filename or "").lower()
    if filename.endswith('.pdf'):
        # The context manager closes the document even if get_text() raises.
        with fitz.open(stream=file.read(), filetype="pdf") as pdf_document:
            return "".join(page.get_text() for page in pdf_document)
    if filename.endswith('.docx'):
        doc = docx.Document(file)
        return "".join(f"{para.text}\n" for para in doc.paragraphs)
    return ""

def analyze_resume_and_suggest_jobs(resume_text):
    """Ask Gemini to review a resume and render the result as an HTML card.

    The model is prompted to answer with a single JSON object holding
    ``person_name``, ``overall_score``, ``summary`` and ``job_titles``. The
    first ``{...}`` span of the reply is parsed and formatted.

    Args:
        resume_text: Plain text extracted from the uploaded resume.

    Returns:
        HTML for a details card with the score, summary and suggested job
        titles, or an error card when the API key is not configured or the
        request/parsing fails (the failure is printed and logged).
    """
    import html  # local import: keeps this block independent of the file header

    if not GEMINI_API_KEY:
        return "<div class='college-card error-card'>Error: Gemini API key is not configured.</div>"
    prompt = f"""
    You are an expert career coach. Analyze the following resume text.
    Your response must be a single JSON object with four keys:
    1. "person_name": A string containing the full name of the candidate found in the resume. If no name is clear, return an empty string.
    2. "overall_score": An integer score out of 100 for the resume's quality.
    3. "summary": A brief, encouraging 1-2 sentence summary of the resume.
    4. "job_titles": A list of 3-5 specific job titles the candidate is well-suited for based on their skills and experience.

    Do not add any text before or after the JSON object.

    Resume Text to analyze:
    ---
    {resume_text}
    ---
    """
    try:
        model_gen = genai.GenerativeModel('gemini-1.5-pro-latest')
        response = model_gen.generate_content(prompt)
        # Greedy match from the first "{" to the last "}" so any wrapper text
        # around the JSON object is ignored.
        json_match = re.search(r'\{.*\}', response.text, re.DOTALL)
        if not json_match:
            raise ValueError("Invalid JSON response from API")

        feedback = json.loads(json_match.group(0))

        # Everything below is derived from an untrusted resume via the model,
        # so each value is escaped before it is placed into markup.
        # ``or ''`` keeps a JSON null name from aborting the whole analysis.
        name = html.escape(str(feedback.get('person_name') or '').strip())
        score = html.escape(str(feedback.get('overall_score', 'N/A')))
        summary = html.escape(str(feedback.get('summary', '')))
        job_titles = feedback.get('job_titles', [])
        if not isinstance(job_titles, list):
            job_titles = []

        # Emoji literals replace previously mis-encoded (mojibake) sequences.
        heading = f"<h3>📝 Resume Analysis for {name}</h3>" if name else "<h3>📝 Resume Analysis</h3>"
        titles_html = "".join(f"<li>{html.escape(str(title))}</li>" for title in job_titles)
        return (
            "<div class='details-card'>"
            + heading
            + f"<p><b>Overall Score:</b> {score}/100</p>"
            + f"<p><b>Summary:</b> <i>{summary}</i></p>"
            + "<b>🚀 Potential Job Roles:</b><ul>" + titles_html + "</ul>"
            + "</div>"
        )
    except Exception as e:
        print(f"\n---!!! GEMINI API ERROR !!!---\n{e}\n-----------------------------\n")
        logging.error(f"Gemini API Error: {e}")
        error_message = ("Sorry, the analysis failed. This is often an API key issue. "
                       "Please check the terminal where you ran `python app.py` for the specific error message.")
        return f"<div class='college-card error-card'>{error_message}</div>"

# --- Flask Routes ---
@app.route("/")
def index():
    """Serve the landing page by rendering the ``index.html`` template."""
    return render_template("index.html")

@app.route("/upload_resume", methods=["POST"])
def upload_resume():
    """Handle a resume upload and respond with the analysis as JSON.

    Expects a multipart field named ``resume_file`` holding a ``.pdf`` or
    ``.docx`` file. Responds with ``{"error": ...}`` and status 400 when the
    field is missing, no file was selected, or the extension is unsupported.
    Otherwise responds with ``{"response": <html>}``, where the HTML is the
    analysis card, an "empty file" card, or a generic error card if
    processing raised.
    """
    if 'resume_file' not in request.files:
        return jsonify({"error": "No file part"}), 400

    file = request.files['resume_file']
    if file.filename == '':
        return jsonify({"error": "No selected file"}), 400

    # ``file`` is tested first so the name is only inspected for a truthy upload.
    is_supported = bool(file) and file.filename.lower().endswith(('.pdf', '.docx'))
    if not is_supported:
        return jsonify({"error": "Invalid file type. Please upload a PDF or DOCX file."}), 400

    try:
        resume_text = extract_text_from_file(file)
        if resume_text.strip():
            payload = analyze_resume_and_suggest_jobs(resume_text)
        else:
            payload = "<div class='college-card error-card'>The uploaded file seems to be empty.</div>"
        return jsonify({"response": payload})
    except Exception as e:
        logging.error(f"Resume Upload Error: {e}")
        return jsonify({"response": "<div class='college-card error-card'>Sorry, an error occurred while processing your file.</div>"})

# Quick-reply HTML fragments shared by several conversation states. The emoji
# replace byte sequences that had been mis-encoded (mojibake) in the replies.
_END_CHAT_BUTTON = "<div class='quick-reply-container'><div class='quick-reply-button clickable-card' data-action='quick_reply' data-value='End Chat'>🚪 End Chat</div></div>"
_THUMBS_YES_NO = "<div class='quick-reply-container'><div class='quick-reply-button clickable-card' data-action='quick_reply' data-value='Yes'>👍 Yes</div><div class='quick-reply-button clickable-card' data-action='quick_reply' data-value='No'>👎 No</div></div>"
_PLAIN_YES_NO = "<div class='quick-reply-container'><div class='quick-reply-button clickable-card' data-action='quick_reply' data-value='Yes'>Yes</div><div class='quick-reply-button clickable-card' data-action='quick_reply' data-value='No'>No</div></div>"
_SEARCH_AGAIN_OR_END = "<div class='quick-reply-container'><div class='quick-reply-button clickable-card' data-action='quick_reply' data-value='College Location Finder'>🔎 Search Again</div><div class='quick-reply-button clickable-card' data-action='quick_reply' data-value='End Chat'>🚪 End Chat</div></div>"
_RESTART_BUTTON = "<div class='quick-reply-container'><div class='quick-reply-button clickable-card' data-action='restart'>🔄 Start Over</div></div>"
_END_CHAT_PROMPT = "Sure. Would you like to leave some feedback about your experience?" + _THUMBS_YES_NO
_NOT_UNDERSTOOD = "Sorry, I didn't understand. You can type `start` or select a feature from the panel."


def _best_title_match(query, titles, scorer, processor=None):
    """Return ``(title, score)`` of the closest title, or ``(None, 0)`` when nothing can be matched."""
    if not titles:
        return None, 0
    # extractOne returns None for empty choices; unpacking that would raise.
    match = process.extractOne(query, titles, scorer=scorer, processor=processor)
    if match is None:
        return None, 0
    return match[0], match[1]


@app.route("/chat", methods=["POST"])
def chat():
    """Advance the chatbot conversation by one user message.

    The JSON request body carries ``message`` (the user's text) and
    ``conversation`` (the state dict returned by the previous call; empty or
    absent to start a new session). The conversation's ``state`` selects the
    handler; the four feature commands ("personalized guidance", "compare
    courses", "college location finder", "resume analyser") are handled as if
    the state were ``awaiting_initial_action`` regardless of the stored state.

    Returns:
        A JSON response ``{"response": <html>, "conversation": <updated dict>}``.
    """
    import html  # local import: keeps this block independent of the file header

    # Tolerate a missing / non-JSON body and wrongly typed fields instead of
    # failing with an AttributeError on ``None``.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}
    msg = str(data.get("message") or "").strip()
    convo = data.get("conversation") or {}
    if not isinstance(convo, dict):
        convo = {}

    bot_response = ""
    if not convo:
        convo = {"state": "awaiting_initial_action", "answers": {}}
        bot_response = "Welcome to CareerPal! You can type `start` to begin a personalized guidance session, or select a specific tool from the panel on the left."
        logging.info("--- NEW SESSION INITIALIZED ---")
        return jsonify({"response": bot_response, "conversation": convo})

    current_state = convo.get("state", "awaiting_initial_action")
    msg_lower = msg.lower()
    logging.info(f"STATE: {current_state} | USER: {msg}")

    # A feature command always restarts from the menu, whatever the state was.
    feature_commands = ["personalized guidance", "compare courses", "college location finder", "resume analyser"]
    if msg_lower in feature_commands:
        current_state = "awaiting_initial_action"

    if current_state == "awaiting_initial_action":
        if msg_lower == "start" or msg_lower == "personalized guidance":
            convo["state"] = "asking_questions"
            convo["answers"] = {}
            _, question = next_question(convo["answers"])
            bot_response = f"Great, let's find your perfect career path! I'll ask a few questions to get started.<br><br>{question}"
        elif msg_lower == "compare courses":
            convo["state"] = "awaiting_compare_confirmation"
            bot_response = "Do you have specific courses in mind to compare?" + _PLAIN_YES_NO
        elif msg_lower == "college location finder":
            convo["state"] = "awaiting_course_for_college_search"
            bot_response = "Happy to help you find colleges! What is the name of the course you're interested in?"
        elif msg_lower == "resume analyser":
            convo["state"] = "awaiting_resume_upload"
            bot_response = "Great! Please upload your resume (PDF or DOCX format) using the upload button below."
        elif msg_lower == 'end chat':
            bot_response = _END_CHAT_PROMPT
            convo['state'] = 'awaiting_end_confirmation'
        else:
            bot_response = _NOT_UNDERSTOOD

    elif current_state == "asking_questions":
        # The conversation dict is client-held; recreate ``answers`` if absent.
        answers = convo.get("answers")
        if not isinstance(answers, dict):
            answers = convo["answers"] = {}

        # The first unanswered question is the one this message answers.
        last_key, _ = next_question(answers)
        if last_key:
            answers[last_key] = msg
            parsed_tags = parse_input(msg)
            # Escaped because it echoes user text back into HTML.
            cleaned = html.escape(", ".join(tag.capitalize() for tag in parsed_tags) or msg)
            if last_key == "subject_weaknesses":
                bot_response = f"👌 Got it — I’ll stay away from careers heavy in {cleaned}. "
            elif last_key == "subject_strengths":
                bot_response = f"🔥 Nice! Being strong in {cleaned} is a great asset. "
            elif last_key == "interest_activities":
                bot_response = f"😎 Cool! Enjoying {cleaned} gives me clues about your personality. "
            elif last_key == "general_interests":
                bot_response = f"👍 That's insightful! An interest in {cleaned} helps narrow down the options. "
            else:
                bot_response = "✅ Okay, noted. "

        _, next_q = next_question(answers)
        if next_q:
            bot_response += next_q
        else:
            bot_response, recs = get_recommendations(answers, COURSE_DATA)
            if recs:
                convo["last_recommendations"] = recs
                convo["state"] = "awaiting_more_details"
                bot_response += "<br>Click a course for more details, compare, or end the session."
                bot_response += _END_CHAT_BUTTON
            else:
                convo["state"] = "awaiting_initial_action"

    elif current_state == "awaiting_compare_confirmation":
        if msg_lower == 'yes':
            convo["state"] = "awaiting_course_names_for_compare"
            bot_response = "Please enter up to 3 course names, separated by commas."
        else:
            convo["state"] = "asking_questions"
            convo["answers"] = {}
            _, question = next_question(convo["answers"])
            bot_response = f"No problem! Let's find some courses for you first.<br><br>{question}"

    elif current_state == "awaiting_course_names_for_compare":
        user_courses = [name.strip() for name in msg_lower.split(',')[:3]]
        course_titles = [c['course'] for c in COURSE_DATA]
        matched_courses = []
        for user_course in user_courses:
            best_match, score = _best_title_match(user_course, course_titles, fuzz.token_set_ratio, preprocess_text)
            if score > 85:
                course = next(c for c in COURSE_DATA if c['course'] == best_match)
                # Two inputs resolving to the same course add it only once.
                if course not in matched_courses:
                    matched_courses.append(course)

        bot_response = format_comparison(matched_courses)
        convo["last_recommendations"] = matched_courses
        convo["state"] = "awaiting_more_details"
        bot_response += _END_CHAT_BUTTON

    elif current_state == "awaiting_course_for_college_search":
        course_titles = [c['course'] for c in COURSE_DATA]
        best_match, score = _best_title_match(msg_lower, course_titles, fuzz.token_set_ratio, preprocess_text)

        if score > 85:
            convo["course_for_college_search"] = best_match
            convo["state"] = "awaiting_pincode"
            bot_response = f"Okay, searching for colleges offering '<b>{best_match}</b>'. Please provide your 6-digit area PIN code."
        else:
            bot_response = "I couldn't find a clear match for that course. Could you please try rephrasing or be more specific?"
            convo["state"] = "awaiting_course_for_college_search"

    elif current_state == "awaiting_pincode":
        if re.match(r"^\d{6}$", msg):
            course_name = convo.get("course_for_college_search", "this course")
            bot_response, _ = find_nearby_colleges(course_name, msg)
            bot_response += _SEARCH_AGAIN_OR_END
            convo["state"] = "awaiting_initial_action"
        else:
            bot_response = "That doesn't seem like a valid 6-digit PIN code. Please try again."

    elif current_state == "awaiting_more_details":
        recs = convo.get("last_recommendations", [])
        if msg_lower == 'end chat':
            bot_response = _END_CHAT_PROMPT
            convo['state'] = 'awaiting_end_confirmation'
        elif msg_lower == 'compare':
            bot_response = format_comparison(recs)
            bot_response += _END_CHAT_BUTTON
        else:
            chosen_course = None
            # isdecimal() (not isdigit()) so that int() cannot fail on
            # characters such as superscript digits.
            if msg.isdecimal() and 1 <= int(msg) <= len(recs):
                chosen_course = recs[int(msg) - 1]
            else:
                course_titles = [r.get('course', '') for r in recs]
                best_match, score = _best_title_match(msg, course_titles, fuzz.ratio)
                if score > 70:
                    chosen_course = next((r for r in recs if r.get('course') == best_match), None)

            if chosen_course:
                bot_response = format_course_details(chosen_course)
                convo["course_for_college_search"] = chosen_course.get('course')
                bot_response += "<br><br>Would you like to find nearby colleges for this course?" + _THUMBS_YES_NO
                convo['state'] = 'awaiting_college_search_confirmation'
            else:
                bot_response = "Sorry, I didn't recognize that selection. Please choose an option from your recommendations."

    elif current_state == 'awaiting_college_search_confirmation':
        if 'yes' in msg_lower:
            bot_response = "Great! Please provide your 6-digit PIN code."
            convo['state'] = 'awaiting_pincode'
        else:
            bot_response = "No problem. You can explore other recommendations, compare courses, or select a new feature from the left panel."
            bot_response += _END_CHAT_BUTTON
            convo['state'] = 'awaiting_more_details'

    elif current_state == 'awaiting_end_confirmation':
        if 'yes' in msg_lower:
            bot_response = "I'd love to hear your thoughts. How was your experience?"
            convo['state'] = 'awaiting_feedback'
        else:
            bot_response = "No problem! It was great helping you."
            bot_response += _RESTART_BUTTON
            convo['state'] = 'session_ended'

    elif current_state == 'awaiting_feedback':
        logging.info(f"FEEDBACK: {msg}")
        bot_response = "Thank you for your feedback!"
        bot_response += _RESTART_BUTTON
        convo['state'] = 'session_ended'

    else:
        # States without a handler (e.g. 'session_ended',
        # 'awaiting_resume_upload') used to produce an empty reply; send the
        # user back to the menu instead.
        bot_response = _NOT_UNDERSTOOD
        convo['state'] = 'awaiting_initial_action'

    # Log the reply with HTML tags stripped.
    logging.info(f"BOT: {re.sub('<[^<]+?>', ' ', bot_response).strip()}")
    return jsonify({"response": bot_response, "conversation": convo})

# Start Flask's built-in server only when this file is executed directly.
# NOTE(review): debug=True turns on Flask's debug mode; confirm this entry
# point is never used for a publicly reachable deployment.
if __name__ == "__main__":
    app.run(debug=True)