# CareerPal / app.py
# Last commit by SpanDone: "Fixed Major Issues" (42f2789)
# --- Set Cache Folders for Hugging Face Environment ---
# THIS IS THE CRUCIAL FIX for the PermissionError. It must be at the top.
import os
os.environ["TRANSFORMERS_CACHE"] = "/tmp/transformers"
os.environ["HF_HOME"] = "/tmp/huggingface"
import html
import json
import logging
import math
import re
from urllib.parse import quote_plus

import docx
import fitz # PyMuPDF
import google.generativeai as genai
from dotenv import load_dotenv
from flask import Flask, render_template, request, jsonify
from rapidfuzz import fuzz, process
from sentence_transformers import SentenceTransformer, util
# --- Load environment variables and configure Gemini API ---
# Read a local .env file (if any) into the process environment.
load_dotenv()
# os.getenv returns None when the variable is unset; the key is still handed
# to genai.configure, and analyze_resume_and_suggest_jobs checks it before use.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
genai.configure(api_key=GEMINI_API_KEY)
app = Flask(__name__)
# --- Setup Loggers ---
# Root logger at INFO with a timestamp prefix; the whole module logs through it.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
# --- Load Model & Data ---
# Sentence-embedding model, created once at import time and used by
# get_recommendations to embed user and course text.
model = SentenceTransformer("sentence-transformers/paraphrase-albert-small-v2")
def load_json(filename):
    """Parse the UTF-8 JSON file ``filename`` and return its content.

    Any failure (missing file, invalid JSON, ...) is logged and replaced by
    an empty container: ``{}`` for ``"pincodes.json"``, ``[]`` for every
    other file name.
    """
    fallback = {} if filename == "pincodes.json" else []
    try:
        with open(filename, "r", encoding="utf-8") as handle:
            parsed = json.load(handle)
    except Exception as e:
        logging.error(f"Error loading {filename}: {e}")
        return fallback
    return parsed
# Static datasets, read once at import time (empty containers on failure).
COURSE_DATA = load_json("careers.json")
COLLEGE_DATA = load_json("colleges.json")
PINCODE_DATA = load_json("pincodes.json")

# Vocabulary of every tag found in the course data, plus three learning-style
# words; normalize_word fuzzy-matches user input against this set.
ALL_TAGS = set()
for course in COURSE_DATA:
    tags_obj = course.get("tags", {})
    if not isinstance(tags_obj, dict):
        continue
    # One update call merges every category's tag list at once.
    ALL_TAGS.update(*tags_obj.values())
ALL_TAGS |= {"theory", "research", "practical"}
# --- Constants & Dictionaries ---
# Questionnaire prompts, keyed by answer name. Insertion order is the order
# in which next_question asks them.
QUESTIONS = {
    "stream": "What was your academic stream after 10th?",
    "subject_strengths": "Which subjects do you feel strongest in?",
    "subject_weaknesses": "Which subjects do you find most difficult?",
    "learning_style": "Do you learn better through practical work or theory/research?",
    "work_environment": "What kind of work environment do you prefer? (e.g., an office, a lab, outdoors, a workshop)",
    "team_preference": "Do you prefer working alone or collaboratively?",
    "interest_activities": "Outside academics, what hobbies do you enjoy?",
    "general_interests": "What topics or fields are you generally curious about?",
    "primary_driver": "What motivates your future most? (e.g., money, creativity, helping people, innovation, stability)",
}

# Filler words dropped by parse_input before keyword matching.
STOP_WORDS = {
    "a", "an", "and", "the", "in", "on", "for", "with",
    "i", "my", "is", "are", "like", "to", "of",
}

# Persona trait -> keywords; build_user_profile assigns a trait when any of
# its keywords appears in the user's answers.
TRAIT_KEYWORDS = {
    "analytical": ["math", "physics", "science", "data", "logic", "puzzles", "engineering", "theory", "research"],
    "creative": ["art", "design", "music", "writing", "media", "film", "painting"],
    "social": ["helping", "teaching", "volunteering", "communication", "people", "society", "healthcare", "environment"],
    "structured": ["commerce", "law", "management", "finance", "corporate", "office"],
    "hands_on": ["practical", "projects", "sports", "repair", "construction", "biology"],
    "collaborative": ["team", "teamwork", "collaboration", "people", "social"],
    "independent": ["alone", "independent", "self-directed", "focus", "quiet"],
    "field_work": ["outdoors", "on-the-move", "travel", "construction", "farming"],
    "lab_work": ["lab", "research", "science", "biotech", "forensic"],
}
# --- Helper Functions ---
def preprocess_text(text):
    """Normalize a course title for fuzzy matching.

    Lower-cases the text, deletes ``.``, ``&`` and ``,``, removes the
    standalone words "in" and "and", and collapses whitespace runs into
    single spaces with no leading or trailing space.
    """
    lowered = text.lower()
    # Delete the three punctuation characters in a single pass.
    no_punct = lowered.translate(str.maketrans("", "", ".&,"))
    no_fillers = re.sub(r'\b(in|and)\b', '', no_punct)
    collapsed = re.sub(r'\s+', ' ', no_fillers)
    return collapsed.strip()
def normalize_word(word):
    """Snap ``word`` to the closest known tag when the match is strong.

    Returns the best ``fuzz.ratio`` match from ``ALL_TAGS`` if it scores at
    least 85, otherwise ``word`` unchanged. With an empty tag vocabulary
    the word is returned as is.
    """
    if not ALL_TAGS:
        return word
    closest_tag, similarity, _ = process.extractOne(word, ALL_TAGS, scorer=fuzz.ratio)
    if similarity >= 85:
        return closest_tag
    return word
def parse_input(text):
    """Turn free-text input into a set of normalized keywords.

    The lower-cased text is split on commas and whitespace; empty tokens and
    stop words are dropped and every remaining token goes through
    ``normalize_word``.
    """
    keywords = set()
    for token in re.split(r"[,\s]+", text.lower()):
        if not token or token in STOP_WORDS:
            continue
        keywords.add(normalize_word(token))
    return keywords
def build_user_profile(answers):
    """Derive keyword sets and persona traits from questionnaire answers.

    Returns ``(profile, persona)``: ``profile`` maps each non-empty answer
    key to its parsed keyword set, and ``persona`` is the set of trait names
    from ``TRAIT_KEYWORDS`` that share at least one keyword with the answers.
    """
    profile = {}
    for key, value in answers.items():
        if value:
            profile[key] = parse_input(value)

    all_keywords = set()
    for keyword_set in profile.values():
        all_keywords |= keyword_set

    persona = set()
    for trait, trait_words in TRAIT_KEYWORDS.items():
        if any(trait_word in all_keywords for trait_word in trait_words):
            persona.add(trait)
    return profile, persona
def calculate_distance(pin1, pin2, pincode_data):
    """Return the great-circle distance in kilometres between two PIN codes.

    Args:
        pin1: First PIN code (a key of ``pincode_data``).
        pin2: Second PIN code (a key of ``pincode_data``).
        pincode_data: Mapping of PIN code to a dict with ``'lat'`` and
            ``'lon'`` entries, read as degrees.

    Returns:
        The haversine distance on a sphere of radius 6371 km, or
        ``float('inf')`` when either PIN code is missing from the mapping.
    """
    if pin1 not in pincode_data or pin2 not in pincode_data:
        return float('inf')
    lat1 = math.radians(pincode_data[pin1]['lat'])
    lon1 = math.radians(pincode_data[pin1]['lon'])
    lat2 = math.radians(pincode_data[pin2]['lat'])
    lon2 = math.radians(pincode_data[pin2]['lon'])

    earth_radius_km = 6371
    a = (
        math.sin((lat2 - lat1) / 2) ** 2
        + math.cos(lat1) * math.cos(lat2) * math.sin((lon2 - lon1) / 2) ** 2
    )
    # Rounding can push `a` marginally outside [0, 1] for (near-)antipodal
    # points, which would make the square roots below raise ValueError.
    a = min(1.0, max(0.0, a))
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
    return earth_radius_km * c
def find_nearby_colleges(course_name, user_pincode, max_distance_km=150):
    """Build an HTML card listing colleges that offer a course near a PIN code.

    Args:
        course_name: Course title, compared case-insensitively with each
            college's ``courses_offered`` entries. It can originate from
            client-supplied conversation state, so it is HTML-escaped
            before being embedded in the response.
        user_pincode: The user's PIN code; must be a key of ``PINCODE_DATA``.
        max_distance_km: Only colleges within this distance are listed.

    Returns:
        ``(html, colleges)``. On success ``colleges`` holds every
        ``(college, distance_km)`` pair offering the course, sorted by
        distance (including those beyond the radius). On failure ``html`` is
        an error card and ``colleges`` is ``[]``.
    """
    if user_pincode not in PINCODE_DATA:
        return "<div class='college-card error-card'>Sorry, I don't have location data for that PIN code.</div>", []

    wanted = str(course_name).lower()
    safe_course = html.escape(str(course_name))
    colleges_with_course = [
        college for college in COLLEGE_DATA
        if any(wanted == str(offered).lower() for offered in college.get('courses_offered', []))
    ]
    if not colleges_with_course:
        return f"<div class='college-card error-card'>I couldn't find any colleges in my database offering <b>{safe_course}</b>.</div>", []

    # JSON object keys are always strings, so a numeric pincode in the college
    # data is converted before the lookup; a missing one yields distance inf.
    nearby_colleges = sorted(
        (
            (college, calculate_distance(user_pincode, str(college.get('pincode', '')), PINCODE_DATA))
            for college in colleges_with_course
        ),
        key=lambda pair: pair[1],
    )

    items = []
    for college, dist in nearby_colleges:
        if dist > max_distance_km:
            break  # sorted ascending: everything after this is farther away
        name = str(college.get('name', ''))
        items.append(
            f"<li><a href='https://www.google.com/search?q={quote_plus(name)}' target='_blank'>"
            f"<b>{html.escape(name)}</b></a> ({html.escape(str(college.get('pincode', '')))})"
            f"<br><small>Approx. {dist:.0f} km away</small></li>"
        )
    if not items:
        return f"<div class='college-card error-card'>I couldn't find any colleges offering <b>{safe_course}</b> within a {max_distance_km}km radius of your PIN code.</div>", []

    response_html = (
        "<div class='college-card primary-card'>"
        f"<h4>🎯 Top Matches for {safe_course} near {html.escape(str(user_pincode))}</h4><ul>"
        + "".join(items)
        + "</ul></div>"
    )
    return response_html, nearby_colleges
def format_course_details(course):
    """Render one course record as an HTML details card.

    Args:
        course: Course dict. Reads ``course``, ``description``,
            ``possible_careers``, ``required_education`` and
            ``related_courses``; each optional section is omitted when its
            value is missing or empty.

    Returns:
        The card markup as a string.

    Note:
        Field values are inserted without HTML escaping, as before.
        NOTE(review): presumably the course data is trusted project content
        that may carry its own markup; confirm before adding escaping.
    """
    parts = [
        "<div class='details-card'>",
        f"<h3>🎓 {course.get('course', 'N/A')}</h3><p>{course.get('description', '')}</p>",
    ]
    careers = course.get('possible_careers', [])
    if careers:
        parts.append("<b>💼 Potential Career Paths:</b><ul>" + "".join(f"<li>{c}</li>" for c in careers) + "</ul>")
    education = course.get('required_education', '')
    if education:
        parts.append(f"<p>✅ <b>Entry Requirements:</b> {education}</p>")
    related = course.get('related_courses', [])
    if related:
        parts.append("<b>📚 Key Subjects You'll Study:</b><ul>" + "".join(f"<li>{s}</li>" for s in related) + "</ul>")
    parts.append("</div>")
    return "".join(parts)
def format_comparison(courses):
    """Render a side-by-side HTML comparison table for the given courses.

    Args:
        courses: List of course dicts. Reads ``course``,
            ``possible_careers``, ``required_education``,
            ``related_courses`` and ``tags['skills']``.

    Returns:
        A details card containing the table, or a short "nothing to compare"
        card when ``courses`` is empty. Missing or empty fields render as
        ``N/A``.
    """
    if not courses:
        return "<div class='details-card'><p>I couldn't find any valid courses to compare. Please check the names and try again.</p></div>"

    table_style = "width:100%;border-collapse:collapse;text-align:left;"
    th_style = "border-bottom:2px solid #dee2e6;padding:12px;font-size:1rem;"
    td_style = "border-bottom:1px solid #dee2e6;padding:12px;vertical-align:top;"

    def joined(values):
        # Comma-separated list, or N/A when there is nothing to show.
        return ', '.join(values) or 'N/A'

    def skills_of(course):
        tags = course.get('tags', {})
        skills = tags.get('skills', []) if isinstance(tags, dict) else []
        return joined(s.capitalize() for s in skills)

    def row(label, cells):
        # Every row uses the same cell style; building rows in one place keeps
        # the style interpolation from being forgotten on individual rows.
        tds = "".join(f"<td style='{td_style}'>{cell}</td>" for cell in cells)
        return f"<tr><td style='{td_style}'><b>{label}</b></td>{tds}</tr>"

    headers = []
    for course in courses:
        # The name sits inside a single-quoted attribute, so an apostrophe in
        # a title would otherwise end the attribute early.
        course_name = html.escape(str(course.get('course') or 'N/A'))
        headers.append(
            f"<th style='{th_style}'><div class='clickable-card' data-action='quick_reply' "
            f"data-value='{course_name}' style='padding:0; margin:0; text-align:left;'>{course_name}</div></th>"
        )

    rows = [
        row("💼 Careers", (joined(c.get('possible_careers', [])) for c in courses)),
        row("✅ Requirements", (c.get('required_education') or 'N/A' for c in courses)),
        row("📚 Key Subjects", (joined(c.get('related_courses', [])) for c in courses)),
        row("🛠️ Core Skills", (skills_of(c) for c in courses)),
    ]

    return (
        f"<div class='details-card'><table style='{table_style}'><thead><tr><th style='{th_style}'>Feature</th>"
        + "".join(headers)
        + "</tr></thead><tbody>"
        + "".join(rows)
        + "</tbody></table>"
        + "<p style='font-size:0.85rem; text-align:center; margin-top:1rem; opacity:0.8;'>You can click on a course title in the table above for a detailed view.</p></div>"
    )
def get_recommendations(answers, courses):
    """Score courses against the user's answers and render the top three.

    Each course gets ``(semantic + heuristic - penalty) * 100`` where
    ``semantic`` is the cosine similarity between the embedded user keywords
    and the embedded course text, ``heuristic`` rewards tag overlap with the
    user's interests, strengths, stream and persona, and ``penalty`` covers
    overlap with stated weaknesses and the course's ``anti_tags``. Courses
    scoring 20 or less are discarded.

    Args:
        answers: Mapping of question key to the user's raw answer text.
        courses: Iterable of course dicts.

    Returns:
        ``(html, top_courses)``. ``("", [])`` when the answers contain no
        usable keywords; an apology message with ``[]`` when no course
        passes the threshold.
    """
    profile, persona = build_user_profile(answers)
    if not any(profile.values()):
        return "", []
    # Sorted so the embedded text (and hence the scores) does not depend on
    # set iteration order, which varies between interpreter runs.
    user_profile_text = " ".join(sorted(set().union(*profile.values())))
    if not user_profile_text.strip():
        return "", []
    user_emb = model.encode(user_profile_text, convert_to_tensor=True)

    interests = profile.get("general_interests", set())
    strengths = profile.get("subject_strengths", set())
    stream = profile.get("stream", set())
    weaknesses = profile.get("subject_weaknesses", set())

    scored_courses = []
    for course in courses:
        tags_obj = course.get("tags", {})
        course_tags = set().union(*tags_obj.values()) if isinstance(tags_obj, dict) else set()
        rich_course_text = (
            f"{course.get('course', '')} {course.get('description', '')} "
            f"{' '.join(course.get('possible_careers', []))} {' '.join(sorted(course_tags))}"
        )
        course_emb = model.encode(rich_course_text, convert_to_tensor=True)
        semantic_score = float(util.cos_sim(user_emb, course_emb)[0][0])
        heuristic_score = (
            (0.3 if interests & course_tags else 0)
            + (0.15 if strengths & course_tags else 0)
            + (0.1 if stream & course_tags else 0)
            + (0.15 if persona & course_tags else 0)
        )
        penalty_score = (
            len(weaknesses & course_tags) * 0.2
            + (0.15 if persona.intersection(course.get("anti_tags") or []) else 0)
        )
        final_score = (semantic_score + heuristic_score - penalty_score) * 100
        if final_score > 20:
            scored_courses.append((final_score, course))

    scored_courses.sort(key=lambda scored: scored[0], reverse=True)
    raw_recs = [course for _, course in scored_courses[:3]]
    if not raw_recs:
        return "🤔 I couldn’t find a strong match. Would you like to try again?", []

    parts = ["<div class='recommendation-container'><h4>🚀 Here are my top recommendations for you:</h4>"]
    for rank, course_data in enumerate(raw_recs, start=1):
        tags_obj = course_data.get('tags', {})
        skills = tags_obj.get('skills', [])[:3] if isinstance(tags_obj, dict) else []
        careers = course_data.get('possible_careers', [])[:3]
        parts.append(f"<div class='recommendation-card clickable-card' data-action='details' data-value='{rank}'>")
        parts.append(f"<p style='margin-bottom: 1rem;'><b>{rank}. {course_data.get('course')}</b><br>{course_data.get('description', '')}</p>")
        parts.append("<div style='font-size: 0.9rem; display: flex; flex-direction: column; gap: 0.5rem;'>")
        if skills:
            parts.append(f"<div><b>🛠️ Key Skills:</b> {', '.join(s.capitalize() for s in skills)}</div>")
        if careers:
            parts.append(f"<div><b>💼 Potential Careers:</b> {', '.join(careers)}</div>")
        parts.append("</div>")
        parts.append("</div>")
    parts.append("<div class='recommendation-card clickable-card compare-card' data-action='compare' data-value='compare'><p><b>⚖️ Compare Courses</b></p></div></div>")
    return "".join(parts), raw_recs
def next_question(answers):
    """Return ``(key, question)`` for the first unanswered question.

    Questions are checked in ``QUESTIONS`` order; ``(None, None)`` is
    returned once every key is present in ``answers``.
    """
    pending = ((key, text) for key, text in QUESTIONS.items() if key not in answers)
    return next(pending, (None, None))
def extract_text_from_file(file):
    """Extract plain text from an uploaded PDF or DOCX file.

    Args:
        file: Uploaded file object exposing ``filename`` and ``read()``.

    Returns:
        The concatenated page text for ``.pdf`` files, the paragraphs joined
        with newlines for ``.docx`` files, and ``""`` for any other (or
        missing) file name.
    """
    filename = (file.filename or "").lower()
    if filename.endswith('.pdf'):
        # The context manager closes the document even if a page fails to parse.
        with fitz.open(stream=file.read(), filetype="pdf") as pdf_document:
            return "".join(page.get_text() for page in pdf_document)
    if filename.endswith('.docx'):
        return "".join(para.text + "\n" for para in docx.Document(file).paragraphs)
    return ""
def analyze_resume_and_suggest_jobs(resume_text):
    """Ask Gemini to review a resume and render the feedback as an HTML card.

    Args:
        resume_text: Plain text extracted from the uploaded resume.

    Returns:
        A details card with the candidate name (when found), overall score,
        summary and suggested job titles, or an error card when the API key
        is missing or the request/response handling fails.
    """
    if not GEMINI_API_KEY:
        return "<div class='college-card error-card'>Error: Gemini API key is not configured.</div>"

    prompt = "\n".join([
        "",
        "You are an expert career coach. Analyze the following resume text.",
        "Your response must be a single JSON object with four keys:",
        '1. "person_name": A string containing the full name of the candidate found in the resume. If no name is clear, return an empty string.',
        '2. "overall_score": An integer score out of 100 for the resume\'s quality.',
        '3. "summary": A brief, encouraging 1-2 sentence summary of the resume.',
        '4. "job_titles": A list of 3-5 specific job titles the candidate is well-suited for based on their skills and experience.',
        "Do not add any text before or after the JSON object.",
        "Resume Text to analyze:",
        "---",
        resume_text,
        "---",
        "",
    ])
    try:
        model_gen = genai.GenerativeModel('gemini-1.5-pro-latest')
        response = model_gen.generate_content(prompt)
        # The model may wrap the JSON in extra text; keep the outermost {...}.
        json_match = re.search(r'\{.*\}', response.text, re.DOTALL)
        if not json_match:
            raise ValueError("Invalid JSON response from API")
        feedback = json.loads(json_match.group(0))

        # Everything below originates from the model, which was fed an
        # untrusted document, so every value is HTML-escaped.
        name = html.escape(str(feedback.get('person_name') or '').strip())
        score = html.escape(str(feedback.get('overall_score', 'N/A')))
        summary = html.escape(str(feedback.get('summary') or ''))
        job_titles = feedback.get('job_titles')
        if not isinstance(job_titles, list):
            job_titles = []

        heading = f"<h3>📝 Resume Analysis for {name}</h3>" if name else "<h3>📝 Resume Analysis</h3>"
        return (
            "<div class='details-card'>"
            + heading
            + f"<p><b>Overall Score:</b> {score}/100</p>"
            + f"<p><b>Summary:</b> <i>{summary}</i></p>"
            + "<b>🚀 Potential Job Roles:</b><ul>"
            + "".join(f"<li>{html.escape(str(title))}</li>" for title in job_titles)
            + "</ul>"
            + "</div>"
        )
    except Exception as e:
        # Boundary handler: any API or parsing failure becomes an error card;
        # logging.exception records the message together with the traceback.
        logging.exception(f"Gemini API Error: {e}")
        error_message = ("Sorry, the analysis failed. This is often an API key issue. "
                         "Please check the terminal where you ran `python app.py` for the specific error message.")
        return f"<div class='college-card error-card'>{error_message}</div>"
# --- Flask Routes ---
@app.route("/")
def index():
    """Serve the root URL by rendering the ``index.html`` template."""
    return render_template("index.html")
@app.route("/upload_resume", methods=["POST"])
def upload_resume():
    """Accept a PDF/DOCX resume upload and return the analysis as JSON.

    Responds with HTTP 400 and an ``error`` field when the ``resume_file``
    part is missing, has no file name, or is not a ``.pdf``/``.docx`` file.
    Otherwise responds with a ``response`` field holding an HTML card: the
    analysis, an "empty file" notice, or a generic processing error.
    """
    if 'resume_file' not in request.files:
        return jsonify({"error": "No file part"}), 400

    file = request.files['resume_file']
    if file.filename == '':
        return jsonify({"error": "No selected file"}), 400

    is_supported = file and file.filename.lower().endswith(('.pdf', '.docx'))
    if not is_supported:
        return jsonify({"error": "Invalid file type. Please upload a PDF or DOCX file."}), 400

    try:
        resume_text = extract_text_from_file(file)
        if not resume_text.strip():
            return jsonify({"response": "<div class='college-card error-card'>The uploaded file seems to be empty.</div>"})
        feedback_html = analyze_resume_and_suggest_jobs(resume_text)
        return jsonify({"response": feedback_html})
    except Exception as e:
        logging.error(f"Resume Upload Error: {e}")
        return jsonify({"response": "<div class='college-card error-card'>Sorry, an error occurred while processing your file.</div>"})
# Messages that jump back to the main menu from any conversation state.
_FEATURE_COMMANDS = ("personalized guidance", "compare courses", "college location finder", "resume analyser")
_WELCOME_MESSAGE = "Welcome to CareerPal! You can type `start` to begin a personalized guidance session, or select a specific tool from the panel on the left."
_FALLBACK_MESSAGE = "Sorry, I didn't understand. You can type `start` or select a feature from the panel."
_NO_MATCH_MESSAGE = "🤔 I couldn’t find a strong match. Would you like to try again?"
_RESTART_REPLY = "<div class='quick-reply-container'><div class='quick-reply-button clickable-card' data-action='restart'>🔄 Start Over</div></div>"
# Acknowledgement templates per question key; {topics} is the escaped answer.
_ANSWER_ACKS = {
    "subject_weaknesses": "👌 Got it — I’ll stay away from careers heavy in {topics}. ",
    "subject_strengths": "🔥 Nice! Being strong in {topics} is a great asset. ",
    "interest_activities": "😎 Cool! Enjoying {topics} gives me clues about your personality. ",
    "general_interests": "👍 That's insightful! An interest in {topics} helps narrow down the options. ",
}
_DEFAULT_ACK = "✅ Okay, noted. "


def _quick_replies(*buttons):
    """Render a quick-reply container from ``(value, label)`` button pairs."""
    rendered = "".join(
        f"<div class='quick-reply-button clickable-card' data-action='quick_reply' data-value='{value}'>{label}</div>"
        for value, label in buttons
    )
    return f"<div class='quick-reply-container'>{rendered}</div>"


_END_CHAT_REPLY = _quick_replies(("End Chat", "🚪 End Chat"))
_THUMBS_REPLY = _quick_replies(("Yes", "👍 Yes"), ("No", "👎 No"))
_FEEDBACK_PROMPT = "Sure. Would you like to leave some feedback about your experience?" + _THUMBS_REPLY


def _begin_questionnaire(convo, intro):
    """Reset the answers, switch to the questionnaire state and return the first prompt."""
    convo["state"] = "asking_questions"
    convo["answers"] = {}
    _, question = next_question(convo["answers"])
    return f"{intro}<br><br>{question}"


def _best_title(query, titles, *, scorer, min_score, processor=None):
    """Return the fuzzy-best title scoring strictly above ``min_score``, else ``None``."""
    if not titles:
        return None
    match = process.extractOne(query, titles, scorer=scorer, processor=processor)
    # extractOne returns None when there is nothing to compare against.
    if match is None or match[1] <= min_score:
        return None
    return match[0]


@app.route("/chat", methods=["POST"])
def chat():
    """Advance the chatbot state machine by one user message.

    Expects a JSON body with ``message`` (the user's text) and
    ``conversation`` (the state dict returned by the previous call). A
    missing, empty or malformed conversation starts a new session. The reply
    is JSON with ``response`` (HTML for the bot bubble) and the updated
    ``conversation``.

    The conversation dict is held by the client between calls, so every
    value read from it is treated as untrusted input.
    """
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        payload = {}
    msg = str(payload.get("message") or "").strip()
    convo = payload.get("conversation")

    if not convo or not isinstance(convo, dict):
        convo = {"state": "awaiting_initial_action", "answers": {}}
        logging.info("--- NEW SESSION INITIALIZED ---")
        return jsonify({"response": _WELCOME_MESSAGE, "conversation": convo})

    bot_response = ""
    current_state = convo.get("state", "awaiting_initial_action")
    msg_lower = msg.lower()
    logging.info("STATE: %s | USER: %s", current_state, msg)

    # A feature command always restarts from the main menu.
    if msg_lower in _FEATURE_COMMANDS:
        current_state = "awaiting_initial_action"

    if current_state == "awaiting_initial_action":
        if msg_lower in ("start", "personalized guidance"):
            bot_response = _begin_questionnaire(
                convo, "Great, let's find your perfect career path! I'll ask a few questions to get started."
            )
        elif msg_lower == "compare courses":
            convo["state"] = "awaiting_compare_confirmation"
            bot_response = "Do you have specific courses in mind to compare?" + _quick_replies(("Yes", "Yes"), ("No", "No"))
        elif msg_lower == "college location finder":
            convo["state"] = "awaiting_course_for_college_search"
            bot_response = "Happy to help you find colleges! What is the name of the course you're interested in?"
        elif msg_lower == "resume analyser":
            convo["state"] = "awaiting_resume_upload"
            bot_response = "Great! Please upload your resume (PDF or DOCX format) using the upload button below."
        elif msg_lower == "end chat":
            bot_response = _FEEDBACK_PROMPT
            convo["state"] = "awaiting_end_confirmation"
        else:
            bot_response = _FALLBACK_MESSAGE

    elif current_state == "asking_questions":
        answers = convo.get("answers")
        if not isinstance(answers, dict):
            answers = convo["answers"] = {}
        pending_key, _ = next_question(answers)
        if pending_key:
            answers[pending_key] = msg
        # The acknowledgement echoes the user's words back inside HTML.
        topics = html.escape(", ".join(sorted(tag.capitalize() for tag in parse_input(msg))) or msg)
        bot_response = _ANSWER_ACKS.get(pending_key, _DEFAULT_ACK).format(topics=topics)

        _, upcoming_question = next_question(answers)
        if upcoming_question:
            bot_response += upcoming_question
        else:
            bot_response, recs = get_recommendations(answers, COURSE_DATA)
            if recs:
                convo["last_recommendations"] = recs
                convo["state"] = "awaiting_more_details"
                bot_response += "<br>Click a course for more details, compare, or end the session."
                bot_response += _END_CHAT_REPLY
            else:
                convo["state"] = "awaiting_initial_action"
                # get_recommendations returns "" when no keywords were found;
                # never send an empty bubble.
                bot_response = bot_response or _NO_MATCH_MESSAGE

    elif current_state == "awaiting_compare_confirmation":
        if msg_lower == "yes":
            convo["state"] = "awaiting_course_names_for_compare"
            bot_response = "Please enter up to 3 course names, separated by commas."
        else:
            bot_response = _begin_questionnaire(convo, "No problem! Let's find some courses for you first.")

    elif current_state == "awaiting_course_names_for_compare":
        course_titles = [c["course"] for c in COURSE_DATA if c.get("course")]
        matched_courses = []
        for requested in msg_lower.split(",")[:3]:
            title = _best_title(
                requested.strip(), course_titles,
                scorer=fuzz.token_set_ratio, min_score=85, processor=preprocess_text,
            )
            if title is None:
                continue
            course = next(c for c in COURSE_DATA if c.get("course") == title)
            if course not in matched_courses:  # the same course named twice
                matched_courses.append(course)
        bot_response = format_comparison(matched_courses)
        convo["last_recommendations"] = matched_courses
        convo["state"] = "awaiting_more_details"
        bot_response += _END_CHAT_REPLY

    elif current_state == "awaiting_course_for_college_search":
        course_titles = [c["course"] for c in COURSE_DATA if c.get("course")]
        title = _best_title(
            msg_lower, course_titles,
            scorer=fuzz.token_set_ratio, min_score=85, processor=preprocess_text,
        )
        if title is not None:
            convo["course_for_college_search"] = title
            convo["state"] = "awaiting_pincode"
            bot_response = f"Okay, searching for colleges offering '<b>{title}</b>'. Please provide your 6-digit area PIN code."
        else:
            bot_response = "I couldn't find a clear match for that course. Could you please try rephrasing or be more specific?"
            convo["state"] = "awaiting_course_for_college_search"

    elif current_state == "awaiting_pincode":
        if re.fullmatch(r"\d{6}", msg):
            course_name = convo.get("course_for_college_search", "this course")
            bot_response, _ = find_nearby_colleges(course_name, msg)
            bot_response += _quick_replies(
                ("College Location Finder", "🔎 Search Again"),
                ("End Chat", "🚪 End Chat"),
            )
            convo["state"] = "awaiting_initial_action"
        else:
            bot_response = "That doesn't seem like a valid 6-digit PIN code. Please try again."

    elif current_state == "awaiting_more_details":
        recs = convo.get("last_recommendations") or []
        if msg_lower == "end chat":
            bot_response = _FEEDBACK_PROMPT
            convo["state"] = "awaiting_end_confirmation"
        elif msg_lower == "compare":
            bot_response = format_comparison(recs) + _END_CHAT_REPLY
        else:
            chosen_course = None
            # isdecimal (unlike isdigit) only accepts characters int() can parse.
            if msg.isdecimal() and 1 <= int(msg) <= len(recs):
                chosen_course = recs[int(msg) - 1]
            else:
                title = _best_title(msg, [r.get("course", "") for r in recs], scorer=fuzz.ratio, min_score=70)
                if title is not None:
                    chosen_course = next((r for r in recs if r.get("course") == title), None)
            if chosen_course:
                bot_response = format_course_details(chosen_course)
                convo["course_for_college_search"] = chosen_course.get("course")
                bot_response += "<br><br>Would you like to find nearby colleges for this course?" + _THUMBS_REPLY
                convo["state"] = "awaiting_college_search_confirmation"
            else:
                bot_response = "Sorry, I didn't recognize that selection. Please choose an option from your recommendations."

    elif current_state == "awaiting_college_search_confirmation":
        if "yes" in msg_lower:
            bot_response = "Great! Please provide your 6-digit PIN code."
            convo["state"] = "awaiting_pincode"
        else:
            bot_response = "No problem. You can explore other recommendations, compare courses, or select a new feature from the left panel."
            bot_response += _END_CHAT_REPLY
            convo["state"] = "awaiting_more_details"

    elif current_state == "awaiting_end_confirmation":
        if "yes" in msg_lower:
            bot_response = "I'd love to hear your thoughts. How was your experience?"
            convo["state"] = "awaiting_feedback"
        else:
            bot_response = "No problem! It was great helping you." + _RESTART_REPLY
            convo["state"] = "session_ended"

    elif current_state == "awaiting_feedback":
        logging.info("FEEDBACK: %s", msg)
        bot_response = "Thank you for your feedback!" + _RESTART_REPLY
        convo["state"] = "session_ended"

    else:
        # States without a typed-message handler (e.g. after the session ended
        # or while waiting for an upload) and unknown client-supplied states:
        # return to the main menu instead of replying with an empty bubble.
        convo["state"] = "awaiting_initial_action"
        bot_response = _FALLBACK_MESSAGE

    logging.info("BOT: %s", re.sub('<[^<]+?>', ' ', bot_response).strip())
    return jsonify({"response": bot_response, "conversation": convo})
if __name__ == "__main__":
    # The Werkzeug debugger lets anyone who can reach the server execute
    # arbitrary code, so debug mode is opt-in (FLASK_DEBUG=1) rather than
    # always on.
    app.run(debug=os.getenv("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes"})