# hr-assistance / main.py
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import pipeline
from sentence_transformers import SentenceTransformer
import faiss
import os
print("πŸ“ Files in /app:", os.listdir())
# === Load and chunk structured .txt files ===
def load_structured_chunks(file_path):
    with open(file_path, "r", encoding="utf-8") as f:
        text = f.read()
    print("📄 Raw resume text:\n", text)

    # Split on '== Section ==' headers
    raw_chunks = text.split("== ")
    print("🧩 Raw chunks:\n", raw_chunks)

    chunks = []
    for chunk in raw_chunks:
        cleaned = chunk.strip()
        if cleaned:
            lines = cleaned.splitlines()
            # Prepend the section title (minus any trailing '==') for context
            section_title = lines[0].strip(" =") if lines else "Untitled"
            section_body = "\n".join(lines[1:]).strip()
            if section_body:
                chunks.append(f"{section_title}\n{section_body}")
    return chunks
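# Illustration (hypothetical resume.txt contents, not shipped with this repo):
#
#   == Experience ==
#   Senior engineer, 5 years of backend work.
#   == Skills ==
#   Python, FastAPI, FAISS
#
# load_structured_chunks("resume.txt") would then return:
#   ["Experience\nSenior engineer, 5 years of backend work.",
#    "Skills\nPython, FastAPI, FAISS"]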
# Load resume and extra info
resume_chunks = load_structured_chunks("resume.txt")
extra_chunks = load_structured_chunks("extra_info.txt") if os.path.exists("extra_info.txt") else []
all_chunks = resume_chunks + extra_chunks
print("βœ… Resume chunks:", resume_chunks)
print("βœ… Extra info chunks:", extra_chunks)
print("πŸ“Š Total chunks:", len(all_chunks))
# === Embed chunks ===
embedder = SentenceTransformer("all-MiniLM-L6-v2")
embeddings = embedder.encode(all_chunks)
# === Create FAISS index ===
dimension = embeddings[0].shape[0]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)
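# Note: IndexFlatL2 does exact nearest-neighbour search ranked by squared L2
# distance. If cosine similarity were preferred instead, one sketch (an
# alternative, not what this app uses) is to L2-normalize the vectors
# in place and rank by inner product:
#
#   faiss.normalize_L2(embeddings)           # in-place normalization
#   ip_index = faiss.IndexFlatIP(dimension)  # inner product == cosine on unit vectors
#   ip_index.add(embeddings)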
# === Retrieval function ===
def retrieve_context(query, top_k=3):
    print("📊 Total chunks:", len(all_chunks))
    print("📦 FAISS index size:", index.ntotal)
    query_embedding = embedder.encode([query])
    scores, indices = index.search(query_embedding, top_k)

    selected_chunks = []
    for i, score in zip(indices[0], scores[0]):
        chunk = all_chunks[i]
        print("🔍 Candidate chunk:\n", chunk)
        # Skip short or noisy chunks unless the query asks for a short fact
        if len(chunk.split()) < 10 and not any(k in query.lower() for k in ["salary", "notice", "job", "current"]):
            continue
        selected_chunks.append((chunk, score))

    # If nothing survives filtering, fall back to the original top_k
    if not selected_chunks:
        selected_chunks = [(all_chunks[i], scores[0][j]) for j, i in enumerate(indices[0])]

    # Sort by distance (lowest distance = best match)
    selected_chunks.sort(key=lambda x: x[1])
    final_chunks = [chunk for chunk, _ in selected_chunks[:top_k]]
    print("🔍 Final retrieved chunks:\n", final_chunks)
    return "\n\n".join(final_chunks)
# === Load QA model ===
qa_pipeline = pipeline("question-answering", model="deepset/tinyroberta-squad2")
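# This pipeline is extractive: it returns a dict of the form
# {"score": float, "start": int, "end": int, "answer": str}, where "answer"
# is a span copied verbatim from the supplied context. For example,
#   qa_pipeline(question="What is X?", context="X is Y.")
# might yield something like {"score": 0.9, "start": 5, "end": 6, "answer": "Y"}
# (values illustrative only).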
# === FastAPI setup ===
app = FastAPI()
class Question(BaseModel):
    query: str

@app.post("/predict")
async def predict(question: Question):
    # Retrieve once and reuse the same context for logging and the QA model
    context = retrieve_context(question.query)
    print("🔍 Retrieved context:\n", context)
    result = qa_pipeline(
        question=question.query,
        context=context,
    )
    return {"answer": result["answer"]}

@app.get("/")
def health_check():
    return {"status": "Resume Q&A API is running"}