File size: 3,388 Bytes
0f50b81
5aa5ba7
d7be225
72fbd66
 
58e0a60
 
 
d7be225
4fe402e
72fbd66
 
 
58e0a60
 
d7be225
72fbd66
 
58e0a60
72fbd66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58e0a60
 
 
 
d1302b7
72fbd66
 
 
 
 
 
 
 
 
d1302b7
 
 
 
72fbd66
 
d1302b7
 
72fbd66
cee126a
 
 
 
 
d1302b7
cee126a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72fbd66
 
d352740
 
72fbd66
5aa5ba7
 
 
 
d7be225
5aa5ba7
 
72fbd66
74255a6
2ae3da8
 
 
 
72fbd66
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import pipeline
from sentence_transformers import SentenceTransformer
import faiss
import os

print("πŸ“ Files in /app:", os.listdir())

# === Load and chunk structured .txt files ==== test
def load_structured_chunks(file_path, delimiter="== "):
    """Read a UTF-8 text file and split it into section chunks.

    The file is expected to mark sections with '== Section ==' style
    headers.  Each returned chunk is the section's title line followed by
    its body; header-only sections (empty body) are dropped.

    Args:
        file_path: Path of the UTF-8 text file to read.
        delimiter: Marker that opens a section header (default "== ",
            matching the original hard-coded behavior).

    Returns:
        list[str]: Non-empty "title\\nbody" chunks, in file order.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(file_path, "r", encoding="utf-8") as f:
        text = f.read()
        print("📄 Raw resume text:\n", text)

    # Split by '== Section ==' headers
    raw_chunks = text.split(delimiter)
    print("🧩 Raw chunks:\n", raw_chunks)

    chunks = []
    for chunk in raw_chunks:
        cleaned = chunk.strip()
        if not cleaned:
            continue
        # Prepend the section title so the embedder sees some context
        lines = cleaned.splitlines()
        section_title = lines[0] if lines else "Untitled"
        section_body = "\n".join(lines[1:]).strip()
        if section_body:  # drop header-only sections
            chunks.append(f"{section_title}\n{section_body}")
    return chunks

# Load resume and extra info
# resume.txt is mandatory (OSError propagates if absent); extra_info.txt is
# optional.  `os` is already imported at the top of the file, so the original
# `__import__('os').listdir()` lookup was redundant; os.path.exists() also
# avoids scanning the whole directory just to test one filename.
resume_chunks = load_structured_chunks("resume.txt")
extra_chunks = load_structured_chunks("extra_info.txt") if os.path.exists("extra_info.txt") else []
all_chunks = resume_chunks + extra_chunks

print("✅ Resume chunks:", resume_chunks)
print("✅ Extra info chunks:", extra_chunks)
print("📊 Total chunks:", len(all_chunks))


# === Embed chunks ===
# Loads the MiniLM sentence-embedding model at module import time (downloads
# it on first run).  encode() is presumably returning a 2-D numpy array with
# one row per chunk — TODO confirm against the sentence-transformers version
# pinned for this image.
embedder = SentenceTransformer("all-MiniLM-L6-v2")
embeddings = embedder.encode(all_chunks)

# === Create FAISS index ===
# NOTE(review): embeddings[0] raises IndexError if both input files yielded
# zero chunks — confirm resume.txt is never empty in deployment.
dimension = embeddings[0].shape[0]  # width of one embedding vector
index = faiss.IndexFlatL2(dimension)  # exact (brute-force) L2 distance search
index.add(embeddings)





# === Retrieval function ===
def retrieve_context(query, top_k=3):
    """Return up to `top_k` chunks relevant to `query`, joined by blank lines.

    Embeds the query, searches the FAISS index, filters out very short
    chunks (unless the query contains keywords that typically target short
    facts), and returns the best-scoring survivors.

    Args:
        query: Free-text question from the user.
        top_k: Number of neighbors to search for and return (default 3).

    Returns:
        str: The selected chunks joined by "\\n\\n" (empty string if the
        index holds no vectors).
    """
    print("📊 Total chunks:", len(all_chunks))
    print("📦 FAISS index size:", index.ntotal)
    query_embedding = embedder.encode([query])
    scores, indices = index.search(query_embedding, top_k)

    # Hoisted out of the loop: the keyword test depends only on the query.
    # These keywords signal questions whose answers are legitimately short
    # (e.g. "Notice period: 30 days"), so short chunks must survive.
    keyword_query = any(k in query.lower() for k in ["salary", "notice", "job", "current"])

    selected_chunks = []
    for i, score in zip(indices[0], scores[0]):
        # BUG FIX: FAISS pads `indices` with -1 when the index holds fewer
        # than top_k vectors; the original all_chunks[i] then silently
        # returned the *last* chunk via Python negative indexing.
        if i < 0:
            continue
        chunk = all_chunks[i]
        print("🔍 selected_chunks retrieved chunks:\n", chunk)
        # Skip short or noisy chunks unless query matches
        if len(chunk.split()) < 10 and not keyword_query:
            continue
        selected_chunks.append((chunk, score))

    # If nothing survives filtering, fall back to the original top_k hits
    # (again skipping -1 padding entries).
    if not selected_chunks:
        selected_chunks = [(all_chunks[i], scores[0][j])
                           for j, i in enumerate(indices[0]) if i >= 0]

    # Sort by score (lowest L2 distance = best match)
    selected_chunks.sort(key=lambda x: x[1])
    final_chunks = [chunk for chunk, _ in selected_chunks[:top_k]]

    print("🔍 Final retrieved chunks:\n", final_chunks)
    return "\n\n".join(final_chunks)

# === Load QA model ===
# Extractive question-answering model: it selects an answer span from the
# retrieved context rather than generating free text.  Loaded once at module
# import so every request reuses the same pipeline instance.
qa_pipeline = pipeline("question-answering", model="deepset/tinyroberta-squad2")

# === FastAPI setup ===
app = FastAPI()

class Question(BaseModel):
    """Request body for POST /predict: {"query": "<user question>"}."""
    query: str

@app.post("/predict")
async def predict(question: Question):
    """Answer `question.query` using retrieved resume context.

    Retrieves the most relevant resume chunks, then runs the extractive
    QA pipeline over them.

    Returns:
        dict: {"answer": <answer span extracted from the context>}
    """
    # PERF FIX: retrieve once.  The original invoked retrieve_context a
    # second time solely to log it, doubling the per-request embedding and
    # FAISS search cost; log the value already computed instead.
    context = retrieve_context(question.query)
    print("🔍 Retrieved chunks:\n", context)
    result = qa_pipeline(
        question=question.query,
        context=context
    )
    return {"answer": result["answer"]}

@app.get("/")
def health_check():
    """Liveness probe: confirms the API process is up and serving."""
    payload = {"status": "Resume Q&A API is running"}
    return payload