# Spaces: Sleeping
import re

import joblib
from fastapi import FastAPI
from pydantic import BaseModel
# Initialize FastAPI app
# docs_url / redoc_url set the paths of the interactive documentation pages.
app = FastAPI(
    title="Email Classification API",
    description="Classifies support emails into categories and masks personal information.",
    version="1.0.0",
    docs_url="/docs",
    redoc_url="/redoc",
)

# Load pre-trained model
# Loaded once at import time; the path is resolved relative to the process's
# current working directory.
model = joblib.load("model.joblib")
# Input schema
# Request body for the classification endpoint: a single required string field
# holding the raw email text.
class EmailInput(BaseModel):
    # Raw, unmasked email body as sent by the client.
    input_email_body: str
# PII Masking Function

# (label, compiled pattern) pairs in priority order. Once a span has been
# claimed by an earlier pattern, later patterns can no longer match inside it.
_PII_PATTERNS = (
    ("email", re.compile(r"\b[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+\b")),
    ("phone_number", re.compile(r"\b\d{10}\b")),
    ("dob", re.compile(r"\b\d{2}[/-]\d{2}[/-]\d{4}\b")),
    # Card numbers are tried before Aadhaar numbers: otherwise the first three
    # groups of a card written as "1234 5678 9012 3456" are claimed as Aadhaar.
    ("credit_debit_no", re.compile(r"\b(?:\d[ -]*?){13,16}\b")),
    ("aadhar_num", re.compile(r"\b\d{4}[- ]?\d{4}[- ]?\d{4}\b")),
    ("cvv_no", re.compile(r"\b\d{3}\b")),
    ("expiry_no", re.compile(r"\b(0[1-9]|1[0-2])\/\d{2,4}\b")),
    ("full_name", re.compile(r"\b([A-Z][a-z]+(?:\s[A-Z][a-z]+)+)\b")),
)

# Filler used to blank out already-claimed spans. It is not a word character
# and is matched by none of the patterns above.
_CLAIMED_FILLER = "\x00"


def mask_and_store_all_pii(text):
    """Replace every detected PII span in *text* with a ``[label_N]`` placeholder.

    Patterns are applied in the order of ``_PII_PATTERNS``; ``N`` is the
    running count of entities found so far, so every placeholder is unique.

    Args:
        text: The text to mask. It is coerced with ``str()`` first.

    Returns:
        A 3-tuple ``(masked_text, pii_map, entity_list)``:

        * ``masked_text`` -- the text with each PII span replaced by its
          placeholder.
        * ``pii_map`` -- dict mapping each placeholder to the original value.
        * ``entity_list`` -- one dict per entity, in discovery order, with
          keys ``"position"`` (``[start, end]`` offsets into the *original*
          text, end exclusive), ``"classification"`` (the pattern label) and
          ``"entity"`` (the original value).
    """
    text = str(text)
    pii_map = {}
    entity_list = []
    claimed = []  # (start, end, placeholder) for every accepted span

    # `shadow` has the same length as `text`, so match offsets found in it are
    # valid offsets into the original. Claimed spans are overwritten with
    # filler so that later patterns cannot re-match them.
    shadow = text
    for label, pattern in _PII_PATTERNS:
        pieces = []
        cursor = 0
        for match in pattern.finditer(shadow):
            start, end = match.span()
            original = text[start:end]
            placeholder = f"[{label}_{len(pii_map)}]"
            pii_map[placeholder] = original
            entity_list.append({
                "position": [start, end],
                "classification": label,
                "entity": original,
            })
            claimed.append((start, end, placeholder))
            pieces.append(shadow[cursor:start])
            pieces.append(_CLAIMED_FILLER * (end - start))
            cursor = end
        if pieces:
            pieces.append(shadow[cursor:])
            shadow = "".join(pieces)

    # Splice placeholders in by position (not by value) so that exactly the
    # matched span is masked, even if the same characters occur earlier.
    claimed.sort()
    masked_parts = []
    cursor = 0
    for start, end, placeholder in claimed:
        masked_parts.append(text[cursor:start])
        masked_parts.append(placeholder)
        cursor = end
    masked_parts.append(text[cursor:])

    return "".join(masked_parts), pii_map, entity_list
# Restore PII
def restore_pii(masked_text, pii_map):
    """Return *masked_text* with each placeholder swapped back for its original value.

    Args:
        masked_text: Text containing placeholders.
        pii_map: Mapping of placeholder -> original value. Entries are applied
            one after another in the mapping's iteration order, and every
            occurrence of a placeholder is replaced.

    Returns:
        The text with all known placeholders substituted.
    """
    result = masked_text
    for token in pii_map:
        result = result.replace(token, pii_map[token])
    return result
# Classification Endpoint
# NOTE(review): without a route decorator this function is never registered
# on `app`. The path "/classify" is an assumption -- confirm against the
# clients of this API.
@app.post("/classify")
def classify_email(data: EmailInput):
    """Mask PII in the submitted email and predict its category.

    Args:
        data: Request body carrying the raw email text in ``input_email_body``.

    Returns:
        A dict with the original text (``input_email_body``), the entities
        reported by ``mask_and_store_all_pii`` (``list_of_masked_entities``),
        the masked text (``masked_email``) and the first prediction the model
        returns for the masked text (``category_of_the_email``).
    """
    raw_text = data.input_email_body

    # Masking: the placeholder map is not needed for the response.
    masked_text, _pii_map, entity_list = mask_and_store_all_pii(raw_text)

    # Prediction: the model is given the masked text, not the raw text.
    predicted_category = model.predict([masked_text])[0]

    # Response format
    return {
        "input_email_body": raw_text,
        "list_of_masked_entities": entity_list,
        "masked_email": masked_text,
        "category_of_the_email": predicted_category,
    }
# Health check endpoint
# Registered on the application root; without a route decorator the function
# is never exposed by `app`.
@app.get("/")
def root():
    """Return a static message indicating that the API is running."""
    return {"message": "Email Classification API is running."}