Spaces:
Sleeping
Sleeping
| import os | |
| from typing import List, Dict, Any | |
| from fastapi import HTTPException | |
| from utils.ingestion.parser import parse_pdf_bytes, parse_docx_bytes | |
| # ────────────────────────────── Helpers ────────────────────────────── | |
| def _infer_mime(filename: str) -> str: | |
| lower = filename.lower() | |
| if lower.endswith(".pdf"): | |
| return "application/pdf" | |
| if lower.endswith(".docx"): | |
| return "application/vnd.openxmlformats-officedocument.wordprocessingml.document" | |
| return "application/octet-stream" | |
def _extract_pages(filename: str, file_bytes: bytes) -> List[Dict[str, Any]]:
    """Parse an uploaded document, choosing the parser from the file name.

    Args:
        filename: Name of the uploaded file; only its extension is used to
            decide which parser runs.
        file_bytes: Raw bytes of the file, handed to the parser unchanged.

    Returns:
        The result of ``parse_pdf_bytes`` for ``.pdf`` files or of
        ``parse_docx_bytes`` for ``.docx`` files.
        NOTE(review): the per-item dict schema is defined by those parsers,
        not here — confirm against ``utils.ingestion.parser``.

    Raises:
        HTTPException: With status 400 when the extension is neither
            ``.pdf`` nor ``.docx``.
    """
    parsers_by_mime = {
        "application/pdf": parse_pdf_bytes,
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document": parse_docx_bytes,
    }
    parser = parsers_by_mime.get(_infer_mime(filename))
    if parser is None:
        # Reject unknown types before any parsing is attempted.
        raise HTTPException(status_code=400, detail=f"Unsupported file type: {filename}")
    return parser(file_bytes)