# pdf_extractor.py
import fitz  # PyMuPDF


def extract_text_pdf_raw(pdf_path):
    """Return the text of every page of a PDF, joined by newlines.

    Args:
        pdf_path: Location of the PDF, passed straight to ``fitz.open``.

    Returns:
        The result of ``page.get_text()`` for each page, in page order,
        concatenated with ``"\\n"`` between pages.
    """
    # Use the document as a context manager so the underlying file handle is
    # released even if text extraction raises part-way through.
    with fitz.open(pdf_path) as doc:
        return "\n".join(page.get_text() for page in doc)


def extract_label_value_pairs(pdf_path):
    """Extract ``label: value`` / ``label - value`` pairs from a PDF's text.

    The raw text is processed line by line. Each line is stripped and then:

    * if it contains ``:``, it is split at the first colon; the pair is kept
      only when the value part is non-empty after stripping;
    * otherwise, if it contains ``-``, it is split at the first hyphen and
      the pair is kept even when the value part is empty.

    Labels are stripped and lower-cased before being used as keys; values
    are stripped. When the same label occurs more than once, the last
    occurrence wins.

    Args:
        pdf_path: Location of the PDF, forwarded to ``extract_text_pdf_raw``.

    Returns:
        A dict mapping normalized labels to their values.
    """
    raw_text = extract_text_pdf_raw(pdf_path)
    label_value_map = {}

    for line in raw_text.split('\n'):
        line = line.strip()

        if ':' in line:
            label, value = line.split(':', 1)
            value = value.strip()
            # A colon line with an empty value is dropped; it is NOT retried
            # against the hyphen rule below.
            if value:
                label_value_map[label.strip().lower()] = value
        elif '-' in line:
            # The hyphen is known to be present, so the split always yields
            # exactly two parts.
            # NOTE(review): unlike the colon rule, empty values (and empty
            # labels, e.g. from a "-----" rule line) are stored here —
            # confirm whether callers rely on that.
            label, value = line.split('-', 1)
            label_value_map[label.strip().lower()] = value.strip()

    return label_value_map