# app.py
"""Gradio app that fills red placeholder text in a Word file from PDF data.

The Word document is scanned table by table. For each row with at least two
cells, the first cell is treated as a label and the second as the value cell.
The label is fuzzy-matched against the labels extracted from the PDF, and any
red-coloured runs in the value cell are replaced by the matching PDF value.
"""

import difflib
import os
import tempfile

import gradio as gr
from docx import Document
from docx.shared import RGBColor

from pdf_extractor import extract_label_value_pairs
# Kept for backward compatibility; no longer called by this module.
from word_extractor import extract_red_text_with_labels, is_red_font  # noqa: F401


def find_best_match_label(target_label, pdf_data):
    """Return the key of *pdf_data* that most closely matches *target_label*.

    The comparison is case-insensitive on both sides. (Previously only the
    target was lower-cased, so keys containing upper-case characters matched
    poorly or not at all.)

    Args:
        target_label: Label text taken from the Word document.
        pdf_data: Mapping of PDF labels to values; only its keys are used.

    Returns:
        The original (un-normalised) key from *pdf_data* with the best
        similarity score of at least 0.4, or ``None`` when nothing qualifies.
    """
    # Map the lower-cased form back to the original key so that the caller
    # can still index pdf_data with the returned value. On a case collision
    # the first key encountered wins.
    lowered_to_key = {}
    for key in pdf_data:
        lowered_to_key.setdefault(key.lower(), key)

    matches = difflib.get_close_matches(
        target_label.lower(), list(lowered_to_key), n=1, cutoff=0.4
    )
    return lowered_to_key[matches[0]] if matches else None


def _normalise_label(raw_text):
    """Strip colons and collapse all whitespace runs in a label cell's text."""
    return " ".join(raw_text.replace(":", "").split())


def _replace_red_runs(cell, new_value):
    """Write *new_value* into the first red run of *cell* and blank the rest.

    Word frequently splits one visually contiguous phrase into several runs;
    assigning the full value to every red run would repeat it. All red runs
    that are touched are recoloured black.
    """
    value_written = False
    for para in cell.paragraphs:
        for run in para.runs:
            if not is_red_font(run):
                continue
            run.text = "" if value_written else new_value
            run.font.color.rgb = RGBColor(0, 0, 0)  # make black
            value_written = True


def replace_red_text_by_label(word_path, label_value_map):
    """Replace red text in table value cells and save a copy of the document.

    Args:
        word_path: Path of the ``.docx`` file to read.
        label_value_map: Mapping of label to replacement value.

    Returns:
        Path of the updated document, saved as ``updated.docx`` inside a
        freshly created temporary directory.
    """
    doc = Document(word_path)

    for table in doc.tables:
        for row in table.rows:
            cells = row.cells
            if len(cells) < 2:
                continue

            label = _normalise_label(cells[0].text)
            if not label:
                # An empty label cell cannot be matched meaningfully.
                continue

            matched_label = find_best_match_label(label, label_value_map)
            if not matched_label:
                continue

            _replace_red_runs(cells[1], label_value_map[matched_label])

    temp_dir = tempfile.mkdtemp()
    updated_path = os.path.join(temp_dir, "updated.docx")
    doc.save(updated_path)
    return updated_path


def _as_path(uploaded):
    """Return a filesystem path for a Gradio upload.

    ``gr.File(type="filepath")`` passes a plain path string, whereas other
    configurations pass an object exposing the path as ``.name``. Both are
    accepted here.
    """
    return getattr(uploaded, "name", uploaded)


def process_files(pdf_file, word_file):
    """Gradio callback: build the updated Word document from both uploads.

    Args:
        pdf_file: Uploaded PDF (path string or object with a ``.name`` path).
        word_file: Uploaded Word file (same forms as *pdf_file*).

    Returns:
        Path of the updated ``.docx`` file offered for download.

    Raises:
        gr.Error: If either file has not been uploaded.
    """
    if pdf_file is None or word_file is None:
        raise gr.Error("Please upload both a PDF file and a Word file.")

    pdf_path = _as_path(pdf_file)
    word_path = _as_path(word_file)

    pdf_data = extract_label_value_pairs(pdf_path)  # {label: value}
    return replace_red_text_by_label(word_path, pdf_data)


demo = gr.Interface(
    fn=process_files,
    inputs=[
        gr.File(label="Upload PDF File", type="filepath"),
        gr.File(label="Upload Word File", type="filepath"),
    ],
    outputs=gr.File(label="Download Updated Word File"),
    title="Red Text Replacer",
    description="Upload a PDF and Word document. Red-colored text in the Word doc will be replaced by matching content from the PDF.",
)

if __name__ == "__main__":
    demo.launch()