Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import requests | |
| import base64 | |
| import os | |
| import json | |
| import mimetypes | |
# --- Configuration ---
# The OpenRouter API key is a secret and must not live in source control. It is
# read from the environment (on Hugging Face Spaces, define it as a Space
# secret). An empty string means "not configured": the rest of this file tests
# `if not OPENROUTER_API_KEY` and reports the problem instead of calling the API.
# NOTE: a key was previously hard-coded on this line. It must be treated as
# leaked and revoked/rotated in the OpenRouter dashboard.
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "").strip()
IMAGE_MODEL = "opengvlab/internvl3-14b:free"
OPENROUTER_API_URL = "https://openrouter.ai/api/v1/chat/completions"

# --- Application State ---
# Documents queued for processing. Each entry is a dict with the keys
# "path", "type" and "filename".
# NOTE(review): this is module-level state, so it looks like it is shared by
# every browser session of the app — confirm whether per-session state is needed.
current_batch = []
| # --- Helper Functions --- | |
def generate_extraction_prompt(doc_type_provided_by_user):
    """Build the instruction prompt that accompanies a document image.

    The prompt tells the model to answer with a single JSON object containing
    the keys "document_type_provided", "document_type_detected",
    "extracted_fields", "mrz_data", "multilingual_info" and "full_text_ocr".

    Args:
        doc_type_provided_by_user: The document type chosen by the user; it is
            interpolated verbatim into the prompt in two places.

    Returns:
        The complete prompt text as a string.
    """
    # Doubled braces ({{ }}) below are literal braces in the f-string output.
    return f"""You are an advanced OCR and information extraction AI.
The user has provided an image and identified it as a '{doc_type_provided_by_user}'.
Your task is to meticulously analyze this image and extract all relevant information.
Output Format Instructions:
Provide your response as a SINGLE, VALID JSON OBJECT. Do not include any explanatory text before or after the JSON.
The JSON object should have the following top-level keys:
- "document_type_provided": (string) The type provided by the user: "{doc_type_provided_by_user}".
- "document_type_detected": (string) Your best guess of the specific document type (e.g., "Passport", "National ID Card", "Driver's License", "Visa Sticker", "Hotel Confirmation Voucher", "Boarding Pass", "Photograph of a person").
- "extracted_fields": (object) A key-value map of all extracted information. Be comprehensive. Examples:
- For passports/IDs: "Surname", "Given Names", "Document Number", "Nationality", "Date of Birth", "Sex", "Place of Birth", "Date of Issue", "Date of Expiry", "Issuing Authority", "Country Code".
- For hotel reservations: "Guest Name", "Hotel Name", "Booking Reference", "Check-in Date", "Check-out Date", "Room Type".
- For photos: "Description" (e.g., "Portrait of a person", "Image contains text: [text if any]").
- "mrz_data": (object or null) If a Machine Readable Zone (MRZ) is present:
- "raw_mrz_lines": (array of strings) Each line of the MRZ.
- "parsed_mrz": (object) Key-value pairs of parsed MRZ fields (e.g., "passport_type", "issuing_country", "surname", "given_names", "passport_number", "nationality", "dob", "sex", "expiry_date", "personal_number").
If no MRZ, this field should be null.
- "multilingual_info": (array of objects or null) For any text segments not in English:
- Each object: {{"language_detected": "ISO 639-1 code", "original_text": "...", "english_translation_or_transliteration": "..."}}
If no non-English text, this field can be null or an empty array.
- "full_text_ocr": (string) Concatenation of all text found on the document.
Extraction Guidelines:
1. Prioritize accuracy. If unsure about a character or word, indicate uncertainty if possible, or extract the most likely interpretation.
2. Extract all visible text, including small print, stamps, and handwritten annotations if legible.
3. For dates, try to use ISO 8601 format (YYYY-MM-DD) if possible, but retain original format if conversion is ambiguous.
4. If the image is a photo of a person without much text, the "extracted_fields" might contain a description, and "full_text_ocr" might be minimal.
5. If the document is multi-page and only one page is provided, note this if apparent.
Ensure the entire output strictly adheres to the JSON format.
"""
def _guess_image_mime_type(image_path):
    """Return a MIME type for *image_path*, falling back to JPEG when unknown."""
    mime_type, _ = mimetypes.guess_type(image_path)
    if mime_type:
        return mime_type
    extension = os.path.splitext(image_path)[1].lower()
    known_types = {
        ".png": "image/png",
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".webp": "image/webp",
    }
    return known_types.get(extension, "image/jpeg")


def _strip_code_fences(text):
    """Remove a Markdown code fence (or single backticks) wrapped around *text*."""
    cleaned = text.strip()
    if cleaned.startswith("```"):
        # Handles both a tagged fence ("```json") and a bare one ("```").
        cleaned = cleaned[3:]
        if cleaned[:4].lower() == "json":
            cleaned = cleaned[4:]
        if cleaned.endswith("```"):
            cleaned = cleaned[:-3]
    elif cleaned.startswith("`") and cleaned.endswith("`"):
        cleaned = cleaned[1:-1]
    return cleaned.strip()


def process_single_image_with_openrouter(image_path, doc_type):
    """Send one document image to OpenRouter and return the parsed extraction.

    The image is read from disk, embedded as a base64 data URL and posted to
    the chat-completions endpoint together with the extraction prompt. The
    model's reply is expected to be a JSON object, optionally wrapped in a
    Markdown code fence.

    Args:
        image_path: Path of the image file to read.
        doc_type: Document type selected by the user; passed to the prompt and
            echoed back under "document_type_provided".

    Returns:
        A dict. On success it is the JSON object returned by the model, with
        "document_type_provided" filled in if the model omitted it. On any
        failure it contains an "error" key (plus "document_type_provided" and,
        where available, "raw_content_from_llm" or "details"). This function
        does not raise: every exception is converted into an error dict.
    """
    if not OPENROUTER_API_KEY:
        return {"error": "OpenRouter API key not set.", "document_type_provided": doc_type}
    try:
        with open(image_path, "rb") as f:
            image_bytes = f.read()
        encoded_image_string = base64.b64encode(image_bytes).decode("utf-8")
        mime_type = _guess_image_mime_type(image_path)
        data_url = f"data:{mime_type};base64,{encoded_image_string}"

        payload = {
            "model": IMAGE_MODEL,
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": generate_extraction_prompt(doc_type)},
                        {"type": "image_url", "image_url": {"url": data_url}},
                    ],
                }
            ],
            "max_tokens": 3000,
            "temperature": 0.1,
        }
        headers = {
            "Authorization": f"Bearer {OPENROUTER_API_KEY}",
            "Content-Type": "application/json",
            "HTTP-Referer": "https://huggingface.co/spaces/Passport_Extractor",
            "X-Title": "Document Classifier",
        }

        print(f"Sending request to OpenRouter for image: {os.path.basename(image_path)}, type: {doc_type}")
        response = requests.post(OPENROUTER_API_URL, headers=headers, json=payload, timeout=120)
        response.raise_for_status()
        result = response.json()
        print(f"Received response from OpenRouter. Status: {response.status_code}")

        if not ("choices" in result and result["choices"]):
            print(f"No 'choices' in API response: {result}")
            return {"error": "No choices in API response.", "details": result, "document_type_provided": doc_type}

        content_text = result["choices"][0]["message"]["content"]
        if not isinstance(content_text, str):
            # A null or structured "content" cannot be parsed as JSON text.
            return {
                "error": "API response contained no text content.",
                "details": result,
                "document_type_provided": doc_type,
            }

        try:
            parsed_json = json.loads(_strip_code_fences(content_text))
        except json.JSONDecodeError as e:
            # NOTE(review): the raw model output may contain personal data from
            # the document; confirm that writing it to the log is acceptable.
            print(f"JSONDecodeError: {e}. Raw content was:\n{content_text}")
            return {
                "error": "Failed to parse LLM output as JSON.",
                "raw_content_from_llm": content_text,
                "document_type_provided": doc_type,
            }

        if not isinstance(parsed_json, dict):
            # Valid JSON, but a list/string/number cannot carry the expected keys.
            return {
                "error": "LLM output was valid JSON but not a JSON object.",
                "raw_content_from_llm": content_text,
                "document_type_provided": doc_type,
            }

        parsed_json.setdefault("document_type_provided", doc_type)
        return parsed_json
    except requests.exceptions.Timeout:
        # Timeout is a RequestException subclass, so it must be handled first.
        print(f"API Request Timeout for {os.path.basename(image_path)}")
        return {"error": "API request timed out.", "document_type_provided": doc_type}
    except requests.exceptions.RequestException as e:
        error_message = f"API Request Error: {str(e)}"
        if e.response is not None:
            error_message += f" Status: {e.response.status_code}, Response: {e.response.text}"
        print(error_message)
        return {"error": error_message, "document_type_provided": doc_type}
    except Exception as e:
        # Top-level boundary for the UI: report the failure instead of crashing the batch.
        print(f"An unexpected error occurred during processing {os.path.basename(image_path)}: {str(e)}")
        return {"error": f"An unexpected error: {str(e)}", "document_type_provided": doc_type}
def add_document_to_batch_ui(image_filepath, doc_type_selection):
    """Queue an uploaded image in the current batch and refresh the batch table.

    Args:
        image_filepath: Path of the uploaded image; falsy when nothing was uploaded.
        doc_type_selection: Document type chosen in the dropdown.

    Returns:
        A ``(rows, message)`` tuple: ``rows`` is a list of
        ``[filename, document type]`` pairs for every queued document and
        ``message`` is the status text to display.
    """
    global current_batch

    # Guard clause: nothing is queued unless both inputs are present.
    if not image_filepath or not doc_type_selection:
        rows = [[entry["filename"], entry["type"]] for entry in current_batch]
        return rows, "Failed to add: Image or document type missing."

    filename = os.path.basename(image_filepath)
    new_entry = {"path": image_filepath, "type": doc_type_selection, "filename": filename}
    current_batch.append(new_entry)

    rows = [[entry["filename"], entry["type"]] for entry in current_batch]
    return rows, f"Added '{filename}' as '{doc_type_selection}'."
def _person_identifier_for(result_item):
    """Return the grouping key for one extraction result, or None if none can be built."""
    if not isinstance(result_item, dict):
        return None
    fields = result_item.get("extracted_fields")
    if not isinstance(fields, dict):
        return None

    def first_truthy(*keys):
        # First non-empty value among the alternative field names.
        for key in keys:
            value = fields.get(key)
            if value:
                return value
        return None

    def squash(value):
        # Identifier fragments are compared with spaces removed.
        return str(value).replace(" ", "")

    passport_no = first_truthy("Document Number", "Passport Number", "passport_number")
    if passport_no:
        return f"passport_{squash(passport_no).lower()}"

    name = first_truthy("Given Names", "Given Name", "Name")
    surname = first_truthy("Surname", "Family Name")
    if not (name and surname):
        return None

    name_key = f"{squash(name).lower()}_{squash(surname).lower()}"
    dob = first_truthy("Date of Birth", "DOB")
    if dob:
        # The date of birth is appended as-is (spaces removed, case untouched).
        return f"{name_key}_{squash(dob)}"
    return name_key


def process_batch_ui():
    """Run extraction on every queued document and group the results by person.

    Returns:
        A ``(output, status)`` tuple. ``output`` is a dict with the keys
        "summary", "grouped_by_person" and "unidentified_documents_or_errors"
        (or a single "error"/"message" key when the API key is missing or the
        batch is empty); ``status`` is the text for the status box.
    """
    global current_batch

    if not OPENROUTER_API_KEY:
        return {"error": "OPENROUTER_API_KEY is not set. Please configure it."}, "API Key Missing."
    if not current_batch:
        return {"message": "Batch is empty. Add documents first."}, "Batch is empty."

    # Pass 1: extract each document, recording a per-document status line.
    all_results = []
    status_updates = []
    for position, entry in enumerate(current_batch, start=1):
        print(f"Processing document {position}/{len(current_batch)}: {entry['filename']} ({entry['type']})...")
        extracted = process_single_image_with_openrouter(entry["path"], entry["type"])
        all_results.append(extracted)
        if "error" in extracted:
            status_updates.append(f"Error processing {entry['filename']}: {extracted['error']}")
        else:
            status_updates.append(f"Successfully processed {entry['filename']}.")

    # Pass 2: bucket results by person; anything without a key is kept separately.
    grouped_by_person = {}
    unidentified_docs = []
    for result_item in all_results:
        doc_id = _person_identifier_for(result_item)
        if not doc_id:
            unidentified_docs.append(result_item)
            continue
        group = grouped_by_person.setdefault(doc_id, {"person_identifier": doc_id, "documents": []})
        group["documents"].append(result_item)

    final_structured_output = {
        "summary": f"Processed {len(current_batch)} documents.",
        "grouped_by_person": list(grouped_by_person.values()),
        "unidentified_documents_or_errors": unidentified_docs,
    }
    final_status = "Batch processing complete. " + " | ".join(status_updates)
    print(final_status)
    return final_structured_output, final_status
def clear_batch_ui():
    """Discard every queued document.

    Returns:
        A ``(rows, message)`` tuple: an empty list for the batch table and the
        confirmation text for the status box.
    """
    global current_batch
    # Rebind the module-level name to a fresh list rather than mutating in place.
    current_batch = list()
    cleared_rows = []
    return cleared_rows, "Batch cleared successfully."
# Gradio UI: upload documents into a batch, review it, then process the batch.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 📄 Document Information Extractor (OpenGVLab/InternVL3-14B via OpenRouter)")
    gr.Markdown(
        "**Instructions:**\n"
        "1. Upload a document image (e.g., passport front/back, photo, hotel reservation).\n"
        "2. Select the correct document type.\n"
        "3. Click 'Add Document to Current Batch'. Repeat for all documents of a person or a related set.\n"
        "4. Review the batch. Click 'Clear Entire Batch' to start over.\n"
        "5. Click 'Process Batch and Extract Information' to send documents to the AI.\n"
        "6. View the extracted information in JSON format below."
    )

    # Shown only when no API key is configured, so the failure is visible up front.
    if not OPENROUTER_API_KEY:
        gr.Markdown(
            "<h3 style='color:red;'>⚠️ Warning: `OPENROUTER_API_KEY` environment variable is not detected. "
            "API calls will fail. Please set it and restart this application.</h3>"
        )

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### Step 1: Add Document")
            image_input = gr.Image(
                label="Upload Document Image",
                type="filepath",  # handlers receive a path on disk, not pixel data
                sources=["upload"],
                height=300,
            )
            doc_type_choices = [
                'passport_front', 'passport_back', 'national_id_front', 'national_id_back',
                'drivers_license_front', 'drivers_license_back', 'visa_sticker',
                'photo', 'hotel_reservation', 'boarding_pass', 'utility_bill', 'other_document',
            ]
            doc_type_input = gr.Dropdown(
                label="Select Document Type",
                choices=doc_type_choices,
                value='passport_front',
                filterable=True,
            )
            add_button = gr.Button("➕ Add Document to Current Batch", variant="secondary")

        with gr.Column(scale=2):
            gr.Markdown("### Step 2: Review Current Batch")
            batch_dataframe = gr.Dataframe(
                headers=["Filename", "Document Type"],
                datatype=["str", "str"],
                row_count=1,  # start with one row; rows are replaced on every update
                col_count=2,
                wrap=True,
            )
            clear_batch_button = gr.Button("🗑️ Clear Entire Batch", variant="stop")

    gr.Markdown("### Step 3: Process Batch")
    process_button = gr.Button("🚀 Process Batch and Extract Information", variant="primary")
    status_message_textbox = gr.Textbox(label="Processing Status", interactive=False, lines=2)

    gr.Markdown("### Step 4: View Results")
    output_json_display = gr.JSON(label="Extracted Information (JSON Format)")

    # Event wiring. Listeners are registered inside the Blocks context.
    add_button.click(
        fn=add_document_to_batch_ui,
        inputs=[image_input, doc_type_input],
        outputs=[batch_dataframe, status_message_textbox],
    ).then(lambda: None, outputs=image_input)  # reset the upload widget after adding

    clear_batch_button.click(
        fn=clear_batch_ui,
        inputs=[],
        outputs=[batch_dataframe, status_message_textbox],
    )

    process_button.click(
        fn=process_batch_ui,
        inputs=[],
        outputs=[output_json_display, status_message_textbox],
    )
# Script entry point: warn about a missing API key, then start the web app.
if __name__ == "__main__":
    if not OPENROUTER_API_KEY:
        startup_warning = (
            "ERROR: The OPENROUTER_API_KEY environment variable is not set.",
            "Please set it before running the application, e.g.:",
            "  export OPENROUTER_API_KEY='your_openrouter_key_here'",
            "The application will launch, but API calls will fail.",
        )
        print("\n".join(startup_warning))
    # The app is launched even without a key so the in-page warning can be seen.
    # NOTE(review): share=True requests a public share link from Gradio — confirm
    # that exposing a document-processing app this way is intended.
    demo.launch(share=True)