import gradio as gr
from PIL import Image
import os
import torch
import json
import spaces
from transformers import AutoModelForImageTextToText, AutoProcessor
from qwen_vl_utils import process_vision_info

# Request accelerated Hub downloads via the hf_transfer backend.
# NOTE(review): this assignment runs after `transformers` has been imported;
# if the Hub client reads the flag at import time it may arrive too late —
# confirm, and move it above the imports if so.
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"

# One identifier shared by the model and its processor so the two cannot
# drift apart when the checkpoint is changed.
MODEL_ID = "Qwen/Qwen3-VL-30B-A3B-Instruct"

# Load the weights in bfloat16 and let `device_map="auto"` decide placement.
print("Loading Qwen3-VL-30B-A3B-Instruct model...")
model = AutoModelForImageTextToText.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
processor = AutoProcessor.from_pretrained(MODEL_ID)
print("Model loaded successfully!")

# Instruction text sent to the model alongside the catalog card image. It
# enumerates the JSON fields to extract (bibliographic, cataloging/access and
# card-specific) and asks for JSON-only output, with null for absent fields
# and arrays for repeating ones.
EXTRACTION_PROMPT = """Extract metadata from this library catalog card as JSON.

Library catalog cards contain bibliographic information about materials and filing/access information. Extract whatever fields are present:

CORE BIBLIOGRAPHIC FIELDS:
- title: Full title of the work
- author: Main author/creator (person or organization)
- editor: Editor if different from author
- contributor: Other contributors (translators, illustrators, etc.)
- publication_date: Date(s) of publication
- publisher: Publisher name
- publication_place: Place of publication
- physical_description: Physical details (volumes, pages, size, illustrations)
- series: Series information if part of a series
- edition: Edition statement
- contents: Description of contents, volumes, or parts

CATALOGING/ACCESS FIELDS:
- call_number: Library classification number
- subject_headings: Subject terms (often numbered list)
- added_entries: Additional access points for co-authors, editors, etc. (often with Roman numerals)
- notes: Any additional notes

CARD-SPECIFIC:
- filing_heading: The heading under which this card is filed (often at top, may be in all caps)
- card_sequence: If this is a continuation card (e.g., "Card 2", "Card 3")

Return ONLY valid JSON. Use null for fields not present on the card. Use arrays [] for repeating fields like subject_headings and added_entries."""


def _strip_code_fence(text):
    """Return *text* without a surrounding Markdown code fence, if it has one."""
    stripped = text.strip()
    if not stripped.startswith("```"):
        return stripped
    # Drop the opening fence line, which may carry a language tag such as "json".
    _, newline, remainder = stripped.partition("\n")
    body = (remainder if newline else stripped[3:]).rstrip()
    if body.endswith("```"):
        body = body[:-3]
    return body.strip()


def _format_model_output(output_text):
    """Pretty-print the model reply as JSON, or return it unchanged if it is not JSON."""
    try:
        parsed = json.loads(_strip_code_fence(output_text))
    except json.JSONDecodeError:
        # Not parseable even after removing a fence: show the raw reply as-is.
        return output_text
    # ensure_ascii=False keeps accented / non-Latin characters readable rather
    # than turning them into \uXXXX escapes.
    return json.dumps(parsed, indent=2, ensure_ascii=False)


@spaces.GPU
def extract_metadata(image):
    """Extract structured metadata from a catalog card image.

    Args:
        image: A PIL image, or anything ``Image.open`` accepts (for example a
            file path). ``None`` is treated as "nothing uploaded".

    Returns:
        A string: indented JSON when the model reply parses as JSON (with or
        without a Markdown code fence around it), otherwise the raw model
        reply. A prompt to upload an image is returned for ``None`` input, and
        an ``"Error during extraction: ..."`` message if any step raises.
    """
    if image is None:
        return "Please upload an image."

    try:
        # Ensure image is a PIL Image; close the source handle once converted.
        if not isinstance(image, Image.Image):
            with Image.open(image) as opened:
                image = opened.convert("RGB")

        # Format messages for Qwen3-VL: one user turn with the image and the prompt.
        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "image", "image": image},
                    {"type": "text", "text": EXTRACTION_PROMPT},
                ],
            }
        ]

        # Prepare inputs
        text = processor.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        image_inputs, video_inputs = process_vision_info(messages)

        inputs = processor(
            text=[text],
            images=image_inputs,
            videos=video_inputs,
            padding=True,
            return_tensors="pt",
        )
        inputs = inputs.to(model.device)

        # Generate without sampling. No temperature is passed: it is a
        # sampling parameter and contradicts do_sample=False.
        with torch.inference_mode():
            generated_ids = model.generate(
                **inputs, max_new_tokens=512, do_sample=False
            )

        # The generated sequences start with the prompt tokens; keep only the
        # newly generated part of each sequence.
        generated_ids_trimmed = [
            out_ids[len(in_ids) :]
            for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
        ]

        # Decode output (single-item batch, so take the first entry).
        output_text = processor.batch_decode(
            generated_ids_trimmed,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False,
        )[0]

        return _format_model_output(output_text)

    except Exception as e:
        # UI boundary: report the failure in the output pane instead of raising.
        return f"Error during extraction: {str(e)}"


# Build the Gradio UI: header, upload/result columns, example cards, footer.
with gr.Blocks(title="Library Card Metadata Extractor") as demo:
    gr.Markdown("# 📇 Library Card Metadata Extractor")

    intro_text = (
        "Extract structured metadata from library catalog cards using **Qwen/Qwen3-VL-30B-A3B-Instruct**. "
        "Upload an image of a catalog card and get JSON-formatted metadata including title, author, dates, "
        "call numbers, and more.\n\n"
        "This demo works with catalog cards from libraries and archives, such as the "
        "[Rubenstein Manuscript Catalog](https://huggingface.co/datasets/biglam/rubenstein-manuscript-catalog) "
        "and [Boston Public Library Card Catalog](https://huggingface.co/datasets/biglam/bpl-card-catalog)."
    )
    gr.Markdown(intro_text)

    gr.Markdown("---")

    with gr.Row():
        # Left column: image upload and the button that triggers extraction.
        with gr.Column(scale=1):
            gr.Markdown("### 📤 Upload Catalog Card")
            image_input = gr.Image(type="pil", label="Library Catalog Card")
            submit_btn = gr.Button(
                "🔍 Extract Metadata",
                size="lg",
                variant="primary",
            )

        # Right column: the extraction result rendered as JSON code.
        with gr.Column(scale=1):
            gr.Markdown("### 📋 Extracted Metadata (JSON)")
            output = gr.Code(lines=15, language="json", label="Metadata")

    # Wire the button to the extraction function.
    submit_btn.click(fn=extract_metadata, outputs=output, inputs=image_input)

    gr.Markdown("---")

    # Example cards offered below the main controls.
    gr.Markdown("## 🎯 Try Examples")
    example_cards = (
        "examples/bpl_0.jpg",
        "examples/bpl_2.jpg",
        "examples/bpl_4.jpg",
        "examples/bpl_6.jpg",
        "examples/bpl_8.jpg",
        "examples/bpl_9.jpg",
        "examples/bpl_12.jpg",
        "examples/bpl_15.jpg",
        "examples/bpl_22.jpg",
    )
    gr.Examples(
        fn=extract_metadata,
        # One single-column row per example image.
        examples=[[card_path] for card_path in example_cards],
        inputs=image_input,
        outputs=output,
        cache_examples=False,
    )

    gr.Markdown("---")

    # Footer with credits and dataset links.
    footer_text = (
        "<center>\n\n"
        "Built for the GLAM community using [Qwen3-VL-30B-A3B-Instruct](https://huggingface.co/Qwen/Qwen3-VL-30B-A3B-Instruct) | "
        "Example cards from [Rubenstein](https://huggingface.co/datasets/biglam/rubenstein-manuscript-catalog) "
        "and [BPL](https://huggingface.co/datasets/biglam/bpl-card-catalog) collections\n\n"
        "</center>"
    )
    gr.Markdown(footer_text)

# Launch the demo only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    print("Launching demo...")
    demo.launch()