import base64
import json
import os
import sys
from io import BytesIO

import gradio as gr
from PIL import Image
from transformers import AutoProcessor, AutoModelForImageTextToText

# -----------------------------
#  Load model and processor once
# -----------------------------
# One checkpoint id shared by both loaders so the processor and the model
# cannot accidentally be pointed at different checkpoints.
_MODEL_ID = "ds4sd/SmolDocling-256M-preview"

# Loaded at import time; the request handlers below reuse these module-level
# objects instead of reloading them per call.
processor = AutoProcessor.from_pretrained(_MODEL_ID)
model = AutoModelForImageTextToText.from_pretrained(_MODEL_ID)

# -----------------------------
#  Image conversion helper
# -----------------------------
def convert_to_pil(image_input):
    """
    Convert base64, dict, or file path to PIL.Image.
    Handles:
      - "data:image/png;base64,...."
      - plain base64
      - {"type": "image", "data": "..."}
      - file path
    """
    # Case 1: dict input (Perplexity/Claude format)
    if isinstance(image_input, dict) and "data" in image_input:
        image_input = image_input["data"]

    # Case 2: base64 string with prefix
    if isinstance(image_input, str) and image_input.startswith("data:image"):
        base64_str = image_input.split(",", 1)[1]
        image_data = base64.b64decode(base64_str)
        return Image.open(BytesIO(image_data))

    # Case 3: plain base64 string (no prefix)
    if isinstance(image_input, str) and "," in image_input and len(image_input) > 100:
        try:
            image_data = base64.b64decode(image_input)
            return Image.open(BytesIO(image_data))
        except Exception:
            pass

    # Case 4: local file path
    if isinstance(image_input, str) and os.path.exists(image_input):
        return Image.open(image_input)

    raise ValueError("Could not convert image input to PIL.Image")

# -----------------------------
#  Core function
# -----------------------------
def smoldocling_readimage(image: Image.Image, prompt_text: str) -> str:
    """
    Generate SmolDocling text for a single image and instruction.

    Builds a one-turn chat prompt (image placeholder + ``prompt_text``),
    runs generation with up to 1024 new tokens, and returns the decoded
    continuation with the ``<end_of_utterance>`` marker removed and
    surrounding whitespace stripped.
    """
    user_turn = {
        "role": "user",
        "content": [{"type": "image"}, {"type": "text", "text": prompt_text}],
    }
    chat_prompt = processor.apply_chat_template([user_turn], add_generation_prompt=True)
    model_inputs = processor(text=chat_prompt, images=[image], return_tensors="pt")
    token_ids = model.generate(**model_inputs, max_new_tokens=1024)

    # Skip the first `prompt_width` positions so only the tokens that follow
    # the prompt are decoded.
    prompt_width = model_inputs.input_ids.shape[1]
    continuation = token_ids[:, prompt_width:]

    # Special tokens are kept on purpose (skip_special_tokens=False); only the
    # end-of-utterance marker is removed from the first decoded sequence.
    decoded = processor.batch_decode(continuation, skip_special_tokens=False)
    return decoded[0].replace("<end_of_utterance>", "").strip()

# -----------------------------
#  Wrapper for MCP schema compatibility
# -----------------------------
def smoldocling_entry(image: str, prompt_text: str) -> str:
    """
    Entry point for the SmolDocling MCP tool.

    Expected input formats:
    - **Base64 string**: "data:image/png;base64,...."
    - **Object** (Perplexity/Claude style): {"type": "image", "data": "data:image/png;base64,..."}
    - **Local file path** (for internal testing)

    Parameters
    ----------
    image : str
        A base64-encoded image string (with or without data: prefix) OR
        a JSON-encoded object containing image data.
    prompt_text : str
        Instruction text for how to process the document (e.g., "Convert this page to docling.")

    Returns
    -------
    str
        Structured or textual content extracted from the image.
    """
    # NOTE(review): the docstring above is kept verbatim because Gradio may
    # surface it as the MCP tool description — confirm before rewording it.

    # Log only a bounded preview: a base64 image can be several megabytes and
    # printing it in full floods the server log on every request.
    max_preview = 80
    preview = image if isinstance(image, str) else repr(image)
    if len(preview) > max_preview:
        preview = f"{preview[:max_preview]}... [{len(preview)} chars total]"
    print(f"Received entry: {preview} prompt: {prompt_text}")

    # Handle Perplexity-style dicts encoded as JSON strings.
    # Only text that starts with "{" can parse to a JSON object, so other
    # inputs (raw base64, data URIs, paths) skip the parse attempt entirely.
    if isinstance(image, str) and image.lstrip().startswith("{"):
        try:
            maybe_json = json.loads(image)
        except (ValueError, RecursionError):
            # Not valid JSON (JSONDecodeError is a ValueError) or absurdly
            # nested: treat the input as an ordinary string.
            maybe_json = None
        if isinstance(maybe_json, dict) and "data" in maybe_json:
            image = maybe_json

    # convert_to_pil raises ValueError when the input matches no supported form.
    pil_image = convert_to_pil(image)
    return smoldocling_readimage(pil_image, prompt_text)

# -----------------------------
#  Gradio MCP App (Headless)
# -----------------------------
# Build the Gradio app object. It contains a single static Markdown panel and
# one API registration; no interactive input/output components are declared.
with gr.Blocks() as demo:
    # Human-readable description of the tool and its accepted input forms.
    # The text is a runtime string, so its content and indentation are left
    # exactly as written.
    gr.Markdown(
        """
        ### 📄 SmolDocling MCP Tool
        This is a **headless MCP tool** for document image conversion.
        It supports input as:
        - Base64-encoded images
        - Perplexity/Claude `{"type": "image", "data": "..."}` objects
        - Local file paths (for testing)
        """
    )

    # Expose MCP tool: register smoldocling_entry as an API endpoint without
    # binding it to any UI component.
    # NOTE(review): presumably the function's signature and docstring drive the
    # generated tool schema/description — confirm against the Gradio MCP docs.
    gr.api(smoldocling_entry)

# Launch MCP server mode.
# When run as a script, launch() blocks the main thread until the server is
# stopped, so a print placed after it would only appear at shutdown. Start the
# server without the thread lock, announce the URL while it is actually up,
# and then block explicitly.
_, url, _ = demo.launch(mcp_server=True, prevent_thread_lock=True)
print(f"MCP Server running at: {url}")

# Same rule launch() applies by default: keep the process alive when running
# as a script, but return control in an interactive session (REPL/notebook).
if not getattr(sys, "ps1", sys.flags.interactive):
    demo.block_thread()