Spaces:
				
			
			
	
			
			
		Running
		
			on 
			
			Zero
	
	
	
			
			
	
	
	
	
		
		
		Running
		
			on 
			
			Zero
	Update app.py
Browse files
    	
        app.py
    CHANGED
    
    | @@ -1,28 +1,16 @@ | |
| 1 | 
             
            import os
         | 
| 2 | 
             
            os.environ.setdefault("HF_HUB_ENABLE_HF_TRANSFER", "0")  # disable hf_transfer if missing
         | 
| 3 |  | 
| 4 | 
            -
            import io
         | 
| 5 | 
            -
            import json
         | 
| 6 | 
            -
            import re
         | 
| 7 | 
            -
            import tempfile
         | 
| 8 | 
            -
            from typing import List, Dict, Any, Optional, Tuple
         | 
| 9 | 
            -
             | 
| 10 | 
             
            import gradio as gr
         | 
| 11 | 
             
            import torch
         | 
| 12 | 
             
            from transformers import AutoModel, AutoTokenizer
         | 
| 13 | 
             
            import spaces
         | 
| 14 | 
            -
             | 
| 15 | 
            -
             | 
| 16 | 
            -
             | 
| 17 | 
            -
            try:
         | 
| 18 | 
            -
                import pandas as pd
         | 
| 19 | 
            -
                _HAS_PANDAS = True
         | 
| 20 | 
            -
            except Exception:
         | 
| 21 | 
            -
                _HAS_PANDAS = False
         | 
| 22 | 
            -
             | 
| 23 | 
             
            from gradio.themes import Soft
         | 
| 24 | 
             
            from gradio.themes.utils import fonts
         | 
| 25 | 
            -
             | 
| 26 |  | 
| 27 | 
             
            # ===== Model Load =====
         | 
| 28 | 
             
            model_name = "deepseek-ai/DeepSeek-OCR"
         | 
| @@ -46,348 +34,160 @@ except Exception: | |
| 46 | 
             
                        pass
         | 
| 47 |  | 
| 48 |  | 
| 49 | 
            -
             | 
| 50 | 
            -
             | 
| 51 | 
            -
             | 
| 52 | 
            -
             | 
| 53 | 
            -
             | 
| 54 | 
            -
             | 
| 55 | 
            -
                 | 
| 56 | 
            -
             | 
| 57 | 
            -
             | 
| 58 | 
            -
             | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 59 | 
             
                """
         | 
| 60 | 
            -
                 | 
| 61 | 
            -
                 | 
| 62 | 
            -
                2) fallback to first top-level-looking {...}
         | 
| 63 | 
             
                """
         | 
| 64 | 
            -
                 | 
| 65 | 
            -
             | 
| 66 | 
            -
             | 
| 67 | 
            -
                 | 
| 68 | 
            -
             | 
| 69 | 
            -
             | 
|  | |
|  | |
|  | |
|  | |
|  | |
| 70 | 
             
                else:
         | 
| 71 | 
            -
                     | 
| 72 | 
            -
             | 
| 73 | 
            -
             | 
| 74 | 
            -
                 | 
| 75 | 
            -
             | 
| 76 | 
            -
             | 
| 77 | 
            -
             | 
| 78 | 
            -
                 | 
| 79 | 
            -
                     | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 80 |  | 
| 81 |  | 
| 82 | 
            -
             | 
| 83 | 
            -
                """
         | 
| 84 | 
            -
                Convert a chart-style JSON into a Markdown pipe table.
         | 
| 85 | 
            -
                Expected schema (flexible):
         | 
| 86 | 
            -
                  {
         | 
| 87 | 
            -
                    "type": "bar|line|...",
         | 
| 88 | 
            -
                    "title": "...",
         | 
| 89 | 
            -
                    "x": ["Germany","France",...],  # categories (or "categories")
         | 
| 90 | 
            -
                    "series": [{"name":"2024","data":[...]} , ...]
         | 
| 91 | 
            -
                  }
         | 
| 92 | 
            -
                We handle keys: x|categories; y ignored (derived from series).
         | 
| 93 | 
            -
                """
         | 
| 94 | 
            -
                if not js:
         | 
| 95 | 
            -
                    return None
         | 
| 96 | 
            -
             | 
| 97 | 
            -
                x = js.get("x") or js.get("categories")
         | 
| 98 | 
            -
                series = js.get("series")
         | 
| 99 | 
            -
                if not isinstance(x, list) or not isinstance(series, list):
         | 
| 100 | 
            -
                    return None
         | 
| 101 | 
            -
             | 
| 102 | 
            -
                # build rows: first col is x category, next cols are series values
         | 
| 103 | 
            -
                headers = ["Category"] + [str(s.get("name", f"series{i}")) for i, s in enumerate(series)]
         | 
| 104 | 
            -
                rows: List[List[str]] = []
         | 
| 105 | 
            -
                for i, cat in enumerate(x):
         | 
| 106 | 
            -
                    row = [str(cat)]
         | 
| 107 | 
            -
                    for s in series:
         | 
| 108 | 
            -
                        data = s.get("data", [])
         | 
| 109 | 
            -
                        val = data[i] if i < len(data) else ""
         | 
| 110 | 
            -
                        row.append(str(val))
         | 
| 111 | 
            -
                    rows.append(row)
         | 
| 112 | 
            -
             | 
| 113 | 
            -
                # to markdown pipe table
         | 
| 114 | 
            -
                header_line = "| " + " | ".join(headers) + " |"
         | 
| 115 | 
            -
                align_line  = "| " + " | ".join([":---"] * len(headers)) + " |"
         | 
| 116 | 
            -
                data_lines  = ["| " + " | ".join(r) + " |" for r in rows]
         | 
| 117 | 
            -
                return "\n".join([header_line, align_line, *data_lines])
         | 
| 118 | 
            -
             | 
| 119 | 
            -
             | 
| 120 | 
            -
            def _md_table_to_df(md_text: str):
         | 
| 121 | 
            -
                if not _HAS_PANDAS:
         | 
| 122 | 
            -
                    return None
         | 
| 123 | 
            -
                m = _MD_TABLE_BLOCK_RE.search(md_text or "")
         | 
| 124 | 
            -
                if not m:
         | 
| 125 | 
            -
                    return None
         | 
| 126 | 
            -
                block = m.group(0).strip()
         | 
| 127 | 
            -
                lines = [ln.strip() for ln in block.splitlines() if ln.strip()]
         | 
| 128 | 
            -
                if len(lines) < 2:
         | 
| 129 | 
            -
                    return None
         | 
| 130 | 
            -
                header = [h.strip() for h in lines[0].strip("|").split("|")]
         | 
| 131 | 
            -
                align_or_sep = lines[1]
         | 
| 132 | 
            -
                data_lines = lines[2:] if re.search(r":?-+:?", align_or_sep) else lines[1:]
         | 
| 133 | 
            -
                rows = []
         | 
| 134 | 
            -
                for ln in data_lines:
         | 
| 135 | 
            -
                    parts = [p.strip() for p in ln.strip("|").split("|")]
         | 
| 136 | 
            -
                    if len(parts) == len(header):
         | 
| 137 | 
            -
                        rows.append(parts)
         | 
| 138 | 
            -
                if not rows:
         | 
| 139 | 
            -
                    return None
         | 
| 140 | 
            -
                df = pd.DataFrame(rows, columns=header)
         | 
| 141 | 
            -
                # try cast numerics
         | 
| 142 | 
            -
                for c in df.columns[1:]:
         | 
| 143 | 
            -
                    df[c] = pd.to_numeric(df[c], errors="ignore")
         | 
| 144 | 
            -
                return df
         | 
| 145 | 
            -
             | 
| 146 | 
            -
             | 
| 147 | 
            -
            def _numeric_block_to_df(text: str):
         | 
| 148 | 
            -
                """Rough fallback: largest numeric-ish block into a DataFrame."""
         | 
| 149 | 
            -
                if not _HAS_PANDAS:
         | 
| 150 | 
            -
                    return None
         | 
| 151 | 
            -
                blocks = []
         | 
| 152 | 
            -
                cur = []
         | 
| 153 | 
            -
                for ln in (text or "").splitlines():
         | 
| 154 | 
            -
                    if re.search(r"\d", ln) and ("," in ln or "\t" in ln or "  " in ln or "|" in ln):
         | 
| 155 | 
            -
                        cur.append(ln)
         | 
| 156 | 
            -
                    else:
         | 
| 157 | 
            -
                        if cur:
         | 
| 158 | 
            -
                            blocks.append("\n".join(cur)); cur = []
         | 
| 159 | 
            -
                if cur: blocks.append("\n".join(cur))
         | 
| 160 | 
            -
                if not blocks: return None
         | 
| 161 | 
            -
             | 
| 162 | 
            -
                block = max(blocks, key=len)
         | 
| 163 | 
            -
                from io import StringIO
         | 
| 164 | 
            -
                # CSV
         | 
| 165 | 
            -
                try:
         | 
| 166 | 
            -
                    df = pd.read_csv(StringIO(block))
         | 
| 167 | 
            -
                    if df.shape[1] >= 2: return df
         | 
| 168 | 
            -
                except Exception:
         | 
| 169 | 
            -
                    pass
         | 
| 170 | 
            -
                # whitespace
         | 
| 171 | 
            -
                try:
         | 
| 172 | 
            -
                    df = pd.read_csv(StringIO(block), sep=r"\s+", engine="python")
         | 
| 173 | 
            -
                    if df.shape[1] >= 2: return df
         | 
| 174 | 
            -
                except Exception:
         | 
| 175 | 
            -
                    pass
         | 
| 176 | 
            -
                return None
         | 
| 177 | 
            -
             | 
| 178 | 
            -
             | 
| 179 | 
            -
            def _df_to_markdown_and_csv(df) -> Tuple[str, str]:
         | 
| 180 | 
            -
                """Return (markdown_pipe_table, csv_text)."""
         | 
| 181 | 
            -
                if not _HAS_PANDAS or df is None:
         | 
| 182 | 
            -
                    return "", ""
         | 
| 183 | 
            -
                # Markdown
         | 
| 184 | 
            -
                md = []
         | 
| 185 | 
            -
                headers = list(df.columns)
         | 
| 186 | 
            -
                md.append("| " + " | ".join(map(str, headers)) + " |")
         | 
| 187 | 
            -
                md.append("| " + " | ".join([":---"] * len(headers)) + " |")
         | 
| 188 | 
            -
                for _, row in df.iterrows():
         | 
| 189 | 
            -
                    md.append("| " + " | ".join(map(lambda x: str(x), row.tolist())) + " |")
         | 
| 190 | 
            -
                md_text = "\n".join(md)
         | 
| 191 | 
            -
             | 
| 192 | 
            -
                # CSV
         | 
| 193 | 
            -
                buf = io.StringIO()
         | 
| 194 | 
            -
                df.to_csv(buf, index=False)
         | 
| 195 | 
            -
                csv_text = buf.getvalue()
         | 
| 196 | 
            -
                return md_text, csv_text
         | 
| 197 | 
            -
             | 
| 198 | 
            -
             | 
| 199 | 
            -
            # ===== Inference Function =====
         | 
| 200 | 
             
            @spaces.GPU
         | 
| 201 | 
            -
            def  | 
| 202 | 
             
                """
         | 
| 203 | 
            -
                Process  | 
| 204 | 
            -
                Adds deep parsing to extract structured DATA from figures (JSON + Table + CSV)
         | 
| 205 | 
            -
                and appends it inside the Markdown for RAG indexing.
         | 
| 206 | 
             
                """
         | 
| 207 | 
            -
                if  | 
| 208 | 
            -
                    return  | 
| 209 |  | 
| 210 | 
            -
                #  | 
| 211 | 
             
                if torch.cuda.is_available():
         | 
| 212 | 
             
                    model_runtime = model.to("cuda", dtype=torch.bfloat16)
         | 
| 213 | 
             
                else:
         | 
| 214 | 
             
                    model_runtime = model.to("cpu", dtype=torch.float32)
         | 
| 215 |  | 
| 216 | 
            -
                 | 
| 217 | 
            -
                    #  | 
| 218 | 
            -
                     | 
| 219 | 
            -
             | 
| 220 | 
            -
                     | 
| 221 | 
            -
             | 
| 222 | 
            -
                     | 
| 223 | 
            -
                         | 
| 224 | 
            -
                     | 
| 225 | 
            -
             | 
| 226 | 
            -
             | 
| 227 | 
            -
             | 
| 228 | 
            -
                     | 
| 229 | 
            -
             | 
| 230 | 
            -
             | 
| 231 | 
            -
             | 
| 232 | 
            -
             | 
| 233 | 
            -
             | 
| 234 | 
            -
             | 
| 235 | 
            -
             | 
| 236 | 
            -
             | 
| 237 | 
            -
             | 
| 238 | 
            -
             | 
| 239 | 
            -
             | 
| 240 | 
            -
                        "Base": {"base_size": 1024, "image_size": 1024, "crop_mode": False},
         | 
| 241 | 
            -
                        "Large": {"base_size": 1280, "image_size": 1280, "crop_mode": False},
         | 
| 242 | 
            -
                        "Gundam (Recommended)": {"base_size": 1024, "image_size": 640, "crop_mode": True},
         | 
| 243 | 
            -
                    }
         | 
| 244 | 
            -
                    config = size_configs.get(model_size, size_configs["Gundam (Recommended)"])
         | 
| 245 | 
            -
             | 
| 246 | 
            -
                    # ===== primary pass =====
         | 
| 247 | 
            -
                    with torch.no_grad():
         | 
| 248 | 
            -
                        primary_text = model_runtime.infer(
         | 
| 249 | 
            -
                            tokenizer,
         | 
| 250 | 
            -
                            prompt=prompt,
         | 
| 251 | 
            -
                            image_file=temp_image_path,
         | 
| 252 | 
            -
                            output_path=output_path,
         | 
| 253 | 
            -
                            base_size=config["base_size"],
         | 
| 254 | 
            -
                            image_size=config["image_size"],
         | 
| 255 | 
            -
                            crop_mode=config["crop_mode"],
         | 
| 256 | 
            -
                            save_results=True,
         | 
| 257 | 
            -
                            test_compress=True,
         | 
| 258 | 
            -
                            eval_mode=is_eval_mode,
         | 
| 259 | 
            -
                        )
         | 
| 260 | 
            -
             | 
| 261 | 
            -
                    # collect results
         | 
| 262 | 
            -
                    image_result_path = os.path.join(output_path, "result_with_boxes.jpg")
         | 
| 263 | 
            -
                    markdown_result_path = os.path.join(output_path, "result.mmd")
         | 
| 264 | 
            -
             | 
| 265 | 
            -
                    markdown_content = "Markdown result was not generated. This is expected for 'Free OCR' task."
         | 
| 266 | 
            -
                    if os.path.exists(markdown_result_path):
         | 
| 267 | 
            -
                        try:
         | 
| 268 | 
            -
                            with open(markdown_result_path, "r", encoding="utf-8") as f:
         | 
| 269 | 
            -
                                markdown_content = f.read()
         | 
| 270 | 
            -
                        except Exception:
         | 
| 271 | 
            -
                            pass
         | 
| 272 | 
            -
             | 
| 273 | 
            -
                    result_image = None
         | 
| 274 | 
            -
                    if os.path.exists(image_result_path):
         | 
| 275 | 
            -
                        try:
         | 
| 276 | 
            -
                            from PIL import Image
         | 
| 277 | 
            -
                            result_image = Image.open(image_result_path)
         | 
| 278 | 
            -
                            result_image.load()
         | 
| 279 | 
            -
                        except Exception:
         | 
| 280 | 
            -
                            result_image = None
         | 
| 281 | 
            -
             | 
| 282 | 
            -
                    # draw bboxes if <|det|>
         | 
| 283 | 
            -
                    det_pat = re.compile(r"<\|det\|>\[\[(\d+),\s*(\d+),\s*(\d+),\s*(\d+)\]\]<\|/det\|>")
         | 
| 284 | 
            -
                    matches = list(det_pat.finditer(primary_text or ""))
         | 
| 285 | 
            -
                    if matches:
         | 
| 286 | 
            -
                        img_with_boxes = image.copy()
         | 
| 287 | 
            -
                        draw = ImageDraw.Draw(img_with_boxes)
         | 
| 288 | 
            -
                        w, h = image.size
         | 
| 289 | 
            -
                        for m in matches:
         | 
| 290 | 
            -
                            x1,y1,x2,y2 = [int(c) for c in m.groups()]
         | 
| 291 | 
            -
                            x1 = int(x1/1000*w); y1=int(y1/1000*h); x2=int(x2/1000*w); y2=int(y2/1000*h)
         | 
| 292 | 
            -
                            draw.rectangle([x1,y1,x2,y2], outline="red", width=3)
         | 
| 293 | 
            -
                        result_image = img_with_boxes
         | 
| 294 | 
            -
             | 
| 295 | 
            -
                    # ===== deep parse for DATA (not rendering) =====
         | 
| 296 | 
            -
                    # We always try deep parse for Convert to Markdown / Parse Figure, otherwise only if checkbox is on
         | 
| 297 | 
            -
                    should_deep = deep_parse and (task_type in {"📄 Convert to Markdown", "📈 Parse Figure"})
         | 
| 298 | 
            -
             | 
| 299 | 
            -
                    extracted_md_section = ""
         | 
| 300 | 
            -
                    if should_deep:
         | 
| 301 | 
            -
                        # ask model for STRICT JSON for charts
         | 
| 302 | 
            -
                        strict_json_prompt = (
         | 
| 303 | 
            -
                            "<image>\n"
         | 
| 304 | 
            -
                            "Parse the figure. If it's a chart, return ONLY a single JSON object with keys:\n"
         | 
| 305 | 
            -
                            "{\n"
         | 
| 306 | 
            -
                            '  "type": "bar|line|area|scatter|pie|table|unknown",\n'
         | 
| 307 | 
            -
                            '  "title": "string",\n'
         | 
| 308 | 
            -
                            '  "x": ["category", ...],\n'
         | 
| 309 | 
            -
                            '  "series": [{"name": "string", "data": [number|null, ...]}, ...]\n'
         | 
| 310 | 
            -
                            "}\n"
         | 
| 311 | 
            -
                            "If it's a table, return the same JSON using 'type': 'table' and fill x from the first column and series from remaining columns.\n"
         | 
| 312 | 
            -
                            "Do not include any explanation text. Return ONLY the JSON."
         | 
| 313 | 
            -
                        )
         | 
| 314 | 
            -
             | 
| 315 | 
            -
                        with torch.no_grad():
         | 
| 316 | 
            -
                            deep_text = model_runtime.infer(
         | 
| 317 | 
             
                                tokenizer,
         | 
| 318 | 
            -
                                 | 
| 319 | 
            -
                                 | 
| 320 | 
            -
                                 | 
| 321 | 
            -
                                 | 
| 322 | 
            -
                                 | 
| 323 | 
            -
             | 
| 324 | 
            -
             | 
| 325 | 
            -
             | 
| 326 | 
            -
             | 
| 327 | 
            -
                             | 
| 328 | 
            -
             | 
| 329 | 
            -
             | 
| 330 | 
            -
             | 
| 331 | 
            -
             | 
| 332 | 
            -
             | 
| 333 | 
            -
             | 
| 334 | 
            -
             | 
| 335 | 
            -
             | 
| 336 | 
            -
             | 
| 337 | 
            -
             | 
| 338 | 
            -
             | 
| 339 | 
            -
             | 
| 340 | 
            -
                            extracted_md_section = "### Extracted Figure Data\n\n"
         | 
| 341 | 
            -
                            extracted_md_section += "**JSON (canonical for RAG)**\n\n```json\n" + json.dumps(js, ensure_ascii=False, indent=2) + "\n```\n\n"
         | 
| 342 | 
            -
                            if md_table:
         | 
| 343 | 
            -
                                extracted_md_section += "**Table (Markdown)**\n\n" + md_table + "\n\n"
         | 
| 344 | 
            -
                            if csv_text:
         | 
| 345 | 
            -
                                extracted_md_section += "**CSV**\n\n```csv\n" + csv_text.strip() + "\n```\n"
         | 
| 346 | 
            -
                        else:
         | 
| 347 | 
            -
                            # Fallback: ask for generic figure parse, then try to pull Markdown tables / numeric blocks
         | 
| 348 | 
            -
                            with torch.no_grad():
         | 
| 349 | 
            -
                                fallback_text = model_runtime.infer(
         | 
| 350 | 
            -
                                    tokenizer,
         | 
| 351 | 
            -
                                    prompt="<image>\nParse the figure.",
         | 
| 352 | 
            -
                                    image_file=temp_image_path,
         | 
| 353 | 
            -
                                    output_path=output_path,
         | 
| 354 | 
            -
                                    base_size=config["base_size"],
         | 
| 355 | 
            -
                                    image_size=config["image_size"],
         | 
| 356 | 
            -
                                    crop_mode=config["crop_mode"],
         | 
| 357 | 
            -
                                    save_results=False,
         | 
| 358 | 
            -
                                    test_compress=True,
         | 
| 359 | 
            -
                                    eval_mode=True,
         | 
| 360 | 
            -
                                ) or ""
         | 
| 361 | 
            -
             | 
| 362 | 
            -
                            df = _md_table_to_df(fallback_text)
         | 
| 363 | 
            -
                            if df is None:
         | 
| 364 | 
            -
                                df = _numeric_block_to_df(fallback_text)
         | 
| 365 | 
            -
             | 
| 366 | 
            -
                            if df is not None:
         | 
| 367 | 
            -
                                md_table, csv_text = _df_to_markdown_and_csv(df)
         | 
| 368 | 
            -
                                js_fallback = {
         | 
| 369 | 
            -
                                    "type": "table",
         | 
| 370 | 
            -
                                    "title": "",
         | 
| 371 | 
            -
                                    "x": df.iloc[:,0].astype(str).tolist(),
         | 
| 372 | 
            -
                                    "series": [{"name": c, "data": [None if pd.isna(v) else (float(v) if str(v).replace('.','',1).isdigit() else v) for v in df[c].tolist()]}
         | 
| 373 | 
            -
                                               for c in df.columns[1:]] if _HAS_PANDAS else []
         | 
| 374 | 
            -
                                }
         | 
| 375 | 
            -
                                extracted_md_section = "### Extracted Figure Data\n\n"
         | 
| 376 | 
            -
                                extracted_md_section += "**JSON (canonical for RAG)**\n\n```json\n" + json.dumps(js_fallback, ensure_ascii=False, indent=2) + "\n```\n\n"
         | 
| 377 | 
            -
                                extracted_md_section += "**Table (Markdown)**\n\n" + md_table + "\n\n"
         | 
| 378 | 
            -
                                extracted_md_section += "**CSV**\n\n```csv\n" + csv_text.strip() + "\n```\n"
         | 
| 379 | 
            -
                            else:
         | 
| 380 | 
            -
                                # Nothing structured; keep a short diagnostic (plain text only)
         | 
| 381 | 
            -
                                extracted_md_section = "### Extracted Figure Data\n\n_No structured table/series detected. You may need to adjust the deep-parse prompt for this figure type._\n"
         | 
| 382 | 
            -
             | 
| 383 | 
            -
                    # ===== Merge into final Markdown =====
         | 
| 384 | 
            -
                    if extracted_md_section:
         | 
| 385 | 
            -
                        markdown_content = markdown_content.rstrip() + "\n\n---\n\n" + extracted_md_section
         | 
| 386 | 
            -
             | 
| 387 | 
            -
                    # For the “Markdown Source (or Eval Output)” tab
         | 
| 388 | 
            -
                    text_result = primary_text if primary_text else markdown_content
         | 
| 389 | 
            -
             | 
| 390 | 
            -
                    return result_image, markdown_content, text_result
         | 
| 391 |  | 
| 392 |  | 
| 393 | 
             
            # ===== Theme and UI =====
         | 
| @@ -407,77 +207,109 @@ custom_css = """ | |
| 407 |  | 
| 408 | 
             
            # ===== Interface =====
         | 
| 409 | 
             
            with gr.Blocks(
         | 
| 410 | 
            -
                title="DeepSeek-OCR by Jatevo LLM Inference",
         | 
| 411 | 
             
                theme=theme,
         | 
| 412 | 
             
                css=custom_css,
         | 
| 413 | 
             
            ) as demo:
         | 
| 414 | 
             
                gr.Markdown(
         | 
| 415 | 
             
                    """
         | 
| 416 | 
            -
                    # DeepSeek-OCR by Jatevo LLM Inference
         | 
| 417 | 
            -
                     | 
| 418 | 
            -
                     | 
| 419 | 
            -
             | 
|  | |
| 420 | 
             
                    **Model Sizes:**
         | 
| 421 | 
             
                    - **Tiny** — Fastest, lower accuracy (512×512)
         | 
| 422 | 
             
                    - **Small** — Fast, good accuracy (640×640)
         | 
| 423 | 
             
                    - **Base** — Balanced performance (1024×1024)
         | 
| 424 | 
             
                    - **Large** — Best accuracy, slower (1280×1280)
         | 
| 425 | 
             
                    - **Gundam (Recommended)** — Optimized for documents (1024 base, 640 image, crop mode)
         | 
|  | |
|  | |
| 426 | 
             
                    """
         | 
| 427 | 
             
                )
         | 
| 428 |  | 
| 429 | 
             
                with gr.Row():
         | 
| 430 | 
             
                    with gr.Column(scale=1):
         | 
| 431 | 
            -
                         | 
|  | |
|  | |
|  | |
|  | |
|  | |
| 432 | 
             
                        model_size = gr.Dropdown(
         | 
| 433 | 
             
                            choices=["Tiny", "Small", "Base", "Large", "Gundam (Recommended)"],
         | 
| 434 | 
             
                            value="Gundam (Recommended)",
         | 
| 435 | 
             
                            label="Model Size",
         | 
| 436 | 
             
                        )
         | 
|  | |
| 437 | 
             
                        task_type = gr.Dropdown(
         | 
| 438 | 
            -
                            choices=[ | 
|  | |
|  | |
|  | |
|  | |
|  | |
| 439 | 
             
                            value="📄 Convert to Markdown",
         | 
| 440 | 
             
                            label="Task Type",
         | 
| 441 | 
             
                        )
         | 
|  | |
| 442 | 
             
                        ref_text_input = gr.Textbox(
         | 
| 443 | 
             
                            label="Reference Text (for Locate task)",
         | 
| 444 | 
             
                            placeholder="e.g., 'the teacher', '20-10', 'a red car'...",
         | 
| 445 | 
             
                            visible=False,
         | 
| 446 | 
             
                        )
         | 
|  | |
| 447 | 
             
                        eval_mode_checkbox = gr.Checkbox(
         | 
| 448 | 
             
                            value=False,
         | 
| 449 | 
             
                            label="Enable Evaluation Mode",
         | 
| 450 | 
            -
                            info="Returns only plain text (faster). | 
| 451 | 
            -
                        )
         | 
| 452 | 
            -
                        deep_parse_checkbox = gr.Checkbox(
         | 
| 453 | 
            -
                            value=True,
         | 
| 454 | 
            -
                            label="Deep parse and extract figure data (JSON + table + CSV)",
         | 
| 455 | 
            -
                            info="Adds a second pass that extracts machine-readable data for RAG.",
         | 
| 456 | 
             
                        )
         | 
| 457 | 
            -
             | 
|  | |
| 458 |  | 
| 459 | 
             
                    with gr.Column(scale=2):
         | 
| 460 | 
            -
                         | 
| 461 | 
            -
             | 
| 462 | 
            -
             | 
| 463 | 
            -
                             | 
| 464 | 
            -
             | 
| 465 | 
            -
             | 
| 466 | 
            -
             | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 467 |  | 
|  | |
| 468 | 
             
                def toggle_ref_text_visibility(task):
         | 
| 469 | 
             
                    return gr.Textbox(visible=True) if task == "🔍 Locate Object by Reference" else gr.Textbox(visible=False)
         | 
| 470 |  | 
| 471 | 
            -
                task_type.change( | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 472 |  | 
| 473 | 
             
                submit_btn.click(
         | 
| 474 | 
            -
                    fn= | 
| 475 | 
            -
                    inputs=[ | 
| 476 | 
            -
                    outputs= | 
|  | |
|  | |
|  | |
|  | |
| 477 | 
             
                )
         | 
| 478 |  | 
| 479 |  | 
| 480 | 
             
            # ===== Launch =====
         | 
| 481 | 
             
            if __name__ == "__main__":
         | 
| 482 | 
             
                demo.queue(max_size=20)
         | 
| 483 | 
            -
                demo.launch()
         | 
|  | |
| 1 | 
             
            import os
         | 
| 2 | 
             
            os.environ.setdefault("HF_HUB_ENABLE_HF_TRANSFER", "0")  # disable hf_transfer if missing
         | 
| 3 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 4 | 
             
            import gradio as gr
         | 
| 5 | 
             
            import torch
         | 
| 6 | 
             
            from transformers import AutoModel, AutoTokenizer
         | 
| 7 | 
             
            import spaces
         | 
| 8 | 
            +
            import tempfile
         | 
| 9 | 
            +
            from PIL import Image
         | 
| 10 | 
            +
            import re
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 11 | 
             
            from gradio.themes import Soft
         | 
| 12 | 
             
            from gradio.themes.utils import fonts
         | 
| 13 | 
            +
            import fitz  # PyMuPDF for PDF processing
         | 
| 14 |  | 
| 15 | 
             
            # ===== Model Load =====
         | 
| 16 | 
             
            model_name = "deepseek-ai/DeepSeek-OCR"
         | 
|  | |
| 34 | 
             
                        pass
         | 
| 35 |  | 
| 36 |  | 
| 37 | 
            +
            def pdf_to_images(pdf_path, dpi=200):
                """
                Convert PDF pages to PIL Images using PyMuPDF.

                Args:
                    pdf_path: Path to PDF file
                    dpi: Resolution for rendering (default 200)
                Returns:
                    List of PIL Image objects in RGB mode, one per page, in page order
                """
                # 72 is the default DPI, so dpi / 72 is the zoom factor on both axes.
                # The matrix does not depend on the page, so build it once.
                zoom = dpi / 72
                mat = fitz.Matrix(zoom, zoom)

                images = []
                # The context manager closes the document even when rendering a page
                # raises, so the file handle is never leaked.
                with fitz.open(pdf_path) as pdf_document:
                    for page in pdf_document:
                        # alpha=False keeps the samples at 3 bytes per pixel, which is
                        # what the "RGB" mode passed to Image.frombytes expects.
                        pix = page.get_pixmap(matrix=mat, alpha=False)
                        img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
                        images.append(img)

                return images
         | 
| 61 | 
            +
             | 
| 62 | 
            +
             | 
| 63 | 
            +
            def process_single_page(image, model_runtime, tokenizer, model_size, task_type, ref_text, is_eval_mode, output_path):
                """
                Process a single page/image with DeepSeek-OCR.

                Args:
                    image: Page image; must provide a PIL-style ``save(path)`` method.
                    model_runtime: Model object exposing ``infer(tokenizer, ...)``.
                    tokenizer: Tokenizer forwarded to ``infer``.
                    model_size: Name of a size preset; unknown names use
                        "Gundam (Recommended)".
                    task_type: Task label selected in the UI; unknown labels fall back
                        to Free OCR.
                    ref_text: Reference text, only required for the Locate task.
                    is_eval_mode: Forwarded to ``infer`` as ``eval_mode``.
                    output_path: Existing directory used for the temporary page image
                        and passed to ``infer`` as its output directory.
                Returns:
                    The content of ``result.mmd`` if this call's inference produced a
                    non-empty one, otherwise the value returned by ``infer`` when it is
                    truthy, otherwise an empty string.
                Raises:
                    gr.Error: The Locate task was selected without reference text.
                """
                # ===== choose task prompt =====
                if task_type == "📄 Convert to Markdown":
                    prompt = "<image>\n<|grounding|>Convert the document to markdown."
                elif task_type == "📈 Parse Figure":
                    prompt = "<image>\nParse the figure."
                elif task_type == "🔍 Locate Object by Reference":
                    if not ref_text or ref_text.strip() == "":
                        raise gr.Error("Please provide reference text for the Locate task!")
                    prompt = f"<image>\nLocate <|ref|>{ref_text.strip()}<|/ref|> in the image."
                else:
                    # "📝 Free OCR" and any unrecognised task share the same prompt.
                    prompt = "<image>\nFree OCR."

                # save image temporarily
                temp_image_path = os.path.join(output_path, "temp_image.jpg")
                image.save(temp_image_path)

                # ===== size config =====
                size_configs = {
                    "Tiny": {"base_size": 512, "image_size": 512, "crop_mode": False},
                    "Small": {"base_size": 640, "image_size": 640, "crop_mode": False},
                    "Base": {"base_size": 1024, "image_size": 1024, "crop_mode": False},
                    "Large": {"base_size": 1280, "image_size": 1280, "crop_mode": False},
                    "Gundam (Recommended)": {"base_size": 1024, "image_size": 640, "crop_mode": True},
                }
                config = size_configs.get(model_size, size_configs["Gundam (Recommended)"])

                # The same output_path may be reused for several pages. Remove any
                # result file left by an earlier call so that a page whose inference
                # writes no result.mmd cannot return another page's markdown.
                markdown_result_path = os.path.join(output_path, "result.mmd")
                try:
                    os.remove(markdown_result_path)
                except FileNotFoundError:
                    pass

                # ===== inference =====
                with torch.no_grad():
                    plain_text_result = model_runtime.infer(
                        tokenizer,
                        prompt=prompt,
                        image_file=temp_image_path,
                        output_path=output_path,
                        base_size=config["base_size"],
                        image_size=config["image_size"],
                        crop_mode=config["crop_mode"],
                        save_results=True,
                        test_compress=True,
                        eval_mode=is_eval_mode,
                    )

                # ===== collect markdown result =====
                markdown_content = ""
                try:
                    with open(markdown_result_path, "r", encoding="utf-8") as f:
                        markdown_content = f.read()
                except (OSError, UnicodeError):
                    # Best effort: a missing or unreadable result file falls through
                    # to the plain-text result below.
                    markdown_content = ""

                # If no markdown file, use plain text result
                if not markdown_content and plain_text_result:
                    markdown_content = plain_text_result

                return markdown_content
         | 
| 127 |  | 
| 128 |  | 
| 129 | 
            +
            # ===== Main Processing Function =====
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 130 | 
             
            @spaces.GPU
            def process_pdf(pdf_file, model_size, task_type, ref_text, is_eval_mode, progress=gr.Progress()):
                """
                Process PDF with DeepSeek-OCR and return combined markdown from all pages.

                Args:
                    pdf_file: Uploaded PDF, either a filesystem path (what a
                        ``gr.File(type="filepath")`` input passes) or a file-like
                        object exposing the path as ``.name``; ``None`` if nothing
                        was uploaded.
                    model_size: Size preset name forwarded to ``process_single_page``.
                    task_type: Task label forwarded to ``process_single_page``.
                    ref_text: Reference text forwarded to ``process_single_page``.
                    is_eval_mode: Evaluation-mode flag forwarded to ``process_single_page``.
                    progress: Gradio progress tracker used to report per-page progress.
                Returns:
                    A markdown string with a document header followed by one section per
                    page, or a human-readable message when no file was given, the PDF has
                    no pages, or processing failed.
                """
                if pdf_file is None:
                    return "Please upload a PDF file first."

                # The upload component is declared with type="filepath", so the value is
                # a plain path string; taking `.name` of it raised AttributeError on
                # every request. File-like objects with `.name` are still accepted.
                if isinstance(pdf_file, (str, os.PathLike)):
                    pdf_path = pdf_file
                else:
                    pdf_path = pdf_file.name

                # handle CPU/GPU
                if torch.cuda.is_available():
                    model_runtime = model.to("cuda", dtype=torch.bfloat16)
                else:
                    model_runtime = model.to("cpu", dtype=torch.float32)

                try:
                    # Convert PDF to images
                    progress(0, desc="Converting PDF to images...")
                    images = pdf_to_images(pdf_path)
                    total_pages = len(images)

                    if total_pages == 0:
                        return "No pages found in the PDF."

                    progress(0.1, desc=f"Found {total_pages} pages. Starting OCR...")

                    # Process each page
                    all_markdown_results = []

                    # One scratch directory is shared by all pages and removed afterwards.
                    with tempfile.TemporaryDirectory() as output_path:
                        for page_num, image in enumerate(images, start=1):
                            # Report the fraction of pages already finished; the first
                            # 10% of the bar is reserved for the PDF conversion step.
                            progress(
                                ((page_num - 1) / total_pages) * 0.9 + 0.1,
                                desc=f"Processing page {page_num}/{total_pages}..."
                            )

                            markdown_content = process_single_page(
                                image,
                                model_runtime,
                                tokenizer,
                                model_size,
                                task_type,
                                ref_text,
                                is_eval_mode,
                                output_path
                            )

                            # Add page separator
                            page_header = f"\n\n---\n\n# Page {page_num}\n\n"
                            all_markdown_results.append(page_header + markdown_content)

                    # Combine all results
                    progress(1.0, desc="Finalizing...")
                    combined_markdown = "\n\n".join(all_markdown_results)

                    # Add document header
                    final_output = f"# Document OCR Results\n\n**Total Pages:** {total_pages}\n\n{combined_markdown}"

                    return final_output

                except Exception as e:
                    # Top-level UI boundary: surface the failure as text in the output
                    # box instead of crashing the request.
                    return f"Error processing PDF: {str(e)}"
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 191 |  | 
| 192 |  | 
| 193 | 
             
            # ===== Theme and UI =====
         | 
|  | |
| 207 |  | 
| 208 | 
             
            # ===== Interface =====
         | 
| 209 | 
             
            # Build the Gradio page: an intro banner, a left column with the upload and
            # OCR options, and a right column with the rendered and raw markdown output.
            with gr.Blocks(
                title="DeepSeek-OCR PDF Parser by Jatevo LLM Inference",
                theme=theme,
                css=custom_css,
            ) as demo:
                gr.Markdown(
                    """
                    # 📄 DeepSeek-OCR PDF Parser by Jatevo LLM Inference
                    
                    Upload a PDF to extract text and convert to Markdown using **DeepSeek-OCR**.  
                    Each page is processed sequentially and combined into a single markdown document.
                    
                    **Model Sizes:**
                    - **Tiny** — Fastest, lower accuracy (512×512)
                    - **Small** — Fast, good accuracy (640×640)
                    - **Base** — Balanced performance (1024×1024)
                    - **Large** — Best accuracy, slower (1280×1280)
                    - **Gundam (Recommended)** — Optimized for documents (1024 base, 640 image, crop mode)
                    
                    **Note:** Processing time depends on the number of pages and model size.
                    """
                )

                with gr.Row():
                    # Left column: input file and processing options.
                    with gr.Column(scale=1):
                        # type="filepath" is requested here, so the click handler is
                        # given the uploaded file's path rather than its contents.
                        pdf_input = gr.File(
                            label="Upload PDF",
                            file_types=[".pdf"],
                            type="filepath"
                        )

                        # Choices must match the preset names used by process_single_page.
                        model_size = gr.Dropdown(
                            choices=["Tiny", "Small", "Base", "Large", "Gundam (Recommended)"],
                            value="Gundam (Recommended)",
                            label="Model Size",
                        )

                        # Labels must match the task strings compared in process_single_page.
                        task_type = gr.Dropdown(
                            choices=[
                                "📝 Free OCR",
                                "📄 Convert to Markdown",
                                "📈 Parse Figure",
                                "🔍 Locate Object by Reference",
                            ],
                            value="📄 Convert to Markdown",
                            label="Task Type",
                        )

                        # Hidden until the Locate task is selected (see task_type.change below).
                        ref_text_input = gr.Textbox(
                            label="Reference Text (for Locate task)",
                            placeholder="e.g., 'the teacher', '20-10', 'a red car'...",
                            visible=False,
                        )

                        eval_mode_checkbox = gr.Checkbox(
                            value=False,
                            label="Enable Evaluation Mode",
                            info="Returns only plain text (faster).",
                        )

                        submit_btn = gr.Button("🚀 Process PDF", variant="primary", size="lg")

                    # Right column: rendered preview on top, copyable source below.
                    with gr.Column(scale=2):
                        gr.Markdown("### 📝 Markdown Output")
                        output_markdown_preview = gr.Markdown(
                            label="Rendered Markdown",
                            value="*Upload a PDF and click 'Process PDF' to see results here.*"
                        )
                        
                        gr.Markdown("### 📄 Markdown Source (Copy/Download)")
                        output_text = gr.Textbox(
                            label="Raw Markdown",
                            lines=25,
                            show_copy_button=True,
                            interactive=False,
                            placeholder="Markdown source will appear here..."
                        )

                # show/hide reference text box based on selected task
                def toggle_ref_text_visibility(task):
                    """Return a Textbox update that is visible only for the Locate task."""
                    return gr.Textbox(visible=True) if task == "🔍 Locate Object by Reference" else gr.Textbox(visible=False)

                task_type.change(
                    fn=toggle_ref_text_visibility,
                    inputs=task_type,
                    outputs=ref_text_input,
                )

                def update_outputs(markdown_text):
                    """Update both markdown preview and raw text with the same string."""
                    return markdown_text, markdown_text

                # Two-step chain: process_pdf fills the raw textbox first, then
                # update_outputs copies that text into the rendered preview as well.
                submit_btn.click(
                    fn=process_pdf,
                    inputs=[pdf_input, model_size, task_type, ref_text_input, eval_mode_checkbox],
                    outputs=output_text,
                ).then(
                    fn=update_outputs,
                    inputs=output_text,
                    outputs=[output_markdown_preview, output_text]
                )
         | 
| 310 |  | 
| 311 |  | 
| 312 | 
             
            # ===== Launch =====
         | 
| 313 | 
             
            if __name__ == "__main__":
                # Only when run as a script: enable the request queue (max_size=20),
                # then start the app.
                demo.queue(max_size=20)
                demo.launch()
         | 
