lucacadalora committed (verified)
Commit d7ea6d3 · 1 Parent(s): d71bc6d

Update app.py

Files changed (1):
  1. app.py (+182 -66)
app.py CHANGED
@@ -5,6 +5,7 @@ import io
 import base64
 import re
 import tempfile
+from typing import List, Tuple, Optional

 import gradio as gr
 import torch
@@ -76,32 +77,28 @@ def _img_to_data_uri(pil_img, fmt="PNG"):
     pil_img.save(buf, format=fmt)
     return f"data:image/{fmt.lower()};base64,{base64.b64encode(buf.getvalue()).decode()}"

-def _df_to_chart_data_uri(df):
+def _df_to_chart_data_uri(df: "pd.DataFrame") -> Optional[str]:
     """Render a simple chart from a DataFrame using matplotlib (single figure, no explicit colors)."""
     if not _HAS_MPL or not _HAS_PANDAS:
         return None
-    # Heuristics: first column is x-axis if non-numeric or unique labels; otherwise use index
     try:
+        # basic heuristics: first col is x-axis if non-numeric-ish
         plt.figure() # one plot per chart (no subplots; no explicit colors)
-        if df.shape[1] >= 2:
-            # Assume first column is x if not numeric or contains non-monotonic categories
-            x = df.columns[0]
-            maybe_x = df[x]
-            numeric_x = pd.to_numeric(maybe_x, errors="coerce")
-            if numeric_x.isna().any() or maybe_x.nunique() < len(maybe_x):
-                # categorical-ish: set as index
-                df_plot = df.set_index(x)
-            else:
-                # numeric x: also set as index so matplotlib draws a line plot
-                df_plot = df.set_index(x)
-            # If there are many series, default to line; if few, bar
-            if df_plot.shape[1] <= 5:
-                df_plot.plot(kind="bar")
-            else:
-                df_plot.plot()
+        df_plot = df.copy()
+
+        # If first column is non-numeric, set it as index
+        if df_plot.shape[1] >= 2:
+            xcol = df_plot.columns[0]
+            numeric_x = pd.to_numeric(df_plot[xcol], errors="coerce")
+            if numeric_x.isna().any():
+                df_plot = df_plot.set_index(xcol)
+
+        # bar for <=5 series, else line
+        if df_plot.shape[1] <= 5:
+            df_plot.plot(kind="bar")
         else:
-            # Single series: plot it
-            df.plot()
+            df_plot.plot()
+
         buf = io.BytesIO()
         plt.tight_layout()
         plt.savefig(buf, format="PNG", dpi=160)
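Reviewer note (not part of the commit): the reworked heuristic is easy to sanity-check outside the Space. A minimal standalone sketch that mirrors the committed logic, assuming only pandas is installed (names here are illustrative):

    import pandas as pd

    df = pd.DataFrame({"quarter": ["Q1", "Q2", "Q3"], "revenue": [10, 12, 15]})
    # non-numeric first column -> becomes the index, as in _df_to_chart_data_uri
    x = pd.to_numeric(df[df.columns[0]], errors="coerce")
    plot_df = df.set_index(df.columns[0]) if x.isna().any() else df
    kind = "bar" if plot_df.shape[1] <= 5 else "line"
    print(kind, list(plot_df.index))  # -> bar ['Q1', 'Q2', 'Q3']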
@@ -110,7 +107,7 @@ def _df_to_chart_data_uri(df):
     except Exception:
         return None

-def _html_table_to_df(html):
+def _html_table_to_df(html: str) -> Optional["pd.DataFrame"]:
     """Pick the largest <table> from HTML; return as DataFrame or None."""
     if not _HAS_READ_HTML:
         return None
@@ -118,16 +115,91 @@ def _html_table_to_df(html):
         tables = pd.read_html(html) # list[DataFrame]
         if not tables:
             return None
-        # choose the table with max cells
         return max(tables, key=lambda t: (t.shape[0] * t.shape[1]))
     except Exception:
         return None

+# --- Fallbacks when we don't get HTML tables ---
+_MD_TABLE_BLOCK_RE = re.compile(
+    r"(?:^\s*\|.+\|\s*$\n^\s*\|(?:\s*:?-+:?\s*\|)+\s*$\n(?:^\s*\|.+\|\s*$\n?)+)",
+    flags=re.MULTILINE
+)
+
+def _md_table_to_df(md_text: str) -> Optional["pd.DataFrame"]:
+    """Parse the first Markdown pipe-table into a DataFrame."""
+    if not _HAS_PANDAS:
+        return None
+    try:
+        m = _MD_TABLE_BLOCK_RE.search(md_text or "")
+        if not m:
+            return None
+        block = m.group(0).strip()
+        # Normalize: remove alignment row, split by pipes
+        lines = [ln.strip() for ln in block.splitlines() if ln.strip()]
+        if len(lines) < 2:
+            return None
+        header = [h.strip() for h in lines[0].strip("|").split("|")]
+        align_or_sep = lines[1]
+        data_lines = lines[2:] if re.search(r":?-+:?", align_or_sep) else lines[1:]
+        rows = []
+        for ln in data_lines:
+            parts = [p.strip() for p in ln.strip("|").split("|")]
+            if len(parts) == len(header):
+                rows.append(parts)
+        if not rows:
+            return None
+        df = pd.DataFrame(rows, columns=header)
+        # try cast numerics where possible
+        for c in df.columns:
+            df[c] = pd.to_numeric(df[c], errors="ignore")
+        return df
+    except Exception:
+        return None
+
+def _numeric_block_to_df(text: str) -> Optional["pd.DataFrame"]:
+    """Very rough fallback: parse whitespace/csv-ish numeric blocks into a DataFrame."""
+    if not _HAS_PANDAS:
+        return None
+    # grab the largest numeric-ish block: lines containing numbers and separators
+    blocks = []
+    cur = []
+    for ln in (text or "").splitlines():
+        if re.search(r"\d", ln) and ("," in ln or "\t" in ln or " " in ln or "|" in ln):
+            cur.append(ln)
+        else:
+            if cur:
+                blocks.append("\n".join(cur))
+            cur = []
+    if cur:
+        blocks.append("\n".join(cur))
+    if not blocks:
+        return None
+    block = max(blocks, key=len)
+
+    # try CSV first
+    try:
+        from io import StringIO
+        df = pd.read_csv(StringIO(block))
+        if df.shape[1] >= 2:
+            return df
+    except Exception:
+        pass
+
+    # try whitespace sep
+    try:
+        from io import StringIO
+        df = pd.read_csv(StringIO(block), sep=r"\s+", engine="python")
+        if df.shape[1] >= 2:
+            return df
+    except Exception:
+        pass
+    return None
+
 _SMILES_REGEX = re.compile(r"(?:SMILES|Smiles)\s*[::]\s*([A-Za-z0-9@\[\]\(\)\+\-\=\\\/%]+)")

-def _render_smiles_block(text):
+def _render_smiles_block(text: str) -> List[Tuple[str, str]]:
     """Find SMILES in text, render with RDKit, return list[(title, data_uri)]."""
-    assets = []
+    assets: List[Tuple[str, str]] = []
     if not _HAS_RDKIT:
         return assets
     try:
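Reviewer note (not part of the commit): a standalone sketch of what the new pipe-table fallback is meant to catch, assuming only pandas is installed. The committed `_md_table_to_df` does the same thing via `_MD_TABLE_BLOCK_RE`, and additionally casts columns with `pd.to_numeric(..., errors="ignore")`, which recent pandas releases deprecate in favour of an explicit coerce/try.

    import pandas as pd

    md = "| year | sales |\n|------|-------|\n| 2022 | 10 |\n| 2023 | 14 |\n"
    lines = [ln.strip() for ln in md.splitlines() if ln.strip()]
    header = [h.strip() for h in lines[0].strip("|").split("|")]
    rows = [[c.strip() for c in ln.strip("|").split("|")] for ln in lines[2:]]  # skip alignment row
    df = pd.DataFrame(rows, columns=header).apply(pd.to_numeric, errors="coerce")
    print(df.dtypes)  # year and sales both numeric -> chartable by _df_to_chart_data_uri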
@@ -142,16 +214,52 @@ def _render_smiles_block(text):
         pass
     return assets

+def _assets_to_markdown_section(assets: List[Tuple[str, str]], parsed_text: str) -> str:
+    out = ["\n\n---\n\n### Parsed Figures (auto-rendered)\n"]
+    if not assets and not parsed_text.strip():
+        out.append("_No deep-parsed content detected._\n")
+        return "".join(out)
+
+    for title, data_uri in assets:
+        if data_uri:
+            out.append(f"**{title}**\n\n![]({data_uri})\n\n")
+    # Always expose a short snippet so you can see what the model returned
+    snippet = parsed_text.strip()
+    if len(snippet) > 4000:
+        snippet = snippet[:4000] + "\n<!-- truncated -->"
+    if snippet:
+        out.append("**Raw deep-parse output (snippet)**\n\n```text\n")
+        out.append(snippet)
+        out.append("\n```\n")
+    return "".join(out)
+
+def _assets_to_html_section(assets: List[Tuple[str, str]], parsed_text: str) -> str:
+    """Simple HTML block for the HTML Preview tab; ensures images render even if Markdown sanitizer blocks data URIs."""
+    parts = [
+        '<section class="parsed-figures"><h3>Parsed Figures (auto-rendered)</h3>'
+    ]
+    if not assets and not parsed_text.strip():
+        parts.append("<p><em>No deep-parsed content detected.</em></p></section>")
+        return "".join(parts)
+    for title, data_uri in assets:
+        if data_uri:
+            parts.append(f'<figure><figcaption><strong>{title}</strong></figcaption><img style="max-width:100%;height:auto" src="{data_uri}"/></figure>')
+    if parsed_text.strip():
+        safe = parsed_text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
+        parts.append(f"<details><summary>Raw deep-parse output (snippet)</summary><pre>{safe[:8000]}</pre></details>")
+    parts.append("</section>")
+    return "".join(parts)
+

 # ===== Inference Function =====
 @spaces.GPU
 def process_image(image, model_size, task_type, ref_text, is_eval_mode, deep_parse=True):
     """
-    Process image with DeepSeek-OCR and return annotated image, markdown, and text.
-    Adds deep parsing for figures to render charts (from HTML tables) and chemistry (from SMILES).
+    Process image with DeepSeek-OCR and return annotated image, markdown, html, and text.
+    Adds deep parsing for figures to render charts (from tables) and chemistry (from SMILES).
     """
     if image is None:
-        return None, "Please upload an image first.", "Please upload an image first."
+        return None, "Please upload an image first.", "<p>Please upload an image first.</p>", "Please upload an image first."

     # handle CPU/GPU
     if torch.cuda.is_available():
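Reviewer note (not part of the commit): both new section builders consume the same `(title, data_uri)` asset list that `process_image` accumulates; a minimal illustration of that contract (the URI below is a truncated placeholder, not real image data):

    from typing import List, Tuple

    assets: List[Tuple[str, str]] = [
        ("Figure (re-rendered from parsed data)", "data:image/png;base64,iVBORw0KGgo..."),
    ]
    parsed_text = "| year | sales |\n|------|-------|\n| 2022 | 10 |"
    # _assets_to_markdown_section(assets, parsed_text) -> "### Parsed Figures (auto-rendered)" Markdown block
    # _assets_to_html_section(assets, parsed_text)     -> '<section class="parsed-figures">...' HTML block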
@@ -241,13 +349,10 @@ def process_image(image, model_size, task_type, ref_text, is_eval_mode, deep_parse=True):
     result_image = image_with_bboxes

     # ===== DEEP PARSING & RENDERING (secondary pass) =====
-    # Enable when:
-    # - Task is Convert to Markdown (common flow), or
-    # - Task is Parse Figure (primary content already requested), or
-    # - User checked deep_parse (default True)
     should_run_deep = deep_parse and task_type in {"📄 Convert to Markdown", "📈 Parse Figure"}

-    deep_assets = [] # list of tuples (title, data_uri or None), appended to markdown
+    deep_assets: List[Tuple[str, str]] = [] # (title, data_uri)
+    parsed_text = ""

     def _run_deep_parse(prompt_text):
         with torch.no_grad():
@@ -261,51 +366,60 @@ def process_image(image, model_size, task_type, ref_text, is_eval_mode, deep_parse=True):
                 crop_mode=config["crop_mode"],
                 save_results=False,
                 test_compress=True,
-                eval_mode=True, # text is enough
+                eval_mode=True,
             )

-    parsed_text = ""
     if should_run_deep:
         try:
             parsed_text = _run_deep_parse("<image>\nParse the figure.") or ""
         except Exception:
             parsed_text = ""

-        # 1) If there is an HTML table → build a chart
+        # 1) Charts/tables:
+        df = None
         if "<table" in parsed_text.lower() and _HAS_PANDAS:
-            try:
-                df = _html_table_to_df(parsed_text)
-                if df is not None:
-                    chart_uri = _df_to_chart_data_uri(df)
-                    if chart_uri:
-                        deep_assets.append(("Figure (re-rendered from parsed table)", chart_uri))
-                    # Add raw HTML table for copy/paste even if chart failed
-                    deep_assets.append(("Parsed table (HTML)", None))
-            except Exception:
-                pass
-
-        # 2) If there are SMILES → render molecules (if RDKit available)
-        smiles_assets = _render_smiles_block(parsed_text)
-        deep_assets.extend(smiles_assets)
-
-    # ===== Append deep assets into the Markdown so they show in the preview =====
-    if deep_assets:
-        md_parts = [markdown_content, "\n\n---\n\n### Parsed Figures (auto-rendered)\n"]
-        for title, data_uri in deep_assets:
-            if data_uri:
-                md_parts.append(f"**{title}**\n\n![]({data_uri})\n")
-            else:
-                # raw parsed output (e.g., table HTML) fenced for readability
-                # Keep size sensible; truncate huge content
-                snippet = parsed_text.strip()
-                if len(snippet) > 6000:
-                    snippet = snippet[:6000] + "\n<!-- truncated -->"
-                md_parts.append(f"**{title}**\n\n```html\n{snippet}\n```\n")
-        markdown_content = "\n".join(md_parts)
+            df = _html_table_to_df(parsed_text)
+
+        if df is None: # fallback: markdown pipe-table
+            df = _md_table_to_df(parsed_text)
+
+        if df is None: # fallback: generic numeric block
+            df = _numeric_block_to_df(parsed_text)
+
+        if df is not None:
+            chart_uri = _df_to_chart_data_uri(df)
+            if chart_uri:
+                deep_assets.append(("Figure (re-rendered from parsed data)", chart_uri))
+
+        # 2) Chemistry (SMILES)
+        deep_assets.extend(_render_smiles_block(parsed_text))
+
+    # ===== Append deep assets into the Markdown + build HTML preview =====
+    html_preview = "" # for HTML tab
+    if task_type == "📄 Convert to Markdown":
+        # extend markdown with a diagnostic/asset section regardless of success,
+        # so you can see whether deep-parse attempted
+        markdown_content = markdown_content + _assets_to_markdown_section(deep_assets, parsed_text)
+        html_preview = _assets_to_html_section(deep_assets, parsed_text)
+    elif task_type == "📈 Parse Figure":
+        # just show what we got from deep parse
+        header = "# Parse Figure\n\n"
+        body = _assets_to_markdown_section(deep_assets, parsed_text)
+        markdown_content = header + body
+        html_preview = _assets_to_html_section(deep_assets, parsed_text)
+    else:
+        # other tasks: keep as-is, but still provide an HTML tab with any assets
+        if deep_assets or parsed_text.strip():
+            markdown_content = markdown_content + _assets_to_markdown_section(deep_assets, parsed_text)
+            html_preview = _assets_to_html_section(deep_assets, parsed_text)
+        else:
+            html_preview = "<p>No parsed-figure content.</p>"

     # ===== Decide what to show in the "Markdown Source (or Eval Output)" tab =====
     text_result = plain_text_result if plain_text_result else markdown_content
-    return result_image, markdown_content, text_result
+
+    # return (image, markdown, html, text)
+    return result_image, markdown_content, html_preview, text_result


 # ===== Theme and UI =====
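Reviewer note (not part of the commit): the chart-data source now degrades through three parsers before giving up. A compact sketch of that order, written as a hypothetical wrapper around the committed helpers:

    def _chart_df_from(parsed_text):  # hypothetical name; mirrors the committed control flow
        df = None
        if "<table" in parsed_text.lower():
            df = _html_table_to_df(parsed_text)      # 1) HTML <table> via pandas.read_html
        if df is None:
            df = _md_table_to_df(parsed_text)        # 2) Markdown pipe-table
        if df is None:
            df = _numeric_block_to_df(parsed_text)   # 3) raw CSV/whitespace numeric block
        return df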
@@ -393,6 +507,8 @@ with gr.Blocks(
                     output_image = gr.Image(interactive=False)
                 with gr.TabItem("Markdown Preview"):
                     output_markdown = gr.Markdown()
+                with gr.TabItem("Rendered HTML (figures)"):
+                    output_html = gr.HTML()
                 with gr.TabItem("Markdown Source (or Eval Output)"):
                     output_text = gr.Textbox(
                         lines=20, show_copy_button=True, interactive=False
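Reviewer note (not part of the commit): the new tab exists because the Markdown component may sanitize data-URI images, while `gr.HTML` renders them directly. A standalone check, assuming gradio and Pillow are installed:

    import base64, io
    import gradio as gr
    from PIL import Image

    buf = io.BytesIO()
    Image.new("RGB", (80, 40), (200, 200, 200)).save(buf, format="PNG")
    uri = "data:image/png;base64," + base64.b64encode(buf.getvalue()).decode()

    with gr.Blocks() as demo:
        gr.HTML(f'<img src="{uri}" alt="test figure"/>')
    # demo.launch()  # uncomment to confirm the inline image renders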
@@ -411,7 +527,7 @@ with gr.Blocks(
     submit_btn.click(
         fn=process_image,
         inputs=[image_input, model_size, task_type, ref_text_input, eval_mode_checkbox, deep_parse_checkbox],
-        outputs=[output_image, output_markdown, output_text],
+        outputs=[output_image, output_markdown, output_html, output_text],
     )
