File size: 4,544 Bytes
4c591ce
 
 
 
 
 
 
0f1f0f4
4c591ce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
874a8bc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import os
import shutil
import time
import uuid
from pathlib import Path
import gradio as gr

# Root directory for persisted data; taken from the PERSIST_ROOT environment
# variable, falling back to ./data when it is unset.
PERSIST_ROOT = Path(os.environ.get("PERSIST_ROOT", "./data"))

# Generated PDFs are moved here; create it (and any parents) at import time.
OUT_DIR = PERSIST_ROOT.joinpath("outputs")
OUT_DIR.mkdir(exist_ok=True, parents=True)

from commonforms import prepare_form  # API documented in the repo README

def run_commonforms(
    input_pdf_path: str,
    model_name: str,
    keep_existing_fields: bool,
    use_signature_fields: bool,
    image_size: int,
    confidence: float,
) -> tuple[str, str]:
    """
    Run CommonForms on a PDF and return (download_path, status_message).

    The input is copied into a per-run scratch directory, ``prepare_form``
    writes its result there, and the result is then moved into ``OUT_DIR``.
    The scratch directory is removed afterwards, whether or not the run
    succeeded.

    Args:
        input_pdf_path: Path of the PDF to process.
        model_name: Passed to ``prepare_form`` as ``model_or_path``.
        keep_existing_fields: Forwarded to ``prepare_form`` unchanged.
        use_signature_fields: Forwarded to ``prepare_form`` unchanged.
        image_size: Forwarded to ``prepare_form`` unchanged.
        confidence: Forwarded to ``prepare_form`` unchanged.

    Returns:
        A tuple of the output file path (as ``str``) and a human-readable
        status message naming the output file and the device used.

    Raises:
        Any exception raised while copying the input, running
        ``prepare_form`` or moving the output propagates to the caller.
    """
    # Timestamp plus a random suffix keeps concurrent runs from colliding.
    work_id = f"{int(time.time())}-{uuid.uuid4().hex[:8]}"

    src = Path(input_pdf_path)
    out_name = f"fillable-{src.stem}-{work_id}.pdf"
    out_final = OUT_DIR / out_name

    try:
        import torch  # optional, only for device detect
        device = "cuda" if torch.cuda.is_available() else "cpu"
    except Exception:
        # Deliberately broad: any failure while importing or probing torch
        # simply falls back to CPU.
        device = "cpu"

    work_dir = Path("/tmp") / f"cf-{work_id}"
    work_dir.mkdir(parents=True, exist_ok=True)
    try:
        in_path = work_dir / f"input{src.suffix or '.pdf'}"
        shutil.copy2(src, in_path)
        out_tmp = work_dir / out_name

        # Call CommonForms (kwargs mirror the CLI flags in README)
        prepare_form(
            in_path,
            out_tmp,
            model_or_path=model_name,                # "FFDNet-L" or "FFDNet-S" or path to .pt
            keep_existing_fields=keep_existing_fields,
            use_signature_fields=use_signature_fields,
            device=device,                           # "cpu" | "cuda" | "0"
            image_size=image_size,                   # e.g. 1600
            confidence=confidence,                   # e.g. 0.3
        )

        shutil.move(out_tmp, out_final)
    finally:
        # Always drop the scratch directory (input copy and any partial
        # output) so repeated or failed runs do not accumulate files in /tmp.
        shutil.rmtree(work_dir, ignore_errors=True)

    return str(out_final), f"✅ Generated {out_name} on {device}"

def _process(pdf, model_name, keep_existing_fields, use_signature_fields, image_size, confidence):
    """
    Gradio click handler: validate the upload, run CommonForms, report status.

    Args:
        pdf: The uploaded file, either a path string or an object exposing
            the path as a ``name`` attribute; ``None`` when nothing was
            uploaded.
        model_name, keep_existing_fields, use_signature_fields, image_size,
        confidence: Forwarded unchanged to ``run_commonforms``.

    Returns:
        A ``(download_path, file_path, status_message)`` tuple. Both paths
        are the same generated file on success and ``None`` on any failure;
        failures are reported through the status message rather than raised.
    """
    if pdf is None:
        return None, None, "Please upload a PDF."

    # Accept either a plain path or a file-like wrapper carrying `.name`.
    in_path = pdf if isinstance(pdf, str) else getattr(pdf, "name", None)
    if not in_path or not os.path.exists(in_path):
        return None, None, "Could not locate uploaded file."

    try:
        path, msg = run_commonforms(
            in_path,
            model_name,
            keep_existing_fields,
            use_signature_fields,
            image_size,
            confidence,
        )
    except Exception as e:
        # UI boundary: surface the failure in the status box instead of
        # letting the exception escape the handler.
        return None, None, f"❌ Error: {e}"

    # DownloadButton expects a filepath; File can mirror the same path.
    return path, path, msg

# UI definition: upload + options on the left, download/preview on the right.
with gr.Blocks(title="CommonForms — PDF → Fillable PDF") as demo:
    gr.Markdown(
        """
        # CommonForms — PDF → Fillable PDF
        Upload a PDF, select options, and generate a fillable PDF.

        [📄 CommonForms Paper](https://arxiv.org/abs/2509.16506) | [🤗 Dataset](https://huggingface.co/datasets/jbarrow/CommonForms) | [🤗 FFDNet-L](https://huggingface.co/jbarrow/FFDNet-L) | [🤗 FFDNet-S](https://huggingface.co/jbarrow/FFDNet-S) | [💾 Code](https://github.com/jbarrow/commonforms)
        """
    )

    with gr.Row():
        # Left column: input file, model/options, run button and status.
        with gr.Column(scale=2):
            pdf_in = gr.File(label="Upload PDF", file_types=[".pdf"], type="filepath")

            with gr.Row():
                model_name = gr.Dropdown(
                    label="Detector model",
                    choices=["FFDNet-L", "FFDNet-S"],
                    value="FFDNet-L",
                )

            with gr.Row():
                keep_existing_fields = gr.Checkbox(label="Keep existing fields", value=False)
                use_signature_fields = gr.Checkbox(label="Use signature widgets for signatures", value=True)

            with gr.Row():
                image_size = gr.Slider(800, 2048, value=1600, step=32, label="Image size")
                confidence = gr.Slider(0.05, 0.9, value=0.30, step=0.05, label="Confidence threshold")

            run_btn = gr.Button("Run CommonForms", variant="primary")
            status = gr.Textbox(label="Status", interactive=False)

        # Right column: the generated file, offered two ways.
        with gr.Column(scale=1):
            download_btn = gr.DownloadButton(label="Download Fillable PDF", value=None)
            file_out = gr.File(label="Output (preview / alt download)")

    # Input order must match _process's positional parameters; output order
    # must match its (download_path, file_path, status_message) return tuple.
    run_btn.click(
        _process,
        inputs=[pdf_in, model_name, keep_existing_fields, use_signature_fields, image_size, confidence],
        outputs=[download_btn, file_out, status],
        queue=True,
        api_name="run",
    )

# Limit default concurrency to 1 (safe for ZeroGPU / GPU-bound ops).
# The click handler sets no concurrency_limit of its own, so state the
# queue-wide default explicitly rather than relying on the library default.
demo.queue(default_concurrency_limit=1).launch()