jbarrow committed on
Commit
4c591ce
·
1 Parent(s): 034e8f8

add application file

Browse files
Files changed (1) hide show
  1. main.py +123 -0
main.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import logging
import os
import shutil
import tempfile
import time
import uuid
from pathlib import Path

import gradio as gr
7
+
8
# Root directory for generated files. Defaults to /data and can be overridden
# with the PERSIST_ROOT environment variable.
PERSIST_ROOT = Path(os.getenv("PERSIST_ROOT", "/data"))
OUT_DIR = PERSIST_ROOT / "outputs"
try:
    OUT_DIR.mkdir(parents=True, exist_ok=True)
except OSError as exc:
    # The configured root may be missing or read-only (e.g. no persistent
    # volume attached). Fall back to a temp location instead of crashing the
    # whole app at import time; outputs there are not expected to persist.
    PERSIST_ROOT = Path(tempfile.gettempdir()) / "commonforms"
    OUT_DIR = PERSIST_ROOT / "outputs"
    OUT_DIR.mkdir(parents=True, exist_ok=True)
    logging.getLogger(__name__).warning(
        "Could not create output directory under the configured root (%s); "
        "falling back to %s",
        exc,
        OUT_DIR,
    )
11
+
12
+ from commonforms import prepare_form # API documented in the repo README
13
+
14
def run_commonforms(
    input_pdf_path: str,
    model_name: str,
    keep_existing_fields: bool,
    use_signature_fields: bool,
    image_size: int,
    confidence: float,
) -> tuple[str, str]:
    """Run CommonForms on a PDF and return ``(download_path, status_message)``.

    The input is copied into a private scratch directory, ``prepare_form`` is
    called on that copy, and the result is moved into ``OUT_DIR`` under a
    unique name. The scratch directory is removed afterwards, whether or not
    the run succeeded.

    Args:
        input_pdf_path: Path of the PDF to process.
        model_name: Forwarded as ``model_or_path``
            ("FFDNet-L", "FFDNet-S", or a path to a .pt file).
        keep_existing_fields: Forwarded unchanged to ``prepare_form``.
        use_signature_fields: Forwarded unchanged to ``prepare_form``.
        image_size: Forwarded to ``prepare_form`` as an ``int`` (e.g. 1600).
        confidence: Forwarded to ``prepare_form`` as a ``float`` (e.g. 0.3).

    Returns:
        A tuple of the final output path (as ``str``) and a human-readable
        status line naming the output file and the device used.

    Raises:
        Any exception raised while copying the input, by ``prepare_form``, or
        while moving the result propagates to the caller.
    """
    # Timestamp plus random suffix keeps output names unique across runs.
    work_id = f"{int(time.time())}-{uuid.uuid4().hex[:8]}"

    src = Path(input_pdf_path)
    out_name = f"fillable-{src.stem}-{work_id}.pdf"
    out_final = OUT_DIR / out_name

    # torch is optional here and only consulted to pick a device; any failure
    # while importing or probing it deliberately falls back to CPU.
    try:
        import torch

        device = "cuda" if torch.cuda.is_available() else "cpu"
    except Exception:
        device = "cpu"

    # TemporaryDirectory guarantees the scratch copy (and any partial output)
    # is deleted even when prepare_form raises.
    with tempfile.TemporaryDirectory(prefix=f"cf-{work_id}-") as scratch:
        work_dir = Path(scratch)
        in_path = work_dir / f"input{src.suffix or '.pdf'}"
        shutil.copy2(src, in_path)
        out_tmp = work_dir / out_name

        # Call CommonForms (kwargs mirror the CLI flags in README)
        prepare_form(
            in_path,
            out_tmp,
            model_or_path=model_name,  # "FFDNet-L" or "FFDNet-S" or path to .pt
            keep_existing_fields=keep_existing_fields,
            use_signature_fields=use_signature_fields,
            device=device,  # "cpu" | "cuda" | "0"
            # UI sliders may deliver numbers as floats; coerce to the
            # annotated types before handing them on.
            image_size=int(image_size),  # e.g. 1600
            confidence=float(confidence),  # e.g. 0.3
        )

        # Move out of the scratch dir before it is cleaned up.
        shutil.move(str(out_tmp), str(out_final))

    return str(out_final), f"✅ Generated {out_name} on {device}"
57
+
58
def _process(pdf, model_name, keep_existing_fields, use_signature_fields, image_size, confidence):
    """Gradio click handler: validate the upload, run CommonForms, report status.

    Args:
        pdf: The uploaded file, either a filesystem path (``str``) or an
            object exposing the path as ``.name``; ``None`` when nothing
            was uploaded.
        model_name, keep_existing_fields, use_signature_fields, image_size,
        confidence: Passed through unchanged to ``run_commonforms``.

    Returns:
        A 3-tuple ``(download_path, file_path, status_message)``. Both paths
        are the same generated file on success and ``None`` on any failure;
        the status message explains what happened.
    """
    if pdf is None:
        return None, None, "Please upload a PDF."

    in_path = pdf if isinstance(pdf, str) else getattr(pdf, "name", None)
    # isfile (not exists): a directory path must be rejected here rather than
    # failing later with a confusing copy error.
    if not in_path or not os.path.isfile(in_path):
        return None, None, "Could not locate uploaded file."

    try:
        path, msg = run_commonforms(
            in_path,
            model_name,
            keep_existing_fields,
            use_signature_fields,
            image_size,
            confidence,
        )
    except Exception as e:
        # UI boundary: show a short message to the user, but keep the full
        # traceback in the server log so failures remain debuggable.
        logging.getLogger(__name__).exception("CommonForms run failed for %s", in_path)
        return None, None, f"❌ Error: {e}"

    # DownloadButton expects a filepath; File can mirror the same path.
    return path, path, msg
77
+
78
# UI definition: header, an input/options column, and an output column.
with gr.Blocks(title="CommonForms — PDF → Fillable PDF") as demo:
    gr.Markdown(
        """
        # CommonForms — PDF → Fillable PDF
        Upload a PDF, select options, and generate a fillable PDF.

        [📄 CommonForms Paper](https://arxiv.org/abs/2509.16506) | [🤗 Dataset](https://huggingface.co/datasets/jbarrow/CommonForms) | [🤗 FFDNet-L](https://huggingface.co/jbarrow/FFDNet-L) | [🤗 FFDNet-S](https://huggingface.co/jbarrow/FFDNet-S) | [💾 Code](https://github.com/jbarrow/commonforms)
        """
    )

    with gr.Row():
        # Left column: upload, detector options, run button, status line.
        with gr.Column(scale=2):
            pdf_in = gr.File(label="Upload PDF", file_types=[".pdf"], type="filepath")

            with gr.Row():
                model_name = gr.Dropdown(
                    label="Detector model",
                    choices=["FFDNet-L", "FFDNet-S"],
                    value="FFDNet-L",
                )

            with gr.Row():
                keep_existing_fields = gr.Checkbox(label="Keep existing fields", value=False)
                use_signature_fields = gr.Checkbox(label="Use signature widgets for signatures", value=True)

            with gr.Row():
                image_size = gr.Slider(800, 2048, value=1600, step=32, label="Image size")
                confidence = gr.Slider(0.05, 0.9, value=0.30, step=0.05, label="Confidence threshold")

            run_btn = gr.Button("Run CommonForms", variant="primary")
            status = gr.Textbox(label="Status", interactive=False)

        # Right column: the generated file, offered two ways.
        with gr.Column(scale=1):
            download_btn = gr.DownloadButton(label="Download Fillable PDF", value=None)
            file_out = gr.File(label="Output (preview / alt download)")

    # Input order must match _process's positional parameters; output order
    # must match its (download_path, file_path, status_message) return tuple.
    run_btn.click(
        _process,
        inputs=[pdf_in, model_name, keep_existing_fields, use_signature_fields, image_size, confidence],
        outputs=[download_btn, file_out, status],
        queue=True,
        api_name="run",
    )

# Limit default concurrency to 1 (safe for ZeroGPU / GPU-bound ops)
demo.queue(max_size=16, default_concurrency_limit=1, api_open=True).launch(share=True)