JaceWei committed on
Commit
c643f73
·
1 Parent(s): b45e6bc
.gitignore CHANGED
@@ -1,5 +1,6 @@
1
  input/
2
  output/
 
3
  Paper2Poster/assets/
4
  Paper2Video/assets/
5
  posterbuilder/latex_proj/figures/
 
1
  input/
2
  output/
3
+ runs
4
  Paper2Poster/assets/
5
  Paper2Video/assets/
6
  posterbuilder/latex_proj/figures/
app.py CHANGED
@@ -1,76 +1,312 @@
1
  import gradio as gr
2
- import subprocess, shutil, os, zipfile, datetime, sys, time
3
  from pathlib import Path
4
 
 
 
 
5
  def _ensure_versions():
6
  import importlib, subprocess, sys
7
- def ver(pkg):
 
8
  try:
9
  m = importlib.import_module(pkg)
10
  return getattr(m, "__version__", "0")
11
  except Exception:
12
  return "0"
13
 
14
- hub_ok = False
15
  try:
16
  from packaging.version import Version
17
- hv = Version(ver("huggingface_hub"))
18
- hub_ok = Version("0.24.0") <= hv < Version("1.0.0")
19
- except Exception:
20
- pass
 
 
 
 
 
 
 
 
 
21
 
22
  if not hub_ok:
23
- subprocess.check_call([sys.executable, "-m", "pip",
24
- "install", "huggingface-hub==0.27.1",
25
- "transformers==4.48.0",
26
- "--force-reinstall", "--no-deps"])
 
 
 
 
 
 
 
27
  _ensure_versions()
28
 
 
 
 
29
  ROOT = Path(__file__).resolve().parent
30
- OUTPUT_DIR = ROOT / "output"
31
- INPUT_DIR = ROOT / "input"
32
- LOGO_DIR = INPUT_DIR / "logo"
33
- POSTER_LATEX_DIR = ROOT / "posterbuilder" / "latex_proj"
34
- ZIP_PATH = ROOT / "output.zip"
35
- LOG_PATH = ROOT / "last_run.log"
36
  TIMEOUT_SECONDS = 1800 # 30 minutes
 
37
 
38
- def run_pipeline(arxiv_url, pdf_file, openai_key, logo_files):
39
- start_time = datetime.datetime.now()
40
- logs = [f"🚀 Starting pipeline at {start_time.strftime('%Y-%m-%d %H:%M:%S')}\n"]
 
 
41
 
42
- # ====== Prepare directories ======
43
- for d in [OUTPUT_DIR, LOGO_DIR, POSTER_LATEX_DIR, INPUT_DIR]:
44
- d.mkdir(parents=True, exist_ok=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
- # Clean up old outputs
47
- for item in OUTPUT_DIR.iterdir():
48
- if item.is_dir():
49
- shutil.rmtree(item)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  else:
51
- item.unlink()
52
- if ZIP_PATH.exists():
53
- ZIP_PATH.unlink()
54
- logs.append("🧹 Cleaned previous output.\n")
55
- _write_logs(logs)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  yield "\n".join(logs), None
57
 
58
  # ====== Validation: must upload LOGO ======
59
- # Gradio may return a single file or a list, normalize to list
60
  if logo_files is None:
61
  logo_files = []
62
  if not isinstance(logo_files, (list, tuple)):
63
  logo_files = [logo_files]
64
- logo_files = [f for f in logo_files if f] # filter None
65
 
66
  if len(logo_files) == 0:
67
  msg = "❌ You must upload at least one institutional logo (multiple allowed)."
68
  logs.append(msg)
69
- _write_logs(logs)
70
  yield "\n".join(logs), None
71
  return
72
 
73
- # Clear input/logo and then save new files
74
  for item in LOGO_DIR.iterdir():
75
  if item.is_file():
76
  item.unlink()
@@ -79,8 +315,8 @@ def run_pipeline(arxiv_url, pdf_file, openai_key, logo_files):
79
  p = LOGO_DIR / Path(lf.name).name
80
  shutil.copy(lf.name, p)
81
  saved_logo_paths.append(p)
82
- logs.append(f"🏷️ Saved {len(saved_logo_paths)} logo file(s) to: {LOGO_DIR}\n")
83
- _write_logs(logs)
84
  yield "\n".join(logs), None
85
 
86
  # ====== Handle uploaded PDF (optional) ======
@@ -90,46 +326,48 @@ def run_pipeline(arxiv_url, pdf_file, openai_key, logo_files):
90
  pdf_dir.mkdir(parents=True, exist_ok=True)
91
  pdf_path = pdf_dir / Path(pdf_file.name).name
92
  shutil.copy(pdf_file.name, pdf_path)
93
- logs.append(f"📄 Uploaded PDF saved to: {pdf_path}\n")
94
 
95
  # For pipeline Step 1.5 compatibility: also copy to input/paper.pdf
96
  canonical_pdf = INPUT_DIR / "paper.pdf"
97
  shutil.copy(pdf_file.name, canonical_pdf)
98
- _write_logs(logs)
99
  yield "\n".join(logs), None
100
 
101
  # ====== Validate input source ======
102
  if not arxiv_url and not pdf_file:
103
  msg = "❌ Please provide either an arXiv link or upload a PDF file (choose one)."
104
  logs.append(msg)
105
- _write_logs(logs)
106
  yield "\n".join(logs), None
107
  return
108
 
109
- # ====== Build command ======
110
  cmd = [
111
- "python", "pipeline.py",
112
  "--model_name_t", "gpt-5",
113
  "--model_name_v", "gpt-5",
114
  "--result_dir", "output",
115
  "--paper_latex_root", "input/latex_proj",
116
  "--openai_key", openai_key,
117
  "--gemini_key", "##",
118
- "--logo_dir", str(LOGO_DIR) # 👈 pass logo directory
119
  ]
120
  if arxiv_url:
121
  cmd += ["--arxiv_url", arxiv_url]
122
- # if pdf_path:
123
- # cmd += ["--pdf_path", str(pdf_path)]
124
 
125
- # ====== Run command with REAL-TIME streaming ======
126
- logs.append("\n======= REAL-TIME LOG =======\n")
127
- _write_logs(logs)
 
128
  yield "\n".join(logs), None
129
 
 
130
  try:
131
  process = subprocess.Popen(
132
  cmd,
 
133
  stdout=subprocess.PIPE,
134
  stderr=subprocess.STDOUT,
135
  text=True,
@@ -139,7 +377,7 @@ def run_pipeline(arxiv_url, pdf_file, openai_key, logo_files):
139
  except Exception as e:
140
  msg = f"❌ Pipeline failed to start: {e}"
141
  logs.append(msg)
142
- _write_logs(logs)
143
  yield "\n".join(logs), None
144
  return
145
 
@@ -148,24 +386,22 @@ def run_pipeline(arxiv_url, pdf_file, openai_key, logo_files):
148
  while True:
149
  # Timeout guard
150
  if (datetime.datetime.now() - start_time).total_seconds() > TIMEOUT_SECONDS:
151
- logs.append("❌ Pipeline timed out (30 min limit). Killing process...\n")
152
  try:
153
  process.kill()
154
  except Exception:
155
  pass
156
- _write_logs(logs)
157
  yield "\n".join(logs), None
158
  return
159
 
160
  line = process.stdout.readline()
161
  if line:
162
- # echo to HF Space logs as well
163
- print(line, end="")
164
  logs.append(line.rstrip("\n"))
165
- # write & occasionally yield to reduce UI churn
166
- _write_logs(logs)
167
  now = time.time()
168
- if now - last_yield >= 0.3: # throttle UI updates ~3/sec
169
  last_yield = now
170
  yield "\n".join(logs), None
171
  elif process.poll() is not None:
@@ -174,19 +410,19 @@ def run_pipeline(arxiv_url, pdf_file, openai_key, logo_files):
174
  time.sleep(0.05)
175
 
176
  return_code = process.wait()
177
- logs.append(f"\nProcess finished with code {return_code}\n")
178
- _write_logs(logs)
179
  yield "\n".join(logs), None
180
 
181
  if return_code != 0:
182
- logs.append("❌ Process exited with non-zero status. See logs above.\n")
183
- _write_logs(logs)
184
  yield "\n".join(logs), None
185
  return
186
 
187
  except Exception as e:
188
- logs.append(f"❌ Error during streaming: {e}\n")
189
- _write_logs(logs)
190
  yield "\n".join(logs), None
191
  return
192
  finally:
@@ -196,22 +432,44 @@ def run_pipeline(arxiv_url, pdf_file, openai_key, logo_files):
196
  except Exception:
197
  pass
198
 
199
- # ====== Check output & zip ======
200
  has_output = False
201
  try:
202
- for _ in OUTPUT_DIR.iterdir():
203
- has_output = True
204
- break
 
205
  except FileNotFoundError:
206
  has_output = False
207
 
208
  if not has_output:
209
  msg = "❌ No output generated. Please check logs above."
210
  logs.append(msg)
211
- _write_logs(logs)
212
  yield "\n".join(logs), None
213
  return
214
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
215
  try:
216
  with zipfile.ZipFile(ZIP_PATH, 'w', zipfile.ZIP_DEFLATED) as zipf:
217
  for root, dirs, files in os.walk(OUTPUT_DIR):
@@ -219,43 +477,51 @@ def run_pipeline(arxiv_url, pdf_file, openai_key, logo_files):
219
  file_path = Path(root) / file
220
  arcname = file_path.relative_to(OUTPUT_DIR)
221
  zipf.write(file_path, arcname=arcname)
222
- logs.append(f"✅ Zipped output folder to {ZIP_PATH}\n")
223
  except Exception as e:
224
- logs.append(f"❌ Failed to create zip: {e}\n")
225
 
226
  end_time = datetime.datetime.now()
227
- logs.append(f"🏁 Completed at {end_time.strftime('%Y-%m-%d %H:%M:%S')} (Duration: {(end_time - start_time).seconds}s)\n")
 
 
228
 
229
- _write_logs(logs)
230
  yield "\n".join(logs), (str(ZIP_PATH) if ZIP_PATH.exists() else None)
231
 
232
- def _write_logs(logs):
233
- try:
234
- with open(LOG_PATH, "w", encoding="utf-8") as f:
235
- f.write("\n".join(logs))
236
- except Exception:
237
- # Avoid crashing UI if disk I/O fails
238
- pass
239
-
240
- # ===================== Gradio UI =====================
241
  iface = gr.Interface(
242
  fn=run_pipeline,
243
  inputs=[
244
  gr.Textbox(label="📘 ArXiv URL (choose one)", placeholder="https://arxiv.org/abs/2505.xxxxx"),
245
  gr.File(label="📄 Upload PDF (choose one)"),
246
  gr.Textbox(label="🔑 OpenAI API Key", placeholder="sk-...", type="password"),
247
- gr.File(label="🏷️ Upload Institutional Logo(s) (required, multiple allowed)", file_count="multiple", file_types=["image"]),
 
 
 
 
 
 
248
  ],
249
  outputs=[
250
- gr.Textbox(label="🧾 Logs", lines=30, max_lines=50),
251
  gr.File(label="📦 Download Results (.zip)")
252
  ],
253
- title="📄 Paper2Poster",
254
- description=(
255
- "Upload your paper, and the pipeline will automatically generate a fully compilable LaTeX poster; you can download the ZIP file and compile it yourself. Each paper takes approximately 6–10 minutes to process.\n"
256
- "Provide either an arXiv link or upload a PDF file (choose one); the system will generate a poster and package it for download.\n"
257
- "You must upload at least one institutional logo (multiple allowed).\n"
258
- ),
 
 
 
 
 
 
259
  allow_flagging="never",
260
  )
261
 
 
1
  import gradio as gr
2
+ import subprocess, shutil, os, zipfile, datetime, sys, time, uuid, stat, re
3
  from pathlib import Path
4
 
5
+ # =====================
6
+ # Version guard
7
+ # =====================
8
def _ensure_versions():
    """Ensure a compatible ``huggingface_hub`` is installed before the app runs.

    The accepted range is ``0.24.0 <= version < 1.0.0``. When the installed
    version is outside that range, missing, or cannot be parsed, pinned
    versions of ``huggingface-hub`` and ``transformers`` are force-reinstalled
    with pip (without touching their dependencies).

    Raises:
        subprocess.CalledProcessError: if one of the pip invocations fails.
    """
    import importlib
    import subprocess
    import sys

    def get_version(pkg):
        """Return ``pkg.__version__``, or ``"0"`` if the package cannot be imported."""
        try:
            m = importlib.import_module(pkg)
            return getattr(m, "__version__", "0")
        except Exception:
            return "0"

    try:
        from packaging.version import InvalidVersion, Version
    except ImportError:
        # Install `packaging` so the version comparison below can work.
        subprocess.check_call([sys.executable, "-m", "pip", "install", "packaging"])
        from packaging.version import InvalidVersion, Version

    # Check huggingface_hub.
    hub_ver = get_version("huggingface_hub")

    required_min = Version("0.24.0")
    required_max = Version("1.0.0")

    try:
        hub_ok = required_min <= Version(hub_ver) < required_max
    except (InvalidVersion, TypeError):
        # An unparseable / non-string __version__ must not crash the app at
        # import time; treat it as "out of range" and reinstall instead.
        hub_ok = False

    if not hub_ok:
        print(f"[INFO] huggingface_hub=={hub_ver} not in range "
              f"[{required_min}, {required_max}), reinstalling...")
        subprocess.check_call([
            sys.executable, "-m", "pip", "install",
            "huggingface-hub==0.27.1",
            "transformers==4.48.0",
            "--force-reinstall", "--no-deps"
        ])
    else:
        print(f"[INFO] huggingface_hub version OK: {hub_ver}")
45
+
46
  _ensure_versions()
47
 
48
+ # =====================
49
+ # Paths (read-only repo root; DO NOT write here)
50
+ # =====================
51
  ROOT = Path(__file__).resolve().parent
52
+ RUNS_DIR = ROOT / "runs" # all per-run workspaces live here
53
+ RUNS_DIR.mkdir(parents=True, exist_ok=True)
54
+
 
 
 
55
  TIMEOUT_SECONDS = 1800 # 30 minutes
56
+ RETENTION_HOURS = 12 # auto-clean runs older than N hours
57
 
58
+ # ---------------------
59
+ # Utils
60
+ # ---------------------
61
+ def _now_str():
62
+ return datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
63
 
64
+ def _write_logs(log_path: Path, logs):
65
+ try:
66
+ log_path.parent.mkdir(parents=True, exist_ok=True)
67
+ with open(log_path, "w", encoding="utf-8") as f:
68
+ f.write("\n".join(logs))
69
+ except Exception:
70
+ pass
71
+
72
+ def _on_rm_error(func, path, exc_info):
73
+ # fix "PermissionError: [Errno 13] Permission denied" for readonly files
74
+ os.chmod(path, stat.S_IWRITE)
75
+ func(path)
76
+
77
+ def _copytree(src: Path, dst: Path, symlinks=True, ignore=None):
78
+ if dst.exists():
79
+ shutil.rmtree(dst, onerror=_on_rm_error)
80
+ shutil.copytree(src, dst, symlinks=symlinks, ignore=ignore)
81
+
82
+ def _safe_copy(src: Path, dst: Path):
83
+ dst.parent.mkdir(parents=True, exist_ok=True)
84
+ shutil.copy2(src, dst)
85
+
86
def _cleanup_old_runs(max_age_hours=12):
    """Delete run workspaces under ``RUNS_DIR`` older than ``max_age_hours``.

    Age is measured from each directory's modification time. Non-directories
    are ignored. This is strictly best-effort: a failure on one entry skips
    that entry, and a failure to list ``RUNS_DIR`` is silently ignored.
    """
    try:
        reference_ts = datetime.datetime.now().timestamp()
        for entry in RUNS_DIR.iterdir():
            try:
                if entry.is_dir():
                    hours_old = (reference_ts - entry.stat().st_mtime) / 3600.0
                    if hours_old > max_age_hours:
                        shutil.rmtree(entry, onerror=_on_rm_error)
            except Exception:
                # One bad entry must not stop the sweep.
                continue
    except Exception:
        pass
101
 
102
def _prepare_workspace(logs):
    """Build an isolated workspace for one run and populate it with code/assets.

    A fresh directory ``RUNS_DIR/<timestamp>-<8 hex chars>`` is created, the
    ``posterbuilder``, ``Paper2Poster`` and (optional) ``template`` trees are
    copied into it when they exist under ``ROOT``, ``pipeline.py`` is copied
    alongside them, and the standard input directories are created. Progress
    messages are appended to ``logs``.

    Returns:
        ``(run_id, work_dir, log_path, zip_path)``.
    """
    stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    run_id = stamp + "-" + uuid.uuid4().hex[:8]
    work_dir = RUNS_DIR / run_id
    work_dir.mkdir(parents=True, exist_ok=True)

    # Per-run log and archive locations.
    log_path = work_dir / "run.log"
    zip_path = work_dir / "output.zip"

    logs.append(f"🧩 New workspace: {work_dir.relative_to(ROOT)} (run_id={run_id})")

    # Give the run its own copy of everything that performs file IO so that
    # concurrent runs never write to shared locations. symlinks=True makes
    # copytree reproduce symlinks found in the source tree as symlinks.
    for d in ("posterbuilder", "Paper2Poster"):
        origin = ROOT / d
        if not origin.exists():
            continue
        _copytree(origin, work_dir / d, symlinks=True)
        logs.append(f" ↪ copied {d}/ → runs/{run_id}/{d}/ (symlink where possible)")

    # template/ is optional.
    template_src = ROOT / "template"
    if template_src.exists():
        _copytree(template_src, work_dir / "template", symlinks=True)
        logs.append(" ↪ copied template/")

    # pipeline.py must live inside the workspace so that ROOT_DIR=work_dir.
    _safe_copy(ROOT / "pipeline.py", work_dir / "pipeline.py")

    # Standard IO directories expected inside the workspace.
    for parts in (("input", "pdf"), ("input", "logo"), ("posterbuilder", "latex_proj")):
        work_dir.joinpath(*parts).mkdir(parents=True, exist_ok=True)

    return run_id, work_dir, log_path, zip_path
138
+
139
+ # ---------------------
140
+ # Helpers for new features (post-processing)
141
+ # ---------------------
142
+ def _parse_rgb(s):
143
+ """Accepts '94,46,145' / '94 46 145' / '[94,46,145]' / '(94, 46, 145)' and returns (r,g,b) or None."""
144
+ if s is None:
145
+ return None
146
+ if isinstance(s, (tuple, list)) and len(s) == 3:
147
+ vals = s
148
+ else:
149
+ nums = re.findall(r"\d+", str(s))
150
+ if len(nums) < 3:
151
+ return None
152
+ vals = nums[:3]
153
+ try:
154
+ r, g, b = (int(vals[0]), int(vals[1]), int(vals[2]))
155
+ if any(v < 0 or v > 255 for v in (r, g, b)):
156
+ return None
157
+ return (r, g, b)
158
+ except Exception:
159
+ return None
160
+
161
+ def _apply_meeting_logo(OUTPUT_DIR: Path, meeting_logo_file, logs):
162
+ """Replace output/poster_latex_proj/logos/right_logo.png if meeting_logo_file provided."""
163
+ if not meeting_logo_file:
164
+ return False
165
+
166
+ logos_dir = OUTPUT_DIR / "poster_latex_proj" / "logos"
167
+ target = logos_dir / "right_logo.png"
168
+ try:
169
+ logos_dir.mkdir(parents=True, exist_ok=True)
170
+ # Try to convert to PNG for safety
171
+ try:
172
+ from PIL import Image
173
+ img = Image.open(meeting_logo_file.name)
174
+ # preserve alpha if available
175
+ if img.mode not in ("RGB", "RGBA"):
176
+ img = img.convert("RGBA")
177
+ img.save(target, format="PNG")
178
+ logs.append(f"🖼️ Meeting logo converted to PNG and saved → {target.relative_to(OUTPUT_DIR)}")
179
+ except Exception as e:
180
+ # Fallback: raw copy with .png name
181
+ shutil.copy(meeting_logo_file.name, target)
182
+ logs.append(f"🖼️ Meeting logo copied (no conversion) → {target.relative_to(OUTPUT_DIR)} (note: ensure it's a valid PNG).")
183
+ return True
184
+ except Exception as e:
185
+ logs.append(f"⚠️ Failed to apply meeting logo: {e}")
186
+ return False
187
+
188
+ def _apply_theme_rgb(OUTPUT_DIR: Path, rgb_tuple, logs):
189
+ """Replace \\definecolor{nipspurple}{RGB}{r,g,b} in poster_output.tex if rgb_tuple provided."""
190
+ if not rgb_tuple:
191
+ return False
192
+
193
+ tex_path = OUTPUT_DIR / "poster_latex_proj" / "poster_output.tex"
194
+ if not tex_path.exists():
195
+ logs.append(f"⚠️ Theme RGB skipped: {tex_path.relative_to(OUTPUT_DIR)} not found.")
196
+ return False
197
+
198
+ try:
199
+ content = tex_path.read_text(encoding="utf-8")
200
+ pattern = r"(\\definecolor\{nipspurple\}\{RGB\}\{)\s*\d+\s*,\s*\d+\s*,\s*\d+\s*(\})"
201
+ new_vals = f"{rgb_tuple[0]},{rgb_tuple[1]},{rgb_tuple[2]}"
202
+ new_content, n = re.subn(pattern, r"\1" + new_vals + r"\2", content, flags=re.MULTILINE)
203
+ if n > 0:
204
+ tex_path.write_text(new_content, encoding="utf-8")
205
+ logs.append(f"🎨 Theme color updated: themecolor = {{{new_vals}}} in {tex_path.relative_to(OUTPUT_DIR)}")
206
+ return True
207
  else:
208
+ logs.append("⚠️ Theme RGB not applied: definecolor for 'themecolor' not found.")
209
+ return False
210
+ except Exception as e:
211
+ logs.append(f"⚠️ Failed to update theme RGB: {e}")
212
+ return False
213
+
214
+ def _apply_left_logo(OUTPUT_DIR: Path, logo_files, logs):
215
+ """
216
+ Use the first institutional logo uploaded by the user:
217
+ - Copy it into output/poster_latex_proj/logos/ as left_logo.<ext>
218
+ - Replace 'logos/left_logo.png' in poster_output.tex with the proper file extension
219
+ Does NOT convert formats. Simply renames and rewrites the tex reference.
220
+ """
221
+ if not logo_files:
222
+ logs.append("ℹ️ No institutional logo uploaded.")
223
+ return False
224
+
225
+ # If multiple files component, take the first one
226
+ f = logo_files[0] if isinstance(logo_files, (list, tuple)) else logo_files
227
+ if not f:
228
+ logs.append("ℹ️ No institutional logo uploaded.")
229
+ return False
230
+
231
+ ext = Path(f.name).suffix or ".png" # fallback to .png if no extension
232
+ logos_dir = OUTPUT_DIR / "poster_latex_proj" / "logos"
233
+ tex_path = OUTPUT_DIR / "poster_latex_proj" / "poster_output.tex"
234
+
235
+ try:
236
+ logos_dir.mkdir(parents=True, exist_ok=True)
237
+ dst = logos_dir / f"left_logo{ext}"
238
+ shutil.copy(f.name, dst)
239
+ logs.append(f"🏷️ Institutional logo copied to: {dst.relative_to(OUTPUT_DIR)}")
240
+ except Exception as e:
241
+ logs.append(f"⚠️ Failed to copy institutional logo: {e}")
242
+ return False
243
+
244
+ if not tex_path.exists():
245
+ logs.append("⚠️ poster_output.tex not found, cannot replace left_logo path.")
246
+ return False
247
+
248
+ try:
249
+ text = tex_path.read_text(encoding="utf-8")
250
+ old = "logos/left_logo.png"
251
+ new = f"logos/left_logo{ext}"
252
+
253
+ if old in text:
254
+ tex_path.write_text(text.replace(old, new), encoding="utf-8")
255
+ logs.append(f"🛠️ Replaced left_logo.png → left_logo{ext} in poster_output.tex")
256
+ return True
257
+
258
+ # Fallback (covers weird spacing or macro variations)
259
+ import re
260
+ pattern = r"(logos/left_logo)\.png"
261
+ new_text, n = re.subn(pattern, r"\1" + ext, text)
262
+
263
+ if n > 0:
264
+ tex_path.write_text(new_text, encoding="utf-8")
265
+ logs.append(f"🛠️ Replaced left_logo.png → left_logo{ext} (regex fallback)")
266
+ return True
267
+
268
+ logs.append("ℹ️ No left_logo.png reference found in poster_output.tex.")
269
+ return False
270
+
271
+ except Exception as e:
272
+ logs.append(f"⚠️ Failed to modify poster_output.tex: {e}")
273
+ return False
274
+
275
+
276
+ # =====================
277
+ # Gradio pipeline function (ISOLATED)
278
+ # =====================
279
+ def run_pipeline(arxiv_url, pdf_file, openai_key, logo_files, meeting_logo_file, theme_rgb):
280
+ _cleanup_old_runs(RETENTION_HOURS)
281
+
282
+ start_time = datetime.datetime.now()
283
+ logs = [f"🚀 Starting pipeline at {_now_str()}"]
284
+
285
+ # --- Prepare per-run workspace ---
286
+ run_id, WORK_DIR, LOG_PATH, ZIP_PATH = _prepare_workspace(logs)
287
+ INPUT_DIR = WORK_DIR / "input"
288
+ OUTPUT_DIR = WORK_DIR / "output"
289
+ LOGO_DIR = INPUT_DIR / "logo"
290
+ POSTER_LATEX_DIR = WORK_DIR / "posterbuilder" / "latex_proj"
291
+
292
+ _write_logs(LOG_PATH, logs)
293
  yield "\n".join(logs), None
294
 
295
  # ====== Validation: must upload LOGO ======
 
296
  if logo_files is None:
297
  logo_files = []
298
  if not isinstance(logo_files, (list, tuple)):
299
  logo_files = [logo_files]
300
+ logo_files = [f for f in logo_files if f]
301
 
302
  if len(logo_files) == 0:
303
  msg = "❌ You must upload at least one institutional logo (multiple allowed)."
304
  logs.append(msg)
305
+ _write_logs(LOG_PATH, logs)
306
  yield "\n".join(logs), None
307
  return
308
 
309
+ # Save logos into run-local dir
310
  for item in LOGO_DIR.iterdir():
311
  if item.is_file():
312
  item.unlink()
 
315
  p = LOGO_DIR / Path(lf.name).name
316
  shutil.copy(lf.name, p)
317
  saved_logo_paths.append(p)
318
+ logs.append(f"🏷️ Saved {len(saved_logo_paths)} logo file(s) {LOGO_DIR.relative_to(WORK_DIR)}")
319
+ _write_logs(LOG_PATH, logs)
320
  yield "\n".join(logs), None
321
 
322
  # ====== Handle uploaded PDF (optional) ======
 
326
  pdf_dir.mkdir(parents=True, exist_ok=True)
327
  pdf_path = pdf_dir / Path(pdf_file.name).name
328
  shutil.copy(pdf_file.name, pdf_path)
329
+ logs.append(f"📄 Uploaded PDF {pdf_path.relative_to(WORK_DIR)}")
330
 
331
  # For pipeline Step 1.5 compatibility: also copy to input/paper.pdf
332
  canonical_pdf = INPUT_DIR / "paper.pdf"
333
  shutil.copy(pdf_file.name, canonical_pdf)
334
+ _write_logs(LOG_PATH, logs)
335
  yield "\n".join(logs), None
336
 
337
  # ====== Validate input source ======
338
  if not arxiv_url and not pdf_file:
339
  msg = "❌ Please provide either an arXiv link or upload a PDF file (choose one)."
340
  logs.append(msg)
341
+ _write_logs(LOG_PATH, logs)
342
  yield "\n".join(logs), None
343
  return
344
 
345
+ # ====== Build command (run INSIDE workspace) ======
346
  cmd = [
347
+ sys.executable, "pipeline.py",
348
  "--model_name_t", "gpt-5",
349
  "--model_name_v", "gpt-5",
350
  "--result_dir", "output",
351
  "--paper_latex_root", "input/latex_proj",
352
  "--openai_key", openai_key,
353
  "--gemini_key", "##",
354
+ "--logo_dir", str(LOGO_DIR) # run-local logo dir
355
  ]
356
  if arxiv_url:
357
  cmd += ["--arxiv_url", arxiv_url]
358
+ # (Keep pdf via input/paper.pdf; pipeline will read it if exists)
 
359
 
360
+ logs.append("\n======= REAL-TIME LOG =======")
361
+ logs.append(f"cwd = runs/{WORK_DIR.name}")
362
+ logs.append(f"cmd = {' '.join(cmd)}")
363
+ _write_logs(LOG_PATH, logs)
364
  yield "\n".join(logs), None
365
 
366
+ # ====== Run with REAL-TIME streaming, inside workspace ======
367
  try:
368
  process = subprocess.Popen(
369
  cmd,
370
+ cwd=str(WORK_DIR),
371
  stdout=subprocess.PIPE,
372
  stderr=subprocess.STDOUT,
373
  text=True,
 
377
  except Exception as e:
378
  msg = f"❌ Pipeline failed to start: {e}"
379
  logs.append(msg)
380
+ _write_logs(LOG_PATH, logs)
381
  yield "\n".join(logs), None
382
  return
383
 
 
386
  while True:
387
  # Timeout guard
388
  if (datetime.datetime.now() - start_time).total_seconds() > TIMEOUT_SECONDS:
389
+ logs.append("❌ Pipeline timed out (30 min limit). Killing process")
390
  try:
391
  process.kill()
392
  except Exception:
393
  pass
394
+ _write_logs(LOG_PATH, logs)
395
  yield "\n".join(logs), None
396
  return
397
 
398
  line = process.stdout.readline()
399
  if line:
400
+ print(line, end="") # echo to Space logs
 
401
  logs.append(line.rstrip("\n"))
402
+ _write_logs(LOG_PATH, logs)
 
403
  now = time.time()
404
+ if now - last_yield >= 0.3:
405
  last_yield = now
406
  yield "\n".join(logs), None
407
  elif process.poll() is not None:
 
410
  time.sleep(0.05)
411
 
412
  return_code = process.wait()
413
+ logs.append(f"\nProcess finished with code {return_code}")
414
+ _write_logs(LOG_PATH, logs)
415
  yield "\n".join(logs), None
416
 
417
  if return_code != 0:
418
+ logs.append("❌ Process exited with non-zero status. See logs above.")
419
+ _write_logs(LOG_PATH, logs)
420
  yield "\n".join(logs), None
421
  return
422
 
423
  except Exception as e:
424
+ logs.append(f"❌ Error during streaming: {e}")
425
+ _write_logs(LOG_PATH, logs)
426
  yield "\n".join(logs), None
427
  return
428
  finally:
 
432
  except Exception:
433
  pass
434
 
435
+ # ====== Check output ======
436
  has_output = False
437
  try:
438
+ if OUTPUT_DIR.exists():
439
+ for _ in OUTPUT_DIR.iterdir():
440
+ has_output = True
441
+ break
442
  except FileNotFoundError:
443
  has_output = False
444
 
445
  if not has_output:
446
  msg = "❌ No output generated. Please check logs above."
447
  logs.append(msg)
448
+ _write_logs(LOG_PATH, logs)
449
  yield "\n".join(logs), None
450
  return
451
 
452
+ # ====== NEW: Post-processing (optional features) ======
453
+ # 1) Optional meeting logo replacement
454
+ applied_logo = _apply_meeting_logo(OUTPUT_DIR, meeting_logo_file, logs)
455
+
456
+ # 2) Optional theme color update
457
+ rgb_tuple = _parse_rgb(theme_rgb)
458
+ if theme_rgb and not rgb_tuple:
459
+ logs.append(f"⚠️ Ignored Theme RGB input '{theme_rgb}': expected like '94,46,145'.")
460
+ applied_rgb = _apply_theme_rgb(OUTPUT_DIR, rgb_tuple, logs) if rgb_tuple else False
461
+
462
+ # 3) Optional institutional logo -> left_logo.<ext>
463
+ _apply_left_logo(OUTPUT_DIR, logo_files, logs)
464
+
465
+ _write_logs(LOG_PATH, logs)
466
+ yield "\n".join(logs), None
467
+
468
+
469
+ _write_logs(LOG_PATH, logs)
470
+ yield "\n".join(logs), None
471
+
472
+ # ====== Zip output (run-local) ======
473
  try:
474
  with zipfile.ZipFile(ZIP_PATH, 'w', zipfile.ZIP_DEFLATED) as zipf:
475
  for root, dirs, files in os.walk(OUTPUT_DIR):
 
477
  file_path = Path(root) / file
478
  arcname = file_path.relative_to(OUTPUT_DIR)
479
  zipf.write(file_path, arcname=arcname)
480
+ logs.append(f"✅ Zipped output {ZIP_PATH.relative_to(WORK_DIR)}")
481
  except Exception as e:
482
+ logs.append(f"❌ Failed to create zip: {e}")
483
 
484
  end_time = datetime.datetime.now()
485
+ dur = (end_time - start_time).seconds
486
+ logs.append(f"🏁 Completed at {_now_str()} (Duration: {dur}s)")
487
+ logs.append(f"🆔 run_id = {WORK_DIR.name}")
488
 
489
+ _write_logs(LOG_PATH, logs)
490
  yield "\n".join(logs), (str(ZIP_PATH) if ZIP_PATH.exists() else None)
491
 
492
+ # =====================
493
+ # Gradio UI
494
+ # =====================
 
 
 
 
 
 
495
# Gradio front-end. The order of `inputs` must match the positional
# parameters of run_pipeline:
#   (arxiv_url, pdf_file, openai_key, logo_files, meeting_logo_file, theme_rgb)
# and the two `outputs` receive the (log text, zip path) pairs it yields.
iface = gr.Interface(
    fn=run_pipeline,
    inputs=[
        gr.Textbox(label="📘 ArXiv URL (choose one)", placeholder="https://arxiv.org/abs/2505.xxxxx"),
        gr.File(label="📄 Upload PDF (choose one)"),
        gr.Textbox(label="🔑 OpenAI API Key", placeholder="sk-...", type="password"),
        gr.File(
            # run_pipeline aborts when no logo is uploaded, so the label must
            # say "required" rather than "optional".
            label="🏷️ Institutional Logo (required, multiple allowed)",
            file_count="multiple",
            file_types=["image"],
        ),
        gr.File(label="🧩 Optional: Conference Logo (replaces right_logo.png)", file_count="single", file_types=["image"]),
        gr.Textbox(label="🎨 Optional: Theme RGB (e.g., 94,46,145)", placeholder="94,46,145"),
    ],
    outputs=[
        gr.Textbox(label="🧾 Logs (8~10 minutes)", lines=30, max_lines=50),
        gr.File(label="📦 Download Results (.zip)")
    ],
    title="🎓 Paper2Poster",
    # Markdown: kept at column 0 so the heading and links render correctly.
    description="""
[Paper](https://arxiv.org/abs/2505.21497) | [GitHub](https://github.com/Paper2Poster/Paper2Poster) | [Project page](https://paper2poster.github.io/)

# Paper2Poster

Upload a paper, generate a poster for you.
Each paper takes approximately **8–10 minutes**.

This work is based on the **[CAMEL-ai](https://camel-ai.org/)** framework.
""",

    allow_flagging="never",
)
527
 
install_tectonic.sh ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
# Install the tectonic TeX engine (release 0.15.0, x86_64 Linux build) unless a
# `tectonic` executable is already on PATH. Installs to /usr/local/bin when it
# is writable, otherwise to ~/.local/bin.
set -e

echo "📦 Installing tectonic..."

if ! command -v tectonic &> /dev/null; then
    wget -O /tmp/tectonic.tar.gz https://github.com/tectonic-typesetting/tectonic/releases/download/tectonic%400.15.0/tectonic-0.15.0-x86_64-unknown-linux-gnu.tar.gz
    mkdir -p /tmp/tectonic
    tar -xzf /tmp/tectonic.tar.gz -C /tmp/tectonic

    # Locate the extracted executable.
    TECTONIC_BIN=$(find /tmp/tectonic -type f -name tectonic | head -n 1)

    # Fail with a clear message instead of letting `cp ""` produce a cryptic one.
    if [ -z "$TECTONIC_BIN" ]; then
        echo "❌ tectonic binary not found in the downloaded archive." >&2
        exit 1
    fi

    # Default install location.
    INSTALL_DIR="/usr/local/bin"

    # Without write permission there, fall back to the user's own bin directory.
    if [ ! -w "$INSTALL_DIR" ]; then
        INSTALL_DIR="$HOME/.local/bin"
        mkdir -p "$INSTALL_DIR"
        echo "⚠️ No permission for /usr/local/bin, installing to $INSTALL_DIR"
    fi

    cp "$TECTONIC_BIN" "$INSTALL_DIR/tectonic"
    chmod +x "$INSTALL_DIR/tectonic"

    # Tell the user how to extend PATH when the install dir is not on it yet.
    if [[ ":$PATH:" != *":$INSTALL_DIR:"* ]]; then
        echo "⚙️ You may need to add this to your ~/.bashrc:"
        echo "export PATH=\$PATH:$INSTALL_DIR"
    fi

    echo "✅ Tectonic installed successfully at $INSTALL_DIR/tectonic"
else
    echo "Tectonic already installed."
fi
install_tinytex.sh ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env bash
# ============================================================
# install_tinytex.sh
# One-shot TinyTeX installation (user-level, no sudo required).
# Intended for compiling beamer / fontspec / gemini / cam posters.
# ============================================================

set -e  # abort on the first failing command
WORKDIR=$(pwd)
echo "当前目录: $WORKDIR"

# ------------------------------------------------------------
# Step 0. Clean up leftovers from a previous installation
# ------------------------------------------------------------
echo "🧹 Step 0. 检查并清理旧 TinyTeX 安装或锁文件..."
if [ -d "$HOME/.TinyTeX" ]; then
    echo "⚠️ 检测到已有 ~/.TinyTeX 目录,尝试安全删除..."
    # Kill processes that still hold files (e.g. .nfs lock files) open there.
    PIDS=$(lsof +D "$HOME/.TinyTeX" 2>/dev/null | awk 'NR>1 {print $2}' | sort -u)
    if [ -n "$PIDS" ]; then
        echo "🔪 结束占用进程: $PIDS"
        kill -9 $PIDS 2>/dev/null || true
    fi
    # Force-remove the old directory.
    rm -rf "$HOME/.TinyTeX" 2>/dev/null || true
fi

# Remove texlive installer leftovers that may exist in the current directory.
rm -rf texlive install-tl* tinytex.profile 2>/dev/null || true

# ------------------------------------------------------------
# Step 1. Install TinyTeX
# ------------------------------------------------------------
echo "🚀 Step 1. 安装 TinyTeX 到用户目录 (~/.TinyTeX)..."
# Download first, then execute: with `wget ... | sh` a failed download goes
# unnoticed under `set -e`, because only the exit status of `sh` is checked.
INSTALLER=$(mktemp)
wget -qO "$INSTALLER" "https://yihui.org/tinytex/install-unx.sh"
sh "$INSTALLER"
rm -f "$INSTALLER"

# ------------------------------------------------------------
# Step 2. Put TinyTeX on PATH
# ------------------------------------------------------------
echo "✅ 安装完成,设置 PATH 环境变量..."
export PATH=$HOME/.TinyTeX/bin/x86_64-linux:$PATH

# Persist the PATH entry in ~/.bashrc for future shells.
if ! grep -q ".TinyTeX/bin/x86_64-linux" ~/.bashrc; then
    echo 'export PATH=$HOME/.TinyTeX/bin/x86_64-linux:$PATH' >> ~/.bashrc
    echo "📝 已自动将 TinyTeX 路径写入 ~/.bashrc"
fi

# ------------------------------------------------------------
# Step 3. Check that XeLaTeX is available
# ------------------------------------------------------------
echo "🧩 Step 2. 检查 XeLaTeX 版本..."
if ! command -v xelatex >/dev/null 2>&1; then
    echo "❌ XeLaTeX 未找到,安装可能失败,请检查 ~/.TinyTeX/bin 是否存在。"
    exit 1
else
    xelatex --version | head -n 2
fi

# ------------------------------------------------------------
# Step 4. Install commonly needed packages
# ------------------------------------------------------------
echo "📦 Step 3. 安装必要的 LaTeX 包 (无 sudo)..."
# Use HTTPS so package downloads are not fetched over plain HTTP.
tlmgr option repository https://mirror.ctan.org/systems/texlive/tlnet
tlmgr update --self --all --reinstall-forcibly-removed
# TikZ is distributed in the `pgf` package; `tikz` is not a TeX Live package
# name, and asking tlmgr for it makes the command fail and abort the script.
tlmgr install \
    beamer beamerposter fontspec xcolor pgfplots anyfontsize type1cm \
    ragged2e changepage geometry booktabs natbib pgf fp

echo "✅ 所有包已安装。TinyTeX 可正常用于 XeLaTeX 海报编译。"
echo "👉 现在你可以运行:"
echo " xelatex poster_output.tex"
pipeline.py CHANGED
@@ -21,6 +21,9 @@ P2P_ROOT = ROOT_DIR / "Paper2Poster"
21
  PB_ROOT = ROOT_DIR / "posterbuilder"
22
  sys.path.append(str(P2P_ROOT))
23
 
 
 
 
24
  def copy_folder(src_dir, dst_dir):
25
  src_dir = Path(src_dir)
26
  dst_dir = Path(dst_dir)
@@ -40,7 +43,7 @@ def safe_copy(src: Path, dst: Path):
40
  dst.parent.mkdir(parents=True, exist_ok=True)
41
  shutil.copy2(src, dst)
42
 
43
- def str2list(s):
44
  return [int(x) for x in s.split(',')]
45
 
46
  def run_paper2poster_content_build():
@@ -96,13 +99,7 @@ def _list_logo_files(logo_dir: Path):
96
  return files
97
 
98
  def _compose_logos_horizontally(logo_paths, out_path: Path, box_w=2000, box_h=476, gap=16):
99
- """
100
- 宽度为硬约束:输出图像宽度必为 box_w(默认 2000px)。
101
- 多 logo 按比例统一缩放,拼接后刚好占满 box_w(包含间距)。
102
- 高度由比例自然决定,可能 < box_h,也可能 > box_h(甚至 > 2*box_h),不会再二次压缩。
103
- 透明背景,输出 PNG。
104
- """
105
- # 读取图片
106
  imgs = []
107
  for p in logo_paths:
108
  p = Path(p)
@@ -112,35 +109,27 @@ def _compose_logos_horizontally(logo_paths, out_path: Path, box_w=2000, box_h=47
112
  if n == 0:
113
  raise RuntimeError("No logo images found.")
114
 
115
- # 原始总宽度(不含 gap);拼接总宽 = sum(w_i) + gap*(n-1)
116
  widths = [im.width for im in imgs]
117
  heights = [im.height for im in imgs]
118
  sum_w = sum(widths)
119
  if sum_w <= 0:
120
  raise RuntimeError("All logo images have zero width.")
121
 
122
- # 计算统一缩放比例,使:sum(w_i * s) + gap*(n-1) == box_w
123
- # => s = (box_w - gap*(n-1)) / sum_w
124
  total_gap = max(0, gap * (n - 1))
125
  if box_w <= total_gap:
126
  raise ValueError(f"box_w({box_w}) too small vs total gaps({total_gap}). Increase box_w or reduce gap.")
127
  s = (box_w - total_gap) / float(sum_w)
128
 
129
- # 按统一比例缩放(四舍五入到整数像素,避免累计误差)
130
  resized = []
131
  scaled_widths = []
132
- scaled_heights = []
133
  for im, w, h in zip(imgs, widths, heights):
134
  nw = max(1, int(round(w * s)))
135
  nh = max(1, int(round(h * s)))
136
  resized.append(im.resize((nw, nh), Image.LANCZOS))
137
  scaled_widths.append(nw)
138
- scaled_heights.append(nh)
139
 
140
- # 由于整数取整,可能出现总宽 != box_w - total_gap;对若干图微调 1px 以精确对齐
141
  current_sum_w = sum(scaled_widths)
142
  diff = (box_w - total_gap) - current_sum_w
143
- # 按从宽到窄/从大到小顺序均匀分配像素误差
144
  if diff != 0:
145
  order = sorted(range(n), key=lambda i: scaled_widths[i], reverse=(diff > 0))
146
  idx = 0
@@ -155,14 +144,13 @@ def _compose_logos_horizontally(logo_paths, out_path: Path, box_w=2000, box_h=47
155
  remaining -= 1
156
  idx += 1
157
 
158
- # 计算最终尺寸
159
  total_w = sum(scaled_widths) + total_gap
160
  assert total_w == box_w, f"width pack mismatch: got {total_w}, expect {box_w}"
161
  canvas_w = box_w
162
- canvas_h = max(im.height for im in resized) # 高度由比例自然决定(可能 > 2*box_h)
163
 
164
- # 画布 & 居中摆放(垂直方向居中)
165
- canvas = Image.new("RGBA", (canvas_w, canvas_h), (0, 0, 0, 0))
166
  cur_x = 0
167
  for idx, im in enumerate(resized):
168
  y = (canvas_h - im.height) // 2
@@ -171,13 +159,8 @@ def _compose_logos_horizontally(logo_paths, out_path: Path, box_w=2000, box_h=47
171
  if idx != n - 1:
172
  cur_x += gap
173
 
174
- # out_path.parent.mkdir(parents=True, exist_ok=True)
175
  canvas.save(out_path, format="PNG")
176
- print(f" 🧩 Logos composed (width-locked) → {out_path.relative_to(ROOT_DIR)} "
177
- f"(n={n}, final_size={canvas_w}x{canvas_h})")
178
-
179
-
180
-
181
 
182
  if __name__ == '__main__':
183
  parser = argparse.ArgumentParser(description='Paper2Video Generation Pipeline')
@@ -196,11 +179,11 @@ if __name__ == '__main__':
196
  args = parser.parse_args()
197
  print("start")
198
 
199
- # ✅ 使用传入的 key 设置环境变量
200
  os.environ["OPENAI_API_KEY"] = args.openai_key
201
  os.environ["GEMINI_API_KEY"] = args.gemini_key
202
 
203
- # 清空 output
204
  output_dir = ROOT_DIR / "output"
205
  if output_dir.exists():
206
  print(f" 🧹 Clearing old output directory: {output_dir.relative_to(ROOT_DIR)}")
@@ -210,9 +193,7 @@ if __name__ == '__main__':
210
  (output_dir / "slide_imgs").mkdir(parents=True, exist_ok=True)
211
  print(" ✅ Created subfolders: latex_proj / poster_latex_proj / slide_imgs")
212
 
213
- # ================
214
- # Step 0: Download from arXiv
215
- # ================
216
  try:
217
  if args.arxiv_url:
218
  import requests, tarfile
@@ -255,59 +236,13 @@ if __name__ == '__main__':
255
  except Exception as e:
256
  print(f"❌ Step 0 failed: {e}")
257
 
258
- # =========================
259
- # Step 1: Slide Generation
260
- # =========================
261
- # try:
262
- # print("🧩 Step 1: Generating Slides ...")
263
- # slide_latex_path = path.join(args.paper_latex_root, "slides.tex")
264
- # slide_image_dir = path.join(args.result_dir, 'slide_imgs')
265
- # os.makedirs(slide_image_dir, exist_ok=True)
266
-
267
- # start_time = time.time()
268
- # prompt_path = "./Paper2Video/src/prompts/slide_beamer_prompt.txt"
269
-
270
- # if args.if_tree_search:
271
- # usage_slide, beamer_path = latex_code_gen(
272
- # prompt_path=prompt_path,
273
- # tex_dir=args.paper_latex_root,
274
- # beamer_save_path=slide_latex_path,
275
- # model_config_ll=get_agent_config(args.model_name_t),
276
- # model_config_vl=get_agent_config(args.model_name_v),
277
- # beamer_temp_name=args.beamer_templete_prompt
278
- # )
279
- # else:
280
- # paper_latex_path = path.join(args.paper_latex_root, "main.tex")
281
- # usage_slide = latex_code_gen(
282
- # prompt_path=prompt_path,
283
- # tex_dir=args.paper_latex_root,
284
- # tex_path=paper_latex_path,
285
- # beamer_save_path=slide_latex_path,
286
- # model_config=get_agent_config(args.model_name_t)
287
- # )
288
- # beamer_path = slide_latex_path
289
-
290
- # if not os.path.exists(beamer_path):
291
- # raise FileNotFoundError(f"❌ Beamer PDF not found: {beamer_path}")
292
-
293
- # slide_imgs = convert_from_path(beamer_path, dpi=400)
294
- # for i, img in enumerate(slide_imgs):
295
- # img.save(path.join(slide_image_dir, f"{i+1}.png"))
296
- # print("✅ Step 1 done.")
297
- # except Exception as e:
298
- # print(f"❌ Step 1 failed: {e}")
299
-
300
- # =========================
301
- # Step 1.5: Poster2Poster 内容生成
302
- # =========================
303
  try:
304
  run_paper2poster_content_build()
305
  except Exception as e:
306
  print(f"❌ Step 1.5 failed: {e}")
307
 
308
- # =========================
309
- # Step 2: Build Poster
310
- # =========================
311
  try:
312
  print("🧩 Step 2: Building poster ...")
313
  build_poster()
@@ -315,9 +250,7 @@ if __name__ == '__main__':
315
  except Exception as e:
316
  print(f"❌ Step 2 failed: {e}")
317
 
318
- # =========================
319
- # Step 3: 导出 latex_proj & 处理 LOGO & 应用 template
320
- # =========================
321
  try:
322
  src_lp = PB_ROOT / "latex_proj"
323
  dst_lp = ROOT_DIR / "output" / "poster_latex_proj"
@@ -344,16 +277,13 @@ if __name__ == '__main__':
344
  print("⚠️ template directory not found, skipping Step 3.5.")
345
 
346
  logos_out_dir = dst_lp / "logos"
347
- # logos_out_dir.mkdir(parents=True, exist_ok=True)
348
  left_logo_path = logos_out_dir / "left_logo.png"
349
 
350
  if len(logo_files) == 1:
351
- # 单图:拷贝并转成 PNG(以确保一致)
352
  im = Image.open(logo_files[0]).convert("RGBA")
353
  im.save(left_logo_path, format="PNG")
354
  print(f"🖼️ Single logo saved → {left_logo_path.relative_to(ROOT_DIR)}")
355
  else:
356
- # 多图:拼接
357
  _compose_logos_horizontally(logo_files, left_logo_path, box_w=2000, box_h=476, gap=16)
358
 
359
  print("✅ Step 3 done.")
 
21
  PB_ROOT = ROOT_DIR / "posterbuilder"
22
  sys.path.append(str(P2P_ROOT))
23
 
24
+ print(f"🔒 Workspace ROOT_DIR = {ROOT_DIR}")
25
+ print(f"🔒 This run is isolated under: {ROOT_DIR.resolve()}")
26
+
27
  def copy_folder(src_dir, dst_dir):
28
  src_dir = Path(src_dir)
29
  dst_dir = Path(dst_dir)
 
43
  dst.parent.mkdir(parents=True, exist_ok=True)
44
  shutil.copy2(src, dst)
45
 
46
def str2list(s):
    """Turn a comma-separated string such as ``"1,2,3"`` into a list of ints.

    Each comma-delimited piece is passed to ``int``, so a piece that is not
    a valid integer literal raises ``ValueError``.
    """
    return list(map(int, s.split(',')))
48
 
49
  def run_paper2poster_content_build():
 
99
  return files
100
 
101
  def _compose_logos_horizontally(logo_paths, out_path: Path, box_w=2000, box_h=476, gap=16):
102
+ # Scale every logo by one common factor so that, together with the gaps, they fill exactly box_w pixels; the canvas height follows from that scale.
 
 
 
 
 
 
103
  imgs = []
104
  for p in logo_paths:
105
  p = Path(p)
 
109
  if n == 0:
110
  raise RuntimeError("No logo images found.")
111
 
 
112
  widths = [im.width for im in imgs]
113
  heights = [im.height for im in imgs]
114
  sum_w = sum(widths)
115
  if sum_w <= 0:
116
  raise RuntimeError("All logo images have zero width.")
117
 
 
 
118
  total_gap = max(0, gap * (n - 1))
119
  if box_w <= total_gap:
120
  raise ValueError(f"box_w({box_w}) too small vs total gaps({total_gap}). Increase box_w or reduce gap.")
121
  s = (box_w - total_gap) / float(sum_w)
122
 
 
123
  resized = []
124
  scaled_widths = []
 
125
  for im, w, h in zip(imgs, widths, heights):
126
  nw = max(1, int(round(w * s)))
127
  nh = max(1, int(round(h * s)))
128
  resized.append(im.resize((nw, nh), Image.LANCZOS))
129
  scaled_widths.append(nw)
 
130
 
 
131
  current_sum_w = sum(scaled_widths)
132
  diff = (box_w - total_gap) - current_sum_w
 
133
  if diff != 0:
134
  order = sorted(range(n), key=lambda i: scaled_widths[i], reverse=(diff > 0))
135
  idx = 0
 
144
  remaining -= 1
145
  idx += 1
146
 
 
147
  total_w = sum(scaled_widths) + total_gap
148
  assert total_w == box_w, f"width pack mismatch: got {total_w}, expect {box_w}"
149
  canvas_w = box_w
150
+ canvas_h = max(im.height for im in resized)
151
 
152
+ from PIL import Image as PILImage
153
+ canvas = PILImage.new("RGBA", (canvas_w, canvas_h), (0, 0, 0, 0))
154
  cur_x = 0
155
  for idx, im in enumerate(resized):
156
  y = (canvas_h - im.height) // 2
 
159
  if idx != n - 1:
160
  cur_x += gap
161
 
 
162
  canvas.save(out_path, format="PNG")
163
+ print(f" 🧩 Logos composed (width-locked) → {out_path.relative_to(ROOT_DIR)} (n={n}, final_size={canvas_w}x{canvas_h})")
 
 
 
 
164
 
165
  if __name__ == '__main__':
166
  parser = argparse.ArgumentParser(description='Paper2Video Generation Pipeline')
 
179
  args = parser.parse_args()
180
  print("start")
181
 
182
+ # env
183
  os.environ["OPENAI_API_KEY"] = args.openai_key
184
  os.environ["GEMINI_API_KEY"] = args.gemini_key
185
 
186
+ # clean and create run-local output
187
  output_dir = ROOT_DIR / "output"
188
  if output_dir.exists():
189
  print(f" 🧹 Clearing old output directory: {output_dir.relative_to(ROOT_DIR)}")
 
193
  (output_dir / "slide_imgs").mkdir(parents=True, exist_ok=True)
194
  print(" ✅ Created subfolders: latex_proj / poster_latex_proj / slide_imgs")
195
 
196
+ # Step 0: arXiv download
 
 
197
  try:
198
  if args.arxiv_url:
199
  import requests, tarfile
 
236
  except Exception as e:
237
  print(f"❌ Step 0 failed: {e}")
238
 
239
+ # Step 1.5: content build
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
240
  try:
241
  run_paper2poster_content_build()
242
  except Exception as e:
243
  print(f"❌ Step 1.5 failed: {e}")
244
 
245
+ # Step 2: build poster
 
 
246
  try:
247
  print("🧩 Step 2: Building poster ...")
248
  build_poster()
 
250
  except Exception as e:
251
  print(f"❌ Step 2 failed: {e}")
252
 
253
+ # Step 3: export latex & apply template & logos
 
 
254
  try:
255
  src_lp = PB_ROOT / "latex_proj"
256
  dst_lp = ROOT_DIR / "output" / "poster_latex_proj"
 
277
  print("⚠️ template directory not found, skipping Step 3.5.")
278
 
279
  logos_out_dir = dst_lp / "logos"
 
280
  left_logo_path = logos_out_dir / "left_logo.png"
281
 
282
  if len(logo_files) == 1:
 
283
  im = Image.open(logo_files[0]).convert("RGBA")
284
  im.save(left_logo_path, format="PNG")
285
  print(f"🖼️ Single logo saved → {left_logo_path.relative_to(ROOT_DIR)}")
286
  else:
 
287
  _compose_logos_horizontally(logo_files, left_logo_path, box_w=2000, box_h=476, gap=16)
288
 
289
  print("✅ Step 3 done.")
posterbuilder/convert.py CHANGED
@@ -85,12 +85,27 @@ def fix_latex_escaped_commands(s: str) -> str:
85
  def escape_text(s: str) -> str:
86
  if not s:
87
  return ""
 
 
 
 
 
 
 
 
 
88
  rep = {"&": r"\&", "%": r"\%", "$": r"\$", "#": r"\#", "_": r"\_",
89
  "{": r"\{", "}": r"\}", "~": r"\textasciitilde{}", "^": r"\textasciicircum{}"}
90
  for k, v in rep.items():
91
  s = s.replace(k, v)
 
 
 
 
 
92
  return s
93
 
 
94
  def soft_wrap_title_for_logo(title: str, first_limit=68, next_limit=72) -> str:
95
  if not title or len(title) <= first_limit: return title
96
  def break_at(s: str, limit: int):
 
85
def escape_text(s: str) -> str:
    """Escape LaTeX special characters in *s*, leaving math spans untouched.

    Spans delimited by ``$...$`` or ``$$...$$`` (on a single line) are copied
    through verbatim so that subscripts, superscripts and braces inside math
    keep their meaning. Everywhere else the characters
    ``& % $ # _ { } ~ ^`` are replaced by their LaTeX text-mode escapes.
    Backslashes are not escaped.

    Args:
        s: Text to escape. A falsy value (``None`` or ``""``) yields ``""``.

    Returns:
        The escaped string.
    """
    if not s:
        return ""

    escapes = {
        "&": r"\&",
        "%": r"\%",
        "$": r"\$",
        "#": r"\#",
        "_": r"\_",
        "{": r"\{",
        "}": r"\}",
        "~": r"\textasciitilde{}",
        "^": r"\textasciicircum{}",
    }

    # One pass, two alternatives: a math span (group 1) is tried first and
    # kept as-is; otherwise a single special character (group 2) is escaped.
    # The previous implementation swapped math for "__MATH<i>__" placeholders,
    # but the "_" escape then rewrote the placeholder itself, so the math was
    # never restored. Doing it in one substitution needs no placeholder.
    pattern = re.compile(r"(\${1,2}.*?\${1,2})|([&%$#_{}~^])")

    def _substitute(match):
        math_span = match.group(1)
        if math_span is not None:
            return math_span
        return escapes[match.group(2)]

    return pattern.sub(_substitute, s)
107
 
108
+
109
  def soft_wrap_title_for_logo(title: str, first_limit=68, next_limit=72) -> str:
110
  if not title or len(title) <= first_limit: return title
111
  def break_at(s: str, limit: int):
posterbuilder/latex_proj/poster_output.tex CHANGED
@@ -46,7 +46,7 @@
46
  % Title
47
  % ====================
48
 
49
- \title{Paper2Poster: Towards Multimodal Poster}
50
 
51
  \author{Wei Pang\textsuperscript{1}, Kevin Qinghong Lin\textsuperscript{2}, Xiangru Jian\textsuperscript{1}, Xi He\textsuperscript{1}, Philip Torr\textsuperscript{3}}
52
 
@@ -76,7 +76,7 @@
76
 
77
 
78
  % --- injected font tweaks ---
79
- \setbeamerfont{title}{size=\Huge}
80
  \setbeamerfont{author}{size=\Large}
81
  \setbeamerfont{institute}{size=\large}
82
  \setbeamerfont{block title}{size=\Large}
@@ -97,7 +97,7 @@
97
  \separatorcolumn
98
  \begin{column}{\colwidth}
99
  \begin{block}{Why Posters Are Hard}
100
- We target \textbf{single-page, multimodal compression} of \textit{20K+ tokens} into clear panels. Posters demand \textcolor{blue}{tight text–visual coupling}, \textbf{layout balance}, and \textit{readable density}. Pure LLM/VLM approaches \textcolor{red}{miss spatial feedback}, causing overflow and incoherence. We reveal that \textbf{visual-in-the-loop planning} is essential to preserve reading order, keep figures relevant, and sustain \textit{engagement} within hard space limits.
101
 
102
  \begin{figure}
103
  \centering
@@ -106,8 +106,12 @@ We target \textbf{single-page, multimodal compression} of \textit{20K+ tokens} i
106
 
107
  \end{block}
108
 
109
- \begin{block}{Benchmark and Data}
110
- We launch the \textbf{Paper2Poster Benchmark}: \textcolor{blue}{100 paper–poster pairs} spanning \textit{280 topics}. Average input: \textcolor{blue}{20,370 tokens, 22.6 pages}. Output posters compress text by \textcolor{blue}{14.4×} and figures by \textcolor{blue}{2.6×}. Evaluation covers \textbf{Visual Quality}, \textbf{Textual Coherence}, \textbf{VLM-as-Judge}, and \textbf{PaperQuiz}. This suite spotlights \textit{semantic alignment}, \textbf{fluency}, and \textcolor{blue}{reader comprehension}.
 
 
 
 
111
 
112
  \begin{figure}
113
  \centering
@@ -116,8 +120,11 @@ We launch the \textbf{Paper2Poster Benchmark}: \textcolor{blue}{100 paper–post
116
 
117
  \end{block}
118
 
119
- \begin{block}{PaperQuiz: What Matters}
120
- We generate \textcolor{blue}{100 MCQs/paper}: \textbf{50 verbatim} + \textbf{50 interpretive}. Multiple VLM readers simulate \textit{novice-to-expert} audiences and answer from the poster only. Scores are length-penalized to reward \textbf{dense clarity}. Results \textbf{correlate with human judgment}, proving PaperQuiz captures \textcolor{blue}{information delivery} beyond surface visuals and discourages \textcolor{red}{verbose, unfocused designs}.
 
 
 
121
 
122
  \begin{figure}
123
  \centering
@@ -126,11 +133,8 @@ We generate \textcolor{blue}{100 MCQs/paper}: \textbf{50 verbatim} + \textbf{50
126
 
127
  \end{block}
128
 
129
- \end{column}
130
- \separatorcolumn
131
- \begin{column}{\colwidth}
132
  \begin{block}{PosterAgent Pipeline}
133
- Our \textbf{top-down, visual-in-the-loop} agent compresses long papers into coherent posters. \textbf{Parser} builds a structured asset library. \textbf{Planner} aligns text–visual pairs and produces a \textcolor{blue}{binary-tree layout}. \textbf{PainterCommenter} renders panels via code and uses VLM feedback to fix \textcolor{red}{overflow} and misalignment. The result: \textbf{balanced, legible}, editable posters.
134
 
135
  \begin{figure}
136
  \centering
@@ -139,16 +143,12 @@ Our \textbf{top-down, visual-in-the-loop} agent compresses long papers into cohe
139
 
140
  \end{block}
141
 
142
- \begin{block}{Parser: Structured Assets}
143
- We distill PDFs into \textbf{section synopses} and \textit{figure/table assets} using \textcolor{blue}{MARKER} and \textcolor{blue}{DOCLING}, then LLM summarization. The asset library preserves \textbf{hierarchy} and \textit{semantics} while shrinking context for efficient planning. This step boosts \textbf{visual-semantic matching} and reduces \textcolor{red}{noise}, enabling reliable downstream \textit{layout reasoning}.
144
- \end{block}
145
-
146
- \begin{block}{Planner: Layout Mastery}
147
- We semantically match \textbf{sections ↔ figures} and allocate space via a \textcolor{blue}{binary-tree layout} that preserves \textit{reading order}, aspect ratios, and \textbf{content length} estimates. Panels are populated iteratively, ensuring \textbf{text brevity} and \textit{visual balance}. This strategy stabilizes coordinates and avoids \textcolor{red}{LLM numeric drift} in absolute placements.
148
 
149
  \begin{figure}
150
  \centering
151
- \includegraphics[width=0.80\linewidth]{figures/paper-picture-30.png}
152
  \end{figure}
153
 
154
  \end{block}
@@ -156,30 +156,26 @@ We semantically match \textbf{sections ↔ figures} and allocate space via a \te
156
  \end{column}
157
  \separatorcolumn
158
  \begin{column}{\colwidth}
159
- \begin{block}{Painter–Commenter Loop}
160
- The \textbf{Painter} turns section–figure pairs into crisp bullets and executable \textcolor{blue}{python-pptx} code, rendering draft panels. The \textbf{Commenter} VLM zooms into panels, using \textit{in-context examples} to flag \textcolor{red}{overflow} or \textcolor{red}{blankness}. Iterations continue until \textbf{fit and alignment} are achieved, producing \textit{readable, compact} panels with minimal revision cycles.
161
 
162
  \begin{figure}
163
  \centering
164
- \includegraphics[width=0.80\linewidth]{figures/paper-picture-61.png}
165
  \end{figure}
166
 
167
  \end{block}
168
 
169
- \begin{block}{Results: Stronger, Leaner}
170
- Our open-source variants beat \textcolor{blue}{4o-driven multi-agents} on most metrics, with \textcolor{blue}{87\% fewer tokens}. We hit \textbf{state-of-the-art figure relevance}, near-\textit{GT} visual similarity, and \textbf{high VLM-as-Judge} scores. PaperQuiz confirms \textbf{better knowledge transfer}. Cost is tiny: \textcolor{blue}{\$0.0045–\$0.55/poster}. Key bottleneck remains \textcolor{red}{Engagement}, guiding future design.
171
 
172
  \begin{figure}
173
  \centering
174
- \includegraphics[width=0.80\linewidth]{figures/paper-table-1.png}
175
  \end{figure}
176
 
177
  \end{block}
178
 
179
- \begin{block}{Limits and Next Steps}
180
- Current bottleneck: \textbf{sequential panel refinement} slows throughput (\textasciitilde{}\textcolor{blue}{4.5 min/doc}). We plan \textbf{panel-level parallelism}, \textit{external knowledge} integration (e.g., OpenReview), and \textbf{human-in-the-loop} editing for higher \textcolor{blue}{engagement}. These upgrades aim to boost \textbf{runtime, interactivity}, and \textit{visual storytelling}, pushing toward fully automated \textbf{author-grade posters}.
181
- \end{block}
182
-
183
  \end{column}
184
  \separatorcolumn
185
  \end{columns}
 
46
  % Title
47
  % ====================
48
 
49
+ \title{Paper2Poster: \ Towards Multimodal Poster Automation from Scientific Papers}
50
 
51
  \author{Wei Pang\textsuperscript{1}, Kevin Qinghong Lin\textsuperscript{2}, Xiangru Jian\textsuperscript{1}, Xi He\textsuperscript{1}, Philip Torr\textsuperscript{3}}
52
 
 
76
 
77
 
78
  % --- injected font tweaks ---
79
+ \setbeamerfont{title}{size=\huge}
80
  \setbeamerfont{author}{size=\Large}
81
  \setbeamerfont{institute}{size=\large}
82
  \setbeamerfont{block title}{size=\Large}
 
97
  \separatorcolumn
98
  \begin{column}{\colwidth}
99
  \begin{block}{Why Posters Are Hard}
100
+ We tackle \textbf{single-page multimodal compression}: dense papers must become legible posters with \textcolor{red}{tight spatial constraints}. Pure LLM or VLM approaches \textbf{struggle with layout}, missing \textit{reading order} and \textbf{overflow control}. We reveal \textcolor{blue}{visual-in-the-loop} planning is key to \textbf{clarity}, \textbf{balance}, and \textbf{engagement}.
101
 
102
  \begin{figure}
103
  \centering
 
106
 
107
  \end{block}
108
 
109
+ \begin{block}{Benchmark \& Task}
110
+ We introduce \textbf{Paper2Poster} and the task: generate a \textbf{single-page}, well-balanced poster that faithfully conveys core ideas. The protocol measures \textit{what matters}: \textbf{visual alignment}, \textbf{text fluency}, \textbf{holistic quality}, and knowledge transfer via \textcolor{blue}{PaperQuiz}. Our setup \textbf{standardizes evaluation} for automated poster generation.
111
+ \end{block}
112
+
113
+ \begin{block}{Curated Diverse Dataset}
114
+ Dataset spans \textcolor{blue}{100} paper–poster pairs (NeurIPS, ICML, ICLR). Papers average \textcolor{blue}{22.6} pages and \textcolor{blue}{20K+} tokens; posters average \textcolor{blue}{1.4K} tokens. We observe \textbf{14.4x} text compression and \textbf{2.6x} figure reduction. Coverage: CV (\textcolor{blue}{19\%}), NLP (\textcolor{blue}{17\%}), RL (\textcolor{blue}{10\%})—driving \textbf{robustness}.
115
 
116
  \begin{figure}
117
  \centering
 
120
 
121
  \end{block}
122
 
123
+ \end{column}
124
+ \separatorcolumn
125
+ \begin{column}{\colwidth}
126
+ \begin{block}{Four-Pronged Evaluation}
127
+ Our \textbf{four-pronged} suite tests end-to-end quality: Visual Quality via \textcolor{blue}{AltCLIP} similarity and \textbf{figure relevance}; Textual Coherence via \textcolor{blue}{PPL} (Llama-2-7B); VLM-as-Judge across \textbf{6 criteria}; and \textcolor{blue}{PaperQuiz} with length-aware penalties rewarding \textbf{dense, readable} designs.
128
 
129
  \begin{figure}
130
  \centering
 
133
 
134
  \end{block}
135
 
 
 
 
136
  \begin{block}{PosterAgent Pipeline}
137
+ PosterAgent is \textbf{top-down, visual-in-the-loop}. \textit{Parser} builds a semantic asset library; \textit{Planner} aligns text–visual pairs and uses \textcolor{blue}{binary-tree} layouts to preserve \textbf{reading order}. \textit{Painter-Commenter} renders panels, applies \textcolor{blue}{zoom-in} VLM feedback, and fixes \textbf{overflow} and \textbf{alignment}—yielding concise, coherent posters.
138
 
139
  \begin{figure}
140
  \centering
 
143
 
144
  \end{block}
145
 
146
+ \begin{block}{Main Results}
147
+ Across metrics, \textbf{PosterAgent} variants beat multi-agent baselines. We attain \textcolor{blue}{state-of-the-art figure relevance} and near-\textbf{human} visual similarity. GPT-4o pixel posters look good but show \textcolor{red}{noisy text} and high \textcolor{red}{PPL}. VLM-as-Judge scores place PosterAgent-4o at \textcolor{blue}{3.72} overall, approaching GT posters.
 
 
 
 
148
 
149
  \begin{figure}
150
  \centering
151
+ \includegraphics[width=0.80\linewidth]{figures/paper-table-1.png}
152
  \end{figure}
153
 
154
  \end{block}
 
156
  \end{column}
157
  \separatorcolumn
158
  \begin{column}{\colwidth}
159
+ \begin{block}{PaperQuiz Insights}
160
+ \textcolor{blue}{PaperQuiz} tracks human judgment and rewards \textbf{informative brevity}. With penalties, GT posters lead; \textbf{PosterAgent} tops automated methods. Open-source \textcolor{blue}{Qwen-2.5} stacks stay \textbf{competitive}. Stronger reader VLMs exploit \textbf{structured layouts}, outperforming blog-like or \textcolor{red}{text-garbling} image generations.
161
 
162
  \begin{figure}
163
  \centering
164
+ \includegraphics[width=0.80\linewidth]{figures/paper-picture-9.png}
165
  \end{figure}
166
 
167
  \end{block}
168
 
169
+ \begin{block}{Efficient, Open, Scalable}
170
+ Our pipeline slashes tokens by \textcolor{blue}{60–87\%}. PosterAgent-4o uses \textcolor{blue}{101K} tokens (\textcolor{blue}{\$0.55}); PosterAgent-Qwen uses \textcolor{blue}{47.6K} (\textcolor{blue}{\$0.0045}). Runtime \textcolor{blue}{4.5 min}. \textcolor{red}{Bottleneck}: sequential panel refinement; \textbf{future} parallelism, external knowledge, and human-in-the-loop will boost \textbf{engagement}.
171
 
172
  \begin{figure}
173
  \centering
174
+ \includegraphics[width=0.80\linewidth]{figures/paper-table-8.png}
175
  \end{figure}
176
 
177
  \end{block}
178
 
 
 
 
 
179
  \end{column}
180
  \separatorcolumn
181
  \end{columns}
requirements.txt CHANGED
@@ -68,6 +68,7 @@ marko==2.1.2
68
  python-docx==1.1.2
69
  deepsearch-glm==1.0.0
70
  openpyxl==3.1.5
 
71
 
72
  # ========= LangChain / LLMs =========
73
  langchain==0.3.17
 
68
  python-docx==1.1.2
69
  deepsearch-glm==1.0.0
70
  openpyxl==3.1.5
71
+ tectonic @ https://github.com/tectonic-typesetting/tectonic/releases/download/tectonic%400.15.0/tectonic-0.15.0-x86_64-unknown-linux-gnu.tar.gz
72
 
73
  # ========= LangChain / LLMs =========
74
  langchain==0.3.17