File size: 7,322 Bytes
7c08dc3 0d563bd 7c08dc3 0d563bd 7c08dc3 c643f73 7c08dc3 0d563bd 7c08dc3 0d563bd 7c08dc3 930133a 7c08dc3 0d563bd 7c08dc3 c643f73 7c08dc3 0d563bd 7c08dc3 0d563bd c643f73 0d563bd c643f73 0d563bd 7c08dc3 c643f73 7c08dc3 0d563bd 7c08dc3 0d563bd c643f73 7c08dc3 0d563bd 7c08dc3 c643f73 7c08dc3 c643f73 7c08dc3 c643f73 0d563bd c643f73 0d563bd c643f73 0d563bd c643f73 0d563bd 7c08dc3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 |
% Unofficial University of Cambridge Poster Template
% https://github.com/andiac/gemini-cam
% a fork of https://github.com/anishathalye/gemini
% also refer to https://github.com/k4rtik/uchicago-poster
\documentclass[final]{beamer}
% ====================
% Packages
% ====================
\usepackage[T1]{fontenc}
\usepackage{lmodern}
\usepackage[size=custom,width=120,height=72,scale=1.0]{beamerposter}
\usetheme{gemini}
\usecolortheme{cam}
\usepackage{graphicx}
\usepackage{booktabs}
\usepackage[numbers]{natbib}
\usepackage{tikz}
\usepackage{pgfplots}
\pgfplotsset{compat=1.14}
\usepackage{anyfontsize}
% Accent colour used for block titles, given as 0-255 RGB components.
\definecolor{nipspurple}{RGB}{94,46,145}
% Headline: black foreground on a white background.
\setbeamercolor{headline}{bg=white, fg=black}
% Block titles: white foreground on the accent colour.
\setbeamercolor{block title}{bg=nipspurple, fg=white}
% Prepend two length assignments to the `block begin' template.
% The trailing % signs keep the line ends from adding spaces to the template.
% NOTE(review): \textpaddingtop and \textpaddingbottom are not declared in this
% file; presumably the theme provides them -- confirm.
\addtobeamertemplate{block begin}{
  \setlength{\textpaddingtop}{0.2em}%
  \setlength{\textpaddingbottom}{0.2em}%
}{}
% ====================
% Lengths
% ====================
% Column geometry.
% For N columns, pick \sepwidth and \colwidth so that
%   (N+1)*\sepwidth + N*\colwidth = \paperwidth.
% With the values below and N = 3: 4*0.025 + 3*0.3 = 1.0 of \paperwidth.
\newlength{\sepwidth}
\setlength{\sepwidth}{0.025\paperwidth}
\newlength{\colwidth}
\setlength{\colwidth}{0.3\paperwidth}
% An empty column of width \sepwidth, used as a gutter in the columns layout.
\newcommand{\separatorcolumn}{%
  \begin{column}{\sepwidth}\end{column}%
}
% ====================
% Title
% ====================
% Poster title. The stray control space (`\ ') that followed the colon has been
% removed; it produced a doubled inter-word space in the typeset title.
\title{Paper2Poster: Towards Multimodal Poster Automation from Scientific Papers}
% Authors, each tagged with a superscript affiliation index.
\author{Wei Pang\textsuperscript{1}, Kevin Qinghong Lin\textsuperscript{2}, Xiangru Jian\textsuperscript{1}, Xi He\textsuperscript{1}, Philip Torr\textsuperscript{3}}
% Affiliations; the indices are set as superscripts so they match the author markers.
\institute[shortinst]{\textsuperscript{1}University of Waterloo; \textsuperscript{2}National University of Singapore; \textsuperscript{3}University of Oxford}
% ====================
% Footer (optional)
% ====================
% Footer content: a hyperlink to the project page and an attribution string,
% each followed by \hfill (the second \hfill has nothing after it).
\footercontent{
  \href{https://paper2poster.github.io/}{https://paper2poster.github.io/} \hfill
  Generated by Paper2Poster \hfill
}
% (can be left out to remove footer)
% ====================
% Logo (optional)
% ====================
% use this to include logos on the left and/or right side of the header:
% Header logos; the two are sized independently (4cm on the left, 5cm on the right).
\logoleft{\includegraphics[height=4cm]{logos/left_logo.png}}
\logoright{\includegraphics[height=5cm]{logos/right_logo.png}}
% ====================
% Body
% ====================
% --- injected font tweaks ---
% Font sizes for the header elements, largest first.
\setbeamerfont{title}{size=\huge}
\setbeamerfont{author}{size=\Large}
\setbeamerfont{institute}{size=\large}
% Font sizes for block headings and block text.
\setbeamerfont{block title}{size=\Large}
\setbeamerfont{block body}{size=\large}
\begin{document}
% Refer to https://github.com/k4rtik/uchicago-poster
% logo: https://www.cam.ac.uk/brand-resources/about-the-logo/logo-downloads
% Append a TikZ overlay to the headline template, anchored near the top-left
% corner of the page.
% A TikZ \node needs a text argument in braces and a terminating semicolon;
% the original had neither, which stops compilation. The node text is left
% empty here so nothing is drawn -- put an \includegraphics{...} inside the
% braces to place a logo in the overlay.
\addtobeamertemplate{headline}{}
{
  \begin{tikzpicture}[remember picture,overlay]
    \node [anchor=north west, inner sep=3cm]
      at ([xshift=0.0cm,yshift=1.0cm]current page.north west)
      {};
  \end{tikzpicture}
}
\begin{frame}[t]
\begin{columns}[t]
\separatorcolumn
\begin{column}{\colwidth}
\begin{block}{Why Posters Are Hard}
  % Motivation text followed by one figure at 80% of the line width.
  We tackle \textbf{single-page multimodal compression}:
  dense papers must become legible posters with \textcolor{red}{tight spatial constraints}.
  Pure LLM or VLM approaches \textbf{struggle with layout},
  missing \textit{reading order} and \textbf{overflow control}.
  We reveal \textcolor{blue}{visual-in-the-loop} planning is key to
  \textbf{clarity}, \textbf{balance}, and \textbf{engagement}.
  \begin{figure}
    \centering
    \includegraphics[width=0.80\linewidth]{figures/paper-picture-1.png}
  \end{figure}
\end{block}
\begin{block}{Benchmark \& Task}
  % Text-only block: states the task and the evaluation protocol.
  We introduce \textbf{Paper2Poster} and the task:
  generate a \textbf{single-page}, well-balanced poster that faithfully conveys core ideas.
  The protocol measures \textit{what matters}:
  \textbf{visual alignment}, \textbf{text fluency}, \textbf{holistic quality},
  and knowledge transfer via \textcolor{blue}{PaperQuiz}.
  Our setup \textbf{standardizes evaluation} for automated poster generation.
\end{block}
\begin{block}{Curated Diverse Dataset}
  % Dataset statistics followed by one figure at 80% of the line width.
  % Dashes are written as the ASCII ligatures -- and --- instead of raw Unicode
  % characters, so the source does not depend on the engine or input encoding;
  % the compression factors use $\times$ rather than the letter x.
  Dataset spans \textcolor{blue}{100} paper--poster pairs (NeurIPS, ICML, ICLR).
  Papers average \textcolor{blue}{22.6} pages and \textcolor{blue}{20K+} tokens;
  posters average \textcolor{blue}{1.4K} tokens.
  We observe \textbf{14.4$\times$} text compression and \textbf{2.6$\times$} figure reduction.
  Coverage: CV (\textcolor{blue}{19\%}), NLP (\textcolor{blue}{17\%}), RL (\textcolor{blue}{10\%})---driving \textbf{robustness}.
  \begin{figure}
    \centering
    \includegraphics[width=0.80\linewidth]{figures/paper-picture-6.png}
  \end{figure}
\end{block}
\end{column}
\separatorcolumn
\begin{column}{\colwidth}
\begin{block}{Four-Pronged Evaluation}
  % One sentence listing the four evaluation prongs, one prong per source line,
  % followed by one figure at 80% of the line width.
  Our \textbf{four-pronged} suite tests end-to-end quality:
  Visual Quality via \textcolor{blue}{AltCLIP} similarity and \textbf{figure relevance};
  Textual Coherence via \textcolor{blue}{PPL} (Llama-2-7B);
  VLM-as-Judge across \textbf{6 criteria};
  and \textcolor{blue}{PaperQuiz} with length-aware penalties rewarding \textbf{dense, readable} designs.
  \begin{figure}
    \centering
    \includegraphics[width=0.80\linewidth]{figures/paper-picture-7.png}
  \end{figure}
\end{block}
\begin{block}{PosterAgent Pipeline}
  % Pipeline summary followed by one figure at 80% of the line width.
  % The en dash in `text--visual' and the em dash before `yielding' are written
  % as ASCII ligatures instead of raw Unicode characters, so the source does not
  % depend on the engine or input encoding.
  PosterAgent is \textbf{top-down, visual-in-the-loop}.
  \textit{Parser} builds a semantic asset library;
  \textit{Planner} aligns text--visual pairs and uses \textcolor{blue}{binary-tree} layouts to preserve \textbf{reading order}.
  \textit{Painter-Commenter} renders panels, applies \textcolor{blue}{zoom-in} VLM feedback,
  and fixes \textbf{overflow} and \textbf{alignment}---yielding concise, coherent posters.
  \begin{figure}
    \centering
    \includegraphics[width=0.80\linewidth]{figures/paper-picture-8.png}
  \end{figure}
\end{block}
\begin{block}{Main Results}
  % Headline results followed by the results table, included as an image.
  Across metrics, \textbf{PosterAgent} variants beat multi-agent baselines.
  We attain \textcolor{blue}{state-leading figure relevance} and near-\textbf{human} visual similarity.
  GPT-4o pixel posters look good but show \textcolor{red}{noisy text} and high \textcolor{red}{PPL}.
  VLM-as-Judge scores place PosterAgent-4o at \textcolor{blue}{3.72} overall, approaching GT posters.
  \begin{figure}
    \centering
    \includegraphics[width=0.80\linewidth]{figures/paper-table-1.png}
  \end{figure}
\end{block}
\end{column}
\separatorcolumn
\begin{column}{\colwidth}
\begin{block}{PaperQuiz Insights}
  % PaperQuiz findings followed by one figure at 80% of the line width.
  \textcolor{blue}{PaperQuiz} tracks human judgment and rewards \textbf{informative brevity}.
  With penalties, GT posters lead; \textbf{PosterAgent} tops automated methods.
  Open-source \textcolor{blue}{Qwen-2.5} stacks stay \textbf{competitive}.
  Stronger reader VLMs exploit \textbf{structured layouts},
  outperforming blog-like or \textcolor{red}{text-garbling} image generations.
  \begin{figure}
    \centering
    \includegraphics[width=0.80\linewidth]{figures/paper-picture-9.png}
  \end{figure}
\end{block}
\begin{block}{Efficient, Open, Scalable}
  % Cost and runtime summary followed by the cost table, included as an image.
  % The approximation sign is set in math mode ($\approx$): the raw Unicode
  % character is not set up for text mode under pdfLaTeX and may be missing
  % from the text font under other engines. The percentage range uses the
  % ASCII en-dash ligature (--) instead of a raw Unicode dash.
  Our pipeline slashes tokens by \textcolor{blue}{60--87\%}.
  PosterAgent-4o uses \textcolor{blue}{101K} tokens (\textcolor{blue}{\$0.55});
  PosterAgent-Qwen uses \textcolor{blue}{47.6K} (\textcolor{blue}{\$0.0045}).
  Runtime $\approx$ \textcolor{blue}{4.5 min}.
  \textcolor{red}{Bottleneck}: sequential panel refinement;
  \textbf{future} parallelism, external knowledge, and human-in-the-loop will boost \textbf{engagement}.
  \begin{figure}
    \centering
    \includegraphics[width=0.80\linewidth]{figures/paper-table-8.png}
  \end{figure}
\end{block}
\end{column}
\separatorcolumn
\end{columns}
\end{frame}
\end{document}
|