\documentclass{beamer}
% Theme and Color
\usetheme{Madrid}
\usecolortheme{default}
% Packages
\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage{amsmath, amssymb, amsfonts}
\usepackage{booktabs}
\usepackage{graphicx}
\usepackage{bm} % For bold math symbols
\usepackage{hyperref} % Load hyperref last
% Custom commands from the source text for consistency
\newcommand*{\KL}{D_{\mathrm{KL}}}
\newcommand*{\figref}[1]{Figure~\ref{#1}}
\title[Meta-Safe RL]{A CMDP-within-online framework for Meta-Safe Reinforcement Learning}
\author{Vanshaj Khattar\inst{1} \and Yuhao Ding\inst{2} \and Bilgehan Sel\inst{1} \and Javad Lavaei\inst{2} \and Ming Jin\inst{1}}
\institute[VT \& UCB]{
  \inst{1} Virginia Tech \\
  \inst{2} UC Berkeley
}
\date{\today}
\setbeamerfont{caption}{size=\scriptsize}
\begin{document}
\begin{frame}{Experimental Results: MuJoCo Environments}
  \centering
  \textbf{Half-Cheetah (Low Task-Similarity)}
  \begin{figure}
    \includegraphics[width=0.6\textwidth]{HalfCheetah/HalfCheetahReward_low_task_similarity_broken_axis}
    \includegraphics[width=0.6\textwidth]{HalfCheetah/HalfCheetahCost_low_task_similarity}
    \caption{Reward (top) and constraint violation (bottom) for Half-Cheetah. Our method (Meta-SRL) learns a high-reward policy while keeping the constraint violation below the threshold (blue line).}
    \label{fig:halfcheetah}
  \end{figure}
\end{frame}
\end{document}