% PaperShow / Paper2Video /src /latex_proj /slidesproposal_0.75.tex
% ZaynZhu
% Clean version without large assets
% 7c08dc3
\documentclass{beamer}
% Theme and Color
\usetheme{Madrid}
\usecolortheme{default}
% Packages
\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage{amsmath, amssymb, amsfonts}
\usepackage{booktabs}
\usepackage{graphicx}
\usepackage{hyperref}
\usepackage{bm} % For bold math symbols
% Custom commands from the source text for consistency.
% Both use \newcommand* so that an accidental name clash raises an error
% instead of silently overwriting an existing macro (as \def would), and so
% that a runaway argument is reported at the first paragraph break.

% \KL — typesets D with an upright "KL" subscript; math mode only.
\newcommand*{\KL}{D_{\mathrm{KL}}}

% \figref{<label>} — "Figure~<number>" for the given label; the tie keeps
% the word and the number on the same line.
\newcommand*{\figref}[1]{Figure~\ref{#1}}
% --- Title-page metadata ---------------------------------------------------
% Bracketed optional arguments are the short forms; braced arguments are the
% full forms.
\title[Meta-Safe RL]{A CMDP-within-online framework for Meta-Safe Reinforcement Learning}
% One author per line; each \inst{n} matches the \inst{n} marker used in
% \institute below.
\author{Vanshaj Khattar\inst{1}
  \and Yuhao Ding\inst{2}
  \and Bilgehan Sel\inst{1}
  \and Javad Lavaei\inst{2}
  \and Ming Jin\inst{1}}
\institute[VT \& UCB]{
  \inst{1} Virginia Tech \\
  \inst{2} UC Berkeley
}
% Date is regenerated at every compilation.
\date{\today}
% Set the beamer "caption" font to \scriptsize.
\setbeamerfont{caption}{size=\scriptsize}
\begin{document}
% Results slide: Half-Cheetah reward and constraint-violation plots, stacked
% vertically (reward on top, cost below) above a shared caption.
\begin{frame}{Experimental Results: MuJoCo Environments}
  \centering
  \textbf{Half-Cheetah (Low Task-Similarity)}
  \begin{figure}
    % The caption refers to a top/bottom arrangement. Previously the images
    % stacked only because their combined width (0.6 + 0.6 of \textwidth)
    % overflowed the line; the explicit \\ keeps the layout correct even if
    % the widths are reduced later.
    \includegraphics[width=0.6\textwidth]{HalfCheetah/HalfCheetahReward_low_task_similarity_broken_axis.pdf}\\
    \includegraphics[width=0.6\textwidth]{HalfCheetah/HalfCheetahCost_low_task_similarity.pdf}
    \caption{Reward (top) and constraint violation (bottom) for Half-Cheetah. Our method (Meta-SRL) learns a high-reward policy while keeping the constraint violation below the threshold (blue line).}
    \label{fig:halfcheetah} % must follow \caption so the label picks up the figure number
  \end{figure}
\end{frame}
\end{document}