% File size: 1,421 Bytes
% 7c08dc3 |
% 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 |
\documentclass{beamer}

% Theme and color theme
\usetheme{Madrid}
\usecolortheme{default}

% Packages
\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage{amsmath, amssymb, amsfonts}
\usepackage{booktabs}
\usepackage{graphicx}
\usepackage{hyperref}
\usepackage{bm} % For bold math symbols

% Custom commands from the source text for consistency.
% Both are declared with \newcommand* so that a clash with an existing macro
% name is reported as an error instead of being silently overwritten (which
% is what the TeX primitive \def would do).

% \KL -- symbol D with an upright "KL" subscript; math mode only.
\newcommand*{\KL}{D_{\mathrm{KL}}}

% \figref{<label>} -- typesets "Figure~<number>" for the given label; the tie
% keeps the word and the number on the same line.
\newcommand*{\figref}[1]{Figure~\ref{#1}}
% ---- Presentation metadata ----
% The optional [..] arguments are the short forms of the title and institute.
\title[Meta-Safe RL]{A CMDP-within-online framework for Meta-Safe Reinforcement Learning}

% Authors; \inst{n} ties each author to the matching entry in \institute.
\author{%
  Vanshaj Khattar\inst{1} \and
  Yuhao Ding\inst{2} \and
  Bilgehan Sel\inst{1} \and
  Javad Lavaei\inst{2} \and
  Ming Jin\inst{1}%
}

\institute[VT \& UCB]{
  \inst{1} Virginia Tech \\
  \inst{2} UC Berkeley
}

% Date is regenerated on every compilation.
\date{\today}

% Typeset figure/table captions in \scriptsize.
\setbeamerfont{caption}{size=\scriptsize}
\begin{document}

% Results frame: a bold heading followed by two stacked plots (reward on top,
% constraint violation below) that share a single caption and label.
\begin{frame}{Experimental Results: MuJoCo Environments}
  \centering
  \textbf{Half-Cheetah (Low Task-Similarity)}
  \begin{figure}
    % Each plot is at most 0.6\textwidth wide AND at most 0.3\textheight tall;
    % keepaspectratio scales it to satisfy both limits without distortion, so
    % the two plots plus heading and caption stay inside the slide.
    % NOTE(review): 0.3\textheight is an estimate -- tune against the real
    % plots' aspect ratio if the slide looks too sparse or too tight.
    % The explicit \\ keeps the plots stacked (as the caption describes) even
    % when the height cap makes them narrow enough to fit side by side.
    \includegraphics[width=0.6\textwidth,height=0.3\textheight,keepaspectratio]{HalfCheetah/HalfCheetahReward_low_task_similarity_broken_axis.pdf}\\
    \includegraphics[width=0.6\textwidth,height=0.3\textheight,keepaspectratio]{HalfCheetah/HalfCheetahCost_low_task_similarity.pdf}
    \caption{Reward (top) and constraint violation (bottom) for Half-Cheetah. Our method (Meta-SRL) learns a high-reward policy while keeping the constraint violation below the threshold (blue line).}
    \label{fig:halfcheetah}
  \end{figure}
\end{frame}

\end{document}