File size: 1,421 Bytes
7c08dc3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
% Beamer presentation (slide deck) document class.
\documentclass{beamer}


% Theme and Color
% Madrid presentation theme together with the "default" color theme.
\usetheme{Madrid}
\usecolortheme{default}

% Packages
\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage{amsmath, amssymb, amsfonts}
\usepackage{booktabs}
\usepackage{graphicx}
\usepackage{hyperref}
\usepackage{bm} % For bold math symbols

% Custom commands from the source text for consistency.
% Both are declared with \newcommand* instead of the TeX primitive \def, so a
% name clash with an already-defined macro is reported as an error rather than
% silently replacing that macro.

% \KL -- math-mode symbol: D with an upright "KL" subscript
% (presumably the KL-divergence notation of the source text).
\newcommand*{\KL}{D_{\mathrm{KL}}}

% \figref{<label>} -- the word "Figure", a non-breaking space, and the
% reference number of <label>.
\newcommand*{\figref}[1]{Figure~\ref{#1}}

% Title-page metadata.
% The bracketed optional arguments of \title and \institute are their short
% forms. Each \inst{n} after an author name matches the institute line that
% carries the same number.
\title[Meta-Safe RL]{A CMDP-within-online framework for Meta-Safe Reinforcement Learning}
\author{%
  Vanshaj Khattar\inst{1} \and
  Yuhao Ding\inst{2} \and
  Bilgehan Sel\inst{1} \and
  Javad Lavaei\inst{2} \and
  Ming Jin\inst{1}%
}
\institute[VT \& UCB]{
  \inst{1} Virginia Tech \\
  \inst{2} UC Berkeley
}
\date{\today}

% Captions are typeset at \scriptsize.
\setbeamerfont{caption}{size=\scriptsize}
\begin{document}
% Slide: Half-Cheetah results for the low task-similarity setting.
% The reward plot is stacked above the constraint-violation plot, matching the
% "(top)" / "(bottom)" wording of the caption.
\begin{frame}{Experimental Results: MuJoCo Environments}
    \centering
    \textbf{Half-Cheetah (Low Task-Similarity)}
    \begin{figure}
        % Each plot is bounded by height as well as width (aspect ratio kept),
        % so the two stacked plots cannot outgrow the slide and room is left
        % for the frame title, the heading and the caption. The explicit line
        % break after the first plot keeps them stacked even when the height
        % bound makes both narrow enough to fit on a single line.
        \includegraphics[width=0.8\textwidth,height=0.3\textheight,keepaspectratio]{HalfCheetah/HalfCheetahReward_low_task_similarity_broken_axis.pdf}\\
        \includegraphics[width=0.8\textwidth,height=0.3\textheight,keepaspectratio]{HalfCheetah/HalfCheetahCost_low_task_similarity.pdf}
        \caption{Reward (top) and constraint violation (bottom) for Half-Cheetah. Our method (Meta-SRL) learns a high-reward policy while keeping the constraint violation below the threshold (blue line).}
        \label{fig:halfcheetah}
    \end{figure}
\end{frame}
\end{document}