Spaces:
Running
Running
Update index.html
Browse files- index.html +2 -7
index.html
CHANGED
|
@@ -137,13 +137,8 @@ gradient norm and then apply soft removal on them to mitigate the potential jail
|
|
| 137 |
<span id="Refusal-Loss" class="formula" style="">
|
| 138 |
$$
|
| 139 |
\displaystyle
|
| 140 |
-
\
|
| 141 |
-
\
|
| 142 |
-
JB (y) &= \begin{cases}
|
| 143 |
-
1 \text{, if $y$ contains any jailbreak keyword;} \\
|
| 144 |
-
0 \text{, otherwise.}
|
| 145 |
-
\end{cases}
|
| 146 |
-
\end{aligned}
|
| 147 |
$$
|
| 148 |
</span>
|
| 149 |
<span id="Refusal-Loss-Approximation" class="formula" style="display: none;">
|
|
|
|
| 137 |
<span id="Refusal-Loss" class="formula" style="">
|
| 138 |
$$
|
| 139 |
\displaystyle
|
| 140 |
+
x_{1:n}=\mathtt{embed}_\theta(q_{1:n})\\
|
| 141 |
+
\mathtt{Affirmation~Loss}(x_{1:n},\theta)=-\log P(y \mid x_{1:n})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
$$
|
| 143 |
</span>
|
| 144 |
<span id="Refusal-Loss-Approximation" class="formula" style="display: none;">
|