Spaces:
Runtime error
Runtime error
Commit
·
208fe4c
1
Parent(s):
d7d8b33
style updates
Browse files
- evals/evals.yaml +3 -0
- frontend/public/smui.css +0 -0
- frontend/src/App.svelte +52 -24
- frontend/src/theme/_smui-theme.scss +48 -11
- zeno-evals-hub/frontend/index.html +3 -10
evals/evals.yaml
CHANGED
|
@@ -2,8 +2,11 @@
|
|
| 2 |
results-file: ./crossword/crossword-turbo.jsonl
|
| 3 |
second-results-file: ./crossword/crossword-turbo-0301.jsonl
|
| 4 |
functions-file: ./crossword/crossword_fns.py
|
|
|
|
| 5 |
- emotional-intelligence:
|
| 6 |
results-file: ./crossword/crossword-turbo.jsonl
|
| 7 |
second-results-file: ./crossword/crossword-turbo-0301.jsonl
|
|
|
|
| 8 |
- crossword-only-result:
|
| 9 |
results-file: ./crossword/crossword-turbo.jsonl
|
|
|
|
|
|
| 2 |
results-file: ./crossword/crossword-turbo.jsonl
|
| 3 |
second-results-file: ./crossword/crossword-turbo-0301.jsonl
|
| 4 |
functions-file: ./crossword/crossword_fns.py
|
| 5 |
+
link: https://github.com/openai/evals/tree/main/evals/registry/data/partially_solved_crossword_clues
|
| 6 |
- emotional-intelligence:
|
| 7 |
results-file: ./crossword/crossword-turbo.jsonl
|
| 8 |
second-results-file: ./crossword/crossword-turbo-0301.jsonl
|
| 9 |
+
link: https://github.com/openai/evals/tree/main/evals/registry/data/partially_solved_crossword_clues
|
| 10 |
- crossword-only-result:
|
| 11 |
results-file: ./crossword/crossword-turbo.jsonl
|
| 12 |
+
link: https://github.com/openai/evals/tree/main/evals/registry/data/partially_solved_crossword_clues
|
frontend/public/smui.css
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
frontend/src/App.svelte
CHANGED
|
@@ -1,6 +1,5 @@
|
|
| 1 |
<script lang="ts">
|
| 2 |
-
import Button, {
|
| 3 |
-
import Handshake from "./assets/handshake.svelte";
|
| 4 |
let data = fetch("/args").then((d) => d.json());
|
| 5 |
let blur = function (ev) {
|
| 6 |
ev.target.blur();
|
|
@@ -8,23 +7,30 @@
|
|
| 8 |
</script>
|
| 9 |
|
| 10 |
<main>
|
| 11 |
-
<
|
| 12 |
-
<
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
<!-- table with links to zeno sites. -->
|
| 18 |
<div id="container">
|
| 19 |
<div id="table-background">
|
| 20 |
<table>
|
| 21 |
<thead>
|
| 22 |
<tr>
|
| 23 |
-
<th>
|
| 24 |
-
<th>
|
| 25 |
-
<th>
|
| 26 |
-
<th>
|
| 27 |
-
<th
|
| 28 |
</tr>
|
| 29 |
</thead>
|
| 30 |
<tbody>
|
|
@@ -34,7 +40,7 @@
|
|
| 34 |
{#each final_data as d}
|
| 35 |
{@const name = Object.keys(d)[0]}
|
| 36 |
<tr>
|
| 37 |
-
<td><
|
| 38 |
<td>
|
| 39 |
{#each d[name]["models"] as m}{m}<br />{/each}
|
| 40 |
</td>
|
|
@@ -50,10 +56,11 @@
|
|
| 50 |
on:focusout={blur}
|
| 51 |
href="/{name}/"
|
| 52 |
ripple={false}
|
| 53 |
-
|
|
|
|
| 54 |
>
|
| 55 |
<Icon class="material-icons">rocket</Icon>
|
| 56 |
-
<Label>
|
| 57 |
</Button>
|
| 58 |
</td>
|
| 59 |
</tr>
|
|
@@ -72,17 +79,18 @@
|
|
| 72 |
justify-content: center;
|
| 73 |
}
|
| 74 |
#table-background {
|
| 75 |
-
|
| 76 |
-
width: 800px;
|
| 77 |
padding: 20px;
|
| 78 |
border-radius: 20px;
|
| 79 |
}
|
| 80 |
.name-wrap {
|
| 81 |
border: 1px solid transparent;
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
|
|
|
|
|
|
| 86 |
}
|
| 87 |
table {
|
| 88 |
border-collapse: collapse;
|
|
@@ -106,6 +114,7 @@
|
|
| 106 |
table th:last-child {
|
| 107 |
border-radius: 0 20px 20px 0;
|
| 108 |
}
|
|
|
|
| 109 |
tbody:before {
|
| 110 |
content: "@";
|
| 111 |
display: block;
|
|
@@ -117,10 +126,29 @@
|
|
| 117 |
}
|
| 118 |
tbody tr {
|
| 119 |
opacity: 0.9;
|
| 120 |
-
height:
|
| 121 |
}
|
| 122 |
-
tbody tr:hover {
|
| 123 |
opacity: 1;
|
| 124 |
background-color: #ededed;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 125 |
}
|
| 126 |
</style>
|
|
|
|
| 1 |
<script lang="ts">
|
| 2 |
+
import Button, { Icon, Label } from "@smui/button";
|
|
|
|
| 3 |
let data = fetch("/args").then((d) => d.json());
|
| 4 |
let blur = function (ev) {
|
| 5 |
ev.target.blur();
|
|
|
|
| 7 |
</script>
|
| 8 |
|
| 9 |
<main>
|
| 10 |
+
<header>
|
| 11 |
+
<h1>Evals Hub</h1>
|
| 12 |
+
</header>
|
| 13 |
+
<div class="tagline">
|
| 14 |
+
Explore and compare the results of
|
| 15 |
+
<img class="open_ai" src="./build/openai.svg" alt="OpenAI logo" />
|
| 16 |
+
<a href="https://github.com/openai/evals">
|
| 17 |
+
<b>OpenAI Evals </b>
|
| 18 |
+
</a>
|
| 19 |
+
using
|
| 20 |
+
<img class="open_ai" src="./build/zeno.png" alt="Zeno logo" />
|
| 21 |
+
<b><a href="https://github.com/zeno-ml/zeno/stargazers">Zeno</a></b>
|
| 22 |
+
</div>
|
| 23 |
<!-- table with links to zeno sites. -->
|
| 24 |
<div id="container">
|
| 25 |
<div id="table-background">
|
| 26 |
<table>
|
| 27 |
<thead>
|
| 28 |
<tr>
|
| 29 |
+
<th>evaluation</th>
|
| 30 |
+
<th>models</th>
|
| 31 |
+
<th>accuracy</th>
|
| 32 |
+
<th>instances</th>
|
| 33 |
+
<th />
|
| 34 |
</tr>
|
| 35 |
</thead>
|
| 36 |
<tbody>
|
|
|
|
| 40 |
{#each final_data as d}
|
| 41 |
{@const name = Object.keys(d)[0]}
|
| 42 |
<tr>
|
| 43 |
+
<td><a href="#"><span class="name-wrap">{name}</span></a> </td>
|
| 44 |
<td>
|
| 45 |
{#each d[name]["models"] as m}{m}<br />{/each}
|
| 46 |
</td>
|
|
|
|
| 56 |
on:focusout={blur}
|
| 57 |
href="/{name}/"
|
| 58 |
ripple={false}
|
| 59 |
+
variant="unelevated"
|
| 60 |
+
color="primary"
|
| 61 |
>
|
| 62 |
<Icon class="material-icons">rocket</Icon>
|
| 63 |
+
<Label>Open</Label>
|
| 64 |
</Button>
|
| 65 |
</td>
|
| 66 |
</tr>
|
|
|
|
| 79 |
justify-content: center;
|
| 80 |
}
|
| 81 |
#table-background {
|
| 82 |
+
width: 900px;
|
|
|
|
| 83 |
padding: 20px;
|
| 84 |
border-radius: 20px;
|
| 85 |
}
|
| 86 |
.name-wrap {
|
| 87 |
border: 1px solid transparent;
|
| 88 |
+
border-radius: 10px;
|
| 89 |
+
font-weight: 500;
|
| 90 |
+
color: var(--logo);
|
| 91 |
+
}
|
| 92 |
+
.name-wrap:hover {
|
| 93 |
+
color: var(--P2);
|
| 94 |
}
|
| 95 |
table {
|
| 96 |
border-collapse: collapse;
|
|
|
|
| 114 |
table th:last-child {
|
| 115 |
border-radius: 0 20px 20px 0;
|
| 116 |
}
|
| 117 |
+
|
| 118 |
tbody:before {
|
| 119 |
content: "@";
|
| 120 |
display: block;
|
|
|
|
| 126 |
}
|
| 127 |
tbody tr {
|
| 128 |
opacity: 0.9;
|
| 129 |
+
height: 70px;
|
| 130 |
}
|
| 131 |
+
/* tbody tr:hover {
|
| 132 |
opacity: 1;
|
| 133 |
background-color: #ededed;
|
| 134 |
+
} */
|
| 135 |
+
.open_ai {
|
| 136 |
+
width: 20px;
|
| 137 |
+
margin-left: 5px;
|
| 138 |
+
}
|
| 139 |
+
.tagline {
|
| 140 |
+
text-align: center;
|
| 141 |
+
display: flex;
|
| 142 |
+
justify-content: center;
|
| 143 |
+
align-items: center;
|
| 144 |
+
}
|
| 145 |
+
.tagline b {
|
| 146 |
+
margin-right: 5px;
|
| 147 |
+
margin-left: 5px;
|
| 148 |
+
}
|
| 149 |
+
header {
|
| 150 |
+
display: flex;
|
| 151 |
+
align-items: center;
|
| 152 |
+
justify-content: center;
|
| 153 |
}
|
| 154 |
</style>
|
frontend/src/theme/_smui-theme.scss
CHANGED
|
@@ -2,19 +2,16 @@
|
|
| 2 |
|
| 3 |
@use "@material/theme/color-palette";
|
| 4 |
|
| 5 |
-
// Svelte Colors!
|
| 6 |
@use "@material/theme/index" as theme with (
|
| 7 |
-
$primary: #
|
| 8 |
-
$secondary: #
|
| 9 |
-
$surface: #
|
| 10 |
-
$background: #
|
| 11 |
$error: color-palette.$red-900
|
| 12 |
);
|
| 13 |
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
.button-shaped-round {
|
| 17 |
-
@include mdc-button.shape-radius(100%);
|
| 18 |
}
|
| 19 |
|
| 20 |
:root {
|
|
@@ -24,13 +21,53 @@
|
|
| 24 |
|
| 25 |
color-scheme: light dark;
|
| 26 |
color: #213547;
|
| 27 |
-
background-color: #f0f3fb;
|
| 28 |
|
| 29 |
font-synthesis: none;
|
| 30 |
text-rendering: optimizeLegibility;
|
| 31 |
-webkit-font-smoothing: antialiased;
|
| 32 |
-moz-osx-font-smoothing: grayscale;
|
| 33 |
-webkit-text-size-adjust: 100%;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
}
|
| 35 |
|
| 36 |
a {
|
|
@@ -38,7 +75,7 @@ a {
|
|
| 38 |
text-decoration: inherit;
|
| 39 |
}
|
| 40 |
a:hover {
|
| 41 |
-
color:
|
| 42 |
}
|
| 43 |
|
| 44 |
h1 {
|
|
|
|
| 2 |
|
| 3 |
@use "@material/theme/color-palette";
|
| 4 |
|
|
|
|
| 5 |
@use "@material/theme/index" as theme with (
|
| 6 |
+
$primary: #6a1b9a,
|
| 7 |
+
$secondary: #989895,
|
| 8 |
+
$surface: #ffffff,
|
| 9 |
+
$background: #ffffff,
|
| 10 |
$error: color-palette.$red-900
|
| 11 |
);
|
| 12 |
|
| 13 |
+
.material-icons {
|
| 14 |
+
fill: var(--logo);
|
|
|
|
|
|
|
| 15 |
}
|
| 16 |
|
| 17 |
:root {
|
|
|
|
| 21 |
|
| 22 |
color-scheme: light dark;
|
| 23 |
color: #213547;
|
|
|
|
| 24 |
|
| 25 |
font-synthesis: none;
|
| 26 |
text-rendering: optimizeLegibility;
|
| 27 |
-webkit-font-smoothing: antialiased;
|
| 28 |
-moz-osx-font-smoothing: grayscale;
|
| 29 |
-webkit-text-size-adjust: 100%;
|
| 30 |
+
|
| 31 |
+
--G1: #333333;
|
| 32 |
+
--G2: #73726f;
|
| 33 |
+
--G3: #989895;
|
| 34 |
+
--G4: #d3d3d3;
|
| 35 |
+
--G5: #ebebea;
|
| 36 |
+
--G6: #ffffff;
|
| 37 |
+
--logo: #6a1b9a;
|
| 38 |
+
--P1: #b18bd3;
|
| 39 |
+
--P2: #d2bae9;
|
| 40 |
+
--P3: #f7f1fb;
|
| 41 |
+
--P4: #f9f7fb;
|
| 42 |
+
--Y1: #f2f2ee;
|
| 43 |
+
--Y2: #fbfbfa;
|
| 44 |
+
|
| 45 |
+
--mdc-theme-primary: var(--G2);
|
| 46 |
+
--mdc-theme-secondary: var(--G3);
|
| 47 |
+
--mdc-theme-background: var(--G6);
|
| 48 |
+
--mdc-theme-surface: var(--G6);
|
| 49 |
+
--mdc-theme-error: #b71c1c;
|
| 50 |
+
--mdc-theme-on-primary: var(--G6);
|
| 51 |
+
--mdc-theme-on-secondary: var(--G6);
|
| 52 |
+
--mdc-theme-on-surface: var(--G1);
|
| 53 |
+
--mdc-theme-on-error: var(--G6);
|
| 54 |
+
--mdc-theme-text-primary-on-background: rgba(0, 0, 0, 0.87);
|
| 55 |
+
--mdc-theme-text-secondary-on-background: rgba(0, 0, 0, 0.54);
|
| 56 |
+
--mdc-theme-text-hint-on-background: rgba(0, 0, 0, 0.38);
|
| 57 |
+
--mdc-theme-text-disabled-on-background: rgba(0, 0, 0, 0.38);
|
| 58 |
+
--mdc-theme-text-icon-on-background: rgba(0, 0, 0, 0.38);
|
| 59 |
+
--mdc-theme-text-primary-on-light: rgba(0, 0, 0, 0.87);
|
| 60 |
+
--mdc-theme-text-secondary-on-light: rgba(0, 0, 0, 0.54);
|
| 61 |
+
--mdc-theme-text-hint-on-light: rgba(0, 0, 0, 0.38);
|
| 62 |
+
--mdc-theme-text-disabled-on-light: rgba(0, 0, 0, 0.38);
|
| 63 |
+
--mdc-theme-text-icon-on-light: rgba(0, 0, 0, 0.38);
|
| 64 |
+
--mdc-theme-text-primary-on-dark: white;
|
| 65 |
+
--mdc-theme-text-secondary-on-dark: rgba(255, 255, 255, 0.7);
|
| 66 |
+
--mdc-theme-text-hint-on-dark: rgba(255, 255, 255, 0.5);
|
| 67 |
+
--mdc-theme-text-disabled-on-dark: rgba(255, 255, 255, 0.5);
|
| 68 |
+
--mdc-theme-text-icon-on-dark: rgba(255, 255, 255, 0.5);
|
| 69 |
+
--mdc-outlined-button-container-height: 33px;
|
| 70 |
+
--mdc-filled-button-container-color: var(--logo);
|
| 71 |
}
|
| 72 |
|
| 73 |
a {
|
|
|
|
| 75 |
text-decoration: inherit;
|
| 76 |
}
|
| 77 |
a:hover {
|
| 78 |
+
color: var(--logo);
|
| 79 |
}
|
| 80 |
|
| 81 |
h1 {
|
zeno-evals-hub/frontend/index.html
CHANGED
|
@@ -2,11 +2,11 @@
|
|
| 2 |
<html lang="en">
|
| 3 |
<head>
|
| 4 |
<meta charset="UTF-8" />
|
| 5 |
-
<link rel="icon" type="image/svg+xml" href="./build/
|
| 6 |
<link rel="stylesheet" href="./build/smui.css" />
|
| 7 |
|
| 8 |
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
| 9 |
-
<title>
|
| 10 |
<!-- Material Icons -->
|
| 11 |
<link
|
| 12 |
rel="stylesheet"
|
|
@@ -18,15 +18,8 @@
|
|
| 18 |
href="https://fonts.googleapis.com/css?family=Roboto:300,400,500,600,700"
|
| 19 |
/>
|
| 20 |
<!-- Roboto Mono -->
|
| 21 |
-
<link
|
| 22 |
-
rel="stylesheet"
|
| 23 |
-
href="https://fonts.googleapis.com/css?family=Roboto+Mono"
|
| 24 |
-
/>
|
| 25 |
</head>
|
| 26 |
-
<div id="header">
|
| 27 |
-
<img src="./build/openai.svg" alt="OpenAI SVG" />
|
| 28 |
-
<h2>OpenAI Evals Hub</h2>
|
| 29 |
-
</div>
|
| 30 |
<body>
|
| 31 |
<div id="app"></div>
|
| 32 |
<script type="module" src="http://localhost:5173/src/main.ts"></script>
|
|
|
|
| 2 |
<html lang="en">
|
| 3 |
<head>
|
| 4 |
<meta charset="UTF-8" />
|
| 5 |
+
<link rel="icon" type="image/svg+xml" href="./build/zeno.png" />
|
| 6 |
<link rel="stylesheet" href="./build/smui.css" />
|
| 7 |
|
| 8 |
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
| 9 |
+
<title>Evals Hub</title>
|
| 10 |
<!-- Material Icons -->
|
| 11 |
<link
|
| 12 |
rel="stylesheet"
|
|
|
|
| 18 |
href="https://fonts.googleapis.com/css?family=Roboto:300,400,500,600,700"
|
| 19 |
/>
|
| 20 |
<!-- Roboto Mono -->
|
| 21 |
+
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto+Mono" />
|
|
|
|
|
|
|
|
|
|
| 22 |
</head>
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
<body>
|
| 24 |
<div id="app"></div>
|
| 25 |
<script type="module" src="http://localhost:5173/src/main.ts"></script>
|