ed-donner committed on
Commit
8a1a166
·
1 Parent(s): 1b762a1

Updated models

Browse files
Files changed (3) hide show
  1. arena/c4.py +6 -7
  2. arena/game.py +7 -2
  3. arena/llm.py +19 -14
arena/c4.py CHANGED
@@ -24,6 +24,8 @@ function refresh() {
24
  }
25
  """
26
 
 
 
27
 
28
  def message_html(game) -> str:
29
  """
@@ -181,11 +183,10 @@ def player_section(name, default):
181
  """
182
  Create the left and right sections of the UI
183
  """
184
- all_model_names = LLM.all_model_names()
185
  with gr.Row():
186
  gr.HTML(f'<div style="text-align: center;font-size:18px">{name} Player</div>')
187
  with gr.Row():
188
- dropdown = gr.Dropdown(all_model_names, value=default, label="LLM", interactive=True)
189
  with gr.Row():
190
  gr.HTML('<div style="text-align: center;font-size:16px">Inner thoughts</div>')
191
  with gr.Row():
@@ -213,9 +214,7 @@ def make_display():
213
  )
214
  with gr.Row():
215
  with gr.Column(scale=1):
216
- red_thoughts, red_dropdown = player_section(
217
- "Red", "openai/gpt-oss-120b via Groq"
218
- )
219
  with gr.Column(scale=2):
220
  with gr.Row():
221
  message = gr.HTML(
@@ -237,14 +236,14 @@ def make_display():
237
 
238
  with gr.Column(scale=1):
239
  yellow_thoughts, yellow_dropdown = player_section(
240
- "Yellow", "gemini-2.5-flash-lite"
241
  )
242
  with gr.TabItem("Leaderboard") as leaderboard_tab:
243
  with gr.Row():
244
  with gr.Column(scale=1):
245
  ratings_df = gr.Dataframe(
246
  headers=["Player", "ELO"],
247
- label="Ratings",
248
  column_widths=[2, 1],
249
  wrap=True,
250
  col_count=2,
 
24
  }
25
  """
26
 
27
+ ALL_MODEL_NAMES = LLM.all_model_names()
28
+
29
 
30
  def message_html(game) -> str:
31
  """
 
183
  """
184
  Create the left and right sections of the UI
185
  """
 
186
  with gr.Row():
187
  gr.HTML(f'<div style="text-align: center;font-size:18px">{name} Player</div>')
188
  with gr.Row():
189
+ dropdown = gr.Dropdown(ALL_MODEL_NAMES, value=default, label="LLM", interactive=True)
190
  with gr.Row():
191
  gr.HTML('<div style="text-align: center;font-size:16px">Inner thoughts</div>')
192
  with gr.Row():
 
214
  )
215
  with gr.Row():
216
  with gr.Column(scale=1):
217
+ red_thoughts, red_dropdown = player_section("Red", ALL_MODEL_NAMES[0])
 
 
218
  with gr.Column(scale=2):
219
  with gr.Row():
220
  message = gr.HTML(
 
236
 
237
  with gr.Column(scale=1):
238
  yellow_thoughts, yellow_dropdown = player_section(
239
+ "Yellow", ALL_MODEL_NAMES[1]
240
  )
241
  with gr.TabItem("Leaderboard") as leaderboard_tab:
242
  with gr.Row():
243
  with gr.Column(scale=1):
244
  ratings_df = gr.Dataframe(
245
  headers=["Player", "ELO"],
246
+ label="Ratings (recent models only)",
247
  column_widths=[2, 1],
248
  wrap=True,
249
  col_count=2,
arena/game.py CHANGED
@@ -3,6 +3,7 @@ from arena.player import Player
3
  from arena.record import get_games, Result, record_game, ratings
4
  from datetime import datetime
5
  from typing import List
 
6
 
7
 
8
  class Game:
@@ -54,9 +55,13 @@ class Game:
54
  @staticmethod
55
  def get_ratings():
56
  """
57
- Return the ELO ratings of all players
58
  """
59
- return ratings()
 
 
 
 
60
 
61
  def record(self):
62
  """
 
3
  from arena.record import get_games, Result, record_game, ratings
4
  from datetime import datetime
5
  from typing import List
6
+ from arena.llm import LLM
7
 
8
 
9
  class Game:
 
55
  @staticmethod
56
  def get_ratings():
57
  """
58
+ Return the ELO ratings of all players - filter out any models that are not supported
59
  """
60
+ return {
61
+ model: rating
62
+ for model, rating in ratings().items()
63
+ if model in LLM.all_supported_model_names()
64
+ }
65
 
66
  def record(self):
67
  """
arena/llm.py CHANGED
@@ -6,6 +6,9 @@ import logging
6
  from typing import Dict, Type, Self, List
7
  import os
8
  import time
 
 
 
9
 
10
  logger = logging.getLogger(__name__)
11
 
@@ -110,17 +113,25 @@ class LLM(ABC):
110
  mapping[model_name] = llm
111
  return mapping
112
 
 
 
 
 
 
 
 
113
  @classmethod
114
  def all_model_names(cls) -> List[str]:
115
  """
116
  Return a list of all the model names supported.
117
  Use the ones specified in the model_map, but also check if there's an env variable set that restricts the models
118
  """
119
- models = list(cls.model_map().keys())
120
  allowed = os.getenv("MODELS")
 
121
  if allowed:
122
  allowed_models = allowed.split(",")
123
- return [model for model in models if model in allowed_models]
124
  else:
125
  return models
126
 
@@ -144,10 +155,9 @@ class Claude(LLM):
144
  """
145
 
146
  model_names = [
147
- "claude-3-5-sonnet-latest",
148
- "claude-3-7-sonnet-latest",
149
  "claude-opus-4-1-20250805",
150
- "claude-sonnet-4-5-20250929",
 
151
  ]
152
 
153
  def __init__(self, model_name: str, temperature: float):
@@ -182,7 +192,7 @@ class GPT(LLM):
182
  A class to act as an interface to the remote AI, in this case GPT
183
  """
184
 
185
- model_names = ["gpt-4o-mini", "gpt-4o", "gpt-5", "gpt-5-mini", "gpt-5-nano", "gpt-4.1-mini"]
186
 
187
  def __init__(self, model_name: str, temperature: float):
188
  """
@@ -199,7 +209,7 @@ class O1(LLM):
199
  A class to act as an interface to the remote AI, in this case O1
200
  """
201
 
202
- model_names = ["o1-mini"]
203
 
204
  def __init__(self, model_name: str, temperature: float):
205
  """
@@ -231,7 +241,7 @@ class O3(LLM):
231
  A class to act as an interface to the remote AI, in this case O3
232
  """
233
 
234
- model_names = ["o3-mini"]
235
 
236
  def __init__(self, model_name: str, temperature: float):
237
  """
@@ -269,8 +279,6 @@ class Gemini(LLM):
269
  """
270
 
271
  model_names = [
272
- "gemini-2.0-flash",
273
- "gemini-1.5-flash",
274
  "gemini-2.5-flash",
275
  "gemini-2.5-flash-lite",
276
  "gemini-2.5-pro",
@@ -347,7 +355,7 @@ class DeepSeekLocal(LLM):
347
  A class to act as an interface to the remote AI, in this case Ollama via the OpenAI client
348
  """
349
 
350
- model_names = ["deepseek-r1:14b local"]
351
 
352
  def __init__(self, model_name: str, temperature: float):
353
  """
@@ -386,9 +394,6 @@ class GroqAPI(LLM):
386
  """
387
 
388
  model_names = [
389
- "deepseek-r1-distill-llama-70b via Groq",
390
- "llama-3.3-70b-versatile via Groq",
391
- "mixtral-8x7b-32768 via Groq",
392
  "openai/gpt-oss-120b via Groq",
393
  ]
394
 
 
6
  from typing import Dict, Type, Self, List
7
  import os
8
  import time
9
+ from dotenv import load_dotenv
10
+
11
+ load_dotenv(override=True)
12
 
13
  logger = logging.getLogger(__name__)
14
 
 
113
  mapping[model_name] = llm
114
  return mapping
115
 
116
+ @classmethod
117
+ def all_supported_model_names(cls) -> List[str]:
118
+ """
119
+ Return a list of all the model names supported by all subclasses of this one.
120
+ """
121
+ return list(cls.model_map().keys())
122
+
123
  @classmethod
124
  def all_model_names(cls) -> List[str]:
125
  """
126
  Return a list of all the model names supported.
127
  Use the ones specified in the model_map, but also check if there's an env variable set that restricts the models
128
  """
129
+ models = cls.all_supported_model_names()
130
  allowed = os.getenv("MODELS")
131
+ print(f"Allowed models: {allowed}")
132
  if allowed:
133
  allowed_models = allowed.split(",")
134
+ return [model for model in allowed_models if model in models]
135
  else:
136
  return models
137
 
 
155
  """
156
 
157
  model_names = [
 
 
158
  "claude-opus-4-1-20250805",
159
+ "claude-sonnet-4-5",
160
+ "claude-haiku-4-5",
161
  ]
162
 
163
  def __init__(self, model_name: str, temperature: float):
 
192
  A class to act as an interface to the remote AI, in this case GPT
193
  """
194
 
195
+ model_names = ["gpt-5", "gpt-5-mini", "gpt-5-nano"]
196
 
197
  def __init__(self, model_name: str, temperature: float):
198
  """
 
209
  A class to act as an interface to the remote AI, in this case O1
210
  """
211
 
212
+ model_names = []
213
 
214
  def __init__(self, model_name: str, temperature: float):
215
  """
 
241
  A class to act as an interface to the remote AI, in this case O3
242
  """
243
 
244
+ model_names = []
245
 
246
  def __init__(self, model_name: str, temperature: float):
247
  """
 
279
  """
280
 
281
  model_names = [
 
 
282
  "gemini-2.5-flash",
283
  "gemini-2.5-flash-lite",
284
  "gemini-2.5-pro",
 
355
  A class to act as an interface to the remote AI, in this case Ollama via the OpenAI client
356
  """
357
 
358
+ model_names = []
359
 
360
  def __init__(self, model_name: str, temperature: float):
361
  """
 
394
  """
395
 
396
  model_names = [
 
 
 
397
  "openai/gpt-oss-120b via Groq",
398
  ]
399