Spaces:
Running
Running
Add openai/gpt-4o-mini to the supported models and make it the default model
Browse files
main.py
CHANGED
|
@@ -37,6 +37,7 @@ API_KEY = os.environ.get("CHAT_AUTH_KEY", "default_secret_key")
|
|
| 37 |
api_key_header = APIKeyHeader(name=API_KEY_NAME, auto_error=False)
|
| 38 |
|
| 39 |
ModelID = Literal[
|
|
|
|
| 40 |
"meta-llama/llama-3-70b-instruct",
|
| 41 |
"anthropic/claude-3.5-sonnet",
|
| 42 |
"deepseek/deepseek-coder",
|
|
@@ -68,14 +69,14 @@ class QueryModel(BaseModel):
|
|
| 68 |
class NewsQueryModel(BaseModel):
|
| 69 |
query: str = Field(..., description="News topic to search for")
|
| 70 |
model_id: ModelID = Field(
|
| 71 |
-
default="
|
| 72 |
description="ID of the model to use for response generation"
|
| 73 |
)
|
| 74 |
class Config:
|
| 75 |
schema_extra = {
|
| 76 |
"example": {
|
| 77 |
"query": "Latest developments in AI",
|
| 78 |
-
"model_id": "
|
| 79 |
}
|
| 80 |
}
|
| 81 |
|
|
@@ -103,7 +104,7 @@ def limit_tokens(input_string, token_limit=6000):
|
|
| 103 |
def calculate_tokens(msgs):
|
| 104 |
return sum(len(encoding.encode(str(m))) for m in msgs)
|
| 105 |
|
| 106 |
-
def chat_with_llama_stream(messages, model="gpt-
|
| 107 |
logger.info(f"Starting chat with model: {model}")
|
| 108 |
while calculate_tokens(messages) > (8000 - max_output_tokens):
|
| 109 |
if len(messages) > max_llm_history:
|
|
@@ -206,6 +207,7 @@ async def coding_assistant(query: QueryModel, background_tasks: BackgroundTasks,
|
|
| 206 |
- openai/gpt-3.5-turbo-instruct
|
| 207 |
- qwen/qwen-72b-chat
|
| 208 |
- google/gemma-2-27b-it
|
|
|
|
| 209 |
Requires API Key authentication via X-API-Key header.
|
| 210 |
"""
|
| 211 |
logger.info(f"Received coding assistant query: {query.user_query}")
|
|
@@ -321,7 +323,7 @@ async def news_assistant(query: NewsQueryModel, api_key: str = Depends(verify_ap
|
|
| 321 |
class SearchQueryModel(BaseModel):
|
| 322 |
query: str = Field(..., description="Search query")
|
| 323 |
model_id: ModelID = Field(
|
| 324 |
-
default="
|
| 325 |
description="ID of the model to use for response generation"
|
| 326 |
)
|
| 327 |
class Config:
|
|
|
|
| 37 |
api_key_header = APIKeyHeader(name=API_KEY_NAME, auto_error=False)
|
| 38 |
|
| 39 |
ModelID = Literal[
|
| 40 |
+
"openai/gpt-4o-mini",
|
| 41 |
"meta-llama/llama-3-70b-instruct",
|
| 42 |
"anthropic/claude-3.5-sonnet",
|
| 43 |
"deepseek/deepseek-coder",
|
|
|
|
| 69 |
class NewsQueryModel(BaseModel):
|
| 70 |
query: str = Field(..., description="News topic to search for")
|
| 71 |
model_id: ModelID = Field(
|
| 72 |
+
default="openai/gpt-4o-mini",
|
| 73 |
description="ID of the model to use for response generation"
|
| 74 |
)
|
| 75 |
class Config:
|
| 76 |
schema_extra = {
|
| 77 |
"example": {
|
| 78 |
"query": "Latest developments in AI",
|
| 79 |
+
"model_id": "openai/gpt-4o-mini"
|
| 80 |
}
|
| 81 |
}
|
| 82 |
|
|
|
|
| 104 |
def calculate_tokens(msgs):
|
| 105 |
return sum(len(encoding.encode(str(m))) for m in msgs)
|
| 106 |
|
| 107 |
+
def chat_with_llama_stream(messages, model="openai/gpt-4o-mini", max_llm_history=4, max_output_tokens=2500):
|
| 108 |
logger.info(f"Starting chat with model: {model}")
|
| 109 |
while calculate_tokens(messages) > (8000 - max_output_tokens):
|
| 110 |
if len(messages) > max_llm_history:
|
|
|
|
| 207 |
- openai/gpt-3.5-turbo-instruct
|
| 208 |
- qwen/qwen-72b-chat
|
| 209 |
- google/gemma-2-27b-it
|
| 210 |
+
- openai/gpt-4o-mini
|
| 211 |
Requires API Key authentication via X-API-Key header.
|
| 212 |
"""
|
| 213 |
logger.info(f"Received coding assistant query: {query.user_query}")
|
|
|
|
| 323 |
class SearchQueryModel(BaseModel):
|
| 324 |
query: str = Field(..., description="Search query")
|
| 325 |
model_id: ModelID = Field(
|
| 326 |
+
default="openai/gpt-4o-mini",
|
| 327 |
description="ID of the model to use for response generation"
|
| 328 |
)
|
| 329 |
class Config:
|