feat(chart): use inference proxy (#1688)
* feat(chart): use inference proxy
* fix: also use `HF_API_ROOT` for embedding endpoints
chart/env/prod.yaml (CHANGED)
@@ -159,7 +159,7 @@ envVars:
         "endpoints": [
           {
             "type": "openai",
-            "baseURL": "https://api-inference.huggingface.
+            "baseURL": "https://proxy.serverless.api-inference.huggingface.tech/models/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/v1"
           }
         ]
       },
@@ -193,7 +193,7 @@ envVars:
         "endpoints": [
           {
             "type": "openai",
-            "baseURL": "https://api-inference.huggingface.
+            "baseURL": "https://proxy.serverless.api-inference.huggingface.tech/models/nvidia/Llama-3.1-Nemotron-70B-Instruct-HF/v1"
          }
        ]
      },
@@ -261,7 +261,7 @@ envVars:
         "endpoints": [
           {
             "type": "openai",
-            "baseURL": "https://api-inference.huggingface.
+            "baseURL": "https://proxy.serverless.api-inference.huggingface.tech/models/Qwen/Qwen2.5-Coder-32B-Instruct/v1"
           }
         ]
       },
@@ -280,7 +280,7 @@ envVars:
         "endpoints": [
           {
             "type": "openai",
-            "baseURL": "https://api-inference.huggingface.
+            "baseURL": "https://proxy.serverless.api-inference.huggingface.tech/models/meta-llama/Llama-3.2-11B-Vision-Instruct/v1",
             "multimodal": {
               "image": {
                 "maxSizeInMB": 10,
@@ -597,7 +597,7 @@ envVars:
     ]
   HF_ORG_ADMIN: '644171cfbd0c97265298aa99'
   HF_ORG_EARLY_ACCESS: '5e67bd5b1009063689407478'
-
+  HF_API_ROOT: 'https://proxy.serverless.api-inference.huggingface.tech/models'
 infisical:
   enabled: true
   env: "prod-us-east-1"
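For context, since each of these entries is declared with `"type": "openai"`, any OpenAI-compatible client pointed at the new proxy `baseURL` should be able to reach the model. The sketch below is illustrative, not taken from the repo: the `openai` npm package, the `HF_TOKEN` environment variable, and the assumption that the proxy accepts a Hugging Face access token as the API key are all mine.

```ts
// Hypothetical smoke test against one of the new proxy endpoints.
// Assumption: the proxy accepts an HF access token as the OpenAI API key.
import OpenAI from "openai";

const client = new OpenAI({
	baseURL:
		"https://proxy.serverless.api-inference.huggingface.tech/models/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/v1",
	apiKey: process.env.HF_TOKEN ?? "",
});

const completion = await client.chat.completions.create({
	model: "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
	messages: [{ role: "user", content: "Say hello in one sentence." }],
});

console.log(completion.choices[0]?.message.content);
```

Keeping the per-model path in `baseURL` means only the host changes across all four entries, which is what makes the proxy swap a one-line edit per model.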
src/lib/server/embeddingEndpoints/hfApi/embeddingHfApi.ts (CHANGED)
@@ -18,7 +18,7 @@ export async function embeddingEndpointHfApi(
 	input: z.input<typeof embeddingEndpointHfApiSchema>
 ): Promise<EmbeddingEndpoint> {
 	const { model, authorization } = embeddingEndpointHfApiSchema.parse(input);
-	const url =
+	const url = `${env.HF_API_ROOT}/${model.id}`;
 
 	return async ({ inputs }) => {
 		const batchesInputs = chunk(inputs, 128);
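For context, the surrounding code chunks `inputs` into batches of 128 and sends each batch to the rebuilt `url`. A minimal sketch of what one such request could look like follows; the `{ inputs: string[] }` payload and `number[][]` response shapes are assumptions based on the HF Inference API's feature-extraction convention, and `embedBatch` is a hypothetical helper, not a function from this file.

```ts
// Hypothetical sketch: POST one batch of inputs to the embedding URL.
// Assumptions: the endpoint accepts { inputs: string[] } and returns
// number[][]; `authorization` is a full header value (e.g. "Bearer hf_...").
async function embedBatch(
	url: string,
	authorization: string | undefined,
	inputs: string[]
): Promise<number[][]> {
	const response = await fetch(url, {
		method: "POST",
		headers: {
			"Content-Type": "application/json",
			...(authorization ? { Authorization: authorization } : {}),
		},
		body: JSON.stringify({ inputs }),
	});
	if (!response.ok) {
		throw new Error(`Embedding request failed with status ${response.status}`);
	}
	return (await response.json()) as number[][];
}
```

Deriving the URL from `env.HF_API_ROOT` is what the second commit message refers to: embedding traffic now goes through the same configurable root as the chat endpoints, so pointing `HF_API_ROOT` at the proxy reroutes both.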