Add support for multiple inference engines in `app.py`, allowing selection between Gemini and Qwen3-VL. API-key configuration and client instantiation are implemented per selected engine. Error handling when verifying API-key configuration is also improved, with engine-specific messages. This change makes the code more flexible and clearer when handling different inference providers.
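The selection pattern the commit introduces can be distilled into a small registry. The sketch below is hypothetical (the `ENGINES` dict and `make_client` helper are not in `app.py`), but the endpoints, model ids, and env-var names all come from the diff that follows:

```python
import os
from openai import OpenAI

# Each engine maps to (base_url, model_id, api_key_env_var), taken from the diff.
ENGINES = {
    "Gemini": (
        "https://generativelanguage.googleapis.com/v1beta/openai/",
        "gemini-2.5-flash",
        "GEMINI_API_KEY",
    ),
    "Qwen3-VL": (
        "https://router.huggingface.co/v1",
        "Qwen/Qwen3-VL-235B-A22B-Thinking:novita",
        "HUGGINGFACE_INFERENCE_PROVIDERS_API_KEY",
    ),
}

def make_client(engine: str):
    """Return (client, model) for the chosen engine, or (None, None) if the key is unset."""
    base_url, model, key_var = ENGINES[engine]
    api_key = os.getenv(key_var)
    client = OpenAI(api_key=api_key, base_url=base_url) if api_key else None
    return client, model
```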
app.py
CHANGED
```diff
@@ -12,11 +12,24 @@ from langsmith.run_trees import RunTree
 
 load_dotenv()
 
+INFERENCE_GEMINI = "Gemini"
+INFERENCE_QWEN3_VL = "Qwen3-VL"
+INFERENCE = INFERENCE_GEMINI
+
 # Configure Gemini via OpenAI-compatible endpoint
 GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"
 GEMINI_MODEL = "gemini-2.5-flash"
-_api_key = os.getenv("GEMINI_API_KEY")
-_client = OpenAI(api_key=_api_key, base_url=GEMINI_BASE_URL) if _api_key else None
+
+# Configure Qwen3-VL via OpenAI-compatible endpoint
+QWEN3_VL_BASE_URL = "https://router.huggingface.co/v1"
+QWEN3_VL_MODEL = "Qwen/Qwen3-VL-235B-A22B-Thinking:novita"
+
+if INFERENCE == INFERENCE_GEMINI:
+    _api_key = os.getenv("GEMINI_API_KEY")
+    _client = OpenAI(api_key=_api_key, base_url=GEMINI_BASE_URL) if _api_key else None
+elif INFERENCE == INFERENCE_QWEN3_VL:
+    _api_key = os.getenv("HUGGINGFACE_INFERENCE_PROVIDERS_API_KEY")
+    _client = OpenAI(api_key=_api_key, base_url=QWEN3_VL_BASE_URL) if _api_key else None
 
 # Optional LangSmith client for guaranteed flush
 _ls_api_key_env = os.getenv("LANGSMITH_API_KEY")
@@ -201,10 +214,18 @@ def respond(message, history: list[tuple[str, str]]):
     user_text, files = _extract_text_and_files(message)
 
     if not _client:
-        yield (
-            "Gemini API key not configured. Set environment variable GEMINI_API_KEY "
-            "and restart the app."
-        )
+        if INFERENCE == INFERENCE_GEMINI:
+            yield (
+                "Gemini API key not configured. Set environment variable GEMINI_API_KEY "
+                "and restart the app."
+            )
+        elif INFERENCE == INFERENCE_QWEN3_VL:
+            yield (
+                "Qwen3-VL API key not configured. Set environment variable "
+                "HUGGINGFACE_INFERENCE_PROVIDERS_API_KEY and restart the app."
+            )
+        else:
+            yield "Inference engine not configured. Set INFERENCE in app.py to 'Gemini' or 'Qwen3-VL' and restart the app."
         return
 
     # Build OpenAI-style messages from history
@@ -273,24 +294,42 @@
     try:
         if pipeline:
             try:
-                child_llm = pipeline.create_child(
-                    name="LLMCall",
-                    run_type="llm",
-                    inputs={
-                        "model": GEMINI_MODEL,
-                        "provider": "gemini-openai",
-                        "messages_preview": _preview_text(str(messages[-1]), 600),
-                    },
-                )
+                if INFERENCE == INFERENCE_GEMINI:
+                    child_llm = pipeline.create_child(
+                        name="LLMCall",
+                        run_type="llm",
+                        inputs={
+                            "model": GEMINI_MODEL,
+                            "provider": "gemini-openai",
+                            "messages_preview": _preview_text(str(messages[-1]), 600),
+                        },
+                    )
+                elif INFERENCE == INFERENCE_QWEN3_VL:
+                    child_llm = pipeline.create_child(
+                        name="LLMCall",
+                        run_type="llm",
+                        inputs={
+                            "model": QWEN3_VL_MODEL,
+                            "provider": "qwen3-vl-openai",
+                            "messages_preview": _preview_text(str(messages[-1]), 600),
+                        },
+                    )
                 child_llm.post()
             except Exception:
                 child_llm = None
 
-        stream = _client.chat.completions.create(
-            model=GEMINI_MODEL,
-            messages=messages,
-            stream=True,
-        )
+        if INFERENCE == INFERENCE_GEMINI:
+            stream = _client.chat.completions.create(
+                model=GEMINI_MODEL,
+                messages=messages,
+                stream=True,
+            )
+        elif INFERENCE == INFERENCE_QWEN3_VL:
+            stream = _client.chat.completions.create(
+                model=QWEN3_VL_MODEL,
+                messages=messages,
+                stream=True,
+            )
 
         accumulated = ""
         for chunk in stream:
```
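For context, the `pipeline.create_child(...)` / `child_llm.post()` calls in the last hunk follow LangSmith's RunTree API. Below is a minimal sketch of that pattern, assuming `LANGSMITH_API_KEY` is set; the run names and inputs here are illustrative, not taken from `app.py`:

```python
from langsmith.run_trees import RunTree

# Parent run for the whole request (name and inputs are made up for this sketch).
pipeline = RunTree(name="Pipeline", run_type="chain", inputs={"question": "hi"})
pipeline.post()

# Child run recording one LLM call, mirroring the shape used in the diff.
child_llm = pipeline.create_child(
    name="LLMCall",
    run_type="llm",
    inputs={"model": "gemini-2.5-flash", "provider": "gemini-openai"},
)
child_llm.post()

# After the call completes, close both runs so they flush to LangSmith.
child_llm.end(outputs={"answer": "..."})
child_llm.patch()
pipeline.end(outputs={"answer": "..."})
pipeline.patch()
```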
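A quick way to exercise the new Qwen3-VL path outside the app is a standalone streaming call against the Hugging Face router, reusing the base URL, model id, and env var from the diff (the prompt is arbitrary):

```python
import os
from openai import OpenAI

# Same endpoint, model id, and key variable as app.py.
client = OpenAI(
    api_key=os.getenv("HUGGINGFACE_INFERENCE_PROVIDERS_API_KEY"),
    base_url="https://router.huggingface.co/v1",
)
stream = client.chat.completions.create(
    model="Qwen/Qwen3-VL-235B-A22B-Thinking:novita",
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
    stream=True,
)
# Print deltas as they arrive; some providers emit keep-alive chunks with no choices.
for chunk in stream:
    if not chunk.choices:
        continue
    delta = chunk.choices[0].delta.content
    if delta:
        print(delta, end="", flush=True)
```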