Maximofn committed · Commit c42bd73 · 1 Parent(s): 6cfec6e

Adds support for multiple inference engines in `app.py`, allowing selection between Gemini and Qwen3-VL. API keys are configured and client instances are created according to the selected engine. Error handling when checking API key configuration is also improved, with a specific message for each engine. This change makes the code more flexible and clearer when working with different inference providers.
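To see the shape of the change at a glance, here is a minimal, self-contained sketch of the provider-selection pattern this commit adds. The constants, base URLs, and environment variable names come from the diff below; the dict-based lookup is only an illustrative condensation of the `if`/`elif` branches, not the committed code:

```python
import os
from openai import OpenAI

# Per-engine settings, taken from the values added in app.py.
# The dict layout is illustrative; app.py uses if/elif branches instead.
ENGINES = {
    "Gemini": {
        "base_url": "https://generativelanguage.googleapis.com/v1beta/openai/",
        "model": "gemini-2.5-flash",
        "api_key_env": "GEMINI_API_KEY",
    },
    "Qwen3-VL": {
        "base_url": "https://router.huggingface.co/v1",
        "model": "Qwen/Qwen3-VL-235B-A22B-Thinking:novita",
        "api_key_env": "HUGGINGFACE_INFERENCE_PROVIDERS_API_KEY",
    },
}

INFERENCE = "Gemini"  # hard-coded selection, as in app.py

cfg = ENGINES[INFERENCE]
api_key = os.getenv(cfg["api_key_env"])
# The client is only created when the key is present, mirroring
# app.py's `_client = OpenAI(...) if _api_key else None`.
client = OpenAI(api_key=api_key, base_url=cfg["base_url"]) if api_key else None
```

Because both providers are reached through OpenAI-compatible endpoints, only the base URL, model name, and API key environment variable differ between the two branches.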

Files changed (1): app.py (+59 −20)
app.py CHANGED
```diff
@@ -12,11 +12,24 @@ from langsmith.run_trees import RunTree
 
 load_dotenv()
 
+INFERENCE_GEMINI = "Gemini"
+INFERENCE_QWEN3_VL = "Qwen3-VL"
+INFERENCE = INFERENCE_GEMINI
+
 # Configure Gemini via OpenAI-compatible endpoint
 GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"
 GEMINI_MODEL = "gemini-2.5-flash"
-_api_key = os.getenv("GEMINI_API_KEY")
-_client = OpenAI(api_key=_api_key, base_url=GEMINI_BASE_URL) if _api_key else None
+
+# Configure Qwen3-VL via OpenAI-compatible endpoint
+QWEN3_VL_BASE_URL = "https://router.huggingface.co/v1"
+QWEN3_VL_MODEL = "Qwen/Qwen3-VL-235B-A22B-Thinking:novita"
+
+if INFERENCE == INFERENCE_GEMINI:
+    _api_key = os.getenv("GEMINI_API_KEY")
+    _client = OpenAI(api_key=_api_key, base_url=GEMINI_BASE_URL) if _api_key else None
+elif INFERENCE == INFERENCE_QWEN3_VL:
+    _api_key = os.getenv("HUGGINGFACE_INFERENCE_PROVIDERS_API_KEY")
+    _client = OpenAI(api_key=_api_key, base_url=QWEN3_VL_BASE_URL) if _api_key else None
 
 # Optional LangSmith client for guaranteed flush
 _ls_api_key_env = os.getenv("LANGSMITH_API_KEY")
@@ -201,10 +214,18 @@ def respond(message, history: list[tuple[str, str]]):
     user_text, files = _extract_text_and_files(message)
 
     if not _client:
-        yield (
-            "Gemini API key not configured. Set environment variable GEMINI_API_KEY "
-            "and restart the app."
-        )
+        if INFERENCE == INFERENCE_GEMINI:
+            yield (
+                "Gemini API key not configured. Set environment variable GEMINI_API_KEY "
+                "and restart the app."
+            )
+        elif INFERENCE == INFERENCE_QWEN3_VL:
+            yield (
+                "Qwen3-VL API key not configured. Set environment variable QWEN3_VL_API_KEY "
+                "and restart the app."
+            )
+        else:
+            yield "Inference engine not configured. Set environment variable INFERENCE to 'Gemini' or 'Qwen3-VL' and restart the app."
         return
 
     # Build OpenAI-style messages from history
@@ -273,24 +294,42 @@ def respond(message, history: list[tuple[str, str]]):
     try:
         if pipeline:
             try:
-                child_llm = pipeline.create_child(
-                    name="LLMCall",
-                    run_type="llm",
-                    inputs={
-                        "model": GEMINI_MODEL,
-                        "provider": "gemini-openai",
-                        "messages_preview": _preview_text(str(messages[-1]), 600),
-                    },
-                )
+                if INFERENCE == INFERENCE_GEMINI:
+                    child_llm = pipeline.create_child(
+                        name="LLMCall",
+                        run_type="llm",
+                        inputs={
+                            "model": GEMINI_MODEL,
+                            "provider": "gemini-openai",
+                            "messages_preview": _preview_text(str(messages[-1]), 600),
+                        },
+                    )
+                elif INFERENCE == INFERENCE_QWEN3_VL:
+                    child_llm = pipeline.create_child(
+                        name="LLMCall",
+                        run_type="llm",
+                        inputs={
+                            "model": QWEN3_VL_MODEL,
+                            "provider": "qwen3-vl-openai",
+                            "messages_preview": _preview_text(str(messages[-1]), 600),
+                        },
+                    )
                 child_llm.post()
             except Exception:
                 child_llm = None
 
-        stream = _client.chat.completions.create(
-            model=GEMINI_MODEL,
-            messages=messages,
-            stream=True,
-        )
+        if INFERENCE == INFERENCE_GEMINI:
+            stream = _client.chat.completions.create(
+                model=GEMINI_MODEL,
+                messages=messages,
+                stream=True,
+            )
+        elif INFERENCE == INFERENCE_QWEN3_VL:
+            stream = _client.chat.completions.create(
+                model=QWEN3_VL_MODEL,
+                messages=messages,
+                stream=True,
+            )
 
         accumulated = ""
         for chunk in stream:
```
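For reference, a minimal usage sketch of the streaming call that `respond()` now makes with whichever client was configured. It assumes `_client`, `INFERENCE`, and the model constants are set up as in the diff above; the chunk handling is simplified relative to the app's accumulation and LangSmith tracing:

```python
# Minimal streaming sketch, assuming _client, INFERENCE, INFERENCE_GEMINI,
# GEMINI_MODEL, and QWEN3_VL_MODEL are defined as in the diff above.
# Error handling and tracing are omitted.
model = GEMINI_MODEL if INFERENCE == INFERENCE_GEMINI else QWEN3_VL_MODEL
messages = [{"role": "user", "content": "Describe this image in one sentence."}]

stream = _client.chat.completions.create(
    model=model,
    messages=messages,
    stream=True,
)

accumulated = ""
for chunk in stream:
    # delta.content can be None on some chunks, so fall back to an empty string
    delta = chunk.choices[0].delta.content or ""
    accumulated += delta
    print(delta, end="", flush=True)
```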