Testing completely new code
Browse files- README.md +242 -10
- RESUMEN.md +60 -0
- app.py +319 -236
- apuntes-filosofia-antigua-completos.md +0 -417
- client.py +167 -0
- config.py +83 -0
- examples.py +339 -0
- exported-assets.zip +3 -0
- requirements.txt +5 -4
- script.py +381 -0
- script_1.py +202 -0
- script_2.py +535 -0
- script_3.py +637 -0
- tests.py +217 -0
- utils.py +185 -0
README.md
CHANGED
|
@@ -1,11 +1,243 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
colorFrom: blue
|
| 5 |
-
colorTo: purple
|
| 6 |
-
sdk: gradio
|
| 7 |
-
sdk_version: 5.49.1
|
| 8 |
-
app_file: app.py
|
| 9 |
-
pinned: false
|
| 10 |
-
license: llama3.2
|
| 11 |
-
---
|
|
|
|
| 1 |
+
# 🦙 Llama 3.2 3B Chat - Hugging Face Space
|
| 2 |
+
|
| 3 |
+
Un Space de Hugging Face para chatear con Meta Llama 3.2 3B Instruct con sistema de colas, streaming y API para cliente Python.
|
| 4 |
+
|
| 5 |
+
## ✨ Características
|
| 6 |
+
|
| 7 |
+
- 🔄 **Sistema de colas**: Solo procesa una petición a la vez para evitar sobrecargar el modelo
|
| 8 |
+
- 📡 **Streaming en tiempo real**: Ve la respuesta generándose en tiempo real
|
| 9 |
+
- 🐍 **Cliente Python**: API completa para integración con aplicaciones Python
|
| 10 |
+
- 💬 **Interfaz web**: Chat interactivo con sistema de prompts y configuración
|
| 11 |
+
- 📊 **Monitoreo**: Estado de cola en tiempo real
|
| 12 |
+
- 🔐 **Autenticación**: Soporte para modelos restringidos con HF token
|
| 13 |
+
|
| 14 |
+
## 🚀 Configuración del Space
|
| 15 |
+
|
| 16 |
+
### 1. Crear el Space
|
| 17 |
+
|
| 18 |
+
1. Ve a [Hugging Face Spaces](https://huggingface.co/new-space)
|
| 19 |
+
2. Elige **Gradio** como SDK
|
| 20 |
+
3. Selecciona **T4 small** o superior como hardware
|
| 21 |
+
4. Nombra tu Space (ej: `tu-usuario/llama-chat`)
|
| 22 |
+
|
| 23 |
+
### 2. Configurar el token HF
|
| 24 |
+
|
| 25 |
+
1. Ve a **Settings** de tu Space
|
| 26 |
+
2. En **Repository secrets**, agrega:
|
| 27 |
+
- **Name**: `HF_TOKEN`
|
| 28 |
+
- **Value**: Tu token de Hugging Face (con acceso a Llama)
|
| 29 |
+
|
| 30 |
+
### 3. Subir archivos
|
| 31 |
+
|
| 32 |
+
Sube estos archivos a tu Space:
|
| 33 |
+
- `app.py` (aplicación principal)
|
| 34 |
+
- `requirements.txt` (dependencias)
|
| 35 |
+
|
| 36 |
+
### 4. Verificar el despliegue
|
| 37 |
+
|
| 38 |
+
Una vez que el Space esté corriendo, deberías ver:
|
| 39 |
+
- Una interfaz de chat en la pestaña principal
|
| 40 |
+
- Un endpoint API en la segunda pestaña
|
| 41 |
+
- Estado de cola actualizado automáticamente
|
| 42 |
+
|
| 43 |
+
## 📱 Uso de la interfaz web
|
| 44 |
+
|
| 45 |
+
### Chat Principal
|
| 46 |
+
- **System Prompt**: Define el comportamiento del asistente
|
| 47 |
+
- **Mensaje**: Tu pregunta o mensaje
|
| 48 |
+
- **Max Tokens**: Longitud máxima de la respuesta (50-1024)
|
| 49 |
+
- **Temperature**: Creatividad de la respuesta (0.1-2.0)
|
| 50 |
+
|
| 51 |
+
### Estado de Cola
|
| 52 |
+
- **queue_size**: Número de peticiones en espera
|
| 53 |
+
- **is_processing**: Si está procesando actualmente
|
| 54 |
+
- **timestamp**: Última actualización
|
| 55 |
+
|
| 56 |
+
## 🐍 Cliente Python
|
| 57 |
+
|
| 58 |
+
### Instalación
|
| 59 |
+
|
| 60 |
+
```bash
|
| 61 |
+
pip install requests
|
| 62 |
+
```
|
| 63 |
+
|
| 64 |
+
### Uso Básico
|
| 65 |
+
|
| 66 |
+
```python
|
| 67 |
+
from client import LlamaClient
|
| 68 |
+
|
| 69 |
+
# Inicializar cliente con la URL de tu Space
|
| 70 |
+
client = LlamaClient("https://tu-usuario-llama-chat.hf.space")
|
| 71 |
+
|
| 72 |
+
# Chat simple
|
| 73 |
+
response = client.chat(
|
| 74 |
+
message="¿Qué es la inteligencia artificial?",
|
| 75 |
+
system_prompt="Eres un profesor experto."
|
| 76 |
+
)
|
| 77 |
+
|
| 78 |
+
print(response["response"])
|
| 79 |
+
```
|
| 80 |
+
|
| 81 |
+
### Chat con Streaming
|
| 82 |
+
|
| 83 |
+
```python
|
| 84 |
+
# Ver respuesta generándose en tiempo real
|
| 85 |
+
for chunk in client.chat_stream(
|
| 86 |
+
message="Explica la física cuántica",
|
| 87 |
+
system_prompt="Eres un divulgador científico.",
|
| 88 |
+
max_tokens=300
|
| 89 |
+
):
|
| 90 |
+
print(f"\r{chunk['response']}", end="", flush=True)
|
| 91 |
+
|
| 92 |
+
if chunk.get("is_complete", False):
|
| 93 |
+
print("\n[Completo]")
|
| 94 |
+
break
|
| 95 |
+
```
|
| 96 |
+
|
| 97 |
+
### Chat con Historial
|
| 98 |
+
|
| 99 |
+
```python
|
| 100 |
+
# Mantener conversación
|
| 101 |
+
history = [
|
| 102 |
+
["Hola", "¡Hola! ¿En qué puedo ayudarte?"],
|
| 103 |
+
["Explica el machine learning", "El machine learning es..."]
|
| 104 |
+
]
|
| 105 |
+
|
| 106 |
+
response = client.chat(
|
| 107 |
+
message="¿Puedes dar un ejemplo práctico?",
|
| 108 |
+
history=history
|
| 109 |
+
)
|
| 110 |
+
```
|
| 111 |
+
|
| 112 |
+
## 🔧 API Endpoints
|
| 113 |
+
|
| 114 |
+
### POST /call/api_chat
|
| 115 |
+
Respuesta completa sin streaming.
|
| 116 |
+
|
| 117 |
+
**Payload:**
|
| 118 |
+
```json
|
| 119 |
+
{
|
| 120 |
+
"data": [
|
| 121 |
+
"system_prompt",
|
| 122 |
+
"message",
|
| 123 |
+
[["user", "assistant"], ...],
|
| 124 |
+
512,
|
| 125 |
+
0.7
|
| 126 |
+
]
|
| 127 |
+
}
|
| 128 |
+
```
|
| 129 |
+
|
| 130 |
+
**Respuesta:**
|
| 131 |
+
```json
|
| 132 |
+
{
|
| 133 |
+
"data": [{
|
| 134 |
+
"response": "Respuesta del modelo",
|
| 135 |
+
"queue_status": {
|
| 136 |
+
"queue_size": 0,
|
| 137 |
+
"is_processing": false,
|
| 138 |
+
"timestamp": "2025-10-16T17:30:00"
|
| 139 |
+
}
|
| 140 |
+
}]
|
| 141 |
+
}
|
| 142 |
+
```
|
| 143 |
+
|
| 144 |
+
### POST /call/api_chat_stream
|
| 145 |
+
Respuesta con streaming.
|
| 146 |
+
|
| 147 |
+
Misma estructura de payload, pero responde con eventos SSE.
|
| 148 |
+
|
| 149 |
+
## 📊 Monitoreo y Debugging
|
| 150 |
+
|
| 151 |
+
### Logs del Space
|
| 152 |
+
Revisa los logs en la interfaz de HF Spaces para debugging.
|
| 153 |
+
|
| 154 |
+
### Estado de Cola
|
| 155 |
+
Usa `client.get_queue_status()` para monitorear la cola:
|
| 156 |
+
|
| 157 |
+
```python
|
| 158 |
+
status = client.get_queue_status()
|
| 159 |
+
print(f"Cola: {status['queue_size']} peticiones")
|
| 160 |
+
print(f"Procesando: {status['is_processing']}")
|
| 161 |
+
```
|
| 162 |
+
|
| 163 |
+
### Manejo de Errores
|
| 164 |
+
|
| 165 |
+
```python
|
| 166 |
+
response = client.chat("Hola")
|
| 167 |
+
|
| 168 |
+
if "error" in response:
|
| 169 |
+
print(f"Error: {response['error']}")
|
| 170 |
+
else:
|
| 171 |
+
print(f"Respuesta: {response['response']}")
|
| 172 |
+
```
|
| 173 |
+
|
| 174 |
+
## ⚙️ Configuración Avanzada
|
| 175 |
+
|
| 176 |
+
### Parámetros del Modelo
|
| 177 |
+
|
| 178 |
+
- **max_tokens**: 50-1024 (recomendado: 512)
|
| 179 |
+
- **temperature**: 0.1-2.0 (recomendado: 0.7)
|
| 180 |
+
- **repetition_penalty**: Automático (1.1)
|
| 181 |
+
|
| 182 |
+
### Optimización de Performance
|
| 183 |
+
|
| 184 |
+
1. **Hardware**: Usa GPU T4 small mínimo
|
| 185 |
+
2. **Batch size**: Sistema de colas evita problemas de memoria
|
| 186 |
+
3. **Context length**: Máximo 2048 tokens de entrada
|
| 187 |
+
|
| 188 |
+
### System Prompts Útiles
|
| 189 |
+
|
| 190 |
+
```python
|
| 191 |
+
# Para tareas académicas
|
| 192 |
+
system_prompt = "Eres un tutor experto que explica conceptos complejos de forma clara y pedagógica."
|
| 193 |
+
|
| 194 |
+
# Para programación
|
| 195 |
+
system_prompt = "Eres un desarrollador senior que ayuda con código Python, explicando paso a paso."
|
| 196 |
+
|
| 197 |
+
# Para creatividad
|
| 198 |
+
system_prompt = "Eres un escritor creativo que ayuda a generar ideas originales y contenido engagente."
|
| 199 |
+
```
|
| 200 |
+
|
| 201 |
+
## 🐛 Troubleshooting
|
| 202 |
+
|
| 203 |
+
### Error: HF_TOKEN no encontrado
|
| 204 |
+
- Verifica que agregaste el token en Repository secrets
|
| 205 |
+
- Asegúrate que el nombre sea exactamente `HF_TOKEN`
|
| 206 |
+
|
| 207 |
+
### Error: Modelo no disponible
|
| 208 |
+
- Tu token debe tener acceso a Llama 3.2 3B
|
| 209 |
+
- Solicita acceso en la página del modelo si es necesario
|
| 210 |
+
|
| 211 |
+
### Timeouts en cliente Python
|
| 212 |
+
- Aumenta el timeout: `requests.post(..., timeout=600)`
|
| 213 |
+
- El modelo puede tardar en cargar la primera vez
|
| 214 |
+
|
| 215 |
+
### Cola muy larga
|
| 216 |
+
- El sistema procesa una petición a la vez
|
| 217 |
+
- Considera usar hardware más potente
|
| 218 |
+
|
| 219 |
+
## 🤝 Contribuciones
|
| 220 |
+
|
| 221 |
+
¿Mejoras sugeridas?
|
| 222 |
+
1. Fork el código
|
| 223 |
+
2. Implementa mejoras
|
| 224 |
+
3. Prueba con tu propio Space
|
| 225 |
+
4. Comparte tu versión
|
| 226 |
+
|
| 227 |
+
## 📝 Licencia
|
| 228 |
+
|
| 229 |
+
Este código es de uso libre. Respeta los términos de uso de:
|
| 230 |
+
- Hugging Face Spaces
|
| 231 |
+
- Meta Llama 3.2 License
|
| 232 |
+
- Gradio License
|
| 233 |
+
|
| 234 |
+
## 🔗 Enlaces Útiles
|
| 235 |
+
|
| 236 |
+
- [Hugging Face Spaces](https://huggingface.co/spaces)
|
| 237 |
+
- [Meta Llama 3.2 3B Instruct](https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct)
|
| 238 |
+
- [Gradio Documentation](https://gradio.app/docs/)
|
| 239 |
+
- [Transformers Library](https://huggingface.co/docs/transformers)
|
| 240 |
+
|
| 241 |
---
|
| 242 |
+
|
| 243 |
+
**¡Disfruta chateando con Llama! 🦙**
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
RESUMEN.md
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
🦙 RESUMEN DEL PROYECTO - Llama 3.2 3B Chat Space
|
| 3 |
+
================================================================
|
| 4 |
+
|
| 5 |
+
✅ ARCHIVOS GENERADOS:
|
| 6 |
+
|
| 7 |
+
📱 CORE APPLICATION:
|
| 8 |
+
- app.py → Aplicación principal de Gradio con cola y streaming
|
| 9 |
+
- requirements.txt → Dependencias del proyecto
|
| 10 |
+
- config.py → Configuración centralizada
|
| 11 |
+
- utils.py → Utilidades y monitoreo de rendimiento
|
| 12 |
+
|
| 13 |
+
🐍 CLIENTE PYTHON:
|
| 14 |
+
- client.py → Cliente Python para API del Space
|
| 15 |
+
- examples.py → Ejemplos avanzados de uso
|
| 16 |
+
- tests.py → Suite de pruebas automáticas
|
| 17 |
+
|
| 18 |
+
📚 DOCUMENTACIÓN:
|
| 19 |
+
- README.md → Instrucciones completas de setup y uso
|
| 20 |
+
|
| 21 |
+
🚀 CARACTERÍSTICAS IMPLEMENTADAS:
|
| 22 |
+
|
| 23 |
+
✓ Sistema de colas (una petición a la vez)
|
| 24 |
+
✓ Streaming en tiempo real
|
| 25 |
+
✓ API completa para cliente Python
|
| 26 |
+
✓ Soporte para system prompt, message e history
|
| 27 |
+
✓ Interfaz web con Gradio
|
| 28 |
+
✓ Monitoreo de estado de cola
|
| 29 |
+
✓ Manejo de errores robusto
|
| 30 |
+
✓ Configuración para hf_token
|
| 31 |
+
✓ Validación de parámetros
|
| 32 |
+
✓ Estimación de tokens
|
| 33 |
+
✓ Historial de conversación
|
| 34 |
+
✓ Múltiples temperaturas
|
| 35 |
+
✓ Límites configurables
|
| 36 |
+
|
| 37 |
+
🔧 PASOS SIGUIENTES:
|
| 38 |
+
|
| 39 |
+
1. Crear Space en Hugging Face:
|
| 40 |
+
- Ve a https://huggingface.co/new-space
|
| 41 |
+
- Selecciona Gradio SDK
|
| 42 |
+
- Elige hardware T4 small o superior
|
| 43 |
+
|
| 44 |
+
2. Configurar HF_TOKEN:
|
| 45 |
+
- Settings → Repository secrets
|
| 46 |
+
- Agregar HF_TOKEN con tu token de acceso
|
| 47 |
+
|
| 48 |
+
3. Subir archivos:
|
| 49 |
+
- app.py y requirements.txt son obligatorios
|
| 50 |
+
- Los demás archivos son opcionales pero recomendados
|
| 51 |
+
|
| 52 |
+
4. Probar funcionalidad:
|
| 53 |
+
- Usar tests.py para verificar el funcionamiento
|
| 54 |
+
- Usar examples.py para casos de uso avanzados
|
| 55 |
+
|
| 56 |
+
📖 DOCUMENTACIÓN COMPLETA:
|
| 57 |
+
Revisar README.md para instrucciones detalladas de configuración,
|
| 58 |
+
uso de la API, troubleshooting y ejemplos de integración.
|
| 59 |
+
|
| 60 |
+
¡Tu Space está listo para ser desplegado! 🚀
|
app.py
CHANGED
|
@@ -1,291 +1,374 @@
|
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
-
import
|
| 3 |
-
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
|
| 4 |
-
import os
|
| 5 |
-
import threading
|
| 6 |
import queue
|
|
|
|
| 7 |
import time
|
| 8 |
-
|
|
|
|
|
|
|
|
|
|
| 9 |
import json
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
device
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
global model, tokenizer
|
| 19 |
-
|
| 20 |
-
model_name = "meta-llama/Llama-3.2-3B-Instruct"
|
| 21 |
-
|
| 22 |
-
# Configuración optimizada para el tier gratuito
|
| 23 |
-
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 24 |
-
model = AutoModelForCausalLM.from_pretrained(
|
| 25 |
-
model_name,
|
| 26 |
-
torch_dtype=torch.bfloat16 if device == "cuda" else torch.float32,
|
| 27 |
-
device_map="auto" if device == "cuda" else None,
|
| 28 |
-
low_cpu_mem_usage=True,
|
| 29 |
-
trust_remote_code=True
|
| 30 |
-
)
|
| 31 |
-
|
| 32 |
-
if tokenizer.pad_token is None:
|
| 33 |
-
tokenizer.pad_token = tokenizer.eos_token
|
| 34 |
-
|
| 35 |
-
print(f"Modelo cargado en: {device}")
|
| 36 |
-
|
| 37 |
-
def format_chat_prompt(message: str, history: List[List[str]] = None) -> str:
|
| 38 |
-
"""Formatea el prompt usando el template de chat de Llama 3.2"""
|
| 39 |
-
messages = []
|
| 40 |
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
for user_msg, assistant_msg in history:
|
| 44 |
-
messages.append({"role": "user", "content": user_msg})
|
| 45 |
-
if assistant_msg:
|
| 46 |
-
messages.append({"role": "assistant", "content": assistant_msg})
|
| 47 |
|
| 48 |
-
|
| 49 |
-
messages.append({"role": "user", "content": message})
|
| 50 |
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
tokenize=False,
|
| 55 |
-
add_generation_prompt=True
|
| 56 |
-
)
|
| 57 |
|
| 58 |
-
|
|
|
|
|
|
|
| 59 |
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
top_p: float = 0.9) -> Generator[str, None, None]:
|
| 63 |
-
"""Genera respuesta con streaming usando TextIteratorStreamer"""
|
| 64 |
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
|
|
|
| 68 |
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
formatted_prompt = format_chat_prompt(message, history)
|
| 72 |
|
| 73 |
-
#
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
max_length=2048
|
| 79 |
-
)
|
| 80 |
-
inputs = inputs.to(device)
|
| 81 |
-
|
| 82 |
-
# Configurar streaming
|
| 83 |
-
streamer = TextIteratorStreamer(
|
| 84 |
-
tokenizer,
|
| 85 |
-
timeout=30.0,
|
| 86 |
-
skip_prompt=True,
|
| 87 |
-
skip_special_tokens=True
|
| 88 |
)
|
| 89 |
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
"temperature": temperature,
|
| 96 |
-
"
|
| 97 |
-
"do_sample": True,
|
| 98 |
-
"streamer": streamer,
|
| 99 |
-
"pad_token_id": tokenizer.eos_token_id,
|
| 100 |
-
"eos_token_id": tokenizer.eos_token_id,
|
| 101 |
}
|
| 102 |
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
# Crear interfaz Gradio
|
| 163 |
-
with gr.Blocks(
|
| 164 |
-
title="Llama 3.2-3B Server",
|
| 165 |
-
theme=gr.themes.Soft(),
|
| 166 |
-
css="""
|
| 167 |
-
.gradio-container {
|
| 168 |
-
max-width: 1000px !important;
|
| 169 |
}
|
| 170 |
-
"""
|
| 171 |
-
) as demo:
|
| 172 |
|
| 173 |
-
|
| 174 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 175 |
|
| 176 |
-
|
| 177 |
-
|
|
|
|
|
|
|
| 178 |
|
| 179 |
with gr.Row():
|
| 180 |
with gr.Column(scale=3):
|
| 181 |
-
chatbot = gr.Chatbot(
|
| 182 |
-
height=500,
|
| 183 |
-
show_label=False,
|
| 184 |
-
container=False,
|
| 185 |
-
bubble_full_width=False
|
| 186 |
-
)
|
| 187 |
-
|
| 188 |
msg = gr.Textbox(
|
|
|
|
| 189 |
placeholder="Escribe tu mensaje aquí...",
|
| 190 |
-
|
| 191 |
-
container=False,
|
| 192 |
-
scale=7
|
| 193 |
)
|
| 194 |
|
| 195 |
with gr.Row():
|
| 196 |
-
|
| 197 |
-
clear_btn = gr.Button("Limpiar"
|
| 198 |
|
| 199 |
with gr.Column(scale=1):
|
| 200 |
-
gr.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 201 |
|
| 202 |
max_tokens = gr.Slider(
|
| 203 |
minimum=50,
|
| 204 |
maximum=1024,
|
| 205 |
value=512,
|
| 206 |
step=50,
|
| 207 |
-
label="
|
| 208 |
)
|
| 209 |
|
| 210 |
temperature = gr.Slider(
|
| 211 |
minimum=0.1,
|
| 212 |
-
maximum=
|
| 213 |
value=0.7,
|
| 214 |
step=0.1,
|
| 215 |
-
label="
|
| 216 |
)
|
| 217 |
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
maximum=1.0,
|
| 221 |
-
value=0.9,
|
| 222 |
-
step=0.1,
|
| 223 |
-
label="Top-p"
|
| 224 |
-
)
|
| 225 |
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
**Endpoint:** `/api/generate`
|
| 230 |
-
|
| 231 |
-
**Parámetros:**
|
| 232 |
-
- `message`: Mensaje de entrada
|
| 233 |
-
- `max_tokens`: Máximo tokens (50-1024)
|
| 234 |
-
- `temperature`: Creatividad (0.1-1.0)
|
| 235 |
-
- `top_p`: Diversidad (0.1-1.0)
|
| 236 |
-
- `stream`: true/false para streaming
|
| 237 |
-
|
| 238 |
-
**Ejemplo Flutter:**
|
| 239 |
-
```dart
|
| 240 |
-
final response = await dio.post(
|
| 241 |
-
'https://tu-space.hf.space/api/generate',
|
| 242 |
-
data: {
|
| 243 |
-
'message': 'Hola',
|
| 244 |
-
'stream': true
|
| 245 |
-
}
|
| 246 |
-
);
|
| 247 |
-
```
|
| 248 |
-
""")
|
| 249 |
-
|
| 250 |
-
# Eventos
|
| 251 |
-
msg.submit(
|
| 252 |
-
chat_interface,
|
| 253 |
-
inputs=[msg, chatbot, max_tokens, temperature, top_p],
|
| 254 |
-
outputs=[msg, chatbot],
|
| 255 |
-
queue=True
|
| 256 |
-
)
|
| 257 |
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
queue=True
|
| 263 |
-
)
|
| 264 |
|
| 265 |
-
|
| 266 |
-
lambda: ([], ""),
|
| 267 |
-
outputs=[chatbot, msg],
|
| 268 |
-
queue=False
|
| 269 |
-
)
|
| 270 |
|
| 271 |
-
|
| 272 |
-
|
| 273 |
-
demo.queue(default_concurrency_limit=1, max_size=10, api_open=True)
|
| 274 |
|
|
|
|
|
|
|
| 275 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 276 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 277 |
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 281 |
|
| 282 |
-
|
| 283 |
if __name__ == "__main__":
|
| 284 |
-
|
| 285 |
-
server_name="0.0.0.0",
|
| 286 |
-
server_port=7860,
|
| 287 |
-
share=False,
|
| 288 |
-
show_error=True,
|
| 289 |
-
quiet=False,
|
| 290 |
-
mcp_server=True
|
| 291 |
-
)
|
|
|
|
| 1 |
+
|
| 2 |
import gradio as gr
|
| 3 |
+
import asyncio
|
|
|
|
|
|
|
|
|
|
| 4 |
import queue
|
| 5 |
+
import threading
|
| 6 |
import time
|
| 7 |
+
import os
|
| 8 |
+
from typing import List, Dict, Optional, Generator, Tuple
|
| 9 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
|
| 10 |
+
import torch
|
| 11 |
import json
|
| 12 |
+
from datetime import datetime
|
| 13 |
+
|
| 14 |
+
class LlamaChat:
|
| 15 |
+
def __init__(self):
|
| 16 |
+
self.model_name = "meta-llama/Llama-3.2-3B-Instruct"
|
| 17 |
+
self.device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 18 |
+
self.tokenizer = None
|
| 19 |
+
self.model = None
|
| 20 |
+
self.request_queue = queue.Queue()
|
| 21 |
+
self.is_processing = False
|
| 22 |
+
self.current_streamer = None
|
| 23 |
+
|
| 24 |
+
# Inicializar modelo
|
| 25 |
+
self._load_model()
|
| 26 |
+
|
| 27 |
+
# Iniciar worker thread para procesar colas
|
| 28 |
+
self.worker_thread = threading.Thread(target=self._queue_worker, daemon=True)
|
| 29 |
+
self.worker_thread.start()
|
| 30 |
+
|
| 31 |
+
def _load_model(self):
|
| 32 |
+
"""Cargar el modelo y tokenizer con el token de HF"""
|
| 33 |
+
try:
|
| 34 |
+
hf_token = os.environ.get("HF_TOKEN")
|
| 35 |
+
if not hf_token:
|
| 36 |
+
raise ValueError("HF_TOKEN no encontrado en variables de entorno")
|
| 37 |
+
|
| 38 |
+
print(f"Cargando modelo {self.model_name}...")
|
| 39 |
+
self.tokenizer = AutoTokenizer.from_pretrained(
|
| 40 |
+
self.model_name,
|
| 41 |
+
token=hf_token,
|
| 42 |
+
trust_remote_code=True
|
| 43 |
+
)
|
| 44 |
|
| 45 |
+
self.model = AutoModelForCausalLM.from_pretrained(
|
| 46 |
+
self.model_name,
|
| 47 |
+
token=hf_token,
|
| 48 |
+
torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
|
| 49 |
+
device_map="auto" if self.device == "cuda" else None,
|
| 50 |
+
trust_remote_code=True
|
| 51 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
|
| 53 |
+
if self.tokenizer.pad_token is None:
|
| 54 |
+
self.tokenizer.pad_token = self.tokenizer.eos_token
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
|
| 56 |
+
print("Modelo cargado exitosamente!")
|
|
|
|
| 57 |
|
| 58 |
+
except Exception as e:
|
| 59 |
+
print(f"Error cargando modelo: {e}")
|
| 60 |
+
raise
|
|
|
|
|
|
|
|
|
|
| 61 |
|
| 62 |
+
def _format_messages(self, system_prompt: str, message: str, history: List[List[str]]) -> str:
|
| 63 |
+
"""Formatear mensajes para Llama-3.2-Instruct"""
|
| 64 |
+
messages = []
|
| 65 |
|
| 66 |
+
if system_prompt:
|
| 67 |
+
messages.append({"role": "system", "content": system_prompt})
|
|
|
|
|
|
|
| 68 |
|
| 69 |
+
# Agregar historial
|
| 70 |
+
for user_msg, assistant_msg in history:
|
| 71 |
+
messages.append({"role": "user", "content": user_msg})
|
| 72 |
+
messages.append({"role": "assistant", "content": assistant_msg})
|
| 73 |
|
| 74 |
+
# Agregar mensaje actual
|
| 75 |
+
messages.append({"role": "user", "content": message})
|
|
|
|
| 76 |
|
| 77 |
+
# Usar el chat template del tokenizer
|
| 78 |
+
formatted_prompt = self.tokenizer.apply_chat_template(
|
| 79 |
+
messages,
|
| 80 |
+
tokenize=False,
|
| 81 |
+
add_generation_prompt=True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
)
|
| 83 |
|
| 84 |
+
return formatted_prompt
|
| 85 |
+
|
| 86 |
+
def _queue_worker(self):
|
| 87 |
+
"""Worker thread para procesar cola de requests"""
|
| 88 |
+
while True:
|
| 89 |
+
try:
|
| 90 |
+
if not self.request_queue.empty():
|
| 91 |
+
request = self.request_queue.get()
|
| 92 |
+
self.is_processing = True
|
| 93 |
+
self._process_request(request)
|
| 94 |
+
self.is_processing = False
|
| 95 |
+
self.request_queue.task_done()
|
| 96 |
+
else:
|
| 97 |
+
time.sleep(0.1)
|
| 98 |
+
except Exception as e:
|
| 99 |
+
print(f"Error en queue worker: {e}")
|
| 100 |
+
self.is_processing = False
|
| 101 |
+
|
| 102 |
+
def _process_request(self, request: Dict):
|
| 103 |
+
"""Procesar una request individual"""
|
| 104 |
+
try:
|
| 105 |
+
system_prompt = request["system_prompt"]
|
| 106 |
+
message = request["message"]
|
| 107 |
+
history = request["history"]
|
| 108 |
+
max_tokens = request.get("max_tokens", 512)
|
| 109 |
+
temperature = request.get("temperature", 0.7)
|
| 110 |
+
response_callback = request["callback"]
|
| 111 |
+
|
| 112 |
+
# Formatear prompt
|
| 113 |
+
formatted_prompt = self._format_messages(system_prompt, message, history)
|
| 114 |
+
|
| 115 |
+
# Tokenizar
|
| 116 |
+
inputs = self.tokenizer(
|
| 117 |
+
formatted_prompt,
|
| 118 |
+
return_tensors="pt",
|
| 119 |
+
truncation=True,
|
| 120 |
+
max_length=2048
|
| 121 |
+
).to(self.device)
|
| 122 |
+
|
| 123 |
+
# Configurar streamer
|
| 124 |
+
streamer = TextIteratorStreamer(
|
| 125 |
+
self.tokenizer,
|
| 126 |
+
timeout=60,
|
| 127 |
+
skip_prompt=True,
|
| 128 |
+
skip_special_tokens=True
|
| 129 |
+
)
|
| 130 |
+
self.current_streamer = streamer
|
| 131 |
+
|
| 132 |
+
# Configurar parámetros de generación
|
| 133 |
+
generation_kwargs = {
|
| 134 |
+
**inputs,
|
| 135 |
+
"max_new_tokens": max_tokens,
|
| 136 |
+
"temperature": temperature,
|
| 137 |
+
"do_sample": True,
|
| 138 |
+
"pad_token_id": self.tokenizer.eos_token_id,
|
| 139 |
+
"streamer": streamer,
|
| 140 |
+
"repetition_penalty": 1.1
|
| 141 |
+
}
|
| 142 |
+
|
| 143 |
+
# Generar en thread separado
|
| 144 |
+
def generate():
|
| 145 |
+
with torch.no_grad():
|
| 146 |
+
self.model.generate(**generation_kwargs)
|
| 147 |
+
|
| 148 |
+
generation_thread = threading.Thread(target=generate)
|
| 149 |
+
generation_thread.start()
|
| 150 |
+
|
| 151 |
+
# Stream respuesta
|
| 152 |
+
full_response = ""
|
| 153 |
+
for new_text in streamer:
|
| 154 |
+
if new_text:
|
| 155 |
+
full_response += new_text
|
| 156 |
+
response_callback(full_response, False)
|
| 157 |
+
|
| 158 |
+
response_callback(full_response, True)
|
| 159 |
+
generation_thread.join()
|
| 160 |
+
|
| 161 |
+
except Exception as e:
|
| 162 |
+
print(f"Error procesando request: {e}")
|
| 163 |
+
response_callback(f"Error: {str(e)}", True)
|
| 164 |
+
finally:
|
| 165 |
+
self.current_streamer = None
|
| 166 |
+
|
| 167 |
+
def chat_stream(self, system_prompt: str, message: str, history: List[List[str]],
|
| 168 |
+
max_tokens: int = 512, temperature: float = 0.7) -> Generator[Tuple[str, bool], None, None]:
|
| 169 |
+
"""Método principal para chatear con streaming"""
|
| 170 |
+
if not message.strip():
|
| 171 |
+
yield "Por favor, escribe un mensaje.", True
|
| 172 |
+
return
|
| 173 |
+
|
| 174 |
+
# Crear evento para comunicación con el worker
|
| 175 |
+
response_queue = queue.Queue()
|
| 176 |
+
response_complete = threading.Event()
|
| 177 |
+
current_response = [""]
|
| 178 |
+
|
| 179 |
+
def response_callback(text: str, is_complete: bool):
|
| 180 |
+
current_response[0] = text
|
| 181 |
+
response_queue.put((text, is_complete))
|
| 182 |
+
if is_complete:
|
| 183 |
+
response_complete.set()
|
| 184 |
+
|
| 185 |
+
# Agregar request a la cola
|
| 186 |
+
request = {
|
| 187 |
+
"system_prompt": system_prompt or "",
|
| 188 |
+
"message": message,
|
| 189 |
+
"history": history or [],
|
| 190 |
+
"max_tokens": max_tokens,
|
| 191 |
"temperature": temperature,
|
| 192 |
+
"callback": response_callback
|
|
|
|
|
|
|
|
|
|
|
|
|
| 193 |
}
|
| 194 |
|
| 195 |
+
self.request_queue.put(request)
|
| 196 |
+
|
| 197 |
+
# Esperar y streamear respuesta
|
| 198 |
+
while not response_complete.is_set():
|
| 199 |
+
try:
|
| 200 |
+
text, is_complete = response_queue.get(timeout=0.1)
|
| 201 |
+
yield text, is_complete
|
| 202 |
+
if is_complete:
|
| 203 |
+
break
|
| 204 |
+
except queue.Empty:
|
| 205 |
+
# Si no hay nuevos tokens, yield el último estado
|
| 206 |
+
if current_response[0]:
|
| 207 |
+
yield current_response[0], False
|
| 208 |
+
continue
|
| 209 |
+
|
| 210 |
+
def get_queue_status(self) -> Dict[str, any]:
|
| 211 |
+
"""Obtener estado de la cola"""
|
| 212 |
+
return {
|
| 213 |
+
"queue_size": self.request_queue.qsize(),
|
| 214 |
+
"is_processing": self.is_processing,
|
| 215 |
+
"timestamp": datetime.now().isoformat()
|
| 216 |
+
}
|
| 217 |
+
|
| 218 |
+
# Inicializar el chat
|
| 219 |
+
chat_instance = LlamaChat()
|
| 220 |
+
|
| 221 |
+
# Función para la interfaz de Gradio
|
| 222 |
+
def chat_interface(message: str, history: List[List[str]], system_prompt: str,
|
| 223 |
+
max_tokens: int, temperature: float):
|
| 224 |
+
"""Interfaz de chat para Gradio"""
|
| 225 |
+
for response, is_complete in chat_instance.chat_stream(
|
| 226 |
+
system_prompt, message, history, max_tokens, temperature
|
| 227 |
+
):
|
| 228 |
+
if not is_complete:
|
| 229 |
+
# Para Gradio, necesitamos devolver el historial completo
|
| 230 |
+
new_history = history + [[message, response]]
|
| 231 |
+
yield new_history, ""
|
| 232 |
+
else:
|
| 233 |
+
final_history = history + [[message, response]]
|
| 234 |
+
yield final_history, ""
|
| 235 |
+
|
| 236 |
+
# Función para API Python
|
| 237 |
+
def api_chat(system_prompt: str = "", message: str = "", history: List[List[str]] = None,
|
| 238 |
+
max_tokens: int = 512, temperature: float = 0.7) -> Dict:
|
| 239 |
+
"""API para cliente Python"""
|
| 240 |
+
if history is None:
|
| 241 |
+
history = []
|
| 242 |
+
|
| 243 |
+
full_response = ""
|
| 244 |
+
for response, is_complete in chat_instance.chat_stream(
|
| 245 |
+
system_prompt, message, history, max_tokens, temperature
|
| 246 |
+
):
|
| 247 |
+
full_response = response
|
| 248 |
+
if is_complete:
|
| 249 |
+
break
|
| 250 |
+
|
| 251 |
+
return {
|
| 252 |
+
"response": full_response,
|
| 253 |
+
"queue_status": chat_instance.get_queue_status()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 254 |
}
|
|
|
|
|
|
|
| 255 |
|
| 256 |
+
# Función para streaming API
|
| 257 |
+
def api_chat_stream(system_prompt: str = "", message: str = "", history: List[List[str]] = None,
|
| 258 |
+
max_tokens: int = 512, temperature: float = 0.7):
|
| 259 |
+
"""API streaming para cliente Python"""
|
| 260 |
+
if history is None:
|
| 261 |
+
history = []
|
| 262 |
+
|
| 263 |
+
for response, is_complete in chat_instance.chat_stream(
|
| 264 |
+
system_prompt, message, history, max_tokens, temperature
|
| 265 |
+
):
|
| 266 |
+
yield {
|
| 267 |
+
"response": response,
|
| 268 |
+
"is_complete": is_complete,
|
| 269 |
+
"queue_status": chat_instance.get_queue_status()
|
| 270 |
+
}
|
| 271 |
|
| 272 |
+
# Crear interfaz de Gradio
|
| 273 |
+
with gr.Blocks(title="Llama 3.2 3B Chat", theme=gr.themes.Soft()) as app:
|
| 274 |
+
gr.Markdown("# 🦙 Llama 3.2 3B Instruct Chat")
|
| 275 |
+
gr.Markdown("Chat con Meta Llama 3.2 3B con sistema de colas y streaming")
|
| 276 |
|
| 277 |
with gr.Row():
|
| 278 |
with gr.Column(scale=3):
|
| 279 |
+
chatbot = gr.Chatbot(height=500, show_label=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 280 |
msg = gr.Textbox(
|
| 281 |
+
label="Mensaje",
|
| 282 |
placeholder="Escribe tu mensaje aquí...",
|
| 283 |
+
lines=2
|
|
|
|
|
|
|
| 284 |
)
|
| 285 |
|
| 286 |
with gr.Row():
|
| 287 |
+
send_btn = gr.Button("Enviar", variant="primary")
|
| 288 |
+
clear_btn = gr.Button("Limpiar")
|
| 289 |
|
| 290 |
with gr.Column(scale=1):
|
| 291 |
+
system_prompt = gr.Textbox(
|
| 292 |
+
label="System Prompt",
|
| 293 |
+
placeholder="Eres un asistente útil...",
|
| 294 |
+
lines=5,
|
| 295 |
+
value="Eres un asistente de IA útil y amigable. Responde de manera clara y concisa."
|
| 296 |
+
)
|
| 297 |
|
| 298 |
max_tokens = gr.Slider(
|
| 299 |
minimum=50,
|
| 300 |
maximum=1024,
|
| 301 |
value=512,
|
| 302 |
step=50,
|
| 303 |
+
label="Max Tokens"
|
| 304 |
)
|
| 305 |
|
| 306 |
temperature = gr.Slider(
|
| 307 |
minimum=0.1,
|
| 308 |
+
maximum=2.0,
|
| 309 |
value=0.7,
|
| 310 |
step=0.1,
|
| 311 |
+
label="Temperature"
|
| 312 |
)
|
| 313 |
|
| 314 |
+
gr.Markdown("### Estado de la Cola")
|
| 315 |
+
queue_status = gr.JSON(label="Queue Status", value={})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 316 |
|
| 317 |
+
# Botón para actualizar estado
|
| 318 |
+
refresh_btn = gr.Button("Actualizar Estado")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 319 |
|
| 320 |
+
# Event handlers
|
| 321 |
+
def send_message(message, history, sys_prompt, max_tok, temp):
|
| 322 |
+
if not message.strip():
|
| 323 |
+
return history, ""
|
|
|
|
|
|
|
| 324 |
|
| 325 |
+
yield from chat_interface(message, history, sys_prompt, max_tok, temp)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 326 |
|
| 327 |
+
def clear_chat():
|
| 328 |
+
return [], ""
|
|
|
|
| 329 |
|
| 330 |
+
def update_queue_status():
|
| 331 |
+
return chat_instance.get_queue_status()
|
| 332 |
|
| 333 |
+
# Conectar eventos
|
| 334 |
+
send_btn.click(
|
| 335 |
+
send_message,
|
| 336 |
+
inputs=[msg, chatbot, system_prompt, max_tokens, temperature],
|
| 337 |
+
outputs=[chatbot, msg]
|
| 338 |
+
)
|
| 339 |
|
| 340 |
+
msg.submit(
|
| 341 |
+
send_message,
|
| 342 |
+
inputs=[msg, chatbot, system_prompt, max_tokens, temperature],
|
| 343 |
+
outputs=[chatbot, msg]
|
| 344 |
+
)
|
| 345 |
|
| 346 |
+
clear_btn.click(clear_chat, outputs=[chatbot, msg])
|
| 347 |
+
refresh_btn.click(update_queue_status, outputs=[queue_status])
|
| 348 |
+
|
| 349 |
+
# Actualizar estado cada 5 segundos
|
| 350 |
+
app.load(update_queue_status, outputs=[queue_status], every=5)
|
| 351 |
+
|
| 352 |
+
# Crear API endpoints
|
| 353 |
+
api_app = gr.Interface(
|
| 354 |
+
fn=api_chat,
|
| 355 |
+
inputs=[
|
| 356 |
+
gr.Textbox(label="System Prompt"),
|
| 357 |
+
gr.Textbox(label="Message"),
|
| 358 |
+
gr.JSON(label="History"),
|
| 359 |
+
gr.Slider(50, 1024, 512, label="Max Tokens"),
|
| 360 |
+
gr.Slider(0.1, 2.0, 0.7, label="Temperature")
|
| 361 |
+
],
|
| 362 |
+
outputs=gr.JSON(label="Response"),
|
| 363 |
+
title="Llama Chat API",
|
| 364 |
+
description="API endpoint para cliente Python"
|
| 365 |
+
)
|
| 366 |
+
|
| 367 |
+
# Combinar apps
|
| 368 |
+
final_app = gr.TabbedInterface(
|
| 369 |
+
[app, api_app],
|
| 370 |
+
["💬 Chat Interface", "🔌 API Endpoint"]
|
| 371 |
+
)
|
| 372 |
|
|
|
|
| 373 |
if __name__ == "__main__":
|
| 374 |
+
final_app.launch(server_name="0.0.0.0", server_port=7860, share=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
apuntes-filosofia-antigua-completos.md
DELETED
|
@@ -1,417 +0,0 @@
|
|
| 1 |
-
# FILOSOFÍA ANTIGUA - APUNTES COMPLETOS Y ORGANIZADOS
|
| 2 |
-
|
| 3 |
-
## 1. INTRODUCCIÓN AL RELATIVISMO Y EL CONOCIMIENTO
|
| 4 |
-
|
| 5 |
-
### Relativismo Cultural
|
| 6 |
-
El **relativismo cultural** establece que los resultados del conocimiento humano son convencionales, no universales[11][13]. Este concepto fundamental distingue entre:
|
| 7 |
-
- **Conocimiento aparente**: Lo que percibimos a través de los sentidos
|
| 8 |
-
- **Conocimiento real**: La verdadera realidad que trasciende las apariencias
|
| 9 |
-
|
| 10 |
-
### La Dialéctica como Método
|
| 11 |
-
La **dialéctica** constituye el método de búsqueda de definiciones a través del diálogo estructurado[11]. Su objetivo principal es alcanzar la verdad mediante la confrontación de ideas opuestas.
|
| 12 |
-
|
| 13 |
-
---
|
| 14 |
-
|
| 15 |
-
## 2. SÓCRATES (470-399 a.C.)
|
| 16 |
-
|
| 17 |
-
### Contexto Histórico
|
| 18 |
-
Sócrates vivió en Atenas durante la época dorada de los sofistas[13][17]. Fue **condenado a muerte** por dos acusaciones principales:
|
| 19 |
-
- Corromper a los jóvenes
|
| 20 |
-
- Introducir nuevos dioses en la ciudad
|
| 21 |
-
|
| 22 |
-
### Filosofía Socrática
|
| 23 |
-
|
| 24 |
-
#### El Método Dialéctico
|
| 25 |
-
Sócrates desarrolló un revolucionario **método dialéctico** conocido como **elenchus** (ἔλεγχος)[11][17]:
|
| 26 |
-
|
| 27 |
-
1. **Establecimiento de tesis**: Un interlocutor presenta una afirmación
|
| 28 |
-
2. **Cuestionamiento**: Sócrates introduce premisas adicionales
|
| 29 |
-
3. **Demostración de contradicciones**: Se muestra que las premisas implican lo contrario de la tesis original
|
| 30 |
-
4. **Refutación**: Se demuestra la falsedad de la tesis inicial
|
| 31 |
-
|
| 32 |
-
#### Principios Fundamentales
|
| 33 |
-
|
| 34 |
-
**Ironía Socrática**: "Solo sé que no sé nada"[17][26]
|
| 35 |
-
- Reconocimiento de la propia ignorancia como punto de partida del conocimiento
|
| 36 |
-
- Método para hacer que el interlocutor reconozca su ignorancia
|
| 37 |
-
|
| 38 |
-
**Mayéutica**: El arte de "dar a luz" ideas[17][26]
|
| 39 |
-
- Proceso de ayudar al interlocutor a descubrir verdades por sí mismo
|
| 40 |
-
- Sócrates se comparaba con una partera que ayuda en el parto del conocimiento
|
| 41 |
-
|
| 42 |
-
**Intelectualismo Moral**: El conocimiento de la realidad conduce necesariamente a la virtud[11][17]
|
| 43 |
-
- Quien conoce verdaderamente el bien, actúa bien
|
| 44 |
-
- La maldad surge de la ignorancia
|
| 45 |
-
|
| 46 |
-
---
|
| 47 |
-
|
| 48 |
-
## 3. LOS SOFISTAS: PROTÁGORAS Y GORGIAS
|
| 49 |
-
|
| 50 |
-
### Características Generales de los Sofistas
|
| 51 |
-
Los sofistas eran **maestros itinerantes** que cobraban por sus enseñanzas[13][16]:
|
| 52 |
-
- Ciudadanos extranjeros que enseñaban en Atenas
|
| 53 |
-
- Especialistas en retórica y arte de la persuasión
|
| 54 |
-
- Promovían la igualdad como principio pedagógico[28]
|
| 55 |
-
|
| 56 |
-
### Diferencias Fundamentales con Sócrates
|
| 57 |
-
| Aspecto | Sócrates | Sofistas |
|
| 58 |
-
|---------|----------|----------|
|
| 59 |
-
| **Objetivo** | Búsqueda de la verdad absoluta | Persuasión y éxito práctico |
|
| 60 |
-
| **Método** | Dialéctica para alcanzar conocimiento | Retórica para convencer |
|
| 61 |
-
| **Epistemología** | Existe verdad objetiva | Relativismo: no hay verdades absolutas |
|
| 62 |
-
| **Enseñanza** | Gratuita, por amor a la sabiduría | De pago, profesionalizada |
|
| 63 |
-
|
| 64 |
-
### Protágoras de Abdera (485-411 a.C.)
|
| 65 |
-
|
| 66 |
-
#### Relativismo Antropológico
|
| 67 |
-
**Frase célebre**: *"El hombre es la medida de todas las cosas, de las que son en cuanto que son y de las que no son en cuanto que no son"*[13][16][22][25]
|
| 68 |
-
|
| 69 |
-
**Interpretación**:
|
| 70 |
-
- El conocimiento es relativo al individuo o comunidad que conoce
|
| 71 |
-
- No existen verdades absolutas independientes del sujeto cognoscente
|
| 72 |
-
- Las valoraciones éticas y estéticas dependen de la perspectiva humana
|
| 73 |
-
|
| 74 |
-
#### Teoría del Nomos vs. Physis
|
| 75 |
-
- **Nomos** (ley humana): Convención social modificable según intereses comunitarios
|
| 76 |
-
- **Physis** (ley natural): Ley universal e inmutable de los presocráticos
|
| 77 |
-
- Protágoras defendía que el nomos complementa y beneficia a la naturaleza[25]
|
| 78 |
-
|
| 79 |
-
### Gorgias de Leontinos
|
| 80 |
-
|
| 81 |
-
#### Las Tres Tesis Fundamentales
|
| 82 |
-
Gorgias defendía un **relativismo radical** basado en tres proposiciones[16][25]:
|
| 83 |
-
|
| 84 |
-
1. **"Nada existe"**: Negación del ser
|
| 85 |
-
2. **"Si algo existiera, sería inconcebible para el hombre"**: Imposibilidad del conocimiento
|
| 86 |
-
3. **"Si fuera concebible, no se podría transmitir o explicar a otros"**: Incomunicabilidad del conocimiento
|
| 87 |
-
|
| 88 |
-
#### Poder de la Retórica
|
| 89 |
-
Para Gorgias, la **palabra** tiene poder transformador:
|
| 90 |
-
- La retórica puede modificar la realidad percibida
|
| 91 |
-
- El discurso es instrumento de persuasión y elevación moral[28]
|
| 92 |
-
- La habilidad argumentativa es virtud esencial en la democracia
|
| 93 |
-
|
| 94 |
-
---
|
| 95 |
-
|
| 96 |
-
## 4. PLATÓN: TEORÍA DE LAS IDEAS
|
| 97 |
-
|
| 98 |
-
### Dualismo Ontológico
|
| 99 |
-
Platón establece una división fundamental de la realidad en **dos mundos**[12][15][18][21]:
|
| 100 |
-
|
| 101 |
-
#### Mundo Inteligible (Mundo de las Ideas)
|
| 102 |
-
**Características**:
|
| 103 |
-
- **Inmaterial**, eterno, inmutable
|
| 104 |
-
- Accesible solo mediante la **razón**
|
| 105 |
-
- Constituye la **auténtica realidad**
|
| 106 |
-
- Existe fuera del espacio y del tiempo
|
| 107 |
-
- Contiene las **Ideas** como arquetipos perfectos
|
| 108 |
-
|
| 109 |
-
#### Mundo Sensible (Mundo Visible)
|
| 110 |
-
**Características**:
|
| 111 |
-
- **Material**, temporal, mutable
|
| 112 |
-
- Accesible mediante los **sentidos**
|
| 113 |
-
- Es **copia imperfecta** del mundo inteligible
|
| 114 |
-
- Sujeto a generación y corrupción
|
| 115 |
-
- Solo permite **opinión (doxa)**, no conocimiento verdadero
|
| 116 |
-
|
| 117 |
-
### El Mito de la Caverna
|
| 118 |
-
|
| 119 |
-
#### Estructura Alegórica
|
| 120 |
-
La famosa alegoría platónica ilustra el proceso del conocimiento[46][49]:
|
| 121 |
-
|
| 122 |
-
**Niveles de la Caverna**:
|
| 123 |
-
1. **Sombras en la pared**: Mundo de las apariencias sensibles
|
| 124 |
-
2. **Objetos que proyectan sombras**: Realidades físicas del mundo sensible
|
| 125 |
-
3. **Sol exterior**: La **Idea del Bien**, fuente de todo conocimiento
|
| 126 |
-
|
| 127 |
-
#### Proceso de la Paideia (Educación)
|
| 128 |
-
El **ascenso desde la caverna** representa:
|
| 129 |
-
- Liberación de la ignorancia sensible
|
| 130 |
-
- Proceso educativo hacia el conocimiento racional
|
| 131 |
-
- Acceso progresivo al mundo de las Ideas
|
| 132 |
-
- Culminación en la contemplación del Bien
|
| 133 |
-
|
| 134 |
-
### Relación entre los Mundos: Participación e Imitación
|
| 135 |
-
|
| 136 |
-
#### Participación (Methexis)
|
| 137 |
-
- Las cosas sensibles **participan** de las Ideas
|
| 138 |
-
- Reciben su ser y características de las Ideas correspondientes
|
| 139 |
-
- Relación de dependencia ontológica[18][30]
|
| 140 |
-
|
| 141 |
-
#### Imitación (Mimesis)
|
| 142 |
-
- El mundo sensible es **copia** del mundo inteligible
|
| 143 |
-
- Las Ideas funcionan como **modelos** o arquetipos
|
| 144 |
-
- La realidad física imita imperfectamente la perfección ideal[18][30]
|
| 145 |
-
|
| 146 |
-
---
|
| 147 |
-
|
| 148 |
-
## 5. EPISTEMOLOGÍA PLATÓNICA: GRADOS DEL CONOCIMIENTO
|
| 149 |
-
|
| 150 |
-
### Dualismo Epistemológico
|
| 151 |
-
Platón distingue fundamentalmente entre **dos tipos de conocimiento**[45][48][49]:
|
| 152 |
-
|
| 153 |
-
- **Doxa (δόχα)**: Opinión o conocimiento sensible del mundo visible
|
| 154 |
-
- **Episteme (ἐπιστήμη)**: Ciencia o conocimiento inteligible del mundo de las Ideas
|
| 155 |
-
|
| 156 |
-
### El Símil de la Línea
|
| 157 |
-
|
| 158 |
-
#### División de los Grados de Conocimiento
|
| 159 |
-
Platón representa los niveles de conocimiento mediante una **línea dividida en cuatro segmentos**[48][54][57]:
|
| 160 |
-
|
| 161 |
-
| **Mundo** | **Tipo de Conocimiento** | **Objeto** | **Facultad** |
|
| 162 |
-
|-----------|-------------------------|------------|--------------|
|
| 163 |
-
| **INTELIGIBLE** | **Noesis (νόησις)** | Ideas, Principios | Intuición intelectual |
|
| 164 |
-
| | **Dianoia (διάνοια)** | Objetos matemáticos | Pensamiento discursivo |
|
| 165 |
-
| **SENSIBLE** | **Pistis (πίστις)** | Objetos físicos | Creencia |
|
| 166 |
-
| | **Eikasia (εἰκασία)** | Sombras, reflejos | Imaginación |
|
| 167 |
-
|
| 168 |
-
#### Características de cada grado
|
| 169 |
-
|
| 170 |
-
**1. Eikasia (εἰκασία) - Imaginación**[48][49]:
|
| 171 |
-
- **Grado más bajo** de conocimiento
|
| 172 |
-
- Objeto: sombras, reflejos, imágenes de las cosas sensibles
|
| 173 |
-
- Conocimiento conjetural e inseguro
|
| 174 |
-
- Correspondiente a los prisioneros en la caverna
|
| 175 |
-
|
| 176 |
-
**2. Pistis (πίστις) - Creencia**[48][49]:
|
| 177 |
-
- Conocimiento de **objetos sensibles** directamente
|
| 178 |
-
- Mayor certeza que la imaginación
|
| 179 |
-
- Actitud libre de titubeo pero no científica
|
| 180 |
-
- Conocimiento del mundo físico cotidiano
|
| 181 |
-
|
| 182 |
-
**3. Dianoia (διάνοια) - Pensamiento discursivo**[48][51][49]:
|
| 183 |
-
- **Conocimiento matemático** y geométrico
|
| 184 |
-
- Utiliza hipótesis sin justificar racionalmente
|
| 185 |
-
- Se apoya en figuras sensibles para demostrar
|
| 186 |
-
- Razonamiento lógico pero no intuitivo
|
| 187 |
-
|
| 188 |
-
**4. Noesis (νόησις) - Intuición intelectual**[48][51][49]:
|
| 189 |
-
- **Conocimiento superior** de las Ideas
|
| 190 |
-
- Acceso directo mediante la razón pura
|
| 191 |
-
- No requiere apoyo en imágenes sensibles
|
| 192 |
-
- Conocimiento de principios no hipotéticos
|
| 193 |
-
|
| 194 |
-
### La Dialéctica como Método Superior
|
| 195 |
-
|
| 196 |
-
#### Dialéctica Ascendente[48][63]:
|
| 197 |
-
- Proceso de **elevación** desde lo sensible hacia las Ideas
|
| 198 |
-
- Utiliza la razón para superar las limitaciones de los sentidos
|
| 199 |
-
- Búsqueda de principios no hipotéticos
|
| 200 |
-
- Método específico de la filosofía
|
| 201 |
-
|
| 202 |
-
#### Dialéctica Descendente:
|
| 203 |
-
- **Aplicación** del conocimiento de las Ideas al mundo sensible
|
| 204 |
-
- Comprensión de la realidad empírica desde los principios universales
|
| 205 |
-
- Función pedagógica y política del filósofo
|
| 206 |
-
|
| 207 |
-
### La Teoría de la Reminiscencia (Anamnesis)
|
| 208 |
-
|
| 209 |
-
#### Fundamentos de la Anamnesis[49][57]:
|
| 210 |
-
- El **alma es inmortal** y preexiste al cuerpo
|
| 211 |
-
- El alma ha **contemplado las Ideas** antes del nacimiento
|
| 212 |
-
- **Aprender es recordar** lo que el alma ya conocía
|
| 213 |
-
- El conocimiento verdadero surge desde el interior
|
| 214 |
-
|
| 215 |
-
#### Eros y el Conocimiento:
|
| 216 |
-
- **Eros (amor)** impulsa al alma hacia el conocimiento
|
| 217 |
-
- La **belleza sensible** despierta el recuerdo de la Belleza en sí
|
| 218 |
-
- El amor filosófico conduce hacia las Ideas supremas
|
| 219 |
-
- Proceso de ascensión desde lo particular hacia lo universal
|
| 220 |
-
|
| 221 |
-
---
|
| 222 |
-
|
| 223 |
-
## 6. ANTROPOLOGÍA PLATÓNICA
|
| 224 |
-
|
| 225 |
-
### Dualismo Antropológico
|
| 226 |
-
El ser humano participa de ambos mundos:
|
| 227 |
-
- **Cuerpo**: Pertenece al mundo sensible, mortal
|
| 228 |
-
- **Alma**: Pertenece al mundo inteligible, inmortal
|
| 229 |
-
|
| 230 |
-
### Teoría Tripartita del Alma
|
| 231 |
-
|
| 232 |
-
#### Estructura del Alma
|
| 233 |
-
1. **Alma Racional (Logos)**:
|
| 234 |
-
- Sede de la razón y el conocimiento
|
| 235 |
-
- Localizada en la cabeza
|
| 236 |
-
- Inmortal, regresa al mundo inteligible tras la muerte
|
| 237 |
-
|
| 238 |
-
2. **Alma Irascible (Thymos)**:
|
| 239 |
-
- Sede del valor y las emociones nobles
|
| 240 |
-
- Localizada en el pecho
|
| 241 |
-
- Impulsa hacia la acción y la defensa
|
| 242 |
-
|
| 243 |
-
3. **Alma Concupiscible (Epithymia)**:
|
| 244 |
-
- Sede de los deseos y apetitos corporales
|
| 245 |
-
- Localizada en el abdomen
|
| 246 |
-
- Busca placeres sensibles y satisfacciones materiales
|
| 247 |
-
|
| 248 |
-
### Teorías del Alma
|
| 249 |
-
|
| 250 |
-
#### Teoría Soma-Sema
|
| 251 |
-
- **"El cuerpo es la cárcel del alma"**
|
| 252 |
-
- El alma está **aprisionada** en el cuerpo material
|
| 253 |
-
- La filosofía busca la liberación del alma mediante el conocimiento
|
| 254 |
-
|
| 255 |
-
#### Metempsicosis (Transmigración)
|
| 256 |
-
- El alma es **inmortal** y preexiste al cuerpo
|
| 257 |
-
- Tras la muerte, el alma se reencarna en otros cuerpos
|
| 258 |
-
- El destino del alma depende de la vida filosófica llevada
|
| 259 |
-
|
| 260 |
-
---
|
| 261 |
-
|
| 262 |
-
## 7. FILOSOFÍA POLÍTICA PLATÓNICA: LA REPÚBLICA IDEAL
|
| 263 |
-
|
| 264 |
-
### La Polis Justa y las Virtudes
|
| 265 |
-
|
| 266 |
-
#### Correspondencia Alma-Estado
|
| 267 |
-
Platón establece un **paralelismo** entre la estructura del alma individual y la organización del Estado ideal[47][50][56]:
|
| 268 |
-
|
| 269 |
-
| **Parte del Alma** | **Virtud Individual** | **Clase Social** | **Virtud Política** | **Función** |
|
| 270 |
-
|-------------------|---------------------|------------------|-------------------|-------------|
|
| 271 |
-
| **Racional** | Sabiduría (Sophia) | Filósofos-Reyes | Sabiduría | Gobernar |
|
| 272 |
-
| **Irascible** | Fortaleza (Andreia) | Guardianes | Valor | Defender |
|
| 273 |
-
| **Concupiscible** | Templanza (Sophrosyne) | Productores | Templanza | Producir |
|
| 274 |
-
|
| 275 |
-
#### La Justicia como Armonía
|
| 276 |
-
- **Justicia individual**: Cada parte del alma cumple su función propia
|
| 277 |
-
- **Justicia política**: Cada clase social desempeña su rol específico
|
| 278 |
-
- **Armonía**: Resultado del equilibrio entre las partes
|
| 279 |
-
- **Virtud = Función**: Cada elemento alcanza la excelencia en su ámbito
|
| 280 |
-
|
| 281 |
-
### El Filósofo-Rey
|
| 282 |
-
|
| 283 |
-
#### Características del Gobernante Ideal[44][47][56]:
|
| 284 |
-
- **Conocimiento de la Idea del Bien**: Única fuente de legitimidad para gobernar
|
| 285 |
-
- **Educación especializada**: Formación en matemáticas, dialéctica y filosofía
|
| 286 |
-
- **Desinterés material**: Libre de ambiciones personales y posesiones
|
| 287 |
-
- **Amor a la sabiduría**: Motivación genuina por el conocimiento de la verdad
|
| 288 |
-
|
| 289 |
-
#### Justificación del Gobierno Filosófico[47][59]:
|
| 290 |
-
1. **Argumento epistemológico**: Solo el filósofo conoce objetivamente el Bien
|
| 291 |
-
2. **Argumento moral**: El conocimiento del bien implica su práctica
|
| 292 |
-
3. **Argumento técnico**: Gobernar es un arte que requiere conocimiento especializado
|
| 293 |
-
4. **Argumento de eficiencia**: El filósofo logra la armonía del conjunto
|
| 294 |
-
|
| 295 |
-
### Sistema Educativo (Paideia)
|
| 296 |
-
|
| 297 |
-
#### Educación por Estamentos:
|
| 298 |
-
- **Educación común**: Base compartida para todos los ciudadanos
|
| 299 |
-
- **Selección progresiva**: Identificación de aptitudes naturales según el alma dominante
|
| 300 |
-
- **Especialización**: Formación específica según la función social destinada
|
| 301 |
-
|
| 302 |
-
#### Contenido Educativo:
|
| 303 |
-
- **Productores**: Artes y oficios técnicos, educación básica
|
| 304 |
-
- **Guardianes**: Educación física, musical y militar
|
| 305 |
-
- **Filósofos-Reyes**: Matemáticas, dialéctica, filosofía (hasta los 50 años)
|
| 306 |
-
|
| 307 |
-
#### Restricciones Educativas:
|
| 308 |
-
- **Censura poética**: Los poetas pueden crear confusión moral en los guardianes
|
| 309 |
-
- **Control de contenidos**: Solo se enseña lo que promueve la virtud y el orden
|
| 310 |
-
- **Filosofía reservada**: Solo los mejores acceden al conocimiento supremo
|
| 311 |
-
|
| 312 |
-
### Degeneración de los Regímenes Políticos
|
| 313 |
-
|
| 314 |
-
#### Secuencia de Decadencia según Platón[56][59]:
|
| 315 |
-
|
| 316 |
-
1. **ARISTOCRACIA** (Gobierno ideal):
|
| 317 |
-
- Gobierno de los mejores (filósofos)
|
| 318 |
-
- Basado en el conocimiento y la virtud
|
| 319 |
-
- Justicia y armonía social perfectas
|
| 320 |
-
|
| 321 |
-
2. **TIMOCRACIA**:
|
| 322 |
-
- Gobierno basado en el **honor** y la ambición
|
| 323 |
-
- Dominan los guardianes (parte irascible)
|
| 324 |
-
- Búsqueda de gloria militar y reconocimiento
|
| 325 |
-
|
| 326 |
-
3. **OLIGARQUÍA**:
|
| 327 |
-
- Gobierno de los **ricos** (pocos)
|
| 328 |
-
- Motivado por la acumulación de riquezas
|
| 329 |
-
- División social entre ricos y pobres
|
| 330 |
-
|
| 331 |
-
4. **DEMOCRACIA**:
|
| 332 |
-
- Gobierno de la **mayoría**
|
| 333 |
-
- Libertad excesiva y relativismo moral
|
| 334 |
-
- Ausencia de criterios objetivos de valor
|
| 335 |
-
|
| 336 |
-
5. **TIRANÍA**:
|
| 337 |
-
- Gobierno de **uno** sin límites legales
|
| 338 |
-
- Peor forma de gobierno posible
|
| 339 |
-
- Dominación total de los apetitos descontrolados
|
| 340 |
-
|
| 341 |
-
### El Estado de las Leyes
|
| 342 |
-
|
| 343 |
-
#### Evolución del Pensamiento Político Platónico[56][59]:
|
| 344 |
-
- En **"Las Leyes"** (obra tardía), Platón adopta una perspectiva más pragmática
|
| 345 |
-
- Reconoce la dificultad de encontrar verdaderos filósofos-reyes
|
| 346 |
-
- Propone un **"segundo Estado"** basado en el imperio de la ley
|
| 347 |
-
- **Gobierno mixto**: Combinación de elementos monárquicos y democráticos
|
| 348 |
-
|
| 349 |
-
---
|
| 350 |
-
|
| 351 |
-
## 8. ONTOLOGÍA PLATÓNICA: LA TEORÍA DE LAS IDEAS
|
| 352 |
-
|
| 353 |
-
### Naturaleza de las Ideas (Eidos)
|
| 354 |
-
|
| 355 |
-
#### Características Fundamentales
|
| 356 |
-
- **Únicas**: Cada Idea es singular y no admite multiplicidad
|
| 357 |
-
- **Eternas**: Existen fuera del tiempo, sin generación ni corrupción
|
| 358 |
-
- **Inmutables**: No cambian ni se alteran
|
| 359 |
-
- **Universales**: Se aplican a múltiples casos particulares
|
| 360 |
-
- **Perfectas**: Representan la máxima perfección de cada concepto
|
| 361 |
-
|
| 362 |
-
#### Tipos de Ideas
|
| 363 |
-
|
| 364 |
-
**Ideas de Valores Morales y Estéticos**:
|
| 365 |
-
- Justicia, Belleza, Bondad
|
| 366 |
-
- Proporcionan criterios normativos absolutos
|
| 367 |
-
|
| 368 |
-
**Ideas de Realidades Naturales**:
|
| 369 |
-
- Hombre en sí, Caballo en sí, Mesa en sí
|
| 370 |
-
- Arquetipos de las especies y objetos del mundo sensible
|
| 371 |
-
|
| 372 |
-
**Ideas Matemáticas**:
|
| 373 |
-
- Números, figuras geométricas perfectas
|
| 374 |
-
- Fundamento de la ciencia matemática
|
| 375 |
-
|
| 376 |
-
#### La Idea del Bien
|
| 377 |
-
**Supremacía ontológica y epistemológica**:
|
| 378 |
-
- **Idea suprema** que da valor y realidad a las demás Ideas
|
| 379 |
-
- **Fuente de conocimiento**: Permite conocer las otras Ideas
|
| 380 |
-
- **Fundamento del ser**: Confiere existencia al mundo inteligible
|
| 381 |
-
- **Sol del mundo inteligible**: Ilumina y hace posible el conocimiento racional
|
| 382 |
-
|
| 383 |
-
### Relación de las Ideas con el Conocimiento
|
| 384 |
-
|
| 385 |
-
#### Epistemología Platónica
|
| 386 |
-
- **Episteme** (conocimiento verdadero): Solo del mundo inteligible
|
| 387 |
-
- **Doxa** (opinión): Conocimiento imperfecto del mundo sensible
|
| 388 |
-
- La **dialéctica** es el método para ascender al conocimiento de las Ideas
|
| 389 |
-
|
| 390 |
-
#### Características del Dualismo Ontológico
|
| 391 |
-
|
| 392 |
-
| **Mundo Inteligible** | **Mundo Sensible** |
|
| 393 |
-
|----------------------|-------------------|
|
| 394 |
-
| Eterno, inmutable | Temporal, mutable |
|
| 395 |
-
| Universal | Particular |
|
| 396 |
-
| Inmaterial | Material |
|
| 397 |
-
| Auténtica realidad | Copia, apariencia |
|
| 398 |
-
| Conocimiento (episteme) | Opinión (doxa) |
|
| 399 |
-
| Acceso por la razón | Acceso por los sentidos |
|
| 400 |
-
| Necesario | Contingente |
|
| 401 |
-
| Inmóvil | Móvil |
|
| 402 |
-
|
| 403 |
-
---
|
| 404 |
-
|
| 405 |
-
## CONCLUSIÓN: LEGADO DE LA FILOSOFÍA ANTIGUA
|
| 406 |
-
|
| 407 |
-
La filosofía antigua establece los fundamentos conceptuales que perduran hasta nuestros días. **Sócrates** introduce el método dialéctico como búsqueda rigurosa de la verdad, oponiéndose al relativismo sofístico. **Los sofistas** aportan la reflexión sobre el poder del lenguaje y la relatividad cultural del conocimiento. **Platón** construye el primer gran sistema metafísico occidental, estableciendo las bases de:
|
| 408 |
-
|
| 409 |
-
### Contribuciones Fundamentales:
|
| 410 |
-
|
| 411 |
-
1. **Epistemológicas**: La distinción entre conocimiento verdadero (episteme) y opinión (doxa)
|
| 412 |
-
2. **Ontológicas**: El dualismo entre mundo inteligible e mundo sensible
|
| 413 |
-
3. **Políticas**: La teoría del Estado ideal y la figura del filósofo-rey
|
| 414 |
-
4. **Antropológicas**: La concepción tripartita del alma y su inmortalidad
|
| 415 |
-
5. **Metodológicas**: La dialéctica como método de acceso a la verdad
|
| 416 |
-
|
| 417 |
-
Este desarrollo intelectual marca el paso del pensamiento mítico al racional, estableciendo las bases de la epistemología, la ética, la política y la metafísica como disciplinas filosóficas autónomas. La influencia de estos conceptos se extiende a través de toda la historia de la filosofía occidental, desde el neoplatonismo hasta la filosofía contemporánea.
client.py
ADDED
|
@@ -0,0 +1,167 @@
import requests
import json
import time
from typing import List, Dict, Generator, Optional

class LlamaClient:
    def __init__(self, base_url: str):
        """
        Cliente para interactuar con el Hugging Face Space de Llama Chat

        Args:
            base_url: URL base del Space (ej: "https://tu-usuario-llama-chat.hf.space")
        """
        self.base_url = base_url.rstrip('/')
        self.api_endpoint = f"{self.base_url}/call/api_chat"
        self.stream_endpoint = f"{self.base_url}/call/api_chat_stream"

    def chat(self, message: str, system_prompt: str = "", history: List[List[str]] = None,
             max_tokens: int = 512, temperature: float = 0.7) -> Dict:
        """
        Enviar un mensaje y recibir respuesta completa

        Args:
            message: Mensaje del usuario
            system_prompt: Prompt del sistema (opcional)
            history: Historial de conversación [[user, assistant], ...]
            max_tokens: Máximo número de tokens a generar
            temperature: Temperatura para la generación

        Returns:
            Dict con 'response' y 'queue_status'
        """
        if history is None:
            history = []

        payload = {
            "data": [system_prompt, message, history, max_tokens, temperature]
        }

        try:
            response = requests.post(self.api_endpoint, json=payload, timeout=300)
            response.raise_for_status()

            result = response.json()
            return result.get("data", [{}])[0]

        except requests.exceptions.RequestException as e:
            return {"error": f"Error de conexión: {str(e)}"}
        except json.JSONDecodeError as e:
            return {"error": f"Error decodificando JSON: {str(e)}"}

    def chat_stream(self, message: str, system_prompt: str = "", history: List[List[str]] = None,
                    max_tokens: int = 512, temperature: float = 0.7) -> Generator[Dict, None, None]:
        """
        Enviar un mensaje y recibir respuesta en streaming

        Args:
            message: Mensaje del usuario
            system_prompt: Prompt del sistema (opcional)
            history: Historial de conversación
            max_tokens: Máximo número de tokens a generar
            temperature: Temperatura para la generación

        Yields:
            Dict con 'response', 'is_complete' y 'queue_status'
        """
        if history is None:
            history = []

        payload = {
            "data": [system_prompt, message, history, max_tokens, temperature]
        }

        try:
            response = requests.post(self.stream_endpoint, json=payload, stream=True, timeout=300)
            response.raise_for_status()

            for line in response.iter_lines():
                if line:
                    try:
                        data = json.loads(line.decode('utf-8'))
                        if "data" in data:
                            yield data["data"][0]
                    except json.JSONDecodeError:
                        continue

        except requests.exceptions.RequestException as e:
            yield {"error": f"Error de conexión: {str(e)}", "is_complete": True}

    def get_queue_status(self) -> Dict:
        """
        Obtener estado actual de la cola

        Returns:
            Dict con información del estado de la cola
        """
        try:
            # Hacer una request vacía solo para obtener el estado
            result = self.chat("", max_tokens=1)
            return result.get("queue_status", {})
        except Exception as e:
            return {"error": str(e)}

# Ejemplo de uso del cliente
def example_usage():
    """Ejemplo de cómo usar el cliente"""

    # Inicializar cliente (reemplaza con tu URL del Space)
    client = LlamaClient("https://tu-usuario-llama-chat.hf.space")

    print("=== Ejemplo 1: Chat simple ===")
    response = client.chat(
        message="¿Qué es la inteligencia artificial?",
        system_prompt="Eres un profesor de informática experto."
    )

    if "error" in response:
        print(f"Error: {response['error']}")
    else:
        print(f"Respuesta: {response['response']}")
        print(f"Estado cola: {response['queue_status']}")

    print("\n=== Ejemplo 2: Chat con historial ===")
    history = [
        ["Hola", "¡Hola! ¿En qué puedo ayudarte?"],
        ["¿Cuál es tu nombre?", "Soy un asistente de IA basado en Llama 3.2."]
    ]

    response = client.chat(
        message="¿Puedes explicarme conceptos de física?",
        system_prompt="Eres un tutor de física para estudiantes de bachillerato.",
        history=history
    )

    if "error" in response:
        print(f"Error: {response['error']}")
    else:
        print(f"Respuesta: {response['response']}")

    print("\n=== Ejemplo 3: Chat con streaming ===")
    print("Pregunta: Explica la teoría de la relatividad")
    print("Respuesta (streaming):")

    for chunk in client.chat_stream(
        message="Explica la teoría de la relatividad de forma simple",
        system_prompt="Eres un divulgador científico.",
        max_tokens=300,
        temperature=0.8
    ):
        if "error" in chunk:
            print(f"Error: {chunk['error']}")
            break

        print(f"\r{chunk['response']}", end="", flush=True)

        if chunk.get("is_complete", False):
            print("\n[Respuesta completa]")
            print(f"Estado cola: {chunk['queue_status']}")
            break

    print("\n=== Ejemplo 4: Verificar estado de cola ===")
    status = client.get_queue_status()
    print(f"Estado actual: {status}")

if __name__ == "__main__":
    example_usage()
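The `chat()` method above signals failures by returning a dict with an `"error"` key instead of raising. The following is a minimal sketch of how a caller could add retries on top of that convention; the helper name, retry count, and back-off values are illustrative assumptions and are not part of the committed client.py.

```python
# Hypothetical helper built on top of client.py's error convention (not in the repo).
import time
from client import LlamaClient

def chat_with_retry(client: LlamaClient, message: str, retries: int = 3,
                    backoff: float = 2.0, **kwargs) -> dict:
    """Retry LlamaClient.chat() while it keeps returning an {'error': ...} dict."""
    last = {"error": "no attempt made"}
    for attempt in range(1, retries + 1):
        last = client.chat(message, **kwargs)
        if "error" not in last:
            return last  # dict with 'response' and 'queue_status'
        time.sleep(backoff * attempt)  # simple linear back-off between attempts
    return last  # surface the last error dict after exhausting retries

# Usage sketch (placeholder URL from the README):
# client = LlamaClient("https://tu-usuario-llama-chat.hf.space")
# print(chat_with_retry(client, "Hola", max_tokens=64))
```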
config.py
ADDED
|
@@ -0,0 +1,83 @@
# config.py - Configuración del Space

import os

class Config:
    """Configuración centralizada para el Space"""

    # Modelo
    MODEL_NAME = "meta-llama/Llama-3.2-3B-Instruct"
    DEVICE = "cuda" if os.environ.get("SPACES_GPU") else "cpu"

    # Tokens y autenticación
    HF_TOKEN = os.environ.get("HF_TOKEN")

    # Límites de generación
    MAX_TOKENS_LIMIT = 1024
    MIN_TOKENS_LIMIT = 50
    DEFAULT_MAX_TOKENS = 512

    # Temperatura
    MAX_TEMPERATURE = 2.0
    MIN_TEMPERATURE = 0.1
    DEFAULT_TEMPERATURE = 0.7

    # Cola y concurrencia
    MAX_QUEUE_SIZE = 10
    QUEUE_TIMEOUT = 300  # 5 minutos

    # Context length
    MAX_CONTEXT_LENGTH = 2048

    # Interface
    CHAT_HEIGHT = 500
    DEFAULT_SYSTEM_PROMPT = "Eres un asistente de IA útil y amigable. Responde de manera clara y concisa."

    # API
    API_TIMEOUT = 300
    ENABLE_API_LOGGING = True

    @classmethod
    def validate(cls):
        """Validar configuración"""
        errors = []

        if not cls.HF_TOKEN:
            errors.append("HF_TOKEN no configurado en variables de entorno")

        if cls.MAX_TOKENS_LIMIT < cls.MIN_TOKENS_LIMIT:
            errors.append("MAX_TOKENS_LIMIT debe ser mayor que MIN_TOKENS_LIMIT")

        if cls.MAX_TEMPERATURE < cls.MIN_TEMPERATURE:
            errors.append("MAX_TEMPERATURE debe ser mayor que MIN_TEMPERATURE")

        return errors

    @classmethod
    def get_model_config(cls):
        """Configuración específica del modelo"""
        return {
            "torch_dtype": "float16" if cls.DEVICE == "cuda" else "float32",
            "device_map": "auto" if cls.DEVICE == "cuda" else None,
            "trust_remote_code": True,
            "token": cls.HF_TOKEN
        }

    @classmethod
    def get_generation_config(cls, max_tokens=None, temperature=None):
        """Configuración de generación"""
        return {
            "max_new_tokens": max_tokens or cls.DEFAULT_MAX_TOKENS,
            "temperature": temperature or cls.DEFAULT_TEMPERATURE,
            "do_sample": True,
            "repetition_penalty": 1.1,
            "top_p": 0.9,
            "top_k": 50
        }

# Validar configuración al importar
config_errors = Config.validate()
if config_errors:
    print("⚠️ Errores de configuración:")
    for error in config_errors:
        print(f"  - {error}")
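`Config.get_model_config()` stores the dtype as a plain string (`"float16"` / `"float32"`), so a consumer has to map it to an actual `torch.dtype` before handing it to `transformers`. The sketch below illustrates one way that could look; it is only an assumption for illustration, not how the committed app.py actually loads the model.

```python
# Hypothetical consumer of config.py (not part of the Space's code).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from config import Config

def load_model_from_config():
    cfg = Config.get_model_config()
    # Map the string stored in get_model_config() to a real torch dtype.
    dtype = torch.float16 if cfg["torch_dtype"] == "float16" else torch.float32
    tokenizer = AutoTokenizer.from_pretrained(Config.MODEL_NAME, token=cfg["token"])
    model = AutoModelForCausalLM.from_pretrained(
        Config.MODEL_NAME,
        token=cfg["token"],
        torch_dtype=dtype,
        device_map=cfg["device_map"],
        trust_remote_code=cfg["trust_remote_code"],
    )
    return tokenizer, model
```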
examples.py
ADDED
|
@@ -0,0 +1,339 @@
# examples.py - Ejemplos avanzados de uso del cliente

from client import LlamaClient
import asyncio
import time
from typing import List
import json

class AdvancedLlamaClient:
    """Cliente extendido con funcionalidades avanzadas"""

    def __init__(self, base_url: str):
        self.client = LlamaClient(base_url)
        self.conversation_history = []

    def continuous_chat(self):
        """Chat interactivo continuo"""
        print("🦙 Chat con Llama 3.2 3B - Escribe 'salir' para terminar")
        print("=" * 50)

        system_prompt = input("System prompt (opcional): ").strip()
        if not system_prompt:
            system_prompt = "Eres un asistente útil y amigable."

        while True:
            try:
                message = input("\nTú: ").strip()

                if message.lower() in ['salir', 'exit', 'quit']:
                    print("¡Hasta luego! 👋")
                    break

                if not message:
                    continue

                print("🦙: ", end="", flush=True)
                full_response = ""

                for chunk in self.client.chat_stream(
                    message=message,
                    system_prompt=system_prompt,
                    history=self.conversation_history,
                    max_tokens=512,
                    temperature=0.7
                ):
                    if "error" in chunk:
                        print(f"Error: {chunk['error']}")
                        break

                    # Mostrar solo el texto nuevo
                    new_text = chunk['response'][len(full_response):]
                    print(new_text, end="", flush=True)
                    full_response = chunk['response']

                    if chunk.get("is_complete", False):
                        print()  # Nueva línea al final
                        break

                # Agregar al historial
                if full_response and not full_response.startswith("Error:"):
                    self.conversation_history.append([message, full_response])

                    # Limitar historial a 10 intercambios
                    if len(self.conversation_history) > 10:
                        self.conversation_history = self.conversation_history[-10:]

            except KeyboardInterrupt:
                print("\n\n¡Hasta luego! 👋")
                break
            except Exception as e:
                print(f"\nError inesperado: {e}")

    def batch_questions(self, questions: List[str], system_prompt: str = ""):
        """Procesar múltiples preguntas en lote"""
        print(f"Procesando {len(questions)} preguntas...")
        results = []

        for i, question in enumerate(questions, 1):
            print(f"\nPregunta {i}/{len(questions)}: {question}")
            print("-" * 40)

            response = self.client.chat(
                message=question,
                system_prompt=system_prompt,
                max_tokens=300
            )

            if "error" in response:
                print(f"Error: {response['error']}")
                results.append({"question": question, "error": response['error']})
            else:
                print(f"Respuesta: {response['response']}")
                results.append({
                    "question": question,
                    "response": response['response'],
                    "queue_status": response['queue_status']
                })

            # Pequeña pausa entre preguntas
            time.sleep(1)

        return results

    def compare_temperatures(self, message: str, temperatures: List[float] = [0.3, 0.7, 1.2]):
        """Comparar respuestas con diferentes temperaturas"""
        print(f"Comparando respuestas para: '{message}'")
        print("=" * 60)

        results = {}

        for temp in temperatures:
            print(f"\n🌡️ Temperature: {temp}")
            print("-" * 30)

            response = self.client.chat(
                message=message,
                temperature=temp,
                max_tokens=200
            )

            if "error" in response:
                print(f"Error: {response['error']}")
                results[temp] = {"error": response['error']}
            else:
                print(response['response'])
                results[temp] = {"response": response['response']}

        return results

    def roleplay_scenario(self, scenario: str, turns: int = 5):
        """Escenario de roleplay interactivo"""
        print(f"🎭 Escenario: {scenario}")
        print("=" * 50)

        system_prompt = f"Actúa como {scenario}. Mantén el rol consistentemente y responde de manera inmersiva."
        history = []

        for turn in range(turns):
            user_input = input(f"\nTurno {turn + 1} - Tú: ").strip()

            if not user_input or user_input.lower() == 'salir':
                break

            print("🎭: ", end="", flush=True)

            for chunk in self.client.chat_stream(
                message=user_input,
                system_prompt=system_prompt,
                history=history,
                temperature=0.8,
                max_tokens=300
            ):
                if "error" in chunk:
                    print(f"Error: {chunk['error']}")
                    break

                print(f"\r🎭: {chunk['response']}", end="", flush=True)

                if chunk.get("is_complete", False):
                    history.append([user_input, chunk['response']])
                    print()
                    break

        return history

def academic_tutor_example():
    """Ejemplo: Tutor académico para física"""
    client = LlamaClient("https://tu-usuario-llama-chat.hf.space")

    system_prompt = """Eres un tutor de física especializado en bachillerato español.
    Explicas conceptos de forma clara, usas ejemplos cotidianos y siempre verificas
    que el estudiante entienda antes de avanzar. Puedes resolver problemas paso a paso."""

    physics_questions = [
        "¿Qué es la velocidad angular y cómo se relaciona con la velocidad lineal?",
        "Explica el principio de conservación de la energía con un ejemplo",
        "¿Cómo funciona el efecto Doppler?",
        "Diferencia entre masa y peso físicamente"
    ]

    print("🔬 Tutor de Física - Bachillerato")
    print("=" * 40)

    for question in physics_questions:
        print(f"\n📚 Pregunta: {question}")
        print("-" * 50)

        full_response = ""
        for chunk in client.chat_stream(
            message=question,
            system_prompt=system_prompt,
            max_tokens=400,
            temperature=0.6
        ):
            if "error" in chunk:
                print(f"Error: {chunk['error']}")
                break

            print(f"\r👨🏫: {chunk['response']}", end="", flush=True)
            full_response = chunk['response']

            if chunk.get("is_complete", False):
                print("\n")
                break

        input("Presiona Enter para la siguiente pregunta...")

def programming_assistant_example():
    """Ejemplo: Asistente de programación"""
    client = LlamaClient("https://tu-usuario-llama-chat.hf.space")

    system_prompt = """Eres un desarrollador senior especializado en Python y Flutter.
    Ayudas a estudiantes con código, debugging y mejores prácticas. Siempre explicas
    el código línea por línea y sugieres mejoras."""

    code_questions = [
        "¿Cómo implementar un patrón Singleton en Python?",
        "Explica la diferencia entre async/await y threading",
        "¿Cómo manejo errores de API en Flutter?",
        "Mejores prácticas para estructurar un proyecto Flutter"
    ]

    print("💻 Asistente de Programación")
    print("=" * 35)

    for question in code_questions:
        print(f"\n🤔 {question}")
        print("-" * 60)

        response = client.chat(
            message=question,
            system_prompt=system_prompt,
            max_tokens=600,
            temperature=0.4  # Menor temperatura para código
        )

        if "error" in response:
            print(f"❌ Error: {response['error']}")
        else:
            print(f"💡 {response['response']}")

        print("\n" + "="*60)
        time.sleep(2)

def creative_writing_example():
    """Ejemplo: Escritura creativa colaborativa"""
    client = LlamaClient("https://tu-usuario-llama-chat.hf.space")

    system_prompt = """Eres un escritor creativo experto. Ayudas a desarrollar historias,
    personajes y narrativas. Puedes continuar historias, sugerir tramas y crear diálogos
    naturales. Eres imaginativo pero coherente."""

    print("✍️ Escritura Creativa Colaborativa")
    print("=" * 40)

    story_start = input("Escribe el inicio de una historia (2-3 líneas): ")

    current_story = story_start
    history = []

    for chapter in range(3):
        print(f"\n📖 Capítulo {chapter + 1}")
        print("-" * 30)

        prompt = f"Continúa esta historia de manera creativa e interesante:\n\n{current_story}"

        print("✨ Continuando la historia...")
        continuation = ""

        for chunk in client.chat_stream(
            message=prompt,
            system_prompt=system_prompt,
            history=history,
            max_tokens=400,
            temperature=1.0  # Alta creatividad
        ):
            if "error" in chunk:
                print(f"Error: {chunk['error']}")
                break

            continuation = chunk['response']
            print(f"\r{continuation}", end="", flush=True)

            if chunk.get("is_complete", False):
                print("\n")
                break

        current_story += "\n\n" + continuation
        history.append([prompt, continuation])

        # Opción de dirigir la historia
        direction = input("\n¿Quieres sugerir una dirección para la historia? (opcional): ")
        if direction.strip():
            current_story += "\n\n[Dirección sugerida: " + direction + "]"

    print("\n📚 Historia completa:")
    print("=" * 50)
    print(current_story)

def main():
    """Menú principal de ejemplos"""
    examples = {
        "1": ("Chat Continuo", lambda: AdvancedLlamaClient("https://tu-usuario-llama-chat.hf.space").continuous_chat()),
        "2": ("Tutor de Física", academic_tutor_example),
        "3": ("Asistente de Programación", programming_assistant_example),
        "4": ("Escritura Creativa", creative_writing_example),
        "5": ("Comparar Temperaturas", lambda: AdvancedLlamaClient("https://tu-usuario-llama-chat.hf.space").compare_temperatures(
            "Explica la inteligencia artificial", [0.3, 0.7, 1.2]
        )),
    }

    print("🦙 Ejemplos Avanzados - Llama 3.2 Chat")
    print("=" * 45)
    print("IMPORTANTE: Cambia la URL por tu Space real")
    print("=" * 45)

    while True:
        print("\nSelecciona un ejemplo:")
        for key, (name, _) in examples.items():
            print(f"{key}. {name}")
        print("0. Salir")

        choice = input("\nOpción: ").strip()

        if choice == "0":
            print("¡Hasta luego! 🦙")
            break
        elif choice in examples:
            try:
                print(f"\n🚀 Ejecutando: {examples[choice][0]}")
                print("=" * 50)
                examples[choice][1]()
            except Exception as e:
                print(f"Error ejecutando ejemplo: {e}")
        else:
            print("Opción no válida")

if __name__ == "__main__":
    main()
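`batch_questions()` returns a plain list of dicts, which makes it easy to persist a run and compare results later. A small hypothetical helper follows; it is not part of examples.py, and the output filename is an assumption.

```python
# Hypothetical follow-up to examples.py: save a batch run to disk.
import json
from examples import AdvancedLlamaClient

def run_and_save(base_url: str, questions: list, out_path: str = "batch_results.json") -> None:
    advanced = AdvancedLlamaClient(base_url)
    results = advanced.batch_questions(questions, system_prompt="Eres un asistente conciso.")
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(results, f, ensure_ascii=False, indent=2)  # keep accented text readable

# run_and_save("https://tu-usuario-llama-chat.hf.space", ["¿Qué es un transformer?"])
```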
exported-assets.zip
ADDED
|
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8609046f903919fb1ee14e368371096c2c51a6ca518af32af7c05c9fce4f7252
size 36209
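The three lines above are a Git LFS pointer, not the zip itself: the repo stores only the `version`, `oid`, and `size` fields, and the binary is fetched separately through LFS. A hypothetical snippet for reading those fields, shown purely for illustration:

```python
# Hypothetical: parse the Git LFS pointer committed as exported-assets.zip.
def parse_lfs_pointer(path: str = "exported-assets.zip") -> dict:
    fields = {}
    with open(path, encoding="utf-8") as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields  # e.g. {'version': ..., 'oid': 'sha256:...', 'size': '36209'}
```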
requirements.txt
CHANGED
|
@@ -1,7 +1,8 @@
+gradio>=4.0.0
+transformers>=4.35.0
 torch>=2.0.0
-transformers>=4.45.0
-gradio>=4.44.0
 accelerate>=0.20.0
 bitsandbytes>=0.41.0
-
-
+sentencepiece>=0.1.99
+protobuf>=3.20.0
+requests>=2.28.0
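The updated requirements relax the `gradio` and `transformers` pins and add tokenizer and HTTP dependencies (`sentencepiece`, `protobuf`, `requests`). As an optional sketch, one could verify at startup that every pinned package is actually installed; the helper below is an assumption for illustration, not something the Space does.

```python
# Hypothetical startup check against the packages listed in requirements.txt.
from importlib import metadata

REQUIRED = ["gradio", "transformers", "torch", "accelerate",
            "bitsandbytes", "sentencepiece", "protobuf", "requests"]

def report_versions() -> None:
    for name in REQUIRED:
        try:
            print(f"{name}=={metadata.version(name)}")
        except metadata.PackageNotFoundError:
            print(f"{name}: NOT installed")
```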
script.py
ADDED
|
@@ -0,0 +1,381 @@
| 1 |
+
# Primero, voy a crear el código completo para el Hugging Face Space
|
| 2 |
+
# que cumpla con todos los requisitos mencionados
|
| 3 |
+
|
| 4 |
+
app_py_code = '''
|
| 5 |
+
import gradio as gr
|
| 6 |
+
import asyncio
|
| 7 |
+
import queue
|
| 8 |
+
import threading
|
| 9 |
+
import time
|
| 10 |
+
import os
|
| 11 |
+
from typing import List, Dict, Optional, Generator, Tuple
|
| 12 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
|
| 13 |
+
import torch
|
| 14 |
+
import json
|
| 15 |
+
from datetime import datetime
|
| 16 |
+
|
| 17 |
+
class LlamaChat:
|
| 18 |
+
def __init__(self):
|
| 19 |
+
self.model_name = "meta-llama/Llama-3.2-3B-Instruct"
|
| 20 |
+
self.device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 21 |
+
self.tokenizer = None
|
| 22 |
+
self.model = None
|
| 23 |
+
self.request_queue = queue.Queue()
|
| 24 |
+
self.is_processing = False
|
| 25 |
+
self.current_streamer = None
|
| 26 |
+
|
| 27 |
+
# Inicializar modelo
|
| 28 |
+
self._load_model()
|
| 29 |
+
|
| 30 |
+
# Iniciar worker thread para procesar colas
|
| 31 |
+
self.worker_thread = threading.Thread(target=self._queue_worker, daemon=True)
|
| 32 |
+
self.worker_thread.start()
|
| 33 |
+
|
| 34 |
+
def _load_model(self):
|
| 35 |
+
"""Cargar el modelo y tokenizer con el token de HF"""
|
| 36 |
+
try:
|
| 37 |
+
hf_token = os.environ.get("HF_TOKEN")
|
| 38 |
+
if not hf_token:
|
| 39 |
+
raise ValueError("HF_TOKEN no encontrado en variables de entorno")
|
| 40 |
+
|
| 41 |
+
print(f"Cargando modelo {self.model_name}...")
|
| 42 |
+
self.tokenizer = AutoTokenizer.from_pretrained(
|
| 43 |
+
self.model_name,
|
| 44 |
+
token=hf_token,
|
| 45 |
+
trust_remote_code=True
|
| 46 |
+
)
|
| 47 |
+
|
| 48 |
+
self.model = AutoModelForCausalLM.from_pretrained(
|
| 49 |
+
self.model_name,
|
| 50 |
+
token=hf_token,
|
| 51 |
+
torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
|
| 52 |
+
device_map="auto" if self.device == "cuda" else None,
|
| 53 |
+
trust_remote_code=True
|
| 54 |
+
)
|
| 55 |
+
|
| 56 |
+
if self.tokenizer.pad_token is None:
|
| 57 |
+
self.tokenizer.pad_token = self.tokenizer.eos_token
|
| 58 |
+
|
| 59 |
+
print("Modelo cargado exitosamente!")
|
| 60 |
+
|
| 61 |
+
except Exception as e:
|
| 62 |
+
print(f"Error cargando modelo: {e}")
|
| 63 |
+
raise
|
| 64 |
+
|
| 65 |
+
def _format_messages(self, system_prompt: str, message: str, history: List[List[str]]) -> str:
|
| 66 |
+
"""Formatear mensajes para Llama-3.2-Instruct"""
|
| 67 |
+
messages = []
|
| 68 |
+
|
| 69 |
+
if system_prompt:
|
| 70 |
+
messages.append({"role": "system", "content": system_prompt})
|
| 71 |
+
|
| 72 |
+
# Agregar historial
|
| 73 |
+
for user_msg, assistant_msg in history:
|
| 74 |
+
messages.append({"role": "user", "content": user_msg})
|
| 75 |
+
messages.append({"role": "assistant", "content": assistant_msg})
|
| 76 |
+
|
| 77 |
+
# Agregar mensaje actual
|
| 78 |
+
messages.append({"role": "user", "content": message})
|
| 79 |
+
|
| 80 |
+
# Usar el chat template del tokenizer
|
| 81 |
+
formatted_prompt = self.tokenizer.apply_chat_template(
|
| 82 |
+
messages,
|
| 83 |
+
tokenize=False,
|
| 84 |
+
add_generation_prompt=True
|
| 85 |
+
)
|
| 86 |
+
|
| 87 |
+
return formatted_prompt
|
| 88 |
+
|
| 89 |
+
def _queue_worker(self):
|
| 90 |
+
"""Worker thread para procesar cola de requests"""
|
| 91 |
+
while True:
|
| 92 |
+
try:
|
| 93 |
+
if not self.request_queue.empty():
|
| 94 |
+
request = self.request_queue.get()
|
| 95 |
+
self.is_processing = True
|
| 96 |
+
self._process_request(request)
|
| 97 |
+
self.is_processing = False
|
| 98 |
+
self.request_queue.task_done()
|
| 99 |
+
else:
|
| 100 |
+
time.sleep(0.1)
|
| 101 |
+
except Exception as e:
|
| 102 |
+
print(f"Error en queue worker: {e}")
|
| 103 |
+
self.is_processing = False
|
| 104 |
+
|
| 105 |
+
def _process_request(self, request: Dict):
|
| 106 |
+
"""Procesar una request individual"""
|
| 107 |
+
try:
|
| 108 |
+
system_prompt = request["system_prompt"]
|
| 109 |
+
message = request["message"]
|
| 110 |
+
history = request["history"]
|
| 111 |
+
max_tokens = request.get("max_tokens", 512)
|
| 112 |
+
temperature = request.get("temperature", 0.7)
|
| 113 |
+
response_callback = request["callback"]
|
| 114 |
+
|
| 115 |
+
# Formatear prompt
|
| 116 |
+
formatted_prompt = self._format_messages(system_prompt, message, history)
|
| 117 |
+
|
| 118 |
+
# Tokenizar
|
| 119 |
+
inputs = self.tokenizer(
|
| 120 |
+
formatted_prompt,
|
| 121 |
+
return_tensors="pt",
|
| 122 |
+
truncation=True,
|
| 123 |
+
max_length=2048
|
| 124 |
+
).to(self.device)
|
| 125 |
+
|
| 126 |
+
# Configurar streamer
|
| 127 |
+
streamer = TextIteratorStreamer(
|
| 128 |
+
self.tokenizer,
|
| 129 |
+
timeout=60,
|
| 130 |
+
skip_prompt=True,
|
| 131 |
+
skip_special_tokens=True
|
| 132 |
+
)
|
| 133 |
+
self.current_streamer = streamer
|
| 134 |
+
|
| 135 |
+
# Configurar parámetros de generación
|
| 136 |
+
generation_kwargs = {
|
| 137 |
+
**inputs,
|
| 138 |
+
"max_new_tokens": max_tokens,
|
| 139 |
+
"temperature": temperature,
|
| 140 |
+
"do_sample": True,
|
| 141 |
+
"pad_token_id": self.tokenizer.eos_token_id,
|
| 142 |
+
"streamer": streamer,
|
| 143 |
+
"repetition_penalty": 1.1
|
| 144 |
+
}
|
| 145 |
+
|
| 146 |
+
# Generar en thread separado
|
| 147 |
+
def generate():
|
| 148 |
+
with torch.no_grad():
|
| 149 |
+
self.model.generate(**generation_kwargs)
|
| 150 |
+
|
| 151 |
+
generation_thread = threading.Thread(target=generate)
|
| 152 |
+
generation_thread.start()
|
| 153 |
+
|
| 154 |
+
# Stream respuesta
|
| 155 |
+
full_response = ""
|
| 156 |
+
for new_text in streamer:
|
| 157 |
+
if new_text:
|
| 158 |
+
full_response += new_text
|
| 159 |
+
response_callback(full_response, False)
|
| 160 |
+
|
| 161 |
+
response_callback(full_response, True)
|
| 162 |
+
generation_thread.join()
|
| 163 |
+
|
| 164 |
+
except Exception as e:
|
| 165 |
+
print(f"Error procesando request: {e}")
|
| 166 |
+
response_callback(f"Error: {str(e)}", True)
|
| 167 |
+
finally:
|
| 168 |
+
self.current_streamer = None
|
| 169 |
+
|
| 170 |
+
def chat_stream(self, system_prompt: str, message: str, history: List[List[str]],
|
| 171 |
+
max_tokens: int = 512, temperature: float = 0.7) -> Generator[Tuple[str, bool], None, None]:
|
| 172 |
+
"""Método principal para chatear con streaming"""
|
| 173 |
+
if not message.strip():
|
| 174 |
+
yield "Por favor, escribe un mensaje.", True
|
| 175 |
+
return
|
| 176 |
+
|
| 177 |
+
# Crear evento para comunicación con el worker
|
| 178 |
+
response_queue = queue.Queue()
|
| 179 |
+
response_complete = threading.Event()
|
| 180 |
+
current_response = [""]
|
| 181 |
+
|
| 182 |
+
def response_callback(text: str, is_complete: bool):
|
| 183 |
+
current_response[0] = text
|
| 184 |
+
response_queue.put((text, is_complete))
|
| 185 |
+
if is_complete:
|
| 186 |
+
response_complete.set()
|
| 187 |
+
|
| 188 |
+
# Agregar request a la cola
|
| 189 |
+
request = {
|
| 190 |
+
"system_prompt": system_prompt or "",
|
| 191 |
+
"message": message,
|
| 192 |
+
"history": history or [],
|
| 193 |
+
"max_tokens": max_tokens,
|
| 194 |
+
"temperature": temperature,
|
| 195 |
+
"callback": response_callback
|
| 196 |
+
}
|
| 197 |
+
|
| 198 |
+
self.request_queue.put(request)
|
| 199 |
+
|
| 200 |
+
# Esperar y streamear respuesta
|
| 201 |
+
while not response_complete.is_set():
|
| 202 |
+
try:
|
| 203 |
+
text, is_complete = response_queue.get(timeout=0.1)
|
| 204 |
+
yield text, is_complete
|
| 205 |
+
if is_complete:
|
| 206 |
+
break
|
| 207 |
+
except queue.Empty:
|
| 208 |
+
# Si no hay nuevos tokens, yield el último estado
|
| 209 |
+
if current_response[0]:
|
| 210 |
+
yield current_response[0], False
|
| 211 |
+
continue
|
| 212 |
+
|
| 213 |
+
def get_queue_status(self) -> Dict[str, any]:
|
| 214 |
+
"""Obtener estado de la cola"""
|
| 215 |
+
return {
|
| 216 |
+
"queue_size": self.request_queue.qsize(),
|
| 217 |
+
"is_processing": self.is_processing,
|
| 218 |
+
"timestamp": datetime.now().isoformat()
|
| 219 |
+
}
|
| 220 |
+
|
| 221 |
+
# Inicializar el chat
|
| 222 |
+
chat_instance = LlamaChat()
|
| 223 |
+
|
| 224 |
+
# Función para la interfaz de Gradio
|
| 225 |
+
def chat_interface(message: str, history: List[List[str]], system_prompt: str,
|
| 226 |
+
max_tokens: int, temperature: float):
|
| 227 |
+
"""Interfaz de chat para Gradio"""
|
| 228 |
+
for response, is_complete in chat_instance.chat_stream(
|
| 229 |
+
system_prompt, message, history, max_tokens, temperature
|
| 230 |
+
):
|
| 231 |
+
if not is_complete:
|
| 232 |
+
# Para Gradio, necesitamos devolver el historial completo
|
| 233 |
+
new_history = history + [[message, response]]
|
| 234 |
+
yield new_history, ""
|
| 235 |
+
else:
|
| 236 |
+
final_history = history + [[message, response]]
|
| 237 |
+
yield final_history, ""
|
| 238 |
+
|
| 239 |
+
# Función para API Python
|
| 240 |
+
def api_chat(system_prompt: str = "", message: str = "", history: List[List[str]] = None,
|
| 241 |
+
max_tokens: int = 512, temperature: float = 0.7) -> Dict:
|
| 242 |
+
"""API para cliente Python"""
|
| 243 |
+
if history is None:
|
| 244 |
+
history = []
|
| 245 |
+
|
| 246 |
+
full_response = ""
|
| 247 |
+
for response, is_complete in chat_instance.chat_stream(
|
| 248 |
+
system_prompt, message, history, max_tokens, temperature
|
| 249 |
+
):
|
| 250 |
+
full_response = response
|
| 251 |
+
if is_complete:
|
| 252 |
+
break
|
| 253 |
+
|
| 254 |
+
return {
|
| 255 |
+
"response": full_response,
|
| 256 |
+
"queue_status": chat_instance.get_queue_status()
|
| 257 |
+
}
|
| 258 |
+
|
| 259 |
+
# Función para streaming API
|
| 260 |
+
def api_chat_stream(system_prompt: str = "", message: str = "", history: List[List[str]] = None,
|
| 261 |
+
max_tokens: int = 512, temperature: float = 0.7):
|
| 262 |
+
"""API streaming para cliente Python"""
|
| 263 |
+
if history is None:
|
| 264 |
+
history = []
|
| 265 |
+
|
| 266 |
+
for response, is_complete in chat_instance.chat_stream(
|
| 267 |
+
system_prompt, message, history, max_tokens, temperature
|
| 268 |
+
):
|
| 269 |
+
yield {
|
| 270 |
+
"response": response,
|
| 271 |
+
"is_complete": is_complete,
|
| 272 |
+
"queue_status": chat_instance.get_queue_status()
|
| 273 |
+
}
|
| 274 |
+
|
| 275 |
+
# Crear interfaz de Gradio
|
| 276 |
+
with gr.Blocks(title="Llama 3.2 3B Chat", theme=gr.themes.Soft()) as app:
|
| 277 |
+
gr.Markdown("# 🦙 Llama 3.2 3B Instruct Chat")
|
| 278 |
+
gr.Markdown("Chat con Meta Llama 3.2 3B con sistema de colas y streaming")
|
| 279 |
+
|
| 280 |
+
with gr.Row():
|
| 281 |
+
with gr.Column(scale=3):
|
| 282 |
+
chatbot = gr.Chatbot(height=500, show_label=False)
|
| 283 |
+
msg = gr.Textbox(
|
| 284 |
+
label="Mensaje",
|
| 285 |
+
placeholder="Escribe tu mensaje aquí...",
|
| 286 |
+
lines=2
|
| 287 |
+
)
|
| 288 |
+
|
| 289 |
+
with gr.Row():
|
| 290 |
+
send_btn = gr.Button("Enviar", variant="primary")
|
| 291 |
+
clear_btn = gr.Button("Limpiar")
|
| 292 |
+
|
| 293 |
+
with gr.Column(scale=1):
|
| 294 |
+
system_prompt = gr.Textbox(
|
| 295 |
+
label="System Prompt",
|
| 296 |
+
placeholder="Eres un asistente útil...",
|
| 297 |
+
lines=5,
|
| 298 |
+
value="Eres un asistente de IA útil y amigable. Responde de manera clara y concisa."
|
| 299 |
+
)
|
| 300 |
+
|
| 301 |
+
max_tokens = gr.Slider(
|
| 302 |
+
minimum=50,
|
| 303 |
+
maximum=1024,
|
| 304 |
+
value=512,
|
| 305 |
+
step=50,
|
| 306 |
+
label="Max Tokens"
|
| 307 |
+
)
|
| 308 |
+
|
| 309 |
+
temperature = gr.Slider(
|
| 310 |
+
minimum=0.1,
|
| 311 |
+
maximum=2.0,
|
| 312 |
+
value=0.7,
|
| 313 |
+
step=0.1,
|
| 314 |
+
label="Temperature"
|
| 315 |
+
)
|
| 316 |
+
|
| 317 |
+
gr.Markdown("### Estado de la Cola")
|
| 318 |
+
queue_status = gr.JSON(label="Queue Status", value={})
|
| 319 |
+
|
| 320 |
+
# Botón para actualizar estado
|
| 321 |
+
refresh_btn = gr.Button("Actualizar Estado")
|
| 322 |
+
|
| 323 |
+
# Event handlers
|
| 324 |
+
def send_message(message, history, sys_prompt, max_tok, temp):
|
| 325 |
+
if not message.strip():
|
| 326 |
+
return history, ""
|
| 327 |
+
|
| 328 |
+
yield from chat_interface(message, history, sys_prompt, max_tok, temp)
|
| 329 |
+
|
| 330 |
+
def clear_chat():
|
| 331 |
+
return [], ""
|
| 332 |
+
|
| 333 |
+
def update_queue_status():
|
| 334 |
+
return chat_instance.get_queue_status()
|
| 335 |
+
|
| 336 |
+
# Conectar eventos
|
| 337 |
+
send_btn.click(
|
| 338 |
+
send_message,
|
| 339 |
+
inputs=[msg, chatbot, system_prompt, max_tokens, temperature],
|
| 340 |
+
outputs=[chatbot, msg]
|
| 341 |
+
)
|
| 342 |
+
|
| 343 |
+
msg.submit(
|
| 344 |
+
send_message,
|
| 345 |
+
inputs=[msg, chatbot, system_prompt, max_tokens, temperature],
|
| 346 |
+
outputs=[chatbot, msg]
|
| 347 |
+
)
|
| 348 |
+
|
| 349 |
+
clear_btn.click(clear_chat, outputs=[chatbot, msg])
|
| 350 |
+
refresh_btn.click(update_queue_status, outputs=[queue_status])
|
| 351 |
+
|
| 352 |
+
# Actualizar estado cada 5 segundos
|
| 353 |
+
app.load(update_queue_status, outputs=[queue_status], every=5)
|
| 354 |
+
|
| 355 |
+
# Crear API endpoints
|
| 356 |
+
api_app = gr.Interface(
|
| 357 |
+
fn=api_chat,
|
| 358 |
+
inputs=[
|
| 359 |
+
gr.Textbox(label="System Prompt"),
|
| 360 |
+
gr.Textbox(label="Message"),
|
| 361 |
+
gr.JSON(label="History"),
|
| 362 |
+
gr.Slider(50, 1024, 512, label="Max Tokens"),
|
| 363 |
+
gr.Slider(0.1, 2.0, 0.7, label="Temperature")
|
| 364 |
+
],
|
| 365 |
+
outputs=gr.JSON(label="Response"),
|
| 366 |
+
title="Llama Chat API",
|
| 367 |
+
description="API endpoint para cliente Python"
|
| 368 |
+
)
|
| 369 |
+
|
| 370 |
+
# Combinar apps
|
| 371 |
+
final_app = gr.TabbedInterface(
|
| 372 |
+
[app, api_app],
|
| 373 |
+
["💬 Chat Interface", "🔌 API Endpoint"]
|
| 374 |
+
)
|
| 375 |
+
|
| 376 |
+
if __name__ == "__main__":
|
| 377 |
+
final_app.launch(server_name="0.0.0.0", server_port=7860, share=True)
|
| 378 |
+
'''
|
| 379 |
+
|
| 380 |
+
print("Código generado para app.py")
|
| 381 |
+
print("=" * 50)
|
script_1.py
ADDED
|
@@ -0,0 +1,202 @@
| 1 |
+
# Crear requirements.txt
|
| 2 |
+
requirements_txt = '''gradio>=4.0.0
|
| 3 |
+
transformers>=4.35.0
|
| 4 |
+
torch>=2.0.0
|
| 5 |
+
accelerate>=0.20.0
|
| 6 |
+
bitsandbytes>=0.41.0
|
| 7 |
+
sentencepiece>=0.1.99
|
| 8 |
+
protobuf>=3.20.0
|
| 9 |
+
requests>=2.28.0
|
| 10 |
+
'''
|
| 11 |
+
|
| 12 |
+
print("Requirements.txt:")
|
| 13 |
+
print(requirements_txt)
|
| 14 |
+
print("=" * 50)
|
| 15 |
+
|
| 16 |
+
# Crear cliente Python de ejemplo
|
| 17 |
+
client_code = '''
|
| 18 |
+
import requests
|
| 19 |
+
import json
|
| 20 |
+
import time
|
| 21 |
+
from typing import List, Dict, Generator, Optional
|
| 22 |
+
|
| 23 |
+
class LlamaClient:
|
| 24 |
+
def __init__(self, base_url: str):
|
| 25 |
+
"""
|
| 26 |
+
Cliente para interactuar con el Hugging Face Space de Llama Chat
|
| 27 |
+
|
| 28 |
+
Args:
|
| 29 |
+
base_url: URL base del Space (ej: "https://tu-usuario-llama-chat.hf.space")
|
| 30 |
+
"""
|
| 31 |
+
self.base_url = base_url.rstrip('/')
|
| 32 |
+
self.api_endpoint = f"{self.base_url}/call/api_chat"
|
| 33 |
+
self.stream_endpoint = f"{self.base_url}/call/api_chat_stream"
|
| 34 |
+
|
| 35 |
+
def chat(self, message: str, system_prompt: str = "", history: List[List[str]] = None,
|
| 36 |
+
max_tokens: int = 512, temperature: float = 0.7) -> Dict:
|
| 37 |
+
"""
|
| 38 |
+
Enviar un mensaje y recibir respuesta completa
|
| 39 |
+
|
| 40 |
+
Args:
|
| 41 |
+
message: Mensaje del usuario
|
| 42 |
+
system_prompt: Prompt del sistema (opcional)
|
| 43 |
+
history: Historial de conversación [[user, assistant], ...]
|
| 44 |
+
max_tokens: Máximo número de tokens a generar
|
| 45 |
+
temperature: Temperatura para la generación
|
| 46 |
+
|
| 47 |
+
Returns:
|
| 48 |
+
Dict con 'response' y 'queue_status'
|
| 49 |
+
"""
|
| 50 |
+
if history is None:
|
| 51 |
+
history = []
|
| 52 |
+
|
| 53 |
+
payload = {
|
| 54 |
+
"data": [system_prompt, message, history, max_tokens, temperature]
|
| 55 |
+
}
|
| 56 |
+
|
| 57 |
+
try:
|
| 58 |
+
response = requests.post(self.api_endpoint, json=payload, timeout=300)
|
| 59 |
+
response.raise_for_status()
|
| 60 |
+
|
| 61 |
+
result = response.json()
|
| 62 |
+
return result.get("data", [{}])[0]
|
| 63 |
+
|
| 64 |
+
except requests.exceptions.RequestException as e:
|
| 65 |
+
return {"error": f"Error de conexión: {str(e)}"}
|
| 66 |
+
except json.JSONDecodeError as e:
|
| 67 |
+
return {"error": f"Error decodificando JSON: {str(e)}"}
|
| 68 |
+
|
| 69 |
+
def chat_stream(self, message: str, system_prompt: str = "", history: List[List[str]] = None,
|
| 70 |
+
max_tokens: int = 512, temperature: float = 0.7) -> Generator[Dict, None, None]:
|
| 71 |
+
"""
|
| 72 |
+
Enviar un mensaje y recibir respuesta en streaming
|
| 73 |
+
|
| 74 |
+
Args:
|
| 75 |
+
message: Mensaje del usuario
|
| 76 |
+
system_prompt: Prompt del sistema (opcional)
|
| 77 |
+
history: Historial de conversación
|
| 78 |
+
max_tokens: Máximo número de tokens a generar
|
| 79 |
+
temperature: Temperatura para la generación
|
| 80 |
+
|
| 81 |
+
Yields:
|
| 82 |
+
Dict con 'response', 'is_complete' y 'queue_status'
|
| 83 |
+
"""
|
| 84 |
+
if history is None:
|
| 85 |
+
history = []
|
| 86 |
+
|
| 87 |
+
payload = {
|
| 88 |
+
"data": [system_prompt, message, history, max_tokens, temperature]
|
| 89 |
+
}
|
| 90 |
+
|
| 91 |
+
try:
|
| 92 |
+
response = requests.post(self.stream_endpoint, json=payload, stream=True, timeout=300)
|
| 93 |
+
response.raise_for_status()
|
| 94 |
+
|
| 95 |
+
for line in response.iter_lines():
|
| 96 |
+
if line:
|
| 97 |
+
try:
|
| 98 |
+
data = json.loads(line.decode('utf-8'))
|
| 99 |
+
if "data" in data:
|
| 100 |
+
yield data["data"][0]
|
| 101 |
+
except json.JSONDecodeError:
|
| 102 |
+
continue
|
| 103 |
+
|
| 104 |
+
except requests.exceptions.RequestException as e:
|
| 105 |
+
yield {"error": f"Error de conexión: {str(e)}", "is_complete": True}
|
| 106 |
+
|
| 107 |
+
def get_queue_status(self) -> Dict:
|
| 108 |
+
"""
|
| 109 |
+
Obtener estado actual de la cola
|
| 110 |
+
|
| 111 |
+
Returns:
|
| 112 |
+
Dict con información del estado de la cola
|
| 113 |
+
"""
|
| 114 |
+
try:
|
| 115 |
+
# Hacer una request vacía solo para obtener el estado
|
| 116 |
+
result = self.chat("", max_tokens=1)
|
| 117 |
+
return result.get("queue_status", {})
|
| 118 |
+
except Exception as e:
|
| 119 |
+
return {"error": str(e)}
|
| 120 |
+
|
| 121 |
+
# Ejemplo de uso del cliente
|
| 122 |
+
def example_usage():
|
| 123 |
+
"""Ejemplo de cómo usar el cliente"""
|
| 124 |
+
|
| 125 |
+
# Inicializar cliente (reemplaza con tu URL del Space)
|
| 126 |
+
client = LlamaClient("https://tu-usuario-llama-chat.hf.space")
|
| 127 |
+
|
| 128 |
+
print("=== Ejemplo 1: Chat simple ===")
|
| 129 |
+
response = client.chat(
|
| 130 |
+
message="¿Qué es la inteligencia artificial?",
|
| 131 |
+
system_prompt="Eres un profesor de informática experto."
|
| 132 |
+
)
|
| 133 |
+
|
| 134 |
+
if "error" in response:
|
| 135 |
+
print(f"Error: {response['error']}")
|
| 136 |
+
else:
|
| 137 |
+
print(f"Respuesta: {response['response']}")
|
| 138 |
+
print(f"Estado cola: {response['queue_status']}")
|
| 139 |
+
|
| 140 |
+
print("\\n=== Ejemplo 2: Chat con historial ===")
|
| 141 |
+
history = [
|
| 142 |
+
["Hola", "¡Hola! ¿En qué puedo ayudarte?"],
|
| 143 |
+
["¿Cuál es tu nombre?", "Soy un asistente de IA basado en Llama 3.2."]
|
| 144 |
+
]
|
| 145 |
+
|
| 146 |
+
response = client.chat(
|
| 147 |
+
message="¿Puedes explicarme conceptos de física?",
|
| 148 |
+
system_prompt="Eres un tutor de física para estudiantes de bachillerato.",
|
| 149 |
+
history=history
|
| 150 |
+
)
|
| 151 |
+
|
| 152 |
+
if "error" in response:
|
| 153 |
+
print(f"Error: {response['error']}")
|
| 154 |
+
else:
|
| 155 |
+
print(f"Respuesta: {response['response']}")
|
| 156 |
+
|
| 157 |
+
print("\\n=== Ejemplo 3: Chat con streaming ===")
|
| 158 |
+
print("Pregunta: Explica la teoría de la relatividad")
|
| 159 |
+
print("Respuesta (streaming):")
|
| 160 |
+
|
| 161 |
+
for chunk in client.chat_stream(
|
| 162 |
+
message="Explica la teoría de la relatividad de forma simple",
|
| 163 |
+
system_prompt="Eres un divulgador científico.",
|
| 164 |
+
max_tokens=300,
|
| 165 |
+
temperature=0.8
|
| 166 |
+
):
|
| 167 |
+
if "error" in chunk:
|
| 168 |
+
print(f"Error: {chunk['error']}")
|
| 169 |
+
break
|
| 170 |
+
|
| 171 |
+
print(f"\\r{chunk['response']}", end="", flush=True)
|
| 172 |
+
|
| 173 |
+
if chunk.get("is_complete", False):
|
| 174 |
+
print("\\n[Respuesta completa]")
|
| 175 |
+
print(f"Estado cola: {chunk['queue_status']}")
|
| 176 |
+
break
|
| 177 |
+
|
| 178 |
+
print("\\n=== Ejemplo 4: Verificar estado de cola ===")
|
| 179 |
+
status = client.get_queue_status()
|
| 180 |
+
print(f"Estado actual: {status}")
|
| 181 |
+
|
| 182 |
+
if __name__ == "__main__":
|
| 183 |
+
example_usage()
|
| 184 |
+
'''
|
| 185 |
+
|
| 186 |
+
print("Cliente Python (client.py):")
|
| 187 |
+
print(client_code)
|
| 188 |
+
|
| 189 |
+
# Escribir los archivos
|
| 190 |
+
with open("app.py", "w", encoding="utf-8") as f:
|
| 191 |
+
f.write(app_py_code)
|
| 192 |
+
|
| 193 |
+
with open("requirements.txt", "w", encoding="utf-8") as f:
|
| 194 |
+
f.write(requirements_txt)
|
| 195 |
+
|
| 196 |
+
with open("client.py", "w", encoding="utf-8") as f:
|
| 197 |
+
f.write(client_code)
|
| 198 |
+
|
| 199 |
+
print("\\nArchivos creados:")
|
| 200 |
+
print("- app.py (aplicación principal)")
|
| 201 |
+
print("- requirements.txt (dependencias)")
|
| 202 |
+
print("- client.py (cliente Python de ejemplo)")
|
script_2.py
ADDED
|
@@ -0,0 +1,535 @@
| 1 |
+
# Crear README.md con instrucciones completas
|
| 2 |
+
readme_content = '''# 🦙 Llama 3.2 3B Chat - Hugging Face Space
|
| 3 |
+
|
| 4 |
+
Un Space de Hugging Face para chatear con Meta Llama 3.2 3B Instruct con sistema de colas, streaming y API para cliente Python.
|
| 5 |
+
|
| 6 |
+
## ✨ Características
|
| 7 |
+
|
| 8 |
+
- 🔄 **Sistema de colas**: Solo procesa una petición a la vez para evitar sobrecargar el modelo
|
| 9 |
+
- 📡 **Streaming en tiempo real**: Ve la respuesta generándose en tiempo real
|
| 10 |
+
- 🐍 **Cliente Python**: API completa para integración con aplicaciones Python
|
| 11 |
+
- 💬 **Interfaz web**: Chat interactivo con sistema de prompts y configuración
|
| 12 |
+
- 📊 **Monitoreo**: Estado de cola en tiempo real
|
| 13 |
+
- 🔐 **Autenticación**: Soporte para modelos restringidos con HF token
|
| 14 |
+
|
| 15 |
+
## 🚀 Configuración del Space
|
| 16 |
+
|
| 17 |
+
### 1. Crear el Space
|
| 18 |
+
|
| 19 |
+
1. Ve a [Hugging Face Spaces](https://huggingface.co/new-space)
|
| 20 |
+
2. Elige **Gradio** como SDK
|
| 21 |
+
3. Selecciona **T4 small** o superior como hardware
|
| 22 |
+
4. Nombra tu Space (ej: `tu-usuario/llama-chat`)
|
| 23 |
+
|
| 24 |
+
### 2. Configurar el token HF
|
| 25 |
+
|
| 26 |
+
1. Ve a **Settings** de tu Space
|
| 27 |
+
2. En **Repository secrets**, agrega:
|
| 28 |
+
- **Name**: `HF_TOKEN`
|
| 29 |
+
- **Value**: Tu token de Hugging Face (con acceso a Llama)
|
| 30 |
+
|
| 31 |
+
### 3. Subir archivos
|
| 32 |
+
|
| 33 |
+
Sube estos archivos a tu Space:
|
| 34 |
+
- `app.py` (aplicación principal)
|
| 35 |
+
- `requirements.txt` (dependencias)
|
| 36 |
+
|
| 37 |
+
### 4. Verificar el despliegue
|
| 38 |
+
|
| 39 |
+
Una vez que el Space esté corriendo, deberías ver:
|
| 40 |
+
- Una interfaz de chat en la pestaña principal
|
| 41 |
+
- Un endpoint API en la segunda pestaña
|
| 42 |
+
- Estado de cola actualizado automáticamente
|
| 43 |
+
|
| 44 |
+
## 📱 Uso de la interfaz web
|
| 45 |
+
|
| 46 |
+
### Chat Principal
|
| 47 |
+
- **System Prompt**: Define el comportamiento del asistente
|
| 48 |
+
- **Mensaje**: Tu pregunta o mensaje
|
| 49 |
+
- **Max Tokens**: Longitud máxima de la respuesta (50-1024)
|
| 50 |
+
- **Temperature**: Creatividad de la respuesta (0.1-2.0)
|
| 51 |
+
|
| 52 |
+
### Estado de Cola
|
| 53 |
+
- **queue_size**: Número de peticiones en espera
|
| 54 |
+
- **is_processing**: Si está procesando actualmente
|
| 55 |
+
- **timestamp**: Última actualización
|
| 56 |
+
|
| 57 |
+
## 🐍 Cliente Python
|
| 58 |
+
|
| 59 |
+
### Instalación
|
| 60 |
+
|
| 61 |
+
```bash
|
| 62 |
+
pip install requests
|
| 63 |
+
```
|
| 64 |
+
|
| 65 |
+
### Uso Básico
|
| 66 |
+
|
| 67 |
+
```python
|
| 68 |
+
from client import LlamaClient
|
| 69 |
+
|
| 70 |
+
# Inicializar cliente con la URL de tu Space
|
| 71 |
+
client = LlamaClient("https://tu-usuario-llama-chat.hf.space")
|
| 72 |
+
|
| 73 |
+
# Chat simple
|
| 74 |
+
response = client.chat(
|
| 75 |
+
message="¿Qué es la inteligencia artificial?",
|
| 76 |
+
system_prompt="Eres un profesor experto."
|
| 77 |
+
)
|
| 78 |
+
|
| 79 |
+
print(response["response"])
|
| 80 |
+
```
|
| 81 |
+
|
| 82 |
+
### Chat con Streaming
|
| 83 |
+
|
| 84 |
+
```python
|
| 85 |
+
# Ver respuesta generándose en tiempo real
|
| 86 |
+
for chunk in client.chat_stream(
|
| 87 |
+
message="Explica la física cuántica",
|
| 88 |
+
system_prompt="Eres un divulgador científico.",
|
| 89 |
+
max_tokens=300
|
| 90 |
+
):
|
| 91 |
+
print(f"\\r{chunk['response']}", end="", flush=True)
|
| 92 |
+
|
| 93 |
+
if chunk.get("is_complete", False):
|
| 94 |
+
print("\\n[Completo]")
|
| 95 |
+
break
|
| 96 |
+
```
|
| 97 |
+
|
| 98 |
+
### Chat con Historial
|
| 99 |
+
|
| 100 |
+
```python
|
| 101 |
+
# Mantener conversación
|
| 102 |
+
history = [
|
| 103 |
+
["Hola", "¡Hola! ¿En qué puedo ayudarte?"],
|
| 104 |
+
["Explica el machine learning", "El machine learning es..."]
|
| 105 |
+
]
|
| 106 |
+
|
| 107 |
+
response = client.chat(
|
| 108 |
+
message="¿Puedes dar un ejemplo práctico?",
|
| 109 |
+
history=history
|
| 110 |
+
)
|
| 111 |
+
```
|
| 112 |
+
|
| 113 |
+
## 🔧 API Endpoints
|
| 114 |
+
|
| 115 |
+
### POST /call/api_chat
|
| 116 |
+
Respuesta completa sin streaming.
|
| 117 |
+
|
| 118 |
+
**Payload:**
|
| 119 |
+
```json
|
| 120 |
+
{
|
| 121 |
+
"data": [
|
| 122 |
+
"system_prompt",
|
| 123 |
+
"message",
|
| 124 |
+
[["user", "assistant"], ...],
|
| 125 |
+
512,
|
| 126 |
+
0.7
|
| 127 |
+
]
|
| 128 |
+
}
|
| 129 |
+
```
|
| 130 |
+
|
| 131 |
+
**Respuesta:**
|
| 132 |
+
```json
|
| 133 |
+
{
|
| 134 |
+
"data": [{
|
| 135 |
+
"response": "Respuesta del modelo",
|
| 136 |
+
"queue_status": {
|
| 137 |
+
"queue_size": 0,
|
| 138 |
+
"is_processing": false,
|
| 139 |
+
"timestamp": "2025-10-16T17:30:00"
|
| 140 |
+
}
|
| 141 |
+
}]
|
| 142 |
+
}
|
| 143 |
+
```
|
| 144 |
+
|
| 145 |
+
### POST /call/api_chat_stream
|
| 146 |
+
Respuesta con streaming.
|
| 147 |
+
|
| 148 |
+
Misma estructura de payload, pero responde con eventos SSE.
|
| 149 |
+
|
| 150 |
+
## 📊 Monitoreo y Debugging
|
| 151 |
+
|
| 152 |
+
### Logs del Space
|
| 153 |
+
Revisa los logs en la interfaz de HF Spaces para debugging.
|
| 154 |
+
|
| 155 |
+
### Estado de Cola
|
| 156 |
+
Usa `client.get_queue_status()` para monitorear la cola:
|
| 157 |
+
|
| 158 |
+
```python
|
| 159 |
+
status = client.get_queue_status()
|
| 160 |
+
print(f"Cola: {status['queue_size']} peticiones")
|
| 161 |
+
print(f"Procesando: {status['is_processing']}")
|
| 162 |
+
```
|
| 163 |
+
|
| 164 |
+
### Manejo de Errores
|
| 165 |
+
|
| 166 |
+
```python
|
| 167 |
+
response = client.chat("Hola")
|
| 168 |
+
|
| 169 |
+
if "error" in response:
|
| 170 |
+
print(f"Error: {response['error']}")
|
| 171 |
+
else:
|
| 172 |
+
print(f"Respuesta: {response['response']}")
|
| 173 |
+
```
|
| 174 |
+
|
| 175 |
+
## ⚙️ Configuración Avanzada
|
| 176 |
+
|
| 177 |
+
### Parámetros del Modelo
|
| 178 |
+
|
| 179 |
+
- **max_tokens**: 50-1024 (recomendado: 512)
|
| 180 |
+
- **temperature**: 0.1-2.0 (recomendado: 0.7)
|
| 181 |
+
- **repetition_penalty**: Automático (1.1)
|
| 182 |
+
|
| 183 |
+
### Optimización de Performance
|
| 184 |
+
|
| 185 |
+
1. **Hardware**: Usa GPU T4 small mínimo
|
| 186 |
+
2. **Batch size**: Sistema de colas evita problemas de memoria
|
| 187 |
+
3. **Context length**: Máximo 2048 tokens de entrada
|
| 188 |
+
|
| 189 |
+
### System Prompts Útiles
|
| 190 |
+
|
| 191 |
+
```python
|
| 192 |
+
# Para tareas académicas
|
| 193 |
+
system_prompt = "Eres un tutor experto que explica conceptos complejos de forma clara y pedagógica."
|
| 194 |
+
|
| 195 |
+
# Para programación
|
| 196 |
+
system_prompt = "Eres un desarrollador senior que ayuda con código Python, explicando paso a paso."
|
| 197 |
+
|
| 198 |
+
# Para creatividad
|
| 199 |
+
system_prompt = "Eres un escritor creativo que ayuda a generar ideas originales y contenido engagente."
|
| 200 |
+
```
|
| 201 |
+
|
| 202 |
+
## 🐛 Troubleshooting
|
| 203 |
+
|
| 204 |
+
### Error: HF_TOKEN no encontrado
|
| 205 |
+
- Verifica que agregaste el token en Repository secrets
|
| 206 |
+
- Asegúrate de que el nombre sea exactamente `HF_TOKEN`
|
| 207 |
+
|
| 208 |
+
### Error: Modelo no disponible
|
| 209 |
+
- Tu token debe tener acceso a Llama 3.2 3B
|
| 210 |
+
- Solicita acceso en la página del modelo si es necesario
|
| 211 |
+
|
| 212 |
+
### Timeouts en cliente Python
|
| 213 |
+
- Aumenta el timeout: `requests.post(..., timeout=600)`
|
| 214 |
+
- El modelo puede tardar en cargar la primera vez
|
| 215 |
+
|
| 216 |
+
### Cola muy larga
|
| 217 |
+
- El sistema procesa una petición a la vez
|
| 218 |
+
- Considera usar hardware más potente
|
| 219 |
+
|
| 220 |
+
## 🤝 Contribuciones
|
| 221 |
+
|
| 222 |
+
¿Mejoras sugeridas?
|
| 223 |
+
1. Fork el código
|
| 224 |
+
2. Implementa mejoras
|
| 225 |
+
3. Prueba con tu propio Space
|
| 226 |
+
4. Comparte tu versión
|
| 227 |
+
|
| 228 |
+
## 📝 Licencia
|
| 229 |
+
|
| 230 |
+
Este código es de uso libre. Respeta los términos de uso de:
|
| 231 |
+
- Hugging Face Spaces
|
| 232 |
+
- Meta Llama 3.2 License
|
| 233 |
+
- Gradio License
|
| 234 |
+
|
| 235 |
+
## 🔗 Enlaces Útiles
|
| 236 |
+
|
| 237 |
+
- [Hugging Face Spaces](https://huggingface.co/spaces)
|
| 238 |
+
- [Meta Llama 3.2 3B Instruct](https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct)
|
| 239 |
+
- [Gradio Documentation](https://gradio.app/docs/)
|
| 240 |
+
- [Transformers Library](https://huggingface.co/docs/transformers)
|
| 241 |
+
|
| 242 |
+
---
|
| 243 |
+
|
| 244 |
+
**¡Disfruta chateando con Llama! 🦙**
|
| 245 |
+
'''
|
| 246 |
+
|
| 247 |
+
# Crear archivo de configuración adicional
|
| 248 |
+
config_py_content = '''# config.py - Configuración del Space
|
| 249 |
+
|
| 250 |
+
import os
|
| 251 |
+
|
| 252 |
+
class Config:
|
| 253 |
+
"""Configuración centralizada para el Space"""
|
| 254 |
+
|
| 255 |
+
# Modelo
|
| 256 |
+
MODEL_NAME = "meta-llama/Llama-3.2-3B-Instruct"
|
| 257 |
+
DEVICE = "cuda" if os.environ.get("SPACES_GPU") else "cpu"
|
| 258 |
+
|
| 259 |
+
# Tokens y autenticación
|
| 260 |
+
HF_TOKEN = os.environ.get("HF_TOKEN")
|
| 261 |
+
|
| 262 |
+
# Límites de generación
|
| 263 |
+
MAX_TOKENS_LIMIT = 1024
|
| 264 |
+
MIN_TOKENS_LIMIT = 50
|
| 265 |
+
DEFAULT_MAX_TOKENS = 512
|
| 266 |
+
|
| 267 |
+
# Temperatura
|
| 268 |
+
MAX_TEMPERATURE = 2.0
|
| 269 |
+
MIN_TEMPERATURE = 0.1
|
| 270 |
+
DEFAULT_TEMPERATURE = 0.7
|
| 271 |
+
|
| 272 |
+
# Cola y concurrencia
|
| 273 |
+
MAX_QUEUE_SIZE = 10
|
| 274 |
+
QUEUE_TIMEOUT = 300 # 5 minutos
|
| 275 |
+
|
| 276 |
+
# Context length
|
| 277 |
+
MAX_CONTEXT_LENGTH = 2048
|
| 278 |
+
|
| 279 |
+
# Interface
|
| 280 |
+
CHAT_HEIGHT = 500
|
| 281 |
+
DEFAULT_SYSTEM_PROMPT = "Eres un asistente de IA útil y amigable. Responde de manera clara y concisa."
|
| 282 |
+
|
| 283 |
+
# API
|
| 284 |
+
API_TIMEOUT = 300
|
| 285 |
+
ENABLE_API_LOGGING = True
|
| 286 |
+
|
| 287 |
+
@classmethod
|
| 288 |
+
def validate(cls):
|
| 289 |
+
"""Validar configuración"""
|
| 290 |
+
errors = []
|
| 291 |
+
|
| 292 |
+
if not cls.HF_TOKEN:
|
| 293 |
+
errors.append("HF_TOKEN no configurado en variables de entorno")
|
| 294 |
+
|
| 295 |
+
if cls.MAX_TOKENS_LIMIT < cls.MIN_TOKENS_LIMIT:
|
| 296 |
+
errors.append("MAX_TOKENS_LIMIT debe ser mayor que MIN_TOKENS_LIMIT")
|
| 297 |
+
|
| 298 |
+
if cls.MAX_TEMPERATURE < cls.MIN_TEMPERATURE:
|
| 299 |
+
errors.append("MAX_TEMPERATURE debe ser mayor que MIN_TEMPERATURE")
|
| 300 |
+
|
| 301 |
+
return errors
|
| 302 |
+
|
| 303 |
+
@classmethod
|
| 304 |
+
def get_model_config(cls):
|
| 305 |
+
"""Configuración específica del modelo"""
|
| 306 |
+
return {
|
| 307 |
+
"torch_dtype": "float16" if cls.DEVICE == "cuda" else "float32",
|
| 308 |
+
"device_map": "auto" if cls.DEVICE == "cuda" else None,
|
| 309 |
+
"trust_remote_code": True,
|
| 310 |
+
"token": cls.HF_TOKEN
|
| 311 |
+
}
|
| 312 |
+
|
| 313 |
+
@classmethod
|
| 314 |
+
def get_generation_config(cls, max_tokens=None, temperature=None):
|
| 315 |
+
"""Configuración de generación"""
|
| 316 |
+
return {
|
| 317 |
+
"max_new_tokens": max_tokens or cls.DEFAULT_MAX_TOKENS,
|
| 318 |
+
"temperature": temperature or cls.DEFAULT_TEMPERATURE,
|
| 319 |
+
"do_sample": True,
|
| 320 |
+
"repetition_penalty": 1.1,
|
| 321 |
+
"top_p": 0.9,
|
| 322 |
+
"top_k": 50
|
| 323 |
+
}
|
| 324 |
+
|
| 325 |
+
# Validar configuración al importar
|
| 326 |
+
config_errors = Config.validate()
|
| 327 |
+
if config_errors:
|
| 328 |
+
print("⚠️ Errores de configuración:")
|
| 329 |
+
for error in config_errors:
|
| 330 |
+
print(f" - {error}")
|
| 331 |
+
'''
|
| 332 |
+
|
| 333 |
+
# Crear archivo con utils adicionales
|
| 334 |
+
utils_py_content = '''# utils.py - Utilidades para el Space
|
| 335 |
+
|
| 336 |
+
import time
|
| 337 |
+
import functools
|
| 338 |
+
import logging
|
| 339 |
+
from typing import List, Dict, Callable, Any
|
| 340 |
+
from datetime import datetime
|
| 341 |
+
|
| 342 |
+
# Configurar logging
|
| 343 |
+
logging.basicConfig(level=logging.INFO)
|
| 344 |
+
logger = logging.getLogger(__name__)
|
| 345 |
+
|
| 346 |
+
def timing_decorator(func: Callable) -> Callable:
|
| 347 |
+
"""Decorator para medir tiempo de ejecución"""
|
| 348 |
+
@functools.wraps(func)
|
| 349 |
+
def wrapper(*args, **kwargs):
|
| 350 |
+
start_time = time.time()
|
| 351 |
+
result = func(*args, **kwargs)
|
| 352 |
+
end_time = time.time()
|
| 353 |
+
|
| 354 |
+
logger.info(f"{func.__name__} ejecutado en {end_time - start_time:.2f}s")
|
| 355 |
+
return result
|
| 356 |
+
return wrapper
|
| 357 |
+
|
| 358 |
+
def sanitize_input(text: str, max_length: int = 2000) -> str:
|
| 359 |
+
"""Sanitizar entrada del usuario"""
|
| 360 |
+
if not isinstance(text, str):
|
| 361 |
+
return ""
|
| 362 |
+
|
| 363 |
+
# Truncar si es muy largo
|
| 364 |
+
text = text[:max_length]
|
| 365 |
+
|
| 366 |
+
# Limpiar caracteres problemáticos
|
| 367 |
+
text = text.replace('\\x00', '') # Null bytes
|
| 368 |
+
text = text.strip()
|
| 369 |
+
|
| 370 |
+
return text
|
| 371 |
+
|
| 372 |
+
def format_history(history: List[List[str]]) -> List[List[str]]:
|
| 373 |
+
"""Formatear y validar historial de chat"""
|
| 374 |
+
if not history:
|
| 375 |
+
return []
|
| 376 |
+
|
| 377 |
+
formatted_history = []
|
| 378 |
+
for item in history:
|
| 379 |
+
if isinstance(item, list) and len(item) == 2:
|
| 380 |
+
user_msg = sanitize_input(str(item[0]))
|
| 381 |
+
assistant_msg = sanitize_input(str(item[1]))
|
| 382 |
+
|
| 383 |
+
if user_msg and assistant_msg:
|
| 384 |
+
formatted_history.append([user_msg, assistant_msg])
|
| 385 |
+
|
| 386 |
+
# Limitar historial a últimas 10 conversaciones
|
| 387 |
+
return formatted_history[-10:]
|
| 388 |
+
|
| 389 |
+
def estimate_tokens(text: str) -> int:
|
| 390 |
+
"""Estimación aproximada de tokens"""
|
| 391 |
+
# Aproximación: ~4 caracteres por token en español
|
| 392 |
+
return len(text) // 4
|
| 393 |
+
|
| 394 |
+
def validate_parameters(max_tokens: int, temperature: float) -> Dict[str, Any]:
|
| 395 |
+
"""Validar parámetros de generación"""
|
| 396 |
+
from config import Config
|
| 397 |
+
|
| 398 |
+
errors = []
|
| 399 |
+
|
| 400 |
+
# Validar max_tokens
|
| 401 |
+
if not isinstance(max_tokens, int):
|
| 402 |
+
max_tokens = Config.DEFAULT_MAX_TOKENS
|
| 403 |
+
errors.append("max_tokens debe ser un entero")
|
| 404 |
+
elif max_tokens < Config.MIN_TOKENS_LIMIT:
|
| 405 |
+
max_tokens = Config.MIN_TOKENS_LIMIT
|
| 406 |
+
errors.append(f"max_tokens mínimo es {Config.MIN_TOKENS_LIMIT}")
|
| 407 |
+
elif max_tokens > Config.MAX_TOKENS_LIMIT:
|
| 408 |
+
max_tokens = Config.MAX_TOKENS_LIMIT
|
| 409 |
+
errors.append(f"max_tokens máximo es {Config.MAX_TOKENS_LIMIT}")
|
| 410 |
+
|
| 411 |
+
# Validar temperature
|
| 412 |
+
if not isinstance(temperature, (int, float)):
|
| 413 |
+
temperature = Config.DEFAULT_TEMPERATURE
|
| 414 |
+
errors.append("temperature debe ser un número")
|
| 415 |
+
elif temperature < Config.MIN_TEMPERATURE:
|
| 416 |
+
temperature = Config.MIN_TEMPERATURE
|
| 417 |
+
errors.append(f"temperature mínima es {Config.MIN_TEMPERATURE}")
|
| 418 |
+
elif temperature > Config.MAX_TEMPERATURE:
|
| 419 |
+
temperature = Config.MAX_TEMPERATURE
|
| 420 |
+
errors.append(f"temperature máxima es {Config.MAX_TEMPERATURE}")
|
| 421 |
+
|
| 422 |
+
return {
|
| 423 |
+
"max_tokens": max_tokens,
|
| 424 |
+
"temperature": float(temperature),
|
| 425 |
+
"errors": errors
|
| 426 |
+
}
|
| 427 |
+
|
| 428 |
+
def create_error_response(error_msg: str) -> Dict[str, Any]:
|
| 429 |
+
"""Crear respuesta de error estandarizada"""
|
| 430 |
+
return {
|
| 431 |
+
"response": f"Error: {error_msg}",
|
| 432 |
+
"queue_status": {
|
| 433 |
+
"queue_size": 0,
|
| 434 |
+
"is_processing": False,
|
| 435 |
+
"timestamp": datetime.now().isoformat(),
|
| 436 |
+
"error": True
|
| 437 |
+
}
|
| 438 |
+
}
|
| 439 |
+
|
| 440 |
+
def truncate_context(text: str, max_length: int = 1800) -> str:
|
| 441 |
+
"""Truncar contexto manteniendo coherencia"""
|
| 442 |
+
if len(text) <= max_length:
|
| 443 |
+
return text
|
| 444 |
+
|
| 445 |
+
# Truncar por párrafos si es posible
|
| 446 |
+
paragraphs = text.split('\\n\\n')
|
| 447 |
+
truncated = ""
|
| 448 |
+
|
| 449 |
+
for paragraph in paragraphs:
|
| 450 |
+
if len(truncated + paragraph) <= max_length:
|
| 451 |
+
truncated += paragraph + '\\n\\n'
|
| 452 |
+
else:
|
| 453 |
+
break
|
| 454 |
+
|
| 455 |
+
# Si no hay párrafos, truncar por oraciones
|
| 456 |
+
if not truncated:
|
| 457 |
+
sentences = text.split('. ')
|
| 458 |
+
for sentence in sentences:
|
| 459 |
+
if len(truncated + sentence) <= max_length:
|
| 460 |
+
truncated += sentence + '. '
|
| 461 |
+
else:
|
| 462 |
+
break
|
| 463 |
+
|
| 464 |
+
# Último recurso: truncar directamente
|
| 465 |
+
if not truncated:
|
| 466 |
+
truncated = text[:max_length]
|
| 467 |
+
|
| 468 |
+
return truncated.strip()
|
| 469 |
+
|
| 470 |
+
class PerformanceMonitor:
|
| 471 |
+
"""Monitor de rendimiento simple"""
|
| 472 |
+
|
| 473 |
+
def __init__(self):
|
| 474 |
+
self.stats = {
|
| 475 |
+
"total_requests": 0,
|
| 476 |
+
"successful_requests": 0,
|
| 477 |
+
"failed_requests": 0,
|
| 478 |
+
"total_tokens_generated": 0,
|
| 479 |
+
"average_response_time": 0,
|
| 480 |
+
"start_time": datetime.now()
|
| 481 |
+
}
|
| 482 |
+
|
| 483 |
+
def record_request(self, success: bool, tokens_generated: int = 0, response_time: float = 0):
|
| 484 |
+
"""Registrar una request"""
|
| 485 |
+
self.stats["total_requests"] += 1
|
| 486 |
+
|
| 487 |
+
if success:
|
| 488 |
+
self.stats["successful_requests"] += 1
|
| 489 |
+
self.stats["total_tokens_generated"] += tokens_generated
|
| 490 |
+
else:
|
| 491 |
+
self.stats["failed_requests"] += 1
|
| 492 |
+
|
| 493 |
+
# Actualizar tiempo promedio de respuesta
|
| 494 |
+
if response_time > 0:
|
| 495 |
+
current_avg = self.stats["average_response_time"]
|
| 496 |
+
total_requests = self.stats["total_requests"]
|
| 497 |
+
|
| 498 |
+
self.stats["average_response_time"] = (
|
| 499 |
+
(current_avg * (total_requests - 1) + response_time) / total_requests
|
| 500 |
+
)
|
| 501 |
+
|
| 502 |
+
def get_stats(self) -> Dict[str, Any]:
|
| 503 |
+
"""Obtener estadísticas"""
|
| 504 |
+
uptime = datetime.now() - self.stats["start_time"]
|
| 505 |
+
|
| 506 |
+
return {
|
| 507 |
+
**self.stats,
|
| 508 |
+
"uptime_seconds": uptime.total_seconds(),
|
| 509 |
+
"success_rate": (
|
| 510 |
+
self.stats["successful_requests"] / max(self.stats["total_requests"], 1)
|
| 511 |
+
) * 100,
|
| 512 |
+
"tokens_per_minute": (
|
| 513 |
+
self.stats["total_tokens_generated"] / max(uptime.total_seconds() / 60, 1)
|
| 514 |
+
)
|
| 515 |
+
}
|
| 516 |
+
|
| 517 |
+
# Instancia global del monitor
|
| 518 |
+
performance_monitor = PerformanceMonitor()
|
| 519 |
+
'''
|
| 520 |
+
|
| 521 |
+
# Escribir todos los archivos
|
| 522 |
+
with open("README.md", "w", encoding="utf-8") as f:
|
| 523 |
+
f.write(readme_content)
|
| 524 |
+
|
| 525 |
+
with open("config.py", "w", encoding="utf-8") as f:
|
| 526 |
+
f.write(config_py_content)
|
| 527 |
+
|
| 528 |
+
with open("utils.py", "w", encoding="utf-8") as f:
|
| 529 |
+
f.write(utils_py_content)
|
| 530 |
+
|
| 531 |
+
print("Archivos adicionales creados:")
|
| 532 |
+
print("- README.md (instrucciones completas)")
|
| 533 |
+
print("- config.py (configuración centralizada)")
|
| 534 |
+
print("- utils.py (utilidades y monitoreo)")
|
| 535 |
+
print("\\n¡Todo listo para subir a Hugging Face Spaces! 🚀")
|
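El README generado por script_2.py documenta el payload de `POST /call/api_chat`, pero no muestra cómo llamarlo sin usar `client.py`. El siguiente esquema con `requests` sigue exactamente el formato de payload y de respuesta descritos en ese README; la URL es un marcador de posición y, según la versión de Gradio, el endpoint `/call/...` puede responder con un `event_id` seguido de un flujo SSE en lugar de un JSON directo, así que conviene contrastarlo con la implementación real de `client.py`.

```python
# Esquema mínimo basado en el formato documentado en el README anterior.
# Supuestos: la URL es un marcador de posición y el endpoint devuelve el JSON
# directo descrito arriba (en Gradio reciente puede ser event_id + SSE).
import requests

SPACE_URL = "https://tu-usuario-llama-chat.hf.space"  # marcador de posición

payload = {
    "data": [
        "Eres un asistente útil.",              # system_prompt
        "¿Qué es la inteligencia artificial?",  # message
        [],                                     # history: [["user", "assistant"], ...]
        512,                                    # max_tokens
        0.7,                                    # temperature
    ]
}

resp = requests.post(f"{SPACE_URL}/call/api_chat", json=payload, timeout=600)
resp.raise_for_status()
resultado = resp.json()["data"][0]
print(resultado["response"])
print(resultado["queue_status"])
```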
script_3.py
ADDED
|
@@ -0,0 +1,637 @@
| 1 |
+
# Crear archivo con ejemplos avanzados
|
| 2 |
+
examples_py_content = '''# examples.py - Ejemplos avanzados de uso del cliente
|
| 3 |
+
|
| 4 |
+
from client import LlamaClient
|
| 5 |
+
import asyncio
|
| 6 |
+
import time
|
| 7 |
+
from typing import List
|
| 8 |
+
import json
|
| 9 |
+
|
| 10 |
+
class AdvancedLlamaClient:
|
| 11 |
+
"""Cliente extendido con funcionalidades avanzadas"""
|
| 12 |
+
|
| 13 |
+
def __init__(self, base_url: str):
|
| 14 |
+
self.client = LlamaClient(base_url)
|
| 15 |
+
self.conversation_history = []
|
| 16 |
+
|
| 17 |
+
def continuous_chat(self):
|
| 18 |
+
"""Chat interactivo continuo"""
|
| 19 |
+
print("🦙 Chat con Llama 3.2 3B - Escribe 'salir' para terminar")
|
| 20 |
+
print("=" * 50)
|
| 21 |
+
|
| 22 |
+
system_prompt = input("System prompt (opcional): ").strip()
|
| 23 |
+
if not system_prompt:
|
| 24 |
+
system_prompt = "Eres un asistente útil y amigable."
|
| 25 |
+
|
| 26 |
+
while True:
|
| 27 |
+
try:
|
| 28 |
+
message = input("\\nTú: ").strip()
|
| 29 |
+
|
| 30 |
+
if message.lower() in ['salir', 'exit', 'quit']:
|
| 31 |
+
print("¡Hasta luego! 👋")
|
| 32 |
+
break
|
| 33 |
+
|
| 34 |
+
if not message:
|
| 35 |
+
continue
|
| 36 |
+
|
| 37 |
+
print("🦙: ", end="", flush=True)
|
| 38 |
+
full_response = ""
|
| 39 |
+
|
| 40 |
+
for chunk in self.client.chat_stream(
|
| 41 |
+
message=message,
|
| 42 |
+
system_prompt=system_prompt,
|
| 43 |
+
history=self.conversation_history,
|
| 44 |
+
max_tokens=512,
|
| 45 |
+
temperature=0.7
|
| 46 |
+
):
|
| 47 |
+
if "error" in chunk:
|
| 48 |
+
print(f"Error: {chunk['error']}")
|
| 49 |
+
break
|
| 50 |
+
|
| 51 |
+
# Mostrar solo el texto nuevo
|
| 52 |
+
new_text = chunk['response'][len(full_response):]
|
| 53 |
+
print(new_text, end="", flush=True)
|
| 54 |
+
full_response = chunk['response']
|
| 55 |
+
|
| 56 |
+
if chunk.get("is_complete", False):
|
| 57 |
+
print() # Nueva línea al final
|
| 58 |
+
break
|
| 59 |
+
|
| 60 |
+
# Agregar al historial
|
| 61 |
+
if full_response and not full_response.startswith("Error:"):
|
| 62 |
+
self.conversation_history.append([message, full_response])
|
| 63 |
+
|
| 64 |
+
# Limitar historial a 10 intercambios
|
| 65 |
+
if len(self.conversation_history) > 10:
|
| 66 |
+
self.conversation_history = self.conversation_history[-10:]
|
| 67 |
+
|
| 68 |
+
except KeyboardInterrupt:
|
| 69 |
+
print("\\n\\n¡Hasta luego! 👋")
|
| 70 |
+
break
|
| 71 |
+
except Exception as e:
|
| 72 |
+
print(f"\\nError inesperado: {e}")
|
| 73 |
+
|
| 74 |
+
def batch_questions(self, questions: List[str], system_prompt: str = ""):
|
| 75 |
+
"""Procesar múltiples preguntas en lote"""
|
| 76 |
+
print(f"Procesando {len(questions)} preguntas...")
|
| 77 |
+
results = []
|
| 78 |
+
|
| 79 |
+
for i, question in enumerate(questions, 1):
|
| 80 |
+
print(f"\\nPregunta {i}/{len(questions)}: {question}")
|
| 81 |
+
print("-" * 40)
|
| 82 |
+
|
| 83 |
+
response = self.client.chat(
|
| 84 |
+
message=question,
|
| 85 |
+
system_prompt=system_prompt,
|
| 86 |
+
max_tokens=300
|
| 87 |
+
)
|
| 88 |
+
|
| 89 |
+
if "error" in response:
|
| 90 |
+
print(f"Error: {response['error']}")
|
| 91 |
+
results.append({"question": question, "error": response['error']})
|
| 92 |
+
else:
|
| 93 |
+
print(f"Respuesta: {response['response']}")
|
| 94 |
+
results.append({
|
| 95 |
+
"question": question,
|
| 96 |
+
"response": response['response'],
|
| 97 |
+
"queue_status": response['queue_status']
|
| 98 |
+
})
|
| 99 |
+
|
| 100 |
+
# Pequeña pausa entre preguntas
|
| 101 |
+
time.sleep(1)
|
| 102 |
+
|
| 103 |
+
return results
|
| 104 |
+
|
| 105 |
+
def compare_temperatures(self, message: str, temperatures: List[float] = [0.3, 0.7, 1.2]):
|
| 106 |
+
"""Comparar respuestas con diferentes temperaturas"""
|
| 107 |
+
print(f"Comparando respuestas para: '{message}'")
|
| 108 |
+
print("=" * 60)
|
| 109 |
+
|
| 110 |
+
results = {}
|
| 111 |
+
|
| 112 |
+
for temp in temperatures:
|
| 113 |
+
print(f"\\n🌡️ Temperature: {temp}")
|
| 114 |
+
print("-" * 30)
|
| 115 |
+
|
| 116 |
+
response = self.client.chat(
|
| 117 |
+
message=message,
|
| 118 |
+
temperature=temp,
|
| 119 |
+
max_tokens=200
|
| 120 |
+
)
|
| 121 |
+
|
| 122 |
+
if "error" in response:
|
| 123 |
+
print(f"Error: {response['error']}")
|
| 124 |
+
results[temp] = {"error": response['error']}
|
| 125 |
+
else:
|
| 126 |
+
print(response['response'])
|
| 127 |
+
results[temp] = {"response": response['response']}
|
| 128 |
+
|
| 129 |
+
return results
|
| 130 |
+
|
| 131 |
+
def roleplay_scenario(self, scenario: str, turns: int = 5):
|
| 132 |
+
"""Escenario de roleplay interactivo"""
|
| 133 |
+
print(f"🎭 Escenario: {scenario}")
|
| 134 |
+
print("=" * 50)
|
| 135 |
+
|
| 136 |
+
system_prompt = f"Actúa como {scenario}. Mantén el rol consistentemente y responde de manera inmersiva."
|
| 137 |
+
history = []
|
| 138 |
+
|
| 139 |
+
for turn in range(turns):
|
| 140 |
+
user_input = input(f"\\nTurno {turn + 1} - Tú: ").strip()
|
| 141 |
+
|
| 142 |
+
if not user_input or user_input.lower() == 'salir':
|
| 143 |
+
break
|
| 144 |
+
|
| 145 |
+
print("🎭: ", end="", flush=True)
|
| 146 |
+
|
| 147 |
+
for chunk in self.client.chat_stream(
|
| 148 |
+
message=user_input,
|
| 149 |
+
system_prompt=system_prompt,
|
| 150 |
+
history=history,
|
| 151 |
+
temperature=0.8,
|
| 152 |
+
max_tokens=300
|
| 153 |
+
):
|
| 154 |
+
if "error" in chunk:
|
| 155 |
+
print(f"Error: {chunk['error']}")
|
| 156 |
+
break
|
| 157 |
+
|
| 158 |
+
print(f"\\r🎭: {chunk['response']}", end="", flush=True)
|
| 159 |
+
|
| 160 |
+
if chunk.get("is_complete", False):
|
| 161 |
+
history.append([user_input, chunk['response']])
|
| 162 |
+
print()
|
| 163 |
+
break
|
| 164 |
+
|
| 165 |
+
return history
|
| 166 |
+
|
| 167 |
+
def academic_tutor_example():
|
| 168 |
+
"""Ejemplo: Tutor académico para física"""
|
| 169 |
+
client = LlamaClient("https://tu-usuario-llama-chat.hf.space")
|
| 170 |
+
|
| 171 |
+
system_prompt = """Eres un tutor de física especializado en bachillerato español.
|
| 172 |
+
Explicas conceptos de forma clara, usas ejemplos cotidianos y siempre verificas
|
| 173 |
+
que el estudiante entienda antes de avanzar. Puedes resolver problemas paso a paso."""
|
| 174 |
+
|
| 175 |
+
physics_questions = [
|
| 176 |
+
"¿Qué es la velocidad angular y cómo se relaciona con la velocidad lineal?",
|
| 177 |
+
"Explica el principio de conservación de la energía con un ejemplo",
|
| 178 |
+
"¿Cómo funciona el efecto Doppler?",
|
| 179 |
+
"Diferencia entre masa y peso físicamente"
|
| 180 |
+
]
|
| 181 |
+
|
| 182 |
+
print("🔬 Tutor de Física - Bachillerato")
|
| 183 |
+
print("=" * 40)
|
| 184 |
+
|
| 185 |
+
for question in physics_questions:
|
| 186 |
+
print(f"\\n📚 Pregunta: {question}")
|
| 187 |
+
print("-" * 50)
|
| 188 |
+
|
| 189 |
+
full_response = ""
|
| 190 |
+
for chunk in client.chat_stream(
|
| 191 |
+
message=question,
|
| 192 |
+
system_prompt=system_prompt,
|
| 193 |
+
max_tokens=400,
|
| 194 |
+
temperature=0.6
|
| 195 |
+
):
|
| 196 |
+
if "error" in chunk:
|
| 197 |
+
print(f"Error: {chunk['error']}")
|
| 198 |
+
break
|
| 199 |
+
|
| 200 |
+
print(f"\\r👨🏫: {chunk['response']}", end="", flush=True)
|
| 201 |
+
full_response = chunk['response']
|
| 202 |
+
|
| 203 |
+
if chunk.get("is_complete", False):
|
| 204 |
+
print("\\n")
|
| 205 |
+
break
|
| 206 |
+
|
| 207 |
+
input("Presiona Enter para la siguiente pregunta...")
|
| 208 |
+
|
| 209 |
+
def programming_assistant_example():
|
| 210 |
+
"""Ejemplo: Asistente de programación"""
|
| 211 |
+
client = LlamaClient("https://tu-usuario-llama-chat.hf.space")
|
| 212 |
+
|
| 213 |
+
system_prompt = """Eres un desarrollador senior especializado en Python y Flutter.
|
| 214 |
+
Ayudas a estudiantes con código, debugging y mejores prácticas. Siempre explicas
|
| 215 |
+
el código línea por línea y sugieres mejoras."""
|
| 216 |
+
|
| 217 |
+
code_questions = [
|
| 218 |
+
"¿Cómo implementar un patrón Singleton en Python?",
|
| 219 |
+
"Explica la diferencia entre async/await y threading",
|
| 220 |
+
"¿Cómo manejo errores de API en Flutter?",
|
| 221 |
+
"Mejores prácticas para estructurar un proyecto Flutter"
|
| 222 |
+
]
|
| 223 |
+
|
| 224 |
+
print("💻 Asistente de Programación")
|
| 225 |
+
print("=" * 35)
|
| 226 |
+
|
| 227 |
+
for question in code_questions:
|
| 228 |
+
print(f"\\n🤔 {question}")
|
| 229 |
+
print("-" * 60)
|
| 230 |
+
|
| 231 |
+
response = client.chat(
|
| 232 |
+
message=question,
|
| 233 |
+
system_prompt=system_prompt,
|
| 234 |
+
max_tokens=600,
|
| 235 |
+
temperature=0.4 # Menor temperatura para código
|
| 236 |
+
)
|
| 237 |
+
|
| 238 |
+
if "error" in response:
|
| 239 |
+
print(f"❌ Error: {response['error']}")
|
| 240 |
+
else:
|
| 241 |
+
print(f"💡 {response['response']}")
|
| 242 |
+
|
| 243 |
+
print("\\n" + "="*60)
|
| 244 |
+
time.sleep(2)
|
| 245 |
+
|
| 246 |
+
def creative_writing_example():
|
| 247 |
+
"""Ejemplo: Escritura creativa colaborativa"""
|
| 248 |
+
client = LlamaClient("https://tu-usuario-llama-chat.hf.space")
|
| 249 |
+
|
| 250 |
+
system_prompt = """Eres un escritor creativo experto. Ayudas a desarrollar historias,
|
| 251 |
+
personajes y narrativas. Puedes continuar historias, sugerir tramas y crear diálogos
|
| 252 |
+
naturales. Eres imaginativo pero coherente."""
|
| 253 |
+
|
| 254 |
+
print("✍️ Escritura Creativa Colaborativa")
|
| 255 |
+
print("=" * 40)
|
| 256 |
+
|
| 257 |
+
story_start = input("Escribe el inicio de una historia (2-3 líneas): ")
|
| 258 |
+
|
| 259 |
+
current_story = story_start
|
| 260 |
+
history = []
|
| 261 |
+
|
| 262 |
+
for chapter in range(3):
|
| 263 |
+
print(f"\\n📖 Capítulo {chapter + 1}")
|
| 264 |
+
print("-" * 30)
|
| 265 |
+
|
| 266 |
+
prompt = f"Continúa esta historia de manera creativa e interesante:\\n\\n{current_story}"
|
| 267 |
+
|
| 268 |
+
print("✨ Continuando la historia...")
|
| 269 |
+
continuation = ""
|
| 270 |
+
|
| 271 |
+
for chunk in client.chat_stream(
|
| 272 |
+
message=prompt,
|
| 273 |
+
system_prompt=system_prompt,
|
| 274 |
+
history=history,
|
| 275 |
+
max_tokens=400,
|
| 276 |
+
temperature=1.0 # Alta creatividad
|
| 277 |
+
):
|
| 278 |
+
if "error" in chunk:
|
| 279 |
+
print(f"Error: {chunk['error']}")
|
| 280 |
+
break
|
| 281 |
+
|
| 282 |
+
continuation = chunk['response']
|
| 283 |
+
print(f"\\r{continuation}", end="", flush=True)
|
| 284 |
+
|
| 285 |
+
if chunk.get("is_complete", False):
|
| 286 |
+
print("\\n")
|
| 287 |
+
break
|
| 288 |
+
|
| 289 |
+
current_story += "\\n\\n" + continuation
|
| 290 |
+
history.append([prompt, continuation])
|
| 291 |
+
|
| 292 |
+
# Opción de dirigir la historia
|
| 293 |
+
direction = input("\\n¿Quieres sugerir una dirección para la historia? (opcional): ")
|
| 294 |
+
if direction.strip():
|
| 295 |
+
current_story += "\\n\\n[Dirección sugerida: " + direction + "]"
|
| 296 |
+
|
| 297 |
+
print("\\n📚 Historia completa:")
|
| 298 |
+
print("=" * 50)
|
| 299 |
+
print(current_story)
|
| 300 |
+
|
| 301 |
+
def main():
|
| 302 |
+
"""Menú principal de ejemplos"""
|
| 303 |
+
examples = {
|
| 304 |
+
"1": ("Chat Continuo", lambda: AdvancedLlamaClient("https://tu-usuario-llama-chat.hf.space").continuous_chat()),
|
| 305 |
+
"2": ("Tutor de Física", academic_tutor_example),
|
| 306 |
+
"3": ("Asistente de Programación", programming_assistant_example),
|
| 307 |
+
"4": ("Escritura Creativa", creative_writing_example),
|
| 308 |
+
"5": ("Comparar Temperaturas", lambda: AdvancedLlamaClient("https://tu-usuario-llama-chat.hf.space").compare_temperatures(
|
| 309 |
+
"Explica la inteligencia artificial", [0.3, 0.7, 1.2]
|
| 310 |
+
)),
|
| 311 |
+
}
|
| 312 |
+
|
| 313 |
+
print("🦙 Ejemplos Avanzados - Llama 3.2 Chat")
|
| 314 |
+
print("=" * 45)
|
| 315 |
+
print("IMPORTANTE: Cambia la URL por tu Space real")
|
| 316 |
+
print("=" * 45)
|
| 317 |
+
|
| 318 |
+
while True:
|
| 319 |
+
print("\\nSelecciona un ejemplo:")
|
| 320 |
+
for key, (name, _) in examples.items():
|
| 321 |
+
print(f"{key}. {name}")
|
| 322 |
+
print("0. Salir")
|
| 323 |
+
|
| 324 |
+
choice = input("\\nOpción: ").strip()
|
| 325 |
+
|
| 326 |
+
if choice == "0":
|
| 327 |
+
print("¡Hasta luego! 🦙")
|
| 328 |
+
break
|
| 329 |
+
elif choice in examples:
|
| 330 |
+
try:
|
| 331 |
+
print(f"\\n🚀 Ejecutando: {examples[choice][0]}")
|
| 332 |
+
print("=" * 50)
|
| 333 |
+
examples[choice][1]()
|
| 334 |
+
except Exception as e:
|
| 335 |
+
print(f"Error ejecutando ejemplo: {e}")
|
| 336 |
+
else:
|
| 337 |
+
print("Opción no válida")
|
| 338 |
+
|
| 339 |
+
if __name__ == "__main__":
|
| 340 |
+
main()
|
| 341 |
+
'''
|
| 342 |
+
|
| 343 |
+
# Crear archivo de pruebas
|
| 344 |
+
tests_py_content = '''# tests.py - Pruebas para verificar el funcionamiento
|
| 345 |
+
|
| 346 |
+
from client import LlamaClient
|
| 347 |
+
import time
|
| 348 |
+
import json
|
| 349 |
+
|
| 350 |
+
def test_basic_functionality(base_url: str):
|
| 351 |
+
"""Pruebas básicas de funcionalidad"""
|
| 352 |
+
print("🧪 Ejecutando pruebas básicas...")
|
| 353 |
+
client = LlamaClient(base_url)
|
| 354 |
+
|
| 355 |
+
tests = []
|
| 356 |
+
|
| 357 |
+
# Test 1: Chat simple
|
| 358 |
+
print("\\n1. Test chat simple...")
|
| 359 |
+
try:
|
| 360 |
+
response = client.chat(
|
| 361 |
+
message="Hola, ¿puedes presentarte en una línea?",
|
| 362 |
+
max_tokens=50
|
| 363 |
+
)
|
| 364 |
+
|
| 365 |
+
if "error" in response:
|
| 366 |
+
tests.append(("Chat simple", False, response['error']))
|
| 367 |
+
else:
|
| 368 |
+
tests.append(("Chat simple", True, f"Respuesta: {response['response'][:50]}..."))
|
| 369 |
+
except Exception as e:
|
| 370 |
+
tests.append(("Chat simple", False, str(e)))
|
| 371 |
+
|
| 372 |
+
# Test 2: Chat con system prompt
|
| 373 |
+
print("2. Test system prompt...")
|
| 374 |
+
try:
|
| 375 |
+
response = client.chat(
|
| 376 |
+
message="¿Cuánto es 2+2?",
|
| 377 |
+
system_prompt="Eres una calculadora. Solo responde con números.",
|
| 378 |
+
max_tokens=20
|
| 379 |
+
)
|
| 380 |
+
|
| 381 |
+
success = "error" not in response and "4" in response.get('response', '')
|
| 382 |
+
tests.append(("System prompt", success, response.get('response', 'No response')))
|
| 383 |
+
except Exception as e:
|
| 384 |
+
tests.append(("System prompt", False, str(e)))
|
| 385 |
+
|
| 386 |
+
# Test 3: Chat con historial
|
| 387 |
+
print("3. Test historial...")
|
| 388 |
+
try:
|
| 389 |
+
history = [["¿Cómo te llamas?", "Soy un asistente de IA."]]
|
| 390 |
+
response = client.chat(
|
| 391 |
+
message="¿Recuerdas cómo te llamas?",
|
| 392 |
+
history=history,
|
| 393 |
+
max_tokens=50
|
| 394 |
+
)
|
| 395 |
+
|
| 396 |
+
success = "error" not in response
|
| 397 |
+
tests.append(("Historial", success, response.get('response', 'Error')[:50]))
|
| 398 |
+
except Exception as e:
|
| 399 |
+
tests.append(("Historial", False, str(e)))
|
| 400 |
+
|
| 401 |
+
# Test 4: Streaming
|
| 402 |
+
print("4. Test streaming...")
|
| 403 |
+
try:
|
| 404 |
+
chunks_received = 0
|
| 405 |
+
final_response = ""
|
| 406 |
+
|
| 407 |
+
for chunk in client.chat_stream(
|
| 408 |
+
message="Cuenta del 1 al 5",
|
| 409 |
+
max_tokens=30
|
| 410 |
+
):
|
| 411 |
+
chunks_received += 1
|
| 412 |
+
if "error" in chunk:
|
| 413 |
+
tests.append(("Streaming", False, chunk['error']))
|
| 414 |
+
break
|
| 415 |
+
|
| 416 |
+
final_response = chunk['response']
|
| 417 |
+
if chunk.get("is_complete", False):
|
| 418 |
+
break
|
| 419 |
+
|
| 420 |
+
success = chunks_received > 1 and final_response
|
| 421 |
+
tests.append(("Streaming", success, f"{chunks_received} chunks, respuesta: {final_response[:30]}"))
|
| 422 |
+
except Exception as e:
|
| 423 |
+
tests.append(("Streaming", False, str(e)))
|
| 424 |
+
|
| 425 |
+
# Test 5: Estado de cola
|
| 426 |
+
print("5. Test estado de cola...")
|
| 427 |
+
try:
|
| 428 |
+
status = client.get_queue_status()
|
| 429 |
+
success = "queue_size" in status and "is_processing" in status
|
| 430 |
+
tests.append(("Estado cola", success, str(status)))
|
| 431 |
+
except Exception as e:
|
| 432 |
+
tests.append(("Estado cola", False, str(e)))
|
| 433 |
+
|
| 434 |
+
# Mostrar resultados
|
| 435 |
+
print("\\n" + "="*60)
|
| 436 |
+
print("📊 RESULTADOS DE PRUEBAS")
|
| 437 |
+
print("="*60)
|
| 438 |
+
|
| 439 |
+
passed = 0
|
| 440 |
+
for test_name, success, details in tests:
|
| 441 |
+
status = "✅ PASS" if success else "❌ FAIL"
|
| 442 |
+
print(f"{status} {test_name}: {details}")
|
| 443 |
+
if success:
|
| 444 |
+
passed += 1
|
| 445 |
+
|
| 446 |
+
print(f"\\n🎯 Resultado: {passed}/{len(tests)} pruebas pasaron")
|
| 447 |
+
return passed == len(tests)
|
| 448 |
+
|
| 449 |
+
def stress_test(base_url: str, num_requests: int = 5):
|
| 450 |
+
"""Prueba de estrés con múltiples requests"""
|
| 451 |
+
print(f"\\n⚡ Prueba de estrés ({num_requests} requests)...")
|
| 452 |
+
client = LlamaClient(base_url)
|
| 453 |
+
|
| 454 |
+
start_time = time.time()
|
| 455 |
+
results = []
|
| 456 |
+
|
| 457 |
+
for i in range(num_requests):
|
| 458 |
+
print(f"Request {i+1}/{num_requests}...", end=" ")
|
| 459 |
+
|
| 460 |
+
try:
|
| 461 |
+
request_start = time.time()
|
| 462 |
+
response = client.chat(
|
| 463 |
+
message=f"Esta es la request número {i+1}. Responde brevemente.",
|
| 464 |
+
max_tokens=50
|
| 465 |
+
)
|
| 466 |
+
request_time = time.time() - request_start
|
| 467 |
+
|
| 468 |
+
if "error" in response:
|
| 469 |
+
print(f"❌ Error: {response['error']}")
|
| 470 |
+
results.append({"success": False, "time": request_time, "error": response['error']})
|
| 471 |
+
else:
|
| 472 |
+
print(f"✅ {request_time:.2f}s")
|
| 473 |
+
results.append({"success": True, "time": request_time, "response_length": len(response['response'])})
|
| 474 |
+
|
| 475 |
+
except Exception as e:
|
| 476 |
+
print(f"❌ Exception: {e}")
|
| 477 |
+
results.append({"success": False, "time": 0, "error": str(e)})
|
| 478 |
+
|
| 479 |
+
total_time = time.time() - start_time
|
| 480 |
+
|
| 481 |
+
# Análisis de resultados
|
| 482 |
+
successful = [r for r in results if r['success']]
|
| 483 |
+
failed = [r for r in results if not r['success']]
|
| 484 |
+
|
| 485 |
+
print(f"\\n📈 Análisis de estrés:")
|
| 486 |
+
print(f" • Total: {total_time:.2f}s")
|
| 487 |
+
print(f" • Exitosas: {len(successful)}/{num_requests}")
|
| 488 |
+
print(f" • Fallidas: {len(failed)}/{num_requests}")
|
| 489 |
+
|
| 490 |
+
if successful:
|
| 491 |
+
avg_time = sum(r['time'] for r in successful) / len(successful)
|
| 492 |
+
print(f" • Tiempo promedio: {avg_time:.2f}s")
|
| 493 |
+
|
| 494 |
+
return len(successful) == num_requests
|
| 495 |
+
|
| 496 |
+
def performance_benchmark(base_url: str):
|
| 497 |
+
"""Benchmark de rendimiento"""
|
| 498 |
+
print("\\n🏁 Benchmark de rendimiento...")
|
| 499 |
+
client = LlamaClient(base_url)
|
| 500 |
+
|
| 501 |
+
test_cases = [
|
| 502 |
+
("Respuesta corta", "Hola", 20),
|
| 503 |
+
("Respuesta media", "Explica qué es Python en un párrafo", 100),
|
| 504 |
+
("Respuesta larga", "Describe la historia de la programación", 300),
|
| 505 |
+
]
|
| 506 |
+
|
| 507 |
+
for test_name, message, max_tokens in test_cases:
|
| 508 |
+
print(f"\\n{test_name} ({max_tokens} tokens)...")
|
| 509 |
+
|
| 510 |
+
# Test sin streaming
|
| 511 |
+
start_time = time.time()
|
| 512 |
+
response = client.chat(message=message, max_tokens=max_tokens)
|
| 513 |
+
normal_time = time.time() - start_time
|
| 514 |
+
|
| 515 |
+
if "error" in response:
|
| 516 |
+
print(f" ❌ Error: {response['error']}")
|
| 517 |
+
continue
|
| 518 |
+
|
| 519 |
+
# Test con streaming
|
| 520 |
+
start_time = time.time()
|
| 521 |
+
for chunk in client.chat_stream(message=message, max_tokens=max_tokens):
|
| 522 |
+
if chunk.get("is_complete", False):
|
| 523 |
+
break
|
| 524 |
+
stream_time = time.time() - start_time
|
| 525 |
+
|
| 526 |
+
print(f" • Sin streaming: {normal_time:.2f}s")
|
| 527 |
+
print(f" • Con streaming: {stream_time:.2f}s")
|
| 528 |
+
print(f" • Tokens generados: ~{len(response['response'])} chars")
|
| 529 |
+
|
| 530 |
+
def main():
|
| 531 |
+
"""Ejecutar todas las pruebas"""
|
| 532 |
+
base_url = input("Ingresa la URL de tu Space: ").strip()
|
| 533 |
+
|
| 534 |
+
if not base_url:
|
| 535 |
+
print("❌ URL requerida")
|
| 536 |
+
return
|
| 537 |
+
|
| 538 |
+
if not base_url.startswith(('http://', 'https://')):
|
| 539 |
+
base_url = f"https://{base_url}"
|
| 540 |
+
|
| 541 |
+
print(f"🧪 Probando Space: {base_url}")
|
| 542 |
+
print("="*60)
|
| 543 |
+
|
| 544 |
+
# Ejecutar pruebas
|
| 545 |
+
basic_ok = test_basic_functionality(base_url)
|
| 546 |
+
|
| 547 |
+
if basic_ok:
|
| 548 |
+
print("\\n✅ Pruebas básicas exitosas. Continuando...")
|
| 549 |
+
stress_ok = stress_test(base_url, 3)
|
| 550 |
+
performance_benchmark(base_url)
|
| 551 |
+
|
| 552 |
+
if stress_ok:
|
| 553 |
+
print("\\n🎉 ¡Todas las pruebas pasaron! El Space funciona correctamente.")
|
| 554 |
+
else:
|
| 555 |
+
print("\\n⚠️ Algunas pruebas de estrés fallaron. Verifica el rendimiento.")
|
| 556 |
+
else:
|
| 557 |
+
print("\\n❌ Pruebas básicas fallaron. Verifica la configuración del Space.")
|
| 558 |
+
|
| 559 |
+
if __name__ == "__main__":
|
| 560 |
+
main()
|
| 561 |
+
'''
|
| 562 |
+
|
| 563 |
+
# Escribir archivos finales
|
| 564 |
+
with open("examples.py", "w", encoding="utf-8") as f:
|
| 565 |
+
f.write(examples_py_content)
|
| 566 |
+
|
| 567 |
+
with open("tests.py", "w", encoding="utf-8") as f:
|
| 568 |
+
f.write(tests_py_content)
|
| 569 |
+
|
| 570 |
+
# Crear resumen final
|
| 571 |
+
summary = """
|
| 572 |
+
🦙 RESUMEN DEL PROYECTO - Llama 3.2 3B Chat Space
|
| 573 |
+
================================================================
|
| 574 |
+
|
| 575 |
+
✅ ARCHIVOS GENERADOS:
|
| 576 |
+
|
| 577 |
+
📱 CORE APPLICATION:
|
| 578 |
+
- app.py → Aplicación principal de Gradio con cola y streaming
|
| 579 |
+
- requirements.txt → Dependencias del proyecto
|
| 580 |
+
- config.py → Configuración centralizada
|
| 581 |
+
- utils.py → Utilidades y monitoreo de rendimiento
|
| 582 |
+
|
| 583 |
+
🐍 CLIENTE PYTHON:
|
| 584 |
+
- client.py → Cliente Python para API del Space
|
| 585 |
+
- examples.py → Ejemplos avanzados de uso
|
| 586 |
+
- tests.py → Suite de pruebas automáticas
|
| 587 |
+
|
| 588 |
+
📚 DOCUMENTACIÓN:
|
| 589 |
+
- README.md → Instrucciones completas de setup y uso
|
| 590 |
+
|
| 591 |
+
🚀 CARACTERÍSTICAS IMPLEMENTADAS:
|
| 592 |
+
|
| 593 |
+
✓ Sistema de colas (una petición a la vez)
|
| 594 |
+
✓ Streaming en tiempo real
|
| 595 |
+
✓ API completa para cliente Python
|
| 596 |
+
✓ Soporte para system prompt, message e history
|
| 597 |
+
✓ Interfaz web con Gradio
|
| 598 |
+
✓ Monitoreo de estado de cola
|
| 599 |
+
✓ Manejo de errores robusto
|
| 600 |
+
✓ Configuración para hf_token
|
| 601 |
+
✓ Validación de parámetros
|
| 602 |
+
✓ Estimación de tokens
|
| 603 |
+
✓ Historial de conversación
|
| 604 |
+
✓ Múltiples temperaturas
|
| 605 |
+
✓ Límites configurables
|
| 606 |
+
|
| 607 |
+
🔧 PASOS SIGUIENTES:
|
| 608 |
+
|
| 609 |
+
1. Crear Space en Hugging Face:
|
| 610 |
+
- Ve a https://huggingface.co/new-space
|
| 611 |
+
- Selecciona Gradio SDK
|
| 612 |
+
- Elige hardware T4 small o superior
|
| 613 |
+
|
| 614 |
+
2. Configurar HF_TOKEN:
|
| 615 |
+
- Settings → Repository secrets
|
| 616 |
+
- Agregar HF_TOKEN con tu token de acceso
|
| 617 |
+
|
| 618 |
+
3. Subir archivos:
|
| 619 |
+
- app.py y requirements.txt son obligatorios
|
| 620 |
+
- Los demás archivos son opcionales pero recomendados
|
| 621 |
+
|
| 622 |
+
4. Probar funcionalidad:
|
| 623 |
+
- Usar tests.py para verificar el funcionamiento
|
| 624 |
+
- Usar examples.py para casos de uso avanzados
|
| 625 |
+
|
| 626 |
+
📖 DOCUMENTACIÓN COMPLETA:
|
| 627 |
+
Revisar README.md para instrucciones detalladas de configuración,
|
| 628 |
+
uso de la API, troubleshooting y ejemplos de integración.
|
| 629 |
+
|
| 630 |
+
¡Tu Space está listo para ser desplegado! 🚀
|
| 631 |
+
"""
|
| 632 |
+
|
| 633 |
+
print(summary)
|
| 634 |
+
|
| 635 |
+
# Guardar resumen
|
| 636 |
+
with open("RESUMEN.md", "w", encoding="utf-8") as f:
|
| 637 |
+
f.write(summary)
|
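El `config.py` que genera script_2.py agrupa los kwargs de carga del modelo y de generación. Un uso típico fuera de `app.py` sería algo como el esquema siguiente, que no forma parte del commit y asume `transformers` y `torch` instalados, el archivo `config.py` en el path y un `HF_TOKEN` con acceso a Llama 3.2:

```python
# Esquema mínimo de uso de config.py con transformers (supuestos: librerías
# instaladas y HF_TOKEN con acceso al modelo; no incluido en el commit).
from transformers import AutoModelForCausalLM, AutoTokenizer
from config import Config

tokenizer = AutoTokenizer.from_pretrained(Config.MODEL_NAME, token=Config.HF_TOKEN)
# torch_dtype llega como cadena ("float16"/"float32"); las versiones recientes
# de transformers la aceptan, pero puede sustituirse por torch.float16.
model = AutoModelForCausalLM.from_pretrained(Config.MODEL_NAME, **Config.get_model_config())

mensajes = [
    {"role": "system", "content": Config.DEFAULT_SYSTEM_PROMPT},
    {"role": "user", "content": "Hola, preséntate en una línea."},
]
entradas = tokenizer.apply_chat_template(
    mensajes, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

salida = model.generate(entradas, **Config.get_generation_config(max_tokens=128))
print(tokenizer.decode(salida[0][entradas.shape[-1]:], skip_special_tokens=True))
```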
tests.py
ADDED
|
@@ -0,0 +1,217 @@
| 1 |
+
# tests.py - Pruebas para verificar el funcionamiento
|
| 2 |
+
|
| 3 |
+
from client import LlamaClient
|
| 4 |
+
import time
|
| 5 |
+
import json
|
| 6 |
+
|
| 7 |
+
def test_basic_functionality(base_url: str):
|
| 8 |
+
"""Pruebas básicas de funcionalidad"""
|
| 9 |
+
print("🧪 Ejecutando pruebas básicas...")
|
| 10 |
+
client = LlamaClient(base_url)
|
| 11 |
+
|
| 12 |
+
tests = []
|
| 13 |
+
|
| 14 |
+
# Test 1: Chat simple
|
| 15 |
+
print("\n1. Test chat simple...")
|
| 16 |
+
try:
|
| 17 |
+
response = client.chat(
|
| 18 |
+
message="Hola, ¿puedes presentarte en una línea?",
|
| 19 |
+
max_tokens=50
|
| 20 |
+
)
|
| 21 |
+
|
| 22 |
+
if "error" in response:
|
| 23 |
+
tests.append(("Chat simple", False, response['error']))
|
| 24 |
+
else:
|
| 25 |
+
tests.append(("Chat simple", True, f"Respuesta: {response['response'][:50]}..."))
|
| 26 |
+
except Exception as e:
|
| 27 |
+
tests.append(("Chat simple", False, str(e)))
|
| 28 |
+
|
| 29 |
+
# Test 2: Chat con system prompt
|
| 30 |
+
print("2. Test system prompt...")
|
| 31 |
+
try:
|
| 32 |
+
response = client.chat(
|
| 33 |
+
message="¿Cuánto es 2+2?",
|
| 34 |
+
system_prompt="Eres una calculadora. Solo responde con números.",
|
| 35 |
+
max_tokens=20
|
| 36 |
+
)
|
| 37 |
+
|
| 38 |
+
success = "error" not in response and "4" in response.get('response', '')
|
| 39 |
+
tests.append(("System prompt", success, response.get('response', 'No response')))
|
| 40 |
+
except Exception as e:
|
| 41 |
+
tests.append(("System prompt", False, str(e)))
|
| 42 |
+
|
| 43 |
+
# Test 3: Chat con historial
|
| 44 |
+
print("3. Test historial...")
|
| 45 |
+
try:
|
| 46 |
+
history = [["¿Cómo te llamas?", "Soy un asistente de IA."]]
|
| 47 |
+
response = client.chat(
|
| 48 |
+
message="¿Recuerdas cómo te llamas?",
|
| 49 |
+
history=history,
|
| 50 |
+
max_tokens=50
|
| 51 |
+
)
|
| 52 |
+
|
| 53 |
+
success = "error" not in response
|
| 54 |
+
tests.append(("Historial", success, response.get('response', 'Error')[:50]))
|
| 55 |
+
except Exception as e:
|
| 56 |
+
tests.append(("Historial", False, str(e)))
|
| 57 |
+
|
| 58 |
+
# Test 4: Streaming
|
| 59 |
+
print("4. Test streaming...")
|
| 60 |
+
try:
|
| 61 |
+
chunks_received = 0
|
| 62 |
+
final_response = ""
|
| 63 |
+
|
| 64 |
+
for chunk in client.chat_stream(
|
| 65 |
+
message="Cuenta del 1 al 5",
|
| 66 |
+
max_tokens=30
|
| 67 |
+
):
|
| 68 |
+
chunks_received += 1
|
| 69 |
+
if "error" in chunk:
|
| 70 |
+
tests.append(("Streaming", False, chunk['error']))
|
| 71 |
+
break
|
| 72 |
+
|
| 73 |
+
final_response = chunk['response']
|
| 74 |
+
if chunk.get("is_complete", False):
|
| 75 |
+
break
|
| 76 |
+
|
| 77 |
+
success = chunks_received > 1 and final_response
|
| 78 |
+
tests.append(("Streaming", success, f"{chunks_received} chunks, respuesta: {final_response[:30]}"))
|
| 79 |
+
except Exception as e:
|
| 80 |
+
tests.append(("Streaming", False, str(e)))
|
| 81 |
+
|
| 82 |
+
# Test 5: Estado de cola
|
| 83 |
+
print("5. Test estado de cola...")
|
| 84 |
+
try:
|
| 85 |
+
status = client.get_queue_status()
|
| 86 |
+
success = "queue_size" in status and "is_processing" in status
|
| 87 |
+
tests.append(("Estado cola", success, str(status)))
|
| 88 |
+
except Exception as e:
|
| 89 |
+
tests.append(("Estado cola", False, str(e)))
|
| 90 |
+
|
| 91 |
+
# Mostrar resultados
|
| 92 |
+
print("\n" + "="*60)
|
| 93 |
+
print("📊 RESULTADOS DE PRUEBAS")
|
| 94 |
+
print("="*60)
|
| 95 |
+
|
| 96 |
+
passed = 0
|
| 97 |
+
for test_name, success, details in tests:
|
| 98 |
+
status = "✅ PASS" if success else "❌ FAIL"
|
| 99 |
+
print(f"{status} {test_name}: {details}")
|
| 100 |
+
if success:
|
| 101 |
+
passed += 1
|
| 102 |
+
|
| 103 |
+
print(f"\n🎯 Resultado: {passed}/{len(tests)} pruebas pasaron")
|
| 104 |
+
return passed == len(tests)
|
| 105 |
+
|
| 106 |
+
def stress_test(base_url: str, num_requests: int = 5):
|
| 107 |
+
"""Prueba de estrés con múltiples requests"""
|
| 108 |
+
print(f"\n⚡ Prueba de estrés ({num_requests} requests)...")
|
| 109 |
+
client = LlamaClient(base_url)
|
| 110 |
+
|
| 111 |
+
start_time = time.time()
|
| 112 |
+
results = []
|
| 113 |
+
|
| 114 |
+
for i in range(num_requests):
|
| 115 |
+
print(f"Request {i+1}/{num_requests}...", end=" ")
|
| 116 |
+
|
| 117 |
+
try:
|
| 118 |
+
request_start = time.time()
|
| 119 |
+
response = client.chat(
|
| 120 |
+
message=f"Esta es la request número {i+1}. Responde brevemente.",
|
| 121 |
+
max_tokens=50
|
| 122 |
+
)
|
| 123 |
+
            request_time = time.time() - request_start

            if "error" in response:
                print(f"❌ Error: {response['error']}")
                results.append({"success": False, "time": request_time, "error": response['error']})
            else:
                print(f"✅ {request_time:.2f}s")
                results.append({"success": True, "time": request_time, "response_length": len(response['response'])})

        except Exception as e:
            print(f"❌ Exception: {e}")
            results.append({"success": False, "time": 0, "error": str(e)})

    total_time = time.time() - start_time

    # Analyze the results
    successful = [r for r in results if r['success']]
    failed = [r for r in results if not r['success']]

    print(f"\n📈 Análisis de estrés:")
    print(f"   • Total: {total_time:.2f}s")
    print(f"   • Exitosas: {len(successful)}/{num_requests}")
    print(f"   • Fallidas: {len(failed)}/{num_requests}")

    if successful:
        avg_time = sum(r['time'] for r in successful) / len(successful)
        print(f"   • Tiempo promedio: {avg_time:.2f}s")

    return len(successful) == num_requests

def performance_benchmark(base_url: str):
    """Performance benchmark"""
    print("\n🏁 Benchmark de rendimiento...")
    client = LlamaClient(base_url)

    test_cases = [
        ("Respuesta corta", "Hola", 20),
        ("Respuesta media", "Explica qué es Python en un párrafo", 100),
        ("Respuesta larga", "Describe la historia de la programación", 300),
    ]

    for test_name, message, max_tokens in test_cases:
        print(f"\n{test_name} ({max_tokens} tokens)...")

        # Non-streaming request
        start_time = time.time()
        response = client.chat(message=message, max_tokens=max_tokens)
        normal_time = time.time() - start_time

        if "error" in response:
            print(f"   ❌ Error: {response['error']}")
            continue

        # Streaming request
        start_time = time.time()
        for chunk in client.chat_stream(message=message, max_tokens=max_tokens):
            if chunk.get("is_complete", False):
                break
        stream_time = time.time() - start_time

        print(f"   • Sin streaming: {normal_time:.2f}s")
        print(f"   • Con streaming: {stream_time:.2f}s")
        print(f"   • Tokens generados: ~{len(response['response'])} chars")

def main():
    """Run all the tests"""
    base_url = input("Ingresa la URL de tu Space: ").strip()

    if not base_url:
        print("❌ URL requerida")
        return

    if not base_url.startswith(('http://', 'https://')):
        base_url = f"https://{base_url}"

    print(f"🧪 Probando Space: {base_url}")
    print("=" * 60)

    # Run the test suites
    basic_ok = test_basic_functionality(base_url)

    if basic_ok:
        print("\n✅ Pruebas básicas exitosas. Continuando...")
        stress_ok = stress_test(base_url, 3)
        performance_benchmark(base_url)

        if stress_ok:
            print("\n🎉 ¡Todas las pruebas pasaron! El Space funciona correctamente.")
        else:
            print("\n⚠️ Algunas pruebas de estrés fallaron. Verifica el rendimiento.")
    else:
        print("\n❌ Pruebas básicas fallaron. Verifica la configuración del Space.")

if __name__ == "__main__":
    main()
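*Note:* if you want to run these checks without the interactive `input()` prompt in `main()`, the helpers can also be imported and called from another script. A minimal sketch, assuming the file is importable as the `tests` module; the Space URL below is a placeholder you would replace with your own:

```python
# Hypothetical non-interactive driver for the helpers defined in tests.py.
# BASE_URL is a placeholder; substitute the real URL of your Space.
from tests import test_basic_functionality, stress_test, performance_benchmark

BASE_URL = "https://tu-usuario-llama-chat.hf.space"

if test_basic_functionality(BASE_URL):
    stress_test(BASE_URL, 3)          # same request count main() uses
    performance_benchmark(BASE_URL)
```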
utils.py
ADDED
@@ -0,0 +1,185 @@
# utils.py - Utility helpers for the Space

import time
import functools
import logging
from typing import List, Dict, Callable, Any
from datetime import datetime

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def timing_decorator(func: Callable) -> Callable:
    """Decorator that measures execution time"""
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        start_time = time.time()
        result = func(*args, **kwargs)
        end_time = time.time()

        logger.info(f"{func.__name__} ejecutado en {end_time - start_time:.2f}s")
        return result
    return wrapper

def sanitize_input(text: str, max_length: int = 2000) -> str:
    """Sanitize user input"""
    if not isinstance(text, str):
        return ""

    # Truncate overly long input
    text = text[:max_length]

    # Strip problematic characters
    text = text.replace('\x00', '')  # null bytes
    text = text.strip()

    return text

def format_history(history: List[List[str]]) -> List[List[str]]:
    """Format and validate the chat history"""
    if not history:
        return []

    formatted_history = []
    for item in history:
        if isinstance(item, list) and len(item) == 2:
            user_msg = sanitize_input(str(item[0]))
            assistant_msg = sanitize_input(str(item[1]))

            if user_msg and assistant_msg:
                formatted_history.append([user_msg, assistant_msg])

    # Keep only the last 10 exchanges
    return formatted_history[-10:]

def estimate_tokens(text: str) -> int:
    """Rough token estimate"""
    # Approximation: ~4 characters per token in Spanish
    return len(text) // 4

def validate_parameters(max_tokens: int, temperature: float) -> Dict[str, Any]:
    """Validate generation parameters"""
    from config import Config

    errors = []

    # Validate max_tokens
    if not isinstance(max_tokens, int):
        max_tokens = Config.DEFAULT_MAX_TOKENS
        errors.append("max_tokens debe ser un entero")
    elif max_tokens < Config.MIN_TOKENS_LIMIT:
        max_tokens = Config.MIN_TOKENS_LIMIT
        errors.append(f"max_tokens mínimo es {Config.MIN_TOKENS_LIMIT}")
    elif max_tokens > Config.MAX_TOKENS_LIMIT:
        max_tokens = Config.MAX_TOKENS_LIMIT
        errors.append(f"max_tokens máximo es {Config.MAX_TOKENS_LIMIT}")

    # Validate temperature
    if not isinstance(temperature, (int, float)):
        temperature = Config.DEFAULT_TEMPERATURE
        errors.append("temperature debe ser un número")
    elif temperature < Config.MIN_TEMPERATURE:
        temperature = Config.MIN_TEMPERATURE
        errors.append(f"temperature mínima es {Config.MIN_TEMPERATURE}")
    elif temperature > Config.MAX_TEMPERATURE:
        temperature = Config.MAX_TEMPERATURE
        errors.append(f"temperature máxima es {Config.MAX_TEMPERATURE}")

    return {
        "max_tokens": max_tokens,
        "temperature": float(temperature),
        "errors": errors
    }

def create_error_response(error_msg: str) -> Dict[str, Any]:
    """Build a standardized error response"""
    return {
        "response": f"Error: {error_msg}",
        "queue_status": {
            "queue_size": 0,
            "is_processing": False,
            "timestamp": datetime.now().isoformat(),
            "error": True
        }
    }

def truncate_context(text: str, max_length: int = 1800) -> str:
    """Truncate context while preserving coherence"""
    if len(text) <= max_length:
        return text

    # Prefer truncating on paragraph boundaries
    paragraphs = text.split('\n\n')
    truncated = ""

    for paragraph in paragraphs:
        if len(truncated + paragraph) <= max_length:
            truncated += paragraph + '\n\n'
        else:
            break

    # If there are no paragraphs, truncate on sentence boundaries
    if not truncated:
        sentences = text.split('. ')
        for sentence in sentences:
            if len(truncated + sentence) <= max_length:
                truncated += sentence + '. '
            else:
                break

    # Last resort: hard truncation
    if not truncated:
        truncated = text[:max_length]

    return truncated.strip()

class PerformanceMonitor:
    """Simple performance monitor"""

    def __init__(self):
        self.stats = {
            "total_requests": 0,
            "successful_requests": 0,
            "failed_requests": 0,
            "total_tokens_generated": 0,
            "average_response_time": 0,
            "start_time": datetime.now()
        }

    def record_request(self, success: bool, tokens_generated: int = 0, response_time: float = 0):
        """Record a single request"""
        self.stats["total_requests"] += 1

        if success:
            self.stats["successful_requests"] += 1
            self.stats["total_tokens_generated"] += tokens_generated
        else:
            self.stats["failed_requests"] += 1

        # Update the running average response time
        if response_time > 0:
            current_avg = self.stats["average_response_time"]
            total_requests = self.stats["total_requests"]

            self.stats["average_response_time"] = (
                (current_avg * (total_requests - 1) + response_time) / total_requests
            )

    def get_stats(self) -> Dict[str, Any]:
        """Return accumulated statistics"""
        uptime = datetime.now() - self.stats["start_time"]

        return {
            **self.stats,
            "uptime_seconds": uptime.total_seconds(),
            "success_rate": (
                self.stats["successful_requests"] / max(self.stats["total_requests"], 1)
            ) * 100,
            "tokens_per_minute": (
                self.stats["total_tokens_generated"] / max(uptime.total_seconds() / 60, 1)
            )
        }

# Global monitor instance
performance_monitor = PerformanceMonitor()
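These helpers are meant to be consumed by `app.py`; that wiring is outside this file, but a rough usage sketch around a single generation request might look like the following (all values and the placeholder response are illustrative, not taken from the app):

```python
# Hypothetical usage of the utils.py helpers around one generation request.
import time

from utils import estimate_tokens, performance_monitor, sanitize_input, validate_parameters

params = validate_parameters(max_tokens=512, temperature=0.7)  # clamps out-of-range values
prompt = sanitize_input("  Explica qué es un decorador en Python  ")

start = time.time()
response_text = "..."  # placeholder for the model's actual reply
performance_monitor.record_request(
    success=True,
    tokens_generated=estimate_tokens(response_text),
    response_time=time.time() - start,
)

print(params["errors"])                                  # any validation warnings
print(performance_monitor.get_stats()["success_rate"])   # running success rate in %
```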