import requests
import json
from typing import List, Dict, Generator, Optional


class LlamaClient:
    def __init__(self, base_url: str):
        """
        Client for interacting with the Llama Chat Hugging Face Space.

        Args:
            base_url: Base URL of the Space (e.g. "https://tu-usuario-llama-chat.hf.space")
        """
        self.base_url = base_url.rstrip('/')
        self.api_endpoint = f"{self.base_url}/call/api_chat"
        self.stream_endpoint = f"{self.base_url}/call/api_chat_stream"

    def chat(self, message: str, system_prompt: str = "", history: Optional[List[List[str]]] = None,
             max_tokens: int = 512, temperature: float = 0.7) -> Dict:
        """
        Send a message and receive the complete response.

        Args:
            message: User message
            system_prompt: System prompt (optional)
            history: Conversation history [[user, assistant], ...]
            max_tokens: Maximum number of tokens to generate
            temperature: Generation temperature

        Returns:
            Dict with 'response' and 'queue_status'
        """
        if history is None:
            history = []

        payload = {
            "data": [system_prompt, message, history, max_tokens, temperature]
        }

        try:
            response = requests.post(self.api_endpoint, json=payload, timeout=300)
            response.raise_for_status()

            result = response.json()
            return result.get("data", [{}])[0]

        except requests.exceptions.RequestException as e:
            return {"error": f"Connection error: {str(e)}"}
        except json.JSONDecodeError as e:
            return {"error": f"Error decoding JSON: {str(e)}"}

    def chat_stream(self, message: str, system_prompt: str = "", history: Optional[List[List[str]]] = None,
                    max_tokens: int = 512, temperature: float = 0.7) -> Generator[Dict, None, None]:
        """
        Send a message and receive the response as a stream.

        Args:
            message: User message
            system_prompt: System prompt (optional)
            history: Conversation history
            max_tokens: Maximum number of tokens to generate
            temperature: Generation temperature

        Yields:
            Dict with 'response', 'is_complete' and 'queue_status'
        """
        if history is None:
            history = []

        payload = {
            "data": [system_prompt, message, history, max_tokens, temperature]
        }

        try:
            response = requests.post(self.stream_endpoint, json=payload, stream=True, timeout=300)
            response.raise_for_status()

            for line in response.iter_lines():
                if line:
                    try:
                        data = json.loads(line.decode('utf-8'))
                        if "data" in data:
                            yield data["data"][0]
                    except json.JSONDecodeError:
                        # Skip lines that are not valid JSON (e.g. keep-alive chunks)
                        continue

        except requests.exceptions.RequestException as e:
            yield {"error": f"Connection error: {str(e)}", "is_complete": True}

    def get_queue_status(self) -> Dict:
        """
        Get the current queue status.

        Returns:
            Dict with queue status information
        """
        try:
            # Issue a minimal chat call and read the queue status reported by the Space
            result = self.chat("", max_tokens=1)
            return result.get("queue_status", {})
        except Exception as e:
            return {"error": str(e)}


def example_usage():
    """Example of how to use the client"""

    client = LlamaClient("https://tu-usuario-llama-chat.hf.space")

    print("=== Example 1: Simple chat ===")
    response = client.chat(
        message="What is artificial intelligence?",
        system_prompt="You are an expert computer science teacher."
    )

    if "error" in response:
        print(f"Error: {response['error']}")
    else:
        print(f"Response: {response['response']}")
        print(f"Queue status: {response['queue_status']}")

    print("\n=== Example 2: Chat with history ===")
    history = [
        ["Hello", "Hello! How can I help you?"],
        ["What is your name?", "I am an AI assistant based on Llama 3.2."]
    ]

    response = client.chat(
        message="Can you explain physics concepts to me?",
        system_prompt="You are a physics tutor for high school students.",
        history=history
    )

    if "error" in response:
        print(f"Error: {response['error']}")
    else:
        print(f"Response: {response['response']}")

    print("\n=== Example 3: Streaming chat ===")
    print("Question: Explain the theory of relativity")
    print("Response (streaming):")

    for chunk in client.chat_stream(
        message="Explain the theory of relativity in simple terms",
        system_prompt="You are a science communicator.",
        max_tokens=300,
        temperature=0.8
    ):
        if "error" in chunk:
            print(f"Error: {chunk['error']}")
            break

        print(f"\r{chunk['response']}", end="", flush=True)

        if chunk.get("is_complete", False):
            print("\n[Response complete]")
            print(f"Queue status: {chunk['queue_status']}")
            break

    print("\n=== Example 4: Check queue status ===")
    status = client.get_queue_status()
    print(f"Current status: {status}")


if __name__ == "__main__":
    example_usage()
|