import json

from fastapi import APIRouter, HTTPException

from core.llm import LLMClient
from core.memory import save_user_state, load_user_state

router = APIRouter()
llm_client = LLMClient(provider="ollama")  # Default to Ollama

@router.post("/chat")  # NOTE: route path is assumed; adjust to the project's URL scheme
async def chat(user_id: str, message: str):
    if not message:
        raise HTTPException(status_code=400, detail="Message is required")

    # Load user state from Redis (empty history if nothing is stored yet)
    user_state = load_user_state(user_id)
    conversation_history = json.loads(user_state.get("conversation", "[]")) if user_state else []

    # Add user message to history
    conversation_history.append({"role": "user", "content": message})

    # Generate AI response, collecting the streamed chunks into a single string.
    # NOTE: only the latest message is sent to the model; the stored history is
    # not included in the prompt.
    try:
        full_response = ""
        response_stream = llm_client.generate(prompt=message, stream=True)
        for chunk in response_stream:
            full_response += chunk
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"LLM generation failed: {e}") from e

    # Save updated conversation and return the full response
    conversation_history.append({"role": "assistant", "content": full_response})
    save_user_state(user_id, {"conversation": json.dumps(conversation_history)})
    return {"response": full_response}
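
# Example wiring (a minimal sketch, not part of this router module): the module path
# "api.chat" and the port below are assumptions; adjust them to the real project layout.
#
#   from fastapi import FastAPI
#   from api.chat import router
#
#   app = FastAPI()
#   app.include_router(router)
#
# Because user_id and message are plain str parameters, FastAPI reads them from the
# query string, so a request against the assumed "/chat" path looks like:
#
#   curl -X POST "http://localhost:8000/chat?user_id=alice&message=hello"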