import os
import logging
from typing import Optional
from datetime import datetime
from contextlib import asynccontextmanager
from fastapi import FastAPI, HTTPException, Depends, Security, status
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field
import uvicorn

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Global variables for model
model = None
tokenizer = None
model_loaded = False
torch_available = False

@asynccontextmanager
async def lifespan(app: FastAPI):
    # Startup
    global model, tokenizer, model_loaded, torch_available
    logger.info("Real LLM AI Assistant starting up...")
    try:
        # Try to import torch and transformers
        import torch
        from transformers import AutoTokenizer, AutoModelForCausalLM
        torch_available = True
        logger.info("PyTorch and Transformers available!")

        # Use a better conversational model
        model_name = os.getenv("MODEL_NAME", "microsoft/DialoGPT-small")  # use the small variant for better compatibility
        logger.info(f"Loading real LLM model: {model_name}")

        # Load tokenizer
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token

        # Load model with optimizations
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float32,
            low_cpu_mem_usage=True,
            pad_token_id=tokenizer.eos_token_id
        )
        model_loaded = True
        logger.info("Real LLM model loaded successfully!")
    except ImportError as e:
        logger.warning(f"PyTorch/Transformers not available: {e}")
        logger.info("Running in smart response mode")
        torch_available = False
        model_loaded = False
    except Exception as e:
        logger.warning(f"Could not load LLM model: {e}")
        logger.info("Running in smart response mode")
        model_loaded = False

    yield

    # Shutdown
    logger.info("AI Assistant shutting down...")

# Initialize FastAPI app with lifespan
app = FastAPI(
    title="Real LLM AI Agent API",
    description="AI Agent powered by actual LLM models with fallback",
    version="4.1.0",
    lifespan=lifespan
)

# CORS middleware
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Security
security = HTTPBearer()

# Configuration
API_KEYS = {
    os.getenv("API_KEY_1", "27Eud5J73j6SqPQAT2ioV-CtiCg-p0WNqq6I4U0Ig6E"): "user1",
    os.getenv("API_KEY_2", "QbzG2CqHU1Nn6F1EogZ1d3dp8ilRTMJQBwTJDQBzS-U"): "user2",
}
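# NOTE: the key strings above are demo fallbacks baked into the source; a real
# deployment should supply its own secrets via the API_KEY_1 / API_KEY_2
# environment variables rather than relying on these hard-coded defaults.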

# Request/Response models
class ChatRequest(BaseModel):
    message: str = Field(..., min_length=1, max_length=2000)
    max_length: Optional[int] = Field(200, ge=50, le=500)
    temperature: Optional[float] = Field(0.8, ge=0.1, le=1.5)
    top_p: Optional[float] = Field(0.9, ge=0.1, le=1.0)
    do_sample: Optional[bool] = Field(True)

class ChatResponse(BaseModel):
    response: str
    model_used: str
    timestamp: str
    processing_time: float
    tokens_used: int
    model_loaded: bool

class HealthResponse(BaseModel):
    status: str
    model_loaded: bool
    timestamp: str
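
# Example JSON body accepted by ChatRequest (a sketch; only "message" is
# required -- the remaining fields fall back to the defaults declared above):
#
# {
#     "message": "Explain machine learning in one paragraph",
#     "max_length": 200,
#     "temperature": 0.8,
#     "top_p": 0.9,
#     "do_sample": true
# }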

def verify_api_key(credentials: HTTPAuthorizationCredentials = Security(security)) -> str:
    """Verify API key authentication"""
    api_key = credentials.credentials
    if api_key not in API_KEYS:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid API key"
        )
    return API_KEYS[api_key]

def get_smart_fallback_response(message: str) -> str:
    """Smart fallback responses when LLM is not available"""
    message_lower = message.lower()

    if any(word in message_lower for word in ["hello", "hi", "hey", "hii"]):
        return """Hello! I'm your AI assistant. I'm currently running in smart mode while the full LLM model loads.

I can still help you with questions about:
• Machine Learning and AI concepts
• Programming and Python
• Data Science topics
• Technology explanations
• General conversations

What would you like to know about? I'll do my best to provide helpful information!"""

    elif any(word in message_lower for word in ["machine learning", "ml"]):
        return """Machine learning is a fascinating field! It's a subset of artificial intelligence where computers learn to make predictions or decisions by finding patterns in data, rather than being explicitly programmed for every scenario.

Key concepts:
• **Training**: The model learns from example data
• **Patterns**: It identifies relationships and trends
• **Prediction**: It applies learned patterns to new data
• **Improvement**: Performance gets better with more data

Common applications include recommendation systems (like Netflix suggestions), image recognition, natural language processing, and autonomous vehicles.

Would you like me to explain any specific aspect of machine learning in more detail?"""

    elif any(word in message_lower for word in ["ai", "artificial intelligence"]):
        return """Artificial Intelligence is the simulation of human intelligence in machines! It's about creating systems that can think, learn, and solve problems.

Current AI can:
• Understand and generate human language
• Recognize images and objects
• Play complex games at superhuman levels
• Drive cars autonomously
• Discover new medicines

Types of AI:
• **Narrow AI**: Specialized for specific tasks (what we have today)
• **General AI**: Human-level intelligence across all domains (future goal)
• **Super AI**: Beyond human intelligence (theoretical)

AI is transforming every industry and changing how we work, learn, and live. What aspect of AI interests you most?"""

    elif any(word in message_lower for word in ["python", "programming"]):
        return """Python is an excellent choice for AI and programming! It's known for its simple, readable syntax and powerful capabilities.

Why Python is great:
• **Easy to learn**: Clear, English-like syntax
• **Versatile**: Web development, AI, data science, automation
• **Rich ecosystem**: Thousands of libraries and frameworks
• **Community**: Large, helpful developer community

For AI/ML specifically:
• **NumPy**: Numerical computing
• **Pandas**: Data manipulation
• **Scikit-learn**: Machine learning algorithms
• **TensorFlow/PyTorch**: Deep learning

Python lets you focus on solving problems rather than wrestling with complex syntax. Are you interested in learning Python for a specific purpose?"""

    else:
        return f"""I understand you're asking about: "{message}"

I'm currently running in smart mode while the full LLM model loads. I can provide helpful information on topics like:
• **Technology**: AI, machine learning, programming
• **Science**: Data science, computer science concepts
• **Learning**: Programming languages, career advice
• **General**: Explanations, discussions, problem-solving

Could you be more specific about what you'd like to know? I'm here to help and will provide the most useful information I can!

If you're looking for creative writing, storytelling, or very specific technical details, the full LLM model will provide even better responses once it's loaded."""

def generate_llm_response(message: str, max_length: int = 200, temperature: float = 0.8, top_p: float = 0.9, do_sample: bool = True) -> tuple:
    """Generate response using actual LLM model or smart fallback"""
    global model, tokenizer, model_loaded, torch_available

    if not torch_available:
        return get_smart_fallback_response(message), "smart_fallback_mode", len(message.split())

    if not model_loaded or model is None or tokenizer is None:
        return get_smart_fallback_response(message), "smart_fallback_mode", len(message.split())

    try:
        import torch

        # Prepare input with conversation format
        input_text = f"Human: {message}\nAssistant:"

        # Tokenize input
        inputs = tokenizer.encode(input_text, return_tensors="pt")

        # Generate response
        with torch.no_grad():
            outputs = model.generate(
                inputs,
                max_length=inputs.shape[1] + max_length,
                temperature=temperature,
                top_p=top_p,
                do_sample=do_sample,
                pad_token_id=tokenizer.eos_token_id,
                eos_token_id=tokenizer.eos_token_id,
                num_return_sequences=1,
                repetition_penalty=1.1,
                length_penalty=1.0
            )

        # Decode response
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Extract only the assistant's response
        if "Assistant:" in response:
            response = response.split("Assistant:")[-1].strip()

        # Remove the input text if it's still there
        if input_text.replace("Assistant:", "").strip() in response:
            response = response.replace(input_text.replace("Assistant:", "").strip(), "").strip()

        # Clean up the response
        response = response.strip()
        if not response or len(response) < 10:
            return get_smart_fallback_response(message), "smart_fallback_mode", len(message.split())

        # Count tokens
        tokens_used = len(tokenizer.encode(response))

        return response, os.getenv("MODEL_NAME", "microsoft/DialoGPT-small"), tokens_used
    except Exception as e:
        logger.error(f"Error generating LLM response: {str(e)}")
        return get_smart_fallback_response(message), "smart_fallback_mode", len(message.split())

@app.get("/", response_model=HealthResponse)
async def root():
    """Health check endpoint"""
    return HealthResponse(
        status="healthy",
        model_loaded=model_loaded,
        timestamp=datetime.now().isoformat()
    )

@app.get("/health", response_model=HealthResponse)
async def health_check():
    """Detailed health check"""
    return HealthResponse(
        status="healthy" if model_loaded else "smart_mode",
        model_loaded=model_loaded,
        timestamp=datetime.now().isoformat()
    )

@app.post("/chat", response_model=ChatResponse)
async def chat(
    request: ChatRequest,
    user: str = Depends(verify_api_key)
):
    """Main chat endpoint using real LLM model or smart fallback"""
    start_time = datetime.now()
    try:
        # Generate response using actual LLM or smart fallback
        response_text, model_used, tokens_used = generate_llm_response(
            request.message,
            request.max_length,
            request.temperature,
            request.top_p,
            request.do_sample
        )

        # Calculate processing time
        processing_time = (datetime.now() - start_time).total_seconds()

        return ChatResponse(
            response=response_text,
            model_used=model_used,
            timestamp=datetime.now().isoformat(),
            processing_time=processing_time,
            tokens_used=tokens_used,
            model_loaded=model_loaded
        )
    except Exception as e:
        logger.error(f"Error in chat endpoint: {str(e)}")
        # Even if there's an error, provide a helpful response
        return ChatResponse(
            response="I'm experiencing some technical difficulties, but I'm still here to help! Could you please try rephrasing your question?",
            model_used="error_recovery_mode",
            timestamp=datetime.now().isoformat(),
            processing_time=(datetime.now() - start_time).total_seconds(),
            tokens_used=0,
            model_loaded=model_loaded
        )

@app.get("/model/info")  # route path assumed
async def get_model_info(user: str = Depends(verify_api_key)):
    """Get information about the loaded model"""
    return {
        "model_name": os.getenv("MODEL_NAME", "microsoft/DialoGPT-small"),
        "model_loaded": model_loaded,
        "torch_available": torch_available,
        "status": "active" if model_loaded else "smart_fallback_mode",
        "capabilities": [
            "Real LLM text generation" if model_loaded else "Smart fallback responses",
            "Conversational AI responses",
            "Dynamic response generation" if model_loaded else "Contextual smart responses",
            "Adjustable temperature and top_p" if model_loaded else "Fixed high-quality responses",
            "Natural language understanding"
        ],
        "version": "4.1.0",
        "type": "Real LLM Model" if model_loaded else "Smart Fallback Mode"
    }

if __name__ == "__main__":
    # For Hugging Face Spaces
    port = int(os.getenv("PORT", "7860"))
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=port,
        reload=False
    )
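
# ---------------------------------------------------------------------------
# Example client call (a sketch, not part of the service): how a caller might
# exercise the /chat endpoint defined above. It assumes the app is running
# locally on the default port 7860 and that the key sent in the Authorization
# header matches one of the entries in API_KEYS.
#
#   import requests
#
#   resp = requests.post(
#       "http://localhost:7860/chat",
#       headers={"Authorization": "Bearer <your API_KEY_1 value>"},
#       json={"message": "What is machine learning?"},
#       timeout=120,
#   )
#   print(resp.json()["response"])
# ---------------------------------------------------------------------------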