# Spaces: Running
"""
Medical chatbot using Google Gemini (the first available of several Flash/Pro
models) with source citations and confidence scoring.
"""
from typing import List, Dict, Any

import google.generativeai as genai
from google.generativeai import types

from config import GOOGLE_API_KEY, LLM_MODEL, TOP_K, SIMILARITY_THRESHOLD
from embedding_service import EmbeddingService
class MedicalChatbot:
    """Answer medical questions from retrieved context using a Gemini model.

    For each query the bot screens it for medical relevance, retrieves
    matching documents through the supplied embedding service, filters
    them by ``SIMILARITY_THRESHOLD``, and asks the Gemini model to answer
    from that context only. Every answer is returned together with its
    citations and a confidence label derived from the similarity scores.
    """

    # Model names probed in order; the first one that answers a test
    # prompt is used for the lifetime of the instance.
    _MODEL_CANDIDATES = (
        "models/gemini-2.5-flash",  # Fast and efficient
        "models/gemini-2.0-flash",  # Alternative fast model
        "models/gemini-2.5-pro",  # More capable
        "models/gemini-flash-latest",
        "models/gemini-pro-latest",
    )

    # Substrings (not whole words) whose presence marks a query as medical.
    # Stems such as 'inflam' and 'swell' rely on substring matching.
    _MEDICAL_KEYWORDS = (
        'health', 'medical', 'disease', 'symptom', 'treatment', 'diagnosis',
        'medicine', 'patient', 'doctor', 'hospital', 'therapy', 'condition',
        'illness', 'sick', 'pain', 'cure', 'medication', 'physician',
        'nurse', 'clinical', 'healthcare', 'surgery', 'heal',
        'blood', 'heart', 'lung', 'brain', 'cancer', 'diabetes', 'covid',
        'vaccine', 'pandemic', 'infection', 'fever', 'cough', 'ache',
        'eye', 'vision', 'irritation', 'red', 'tear', 'dry', 'irritated',
        'head', 'headache', 'stomach', 'nausea', 'dizzy', 'tired',
        'chest', 'breathing', 'breath', 'wheeze', 'nose', 'runny',
        'ear', 'throat', 'sore', 'inflam', 'swell', 'burn', 'itch',
        'suffering', 'problem', 'issue', 'hurt', 'injury', 'wound',
    )

    # Phrasings that typically introduce a personal health concern.
    _MEDICAL_PATTERNS = (
        'i have', 'i am suffering', 'i feel', 'why do i', 'what should i',
        'why is', 'how to', 'how can i', 'what causes',
    )

    def __init__(self, embedding_service: "EmbeddingService"):
        """Configure Gemini and select the first model that responds.

        Args:
            embedding_service: Retrieval backend; its ``search(query,
                top_k=...)`` method is called for every medical query.

        Raises:
            RuntimeError: If none of the candidate models answers the
                probe request.
        """
        self.embedding_service = embedding_service

        # Configure Gemini
        genai.configure(api_key=GOOGLE_API_KEY)

        self.model = None
        for model_name in self._MODEL_CANDIDATES:
            try:
                candidate = genai.GenerativeModel(model_name)
                # Building the object is not proof the model is usable,
                # so send a tiny request to test that it actually works.
                candidate.generate_content("test")
            except Exception as e:
                # Deliberately broad: any failure just means "try the
                # next candidate".
                print(f"✗ Failed to initialize {model_name}: {str(e)[:80]}")
                continue
            # Only keep a model that passed the probe. Assigning before
            # the probe would leave a broken model in place and defeat
            # the "no model available" check below.
            self.model = candidate
            print(f"✓ Successfully initialized model: {model_name}")
            break

        if self.model is None:
            raise RuntimeError("Could not initialize any Gemini model. Please check your API key and model availability.")

        # System prompt for medical chatbot
        self.system_prompt = (
            "You are a medical information assistant. Based ONLY on the provided medical context, answer the user's question accurately and concisely.\n"
            "IMPORTANT RULES:\n"
            "1. Answer ONLY using information from the provided context below\n"
            "2. DO NOT make up or guess information\n"
            "3. If the context doesn't contain enough information, say \"Based on the available information...\"\n"
            "4. Be accurate and factual\n"
            "5. Keep answers concise and clear\n"
            '6. At the end, add a disclaimer: "⚠️ This is not medical advice. Consult healthcare professionals."\n'
        )

    def calculate_confidence_score(self, similarity_scores: List[float]) -> tuple:
        """Map similarity scores to a ``(label, score)`` confidence pair.

        The label is driven by the best match only: ``"High"`` at 0.85 or
        above, ``"Medium"`` at 0.65 or above, otherwise ``"Low"``.

        Args:
            similarity_scores: Scores of the retrieved documents.

        Returns:
            ``(label, best_score)``; ``("Low", 0.0)`` when the list is empty.
        """
        if not similarity_scores:
            return "Low", 0.0

        # Confidence based on best match
        max_score = max(similarity_scores)
        if max_score >= 0.85:
            return "High", max_score
        if max_score >= 0.65:
            return "Medium", max_score
        return "Low", max_score

    def format_context_with_citations(self, results: List[Any]) -> tuple:
        """Build the prompt context and a citation map from search hits.

        Args:
            results: Search hits, each exposing a ``metadata`` mapping
                (read for ``'text'`` and ``'source'``) and a numeric
                ``score`` attribute.

        Returns:
            ``(context, citation_map)``. ``context`` concatenates one
            ``"[Source N]\\n<text>\\n"`` section per hit. ``citation_map``
            maps ``"Source_N"`` to the citation id, a preview of the text
            (at most 300 characters plus ``"..."``), the source name, the
            score rounded to 3 decimals, and the full metadata.
        """
        context_parts = []
        citation_map = {}

        for idx, result in enumerate(results, start=1):
            # A hit without metadata is treated as having empty metadata.
            metadata = result.metadata or {}
            text = metadata.get('text', '')
            citation_id = f"[Source {idx}]"

            preview = (text[:300] + "...") if len(text) > 300 else text
            citation_map[f"Source_{idx}"] = {
                'id': citation_id,
                'text': preview,
                'source': metadata.get('source', 'unknown'),
                'similarity_score': round(result.score, 3),
                'metadata': metadata,
            }
            # Format the context more clearly
            context_parts.append(f"{citation_id}\n{text}\n")

        return "".join(context_parts), citation_map

    @staticmethod
    def _no_answer(message: str, confidence: str = "Low") -> Dict[str, Any]:
        """Return the response payload used when no answer is generated."""
        return {
            'response': message,
            'confidence': confidence,
            'confidence_score': 0.0,
            'sources': [],
            'citations': {},
        }

    def generate_response(self, user_query: str) -> Dict[str, Any]:
        """Answer a user query with citations and a confidence rating.

        Args:
            user_query: The question as typed by the user.

        Returns:
            A dict with keys ``'response'`` (answer or fallback message),
            ``'confidence'`` (``"High"``/``"Medium"``/``"Low"``, or
            ``"N/A"`` for non-medical queries), ``'confidence_score'``,
            ``'sources'`` and ``'citations'``. Model errors are not raised:
            the error text is returned in ``'response'``.
        """
        # Check if query is medical-related
        if not self.is_medical_related(user_query):
            return self._no_answer(
                "I'm a medical assistant. Please ask me medical or health-related questions only.",
                confidence="N/A",
            )

        # Search for relevant documents
        results = self.embedding_service.search(user_query, top_k=TOP_K)
        if not results.matches:
            return self._no_answer(
                "I couldn't find relevant medical information for your query. Please consult with a healthcare professional for accurate medical advice."
            )

        # Filter results by similarity threshold
        filtered_results = [
            r for r in results.matches
            if r.score >= SIMILARITY_THRESHOLD
        ]
        if not filtered_results:
            return self._no_answer(
                "I couldn't find enough reliable information for your query. Please consult with a healthcare professional."
            )

        # Format context with citations
        context, citation_map = self.format_context_with_citations(filtered_results)

        # Generate response using Gemini
        prompt = (
            f"{self.system_prompt}\n"
            "MEDICAL CONTEXT FROM DATABASE:\n"
            f"{context}\n"
            f"USER QUESTION: {user_query}\n"
            "INSTRUCTIONS:\n"
            "Based on the medical context above, provide a helpful answer to the user's question.\n"
            "- Use information from the context when available\n"
            "- If the context has relevant but not exact information, explain what you found\n"
            "- Be clear and helpful\n"
            '- End with: "⚠️ This is not medical advice. Consult healthcare professionals."\n'
            "Answer the question:"
        )

        try:
            response = self.model.generate_content(
                prompt,
                generation_config={
                    "temperature": 0.3,  # Lower temperature for more factual responses
                    "top_p": 0.8,
                    "top_k": 40,
                    "max_output_tokens": 500,
                },
            )
            answer = response.text
        except Exception as e:
            # Best effort: surface the failure as the answer text rather
            # than crashing the caller.
            # NOTE(review): the confidence below still reflects retrieval
            # quality even when generation failed - confirm this is what
            # the UI should display.
            answer = f"Error generating response: {str(e)}"
            print(f"DEBUG: Model error: {e}")
            print(f"DEBUG: Model object: {self.model}")

        # Calculate confidence
        similarity_scores = [r.score for r in filtered_results]
        confidence_level, confidence_score = self.calculate_confidence_score(similarity_scores)

        return {
            'response': answer,
            'confidence': confidence_level,
            'confidence_score': confidence_score,
            'sources': [(r.metadata or {}).get('source', 'unknown') for r in filtered_results],
            'citations': citation_map,
        }

    def is_medical_related(self, query: str) -> bool:
        """Check if query is medical-related - very permissive.

        A query is accepted when it contains any medical keyword or any
        phrasing that commonly introduces a health concern. Matching is by
        case-insensitive substring, so short keywords also match inside
        longer words (for example ``'ear'`` inside ``'learn'``).

        Args:
            query: The user's question; ``None`` or empty is not medical.

        Returns:
            ``True`` if the query looks medical, ``False`` otherwise.
        """
        if not query:
            return False

        query_lower = query.lower()

        # Accept any query that contains medical keywords or looks like a medical concern
        has_medical_keyword = any(keyword in query_lower for keyword in self._MEDICAL_KEYWORDS)
        # Also accept questions with medical-sounding patterns
        has_medical_pattern = any(pattern in query_lower for pattern in self._MEDICAL_PATTERNS)

        # Be permissive - if it sounds like a medical concern, accept it
        return has_medical_keyword or has_medical_pattern