# medical_ai.py - VERSION COMPETITION OPTIMISÉE AVEC MEDGEMMA import os import json import numpy as np from typing import List, Dict, Any from sentence_transformers import SentenceTransformer import faiss from functools import lru_cache from transformers import NllbTokenizer, AutoModelForSeq2SeqLM, pipeline from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig import torch from typing import Optional import logging import re # Set up logging logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) # === CONFIGURATION COMPÉTITION OPTIMISÉE === EMBEDDING_MODEL_NAME = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2" NLLB_MODEL_NAME = "facebook/nllb-200-distilled-600M" # MODÈLE PRINCIPAL - BioGPT (CPU-friendly, medical-specific) MODEL_NAME = "microsoft/BioGPT" PATIENT_RECORDS_PATH = "patient_records.json" # Configuration optimisée pour performance maximale DEVICE = "cuda" if torch.cuda.is_available() else "cpu" MAX_LENGTH = 512 TEMPERATURE = 0.7 TOP_P = 0.9 TOP_K = 50 # Configuration quantification USE_4BIT = True # Utilise 4-bit par défaut, mettre False pour 8-bit USE_8BIT = False # Sera activé automatiquement si USE_4BIT = False # === 1. DÉTECTION DE LANGUE AVANCÉE === class AdvancedLanguageDetector: def __init__(self): try: # Utilise un modèle plus précis pour la détection self.lang_id = pipeline("text-classification", model="papluca/xlm-roberta-base-language-detection", device=-1) # Force CPU self.lang_map = { 'fr': 'fr', 'en': 'en', 'bss': 'bss', 'dua': 'dua', 'ewo': 'ewo', 'fr-FR': 'fr', 'en-EN': 'en', 'fr_XX': 'fr', 'en_XX': 'en', 'LABEL_0': 'en', 'LABEL_1': 'fr' # Fallbacks } logger.info("Advanced language detector initialized") except Exception as e: logger.error(f"Error initializing language detector: {str(e)}") self.lang_id = None @lru_cache(maxsize=256) def detect_language(self, text: str) -> str: if not text.strip(): return 'en' # Always use model prediction if available, only fallback to keywords if model fails if self.lang_id: try: pred = self.lang_id(text)[0] detected = pred['label'] if isinstance(pred, dict) else str(pred) confidence = pred.get('score', 0.5) if isinstance(pred, dict) else 0.5 # Lower threshold to 0.6 (was 0.8) # But always use model prediction if it returns a label return self.lang_map.get(detected, 'en') except Exception as e: logger.warning(f"Language model failed, falling back to keyword detection: {e}") return self._keyword_detection(text) return self._keyword_detection(text) def _keyword_detection(self, text: str) -> str: """Détection par mots-clés comme fallback""" french_indicators = [ 'que', 'quoi', 'comment', 'pourquoi', 'symptômes', 'maladie', 'traitement', 'médecin', 'santé', 'bonjour', 'avez-vous', 'je', 'vous', 'nous', 'mal', 'fièvre', 'douleur', 'depuis', 'combien', 'où', 'quand', 'le', 'la', 'les', 'des', 'avec', 'sans', 'est-ce', 'cela', 'cette', 'mon', 'ma', 'ta', 'ton', 'tes', 'notre', 'votre', 'leurs', 'être', 'avoir', 'faire', 'pouvez-vous', 'aider', 'malade', 'hôpital', 'urgence', 'soin', 'traiter', 'guérir', 'maladies', 'symptome', 'symptômes', 'consultation', 'prescription', 'ordonnance', 'analyse', 'prise de sang', 'toux', 'rhume', 'grippe', 'température', 'douleurs', 'vomissement', 'diarrhée', 'fatigue', 'enfant', 'adulte', 'femme', 'homme', 'grossesse', 'enceinte', 'accouchement', 'bébé', 'vaccin', 'vaccination', 'piqûre', 'piqure', 'allergie', 'antibiotique', 'antalgique', 'paracétamol', 'ibuprofène', 'aspirine', 'pharmacie', 'pharmacien', 'consulté', 'consultation', 'urgence', 'urgence médicale', 'urgence vitale', 'urgence absolue' ] english_indicators = ['what', 'how', 'why', 'symptoms', 'disease', 'treatment', 'doctor', 'health'] text_lower = text.lower() fr_score = sum(2 if indicator in text_lower else 0 for indicator in french_indicators) en_score = sum(2 if indicator in text_lower else 0 for indicator in english_indicators) return 'fr' if fr_score > en_score else 'en' # === 2. TRADUCTION OPTIMISÉE === class OptimizedTranslator: def __init__(self, model_name=NLLB_MODEL_NAME): try: self.tokenizer = NllbTokenizer.from_pretrained(model_name) self.model = AutoModelForSeq2SeqLM.from_pretrained( model_name, torch_dtype=torch.float32, # CPU optimized low_cpu_mem_usage=True ) self.lang_code_map = { 'fr': 'fra_Latn', 'en': 'eng_Latn', 'bss': 'bss_Latn', 'dua': 'dua_Latn', 'ewo': 'ewo_Latn', } logger.info("Optimized translator initialized") except Exception as e: logger.error(f"Error initializing translator: {str(e)}") self.tokenizer = None self.model = None @lru_cache(maxsize=256) def translate(self, text: str, source_lang: str, target_lang: str) -> str: if not text.strip() or source_lang == target_lang: return text if self.tokenizer is None or self.model is None: return text try: src = self.lang_code_map.get(source_lang, 'eng_Latn') tgt = self.lang_code_map.get(target_lang, 'eng_Latn') self.tokenizer.src_lang = src inputs = self.tokenizer(text, return_tensors="pt", max_length=512, truncation=True) with torch.no_grad(): generated_tokens = self.model.generate( **inputs, forced_bos_token_id=self.tokenizer.convert_tokens_to_ids(tgt), max_length=512, num_beams=4, # Améliore la qualité early_stopping=True ) result = self.tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0] return result except Exception as e: logger.error(f"Translation error: {str(e)}") return text # === 3. RAG MÉDICAL AVANCÉ === class AdvancedMedicalRAG: def __init__(self, embedding_model_name=EMBEDDING_MODEL_NAME, records_path=PATIENT_RECORDS_PATH): try: self.embedder = SentenceTransformer(embedding_model_name) if not os.path.exists(records_path): logger.warning(f"Creating sample medical records...") self._create_sample_records(records_path) with open(records_path, 'r', encoding='utf-8') as f: self.records = json.load(f) # Construction d'indices spécialisés self.medical_chunks = [] self.educational_chunks = [] self.emergency_chunks = [] self.prevention_chunks = [] self._build_specialized_chunks() # Indices FAISS multiples pour différents types de requêtes self.medical_index, _ = self._build_faiss_index(self.medical_chunks) self.edu_index, _ = self._build_faiss_index(self.educational_chunks) self.emergency_index, _ = self._build_faiss_index(self.emergency_chunks) self.prevention_index, _ = self._build_faiss_index(self.prevention_chunks) logger.info(f"Advanced RAG initialized: {len(self.medical_chunks)} medical, " f"{len(self.educational_chunks)} educational, {len(self.emergency_chunks)} emergency chunks") except Exception as e: logger.error(f"Error initializing Advanced RAG: {str(e)}") self._initialize_fallback() def _create_sample_records(self, path: str): """Crée des enregistrements médicaux de base pour la compétition""" sample_records = [ { "id": "malaria_001", "diagnosis": {"en": "Malaria (Plasmodium falciparum)", "fr": "Paludisme (Plasmodium falciparum)"}, "symptoms": {"en": "High fever, chills, headache, nausea, vomiting, fatigue", "fr": "Fièvre élevée, frissons, maux de tête, nausées, vomissements, fatigue"}, "medications": [{"name": {"en": "Artemether-Lumefantrine", "fr": "Artéméther-Luméfantrine"}, "dosage": "20mg/120mg twice daily for 3 days"}], "care_instructions": {"en": "Complete bed rest, increase fluid intake, complete full medication course, return if symptoms worsen or fever persists after 48 hours", "fr": "Repos complet au lit, augmenter l'apport hydrique, terminer le traitement complet, revenir si les symptômes s'aggravent ou si la fièvre persiste après 48 heures"} }, { "id": "diabetes_prevention", "context_type": "prevention", "topic": {"en": "Type 2 Diabetes Prevention", "fr": "Prévention du Diabète de Type 2"}, "educational_content": {"en": "Maintain healthy BMI (18.5-24.9), engage in 150 minutes moderate exercise weekly, consume balanced diet rich in fiber and low in processed sugars, regular blood glucose monitoring for high-risk individuals", "fr": "Maintenir un IMC sain (18,5-24,9), pratiquer 150 minutes d'exercice modéré par semaine, consommer une alimentation équilibrée riche en fibres et pauvre en sucres transformés, surveillance régulière de la glycémie pour les personnes à risque"}, "target_group": "Adults over 30, family history of diabetes, sedentary lifestyle" } ] with open(path, 'w', encoding='utf-8') as f: json.dump(sample_records, f, ensure_ascii=False, indent=2) def _initialize_fallback(self): """Initialise un système de fallback basique""" self.medical_chunks = ["General medical consultation and symptom assessment"] self.educational_chunks = ["Health education and prevention guidelines"] self.emergency_chunks = ["Emergency medical procedures and protocols"] self.prevention_chunks = ["Disease prevention and health maintenance"] self.medical_index = None self.edu_index = None self.emergency_index = None self.prevention_index = None def _build_specialized_chunks(self): """Construit des chunks spécialisés pour différents types de requêtes médicales""" for rec in self.records: try: # Chunks médicaux (diagnostics, traitements) if 'diagnosis' in rec: medical_parts = [] medical_parts.append(f"Condition: {rec['diagnosis'].get('en', '')}") if 'symptoms' in rec: medical_parts.append(f"Symptoms: {rec['symptoms'].get('en', '')}") if 'medications' in rec: meds = [f"{m['name'].get('en', '')} ({m.get('dosage', '')})" for m in rec['medications']] medical_parts.append(f"Treatment: {', '.join(meds)}") if 'care_instructions' in rec: medical_parts.append(f"Care instructions: {rec['care_instructions'].get('en', '')}") if medical_parts: self.medical_chunks.append(". ".join(medical_parts)) # Chunks éducatifs if rec.get('context_type') == 'prevention' or 'educational_content' in rec: edu_parts = [] if 'topic' in rec: edu_parts.append(f"Topic: {rec['topic'].get('en', '')}") if 'educational_content' in rec: edu_parts.append(f"Information: {rec['educational_content'].get('en', '')}") if 'target_group' in rec: edu_parts.append(f"Target: {rec['target_group']}") if edu_parts: chunk = ". ".join(edu_parts) self.educational_chunks.append(chunk) if 'prevention' in chunk.lower(): self.prevention_chunks.append(chunk) # Chunks d'urgence if rec.get('context_type') == 'emergency_education' or 'emergency' in str(rec).lower(): emergency_parts = [] if 'scenario' in rec: emergency_parts.append(f"Emergency: {rec['scenario'].get('en', '')}") if 'action_steps' in rec: emergency_parts.append(f"Actions: {rec['action_steps'].get('en', '')}") if emergency_parts: self.emergency_chunks.append(". ".join(emergency_parts)) except Exception as e: logger.error(f"Error processing record: {str(e)}") continue def _build_faiss_index(self, chunks): if not chunks: return None, None try: embeddings = self.embedder.encode(chunks, show_progress_bar=False, convert_to_numpy=True) index = faiss.IndexFlatL2(embeddings.shape[1]) index.add(embeddings) return index, embeddings except Exception as e: logger.error(f"Error building FAISS index: {str(e)}") return None, None def get_smart_contexts(self, question: str, lang: str = "en") -> Dict[str, List[str]]: """Récupère des contextes intelligents basés sur le type de question""" question_lower = question.lower() contexts = { "medical": [], "educational": [], "emergency": [], "prevention": [] } try: q_emb = self.embedder.encode([question], convert_to_numpy=True) # Détection du type de question is_emergency = any(word in question_lower for word in ['emergency', 'urgent', 'severe', 'critical', 'urgence', 'grave']) is_prevention = any(word in question_lower for word in ['prevent', 'prevention', 'avoid', 'prévenir', 'éviter']) is_educational = any(word in question_lower for word in ['what is', 'explain', 'how', 'why', "qu'est-ce que", 'expliquer', 'comment', 'pourquoi']) # Récupération contextuelle intelligente if is_emergency and self.emergency_index: _, I = self.emergency_index.search(q_emb, min(3, len(self.emergency_chunks))) contexts["emergency"] = [self.emergency_chunks[i] for i in I[0] if i < len(self.emergency_chunks)] if is_prevention and self.prevention_index: _, I = self.prevention_index.search(q_emb, min(2, len(self.prevention_chunks))) contexts["prevention"] = [self.prevention_chunks[i] for i in I[0] if i < len(self.prevention_chunks)] if is_educational and self.edu_index: _, I = self.edu_index.search(q_emb, min(3, len(self.educational_chunks))) contexts["educational"] = [self.educational_chunks[i] for i in I[0] if i < len(self.educational_chunks)] # Toujours inclure du contexte médical général if self.medical_index: n_med = 4 if not any(contexts.values()) else 2 _, I = self.medical_index.search(q_emb, min(n_med, len(self.medical_chunks))) contexts["medical"] = [self.medical_chunks[i] for i in I[0] if i < len(self.medical_chunks)] except Exception as e: logger.error(f"Error getting smart contexts: {str(e)}") return contexts # === 4. GÉNÉRATEUR LLM OPTIMISÉ AVEC MISTRAL 7B INSTRUCT === class CompetitionMedicalLLM: def __init__(self, model_name: str = MODEL_NAME): self.device = DEVICE logger.info(f"Loading BioGPT model {model_name} on {self.device}...") try: hf_token = os.environ.get("HUGGINGFACE_TOKEN") or os.environ.get("HF_TOKEN") self.tokenizer = AutoTokenizer.from_pretrained( model_name, token=hf_token, trust_remote_code=True ) if self.tokenizer.pad_token is None: self.tokenizer.pad_token = self.tokenizer.eos_token model_kwargs = { "token": hf_token, "trust_remote_code": True, "low_cpu_mem_usage": True, "torch_dtype": torch.float32, } if DEVICE == "cuda": model_kwargs["device_map"] = "auto" model_kwargs["torch_dtype"] = torch.float16 self.model = AutoModelForCausalLM.from_pretrained(model_name, **model_kwargs) pipeline_kwargs = { "model": self.model, "tokenizer": self.tokenizer, "torch_dtype": torch.float16 if DEVICE == "cuda" else torch.float32, "device_map": "auto" if DEVICE == "cuda" else None, } if DEVICE == "cpu": pipeline_kwargs["device"] = -1 self.generator = pipeline("text-generation", **pipeline_kwargs) logger.info(f"BioGPT model loaded successfully on {self.device}") except Exception as e: logger.error(f"Error loading BioGPT model: {str(e)}") self.generator = None def _build_biogpt_prompt(self, question: str, contexts: Dict[str, List[str]], lang: str = "en") -> str: # Interactive medical consultation prompt with conversational AI context_str = "\n".join([f"- {c}" for ctxs in contexts.values() for c in ctxs]) prompt = ( "You are a caring medical assistant having a conversation with a patient. " "You can ask follow-up questions to gather more information, just like a real doctor would. " "Structure your response as a natural conversation:\n" "1. Acknowledge their concern with empathy\n" "2. Ask relevant follow-up questions to understand their symptoms better\n" "3. Based on their answers, provide:\n" " - SYMPTOMS: What they're experiencing\n" " - POSSIBLE CAUSES: What might be causing this\n" " - TREATMENT: Medications and dosages (if applicable)\n" " - ADVICE: Practical relief suggestions\n" " - REFERRAL: When to see a doctor\n" "Be conversational, warm, and professional. Use simple language. " "Consider Cameroonian healthcare context. " "End with: 'This is an AI-generated answer. Please always contact a doctor for more precision.'\n" f"Available medical information:\n{context_str}\n" f"Patient: {question}\n" "Your conversational response:" ) return prompt def generate_expert_response(self, question: str, contexts: Dict[str, List[str]], lang: str = "en") -> str: if self.generator is None: logger.error("LLM generator is None! The model may have failed to load. Returning fallback response.") return self._expert_fallback_response(question, contexts, lang) try: prompt = self._build_biogpt_prompt(question, contexts, lang) generation_config = { "max_new_tokens": 256, "temperature": TEMPERATURE, "top_p": TOP_P, "top_k": TOP_K, "do_sample": True, "pad_token_id": self.tokenizer.eos_token_id, "repetition_penalty": 1.1, "length_penalty": 1.0, } response = self.generator(prompt, **generation_config) full_text = response[0]['generated_text'] logger.info(f"[BIOGPT RAW OUTPUT] {full_text}") # Remove the prompt from the output if present if full_text.startswith(prompt): response_text = full_text[len(prompt):].strip() else: response_text = full_text.strip() return response_text except Exception as e: logger.error(f"Error in BioGPT generation: {str(e)}. Returning fallback response.") return self._expert_fallback_response(question, contexts, lang) def _medgemma_post_process(self, response: str, lang: str) -> str: """Post-traitement spécialisé pour MedGemma""" # Nettoyage des tokens spéciaux MedGemma cleanup_patterns = [ r".*", r".*", r"", r"", r"\n\nuser:", r"\n\nmodel:", r"\n\nPatient:", r"\n\nDoctor:", ] for pattern in cleanup_patterns: response = re.sub(pattern, "", response, flags=re.DOTALL).strip() # Suppression des répétitions lines = response.split('\n') unique_lines = [] for line in lines: if line.strip() and line.strip() not in [l.strip() for l in unique_lines]: unique_lines.append(line) response = '\n'.join(unique_lines) # Vérification de la qualité de la réponse if not response.strip() or len(response.strip()) < 10: response = "[Réponse générée insuffisante par MedGemma]" # Ajout du disclaimer médical si nécessaire disclaimer = { "en": "\n\n⚕️ Medical Disclaimer: This information is for educational purposes. Always consult a qualified healthcare professional for proper diagnosis and treatment.", "fr": "\n\n⚕️ Avertissement médical: Cette information est à des fins éducatives. Consultez toujours un professionnel de santé qualifié pour un diagnostic et traitement appropriés." } if "consult" not in response.lower() and "disclaimer" not in response.lower() and len(response) > 50: response += disclaimer.get(lang, disclaimer["en"]) return response.strip() def _expert_fallback_response(self, question: str, contexts: Dict[str, List[str]], lang: str) -> str: """Réponse de fallback de niveau expert, amicale et dans la langue détectée""" templates = { "en": { "intro": "Thank you for your medical question! I'm here to help with professional healthcare guidance.", "structure": ( "\n\n🔍 Assessment: Your question requires careful medical attention and professional evaluation." "\n\n💡 General Advice: Monitor your symptoms, maintain good health practices, and don't hesitate to seek professional medical care." "\n\n⚠️ Important: For accurate diagnosis and treatment, please consult a qualified healthcare provider. Take care!" ), "context_available": "Based on medical information: " }, "fr": { "intro": "Merci pour votre question médicale ! Je suis là pour vous aider avec des conseils de santé professionnels.", "structure": ( "\n\n🔍 Évaluation : Votre question nécessite une attention médicale attentive et une évaluation professionnelle." "\n\n💡 Conseil général : Surveillez vos symptômes, maintenez de bonnes pratiques de santé, et n'hésitez pas à consulter un professionnel de santé." "\n\n⚠️ Important : Pour un diagnostic et un traitement précis, veuillez consulter un professionnel de santé qualifié. Prenez soin de vous !" ), "context_available": "Selon les informations médicales : " } } template = templates.get(lang, templates["en"]) response = template["intro"] # Ajout du contexte disponible all_contexts = [] for context_list in contexts.values(): all_contexts.extend(context_list) if all_contexts: response += f" {template['context_available']}{' | '.join(all_contexts[:2])}" response += template["structure"] return response # === PIPELINE PRINCIPAL COMPÉTITION AVEC MEDGEMMA === class CompetitionMedicalAIPipeline: def __init__(self): logger.info("🏆 Initializing COMPETITION Medical AI Pipeline with MedGemma-4B...") try: self.lang_detector = AdvancedLanguageDetector() self.translator = OptimizedTranslator() self.rag = AdvancedMedicalRAG() self.llm = CompetitionMedicalLLM() logger.info("🎯 Competition Medical AI Pipeline with MedGemma-4B ready for excellence!") except Exception as e: logger.error(f"Error initializing competition pipeline: {str(e)}") raise def process(self, question: str, user_lang: str = "auto", conversation_history: list = None) -> Dict[str, Any]: """Traitement de niveau compétition avec MedGemma-4B""" try: if not question or not question.strip(): return self._empty_question_response(user_lang) # Détection langue avancée detected_lang = self.lang_detector.detect_language(question) if user_lang == "auto" else user_lang logger.info(f"🎯 Processing competition-level question in {detected_lang} with MedGemma-4B") # Traduction si nécessaire avec qualité optimale question_en = question if detected_lang != "en": question_en = self.translator.translate(question, detected_lang, "en") # RAG intelligent multi-contexte smart_contexts = self.rag.get_smart_contexts(question_en, "en") # Génération experte avec MedGemma try: response_en = self.llm.generate_expert_response(question_en, smart_contexts, "en") except Exception as e: logger.error(f"MedGemma generation failed: {str(e)}. Using fallback.") response_en = self._expert_fallback_response(question, smart_contexts, "en") # Gestion des réponses vides de MedGemma if (response_en.strip() == "[Réponse générée insuffisante par MedGemma]" or response_en.strip() == "[Aucune réponse générée par le modèle]" or len(response_en.strip()) < 20): logger.warning("MedGemma generated insufficient response, using enhanced fallback.") response_en = self._enhanced_fallback_response(question, smart_contexts, "en") # Traduction retour avec qualité optimale final_response = response_en if detected_lang != "en": final_response = self.translator.translate(response_en, "en", detected_lang) # Contextes utilisés pour transparence all_contexts = [] for context_list in smart_contexts.values(): all_contexts.extend(context_list) if detected_lang != "en" and all_contexts: all_contexts = [self.translator.translate(ctx, "en", detected_lang) for ctx in all_contexts] return { "response": final_response, "source_lang": detected_lang, "context_used": all_contexts[:5], # Top 5 contextes "confidence": "high", # Indicateur de qualité "model_used": "MedGemma-4B-IT" } except Exception as e: logger.error(f"Competition processing error: {str(e)}") # Always use detected_lang for fallback, default to 'en' if not available fallback_lang = detected_lang if 'detected_lang' in locals() and detected_lang in ["en", "fr"] else "en" fallback_response = self._expert_fallback_response(question, {}, fallback_lang) return { "response": fallback_response, "source_lang": fallback_lang, "context_used": [], "confidence": "medium", "model_used": "Fallback" } def _enhanced_fallback_response(self, question: str, contexts: Dict[str, List[str]], lang: str) -> str: """Réponse de fallback améliorée quand MedGemma échoue""" templates = { "en": { "intro": "Thank you for your medical question. Based on general medical knowledge:", "general_advice": { "symptoms": "If you're experiencing concerning symptoms, it's important to monitor them closely and seek medical attention if they worsen or persist.", "prevention": "Prevention is key to maintaining good health. Regular check-ups, healthy lifestyle choices, and following medical guidelines are essential.", "treatment": "Treatment approaches vary depending on the specific condition. A healthcare professional can provide personalized recommendations.", "emergency": "For any urgent medical concerns, please seek immediate medical attention or contact emergency services.", "default": "Medical questions require careful evaluation by qualified healthcare professionals who can assess your specific situation." }, "disclaimer": "\n\n⚕️ Important: This general information cannot replace professional medical advice. Please consult a qualified healthcare provider for proper diagnosis and treatment." }, "fr": { "intro": "Merci pour votre question médicale. Selon les connaissances médicales générales :", "general_advice": { "symptoms": "Si vous ressentez des symptômes préoccupants, il est important de les surveiller attentivement et de consulter un médecin si ils s'aggravent ou persistent.", "prevention": "La prévention est essentielle pour maintenir une bonne santé. Des contrôles réguliers, des choix de vie sains et le respect des directives médicales sont essentiels.", "treatment": "Les approches de traitement varient selon l'état spécifique. Un professionnel de santé peut fournir des recommandations personnalisées.", "emergency": "Pour toute préoccupation médicale urgente, veuillez consulter immédiatement un médecin ou contacter les services d'urgence.", "default": "Les questions médicales nécessitent une évaluation attentive par des professionnels de santé qualifiés qui peuvent évaluer votre situation spécifique." }, "disclaimer": "\n\n⚕️ Important : Cette information générale ne peut remplacer un avis médical professionnel. Veuillez consulter un professionnel de santé qualifié pour un diagnostic et un traitement appropriés." } } template = templates.get(lang, templates["en"]) question_lower = question.lower() # Sélection du conseil basé sur le type de question advice_key = "default" if any(word in question_lower for word in ['symptom', 'feel', 'pain', 'hurt', 'symptôme', 'douleur', 'mal']): advice_key = "symptoms" elif any(word in question_lower for word in ['prevent', 'avoid', 'prévenir', 'éviter']): advice_key = "prevention" elif any(word in question_lower for word in ['treat', 'cure', 'medication', 'traitement', 'médicament']): advice_key = "treatment" elif any(word in question_lower for word in ['emergency', 'urgent', 'severe', 'urgence', 'grave']): advice_key = "emergency" response = template["intro"] # Ajout du contexte si disponible all_contexts = [] for context_list in contexts.values(): all_contexts.extend(context_list) if all_contexts: context_summary = " | ".join(all_contexts[:2]) response += f"\n\n📋 Contexte médical pertinent : {context_summary}" response += f"\n\n💡 {template['general_advice'][advice_key]}" response += template["disclaimer"] return response def _empty_question_response(self, user_lang: str) -> Dict[str, Any]: """Réponse pour question vide""" responses = { "en": "Please provide a medical question for me to assist you with professional healthcare guidance using MedGemma-4B.", "fr": "Veuillez poser une question médicale pour que je puisse vous fournir des conseils de santé professionnels avec MedGemma-4B." } lang = user_lang if user_lang != "auto" else "en" return { "response": responses.get(lang, responses["en"]), "source_lang": lang, "context_used": [], "confidence": "high", "model_used": "MedGemma-4B-IT" } def _expert_fallback_response(self, question: str, contexts: Dict[str, List[str]], lang: str) -> str: """Réponse de fallback de niveau expert""" templates = { "en": { "intro": "Thank you for your medical question! I'm here to help with professional healthcare guidance using MedGemma-4B.", "structure": ( "\n\n🔍 Assessment: Your question requires careful medical attention and professional evaluation." "\n\n💡 General Guidance: Please monitor your symptoms, maintain good health practices, and seek professional medical care when needed." "\n\n⚠️ Important: For accurate diagnosis and treatment, always consult a qualified healthcare provider. Your health is important!" ), "context_available": "Based on available medical information: " }, "fr": { "intro": "Merci pour votre question médicale ! Je suis là pour vous aider avec des conseils de santé professionnels utilisant MedGemma-4B.", "structure": ( "\n\n🔍 Évaluation : Votre question nécessite une attention médicale attentive et une évaluation professionnelle." "\n\n💡 Conseil général : Surveillez vos symptômes, maintenez de bonnes pratiques de santé, et consultez un professionnel médical quand nécessaire." "\n\n⚠️ Important : Pour un diagnostic et un traitement précis, consultez toujours un professionnel de santé qualifié. Votre santé est importante !" ), "context_available": "Selon les informations médicales disponibles : " } } template = templates.get(lang, templates["en"]) response = template["intro"] # Ajout du contexte disponible all_contexts = [] for context_list in contexts.values(): all_contexts.extend(context_list) if all_contexts: response += f" {template['context_available']}{' | '.join(all_contexts[:2])}" response += template["structure"] return response def get_model_info(self) -> Dict[str, Any]: """Retourne des informations sur le modèle utilisé""" return { "model_name": MODEL_NAME, "quantization": "4-bit" if USE_4BIT else "8-bit" if USE_8BIT else "full", "device": DEVICE, "max_length": MAX_LENGTH, "temperature": TEMPERATURE } # Alias pour compatibilité MedicalAIPipeline = CompetitionMedicalAIPipeline