|
|
import logging
import os
from datetime import datetime
from typing import List

from docx import Document
from docx.enum.style import WD_STYLE_TYPE
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
from docx.oxml.shared import OxmlElement, qn
from docx.shared import Inches
from docx.shared import Pt
from docx.shared import RGBColor
from dotenv import load_dotenv

from template_matcher import TemplateMatcher, TemplateMatch
|
|
|
|
|
|
|
|
# Load variables from a local .env file (if present) into the environment
# before the settings below are read.
load_dotenv()

# Path of the template database file; override with TEMPLATE_DB_PATH.
DB_PATH = os.environ.get("TEMPLATE_DB_PATH", "templates/medical_templates.pkl")
# Directory that receives the generated documents; override with OUTPUT_DIR.
OUTPUT_DIR = os.environ.get("OUTPUT_DIR", "templates_remplis")
|
|
|
|
|
class TemplateGenerator:
    """Build a filled medical report as a Word (.docx) document.

    The generated document contains, in order: a metadata header, the
    sections that could be filled from the transcription, the sections that
    could not be filled (with a manual-completion placeholder) and the
    original transcription as an appendix.

    Type annotations are written as strings so that they are not evaluated
    when the class is defined.
    """

    def __init__(self):
        """Set the output directory, create it if needed and set up logging."""
        self.output_dir = OUTPUT_DIR
        self._create_output_directory()

        # basicConfig does nothing when the root logger already has handlers,
        # so a configuration installed earlier by the caller is kept.
        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s - %(levelname)s - [GENERATOR] %(message)s'
        )

    # ------------------------------------------------------------------
    # Small private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _add_styled_run(paragraph, text, *, bold=None, italic=None, size=None, color=None):
        """Append ``text`` to ``paragraph`` and format that very run.

        Formatting has to be set on the run that holds the text: calling
        ``paragraph.add_run()`` a second time creates a new, empty run, and
        formatting set on an empty run is not visible in the document.

        Args:
            paragraph: Object exposing ``add_run(text)``.
            text: Text of the new run.
            bold: Value for ``run.bold``; left untouched when ``None``.
            italic: Value for ``run.italic``; left untouched when ``None``.
            size: Value for ``run.font.size``; left untouched when ``None``.
            color: Value for ``run.font.color.rgb``; left untouched when ``None``.

        Returns:
            The run that was added.
        """
        run = paragraph.add_run(text)
        if bold is not None:
            run.bold = bold
        if italic is not None:
            run.italic = italic
        if size is not None:
            run.font.size = size
        if color is not None:
            run.font.color.rgb = color
        return run

    @staticmethod
    def _add_labelled_paragraph(doc, label, value):
        """Add a paragraph made of a bold ``label`` followed by ``value``."""
        paragraph = doc.add_paragraph()
        paragraph.add_run(label).bold = True
        paragraph.add_run(value)
        return paragraph

    @staticmethod
    def _new_paragraph_style(styles, name):
        """Create paragraph style ``name``; return ``None`` if it already exists.

        Only the ``ValueError`` raised by ``add_style`` for a duplicate name
        is handled; any other error propagates instead of being reported as
        "style already exists".
        """
        try:
            return styles.add_style(name, WD_STYLE_TYPE.PARAGRAPH)
        except ValueError:
            logging.warning(f"Style '{name}' déjà existant")
            return None

    @staticmethod
    def _unfillable_sections(template_match):
        """Return the section matches whose ``can_fill`` flag is falsy."""
        return [s for s in template_match.section_matches.values() if not s.can_fill]

    # ------------------------------------------------------------------
    # Document construction
    # ------------------------------------------------------------------

    def _create_output_directory(self):
        """Create the output directory when it does not exist yet."""
        if not os.path.exists(self.output_dir):
            # exist_ok covers the case where another process creates the
            # directory between the check and the call.
            os.makedirs(self.output_dir, exist_ok=True)
            logging.info(f"📁 Répertoire de sortie créé: {self.output_dir}")

    def _add_custom_styles(self, doc: "Document"):
        """Register the paragraph styles used by the report.

        Creates 'Section Title', 'Section Content' and 'Document Header'.
        A style that already exists in the document is left as it is and a
        warning is logged.

        Font sizes are given in points; the values equal the half-point
        sizes the former inch-based values were truncated to.
        """
        styles = doc.styles

        section_style = self._new_paragraph_style(styles, 'Section Title')
        if section_style is not None:
            section_style.font.size = Pt(11.5)
            section_style.font.bold = True
            section_style.font.color.rgb = RGBColor(0, 51, 102)
            section_style.paragraph_format.space_after = Inches(0.1)
            section_style.paragraph_format.keep_with_next = True

        content_style = self._new_paragraph_style(styles, 'Section Content')
        if content_style is not None:
            content_style.font.size = Pt(10)
            content_style.paragraph_format.left_indent = Inches(0.25)
            content_style.paragraph_format.space_after = Inches(0.15)

        header_style = self._new_paragraph_style(styles, 'Document Header')
        if header_style is not None:
            header_style.font.size = Pt(12.5)
            header_style.font.bold = True
            header_style.font.color.rgb = RGBColor(0, 0, 0)
            header_style.paragraph_format.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
            header_style.paragraph_format.space_after = Inches(0.2)

    def _add_document_header(self, doc: "Document", template_match: "TemplateMatch", transcription_filename: str):
        """Write the title and the metadata lines at the top of the document.

        Args:
            doc: Document being built.
            template_match: Match whose ``template_info``, ``overall_score``,
                ``confidence_level`` and ``filling_percentage`` are reported.
            transcription_filename: Accepted for interface compatibility; it
                is not written to the document.
        """
        template_info = template_match.template_info

        header = doc.add_paragraph()
        header.style = 'Document Header'
        header.add_run("COMPTE-RENDU MÉDICAL GÉNÉRÉ AUTOMATIQUEMENT")

        self._add_labelled_paragraph(
            doc, "Template utilisé: ", os.path.basename(template_info.filepath)
        )

        # The doctor line is skipped when the value is empty or the
        # "Non identifié" placeholder.
        if template_info.medecin and template_info.medecin != "Non identifié":
            self._add_labelled_paragraph(doc, "Médecin: ", template_info.medecin)

        # 'centre_medical' is read with a default, so the attribute may be
        # absent from template_info.
        centre = getattr(template_info, 'centre_medical', 'Non spécifié')
        if centre and centre != "Non spécifié":
            self._add_labelled_paragraph(doc, "Centre médical: ", centre)

        self._add_labelled_paragraph(doc, "Type de document: ", template_info.type)
        self._add_labelled_paragraph(
            doc, "Date de génération: ", datetime.now().strftime("%d/%m/%Y à %H:%M")
        )
        self._add_labelled_paragraph(
            doc,
            "Score de correspondance: ",
            f"{template_match.overall_score:.3f} ({template_match.confidence_level})",
        )
        self._add_labelled_paragraph(
            doc,
            "Pourcentage de remplissage: ",
            f"{template_match.filling_percentage:.1f}%",
        )

        # Visual separator between the header and the body.
        doc.add_paragraph("_" * 80)

    def _add_filled_sections(self, doc: "Document", template_match: "TemplateMatch"):
        """Write every entry of ``template_match.extracted_data`` as a section.

        When there is no extracted data, a single explanatory paragraph is
        written instead.
        """
        if not template_match.extracted_data:
            logging.warning("❌ Aucune section à remplir trouvée")
            doc.add_paragraph("Aucune section n'a pu être remplie automatiquement.")
            return

        logging.info(f"📝 Génération de {len(template_match.extracted_data)} sections remplies")

        heading = doc.add_paragraph()
        self._add_styled_run(heading, "CONTENU EXTRAIT ET STRUCTURÉ", bold=True, size=Pt(12.5))

        for section_name, content in template_match.extracted_data.items():
            title = doc.add_paragraph()
            title.style = 'Section Title'
            title.add_run(f"{section_name.upper()}")

            # Coerce to text so that a None or non-string value does not
            # break add_run() or len() below.
            text = "" if content is None else str(content)

            body = doc.add_paragraph()
            body.style = 'Section Content'
            body.add_run(text)

            logging.info(f" ✅ Section ajoutée: {section_name} ({len(text)} caractères)")

    def _add_missing_sections(self, doc: "Document", template_match: "TemplateMatch"):
        """List the sections that could not be filled, each with a placeholder."""
        missing_sections = [s.section_name for s in self._unfillable_sections(template_match)]
        if not missing_sections:
            return

        logging.info(f"⚠️ {len(missing_sections)} sections manquantes identifiées")

        orange = RGBColor(204, 102, 0)

        missing_title = doc.add_paragraph()
        self._add_styled_run(missing_title, "SECTIONS NON REMPLIES", bold=True, color=orange)

        missing_subtitle = doc.add_paragraph()
        self._add_styled_run(
            missing_subtitle,
            "(Informations non trouvées dans la transcription)",
            color=RGBColor(102, 102, 102),
        )

        for section in missing_sections:
            missing_para = doc.add_paragraph()
            self._add_styled_run(missing_para, f"• {section}", color=orange)

            # Placeholder line to be completed by hand.
            placeholder = doc.add_paragraph()
            placeholder.style = 'Section Content'
            self._add_styled_run(
                placeholder,
                "[À COMPLÉTER MANUELLEMENT]",
                italic=True,
                color=RGBColor(153, 153, 153),
            )

    def _add_original_transcription(self, doc: "Document", transcription: str):
        """Append the raw transcription on a new page as an appendix."""
        doc.add_page_break()

        annexe_title = doc.add_paragraph()
        self._add_styled_run(
            annexe_title,
            "ANNEXE - TRANSCRIPTION ORIGINALE",
            bold=True,
            size=Pt(11.5),
            color=RGBColor(102, 102, 102),
        )

        doc.add_paragraph("=" * 60)

        transcription_para = doc.add_paragraph()
        self._add_styled_run(
            transcription_para,
            transcription,
            size=Pt(8.5),
            color=RGBColor(51, 51, 51),
        )

    def generate_filled_template(self, template_match: "TemplateMatch", transcription: str, transcription_filename: str) -> str:
        """Generate the filled report and save it as a .docx file.

        Args:
            template_match: The selected template match.
            transcription: The original transcription text.
            transcription_filename: Name of the transcription file.

        Returns:
            str: Path of the generated file inside ``self.output_dir``.

        Raises:
            Exception: Any error raised while building or saving the document
                is logged and re-raised unchanged.
        """
        logging.info("🚀 Début de la génération du template rempli")
        logging.info(f"📋 Template sélectionné: {template_match.template_id}")
        logging.info(f"📊 Score: {template_match.overall_score:.3f}")
        logging.info(f"🔧 Remplissage: {template_match.filling_percentage:.1f}%")

        try:
            doc = Document()

            self._add_custom_styles(doc)
            self._add_document_header(doc, template_match, transcription_filename)
            self._add_filled_sections(doc, template_match)
            self._add_missing_sections(doc, template_match)
            self._add_original_transcription(doc, transcription)

            # Path separators in the template id are replaced so the id
            # cannot point outside the output directory.
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            safe_template_id = template_match.template_id.replace('/', '_').replace('\\', '_')
            output_filename = f"template_rempli_{safe_template_id}_{timestamp}.docx"
            output_path = os.path.join(self.output_dir, output_filename)

            doc.save(output_path)

            logging.info("✅ Template rempli généré avec succès:")
            logging.info(f" 📁 Fichier: {output_path}")
            logging.info(f" 📏 Taille: {os.path.getsize(output_path)} bytes")
            logging.info(f" 📋 Sections remplies: {len(template_match.extracted_data)}")
            logging.info(f" ⚠️ Sections manquantes: {len(self._unfillable_sections(template_match))}")

            return output_path

        except Exception as e:
            logging.error(f"❌ Erreur lors de la génération du template: {e}")
            raise

    def display_generation_summary(self, template_match: "TemplateMatch", output_path: str):
        """Log a summary of the generation (template, scores, output file).

        ``output_path`` must exist: its size is read with ``os.path.getsize``.
        """
        template_info = template_match.template_info
        separator = "=" * 80

        logging.info(separator)
        logging.info("📊 RÉSUMÉ DE LA GÉNÉRATION")
        logging.info(separator)
        logging.info(f"🎯 Template utilisé: {template_match.template_id}")
        logging.info(f"📁 Template source: {os.path.basename(template_info.filepath)}")
        logging.info(f"👨⚕️ Médecin: {template_info.medecin}")
        logging.info(f"🏥 Centre: {getattr(template_info, 'centre_medical', 'Non spécifié')}")
        logging.info(f"📝 Type: {template_info.type}")
        logging.info(f"📊 Score de correspondance: {template_match.overall_score:.3f} ({template_match.confidence_level})")
        logging.info(f"🔧 Pourcentage de remplissage: {template_match.filling_percentage:.1f}%")
        logging.info(f"📋 Sections remplies: {len(template_match.extracted_data)}")
        logging.info(f"⚠️ Sections manquantes: {len(self._unfillable_sections(template_match))}")
        logging.info(f"💾 Fichier généré: {os.path.basename(output_path)}")
        logging.info(f"📏 Taille du fichier: {os.path.getsize(output_path)} bytes")
        logging.info(separator)
|
|
|
|
|
|
|
|
def main():
    """Run the demo pipeline on a built-in sample transcription.

    Steps: match the sample against the template database at ``DB_PATH``,
    generate the filled document for the best match, then log a summary.
    The function returns ``None`` in every case; failures are logged, with
    their traceback, instead of being raised.
    """
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s'
    )

    db_path = DB_PATH

    # Sample input used for the demonstration run.
    transcription_filename = "default.73.931915433.rtf_3650535_radiologie.doc"
    transcription_content = """ la Technique :** 3 plans T2, diffusion axiale, T2 grand champ et T1 Dixon.
Résultats
L'utérus est antéversé, antéfléchi, latéralisé à droite, de taille normale pour l'âge.
L'endomètre est fin, mesurant moins de 2 mm.
Pas d'adénomyose franche.
Aspect normal du col utérin et du vagin.
L'ovaire droit, en position postérieure, mesure 18 x 11 mm avec présence de 4 follicules.
L'ovaire gauche, en position latéro-utérine, présente un volumineux endométriome de 45 mm, typique en hypersignal T1 Dixon.
Deuxième endométriome accolé à l'ovaire droit, périphérique, mesurant 13 mm.
Pas d'épaississement marqué du torus ni des ligaments utéro-sacrés.
Pas d'autre localisation pelvienne.
Pas d'épanchement pelvien.
Pas d'anomalie de la vessie.
Pas d'adénomégalie pelvienne, pas de dilatation des uretères.
en Conclusion
Endométriome ovarien droit périphérique de 13 mm.
Endométriome ovarien gauche centro-ovarien de 45 mm."""

    if not os.path.exists(db_path):
        logging.error(f"❌ Base de données non trouvée: {db_path}")
        return

    try:
        logging.info("🚀 DÉMARRAGE DU PROCESSUS COMPLET")
        logging.info("=" * 80)

        # Step 1: rank the templates against the transcription.
        logging.info("📍 ÉTAPE 1: MATCHING DES TEMPLATES")
        matcher = TemplateMatcher(db_path)
        matches = matcher.match_templates(transcription_content, transcription_filename, k=3)

        if not matches:
            logging.error("❌ Aucun template trouvé")
            return

        # The first match is used as the best one.
        best_match = matches[0]
        logging.info(f"✅ Meilleur template sélectionné: {best_match.template_id}")

        # Step 2: build and save the filled document.
        logging.info("📍 ÉTAPE 2: GÉNÉRATION DU TEMPLATE REMPLI")
        generator = TemplateGenerator()
        output_path = generator.generate_filled_template(
            best_match,
            transcription_content,
            transcription_filename
        )

        # Step 3: log the summary.
        logging.info("📍 ÉTAPE 3: RÉSUMÉ FINAL")
        generator.display_generation_summary(best_match, output_path)

        logging.info("🎉 PROCESSUS TERMINÉ AVEC SUCCÈS")

    except Exception as e:
        # Top-level boundary: same message as before, but logging.exception
        # also records the traceback so the failure can be diagnosed.
        logging.exception(f"❌ Erreur dans le processus principal: {e}")


if __name__ == "__main__":
    main()