|
|
|
|
|
""" |
|
|
Document Assembler |
|
|
Handles creating medical documents by inserting sections into Word templates |
|
|
""" |
|
|
|
|
|
import os |
|
|
import re |
|
|
from datetime import datetime |
|
|
from typing import Dict, Any, List |
|
|
from docx import Document |
|
|
from docx.enum.text import WD_ALIGN_PARAGRAPH |
|
|
from docx.shared import Pt |
|
|
from langchain.tools import tool |
|
|
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder |
|
|
from langchain.agents import AgentExecutor, create_openai_tools_agent |
|
|
|
|
|
|
|
|
# Accent-folded keywords; a line of ``sections_text`` containing any of them
# is treated as a section header.
_SECTION_KEYWORDS = ('technique', 'resultat', 'conclusion', 'indication')


def _fold_accents(text: str) -> str:
    """Lower-case *text* and replace é/è/à with their unaccented letters."""
    return text.lower().replace('é', 'e').replace('è', 'e').replace('à', 'a')


def _normalize_label(text: str) -> str:
    """Fold accents, drop colons, turn NBSPs into spaces and strip *text*."""
    return _fold_accents(text).replace(':', '').replace('\xa0', ' ').strip()


def _parse_sections(sections_text: str) -> Dict[str, str]:
    """Split *sections_text* into ``{header line: body text}``."""
    sections: Dict[str, str] = {}
    current_section = None
    current_content: List[str] = []

    for raw_line in sections_text.split('\n'):
        line = raw_line.strip()
        if not line:
            continue

        folded = _fold_accents(line)
        if any(keyword in folded for keyword in _SECTION_KEYWORDS):
            # A new header closes the section that was being collected.
            if current_section:
                sections[current_section] = '\n'.join(current_content).strip()
            current_section = line
            current_content = []
        elif current_section:
            # Lines that appear before the first header are ignored.
            current_content.append(line)

    # NOTE: a trailing header without any body line is intentionally not
    # recorded (unchanged behaviour), so the template text under it is kept.
    if current_section and current_content:
        sections[current_section] = '\n'.join(current_content).strip()

    return sections


def _emphasize_runs(paragraph) -> None:
    """Set every run of *paragraph* to bold, 14 pt."""
    for run in paragraph.runs:
        run.font.bold = True
        run.font.size = Pt(14)


def _write_title(doc, title: str) -> None:
    """Put *title* in the template's 'titre' paragraph, or prepend a header."""
    paragraphs = doc.paragraphs

    for idx, paragraph in enumerate(paragraphs):
        para_text = paragraph.text.strip()
        if 'titre' in _normalize_label(para_text):
            print(
                f"🎯 Found title section in template: '{para_text}' at index {idx}")
            # Assigning ``text`` replaces the paragraph's existing content.
            paragraph.text = title
            _emphasize_runs(paragraph)
            return

    print("📝 No title section found in template, adding header...")
    # A template without body paragraphs has no first paragraph to insert
    # before, so fall back to appending.
    if paragraphs:
        header_para = paragraphs[0].insert_paragraph_before()
    else:
        header_para = doc.add_paragraph()

    # Read the clock once so the date and the time describe the same instant.
    now = datetime.now()
    header_text = f"{title}\nDate: {now.strftime('%d/%m/%Y')}\nHeure: {now.strftime('%H:%M')}\n{'='*40}"
    header_para.text = header_text
    _emphasize_runs(header_para)
    header_para.alignment = WD_ALIGN_PARAGRAPH.CENTER


def _fill_sections(doc, sections: Dict[str, str]) -> None:
    """Replace the template text under each matched header with its content."""
    # ``doc.paragraphs`` builds a new list on each access; take one snapshot.
    # The snapshot indices stay valid even when paragraphs are inserted below.
    paragraphs = doc.paragraphs
    normalized_names = {name: _normalize_label(name) for name in sections}

    section_indices: Dict[str, int] = {}
    for idx, paragraph in enumerate(paragraphs):
        para_norm = _normalize_label(paragraph.text)
        if not para_norm:
            continue
        for section_name, section_norm in normalized_names.items():
            if section_norm and section_norm in para_norm:
                # The last matching paragraph wins for a given section.
                section_indices[section_name] = idx

    print("DEBUG section_indices:", section_indices)
    print("DEBUG sections.keys():", list(sections.keys()))

    sorted_sections = sorted(section_indices.items(), key=lambda item: item[1])
    for position, (section_name, idx) in enumerate(sorted_sections):
        start = idx + 1
        if position + 1 < len(sorted_sections):
            end = sorted_sections[position + 1][1]
        else:
            end = len(paragraphs)

        # Paragraphs strictly between this header and the next matched header.
        body = paragraphs[start:end]
        for paragraph in body:
            if paragraph.text.strip():
                paragraph.clear()

        content = sections[section_name]
        if not content:
            continue

        if body:
            body[0].text = content
        elif start < len(paragraphs):
            # No body paragraph exists (e.g. the next header follows
            # immediately): insert a new one instead of overwriting the
            # paragraph that follows the header.
            paragraphs[start].insert_paragraph_before(content)
        else:
            doc.add_paragraph(content)


@tool
def create_medical_document(template_path: str, sections_text: str, title: str, output_path: str) -> str:
    """Create a medical document by inserting sections into a Word template.

    Args:
        template_path: Path of the Word template to load.
        sections_text: Plain text made of section header lines (lines
            containing 'technique', 'resultat', 'conclusion' or 'indication',
            accents and case ignored) each followed by the lines of that
            section.
        title: Text written into the template paragraph containing 'titre',
            or into a new centered header (with the current date and time)
            inserted at the top when no such paragraph exists.
        output_path: Path the assembled document is saved to.

    Returns:
        A confirmation message containing ``output_path``.

    Raises:
        FileNotFoundError: If ``template_path`` does not exist.
    """
    # NOTE(review): the @tool decorator presumably exposes this docstring to
    # the model as the tool description; confirm before rewording it.
    if not os.path.exists(template_path):
        raise FileNotFoundError(f"Template file not found: {template_path}")

    doc = Document(template_path)

    sections = _parse_sections(sections_text)
    _write_title(doc, title)
    _fill_sections(doc, sections)

    doc.save(output_path)
    return f"Document created successfully: {output_path}"
|
|
|
|
|
|
|
|
def create_document_assembler_agent(llm):
    """Build the executor that runs the document assembler agent.

    The prompt expects the input variables ``template_path``,
    ``sections_text``, ``title`` and ``output_path``; the agent's only tool
    is ``create_medical_document``.

    Args:
        llm: Language model handed to ``create_openai_tools_agent``.

    Returns:
        A verbose ``AgentExecutor`` wrapping the agent and its tool.
    """
    tools = [create_medical_document]

    system_message = (
        "You are a medical document assembler.\n"
        "Create medical documents by inserting sections into Word templates.\n"
        "Use the provided title for the document header and insert sections in the correct locations."
    )
    human_message = "Create a medical document with template {template_path}, sections content: {sections_text}, title: {title}, and save to {output_path}"

    prompt = ChatPromptTemplate.from_messages([
        ("system", system_message),
        ("human", human_message),
        # Receives the agent's intermediate tool calls and results.
        MessagesPlaceholder("agent_scratchpad"),
    ])

    agent = create_openai_tools_agent(llm=llm, tools=tools, prompt=prompt)
    return AgentExecutor(agent=agent, tools=tools, verbose=True)
|
|
|