Spaces:

Nourhenem
/

pipeline2

Sleeping

File size: 5,914 Bytes

f92da22

#!/usr/bin/env python3
"""
Document Assembler
Handles creating medical documents by inserting sections into Word templates
"""

import os
import re
from datetime import datetime
from typing import Dict, Any, List
from docx import Document
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.shared import Pt
from langchain.tools import tool
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.agents import AgentExecutor, create_openai_tools_agent


# Substrings that mark a line of the generated text as a section heading
# (compared against the accent-stripped, lower-cased line).
_SECTION_KEYWORDS = ('technique', 'resultat', 'conclusion', 'indication')

# Single-pass equivalent of the chained .replace() calls: fold the accents the
# templates use, drop colons, and turn non-breaking spaces into plain spaces.
_LABEL_TRANSLATION = str.maketrans(
    {'é': 'e', 'è': 'e', 'à': 'a', ':': None, '\xa0': ' '})


def _normalize_label(text: str) -> str:
    """Return *text* lower-cased, accent-folded and stripped for loose label matching."""
    return text.lower().translate(_LABEL_TRANSLATION).strip()


def _parse_sections(sections_text: str) -> Dict[str, str]:
    """Split generated text into ``{heading line: body}`` using keyword-detected headings.

    Blank lines are dropped and every remaining line is stripped. Lines seen
    before the first heading are ignored. A heading that appears twice keeps
    only its last body. A trailing heading with no body is not recorded.
    """
    sections: Dict[str, str] = {}
    heading = None
    body = []
    for raw_line in sections_text.split('\n'):
        line = raw_line.strip()
        if not line:
            continue
        normalized = _normalize_label(line)
        if any(keyword in normalized for keyword in _SECTION_KEYWORDS):
            if heading:
                sections[heading] = '\n'.join(body).strip()
            heading = line
            body = []
        elif heading:
            body.append(line)
    if heading and body:
        sections[heading] = '\n'.join(body).strip()
    return sections


@tool
def create_medical_document(template_path: str, sections_text: str, title: str, output_path: str) -> str:
    """Create a medical document by inserting sections into a Word template."""
    # NOTE(review): the docstring above is intentionally left verbatim.
    # LangChain's @tool is documented to expose it as the tool description,
    # so rewording it would change what the agent sees. Details live here.
    #
    # Args:
    #     template_path: Path of the .docx template to load.
    #     sections_text: Generated text; heading lines are detected by keyword
    #         (see _SECTION_KEYWORDS) and the lines below each form its body.
    #     title: Text written into the template's "Titre" paragraph, or into a
    #         new centered header (with date/time) when no such paragraph exists.
    #     output_path: Where the assembled document is saved.
    #
    # Returns:
    #     A confirmation message containing output_path.
    #
    # Raises:
    #     FileNotFoundError: If template_path does not exist.
    if not os.path.exists(template_path):
        raise FileNotFoundError(f"Template file not found: {template_path}")

    doc = Document(template_path)
    sections = _parse_sections(sections_text)

    # First, look for a "Titre" paragraph in the template and put the title there.
    for idx, paragraph in enumerate(doc.paragraphs):
        para_text = paragraph.text.strip()
        if 'titre' in _normalize_label(para_text):
            print(f"🎯 Found title section in template: '{para_text}' at index {idx}")
            paragraph.clear()
            paragraph.text = title
            # Apply formatting to make it stand out
            for run in paragraph.runs:
                run.font.bold = True
                run.font.size = Pt(14)
            break
    else:
        # No title paragraph: prepend a header block with the title and a timestamp.
        print("📝 No title section found in template, adding header...")
        existing = doc.paragraphs
        if existing:
            header_para = existing[0].insert_paragraph_before()
        else:
            # Template without any body paragraph: nothing to insert before.
            header_para = doc.add_paragraph()
        now = datetime.now()  # one reading so date and time cannot disagree
        separator = '=' * 40
        header_para.text = f"{title}\nDate: {now:%d/%m/%Y}\nHeure: {now:%H:%M}\n{separator}"
        for run in header_para.runs:
            run.font.bold = True
            run.font.size = Pt(14)
        header_para.alignment = WD_ALIGN_PARAGRAPH.CENTER

    # Snapshot the body paragraphs once (after any header insertion). The
    # paragraph objects stay valid when new paragraphs are inserted later, so
    # the indices computed below keep referring to the same template paragraphs.
    paragraphs = doc.paragraphs

    # Locate section titles in the template. Both sides go through the same
    # normalization; the last matching paragraph wins for each section name.
    normalized_names = {name: _normalize_label(name) for name in sections}
    section_indices = {}
    for idx, paragraph in enumerate(paragraphs):
        para_norm = _normalize_label(paragraph.text)
        if not para_norm:
            continue
        for section_name, section_norm in normalized_names.items():
            if section_norm and section_norm in para_norm:
                section_indices[section_name] = idx
    print("DEBUG section_indices:", section_indices)
    print("DEBUG sections.keys():", list(sections.keys()))

    # For each located section, blank the template text between its title and
    # the next located title (or the end of the document), then write the
    # generated content right after the title.
    sorted_sections = sorted(section_indices.items(), key=lambda item: item[1])
    for i, (section_name, idx) in enumerate(sorted_sections):
        start = idx + 1
        if i + 1 < len(sorted_sections):
            end = sorted_sections[i + 1][1]
        else:
            end = len(paragraphs)

        for stale in paragraphs[start:end]:
            if stale.text.strip():
                stale.clear()

        content = sections[section_name]
        if not content:
            continue

        if start < end:
            # At least one template paragraph belongs to this section: reuse it.
            target = paragraphs[start]
        elif start < len(paragraphs):
            # The paragraph after the title is another section's title (or this
            # title is shared by two sections). Insert a new paragraph instead
            # of overwriting it.
            target = paragraphs[start].insert_paragraph_before()
        else:
            # The title is the last paragraph of the document.
            target = doc.add_paragraph()
        target.text = content

    doc.save(output_path)
    return f"Document created successfully: {output_path}"


def create_document_assembler_agent(llm):
    """Build the executor that runs the document assembler agent.

    Args:
        llm: Language model passed to ``create_openai_tools_agent``.

    Returns:
        A verbose ``AgentExecutor`` wrapping an OpenAI-tools agent whose only
        tool is ``create_medical_document``.
    """
    # Prompt variables: template_path, sections_text, title, output_path.
    system_message = """You are a medical document assembler.
        Create medical documents by inserting sections into Word templates.
        Use the provided title for the document header and insert sections in the correct locations."""
    human_message = "Create a medical document with template {template_path}, sections content: {sections_text}, title: {title}, and save to {output_path}"

    # The same tool set is given to the agent and to the executor.
    toolbox = [create_medical_document]

    prompt = ChatPromptTemplate.from_messages([
        ("system", system_message),
        ("human", human_message),
        MessagesPlaceholder("agent_scratchpad"),
    ])
    agent = create_openai_tools_agent(llm=llm, tools=toolbox, prompt=prompt)
    return AgentExecutor(agent=agent, tools=toolbox, verbose=True)