|
|
|
|
|
""" |
|
|
Template Analyzer Agent |
|
|
Analyzes Word document templates to extract structure and sections |
|
|
""" |
|
|
|
|
|
import os |
|
|
import re |
|
|
from typing import Dict, Any |
|
|
from docx import Document |
|
|
from langchain.tools import tool |
|
|
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder |
|
|
from langchain.agents import AgentExecutor, create_openai_tools_agent |
|
|
|
|
|
|
|
|
@tool
def analyze_word_template(template_path: str) -> Dict[str, Any]:
    """Analyze a Word document template to extract structure and sections."""
    # NOTE: the docstring above is deliberately left unchanged -- the @tool
    # decorator exposes it to the model as the tool description, so it is
    # effectively a runtime string. Maintainer documentation lives here.
    #
    # Parameters:
    #   template_path: filesystem path of the .docx template to inspect.
    #
    # Returns a dict with three keys:
    #   'sections'      -- list of {'text', 'index', 'style'} for every
    #                      non-empty paragraph containing a section keyword.
    #   'formatting'    -- {paragraph index: {'bold', 'italic', 'font_name',
    #                      'font_size', 'alignment'}} taken from the first run.
    #   'document_info' -- {'title', 'author', 'subject'}; left empty when the
    #                      document has no title.
    #
    # Raises:
    #   FileNotFoundError: if template_path does not exist.
    if not os.path.exists(template_path):
        raise FileNotFoundError(f"Template file not found: {template_path}")

    doc = Document(template_path)
    analysis = {
        'sections': [],
        'formatting': {},
        'document_info': {}
    }

    # Compiled once, outside the loop. The original alternation listed
    # 'matériel' twice; the second occurrence is now the unaccented
    # 'materiel', mirroring the résultat/resultat and échographie/echographie
    # pairs so headings typed without accents are detected too.
    section_keyword = re.compile(
        r'\b(examen|observation|conclusion|résultat|resultat|diagnostic'
        r'|rapport|échographie|echographie|analyse|commentaire'
        r'|recommandation|technique|matériel|materiel|méthode|indication)\b',
        re.IGNORECASE,
    )

    for i, paragraph in enumerate(doc.paragraphs):
        text = paragraph.text.strip()
        if not text:
            # Whitespace-only paragraphs contribute nothing to the analysis.
            continue

        # A paragraph mentioning any section keyword is recorded as a section.
        if section_keyword.search(text):
            analysis['sections'].append({
                'text': text,
                'index': i,
                'style': paragraph.style.name if paragraph.style else 'Normal'
            })

        # NOTE(review): the source indentation was lost; formatting is assumed
        # to be captured for every non-empty paragraph (not only keyword
        # matches) -- confirm against the original file.
        if paragraph.runs:
            # Only the first run is sampled as representative of the paragraph.
            run = paragraph.runs[0]
            analysis['formatting'][i] = {
                'bold': run.bold,
                'italic': run.italic,
                'font_name': run.font.name,
                # Font size is converted to points when explicitly set.
                'font_size': run.font.size.pt if run.font.size else None,
                'alignment': paragraph.alignment
            }

    # Core metadata is only reported when the document carries a title.
    if doc.core_properties.title:
        analysis['document_info'] = {
            'title': doc.core_properties.title,
            'author': doc.core_properties.author,
            'subject': doc.core_properties.subject
        }

    return analysis
|
|
|
|
|
|
|
|
def create_template_analyzer_agent(llm):
    """Build the executor that runs the template analyzer agent.

    Args:
        llm: Language model passed to ``create_openai_tools_agent``.

    Returns:
        A verbose ``AgentExecutor`` whose only tool is
        ``analyze_word_template``.
    """
    # NOTE(review): the source's leading whitespace was lost, so the
    # continuation lines of the system prompt are reproduced without
    # indentation -- confirm against the original file if exact prompt
    # whitespace matters.
    system_message = (
        "You are a medical document template analyzer.\n"
        "Analyze the provided Word template and extract its structure, sections, and formatting.\n"
        "Provide a detailed analysis that can be used by other agents."
    )
    human_message = (
        "Analyze the template at {template_path} and provide a comprehensive analysis."
    )

    prompt = ChatPromptTemplate.from_messages([
        ("system", system_message),
        ("human", human_message),
        MessagesPlaceholder("agent_scratchpad"),
    ])

    # The same single-tool list is handed to both the agent and its executor.
    tools = [analyze_word_template]
    agent = create_openai_tools_agent(llm=llm, tools=tools, prompt=prompt)

    return AgentExecutor(agent=agent, tools=tools, verbose=True)
|
|
|