Spaces:

Nourhenem
/

pipeline2

Sleeping

App Files Files Community

pipeline2 / template_analyzer.py

Nourhenem

initial commit

f92da22 verified about 1 month ago

raw

history blame contribute delete

3.07 kB

	#!/usr/bin/env python3
	"""
	Template Analyzer Agent
	Analyzes Word document templates to extract structure and sections
	"""

	import os
	import re
	from typing import Dict, Any
	from docx import Document
	from langchain.tools import tool
	from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
	from langchain.agents import AgentExecutor, create_openai_tools_agent


	# Keywords whose presence (as a whole word, case-insensitive) marks a
	# paragraph as a section heading. Accented and unaccented spellings are both
	# listed because templates are not consistent about diacritics.
	_SECTION_KEYWORDS = (
	    'examen', 'observation', 'conclusion', 'résultat', 'resultat',
	    'diagnostic', 'rapport', 'échographie', 'echographie', 'analyse',
	    'commentaire', 'recommandation', 'technique', 'matériel', 'materiel',
	    'méthode', 'indication',
	)
	# Compiled once at import time; the keywords are joined with a real
	# alternation `|` (an escaped `\|` would match a literal pipe character and
	# never detect any section).
	_SECTION_PATTERN = re.compile(
	    r'\b(' + '|'.join(_SECTION_KEYWORDS) + r')\b',
	    re.IGNORECASE,
	)


	@tool
	def analyze_word_template(template_path: str) -> Dict[str, Any]:
	    """Analyze a Word document template to extract structure and sections.

	    Args:
	        template_path: Filesystem path of the Word template to inspect.

	    Returns:
	        A dict with three keys:
	        'sections' - list of {'text', 'index', 'style'} for every non-empty
	        paragraph containing a section keyword;
	        'formatting' - maps paragraph index to the bold/italic/font/alignment
	        of the paragraph's first run;
	        'document_info' - title/author/subject from the core properties, or
	        an empty dict when none of them is set.

	    Raises:
	        FileNotFoundError: If template_path does not exist.
	    """
	    if not os.path.exists(template_path):
	        raise FileNotFoundError(f"Template file not found: {template_path}")

	    doc = Document(template_path)
	    analysis = {
	        'sections': [],
	        'formatting': {},
	        'document_info': {},
	    }

	    # Analyze paragraphs and sections
	    for i, paragraph in enumerate(doc.paragraphs):
	        text = paragraph.text.strip()
	        if not text:
	            # Blank paragraphs carry neither a heading nor useful formatting.
	            continue

	        # Detect sections
	        if _SECTION_PATTERN.search(text):
	            analysis['sections'].append({
	                'text': text,
	                'index': i,
	                'style': paragraph.style.name if paragraph.style else 'Normal',
	            })

	        # Analyze formatting; only the first run is sampled as representative
	        # of the paragraph.
	        if paragraph.runs:
	            run = paragraph.runs[0]
	            analysis['formatting'][i] = {
	                'bold': run.bold,
	                'italic': run.italic,
	                'font_name': run.font.name,
	                'font_size': run.font.size.pt if run.font.size else None,
	                'alignment': paragraph.alignment,
	            }

	    # Analyze document properties. Record them when any of the three is set,
	    # so author/subject are not lost for templates that have no title.
	    props = doc.core_properties
	    if props.title or props.author or props.subject:
	        analysis['document_info'] = {
	            'title': props.title,
	            'author': props.author,
	            'subject': props.subject,
	        }

	    return analysis


	def create_template_analyzer_agent(llm):
	    """Build an executor that runs the template-analysis agent.

	    The agent is an OpenAI-tools agent driven by ``llm`` whose only tool is
	    ``analyze_word_template``. Its prompt takes a ``template_path`` variable
	    and an ``agent_scratchpad`` placeholder.

	    Args:
	        llm: The language model passed to ``create_openai_tools_agent``.

	    Returns:
	        A verbose ``AgentExecutor`` wrapping the agent and its tool.
	    """
	    # The same single-tool list is handed to both the agent and the executor.
	    tools = [analyze_word_template]

	    system_message = ("system", """You are a medical document template analyzer.
	Analyze the provided Word template and extract its structure, sections, and formatting.
	Provide a detailed analysis that can be used by other agents.""")
	    human_message = (
	        "human",
	        "Analyze the template at {template_path} and provide a comprehensive analysis.",
	    )
	    prompt = ChatPromptTemplate.from_messages([
	        system_message,
	        human_message,
	        MessagesPlaceholder("agent_scratchpad"),
	    ])

	    agent = create_openai_tools_agent(llm=llm, tools=tools, prompt=prompt)

	    return AgentExecutor(agent=agent, tools=tools, verbose=True)