|
|
|
|
|
""" |
|
|
Transcription Processor |
|
|
Handles transcription loading, correction, and medical analysis |
|
|
""" |
|
|
|
|
|
import os |
|
|
import json |
|
|
from typing import Dict, Any, Tuple |
|
|
from langchain.tools import tool |
|
|
from langchain.prompts import ChatPromptTemplate |
|
|
|
|
|
|
|
|
@tool
def load_transcription(transcription_path: str) -> str:
    """Load and return the raw transcription text from a file."""
    # NOTE: the docstring above is kept verbatim on purpose; langchain's
    # @tool decorator derives the tool description from it.
    #
    # transcription_path: path of the UTF-8 text file to read.
    # Returns the file content with leading/trailing whitespace removed.
    # Raises FileNotFoundError when the path does not exist.
    file_is_missing = not os.path.exists(transcription_path)
    if file_is_missing:
        error_message = f"Transcription file not found: {transcription_path}"
        raise FileNotFoundError(error_message)

    with open(transcription_path, mode='r', encoding='utf-8') as handle:
        raw_text = handle.read()

    # Only the surrounding whitespace is trimmed; inner content is untouched.
    return raw_text.strip()
|
|
|
|
|
|
|
|
def load_transcription_with_user_id(transcription_path: str) -> Tuple[str, str]:
    """Load the transcription text and the user id from a JSON file.

    The file must contain a JSON object. The text is read from its
    ``transcription`` key and the user id from its ``user_id`` key. A key
    that is missing or set to ``null`` falls back to ``''`` (transcription)
    or ``'unknown'`` (user id).

    Args:
        transcription_path: Path to a UTF-8 encoded JSON file.

    Returns:
        A ``(transcription_text, user_id)`` tuple.

    Raises:
        FileNotFoundError: If ``transcription_path`` does not exist.
        json.JSONDecodeError: If the file content is not valid JSON.
        ValueError: If the top-level JSON value is not an object.
    """
    if not os.path.exists(transcription_path):
        raise FileNotFoundError(
            f"Transcription file not found: {transcription_path}")

    with open(transcription_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # json.load may legitimately return a list, string, number, ...; fail
    # with an explicit message instead of an AttributeError on ``.get``.
    if not isinstance(data, dict):
        raise ValueError(
            "Transcription file must contain a JSON object, got "
            f"{type(data).__name__}: {transcription_path}")

    # A JSON ``null`` is treated like a missing key so that the function
    # never hands ``None`` back to callers expecting strings.
    transcription_text = data.get('transcription')
    if transcription_text is None:
        transcription_text = ''

    user_id = data.get('user_id')
    if user_id is None:
        user_id = 'unknown'

    return transcription_text, user_id
|
|
|
|
|
|
|
|
def create_transcription_corrector_chain(llm):
    """Build the chain that corrects a raw ASR medical transcription.

    Args:
        llm: Object piped after the prompt with ``|`` (presumably a
            langchain chat model -- confirm against the caller).

    Returns:
        The result of ``prompt | llm``. The prompt has a single template
        variable, ``transcription``.
    """
    # NOTE(review): several examples below use English-style output formats
    # for French dictation (e.g. "Friday May 15, 2015", "142,015") --
    # confirm this is the intended target format.
    system_message = """You are an experienced medical secretary. You will receive a document that is the output of a speech recognition engine (ASR) applied to medical data.

Your task is to correct the document while maintaining maximum fidelity to the original text:

CORRECTION RULES:
- Correct spelling and grammar errors
- Correct incorrect words, especially medication names or disease names
- Correct incorrect dates or addresses
- Do NOT add synonyms, stay as faithful as possible to the original text
- Add line breaks and punctuation when necessary
- List results and analysis descriptions as bullet points
- Replace "la ligne" and "à la ligne" with line breaks in the text
- Replace "point" with a period followed by a line break in the text

NUMBER FORMATTING:
Write numbers in their correct numerical form:
- "1°" for "un degré"
- "3D" for "trois d"
- "200 kg" for "deux cent kilogrammes" or "deux cent kilo"
- "5.423" for "cinq point quatre cent vingt trois"
- "0.5" for "zéro virgule cinq"
- "3 3/4" for "trois et trois quarts"
- "142,015" for "cent quarante deux mille quinze"
- "06 32 16 15 19" for "zéro six trente deux seize quinze dix-neuf"
- "99.50 €" for "quatre vingt dix neuf euros et cinquante centimes"
- "Friday May 15, 2015" for "quinze mai deux mille quinze"
- "20:30" for "vingt heures trente"
- "12:15" for "midi quinze"
- "5:15" for "cinq heures et quart"
- "2:45" for "trois heures moins le quart"

Also support Swiss and Belgian forms:
- "70" for "septante"
- "80" for "huitante"
- "90" for "nonante"
- "77" for "septante sept"
- "81" for "huitante un"
- "95" for "nonante cinq"

Return the corrected text as simple text without explanations or comments. Maintain the original structure and content as much as possible."""

    human_message = (
        "Correct the following medical transcription while maintaining "
        "maximum fidelity to the original text:\n\n{transcription}"
    )

    prompt = ChatPromptTemplate.from_messages([
        ("system", system_message),
        ("human", human_message),
    ])
    return prompt | llm
|
|
|
|
|
|
|
|
def create_medical_analyzer_chain(llm):
    """Build the chain that extracts medical information from a transcription.

    Args:
        llm: Object piped after the prompt with ``|`` (presumably a
            langchain chat model -- confirm against the caller).

    Returns:
        The result of ``prompt | llm``. The prompt has a single template
        variable, ``corrected_transcription``.
    """
    system_message = """You are a medical information extractor.
Extract and categorize ONLY the medical information that is explicitly mentioned in the transcription.
DO NOT add interpretations, conclusions, or information not present in the text.
DO NOT make assumptions or add medical knowledge.
Simply organize the information that is already there into structured categories.
Focus on measurements, anatomical structures, and findings that are explicitly stated."""

    human_message = (
        "Extract and organize ONLY the medical information explicitly "
        "mentioned in this transcription:\n\n{corrected_transcription}"
    )

    prompt = ChatPromptTemplate.from_messages([
        ("system", system_message),
        ("human", human_message),
    ])
    return prompt | llm
|
|
|
|
|
|
|
|
def create_title_generator_chain(llm):
    """Build the chain that generates a French medical report title.

    Args:
        llm: Object piped after the prompt with ``|`` (presumably a
            langchain chat model -- confirm against the caller).

    Returns:
        The result of ``prompt | llm``. The prompt has a single template
        variable, ``medical_data``.
    """
    system_message = """You are a medical title generator.
Generate a professional medical report title in FRENCH based on the medical data and findings.
The title should be specific to the type of examination and findings.
Return ONLY the title in French, nothing else."""

    human_message = """Generate a medical report title in FRENCH based on this medical data:

{medical_data}

Generate a professional title in French that reflects the type of examination and key findings."""

    prompt = ChatPromptTemplate.from_messages([
        ("system", system_message),
        ("human", human_message),
    ])
    return prompt | llm
|
|
|