#!/usr/bin/env python3 import os import json import logging import re from typing import Dict, Any from pathlib import Path from unstructured.partition.pdf import partition_pdf from flask import Flask, request, jsonify from flask_cors import CORS from dotenv import load_dotenv from bloatectomy import bloatectomy from werkzeug.utils import secure_filename from langchain_groq import ChatGroq from typing_extensions import TypedDict, NotRequired # --- Logging --- logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s") logger = logging.getLogger("patient-assistant") # --- Load environment --- load_dotenv() GROQ_API_KEY = os.getenv("GROQ_API_KEY") if not GROQ_API_KEY: logger.error("GROQ_API_KEY not set in environment") exit(1) # --- Flask app setup --- BASE_DIR = Path(__file__).resolve().parent REPORTS_ROOT = Path(os.getenv("REPORTS_ROOT", str(BASE_DIR / "reports"))) static_folder = BASE_DIR / "static" app = Flask(__name__, static_folder=str(static_folder), static_url_path="/static") CORS(app) # --- LLM setup --- llm = ChatGroq( model=os.getenv("LLM_MODEL", "meta-llama/llama-4-scout-17b-16e-instruct"), temperature=0.0, max_tokens=1024, api_key=GROQ_API_KEY, ) def clean_notes_with_bloatectomy(text: str, style: str = "remov") -> str: """Helper function to clean up text using the bloatectomy library.""" try: b = bloatectomy(text, style=style, output="html") tokens = getattr(b, "tokens", None) if not tokens: return text return "\n".join(tokens) except Exception: logger.exception("Bloatectomy cleaning failed; returning original text") return text # --- Agent prompt instructions --- PATIENT_ASSISTANT_PROMPT = """ You are a patient assistant helping to analyze medical records and reports. Your goal is to provide a comprehensive response based on the patient's health history and the current conversation. Your tasks include: - Analyzing medical records and reports to detect anomalies, redundant tests, or misleading treatments. - Suggesting preventive care based on the overall patient health history. - Optimizing healthcare costs by comparing past visits and treatments, helping patients make smarter choices. - Offering personalized lifestyle recommendations, such as adopting healthier food habits, daily routines, and regular health checks. - Generating a natural, helpful reply to the user. You will be provided with the last user message, the conversation history, and a summary of the patient's medical reports. Use this information to give a tailored and informative response. STRICT OUTPUT FORMAT (JSON ONLY): Return a single JSON object with the following keys: - assistant_reply: string // a natural language reply to the user (short, helpful, always present) - patientDetails: object // keys may include name, problem, city, contact (update if user shared info) - conversationSummary: string (optional) // short summary of conversation + relevant patient docs Rules: - ALWAYS include `assistant_reply` as a non-empty string. - Do NOT produce any text outside the JSON object. - Be concise in `assistant_reply`. If you need more details, ask a targeted follow-up question. - Do not make up information that is not present in the provided medical reports or conversation history. """ # --- JSON extraction helper --- def extract_json_from_llm_response(raw_response: str) -> dict: """Safely extracts a JSON object from a string that might contain extra text or markdown.""" default = { "assistant_reply": "I'm sorry — I couldn't understand that. Could you please rephrase?", "patientDetails": {}, "conversationSummary": "", } if not raw_response or not isinstance(raw_response, str): return default # Find the JSON object, ignoring any markdown code fences m = re.search(r"```(?:json)?\s*([\s\S]*?)\s*```", raw_response) json_string = m.group(1).strip() if m else raw_response # Find the first opening brace and the last closing brace first = json_string.find('{') last = json_string.rfind('}') if first == -1 or last == -1 or first >= last: try: return json.loads(json_string) except Exception: logger.warning("Could not locate JSON braces in LLM output. Falling back to default.") return default candidate = json_string[first:last+1] # Remove trailing commas that might cause parsing issues candidate = re.sub(r',\s*(?=[}\]])', '', candidate) try: parsed = json.loads(candidate) except Exception as e: logger.warning("Failed to parse JSON from LLM output: %s", e) return default # Basic validation of the parsed JSON if isinstance(parsed, dict) and "assistant_reply" in parsed and isinstance(parsed["assistant_reply"], str) and parsed["assistant_reply"].strip(): parsed.setdefault("patientDetails", {}) parsed.setdefault("conversationSummary", "") return parsed else: logger.warning("Parsed JSON missing 'assistant_reply' or invalid format. Returning default.") return default # --- Flask routes --- @app.route("/", methods=["GET"]) def serve_frontend(): """Serves the frontend HTML file.""" try: return app.send_static_file("frontend2.html") except Exception: return "