#!/usr/bin/env python3 """ Chapter Extraction Manager - Manages chapter extraction in subprocess to prevent GUI freezing """ import subprocess import sys import os import json import threading import queue import time from pathlib import Path class ChapterExtractionManager: """ Manages chapter extraction in a separate process to prevent GUI freezing Similar to GlossaryManager but for chapter extraction """ def __init__(self, log_callback=None): """ Initialize the extraction manager Args: log_callback: Function to call with log messages (for GUI integration) """ self.log_callback = log_callback self.process = None self.output_queue = queue.Queue() self.error_queue = queue.Queue() self.result = None self.is_running = False self.stop_requested = False def extract_chapters_async(self, epub_path, output_dir, extraction_mode="smart", progress_callback=None, completion_callback=None): """ Start chapter extraction in a subprocess Args: epub_path: Path to EPUB file output_dir: Output directory for extracted content extraction_mode: Extraction mode (smart, comprehensive, full, enhanced) progress_callback: Function to call with progress updates completion_callback: Function to call when extraction completes """ if self.is_running: self._log("âš ī¸ Chapter extraction already in progress") return False self.is_running = True self.stop_requested = False self.result = None # Start extraction in a thread that manages the subprocess thread = threading.Thread( target=self._run_extraction_subprocess, args=(epub_path, output_dir, extraction_mode, progress_callback, completion_callback), daemon=True ) thread.start() return True def _run_extraction_subprocess(self, epub_path, output_dir, extraction_mode, progress_callback, completion_callback): """ Run the extraction subprocess and handle its output """ try: # Build command differently for frozen vs dev mode if getattr(sys, 'frozen', False): # In a frozen one-file build, sys.executable is our GUI .exe, not Python. # Use an internal worker-mode flag handled by translator_gui.py to run the worker. cmd = [ sys.executable, '--run-chapter-extraction', epub_path, output_dir, extraction_mode ] else: # In dev mode, invoke the worker script with the Python interpreter base_dir = Path(__file__).parent worker_script = base_dir / "chapter_extraction_worker.py" cmd = [ sys.executable, str(worker_script), epub_path, output_dir, extraction_mode ] # Set environment to force UTF-8 encoding env = os.environ.copy() env['PYTHONIOENCODING'] = 'utf-8' env['PYTHONLEGACYWINDOWSSTDIO'] = '0' # Use new Windows console API # Set default worker count if not already set env.setdefault('EXTRACTION_WORKERS', '2') self._log(f"🚀 Starting chapter extraction subprocess...") self._log(f"📚 EPUB: {os.path.basename(epub_path)}") self._log(f"📂 Output: {output_dir}") self._log(f"âš™ī¸ Mode: {extraction_mode}") # Start the subprocess with UTF-8 encoding self.process = subprocess.Popen( cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, encoding='utf-8', errors='replace', # Replace invalid chars instead of failing bufsize=1, universal_newlines=True, env=env # Pass the environment with UTF-8 settings ) # Read output in real-time while True: if self.stop_requested: self._terminate_process() break # Check if process is still running if self.process.poll() is not None: break # Read stdout line by line with error handling try: line = self.process.stdout.readline() if not line: continue line = line.strip() if not line: continue except UnicodeDecodeError as e: self._log(f"âš ī¸ Encoding error reading output: {e}") continue # Skip all processing if stop is requested to suppress logs if self.stop_requested: continue # Parse output based on prefix if line.startswith("[PROGRESS]"): # Progress update message = line[10:].strip() if progress_callback: progress_callback(message) self._log(f"📊 {message}") elif line.startswith("[INFO]"): # Information message message = line[6:].strip() self._log(f"â„šī¸ {message}") elif line.startswith("[ERROR]"): # Error message message = line[7:].strip() self._log(f"❌ {message}") self.error_queue.put(message) elif line.startswith("[RESULT]"): # Final result as JSON try: json_str = line[8:].strip() self.result = json.loads(json_str) if self.result.get("success"): self._log(f"✅ Extraction completed successfully!") self._log(f"📚 Extracted {self.result.get('chapters', 0)} chapters") else: error = self.result.get("error", "Unknown error") self._log(f"❌ Extraction failed: {error}") except json.JSONDecodeError as e: self._log(f"âš ī¸ Failed to parse result: {e}") elif line.startswith("["): # Other prefixed messages - skip pass else: # Regular output - only log if not too verbose if not any(skip in line for skip in ["📁 Searching for", "📁 Found", "📁 ✓", "📁 ✗"]): self._log(line) # Get any remaining output - but only process if not stopped if not self.stop_requested: remaining_output, remaining_error = self.process.communicate(timeout=1) # Process any remaining output if remaining_output: for line in remaining_output.strip().split('\n'): if line and not line.startswith("["): self._log(line) # Check for errors if remaining_error: for line in remaining_error.strip().split('\n'): if line: self._log(f"âš ī¸ {line}") # Check final status if self.process.returncode != 0: self._log(f"âš ī¸ Process exited with code {self.process.returncode}") else: # If stopped, just clean up without processing output try: self.process.communicate(timeout=0.1) except subprocess.TimeoutExpired: pass # Ignore timeout when cleaning up except subprocess.TimeoutExpired: if not self.stop_requested: self._log("âš ī¸ Subprocess communication timeout") self._terminate_process() except Exception as e: # Only log errors if not stopping (unless it's a critical error) if not self.stop_requested or "Subprocess error" in str(e): self._log(f"❌ Subprocess error: {e}") self.result = { "success": False, "error": str(e) if not self.stop_requested else "Extraction stopped by user" } finally: self.is_running = False # Store process reference before clearing it in case termination is needed process_ref = self.process self.process = None # If process is still running, try to clean it up if process_ref and process_ref.poll() is None: try: process_ref.terminate() time.sleep(0.1) # Brief wait if process_ref.poll() is None: process_ref.kill() except Exception: pass # Ignore cleanup errors in finally block # Ensure result is never None if self.result is None: if self.stop_requested: self.result = { "success": False, "error": "Extraction stopped by user" } else: self.result = { "success": False, "error": "Extraction process ended unexpectedly" } # Call completion callback if completion_callback: completion_callback(self.result) def stop_extraction(self): """Stop the extraction process""" if not self.is_running: return False # Set stop flag first to suppress subsequent logs self.stop_requested = True self._log("🛑 Stopping chapter extraction...") # Store process reference to avoid race condition process_ref = self.process # Give it a moment to stop gracefully time.sleep(0.5) # Force terminate if still running and process still exists if process_ref: self._terminate_process_ref(process_ref) return True def _terminate_process(self): """Terminate the subprocess using current process reference""" if self.process: self._terminate_process_ref(self.process) def _terminate_process_ref(self, process_ref): """Terminate a specific process reference""" if not process_ref: return try: # Check if process is still alive before attempting termination if process_ref.poll() is None: process_ref.terminate() # Give it a moment to terminate time.sleep(0.5) # Force kill if still running if process_ref.poll() is None: process_ref.kill() time.sleep(0.1) # Brief wait after kill # Only log termination if not stopping (user already knows they stopped it) if not self.stop_requested: self._log("✅ Process terminated") else: # Only log if not stopping if not self.stop_requested: self._log("✅ Process already terminated") except Exception as e: # Always log termination errors as they might indicate a problem self._log(f"âš ī¸ Error terminating process: {e}") def _log(self, message): """Log a message using the callback if available""" # Suppress logs when stop is requested (except for stop/termination messages) if self.stop_requested and not any(keyword in message for keyword in ["🛑", "✅ Process terminated", "❌ Subprocess error"]): return if self.log_callback: self.log_callback(message) else: print(message) def is_extraction_running(self): """Check if extraction is currently running""" return self.is_running def get_result(self): """Get the extraction result if available""" return self.result # Example usage if __name__ == "__main__": import tkinter as tk from tkinter import filedialog def test_extraction(): """Test the extraction manager""" # Create a simple GUI for testing root = tk.Tk() root.title("Chapter Extraction Test") root.geometry("800x600") # Text widget for logs text = tk.Text(root, wrap=tk.WORD) text.pack(fill=tk.BOTH, expand=True, padx=10, pady=10) # Log callback def log_message(msg): text.insert(tk.END, msg + "\n") text.see(tk.END) root.update_idletasks() # Progress callback def progress_update(msg): log_message(f"📊 Progress: {msg}") # Completion callback def extraction_complete(result): if result and result.get("success"): log_message(f"✅ Extraction completed!") log_message(f" Chapters: {result.get('chapters', 0)}") else: log_message(f"❌ Extraction failed!") # Create manager manager = ChapterExtractionManager(log_callback=log_message) # File selection epub_path = filedialog.askopenfilename( title="Select EPUB file", filetypes=[("EPUB files", "*.epub"), ("All files", "*.*")] ) if epub_path: output_dir = os.path.splitext(os.path.basename(epub_path))[0] # Start extraction manager.extract_chapters_async( epub_path, output_dir, extraction_mode="smart", progress_callback=progress_update, completion_callback=extraction_complete ) # Button to stop stop_btn = tk.Button( root, text="Stop Extraction", command=lambda: manager.stop_extraction() ) stop_btn.pack(pady=5) root.mainloop() # Run test test_extraction()