#!/usr/bin/env python3
"""
Chapter Extraction Worker - Runs chapter extraction in a separate process to prevent GUI freezing
"""

import sys
import os
import io


def _force_utf8(stream):
    """Return *stream* set up to emit UTF-8 with ``errors='replace'``.

    The stream is reconfigured in place when it supports ``reconfigure``;
    otherwise its underlying binary buffer is re-wrapped. A missing stream
    (``None``) or one without a ``buffer`` attribute is returned unchanged
    instead of raising ``AttributeError`` at import time.
    """
    if stream is None:
        return None

    reconfigure = getattr(stream, "reconfigure", None)
    if reconfigure is not None:
        try:
            reconfigure(encoding='utf-8', errors='replace')
            return stream
        except (TypeError, ValueError, OSError):
            # Fall through to re-wrapping the raw buffer below.
            pass

    buffer = getattr(stream, "buffer", None)
    if buffer is None:
        return stream
    return io.TextIOWrapper(buffer, encoding='utf-8', errors='replace')


# Force UTF-8 encoding for stdout/stderr on Windows
sys.stdout = _force_utf8(sys.stdout)
sys.stderr = _force_utf8(sys.stderr)

import json
import zipfile
import time
import traceback
from pathlib import Path

# Add parent directory to path for imports
sys.path.insert(0, str(Path(__file__).parent))


def _remove_quietly(path):
    """Delete *path* if possible; cleanup is best-effort, so OS errors are ignored."""
    try:
        os.remove(path)
    except OSError:
        pass


def _save_full_chapters(chapters, chapters_full_path):
    """Write *chapters* as JSON to *chapters_full_path* without leaving a partial file.

    The data is serialized to a sibling ``.tmp`` file and moved into place
    with ``os.replace`` only after the dump succeeded. On any failure both
    the temporary file and the target are removed and the exception is
    re-raised so the caller can fall back to per-chapter files.
    """
    tmp_path = chapters_full_path + ".tmp"
    try:
        with open(tmp_path, 'w', encoding='utf-8') as f:
            json.dump(chapters, f, ensure_ascii=False)
        os.replace(tmp_path, chapters_full_path)
    except Exception:
        _remove_quietly(tmp_path)
        # A stale target would not match this run's chapters; drop it too.
        _remove_quietly(chapters_full_path)
        raise


def _save_chapters_individually(chapters, output_dir):
    """Write each chapter's ``body`` to its own HTML file inside *output_dir*.

    A chapter that cannot be written is reported with a ``[WARNING]`` line
    and skipped; the remaining chapters are still attempted.
    """
    for chapter in chapters:
        try:
            safe_name = chapter.get('filename', 'content').replace('/', '_')
            chapter_file = f"chapter_{chapter['num']:04d}_{safe_name}.html"
            chapter_path = os.path.join(output_dir, chapter_file)
            with open(chapter_path, 'w', encoding='utf-8') as f:
                f.write(chapter.get('body', ''))
            print(f"[INFO] Saved chapter {chapter['num']} to {chapter_file}", flush=True)
        except Exception as ce:
            print(f"[WARNING] Could not save chapter {chapter.get('num')}: {ce}", flush=True)


def run_chapter_extraction(epub_path, output_dir, extraction_mode="smart", progress_callback=None):
    """
    Run chapter extraction in this worker process

    Progress and status are reported on stdout as lines tagged
    ``[PROGRESS]``, ``[INFO]``, ``[WARNING]``, ``[ERROR]`` and ``[RESULT]``.
    The function also sets the ``EXTRACTION_MODE`` environment variable and
    writes ``chapters_full.json`` (or, if that fails, one HTML file per
    chapter) into ``output_dir``.

    Args:
        epub_path: Path to EPUB file
        output_dir: Output directory for extracted content
        extraction_mode: Extraction mode (smart, comprehensive, full, enhanced)
        progress_callback: Optional callable invoked with each progress
            message, in addition to the ``[PROGRESS]`` line printed for IPC

    Returns:
        dict: On success ``success`` (True), ``chapters`` (count),
        ``metadata`` and ``chapter_info``; on failure ``success`` (False),
        ``error`` and ``traceback``. Exceptions are not propagated.
    """
    try:
        # Import here to avoid loading heavy modules until needed
        from TransateKRtoEN import ChapterExtractor

        def worker_progress_callback(message):
            # Special prefix lets the parent process pick out progress lines
            print(f"[PROGRESS] {message}", flush=True)
            # Also honor the caller-supplied callback instead of ignoring it
            if progress_callback is not None:
                progress_callback(message)

        # Create extractor with progress callback
        extractor = ChapterExtractor(progress_callback=worker_progress_callback)

        # Set extraction mode
        os.environ["EXTRACTION_MODE"] = extraction_mode

        # Open EPUB and extract chapters
        print(f"[INFO] Starting extraction of: {epub_path}", flush=True)
        print(f"[INFO] Output directory: {output_dir}", flush=True)
        print(f"[INFO] Extraction mode: {extraction_mode}", flush=True)

        with zipfile.ZipFile(epub_path, 'r') as zf:
            # Extract metadata first
            metadata = extractor._extract_epub_metadata(zf)
            print(f"[INFO] Extracted metadata: {list(metadata.keys())}", flush=True)

            # Extract chapters
            chapters = extractor.extract_chapters(zf, output_dir)
            print(f"[INFO] Extracted {len(chapters)} chapters", flush=True)

            # Original author's note: extract_chapters handles OPF sorting
            # internally, so only log whether the OPF file is present.
            opf_path = os.path.join(output_dir, 'content.opf')
            if os.path.exists(opf_path):
                print("[INFO] OPF file available for chapter ordering", flush=True)

            # CRITICAL: Save the full chapters with body content!
            # This is what the main process needs to load
            chapters_full_path = os.path.join(output_dir, "chapters_full.json")
            try:
                _save_full_chapters(chapters, chapters_full_path)
                print(f"[INFO] Saved full chapters data to: {chapters_full_path}", flush=True)
            except Exception as e:
                print(f"[WARNING] Could not save full chapters: {e}", flush=True)
                # Fall back to saving individual files
                _save_chapters_individually(chapters, output_dir)

            # Return results as JSON for IPC
            result = {
                "success": True,
                "chapters": len(chapters),
                "metadata": metadata,
                "chapter_info": [
                    {
                        "num": ch.get("num"),
                        "title": ch.get("title"),
                        "has_images": ch.get("has_images", False),
                        "file_size": ch.get("file_size", 0),
                        "content_hash": ch.get("content_hash", ""),
                    }
                    for ch in chapters
                ],
            }

            # Output result as JSON
            print(f"[RESULT] {json.dumps(result)}", flush=True)
            return result

    except Exception as e:
        # Top-level boundary: report the failure over IPC instead of raising
        error_info = {
            "success": False,
            "error": str(e),
            "traceback": traceback.format_exc(),
        }
        print(f"[ERROR] {str(e)}", flush=True)
        print(f"[RESULT] {json.dumps(error_info)}", flush=True)
        return error_info


def main():
    """Main entry point for worker process

    Reads ``epub_path``, ``output_dir`` and an optional ``extraction_mode``
    (default ``"smart"``) from ``sys.argv``, runs the extraction, and exits
    with status 0 on success or 1 on bad usage, a missing EPUB, or a failed
    extraction.
    """
    # Parse command line arguments
    if len(sys.argv) < 3:
        print(
            "[ERROR] Usage: chapter_extraction_worker.py <epub_path> <output_dir> [extraction_mode]",
            flush=True,
        )
        sys.exit(1)

    epub_path = sys.argv[1]
    output_dir = sys.argv[2]
    extraction_mode = sys.argv[3] if len(sys.argv) > 3 else "smart"

    # Validate inputs
    if not os.path.exists(epub_path):
        print(f"[ERROR] EPUB file not found: {epub_path}", flush=True)
        sys.exit(1)

    # Create output directory if needed
    os.makedirs(output_dir, exist_ok=True)

    # Run extraction
    result = run_chapter_extraction(epub_path, output_dir, extraction_mode)

    # Exit with appropriate code
    sys.exit(0 if result.get("success", False) else 1)


if __name__ == "__main__":
    # Ensure freeze support for Windows frozen exe
    try:
        import multiprocessing
        multiprocessing.freeze_support()
    except Exception:
        # Best-effort: the worker can still run without freeze support
        pass
    main()