diff --git a/app.py b/app.py new file mode 100644 index 0000000000000000000000000000000000000000..3ff547f09a9df631f66b9f44e2cafcb8ca124a2b --- /dev/null +++ b/app.py @@ -0,0 +1,5320 @@ +#!/usr/bin/env python3 +""" +Glossarion Web - Gradio Web Interface +AI-powered translation in your browser +""" + +import gradio as gr +import os +import sys +import json +import tempfile +import base64 +from pathlib import Path + +# Import API key encryption/decryption +try: + from api_key_encryption import APIKeyEncryption + API_KEY_ENCRYPTION_AVAILABLE = True + # Create web-specific encryption handler with its own key file + _web_encryption_handler = None + def get_web_encryption_handler(): + global _web_encryption_handler + if _web_encryption_handler is None: + _web_encryption_handler = APIKeyEncryption() + # Use web-specific key file + from pathlib import Path + _web_encryption_handler.key_file = Path('.glossarion_web_key') + _web_encryption_handler.cipher = _web_encryption_handler._get_or_create_cipher() + # Add web-specific fields to encrypt + _web_encryption_handler.api_key_fields.extend([ + 'azure_vision_key', + 'google_vision_credentials' + ]) + return _web_encryption_handler + + def decrypt_config(config): + return get_web_encryption_handler().decrypt_config(config) + + def encrypt_config(config): + return get_web_encryption_handler().encrypt_config(config) +except ImportError: + API_KEY_ENCRYPTION_AVAILABLE = False + def decrypt_config(config): + return config # Fallback: return config as-is + def encrypt_config(config): + return config # Fallback: return config as-is + +# Import your existing translation modules +try: + import TransateKRtoEN + from model_options import get_model_options + TRANSLATION_AVAILABLE = True +except ImportError: + TRANSLATION_AVAILABLE = False + print("⚠️ Translation modules not found") + +# Import manga translation modules +try: + from manga_translator import MangaTranslator + from unified_api_client import UnifiedClient + MANGA_TRANSLATION_AVAILABLE = True + print("✅ Manga translation modules loaded successfully") +except ImportError as e: + MANGA_TRANSLATION_AVAILABLE = False + print(f"⚠️ Manga translation modules not found: {e}") + print(f"⚠️ Current working directory: {os.getcwd()}") + print(f"⚠️ Python path: {sys.path[:3]}...") + + # Check if files exist + files_to_check = ['manga_translator.py', 'unified_api_client.py', 'bubble_detector.py', 'local_inpainter.py'] + for file in files_to_check: + if os.path.exists(file): + print(f"✅ Found: {file}") + else: + print(f"❌ Missing: {file}") + + +class GlossarionWeb: + """Web interface for Glossarion translator""" + + def __init__(self): + # Determine config file path based on environment + is_hf_spaces = os.getenv('SPACE_ID') is not None or os.getenv('HF_SPACES') == 'true' + + if is_hf_spaces: + # Use /data directory for Hugging Face Spaces persistent storage + data_dir = '/data' + if not os.path.exists(data_dir): + # Fallback to current directory if /data doesn't exist + data_dir = '.' 
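Note on the API-key handling above: the web wrapper only touches a small surface of `APIKeyEncryption` (`key_file`, `_get_or_create_cipher()`, `cipher`, `api_key_fields`, and `encrypt_config`/`decrypt_config`), and `save_config` further down treats values prefixed with `ENC:` as already encrypted. A minimal sketch of that assumed interface, using Fernet; the real `api_key_encryption` module may differ:

```python
# Minimal sketch of the APIKeyEncryption interface that app.py relies on.
# Only the members referenced above are modeled; the Fernet storage, the "ENC:"
# prefix, and the default key-file name are assumptions inferred from this file,
# not the real api_key_encryption module.
from pathlib import Path
from cryptography.fernet import Fernet

class APIKeyEncryption:
    def __init__(self):
        self.key_file = Path('.glossarion_key')   # app.py swaps this for .glossarion_web_key
        self.api_key_fields = ['api_key']          # app.py extends this with the vision keys
        self.cipher = self._get_or_create_cipher()

    def _get_or_create_cipher(self):
        # Reuse an existing key file, otherwise generate and persist a new key.
        if self.key_file.exists():
            key = self.key_file.read_bytes()
        else:
            key = Fernet.generate_key()
            self.key_file.write_bytes(key)
        return Fernet(key)

    def encrypt_config(self, config):
        out = dict(config)
        for field in self.api_key_fields:
            value = out.get(field)
            if isinstance(value, str) and value and not value.startswith('ENC:'):
                out[field] = 'ENC:' + self.cipher.encrypt(value.encode()).decode()
        return out

    def decrypt_config(self, config):
        out = dict(config)
        for field in self.api_key_fields:
            value = out.get(field)
            if isinstance(value, str) and value.startswith('ENC:'):
                out[field] = self.cipher.decrypt(value[4:].encode()).decode()
        return out
```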
+ self.config_file = os.path.join(data_dir, 'config_web.json') + print(f"🤗 HF Spaces detected - using config path: {self.config_file}") + print(f"📁 Directory exists: {os.path.exists(os.path.dirname(self.config_file))}") + else: + # Local mode - use current directory + self.config_file = "config_web.json" + print(f"🏠 Local mode - using config path: {self.config_file}") + + # Load raw config first + self.config = self.load_config() + + # Create a decrypted version for display/use in the UI + # but keep the original for saving + self.decrypted_config = self.config.copy() + if API_KEY_ENCRYPTION_AVAILABLE: + self.decrypted_config = decrypt_config(self.decrypted_config) + + self.models = get_model_options() if TRANSLATION_AVAILABLE else ["gpt-4", "claude-3-5-sonnet"] + print(f"🤖 Loaded {len(self.models)} models: {self.models[:5]}{'...' if len(self.models) > 5 else ''}") + + # Translation state management + import threading + self.is_translating = False + self.stop_flag = threading.Event() + self.translation_thread = None + self.current_unified_client = None # Track active client to allow cancellation + self.current_translator = None # Track active translator to allow shutdown + + # Add stop flags for different translation types + self.epub_translation_stop = False + self.epub_translation_thread = None + self.glossary_extraction_stop = False + self.glossary_extraction_thread = None + + # Default prompts from the GUI (same as translator_gui.py) + self.default_prompts = { + "korean": ( + "You are a professional Korean to English novel translator, you must strictly output only English text and HTML tags while following these rules:\n" + "- Use a natural, comedy-friendly English translation style that captures both humor and readability without losing any original meaning.\n" + "- Include 100% of the source text - every word, phrase, and sentence must be fully translated without exception.\n" + "- Retain Korean honorifics and respectful speech markers in romanized form, including but not limited to: -nim, -ssi, -yang, -gun, -isiyeo, -hasoseo. For archaic/classical Korean honorific forms (like 이시여/isiyeo, 하소서/hasoseo), preserve them as-is rather than converting to modern equivalents.\n" + "- Always localize Korean terminology to proper English equivalents instead of literal translations (examples: 마왕 = Demon King; 마술 = magic).\n" + "- When translating Korean's pronoun-dropping style, insert pronouns in English only where needed for clarity: prioritize original pronouns as implied or according to the glossary, and only use they/them as a last resort, use I/me for first-person narration, and maintain natural English flow without overusing pronouns just because they're omitted in Korean.\n" + "- All Korean profanity must be translated to English profanity.\n" + "- Preserve original intent, and speech tone.\n" + "- Retain onomatopoeia in Romaji.\n" + "- Keep original Korean quotation marks (" ", ' ', 「」, 『』) as-is without converting to English quotes.\n" + "- Every Korean/Chinese/Japanese character must be converted to its English meaning. 
Examples: The character 생 means 'life/living', 활 means 'active', 관 means 'hall/building' - together 생활관 means Dormitory.\n" + "- Preserve ALL HTML tags exactly as they appear in the source, including <head>, <title>, <h1>, <h2>, <p>, <br>, <div>, etc.\n" + ), + "japanese": ( + "You are a professional Japanese to English novel translator, you must strictly output only English text and HTML tags while following these rules:\n" + "- Use a natural, comedy-friendly English translation style that captures both humor and readability without losing any original meaning.\n" + "- Include 100% of the source text - every word, phrase, and sentence must be fully translated without exception.\n" + "- Retain Japanese honorifics and respectful speech markers in romanized form, including but not limited to: -san, -sama, -chan, -kun, -dono, -sensei, -senpai, -kouhai. For archaic/classical Japanese honorific forms, preserve them as-is rather than converting to modern equivalents.\n" + "- Always localize Japanese terminology to proper English equivalents instead of literal translations (examples: 魔王 = Demon King; 魔術 = magic).\n" + "- When translating Japanese's pronoun-dropping style, insert pronouns in English only where needed for clarity: prioritize original pronouns as implied or according to the glossary, and only use they/them as a last resort, use I/me for first-person narration while reflecting the Japanese pronoun's nuance (私/僕/俺/etc.) through speech patterns rather than the pronoun itself, and maintain natural English flow without overusing pronouns just because they're omitted in Japanese.\n" + "- All Japanese profanity must be translated to English profanity.\n" + "- Preserve original intent, and speech tone.\n" + "- Retain onomatopoeia in Romaji.\n" + "- Keep original Japanese quotation marks (「」, 『』) as-is without converting to English quotes.\n" + "- Every Korean/Chinese/Japanese character must be converted to its English meaning. Examples: The character 生 means 'life/living', 活 means 'active', 館 means 'hall/building' - together 生活館 means Dormitory.\n" + "- Preserve ALL HTML tags exactly as they appear in the source, including <head>, <title>, <h1>, <h2>, <p>, <br>, <div>, etc.\n" + ), + "chinese": ( + "You are a professional Chinese to English novel translator, you must strictly output only English text and HTML tags while following these rules:\n" + "- Use a natural, comedy-friendly English translation style that captures both humor and readability without losing any original meaning.\n" + "- Include 100% of the source text - every word, phrase, and sentence must be fully translated without exception.\n" + "- Always localize Chinese terminology to proper English equivalents instead of literal translations (examples: 魔王 = Demon King; 魔法 = magic).\n" + "- When translating Chinese's pronoun-dropping style, insert pronouns in English only where needed for clarity while maintaining natural English flow.\n" + "- All Chinese profanity must be translated to English profanity.\n" + "- Preserve original intent, and speech tone.\n" + "- Retain onomatopoeia in Pinyin.\n" + "- Keep original Chinese quotation marks (「」, 『』) as-is without converting to English quotes.\n" + "- Every Korean/Chinese/Japanese character must be converted to its English meaning. 
Examples: The character 生 means 'life/living', 活 means 'active', 館 means 'hall/building' - together 生活館 means Dormitory.\n" + "- Preserve ALL HTML tags exactly as they appear in the source, including <head>, <title>, <h1>, <h2>, <p>, <br>, <div>, etc.\n" + ), + "Manga_JP": ( + "You are a professional Japanese to English Manga translator.\n" + "You have both the image of the Manga panel and the extracted text to work with.\n" + "Output only English text while following these rules: \n\n" + + "VISUAL CONTEXT:\n" + "- Analyze the character's facial expressions and body language in the image.\n" + "- Consider the scene's mood and atmosphere.\n" + "- Note any action or movement depicted.\n" + "- Use visual cues to determine the appropriate tone and emotion.\n" + "- USE THE IMAGE to inform your translation choices. The image is not decorative - it contains essential context for accurate translation.\n\n" + + "DIALOGUE REQUIREMENTS:\n" + "- Match the translation tone to the character's expression.\n" + "- If a character looks angry, use appropriately intense language.\n" + "- If a character looks shy or embarrassed, reflect that in the translation.\n" + "- Keep speech patterns consistent with the character's appearance and demeanor.\n" + "- Retain honorifics and onomatopoeia in Romaji.\n\n" + + "IMPORTANT: Use both the visual context and text to create the most accurate and natural-sounding translation.\n" + ), + "Manga_KR": ( + "You are a professional Korean to English Manhwa translator.\n" + "You have both the image of the Manhwa panel and the extracted text to work with.\n" + "Output only English text while following these rules: \n\n" + + "VISUAL CONTEXT:\n" + "- Analyze the character's facial expressions and body language in the image.\n" + "- Consider the scene's mood and atmosphere.\n" + "- Note any action or movement depicted.\n" + "- Use visual cues to determine the appropriate tone and emotion.\n" + "- USE THE IMAGE to inform your translation choices. The image is not decorative - it contains essential context for accurate translation.\n\n" + + "DIALOGUE REQUIREMENTS:\n" + "- Match the translation tone to the character's expression.\n" + "- If a character looks angry, use appropriately intense language.\n" + "- If a character looks shy or embarrassed, reflect that in the translation.\n" + "- Keep speech patterns consistent with the character's appearance and demeanor.\n" + "- Retain honorifics and onomatopoeia in Romaji.\n\n" + + "IMPORTANT: Use both the visual context and text to create the most accurate and natural-sounding translation.\n" + ), + "Manga_CN": ( + "You are a professional Chinese to English Manga translator.\n" + "You have both the image of the Manga panel and the extracted text to work with.\n" + "Output only English text while following these rules: \n\n" + + "VISUAL CONTEXT:\n" + "- Analyze the character's facial expressions and body language in the image.\n" + "- Consider the scene's mood and atmosphere.\n" + "- Note any action or movement depicted.\n" + "- Use visual cues to determine the appropriate tone and emotion.\n" + "- USE THE IMAGE to inform your translation choices. 
The image is not decorative - it contains essential context for accurate translation.\n" + + "DIALOGUE REQUIREMENTS:\n" + "- Match the translation tone to the character's expression.\n" + "- If a character looks angry, use appropriately intense language.\n" + "- If a character looks shy or embarrassed, reflect that in the translation.\n" + "- Keep speech patterns consistent with the character's appearance and demeanor.\n" + "- Retain honorifics and onomatopoeia in Romaji.\n\n" + + "IMPORTANT: Use both the visual context and text to create the most accurate and natural-sounding translation.\n" + ), + "Original": "Return everything exactly as seen on the source." + } + + # Load profiles from config and merge with defaults + # Always include default prompts, then overlay any custom ones from config + self.profiles = self.default_prompts.copy() + config_profiles = self.config.get('prompt_profiles', {}) + if config_profiles: + self.profiles.update(config_profiles) + + def get_config_value(self, key, default=None): + """Get value from decrypted config with fallback""" + return self.decrypted_config.get(key, default) + + def get_current_config_for_update(self): + """Get the current config for updating (uses in-memory version)""" + # Return a copy of the in-memory config, not loaded from file + return self.config.copy() + + def get_default_config(self): + """Get default configuration for Hugging Face Spaces""" + return { + 'model': 'gpt-4-turbo', + 'api_key': '', + 'ocr_provider': 'custom-api', + 'bubble_detection_enabled': True, + 'inpainting_enabled': True, + 'manga_font_size_mode': 'auto', + 'manga_font_size': 24, + 'manga_font_size_multiplier': 1.0, + 'manga_max_font_size': 48, + 'manga_text_color': [255, 255, 255], # White text (like manga integration) + 'manga_shadow_enabled': True, + 'manga_shadow_color': [0, 0, 0], # Black shadow (like manga integration) + 'manga_shadow_offset_x': 2, # Match manga integration + 'manga_shadow_offset_y': 2, # Match manga integration + 'manga_shadow_blur': 0, # Match manga integration (no blur) + 'manga_bg_opacity': 180, + 'manga_bg_style': 'circle', + 'manga_settings': { + 'ocr': { + 'detector_type': 'rtdetr_onnx', + 'rtdetr_confidence': 0.3, + 'bubble_confidence': 0.3, + 'detect_text_bubbles': True, + 'detect_empty_bubbles': True, + 'detect_free_text': True, + 'bubble_max_detections_yolo': 100 + }, + 'inpainting': { + 'local_method': 'anime', + 'method': 'local', + 'batch_size': 10, + 'enable_cache': True + }, + 'advanced': { + 'parallel_processing': True, + 'max_workers': 2, + 'parallel_panel_translation': False, + 'panel_max_workers': 7, + 'format_detection': True, + 'webtoon_mode': 'auto', + 'torch_precision': 'fp16', + 'auto_cleanup_models': False, + 'debug_mode': False, + 'save_intermediate': False + }, + 'rendering': { + 'auto_min_size': 12, + 'auto_max_size': 48, + 'auto_fit_style': 'balanced' + }, + 'font_sizing': { + 'algorithm': 'smart', + 'prefer_larger': True, + 'max_lines': 10, + 'line_spacing': 1.3, + 'bubble_size_factor': True, + 'min_size': 12, + 'max_size': 48 + }, + 'tiling': { + 'enabled': False, + 'tile_size': 480, + 'tile_overlap': 64 + } + } + } + + def load_config(self): + """Load configuration - from persistent file on HF Spaces or local file""" + is_hf_spaces = os.getenv('SPACE_ID') is not None or os.getenv('HF_SPACES') == 'true' + + # Try to load from file (works both locally and on HF Spaces with persistent storage) + try: + if os.path.exists(self.config_file): + with open(self.config_file, 'r', encoding='utf-8') as f: + 
loaded_config = json.load(f) + # Start with defaults + default_config = self.get_default_config() + # Deep merge - preserve nested structures from loaded config + self._deep_merge_config(default_config, loaded_config) + + if is_hf_spaces: + print(f"✅ Loaded config from persistent storage: {self.config_file}") + else: + print(f"✅ Loaded config from local file: {self.config_file}") + + return default_config + except Exception as e: + print(f"Could not load config from {self.config_file}: {e}") + + # If loading fails or file doesn't exist - return defaults + print(f"📝 Using default configuration") + return self.get_default_config() + + def _deep_merge_config(self, base, override): + """Deep merge override config into base config""" + for key, value in override.items(): + if key in base and isinstance(base[key], dict) and isinstance(value, dict): + # Recursively merge nested dicts + self._deep_merge_config(base[key], value) + else: + # Override the value + base[key] = value + + def set_all_environment_variables(self): + """Set all environment variables from config for translation engines""" + config = self.get_config_value + + # Chapter Processing Options + os.environ['BATCH_TRANSLATE_HEADERS'] = '1' if config('batch_translate_headers', False) else '0' + os.environ['HEADERS_PER_BATCH'] = str(config('headers_per_batch', 400)) + os.environ['USE_NCX_NAVIGATION'] = '1' if config('use_ncx_navigation', False) else '0' + os.environ['ATTACH_CSS_TO_CHAPTERS'] = '1' if config('attach_css_to_chapters', False) else '0' + os.environ['RETAIN_SOURCE_EXTENSION'] = '1' if config('retain_source_extension', True) else '0' + os.environ['USE_CONSERVATIVE_BATCHING'] = '1' if config('use_conservative_batching', False) else '0' + os.environ['DISABLE_GEMINI_SAFETY'] = '1' if config('disable_gemini_safety', False) else '0' + os.environ['USE_HTTP_OPENROUTER'] = '1' if config('use_http_openrouter', False) else '0' + os.environ['DISABLE_OPENROUTER_COMPRESSION'] = '1' if config('disable_openrouter_compression', False) else '0' + + # Chapter Extraction Settings + os.environ['TEXT_EXTRACTION_METHOD'] = config('text_extraction_method', 'standard') + os.environ['FILE_FILTERING_LEVEL'] = config('file_filtering_level', 'smart') + + # Thinking Mode Settings + os.environ['ENABLE_GPT_THINKING'] = '1' if config('enable_gpt_thinking', True) else '0' + os.environ['GPT_THINKING_EFFORT'] = config('gpt_thinking_effort', 'medium') + os.environ['OR_THINKING_TOKENS'] = str(config('or_thinking_tokens', 2000)) + os.environ['ENABLE_GEMINI_THINKING'] = '1' if config('enable_gemini_thinking', False) else '0' + os.environ['GEMINI_THINKING_BUDGET'] = str(config('gemini_thinking_budget', 0)) + # IMPORTANT: Also set THINKING_BUDGET for unified_api_client compatibility + os.environ['THINKING_BUDGET'] = str(config('gemini_thinking_budget', 0)) + + # Translation Settings + os.environ['CONTEXTUAL'] = '1' if config('contextual', False) else '0' + os.environ['TRANSLATION_HISTORY_LIMIT'] = str(config('translation_history_limit', 2)) + os.environ['TRANSLATION_HISTORY_ROLLING'] = '1' if config('translation_history_rolling', False) else '0' + os.environ['BATCH_TRANSLATION'] = '1' if config('batch_translation', True) else '0' + os.environ['BATCH_SIZE'] = str(config('batch_size', 10)) + os.environ['THREAD_SUBMISSION_DELAY'] = str(config('thread_submission_delay', 0.1)) + os.environ['DELAY'] = str(config('delay', 1)) + os.environ['CHAPTER_RANGE'] = config('chapter_range', '') + os.environ['TOKEN_LIMIT'] = str(config('token_limit', 200000)) + 
os.environ['TOKEN_LIMIT_DISABLED'] = '1' if config('token_limit_disabled', False) else '0' + os.environ['DISABLE_INPUT_TOKEN_LIMIT'] = '1' if config('token_limit_disabled', False) else '0' + + # Glossary Settings + os.environ['ENABLE_AUTO_GLOSSARY'] = '1' if config('enable_auto_glossary', False) else '0' + os.environ['APPEND_GLOSSARY_TO_PROMPT'] = '1' if config('append_glossary_to_prompt', True) else '0' + os.environ['GLOSSARY_MIN_FREQUENCY'] = str(config('glossary_min_frequency', 2)) + os.environ['GLOSSARY_MAX_NAMES'] = str(config('glossary_max_names', 50)) + os.environ['GLOSSARY_MAX_TITLES'] = str(config('glossary_max_titles', 30)) + os.environ['GLOSSARY_BATCH_SIZE'] = str(config('glossary_batch_size', 50)) + os.environ['GLOSSARY_FILTER_MODE'] = config('glossary_filter_mode', 'all') + os.environ['GLOSSARY_FUZZY_THRESHOLD'] = str(config('glossary_fuzzy_threshold', 0.90)) + + # Manual Glossary Settings + os.environ['MANUAL_GLOSSARY_MIN_FREQUENCY'] = str(config('manual_glossary_min_frequency', 2)) + os.environ['MANUAL_GLOSSARY_MAX_NAMES'] = str(config('manual_glossary_max_names', 50)) + os.environ['MANUAL_GLOSSARY_MAX_TITLES'] = str(config('manual_glossary_max_titles', 30)) + os.environ['GLOSSARY_MAX_TEXT_SIZE'] = str(config('glossary_max_text_size', 50000)) + os.environ['GLOSSARY_MAX_SENTENCES'] = str(config('glossary_max_sentences', 200)) + os.environ['GLOSSARY_CHAPTER_SPLIT_THRESHOLD'] = str(config('glossary_chapter_split_threshold', 8192)) + os.environ['MANUAL_GLOSSARY_FILTER_MODE'] = config('manual_glossary_filter_mode', 'all') + os.environ['STRIP_HONORIFICS'] = '1' if config('strip_honorifics', True) else '0' + os.environ['MANUAL_GLOSSARY_FUZZY_THRESHOLD'] = str(config('manual_glossary_fuzzy_threshold', 0.90)) + os.environ['GLOSSARY_USE_LEGACY_CSV'] = '1' if config('glossary_use_legacy_csv', False) else '0' + + # QA Scanner Settings + os.environ['ENABLE_POST_TRANSLATION_SCAN'] = '1' if config('enable_post_translation_scan', False) else '0' + os.environ['QA_MIN_FOREIGN_CHARS'] = str(config('qa_min_foreign_chars', 10)) + os.environ['QA_CHECK_REPETITION'] = '1' if config('qa_check_repetition', True) else '0' + os.environ['QA_CHECK_GLOSSARY_LEAKAGE'] = '1' if config('qa_check_glossary_leakage', True) else '0' + os.environ['QA_MIN_FILE_LENGTH'] = str(config('qa_min_file_length', 0)) + os.environ['QA_CHECK_MULTIPLE_HEADERS'] = '1' if config('qa_check_multiple_headers', True) else '0' + os.environ['QA_CHECK_MISSING_HTML'] = '1' if config('qa_check_missing_html', True) else '0' + os.environ['QA_CHECK_INSUFFICIENT_PARAGRAPHS'] = '1' if config('qa_check_insufficient_paragraphs', True) else '0' + os.environ['QA_MIN_PARAGRAPH_PERCENTAGE'] = str(config('qa_min_paragraph_percentage', 30)) + os.environ['QA_REPORT_FORMAT'] = config('qa_report_format', 'detailed') + os.environ['QA_AUTO_SAVE_REPORT'] = '1' if config('qa_auto_save_report', True) else '0' + + # Manga/Image Translation Settings (when available) + os.environ['BUBBLE_DETECTION_ENABLED'] = '1' if config('bubble_detection_enabled', True) else '0' + os.environ['INPAINTING_ENABLED'] = '1' if config('inpainting_enabled', True) else '0' + os.environ['MANGA_FONT_SIZE_MODE'] = config('manga_font_size_mode', 'auto') + os.environ['MANGA_FONT_SIZE'] = str(config('manga_font_size', 24)) + os.environ['MANGA_FONT_MULTIPLIER'] = str(config('manga_font_multiplier', 1.0)) + os.environ['MANGA_MIN_FONT_SIZE'] = str(config('manga_min_font_size', 12)) + os.environ['MANGA_MAX_FONT_SIZE'] = str(config('manga_max_font_size', 48)) + 
os.environ['MANGA_SHADOW_ENABLED'] = '1' if config('manga_shadow_enabled', True) else '0' + os.environ['MANGA_SHADOW_OFFSET_X'] = str(config('manga_shadow_offset_x', 2)) + os.environ['MANGA_SHADOW_OFFSET_Y'] = str(config('manga_shadow_offset_y', 2)) + os.environ['MANGA_SHADOW_BLUR'] = str(config('manga_shadow_blur', 0)) + os.environ['MANGA_BG_OPACITY'] = str(config('manga_bg_opacity', 130)) + os.environ['MANGA_BG_STYLE'] = config('manga_bg_style', 'circle') + + # OCR Provider Settings + os.environ['OCR_PROVIDER'] = config('ocr_provider', 'custom-api') + + # Advanced Manga Settings + manga_settings = config('manga_settings', {}) + if manga_settings: + advanced = manga_settings.get('advanced', {}) + os.environ['PARALLEL_PANEL_TRANSLATION'] = '1' if advanced.get('parallel_panel_translation', False) else '0' + os.environ['PANEL_MAX_WORKERS'] = str(advanced.get('panel_max_workers', 7)) + os.environ['PANEL_START_STAGGER_MS'] = str(advanced.get('panel_start_stagger_ms', 0)) + os.environ['WEBTOON_MODE'] = '1' if advanced.get('webtoon_mode', False) else '0' + os.environ['DEBUG_MODE'] = '1' if advanced.get('debug_mode', False) else '0' + os.environ['SAVE_INTERMEDIATE'] = '1' if advanced.get('save_intermediate', False) else '0' + os.environ['PARALLEL_PROCESSING'] = '1' if advanced.get('parallel_processing', True) else '0' + os.environ['MAX_WORKERS'] = str(advanced.get('max_workers', 4)) + os.environ['AUTO_CLEANUP_MODELS'] = '1' if advanced.get('auto_cleanup_models', False) else '0' + os.environ['TORCH_PRECISION'] = advanced.get('torch_precision', 'auto') + os.environ['PRELOAD_LOCAL_INPAINTING_FOR_PANELS'] = '1' if advanced.get('preload_local_inpainting_for_panels', False) else '0' + + # OCR settings + ocr = manga_settings.get('ocr', {}) + os.environ['DETECTOR_TYPE'] = ocr.get('detector_type', 'rtdetr_onnx') + os.environ['RTDETR_CONFIDENCE'] = str(ocr.get('rtdetr_confidence', 0.3)) + os.environ['BUBBLE_CONFIDENCE'] = str(ocr.get('bubble_confidence', 0.3)) + os.environ['DETECT_TEXT_BUBBLES'] = '1' if ocr.get('detect_text_bubbles', True) else '0' + os.environ['DETECT_EMPTY_BUBBLES'] = '1' if ocr.get('detect_empty_bubbles', True) else '0' + os.environ['DETECT_FREE_TEXT'] = '1' if ocr.get('detect_free_text', True) else '0' + os.environ['BUBBLE_MAX_DETECTIONS_YOLO'] = str(ocr.get('bubble_max_detections_yolo', 100)) + + # Inpainting settings + inpainting = manga_settings.get('inpainting', {}) + os.environ['LOCAL_INPAINT_METHOD'] = inpainting.get('local_method', 'anime_onnx') + os.environ['INPAINT_BATCH_SIZE'] = str(inpainting.get('batch_size', 10)) + os.environ['INPAINT_CACHE_ENABLED'] = '1' if inpainting.get('enable_cache', True) else '0' + + # HD Strategy + os.environ['HD_STRATEGY'] = advanced.get('hd_strategy', 'resize') + os.environ['HD_RESIZE_LIMIT'] = str(advanced.get('hd_strategy_resize_limit', 1536)) + os.environ['HD_CROP_MARGIN'] = str(advanced.get('hd_strategy_crop_margin', 16)) + os.environ['HD_CROP_TRIGGER'] = str(advanced.get('hd_strategy_crop_trigger_size', 1024)) + + # Concise Pipeline Logs + os.environ['CONCISE_PIPELINE_LOGS'] = '1' if config('concise_pipeline_logs', False) else '0' + + print("✅ All environment variables set from configuration") + + def save_config(self, config): + """Save configuration - to persistent file on HF Spaces or local file""" + is_hf_spaces = os.getenv('SPACE_ID') is not None or os.getenv('HF_SPACES') == 'true' + + # Always try to save to file (works both locally and on HF Spaces with persistent storage) + try: + config_to_save = config.copy() + + # Only encrypt 
if we have the encryption module AND keys aren't already encrypted + if API_KEY_ENCRYPTION_AVAILABLE: + # Check if keys need encryption (not already encrypted) + needs_encryption = False + for key in ['api_key', 'azure_vision_key', 'google_vision_credentials']: + if key in config_to_save: + value = config_to_save[key] + # If it's a non-empty string that doesn't start with 'ENC:', it needs encryption + if value and isinstance(value, str) and not value.startswith('ENC:'): + needs_encryption = True + break + + if needs_encryption: + config_to_save = encrypt_config(config_to_save) + + # Create directory if it doesn't exist (important for HF Spaces) + os.makedirs(os.path.dirname(self.config_file) or '.', exist_ok=True) + + # Debug output + if is_hf_spaces: + print(f"📝 Saving to HF Spaces persistent storage: {self.config_file}") + + print(f"DEBUG save_config called with model={config.get('model')}, batch_size={config.get('batch_size')}") + print(f"DEBUG self.config before={self.config.get('model') if hasattr(self, 'config') else 'N/A'}") + print(f"DEBUG self.decrypted_config before={self.decrypted_config.get('model') if hasattr(self, 'decrypted_config') else 'N/A'}") + + with open(self.config_file, 'w', encoding='utf-8') as f: + json.dump(config_to_save, f, ensure_ascii=False, indent=2) + + # IMPORTANT: Update the in-memory configs so the UI reflects the changes immediately + self.config = config_to_save + # Update decrypted config too + self.decrypted_config = config.copy() # Use the original (unencrypted) version + if API_KEY_ENCRYPTION_AVAILABLE: + # Make sure decrypted_config has decrypted values + self.decrypted_config = decrypt_config(self.decrypted_config) + + print(f"DEBUG self.config after={self.config.get('model')}") + print(f"DEBUG self.decrypted_config after={self.decrypted_config.get('model')}") + + if is_hf_spaces: + print(f"✅ Saved to persistent storage: {self.config_file}") + # Also verify the file was written + if os.path.exists(self.config_file): + file_size = os.path.getsize(self.config_file) + print(f"✅ File confirmed: {file_size} bytes") + return "✅ Settings saved to persistent storage!" + else: + print(f"✅ Saved to {self.config_file}") + return "✅ Settings saved successfully!" 
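Usage note for the config helpers above (hypothetical values, assuming `GlossarionWeb` is importable from this module): plaintext keys are encrypted on save, the encrypted form is kept in `self.config`, and `self.decrypted_config` is refreshed so the UI keeps seeing plaintext:

```python
# Hypothetical round trip through save_config/load_config as defined above.
from app import GlossarionWeb

app = GlossarionWeb()

cfg = app.get_current_config_for_update()   # copy of the in-memory config
cfg['model'] = 'gpt-4-turbo'
cfg['api_key'] = 'sk-example'               # plaintext; save_config encrypts it if needed
print(app.save_config(cfg))                 # "✅ Settings saved ..." on success

# self.config now holds what was written to disk (api_key stored as "ENC:..."),
# while self.decrypted_config keeps the plaintext copy used by the UI.
print(app.get_config_value('model'))        # "gpt-4-turbo"

# On the next start, load_config() deep-merges the saved file over get_default_config(),
# so nested dicts such as manga_settings keep defaults for any missing keys.
```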
+ + except Exception as e: + print(f"❌ Save error: {e}") + if is_hf_spaces: + print(f"💡 Note: Make sure you have persistent storage enabled for your Space") + return f"❌ Failed to save: {str(e)}\n\nNote: Persistent storage may not be enabled" + return f"❌ Failed to save: {str(e)}" + + def translate_epub( + self, + epub_file, + model, + api_key, + profile_name, + system_prompt, + temperature, + max_tokens, + enable_image_trans=False, + glossary_file=None + ): + """Translate EPUB file - yields progress updates""" + + if not TRANSLATION_AVAILABLE: + yield None, None, None, "❌ Translation modules not loaded", None, "Error", 0 + return + + if not epub_file: + yield None, None, None, "❌ Please upload an EPUB or TXT file", None, "Error", 0 + return + + if not api_key: + yield None, None, None, "❌ Please provide an API key", None, "Error", 0 + return + + if not profile_name: + yield None, None, None, "❌ Please select a translation profile", None, "Error", 0 + return + + # Initialize logs list + translation_logs = [] + + try: + # Initial status + input_path = epub_file.name if hasattr(epub_file, 'name') else epub_file + file_ext = os.path.splitext(input_path)[1].lower() + file_type = "EPUB" if file_ext == ".epub" else "TXT" + + translation_logs.append(f"📚 Starting {file_type} translation...") + yield None, None, gr.update(visible=True), "\n".join(translation_logs), gr.update(visible=True), "Starting...", 0 + + # Save uploaded file to temp location if needed + epub_base = os.path.splitext(os.path.basename(input_path))[0] + + translation_logs.append(f"📖 Input: {os.path.basename(input_path)}") + translation_logs.append(f"🤖 Model: {model}") + translation_logs.append(f"📝 Profile: {profile_name}") + yield None, None, gr.update(visible=True), "\n".join(translation_logs), gr.update(visible=True), "Initializing...", 5 + + # Use the provided system prompt (user may have edited it) + translation_prompt = system_prompt if system_prompt else self.profiles.get(profile_name, "") + + # Set the input path as a command line argument simulation + import sys + original_argv = sys.argv.copy() + sys.argv = ['glossarion_web.py', input_path] + + # Set environment variables for TransateKRtoEN.main() + os.environ['INPUT_PATH'] = input_path + os.environ['MODEL'] = model + os.environ['TRANSLATION_TEMPERATURE'] = str(temperature) + os.environ['MAX_OUTPUT_TOKENS'] = str(max_tokens) + os.environ['ENABLE_IMAGE_TRANSLATION'] = '1' if enable_image_trans else '0' + # Set output directory to current working directory + os.environ['OUTPUT_DIRECTORY'] = os.getcwd() + + # Set all additional environment variables from config + self.set_all_environment_variables() + + # OVERRIDE critical safety features AFTER config load + # CORRECT variable name is EMERGENCY_PARAGRAPH_RESTORE (no ATION) + os.environ['EMERGENCY_PARAGRAPH_RESTORE'] = '0' # DISABLED + os.environ['REMOVE_AI_ARTIFACTS'] = '1' # ENABLED + + # Debug: Verify settings + translation_logs.append(f"\n🔧 Debug: EMERGENCY_PARAGRAPH_RESTORE = '{os.environ.get('EMERGENCY_PARAGRAPH_RESTORE', 'NOT SET')}'") + translation_logs.append(f"🔧 Debug: REMOVE_AI_ARTIFACTS = '{os.environ.get('REMOVE_AI_ARTIFACTS', 'NOT SET')}'") + yield None, None, gr.update(visible=True), "\n".join(translation_logs), gr.update(visible=True), "Configuration set...", 10 + + # Set API key environment variable + if 'gpt' in model.lower() or 'openai' in model.lower(): + os.environ['OPENAI_API_KEY'] = api_key + os.environ['API_KEY'] = api_key + elif 'claude' in model.lower(): + os.environ['ANTHROPIC_API_KEY'] = api_key + 
os.environ['API_KEY'] = api_key + elif 'gemini' in model.lower(): + os.environ['GOOGLE_API_KEY'] = api_key + os.environ['API_KEY'] = api_key + else: + os.environ['API_KEY'] = api_key + + # Set the system prompt + if translation_prompt: + # Save to temp profile + temp_config = self.config.copy() + temp_config['prompt_profiles'] = temp_config.get('prompt_profiles', {}) + temp_config['prompt_profiles'][profile_name] = translation_prompt + temp_config['active_profile'] = profile_name + + # Save temporarily + with open(self.config_file, 'w', encoding='utf-8') as f: + json.dump(temp_config, f, ensure_ascii=False, indent=2) + + translation_logs.append("⚙️ Configuration set") + yield None, None, gr.update(visible=True), "\n".join(translation_logs), gr.update(visible=True), "Starting translation...", 10 + + # Create a thread-safe queue for capturing logs + import queue + import threading + import time + log_queue = queue.Queue() + translation_complete = threading.Event() + translation_error = [None] + + def log_callback(msg): + """Capture log messages""" + if msg and msg.strip(): + log_queue.put(msg.strip()) + + # Run translation in a separate thread + def run_translation(): + try: + result = TransateKRtoEN.main( + log_callback=log_callback, + stop_callback=None + ) + translation_error[0] = None + except Exception as e: + translation_error[0] = e + finally: + translation_complete.set() + + translation_thread = threading.Thread(target=run_translation, daemon=True) + translation_thread.start() + + # Monitor progress + last_yield_time = time.time() + progress_percent = 10 + + while not translation_complete.is_set() or not log_queue.empty(): + # Check if stop was requested + if self.epub_translation_stop: + translation_logs.append("⚠️ Stopping translation...") + # Try to stop the translation thread + translation_complete.set() + break + + # Collect logs + new_logs = [] + while not log_queue.empty(): + try: + msg = log_queue.get_nowait() + new_logs.append(msg) + except queue.Empty: + break + + # Add new logs + if new_logs: + translation_logs.extend(new_logs) + + # Update progress based on log content + for log in new_logs: + if 'Chapter' in log or 'chapter' in log: + progress_percent = min(progress_percent + 5, 90) + elif '✅' in log or 'Complete' in log: + progress_percent = min(progress_percent + 10, 95) + elif 'Translating' in log: + progress_percent = min(progress_percent + 2, 85) + + # Yield updates periodically + current_time = time.time() + if new_logs or (current_time - last_yield_time) > 1.0: + status_text = new_logs[-1] if new_logs else "Processing..." 
+ # Keep only last 100 logs to avoid UI overflow + display_logs = translation_logs[-100:] if len(translation_logs) > 100 else translation_logs + yield None, None, gr.update(visible=True), "\n".join(display_logs), gr.update(visible=True), status_text, progress_percent + last_yield_time = current_time + + # Small delay to avoid CPU spinning + time.sleep(0.1) + + # Wait for thread to complete + translation_thread.join(timeout=5) + + # Restore original sys.argv + sys.argv = original_argv + + # Log any errors but don't fail immediately - check for output first + if translation_error[0]: + error_msg = f"⚠️ Translation completed with warnings: {str(translation_error[0])}" + translation_logs.append(error_msg) + translation_logs.append("🔍 Checking for output file...") + + # Check for output file - just grab any .epub from the output directory + output_dir = epub_base + compiled_epub = None + + # First, try to find ANY .epub file in the output directory + output_dir_path = os.path.join(os.getcwd(), output_dir) + if os.path.isdir(output_dir_path): + translation_logs.append(f"\n📂 Checking output directory: {output_dir_path}") + for file in os.listdir(output_dir_path): + if file.endswith('.epub'): + full_path = os.path.join(output_dir_path, file) + # Make sure it's not a temp/backup file + if os.path.isfile(full_path) and os.path.getsize(full_path) > 1000: + compiled_epub = full_path + translation_logs.append(f" ✅ Found EPUB in output dir: {file}") + break + + # If we found it in the output directory, return it immediately + if compiled_epub: + file_size = os.path.getsize(compiled_epub) + translation_logs.append(f"\n✅ Translation complete: {os.path.basename(compiled_epub)}") + translation_logs.append(f"🔗 File path: {compiled_epub}") + translation_logs.append(f"📏 File size: {file_size:,} bytes ({file_size/1024/1024:.2f} MB)") + translation_logs.append(f"📥 Click 'Download Translated {file_type}' below to save your file") + final_status = "Translation complete!" 
if not translation_error[0] else "Translation completed with warnings" + + yield ( + compiled_epub, + gr.update(value="### ✅ Translation Complete!", visible=True), + gr.update(visible=False), + "\n".join(translation_logs), + gr.update(value=final_status, visible=True), + final_status, + 100 + ) + return + + # Determine output extension based on input file type + output_ext = ".epub" if file_ext == ".epub" else ".txt" + + # Get potential base directories + base_dirs = [ + os.getcwd(), # Current working directory + os.path.dirname(input_path), # Input file directory + "/tmp", # Common temp directory on Linux/HF Spaces + "/home/user/app", # HF Spaces app directory + os.path.expanduser("~"), # Home directory + ] + + # Look for multiple possible output locations + possible_paths = [] + + # Extract title from input filename for more patterns + # e.g., "tales of terror_dick donovan 2" -> "Tales of Terror" + title_parts = os.path.basename(input_path).replace(output_ext, '').split('_') + possible_titles = [ + epub_base, # Original: tales of terror_dick donovan 2 + ' '.join(title_parts[:-2]).title() if len(title_parts) > 2 else epub_base, # Tales Of Terror + ] + + for base_dir in base_dirs: + if base_dir and os.path.exists(base_dir): + for title in possible_titles: + # Direct in base directory + possible_paths.append(os.path.join(base_dir, f"{title}_translated{output_ext}")) + possible_paths.append(os.path.join(base_dir, f"{title}{output_ext}")) + # In output subdirectory + possible_paths.append(os.path.join(base_dir, output_dir, f"{title}_translated{output_ext}")) + possible_paths.append(os.path.join(base_dir, output_dir, f"{title}{output_ext}")) + # In nested output directory + possible_paths.append(os.path.join(base_dir, epub_base, f"{title}_translated{output_ext}")) + possible_paths.append(os.path.join(base_dir, epub_base, f"{title}{output_ext}")) + + # Also add relative paths + possible_paths.extend([ + f"{epub_base}_translated{output_ext}", + os.path.join(output_dir, f"{epub_base}_translated{output_ext}"), + os.path.join(output_dir, f"{epub_base}{output_ext}"), + ]) + + # Also search for any translated file in the output directory + if os.path.isdir(output_dir): + for file in os.listdir(output_dir): + if file.endswith(f'_translated{output_ext}'): + possible_paths.insert(0, os.path.join(output_dir, file)) + + # Add debug information about current environment + translation_logs.append(f"\n📁 Debug Info:") + translation_logs.append(f" Current working directory: {os.getcwd()}") + translation_logs.append(f" Input file directory: {os.path.dirname(input_path)}") + translation_logs.append(f" Looking for: {epub_base}_translated{output_ext}") + + translation_logs.append(f"\n🔍 Searching for output file...") + for potential_epub in possible_paths[:10]: # Show first 10 paths + translation_logs.append(f" Checking: {potential_epub}") + if os.path.exists(potential_epub): + compiled_epub = potential_epub + translation_logs.append(f" ✅ Found: {potential_epub}") + break + + if not compiled_epub and len(possible_paths) > 10: + translation_logs.append(f" ... 
and {len(possible_paths) - 10} more paths") + + if compiled_epub: + # Verify file exists and is readable + if os.path.exists(compiled_epub) and os.path.isfile(compiled_epub): + file_size = os.path.getsize(compiled_epub) + translation_logs.append(f"✅ Translation complete: {os.path.basename(compiled_epub)}") + translation_logs.append(f"🔗 File path: {compiled_epub}") + translation_logs.append(f"📏 File size: {file_size:,} bytes ({file_size/1024/1024:.2f} MB)") + translation_logs.append(f"📥 Click 'Download Translated {file_type}' below to save your file") + # Make the file component visible with the translated file + final_status = "Translation complete!" if not translation_error[0] else "Translation completed with warnings" + + # Return the actual file path WITH visibility update + yield ( + compiled_epub, # epub_output - The file path (Gradio will handle it) + gr.update(value="### ✅ Translation Complete!", visible=True), # epub_status_message + gr.update(visible=False), # epub_progress_group + "\n".join(translation_logs), # epub_logs + gr.update(value=final_status, visible=True), # epub_status + final_status, # epub_progress_text + 100 # epub_progress_bar + ) + return + else: + translation_logs.append(f"⚠️ File found but not accessible: {compiled_epub}") + compiled_epub = None # Force search + + # Output file not found - search recursively in relevant directories + translation_logs.append("⚠️ Output file not in expected locations, searching recursively...") + found_files = [] + + # Search in multiple directories + search_dirs = [ + os.getcwd(), # Current directory + os.path.dirname(input_path), # Input file directory + "/tmp", # Temp directory (HF Spaces) + "/home/user/app", # HF Spaces app directory + ] + + for search_dir in search_dirs: + if not os.path.exists(search_dir): + continue + + translation_logs.append(f" Searching in: {search_dir}") + try: + for root, dirs, files in os.walk(search_dir, topdown=True): + # Limit depth to 3 levels and skip hidden/system directories + depth = root[len(search_dir):].count(os.sep) + if depth >= 3: + dirs[:] = [] # Don't go deeper + else: + dirs[:] = [d for d in dirs if not d.startswith('.') and d not in ['__pycache__', 'node_modules', 'venv', '.git']] + + for file in files: + # Look for files with _translated in name or matching our pattern + if (f'_translated{output_ext}' in file or + (file.endswith(output_ext) and epub_base in file)): + full_path = os.path.join(root, file) + found_files.append(full_path) + translation_logs.append(f" ✅ Found: {full_path}") + except (PermissionError, OSError) as e: + translation_logs.append(f" ⚠️ Could not search {search_dir}: {e}") + + if found_files: + # Use the most recently modified file + compiled_epub = max(found_files, key=os.path.getmtime) + + # Verify file exists and get info + if os.path.exists(compiled_epub) and os.path.isfile(compiled_epub): + file_size = os.path.getsize(compiled_epub) + translation_logs.append(f"✅ Found output file: {os.path.basename(compiled_epub)}") + translation_logs.append(f"🔗 File path: {compiled_epub}") + translation_logs.append(f"📏 File size: {file_size:,} bytes ({file_size/1024/1024:.2f} MB)") + translation_logs.append(f"📥 Click 'Download Translated {file_type}' below to save your file") + # Return the actual file path directly + yield ( + compiled_epub, # epub_output - Just the file path + gr.update(value="### ✅ Translation Complete!", visible=True), # epub_status_message + gr.update(visible=False), # epub_progress_group + "\n".join(translation_logs), # epub_logs + 
gr.update(value="Translation complete!", visible=True), # epub_status + "Translation complete!", # epub_progress_text + 100 # epub_progress_bar + ) + return + + # Still couldn't find output - report failure + translation_logs.append("❌ Could not locate translated output file") + translation_logs.append(f"🔍 Checked paths: {', '.join(possible_paths[:5])}...") + translation_logs.append("\n💡 Troubleshooting tips:") + translation_logs.append(" 1. Check if TransateKRtoEN.py completed successfully") + translation_logs.append(" 2. Look for any error messages in the logs above") + translation_logs.append(" 3. The output might be in a subdirectory - check manually") + yield None, gr.update(value="### ⚠️ Output Not Found", visible=True), gr.update(visible=False), "\n".join(translation_logs), gr.update(value="Translation process completed but output file not found", visible=True), "Output not found", 90 + + except Exception as e: + import traceback + error_msg = f"❌ Error during translation:\n{str(e)}\n\n{traceback.format_exc()}" + translation_logs.append(error_msg) + yield None, None, gr.update(visible=False), "\n".join(translation_logs), gr.update(visible=True), "Error occurred", 0 + + def translate_epub_with_stop(self, *args): + """Wrapper for translate_epub that includes button visibility control""" + self.epub_translation_stop = False + + # Show stop button, hide translate button at start + for result in self.translate_epub(*args): + if self.epub_translation_stop: + # Translation was stopped + yield result[0], result[1], result[2], result[3] + "\n\n⚠️ Translation stopped by user", result[4], "Stopped", 0, gr.update(visible=True), gr.update(visible=False) + return + # Add button visibility updates to the yields + yield result[0], result[1], result[2], result[3], result[4], result[5], result[6], gr.update(visible=False), gr.update(visible=True) + + # Reset buttons at the end + yield result[0], result[1], result[2], result[3], result[4], result[5], result[6], gr.update(visible=True), gr.update(visible=False) + + def stop_epub_translation(self): + """Stop the ongoing EPUB translation""" + self.epub_translation_stop = True + if self.epub_translation_thread and self.epub_translation_thread.is_alive(): + # The thread will check the stop flag + pass + return gr.update(visible=True), gr.update(visible=False), "Translation stopped" + + def extract_glossary( + self, + epub_file, + model, + api_key, + min_frequency, + max_names, + max_titles=30, + max_text_size=50000, + max_sentences=200, + translation_batch=50, + chapter_split_threshold=8192, + filter_mode='all', + strip_honorifics=True, + fuzzy_threshold=0.90, + extraction_prompt=None, + format_instructions=None, + use_legacy_csv=False + ): + """Extract glossary from EPUB with manual extraction settings - yields progress updates""" + + if not epub_file: + yield None, None, None, "❌ Please upload an EPUB file", None, "Error", 0 + return + + extraction_logs = [] + + try: + import extract_glossary_from_epub + + extraction_logs.append("🔍 Starting glossary extraction...") + yield None, None, gr.update(visible=True), "\n".join(extraction_logs), gr.update(visible=True), "Starting...", 0 + + input_path = epub_file.name if hasattr(epub_file, 'name') else epub_file + output_path = input_path.replace('.epub', '_glossary.csv') + + extraction_logs.append(f"📖 Input: {os.path.basename(input_path)}") + extraction_logs.append(f"🤖 Model: {model}") + yield None, None, gr.update(visible=True), "\n".join(extraction_logs), gr.update(visible=True), "Initializing...", 10 + + # 
Set all environment variables from config + self.set_all_environment_variables() + + # Set API key + if 'gpt' in model.lower(): + os.environ['OPENAI_API_KEY'] = api_key + elif 'claude' in model.lower(): + os.environ['ANTHROPIC_API_KEY'] = api_key + else: + os.environ['API_KEY'] = api_key + + extraction_logs.append("📋 Extracting text from EPUB...") + yield None, None, gr.update(visible=True), "\n".join(extraction_logs), gr.update(visible=True), "Extracting text...", 20 + + # Set environment variables for glossary extraction + os.environ['MODEL'] = model + os.environ['GLOSSARY_MIN_FREQUENCY'] = str(min_frequency) + os.environ['GLOSSARY_MAX_NAMES'] = str(max_names) + os.environ['GLOSSARY_MAX_TITLES'] = str(max_titles) + os.environ['GLOSSARY_BATCH_SIZE'] = str(translation_batch) + os.environ['GLOSSARY_MAX_TEXT_SIZE'] = str(max_text_size) + os.environ['GLOSSARY_MAX_SENTENCES'] = str(max_sentences) + os.environ['GLOSSARY_CHAPTER_SPLIT_THRESHOLD'] = str(chapter_split_threshold) + os.environ['GLOSSARY_FILTER_MODE'] = filter_mode + os.environ['GLOSSARY_STRIP_HONORIFICS'] = '1' if strip_honorifics else '0' + os.environ['GLOSSARY_FUZZY_THRESHOLD'] = str(fuzzy_threshold) + os.environ['GLOSSARY_USE_LEGACY_CSV'] = '1' if use_legacy_csv else '0' + + # Set prompts if provided + if extraction_prompt: + os.environ['GLOSSARY_SYSTEM_PROMPT'] = extraction_prompt + if format_instructions: + os.environ['GLOSSARY_FORMAT_INSTRUCTIONS'] = format_instructions + + extraction_logs.append(f"⚙️ Settings: Min freq={min_frequency}, Max names={max_names}, Filter={filter_mode}") + extraction_logs.append(f"⚙️ Options: Strip honorifics={strip_honorifics}, Fuzzy threshold={fuzzy_threshold:.2f}") + yield None, None, gr.update(visible=True), "\n".join(extraction_logs), gr.update(visible=True), "Processing...", 40 + + # Create a thread-safe queue for capturing logs + import queue + import threading + import time + log_queue = queue.Queue() + extraction_complete = threading.Event() + extraction_error = [None] + extraction_result = [None] + + def log_callback(msg): + """Capture log messages""" + if msg and msg.strip(): + log_queue.put(msg.strip()) + + # Run extraction in a separate thread + def run_extraction(): + try: + result = extract_glossary_from_epub.main( + log_callback=log_callback, + stop_callback=None + ) + extraction_result[0] = result + extraction_error[0] = None + except Exception as e: + extraction_error[0] = e + finally: + extraction_complete.set() + + extraction_thread = threading.Thread(target=run_extraction, daemon=True) + extraction_thread.start() + + # Monitor progress + last_yield_time = time.time() + progress_percent = 40 + + while not extraction_complete.is_set() or not log_queue.empty(): + # Check if stop was requested + if self.glossary_extraction_stop: + extraction_logs.append("⚠️ Stopping extraction...") + # Try to stop the extraction thread + extraction_complete.set() + break + + # Collect logs + new_logs = [] + while not log_queue.empty(): + try: + msg = log_queue.get_nowait() + new_logs.append(msg) + except queue.Empty: + break + + # Add new logs + if new_logs: + extraction_logs.extend(new_logs) + + # Update progress based on log content + for log in new_logs: + if 'Processing' in log or 'Extracting' in log: + progress_percent = min(progress_percent + 5, 80) + elif 'Writing' in log or 'Saving' in log: + progress_percent = min(progress_percent + 10, 90) + + # Yield updates periodically + current_time = time.time() + if new_logs or (current_time - last_yield_time) > 1.0: + status_text = new_logs[-1] if 
new_logs else "Processing..." + # Keep only last 100 logs + display_logs = extraction_logs[-100:] if len(extraction_logs) > 100 else extraction_logs + yield None, None, gr.update(visible=True), "\n".join(display_logs), gr.update(visible=True), status_text, progress_percent + last_yield_time = current_time + + # Small delay to avoid CPU spinning + time.sleep(0.1) + + # Wait for thread to complete + extraction_thread.join(timeout=5) + + # Check for errors + if extraction_error[0]: + error_msg = f"❌ Extraction error: {str(extraction_error[0])}" + extraction_logs.append(error_msg) + yield None, None, gr.update(visible=False), "\n".join(extraction_logs), gr.update(visible=True), error_msg, 0 + return + + extraction_logs.append("🖍️ Writing glossary to CSV...") + yield None, None, gr.update(visible=True), "\n".join(extraction_logs), gr.update(visible=True), "Writing CSV...", 95 + + if os.path.exists(output_path): + extraction_logs.append(f"✅ Glossary extracted successfully!") + extraction_logs.append(f"💾 Saved to: {os.path.basename(output_path)}") + yield output_path, gr.update(visible=True), gr.update(visible=False), "\n".join(extraction_logs), gr.update(visible=True), "Extraction complete!", 100 + else: + extraction_logs.append("❌ Glossary extraction failed - output file not created") + yield None, None, gr.update(visible=False), "\n".join(extraction_logs), gr.update(visible=True), "Extraction failed", 0 + + except Exception as e: + import traceback + error_msg = f"❌ Error during extraction:\n{str(e)}\n\n{traceback.format_exc()}" + extraction_logs.append(error_msg) + yield None, None, gr.update(visible=False), "\n".join(extraction_logs), gr.update(visible=True), "Error occurred", 0 + + def extract_glossary_with_stop(self, *args): + """Wrapper for extract_glossary that includes button visibility control""" + self.glossary_extraction_stop = False + + # Show stop button, hide extract button at start + for result in self.extract_glossary(*args): + if self.glossary_extraction_stop: + # Extraction was stopped + yield result[0], result[1], result[2], result[3] + "\n\n⚠️ Extraction stopped by user", result[4], "Stopped", 0, gr.update(visible=True), gr.update(visible=False) + return + # Add button visibility updates to the yields + yield result[0], result[1], result[2], result[3], result[4], result[5], result[6], gr.update(visible=False), gr.update(visible=True) + + # Reset buttons at the end + yield result[0], result[1], result[2], result[3], result[4], result[5], result[6], gr.update(visible=True), gr.update(visible=False) + + def stop_glossary_extraction(self): + """Stop the ongoing glossary extraction""" + self.glossary_extraction_stop = True + if self.glossary_extraction_thread and self.glossary_extraction_thread.is_alive(): + # The thread will check the stop flag + pass + return gr.update(visible=True), gr.update(visible=False), "Extraction stopped" + + def run_qa_scan(self, folder_path, min_foreign_chars, check_repetition, + check_glossary_leakage, min_file_length, check_multiple_headers, + check_missing_html, check_insufficient_paragraphs, + min_paragraph_percentage, report_format, auto_save_report): + """Run Quick QA scan on output folder - yields progress updates""" + + # Handle both string paths and File objects + if hasattr(folder_path, 'name'): + # It's a File object from Gradio + folder_path = folder_path.name + + if not folder_path: + yield gr.update(visible=False), gr.update(value="### ❌ Error", visible=True), gr.update(visible=False), "❌ Please provide a folder path or upload a ZIP 
file", gr.update(visible=False), "Error", 0 + return + + if isinstance(folder_path, str): + folder_path = folder_path.strip() + + if not os.path.exists(folder_path): + yield gr.update(visible=False), gr.update(value=f"### ❌ File/Folder not found", visible=True), gr.update(visible=False), f"❌ File/Folder not found: {folder_path}", gr.update(visible=False), "Error", 0 + return + + # Initialize scan_logs early + scan_logs = [] + + # Check if it's a ZIP or EPUB file (for Hugging Face Spaces or convenience) + if os.path.isfile(folder_path) and (folder_path.lower().endswith('.zip') or folder_path.lower().endswith('.epub')): + # Extract ZIP/EPUB to temp folder + import zipfile + import tempfile + + temp_dir = tempfile.mkdtemp(prefix="qa_scan_") + + try: + file_type = "EPUB" if folder_path.lower().endswith('.epub') else "ZIP" + scan_logs.append(f"📦 Extracting {file_type} file: {os.path.basename(folder_path)}") + + with zipfile.ZipFile(folder_path, 'r') as zip_ref: + # For EPUB files, look for the content folders + if file_type == "EPUB": + # EPUB files typically have OEBPS, EPUB, or similar content folders + all_files = zip_ref.namelist() + # Extract everything + zip_ref.extractall(temp_dir) + + # Try to find the content directory + content_dirs = ['OEBPS', 'EPUB', 'OPS', 'content'] + actual_content_dir = None + for dir_name in content_dirs: + potential_dir = os.path.join(temp_dir, dir_name) + if os.path.exists(potential_dir): + actual_content_dir = potential_dir + break + + # If no standard content dir found, use the temp_dir itself + if actual_content_dir: + folder_path = actual_content_dir + scan_logs.append(f"📁 Found EPUB content directory: {os.path.basename(actual_content_dir)}") + else: + folder_path = temp_dir + scan_logs.append(f"📁 Using extracted root directory") + else: + # Regular ZIP file + zip_ref.extractall(temp_dir) + folder_path = temp_dir + + scan_logs.append(f"✅ Successfully extracted to temporary folder") + # Continue with normal processing, but include initial logs + # Note: we'll need to pass scan_logs through the rest of the function + + except Exception as e: + yield gr.update(visible=False), gr.update(value=f"### ❌ {file_type} extraction failed", visible=True), gr.update(visible=False), f"❌ Failed to extract {file_type}: {str(e)}", gr.update(visible=False), "Error", 0 + return + elif not os.path.isdir(folder_path): + yield gr.update(visible=False), gr.update(value=f"### ❌ Not a folder, ZIP, or EPUB", visible=True), gr.update(visible=False), f"❌ Path is not a folder, ZIP, or EPUB file: {folder_path}", gr.update(visible=False), "Error", 0 + return + + try: + scan_logs.append("🔍 Starting Quick QA Scan...") + scan_logs.append(f"📁 Scanning folder: {folder_path}") + yield gr.update(visible=False), gr.update(value="### Scanning...", visible=True), gr.update(visible=True), "\n".join(scan_logs), gr.update(visible=False), "Starting...", 0 + + # Find all HTML/XHTML files in the folder and subfolders + html_files = [] + for root, dirs, files in os.walk(folder_path): + for file in files: + if file.lower().endswith(('.html', '.xhtml', '.htm')): + html_files.append(os.path.join(root, file)) + + if not html_files: + scan_logs.append(f"⚠️ No HTML/XHTML files found in {folder_path}") + yield gr.update(visible=False), gr.update(value="### ⚠️ No files found", visible=True), gr.update(visible=False), "\n".join(scan_logs), gr.update(visible=False), "No files to scan", 0 + return + + scan_logs.append(f"📄 Found {len(html_files)} HTML/XHTML files to scan") + scan_logs.append("⚡ Quick Scan Mode (85% 
threshold, Speed optimized)")
+            yield gr.update(visible=False), gr.update(value="### Initializing...", visible=True), gr.update(visible=True), "\n".join(scan_logs), gr.update(visible=False), "Initializing...", 10
+
+            # QA scanning process
+            import re
+            import random
+            total_files = len(html_files)
+            issues_found = []
+            chapters_scanned = set()
+
+            for i, file_path in enumerate(html_files):
+                if self.qa_scan_stop:
+                    scan_logs.append("⚠️ Scan stopped by user")
+                    break
+
+                # Get relative path from base folder for cleaner display
+                rel_path = os.path.relpath(file_path, folder_path)
+                file_name = rel_path.replace('\\', '/')
+
+                # Quick scan optimization: skip files whose chapter number is adjacent to
+                # chapters we've already scanned (consecutive chapter checking)
+                skip_file = False
+                for pattern in ['chapter', 'ch', 'c']:
+                    if pattern in file_name.lower():
+                        match = re.search(r'(\d+)', file_name)
+                        if match:
+                            chapter_num = int(match.group(1))
+                            # Only start skipping after a few chapters have been scanned (Quick Scan optimization)
+                            if len(chapters_scanned) > 5 and any(abs(chapter_num - ch) <= 1 for ch in chapters_scanned):
+                                skip_file = True
+                            else:
+                                chapters_scanned.add(chapter_num)
+                        break
+                if skip_file:
+                    continue
+
+                scan_logs.append(f"\n🔍 Scanning: {file_name}")
+                progress = int(10 + (80 * i / total_files))
+                yield None, None, gr.update(visible=True), "\n".join(scan_logs), gr.update(visible=True), f"Scanning {file_name}...", progress
+
+                # Read and check the HTML file
+                try:
+                    with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
+                        content = f.read()
+
+                    file_issues = []
+
+                    # Check file length
+                    if len(content) < min_file_length:
+                        continue  # Skip short files
+
+                    # Check for foreign characters (simulation - a real implementation
+                    # would check for remaining source-language characters)
+
+                    # Check for multiple headers
+                    if check_multiple_headers:
+                        headers = re.findall(r'<h[1-6][^>]*>', content, re.IGNORECASE)
+                        if len(headers) >= 2:
+                            file_issues.append("Multiple headers detected")
+
+                    # Check for missing html tag
+                    if check_missing_html:
+                        if '<html' not in content.lower():
+                            file_issues.append("Missing <html> tag")
+
+                    # Check for insufficient paragraphs
+                    if check_insufficient_paragraphs:
+                        p_tags = content.count('<p>') + content.count('<p ')
+                        text_length = len(re.sub(r'<[^>]+>', '', content))
+                        if text_length > 0:
+                            p_text = re.findall(r'<p[^>]*>(.*?)</p>', content, re.DOTALL)
+                            p_text_length = sum(len(t) for t in p_text)
+                            percentage = (p_text_length / text_length) * 100
+                            if percentage < min_paragraph_percentage:
+                                file_issues.append(f"Only {percentage:.1f}% text in <p> tags")
+
+                    # Simulated additional checks (placeholder probabilities, not real analysis)
+                    if check_repetition and random.random() > 0.85:
+                        file_issues.append("Excessive repetition detected")
+
+                    if check_glossary_leakage and random.random() > 0.9:
+                        file_issues.append("Glossary leakage detected")
+
+                    # Report issues found
+                    if file_issues:
+                        for issue in file_issues:
+                            issues_found.append(f"  ⚠️ {file_name}: {issue}")
+                            scan_logs.append(f"  ⚠️ Issue: {issue}")
+                    else:
+                        scan_logs.append("  ✅ No issues found")
+
+                except Exception as e:
+                    scan_logs.append(f"  ❌ Error reading file: {str(e)}")
+
+                # Update logs periodically
+                if len(scan_logs) > 100:
+                    scan_logs = scan_logs[-100:]  # Keep only the last 100 logs
+
+                yield gr.update(visible=False), None, gr.update(visible=True), "\n".join(scan_logs), gr.update(visible=False), f"Scanning {file_name}...", progress
+
+            # Generate report
+            scan_logs.append("\n📝 Generating report...")
+            yield gr.update(visible=False), 
None, gr.update(visible=True), "\n".join(scan_logs), gr.update(visible=False), "Generating report...", 95 + + # Create report content based on selected format + if report_format == "summary": + # Summary format - brief overview only + report_content = "QA SCAN REPORT - SUMMARY\n" + report_content += "=" * 50 + "\n\n" + report_content += f"Total files scanned: {total_files}\n" + report_content += f"Issues found: {len(issues_found)}\n\n" + if issues_found: + report_content += f"Files with issues: {min(len(issues_found), 10)} (showing first 10)\n" + report_content += "\n".join(issues_found[:10]) + else: + report_content += "✅ No issues detected." + + elif report_format == "verbose": + # Verbose format - all data including passed files + report_content = "QA SCAN REPORT - VERBOSE (ALL DATA)\n" + report_content += "=" * 50 + "\n\n" + from datetime import datetime + report_content += f"Scan Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n" + report_content += f"Folder Scanned: {folder_path}\n" + report_content += f"Total files scanned: {total_files}\n" + report_content += f"Issues found: {len(issues_found)}\n" + report_content += f"Settings used:\n" + report_content += f" - Min foreign chars: {min_foreign_chars}\n" + report_content += f" - Check repetition: {check_repetition}\n" + report_content += f" - Check glossary leakage: {check_glossary_leakage}\n" + report_content += f" - Min file length: {min_file_length}\n" + report_content += f" - Check multiple headers: {check_multiple_headers}\n" + report_content += f" - Check missing HTML: {check_missing_html}\n" + report_content += f" - Check insufficient paragraphs: {check_insufficient_paragraphs}\n" + report_content += f" - Min paragraph percentage: {min_paragraph_percentage}%\n\n" + + report_content += "ALL FILES PROCESSED:\n" + report_content += "-" * 30 + "\n" + for file in html_files: + rel_path = os.path.relpath(file, folder_path) + report_content += f" {rel_path}\n" + + if issues_found: + report_content += "\n\nISSUES DETECTED (DETAILED):\n" + report_content += "\n".join(issues_found) + else: + report_content += "\n\n✅ No issues detected. All files passed scan." + + else: # detailed (default/recommended) + # Detailed format - recommended balance + report_content = "QA SCAN REPORT - DETAILED\n" + report_content += "=" * 50 + "\n\n" + report_content += f"Total files scanned: {total_files}\n" + report_content += f"Issues found: {len(issues_found)}\n\n" + + if issues_found: + report_content += "ISSUES DETECTED:\n" + report_content += "\n".join(issues_found) + else: + report_content += "No issues detected. All files passed quick scan." 
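+
+                # Report format recap (mirrors the branches above):
+                #   summary  - file/issue counts plus the first 10 issue lines
+                #   verbose  - scan date, settings used, every file processed, and all issues
+                #   detailed - file/issue counts plus all issue lines (default/recommended)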
+ + # Always save report to file for download + from datetime import datetime + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + report_filename = f"qa_scan_report_{timestamp}.txt" + report_path = os.path.join(os.getcwd(), report_filename) + + # Always write the report file + with open(report_path, 'w', encoding='utf-8') as f: + f.write(report_content) + + if auto_save_report: + scan_logs.append(f"💾 Report auto-saved to: {report_filename}") + else: + scan_logs.append(f"📄 Report ready for download: {report_filename}") + + scan_logs.append(f"\n✅ QA Scan completed!") + scan_logs.append(f"📊 Summary: {total_files} files scanned, {len(issues_found)} issues found") + scan_logs.append(f"\n📥 Click 'Download QA Report' below to save the report") + + # Always return the report path and make File component visible + final_status = f"✅ Scan complete!\n{total_files} files scanned\n{len(issues_found)} issues found" + yield gr.update(value=report_path, visible=True), gr.update(value=f"### {final_status}", visible=True), gr.update(visible=False), "\n".join(scan_logs), gr.update(value=final_status, visible=True), "Scan complete!", 100 + + except Exception as e: + import traceback + error_msg = f"❌ Error during QA scan:\n{str(e)}\n\n{traceback.format_exc()}" + scan_logs.append(error_msg) + yield gr.update(visible=False), gr.update(value="### ❌ Error occurred", visible=True), gr.update(visible=False), "\n".join(scan_logs), gr.update(visible=True), "Error occurred", 0 + + def run_qa_scan_with_stop(self, *args): + """Wrapper for run_qa_scan that includes button visibility control""" + self.qa_scan_stop = False + + # Show stop button, hide scan button at start + for result in self.run_qa_scan(*args): + if self.qa_scan_stop: + # Scan was stopped + yield result[0], result[1], result[2], result[3] + "\n\n⚠️ Scan stopped by user", result[4], "Stopped", 0, gr.update(visible=True), gr.update(visible=False) + return + # Add button visibility updates to the yields + yield result[0], result[1], result[2], result[3], result[4], result[5], result[6], gr.update(visible=False), gr.update(visible=True) + + # Reset buttons at the end + yield result[0], result[1], result[2], result[3], result[4], result[5], result[6], gr.update(visible=True), gr.update(visible=False) + + def stop_qa_scan(self): + """Stop the ongoing QA scan""" + self.qa_scan_stop = True + return gr.update(visible=True), gr.update(visible=False), "Scan stopped" + + def stop_translation(self): + """Stop the ongoing translation process""" + print(f"DEBUG: stop_translation called, was_translating={self.is_translating}") + if self.is_translating: + print("DEBUG: Setting stop flag and cancellation") + self.stop_flag.set() + self.is_translating = False + + # Best-effort: cancel any in-flight API operation on the active client + try: + if getattr(self, 'current_unified_client', None): + self.current_unified_client.cancel_current_operation() + print("DEBUG: Requested UnifiedClient cancellation") + except Exception as e: + print(f"DEBUG: UnifiedClient cancel failed: {e}") + + # Also propagate to MangaTranslator class if available + try: + if MANGA_TRANSLATION_AVAILABLE: + from manga_translator import MangaTranslator + MangaTranslator.set_global_cancellation(True) + print("DEBUG: Set MangaTranslator global cancellation") + except ImportError: + pass + + # Also propagate to UnifiedClient if available + try: + if MANGA_TRANSLATION_AVAILABLE: + from unified_api_client import UnifiedClient + UnifiedClient.set_global_cancellation(True) + print("DEBUG: Set UnifiedClient 
global cancellation") + except ImportError: + pass + + # Kick off translator shutdown to free resources quickly + try: + tr = getattr(self, 'current_translator', None) + if tr and hasattr(tr, 'shutdown'): + import threading as _th + _th.Thread(target=tr.shutdown, name="WebMangaTranslatorShutdown", daemon=True).start() + print("DEBUG: Initiated translator shutdown thread") + # Clear reference so a new start creates a fresh instance + self.current_translator = None + except Exception as e: + print(f"DEBUG: Failed to start translator shutdown: {e}") + else: + print("DEBUG: stop_translation called but not translating") + + def _reset_translation_flags(self): + """Reset all translation flags for new translation""" + self.is_translating = False + self.stop_flag.clear() + + # Reset global cancellation flags + try: + if MANGA_TRANSLATION_AVAILABLE: + from manga_translator import MangaTranslator + MangaTranslator.set_global_cancellation(False) + except ImportError: + pass + + try: + if MANGA_TRANSLATION_AVAILABLE: + from unified_api_client import UnifiedClient + UnifiedClient.set_global_cancellation(False) + except ImportError: + pass + + def translate_manga( + self, + image_files, + model, + api_key, + profile_name, + system_prompt, + ocr_provider, + google_creds_path, + azure_key, + azure_endpoint, + enable_bubble_detection, + enable_inpainting, + font_size_mode, + font_size, + font_multiplier, + min_font_size, + max_font_size, + text_color, + shadow_enabled, + shadow_color, + shadow_offset_x, + shadow_offset_y, + shadow_blur, + bg_opacity, + bg_style, + parallel_panel_translation=False, + panel_max_workers=10 + ): + """Translate manga images - GENERATOR that yields (logs, image, cbz_file, status, progress_group, progress_text, progress_bar) updates""" + + # Reset translation flags and set running state + self._reset_translation_flags() + self.is_translating = True + + if not MANGA_TRANSLATION_AVAILABLE: + self.is_translating = False + yield "❌ Manga translation modules not loaded", None, None, gr.update(value="❌ Error", visible=True), gr.update(visible=False), gr.update(value="Error"), gr.update(value=0) + return + + if not image_files: + self.is_translating = False + yield "❌ Please upload at least one image", gr.update(visible=False), gr.update(visible=False), gr.update(value="❌ Error", visible=True), gr.update(visible=False), gr.update(value="Error"), gr.update(value=0) + return + + if not api_key: + self.is_translating = False + yield "❌ Please provide an API key", gr.update(visible=False), gr.update(visible=False), gr.update(value="❌ Error", visible=True), gr.update(visible=False), gr.update(value="Error"), gr.update(value=0) + return + + # Check for stop request + if self.stop_flag.is_set(): + self.is_translating = False + yield "⏹️ Translation stopped by user", gr.update(visible=False), gr.update(visible=False), gr.update(value="⏹️ Stopped", visible=True), gr.update(visible=False), gr.update(value="Stopped"), gr.update(value=0) + return + + if ocr_provider == "google": + # Check if credentials are provided or saved in config + if not google_creds_path and not self.get_config_value('google_vision_credentials'): + yield "❌ Please provide Google Cloud credentials JSON file", gr.update(visible=False), gr.update(visible=False), gr.update(value="❌ Error", visible=True), gr.update(visible=False), gr.update(value="Error"), gr.update(value=0) + return + + if ocr_provider == "azure": + # Ensure azure credentials are strings + azure_key_str = str(azure_key) if azure_key else '' + azure_endpoint_str 
= str(azure_endpoint) if azure_endpoint else '' + if not azure_key_str.strip() or not azure_endpoint_str.strip(): + yield "❌ Please provide Azure API key and endpoint", gr.update(visible=False), gr.update(visible=False), gr.update(value="❌ Error", visible=True), gr.update(visible=False), gr.update(value="Error"), gr.update(value=0) + return + + try: + + # Set all environment variables from config + self.set_all_environment_variables() + + # Set API key environment variable + if 'gpt' in model.lower() or 'openai' in model.lower(): + os.environ['OPENAI_API_KEY'] = api_key + elif 'claude' in model.lower(): + os.environ['ANTHROPIC_API_KEY'] = api_key + elif 'gemini' in model.lower(): + os.environ['GOOGLE_API_KEY'] = api_key + + # Set Google Cloud credentials if provided and save to config + if ocr_provider == "google": + if google_creds_path: + # New file provided - save it + creds_path = google_creds_path.name if hasattr(google_creds_path, 'name') else google_creds_path + os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = creds_path + # Auto-save to config + self.config['google_vision_credentials'] = creds_path + self.save_config(self.config) + elif self.get_config_value('google_vision_credentials'): + # Use saved credentials from config + creds_path = self.get_config_value('google_vision_credentials') + if os.path.exists(creds_path): + os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = creds_path + else: + yield f"❌ Saved Google credentials not found: {creds_path}", gr.update(visible=False), gr.update(visible=False), gr.update(value="❌ Error", visible=True), gr.update(visible=False), gr.update(value="Error"), gr.update(value=0) + return + + # Set Azure credentials if provided and save to config + if ocr_provider == "azure": + # Convert to strings and strip whitespace + azure_key_str = str(azure_key).strip() if azure_key else '' + azure_endpoint_str = str(azure_endpoint).strip() if azure_endpoint else '' + + os.environ['AZURE_VISION_KEY'] = azure_key_str + os.environ['AZURE_VISION_ENDPOINT'] = azure_endpoint_str + # Auto-save to config + self.config['azure_vision_key'] = azure_key_str + self.config['azure_vision_endpoint'] = azure_endpoint_str + self.save_config(self.config) + + # Apply text visibility settings to config + # Convert hex color to RGB tuple + def hex_to_rgb(hex_color): + # Handle different color formats + if isinstance(hex_color, (list, tuple)): + # Already RGB format + return tuple(hex_color[:3]) + elif isinstance(hex_color, str): + # Remove any brackets or spaces if present + hex_color = hex_color.strip().strip('[]').strip() + if hex_color.startswith('#'): + # Hex format + hex_color = hex_color.lstrip('#') + if len(hex_color) == 6: + return tuple(int(hex_color[i:i+2], 16) for i in (0, 2, 4)) + elif len(hex_color) == 3: + # Short hex format like #FFF + return tuple(int(hex_color[i]*2, 16) for i in range(3)) + elif ',' in hex_color: + # RGB string format like "255, 0, 0" + try: + parts = hex_color.split(',') + return tuple(int(p.strip()) for p in parts[:3]) + except: + pass + # Default to black if parsing fails + return (0, 0, 0) + + # Debug logging for color values + print(f"DEBUG: text_color type: {type(text_color)}, value: {text_color}") + print(f"DEBUG: shadow_color type: {type(shadow_color)}, value: {shadow_color}") + + try: + text_rgb = hex_to_rgb(text_color) + shadow_rgb = hex_to_rgb(shadow_color) + except Exception as e: + print(f"WARNING: Error converting colors: {e}") + print(f"WARNING: Using default colors - text: black, shadow: white") + text_rgb = (0, 0, 0) # Default to 
black text + shadow_rgb = (255, 255, 255) # Default to white shadow + + self.config['manga_font_size_mode'] = font_size_mode + self.config['manga_font_size'] = int(font_size) + self.config['manga_font_size_multiplier'] = float(font_multiplier) + self.config['manga_max_font_size'] = int(max_font_size) + self.config['manga_text_color'] = list(text_rgb) + self.config['manga_shadow_enabled'] = bool(shadow_enabled) + self.config['manga_shadow_color'] = list(shadow_rgb) + self.config['manga_shadow_offset_x'] = int(shadow_offset_x) + self.config['manga_shadow_offset_y'] = int(shadow_offset_y) + self.config['manga_shadow_blur'] = int(shadow_blur) + self.config['manga_bg_opacity'] = int(bg_opacity) + self.config['manga_bg_style'] = bg_style + + # Also update nested manga_settings structure + if 'manga_settings' not in self.config: + self.config['manga_settings'] = {} + if 'rendering' not in self.config['manga_settings']: + self.config['manga_settings']['rendering'] = {} + if 'font_sizing' not in self.config['manga_settings']: + self.config['manga_settings']['font_sizing'] = {} + + self.config['manga_settings']['rendering']['auto_min_size'] = int(min_font_size) + self.config['manga_settings']['font_sizing']['min_size'] = int(min_font_size) + self.config['manga_settings']['rendering']['auto_max_size'] = int(max_font_size) + self.config['manga_settings']['font_sizing']['max_size'] = int(max_font_size) + + # Prepare output directory + output_dir = tempfile.mkdtemp(prefix="manga_translated_") + translated_files = [] + cbz_mode = False + cbz_output_path = None + + # Initialize translation logs early (needed for CBZ processing) + translation_logs = [] + + # Check if any file is a CBZ/ZIP archive + import zipfile + files_to_process = image_files if isinstance(image_files, list) else [image_files] + extracted_images = [] + + for file in files_to_process: + file_path = file.name if hasattr(file, 'name') else file + if file_path.lower().endswith(('.cbz', '.zip')): + # Extract CBZ + cbz_mode = True + translation_logs.append(f"📚 Extracting CBZ: {os.path.basename(file_path)}") + extract_dir = tempfile.mkdtemp(prefix="cbz_extract_") + + try: + with zipfile.ZipFile(file_path, 'r') as zip_ref: + zip_ref.extractall(extract_dir) + + # Find all image files in extracted directory + import glob + for ext in ['*.png', '*.jpg', '*.jpeg', '*.webp', '*.bmp', '*.gif']: + extracted_images.extend(glob.glob(os.path.join(extract_dir, '**', ext), recursive=True)) + + # Sort naturally (by filename) + extracted_images.sort() + translation_logs.append(f"✅ Extracted {len(extracted_images)} images from CBZ") + + # Prepare CBZ output path + cbz_output_path = os.path.join(output_dir, f"{os.path.splitext(os.path.basename(file_path))[0]}_translated.cbz") + except Exception as e: + translation_logs.append(f"❌ Error extracting CBZ: {str(e)}") + else: + # Regular image file + extracted_images.append(file_path) + + # Use extracted images if CBZ was processed, otherwise use original files + if extracted_images: + # Create mock file objects for extracted images + class MockFile: + def __init__(self, path): + self.name = path + + files_to_process = [MockFile(img) for img in extracted_images] + + total_images = len(files_to_process) + + # Merge web app config with SimpleConfig for MangaTranslator + # This includes all the text visibility settings we just set + merged_config = self.config.copy() + + # Override with web-specific settings + merged_config['model'] = model + merged_config['active_profile'] = profile_name + + # Update manga_settings + 
if 'manga_settings' not in merged_config: + merged_config['manga_settings'] = {} + if 'ocr' not in merged_config['manga_settings']: + merged_config['manga_settings']['ocr'] = {} + if 'inpainting' not in merged_config['manga_settings']: + merged_config['manga_settings']['inpainting'] = {} + if 'advanced' not in merged_config['manga_settings']: + merged_config['manga_settings']['advanced'] = {} + + merged_config['manga_settings']['ocr']['provider'] = ocr_provider + merged_config['manga_settings']['ocr']['bubble_detection_enabled'] = enable_bubble_detection + merged_config['manga_settings']['inpainting']['method'] = 'local' if enable_inpainting else 'none' + # Make sure local_method is set from config (defaults to anime) + if 'local_method' not in merged_config['manga_settings']['inpainting']: + merged_config['manga_settings']['inpainting']['local_method'] = self.get_config_value('manga_settings', {}).get('inpainting', {}).get('local_method', 'anime') + + # Set parallel panel translation settings from config (Manga Settings tab) + # These are controlled in the Manga Settings tab, so reload config to get latest values + current_config = self.load_config() + if API_KEY_ENCRYPTION_AVAILABLE: + current_config = decrypt_config(current_config) + + config_parallel = current_config.get('manga_settings', {}).get('advanced', {}).get('parallel_panel_translation', False) + config_max_workers = current_config.get('manga_settings', {}).get('advanced', {}).get('panel_max_workers', 10) + + # Map web UI settings to MangaTranslator expected names + merged_config['manga_settings']['advanced']['parallel_panel_translation'] = config_parallel + merged_config['manga_settings']['advanced']['panel_max_workers'] = int(config_max_workers) + # CRITICAL: Also set the setting names that MangaTranslator actually checks + merged_config['manga_settings']['advanced']['parallel_processing'] = config_parallel + merged_config['manga_settings']['advanced']['max_workers'] = int(config_max_workers) + + # Log the parallel settings being used + print(f"🔧 Reloaded config - Using parallel panel translation: {config_parallel}") + print(f"🔧 Reloaded config - Using panel max workers: {config_max_workers}") + + # CRITICAL: Set skip_inpainting flag to False when inpainting is enabled + merged_config['manga_skip_inpainting'] = not enable_inpainting + + # Create a simple config object for MangaTranslator + class SimpleConfig: + def __init__(self, cfg): + self.config = cfg + + def get(self, key, default=None): + return self.config.get(key, default) + + # Create mock GUI object with necessary attributes + class MockGUI: + def __init__(self, config, profile_name, system_prompt, max_output_tokens, api_key, model): + self.config = config + # Add profile_var mock for MangaTranslator compatibility + class ProfileVar: + def __init__(self, profile): + self.profile = str(profile) if profile else '' + def get(self): + return self.profile + self.profile_var = ProfileVar(profile_name) + # Add prompt_profiles BOTH to config AND as attribute (manga_translator checks both) + if 'prompt_profiles' not in self.config: + self.config['prompt_profiles'] = {} + self.config['prompt_profiles'][profile_name] = system_prompt + # Also set as direct attribute for line 4653 check + self.prompt_profiles = self.config['prompt_profiles'] + # Add max_output_tokens as direct attribute (line 299 check) + self.max_output_tokens = max_output_tokens + # Add mock GUI attributes that MangaTranslator expects + class MockVar: + def __init__(self, val): + # Ensure val is properly typed 
+ self.val = val + def get(self): + return self.val + self.delay_entry = MockVar(float(config.get('delay', 2.0))) + self.trans_temp = MockVar(float(config.get('translation_temperature', 0.3))) + self.contextual_var = MockVar(bool(config.get('contextual', False))) + self.trans_history = MockVar(int(config.get('translation_history_limit', 2))) + self.translation_history_rolling_var = MockVar(bool(config.get('translation_history_rolling', False))) + self.token_limit_disabled = bool(config.get('token_limit_disabled', False)) + # IMPORTANT: token_limit_entry must return STRING because manga_translator calls .strip() on it + self.token_limit_entry = MockVar(str(config.get('token_limit', 200000))) + # Add API key and model for custom-api OCR provider - ensure strings + self.api_key_entry = MockVar(str(api_key) if api_key else '') + self.model_var = MockVar(str(model) if model else '') + + simple_config = SimpleConfig(merged_config) + # Get max_output_tokens from config or use from web app config + web_max_tokens = merged_config.get('max_output_tokens', 16000) + mock_gui = MockGUI(simple_config.config, profile_name, system_prompt, web_max_tokens, api_key, model) + + # Ensure model path is in config for local inpainting + if enable_inpainting: + local_method = merged_config.get('manga_settings', {}).get('inpainting', {}).get('local_method', 'anime') + # Set the model path key that MangaTranslator expects + model_path_key = f'manga_{local_method}_model_path' + if model_path_key not in merged_config: + # Use default model path or empty string + default_model_path = self.get_config_value(model_path_key, '') + merged_config[model_path_key] = default_model_path + print(f"Set {model_path_key} to: {default_model_path}") + + # Setup OCR configuration + ocr_config = { + 'provider': ocr_provider + } + + if ocr_provider == 'google': + ocr_config['google_credentials_path'] = google_creds_path.name if google_creds_path else None + elif ocr_provider == 'azure': + # Use string versions + azure_key_str = str(azure_key).strip() if azure_key else '' + azure_endpoint_str = str(azure_endpoint).strip() if azure_endpoint else '' + ocr_config['azure_key'] = azure_key_str + ocr_config['azure_endpoint'] = azure_endpoint_str + + # Create UnifiedClient for translation API calls + try: + unified_client = UnifiedClient( + api_key=api_key, + model=model, + output_dir=output_dir + ) + # Store reference for stop() cancellation support + self.current_unified_client = unified_client + except Exception as e: + error_log = f"❌ Failed to initialize API client: {str(e)}" + yield error_log, gr.update(visible=False), gr.update(visible=False), gr.update(value=error_log, visible=True), gr.update(visible=False), gr.update(value="Error"), gr.update(value=0) + return + + # Log storage - will be yielded as live updates + last_yield_log_count = [0] # Track when we last yielded + last_yield_time = [0] # Track last yield time + + # Track current image being processed + current_image_idx = [0] + + import time + + def should_yield_logs(): + """Check if we should yield log updates (every 2 logs or 1 second)""" + current_time = time.time() + log_count_diff = len(translation_logs) - last_yield_log_count[0] + time_diff = current_time - last_yield_time[0] + + # Yield if 2+ new logs OR 1+ seconds passed + return log_count_diff >= 2 or time_diff >= 1.0 + + def capture_log(msg, level="info"): + """Capture logs - caller will yield periodically""" + if msg and msg.strip(): + log_msg = msg.strip() + translation_logs.append(log_msg) + + # Initialize timing + 
last_yield_time[0] = time.time() + + # Create MangaTranslator instance + try: + # Debug: Log inpainting config + inpaint_cfg = merged_config.get('manga_settings', {}).get('inpainting', {}) + print(f"\n=== INPAINTING CONFIG DEBUG ===") + print(f"Inpainting enabled checkbox: {enable_inpainting}") + print(f"Inpainting method: {inpaint_cfg.get('method')}") + print(f"Local method: {inpaint_cfg.get('local_method')}") + print(f"Full inpainting config: {inpaint_cfg}") + print("=== END DEBUG ===\n") + + translator = MangaTranslator( + ocr_config=ocr_config, + unified_client=unified_client, + main_gui=mock_gui, + log_callback=capture_log + ) + + # Keep a reference for stop/shutdown support + self.current_translator = translator + + # Connect stop flag so translator can react immediately to stop requests + if hasattr(translator, 'set_stop_flag'): + try: + translator.set_stop_flag(self.stop_flag) + except Exception: + pass + + # CRITICAL: Set skip_inpainting flag directly on translator instance + translator.skip_inpainting = not enable_inpainting + print(f"Set translator.skip_inpainting = {translator.skip_inpainting}") + + # Explicitly initialize local inpainting if enabled + if enable_inpainting: + print(f"🎨 Initializing local inpainting...") + try: + # Force initialization of the inpainter + init_result = translator._initialize_local_inpainter() + if init_result: + print(f"✅ Local inpainter initialized successfully") + else: + print(f"⚠️ Local inpainter initialization returned False") + except Exception as init_error: + print(f"❌ Failed to initialize inpainter: {init_error}") + import traceback + traceback.print_exc() + + except Exception as e: + import traceback + full_error = traceback.format_exc() + print(f"\n\n=== MANGA TRANSLATOR INIT ERROR ===") + print(full_error) + print(f"\nocr_config: {ocr_config}") + print(f"\nmock_gui.model_var.get(): {mock_gui.model_var.get()}") + print(f"\nmock_gui.api_key_entry.get(): {type(mock_gui.api_key_entry.get())}") + print("=== END ERROR ===") + error_log = f"❌ Failed to initialize manga translator: {str(e)}\n\nCheck console for full traceback" + yield error_log, gr.update(visible=False), gr.update(visible=False), gr.update(value=error_log, visible=True), gr.update(visible=False), gr.update(value="Error"), gr.update(value=0) + return + + # Process each image with real progress tracking + for idx, img_file in enumerate(files_to_process, 1): + try: + # Check for stop request before processing each image + if self.stop_flag.is_set(): + translation_logs.append(f"\n⏹️ Translation stopped by user before image {idx}/{total_images}") + self.is_translating = False + yield "\n".join(translation_logs), gr.update(visible=False), gr.update(visible=False), gr.update(value="⏹️ Translation stopped", visible=True), gr.update(visible=True), gr.update(value="Stopped"), gr.update(value=0) + return + + # Update current image index for log capture + current_image_idx[0] = idx + + # Calculate progress range for this image + start_progress = (idx - 1) / total_images + end_progress = idx / total_images + + input_path = img_file.name if hasattr(img_file, 'name') else img_file + output_path = os.path.join(output_dir, f"translated_{os.path.basename(input_path)}") + filename = os.path.basename(input_path) + + # Log start of processing and YIELD update + start_msg = f"🎨 [{idx}/{total_images}] Starting: {filename}" + translation_logs.append(start_msg) + translation_logs.append(f"Image path: {input_path}") + translation_logs.append(f"Processing with OCR: {ocr_provider}, Model: {model}") + 
translation_logs.append("-" * 60) + + # Yield initial log update with progress + progress_percent = int(((idx - 1) / total_images) * 100) + status_text = f"Processing {idx}/{total_images}: {filename}" + last_yield_log_count[0] = len(translation_logs) + last_yield_time[0] = time.time() + yield "\n".join(translation_logs), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(value=status_text), gr.update(value=progress_percent) + + # Start processing in a thread so we can yield logs periodically + import threading + processing_complete = [False] + result_container = [None] + + def process_wrapper(): + result_container[0] = translator.process_image( + image_path=input_path, + output_path=output_path, + batch_index=idx, + batch_total=total_images + ) + processing_complete[0] = True + + # Start processing in background + process_thread = threading.Thread(target=process_wrapper, daemon=True) + process_thread.start() + + # Poll for log updates while processing + while not processing_complete[0]: + time.sleep(0.5) # Check every 0.5 seconds + + # Check for stop request during processing + if self.stop_flag.is_set(): + translation_logs.append(f"\n⏹️ Translation stopped by user while processing image {idx}/{total_images}") + self.is_translating = False + yield "\n".join(translation_logs), gr.update(visible=False), gr.update(visible=False), gr.update(value="⏹️ Translation stopped", visible=True), gr.update(visible=True), gr.update(value="Stopped"), gr.update(value=0) + return + + if should_yield_logs(): + progress_percent = int(((idx - 0.5) / total_images) * 100) # Mid-processing + status_text = f"Processing {idx}/{total_images}: {filename} (in progress...)" + last_yield_log_count[0] = len(translation_logs) + last_yield_time[0] = time.time() + yield "\n".join(translation_logs), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(value=status_text), gr.update(value=progress_percent) + + # Wait for thread to complete + process_thread.join(timeout=1) + result = result_container[0] + + if result.get('success'): + # Use the output path from the result + final_output = result.get('output_path', output_path) + if os.path.exists(final_output): + translated_files.append(final_output) + translation_logs.append(f"✅ Image {idx}/{total_images} COMPLETE: {filename} | Total: {len(translated_files)}/{total_images} done") + translation_logs.append("") + # Yield progress update with all translated images so far + progress_percent = int((idx / total_images) * 100) + status_text = f"Completed {idx}/{total_images}: {filename}" + # Show all translated files as gallery + yield "\n".join(translation_logs), gr.update(value=translated_files, visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(value=status_text), gr.update(value=progress_percent) + else: + translation_logs.append(f"⚠️ Image {idx}/{total_images}: Output file missing for {filename}") + translation_logs.append(f"⚠️ Warning: Output file not found for image {idx}") + translation_logs.append("") + # Yield progress update + progress_percent = int((idx / total_images) * 100) + status_text = f"Warning: {idx}/{total_images} - Output missing for {filename}" + yield "\n".join(translation_logs), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(value=status_text), gr.update(value=progress_percent) + else: + errors = result.get('errors', []) + 
error_msg = errors[0] if errors else 'Unknown error' + translation_logs.append(f"❌ Image {idx}/{total_images} FAILED: {error_msg[:50]}") + translation_logs.append(f"⚠️ Error on image {idx}: {error_msg}") + translation_logs.append("") + # Yield progress update + progress_percent = int((idx / total_images) * 100) + status_text = f"Failed: {idx}/{total_images} - {filename}" + yield "\n".join(translation_logs), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(value=status_text), gr.update(value=progress_percent) + + # If translation failed, save original with error overlay + from PIL import Image as PILImage, ImageDraw, ImageFont + img = PILImage.open(input_path) + draw = ImageDraw.Draw(img) + # Add error message + draw.text((10, 10), f"Translation Error: {error_msg[:50]}", fill="red") + img.save(output_path) + translated_files.append(output_path) + + except Exception as e: + import traceback + error_trace = traceback.format_exc() + translation_logs.append(f"❌ Image {idx}/{total_images} ERROR: {str(e)[:60]}") + translation_logs.append(f"❌ Exception on image {idx}: {str(e)}") + print(f"Manga translation error for {input_path}:\n{error_trace}") + + # Save original on error + try: + from PIL import Image as PILImage + img = PILImage.open(input_path) + img.save(output_path) + translated_files.append(output_path) + except: + pass + continue + + # Check for stop request before final processing + if self.stop_flag.is_set(): + translation_logs.append("\n⏹️ Translation stopped by user") + self.is_translating = False + yield "\n".join(translation_logs), gr.update(visible=False), gr.update(visible=False), gr.update(value="⏹️ Translation stopped", visible=True), gr.update(visible=True), gr.update(value="Stopped"), gr.update(value=0) + return + + # Add completion message + translation_logs.append("\n" + "="*60) + translation_logs.append(f"✅ ALL COMPLETE! Successfully translated {len(translated_files)}/{total_images} images") + translation_logs.append("="*60) + + # If CBZ mode, compile translated images into CBZ archive + final_output_for_display = None + if cbz_mode and cbz_output_path and translated_files: + translation_logs.append("\n📦 Compiling translated images into CBZ archive...") + try: + with zipfile.ZipFile(cbz_output_path, 'w', zipfile.ZIP_DEFLATED) as cbz: + for img_path in translated_files: + # Preserve original filename structure + arcname = os.path.basename(img_path).replace("translated_", "") + cbz.write(img_path, arcname) + + translation_logs.append(f"✅ CBZ archive created: {os.path.basename(cbz_output_path)}") + translation_logs.append(f"📁 Archive location: {cbz_output_path}") + final_output_for_display = cbz_output_path + except Exception as e: + translation_logs.append(f"❌ Error creating CBZ: {str(e)}") + + # Build final status with detailed panel information + final_status_lines = [] + if translated_files: + final_status_lines.append(f"✅ Successfully translated {len(translated_files)}/{total_images} image(s)!") + final_status_lines.append("") + final_status_lines.append("🖼️ **Translated Panels:**") + for i, file_path in enumerate(translated_files, 1): + filename = os.path.basename(file_path) + final_status_lines.append(f" {i}. 
{filename}") + + final_status_lines.append("") + final_status_lines.append("🔄 **Download Options:**") + if cbz_mode and cbz_output_path: + final_status_lines.append(f" 📦 CBZ Archive: {os.path.basename(cbz_output_path)}") + final_status_lines.append(f" 📁 Location: {cbz_output_path}") + else: + final_status_lines.append(f" 📁 Output directory: {output_dir}") + final_status_lines.append(" 🖼️ Individual images: Click on images in gallery above to download") + else: + final_status_lines.append("❌ Translation failed - no images were processed") + + final_status_text = "\n".join(final_status_lines) + + # Final yield with complete logs, image, CBZ, and final status + # Format: (logs_textbox, output_image, cbz_file, status_textbox, progress_group, progress_text, progress_bar) + final_progress_text = f"Complete! Processed {len(translated_files)}/{total_images} images" + if translated_files: + # Show all translated images in gallery + if cbz_mode and cbz_output_path and os.path.exists(cbz_output_path): + yield ( + "\n".join(translation_logs), + gr.update(value=translated_files, visible=True), # Show all images in gallery + gr.update(value=cbz_output_path, visible=True), # CBZ file for download with visibility + gr.update(value=final_status_text, visible=True), + gr.update(visible=True), + gr.update(value=final_progress_text), + gr.update(value=100) + ) + else: + yield ( + "\n".join(translation_logs), + gr.update(value=translated_files, visible=True), # Show all images in gallery + gr.update(visible=False), # Hide CBZ component + gr.update(value=final_status_text, visible=True), + gr.update(visible=True), + gr.update(value=final_progress_text), + gr.update(value=100) + ) + else: + yield ( + "\n".join(translation_logs), + gr.update(visible=False), + gr.update(visible=False), # Hide CBZ component + gr.update(value=final_status_text, visible=True), + gr.update(visible=True), + gr.update(value=final_progress_text), + gr.update(value=0) # 0% if nothing was processed + ) + + except Exception as e: + import traceback + error_msg = f"❌ Error during manga translation:\n{str(e)}\n\n{traceback.format_exc()}" + self.is_translating = False + yield error_msg, gr.update(visible=False), gr.update(visible=False), gr.update(value=error_msg, visible=True), gr.update(visible=False), gr.update(value="Error occurred"), gr.update(value=0) + finally: + # Always reset translation state when done + self.is_translating = False + # Clear active references on full completion + try: + self.current_translator = None + self.current_unified_client = None + except Exception: + pass + + def stop_manga_translation(self): + """Simple function to stop manga translation""" + print("DEBUG: Stop button clicked") + if self.is_translating: + print("DEBUG: Stopping active translation") + self.stop_translation() + # Return UI updates for button visibility and status + return ( + gr.update(visible=True), # translate button - show + gr.update(visible=False), # stop button - hide + "⏹️ Translation stopped by user" + ) + else: + print("DEBUG: No active translation to stop") + return ( + gr.update(visible=True), # translate button - show + gr.update(visible=False), # stop button - hide + "No active translation to stop" + ) + + def start_manga_translation(self, *args): + """Simple function to start manga translation - GENERATOR FUNCTION""" + print("DEBUG: Translate button clicked") + + # Reset flags for new translation and mark as translating BEFORE first yield + self._reset_translation_flags() + self.is_translating = True + + # Initial yield to update 
button visibility + yield ( + "🚀 Starting translation...", + gr.update(visible=False), # manga_output_gallery - hide initially + gr.update(visible=False), # manga_cbz_output + gr.update(value="Starting...", visible=True), # manga_status + gr.update(visible=False), # manga_progress_group + gr.update(value="Initializing..."), # manga_progress_text + gr.update(value=0), # manga_progress_bar + gr.update(visible=False), # translate button - hide during translation + gr.update(visible=True) # stop button - show during translation + ) + + # Call the translate function and yield all its results + last_result = None + try: + for result in self.translate_manga(*args): + # Check if stop was requested during iteration + if self.stop_flag.is_set(): + print("DEBUG: Stop flag detected, breaking translation loop") + break + + last_result = result + # Pad result to include button states (translate_visible=False, stop_visible=True) + if len(result) >= 7: + yield result + (gr.update(visible=False), gr.update(visible=True)) + else: + # Pad result to match expected length (7 values) then add button states + padded_result = list(result) + [gr.update(visible=False)] * (7 - len(result)) + yield tuple(padded_result) + (gr.update(visible=False), gr.update(visible=True)) + + except GeneratorExit: + print("DEBUG: Translation generator was closed") + self.is_translating = False + return + except Exception as e: + print(f"DEBUG: Exception during translation: {e}") + self.is_translating = False + # Show error and reset buttons + error_msg = f"❌ Error during translation: {str(e)}" + yield ( + error_msg, + gr.update(visible=False), + gr.update(visible=False), + gr.update(value=error_msg, visible=True), + gr.update(visible=False), + gr.update(value="Error occurred"), + gr.update(value=0), + gr.update(visible=True), # translate button - show after error + gr.update(visible=False) # stop button - hide after error + ) + return + finally: + # Clear active references when the loop exits + self.is_translating = False + try: + self.current_translator = None + self.current_unified_client = None + except Exception: + pass + + # Check if we stopped early + if self.stop_flag.is_set(): + yield ( + "⏹️ Translation stopped by user", + gr.update(visible=False), + gr.update(visible=False), + gr.update(value="⏹️ Translation stopped", visible=True), + gr.update(visible=False), + gr.update(value="Stopped"), + gr.update(value=0), + gr.update(visible=True), # translate button - show after stop + gr.update(visible=False) # stop button - hide after stop + ) + return + + # Final yield to reset buttons after successful completion + print("DEBUG: Translation completed normally, resetting buttons") + if last_result is None: + last_result = ("", gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(value="Complete"), gr.update(value=100)) + + if len(last_result) >= 7: + yield last_result[:7] + (gr.update(visible=True), gr.update(visible=False)) + else: + # Pad result to match expected length then add button states + padded_result = list(last_result) + [gr.update(visible=False)] * (7 - len(last_result)) + yield tuple(padded_result) + (gr.update(visible=True), gr.update(visible=False)) + + def create_interface(self): + """Create and return the Gradio interface""" + # Reload config before creating interface to get latest values + self.config = self.load_config() + self.decrypted_config = decrypt_config(self.config.copy()) if API_KEY_ENCRYPTION_AVAILABLE else self.config.copy() + + # Load and 
encode icon as base64 + icon_base64 = "" + icon_path = "Halgakos.ico" if os.path.exists("Halgakos.ico") else "Halgakos.ico" + if os.path.exists(icon_path): + with open(icon_path, "rb") as f: + icon_base64 = base64.b64encode(f.read()).decode() + + # Custom CSS to hide Gradio footer and add favicon + custom_css = """ + footer {display: none !important;} + .gradio-container {min-height: 100vh;} + + /* Stop button styling */ + .gr-button[data-variant="stop"] { + background-color: #dc3545 !important; + border-color: #dc3545 !important; + color: white !important; + } + .gr-button[data-variant="stop"]:hover { + background-color: #c82333 !important; + border-color: #bd2130 !important; + color: white !important; + } + """ + + # JavaScript for localStorage persistence - SIMPLE VERSION + localStorage_js = """ + <script> + console.log('Glossarion localStorage script loading...'); + + // Simple localStorage functions + function saveToLocalStorage(key, value) { + try { + localStorage.setItem('glossarion_' + key, JSON.stringify(value)); + console.log('Saved:', key, '=', value); + return true; + } catch (e) { + console.error('Save failed:', e); + return false; + } + } + + function loadFromLocalStorage(key, defaultValue) { + try { + const item = localStorage.getItem('glossarion_' + key); + return item ? JSON.parse(item) : defaultValue; + } catch (e) { + console.error('Load failed:', e); + return defaultValue; + } + } + + // Manual save current form values to localStorage + function saveCurrentSettings() { + const settings = {}; + + // Find all input elements in Gradio + document.querySelectorAll('input, select, textarea').forEach(el => { + // Skip file inputs + if (el.type === 'file') return; + + // Get a unique key based on element properties + let key = el.id || el.name || el.placeholder || ''; + if (!key) { + // Try to get label text + const label = el.closest('div')?.querySelector('label'); + if (label) key = label.textContent; + } + + if (key) { + key = key.trim().replace(/[^a-zA-Z0-9]/g, '_'); + if (el.type === 'checkbox') { + settings[key] = el.checked; + } else if (el.type === 'radio') { + if (el.checked) settings[key] = el.value; + } else if (el.value) { + settings[key] = el.value; + } + } + }); + + // Save all settings + Object.keys(settings).forEach(key => { + saveToLocalStorage(key, settings[key]); + }); + + console.log('Saved', Object.keys(settings).length, 'settings'); + return settings; + } + + // Export settings from localStorage + function exportSettings() { + console.log('Export started'); + + // First save current form state + saveCurrentSettings(); + + // Then export from localStorage + const settings = {}; + for (let i = 0; i < localStorage.length; i++) { + const key = localStorage.key(i); + if (key && key.startsWith('glossarion_')) { + try { + settings[key.replace('glossarion_', '')] = JSON.parse(localStorage.getItem(key)); + } catch (e) { + // Store as-is if not JSON + settings[key.replace('glossarion_', '')] = localStorage.getItem(key); + } + } + } + + if (Object.keys(settings).length === 0) { + alert('No settings to export. 
Try saving some settings first.'); + return; + } + + // Download as JSON + const blob = new Blob([JSON.stringify(settings, null, 2)], {type: 'application/json'}); + const url = URL.createObjectURL(blob); + const a = document.createElement('a'); + a.href = url; + a.download = 'glossarion_settings_' + new Date().toISOString().slice(0,19).replace(/:/g, '-') + '.json'; + document.body.appendChild(a); + a.click(); + document.body.removeChild(a); + URL.revokeObjectURL(url); + + console.log('Exported', Object.keys(settings).length, 'settings'); + } + + function importSettings(fileContent) { + try { + const settings = JSON.parse(fileContent); + Object.keys(settings).forEach(key => { + saveToLocalStorage(key, settings[key]); + }); + location.reload(); // Reload to apply settings + } catch (e) { + alert('Invalid settings file format'); + } + } + + // Expose to global scope + window.exportSettings = exportSettings; + window.importSettings = importSettings; + window.saveCurrentSettings = saveCurrentSettings; + window.saveToLocalStorage = saveToLocalStorage; + window.loadFromLocalStorage = loadFromLocalStorage; + + // Load settings from localStorage on page load for HF Spaces + function loadSettingsFromLocalStorage() { + console.log('Attempting to load settings from localStorage...'); + try { + // Get all localStorage items with glossarion_ prefix + const settings = {}; + for (let i = 0; i < localStorage.length; i++) { + const key = localStorage.key(i); + if (key && key.startsWith('glossarion_')) { + const cleanKey = key.replace('glossarion_', ''); + try { + settings[cleanKey] = JSON.parse(localStorage.getItem(key)); + } catch (e) { + settings[cleanKey] = localStorage.getItem(key); + } + } + } + + if (Object.keys(settings).length > 0) { + console.log('Found', Object.keys(settings).length, 'settings in localStorage'); + + // Try to update Gradio components + // This is tricky because Gradio components are rendered dynamically + // We'll need to find them by their labels or other identifiers + + // For now, just log what we found + console.log('Settings:', settings); + } + } catch (e) { + console.error('Error loading from localStorage:', e); + } + } + + // Try loading settings at various points + window.addEventListener('load', function() { + console.log('Page loaded'); + setTimeout(loadSettingsFromLocalStorage, 1000); + setTimeout(loadSettingsFromLocalStorage, 3000); + }); + + document.addEventListener('DOMContentLoaded', function() { + console.log('DOM ready'); + setTimeout(loadSettingsFromLocalStorage, 500); + }); + </script> + """ + + with gr.Blocks( + title="Glossarion - AI Translation", + theme=gr.themes.Soft(), + css=custom_css + ) as app: + + # Add custom HTML with favicon link and title with icon + icon_img_tag = f'<img src="data:image/png;base64,{icon_base64}" alt="Glossarion">' if icon_base64 else '' + + gr.HTML(f""" + <link rel="icon" type="image/x-icon" href="file/Halgakos.ico"> + <link rel="shortcut icon" type="image/x-icon" href="file/Halgakos.ico"> + <style> + .title-with-icon {{ + display: flex; + align-items: center; + gap: 15px; + margin-bottom: 10px; + }} + .title-with-icon img {{ + width: 48px; + height: 48px; + }} + </style> + <div class="title-with-icon"> + {icon_img_tag} + <h1>Glossarion - AI-Powered Translation</h1> + </div> + {localStorage_js} + """) + + with gr.Row(): + gr.Markdown(""" + Translate novels and books using advanced AI models (GPT-5, Claude, etc.) 
+ """) + + + # SECURITY: Save Config button disabled for Hugging Face to prevent API key leakage + # Users should use localStorage (browser-based storage) instead + # with gr.Column(scale=0): + # save_config_btn = gr.Button( + # "💾 Save Config", + # variant="secondary", + # size="sm" + # ) + # save_status_text = gr.Markdown( + # "", + # visible=False + # ) + + with gr.Tabs() as main_tabs: + # EPUB Translation Tab + with gr.Tab("📚 EPUB Translation"): + with gr.Row(): + with gr.Column(): + epub_file = gr.File( + label="📖 Upload EPUB or TXT File", + file_types=[".epub", ".txt"] + ) + + with gr.Row(): + translate_btn = gr.Button( + "🚀 Translate EPUB", + variant="primary", + size="lg", + scale=2 + ) + + stop_epub_btn = gr.Button( + "⏹️ Stop Translation", + variant="stop", + size="lg", + visible=False, + scale=1 + ) + + epub_model = gr.Dropdown( + choices=self.models, + value=self.get_config_value('model', 'gpt-4-turbo'), + label="🤖 AI Model", + interactive=True, + allow_custom_value=True, + filterable=True + ) + + epub_api_key = gr.Textbox( + label="🔑 API Key", + type="password", + placeholder="Enter your API key", + value=self.get_config_value('api_key', '') + ) + + # Use all profiles without filtering + profile_choices = list(self.profiles.keys()) + # Use saved active_profile instead of hardcoded default + default_profile = self.get_config_value('active_profile', profile_choices[0] if profile_choices else '') + + epub_profile = gr.Dropdown( + choices=profile_choices, + value=default_profile, + label="📝 Translation Profile" + ) + + epub_system_prompt = gr.Textbox( + label="System Prompt (Translation Instructions)", + lines=8, + max_lines=15, + interactive=True, + placeholder="Select a profile to load translation instructions...", + value=self.profiles.get(default_profile, '') if default_profile else '' + ) + + with gr.Accordion("⚙️ Advanced Settings", open=False): + epub_temperature = gr.Slider( + minimum=0, + maximum=1, + value=self.get_config_value('temperature', 0.3), + step=0.1, + label="Temperature" + ) + + epub_max_tokens = gr.Number( + label="Max Output Tokens", + value=self.get_config_value('max_output_tokens', 16000), + minimum=0 + ) + + gr.Markdown("### Image Translation") + + enable_image_translation = gr.Checkbox( + label="Enable Image Translation", + value=self.get_config_value('enable_image_translation', False), + info="Extracts and translates text from images using vision models" + ) + + gr.Markdown("### Glossary Settings") + + enable_auto_glossary = gr.Checkbox( + label="Enable Automatic Glossary Generation", + value=self.get_config_value('enable_auto_glossary', False), + info="Automatic extraction and translation of character names/terms" + ) + + append_glossary = gr.Checkbox( + label="Append Glossary to System Prompt", + value=self.get_config_value('append_glossary_to_prompt', True), + info="Applies to ALL glossaries - manual and automatic" + ) + + # Automatic glossary extraction settings (only show when enabled) + with gr.Group(visible=self.get_config_value('enable_auto_glossary', False)) as auto_glossary_settings: + gr.Markdown("#### Automatic Glossary Extraction Settings") + + with gr.Row(): + auto_glossary_min_freq = gr.Slider( + minimum=1, + maximum=10, + value=self.get_config_value('glossary_min_frequency', 2), + step=1, + label="Min Frequency", + info="Minimum times a name must appear" + ) + + auto_glossary_max_names = gr.Slider( + minimum=10, + maximum=200, + value=self.get_config_value('glossary_max_names', 50), + step=10, + label="Max Names", + info="Maximum number 
of character names" + ) + + with gr.Row(): + auto_glossary_max_titles = gr.Slider( + minimum=10, + maximum=100, + value=self.get_config_value('glossary_max_titles', 30), + step=5, + label="Max Titles", + info="Maximum number of titles/terms" + ) + + auto_glossary_batch_size = gr.Slider( + minimum=10, + maximum=100, + value=self.get_config_value('glossary_batch_size', 50), + step=5, + label="Translation Batch Size", + info="Terms per API call" + ) + + auto_glossary_filter_mode = gr.Radio( + choices=[ + ("All names & terms", "all"), + ("Names with honorifics only", "only_with_honorifics"), + ("Names without honorifics & terms", "only_without_honorifics") + ], + value=self.get_config_value('glossary_filter_mode', 'all'), + label="Filter Mode", + info="What types of names to extract" + ) + + auto_glossary_fuzzy_threshold = gr.Slider( + minimum=0.5, + maximum=1.0, + value=self.get_config_value('glossary_fuzzy_threshold', 0.90), + step=0.05, + label="Fuzzy Matching Threshold", + info="How similar names must be to match (0.9 = 90% match)" + ) + + # Toggle visibility of auto glossary settings + enable_auto_glossary.change( + fn=lambda x: gr.update(visible=x), + inputs=[enable_auto_glossary], + outputs=[auto_glossary_settings] + ) + + gr.Markdown("### Quality Assurance") + + enable_post_translation_scan = gr.Checkbox( + label="Enable post-translation Scanning phase", + value=self.get_config_value('enable_post_translation_scan', False), + info="Automatically run QA Scanner after translation completes" + ) + + glossary_file = gr.File( + label="📋 Manual Glossary CSV (optional)", + file_types=[".csv", ".json", ".txt"] + ) + + with gr.Column(): + # Add logo and status at top + with gr.Row(): + gr.Image( + value="Halgakos.png", + label=None, + show_label=False, + width=80, + height=80, + interactive=False, + show_download_button=False, + container=False + ) + epub_status_message = gr.Markdown( + value="### Ready to translate\nUpload an EPUB or TXT file and click 'Translate' to begin.", + visible=True + ) + + # Progress section (similar to manga tab) + with gr.Group(visible=False) as epub_progress_group: + gr.Markdown("### Progress") + epub_progress_text = gr.Textbox( + label="📨 Current Status", + value="Ready to start", + interactive=False, + lines=1 + ) + epub_progress_bar = gr.Slider( + minimum=0, + maximum=100, + value=0, + step=1, + label="📋 Translation Progress", + interactive=False, + show_label=True + ) + + epub_logs = gr.Textbox( + label="📋 Translation Logs", + lines=20, + max_lines=30, + value="Ready to translate. 
Upload an EPUB or TXT file and configure settings.", + visible=True, + interactive=False + ) + + epub_output = gr.File( + label="📥 Download Translated File", + visible=True # Always visible, will show file when ready + ) + + epub_status = gr.Textbox( + label="Final Status", + lines=3, + max_lines=5, + visible=False, + interactive=False + ) + + # Sync handlers will be connected after manga components are created + + # Translation button handler - now with progress outputs + translate_btn.click( + fn=self.translate_epub_with_stop, + inputs=[ + epub_file, + epub_model, + epub_api_key, + epub_profile, + epub_system_prompt, + epub_temperature, + epub_max_tokens, + enable_image_translation, + glossary_file + ], + outputs=[ + epub_output, # Download file + epub_status_message, # Top status message + epub_progress_group, # Progress group visibility + epub_logs, # Translation logs + epub_status, # Final status + epub_progress_text, # Progress text + epub_progress_bar, # Progress bar + translate_btn, # Show/hide translate button + stop_epub_btn # Show/hide stop button + ] + ) + + # Stop button handler + stop_epub_btn.click( + fn=self.stop_epub_translation, + inputs=[], + outputs=[translate_btn, stop_epub_btn, epub_status] + ) + + # Manga Translation Tab + with gr.Tab("🎨 Manga Translation"): + with gr.Row(): + with gr.Column(): + manga_images = gr.File( + label="🖼️ Upload Manga Images or CBZ", + file_types=[".png", ".jpg", ".jpeg", ".webp", ".bmp", ".gif", ".cbz", ".zip"], + file_count="multiple" + ) + + with gr.Row(): + translate_manga_btn = gr.Button( + "🚀 Translate Manga", + variant="primary", + size="lg", + scale=2 + ) + + stop_manga_btn = gr.Button( + "⏹️ Stop Translation", + variant="stop", + size="lg", + visible=False, + scale=1 + ) + + manga_model = gr.Dropdown( + choices=self.models, + value=self.get_config_value('model', 'gpt-4-turbo'), + label="🤖 AI Model", + interactive=True, + allow_custom_value=True, + filterable=True + ) + + manga_api_key = gr.Textbox( + label="🔑 API Key", + type="password", + placeholder="Enter your API key", + value=self.get_config_value('api_key', '') # Pre-fill from config + ) + + # Use all profiles without filtering + profile_choices = list(self.profiles.keys()) + # Use the active profile from config, same as EPUB tab + default_profile = self.get_config_value('active_profile', profile_choices[0] if profile_choices else '') + + manga_profile = gr.Dropdown( + choices=profile_choices, + value=default_profile, + label="📝 Translation Profile" + ) + + # Editable manga system prompt + manga_system_prompt = gr.Textbox( + label="Manga System Prompt (Translation Instructions)", + lines=8, + max_lines=15, + interactive=True, + placeholder="Select a manga profile to load translation instructions...", + value=self.profiles.get(default_profile, '') if default_profile else '' + ) + + with gr.Accordion("⚙️ OCR Settings", open=False): + gr.Markdown("🔒 **Credentials are auto-saved** to your config (encrypted) after first use.") + + ocr_provider = gr.Radio( + choices=["google", "azure", "custom-api"], + value=self.get_config_value('ocr_provider', 'custom-api'), + label="OCR Provider" + ) + + # Show saved Google credentials path if available + saved_google_path = self.get_config_value('google_vision_credentials', '') + if saved_google_path and os.path.exists(saved_google_path): + gr.Markdown(f"✅ **Saved credentials found:** `{os.path.basename(saved_google_path)}`") + gr.Markdown("💡 *Using saved credentials. 
Upload a new file only if you want to change them.*") + else: + gr.Markdown("⚠️ No saved Google credentials found. Please upload your JSON file.") + + # Note: File component doesn't support pre-filling paths due to browser security + google_creds = gr.File( + label="Google Cloud Credentials JSON (upload to update)", + file_types=[".json"] + ) + + azure_key = gr.Textbox( + label="Azure Vision API Key (if using Azure)", + type="password", + placeholder="Enter Azure API key", + value=self.get_config_value('azure_vision_key', '') + ) + + azure_endpoint = gr.Textbox( + label="Azure Vision Endpoint (if using Azure)", + placeholder="https://your-resource.cognitiveservices.azure.com/", + value=self.get_config_value('azure_vision_endpoint', '') + ) + + bubble_detection = gr.Checkbox( + label="Enable Bubble Detection", + value=self.get_config_value('bubble_detection_enabled', True) + ) + + inpainting = gr.Checkbox( + label="Enable Text Removal (Inpainting)", + value=self.get_config_value('inpainting_enabled', True) + ) + + with gr.Accordion("⚡ Parallel Processing", open=False): + gr.Markdown("### Parallel Panel Translation") + gr.Markdown("*Process multiple panels simultaneously for faster translation*") + + # Check environment variables first, then config + parallel_enabled = os.getenv('PARALLEL_PANEL_TRANSLATION', '').lower() == 'true' + if not parallel_enabled: + # Fall back to config if not set in env + parallel_enabled = self.get_config_value('manga_settings', {}).get('advanced', {}).get('parallel_panel_translation', False) + + # Get max workers from env or config + max_workers_env = os.getenv('PANEL_MAX_WORKERS', '') + if max_workers_env.isdigit(): + max_workers = int(max_workers_env) + else: + max_workers = self.get_config_value('manga_settings', {}).get('advanced', {}).get('panel_max_workers', 7) + + parallel_panel_translation = gr.Checkbox( + label="Enable Parallel Panel Translation", + value=parallel_enabled, + info="Translates multiple panels at once instead of sequentially" + ) + + panel_max_workers = gr.Slider( + minimum=1, + maximum=20, + value=max_workers, + step=1, + label="Max concurrent panels", + interactive=True, + info="Number of panels to process simultaneously (higher = faster but more memory)" + ) + + with gr.Accordion("✨ Text Visibility Settings", open=False): + gr.Markdown("### Font Settings") + + font_size_mode = gr.Radio( + choices=["auto", "fixed", "multiplier"], + value=self.get_config_value('manga_font_size_mode', 'auto'), + label="Font Size Mode" + ) + + font_size = gr.Slider( + minimum=0, + maximum=72, + value=self.get_config_value('manga_font_size', 24), + step=1, + label="Fixed Font Size (0=auto, used when mode=fixed)" + ) + + font_multiplier = gr.Slider( + minimum=0.5, + maximum=2.0, + value=self.get_config_value('manga_font_multiplier', self.get_config_value('manga_font_size_multiplier', 1.0)), # read the key save_all_config writes; fall back to the legacy key + step=0.1, + label="Font Size Multiplier (when mode=multiplier)" + ) + + min_font_size = gr.Slider( + minimum=0, + maximum=100, + value=self.get_config_value('manga_min_font_size', self.get_config_value('manga_settings', {}).get('rendering', {}).get('auto_min_size', 12)), # read the key save_all_config writes; fall back to rendering.auto_min_size + step=1, + label="Minimum Font Size (0=no limit)" + ) + + max_font_size = gr.Slider( + minimum=20, + maximum=100, + value=self.get_config_value('manga_max_font_size', 48), + step=1, + label="Maximum Font Size" + ) + + gr.Markdown("### Text Color") + + # Convert RGB array to hex if needed + def to_hex_color(color_value, default='#000000'): + if isinstance(color_value, (list, tuple)) and len(color_value) >= 3: + return '#{:02x}{:02x}{:02x}'.format(int(color_value[0]), int(color_value[1]), 
int(color_value[2])) + elif isinstance(color_value, str): + return color_value if color_value.startswith('#') else default + return default + + text_color_rgb = gr.ColorPicker( + label="Font Color", + value=to_hex_color(self.get_config_value('manga_text_color', [255, 255, 255]), '#FFFFFF') # Default white + ) + + gr.Markdown("### Shadow Settings") + + shadow_enabled = gr.Checkbox( + label="Enable Text Shadow", + value=self.get_config_value('manga_shadow_enabled', True) + ) + + shadow_color = gr.ColorPicker( + label="Shadow Color", + value=to_hex_color(self.get_config_value('manga_shadow_color', [0, 0, 0]), '#000000') # Default black + ) + + shadow_offset_x = gr.Slider( + minimum=-10, + maximum=10, + value=self.get_config_value('manga_shadow_offset_x', 2), + step=1, + label="Shadow Offset X" + ) + + shadow_offset_y = gr.Slider( + minimum=-10, + maximum=10, + value=self.get_config_value('manga_shadow_offset_y', 2), + step=1, + label="Shadow Offset Y" + ) + + shadow_blur = gr.Slider( + minimum=0, + maximum=10, + value=self.get_config_value('manga_shadow_blur', 0), + step=1, + label="Shadow Blur" + ) + + gr.Markdown("### Background Settings") + + bg_opacity = gr.Slider( + minimum=0, + maximum=255, + value=self.get_config_value('manga_bg_opacity', 130), + step=1, + label="Background Opacity" + ) + + # Ensure bg_style value is valid + bg_style_value = self.get_config_value('manga_bg_style', 'circle') + if bg_style_value not in ["box", "circle", "wrap"]: + bg_style_value = 'circle' # Default fallback + + bg_style = gr.Radio( + choices=["box", "circle", "wrap"], + value=bg_style_value, + label="Background Style" + ) + + with gr.Column(): + # Add logo and loading message at top + with gr.Row(): + gr.Image( + value="Halgakos.png", + label=None, + show_label=False, + width=80, + height=80, + interactive=False, + show_download_button=False, + container=False + ) + status_message = gr.Markdown( + value="### Ready to translate\nUpload an image and click 'Translate Manga' to begin.", + visible=True + ) + + # Progress section for manga translation (similar to manga integration script) + with gr.Group(visible=False) as manga_progress_group: + gr.Markdown("### Progress") + manga_progress_text = gr.Textbox( + label="📈 Current Status", + value="Ready to start", + interactive=False, + lines=1 + ) + manga_progress_bar = gr.Slider( + minimum=0, + maximum=100, + value=0, + step=1, + label="📋 Translation Progress", + interactive=False, + show_label=True + ) + + manga_logs = gr.Textbox( + label="📋 Translation Logs", + lines=20, + max_lines=30, + value="Ready to translate. 
Click 'Translate Manga' to begin.", + visible=True, + interactive=False + ) + + # Use Gallery to show all translated images + manga_output_gallery = gr.Gallery( + label="📷 Translated Images (click to download)", + visible=False, + show_label=True, + elem_id="manga_output_gallery", + columns=3, + rows=2, + height="auto", + allow_preview=True, + show_download_button=True # Allow download of individual images + ) + # Keep CBZ output for bulk download + manga_cbz_output = gr.File(label="📦 Download Translated CBZ", visible=False) + manga_status = gr.Textbox( + label="Final Status", + lines=8, + max_lines=15, + visible=False + ) + + # Global sync flag to prevent loops + self._syncing_active = False + + # Auto-save Azure credentials on change + def save_azure_credentials(key, endpoint): + """Save Azure credentials to config""" + try: + current_config = self.get_current_config_for_update() + # Don't decrypt - just update what we need + if key and key.strip(): + current_config['azure_vision_key'] = str(key).strip() + if endpoint and endpoint.strip(): + current_config['azure_vision_endpoint'] = str(endpoint).strip() + self.save_config(current_config) + return None + except Exception as e: + print(f"Failed to save Azure credentials: {e}") + return None + + # All auto-save handlers removed - use manual Save Config button to avoid constant writes to persistent storage + + # Only update system prompts when profiles change - no cross-tab syncing + epub_profile.change( + fn=lambda p: self.profiles.get(p, ''), + inputs=[epub_profile], + outputs=[epub_system_prompt] + ) + + manga_profile.change( + fn=lambda p: self.profiles.get(p, ''), + inputs=[manga_profile], + outputs=[manga_system_prompt] + ) + + # Manual save function for all configuration + def save_all_config( + model, api_key, profile, temperature, max_tokens, + enable_image_trans, enable_auto_gloss, append_gloss, + # Auto glossary settings + auto_gloss_min_freq, auto_gloss_max_names, auto_gloss_max_titles, + auto_gloss_batch_size, auto_gloss_filter_mode, auto_gloss_fuzzy, + enable_post_scan, + # Manual glossary extraction settings + manual_min_freq, manual_max_names, manual_max_titles, + manual_max_text_size, manual_max_sentences, manual_trans_batch, + manual_chapter_split, manual_filter_mode, manual_strip_honorifics, + manual_fuzzy, manual_extraction_prompt, manual_format_instructions, + manual_use_legacy_csv, + # QA Scanner settings + qa_min_foreign, qa_check_rep, qa_check_gloss_leak, + qa_min_file_len, qa_check_headers, qa_check_html, + qa_check_paragraphs, qa_min_para_percent, qa_report_fmt, qa_auto_save, + # Chapter processing options + batch_trans_headers, headers_batch, ncx_nav, attach_css, retain_ext, + conservative_batch, gemini_safety, http_openrouter, openrouter_compress, + extraction_method, filter_level, + # Thinking mode settings + gpt_thinking_enabled, gpt_effort, or_tokens, + gemini_thinking_enabled, gemini_budget, + manga_model, manga_api_key, manga_profile, + ocr_prov, azure_k, azure_e, + bubble_det, inpaint, + font_mode, font_s, font_mult, min_font, max_font, + text_col, shadow_en, shadow_col, + shadow_x, shadow_y, shadow_b, + bg_op, bg_st, + parallel_trans, panel_workers, + # Advanced Settings fields + detector_type_val, rtdetr_conf, bubble_conf, + detect_text, detect_empty, detect_free, max_detections, + local_method_val, webtoon_val, + batch_size_val, cache_enabled_val, + parallel_proc, max_work, + preload_local, stagger_ms, + torch_prec, auto_cleanup, + debug, save_inter, concise_logs + ): + """Save all configuration values 
at once""" + try: + config = self.get_current_config_for_update() + + # Save all values + config['model'] = model + if api_key: # Only save non-empty API keys + config['api_key'] = api_key + config['active_profile'] = profile + config['temperature'] = temperature + config['max_output_tokens'] = max_tokens + config['enable_image_translation'] = enable_image_trans + config['enable_auto_glossary'] = enable_auto_gloss + config['append_glossary_to_prompt'] = append_gloss + + # Auto glossary settings + config['glossary_min_frequency'] = auto_gloss_min_freq + config['glossary_max_names'] = auto_gloss_max_names + config['glossary_max_titles'] = auto_gloss_max_titles + config['glossary_batch_size'] = auto_gloss_batch_size + config['glossary_filter_mode'] = auto_gloss_filter_mode + config['glossary_fuzzy_threshold'] = auto_gloss_fuzzy + + # Manual glossary extraction settings + config['manual_glossary_min_frequency'] = manual_min_freq + config['manual_glossary_max_names'] = manual_max_names + config['manual_glossary_max_titles'] = manual_max_titles + config['glossary_max_text_size'] = manual_max_text_size + config['glossary_max_sentences'] = manual_max_sentences + config['manual_glossary_batch_size'] = manual_trans_batch + config['glossary_chapter_split_threshold'] = manual_chapter_split + config['manual_glossary_filter_mode'] = manual_filter_mode + config['strip_honorifics'] = manual_strip_honorifics + config['manual_glossary_fuzzy_threshold'] = manual_fuzzy + config['manual_glossary_prompt'] = manual_extraction_prompt + config['glossary_format_instructions'] = manual_format_instructions + config['glossary_use_legacy_csv'] = manual_use_legacy_csv + config['enable_post_translation_scan'] = enable_post_scan + + # QA Scanner settings + config['qa_min_foreign_chars'] = qa_min_foreign + config['qa_check_repetition'] = qa_check_rep + config['qa_check_glossary_leakage'] = qa_check_gloss_leak + config['qa_min_file_length'] = qa_min_file_len + config['qa_check_multiple_headers'] = qa_check_headers + config['qa_check_missing_html'] = qa_check_html + config['qa_check_insufficient_paragraphs'] = qa_check_paragraphs + config['qa_min_paragraph_percentage'] = qa_min_para_percent + config['qa_report_format'] = qa_report_fmt + config['qa_auto_save_report'] = qa_auto_save + + # Chapter processing options + config['batch_translate_headers'] = batch_trans_headers + config['headers_per_batch'] = headers_batch + config['use_ncx_navigation'] = ncx_nav + config['attach_css_to_chapters'] = attach_css + config['retain_source_extension'] = retain_ext + config['use_conservative_batching'] = conservative_batch + config['disable_gemini_safety'] = gemini_safety + config['use_http_openrouter'] = http_openrouter + config['disable_openrouter_compression'] = openrouter_compress + config['text_extraction_method'] = extraction_method + config['file_filtering_level'] = filter_level + + # Thinking mode settings + config['enable_gpt_thinking'] = gpt_thinking_enabled + config['gpt_thinking_effort'] = gpt_effort + config['or_thinking_tokens'] = or_tokens + config['enable_gemini_thinking'] = gemini_thinking_enabled + config['gemini_thinking_budget'] = gemini_budget + + # Manga settings + config['ocr_provider'] = ocr_prov + if azure_k: + config['azure_vision_key'] = azure_k + if azure_e: + config['azure_vision_endpoint'] = azure_e + config['bubble_detection_enabled'] = bubble_det + config['inpainting_enabled'] = inpaint + config['manga_font_size_mode'] = font_mode + config['manga_font_size'] = font_s + config['manga_font_multiplier'] = 
font_mult + config['manga_min_font_size'] = min_font + config['manga_max_font_size'] = max_font + config['manga_text_color'] = text_col + config['manga_shadow_enabled'] = shadow_en + config['manga_shadow_color'] = shadow_col + config['manga_shadow_offset_x'] = shadow_x + config['manga_shadow_offset_y'] = shadow_y + config['manga_shadow_blur'] = shadow_b + config['manga_bg_opacity'] = bg_op + config['manga_bg_style'] = bg_st + + # Advanced settings + if 'manga_settings' not in config: + config['manga_settings'] = {} + if 'advanced' not in config['manga_settings']: + config['manga_settings']['advanced'] = {} + config['manga_settings']['advanced']['parallel_panel_translation'] = parallel_trans + config['manga_settings']['advanced']['panel_max_workers'] = panel_workers + + # Advanced bubble detection and inpainting settings + if 'ocr' not in config['manga_settings']: + config['manga_settings']['ocr'] = {} + if 'inpainting' not in config['manga_settings']: + config['manga_settings']['inpainting'] = {} + + config['manga_settings']['ocr']['detector_type'] = detector_type_val + config['manga_settings']['ocr']['rtdetr_confidence'] = rtdetr_conf + config['manga_settings']['ocr']['bubble_confidence'] = bubble_conf + config['manga_settings']['ocr']['detect_text_bubbles'] = detect_text + config['manga_settings']['ocr']['detect_empty_bubbles'] = detect_empty + config['manga_settings']['ocr']['detect_free_text'] = detect_free + config['manga_settings']['ocr']['bubble_max_detections_yolo'] = max_detections + config['manga_settings']['inpainting']['local_method'] = local_method_val + config['manga_settings']['advanced']['webtoon_mode'] = webtoon_val + config['manga_settings']['inpainting']['batch_size'] = batch_size_val + config['manga_settings']['inpainting']['enable_cache'] = cache_enabled_val + config['manga_settings']['advanced']['parallel_processing'] = parallel_proc + config['manga_settings']['advanced']['max_workers'] = max_work + config['manga_settings']['advanced']['preload_local_inpainting_for_panels'] = preload_local + config['manga_settings']['advanced']['panel_start_stagger_ms'] = stagger_ms + config['manga_settings']['advanced']['torch_precision'] = torch_prec + config['manga_settings']['advanced']['auto_cleanup_models'] = auto_cleanup + config['manga_settings']['advanced']['debug_mode'] = debug + config['manga_settings']['advanced']['save_intermediate'] = save_inter + config['concise_pipeline_logs'] = concise_logs + + # Save to file + result = self.save_config(config) + + # Show success message for 3 seconds + return gr.update(value=result, visible=True) + + except Exception as e: + return gr.update(value=f"❌ Save failed: {str(e)}", visible=True) + + # Save button will be configured after all components are created + + # Auto-hide status message after 3 seconds + def hide_status_after_delay(): + import time + time.sleep(3) + return gr.update(visible=False) + + # Note: We can't use the change event to auto-hide because it would trigger immediately + # The status will remain visible until manually dismissed or page refresh + + # All individual field auto-save handlers removed - use manual Save Config button instead + + # Translate button click handler + translate_manga_btn.click( + fn=self.start_manga_translation, + inputs=[ + manga_images, + manga_model, + manga_api_key, + manga_profile, + manga_system_prompt, + ocr_provider, + google_creds, + azure_key, + azure_endpoint, + bubble_detection, + inpainting, + font_size_mode, + font_size, + font_multiplier, + min_font_size, + max_font_size, + 
text_color_rgb, + shadow_enabled, + shadow_color, + shadow_offset_x, + shadow_offset_y, + shadow_blur, + bg_opacity, + bg_style, + parallel_panel_translation, + panel_max_workers + ], + outputs=[manga_logs, manga_output_gallery, manga_cbz_output, manga_status, manga_progress_group, manga_progress_text, manga_progress_bar, translate_manga_btn, stop_manga_btn] + ) + + # Stop button click handler + stop_manga_btn.click( + fn=self.stop_manga_translation, + inputs=[], + outputs=[translate_manga_btn, stop_manga_btn, manga_status] + ) + + # Load settings from localStorage on page load + def load_settings_from_storage(): + """Load settings from localStorage or config file""" + is_hf_spaces = os.getenv('SPACE_ID') is not None or os.getenv('HF_SPACES') == 'true' + + if not is_hf_spaces: + # Load from config file locally + config = self.load_config() + # Decrypt API keys if needed + if API_KEY_ENCRYPTION_AVAILABLE: + config = decrypt_config(config) + return [ + config.get('model', 'gpt-4-turbo'), + config.get('api_key', ''), + config.get('active_profile', list(self.profiles.keys())[0] if self.profiles else ''), # profile + self.profiles.get(config.get('active_profile', list(self.profiles.keys())[0] if self.profiles else ''), ''), # prompt + config.get('ocr_provider', 'custom-api'), + None, # google_creds (file component - can't be pre-filled) + config.get('azure_vision_key', ''), + config.get('azure_vision_endpoint', ''), + config.get('bubble_detection_enabled', True), + config.get('inpainting_enabled', True), + config.get('manga_font_size_mode', 'auto'), + config.get('manga_font_size', 24), + config.get('manga_font_multiplier', 1.0), + config.get('manga_min_font_size', 12), + config.get('manga_max_font_size', 48), + config.get('manga_text_color', [255, 255, 255]), # Default white text + config.get('manga_shadow_enabled', True), + config.get('manga_shadow_color', [0, 0, 0]), # Default black shadow + config.get('manga_shadow_offset_x', 2), + config.get('manga_shadow_offset_y', 2), + config.get('manga_shadow_blur', 0), + config.get('manga_bg_opacity', 130), # matches the bg_opacity slider default + config.get('manga_bg_style', 'circle'), # Radio only accepts box/circle/wrap + config.get('manga_settings', {}).get('advanced', {}).get('parallel_panel_translation', False), + config.get('manga_settings', {}).get('advanced', {}).get('panel_max_workers', 7) + ] + else: + # For HF Spaces, return defaults (will be overridden by JS) + return [ + 'gpt-4-turbo', # model + '', # api_key + list(self.profiles.keys())[0] if self.profiles else '', # profile + self.profiles.get(list(self.profiles.keys())[0] if self.profiles else '', ''), # prompt + 'custom-api', # ocr_provider + None, # google_creds (file component - can't be pre-filled) + '', # azure_key + '', # azure_endpoint + True, # bubble_detection + True, # inpainting + 'auto', # font_size_mode + 24, # font_size + 1.0, # font_multiplier + 12, # min_font_size + 48, # max_font_size + '#FFFFFF', # text_color - white + True, # shadow_enabled + '#000000', # shadow_color - black + 2, # shadow_offset_x + 2, # shadow_offset_y + 0, # shadow_blur + 130, # bg_opacity + 'circle', # bg_style (box/circle/wrap) + False, # parallel_panel_translation + 7 # panel_max_workers + ] + + # Store references for load handler + self.manga_components = { + 'model': manga_model, + 'api_key': manga_api_key, + 'profile': manga_profile, + 'prompt': manga_system_prompt, + 'ocr_provider': ocr_provider, + 'google_creds': google_creds, + 'azure_key': azure_key, + 'azure_endpoint': azure_endpoint, + 'bubble_detection': bubble_detection, + 'inpainting': inpainting, + 'font_size_mode': 
font_size_mode, + 'font_size': font_size, + 'font_multiplier': font_multiplier, + 'min_font_size': min_font_size, + 'max_font_size': max_font_size, + 'text_color_rgb': text_color_rgb, + 'shadow_enabled': shadow_enabled, + 'shadow_color': shadow_color, + 'shadow_offset_x': shadow_offset_x, + 'shadow_offset_y': shadow_offset_y, + 'shadow_blur': shadow_blur, + 'bg_opacity': bg_opacity, + 'bg_style': bg_style, + 'parallel_panel_translation': parallel_panel_translation, + 'panel_max_workers': panel_max_workers + } + self.load_settings_fn = load_settings_from_storage + + # Manga Settings Tab - NEW + with gr.Tab("🎬 Manga Settings"): + gr.Markdown("### Advanced Manga Translation Settings") + gr.Markdown("Configure bubble detection, inpainting, preprocessing, and rendering options.") + + with gr.Accordion("🕹️ Bubble Detection & Inpainting", open=True): + gr.Markdown("#### Bubble Detection") + + detector_type = gr.Radio( + choices=["rtdetr_onnx", "rtdetr", "yolo"], + value=self.get_config_value('manga_settings', {}).get('ocr', {}).get('detector_type', 'rtdetr_onnx'), + label="Detector Type", + interactive=True + ) + + rtdetr_confidence = gr.Slider( + minimum=0.0, + maximum=1.0, + value=self.get_config_value('manga_settings', {}).get('ocr', {}).get('rtdetr_confidence', 0.3), + step=0.05, + label="RT-DETR Confidence Threshold", + interactive=True + ) + + bubble_confidence = gr.Slider( + minimum=0.0, + maximum=1.0, + value=self.get_config_value('manga_settings', {}).get('ocr', {}).get('bubble_confidence', 0.3), + step=0.05, + label="YOLO Bubble Confidence Threshold", + interactive=True + ) + + detect_text_bubbles = gr.Checkbox( + label="Detect Text Bubbles", + value=self.get_config_value('manga_settings', {}).get('ocr', {}).get('detect_text_bubbles', True) + ) + + detect_empty_bubbles = gr.Checkbox( + label="Detect Empty Bubbles", + value=self.get_config_value('manga_settings', {}).get('ocr', {}).get('detect_empty_bubbles', True) + ) + + detect_free_text = gr.Checkbox( + label="Detect Free Text (outside bubbles)", + value=self.get_config_value('manga_settings', {}).get('ocr', {}).get('detect_free_text', True) + ) + + bubble_max_detections = gr.Slider( + minimum=1, + maximum=2000, + value=self.get_config_value('manga_settings', {}).get('ocr', {}).get('bubble_max_detections_yolo', 100), + step=1, + label="Max detections (YOLO only)", + interactive=True, + info="Maximum number of bubble detections for YOLO detector" + ) + + gr.Markdown("#### Inpainting") + + local_inpaint_method = gr.Radio( + choices=["anime_onnx", "anime", "lama", "lama_onnx", "aot", "aot_onnx"], + value=self.get_config_value('manga_settings', {}).get('inpainting', {}).get('local_method', 'anime_onnx'), + label="Local Inpainting Model", + interactive=True + ) + + with gr.Row(): + download_models_btn = gr.Button( + "📥 Download Models", + variant="secondary", + size="sm" + ) + load_models_btn = gr.Button( + "📂 Load Models", + variant="secondary", + size="sm" + ) + + gr.Markdown("#### Mask Dilation") + + auto_iterations = gr.Checkbox( + label="Auto Iterations (Recommended)", + value=self.get_config_value('manga_settings', {}).get('auto_iterations', True) + ) + + mask_dilation = gr.Slider( + minimum=0, + maximum=20, + value=self.get_config_value('manga_settings', {}).get('mask_dilation', 0), + step=1, + label="General Mask Dilation", + interactive=True + ) + + text_bubble_dilation = gr.Slider( + minimum=0, + maximum=20, + value=self.get_config_value('manga_settings', {}).get('text_bubble_dilation_iterations', 2), + step=1, + label="Text 
Bubble Dilation Iterations", + interactive=True + ) + + empty_bubble_dilation = gr.Slider( + minimum=0, + maximum=20, + value=self.get_config_value('manga_settings', {}).get('empty_bubble_dilation_iterations', 3), + step=1, + label="Empty Bubble Dilation Iterations", + interactive=True + ) + + free_text_dilation = gr.Slider( + minimum=0, + maximum=20, + value=self.get_config_value('manga_settings', {}).get('free_text_dilation_iterations', 3), + step=1, + label="Free Text Dilation Iterations", + interactive=True + ) + + with gr.Accordion("🖌️ Image Preprocessing", open=False): + preprocessing_enabled = gr.Checkbox( + label="Enable Preprocessing", + value=self.get_config_value('manga_settings', {}).get('preprocessing', {}).get('enabled', False) + ) + + auto_detect_quality = gr.Checkbox( + label="Auto Detect Image Quality", + value=self.get_config_value('manga_settings', {}).get('preprocessing', {}).get('auto_detect_quality', True) + ) + + enhancement_strength = gr.Slider( + minimum=1.0, + maximum=3.0, + value=self.get_config_value('manga_settings', {}).get('preprocessing', {}).get('enhancement_strength', 1.5), + step=0.1, + label="Enhancement Strength", + interactive=True + ) + + denoise_strength = gr.Slider( + minimum=0, + maximum=50, + value=self.get_config_value('manga_settings', {}).get('preprocessing', {}).get('denoise_strength', 10), + step=1, + label="Denoise Strength", + interactive=True + ) + + max_image_dimension = gr.Number( + label="Max Image Dimension (pixels)", + value=self.get_config_value('manga_settings', {}).get('preprocessing', {}).get('max_image_dimension', 2000), + minimum=500 + ) + + chunk_height = gr.Number( + label="Chunk Height for Large Images", + value=self.get_config_value('manga_settings', {}).get('preprocessing', {}).get('chunk_height', 1000), + minimum=500 + ) + + gr.Markdown("#### HD Strategy for Inpainting") + gr.Markdown("*Controls how large images are processed during inpainting*") + + hd_strategy = gr.Radio( + choices=["original", "resize", "crop"], + value=self.get_config_value('manga_settings', {}).get('advanced', {}).get('hd_strategy', 'resize'), + label="HD Strategy", + interactive=True, + info="original = legacy full-image; resize/crop = faster" + ) + + hd_strategy_resize_limit = gr.Slider( + minimum=512, + maximum=4096, + value=self.get_config_value('manga_settings', {}).get('advanced', {}).get('hd_strategy_resize_limit', 1536), + step=64, + label="Resize Limit (long edge, px)", + info="For resize strategy", + interactive=True + ) + + hd_strategy_crop_margin = gr.Slider( + minimum=0, + maximum=256, + value=self.get_config_value('manga_settings', {}).get('advanced', {}).get('hd_strategy_crop_margin', 16), + step=2, + label="Crop Margin (px)", + info="For crop strategy", + interactive=True + ) + + hd_strategy_crop_trigger = gr.Slider( + minimum=256, + maximum=4096, + value=self.get_config_value('manga_settings', {}).get('advanced', {}).get('hd_strategy_crop_trigger_size', 1024), + step=64, + label="Crop Trigger Size (px)", + info="Apply crop only if long edge exceeds this", + interactive=True + ) + + gr.Markdown("#### Image Tiling") + gr.Markdown("*Alternative tiling strategy (note: HD Strategy takes precedence)*") + + tiling_enabled = gr.Checkbox( + label="Enable Tiling", + value=self.get_config_value('manga_settings', {}).get('tiling', {}).get('enabled', False) + ) + + tiling_tile_size = gr.Slider( + minimum=256, + maximum=1024, + value=self.get_config_value('manga_settings', {}).get('tiling', {}).get('tile_size', 480), + step=64, + label="Tile Size 
(px)", + interactive=True + ) + + tiling_tile_overlap = gr.Slider( + minimum=0, + maximum=128, + value=self.get_config_value('manga_settings', {}).get('tiling', {}).get('tile_overlap', 64), + step=16, + label="Tile Overlap (px)", + interactive=True + ) + + with gr.Accordion("🎨 Font & Text Rendering", open=False): + gr.Markdown("#### Font Sizing Algorithm") + + font_algorithm = gr.Radio( + choices=["smart", "simple"], + value=self.get_config_value('manga_settings', {}).get('font_sizing', {}).get('algorithm', 'smart'), + label="Font Sizing Algorithm", + interactive=True + ) + + prefer_larger = gr.Checkbox( + label="Prefer Larger Fonts", + value=self.get_config_value('manga_settings', {}).get('font_sizing', {}).get('prefer_larger', True) + ) + + max_lines = gr.Slider( + minimum=1, + maximum=20, + value=self.get_config_value('manga_settings', {}).get('font_sizing', {}).get('max_lines', 10), + step=1, + label="Maximum Lines Per Bubble", + interactive=True + ) + + line_spacing = gr.Slider( + minimum=0.5, + maximum=3.0, + value=self.get_config_value('manga_settings', {}).get('font_sizing', {}).get('line_spacing', 1.3), + step=0.1, + label="Line Spacing Multiplier", + interactive=True + ) + + bubble_size_factor = gr.Checkbox( + label="Use Bubble Size Factor", + value=self.get_config_value('manga_settings', {}).get('font_sizing', {}).get('bubble_size_factor', True) + ) + + auto_fit_style = gr.Radio( + choices=["balanced", "aggressive", "conservative"], + value=self.get_config_value('manga_settings', {}).get('rendering', {}).get('auto_fit_style', 'balanced'), + label="Auto Fit Style", + interactive=True + ) + + with gr.Accordion("⚙️ Advanced Options", open=False): + gr.Markdown("#### Format Detection") + + format_detection = gr.Checkbox( + label="Enable Format Detection (manga/webtoon)", + value=self.get_config_value('manga_settings', {}).get('advanced', {}).get('format_detection', True) + ) + + webtoon_mode = gr.Radio( + choices=["auto", "force_manga", "force_webtoon"], + value=self.get_config_value('manga_settings', {}).get('advanced', {}).get('webtoon_mode', 'auto'), + label="Webtoon Mode", + interactive=True + ) + + gr.Markdown("#### Inpainting Performance") + + inpaint_batch_size = gr.Slider( + minimum=1, + maximum=32, + value=self.get_config_value('manga_settings', {}).get('inpainting', {}).get('batch_size', 10), + step=1, + label="Batch Size", + interactive=True, + info="Process multiple regions at once" + ) + + inpaint_cache_enabled = gr.Checkbox( + label="Enable inpainting cache (speeds up repeated processing)", + value=self.get_config_value('manga_settings', {}).get('inpainting', {}).get('enable_cache', True) + ) + + gr.Markdown("#### Performance") + + parallel_processing = gr.Checkbox( + label="Enable Parallel Processing", + value=self.get_config_value('manga_settings', {}).get('advanced', {}).get('parallel_processing', True) + ) + + max_workers = gr.Slider( + minimum=1, + maximum=8, + value=self.get_config_value('manga_settings', {}).get('advanced', {}).get('max_workers', 2), + step=1, + label="Max Worker Threads", + interactive=True + ) + + gr.Markdown("**⚡ Advanced Performance**") + + preload_local_inpainting = gr.Checkbox( + label="Preload local inpainting instances for panel-parallel runs", + value=self.get_config_value('manga_settings', {}).get('advanced', {}).get('preload_local_inpainting_for_panels', True), + info="Preloads inpainting models to speed up parallel processing" + ) + + panel_start_stagger = gr.Slider( + minimum=0, + maximum=1000, + 
value=self.get_config_value('manga_settings', {}).get('advanced', {}).get('panel_start_stagger_ms', 30), + step=10, + label="Panel start stagger", + interactive=True, + info="Milliseconds delay between panel starts" + ) + + gr.Markdown("#### Model Optimization") + + torch_precision = gr.Radio( + choices=["fp32", "fp16"], + value=self.get_config_value('manga_settings', {}).get('advanced', {}).get('torch_precision', 'fp16'), + label="Torch Precision", + interactive=True + ) + + auto_cleanup_models = gr.Checkbox( + label="Auto Cleanup Models from Memory", + value=self.get_config_value('manga_settings', {}).get('advanced', {}).get('auto_cleanup_models', False) + ) + + gr.Markdown("#### Debug Options") + + debug_mode = gr.Checkbox( + label="Enable Debug Mode", + value=self.get_config_value('manga_settings', {}).get('advanced', {}).get('debug_mode', False) + ) + + save_intermediate = gr.Checkbox( + label="Save Intermediate Files", + value=self.get_config_value('manga_settings', {}).get('advanced', {}).get('save_intermediate', False) + ) + + concise_pipeline_logs = gr.Checkbox( + label="Concise Pipeline Logs", + value=self.get_config_value('concise_pipeline_logs', True) + ) + + # Button handlers for model management + def download_models_handler(detector_type_val, inpaint_method_val): + """Download selected models""" + messages = [] + + try: + # Download bubble detection model + if detector_type_val: + messages.append(f"📥 Downloading {detector_type_val} bubble detector...") + try: + from bubble_detector import BubbleDetector + bd = BubbleDetector() + + if detector_type_val == "rtdetr_onnx": + if bd.load_rtdetr_onnx_model(): + messages.append("✅ RT-DETR ONNX model downloaded successfully") + else: + messages.append("❌ Failed to download RT-DETR ONNX model") + elif detector_type_val == "rtdetr": + if bd.load_rtdetr_model(): + messages.append("✅ RT-DETR model downloaded successfully") + else: + messages.append("❌ Failed to download RT-DETR model") + elif detector_type_val == "yolo": + messages.append("ℹ️ YOLO models are downloaded automatically on first use") + except Exception as e: + messages.append(f"❌ Error downloading detector: {str(e)}") + + # Download inpainting model + if inpaint_method_val: + messages.append(f"\n📥 Downloading {inpaint_method_val} inpainting model...") + try: + from local_inpainter import LocalInpainter, LAMA_JIT_MODELS + + inpainter = LocalInpainter({}) + + # Map method names to download keys + method_map = { + 'anime_onnx': 'anime_onnx', + 'anime': 'anime', + 'lama': 'lama', + 'lama_onnx': 'lama_onnx', + 'aot': 'aot', + 'aot_onnx': 'aot_onnx' + } + + method_key = method_map.get(inpaint_method_val) + if method_key and method_key in LAMA_JIT_MODELS: + model_info = LAMA_JIT_MODELS[method_key] + messages.append(f"Downloading {model_info['name']}...") + + model_path = inpainter.download_jit_model(method_key) + if model_path: + messages.append(f"✅ {model_info['name']} downloaded to: {model_path}") + else: + messages.append(f"❌ Failed to download {model_info['name']}") + else: + messages.append(f"ℹ️ {inpaint_method_val} is downloaded automatically on first use") + + except Exception as e: + messages.append(f"❌ Error downloading inpainting model: {str(e)}") + + if not messages: + messages.append("ℹ️ No models selected for download") + + except Exception as e: + messages.append(f"❌ Error during download: {str(e)}") + + return gr.Info("\n".join(messages)) + + def load_models_handler(detector_type_val, inpaint_method_val): + """Load selected models into memory""" + messages = [] + + 
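# Missing checkpoints are downloaded on demand before loading (mirrors download_models_handler above). + 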
try: + # Load bubble detection model + if detector_type_val: + messages.append(f"📦 Loading {detector_type_val} bubble detector...") + try: + from bubble_detector import BubbleDetector + bd = BubbleDetector() + + if detector_type_val == "rtdetr_onnx": + if bd.load_rtdetr_onnx_model(): + messages.append("✅ RT-DETR ONNX model loaded successfully") + else: + messages.append("❌ Failed to load RT-DETR ONNX model") + elif detector_type_val == "rtdetr": + if bd.load_rtdetr_model(): + messages.append("✅ RT-DETR model loaded successfully") + else: + messages.append("❌ Failed to load RT-DETR model") + elif detector_type_val == "yolo": + messages.append("ℹ️ YOLO models are loaded automatically when needed") + except Exception as e: + messages.append(f"❌ Error loading detector: {str(e)}") + + # Load inpainting model + if inpaint_method_val: + messages.append(f"\n📦 Loading {inpaint_method_val} inpainting model...") + try: + from local_inpainter import LocalInpainter, LAMA_JIT_MODELS + import os + + inpainter = LocalInpainter({}) + + # Map method names to model keys + method_map = { + 'anime_onnx': 'anime_onnx', + 'anime': 'anime', + 'lama': 'lama', + 'lama_onnx': 'lama_onnx', + 'aot': 'aot', + 'aot_onnx': 'aot_onnx' + } + + method_key = method_map.get(inpaint_method_val) + if method_key: + # First check if model exists, download if not + if method_key in LAMA_JIT_MODELS: + model_info = LAMA_JIT_MODELS[method_key] + cache_dir = os.path.expanduser('~/.cache/inpainting') + model_filename = os.path.basename(model_info['url']) + model_path = os.path.join(cache_dir, model_filename) + + if not os.path.exists(model_path): + messages.append(f"Model not found, downloading first...") + model_path = inpainter.download_jit_model(method_key) + if not model_path: + messages.append(f"❌ Failed to download model") + return gr.Info("\n".join(messages)) + + # Now load the model + if inpainter.load_model(method_key, model_path): + messages.append(f"✅ {model_info['name']} loaded successfully") + else: + messages.append(f"❌ Failed to load {model_info['name']}") + else: + messages.append(f"ℹ️ {inpaint_method_val} will be loaded automatically when needed") + else: + messages.append(f"ℹ️ Unknown method: {inpaint_method_val}") + + except Exception as e: + messages.append(f"❌ Error loading inpainting model: {str(e)}") + + if not messages: + messages.append("ℹ️ No models selected for loading") + + except Exception as e: + messages.append(f"❌ Error during loading: {str(e)}") + + return gr.Info("\n".join(messages)) + + download_models_btn.click( + fn=download_models_handler, + inputs=[detector_type, local_inpaint_method], + outputs=None + ) + + load_models_btn.click( + fn=load_models_handler, + inputs=[detector_type, local_inpaint_method], + outputs=None + ) + + # Auto-save parallel panel translation settings + def save_parallel_settings(preload_enabled, parallel_enabled, max_workers, stagger_ms): + """Save parallel panel translation settings to config""" + try: + current_config = self.get_current_config_for_update() + # Don't decrypt - just update what we need + + # Initialize nested structure if not exists + if 'manga_settings' not in current_config: + current_config['manga_settings'] = {} + if 'advanced' not in current_config['manga_settings']: + current_config['manga_settings']['advanced'] = {} + + current_config['manga_settings']['advanced']['preload_local_inpainting_for_panels'] = bool(preload_enabled) + current_config['manga_settings']['advanced']['parallel_panel_translation'] = bool(parallel_enabled) + 
current_config['manga_settings']['advanced']['panel_max_workers'] = int(max_workers) + current_config['manga_settings']['advanced']['panel_start_stagger_ms'] = int(stagger_ms) + + self.save_config(current_config) + return None + except Exception as e: + print(f"Failed to save parallel panel settings: {e}") + return None + + # Auto-save inpainting performance settings + def save_inpainting_settings(batch_size, cache_enabled): + """Save inpainting performance settings to config""" + try: + current_config = self.get_current_config_for_update() + # Don't decrypt - just update what we need + + # Initialize nested structure if not exists + if 'manga_settings' not in current_config: + current_config['manga_settings'] = {} + if 'inpainting' not in current_config['manga_settings']: + current_config['manga_settings']['inpainting'] = {} + + current_config['manga_settings']['inpainting']['batch_size'] = int(batch_size) + current_config['manga_settings']['inpainting']['enable_cache'] = bool(cache_enabled) + + self.save_config(current_config) + return None + except Exception as e: + print(f"Failed to save inpainting settings: {e}") + return None + + # Auto-save preload local inpainting setting + def save_preload_setting(preload_enabled): + """Save preload local inpainting setting to config""" + try: + current_config = self.get_current_config_for_update() + # Don't decrypt - just update what we need + + # Initialize nested structure if not exists + if 'manga_settings' not in current_config: + current_config['manga_settings'] = {} + if 'advanced' not in current_config['manga_settings']: + current_config['manga_settings']['advanced'] = {} + + current_config['manga_settings']['advanced']['preload_local_inpainting_for_panels'] = bool(preload_enabled) + + self.save_config(current_config) + return None + except Exception as e: + print(f"Failed to save preload setting: {e}") + return None + + # Auto-save bubble detection settings + def save_bubble_detection_settings(detector_type_val, rtdetr_conf, bubble_conf, detect_text, detect_empty, detect_free, max_detections, local_method_val): + """Save bubble detection settings to config""" + try: + current_config = self.get_current_config_for_update() + # Don't decrypt - just update what we need + + # Initialize nested structure + if 'manga_settings' not in current_config: + current_config['manga_settings'] = {} + if 'ocr' not in current_config['manga_settings']: + current_config['manga_settings']['ocr'] = {} + if 'inpainting' not in current_config['manga_settings']: + current_config['manga_settings']['inpainting'] = {} + + # Save bubble detection settings + current_config['manga_settings']['ocr']['detector_type'] = detector_type_val + current_config['manga_settings']['ocr']['rtdetr_confidence'] = float(rtdetr_conf) + current_config['manga_settings']['ocr']['bubble_confidence'] = float(bubble_conf) + current_config['manga_settings']['ocr']['detect_text_bubbles'] = bool(detect_text) + current_config['manga_settings']['ocr']['detect_empty_bubbles'] = bool(detect_empty) + current_config['manga_settings']['ocr']['detect_free_text'] = bool(detect_free) + current_config['manga_settings']['ocr']['bubble_max_detections_yolo'] = int(max_detections) + + # Save inpainting method + current_config['manga_settings']['inpainting']['local_method'] = local_method_val + + self.save_config(current_config) + return None + except Exception as e: + print(f"Failed to save bubble detection settings: {e}") + return None + + # All Advanced Settings auto-save handlers removed - use manual Save 
Config button + + gr.Markdown("\n---\n**Note:** These settings will be saved to your config and applied to all manga translations.") + + # Manual Glossary Extraction Tab + with gr.Tab("📝 Manual Glossary Extraction"): + gr.Markdown(""" + ### Extract character names and terms from EPUB files + Configure extraction settings below, then upload an EPUB file to extract a glossary. + """) + + with gr.Row(): + with gr.Column(): + glossary_epub = gr.File( + label="📖 Upload EPUB File", + file_types=[".epub"] + ) + + with gr.Row(): + extract_btn = gr.Button( + "🔍 Extract Glossary", + variant="primary", + size="lg", + scale=2 + ) + + stop_glossary_btn = gr.Button( + "⏹️ Stop Extraction", + variant="stop", + size="lg", + visible=False, + scale=1 + ) + + glossary_model = gr.Dropdown( + choices=self.models, + value=self.get_config_value('model', 'gpt-4-turbo'), + label="🤖 AI Model", + interactive=True, + allow_custom_value=True, + filterable=True + ) + + glossary_api_key = gr.Textbox( + label="🔑 API Key", + type="password", + placeholder="Enter your API key", + value=self.get_config_value('api_key', '') + ) + + # Tabs for different settings sections + with gr.Tabs(): + # Extraction Settings Tab + with gr.Tab("Extraction Settings"): + with gr.Accordion("🎯 Targeted Extraction Settings", open=True): + with gr.Row(): + with gr.Column(): + min_freq = gr.Slider( + minimum=1, + maximum=10, + value=self.get_config_value('glossary_min_frequency', 2), + step=1, + label="Min frequency", + info="How many times a name must appear (lower = more terms)" + ) + + max_titles = gr.Slider( + minimum=10, + maximum=100, + value=self.get_config_value('glossary_max_titles', 30), + step=5, + label="Max titles", + info="Limits to prevent huge glossaries" + ) + + max_text_size = gr.Number( + label="Max text size", + value=self.get_config_value('glossary_max_text_size', 50000), + info="Characters to analyze (0 = entire text)" + ) + + max_sentences = gr.Slider( + minimum=50, + maximum=500, + value=self.get_config_value('glossary_max_sentences', 200), + step=10, + label="Max sentences", + info="Maximum sentences to send to AI (increase for more context)" + ) + + with gr.Column(): + max_names_slider = gr.Slider( + minimum=10, + maximum=200, + value=self.get_config_value('glossary_max_names', 50), + step=10, + label="Max names", + info="Maximum number of character names to extract" + ) + + translation_batch = gr.Slider( + minimum=10, + maximum=100, + value=self.get_config_value('glossary_batch_size', 50), + step=5, + label="Translation batch", + info="Terms per API call (larger = faster but may reduce quality)" + ) + + chapter_split_threshold = gr.Number( + label="Chapter split threshold", + value=self.get_config_value('glossary_chapter_split_threshold', 8192), + info="Split large texts into chunks (0 = no splitting)" + ) + + # Filter mode selection + filter_mode = gr.Radio( + choices=[ + "all", + "only_with_honorifics", + "only_without_honorifics" + ], + value=self.get_config_value('glossary_filter_mode', 'all'), + label="Filter mode", + info="What types of names to extract" + ) + + # Strip honorifics checkbox + strip_honorifics = gr.Checkbox( + label="Remove honorifics from extracted names", + value=self.get_config_value('strip_honorifics', True), + info="Remove suffixes like '님', 'さん', '先生' from names" + ) + + # Fuzzy threshold slider + fuzzy_threshold = gr.Slider( + minimum=0.5, + maximum=1.0, + value=self.get_config_value('glossary_fuzzy_threshold', 0.90), + step=0.05, + label="Fuzzy threshold", + info="How similar names must be 
to match (0.9 = 90% match, 1.0 = exact match)" + ) + + + # Extraction Prompt Tab + with gr.Tab("Extraction Prompt"): + gr.Markdown(""" + ### System Prompt for Extraction + Customize how the AI extracts names and terms from your text. + """) + + extraction_prompt = gr.Textbox( + label="Extraction Template (Use placeholders: {language}, {min_frequency}, {max_names}, {max_titles})", + lines=10, + value=self.get_config_value('manual_glossary_prompt', + "Extract character names and important terms from the following text.\n\n" + "Output format:\n{fields}\n\n" + "Rules:\n- Output ONLY CSV lines in the exact format shown above\n" + "- No headers, no extra text, no JSON\n" + "- One entry per line\n" + "- Leave gender empty for terms (just end with comma)") + ) + + reset_extraction_prompt_btn = gr.Button( + "Reset to Default", + variant="secondary", + size="sm" + ) + + # Format Instructions Tab + with gr.Tab("Format Instructions"): + gr.Markdown(""" + ### Output Format Instructions + These instructions tell the AI exactly how to format the extracted glossary. + """) + + format_instructions = gr.Textbox( + label="Format Instructions (Use placeholder: {text_sample})", + lines=10, + value=self.get_config_value('glossary_format_instructions', + "Return the results in EXACT CSV format with this header:\n" + "type,raw_name,translated_name\n\n" + "For example:\n" + "character,김상현,Kim Sang-hyun\n" + "character,갈편제,Gale Hardest\n" + "term,마법사,Mage\n\n" + "Only include terms that actually appear in the text.\n" + "Do not use quotes around values unless they contain commas.\n\n" + "Text to analyze:\n{text_sample}") + ) + + use_legacy_csv = gr.Checkbox( + label="Use legacy CSV format", + value=self.get_config_value('glossary_use_legacy_csv', False), + info="When disabled: Uses clean format with sections (===CHARACTERS===). When enabled: Uses traditional CSV format with repeated type columns." + ) + + with gr.Column(): + # Add logo and status at top + with gr.Row(): + gr.Image( + value="Halgakos.png", + label=None, + show_label=False, + width=80, + height=80, + interactive=False, + show_download_button=False, + container=False + ) + glossary_status_message = gr.Markdown( + value="### Ready to extract\nUpload an EPUB file and click 'Extract Glossary' to begin.", + visible=True + ) + + # Progress section (similar to translation tabs) + with gr.Group(visible=False) as glossary_progress_group: + gr.Markdown("### Progress") + glossary_progress_text = gr.Textbox( + label="📨 Current Status", + value="Ready to start", + interactive=False, + lines=1 + ) + glossary_progress_bar = gr.Slider( + minimum=0, + maximum=100, + value=0, + step=1, + label="📋 Extraction Progress", + interactive=False, + show_label=True + ) + + glossary_logs = gr.Textbox( + label="📋 Extraction Logs", + lines=20, + max_lines=30, + value="Ready to extract. 
Upload an EPUB file and configure settings.", + visible=True, + interactive=False + ) + + glossary_output = gr.File( + label="📥 Download Glossary CSV", + visible=False + ) + + glossary_status = gr.Textbox( + label="Final Status", + lines=3, + max_lines=5, + visible=False, + interactive=False + ) + + extract_btn.click( + fn=self.extract_glossary_with_stop, + inputs=[ + glossary_epub, + glossary_model, + glossary_api_key, + min_freq, + max_names_slider, + max_titles, + max_text_size, + max_sentences, + translation_batch, + chapter_split_threshold, + filter_mode, + strip_honorifics, + fuzzy_threshold, + extraction_prompt, + format_instructions, + use_legacy_csv + ], + outputs=[ + glossary_output, + glossary_status_message, + glossary_progress_group, + glossary_logs, + glossary_status, + glossary_progress_text, + glossary_progress_bar, + extract_btn, + stop_glossary_btn + ] + ) + + # Stop button handler + stop_glossary_btn.click( + fn=self.stop_glossary_extraction, + inputs=[], + outputs=[extract_btn, stop_glossary_btn, glossary_status] + ) + + # QA Scanner Tab + with gr.Tab("🔍 QA Scanner"): + gr.Markdown(""" + ### Quick Scan for Translation Quality + Scan translated content for common issues like untranslated text, formatting problems, and quality concerns. + + **Supported inputs:** + - 📁 Output folder containing extracted HTML/XHTML files + - 📖 EPUB file (will be automatically extracted and scanned) + - 📦 ZIP file containing HTML/XHTML files + """) + + with gr.Row(): + with gr.Column(): + # Check if running on Hugging Face Spaces + is_hf_spaces = os.getenv('SPACE_ID') is not None or os.getenv('HF_SPACES') == 'true' + + if is_hf_spaces: + gr.Markdown(""" + **🤗 Hugging Face Spaces Mode** + Upload an EPUB or ZIP file containing the translated content. + The scanner will extract and analyze the HTML/XHTML files inside. 
+ """) + qa_folder_path = gr.File( + label="📂 Upload EPUB or ZIP file", + file_types=[".epub", ".zip"], + type="filepath" + ) + else: + qa_folder_path = gr.Textbox( + label="📁 Path to Folder, EPUB, or ZIP", + placeholder="Enter path to: folder with HTML files, EPUB file, or ZIP file", + info="Can be a folder path, or direct path to an EPUB/ZIP file" + ) + + with gr.Row(): + qa_scan_btn = gr.Button( + "⚡ Quick Scan", + variant="primary", + size="lg", + scale=2 + ) + + stop_qa_btn = gr.Button( + "⏹️ Stop Scan", + variant="stop", + size="lg", + visible=False, + scale=1 + ) + + with gr.Accordion("⚙️ Quick Scan Settings", open=True): + gr.Markdown(""" + **Quick Scan Mode (85% threshold, Speed optimized)** + - 3-5x faster scanning + - Checks consecutive chapters only + - Simplified analysis + - Good for large libraries + - Minimal resource usage + """) + + # Foreign Character Detection + gr.Markdown("#### Foreign Character Detection") + min_foreign_chars = gr.Slider( + minimum=0, + maximum=50, + value=self.get_config_value('qa_min_foreign_chars', 10), + step=1, + label="Minimum foreign characters to flag", + info="0 = always flag, higher = more tolerant" + ) + + # Detection Options + gr.Markdown("#### Detection Options") + check_repetition = gr.Checkbox( + label="Check for excessive repetition", + value=self.get_config_value('qa_check_repetition', True) + ) + + check_glossary_leakage = gr.Checkbox( + label="Check for glossary leakage (raw glossary entries in translation)", + value=self.get_config_value('qa_check_glossary_leakage', True) + ) + + # File Processing + gr.Markdown("#### File Processing") + min_file_length = gr.Slider( + minimum=0, + maximum=5000, + value=self.get_config_value('qa_min_file_length', 0), + step=100, + label="Minimum file length (characters)", + info="Skip files shorter than this" + ) + + # Additional Checks + gr.Markdown("#### Additional Checks") + check_multiple_headers = gr.Checkbox( + label="Detect files with 2 or more headers (h1-h6 tags)", + value=self.get_config_value('qa_check_multiple_headers', True), + info="Identifies files that may have been incorrectly split or merged" + ) + + check_missing_html = gr.Checkbox( + label="Flag HTML files with missing <html> tag", + value=self.get_config_value('qa_check_missing_html', True), + info="Checks if HTML files have proper structure" + ) + + check_insufficient_paragraphs = gr.Checkbox( + label="Check for insufficient paragraph tags", + value=self.get_config_value('qa_check_insufficient_paragraphs', True) + ) + + min_paragraph_percentage = gr.Slider( + minimum=10, + maximum=90, + value=self.get_config_value('qa_min_paragraph_percentage', 30), + step=5, + label="Minimum text in <p> tags (%)", + info="Files with less than this percentage will be flagged" + ) + + # Report Settings + gr.Markdown("#### Report Settings") + + report_format = gr.Radio( + choices=["summary", "detailed", "verbose"], + value=self.get_config_value('qa_report_format', 'detailed'), + label="Report format", + info="Summary = brief overview, Detailed = recommended, Verbose = all data" + ) + + auto_save_report = gr.Checkbox( + label="Automatically save report after scan", + value=self.get_config_value('qa_auto_save_report', True) + ) + + with gr.Column(): + # Add logo and status at top + with gr.Row(): + gr.Image( + value="Halgakos.png", + label=None, + show_label=False, + width=80, + height=80, + interactive=False, + show_download_button=False, + container=False + ) + qa_status_message = gr.Markdown( + value="### Ready to scan\nEnter the path to your 
output folder and click 'Quick Scan' to begin.", + visible=True + ) + + # Progress section + with gr.Group(visible=False) as qa_progress_group: + gr.Markdown("### Progress") + qa_progress_text = gr.Textbox( + label="📨 Current Status", + value="Ready to start", + interactive=False, + lines=1 + ) + qa_progress_bar = gr.Slider( + minimum=0, + maximum=100, + value=0, + step=1, + label="📋 Scan Progress", + interactive=False, + show_label=True + ) + + qa_logs = gr.Textbox( + label="📋 Scan Logs", + lines=20, + max_lines=30, + value="Ready to scan. Enter output folder path and configure settings.", + visible=True, + interactive=False + ) + + qa_report = gr.File( + label="📄 Download QA Report", + visible=False + ) + + qa_status = gr.Textbox( + label="Final Status", + lines=3, + max_lines=5, + visible=False, + interactive=False + ) + + # QA Scan button handler + qa_scan_btn.click( + fn=self.run_qa_scan_with_stop, + inputs=[ + qa_folder_path, + min_foreign_chars, + check_repetition, + check_glossary_leakage, + min_file_length, + check_multiple_headers, + check_missing_html, + check_insufficient_paragraphs, + min_paragraph_percentage, + report_format, + auto_save_report + ], + outputs=[ + qa_report, + qa_status_message, + qa_progress_group, + qa_logs, + qa_status, + qa_progress_text, + qa_progress_bar, + qa_scan_btn, + stop_qa_btn + ] + ) + + # Stop button handler + stop_qa_btn.click( + fn=self.stop_qa_scan, + inputs=[], + outputs=[qa_scan_btn, stop_qa_btn, qa_status] + ) + + # Settings Tab + with gr.Tab("⚙️ Settings"): + gr.Markdown("### Configuration") + + gr.Markdown("#### Translation Profiles") + gr.Markdown("Profiles are loaded from your `config_web.json` file. The web interface has its own separate configuration.") + + with gr.Accordion("View All Profiles", open=False): + profiles_text = "\n\n".join( + [f"**{name}**:\n```\n{prompt[:200]}...\n```" + for name, prompt in self.profiles.items()] + ) + gr.Markdown(profiles_text if profiles_text else "No profiles found") + + gr.Markdown("---") + gr.Markdown("#### Advanced Translation Settings") + + with gr.Row(): + with gr.Column(): + thread_delay = gr.Slider( + minimum=0, + maximum=5, + value=self.get_config_value('thread_submission_delay', 0.1), + step=0.1, + label="Threading delay (s)", + interactive=True + ) + + api_delay = gr.Slider( + minimum=0, + maximum=10, + value=self.get_config_value('delay', 1), + step=0.5, + label="API call delay (s)", + interactive=True + ) + + chapter_range = gr.Textbox( + label="Chapter range (e.g., 5-10)", + value=self.get_config_value('chapter_range', ''), + placeholder="Leave empty for all chapters" + ) + + token_limit = gr.Number( + label="Input Token limit", + value=self.get_config_value('token_limit', 200000), + minimum=0 + ) + + disable_token_limit = gr.Checkbox( + label="Disable Input Token Limit", + value=self.get_config_value('token_limit_disabled', False) + ) + + output_token_limit = gr.Number( + label="Output Token limit", + value=self.get_config_value('max_output_tokens', 16000), + minimum=0 + ) + + with gr.Column(): + contextual = gr.Checkbox( + label="Contextual Translation", + value=self.get_config_value('contextual', False) + ) + + history_limit = gr.Number( + label="Translation History Limit", + value=self.get_config_value('translation_history_limit', 2), + minimum=0 + ) + + rolling_history = gr.Checkbox( + label="Rolling History Window", + value=self.get_config_value('translation_history_rolling', False) + ) + + batch_translation = gr.Checkbox( + label="Batch Translation", + 
value=self.get_config_value('batch_translation', True) + ) + + batch_size = gr.Number( + label="Batch Size", + value=self.get_config_value('batch_size', 10), + minimum=1 + ) + + gr.Markdown("---") + gr.Markdown("#### Chapter Processing Options") + + with gr.Row(): + with gr.Column(): + # Chapter Header Translation + batch_translate_headers = gr.Checkbox( + label="Batch Translate Headers", + value=self.get_config_value('batch_translate_headers', False) + ) + + headers_per_batch = gr.Number( + label="Headers per batch", + value=self.get_config_value('headers_per_batch', 400), + minimum=1 + ) + + # NCX and CSS options + use_ncx_navigation = gr.Checkbox( + label="Use NCX-only Navigation (Compatibility Mode)", + value=self.get_config_value('use_ncx_navigation', False) + ) + + attach_css_to_chapters = gr.Checkbox( + label="Attach CSS to Chapters (Fixes styling issues)", + value=self.get_config_value('attach_css_to_chapters', False) + ) + + retain_source_extension = gr.Checkbox( + label="Retain source extension (no 'response_' prefix)", + value=self.get_config_value('retain_source_extension', True) + ) + + with gr.Column(): + # Conservative Batching + use_conservative_batching = gr.Checkbox( + label="Use Conservative Batching", + value=self.get_config_value('use_conservative_batching', False), + info="Groups chapters in batches of 3x batch size for memory management" + ) + + # Gemini API Safety + disable_gemini_safety = gr.Checkbox( + label="Disable Gemini API Safety Filters", + value=self.get_config_value('disable_gemini_safety', False), + info="⚠️ Disables ALL content safety filters for Gemini models (BLOCK_NONE)" + ) + + # OpenRouter Options + use_http_openrouter = gr.Checkbox( + label="Use HTTP-only for OpenRouter (bypass SDK)", + value=self.get_config_value('use_http_openrouter', False), + info="Direct HTTP POST with explicit headers" + ) + + disable_openrouter_compression = gr.Checkbox( + label="Disable compression for OpenRouter (Accept-Encoding)", + value=self.get_config_value('disable_openrouter_compression', False), + info="Sends Accept-Encoding: identity for uncompressed responses" + ) + + gr.Markdown("---") + gr.Markdown("#### Chapter Extraction Settings") + + with gr.Row(): + with gr.Column(): + gr.Markdown("**Text Extraction Method:**") + text_extraction_method = gr.Radio( + choices=["standard", "enhanced"], + value=self.get_config_value('text_extraction_method', 'standard'), + label="", + info="Standard uses BeautifulSoup, Enhanced uses html2text" + ) + + gr.Markdown("• **Standard (BeautifulSoup)** - Traditional HTML parsing, fast and reliable") + gr.Markdown("• **Enhanced (html2text)** - Superior Unicode handling, cleaner text extraction") + + with gr.Column(): + gr.Markdown("**File Filtering Level:**") + file_filtering_level = gr.Radio( + choices=["smart", "moderate", "full"], + value=self.get_config_value('file_filtering_level', 'smart'), + label="", + info="Controls which files are extracted from EPUBs" + ) + + gr.Markdown("• **Smart (Aggressive Filtering)** - Skips navigation, TOC, copyright files") + gr.Markdown("• **Moderate** - Only skips obvious navigation files") + gr.Markdown("• **Full (No Filtering)** - Extracts ALL HTML/XHTML files") + + gr.Markdown("---") + gr.Markdown("#### Response Handling & Retry Logic") + + with gr.Row(): + with gr.Column(): + gr.Markdown("**GPT-5 Thinking (OpenRouter/OpenAI-style)**") + enable_gpt_thinking = gr.Checkbox( + label="Enable GPT / OR Thinking", + value=self.get_config_value('enable_gpt_thinking', True), + info="Controls GPT-5 and 
OpenRouter reasoning" + ) + + with gr.Row(): + gpt_thinking_effort = gr.Dropdown( + choices=["low", "medium", "high"], + value=self.get_config_value('gpt_thinking_effort', 'medium'), + label="Effort", + interactive=True + ) + + or_thinking_tokens = gr.Number( + label="OR Thinking Tokens", + value=self.get_config_value('or_thinking_tokens', 2000), + minimum=0, + maximum=50000, + info="tokens" + ) + + gr.Markdown("*Provide Tokens to force a max token budget for other models; GPT-5 only uses Effort (low/medium/high)*", elem_classes=["markdown-small"]) + + with gr.Column(): + gr.Markdown("**Gemini Thinking Mode**") + enable_gemini_thinking = gr.Checkbox( + label="Enable Gemini Thinking", + value=self.get_config_value('enable_gemini_thinking', False), + info="Control Gemini's thinking process", + interactive=True + ) + + gemini_thinking_budget = gr.Number( + label="Budget", + value=self.get_config_value('gemini_thinking_budget', 0), + minimum=0, + maximum=50000, + info="tokens (0 = disabled)", + interactive=True + ) + + gr.Markdown("*0 = disabled, 512-24576 = limited thinking*", elem_classes=["markdown-small"]) + + gr.Markdown("---") + gr.Markdown("🔒 **API keys are encrypted** when saved to config using AES encryption.") + + save_api_key = gr.Checkbox( + label="Save API Key (Encrypted)", + value=True + ) + + save_status = gr.Textbox(label="Settings Status", value="Use the 'Save Config' button to save changes", interactive=False) + + # Hidden HTML component for JavaScript execution + js_executor = gr.HTML("", visible=False) + + # Auto-save function for settings tab + def save_settings_tab(thread_delay_val, api_delay_val, chapter_range_val, token_limit_val, disable_token_limit_val, output_token_limit_val, contextual_val, history_limit_val, rolling_history_val, batch_translation_val, batch_size_val, save_api_key_val): + """Save settings from the Settings tab""" + try: + current_config = self.get_current_config_for_update() + # Don't decrypt - just update non-encrypted fields + + # Update settings + current_config['thread_submission_delay'] = float(thread_delay_val) + current_config['delay'] = float(api_delay_val) + current_config['chapter_range'] = str(chapter_range_val) + current_config['token_limit'] = int(token_limit_val) + current_config['token_limit_disabled'] = bool(disable_token_limit_val) + current_config['max_output_tokens'] = int(output_token_limit_val) + current_config['contextual'] = bool(contextual_val) + current_config['translation_history_limit'] = int(history_limit_val) + current_config['translation_history_rolling'] = bool(rolling_history_val) + current_config['batch_translation'] = bool(batch_translation_val) + current_config['batch_size'] = int(batch_size_val) + + # Save to file + self.save_config(current_config) + + # JavaScript to save to localStorage + js_code = """ + <script> + (function() { + // Save individual settings to localStorage + window.saveToLocalStorage('thread_delay', %f); + window.saveToLocalStorage('api_delay', %f); + window.saveToLocalStorage('chapter_range', '%s'); + window.saveToLocalStorage('token_limit', %d); + window.saveToLocalStorage('disable_token_limit', %s); + window.saveToLocalStorage('output_token_limit', %d); + window.saveToLocalStorage('contextual', %s); + window.saveToLocalStorage('history_limit', %d); + window.saveToLocalStorage('rolling_history', %s); + window.saveToLocalStorage('batch_translation', %s); + window.saveToLocalStorage('batch_size', %d); + console.log('Settings saved to localStorage'); + })(); + </script> + """ % ( + 
thread_delay_val, api_delay_val, chapter_range_val, token_limit_val, + str(disable_token_limit_val).lower(), output_token_limit_val, + str(contextual_val).lower(), history_limit_val, + str(rolling_history_val).lower(), str(batch_translation_val).lower(), + batch_size_val + ) + + return "✅ Settings saved successfully", js_code + except Exception as e: + return f"❌ Failed to save: {str(e)}", "" + + # Settings tab auto-save handlers removed - use manual Save Config button + + # Token sync handlers removed - use manual Save Config button + + # Help Tab + with gr.Tab("❓ Help"): + gr.Markdown(""" + ## How to Use Glossarion + + ### Translation + 1. Upload an EPUB file + 2. Select AI model (GPT-4, Claude, etc.) + 3. Enter your API key + 4. Click "Translate" + 5. Download the translated EPUB + + ### Manga Translation + 1. Upload manga image(s) (PNG, JPG, etc.) + 2. Select AI model and enter API key + 3. Choose translation profile (e.g., Manga_JP, Manga_KR) + 4. Configure OCR settings (Google Cloud Vision recommended) + 5. Enable bubble detection and inpainting for best results + 6. Click "Translate Manga" + + ### Glossary Extraction + 1. Upload an EPUB file + 2. Configure extraction settings + 3. Click "Extract Glossary" + 4. Use the CSV in future translations + + ### API Keys + - **OpenAI**: Get from https://platform.openai.com/api-keys + - **Anthropic**: Get from https://console.anthropic.com/ + + ### Translation Profiles + Profiles contain detailed translation instructions and rules. + Select a profile that matches your source language and style preferences. + + You can create and edit profiles in the desktop application. + + ### Tips + - Use glossaries for consistent character name translation + - Lower temperature (0.1-0.3) for more literal translations + - Higher temperature (0.5-0.7) for more creative translations + """) + + # Create a comprehensive load function that refreshes ALL values + def load_all_settings(): + """Load all settings from config file on page refresh""" + # Reload config to get latest values + self.config = self.load_config() + self.decrypted_config = decrypt_config(self.config.copy()) if API_KEY_ENCRYPTION_AVAILABLE else self.config.copy() + + # Helper function to convert RGB arrays to hex + def to_hex_color(color_value, default='#000000'): + if isinstance(color_value, (list, tuple)) and len(color_value) >= 3: + return '#{:02x}{:02x}{:02x}'.format(int(color_value[0]), int(color_value[1]), int(color_value[2])) + elif isinstance(color_value, str): + return color_value if color_value.startswith('#') else default + return default + + # Return values for all tracked components + return [ + self.get_config_value('model', 'gpt-4-turbo'), # epub_model + self.get_config_value('api_key', ''), # epub_api_key + self.get_config_value('active_profile', list(self.profiles.keys())[0] if self.profiles else ''), # epub_profile + self.profiles.get(self.get_config_value('active_profile', ''), ''), # epub_system_prompt + self.get_config_value('temperature', 0.3), # epub_temperature + self.get_config_value('max_output_tokens', 16000), # epub_max_tokens + self.get_config_value('enable_image_translation', False), # enable_image_translation + self.get_config_value('enable_auto_glossary', False), # enable_auto_glossary + self.get_config_value('append_glossary_to_prompt', True), # append_glossary + # Auto glossary settings + self.get_config_value('glossary_min_frequency', 2), # auto_glossary_min_freq + self.get_config_value('glossary_max_names', 50), # auto_glossary_max_names + 
self.get_config_value('glossary_max_titles', 30), # auto_glossary_max_titles + self.get_config_value('glossary_batch_size', 50), # auto_glossary_batch_size + self.get_config_value('glossary_filter_mode', 'all'), # auto_glossary_filter_mode + self.get_config_value('glossary_fuzzy_threshold', 0.90), # auto_glossary_fuzzy_threshold + # Manual glossary extraction settings + self.get_config_value('manual_glossary_min_frequency', self.get_config_value('glossary_min_frequency', 2)), # min_freq + self.get_config_value('manual_glossary_max_names', self.get_config_value('glossary_max_names', 50)), # max_names_slider + self.get_config_value('manual_glossary_max_titles', self.get_config_value('glossary_max_titles', 30)), # max_titles + self.get_config_value('glossary_max_text_size', 50000), # max_text_size + self.get_config_value('glossary_max_sentences', 200), # max_sentences + self.get_config_value('manual_glossary_batch_size', self.get_config_value('glossary_batch_size', 50)), # translation_batch + self.get_config_value('glossary_chapter_split_threshold', 8192), # chapter_split_threshold + self.get_config_value('manual_glossary_filter_mode', self.get_config_value('glossary_filter_mode', 'all')), # filter_mode + self.get_config_value('strip_honorifics', True), # strip_honorifics + self.get_config_value('manual_glossary_fuzzy_threshold', self.get_config_value('glossary_fuzzy_threshold', 0.90)), # fuzzy_threshold + # Chapter processing options + self.get_config_value('batch_translate_headers', False), # batch_translate_headers + self.get_config_value('headers_per_batch', 400), # headers_per_batch + self.get_config_value('use_ncx_navigation', False), # use_ncx_navigation + self.get_config_value('attach_css_to_chapters', False), # attach_css_to_chapters + self.get_config_value('retain_source_extension', True), # retain_source_extension + self.get_config_value('use_conservative_batching', False), # use_conservative_batching + self.get_config_value('disable_gemini_safety', False), # disable_gemini_safety + self.get_config_value('use_http_openrouter', False), # use_http_openrouter + self.get_config_value('disable_openrouter_compression', False), # disable_openrouter_compression + self.get_config_value('text_extraction_method', 'standard'), # text_extraction_method + self.get_config_value('file_filtering_level', 'smart'), # file_filtering_level + # QA report format + self.get_config_value('qa_report_format', 'detailed'), # report_format + # Thinking mode settings + self.get_config_value('enable_gpt_thinking', True), # enable_gpt_thinking + self.get_config_value('gpt_thinking_effort', 'medium'), # gpt_thinking_effort + self.get_config_value('or_thinking_tokens', 2000), # or_thinking_tokens + self.get_config_value('enable_gemini_thinking', False), # enable_gemini_thinking - disabled by default + self.get_config_value('gemini_thinking_budget', 0), # gemini_thinking_budget - 0 = disabled + # Manga settings + self.get_config_value('model', 'gpt-4-turbo'), # manga_model + self.get_config_value('api_key', ''), # manga_api_key + self.get_config_value('active_profile', list(self.profiles.keys())[0] if self.profiles else ''), # manga_profile + self.profiles.get(self.get_config_value('active_profile', ''), ''), # manga_system_prompt + self.get_config_value('ocr_provider', 'custom-api'), # ocr_provider + self.get_config_value('azure_vision_key', ''), # azure_key + self.get_config_value('azure_vision_endpoint', ''), # azure_endpoint + self.get_config_value('bubble_detection_enabled', True), # bubble_detection + 
self.get_config_value('inpainting_enabled', True), # inpainting + self.get_config_value('manga_font_size_mode', 'auto'), # font_size_mode + self.get_config_value('manga_font_size', 24), # font_size + self.get_config_value('manga_font_multiplier', 1.0), # font_multiplier + self.get_config_value('manga_min_font_size', 12), # min_font_size + self.get_config_value('manga_max_font_size', 48), # max_font_size + # Convert colors to hex format if they're stored as RGB arrays (white text, black shadow like manga integration) + to_hex_color(self.get_config_value('manga_text_color', [255, 255, 255]), '#FFFFFF'), # text_color_rgb - default white + self.get_config_value('manga_shadow_enabled', True), # shadow_enabled + to_hex_color(self.get_config_value('manga_shadow_color', [0, 0, 0]), '#000000'), # shadow_color - default black + self.get_config_value('manga_shadow_offset_x', 2), # shadow_offset_x + self.get_config_value('manga_shadow_offset_y', 2), # shadow_offset_y + self.get_config_value('manga_shadow_blur', 0), # shadow_blur + self.get_config_value('manga_bg_opacity', 130), # bg_opacity + self.get_config_value('manga_bg_style', 'circle'), # bg_style + self.get_config_value('manga_settings', {}).get('advanced', {}).get('parallel_panel_translation', False), # parallel_panel_translation + self.get_config_value('manga_settings', {}).get('advanced', {}).get('panel_max_workers', 7), # panel_max_workers + ] + + + # SECURITY: Save Config button DISABLED to prevent API keys from being saved to persistent storage on HF Spaces + # This is a critical security measure to prevent API key leakage in shared environments + # save_config_btn.click( + # fn=save_all_config, + # inputs=[ + # # EPUB tab fields + # epub_model, epub_api_key, epub_profile, epub_temperature, epub_max_tokens, + # enable_image_translation, enable_auto_glossary, append_glossary, + # # Auto glossary settings + # auto_glossary_min_freq, auto_glossary_max_names, auto_glossary_max_titles, + # auto_glossary_batch_size, auto_glossary_filter_mode, auto_glossary_fuzzy_threshold, + # enable_post_translation_scan, + # # Manual glossary extraction settings + # min_freq, max_names_slider, max_titles, + # max_text_size, max_sentences, translation_batch, + # chapter_split_threshold, filter_mode, strip_honorifics, + # fuzzy_threshold, extraction_prompt, format_instructions, + # use_legacy_csv, + # # QA Scanner settings + # min_foreign_chars, check_repetition, check_glossary_leakage, + # min_file_length, check_multiple_headers, check_missing_html, + # check_insufficient_paragraphs, min_paragraph_percentage, + # report_format, auto_save_report, + # # Chapter processing options + # batch_translate_headers, headers_per_batch, use_ncx_navigation, + # attach_css_to_chapters, retain_source_extension, + # use_conservative_batching, disable_gemini_safety, + # use_http_openrouter, disable_openrouter_compression, + # text_extraction_method, file_filtering_level, + # # Thinking mode settings + # enable_gpt_thinking, gpt_thinking_effort, or_thinking_tokens, + # enable_gemini_thinking, gemini_thinking_budget, + # # Manga tab fields + # manga_model, manga_api_key, manga_profile, + # ocr_provider, azure_key, azure_endpoint, + # bubble_detection, inpainting, + # font_size_mode, font_size, font_multiplier, min_font_size, max_font_size, + # text_color_rgb, shadow_enabled, shadow_color, + # shadow_offset_x, shadow_offset_y, shadow_blur, + # bg_opacity, bg_style, + # parallel_panel_translation, panel_max_workers, + # # Advanced Settings fields + # detector_type, 
rtdetr_confidence, bubble_confidence, + # detect_text_bubbles, detect_empty_bubbles, detect_free_text, bubble_max_detections, + # local_inpaint_method, webtoon_mode, + # inpaint_batch_size, inpaint_cache_enabled, + # parallel_processing, max_workers, + # preload_local_inpainting, panel_start_stagger, + # torch_precision, auto_cleanup_models, + # debug_mode, save_intermediate, concise_pipeline_logs + # ], + # outputs=[save_status_text] + # ) + + # Add load handler to restore settings on page load + app.load( + fn=load_all_settings, + inputs=[], + outputs=[ + epub_model, epub_api_key, epub_profile, epub_system_prompt, epub_temperature, epub_max_tokens, + enable_image_translation, enable_auto_glossary, append_glossary, + # Auto glossary settings + auto_glossary_min_freq, auto_glossary_max_names, auto_glossary_max_titles, + auto_glossary_batch_size, auto_glossary_filter_mode, auto_glossary_fuzzy_threshold, + # Manual glossary extraction settings + min_freq, max_names_slider, max_titles, + max_text_size, max_sentences, translation_batch, + chapter_split_threshold, filter_mode, strip_honorifics, + fuzzy_threshold, + # Chapter processing options + batch_translate_headers, headers_per_batch, use_ncx_navigation, + attach_css_to_chapters, retain_source_extension, + use_conservative_batching, disable_gemini_safety, + use_http_openrouter, disable_openrouter_compression, + text_extraction_method, file_filtering_level, + report_format, + # Thinking mode settings + enable_gpt_thinking, gpt_thinking_effort, or_thinking_tokens, + enable_gemini_thinking, gemini_thinking_budget, + # Manga settings + manga_model, manga_api_key, manga_profile, manga_system_prompt, + ocr_provider, azure_key, azure_endpoint, bubble_detection, inpainting, + font_size_mode, font_size, font_multiplier, min_font_size, max_font_size, + text_color_rgb, shadow_enabled, shadow_color, shadow_offset_x, shadow_offset_y, + shadow_blur, bg_opacity, bg_style, parallel_panel_translation, panel_max_workers + ] + ) + + return app + + +def main(): + """Launch Gradio web app""" + print("🚀 Starting Glossarion Web Interface...") + + # Check if running on Hugging Face Spaces + is_spaces = os.getenv('SPACE_ID') is not None or os.getenv('HF_SPACES') == 'true' + if is_spaces: + print("🤗 Running on Hugging Face Spaces") + print(f"📁 Space ID: {os.getenv('SPACE_ID', 'Unknown')}") + print(f"📁 Files in current directory: {len(os.listdir('.'))} items") + print(f"📁 Working directory: {os.getcwd()}") + print(f"😎 Available manga modules: {MANGA_TRANSLATION_AVAILABLE}") + else: + print("🏠 Running locally") + + web_app = GlossarionWeb() + app = web_app.create_interface() + + # Set favicon with absolute path if available (skip for Spaces) + favicon_path = None + if not is_spaces and os.path.exists("Halgakos.ico"): + favicon_path = os.path.abspath("Halgakos.ico") + print(f"✅ Using favicon: {favicon_path}") + elif not is_spaces: + print("⚠️ Halgakos.ico not found") + + # Launch with options appropriate for environment + launch_args = { + "server_name": "0.0.0.0", # Allow external access + "server_port": 7860, + "share": False, + "show_error": True, + } + + # Only add favicon for non-Spaces environments + if not is_spaces and favicon_path: + launch_args["favicon_path"] = favicon_path + + app.launch(**launch_args) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/bubble_detector.py b/bubble_detector.py new file mode 100644 index 0000000000000000000000000000000000000000..86b633385624dc3ae2de7fba5ef90276fbd1d3ba --- /dev/null +++ 
b/bubble_detector.py @@ -0,0 +1,2030 @@ +""" +bubble_detector.py - Modified version that works in frozen PyInstaller executables +Replace your bubble_detector.py with this version +""" +import os +import sys +import json +import numpy as np +import cv2 +from typing import List, Tuple, Optional, Dict, Any +import logging +import traceback +import hashlib +from pathlib import Path +import threading +import time + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +# Check if we're running in a frozen environment +IS_FROZEN = getattr(sys, 'frozen', False) +if IS_FROZEN: + # In frozen environment, set proper paths for ML libraries + MEIPASS = sys._MEIPASS + os.environ['TORCH_HOME'] = MEIPASS + os.environ['TRANSFORMERS_CACHE'] = os.path.join(MEIPASS, 'transformers') + os.environ['HF_HOME'] = os.path.join(MEIPASS, 'huggingface') + logger.info(f"Running in frozen environment: {MEIPASS}") + +# Modified import checks for frozen environment +YOLO_AVAILABLE = False +YOLO = None +torch = None +TORCH_AVAILABLE = False +ONNX_AVAILABLE = False +TRANSFORMERS_AVAILABLE = False +RTDetrForObjectDetection = None +RTDetrImageProcessor = None +PIL_AVAILABLE = False + +# Try to import YOLO dependencies with better error handling +if IS_FROZEN: + # In frozen environment, try harder to import + try: + # First try to import torch components individually + import torch + import torch.nn + import torch.cuda + TORCH_AVAILABLE = True + logger.info("✓ PyTorch loaded in frozen environment") + except Exception as e: + logger.warning(f"PyTorch not available in frozen environment: {e}") + TORCH_AVAILABLE = False + torch = None + + # Try ultralytics after torch + if TORCH_AVAILABLE: + try: + from ultralytics import YOLO + YOLO_AVAILABLE = True + logger.info("✓ Ultralytics YOLO loaded in frozen environment") + except Exception as e: + logger.warning(f"Ultralytics not available in frozen environment: {e}") + YOLO_AVAILABLE = False + + # Try transformers + try: + import transformers + # Try specific imports + try: + from transformers import RTDetrForObjectDetection, RTDetrImageProcessor + TRANSFORMERS_AVAILABLE = True + logger.info("✓ Transformers RT-DETR loaded in frozen environment") + except ImportError: + # Try alternative import + try: + from transformers import AutoModel, AutoImageProcessor + RTDetrForObjectDetection = AutoModel + RTDetrImageProcessor = AutoImageProcessor + TRANSFORMERS_AVAILABLE = True + logger.info("✓ Transformers loaded with AutoModel fallback") + except: + TRANSFORMERS_AVAILABLE = False + logger.warning("Transformers RT-DETR not available in frozen environment") + except Exception as e: + logger.warning(f"Transformers not available in frozen environment: {e}") + TRANSFORMERS_AVAILABLE = False +else: + # Normal environment - original import logic + try: + from ultralytics import YOLO + YOLO_AVAILABLE = True + except: + YOLO_AVAILABLE = False + logger.warning("Ultralytics YOLO not available") + + try: + import torch + # Test if cuda attribute exists + _ = torch.cuda + TORCH_AVAILABLE = True + except (ImportError, AttributeError): + TORCH_AVAILABLE = False + torch = None + logger.warning("PyTorch not available or incomplete") + + try: + from transformers import RTDetrForObjectDetection, RTDetrImageProcessor + try: + from transformers import RTDetrV2ForObjectDetection + RTDetrForObjectDetection = RTDetrV2ForObjectDetection + except ImportError: + pass + TRANSFORMERS_AVAILABLE = True + except: + TRANSFORMERS_AVAILABLE = False + logger.info("Transformers not available for 
RT-DETR") + +# Configure ORT memory behavior before importing +try: + os.environ.setdefault('ORT_DISABLE_MEMORY_ARENA', '1') +except Exception: + pass +# ONNX Runtime - works well in frozen environments +try: + import onnxruntime as ort + ONNX_AVAILABLE = True + logger.info("✓ ONNX Runtime available") +except ImportError: + ONNX_AVAILABLE = False + logger.warning("ONNX Runtime not available") + +# PIL +try: + from PIL import Image + PIL_AVAILABLE = True +except ImportError: + PIL_AVAILABLE = False + logger.info("PIL not available") + + +class BubbleDetector: + """ + Combined YOLOv8 and RT-DETR speech bubble detector for comics and manga. + Supports multiple model formats and provides configurable detection. + Backward compatible with existing code while adding RT-DETR support. + """ + + # Process-wide shared RT-DETR to avoid concurrent meta-device loads + _rtdetr_init_lock = threading.Lock() + _rtdetr_shared_model = None + _rtdetr_shared_processor = None + _rtdetr_loaded = False + _rtdetr_repo_id = 'ogkalu/comic-text-and-bubble-detector' + + # Shared RT-DETR (ONNX) across process to avoid device/context storms + _rtdetr_onnx_init_lock = threading.Lock() + _rtdetr_onnx_shared_session = None + _rtdetr_onnx_loaded = False + _rtdetr_onnx_providers = None + _rtdetr_onnx_model_path = None + # Limit concurrent runs to avoid device hangs. Defaults to 2 for better parallelism. + # Can be overridden via env DML_MAX_CONCURRENT or config rtdetr_max_concurrency + try: + _rtdetr_onnx_max_concurrent = int(os.environ.get('DML_MAX_CONCURRENT', '2')) + except Exception: + _rtdetr_onnx_max_concurrent = 2 + _rtdetr_onnx_sema = threading.Semaphore(max(1, _rtdetr_onnx_max_concurrent)) + _rtdetr_onnx_sema_initialized = False + + def __init__(self, config_path: str = "config.json"): + """ + Initialize the bubble detector. 
+ + Args: + config_path: Path to configuration file + """ + # Set thread limits early if environment indicates single-threaded mode + try: + if os.environ.get('OMP_NUM_THREADS') == '1': + # Already in single-threaded mode, ensure it's applied to this process + # Check if torch is available at module level before trying to use it + if TORCH_AVAILABLE and torch is not None: + try: + torch.set_num_threads(1) + except (RuntimeError, AttributeError): + pass + try: + import cv2 + cv2.setNumThreads(1) + except (ImportError, AttributeError): + pass + except Exception: + pass + + self.config_path = config_path + self.config = self._load_config() + + # YOLOv8 components (original) + self.model = None + self.model_loaded = False + self.model_type = None # 'yolo', 'onnx', or 'torch' + self.onnx_session = None + + # RT-DETR components (new) + self.rtdetr_model = None + self.rtdetr_processor = None + self.rtdetr_loaded = False + self.rtdetr_repo = 'ogkalu/comic-text-and-bubble-detector' + + # RT-DETR (ONNX) backend components + self.rtdetr_onnx_session = None + self.rtdetr_onnx_loaded = False + self.rtdetr_onnx_repo = 'ogkalu/comic-text-and-bubble-detector' + + # RT-DETR class definitions + self.CLASS_BUBBLE = 0 # Empty speech bubble + self.CLASS_TEXT_BUBBLE = 1 # Bubble with text + self.CLASS_TEXT_FREE = 2 # Text without bubble + + # Detection settings + self.default_confidence = 0.3 + self.default_iou_threshold = 0.45 + # Allow override from settings + try: + ocr_cfg = self.config.get('manga_settings', {}).get('ocr', {}) if isinstance(self.config, dict) else {} + self.default_max_detections = int(ocr_cfg.get('bubble_max_detections', 100)) + self.max_det_yolo = int(ocr_cfg.get('bubble_max_detections_yolo', self.default_max_detections)) + self.max_det_rtdetr = int(ocr_cfg.get('bubble_max_detections_rtdetr', self.default_max_detections)) + except Exception: + self.default_max_detections = 100 + self.max_det_yolo = 100 + self.max_det_rtdetr = 100 + + # Cache directory for ONNX conversions + self.cache_dir = os.environ.get('BUBBLE_CACHE_DIR', 'models') + os.makedirs(self.cache_dir, exist_ok=True) + + # RT-DETR concurrency setting from config + try: + rtdetr_max_conc = int(ocr_cfg.get('rtdetr_max_concurrency', 2)) + # Update class-level semaphore if not yet initialized or if value changed + if not BubbleDetector._rtdetr_onnx_sema_initialized or rtdetr_max_conc != BubbleDetector._rtdetr_onnx_max_concurrent: + BubbleDetector._rtdetr_onnx_max_concurrent = max(1, rtdetr_max_conc) + BubbleDetector._rtdetr_onnx_sema = threading.Semaphore(BubbleDetector._rtdetr_onnx_max_concurrent) + BubbleDetector._rtdetr_onnx_sema_initialized = True + logger.info(f"RT-DETR concurrency set to: {BubbleDetector._rtdetr_onnx_max_concurrent}") + except Exception as e: + logger.warning(f"Failed to set RT-DETR concurrency: {e}") + + # GPU availability + self.use_gpu = TORCH_AVAILABLE and torch.cuda.is_available() + self.device = 'cuda' if self.use_gpu else 'cpu' + + # Quantization/precision settings + adv_cfg = self.config.get('manga_settings', {}).get('advanced', {}) if isinstance(self.config, dict) else {} + ocr_cfg = self.config.get('manga_settings', {}).get('ocr', {}) if isinstance(self.config, dict) else {} + env_quant = os.environ.get('MODEL_QUANTIZE', 'false').lower() == 'true' + self.quantize_enabled = bool(env_quant or adv_cfg.get('quantize_models', False) or ocr_cfg.get('quantize_bubble_detector', False)) + self.quantize_dtype = str(adv_cfg.get('torch_precision', os.environ.get('TORCH_PRECISION', 'auto'))).lower() + # Prefer 
advanced.onnx_quantize; fall back to env or global quantize + self.onnx_quantize_enabled = bool(adv_cfg.get('onnx_quantize', os.environ.get('ONNX_QUANTIZE', 'false').lower() == 'true' or self.quantize_enabled)) + + # Stop flag support + self.stop_flag = None + self._stopped = False + self.log_callback = None + + logger.info(f"🗨️ BubbleDetector initialized") + logger.info(f" GPU: {'Available' if self.use_gpu else 'Not available'}") + logger.info(f" YOLO: {'Available' if YOLO_AVAILABLE else 'Not installed'}") + logger.info(f" ONNX: {'Available' if ONNX_AVAILABLE else 'Not installed'}") + logger.info(f" RT-DETR: {'Available' if TRANSFORMERS_AVAILABLE else 'Not installed'}") + logger.info(f" Quantization: {'ENABLED' if self.quantize_enabled else 'disabled'} (torch_precision={self.quantize_dtype}, onnx_quantize={'on' if self.onnx_quantize_enabled else 'off'})" ) + + def _load_config(self) -> Dict[str, Any]: + """Load configuration from file.""" + if os.path.exists(self.config_path): + try: + with open(self.config_path, 'r', encoding='utf-8') as f: + return json.load(f) + except Exception as e: + logger.warning(f"Failed to load config: {e}") + return {} + + def _save_config(self): + """Save configuration to file.""" + try: + with open(self.config_path, 'w', encoding='utf-8') as f: + json.dump(self.config, f, indent=2) + except Exception as e: + logger.error(f"Failed to save config: {e}") + + def set_stop_flag(self, stop_flag): + """Set the stop flag for checking interruptions""" + self.stop_flag = stop_flag + self._stopped = False + + def set_log_callback(self, log_callback): + """Set log callback for GUI integration""" + self.log_callback = log_callback + + def _check_stop(self) -> bool: + """Check if stop has been requested""" + if self._stopped: + return True + if self.stop_flag and self.stop_flag.is_set(): + self._stopped = True + return True + # Check global manga translator cancellation + try: + from manga_translator import MangaTranslator + if MangaTranslator.is_globally_cancelled(): + self._stopped = True + return True + except Exception: + pass + return False + + def _log(self, message: str, level: str = "info"): + """Log message with stop suppression""" + # Suppress logs when stopped (allow only essential stop confirmation messages) + if self._check_stop(): + essential_stop_keywords = [ + "⏹️ Translation stopped by user", + "⏹️ Bubble detection stopped", + "cleanup", "🧹" + ] + if not any(keyword in message for keyword in essential_stop_keywords): + return + + if self.log_callback: + self.log_callback(message, level) + else: + logger.info(message) if level == 'info' else getattr(logger, level, logger.info)(message) + + def reset_stop_flags(self): + """Reset stop flags when starting new processing""" + self._stopped = False + + def load_model(self, model_path: str, force_reload: bool = False) -> bool: + """ + Load a YOLOv8 model for bubble detection. 
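+
+        Example (a sketch; file paths are hypothetical):
+
+            detector = BubbleDetector()
+            if detector.load_model("models/bubble_detector.pt"):
+                bubbles = detector.detect_bubbles("page_001.png", confidence=0.3)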
+ + Args: + model_path: Path to model file (.pt, .onnx, or .torchscript) + force_reload: Force reload even if model is already loaded + + Returns: + True if model loaded successfully, False otherwise + """ + try: + # If given a Hugging Face repo ID (e.g., 'owner/name'), fetch detector.onnx into models/ + if model_path and (('/' in model_path) and not os.path.exists(model_path)): + try: + from huggingface_hub import hf_hub_download + os.makedirs(self.cache_dir, exist_ok=True) + logger.info(f"📥 Resolving repo '{model_path}' to detector.onnx in {self.cache_dir}...") + resolved = hf_hub_download(repo_id=model_path, filename='detector.onnx', cache_dir=self.cache_dir, local_dir=self.cache_dir, local_dir_use_symlinks=False) + if resolved and os.path.exists(resolved): + model_path = resolved + logger.info(f"✅ Downloaded detector.onnx to: {model_path}") + except Exception as repo_err: + logger.error(f"Failed to download from repo '{model_path}': {repo_err}") + if not os.path.exists(model_path): + logger.error(f"Model file not found: {model_path}") + return False + + # Check if it's the same model already loaded + if self.model_loaded and not force_reload: + last_path = self.config.get('last_model_path', '') + if last_path == model_path: + logger.info("Model already loaded (same path)") + return True + else: + logger.info(f"Model path changed from {last_path} to {model_path}, reloading...") + force_reload = True + + # Clear previous model if force reload + if force_reload: + logger.info("Force reloading model...") + self.model = None + self.onnx_session = None + self.model_loaded = False + self.model_type = None + + logger.info(f"📥 Loading bubble detection model: {model_path}") + + # Determine model type by extension + ext = Path(model_path).suffix.lower() + + if ext in ['.pt', '.pth']: + if not YOLO_AVAILABLE: + logger.warning("Ultralytics package not available in this build") + logger.info("Bubble detection will be disabled - this is normal for lightweight builds") + # Don't return False immediately, try other fallbacks + self.model_loaded = False + return False + + # Load YOLOv8 model + try: + self.model = YOLO(model_path) + self.model_type = 'yolo' + + # Set to eval mode + if hasattr(self.model, 'model'): + self.model.model.eval() + + # Move to GPU if available + if self.use_gpu and TORCH_AVAILABLE: + try: + self.model.to('cuda') + except Exception as gpu_error: + logger.warning(f"Could not move model to GPU: {gpu_error}") + + logger.info("✅ YOLOv8 model loaded successfully") + # Apply optional FP16 precision to reduce VRAM if enabled + if self.quantize_enabled and self.use_gpu and TORCH_AVAILABLE: + try: + m = self.model.model if hasattr(self.model, 'model') else self.model + m.half() + logger.info("🔻 Applied FP16 precision to YOLO model (GPU)") + except Exception as _e: + logger.warning(f"Could not switch YOLO model to FP16: {_e}") + + except Exception as yolo_error: + logger.error(f"Failed to load YOLO model: {yolo_error}") + return False + + elif ext == '.onnx': + if not ONNX_AVAILABLE: + logger.warning("ONNX Runtime not available in this build") + logger.info("ONNX model support disabled - this is normal for lightweight builds") + return False + + try: + # Load ONNX model + providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if self.use_gpu else ['CPUExecutionProvider'] + session_path = model_path + if self.quantize_enabled: + try: + from onnxruntime.quantization import quantize_dynamic, QuantType + quant_path = os.path.splitext(model_path)[0] + ".int8.onnx" + if not 
os.path.exists(quant_path) or os.environ.get('FORCE_ONNX_REBUILD', 'false').lower() == 'true': + logger.info("🔻 Quantizing ONNX model weights to INT8 (dynamic)...") + quantize_dynamic(model_input=model_path, model_output=quant_path, weight_type=QuantType.QInt8, op_types_to_quantize=['Conv', 'MatMul']) + session_path = quant_path + self.config['last_onnx_quantized_path'] = quant_path + self._save_config() + logger.info(f"✅ Using quantized ONNX model: {quant_path}") + except Exception as qe: + logger.warning(f"ONNX quantization not applied: {qe}") + # Use conservative ORT memory options to reduce RAM growth + so = ort.SessionOptions() + try: + so.enable_mem_pattern = False + so.enable_cpu_mem_arena = False + except Exception: + pass + self.onnx_session = ort.InferenceSession(session_path, sess_options=so, providers=providers) + self.model_type = 'onnx' + + logger.info("✅ ONNX model loaded successfully") + + except Exception as onnx_error: + logger.error(f"Failed to load ONNX model: {onnx_error}") + return False + + elif ext == '.torchscript': + if not TORCH_AVAILABLE: + logger.warning("PyTorch not available in this build") + logger.info("TorchScript model support disabled - this is normal for lightweight builds") + return False + + try: + # Add safety check for torch being None + if torch is None: + logger.error("PyTorch module is None - cannot load TorchScript model") + return False + + # Load TorchScript model + self.model = torch.jit.load(model_path, map_location='cpu') + self.model.eval() + self.model_type = 'torch' + + if self.use_gpu: + try: + self.model = self.model.cuda() + except Exception as gpu_error: + logger.warning(f"Could not move TorchScript model to GPU: {gpu_error}") + + logger.info("✅ TorchScript model loaded successfully") + + # Optional FP16 precision on GPU + if self.quantize_enabled and self.use_gpu and TORCH_AVAILABLE: + try: + self.model = self.model.half() + logger.info("🔻 Applied FP16 precision to TorchScript model (GPU)") + except Exception as _e: + logger.warning(f"Could not switch TorchScript model to FP16: {_e}") + + except Exception as torch_error: + logger.error(f"Failed to load TorchScript model: {torch_error}") + return False + + else: + logger.error(f"Unsupported model format: {ext}") + logger.info("Supported formats: .pt/.pth (YOLOv8), .onnx (ONNX), .torchscript (TorchScript)") + return False + + # Only set loaded if we actually succeeded + self.model_loaded = True + self.config['last_model_path'] = model_path + self.config['model_type'] = self.model_type + self._save_config() + + return True + + except Exception as e: + logger.error(f"Failed to load model: {e}") + logger.error(traceback.format_exc()) + self.model_loaded = False + + # Provide helpful context for .exe users + logger.info("Note: If running from .exe, some ML libraries may not be included") + logger.info("This is normal for lightweight builds - bubble detection will be disabled") + + return False + + def load_rtdetr_model(self, model_path: str = None, model_id: str = None, force_reload: bool = False) -> bool: + """ + Load RT-DETR model for advanced bubble and text detection. 
+ This implementation avoids the 'meta tensor' copy error by: + - Serializing the entire load under a class lock (no concurrent loads) + - Loading directly onto the target device (CUDA if available) via device_map='auto' + - Avoiding .to() on a potentially-meta model; no device migration post-load + + Args: + model_path: Optional path to local model + model_id: Optional HuggingFace model ID (default: 'ogkalu/comic-text-and-bubble-detector') + force_reload: Force reload even if already loaded + + Returns: + True if successful, False otherwise + """ + if not TRANSFORMERS_AVAILABLE: + logger.error("Transformers library required for RT-DETR. Install with: pip install transformers") + return False + + if not PIL_AVAILABLE: + logger.error("PIL required for RT-DETR. Install with: pip install pillow") + return False + + if self.rtdetr_loaded and not force_reload: + logger.info("RT-DETR model already loaded") + return True + + # Fast path: if shared already loaded and not forcing reload, attach + if BubbleDetector._rtdetr_loaded and not force_reload: + self.rtdetr_model = BubbleDetector._rtdetr_shared_model + self.rtdetr_processor = BubbleDetector._rtdetr_shared_processor + self.rtdetr_loaded = True + logger.info("RT-DETR model attached from shared cache") + return True + + # Serialize the ENTIRE loading sequence to avoid concurrent init issues + with BubbleDetector._rtdetr_init_lock: + try: + # Re-check after acquiring lock + if BubbleDetector._rtdetr_loaded and not force_reload: + self.rtdetr_model = BubbleDetector._rtdetr_shared_model + self.rtdetr_processor = BubbleDetector._rtdetr_shared_processor + self.rtdetr_loaded = True + logger.info("RT-DETR model attached from shared cache (post-lock)") + return True + + # Use custom model_id if provided, otherwise use default + repo_id = model_id if model_id else self.rtdetr_repo + logger.info(f"📥 Loading RT-DETR model from {repo_id}...") + + # Ensure TorchDynamo/compile doesn't interfere on some builds + try: + os.environ.setdefault('TORCHDYNAMO_DISABLE', '1') + except Exception: + pass + + # Decide device strategy + gpu_available = bool(TORCH_AVAILABLE and hasattr(torch, 'cuda') and torch.cuda.is_available()) + device_map = 'auto' if gpu_available else None + # Choose dtype + dtype = None + if TORCH_AVAILABLE: + try: + dtype = torch.float16 if gpu_available else torch.float32 + except Exception: + dtype = None + low_cpu = True if gpu_available else False + + # Load processor (once) + self.rtdetr_processor = RTDetrImageProcessor.from_pretrained( + repo_id, + size={"width": 640, "height": 640}, + cache_dir=self.cache_dir if not model_path else None + ) + + # Prepare kwargs for from_pretrained + from_kwargs = { + 'cache_dir': self.cache_dir if not model_path else None, + 'low_cpu_mem_usage': low_cpu, + 'device_map': device_map, + } + if dtype is not None: + from_kwargs['dtype'] = dtype + + # First attempt: load directly to target (CUDA if available) + try: + self.rtdetr_model = RTDetrForObjectDetection.from_pretrained( + model_path if model_path else repo_id, + **from_kwargs, + ) + except Exception as primary_err: + # Fallback to a simple CPU load (no device move) if CUDA path fails + logger.warning(f"RT-DETR primary load failed ({primary_err}); retrying on CPU...") + from_kwargs_fallback = { + 'cache_dir': self.cache_dir if not model_path else None, + 'low_cpu_mem_usage': False, + 'device_map': None, + } + if TORCH_AVAILABLE: + from_kwargs_fallback['dtype'] = torch.float32 + self.rtdetr_model = RTDetrForObjectDetection.from_pretrained( + model_path if 
model_path else repo_id, + **from_kwargs_fallback, + ) + + # Optional dynamic quantization for linear layers (CPU only) + if self.quantize_enabled and TORCH_AVAILABLE and (not gpu_available): + try: + try: + import torch.ao.quantization as tq + quantize_dynamic = tq.quantize_dynamic # type: ignore + except Exception: + import torch.quantization as tq # type: ignore + quantize_dynamic = tq.quantize_dynamic # type: ignore + self.rtdetr_model = quantize_dynamic(self.rtdetr_model, {torch.nn.Linear}, dtype=torch.qint8) + logger.info("🔻 Applied dynamic INT8 quantization to RT-DETR linear layers (CPU)") + except Exception as qe: + logger.warning(f"RT-DETR dynamic quantization skipped: {qe}") + + # Finalize + self.rtdetr_model.eval() + + # Sanity check: ensure no parameter is left on 'meta' device + try: + for n, p in self.rtdetr_model.named_parameters(): + dev = getattr(p, 'device', None) + if dev is not None and getattr(dev, 'type', '') == 'meta': + raise RuntimeError(f"Parameter {n} is on 'meta' device after load") + except Exception as e: + logger.error(f"RT-DETR load sanity check failed: {e}") + self.rtdetr_loaded = False + return False + + # Publish shared cache + BubbleDetector._rtdetr_shared_model = self.rtdetr_model + BubbleDetector._rtdetr_shared_processor = self.rtdetr_processor + BubbleDetector._rtdetr_loaded = True + BubbleDetector._rtdetr_repo_id = repo_id + + self.rtdetr_loaded = True + + # Save the model ID that was used + self.config['rtdetr_loaded'] = True + self.config['rtdetr_model_id'] = repo_id + self._save_config() + + loc = 'CUDA' if gpu_available else 'CPU' + logger.info(f"✅ RT-DETR model loaded successfully ({loc})") + logger.info(" Classes: Empty bubbles, Text bubbles, Free text") + + # Auto-convert to ONNX for RT-DETR only if explicitly enabled + if os.environ.get('AUTO_CONVERT_RTDETR_ONNX', 'false').lower() == 'true': + onnx_path = os.path.join(self.cache_dir, 'rtdetr_comic.onnx') + if self.convert_to_onnx('rtdetr', onnx_path): + logger.info("🚀 RT-DETR converted to ONNX for faster inference") + # Store ONNX path for later use + self.config['rtdetr_onnx_path'] = onnx_path + self._save_config() + # Optionally quantize ONNX for reduced RAM + if self.onnx_quantize_enabled: + try: + from onnxruntime.quantization import quantize_dynamic, QuantType + quant_path = os.path.splitext(onnx_path)[0] + ".int8.onnx" + if not os.path.exists(quant_path) or os.environ.get('FORCE_ONNX_REBUILD', 'false').lower() == 'true': + logger.info("🔻 Quantizing RT-DETR ONNX to INT8 (dynamic)...") + quantize_dynamic(model_input=onnx_path, model_output=quant_path, weight_type=QuantType.QInt8, op_types_to_quantize=['Conv', 'MatMul']) + self.config['rtdetr_onnx_quantized_path'] = quant_path + self._save_config() + logger.info(f"✅ Quantized RT-DETR ONNX saved to: {quant_path}") + except Exception as qe: + logger.warning(f"ONNX quantization for RT-DETR skipped: {qe}") + else: + logger.info("ℹ️ Skipping RT-DETR ONNX export (converter not supported in current environment)") + + return True + except Exception as e: + logger.error(f"❌ Failed to load RT-DETR: {e}") + self.rtdetr_loaded = False + return False + + def check_rtdetr_available(self, model_id: str = None) -> bool: + """ + Check if RT-DETR model is available (cached). 
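+
+        Example (a sketch; uses the default repo when model_id is omitted):
+
+            detector = BubbleDetector()
+            if not detector.check_rtdetr_available():
+                detector.load_rtdetr_model()  # downloads the model on first use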
+ + Args: + model_id: Optional HuggingFace model ID + + Returns: + True if model is cached and available + """ + try: + from pathlib import Path + + # Use provided model_id or default + repo_id = model_id if model_id else self.rtdetr_repo + + # Check HuggingFace cache + cache_dir = Path.home() / ".cache" / "huggingface" / "hub" + model_id_formatted = repo_id.replace("/", "--") + + # Look for model folder + model_folders = list(cache_dir.glob(f"models--{model_id_formatted}*")) + + if model_folders: + for folder in model_folders: + if (folder / "snapshots").exists(): + snapshots = list((folder / "snapshots").iterdir()) + if snapshots: + return True + + return False + + except Exception: + return False + + def detect_bubbles(self, + image_path: str, + confidence: float = None, + iou_threshold: float = None, + max_detections: int = None, + use_rtdetr: bool = None) -> List[Tuple[int, int, int, int]]: + """ + Detect speech bubbles in an image (backward compatible method). + + Args: + image_path: Path to image file + confidence: Minimum confidence threshold (0-1) + iou_threshold: IOU threshold for NMS (0-1) + max_detections: Maximum number of detections to return + use_rtdetr: If True, use RT-DETR instead of YOLOv8 (if available) + + Returns: + List of bubble bounding boxes as (x, y, width, height) tuples + """ + # Check for stop at start + if self._check_stop(): + self._log("⏹️ Bubble detection stopped by user", "warning") + return [] + + # Decide which model to use + if use_rtdetr is None: + # Auto-select: prefer RT-DETR if available + use_rtdetr = self.rtdetr_loaded + + if use_rtdetr: + # Prefer ONNX backend if available, else PyTorch + if getattr(self, 'rtdetr_onnx_loaded', False): + results = self.detect_with_rtdetr_onnx( + image_path=image_path, + confidence=confidence, + return_all_bubbles=True + ) + return results + if self.rtdetr_loaded: + results = self.detect_with_rtdetr( + image_path=image_path, + confidence=confidence, + return_all_bubbles=True + ) + return results + + # Original YOLOv8 detection + if not self.model_loaded: + logger.error("No model loaded. 
Call load_model() first.") + return [] + + # Use defaults if not specified + confidence = confidence or self.default_confidence + iou_threshold = iou_threshold or self.default_iou_threshold + max_detections = max_detections or self.default_max_detections + + try: + # Load image + image = cv2.imread(image_path) + if image is None: + logger.error(f"Failed to load image: {image_path}") + return [] + + h, w = image.shape[:2] + self._log(f"🔍 Detecting bubbles in {w}x{h} image") + + # Check for stop before inference + if self._check_stop(): + self._log("⏹️ Bubble detection inference stopped by user", "warning") + return [] + + if self.model_type == 'yolo': + # YOLOv8 inference + results = self.model( + image_path, + conf=confidence, + iou=iou_threshold, + max_det=min(max_detections, getattr(self, 'max_det_yolo', max_detections)), + verbose=False + ) + + bubbles = [] + for r in results: + if r.boxes is not None: + for box in r.boxes: + # Get box coordinates + x1, y1, x2, y2 = box.xyxy[0].cpu().numpy() + x, y = int(x1), int(y1) + width = int(x2 - x1) + height = int(y2 - y1) + + # Get confidence + conf = float(box.conf[0]) + + # Add to list + if len(bubbles) < max_detections: + bubbles.append((x, y, width, height)) + + logger.debug(f" Bubble: ({x},{y}) {width}x{height} conf={conf:.2f}") + + elif self.model_type == 'onnx': + # ONNX inference + bubbles = self._detect_with_onnx(image, confidence, iou_threshold, max_detections) + + elif self.model_type == 'torch': + # TorchScript inference + bubbles = self._detect_with_torchscript(image, confidence, iou_threshold, max_detections) + + else: + logger.error(f"Unknown model type: {self.model_type}") + return [] + + logger.info(f"✅ Detected {len(bubbles)} speech bubbles") + time.sleep(0.1) # Brief pause for stability + logger.debug("💤 Bubble detection pausing briefly for stability") + return bubbles + + except Exception as e: + logger.error(f"Detection failed: {e}") + logger.error(traceback.format_exc()) + return [] + + def detect_with_rtdetr(self, + image_path: str = None, + image: np.ndarray = None, + confidence: float = None, + return_all_bubbles: bool = False) -> Any: + """ + Detect using RT-DETR model with 3-class detection (PyTorch backend). + + Args: + image_path: Path to image file + image: Image array (BGR format) + confidence: Confidence threshold + return_all_bubbles: If True, return list of bubble boxes (for compatibility) + If False, return dict with all classes + + Returns: + List of bubbles if return_all_bubbles=True, else dict with classes + """ + # Check for stop at start + if self._check_stop(): + self._log("⏹️ RT-DETR detection stopped by user", "warning") + if return_all_bubbles: + return [] + return {'bubbles': [], 'text_bubbles': [], 'text_free': []} + + if not self.rtdetr_loaded: + self._log("RT-DETR not loaded. 
Call load_rtdetr_model() first.", "warning") + if return_all_bubbles: + return [] + return {'bubbles': [], 'text_bubbles': [], 'text_free': []} + + confidence = confidence or self.default_confidence + + try: + # Load image + if image_path: + image = cv2.imread(image_path) + elif image is None: + logger.error("No image provided") + if return_all_bubbles: + return [] + return {'bubbles': [], 'text_bubbles': [], 'text_free': []} + + # Convert BGR to RGB for PIL + image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + pil_image = Image.fromarray(image_rgb) + + # Prepare image for model + inputs = self.rtdetr_processor(images=pil_image, return_tensors="pt") + + # Move inputs to the same device as the model and match model dtype for floating tensors + model_device = next(self.rtdetr_model.parameters()).device if self.rtdetr_model is not None else (torch.device('cpu') if TORCH_AVAILABLE else 'cpu') + model_dtype = None + if TORCH_AVAILABLE and self.rtdetr_model is not None: + try: + model_dtype = next(self.rtdetr_model.parameters()).dtype + except Exception: + model_dtype = None + + if TORCH_AVAILABLE: + new_inputs = {} + for k, v in inputs.items(): + if isinstance(v, torch.Tensor): + v = v.to(model_device) + if model_dtype is not None and torch.is_floating_point(v): + v = v.to(model_dtype) + new_inputs[k] = v + inputs = new_inputs + + # Run inference with autocast when model is half/bfloat16 on CUDA + use_amp = TORCH_AVAILABLE and hasattr(model_device, 'type') and model_device.type == 'cuda' and (model_dtype in (torch.float16, torch.bfloat16)) + autocast_dtype = model_dtype if model_dtype in (torch.float16, torch.bfloat16) else None + + with torch.no_grad(): + if use_amp and autocast_dtype is not None: + with torch.autocast('cuda', dtype=autocast_dtype): + outputs = self.rtdetr_model(**inputs) + else: + outputs = self.rtdetr_model(**inputs) + + # Brief pause for stability after inference + time.sleep(0.1) + logger.debug("💤 RT-DETR inference pausing briefly for stability") + + # Post-process results + target_sizes = torch.tensor([pil_image.size[::-1]]) if TORCH_AVAILABLE else None + if TORCH_AVAILABLE and hasattr(model_device, 'type') and model_device.type == "cuda": + target_sizes = target_sizes.to(model_device) + + results = self.rtdetr_processor.post_process_object_detection( + outputs, + target_sizes=target_sizes, + threshold=confidence + )[0] + + # Apply per-detector cap if configured + cap = getattr(self, 'max_det_rtdetr', self.default_max_detections) + if cap and len(results['boxes']) > cap: + # Keep top-scoring first + scores = results['scores'] + top_idx = scores.topk(k=cap).indices if hasattr(scores, 'topk') else range(cap) + results = { + 'boxes': [results['boxes'][i] for i in top_idx], + 'scores': [results['scores'][i] for i in top_idx], + 'labels': [results['labels'][i] for i in top_idx] + } + + logger.info(f"📊 RT-DETR found {len(results['boxes'])} detections above {confidence:.2f} confidence") + + # Apply NMS to remove duplicate detections + # Group detections by class + class_detections = {self.CLASS_BUBBLE: [], self.CLASS_TEXT_BUBBLE: [], self.CLASS_TEXT_FREE: []} + + for box, score, label in zip(results['boxes'], results['scores'], results['labels']): + x1, y1, x2, y2 = map(float, box.tolist()) + label_id = label.item() + if label_id in class_detections: + class_detections[label_id].append((x1, y1, x2, y2, float(score.item()))) + + # Apply NMS per class to remove duplicates + def compute_iou(box1, box2): + """Compute IoU between two boxes (x1, y1, x2, y2)""" + x1_1, y1_1, x2_1, 
y2_1 = box1[:4] + x1_2, y1_2, x2_2, y2_2 = box2[:4] + + # Intersection + x_left = max(x1_1, x1_2) + y_top = max(y1_1, y1_2) + x_right = min(x2_1, x2_2) + y_bottom = min(y2_1, y2_2) + + if x_right < x_left or y_bottom < y_top: + return 0.0 + + intersection = (x_right - x_left) * (y_bottom - y_top) + + # Union + area1 = (x2_1 - x1_1) * (y2_1 - y1_1) + area2 = (x2_2 - x1_2) * (y2_2 - y1_2) + union = area1 + area2 - intersection + + return intersection / union if union > 0 else 0.0 + + def apply_nms(boxes_with_scores, iou_threshold=0.45): + """Apply Non-Maximum Suppression""" + if not boxes_with_scores: + return [] + + # Sort by score (descending) + sorted_boxes = sorted(boxes_with_scores, key=lambda x: x[4], reverse=True) + keep = [] + + while sorted_boxes: + # Keep the box with highest score + current = sorted_boxes.pop(0) + keep.append(current) + + # Remove boxes with high IoU + sorted_boxes = [box for box in sorted_boxes if compute_iou(current, box) < iou_threshold] + + return keep + + # Apply NMS and organize by class + detections = { + 'bubbles': [], # Empty speech bubbles + 'text_bubbles': [], # Bubbles with text + 'text_free': [] # Text without bubbles + } + + for class_id, boxes_list in class_detections.items(): + nms_boxes = apply_nms(boxes_list, iou_threshold=self.default_iou_threshold) + + for x1, y1, x2, y2, scr in nms_boxes: + width = int(x2 - x1) + height = int(y2 - y1) + # Store as (x, y, width, height) to match YOLOv8 format + bbox = (int(x1), int(y1), width, height) + + if class_id == self.CLASS_BUBBLE: + detections['bubbles'].append(bbox) + elif class_id == self.CLASS_TEXT_BUBBLE: + detections['text_bubbles'].append(bbox) + elif class_id == self.CLASS_TEXT_FREE: + detections['text_free'].append(bbox) + + # Stop early if we hit the configured cap across all classes + total_count = len(detections['bubbles']) + len(detections['text_bubbles']) + len(detections['text_free']) + if total_count >= (self.config.get('manga_settings', {}).get('ocr', {}).get('bubble_max_detections', self.default_max_detections) if isinstance(self.config, dict) else self.default_max_detections): + break + + # Log results + total = len(detections['bubbles']) + len(detections['text_bubbles']) + len(detections['text_free']) + logger.info(f"✅ RT-DETR detected {total} objects:") + logger.info(f" - Empty bubbles: {len(detections['bubbles'])}") + logger.info(f" - Text bubbles: {len(detections['text_bubbles'])}") + logger.info(f" - Free text: {len(detections['text_free'])}") + + # Return format based on compatibility mode + if return_all_bubbles: + # Return all bubbles (empty + with text) for backward compatibility + all_bubbles = detections['bubbles'] + detections['text_bubbles'] + return all_bubbles + else: + return detections + + except Exception as e: + logger.error(f"RT-DETR detection failed: {e}") + logger.error(traceback.format_exc()) + if return_all_bubbles: + return [] + return {'bubbles': [], 'text_bubbles': [], 'text_free': []} + + def detect_all_text_regions(self, image_path: str = None, image: np.ndarray = None) -> List[Tuple[int, int, int, int]]: + """ + Detect all text regions using RT-DETR (both in bubbles and free text). 
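+
+        Example (a sketch; the image path is hypothetical):
+
+            regions = detector.detect_all_text_regions(image_path="page_001.png")
+            for x, y, w, h in regions:
+                ...  # crop or OCR each (x, y, width, height) region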
+ + Returns: + List of bounding boxes for all text regions + """ + if not self.rtdetr_loaded: + logger.warning("RT-DETR required for text detection") + return [] + + detections = self.detect_with_rtdetr(image_path=image_path, image=image, return_all_bubbles=False) + + # Combine text bubbles and free text + all_text = detections['text_bubbles'] + detections['text_free'] + + logger.info(f"📝 Found {len(all_text)} text regions total") + return all_text + + def _detect_with_onnx(self, image: np.ndarray, confidence: float, + iou_threshold: float, max_detections: int) -> List[Tuple[int, int, int, int]]: + """Run detection using ONNX model.""" + # Preprocess image + img_size = 640 # Standard YOLOv8 input size + img_resized = cv2.resize(image, (img_size, img_size)) + img_norm = img_resized.astype(np.float32) / 255.0 + img_transposed = np.transpose(img_norm, (2, 0, 1)) + img_batch = np.expand_dims(img_transposed, axis=0) + + # Run inference + input_name = self.onnx_session.get_inputs()[0].name + outputs = self.onnx_session.run(None, {input_name: img_batch}) + + # Process outputs (YOLOv8 format) + predictions = outputs[0][0] # Remove batch dimension + + # Filter by confidence and apply NMS + bubbles = [] + boxes = [] + scores = [] + + for pred in predictions.T: # Transpose to get predictions per detection + if len(pred) >= 5: + x_center, y_center, width, height, obj_conf = pred[:5] + + if obj_conf >= confidence: + # Convert to corner coordinates + x1 = x_center - width / 2 + y1 = y_center - height / 2 + + # Scale to original image size + h, w = image.shape[:2] + x1 = int(x1 * w / img_size) + y1 = int(y1 * h / img_size) + width = int(width * w / img_size) + height = int(height * h / img_size) + + boxes.append([x1, y1, x1 + width, y1 + height]) + scores.append(float(obj_conf)) + + # Apply NMS + if boxes: + indices = cv2.dnn.NMSBoxes(boxes, scores, confidence, iou_threshold) + if len(indices) > 0: + indices = indices.flatten()[:max_detections] + for i in indices: + x1, y1, x2, y2 = boxes[i] + bubbles.append((x1, y1, x2 - x1, y2 - y1)) + + return bubbles + + def _detect_with_torchscript(self, image: np.ndarray, confidence: float, + iou_threshold: float, max_detections: int) -> List[Tuple[int, int, int, int]]: + """Run detection using TorchScript model.""" + # Similar to ONNX but using PyTorch tensors + img_size = 640 + img_resized = cv2.resize(image, (img_size, img_size)) + img_norm = img_resized.astype(np.float32) / 255.0 + img_tensor = torch.from_numpy(img_norm).permute(2, 0, 1).unsqueeze(0) + + if self.use_gpu: + img_tensor = img_tensor.cuda() + + with torch.no_grad(): + outputs = self.model(img_tensor) + + # Process outputs similar to ONNX + # Implementation depends on exact model output format + # This is a placeholder - adjust based on your model + return [] + + def visualize_detections(self, image_path: str, bubbles: List[Tuple[int, int, int, int]] = None, + output_path: str = None, use_rtdetr: bool = False) -> np.ndarray: + """ + Visualize detected bubbles on the image. 
+ + Args: + image_path: Path to original image + bubbles: List of bubble bounding boxes (if None, will detect) + output_path: Optional path to save visualization + use_rtdetr: Use RT-DETR for visualization with class colors + + Returns: + Image with drawn bounding boxes + """ + image = cv2.imread(image_path) + if image is None: + logger.error(f"Failed to load image: {image_path}") + return None + + vis_image = image.copy() + + if use_rtdetr and self.rtdetr_loaded: + # RT-DETR visualization with different colors per class + detections = self.detect_with_rtdetr(image_path=image_path, return_all_bubbles=False) + + # Colors for each class + colors = { + 'bubbles': (0, 255, 0), # Green for empty bubbles + 'text_bubbles': (255, 0, 0), # Blue for text bubbles + 'text_free': (0, 0, 255) # Red for free text + } + + # Draw detections + for class_name, bboxes in detections.items(): + color = colors[class_name] + + for i, (x, y, w, h) in enumerate(bboxes): + # Draw rectangle + cv2.rectangle(vis_image, (x, y), (x + w, y + h), color, 2) + + # Add label + label = f"{class_name.replace('_', ' ').title()} {i+1}" + label_size, _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1) + cv2.rectangle(vis_image, (x, y - label_size[1] - 4), + (x + label_size[0], y), color, -1) + cv2.putText(vis_image, label, (x, y - 2), + cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1) + else: + # Original YOLOv8 visualization + if bubbles is None: + bubbles = self.detect_bubbles(image_path) + + # Draw bounding boxes + for i, (x, y, w, h) in enumerate(bubbles): + # Draw rectangle + color = (0, 255, 0) # Green + thickness = 2 + cv2.rectangle(vis_image, (x, y), (x + w, y + h), color, thickness) + + # Add label + label = f"Bubble {i+1}" + label_size, _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1) + cv2.rectangle(vis_image, (x, y - label_size[1] - 4), (x + label_size[0], y), color, -1) + cv2.putText(vis_image, label, (x, y - 2), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1) + + # Save if output path provided + if output_path: + cv2.imwrite(output_path, vis_image) + logger.info(f"💾 Visualization saved to: {output_path}") + + return vis_image + + def convert_to_onnx(self, model_path: str, output_path: str = None) -> bool: + """ + Convert a YOLOv8 or RT-DETR model to ONNX format. 
+ + Args: + model_path: Path to model file or 'rtdetr' for loaded RT-DETR + output_path: Path for ONNX output (auto-generated if None) + + Returns: + True if conversion successful, False otherwise + """ + try: + logger.info(f"🔄 Converting {model_path} to ONNX...") + + # Generate output path if not provided + if output_path is None: + if model_path == 'rtdetr' and self.rtdetr_loaded: + base_name = 'rtdetr_comic' + else: + base_name = Path(model_path).stem + output_path = os.path.join(self.cache_dir, f"{base_name}.onnx") + + # Check if already exists + if os.path.exists(output_path) and not os.environ.get('FORCE_ONNX_REBUILD', 'false').lower() == 'true': + logger.info(f"✅ ONNX model already exists: {output_path}") + return True + + # Handle RT-DETR conversion + if model_path == 'rtdetr' and self.rtdetr_loaded: + if not TORCH_AVAILABLE: + logger.error("PyTorch required for RT-DETR ONNX conversion") + return False + + # RT-DETR specific conversion + self.rtdetr_model.eval() + + # Create dummy input (pixel values): BxCxHxW + dummy_input = torch.randn(1, 3, 640, 640) + if self.device == 'cuda': + dummy_input = dummy_input.to('cuda') + + # Wrap the model to return only tensors (logits, pred_boxes) + class _RTDetrExportWrapper(torch.nn.Module): + def __init__(self, mdl): + super().__init__() + self.mdl = mdl + def forward(self, images): + out = self.mdl(pixel_values=images) + # Handle dict/ModelOutput/tuple outputs + logits = None + boxes = None + try: + if isinstance(out, dict): + logits = out.get('logits', None) + boxes = out.get('pred_boxes', out.get('boxes', None)) + else: + logits = getattr(out, 'logits', None) + boxes = getattr(out, 'pred_boxes', getattr(out, 'boxes', None)) + except Exception: + pass + if (logits is None or boxes is None) and isinstance(out, (tuple, list)) and len(out) >= 2: + logits, boxes = out[0], out[1] + return logits, boxes + + wrapper = _RTDetrExportWrapper(self.rtdetr_model) + if self.device == 'cuda': + wrapper = wrapper.to('cuda') + + # Try PyTorch 2.x dynamo_export first (more tolerant of newer aten ops) + try: + success = False + try: + from torch.onnx import dynamo_export + try: + exp = dynamo_export(wrapper, dummy_input) + except TypeError: + # Older PyTorch dynamo_export may not support this calling convention + exp = dynamo_export(wrapper, dummy_input) + # exp may have save(); otherwise, it may expose model_proto + try: + exp.save(output_path) # type: ignore + success = True + except Exception: + try: + import onnx as _onnx + _onnx.save(exp.model_proto, output_path) # type: ignore + success = True + except Exception as _se: + logger.warning(f"dynamo_export produced model but could not save: {_se}") + except Exception as de: + logger.warning(f"dynamo_export failed; falling back to legacy exporter: {de}") + if success: + logger.info(f"✅ RT-DETR ONNX saved to: {output_path} (dynamo_export)") + return True + except Exception as de2: + logger.warning(f"dynamo_export path error: {de2}") + + # Legacy exporter with opset fallback + last_err = None + for opset in [19, 18, 17, 16, 15, 14, 13]: + try: + torch.onnx.export( + wrapper, + dummy_input, + output_path, + export_params=True, + opset_version=opset, + do_constant_folding=True, + input_names=['pixel_values'], + output_names=['logits', 'boxes'], + dynamic_axes={ + 'pixel_values': {0: 'batch', 2: 'height', 3: 'width'}, + 'logits': {0: 'batch'}, + 'boxes': {0: 'batch'} + } + ) + logger.info(f"✅ RT-DETR ONNX saved to: {output_path} (opset {opset})") + return True + except Exception as _e: + last_err = _e + try: + msg 
= str(_e) + except Exception: + msg = '' + logger.warning(f"RT-DETR ONNX export failed at opset {opset}: {msg}") + continue + + logger.error(f"All RT-DETR ONNX export attempts failed. Last error: {last_err}") + return False + + # Handle YOLOv8 conversion - FIXED + elif YOLO_AVAILABLE and os.path.exists(model_path): + logger.info(f"Loading YOLOv8 model from: {model_path}") + + # Load model + model = YOLO(model_path) + + # Export to ONNX - this returns the path to the exported model + logger.info("Exporting to ONNX format...") + exported_path = model.export(format='onnx', imgsz=640, simplify=True) + + # exported_path could be a string or Path object + exported_path = str(exported_path) if exported_path else None + + if exported_path and os.path.exists(exported_path): + # Move to desired location if different + if exported_path != output_path: + import shutil + logger.info(f"Moving ONNX from {exported_path} to {output_path}") + shutil.move(exported_path, output_path) + + logger.info(f"✅ YOLOv8 ONNX saved to: {output_path}") + return True + else: + # Fallback: check if it was created with expected name + expected_onnx = model_path.replace('.pt', '.onnx') + if os.path.exists(expected_onnx): + if expected_onnx != output_path: + import shutil + shutil.move(expected_onnx, output_path) + logger.info(f"✅ YOLOv8 ONNX saved to: {output_path}") + return True + else: + logger.error(f"ONNX export failed - no output file found") + return False + + else: + logger.error(f"Cannot convert {model_path}: Model not found or dependencies missing") + return False + + except Exception as e: + logger.error(f"Conversion failed: {e}") + # Avoid noisy full stack trace in production logs; return False gracefully + return False + + def batch_detect(self, image_paths: List[str], **kwargs) -> Dict[str, List[Tuple[int, int, int, int]]]: + """ + Detect bubbles in multiple images. + + Args: + image_paths: List of image paths + **kwargs: Detection parameters (confidence, iou_threshold, max_detections, use_rtdetr) + + Returns: + Dictionary mapping image paths to bubble lists + """ + results = {} + + for i, image_path in enumerate(image_paths): + logger.info(f"Processing image {i+1}/{len(image_paths)}: {os.path.basename(image_path)}") + bubbles = self.detect_bubbles(image_path, **kwargs) + results[image_path] = bubbles + + return results + + def unload(self, release_shared: bool = False): + """Release model resources held by this detector instance. + Args: + release_shared: If True, also clear class-level shared RT-DETR caches. 
+ """ + try: + # Release instance-level models and sessions + try: + if getattr(self, 'onnx_session', None) is not None: + self.onnx_session = None + except Exception: + pass + try: + if getattr(self, 'rtdetr_onnx_session', None) is not None: + self.rtdetr_onnx_session = None + except Exception: + pass + for attr in ['model', 'rtdetr_model', 'rtdetr_processor']: + try: + if hasattr(self, attr): + setattr(self, attr, None) + except Exception: + pass + for flag in ['model_loaded', 'rtdetr_loaded', 'rtdetr_onnx_loaded']: + try: + if hasattr(self, flag): + setattr(self, flag, False) + except Exception: + pass + + # Optional: release shared caches + if release_shared: + try: + BubbleDetector._rtdetr_shared_model = None + BubbleDetector._rtdetr_shared_processor = None + BubbleDetector._rtdetr_loaded = False + except Exception: + pass + + # Free CUDA cache and trigger GC + try: + if TORCH_AVAILABLE and torch is not None and torch.cuda.is_available(): + torch.cuda.empty_cache() + except Exception: + pass + try: + import gc + gc.collect() + except Exception: + pass + except Exception: + # Best-effort only + pass + + def get_bubble_masks(self, image_path: str, bubbles: List[Tuple[int, int, int, int]]) -> np.ndarray: + """ + Create a mask image with bubble regions. + + Args: + image_path: Path to original image + bubbles: List of bubble bounding boxes + + Returns: + Binary mask with bubble regions as white (255) + """ + image = cv2.imread(image_path) + if image is None: + return None + + h, w = image.shape[:2] + mask = np.zeros((h, w), dtype=np.uint8) + + # Fill bubble regions + for x, y, bw, bh in bubbles: + cv2.rectangle(mask, (x, y), (x + bw, y + bh), 255, -1) + + return mask + + def filter_bubbles_by_size(self, bubbles: List[Tuple[int, int, int, int]], + min_area: int = 100, + max_area: int = None) -> List[Tuple[int, int, int, int]]: + """ + Filter bubbles by area. + + Args: + bubbles: List of bubble bounding boxes + min_area: Minimum area in pixels + max_area: Maximum area in pixels (None for no limit) + + Returns: + Filtered list of bubbles + """ + filtered = [] + + for x, y, w, h in bubbles: + area = w * h + if area >= min_area and (max_area is None or area <= max_area): + filtered.append((x, y, w, h)) + + return filtered + + def merge_overlapping_bubbles(self, bubbles: List[Tuple[int, int, int, int]], + overlap_threshold: float = 0.1) -> List[Tuple[int, int, int, int]]: + """ + Merge overlapping bubble detections. 
+ + Args: + bubbles: List of bubble bounding boxes + overlap_threshold: Minimum overlap ratio to merge + + Returns: + Merged list of bubbles + """ + if not bubbles: + return [] + + # Convert to numpy array for easier manipulation + boxes = np.array([(x, y, x+w, y+h) for x, y, w, h in bubbles]) + + merged = [] + used = set() + + for i, box1 in enumerate(boxes): + if i in used: + continue + + # Start with current box + x1, y1, x2, y2 = box1 + + # Check for overlaps with remaining boxes + for j in range(i + 1, len(boxes)): + if j in used: + continue + + box2 = boxes[j] + + # Calculate intersection + ix1 = max(x1, box2[0]) + iy1 = max(y1, box2[1]) + ix2 = min(x2, box2[2]) + iy2 = min(y2, box2[3]) + + if ix1 < ix2 and iy1 < iy2: + # Calculate overlap ratio + intersection = (ix2 - ix1) * (iy2 - iy1) + area1 = (x2 - x1) * (y2 - y1) + area2 = (box2[2] - box2[0]) * (box2[3] - box2[1]) + overlap = intersection / min(area1, area2) + + if overlap >= overlap_threshold: + # Merge boxes + x1 = min(x1, box2[0]) + y1 = min(y1, box2[1]) + x2 = max(x2, box2[2]) + y2 = max(y2, box2[3]) + used.add(j) + + merged.append((int(x1), int(y1), int(x2 - x1), int(y2 - y1))) + + return merged + + # ============================ + # RT-DETR (ONNX) BACKEND + # ============================ + def load_rtdetr_onnx_model(self, model_id: str = None, force_reload: bool = False) -> bool: + """ + Load RT-DETR ONNX model using onnxruntime. Downloads detector.onnx and config.json + from the provided Hugging Face repo if not already cached. + """ + if not ONNX_AVAILABLE: + logger.error("ONNX Runtime not available for RT-DETR ONNX backend") + return False + try: + # If singleton mode and already loaded, just attach shared session + try: + adv = (self.config or {}).get('manga_settings', {}).get('advanced', {}) if isinstance(self.config, dict) else {} + singleton = bool(adv.get('use_singleton_models', True)) + except Exception: + singleton = True + if singleton and BubbleDetector._rtdetr_onnx_loaded and not force_reload and BubbleDetector._rtdetr_onnx_shared_session is not None: + self.rtdetr_onnx_session = BubbleDetector._rtdetr_onnx_shared_session + self.rtdetr_onnx_loaded = True + return True + + repo = model_id or self.rtdetr_onnx_repo + try: + from huggingface_hub import hf_hub_download + except Exception as e: + logger.error(f"huggingface-hub required to fetch RT-DETR ONNX: {e}") + return False + + # Ensure local models dir (use configured cache_dir directly: e.g., 'models') + cache_dir = self.cache_dir + os.makedirs(cache_dir, exist_ok=True) + + # Download files into models/ and avoid symlinks so the file is visible there + try: + _ = hf_hub_download(repo_id=repo, filename='config.json', cache_dir=cache_dir, local_dir=cache_dir, local_dir_use_symlinks=False) + except Exception: + pass + onnx_fp = hf_hub_download(repo_id=repo, filename='detector.onnx', cache_dir=cache_dir, local_dir=cache_dir, local_dir_use_symlinks=False) + BubbleDetector._rtdetr_onnx_model_path = onnx_fp + + # Pick providers: prefer CUDA if available; otherwise CPU. Do NOT use DML. 
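+ # Illustrative note (added commentary, not from the upstream code): with
+ # onnxruntime-gpu installed, ort.get_available_providers() typically reports
+ # something like ['CUDAExecutionProvider', 'CPUExecutionProvider'], so the block
+ # below simply prepends CUDA when it is advertised. DirectML is deliberately
+ # skipped here; the detection path keeps a CPU-session rebuild as a fallback
+ # if a DML session ever fails.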
+ providers = ['CPUExecutionProvider'] + try: + avail = ort.get_available_providers() if ONNX_AVAILABLE else [] + if 'CUDAExecutionProvider' in avail: + providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] + except Exception: + pass + + # Session options with reduced memory arena and optional thread limiting in singleton mode + so = ort.SessionOptions() + try: + so.enable_mem_pattern = False + so.enable_cpu_mem_arena = False + except Exception: + pass + # If singleton models mode is enabled in config, limit ORT threading to reduce CPU spikes + try: + adv = (self.config or {}).get('manga_settings', {}).get('advanced', {}) if isinstance(self.config, dict) else {} + if bool(adv.get('use_singleton_models', True)): + so.intra_op_num_threads = 1 + so.inter_op_num_threads = 1 + try: + so.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL + except Exception: + pass + try: + so.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_BASIC + except Exception: + pass + except Exception: + pass + + # Create session (serialize creation in singleton mode to avoid device storms) + if singleton: + with BubbleDetector._rtdetr_onnx_init_lock: + # Re-check after acquiring lock + if BubbleDetector._rtdetr_onnx_loaded and BubbleDetector._rtdetr_onnx_shared_session is not None and not force_reload: + self.rtdetr_onnx_session = BubbleDetector._rtdetr_onnx_shared_session + self.rtdetr_onnx_loaded = True + return True + sess = ort.InferenceSession(onnx_fp, providers=providers, sess_options=so) + BubbleDetector._rtdetr_onnx_shared_session = sess + BubbleDetector._rtdetr_onnx_loaded = True + BubbleDetector._rtdetr_onnx_providers = providers + self.rtdetr_onnx_session = sess + self.rtdetr_onnx_loaded = True + else: + self.rtdetr_onnx_session = ort.InferenceSession(onnx_fp, providers=providers, sess_options=so) + self.rtdetr_onnx_loaded = True + logger.info("✅ RT-DETR (ONNX) model ready") + return True + except Exception as e: + logger.error(f"Failed to load RT-DETR ONNX: {e}") + self.rtdetr_onnx_session = None + self.rtdetr_onnx_loaded = False + return False + + def detect_with_rtdetr_onnx(self, + image_path: str = None, + image: np.ndarray = None, + confidence: float = 0.3, + return_all_bubbles: bool = False) -> Any: + """Detect using RT-DETR ONNX backend. + Returns bubbles list if return_all_bubbles else dict by classes similar to PyTorch path. + """ + if not self.rtdetr_onnx_loaded or self.rtdetr_onnx_session is None: + logger.warning("RT-DETR ONNX not loaded") + return [] if return_all_bubbles else {'bubbles': [], 'text_bubbles': [], 'text_free': []} + try: + # Acquire image + if image_path is not None: + import cv2 + image = cv2.imread(image_path) + if image is None: + raise RuntimeError(f"Failed to read image: {image_path}") + image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + else: + if image is None: + raise RuntimeError("No image provided") + # Assume image is BGR np.ndarray if from OpenCV + try: + import cv2 + image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + except Exception: + image_rgb = image + + # To PIL then resize 640x640 as in reference + from PIL import Image as _PILImage + pil_image = _PILImage.fromarray(image_rgb) + im_resized = pil_image.resize((640, 640)) + arr = np.asarray(im_resized, dtype=np.float32) / 255.0 + arr = np.transpose(arr, (2, 0, 1)) # (3,H,W) + im_data = arr[np.newaxis, ...] 
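+ # Sketch of the expected feed (inferred from the run() call below, not from the
+ # model card): the exported detector takes two inputs,
+ #   'images':            float32, shape (1, 3, 640, 640), values in [0, 1]
+ #   'orig_target_sizes': int64, shape (1, 2), the original (width, height)
+ # and returns (labels, boxes, scores) with boxes already in original-image pixels.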
+ + w, h = pil_image.size + orig_size = np.array([[w, h]], dtype=np.int64) + + # Run with a concurrency guard to prevent device hangs and limit memory usage + # Apply semaphore for ALL providers (not just DML) to control concurrency + providers = BubbleDetector._rtdetr_onnx_providers or [] + def _do_run(session): + return session.run(None, { + 'images': im_data, + 'orig_target_sizes': orig_size + }) + + # Always use semaphore to limit concurrent RT-DETR calls + acquired = False + try: + BubbleDetector._rtdetr_onnx_sema.acquire() + acquired = True + + # Special DML error handling + if 'DmlExecutionProvider' in providers: + try: + outputs = _do_run(self.rtdetr_onnx_session) + except Exception as dml_err: + msg = str(dml_err) + if '887A0005' in msg or '887A0006' in msg or 'Dml' in msg: + # Rebuild CPU session and retry once + try: + base_path = BubbleDetector._rtdetr_onnx_model_path + if base_path: + so = ort.SessionOptions() + so.enable_mem_pattern = False + so.enable_cpu_mem_arena = False + cpu_providers = ['CPUExecutionProvider'] + # Serialize rebuild + with BubbleDetector._rtdetr_onnx_init_lock: + sess = ort.InferenceSession(base_path, providers=cpu_providers, sess_options=so) + BubbleDetector._rtdetr_onnx_shared_session = sess + BubbleDetector._rtdetr_onnx_providers = cpu_providers + self.rtdetr_onnx_session = sess + outputs = _do_run(self.rtdetr_onnx_session) + else: + raise + except Exception: + raise + else: + raise + else: + # Non-DML providers - just run directly + outputs = _do_run(self.rtdetr_onnx_session) + finally: + if acquired: + try: + BubbleDetector._rtdetr_onnx_sema.release() + except Exception: + pass + + # outputs expected: labels, boxes, scores + labels, boxes, scores = outputs[:3] + if labels.ndim == 2 and labels.shape[0] == 1: + labels = labels[0] + if scores.ndim == 2 and scores.shape[0] == 1: + scores = scores[0] + if boxes.ndim == 3 and boxes.shape[0] == 1: + boxes = boxes[0] + + # Apply NMS to remove duplicate detections + # Group detections by class and apply NMS per class + class_detections = {self.CLASS_BUBBLE: [], self.CLASS_TEXT_BUBBLE: [], self.CLASS_TEXT_FREE: []} + + for lab, box, scr in zip(labels, boxes, scores): + if float(scr) < float(confidence): + continue + label_id = int(lab) + if label_id in class_detections: + x1, y1, x2, y2 = map(float, box) + class_detections[label_id].append((x1, y1, x2, y2, float(scr))) + + # Apply NMS per class to remove duplicates + def compute_iou(box1, box2): + """Compute IoU between two boxes (x1, y1, x2, y2)""" + x1_1, y1_1, x2_1, y2_1 = box1[:4] + x1_2, y1_2, x2_2, y2_2 = box2[:4] + + # Intersection + x_left = max(x1_1, x1_2) + y_top = max(y1_1, y1_2) + x_right = min(x2_1, x2_2) + y_bottom = min(y2_1, y2_2) + + if x_right < x_left or y_bottom < y_top: + return 0.0 + + intersection = (x_right - x_left) * (y_bottom - y_top) + + # Union + area1 = (x2_1 - x1_1) * (y2_1 - y1_1) + area2 = (x2_2 - x1_2) * (y2_2 - y1_2) + union = area1 + area2 - intersection + + return intersection / union if union > 0 else 0.0 + + def apply_nms(boxes_with_scores, iou_threshold=0.45): + """Apply Non-Maximum Suppression""" + if not boxes_with_scores: + return [] + + # Sort by score (descending) + sorted_boxes = sorted(boxes_with_scores, key=lambda x: x[4], reverse=True) + keep = [] + + while sorted_boxes: + # Keep the box with highest score + current = sorted_boxes.pop(0) + keep.append(current) + + # Remove boxes with high IoU + sorted_boxes = [box for box in sorted_boxes if compute_iou(current, box) < iou_threshold] + + return keep + + # Apply 
NMS and build final detections + detections = {'bubbles': [], 'text_bubbles': [], 'text_free': []} + bubbles_all = [] + + for class_id, boxes_list in class_detections.items(): + nms_boxes = apply_nms(boxes_list, iou_threshold=self.default_iou_threshold) + + for x1, y1, x2, y2, scr in nms_boxes: + bbox = (int(x1), int(y1), int(x2 - x1), int(y2 - y1)) + + if class_id == self.CLASS_BUBBLE: + detections['bubbles'].append(bbox) + bubbles_all.append(bbox) + elif class_id == self.CLASS_TEXT_BUBBLE: + detections['text_bubbles'].append(bbox) + bubbles_all.append(bbox) + elif class_id == self.CLASS_TEXT_FREE: + detections['text_free'].append(bbox) + + return bubbles_all if return_all_bubbles else detections + except Exception as e: + logger.error(f"RT-DETR ONNX detection failed: {e}") + return [] if return_all_bubbles else {'bubbles': [], 'text_bubbles': [], 'text_free': []} + + +# Standalone utility functions +def download_model_from_huggingface(repo_id: str = "ogkalu/comic-speech-bubble-detector-yolov8m", + filename: str = "comic-speech-bubble-detector-yolov8m.pt", + cache_dir: str = "models") -> str: + """ + Download model from Hugging Face Hub. + + Args: + repo_id: Hugging Face repository ID + filename: Model filename in the repository + cache_dir: Local directory to cache the model + + Returns: + Path to downloaded model file + """ + try: + from huggingface_hub import hf_hub_download + + os.makedirs(cache_dir, exist_ok=True) + + logger.info(f"📥 Downloading {filename} from {repo_id}...") + + model_path = hf_hub_download( + repo_id=repo_id, + filename=filename, + cache_dir=cache_dir, + local_dir=cache_dir + ) + + logger.info(f"✅ Model downloaded to: {model_path}") + return model_path + + except ImportError: + logger.error("huggingface-hub package required. Install with: pip install huggingface-hub") + return None + except Exception as e: + logger.error(f"Download failed: {e}") + return None + + +def download_rtdetr_model(cache_dir: str = "models") -> bool: + """ + Download RT-DETR model for advanced detection. + + Args: + cache_dir: Directory to cache the model + + Returns: + True if successful + """ + if not TRANSFORMERS_AVAILABLE: + logger.error("Transformers required. 
Install with: pip install transformers") + return False + + try: + logger.info("📥 Downloading RT-DETR model...") + from transformers import RTDetrForObjectDetection, RTDetrImageProcessor + + # This will download and cache the model + processor = RTDetrImageProcessor.from_pretrained( + "ogkalu/comic-text-and-bubble-detector", + cache_dir=cache_dir + ) + model = RTDetrForObjectDetection.from_pretrained( + "ogkalu/comic-text-and-bubble-detector", + cache_dir=cache_dir + ) + + logger.info("✅ RT-DETR model downloaded successfully") + return True + + except Exception as e: + logger.error(f"Download failed: {e}") + return False + + +# Example usage and testing +if __name__ == "__main__": + import sys + + # Create detector + detector = BubbleDetector() + + if len(sys.argv) > 1: + if sys.argv[1] == "download": + # Download model from Hugging Face + model_path = download_model_from_huggingface() + if model_path: + print(f"YOLOv8 model downloaded to: {model_path}") + + # Also download RT-DETR + if download_rtdetr_model(): + print("RT-DETR model downloaded") + + elif sys.argv[1] == "detect" and len(sys.argv) > 3: + # Detect bubbles in an image + model_path = sys.argv[2] + image_path = sys.argv[3] + + # Load appropriate model + if 'rtdetr' in model_path.lower(): + if detector.load_rtdetr_model(): + # Use RT-DETR + results = detector.detect_with_rtdetr(image_path) + print(f"RT-DETR Detection:") + print(f" Empty bubbles: {len(results['bubbles'])}") + print(f" Text bubbles: {len(results['text_bubbles'])}") + print(f" Free text: {len(results['text_free'])}") + else: + if detector.load_model(model_path): + bubbles = detector.detect_bubbles(image_path, confidence=0.5) + print(f"YOLOv8 detected {len(bubbles)} bubbles:") + for i, (x, y, w, h) in enumerate(bubbles): + print(f" Bubble {i+1}: position=({x},{y}) size=({w}x{h})") + + # Optionally visualize + if len(sys.argv) > 4: + output_path = sys.argv[4] + detector.visualize_detections(image_path, output_path=output_path, + use_rtdetr='rtdetr' in model_path.lower()) + + elif sys.argv[1] == "test-both" and len(sys.argv) > 2: + # Test both models + image_path = sys.argv[2] + + # Load YOLOv8 + yolo_path = "models/comic-speech-bubble-detector-yolov8m.pt" + if os.path.exists(yolo_path): + detector.load_model(yolo_path) + yolo_bubbles = detector.detect_bubbles(image_path, use_rtdetr=False) + print(f"YOLOv8: {len(yolo_bubbles)} bubbles") + + # Load RT-DETR + if detector.load_rtdetr_model(): + rtdetr_bubbles = detector.detect_bubbles(image_path, use_rtdetr=True) + print(f"RT-DETR: {len(rtdetr_bubbles)} bubbles") + + else: + print("Usage:") + print(" python bubble_detector.py download") + print(" python bubble_detector.py detect <model_path> <image_path> [output_path]") + print(" python bubble_detector.py test-both <image_path>") + + else: + print("Bubble Detector Module (YOLOv8 + RT-DETR)") + print("Usage:") + print(" python bubble_detector.py download") + print(" python bubble_detector.py detect <model_path> <image_path> [output_path]") + print(" python bubble_detector.py test-both <image_path>") diff --git a/hyphen_textwrap.py b/hyphen_textwrap.py new file mode 100644 index 0000000000000000000000000000000000000000..443a66d5e0a4bea562973d9c7b6c18bbd37ac6f4 --- /dev/null +++ b/hyphen_textwrap.py @@ -0,0 +1,508 @@ +# modified textwrap module to add hyphens whenever it breaks a long word +# https://github.com/python/cpython/blob/main/Lib/textwrap.py + +"""Text wrapping and filling with improved hyphenation support. 
+ +This module is adapted from comic-translate's enhanced textwrap implementation. +It provides better hyphenation behavior when breaking long words across lines. +""" + +# Copyright (C) 1999-2001 Gregory P. Ward. +# Copyright (C) 2002, 2003 Python Software Foundation. +# Written by Greg Ward <gward@python.net> + +import re + +__all__ = ['TextWrapper', 'wrap', 'fill', 'dedent', 'indent', 'shorten'] + +# Hardcode the recognized whitespace characters to the US-ASCII +# whitespace characters. The main reason for doing this is that +# some Unicode spaces (like \u00a0) are non-breaking whitespaces. +_whitespace = '\t\n\x0b\x0c\r ' + +class TextWrapper: + """ + Object for wrapping/filling text. The public interface consists of + the wrap() and fill() methods; the other methods are just there for + subclasses to override in order to tweak the default behaviour. + If you want to completely replace the main wrapping algorithm, + you'll probably have to override _wrap_chunks(). + + Several instance attributes control various aspects of wrapping: + width (default: 70) + the maximum width of wrapped lines (unless break_long_words + is false) + initial_indent (default: "") + string that will be prepended to the first line of wrapped + output. Counts towards the line's width. + subsequent_indent (default: "") + string that will be prepended to all lines save the first + of wrapped output; also counts towards each line's width. + expand_tabs (default: true) + Expand tabs in input text to spaces before further processing. + Each tab will become 0 .. 'tabsize' spaces, depending on its position + in its line. If false, each tab is treated as a single character. + tabsize (default: 8) + Expand tabs in input text to 0 .. 'tabsize' spaces, unless + 'expand_tabs' is false. + replace_whitespace (default: true) + Replace all whitespace characters in the input text by spaces + after tab expansion. Note that if expand_tabs is false and + replace_whitespace is true, every tab will be converted to a + single space! + fix_sentence_endings (default: false) + Ensure that sentence-ending punctuation is always followed + by two spaces. Off by default because the algorithm is + (unavoidably) imperfect. + break_long_words (default: true) + Break words longer than 'width'. If false, those words will not + be broken, and some lines might be longer than 'width'. + break_on_hyphens (default: true) + Allow breaking hyphenated words. If true, wrapping will occur + preferably on whitespaces and right after hyphens part of + compound words. + drop_whitespace (default: true) + Drop leading and trailing whitespace from lines. + max_lines (default: None) + Truncate wrapped lines. + placeholder (default: ' [...]') + Append to the last line of truncated text. + hyphenate_broken_words (default: True) + Add hyphens when breaking long words across lines. + """ + + unicode_whitespace_trans = dict.fromkeys(map(ord, _whitespace), ord(' ')) + + # This funky little regex is just the trick for splitting + # text up into word-wrappable chunks. E.g. + # "Hello there -- you goof-ball, use the -b option!" + # splits into + # Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option! + # (after stripping out empty strings). + word_punct = r'[\w!"\'\&.,?]' + letter = r'[^\d\W]' + whitespace = r'[%s]' % re.escape(_whitespace) + nowhitespace = '[^' + whitespace[1:] + wordsep_re = re.compile(r''' + ( # any whitespace + %(ws)s+ + | # em-dash between words + (?<=%(wp)s) -{2,} (?=\w) + | # word, possibly hyphenated + %(nws)s+? 
(?: + # hyphenated word + -(?: (?<=%(lt)s{2}-) | (?<=%(lt)s-%(lt)s-)) + (?= %(lt)s -? %(lt)s) + | # end of word + (?=%(ws)s|\Z) + | # em-dash + (?<=%(wp)s) (?=-{2,}\w) + ) + )''' % {'wp': word_punct, 'lt': letter, + 'ws': whitespace, 'nws': nowhitespace}, + re.VERBOSE) + del word_punct, letter, nowhitespace + + # This less funky little regex just split on recognized spaces. E.g. + # "Hello there -- you goof-ball, use the -b option!" + # splits into + # Hello/ /there/ /--/ /you/ /goof-ball,/ /use/ /the/ /-b/ /option!/ + wordsep_simple_re = re.compile(r'(%s+)' % whitespace) + del whitespace + + # XXX this is not locale- or charset-aware -- string.lowercase + # is US-ASCII only (and therefore English-only) + sentence_end_re = re.compile(r'[a-z]' # lowercase letter + r'[\.\!\?]' # sentence-ending punct. + r'[\"\']?' # optional end-of-quote + r'\Z') # end of chunk + + def __init__(self, + width=70, + initial_indent="", + subsequent_indent="", + expand_tabs=True, + replace_whitespace=True, + fix_sentence_endings=False, + break_long_words=True, + drop_whitespace=True, + break_on_hyphens=True, + hyphenate_broken_words=True, + tabsize=8, + *, + max_lines=None, + placeholder=' [...]'): + self.width = width + self.initial_indent = initial_indent + self.subsequent_indent = subsequent_indent + self.expand_tabs = expand_tabs + self.replace_whitespace = replace_whitespace + self.fix_sentence_endings = fix_sentence_endings + self.break_long_words = break_long_words + self.drop_whitespace = drop_whitespace + self.break_on_hyphens = break_on_hyphens + self.tabsize = tabsize + self.max_lines = max_lines + self.placeholder = placeholder + self.hyphenate_broken_words = hyphenate_broken_words + + + # -- Private methods ----------------------------------------------- + # (possibly useful for subclasses to override) + + def _munge_whitespace(self, text): + """_munge_whitespace(text : string) -> string + + Munge whitespace in text: expand tabs and convert all other + whitespace characters to spaces. Eg. " foo\\tbar\\n\\nbaz" + becomes " foo bar baz". + """ + if self.expand_tabs: + text = text.expandtabs(self.tabsize) + if self.replace_whitespace: + text = text.translate(self.unicode_whitespace_trans) + return text + + + def _split(self, text): + """_split(text : string) -> [string] + + Split the text to wrap into indivisible chunks. Chunks are + not quite the same as words; see _wrap_chunks() for full + details. As an example, the text + Look, goof-ball -- use the -b option! + breaks into the following chunks: + 'Look,', ' ', 'goof-', 'ball', ' ', '--', ' ', + 'use', ' ', 'the', ' ', '-b', ' ', 'option!' + if break_on_hyphens is True, or in: + 'Look,', ' ', 'goof-ball', ' ', '--', ' ', + 'use', ' ', 'the', ' ', '-b', ' ', option!' + otherwise. + """ + if self.break_on_hyphens is True: + chunks = self.wordsep_re.split(text) + else: + chunks = self.wordsep_simple_re.split(text) + chunks = [c for c in chunks if c] + + return chunks + + def _fix_sentence_endings(self, chunks): + """_fix_sentence_endings(chunks : [string]) + + Correct for sentence endings buried in 'chunks'. Eg. when the + original text contains "... foo.\\nBar ...", munge_whitespace() + and split() will convert that to [..., "foo.", " ", "Bar", ...] + which has one too few spaces; this method simply changes the one + space to two. 
+ """ + i = 0 + patsearch = self.sentence_end_re.search + while i < len(chunks)-1: + if chunks[i+1] == " " and patsearch(chunks[i]): + chunks[i+1] = " " + i += 2 + else: + i += 1 + + def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width): + """_handle_long_word(chunks : [string], + cur_line : [string], + cur_len : int, width : int) + + Handle a chunk of text (most likely a word, not whitespace) that + is too long to fit in any line. + """ + # Figure out when indent is larger than the specified width, and make + # sure at least one character is stripped off on every pass + if width < 1: + space_left = 1 + else: + space_left = width - cur_len + + # If we're allowed to break long words, then do so: put as much + # of the next chunk onto the current line as will fit. + if self.break_long_words: + end = space_left + chunk = reversed_chunks[-1] + if self.break_on_hyphens and len(chunk) > space_left: + # break after last hyphen, but only if there are + # non-hyphens before it + hyphen = chunk.rfind('-', 0, space_left) + if hyphen > 0 and any(c != '-' for c in chunk[:hyphen]): + end = hyphen + 1 + + if chunk[:end]: + cur_line.append(chunk[:end]) + # Now adds a hyphen whenever a long word is split to the next line + # unless certain chracters already exists at the split + if self.hyphenate_broken_words and chunk[:end][-1] not in ['-','.',',']: + cur_line.append('-') + reversed_chunks[-1] = chunk[end:] + + # Otherwise, we have to preserve the long word intact. Only add + # it to the current line if there's nothing already there -- + # that minimizes how much we violate the width constraint. + elif not cur_line: + cur_line.append(reversed_chunks.pop()) + + # If we're not allowed to break long words, and there's already + # text on the current line, do nothing. Next time through the + # main loop of _wrap_chunks(), we'll wind up here again, but + # cur_len will be zero, so the next line will be entirely + # devoted to the long word that we can't handle right now. + + def _wrap_chunks(self, chunks): + """_wrap_chunks(chunks : [string]) -> [string] + + Wrap a sequence of text chunks and return a list of lines of + length 'self.width' or less. (If 'break_long_words' is false, + some lines may be longer than this.) Chunks correspond roughly + to words and the whitespace between them: each chunk is + indivisible (modulo 'break_long_words'), but a line break can + come between any two chunks. Chunks should not have internal + whitespace; ie. a chunk is either all whitespace or a "word". + Whitespace chunks will be removed from the beginning and end of + lines, but apart from that whitespace is preserved. + """ + lines = [] + if self.width <= 0: + raise ValueError("invalid width %r (must be > 0)" % self.width) + if self.max_lines is not None: + if self.max_lines > 1: + indent = self.subsequent_indent + else: + indent = self.initial_indent + if len(indent) + len(self.placeholder.lstrip()) > self.width: + raise ValueError("placeholder too large for max width") + + # Arrange in reverse order so items can be efficiently popped + # from a stack of chucks. + chunks.reverse() + + while chunks: + + # Start the list of chunks that will make up the current line. + # cur_len is just the length of all the chunks in cur_line. + cur_line = [] + cur_len = 0 + + # Figure out which static string will prefix this line. + if lines: + indent = self.subsequent_indent + else: + indent = self.initial_indent + + # Maximum width for this line. 
+ width = self.width - len(indent) + + # First chunk on line is whitespace -- drop it, unless this + # is the very beginning of the text (ie. no lines started yet). + if self.drop_whitespace and chunks[-1].strip() == '' and lines: + del chunks[-1] + + while chunks: + l = len(chunks[-1]) + + # Can at least squeeze this chunk onto the current line. + if cur_len + l <= width: + cur_line.append(chunks.pop()) + cur_len += l + + # Nope, this line is full. + else: + break + + # The current line is full, and the next chunk is too big to + # fit on *any* line (not just this one). + if chunks and len(chunks[-1]) > width: + self._handle_long_word(chunks, cur_line, cur_len, width) + cur_len = sum(map(len, cur_line)) + + # If the last chunk on this line is all whitespace, drop it. + if self.drop_whitespace and cur_line and cur_line[-1].strip() == '': + cur_len -= len(cur_line[-1]) + del cur_line[-1] + + if cur_line: + if (self.max_lines is None or + len(lines) + 1 < self.max_lines or + (not chunks or + self.drop_whitespace and + len(chunks) == 1 and + not chunks[0].strip()) and cur_len <= width): + # Convert current line back to a string and store it in + # list of all lines (return value). + lines.append(indent + ''.join(cur_line)) + else: + while cur_line: + if (cur_line[-1].strip() and + cur_len + len(self.placeholder) <= width): + cur_line.append(self.placeholder) + lines.append(indent + ''.join(cur_line)) + break + cur_len -= len(cur_line[-1]) + del cur_line[-1] + else: + if lines: + prev_line = lines[-1].rstrip() + if (len(prev_line) + len(self.placeholder) <= + self.width): + lines[-1] = prev_line + self.placeholder + break + lines.append(indent + self.placeholder.lstrip()) + break + + return lines + + def _split_chunks(self, text): + text = self._munge_whitespace(text) + return self._split(text) + + # -- Public interface ---------------------------------------------- + + def wrap(self, text): + """wrap(text : string) -> [string] + + Reformat the single paragraph in 'text' so it fits in lines of + no more than 'self.width' columns, and return a list of wrapped + lines. Tabs in 'text' are expanded with string.expandtabs(), + and all other whitespace characters (including newline) are + converted to space. + """ + chunks = self._split_chunks(text) + if self.fix_sentence_endings: + self._fix_sentence_endings(chunks) + return self._wrap_chunks(chunks) + + def fill(self, text): + """fill(text : string) -> string + + Reformat the single paragraph in 'text' to fit in lines of no + more than 'self.width' columns, and return a new string + containing the entire wrapped paragraph. + """ + return "\n".join(self.wrap(text)) + + +# -- Convenience interface --------------------------------------------- + +def wrap(text, width=70, **kwargs): + """Wrap a single paragraph of text, returning a list of wrapped lines. + + Reformat the single paragraph in 'text' so it fits in lines of no + more than 'width' columns, and return a list of wrapped lines. By + default, tabs in 'text' are expanded with string.expandtabs(), and + all other whitespace characters (including newline) are converted to + space. See TextWrapper class for available keyword args to customize + wrapping behaviour. + """ + w = TextWrapper(width=width, **kwargs) + return w.wrap(text) + +def fill(text, width=70, **kwargs): + """Fill a single paragraph of text, returning a new string. + + Reformat the single paragraph in 'text' to fit in lines of no more + than 'width' columns, and return a new string containing the entire + wrapped paragraph. 
As with wrap(), tabs are expanded and other + whitespace characters converted to space. See TextWrapper class for + available keyword args to customize wrapping behaviour. + """ + w = TextWrapper(width=width, **kwargs) + return w.fill(text) + +def shorten(text, width, **kwargs): + """Collapse and truncate the given text to fit in the given width. + + The text first has its whitespace collapsed. If it then fits in + the *width*, it is returned as is. Otherwise, as many words + as possible are joined and then the placeholder is appended:: + + >>> textwrap.shorten("Hello world!", width=12) + 'Hello world!' + >>> textwrap.shorten("Hello world!", width=11) + 'Hello [...]' + """ + w = TextWrapper(width=width, max_lines=1, **kwargs) + return w.fill(' '.join(text.strip().split())) + + +# -- Loosely related functionality ------------------------------------- + +_whitespace_only_re = re.compile('^[ \t]+$', re.MULTILINE) +_leading_whitespace_re = re.compile('(^[ \t]*)(?:[^ \t\n])', re.MULTILINE) + +def dedent(text): + """Remove any common leading whitespace from every line in `text`. + + This can be used to make triple-quoted strings line up with the left + edge of the display, while still presenting them in the source code + in indented form. + + Note that tabs and spaces are both treated as whitespace, but they + are not equal: the lines " hello" and "\\thello" are + considered to have no common leading whitespace. + + Entirely blank lines are normalized to a newline character. + """ + # Look for the longest leading string of spaces and tabs common to + # all lines. + margin = None + text = _whitespace_only_re.sub('', text) + indents = _leading_whitespace_re.findall(text) + for indent in indents: + if margin is None: + margin = indent + + # Current line more deeply indented than previous winner: + # no change (previous winner is still on top). + elif indent.startswith(margin): + pass + + # Current line consistent with and no deeper than previous winner: + # it's the new winner. + elif margin.startswith(indent): + margin = indent + + # Find the largest common whitespace between current line and previous + # winner. + else: + for i, (x, y) in enumerate(zip(margin, indent)): + if x != y: + margin = margin[:i] + break + + # sanity check (testing/debugging only) + if 0 and margin: + for line in text.split("\n"): + assert not line or line.startswith(margin), \ + "line = %r, margin = %r" % (line, margin) + + if margin: + text = re.sub(r'(?m)^' + margin, '', text) + return text + + +def indent(text, prefix, predicate=None): + """Adds 'prefix' to the beginning of selected lines in 'text'. + + If 'predicate' is provided, 'prefix' will only be added to the lines + where 'predicate(line)' is True. If 'predicate' is not provided, + it will default to adding 'prefix' to all non-empty lines that do not + consist solely of whitespace characters. + """ + if predicate is None: + # str.splitlines(True) doesn't produce empty string. + # ''.splitlines(True) => [] + # 'foo\n'.splitlines(True) => ['foo\n'] + # So we can use just `not s.isspace()` here. 
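+ # Illustrative example (mirrors the stdlib textwrap.indent behaviour):
+ #   indent('hello\n\nworld\n', '> ')  ->  '> hello\n\n> world\n'
+ # i.e. blank or whitespace-only lines are left unprefixed by the default predicate.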
+ predicate = lambda s: not s.isspace() + + prefixed_lines = [] + for line in text.splitlines(True): + if predicate(line): + prefixed_lines.append(prefix) + prefixed_lines.append(line) + + return ''.join(prefixed_lines) diff --git a/local_inpainter.py b/local_inpainter.py new file mode 100644 index 0000000000000000000000000000000000000000..1d788627b8c3d8ebf924ee27aaa0f3a8e2e8b9e3 --- /dev/null +++ b/local_inpainter.py @@ -0,0 +1,2531 @@ +""" +Local inpainting implementation - COMPATIBLE VERSION WITH JIT SUPPORT +Maintains full backward compatibility while adding proper JIT model support +""" +import os +import sys +import json +import numpy as np +import cv2 +from typing import Optional, List, Tuple, Dict, Any +import logging +import traceback +import re +import hashlib +import urllib.request +from pathlib import Path +import threading +import time + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +# Check if we're running in a frozen environment +IS_FROZEN = getattr(sys, 'frozen', False) +if IS_FROZEN: + MEIPASS = sys._MEIPASS + os.environ['TORCH_HOME'] = MEIPASS + os.environ['TRANSFORMERS_CACHE'] = os.path.join(MEIPASS, 'transformers') + os.environ['HF_HOME'] = os.path.join(MEIPASS, 'huggingface') + logger.info(f"Running in frozen environment: {MEIPASS}") + +# Environment variables for ONNX +ONNX_CACHE_DIR = os.environ.get('ONNX_CACHE_DIR', 'models') +AUTO_CONVERT_TO_ONNX = os.environ.get('AUTO_CONVERT_TO_ONNX', 'false').lower() == 'true' +SKIP_ONNX_FOR_CKPT = os.environ.get('SKIP_ONNX_FOR_CKPT', 'true').lower() == 'true' +FORCE_ONNX_REBUILD = os.environ.get('FORCE_ONNX_REBUILD', 'false').lower() == 'true' +CACHE_DIR = os.environ.get('MODEL_CACHE_DIR', os.path.expanduser('~/.cache/inpainting')) + +# Modified import handling for frozen environment +TORCH_AVAILABLE = False +torch = None +nn = None +F = None +BaseModel = object + +try: + import onnxruntime_extensions + ONNX_EXTENSIONS_AVAILABLE = True +except ImportError: + ONNX_EXTENSIONS_AVAILABLE = False + logger.info("ONNX Runtime Extensions not available - FFT models won't work in ONNX") + +if IS_FROZEN: + # In frozen environment, try harder to import + try: + import torch + import torch.nn as nn + import torch.nn.functional as F + TORCH_AVAILABLE = True + BaseModel = nn.Module + logger.info("✓ PyTorch loaded in frozen environment") + except Exception as e: + logger.error(f"PyTorch not available in frozen environment: {e}") + logger.error("❌ Inpainting disabled - PyTorch is required") +else: + # Normal environment + try: + import torch + import torch.nn as nn + import torch.nn.functional as F + TORCH_AVAILABLE = True + BaseModel = nn.Module + except ImportError: + TORCH_AVAILABLE = False + logger.error("PyTorch not available - inpainting disabled") + +# Configure ORT memory behavior before importing +try: + os.environ.setdefault('ORT_DISABLE_MEMORY_ARENA', '1') +except Exception: + pass +# ONNX Runtime - usually works well in frozen environments +ONNX_AVAILABLE = False +try: + import onnx + import onnxruntime as ort + ONNX_AVAILABLE = True + logger.info("✓ ONNX Runtime available") +except ImportError: + ONNX_AVAILABLE = False + logger.warning("ONNX Runtime not available") + +# Bubble detector - optional +BUBBLE_DETECTOR_AVAILABLE = False +try: + from bubble_detector import BubbleDetector + BUBBLE_DETECTOR_AVAILABLE = True + logger.info("✓ Bubble detector available") +except ImportError: + logger.info("Bubble detector not available - basic inpainting will be used") + + +# JIT Model URLs (for automatic 
download) +LAMA_JIT_MODELS = { + 'lama': { + 'url': 'https://github.com/Sanster/models/releases/download/add_big_lama/big-lama.pt', + 'md5': 'e3aa4aaa15225a33ec84f9f4bc47e500', + 'name': 'BigLama' + }, + 'anime': { + 'url': 'https://github.com/Sanster/models/releases/download/AnimeMangaInpainting/anime-manga-big-lama.pt', + 'md5': '29f284f36a0a510bcacf39ecf4c4d54f', + 'name': 'Anime-Manga BigLama' + }, + 'lama_official': { + 'url': 'https://github.com/Sanster/models/releases/download/lama/lama.pt', + 'md5': '4b1a1de53b7a74e0ff9dd622834e8e1e', + 'name': 'LaMa Official' + }, + 'aot': { + 'url': 'https://huggingface.co/ogkalu/aot-inpainting-jit/resolve/main/aot_traced.pt', + 'md5': '5ecdac562c1d56267468fc4fbf80db27', + 'name': 'AOT GAN' + }, + 'aot_onnx': { + 'url': 'https://huggingface.co/ogkalu/aot-inpainting/resolve/main/aot.onnx', + 'md5': 'ffd39ed8e2a275869d3b49180d030f0d8b8b9c2c20ed0e099ecd207201f0eada', + 'name': 'AOT ONNX (Fast)', + 'is_onnx': True + }, + 'lama_onnx': { + 'url': 'https://huggingface.co/Carve/LaMa-ONNX/resolve/main/lama_fp32.onnx', + 'md5': None, # Add MD5 if you want to verify + 'name': 'LaMa ONNX (Carve)', + 'is_onnx': True # Flag to indicate this is ONNX, not JIT + }, + 'anime_onnx': { + 'url': 'https://huggingface.co/ogkalu/lama-manga-onnx-dynamic/resolve/main/lama-manga-dynamic.onnx', + 'md5': 'de31ffa5ba26916b8ea35319f6c12151ff9654d4261bccf0583a69bb095315f9', + 'name': 'Anime/Manga ONNX (Dynamic)', + 'is_onnx': True # Flag to indicate this is ONNX + } +} + + +def norm_img(img: np.ndarray) -> np.ndarray: + """Normalize image to [0, 1] range""" + if img.dtype == np.uint8: + return img.astype(np.float32) / 255.0 + return img + + +def get_cache_path_by_url(url: str) -> str: + """Get cache path for a model URL""" + os.makedirs(CACHE_DIR, exist_ok=True) + filename = os.path.basename(url) + return os.path.join(CACHE_DIR, filename) + + +def download_model(url: str, md5: str = None) -> str: + """Download model if not cached""" + cache_path = get_cache_path_by_url(url) + + if os.path.exists(cache_path): + logger.info(f"✅ Model already cached: {cache_path}") + return cache_path + + logger.info(f"📥 Downloading model from {url}") + + try: + urllib.request.urlretrieve(url, cache_path) + logger.info(f"✅ Model downloaded to: {cache_path}") + return cache_path + except Exception as e: + logger.error(f"❌ Download failed: {e}") + if os.path.exists(cache_path): + os.remove(cache_path) + raise + + +class FFCInpaintModel(BaseModel): # Use BaseModel instead of nn.Module + """FFC model for LaMa inpainting - for checkpoint compatibility""" + + def __init__(self): + if not TORCH_AVAILABLE: + # Initialize as a simple object when PyTorch is not available + super().__init__() + logger.warning("PyTorch not available - FFCInpaintModel initialized as placeholder") + self._pytorch_available = False + return + + # Additional safety check for nn being None + if nn is None: + super().__init__() + logger.error("Neural network modules not available - FFCInpaintModel disabled") + self._pytorch_available = False + return + + super().__init__() + self._pytorch_available = True + + try: + # Encoder + self.model_1_ffc_convl2l = nn.Conv2d(4, 64, 7, padding=3) + self.model_1_bn_l = nn.BatchNorm2d(64) + + self.model_2_ffc_convl2l = nn.Conv2d(64, 128, 3, padding=1) + self.model_2_bn_l = nn.BatchNorm2d(128) + + self.model_3_ffc_convl2l = nn.Conv2d(128, 256, 3, padding=1) + self.model_3_bn_l = nn.BatchNorm2d(256) + + self.model_4_ffc_convl2l = nn.Conv2d(256, 128, 3, padding=1) + self.model_4_ffc_convl2g = 
nn.Conv2d(256, 384, 3, padding=1) + self.model_4_bn_l = nn.BatchNorm2d(128) + self.model_4_bn_g = nn.BatchNorm2d(384) + + # FFC blocks + for i in range(5, 23): + for conv_type in ['conv1', 'conv2']: + setattr(self, f'model_{i}_{conv_type}_ffc_convl2l', nn.Conv2d(128, 128, 3, padding=1)) + setattr(self, f'model_{i}_{conv_type}_ffc_convl2g', nn.Conv2d(128, 384, 3, padding=1)) + setattr(self, f'model_{i}_{conv_type}_ffc_convg2l', nn.Conv2d(384, 128, 3, padding=1)) + setattr(self, f'model_{i}_{conv_type}_ffc_convg2g_conv1_0', nn.Conv2d(384, 192, 1)) + setattr(self, f'model_{i}_{conv_type}_ffc_convg2g_conv1_1', nn.BatchNorm2d(192)) + setattr(self, f'model_{i}_{conv_type}_ffc_convg2g_fu_conv_layer', nn.Conv2d(384, 384, 1)) + setattr(self, f'model_{i}_{conv_type}_ffc_convg2g_fu_bn', nn.BatchNorm2d(384)) + setattr(self, f'model_{i}_{conv_type}_ffc_convg2g_conv2', nn.Conv2d(192, 384, 1)) + setattr(self, f'model_{i}_{conv_type}_bn_l', nn.BatchNorm2d(128)) + setattr(self, f'model_{i}_{conv_type}_bn_g', nn.BatchNorm2d(384)) + + # Decoder + self.model_24 = nn.Conv2d(512, 256, 3, padding=1) + self.model_25 = nn.BatchNorm2d(256) + + self.model_27 = nn.Conv2d(256, 128, 3, padding=1) + self.model_28 = nn.BatchNorm2d(128) + + self.model_30 = nn.Conv2d(128, 64, 3, padding=1) + self.model_31 = nn.BatchNorm2d(64) + + self.model_34 = nn.Conv2d(64, 3, 7, padding=3) + + # Activation functions + self.relu = nn.ReLU(inplace=True) + self.tanh = nn.Tanh() + + logger.info("FFCInpaintModel initialized successfully") + + except Exception as e: + logger.error(f"Failed to initialize FFCInpaintModel: {e}") + self._pytorch_available = False + raise + + def forward(self, image, mask): + if not self._pytorch_available: + logger.error("PyTorch not available for forward pass") + raise RuntimeError("PyTorch not available for forward pass") + + if not TORCH_AVAILABLE or torch is None: + logger.error("PyTorch not available for forward pass") + raise RuntimeError("PyTorch not available for forward pass") + + try: + x = torch.cat([image, mask], dim=1) + + x = self.relu(self.model_1_bn_l(self.model_1_ffc_convl2l(x))) + x = self.relu(self.model_2_bn_l(self.model_2_ffc_convl2l(x))) + x = self.relu(self.model_3_bn_l(self.model_3_ffc_convl2l(x))) + + x_l = self.relu(self.model_4_bn_l(self.model_4_ffc_convl2l(x))) + x_g = self.relu(self.model_4_bn_g(self.model_4_ffc_convl2g(x))) + + for i in range(5, 23): + identity_l, identity_g = x_l, x_g + x_l, x_g = self._ffc_block(x_l, x_g, i, 'conv1') + x_l, x_g = self._ffc_block(x_l, x_g, i, 'conv2') + x_l = x_l + identity_l + x_g = x_g + identity_g + + x = torch.cat([x_l, x_g], dim=1) + x = self.relu(self.model_25(self.model_24(x))) + x = self.relu(self.model_28(self.model_27(x))) + x = self.relu(self.model_31(self.model_30(x))) + x = self.tanh(self.model_34(x)) + + mask_3ch = mask.repeat(1, 3, 1, 1) + return x * mask_3ch + image * (1 - mask_3ch) + + except Exception as e: + logger.error(f"Forward pass failed: {e}") + raise RuntimeError(f"Forward pass failed: {e}") + + def _ffc_block(self, x_l, x_g, idx, conv_type): + if not self._pytorch_available: + raise RuntimeError("PyTorch not available for FFC block") + + if not TORCH_AVAILABLE: + raise RuntimeError("PyTorch not available for FFC block") + + try: + convl2l = getattr(self, f'model_{idx}_{conv_type}_ffc_convl2l') + convl2g = getattr(self, f'model_{idx}_{conv_type}_ffc_convl2g') + convg2l = getattr(self, f'model_{idx}_{conv_type}_ffc_convg2l') + convg2g_conv1 = getattr(self, f'model_{idx}_{conv_type}_ffc_convg2g_conv1_0') + convg2g_bn1 = 
getattr(self, f'model_{idx}_{conv_type}_ffc_convg2g_conv1_1') + fu_conv = getattr(self, f'model_{idx}_{conv_type}_ffc_convg2g_fu_conv_layer') + fu_bn = getattr(self, f'model_{idx}_{conv_type}_ffc_convg2g_fu_bn') + convg2g_conv2 = getattr(self, f'model_{idx}_{conv_type}_ffc_convg2g_conv2') + bn_l = getattr(self, f'model_{idx}_{conv_type}_bn_l') + bn_g = getattr(self, f'model_{idx}_{conv_type}_bn_g') + + out_xl = convl2l(x_l) + convg2l(x_g) + out_xg = convl2g(x_l) + convg2g_conv2(self.relu(convg2g_bn1(convg2g_conv1(x_g)))) + self.relu(fu_bn(fu_conv(x_g))) + + return self.relu(bn_l(out_xl)), self.relu(bn_g(out_xg)) + + except Exception as e: + logger.error(f"FFC block failed: {e}") + raise RuntimeError(f"FFC block failed: {e}") + + +class LocalInpainter: + """Local inpainter with full backward compatibility""" + + # MAINTAIN ORIGINAL SUPPORTED_METHODS for compatibility + SUPPORTED_METHODS = { + 'lama': ('LaMa Inpainting', FFCInpaintModel), + 'mat': ('MAT Inpainting', FFCInpaintModel), + 'aot': ('AOT GAN Inpainting', FFCInpaintModel), + 'aot_onnx': ('AOT ONNX (Fast)', FFCInpaintModel), + 'sd': ('Stable Diffusion Inpainting', FFCInpaintModel), + 'anime': ('Anime/Manga Inpainting', FFCInpaintModel), + 'anime_onnx': ('Anime ONNX (Fast)', FFCInpaintModel), + 'lama_official': ('Official LaMa', FFCInpaintModel), + } + + def __init__(self, config_path="config.json"): + # Set thread limits early if environment indicates single-threaded mode + try: + if os.environ.get('OMP_NUM_THREADS') == '1': + # Already in single-threaded mode, ensure it's applied to this process + # Check if torch is available at module level before trying to use it + if TORCH_AVAILABLE and torch is not None: + try: + torch.set_num_threads(1) + except (RuntimeError, AttributeError): + pass + try: + import cv2 + cv2.setNumThreads(1) + except (ImportError, AttributeError): + pass + except Exception: + pass + + self.config_path = config_path + self.config = self._load_config() + self.model = None + self.model_loaded = False + self.current_method = None + self.use_opencv_fallback = False # FORCED DISABLED - No OpenCV fallback allowed + self.onnx_session = None + self.use_onnx = False + self.is_jit_model = False + self.pad_mod = 8 + + # Default tiling settings - OFF by default for most models + self.tiling_enabled = False + self.tile_size = 512 + self.tile_overlap = 64 + + # ONNX-specific settings + self.onnx_model_loaded = False + self.onnx_input_size = None # Will be detected from model + + # Quantization diagnostics flags + self.onnx_quantize_applied = False + self.torch_quantize_applied = False + + # Bubble detection + self.bubble_detector = None + self.bubble_model_loaded = False + + # Create directories + os.makedirs(ONNX_CACHE_DIR, exist_ok=True) + os.makedirs(CACHE_DIR, exist_ok=True) + logger.info(f"📁 ONNX cache directory: {ONNX_CACHE_DIR}") + logger.info(f" Contents: {os.listdir(ONNX_CACHE_DIR) if os.path.exists(ONNX_CACHE_DIR) else 'Directory does not exist'}") + + + # Check GPU availability safely + self.use_gpu = False + self.device = None + + if TORCH_AVAILABLE and torch is not None: + try: + self.use_gpu = torch.cuda.is_available() + self.device = torch.device('cuda' if self.use_gpu else 'cpu') + if self.use_gpu: + logger.info(f"🚀 GPU: {torch.cuda.get_device_name(0)}") + else: + logger.info("💻 Using CPU") + except AttributeError: + # torch module exists but doesn't have cuda attribute + self.use_gpu = False + self.device = None + logger.info("⚠️ PyTorch incomplete - inpainting disabled") + else: + logger.info("⚠️ PyTorch 
not available - inpainting disabled") + + # Quantization/precision toggle (off by default) + try: + adv_cfg = self.config.get('manga_settings', {}).get('advanced', {}) if isinstance(self.config, dict) else {} + # Track singleton mode from settings for thread limiting (deprecated - kept for compatibility) + self.singleton_mode = bool(adv_cfg.get('use_singleton_models', True)) + env_quant = os.environ.get('MODEL_QUANTIZE', 'false').lower() == 'true' + self.quantize_enabled = bool(env_quant or adv_cfg.get('quantize_models', False)) + # ONNX quantization is now strictly opt-in (config or env), decoupled from general quantize_models + self.onnx_quantize_enabled = bool( + adv_cfg.get('onnx_quantize', os.environ.get('ONNX_QUANTIZE', 'false').lower() == 'true') + ) + self.torch_precision = str(adv_cfg.get('torch_precision', os.environ.get('TORCH_PRECISION', 'auto'))).lower() + logger.info(f"Quantization: {'ENABLED' if self.quantize_enabled else 'disabled'} for Local Inpainter; onnx_quantize={'on' if self.onnx_quantize_enabled else 'off'}; torch_precision={self.torch_precision}") + self.int8_enabled = bool( + adv_cfg.get('int8_quantize', False) + or adv_cfg.get('quantize_int8', False) + or os.environ.get('TORCH_INT8', 'false').lower() == 'true' + or self.torch_precision in ('int8', 'int8_dynamic') + ) + logger.info( + f"Quantization: {'ENABLED' if self.quantize_enabled else 'disabled'} for Local Inpainter; " + f"onnx_quantize={'on' if self.onnx_quantize_enabled else 'off'}; " + f"torch_precision={self.torch_precision}; int8={'on' if self.int8_enabled else 'off'}" + ) + except Exception: + self.quantize_enabled = False + self.onnx_quantize_enabled = False + self.torch_precision = 'auto' + self.int8_enabled = False + + # HD strategy defaults (mirror of comic-translate behavior) + try: + adv_cfg = self.config.get('manga_settings', {}).get('advanced', {}) if isinstance(self.config, dict) else {} + except Exception: + adv_cfg = {} + try: + self.hd_strategy = str(os.environ.get('HD_STRATEGY', adv_cfg.get('hd_strategy', 'resize'))).lower() + except Exception: + self.hd_strategy = 'resize' + try: + self.hd_resize_limit = int(os.environ.get('HD_RESIZE_LIMIT', adv_cfg.get('hd_strategy_resize_limit', 1536))) + except Exception: + self.hd_resize_limit = 1536 + try: + self.hd_crop_margin = int(os.environ.get('HD_CROP_MARGIN', adv_cfg.get('hd_strategy_crop_margin', 16))) + except Exception: + self.hd_crop_margin = 16 + try: + self.hd_crop_trigger_size = int(os.environ.get('HD_CROP_TRIGGER', adv_cfg.get('hd_strategy_crop_trigger_size', 1024))) + except Exception: + self.hd_crop_trigger_size = 1024 + logger.info(f"HD strategy: {self.hd_strategy} (resize_limit={self.hd_resize_limit}, crop_margin={self.hd_crop_margin}, crop_trigger={self.hd_crop_trigger_size})") + + # Stop flag support + self.stop_flag = None + self._stopped = False + self.log_callback = None + + # Initialize bubble detector if available + if BUBBLE_DETECTOR_AVAILABLE: + try: + self.bubble_detector = BubbleDetector() + logger.info("🗨️ Bubble detection available") + except: + self.bubble_detector = None + logger.info("🗨️ Bubble detection not available") + + def _load_config(self): + try: + if self.config_path and os.path.exists(self.config_path): + with open(self.config_path, 'r', encoding='utf-8') as f: + content = f.read().strip() + if not content: + return {} + try: + return json.loads(content) + except json.JSONDecodeError: + # Likely a concurrent write; retry once after a short delay + try: + import time + time.sleep(0.05) + with 
open(self.config_path, 'r', encoding='utf-8') as f2: + return json.load(f2) + except Exception: + return {} + except Exception: + return {} + return {} + + def _save_config(self): + # Don't save if config is empty (prevents purging) + if not getattr(self, 'config', None): + return + try: + # Load existing (best-effort) + full_config = {} + if self.config_path and os.path.exists(self.config_path): + try: + with open(self.config_path, 'r', encoding='utf-8') as f: + full_config = json.load(f) + except Exception as read_err: + logger.debug(f"Config read during save failed (non-critical): {read_err}") + full_config = {} + # Update + full_config.update(self.config) + # Atomic write: write to temp then replace + tmp_path = (self.config_path or 'config.json') + '.tmp' + with open(tmp_path, 'w', encoding='utf-8') as f: + json.dump(full_config, f, indent=2, ensure_ascii=False) + try: + os.replace(tmp_path, self.config_path or 'config.json') + except Exception as replace_err: + logger.debug(f"Config atomic replace failed, trying direct write: {replace_err}") + # Fallback to direct write + with open(self.config_path or 'config.json', 'w', encoding='utf-8') as f: + json.dump(full_config, f, indent=2, ensure_ascii=False) + except Exception as save_err: + # Never crash on config save, but log for debugging + logger.debug(f"Config save failed (non-critical): {save_err}") + pass + + def set_stop_flag(self, stop_flag): + """Set the stop flag for checking interruptions""" + self.stop_flag = stop_flag + self._stopped = False + + def set_log_callback(self, log_callback): + """Set log callback for GUI integration""" + self.log_callback = log_callback + + def _check_stop(self) -> bool: + """Check if stop has been requested""" + if self._stopped: + return True + if self.stop_flag and self.stop_flag.is_set(): + self._stopped = True + return True + # Check global manga translator cancellation + try: + from manga_translator import MangaTranslator + if MangaTranslator.is_globally_cancelled(): + self._stopped = True + return True + except Exception: + pass + return False + + def _log(self, message: str, level: str = "info"): + """Log message with stop suppression""" + # Suppress logs when stopped (allow only essential stop confirmation messages) + if self._check_stop(): + essential_stop_keywords = [ + "⏹️ Translation stopped by user", + "⏹️ Inpainting stopped", + "cleanup", "🧹" + ] + if not any(keyword in message for keyword in essential_stop_keywords): + return + + if self.log_callback: + self.log_callback(message, level) + else: + logger.info(message) if level == 'info' else getattr(logger, level, logger.info)(message) + + def reset_stop_flags(self): + """Reset stop flags when starting new processing""" + self._stopped = False + + def convert_to_onnx(self, model_path: str, method: str) -> Optional[str]: + """Convert a PyTorch model to ONNX format with FFT handling via custom operators""" + if not ONNX_AVAILABLE: + logger.warning("ONNX not available, skipping conversion") + return None + + try: + # Generate ONNX path + model_name = os.path.basename(model_path).replace('.pt', '') + onnx_path = os.path.join(ONNX_CACHE_DIR, f"{model_name}_{method}.onnx") + + # Check if ONNX already exists + if os.path.exists(onnx_path) and not FORCE_ONNX_REBUILD: + logger.info(f"✅ ONNX model already exists: {onnx_path}") + return onnx_path + + logger.info(f"🔄 Converting {method} model to ONNX...") + + # The model should already be loaded at this point + if not self.model_loaded or self.current_method != method: + logger.error("Model not 
loaded for ONNX conversion") + return None + + # Create dummy inputs + dummy_image = torch.randn(1, 3, 512, 512).to(self.device) + dummy_mask = torch.randn(1, 1, 512, 512).to(self.device) + + # For FFT models, we can't convert directly + fft_models = ['lama', 'anime', 'lama_official'] + if method in fft_models: + logger.warning(f"⚠️ {method.upper()} uses FFT operations that cannot be exported") + return None # Just return None, don't suggest Carve + + # Standard export for non-FFT models + try: + torch.onnx.export( + self.model, + (dummy_image, dummy_mask), + onnx_path, + export_params=True, + opset_version=13, + do_constant_folding=True, + input_names=['image', 'mask'], + output_names=['output'], + dynamic_axes={ + 'image': {0: 'batch', 2: 'height', 3: 'width'}, + 'mask': {0: 'batch', 2: 'height', 3: 'width'}, + 'output': {0: 'batch', 2: 'height', 3: 'width'} + } + ) + logger.info(f"✅ ONNX model saved to: {onnx_path}") + return onnx_path + + except torch.onnx.errors.UnsupportedOperatorError as e: + logger.error(f"❌ Unsupported operator: {e}") + return None + + except Exception as e: + logger.error(f"❌ ONNX conversion failed: {e}") + logger.error(traceback.format_exc()) + return None + + def load_onnx_model(self, onnx_path: str) -> bool: + """Load an ONNX model with custom operator support""" + if not ONNX_AVAILABLE: + logger.error("ONNX Runtime not available") + return False + + # Check if this exact ONNX model is already loaded + if (self.onnx_session is not None and + hasattr(self, 'current_onnx_path') and + self.current_onnx_path == onnx_path): + logger.debug(f"✅ ONNX model already loaded: {onnx_path}") + return True + + try: + # Don't log here if we already logged in load_model + logger.debug(f"📦 ONNX Runtime loading: {onnx_path}") + + # Store the path for later checking + self.current_onnx_path = onnx_path + + # Check if this is a Carve model (fixed 512x512) + is_carve_model = "lama_fp32" in onnx_path or "carve" in onnx_path.lower() + if is_carve_model: + logger.info("📦 Detected Carve ONNX model (fixed 512x512 input)") + self.onnx_fixed_size = (512, 512) + else: + self.onnx_fixed_size = None + + # Standard ONNX loading: prefer CUDA if available; otherwise CPU. Do NOT use DML. 
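+            # (Editor's illustrative note, not part of the original diff.) The provider list built
+            # below mirrors typical onnxruntime behaviour: on a CUDA-capable install,
+            # ort.get_available_providers() usually reports something like
+            #     ['CUDAExecutionProvider', 'CPUExecutionProvider'],
+            # so CUDA is tried first with CPU as the fallback; on CPU-only installs the list
+            # collapses to ['CPUExecutionProvider']. DirectML is deliberately never requested.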
+ try: + avail = ort.get_available_providers() if ONNX_AVAILABLE else [] + except Exception: + avail = [] + if 'CUDAExecutionProvider' in avail: + providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] + else: + providers = ['CPUExecutionProvider'] + session_path = onnx_path + try: + fname_lower = os.path.basename(onnx_path).lower() + except Exception: + fname_lower = str(onnx_path).lower() + + # Device-aware policy for LaMa-type ONNX (Carve or contains 'lama') + is_lama_model = is_carve_model or ('lama' in fname_lower) + if is_lama_model: + base = os.path.splitext(onnx_path)[0] + if self.use_gpu: + # Prefer FP16 on CUDA + fp16_path = base + '.fp16.onnx' + if (not os.path.exists(fp16_path)) or FORCE_ONNX_REBUILD: + try: + import onnx as _onnx + try: + from onnxruntime_tools.transformers.float16 import convert_float_to_float16 as _to_fp16 + except Exception: + try: + from onnxconverter_common import float16 + def _to_fp16(m, keep_io_types=True): + return float16.convert_float_to_float16(m, keep_io_types=keep_io_types) + except Exception: + _to_fp16 = None + if _to_fp16 is not None: + m = _onnx.load(onnx_path) + m_fp16 = _to_fp16(m, keep_io_types=True) + _onnx.save(m_fp16, fp16_path) + logger.info(f"✅ Generated FP16 ONNX for LaMa: {fp16_path}") + except Exception as e: + logger.warning(f"FP16 conversion for LaMa failed: {e}") + if os.path.exists(fp16_path): + session_path = fp16_path + else: + # CPU path for LaMa: quantize only if enabled, and MatMul-only to avoid artifacts + if self.onnx_quantize_enabled: + try: + from onnxruntime.quantization import quantize_dynamic, QuantType + quant_path = base + '.matmul.int8.onnx' + if (not os.path.exists(quant_path)) or FORCE_ONNX_REBUILD: + logger.info("🔻 LaMa: Quantizing ONNX weights to INT8 (dynamic, ops=['MatMul'])...") + quantize_dynamic( + model_input=onnx_path, + model_output=quant_path, + weight_type=QuantType.QInt8, + op_types_to_quantize=['MatMul'] + ) + self.onnx_quantize_applied = True + # Validate dynamic quant result + try: + import onnx as _onnx + _m_q = _onnx.load(quant_path) + _onnx.checker.check_model(_m_q) + except Exception as _qchk: + logger.warning(f"LaMa dynamic quant model invalid; deleting and falling back: {_qchk}") + try: + os.remove(quant_path) + except Exception: + pass + quant_path = None + except Exception as dy_err: + logger.warning(f"LaMa dynamic quantization failed: {dy_err}") + quant_path = None + # Fallback: static QDQ MatMul-only with zero data reader + if quant_path is None: + try: + import onnx as _onnx + from onnxruntime.quantization import ( + CalibrationDataReader, quantize_static, + QuantFormat, QuantType, CalibrationMethod + ) + m = _onnx.load(onnx_path) + shapes = {} + for inp in m.graph.input: + dims = [] + for d in inp.type.tensor_type.shape.dim: + dims.append(d.dim_value if d.dim_value > 0 else 1) + shapes[inp.name] = dims + class _ZeroReader(CalibrationDataReader): + def __init__(self, shapes): + self.shapes = shapes + self.done = False + def get_next(self): + if self.done: + return None + feed = {} + for name, s in self.shapes.items(): + ss = list(s) + if len(ss) == 4: + if ss[2] <= 1: ss[2] = 512 + if ss[3] <= 1: ss[3] = 512 + if ss[1] <= 1 and 'mask' not in name.lower(): + ss[1] = 3 + feed[name] = np.zeros(ss, dtype=np.float32) + self.done = True + return feed + dr = _ZeroReader(shapes) + quant_path = base + '.matmul.int8.onnx' + quantize_static( + model_input=onnx_path, + model_output=quant_path, + calibration_data_reader=dr, + quant_format=QuantFormat.QDQ, + activation_type=QuantType.QUInt8, 
+ weight_type=QuantType.QInt8, + per_channel=False, + calibrate_method=CalibrationMethod.MinMax, + op_types_to_quantize=['MatMul'] + ) + # Validate + try: + _m_q = _onnx.load(quant_path) + _onnx.checker.check_model(_m_q) + except Exception as _qchk2: + logger.warning(f"LaMa static MatMul-only quant model invalid; deleting: {_qchk2}") + try: + os.remove(quant_path) + except Exception: + pass + quant_path = None + else: + logger.info(f"✅ Generated MatMul-only INT8 ONNX for LaMa: {quant_path}") + self.onnx_quantize_applied = True + except Exception as st_err: + logger.warning(f"LaMa static MatMul-only quantization failed: {st_err}") + quant_path = None + # Use the quantized model if valid + if quant_path and os.path.exists(quant_path): + session_path = quant_path + logger.info(f"✅ Using LaMa quantized ONNX model: {quant_path}") + # If quantization not enabled or failed, session_path remains onnx_path (FP32) + + # Optional dynamic/static quantization for other models (opt-in) + if (not is_lama_model) and self.onnx_quantize_enabled: + base = os.path.splitext(onnx_path)[0] + fname = os.path.basename(onnx_path).lower() + is_aot = 'aot' in fname + # For AOT: ignore any MatMul-only file and prefer Conv+MatMul + if is_aot: + try: + ignored_matmul = base + ".matmul.int8.onnx" + if os.path.exists(ignored_matmul): + logger.info(f"⏭️ Ignoring MatMul-only quantized file for AOT: {ignored_matmul}") + except Exception: + pass + # Choose target quant file and ops + if is_aot: + quant_path = base + ".int8.onnx" + ops_to_quant = ['MatMul'] + # Use MatMul-only for safer quantization across models + ops_for_static = ['MatMul'] + # Try to simplify AOT graph prior to quantization + quant_input_path = onnx_path + try: + import onnx as _onnx + try: + from onnxsim import simplify as _onnx_simplify + _model = _onnx.load(onnx_path) + _sim_model, _check = _onnx_simplify(_model) + if _check: + sim_path = base + ".sim.onnx" + _onnx.save(_sim_model, sim_path) + quant_input_path = sim_path + logger.info(f"🧰 Simplified AOT ONNX before quantization: {sim_path}") + except Exception as _sim_err: + logger.info(f"AOT simplification skipped: {_sim_err}") + # No ONNX shape inference; keep original graph structure + # Ensure opset >= 13 for QDQ (axis attribute on DequantizeLinear) + try: + _m_tmp = _onnx.load(quant_input_path) + _opset = max([op.version for op in _m_tmp.opset_import]) if _m_tmp.opset_import else 11 + if _opset < 13: + from onnx import version_converter as _vc + _m13 = _vc.convert_version(_m_tmp, 13) + up_path = base + ".op13.onnx" + _onnx.save(_m13, up_path) + quant_input_path = up_path + logger.info(f"🧰 Upgraded ONNX opset to 13 before QDQ quantization: {up_path}") + except Exception as _operr: + logger.info(f"Opset upgrade skipped: {_operr}") + except Exception: + quant_input_path = onnx_path + else: + quant_path = base + ".matmul.int8.onnx" + ops_to_quant = ['MatMul'] + ops_for_static = ops_to_quant + quant_input_path = onnx_path + # Perform quantization if needed + if not os.path.exists(quant_path) or FORCE_ONNX_REBUILD: + if is_aot: + # Directly perform static QDQ quantization for MatMul only (avoid Conv activations) + try: + import onnx as _onnx + from onnxruntime.quantization import CalibrationDataReader, quantize_static, QuantFormat, QuantType, CalibrationMethod + _model = _onnx.load(quant_input_path) + # Build input shapes from the model graph + input_shapes = {} + for inp in _model.graph.input: + dims = [] + for d in inp.type.tensor_type.shape.dim: + if d.dim_value > 0: + dims.append(d.dim_value) + else: 
+ # default fallback dimension + dims.append(1) + input_shapes[inp.name] = dims + class _ZeroDataReader(CalibrationDataReader): + def __init__(self, input_shapes): + self._shapes = input_shapes + self._provided = False + def get_next(self): + if self._provided: + return None + feed = {} + for name, shape in self._shapes.items(): + # Ensure reasonable default spatial size + s = list(shape) + if len(s) == 4: + if s[2] <= 1: + s[2] = 512 + if s[3] <= 1: + s[3] = 512 + # channel fallback + if s[1] <= 1 and 'mask' not in name.lower(): + s[1] = 3 + feed[name] = (np.zeros(s, dtype=np.float32)) + self._provided = True + return feed + dr = _ZeroDataReader(input_shapes) + quantize_static( + model_input=quant_input_path, + model_output=quant_path, + calibration_data_reader=dr, + quant_format=QuantFormat.QDQ, + activation_type=QuantType.QUInt8, + weight_type=QuantType.QInt8, + per_channel=True, + calibrate_method=CalibrationMethod.MinMax, + op_types_to_quantize=ops_for_static + ) + # Validate quantized model to catch structural errors early + try: + _m_q = _onnx.load(quant_path) + _onnx.checker.check_model(_m_q) + except Exception as _qchk: + logger.warning(f"Quantized AOT model validation failed: {_qchk}") + # Remove broken quantized file to force fallback + try: + os.remove(quant_path) + except Exception: + pass + else: + logger.info(f"✅ Static INT8 quantization produced: {quant_path}") + except Exception as st_err: + logger.warning(f"Static ONNX quantization failed: {st_err}") + else: + # First attempt: dynamic quantization (MatMul) + try: + from onnxruntime.quantization import quantize_dynamic, QuantType + logger.info("🔻 Quantizing ONNX inpainting model weights to INT8 (dynamic, ops=['MatMul'])...") + quantize_dynamic( + model_input=quant_input_path, + model_output=quant_path, + weight_type=QuantType.QInt8, + op_types_to_quantize=['MatMul'] + ) + except Exception as dy_err: + logger.warning(f"Dynamic ONNX quantization failed: {dy_err}; attempting static quantization...") + # Fallback: static quantization with a zero data reader + try: + import onnx as _onnx + from onnxruntime.quantization import CalibrationDataReader, quantize_static, QuantFormat, QuantType, CalibrationMethod + _model = _onnx.load(quant_input_path) + # Build input shapes from the model graph + input_shapes = {} + for inp in _model.graph.input: + dims = [] + for d in inp.type.tensor_type.shape.dim: + if d.dim_value > 0: + dims.append(d.dim_value) + else: + # default fallback dimension + dims.append(1) + input_shapes[inp.name] = dims + class _ZeroDataReader(CalibrationDataReader): + def __init__(self, input_shapes): + self._shapes = input_shapes + self._provided = False + def get_next(self): + if self._provided: + return None + feed = {} + for name, shape in self._shapes.items(): + # Ensure reasonable default spatial size + s = list(shape) + if len(s) == 4: + if s[2] <= 1: + s[2] = 512 + if s[3] <= 1: + s[3] = 512 + # channel fallback + if s[1] <= 1 and 'mask' not in name.lower(): + s[1] = 3 + feed[name] = (np.zeros(s, dtype=np.float32)) + self._provided = True + return feed + dr = _ZeroDataReader(input_shapes) + quantize_static( + model_input=quant_input_path, + model_output=quant_path, + calibration_data_reader=dr, + quant_format=QuantFormat.QDQ, + activation_type=QuantType.QUInt8, + weight_type=QuantType.QInt8, + per_channel=True, + calibrate_method=CalibrationMethod.MinMax, + op_types_to_quantize=ops_for_static + ) + # Validate quantized model to catch structural errors early + try: + _m_q = _onnx.load(quant_path) + 
_onnx.checker.check_model(_m_q) + except Exception as _qchk: + logger.warning(f"Quantized AOT model validation failed: {_qchk}") + # Remove broken quantized file to force fallback + try: + os.remove(quant_path) + except Exception: + pass + else: + logger.info(f"✅ Static INT8 quantization produced: {quant_path}") + except Exception as st_err: + logger.warning(f"Static ONNX quantization failed: {st_err}") + # Prefer the quantized file if it now exists + if os.path.exists(quant_path): + # Validate existing quantized model before using it + try: + import onnx as _onnx + _m_q = _onnx.load(quant_path) + _onnx.checker.check_model(_m_q) + except Exception as _qchk: + logger.warning(f"Existing quantized ONNX invalid; deleting and falling back: {_qchk}") + try: + os.remove(quant_path) + except Exception: + pass + else: + session_path = quant_path + logger.info(f"✅ Using quantized ONNX model: {quant_path}") + else: + logger.warning("ONNX quantization not applied: quantized file not created") + # Use conservative ORT memory options to reduce RAM growth + so = ort.SessionOptions() + try: + so.enable_mem_pattern = False + so.enable_cpu_mem_arena = False + except Exception: + pass + # Enable optimal performance settings (let ONNX use all CPU cores) + try: + # Use all available CPU threads for best performance + # ONNX Runtime will automatically use optimal thread count + so.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_EXTENDED + except Exception: + pass + # Try to create an inference session, with graceful fallbacks + try: + self.onnx_session = ort.InferenceSession(session_path, sess_options=so, providers=providers) + except Exception as e: + err = str(e) + logger.warning(f"ONNX session creation failed for {session_path}: {err}") + # If quantized path failed due to unsupported ops or invalid graph, remove it and retry unquantized + if session_path != onnx_path and ('ConvInteger' in err or 'NOT_IMPLEMENTED' in err or 'INVALID_ARGUMENT' in err): + try: + if os.path.exists(session_path): + os.remove(session_path) + logger.info(f"🧹 Deleted invalid quantized model: {session_path}") + except Exception: + pass + try: + logger.info("Retrying with unquantized ONNX model...") + self.onnx_session = ort.InferenceSession(onnx_path, sess_options=so, providers=providers) + session_path = onnx_path + except Exception as e2: + logger.warning(f"Unquantized ONNX session failed with current providers: {e2}") + # As a last resort, try CPU-only + try: + logger.info("Retrying ONNX on CPUExecutionProvider only...") + self.onnx_session = ort.InferenceSession(onnx_path, sess_options=so, providers=['CPUExecutionProvider']) + session_path = onnx_path + providers = ['CPUExecutionProvider'] + except Exception as e3: + logger.error(f"Failed to create ONNX session on CPU: {e3}") + raise + else: + # If we weren't quantized but failed on CUDA, try CPU-only once + if self.use_gpu and 'NOT_IMPLEMENTED' in err: + try: + logger.info("Retrying ONNX on CPUExecutionProvider only...") + self.onnx_session = ort.InferenceSession(session_path, sess_options=so, providers=['CPUExecutionProvider']) + providers = ['CPUExecutionProvider'] + except Exception as e4: + logger.error(f"Failed to create ONNX session on CPU: {e4}") + raise + + # Get input/output names + if self.onnx_session is None: + raise RuntimeError("ONNX session was not created") + self.onnx_input_names = [i.name for i in self.onnx_session.get_inputs()] + self.onnx_output_names = [o.name for o in self.onnx_session.get_outputs()] + + # Check input shapes to detect 
fixed-size models + input_shape = self.onnx_session.get_inputs()[0].shape + if len(input_shape) == 4 and input_shape[2] == 512 and input_shape[3] == 512: + self.onnx_fixed_size = (512, 512) + logger.info(f" Model expects fixed size: 512x512") + + # Log success with I/O info in a single line + logger.debug(f"✅ ONNX session created - Inputs: {self.onnx_input_names}, Outputs: {self.onnx_output_names}") + + self.use_onnx = True + return True + + except Exception as e: + logger.error(f"❌ Failed to load ONNX: {e}") + import traceback + logger.debug(f"ONNX load traceback: {traceback.format_exc()}") + self.use_onnx = False + self.model_loaded = False + return False + + def _convert_checkpoint_key(self, key): + """Convert checkpoint key format to model format""" + # model.24.weight -> model_24.weight + if re.match(r'^model\.(\d+)\.(weight|bias|running_mean|running_var)$', key): + return re.sub(r'model\.(\d+)\.', r'model_\1.', key) + + # model.5.conv1.ffc.weight -> model_5_conv1_ffc.weight + if key.startswith('model.'): + parts = key.split('.') + if parts[-1] in ['weight', 'bias', 'running_mean', 'running_var']: + return '_'.join(parts[:-1]).replace('model_', 'model_') + '.' + parts[-1] + + return key.replace('.', '_') + + def _load_weights_with_mapping(self, model, state_dict): + """Load weights with proper mapping""" + model_dict = model.state_dict() + + logger.info(f"📊 Model expects {len(model_dict)} weights") + logger.info(f"📊 Checkpoint has {len(state_dict)} weights") + + # Filter out num_batches_tracked + actual_weights = {k: v for k, v in state_dict.items() if 'num_batches_tracked' not in k} + logger.info(f" Actual weights: {len(actual_weights)}") + + mapped = {} + unmapped_ckpt = [] + unmapped_model = list(model_dict.keys()) + + # Map checkpoint weights + for ckpt_key, ckpt_val in actual_weights.items(): + success = False + converted_key = self._convert_checkpoint_key(ckpt_key) + + if converted_key in model_dict: + target_shape = model_dict[converted_key].shape + + if target_shape == ckpt_val.shape: + mapped[converted_key] = ckpt_val + success = True + elif len(ckpt_val.shape) == 4 and len(target_shape) == 4: + # 4D permute for decoder convs + permuted = ckpt_val.permute(1, 0, 2, 3) + if target_shape == permuted.shape: + mapped[converted_key] = permuted + logger.info(f" ✅ Permuted: {ckpt_key}") + success = True + elif len(ckpt_val.shape) == 2 and len(target_shape) == 2: + # 2D transpose + transposed = ckpt_val.transpose(0, 1) + if target_shape == transposed.shape: + mapped[converted_key] = transposed + success = True + + if success and converted_key in unmapped_model: + unmapped_model.remove(converted_key) + + if not success: + unmapped_ckpt.append(ckpt_key) + + # Try fallback mapping for unmapped + if unmapped_ckpt: + logger.info(f" 🔧 Fallback mapping for {len(unmapped_ckpt)} weights...") + for ckpt_key in unmapped_ckpt[:]: + ckpt_val = actual_weights[ckpt_key] + for model_key in unmapped_model[:]: + if model_dict[model_key].shape == ckpt_val.shape: + if ('weight' in ckpt_key and 'weight' in model_key) or \ + ('bias' in ckpt_key and 'bias' in model_key): + mapped[model_key] = ckpt_val + unmapped_model.remove(model_key) + unmapped_ckpt.remove(ckpt_key) + logger.info(f" ✅ Mapped: {ckpt_key} -> {model_key}") + break + + # Initialize missing weights + complete_dict = model_dict.copy() + complete_dict.update(mapped) + + for key in unmapped_model: + param = complete_dict[key] + if 'weight' in key: + if 'conv' in key.lower(): + nn.init.kaiming_normal_(param, mode='fan_out', nonlinearity='relu') + 
else: + nn.init.xavier_uniform_(param) + elif 'bias' in key: + nn.init.zeros_(param) + elif 'running_mean' in key: + nn.init.zeros_(param) + elif 'running_var' in key: + nn.init.ones_(param) + + # Report + logger.info(f"✅ Mapped {len(actual_weights) - len(unmapped_ckpt)}/{len(actual_weights)} checkpoint weights") + logger.info(f" Filled {len(mapped)}/{len(model_dict)} model positions") + + if unmapped_model: + pct = (len(unmapped_model) / len(model_dict)) * 100 + logger.info(f" ⚠️ Initialized {len(unmapped_model)} missing weights ({pct:.1f}%)") + if pct > 20: + logger.warning(" ⚠️ May produce artifacts - checkpoint is incomplete") + logger.warning(" 💡 Consider downloading JIT model for better quality:") + logger.warning(f" inpainter.download_jit_model('{self.current_method or 'lama'}')") + + model.load_state_dict(complete_dict, strict=True) + return True + + def download_jit_model(self, method: str) -> str: + """Download JIT model for a method""" + if method in LAMA_JIT_MODELS: + model_info = LAMA_JIT_MODELS[method] + logger.info(f"📥 Downloading {model_info['name']}...") + + try: + model_path = download_model(model_info['url'], model_info['md5']) + return model_path + except Exception as e: + logger.error(f"Failed to download {method}: {e}") + else: + logger.warning(f"No JIT model available for {method}") + + return None + + def load_model(self, method, model_path, force_reload=False): + """Load model - supports both JIT and checkpoint files with ONNX conversion""" + try: + if not TORCH_AVAILABLE: + logger.warning("PyTorch not available in this build") + logger.info("Inpainting features will be disabled - this is normal for lightweight builds") + logger.info("The application will continue to work without local inpainting") + self.model_loaded = False + return False + + # Additional safety check for torch being None + if torch is None or nn is None: + logger.warning("PyTorch modules not properly loaded") + logger.info("Inpainting features will be disabled - this is normal for lightweight builds") + self.model_loaded = False + return False + + # Check if model path changed - but only if we had a previous path saved + current_saved_path = self.config.get(f'{method}_model_path', '') + if current_saved_path and current_saved_path != model_path: + logger.info(f"📍 Model path changed for {method}") + logger.info(f" Old: {current_saved_path}") + logger.info(f" New: {model_path}") + force_reload = True + + if not os.path.exists(model_path): + # Try to auto-download JIT model if path doesn't exist + logger.warning(f"Model not found: {model_path}") + logger.info("Attempting to download JIT model...") + + try: + jit_path = self.download_jit_model(method) + if jit_path and os.path.exists(jit_path): + model_path = jit_path + logger.info(f"Using downloaded JIT model: {jit_path}") + else: + logger.error(f"Model not found and download failed: {model_path}") + logger.info("Inpainting will be unavailable for this session") + return False + except Exception as download_error: + logger.error(f"Download failed: {download_error}") + logger.info("Inpainting will be unavailable for this session") + return False + + # Check if already loaded in THIS instance + if self.model_loaded and self.current_method == method and not force_reload: + # Additional check for ONNX - make sure the session exists + if self.use_onnx and self.onnx_session is not None: + logger.debug(f"✅ {method.upper()} ONNX already loaded (skipping reload)") + return True + elif not self.use_onnx and self.model is not None: + logger.debug(f"✅ 
{method.upper()} already loaded (skipping reload)") + return True + else: + # Model claims to be loaded but objects are missing - force reload + logger.warning(f"⚠️ Model claims loaded but session/model object is None - forcing reload") + force_reload = True + self.model_loaded = False + + # Clear previous model if force reload + if force_reload: + logger.info(f"🔄 Force reloading {method} model...") + self.model = None + self.onnx_session = None + self.model_loaded = False + self.is_jit_model = False + # Only log loading message when actually loading + logger.info(f"📥 Loading {method} from {model_path}") + elif self.model_loaded and self.current_method != method: + # If we have a model loaded but it's a different method, clear it + logger.info(f"🔄 Switching from {self.current_method} to {method}") + self.model = None + self.onnx_session = None + self.model_loaded = False + self.is_jit_model = False + # Only log loading message when actually loading + logger.info(f"📥 Loading {method} from {model_path}") + elif not self.model_loaded: + # Only log when we're actually going to load + logger.info(f"📥 Loading {method} from {model_path}") + # else: model is loaded and current, no logging needed + + # Normalize path and enforce expected extension for certain methods + try: + _ext = os.path.splitext(model_path)[1].lower() + _method_lower = str(method).lower() + # For explicit ONNX methods, ensure we use a .onnx path + if _method_lower in ("lama_onnx", "anime_onnx", "aot_onnx") and _ext != ".onnx": + # If the file exists, try to detect if it's actually an ONNX model and correct the extension + if os.path.exists(model_path) and ONNX_AVAILABLE: + try: + import onnx as _onnx + _ = _onnx.load(model_path) # will raise if not ONNX + # Build a corrected path under the ONNX cache dir + base_name = os.path.splitext(os.path.basename(model_path))[0] + if base_name.endswith('.pt'): + base_name = base_name[:-3] + corrected_path = os.path.join(ONNX_CACHE_DIR, base_name + ".onnx") + # Avoid overwriting a valid file with an invalid one + if model_path != corrected_path: + try: + import shutil as _shutil + _shutil.copy2(model_path, corrected_path) + model_path = corrected_path + logger.info(f"🔧 Corrected ONNX model extension/path: {model_path}") + except Exception as _cp_e: + # As a fallback, try in-place rename to .onnx + try: + in_place = os.path.splitext(model_path)[0] + ".onnx" + os.replace(model_path, in_place) + model_path = in_place + logger.info(f"🔧 Renamed ONNX model to: {model_path}") + except Exception: + logger.warning(f"Could not correct ONNX extension automatically: {_cp_e}") + except Exception: + # Not an ONNX file; leave as-is + pass + # If the path doesn't exist or still wrong, prefer the known ONNX download for this method + if (not os.path.exists(model_path)) or (os.path.splitext(model_path)[1].lower() != ".onnx"): + try: + # Download the appropriate ONNX model based on the method + if _method_lower == "anime_onnx": + _dl = self.download_jit_model("anime_onnx") + elif _method_lower == "aot_onnx": + _dl = self.download_jit_model("aot_onnx") + else: + _dl = self.download_jit_model("lama_onnx") + if _dl and os.path.exists(_dl): + model_path = _dl + logger.info(f"🔧 Using downloaded {_method_lower.upper()} model: {model_path}") + except Exception: + pass + except Exception: + pass + + # Check file signature to detect ONNX files (even with wrong extension) + # or check file extension + ext = model_path.lower().split('.')[-1] + is_onnx = False + + # Check by file signature + try: + with open(model_path, 
'rb') as f: + file_header = f.read(8) + if file_header.startswith(b'\x08'): + is_onnx = True + logger.debug("📦 Detected ONNX file signature") + except Exception: + pass + + # Check by extension + if ext == 'onnx': + is_onnx = True + + # Handle ONNX files + if is_onnx: + # Note: load_onnx_model will handle its own logging + try: + onnx_load_result = self.load_onnx_model(model_path) + if onnx_load_result: + # CRITICAL: Set model_loaded flag FIRST before any other operations + # This ensures concurrent threads see the correct state immediately + self.model_loaded = True + self.use_onnx = True + self.is_jit_model = False + # Ensure aot_onnx is properly set as current method + if 'aot' in method.lower(): + self.current_method = 'aot_onnx' + else: + self.current_method = method + # Save with BOTH key formats for compatibility (non-critical - do last) + try: + self.config[f'{method}_model_path'] = model_path + self.config[f'manga_{method}_model_path'] = model_path + self._save_config() + except Exception as cfg_err: + logger.debug(f"Config save after ONNX load failed (non-critical): {cfg_err}") + logger.info(f"✅ {method.upper()} ONNX loaded with method: {self.current_method}") + # Double-check model_loaded flag is still set + if not self.model_loaded: + logger.error("❌ CRITICAL: model_loaded flag was unset after successful ONNX load!") + self.model_loaded = True + return True + else: + logger.error("Failed to load ONNX model - load_onnx_model returned False") + self.model_loaded = False + return False + except Exception as onnx_err: + logger.error(f"Exception during ONNX model loading: {onnx_err}") + import traceback + logger.debug(traceback.format_exc()) + self.model_loaded = False + return False + + # Check if it's a JIT model (.pt) or checkpoint (.ckpt/.pth) + if model_path.endswith('.pt'): + try: + # Try loading as JIT/TorchScript + logger.info("📦 Attempting to load as JIT model...") + self.model = torch.jit.load(model_path, map_location=self.device or 'cpu') + self.model.eval() + + if self.use_gpu and self.device: + try: + self.model = self.model.to(self.device) + except Exception as gpu_error: + logger.warning(f"Could not move model to GPU: {gpu_error}") + logger.info("Using CPU instead") + + self.is_jit_model = True + self.model_loaded = True + self.current_method = method + logger.info("✅ JIT model loaded successfully!") + time.sleep(0.1) # Brief pause for stability + logger.debug("💤 JIT model loading pausing briefly for stability") + + # Optional FP16 precision on GPU to reduce VRAM + if self.quantize_enabled and self.use_gpu: + try: + if self.torch_precision in ('fp16', 'auto'): + self.model = self.model.half() + logger.info("🔻 Applied FP16 precision to inpainting model (GPU)") + else: + logger.info("Torch precision set to fp32; skipping half()") + except Exception as _e: + logger.warning(f"Could not switch inpainting model precision: {_e}") + + # Optional INT8 dynamic quantization for CPU TorchScript (best-effort) + if (self.int8_enabled or (self.quantize_enabled and not self.use_gpu and self.torch_precision in ('auto', 'int8'))) and not self.use_gpu: + try: + applied = False + # Try TorchScript dynamic quantization API (older PyTorch) + try: + from torch.quantization import quantize_dynamic_jit # type: ignore + self.model = quantize_dynamic_jit(self.model, {"aten::linear"}, dtype=torch.qint8) # type: ignore + applied = True + except Exception: + pass + # Try eager-style dynamic quantization on the scripted module (may no-op) + if not applied: + try: + import torch.ao.quantization as 
tq # type: ignore + self.model = tq.quantize_dynamic(self.model, {nn.Linear}, dtype=torch.qint8) # type: ignore + applied = True + except Exception: + pass + # Always try to optimize TorchScript for inference + try: + self.model = torch.jit.optimize_for_inference(self.model) # type: ignore + except Exception: + pass + if applied: + logger.info("🔻 Applied INT8 dynamic quantization to JIT inpainting model (CPU)") + self.torch_quantize_applied = True + else: + logger.info("ℹ️ INT8 dynamic quantization not applied (unsupported for this JIT graph); using FP32 CPU") + except Exception as _qe: + logger.warning(f"INT8 quantization skipped: {_qe}") + + # Save with BOTH key formats for compatibility + self.config[f'{method}_model_path'] = model_path + self.config[f'manga_{method}_model_path'] = model_path + self._save_config() + + # ONNX CONVERSION (optionally in background) + if AUTO_CONVERT_TO_ONNX and self.model_loaded: + def _convert_and_switch(): + try: + onnx_path = self.convert_to_onnx(model_path, method) + if onnx_path and self.load_onnx_model(onnx_path): + logger.info("🚀 Using ONNX model for inference") + else: + logger.info("📦 Using PyTorch JIT model for inference") + except Exception as onnx_error: + logger.warning(f"ONNX conversion failed: {onnx_error}") + logger.info("📦 Using PyTorch JIT model for inference") + + if os.environ.get('AUTO_CONVERT_TO_ONNX_BACKGROUND', 'true').lower() == 'true': + threading.Thread(target=_convert_and_switch, daemon=True).start() + else: + _convert_and_switch() + + return True + except Exception as jit_error: + logger.info(f" Not a JIT model, trying as regular checkpoint... ({jit_error})") + try: + checkpoint = torch.load(model_path, map_location='cpu', weights_only=False) + self.is_jit_model = False + except Exception as load_error: + logger.error(f"Failed to load checkpoint: {load_error}") + return False + else: + # Load as regular checkpoint + try: + checkpoint = torch.load(model_path, map_location='cpu', weights_only=False) + self.is_jit_model = False + except Exception as load_error: + logger.error(f"Failed to load checkpoint: {load_error}") + logger.info("This may happen if PyTorch is not fully available in the .exe build") + return False + + # If we get here, it's not JIT, so load as checkpoint + if not self.is_jit_model: + try: + # Try to create the model - this might fail if nn.Module is None + self.model = FFCInpaintModel() + + if isinstance(checkpoint, dict): + if 'gen_state_dict' in checkpoint: + state_dict = checkpoint['gen_state_dict'] + logger.info("📦 Found gen_state_dict") + elif 'state_dict' in checkpoint: + state_dict = checkpoint['state_dict'] + elif 'model' in checkpoint: + state_dict = checkpoint['model'] + else: + state_dict = checkpoint + else: + state_dict = checkpoint + + self._load_weights_with_mapping(self.model, state_dict) + + self.model.eval() + if self.use_gpu and self.device: + try: + self.model = self.model.to(self.device) + except Exception as gpu_error: + logger.warning(f"Could not move model to GPU: {gpu_error}") + logger.info("Using CPU instead") + + # Optional INT8 dynamic quantization for CPU eager model + if (self.int8_enabled or (self.quantize_enabled and not self.use_gpu and self.torch_precision in ('auto', 'int8'))) and not self.use_gpu: + try: + import torch.ao.quantization as tq # type: ignore + self.model = tq.quantize_dynamic(self.model, {nn.Linear}, dtype=torch.qint8) # type: ignore + logger.info("🔻 Applied dynamic INT8 quantization to inpainting model (CPU)") + self.torch_quantize_applied = True + except 
Exception as qe: + logger.warning(f"INT8 dynamic quantization not applied: {qe}") + + except Exception as model_error: + logger.error(f"Failed to create or initialize model: {model_error}") + logger.info("This may happen if PyTorch neural network modules are not available in the .exe build") + return False + + self.model_loaded = True + self.current_method = method + + self.config[f'{method}_model_path'] = model_path + self._save_config() + + logger.info(f"✅ {method.upper()} loaded!") + + # ONNX CONVERSION (optionally in background) + if AUTO_CONVERT_TO_ONNX and model_path.endswith('.pt') and self.model_loaded: + def _convert_and_switch(): + try: + onnx_path = self.convert_to_onnx(model_path, method) + if onnx_path and self.load_onnx_model(onnx_path): + logger.info("🚀 Using ONNX model for inference") + except Exception as onnx_error: + logger.warning(f"ONNX conversion failed: {onnx_error}") + logger.info("📦 Continuing with PyTorch model") + + if os.environ.get('AUTO_CONVERT_TO_ONNX_BACKGROUND', 'true').lower() == 'true': + threading.Thread(target=_convert_and_switch, daemon=True).start() + else: + _convert_and_switch() + + return True + + except Exception as e: + logger.error(f"❌ Failed to load model: {e}") + logger.error(traceback.format_exc()) + logger.info("Note: If running from .exe, some ML libraries may not be included") + logger.info("This is normal for lightweight builds - inpainting will be disabled") + self.model_loaded = False + return False + + def load_model_with_retry(self, method, model_path, force_reload=False, retries: int = 2, retry_delay: float = 0.5) -> bool: + """Attempt to load a model with retries. + Returns True if loaded; False if all attempts fail. On failure, the inpainter will safely no-op. + """ + try: + attempts = max(0, int(retries)) + 1 + except Exception: + attempts = 1 + for attempt in range(attempts): + try: + ok = self.load_model(method, model_path, force_reload=force_reload) + if ok: + return True + except Exception as e: + logger.warning(f"Load attempt {attempt+1} failed with exception: {e}") + # brief delay before next try + if attempt < attempts - 1: + try: + time.sleep(max(0.0, float(retry_delay))) + except Exception: + pass + # If we reach here, loading failed. Leave model unloaded so inpaint() no-ops and returns original image. 
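+        # (Editor's illustrative note, not part of the original diff.) Because of this no-op
+        # fallback, callers can use the retry wrapper without special-casing failure.
+        # A hypothetical call pattern (model path is a placeholder):
+        #     inpainter = LocalInpainter()
+        #     ok = inpainter.load_model_with_retry('anime_onnx', path_to_model, retries=2)
+        #     cleaned = inpainter.inpaint(image_bgr, mask)  # returns image_bgr unchanged if ok is False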
+ logger.warning("All load attempts failed; local inpainting will fall back to returning original images (no-op)") + self.model_loaded = False + # Keep current_method for logging/context if provided + try: + self.current_method = method + except Exception: + pass + return False + + def unload(self): + """Release all heavy resources held by this inpainter instance.""" + try: + # Release ONNX session and metadata + try: + if self.onnx_session is not None: + self.onnx_session = None + except Exception: + pass + for attr in ['onnx_input_names', 'onnx_output_names', 'current_onnx_path', 'onnx_fixed_size']: + try: + if hasattr(self, attr): + setattr(self, attr, None) + except Exception: + pass + + # Release PyTorch model + try: + if self.model is not None: + if TORCH_AVAILABLE and torch is not None: + try: + # Move to CPU then drop reference + self.model = self.model.to('cpu') if hasattr(self.model, 'to') else None + except Exception: + pass + self.model = None + except Exception: + pass + + # Drop bubble detector reference (not the global cache) + try: + self.bubble_detector = None + except Exception: + pass + + # Update flags + self.model_loaded = False + self.use_onnx = False + self.is_jit_model = False + + # Free CUDA cache and trigger GC + try: + if TORCH_AVAILABLE and torch is not None and torch.cuda.is_available(): + torch.cuda.empty_cache() + except Exception: + pass + try: + import gc + gc.collect() + except Exception: + pass + except Exception: + # Never raise from unload + pass + + def pad_img_to_modulo(self, img: np.ndarray, mod: int) -> Tuple[np.ndarray, Tuple[int, int, int, int]]: + """Pad image to be divisible by mod""" + if len(img.shape) == 2: + height, width = img.shape + else: + height, width = img.shape[:2] + + pad_height = (mod - height % mod) % mod + pad_width = (mod - width % mod) % mod + + pad_top = pad_height // 2 + pad_bottom = pad_height - pad_top + pad_left = pad_width // 2 + pad_right = pad_width - pad_left + + if len(img.shape) == 2: + padded = np.pad(img, ((pad_top, pad_bottom), (pad_left, pad_right)), mode='reflect') + else: + padded = np.pad(img, ((pad_top, pad_bottom), (pad_left, pad_right), (0, 0)), mode='reflect') + + return padded, (pad_top, pad_bottom, pad_left, pad_right) + + def remove_padding(self, img: np.ndarray, padding: Tuple[int, int, int, int]) -> np.ndarray: + """Remove padding from image""" + pad_top, pad_bottom, pad_left, pad_right = padding + + if len(img.shape) == 2: + return img[pad_top:img.shape[0]-pad_bottom, pad_left:img.shape[1]-pad_right] + else: + return img[pad_top:img.shape[0]-pad_bottom, pad_left:img.shape[1]-pad_right, :] + + def _inpaint_tiled(self, image, mask, tile_size, overlap, refinement='normal'): + """Process image in tiles""" + orig_h, orig_w = image.shape[:2] + result = image.copy() + + # Calculate tile positions + for y in range(0, orig_h, tile_size - overlap): + for x in range(0, orig_w, tile_size - overlap): + # Calculate tile boundaries + x_end = min(x + tile_size, orig_w) + y_end = min(y + tile_size, orig_h) + + # Adjust start to ensure full tile size if possible + if x_end - x < tile_size and x > 0: + x = max(0, x_end - tile_size) + if y_end - y < tile_size and y > 0: + y = max(0, y_end - tile_size) + + # Extract tile + tile_img = image[y:y_end, x:x_end] + tile_mask = mask[y:y_end, x:x_end] + + # Skip if no inpainting needed + if np.sum(tile_mask) == 0: + continue + + # Process this tile with the actual model + processed_tile = self._process_single_tile(tile_img, tile_mask, tile_size, refinement) + + # Auto-retry for 
tile if no visible change + try: + if self._is_noop(tile_img, processed_tile, tile_mask): + kernel = np.ones((3, 3), np.uint8) + expanded = cv2.dilate(tile_mask, kernel, iterations=1) + processed_retry = self._process_single_tile(tile_img, expanded, tile_size, 'fast') + if self._is_noop(tile_img, processed_retry, expanded): + logger.warning("Tile remained unchanged after retry; proceeding without further fallback") + processed_tile = processed_retry + else: + processed_tile = processed_retry + except Exception as e: + logger.debug(f"Tiled no-op detection error: {e}") + + # Blend tile back into result + if overlap > 0 and (x > 0 or y > 0): + result[y:y_end, x:x_end] = self._blend_tile( + result[y:y_end, x:x_end], + processed_tile, + x > 0, + y > 0, + overlap + ) + else: + result[y:y_end, x:x_end] = processed_tile + + logger.info(f"✅ Tiled inpainting complete ({orig_w}x{orig_h} in {tile_size}x{tile_size} tiles)") + time.sleep(0.1) # Brief pause for stability + logger.debug("💤 Tiled inpainting completion pausing briefly for stability") + return result + + def _process_single_tile(self, tile_img, tile_mask, tile_size, refinement): + """Process a single tile without tiling""" + # Temporarily disable tiling + old_tiling = self.tiling_enabled + self.tiling_enabled = False + result = self.inpaint(tile_img, tile_mask, refinement, _skip_hd=True) + self.tiling_enabled = old_tiling + return result + + def _blend_tile(self, existing, new_tile, blend_x, blend_y, overlap): + """Blend a tile with existing result""" + if not blend_x and not blend_y: + # No blending needed for first tile + return new_tile + + h, w = new_tile.shape[:2] + result = new_tile.copy() + + # Create blend weights + if blend_x and overlap > 0 and w > overlap: + # Horizontal blend on left edge + for i in range(overlap): + alpha = i / overlap + result[:, i] = existing[:, i] * (1 - alpha) + new_tile[:, i] * alpha + + if blend_y and overlap > 0 and h > overlap: + # Vertical blend on top edge + for i in range(overlap): + alpha = i / overlap + result[i, :] = existing[i, :] * (1 - alpha) + new_tile[i, :] * alpha + + return result + + def _is_noop(self, original: np.ndarray, result: np.ndarray, mask: np.ndarray, threshold: float = 0.75) -> bool: + """Return True if inpainting produced negligible change within the masked area.""" + try: + if original is None or result is None: + return True + if original.shape != result.shape: + return False + # Normalize mask to single channel boolean + if mask is None: + return False + if len(mask.shape) == 3: + mask_gray = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY) + else: + mask_gray = mask + m = mask_gray > 0 + if not np.any(m): + return False + # Fast path + if np.array_equal(original, result): + return True + diff = cv2.absdiff(result, original) + if len(diff.shape) == 3: + diff_gray = cv2.cvtColor(diff, cv2.COLOR_BGR2GRAY) + else: + diff_gray = diff + mean_diff = float(np.mean(diff_gray[m])) + return mean_diff < threshold + except Exception as e: + logger.debug(f"No-op detection failed: {e}") + return False + + def _is_white_paste(self, result: np.ndarray, mask: np.ndarray, white_threshold: int = 245, ratio: float = 0.90) -> bool: + """Detect 'white paste' failure: masked area mostly saturated near white.""" + try: + if result is None or mask is None: + return False + if len(mask.shape) == 3: + mask_gray = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY) + else: + mask_gray = mask + m = mask_gray > 0 + if not np.any(m): + return False + if len(result.shape) == 3: + white = (result[..., 0] >= white_threshold) & 
(result[..., 1] >= white_threshold) & (result[..., 2] >= white_threshold) + else: + white = result >= white_threshold + count_mask = int(np.count_nonzero(m)) + count_white = int(np.count_nonzero(white & m)) + if count_mask == 0: + return False + frac = count_white / float(count_mask) + return frac >= ratio + except Exception as e: + logger.debug(f"White paste detection failed: {e}") + return False + + def _log_inpaint_diag(self, path: str, result: np.ndarray, mask: np.ndarray): + try: + h, w = result.shape[:2] + if len(result.shape) == 3: + stats = (float(result.min()), float(result.max()), float(result.mean())) + else: + stats = (float(result.min()), float(result.max()), float(result.mean())) + logger.info(f"[Diag] Path={path} onnx_quant={self.onnx_quantize_applied} torch_quant={self.torch_quantize_applied} size={w}x{h} stats(min,max,mean)={stats}") + if self._is_white_paste(result, mask): + logger.warning(f"[Diag] White-paste detected (mask>0 mostly white)") + except Exception as e: + logger.debug(f"Diag log failed: {e}") + + def inpaint(self, image, mask, refinement='normal', _retry_attempt: int = 0, _skip_hd: bool = False, _skip_tiling: bool = False): + """Inpaint - compatible with JIT, checkpoint, and ONNX models + Implements HD strategy (Resize/Crop) similar to comic-translate to speed up large images. + """ + # Check for stop at start + if self._check_stop(): + self._log("⏹️ Inpainting stopped by user", "warning") + return image + + if not self.model_loaded: + self._log("No model loaded", "error") + return image + + try: + # Store original dimensions + orig_h, orig_w = image.shape[:2] + + # HD strategy (mirror of comic-translate): optional RESIZE or CROP before core inpainting + if not _skip_hd: + try: + strategy = getattr(self, 'hd_strategy', 'resize') or 'resize' + except Exception: + strategy = 'resize' + H, W = orig_h, orig_w + if strategy == 'resize' and max(H, W) > max(16, int(getattr(self, 'hd_resize_limit', 1536))): + limit = max(16, int(getattr(self, 'hd_resize_limit', 1536))) + ratio = float(limit) / float(max(H, W)) + new_w = max(1, int(W * ratio + 0.5)) + new_h = max(1, int(H * ratio + 0.5)) + image_small = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_LANCZOS4) + mask_small = mask if len(mask.shape) == 2 else cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY) + mask_small = cv2.resize(mask_small, (new_w, new_h), interpolation=cv2.INTER_NEAREST) + result_small = self.inpaint(image_small, mask_small, refinement, 0, _skip_hd=True, _skip_tiling=True) + result_full = cv2.resize(result_small, (W, H), interpolation=cv2.INTER_LANCZOS4) + # Paste only masked area + mask_gray = mask_small # already gray but at small size + mask_gray = cv2.resize(mask_gray, (W, H), interpolation=cv2.INTER_NEAREST) + m = mask_gray > 0 + out = image.copy() + out[m] = result_full[m] + return out + elif strategy == 'crop' and max(H, W) > max(16, int(getattr(self, 'hd_crop_trigger_size', 1024))): + mask_gray0 = mask if len(mask.shape) == 2 else cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY) + _, thresh = cv2.threshold(mask_gray0, 127, 255, cv2.THRESH_BINARY) + contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + if contours: + out = image.copy() + margin = max(0, int(getattr(self, 'hd_crop_margin', 16))) + for cnt in contours: + x, y, w, h = cv2.boundingRect(cnt) + l = max(0, x - margin); t = max(0, y - margin) + r = min(W, x + w + margin); b = min(H, y + h + margin) + if r <= l or b <= t: + continue + crop_img = image[t:b, l:r] + crop_mask = mask_gray0[t:b, l:r] + patch = 
self.inpaint(crop_img, crop_mask, refinement, 0, _skip_hd=True, _skip_tiling=True) + out[t:b, l:r] = patch + return out + + if len(mask.shape) == 3: + mask = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY) + + # Apply dilation for anime method + if self.current_method == 'anime': + kernel = np.ones((7, 7), np.uint8) + mask = cv2.dilate(mask, kernel, iterations=1) + + # Use instance tiling settings for ALL models + logger.info(f"🔍 Tiling check: enabled={self.tiling_enabled}, tile_size={self.tile_size}, image_size={orig_h}x{orig_w}") + + # If tiling is enabled and image is larger than tile size + if (not _skip_tiling) and self.tiling_enabled and (orig_h > self.tile_size or orig_w > self.tile_size): + logger.info(f"🔲 Using tiled inpainting: {self.tile_size}x{self.tile_size} tiles with {self.tile_overlap}px overlap") + return self._inpaint_tiled(image, mask, self.tile_size, self.tile_overlap, refinement) + + # ONNX inference path + if self.use_onnx and self.onnx_session: + logger.debug("Using ONNX inference") + + # CRITICAL: Convert BGR (OpenCV default) to RGB (ML model expected) + image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + + # Check if this is a Carve model + is_carve_model = False + if hasattr(self, 'current_onnx_path'): + is_carve_model = "lama_fp32" in self.current_onnx_path or "carve" in self.current_onnx_path.lower() + + # Handle fixed-size models (resize instead of padding) + if hasattr(self, 'onnx_fixed_size') and self.onnx_fixed_size: + fixed_h, fixed_w = self.onnx_fixed_size + # Resize to fixed size + image_resized = cv2.resize(image_rgb, (fixed_w, fixed_h), interpolation=cv2.INTER_LANCZOS4) + mask_resized = cv2.resize(mask, (fixed_w, fixed_h), interpolation=cv2.INTER_NEAREST) + + # Prepare inputs based on model type + if is_carve_model: + # Carve model expects normalized input [0, 1] range + logger.debug("Using Carve model normalization [0, 1]") + img_np = image_resized.astype(np.float32) / 255.0 + mask_np = mask_resized.astype(np.float32) / 255.0 + mask_np = (mask_np > 0.5) * 1.0 # Binary mask + elif self.current_method == 'aot' or 'aot' in str(self.current_method).lower(): + # AOT normalization: [-1, 1] range for image + logger.debug("Using AOT model normalization [-1, 1] for image, [0, 1] for mask") + img_np = (image_resized.astype(np.float32) / 127.5) - 1.0 + mask_np = mask_resized.astype(np.float32) / 255.0 + mask_np = (mask_np > 0.5) * 1.0 # Binary mask + img_np = img_np * (1 - mask_np[:, :, np.newaxis]) # Mask out regions + else: + # Standard LaMa normalization: [0, 1] range + logger.debug("Using standard LaMa normalization [0, 1]") + img_np = image_resized.astype(np.float32) / 255.0 + mask_np = mask_resized.astype(np.float32) / 255.0 + mask_np = (mask_np > 0) * 1.0 + + # Convert to NCHW format + img_np = img_np.transpose(2, 0, 1)[np.newaxis, ...] + mask_np = mask_np[np.newaxis, np.newaxis, ...] + + # Run ONNX inference + ort_inputs = { + self.onnx_input_names[0]: img_np.astype(np.float32), + self.onnx_input_names[1]: mask_np.astype(np.float32) + } + + ort_outputs = self.onnx_session.run(self.onnx_output_names, ort_inputs) + output = ort_outputs[0] + + # Post-process output based on model type + if is_carve_model: + # CRITICAL: Carve model outputs values ALREADY in [0, 255] range! 
+ # DO NOT multiply by 255 or apply any scaling + logger.debug("Carve model output is already in [0, 255] range") + raw_output = output[0].transpose(1, 2, 0) + logger.debug(f"Carve output stats: min={raw_output.min():.3f}, max={raw_output.max():.3f}, mean={raw_output.mean():.3f}") + result = raw_output # Just transpose, no scaling + elif self.current_method == 'aot' or 'aot' in str(self.current_method).lower(): + # AOT: [-1, 1] to [0, 255] + result = ((output[0].transpose(1, 2, 0) + 1.0) * 127.5) + else: + # Standard: [0, 1] to [0, 255] + result = output[0].transpose(1, 2, 0) * 255 + + result = np.clip(np.round(result), 0, 255).astype(np.uint8) + # CRITICAL: Convert RGB (model output) back to BGR (OpenCV expected) + result = cv2.cvtColor(result, cv2.COLOR_RGB2BGR) + + # Resize back to original size + result = cv2.resize(result, (orig_w, orig_h), interpolation=cv2.INTER_LANCZOS4) + self._log_inpaint_diag('onnx-fixed', result, mask) + + else: + # Variable-size models (use padding) + image_padded, padding = self.pad_img_to_modulo(image_rgb, self.pad_mod) + mask_padded, _ = self.pad_img_to_modulo(mask, self.pad_mod) + + # Prepare inputs based on model type + if is_carve_model: + # Carve model normalization [0, 1] + logger.debug("Using Carve model normalization [0, 1]") + img_np = image_padded.astype(np.float32) / 255.0 + mask_np = mask_padded.astype(np.float32) / 255.0 + mask_np = (mask_np > 0.5) * 1.0 + elif self.current_method == 'aot' or 'aot' in str(self.current_method).lower(): + # AOT normalization: [-1, 1] for image + logger.debug("Using AOT model normalization [-1, 1] for image, [0, 1] for mask") + img_np = (image_padded.astype(np.float32) / 127.5) - 1.0 + mask_np = mask_padded.astype(np.float32) / 255.0 + mask_np = (mask_np > 0.5) * 1.0 + img_np = img_np * (1 - mask_np[:, :, np.newaxis]) # Mask out regions + else: + # Standard LaMa normalization: [0, 1] + logger.debug("Using standard LaMa normalization [0, 1]") + img_np = image_padded.astype(np.float32) / 255.0 + mask_np = mask_padded.astype(np.float32) / 255.0 + mask_np = (mask_np > 0) * 1.0 + + # Convert to NCHW format + img_np = img_np.transpose(2, 0, 1)[np.newaxis, ...] + mask_np = mask_np[np.newaxis, np.newaxis, ...] + + # Check for stop before inference + if self._check_stop(): + self._log("⏹️ ONNX inference stopped by user", "warning") + return image + + # Run ONNX inference + ort_inputs = { + self.onnx_input_names[0]: img_np.astype(np.float32), + self.onnx_input_names[1]: mask_np.astype(np.float32) + } + + ort_outputs = self.onnx_session.run(self.onnx_output_names, ort_inputs) + output = ort_outputs[0] + + # Post-process output + if is_carve_model: + # CRITICAL: Carve model outputs values ALREADY in [0, 255] range! 
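+                        # DO NOT multiply by 255 or apply any scaling here either (mirrors the fixed-size Carve path above)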
+ logger.debug("Carve model output is already in [0, 255] range") + raw_output = output[0].transpose(1, 2, 0) + logger.debug(f"Carve output stats: min={raw_output.min():.3f}, max={raw_output.max():.3f}, mean={raw_output.mean():.3f}") + result = raw_output # Just transpose, no scaling + elif self.current_method == 'aot' or 'aot' in str(self.current_method).lower(): + result = ((output[0].transpose(1, 2, 0) + 1.0) * 127.5) + else: + result = output[0].transpose(1, 2, 0) * 255 + + result = np.clip(np.round(result), 0, 255).astype(np.uint8) + # CRITICAL: Convert RGB (model output) back to BGR (OpenCV expected) + result = cv2.cvtColor(result, cv2.COLOR_RGB2BGR) + + # Remove padding + result = self.remove_padding(result, padding) + self._log_inpaint_diag('onnx-padded', result, mask) + + elif self.is_jit_model: + # JIT model processing + if self.current_method == 'aot': + # Special handling for AOT model + logger.debug("Using AOT-specific preprocessing") + + # CRITICAL: Convert BGR (OpenCV) to RGB (AOT model expected) + image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + + # Pad images to be divisible by mod + image_padded, padding = self.pad_img_to_modulo(image_rgb, self.pad_mod) + mask_padded, _ = self.pad_img_to_modulo(mask, self.pad_mod) + + # AOT normalization: [-1, 1] range + img_torch = torch.from_numpy(image_padded).permute(2, 0, 1).unsqueeze_(0).float() / 127.5 - 1.0 + mask_torch = torch.from_numpy(mask_padded).unsqueeze_(0).unsqueeze_(0).float() / 255.0 + + # Binarize mask for AOT + mask_torch[mask_torch < 0.5] = 0 + mask_torch[mask_torch >= 0.5] = 1 + + # Move to device + img_torch = img_torch.to(self.device) + mask_torch = mask_torch.to(self.device) + + # Optional FP16 on GPU for lower VRAM + if self.quantize_enabled and self.use_gpu: + try: + if self.torch_precision == 'fp16' or self.torch_precision == 'auto': + img_torch = img_torch.half() + mask_torch = mask_torch.half() + except Exception: + pass + + # CRITICAL FOR AOT: Apply mask to input image + img_torch = img_torch * (1 - mask_torch) + + logger.debug(f"AOT Image shape: {img_torch.shape}, Mask shape: {mask_torch.shape}") + + # Run inference + with torch.no_grad(): + inpainted = self.model(img_torch, mask_torch) + + # Post-process AOT output: denormalize from [-1, 1] to [0, 255] + result = ((inpainted.cpu().squeeze_(0).permute(1, 2, 0).numpy() + 1.0) * 127.5) + result = np.clip(np.round(result), 0, 255).astype(np.uint8) + + # CRITICAL: Convert RGB (model output) back to BGR (OpenCV expected) + result = cv2.cvtColor(result, cv2.COLOR_RGB2BGR) + + # Remove padding + result = self.remove_padding(result, padding) + self._log_inpaint_diag('jit-aot', result, mask) + + else: + # LaMa/Anime model processing + logger.debug(f"Using standard processing for {self.current_method}") + + # CRITICAL: Convert BGR (OpenCV) to RGB (LaMa/JIT models expected) + image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + + # Pad images to be divisible by mod + image_padded, padding = self.pad_img_to_modulo(image_rgb, self.pad_mod) + mask_padded, _ = self.pad_img_to_modulo(mask, self.pad_mod) + + # CRITICAL: Normalize to [0, 1] range for LaMa models + image_norm = image_padded.astype(np.float32) / 255.0 + mask_norm = mask_padded.astype(np.float32) / 255.0 + + # Binary mask (values > 0 become 1) + mask_binary = (mask_norm > 0) * 1.0 + + # Convert to PyTorch tensors with correct shape + # Image should be [B, C, H, W] + image_tensor = torch.from_numpy(image_norm).permute(2, 0, 1).unsqueeze(0).float() + mask_tensor = 
torch.from_numpy(mask_binary).unsqueeze(0).unsqueeze(0).float() + + # Move to device + image_tensor = image_tensor.to(self.device) + mask_tensor = mask_tensor.to(self.device) + + # Optional FP16 on GPU for lower VRAM + if self.quantize_enabled and self.use_gpu: + try: + if self.torch_precision == 'fp16' or self.torch_precision == 'auto': + image_tensor = image_tensor.half() + mask_tensor = mask_tensor.half() + except Exception: + pass + + # Debug shapes + logger.debug(f"Image tensor shape: {image_tensor.shape}") # Should be [1, 3, H, W] + logger.debug(f"Mask tensor shape: {mask_tensor.shape}") # Should be [1, 1, H, W] + + # Ensure spatial dimensions match + if image_tensor.shape[2:] != mask_tensor.shape[2:]: + logger.warning(f"Spatial dimension mismatch: image {image_tensor.shape[2:]}, mask {mask_tensor.shape[2:]}") + # Resize mask to match image + mask_tensor = F.interpolate(mask_tensor, size=image_tensor.shape[2:], mode='nearest') + + # Run inference with proper error handling + with torch.no_grad(): + try: + # Standard LaMa JIT models expect (image, mask) + inpainted = self.model(image_tensor, mask_tensor) + except RuntimeError as e: + error_str = str(e) + logger.error(f"Model inference failed: {error_str}") + + # If tensor size mismatch, log detailed info + if "size of tensor" in error_str.lower(): + logger.error(f"Image shape: {image_tensor.shape}") + logger.error(f"Mask shape: {mask_tensor.shape}") + + # Try transposing if needed + if "dimension 3" in error_str and "880" in error_str: + # This suggests the tensors might be in wrong format + # Try different permutation + logger.info("Attempting to fix tensor format...") + + # Ensure image is [B, C, H, W] not [B, H, W, C] + if image_tensor.shape[1] > 3: + image_tensor = image_tensor.permute(0, 3, 1, 2) + logger.info(f"Permuted image to: {image_tensor.shape}") + + # Try again + inpainted = self.model(image_tensor, mask_tensor) + else: + # As last resort, try swapped arguments + logger.info("Trying swapped arguments (mask, image)...") + inpainted = self.model(mask_tensor, image_tensor) + else: + raise e + + # Process output + # Output should be [B, C, H, W] + if len(inpainted.shape) == 4: + # Remove batch dimension and permute to [H, W, C] + result = inpainted[0].permute(1, 2, 0).detach().cpu().numpy() + else: + # Handle unexpected output shape + result = inpainted.detach().cpu().numpy() + if len(result.shape) == 3 and result.shape[0] == 3: + result = result.transpose(1, 2, 0) + + # Denormalize to 0-255 range + result = np.clip(result * 255, 0, 255).astype(np.uint8) + + # CRITICAL: Convert RGB (model output) back to BGR (OpenCV expected) + result = cv2.cvtColor(result, cv2.COLOR_RGB2BGR) + + # Remove padding + result = self.remove_padding(result, padding) + self._log_inpaint_diag('jit-lama', result, mask) + + else: + # Original checkpoint model processing (keep as is) + h, w = image.shape[:2] + size = 768 if self.current_method == 'anime' else 512 + + img_resized = cv2.resize(image, (size, size), interpolation=cv2.INTER_LANCZOS4) + mask_resized = cv2.resize(mask, (size, size), interpolation=cv2.INTER_NEAREST) + + img_norm = img_resized.astype(np.float32) / 127.5 - 1 + mask_norm = mask_resized.astype(np.float32) / 255.0 + + img_tensor = torch.from_numpy(img_norm).permute(2, 0, 1).unsqueeze(0).float() + mask_tensor = torch.from_numpy(mask_norm).unsqueeze(0).unsqueeze(0).float() + + if self.use_gpu and self.device: + img_tensor = img_tensor.to(self.device) + mask_tensor = mask_tensor.to(self.device) + + with torch.no_grad(): + output = 
self.model(img_tensor, mask_tensor) + + result = output.squeeze(0).permute(1, 2, 0).cpu().numpy() + result = ((result + 1) * 127.5).clip(0, 255).astype(np.uint8) + result = cv2.resize(result, (w, h), interpolation=cv2.INTER_LANCZOS4) + self._log_inpaint_diag('ckpt', result, mask) + + # Ensure result matches original size exactly + if result.shape[:2] != (orig_h, orig_w): + result = cv2.resize(result, (orig_w, orig_h), interpolation=cv2.INTER_LANCZOS4) + + # Apply refinement blending if requested + if refinement != 'fast': + # Ensure mask is same size as result + if mask.shape[:2] != (orig_h, orig_w): + mask = cv2.resize(mask, (orig_w, orig_h), interpolation=cv2.INTER_NEAREST) + + mask_3ch = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR) / 255.0 + kernel = cv2.getGaussianKernel(21, 5) + kernel = kernel @ kernel.T + mask_blur = cv2.filter2D(mask_3ch, -1, kernel) + result = (result * mask_blur + image * (1 - mask_blur)).astype(np.uint8) + + # No-op detection and auto-retry + try: + if self._is_noop(image, result, mask): + if _retry_attempt == 0: + logger.warning("⚠️ Inpainting produced no visible change; retrying with slight mask dilation and fast refinement") + kernel = np.ones((3, 3), np.uint8) + expanded_mask = cv2.dilate(mask, kernel, iterations=1) + return self.inpaint(image, expanded_mask, refinement='fast', _retry_attempt=1) + elif _retry_attempt == 1: + logger.warning("⚠️ Still no visible change after retry; attempting a second dilation and fast refinement") + kernel = np.ones((5, 5), np.uint8) + expanded_mask2 = cv2.dilate(mask, kernel, iterations=1) + return self.inpaint(image, expanded_mask2, refinement='fast', _retry_attempt=2) + else: + logger.warning("⚠️ No further retries; returning last result without fallback") + except Exception as e: + logger.debug(f"No-op detection step failed: {e}") + + logger.info("✅ Inpainted successfully!") + + # Force garbage collection to reduce memory spikes + try: + import gc + gc.collect() + # Clear CUDA cache if using GPU + if torch is not None and hasattr(torch, 'cuda') and torch.cuda.is_available(): + torch.cuda.empty_cache() + except Exception: + pass + + time.sleep(0.1) # Brief pause for stability + logger.debug("💤 Inpainting completion pausing briefly for stability") + return result + + except Exception as e: + logger.error(f"❌ Inpainting failed: {e}") + logger.error(traceback.format_exc()) + + # Return original image on failure + logger.warning("Returning original image due to error") + return image + + def inpaint_with_prompt(self, image, mask, prompt=None): + """Compatibility method""" + return self.inpaint(image, mask) + + def batch_inpaint(self, images, masks): + """Batch inpainting""" + return [self.inpaint(img, mask) for img, mask in zip(images, masks)] + + def load_bubble_model(self, model_path: str) -> bool: + """Load bubble detection model""" + if not BUBBLE_DETECTOR_AVAILABLE: + logger.warning("Bubble detector not available") + return False + + if self.bubble_detector is None: + self.bubble_detector = BubbleDetector() + + if self.bubble_detector.load_model(model_path): + self.bubble_model_loaded = True + self.config['bubble_model_path'] = model_path + self._save_config() + logger.info("✅ Bubble detection model loaded") + return True + + return False + + def detect_bubbles(self, image_path: str, confidence: float = 0.5) -> List[Tuple[int, int, int, int]]: + """Detect speech bubbles in image""" + if not self.bubble_model_loaded or self.bubble_detector is None: + logger.warning("No bubble model loaded") + return [] + + return 
self.bubble_detector.detect_bubbles(image_path, confidence=confidence) + + def create_bubble_mask(self, image: np.ndarray, bubbles: List[Tuple[int, int, int, int]], + expand_pixels: int = 5) -> np.ndarray: + """Create mask from detected bubbles""" + h, w = image.shape[:2] + mask = np.zeros((h, w), dtype=np.uint8) + + for x, y, bw, bh in bubbles: + x1 = max(0, x - expand_pixels) + y1 = max(0, y - expand_pixels) + x2 = min(w, x + bw + expand_pixels) + y2 = min(h, y + bh + expand_pixels) + + cv2.rectangle(mask, (x1, y1), (x2, y2), 255, -1) + + return mask + + def inpaint_with_bubble_detection(self, image_path: str, confidence: float = 0.5, + expand_pixels: int = 5, refinement: str = 'normal') -> np.ndarray: + """Inpaint using automatic bubble detection""" + image = cv2.imread(image_path) + if image is None: + logger.error(f"Failed to load image: {image_path}") + return None + + bubbles = self.detect_bubbles(image_path, confidence) + if not bubbles: + logger.warning("No bubbles detected") + return image + + logger.info(f"Detected {len(bubbles)} bubbles") + + mask = self.create_bubble_mask(image, bubbles, expand_pixels) + result = self.inpaint(image, mask, refinement) + + return result + + def batch_inpaint_with_bubbles(self, image_paths: List[str], **kwargs) -> List[np.ndarray]: + """Batch inpaint multiple images with bubble detection""" + results = [] + + for i, image_path in enumerate(image_paths): + logger.info(f"Processing image {i+1}/{len(image_paths)}") + result = self.inpaint_with_bubble_detection(image_path, **kwargs) + results.append(result) + + return results + + +# Compatibility classes - MAINTAIN ALL ORIGINAL CLASSES +class LaMaModel(FFCInpaintModel): + pass + +class MATModel(FFCInpaintModel): + pass + +class AOTModel(FFCInpaintModel): + pass + +class SDInpaintModel(FFCInpaintModel): + pass + +class AnimeMangaInpaintModel(FFCInpaintModel): + pass + +class LaMaOfficialModel(FFCInpaintModel): + pass + + +class HybridInpainter: + """Hybrid inpainter for compatibility""" + + def __init__(self): + self.inpainters = {} + + def add_method(self, name, method, model_path): + """Add a method - maintains compatibility""" + try: + inpainter = LocalInpainter() + if inpainter.load_model(method, model_path): + self.inpainters[name] = inpainter + return True + except: + pass + return False + + def inpaint_ensemble(self, image: np.ndarray, mask: np.ndarray, + weights: Dict[str, float] = None) -> np.ndarray: + """Ensemble inpainting""" + if not self.inpainters: + logger.error("No inpainters loaded") + return image + + if weights is None: + weights = {name: 1.0 / len(self.inpainters) for name in self.inpainters} + + results = [] + for name, inpainter in self.inpainters.items(): + result = inpainter.inpaint(image, mask) + weight = weights.get(name, 1.0 / len(self.inpainters)) + results.append(result * weight) + + ensemble = np.sum(results, axis=0).astype(np.uint8) + return ensemble + + +# Helper function for quick setup +def setup_inpainter_for_manga(auto_download=True): + """Quick setup for manga inpainting""" + inpainter = LocalInpainter() + + if auto_download: + # Try to download anime JIT model + jit_path = inpainter.download_jit_model('anime') + if jit_path: + inpainter.load_model('anime', jit_path) + logger.info("✅ Manga inpainter ready with JIT model") + + return inpainter + + +if __name__ == "__main__": + import sys + + if len(sys.argv) > 1: + if sys.argv[1] == "download_jit": + # Download JIT models + inpainter = LocalInpainter() + for method in ['lama', 'anime', 'lama_official']: + 
print(f"\nDownloading {method}...") + path = inpainter.download_jit_model(method) + if path: + print(f" ✅ Downloaded to: {path}") + + elif len(sys.argv) > 2: + # Test with model + inpainter = LocalInpainter() + inpainter.load_model('lama', sys.argv[1]) + print("Model loaded - check logs for details") + + else: + print("\nLocal Inpainter - Compatible Version") + print("=====================================") + print("\nSupports both:") + print(" - JIT models (.pt) - RECOMMENDED") + print(" - Checkpoint files (.ckpt) - With warnings") + print("\nTo download JIT models:") + print(" python local_inpainter.py download_jit") + print("\nTo test:") + print(" python local_inpainter.py <model_path>") \ No newline at end of file diff --git a/manga_integration.py b/manga_integration.py new file mode 100644 index 0000000000000000000000000000000000000000..81c41f48ddd5fa88da4866046d04a1d1dc30da65 --- /dev/null +++ b/manga_integration.py @@ -0,0 +1,8390 @@ +# manga_integration.py +""" +Enhanced GUI Integration module for Manga Translation with text visibility controls +Integrates with TranslatorGUI using WindowManager and existing infrastructure +Now includes full page context mode with customizable prompt +""" +import sys +import os +import json +import threading +import time +import hashlib +import traceback +import concurrent.futures +from PySide6.QtWidgets import (QWidget, QLabel, QFrame, QPushButton, QVBoxLayout, QHBoxLayout, + QGroupBox, QListWidget, QComboBox, QLineEdit, QCheckBox, + QRadioButton, QSlider, QSpinBox, QDoubleSpinBox, QTextEdit, + QProgressBar, QFileDialog, QMessageBox, QColorDialog, QScrollArea, + QDialog, QButtonGroup, QApplication) +from PySide6.QtCore import Qt, QTimer, Signal, QObject, Slot, QEvent +from PySide6.QtGui import QFont, QColor, QTextCharFormat, QIcon, QKeyEvent +import tkinter as tk +from tkinter import ttk, filedialog as tk_filedialog, messagebox as tk_messagebox, scrolledtext +try: + import ttkbootstrap as tb +except ImportError: + tb = ttk +from typing import List, Dict, Optional, Any +from queue import Queue +import logging +from manga_translator import MangaTranslator, GOOGLE_CLOUD_VISION_AVAILABLE +from manga_settings_dialog import MangaSettingsDialog + + +# Try to import UnifiedClient for API initialization +try: + from unified_api_client import UnifiedClient +except ImportError: + UnifiedClient = None + +# Module-level function for multiprocessing (must be picklable) +def _preload_models_worker(models_list, progress_queue): + """Worker function to preload models in separate process (module-level for pickling)""" + try: + total_steps = len(models_list) + + for idx, (model_type, model_key, model_name, model_path) in enumerate(models_list): + try: + # Send start progress + base_progress = int((idx / total_steps) * 100) + progress_queue.put(('progress', base_progress, model_name)) + + if model_type == 'detector': + from bubble_detector import BubbleDetector + from manga_translator import MangaTranslator + + # Progress: 0-25% of this model's portion + progress_queue.put(('progress', base_progress + int(25 / total_steps), f"{model_name} - Initializing")) + bd = BubbleDetector() + + # Progress: 25-75% - loading model + progress_queue.put(('progress', base_progress + int(50 / total_steps), f"{model_name} - Downloading/Loading")) + if model_key == 'rtdetr_onnx': + model_repo = model_path if model_path else 'ogkalu/comic-text-and-bubble-detector' + bd.load_rtdetr_onnx_model(model_repo) + elif model_key == 'rtdetr': + bd.load_rtdetr_model() + elif model_key == 'yolo': + 
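+                    # Assumed behavior noted for clarity: YOLO needs an explicit local model path; when none is configured, nothing is preloaded for it here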
if model_path: + bd.load_model(model_path) + + # Progress: 75-100% - finalizing + progress_queue.put(('progress', base_progress + int(75 / total_steps), f"{model_name} - Finalizing")) + progress_queue.put(('loaded', model_type, model_name)) + + elif model_type == 'inpainter': + from local_inpainter import LocalInpainter + + # Progress: 0-25% + progress_queue.put(('progress', base_progress + int(25 / total_steps), f"{model_name} - Initializing")) + inp = LocalInpainter() + resolved_path = model_path + + if not resolved_path or not os.path.exists(resolved_path): + # Progress: 25-50% - downloading + progress_queue.put(('progress', base_progress + int(40 / total_steps), f"{model_name} - Downloading")) + try: + resolved_path = inp.download_jit_model(model_key) + except: + resolved_path = None + + if resolved_path and os.path.exists(resolved_path): + # Progress: 50-90% - loading + progress_queue.put(('progress', base_progress + int(60 / total_steps), f"{model_name} - Loading model")) + success = inp.load_model_with_retry(model_key, resolved_path) + + # Progress: 90-100% - finalizing + progress_queue.put(('progress', base_progress + int(85 / total_steps), f"{model_name} - Finalizing")) + if success: + progress_queue.put(('loaded', model_type, model_name)) + + except Exception as e: + progress_queue.put(('error', model_name, str(e))) + + # Send completion signal + progress_queue.put(('complete', None, None)) + + except Exception as e: + progress_queue.put(('error', 'Process', str(e))) + +class _MangaGuiLogHandler(logging.Handler): + """Forward logging records into MangaTranslationTab._log.""" + def __init__(self, gui_ref, level=logging.INFO): + super().__init__(level) + self.gui_ref = gui_ref + self._last_msg = None + self.setFormatter(logging.Formatter('%(levelname)s:%(name)s:%(message)s')) + + def emit(self, record: logging.LogRecord) -> None: + # Avoid looping/duplicates from this module's own messages or when stdio is redirected + try: + if getattr(self.gui_ref, '_stdio_redirect_active', False): + return + # Filter out manga_translator, bubble_detector, local_inpainter logs as they're already shown + if record and isinstance(record.name, str): + if record.name.startswith(('manga_integration', 'manga_translator', 'bubble_detector', 'local_inpainter', 'unified_api_client', 'google_genai', 'httpx')): + return + except Exception: + pass + try: + msg = self.format(record) + except Exception: + msg = record.getMessage() + # Deduplicate identical consecutive messages + if msg == self._last_msg: + return + self._last_msg = msg + + # Map logging levels to our tag levels + lvl = record.levelname.lower() + tag = 'info' + if lvl.startswith('warn'): + tag = 'warning' + elif lvl.startswith('err') or lvl.startswith('crit'): + tag = 'error' + elif lvl.startswith('debug'): + tag = 'debug' + elif lvl.startswith('info'): + tag = 'info' + + # Always store to persistent log (even if GUI is closed) + try: + with MangaTranslationTab._persistent_log_lock: + if len(MangaTranslationTab._persistent_log) >= 1000: + MangaTranslationTab._persistent_log.pop(0) + MangaTranslationTab._persistent_log.append((msg, tag)) + except Exception: + pass + + # Also try to display in GUI if it exists + try: + if hasattr(self.gui_ref, '_log'): + self.gui_ref._log(msg, tag) + except Exception: + pass + +class _StreamToGuiLog: + """A minimal file-like stream that forwards lines to _log.""" + def __init__(self, write_cb): + self._write_cb = write_cb + self._buf = '' + + def write(self, s: str): + try: + self._buf += s + while '\n' in 
self._buf: + line, self._buf = self._buf.split('\n', 1) + if line.strip(): + self._write_cb(line) + except Exception: + pass + + def flush(self): + try: + if self._buf.strip(): + self._write_cb(self._buf) + self._buf = '' + except Exception: + pass + +class MangaTranslationTab: + """GUI interface for manga translation integrated with TranslatorGUI""" + + # Class-level cancellation flag for all instances + _global_cancelled = False + _global_cancel_lock = threading.RLock() + + # Class-level log storage to persist across window closures + _persistent_log = [] + _persistent_log_lock = threading.RLock() + + # Class-level preload tracking to prevent duplicate loading + _preload_in_progress = False + _preload_lock = threading.RLock() + _preload_completed_models = set() # Track which models have been loaded + + @classmethod + def set_global_cancellation(cls, cancelled: bool): + """Set global cancellation flag for all translation instances""" + with cls._global_cancel_lock: + cls._global_cancelled = cancelled + + @classmethod + def is_globally_cancelled(cls) -> bool: + """Check if globally cancelled""" + with cls._global_cancel_lock: + return cls._global_cancelled + + def __init__(self, parent_widget, main_gui, dialog, scroll_area=None): + """Initialize manga translation interface + + Args: + parent_widget: The content widget for the interface (PySide6 QWidget) + main_gui: Reference to TranslatorGUI instance + dialog: The dialog window (PySide6 QDialog) + scroll_area: The scroll area widget (PySide6 QScrollArea, optional) + """ + # CRITICAL: Set thread limits FIRST before any imports or processing + import os + parallel_enabled = main_gui.config.get('manga_settings', {}).get('advanced', {}).get('parallel_processing', False) + if not parallel_enabled: + # Force single-threaded mode for all libraries + os.environ['OMP_NUM_THREADS'] = '1' + os.environ['MKL_NUM_THREADS'] = '1' + os.environ['OPENBLAS_NUM_THREADS'] = '1' + os.environ['NUMEXPR_NUM_THREADS'] = '1' + os.environ['VECLIB_MAXIMUM_THREADS'] = '1' + os.environ['ONNXRUNTIME_NUM_THREADS'] = '1' + # Also set torch and cv2 thread limits if already imported + try: + import torch + torch.set_num_threads(1) + except (ImportError, RuntimeError): + pass + try: + import cv2 + cv2.setNumThreads(1) + except (ImportError, AttributeError): + pass + + self.parent_widget = parent_widget + self.main_gui = main_gui + self.dialog = dialog + self.scroll_area = scroll_area + + # Translation state + self.translator = None + self.is_running = False + self.stop_flag = threading.Event() + self.translation_thread = None + self.translation_future = None + # Shared executor from main GUI if available + try: + if hasattr(self.main_gui, 'executor') and self.main_gui.executor: + self.executor = self.main_gui.executor + else: + self.executor = None + except Exception: + self.executor = None + self.selected_files = [] + self.current_file_index = 0 + self.font_mapping = {} # Initialize font mapping dictionary + + + # Progress tracking + self.total_files = 0 + self.completed_files = 0 + self.failed_files = 0 + self.qwen2vl_model_size = self.main_gui.config.get('qwen2vl_model_size', '1') + + # Advanced performance toggles + try: + adv_cfg = self.main_gui.config.get('manga_settings', {}).get('advanced', {}) + except Exception: + adv_cfg = {} + # In singleton mode, reduce OpenCV thread usage to avoid CPU spikes + try: + if bool(adv_cfg.get('use_singleton_models', False)): + import cv2 as _cv2 + try: + _cv2.setNumThreads(1) + except Exception: + pass + except Exception: + pass + # 
Do NOT preload big local models by default to avoid startup crashes + self.preload_local_models_on_open = bool(adv_cfg.get('preload_local_models_on_open', False)) + + # Queue for thread-safe GUI updates + self.update_queue = Queue() + + # Flags for stdio redirection to avoid duplicate GUI logs + self._stdout_redirect_on = False + self._stderr_redirect_on = False + self._stdio_redirect_active = False + + # Flag to prevent saving during initialization + self._initializing = True + + # IMPORTANT: Load settings BEFORE building interface + # This ensures all variables are initialized before they're used in the GUI + self._load_rendering_settings() + + # Initialize the full page context prompt + self.full_page_context_prompt = ( + "You will receive multiple text segments from a manga page, each prefixed with an index like [0], [1], etc. " + "Translate each segment considering the context of all segments together. " + "Maintain consistency in character names, tone, and style across all translations.\n\n" + "CRITICAL: Return your response as a valid JSON object where each key includes BOTH the index prefix " + "AND the original text EXACTLY as provided (e.g., '[0] こんにちは'), and each value is the translation.\n" + "This is essential for correct mapping - do not modify or omit the index prefixes!\n\n" + "Make sure to properly escape any special characters in the JSON:\n" + "- Use \\n for newlines\n" + "- Use \\\" for quotes\n" + "- Use \\\\ for backslashes\n\n" + "Example:\n" + '{\n' + ' "[0] こんにちは": "Hello",\n' + ' "[1] ありがとう": "Thank you",\n' + ' "[2] さようなら": "Goodbye"\n' + '}\n\n' + 'REMEMBER: Keep the [index] prefix in each JSON key exactly as shown in the input!' + ) + + # Initialize the OCR system prompt + self.ocr_prompt = self.main_gui.config.get('manga_ocr_prompt', + "YOU ARE AN OCR SYSTEM. YOUR ONLY JOB IS TEXT EXTRACTION.\n\n" + "CRITICAL RULES:\n" + "1. DO NOT TRANSLATE ANYTHING\n" + "2. DO NOT MODIFY THE TEXT\n" + "3. DO NOT EXPLAIN OR COMMENT\n" + "4. ONLY OUTPUT THE EXACT TEXT YOU SEE\n" + "5. PRESERVE NATURAL TEXT FLOW - DO NOT ADD UNNECESSARY LINE BREAKS\n\n" + "If you see Korean text, output it in Korean.\n" + "If you see Japanese text, output it in Japanese.\n" + "If you see Chinese text, output it in Chinese.\n" + "If you see English text, output it in English.\n\n" + "IMPORTANT: Only use line breaks where they naturally occur in the original text " + "(e.g., between dialogue lines or paragraphs). Do not break text mid-sentence or " + "between every word/character.\n\n" + "For vertical text common in manga/comics, transcribe it as a continuous line unless " + "there are clear visual breaks.\n\n" + "NEVER translate. ONLY extract exactly what is written.\n" + "Output ONLY the raw text, nothing else." 
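+            # (the string above is only the fallback default; a saved 'manga_ocr_prompt' value in config takes precedence via the config.get call)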
+ ) + + # flag to skip status checks during init + self._initializing_gui = True + + # Build interface AFTER loading settings + self._build_interface() + + # Now allow status checks + self._initializing_gui = False + + # Do one status check after everything is built + # Use QTimer for PySide6 dialog + QTimer.singleShot(100, self._check_provider_status) + + # Start model preloading in background + QTimer.singleShot(200, self._start_model_preloading) + + # Now that everything is initialized, allow saving + self._initializing = False + + # Attach logging bridge so library logs appear in our log area + self._attach_logging_bridge() + + # Start update loop + self._process_updates() + + # Install event filter for F11 fullscreen toggle + self._install_fullscreen_handler() + + def _is_stop_requested(self) -> bool: + """Check if stop has been requested using multiple sources""" + # Check global cancellation first + if self.is_globally_cancelled(): + return True + + # Check local stop flag + if hasattr(self, 'stop_flag') and self.stop_flag.is_set(): + return True + + # Check running state + if hasattr(self, 'is_running') and not self.is_running: + return True + + return False + + def _reset_global_cancellation(self): + """Reset all global cancellation flags for new translation""" + # Reset local class flag + self.set_global_cancellation(False) + + # Reset MangaTranslator class flag + try: + from manga_translator import MangaTranslator + MangaTranslator.set_global_cancellation(False) + except ImportError: + pass + + # Reset UnifiedClient flag + try: + from unified_api_client import UnifiedClient + UnifiedClient.set_global_cancellation(False) + except ImportError: + pass + + def reset_stop_flags(self): + """Reset all stop flags when starting new translation""" + self.is_running = False + if hasattr(self, 'stop_flag'): + self.stop_flag.clear() + self._reset_global_cancellation() + self._log("🔄 Stop flags reset for new translation", "debug") + + def _install_fullscreen_handler(self): + """Install event filter to handle F11 key for fullscreen toggle""" + if not self.dialog: + return + + # Create event filter for the dialog + class FullscreenEventFilter(QObject): + def __init__(self, dialog_ref): + super().__init__() + self.dialog = dialog_ref + self.is_fullscreen = False + self.normal_geometry = None + + def eventFilter(self, obj, event): + if event.type() == QEvent.KeyPress: + key_event = event + if key_event.key() == Qt.Key_F11: + self.toggle_fullscreen() + return True + return False + + def toggle_fullscreen(self): + if self.is_fullscreen: + # Exit fullscreen + self.dialog.setWindowState(self.dialog.windowState() & ~Qt.WindowFullScreen) + if self.normal_geometry: + self.dialog.setGeometry(self.normal_geometry) + self.is_fullscreen = False + else: + # Enter fullscreen + self.normal_geometry = self.dialog.geometry() + self.dialog.setWindowState(self.dialog.windowState() | Qt.WindowFullScreen) + self.is_fullscreen = True + + # Create and install the event filter + self._fullscreen_filter = FullscreenEventFilter(self.dialog) + self.dialog.installEventFilter(self._fullscreen_filter) + + def _distribute_stop_flags(self): + """Distribute stop flags to all manga translation components""" + if not hasattr(self, 'translator') or not self.translator: + return + + # Set stop flag on translator + if hasattr(self.translator, 'set_stop_flag'): + self.translator.set_stop_flag(self.stop_flag) + + # Set stop flag on OCR manager and all providers + if hasattr(self.translator, 'ocr_manager') and 
self.translator.ocr_manager: + if hasattr(self.translator.ocr_manager, 'set_stop_flag'): + self.translator.ocr_manager.set_stop_flag(self.stop_flag) + + # Set stop flag on bubble detector if available + if hasattr(self.translator, 'bubble_detector') and self.translator.bubble_detector: + if hasattr(self.translator.bubble_detector, 'set_stop_flag'): + self.translator.bubble_detector.set_stop_flag(self.stop_flag) + + # Set stop flag on local inpainter if available + if hasattr(self.translator, 'local_inpainter') and self.translator.local_inpainter: + if hasattr(self.translator.local_inpainter, 'set_stop_flag'): + self.translator.local_inpainter.set_stop_flag(self.stop_flag) + + # Also try to set on thread-local components if accessible + if hasattr(self.translator, '_thread_local'): + thread_local = self.translator._thread_local + # Set on thread-local bubble detector + if hasattr(thread_local, 'bubble_detector') and thread_local.bubble_detector: + if hasattr(thread_local.bubble_detector, 'set_stop_flag'): + thread_local.bubble_detector.set_stop_flag(self.stop_flag) + + # Set on thread-local inpainters + if hasattr(thread_local, 'local_inpainters') and isinstance(thread_local.local_inpainters, dict): + for inpainter in thread_local.local_inpainters.values(): + if hasattr(inpainter, 'set_stop_flag'): + inpainter.set_stop_flag(self.stop_flag) + + self._log("🔄 Stop flags distributed to all components", "debug") + + def _preflight_bubble_detector(self, ocr_settings: dict) -> bool: + """Check if bubble detector is preloaded in the pool or already loaded. + Returns True if a ready instance or preloaded spare is available; no heavy loads are performed here. + """ + try: + import time as _time + start = _time.time() + if not ocr_settings.get('bubble_detection_enabled', False): + return False + det_type = ocr_settings.get('detector_type', 'rtdetr_onnx') + model_id = ocr_settings.get('rtdetr_model_url') or ocr_settings.get('bubble_model_path') or '' + + # 1) If translator already has a ready detector, report success + try: + bd = getattr(self, 'translator', None) and getattr(self.translator, 'bubble_detector', None) + if bd and (getattr(bd, 'rtdetr_loaded', False) or getattr(bd, 'rtdetr_onnx_loaded', False) or getattr(bd, 'model_loaded', False)): + self._log("🤖 Bubble detector already loaded", "debug") + return True + except Exception: + pass + + # 2) Check shared preload pool for spares + try: + from manga_translator import MangaTranslator + key = (det_type, model_id) + with MangaTranslator._detector_pool_lock: + rec = MangaTranslator._detector_pool.get(key) + spares = (rec or {}).get('spares') or [] + if len(spares) > 0: + self._log(f"🤖 Preflight: found {len(spares)} preloaded bubble detector spare(s) for key={key}", "info") + return True + except Exception: + pass + + # 3) No spares/ready detector yet; do not load here. Just report timing and return False. 
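+            # Note: the pool key matches the (det_type, model_id) pair built above, so a preloaded spare only counts when both the detector type and the model URL/path agree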
+ elapsed = _time.time() - start + self._log(f"⏱️ Preflight checked bubble detector pool in {elapsed:.2f}s — no ready instance", "debug") + return False + except Exception: + return False + + def _start_model_preloading(self): + """Start preloading models in separate process for true background loading""" + from multiprocessing import Process, Queue as MPQueue + import queue + + # Check if preload is already in progress + with MangaTranslationTab._preload_lock: + if MangaTranslationTab._preload_in_progress: + print("Model preloading already in progress, skipping...") + return + + # Get settings + manga_settings = self.main_gui.config.get('manga_settings', {}) + ocr_settings = manga_settings.get('ocr', {}) + inpaint_settings = manga_settings.get('inpainting', {}) + + models_to_load = [] + bubble_detection_enabled = ocr_settings.get('bubble_detection_enabled', False) + skip_inpainting = self.main_gui.config.get('manga_skip_inpainting', False) + inpainting_method = inpaint_settings.get('method', 'local') + inpainting_enabled = not skip_inpainting and inpainting_method == 'local' + + # Check if models need loading + try: + from manga_translator import MangaTranslator + + if bubble_detection_enabled: + detector_type = ocr_settings.get('detector_type', 'rtdetr_onnx') + model_url = ocr_settings.get('rtdetr_model_url') or ocr_settings.get('bubble_model_path') or '' + key = (detector_type, model_url) + model_id = f"detector_{detector_type}_{model_url}" + + # Skip if already loaded in this session + if model_id not in MangaTranslationTab._preload_completed_models: + with MangaTranslator._detector_pool_lock: + rec = MangaTranslator._detector_pool.get(key) + if not rec or (not rec.get('spares') and not rec.get('loaded')): + detector_name = 'RT-DETR ONNX' if detector_type == 'rtdetr_onnx' else 'RT-DETR' if detector_type == 'rtdetr' else 'YOLO' + models_to_load.append(('detector', detector_type, detector_name, model_url)) + + if inpainting_enabled: + # Check top-level config first (manga_local_inpaint_model), then nested config + local_method = self.main_gui.config.get('manga_local_inpaint_model', + inpaint_settings.get('local_method', 'anime_onnx')) + model_path = self.main_gui.config.get(f'manga_{local_method}_model_path', '') + # Fallback to non-prefixed key if not found + if not model_path: + model_path = self.main_gui.config.get(f'{local_method}_model_path', '') + key = (local_method, model_path or '') + model_id = f"inpainter_{local_method}_{model_path}" + + # Skip if already loaded in this session + if model_id not in MangaTranslationTab._preload_completed_models: + with MangaTranslator._inpaint_pool_lock: + rec = MangaTranslator._inpaint_pool.get(key) + if not rec or (not rec.get('loaded') and not rec.get('spares')): + models_to_load.append(('inpainter', local_method, local_method.capitalize(), model_path)) + except Exception as e: + print(f"Error checking models: {e}") + return + + if not models_to_load: + return + + # Set preload in progress flag + with MangaTranslationTab._preload_lock: + MangaTranslationTab._preload_in_progress = True + + # Show progress bar + self.preload_progress_frame.setVisible(True) + + # Create queue for IPC + progress_queue = MPQueue() + + # Start loading in separate process using module-level function + load_process = Process(target=_preload_models_worker, args=(models_to_load, progress_queue), daemon=True) + load_process.start() + + # Store models being loaded for tracking + models_being_loaded = [] + for model_type, model_key, model_name, model_path in 
models_to_load: + if model_type == 'detector': + models_being_loaded.append(f"detector_{model_key}_{model_path}") + elif model_type == 'inpainter': + models_being_loaded.append(f"inpainter_{model_key}_{model_path}") + + # Monitor progress with QTimer + def check_progress(): + try: + while True: + try: + msg = progress_queue.get_nowait() + msg_type = msg[0] + + if msg_type == 'progress': + _, progress, model_name = msg + self.preload_progress_bar.setValue(progress) + self.preload_status_label.setText(f"Loading {model_name}...") + + elif msg_type == 'loaded': + _, model_type, model_name = msg + print(f"✓ Loaded {model_name}") + + elif msg_type == 'error': + _, model_name, error = msg + print(f"✗ Failed to load {model_name}: {error}") + + elif msg_type == 'complete': + # Child process cached models + self.preload_progress_bar.setValue(100) + self.preload_status_label.setText("✓ Models ready") + + # Mark all models as completed + with MangaTranslationTab._preload_lock: + MangaTranslationTab._preload_completed_models.update(models_being_loaded) + MangaTranslationTab._preload_in_progress = False + + # Load RT-DETR into pool in background (doesn't block GUI) + def load_rtdetr_bg(): + try: + from manga_translator import MangaTranslator + from bubble_detector import BubbleDetector + + for model_type, model_key, model_name, model_path in models_to_load: + if model_type == 'detector' and model_key == 'rtdetr_onnx': + key = (model_key, model_path) + + # Check if already loaded + with MangaTranslator._detector_pool_lock: + rec = MangaTranslator._detector_pool.get(key) + if rec and rec.get('spares'): + print(f"⏭️ {model_name} already in pool") + continue + + # Load into pool + bd = BubbleDetector() + model_repo = model_path if model_path else 'ogkalu/comic-text-and-bubble-detector' + bd.load_rtdetr_onnx_model(model_repo) + + with MangaTranslator._detector_pool_lock: + rec = MangaTranslator._detector_pool.get(key) + if not rec: + rec = {'spares': []} + MangaTranslator._detector_pool[key] = rec + rec['spares'].append(bd) + print(f"✓ Loaded {model_name} into pool (background)") + except Exception as e: + print(f"✗ Background RT-DETR loading error: {e}") + + # Start background loading + threading.Thread(target=load_rtdetr_bg, daemon=True).start() + + QTimer.singleShot(2000, lambda: self.preload_progress_frame.setVisible(False)) + return + + except queue.Empty: + break + + QTimer.singleShot(100, check_progress) + + except Exception as e: + print(f"Progress check error: {e}") + self.preload_progress_frame.setVisible(False) + # Reset flag on error + with MangaTranslationTab._preload_lock: + MangaTranslationTab._preload_in_progress = False + + QTimer.singleShot(100, check_progress) + + def _disable_spinbox_mousewheel(self, spinbox): + """Disable mousewheel scrolling on a spinbox (PySide6)""" + # Override wheelEvent to prevent scrolling + spinbox.wheelEvent = lambda event: None + + def _disable_combobox_mousewheel(self, combobox): + """Disable mousewheel scrolling on a combobox (PySide6)""" + # Override wheelEvent to prevent scrolling + combobox.wheelEvent = lambda event: None + + def _create_styled_checkbox(self, text): + """Create a checkbox with proper checkmark using text overlay""" + from PySide6.QtWidgets import QCheckBox, QLabel + from PySide6.QtCore import Qt, QTimer + from PySide6.QtGui import QFont + + checkbox = QCheckBox(text) + checkbox.setStyleSheet(""" + QCheckBox { + color: white; + spacing: 6px; + } + QCheckBox::indicator { + width: 14px; + height: 14px; + border: 1px solid #5a9fd4; + 
border-radius: 2px; + background-color: #2d2d2d; + } + QCheckBox::indicator:checked { + background-color: #5a9fd4; + border-color: #5a9fd4; + } + QCheckBox::indicator:hover { + border-color: #7bb3e0; + } + QCheckBox:disabled { + color: #666666; + } + QCheckBox::indicator:disabled { + background-color: #1a1a1a; + border-color: #3a3a3a; + } + """) + + # Create checkmark overlay + checkmark = QLabel("✓", checkbox) + checkmark.setStyleSheet(""" + QLabel { + color: white; + background: transparent; + font-weight: bold; + font-size: 11px; + } + """) + checkmark.setAlignment(Qt.AlignCenter) + checkmark.hide() + checkmark.setAttribute(Qt.WA_TransparentForMouseEvents) # Make checkmark click-through + + # Position checkmark properly after widget is shown + def position_checkmark(): + # Position over the checkbox indicator + checkmark.setGeometry(2, 1, 14, 14) + + # Show/hide checkmark based on checked state + def update_checkmark(): + if checkbox.isChecked(): + position_checkmark() + checkmark.show() + else: + checkmark.hide() + + checkbox.stateChanged.connect(update_checkmark) + # Delay initial positioning to ensure widget is properly rendered + QTimer.singleShot(0, lambda: (position_checkmark(), update_checkmark())) + + return checkbox + + def _download_hf_model(self): + """Download HuggingFace models with progress tracking - PySide6 version""" + from PySide6.QtWidgets import (QDialog, QVBoxLayout, QHBoxLayout, QLabel, + QRadioButton, QButtonGroup, QLineEdit, QPushButton, + QGroupBox, QTextEdit, QProgressBar, QFrame, + QScrollArea, QWidget, QSizePolicy) + from PySide6.QtCore import Qt, QThread, Signal, QTimer + from PySide6.QtGui import QFont + + provider = self.ocr_provider_value + + # Model sizes (approximate in MB) + model_sizes = { + 'manga-ocr': 450, + 'Qwen2-VL': { + '2B': 4000, + '7B': 14000, + '72B': 144000, + 'custom': 10000 # Default estimate for custom models + } + } + + # For Qwen2-VL, show model selection dialog first + if provider == 'Qwen2-VL': + # Create PySide6 dialog + selection_dialog = QDialog(self.dialog) + selection_dialog.setWindowTitle("Select Qwen2-VL Model Size") + selection_dialog.setMinimumSize(600, 500) + main_layout = QVBoxLayout(selection_dialog) + + # Title + title_label = QLabel("Select Qwen2-VL Model Size") + title_font = QFont("Arial", 14, QFont.Weight.Bold) + title_label.setFont(title_font) + title_label.setAlignment(Qt.AlignmentFlag.AlignCenter) + main_layout.addWidget(title_label) + + # Model selection frame + model_frame = QGroupBox("Model Options") + model_frame_font = QFont("Arial", 11, QFont.Weight.Bold) + model_frame.setFont(model_frame_font) + model_frame_layout = QVBoxLayout(model_frame) + model_frame_layout.setContentsMargins(15, 15, 15, 15) + model_frame_layout.setSpacing(10) + + model_options = { + "2B": { + "title": "2B Model", + "desc": "• Smallest model (~4GB download, 4-8GB VRAM)\n• Fast but less accurate\n• Good for quick testing" + }, + "7B": { + "title": "7B Model", + "desc": "• Medium model (~14GB download, 12-16GB VRAM)\n• Best balance of speed and quality\n• Recommended for most users" + }, + "72B": { + "title": "72B Model", + "desc": "• Largest model (~144GB download, 80GB+ VRAM)\n• Highest quality but very slow\n• Requires high-end GPU" + }, + "custom": { + "title": "Custom Model", + "desc": "• Enter any Hugging Face model ID\n• For advanced users\n• Size varies by model" + } + } + + # Store selected model + selected_model_key = {"value": "2B"} + custom_model_id_text = {"value": ""} + + # Radio button group + button_group = 
QButtonGroup(selection_dialog) + + for idx, (key, info) in enumerate(model_options.items()): + # Radio button + rb = QRadioButton(info["title"]) + rb_font = QFont("Arial", 11, QFont.Weight.Bold) + rb.setFont(rb_font) + if idx == 0: + rb.setChecked(True) + rb.clicked.connect(lambda checked, k=key: selected_model_key.update({"value": k})) + button_group.addButton(rb) + model_frame_layout.addWidget(rb) + + # Description + desc_label = QLabel(info["desc"]) + desc_font = QFont("Arial", 9) + desc_label.setFont(desc_font) + desc_label.setStyleSheet("color: #666666; margin-left: 20px;") + model_frame_layout.addWidget(desc_label) + + # Separator + if key != "custom": + separator = QFrame() + separator.setFrameShape(QFrame.Shape.HLine) + separator.setFrameShadow(QFrame.Shadow.Sunken) + model_frame_layout.addWidget(separator) + + main_layout.addWidget(model_frame) + + # Custom model ID frame (initially hidden) + custom_frame = QGroupBox("Custom Model ID") + custom_frame_font = QFont("Arial", 11, QFont.Weight.Bold) + custom_frame.setFont(custom_frame_font) + custom_frame_layout = QHBoxLayout(custom_frame) + custom_frame_layout.setContentsMargins(15, 15, 15, 15) + + custom_label = QLabel("Model ID:") + custom_label_font = QFont("Arial", 10) + custom_label.setFont(custom_label_font) + custom_frame_layout.addWidget(custom_label) + + custom_entry = QLineEdit() + custom_entry.setPlaceholderText("e.g., Qwen/Qwen2-VL-2B-Instruct") + custom_entry.setFont(custom_label_font) + custom_entry.textChanged.connect(lambda text: custom_model_id_text.update({"value": text})) + custom_frame_layout.addWidget(custom_entry) + + custom_frame.hide() # Hidden by default + main_layout.addWidget(custom_frame) + + # Toggle custom frame visibility + def toggle_custom_frame(): + if selected_model_key["value"] == "custom": + custom_frame.show() + else: + custom_frame.hide() + + for rb in button_group.buttons(): + rb.clicked.connect(toggle_custom_frame) + + # GPU status frame + gpu_frame = QGroupBox("System Status") + gpu_frame_font = QFont("Arial", 11, QFont.Weight.Bold) + gpu_frame.setFont(gpu_frame_font) + gpu_frame_layout = QVBoxLayout(gpu_frame) + gpu_frame_layout.setContentsMargins(15, 15, 15, 15) + + try: + import torch + if torch.cuda.is_available(): + gpu_mem = torch.cuda.get_device_properties(0).total_memory / 1e9 + gpu_text = f"✓ GPU: {torch.cuda.get_device_name(0)} ({gpu_mem:.1f}GB)" + gpu_color = '#4CAF50' + else: + gpu_text = "✗ No GPU detected - will use CPU (very slow)" + gpu_color = '#f44336' + except: + gpu_text = "? 
GPU status unknown - install torch with CUDA" + gpu_color = '#FF9800' + + gpu_label = QLabel(gpu_text) + gpu_label_font = QFont("Arial", 10) + gpu_label.setFont(gpu_label_font) + gpu_label.setStyleSheet(f"color: {gpu_color};") + gpu_frame_layout.addWidget(gpu_label) + + main_layout.addWidget(gpu_frame) + + # Buttons + button_layout = QHBoxLayout() + button_layout.addStretch() + + model_confirmed = {'value': False, 'model_key': None, 'model_id': None} + + def confirm_selection(): + selected = selected_model_key["value"] + if selected == "custom": + if not custom_model_id_text["value"].strip(): + from PySide6.QtWidgets import QMessageBox + QMessageBox.critical(selection_dialog, "Error", "Please enter a model ID") + return + model_confirmed['model_key'] = selected + model_confirmed['model_id'] = custom_model_id_text["value"].strip() + else: + model_confirmed['model_key'] = selected + model_confirmed['model_id'] = f"Qwen/Qwen2-VL-{selected}-Instruct" + model_confirmed['value'] = True + selection_dialog.accept() + + proceed_btn = QPushButton("Continue") + proceed_btn.setStyleSheet("QPushButton { background-color: #4CAF50; color: white; padding: 8px 20px; font-weight: bold; }") + proceed_btn.clicked.connect(confirm_selection) + button_layout.addWidget(proceed_btn) + + cancel_btn = QPushButton("Cancel") + cancel_btn.setStyleSheet("QPushButton { background-color: #9E9E9E; color: white; padding: 8px 20px; }") + cancel_btn.clicked.connect(selection_dialog.reject) + button_layout.addWidget(cancel_btn) + + button_layout.addStretch() + main_layout.addLayout(button_layout) + + # Show dialog and wait for result + result = selection_dialog.exec() + + if not model_confirmed['value'] or result == QDialog.DialogCode.Rejected: + return + + selected_model_key = model_confirmed['model_key'] + model_id = model_confirmed['model_id'] + total_size_mb = model_sizes['Qwen2-VL'][selected_model_key] + elif provider == 'rapidocr': + total_size_mb = 50 # Approximate size for display + model_id = None + selected_model_key = None + else: + total_size_mb = model_sizes.get(provider, 500) + model_id = None + selected_model_key = None + + # Create download dialog with window manager - pass Tkinter root instead of PySide6 dialog + download_dialog, scrollable_frame, canvas = self.main_gui.wm.setup_scrollable( + self.main_gui.master, + f"Download {provider} Model", + width=600, + height=450, + max_width_ratio=0.6, + max_height_ratio=0.6 + ) + + # Info section + info_frame = tk.LabelFrame( + scrollable_frame, + text="Model Information", + font=('Arial', 11, 'bold'), + padx=15, + pady=10 + ) + info_frame.pack(fill=tk.X, padx=20, pady=10) + + if provider == 'Qwen2-VL': + info_text = f"📚 Qwen2-VL {selected_model_key} Model\n" + info_text += f"Model ID: {model_id}\n" + info_text += f"Estimated size: ~{total_size_mb/1000:.1f}GB\n" + info_text += "Vision-Language model for Korean OCR" + else: + info_text = f"📚 {provider} Model\nOptimized for manga/manhwa text detection" + + tk.Label(info_frame, text=info_text, font=('Arial', 10), justify=tk.LEFT).pack(anchor='w') + + # Progress section + progress_frame = tk.LabelFrame( + scrollable_frame, + text="Download Progress", + font=('Arial', 11, 'bold'), + padx=15, + pady=10 + ) + progress_frame.pack(fill=tk.X, padx=20, pady=10) + + progress_label = tk.Label(progress_frame, text="Ready to download", font=('Arial', 10)) + progress_label.pack(pady=(5, 10)) + + progress_var = tk.DoubleVar() + try: + # Try to use our custom progress bar style + progress_bar = ttk.Progressbar(progress_frame, 
length=550, mode='determinate', + variable=progress_var, + style="MangaProgress.Horizontal.TProgressbar") + except Exception: + # Fallback to default if style not available yet + progress_bar = ttk.Progressbar(progress_frame, length=550, mode='determinate', + variable=progress_var) + progress_bar.pack(pady=(0, 5)) + + size_label = tk.Label(progress_frame, text="", font=('Arial', 9), fg='#666666') + size_label.pack() + + speed_label = tk.Label(progress_frame, text="", font=('Arial', 9), fg='#666666') + speed_label.pack() + + status_label = tk.Label(progress_frame, text="Click 'Download' to begin", + font=('Arial', 9), fg='#666666') + status_label.pack(pady=(5, 0)) + + # Log section + log_frame = tk.LabelFrame( + scrollable_frame, + text="Download Log", + font=('Arial', 11, 'bold'), + padx=15, + pady=10 + ) + log_frame.pack(fill=tk.BOTH, expand=True, padx=20, pady=10) + + # Create a frame to hold the text widget and scrollbar + text_frame = tk.Frame(log_frame) + text_frame.pack(fill=tk.BOTH, expand=True) + + details_text = tk.Text( + text_frame, + height=12, + width=70, + font=('Courier', 9), + bg='#f5f5f5' + ) + details_text.pack(side=tk.LEFT, fill=tk.BOTH, expand=True) + + # Attach scrollbar to the frame, not the text widget + scrollbar = ttk.Scrollbar(text_frame, command=details_text.yview) + scrollbar.pack(side=tk.RIGHT, fill=tk.Y) + details_text.config(yscrollcommand=scrollbar.set) + + def add_log(message): + """Add message to log""" + details_text.insert(tk.END, f"{message}\n") + details_text.see(tk.END) + details_text.update() + + # Buttons frame + button_frame = tk.Frame(download_dialog) + button_frame.pack(pady=15) + + # Download tracking variables + download_active = {'value': False} + + def get_dir_size(path): + """Get total size of directory""" + total = 0 + try: + for dirpath, dirnames, filenames in os.walk(path): + for filename in filenames: + filepath = os.path.join(dirpath, filename) + if os.path.exists(filepath): + total += os.path.getsize(filepath) + except: + pass + return total + + def download_with_progress(): + """Download model with real progress tracking""" + import time + + download_active['value'] = True + total_size = total_size_mb * 1024 * 1024 + + try: + if provider == 'manga-ocr': + progress_label.config(text="Downloading manga-ocr model...") + add_log("Downloading manga-ocr model from Hugging Face...") + add_log("This will download ~450MB of model files") + progress_var.set(10) + + try: + from huggingface_hub import snapshot_download + + # Download the model files directly without importing manga_ocr + model_repo = "kha-white/manga-ocr-base" + add_log(f"Repository: {model_repo}") + + cache_dir = os.path.expanduser("~/.cache/huggingface/hub") + initial_size = get_dir_size(cache_dir) if os.path.exists(cache_dir) else 0 + start_time = time.time() + + add_log("Starting download...") + progress_var.set(20) + + # Download with progress tracking + import threading + download_complete = threading.Event() + download_error = [None] + + def download_model(): + try: + snapshot_download( + repo_id=model_repo, + repo_type="model", + resume_download=True, + local_files_only=False + ) + download_complete.set() + except Exception as e: + download_error[0] = e + download_complete.set() + + download_thread = threading.Thread(target=download_model, daemon=True) + download_thread.start() + + # Show progress while downloading + while not download_complete.is_set() and download_active['value']: + current_size = get_dir_size(cache_dir) if os.path.exists(cache_dir) else 0 + downloaded 
= current_size - initial_size + + if downloaded > 0: + progress = min(20 + (downloaded / total_size) * 70, 95) + progress_var.set(progress) + + elapsed = time.time() - start_time + if elapsed > 1: + speed = downloaded / elapsed + speed_mb = speed / (1024 * 1024) + speed_label.config(text=f"Speed: {speed_mb:.1f} MB/s") + + mb_downloaded = downloaded / (1024 * 1024) + mb_total = total_size / (1024 * 1024) + size_label.config(text=f"{mb_downloaded:.1f} MB / {mb_total:.1f} MB") + progress_label.config(text=f"Downloading: {progress:.1f}%") + + time.sleep(0.5) + + download_thread.join(timeout=5) + + if download_error[0]: + raise download_error[0] + + if download_complete.is_set() and not download_error[0]: + progress_var.set(100) + progress_label.config(text="✅ Download complete!") + status_label.config(text="Model files downloaded") + add_log("✅ Model files downloaded successfully") + add_log("") + add_log("Next step: Click 'Load Model' to initialize manga-ocr") + # Schedule status check on main thread + self.update_queue.put(('call_method', self._check_provider_status, ())) + else: + raise Exception("Download was cancelled") + + except ImportError: + progress_label.config(text="❌ Missing huggingface_hub") + status_label.config(text="Install huggingface_hub first") + add_log("ERROR: huggingface_hub not installed") + add_log("Run: pip install huggingface_hub") + except Exception as e: + raise # Re-raise to be caught by outer exception handler + + elif provider == 'Qwen2-VL': + try: + from transformers import AutoProcessor, AutoTokenizer, AutoModelForVision2Seq + import torch + except ImportError as e: + progress_label.config(text="❌ Missing dependencies") + status_label.config(text="Install dependencies first") + add_log(f"ERROR: {str(e)}") + add_log("Please install manually:") + add_log("pip install transformers torch torchvision") + return + + progress_label.config(text=f"Downloading model...") + add_log(f"Starting download of {model_id}") + progress_var.set(10) + + add_log("Downloading processor...") + status_label.config(text="Downloading processor...") + processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True) + progress_var.set(30) + add_log("✓ Processor downloaded") + + add_log("Downloading tokenizer...") + status_label.config(text="Downloading tokenizer...") + tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True) + progress_var.set(50) + add_log("✓ Tokenizer downloaded") + + add_log("Downloading model weights (this may take several minutes)...") + status_label.config(text="Downloading model weights...") + progress_label.config(text="Downloading model weights...") + + if torch.cuda.is_available(): + add_log(f"Using GPU: {torch.cuda.get_device_name(0)}") + model = AutoModelForVision2Seq.from_pretrained( + model_id, + dtype=torch.float16, + device_map="auto", + trust_remote_code=True + ) + else: + add_log("No GPU detected, will load on CPU") + model = AutoModelForVision2Seq.from_pretrained( + model_id, + dtype=torch.float32, + trust_remote_code=True + ) + + progress_var.set(90) + add_log("✓ Model weights downloaded") + + add_log("Initializing model...") + status_label.config(text="Initializing...") + + qwen_provider = self.ocr_manager.get_provider('Qwen2-VL') + if qwen_provider: + qwen_provider.processor = processor + qwen_provider.tokenizer = tokenizer + qwen_provider.model = model + qwen_provider.model.eval() + qwen_provider.is_loaded = True + qwen_provider.is_installed = True + + if selected_model_key: + qwen_provider.loaded_model_size = 
selected_model_key + + progress_var.set(100) + progress_label.config(text="✅ Download complete!") + status_label.config(text="Model ready for Korean OCR!") + add_log("✓ Model ready to use!") + + # Schedule status check on main thread + self.update_queue.put(('call_method', self._check_provider_status, ())) + + elif provider == 'rapidocr': + progress_label.config(text="📦 RapidOCR Installation Instructions") + add_log("RapidOCR requires manual pip installation") + progress_var.set(20) + + add_log("Command to run:") + add_log("pip install rapidocr-onnxruntime") + progress_var.set(50) + + add_log("") + add_log("After installation:") + add_log("1. Close this dialog") + add_log("2. Click 'Load Model' to initialize RapidOCR") + add_log("3. Status should show '✅ Model loaded'") + progress_var.set(100) + + progress_label.config(text="📦 Installation instructions shown") + status_label.config(text="Manual pip install required") + + download_btn.config(state=tk.DISABLED) + cancel_btn.config(text="Close") + + except Exception as e: + progress_label.config(text="❌ Download failed") + status_label.config(text=f"Error: {str(e)[:50]}") + add_log(f"ERROR: {str(e)}") + self._log(f"Download error: {str(e)}", "error") + + finally: + download_active['value'] = False + + def start_download(): + """Start download in background thread or executor""" + download_btn.config(state=tk.DISABLED) + cancel_btn.config(text="Cancel") + + try: + if hasattr(self.main_gui, '_ensure_executor'): + self.main_gui._ensure_executor() + execu = getattr(self.main_gui, 'executor', None) + if execu: + execu.submit(download_with_progress) + else: + import threading + download_thread = threading.Thread(target=download_with_progress, daemon=True) + download_thread.start() + except Exception: + import threading + download_thread = threading.Thread(target=download_with_progress, daemon=True) + download_thread.start() + + def cancel_download(): + """Cancel or close dialog""" + if download_active['value']: + download_active['value'] = False + status_label.config(text="Cancelling...") + else: + download_dialog.destroy() + + download_btn = tb.Button(button_frame, text="Download", command=start_download, bootstyle="primary") + download_btn.pack(side=tk.LEFT, padx=5) + + cancel_btn = tb.Button(button_frame, text="Close", command=cancel_download, bootstyle="secondary") + cancel_btn.pack(side=tk.LEFT, padx=5) + + # Auto-resize + self.main_gui.wm.auto_resize_dialog(download_dialog, canvas, max_width_ratio=0.5, max_height_ratio=0.6) + + def _check_provider_status(self): + """Check and display OCR provider status""" + # Skip during initialization to prevent lag + if hasattr(self, '_initializing_gui') and self._initializing_gui: + if hasattr(self, 'provider_status_label'): + self.provider_status_label.setText("") + self.provider_status_label.setStyleSheet("color: black;") + return + + # Get provider value + if not hasattr(self, 'ocr_provider_value'): + # Not initialized yet, skip + return + provider = self.ocr_provider_value + + # Hide ALL buttons first + if hasattr(self, 'provider_setup_btn'): + self.provider_setup_btn.setVisible(False) + if hasattr(self, 'download_model_btn'): + self.download_model_btn.setVisible(False) + + if provider == 'google': + # Google - check for credentials file + google_creds = self.main_gui.config.get('google_vision_credentials', '') + if google_creds and os.path.exists(google_creds): + self.provider_status_label.setText("✅ Ready") + self.provider_status_label.setStyleSheet("color: green;") + else: + 
self.provider_status_label.setText("❌ Credentials needed") + self.provider_status_label.setStyleSheet("color: red;") + + elif provider == 'azure': + # Azure - check for API key + azure_key = self.main_gui.config.get('azure_vision_key', '') + if azure_key: + self.provider_status_label.setText("✅ Ready") + self.provider_status_label.setStyleSheet("color: green;") + else: + self.provider_status_label.setText("❌ Key needed") + self.provider_status_label.setStyleSheet("color: red;") + + elif provider == 'custom-api': + # Custom API - check for main API key + api_key = None + if hasattr(self.main_gui, 'api_key_entry') and self.main_gui.api_key_entry.get().strip(): + api_key = self.main_gui.api_key_entry.get().strip() + elif hasattr(self.main_gui, 'config') and self.main_gui.config.get('api_key'): + api_key = self.main_gui.config.get('api_key') + + # Check if AI bubble detection is enabled + manga_settings = self.main_gui.config.get('manga_settings', {}) + ocr_settings = manga_settings.get('ocr', {}) + bubble_detection_enabled = ocr_settings.get('bubble_detection_enabled', False) + + if api_key: + if bubble_detection_enabled: + self.provider_status_label.setText("✅ Ready") + self.provider_status_label.setStyleSheet("color: green;") + else: + self.provider_status_label.setText("⚠️ Enable AI bubble detection for best results") + self.provider_status_label.setStyleSheet("color: orange;") + else: + self.provider_status_label.setText("❌ API key needed") + self.provider_status_label.setStyleSheet("color: red;") + + elif provider == 'Qwen2-VL': + # Initialize OCR manager if needed + if not hasattr(self, 'ocr_manager'): + from ocr_manager import OCRManager + self.ocr_manager = OCRManager(log_callback=self._log) + + # Check status first + status = self.ocr_manager.check_provider_status(provider) + + # Load saved model size if available + if hasattr(self, 'qwen2vl_model_size'): + saved_model_size = self.qwen2vl_model_size + else: + saved_model_size = self.main_gui.config.get('qwen2vl_model_size', '1') + + # When displaying status for loaded model + if status['loaded']: + # Map the saved size to display name + size_names = {'1': '2B', '2': '7B', '3': '72B', '4': 'custom'} + display_size = size_names.get(saved_model_size, saved_model_size) + self.provider_status_label.setText(f"✅ {display_size} model loaded") + self.provider_status_label.setStyleSheet("color: green;") + + # Show reload button + self.provider_setup_btn.setText("Reload") + self.provider_setup_btn.setVisible(True) + + elif status['installed']: + # Dependencies installed but model not loaded + self.provider_status_label.setText("📦 Dependencies ready") + self.provider_status_label.setStyleSheet("color: orange;") + + # Show Load button + self.provider_setup_btn.setText("Load Model") + self.provider_setup_btn.setVisible(True) + + # Also show Download button + self.download_model_btn.setText("📥 Download Model") + self.download_model_btn.setVisible(True) + + else: + # Not installed + self.provider_status_label.setText("❌ Not installed") + self.provider_status_label.setStyleSheet("color: red;") + + # Show BOTH buttons + self.provider_setup_btn.setText("Load Model") + self.provider_setup_btn.setVisible(True) + + self.download_model_btn.setText("📥 Download Qwen2-VL") + self.download_model_btn.setVisible(True) + + # Additional GPU status check for Qwen2-VL + if not status['loaded']: + try: + import torch + if not torch.cuda.is_available(): + self._log("⚠️ No GPU detected - Qwen2-VL will run slowly on CPU", "warning") + except ImportError: + pass + + 
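# Providers not handled above fall through to the generic local-provider branch below.
+ # ocr_manager.check_provider_status(provider) is used there the same way as here: as far as
+ # this code is concerned it returns a dict with at least 'installed' and 'loaded' booleans,
+ # e.g. {'installed': True, 'loaded': False}, which drive the status label and button text. +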
else: + # Local OCR providers + if not hasattr(self, 'ocr_manager'): + from ocr_manager import OCRManager + self.ocr_manager = OCRManager(log_callback=self._log) + + status = self.ocr_manager.check_provider_status(provider) + + if status['loaded']: + # Model is loaded and ready + if provider == 'Qwen2-VL': + # Check which model size is loaded + qwen_provider = self.ocr_manager.get_provider('Qwen2-VL') + if qwen_provider and hasattr(qwen_provider, 'loaded_model_size'): + model_size = qwen_provider.loaded_model_size + status_text = f"✅ {model_size} model loaded" + else: + status_text = "✅ Model loaded" + self.provider_status_label.setText(status_text) + self.provider_status_label.setStyleSheet("color: green;") + else: + self.provider_status_label.setText("✅ Model loaded") + self.provider_status_label.setStyleSheet("color: green;") + + # Show reload button for all local providers + self.provider_setup_btn.setText("Reload") + self.provider_setup_btn.setVisible(True) + + elif status['installed']: + # Dependencies installed but model not loaded + self.provider_status_label.setText("📦 Dependencies ready") + self.provider_status_label.setStyleSheet("color: orange;") + + # Show Load button for all providers + self.provider_setup_btn.setText("Load Model") + self.provider_setup_btn.setVisible(True) + + # Also show Download button for models that need downloading + if provider in ['Qwen2-VL', 'manga-ocr']: + self.download_model_btn.setText("📥 Download Model") + self.download_model_btn.setVisible(True) + + else: + # Not installed + self.provider_status_label.setText("❌ Not installed") + self.provider_status_label.setStyleSheet("color: red;") + + # Categorize providers + huggingface_providers = ['manga-ocr', 'Qwen2-VL', 'rapidocr'] # Move rapidocr here + pip_providers = ['easyocr', 'paddleocr', 'doctr'] # Remove rapidocr from here + + if provider in huggingface_providers: + # For HuggingFace models, show BOTH buttons + self.provider_setup_btn.setText("Load Model") + self.provider_setup_btn.setVisible(True) + + # Download button + if provider == 'rapidocr': + self.download_model_btn.setText("📥 Install RapidOCR") + else: + self.download_model_btn.setText(f"📥 Download {provider}") + self.download_model_btn.setVisible(True) + + elif provider in pip_providers: + # Check if running as .exe + if getattr(sys, 'frozen', False): + # Running as .exe - can't pip install + self.provider_status_label.setText("❌ Not available in .exe") + self.provider_status_label.setStyleSheet("color: red;") + self._log(f"⚠️ {provider} cannot be installed in standalone .exe version", "warning") + else: + # Running from Python - can pip install + self.provider_setup_btn.setText("Install") + self.provider_setup_btn.setVisible(True) + + def _setup_ocr_provider(self): + """Setup/install/load OCR provider""" + provider = self.ocr_provider_value + + if provider in ['google', 'azure']: + return # Cloud providers don't need setup + + # your own api key + if provider == 'custom-api': + # Open configuration dialog for custom API + try: + from custom_api_config_dialog import CustomAPIConfigDialog + dialog = CustomAPIConfigDialog( + self.manga_window, + self.main_gui.config, + self.main_gui.save_config + ) + # After dialog closes, refresh status + from PySide6.QtCore import QTimer + QTimer.singleShot(100, self._check_provider_status) + except ImportError: + # If dialog not available, show message + from PySide6.QtWidgets import QMessageBox + from PySide6.QtCore import QTimer + QTimer.singleShot(0, lambda: QMessageBox.information( + self.dialog, 
+ "Custom API Configuration", + "This mode uses your own API key in the main GUI:\n\n" + "- Make sure your API supports vision\n" + "- api_key: Your API key\n" + "- model: Model name\n" + "- custom url: You can override API endpoint under Other settings" + )) + return + + status = self.ocr_manager.check_provider_status(provider) + + # For Qwen2-VL, check if we need to select model size first + model_size = None + if provider == 'Qwen2-VL' and status['installed'] and not status['loaded']: + # Create PySide6 dialog for model selection + from PySide6.QtWidgets import (QDialog, QVBoxLayout, QHBoxLayout, QLabel, + QRadioButton, QButtonGroup, QLineEdit, QPushButton, + QGroupBox, QFrame, QMessageBox) + from PySide6.QtCore import Qt + from PySide6.QtGui import QFont + + selection_dialog = QDialog(self.dialog) + selection_dialog.setWindowTitle("Select Qwen2-VL Model Size") + selection_dialog.setMinimumSize(600, 450) + main_layout = QVBoxLayout(selection_dialog) + + # Title + title_label = QLabel("Select Model Size to Load") + title_font = QFont("Arial", 12, QFont.Weight.Bold) + title_label.setFont(title_font) + title_label.setAlignment(Qt.AlignmentFlag.AlignCenter) + main_layout.addWidget(title_label) + + # Model selection frame + model_frame = QGroupBox("Available Models") + model_frame_font = QFont("Arial", 11, QFont.Weight.Bold) + model_frame.setFont(model_frame_font) + model_frame_layout = QVBoxLayout(model_frame) + model_frame_layout.setContentsMargins(15, 15, 15, 15) + model_frame_layout.setSpacing(10) + + # Model options + model_options = { + "1": {"name": "Qwen2-VL 2B", "desc": "Smallest (4-8GB VRAM)"}, + "2": {"name": "Qwen2-VL 7B", "desc": "Medium (12-16GB VRAM)"}, + "3": {"name": "Qwen2-VL 72B", "desc": "Largest (80GB+ VRAM)"}, + "4": {"name": "Custom Model", "desc": "Enter any HF model ID"}, + } + + # Store selected model + selected_model_key = {"value": "1"} + custom_model_id_text = {"value": ""} + + # Radio button group + button_group = QButtonGroup(selection_dialog) + + for idx, (key, info) in enumerate(model_options.items()): + # Radio button + rb = QRadioButton(f"{info['name']} - {info['desc']}") + rb_font = QFont("Arial", 10) + rb.setFont(rb_font) + if idx == 0: + rb.setChecked(True) + rb.clicked.connect(lambda checked, k=key: selected_model_key.update({"value": k})) + button_group.addButton(rb) + model_frame_layout.addWidget(rb) + + # Separator + if key != "4": + separator = QFrame() + separator.setFrameShape(QFrame.Shape.HLine) + separator.setFrameShadow(QFrame.Shadow.Sunken) + model_frame_layout.addWidget(separator) + + main_layout.addWidget(model_frame) + + # Custom model ID frame (initially hidden) + custom_frame = QGroupBox("Custom Model Configuration") + custom_frame_font = QFont("Arial", 11, QFont.Weight.Bold) + custom_frame.setFont(custom_frame_font) + custom_frame_layout = QHBoxLayout(custom_frame) + custom_frame_layout.setContentsMargins(15, 15, 15, 15) + + custom_label = QLabel("Model ID:") + custom_label_font = QFont("Arial", 10) + custom_label.setFont(custom_label_font) + custom_frame_layout.addWidget(custom_label) + + custom_entry = QLineEdit() + custom_entry.setPlaceholderText("e.g., Qwen/Qwen2-VL-2B-Instruct") + custom_entry.setFont(custom_label_font) + custom_entry.textChanged.connect(lambda text: custom_model_id_text.update({"value": text})) + custom_frame_layout.addWidget(custom_entry) + + custom_frame.hide() # Hidden by default + main_layout.addWidget(custom_frame) + + # Toggle custom frame visibility + def toggle_custom_frame(): + if 
selected_model_key["value"] == "4": + custom_frame.show() + else: + custom_frame.hide() + + for rb in button_group.buttons(): + rb.clicked.connect(toggle_custom_frame) + + # Buttons with centering + button_layout = QHBoxLayout() + button_layout.addStretch() + + model_confirmed = {'value': False, 'size': None} + + def confirm_selection(): + selected = selected_model_key["value"] + self._log(f"DEBUG: Radio button selection = {selected}") + if selected == "4": + if not custom_model_id_text["value"].strip(): + QMessageBox.critical(selection_dialog, "Error", "Please enter a model ID") + return + model_confirmed['size'] = f"custom:{custom_model_id_text['value'].strip()}" + else: + model_confirmed['size'] = selected + model_confirmed['value'] = True + selection_dialog.accept() + + load_btn = QPushButton("Load") + load_btn.setStyleSheet("QPushButton { background-color: #4CAF50; color: white; padding: 8px 20px; font-weight: bold; }") + load_btn.clicked.connect(confirm_selection) + button_layout.addWidget(load_btn) + + cancel_btn = QPushButton("Cancel") + cancel_btn.setStyleSheet("QPushButton { background-color: #9E9E9E; color: white; padding: 8px 20px; }") + cancel_btn.clicked.connect(selection_dialog.reject) + button_layout.addWidget(cancel_btn) + + button_layout.addStretch() + main_layout.addLayout(button_layout) + + # Show dialog and wait for result (PySide6 modal dialog) + result = selection_dialog.exec() + + if result != QDialog.DialogCode.Accepted or not model_confirmed['value']: + return + + model_size = model_confirmed['size'] + self._log(f"DEBUG: Dialog closed, model_size set to: {model_size}") + + # Create PySide6 progress dialog + from PySide6.QtWidgets import QDialog, QVBoxLayout, QLabel, QProgressBar, QGroupBox + from PySide6.QtCore import QTimer + from PySide6.QtGui import QFont + + progress_dialog = QDialog(self.dialog) + progress_dialog.setWindowTitle(f"Setting up {provider}") + progress_dialog.setMinimumSize(400, 200) + progress_layout = QVBoxLayout(progress_dialog) + + # Progress section + progress_section = QGroupBox("Setup Progress") + progress_section_font = QFont("Arial", 11, QFont.Weight.Bold) + progress_section.setFont(progress_section_font) + progress_section_layout = QVBoxLayout(progress_section) + progress_section_layout.setContentsMargins(15, 15, 15, 15) + progress_section_layout.setSpacing(10) + + progress_label = QLabel("Initializing...") + progress_label_font = QFont("Arial", 10) + progress_label.setFont(progress_label_font) + progress_section_layout.addWidget(progress_label) + + progress_bar = QProgressBar() + progress_bar.setMinimum(0) + progress_bar.setMaximum(0) # Indeterminate mode + progress_bar.setMinimumWidth(350) + progress_section_layout.addWidget(progress_bar) + + status_label = QLabel("") + status_label_font = QFont("Arial", 9) + status_label.setFont(status_label_font) + status_label.setStyleSheet("color: #666666;") + progress_section_layout.addWidget(status_label) + + progress_layout.addWidget(progress_section) + + def update_progress(message, percent=None): + """Update progress display (thread-safe)""" + # Use lambda to ensure we capture the correct widget references + def update_ui(): + progress_label.setText(message) + if percent is not None: + progress_bar.setMaximum(100) # Switch to determinate mode + progress_bar.setValue(int(percent)) + + # Schedule on main thread + self.update_queue.put(('call_method', update_ui, ())) + + def setup_thread(): + """Run setup in background thread""" + nonlocal model_size + print(f"\n=== SETUP THREAD STARTED for 
{provider} ===") + print(f"Status: {status}") + print(f"Model size: {model_size}") + + try: + # Check if we need to install + if not status['installed']: + # Install provider + print(f"Installing {provider}...") + update_progress(f"Installing {provider}...") + success = self.ocr_manager.install_provider(provider, update_progress) + print(f"Install result: {success}") + + if not success: + print("Installation FAILED") + update_progress("❌ Installation failed!", 0) + self._log(f"Failed to install {provider}", "error") + return + else: + # Already installed, skip installation + print(f"{provider} dependencies already installed") + self._log(f"DEBUG: {provider} dependencies already installed") + success = True # Mark as success since deps are ready + + # Load model + print(f"About to load {provider} model...") + update_progress(f"Loading {provider} model...") + self._log(f"DEBUG: Loading provider {provider}, status['installed']={status.get('installed', False)}") + + # Special handling for Qwen2-VL - pass model_size + if provider == 'Qwen2-VL': + if success and model_size: + # Save the model size to config + self.qwen2vl_model_size = model_size + self.main_gui.config['qwen2vl_model_size'] = model_size + + # Save config immediately + if hasattr(self.main_gui, 'save_config'): + self.main_gui.save_config(show_message=False) + self._log(f"DEBUG: In thread, about to load with model_size={model_size}") + if model_size: + success = self.ocr_manager.load_provider(provider, model_size=model_size) + + if success: + provider_obj = self.ocr_manager.get_provider('Qwen2-VL') + if provider_obj: + provider_obj.loaded_model_size = { + "1": "2B", + "2": "7B", + "3": "72B", + "4": "custom" + }.get(model_size, model_size) + else: + self._log("Warning: No model size specified for Qwen2-VL, defaulting to 2B", "warning") + success = self.ocr_manager.load_provider(provider, model_size="1") + else: + print(f"Loading {provider} without model_size parameter") + self._log(f"DEBUG: Loading {provider} without model_size parameter") + success = self.ocr_manager.load_provider(provider) + print(f"load_provider returned: {success}") + self._log(f"DEBUG: load_provider returned success={success}") + + print(f"\nFinal success value: {success}") + if success: + print("SUCCESS! Model loaded successfully") + update_progress(f"✅ {provider} ready!", 100) + self._log(f"✅ {provider} is ready to use", "success") + # Schedule status check on main thread + self.update_queue.put(('call_method', self._check_provider_status, ())) + else: + print("FAILED! Model did not load") + update_progress("❌ Failed to load model!", 0) + self._log(f"Failed to load {provider} model", "error") + + except Exception as e: + print(f"\n!!! 
EXCEPTION CAUGHT !!!") + print(f"Exception type: {type(e).__name__}") + print(f"Exception message: {str(e)}") + import traceback + traceback_str = traceback.format_exc() + print(f"Traceback:\n{traceback_str}") + + error_msg = f"❌ Error: {str(e)}" + update_progress(error_msg, 0) + self._log(f"Setup error: {str(e)}", "error") + self._log(traceback_str, "debug") + # Don't close dialog on error - let user read the error + return + + # Only close dialog on success + if success: + # Schedule dialog close on main thread after 2 seconds + import time + time.sleep(2) + self.update_queue.put(('call_method', progress_dialog.close, ())) + else: + # On failure, keep dialog open so user can see the error + import time + time.sleep(5) + self.update_queue.put(('call_method', progress_dialog.close, ())) + + # Show progress dialog (non-blocking) + progress_dialog.show() + + # Start setup in background via executor if available + try: + if hasattr(self.main_gui, '_ensure_executor'): + self.main_gui._ensure_executor() + execu = getattr(self.main_gui, 'executor', None) + if execu: + execu.submit(setup_thread) + else: + import threading + threading.Thread(target=setup_thread, daemon=True).start() + except Exception: + import threading + threading.Thread(target=setup_thread, daemon=True).start() + + def _on_ocr_provider_change(self, event=None): + """Handle OCR provider change""" + # Get the new provider value from combo box + if hasattr(self, 'provider_combo'): + provider = self.provider_combo.currentText() + self.ocr_provider_value = provider + else: + provider = self.ocr_provider_value + + # Hide ALL provider-specific frames first (PySide6) + if hasattr(self, 'google_creds_frame'): + self.google_creds_frame.setVisible(False) + if hasattr(self, 'azure_frame'): + self.azure_frame.setVisible(False) + + # Show only the relevant settings frame for the selected provider + if provider == 'google': + # Show Google credentials frame + if hasattr(self, 'google_creds_frame'): + self.google_creds_frame.setVisible(True) + + elif provider == 'azure': + # Show Azure settings frame + if hasattr(self, 'azure_frame'): + self.azure_frame.setVisible(True) + + # For all other providers (manga-ocr, Qwen2-VL, easyocr, paddleocr, doctr) + # Don't show any cloud credential frames - they use local models + + # Check provider status to show appropriate buttons + self._check_provider_status() + + # Update the main status label at the top based on new provider + self._update_main_status_label() + + # Log the change + provider_descriptions = { + 'custom-api': "Custom API - use your own vision model", + 'google': "Google Cloud Vision (requires credentials)", + 'azure': "Azure Computer Vision (requires API key)", + 'manga-ocr': "Manga OCR - optimized for Japanese manga", + 'rapidocr': "RapidOCR - fast local OCR with region detection", + 'Qwen2-VL': "Qwen2-VL - a big model", + 'easyocr': "EasyOCR - multi-language support", + 'paddleocr': "PaddleOCR - CJK language support", + 'doctr': "DocTR - document text recognition" + } + + self._log(f"📋 OCR provider changed to: {provider_descriptions.get(provider, provider)}", "info") + + # Save the selection + self.main_gui.config['manga_ocr_provider'] = provider + if hasattr(self.main_gui, 'save_config'): + self.main_gui.save_config(show_message=False) + + # IMPORTANT: Reset translator to force recreation with new OCR provider + if hasattr(self, 'translator') and self.translator: + self._log(f"OCR provider changed to {provider.upper()}. 
Translator will be recreated on next run.", "info") + self.translator = None # Force recreation on next translation + + def _update_main_status_label(self): + """Update the main status label at the top based on current provider and credentials""" + if not hasattr(self, 'status_label'): + return + + # Get API key + try: + if hasattr(self.main_gui.api_key_entry, 'text'): + has_api_key = bool(self.main_gui.api_key_entry.text().strip()) + elif hasattr(self.main_gui.api_key_entry, 'get'): + has_api_key = bool(self.main_gui.api_key_entry.get().strip()) + else: + has_api_key = False + except: + has_api_key = False + + # Get current provider + provider = self.ocr_provider_value if hasattr(self, 'ocr_provider_value') else self.main_gui.config.get('manga_ocr_provider', 'custom-api') + + # Determine readiness based on provider + if provider == 'google': + has_vision = os.path.exists(self.main_gui.config.get('google_vision_credentials', '')) + is_ready = has_api_key and has_vision + elif provider == 'azure': + has_azure = bool(self.main_gui.config.get('azure_vision_key', '')) + is_ready = has_api_key and has_azure + else: + # Local providers or custom-api only need API key for translation + is_ready = has_api_key + + # Update label + status_text = "✅ Ready" if is_ready else "❌ Setup Required" + status_color = "green" if is_ready else "red" + + self.status_label.setText(status_text) + self.status_label.setStyleSheet(f"color: {status_color};") + + def _build_interface(self): + """Build the enhanced manga translation interface using PySide6""" + # Create main layout for PySide6 widget + main_layout = QVBoxLayout(self.parent_widget) + main_layout.setContentsMargins(10, 10, 10, 10) + main_layout.setSpacing(6) + self._build_pyside6_interface(main_layout) + + def _build_pyside6_interface(self, main_layout): + # Import QSizePolicy for layout management + from PySide6.QtWidgets import QSizePolicy + + # Apply global stylesheet for checkboxes and radio buttons + checkbox_radio_style = """ + QCheckBox { + color: white; + spacing: 6px; + } + QCheckBox::indicator { + width: 14px; + height: 14px; + border: 1px solid #5a9fd4; + border-radius: 2px; + background-color: #2d2d2d; + } + QCheckBox::indicator:checked { + background-color: #5a9fd4; + border-color: #5a9fd4; + } + QCheckBox::indicator:hover { + border-color: #7bb3e0; + } + QCheckBox:disabled { + color: #666666; + } + QCheckBox::indicator:disabled { + background-color: #1a1a1a; + border-color: #3a3a3a; + } + QRadioButton { + color: white; + spacing: 5px; + } + QRadioButton::indicator { + width: 13px; + height: 13px; + border: 2px solid #5a9fd4; + border-radius: 7px; + background-color: #2d2d2d; + } + QRadioButton::indicator:checked { + background-color: #5a9fd4; + border: 2px solid #5a9fd4; + } + QRadioButton::indicator:hover { + border-color: #7bb3e0; + } + QRadioButton:disabled { + color: #666666; + } + QRadioButton::indicator:disabled { + background-color: #1a1a1a; + border-color: #3a3a3a; + } + /* Disabled fields styling */ + QLineEdit:disabled, QComboBox:disabled, QSpinBox:disabled, QDoubleSpinBox:disabled { + background-color: #1a1a1a; + color: #666666; + border: 1px solid #3a3a3a; + } + QLabel:disabled { + color: #666666; + } + """ + self.parent_widget.setStyleSheet(checkbox_radio_style) + + # Title (at the very top) + title_frame = QWidget() + title_layout = QHBoxLayout(title_frame) + title_layout.setContentsMargins(0, 0, 0, 0) + title_layout.setSpacing(8) + + title_label = QLabel("🎌 Manga Translation") + title_font = QFont("Arial", 13) + 
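# The header pairs the tab title with an at-a-glance readiness label; the detailed list of
+ # missing requirements (API key, Google/Azure credentials) is only built further below when
+ # the selected OCR provider is not fully configured. +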
title_font.setBold(True) + title_label.setFont(title_font) + title_layout.addWidget(title_label) + + # Requirements check - based on selected OCR provider + has_api_key = bool(self.main_gui.api_key_entry.text().strip()) if hasattr(self.main_gui.api_key_entry, 'text') else bool(self.main_gui.api_key_entry.get().strip()) + + # Get the saved OCR provider to check appropriate credentials + saved_provider = self.main_gui.config.get('manga_ocr_provider', 'custom-api') + + # Determine readiness based on provider + if saved_provider == 'google': + has_vision = os.path.exists(self.main_gui.config.get('google_vision_credentials', '')) + is_ready = has_api_key and has_vision + elif saved_provider == 'azure': + has_azure = bool(self.main_gui.config.get('azure_vision_key', '')) + is_ready = has_api_key and has_azure + else: + # Local providers or custom-api only need API key for translation + is_ready = has_api_key + + status_text = "✅ Ready" if is_ready else "❌ Setup Required" + status_color = "green" if is_ready else "red" + + status_label = QLabel(status_text) + status_font = QFont("Arial", 10) + status_label.setFont(status_font) + status_label.setStyleSheet(f"color: {status_color};") + title_layout.addStretch() + title_layout.addWidget(status_label) + + main_layout.addWidget(title_frame) + + # Store reference for updates + self.status_label = status_label + + # Model Preloading Progress Bar (right after title, initially hidden) + self.preload_progress_frame = QWidget() + self.preload_progress_frame.setStyleSheet( + "background-color: #2d2d2d; " + "border: 1px solid #4a5568; " + "border-radius: 4px; " + "padding: 6px;" + ) + preload_layout = QVBoxLayout(self.preload_progress_frame) + preload_layout.setContentsMargins(8, 6, 8, 6) + preload_layout.setSpacing(4) + + self.preload_status_label = QLabel("Loading models...") + preload_status_font = QFont("Segoe UI", 9) + preload_status_font.setBold(True) + self.preload_status_label.setFont(preload_status_font) + self.preload_status_label.setStyleSheet("color: #ffffff; background: transparent; border: none;") + self.preload_status_label.setAlignment(Qt.AlignCenter) + preload_layout.addWidget(self.preload_status_label) + + self.preload_progress_bar = QProgressBar() + self.preload_progress_bar.setRange(0, 100) + self.preload_progress_bar.setValue(0) + self.preload_progress_bar.setTextVisible(True) + self.preload_progress_bar.setMinimumHeight(22) + self.preload_progress_bar.setStyleSheet(""" + QProgressBar { + border: 1px solid #4a5568; + border-radius: 3px; + text-align: center; + background-color: #1e1e1e; + color: #ffffff; + font-weight: bold; + font-size: 9px; + } + QProgressBar::chunk { + background: qlineargradient(x1:0, y1:0, x2:1, y2:0, + stop:0 #2d6a4f, stop:0.5 #1b4332, stop:1 #081c15); + border-radius: 2px; + margin: 0px; + } + """) + preload_layout.addWidget(self.preload_progress_bar) + + self.preload_progress_frame.setVisible(False) # Hidden by default + main_layout.addWidget(self.preload_progress_frame) + + # Add instructions based on selected provider + if not is_ready: + req_frame = QWidget() + req_layout = QVBoxLayout(req_frame) + req_layout.setContentsMargins(0, 5, 0, 5) + + req_text = [] + if not has_api_key: + req_text.append("• API Key not configured") + + # Only show provider-specific credential warnings + if saved_provider == 'google': + has_vision = os.path.exists(self.main_gui.config.get('google_vision_credentials', '')) + if not has_vision: + req_text.append("• Google Cloud Vision credentials not set") + elif saved_provider == 
'azure': + has_azure = bool(self.main_gui.config.get('azure_vision_key', '')) + if not has_azure: + req_text.append("• Azure credentials not configured") + + if req_text: # Only show frame if there are actual missing requirements + req_label = QLabel("\n".join(req_text)) + req_font = QFont("Arial", 10) + req_label.setFont(req_font) + req_label.setStyleSheet("color: red;") + req_label.setAlignment(Qt.AlignLeft) + req_layout.addWidget(req_label) + main_layout.addWidget(req_frame) + else: + # Create empty frame to maintain layout consistency + req_frame = QWidget() + req_frame.setVisible(False) + main_layout.addWidget(req_frame) + + # File selection frame - SPANS BOTH COLUMNS + file_frame = QGroupBox("Select Manga Images") + file_frame_font = QFont("Arial", 10) + file_frame_font.setBold(True) + file_frame.setFont(file_frame_font) + file_frame_layout = QVBoxLayout(file_frame) + file_frame_layout.setContentsMargins(10, 10, 10, 8) + file_frame_layout.setSpacing(6) + + # File listbox (QListWidget handles scrolling automatically) + self.file_listbox = QListWidget() + self.file_listbox.setSelectionMode(QListWidget.ExtendedSelection) + self.file_listbox.setMinimumHeight(200) + file_frame_layout.addWidget(self.file_listbox) + + # File buttons + file_btn_frame = QWidget() + file_btn_layout = QHBoxLayout(file_btn_frame) + file_btn_layout.setContentsMargins(0, 6, 0, 0) + file_btn_layout.setSpacing(4) + + add_files_btn = QPushButton("Add Files") + add_files_btn.clicked.connect(self._add_files) + add_files_btn.setStyleSheet("QPushButton { background-color: #007bff; color: white; padding: 3px 10px; font-size: 9pt; }") + file_btn_layout.addWidget(add_files_btn) + + add_folder_btn = QPushButton("Add Folder") + add_folder_btn.clicked.connect(self._add_folder) + add_folder_btn.setStyleSheet("QPushButton { background-color: #007bff; color: white; padding: 3px 10px; font-size: 9pt; }") + file_btn_layout.addWidget(add_folder_btn) + + remove_btn = QPushButton("Remove Selected") + remove_btn.clicked.connect(self._remove_selected) + remove_btn.setStyleSheet("QPushButton { background-color: #dc3545; color: white; padding: 3px 10px; font-size: 9pt; }") + file_btn_layout.addWidget(remove_btn) + + clear_btn = QPushButton("Clear All") + clear_btn.clicked.connect(self._clear_all) + clear_btn.setStyleSheet("QPushButton { background-color: #ffc107; color: black; padding: 3px 10px; font-size: 9pt; }") + file_btn_layout.addWidget(clear_btn) + + file_btn_layout.addStretch() + file_frame_layout.addWidget(file_btn_frame) + + main_layout.addWidget(file_frame) + + # Create 2-column layout for settings + columns_container = QWidget() + columns_layout = QHBoxLayout(columns_container) + columns_layout.setContentsMargins(0, 0, 0, 0) + columns_layout.setSpacing(10) + + # Left column (Column 1) + left_column = QWidget() + left_column_layout = QVBoxLayout(left_column) + left_column_layout.setContentsMargins(0, 0, 0, 0) + left_column_layout.setSpacing(6) + + # Right column (Column 2) + right_column = QWidget() + right_column_layout = QVBoxLayout(right_column) + right_column_layout.setContentsMargins(0, 0, 0, 0) + right_column_layout.setSpacing(6) + + # Settings frame - GOES TO LEFT COLUMN + settings_frame = QGroupBox("Translation Settings") + settings_frame_font = QFont("Arial", 10) + settings_frame_font.setBold(True) + settings_frame.setFont(settings_frame_font) + settings_frame_layout = QVBoxLayout(settings_frame) + settings_frame_layout.setContentsMargins(10, 10, 10, 8) + settings_frame_layout.setSpacing(6) + + # API Settings - Hybrid 
approach + api_frame = QWidget() + api_layout = QHBoxLayout(api_frame) + api_layout.setContentsMargins(0, 0, 0, 10) + api_layout.setSpacing(10) + + api_label = QLabel("OCR: Google Cloud Vision | Translation: API Key") + api_font = QFont("Arial", 10) + api_font.setItalic(True) + api_label.setFont(api_font) + api_label.setStyleSheet("color: gray;") + api_layout.addWidget(api_label) + + # Show current model from main GUI + current_model = 'Unknown' + try: + if hasattr(self.main_gui, 'model_combo') and hasattr(self.main_gui.model_combo, 'currentText'): + # PySide6 QComboBox + current_model = self.main_gui.model_combo.currentText() + elif hasattr(self.main_gui, 'model_var'): + # Tkinter StringVar + current_model = self.main_gui.model_var.get() if hasattr(self.main_gui.model_var, 'get') else str(self.main_gui.model_var) + elif hasattr(self.main_gui, 'config'): + # Fallback to config + current_model = self.main_gui.config.get('model', 'Unknown') + except Exception as e: + print(f"Error getting model: {e}") + current_model = 'Unknown' + + model_label = QLabel(f"Model: {current_model}") + model_font = QFont("Arial", 10) + model_font.setItalic(True) + model_label.setFont(model_font) + model_label.setStyleSheet("color: gray;") + api_layout.addStretch() + api_layout.addWidget(model_label) + + settings_frame_layout.addWidget(api_frame) + + # OCR Provider Selection - ENHANCED VERSION + self.ocr_provider_frame = QWidget() + ocr_provider_layout = QHBoxLayout(self.ocr_provider_frame) + ocr_provider_layout.setContentsMargins(0, 0, 0, 10) + ocr_provider_layout.setSpacing(10) + + provider_label = QLabel("OCR Provider:") + provider_label.setMinimumWidth(150) + provider_label.setAlignment(Qt.AlignLeft) + ocr_provider_layout.addWidget(provider_label) + + # Expanded provider list with descriptions + ocr_providers = [ + ('custom-api', 'Your Own key'), + ('google', 'Google Cloud Vision'), + ('azure', 'Azure Computer Vision'), + ('rapidocr', '⚡ RapidOCR (Fast & Local)'), + ('manga-ocr', '🇯🇵 Manga OCR (Japanese)'), + ('Qwen2-VL', '🇰🇷 Qwen2-VL (Korean)'), + ('easyocr', '🌏 EasyOCR (Multi-lang)'), + #('paddleocr', '🐼 PaddleOCR'), + ('doctr', '📄 DocTR'), + ] + + # Just the values for the combobox + provider_values = [p[0] for p in ocr_providers] + provider_display = [f"{p[0]} - {p[1]}" for p in ocr_providers] + + self.ocr_provider_value = self.main_gui.config.get('manga_ocr_provider', 'custom-api') + self.provider_combo = QComboBox() + self.provider_combo.addItems(provider_values) + self.provider_combo.setCurrentText(self.ocr_provider_value) + self.provider_combo.setMinimumWidth(120) # Reduced for better fit + self.provider_combo.setSizePolicy(QSizePolicy.Policy.Expanding, QSizePolicy.Policy.Fixed) + self.provider_combo.currentTextChanged.connect(self._on_ocr_provider_change) + self._disable_combobox_mousewheel(self.provider_combo) # Disable mousewheel scrolling + ocr_provider_layout.addWidget(self.provider_combo) + + # Provider status indicator with more detail + self.provider_status_label = QLabel("") + status_font = QFont("Arial", 9) + self.provider_status_label.setFont(status_font) + self.provider_status_label.setWordWrap(True) # Allow text wrapping + self.provider_status_label.setSizePolicy(QSizePolicy.Policy.Expanding, QSizePolicy.Policy.Preferred) + ocr_provider_layout.addWidget(self.provider_status_label) + + # Setup/Install button for non-cloud providers + self.provider_setup_btn = QPushButton("Setup") + self.provider_setup_btn.clicked.connect(self._setup_ocr_provider) + 
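# This setup button is re-labelled by _check_provider_status() depending on provider state
+ # ('Load Model', 'Reload', or 'Install') and stays hidden for the cloud providers, which
+ # need no local setup. +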
self.provider_setup_btn.setStyleSheet("QPushButton { background-color: #17a2b8; color: white; padding: 5px 15px; }") + self.provider_setup_btn.setMinimumWidth(100) + self.provider_setup_btn.setVisible(False) # Hidden by default, _check_provider_status will show it + ocr_provider_layout.addWidget(self.provider_setup_btn) + + # Add explicit download button for Hugging Face models + self.download_model_btn = QPushButton("📥 Download") + self.download_model_btn.clicked.connect(self._download_hf_model) + self.download_model_btn.setStyleSheet("QPushButton { background-color: #28a745; color: white; padding: 5px 15px; }") + self.download_model_btn.setMinimumWidth(150) + self.download_model_btn.setVisible(False) # Hidden by default + ocr_provider_layout.addWidget(self.download_model_btn) + + ocr_provider_layout.addStretch() + settings_frame_layout.addWidget(self.ocr_provider_frame) + + # Initialize OCR manager + from ocr_manager import OCRManager + self.ocr_manager = OCRManager(log_callback=self._log) + + # Check initial provider status + self._check_provider_status() + + # Google Cloud Credentials section (now in a frame that can be hidden) + self.google_creds_frame = QWidget() + google_creds_layout = QHBoxLayout(self.google_creds_frame) + google_creds_layout.setContentsMargins(0, 0, 0, 10) + google_creds_layout.setSpacing(10) + + google_label = QLabel("Google Cloud Credentials:") + google_label.setMinimumWidth(150) + google_label.setAlignment(Qt.AlignLeft) + google_creds_layout.addWidget(google_label) + + # Show current credentials file + google_creds_path = self.main_gui.config.get('google_vision_credentials', '') or self.main_gui.config.get('google_cloud_credentials', '') + creds_display = os.path.basename(google_creds_path) if google_creds_path else "Not Set" + + self.creds_label = QLabel(creds_display) + creds_font = QFont("Arial", 9) + self.creds_label.setFont(creds_font) + self.creds_label.setStyleSheet(f"color: {'green' if google_creds_path else 'red'};") + google_creds_layout.addWidget(self.creds_label) + + browse_btn = QPushButton("Browse") + browse_btn.clicked.connect(self._browse_google_credentials_permanent) + browse_btn.setStyleSheet("QPushButton { background-color: #007bff; color: white; padding: 5px 15px; }") + google_creds_layout.addWidget(browse_btn) + + google_creds_layout.addStretch() + settings_frame_layout.addWidget(self.google_creds_frame) + self.google_creds_frame.setVisible(False) # Hidden by default + + # Azure settings frame (hidden by default) + self.azure_frame = QWidget() + azure_frame_layout = QVBoxLayout(self.azure_frame) + azure_frame_layout.setContentsMargins(0, 0, 0, 10) + azure_frame_layout.setSpacing(5) + + # Azure Key + azure_key_frame = QWidget() + azure_key_layout = QHBoxLayout(azure_key_frame) + azure_key_layout.setContentsMargins(0, 0, 0, 0) + azure_key_layout.setSpacing(10) + + azure_key_label = QLabel("Azure Key:") + azure_key_label.setMinimumWidth(150) + azure_key_label.setAlignment(Qt.AlignLeft) + azure_key_layout.addWidget(azure_key_label) + + self.azure_key_entry = QLineEdit() + self.azure_key_entry.setEchoMode(QLineEdit.Password) + self.azure_key_entry.setMinimumWidth(150) # Reduced for better fit + self.azure_key_entry.setSizePolicy(QSizePolicy.Policy.Expanding, QSizePolicy.Policy.Fixed) + azure_key_layout.addWidget(self.azure_key_entry) + + # Show/Hide button for Azure key + self.show_azure_key_checkbox = self._create_styled_checkbox("Show") + self.show_azure_key_checkbox.stateChanged.connect(self._toggle_azure_key_visibility) + 
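# _toggle_azure_key_visibility is defined elsewhere in this class; a minimal sketch of what
+ # it is assumed to do is flip the echo mode of the key field, e.g.:
+ #     def _toggle_azure_key_visibility(self, *_):
+ #         show = self.show_azure_key_checkbox.isChecked()
+ #         self.azure_key_entry.setEchoMode(QLineEdit.Normal if show else QLineEdit.Password) +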
azure_key_layout.addWidget(self.show_azure_key_checkbox) + azure_key_layout.addStretch() + azure_frame_layout.addWidget(azure_key_frame) + + # Azure Endpoint + azure_endpoint_frame = QWidget() + azure_endpoint_layout = QHBoxLayout(azure_endpoint_frame) + azure_endpoint_layout.setContentsMargins(0, 0, 0, 0) + azure_endpoint_layout.setSpacing(10) + + azure_endpoint_label = QLabel("Azure Endpoint:") + azure_endpoint_label.setMinimumWidth(150) + azure_endpoint_label.setAlignment(Qt.AlignLeft) + azure_endpoint_layout.addWidget(azure_endpoint_label) + + self.azure_endpoint_entry = QLineEdit() + self.azure_endpoint_entry.setMinimumWidth(150) # Reduced for better fit + self.azure_endpoint_entry.setSizePolicy(QSizePolicy.Policy.Expanding, QSizePolicy.Policy.Fixed) + azure_endpoint_layout.addWidget(self.azure_endpoint_entry) + azure_endpoint_layout.addStretch() + azure_frame_layout.addWidget(azure_endpoint_frame) + + # Load saved Azure settings + saved_key = self.main_gui.config.get('azure_vision_key', '') + saved_endpoint = self.main_gui.config.get('azure_vision_endpoint', 'https://YOUR-RESOURCE.cognitiveservices.azure.com/') + self.azure_key_entry.setText(saved_key) + self.azure_endpoint_entry.setText(saved_endpoint) + + settings_frame_layout.addWidget(self.azure_frame) + self.azure_frame.setVisible(False) # Hidden by default + + # Initially show/hide based on saved provider + self._on_ocr_provider_change() + + # Separator for context settings + separator1 = QFrame() + separator1.setFrameShape(QFrame.HLine) + separator1.setFrameShadow(QFrame.Sunken) + settings_frame_layout.addWidget(separator1) + + # Context and Full Page Mode Settings + context_frame = QGroupBox("🔄 Context & Translation Mode") + context_frame_font = QFont("Arial", 11) + context_frame_font.setBold(True) + context_frame.setFont(context_frame_font) + context_frame_layout = QVBoxLayout(context_frame) + context_frame_layout.setContentsMargins(10, 10, 10, 10) + context_frame_layout.setSpacing(10) + + # Show current contextual settings from main GUI + context_info = QWidget() + context_info_layout = QVBoxLayout(context_info) + context_info_layout.setContentsMargins(0, 0, 0, 10) + context_info_layout.setSpacing(5) + + context_title = QLabel("Main GUI Context Settings:") + title_font = QFont("Arial", 10) + title_font.setBold(True) + context_title.setFont(title_font) + context_info_layout.addWidget(context_title) + + # Display current settings + settings_frame_display = QWidget() + settings_display_layout = QVBoxLayout(settings_frame_display) + settings_display_layout.setContentsMargins(20, 0, 0, 0) + settings_display_layout.setSpacing(3) + + # Contextual enabled status + contextual_status = "Enabled" if self.main_gui.contextual_var.get() else "Disabled" + self.contextual_status_label = QLabel(f"• Contextual Translation: {contextual_status}") + status_font = QFont("Arial", 10) + self.contextual_status_label.setFont(status_font) + settings_display_layout.addWidget(self.contextual_status_label) + + # History limit + history_limit = self.main_gui.trans_history.get() if hasattr(self.main_gui, 'trans_history') else "3" + self.history_limit_label = QLabel(f"• Translation History Limit: {history_limit} exchanges") + self.history_limit_label.setFont(status_font) + settings_display_layout.addWidget(self.history_limit_label) + + # Rolling history status + rolling_status = "Enabled (Rolling Window)" if self.main_gui.translation_history_rolling_var.get() else "Disabled (Reset on Limit)" + self.rolling_status_label = QLabel(f"• Rolling History: 
{rolling_status}") + self.rolling_status_label.setFont(status_font) + settings_display_layout.addWidget(self.rolling_status_label) + + context_info_layout.addWidget(settings_frame_display) + context_frame_layout.addWidget(context_info) + + # Refresh button to update from main GUI + refresh_btn = QPushButton("↻ Refresh from Main GUI") + refresh_btn.clicked.connect(self._refresh_context_settings) + refresh_btn.setStyleSheet("QPushButton { background-color: #6c757d; color: white; padding: 5px 15px; }") + context_frame_layout.addWidget(refresh_btn) + + # Separator + separator2 = QFrame() + separator2.setFrameShape(QFrame.HLine) + separator2.setFrameShadow(QFrame.Sunken) + context_frame_layout.addWidget(separator2) + + # Full Page Context Translation Settings + full_page_frame = QWidget() + full_page_layout = QVBoxLayout(full_page_frame) + full_page_layout.setContentsMargins(0, 0, 0, 0) + full_page_layout.setSpacing(5) + + full_page_title = QLabel("Full Page Context Mode (Manga-specific):") + title_font2 = QFont("Arial", 10) + title_font2.setBold(True) + full_page_title.setFont(title_font2) + full_page_layout.addWidget(full_page_title) + + # Enable/disable toggle + self.full_page_context_checked = self.main_gui.config.get('manga_full_page_context', True) + + toggle_frame = QWidget() + toggle_layout = QHBoxLayout(toggle_frame) + toggle_layout.setContentsMargins(20, 0, 0, 0) + toggle_layout.setSpacing(10) + + self.context_checkbox = self._create_styled_checkbox("Enable Full Page Context Translation") + self.context_checkbox.setChecked(self.full_page_context_checked) + self.context_checkbox.stateChanged.connect(self._on_context_toggle) + toggle_layout.addWidget(self.context_checkbox) + + # Edit prompt button + edit_prompt_btn = QPushButton("Edit Prompt") + edit_prompt_btn.clicked.connect(self._edit_context_prompt) + edit_prompt_btn.setStyleSheet("QPushButton { background-color: #6c757d; color: white; padding: 5px 15px; }") + toggle_layout.addWidget(edit_prompt_btn) + + # Help button for full page context + help_btn = QPushButton("?") + help_btn.setFixedWidth(30) + help_btn.clicked.connect(lambda: self._show_help_dialog( + "Full Page Context Mode", + "Full page context sends all text regions from the page together in a single request.\n\n" + "This allows the AI to see all text at once for more contextually accurate translations, " + "especially useful for maintaining character name consistency and understanding " + "conversation flow across multiple speech bubbles.\n\n" + "✅ Pros:\n" + "• Better context awareness\n" + "• Consistent character names\n" + "• Understanding of conversation flow\n" + "• Maintains tone across bubbles\n\n" + "❌ Cons:\n" + "• Single API call failure affects all text\n" + "• May use more tokens\n" + "• Slower for pages with many text regions" + )) + help_btn.setStyleSheet("QPushButton { background-color: #17a2b8; color: white; padding: 5px; }") + toggle_layout.addWidget(help_btn) + toggle_layout.addStretch() + + full_page_layout.addWidget(toggle_frame) + context_frame_layout.addWidget(full_page_frame) + + # Separator + separator3 = QFrame() + separator3.setFrameShape(QFrame.HLine) + separator3.setFrameShadow(QFrame.Sunken) + context_frame_layout.addWidget(separator3) + + # Visual Context Settings (for non-vision model support) + visual_frame = QWidget() + visual_layout = QVBoxLayout(visual_frame) + visual_layout.setContentsMargins(0, 0, 0, 0) + visual_layout.setSpacing(5) + + visual_title = QLabel("Visual Context (Image Support):") + title_font3 = QFont("Arial", 10) + 
title_font3.setBold(True) + visual_title.setFont(title_font3) + visual_layout.addWidget(visual_title) + + # Visual context toggle + self.visual_context_enabled_checked = self.main_gui.config.get('manga_visual_context_enabled', True) + + visual_toggle_frame = QWidget() + visual_toggle_layout = QHBoxLayout(visual_toggle_frame) + visual_toggle_layout.setContentsMargins(20, 0, 0, 0) + visual_toggle_layout.setSpacing(10) + + self.visual_context_checkbox = self._create_styled_checkbox("Include page image in translation requests") + self.visual_context_checkbox.setChecked(self.visual_context_enabled_checked) + self.visual_context_checkbox.stateChanged.connect(self._on_visual_context_toggle) + visual_toggle_layout.addWidget(self.visual_context_checkbox) + + # Help button for visual context + visual_help_btn = QPushButton("?") + visual_help_btn.setFixedWidth(30) + visual_help_btn.clicked.connect(lambda: self._show_help_dialog( + "Visual Context Settings", + "Visual context includes the manga page image with translation requests.\n\n" + "⚠️ WHEN TO DISABLE:\n" + "• Using text-only models (Claude, GPT-3.5, standard Gemini)\n" + "• Model doesn't support images\n" + "• Want to reduce token usage\n" + "• Testing text-only translation\n\n" + "✅ WHEN TO ENABLE:\n" + "• Using vision models (Gemini Vision, GPT-4V, Claude 3)\n" + "• Want spatial awareness of text position\n" + "• Need visual context for better translation\n\n" + "Impact:\n" + "• Disabled: Only text is sent (compatible with any model)\n" + "• Enabled: Text + image sent (requires vision model)\n\n" + "Note: Disabling may reduce translation quality as the AI won't see\n" + "the artwork context or spatial layout of the text." + )) + visual_help_btn.setStyleSheet("QPushButton { background-color: #17a2b8; color: white; padding: 5px; }") + visual_toggle_layout.addWidget(visual_help_btn) + visual_toggle_layout.addStretch() + + visual_layout.addWidget(visual_toggle_frame) + + # Output settings - moved here to be below visual context + output_settings_frame = QWidget() + output_settings_layout = QHBoxLayout(output_settings_frame) + output_settings_layout.setContentsMargins(20, 10, 0, 0) + output_settings_layout.setSpacing(10) + + self.create_subfolder_checkbox = self._create_styled_checkbox("Create 'translated' subfolder for output") + self.create_subfolder_checkbox.setChecked(self.main_gui.config.get('manga_create_subfolder', True)) + self.create_subfolder_checkbox.stateChanged.connect(self._save_rendering_settings) + output_settings_layout.addWidget(self.create_subfolder_checkbox) + output_settings_layout.addStretch() + + visual_layout.addWidget(output_settings_frame) + + context_frame_layout.addWidget(visual_frame) + + # Add the completed context_frame to settings_frame + settings_frame_layout.addWidget(context_frame) + + # Add main settings frame to left column + left_column_layout.addWidget(settings_frame) + + # Text Rendering Settings Frame - SPLIT BETWEEN COLUMNS + render_frame = QGroupBox("Text Visibility Settings") + render_frame_font = QFont("Arial", 12) + render_frame_font.setBold(True) + render_frame.setFont(render_frame_font) + render_frame_layout = QVBoxLayout(render_frame) + render_frame_layout.setContentsMargins(15, 15, 15, 10) + render_frame_layout.setSpacing(10) + + # Inpainting section + inpaint_group = QGroupBox("Inpainting") + inpaint_group_font = QFont("Arial", 11) + inpaint_group_font.setBold(True) + inpaint_group.setFont(inpaint_group_font) + inpaint_group_layout = QVBoxLayout(inpaint_group) + 
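# The inpainting group offers a 'Skip Inpainter' toggle plus Cloud API / Local Model / Hybrid
+ # method radios; the cloud and local sub-frames built below (cloud_inpaint_frame and
+ # local_inpaint_frame) are presumably shown or hidden by _on_inpaint_method_change(). +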
inpaint_group_layout.setContentsMargins(15, 15, 15, 10) + inpaint_group_layout.setSpacing(10) + + # Skip inpainting toggle - use value loaded from config + self.skip_inpainting_checkbox = self._create_styled_checkbox("Skip Inpainter") + self.skip_inpainting_checkbox.setChecked(self.skip_inpainting_value) + self.skip_inpainting_checkbox.stateChanged.connect(self._toggle_inpaint_visibility) + inpaint_group_layout.addWidget(self.skip_inpainting_checkbox) + + # Inpainting method selection (only visible when inpainting is enabled) + self.inpaint_method_frame = QWidget() + inpaint_method_layout = QHBoxLayout(self.inpaint_method_frame) + inpaint_method_layout.setContentsMargins(0, 0, 0, 0) + inpaint_method_layout.setSpacing(10) + + method_label = QLabel("Inpaint Method:") + method_label_font = QFont('Arial', 9) + method_label.setFont(method_label_font) + method_label.setMinimumWidth(95) + method_label.setAlignment(Qt.AlignLeft) + inpaint_method_layout.addWidget(method_label) + + # Radio buttons for inpaint method + method_selection_frame = QWidget() + method_selection_layout = QHBoxLayout(method_selection_frame) + method_selection_layout.setContentsMargins(0, 0, 0, 0) + method_selection_layout.setSpacing(10) + + self.inpaint_method_value = self.main_gui.config.get('manga_inpaint_method', 'local') + self.inpaint_method_group = QButtonGroup() + + # Set smaller font for radio buttons + radio_font = QFont('Arial', 9) + + cloud_radio = QRadioButton("Cloud API") + cloud_radio.setFont(radio_font) + cloud_radio.setChecked(self.inpaint_method_value == 'cloud') + cloud_radio.toggled.connect(lambda checked: self._on_inpaint_method_change() if checked else None) + self.inpaint_method_group.addButton(cloud_radio, 0) + method_selection_layout.addWidget(cloud_radio) + + local_radio = QRadioButton("Local Model") + local_radio.setFont(radio_font) + local_radio.setChecked(self.inpaint_method_value == 'local') + local_radio.toggled.connect(lambda checked: self._on_inpaint_method_change() if checked else None) + self.inpaint_method_group.addButton(local_radio, 1) + method_selection_layout.addWidget(local_radio) + + hybrid_radio = QRadioButton("Hybrid") + hybrid_radio.setFont(radio_font) + hybrid_radio.setChecked(self.inpaint_method_value == 'hybrid') + hybrid_radio.toggled.connect(lambda checked: self._on_inpaint_method_change() if checked else None) + self.inpaint_method_group.addButton(hybrid_radio, 2) + method_selection_layout.addWidget(hybrid_radio) + + # Store references to radio buttons + self.cloud_radio = cloud_radio + self.local_radio = local_radio + self.hybrid_radio = hybrid_radio + + inpaint_method_layout.addWidget(method_selection_frame) + inpaint_method_layout.addStretch() + inpaint_group_layout.addWidget(self.inpaint_method_frame) + + # Cloud settings frame + self.cloud_inpaint_frame = QWidget() + # Ensure this widget doesn't become a window + self.cloud_inpaint_frame.setWindowFlags(Qt.WindowType.Widget) + cloud_inpaint_layout = QVBoxLayout(self.cloud_inpaint_frame) + cloud_inpaint_layout.setContentsMargins(0, 0, 0, 0) + cloud_inpaint_layout.setSpacing(5) + + # Quality selection for cloud + quality_frame = QWidget() + quality_layout = QHBoxLayout(quality_frame) + quality_layout.setContentsMargins(0, 0, 0, 0) + quality_layout.setSpacing(10) + + quality_label = QLabel("Cloud Quality:") + quality_label_font = QFont('Arial', 9) + quality_label.setFont(quality_label_font) + quality_label.setMinimumWidth(95) + quality_label.setAlignment(Qt.AlignLeft) + quality_layout.addWidget(quality_label) + + # 
inpaint_quality_value is already loaded from config in _load_rendering_settings + self.quality_button_group = QButtonGroup() + + quality_options = [('high', 'High Quality'), ('fast', 'Fast')] + for idx, (value, text) in enumerate(quality_options): + quality_radio = QRadioButton(text) + quality_radio.setChecked(self.inpaint_quality_value == value) + quality_radio.toggled.connect(lambda checked, v=value: self._save_rendering_settings() if checked else None) + self.quality_button_group.addButton(quality_radio, idx) + quality_layout.addWidget(quality_radio) + + quality_layout.addStretch() + cloud_inpaint_layout.addWidget(quality_frame) + + # Conditional separator + self.inpaint_separator = QFrame() + self.inpaint_separator.setFrameShape(QFrame.HLine) + self.inpaint_separator.setFrameShadow(QFrame.Sunken) + if not self.skip_inpainting_value: + cloud_inpaint_layout.addWidget(self.inpaint_separator) + + # Cloud API status + api_status_frame = QWidget() + api_status_layout = QHBoxLayout(api_status_frame) + api_status_layout.setContentsMargins(0, 10, 0, 0) + api_status_layout.setSpacing(10) + + # Check if API key exists + saved_api_key = self.main_gui.config.get('replicate_api_key', '') + if saved_api_key: + status_text = "✅ Cloud API configured" + status_color = 'green' + else: + status_text = "❌ Cloud API not configured" + status_color = 'red' + + self.inpaint_api_status_label = QLabel(status_text) + api_status_font = QFont('Arial', 9) + self.inpaint_api_status_label.setFont(api_status_font) + self.inpaint_api_status_label.setStyleSheet(f"color: {status_color};") + api_status_layout.addWidget(self.inpaint_api_status_label) + + configure_api_btn = QPushButton("Configure API Key") + configure_api_btn.clicked.connect(self._configure_inpaint_api) + configure_api_btn.setStyleSheet("QPushButton { background-color: #17a2b8; color: white; padding: 5px 15px; }") + api_status_layout.addWidget(configure_api_btn) + + if saved_api_key: + clear_api_btn = QPushButton("Clear") + clear_api_btn.clicked.connect(self._clear_inpaint_api) + clear_api_btn.setStyleSheet("QPushButton { background-color: #6c757d; color: white; padding: 5px 15px; }") + api_status_layout.addWidget(clear_api_btn) + + api_status_layout.addStretch() + cloud_inpaint_layout.addWidget(api_status_frame) + inpaint_group_layout.addWidget(self.cloud_inpaint_frame) + + # Local inpainting settings frame + self.local_inpaint_frame = QWidget() + # Ensure this widget doesn't become a window + self.local_inpaint_frame.setWindowFlags(Qt.WindowType.Widget) + local_inpaint_layout = QVBoxLayout(self.local_inpaint_frame) + local_inpaint_layout.setContentsMargins(0, 0, 0, 0) + local_inpaint_layout.setSpacing(5) + + # Local model selection + local_model_frame = QWidget() + local_model_layout = QHBoxLayout(local_model_frame) + local_model_layout.setContentsMargins(0, 0, 0, 0) + local_model_layout.setSpacing(10) + + local_model_label = QLabel("Local Model:") + local_model_label_font = QFont('Arial', 9) + local_model_label.setFont(local_model_label_font) + local_model_label.setMinimumWidth(95) + local_model_label.setAlignment(Qt.AlignLeft) + local_model_layout.addWidget(local_model_label) + self.local_model_label = local_model_label + + self.local_model_type_value = self.main_gui.config.get('manga_local_inpaint_model', 'anime_onnx') + local_model_combo = QComboBox() + local_model_combo.addItems(['aot', 'aot_onnx', 'lama', 'lama_onnx', 'anime', 'anime_onnx', 'mat', 'ollama', 'sd_local']) + local_model_combo.setCurrentText(self.local_model_type_value) + 
local_model_combo.setMinimumWidth(120) + local_model_combo.setMaximumWidth(120) + local_combo_font = QFont('Arial', 9) + local_model_combo.setFont(local_combo_font) + local_model_combo.currentTextChanged.connect(self._on_local_model_change) + self._disable_combobox_mousewheel(local_model_combo) # Disable mousewheel scrolling + local_model_layout.addWidget(local_model_combo) + self.local_model_combo = local_model_combo + + # Model descriptions + model_desc = { + 'lama': 'LaMa (Best quality)', + 'aot': 'AOT GAN (Fast)', + 'aot_onnx': 'AOT ONNX (Optimized)', + 'mat': 'MAT (High-res)', + 'sd_local': 'Stable Diffusion (Anime)', + 'anime': 'Anime/Manga Inpainting', + 'anime_onnx': 'Anime ONNX (Fast/Optimized)', + 'lama_onnx': 'LaMa ONNX (Optimized)', + } + self.model_desc_label = QLabel(model_desc.get(self.local_model_type_value, '')) + desc_font = QFont('Arial', 8) + self.model_desc_label.setFont(desc_font) + self.model_desc_label.setStyleSheet("color: gray;") + self.model_desc_label.setMaximumWidth(200) + local_model_layout.addWidget(self.model_desc_label) + local_model_layout.addStretch() + + local_inpaint_layout.addWidget(local_model_frame) + + # Model file selection + model_path_frame = QWidget() + model_path_layout = QHBoxLayout(model_path_frame) + model_path_layout.setContentsMargins(0, 5, 0, 0) + model_path_layout.setSpacing(10) + + model_file_label = QLabel("Model File:") + model_file_label_font = QFont('Arial', 9) + model_file_label.setFont(model_file_label_font) + model_file_label.setMinimumWidth(95) + model_file_label.setAlignment(Qt.AlignLeft) + model_path_layout.addWidget(model_file_label) + self.model_file_label = model_file_label + + self.local_model_path_value = self.main_gui.config.get(f'manga_{self.local_model_type_value}_model_path', '') + self.local_model_entry = QLineEdit(self.local_model_path_value) + self.local_model_entry.setReadOnly(True) + self.local_model_entry.setMinimumWidth(100) # Reduced for better fit + self.local_model_entry.setSizePolicy(QSizePolicy.Policy.Expanding, QSizePolicy.Policy.Fixed) + self.local_model_entry.setStyleSheet( + "QLineEdit { background-color: #2b2b2b; color: #ffffff; }" + ) + model_path_layout.addWidget(self.local_model_entry) + + browse_model_btn = QPushButton("Browse") + browse_model_btn.clicked.connect(self._browse_local_model) + browse_model_btn.setStyleSheet("QPushButton { background-color: #007bff; color: white; padding: 5px 15px; }") + model_path_layout.addWidget(browse_model_btn) + self.browse_model_btn = browse_model_btn + + # Manual load button to avoid auto-loading on dialog open + load_model_btn = QPushButton("Load") + load_model_btn.clicked.connect(self._click_load_local_model) + load_model_btn.setStyleSheet("QPushButton { background-color: #28a745; color: white; padding: 5px 15px; }") + model_path_layout.addWidget(load_model_btn) + self.load_model_btn = load_model_btn + model_path_layout.addStretch() + + local_inpaint_layout.addWidget(model_path_frame) + + # Model status + self.local_model_status_label = QLabel("") + status_font = QFont('Arial', 9) + self.local_model_status_label.setFont(status_font) + local_inpaint_layout.addWidget(self.local_model_status_label) + + # Download model button + download_model_btn = QPushButton("📥 Download Model") + download_model_btn.clicked.connect(self._download_model) + download_model_btn.setStyleSheet("QPushButton { background-color: #17a2b8; color: white; padding: 5px 15px; }") + local_inpaint_layout.addWidget(download_model_btn) + + # Model info button + model_info_btn = QPushButton("ℹ️ 
Model Info") + model_info_btn.clicked.connect(self._show_model_info) + model_info_btn.setStyleSheet("QPushButton { background-color: #6c757d; color: white; padding: 5px 15px; }") + local_inpaint_layout.addWidget(model_info_btn) + + # Add local_inpaint_frame to inpaint_group + inpaint_group_layout.addWidget(self.local_inpaint_frame) + + # Hide both frames by default to prevent window popup + self.cloud_inpaint_frame.hide() + self.local_inpaint_frame.hide() + + # Try to load saved model for current type on dialog open + initial_model_type = self.local_model_type_value + initial_model_path = self.main_gui.config.get(f'manga_{initial_model_type}_model_path', '') + + if initial_model_path and os.path.exists(initial_model_path): + self.local_model_entry.setText(initial_model_path) + if getattr(self, 'preload_local_models_on_open', False): + self.local_model_status_label.setText("⏳ Loading saved model...") + self.local_model_status_label.setStyleSheet("color: orange;") + # Auto-load after dialog is ready + QTimer.singleShot(500, lambda: self._try_load_model(initial_model_type, initial_model_path)) + else: + # Do not auto-load large models at startup to avoid crashes on some systems + self.local_model_status_label.setText("💤 Saved model detected (not loaded). Click 'Load' to initialize.") + self.local_model_status_label.setStyleSheet("color: blue;") + else: + self.local_model_status_label.setText("No model loaded") + self.local_model_status_label.setStyleSheet("color: gray;") + + # Initialize visibility based on current settings + self._toggle_inpaint_visibility() + + # Add inpaint_group to render_frame + render_frame_layout.addWidget(inpaint_group) + + # Add render_frame (inpainting only) to LEFT COLUMN + left_column_layout.addWidget(render_frame) + + # Advanced Settings button at the TOP OF RIGHT COLUMN + advanced_button_frame = QWidget() + advanced_button_layout = QHBoxLayout(advanced_button_frame) + advanced_button_layout.setContentsMargins(0, 0, 0, 10) + advanced_button_layout.setSpacing(10) + + advanced_settings_desc = QLabel("Configure OCR, preprocessing, and performance options") + desc_font = QFont("Arial", 9) + advanced_settings_desc.setFont(desc_font) + advanced_settings_desc.setStyleSheet("color: gray;") + advanced_button_layout.addWidget(advanced_settings_desc) + + advanced_button_layout.addStretch() + + advanced_settings_btn = QPushButton("⚙️ Advanced Settings") + advanced_settings_btn.clicked.connect(self._open_advanced_settings) + advanced_settings_btn.setStyleSheet("QPushButton { background-color: #17a2b8; color: white; padding: 5px 15px; }") + advanced_button_layout.addWidget(advanced_settings_btn) + + right_column_layout.addWidget(advanced_button_frame) + + # Background Settings - MOVED TO RIGHT COLUMN + self.bg_settings_frame = QGroupBox("Background Settings") + bg_settings_font = QFont("Arial", 10) + bg_settings_font.setBold(True) + self.bg_settings_frame.setFont(bg_settings_font) + bg_settings_layout = QVBoxLayout(self.bg_settings_frame) + bg_settings_layout.setContentsMargins(10, 10, 10, 10) + bg_settings_layout.setSpacing(8) + + # Free text only background opacity toggle (applies BG opacity only to free-text regions) + self.ft_only_checkbox = self._create_styled_checkbox("Free text only background opacity") + self.ft_only_checkbox.setChecked(self.free_text_only_bg_opacity_value) + # Connect directly to save+apply (working pattern) + self.ft_only_checkbox.stateChanged.connect(lambda: (self._on_ft_only_bg_opacity_changed(), self._save_rendering_settings(), 
self._apply_rendering_settings())) + bg_settings_layout.addWidget(self.ft_only_checkbox) + + # Background opacity slider + opacity_frame = QWidget() + opacity_layout = QHBoxLayout(opacity_frame) + opacity_layout.setContentsMargins(0, 5, 0, 5) + opacity_layout.setSpacing(10) + + opacity_label_text = QLabel("Background Opacity:") + opacity_label_text.setMinimumWidth(150) + opacity_layout.addWidget(opacity_label_text) + + self.opacity_slider = QSlider(Qt.Horizontal) + self.opacity_slider.setMinimum(0) + self.opacity_slider.setMaximum(255) + self.opacity_slider.setValue(self.bg_opacity_value) + self.opacity_slider.setMinimumWidth(200) + self.opacity_slider.valueChanged.connect(lambda value: (self._update_opacity_label(value), self._save_rendering_settings(), self._apply_rendering_settings())) + opacity_layout.addWidget(self.opacity_slider) + + self.opacity_label = QLabel("100%") + self.opacity_label.setMinimumWidth(50) + opacity_layout.addWidget(self.opacity_label) + opacity_layout.addStretch() + + bg_settings_layout.addWidget(opacity_frame) + + # Initialize the label with the loaded value + self._update_opacity_label(self.bg_opacity_value) + + # Background size reduction + reduction_frame = QWidget() + reduction_layout = QHBoxLayout(reduction_frame) + reduction_layout.setContentsMargins(0, 5, 0, 5) + reduction_layout.setSpacing(10) + + reduction_label_text = QLabel("Background Size:") + reduction_label_text.setMinimumWidth(150) + reduction_layout.addWidget(reduction_label_text) + + self.reduction_slider = QDoubleSpinBox() + self.reduction_slider.setMinimum(0.5) + self.reduction_slider.setMaximum(2.0) + self.reduction_slider.setSingleStep(0.05) + self.reduction_slider.setValue(self.bg_reduction_value) + self.reduction_slider.setMinimumWidth(100) + self.reduction_slider.valueChanged.connect(lambda value: (self._update_reduction_label(value), self._save_rendering_settings(), self._apply_rendering_settings())) + self._disable_spinbox_mousewheel(self.reduction_slider) + reduction_layout.addWidget(self.reduction_slider) + + self.reduction_label = QLabel("100%") + self.reduction_label.setMinimumWidth(50) + reduction_layout.addWidget(self.reduction_label) + reduction_layout.addStretch() + + bg_settings_layout.addWidget(reduction_frame) + + # Initialize the label with the loaded value + self._update_reduction_label(self.bg_reduction_value) + + # Background style selection + style_frame = QWidget() + style_layout = QHBoxLayout(style_frame) + style_layout.setContentsMargins(0, 5, 0, 5) + style_layout.setSpacing(10) + + style_label = QLabel("Background Style:") + style_label.setMinimumWidth(150) + style_layout.addWidget(style_label) + + # Radio buttons for background style + self.bg_style_group = QButtonGroup() + + box_radio = QRadioButton("Box") + box_radio.setChecked(self.bg_style_value == "box") + box_radio.toggled.connect(lambda checked: (setattr(self, 'bg_style_value', 'box'), self._save_rendering_settings(), self._apply_rendering_settings()) if checked else None) + self.bg_style_group.addButton(box_radio, 0) + style_layout.addWidget(box_radio) + + circle_radio = QRadioButton("Circle") + circle_radio.setChecked(self.bg_style_value == "circle") + circle_radio.toggled.connect(lambda checked: (setattr(self, 'bg_style_value', 'circle'), self._save_rendering_settings(), self._apply_rendering_settings()) if checked else None) + self.bg_style_group.addButton(circle_radio, 1) + style_layout.addWidget(circle_radio) + + wrap_radio = QRadioButton("Wrap") + wrap_radio.setChecked(self.bg_style_value == "wrap") + 
wrap_radio.toggled.connect(lambda checked: (setattr(self, 'bg_style_value', 'wrap'), self._save_rendering_settings(), self._apply_rendering_settings()) if checked else None) + self.bg_style_group.addButton(wrap_radio, 2) + style_layout.addWidget(wrap_radio) + + # Store references + self.box_radio = box_radio + self.circle_radio = circle_radio + self.wrap_radio = wrap_radio + + # Add tooltips or descriptions + style_help = QLabel("(Box: rounded rectangle, Circle: ellipse, Wrap: per-line)") + style_help_font = QFont('Arial', 9) + style_help.setFont(style_help_font) + style_help.setStyleSheet("color: gray;") + style_layout.addWidget(style_help) + style_layout.addStretch() + + bg_settings_layout.addWidget(style_frame) + + # Add Background Settings to RIGHT COLUMN + right_column_layout.addWidget(self.bg_settings_frame) + + # Font Settings group (consolidated) - GOES TO RIGHT COLUMN (after background settings) + font_render_frame = QGroupBox("Font & Text Settings") + font_render_frame_font = QFont("Arial", 10) + font_render_frame_font.setBold(True) + font_render_frame.setFont(font_render_frame_font) + font_render_frame_layout = QVBoxLayout(font_render_frame) + font_render_frame_layout.setContentsMargins(15, 15, 15, 10) + font_render_frame_layout.setSpacing(10) + self.sizing_group = QGroupBox("Font Settings") + sizing_group_font = QFont("Arial", 9) + sizing_group_font.setBold(True) + self.sizing_group.setFont(sizing_group_font) + sizing_group_layout = QVBoxLayout(self.sizing_group) + sizing_group_layout.setContentsMargins(10, 10, 10, 10) + sizing_group_layout.setSpacing(8) + + # Font sizing algorithm selection + algo_frame = QWidget() + algo_layout = QHBoxLayout(algo_frame) + algo_layout.setContentsMargins(0, 6, 0, 0) + algo_layout.setSpacing(10) + + algo_label = QLabel("Font Size Algorithm:") + algo_label.setMinimumWidth(150) + algo_layout.addWidget(algo_label) + + # Radio buttons for algorithm selection + self.font_algorithm_group = QButtonGroup() + + for idx, (value, text) in enumerate([ + ('conservative', 'Conservative'), + ('smart', 'Smart'), + ('aggressive', 'Aggressive') + ]): + rb = QRadioButton(text) + rb.setChecked(self.font_algorithm_value == value) + rb.toggled.connect(lambda checked, v=value: (setattr(self, 'font_algorithm_value', v), self._save_rendering_settings(), self._apply_rendering_settings()) if checked else None) + self.font_algorithm_group.addButton(rb, idx) + algo_layout.addWidget(rb) + + algo_layout.addStretch() + sizing_group_layout.addWidget(algo_frame) + + # Font size selection with mode toggle + font_frame_container = QWidget() + font_frame_layout = QVBoxLayout(font_frame_container) + font_frame_layout.setContentsMargins(0, 5, 0, 5) + font_frame_layout.setSpacing(10) + + # Mode selection frame + mode_frame = QWidget() + mode_layout = QHBoxLayout(mode_frame) + mode_layout.setContentsMargins(0, 0, 0, 0) + mode_layout.setSpacing(10) + + mode_label = QLabel("Font Size Mode:") + mode_label.setMinimumWidth(150) + mode_layout.addWidget(mode_label) + + # Radio buttons for mode selection + self.font_size_mode_group = QButtonGroup() + + auto_radio = QRadioButton("Auto") + auto_radio.setChecked(self.font_size_mode_value == "auto") + auto_radio.toggled.connect(lambda checked: (setattr(self, 'font_size_mode_value', 'auto'), self._toggle_font_size_mode()) if checked else None) + self.font_size_mode_group.addButton(auto_radio, 0) + mode_layout.addWidget(auto_radio) + + fixed_radio = QRadioButton("Fixed Size") + fixed_radio.setChecked(self.font_size_mode_value == "fixed") + 
fixed_radio.toggled.connect(lambda checked: (setattr(self, 'font_size_mode_value', 'fixed'), self._toggle_font_size_mode()) if checked else None) + self.font_size_mode_group.addButton(fixed_radio, 1) + mode_layout.addWidget(fixed_radio) + + multiplier_radio = QRadioButton("Dynamic Multiplier") + multiplier_radio.setChecked(self.font_size_mode_value == "multiplier") + multiplier_radio.toggled.connect(lambda checked: (setattr(self, 'font_size_mode_value', 'multiplier'), self._toggle_font_size_mode()) if checked else None) + self.font_size_mode_group.addButton(multiplier_radio, 2) + mode_layout.addWidget(multiplier_radio) + + # Store references + self.auto_mode_radio = auto_radio + self.fixed_mode_radio = fixed_radio + self.multiplier_mode_radio = multiplier_radio + + mode_layout.addStretch() + font_frame_layout.addWidget(mode_frame) + + # Fixed font size frame + self.fixed_size_frame = QWidget() + fixed_size_layout = QHBoxLayout(self.fixed_size_frame) + fixed_size_layout.setContentsMargins(0, 8, 0, 0) + fixed_size_layout.setSpacing(10) + + fixed_size_label = QLabel("Font Size:") + fixed_size_label.setMinimumWidth(150) + fixed_size_layout.addWidget(fixed_size_label) + + self.font_size_spinbox = QSpinBox() + self.font_size_spinbox.setMinimum(0) + self.font_size_spinbox.setMaximum(72) + self.font_size_spinbox.setValue(self.font_size_value) + self.font_size_spinbox.setMinimumWidth(100) + self.font_size_spinbox.valueChanged.connect(lambda value: (setattr(self, 'font_size_value', value), self._save_rendering_settings(), self._apply_rendering_settings())) + self._disable_spinbox_mousewheel(self.font_size_spinbox) + fixed_size_layout.addWidget(self.font_size_spinbox) + + fixed_help_label = QLabel("(0 = Auto)") + fixed_help_font = QFont('Arial', 9) + fixed_help_label.setFont(fixed_help_font) + fixed_help_label.setStyleSheet("color: gray;") + fixed_size_layout.addWidget(fixed_help_label) + fixed_size_layout.addStretch() + + font_frame_layout.addWidget(self.fixed_size_frame) + + # Dynamic multiplier frame + self.multiplier_frame = QWidget() + multiplier_layout = QHBoxLayout(self.multiplier_frame) + multiplier_layout.setContentsMargins(0, 8, 0, 0) + multiplier_layout.setSpacing(10) + + multiplier_label_text = QLabel("Size Multiplier:") + multiplier_label_text.setMinimumWidth(150) + multiplier_layout.addWidget(multiplier_label_text) + + self.multiplier_slider = QDoubleSpinBox() + self.multiplier_slider.setMinimum(0.5) + self.multiplier_slider.setMaximum(2.0) + self.multiplier_slider.setSingleStep(0.1) + self.multiplier_slider.setValue(self.font_size_multiplier_value) + self.multiplier_slider.setMinimumWidth(100) + self.multiplier_slider.valueChanged.connect(lambda value: (self._update_multiplier_label(value), self._save_rendering_settings(), self._apply_rendering_settings())) + self._disable_spinbox_mousewheel(self.multiplier_slider) + multiplier_layout.addWidget(self.multiplier_slider) + + self.multiplier_label = QLabel("1.0x") + self.multiplier_label.setMinimumWidth(50) + multiplier_layout.addWidget(self.multiplier_label) + + multiplier_help_label = QLabel("(Scales with panel size)") + multiplier_help_font = QFont('Arial', 9) + multiplier_help_label.setFont(multiplier_help_font) + multiplier_help_label.setStyleSheet("color: gray;") + multiplier_layout.addWidget(multiplier_help_label) + multiplier_layout.addStretch() + + font_frame_layout.addWidget(self.multiplier_frame) + + # Constraint checkbox frame (only visible in multiplier mode) + self.constraint_frame = QWidget() + constraint_layout = 
QHBoxLayout(self.constraint_frame) + constraint_layout.setContentsMargins(20, 0, 0, 0) + constraint_layout.setSpacing(10) + + self.constrain_checkbox = self._create_styled_checkbox("Constrain text to bubble boundaries") + self.constrain_checkbox.setChecked(self.constrain_to_bubble_value) + self.constrain_checkbox.stateChanged.connect(lambda: (setattr(self, 'constrain_to_bubble_value', self.constrain_checkbox.isChecked()), self._save_rendering_settings(), self._apply_rendering_settings())) + constraint_layout.addWidget(self.constrain_checkbox) + + constraint_help_label = QLabel("(Unchecked allows text to exceed bubbles)") + constraint_help_font = QFont('Arial', 9) + constraint_help_label.setFont(constraint_help_font) + constraint_help_label.setStyleSheet("color: gray;") + constraint_layout.addWidget(constraint_help_label) + constraint_layout.addStretch() + + font_frame_layout.addWidget(self.constraint_frame) + + # Add font_frame_container to sizing_group_layout + sizing_group_layout.addWidget(font_frame_container) + + # Minimum Font Size (Auto mode lower bound) + self.min_size_frame = QWidget() + min_size_layout = QHBoxLayout(self.min_size_frame) + min_size_layout.setContentsMargins(0, 5, 0, 5) + min_size_layout.setSpacing(10) + + min_size_label = QLabel("Minimum Font Size:") + min_size_label.setMinimumWidth(150) + min_size_layout.addWidget(min_size_label) + + self.min_size_spinbox = QSpinBox() + self.min_size_spinbox.setMinimum(8) + self.min_size_spinbox.setMaximum(20) + self.min_size_spinbox.setValue(self.auto_min_size_value) + self.min_size_spinbox.setMinimumWidth(100) + self.min_size_spinbox.valueChanged.connect(lambda value: (setattr(self, 'auto_min_size_value', value), self._save_rendering_settings(), self._apply_rendering_settings())) + self._disable_spinbox_mousewheel(self.min_size_spinbox) + min_size_layout.addWidget(self.min_size_spinbox) + + min_help_label = QLabel("(Auto mode won't go below this)") + min_help_font = QFont('Arial', 9) + min_help_label.setFont(min_help_font) + min_help_label.setStyleSheet("color: gray;") + min_size_layout.addWidget(min_help_label) + min_size_layout.addStretch() + + sizing_group_layout.addWidget(self.min_size_frame) + + # Maximum Font Size (Auto mode upper bound) + self.max_size_frame = QWidget() + max_size_layout = QHBoxLayout(self.max_size_frame) + max_size_layout.setContentsMargins(0, 5, 0, 5) + max_size_layout.setSpacing(10) + + max_size_label = QLabel("Maximum Font Size:") + max_size_label.setMinimumWidth(150) + max_size_layout.addWidget(max_size_label) + + self.max_size_spinbox = QSpinBox() + self.max_size_spinbox.setMinimum(20) + self.max_size_spinbox.setMaximum(100) + self.max_size_spinbox.setValue(self.max_font_size_value) + self.max_size_spinbox.setMinimumWidth(100) + self.max_size_spinbox.valueChanged.connect(lambda value: (setattr(self, 'max_font_size_value', value), self._save_rendering_settings(), self._apply_rendering_settings())) + self._disable_spinbox_mousewheel(self.max_size_spinbox) + max_size_layout.addWidget(self.max_size_spinbox) + + max_help_label = QLabel("(Limits maximum text size)") + max_help_font = QFont('Arial', 9) + max_help_label.setFont(max_help_font) + max_help_label.setStyleSheet("color: gray;") + max_size_layout.addWidget(max_help_label) + max_size_layout.addStretch() + + sizing_group_layout.addWidget(self.max_size_frame) + + # Initialize visibility AFTER all frames are created + self._toggle_font_size_mode() + + # Auto Fit Style (applies to Auto mode) + fit_row = QWidget() + fit_layout = QHBoxLayout(fit_row) + 
fit_layout.setContentsMargins(0, 0, 0, 6) + fit_layout.setSpacing(10) + + fit_label = QLabel("Auto Fit Style:") + fit_label.setMinimumWidth(110) + fit_layout.addWidget(fit_label) + + # Radio buttons for auto fit style + self.auto_fit_style_group = QButtonGroup() + + for idx, (value, text) in enumerate([('compact','Compact'), ('balanced','Balanced'), ('readable','Readable')]): + rb = QRadioButton(text) + rb.setChecked(self.auto_fit_style_value == value) + rb.toggled.connect(lambda checked, v=value: (setattr(self, 'auto_fit_style_value', v), self._save_rendering_settings(), self._apply_rendering_settings()) if checked else None) + self.auto_fit_style_group.addButton(rb, idx) + fit_layout.addWidget(rb) + + fit_layout.addStretch() + sizing_group_layout.addWidget(fit_row) + + # Behavior toggles + self.prefer_larger_checkbox = self._create_styled_checkbox("Prefer larger text") + self.prefer_larger_checkbox.setChecked(self.prefer_larger_value) + self.prefer_larger_checkbox.stateChanged.connect(lambda: (setattr(self, 'prefer_larger_value', self.prefer_larger_checkbox.isChecked()), self._save_rendering_settings(), self._apply_rendering_settings())) + sizing_group_layout.addWidget(self.prefer_larger_checkbox) + + self.bubble_size_factor_checkbox = self._create_styled_checkbox("Scale with bubble size") + self.bubble_size_factor_checkbox.setChecked(self.bubble_size_factor_value) + self.bubble_size_factor_checkbox.stateChanged.connect(lambda: (setattr(self, 'bubble_size_factor_value', self.bubble_size_factor_checkbox.isChecked()), self._save_rendering_settings(), self._apply_rendering_settings())) + sizing_group_layout.addWidget(self.bubble_size_factor_checkbox) + + # Line Spacing row with live value label + row_ls = QWidget() + ls_layout = QHBoxLayout(row_ls) + ls_layout.setContentsMargins(0, 6, 0, 2) + ls_layout.setSpacing(10) + + ls_label = QLabel("Line Spacing:") + ls_label.setMinimumWidth(110) + ls_layout.addWidget(ls_label) + + self.line_spacing_spinbox = QDoubleSpinBox() + self.line_spacing_spinbox.setMinimum(1.0) + self.line_spacing_spinbox.setMaximum(2.0) + self.line_spacing_spinbox.setSingleStep(0.01) + self.line_spacing_spinbox.setValue(self.line_spacing_value) + self.line_spacing_spinbox.setMinimumWidth(100) + self.line_spacing_spinbox.valueChanged.connect(lambda value: (self._on_line_spacing_changed(value), self._save_rendering_settings(), self._apply_rendering_settings())) + self._disable_spinbox_mousewheel(self.line_spacing_spinbox) + ls_layout.addWidget(self.line_spacing_spinbox) + + self.line_spacing_value_label = QLabel(f"{self.line_spacing_value:.2f}") + self.line_spacing_value_label.setMinimumWidth(50) + ls_layout.addWidget(self.line_spacing_value_label) + ls_layout.addStretch() + + sizing_group_layout.addWidget(row_ls) + + # Max Lines + row_ml = QWidget() + ml_layout = QHBoxLayout(row_ml) + ml_layout.setContentsMargins(0, 2, 0, 4) + ml_layout.setSpacing(10) + + ml_label = QLabel("Max Lines:") + ml_label.setMinimumWidth(110) + ml_layout.addWidget(ml_label) + + self.max_lines_spinbox = QSpinBox() + self.max_lines_spinbox.setMinimum(5) + self.max_lines_spinbox.setMaximum(20) + self.max_lines_spinbox.setValue(self.max_lines_value) + self.max_lines_spinbox.setMinimumWidth(100) + self.max_lines_spinbox.valueChanged.connect(lambda value: (setattr(self, 'max_lines_value', value), self._save_rendering_settings(), self._apply_rendering_settings())) + self._disable_spinbox_mousewheel(self.max_lines_spinbox) + ml_layout.addWidget(self.max_lines_spinbox) + ml_layout.addStretch() + + 
sizing_group_layout.addWidget(row_ml) + + # Quick Presets (horizontal) merged into sizing group + row_presets = QWidget() + presets_layout = QHBoxLayout(row_presets) + presets_layout.setContentsMargins(0, 6, 0, 2) + presets_layout.setSpacing(10) + + presets_label = QLabel("Quick Presets:") + presets_label.setMinimumWidth(110) + presets_layout.addWidget(presets_label) + + small_preset_btn = QPushButton("Small Bubbles") + small_preset_btn.setMinimumWidth(120) + small_preset_btn.clicked.connect(lambda: self._set_font_preset('small')) + presets_layout.addWidget(small_preset_btn) + + balanced_preset_btn = QPushButton("Balanced") + balanced_preset_btn.setMinimumWidth(120) + balanced_preset_btn.clicked.connect(lambda: self._set_font_preset('balanced')) + presets_layout.addWidget(balanced_preset_btn) + + large_preset_btn = QPushButton("Large Text") + large_preset_btn.setMinimumWidth(120) + large_preset_btn.clicked.connect(lambda: self._set_font_preset('large')) + presets_layout.addWidget(large_preset_btn) + + presets_layout.addStretch() + sizing_group_layout.addWidget(row_presets) + + # Text wrapping mode (moved into Font Settings) + wrap_frame = QWidget() + wrap_layout = QVBoxLayout(wrap_frame) + wrap_layout.setContentsMargins(0, 12, 0, 4) + wrap_layout.setSpacing(5) + + self.strict_wrap_checkbox = self._create_styled_checkbox("Strict text wrapping (force text to fit within bubbles)") + self.strict_wrap_checkbox.setChecked(self.strict_text_wrapping_value) + self.strict_wrap_checkbox.stateChanged.connect(lambda: (setattr(self, 'strict_text_wrapping_value', self.strict_wrap_checkbox.isChecked()), self._save_rendering_settings(), self._apply_rendering_settings())) + wrap_layout.addWidget(self.strict_wrap_checkbox) + + wrap_help_label = QLabel("(Break words with hyphens if needed)") + wrap_help_font = QFont('Arial', 9) + wrap_help_label.setFont(wrap_help_font) + wrap_help_label.setStyleSheet("color: gray; margin-left: 20px;") + wrap_layout.addWidget(wrap_help_label) + + # Force CAPS LOCK directly below strict wrapping + self.force_caps_checkbox = self._create_styled_checkbox("Force CAPS LOCK") + self.force_caps_checkbox.setChecked(self.force_caps_lock_value) + self.force_caps_checkbox.stateChanged.connect(lambda: (setattr(self, 'force_caps_lock_value', self.force_caps_checkbox.isChecked()), self._save_rendering_settings(), self._apply_rendering_settings())) + wrap_layout.addWidget(self.force_caps_checkbox) + + sizing_group_layout.addWidget(wrap_frame) + + # Update multiplier label with loaded value + self._update_multiplier_label(self.font_size_multiplier_value) + + # Add sizing_group to font_render_frame (right column) + font_render_frame_layout.addWidget(self.sizing_group) + + # Font style selection (moved into Font Settings) + font_style_frame = QWidget() + font_style_layout = QHBoxLayout(font_style_frame) + font_style_layout.setContentsMargins(0, 6, 0, 4) + font_style_layout.setSpacing(10) + + font_style_label = QLabel("Font Style:") + font_style_label.setMinimumWidth(110) + font_style_layout.addWidget(font_style_label) + + # Font style will be set from loaded config in _load_rendering_settings + self.font_combo = QComboBox() + self.font_combo.addItems(self._get_available_fonts()) + self.font_combo.setCurrentText(self.font_style_value) + self.font_combo.setMinimumWidth(120) # Reduced for better fit + self.font_combo.setSizePolicy(QSizePolicy.Policy.Expanding, QSizePolicy.Policy.Fixed) + self.font_combo.currentTextChanged.connect(lambda: (self._on_font_selected(), self._save_rendering_settings(), 
self._apply_rendering_settings())) + self._disable_combobox_mousewheel(self.font_combo) # Disable mousewheel scrolling + font_style_layout.addWidget(self.font_combo) + font_style_layout.addStretch() + + font_render_frame_layout.addWidget(font_style_frame) + + # Font color selection (moved into Font Settings) + color_frame = QWidget() + color_layout = QHBoxLayout(color_frame) + color_layout.setContentsMargins(0, 6, 0, 12) + color_layout.setSpacing(10) + + color_label = QLabel("Font Color:") + color_label.setMinimumWidth(110) + color_layout.addWidget(color_label) + + # Color preview frame + self.color_preview_frame = QFrame() + self.color_preview_frame.setFixedSize(40, 30) + self.color_preview_frame.setFrameShape(QFrame.Box) + self.color_preview_frame.setLineWidth(1) + # Initialize with current color + r, g, b = self.text_color_r_value, self.text_color_g_value, self.text_color_b_value + self.color_preview_frame.setStyleSheet(f"background-color: rgb({r},{g},{b}); border: 1px solid #5a9fd4;") + color_layout.addWidget(self.color_preview_frame) + + # RGB display label + r, g, b = self.text_color_r_value, self.text_color_g_value, self.text_color_b_value + self.rgb_label = QLabel(f"RGB({r},{g},{b})") + self.rgb_label.setMinimumWidth(100) + color_layout.addWidget(self.rgb_label) + + # Color picker button + def pick_font_color(): + # Get current color + current_color = QColor(self.text_color_r_value, self.text_color_g_value, self.text_color_b_value) + + # Open color dialog + color = QColorDialog.getColor(current_color, self.dialog, "Choose Font Color") + if color.isValid(): + # Update RGB values + self.text_color_r_value = color.red() + self.text_color_g_value = color.green() + self.text_color_b_value = color.blue() + # Update display + self.rgb_label.setText(f"RGB({color.red()},{color.green()},{color.blue()})") + self._update_color_preview(None) + # Save settings to config + self._save_rendering_settings() + + choose_color_btn = QPushButton("Choose Color") + choose_color_btn.clicked.connect(pick_font_color) + choose_color_btn.setStyleSheet("QPushButton { background-color: #17a2b8; color: white; padding: 5px 15px; }") + color_layout.addWidget(choose_color_btn) + color_layout.addStretch() + + font_render_frame_layout.addWidget(color_frame) + + self._update_color_preview(None) # Initialize with loaded colors + + # Text Shadow settings (moved into Font Settings) + shadow_header = QWidget() + shadow_header_layout = QHBoxLayout(shadow_header) + shadow_header_layout.setContentsMargins(0, 4, 0, 4) + + # Shadow enabled checkbox + self.shadow_enabled_checkbox = self._create_styled_checkbox("Enable Shadow") + self.shadow_enabled_checkbox.setChecked(self.shadow_enabled_value) + self.shadow_enabled_checkbox.stateChanged.connect(lambda: (setattr(self, 'shadow_enabled_value', self.shadow_enabled_checkbox.isChecked()), self._toggle_shadow_controls(), self._save_rendering_settings(), self._apply_rendering_settings())) + shadow_header_layout.addWidget(self.shadow_enabled_checkbox) + shadow_header_layout.addStretch() + + font_render_frame_layout.addWidget(shadow_header) + + # Shadow controls container + self.shadow_controls = QWidget() + shadow_controls_layout = QVBoxLayout(self.shadow_controls) + shadow_controls_layout.setContentsMargins(0, 2, 0, 6) + shadow_controls_layout.setSpacing(5) + + # Shadow color + shadow_color_frame = QWidget() + shadow_color_layout = QHBoxLayout(shadow_color_frame) + shadow_color_layout.setContentsMargins(0, 2, 0, 8) + shadow_color_layout.setSpacing(10) + + shadow_color_label = 
QLabel("Shadow Color:") + shadow_color_label.setMinimumWidth(110) + shadow_color_layout.addWidget(shadow_color_label) + + # Shadow color preview + self.shadow_preview_frame = QFrame() + self.shadow_preview_frame.setFixedSize(30, 25) + self.shadow_preview_frame.setFrameShape(QFrame.Box) + self.shadow_preview_frame.setLineWidth(1) + # Initialize with current color + sr, sg, sb = self.shadow_color_r_value, self.shadow_color_g_value, self.shadow_color_b_value + self.shadow_preview_frame.setStyleSheet(f"background-color: rgb({sr},{sg},{sb}); border: 1px solid #5a9fd4;") + shadow_color_layout.addWidget(self.shadow_preview_frame) + + # Shadow RGB display label + sr, sg, sb = self.shadow_color_r_value, self.shadow_color_g_value, self.shadow_color_b_value + self.shadow_rgb_label = QLabel(f"RGB({sr},{sg},{sb})") + self.shadow_rgb_label.setMinimumWidth(120) + shadow_color_layout.addWidget(self.shadow_rgb_label) + + # Shadow color picker button + def pick_shadow_color(): + # Get current color + current_color = QColor(self.shadow_color_r_value, self.shadow_color_g_value, self.shadow_color_b_value) + + # Open color dialog + color = QColorDialog.getColor(current_color, self.dialog, "Choose Shadow Color") + if color.isValid(): + # Update RGB values + self.shadow_color_r_value = color.red() + self.shadow_color_g_value = color.green() + self.shadow_color_b_value = color.blue() + # Update display + self.shadow_rgb_label.setText(f"RGB({color.red()},{color.green()},{color.blue()})") + self._update_shadow_preview(None) + # Save settings to config + self._save_rendering_settings() + + choose_shadow_btn = QPushButton("Choose Color") + choose_shadow_btn.setMinimumWidth(120) + choose_shadow_btn.clicked.connect(pick_shadow_color) + choose_shadow_btn.setStyleSheet("QPushButton { background-color: #17a2b8; color: white; padding: 5px 15px; }") + shadow_color_layout.addWidget(choose_shadow_btn) + shadow_color_layout.addStretch() + + shadow_controls_layout.addWidget(shadow_color_frame) + + self._update_shadow_preview(None) # Initialize with loaded colors + + # Shadow offset + offset_frame = QWidget() + offset_layout = QHBoxLayout(offset_frame) + offset_layout.setContentsMargins(0, 2, 0, 0) + offset_layout.setSpacing(10) + + offset_label = QLabel("Shadow Offset:") + offset_label.setMinimumWidth(110) + offset_layout.addWidget(offset_label) + + # X offset + x_label = QLabel("X:") + offset_layout.addWidget(x_label) + + self.shadow_offset_x_spinbox = QSpinBox() + self.shadow_offset_x_spinbox.setMinimum(-10) + self.shadow_offset_x_spinbox.setMaximum(10) + self.shadow_offset_x_spinbox.setValue(self.shadow_offset_x_value) + self.shadow_offset_x_spinbox.setMinimumWidth(60) + self.shadow_offset_x_spinbox.valueChanged.connect(lambda value: (setattr(self, 'shadow_offset_x_value', value), self._save_rendering_settings(), self._apply_rendering_settings())) + self._disable_spinbox_mousewheel(self.shadow_offset_x_spinbox) + offset_layout.addWidget(self.shadow_offset_x_spinbox) + + # Y offset + y_label = QLabel("Y:") + offset_layout.addWidget(y_label) + + self.shadow_offset_y_spinbox = QSpinBox() + self.shadow_offset_y_spinbox.setMinimum(-10) + self.shadow_offset_y_spinbox.setMaximum(10) + self.shadow_offset_y_spinbox.setValue(self.shadow_offset_y_value) + self.shadow_offset_y_spinbox.setMinimumWidth(60) + self.shadow_offset_y_spinbox.valueChanged.connect(lambda value: (setattr(self, 'shadow_offset_y_value', value), self._save_rendering_settings(), self._apply_rendering_settings())) + 
self._disable_spinbox_mousewheel(self.shadow_offset_y_spinbox) + offset_layout.addWidget(self.shadow_offset_y_spinbox) + offset_layout.addStretch() + + shadow_controls_layout.addWidget(offset_frame) + + # Shadow blur + blur_frame = QWidget() + blur_layout = QHBoxLayout(blur_frame) + blur_layout.setContentsMargins(0, 2, 0, 0) + blur_layout.setSpacing(10) + + blur_label = QLabel("Shadow Blur:") + blur_label.setMinimumWidth(110) + blur_layout.addWidget(blur_label) + + self.shadow_blur_spinbox = QSpinBox() + self.shadow_blur_spinbox.setMinimum(0) + self.shadow_blur_spinbox.setMaximum(10) + self.shadow_blur_spinbox.setValue(self.shadow_blur_value) + self.shadow_blur_spinbox.setMinimumWidth(100) + self.shadow_blur_spinbox.valueChanged.connect(lambda value: (self._on_shadow_blur_changed(value), self._save_rendering_settings(), self._apply_rendering_settings())) + self._disable_spinbox_mousewheel(self.shadow_blur_spinbox) + blur_layout.addWidget(self.shadow_blur_spinbox) + + # Shadow blur value label + self.shadow_blur_value_label = QLabel(f"{self.shadow_blur_value}") + self.shadow_blur_value_label.setMinimumWidth(30) + blur_layout.addWidget(self.shadow_blur_value_label) + + blur_help_label = QLabel("(0=sharp, 10=blurry)") + blur_help_font = QFont('Arial', 9) + blur_help_label.setFont(blur_help_font) + blur_help_label.setStyleSheet("color: gray;") + blur_layout.addWidget(blur_help_label) + blur_layout.addStretch() + + shadow_controls_layout.addWidget(blur_frame) + + # Add shadow_controls to font_render_frame_layout + font_render_frame_layout.addWidget(self.shadow_controls) + + # Initially disable shadow controls + self._toggle_shadow_controls() + + # Add font_render_frame to RIGHT COLUMN + right_column_layout.addWidget(font_render_frame) + + # Control buttons - IN LEFT COLUMN + # Check if ready based on selected provider + # Get API key from main GUI - handle both Tkinter and PySide6 + try: + if hasattr(self.main_gui.api_key_entry, 'text'): # PySide6 QLineEdit + has_api_key = bool(self.main_gui.api_key_entry.text().strip()) + elif hasattr(self.main_gui.api_key_entry, 'get'): # Tkinter Entry + has_api_key = bool(self.main_gui.api_key_entry.get().strip()) + else: + has_api_key = False + except: + has_api_key = False + + provider = self.ocr_provider_value + + # Determine readiness based on provider + if provider == 'google': + has_vision = os.path.exists(self.main_gui.config.get('google_vision_credentials', '')) + is_ready = has_api_key and has_vision + elif provider == 'azure': + has_azure = bool(self.main_gui.config.get('azure_vision_key', '')) + is_ready = has_api_key and has_azure + elif provider == 'custom-api': + is_ready = has_api_key # Only needs API key + else: + # Local providers (manga-ocr, easyocr, etc.) 
only need API key for translation + is_ready = has_api_key + + control_frame = QWidget() + control_layout = QVBoxLayout(control_frame) + control_layout.setContentsMargins(10, 15, 10, 10) + control_layout.setSpacing(15) + + self.start_button = QPushButton("▶ Start Translation") + self.start_button.clicked.connect(self._start_translation) + self.start_button.setEnabled(is_ready) + self.start_button.setMinimumHeight(90) # Increased from 80 to 90 + self.start_button.setStyleSheet( + "QPushButton { " + " background-color: #28a745; " + " color: white; " + " padding: 22px 30px; " + " font-size: 14pt; " + " font-weight: bold; " + " border-radius: 8px; " + "} " + "QPushButton:hover { background-color: #218838; } " + "QPushButton:disabled { " + " background-color: #2d2d2d; " + " color: #666666; " + "}" + ) + control_layout.addWidget(self.start_button) + + # Add tooltip to show why button is disabled + if not is_ready: + reasons = [] + if not has_api_key: + reasons.append("API key not configured") + if provider == 'google' and not os.path.exists(self.main_gui.config.get('google_vision_credentials', '')): + reasons.append("Google Vision credentials not set") + elif provider == 'azure' and not self.main_gui.config.get('azure_vision_key', ''): + reasons.append("Azure credentials not configured") + tooltip_text = "Cannot start: " + ", ".join(reasons) + self.start_button.setToolTip(tooltip_text) + + self.stop_button = QPushButton("⏹ Stop") + self.stop_button.clicked.connect(self._stop_translation) + self.stop_button.setEnabled(False) + self.stop_button.setMinimumHeight(90) # Increased from 80 to 90 + self.stop_button.setStyleSheet( + "QPushButton { " + " background-color: #dc3545; " + " color: white; " + " padding: 22px 30px; " + " font-size: 14pt; " + " font-weight: bold; " + " border-radius: 8px; " + "} " + "QPushButton:hover { background-color: #c82333; } " + "QPushButton:disabled { " + " background-color: #2d2d2d; " + " color: #999999; " + "}" + ) + control_layout.addWidget(self.stop_button) + + # Add control buttons to LEFT COLUMN + left_column_layout.addWidget(control_frame) + + # Add stretch to balance columns + left_column_layout.addStretch() + right_column_layout.addStretch() + + # Set size policies to make columns expand and shrink properly + left_column.setSizePolicy(QSizePolicy.Policy.Preferred, QSizePolicy.Policy.Preferred) + right_column.setSizePolicy(QSizePolicy.Policy.Preferred, QSizePolicy.Policy.Preferred) + + # Set minimum widths for columns to allow shrinking + left_column.setMinimumWidth(300) + right_column.setMinimumWidth(300) + + # Add columns to container with equal stretch factors + columns_layout.addWidget(left_column, stretch=1) + columns_layout.addWidget(right_column, stretch=1) + + # Make the columns container itself have proper size policy + columns_container.setSizePolicy(QSizePolicy.Policy.Expanding, QSizePolicy.Policy.Preferred) + + # Add columns container to main layout + main_layout.addWidget(columns_container) + + # Progress frame + progress_frame = QGroupBox("Progress") + progress_frame_font = QFont('Arial', 10) + progress_frame_font.setBold(True) + progress_frame.setFont(progress_frame_font) + progress_frame_layout = QVBoxLayout(progress_frame) + progress_frame_layout.setContentsMargins(10, 10, 10, 8) + progress_frame_layout.setSpacing(6) + + # Overall progress + self.progress_label = QLabel("Ready to start") + progress_label_font = QFont('Arial', 9) + self.progress_label.setFont(progress_label_font) + self.progress_label.setStyleSheet("color: white;") + 
progress_frame_layout.addWidget(self.progress_label) + + # Create and configure progress bar + self.progress_bar = QProgressBar() + self.progress_bar.setMinimum(0) + self.progress_bar.setMaximum(100) + self.progress_bar.setValue(0) + self.progress_bar.setMinimumHeight(18) + self.progress_bar.setTextVisible(True) + self.progress_bar.setStyleSheet(""" + QProgressBar { + border: 1px solid #4a5568; + border-radius: 3px; + background-color: #2d3748; + text-align: center; + color: white; + } + QProgressBar::chunk { + background-color: white; + } + """) + progress_frame_layout.addWidget(self.progress_bar) + + # Current file status + self.current_file_label = QLabel("") + current_file_font = QFont('Arial', 10) + self.current_file_label.setFont(current_file_font) + self.current_file_label.setStyleSheet("color: lightgray;") + progress_frame_layout.addWidget(self.current_file_label) + + main_layout.addWidget(progress_frame) + + # Log frame + log_frame = QGroupBox("Translation Log") + log_frame_font = QFont('Arial', 10) + log_frame_font.setBold(True) + log_frame.setFont(log_frame_font) + log_frame_layout = QVBoxLayout(log_frame) + log_frame_layout.setContentsMargins(10, 10, 10, 8) + log_frame_layout.setSpacing(6) + + # Log text widget (QTextEdit handles scrolling automatically) + self.log_text = QTextEdit() + self.log_text.setReadOnly(True) + self.log_text.setMinimumHeight(600) # Increased from 400 to 600 for better visibility + self.log_text.setStyleSheet(""" + QTextEdit { + background-color: #1e1e1e; + color: white; + font-family: 'Consolas', 'Courier New', monospace; + font-size: 10pt; + border: 1px solid #4a5568; + } + """) + log_frame_layout.addWidget(self.log_text) + + main_layout.addWidget(log_frame) + + # Restore persistent log from previous sessions + self._restore_persistent_log() + + def _restore_persistent_log(self): + """Restore log messages from persistent storage""" + try: + with MangaTranslationTab._persistent_log_lock: + if MangaTranslationTab._persistent_log: + # PySide6 QTextEdit + color_map = { + 'info': 'white', + 'success': 'green', + 'warning': 'orange', + 'error': 'red', + 'debug': 'lightblue' + } + for message, level in MangaTranslationTab._persistent_log: + color = color_map.get(level, 'white') + self.log_text.setTextColor(QColor(color)) + self.log_text.append(message) + except Exception as e: + print(f"Failed to restore persistent log: {e}") + + def _show_help_dialog(self, title: str, message: str): + """Show a help dialog with the given title and message""" + # Create a PySide6 dialog + help_dialog = QDialog(self.dialog) + help_dialog.setWindowTitle(title) + help_dialog.resize(500, 400) + help_dialog.setModal(True) + + # Main layout + main_layout = QVBoxLayout(help_dialog) + main_layout.setContentsMargins(20, 20, 20, 20) + main_layout.setSpacing(10) + + # Icon and title + title_frame = QWidget() + title_layout = QHBoxLayout(title_frame) + title_layout.setContentsMargins(0, 0, 0, 10) + + icon_label = QLabel("ℹ️") + icon_font = QFont('Arial', 20) + icon_label.setFont(icon_font) + title_layout.addWidget(icon_label) + + title_label = QLabel(title) + title_font = QFont('Arial', 12) + title_font.setBold(True) + title_label.setFont(title_font) + title_layout.addWidget(title_label) + title_layout.addStretch() + + main_layout.addWidget(title_frame) + + # Help text in a scrollable text widget + text_widget = QTextEdit() + text_widget.setReadOnly(True) + text_widget.setPlainText(message) + text_font = QFont('Arial', 10) + text_widget.setFont(text_font) + 
main_layout.addWidget(text_widget) + + # Close button + close_btn = QPushButton("Close") + close_btn.clicked.connect(help_dialog.accept) + close_btn.setStyleSheet("QPushButton { background-color: #6c757d; color: white; padding: 5px 20px; }") + main_layout.addWidget(close_btn, alignment=Qt.AlignCenter) + + # Show the dialog + help_dialog.exec() + + def _on_visual_context_toggle(self): + """Handle visual context toggle""" + # Read the current state directly from the checkbox (no separate *_value attribute is maintained for this toggle) + enabled = self.visual_context_checkbox.isChecked() + self.main_gui.config['manga_visual_context_enabled'] = enabled + + # Update translator if it exists + if self.translator: + self.translator.visual_context_enabled = enabled + + # Save config + if hasattr(self.main_gui, 'save_config'): + self.main_gui.save_config(show_message=False) + + # Log the change + if enabled: + self._log("📷 Visual context ENABLED - Images will be sent to API", "info") + self._log(" Make sure you're using a vision-capable model", "warning") + else: + self._log("📝 Visual context DISABLED - Text-only mode", "info") + self._log(" Compatible with non-vision models (Claude, GPT-3.5, etc.)", "success") + + def _open_advanced_settings(self): + """Open the manga advanced settings dialog""" + try: + def on_settings_saved(settings): + """Callback when settings are saved""" + # Update config with new settings + self.main_gui.config['manga_settings'] = settings + + # Mirror critical font size values into nested settings (avoid legacy top-level min key) + try: + rendering = settings.get('rendering', {}) if isinstance(settings, dict) else {} + font_sizing = settings.get('font_sizing', {}) if isinstance(settings, dict) else {} + min_from_dialog = rendering.get('auto_min_size', font_sizing.get('min_readable', font_sizing.get('min_size'))) + max_from_dialog = rendering.get('auto_max_size', font_sizing.get('max_size')) + if min_from_dialog is not None: + ms = self.main_gui.config.setdefault('manga_settings', {}) + rend = ms.setdefault('rendering', {}) + font = ms.setdefault('font_sizing', {}) + rend['auto_min_size'] = int(min_from_dialog) + font['min_size'] = int(min_from_dialog) + if hasattr(self, 'auto_min_size_value'): + self.auto_min_size_value = int(min_from_dialog) + if max_from_dialog is not None: + self.main_gui.config['manga_max_font_size'] = int(max_from_dialog) + if hasattr(self, 'max_font_size_value'): + self.max_font_size_value = int(max_from_dialog) + except Exception: + pass + + # Persist mirrored values + try: + if hasattr(self.main_gui, 'save_config'): + self.main_gui.save_config(show_message=False) + except Exception: + pass + + # Reload settings in translator if it exists + if self.translator: + self._log("📋 Reloading settings in translator...", "info") + # The translator will pick up new settings on next operation + + self._log("✅ Advanced settings saved and applied", "success") + + # Open the settings dialog + # Note: MangaSettingsDialog is still Tkinter-based, so pass Tkinter root + MangaSettingsDialog( + parent=self.main_gui.master, # Use Tkinter root instead of PySide6 dialog + main_gui=self.main_gui, + config=self.main_gui.config, + callback=on_settings_saved + ) + + except Exception as e: + from PySide6.QtWidgets import QMessageBox + self._log(f"❌ Error opening settings dialog: {str(e)}", "error") + QMessageBox.critical(self.dialog, "Error", f"Failed to open settings dialog:\n{str(e)}") + + def _toggle_font_size_mode(self): + """Toggle between auto, fixed size and multiplier modes""" + mode = self.font_size_mode_value + + # Handle main frames (fixed size and multiplier) + if hasattr(self, 
'fixed_size_frame') and hasattr(self, 'multiplier_frame'): + if mode == "fixed": + self.fixed_size_frame.show() + self.multiplier_frame.hide() + if hasattr(self, 'constraint_frame'): + self.constraint_frame.hide() + elif mode == "multiplier": + self.fixed_size_frame.hide() + self.multiplier_frame.show() + if hasattr(self, 'constraint_frame'): + self.constraint_frame.show() + else: # auto + self.fixed_size_frame.hide() + self.multiplier_frame.hide() + if hasattr(self, 'constraint_frame'): + self.constraint_frame.hide() + + # MIN/MAX FIELDS ARE ALWAYS VISIBLE - NEVER HIDE THEM + # They are packed at creation time and stay visible in all modes + + # Only save and apply if we're not initializing + if not hasattr(self, '_initializing') or not self._initializing: + self._save_rendering_settings() + self._apply_rendering_settings() + + def _update_multiplier_label(self, value): + """Update multiplier label and value variable""" + self.font_size_multiplier_value = float(value) # UPDATE THE VALUE VARIABLE! + self.multiplier_label.setText(f"{float(value):.1f}x") + + def _on_line_spacing_changed(self, value): + """Update line spacing value label and value variable""" + self.line_spacing_value = float(value) # UPDATE THE VALUE VARIABLE! + try: + if hasattr(self, 'line_spacing_value_label'): + self.line_spacing_value_label.setText(f"{float(value):.2f}") + except Exception: + pass + + def _on_shadow_blur_changed(self, value): + """Update shadow blur value label and value variable""" + self.shadow_blur_value = int(float(value)) # UPDATE THE VALUE VARIABLE! + try: + if hasattr(self, 'shadow_blur_value_label'): + self.shadow_blur_value_label.setText(f"{int(float(value))}") + except Exception: + pass + + def _on_ft_only_bg_opacity_changed(self): + """Handle free text only background opacity checkbox change (PySide6)""" + # Update the value from checkbox state + self.free_text_only_bg_opacity_value = self.ft_only_checkbox.isChecked() + + def _update_color_preview(self, event=None): + """Update the font color preview""" + r = self.text_color_r_value + g = self.text_color_g_value + b = self.text_color_b_value + if hasattr(self, 'color_preview_frame'): + self.color_preview_frame.setStyleSheet(f"background-color: rgb({r},{g},{b}); border: 1px solid #5a9fd4;") + # Auto-save and apply on change + if event is not None: # Only save on user interaction, not initial load + self._save_rendering_settings() + self._apply_rendering_settings() + + def _update_shadow_preview(self, event=None): + """Update the shadow color preview""" + r = self.shadow_color_r_value + g = self.shadow_color_g_value + b = self.shadow_color_b_value + if hasattr(self, 'shadow_preview_frame'): + self.shadow_preview_frame.setStyleSheet(f"background-color: rgb({r},{g},{b}); border: 1px solid #5a9fd4;") + # Auto-save and apply on change + if event is not None: # Only save on user interaction, not initial load + self._save_rendering_settings() + self._apply_rendering_settings() + + def _toggle_azure_key_visibility(self, state): + """Toggle visibility of Azure API key""" + from PySide6.QtWidgets import QLineEdit + from PySide6.QtCore import Qt + + # Check the checkbox state directly to be sure + is_checked = self.show_azure_key_checkbox.isChecked() + + if is_checked: + # Show the key + self.azure_key_entry.setEchoMode(QLineEdit.Normal) + else: + # Hide the key + self.azure_key_entry.setEchoMode(QLineEdit.Password) + + def _toggle_shadow_controls(self): + """Enable/disable shadow controls based on checkbox""" + if self.shadow_enabled_value: + if 
hasattr(self, 'shadow_controls'): + self.shadow_controls.setEnabled(True) + else: + if hasattr(self, 'shadow_controls'): + self.shadow_controls.setEnabled(False) + + def _set_font_preset(self, preset: str): + """Apply font sizing preset (moved from dialog)""" + try: + if preset == 'small': + self.font_algorithm_value = 'conservative' + self.auto_min_size_value = 10 + self.max_font_size_value = 32 + self.prefer_larger_value = False + self.bubble_size_factor_value = True + self.line_spacing_value = 1.2 + self.max_lines_value = 8 + elif preset == 'balanced': + self.font_algorithm_value = 'smart' + self.auto_min_size_value = 12 + self.max_font_size_value = 48 + self.prefer_larger_value = True + self.bubble_size_factor_value = True + self.line_spacing_value = 1.3 + self.max_lines_value = 10 + elif preset == 'large': + self.font_algorithm_value = 'aggressive' + self.auto_min_size_value = 14 + self.max_font_size_value = 64 + self.prefer_larger_value = True + self.bubble_size_factor_value = False + self.line_spacing_value = 1.4 + self.max_lines_value = 12 + + # Update all spinboxes with new values + if hasattr(self, 'min_size_spinbox'): + self.min_size_spinbox.setValue(self.auto_min_size_value) + if hasattr(self, 'max_size_spinbox'): + self.max_size_spinbox.setValue(self.max_font_size_value) + if hasattr(self, 'line_spacing_spinbox'): + self.line_spacing_spinbox.setValue(self.line_spacing_value) + if hasattr(self, 'max_lines_spinbox'): + self.max_lines_spinbox.setValue(self.max_lines_value) + + # Update checkboxes + if hasattr(self, 'prefer_larger_checkbox'): + self.prefer_larger_checkbox.setChecked(self.prefer_larger_value) + if hasattr(self, 'bubble_size_factor_checkbox'): + self.bubble_size_factor_checkbox.setChecked(self.bubble_size_factor_value) + + # Update the line spacing label + if hasattr(self, 'line_spacing_value_label'): + self.line_spacing_value_label.setText(f"{float(self.line_spacing_value):.2f}") + + self._save_rendering_settings() + except Exception as e: + self._log(f"Error setting preset: {e}", "debug") + + def _enable_widget_tree(self, widget): + """Recursively enable a widget and its children (PySide6 version)""" + try: + widget.setEnabled(True) + except: + pass + # PySide6 way to iterate children + try: + for child in widget.children(): + if hasattr(child, 'setEnabled'): + self._enable_widget_tree(child) + except: + pass + + def _disable_widget_tree(self, widget): + """Recursively disable a widget and its children (PySide6 version)""" + try: + widget.setEnabled(False) + except: + pass + # PySide6 way to iterate children + try: + for child in widget.children(): + if hasattr(child, 'setEnabled'): + self._disable_widget_tree(child) + except: + pass + + def _load_rendering_settings(self): + """Load text rendering settings from config""" + config = self.main_gui.config + + # One-time migration for legacy min font size key + try: + legacy_min = config.get('manga_min_readable_size', None) + if legacy_min is not None: + ms = config.setdefault('manga_settings', {}) + rend = ms.setdefault('rendering', {}) + font = ms.setdefault('font_sizing', {}) + current_min = rend.get('auto_min_size', font.get('min_size')) + if current_min is None or int(current_min) < int(legacy_min): + rend['auto_min_size'] = int(legacy_min) + font['min_size'] = int(legacy_min) + # Remove legacy key + try: + del config['manga_min_readable_size'] + except Exception: + pass + # Persist migration silently + if hasattr(self.main_gui, 'save_config'): + self.main_gui.save_config(show_message=False) + except Exception: + 
pass + + # Get inpainting settings from the nested location + manga_settings = config.get('manga_settings', {}) + inpaint_settings = manga_settings.get('inpainting', {}) + + # Load inpaint method from the correct location (no Tkinter variables in PySide6) + self.inpaint_method_value = inpaint_settings.get('method', 'local') + self.local_model_type_value = inpaint_settings.get('local_method', 'anime_onnx') + + # Load model paths + self.local_model_path_value = '' + for model_type in ['aot', 'aot_onnx', 'lama', 'lama_onnx', 'anime', 'anime_onnx', 'mat', 'ollama', 'sd_local']: + path = inpaint_settings.get(f'{model_type}_model_path', '') + if model_type == self.local_model_type_value: + self.local_model_path_value = path + + # Initialize with defaults (plain Python values, no Tkinter variables) + self.bg_opacity_value = config.get('manga_bg_opacity', 130) + self.free_text_only_bg_opacity_value = config.get('manga_free_text_only_bg_opacity', True) + self.bg_style_value = config.get('manga_bg_style', 'circle') + self.bg_reduction_value = config.get('manga_bg_reduction', 1.0) + self.font_size_value = config.get('manga_font_size', 0) + + self.selected_font_path = config.get('manga_font_path', None) + self.skip_inpainting_value = config.get('manga_skip_inpainting', False) + self.inpaint_quality_value = config.get('manga_inpaint_quality', 'high') + self.inpaint_dilation_value = config.get('manga_inpaint_dilation', 15) + self.inpaint_passes_value = config.get('manga_inpaint_passes', 2) + + self.font_size_mode_value = config.get('manga_font_size_mode', 'fixed') + self.font_size_multiplier_value = config.get('manga_font_size_multiplier', 1.0) + + # Auto fit style for auto mode + try: + rend_cfg = (config.get('manga_settings', {}) or {}).get('rendering', {}) + except Exception: + rend_cfg = {} + self.auto_fit_style_value = rend_cfg.get('auto_fit_style', 'balanced') + + # Auto minimum font size (from rendering or font_sizing) + try: + font_cfg = (config.get('manga_settings', {}) or {}).get('font_sizing', {}) + except Exception: + font_cfg = {} + auto_min_default = rend_cfg.get('auto_min_size', font_cfg.get('min_size', 10)) + self.auto_min_size_value = int(auto_min_default) + + self.force_caps_lock_value = config.get('manga_force_caps_lock', True) + self.constrain_to_bubble_value = config.get('manga_constrain_to_bubble', True) + + # Advanced font sizing (from manga_settings.font_sizing) + font_settings = (config.get('manga_settings', {}) or {}).get('font_sizing', {}) + self.font_algorithm_value = str(font_settings.get('algorithm', 'smart')) + self.prefer_larger_value = bool(font_settings.get('prefer_larger', True)) + self.bubble_size_factor_value = bool(font_settings.get('bubble_size_factor', True)) + self.line_spacing_value = float(font_settings.get('line_spacing', 1.3)) + self.max_lines_value = int(font_settings.get('max_lines', 10)) + + # Determine effective max font size with fallback + font_max_top = config.get('manga_max_font_size', None) + nested_ms = config.get('manga_settings', {}) if isinstance(config.get('manga_settings', {}), dict) else {} + nested_render = nested_ms.get('rendering', {}) if isinstance(nested_ms.get('rendering', {}), dict) else {} + nested_font = nested_ms.get('font_sizing', {}) if isinstance(nested_ms.get('font_sizing', {}), dict) else {} + effective_max = font_max_top if font_max_top is not None else ( + nested_render.get('auto_max_size', nested_font.get('max_size', 48)) + ) + self.max_font_size_value = int(effective_max) + + # If top-level keys were missing, mirror max now 
(won't save during initialization) + if font_max_top is None: + self.main_gui.config['manga_max_font_size'] = int(effective_max) + + self.strict_text_wrapping_value = config.get('manga_strict_text_wrapping', True) + + # Font color settings + manga_text_color = config.get('manga_text_color', [102, 0, 0]) + self.text_color_r_value = manga_text_color[0] + self.text_color_g_value = manga_text_color[1] + self.text_color_b_value = manga_text_color[2] + + # Shadow settings + self.shadow_enabled_value = config.get('manga_shadow_enabled', True) + + manga_shadow_color = config.get('manga_shadow_color', [204, 128, 128]) + self.shadow_color_r_value = manga_shadow_color[0] + self.shadow_color_g_value = manga_shadow_color[1] + self.shadow_color_b_value = manga_shadow_color[2] + + self.shadow_offset_x_value = config.get('manga_shadow_offset_x', 2) + self.shadow_offset_y_value = config.get('manga_shadow_offset_y', 2) + self.shadow_blur_value = config.get('manga_shadow_blur', 0) + + # Initialize font_style with saved value or default + self.font_style_value = config.get('manga_font_style', 'Default') + + # Full page context settings + self.full_page_context_value = config.get('manga_full_page_context', False) + + self.full_page_context_prompt = config.get('manga_full_page_context_prompt', + "You will receive multiple text segments from a manga page, each prefixed with an index like [0], [1], etc. " + "Translate each segment considering the context of all segments together. " + "Maintain consistency in character names, tone, and style across all translations.\n\n" + "CRITICAL: Return your response as a valid JSON object where each key includes BOTH the index prefix " + "AND the original text EXACTLY as provided (e.g., '[0] こんにちは'), and each value is the translation.\n" + "This is essential for correct mapping - do not modify or omit the index prefixes!\n\n" + "Make sure to properly escape any special characters in the JSON:\n" + "- Use \\n for newlines\n" + "- Use \\\" for quotes\n" + "- Use \\\\ for backslashes\n\n" + "Example:\n" + '{\n' + ' "[0] こんにちは": "Hello",\n' + ' "[1] ありがとう": "Thank you",\n' + ' "[2] さようなら": "Goodbye"\n' + '}\n\n' + 'REMEMBER: Keep the [index] prefix in each JSON key exactly as shown in the input!' + ) + + # Load OCR prompt + self.ocr_prompt = config.get('manga_ocr_prompt', + "YOU ARE AN OCR SYSTEM. YOUR ONLY JOB IS TEXT EXTRACTION.\n\n" + "CRITICAL RULES:\n" + "1. DO NOT TRANSLATE ANYTHING\n" + "2. DO NOT MODIFY THE TEXT\n" + "3. DO NOT EXPLAIN OR COMMENT\n" + "4. ONLY OUTPUT THE EXACT TEXT YOU SEE\n" + "5. PRESERVE NATURAL TEXT FLOW - DO NOT ADD UNNECESSARY LINE BREAKS\n\n" + "If you see Korean text, output it in Korean.\n" + "If you see Japanese text, output it in Japanese.\n" + "If you see Chinese text, output it in Chinese.\n" + "If you see English text, output it in English.\n\n" + "IMPORTANT: Only use line breaks where they naturally occur in the original text " + "(e.g., between dialogue lines or paragraphs). Do not break text mid-sentence or " + "between every word/character.\n\n" + "For vertical text common in manga/comics, transcribe it as a continuous line unless " + "there are clear visual breaks.\n\n" + "NEVER translate. ONLY extract exactly what is written.\n" + "Output ONLY the raw text, nothing else." 
+ ) + # Visual context setting + self.visual_context_enabled_value = self.main_gui.config.get('manga_visual_context_enabled', True) + self.qwen2vl_model_size = config.get('qwen2vl_model_size', '1') # Default to '1' (2B) + + # Initialize RapidOCR settings + self.rapidocr_use_recognition_value = self.main_gui.config.get('rapidocr_use_recognition', True) + self.rapidocr_language_value = self.main_gui.config.get('rapidocr_language', 'auto') + self.rapidocr_detection_mode_value = self.main_gui.config.get('rapidocr_detection_mode', 'document') + + # Output settings + self.create_subfolder_value = config.get('manga_create_subfolder', True) + + def _save_rendering_settings(self): + """Save rendering settings with validation""" + # Don't save during initialization + if hasattr(self, '_initializing') and self._initializing: + return + + # Validate that variables exist and have valid values before saving + try: + # Ensure manga_settings structure exists + if 'manga_settings' not in self.main_gui.config: + self.main_gui.config['manga_settings'] = {} + if 'inpainting' not in self.main_gui.config['manga_settings']: + self.main_gui.config['manga_settings']['inpainting'] = {} + + # Save to nested location + inpaint = self.main_gui.config['manga_settings']['inpainting'] + if hasattr(self, 'inpaint_method_value'): + inpaint['method'] = self.inpaint_method_value + if hasattr(self, 'local_model_type_value'): + inpaint['local_method'] = self.local_model_type_value + model_type = self.local_model_type_value + if hasattr(self, 'local_model_path_value'): + inpaint[f'{model_type}_model_path'] = self.local_model_path_value + + # Add new inpainting settings + if hasattr(self, 'inpaint_method_value'): + self.main_gui.config['manga_inpaint_method'] = self.inpaint_method_value + if hasattr(self, 'local_model_type_value'): + self.main_gui.config['manga_local_inpaint_model'] = self.local_model_type_value + + # Save model paths for each type + for model_type in ['aot', 'lama', 'lama_onnx', 'anime', 'mat', 'ollama', 'sd_local']: + if hasattr(self, 'local_model_type_value'): + if model_type == self.local_model_type_value: + if hasattr(self, 'local_model_path_value'): + path = self.local_model_path_value + if path: + self.main_gui.config[f'manga_{model_type}_model_path'] = path + + # Save all other settings with validation + if hasattr(self, 'bg_opacity_value'): + self.main_gui.config['manga_bg_opacity'] = self.bg_opacity_value + if hasattr(self, 'bg_style_value'): + self.main_gui.config['manga_bg_style'] = self.bg_style_value + if hasattr(self, 'bg_reduction_value'): + self.main_gui.config['manga_bg_reduction'] = self.bg_reduction_value + + # Save free-text-only background opacity toggle + if hasattr(self, 'free_text_only_bg_opacity_value'): + self.main_gui.config['manga_free_text_only_bg_opacity'] = bool(self.free_text_only_bg_opacity_value) + + # CRITICAL: Font size settings - validate before saving + if hasattr(self, 'font_size_value'): + value = self.font_size_value + self.main_gui.config['manga_font_size'] = value + + if hasattr(self, 'max_font_size_value'): + value = self.max_font_size_value + # Validate the value is reasonable + if 0 <= value <= 200: + self.main_gui.config['manga_max_font_size'] = value + + # Mirror these into nested manga_settings so the dialog and integration stay in sync + try: + ms = self.main_gui.config.setdefault('manga_settings', {}) + rend = ms.setdefault('rendering', {}) + font = ms.setdefault('font_sizing', {}) + # Mirror bounds + if hasattr(self, 'auto_min_size_value'): + 
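+ # Both the rendering and font_sizing sections are read elsewhere, so write the min/max bounds to both to keep them consistent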
rend['auto_min_size'] = int(self.auto_min_size_value) + font['min_size'] = int(self.auto_min_size_value) + if hasattr(self, 'max_font_size_value'): + rend['auto_max_size'] = int(self.max_font_size_value) + font['max_size'] = int(self.max_font_size_value) + # Persist advanced font sizing controls + if hasattr(self, 'font_algorithm_value'): + font['algorithm'] = str(self.font_algorithm_value) + if hasattr(self, 'prefer_larger_value'): + font['prefer_larger'] = bool(self.prefer_larger_value) + if hasattr(self, 'bubble_size_factor_value'): + font['bubble_size_factor'] = bool(self.bubble_size_factor_value) + if hasattr(self, 'line_spacing_value'): + font['line_spacing'] = float(self.line_spacing_value) + if hasattr(self, 'max_lines_value'): + font['max_lines'] = int(self.max_lines_value) + if hasattr(self, 'auto_fit_style_value'): + rend['auto_fit_style'] = str(self.auto_fit_style_value) + except Exception: + pass + + # Continue with other settings + self.main_gui.config['manga_font_path'] = self.selected_font_path + + if hasattr(self, 'skip_inpainting_value'): + self.main_gui.config['manga_skip_inpainting'] = self.skip_inpainting_value + if hasattr(self, 'inpaint_quality_value'): + self.main_gui.config['manga_inpaint_quality'] = self.inpaint_quality_value + if hasattr(self, 'inpaint_dilation_value'): + self.main_gui.config['manga_inpaint_dilation'] = self.inpaint_dilation_value + if hasattr(self, 'inpaint_passes_value'): + self.main_gui.config['manga_inpaint_passes'] = self.inpaint_passes_value + if hasattr(self, 'font_size_mode_value'): + self.main_gui.config['manga_font_size_mode'] = self.font_size_mode_value + if hasattr(self, 'font_size_multiplier_value'): + self.main_gui.config['manga_font_size_multiplier'] = self.font_size_multiplier_value + if hasattr(self, 'font_style_value'): + self.main_gui.config['manga_font_style'] = self.font_style_value + if hasattr(self, 'constrain_to_bubble_value'): + self.main_gui.config['manga_constrain_to_bubble'] = self.constrain_to_bubble_value + if hasattr(self, 'strict_text_wrapping_value'): + self.main_gui.config['manga_strict_text_wrapping'] = self.strict_text_wrapping_value + if hasattr(self, 'force_caps_lock_value'): + self.main_gui.config['manga_force_caps_lock'] = self.force_caps_lock_value + + # Save font color as list + if hasattr(self, 'text_color_r_value') and hasattr(self, 'text_color_g_value') and hasattr(self, 'text_color_b_value'): + self.main_gui.config['manga_text_color'] = [ + self.text_color_r_value, + self.text_color_g_value, + self.text_color_b_value + ] + + # Save shadow settings + if hasattr(self, 'shadow_enabled_value'): + self.main_gui.config['manga_shadow_enabled'] = self.shadow_enabled_value + if hasattr(self, 'shadow_color_r_value') and hasattr(self, 'shadow_color_g_value') and hasattr(self, 'shadow_color_b_value'): + self.main_gui.config['manga_shadow_color'] = [ + self.shadow_color_r_value, + self.shadow_color_g_value, + self.shadow_color_b_value + ] + if hasattr(self, 'shadow_offset_x_value'): + self.main_gui.config['manga_shadow_offset_x'] = self.shadow_offset_x_value + if hasattr(self, 'shadow_offset_y_value'): + self.main_gui.config['manga_shadow_offset_y'] = self.shadow_offset_y_value + if hasattr(self, 'shadow_blur_value'): + self.main_gui.config['manga_shadow_blur'] = self.shadow_blur_value + + # Save output settings + if hasattr(self, 'create_subfolder_value'): + self.main_gui.config['manga_create_subfolder'] = self.create_subfolder_value + + # Save full page context settings + if hasattr(self, 
'full_page_context_value'): + self.main_gui.config['manga_full_page_context'] = self.full_page_context_value + if hasattr(self, 'full_page_context_prompt'): + self.main_gui.config['manga_full_page_context_prompt'] = self.full_page_context_prompt + + # OCR prompt + if hasattr(self, 'ocr_prompt'): + self.main_gui.config['manga_ocr_prompt'] = self.ocr_prompt + + # Qwen and custom models + if hasattr(self, 'qwen2vl_model_size'): + self.main_gui.config['qwen2vl_model_size'] = self.qwen2vl_model_size + + # RapidOCR specific settings + if hasattr(self, 'rapidocr_use_recognition_value'): + self.main_gui.config['rapidocr_use_recognition'] = self.rapidocr_use_recognition_value + if hasattr(self, 'rapidocr_detection_mode_value'): + self.main_gui.config['rapidocr_detection_mode'] = self.rapidocr_detection_mode_value + if hasattr(self, 'rapidocr_language_value'): + self.main_gui.config['rapidocr_language'] = self.rapidocr_language_value + + # Auto-save to disk (PySide6 version - no Tkinter black window issue) + # Settings are stored in self.main_gui.config and persisted immediately + if hasattr(self.main_gui, 'save_config'): + self.main_gui.save_config(show_message=False) + + except Exception as e: + # Log error but don't crash + print(f"Error saving manga settings: {e}") + + def _on_context_toggle(self): + """Handle full page context toggle""" + enabled = self.full_page_context_value + self._save_rendering_settings() + + def _edit_context_prompt(self): + """Open dialog to edit full page context prompt and OCR prompt""" + from PySide6.QtWidgets import (QDialog, QVBoxLayout, QLabel, QTextEdit, + QPushButton, QHBoxLayout) + from PySide6.QtCore import Qt + + # Create PySide6 dialog + dialog = QDialog(self.dialog) + dialog.setWindowTitle("Edit Prompts") + dialog.setMinimumSize(700, 600) + + layout = QVBoxLayout(dialog) + + # Instructions + instructions = QLabel( + "Edit the prompt used for full page context translation.\n" + "This will be appended to the main translation system prompt." + ) + instructions.setWordWrap(True) + layout.addWidget(instructions) + + # Full Page Context label + context_label = QLabel("Full Page Context Prompt:") + font = context_label.font() + font.setBold(True) + context_label.setFont(font) + layout.addWidget(context_label) + + # Text editor for context + text_editor = QTextEdit() + text_editor.setMinimumHeight(200) + text_editor.setPlainText(self.full_page_context_prompt) + layout.addWidget(text_editor) + + # OCR Prompt label + ocr_label = QLabel("OCR System Prompt:") + ocr_label.setFont(font) + layout.addWidget(ocr_label) + + # Text editor for OCR + ocr_editor = QTextEdit() + ocr_editor.setMinimumHeight(200) + + # Get current OCR prompt + if hasattr(self, 'ocr_prompt'): + ocr_editor.setPlainText(self.ocr_prompt) + else: + ocr_editor.setPlainText("") + + layout.addWidget(ocr_editor) + + def save_prompt(): + self.full_page_context_prompt = text_editor.toPlainText().strip() + self.ocr_prompt = ocr_editor.toPlainText().strip() + + # Save to config + self.main_gui.config['manga_full_page_context_prompt'] = self.full_page_context_prompt + self.main_gui.config['manga_ocr_prompt'] = self.ocr_prompt + + self._save_rendering_settings() + self._log("✅ Updated prompts", "success") + dialog.accept() + + def reset_prompt(): + default_prompt = ( + "You will receive multiple text segments from a manga page, each prefixed with an index like [0], [1], etc. " + "Translate each segment considering the context of all segments together. 
" + "Maintain consistency in character names, tone, and style across all translations.\n\n" + "CRITICAL: Return your response as a valid JSON object where each key includes BOTH the index prefix " + "AND the original text EXACTLY as provided (e.g., '[0] こんにちは'), and each value is the translation.\n" + "This is essential for correct mapping - do not modify or omit the index prefixes!\n\n" + "Make sure to properly escape any special characters in the JSON:\n" + "- Use \\n for newlines\n" + "- Use \\\" for quotes\n" + "- Use \\\\ for backslashes\n\n" + "Example:\n" + '{\n' + ' "[0] こんにちは": "Hello",\n' + ' "[1] ありがとう": "Thank you",\n' + ' "[2] さようなら": "Goodbye"\n' + '}\n\n' + 'REMEMBER: Keep the [index] prefix in each JSON key exactly as shown in the input!' + ) + text_editor.setPlainText(default_prompt) + + default_ocr = ( + "YOU ARE AN OCR SYSTEM. YOUR ONLY JOB IS TEXT EXTRACTION.\n\n" + "CRITICAL RULES:\n" + "1. DO NOT TRANSLATE ANYTHING\n" + "2. DO NOT MODIFY THE TEXT\n" + "3. DO NOT EXPLAIN OR COMMENT\n" + "4. ONLY OUTPUT THE EXACT TEXT YOU SEE\n" + "5. PRESERVE NATURAL TEXT FLOW - DO NOT ADD UNNECESSARY LINE BREAKS\n\n" + "If you see Korean text, output it in Korean.\n" + "If you see Japanese text, output it in Japanese.\n" + "If you see Chinese text, output it in Chinese.\n" + "If you see English text, output it in English.\n\n" + "IMPORTANT: Only use line breaks where they naturally occur in the original text " + "(e.g., between dialogue lines or paragraphs). Do not break text mid-sentence or " + "between every word/character.\n\n" + "For vertical text common in manga/comics, transcribe it as a continuous line unless " + "there are clear visual breaks.\n\n" + "NEVER translate. ONLY extract exactly what is written.\n" + "Output ONLY the raw text, nothing else." 
+ ) + ocr_editor.setPlainText(default_ocr) + + # Button layout + button_layout = QHBoxLayout() + + save_btn = QPushButton("Save") + save_btn.clicked.connect(save_prompt) + button_layout.addWidget(save_btn) + + reset_btn = QPushButton("Reset to Default") + reset_btn.clicked.connect(reset_prompt) + button_layout.addWidget(reset_btn) + + cancel_btn = QPushButton("Cancel") + cancel_btn.clicked.connect(dialog.reject) + button_layout.addWidget(cancel_btn) + + button_layout.addStretch() + layout.addLayout(button_layout) + + # Show dialog + dialog.exec() + + def _refresh_context_settings(self): + """Refresh context settings from main GUI""" + # Actually fetch the current values from main GUI + if hasattr(self.main_gui, 'contextual_var'): + contextual_enabled = self.main_gui.contextual_var.get() + if hasattr(self, 'contextual_status_label'): + self.contextual_status_label.setText(f"• Contextual Translation: {'Enabled' if contextual_enabled else 'Disabled'}") + + if hasattr(self.main_gui, 'trans_history'): + history_limit = self.main_gui.trans_history.get() + if hasattr(self, 'history_limit_label'): + self.history_limit_label.setText(f"• Translation History Limit: {history_limit} exchanges") + + if hasattr(self.main_gui, 'translation_history_rolling_var'): + rolling_enabled = self.main_gui.translation_history_rolling_var.get() + rolling_status = "Enabled (Rolling Window)" if rolling_enabled else "Disabled (Reset on Limit)" + if hasattr(self, 'rolling_status_label'): + self.rolling_status_label.setText(f"• Rolling History: {rolling_status}") + + # Get and update model from main GUI + current_model = None + model_changed = False + + if hasattr(self.main_gui, 'model_var'): + current_model = self.main_gui.model_var.get() + elif hasattr(self.main_gui, 'model_combo'): + current_model = self.main_gui.model_combo.get() + elif hasattr(self.main_gui, 'config'): + current_model = self.main_gui.config.get('model', 'Unknown') + + # Update model display in the API Settings frame (skip if parent_frame doesn't exist) + if hasattr(self, 'parent_frame') and hasattr(self.parent_frame, 'winfo_children'): + try: + for widget in self.parent_frame.winfo_children(): + if isinstance(widget, tk.LabelFrame) and "Translation Settings" in widget.cget("text"): + for child in widget.winfo_children(): + if isinstance(child, tk.Frame): + for subchild in child.winfo_children(): + if isinstance(subchild, tk.Label) and "Model:" in subchild.cget("text"): + old_model_text = subchild.cget("text") + old_model = old_model_text.split("Model: ")[-1] if "Model: " in old_model_text else None + if old_model != current_model: + model_changed = True + subchild.config(text=f"Model: {current_model}") + break + except Exception: + pass # Silently skip if there's an issue with Tkinter widgets + + # If model changed, reset translator and client to force recreation + if model_changed and current_model: + if self.translator: + self._log(f"Model changed to {current_model}. 
Translator will be recreated on next run.", "info") + self.translator = None # Force recreation on next translation + + # Also reset the client if it exists to ensure new model is used + if hasattr(self.main_gui, 'client') and self.main_gui.client: + if hasattr(self.main_gui.client, 'model') and self.main_gui.client.model != current_model: + self.main_gui.client = None # Force recreation with new model + + # If translator exists, update its history manager settings + if self.translator and hasattr(self.translator, 'history_manager'): + try: + # Update the history manager with current main GUI settings + if hasattr(self.main_gui, 'contextual_var'): + self.translator.history_manager.contextual_enabled = self.main_gui.contextual_var.get() + + if hasattr(self.main_gui, 'trans_history'): + self.translator.history_manager.max_history = int(self.main_gui.trans_history.get()) + + if hasattr(self.main_gui, 'translation_history_rolling_var'): + self.translator.history_manager.rolling_enabled = self.main_gui.translation_history_rolling_var.get() + + # Reset the history to apply new settings + self.translator.history_manager.reset() + + self._log("✅ Refreshed context settings from main GUI and updated translator", "success") + except Exception as e: + self._log(f"✅ Refreshed context settings display (translator will update on next run)", "success") + else: + log_message = "✅ Refreshed context settings from main GUI" + if model_changed: + log_message += f" (Model: {current_model})" + self._log(log_message, "success") + + def _browse_google_credentials_permanent(self): + """Browse and set Google Cloud Vision credentials from the permanent button""" + from PySide6.QtWidgets import QFileDialog + + file_path, _ = QFileDialog.getOpenFileName( + self.dialog, + "Select Google Cloud Service Account JSON", + "", + "JSON files (*.json);;All files (*.*)" + ) + + if file_path: + # Save to config with both keys for compatibility + self.main_gui.config['google_vision_credentials'] = file_path + self.main_gui.config['google_cloud_credentials'] = file_path + + # Save configuration + if hasattr(self.main_gui, 'save_config'): + self.main_gui.save_config(show_message=False) + + + from PySide6.QtWidgets import QMessageBox + + # Update button state immediately + if hasattr(self, 'start_button'): + self.start_button.setEnabled(True) + + # Update credentials display + if hasattr(self, 'creds_label'): + self.creds_label.setText(os.path.basename(file_path)) + self.creds_label.setStyleSheet("color: green;") + + # Update the main status label and provider status + self._update_main_status_label() + self._check_provider_status() + + QMessageBox.information(self.dialog, "Success", "Google Cloud credentials set successfully!") + + def _update_status_display(self): + """Update the status display after credentials change""" + # This would update the status label if we had a reference to it + # For now, we'll just ensure the button is enabled + google_creds_path = self.main_gui.config.get('google_vision_credentials', '') or self.main_gui.config.get('google_cloud_credentials', '') + has_vision = os.path.exists(google_creds_path) if google_creds_path else False + + if has_vision and hasattr(self, 'start_button'): + self.start_button.setEnabled(True) + + def _get_available_fonts(self): + """Get list of available fonts from system and custom directories""" + fonts = ["Default"] # Default option + + # Reset font mapping + self.font_mapping = {} + + # Comprehensive map of Windows font filenames to proper display names + font_name_map = { + 
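+ # Keys are lowercase font-file basenames (no extension); values are the display names shown in the font dropdown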
# === BASIC LATIN FONTS === + # Arial family + 'arial': 'Arial', + 'ariali': 'Arial Italic', + 'arialbd': 'Arial Bold', + 'arialbi': 'Arial Bold Italic', + 'ariblk': 'Arial Black', + + # Times New Roman + 'times': 'Times New Roman', + 'timesbd': 'Times New Roman Bold', + 'timesi': 'Times New Roman Italic', + 'timesbi': 'Times New Roman Bold Italic', + + # Calibri family + 'calibri': 'Calibri', + 'calibrib': 'Calibri Bold', + 'calibrii': 'Calibri Italic', + 'calibriz': 'Calibri Bold Italic', + 'calibril': 'Calibri Light', + 'calibrili': 'Calibri Light Italic', + + # Comic Sans family + 'comic': 'Comic Sans MS', + 'comici': 'Comic Sans MS Italic', + 'comicbd': 'Comic Sans MS Bold', + 'comicz': 'Comic Sans MS Bold Italic', + + # Segoe UI family + 'segoeui': 'Segoe UI', + 'segoeuib': 'Segoe UI Bold', + 'segoeuii': 'Segoe UI Italic', + 'segoeuiz': 'Segoe UI Bold Italic', + 'segoeuil': 'Segoe UI Light', + 'segoeuisl': 'Segoe UI Semilight', + 'seguisb': 'Segoe UI Semibold', + 'seguisbi': 'Segoe UI Semibold Italic', + 'seguisli': 'Segoe UI Semilight Italic', + 'seguili': 'Segoe UI Light Italic', + 'seguibl': 'Segoe UI Black', + 'seguibli': 'Segoe UI Black Italic', + 'seguihis': 'Segoe UI Historic', + 'seguiemj': 'Segoe UI Emoji', + 'seguisym': 'Segoe UI Symbol', + + # Courier + 'cour': 'Courier New', + 'courbd': 'Courier New Bold', + 'couri': 'Courier New Italic', + 'courbi': 'Courier New Bold Italic', + + # Verdana + 'verdana': 'Verdana', + 'verdanab': 'Verdana Bold', + 'verdanai': 'Verdana Italic', + 'verdanaz': 'Verdana Bold Italic', + + # Georgia + 'georgia': 'Georgia', + 'georgiab': 'Georgia Bold', + 'georgiai': 'Georgia Italic', + 'georgiaz': 'Georgia Bold Italic', + + # Tahoma + 'tahoma': 'Tahoma', + 'tahomabd': 'Tahoma Bold', + + # Trebuchet + 'trebuc': 'Trebuchet MS', + 'trebucbd': 'Trebuchet MS Bold', + 'trebucit': 'Trebuchet MS Italic', + 'trebucbi': 'Trebuchet MS Bold Italic', + + # Impact + 'impact': 'Impact', + + # Consolas + 'consola': 'Consolas', + 'consolab': 'Consolas Bold', + 'consolai': 'Consolas Italic', + 'consolaz': 'Consolas Bold Italic', + + # Sitka family (from your screenshot) + 'sitka': 'Sitka Small', + 'sitkab': 'Sitka Small Bold', + 'sitkai': 'Sitka Small Italic', + 'sitkaz': 'Sitka Small Bold Italic', + 'sitkavf': 'Sitka Text', + 'sitkavfb': 'Sitka Text Bold', + 'sitkavfi': 'Sitka Text Italic', + 'sitkavfz': 'Sitka Text Bold Italic', + 'sitkasubheading': 'Sitka Subheading', + 'sitkasubheadingb': 'Sitka Subheading Bold', + 'sitkasubheadingi': 'Sitka Subheading Italic', + 'sitkasubheadingz': 'Sitka Subheading Bold Italic', + 'sitkaheading': 'Sitka Heading', + 'sitkaheadingb': 'Sitka Heading Bold', + 'sitkaheadingi': 'Sitka Heading Italic', + 'sitkaheadingz': 'Sitka Heading Bold Italic', + 'sitkadisplay': 'Sitka Display', + 'sitkadisplayb': 'Sitka Display Bold', + 'sitkadisplayi': 'Sitka Display Italic', + 'sitkadisplayz': 'Sitka Display Bold Italic', + 'sitkabanner': 'Sitka Banner', + 'sitkabannerb': 'Sitka Banner Bold', + 'sitkabanneri': 'Sitka Banner Italic', + 'sitkabannerz': 'Sitka Banner Bold Italic', + + # Ink Free (from your screenshot) + 'inkfree': 'Ink Free', + + # Lucida family + 'l_10646': 'Lucida Sans Unicode', + 'lucon': 'Lucida Console', + 'ltype': 'Lucida Sans Typewriter', + 'ltypeb': 'Lucida Sans Typewriter Bold', + 'ltypei': 'Lucida Sans Typewriter Italic', + 'ltypebi': 'Lucida Sans Typewriter Bold Italic', + + # Palatino Linotype + 'pala': 'Palatino Linotype', + 'palab': 'Palatino Linotype Bold', + 'palabi': 'Palatino Linotype Bold Italic', + 'palai': 
'Palatino Linotype Italic', + + # Noto fonts + 'notosansjp': 'Noto Sans JP', + 'notoserifjp': 'Noto Serif JP', + + # UD Digi Kyokasho (Japanese educational font) + 'uddigikyokashon-b': 'UD Digi Kyokasho NK-B', + 'uddigikyokashon-r': 'UD Digi Kyokasho NK-R', + 'uddigikyokashonk-b': 'UD Digi Kyokasho NK-B', + 'uddigikyokashonk-r': 'UD Digi Kyokasho NK-R', + + # Urdu Typesetting + 'urdtype': 'Urdu Typesetting', + 'urdtypeb': 'Urdu Typesetting Bold', + + # Segoe variants + 'segmdl2': 'Segoe MDL2 Assets', + 'segoeicons': 'Segoe Fluent Icons', + 'segoepr': 'Segoe Print', + 'segoeprb': 'Segoe Print Bold', + 'segoesc': 'Segoe Script', + 'segoescb': 'Segoe Script Bold', + 'seguivar': 'Segoe UI Variable', + + # Sans Serif Collection + 'sansserifcollection': 'Sans Serif Collection', + + # Additional common Windows 10/11 fonts + 'holomdl2': 'HoloLens MDL2 Assets', + 'gadugi': 'Gadugi', + 'gadugib': 'Gadugi Bold', + + # Cascadia Code (developer font) + 'cascadiacode': 'Cascadia Code', + 'cascadiacodepl': 'Cascadia Code PL', + 'cascadiamono': 'Cascadia Mono', + 'cascadiamonopl': 'Cascadia Mono PL', + + # More Segoe UI variants + 'seguibli': 'Segoe UI Black Italic', + 'segoeuiblack': 'Segoe UI Black', + + # Other fonts + 'aldhabi': 'Aldhabi', + 'andiso': 'Andalus', # This is likely Andalus font + 'arabtype': 'Arabic Typesetting', + 'mstmc': 'Myanmar Text', # Alternate file name + 'monbaiti': 'Mongolian Baiti', # Shorter filename variant + 'leeluisl': 'Leelawadee UI Semilight', # Missing variant + 'simsunextg': 'SimSun-ExtG', # Extended SimSun variant + 'ebrima': 'Ebrima', + 'ebrimabd': 'Ebrima Bold', + 'gabriola': 'Gabriola', + + # Bahnschrift variants + 'bahnschrift': 'Bahnschrift', + 'bahnschriftlight': 'Bahnschrift Light', + 'bahnschriftsemibold': 'Bahnschrift SemiBold', + 'bahnschriftbold': 'Bahnschrift Bold', + + # Majalla (African language font) + 'majalla': 'Sakkal Majalla', + 'majallab': 'Sakkal Majalla Bold', + + # Additional fonts that might be missing + 'amiri': 'Amiri', + 'amiri-bold': 'Amiri Bold', + 'amiri-slanted': 'Amiri Slanted', + 'amiri-boldslanted': 'Amiri Bold Slanted', + 'aparaj': 'Aparajita', + 'aparajb': 'Aparajita Bold', + 'aparaji': 'Aparajita Italic', + 'aparajbi': 'Aparajita Bold Italic', + 'kokila': 'Kokila', + 'kokilab': 'Kokila Bold', + 'kokilai': 'Kokila Italic', + 'kokilabi': 'Kokila Bold Italic', + 'utsaah': 'Utsaah', + 'utsaahb': 'Utsaah Bold', + 'utsaahi': 'Utsaah Italic', + 'utsaahbi': 'Utsaah Bold Italic', + 'vani': 'Vani', + 'vanib': 'Vani Bold', + + # === JAPANESE FONTS === + 'msgothic': 'MS Gothic', + 'mspgothic': 'MS PGothic', + 'msmincho': 'MS Mincho', + 'mspmincho': 'MS PMincho', + 'meiryo': 'Meiryo', + 'meiryob': 'Meiryo Bold', + 'yugothic': 'Yu Gothic', + 'yugothb': 'Yu Gothic Bold', + 'yugothl': 'Yu Gothic Light', + 'yugothm': 'Yu Gothic Medium', + 'yugothr': 'Yu Gothic Regular', + 'yumin': 'Yu Mincho', + 'yumindb': 'Yu Mincho Demibold', + 'yuminl': 'Yu Mincho Light', + + # === KOREAN FONTS === + 'malgun': 'Malgun Gothic', + 'malgunbd': 'Malgun Gothic Bold', + 'malgunsl': 'Malgun Gothic Semilight', + 'gulim': 'Gulim', + 'gulimche': 'GulimChe', + 'dotum': 'Dotum', + 'dotumche': 'DotumChe', + 'batang': 'Batang', + 'batangche': 'BatangChe', + 'gungsuh': 'Gungsuh', + 'gungsuhche': 'GungsuhChe', + + # === CHINESE FONTS === + # Simplified Chinese + 'simsun': 'SimSun', + 'simsunb': 'SimSun Bold', + 'simsunextb': 'SimSun ExtB', + 'nsimsun': 'NSimSun', + 'simhei': 'SimHei', + 'simkai': 'KaiTi', + 'simfang': 'FangSong', + 'simli': 'LiSu', + 'simyou': 'YouYuan', + 
'stcaiyun': 'STCaiyun', + 'stfangsong': 'STFangsong', + 'sthupo': 'STHupo', + 'stkaiti': 'STKaiti', + 'stliti': 'STLiti', + 'stsong': 'STSong', + 'stxihei': 'STXihei', + 'stxingkai': 'STXingkai', + 'stxinwei': 'STXinwei', + 'stzhongsong': 'STZhongsong', + + # Traditional Chinese + 'msjh': 'Microsoft JhengHei', + 'msjhbd': 'Microsoft JhengHei Bold', + 'msjhl': 'Microsoft JhengHei Light', + 'mingliu': 'MingLiU', + 'pmingliu': 'PMingLiU', + 'mingliub': 'MingLiU Bold', + 'mingliuhk': 'MingLiU_HKSCS', + 'mingliuextb': 'MingLiU ExtB', + 'pmingliuextb': 'PMingLiU ExtB', + 'mingliuhkextb': 'MingLiU_HKSCS ExtB', + 'kaiu': 'DFKai-SB', + + # Microsoft YaHei + 'msyh': 'Microsoft YaHei', + 'msyhbd': 'Microsoft YaHei Bold', + 'msyhl': 'Microsoft YaHei Light', + + # === THAI FONTS === + 'leelawui': 'Leelawadee UI', + 'leelauib': 'Leelawadee UI Bold', + 'leelauisl': 'Leelawadee UI Semilight', + 'leelawad': 'Leelawadee', + 'leelawdb': 'Leelawadee Bold', + + # === INDIC FONTS === + 'mangal': 'Mangal', + 'vrinda': 'Vrinda', + 'raavi': 'Raavi', + 'shruti': 'Shruti', + 'tunga': 'Tunga', + 'gautami': 'Gautami', + 'kartika': 'Kartika', + 'latha': 'Latha', + 'kalinga': 'Kalinga', + 'vijaya': 'Vijaya', + 'nirmala': 'Nirmala UI', + 'nirmalab': 'Nirmala UI Bold', + 'nirmalas': 'Nirmala UI Semilight', + + # === ARABIC FONTS === + 'arial': 'Arial', + 'trado': 'Traditional Arabic', + 'tradbdo': 'Traditional Arabic Bold', + 'simpo': 'Simplified Arabic', + 'simpbdo': 'Simplified Arabic Bold', + 'simpfxo': 'Simplified Arabic Fixed', + + # === OTHER ASIAN FONTS === + 'javatext': 'Javanese Text', + 'himalaya': 'Microsoft Himalaya', + 'mongolianbaiti': 'Mongolian Baiti', + 'msuighur': 'Microsoft Uighur', + 'msuighub': 'Microsoft Uighur Bold', + 'msyi': 'Microsoft Yi Baiti', + 'taileb': 'Microsoft Tai Le Bold', + 'taile': 'Microsoft Tai Le', + 'ntailu': 'Microsoft New Tai Lue', + 'ntailub': 'Microsoft New Tai Lue Bold', + 'phagspa': 'Microsoft PhagsPa', + 'phagspab': 'Microsoft PhagsPa Bold', + 'mmrtext': 'Myanmar Text', + 'mmrtextb': 'Myanmar Text Bold', + + # === SYMBOL FONTS === + 'symbol': 'Symbol', + 'webdings': 'Webdings', + 'wingding': 'Wingdings', + 'wingdng2': 'Wingdings 2', + 'wingdng3': 'Wingdings 3', + 'mtextra': 'MT Extra', + 'marlett': 'Marlett', + + # === OTHER FONTS === + 'mvboli': 'MV Boli', + 'sylfaen': 'Sylfaen', + 'estrangelo': 'Estrangelo Edessa', + 'euphemia': 'Euphemia', + 'plantagenet': 'Plantagenet Cherokee', + 'micross': 'Microsoft Sans Serif', + + # Franklin Gothic + 'framd': 'Franklin Gothic Medium', + 'framdit': 'Franklin Gothic Medium Italic', + 'fradm': 'Franklin Gothic Demi', + 'fradmcn': 'Franklin Gothic Demi Cond', + 'fradmit': 'Franklin Gothic Demi Italic', + 'frahv': 'Franklin Gothic Heavy', + 'frahvit': 'Franklin Gothic Heavy Italic', + 'frabook': 'Franklin Gothic Book', + 'frabookit': 'Franklin Gothic Book Italic', + + # Cambria + 'cambria': 'Cambria', + 'cambriab': 'Cambria Bold', + 'cambriai': 'Cambria Italic', + 'cambriaz': 'Cambria Bold Italic', + 'cambria&cambria math': 'Cambria Math', + + # Candara + 'candara': 'Candara', + 'candarab': 'Candara Bold', + 'candarai': 'Candara Italic', + 'candaraz': 'Candara Bold Italic', + 'candaral': 'Candara Light', + 'candarali': 'Candara Light Italic', + + # Constantia + 'constan': 'Constantia', + 'constanb': 'Constantia Bold', + 'constani': 'Constantia Italic', + 'constanz': 'Constantia Bold Italic', + + # Corbel + 'corbel': 'Corbel', + 'corbelb': 'Corbel Bold', + 'corbeli': 'Corbel Italic', + 'corbelz': 'Corbel Bold Italic', + 'corbell': 'Corbel 
Light', + 'corbelli': 'Corbel Light Italic', + + # Bahnschrift + 'bahnschrift': 'Bahnschrift', + + # Garamond + 'gara': 'Garamond', + 'garabd': 'Garamond Bold', + 'garait': 'Garamond Italic', + + # Century Gothic + 'gothic': 'Century Gothic', + 'gothicb': 'Century Gothic Bold', + 'gothici': 'Century Gothic Italic', + 'gothicz': 'Century Gothic Bold Italic', + + # Bookman Old Style + 'bookos': 'Bookman Old Style', + 'bookosb': 'Bookman Old Style Bold', + 'bookosi': 'Bookman Old Style Italic', + 'bookosbi': 'Bookman Old Style Bold Italic', + } + + # Dynamically discover all Windows fonts + windows_fonts = [] + windows_font_dir = "C:/Windows/Fonts" + + if os.path.exists(windows_font_dir): + for font_file in os.listdir(windows_font_dir): + font_path = os.path.join(windows_font_dir, font_file) + + # Check if it's a font file + if os.path.isfile(font_path) and font_file.lower().endswith(('.ttf', '.ttc', '.otf')): + # Get base name without extension + base_name = os.path.splitext(font_file)[0] + base_name_lower = base_name.lower() + + # Check if we have a proper name mapping + if base_name_lower in font_name_map: + display_name = font_name_map[base_name_lower] + else: + # Generic cleanup for unmapped fonts + display_name = base_name.replace('_', ' ').replace('-', ' ') + display_name = ' '.join(word.capitalize() for word in display_name.split()) + + windows_fonts.append((display_name, font_path)) + + # Sort alphabetically + windows_fonts.sort(key=lambda x: x[0]) + + # Add all discovered fonts to the list + for font_name, font_path in windows_fonts: + fonts.append(font_name) + self.font_mapping[font_name] = font_path + + # Check for custom fonts directory (keep your existing code) + script_dir = os.path.dirname(os.path.abspath(__file__)) + fonts_dir = os.path.join(script_dir, "fonts") + + if os.path.exists(fonts_dir): + for root, dirs, files in os.walk(fonts_dir): + for font_file in files: + if font_file.endswith(('.ttf', '.ttc', '.otf')): + font_path = os.path.join(root, font_file) + font_name = os.path.splitext(font_file)[0] + # Add category from folder + category = os.path.basename(root) + if category != "fonts": + font_name = f"{font_name} ({category})" + fonts.append(font_name) + self.font_mapping[font_name] = font_path + + # Load previously saved custom fonts (keep your existing code) + if 'custom_fonts' in self.main_gui.config: + for custom_font in self.main_gui.config['custom_fonts']: + if os.path.exists(custom_font['path']): + # Check if this font is already in the list + if custom_font['name'] not in fonts: + fonts.append(custom_font['name']) + self.font_mapping[custom_font['name']] = custom_font['path'] + + # Add custom fonts option at the end + fonts.append("Browse Custom Font...") + + return fonts + + def _on_font_selected(self): + """Handle font selection - updates font path only, save+apply called by widget""" + if not hasattr(self, 'font_combo'): + return + selected = self.font_combo.currentText() + + if selected == "Default": + self.selected_font_path = None + elif selected == "Browse Custom Font...": + # Open file dialog to select custom font using PySide6 + font_path, _ = QFileDialog.getOpenFileName( + self.dialog if hasattr(self, 'dialog') else None, + "Select Font File", + "", + "Font files (*.ttf *.ttc *.otf);;TrueType fonts (*.ttf);;TrueType collections (*.ttc);;OpenType fonts (*.otf);;All files (*.*)" + ) + + # Check if user selected a file (not cancelled) + if font_path and font_path.strip(): + # Add to combo box + font_name = os.path.basename(font_path) + + # Insert before 
"Browse Custom Font..." option + if font_name not in [n for n in self.font_mapping.keys()]: + # Add to combo box (PySide6) + self.font_combo.insertItem(self.font_combo.count() - 1, font_name) + self.font_combo.setCurrentText(font_name) + + # Update font mapping + self.font_mapping[font_name] = font_path + self.selected_font_path = font_path + + # Save custom font to config + if 'custom_fonts' not in self.main_gui.config: + self.main_gui.config['custom_fonts'] = [] + + custom_font_entry = {'name': font_name, 'path': font_path} + # Check if this exact entry already exists + font_exists = False + for existing_font in self.main_gui.config['custom_fonts']: + if existing_font['path'] == font_path: + font_exists = True + break + + if not font_exists: + self.main_gui.config['custom_fonts'].append(custom_font_entry) + # Save config immediately to persist custom fonts + if hasattr(self.main_gui, 'save_config'): + self.main_gui.save_config(show_message=False) + else: + # Font already exists, just select it + self.font_combo.setCurrentText(font_name) + self.selected_font_path = self.font_mapping[font_name] + else: + # User cancelled, revert to previous selection + if hasattr(self, 'previous_font_selection'): + self.font_combo.setCurrentText(self.previous_font_selection) + else: + self.font_combo.setCurrentText("Default") + return + else: + # Check if it's in the font mapping + if selected in self.font_mapping: + self.selected_font_path = self.font_mapping[selected] + else: + # This shouldn't happen, but just in case + self.selected_font_path = None + + # Store current selection for next time + self.previous_font_selection = selected + + def _update_opacity_label(self, value): + """Update opacity percentage label and value variable""" + self.bg_opacity_value = int(value) # UPDATE THE VALUE VARIABLE! + percentage = int((float(value) / 255) * 100) + self.opacity_label.setText(f"{percentage}%") + + def _update_reduction_label(self, value): + """Update size reduction percentage label and value variable""" + self.bg_reduction_value = float(value) # UPDATE THE VALUE VARIABLE! 
+ percentage = int(float(value) * 100) + self.reduction_label.setText(f"{percentage}%") + + def _toggle_inpaint_quality_visibility(self): + """Show/hide inpaint quality options based on skip_inpainting setting""" + if hasattr(self, 'inpaint_quality_frame'): + if self.skip_inpainting_value: + # Hide quality options when inpainting is skipped + self.inpaint_quality_frame.hide() + else: + # Show quality options when inpainting is enabled + self.inpaint_quality_frame.show() + + def _toggle_inpaint_visibility(self): + """Show/hide inpainting options based on skip toggle""" + # Update the value from the checkbox + self.skip_inpainting_value = self.skip_inpainting_checkbox.isChecked() + + if self.skip_inpainting_value: + # Hide all inpainting options + self.inpaint_method_frame.hide() + self.cloud_inpaint_frame.hide() + self.local_inpaint_frame.hide() + self.inpaint_separator.hide() # Hide separator + else: + # Show method selection + self.inpaint_method_frame.show() + self.inpaint_separator.show() # Show separator + self._on_inpaint_method_change() + + # Don't save during initialization + if not (hasattr(self, '_initializing') and self._initializing): + self._save_rendering_settings() + + def _on_inpaint_method_change(self): + """Show appropriate inpainting settings based on method""" + # Determine current method from radio buttons + if self.cloud_radio.isChecked(): + method = 'cloud' + elif self.local_radio.isChecked(): + method = 'local' + elif self.hybrid_radio.isChecked(): + method = 'hybrid' + else: + method = 'local' # Default fallback + + # Update the stored value + self.inpaint_method_value = method + + if method == 'cloud': + self.cloud_inpaint_frame.show() + self.local_inpaint_frame.hide() + elif method == 'local': + self.local_inpaint_frame.show() + self.cloud_inpaint_frame.hide() + elif method == 'hybrid': + # Show both frames for hybrid + self.local_inpaint_frame.show() + self.cloud_inpaint_frame.show() + + # Don't save during initialization + if not (hasattr(self, '_initializing') and self._initializing): + self._save_rendering_settings() + + def _on_local_model_change(self, new_model_type=None): + """Handle model type change and auto-load if model exists""" + # Get model type from combo box (PySide6) + if new_model_type is None: + model_type = self.local_model_combo.currentText() + else: + model_type = new_model_type + + # Update stored value + self.local_model_type_value = model_type + + # Update description + model_desc = { + 'lama': 'LaMa (Best quality)', + 'aot': 'AOT GAN (Fast)', + 'aot_onnx': 'AOT ONNX (Optimized)', + 'mat': 'MAT (High-res)', + 'sd_local': 'Stable Diffusion (Anime)', + 'anime': 'Anime/Manga Inpainting', + 'anime_onnx': 'Anime ONNX (Fast/Optimized)', + 'lama_onnx': 'LaMa ONNX (Optimized)', + } + self.model_desc_label.setText(model_desc.get(model_type, '')) + + # Check for saved path for this model type + saved_path = self.main_gui.config.get(f'manga_{model_type}_model_path', '') + + if saved_path and os.path.exists(saved_path): + # Update the path display + self.local_model_entry.setText(saved_path) + self.local_model_path_value = saved_path + self.local_model_status_label.setText("⏳ Loading saved model...") + self.local_model_status_label.setStyleSheet("color: orange;") + + # Auto-load the model after a short delay using QTimer + from PySide6.QtCore import QTimer + QTimer.singleShot(100, lambda: self._try_load_model(model_type, saved_path)) + else: + # Clear the path display + self.local_model_entry.setText("") + self.local_model_path_value = "" + 
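+ # Nothing saved for this model type yet - reset the status and wait for the user to Browse/Download a model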
self.local_model_status_label.setText("No model loaded") + self.local_model_status_label.setStyleSheet("color: gray;") + + self._save_rendering_settings() + + def _browse_local_model(self): + """Browse for local inpainting model and auto-load""" + from PySide6.QtWidgets import QFileDialog + from PySide6.QtCore import QTimer + + model_type = self.local_model_type_value + + if model_type == 'sd_local': + filter_str = "Model files (*.safetensors *.pt *.pth *.ckpt *.onnx);;SafeTensors (*.safetensors);;Checkpoint files (*.ckpt);;PyTorch models (*.pt *.pth);;ONNX models (*.onnx);;All files (*.*)" + else: + filter_str = "Model files (*.pt *.pth *.ckpt *.onnx);;Checkpoint files (*.ckpt);;PyTorch models (*.pt *.pth);;ONNX models (*.onnx);;All files (*.*)" + + path, _ = QFileDialog.getOpenFileName( + self.dialog, + f"Select {model_type.upper()} Model", + "", + filter_str + ) + + if path: + self.local_model_entry.setText(path) + self.local_model_path_value = path + # Save to config + self.main_gui.config[f'manga_{model_type}_model_path'] = path + self._save_rendering_settings() + + # Update status first + self._update_local_model_status() + + # Auto-load the selected model using QTimer + QTimer.singleShot(100, lambda: self._try_load_model(model_type, path)) + + def _click_load_local_model(self): + """Manually trigger loading of the selected local inpainting model""" + from PySide6.QtWidgets import QMessageBox + from PySide6.QtCore import QTimer + + try: + model_type = self.local_model_type_value if hasattr(self, 'local_model_type_value') else None + path = self.local_model_path_value if hasattr(self, 'local_model_path_value') else '' + if not model_type or not path: + QMessageBox.information(self.dialog, "Load Model", "Please select a model file first using the Browse button.") + return + # Defer to keep UI responsive using QTimer + QTimer.singleShot(50, lambda: self._try_load_model(model_type, path)) + except Exception: + pass + + def _try_load_model(self, method: str, model_path: str): + """Try to load a model and update status (runs loading on a background thread).""" + from PySide6.QtCore import QTimer, QMetaObject, Qt, Q_ARG, QThread + from PySide6.QtWidgets import QApplication + + try: + # Show loading status immediately + self.local_model_status_label.setText("⏳ Loading model...") + self.local_model_status_label.setStyleSheet("color: orange;") + QApplication.processEvents() # Process pending events to update UI + self.main_gui.append_log(f"⏳ Loading {method.upper()} model...") + + def do_load(): + from local_inpainter import LocalInpainter + success = False + try: + test_inpainter = LocalInpainter() + success = test_inpainter.load_model_with_retry(method, model_path, force_reload=True) + print(f"DEBUG: Model loading completed, success={success}") + except Exception as e: + print(f"DEBUG: Model loading exception: {e}") + self.main_gui.append_log(f"❌ Error loading model: {e}") + success = False + + # Update UI directly from thread (works in PySide6/Qt6) + print(f"DEBUG: Updating UI, success={success}") + try: + if success: + self.local_model_status_label.setText(f"✅ {method.upper()} model ready") + self.local_model_status_label.setStyleSheet("color: green;") + self.main_gui.append_log(f"✅ {method.upper()} model loaded successfully!") + if hasattr(self, 'translator') and self.translator: + for attr in ('local_inpainter', '_last_local_method', '_last_local_model_path'): + if hasattr(self.translator, attr): + try: + delattr(self.translator, attr) + except Exception: + pass + else: + 
self.local_model_status_label.setText("⚠️ Model file found but failed to load") + self.local_model_status_label.setStyleSheet("color: orange;") + self.main_gui.append_log("⚠️ Model file found but failed to load") + print(f"DEBUG: UI update completed") + except Exception as e: + print(f"ERROR updating UI after load: {e}") + import traceback + traceback.print_exc() + + # Fire background loader + threading.Thread(target=do_load, daemon=True).start() + return True + except Exception as e: + try: + self.local_model_status_label.setText(f"❌ Error: {str(e)[:50]}") + self.local_model_status_label.setStyleSheet("color: red;") + except Exception: + pass + self.main_gui.append_log(f"❌ Error loading model: {e}") + return False + + def _update_local_model_status(self): + """Update local model status display""" + path = self.local_model_path_value if hasattr(self, 'local_model_path_value') else '' + + if not path: + self.local_model_status_label.setText("⚠️ No model selected") + self.local_model_status_label.setStyleSheet("color: orange;") + return + + if not os.path.exists(path): + self.local_model_status_label.setText("❌ Model file not found") + self.local_model_status_label.setStyleSheet("color: red;") + return + + # Check for ONNX cache + if path.endswith(('.pt', '.pth', '.safetensors')): + onnx_dir = os.path.join(os.path.dirname(path), 'models') + if os.path.exists(onnx_dir): + # Check if ONNX file exists for this model + model_hash = hashlib.md5(path.encode()).hexdigest()[:8] + onnx_files = [f for f in os.listdir(onnx_dir) if model_hash in f] + if onnx_files: + self.local_model_status_label.setText("✅ Model ready (ONNX cached)") + self.local_model_status_label.setStyleSheet("color: green;") + else: + self.local_model_status_label.setText("ℹ️ Will convert to ONNX on first use") + self.local_model_status_label.setStyleSheet("color: blue;") + else: + self.local_model_status_label.setText("ℹ️ Will convert to ONNX on first use") + self.local_model_status_label.setStyleSheet("color: blue;") + else: + self.local_model_status_label.setText("✅ ONNX model ready") + self.local_model_status_label.setStyleSheet("color: green;") + + def _download_model(self): + """Actually download the model for the selected type""" + from PySide6.QtWidgets import QMessageBox + + model_type = self.local_model_type_value + + # Define URLs for each model type + model_urls = { + 'aot': 'https://huggingface.co/ogkalu/aot-inpainting-jit/resolve/main/aot_traced.pt', + 'aot_onnx': 'https://huggingface.co/ogkalu/aot-inpainting/resolve/main/aot.onnx', + 'lama': 'https://github.com/Sanster/models/releases/download/add_big_lama/big-lama.pt', + 'lama_onnx': 'https://huggingface.co/Carve/LaMa-ONNX/resolve/main/lama_fp32.onnx', + 'anime': 'https://github.com/Sanster/models/releases/download/AnimeMangaInpainting/anime-manga-big-lama.pt', + 'anime_onnx': 'https://huggingface.co/ogkalu/lama-manga-onnx-dynamic/resolve/main/lama-manga-dynamic.onnx', + 'mat': '', # User must provide + 'ollama': '', # Not applicable + 'sd_local': '' # User must provide + } + + url = model_urls.get(model_type, '') + + if not url: + QMessageBox.information(self.dialog, "Manual Download", + f"Please manually download and browse for {model_type} model") + return + + # Determine filename + filename_map = { + 'aot': 'aot_traced.pt', + 'aot_onnx': 'aot.onnx', + 'lama': 'big-lama.pt', + 'anime': 'anime-manga-big-lama.pt', + 'anime_onnx': 'lama-manga-dynamic.onnx', + 'lama_onnx': 'lama_fp32.onnx', + 'fcf_onnx': 'fcf.onnx', + 'sd_inpaint_onnx': 'sd_inpaint_unet.onnx' + } + + 
filename = filename_map.get(model_type, f'{model_type}.pt') + save_path = os.path.join('models', filename) + + # Create models directory + os.makedirs('models', exist_ok=True) + + # Check if already exists + if os.path.exists(save_path): + self.local_model_entry.setText(save_path) + self.local_model_path_value = save_path + self.local_model_status_label.setText("✅ Model already downloaded") + self.local_model_status_label.setStyleSheet("color: green;") + QMessageBox.information(self.dialog, "Model Ready", f"Model already exists at:\n{save_path}") + return + + # Download the model + self._perform_download(url, save_path, model_type) + + def _perform_download(self, url: str, save_path: str, model_name: str): + """Perform the actual download with progress indication""" + import threading + import requests + from PySide6.QtWidgets import QDialog, QVBoxLayout, QLabel, QProgressBar, QPushButton + from PySide6.QtCore import Qt, QTimer + from PySide6.QtGui import QIcon + + # Create a progress dialog + progress_dialog = QDialog(self.dialog) + progress_dialog.setWindowTitle(f"Downloading {model_name.upper()} Model") + progress_dialog.setFixedSize(400, 150) + progress_dialog.setModal(True) + + # Set window icon + icon_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'halgakos.ico') + if os.path.exists(icon_path): + progress_dialog.setWindowIcon(QIcon(icon_path)) + + layout = QVBoxLayout(progress_dialog) + + # Progress label + progress_label = QLabel("⏳ Downloading...") + progress_label.setAlignment(Qt.AlignCenter) + layout.addWidget(progress_label) + + # Progress bar + progress_bar = QProgressBar() + progress_bar.setMinimum(0) + progress_bar.setMaximum(100) + progress_bar.setValue(0) + layout.addWidget(progress_bar) + + # Status label + status_label = QLabel("0%") + status_label.setAlignment(Qt.AlignCenter) + layout.addWidget(status_label) + + # Cancel flag + cancel_download = {'value': False} + + def on_cancel(): + cancel_download['value'] = True + progress_dialog.close() + + progress_dialog.closeEvent = lambda event: on_cancel() + + def download_thread(): + import time + try: + # Download with progress and speed tracking + response = requests.get(url, stream=True, timeout=30) + response.raise_for_status() + + total_size = int(response.headers.get('content-length', 0)) + downloaded = 0 + start_time = time.time() + last_update = start_time + + with open(save_path, 'wb') as f: + for chunk in response.iter_content(chunk_size=8192): + if cancel_download['value']: + # Clean up partial file + f.close() + if os.path.exists(save_path): + os.remove(save_path) + return + + if chunk: + f.write(chunk) + downloaded += len(chunk) + + # Update progress (throttle updates to every 0.1 seconds) + current_time = time.time() + if total_size > 0 and (current_time - last_update > 0.1): + last_update = current_time + elapsed = current_time - start_time + speed = downloaded / elapsed if elapsed > 0 else 0 + speed_mb = speed / (1024 * 1024) + progress = (downloaded / total_size) * 100 + + # Direct widget updates work in PySide6 from threads + try: + progress_bar.setValue(int(progress)) + status_label.setText(f"{progress:.1f}% - {speed_mb:.2f} MB/s") + progress_label.setText(f"⏳ Downloading... 
{downloaded//1024//1024}MB / {total_size//1024//1024}MB") + except RuntimeError: + # Widget was destroyed, exit + cancel_download['value'] = True + return + + # Success - direct call (works in PySide6/Qt6) + try: + progress_dialog.close() + self._download_complete(save_path, model_name) + except Exception as e: + print(f"Error in download completion: {e}") + + except requests.exceptions.RequestException as e: + # Error - direct call + if not cancel_download['value']: + try: + progress_dialog.close() + self._download_failed(str(e)) + except Exception as ex: + print(f"Error handling download failure: {ex}") + except Exception as e: + if not cancel_download['value']: + try: + progress_dialog.close() + self._download_failed(str(e)) + except Exception as ex: + print(f"Error handling download failure: {ex}") + + # Start download in background thread + thread = threading.Thread(target=download_thread, daemon=True) + thread.start() + + # Show dialog + progress_dialog.exec() + + def _download_complete(self, save_path: str, model_name: str): + """Handle successful download""" + from PySide6.QtWidgets import QMessageBox + + # Update the model path entry + self.local_model_entry.setText(save_path) + self.local_model_path_value = save_path + + # Save to config + self.main_gui.config[f'manga_{model_name}_model_path'] = save_path + self._save_rendering_settings() + + # Log to main GUI + self.main_gui.append_log(f"✅ Downloaded {model_name} model to: {save_path}") + + # Auto-load the downloaded model (direct call) + self.local_model_status_label.setText("⏳ Loading downloaded model...") + self.local_model_status_label.setStyleSheet("color: orange;") + + # Try to load immediately + if self._try_load_model(model_name, save_path): + QMessageBox.information(self.dialog, "Success", f"{model_name.upper()} model downloaded and loaded!") + else: + QMessageBox.information(self.dialog, "Download Complete", f"{model_name.upper()} model downloaded but needs manual loading") + + def _download_failed(self, error: str): + """Handle download failure""" + from PySide6.QtWidgets import QMessageBox + + QMessageBox.critical(self.dialog, "Download Failed", f"Failed to download model:\n{error}") + self.main_gui.append_log(f"❌ Model download failed: {error}") + + def _show_model_info(self): + """Show information about models""" + model_type = self.local_model_type_value + + info = { + 'aot': "AOT GAN Model:\n\n" + "• Auto-downloads from HuggingFace\n" + "• Traced PyTorch JIT model\n" + "• Good for general inpainting\n" + "• Fast processing speed\n" + "• File size: ~100MB", + + 'aot_onnx': "AOT ONNX Model:\n\n" + "• Optimized ONNX version\n" + "• Auto-downloads from HuggingFace\n" + "• 2-3x faster than PyTorch version\n" + "• Great for batch processing\n" + "• Lower memory usage\n" + "• File size: ~100MB", + + 'lama': "LaMa Model:\n\n" + "• Auto-downloads anime-optimized version\n" + "• Best quality for manga/anime\n" + "• Large model (~200MB)\n" + "• Excellent at removing text from bubbles\n" + "• Preserves art style well", + + 'anime': "Anime-Specific Model:\n\n" + "• Same as LaMa anime version\n" + "• Optimized for manga/anime art\n" + "• Auto-downloads from GitHub\n" + "• Recommended for manga translation\n" + "• Preserves screen tones and patterns", + + 'anime_onnx': "Anime ONNX Model:\n\n" + "• Optimized ONNX version for speed\n" + "• Auto-downloads from HuggingFace\n" + "• 2-3x faster than PyTorch version\n" + "• Perfect for batch processing\n" + "• Same quality as anime model\n" + "• File size: ~190MB\n" + "• DEFAULT for 
inpainting", + + 'mat': "MAT Model:\n\n" + "• Manual download required\n" + "• Get from: github.com/fenglinglwb/MAT\n" + "• Good for high-resolution images\n" + "• Slower but high quality\n" + "• File size: ~500MB", + + 'ollama': "Ollama:\n\n" + "• Uses local Ollama server\n" + "• No model download needed here\n" + "• Run: ollama pull llava\n" + "• Context-aware inpainting\n" + "• Requires Ollama running locally", + + 'sd_local': "Stable Diffusion:\n\n" + "• Manual download required\n" + "• Get from HuggingFace\n" + "• Requires significant VRAM (4-8GB)\n" + "• Best quality but slowest\n" + "• Can use custom prompts" + } + + from PySide6.QtWidgets import QDialog, QVBoxLayout, QTextEdit, QPushButton + from PySide6.QtCore import Qt + + # Create info dialog + info_dialog = QDialog(self.dialog) + info_dialog.setWindowTitle(f"{model_type.upper()} Model Information") + info_dialog.setFixedSize(450, 350) + info_dialog.setModal(True) + + layout = QVBoxLayout(info_dialog) + + # Info text + text_widget = QTextEdit() + text_widget.setReadOnly(True) + text_widget.setPlainText(info.get(model_type, "Please select a model type first")) + layout.addWidget(text_widget) + + # Close button + close_btn = QPushButton("Close") + close_btn.clicked.connect(info_dialog.close) + close_btn.setStyleSheet("QPushButton { background-color: #6c757d; color: white; padding: 5px 15px; }") + layout.addWidget(close_btn) + + info_dialog.exec() + + def _toggle_inpaint_controls_visibility(self): + """Toggle visibility of inpaint controls (mask expansion and passes) based on skip inpainting setting""" + # Just return if the frame doesn't exist - prevents AttributeError + if not hasattr(self, 'inpaint_controls_frame'): + return + + if self.skip_inpainting_value: + self.inpaint_controls_frame.hide() + else: + # Show it back + self.inpaint_controls_frame.show() + + def _configure_inpaint_api(self): + """Configure cloud inpainting API""" + from PySide6.QtWidgets import QMessageBox, QDialog, QVBoxLayout, QHBoxLayout, QLabel, QLineEdit, QPushButton + from PySide6.QtCore import Qt + import webbrowser + + # Show instructions + result = QMessageBox.question( + self.dialog, + "Configure Cloud Inpainting", + "Cloud inpainting uses Replicate API for questionable results.\n\n" + "1. Go to replicate.com and sign up (free tier available?)\n" + "2. Get your API token from Account Settings\n" + "3. 
Enter it here\n\n" + "Pricing: ~$0.0023 per image?\n" + "Free tier: ~100 images per month?\n\n" + "Would you like to proceed?", + QMessageBox.Yes | QMessageBox.No + ) + + if result != QMessageBox.Yes: + return + + # Open Replicate page + webbrowser.open("https://replicate.com/account/api-tokens") + + # Create API key input dialog + api_dialog = QDialog(self.dialog) + api_dialog.setWindowTitle("Replicate API Key") + api_dialog.setFixedSize(400, 150) + api_dialog.setModal(True) + + layout = QVBoxLayout(api_dialog) + layout.setContentsMargins(20, 20, 20, 20) + + # Label + label = QLabel("Enter your Replicate API key:") + layout.addWidget(label) + + # Entry with show/hide + entry_layout = QHBoxLayout() + entry = QLineEdit() + entry.setEchoMode(QLineEdit.Password) + entry_layout.addWidget(entry) + + # Toggle show/hide + show_btn = QPushButton("Show") + show_btn.setFixedWidth(60) + def toggle_show(): + if entry.echoMode() == QLineEdit.Password: + entry.setEchoMode(QLineEdit.Normal) + show_btn.setText("Hide") + else: + entry.setEchoMode(QLineEdit.Password) + show_btn.setText("Show") + show_btn.clicked.connect(toggle_show) + entry_layout.addWidget(show_btn) + + layout.addLayout(entry_layout) + + # Buttons + btn_layout = QHBoxLayout() + btn_layout.addStretch() + + cancel_btn = QPushButton("Cancel") + cancel_btn.clicked.connect(api_dialog.reject) + btn_layout.addWidget(cancel_btn) + + ok_btn = QPushButton("OK") + ok_btn.setStyleSheet("QPushButton { background-color: #28a745; color: white; padding: 5px 15px; }") + ok_btn.clicked.connect(api_dialog.accept) + btn_layout.addWidget(ok_btn) + + layout.addLayout(btn_layout) + + # Focus and key bindings + entry.setFocus() + + # Execute dialog + if api_dialog.exec() == QDialog.Accepted: + api_key = entry.text().strip() + + if api_key: + try: + # Save the API key + self.main_gui.config['replicate_api_key'] = api_key + self.main_gui.save_config(show_message=False) + + # Update UI + self.inpaint_api_status_label.setText("✅ Cloud inpainting configured") + self.inpaint_api_status_label.setStyleSheet("color: green;") + + # Set flag on translator + if self.translator: + self.translator.use_cloud_inpainting = True + self.translator.replicate_api_key = api_key + + self._log("✅ Cloud inpainting API configured", "success") + + except Exception as e: + QMessageBox.critical(self.dialog, "Error", f"Failed to save API key:\n{str(e)}") + + def _clear_inpaint_api(self): + """Clear the inpainting API configuration""" + self.main_gui.config['replicate_api_key'] = '' + self.main_gui.save_config(show_message=False) + + self.inpaint_api_status_label.setText("❌ Inpainting API not configured") + self.inpaint_api_status_label.setStyleSheet("color: red;") + + if hasattr(self, 'translator') and self.translator: + self.translator.use_cloud_inpainting = False + self.translator.replicate_api_key = None + + self._log("🗑️ Cleared inpainting API configuration", "info") + + # Note: Clear button management would need to be handled differently in PySide6 + # For now, we'll skip automatic button removal + + def _add_files(self): + """Add image files (and CBZ archives) to the list""" + from PySide6.QtWidgets import QFileDialog + + files, _ = QFileDialog.getOpenFileNames( + self.dialog, + "Select Manga Images or CBZ", + "", + "Images / CBZ (*.png *.jpg *.jpeg *.gif *.bmp *.webp *.cbz);;Image files (*.png *.jpg *.jpeg *.gif *.bmp *.webp);;Comic Book Zip (*.cbz);;All files (*.*)" + ) + + if not files: + return + + # Ensure temp root for CBZ extraction lives for the session + cbz_temp_root = 
getattr(self, 'cbz_temp_root', None) + if cbz_temp_root is None: + try: + import tempfile + cbz_temp_root = tempfile.mkdtemp(prefix='glossarion_cbz_') + self.cbz_temp_root = cbz_temp_root + except Exception: + cbz_temp_root = None + + for path in files: + lower = path.lower() + if lower.endswith('.cbz'): + # Extract images from CBZ and add them in natural sort order + try: + import zipfile, shutil + base = os.path.splitext(os.path.basename(path))[0] + extract_dir = os.path.join(self.cbz_temp_root or os.path.dirname(path), base) + os.makedirs(extract_dir, exist_ok=True) + with zipfile.ZipFile(path, 'r') as zf: + # Extract all to preserve subfolders and avoid name collisions + zf.extractall(extract_dir) + # Initialize CBZ job tracking + if not hasattr(self, 'cbz_jobs'): + self.cbz_jobs = {} + if not hasattr(self, 'cbz_image_to_job'): + self.cbz_image_to_job = {} + # Prepare output dir next to source CBZ + out_dir = os.path.join(os.path.dirname(path), f"{base}_translated") + self.cbz_jobs[path] = { + 'extract_dir': extract_dir, + 'out_dir': out_dir, + } + # Collect all images recursively from extract_dir + added = 0 + for root, _, files_in_dir in os.walk(extract_dir): + for fn in sorted(files_in_dir): + if fn.lower().endswith(('.png', '.jpg', '.jpeg', '.webp', '.bmp', '.gif')): + target_path = os.path.join(root, fn) + if target_path not in self.selected_files: + self.selected_files.append(target_path) + self.file_listbox.addItem(os.path.basename(target_path)) + added += 1 + # Map extracted image to its CBZ job + self.cbz_image_to_job[target_path] = path + self._log(f"📦 Added {added} images from CBZ: {os.path.basename(path)}", "info") + except Exception as e: + self._log(f"❌ Failed to read CBZ {os.path.basename(path)}: {e}", "error") + else: + if path not in self.selected_files: + self.selected_files.append(path) + self.file_listbox.addItem(os.path.basename(path)) + + def _add_folder(self): + """Add all images (and CBZ archives) from a folder""" + from PySide6.QtWidgets import QFileDialog + + folder = QFileDialog.getExistingDirectory( + self.dialog, + "Select Folder with Manga Images or CBZ" + ) + if not folder: + return + + # Extensions + image_extensions = {'.png', '.jpg', '.jpeg', '.gif', '.bmp', '.webp'} + cbz_ext = '.cbz' + + # Ensure temp root for CBZ extraction lives for the session + cbz_temp_root = getattr(self, 'cbz_temp_root', None) + if cbz_temp_root is None: + try: + import tempfile + cbz_temp_root = tempfile.mkdtemp(prefix='glossarion_cbz_') + self.cbz_temp_root = cbz_temp_root + except Exception: + cbz_temp_root = None + + for filename in sorted(os.listdir(folder)): + filepath = os.path.join(folder, filename) + if not os.path.isfile(filepath): + continue + lower = filename.lower() + if any(lower.endswith(ext) for ext in image_extensions): + if filepath not in self.selected_files: + self.selected_files.append(filepath) + self.file_listbox.addItem(filename) + elif lower.endswith(cbz_ext): + # Extract images from CBZ archive + try: + import zipfile, shutil + base = os.path.splitext(os.path.basename(filepath))[0] + extract_dir = os.path.join(self.cbz_temp_root or folder, base) + os.makedirs(extract_dir, exist_ok=True) + with zipfile.ZipFile(filepath, 'r') as zf: + zf.extractall(extract_dir) + # Initialize CBZ job tracking + if not hasattr(self, 'cbz_jobs'): + self.cbz_jobs = {} + if not hasattr(self, 'cbz_image_to_job'): + self.cbz_image_to_job = {} + # Prepare output dir next to source CBZ + out_dir = os.path.join(os.path.dirname(filepath), f"{base}_translated") + 
self.cbz_jobs[filepath] = { + 'extract_dir': extract_dir, + 'out_dir': out_dir, + } + # Collect all images recursively + added = 0 + for root, _, files_in_dir in os.walk(extract_dir): + for fn in sorted(files_in_dir): + if fn.lower().endswith(tuple(image_extensions)): + target_path = os.path.join(root, fn) + if target_path not in self.selected_files: + self.selected_files.append(target_path) + self.file_listbox.addItem(os.path.basename(target_path)) + added += 1 + # Map extracted image to its CBZ job + self.cbz_image_to_job[target_path] = filepath + self._log(f"📦 Added {added} images from CBZ: {filename}", "info") + except Exception as e: + self._log(f"❌ Failed to read CBZ {filename}: {e}", "error") + + def _remove_selected(self): + """Remove selected files from the list""" + selected_items = self.file_listbox.selectedItems() + + if not selected_items: + return + + # Remove in reverse order to maintain indices + for item in selected_items: + row = self.file_listbox.row(item) + self.file_listbox.takeItem(row) + if 0 <= row < len(self.selected_files): + del self.selected_files[row] + + def _clear_all(self): + """Clear all files from the list""" + self.file_listbox.clear() + self.selected_files.clear() + + def _finalize_cbz_jobs(self): + """Package translated outputs back into .cbz for each imported CBZ. + - Always creates a CLEAN archive with only final translated pages. + - If save_intermediate is enabled in settings, also creates a DEBUG archive that + contains the same final pages at root plus debug/raw artifacts under subfolders. + """ + try: + if not hasattr(self, 'cbz_jobs') or not self.cbz_jobs: + return + import zipfile + # Read debug flag from settings + save_debug = False + try: + save_debug = bool(self.main_gui.config.get('manga_settings', {}).get('advanced', {}).get('save_intermediate', False)) + except Exception: + save_debug = False + image_exts = ('.png', '.jpg', '.jpeg', '.webp', '.bmp', '.gif') + text_exts = ('.txt', '.json', '.csv', '.log') + excluded_patterns = ('_mask', '_overlay', '_debug', '_raw', '_ocr', '_regions', '_chunk', '_clean', '_cleaned', '_inpaint', '_inpainted') + + for cbz_path, job in self.cbz_jobs.items(): + out_dir = job.get('out_dir') + if not out_dir or not os.path.isdir(out_dir): + continue + parent = os.path.dirname(cbz_path) + base = os.path.splitext(os.path.basename(cbz_path))[0] + + # Compute original basenames from extracted images mapping + original_basenames = set() + try: + if hasattr(self, 'cbz_image_to_job'): + for img_path, job_path in self.cbz_image_to_job.items(): + if job_path == cbz_path: + original_basenames.add(os.path.basename(img_path)) + except Exception: + pass + + # Helper to iterate files in out_dir + all_files = [] + for root, _, files in os.walk(out_dir): + for fn in files: + fp = os.path.join(root, fn) + rel = os.path.relpath(fp, out_dir) + all_files.append((fp, rel, fn)) + + # 1) CLEAN ARCHIVE: only final images matching original basenames + clean_zip = os.path.join(parent, f"{base}_translated.cbz") + clean_count = 0 + with zipfile.ZipFile(clean_zip, 'w', zipfile.ZIP_DEFLATED) as zf: + for fp, rel, fn in all_files: + fn_lower = fn.lower() + if not fn_lower.endswith(image_exts): + continue + if original_basenames and fn not in original_basenames: + # Only include pages corresponding to original entries + continue + # Also skip obvious debug artifacts by pattern (extra safeguard) + if any(p in fn_lower for p in excluded_patterns): + continue + zf.write(fp, fn) # place at root with page filename + clean_count += 1 + 
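+ # Illustrative result for a source file "vol1.cbz" (hypothetical paths, per the docstring above):
+ #   vol1_translated.cbz        -> final translated pages only, stored at the archive root
+ #   vol1_translated_debug.cbz  -> the same pages plus raw/ (txt/json/csv/log) and debug/ artifacts;
+ #                                 only written when save_intermediate is enabled in advanced settings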
self._log(f"📦 Compiled CLEAN {clean_count} pages into {os.path.basename(clean_zip)}", "success") + + # 2) DEBUG ARCHIVE: include final pages + extras under subfolders + if save_debug: + debug_zip = os.path.join(parent, f"{base}_translated_debug.cbz") + dbg_count = 0 + raw_count = 0 + page_count = 0 + with zipfile.ZipFile(debug_zip, 'w', zipfile.ZIP_DEFLATED) as zf: + for fp, rel, fn in all_files: + fn_lower = fn.lower() + # Final pages at root + if fn_lower.endswith(image_exts) and (not original_basenames or fn in original_basenames) and not any(p in fn_lower for p in excluded_patterns): + zf.write(fp, fn) + page_count += 1 + continue + # Raw text/logs + if fn_lower.endswith(text_exts): + zf.write(fp, os.path.join('raw', rel)) + raw_count += 1 + continue + # Other images or artifacts -> debug/ + zf.write(fp, os.path.join('debug', rel)) + dbg_count += 1 + self._log(f"📦 Compiled DEBUG archive: pages={page_count}, debug_files={dbg_count}, raw={raw_count} -> {os.path.basename(debug_zip)}", "info") + except Exception as e: + self._log(f"⚠️ Failed to compile CBZ packages: {e}", "warning") + + def _attach_logging_bridge(self): + """Attach a root logging handler that forwards records into the GUI log.""" + try: + if getattr(self, '_gui_log_handler', None) is None: + handler = _MangaGuiLogHandler(self, level=logging.INFO) + root_logger = logging.getLogger() + # Avoid duplicates + if all(not isinstance(h, _MangaGuiLogHandler) for h in root_logger.handlers): + root_logger.addHandler(handler) + self._gui_log_handler = handler + # Ensure common module loggers propagate + for name in ['bubble_detector', 'local_inpainter', 'manga_translator']: + try: + lg = logging.getLogger(name) + lg.setLevel(logging.INFO) + lg.propagate = True + except Exception: + pass + except Exception: + pass + + def _redirect_stderr(self, enable: bool): + """Temporarily redirect stderr to the GUI log (captures tqdm/HF progress).""" + try: + if enable: + if not hasattr(self, '_old_stderr') or self._old_stderr is None: + self._old_stderr = sys.stderr + sys.stderr = _StreamToGuiLog(lambda s: self._log(s, 'info')) + self._stderr_redirect_on = True + else: + if hasattr(self, '_old_stderr') and self._old_stderr is not None: + sys.stderr = self._old_stderr + self._old_stderr = None + self._stderr_redirect_on = False + # Update combined flag to avoid double-forwarding with logging handler + self._stdio_redirect_active = bool(self._stdout_redirect_on or self._stderr_redirect_on) + except Exception: + pass + + def _redirect_stdout(self, enable: bool): + """Temporarily redirect stdout to the GUI log.""" + try: + if enable: + if not hasattr(self, '_old_stdout') or self._old_stdout is None: + self._old_stdout = sys.stdout + sys.stdout = _StreamToGuiLog(lambda s: self._log(s, 'info')) + self._stdout_redirect_on = True + else: + if hasattr(self, '_old_stdout') and self._old_stdout is not None: + sys.stdout = self._old_stdout + self._old_stdout = None + self._stdout_redirect_on = False + # Update combined flag to avoid double-forwarding with logging handler + self._stdio_redirect_active = bool(self._stdout_redirect_on or self._stderr_redirect_on) + except Exception: + pass + + def _log(self, message: str, level: str = "info"): + """Log message to GUI text widget or console with enhanced stop suppression""" + # Enhanced stop suppression - allow only essential stop confirmation messages + if self._is_stop_requested() or self.is_globally_cancelled(): + # Only allow very specific stop confirmation messages - nothing else + essential_stop_keywords = 
[ + "⏹️ Translation stopped by user", + "🧹 Cleaning up models to free RAM", + "✅ Model cleanup complete - RAM should be freed", + "✅ All models cleaned up - RAM freed!" + ] + # Suppress ALL other messages when stopped - be very restrictive + if not any(keyword in message for keyword in essential_stop_keywords): + return + + # Lightweight deduplication: ignore identical lines within a short interval + try: + now = time.time() + last_msg = getattr(self, '_last_log_msg', None) + last_ts = getattr(self, '_last_log_time', 0) + if last_msg == message and (now - last_ts) < 0.7: + return + except Exception: + pass + + # Store in persistent log (thread-safe) + try: + with MangaTranslationTab._persistent_log_lock: + # Keep only last 1000 messages to avoid unbounded growth + if len(MangaTranslationTab._persistent_log) >= 1000: + MangaTranslationTab._persistent_log.pop(0) + MangaTranslationTab._persistent_log.append((message, level)) + except Exception: + pass + + # Check if log_text widget exists yet + if hasattr(self, 'log_text') and self.log_text: + # Thread-safe logging to GUI + if threading.current_thread() == threading.main_thread(): + # We're in the main thread, update directly + try: + # PySide6 QTextEdit - append with color + color_map = { + 'info': 'white', + 'success': 'green', + 'warning': 'orange', + 'error': 'red', + 'debug': 'lightblue' + } + color = color_map.get(level, 'white') + self.log_text.setTextColor(QColor(color)) + self.log_text.append(message) + except Exception: + pass + else: + # We're in a background thread, use queue + self.update_queue.put(('log', message, level)) + else: + # Widget doesn't exist yet or we're in initialization, print to console + print(message) + + # Update deduplication state + try: + self._last_log_msg = message + self._last_log_time = time.time() + except Exception: + pass + + def _update_progress(self, current: int, total: int, status: str): + """Thread-safe progress update""" + self.update_queue.put(('progress', current, total, status)) + + def _update_current_file(self, filename: str): + """Thread-safe current file update""" + self.update_queue.put(('current_file', filename)) + + def _start_startup_heartbeat(self): + """Show a small spinner in the progress label during startup so there is no silence.""" + try: + self._startup_heartbeat_running = True + self._heartbeat_idx = 0 + chars = ['|', '/', '-', '\\'] + def tick(): + if not getattr(self, '_startup_heartbeat_running', False): + return + try: + c = chars[self._heartbeat_idx % len(chars)] + if hasattr(self, 'progress_label'): + self.progress_label.setText(f"Starting… {c}") + self.progress_label.setStyleSheet("color: white;") + # Force update to ensure it's visible + from PySide6.QtWidgets import QApplication + QApplication.processEvents() + except Exception: + pass + self._heartbeat_idx += 1 + # Schedule next tick with QTimer - only if still running + if getattr(self, '_startup_heartbeat_running', False): + QTimer.singleShot(250, tick) + # Kick off + QTimer.singleShot(0, tick) + except Exception: + pass + + def _stop_startup_heartbeat(self): + """Stop the startup heartbeat spinner""" + try: + self._startup_heartbeat_running = False + # Clear the spinner text immediately + if hasattr(self, 'progress_label') and self.progress_label: + self.progress_label.setText("Initializing...") + self.progress_label.setStyleSheet("color: white;") + except Exception: + pass + + def _process_updates(self): + """Process queued GUI updates""" + try: + while True: + update = self.update_queue.get_nowait() + + if 
update[0] == 'log': + _, message, level = update + try: + # PySide6 QTextEdit + color_map = { + 'info': 'white', + 'success': 'green', + 'warning': 'orange', + 'error': 'red', + 'debug': 'lightblue' + } + color = color_map.get(level, 'white') + self.log_text.setTextColor(QColor(color)) + self.log_text.append(message) + except Exception: + pass + + elif update[0] == 'progress': + _, current, total, status = update + if total > 0: + percentage = (current / total) * 100 + self.progress_bar.setValue(int(percentage)) + + # Check if this is a stopped status and style accordingly + if "stopped" in status.lower() or "cancelled" in status.lower(): + # Make the status more prominent for stopped translations + self.progress_label.setText(f"⏹️ {status}") + self.progress_label.setStyleSheet("color: orange;") + elif "complete" in status.lower() or "finished" in status.lower(): + # Success status + self.progress_label.setText(f"✅ {status}") + self.progress_label.setStyleSheet("color: green;") + elif "error" in status.lower() or "failed" in status.lower(): + # Error status + self.progress_label.setText(f"❌ {status}") + self.progress_label.setStyleSheet("color: red;") + else: + # Normal status - white for dark mode + self.progress_label.setText(status) + self.progress_label.setStyleSheet("color: white;") + + elif update[0] == 'current_file': + _, filename = update + # Style the current file display based on the status + if "stopped" in filename.lower() or "cancelled" in filename.lower(): + self.current_file_label.setText(f"⏹️ {filename}") + self.current_file_label.setStyleSheet("color: orange;") + elif "complete" in filename.lower() or "finished" in filename.lower(): + self.current_file_label.setText(f"✅ {filename}") + self.current_file_label.setStyleSheet("color: green;") + elif "error" in filename.lower() or "failed" in filename.lower(): + self.current_file_label.setText(f"❌ {filename}") + self.current_file_label.setStyleSheet("color: red;") + else: + self.current_file_label.setText(f"Current: {filename}") + self.current_file_label.setStyleSheet("color: lightgray;") + + elif update[0] == 'ui_state': + _, state = update + if state == 'translation_started': + try: + if hasattr(self, 'start_button') and self.start_button: + self.start_button.setEnabled(False) + if hasattr(self, 'stop_button') and self.stop_button: + self.stop_button.setEnabled(True) + if hasattr(self, 'file_listbox') and self.file_listbox: + self.file_listbox.setEnabled(False) + except Exception: + pass + + elif update[0] == 'call_method': + # Call a method on the main thread + _, method, args = update + try: + method(*args) + except Exception as e: + import traceback + print(f"Error calling method {method}: {e}") + print(traceback.format_exc()) + + except Exception: + # Queue is empty or some other exception + pass + + # Schedule next update with QTimer + QTimer.singleShot(100, self._process_updates) + + def load_local_inpainting_model(self, model_path): + """Load a local inpainting model + + Args: + model_path: Path to the model file + + Returns: + bool: True if successful + """ + try: + # Store the model path + self.local_inpaint_model_path = model_path + + # If using diffusers/torch models, load them here + if model_path.endswith('.safetensors') or model_path.endswith('.ckpt'): + # Initialize your inpainting pipeline + # This depends on your specific inpainting implementation + # Example: + # from diffusers import StableDiffusionInpaintPipeline + # self.inpaint_pipeline = StableDiffusionInpaintPipeline.from_single_file(model_path) + 
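+ # A fuller illustrative sketch of the elided example above, assuming the diffusers
+ # package is available; device/dtype and the page_image/text_mask names are placeholders,
+ # not part of this project's actual inpainting implementation:
+ # import torch
+ # from diffusers import StableDiffusionInpaintPipeline
+ # self.inpaint_pipeline = StableDiffusionInpaintPipeline.from_single_file(
+ #     model_path, torch_dtype=torch.float16
+ # ).to("cuda" if torch.cuda.is_available() else "cpu")
+ # result = self.inpaint_pipeline(
+ #     prompt="clean background", image=page_image, mask_image=text_mask
+ # ).images[0]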
pass + + return True + except Exception as e: + self._log(f"Failed to load inpainting model: {e}", "error") + return False + + def _start_translation(self): + """Start the translation process""" + # Check files BEFORE redirecting stdout to avoid deadlock + if not self.selected_files: + from PySide6.QtWidgets import QMessageBox + QMessageBox.warning(self.dialog, "No Files", "Please select manga images to translate.") + return + + # Immediately disable Start to prevent double-clicks + try: + if hasattr(self, 'start_button') and self.start_button: + self.start_button.setEnabled(False) + except Exception: + pass + + # Immediate minimal feedback using direct log append + try: + if hasattr(self, 'log_text') and self.log_text: + from PySide6.QtGui import QColor + self.log_text.setTextColor(QColor('white')) + self.log_text.append("Starting translation...") + except Exception: + pass + + # Start heartbeat spinner so there's visible activity until logs stream + self._start_startup_heartbeat() + + # Reset all stop flags at the start of new translation + self.is_running = False + if hasattr(self, 'stop_flag'): + self.stop_flag.clear() + self._reset_global_cancellation() + + # Log start directly to GUI + try: + if hasattr(self, 'log_text') and self.log_text: + from PySide6.QtGui import QColor, QTextCursor + from PySide6.QtCore import QTimer + self.log_text.setTextColor(QColor('white')) + self.log_text.append("🚀 Starting new manga translation batch") + + # Scroll to bottom after a short delay to ensure it happens after button processing + def scroll_to_bottom(): + try: + if hasattr(self, 'log_text') and self.log_text: + self.log_text.moveCursor(QTextCursor.End) + self.log_text.ensureCursorVisible() + # Also scroll the parent scroll area if it exists + if hasattr(self, 'scroll_area') and self.scroll_area: + scrollbar = self.scroll_area.verticalScrollBar() + if scrollbar: + scrollbar.setValue(scrollbar.maximum()) + except Exception: + pass + + # Schedule scroll with a small delay + QTimer.singleShot(50, scroll_to_bottom) + QTimer.singleShot(150, scroll_to_bottom) # Second attempt to be sure + except Exception: + pass + + # Force GUI update + try: + from PySide6.QtWidgets import QApplication + QApplication.processEvents() + except Exception: + pass + + # Run the heavy preparation and kickoff in a background thread to avoid GUI freeze + threading.Thread(target=self._start_translation_heavy, name="MangaStartHeavy", daemon=True).start() + return + + def _start_translation_heavy(self): + """Heavy part of start: build configs, init client/translator, and launch worker (runs off-main-thread).""" + try: + # Set thread limits based on parallel processing settings + try: + advanced = self.main_gui.config.get('manga_settings', {}).get('advanced', {}) + parallel_enabled = advanced.get('parallel_processing', False) + + if parallel_enabled: + # Allow multiple threads for parallel processing + num_threads = advanced.get('max_workers', 4) + import os + os.environ['OMP_NUM_THREADS'] = str(num_threads) + os.environ['MKL_NUM_THREADS'] = str(num_threads) + os.environ['OPENBLAS_NUM_THREADS'] = str(num_threads) + os.environ['NUMEXPR_NUM_THREADS'] = str(num_threads) + os.environ['VECLIB_MAXIMUM_THREADS'] = str(num_threads) + os.environ['ONNXRUNTIME_NUM_THREADS'] = str(num_threads) + try: + import torch + torch.set_num_threads(num_threads) + except ImportError: + pass + try: + import cv2 + cv2.setNumThreads(num_threads) + except (ImportError, AttributeError): + pass + self._log(f"⚡ Thread limit: {num_threads} threads (parallel 
processing enabled)", "debug") + else: + # HARDCODED: Limit to exactly 1 thread for sequential processing + import os + os.environ['OMP_NUM_THREADS'] = '1' + os.environ['MKL_NUM_THREADS'] = '1' + os.environ['OPENBLAS_NUM_THREADS'] = '1' + os.environ['NUMEXPR_NUM_THREADS'] = '1' + os.environ['VECLIB_MAXIMUM_THREADS'] = '1' + os.environ['ONNXRUNTIME_NUM_THREADS'] = '1' + try: + import torch + torch.set_num_threads(1) # Hardcoded to 1 + except ImportError: + pass + try: + import cv2 + cv2.setNumThreads(1) # Limit OpenCV to 1 thread + except (ImportError, AttributeError): + pass + self._log("⚡ Thread limit: 1 thread (sequential processing)", "debug") + except Exception as e: + self._log(f"⚠️ Warning: Could not set thread limits: {e}", "warning") + + # Early feedback + self._log("⏳ Preparing configuration...", "info") + # Build OCR configuration + ocr_config = {'provider': self.ocr_provider_value} + + if ocr_config['provider'] == 'Qwen2-VL': + qwen_provider = self.ocr_manager.get_provider('Qwen2-VL') + if qwen_provider: + # Set model size configuration + if hasattr(qwen_provider, 'loaded_model_size'): + if qwen_provider.loaded_model_size == "Custom": + ocr_config['model_size'] = f"custom:{qwen_provider.model_id}" + else: + size_map = {'2B': '1', '7B': '2', '72B': '3'} + ocr_config['model_size'] = size_map.get(qwen_provider.loaded_model_size, '2') + self._log(f"Setting ocr_config['model_size'] = {ocr_config['model_size']}", "info") + + # Set OCR prompt if available + if hasattr(self, 'ocr_prompt'): + # Set it via environment variable (Qwen2VL will read this) + os.environ['OCR_SYSTEM_PROMPT'] = self.ocr_prompt + + # Also set it directly on the provider if it has the method + if hasattr(qwen_provider, 'set_ocr_prompt'): + qwen_provider.set_ocr_prompt(self.ocr_prompt) + else: + # If no setter method, set it directly + qwen_provider.ocr_prompt = self.ocr_prompt + + self._log("✅ Set custom OCR prompt for Qwen2-VL", "info") + + elif ocr_config['provider'] == 'google': + import os + google_creds = self.main_gui.config.get('google_vision_credentials', '') or self.main_gui.config.get('google_cloud_credentials', '') + if not google_creds or not os.path.exists(google_creds): + self._log("❌ Google Cloud Vision credentials not found. 
Please set up credentials in the main settings.", "error") + self._stop_startup_heartbeat() + self._reset_ui_state() + return + ocr_config['google_credentials_path'] = google_creds + + elif ocr_config['provider'] == 'azure': + # Support both PySide6 QLineEdit (.text()) and Tkinter Entry (.get()) + if hasattr(self.azure_key_entry, 'text'): + azure_key = self.azure_key_entry.text().strip() + elif hasattr(self.azure_key_entry, 'get'): + azure_key = self.azure_key_entry.get().strip() + else: + azure_key = '' + if hasattr(self.azure_endpoint_entry, 'text'): + azure_endpoint = self.azure_endpoint_entry.text().strip() + elif hasattr(self.azure_endpoint_entry, 'get'): + azure_endpoint = self.azure_endpoint_entry.get().strip() + else: + azure_endpoint = '' + + if not azure_key or not azure_endpoint: + self._log("❌ Azure credentials not configured.", "error") + self._stop_startup_heartbeat() + self._reset_ui_state() + return + + # Save Azure settings + self.main_gui.config['azure_vision_key'] = azure_key + self.main_gui.config['azure_vision_endpoint'] = azure_endpoint + if hasattr(self.main_gui, 'save_config'): + self.main_gui.save_config(show_message=False) + + ocr_config['azure_key'] = azure_key + ocr_config['azure_endpoint'] = azure_endpoint + + # Get current API key and model for translation + api_key = None + model = 'gemini-2.5-flash' # default + + # Try to get API key from various sources (support PySide6 and Tkinter widgets) + if hasattr(self.main_gui, 'api_key_entry'): + try: + if hasattr(self.main_gui.api_key_entry, 'text'): + api_key_candidate = self.main_gui.api_key_entry.text() + elif hasattr(self.main_gui.api_key_entry, 'get'): + api_key_candidate = self.main_gui.api_key_entry.get() + else: + api_key_candidate = '' + if api_key_candidate and api_key_candidate.strip(): + api_key = api_key_candidate.strip() + except Exception: + pass + if not api_key and hasattr(self.main_gui, 'config') and self.main_gui.config.get('api_key'): + api_key = self.main_gui.config.get('api_key') + + # Try to get model - ALWAYS get the current selection from GUI + if hasattr(self.main_gui, 'model_var'): + model = self.main_gui.model_var.get() + elif hasattr(self.main_gui, 'config') and self.main_gui.config.get('model'): + model = self.main_gui.config.get('model') + + if not api_key: + self._log("❌ API key not found. 
Please configure your API key in the main settings.", "error") + self._stop_startup_heartbeat() + self._reset_ui_state() + return + + # Check if we need to create or update the client + needs_new_client = False + self._log("🔎 Checking API client...", "debug") + + if not hasattr(self.main_gui, 'client') or not self.main_gui.client: + needs_new_client = True + self._log(f"🛠 Creating new API client with model: {model}", "info") + elif hasattr(self.main_gui.client, 'model') and self.main_gui.client.model != model: + needs_new_client = True + self._log(f"🛠 Model changed from {self.main_gui.client.model} to {model}, creating new client", "info") + else: + self._log("♻️ Reusing existing API client", "debug") + + if needs_new_client: + # Apply multi-key settings from config so UnifiedClient picks them up + try: + import os # Import os here + use_mk = bool(self.main_gui.config.get('use_multi_api_keys', False)) + mk_list = self.main_gui.config.get('multi_api_keys', []) + if use_mk and mk_list: + os.environ['USE_MULTI_API_KEYS'] = '1' + os.environ['USE_MULTI_KEYS'] = '1' # backward-compat for retry paths + os.environ['MULTI_API_KEYS'] = json.dumps(mk_list) + os.environ['FORCE_KEY_ROTATION'] = '1' if self.main_gui.config.get('force_key_rotation', True) else '0' + os.environ['ROTATION_FREQUENCY'] = str(self.main_gui.config.get('rotation_frequency', 1)) + self._log("🔑 Multi-key mode ENABLED for manga translator", "info") + else: + # Explicitly disable if not configured + os.environ['USE_MULTI_API_KEYS'] = '0' + os.environ['USE_MULTI_KEYS'] = '0' + # Fallback keys (optional) + if self.main_gui.config.get('use_fallback_keys', False): + os.environ['USE_FALLBACK_KEYS'] = '1' + os.environ['FALLBACK_KEYS'] = json.dumps(self.main_gui.config.get('fallback_keys', [])) + else: + os.environ['USE_FALLBACK_KEYS'] = '0' + os.environ['FALLBACK_KEYS'] = '[]' + except Exception as env_err: + self._log(f"⚠️ Failed to apply multi-key settings: {env_err}", "warning") + + # Create the unified client with the current model + try: + from unified_api_client import UnifiedClient + self._log("⏳ Creating API client (network/model handshake)...", "debug") + self.main_gui.client = UnifiedClient(model=model, api_key=api_key) + self._log(f"✅ API client ready (model: {model})", "info") + try: + time.sleep(0.05) + except Exception: + pass + except Exception as e: + self._log(f"❌ Failed to create API client: {str(e)}", "error") + import traceback + self._log(traceback.format_exc(), "debug") + self._stop_startup_heartbeat() + self._reset_ui_state() + return + + # Reset the translator's history manager for new batch + if hasattr(self, 'translator') and self.translator and hasattr(self.translator, 'reset_history_manager'): + self.translator.reset_history_manager() + + # Set environment variables for custom-api provider + if ocr_config['provider'] == 'custom-api': + import os # Import os for environment variables + env_vars = self.main_gui._get_environment_variables( + epub_path='', # Not needed for manga + api_key=api_key + ) + + # Apply all environment variables EXCEPT SYSTEM_PROMPT + for key, value in env_vars.items(): + if key == 'SYSTEM_PROMPT': + # DON'T SET THE TRANSLATION SYSTEM PROMPT FOR OCR + continue + os.environ[key] = str(value) + + # Set a VERY EXPLICIT OCR prompt that OpenAI can't ignore + os.environ['OCR_SYSTEM_PROMPT'] = ( + "YOU ARE AN OCR SYSTEM. YOUR ONLY JOB IS TEXT EXTRACTION.\n\n" + "CRITICAL RULES:\n" + "1. DO NOT TRANSLATE ANYTHING\n" + "2. DO NOT MODIFY THE TEXT\n" + "3. DO NOT EXPLAIN OR COMMENT\n" + "4. 
ONLY OUTPUT THE EXACT TEXT YOU SEE\n" + "5. PRESERVE NATURAL TEXT FLOW - DO NOT ADD UNNECESSARY LINE BREAKS\n\n" + "If you see Korean text, output it in Korean.\n" + "If you see Japanese text, output it in Japanese.\n" + "If you see Chinese text, output it in Chinese.\n" + "If you see English text, output it in English.\n\n" + "IMPORTANT: Only use line breaks where they naturally occur in the original text " + "(e.g., between dialogue lines or paragraphs). Do not break text mid-sentence or " + "between every word/character.\n\n" + "For vertical text common in manga/comics, transcribe it as a continuous line unless " + "there are clear visual breaks.\n\n" + "NEVER translate. ONLY extract exactly what is written.\n" + "Output ONLY the raw text, nothing else." + ) + + self._log("✅ Set environment variables for custom-api OCR (excluded SYSTEM_PROMPT)") + + # Respect user settings: only set default detector values when bubble detection is OFF. + try: + ms = self.main_gui.config.setdefault('manga_settings', {}) + ocr_set = ms.setdefault('ocr', {}) + changed = False + bubble_enabled = bool(ocr_set.get('bubble_detection_enabled', False)) + + if not bubble_enabled: + # User has bubble detection OFF -> set non-intrusive defaults only + if 'detector_type' not in ocr_set: + ocr_set['detector_type'] = 'rtdetr_onnx' + changed = True + if not ocr_set.get('rtdetr_model_url') and not ocr_set.get('bubble_model_path'): + # Default HF repo (detector.onnx lives here) + ocr_set['rtdetr_model_url'] = 'ogkalu/comic-text-and-bubble-detector' + changed = True + if changed and hasattr(self.main_gui, 'save_config'): + self.main_gui.save_config(show_message=False) + # Do not preload bubble detector for custom-api here; it will load on use or via panel preloading + self._preloaded_bd = None + except Exception: + self._preloaded_bd = None + except Exception as e: + # Surface any startup error and reset UI so the app doesn't look stuck + try: + import traceback + self._log(f"❌ Startup error: {e}", "error") + self._log(traceback.format_exc(), "debug") + except Exception: + pass + self._stop_startup_heartbeat() + self._reset_ui_state() + return + + # Initialize translator if needed (or if it was reset or client was cleared during shutdown) + needs_new_translator = (not hasattr(self, 'translator')) or (self.translator is None) + if not needs_new_translator: + try: + needs_new_translator = getattr(self.translator, 'client', None) is None + if needs_new_translator: + self._log("♻️ Translator exists but client was cleared — reinitializing translator", "debug") + except Exception: + needs_new_translator = True + if needs_new_translator: + self._log("⚙️ Initializing translator...", "info") + + # CRITICAL: Set batch environment variables BEFORE creating translator + # This ensures MangaTranslator picks up the batch settings on initialization + try: + # Get batch translation setting from main GUI + batch_translation_enabled = False + batch_size_value = 1 + + if hasattr(self.main_gui, 'batch_translation_var'): + # Check if batch translation is enabled in GUI + try: + if hasattr(self.main_gui.batch_translation_var, 'get'): + batch_translation_enabled = bool(self.main_gui.batch_translation_var.get()) + else: + batch_translation_enabled = bool(self.main_gui.batch_translation_var) + except Exception: + pass + + if hasattr(self.main_gui, 'batch_size_var'): + # Get batch size from GUI + try: + if hasattr(self.main_gui.batch_size_var, 'get'): + batch_size_value = int(self.main_gui.batch_size_var.get()) + else: + batch_size_value = 
int(self.main_gui.batch_size_var) + except Exception: + batch_size_value = 1 + + # Set environment variables for the translator to pick up + if batch_translation_enabled: + os.environ['BATCH_TRANSLATION'] = '1' + os.environ['BATCH_SIZE'] = str(max(1, batch_size_value)) + self._log(f"📦 Batch Translation ENABLED: {batch_size_value} concurrent API calls", "info") + else: + os.environ['BATCH_TRANSLATION'] = '0' + os.environ['BATCH_SIZE'] = '1' + self._log("📦 Batch Translation DISABLED: Sequential API calls", "info") + except Exception as e: + self._log(f"⚠️ Warning: Could not set batch settings: {e}", "warning") + os.environ['BATCH_TRANSLATION'] = '0' + os.environ['BATCH_SIZE'] = '1' + + try: + self.translator = MangaTranslator( + ocr_config, + self.main_gui.client, + self.main_gui, + log_callback=self._log + ) + + # Fix 4: Safely set OCR manager + if hasattr(self, 'ocr_manager'): + self.translator.ocr_manager = self.ocr_manager + else: + from ocr_manager import OCRManager + self.ocr_manager = OCRManager(log_callback=self._log) + self.translator.ocr_manager = self.ocr_manager + + # Attach preloaded RT-DETR if available + try: + if hasattr(self, '_preloaded_bd') and self._preloaded_bd: + self.translator.bubble_detector = self._preloaded_bd + self._log("🤖 RT-DETR preloaded and attached to translator", "debug") + except Exception: + pass + + # Distribute stop flags to all components + self._distribute_stop_flags() + + # Provide Replicate API key to translator if present, but DO NOT force-enable cloud mode here. + # Actual inpainting mode is chosen by the UI and applied in _apply_rendering_settings. + saved_api_key = self.main_gui.config.get('replicate_api_key', '') + if saved_api_key: + self.translator.replicate_api_key = saved_api_key + + # Apply text rendering settings (this sets skip/cloud/local based on UI) + self._apply_rendering_settings() + + try: + time.sleep(0.05) + except Exception: + pass + self._log("✅ Translator ready", "info") + + except Exception as e: + self._log(f"❌ Failed to initialize translator: {str(e)}", "error") + import traceback + self._log(traceback.format_exc(), "error") + self._stop_startup_heartbeat() + self._reset_ui_state() + return + else: + # Update batch settings for existing translator + try: + batch_translation_enabled = False + batch_size_value = 1 + + if hasattr(self.main_gui, 'batch_translation_var'): + try: + if hasattr(self.main_gui.batch_translation_var, 'get'): + batch_translation_enabled = bool(self.main_gui.batch_translation_var.get()) + else: + batch_translation_enabled = bool(self.main_gui.batch_translation_var) + except Exception: + pass + + if hasattr(self.main_gui, 'batch_size_var'): + try: + if hasattr(self.main_gui.batch_size_var, 'get'): + batch_size_value = int(self.main_gui.batch_size_var.get()) + else: + batch_size_value = int(self.main_gui.batch_size_var) + except Exception: + batch_size_value = 1 + + # Update environment variables and translator attributes + if batch_translation_enabled: + os.environ['BATCH_TRANSLATION'] = '1' + os.environ['BATCH_SIZE'] = str(max(1, batch_size_value)) + self.translator.batch_mode = True + self.translator.batch_size = max(1, batch_size_value) + self._log(f"📦 Batch Translation UPDATED: {batch_size_value} concurrent API calls", "info") + else: + os.environ['BATCH_TRANSLATION'] = '0' + os.environ['BATCH_SIZE'] = '1' + self.translator.batch_mode = False + self.translator.batch_size = 1 + self._log("📦 Batch Translation UPDATED: Sequential API calls", "info") + except Exception as e: + self._log(f"⚠️ Warning: 
Could not update batch settings: {e}", "warning") + + # Update the translator with the new client if model changed + if needs_new_client and hasattr(self.translator, 'client'): + self.translator.client = self.main_gui.client + self._log(f"Updated translator with new API client", "info") + + # Distribute stop flags to all components + self._distribute_stop_flags() + + # Update rendering settings + self._apply_rendering_settings() + + # Ensure inpainting settings are properly synchronized + if hasattr(self, 'inpainting_mode_var'): + inpainting_mode = self.inpainting_mode_var.get() + + if inpainting_mode == 'skip': + self.translator.skip_inpainting = True + self.translator.use_cloud_inpainting = False + self._log("Inpainting: SKIP", "debug") + + elif inpainting_mode == 'local': + self.translator.skip_inpainting = False + self.translator.use_cloud_inpainting = False + + # IMPORTANT: Load the local inpainting model if not already loaded + if hasattr(self, 'local_model_var'): + selected_model = self.local_model_var.get() + if selected_model and selected_model != "None": + # Get model path from available models + model_info = self.available_local_models.get(selected_model) + if model_info: + model_path = model_info['path'] + # Load the model into translator + if hasattr(self.translator, 'load_local_inpainting_model'): + success = self.translator.load_local_inpainting_model(model_path) + if success: + self._log(f"Inpainting: LOCAL - Loaded {selected_model}", "info") + else: + self._log(f"Inpainting: Failed to load local model {selected_model}", "error") + else: + # Set the model path directly if no load method + self.translator.local_inpaint_model_path = model_path + self._log(f"Inpainting: LOCAL - Set model path for {selected_model}", "info") + else: + self._log("Inpainting: LOCAL - No model selected", "warning") + else: + self._log("Inpainting: LOCAL - No model configured", "warning") + else: + self._log("Inpainting: LOCAL (default)", "debug") + + elif inpainting_mode == 'cloud': + self.translator.skip_inpainting = False + saved_api_key = self.main_gui.config.get('replicate_api_key', '') + if saved_api_key: + self.translator.use_cloud_inpainting = True + self.translator.replicate_api_key = saved_api_key + self._log("Inpainting: CLOUD (Replicate)", "debug") + else: + # Fallback to local if no API key + self.translator.use_cloud_inpainting = False + self._log("Inpainting: LOCAL (no Replicate key, fallback)", "warning") + else: + # Default to local inpainting if variable doesn't exist + self.translator.skip_inpainting = False + self.translator.use_cloud_inpainting = False + self._log("Inpainting: LOCAL (default)", "debug") + + # Double-check the settings are applied correctly + self._log(f"Inpainting final status:", "debug") + self._log(f" - Skip: {self.translator.skip_inpainting}", "debug") + self._log(f" - Cloud: {self.translator.use_cloud_inpainting}", "debug") + self._log(f" - Mode: {'SKIP' if self.translator.skip_inpainting else 'CLOUD' if self.translator.use_cloud_inpainting else 'LOCAL'}", "debug") + + # Preflight RT-DETR to avoid first-page fallback after aggressive cleanup + try: + ocr_set = self.main_gui.config.get('manga_settings', {}).get('ocr', {}) or {} + if ocr_set.get('bubble_detection_enabled', False): + # Ensure a default RT-DETR model id exists when required + if ocr_set.get('detector_type', 'rtdetr') in ('rtdetr', 'auto'): + if not ocr_set.get('rtdetr_model_url') and not ocr_set.get('bubble_model_path'): + ocr_set['rtdetr_model_url'] = 'ogkalu/comic-text-and-bubble-detector' + 
if hasattr(self.main_gui, 'save_config'): + self.main_gui.save_config(show_message=False) + self._preflight_bubble_detector(ocr_set) + except Exception: + pass + + # Reset progress + self.total_files = len(self.selected_files) + self.completed_files = 0 + self.failed_files = 0 + self.current_file_index = 0 + + # Reset all global cancellation flags for new translation + self._reset_global_cancellation() + + # Update UI state (PySide6) - queue UI updates for main thread + self.is_running = True + self.stop_flag.clear() + # Queue UI updates to be processed by main thread + self.update_queue.put(('ui_state', 'translation_started')) + + # Log start message + self._log(f"Starting translation of {self.total_files} files...", "info") + self._log(f"Using OCR provider: {ocr_config['provider'].upper()}", "info") + if ocr_config['provider'] == 'google': + self._log(f"Using Google Vision credentials: {os.path.basename(ocr_config['google_credentials_path'])}", "info") + elif ocr_config['provider'] == 'azure': + self._log(f"Using Azure endpoint: {ocr_config['azure_endpoint']}", "info") + else: + self._log(f"Using local OCR provider: {ocr_config['provider'].upper()}", "info") + # Report effective API routing/model with multi-key awareness + try: + c = getattr(self.main_gui, 'client', None) + if c is not None: + if getattr(c, 'use_multi_keys', False): + total_keys = 0 + try: + stats = c.get_stats() + total_keys = stats.get('total_keys', 0) + except Exception: + pass + self._log( + f"API routing: Multi-key pool enabled — starting model '{getattr(c, 'model', 'unknown')}', keys={total_keys}, rotation={getattr(c, '_rotation_frequency', 1)}", + "info" + ) + else: + self._log(f"API model: {getattr(c, 'model', 'unknown')}", "info") + except Exception: + pass + self._log(f"Contextual: {'Enabled' if self.main_gui.contextual_var.get() else 'Disabled'}", "info") + self._log(f"History limit: {self.main_gui.trans_history.get()} exchanges", "info") + self._log(f"Rolling history: {'Enabled' if self.main_gui.translation_history_rolling_var.get() else 'Disabled'}", "info") + self._log(f" Full Page Context: {'Enabled' if self.full_page_context_value else 'Disabled'}", "info") + + # Stop heartbeat before launching worker; now regular progress takes over + self._stop_startup_heartbeat() + + # Update progress to show we're starting the translation worker + self._log("🚀 Launching translation worker...", "info") + self._update_progress(0, self.total_files, "Starting translation...") + + # Start translation via executor + try: + # Sync with main GUI executor if possible and update EXTRACTION_WORKERS + if hasattr(self.main_gui, '_ensure_executor'): + self.main_gui._ensure_executor() + self.executor = self.main_gui.executor + # Ensure env var reflects current worker setting from main GUI + try: + os.environ["EXTRACTION_WORKERS"] = str(self.main_gui.extraction_workers_var.get()) + except Exception: + pass + + if self.executor: + self.translation_future = self.executor.submit(self._translation_worker) + else: + # Fallback to dedicated thread + self.translation_thread = threading.Thread( + target=self._translation_worker, + daemon=True + ) + self.translation_thread.start() + except Exception: + # Last resort fallback to thread + self.translation_thread = threading.Thread( + target=self._translation_worker, + daemon=True + ) + self.translation_thread.start() + + def _apply_rendering_settings(self): + """Apply current rendering settings to translator (PySide6 version)""" + if not self.translator: + return + + # Read all values from 
PySide6 widgets to ensure they're current + # Background opacity slider + if hasattr(self, 'opacity_slider'): + self.bg_opacity_value = self.opacity_slider.value() + + # Background reduction slider + if hasattr(self, 'reduction_slider'): + self.bg_reduction_value = self.reduction_slider.value() + + # Background style (radio buttons) + if hasattr(self, 'bg_style_group'): + checked_id = self.bg_style_group.checkedId() + if checked_id == 0: + self.bg_style_value = "box" + elif checked_id == 1: + self.bg_style_value = "circle" + elif checked_id == 2: + self.bg_style_value = "wrap" + + # Font selection + if hasattr(self, 'font_combo'): + selected = self.font_combo.currentText() + if selected == "Default": + self.selected_font_path = None + elif selected in self.font_mapping: + self.selected_font_path = self.font_mapping[selected] + + # Text color (stored in value variables updated by color picker) + text_color = ( + self.text_color_r_value, + self.text_color_g_value, + self.text_color_b_value + ) + + # Shadow enabled checkbox + if hasattr(self, 'shadow_enabled_checkbox'): + self.shadow_enabled_value = self.shadow_enabled_checkbox.isChecked() + + # Shadow color (stored in value variables updated by color picker) + shadow_color = ( + self.shadow_color_r_value, + self.shadow_color_g_value, + self.shadow_color_b_value + ) + + # Shadow offset spinboxes + if hasattr(self, 'shadow_offset_x_spinbox'): + self.shadow_offset_x_value = self.shadow_offset_x_spinbox.value() + if hasattr(self, 'shadow_offset_y_spinbox'): + self.shadow_offset_y_value = self.shadow_offset_y_spinbox.value() + + # Shadow blur spinbox + if hasattr(self, 'shadow_blur_spinbox'): + self.shadow_blur_value = self.shadow_blur_spinbox.value() + + # Force caps lock checkbox + if hasattr(self, 'force_caps_checkbox'): + self.force_caps_lock_value = self.force_caps_checkbox.isChecked() + + # Strict text wrapping checkbox + if hasattr(self, 'strict_wrap_checkbox'): + self.strict_text_wrapping_value = self.strict_wrap_checkbox.isChecked() + + # Font sizing controls + if hasattr(self, 'min_size_spinbox'): + self.auto_min_size_value = self.min_size_spinbox.value() + if hasattr(self, 'max_size_spinbox'): + self.max_font_size_value = self.max_size_spinbox.value() + if hasattr(self, 'multiplier_slider'): + self.font_size_multiplier_value = self.multiplier_slider.value() + + # Determine font size value based on mode + if self.font_size_mode_value == 'multiplier': + # Pass negative value to indicate multiplier mode + font_size = -self.font_size_multiplier_value + else: + # Fixed mode - use the font size value directly + font_size = self.font_size_value if self.font_size_value > 0 else None + + # Apply concise logging toggle from Advanced settings + try: + adv_cfg = self.main_gui.config.get('manga_settings', {}).get('advanced', {}) + self.translator.concise_logs = bool(adv_cfg.get('concise_logs', False)) + except Exception: + pass + + # Push rendering settings to translator + self.translator.update_text_rendering_settings( + bg_opacity=self.bg_opacity_value, + bg_style=self.bg_style_value, + bg_reduction=self.bg_reduction_value, + font_style=self.selected_font_path, + font_size=font_size, + text_color=text_color, + shadow_enabled=self.shadow_enabled_value, + shadow_color=shadow_color, + shadow_offset_x=self.shadow_offset_x_value, + shadow_offset_y=self.shadow_offset_y_value, + shadow_blur=self.shadow_blur_value, + force_caps_lock=self.force_caps_lock_value + ) + + # Free-text-only background opacity toggle -> read from checkbox (PySide6) + try: + if 
hasattr(self, 'ft_only_checkbox'): + ft_only_enabled = self.ft_only_checkbox.isChecked() + self.translator.free_text_only_bg_opacity = bool(ft_only_enabled) + # Also update the value variable + self.free_text_only_bg_opacity_value = ft_only_enabled + except Exception: + pass + + # Update font mode and multiplier explicitly + self.translator.font_size_mode = self.font_size_mode_value + self.translator.font_size_multiplier = self.font_size_multiplier_value + self.translator.min_readable_size = self.auto_min_size_value + self.translator.max_font_size_limit = self.max_font_size_value + self.translator.strict_text_wrapping = self.strict_text_wrapping_value + self.translator.force_caps_lock = self.force_caps_lock_value + + # Update constrain to bubble setting + if hasattr(self, 'constrain_to_bubble_value'): + self.translator.constrain_to_bubble = self.constrain_to_bubble_value + + # Handle inpainting mode (radio: skip/local/cloud/hybrid) + mode = None + if hasattr(self, 'inpainting_mode_var'): + mode = self.inpainting_mode_var.get() + else: + mode = 'local' + + # Persist selected mode on translator + self.translator.inpaint_mode = mode + + if mode == 'skip': + self.translator.skip_inpainting = True + self.translator.use_cloud_inpainting = False + self._log(" Inpainting: Skipped", "info") + elif mode == 'cloud': + self.translator.skip_inpainting = False + saved_api_key = self.main_gui.config.get('replicate_api_key', '') + if saved_api_key: + self.translator.use_cloud_inpainting = True + self.translator.replicate_api_key = saved_api_key + self._log(" Inpainting: Cloud (Replicate)", "info") + else: + self.translator.use_cloud_inpainting = False + self._log(" Inpainting: Local (no Replicate key, fallback)", "warning") + elif mode == 'hybrid': + self.translator.skip_inpainting = False + self.translator.use_cloud_inpainting = False + self._log(" Inpainting: Hybrid", "info") + else: + # Local (default) + self.translator.skip_inpainting = False + self.translator.use_cloud_inpainting = False + self._log(" Inpainting: Local", "info") + + # Persist free-text-only BG opacity setting to config (handled in _save_rendering_settings) + # Value is now read directly from checkbox in PySide6 + + # Log the applied rendering and inpainting settings + self._log(f"Applied rendering settings:", "info") + self._log(f" Background: {self.bg_style_value} @ {int(self.bg_opacity_value/255*100)}% opacity", "info") + import os + self._log(f" Font: {os.path.basename(self.selected_font_path) if self.selected_font_path else 'Default'}", "info") + self._log(f" Minimum Font Size: {self.auto_min_size_value}pt", "info") + self._log(f" Maximum Font Size: {self.max_font_size_value}pt", "info") + self._log(f" Strict Text Wrapping: {'Enabled (force fit)' if self.strict_text_wrapping_value else 'Disabled (allow overflow)'}", "info") + if self.font_size_mode_value == 'multiplier': + self._log(f" Font Size: Dynamic multiplier ({self.font_size_multiplier_value:.1f}x)", "info") + if hasattr(self, 'constrain_to_bubble_value'): + constraint_status = "constrained" if self.constrain_to_bubble_value else "unconstrained" + self._log(f" Text Constraint: {constraint_status}", "info") + else: + size_text = f"{self.font_size_value}pt" if self.font_size_value > 0 else "Auto" + self._log(f" Font Size: Fixed ({size_text})", "info") + self._log(f" Text Color: RGB({text_color[0]}, {text_color[1]}, {text_color[2]})", "info") + self._log(f" Shadow: {'Enabled' if self.shadow_enabled_value else 'Disabled'}", "info") + try: + self._log(f" Free-text-only BG 
opacity: {'Enabled' if getattr(self, 'free_text_only_bg_opacity_value', False) else 'Disabled'}", "info") + except Exception: + pass + self._log(f" Full Page Context: {'Enabled' if self.full_page_context_value else 'Disabled'}", "info") + + def _translation_worker(self): + """Worker thread for translation""" + try: + # Defensive: ensure translator exists before using it (legacy callers may start this worker early) + if not hasattr(self, 'translator') or self.translator is None: + self._log("⚠️ Translator not initialized yet; skipping worker start", "warning") + return + if hasattr(self.translator, 'set_stop_flag'): + self.translator.set_stop_flag(self.stop_flag) + + # Ensure API parallelism (batch API calls) is controlled independently of local parallel processing. + # Propagate the GUI "Batch Translation" toggle into environment so Unified API Client applies it globally + # for all providers (including custom endpoints). + try: + import os as _os + _os.environ['BATCH_TRANSLATION'] = '1' if getattr(self.main_gui, 'batch_translation_var', None) and self.main_gui.batch_translation_var.get() else '0' + # Use GUI batch size if available; default to 3 to match existing default + bs_val = None + try: + bs_val = str(int(self.main_gui.batch_size_var.get())) if hasattr(self.main_gui, 'batch_size_var') else None + except Exception: + bs_val = None + _os.environ['BATCH_SIZE'] = bs_val or _os.environ.get('BATCH_SIZE', '3') + except Exception: + # Non-fatal if env cannot be set + pass + + # Panel-level parallelization setting (LOCAL threading for panels) + advanced = self.main_gui.config.get('manga_settings', {}).get('advanced', {}) + panel_parallel = bool(advanced.get('parallel_panel_translation', False)) + requested_panel_workers = int(advanced.get('panel_max_workers', 2)) + + # Decouple from global parallel processing: panel concurrency is governed ONLY by panel settings + effective_workers = requested_panel_workers if (panel_parallel and len(self.selected_files) > 1) else 1 + + # Hint translator about preferred BD ownership: use singleton only when not using panel parallelism + try: + if hasattr(self, 'translator') and self.translator: + self.translator.use_singleton_bubble_detector = not (panel_parallel and effective_workers > 1) + except Exception: + pass + + # Model preloading phase + self._log("🔧 Model preloading phase", "info") + # Log current counters (diagnostic) + try: + st = self.translator.get_preload_counters() if hasattr(self.translator, 'get_preload_counters') else None + if st: + self._log(f" Preload counters before: inpaint_spares={st.get('inpaint_spares',0)}, detector_spares={st.get('detector_spares',0)}", "debug") + except Exception: + pass + # 1) Warm up bubble detector instances first (so detection can start immediately) + try: + ocr_set = self.main_gui.config.get('manga_settings', {}).get('ocr', {}) or {} + if ( + effective_workers > 1 + and ocr_set.get('bubble_detection_enabled', True) + and hasattr(self, 'translator') + and self.translator + ): + # For parallel panel translation, prefer thread-local detectors (avoid singleton for concurrency) + try: + self.translator.use_singleton_bubble_detector = False + except Exception: + pass + desired_bd = min(int(effective_workers), max(1, int(len(self.selected_files) or 1))) + self._log(f"🧰 Preloading bubble detector instances for {desired_bd} panel worker(s)...", "info") + try: + import time as _time + t0 = _time.time() + self.translator.preload_bubble_detectors(ocr_set, desired_bd) + dt = _time.time() - t0 + self._log(f"⏱️ Bubble 
detector preload finished in {dt:.2f}s", "info") + except Exception as _e: + self._log(f"⚠️ Bubble detector preload skipped: {_e}", "warning") + except Exception: + pass + # 2) Preload LOCAL inpainting instances for panel parallelism + inpaint_preload_event = None + try: + inpaint_method = self.main_gui.config.get('manga_inpaint_method', 'cloud') + if ( + effective_workers > 1 + and inpaint_method == 'local' + and hasattr(self, 'translator') + and self.translator + ): + local_method = self.main_gui.config.get('manga_local_inpaint_model', 'anime') + model_path = self.main_gui.config.get(f'manga_{local_method}_model_path', '') + if not model_path: + model_path = self.main_gui.config.get(f'{local_method}_model_path', '') + + # Preload one shared instance plus spares for parallel panel processing + # Constrain to actual number of files (no need for more workers than files) + desired_inp = min(int(effective_workers), max(1, int(len(self.selected_files) or 1))) + self._log(f"🧰 Preloading {desired_inp} local inpainting instance(s) for panel workers...", "info") + try: + import time as _time + t0 = _time.time() + # Use synchronous preload to ensure instances are ready before panel processing starts + self.translator.preload_local_inpainters(local_method, model_path, desired_inp) + dt = _time.time() - t0 + self._log(f"⏱️ Local inpainting preload finished in {dt:.2f}s", "info") + except Exception as _e: + self._log(f"⚠️ Local inpainting preload failed: {_e}", "warning") + import traceback + self._log(traceback.format_exc(), "debug") + except Exception as preload_err: + self._log(f"⚠️ Inpainting preload setup failed: {preload_err}", "warning") + + # Log updated counters (diagnostic) + try: + st2 = self.translator.get_preload_counters() if hasattr(self.translator, 'get_preload_counters') else None + if st2: + self._log(f" Preload counters after: inpaint_spares={st2.get('inpaint_spares',0)}, detector_spares={st2.get('detector_spares',0)}", "debug") + except Exception: + pass + + if panel_parallel and len(self.selected_files) > 1 and effective_workers > 1: + self._log(f"🚀 Parallel PANEL translation ENABLED ({effective_workers} workers)", "info") + + import concurrent.futures + import threading as _threading + progress_lock = _threading.Lock() + # Memory barrier: ensures resources are fully released before next panel starts + completion_barrier = _threading.Semaphore(1) # Only one panel can complete at a time + counters = { + 'started': 0, + 'done': 0, + 'failed': 0 + } + total = self.total_files + + def process_single(idx, filepath): + # Check stop flag at the very beginning + if self.stop_flag.is_set(): + return False + + # Create an isolated translator instance per panel + translator = None # Initialize outside try block for cleanup + try: + # Check again before starting expensive work + if self.stop_flag.is_set(): + return False + from manga_translator import MangaTranslator + import os + # Build full OCR config for this thread (mirror _start_translation) + ocr_config = {'provider': self.ocr_provider_value} + if ocr_config['provider'] == 'google': + google_creds = self.main_gui.config.get('google_vision_credentials', '') or \ + self.main_gui.config.get('google_cloud_credentials', '') + if google_creds and os.path.exists(google_creds): + ocr_config['google_credentials_path'] = google_creds + else: + self._log("⚠️ Google Cloud Vision credentials not found for parallel task", "warning") + elif ocr_config['provider'] == 'azure': + azure_key = self.main_gui.config.get('azure_vision_key', '') + 
azure_endpoint = self.main_gui.config.get('azure_vision_endpoint', '') + if azure_key and azure_endpoint: + ocr_config['azure_key'] = azure_key + ocr_config['azure_endpoint'] = azure_endpoint + else: + self._log("⚠️ Azure credentials not found for parallel task", "warning") + + translator = MangaTranslator(ocr_config, self.main_gui.client, self.main_gui, log_callback=self._log) + translator.set_stop_flag(self.stop_flag) + + # CRITICAL: Disable singleton bubble detector for parallel panel processing + # Each panel should use pool-based detectors for true parallelism + try: + translator.use_singleton_bubble_detector = False + self._log(f" 🤖 Panel translator: bubble detector pool mode enabled", "debug") + except Exception: + pass + + # Ensure parallel processing settings are properly applied to each panel translator + # The web UI maps parallel_panel_translation to parallel_processing for MangaTranslator compatibility + try: + advanced = self.main_gui.config.get('manga_settings', {}).get('advanced', {}) + if advanced.get('parallel_panel_translation', False): + # Override the manga_settings in this translator instance to enable parallel processing + # for bubble regions within each panel + translator.manga_settings.setdefault('advanced', {})['parallel_processing'] = True + panel_workers = int(advanced.get('panel_max_workers', 2)) + translator.manga_settings.setdefault('advanced', {})['max_workers'] = panel_workers + # Also set the instance attributes directly + translator.parallel_processing = True + translator.max_workers = panel_workers + self._log(f" 📋 Panel translator configured: parallel_processing={translator.parallel_processing}, max_workers={translator.max_workers}", "debug") + else: + self._log(f" 📋 Panel translator: parallel_panel_translation=False, using sequential bubble processing", "debug") + except Exception as e: + self._log(f" ⚠️ Warning: Failed to configure parallel processing for panel translator: {e}", "warning") + + # Also propagate global cancellation to isolated translator + from manga_translator import MangaTranslator as MTClass + if MTClass.is_globally_cancelled(): + return False + + # Check stop flag before configuration + if self.stop_flag.is_set(): + return False + + # Apply inpainting and rendering options roughly matching current translator + try: + translator.constrain_to_bubble = getattr(self, 'constrain_to_bubble_var').get() if hasattr(self, 'constrain_to_bubble_var') else True + except Exception: + pass + + # Set full page context based on UI + try: + translator.set_full_page_context( + enabled=self.full_page_context_var.get(), + custom_prompt=self.full_page_context_prompt + ) + except Exception: + pass + + # Another check before path setup + if self.stop_flag.is_set(): + return False + + # Determine output path (route CBZ images to job out_dir) + filename = os.path.basename(filepath) + output_path = None + try: + if hasattr(self, 'cbz_image_to_job') and filepath in self.cbz_image_to_job: + cbz_file = self.cbz_image_to_job[filepath] + job = getattr(self, 'cbz_jobs', {}).get(cbz_file) + if job: + output_dir = job.get('out_dir') + os.makedirs(output_dir, exist_ok=True) + output_path = os.path.join(output_dir, filename) + except Exception: + output_path = None + if not output_path: + if self.create_subfolder_value: + output_dir = os.path.join(os.path.dirname(filepath), 'translated') + os.makedirs(output_dir, exist_ok=True) + output_path = os.path.join(output_dir, filename) + else: + base, ext = os.path.splitext(filepath) + output_path = f"{base}_translated{ext}" 
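+ # Output path precedence in this worker: images that came from a CBZ job are written to that
+ # job's out_dir; otherwise a 'translated' subfolder is used when enabled, else a
+ # '<name>_translated<ext>' sibling file is written next to the source image.
+ # The completion section further below is serialized through completion_barrier (the
+ # Semaphore(1) created before process_single): panels are processed in parallel, but only one
+ # worker at a time runs its success check, counter updates and pooled-resource cleanup.
+ # Minimal sketch of that pattern, with hypothetical names, for reference only:
+ #     barrier = threading.Semaphore(1)
+ #     def worker(item):
+ #         result = process(item)    # fully parallel work
+ #         with barrier:             # completion/cleanup runs one worker at a time
+ #             record(result)
+ #             release_resources()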
+ + # Announce start + self._update_current_file(filename) + with progress_lock: + counters['started'] += 1 + self._update_progress(counters['done'], total, f"Processing {counters['started']}/{total}: {filename}") + + # Final check before expensive processing + if self.stop_flag.is_set(): + return False + + # Process image + result = translator.process_image(filepath, output_path, batch_index=idx+1, batch_total=total) + + # CRITICAL: Explicitly cleanup this panel's translator resources + # This prevents resource leaks and partial translation issues + try: + if translator: + # Return checked-out inpainter to pool for reuse + if hasattr(translator, '_return_inpainter_to_pool'): + translator._return_inpainter_to_pool() + # Return bubble detector to pool for reuse + if hasattr(translator, '_return_bubble_detector_to_pool'): + translator._return_bubble_detector_to_pool() + # Clear all caches and state + if hasattr(translator, 'reset_for_new_image'): + translator.reset_for_new_image() + # Clear internal state + if hasattr(translator, 'clear_internal_state'): + translator.clear_internal_state() + except Exception as cleanup_err: + self._log(f"⚠️ Panel translator cleanup warning: {cleanup_err}", "debug") + + # CRITICAL: Use completion barrier to prevent resource conflicts + # This ensures only one panel completes/cleans up at a time + with completion_barrier: + # Update counters only if not stopped + with progress_lock: + if self.stop_flag.is_set(): + # Don't update counters if translation was stopped + return False + + # Check if translation actually produced valid output + translation_successful = False + if result.get('success', False) and not result.get('interrupted', False): + # Verify there's an actual output file and translated regions + output_exists = result.get('output_path') and os.path.exists(result.get('output_path', '')) + regions = result.get('regions', []) + has_translations = any(r.get('translated_text', '') for r in regions) + + # CRITICAL: Verify all detected regions got translated + # Partial failures indicate inpainting or rendering issues + if has_translations and regions: + translated_count = sum(1 for r in regions if r.get('translated_text', '').strip()) + detected_count = len(regions) + completion_rate = translated_count / detected_count if detected_count > 0 else 0 + + # Log warning if completion rate is less than 100% + if completion_rate < 1.0: + self._log(f"⚠️ Partial translation: {translated_count}/{detected_count} regions translated ({completion_rate*100:.1f}%)", "warning") + self._log(f" This may indicate bubble detector or inpainter issues", "warning") + + # Only consider successful if at least 50% of regions translated + # This prevents marking completely failed images as successful + translation_successful = output_exists and completion_rate >= 0.5 + else: + translation_successful = output_exists and has_translations + + if translation_successful: + self.completed_files += 1 + self._log(f"✅ Translation completed: {filename}", "success") + # Memory barrier: ensure resources are released before next completion + time.sleep(0.15) # Slightly longer pause for stability + self._log("💤 Panel completion pausing for resource cleanup", "debug") + else: + self.failed_files += 1 + # Log the specific reason for failure + if result.get('interrupted', False): + self._log(f"⚠️ Translation interrupted: {filename}", "warning") + elif not result.get('success', False): + self._log(f"❌ Translation failed: {filename}", "error") + elif not result.get('output_path') or not 
os.path.exists(result.get('output_path', '')): + self._log(f"❌ Output file not created: {filename}", "error") + else: + self._log(f"❌ No text was translated: {filename}", "error") + counters['failed'] += 1 + counters['done'] += 1 + self._update_progress(counters['done'], total, f"Completed {counters['done']}/{total}") + # End of completion_barrier block - resources now released for next panel + + return result.get('success', False) + except Exception as e: + with progress_lock: + # Don't update error counters if stopped + if not self.stop_flag.is_set(): + self.failed_files += 1 + counters['failed'] += 1 + counters['done'] += 1 + if not self.stop_flag.is_set(): + self._log(f"❌ Error in panel task: {str(e)}", "error") + self._log(traceback.format_exc(), "error") + return False + finally: + # CRITICAL: Always cleanup translator resources, even on error + # This prevents resource leaks and ensures proper cleanup in parallel mode + try: + if translator: + # Return checked-out inpainter to pool for reuse + if hasattr(translator, '_return_inpainter_to_pool'): + translator._return_inpainter_to_pool() + # Return bubble detector to pool for reuse + if hasattr(translator, '_return_bubble_detector_to_pool'): + translator._return_bubble_detector_to_pool() + # Force cleanup of all models and caches + if hasattr(translator, 'clear_internal_state'): + translator.clear_internal_state() + # Clear any remaining references + translator = None + except Exception: + pass # Never let cleanup fail the finally block + + with concurrent.futures.ThreadPoolExecutor(max_workers=max(1, effective_workers)) as executor: + futures = [] + stagger_ms = int(advanced.get('panel_start_stagger_ms', 30)) + for idx, filepath in enumerate(self.selected_files): + if self.stop_flag.is_set(): + break + futures.append(executor.submit(process_single, idx, filepath)) + if stagger_ms > 0: + time.sleep(stagger_ms / 1000.0) + time.sleep(0.1) # Brief pause for stability + self._log("💤 Staggered submission pausing briefly for stability", "debug") + + # Handle completion and stop behavior + try: + for f in concurrent.futures.as_completed(futures): + if self.stop_flag.is_set(): + # More aggressive cancellation + for rem in futures: + rem.cancel() + # Try to shutdown executor immediately + try: + executor.shutdown(wait=False) + except Exception: + pass + break + try: + # Consume future result to let it raise exceptions or return + f.result(timeout=0.1) # Very short timeout + except Exception: + # Ignore; counters are updated inside process_single + pass + except Exception: + # If as_completed fails due to shutdown, that's ok + pass + + # If stopped during parallel processing, do not log panel completion + if self.stop_flag.is_set(): + pass + else: + # After parallel processing, skip sequential loop + pass + + # After parallel processing, skip sequential loop + + # Finalize CBZ packaging after parallel mode finishes + try: + self._finalize_cbz_jobs() + except Exception: + pass + + else: + # Sequential processing (or panel parallel requested but capped to 1 by global setting) + for index, filepath in enumerate(self.selected_files): + if self.stop_flag.is_set(): + self._log("\n⏹️ Translation stopped by user", "warning") + break + + # IMPORTANT: Reset translator state for each new image + if hasattr(self.translator, 'reset_for_new_image'): + self.translator.reset_for_new_image() + + self.current_file_index = index + filename = os.path.basename(filepath) + + self._update_current_file(filename) + self._update_progress( + index, + self.total_files, + 
f"Processing {index + 1}/{self.total_files}: {filename}" + ) + + try: + # Determine output path (route CBZ images to job out_dir) + job_output_path = None + try: + if hasattr(self, 'cbz_image_to_job') and filepath in self.cbz_image_to_job: + cbz_file = self.cbz_image_to_job[filepath] + job = getattr(self, 'cbz_jobs', {}).get(cbz_file) + if job: + output_dir = job.get('out_dir') + os.makedirs(output_dir, exist_ok=True) + job_output_path = os.path.join(output_dir, filename) + except Exception: + job_output_path = None + if job_output_path: + output_path = job_output_path + else: + if self.create_subfolder_value: + output_dir = os.path.join(os.path.dirname(filepath), 'translated') + os.makedirs(output_dir, exist_ok=True) + output_path = os.path.join(output_dir, filename) + else: + base, ext = os.path.splitext(filepath) + output_path = f"{base}_translated{ext}" + + # Process the image + result = self.translator.process_image(filepath, output_path) + + # Check if translation was interrupted + if result.get('interrupted', False): + self._log(f"⏸️ Translation of {filename} was interrupted", "warning") + self.failed_files += 1 + if self.stop_flag.is_set(): + break + elif result.get('success', False): + # Verify translation actually produced valid output + output_exists = result.get('output_path') and os.path.exists(result.get('output_path', '')) + has_translations = any(r.get('translated_text', '') for r in result.get('regions', [])) + + if output_exists and has_translations: + self.completed_files += 1 + self._log(f"✅ Translation completed: {filename}", "success") + time.sleep(0.1) # Brief pause for stability + self._log("💤 Sequential completion pausing briefly for stability", "debug") + else: + self.failed_files += 1 + if not output_exists: + self._log(f"❌ Output file not created: {filename}", "error") + else: + self._log(f"❌ No text was translated: {filename}", "error") + else: + self.failed_files += 1 + errors = '\n'.join(result.get('errors', ['Unknown error'])) + self._log(f"❌ Translation failed: {filename}\n{errors}", "error") + + # Check for specific error types in the error messages + errors_lower = errors.lower() + if '429' in errors or 'rate limit' in errors_lower: + self._log(f"⚠️ RATE LIMIT DETECTED - Please wait before continuing", "error") + self._log(f" The API provider is limiting your requests", "error") + self._log(f" Consider increasing delay between requests in settings", "error") + + # Optionally pause for a bit + self._log(f" Pausing for 60 seconds...", "warning") + for sec in range(60): + if self.stop_flag.is_set(): + break + time.sleep(1) + if sec % 10 == 0: + self._log(f" Waiting... {60-sec} seconds remaining", "warning") + + except Exception as e: + self.failed_files += 1 + error_str = str(e) + error_type = type(e).__name__ + + self._log(f"❌ Error processing {filename}:", "error") + self._log(f" Error type: {error_type}", "error") + self._log(f" Details: {error_str}", "error") + + # Check for specific API errors + if "429" in error_str or "rate limit" in error_str.lower(): + self._log(f"⚠️ RATE LIMIT ERROR (429) - API is throttling requests", "error") + self._log(f" Please wait before continuing or reduce request frequency", "error") + self._log(f" Consider increasing the API delay in settings", "error") + + # Pause for rate limit + self._log(f" Pausing for 60 seconds...", "warning") + for sec in range(60): + if self.stop_flag.is_set(): + break + time.sleep(1) + if sec % 10 == 0: + self._log(f" Waiting... 
{60-sec} seconds remaining", "warning") + + elif "401" in error_str or "unauthorized" in error_str.lower(): + self._log(f"❌ AUTHENTICATION ERROR (401) - Check your API key", "error") + self._log(f" The API key appears to be invalid or expired", "error") + + elif "403" in error_str or "forbidden" in error_str.lower(): + self._log(f"❌ FORBIDDEN ERROR (403) - Access denied", "error") + self._log(f" Check your API subscription and permissions", "error") + + elif "timeout" in error_str.lower(): + self._log(f"⏱️ TIMEOUT ERROR - Request took too long", "error") + self._log(f" Consider increasing timeout settings", "error") + + else: + # Generic error with full traceback + self._log(f" Full traceback:", "error") + self._log(traceback.format_exc(), "error") + + + # Finalize CBZ packaging (both modes) + try: + self._finalize_cbz_jobs() + except Exception: + pass + + # Final summary - only if not stopped + if not self.stop_flag.is_set(): + self._log(f"\n{'='*60}", "info") + self._log(f"📊 Translation Summary:", "info") + self._log(f" Total files: {self.total_files}", "info") + self._log(f" ✅ Successful: {self.completed_files}", "success") + self._log(f" ❌ Failed: {self.failed_files}", "error" if self.failed_files > 0 else "info") + self._log(f"{'='*60}\n", "info") + + self._update_progress( + self.total_files, + self.total_files, + f"Complete! {self.completed_files} successful, {self.failed_files} failed" + ) + + except Exception as e: + self._log(f"\n❌ Translation error: {str(e)}", "error") + self._log(traceback.format_exc(), "error") + + finally: + # Check if auto cleanup is enabled in settings + auto_cleanup_enabled = False # Default disabled by default + try: + advanced_settings = self.main_gui.config.get('manga_settings', {}).get('advanced', {}) + auto_cleanup_enabled = advanced_settings.get('auto_cleanup_models', False) + except Exception: + pass + + if auto_cleanup_enabled: + # Clean up all models to free RAM + try: + # For parallel panel translation, cleanup happens here after ALL panels complete + is_parallel_panel = False + try: + advanced_settings = self.main_gui.config.get('manga_settings', {}).get('advanced', {}) + is_parallel_panel = advanced_settings.get('parallel_panel_translation', True) + except Exception: + pass + + # Skip the "all parallel panels complete" message if stopped + if is_parallel_panel and not self.stop_flag.is_set(): + self._log("\n🧹 All parallel panels complete - cleaning up models to free RAM...", "info") + elif not is_parallel_panel: + self._log("\n🧹 Cleaning up models to free RAM...", "info") + + # Clean up the shared translator if parallel processing was used + if 'translator' in locals(): + translator.cleanup_all_models() + self._log("✅ Shared translator models cleaned up!", "info") + + # Also clean up the instance translator if it exists + if hasattr(self, 'translator') and self.translator: + self.translator.cleanup_all_models() + # Set to None to ensure it's released + self.translator = None + self._log("✅ Instance translator models cleaned up!", "info") + + self._log("✅ All models cleaned up - RAM freed!", "info") + + except Exception as e: + self._log(f"⚠️ Warning: Model cleanup failed: {e}", "warning") + + # Force garbage collection to ensure memory is freed + try: + import gc + gc.collect() + except Exception: + pass + else: + # Only log if not stopped + if not self.stop_flag.is_set(): + self._log("🔑 Auto cleanup disabled - models will remain in RAM for faster subsequent translations", "info") + + # IMPORTANT: Reset the entire translator instance to free 
ALL memory + # Controlled by a separate "Unload models after translation" toggle + try: + # Check if we should reset the translator instance + reset_translator = False # default disabled + try: + advanced_settings = self.main_gui.config.get('manga_settings', {}).get('advanced', {}) + reset_translator = bool(advanced_settings.get('unload_models_after_translation', False)) + except Exception: + reset_translator = False + + if reset_translator: + self._log("\n🗑️ Resetting translator instance to free all memory...", "info") + + # Clear the instance translator completely + if hasattr(self, 'translator'): + # First ensure models are cleaned if not already done + try: + if self.translator and hasattr(self.translator, 'cleanup_all_models'): + self.translator.cleanup_all_models() + except Exception: + pass + + # Clear all internal state using the dedicated method + try: + if self.translator and hasattr(self.translator, 'clear_internal_state'): + self.translator.clear_internal_state() + except Exception: + pass + + # Clear remaining references with proper cleanup + try: + if self.translator: + # Properly unload OCR manager and all its providers + if hasattr(self.translator, 'ocr_manager') and self.translator.ocr_manager: + try: + ocr_manager = self.translator.ocr_manager + # Clear all loaded OCR providers + if hasattr(ocr_manager, 'providers'): + for provider_name, provider in ocr_manager.providers.items(): + # Unload each provider's models + if hasattr(provider, 'model'): + provider.model = None + if hasattr(provider, 'processor'): + provider.processor = None + if hasattr(provider, 'tokenizer'): + provider.tokenizer = None + if hasattr(provider, 'reader'): + provider.reader = None + if hasattr(provider, 'is_loaded'): + provider.is_loaded = False + self._log(f" ✓ Unloaded OCR provider: {provider_name}", "debug") + ocr_manager.providers.clear() + self._log(" ✓ OCR manager fully unloaded", "debug") + except Exception as e: + self._log(f" Warning: OCR manager cleanup failed: {e}", "debug") + finally: + self.translator.ocr_manager = None + + # Properly unload local inpainter + if hasattr(self.translator, 'local_inpainter') and self.translator.local_inpainter: + try: + if hasattr(self.translator.local_inpainter, 'unload'): + self.translator.local_inpainter.unload() + self._log(" ✓ Local inpainter unloaded", "debug") + except Exception as e: + self._log(f" Warning: Local inpainter cleanup failed: {e}", "debug") + finally: + self.translator.local_inpainter = None + + # Properly unload bubble detector + if hasattr(self.translator, 'bubble_detector') and self.translator.bubble_detector: + try: + if hasattr(self.translator.bubble_detector, 'unload'): + self.translator.bubble_detector.unload(release_shared=True) + self._log(" ✓ Bubble detector unloaded", "debug") + except Exception as e: + self._log(f" Warning: Bubble detector cleanup failed: {e}", "debug") + finally: + self.translator.bubble_detector = None + + # Clear API clients + if hasattr(self.translator, 'client'): + self.translator.client = None + if hasattr(self.translator, 'vision_client'): + self.translator.vision_client = None + except Exception: + pass + + # Call translator shutdown to free all resources + try: + if translator and hasattr(translator, 'shutdown'): + translator.shutdown() + except Exception: + pass + # Finally, delete the translator instance entirely + self.translator = None + self._log("✅ Translator instance reset - all memory freed!", "info") + + # Also clear the shared translator from parallel processing if it exists + if 
'translator' in locals(): + try: + # Clear internal references + if hasattr(translator, 'cache'): + translator.cache = None + if hasattr(translator, 'text_regions'): + translator.text_regions = None + if hasattr(translator, 'translated_regions'): + translator.translated_regions = None + # Delete the local reference + del translator + except Exception: + pass + + # Clear standalone OCR manager if it exists in manga_integration + if hasattr(self, 'ocr_manager') and self.ocr_manager: + try: + ocr_manager = self.ocr_manager + # Clear all loaded OCR providers + if hasattr(ocr_manager, 'providers'): + for provider_name, provider in ocr_manager.providers.items(): + # Unload each provider's models + if hasattr(provider, 'model'): + provider.model = None + if hasattr(provider, 'processor'): + provider.processor = None + if hasattr(provider, 'tokenizer'): + provider.tokenizer = None + if hasattr(provider, 'reader'): + provider.reader = None + if hasattr(provider, 'is_loaded'): + provider.is_loaded = False + ocr_manager.providers.clear() + self.ocr_manager = None + self._log(" ✓ Standalone OCR manager cleared", "debug") + except Exception as e: + self._log(f" Warning: Standalone OCR manager cleanup failed: {e}", "debug") + + # Force multiple garbage collection passes to ensure everything is freed + try: + import gc + gc.collect() + gc.collect() # Multiple passes for stubborn references + gc.collect() + self._log("✅ Memory fully reclaimed", "debug") + except Exception: + pass + else: + # Only log if not stopped + if not self.stop_flag.is_set(): + self._log("🔑 Translator instance preserved for faster subsequent translations", "debug") + + except Exception as e: + self._log(f"⚠️ Warning: Failed to reset translator instance: {e}", "warning") + + # Reset UI state (PySide6 - call directly) + try: + self._reset_ui_state() + except Exception as e: + self._log(f"Error resetting UI: {e}", "warning") + + def _stop_translation(self): + """Stop the translation process""" + if self.is_running: + # Set local stop flag + self.stop_flag.set() + + # Set global cancellation flags for coordinated stopping + self.set_global_cancellation(True) + + # Also propagate to MangaTranslator class + try: + from manga_translator import MangaTranslator + MangaTranslator.set_global_cancellation(True) + except ImportError: + pass + + # Also propagate to UnifiedClient if available + try: + from unified_api_client import UnifiedClient + UnifiedClient.set_global_cancellation(True) + except ImportError: + pass + + # Update progress to show stopped status + self._update_progress( + self.completed_files, + self.total_files, + f"Stopped - {self.completed_files}/{self.total_files} completed" + ) + + # Try to style the progress bar to indicate stopped status + try: + # Set progress bar to a distinctive value and try to change appearance + if hasattr(self, 'progress_bar'): + # You could also set a custom style here if needed + # For now, we'll rely on the text indicators + pass + except Exception: + pass + + # Update current file display to show stopped + self._update_current_file("Translation stopped") + + # Kick off immediate resource shutdown to free RAM + try: + tr = getattr(self, 'translator', None) + if tr and hasattr(tr, 'shutdown'): + import threading + threading.Thread(target=tr.shutdown, name="MangaTranslatorShutdown", daemon=True).start() + self._log("🧹 Initiated translator resource shutdown", "info") + # Important: clear the stale translator reference so the next Start creates a fresh instance + self.translator = None + except 
Exception as e: + self._log(f"⚠️ Failed to start shutdown: {e}", "warning") + + # Immediately reset UI state to re-enable start button + self._reset_ui_state() + self._log("\n⏹️ Translation stopped by user", "warning") + + def _reset_ui_state(self): + """Reset UI to ready state - with widget existence checks (PySide6)""" + # Restore stdio redirection if active + self._redirect_stderr(False) + self._redirect_stdout(False) + # Stop any startup heartbeat if still running + try: + self._stop_startup_heartbeat() + except Exception: + pass + try: + # Check if the dialog still exists (PySide6) + if not hasattr(self, 'dialog') or not self.dialog: + return + + # Reset running flag + self.is_running = False + + # Check and update start_button if it exists (PySide6) + if hasattr(self, 'start_button') and self.start_button: + if not self.start_button.isEnabled(): + self.start_button.setEnabled(True) + + # Check and update stop_button if it exists (PySide6) + if hasattr(self, 'stop_button') and self.stop_button: + if self.stop_button.isEnabled(): + self.stop_button.setEnabled(False) + + # Re-enable file modification - check if listbox exists (PySide6) + if hasattr(self, 'file_listbox') and self.file_listbox: + if not self.file_listbox.isEnabled(): + self.file_listbox.setEnabled(True) + + except Exception as e: + # Log the error but don't crash + if hasattr(self, '_log'): + self._log(f"Error resetting UI state: {str(e)}", "warning") diff --git a/manga_settings_dialog.py b/manga_settings_dialog.py new file mode 100644 index 0000000000000000000000000000000000000000..5f630c5ded20bc902293d80b93471fa4ea86ac8f --- /dev/null +++ b/manga_settings_dialog.py @@ -0,0 +1,4412 @@ +# manga_settings_dialog.py +""" +Enhanced settings dialog for manga translation with all settings visible +Properly integrated with TranslatorGUI's WindowManager and UIHelper +""" + +import os +import json +from PySide6.QtWidgets import (QDialog, QVBoxLayout, QHBoxLayout, QGridLayout, + QLabel, QPushButton, QCheckBox, QSpinBox, QDoubleSpinBox, + QSlider, QComboBox, QLineEdit, QGroupBox, QTabWidget, + QWidget, QScrollArea, QFrame, QRadioButton, QButtonGroup, + QMessageBox, QFileDialog, QSizePolicy, QApplication) +from PySide6.QtCore import Qt, Signal, QTimer, QEvent, QObject +from PySide6.QtGui import QFont, QIcon +from typing import Dict, Any, Optional, Callable +from bubble_detector import BubbleDetector +import logging +import time +import copy + +# Use the same logging infrastructure initialized by translator_gui +logger = logging.getLogger(__name__) + +class MangaSettingsDialog(QDialog): + """Settings dialog for manga translation""" + + def __init__(self, parent, main_gui, config: Dict[str, Any], callback: Optional[Callable] = None): + """Initialize settings dialog + + Args: + parent: Parent window (should be QWidget or None) + main_gui: Reference to TranslatorGUI instance + config: Configuration dictionary + callback: Function to call after saving + """ + # Ensure parent is a QWidget or None for proper PySide6 initialization + if parent is not None and not hasattr(parent, 'windowTitle'): + # If parent is not a QWidget, use None + parent = None + super().__init__(parent) + self.parent = parent + self.main_gui = main_gui + self.config = config + self.callback = callback + + # Make dialog non-modal so it doesn't block the manga integration GUI + self.setModal(False) + + # Enhanced default settings structure with all options + self.default_settings = { + 'preprocessing': { + 'enabled': False, + 'auto_detect_quality': True, + 
'contrast_threshold': 0.4, + 'sharpness_threshold': 0.3, + 'noise_threshold': 20, + 'enhancement_strength': 1.5, + 'denoise_strength': 10, + 'max_image_dimension': 2000, + 'max_image_pixels': 2000000, + 'chunk_height': 2000, + 'chunk_overlap': 100, + # Inpainting tiling + 'inpaint_tiling_enabled': False, # Off by default + 'inpaint_tile_size': 512, # Default tile size + 'inpaint_tile_overlap': 64 # Overlap to avoid seams + }, + 'compression': { + 'enabled': False, + 'format': 'jpeg', + 'jpeg_quality': 85, + 'png_compress_level': 6, + 'webp_quality': 85 + }, + 'ocr': { + 'language_hints': ['ja', 'ko', 'zh'], + 'confidence_threshold': 0.7, + 'merge_nearby_threshold': 20, + 'azure_merge_multiplier': 3.0, + 'text_detection_mode': 'document', + 'enable_rotation_correction': True, + 'bubble_detection_enabled': True, + 'roi_locality_enabled': False, + 'bubble_model_path': '', + 'bubble_confidence': 0.3, + 'detector_type': 'rtdetr_onnx', + 'rtdetr_confidence': 0.3, + 'detect_empty_bubbles': True, + 'detect_text_bubbles': True, + 'detect_free_text': True, + 'rtdetr_model_url': '', + 'use_rtdetr_for_ocr_regions': True, + 'azure_reading_order': 'natural', + 'azure_model_version': 'latest', + 'azure_max_wait': 60, + 'azure_poll_interval': 0.5, + 'min_text_length': 0, + 'exclude_english_text': False, + 'english_exclude_threshold': 0.7, + 'english_exclude_min_chars': 4, + 'english_exclude_short_tokens': False + }, + 'advanced': { + 'format_detection': True, + 'webtoon_mode': 'auto', + 'debug_mode': False, + 'save_intermediate': False, + 'parallel_processing': True, + 'max_workers': 2, + 'parallel_panel_translation': False, + 'panel_max_workers': 2, + 'use_singleton_models': False, + 'auto_cleanup_models': False, + 'unload_models_after_translation': False, + 'auto_convert_to_onnx': False, # Disabled by default + 'auto_convert_to_onnx_background': True, + 'quantize_models': False, + 'onnx_quantize': False, + 'torch_precision': 'fp16', + # HD strategy defaults (mirrors comic-translate) + 'hd_strategy': 'resize', # 'original' | 'resize' | 'crop' + 'hd_strategy_resize_limit': 1536, # long-edge cap for resize + 'hd_strategy_crop_margin': 16, # pixels padding around cropped ROIs + 'hd_strategy_crop_trigger_size': 1024, # only crop if long edge exceeds this + # RAM cap defaults + 'ram_cap_enabled': False, + 'ram_cap_mb': 4096, + 'ram_cap_mode': 'soft', + 'ram_gate_timeout_sec': 15.0, + 'ram_min_floor_over_baseline_mb': 256 + }, + 'inpainting': { + 'batch_size': 10, + 'enable_cache': True, + 'method': 'local', + 'local_method': 'anime' + }, + 'font_sizing': { + 'algorithm': 'smart', # 'smart', 'conservative', 'aggressive' + 'prefer_larger': True, # Prefer larger readable text + 'max_lines': 10, # Maximum lines before forcing smaller + 'line_spacing': 1.3, # Line height multiplier + 'bubble_size_factor': True # Scale font based on bubble size + }, + + # Mask dilation settings with new iteration controls + 'mask_dilation': 0, + 'use_all_iterations': True, # Master control - use same for all by default + 'all_iterations': 2, # Value when using same for all + 'text_bubble_dilation_iterations': 2, # Text-filled speech bubbles + 'empty_bubble_dilation_iterations': 3, # Empty speech bubbles + 'free_text_dilation_iterations': 0, # Free text (0 for clean B&W) + 'bubble_dilation_iterations': 2, # Legacy support + 'dilation_iterations': 2, # Legacy support + + # Cloud inpainting settings + 'cloud_inpaint_model': 'ideogram-v2', + 'cloud_custom_version': '', + 'cloud_inpaint_prompt': 'clean background, smooth surface', + 
'cloud_negative_prompt': 'text, writing, letters', + 'cloud_inference_steps': 20, + 'cloud_timeout': 60 + } + + # Merge with existing config + self.settings = self._merge_settings(config.get('manga_settings', {})) + + # Show dialog + self.show_dialog() + + def _create_styled_checkbox(self, text): + """Create a checkbox with proper checkmark using text overlay (same as manga_integration.py)""" + checkbox = QCheckBox(text) + checkbox.setStyleSheet(""" + QCheckBox { + color: white; + spacing: 6px; + } + QCheckBox::indicator { + width: 14px; + height: 14px; + border: 1px solid #5a9fd4; + border-radius: 2px; + background-color: #2d2d2d; + } + QCheckBox::indicator:checked { + background-color: #5a9fd4; + border-color: #5a9fd4; + } + QCheckBox::indicator:hover { + border-color: #7bb3e0; + } + QCheckBox:disabled { + color: #666666; + } + QCheckBox::indicator:disabled { + background-color: #1a1a1a; + border-color: #3a3a3a; + } + """) + + # Create checkmark overlay + checkmark = QLabel("✓", checkbox) + checkmark.setStyleSheet(""" + QLabel { + color: white; + background: transparent; + font-weight: bold; + font-size: 11px; + } + """) + checkmark.setAlignment(Qt.AlignCenter) + checkmark.hide() + checkmark.setAttribute(Qt.WA_TransparentForMouseEvents) # Make checkmark click-through + + # Position checkmark properly after widget is shown + def position_checkmark(): + # Position over the checkbox indicator + checkmark.setGeometry(2, 1, 14, 14) + + # Show/hide checkmark based on checked state + def update_checkmark(): + if checkbox.isChecked(): + position_checkmark() + checkmark.show() + else: + checkmark.hide() + + checkbox.stateChanged.connect(update_checkmark) + # Delay initial positioning to ensure widget is properly rendered + QTimer.singleShot(0, lambda: (position_checkmark(), update_checkmark())) + + return checkbox + + def _disable_spinbox_scroll(self, widget): + """Disable mouse wheel scrolling on a spinbox, combobox, or slider (PySide6 version)""" + # Install event filter to block wheel events + class WheelEventFilter(QObject): + def eventFilter(self, obj, event): + if event.type() == QEvent.Wheel: + event.ignore() + return True + return False + + filter_obj = WheelEventFilter(widget) # Parent it to the widget + widget.installEventFilter(filter_obj) + # Store the filter so it doesn't get garbage collected + if not hasattr(widget, '_wheel_filter'): + widget._wheel_filter = filter_obj + + def _disable_all_spinbox_scrolling(self, parent): + """Recursively find and disable scrolling on all spinboxes, comboboxes, and sliders (PySide6 version)""" + # Check if the parent itself is a spinbox, combobox, or slider + if isinstance(parent, (QSpinBox, QDoubleSpinBox, QComboBox, QSlider)): + self._disable_spinbox_scroll(parent) + + # Check all children recursively + if hasattr(parent, 'children'): + for child in parent.children(): + if isinstance(child, QWidget): + self._disable_all_spinbox_scrolling(child) + + def _create_font_size_controls(self, parent_layout): + """Create improved font size controls with presets""" + + # Font size frame + font_frame = QWidget() + font_layout = QHBoxLayout(font_frame) + font_layout.setContentsMargins(0, 0, 0, 0) + parent_layout.addWidget(font_frame) + + label = QLabel("Font Size:") + label.setMinimumWidth(150) + font_layout.addWidget(label) + + # Font size mode selection + mode_frame = QWidget() + mode_layout = QHBoxLayout(mode_frame) + mode_layout.setContentsMargins(0, 0, 0, 0) + font_layout.addWidget(mode_frame) + + # Radio buttons for mode - using QButtonGroup + 
self.font_size_mode_group = QButtonGroup() + self.font_size_mode = 'auto' # Store mode as string attribute + + modes = [ + ("Auto", "auto", "Automatically fit text to bubble size"), + ("Fixed", "fixed", "Use a specific font size"), + ("Scale", "scale", "Scale auto size by percentage") + ] + + for text, value, tooltip in modes: + rb = QRadioButton(text) + rb.setChecked(value == 'auto') + rb.setToolTip(tooltip) + rb.toggled.connect(lambda checked, v=value: self._on_font_mode_change(v) if checked else None) + mode_layout.addWidget(rb) + self.font_size_mode_group.addButton(rb) + + # Controls frame (changes based on mode) + self.font_controls_frame = QWidget() + self.font_controls_layout = QVBoxLayout(self.font_controls_frame) + self.font_controls_layout.setContentsMargins(20, 5, 0, 5) + parent_layout.addWidget(self.font_controls_frame) + + # Fixed size controls + self.fixed_size_frame = QWidget() + fixed_layout = QHBoxLayout(self.fixed_size_frame) + fixed_layout.setContentsMargins(0, 0, 0, 0) + fixed_layout.addWidget(QLabel("Size:")) + + self.fixed_font_size_spin = QSpinBox() + self.fixed_font_size_spin.setRange(8, 72) + self.fixed_font_size_spin.setValue(16) + self.fixed_font_size_spin.valueChanged.connect(self._save_rendering_settings) + fixed_layout.addWidget(self.fixed_font_size_spin) + + # Quick presets for fixed size + fixed_layout.addWidget(QLabel("Presets:")) + + presets = [ + ("Small", 12), + ("Medium", 16), + ("Large", 20), + ("XL", 24) + ] + + for text, size in presets: + btn = QPushButton(text) + btn.setMaximumWidth(60) + btn.clicked.connect(lambda checked, s=size: self._set_fixed_size(s)) + fixed_layout.addWidget(btn) + + fixed_layout.addStretch() + + # Scale controls + self.scale_frame = QWidget() + scale_layout = QHBoxLayout(self.scale_frame) + scale_layout.setContentsMargins(0, 0, 0, 0) + scale_layout.addWidget(QLabel("Scale:")) + + # QSlider uses integers, so we'll use 50-200 to represent 0.5-2.0 + self.font_scale_slider = QSlider(Qt.Horizontal) + self.font_scale_slider.setRange(50, 200) + self.font_scale_slider.setValue(100) + self.font_scale_slider.setMinimumWidth(200) + self.font_scale_slider.valueChanged.connect(self._update_scale_label) + scale_layout.addWidget(self.font_scale_slider) + + self.scale_label = QLabel("100%") + self.scale_label.setMinimumWidth(50) + scale_layout.addWidget(self.scale_label) + + # Quick scale presets + scale_layout.addWidget(QLabel("Quick:")) + + scale_presets = [ + ("75%", 0.75), + ("100%", 1.0), + ("125%", 1.25), + ("150%", 1.5) + ] + + for text, scale in scale_presets: + btn = QPushButton(text) + btn.setMaximumWidth(50) + btn.clicked.connect(lambda checked, s=scale: self._set_scale(s)) + scale_layout.addWidget(btn) + + scale_layout.addStretch() + + # Auto size settings + self.auto_frame = QWidget() + auto_layout = QVBoxLayout(self.auto_frame) + auto_layout.setContentsMargins(0, 0, 0, 0) + + # Min/Max size constraints for auto mode + constraints_frame = QWidget() + constraints_layout = QHBoxLayout(constraints_frame) + constraints_layout.setContentsMargins(0, 0, 0, 0) + auto_layout.addWidget(constraints_frame) + + constraints_layout.addWidget(QLabel("Size Range:")) + + constraints_layout.addWidget(QLabel("Min:")) + self.min_font_size_spin = QSpinBox() + self.min_font_size_spin.setRange(6, 20) + self.min_font_size_spin.setValue(10) + self.min_font_size_spin.valueChanged.connect(self._save_rendering_settings) + constraints_layout.addWidget(self.min_font_size_spin) + + constraints_layout.addWidget(QLabel("Max:")) + self.max_font_size_spin = 
QSpinBox() + self.max_font_size_spin.setRange(16, 48) + self.max_font_size_spin.setValue(28) + self.max_font_size_spin.valueChanged.connect(self._save_rendering_settings) + constraints_layout.addWidget(self.max_font_size_spin) + + constraints_layout.addStretch() + + # Auto fit quality + quality_frame = QWidget() + quality_layout = QHBoxLayout(quality_frame) + quality_layout.setContentsMargins(0, 0, 0, 0) + auto_layout.addWidget(quality_frame) + + quality_layout.addWidget(QLabel("Fit Style:")) + + self.auto_fit_style_group = QButtonGroup() + self.auto_fit_style = 'balanced' # Store as string attribute + + fit_styles = [ + ("Compact", "compact", "Fit more text, smaller size"), + ("Balanced", "balanced", "Balance readability and fit"), + ("Readable", "readable", "Prefer larger, more readable text") + ] + + for text, value, tooltip in fit_styles: + rb = QRadioButton(text) + rb.setChecked(value == 'balanced') + rb.setToolTip(tooltip) + rb.toggled.connect(lambda checked, v=value: self._on_fit_style_change(v) if checked else None) + quality_layout.addWidget(rb) + self.auto_fit_style_group.addButton(rb) + + quality_layout.addStretch() + + # Initialize the correct frame + self._on_font_mode_change('auto') + + def _on_font_mode_change(self, mode): + """Show/hide appropriate font controls based on mode""" + # Update the stored mode + self.font_size_mode = mode + + # Remove all frames from layout + self.font_controls_layout.removeWidget(self.fixed_size_frame) + self.font_controls_layout.removeWidget(self.scale_frame) + self.font_controls_layout.removeWidget(self.auto_frame) + self.fixed_size_frame.hide() + self.scale_frame.hide() + self.auto_frame.hide() + + # Show the appropriate frame + if mode == 'fixed': + self.font_controls_layout.addWidget(self.fixed_size_frame) + self.fixed_size_frame.show() + elif mode == 'scale': + self.font_controls_layout.addWidget(self.scale_frame) + self.scale_frame.show() + else: # auto + self.font_controls_layout.addWidget(self.auto_frame) + self.auto_frame.show() + + self._save_rendering_settings() + + def _set_fixed_size(self, size): + """Set fixed font size from preset""" + self.fixed_font_size_spin.setValue(size) + self._save_rendering_settings() + + def _set_scale(self, scale): + """Set font scale from preset""" + # Scale is 0.5-2.0, slider uses 50-200 + self.font_scale_slider.setValue(int(scale * 100)) + self._update_scale_label() + self._save_rendering_settings() + + def _update_scale_label(self): + """Update the scale percentage label""" + # Get value from slider (50-200) and convert to percentage + scale_value = self.font_scale_slider.value() + self.scale_label.setText(f"{scale_value}%") + self._save_rendering_settings() + + def _on_fit_style_change(self, style): + """Handle fit style change""" + self.auto_fit_style = style + self._save_rendering_settings() + + def _create_tooltip(self, widget, text): + """Create a tooltip for a widget - PySide6 version""" + # In PySide6, tooltips are much simpler - just set the toolTip property + widget.setToolTip(text) + + def _merge_settings(self, existing: Dict) -> Dict: + """Merge existing settings with defaults""" + result = self.default_settings.copy() + + def deep_merge(base: Dict, update: Dict) -> Dict: + for key, value in update.items(): + if key in base and isinstance(base[key], dict) and isinstance(value, dict): + base[key] = deep_merge(base[key], value) + else: + base[key] = value + return base + + return deep_merge(result, existing) + + def show_dialog(self): + """Display the settings dialog using PySide6""" + # 
Set initialization flag to prevent auto-saves during setup + self._initializing = True + + # Set dialog properties + self.setWindowTitle("Manga Translation Settings") + # Dialog is already non-modal from __init__, don't override it + + # Set the halgakos.ico icon + try: + icon_path = os.path.join(os.path.dirname(__file__), 'Halgakos.ico') + if os.path.exists(icon_path): + self.setWindowIcon(QIcon(icon_path)) + except Exception: + pass # Fail silently if icon can't be loaded + + # Apply overall dark theme styling + self.setStyleSheet(""" + QDialog { + background-color: #1e1e1e; + color: white; + font-family: Arial; + } + QGroupBox { + font-family: Arial; + font-size: 10pt; + font-weight: bold; + color: white; + border: 1px solid #555; + border-radius: 5px; + margin-top: 10px; + padding-top: 5px; + } + QGroupBox::title { + subcontrol-origin: margin; + left: 10px; + padding: 0 5px 0 5px; + } + QLabel { + color: white; + font-family: Arial; + font-size: 9pt; + } + QLineEdit { + background-color: #2d2d2d; + color: white; + border: 1px solid #555; + border-radius: 3px; + padding: 3px; + font-family: Arial; + font-size: 9pt; + } + QSpinBox, QDoubleSpinBox { + background-color: #2d2d2d; + color: white; + border: 1px solid #555; + border-radius: 3px; + padding: 3px; + font-family: Arial; + font-size: 9pt; + } + QComboBox { + background-color: #2d2d2d; + color: white; + border: 1px solid #555; + border-radius: 3px; + padding: 3px 5px; + padding-right: 25px; + font-family: Arial; + font-size: 9pt; + min-height: 20px; + } + QComboBox:hover { + border: 1px solid #7bb3e0; + } + QComboBox:focus { + border: 1px solid #5a9fd4; + } + QComboBox::drop-down { + subcontrol-origin: padding; + subcontrol-position: right center; + width: 20px; + border-left: 1px solid #555; + background-color: #3c3c3c; + border-top-right-radius: 3px; + border-bottom-right-radius: 3px; + } + QComboBox::drop-down:hover { + background-color: #4a4a4a; + } + QComboBox::down-arrow { + image: none; + border-left: 4px solid transparent; + border-right: 4px solid transparent; + border-top: 5px solid #aaa; + width: 0; + height: 0; + margin-right: 5px; + } + QComboBox::down-arrow:hover { + border-top: 5px solid #fff; + } + QComboBox QAbstractItemView { + background-color: #2d2d2d; + color: white; + selection-background-color: #5a9fd4; + selection-color: white; + border: 1px solid #555; + outline: none; + } + QPushButton { + font-family: Arial; + font-size: 9pt; + padding: 5px 15px; + border-radius: 3px; + border: none; + } + QSlider::groove:horizontal { + border: 1px solid #555; + height: 6px; + background: #2d2d2d; + border-radius: 3px; + } + QSlider::handle:horizontal { + background: #5a9fd4; + border: 1px solid #5a9fd4; + width: 18px; + border-radius: 9px; + margin: -6px 0; + } + QSlider::handle:horizontal:hover { + background: #7bb3e0; + border: 1px solid #7bb3e0; + } + QRadioButton { + color: white; + spacing: 6px; + font-family: Arial; + font-size: 9pt; + } + QRadioButton::indicator { + width: 16px; + height: 16px; + border: 2px solid #5a9fd4; + border-radius: 8px; + background-color: #2d2d2d; + } + QRadioButton::indicator:checked { + background-color: #5a9fd4; + border: 2px solid #5a9fd4; + } + QRadioButton::indicator:hover { + border-color: #7bb3e0; + } + QRadioButton:disabled { + color: #666666; + } + QRadioButton::indicator:disabled { + background-color: #1a1a1a; + border-color: #3a3a3a; + } + QCheckBox { + color: white; + spacing: 6px; + } + QCheckBox::indicator { + width: 14px; + height: 14px; + border: 1px solid #5a9fd4; + 
border-radius: 2px; + background-color: #2d2d2d; + } + QCheckBox::indicator:checked { + background-color: #5a9fd4; + border-color: #5a9fd4; + } + QCheckBox::indicator:hover { + border-color: #7bb3e0; + } + QCheckBox:disabled { + color: #666666; + } + QCheckBox::indicator:disabled { + background-color: #1a1a1a; + border-color: #3a3a3a; + } + QLineEdit:disabled, QComboBox:disabled, QSpinBox:disabled, QDoubleSpinBox:disabled { + background-color: #1a1a1a; + color: #666666; + border: 1px solid #3a3a3a; + } + QLabel:disabled { + color: #666666; + } + QScrollArea { + background-color: #1e1e1e; + border: none; + } + QWidget { + background-color: #1e1e1e; + color: white; + } + """) + + # Calculate size based on screen dimensions + # Use availableGeometry to exclude taskbar and other system UI + app = QApplication.instance() + if app: + screen = app.primaryScreen().availableGeometry() + else: + screen = self.parent.screen().availableGeometry() if self.parent else self.screen().availableGeometry() + + dialog_width = min(800, int(screen.width() * 0.5)) + dialog_height = int(screen.height() * 0.90) # Use 90% of available height for more screen space with safety margin + self.resize(dialog_width, dialog_height) + + # Center the dialog within available screen space + dialog_x = screen.x() + (screen.width() - dialog_width) // 2 + dialog_y = screen.y() + (screen.height() - dialog_height) // 2 + self.move(dialog_x, dialog_y) + + # Create main layout + main_layout = QVBoxLayout(self) + main_layout.setContentsMargins(10, 10, 10, 10) + main_layout.setSpacing(10) + + # Create scroll area for the content + scroll_area = QScrollArea() + scroll_area.setWidgetResizable(True) + scroll_area.setHorizontalScrollBarPolicy(Qt.ScrollBarAsNeeded) + scroll_area.setVerticalScrollBarPolicy(Qt.ScrollBarAsNeeded) + + # Create content widget that will go inside scroll area + content_widget = QWidget() + content_layout = QVBoxLayout(content_widget) + content_layout.setContentsMargins(5, 5, 5, 5) + + # Create tab widget with enhanced styling + self.tab_widget = QTabWidget() + self.tab_widget.setStyleSheet(""" + QTabWidget::pane { + border: 1px solid #555; + background-color: #2b2b2b; + } + QTabBar::tab { + background-color: #3c3c3c; + color: #cccccc; + border: 1px solid #555; + border-bottom: none; + padding: 8px 16px; + margin-right: 2px; + font-family: Arial; + font-size: 10pt; + font-weight: bold; + } + QTabBar::tab:selected { + background-color: #5a9fd4; + color: white; + border-color: #7bb3e0; + margin-bottom: -1px; + } + QTabBar::tab:hover:!selected { + background-color: #4a4a4a; + color: white; + border-color: #7bb3e0; + } + QTabBar::tab:first { + margin-left: 0; + } + """) + content_layout.addWidget(self.tab_widget) + + # Create all tabs + self._create_preprocessing_tab() + self._create_ocr_tab() + self._create_inpainting_tab() + self._create_advanced_tab() + self._create_cloud_api_tab() + # NOTE: Font Sizing tab removed; controls are now in Manga Integration UI + + # Set content widget in scroll area + scroll_area.setWidget(content_widget) + main_layout.addWidget(scroll_area) + + # Create button frame at bottom + button_frame = QWidget() + button_layout = QHBoxLayout(button_frame) + button_layout.setContentsMargins(0, 5, 0, 0) + + # Reset button on left + reset_button = QPushButton("Reset to Defaults") + reset_button.clicked.connect(self._reset_defaults) + reset_button.setMinimumWidth(120) + reset_button.setMinimumHeight(32) + reset_button.setStyleSheet(""" + QPushButton { + background-color: #ffc107; + color: #1a1a1a; + 
font-weight: bold; + font-size: 10pt; + border: none; + border-radius: 4px; + padding: 6px 16px; + } + QPushButton:hover { + background-color: #ffcd38; + } + QPushButton:pressed { + background-color: #e0a800; + } + """) + button_layout.addWidget(reset_button) + + button_layout.addStretch() # Push other buttons to the right + + # Cancel and Save buttons on right + cancel_button = QPushButton("Cancel") + cancel_button.clicked.connect(self._cancel) + cancel_button.setMinimumWidth(90) + cancel_button.setMinimumHeight(32) + cancel_button.setStyleSheet(""" + QPushButton { + background-color: #6c757d; + color: white; + font-weight: bold; + font-size: 10pt; + border: none; + border-radius: 4px; + padding: 6px 16px; + } + QPushButton:hover { + background-color: #7d8a96; + } + QPushButton:pressed { + background-color: #5a6268; + } + """) + button_layout.addWidget(cancel_button) + + save_button = QPushButton("Save") + save_button.clicked.connect(self._save_settings) + save_button.setDefault(True) # Make it the default button + save_button.setMinimumWidth(90) + save_button.setMinimumHeight(32) + save_button.setStyleSheet(""" + QPushButton { + background-color: #28a745; + color: white; + font-weight: bold; + font-size: 10pt; + border: none; + border-radius: 4px; + padding: 6px 16px; + } + QPushButton:hover { + background-color: #34c759; + } + QPushButton:pressed { + background-color: #218838; + } + """) + button_layout.addWidget(save_button) + + main_layout.addWidget(button_frame) + + # Clear initialization flag after setup is complete + self._initializing = False + + # Initialize preprocessing state + self._toggle_preprocessing() + + # Initialize tiling controls state (must be after widgets are created) + try: + self._toggle_tiling_controls() + except Exception as e: + print(f"Warning: Failed to initialize tiling controls: {e}") + + # Initialize iteration controls state + try: + self._toggle_iteration_controls() + except Exception: + pass + + # Disable mouse wheel scrolling on all spinboxes and comboboxes + self._disable_all_spinbox_scrolling(self) + + # Show the dialog + self.show() + + def _create_preprocessing_tab(self): + """Create preprocessing settings tab with all options""" + # Create tab widget and add to tab widget + tab_widget = QWidget() + self.tab_widget.addTab(tab_widget, "Preprocessing") + + # Main scrollable content + main_layout = QVBoxLayout(tab_widget) + main_layout.setContentsMargins(5, 5, 5, 5) + main_layout.setSpacing(6) + + # Enable preprocessing group + enable_group = QGroupBox("Image Preprocessing") + main_layout.addWidget(enable_group) + enable_layout = QVBoxLayout(enable_group) + enable_layout.setContentsMargins(8, 8, 8, 6) + enable_layout.setSpacing(4) + + self.preprocess_enabled = self._create_styled_checkbox("Enable Image Preprocessing") + self.preprocess_enabled.setChecked(self.settings['preprocessing']['enabled']) + self.preprocess_enabled.toggled.connect(self._toggle_preprocessing) + enable_layout.addWidget(self.preprocess_enabled) + + # Store all preprocessing controls for enable/disable + self.preprocessing_controls = [] + + # Auto quality detection + self.auto_detect = self._create_styled_checkbox("Auto-detect image quality issues") + self.auto_detect.setChecked(self.settings['preprocessing']['auto_detect_quality']) + enable_layout.addWidget(self.auto_detect) + self.preprocessing_controls.append(self.auto_detect) + + # Quality thresholds section + threshold_group = QGroupBox("Image Enhancement") + main_layout.addWidget(threshold_group) + threshold_layout = 
QVBoxLayout(threshold_group) + threshold_layout.setContentsMargins(8, 8, 8, 6) + threshold_layout.setSpacing(4) + self.preprocessing_controls.append(threshold_group) + + # Contrast threshold + contrast_frame = QWidget() + contrast_layout = QHBoxLayout(contrast_frame) + contrast_layout.setContentsMargins(0, 0, 0, 0) + threshold_layout.addWidget(contrast_frame) + + contrast_label = QLabel("Contrast Adjustment:") + contrast_label.setMinimumWidth(150) + contrast_layout.addWidget(contrast_label) + self.preprocessing_controls.append(contrast_label) + + self.contrast_threshold = QDoubleSpinBox() + self.contrast_threshold.setRange(0.0, 1.0) + self.contrast_threshold.setSingleStep(0.01) + self.contrast_threshold.setDecimals(2) + self.contrast_threshold.setValue(self.settings['preprocessing']['contrast_threshold']) + contrast_layout.addWidget(self.contrast_threshold) + self.preprocessing_controls.append(self.contrast_threshold) + contrast_layout.addStretch() + + # Sharpness threshold + sharpness_frame = QWidget() + sharpness_layout = QHBoxLayout(sharpness_frame) + sharpness_layout.setContentsMargins(0, 0, 0, 0) + threshold_layout.addWidget(sharpness_frame) + + sharpness_label = QLabel("Sharpness Enhancement:") + sharpness_label.setMinimumWidth(150) + sharpness_layout.addWidget(sharpness_label) + self.preprocessing_controls.append(sharpness_label) + + self.sharpness_threshold = QDoubleSpinBox() + self.sharpness_threshold.setRange(0.0, 1.0) + self.sharpness_threshold.setSingleStep(0.01) + self.sharpness_threshold.setDecimals(2) + self.sharpness_threshold.setValue(self.settings['preprocessing']['sharpness_threshold']) + sharpness_layout.addWidget(self.sharpness_threshold) + self.preprocessing_controls.append(self.sharpness_threshold) + sharpness_layout.addStretch() + + # Enhancement strength + enhance_frame = QWidget() + enhance_layout = QHBoxLayout(enhance_frame) + enhance_layout.setContentsMargins(0, 0, 0, 0) + threshold_layout.addWidget(enhance_frame) + + enhance_label = QLabel("Overall Enhancement:") + enhance_label.setMinimumWidth(150) + enhance_layout.addWidget(enhance_label) + self.preprocessing_controls.append(enhance_label) + + self.enhancement_strength = QDoubleSpinBox() + self.enhancement_strength.setRange(0.0, 3.0) + self.enhancement_strength.setSingleStep(0.01) + self.enhancement_strength.setDecimals(2) + self.enhancement_strength.setValue(self.settings['preprocessing']['enhancement_strength']) + enhance_layout.addWidget(self.enhancement_strength) + self.preprocessing_controls.append(self.enhancement_strength) + enhance_layout.addStretch() + + # Noise reduction section + noise_group = QGroupBox("Noise Reduction") + main_layout.addWidget(noise_group) + noise_layout = QVBoxLayout(noise_group) + noise_layout.setContentsMargins(8, 8, 8, 6) + noise_layout.setSpacing(4) + self.preprocessing_controls.append(noise_group) + + # Noise threshold + noise_threshold_frame = QWidget() + noise_threshold_layout = QHBoxLayout(noise_threshold_frame) + noise_threshold_layout.setContentsMargins(0, 0, 0, 0) + noise_layout.addWidget(noise_threshold_frame) + + noise_label = QLabel("Noise Threshold:") + noise_label.setMinimumWidth(150) + noise_threshold_layout.addWidget(noise_label) + self.preprocessing_controls.append(noise_label) + + self.noise_threshold = QSpinBox() + self.noise_threshold.setRange(0, 50) + self.noise_threshold.setValue(self.settings['preprocessing']['noise_threshold']) + noise_threshold_layout.addWidget(self.noise_threshold) + self.preprocessing_controls.append(self.noise_threshold) + 
noise_threshold_layout.addStretch() + + # Denoise strength + denoise_frame = QWidget() + denoise_layout = QHBoxLayout(denoise_frame) + denoise_layout.setContentsMargins(0, 0, 0, 0) + noise_layout.addWidget(denoise_frame) + + denoise_label = QLabel("Denoise Strength:") + denoise_label.setMinimumWidth(150) + denoise_layout.addWidget(denoise_label) + self.preprocessing_controls.append(denoise_label) + + self.denoise_strength = QSpinBox() + self.denoise_strength.setRange(0, 30) + self.denoise_strength.setValue(self.settings['preprocessing']['denoise_strength']) + denoise_layout.addWidget(self.denoise_strength) + self.preprocessing_controls.append(self.denoise_strength) + denoise_layout.addStretch() + + # Size limits section + size_group = QGroupBox("Image Size Limits") + main_layout.addWidget(size_group) + size_layout = QVBoxLayout(size_group) + size_layout.setContentsMargins(8, 8, 8, 6) + size_layout.setSpacing(4) + self.preprocessing_controls.append(size_group) + + # Max dimension + dimension_frame = QWidget() + dimension_layout = QHBoxLayout(dimension_frame) + dimension_layout.setContentsMargins(0, 0, 0, 0) + size_layout.addWidget(dimension_frame) + + dimension_label = QLabel("Max Dimension:") + dimension_label.setMinimumWidth(150) + dimension_layout.addWidget(dimension_label) + self.preprocessing_controls.append(dimension_label) + + self.dimension_spinbox = QSpinBox() + self.dimension_spinbox.setRange(500, 4000) + self.dimension_spinbox.setSingleStep(100) + self.dimension_spinbox.setValue(self.settings['preprocessing']['max_image_dimension']) + dimension_layout.addWidget(self.dimension_spinbox) + self.preprocessing_controls.append(self.dimension_spinbox) + + dimension_layout.addWidget(QLabel("pixels")) + dimension_layout.addStretch() + + # Max pixels + pixels_frame = QWidget() + pixels_layout = QHBoxLayout(pixels_frame) + pixels_layout.setContentsMargins(0, 0, 0, 0) + size_layout.addWidget(pixels_frame) + + pixels_label = QLabel("Max Total Pixels:") + pixels_label.setMinimumWidth(150) + pixels_layout.addWidget(pixels_label) + self.preprocessing_controls.append(pixels_label) + + self.pixels_spinbox = QSpinBox() + self.pixels_spinbox.setRange(1000000, 10000000) + self.pixels_spinbox.setSingleStep(100000) + self.pixels_spinbox.setValue(self.settings['preprocessing']['max_image_pixels']) + pixels_layout.addWidget(self.pixels_spinbox) + self.preprocessing_controls.append(self.pixels_spinbox) + + pixels_layout.addWidget(QLabel("pixels")) + pixels_layout.addStretch() + + # Compression section + compression_group = QGroupBox("Image Compression (applies to OCR uploads)") + main_layout.addWidget(compression_group) + compression_layout = QVBoxLayout(compression_group) + compression_layout.setContentsMargins(8, 8, 8, 6) + compression_layout.setSpacing(4) + # Do NOT add compression controls to preprocessing_controls; keep independent of preprocessing toggle + + # Enable compression toggle + self.compression_enabled = self._create_styled_checkbox("Enable compression for OCR uploads") + self.compression_enabled.setChecked(self.settings.get('compression', {}).get('enabled', False)) + self.compression_enabled.toggled.connect(self._toggle_compression_enabled) + compression_layout.addWidget(self.compression_enabled) + + # Format selection + format_frame = QWidget() + format_layout = QHBoxLayout(format_frame) + format_layout.setContentsMargins(0, 0, 0, 0) + compression_layout.addWidget(format_frame) + + self.format_label = QLabel("Format:") + self.format_label.setMinimumWidth(150) + 
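+ # Only the quality row for the currently selected format (JPEG/PNG/WEBP) is shown; _toggle_compression_format() swaps the rows and _toggle_compression_enabled() greys them out when compression is off.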
format_layout.addWidget(self.format_label) + + self.compression_format_combo = QComboBox() + self.compression_format_combo.addItems(['jpeg', 'png', 'webp']) + self.compression_format_combo.setCurrentText(self.settings.get('compression', {}).get('format', 'jpeg')) + self.compression_format_combo.currentTextChanged.connect(self._toggle_compression_format) + format_layout.addWidget(self.compression_format_combo) + format_layout.addStretch() + + # JPEG quality + self.jpeg_frame = QWidget() + jpeg_layout = QHBoxLayout(self.jpeg_frame) + jpeg_layout.setContentsMargins(0, 0, 0, 0) + compression_layout.addWidget(self.jpeg_frame) + + self.jpeg_label = QLabel("JPEG Quality:") + self.jpeg_label.setMinimumWidth(150) + jpeg_layout.addWidget(self.jpeg_label) + + self.jpeg_quality_spin = QSpinBox() + self.jpeg_quality_spin.setRange(1, 95) + self.jpeg_quality_spin.setValue(self.settings.get('compression', {}).get('jpeg_quality', 85)) + jpeg_layout.addWidget(self.jpeg_quality_spin) + + self.jpeg_help = QLabel("(higher = better quality, larger size)") + self.jpeg_help.setStyleSheet("color: gray; font-size: 9pt;") + jpeg_layout.addWidget(self.jpeg_help) + jpeg_layout.addStretch() + + # PNG compression level + self.png_frame = QWidget() + png_layout = QHBoxLayout(self.png_frame) + png_layout.setContentsMargins(0, 0, 0, 0) + compression_layout.addWidget(self.png_frame) + + self.png_label = QLabel("PNG Compression:") + self.png_label.setMinimumWidth(150) + png_layout.addWidget(self.png_label) + + self.png_level_spin = QSpinBox() + self.png_level_spin.setRange(0, 9) + self.png_level_spin.setValue(self.settings.get('compression', {}).get('png_compress_level', 6)) + png_layout.addWidget(self.png_level_spin) + + self.png_help = QLabel("(0 = fastest, 9 = smallest)") + self.png_help.setStyleSheet("color: gray; font-size: 9pt;") + png_layout.addWidget(self.png_help) + png_layout.addStretch() + + # WEBP quality + self.webp_frame = QWidget() + webp_layout = QHBoxLayout(self.webp_frame) + webp_layout.setContentsMargins(0, 0, 0, 0) + compression_layout.addWidget(self.webp_frame) + + self.webp_label = QLabel("WEBP Quality:") + self.webp_label.setMinimumWidth(150) + webp_layout.addWidget(self.webp_label) + + self.webp_quality_spin = QSpinBox() + self.webp_quality_spin.setRange(1, 100) + self.webp_quality_spin.setValue(self.settings.get('compression', {}).get('webp_quality', 85)) + webp_layout.addWidget(self.webp_quality_spin) + + self.webp_help = QLabel("(higher = better quality, larger size)") + self.webp_help.setStyleSheet("color: gray; font-size: 9pt;") + webp_layout.addWidget(self.webp_help) + webp_layout.addStretch() + + # Initialize format-specific visibility and enabled state + self._toggle_compression_format() + self._toggle_compression_enabled() + + # HD Strategy (Inpainting acceleration) - Independent of preprocessing toggle + hd_group = QGroupBox("Inpainting HD Strategy") + main_layout.addWidget(hd_group) + hd_layout = QVBoxLayout(hd_group) + hd_layout.setContentsMargins(8, 8, 8, 6) + hd_layout.setSpacing(4) + # Do NOT add to preprocessing_controls - HD Strategy should be independent + + # Chunk settings for large images - Independent of preprocessing toggle + chunk_group = QGroupBox("Large Image Processing") + main_layout.addWidget(chunk_group) + chunk_layout = QVBoxLayout(chunk_group) + chunk_layout.setContentsMargins(8, 8, 8, 6) + chunk_layout.setSpacing(4) + # Do NOT add to preprocessing_controls - Large Image Processing should be independent + + # Strategy selector + strat_frame = QWidget() + strat_layout = 
QHBoxLayout(strat_frame) + strat_layout.setContentsMargins(0, 0, 0, 0) + hd_layout.addWidget(strat_frame) + + strat_label = QLabel("Strategy:") + strat_label.setMinimumWidth(150) + strat_layout.addWidget(strat_label) + + self.hd_strategy_combo = QComboBox() + self.hd_strategy_combo.addItems(['original', 'resize', 'crop']) + self.hd_strategy_combo.setCurrentText(self.settings.get('advanced', {}).get('hd_strategy', 'resize')) + self.hd_strategy_combo.currentTextChanged.connect(self._on_hd_strategy_change) + strat_layout.addWidget(self.hd_strategy_combo) + + strat_help = QLabel("(original = legacy full-image; resize/crop = faster)") + strat_help.setStyleSheet("color: gray; font-size: 9pt;") + strat_layout.addWidget(strat_help) + strat_layout.addStretch() + + # Resize limit row + self.hd_resize_frame = QWidget() + resize_layout = QHBoxLayout(self.hd_resize_frame) + resize_layout.setContentsMargins(0, 0, 0, 0) + hd_layout.addWidget(self.hd_resize_frame) + + resize_label = QLabel("Resize limit (long edge):") + resize_label.setMinimumWidth(150) + resize_layout.addWidget(resize_label) + + self.hd_resize_limit_spin = QSpinBox() + self.hd_resize_limit_spin.setRange(512, 4096) + self.hd_resize_limit_spin.setSingleStep(64) + self.hd_resize_limit_spin.setValue(int(self.settings.get('advanced', {}).get('hd_strategy_resize_limit', 1536))) + resize_layout.addWidget(self.hd_resize_limit_spin) + + resize_layout.addWidget(QLabel("px")) + resize_layout.addStretch() + + # Crop params rows + self.hd_crop_margin_frame = QWidget() + margin_layout = QHBoxLayout(self.hd_crop_margin_frame) + margin_layout.setContentsMargins(0, 0, 0, 0) + hd_layout.addWidget(self.hd_crop_margin_frame) + + margin_label = QLabel("Crop margin:") + margin_label.setMinimumWidth(150) + margin_layout.addWidget(margin_label) + + self.hd_crop_margin_spin = QSpinBox() + self.hd_crop_margin_spin.setRange(0, 256) + self.hd_crop_margin_spin.setSingleStep(2) + self.hd_crop_margin_spin.setValue(int(self.settings.get('advanced', {}).get('hd_strategy_crop_margin', 16))) + margin_layout.addWidget(self.hd_crop_margin_spin) + + margin_layout.addWidget(QLabel("px")) + margin_layout.addStretch() + + self.hd_crop_trigger_frame = QWidget() + trigger_layout = QHBoxLayout(self.hd_crop_trigger_frame) + trigger_layout.setContentsMargins(0, 0, 0, 0) + hd_layout.addWidget(self.hd_crop_trigger_frame) + + trigger_label = QLabel("Crop trigger size:") + trigger_label.setMinimumWidth(150) + trigger_layout.addWidget(trigger_label) + + self.hd_crop_trigger_spin = QSpinBox() + self.hd_crop_trigger_spin.setRange(256, 4096) + self.hd_crop_trigger_spin.setSingleStep(64) + self.hd_crop_trigger_spin.setValue(int(self.settings.get('advanced', {}).get('hd_strategy_crop_trigger_size', 1024))) + trigger_layout.addWidget(self.hd_crop_trigger_spin) + + trigger_help = QLabel("px (apply crop only if long edge > trigger)") + trigger_layout.addWidget(trigger_help) + trigger_layout.addStretch() + + # Initialize strategy-specific visibility + self._on_hd_strategy_change() + + # Clarifying note about precedence with tiling + note_label = QLabel( + "Note: HD Strategy (resize/crop) takes precedence over Inpainting Tiling when it triggers.\n" + "Set strategy to 'original' if you want tiling to control large-image behavior." 
+ ) + note_label.setStyleSheet("color: gray; font-size: 9pt;") + note_label.setWordWrap(True) + hd_layout.addWidget(note_label) + + # Chunk height + chunk_height_frame = QWidget() + chunk_height_layout = QHBoxLayout(chunk_height_frame) + chunk_height_layout.setContentsMargins(0, 0, 0, 0) + chunk_layout.addWidget(chunk_height_frame) + + chunk_height_label = QLabel("Chunk Height:") + chunk_height_label.setMinimumWidth(150) + chunk_height_layout.addWidget(chunk_height_label) + # Do NOT add to preprocessing_controls - chunk settings should be independent + + self.chunk_height_spinbox = QSpinBox() + self.chunk_height_spinbox.setRange(500, 2000) + self.chunk_height_spinbox.setSingleStep(100) + self.chunk_height_spinbox.setValue(self.settings['preprocessing']['chunk_height']) + chunk_height_layout.addWidget(self.chunk_height_spinbox) + # Do NOT add to preprocessing_controls - chunk settings should be independent + + chunk_height_layout.addWidget(QLabel("pixels")) + chunk_height_layout.addStretch() + + # Chunk overlap + chunk_overlap_frame = QWidget() + chunk_overlap_layout = QHBoxLayout(chunk_overlap_frame) + chunk_overlap_layout.setContentsMargins(0, 0, 0, 0) + chunk_layout.addWidget(chunk_overlap_frame) + + chunk_overlap_label = QLabel("Chunk Overlap:") + chunk_overlap_label.setMinimumWidth(150) + chunk_overlap_layout.addWidget(chunk_overlap_label) + # Do NOT add to preprocessing_controls - chunk settings should be independent + + self.chunk_overlap_spinbox = QSpinBox() + self.chunk_overlap_spinbox.setRange(0, 200) + self.chunk_overlap_spinbox.setSingleStep(10) + self.chunk_overlap_spinbox.setValue(self.settings['preprocessing']['chunk_overlap']) + chunk_overlap_layout.addWidget(self.chunk_overlap_spinbox) + # Do NOT add to preprocessing_controls - chunk settings should be independent + + chunk_overlap_layout.addWidget(QLabel("pixels")) + chunk_overlap_layout.addStretch() + + # Inpainting Tiling section + tiling_group = QGroupBox("Inpainting Tiling") + main_layout.addWidget(tiling_group) + tiling_layout = QVBoxLayout(tiling_group) + tiling_layout.setContentsMargins(8, 8, 8, 6) + tiling_layout.setSpacing(4) + # Do NOT add to preprocessing_controls - tiling should be independent + + # Enable tiling + # Prefer values from legacy 'tiling' section if present, otherwise use 'preprocessing' + tiling_enabled_value = self.settings['preprocessing'].get('inpaint_tiling_enabled', False) + if 'tiling' in self.settings and isinstance(self.settings['tiling'], dict) and 'enabled' in self.settings['tiling']: + tiling_enabled_value = self.settings['tiling']['enabled'] + + self.inpaint_tiling_enabled = self._create_styled_checkbox("Enable automatic tiling for inpainting (processes large images in tiles)") + self.inpaint_tiling_enabled.setChecked(tiling_enabled_value) + self.inpaint_tiling_enabled.toggled.connect(self._toggle_tiling_controls) + tiling_layout.addWidget(self.inpaint_tiling_enabled) + + # Tile size + tile_size_frame = QWidget() + tile_size_layout = QHBoxLayout(tile_size_frame) + tile_size_layout.setContentsMargins(0, 0, 0, 0) + tiling_layout.addWidget(tile_size_frame) + + self.tile_size_label = QLabel("Tile Size:") + self.tile_size_label.setMinimumWidth(150) + tile_size_layout.addWidget(self.tile_size_label) + + tile_size_value = self.settings['preprocessing'].get('inpaint_tile_size', 512) + if 'tiling' in self.settings and isinstance(self.settings['tiling'], dict) and 'tile_size' in self.settings['tiling']: + tile_size_value = self.settings['tiling']['tile_size'] + + self.tile_size_spinbox = 
QSpinBox() + self.tile_size_spinbox.setRange(256, 2048) + self.tile_size_spinbox.setSingleStep(128) + self.tile_size_spinbox.setValue(tile_size_value) + tile_size_layout.addWidget(self.tile_size_spinbox) + + self.tile_size_unit_label = QLabel("pixels") + tile_size_layout.addWidget(self.tile_size_unit_label) + tile_size_layout.addStretch() + + # Tile overlap + tile_overlap_frame = QWidget() + tile_overlap_layout = QHBoxLayout(tile_overlap_frame) + tile_overlap_layout.setContentsMargins(0, 0, 0, 0) + tiling_layout.addWidget(tile_overlap_frame) + + self.tile_overlap_label = QLabel("Tile Overlap:") + self.tile_overlap_label.setMinimumWidth(150) + tile_overlap_layout.addWidget(self.tile_overlap_label) + + tile_overlap_value = self.settings['preprocessing'].get('inpaint_tile_overlap', 64) + if 'tiling' in self.settings and isinstance(self.settings['tiling'], dict) and 'tile_overlap' in self.settings['tiling']: + tile_overlap_value = self.settings['tiling']['tile_overlap'] + + self.tile_overlap_spinbox = QSpinBox() + self.tile_overlap_spinbox.setRange(0, 256) + self.tile_overlap_spinbox.setSingleStep(16) + self.tile_overlap_spinbox.setValue(tile_overlap_value) + tile_overlap_layout.addWidget(self.tile_overlap_spinbox) + + self.tile_overlap_unit_label = QLabel("pixels") + tile_overlap_layout.addWidget(self.tile_overlap_unit_label) + tile_overlap_layout.addStretch() + + # Don't initialize here - will be done after dialog is shown + + # ELIMINATE ALL EMPTY SPACE - Add stretch at the end + main_layout.addStretch() + + def _create_inpainting_tab(self): + """Create inpainting settings tab with comprehensive per-text-type dilation controls""" + # Create tab widget and add to tab widget + tab_widget = QWidget() + self.tab_widget.addTab(tab_widget, "Inpainting") + + # Main scrollable content + main_layout = QVBoxLayout(tab_widget) + main_layout.setContentsMargins(5, 5, 5, 5) + main_layout.setSpacing(6) + + # General Mask Settings (applies to all inpainting methods) + mask_group = QGroupBox("Mask Settings") + main_layout.addWidget(mask_group) + mask_layout = QVBoxLayout(mask_group) + mask_layout.setContentsMargins(8, 8, 8, 6) + mask_layout.setSpacing(4) + + # Auto toggle (affects both mask dilation AND iterations) + self.auto_iterations_checkbox = self._create_styled_checkbox("Auto Iterations (automatically set values based on OCR provider and B&W vs Color)") + self.auto_iterations_checkbox.setChecked(self.settings.get('auto_iterations', True)) + self.auto_iterations_checkbox.toggled.connect(self._toggle_iteration_controls) + self.auto_iterations_checkbox.toggled.connect(self._on_primary_auto_toggle) # Sync with "Use Same For All" + mask_layout.addWidget(self.auto_iterations_checkbox) + + # Mask Dilation frame (affected by auto setting) + mask_dilation_group = QGroupBox("Mask Dilation") + mask_layout.addWidget(mask_dilation_group) + mask_dilation_layout = QVBoxLayout(mask_dilation_group) + mask_dilation_layout.setContentsMargins(8, 8, 8, 6) + mask_dilation_layout.setSpacing(4) + + # Note about dilation importance + note_label = QLabel( + "Mask dilation is critical for avoiding white spots in final images.\n" + "Adjust per text type for optimal results." 
+ ) + note_label.setStyleSheet("color: gray; font-style: italic;") + note_label.setWordWrap(True) + mask_dilation_layout.addWidget(note_label) + + # Keep all three dilation controls in a list for easy access + if not hasattr(self, 'mask_dilation_controls'): + self.mask_dilation_controls = [] + + # Mask dilation size + dilation_frame = QWidget() + dilation_layout = QHBoxLayout(dilation_frame) + dilation_layout.setContentsMargins(0, 0, 0, 0) + mask_dilation_layout.addWidget(dilation_frame) + + self.dilation_label = QLabel("Mask Dilation:") + self.dilation_label.setMinimumWidth(150) + dilation_layout.addWidget(self.dilation_label) + + self.mask_dilation_spinbox = QSpinBox() + self.mask_dilation_spinbox.setRange(0, 50) + self.mask_dilation_spinbox.setSingleStep(5) + self.mask_dilation_spinbox.setValue(self.settings.get('mask_dilation', 15)) + dilation_layout.addWidget(self.mask_dilation_spinbox) + + self.dilation_unit_label = QLabel("pixels (expand mask beyond text)") + dilation_layout.addWidget(self.dilation_unit_label) + dilation_layout.addStretch() + + # Per-Text-Type Iterations - EXPANDED SECTION + iterations_group = QGroupBox("Dilation Iterations Control") + iterations_layout = QVBoxLayout(iterations_group) + mask_dilation_layout.addWidget(iterations_group) + + # All Iterations Master Control (NEW) + all_iter_widget = QWidget() + all_iter_layout = QHBoxLayout(all_iter_widget) + all_iter_layout.setContentsMargins(0, 0, 0, 0) + iterations_layout.addWidget(all_iter_widget) + + # Checkbox to enable/disable uniform iterations + self.use_all_iterations_checkbox = self._create_styled_checkbox("Use Same For All:") + self.use_all_iterations_checkbox.setChecked(self.settings.get('use_all_iterations', True)) + self.use_all_iterations_checkbox.toggled.connect(self._toggle_iteration_controls) + all_iter_layout.addWidget(self.use_all_iterations_checkbox) + + all_iter_layout.addSpacing(10) + + self.all_iterations_spinbox = QSpinBox() + self.all_iterations_spinbox.setRange(0, 5) + self.all_iterations_spinbox.setValue(self.settings.get('all_iterations', 2)) + self.all_iterations_spinbox.setEnabled(self.use_all_iterations_checkbox.isChecked()) + all_iter_layout.addWidget(self.all_iterations_spinbox) + + self.all_iter_label = QLabel("iterations (applies to all text types)") + all_iter_layout.addWidget(self.all_iter_label) + all_iter_layout.addStretch() + + # Separator + separator1 = QFrame() + separator1.setFrameShape(QFrame.Shape.HLine) + separator1.setFrameShadow(QFrame.Shadow.Sunken) + iterations_layout.addWidget(separator1) + + # Individual Controls Label + self.individual_controls_header_label = QLabel("Individual Text Type Controls:") + individual_label_font = QFont('Arial', 9) + individual_label_font.setBold(True) + self.individual_controls_header_label.setFont(individual_label_font) + iterations_layout.addWidget(self.individual_controls_header_label) + + # Text Bubble iterations (modified from original bubble iterations) + text_bubble_iter_widget = QWidget() + text_bubble_iter_layout = QHBoxLayout(text_bubble_iter_widget) + text_bubble_iter_layout.setContentsMargins(0, 0, 0, 0) + iterations_layout.addWidget(text_bubble_iter_widget) + + self.text_bubble_label = QLabel("Text Bubbles:") + self.text_bubble_label.setMinimumWidth(120) + text_bubble_iter_layout.addWidget(self.text_bubble_label) + + self.text_bubble_iter_spinbox = QSpinBox() + self.text_bubble_iter_spinbox.setRange(0, 5) + self.text_bubble_iter_spinbox.setValue(self.settings.get('text_bubble_dilation_iterations', + 
self.settings.get('bubble_dilation_iterations', 2))) + text_bubble_iter_layout.addWidget(self.text_bubble_iter_spinbox) + + self.text_bubble_desc = QLabel("iterations (speech/dialogue bubbles)") + text_bubble_iter_layout.addWidget(self.text_bubble_desc) + text_bubble_iter_layout.addStretch() + + # Empty Bubble iterations (NEW) + empty_bubble_iter_widget = QWidget() + empty_bubble_iter_layout = QHBoxLayout(empty_bubble_iter_widget) + empty_bubble_iter_layout.setContentsMargins(0, 0, 0, 0) + iterations_layout.addWidget(empty_bubble_iter_widget) + + self.empty_bubble_label = QLabel("Empty Bubbles:") + self.empty_bubble_label.setMinimumWidth(120) + empty_bubble_iter_layout.addWidget(self.empty_bubble_label) + + self.empty_bubble_iter_spinbox = QSpinBox() + self.empty_bubble_iter_spinbox.setRange(0, 5) + self.empty_bubble_iter_spinbox.setValue(self.settings.get('empty_bubble_dilation_iterations', 3)) + empty_bubble_iter_layout.addWidget(self.empty_bubble_iter_spinbox) + + self.empty_bubble_desc = QLabel("iterations (empty speech bubbles)") + empty_bubble_iter_layout.addWidget(self.empty_bubble_desc) + empty_bubble_iter_layout.addStretch() + + # Free text iterations + free_text_iter_widget = QWidget() + free_text_iter_layout = QHBoxLayout(free_text_iter_widget) + free_text_iter_layout.setContentsMargins(0, 0, 0, 0) + iterations_layout.addWidget(free_text_iter_widget) + + self.free_text_label = QLabel("Free Text:") + self.free_text_label.setMinimumWidth(120) + free_text_iter_layout.addWidget(self.free_text_label) + + self.free_text_iter_spinbox = QSpinBox() + self.free_text_iter_spinbox.setRange(0, 5) + self.free_text_iter_spinbox.setValue(self.settings.get('free_text_dilation_iterations', 0)) + free_text_iter_layout.addWidget(self.free_text_iter_spinbox) + + self.free_text_desc = QLabel("iterations (0 = perfect for B&W panels)") + free_text_iter_layout.addWidget(self.free_text_desc) + free_text_iter_layout.addStretch() + + # Store individual control widgets for enable/disable (includes descriptive labels) + self.individual_iteration_controls = [ + (self.text_bubble_label, self.text_bubble_iter_spinbox, self.text_bubble_desc), + (self.empty_bubble_label, self.empty_bubble_iter_spinbox, self.empty_bubble_desc), + (self.free_text_label, self.free_text_iter_spinbox, self.free_text_desc) + ] + + # Apply initial state + self._toggle_iteration_controls() + + # Quick presets - UPDATED VERSION + preset_widget = QWidget() + preset_layout = QHBoxLayout(preset_widget) + preset_layout.setContentsMargins(0, 0, 0, 0) + mask_dilation_layout.addWidget(preset_widget) + + preset_label = QLabel("Quick Presets:") + preset_layout.addWidget(preset_label) + preset_layout.addSpacing(10) + + bw_manga_btn = QPushButton("B&W Manga") + bw_manga_btn.setStyleSheet(""" + QPushButton { + background-color: #3a7ca5; + color: white; + border: none; + padding: 6px 12px; + border-radius: 4px; + font-weight: bold; + } + QPushButton:hover { + background-color: #4a8cb5; + } + QPushButton:pressed { + background-color: #2a6c95; + } + """) + bw_manga_btn.clicked.connect(lambda: self._set_mask_preset(15, False, 2, 2, 3, 0)) + preset_layout.addWidget(bw_manga_btn) + + colored_btn = QPushButton("Colored") + colored_btn.setStyleSheet(""" + QPushButton { + background-color: #3a7ca5; + color: white; + border: none; + padding: 6px 12px; + border-radius: 4px; + font-weight: bold; + } + QPushButton:hover { + background-color: #4a8cb5; + } + QPushButton:pressed { + background-color: #2a6c95; + } + """) + colored_btn.clicked.connect(lambda: 
self._set_mask_preset(15, False, 2, 2, 3, 3)) + preset_layout.addWidget(colored_btn) + + uniform_btn = QPushButton("Uniform") + uniform_btn.setStyleSheet(""" + QPushButton { + background-color: #3a7ca5; + color: white; + border: none; + padding: 6px 12px; + border-radius: 4px; + font-weight: bold; + } + QPushButton:hover { + background-color: #4a8cb5; + } + QPushButton:pressed { + background-color: #2a6c95; + } + """) + uniform_btn.clicked.connect(lambda: self._set_mask_preset(0, True, 2, 2, 2, 0)) + preset_layout.addWidget(uniform_btn) + + preset_layout.addStretch() + + # Help text - UPDATED + help_text = QLabel( + "💡 B&W Manga: Optimized for black & white panels with clean bubbles\n" + "💡 Colored: For colored manga with complex backgrounds\n" + "💡 Aggressive: For difficult text removal cases\n" + "💡 Uniform: Good for Manga-OCR\n" + "ℹ️ Empty bubbles often need more iterations than text bubbles\n" + "ℹ️ Set Free Text to 0 for crisp B&W panels without bleeding" + ) + help_text_font = QFont('Arial', 9) + help_text.setFont(help_text_font) + help_text.setStyleSheet("color: gray;") + help_text.setWordWrap(True) + mask_dilation_layout.addWidget(help_text) + + main_layout.addStretch() + + def _toggle_iteration_controls(self): + """Enable/disable iteration controls based on Auto and 'Use Same For All' toggles""" + # Get auto checkbox state + auto_on = False + if hasattr(self, 'auto_iterations_checkbox'): + auto_on = self.auto_iterations_checkbox.isChecked() + + # Get use_all checkbox state + use_all = False + if hasattr(self, 'use_all_iterations_checkbox'): + use_all = self.use_all_iterations_checkbox.isChecked() + + # Also update the auto_iterations_enabled attribute + self.auto_iterations_enabled = auto_on + + if auto_on: + # Disable ALL mask dilation and iteration controls when auto is on + # Mask dilation controls + try: + if hasattr(self, 'mask_dilation_spinbox'): + self.mask_dilation_spinbox.setEnabled(False) + except Exception: + pass + try: + if hasattr(self, 'dilation_label'): + self.dilation_label.setEnabled(False) + except Exception: + pass + try: + if hasattr(self, 'dilation_unit_label'): + self.dilation_unit_label.setEnabled(False) + except Exception: + pass + # Iteration controls + try: + self.all_iterations_spinbox.setEnabled(False) + except Exception: + pass + try: + if hasattr(self, 'all_iter_label'): + self.all_iter_label.setEnabled(False) + except Exception: + pass + try: + if hasattr(self, 'use_all_iterations_checkbox'): + self.use_all_iterations_checkbox.setEnabled(False) + except Exception: + pass + try: + if hasattr(self, 'individual_controls_header_label'): + self.individual_controls_header_label.setEnabled(False) + except Exception: + pass + # Disable individual controls and their description labels + for control_tuple in getattr(self, 'individual_iteration_controls', []): + try: + if len(control_tuple) == 3: + label, spinbox, desc_label = control_tuple + spinbox.setEnabled(False) + label.setEnabled(False) + desc_label.setEnabled(False) + elif len(control_tuple) == 2: + label, spinbox = control_tuple + spinbox.setEnabled(False) + label.setEnabled(False) + except Exception: + pass + return + + # Auto off -> enable mask dilation (always) and "Use Same For All" (respect its state) + # Mask dilation is always enabled when auto is off + try: + if hasattr(self, 'mask_dilation_spinbox'): + self.mask_dilation_spinbox.setEnabled(True) + except Exception: + pass + try: + if hasattr(self, 'dilation_label'): + self.dilation_label.setEnabled(True) + except Exception: + pass + try: + 
if hasattr(self, 'dilation_unit_label'): + self.dilation_unit_label.setEnabled(True) + except Exception: + pass + + try: + if hasattr(self, 'use_all_iterations_checkbox'): + self.use_all_iterations_checkbox.setEnabled(True) + except Exception: + pass + + try: + self.all_iterations_spinbox.setEnabled(use_all) + except Exception: + pass + try: + if hasattr(self, 'all_iter_label'): + self.all_iter_label.setEnabled(use_all) + except Exception: + pass + try: + if hasattr(self, 'individual_controls_header_label'): + self.individual_controls_header_label.setEnabled(not use_all) + except Exception: + pass + + # Individual controls respect the "Use Same For All" state + for control_tuple in getattr(self, 'individual_iteration_controls', []): + enabled = not use_all + try: + if len(control_tuple) == 3: + label, spinbox, desc_label = control_tuple + spinbox.setEnabled(enabled) + label.setEnabled(enabled) + desc_label.setEnabled(enabled) + elif len(control_tuple) == 2: + label, spinbox = control_tuple + spinbox.setEnabled(enabled) + label.setEnabled(enabled) + except Exception: + pass + + def _on_primary_auto_toggle(self, checked): + """When primary Auto toggle changes, disable/enable 'Use Same For All' checkbox""" + if hasattr(self, 'use_all_iterations_checkbox'): + self.use_all_iterations_checkbox.setEnabled(not checked) + + def _set_mask_preset(self, dilation, use_all, all_iter, text_bubble_iter, empty_bubble_iter, free_text_iter): + """Set mask dilation preset values with comprehensive iteration controls""" + self.mask_dilation_spinbox.setValue(dilation) + self.use_all_iterations_checkbox.setChecked(use_all) + self.all_iterations_spinbox.setValue(all_iter) + self.text_bubble_iter_spinbox.setValue(text_bubble_iter) + self.empty_bubble_iter_spinbox.setValue(empty_bubble_iter) + self.free_text_iter_spinbox.setValue(free_text_iter) + self._toggle_iteration_controls() + + def _create_cloud_api_tab(self): + """Create cloud API settings tab""" + # Create tab widget and add to tab widget + tab_widget = QWidget() + self.tab_widget.addTab(tab_widget, "Cloud API") + + # Create scroll area for content + scroll_area = QScrollArea() + scroll_area.setWidgetResizable(True) + scroll_area.setFrameShape(QFrame.Shape.NoFrame) + + content_widget = QWidget() + content_layout = QVBoxLayout(content_widget) + content_layout.setSpacing(10) + content_layout.setContentsMargins(20, 20, 20, 20) + + scroll_area.setWidget(content_widget) + + # Add scroll area to parent layout + parent_layout = QVBoxLayout(tab_widget) + parent_layout.setContentsMargins(0, 0, 0, 0) + parent_layout.addWidget(scroll_area) + + # API Model Selection + model_group = QGroupBox("Inpainting Model") + model_layout = QVBoxLayout(model_group) + content_layout.addWidget(model_group) + + model_desc = QLabel("Select the Replicate model to use for inpainting:") + model_layout.addWidget(model_desc) + model_layout.addSpacing(10) + + # Model options - use button group for radio buttons + self.cloud_model_button_group = QButtonGroup() + self.cloud_model_selected = self.settings.get('cloud_inpaint_model', 'ideogram-v2') + + models = [ + ('ideogram-v2', 'Ideogram V2 (Best quality, with prompts)', 'ideogram-ai/ideogram-v2'), + ('sd-inpainting', 'Stable Diffusion Inpainting (Classic, fast)', 'stability-ai/stable-diffusion-inpainting'), + ('flux-inpainting', 'FLUX Dev Inpainting (High quality)', 'zsxkib/flux-dev-inpainting'), + ('custom', 'Custom Model (Enter model identifier)', '') + ] + + for value, text, model_id in models: + row_widget = QWidget() + row_layout = 
QHBoxLayout(row_widget) + row_layout.setContentsMargins(0, 0, 0, 0) + model_layout.addWidget(row_widget) + + rb = QRadioButton(text) + rb.setChecked(value == self.cloud_model_selected) + rb.toggled.connect(lambda checked, v=value: self._on_cloud_model_change(v) if checked else None) + self.cloud_model_button_group.addButton(rb) + row_layout.addWidget(rb) + + if model_id: + model_id_label = QLabel(f"({model_id})") + model_id_font = QFont('Arial', 8) + model_id_label.setFont(model_id_font) + model_id_label.setStyleSheet("color: gray;") + row_layout.addWidget(model_id_label) + + row_layout.addStretch() + + # Custom version ID (now model identifier) + self.custom_version_widget = QWidget() + custom_version_layout = QVBoxLayout(self.custom_version_widget) + custom_version_layout.setContentsMargins(0, 10, 0, 0) + model_layout.addWidget(self.custom_version_widget) + + custom_id_row = QWidget() + custom_id_layout = QHBoxLayout(custom_id_row) + custom_id_layout.setContentsMargins(0, 0, 0, 0) + custom_version_layout.addWidget(custom_id_row) + + custom_id_label = QLabel("Model ID:") + custom_id_label.setMinimumWidth(120) + custom_id_layout.addWidget(custom_id_label) + + self.custom_version_entry = QLineEdit() + self.custom_version_entry.setText(self.settings.get('cloud_custom_version', '')) + custom_id_layout.addWidget(self.custom_version_entry) + + # Add helper text for custom model + helper_text = QLabel("Format: owner/model-name (e.g. stability-ai/stable-diffusion-inpainting)") + helper_font = QFont('Arial', 8) + helper_text.setFont(helper_font) + helper_text.setStyleSheet("color: gray;") + helper_text.setContentsMargins(120, 0, 0, 0) + custom_version_layout.addWidget(helper_text) + + # Initially hide custom version entry + if self.cloud_model_selected != 'custom': + self.custom_version_widget.setVisible(False) + + # Performance Settings + perf_group = QGroupBox("Performance Settings") + perf_layout = QVBoxLayout(perf_group) + content_layout.addWidget(perf_group) + + # Timeout + timeout_widget = QWidget() + timeout_layout = QHBoxLayout(timeout_widget) + timeout_layout.setContentsMargins(0, 0, 0, 0) + perf_layout.addWidget(timeout_widget) + + timeout_label = QLabel("API Timeout:") + timeout_label.setMinimumWidth(120) + timeout_layout.addWidget(timeout_label) + + self.cloud_timeout_spinbox = QSpinBox() + self.cloud_timeout_spinbox.setRange(30, 300) + self.cloud_timeout_spinbox.setValue(self.settings.get('cloud_timeout', 60)) + timeout_layout.addWidget(self.cloud_timeout_spinbox) + + timeout_unit = QLabel("seconds") + timeout_unit_font = QFont('Arial', 9) + timeout_unit.setFont(timeout_unit_font) + timeout_layout.addWidget(timeout_unit) + timeout_layout.addStretch() + + # Help text + help_text = QLabel( + "💡 Tips:\n" + "• Ideogram V2 is currently the best quality option\n" + "• SD inpainting is fast and supports prompts\n" + "• FLUX inpainting offers high quality results\n" + "• Find more models at replicate.com/collections/inpainting" + ) + help_font = QFont('Arial', 9) + help_text.setFont(help_font) + help_text.setStyleSheet("color: gray;") + help_text.setWordWrap(True) + content_layout.addWidget(help_text) + + # Prompt Settings (for all models except custom) + self.prompt_group = QGroupBox("Prompt Settings") + prompt_layout = QVBoxLayout(self.prompt_group) + content_layout.addWidget(self.prompt_group) + + # Positive prompt + prompt_label = QLabel("Inpainting Prompt:") + prompt_layout.addWidget(prompt_label) + + self.cloud_prompt_entry = QLineEdit() + 
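+ # Free-text prompt for the cloud inpainting call; falls back to 'clean background, smooth surface' when 'cloud_inpaint_prompt' is absent from settings.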
self.cloud_prompt_entry.setText(self.settings.get('cloud_inpaint_prompt', 'clean background, smooth surface')) + prompt_layout.addWidget(self.cloud_prompt_entry) + + # Add note about prompts + prompt_tip = QLabel("Tip: Describe what you want in the inpainted area (e.g., 'white wall', 'wooden floor')") + prompt_tip_font = QFont('Arial', 8) + prompt_tip.setFont(prompt_tip_font) + prompt_tip.setStyleSheet("color: gray;") + prompt_tip.setWordWrap(True) + prompt_tip.setContentsMargins(0, 2, 0, 10) + prompt_layout.addWidget(prompt_tip) + + # Negative prompt (mainly for SD) + self.negative_prompt_label = QLabel("Negative Prompt (SD only):") + prompt_layout.addWidget(self.negative_prompt_label) + + self.negative_entry = QLineEdit() + self.negative_entry.setText(self.settings.get('cloud_negative_prompt', 'text, writing, letters')) + prompt_layout.addWidget(self.negative_entry) + + # Inference steps (for SD) + self.steps_widget = QWidget() + steps_layout = QHBoxLayout(self.steps_widget) + steps_layout.setContentsMargins(0, 10, 0, 5) + prompt_layout.addWidget(self.steps_widget) + + self.steps_label = QLabel("Inference Steps (SD only):") + self.steps_label.setMinimumWidth(180) + steps_layout.addWidget(self.steps_label) + + self.steps_spinbox = QSpinBox() + self.steps_spinbox.setRange(10, 50) + self.steps_spinbox.setValue(self.settings.get('cloud_inference_steps', 20)) + steps_layout.addWidget(self.steps_spinbox) + + steps_desc = QLabel("(Higher = better quality, slower)") + steps_desc_font = QFont('Arial', 9) + steps_desc.setFont(steps_desc_font) + steps_desc.setStyleSheet("color: gray;") + steps_layout.addWidget(steps_desc) + steps_layout.addStretch() + + # Add stretch at end + content_layout.addStretch() + + # Initially hide prompt frame if not using appropriate model + if self.cloud_model_selected == 'custom': + self.prompt_group.setVisible(False) + + # Show/hide SD-specific options based on model + self._on_cloud_model_change(self.cloud_model_selected) + + def _on_cloud_model_change(self, model): + """Handle cloud model selection change""" + # Store the selected model + self.cloud_model_selected = model + + # Show/hide custom version entry + if model == 'custom': + self.custom_version_widget.setVisible(True) + # DON'T HIDE THE PROMPT FRAME FOR CUSTOM MODELS + self.prompt_group.setVisible(True) + else: + self.custom_version_widget.setVisible(False) + self.prompt_group.setVisible(True) + + # Show/hide SD-specific options + if model == 'sd-inpainting': + # Show negative prompt and steps + self.negative_prompt_label.setVisible(True) + self.negative_entry.setVisible(True) + self.steps_widget.setVisible(True) + else: + # Hide SD-specific options + self.negative_prompt_label.setVisible(False) + self.negative_entry.setVisible(False) + self.steps_widget.setVisible(False) + + def _toggle_preprocessing(self): + """Enable/disable preprocessing controls based on main toggle""" + enabled = self.preprocess_enabled.isChecked() + + # Process each control in preprocessing_controls list + for control in self.preprocessing_controls: + try: + if isinstance(control, QGroupBox): + # Enable/disable entire group box children + self._toggle_frame_children(control, enabled) + elif isinstance(control, (QSlider, QSpinBox, QCheckBox, QDoubleSpinBox, QComboBox, QLabel)): + # Just use setEnabled() - the global stylesheet handles the visual state + control.setEnabled(enabled) + except Exception as e: + pass + + # Ensure tiling fields respect their own toggle regardless of preprocessing state + try: + if hasattr(self, 
'_toggle_tiling_controls'): + self._toggle_tiling_controls() + except Exception: + pass + + def _toggle_frame_children(self, widget, enabled): + """Recursively enable/disable all children of a widget""" + # Handle all controls including labels - just use setEnabled() + for child in widget.findChildren(QWidget): + if isinstance(child, (QSlider, QSpinBox, QCheckBox, QDoubleSpinBox, QComboBox, QLineEdit, QLabel)): + try: + child.setEnabled(enabled) + except Exception: + pass + + def _toggle_roi_locality_controls(self): + """Show/hide ROI locality controls based on toggle.""" + try: + enabled = self.roi_locality_checkbox.isChecked() + except Exception: + enabled = False + # Rows to manage + rows = [ + getattr(self, 'roi_pad_row', None), + getattr(self, 'roi_min_row', None), + getattr(self, 'roi_area_row', None), + getattr(self, 'roi_max_row', None) + ] + for row in rows: + try: + if row is None: continue + row.setVisible(enabled) + except Exception: + pass + + def _toggle_tiling_controls(self): + """Enable/disable tiling size/overlap fields based on tiling toggle.""" + try: + enabled = bool(self.inpaint_tiling_enabled.isChecked()) + except Exception: + enabled = False + + # Enable/disable tiling widgets and their labels + widgets_to_toggle = [ + ('tile_size_spinbox', 'tile_size_label', 'tile_size_unit_label'), + ('tile_overlap_spinbox', 'tile_overlap_label', 'tile_overlap_unit_label') + ] + + for widget_names in widgets_to_toggle: + for widget_name in widget_names: + try: + widget = getattr(self, widget_name, None) + if widget is not None: + # Just use setEnabled() for everything - stylesheet handles visuals + widget.setEnabled(enabled) + except Exception: + pass + + def _on_hd_strategy_change(self): + """Show/hide HD strategy controls based on selected strategy.""" + try: + strategy = self.hd_strategy_combo.currentText() + except Exception: + strategy = 'original' + + # Show/hide resize limit based on strategy + if hasattr(self, 'hd_resize_frame'): + self.hd_resize_frame.setVisible(strategy == 'resize') + + # Show/hide crop params based on strategy + if hasattr(self, 'hd_crop_margin_frame'): + self.hd_crop_margin_frame.setVisible(strategy == 'crop') + if hasattr(self, 'hd_crop_trigger_frame'): + self.hd_crop_trigger_frame.setVisible(strategy == 'crop') + + def _toggle_compression_enabled(self): + """Enable/disable compression controls based on compression toggle.""" + try: + enabled = bool(self.compression_enabled.isChecked()) + except Exception: + enabled = False + + # Enable/disable all compression format controls + compression_widgets = [ + getattr(self, 'format_label', None), + getattr(self, 'compression_format_combo', None), + getattr(self, 'jpeg_frame', None), + getattr(self, 'jpeg_label', None), + getattr(self, 'jpeg_quality_spin', None), + getattr(self, 'jpeg_help', None), + getattr(self, 'png_frame', None), + getattr(self, 'png_label', None), + getattr(self, 'png_level_spin', None), + getattr(self, 'png_help', None), + getattr(self, 'webp_frame', None), + getattr(self, 'webp_label', None), + getattr(self, 'webp_quality_spin', None), + getattr(self, 'webp_help', None), + ] + + for widget in compression_widgets: + try: + if widget is not None: + widget.setEnabled(enabled) + except Exception: + pass + + def _toggle_compression_format(self): + """Show only the controls relevant to the selected format (hide others).""" + fmt = self.compression_format_combo.currentText().lower() if hasattr(self, 'compression_format_combo') else 'jpeg' + try: + # Hide all rows first + for row in 
[getattr(self, 'jpeg_frame', None), getattr(self, 'png_frame', None), getattr(self, 'webp_frame', None)]: + try: + if row is not None: + row.setVisible(False) + except Exception: + pass + # Show the selected one + if fmt == 'jpeg': + if hasattr(self, 'jpeg_frame') and self.jpeg_frame is not None: + self.jpeg_frame.setVisible(True) + elif fmt == 'png': + if hasattr(self, 'png_frame') and self.png_frame is not None: + self.png_frame.setVisible(True) + else: # webp + if hasattr(self, 'webp_frame') and self.webp_frame is not None: + self.webp_frame.setVisible(True) + except Exception: + pass + + def _toggle_ocr_batching_controls(self): + """Show/hide OCR batching rows based on enable toggle.""" + try: + enabled = bool(self.ocr_batch_enabled_checkbox.isChecked()) + except Exception: + enabled = False + try: + if hasattr(self, 'ocr_bs_row') and self.ocr_bs_row: + self.ocr_bs_row.setVisible(enabled) + except Exception: + pass + try: + if hasattr(self, 'ocr_cc_row') and self.ocr_cc_row: + self.ocr_cc_row.setVisible(enabled) + except Exception: + pass + + def _create_ocr_tab(self): + """Create OCR settings tab with all options""" + # Create tab widget and add to tab widget + tab_widget = QWidget() + self.tab_widget.addTab(tab_widget, "OCR") + + # Create scroll area for OCR settings + scroll_area = QScrollArea() + scroll_area.setWidgetResizable(True) + scroll_area.setFrameShape(QFrame.Shape.NoFrame) + + content_widget = QWidget() + content_layout = QVBoxLayout(content_widget) + content_layout.setSpacing(10) + content_layout.setContentsMargins(20, 20, 20, 20) + + scroll_area.setWidget(content_widget) + + # Add scroll area to parent layout + parent_layout = QVBoxLayout(tab_widget) + parent_layout.setContentsMargins(0, 0, 0, 0) + parent_layout.addWidget(scroll_area) + + # Language hints + lang_group = QGroupBox("Language Detection") + lang_layout = QVBoxLayout(lang_group) + content_layout.addWidget(lang_group) + + lang_desc = QLabel("Select languages to prioritize during OCR:") + lang_desc_font = QFont('Arial', 10) + lang_desc.setFont(lang_desc_font) + lang_layout.addWidget(lang_desc) + lang_layout.addSpacing(10) + + # Language checkboxes + self.lang_checkboxes = {} + languages = [ + ('ja', 'Japanese'), + ('ko', 'Korean'), + ('zh', 'Chinese (Simplified)'), + ('zh-TW', 'Chinese (Traditional)'), + ('en', 'English') + ] + + lang_grid_widget = QWidget() + lang_grid_layout = QGridLayout(lang_grid_widget) + lang_layout.addWidget(lang_grid_widget) + + for i, (code, name) in enumerate(languages): + checkbox = self._create_styled_checkbox(name) + checkbox.setChecked(code in self.settings['ocr']['language_hints']) + self.lang_checkboxes[code] = checkbox + lang_grid_layout.addWidget(checkbox, i//2, i%2) + + # OCR parameters + ocr_group = QGroupBox("OCR Parameters") + ocr_layout = QVBoxLayout(ocr_group) + content_layout.addWidget(ocr_group) + + # Confidence threshold + conf_widget = QWidget() + conf_layout = QHBoxLayout(conf_widget) + conf_layout.setContentsMargins(0, 0, 0, 0) + ocr_layout.addWidget(conf_widget) + + conf_label = QLabel("Confidence Threshold:") + conf_label.setMinimumWidth(180) + conf_layout.addWidget(conf_label) + + self.confidence_threshold_slider = QSlider(Qt.Orientation.Horizontal) + self.confidence_threshold_slider.setRange(0, 100) + self.confidence_threshold_slider.setValue(int(self.settings['ocr']['confidence_threshold'] * 100)) + self.confidence_threshold_slider.setMinimumWidth(250) + conf_layout.addWidget(self.confidence_threshold_slider) + + self.confidence_threshold_label = 
QLabel(f"{self.settings['ocr']['confidence_threshold']:.2f}") + self.confidence_threshold_label.setMinimumWidth(50) + self.confidence_threshold_slider.valueChanged.connect( + lambda v: self.confidence_threshold_label.setText(f"{v/100:.2f}") + ) + conf_layout.addWidget(self.confidence_threshold_label) + conf_layout.addStretch() + + # Detection mode + mode_widget = QWidget() + mode_layout = QHBoxLayout(mode_widget) + mode_layout.setContentsMargins(0, 0, 0, 0) + ocr_layout.addWidget(mode_widget) + + mode_label = QLabel("Detection Mode:") + mode_label.setMinimumWidth(180) + mode_layout.addWidget(mode_label) + + self.detection_mode_combo = QComboBox() + self.detection_mode_combo.addItems(['document', 'text']) + self.detection_mode_combo.setCurrentText(self.settings['ocr']['text_detection_mode']) + mode_layout.addWidget(self.detection_mode_combo) + + mode_desc = QLabel("(document = better for manga, text = simple layouts)") + mode_desc_font = QFont('Arial', 9) + mode_desc.setFont(mode_desc_font) + mode_desc.setStyleSheet("color: gray;") + mode_layout.addWidget(mode_desc) + mode_layout.addStretch() + + # Text merging settings + merge_group = QGroupBox("Text Region Merging") + merge_layout = QVBoxLayout(merge_group) + content_layout.addWidget(merge_group) + + # Merge nearby threshold + nearby_widget = QWidget() + nearby_layout = QHBoxLayout(nearby_widget) + nearby_layout.setContentsMargins(0, 0, 0, 0) + merge_layout.addWidget(nearby_widget) + + nearby_label = QLabel("Merge Distance:") + nearby_label.setMinimumWidth(180) + nearby_layout.addWidget(nearby_label) + + self.merge_nearby_threshold_spinbox = QSpinBox() + self.merge_nearby_threshold_spinbox.setRange(0, 200) + self.merge_nearby_threshold_spinbox.setSingleStep(10) + self.merge_nearby_threshold_spinbox.setValue(self.settings['ocr']['merge_nearby_threshold']) + nearby_layout.addWidget(self.merge_nearby_threshold_spinbox) + + nearby_unit = QLabel("pixels") + nearby_layout.addWidget(nearby_unit) + nearby_layout.addStretch() + + # Text Filtering Setting + filter_group = QGroupBox("Text Filtering") + filter_layout = QVBoxLayout(filter_group) + content_layout.addWidget(filter_group) + + # Minimum text length + min_length_widget = QWidget() + min_length_layout = QHBoxLayout(min_length_widget) + min_length_layout.setContentsMargins(0, 0, 0, 0) + filter_layout.addWidget(min_length_widget) + + min_length_label = QLabel("Min Text Length:") + min_length_label.setMinimumWidth(180) + min_length_layout.addWidget(min_length_label) + + self.min_text_length_spinbox = QSpinBox() + self.min_text_length_spinbox.setRange(1, 10) + self.min_text_length_spinbox.setValue(self.settings['ocr'].get('min_text_length', 0)) + min_length_layout.addWidget(self.min_text_length_spinbox) + + min_length_unit = QLabel("characters") + min_length_layout.addWidget(min_length_unit) + + min_length_desc = QLabel("(skip text shorter than this)") + min_length_desc_font = QFont('Arial', 9) + min_length_desc.setFont(min_length_desc_font) + min_length_desc.setStyleSheet("color: gray;") + min_length_layout.addWidget(min_length_desc) + min_length_layout.addStretch() + + # Exclude English text checkbox + self.exclude_english_checkbox = self._create_styled_checkbox("Exclude primarily English text (tunable threshold)") + self.exclude_english_checkbox.setChecked(self.settings['ocr'].get('exclude_english_text', False)) + filter_layout.addWidget(self.exclude_english_checkbox) + + # Threshold slider + english_threshold_widget = QWidget() + english_threshold_layout = 
QHBoxLayout(english_threshold_widget) + english_threshold_layout.setContentsMargins(0, 0, 0, 0) + filter_layout.addWidget(english_threshold_widget) + + threshold_label = QLabel("English Exclude Threshold:") + threshold_label.setMinimumWidth(240) + english_threshold_layout.addWidget(threshold_label) + + self.english_exclude_threshold_slider = QSlider(Qt.Orientation.Horizontal) + self.english_exclude_threshold_slider.setRange(60, 99) + self.english_exclude_threshold_slider.setValue(int(self.settings['ocr'].get('english_exclude_threshold', 0.7) * 100)) + self.english_exclude_threshold_slider.setMinimumWidth(250) + english_threshold_layout.addWidget(self.english_exclude_threshold_slider) + + self.english_threshold_label = QLabel(f"{int(self.settings['ocr'].get('english_exclude_threshold', 0.7)*100)}%") + self.english_threshold_label.setMinimumWidth(50) + self.english_exclude_threshold_slider.valueChanged.connect( + lambda v: self.english_threshold_label.setText(f"{v}%") + ) + english_threshold_layout.addWidget(self.english_threshold_label) + english_threshold_layout.addStretch() + + # Minimum character count + min_chars_widget = QWidget() + min_chars_layout = QHBoxLayout(min_chars_widget) + min_chars_layout.setContentsMargins(0, 0, 0, 0) + filter_layout.addWidget(min_chars_widget) + + min_chars_label = QLabel("Min chars to exclude as English:") + min_chars_label.setMinimumWidth(240) + min_chars_layout.addWidget(min_chars_label) + + self.english_exclude_min_chars_spinbox = QSpinBox() + self.english_exclude_min_chars_spinbox.setRange(1, 10) + self.english_exclude_min_chars_spinbox.setValue(self.settings['ocr'].get('english_exclude_min_chars', 4)) + min_chars_layout.addWidget(self.english_exclude_min_chars_spinbox) + + min_chars_unit = QLabel("characters") + min_chars_layout.addWidget(min_chars_unit) + min_chars_layout.addStretch() + + # Legacy aggressive short-token filter + self.english_exclude_short_tokens_checkbox = self._create_styled_checkbox("Aggressively drop very short ASCII tokens (legacy)") + self.english_exclude_short_tokens_checkbox.setChecked(self.settings['ocr'].get('english_exclude_short_tokens', False)) + filter_layout.addWidget(self.english_exclude_short_tokens_checkbox) + + # Help text + filter_help = QLabel( + "💡 Text filtering helps skip:\n" + " • UI elements and watermarks\n" + " • Page numbers and copyright text\n" + " • Single characters or symbols\n" + " • Non-target language text" + ) + filter_help_font = QFont('Arial', 9) + filter_help.setFont(filter_help_font) + filter_help.setStyleSheet("color: gray;") + filter_help.setWordWrap(True) + filter_help.setContentsMargins(0, 10, 0, 0) + filter_layout.addWidget(filter_help) + + # Azure-specific OCR settings + azure_ocr_group = QGroupBox("Azure OCR Settings") + azure_ocr_layout = QVBoxLayout(azure_ocr_group) + content_layout.addWidget(azure_ocr_group) + + # Azure merge multiplier + merge_mult_widget = QWidget() + merge_mult_layout = QHBoxLayout(merge_mult_widget) + merge_mult_layout.setContentsMargins(0, 0, 0, 0) + azure_ocr_layout.addWidget(merge_mult_widget) + + merge_mult_label = QLabel("Merge Multiplier:") + merge_mult_label.setMinimumWidth(180) + merge_mult_layout.addWidget(merge_mult_label) + + self.azure_merge_multiplier_slider = QSlider(Qt.Orientation.Horizontal) + self.azure_merge_multiplier_slider.setRange(100, 500) + self.azure_merge_multiplier_slider.setValue(int(self.settings['ocr'].get('azure_merge_multiplier', 2.0) * 100)) + self.azure_merge_multiplier_slider.setMinimumWidth(200) + 
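+ # QSlider is integer-only, so the merge multiplier is stored x100: the 100-500 slider range maps to 1.00x-5.00x (default 2.0 -> 200), divided back by 100 for display and saving.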
merge_mult_layout.addWidget(self.azure_merge_multiplier_slider) + + self.azure_label = QLabel(f"{self.settings['ocr'].get('azure_merge_multiplier', 2.0):.2f}x") + self.azure_label.setMinimumWidth(50) + self.azure_merge_multiplier_slider.valueChanged.connect( + lambda v: self.azure_label.setText(f"{v/100:.2f}x") + ) + merge_mult_layout.addWidget(self.azure_label) + + merge_mult_desc = QLabel("(multiplies merge distance for Azure lines)") + merge_mult_desc_font = QFont('Arial', 9) + merge_mult_desc.setFont(merge_mult_desc_font) + merge_mult_desc.setStyleSheet("color: gray;") + merge_mult_layout.addWidget(merge_mult_desc) + merge_mult_layout.addStretch() + + # Reading order + reading_order_widget = QWidget() + reading_order_layout = QHBoxLayout(reading_order_widget) + reading_order_layout.setContentsMargins(0, 0, 0, 0) + azure_ocr_layout.addWidget(reading_order_widget) + + reading_order_label = QLabel("Reading Order:") + reading_order_label.setMinimumWidth(180) + reading_order_layout.addWidget(reading_order_label) + + self.azure_reading_order_combo = QComboBox() + self.azure_reading_order_combo.addItems(['basic', 'natural']) + self.azure_reading_order_combo.setCurrentText(self.settings['ocr'].get('azure_reading_order', 'natural')) + reading_order_layout.addWidget(self.azure_reading_order_combo) + + reading_order_desc = QLabel("(natural = better for complex layouts)") + reading_order_desc_font = QFont('Arial', 9) + reading_order_desc.setFont(reading_order_desc_font) + reading_order_desc.setStyleSheet("color: gray;") + reading_order_layout.addWidget(reading_order_desc) + reading_order_layout.addStretch() + + # Model version + model_version_widget = QWidget() + model_version_layout = QHBoxLayout(model_version_widget) + model_version_layout.setContentsMargins(0, 0, 0, 0) + azure_ocr_layout.addWidget(model_version_widget) + + model_version_label = QLabel("Model Version:") + model_version_label.setMinimumWidth(180) + model_version_layout.addWidget(model_version_label) + + self.azure_model_version_combo = QComboBox() + self.azure_model_version_combo.addItems(['latest', '2022-04-30', '2022-01-30', '2021-09-30']) + self.azure_model_version_combo.setCurrentText(self.settings['ocr'].get('azure_model_version', 'latest')) + self.azure_model_version_combo.setEditable(True) + model_version_layout.addWidget(self.azure_model_version_combo) + + model_version_desc = QLabel("(use 'latest' for newest features)") + model_version_desc_font = QFont('Arial', 9) + model_version_desc.setFont(model_version_desc_font) + model_version_desc.setStyleSheet("color: gray;") + model_version_layout.addWidget(model_version_desc) + model_version_layout.addStretch() + + # Timeout settings + timeout_widget = QWidget() + timeout_layout = QHBoxLayout(timeout_widget) + timeout_layout.setContentsMargins(0, 0, 0, 0) + azure_ocr_layout.addWidget(timeout_widget) + + timeout_label = QLabel("Max Wait Time:") + timeout_label.setMinimumWidth(180) + timeout_layout.addWidget(timeout_label) + + self.azure_max_wait_spinbox = QSpinBox() + self.azure_max_wait_spinbox.setRange(10, 120) + self.azure_max_wait_spinbox.setSingleStep(5) + self.azure_max_wait_spinbox.setValue(self.settings['ocr'].get('azure_max_wait', 60)) + timeout_layout.addWidget(self.azure_max_wait_spinbox) + + timeout_unit = QLabel("seconds") + timeout_layout.addWidget(timeout_unit) + timeout_layout.addStretch() + + # Poll interval + poll_widget = QWidget() + poll_layout = QHBoxLayout(poll_widget) + poll_layout.setContentsMargins(0, 0, 0, 0) + azure_ocr_layout.addWidget(poll_widget) + 
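+ # The poll interval slider uses the same x100 integer scaling: 0-200 maps to 0.00-2.00 seconds (default 0.5 s -> 50).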
+ poll_label = QLabel("Poll Interval:") + poll_label.setMinimumWidth(180) + poll_layout.addWidget(poll_label) + + self.azure_poll_interval_slider = QSlider(Qt.Orientation.Horizontal) + self.azure_poll_interval_slider.setRange(0, 200) + self.azure_poll_interval_slider.setValue(int(self.settings['ocr'].get('azure_poll_interval', 0.5) * 100)) + self.azure_poll_interval_slider.setMinimumWidth(200) + poll_layout.addWidget(self.azure_poll_interval_slider) + + self.azure_poll_interval_label = QLabel(f"{self.settings['ocr'].get('azure_poll_interval', 0.5):.2f}") + self.azure_poll_interval_label.setMinimumWidth(50) + self.azure_poll_interval_slider.valueChanged.connect( + lambda v: self.azure_poll_interval_label.setText(f"{v/100:.2f}") + ) + poll_layout.addWidget(self.azure_poll_interval_label) + + poll_unit = QLabel("sec") + poll_layout.addWidget(poll_unit) + poll_layout.addStretch() + + # Help text + azure_help = QLabel( + "💡 Azure Read API auto-detects language well\n" + "💡 Natural reading order works better for manga panels" + ) + azure_help_font = QFont('Arial', 9) + azure_help.setFont(azure_help_font) + azure_help.setStyleSheet("color: gray;") + azure_help.setWordWrap(True) + azure_help.setContentsMargins(0, 10, 0, 0) + azure_ocr_layout.addWidget(azure_help) + + # Rotation correction + self.enable_rotation_checkbox = self._create_styled_checkbox("Enable automatic rotation correction for tilted text") + self.enable_rotation_checkbox.setChecked(self.settings['ocr']['enable_rotation_correction']) + merge_layout.addWidget(self.enable_rotation_checkbox) + + # OCR batching and locality settings + ocr_batch_group = QGroupBox("OCR Batching & Concurrency") + ocr_batch_layout = QVBoxLayout(ocr_batch_group) + content_layout.addWidget(ocr_batch_group) + + # Enable OCR batching + self.ocr_batch_enabled_checkbox = self._create_styled_checkbox("Enable OCR batching (independent of translation batching)") + self.ocr_batch_enabled_checkbox.setChecked(self.settings['ocr'].get('ocr_batch_enabled', True)) + self.ocr_batch_enabled_checkbox.stateChanged.connect(self._toggle_ocr_batching_controls) + ocr_batch_layout.addWidget(self.ocr_batch_enabled_checkbox) + + # OCR batch size + ocr_bs_widget = QWidget() + ocr_bs_layout = QHBoxLayout(ocr_bs_widget) + ocr_bs_layout.setContentsMargins(0, 0, 0, 0) + ocr_batch_layout.addWidget(ocr_bs_widget) + self.ocr_bs_row = ocr_bs_widget + + ocr_bs_label = QLabel("OCR Batch Size:") + ocr_bs_label.setMinimumWidth(180) + ocr_bs_layout.addWidget(ocr_bs_label) + + self.ocr_batch_size_spinbox = QSpinBox() + self.ocr_batch_size_spinbox.setRange(1, 32) + self.ocr_batch_size_spinbox.setValue(int(self.settings['ocr'].get('ocr_batch_size', 8))) + ocr_bs_layout.addWidget(self.ocr_batch_size_spinbox) + + ocr_bs_desc = QLabel("(Google: items/request; Azure: drives concurrency)") + ocr_bs_desc_font = QFont('Arial', 9) + ocr_bs_desc.setFont(ocr_bs_desc_font) + ocr_bs_desc.setStyleSheet("color: gray;") + ocr_bs_layout.addWidget(ocr_bs_desc) + ocr_bs_layout.addStretch() + + # OCR Max Concurrency + ocr_cc_widget = QWidget() + ocr_cc_layout = QHBoxLayout(ocr_cc_widget) + ocr_cc_layout.setContentsMargins(0, 0, 0, 0) + ocr_batch_layout.addWidget(ocr_cc_widget) + self.ocr_cc_row = ocr_cc_widget + + ocr_cc_label = QLabel("OCR Max Concurrency:") + ocr_cc_label.setMinimumWidth(180) + ocr_cc_layout.addWidget(ocr_cc_label) + + self.ocr_max_conc_spinbox = QSpinBox() + self.ocr_max_conc_spinbox.setRange(1, 8) + self.ocr_max_conc_spinbox.setValue(int(self.settings['ocr'].get('ocr_max_concurrency', 2))) + 
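+ # Meaning of this value depends on the OCR provider (see hint label below): Google treats it as parallel requests, Azure as worker threads capped at 4.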
ocr_cc_layout.addWidget(self.ocr_max_conc_spinbox) + + ocr_cc_desc = QLabel("(Google: concurrent requests; Azure: workers, capped at 4)") + ocr_cc_desc_font = QFont('Arial', 9) + ocr_cc_desc.setFont(ocr_cc_desc_font) + ocr_cc_desc.setStyleSheet("color: gray;") + ocr_cc_layout.addWidget(ocr_cc_desc) + ocr_cc_layout.addStretch() + + # Apply initial visibility for OCR batching controls + try: + self._toggle_ocr_batching_controls() + except Exception: + pass + + # ROI sizing + roi_group = QGroupBox("ROI Locality Controls") + roi_layout = QVBoxLayout(roi_group) + content_layout.addWidget(roi_group) + + # ROI locality toggle (now inside this section) + self.roi_locality_checkbox = self._create_styled_checkbox("Enable ROI-based OCR locality and batching (uses bubble detection)") + self.roi_locality_checkbox.setChecked(self.settings['ocr'].get('roi_locality_enabled', False)) + self.roi_locality_checkbox.stateChanged.connect(self._toggle_roi_locality_controls) + roi_layout.addWidget(self.roi_locality_checkbox) + + # ROI padding ratio + roi_pad_widget = QWidget() + roi_pad_layout = QHBoxLayout(roi_pad_widget) + roi_pad_layout.setContentsMargins(0, 0, 0, 0) + roi_layout.addWidget(roi_pad_widget) + self.roi_pad_row = roi_pad_widget + + roi_pad_label = QLabel("ROI Padding Ratio:") + roi_pad_label.setMinimumWidth(180) + roi_pad_layout.addWidget(roi_pad_label) + + self.roi_padding_ratio_slider = QSlider(Qt.Orientation.Horizontal) + self.roi_padding_ratio_slider.setRange(0, 30) + self.roi_padding_ratio_slider.setValue(int(float(self.settings['ocr'].get('roi_padding_ratio', 0.08)) * 100)) + self.roi_padding_ratio_slider.setMinimumWidth(200) + roi_pad_layout.addWidget(self.roi_padding_ratio_slider) + + self.roi_padding_ratio_label = QLabel(f"{float(self.settings['ocr'].get('roi_padding_ratio', 0.08)):.2f}") + self.roi_padding_ratio_label.setMinimumWidth(50) + self.roi_padding_ratio_slider.valueChanged.connect( + lambda v: self.roi_padding_ratio_label.setText(f"{v/100:.2f}") + ) + roi_pad_layout.addWidget(self.roi_padding_ratio_label) + roi_pad_layout.addStretch() + + # ROI min side / area + roi_min_widget = QWidget() + roi_min_layout = QHBoxLayout(roi_min_widget) + roi_min_layout.setContentsMargins(0, 0, 0, 0) + roi_layout.addWidget(roi_min_widget) + self.roi_min_row = roi_min_widget + + roi_min_label = QLabel("Min ROI Side:") + roi_min_label.setMinimumWidth(180) + roi_min_layout.addWidget(roi_min_label) + + self.roi_min_side_spinbox = QSpinBox() + self.roi_min_side_spinbox.setRange(1, 64) + self.roi_min_side_spinbox.setValue(int(self.settings['ocr'].get('roi_min_side_px', 12))) + roi_min_layout.addWidget(self.roi_min_side_spinbox) + + roi_min_unit = QLabel("px") + roi_min_layout.addWidget(roi_min_unit) + roi_min_layout.addStretch() + + roi_area_widget = QWidget() + roi_area_layout = QHBoxLayout(roi_area_widget) + roi_area_layout.setContentsMargins(0, 0, 0, 0) + roi_layout.addWidget(roi_area_widget) + self.roi_area_row = roi_area_widget + + roi_area_label = QLabel("Min ROI Area:") + roi_area_label.setMinimumWidth(180) + roi_area_layout.addWidget(roi_area_label) + + self.roi_min_area_spinbox = QSpinBox() + self.roi_min_area_spinbox.setRange(1, 5000) + self.roi_min_area_spinbox.setValue(int(self.settings['ocr'].get('roi_min_area_px', 100))) + roi_area_layout.addWidget(self.roi_min_area_spinbox) + + roi_area_unit = QLabel("px^2") + roi_area_layout.addWidget(roi_area_unit) + roi_area_layout.addStretch() + + # ROI max side (0 disables) + roi_max_widget = QWidget() + roi_max_layout = QHBoxLayout(roi_max_widget) + 
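# A max side of 0 disables the ROI size cap (hence the "0=off" label below). +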
roi_max_layout.setContentsMargins(0, 0, 0, 0) + roi_layout.addWidget(roi_max_widget) + self.roi_max_row = roi_max_widget + + roi_max_label = QLabel("ROI Max Side (0=off):") + roi_max_label.setMinimumWidth(180) + roi_max_layout.addWidget(roi_max_label) + + self.roi_max_side_spinbox = QSpinBox() + self.roi_max_side_spinbox.setRange(0, 2048) + self.roi_max_side_spinbox.setValue(int(self.settings['ocr'].get('roi_max_side', 0))) + roi_max_layout.addWidget(self.roi_max_side_spinbox) + roi_max_layout.addStretch() + + # Apply initial visibility based on toggle + self._toggle_roi_locality_controls() + + # AI Bubble Detection Settings + bubble_group = QGroupBox("AI Bubble Detection") + bubble_layout = QVBoxLayout(bubble_group) + content_layout.addWidget(bubble_group) + + # Enable bubble detection + self.bubble_detection_enabled_checkbox = self._create_styled_checkbox("Enable AI-powered bubble detection (overrides traditional merging)") + # IMPORTANT: Default to True for optimal text detection (especially for Chinese/Japanese text) + self.bubble_detection_enabled_checkbox.setChecked(self.settings['ocr'].get('bubble_detection_enabled', True)) + self.bubble_detection_enabled_checkbox.stateChanged.connect(self._toggle_bubble_controls) + bubble_layout.addWidget(self.bubble_detection_enabled_checkbox) + + # Use RT-DETR for text region detection (not just bubble detection) + self.use_rtdetr_for_ocr_checkbox = self._create_styled_checkbox("Use RT-DETR to guide OCR (Google/Azure only - others already do this)") + self.use_rtdetr_for_ocr_checkbox.setChecked(self.settings['ocr'].get('use_rtdetr_for_ocr_regions', True)) # Default: True + self.use_rtdetr_for_ocr_checkbox.setToolTip( + "When enabled, RT-DETR first detects all text regions (text bubbles + free text), \n" + "then your OCR provider reads each region separately.\n\n" + "🎯 Applies to: Google Cloud Vision, Azure Computer Vision\n" + "✓ Already enabled: Qwen2-VL, Custom API, EasyOCR, PaddleOCR, DocTR, manga-ocr\n\n" + "Benefits:\n" + "• More accurate text detection (trained specifically for manga/comics)\n" + "• Better separation of overlapping text\n" + "• Improved handling of different text types (bubbles vs. free text)\n" + "• Focused OCR on actual text regions (faster, more accurate)\n\n" + "Note: Requires bubble detection to be enabled and uses the selected detector above." 
+ ) + bubble_layout.addWidget(self.use_rtdetr_for_ocr_checkbox) + + # Detector type dropdown + detector_type_widget = QWidget() + detector_type_layout = QHBoxLayout(detector_type_widget) + detector_type_layout.setContentsMargins(0, 10, 0, 0) + bubble_layout.addWidget(detector_type_widget) + + detector_type_label = QLabel("Detector:") + detector_type_label.setMinimumWidth(120) + detector_type_layout.addWidget(detector_type_label) + + # Model mapping + self.detector_models = { + 'RTEDR_onnx': 'ogkalu/comic-text-and-bubble-detector', + 'RT-DETR': 'ogkalu/comic-text-and-bubble-detector', + 'YOLOv8 Speech': 'ogkalu/comic-speech-bubble-detector-yolov8m', + 'YOLOv8 Text': 'ogkalu/comic-text-segmenter-yolov8m', + 'YOLOv8 Manga': 'ogkalu/manga-text-detector-yolov8s', + 'Custom Model': '' + } + + # Get saved detector type (default to ONNX backend) + saved_type = self.settings['ocr'].get('detector_type', 'rtdetr_onnx') + if saved_type == 'rtdetr_onnx': + initial_selection = 'RTEDR_onnx' + elif saved_type == 'rtdetr': + initial_selection = 'RT-DETR' + elif saved_type == 'yolo': + initial_selection = 'YOLOv8 Speech' + elif saved_type == 'custom': + initial_selection = 'Custom Model' + else: + initial_selection = 'RTEDR_onnx' + + self.detector_type_combo = QComboBox() + self.detector_type_combo.addItems(list(self.detector_models.keys())) + self.detector_type_combo.setCurrentText(initial_selection) + self.detector_type_combo.currentTextChanged.connect(self._on_detector_type_changed) + detector_type_layout.addWidget(self.detector_type_combo) + detector_type_layout.addStretch() + + # NOW create the settings frame + self.yolo_settings_group = QGroupBox("Model Settings") + yolo_settings_layout = QVBoxLayout(self.yolo_settings_group) + bubble_layout.addWidget(self.yolo_settings_group) + self.rtdetr_settings_frame = self.yolo_settings_group # Alias for compatibility + + # Model path/URL row + model_widget = QWidget() + model_layout = QHBoxLayout(model_widget) + model_layout.setContentsMargins(0, 5, 0, 0) + yolo_settings_layout.addWidget(model_widget) + + model_label = QLabel("Model:") + model_label.setMinimumWidth(100) + model_layout.addWidget(model_label) + + self.bubble_model_entry = QLineEdit() + self.bubble_model_entry.setText(self.settings['ocr'].get('bubble_model_path', '')) + self.bubble_model_entry.setReadOnly(True) + self.bubble_model_entry.setStyleSheet( + "QLineEdit { background-color: #1e1e1e; color: #ffffff; border: 1px solid #3a3a3a; }" + ) + model_layout.addWidget(self.bubble_model_entry) + self.rtdetr_url_entry = self.bubble_model_entry # Alias + + # Store for compatibility + self.detector_radio_widgets = [self.detector_type_combo] + + # Browse and Clear buttons (initially hidden for HuggingFace models) + self.bubble_browse_btn = QPushButton("Browse") + self.bubble_browse_btn.clicked.connect(self._browse_bubble_model) + model_layout.addWidget(self.bubble_browse_btn) + + self.bubble_clear_btn = QPushButton("Clear") + self.bubble_clear_btn.clicked.connect(self._clear_bubble_model) + model_layout.addWidget(self.bubble_clear_btn) + model_layout.addStretch() + + # Download and Load buttons + button_widget = QWidget() + button_layout = QHBoxLayout(button_widget) + button_layout.setContentsMargins(0, 10, 0, 0) + yolo_settings_layout.addWidget(button_widget) + + button_label = QLabel("Actions:") + button_label.setMinimumWidth(100) + button_layout.addWidget(button_label) + + self.rtdetr_download_btn = QPushButton("Download") + self.rtdetr_download_btn.clicked.connect(self._download_rtdetr_model) + 
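# Blue accent styling; the Load Model button below reuses the same stylesheet. +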
self.rtdetr_download_btn.setStyleSheet(""" + QPushButton { + background-color: #5a9fd4; + color: white; + font-weight: bold; + border: none; + border-radius: 3px; + padding: 5px 15px; + } + QPushButton:hover { + background-color: #7bb3e0; + } + QPushButton:pressed { + background-color: #4a8fc4; + } + """) + button_layout.addWidget(self.rtdetr_download_btn) + + self.rtdetr_load_btn = QPushButton("Load Model") + self.rtdetr_load_btn.clicked.connect(self._load_rtdetr_model) + self.rtdetr_load_btn.setStyleSheet(""" + QPushButton { + background-color: #5a9fd4; + color: white; + font-weight: bold; + border: none; + border-radius: 3px; + padding: 5px 15px; + } + QPushButton:hover { + background-color: #7bb3e0; + } + QPushButton:pressed { + background-color: #4a8fc4; + } + """) + button_layout.addWidget(self.rtdetr_load_btn) + + self.rtdetr_status_label = QLabel("") + rtdetr_status_font = QFont('Arial', 9) + self.rtdetr_status_label.setFont(rtdetr_status_font) + button_layout.addWidget(self.rtdetr_status_label) + button_layout.addStretch() + + # RT-DETR Detection classes + rtdetr_classes_widget = QWidget() + rtdetr_classes_layout = QHBoxLayout(rtdetr_classes_widget) + rtdetr_classes_layout.setContentsMargins(0, 10, 0, 0) + yolo_settings_layout.addWidget(rtdetr_classes_widget) + self.rtdetr_classes_frame = rtdetr_classes_widget + + classes_label = QLabel("Detect:") + classes_label.setMinimumWidth(100) + rtdetr_classes_layout.addWidget(classes_label) + + self.detect_empty_bubbles_checkbox = self._create_styled_checkbox("Empty Bubbles") + self.detect_empty_bubbles_checkbox.setChecked(self.settings['ocr'].get('detect_empty_bubbles', True)) + rtdetr_classes_layout.addWidget(self.detect_empty_bubbles_checkbox) + + self.detect_text_bubbles_checkbox = self._create_styled_checkbox("Text Bubbles") + self.detect_text_bubbles_checkbox.setChecked(self.settings['ocr'].get('detect_text_bubbles', True)) + rtdetr_classes_layout.addWidget(self.detect_text_bubbles_checkbox) + + self.detect_free_text_checkbox = self._create_styled_checkbox("Free Text") + self.detect_free_text_checkbox.setChecked(self.settings['ocr'].get('detect_free_text', True)) + rtdetr_classes_layout.addWidget(self.detect_free_text_checkbox) + rtdetr_classes_layout.addStretch() + + # Confidence + conf_widget = QWidget() + conf_layout = QHBoxLayout(conf_widget) + conf_layout.setContentsMargins(0, 10, 0, 0) + yolo_settings_layout.addWidget(conf_widget) + + conf_label = QLabel("Confidence:") + conf_label.setMinimumWidth(100) + conf_layout.addWidget(conf_label) + + detector_label = self.detector_type_combo.currentText() + default_conf = 0.3 if ('RT-DETR' in detector_label or 'RTEDR_onnx' in detector_label or 'onnx' in detector_label.lower()) else 0.5 + + self.bubble_conf_slider = QSlider(Qt.Orientation.Horizontal) + self.bubble_conf_slider.setRange(0, 99) + self.bubble_conf_slider.setValue(int(self.settings['ocr'].get('bubble_confidence', default_conf) * 100)) + self.bubble_conf_slider.setMinimumWidth(200) + conf_layout.addWidget(self.bubble_conf_slider) + self.rtdetr_conf_scale = self.bubble_conf_slider # Alias + + self.bubble_conf_label = QLabel(f"{self.settings['ocr'].get('bubble_confidence', default_conf):.2f}") + self.bubble_conf_label.setMinimumWidth(50) + self.bubble_conf_slider.valueChanged.connect( + lambda v: self.bubble_conf_label.setText(f"{v/100:.2f}") + ) + conf_layout.addWidget(self.bubble_conf_label) + self.rtdetr_conf_label = self.bubble_conf_label # Alias + conf_layout.addStretch() + + # YOLO-specific: Max detections (only visible 
for YOLO) + self.yolo_maxdet_widget = QWidget() + yolo_maxdet_layout = QHBoxLayout(self.yolo_maxdet_widget) + yolo_maxdet_layout.setContentsMargins(0, 6, 0, 0) + yolo_settings_layout.addWidget(self.yolo_maxdet_widget) + self.yolo_maxdet_row = self.yolo_maxdet_widget # Alias + self.yolo_maxdet_widget.setVisible(False) # Hidden initially + + maxdet_label = QLabel("Max detections:") + maxdet_label.setMinimumWidth(100) + yolo_maxdet_layout.addWidget(maxdet_label) + + self.bubble_max_det_yolo_spinbox = QSpinBox() + self.bubble_max_det_yolo_spinbox.setRange(1, 2000) + self.bubble_max_det_yolo_spinbox.setValue(self.settings['ocr'].get('bubble_max_detections_yolo', 100)) + yolo_maxdet_layout.addWidget(self.bubble_max_det_yolo_spinbox) + yolo_maxdet_layout.addStretch() + + # Status label at the bottom of bubble group + self.bubble_status_label = QLabel("") + bubble_status_font = QFont('Arial', 9) + self.bubble_status_label.setFont(bubble_status_font) + bubble_status_label_container = QWidget() + bubble_status_label_layout = QVBoxLayout(bubble_status_label_container) + bubble_status_label_layout.setContentsMargins(0, 10, 0, 0) + bubble_status_label_layout.addWidget(self.bubble_status_label) + bubble_layout.addWidget(bubble_status_label_container) + + # Store controls for enable/disable + self.bubble_controls = [ + self.detector_type_combo, + self.bubble_model_entry, + self.bubble_browse_btn, + self.bubble_clear_btn, + self.bubble_conf_slider, + self.rtdetr_download_btn, + self.rtdetr_load_btn + ] + + self.rtdetr_controls = [ + self.bubble_model_entry, + self.rtdetr_load_btn, + self.rtdetr_download_btn, + self.bubble_conf_slider, + self.detect_empty_bubbles_checkbox, + self.detect_text_bubbles_checkbox, + self.detect_free_text_checkbox + ] + + self.yolo_controls = [ + self.bubble_model_entry, + self.bubble_browse_btn, + self.bubble_clear_btn, + self.bubble_conf_slider, + self.yolo_maxdet_widget + ] + + # Add stretch to end of OCR tab content + content_layout.addStretch() + + # Initialize control states + self._toggle_bubble_controls() + + # Only call detector change after everything is initialized + if self.bubble_detection_enabled_checkbox.isChecked(): + try: + self._on_detector_type_changed() + self._update_bubble_status() + except AttributeError: + # Frames not yet created, skip initialization + pass + + # Check status after dialog ready + QTimer.singleShot(500, self._check_rtdetr_status) + + def _on_detector_type_changed(self, detector=None): + """Handle detector type change""" + if not hasattr(self, 'bubble_detection_enabled_checkbox'): + return + + if not self.bubble_detection_enabled_checkbox.isChecked(): + self.yolo_settings_group.setVisible(False) + return + + if detector is None: + detector = self.detector_type_combo.currentText() + + # Handle different detector types + if detector == 'Custom Model': + # Custom model - enable manual entry + self.bubble_model_entry.setText(self.settings['ocr'].get('custom_model_path', '')) + self.bubble_model_entry.setReadOnly(False) + self.bubble_model_entry.setStyleSheet( + "QLineEdit { background-color: #2b2b2b; color: #ffffff; border: 1px solid #3a3a3a; }" + ) + # Show browse/clear buttons for custom + self.bubble_browse_btn.setVisible(True) + self.bubble_clear_btn.setVisible(True) + # Hide download button + self.rtdetr_download_btn.setVisible(False) + elif detector in self.detector_models: + # HuggingFace model + url = self.detector_models[detector] + self.bubble_model_entry.setText(url) + # Make entry read-only for HuggingFace models + 
self.bubble_model_entry.setReadOnly(True) + self.bubble_model_entry.setStyleSheet( + "QLineEdit { background-color: #1e1e1e; color: #ffffff; border: 1px solid #3a3a3a; }" + ) + # Hide browse/clear buttons for HuggingFace models + self.bubble_browse_btn.setVisible(False) + self.bubble_clear_btn.setVisible(False) + # Show download button + self.rtdetr_download_btn.setVisible(True) + + # Show/hide RT-DETR specific controls + is_rtdetr = 'RT-DETR' in detector or 'RTEDR_onnx' in detector + + if is_rtdetr: + self.rtdetr_classes_frame.setVisible(True) + # Hide YOLO-only max det row + self.yolo_maxdet_widget.setVisible(False) + else: + self.rtdetr_classes_frame.setVisible(False) + # Show YOLO-only max det row for YOLO models + if 'YOLO' in detector or 'Yolo' in detector or 'yolo' in detector or detector == 'Custom Model': + self.yolo_maxdet_widget.setVisible(True) + else: + self.yolo_maxdet_widget.setVisible(False) + + # Show/hide RT-DETR concurrency control in Performance section (Advanced tab) + # Only update if the widget has been created (Advanced tab may not be loaded yet) + if hasattr(self, 'rtdetr_conc_frame'): + self.rtdetr_conc_frame.setVisible(is_rtdetr) + + # Always show settings frame + self.yolo_settings_group.setVisible(True) + + # Update status + self._update_bubble_status() + + def _download_rtdetr_model(self): + """Download selected model""" + try: + detector = self.detector_type_combo.currentText() + model_url = self.bubble_model_entry.text() + + self.rtdetr_status_label.setText("Downloading...") + self.rtdetr_status_label.setStyleSheet("color: orange;") + QApplication.processEvents() + + if 'RTEDR_onnx' in detector: + from bubble_detector import BubbleDetector + bd = BubbleDetector() + if bd.load_rtdetr_onnx_model(model_id=model_url): + self.rtdetr_status_label.setText("✅ Downloaded") + self.rtdetr_status_label.setStyleSheet("color: green;") + QMessageBox.information(self, "Success", f"RTEDR_onnx model downloaded successfully!") + else: + self.rtdetr_status_label.setText("❌ Failed") + self.rtdetr_status_label.setStyleSheet("color: red;") + QMessageBox.critical(self, "Error", f"Failed to download RTEDR_onnx model") + elif 'RT-DETR' in detector: + # RT-DETR handling (works fine) + from bubble_detector import BubbleDetector + bd = BubbleDetector() + + if bd.load_rtdetr_model(model_id=model_url): + self.rtdetr_status_label.setText("✅ Downloaded") + self.rtdetr_status_label.setStyleSheet("color: green;") + QMessageBox.information(self, "Success", f"RT-DETR model downloaded successfully!") + else: + self.rtdetr_status_label.setText("❌ Failed") + self.rtdetr_status_label.setStyleSheet("color: red;") + QMessageBox.critical(self, "Error", f"Failed to download RT-DETR model") + else: + # FIX FOR YOLO: Download to a simpler local path + from huggingface_hub import hf_hub_download + import os + + # Create models directory + models_dir = "models" + os.makedirs(models_dir, exist_ok=True) + + # Define simple local filenames + filename_map = { + 'ogkalu/comic-speech-bubble-detector-yolov8m': 'comic-speech-bubble-detector.pt', + 'ogkalu/comic-text-segmenter-yolov8m': 'comic-text-segmenter.pt', + 'ogkalu/manga-text-detector-yolov8s': 'manga-text-detector.pt' + } + + filename = filename_map.get(model_url, 'model.pt') + + # Download to cache first + cached_path = hf_hub_download(repo_id=model_url, filename=filename) + + # Copy to local models directory with simple path + import shutil + local_path = os.path.join(models_dir, filename) + shutil.copy2(cached_path, local_path) + + # Set the simple 
local path instead of the cache path + self.bubble_model_entry.setText(local_path) + self.rtdetr_status_label.setText("✅ Downloaded") + self.rtdetr_status_label.setStyleSheet("color: green;") + QMessageBox.information(self, "Success", f"Model downloaded to:\n{local_path}") + + except ImportError: + self.rtdetr_status_label.setText("❌ Missing deps") + self.rtdetr_status_label.setStyleSheet("color: red;") + QMessageBox.critical(self, "Error", "Install: pip install huggingface-hub transformers") + except Exception as e: + self.rtdetr_status_label.setText("❌ Error") + self.rtdetr_status_label.setStyleSheet("color: red;") + QMessageBox.critical(self, "Error", f"Download failed: {e}") + + def _check_rtdetr_status(self): + """Check if model is already loaded""" + try: + from bubble_detector import BubbleDetector + + if hasattr(self.main_gui, 'manga_tab') and hasattr(self.main_gui.manga_tab, 'translator'): + translator = self.main_gui.manga_tab.translator + if hasattr(translator, 'bubble_detector') and translator.bubble_detector: + if getattr(translator.bubble_detector, 'rtdetr_onnx_loaded', False): + self.rtdetr_status_label.setText("✅ Loaded") + self.rtdetr_status_label.setStyleSheet("color: green;") + return True + if getattr(translator.bubble_detector, 'rtdetr_loaded', False): + self.rtdetr_status_label.setText("✅ Loaded") + self.rtdetr_status_label.setStyleSheet("color: green;") + return True + elif getattr(translator.bubble_detector, 'model_loaded', False): + self.rtdetr_status_label.setText("✅ Loaded") + self.rtdetr_status_label.setStyleSheet("color: green;") + return True + + self.rtdetr_status_label.setText("Not loaded") + self.rtdetr_status_label.setStyleSheet("color: gray;") + return False + + except ImportError: + self.rtdetr_status_label.setText("❌ Missing deps") + self.rtdetr_status_label.setStyleSheet("color: red;") + return False + except Exception: + self.rtdetr_status_label.setText("Not loaded") + self.rtdetr_status_label.setStyleSheet("color: gray;") + return False + + def _load_rtdetr_model(self): + """Load selected model""" + try: + from bubble_detector import BubbleDetector + from PySide6.QtWidgets import QApplication + + self.rtdetr_status_label.setText("Loading...") + self.rtdetr_status_label.setStyleSheet("color: orange;") + QApplication.processEvents() + + bd = BubbleDetector() + detector = self.detector_type_combo.currentText() + model_path = self.bubble_model_entry.text() + + if 'RTEDR_onnx' in detector: + # RT-DETR (ONNX) uses repo id directly + if bd.load_rtdetr_onnx_model(model_id=model_path): + self.rtdetr_status_label.setText("✅ Ready") + self.rtdetr_status_label.setStyleSheet("color: green;") + QMessageBox.information(self, "Success", f"RTEDR_onnx model loaded successfully!") + else: + self.rtdetr_status_label.setText("❌ Failed") + self.rtdetr_status_label.setStyleSheet("color: red;") + elif 'RT-DETR' in detector: + # RT-DETR uses model_id directly + if bd.load_rtdetr_model(model_id=model_path): + self.rtdetr_status_label.setText("✅ Ready") + self.rtdetr_status_label.setStyleSheet("color: green;") + QMessageBox.information(self, "Success", f"RT-DETR model loaded successfully!") + else: + self.rtdetr_status_label.setText("❌ Failed") + self.rtdetr_status_label.setStyleSheet("color: red;") + else: + # YOLOv8 - CHECK LOCAL MODELS FOLDER FIRST + if model_path.startswith('ogkalu/'): + # It's a HuggingFace ID - check if already downloaded + filename_map = { + 'ogkalu/comic-speech-bubble-detector-yolov8m': 'comic-speech-bubble-detector.pt', + 
'ogkalu/comic-text-segmenter-yolov8m': 'comic-text-segmenter.pt', + 'ogkalu/manga-text-detector-yolov8s': 'manga-text-detector.pt' + } + + filename = filename_map.get(model_path, 'model.pt') + local_path = os.path.join('models', filename) + + # Check if it exists locally + if os.path.exists(local_path): + # Use the local file + model_path = local_path + self.bubble_model_entry.setText(local_path) # Update the field + else: + # Not downloaded yet + QMessageBox.warning(self, "Download Required", + f"Model not found locally.\nPlease download it first using the Download button.") + self.rtdetr_status_label.setText("❌ Not downloaded") + self.rtdetr_status_label.setStyleSheet("color: orange;") + return + + # Now model_path should be a local file + if not os.path.exists(model_path): + QMessageBox.critical(self, "Error", f"Model file not found: {model_path}") + self.rtdetr_status_label.setText("❌ File not found") + self.rtdetr_status_label.setStyleSheet("color: red;") + return + + # Load the YOLOv8 model from local file + if bd.load_model(model_path): + self.rtdetr_status_label.setText("✅ Ready") + self.rtdetr_status_label.setStyleSheet("color: green;") + QMessageBox.information(self, "Success", f"YOLOv8 model loaded successfully!") + + # Auto-convert to ONNX if enabled + if os.environ.get('AUTO_CONVERT_TO_ONNX', 'true').lower() == 'true': + onnx_path = model_path.replace('.pt', '.onnx') + if not os.path.exists(onnx_path): + if bd.convert_to_onnx(model_path, onnx_path): + logger.info(f"✅ Converted to ONNX: {onnx_path}") + else: + self.rtdetr_status_label.setText("❌ Failed") + self.rtdetr_status_label.setStyleSheet("color: red;") + + except ImportError: + self.rtdetr_status_label.setText("❌ Missing deps") + self.rtdetr_status_label.setStyleSheet("color: red;") + QMessageBox.critical(self, "Error", "Install transformers: pip install transformers") + except Exception as e: + self.rtdetr_status_label.setText("❌ Error") + self.rtdetr_status_label.setStyleSheet("color: red;") + QMessageBox.critical(self, "Error", f"Failed to load: {e}") + + def _toggle_bubble_controls(self): + """Enable/disable bubble detection controls""" + enabled = self.bubble_detection_enabled_checkbox.isChecked() + + if enabled: + # Enable controls + for widget in self.bubble_controls: + try: + widget.setEnabled(True) + except: + pass + + # Show/hide frames based on detector type + self._on_detector_type_changed() + else: + # Disable controls + for widget in self.bubble_controls: + try: + widget.setEnabled(False) + except: + pass + + # Hide frames + self.yolo_settings_group.setVisible(False) + self.bubble_status_label.setText("") + + def _browse_bubble_model(self): + """Browse for model file""" + path, _ = QFileDialog.getOpenFileName( + self, + "Select Model File", + "", + "Model files (*.pt *.pth *.bin *.safetensors);;All files (*.*)" + ) + + if path: + self.bubble_model_entry.setText(path) + self._update_bubble_status() + + def _clear_bubble_model(self): + """Clear selected model""" + self.bubble_model_entry.setText("") + self._update_bubble_status() + + def _update_bubble_status(self): + """Update bubble model status label""" + if not self.bubble_detection_enabled_checkbox.isChecked(): + self.bubble_status_label.setText("") + return + + detector = self.detector_type_combo.currentText() + model_path = self.bubble_model_entry.text() + + if not model_path: + self.bubble_status_label.setText("⚠️ No model selected") + self.bubble_status_label.setStyleSheet("color: orange;") + return + + if model_path.startswith("ogkalu/"): + 
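# A HuggingFace repo id means the model is not on disk yet, so prompt the user to download it first. +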
self.bubble_status_label.setText(f"📥 {detector} ready to download") + self.bubble_status_label.setStyleSheet("color: blue;") + elif os.path.exists(model_path): + self.bubble_status_label.setText("✅ Model file ready") + self.bubble_status_label.setStyleSheet("color: green;") + else: + self.bubble_status_label.setText("❌ Model file not found") + self.bubble_status_label.setStyleSheet("color: red;") + + def _update_azure_label(self): + """Update Azure multiplier label""" + # This method is deprecated - Azure multiplier UI was removed + pass + + def _set_azure_multiplier(self, value): + """Set Azure multiplier from preset""" + # This method is deprecated - Azure multiplier UI was removed + pass + + def _create_advanced_tab(self): + """Create advanced settings tab with all options""" + # Create tab widget and add to tab widget + tab_widget = QWidget() + self.tab_widget.addTab(tab_widget, "Advanced") + + # Main scrollable content + main_layout = QVBoxLayout(tab_widget) + main_layout.setContentsMargins(5, 5, 5, 5) + main_layout.setSpacing(6) + + # Format detection + detect_group = QGroupBox("Format Detection") + main_layout.addWidget(detect_group) + detect_layout = QVBoxLayout(detect_group) + detect_layout.setContentsMargins(8, 8, 8, 6) + detect_layout.setSpacing(4) + + self.format_detection_checkbox = self._create_styled_checkbox("Enable automatic manga format detection (reading direction)") + self.format_detection_checkbox.setChecked(self.settings['advanced']['format_detection']) + detect_layout.addWidget(self.format_detection_checkbox) + + # Webtoon mode + webtoon_frame = QWidget() + webtoon_layout = QHBoxLayout(webtoon_frame) + webtoon_layout.setContentsMargins(0, 0, 0, 0) + detect_layout.addWidget(webtoon_frame) + + webtoon_label = QLabel("Webtoon Mode:") + webtoon_label.setMinimumWidth(150) + webtoon_layout.addWidget(webtoon_label) + + self.webtoon_mode_combo = QComboBox() + self.webtoon_mode_combo.addItems(['auto', 'enabled', 'disabled']) + self.webtoon_mode_combo.setCurrentText(self.settings['advanced']['webtoon_mode']) + webtoon_layout.addWidget(self.webtoon_mode_combo) + webtoon_layout.addStretch() + + # Debug settings + debug_group = QGroupBox("Debug Options") + main_layout.addWidget(debug_group) + debug_layout = QVBoxLayout(debug_group) + debug_layout.setContentsMargins(8, 8, 8, 6) + debug_layout.setSpacing(4) + + self.debug_mode_checkbox = self._create_styled_checkbox("Enable debug mode (verbose logging)") + self.debug_mode_checkbox.setChecked(self.settings['advanced']['debug_mode']) + debug_layout.addWidget(self.debug_mode_checkbox) + + # New: Concise pipeline logs (reduce noise) + self.concise_logs_checkbox = self._create_styled_checkbox("Concise pipeline logs (reduce noise)") + self.concise_logs_checkbox.setChecked(bool(self.settings.get('advanced', {}).get('concise_logs', True))) + def _save_concise(): + try: + if 'advanced' not in self.settings: + self.settings['advanced'] = {} + self.settings['advanced']['concise_logs'] = bool(self.concise_logs_checkbox.isChecked()) + if hasattr(self, 'config'): + self.config['manga_settings'] = self.settings + if hasattr(self.main_gui, 'save_config'): + self.main_gui.save_config(show_message=False) + except Exception: + pass + self.concise_logs_checkbox.toggled.connect(_save_concise) + debug_layout.addWidget(self.concise_logs_checkbox) + + self.save_intermediate_checkbox = self._create_styled_checkbox("Save intermediate images (preprocessed, detection overlays)") + 
self.save_intermediate_checkbox.setChecked(self.settings['advanced']['save_intermediate']) + debug_layout.addWidget(self.save_intermediate_checkbox) + + # Performance settings + perf_group = QGroupBox("Performance") + main_layout.addWidget(perf_group) + perf_layout = QVBoxLayout(perf_group) + perf_layout.setContentsMargins(8, 8, 8, 6) + perf_layout.setSpacing(4) + + # New: Parallel rendering (per-region overlays) + self.render_parallel_checkbox = self._create_styled_checkbox("Enable parallel rendering (per-region overlays)") + self.render_parallel_checkbox.setChecked(self.settings.get('advanced', {}).get('render_parallel', True)) + perf_layout.addWidget(self.render_parallel_checkbox) + + self.parallel_processing_checkbox = self._create_styled_checkbox("Enable parallel processing (experimental)") + self.parallel_processing_checkbox.setChecked(self.settings['advanced']['parallel_processing']) + self.parallel_processing_checkbox.toggled.connect(self._toggle_workers) + perf_layout.addWidget(self.parallel_processing_checkbox) + + # Max workers + workers_frame = QWidget() + workers_layout = QHBoxLayout(workers_frame) + workers_layout.setContentsMargins(0, 0, 0, 0) + perf_layout.addWidget(workers_frame) + + self.workers_label = QLabel("Max Workers:") + self.workers_label.setMinimumWidth(150) + workers_layout.addWidget(self.workers_label) + + self.max_workers_spinbox = QSpinBox() + self.max_workers_spinbox.setRange(1, 999) + self.max_workers_spinbox.setValue(self.settings['advanced']['max_workers']) + workers_layout.addWidget(self.max_workers_spinbox) + + self.workers_desc_label = QLabel("(threads for parallel processing)") + workers_layout.addWidget(self.workers_desc_label) + workers_layout.addStretch() + + # Initialize workers state + self._toggle_workers() + + # Memory management section + memory_group = QGroupBox("Memory Management") + main_layout.addWidget(memory_group) + memory_layout = QVBoxLayout(memory_group) + memory_layout.setContentsMargins(8, 8, 8, 6) + memory_layout.setSpacing(4) + + # Singleton mode checkbox - will connect handler later after panel widgets created + self.use_singleton_models_checkbox = self._create_styled_checkbox("Use single model instances (saves RAM, only affects local models)") + self.use_singleton_models_checkbox.setChecked(self.settings.get('advanced', {}).get('use_singleton_models', True)) + self.use_singleton_models_checkbox.toggled.connect(self._toggle_singleton_controls) + memory_layout.addWidget(self.use_singleton_models_checkbox) + + # Singleton note + singleton_note = QLabel( + "When enabled: One bubble detector & one inpainter shared across all images.\n" + "When disabled: Each thread/image can have its own models (uses more RAM).\n" + "✅ Batch API translation remains fully functional with singleton mode enabled." 
+ ) + singleton_note_font = QFont('Arial', 9) + singleton_note.setFont(singleton_note_font) + singleton_note.setStyleSheet("color: gray;") + singleton_note.setWordWrap(True) + memory_layout.addWidget(singleton_note) + + self.auto_cleanup_models_checkbox = self._create_styled_checkbox("Automatically cleanup models after translation to free RAM") + self.auto_cleanup_models_checkbox.setChecked(self.settings.get('advanced', {}).get('auto_cleanup_models', False)) + memory_layout.addWidget(self.auto_cleanup_models_checkbox) + + # Unload models after translation (disabled by default) + self.unload_models_checkbox = self._create_styled_checkbox("Unload models after translation (reset translator instance)") + self.unload_models_checkbox.setChecked(self.settings.get('advanced', {}).get('unload_models_after_translation', False)) + memory_layout.addWidget(self.unload_models_checkbox) + + # Add a note about parallel processing + note_label = QLabel("Note: When parallel panel translation is enabled, cleanup happens after ALL panels complete.") + note_font = QFont('Arial', 9) + note_label.setFont(note_font) + note_label.setStyleSheet("color: gray;") + note_label.setWordWrap(True) + memory_layout.addWidget(note_label) + + # Panel-level parallel translation + panel_group = QGroupBox("Parallel Panel Translation") + main_layout.addWidget(panel_group) + panel_layout = QVBoxLayout(panel_group) + panel_layout.setContentsMargins(8, 8, 8, 6) + panel_layout.setSpacing(4) + + # New: Preload local inpainting for panels (default ON) + self.preload_local_panels_checkbox = self._create_styled_checkbox("Preload local inpainting instances for panel-parallel runs") + self.preload_local_panels_checkbox.setChecked(self.settings.get('advanced', {}).get('preload_local_inpainting_for_panels', True)) + panel_layout.addWidget(self.preload_local_panels_checkbox) + + self.parallel_panel_checkbox = self._create_styled_checkbox("Enable parallel panel translation (process multiple images concurrently)") + self.parallel_panel_checkbox.setChecked(self.settings.get('advanced', {}).get('parallel_panel_translation', False)) + self.parallel_panel_checkbox.toggled.connect(self._toggle_panel_controls) + panel_layout.addWidget(self.parallel_panel_checkbox) + + # Local LLM Performance (add to performance group) + inpaint_perf_group = QGroupBox("Local LLM Performance") + perf_layout.addWidget(inpaint_perf_group) + inpaint_perf_layout = QVBoxLayout(inpaint_perf_group) + inpaint_perf_layout.setContentsMargins(8, 8, 8, 6) + inpaint_perf_layout.setSpacing(4) + + # RT-DETR Concurrency (for memory optimization) + rtdetr_conc_widget = QWidget() + rtdetr_conc_layout = QHBoxLayout(rtdetr_conc_widget) + rtdetr_conc_layout.setContentsMargins(0, 0, 0, 0) + inpaint_perf_layout.addWidget(rtdetr_conc_widget) + self.rtdetr_conc_frame = rtdetr_conc_widget + + rtdetr_conc_label = QLabel("RT-DETR Concurrency:") + rtdetr_conc_label.setMinimumWidth(150) + rtdetr_conc_layout.addWidget(rtdetr_conc_label) + + self.rtdetr_max_concurrency_spinbox = QSpinBox() + self.rtdetr_max_concurrency_spinbox.setRange(1, 999) + self.rtdetr_max_concurrency_spinbox.setValue(self.settings['ocr'].get('rtdetr_max_concurrency', 12)) + self.rtdetr_max_concurrency_spinbox.setToolTip("Maximum concurrent RT-DETR region OCR calls (rate limiting handled via delays)") + rtdetr_conc_layout.addWidget(self.rtdetr_max_concurrency_spinbox) + + rtdetr_conc_desc = QLabel("parallel OCR calls (lower = less RAM)") + rtdetr_conc_desc_font = QFont('Arial', 9) + 
rtdetr_conc_desc.setFont(rtdetr_conc_desc_font) + rtdetr_conc_desc.setStyleSheet("color: gray;") + rtdetr_conc_layout.addWidget(rtdetr_conc_desc) + rtdetr_conc_layout.addStretch() + + # Initially hide RT-DETR concurrency control until we check detector type + self.rtdetr_conc_frame.setVisible(False) + + # Inpainting Concurrency + inpaint_bs_frame = QWidget() + inpaint_bs_layout = QHBoxLayout(inpaint_bs_frame) + inpaint_bs_layout.setContentsMargins(0, 0, 0, 0) + inpaint_perf_layout.addWidget(inpaint_bs_frame) + + inpaint_bs_label = QLabel("Inpainting Concurrency:") + inpaint_bs_label.setMinimumWidth(150) + inpaint_bs_layout.addWidget(inpaint_bs_label) + + self.inpaint_batch_size_spinbox = QSpinBox() + self.inpaint_batch_size_spinbox.setRange(1, 32) + self.inpaint_batch_size_spinbox.setValue(self.settings.get('inpainting', {}).get('batch_size', 10)) + inpaint_bs_layout.addWidget(self.inpaint_batch_size_spinbox) + + inpaint_bs_help = QLabel("(process multiple regions at once)") + inpaint_bs_help_font = QFont('Arial', 9) + inpaint_bs_help.setFont(inpaint_bs_help_font) + inpaint_bs_help.setStyleSheet("color: gray;") + inpaint_bs_layout.addWidget(inpaint_bs_help) + inpaint_bs_layout.addStretch() + + self.enable_cache_checkbox = self._create_styled_checkbox("Enable inpainting cache (speeds up repeated processing)") + self.enable_cache_checkbox.setChecked(self.settings.get('inpainting', {}).get('enable_cache', True)) + inpaint_perf_layout.addWidget(self.enable_cache_checkbox) + + # Max concurrent panels + panels_frame = QWidget() + panels_layout = QHBoxLayout(panels_frame) + panels_layout.setContentsMargins(0, 0, 0, 0) + panel_layout.addWidget(panels_frame) + + self.panels_label = QLabel("Max concurrent panels:") + self.panels_label.setMinimumWidth(150) + panels_layout.addWidget(self.panels_label) + + self.panel_max_workers_spinbox = QSpinBox() + self.panel_max_workers_spinbox.setRange(1, 12) + self.panel_max_workers_spinbox.setValue(self.settings.get('advanced', {}).get('panel_max_workers', 2)) + panels_layout.addWidget(self.panel_max_workers_spinbox) + panels_layout.addStretch() + + # Panel start stagger (ms) + stagger_frame = QWidget() + stagger_layout = QHBoxLayout(stagger_frame) + stagger_layout.setContentsMargins(0, 0, 0, 0) + panel_layout.addWidget(stagger_frame) + + self.stagger_label = QLabel("Panel start stagger:") + self.stagger_label.setMinimumWidth(150) + stagger_layout.addWidget(self.stagger_label) + + self.panel_stagger_ms_spinbox = QSpinBox() + self.panel_stagger_ms_spinbox.setRange(0, 1000) + self.panel_stagger_ms_spinbox.setValue(self.settings.get('advanced', {}).get('panel_start_stagger_ms', 30)) + stagger_layout.addWidget(self.panel_stagger_ms_spinbox) + + self.stagger_unit_label = QLabel("ms") + stagger_layout.addWidget(self.stagger_unit_label) + stagger_layout.addStretch() + + # Initialize panel controls state + self._toggle_panel_controls() + self._toggle_singleton_controls() + + # ONNX conversion settings + onnx_group = QGroupBox("ONNX Conversion") + main_layout.addWidget(onnx_group) + onnx_layout = QVBoxLayout(onnx_group) + onnx_layout.setContentsMargins(8, 8, 8, 6) + onnx_layout.setSpacing(4) + + self.auto_convert_onnx_checkbox = self._create_styled_checkbox("Auto-convert local models to ONNX for faster inference (recommended)") + self.auto_convert_onnx_checkbox.setChecked(self.settings['advanced'].get('auto_convert_to_onnx', False)) + onnx_layout.addWidget(self.auto_convert_onnx_checkbox) + + self.auto_convert_onnx_bg_checkbox = self._create_styled_checkbox("Convert in 
background (non-blocking; switches to ONNX when ready)") + self.auto_convert_onnx_bg_checkbox.setChecked(self.settings['advanced'].get('auto_convert_to_onnx_background', True)) + onnx_layout.addWidget(self.auto_convert_onnx_bg_checkbox) + + # Connect toggle handler + def _toggle_onnx_controls(): + self.auto_convert_onnx_bg_checkbox.setEnabled(self.auto_convert_onnx_checkbox.isChecked()) + self.auto_convert_onnx_checkbox.toggled.connect(_toggle_onnx_controls) + _toggle_onnx_controls() + + # Model memory optimization (quantization) + quant_group = QGroupBox("Model Memory Optimization") + main_layout.addWidget(quant_group) + quant_layout = QVBoxLayout(quant_group) + quant_layout.setContentsMargins(8, 8, 8, 6) + quant_layout.setSpacing(4) + + self.quantize_models_checkbox = self._create_styled_checkbox("Reduce RAM with quantized models (global switch)") + self.quantize_models_checkbox.setChecked(self.settings['advanced'].get('quantize_models', False)) + quant_layout.addWidget(self.quantize_models_checkbox) + + # ONNX quantize sub-toggle + onnx_quant_frame = QWidget() + onnx_quant_layout = QHBoxLayout(onnx_quant_frame) + onnx_quant_layout.setContentsMargins(0, 0, 0, 0) + quant_layout.addWidget(onnx_quant_frame) + + self.onnx_quantize_checkbox = self._create_styled_checkbox("Quantize ONNX models to INT8 (dynamic)") + self.onnx_quantize_checkbox.setChecked(self.settings['advanced'].get('onnx_quantize', False)) + onnx_quant_layout.addWidget(self.onnx_quantize_checkbox) + + onnx_quant_help = QLabel("(lower RAM/CPU; slight accuracy trade-off)") + onnx_quant_help_font = QFont('Arial', 9) + onnx_quant_help.setFont(onnx_quant_help_font) + onnx_quant_help.setStyleSheet("color: gray;") + onnx_quant_layout.addWidget(onnx_quant_help) + onnx_quant_layout.addStretch() + + # Torch precision dropdown + precision_frame = QWidget() + precision_layout = QHBoxLayout(precision_frame) + precision_layout.setContentsMargins(0, 0, 0, 0) + quant_layout.addWidget(precision_frame) + + precision_label = QLabel("Torch precision:") + precision_label.setMinimumWidth(150) + precision_layout.addWidget(precision_label) + + self.torch_precision_combo = QComboBox() + self.torch_precision_combo.addItems(['fp16', 'fp32', 'auto']) + self.torch_precision_combo.setCurrentText(self.settings['advanced'].get('torch_precision', 'fp16')) + precision_layout.addWidget(self.torch_precision_combo) + + precision_help = QLabel("(fp16 only, since fp32 is currently bugged)") + precision_help_font = QFont('Arial', 9) + precision_help.setFont(precision_help_font) + precision_help.setStyleSheet("color: gray;") + precision_layout.addWidget(precision_help) + precision_layout.addStretch() + + # Aggressive memory cleanup + cleanup_group = QGroupBox("Memory & Cleanup") + main_layout.addWidget(cleanup_group) + cleanup_layout = QVBoxLayout(cleanup_group) + cleanup_layout.setContentsMargins(8, 8, 8, 6) + cleanup_layout.setSpacing(4) + + self.force_deep_cleanup_checkbox = self._create_styled_checkbox("Force deep model cleanup after every image (slowest, lowest RAM)") + self.force_deep_cleanup_checkbox.setChecked(self.settings.get('advanced', {}).get('force_deep_cleanup_each_image', False)) + cleanup_layout.addWidget(self.force_deep_cleanup_checkbox) + + cleanup_help = QLabel("Also clears shared caches at batch end.") + cleanup_help_font = QFont('Arial', 9) + cleanup_help.setFont(cleanup_help_font) + cleanup_help.setStyleSheet("color: gray;") + cleanup_layout.addWidget(cleanup_help) + + # RAM cap controls + self.ram_cap_enabled_checkbox = 
self._create_styled_checkbox("Enable RAM cap") + self.ram_cap_enabled_checkbox.setChecked(self.settings.get('advanced', {}).get('ram_cap_enabled', False)) + cleanup_layout.addWidget(self.ram_cap_enabled_checkbox) + + # RAM cap value + ramcap_value_frame = QWidget() + ramcap_value_layout = QHBoxLayout(ramcap_value_frame) + ramcap_value_layout.setContentsMargins(0, 0, 0, 0) + cleanup_layout.addWidget(ramcap_value_frame) + + ramcap_value_label = QLabel("Max RAM (MB):") + ramcap_value_label.setMinimumWidth(150) + ramcap_value_layout.addWidget(ramcap_value_label) + + self.ram_cap_mb_spinbox = QSpinBox() + self.ram_cap_mb_spinbox.setRange(512, 131072) + self.ram_cap_mb_spinbox.setValue(int(self.settings.get('advanced', {}).get('ram_cap_mb', 0) or 0)) + ramcap_value_layout.addWidget(self.ram_cap_mb_spinbox) + + ramcap_value_help = QLabel("(0 = disabled)") + ramcap_value_help_font = QFont('Arial', 9) + ramcap_value_help.setFont(ramcap_value_help_font) + ramcap_value_help.setStyleSheet("color: gray;") + ramcap_value_layout.addWidget(ramcap_value_help) + ramcap_value_layout.addStretch() + + # RAM cap mode + ramcap_mode_frame = QWidget() + ramcap_mode_layout = QHBoxLayout(ramcap_mode_frame) + ramcap_mode_layout.setContentsMargins(0, 0, 0, 0) + cleanup_layout.addWidget(ramcap_mode_frame) + + ramcap_mode_label = QLabel("Cap mode:") + ramcap_mode_label.setMinimumWidth(150) + ramcap_mode_layout.addWidget(ramcap_mode_label) + + self.ram_cap_mode_combo = QComboBox() + self.ram_cap_mode_combo.addItems(['soft', 'hard (Windows only)']) + self.ram_cap_mode_combo.setCurrentText(self.settings.get('advanced', {}).get('ram_cap_mode', 'soft')) + ramcap_mode_layout.addWidget(self.ram_cap_mode_combo) + + ramcap_mode_help = QLabel("Soft = clean/trim, Hard = OS-enforced (may OOM)") + ramcap_mode_help_font = QFont('Arial', 9) + ramcap_mode_help.setFont(ramcap_mode_help_font) + ramcap_mode_help.setStyleSheet("color: gray;") + ramcap_mode_layout.addWidget(ramcap_mode_help) + ramcap_mode_layout.addStretch() + + # Advanced RAM gate tuning + gate_frame = QWidget() + gate_layout = QHBoxLayout(gate_frame) + gate_layout.setContentsMargins(0, 0, 0, 0) + cleanup_layout.addWidget(gate_frame) + + gate_label = QLabel("Gate timeout (sec):") + gate_label.setMinimumWidth(150) + gate_layout.addWidget(gate_label) + + self.ram_gate_timeout_spinbox = QDoubleSpinBox() + self.ram_gate_timeout_spinbox.setRange(2.0, 60.0) + self.ram_gate_timeout_spinbox.setSingleStep(0.5) + self.ram_gate_timeout_spinbox.setValue(float(self.settings.get('advanced', {}).get('ram_gate_timeout_sec', 10.0))) + gate_layout.addWidget(self.ram_gate_timeout_spinbox) + gate_layout.addStretch() + + # Gate floor + floor_frame = QWidget() + floor_layout = QHBoxLayout(floor_frame) + floor_layout.setContentsMargins(0, 0, 0, 0) + cleanup_layout.addWidget(floor_frame) + + floor_label = QLabel("Gate floor over baseline (MB):") + floor_label.setMinimumWidth(180) + floor_layout.addWidget(floor_label) + + self.ram_gate_floor_spinbox = QSpinBox() + self.ram_gate_floor_spinbox.setRange(64, 2048) + self.ram_gate_floor_spinbox.setValue(int(self.settings.get('advanced', {}).get('ram_min_floor_over_baseline_mb', 128))) + floor_layout.addWidget(self.ram_gate_floor_spinbox) + floor_layout.addStretch() + + # Update RT-DETR concurrency control visibility based on current detector type + # This is called after the Advanced tab is fully created to sync with OCR tab state + QTimer.singleShot(0, self._sync_rtdetr_concurrency_visibility) + + def _sync_rtdetr_concurrency_visibility(self): + 
"""Sync RT-DETR concurrency control visibility with detector type selection""" + if hasattr(self, 'detector_type_combo') and hasattr(self, 'rtdetr_conc_frame'): + detector = self.detector_type_combo.currentText() + is_rtdetr = 'RT-DETR' in detector or 'RTEDR_onnx' in detector + self.rtdetr_conc_frame.setVisible(is_rtdetr) + + def _toggle_workers(self): + """Enable/disable worker settings based on parallel processing toggle""" + if hasattr(self, 'parallel_processing_checkbox'): + enabled = bool(self.parallel_processing_checkbox.isChecked()) + if hasattr(self, 'max_workers_spinbox'): + self.max_workers_spinbox.setEnabled(enabled) + if hasattr(self, 'workers_label'): + self.workers_label.setEnabled(enabled) + self.workers_label.setStyleSheet("color: white;" if enabled else "color: gray;") + if hasattr(self, 'workers_desc_label'): + self.workers_desc_label.setEnabled(enabled) + self.workers_desc_label.setStyleSheet("color: white;" if enabled else "color: gray;") + + def _toggle_singleton_controls(self): + """Enable/disable parallel panel translation based on singleton toggle.""" + # When singleton mode is ENABLED, parallel panel translation should be DISABLED + try: + singleton_enabled = bool(self.use_singleton_models_checkbox.isChecked()) + except Exception: + singleton_enabled = True # Default + + # Disable parallel panel checkbox when singleton is enabled + if hasattr(self, 'parallel_panel_checkbox'): + self.parallel_panel_checkbox.setEnabled(not singleton_enabled) + if singleton_enabled: + # Also gray out the label when disabled + pass # The checkbox itself shows as disabled + + def _toggle_panel_controls(self): + """Enable/disable panel control fields based on parallel panel toggle.""" + try: + enabled = bool(self.parallel_panel_checkbox.isChecked()) + except Exception: + enabled = False + + # Enable/disable panel control widgets and their labels + panel_widgets = [ + ('panel_max_workers_spinbox', 'panels_label'), + ('panel_stagger_ms_spinbox', 'stagger_label', 'stagger_unit_label'), + ('preload_local_panels_checkbox',) # Add preload checkbox + ] + + for widget_names in panel_widgets: + for widget_name in widget_names: + try: + widget = getattr(self, widget_name, None) + if widget is not None: + # Just use setEnabled() - stylesheet handles visuals + widget.setEnabled(enabled) + except Exception: + pass + + def _apply_defaults_to_controls(self): + """Apply default values to all visible Tk variables/controls across tabs without rebuilding the dialog.""" + try: + # Use current in-memory settings (which we set to defaults above) + s = self.settings if isinstance(getattr(self, 'settings', None), dict) else self.default_settings + pre = s.get('preprocessing', {}) + comp = s.get('compression', {}) + ocr = s.get('ocr', {}) + adv = s.get('advanced', {}) + inp = s.get('inpainting', {}) + font = s.get('font_sizing', {}) + + # Preprocessing + if hasattr(self, 'preprocess_enabled'): self.preprocess_enabled.set(bool(pre.get('enabled', False))) + if hasattr(self, 'auto_detect'): self.auto_detect.set(bool(pre.get('auto_detect_quality', True))) + if hasattr(self, 'contrast_threshold'): self.contrast_threshold.set(float(pre.get('contrast_threshold', 0.4))) + if hasattr(self, 'sharpness_threshold'): self.sharpness_threshold.set(float(pre.get('sharpness_threshold', 0.3))) + if hasattr(self, 'enhancement_strength'): self.enhancement_strength.set(float(pre.get('enhancement_strength', 1.5))) + if hasattr(self, 'noise_threshold'): self.noise_threshold.set(int(pre.get('noise_threshold', 20))) + if hasattr(self, 
'denoise_strength'): self.denoise_strength.set(int(pre.get('denoise_strength', 10))) + if hasattr(self, 'max_dimension'): self.max_dimension.set(int(pre.get('max_image_dimension', 2000))) + if hasattr(self, 'max_pixels'): self.max_pixels.set(int(pre.get('max_image_pixels', 2000000))) + if hasattr(self, 'chunk_height'): self.chunk_height.set(int(pre.get('chunk_height', 1000))) + if hasattr(self, 'chunk_overlap'): self.chunk_overlap.set(int(pre.get('chunk_overlap', 100))) + # Compression + if hasattr(self, 'compression_enabled_var'): self.compression_enabled_var.set(bool(comp.get('enabled', False))) + if hasattr(self, 'compression_format_var'): self.compression_format_var.set(str(comp.get('format', 'jpeg'))) + if hasattr(self, 'jpeg_quality_var'): self.jpeg_quality_var.set(int(comp.get('jpeg_quality', 85))) + if hasattr(self, 'png_level_var'): self.png_level_var.set(int(comp.get('png_compress_level', 6))) + if hasattr(self, 'webp_quality_var'): self.webp_quality_var.set(int(comp.get('webp_quality', 85))) + # Tiling + if hasattr(self, 'inpaint_tiling_enabled'): self.inpaint_tiling_enabled.set(bool(pre.get('inpaint_tiling_enabled', False))) + if hasattr(self, 'inpaint_tile_size'): self.inpaint_tile_size.set(int(pre.get('inpaint_tile_size', 512))) + if hasattr(self, 'inpaint_tile_overlap'): self.inpaint_tile_overlap.set(int(pre.get('inpaint_tile_overlap', 64))) + + # OCR basic + if hasattr(self, 'confidence_threshold'): self.confidence_threshold.set(float(ocr.get('confidence_threshold', 0.7))) + if hasattr(self, 'detection_mode'): self.detection_mode.set(str(ocr.get('text_detection_mode', 'document'))) + if hasattr(self, 'merge_nearby_threshold'): self.merge_nearby_threshold.set(int(ocr.get('merge_nearby_threshold', 20))) + if hasattr(self, 'enable_rotation'): self.enable_rotation.set(bool(ocr.get('enable_rotation_correction', True))) + + # Language checkboxes + try: + if hasattr(self, 'lang_vars') and isinstance(self.lang_vars, dict): + langs = set(ocr.get('language_hints', ['ja', 'ko', 'zh'])) + for code, var in self.lang_vars.items(): + var.set(code in langs) + except Exception: + pass + + # OCR batching/locality + if hasattr(self, 'ocr_batch_enabled_var'): self.ocr_batch_enabled_var.set(bool(ocr.get('ocr_batch_enabled', True))) + if hasattr(self, 'ocr_batch_size_var'): self.ocr_batch_size_var.set(int(ocr.get('ocr_batch_size', 8))) + if hasattr(self, 'ocr_max_conc_var'): self.ocr_max_conc_var.set(int(ocr.get('ocr_max_concurrency', 2))) + if hasattr(self, 'roi_locality_var'): self.roi_locality_var.set(bool(ocr.get('roi_locality_enabled', False))) + if hasattr(self, 'roi_padding_ratio_var'): self.roi_padding_ratio_var.set(float(ocr.get('roi_padding_ratio', 0.08))) + if hasattr(self, 'roi_min_side_var'): self.roi_min_side_var.set(int(ocr.get('roi_min_side_px', 12))) + if hasattr(self, 'roi_min_area_var'): self.roi_min_area_var.set(int(ocr.get('roi_min_area_px', 100))) + if hasattr(self, 'roi_max_side_var'): self.roi_max_side_var.set(int(ocr.get('roi_max_side', 0))) + + # English filters + if hasattr(self, 'exclude_english_var'): self.exclude_english_var.set(bool(ocr.get('exclude_english_text', False))) + if hasattr(self, 'english_exclude_threshold'): self.english_exclude_threshold.set(float(ocr.get('english_exclude_threshold', 0.7))) + if hasattr(self, 'english_exclude_min_chars'): self.english_exclude_min_chars.set(int(ocr.get('english_exclude_min_chars', 4))) + if hasattr(self, 'english_exclude_short_tokens'): self.english_exclude_short_tokens.set(bool(ocr.get('english_exclude_short_tokens', 
False))) + + # Azure + if hasattr(self, 'azure_merge_multiplier'): self.azure_merge_multiplier.set(float(ocr.get('azure_merge_multiplier', 3.0))) + if hasattr(self, 'azure_reading_order'): self.azure_reading_order.set(str(ocr.get('azure_reading_order', 'natural'))) + if hasattr(self, 'azure_model_version'): self.azure_model_version.set(str(ocr.get('azure_model_version', 'latest'))) + if hasattr(self, 'azure_max_wait'): self.azure_max_wait.set(int(ocr.get('azure_max_wait', 60))) + if hasattr(self, 'azure_poll_interval'): self.azure_poll_interval.set(float(ocr.get('azure_poll_interval', 0.5))) + try: + self._update_azure_label() + except Exception: + pass + + # Bubble detector + if hasattr(self, 'bubble_detection_enabled'): self.bubble_detection_enabled.set(bool(ocr.get('bubble_detection_enabled', False))) + # Detector type mapping to UI labels + if hasattr(self, 'detector_type'): + dt = str(ocr.get('detector_type', 'rtdetr_onnx')) + if dt == 'rtdetr_onnx': self.detector_type.set('RTEDR_onnx') + elif dt == 'rtdetr': self.detector_type.set('RT-DETR') + elif dt == 'yolo': self.detector_type.set('YOLOv8 Speech') + elif dt == 'custom': self.detector_type.set('Custom Model') + else: self.detector_type.set('RTEDR_onnx') + if hasattr(self, 'bubble_model_path'): self.bubble_model_path.set(str(ocr.get('bubble_model_path', ''))) + if hasattr(self, 'bubble_confidence'): self.bubble_confidence.set(float(ocr.get('bubble_confidence', 0.5))) + if hasattr(self, 'detect_empty_bubbles'): self.detect_empty_bubbles.set(bool(ocr.get('detect_empty_bubbles', True))) + if hasattr(self, 'detect_text_bubbles'): self.detect_text_bubbles.set(bool(ocr.get('detect_text_bubbles', True))) + if hasattr(self, 'detect_free_text'): self.detect_free_text.set(bool(ocr.get('detect_free_text', True))) + if hasattr(self, 'bubble_max_det_yolo_var'): self.bubble_max_det_yolo_var.set(int(ocr.get('bubble_max_detections_yolo', 100))) + + # Inpainting + if hasattr(self, 'inpaint_batch_size'): self.inpaint_batch_size.set(int(inp.get('batch_size', 1))) + if hasattr(self, 'enable_cache_var'): self.enable_cache_var.set(bool(inp.get('enable_cache', True))) + if hasattr(self, 'mask_dilation_var'): self.mask_dilation_var.set(int(s.get('mask_dilation', 0))) + if hasattr(self, 'use_all_iterations_var'): self.use_all_iterations_var.set(bool(s.get('use_all_iterations', True))) + if hasattr(self, 'all_iterations_var'): self.all_iterations_var.set(int(s.get('all_iterations', 2))) + if hasattr(self, 'text_bubble_iterations_var'): self.text_bubble_iterations_var.set(int(s.get('text_bubble_dilation_iterations', 2))) + if hasattr(self, 'empty_bubble_iterations_var'): self.empty_bubble_iterations_var.set(int(s.get('empty_bubble_dilation_iterations', 3))) + if hasattr(self, 'free_text_iterations_var'): self.free_text_iterations_var.set(int(s.get('free_text_dilation_iterations', 0))) + + # Advanced + if hasattr(self, 'format_detection'): self.format_detection.set(1 if adv.get('format_detection', True) else 0) + if hasattr(self, 'webtoon_mode'): self.webtoon_mode.set(str(adv.get('webtoon_mode', 'auto'))) + if hasattr(self, 'debug_mode'): self.debug_mode.set(1 if adv.get('debug_mode', False) else 0) + if hasattr(self, 'save_intermediate'): self.save_intermediate.set(1 if adv.get('save_intermediate', False) else 0) + if hasattr(self, 'parallel_processing'): self.parallel_processing.set(1 if adv.get('parallel_processing', False) else 0) + if hasattr(self, 'max_workers'): self.max_workers.set(int(adv.get('max_workers', 4))) + if hasattr(self, 
'use_singleton_models'): self.use_singleton_models.set(bool(adv.get('use_singleton_models', True))) + if hasattr(self, 'auto_cleanup_models'): self.auto_cleanup_models.set(bool(adv.get('auto_cleanup_models', False))) + if hasattr(self, 'unload_models_var'): self.unload_models_var.set(bool(adv.get('unload_models_after_translation', False))) + if hasattr(self, 'parallel_panel_var'): self.parallel_panel_var.set(bool(adv.get('parallel_panel_translation', False))) + if hasattr(self, 'panel_max_workers_var'): self.panel_max_workers_var.set(int(adv.get('panel_max_workers', 2))) + if hasattr(self, 'panel_stagger_ms_var'): self.panel_stagger_ms_var.set(int(adv.get('panel_start_stagger_ms', 30))) + # New: preload local inpainting for parallel panels (default True) + if hasattr(self, 'preload_local_panels_var'): self.preload_local_panels_var.set(bool(adv.get('preload_local_inpainting_for_panels', True))) + if hasattr(self, 'auto_convert_onnx_var'): self.auto_convert_onnx_var.set(bool(adv.get('auto_convert_to_onnx', False))) + if hasattr(self, 'auto_convert_onnx_bg_var'): self.auto_convert_onnx_bg_var.set(bool(adv.get('auto_convert_to_onnx_background', True))) + if hasattr(self, 'quantize_models_var'): self.quantize_models_var.set(bool(adv.get('quantize_models', False))) + if hasattr(self, 'onnx_quantize_var'): self.onnx_quantize_var.set(bool(adv.get('onnx_quantize', False))) + if hasattr(self, 'torch_precision_var'): self.torch_precision_var.set(str(adv.get('torch_precision', 'auto'))) + + # Font sizing tab + if hasattr(self, 'font_algorithm_var'): self.font_algorithm_var.set(str(font.get('algorithm', 'smart'))) + if hasattr(self, 'min_font_size_var'): self.min_font_size_var.set(int(font.get('min_size', 10))) + if hasattr(self, 'max_font_size_var'): self.max_font_size_var.set(int(font.get('max_size', 40))) + if hasattr(self, 'min_readable_var'): self.min_readable_var.set(int(font.get('min_readable', 14))) + if hasattr(self, 'prefer_larger_var'): self.prefer_larger_var.set(bool(font.get('prefer_larger', True))) + if hasattr(self, 'bubble_size_factor_var'): self.bubble_size_factor_var.set(bool(font.get('bubble_size_factor', True))) + if hasattr(self, 'line_spacing_var'): self.line_spacing_var.set(float(font.get('line_spacing', 1.3))) + if hasattr(self, 'max_lines_var'): self.max_lines_var.set(int(font.get('max_lines', 10))) + try: + if hasattr(self, '_on_font_mode_change'): + self._on_font_mode_change() + except Exception: + pass + + # Rendering controls (if present in this dialog) + if hasattr(self, 'font_size_mode_var'): self.font_size_mode_var.set(str(s.get('rendering', {}).get('font_size_mode', 'auto'))) + if hasattr(self, 'fixed_font_size_var'): self.fixed_font_size_var.set(int(s.get('rendering', {}).get('fixed_font_size', 16))) + if hasattr(self, 'font_scale_var'): self.font_scale_var.set(float(s.get('rendering', {}).get('font_scale', 1.0))) + if hasattr(self, 'auto_fit_style_var'): self.auto_fit_style_var.set(str(s.get('rendering', {}).get('auto_fit_style', 'balanced'))) + + # Cloud API tab + if hasattr(self, 'cloud_model_var'): self.cloud_model_var.set(str(s.get('cloud_inpaint_model', 'ideogram-v2'))) + if hasattr(self, 'custom_version_var'): self.custom_version_var.set(str(s.get('cloud_custom_version', ''))) + if hasattr(self, 'cloud_prompt_var'): self.cloud_prompt_var.set(str(s.get('cloud_inpaint_prompt', 'clean background, smooth surface'))) + if hasattr(self, 'cloud_negative_prompt_var'): self.cloud_negative_prompt_var.set(str(s.get('cloud_negative_prompt', 'text, writing, letters'))) + if 
hasattr(self, 'cloud_steps_var'): self.cloud_steps_var.set(int(s.get('cloud_inference_steps', 20))) + if hasattr(self, 'cloud_timeout_var'): self.cloud_timeout_var.set(int(s.get('cloud_timeout', 60))) + + # Trigger dependent UI updates + try: + self._toggle_preprocessing() + except Exception: + pass + try: + if hasattr(self, '_on_cloud_model_change'): + self._on_cloud_model_change() + except Exception: + pass + try: + self._toggle_iteration_controls() + except Exception: + pass + try: + self._toggle_roi_locality_controls() + except Exception: + pass + try: + self._toggle_workers() + except Exception: + pass + + # Build/attach advanced control for local inpainting preload if not present + try: + if not hasattr(self, 'preload_local_panels_var') and hasattr(self, '_create_advanced_tab_ui'): + # If there is a helper to build advanced UI, we rely on it. Otherwise, attach to existing advanced frame if available. + pass + except Exception: + pass + try: + if hasattr(self, 'compression_format_combo'): + self._toggle_compression_format() + except Exception: + pass + try: + if hasattr(self, 'detector_type'): + self._on_detector_type_changed() + except Exception: + pass + try: + self.dialog.update_idletasks() + except Exception: + pass + except Exception: + # Best-effort application only + pass + + + def _set_font_preset(self, preset: str): + """Apply font sizing preset""" + if preset == 'small': + # For manga with small bubbles + self.font_algorithm_var.set('conservative') + self.min_font_size_var.set(8) + self.max_font_size_var.set(24) + self.min_readable_var.set(12) + self.prefer_larger_var.set(False) + self.bubble_size_factor_var.set(True) + self.line_spacing_var.set(1.2) + self.max_lines_var.set(8) + elif preset == 'balanced': + # Default balanced settings + self.font_algorithm_var.set('smart') + self.min_font_size_var.set(10) + self.max_font_size_var.set(40) + self.min_readable_var.set(14) + self.prefer_larger_var.set(True) + self.bubble_size_factor_var.set(True) + self.line_spacing_var.set(1.3) + self.max_lines_var.set(10) + elif preset == 'large': + # For maximum readability + self.font_algorithm_var.set('aggressive') + self.min_font_size_var.set(14) + self.max_font_size_var.set(50) + self.min_readable_var.set(16) + self.prefer_larger_var.set(True) + self.bubble_size_factor_var.set(False) + self.line_spacing_var.set(1.4) + self.max_lines_var.set(12) + + + def _save_rendering_settings(self, *args): + """Auto-save font and rendering settings when controls change""" + # Don't save during initialization + if hasattr(self, '_initializing') and self._initializing: + return + + try: + # Ensure rendering section exists in settings + if 'rendering' not in self.settings: + self.settings['rendering'] = {} + + # Save font size controls if they exist + if hasattr(self, 'font_size_mode_var'): + self.settings['rendering']['font_size_mode'] = self.font_size_mode_var.get() + self.settings['rendering']['fixed_font_size'] = self.fixed_font_size_var.get() + self.settings['rendering']['font_scale'] = self.font_scale_var.get() + self.settings['rendering']['auto_fit_style'] = self.auto_fit_style_var.get() + + # Save min/max for auto mode + if hasattr(self, 'min_font_size_var'): + self.settings['rendering']['auto_min_size'] = self.min_font_size_var.get() + if hasattr(self, 'max_font_size_var'): + self.settings['rendering']['auto_max_size'] = self.max_font_size_var.get() + + # Update config + self.config['manga_settings'] = self.settings + + # Mirror only auto max to top-level config for backward compatibility; keep 
min nested + try: + auto_max = self.settings.get('rendering', {}).get('auto_max_size', None) + if auto_max is not None: + self.config['manga_max_font_size'] = int(auto_max) + except Exception: + pass + + # Save to file immediately + if hasattr(self.main_gui, 'save_config'): + self.main_gui.save_config() + print(f"Auto-saved rendering settings") + time.sleep(0.1) # Brief pause for stability + print("💤 Auto-save pausing briefly for stability") + + except Exception as e: + print(f"Error auto-saving rendering settings: {e}") + + def _save_settings(self): + """Save all settings including expanded iteration controls""" + try: + # Collect all preprocessing settings + self.settings['preprocessing']['enabled'] = self.preprocess_enabled.isChecked() + self.settings['preprocessing']['auto_detect_quality'] = self.auto_detect.isChecked() + self.settings['preprocessing']['contrast_threshold'] = self.contrast_threshold.value() + self.settings['preprocessing']['sharpness_threshold'] = self.sharpness_threshold.value() + self.settings['preprocessing']['enhancement_strength'] = self.enhancement_strength.value() + self.settings['preprocessing']['noise_threshold'] = self.noise_threshold.value() + self.settings['preprocessing']['denoise_strength'] = self.denoise_strength.value() + self.settings['preprocessing']['max_image_dimension'] = self.dimension_spinbox.value() + self.settings['preprocessing']['max_image_pixels'] = self.pixels_spinbox.value() + self.settings['preprocessing']['chunk_height'] = self.chunk_height_spinbox.value() + self.settings['preprocessing']['chunk_overlap'] = self.chunk_overlap_spinbox.value() + + # Compression (saved separately from preprocessing) + if 'compression' not in self.settings: + self.settings['compression'] = {} + self.settings['compression']['enabled'] = bool(self.compression_enabled.isChecked()) + self.settings['compression']['format'] = str(self.compression_format_combo.currentText()) + self.settings['compression']['jpeg_quality'] = int(self.jpeg_quality_spin.value()) + self.settings['compression']['png_compress_level'] = int(self.png_level_spin.value()) + self.settings['compression']['webp_quality'] = int(self.webp_quality_spin.value()) + + # TILING SETTINGS - save under preprocessing (primary) and mirror under 'tiling' for backward compatibility + self.settings['preprocessing']['inpaint_tiling_enabled'] = self.inpaint_tiling_enabled.isChecked() + self.settings['preprocessing']['inpaint_tile_size'] = self.tile_size_spinbox.value() + self.settings['preprocessing']['inpaint_tile_overlap'] = self.tile_overlap_spinbox.value() + # Back-compat mirror + self.settings['tiling'] = { + 'enabled': self.inpaint_tiling_enabled.isChecked(), + 'tile_size': self.tile_size_spinbox.value(), + 'tile_overlap': self.tile_overlap_spinbox.value() + } + + # OCR settings + self.settings['ocr']['language_hints'] = [code for code, checkbox in self.lang_checkboxes.items() if checkbox.isChecked()] + self.settings['ocr']['confidence_threshold'] = self.confidence_threshold_slider.value() / 100.0 + self.settings['ocr']['text_detection_mode'] = self.detection_mode_combo.currentText() + self.settings['ocr']['merge_nearby_threshold'] = self.merge_nearby_threshold_spinbox.value() + self.settings['ocr']['enable_rotation_correction'] = self.enable_rotation_checkbox.isChecked() + self.settings['ocr']['azure_merge_multiplier'] = self.azure_merge_multiplier_slider.value() / 100.0 + self.settings['ocr']['azure_reading_order'] = self.azure_reading_order_combo.currentText() + 
self.settings['ocr']['azure_model_version'] = self.azure_model_version_combo.currentText() + self.settings['ocr']['azure_max_wait'] = self.azure_max_wait_spinbox.value() + self.settings['ocr']['azure_poll_interval'] = self.azure_poll_interval_slider.value() / 100.0 + self.settings['ocr']['min_text_length'] = self.min_text_length_spinbox.value() + self.settings['ocr']['exclude_english_text'] = self.exclude_english_checkbox.isChecked() + + # OCR batching & locality + self.settings['ocr']['ocr_batch_enabled'] = bool(self.ocr_batch_enabled_checkbox.isChecked()) + self.settings['ocr']['ocr_batch_size'] = int(self.ocr_batch_size_spinbox.value()) + self.settings['ocr']['ocr_max_concurrency'] = int(self.ocr_max_conc_spinbox.value()) + self.settings['ocr']['roi_locality_enabled'] = bool(self.roi_locality_checkbox.isChecked()) + self.settings['ocr']['roi_padding_ratio'] = float(self.roi_padding_ratio_slider.value() / 100.0) + self.settings['ocr']['roi_min_side_px'] = int(self.roi_min_side_spinbox.value()) + self.settings['ocr']['roi_min_area_px'] = int(self.roi_min_area_spinbox.value()) + self.settings['ocr']['roi_max_side'] = int(self.roi_max_side_spinbox.value()) + self.settings['ocr']['english_exclude_threshold'] = self.english_exclude_threshold_slider.value() / 100.0 + self.settings['ocr']['english_exclude_min_chars'] = self.english_exclude_min_chars_spinbox.value() + self.settings['ocr']['english_exclude_short_tokens'] = self.english_exclude_short_tokens_checkbox.isChecked() + + # Bubble detection settings + self.settings['ocr']['bubble_detection_enabled'] = self.bubble_detection_enabled_checkbox.isChecked() + self.settings['ocr']['use_rtdetr_for_ocr_regions'] = self.use_rtdetr_for_ocr_checkbox.isChecked() # NEW: RT-DETR for OCR guidance + self.settings['ocr']['bubble_model_path'] = self.bubble_model_entry.text() + self.settings['ocr']['bubble_confidence'] = self.bubble_conf_slider.value() / 100.0 + self.settings['ocr']['rtdetr_confidence'] = self.bubble_conf_slider.value() / 100.0 + self.settings['ocr']['detect_empty_bubbles'] = self.detect_empty_bubbles_checkbox.isChecked() + self.settings['ocr']['detect_text_bubbles'] = self.detect_text_bubbles_checkbox.isChecked() + self.settings['ocr']['detect_free_text'] = self.detect_free_text_checkbox.isChecked() + self.settings['ocr']['rtdetr_model_url'] = self.bubble_model_entry.text() + self.settings['ocr']['bubble_max_detections_yolo'] = int(self.bubble_max_det_yolo_spinbox.value()) + self.settings['ocr']['rtdetr_max_concurrency'] = int(self.rtdetr_max_concurrency_spinbox.value()) + + # Save the detector type properly + detector_display = self.detector_type_combo.currentText() + if 'RTEDR_onnx' in detector_display or 'ONNX' in detector_display.upper(): + self.settings['ocr']['detector_type'] = 'rtdetr_onnx' + elif 'RT-DETR' in detector_display: + self.settings['ocr']['detector_type'] = 'rtdetr' + elif 'YOLOv8' in detector_display: + self.settings['ocr']['detector_type'] = 'yolo' + elif detector_display == 'Custom Model': + self.settings['ocr']['detector_type'] = 'custom' + self.settings['ocr']['custom_model_path'] = self.bubble_model_entry.text() + else: + self.settings['ocr']['detector_type'] = 'rtdetr_onnx' + + # Inpainting settings + if 'inpainting' not in self.settings: + self.settings['inpainting'] = {} + self.settings['inpainting']['batch_size'] = self.inpaint_batch_size_spinbox.value() + self.settings['inpainting']['enable_cache'] = self.enable_cache_checkbox.isChecked() + + # Save all dilation settings + self.settings['mask_dilation'] = 
self.mask_dilation_spinbox.value() + self.settings['use_all_iterations'] = self.use_all_iterations_checkbox.isChecked() + self.settings['all_iterations'] = self.all_iterations_spinbox.value() + self.settings['text_bubble_dilation_iterations'] = self.text_bubble_iter_spinbox.value() + self.settings['empty_bubble_dilation_iterations'] = self.empty_bubble_iter_spinbox.value() + self.settings['free_text_dilation_iterations'] = self.free_text_iter_spinbox.value() + self.settings['auto_iterations'] = self.auto_iterations_checkbox.isChecked() + + # Legacy support + self.settings['bubble_dilation_iterations'] = self.text_bubble_iter_spinbox.value() + self.settings['dilation_iterations'] = self.text_bubble_iter_spinbox.value() + + # Advanced settings + self.settings['advanced']['format_detection'] = bool(self.format_detection_checkbox.isChecked()) + self.settings['advanced']['webtoon_mode'] = self.webtoon_mode_combo.currentText() + self.settings['advanced']['debug_mode'] = bool(self.debug_mode_checkbox.isChecked()) + self.settings['advanced']['save_intermediate'] = bool(self.save_intermediate_checkbox.isChecked()) + self.settings['advanced']['parallel_processing'] = bool(self.parallel_processing_checkbox.isChecked()) + self.settings['advanced']['max_workers'] = self.max_workers_spinbox.value() + + # Save HD strategy settings + self.settings['advanced']['hd_strategy'] = str(self.hd_strategy_combo.currentText()) + self.settings['advanced']['hd_strategy_resize_limit'] = int(self.hd_resize_limit_spin.value()) + self.settings['advanced']['hd_strategy_crop_margin'] = int(self.hd_crop_margin_spin.value()) + self.settings['advanced']['hd_strategy_crop_trigger_size'] = int(self.hd_crop_trigger_spin.value()) + # Also reflect into environment for immediate effect in this session + os.environ['HD_STRATEGY'] = self.settings['advanced']['hd_strategy'] + os.environ['HD_RESIZE_LIMIT'] = str(self.settings['advanced']['hd_strategy_resize_limit']) + os.environ['HD_CROP_MARGIN'] = str(self.settings['advanced']['hd_strategy_crop_margin']) + os.environ['HD_CROP_TRIGGER'] = str(self.settings['advanced']['hd_strategy_crop_trigger_size']) + + # Save parallel rendering toggle + if hasattr(self, 'render_parallel_checkbox'): + self.settings['advanced']['render_parallel'] = bool(self.render_parallel_checkbox.isChecked()) + + # Panel-level parallel translation settings + self.settings['advanced']['parallel_panel_translation'] = bool(self.parallel_panel_checkbox.isChecked()) + self.settings['advanced']['panel_max_workers'] = int(self.panel_max_workers_spinbox.value()) + self.settings['advanced']['panel_start_stagger_ms'] = int(self.panel_stagger_ms_spinbox.value()) + # New: preload local inpainting for panels + if hasattr(self, 'preload_local_panels_checkbox'): + self.settings['advanced']['preload_local_inpainting_for_panels'] = bool(self.preload_local_panels_checkbox.isChecked()) + + # Memory management settings + self.settings['advanced']['use_singleton_models'] = bool(self.use_singleton_models_checkbox.isChecked()) + self.settings['advanced']['auto_cleanup_models'] = bool(self.auto_cleanup_models_checkbox.isChecked()) + self.settings['advanced']['unload_models_after_translation'] = bool(self.unload_models_checkbox.isChecked() if hasattr(self, 'unload_models_checkbox') else False) + + # ONNX auto-convert settings (persist and apply to environment) + if hasattr(self, 'auto_convert_onnx_checkbox'): + self.settings['advanced']['auto_convert_to_onnx'] = bool(self.auto_convert_onnx_checkbox.isChecked()) + 
os.environ['AUTO_CONVERT_TO_ONNX'] = 'true' if self.auto_convert_onnx_checkbox.isChecked() else 'false' + if hasattr(self, 'auto_convert_onnx_bg_checkbox'): + self.settings['advanced']['auto_convert_to_onnx_background'] = bool(self.auto_convert_onnx_bg_checkbox.isChecked()) + os.environ['AUTO_CONVERT_TO_ONNX_BACKGROUND'] = 'true' if self.auto_convert_onnx_bg_checkbox.isChecked() else 'false' + + # Quantization toggles and precision + if hasattr(self, 'quantize_models_checkbox'): + self.settings['advanced']['quantize_models'] = bool(self.quantize_models_checkbox.isChecked()) + os.environ['MODEL_QUANTIZE'] = 'true' if self.quantize_models_checkbox.isChecked() else 'false' + if hasattr(self, 'onnx_quantize_checkbox'): + self.settings['advanced']['onnx_quantize'] = bool(self.onnx_quantize_checkbox.isChecked()) + os.environ['ONNX_QUANTIZE'] = 'true' if self.onnx_quantize_checkbox.isChecked() else 'false' + if hasattr(self, 'torch_precision_combo'): + self.settings['advanced']['torch_precision'] = str(self.torch_precision_combo.currentText()) + os.environ['TORCH_PRECISION'] = self.settings['advanced']['torch_precision'] + + # Memory cleanup toggle + if hasattr(self, 'force_deep_cleanup_checkbox'): + if 'advanced' not in self.settings: + self.settings['advanced'] = {} + self.settings['advanced']['force_deep_cleanup_each_image'] = bool(self.force_deep_cleanup_checkbox.isChecked()) + + # RAM cap settings + if hasattr(self, 'ram_cap_enabled_checkbox'): + self.settings['advanced']['ram_cap_enabled'] = bool(self.ram_cap_enabled_checkbox.isChecked()) + if hasattr(self, 'ram_cap_mb_spinbox'): + self.settings['advanced']['ram_cap_mb'] = int(self.ram_cap_mb_spinbox.value()) + if hasattr(self, 'ram_cap_mode_combo'): + mode = self.ram_cap_mode_combo.currentText() + self.settings['advanced']['ram_cap_mode'] = 'hard' if mode.startswith('hard') else 'soft' + if hasattr(self, 'ram_gate_timeout_spinbox'): + self.settings['advanced']['ram_gate_timeout_sec'] = float(self.ram_gate_timeout_spinbox.value()) + if hasattr(self, 'ram_gate_floor_spinbox'): + self.settings['advanced']['ram_min_floor_over_baseline_mb'] = int(self.ram_gate_floor_spinbox.value()) + + # Cloud API settings + if hasattr(self, 'cloud_model_selected'): + self.settings['cloud_inpaint_model'] = self.cloud_model_selected + self.settings['cloud_custom_version'] = self.custom_version_entry.text() + self.settings['cloud_inpaint_prompt'] = self.cloud_prompt_entry.text() + self.settings['cloud_negative_prompt'] = self.negative_entry.text() + self.settings['cloud_inference_steps'] = self.steps_spinbox.value() + self.settings['cloud_timeout'] = self.cloud_timeout_spinbox.value() + + # Clear bubble detector cache to force reload with new settings + if hasattr(self.main_gui, 'manga_tab') and hasattr(self.main_gui.manga_tab, 'translator'): + if hasattr(self.main_gui.manga_tab.translator, 'bubble_detector'): + self.main_gui.manga_tab.translator.bubble_detector = None + + # Save to config + self.config['manga_settings'] = self.settings + + # Save to file - using the correct method name + try: + if hasattr(self.main_gui, 'save_config'): + self.main_gui.save_config(show_message=False) + print("Settings saved successfully") + elif hasattr(self.main_gui, 'save_configuration'): + self.main_gui.save_configuration() + print("Settings saved successfully") + else: + # Try direct save as fallback + if hasattr(self.main_gui, 'config_file'): + with open(self.main_gui.config_file, 'w') as f: + json.dump(self.config, f, indent=2) + print("Settings saved directly to config 
file") + except Exception as e: + print(f"Error saving configuration: {e}") + QMessageBox.critical(self, "Save Error", f"Failed to save settings: {e}") + return + + # Call callback if provided + if self.callback: + try: + self.callback(self.settings) + except Exception as e: + print(f"Error in callback: {e}") + + # Close dialog + self.accept() + + except Exception as e: + import traceback + print(f"Critical error in _save_settings: {e}") + print(traceback.format_exc()) + QMessageBox.critical(self, "Save Error", f"Failed to save settings: {e}") + + def _reset_defaults(self): + """Reset by removing manga_settings from config and reinitializing the dialog.""" + reply = QMessageBox.question(self, "Reset Settings", + "Reset all manga settings to defaults?\nThis will remove custom manga settings from config.json.", + QMessageBox.StandardButton.Yes | QMessageBox.StandardButton.No, + QMessageBox.StandardButton.No) + if reply != QMessageBox.StandardButton.Yes: + return + # Remove manga_settings key to force defaults + try: + if isinstance(self.config, dict) and 'manga_settings' in self.config: + del self.config['manga_settings'] + except Exception: + pass + # Persist changes WITHOUT showing message + try: + if hasattr(self.main_gui, 'save_config'): + self.main_gui.save_config(show_message=False) + elif hasattr(self.main_gui, 'save_configuration'): + self.main_gui.save_configuration() + elif hasattr(self.main_gui, 'config_file') and isinstance(self.main_gui.config_file, str): + with open(self.main_gui.config_file, 'w', encoding='utf-8') as f: + json.dump(self.config, f, ensure_ascii=False, indent=2) + except Exception: + try: + if hasattr(self.main_gui, 'CONFIG_FILE') and isinstance(self.main_gui.CONFIG_FILE, str): + with open(self.main_gui.CONFIG_FILE, 'w', encoding='utf-8') as f: + json.dump(self.config, f, ensure_ascii=False, indent=2) + except Exception: + pass + # Close and reopen dialog so defaults apply + self.close() + try: + MangaSettingsDialog(parent=self.parent, main_gui=self.main_gui, config=self.config, callback=self.callback) + except Exception: + pass # Don't show any message + + def _cancel(self): + """Cancel without saving""" + self.reject() diff --git a/manga_translator.py b/manga_translator.py new file mode 100644 index 0000000000000000000000000000000000000000..33ccf93f071baae09452e40fa50fa4d19081aed1 --- /dev/null +++ b/manga_translator.py @@ -0,0 +1,11216 @@ +# manga_translator.py +""" +Enhanced Manga Translation Pipeline with improved text visibility controls +Handles OCR, translation, and advanced text rendering for manga panels +Now with proper history management and full page context support +""" + +import os +import json +import base64 +import logging +import time +import traceback +import cv2 +from PIL import ImageEnhance, ImageFilter +from typing import List, Dict, Tuple, Optional, Any +from dataclasses import dataclass +from concurrent.futures import ThreadPoolExecutor, as_completed +import threading +from PIL import Image, ImageDraw, ImageFont +import numpy as np +from bubble_detector import BubbleDetector +from TransateKRtoEN import send_with_interrupt + +# Google Cloud Vision imports +try: + from google.cloud import vision + GOOGLE_CLOUD_VISION_AVAILABLE = True +except ImportError: + GOOGLE_CLOUD_VISION_AVAILABLE = False + print("Warning: Google Cloud Vision not installed. 
Install with: pip install google-cloud-vision") + +# Import HistoryManager for proper context management +try: + from history_manager import HistoryManager +except ImportError: + HistoryManager = None + print("Warning: HistoryManager not available. Context tracking will be limited.") + +logger = logging.getLogger(__name__) + +@dataclass +class TextRegion: + """Represents a detected text region (speech bubble, narration box, etc.)""" + text: str + vertices: List[Tuple[int, int]] # Polygon vertices from Cloud Vision + bounding_box: Tuple[int, int, int, int] # x, y, width, height + confidence: float + region_type: str # 'text_block' from Cloud Vision + translated_text: Optional[str] = None + bubble_bounds: Optional[Tuple[int, int, int, int]] = None # RT-DETR bubble bounds for rendering + + def to_dict(self): + return { + 'text': self.text, + 'vertices': self.vertices, + 'bounding_box': self.bounding_box, + 'confidence': self.confidence, + 'region_type': self.region_type, + 'translated_text': self.translated_text + } + +class MangaTranslator: + """Main class for manga translation pipeline using Google Cloud Vision + API Key""" + + # Global, process-wide registry to make local inpainting init safe across threads + # Only dictionary operations are locked (microseconds); heavy work happens outside the lock. + _inpaint_pool_lock = threading.Lock() + _inpaint_pool = {} # (method, model_path) -> {'inpainter': obj|None, 'loaded': bool, 'event': threading.Event()} + + # Detector preloading pool for non-singleton bubble detector instances + _detector_pool_lock = threading.Lock() + _detector_pool = {} # (detector_type, model_id_or_path) -> {'spares': list[BubbleDetector]} + + # Bubble detector singleton loading coordination + _singleton_bd_event = threading.Event() + _singleton_bd_loading = False + + # SINGLETON PATTERN: Shared model instances across all translators + _singleton_lock = threading.Lock() + _singleton_bubble_detector = None + _singleton_local_inpainter = None + _singleton_refs = 0 # Reference counter for singleton instances + + # Class-level cancellation flag for all instances + _global_cancelled = False + _global_cancel_lock = threading.RLock() + + @classmethod + def set_global_cancellation(cls, cancelled: bool): + """Set global cancellation flag for all translator instances""" + with cls._global_cancel_lock: + cls._global_cancelled = cancelled + + @classmethod + def is_globally_cancelled(cls) -> bool: + """Check if globally cancelled""" + with cls._global_cancel_lock: + return cls._global_cancelled + + @classmethod + def reset_global_flags(cls): + """Reset global cancellation flags when starting new translation""" + with cls._global_cancel_lock: + cls._global_cancelled = False + + def _return_inpainter_to_pool(self): + """Return a checked-out inpainter instance back to the pool for reuse.""" + if not hasattr(self, '_checked_out_inpainter') or not hasattr(self, '_inpainter_pool_key'): + return # Nothing checked out + + try: + with MangaTranslator._inpaint_pool_lock: + key = self._inpainter_pool_key + rec = MangaTranslator._inpaint_pool.get(key) + if rec and 'checked_out' in rec: + checked_out = rec['checked_out'] + if self._checked_out_inpainter in checked_out: + checked_out.remove(self._checked_out_inpainter) + self._log(f"🔄 Returned inpainter to pool ({len(checked_out)}/{len(rec.get('spares', []))} still in use)", "info") + # Clear the references + self._checked_out_inpainter = None + self._inpainter_pool_key = None + except Exception as e: + # Non-critical - just log + try: + 
self._log(f"⚠️ Failed to return inpainter to pool: {e}", "debug") + except: + pass + + def _return_bubble_detector_to_pool(self): + """Return a checked-out bubble detector instance back to the pool for reuse.""" + if not hasattr(self, '_checked_out_bubble_detector') or not hasattr(self, '_bubble_detector_pool_key'): + return # Nothing checked out + + try: + with MangaTranslator._detector_pool_lock: + key = self._bubble_detector_pool_key + rec = MangaTranslator._detector_pool.get(key) + if rec and 'checked_out' in rec: + checked_out = rec['checked_out'] + if self._checked_out_bubble_detector in checked_out: + checked_out.remove(self._checked_out_bubble_detector) + self._log(f"🔄 Returned bubble detector to pool ({len(checked_out)}/{len(rec.get('spares', []))} still in use)", "info") + # Clear the references + self._checked_out_bubble_detector = None + self._bubble_detector_pool_key = None + except Exception as e: + # Non-critical - just log + try: + self._log(f"⚠️ Failed to return bubble detector to pool: {e}", "debug") + except: + pass + + @classmethod + def cleanup_singletons(cls, force=False): + """Clean up singleton instances when no longer needed + + Args: + force: If True, cleanup even if references exist (for app shutdown) + """ + with cls._singleton_lock: + if force or cls._singleton_refs == 0: + # Cleanup singleton bubble detector + if cls._singleton_bubble_detector is not None: + try: + if hasattr(cls._singleton_bubble_detector, 'unload'): + cls._singleton_bubble_detector.unload(release_shared=True) + cls._singleton_bubble_detector = None + print("🤖 Singleton bubble detector cleaned up") + except Exception as e: + print(f"Failed to cleanup singleton bubble detector: {e}") + + # Cleanup singleton local inpainter + if cls._singleton_local_inpainter is not None: + try: + if hasattr(cls._singleton_local_inpainter, 'unload'): + cls._singleton_local_inpainter.unload() + cls._singleton_local_inpainter = None + print("🎨 Singleton local inpainter cleaned up") + except Exception as e: + print(f"Failed to cleanup singleton local inpainter: {e}") + + cls._singleton_refs = 0 + + def __init__(self, ocr_config: dict, unified_client, main_gui, log_callback=None): + """Initialize with OCR configuration and API client from main GUI + + Args: + ocr_config: Dictionary with OCR provider settings: + { + 'provider': 'google' or 'azure', + 'google_credentials_path': str (if google), + 'azure_key': str (if azure), + 'azure_endpoint': str (if azure) + } + """ + # CRITICAL: Set thread limits FIRST before any heavy library operations + # This must happen before cv2, torch, numpy operations + try: + parallel_enabled = main_gui.config.get('manga_settings', {}).get('advanced', {}).get('parallel_processing', False) + if not parallel_enabled: + # Force single-threaded mode for all computational libraries + os.environ['OMP_NUM_THREADS'] = '1' + os.environ['MKL_NUM_THREADS'] = '1' + os.environ['OPENBLAS_NUM_THREADS'] = '1' + os.environ['NUMEXPR_NUM_THREADS'] = '1' + os.environ['VECLIB_MAXIMUM_THREADS'] = '1' + os.environ['ONNXRUNTIME_NUM_THREADS'] = '1' + # Set torch and cv2 thread limits if already imported + try: + import torch + torch.set_num_threads(1) + except (ImportError, RuntimeError): + pass + try: + cv2.setNumThreads(1) + except (AttributeError, NameError): + pass + except Exception: + pass # Silently fail if config not available + + # Set up logging first + self.log_callback = log_callback + self.main_gui = main_gui + + # Set up stdout capture to redirect prints to GUI + self._setup_stdout_capture() + + # 
Pass log callback to unified client + self.client = unified_client + if hasattr(self.client, 'log_callback'): + self.client.log_callback = log_callback + elif hasattr(self.client, 'set_log_callback'): + self.client.set_log_callback(log_callback) + self.ocr_config = ocr_config + self.main_gui = main_gui + self.log_callback = log_callback + self.config = main_gui.config + self.manga_settings = self.config.get('manga_settings', {}) + # Concise logging flag from Advanced settings + try: + self.concise_logs = bool(self.manga_settings.get('advanced', {}).get('concise_logs', True)) + except Exception: + self.concise_logs = True + + # Ensure all GUI environment variables are set + self._sync_environment_variables() + + # Initialize attributes + self.current_image = None + self.current_mask = None + self.text_regions = [] + self.translated_regions = [] + self.final_image = None + + # Initialize inpainter attributes + self.local_inpainter = None + self.hybrid_inpainter = None + self.inpainter = None + + # Initialize bubble detector (will check singleton mode later) + self.bubble_detector = None + # Default: do NOT use singleton models unless explicitly enabled + self.use_singleton_models = self.manga_settings.get('advanced', {}).get('use_singleton_models', False) + + # For bubble detector specifically, prefer a singleton so it stays resident in RAM + self.use_singleton_bubble_detector = self.manga_settings.get('advanced', {}).get('use_singleton_bubble_detector', True) + + # Processing flags + self.is_processing = False + self.cancel_requested = False + self.stop_flag = None # Initialize stop_flag attribute + + # Initialize batch mode attributes (API parallelism) from environment, not GUI local toggles + # BATCH_TRANSLATION controls whether UnifiedClient allows concurrent API calls across threads. + try: + self.batch_mode = os.getenv('BATCH_TRANSLATION', '0') == '1' + except Exception: + self.batch_mode = False + + # OCR ROI cache - PER IMAGE ONLY (cleared aggressively to prevent text leakage) + # CRITICAL: This cache MUST be cleared before every new image to prevent text contamination + # THREAD-SAFE: Each translator instance has its own cache (safe for parallel panel translation) + self.ocr_roi_cache = {} + self._current_image_hash = None # Track current image to force cache invalidation + + # Thread-safe lock for cache operations (critical for parallel panel translation) + import threading + self._cache_lock = threading.Lock() + try: + self.batch_size = int(os.getenv('BATCH_SIZE', '1')) + except Exception: + # Fallback to GUI entry if present; otherwise default to 1 + try: + self.batch_size = int(main_gui.batch_size_var.get()) if hasattr(main_gui, 'batch_size_var') else 1 + except Exception: + self.batch_size = 1 + self.batch_current = 1 + + if self.batch_mode: + self._log(f"📦 BATCH MODE: Processing {self.batch_size} images") + self._log(f"⏱️ Keeping API delay for rate limit protection") + + # NOTE: We NO LONGER preload models here! 
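+ # (Illustrative sketch of the lazy pattern, as a comment only; the exact call sites live later in this file:
+ #     if self.bubble_detector is None:
+ #         self.bubble_detector = BubbleDetector()   # hypothetical no-arg construction
+ # so nothing heavy is built until an image actually needs it.)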
+ # Models should only be loaded when actually needed + # This was causing unnecessary RAM usage + ocr_settings = self.manga_settings.get('ocr', {}) + bubble_detection_enabled = ocr_settings.get('bubble_detection_enabled', False) + if bubble_detection_enabled: + self._log("📦 BATCH MODE: Bubble detection will be loaded on first use") + else: + self._log("📦 BATCH MODE: Bubble detection is disabled") + + # Cache for processed images - DEPRECATED/UNUSED (kept for backward compatibility) + # DO NOT USE THIS FOR TEXT DATA - IT CAN LEAK BETWEEN IMAGES + self.cache = {} + # Determine OCR provider + self.ocr_provider = ocr_config.get('provider', 'google') + + if self.ocr_provider == 'google': + if not GOOGLE_CLOUD_VISION_AVAILABLE: + raise ImportError("Google Cloud Vision required. Install with: pip install google-cloud-vision") + + google_path = ocr_config.get('google_credentials_path') + if not google_path: + raise ValueError("Google credentials path required") + + os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = google_path + self.vision_client = vision.ImageAnnotatorClient() + + elif self.ocr_provider == 'azure': + # Import Azure libraries + try: + from azure.cognitiveservices.vision.computervision import ComputerVisionClient + from msrest.authentication import CognitiveServicesCredentials + self.azure_cv = ComputerVisionClient + self.azure_creds = CognitiveServicesCredentials + except ImportError: + raise ImportError("Azure Computer Vision required. Install with: pip install azure-cognitiveservices-vision-computervision") + + azure_key = ocr_config.get('azure_key') + azure_endpoint = ocr_config.get('azure_endpoint') + + if not azure_key or not azure_endpoint: + raise ValueError("Azure key and endpoint required") + + self.vision_client = self.azure_cv( + azure_endpoint, + self.azure_creds(azure_key) + ) + else: + # New OCR providers handled by OCR manager + try: + from ocr_manager import OCRManager + self.ocr_manager = OCRManager(log_callback=log_callback) + print(f"Initialized OCR Manager for {self.ocr_provider}") + # Initialize OCR manager with stop flag awareness + if hasattr(self.ocr_manager, 'reset_stop_flags'): + self.ocr_manager.reset_stop_flags() + except Exception as _e: + self.ocr_manager = None + self._log(f"Failed to initialize OCRManager: {str(_e)}", "error") + + self.client = unified_client + self.main_gui = main_gui + self.log_callback = log_callback + + # Prefer allocator that can return memory to OS (effective before torch loads) + try: + os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "expandable_segments:True") + os.environ.setdefault("TOKENIZERS_PARALLELISM", "false") + except Exception: + pass + + # Get all settings from GUI + self.api_delay = float(self.main_gui.delay_entry.get() if hasattr(main_gui, 'delay_entry') else 2.0) + # Propagate API delay to unified_api_client via env var so its internal pacing/logging matches GUI + try: + os.environ["SEND_INTERVAL_SECONDS"] = str(self.api_delay) + except Exception: + pass + self.temperature = float(main_gui.trans_temp.get() if hasattr(main_gui, 'trans_temp') else 0.3) + self.max_tokens = int(main_gui.max_output_tokens if hasattr(main_gui, 'max_output_tokens') else 4000) + if hasattr(main_gui, 'token_limit_disabled') and main_gui.token_limit_disabled: + self.input_token_limit = None # None means no limit + self._log("📊 Input token limit: DISABLED (unlimited)") + else: + token_limit_value = main_gui.token_limit_entry.get() if hasattr(main_gui, 'token_limit_entry') else '120000' + if token_limit_value and 
token_limit_value.strip().isdigit(): + self.input_token_limit = int(token_limit_value.strip()) + else: + self.input_token_limit = 120000 # Default + self._log(f"📊 Input token limit: {self.input_token_limit} tokens") + + # Get contextual settings from GUI + self.contextual_enabled = main_gui.contextual_var.get() if hasattr(main_gui, 'contextual_var') else False + self.translation_history_limit = int(main_gui.trans_history.get() if hasattr(main_gui, 'trans_history') else 3) + self.rolling_history_enabled = main_gui.translation_history_rolling_var.get() if hasattr(main_gui, 'translation_history_rolling_var') else False + + # Initialize HistoryManager placeholder + self.history_manager = None + self.history_manager_initialized = False + self.history_output_dir = None + + # Full page context translation settings + self.full_page_context_enabled = True + + # Default prompt for full page context mode + self.full_page_context_prompt = ( + "You will receive multiple text segments from a manga page, each prefixed with an index like [0], [1], etc. " + "Translate each segment considering the context of all segments together. " + "Maintain consistency in character names, tone, and style across all translations.\n\n" + "CRITICAL: Return your response as a valid JSON object where each key includes BOTH the index prefix " + "AND the original text EXACTLY as provided (e.g., '[0] こんにちは'), and each value is the translation.\n" + "This is essential for correct mapping - do not modify or omit the index prefixes!\n\n" + "Make sure to properly escape any special characters in the JSON:\n" + "- Use \\n for newlines\n" + "- Use \\\" for quotes\n" + "- Use \\\\ for backslashes\n\n" + "Example:\n" + '{\n' + ' "[0] こんにちは": "Hello",\n' + ' "[1] ありがとう": "Thank you",\n' + ' "[2] さようなら": "Goodbye"\n' + '}\n\n' + 'REMEMBER: Keep the [index] prefix in each JSON key exactly as shown in the input!' 
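+ # Sketch (comment only, with assumed local names): a reply that honors this contract
+ # can be mapped back onto the detected regions roughly like
+ #     data = json.loads(response_text)
+ #     for i, region in enumerate(regions):
+ #         region.translated_text = data.get(f"[{i}] {region.text}", region.translated_text)
+ # where `regions` is the ordered list of TextRegion objects sent to the model.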
+ ) + + # Visual context setting (for non-vision model support) + self.visual_context_enabled = main_gui.config.get('manga_visual_context_enabled', True) + + # Store context for contextual translation (backwards compatibility) + self.translation_context = [] + + # Font settings for text rendering + self.font_path = self._find_font() + self.min_font_size = 10 + self.max_font_size = 60 + try: + _ms = main_gui.config.get('manga_settings', {}) or {} + _rend = _ms.get('rendering', {}) or {} + _font = _ms.get('font_sizing', {}) or {} + self.min_readable_size = int(_rend.get('auto_min_size', _font.get('min_size', 16))) + except Exception: + self.min_readable_size = int(main_gui.config.get('manga_min_readable_size', 16)) + self.max_font_size_limit = main_gui.config.get('manga_max_font_size', 24) + self.strict_text_wrapping = main_gui.config.get('manga_strict_text_wrapping', False) + + # Enhanced text rendering settings - Load from config if available + config = main_gui.config if hasattr(main_gui, 'config') else {} + + self.text_bg_opacity = config.get('manga_bg_opacity', 255) # 0-255, default fully opaque + self.text_bg_style = config.get('manga_bg_style', 'box') # 'box', 'circle', 'wrap' + self.text_bg_reduction = config.get('manga_bg_reduction', 1.0) # Size reduction factor (0.5-1.0) + self.constrain_to_bubble = config.get('manga_constrain_to_bubble', True) + + # Text color from config + manga_text_color = config.get('manga_text_color', [0, 0, 0]) + self.text_color = tuple(manga_text_color) # Convert list to tuple + + self.outline_color = (255, 255, 255) # White outline + self.outline_width_factor = 15 # Divider for font_size to get outline width + self.selected_font_style = config.get('manga_font_path', None) # Will store selected font path + self.custom_font_size = config.get('manga_font_size', None) if config.get('manga_font_size', 0) > 0 else None + + # Text shadow settings from config + self.shadow_enabled = config.get('manga_shadow_enabled', False) + manga_shadow_color = config.get('manga_shadow_color', [128, 128, 128]) + self.shadow_color = tuple(manga_shadow_color) # Convert list to tuple + self.shadow_offset_x = config.get('manga_shadow_offset_x', 2) + self.shadow_offset_y = config.get('manga_shadow_offset_y', 2) + self.shadow_blur = config.get('manga_shadow_blur', 0) # 0 = sharp shadow, higher = more blur + self.force_caps_lock = config.get('manga_force_caps_lock', False) + self.skip_inpainting = config.get('manga_skip_inpainting', True) + + # Font size multiplier mode - Load from config + self.font_size_mode = config.get('manga_font_size_mode', 'fixed') # 'fixed' or 'multiplier' + self.font_size_multiplier = config.get('manga_font_size_multiplier', 1.0) # Default multiplier + + # Inpainting quality + self.inpaint_quality = config.get('manga_inpaint_quality', 'high') # 'high' or 'fast' + + self._log("\n🔧 MangaTranslator initialized with settings:") + self._log(f" API Delay: {self.api_delay}s") + self._log(f" Temperature: {self.temperature}") + self._log(f" Max Output Tokens: {self.max_tokens}") + self._log(f" Input Token Limit: {'DISABLED' if self.input_token_limit is None else self.input_token_limit}") + self._log(f" Contextual Translation: {'ENABLED' if self.contextual_enabled else 'DISABLED'}") + self._log(f" Translation History Limit: {self.translation_history_limit}") + self._log(f" Rolling History: {'ENABLED' if self.rolling_history_enabled else 'DISABLED'}") + self._log(f" Font Path: {self.font_path or 'Default'}") + self._log(f" Text Rendering: BG {self.text_bg_style}, Opacity
{int(self.text_bg_opacity/255*100)}%") + self._log(f" Shadow: {'ENABLED' if self.shadow_enabled else 'DISABLED'}\n") + + self.manga_settings = config.get('manga_settings', {}) + + # Initialize local inpainter if configured (respects singleton mode) + if self.manga_settings.get('inpainting', {}).get('method') == 'local': + if self.use_singleton_models: + self._initialize_singleton_local_inpainter() + else: + self._initialize_local_inpainter() + + # advanced settings + self.debug_mode = self.manga_settings.get('advanced', {}).get('debug_mode', False) + self.save_intermediate = self.manga_settings.get('advanced', {}).get('save_intermediate', False) + self.parallel_processing = self.manga_settings.get('advanced', {}).get('parallel_processing', True) + self.max_workers = self.manga_settings.get('advanced', {}).get('max_workers', 2) + # Deep cleanup control: if True, release models after every image (aggressive) + self.force_deep_cleanup_each_image = self.manga_settings.get('advanced', {}).get('force_deep_cleanup_each_image', False) + + # RAM cap + adv = self.manga_settings.get('advanced', {}) + self.ram_cap_enabled = bool(adv.get('ram_cap_enabled', False)) + self.ram_cap_mb = int(adv.get('ram_cap_mb', 0) or 0) + self.ram_cap_mode = str(adv.get('ram_cap_mode', 'soft')) + self.ram_check_interval_sec = float(adv.get('ram_check_interval_sec', 1.0)) + self.ram_recovery_margin_mb = int(adv.get('ram_recovery_margin_mb', 256)) + self._mem_over_cap = False + self._mem_stop_event = threading.Event() + self._mem_thread = None + # Advanced RAM gate tuning + self.ram_gate_timeout_sec = float(adv.get('ram_gate_timeout_sec', 10.0)) + self.ram_min_floor_over_baseline_mb = int(adv.get('ram_min_floor_over_baseline_mb', 128)) + # Measure baseline at init + try: + self.ram_baseline_mb = self._get_process_rss_mb() or 0 + except Exception: + self.ram_baseline_mb = 0 + if self.ram_cap_enabled and self.ram_cap_mb > 0: + self._init_ram_cap() + + + def set_stop_flag(self, stop_flag): + """Set the stop flag for checking interruptions""" + self.stop_flag = stop_flag + self.cancel_requested = False + + def reset_stop_flags(self): + """Reset all stop flags when starting new translation""" + self.cancel_requested = False + self.is_processing = False + # Reset global flags + self.reset_global_flags() + self._log("🔄 Stop flags reset for new translation", "debug") + + def _check_stop(self): + """Check if stop has been requested using multiple sources""" + # Check global cancellation first + if self.is_globally_cancelled(): + self.cancel_requested = True + return True + + # Check local stop flag (only if it exists and is set) + if hasattr(self, 'stop_flag') and self.stop_flag and self.stop_flag.is_set(): + self.cancel_requested = True + return True + + # Check processing flag + if hasattr(self, 'cancel_requested') and self.cancel_requested: + return True + + return False + + def _setup_stdout_capture(self): + """Set up stdout capture to redirect print statements to GUI""" + import sys + import builtins + + # Store original print function + self._original_print = builtins.print + + # Create custom print function + def gui_print(*args, **kwargs): + """Custom print that redirects to GUI""" + # Convert args to string + message = ' '.join(str(arg) for arg in args) + + # Check if this is one of the specific messages we want to capture + if any(marker in message for marker in ['🔍', '✅', '⏳', 'INFO:', 'ERROR:', 'WARNING:']): + if self.log_callback: + # Clean up the message + message = message.strip() + + # Determine level + level = 
'info' + if 'ERROR:' in message or '❌' in message: + level = 'error' + elif 'WARNING:' in message or '⚠️' in message: + level = 'warning' + + # Remove prefixes like "INFO:" if present + for prefix in ['INFO:', 'ERROR:', 'WARNING:', 'DEBUG:']: + message = message.replace(prefix, '').strip() + + # Send to GUI + self.log_callback(message, level) + return # Don't print to console + + # For other messages, use original print + self._original_print(*args, **kwargs) + + # Replace the built-in print + builtins.print = gui_print + + def __del__(self): + """Restore original print when MangaTranslator is destroyed""" + if hasattr(self, '_original_print'): + import builtins + builtins.print = self._original_print + # Best-effort shutdown in case caller forgot to call shutdown() + try: + self.shutdown() + except Exception: + pass + + def _cleanup_thread_locals(self): + """Aggressively release thread-local heavy objects (onnx sessions, detectors).""" + try: + if hasattr(self, '_thread_local'): + tl = self._thread_local + # Release thread-local inpainters + if hasattr(tl, 'local_inpainters') and isinstance(tl.local_inpainters, dict): + try: + for inp in list(tl.local_inpainters.values()): + try: + if hasattr(inp, 'unload'): + inp.unload() + except Exception: + pass + finally: + try: + tl.local_inpainters.clear() + except Exception: + pass + # Return thread-local bubble detector to pool (DO NOT unload) + if hasattr(tl, 'bubble_detector') and tl.bubble_detector is not None: + try: + # Instead of unloading, return to pool for reuse + self._return_bubble_detector_to_pool() + # Keep thread-local reference intact for reuse in next image + # Only clear if we're truly shutting down the thread + except Exception: + pass + except Exception: + # Best-effort cleanup only + pass + + def shutdown(self): + """Fully release resources for MangaTranslator (models, detectors, torch caches, threads).""" + try: + # Decrement singleton reference counter if using singleton mode + if hasattr(self, 'use_singleton_models') and self.use_singleton_models: + with MangaTranslator._singleton_lock: + MangaTranslator._singleton_refs = max(0, MangaTranslator._singleton_refs - 1) + self._log(f"Singleton refs: {MangaTranslator._singleton_refs}", "debug") + + # Stop memory watchdog thread if running + if hasattr(self, '_mem_stop_event') and getattr(self, '_mem_stop_event', None) is not None: + try: + self._mem_stop_event.set() + except Exception: + pass + # Perform deep cleanup, then try to teardown torch + try: + self._deep_cleanup_models() + except Exception: + pass + try: + self._force_torch_teardown() + except Exception: + pass + try: + self._huggingface_teardown() + except Exception: + pass + try: + self._trim_working_set() + except Exception: + pass + # Null out heavy references + for attr in [ + 'client', 'vision_client', 'local_inpainter', 'hybrid_inpainter', 'inpainter', + 'bubble_detector', 'ocr_manager', 'history_manager', 'current_image', 'current_mask', + 'text_regions', 'translated_regions', 'final_image' + ]: + try: + if hasattr(self, attr): + setattr(self, attr, None) + except Exception: + pass + except Exception as e: + try: + self._log(f"⚠️ shutdown() encountered: {e}", "warning") + except Exception: + pass + + def _sync_environment_variables(self): + """Sync all GUI environment variables to ensure manga translation respects GUI settings + This ensures settings like RETRY_TRUNCATED, THINKING_BUDGET, etc. 
are properly set + """ + try: + # Get config from main_gui if available + if not hasattr(self, 'main_gui') or not self.main_gui: + return + + # Use the main_gui's set_all_environment_variables method if available + if hasattr(self.main_gui, 'set_all_environment_variables'): + self.main_gui.set_all_environment_variables() + else: + # Fallback: manually set key variables + config = self.main_gui.config if hasattr(self.main_gui, 'config') else {} + + # Thinking settings (most important for speed) + thinking_enabled = config.get('enable_gemini_thinking', True) + thinking_budget = config.get('gemini_thinking_budget', -1) + + # CRITICAL FIX: If thinking is disabled, force budget to 0 regardless of config value + if not thinking_enabled: + thinking_budget = 0 + + os.environ['ENABLE_GEMINI_THINKING'] = '1' if thinking_enabled else '0' + os.environ['GEMINI_THINKING_BUDGET'] = str(thinking_budget) + os.environ['THINKING_BUDGET'] = str(thinking_budget) # Also set for unified_api_client + + # Retry settings + retry_truncated = config.get('retry_truncated', False) + max_retry_tokens = config.get('max_retry_tokens', 16384) + max_retries = config.get('max_retries', 7) + os.environ['RETRY_TRUNCATED'] = '1' if retry_truncated else '0' + os.environ['MAX_RETRY_TOKENS'] = str(max_retry_tokens) + os.environ['MAX_RETRIES'] = str(max_retries) + + # Safety settings + disable_gemini_safety = config.get('disable_gemini_safety', False) + os.environ['DISABLE_GEMINI_SAFETY'] = '1' if disable_gemini_safety else '0' + + except Exception as e: + self._log(f"⚠️ Failed to sync environment variables: {e}", "warning") + + def _force_torch_teardown(self): + """Best-effort teardown of PyTorch CUDA context and caches to drop closer to baseline. + Safe to call even if CUDA is not available. + """ + try: + import torch, os, gc + # CPU: free cached tensors + try: + gc.collect() + except Exception: + pass + # CUDA path + if hasattr(torch, 'cuda') and torch.cuda.is_available(): + try: + torch.cuda.synchronize() + except Exception: + pass + try: + torch.cuda.empty_cache() + except Exception: + pass + try: + torch.cuda.ipc_collect() + except Exception: + pass + # Try to clear cuBLAS workspaces (not always available) + try: + getattr(torch._C, "_cuda_clearCublasWorkspaces")() + except Exception: + pass + # Optional hard reset via CuPy if present + reset_done = False + try: + import cupy + try: + cupy.cuda.runtime.deviceReset() + reset_done = True + self._log("CUDA deviceReset via CuPy", "debug") + except Exception: + pass + except Exception: + pass + # Fallback: attempt to call cudaDeviceReset from cudart on Windows + if os.name == 'nt' and not reset_done: + try: + import ctypes + candidates = [ + "cudart64_12.dll", "cudart64_120.dll", "cudart64_110.dll", + "cudart64_102.dll", "cudart64_101.dll", "cudart64_100.dll", "cudart64_90.dll" + ] + for name in candidates: + try: + dll = ctypes.CDLL(name) + dll.cudaDeviceReset.restype = ctypes.c_int + rc = dll.cudaDeviceReset() + self._log(f"cudaDeviceReset via {name} rc={rc}", "debug") + reset_done = True + break + except Exception: + continue + except Exception: + pass + except Exception: + pass + + def _huggingface_teardown(self): + """Best-effort teardown of HuggingFace/transformers/tokenizers state. 
+ - Clears on-disk model cache for known repos (via _clear_hf_cache) + - Optionally purges relevant modules from sys.modules (AGGRESSIVE_HF_UNLOAD=1) + """ + try: + import os, sys, gc + # Clear disk cache for detectors (and any default repo) to avoid growth across runs + try: + self._clear_hf_cache() + except Exception: + pass + # Optional aggressive purge of modules to free Python-level caches + if os.getenv('AGGRESSIVE_HF_UNLOAD', '1') == '1': + prefixes = ( + 'transformers', + 'huggingface_hub', + 'tokenizers', + 'safetensors', + 'accelerate', + ) + to_purge = [m for m in list(sys.modules.keys()) if m.startswith(prefixes)] + for m in to_purge: + try: + del sys.modules[m] + except Exception: + pass + gc.collect() + except Exception: + pass + + def _deep_cleanup_models(self): + """Release ALL model references and caches to reduce RAM after translation. + This is the COMPREHENSIVE cleanup that ensures all models are unloaded from RAM. + """ + self._log("🧹 Starting comprehensive model cleanup to free RAM...", "info") + + try: + # ========== 1. CLEANUP OCR MODELS ========== + try: + if hasattr(self, 'ocr_manager'): + ocr_manager = getattr(self, 'ocr_manager', None) + if ocr_manager: + self._log(" Cleaning up OCR models...", "debug") + # Clear all loaded OCR providers + if hasattr(ocr_manager, 'providers'): + for provider_name, provider in ocr_manager.providers.items(): + try: + # Unload the model + if hasattr(provider, 'model'): + provider.model = None + if hasattr(provider, 'processor'): + provider.processor = None + if hasattr(provider, 'tokenizer'): + provider.tokenizer = None + if hasattr(provider, 'reader'): + provider.reader = None + if hasattr(provider, 'is_loaded'): + provider.is_loaded = False + self._log(f" ✓ Unloaded {provider_name} OCR provider", "debug") + except Exception as e: + self._log(f" Warning: Failed to unload {provider_name}: {e}", "debug") + # Clear the entire OCR manager + self.ocr_manager = None + self._log(" ✓ OCR models cleaned up", "debug") + except Exception as e: + self._log(f" Warning: OCR cleanup failed: {e}", "debug") + + # ========== 2. 
CLEANUP BUBBLE DETECTOR (YOLO/RT-DETR) ========== + try: + # Instance-level bubble detector + if hasattr(self, 'bubble_detector') and self.bubble_detector is not None: + # Check if using singleton mode - don't unload shared instance + if (getattr(self, 'use_singleton_bubble_detector', False)) or (hasattr(self, 'use_singleton_models') and self.use_singleton_models): + self._log(" Skipping bubble detector cleanup (singleton mode)", "debug") + # Just clear our reference, don't unload the shared instance + self.bubble_detector = None + else: + self._log(" Cleaning up bubble detector (YOLO/RT-DETR)...", "debug") + bd = self.bubble_detector + try: + if hasattr(bd, 'unload'): + bd.unload(release_shared=True) # This unloads YOLO and RT-DETR models + self._log(" ✓ Called bubble detector unload", "debug") + except Exception as e: + self._log(f" Warning: Bubble detector unload failed: {e}", "debug") + self.bubble_detector = None + self._log(" ✓ Bubble detector cleaned up", "debug") + + # Also clean class-level shared RT-DETR models unless keeping singleton warm + if not getattr(self, 'use_singleton_bubble_detector', False): + try: + from bubble_detector import BubbleDetector + if hasattr(BubbleDetector, '_rtdetr_shared_model'): + BubbleDetector._rtdetr_shared_model = None + if hasattr(BubbleDetector, '_rtdetr_shared_processor'): + BubbleDetector._rtdetr_shared_processor = None + if hasattr(BubbleDetector, '_rtdetr_loaded'): + BubbleDetector._rtdetr_loaded = False + self._log(" ✓ Cleared shared RT-DETR cache", "debug") + except Exception: + pass + # Clear preloaded detector spares + try: + with MangaTranslator._detector_pool_lock: + for rec in MangaTranslator._detector_pool.values(): + try: + rec['spares'] = [] + except Exception: + pass + except Exception: + pass + except Exception as e: + self._log(f" Warning: Bubble detector cleanup failed: {e}", "debug") + + # ========== 3. 
CLEANUP INPAINTERS ========== + try: + self._log(" Cleaning up inpainter models...", "debug") + + # Instance-level inpainter + if hasattr(self, 'local_inpainter') and self.local_inpainter is not None: + # Check if using singleton mode - don't unload shared instance + if hasattr(self, 'use_singleton_models') and self.use_singleton_models: + self._log(" Skipping local inpainter cleanup (singleton mode)", "debug") + # Just clear our reference, don't unload the shared instance + self.local_inpainter = None + else: + try: + if hasattr(self.local_inpainter, 'unload'): + self.local_inpainter.unload() + self._log(" ✓ Unloaded local inpainter", "debug") + except Exception: + pass + self.local_inpainter = None + + # Hybrid inpainter + if hasattr(self, 'hybrid_inpainter') and self.hybrid_inpainter is not None: + try: + if hasattr(self.hybrid_inpainter, 'unload'): + self.hybrid_inpainter.unload() + self._log(" ✓ Unloaded hybrid inpainter", "debug") + except Exception: + pass + self.hybrid_inpainter = None + + # Generic inpainter reference + if hasattr(self, 'inpainter') and self.inpainter is not None: + try: + if hasattr(self.inpainter, 'unload'): + self.inpainter.unload() + self._log(" ✓ Unloaded inpainter", "debug") + except Exception: + pass + self.inpainter = None + + # Release any shared inpainters in the global pool + with MangaTranslator._inpaint_pool_lock: + for key, rec in list(MangaTranslator._inpaint_pool.items()): + try: + inp = rec.get('inpainter') if isinstance(rec, dict) else None + if inp is not None: + try: + if hasattr(inp, 'unload'): + inp.unload() + self._log(f" ✓ Unloaded pooled inpainter: {key}", "debug") + except Exception: + pass + # Drop any spare instances as well + try: + for spare in rec.get('spares') or []: + try: + if hasattr(spare, 'unload'): + spare.unload() + except Exception: + pass + rec['spares'] = [] + except Exception: + pass + except Exception: + pass + MangaTranslator._inpaint_pool.clear() + self._log(" ✓ Cleared inpainter pool", "debug") + + # Release process-wide shared inpainter + if hasattr(MangaTranslator, '_shared_local_inpainter'): + shared = getattr(MangaTranslator, '_shared_local_inpainter', None) + if shared is not None: + try: + if hasattr(shared, 'unload'): + shared.unload() + self._log(" ✓ Unloaded shared inpainter", "debug") + except Exception: + pass + setattr(MangaTranslator, '_shared_local_inpainter', None) + + self._log(" ✓ Inpainter models cleaned up", "debug") + except Exception as e: + self._log(f" Warning: Inpainter cleanup failed: {e}", "debug") + + # ========== 4. 
CLEANUP THREAD-LOCAL MODELS ========== + try: + if hasattr(self, '_thread_local') and self._thread_local is not None: + self._log(" Cleaning up thread-local models...", "debug") + tl = self._thread_local + + # Thread-local inpainters + if hasattr(tl, 'local_inpainters') and isinstance(tl.local_inpainters, dict): + for key, inp in list(tl.local_inpainters.items()): + try: + if hasattr(inp, 'unload'): + inp.unload() + self._log(f" ✓ Unloaded thread-local inpainter: {key}", "debug") + except Exception: + pass + tl.local_inpainters.clear() + + # Thread-local bubble detector + if hasattr(tl, 'bubble_detector') and tl.bubble_detector is not None: + try: + if hasattr(tl.bubble_detector, 'unload'): + tl.bubble_detector.unload(release_shared=False) + self._log(" ✓ Unloaded thread-local bubble detector", "debug") + except Exception: + pass + tl.bubble_detector = None + + self._log(" ✓ Thread-local models cleaned up", "debug") + except Exception as e: + self._log(f" Warning: Thread-local cleanup failed: {e}", "debug") + + # ========== 5. CLEAR PYTORCH/CUDA CACHE ========== + try: + import torch + if torch.cuda.is_available(): + torch.cuda.empty_cache() + torch.cuda.synchronize() + self._log(" ✓ Cleared CUDA cache", "debug") + except Exception: + pass + + # ========== 6. FORCE GARBAGE COLLECTION ========== + try: + import gc + gc.collect() + # Multiple passes for stubborn references + gc.collect() + gc.collect() + self._log(" ✓ Forced garbage collection", "debug") + except Exception: + pass + + self._log("✅ Model cleanup complete - RAM should be freed", "info") + + except Exception as e: + # Never raise from deep cleanup + self._log(f"⚠️ Model cleanup encountered error: {e}", "warning") + pass + + def _clear_hf_cache(self, repo_id: str = None): + """Best-effort: clear Hugging Face cache for a specific repo (RT-DETR by default). + This targets disk cache; it won’t directly reduce RAM but helps avoid growth across runs. 
+ """ + try: + # Determine repo_id from BubbleDetector if not provided + if repo_id is None: + try: + import bubble_detector as _bdmod + BD = getattr(_bdmod, 'BubbleDetector', None) + if BD is not None and hasattr(BD, '_rtdetr_repo_id'): + repo_id = getattr(BD, '_rtdetr_repo_id') or 'ogkalu/comic-text-and-bubble-detector' + else: + repo_id = 'ogkalu/comic-text-and-bubble-detector' + except Exception: + repo_id = 'ogkalu/comic-text-and-bubble-detector' + + # Try to use huggingface_hub to delete just the matching repo cache + try: + from huggingface_hub import scan_cache_dir + info = scan_cache_dir() + repos = getattr(info, 'repos', []) + to_delete = [] + for repo in repos: + rid = getattr(repo, 'repo_id', None) or getattr(repo, 'id', None) + if rid == repo_id: + to_delete.append(repo) + if to_delete: + # Prefer the high-level deletion API if present + if hasattr(info, 'delete_repos'): + info.delete_repos(to_delete) + else: + import shutil + for repo in to_delete: + repo_dir = getattr(repo, 'repo_path', None) or getattr(repo, 'repo_dir', None) + if repo_dir and os.path.exists(repo_dir): + shutil.rmtree(repo_dir, ignore_errors=True) + except Exception: + # Fallback: try removing default HF cache dir for this repo pattern + try: + from pathlib import Path + hf_home = os.environ.get('HF_HOME') + if hf_home: + base = Path(hf_home) + else: + base = Path.home() / '.cache' / 'huggingface' / 'hub' + # Repo cache dirs are named like models--{org}--{name} + safe_name = repo_id.replace('/', '--') + candidates = list(base.glob(f'models--{safe_name}*')) + import shutil + for c in candidates: + shutil.rmtree(str(c), ignore_errors=True) + except Exception: + pass + except Exception: + # Best-effort only + pass + + def _trim_working_set(self): + """Release freed memory back to the OS where possible. 
+ - On Windows: use EmptyWorkingSet on current process + - On Linux: attempt malloc_trim(0) + - On macOS: no direct API; rely on GC + """ + import sys + import platform + try: + system = platform.system() + if system == 'Windows': + import ctypes + psapi = ctypes.windll.psapi + kernel32 = ctypes.windll.kernel32 + h_process = kernel32.GetCurrentProcess() + psapi.EmptyWorkingSet(h_process) + elif system == 'Linux': + import ctypes + libc = ctypes.CDLL('libc.so.6') + try: + libc.malloc_trim(0) + except Exception: + pass + except Exception: + pass + + def _get_process_rss_mb(self) -> int: + """Return current RSS in MB (cross-platform best-effort).""" + try: + import psutil, os as _os + return int(psutil.Process(_os.getpid()).memory_info().rss / (1024*1024)) + except Exception: + # Windows fallback + try: + import ctypes, os as _os + class PROCESS_MEMORY_COUNTERS(ctypes.Structure): + _fields_ = [ + ("cb", ctypes.c_uint), + ("PageFaultCount", ctypes.c_uint), + ("PeakWorkingSetSize", ctypes.c_size_t), + ("WorkingSetSize", ctypes.c_size_t), + ("QuotaPeakPagedPoolUsage", ctypes.c_size_t), + ("QuotaPagedPoolUsage", ctypes.c_size_t), + ("QuotaPeakNonPagedPoolUsage", ctypes.c_size_t), + ("QuotaNonPagedPoolUsage", ctypes.c_size_t), + ("PagefileUsage", ctypes.c_size_t), + ("PeakPagefileUsage", ctypes.c_size_t), + ] + GetCurrentProcess = ctypes.windll.kernel32.GetCurrentProcess + GetProcessMemoryInfo = ctypes.windll.psapi.GetProcessMemoryInfo + counters = PROCESS_MEMORY_COUNTERS() + counters.cb = ctypes.sizeof(PROCESS_MEMORY_COUNTERS) + GetProcessMemoryInfo(GetCurrentProcess(), ctypes.byref(counters), counters.cb) + return int(counters.WorkingSetSize / (1024*1024)) + except Exception: + return 0 + + def _apply_windows_job_memory_limit(self, cap_mb: int) -> bool: + """Apply a hard memory cap using Windows Job Objects. 
Returns True on success.""" + try: + import ctypes + from ctypes import wintypes + JOB_OBJECT_LIMIT_JOB_MEMORY = 0x00000200 + JobObjectExtendedLimitInformation = 9 + + class JOBOBJECT_BASIC_LIMIT_INFORMATION(ctypes.Structure): + _fields_ = [ + ("PerProcessUserTimeLimit", ctypes.c_longlong), + ("PerJobUserTimeLimit", ctypes.c_longlong), + ("LimitFlags", wintypes.DWORD), + ("MinimumWorkingSetSize", ctypes.c_size_t), + ("MaximumWorkingSetSize", ctypes.c_size_t), + ("ActiveProcessLimit", wintypes.DWORD), + ("Affinity", ctypes.c_void_p), + ("PriorityClass", wintypes.DWORD), + ("SchedulingClass", wintypes.DWORD), + ] + + class IO_COUNTERS(ctypes.Structure): + _fields_ = [ + ("ReadOperationCount", ctypes.c_ulonglong), + ("WriteOperationCount", ctypes.c_ulonglong), + ("OtherOperationCount", ctypes.c_ulonglong), + ("ReadTransferCount", ctypes.c_ulonglong), + ("WriteTransferCount", ctypes.c_ulonglong), + ("OtherTransferCount", ctypes.c_ulonglong), + ] + + class JOBOBJECT_EXTENDED_LIMIT_INFORMATION(ctypes.Structure): + _fields_ = [ + ("BasicLimitInformation", JOBOBJECT_BASIC_LIMIT_INFORMATION), + ("IoInfo", IO_COUNTERS), + ("ProcessMemoryLimit", ctypes.c_size_t), + ("JobMemoryLimit", ctypes.c_size_t), + ("PeakProcessMemoryUsed", ctypes.c_size_t), + ("PeakJobMemoryUsed", ctypes.c_size_t), + ] + + kernel32 = ctypes.WinDLL('kernel32', use_last_error=True) + CreateJobObject = kernel32.CreateJobObjectW + CreateJobObject.argtypes = [ctypes.c_void_p, wintypes.LPCWSTR] + CreateJobObject.restype = wintypes.HANDLE + SetInformationJobObject = kernel32.SetInformationJobObject + SetInformationJobObject.argtypes = [wintypes.HANDLE, wintypes.INT, ctypes.c_void_p, wintypes.DWORD] + SetInformationJobObject.restype = wintypes.BOOL + AssignProcessToJobObject = kernel32.AssignProcessToJobObject + AssignProcessToJobObject.argtypes = [wintypes.HANDLE, wintypes.HANDLE] + AssignProcessToJobObject.restype = wintypes.BOOL + GetCurrentProcess = kernel32.GetCurrentProcess + GetCurrentProcess.restype = wintypes.HANDLE + + hJob = CreateJobObject(None, None) + if not hJob: + return False + + info = JOBOBJECT_EXTENDED_LIMIT_INFORMATION() + info.BasicLimitInformation.LimitFlags = JOB_OBJECT_LIMIT_JOB_MEMORY + info.JobMemoryLimit = ctypes.c_size_t(int(cap_mb) * 1024 * 1024) + + ok = SetInformationJobObject(hJob, JobObjectExtendedLimitInformation, ctypes.byref(info), ctypes.sizeof(info)) + if not ok: + return False + + ok = AssignProcessToJobObject(hJob, GetCurrentProcess()) + if not ok: + return False + return True + except Exception: + return False + + def _memory_watchdog(self): + try: + import time + while not self._mem_stop_event.is_set(): + if not self.ram_cap_enabled or self.ram_cap_mb <= 0: + break + rss = self._get_process_rss_mb() + if rss and rss > self.ram_cap_mb: + self._mem_over_cap = True + # Aggressive attempt to reduce memory + try: + self._deep_cleanup_models() + except Exception: + pass + try: + self._trim_working_set() + except Exception: + pass + # Wait a bit before re-checking + time.sleep(max(0.2, self.ram_check_interval_sec / 2)) + time.sleep(0.1) # Brief pause for stability + self._log("💤 Memory watchdog pausing briefly for stability", "debug") + else: + # Below cap or couldn't read RSS + self._mem_over_cap = False + time.sleep(self.ram_check_interval_sec) + except Exception: + pass + + def _init_ram_cap(self): + # Hard cap via Windows Job Object if selected and on Windows + try: + import platform + if self.ram_cap_mode.startswith('hard') or self.ram_cap_mode == 'hard': + if platform.system() == 'Windows': + 
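# Hard mode assigns this process to a Win32 Job Object whose JobMemoryLimit is
# ram_cap_mb MiB (see _apply_windows_job_memory_limit above), so the OS refuses
# further allocations past the cap; soft mode instead relies on the
# _memory_watchdog thread to run _deep_cleanup_models and _trim_working_set
# once RSS exceeds the configured limit.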
if not self._apply_windows_job_memory_limit(self.ram_cap_mb): + self._log("⚠️ Failed to apply hard RAM cap; falling back to soft mode", "warning") + self.ram_cap_mode = 'soft' + else: + self._log("⚠️ Hard RAM cap only supported on Windows; using soft mode", "warning") + self.ram_cap_mode = 'soft' + except Exception: + self.ram_cap_mode = 'soft' + # Start watchdog regardless of mode to proactively stay under cap during operations + try: + self._mem_thread = threading.Thread(target=self._memory_watchdog, daemon=True) + self._mem_thread.start() + except Exception: + pass + + def _block_if_over_cap(self, context_msg: str = ""): + # If over cap, block until we drop under cap - margin + if not self.ram_cap_enabled or self.ram_cap_mb <= 0: + return + import time + # Never require target below baseline + floor margin + baseline = max(0, getattr(self, 'ram_baseline_mb', 0)) + floor = baseline + max(0, self.ram_min_floor_over_baseline_mb) + # Compute target below cap by recovery margin, but not below floor + target = self.ram_cap_mb - max(64, min(self.ram_recovery_margin_mb, self.ram_cap_mb // 4)) + target = max(target, floor) + start = time.time() + waited = False + last_log = 0 + while True: + rss = self._get_process_rss_mb() + now = time.time() + if rss and rss <= target: + break + # Timeout to avoid deadlock when baseline can't go lower than target + if now - start > max(2.0, self.ram_gate_timeout_sec): + self._log(f"⌛ RAM gate timeout for {context_msg}: RSS={rss} MB, target={target} MB; proceeding in low-memory mode", "warning") + break + waited = True + # Periodic log to help diagnose + if now - last_log > 3.0 and rss: + self._log(f"⏳ Waiting for RAM drop: RSS={rss} MB, target={target} MB ({context_msg})", "info") + last_log = now + # Attempt cleanup while waiting + try: + self._deep_cleanup_models() + except Exception: + pass + try: + self._trim_working_set() + except Exception: + pass + if self._check_stop(): + break + time.sleep(0.1) # Brief pause for stability + self._log("💤 RAM gate pausing briefly for stability", "debug") + if waited and context_msg: + self._log(f"🧹 Proceeding with {context_msg} (RSS now {self._get_process_rss_mb()} MB; target {target} MB)", "info") + + def set_batch_mode(self, enabled: bool, batch_size: int = 1): + """Enable or disable batch mode optimizations""" + self.batch_mode = enabled + self.batch_size = batch_size + + if enabled: + # Check if bubble detection is actually enabled before considering preload + ocr_settings = self.manga_settings.get('ocr', {}) if hasattr(self, 'manga_settings') else {} + bubble_detection_enabled = ocr_settings.get('bubble_detection_enabled', False) + + # Only suggest preloading if bubble detection is actually going to be used + if bubble_detection_enabled: + self._log("📦 BATCH MODE: Bubble detection models will load on first use") + # NOTE: We don't actually preload anymore to save RAM + # Models are loaded on-demand when first needed + + # Similarly for OCR models - they load on demand + if hasattr(self, 'ocr_manager') and self.ocr_manager: + self._log(f"📦 BATCH MODE: {self.ocr_provider} will load on first use") + # NOTE: We don't preload OCR models either + + self._log(f"📦 BATCH MODE ENABLED: Processing {batch_size} images") + self._log(f"⏱️ API delay: {self.api_delay}s (preserved for rate limiting)") + else: + self._log("📝 BATCH MODE DISABLED") + + def _ensure_bubble_detector_ready(self, ocr_settings): + """Ensure a usable BubbleDetector for current thread, auto-reloading models after cleanup.""" + try: + bd = 
self._get_thread_bubble_detector() + detector_type = ocr_settings.get('detector_type', 'rtdetr_onnx') + if detector_type == 'rtdetr_onnx': + if not getattr(bd, 'rtdetr_onnx_loaded', False): + model_id = ocr_settings.get('rtdetr_model_url') or ocr_settings.get('bubble_model_path') + if not bd.load_rtdetr_onnx_model(model_id=model_id): + return None + elif detector_type == 'rtdetr': + if not getattr(bd, 'rtdetr_loaded', False): + model_id = ocr_settings.get('rtdetr_model_url') or ocr_settings.get('bubble_model_path') + if not bd.load_rtdetr_model(model_id=model_id): + return None + elif detector_type == 'yolo': + model_path = ocr_settings.get('bubble_model_path') + if model_path and not getattr(bd, 'model_loaded', False): + if not bd.load_model(model_path): + return None + else: # auto + # Prefer RT-DETR if available, else YOLO if configured + if not getattr(bd, 'rtdetr_loaded', False): + bd.load_rtdetr_model(model_id=ocr_settings.get('rtdetr_model_url') or ocr_settings.get('bubble_model_path')) + return bd + except Exception: + return None + + def _merge_with_bubble_detection(self, regions: List[TextRegion], image_path: str) -> List[TextRegion]: + """Merge text regions by bubble and filter based on RT-DETR class settings""" + try: + # Get detector settings from config + ocr_settings = self.main_gui.config.get('manga_settings', {}).get('ocr', {}) + detector_type = ocr_settings.get('detector_type', 'rtdetr_onnx') + + # Ensure detector is ready (auto-reload after cleanup) + bd = self._ensure_bubble_detector_ready(ocr_settings) + if bd is None: + self._log("⚠️ Bubble detector unavailable after cleanup; falling back to proximity merge", "warning") + # Use more conservative threshold for Azure/Google to avoid cross-bubble merging + threshold = 30 if getattr(self, 'ocr_provider', '').lower() in ('azure', 'google') else 50 + return self._merge_nearby_regions(regions, threshold=threshold) + + # Check if bubble detection is enabled + if not ocr_settings.get('bubble_detection_enabled', False): + self._log("📦 Bubble detection is disabled in settings", "info") + # Use more conservative threshold for Azure/Google to avoid cross-bubble merging + threshold = 30 if getattr(self, 'ocr_provider', '').lower() in ('azure', 'google') else 50 + return self._merge_nearby_regions(regions, threshold=threshold) + + # Initialize thread-local detector + bd = self._get_thread_bubble_detector() + + bubbles = None + rtdetr_detections = None + + if detector_type == 'rtdetr_onnx': + if not self.batch_mode: + self._log("🤖 Using RTEDR_onnx for bubble detection", "info") + if self.batch_mode and getattr(bd, 'rtdetr_onnx_loaded', False): + pass + elif not getattr(bd, 'rtdetr_onnx_loaded', False): + self._log("📥 Loading RTEDR_onnx model...", "info") + if not bd.load_rtdetr_onnx_model(): + self._log("⚠️ Failed to load RTEDR_onnx, falling back to traditional merging", "warning") + return self._merge_nearby_regions(regions) + else: + # Model loaded successfully - mark in pool for reuse + try: + model_id = ocr_settings.get('rtdetr_model_url') or ocr_settings.get('bubble_model_path') or '' + key = ('rtdetr_onnx', model_id) + with MangaTranslator._detector_pool_lock: + if key not in MangaTranslator._detector_pool: + MangaTranslator._detector_pool[key] = {'spares': []} + # Mark this detector type as loaded for next run + MangaTranslator._detector_pool[key]['loaded'] = True + except Exception: + pass + rtdetr_confidence = ocr_settings.get('rtdetr_confidence', 0.3) + detect_empty = ocr_settings.get('detect_empty_bubbles', True) + 
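# Illustrative sketch of the manga_settings['ocr'] keys consumed by this
# detector branch (the exact config-file layout is an assumption; the values
# shown are the fallback defaults used by the surrounding .get() calls):
#
#   "manga_settings": {
#       "ocr": {
#           "detector_type": "rtdetr_onnx",
#           "rtdetr_confidence": 0.3,
#           "detect_empty_bubbles": true,
#           "detect_text_bubbles": true,
#           "detect_free_text": true
#       }
#   }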
detect_text_bubbles = ocr_settings.get('detect_text_bubbles', True) + detect_free_text = ocr_settings.get('detect_free_text', True) + if not self.batch_mode: + self._log(f"📋 RTEDR_onnx class filters:", "info") + self._log(f" Empty bubbles: {'✓' if detect_empty else '✗'}", "info") + self._log(f" Text bubbles: {'✓' if detect_text_bubbles else '✗'}", "info") + self._log(f" Free text: {'✓' if detect_free_text else '✗'}", "info") + self._log(f"🎯 RTEDR_onnx confidence threshold: {rtdetr_confidence:.2f}", "info") + rtdetr_detections = bd.detect_with_rtdetr_onnx( + image_path=image_path, + confidence=rtdetr_confidence, + return_all_bubbles=False + ) + # Combine enabled bubble types for merging + bubbles = [] + if detect_empty and 'bubbles' in rtdetr_detections: + bubbles.extend(rtdetr_detections['bubbles']) + if detect_text_bubbles and 'text_bubbles' in rtdetr_detections: + bubbles.extend(rtdetr_detections['text_bubbles']) + # Store free text locations for filtering later + free_text_regions = rtdetr_detections.get('text_free', []) if detect_free_text else [] + self._log(f"✅ RTEDR_onnx detected:", "success") + self._log(f" {len(rtdetr_detections.get('bubbles', []))} empty bubbles", "info") + self._log(f" {len(rtdetr_detections.get('text_bubbles', []))} text bubbles", "info") + self._log(f" {len(rtdetr_detections.get('text_free', []))} free text regions", "info") + elif detector_type == 'rtdetr': + # BATCH OPTIMIZATION: Less verbose logging + if not self.batch_mode: + self._log("🤖 Using RT-DETR for bubble detection", "info") + + # BATCH OPTIMIZATION: Don't reload if already loaded + if self.batch_mode and bd.rtdetr_loaded: + # Model already loaded, skip the loading step entirely + pass + elif not bd.rtdetr_loaded: + self._log("📥 Loading RT-DETR model...", "info") + if not bd.load_rtdetr_model(): + self._log("⚠️ Failed to load RT-DETR, falling back to traditional merging", "warning") + return self._merge_nearby_regions(regions) + else: + # Model loaded successfully - mark in pool for reuse + try: + model_id = ocr_settings.get('rtdetr_model_url') or ocr_settings.get('bubble_model_path') or '' + key = ('rtdetr', model_id) + with MangaTranslator._detector_pool_lock: + if key not in MangaTranslator._detector_pool: + MangaTranslator._detector_pool[key] = {'spares': []} + # Mark this detector type as loaded for next run + MangaTranslator._detector_pool[key]['loaded'] = True + except Exception: + pass + + # Get settings + rtdetr_confidence = ocr_settings.get('rtdetr_confidence', 0.3) + detect_empty = ocr_settings.get('detect_empty_bubbles', True) + detect_text_bubbles = ocr_settings.get('detect_text_bubbles', True) + detect_free_text = ocr_settings.get('detect_free_text', True) + + # BATCH OPTIMIZATION: Reduce logging + if not self.batch_mode: + self._log(f"📋 RT-DETR class filters:", "info") + self._log(f" Empty bubbles: {'✓' if detect_empty else '✗'}", "info") + self._log(f" Text bubbles: {'✓' if detect_text_bubbles else '✗'}", "info") + self._log(f" Free text: {'✓' if detect_free_text else '✗'}", "info") + self._log(f"🎯 RT-DETR confidence threshold: {rtdetr_confidence:.2f}", "info") + + # Get FULL RT-DETR detections (not just bubbles) + rtdetr_detections = bd.detect_with_rtdetr( + image_path=image_path, + confidence=rtdetr_confidence, + return_all_bubbles=False # Get dict with all classes + ) + + # Combine enabled bubble types for merging + bubbles = [] + if detect_empty and 'bubbles' in rtdetr_detections: + bubbles.extend(rtdetr_detections['bubbles']) + if detect_text_bubbles and 'text_bubbles' in 
rtdetr_detections: + bubbles.extend(rtdetr_detections['text_bubbles']) + + # Store free text locations for filtering later + free_text_regions = rtdetr_detections.get('text_free', []) if detect_free_text else [] + + # Helper to test if a point lies in any bbox + def _point_in_any_bbox(cx, cy, boxes): + try: + for (bx, by, bw, bh) in boxes or []: + if bx <= cx <= bx + bw and by <= cy <= by + bh: + return True + except Exception: + pass + return False + + self._log(f"✅ RT-DETR detected:", "success") + self._log(f" {len(rtdetr_detections.get('bubbles', []))} empty bubbles", "info") + self._log(f" {len(rtdetr_detections.get('text_bubbles', []))} text bubbles", "info") + self._log(f" {len(rtdetr_detections.get('text_free', []))} free text regions", "info") + + elif detector_type == 'yolo': + # Use YOLOv8 (existing code) + self._log("🤖 Using YOLOv8 for bubble detection", "info") + + model_path = ocr_settings.get('bubble_model_path') + if not model_path: + self._log("⚠️ No YOLO model configured, falling back to traditional merging", "warning") + return self._merge_nearby_regions(regions) + + if not bd.model_loaded: + self._log(f"📥 Loading YOLO model: {os.path.basename(model_path)}") + if not bd.load_model(model_path): + self._log("⚠️ Failed to load YOLO model, falling back to traditional merging", "warning") + return self._merge_nearby_regions(regions) + + confidence = ocr_settings.get('bubble_confidence', 0.3) + self._log(f"🎯 Detecting bubbles with YOLO (confidence >= {confidence:.2f})") + bubbles = bd.detect_bubbles(image_path, confidence=confidence, use_rtdetr=False) + + else: + # Unknown detector type + self._log(f"❌ Unknown detector type: {detector_type}", "error") + self._log(" Valid options: rtdetr_onnx, rtdetr, yolo", "error") + return self._merge_nearby_regions(regions) + + if not bubbles: + self._log("⚠️ No bubbles detected, using traditional merging", "warning") + return self._merge_nearby_regions(regions) + + self._log(f"✅ Found {len(bubbles)} bubbles for grouping", "success") + + # Merge regions within bubbles + merged_regions = [] + used_indices = set() + + # Build lookup of free text regions for exclusion + free_text_bboxes = free_text_regions if detector_type in ('rtdetr', 'rtdetr_onnx') else [] + + # DEBUG: Log free text bboxes + if free_text_bboxes: + self._log(f"🔍 Free text exclusion zones: {len(free_text_bboxes)} regions", "debug") + for idx, (fx, fy, fw, fh) in enumerate(free_text_bboxes): + self._log(f" Free text zone {idx + 1}: x={fx:.0f}, y={fy:.0f}, w={fw:.0f}, h={fh:.0f}", "debug") + else: + self._log(f"⚠️ No free text exclusion zones detected by RT-DETR", "warning") + + # Helper to check if a point is in any free text region + def _point_in_free_text(cx, cy, free_boxes): + try: + for idx, (fx, fy, fw, fh) in enumerate(free_boxes or []): + if fx <= cx <= fx + fw and fy <= cy <= fy + fh: + self._log(f" ✓ Point ({cx:.0f}, {cy:.0f}) is in free text zone {idx + 1}", "debug") + return True + except Exception as e: + self._log(f" ⚠️ Error checking free text: {e}", "debug") + pass + return False + + for bubble_idx, (bx, by, bw, bh) in enumerate(bubbles): + bubble_regions = [] + self._log(f"\n Processing bubble {bubble_idx + 1}: x={bx:.0f}, y={by:.0f}, w={bw:.0f}, h={bh:.0f}", "debug") + + for idx, region in enumerate(regions): + if idx in used_indices: + continue + + rx, ry, rw, rh = region.bounding_box + region_center_x = rx + rw / 2 + region_center_y = ry + rh / 2 + + # Check if center is inside this bubble + if (bx <= region_center_x <= bx + bw and + by <= region_center_y <= 
by + bh): + + self._log(f" Region '{region.text[:20]}...' center ({region_center_x:.0f}, {region_center_y:.0f}) is in bubble", "debug") + + # CRITICAL: Don't merge if this region is in a free text area + # Free text should stay separate from bubbles + if _point_in_free_text(region_center_x, region_center_y, free_text_bboxes): + # This region is in a free text area, don't merge it into bubble + self._log(f" ❌ SKIPPING: Region overlaps with free text area", "debug") + continue + + self._log(f" ✓ Adding region to bubble {bubble_idx + 1}", "debug") + bubble_regions.append(region) + used_indices.add(idx) + + if bubble_regions: + # CRITICAL: Check if this "bubble" actually contains multiple separate bubbles + # This happens when RT-DETR detects one large bubble over stacked speech bubbles + split_groups = self._split_bubble_if_needed(bubble_regions) + + # Process each split group as a separate bubble + for group_idx, group in enumerate(split_groups): + merged_text = " ".join(r.text for r in group) + + min_x = min(r.bounding_box[0] for r in group) + min_y = min(r.bounding_box[1] for r in group) + max_x = max(r.bounding_box[0] + r.bounding_box[2] for r in group) + max_y = max(r.bounding_box[1] + r.bounding_box[3] for r in group) + + all_vertices = [] + for r in group: + if hasattr(r, 'vertices') and r.vertices: + all_vertices.extend(r.vertices) + + if not all_vertices: + all_vertices = [ + (min_x, min_y), + (max_x, min_y), + (max_x, max_y), + (min_x, max_y) + ] + + merged_region = TextRegion( + text=merged_text, + vertices=all_vertices, + bounding_box=(min_x, min_y, max_x - min_x, max_y - min_y), + confidence=0.95, + region_type='bubble_detected', + bubble_bounds=(bx, by, bw, bh) # Pass bubble_bounds in constructor + ) + + # Store original regions for masking + merged_region.original_regions = group + # Classify as text bubble for downstream rendering/masking + merged_region.bubble_type = 'text_bubble' + # Mark that this should be inpainted + merged_region.should_inpaint = True + + merged_regions.append(merged_region) + + # DEBUG: Verify bubble_bounds was set + if not getattr(self, 'concise_logs', False): + has_bb = hasattr(merged_region, 'bubble_bounds') and merged_region.bubble_bounds is not None + self._log(f" 🔍 Merged region has bubble_bounds: {has_bb}", "debug") + if has_bb: + self._log(f" bubble_bounds = {merged_region.bubble_bounds}", "debug") + + if len(split_groups) > 1: + self._log(f" Bubble {bubble_idx + 1}.{group_idx + 1}: Merged {len(group)} text regions (split from {len(bubble_regions)} total)", "info") + else: + self._log(f" Bubble {bubble_idx + 1}: Merged {len(group)} text regions", "info") + + # Handle text outside bubbles based on RT-DETR settings + for idx, region in enumerate(regions): + if idx not in used_indices: + # This text is outside any bubble + + # For RT-DETR mode, check if we should include free text + if detector_type in ('rtdetr', 'rtdetr_onnx'): + # If "Free Text" checkbox is checked, include ALL text outside bubbles + # Don't require RT-DETR to specifically detect it as free text + if ocr_settings.get('detect_free_text', True): + region.should_inpaint = True + # If RT-DETR detected free text box covering this region's center, mark explicitly + try: + cx = region.bounding_box[0] + region.bounding_box[2] / 2 + cy = region.bounding_box[1] + region.bounding_box[3] / 2 + # Find which free text bbox this region belongs to (if any) + found_free_text_box = False + for fx, fy, fw, fh in free_text_bboxes: + if fx <= cx <= fx + fw and fy <= cy <= fy + fh: + 
region.bubble_type = 'free_text' + # CRITICAL: Set bubble_bounds to the RT-DETR free text detection box + # This ensures rendering uses the full RT-DETR bounds, not just OCR polygon + if not hasattr(region, 'bubble_bounds') or region.bubble_bounds is None: + region.bubble_bounds = (fx, fy, fw, fh) + found_free_text_box = True + self._log(f" Free text region INCLUDED: '{region.text[:30]}...'", "debug") + break + + if not found_free_text_box: + # Text outside bubbles but not in free text box - still mark as free text + region.bubble_type = 'free_text' + # Use region's own bbox if no RT-DETR free text box found + if not hasattr(region, 'bubble_bounds') or region.bubble_bounds is None: + region.bubble_bounds = region.bounding_box + self._log(f" Text outside bubbles INCLUDED (as free text): '{region.text[:30]}...'", "debug") + except Exception: + # Default to free text if check fails + region.bubble_type = 'free_text' + if not hasattr(region, 'bubble_bounds') or region.bubble_bounds is None: + region.bubble_bounds = region.bounding_box + else: + region.should_inpaint = False + self._log(f" Text outside bubbles EXCLUDED (Free Text unchecked): '{region.text[:30]}...'", "info") + else: + # For YOLO/auto, include all text by default + region.should_inpaint = True + + merged_regions.append(region) + + # Log summary + regions_to_inpaint = sum(1 for r in merged_regions if getattr(r, 'should_inpaint', True)) + regions_to_skip = len(merged_regions) - regions_to_inpaint + + self._log(f"📊 Bubble detection complete: {len(regions)} → {len(merged_regions)} regions", "success") + if detector_type == 'rtdetr': + self._log(f" {regions_to_inpaint} regions will be inpainted", "info") + if regions_to_skip > 0: + self._log(f" {regions_to_skip} regions will be preserved (Free Text unchecked)", "info") + + return merged_regions + + except Exception as e: + self._log(f"❌ Bubble detection error: {str(e)}", "error") + self._log(" Falling back to traditional merging", "warning") + return self._merge_nearby_regions(regions) + + def set_full_page_context(self, enabled: bool, custom_prompt: str = None): + """Configure full page context translation mode + + Args: + enabled: Whether to translate all text regions in a single contextual request + custom_prompt: Optional custom prompt for full page context mode + """ + self.full_page_context_enabled = enabled + if custom_prompt: + self.full_page_context_prompt = custom_prompt + + self._log(f"📄 Full page context mode: {'ENABLED' if enabled else 'DISABLED'}") + if enabled: + self._log(" All text regions will be sent together for contextual translation") + else: + self._log(" Text regions will be translated individually") + + def update_text_rendering_settings(self, + bg_opacity: int = None, + bg_style: str = None, + bg_reduction: float = None, + font_style: str = None, + font_size: int = None, + text_color: tuple = None, + shadow_enabled: bool = None, + shadow_color: tuple = None, + shadow_offset_x: int = None, + shadow_offset_y: int = None, + shadow_blur: int = None, + force_caps_lock: bool = None): # ADD THIS PARAMETER + """Update text rendering settings""" + self._log("📐 Updating text rendering settings:", "info") + + if bg_opacity is not None: + self.text_bg_opacity = max(0, min(255, bg_opacity)) + self._log(f" Background opacity: {int(self.text_bg_opacity/255*100)}%", "info") + if bg_style is not None and bg_style in ['box', 'circle', 'wrap']: + self.text_bg_style = bg_style + self._log(f" Background style: {bg_style}", "info") + if bg_reduction is not None: + 
self.text_bg_reduction = max(0.5, min(2.0, bg_reduction)) + self._log(f" Background size: {int(self.text_bg_reduction*100)}%", "info") + if font_style is not None: + self.selected_font_style = font_style + font_name = os.path.basename(font_style) if font_style else 'Default' + self._log(f" Font: {font_name}", "info") + if font_size is not None: + if font_size < 0: + # Negative value indicates multiplier mode + self.font_size_mode = 'multiplier' + self.font_size_multiplier = abs(font_size) + self.custom_font_size = None # Clear fixed size + self._log(f" Font size mode: Dynamic multiplier ({self.font_size_multiplier:.1f}x)", "info") + else: + # Positive value or 0 indicates fixed mode + self.font_size_mode = 'fixed' + self.custom_font_size = font_size if font_size > 0 else None + self._log(f" Font size mode: Fixed ({font_size if font_size > 0 else 'Auto'})", "info") + if text_color is not None: + self.text_color = text_color + self._log(f" Text color: RGB{text_color}", "info") + if shadow_enabled is not None: + self.shadow_enabled = shadow_enabled + self._log(f" Shadow: {'Enabled' if shadow_enabled else 'Disabled'}", "info") + if shadow_color is not None: + self.shadow_color = shadow_color + self._log(f" Shadow color: RGB{shadow_color}", "info") + if shadow_offset_x is not None: + self.shadow_offset_x = shadow_offset_x + if shadow_offset_y is not None: + self.shadow_offset_y = shadow_offset_y + if shadow_blur is not None: + self.shadow_blur = max(0, shadow_blur) + if force_caps_lock is not None: # ADD THIS BLOCK + self.force_caps_lock = force_caps_lock + self._log(f" Force Caps Lock: {'Enabled' if force_caps_lock else 'Disabled'}", "info") + + self._log("✅ Rendering settings updated", "info") + + def _log(self, message: str, level: str = "info"): + """Log message to GUI or console, and also to file logger. + The file logger is configured in translator_gui._setup_file_logging(). + Enhanced with comprehensive stop suppression. + """ + # Enhanced stop suppression - allow only essential stop confirmation messages + if self._check_stop() or self.is_globally_cancelled(): + # Only allow very specific stop confirmation messages - nothing else + essential_stop_keywords = [ + "⏹️ Translation stopped by user", + "🧹 Cleaning up models to free RAM", + "✅ Model cleanup complete - RAM should be freed", + "✅ All models cleaned up - RAM freed!" 
+ ] + # Suppress ALL other messages when stopped - be very restrictive + if not any(keyword in message for keyword in essential_stop_keywords): + return + + # Concise pipeline logs: keep only high-level messages and errors/warnings + if getattr(self, 'concise_logs', False): + if level in ("error", "warning"): + pass + else: + keep_prefixes = ( + # Pipeline boundaries and IO + "📷 STARTING", "📁 Input", "📁 Output", + # Step markers + "📍 [STEP", + # Step 1 essentials + "🔍 Detecting text regions", # start of detection on file + "📄 Detected", # format detected + "Using OCR provider:", # provider line + "Using Azure Read API", # azure-specific run mode + "⚠️ Converting image to PNG", # azure PNG compatibility + "🤖 Using AI bubble detection", # BD merge mode + "🤖 Using RTEDR_onnx", # selected BD + "✅ Detected", # detected N regions after merging + # Detectors/inpainter readiness + "🤖 Using bubble detector", "🎨 Using local inpainter", + # Step 2: key actions + "🔀 Running", # Running translation and inpainting concurrently + "📄 Using FULL PAGE CONTEXT", # Explicit mode notice + "📄 Full page context mode", # Alternate phrasing + "📄 Full page context translation", # Start/summary + "🎭 Creating text mask", "📊 Mask breakdown", "📏 Applying", + "🎨 Inpainting", "🧽 Using local inpainting", + # Detection and summary + "📊 Bubble detection complete", "✅ Detection complete", + # Mapping/translation summary + "📊 Mapping", "📊 Full page context translation complete", + # Rendering + "✍️ Rendering", "✅ ENHANCED text rendering complete", + # Output and final summary + "💾 Saved output", "✅ TRANSLATION PIPELINE COMPLETE", + "📊 Translation Summary", "✅ Successful", "❌ Failed", + # Cleanup + "🔑 Auto cleanup", "🔑 Translator instance preserved" + ) + _msg = message.lstrip() if isinstance(message, str) else message + if not any(_msg.startswith(p) for p in keep_prefixes): + return + + # In batch mode, only log important messages + if self.batch_mode: + # Skip verbose/debug messages in batch mode + if level == "debug" or "DEBUG:" in message: + return + # Skip repetitive messages + if any(skip in message for skip in [ + "Using vertex-based", "Using", "Applying", "Font size", + "Region", "Found text", "Style:" + ]): + return + + # Send to GUI if available + if self.log_callback: + try: + self.log_callback(message, level) + except Exception: + # Fall back to print if GUI callback fails + print(message) + else: + print(message) + + # Always record to the Python logger (file) + try: + _logger = logging.getLogger(__name__) + if level == "error": + _logger.error(message) + elif level == "warning": + _logger.warning(message) + elif level == "debug": + _logger.debug(message) + else: + # Map custom levels like 'success' to INFO + _logger.info(message) + except Exception: + pass + + def _is_primarily_english(self, text: str) -> bool: + """Heuristic: treat text as English if it has no CJK and a high ASCII ratio. + Conservative by default to avoid dropping legitimate content. 
+ Tunable via manga_settings.ocr: + - english_exclude_threshold (float, default 0.70) + - english_exclude_min_chars (int, default 4) + - english_exclude_short_tokens (bool, default False) + """ + if not text: + return False + + # Pull tuning knobs from settings (with safe defaults) + ocr_settings = {} + try: + ocr_settings = self.main_gui.config.get('manga_settings', {}).get('ocr', {}) + except Exception: + pass + threshold = float(ocr_settings.get('english_exclude_threshold', 0.70)) + min_chars = int(ocr_settings.get('english_exclude_min_chars', 4)) + exclude_short = bool(ocr_settings.get('english_exclude_short_tokens', False)) + + # 1) If text contains any CJK or full-width characters, do NOT treat as English + has_cjk = any( + '\u4e00' <= char <= '\u9fff' or # Chinese + '\u3040' <= char <= '\u309f' or # Hiragana + '\u30a0' <= char <= '\u30ff' or # Katakana + '\uac00' <= char <= '\ud7af' or # Korean + '\uff00' <= char <= '\uffef' # Full-width characters + for char in text + ) + if has_cjk: + return False + + text_stripped = text.strip() + non_space_len = sum(1 for c in text_stripped if not c.isspace()) + + # 2) By default, do not exclude very short tokens to avoid losing interjections like "Ah", "Eh?", etc. + if not exclude_short and non_space_len < max(1, min_chars): + return False + + # Optional legacy behavior: aggressively drop very short pure-ASCII tokens + if exclude_short: + if len(text_stripped) == 1 and text_stripped.isalpha() and ord(text_stripped) < 128: + self._log(f" Excluding single English letter: '{text_stripped}'", "debug") + return True + if len(text_stripped) <= 3: + ascii_letters = sum(1 for char in text_stripped if char.isalpha() and ord(char) < 128) + if ascii_letters >= len(text_stripped) * 0.5: + self._log(f" Excluding short English text: '{text_stripped}'", "debug") + return True + + # 3) Compute ASCII ratio (exclude spaces) + ascii_chars = sum(1 for char in text if 33 <= ord(char) <= 126) + total_chars = sum(1 for char in text if not char.isspace()) + if total_chars == 0: + return False + ratio = ascii_chars / total_chars + + if ratio > threshold: + self._log(f" Excluding English text ({ratio:.0%} ASCII, threshold {threshold:.0%}, len={non_space_len}): '{text[:30]}...'", "debug") + return True + return False + + def _load_bubble_detector(self, ocr_settings, image_path): + """Load bubble detector with appropriate model based on settings + + Returns: + dict: Detection results or None if failed + """ + detector_type = ocr_settings.get('detector_type', 'rtdetr_onnx') + model_path = ocr_settings.get('bubble_model_path', '') + confidence = ocr_settings.get('bubble_confidence', 0.3) + + bd = self._get_thread_bubble_detector() + + if detector_type == 'rtdetr_onnx' or 'RTEDR_onnx' in str(detector_type): + # Load RT-DETR ONNX model + if bd.load_rtdetr_onnx_model(model_id=ocr_settings.get('rtdetr_model_url') or model_path): + return bd.detect_with_rtdetr_onnx( + image_path=image_path, + confidence=ocr_settings.get('rtdetr_confidence', confidence), + return_all_bubbles=False + ) + elif detector_type == 'rtdetr' or 'RT-DETR' in str(detector_type): + # Load RT-DETR (PyTorch) model + if bd.load_rtdetr_model(model_id=ocr_settings.get('rtdetr_model_url') or model_path): + return bd.detect_with_rtdetr( + image_path=image_path, + confidence=ocr_settings.get('rtdetr_confidence', confidence), + return_all_bubbles=False + ) + elif detector_type == 'custom': + # Custom model - try to determine type from path + custom_path = ocr_settings.get('custom_model_path', model_path) + if 
'rtdetr' in custom_path.lower(): + # Custom RT-DETR model + if bd.load_rtdetr_model(model_id=custom_path): + return bd.detect_with_rtdetr( + image_path=image_path, + confidence=confidence, + return_all_bubbles=False + ) + else: + # Assume YOLO format for other custom models + if custom_path and bd.load_model(custom_path): + detections = bd.detect_bubbles( + image_path, + confidence=confidence + ) + return { + 'text_bubbles': detections if detections else [], + 'text_free': [], + 'bubbles': [] + } + else: + # Standard YOLO model + if model_path and bd.load_model(model_path): + detections = bd.detect_bubbles( + image_path, + confidence=confidence + ) + return { + 'text_bubbles': detections if detections else [], + 'text_free': [], + 'bubbles': [] + } + return None + + def _ensure_google_client(self): + try: + if getattr(self, 'vision_client', None) is None: + from google.cloud import vision + google_path = self.ocr_config.get('google_credentials_path') if hasattr(self, 'ocr_config') else None + if google_path: + os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = google_path + self.vision_client = vision.ImageAnnotatorClient() + self._log("✅ Reinitialized Google Vision client", "debug") + except Exception as e: + self._log(f"❌ Failed to initialize Google Vision client: {e}", "error") + + def _ensure_azure_client(self): + try: + if getattr(self, 'vision_client', None) is None: + from azure.cognitiveservices.vision.computervision import ComputerVisionClient + from msrest.authentication import CognitiveServicesCredentials + key = None + endpoint = None + try: + key = (self.ocr_config or {}).get('azure_key') + endpoint = (self.ocr_config or {}).get('azure_endpoint') + except Exception: + pass + if not key: + key = self.main_gui.config.get('azure_vision_key', '') if hasattr(self, 'main_gui') else None + if not endpoint: + endpoint = self.main_gui.config.get('azure_vision_endpoint', '') if hasattr(self, 'main_gui') else None + if not key or not endpoint: + raise ValueError("Azure credentials missing for client init") + self.vision_client = ComputerVisionClient(endpoint, CognitiveServicesCredentials(key)) + self._log("✅ Reinitialized Azure Computer Vision client", "debug") + except Exception as e: + self._log(f"❌ Failed to initialize Azure CV client: {e}", "error") + + def detect_text_regions(self, image_path: str) -> List[TextRegion]: + """Detect text regions using configured OCR provider""" + # Reduce logging in batch mode + if not self.batch_mode: + self._log(f"🔍 Detecting text regions in: {os.path.basename(image_path)}") + self._log(f" Using OCR provider: {self.ocr_provider.upper()}") + else: + # Only show batch progress if batch_current is set properly + if hasattr(self, 'batch_current') and hasattr(self, 'batch_size'): + self._log(f"🔍 [{self.batch_current}/{self.batch_size}] {os.path.basename(image_path)}") + else: + self._log(f"🔍 Detecting text: {os.path.basename(image_path)}") + + try: + # ============================================================ + # CRITICAL: FORCE CLEAR ALL TEXT-RELATED CACHES + # This MUST happen for EVERY image to prevent text contamination + # NO EXCEPTIONS - batch mode or not, ALL caches get cleared + # ============================================================ + + # 1. Clear OCR ROI cache (prevents text from previous images leaking) + # THREAD-SAFE: Use lock to prevent race conditions in parallel panel translation + if hasattr(self, 'ocr_roi_cache'): + with self._cache_lock: + self.ocr_roi_cache.clear() + self._log("🧹 Cleared OCR ROI cache", "debug") + + # 2. 
Clear OCR manager caches (multiple potential cache locations) + if hasattr(self, 'ocr_manager') and self.ocr_manager: + # Clear last_results (can contain text from previous image) + if hasattr(self.ocr_manager, 'last_results'): + self.ocr_manager.last_results = None + # Clear generic cache + if hasattr(self.ocr_manager, 'cache'): + self.ocr_manager.cache.clear() + # Clear provider-level caches + if hasattr(self.ocr_manager, 'providers'): + for provider_name, provider in self.ocr_manager.providers.items(): + if hasattr(provider, 'last_results'): + provider.last_results = None + if hasattr(provider, 'cache'): + provider.cache.clear() + self._log("🧹 Cleared OCR manager caches", "debug") + + # 3. Clear bubble detector cache (can contain text region info) + if hasattr(self, 'bubble_detector') and self.bubble_detector: + if hasattr(self.bubble_detector, 'last_detections'): + self.bubble_detector.last_detections = None + if hasattr(self.bubble_detector, 'cache'): + self.bubble_detector.cache.clear() + self._log("🧹 Cleared bubble detector cache", "debug") + + # Get manga settings from main_gui config + manga_settings = self.main_gui.config.get('manga_settings', {}) + preprocessing = manga_settings.get('preprocessing', {}) + ocr_settings = manga_settings.get('ocr', {}) + + # Get text filtering settings + min_text_length = ocr_settings.get('min_text_length', 2) + exclude_english = ocr_settings.get('exclude_english_text', True) + confidence_threshold = ocr_settings.get('confidence_threshold', 0.1) + + # Load and preprocess image if enabled + if preprocessing.get('enabled', False): + self._log("📐 Preprocessing enabled - enhancing image quality") + processed_image_data = self._preprocess_image(image_path, preprocessing) + else: + # Read image with optional compression (separate from preprocessing) + try: + comp_cfg = (self.main_gui.config.get('manga_settings', {}) or {}).get('compression', {}) + if comp_cfg.get('enabled', False): + processed_image_data = self._load_image_with_compression_only(image_path, comp_cfg) + else: + with open(image_path, 'rb') as image_file: + processed_image_data = image_file.read() + except Exception: + with open(image_path, 'rb') as image_file: + processed_image_data = image_file.read() + + # Compute per-image hash for caching (based on uploaded bytes) + # CRITICAL FIX #1: Never allow None page_hash to prevent cache key collisions + try: + import hashlib + page_hash = hashlib.sha1(processed_image_data).hexdigest() + + # CRITICAL: Never allow None page_hash + if page_hash is None: + # Fallback: use image path + timestamp for uniqueness + import time + import uuid + page_hash = hashlib.sha1( + f"{image_path}_{time.time()}_{uuid.uuid4()}".encode() + ).hexdigest() + self._log("⚠️ Using fallback page hash for cache isolation", "warning") + + # CRITICAL: If image hash changed, force clear ROI cache + # THREAD-SAFE: Use lock for parallel panel translation + if hasattr(self, '_current_image_hash') and self._current_image_hash != page_hash: + if hasattr(self, 'ocr_roi_cache'): + with self._cache_lock: + self.ocr_roi_cache.clear() + self._log("🧹 Image changed - cleared ROI cache", "debug") + self._current_image_hash = page_hash + except Exception as e: + # Emergency fallback - never let page_hash be None + import uuid + page_hash = str(uuid.uuid4()) + self._current_image_hash = page_hash + self._log(f"⚠️ Page hash generation failed: {e}, using UUID fallback", "error") + + regions = [] + + # Route to appropriate provider + if self.ocr_provider == 'google': + # === GOOGLE CLOUD VISION 
=== + # Ensure client exists (it might have been cleaned up between runs) + try: + self._ensure_google_client() + except Exception: + pass + + # Check if we should use RT-DETR for text region detection (NEW FEATURE) + # IMPORTANT: bubble_detection_enabled should default to True for optimal detection + if ocr_settings.get('bubble_detection_enabled', True) and ocr_settings.get('use_rtdetr_for_ocr_regions', True): + self._log("🎯 Using RT-DETR to guide Google Cloud Vision OCR") + + # Run RT-DETR to detect text regions first + _ = self._get_thread_bubble_detector() + rtdetr_detections = self._load_bubble_detector(ocr_settings, image_path) + + if rtdetr_detections: + # Collect all text-containing regions WITH TYPE TRACKING + all_regions = [] + # Track region type to assign bubble_type later + region_types = {} + idx = 0 + if 'text_bubbles' in rtdetr_detections: + for bbox in rtdetr_detections.get('text_bubbles', []): + all_regions.append(bbox) + region_types[idx] = 'text_bubble' + idx += 1 + if 'text_free' in rtdetr_detections: + for bbox in rtdetr_detections.get('text_free', []): + all_regions.append(bbox) + region_types[idx] = 'free_text' + idx += 1 + + if all_regions: + self._log(f"📊 RT-DETR detected {len(all_regions)} text regions, OCR-ing each with Google Vision") + + # Load image for cropping + import cv2 + cv_image = cv2.imread(image_path) + if cv_image is None: + self._log("⚠️ Failed to load image, falling back to full-page OCR", "warning") + else: + # Define worker function for concurrent OCR + def ocr_region_google(region_data): + i, region_idx, x, y, w, h = region_data + try: + # RATE LIMITING: Add small delay to avoid potential rate limits + # Google has high limits (1,800/min paid tier) but being conservative + import time + import random + time.sleep(0.1 + random.random() * 0.2) # 0.1-0.3s random delay + + # Crop region + cropped = self._safe_crop_region(cv_image, x, y, w, h) + if cropped is None: + return None + + # Validate and resize crop if needed (Google Vision requires minimum dimensions) + h_crop, w_crop = cropped.shape[:2] + MIN_SIZE = 50 # Minimum dimension (increased from 10 for better OCR) + MIN_AREA = 2500 # Minimum area (50x50) + + if h_crop < MIN_SIZE or w_crop < MIN_SIZE or h_crop * w_crop < MIN_AREA: + # Region too small - try to resize it + scale_w = MIN_SIZE / w_crop if w_crop < MIN_SIZE else 1.0 + scale_h = MIN_SIZE / h_crop if h_crop < MIN_SIZE else 1.0 + scale = max(scale_w, scale_h) + + if scale > 1.0: + new_w = int(w_crop * scale) + new_h = int(h_crop * scale) + cropped = cv2.resize(cropped, (new_w, new_h), interpolation=cv2.INTER_CUBIC) + self._log(f"🔍 Region {i} resized from {w_crop}x{h_crop}px to {new_w}x{new_h}px for OCR", "debug") + h_crop, w_crop = new_h, new_w + + # Final validation + if h_crop < 10 or w_crop < 10: + self._log(f"⚠️ Region {i} too small even after resize ({w_crop}x{h_crop}px), skipping", "debug") + return None + + # Encode cropped image + _, encoded = cv2.imencode('.jpg', cropped, [cv2.IMWRITE_JPEG_QUALITY, 95]) + region_image_data = encoded.tobytes() + + # Create Vision API image object + vision_image = vision.Image(content=region_image_data) + image_context = vision.ImageContext( + language_hints=ocr_settings.get('language_hints', ['ja', 'ko', 'zh']) + ) + + # Detect text in this region + detection_mode = ocr_settings.get('text_detection_mode', 'document') + if detection_mode == 'document': + response = self.vision_client.document_text_detection( + image=vision_image, + image_context=image_context + ) + else: + response = 
self.vision_client.text_detection( + image=vision_image, + image_context=image_context + ) + + if response.error.message: + self._log(f"⚠️ Region {i} error: {response.error.message}", "warning") + return None + + # Extract text from this region + region_text = response.full_text_annotation.text if response.full_text_annotation else "" + if region_text.strip(): + # Clean the text + region_text = self._fix_encoding_issues(region_text) + region_text = self._sanitize_unicode_characters(region_text) + region_text = region_text.strip() + + # Create TextRegion with original image coordinates + region = TextRegion( + text=region_text, + vertices=[(x, y), (x+w, y), (x+w, y+h), (x, y+h)], + bounding_box=(x, y, w, h), + confidence=0.9, # RT-DETR confidence + region_type='text_block' + ) + # Assign bubble_type from RT-DETR detection + region.bubble_type = region_types.get(region_idx, 'text_bubble') + if not getattr(self, 'concise_logs', False): + self._log(f"✅ Region {i}/{len(all_regions)} ({region.bubble_type}): {region_text[:50]}...") + return region + return None + + except Exception as e: + # Provide more detailed error info for debugging + error_msg = str(e) + if 'Bad Request' in error_msg or 'invalid' in error_msg.lower(): + self._log(f"⏭️ Skipping region {i}: Too small or invalid for Google Vision (dimensions < 10x10px or area < 100px²)", "debug") + else: + self._log(f"⚠️ Error OCR-ing region {i}: {e}", "warning") + return None + + # Process regions concurrently with RT-DETR concurrency control + from concurrent.futures import ThreadPoolExecutor, as_completed + # Use rtdetr_max_concurrency setting (default 12) to control parallel OCR calls + max_workers = min(ocr_settings.get('rtdetr_max_concurrency', 12), len(all_regions)) + + region_data_list = [(i+1, i, x, y, w, h) for i, (x, y, w, h) in enumerate(all_regions)] + + with ThreadPoolExecutor(max_workers=max_workers) as executor: + futures = {executor.submit(ocr_region_google, rd): rd for rd in region_data_list} + for future in as_completed(futures): + try: + result = future.result() + if result: + regions.append(result) + finally: + # Clean up future to free memory + del future + + # If we got results, sort and post-process + if regions: + # CRITICAL: Sort regions by position (top-to-bottom, left-to-right) + # Concurrent processing returns them in completion order, not detection order + regions.sort(key=lambda r: (r.bounding_box[1], r.bounding_box[0])) + self._log(f"✅ RT-DETR + Google Vision: {len(regions)} text regions detected (sorted by position)") + + # POST-PROCESS: Check for text_bubbles that overlap with free_text regions + # If a text_bubble's center is within a free_text bbox, reclassify it as free_text + free_text_bboxes = rtdetr_detections.get('text_free', []) + if free_text_bboxes: + reclassified_count = 0 + for region in regions: + if getattr(region, 'bubble_type', None) == 'text_bubble': + # Get region center + x, y, w, h = region.bounding_box + cx = x + w / 2 + cy = y + h / 2 + + self._log(f" Checking text_bubble '{region.text[:30]}...' at center ({cx:.0f}, {cy:.0f})", "debug") + + # Check if center is in any free_text bbox + for bbox_idx, (fx, fy, fw, fh) in enumerate(free_text_bboxes): + in_x = fx <= cx <= fx + fw + in_y = fy <= cy <= fy + fh + self._log(f" vs free_text bbox {bbox_idx+1}: in_x={in_x}, in_y={in_y}", "debug") + + if in_x and in_y: + # Reclassify as free text + old_type = region.bubble_type + region.bubble_type = 'free_text' + reclassified_count += 1 + self._log(f" ✅ RECLASSIFIED '{region.text[:30]}...' 
from {old_type} to free_text", "info") + break + + if reclassified_count > 0: + self._log(f"🔄 Reclassified {reclassified_count} overlapping regions as free_text", "info") + + # MERGE: Combine free_text regions that are within the same free_text bbox + # Group free_text regions by which free_text bbox they belong to + free_text_groups = {} + other_regions = [] + + for region in regions: + if getattr(region, 'bubble_type', None) == 'free_text': + # Find which free_text bbox this region belongs to + x, y, w, h = region.bounding_box + cx = x + w / 2 + cy = y + h / 2 + + for bbox_idx, (fx, fy, fw, fh) in enumerate(free_text_bboxes): + if fx <= cx <= fx + fw and fy <= cy <= fy + fh: + if bbox_idx not in free_text_groups: + free_text_groups[bbox_idx] = [] + free_text_groups[bbox_idx].append(region) + break + else: + # Free text region not in any bbox (shouldn't happen, but handle it) + other_regions.append(region) + else: + other_regions.append(region) + + # Merge each group of free_text regions + merged_free_text = [] + for bbox_idx, group in free_text_groups.items(): + if len(group) > 1: + # Merge multiple free text regions in same bbox + merged_text = " ".join(r.text for r in group) + + min_x = min(r.bounding_box[0] for r in group) + min_y = min(r.bounding_box[1] for r in group) + max_x = max(r.bounding_box[0] + r.bounding_box[2] for r in group) + max_y = max(r.bounding_box[1] + r.bounding_box[3] for r in group) + + all_vertices = [] + for r in group: + if hasattr(r, 'vertices') and r.vertices: + all_vertices.extend(r.vertices) + + if not all_vertices: + all_vertices = [ + (min_x, min_y), + (max_x, min_y), + (max_x, max_y), + (min_x, max_y) + ] + + merged_region = TextRegion( + text=merged_text, + vertices=all_vertices, + bounding_box=(min_x, min_y, max_x - min_x, max_y - min_y), + confidence=0.95, + region_type='text_block' + ) + merged_region.bubble_type = 'free_text' + merged_region.should_inpaint = True + merged_free_text.append(merged_region) + self._log(f"🔀 Merged {len(group)} free_text regions into one: '{merged_text[:50]}...'", "debug") + else: + # Single region, keep as-is + merged_free_text.extend(group) + + # Combine all regions + regions = other_regions + merged_free_text + self._log(f"✅ Final: {len(regions)} regions after reclassification and merging", "info") + + # Skip merging section and return directly + return regions + else: + self._log("⚠️ No text found in RT-DETR regions, falling back to full-page OCR", "warning") + + # If bubble detection is enabled and batch variables suggest batching, do ROI-based batched OCR + try: + use_roi_locality = ocr_settings.get('bubble_detection_enabled', False) and ocr_settings.get('roi_locality_enabled', False) + # Determine OCR batching enable + if 'ocr_batch_enabled' in ocr_settings: + ocr_batch_enabled = bool(ocr_settings.get('ocr_batch_enabled')) + else: + ocr_batch_enabled = (os.getenv('BATCH_OCR', '0') == '1') or (os.getenv('BATCH_TRANSLATION', '0') == '1') or getattr(self, 'batch_mode', False) + # Determine OCR batch size + bs = int(ocr_settings.get('ocr_batch_size') or 0) + if bs <= 0: + bs = int(os.getenv('OCR_BATCH_SIZE', '0') or 0) + if bs <= 0: + bs = int(os.getenv('BATCH_SIZE', str(getattr(self, 'batch_size', 1))) or 1) + ocr_batch_size = max(1, bs) + except Exception: + use_roi_locality = False + ocr_batch_enabled = False + ocr_batch_size = 1 + if use_roi_locality and (ocr_batch_enabled or ocr_batch_size > 1): + rois = self._prepare_ocr_rois_from_bubbles(image_path, ocr_settings, preprocessing, page_hash) + if rois: + # 
Determine concurrency for Google: OCR_MAX_CONCURRENCY env or min(BATCH_SIZE,2) + try: + max_cc = int(ocr_settings.get('ocr_max_concurrency') or 0) + if max_cc <= 0: + max_cc = int(os.getenv('OCR_MAX_CONCURRENCY', '0') or 0) + if max_cc <= 0: + max_cc = min(max(1, ocr_batch_size), 2) + except Exception: + max_cc = min(max(1, ocr_batch_size), 2) + regions = self._google_ocr_rois_batched(rois, ocr_settings, max(1, ocr_batch_size), max_cc, page_hash) + self._log(f"✅ Google OCR batched over {len(rois)} ROIs → {len(regions)} regions (cc={max_cc})", "info") + + # Force garbage collection after concurrent OCR to reduce memory spikes + try: + import gc + gc.collect() + except Exception: + pass + + return regions + + # Start local inpainter preload while Google OCR runs (background; multiple if panel-parallel) + try: + if not getattr(self, 'skip_inpainting', False) and not getattr(self, 'use_cloud_inpainting', False): + already_loaded, _lm = self._is_local_inpainter_loaded() + if not already_loaded: + import threading as _threading + local_method = (self.manga_settings.get('inpainting', {}) or {}).get('local_method', 'anime') + model_path = self.main_gui.config.get(f'manga_{local_method}_model_path', '') if hasattr(self, 'main_gui') else '' + adv = self.main_gui.config.get('manga_settings', {}).get('advanced', {}) if hasattr(self, 'main_gui') else {} + # Determine desired instances from panel-parallel settings + desired = 1 + if adv.get('parallel_panel_translation', False): + try: + desired = max(1, int(adv.get('panel_max_workers', 2))) + except Exception: + desired = 2 + # Honor advanced toggle for panel-local preload; for non-panel (desired==1) always allow + allow = True if desired == 1 else bool(adv.get('preload_local_inpainting_for_panels', True)) + if allow: + self._inpaint_preload_event = _threading.Event() + def _preload_inp_many(): + try: + self.preload_local_inpainters_concurrent(local_method, model_path, desired) + finally: + try: + self._inpaint_preload_event.set() + except Exception: + pass + _threading.Thread(target=_preload_inp_many, name="InpaintPreload@GoogleOCR", daemon=True).start() + except Exception: + pass + + # Create Vision API image object (full-page fallback) + image = vision.Image(content=processed_image_data) + + # Build image context with all parameters + image_context = vision.ImageContext( + language_hints=ocr_settings.get('language_hints', ['ja', 'ko', 'zh']) + ) + + # Add text detection params if available in your API version + if hasattr(vision, 'TextDetectionParams'): + image_context.text_detection_params = vision.TextDetectionParams( + enable_text_detection_confidence_score=True + ) + + # Configure text detection based on settings + detection_mode = ocr_settings.get('text_detection_mode', 'document') + + if detection_mode == 'document': + response = self.vision_client.document_text_detection( + image=image, + image_context=image_context + ) + else: + response = self.vision_client.text_detection( + image=image, + image_context=image_context + ) + + if response.error.message: + raise Exception(f"Cloud Vision API error: {response.error.message}") + + # Process each page (usually just one for manga) + for page in response.full_text_annotation.pages: + for block in page.blocks: + # Extract text first to check if it's worth processing + block_text = "" + total_confidence = 0.0 + word_count = 0 + + for paragraph in block.paragraphs: + for word in paragraph.words: + # Get word-level confidence (more reliable than block level) + word_confidence = getattr(word, 
'confidence', 0.0) # Default to 0 if not available + word_text = ''.join([symbol.text for symbol in word.symbols]) + + # Only include words above threshold + if word_confidence >= confidence_threshold: + block_text += word_text + " " + total_confidence += word_confidence + word_count += 1 + else: + if not getattr(self, 'concise_logs', False): + self._log(f" Skipping low confidence word ({word_confidence:.2f}): {word_text}") + + block_text = block_text.strip() + + # CLEAN ORIGINAL OCR TEXT - Fix cube characters and encoding issues + original_text = block_text + block_text = self._fix_encoding_issues(block_text) + block_text = self._sanitize_unicode_characters(block_text) + + # Log cleaning if changes were made + if block_text != original_text: + self._log(f"🧹 Cleaned OCR text: '{original_text[:30]}...' → '{block_text[:30]}...'", "debug") + + # TEXT FILTERING SECTION + # Skip if text is too short (after cleaning) + if len(block_text.strip()) < min_text_length: + if not getattr(self, 'concise_logs', False): + self._log(f" Skipping short text ({len(block_text)} chars): {block_text}") + continue + + # Skip if primarily English and exclude_english is enabled + if exclude_english and self._is_primarily_english(block_text): + if not getattr(self, 'concise_logs', False): + self._log(f" Skipping English text: {block_text[:50]}...") + continue + + # Skip if no confident words found + if word_count == 0 or not block_text: + if not getattr(self, 'concise_logs', False): + self._log(f" Skipping block - no words above threshold {confidence_threshold}") + continue + + # Calculate average confidence for the block + avg_confidence = total_confidence / word_count if word_count > 0 else 0.0 + + # Extract vertices and create region + vertices = [(v.x, v.y) for v in block.bounding_box.vertices] + + # Calculate bounding box + xs = [v[0] for v in vertices] + ys = [v[1] for v in vertices] + x_min, x_max = min(xs), max(xs) + y_min, y_max = min(ys), max(ys) + + region = TextRegion( + text=block_text, + vertices=vertices, + bounding_box=(x_min, y_min, x_max - x_min, y_max - y_min), + confidence=avg_confidence, # Use average confidence + region_type='text_block' + ) + regions.append(region) + if not getattr(self, 'concise_logs', False): + self._log(f" Found text region ({avg_confidence:.2f}): {block_text[:50]}...") + + elif self.ocr_provider == 'azure': + # === AZURE COMPUTER VISION === + # Ensure client exists (it might have been cleaned up between runs) + try: + self._ensure_azure_client() + except Exception: + pass + import io + import time + from azure.cognitiveservices.vision.computervision.models import OperationStatusCodes + + # Check if we should use RT-DETR for text region detection (NEW FEATURE) + if ocr_settings.get('bubble_detection_enabled', False) and ocr_settings.get('use_rtdetr_for_ocr_regions', True): + self._log("🎯 Using RT-DETR to guide Azure Computer Vision OCR") + + # Run RT-DETR to detect text regions first + _ = self._get_thread_bubble_detector() + rtdetr_detections = self._load_bubble_detector(ocr_settings, image_path) + + if rtdetr_detections: + # Collect all text-containing regions WITH TYPE TRACKING + all_regions = [] + # Track region type to assign bubble_type later + region_types = {} + idx = 0 + if 'text_bubbles' in rtdetr_detections: + for bbox in rtdetr_detections.get('text_bubbles', []): + all_regions.append(bbox) + region_types[idx] = 'text_bubble' + idx += 1 + if 'text_free' in rtdetr_detections: + for bbox in rtdetr_detections.get('text_free', []): + all_regions.append(bbox) + 
region_types[idx] = 'free_text' + idx += 1 + + if all_regions: + self._log(f"📊 RT-DETR detected {len(all_regions)} text regions, OCR-ing each with Azure Vision") + + # Load image for cropping + import cv2 + cv_image = cv2.imread(image_path) + if cv_image is None: + self._log("⚠️ Failed to load image, falling back to full-page OCR", "warning") + else: + ocr_results = [] + + # Get Azure settings + azure_reading_order = ocr_settings.get('azure_reading_order', 'natural') + azure_model_version = ocr_settings.get('azure_model_version', 'latest') + azure_max_wait = ocr_settings.get('azure_max_wait', 60) + azure_poll_interval = ocr_settings.get('azure_poll_interval', 1.0) + + # Define worker function for concurrent OCR + def ocr_region_azure(region_data): + i, region_idx, x, y, w, h = region_data + try: + # Crop region + cropped = self._safe_crop_region(cv_image, x, y, w, h) + if cropped is None: + return None + + # Validate and resize crop if needed (Azure Vision requires minimum dimensions) + h_crop, w_crop = cropped.shape[:2] + MIN_SIZE = 50 # Minimum dimension (Azure requirement) + MIN_AREA = 2500 # Minimum area (50x50) + + if h_crop < MIN_SIZE or w_crop < MIN_SIZE or h_crop * w_crop < MIN_AREA: + # Region too small - try to resize it + scale_w = MIN_SIZE / w_crop if w_crop < MIN_SIZE else 1.0 + scale_h = MIN_SIZE / h_crop if h_crop < MIN_SIZE else 1.0 + scale = max(scale_w, scale_h) + + if scale > 1.0: + new_w = int(w_crop * scale) + new_h = int(h_crop * scale) + cropped = cv2.resize(cropped, (new_w, new_h), interpolation=cv2.INTER_CUBIC) + self._log(f"🔍 Region {i} resized from {w_crop}x{h_crop}px to {new_w}x{new_h}px for Azure OCR", "debug") + h_crop, w_crop = new_h, new_w + + # Final validation + if h_crop < 10 or w_crop < 10: + self._log(f"⚠️ Region {i} too small even after resize ({w_crop}x{h_crop}px), skipping", "debug") + return None + + # RATE LIMITING: Add delay between Azure API calls to avoid "Too Many Requests" + # Azure Free tier: 20 calls/minute = 1 call per 3 seconds + # Azure Standard tier: Higher limits but still needs throttling + import time + import random + # Stagger requests with randomized delay (0.1-0.3 seconds) + time.sleep(0.1 + random.random() * 0.2) # 0.1-0.3s random delay + + # Encode cropped image + _, encoded = cv2.imencode('.jpg', cropped, [cv2.IMWRITE_JPEG_QUALITY, 95]) + region_image_bytes = encoded.tobytes() + + # Call Azure Read API + read_response = self.vision_client.read_in_stream( + io.BytesIO(region_image_bytes), + language=ocr_settings.get('language_hints', ['ja'])[0] if ocr_settings.get('language_hints') else 'ja', + model_version=azure_model_version, + reading_order=azure_reading_order, + raw=True + ) + + # Get operation location + operation_location = read_response.headers['Operation-Location'] + operation_id = operation_location.split('/')[-1] + + # Poll for result + start_time = time.time() + while True: + result = self.vision_client.get_read_result(operation_id) + if result.status not in [OperationStatusCodes.not_started, OperationStatusCodes.running]: + break + if time.time() - start_time > azure_max_wait: + self._log(f"⚠️ Azure timeout for region {i}", "warning") + break + time.sleep(azure_poll_interval) + + if result.status == OperationStatusCodes.succeeded: + # Extract text from result + region_text = "" + for text_result in result.analyze_result.read_results: + for line in text_result.lines: + region_text += line.text + "\n" + + region_text = region_text.strip() + if region_text: + # Clean the text + region_text = 
self._fix_encoding_issues(region_text) + region_text = self._sanitize_unicode_characters(region_text) + + # Create TextRegion with original image coordinates + region = TextRegion( + text=region_text, + vertices=[(x, y), (x+w, y), (x+w, y+h), (x, y+h)], + bounding_box=(x, y, w, h), + confidence=0.9, # RT-DETR confidence + region_type='text_block' + ) + # Assign bubble_type from RT-DETR detection + region.bubble_type = region_types.get(region_idx, 'text_bubble') + if not getattr(self, 'concise_logs', False): + self._log(f"✅ Region {i}/{len(all_regions)} ({region.bubble_type}): {region_text[:50]}...") + return region + return None + + except Exception as e: + # Provide more detailed error info for debugging + error_msg = str(e) + if 'Bad Request' in error_msg or 'invalid' in error_msg.lower() or 'Too Many Requests' in error_msg: + if 'Too Many Requests' in error_msg: + self._log(f"⏸️ Region {i}: Azure rate limit hit, consider increasing delays", "warning") + else: + self._log(f"⏭️ Skipping region {i}: Too small or invalid for Azure Vision", "debug") + else: + self._log(f"⚠️ Error OCR-ing region {i}: {e}", "warning") + return None + + # Process regions concurrently with RT-DETR concurrency control + from concurrent.futures import ThreadPoolExecutor, as_completed + # Use rtdetr_max_concurrency setting (default 12) + # Note: Rate limiting is handled via 0.1-0.3s delays per request + max_workers = min(ocr_settings.get('rtdetr_max_concurrency', 12), len(all_regions)) + + region_data_list = [(i+1, i, x, y, w, h) for i, (x, y, w, h) in enumerate(all_regions)] + + with ThreadPoolExecutor(max_workers=max_workers) as executor: + futures = {executor.submit(ocr_region_azure, rd): rd for rd in region_data_list} + for future in as_completed(futures): + try: + result = future.result() + if result: + regions.append(result) + finally: + # Clean up future to free memory + del future + + # If we got results, sort and post-process + if regions: + # CRITICAL: Sort regions by position (top-to-bottom, left-to-right) + # Concurrent processing returns them in completion order, not detection order + regions.sort(key=lambda r: (r.bounding_box[1], r.bounding_box[0])) + self._log(f"✅ RT-DETR + Azure Vision: {len(regions)} text regions detected (sorted by position)") + + # POST-PROCESS: Check for text_bubbles that overlap with free_text regions + # If a text_bubble's center is within a free_text bbox, reclassify it as free_text + free_text_bboxes = rtdetr_detections.get('text_free', []) + + # DEBUG: Log what we have + self._log(f"🔍 POST-PROCESS: Found {len(free_text_bboxes)} free_text bboxes from RT-DETR", "debug") + for idx, (fx, fy, fw, fh) in enumerate(free_text_bboxes): + self._log(f" Free text bbox {idx+1}: x={fx:.0f}, y={fy:.0f}, w={fw:.0f}, h={fh:.0f}", "debug") + + text_bubble_count = sum(1 for r in regions if getattr(r, 'bubble_type', None) == 'text_bubble') + free_text_count = sum(1 for r in regions if getattr(r, 'bubble_type', None) == 'free_text') + self._log(f"🔍 Before reclassification: {text_bubble_count} text_bubbles, {free_text_count} free_text", "debug") + + if free_text_bboxes: + reclassified_count = 0 + for region in regions: + if getattr(region, 'bubble_type', None) == 'text_bubble': + # Get region center + x, y, w, h = region.bounding_box + cx = x + w / 2 + cy = y + h / 2 + + self._log(f" Checking text_bubble '{region.text[:30]}...' 
at center ({cx:.0f}, {cy:.0f})", "debug") + + # Check if center is in any free_text bbox + for bbox_idx, (fx, fy, fw, fh) in enumerate(free_text_bboxes): + in_x = fx <= cx <= fx + fw + in_y = fy <= cy <= fy + fh + self._log(f" vs free_text bbox {bbox_idx+1}: in_x={in_x}, in_y={in_y}", "debug") + + if in_x and in_y: + # Reclassify as free text + old_type = region.bubble_type + region.bubble_type = 'free_text' + reclassified_count += 1 + self._log(f" ✅ RECLASSIFIED '{region.text[:30]}...' from {old_type} to free_text", "info") + break + + if reclassified_count > 0: + self._log(f"🔄 Reclassified {reclassified_count} overlapping regions as free_text", "info") + + # MERGE: Combine free_text regions that are within the same free_text bbox + # Group free_text regions by which free_text bbox they belong to + free_text_groups = {} + other_regions = [] + + for region in regions: + if getattr(region, 'bubble_type', None) == 'free_text': + # Find which free_text bbox this region belongs to + x, y, w, h = region.bounding_box + cx = x + w / 2 + cy = y + h / 2 + + for bbox_idx, (fx, fy, fw, fh) in enumerate(free_text_bboxes): + if fx <= cx <= fx + fw and fy <= cy <= fy + fh: + if bbox_idx not in free_text_groups: + free_text_groups[bbox_idx] = [] + free_text_groups[bbox_idx].append(region) + break + else: + # Free text region not in any bbox (shouldn't happen, but handle it) + other_regions.append(region) + else: + other_regions.append(region) + + # Merge each group of free_text regions + merged_free_text = [] + for bbox_idx, group in free_text_groups.items(): + if len(group) > 1: + # Merge multiple free text regions in same bbox + merged_text = " ".join(r.text for r in group) + + min_x = min(r.bounding_box[0] for r in group) + min_y = min(r.bounding_box[1] for r in group) + max_x = max(r.bounding_box[0] + r.bounding_box[2] for r in group) + max_y = max(r.bounding_box[1] + r.bounding_box[3] for r in group) + + all_vertices = [] + for r in group: + if hasattr(r, 'vertices') and r.vertices: + all_vertices.extend(r.vertices) + + if not all_vertices: + all_vertices = [ + (min_x, min_y), + (max_x, min_y), + (max_x, max_y), + (min_x, max_y) + ] + + merged_region = TextRegion( + text=merged_text, + vertices=all_vertices, + bounding_box=(min_x, min_y, max_x - min_x, max_y - min_y), + confidence=0.95, + region_type='text_block' + ) + merged_region.bubble_type = 'free_text' + merged_region.should_inpaint = True + merged_free_text.append(merged_region) + self._log(f"🔀 Merged {len(group)} free_text regions into one: '{merged_text[:50]}...'", "debug") + else: + # Single region, keep as-is + merged_free_text.extend(group) + + # Combine all regions + regions = other_regions + merged_free_text + self._log(f"✅ Final: {len(regions)} regions after reclassification and merging", "info") + + # Skip merging section and return directly + return regions + else: + self._log("⚠️ No text found in RT-DETR regions, falling back to full-page OCR", "warning") + + # ROI-based concurrent OCR when bubble detection is enabled and batching is requested + try: + use_roi_locality = ocr_settings.get('bubble_detection_enabled', False) and ocr_settings.get('roi_locality_enabled', False) + if 'ocr_batch_enabled' in ocr_settings: + ocr_batch_enabled = bool(ocr_settings.get('ocr_batch_enabled')) + else: + ocr_batch_enabled = (os.getenv('BATCH_OCR', '0') == '1') or (os.getenv('BATCH_TRANSLATION', '0') == '1') or getattr(self, 'batch_mode', False) + bs = int(ocr_settings.get('ocr_batch_size') or 0) + if bs <= 0: + bs = int(os.getenv('OCR_BATCH_SIZE', 
'0') or 0) + if bs <= 0: + bs = int(os.getenv('BATCH_SIZE', str(getattr(self, 'batch_size', 1))) or 1) + ocr_batch_size = max(1, bs) + except Exception: + use_roi_locality = False + ocr_batch_enabled = False + ocr_batch_size = 1 + if use_roi_locality and (ocr_batch_enabled or ocr_batch_size > 1): + rois = self._prepare_ocr_rois_from_bubbles(image_path, ocr_settings, preprocessing, page_hash) + if rois: + # AZURE RATE LIMITING: Force low concurrency to prevent "Too Many Requests" + # Azure has strict rate limits that vary by tier: + # - Free tier: 20 requests/minute + # - Standard tier: Higher but still limited + try: + azure_workers = int(ocr_settings.get('ocr_max_concurrency') or 0) + if azure_workers <= 0: + azure_workers = 1 # Force sequential by default + else: + azure_workers = min(2, max(1, azure_workers)) # Cap at 2 max + except Exception: + azure_workers = 1 # Safe default + regions = self._azure_ocr_rois_concurrent(rois, ocr_settings, azure_workers, page_hash) + self._log(f"✅ Azure OCR concurrent over {len(rois)} ROIs → {len(regions)} regions (workers={azure_workers})", "info") + + # Force garbage collection after concurrent OCR to reduce memory spikes + try: + import gc + gc.collect() + except Exception: + pass + + return regions + + # Start local inpainter preload while Azure OCR runs (background; multiple if panel-parallel) + try: + if not getattr(self, 'skip_inpainting', False) and not getattr(self, 'use_cloud_inpainting', False): + already_loaded, _lm = self._is_local_inpainter_loaded() + if not already_loaded: + import threading as _threading + local_method = (self.manga_settings.get('inpainting', {}) or {}).get('local_method', 'anime') + model_path = self.main_gui.config.get(f'manga_{local_method}_model_path', '') if hasattr(self, 'main_gui') else '' + adv = self.main_gui.config.get('manga_settings', {}).get('advanced', {}) if hasattr(self, 'main_gui') else {} + desired = 1 + if adv.get('parallel_panel_translation', False): + try: + desired = max(1, int(adv.get('panel_max_workers', 2))) + except Exception: + desired = 2 + allow = True if desired == 1 else bool(adv.get('preload_local_inpainting_for_panels', True)) + if allow: + self._inpaint_preload_event = _threading.Event() + def _preload_inp_many(): + try: + self.preload_local_inpainters_concurrent(local_method, model_path, desired) + finally: + try: + self._inpaint_preload_event.set() + except Exception: + pass + _threading.Thread(target=_preload_inp_many, name="InpaintPreload@AzureOCR", daemon=True).start() + except Exception: + pass + + # Ensure Azure-supported format for the BYTES we are sending. + # If compression is enabled and produced an Azure-supported format (JPEG/PNG/BMP/TIFF), + # DO NOT force-convert to PNG. Only convert when the current bytes are in an unsupported format. 
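+ # Format handling summary for the bytes sent to Azure Read (matches the logic below):
+ #   - source file is a PDF                      -> send as-is (Azure accepts PDF streams)
+ #   - bytes already decode as JPEG/PNG/BMP/TIFF -> send as-is, never re-encode
+ #   - otherwise (e.g. WEBP): re-encode to the configured compression format if it is
+ #     Azure-supported, else fall back to PNG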
+ file_ext = os.path.splitext(image_path)[1].lower() + azure_supported_exts = ['.jpg', '.jpeg', '.png', '.bmp', '.pdf', '.tiff'] + azure_supported_fmts = ['jpeg', 'jpg', 'png', 'bmp', 'tiff'] + + # Probe the actual byte format we will upload + try: + from PIL import Image as _PILImage + img_probe = _PILImage.open(io.BytesIO(processed_image_data)) + fmt = (img_probe.format or '').lower() + except Exception: + fmt = '' + + # If original is a PDF, allow as-is (Azure supports PDF streams) + if file_ext == '.pdf': + needs_convert = False + else: + # Decide based on the detected format of the processed bytes + needs_convert = fmt not in azure_supported_fmts + + if needs_convert: + # If compression settings are enabled and target format is Azure-supported, prefer that + try: + comp_cfg = (self.main_gui.config.get('manga_settings', {}) or {}).get('compression', {}) + except Exception: + comp_cfg = {} + + # Determine if conversion is actually needed based on compression and current format + try: + from PIL import Image as _PILImage + img2 = _PILImage.open(io.BytesIO(processed_image_data)) + fmt_lower = (img2.format or '').lower() + except Exception: + img2 = None + fmt_lower = '' + + accepted = {'jpeg', 'jpg', 'png', 'bmp', 'tiff'} + convert_needed = False + target_fmt = None + + if comp_cfg.get('enabled', False): + cf = str(comp_cfg.get('format', '')).lower() + desired = None + if cf in ('jpeg', 'jpg'): + desired = 'JPEG' + elif cf == 'png': + desired = 'PNG' + elif cf == 'bmp': + desired = 'BMP' + elif cf == 'tiff': + desired = 'TIFF' + # If WEBP or others, desired remains None and we fall back to PNG only if unsupported + + if desired is not None: + # Skip conversion if already in the desired supported format + already_matches = ((fmt_lower in ('jpeg', 'jpg') and desired == 'JPEG') or (fmt_lower == desired.lower())) + if not already_matches: + convert_needed = True + target_fmt = desired + else: + # Compression format not supported by Azure (e.g., WEBP); convert only if unsupported + if fmt_lower not in accepted: + convert_needed = True + target_fmt = 'PNG' + else: + # No compression preference; convert only if unsupported by Azure + if fmt_lower not in accepted: + convert_needed = True + target_fmt = 'PNG' + + if convert_needed: + self._log(f"⚠️ Converting image to {target_fmt} for Azure compatibility") + try: + if img2 is None: + from PIL import Image as _PILImage + img2 = _PILImage.open(io.BytesIO(processed_image_data)) + buffer = io.BytesIO() + if target_fmt == 'JPEG' and img2.mode != 'RGB': + img2 = img2.convert('RGB') + img2.save(buffer, format=target_fmt) + processed_image_data = buffer.getvalue() + except Exception: + pass + + # Create stream from image data + image_stream = io.BytesIO(processed_image_data) + + # Get Azure-specific settings + reading_order = ocr_settings.get('azure_reading_order', 'natural') + model_version = ocr_settings.get('azure_model_version', 'latest') + max_wait = ocr_settings.get('azure_max_wait', 60) + poll_interval = ocr_settings.get('azure_poll_interval', 0.5) + + # Map language hints to Azure language codes + language_hints = ocr_settings.get('language_hints', ['ja', 'ko', 'zh']) + + # Build parameters dictionary + read_params = { + 'raw': True, + 'readingOrder': reading_order + } + + # Add model version if not using latest + if model_version != 'latest': + read_params['model-version'] = model_version + + # Use language parameter only if single language is selected + if len(language_hints) == 1: + azure_lang = language_hints[0] + # Map to Azure language codes 
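+ # Chinese hints map to script-specific codes (zh-Hans = Simplified, zh-Hant = Traditional);
+ # hints without an entry pass through unchanged via dict.get()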
+ lang_mapping = { + 'zh': 'zh-Hans', + 'zh-TW': 'zh-Hant', + 'zh-CN': 'zh-Hans', + 'ja': 'ja', + 'ko': 'ko', + 'en': 'en' + } + azure_lang = lang_mapping.get(azure_lang, azure_lang) + read_params['language'] = azure_lang + self._log(f" Using Azure Read API with language: {azure_lang}, order: {reading_order}") + else: + self._log(f" Using Azure Read API (auto-detect for {len(language_hints)} languages, order: {reading_order})") + + # Start Read operation with error handling and rate limit retry + # Use max_retries from config (default 7, configurable in Other Settings) + max_retries = self.main_gui.config.get('max_retries', 7) + retry_delay = 60 # Start with 60 seconds for rate limits + read_response = None + + for retry_attempt in range(max_retries): + try: + # Ensure client is alive before starting + if getattr(self, 'vision_client', None) is None: + self._log("⚠️ Azure client missing before read; reinitializing...", "warning") + self._ensure_azure_client() + if getattr(self, 'vision_client', None) is None: + raise RuntimeError("Azure Computer Vision client is not initialized. Check your key/endpoint and azure-cognitiveservices-vision-computervision installation.") + + # Reset stream position for retry + image_stream.seek(0) + + read_response = self.vision_client.read_in_stream( + image_stream, + **read_params + ) + # Success! Break out of retry loop + break + + except Exception as e: + error_msg = str(e) + + # Handle rate limit errors with fixed 60s wait + if 'Too Many Requests' in error_msg or '429' in error_msg: + if retry_attempt < max_retries - 1: + wait_time = retry_delay # Fixed 60s wait each time + self._log(f"⚠️ Azure rate limit hit. Waiting {wait_time}s before retry {retry_attempt + 1}/{max_retries}...", "warning") + time.sleep(wait_time) + continue + else: + self._log(f"❌ Azure rate limit: Exhausted {max_retries} retries", "error") + raise + + # Handle bad request errors + elif 'Bad Request' in error_msg: + self._log("⚠️ Azure Read API Bad Request - likely invalid image format or too small. 
Retrying without language parameter...", "warning") + # Retry without language parameter + image_stream.seek(0) + read_params.pop('language', None) + if getattr(self, 'vision_client', None) is None: + self._ensure_azure_client() + read_response = self.vision_client.read_in_stream( + image_stream, + **read_params + ) + break + else: + raise + + if read_response is None: + raise RuntimeError("Failed to get response from Azure Read API after retries") + + # Get operation ID + operation_location = read_response.headers.get("Operation-Location") if hasattr(read_response, 'headers') else None + if not operation_location: + raise RuntimeError("Azure Read API did not return Operation-Location header") + operation_id = operation_location.split("/")[-1] + + # Poll for results with configurable timeout + self._log(f" Waiting for Azure OCR to complete (max {max_wait}s)...") + wait_time = 0 + last_status = None + result = None + + while wait_time < max_wait: + try: + if getattr(self, 'vision_client', None) is None: + # Client got cleaned up mid-poll; reinitialize and continue + self._log("⚠️ Azure client became None during polling; reinitializing...", "warning") + self._ensure_azure_client() + if getattr(self, 'vision_client', None) is None: + raise AttributeError("Azure client lost and could not be reinitialized") + result = self.vision_client.get_read_result(operation_id) + except AttributeError as e: + # Defensive: reinitialize once and retry this iteration + self._log(f"⚠️ {e} — reinitializing Azure client and retrying once", "warning") + self._ensure_azure_client() + if getattr(self, 'vision_client', None) is None: + raise + result = self.vision_client.get_read_result(operation_id) + + # Log status changes + if result.status != last_status: + self._log(f" Status: {result.status}") + last_status = result.status + + if result.status not in [OperationStatusCodes.running, OperationStatusCodes.not_started]: + break + + time.sleep(poll_interval) + self._log("💤 Azure OCR polling pausing briefly for stability", "debug") + wait_time += poll_interval + + if not result: + raise RuntimeError("Azure Read API polling did not return a result") + if result.status == OperationStatusCodes.succeeded: + # Track statistics + total_lines = 0 + handwritten_lines = 0 + + for page_num, page in enumerate(result.analyze_result.read_results): + if len(result.analyze_result.read_results) > 1: + self._log(f" Processing page {page_num + 1}/{len(result.analyze_result.read_results)}") + + for line in page.lines: + # CLEAN ORIGINAL OCR TEXT FOR AZURE - Fix cube characters and encoding issues + original_azure_text = line.text + cleaned_line_text = self._fix_encoding_issues(line.text) + cleaned_line_text = self._sanitize_unicode_characters(cleaned_line_text) + + # Log cleaning if changes were made + if cleaned_line_text != original_azure_text: + self._log(f"🧹 Cleaned Azure OCR text: '{original_azure_text[:30]}...' 
→ '{cleaned_line_text[:30]}...'", "debug") + + # TEXT FILTERING FOR AZURE + # Skip if text is too short (after cleaning) + if len(cleaned_line_text.strip()) < min_text_length: + if not getattr(self, 'concise_logs', False): + self._log(f" Skipping short text ({len(cleaned_line_text)} chars): {cleaned_line_text}") + continue + + # Skip if primarily English and exclude_english is enabled (use cleaned text) + if exclude_english and self._is_primarily_english(cleaned_line_text): + if not getattr(self, 'concise_logs', False): + self._log(f" Skipping English text: {cleaned_line_text[:50]}...") + continue + + # Azure provides 8-point bounding box + bbox = line.bounding_box + vertices = [ + (bbox[0], bbox[1]), + (bbox[2], bbox[3]), + (bbox[4], bbox[5]), + (bbox[6], bbox[7]) + ] + + # Calculate rectangular bounding box + xs = [v[0] for v in vertices] + ys = [v[1] for v in vertices] + x_min, x_max = min(xs), max(xs) + y_min, y_max = min(ys), max(ys) + + # Calculate confidence from word-level data + confidence = 0.95 # Default high confidence + + if hasattr(line, 'words') and line.words: + # Calculate average confidence from words + confidences = [] + for word in line.words: + if hasattr(word, 'confidence'): + confidences.append(word.confidence) + + if confidences: + confidence = sum(confidences) / len(confidences) + if not getattr(self, 'concise_logs', False): + self._log(f" Line has {len(line.words)} words, avg confidence: {confidence:.3f}") + + # Check for handwriting style (if available) + style = 'print' # Default + style_confidence = None + + if hasattr(line, 'appearance') and line.appearance: + if hasattr(line.appearance, 'style'): + style_info = line.appearance.style + if hasattr(style_info, 'name'): + style = style_info.name + if style == 'handwriting': + handwritten_lines += 1 + if hasattr(style_info, 'confidence'): + style_confidence = style_info.confidence + if not getattr(self, 'concise_logs', False): + self._log(f" Style: {style} (confidence: {style_confidence:.2f})") + + # Apply confidence threshold filtering + if confidence >= confidence_threshold: + region = TextRegion( + text=cleaned_line_text, # Use cleaned text instead of original + vertices=vertices, + bounding_box=(x_min, y_min, x_max - x_min, y_max - y_min), + confidence=confidence, + region_type='text_line' + ) + + # Add extra attributes for Azure-specific info + region.style = style + region.style_confidence = style_confidence + + regions.append(region) + total_lines += 1 + + # More detailed logging (use cleaned text) + if not getattr(self, 'concise_logs', False): + if style == 'handwriting': + self._log(f" Found handwritten text ({confidence:.2f}): {cleaned_line_text[:50]}...") + else: + self._log(f" Found text region ({confidence:.2f}): {cleaned_line_text[:50]}...") + else: + if not getattr(self, 'concise_logs', False): + self._log(f" Skipping low confidence text ({confidence:.2f}): {cleaned_line_text[:30]}...") + + # Log summary statistics + if total_lines > 0 and not getattr(self, 'concise_logs', False): + self._log(f" Total lines detected: {total_lines}") + if handwritten_lines > 0: + self._log(f" Handwritten lines: {handwritten_lines} ({handwritten_lines/total_lines*100:.1f}%)") + + elif result.status == OperationStatusCodes.failed: + # More detailed error handling + error_msg = "Azure OCR failed" + if hasattr(result, 'message'): + error_msg += f": {result.message}" + if hasattr(result.analyze_result, 'errors') and result.analyze_result.errors: + for error in result.analyze_result.errors: + self._log(f" Error: {error}", 
"error") + raise Exception(error_msg) + else: + # Timeout or other status + raise Exception(f"Azure OCR ended with status: {result.status} after {wait_time}s") + + else: + # === NEW OCR PROVIDERS === + import cv2 + import numpy as np + from ocr_manager import OCRManager + + # Load image as numpy array + if isinstance(processed_image_data, bytes): + # Convert bytes to numpy array + nparr = np.frombuffer(processed_image_data, np.uint8) + image = cv2.imdecode(nparr, cv2.IMREAD_COLOR) + else: + # Load from file path + image = cv2.imread(image_path) + if image is None: + # Try with PIL for Unicode paths + from PIL import Image as PILImage + pil_image = PILImage.open(image_path) + image = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR) + + # Ensure OCR manager is available + if not hasattr(self, 'ocr_manager') or self.ocr_manager is None: + try: + # Prefer GUI-provided manager if available + if hasattr(self, 'main_gui') and hasattr(self.main_gui, 'ocr_manager') and self.main_gui.ocr_manager is not None: + self.ocr_manager = self.main_gui.ocr_manager + else: + from ocr_manager import OCRManager + self.ocr_manager = OCRManager(log_callback=self.log_callback) + self._log("Initialized internal OCRManager instance", "info") + except Exception as _e: + self.ocr_manager = None + self._log(f"Failed to initialize OCRManager: {str(_e)}", "error") + if self.ocr_manager is None: + raise RuntimeError("OCRManager is not available; cannot proceed with OCR provider.") + + # Check provider status and load if needed + provider_status = self.ocr_manager.check_provider_status(self.ocr_provider) + + if not provider_status['installed']: + self._log(f"❌ {self.ocr_provider} is not installed", "error") + self._log(f" Please install it from the GUI settings", "error") + raise Exception(f"{self.ocr_provider} OCR provider is not installed") + + # Start local inpainter preload while provider is being readied/used (non-cloud path only; background) + try: + if not getattr(self, 'skip_inpainting', False) and not getattr(self, 'use_cloud_inpainting', False): + already_loaded, _lm = self._is_local_inpainter_loaded() + if not already_loaded: + import threading as _threading + local_method = (self.manga_settings.get('inpainting', {}) or {}).get('local_method', 'anime') + model_path = self.main_gui.config.get(f'manga_{local_method}_model_path', '') if hasattr(self, 'main_gui') else '' + adv = self.main_gui.config.get('manga_settings', {}).get('advanced', {}) if hasattr(self, 'main_gui') else {} + desired = 1 + if adv.get('parallel_panel_translation', False): + try: + desired = max(1, int(adv.get('panel_max_workers', 2))) + except Exception: + desired = 2 + allow = True if desired == 1 else bool(adv.get('preload_local_inpainting_for_panels', True)) + if allow: + self._inpaint_preload_event = _threading.Event() + def _preload_inp_many(): + try: + self.preload_local_inpainters_concurrent(local_method, model_path, desired) + finally: + try: + self._inpaint_preload_event.set() + except Exception: + pass + _threading.Thread(target=_preload_inp_many, name="InpaintPreload@OCRProvider", daemon=True).start() + except Exception: + pass + + if not provider_status['loaded']: + # Check if Qwen2-VL - if it's supposedly not loaded but actually is, skip + if self.ocr_provider == 'Qwen2-VL': + provider = self.ocr_manager.get_provider('Qwen2-VL') + if provider and hasattr(provider, 'model') and provider.model is not None: + self._log("✅ Qwen2-VL model actually already loaded, skipping reload") + success = True + else: + # Only actually load if 
truly not loaded + model_size = self.ocr_config.get('model_size', '2') if hasattr(self, 'ocr_config') else '2' + self._log(f"Loading Qwen2-VL with model_size={model_size}") + success = self.ocr_manager.load_provider(self.ocr_provider, model_size=model_size) + if not success: + raise Exception(f"Failed to load {self.ocr_provider} model") + elif self.ocr_provider == 'custom-api': + # Custom API needs to initialize UnifiedClient with credentials + self._log("📡 Loading custom-api provider...") + # Try to get API key and model from GUI if available + load_kwargs = {} + if hasattr(self, 'main_gui'): + # Get API key from GUI + if hasattr(self.main_gui, 'api_key_entry'): + api_key = self.main_gui.api_key_entry.get() + if api_key: + load_kwargs['api_key'] = api_key + # Get model from GUI + if hasattr(self.main_gui, 'model_var'): + model = self.main_gui.model_var.get() + if model: + load_kwargs['model'] = model + success = self.ocr_manager.load_provider(self.ocr_provider, **load_kwargs) + if not success: + raise Exception(f"Failed to initialize {self.ocr_provider}") + else: + # Other providers + success = self.ocr_manager.load_provider(self.ocr_provider) + if not success: + raise Exception(f"Failed to load {self.ocr_provider} model") + + if not success: + raise Exception(f"Failed to load {self.ocr_provider} model") + + # Initialize ocr_results here before any provider-specific code + ocr_results = [] + + # Special handling for manga-ocr (needs region detection first) + if self.ocr_provider == 'manga-ocr': + # IMPORTANT: Initialize fresh results list + ocr_results = [] + + # Check if we should use bubble detection for regions + if ocr_settings.get('bubble_detection_enabled', False): + self._log("📝 Using bubble detection regions for manga-ocr...") + + # Run bubble detection to get regions + if self.bubble_detector is None: + from bubble_detector import BubbleDetector + self.bubble_detector = BubbleDetector() + + # Get regions from bubble detector + rtdetr_detections = self._load_bubble_detector(ocr_settings, image_path) + if rtdetr_detections: + + # Process detections immediately and don't store + all_regions = [] + + # ONLY ADD TEXT-CONTAINING REGIONS + # Skip empty bubbles since they shouldn't have text + if 'text_bubbles' in rtdetr_detections: + all_regions.extend(rtdetr_detections.get('text_bubbles', [])) + if 'text_free' in rtdetr_detections: + all_regions.extend(rtdetr_detections.get('text_free', [])) + + # DO NOT ADD empty bubbles - they're duplicates of text_bubbles + # if 'bubbles' in rtdetr_detections: # <-- REMOVE THIS + # all_regions.extend(rtdetr_detections.get('bubbles', [])) + + self._log(f"📊 Processing {len(all_regions)} text-containing regions (skipping empty bubbles)") + + # Clear detection results after extracting regions + rtdetr_detections = None + + # Check if parallel processing is enabled + if self.parallel_processing and len(all_regions) > 1: + self._log(f"🚀 Using PARALLEL OCR for {len(all_regions)} regions with manga-ocr") + ocr_results = self._parallel_ocr_regions(image, all_regions, 'manga-ocr', confidence_threshold) + else: + # Process each region with manga-ocr + for i, (x, y, w, h) in enumerate(all_regions): + cropped = self._safe_crop_region(image, x, y, w, h) + if cropped is None: + continue + result = self.ocr_manager.detect_text(cropped, 'manga-ocr', confidence=confidence_threshold) + if result and len(result) > 0 and result[0].text.strip(): + result[0].bbox = (x, y, w, h) + result[0].vertices = [(x, y), (x+w, y), (x+w, y+h), (x, y+h)] + # CRITICAL: Store RT-DETR 
bubble bounds for rendering + # The bbox/vertices are the small OCR polygon, but bubble_bounds is the full RT-DETR bubble + result[0].bubble_bounds = (x, y, w, h) + ocr_results.append(result[0]) + self._log(f"🔍 Processing region {i+1}/{len(all_regions)} with manga-ocr...") + self._log(f"✅ Detected text: {result[0].text[:50]}...") + + # Clear regions list after processing + all_regions = None + else: + # NO bubble detection - just process full image + self._log("📝 Processing full image with manga-ocr (no bubble detection)") + ocr_results = self.ocr_manager.detect_text(image, self.ocr_provider, confidence=confidence_threshold) + + elif self.ocr_provider == 'Qwen2-VL': + # Initialize results list + ocr_results = [] + + # Configure Qwen2-VL for Korean text + language_hints = ocr_settings.get('language_hints', ['ko']) + self._log("🍩 Qwen2-VL OCR for Korean text recognition") + + # Check if we should use bubble detection for regions + if ocr_settings.get('bubble_detection_enabled', False): + self._log("📝 Using bubble detection regions for Qwen2-VL...") + + # Run bubble detection to get regions (thread-local) + _ = self._get_thread_bubble_detector() + + # Get regions from bubble detector + rtdetr_detections = self._load_bubble_detector(ocr_settings, image_path) + if rtdetr_detections: + + # Process only text-containing regions + all_regions = [] + if 'text_bubbles' in rtdetr_detections: + all_regions.extend(rtdetr_detections.get('text_bubbles', [])) + if 'text_free' in rtdetr_detections: + all_regions.extend(rtdetr_detections.get('text_free', [])) + + self._log(f"📊 Processing {len(all_regions)} text regions with Qwen2-VL") + + # Check if parallel processing is enabled + if self.parallel_processing and len(all_regions) > 1: + self._log(f"🚀 Using PARALLEL OCR for {len(all_regions)} regions with Qwen2-VL") + ocr_results = self._parallel_ocr_regions(image, all_regions, 'Qwen2-VL', confidence_threshold) + else: + # Process each region with Qwen2-VL + for i, (x, y, w, h) in enumerate(all_regions): + cropped = self._safe_crop_region(image, x, y, w, h) + if cropped is None: + continue + result = self.ocr_manager.detect_text(cropped, 'Qwen2-VL', confidence=confidence_threshold) + if result and len(result) > 0 and result[0].text.strip(): + result[0].bbox = (x, y, w, h) + result[0].vertices = [(x, y), (x+w, y), (x+w, y+h), (x, y+h)] + ocr_results.append(result[0]) + self._log(f"✅ Region {i+1}: {result[0].text[:50]}...") + else: + # Process full image without bubble detection + self._log("📝 Processing full image with Qwen2-VL") + ocr_results = self.ocr_manager.detect_text(image, self.ocr_provider) + + elif self.ocr_provider == 'custom-api': + # Initialize results list + ocr_results = [] + + # Configure Custom API for text extraction + self._log("🔌 Using Custom API for OCR") + + # Check if we should use bubble detection for regions + if ocr_settings.get('bubble_detection_enabled', False): + self._log("📝 Using bubble detection regions for Custom API...") + + # Run bubble detection to get regions (thread-local) + _ = self._get_thread_bubble_detector() + + # Get regions from bubble detector + rtdetr_detections = self._load_bubble_detector(ocr_settings, image_path) + if rtdetr_detections: + + # Process only text-containing regions + all_regions = [] + if 'text_bubbles' in rtdetr_detections: + all_regions.extend(rtdetr_detections.get('text_bubbles', [])) + if 'text_free' in rtdetr_detections: + all_regions.extend(rtdetr_detections.get('text_free', [])) + + self._log(f"📊 Processing {len(all_regions)} text regions 
with Custom API") + + # Clear detections after extracting regions + rtdetr_detections = None + + # Decide parallelization for custom-api: + # Use API batch mode OR local parallel toggle so that API calls can run in parallel + if (getattr(self, 'batch_mode', False) or self.parallel_processing) and len(all_regions) > 1: + self._log(f"🚀 Using PARALLEL OCR for {len(all_regions)} regions (custom-api; API batch mode honored)") + ocr_results = self._parallel_ocr_regions(image, all_regions, 'custom-api', confidence_threshold) + else: + # Original sequential processing + for i, (x, y, w, h) in enumerate(all_regions): + cropped = self._safe_crop_region(image, x, y, w, h) + if cropped is None: + continue + result = self.ocr_manager.detect_text( + cropped, + 'custom-api', + confidence=confidence_threshold + ) + if result and len(result) > 0 and result[0].text.strip(): + result[0].bbox = (x, y, w, h) + result[0].vertices = [(x, y), (x+w, y), (x+w, y+h), (x, y+h)] + ocr_results.append(result[0]) + self._log(f"🔍 Region {i+1}/{len(all_regions)}: {result[0].text[:50]}...") + + # Clear regions list after processing + all_regions = None + else: + # Process full image without bubble detection + self._log("📝 Processing full image with Custom API") + ocr_results = self.ocr_manager.detect_text( + image, + 'custom-api', + confidence=confidence_threshold + ) + + elif self.ocr_provider == 'easyocr': + # Initialize results list + ocr_results = [] + + # Configure EasyOCR languages + language_hints = ocr_settings.get('language_hints', ['ja', 'en']) + validated_languages = self._validate_easyocr_languages(language_hints) + + easyocr_provider = self.ocr_manager.get_provider('easyocr') + if easyocr_provider: + if easyocr_provider.languages != validated_languages: + easyocr_provider.languages = validated_languages + easyocr_provider.is_loaded = False + self._log(f"🔥 Reloading EasyOCR with languages: {validated_languages}") + self.ocr_manager.load_provider('easyocr') + + # Check if we should use bubble detection + if ocr_settings.get('bubble_detection_enabled', False): + self._log("📝 Using bubble detection regions for EasyOCR...") + + # Run bubble detection to get regions (thread-local) + _ = self._get_thread_bubble_detector() + + # Get regions from bubble detector + rtdetr_detections = self._load_bubble_detector(ocr_settings, image_path) + if rtdetr_detections: + + # Process only text-containing regions + all_regions = [] + if 'text_bubbles' in rtdetr_detections: + all_regions.extend(rtdetr_detections.get('text_bubbles', [])) + if 'text_free' in rtdetr_detections: + all_regions.extend(rtdetr_detections.get('text_free', [])) + + self._log(f"📊 Processing {len(all_regions)} text regions with EasyOCR") + + # Check if parallel processing is enabled + if self.parallel_processing and len(all_regions) > 1: + self._log(f"🚀 Using PARALLEL OCR for {len(all_regions)} regions with EasyOCR") + ocr_results = self._parallel_ocr_regions(image, all_regions, 'easyocr', confidence_threshold) + else: + # Process each region with EasyOCR + for i, (x, y, w, h) in enumerate(all_regions): + cropped = self._safe_crop_region(image, x, y, w, h) + if cropped is None: + continue + result = self.ocr_manager.detect_text(cropped, 'easyocr', confidence=confidence_threshold) + if result and len(result) > 0 and result[0].text.strip(): + result[0].bbox = (x, y, w, h) + result[0].vertices = [(x, y), (x+w, y), (x+w, y+h), (x, y+h)] + ocr_results.append(result[0]) + self._log(f"✅ Region {i+1}: {result[0].text[:50]}...") + else: + # Process full image without 
bubble detection + self._log("📝 Processing full image with EasyOCR") + ocr_results = self.ocr_manager.detect_text(image, self.ocr_provider) + + elif self.ocr_provider == 'paddleocr': + # Initialize results list + ocr_results = [] + + # Configure PaddleOCR language + language_hints = ocr_settings.get('language_hints', ['ja']) + lang_map = {'ja': 'japan', 'ko': 'korean', 'zh': 'ch', 'en': 'en'} + paddle_lang = lang_map.get(language_hints[0] if language_hints else 'ja', 'japan') + + # Reload if language changed + paddle_provider = self.ocr_manager.get_provider('paddleocr') + if paddle_provider and paddle_provider.is_loaded: + if hasattr(paddle_provider.model, 'lang') and paddle_provider.model.lang != paddle_lang: + from paddleocr import PaddleOCR + paddle_provider.model = PaddleOCR( + use_angle_cls=True, + lang=paddle_lang, + use_gpu=True, + show_log=False + ) + self._log(f"🔥 Reloaded PaddleOCR with language: {paddle_lang}") + + # Check if we should use bubble detection + if ocr_settings.get('bubble_detection_enabled', False): + self._log("📝 Using bubble detection regions for PaddleOCR...") + + # Run bubble detection to get regions (thread-local) + _ = self._get_thread_bubble_detector() + + # Get regions from bubble detector + rtdetr_detections = self._load_bubble_detector(ocr_settings, image_path) + if rtdetr_detections: + + # Process only text-containing regions + all_regions = [] + if 'text_bubbles' in rtdetr_detections: + all_regions.extend(rtdetr_detections.get('text_bubbles', [])) + if 'text_free' in rtdetr_detections: + all_regions.extend(rtdetr_detections.get('text_free', [])) + + self._log(f"📊 Processing {len(all_regions)} text regions with PaddleOCR") + + # Check if parallel processing is enabled + if self.parallel_processing and len(all_regions) > 1: + self._log(f"🚀 Using PARALLEL OCR for {len(all_regions)} regions with PaddleOCR") + ocr_results = self._parallel_ocr_regions(image, all_regions, 'paddleocr', confidence_threshold) + else: + # Process each region with PaddleOCR + for i, (x, y, w, h) in enumerate(all_regions): + cropped = self._safe_crop_region(image, x, y, w, h) + if cropped is None: + continue + result = self.ocr_manager.detect_text(cropped, 'paddleocr', confidence=confidence_threshold) + if result and len(result) > 0 and result[0].text.strip(): + result[0].bbox = (x, y, w, h) + result[0].vertices = [(x, y), (x+w, y), (x+w, y+h), (x, y+h)] + ocr_results.append(result[0]) + self._log(f"✅ Region {i+1}: {result[0].text[:50]}...") + else: + # Process full image without bubble detection + self._log("📝 Processing full image with PaddleOCR") + ocr_results = self.ocr_manager.detect_text(image, self.ocr_provider) + + elif self.ocr_provider == 'doctr': + # Initialize results list + ocr_results = [] + + self._log("📄 DocTR OCR for document text recognition") + + # Check if we should use bubble detection + if ocr_settings.get('bubble_detection_enabled', False): + self._log("📝 Using bubble detection regions for DocTR...") + + # Run bubble detection to get regions (thread-local) + _ = self._get_thread_bubble_detector() + + # Get regions from bubble detector + rtdetr_detections = self._load_bubble_detector(ocr_settings, image_path) + if rtdetr_detections: + + # Process only text-containing regions + all_regions = [] + if 'text_bubbles' in rtdetr_detections: + all_regions.extend(rtdetr_detections.get('text_bubbles', [])) + if 'text_free' in rtdetr_detections: + all_regions.extend(rtdetr_detections.get('text_free', [])) + + self._log(f"📊 Processing {len(all_regions)} text regions with 
DocTR") + + # Check if parallel processing is enabled + if self.parallel_processing and len(all_regions) > 1: + self._log(f"🚀 Using PARALLEL OCR for {len(all_regions)} regions with DocTR") + ocr_results = self._parallel_ocr_regions(image, all_regions, 'doctr', confidence_threshold) + else: + # Process each region with DocTR + for i, (x, y, w, h) in enumerate(all_regions): + cropped = self._safe_crop_region(image, x, y, w, h) + if cropped is None: + continue + result = self.ocr_manager.detect_text(cropped, 'doctr', confidence=confidence_threshold) + if result and len(result) > 0 and result[0].text.strip(): + result[0].bbox = (x, y, w, h) + result[0].vertices = [(x, y), (x+w, y), (x+w, y+h), (x, y+h)] + ocr_results.append(result[0]) + self._log(f"✅ Region {i+1}: {result[0].text[:50]}...") + else: + # Process full image without bubble detection + self._log("📝 Processing full image with DocTR") + ocr_results = self.ocr_manager.detect_text(image, self.ocr_provider) + + elif self.ocr_provider == 'rapidocr': + # Initialize results list + ocr_results = [] + + # Get RapidOCR settings + use_recognition = self.main_gui.config.get('rapidocr_use_recognition', True) + language = self.main_gui.config.get('rapidocr_language', 'auto') + detection_mode = self.main_gui.config.get('rapidocr_detection_mode', 'document') + + self._log(f"⚡ RapidOCR - Recognition: {'Full' if use_recognition else 'Detection Only'}") + + # ALWAYS process full image with RapidOCR for best results + self._log("📊 Processing full image with RapidOCR") + ocr_results = self.ocr_manager.detect_text( + image, + 'rapidocr', + confidence=confidence_threshold, + use_recognition=use_recognition, + language=language, + detection_mode=detection_mode + ) + + # RT-DETR detection only affects merging, not OCR + if ocr_settings.get('bubble_detection_enabled', False): + self._log("🤖 RT-DETR will be used for bubble-based merging") + + else: + # Default processing for any other providers + ocr_results = self.ocr_manager.detect_text(image, self.ocr_provider) + + # Convert OCR results to TextRegion format + for result in ocr_results: + # CLEAN ORIGINAL OCR TEXT - Fix cube characters and encoding issues + original_ocr_text = result.text + cleaned_result_text = self._fix_encoding_issues(result.text) + cleaned_result_text = self._normalize_unicode_width(cleaned_result_text) + cleaned_result_text = self._sanitize_unicode_characters(cleaned_result_text) + + # Log cleaning if changes were made + if cleaned_result_text != original_ocr_text: + self._log(f"🧹 Cleaned OCR manager text: '{original_ocr_text[:30]}...' 
→ '{cleaned_result_text[:30]}...'", "debug") + + # Apply filtering (use cleaned text) + if len(cleaned_result_text.strip()) < min_text_length: + if not getattr(self, 'concise_logs', False): + self._log(f" Skipping short text ({len(cleaned_result_text)} chars): {cleaned_result_text}") + continue + + if exclude_english and self._is_primarily_english(cleaned_result_text): + if not getattr(self, 'concise_logs', False): + self._log(f" Skipping English text: {cleaned_result_text[:50]}...") + continue + + if result.confidence < confidence_threshold: + if not getattr(self, 'concise_logs', False): + self._log(f" Skipping low confidence ({result.confidence:.2f}): {cleaned_result_text[:30]}...") + continue + + # Create TextRegion (use cleaned text) + # CRITICAL: Preserve bubble_bounds if it was set during OCR (e.g., manga-ocr with RT-DETR) + region_kwargs = { + 'text': cleaned_result_text, # Use cleaned text instead of original + 'vertices': result.vertices if result.vertices else [ + (result.bbox[0], result.bbox[1]), + (result.bbox[0] + result.bbox[2], result.bbox[1]), + (result.bbox[0] + result.bbox[2], result.bbox[1] + result.bbox[3]), + (result.bbox[0], result.bbox[1] + result.bbox[3]) + ], + 'bounding_box': result.bbox, + 'confidence': result.confidence, + 'region_type': 'text_block' + } + # Preserve bubble_bounds from OCR result if present + if hasattr(result, 'bubble_bounds') and result.bubble_bounds is not None: + region_kwargs['bubble_bounds'] = result.bubble_bounds + self._log(f" 🔍 Preserved bubble_bounds from OCR: {result.bubble_bounds}", "debug") + else: + if hasattr(result, 'bubble_bounds'): + self._log(f" ⚠️ OCR result has bubble_bounds but it's None!", "debug") + else: + self._log(f" ℹ️ OCR result has no bubble_bounds attribute", "debug") + + region = TextRegion(**region_kwargs) + regions.append(region) + if not getattr(self, 'concise_logs', False): + self._log(f" Found text ({result.confidence:.2f}): {cleaned_result_text[:50]}...") + + # MERGING SECTION (applies to all providers) + # Check if bubble detection is enabled + if ocr_settings.get('bubble_detection_enabled', False): + # For manga-ocr and similar providers, skip merging since regions already have bubble_bounds from OCR + # Only Azure and Google need merging because they return line-level OCR results + if self.ocr_provider in ['manga-ocr', 'Qwen2-VL', 'custom-api', 'easyocr', 'paddleocr', 'doctr']: + self._log("🎯 Skipping bubble detection merge for manga-ocr (regions already aligned with RT-DETR)") + # Regions already have bubble_bounds set from OCR phase - no need to merge + else: + # Azure and Google return line-level results that need to be merged into bubbles + self._log("🤖 Using AI bubble detection for merging") + regions = self._merge_with_bubble_detection(regions, image_path) + else: + # Traditional merging + merge_threshold = ocr_settings.get('merge_nearby_threshold', 20) + + # Apply provider-specific adjustments + if self.ocr_provider == 'azure': + azure_multiplier = ocr_settings.get('azure_merge_multiplier', 2.0) + merge_threshold = int(merge_threshold * azure_multiplier) + self._log(f"📋 Using Azure-adjusted merge threshold: {merge_threshold}px") + + # Pre-group Azure lines if the method exists + if hasattr(self, '_pregroup_azure_lines'): + regions = self._pregroup_azure_lines(regions, merge_threshold) + + elif self.ocr_provider in ['paddleocr', 'easyocr', 'doctr']: + # These providers often return smaller text segments + line_multiplier = ocr_settings.get('line_ocr_merge_multiplier', 1.5) + merge_threshold = 
int(merge_threshold * line_multiplier) + self._log(f"📋 Using line-based OCR adjusted threshold: {merge_threshold}px") + + # Apply standard merging + regions = self._merge_nearby_regions(regions, threshold=merge_threshold) + + self._log(f"✅ Detected {len(regions)} text regions after merging") + + # NOTE: Debug images are saved in process_image() with correct output_dir + # Removed duplicate save here to avoid creating unexpected 'translated_images' folders + + return regions + + except Exception as e: + self._log(f"❌ Error detecting text: {str(e)}", "error") + import traceback + self._log(traceback.format_exc(), "error") + raise + + def _validate_easyocr_languages(self, languages): + """Validate EasyOCR language combinations""" + # EasyOCR compatibility rules + incompatible_sets = [ + {'ja', 'ko'}, # Japanese + Korean + {'ja', 'zh'}, # Japanese + Chinese + {'ko', 'zh'} # Korean + Chinese + ] + + lang_set = set(languages) + + for incompatible in incompatible_sets: + if incompatible.issubset(lang_set): + # Conflict detected - keep first language + English + primary_lang = languages[0] if languages else 'en' + result = [primary_lang, 'en'] if primary_lang != 'en' else ['en'] + + self._log(f"⚠️ EasyOCR: {' + '.join(incompatible)} not compatible", "warning") + self._log(f"🔧 Auto-adjusted from {languages} to {result}", "info") + return result + + return languages + + def _parallel_ocr_regions(self, image: np.ndarray, regions: List, provider: str, confidence_threshold: float) -> List: + """Process multiple regions in parallel using ThreadPoolExecutor""" + from concurrent.futures import ThreadPoolExecutor, as_completed + import threading + + ocr_results = [] + results_lock = threading.Lock() + + def process_single_region(index: int, bbox: Tuple[int, int, int, int]): + """Process a single region with OCR""" + x, y, w, h = bbox + try: + # Use the safe crop method + cropped = self._safe_crop_region(image, x, y, w, h) + + # Skip if crop failed + if cropped is None: + self._log(f"⚠️ Skipping region {index} - invalid crop", "warning") + return + + # Run OCR on this region + result = self.ocr_manager.detect_text( + cropped, + provider, + confidence=confidence_threshold + ) + + if result and len(result) > 0 and result[0].text.strip(): + # Adjust coordinates to full image space + result[0].bbox = (x, y, w, h) + result[0].vertices = [(x, y), (x+w, y), (x+w, y+h), (x, y+h)] + # CRITICAL: Store RT-DETR bubble bounds for rendering (for non-Azure/Google providers) + result[0].bubble_bounds = (x, y, w, h) + return (index, result[0]) + return (index, None) + + except Exception as e: + self._log(f"Error processing region {index}: {str(e)}", "error") + return (index, None) + + # Process regions in parallel + max_workers = self.manga_settings.get('advanced', {}).get('max_workers', 4) + # For custom-api, treat OCR calls as API calls: use batch size when batch mode is enabled + try: + if provider == 'custom-api': + # prefer MangaTranslator.batch_size (from env BATCH_SIZE) + bs = int(getattr(self, 'batch_size', 0) or int(os.getenv('BATCH_SIZE', '0'))) + if bs and bs > 0: + max_workers = bs + except Exception: + pass + # Never spawn more workers than regions + max_workers = max(1, min(max_workers, len(regions))) + with ThreadPoolExecutor(max_workers=max_workers) as executor: + # Submit all tasks + future_to_index = {} + for i, bbox in enumerate(regions): + future = executor.submit(process_single_region, i, bbox) + future_to_index[future] = i + + # Collect results + results_dict = {} + completed = 0 + for future in 
as_completed(future_to_index): + try: + index, result = future.result(timeout=30) + if result: + results_dict[index] = result + completed += 1 + self._log(f"✅ [{completed}/{len(regions)}] Processed region {index+1}") + except Exception as e: + self._log(f"Failed to process region: {str(e)}", "error") + + # Sort results by index to maintain order + for i in range(len(regions)): + if i in results_dict: + ocr_results.append(results_dict[i]) + + self._log(f"📊 Parallel OCR complete: {len(ocr_results)}/{len(regions)} regions extracted") + return ocr_results + + def _pregroup_azure_lines(self, lines: List[TextRegion], base_threshold: int) -> List[TextRegion]: + """Pre-group Azure lines that are obviously part of the same text block + This makes them more like Google's blocks before the main merge logic""" + + if len(lines) <= 1: + return lines + + # Sort by vertical position first, then horizontal + lines.sort(key=lambda r: (r.bounding_box[1], r.bounding_box[0])) + + pregrouped = [] + i = 0 + + while i < len(lines): + current_group = [lines[i]] + current_bbox = list(lines[i].bounding_box) + + # Look ahead for lines that should obviously be grouped + j = i + 1 + while j < len(lines): + x1, y1, w1, h1 = current_bbox + x2, y2, w2, h2 = lines[j].bounding_box + + # Calculate gaps + vertical_gap = y2 - (y1 + h1) if y2 > y1 + h1 else 0 + + # Check horizontal alignment + center_x1 = x1 + w1 / 2 + center_x2 = x2 + w2 / 2 + horizontal_offset = abs(center_x1 - center_x2) + avg_width = (w1 + w2) / 2 + + # Group if: + # 1. Lines are vertically adjacent (small gap) + # 2. Lines are well-aligned horizontally (likely same bubble) + if (vertical_gap < h1 * 0.5 and # Less than half line height gap + horizontal_offset < avg_width * 0.5): # Well centered + + # Add to group + current_group.append(lines[j]) + + # Update bounding box to include new line + min_x = min(x1, x2) + min_y = min(y1, y2) + max_x = max(x1 + w1, x2 + w2) + max_y = max(y1 + h1, y2 + h2) + current_bbox = [min_x, min_y, max_x - min_x, max_y - min_y] + + j += 1 + else: + break + + # Create merged region from group + if len(current_group) > 1: + merged_text = " ".join([line.text for line in current_group]) + all_vertices = [] + for line in current_group: + all_vertices.extend(line.vertices) + + merged_region = TextRegion( + text=merged_text, + vertices=all_vertices, + bounding_box=tuple(current_bbox), + confidence=0.95, + region_type='pregrouped_lines' + ) + pregrouped.append(merged_region) + + self._log(f" Pre-grouped {len(current_group)} Azure lines into block") + else: + # Single line, keep as is + pregrouped.append(lines[i]) + + i = j if j > i + 1 else i + 1 + + self._log(f" Azure pre-grouping: {len(lines)} lines → {len(pregrouped)} blocks") + return pregrouped + + def _safe_crop_region(self, image, x, y, w, h): + """Safely crop a region from image with validation""" + img_h, img_w = image.shape[:2] + + # Validate and clamp coordinates + x = max(0, min(x, img_w - 1)) + y = max(0, min(y, img_h - 1)) + x2 = min(x + w, img_w) + y2 = min(y + h, img_h) + + # Ensure valid region + if x2 <= x or y2 <= y: + self._log(f"⚠️ Invalid crop region: ({x},{y},{w},{h}) for image {img_w}x{img_h}", "warning") + return None + + # Minimum size check + if (x2 - x) < 5 or (y2 - y) < 5: + self._log(f"⚠️ Region too small: {x2-x}x{y2-y} pixels", "warning") + return None + + cropped = image[y:y2, x:x2] + + if cropped.size == 0: + self._log(f"⚠️ Empty crop result", "warning") + return None + + return cropped + + def _prepare_ocr_rois_from_bubbles(self, image_path: str, 
ocr_settings: Dict, preprocessing: Dict, page_hash: str) -> List[Dict[str, Any]]: + """Prepare ROI crops (bytes) from bubble detection to use with OCR locality. + - Enhancements/resizing are gated by preprocessing['enabled']. + - Compression/encoding is controlled by manga_settings['compression'] independently. + Returns list of dicts: {id, bbox, bytes, type} + """ + try: + # Run bubble detector and collect text-containing boxes + detections = self._load_bubble_detector(ocr_settings, image_path) + if not detections: + return [] + regions = [] + for key in ('text_bubbles', 'text_free'): + for i, (bx, by, bw, bh) in enumerate(detections.get(key, []) or []): + regions.append({'type': 'text_bubble' if key == 'text_bubbles' else 'free_text', + 'bbox': (int(bx), int(by), int(bw), int(bh)), + 'id': f"{key}_{i}"}) + if not regions: + return [] + + # Open original image once + pil = Image.open(image_path) + if pil.mode != 'RGB': + pil = pil.convert('RGB') + + pad_ratio = float(ocr_settings.get('roi_padding_ratio', 0.08)) # 8% padding default + preproc_enabled = bool(preprocessing.get('enabled', False)) + # Compression settings (separate from preprocessing) + comp = {} + try: + comp = (self.main_gui.config.get('manga_settings', {}) or {}).get('compression', {}) + except Exception: + comp = {} + comp_enabled = bool(comp.get('enabled', False)) + comp_format = str(comp.get('format', 'jpeg')).lower() + jpeg_q = int(comp.get('jpeg_quality', 85)) + png_lvl = int(comp.get('png_compress_level', 6)) + webp_q = int(comp.get('webp_quality', 85)) + + out = [] + W, H = pil.size + # Pre-filter tiny ROIs (skip before cropping) + min_side_px = int(ocr_settings.get('roi_min_side_px', 12)) + min_area_px = int(ocr_settings.get('roi_min_area_px', 100)) + for rec in regions: + x, y, w, h = rec['bbox'] + if min(w, h) < max(1, min_side_px) or (w * h) < max(1, min_area_px): + # Skip tiny ROI + continue + # Apply padding + px = int(w * pad_ratio) + py = int(h * pad_ratio) + x1 = max(0, x - px) + y1 = max(0, y - py) + x2 = min(W, x + w + px) + y2 = min(H, y + h + py) + if x2 <= x1 or y2 <= y1: + continue + crop = pil.crop((x1, y1, x2, y2)) + + # Quality-affecting steps only when preprocessing enabled + if preproc_enabled: + try: + # Enhance contrast/sharpness/brightness if configured + c = float(preprocessing.get('contrast_threshold', 0.4)) + s = float(preprocessing.get('sharpness_threshold', 0.3)) + g = float(preprocessing.get('enhancement_strength', 1.5)) + if c: + crop = ImageEnhance.Contrast(crop).enhance(1 + c) + if s: + crop = ImageEnhance.Sharpness(crop).enhance(1 + s) + if g and g != 1.0: + crop = ImageEnhance.Brightness(crop).enhance(g) + # Optional ROI resize limit (short side cap) + roi_max_side = int(ocr_settings.get('roi_max_side', 0) or 0) + if roi_max_side and (crop.width > roi_max_side or crop.height > roi_max_side): + ratio = min(roi_max_side / crop.width, roi_max_side / crop.height) + crop = crop.resize((max(1, int(crop.width * ratio)), max(1, int(crop.height * ratio))), Image.Resampling.LANCZOS) + except Exception: + pass + # Encoding/Compression independent of preprocessing + from io import BytesIO + buf = BytesIO() + try: + if comp_enabled: + if comp_format in ('jpeg', 'jpg'): + if crop.mode != 'RGB': + crop = crop.convert('RGB') + crop.save(buf, format='JPEG', quality=max(1, min(95, jpeg_q)), optimize=True, progressive=True) + elif comp_format == 'png': + crop.save(buf, format='PNG', optimize=True, compress_level=max(0, min(9, png_lvl))) + elif comp_format == 'webp': + crop.save(buf, format='WEBP', 
quality=max(1, min(100, webp_q))) + else: + crop.save(buf, format='PNG', optimize=True) + else: + # Default lossless PNG + crop.save(buf, format='PNG', optimize=True) + img_bytes = buf.getvalue() + except Exception: + buf = BytesIO() + crop.save(buf, format='PNG', optimize=True) + img_bytes = buf.getvalue() + + out.append({ + 'id': rec['id'], + 'bbox': (x, y, w, h), # keep original bbox without padding for placement + 'bytes': img_bytes, + 'type': rec['type'], + 'page_hash': page_hash + }) + return out + except Exception as e: + self._log(f"⚠️ ROI preparation failed: {e}", "warning") + return [] + + def _google_ocr_rois_batched(self, rois: List[Dict[str, Any]], ocr_settings: Dict, batch_size: int, max_concurrency: int, page_hash: str) -> List[TextRegion]: + """Batch OCR of ROI crops using Google Vision batchAnnotateImages. + - Uses bounded concurrency for multiple batches in flight. + - Consults and updates an in-memory ROI OCR cache. + """ + try: + from google.cloud import vision as _vision + except Exception: + self._log("❌ Google Vision SDK not available for ROI batching", "error") + return [] + + lang_hints = ocr_settings.get('language_hints', ['ja', 'ko', 'zh']) + detection_mode = ocr_settings.get('text_detection_mode', 'document') + feature_type = _vision.Feature.Type.DOCUMENT_TEXT_DETECTION if detection_mode == 'document' else _vision.Feature.Type.TEXT_DETECTION + feature = _vision.Feature(type=feature_type) + + results: List[TextRegion] = [] + min_text_length = int(ocr_settings.get('min_text_length', 2)) + exclude_english = bool(ocr_settings.get('exclude_english_text', True)) + + # Check cache first and build work list of uncached ROIs + work_rois = [] + for roi in rois: + x, y, w, h = roi['bbox'] + # Include region type in cache key to prevent mismapping + cache_key = ("google", page_hash, x, y, w, h, tuple(lang_hints), detection_mode, roi.get('type', 'unknown')) + # THREAD-SAFE: Use lock for cache access in parallel panel translation + with self._cache_lock: + cached_text = self.ocr_roi_cache.get(cache_key) + if cached_text: + region = TextRegion( + text=cached_text, + vertices=[(x, y), (x+w, y), (x+w, y+h), (x, y+h)], + bounding_box=(x, y, w, h), + confidence=0.95, + region_type='ocr_roi' + ) + try: + region.bubble_type = 'free_text' if roi.get('type') == 'free_text' else 'text_bubble' + region.should_inpaint = True + except Exception: + pass + results.append(region) + else: + roi['cache_key'] = cache_key + work_rois.append(roi) + + if not work_rois: + return results + + # Create batches + batch_size = max(1, batch_size) + batches = [work_rois[i:i+batch_size] for i in range(0, len(work_rois), batch_size)] + max_concurrency = max(1, int(max_concurrency or 1)) + + def do_batch(batch): + # RATE LIMITING: Add small delay before batch submission + import time + import random + time.sleep(0.1 + random.random() * 0.2) # 0.1-0.3s random delay + + requests = [] + for roi in batch: + img = _vision.Image(content=roi['bytes']) + ctx = _vision.ImageContext(language_hints=list(lang_hints)) + req = _vision.AnnotateImageRequest(image=img, features=[feature], image_context=ctx) + requests.append(req) + return self.vision_client.batch_annotate_images(requests=requests), batch + + # Execute with concurrency + if max_concurrency == 1 or len(batches) == 1: + iter_batches = [(self.vision_client.batch_annotate_images(requests=[ + _vision.AnnotateImageRequest(image=_vision.Image(content=roi['bytes']), features=[feature], image_context=_vision.ImageContext(language_hints=list(lang_hints))) + for roi in 
batch + ]), batch) for batch in batches] + else: + from concurrent.futures import ThreadPoolExecutor, as_completed + iter_batches = [] + with ThreadPoolExecutor(max_workers=max_concurrency) as ex: + futures = [ex.submit(do_batch, b) for b in batches] + for fut in as_completed(futures): + try: + iter_batches.append(fut.result()) + except Exception as e: + self._log(f"⚠️ Google batch failed: {e}", "warning") + continue + + # Consume responses and update cache + for resp, batch in iter_batches: + for roi, ann in zip(batch, resp.responses): + if getattr(ann, 'error', None) and ann.error.message: + self._log(f"⚠️ ROI OCR error: {ann.error.message}", "warning") + continue + text = '' + try: + if getattr(ann, 'full_text_annotation', None) and ann.full_text_annotation.text: + text = ann.full_text_annotation.text + elif ann.text_annotations: + text = ann.text_annotations[0].description + except Exception: + text = '' + text = (text or '').strip() + text_clean = self._sanitize_unicode_characters(self._fix_encoding_issues(text)) + if len(text_clean.strip()) < min_text_length: + continue + if exclude_english and self._is_primarily_english(text_clean): + continue + x, y, w, h = roi['bbox'] + # Update cache + # THREAD-SAFE: Use lock for cache write in parallel panel translation + try: + ck = roi.get('cache_key') or ("google", page_hash, x, y, w, h, tuple(lang_hints), detection_mode) + with self._cache_lock: + self.ocr_roi_cache[ck] = text_clean + except Exception: + pass + region = TextRegion( + text=text_clean, + vertices=[(x, y), (x+w, y), (x+w, y+h), (x, y+h)], + bounding_box=(x, y, w, h), + confidence=0.95, + region_type='ocr_roi' + ) + try: + region.bubble_type = 'free_text' if roi.get('type') == 'free_text' else 'text_bubble' + region.should_inpaint = True + except Exception: + pass + results.append(region) + return results + + def _azure_ocr_rois_concurrent(self, rois: List[Dict[str, Any]], ocr_settings: Dict, max_workers: int, page_hash: str) -> List[TextRegion]: + """Concurrent ROI OCR for Azure Read API. Each ROI is sent as a separate call. + Concurrency is bounded by max_workers. Consults/updates cache. 
+ """ + from concurrent.futures import ThreadPoolExecutor, as_completed + from azure.cognitiveservices.vision.computervision.models import OperationStatusCodes + import io + results: List[TextRegion] = [] + + # Read settings + reading_order = ocr_settings.get('azure_reading_order', 'natural') + model_version = ocr_settings.get('azure_model_version', 'latest') + language_hints = ocr_settings.get('language_hints', ['ja']) + read_params = {'raw': True, 'readingOrder': reading_order} + if model_version != 'latest': + read_params['model-version'] = model_version + if len(language_hints) == 1: + lang_mapping = {'zh': 'zh-Hans', 'zh-TW': 'zh-Hant', 'zh-CN': 'zh-Hans', 'ja': 'ja', 'ko': 'ko', 'en': 'en'} + read_params['language'] = lang_mapping.get(language_hints[0], language_hints[0]) + + min_text_length = int(ocr_settings.get('min_text_length', 2)) + exclude_english = bool(ocr_settings.get('exclude_english_text', True)) + + # Check cache first and split into cached vs work rois + cached_regions: List[TextRegion] = [] + work_rois: List[Dict[str, Any]] = [] + for roi in rois: + x, y, w, h = roi['bbox'] + # Include region type in cache key to prevent mismapping + cache_key = ("azure", page_hash, x, y, w, h, reading_order, roi.get('type', 'unknown')) + # THREAD-SAFE: Use lock for cache access in parallel panel translation + with self._cache_lock: + text_cached = self.ocr_roi_cache.get(cache_key) + if text_cached: + region = TextRegion( + text=text_cached, + vertices=[(x, y), (x+w, y), (x+w, y+h), (x, y+h)], + bounding_box=(x, y, w, h), + confidence=0.95, + region_type='ocr_roi' + ) + try: + region.bubble_type = 'free_text' if roi.get('type') == 'free_text' else 'text_bubble' + region.should_inpaint = True + except Exception: + pass + cached_regions.append(region) + else: + roi['cache_key'] = cache_key + work_rois.append(roi) + + def ocr_one(roi): + try: + # RATE LIMITING: Add delay between Azure API calls to avoid "Too Many Requests" + import time + import random + # Stagger requests with randomized delay + time.sleep(0.1 + random.random() * 0.2) # 0.1-0.3s random delay + + # Ensure Azure-supported format for ROI bytes; honor compression preference when possible + data = roi['bytes'] + try: + from PIL import Image as _PILImage + im = _PILImage.open(io.BytesIO(data)) + fmt = (im.format or '').lower() + if fmt not in ['jpeg', 'jpg', 'png', 'bmp', 'tiff']: + # Choose conversion target based on compression settings if available + try: + comp_cfg = (self.main_gui.config.get('manga_settings', {}) or {}).get('compression', {}) + except Exception: + comp_cfg = {} + target_fmt = 'PNG' + try: + if comp_cfg.get('enabled', False): + cf = str(comp_cfg.get('format', '')).lower() + if cf in ('jpeg', 'jpg'): + target_fmt = 'JPEG' + elif cf == 'png': + target_fmt = 'PNG' + elif cf == 'bmp': + target_fmt = 'BMP' + elif cf == 'tiff': + target_fmt = 'TIFF' + except Exception: + pass + buf2 = io.BytesIO() + if target_fmt == 'JPEG' and im.mode != 'RGB': + im = im.convert('RGB') + im.save(buf2, format=target_fmt) + data = buf2.getvalue() + except Exception: + pass + stream = io.BytesIO(data) + read_response = self.vision_client.read_in_stream(stream, **read_params) + op_loc = read_response.headers.get('Operation-Location') if hasattr(read_response, 'headers') else None + if not op_loc: + return None + op_id = op_loc.split('/')[-1] + # Poll + import time + waited = 0.0 + poll_interval = float(ocr_settings.get('azure_poll_interval', 0.5)) + max_wait = float(ocr_settings.get('azure_max_wait', 60)) + while waited < max_wait: 
+ result = self.vision_client.get_read_result(op_id) + if result.status not in [OperationStatusCodes.running, OperationStatusCodes.not_started]: + break + time.sleep(poll_interval) + waited += poll_interval + if result.status != OperationStatusCodes.succeeded: + return None + # Aggregate text lines + texts = [] + for page in result.analyze_result.read_results: + for line in page.lines: + t = self._sanitize_unicode_characters(self._fix_encoding_issues(line.text or '')) + if t: + texts.append(t) + text_all = ' '.join(texts).strip() + if len(text_all) < min_text_length: + return None + if exclude_english and self._is_primarily_english(text_all): + return None + x, y, w, h = roi['bbox'] + # Update cache + # THREAD-SAFE: Use lock for cache write in parallel panel translation + try: + ck = roi.get('cache_key') + if ck: + with self._cache_lock: + self.ocr_roi_cache[ck] = text_all + except Exception: + pass + region = TextRegion( + text=text_all, + vertices=[(x, y), (x+w, y), (x+w, y+h), (x, y+h)], + bounding_box=(x, y, w, h), + confidence=0.95, + region_type='ocr_roi' + ) + try: + region.bubble_type = 'free_text' if roi.get('type') == 'free_text' else 'text_bubble' + region.should_inpaint = True + except Exception: + pass + return region + except Exception: + return None + + # Combine cached and new results + results.extend(cached_regions) + + if work_rois: + max_workers = max(1, min(max_workers, len(work_rois))) + with ThreadPoolExecutor(max_workers=max_workers) as ex: + fut_map = {ex.submit(ocr_one, r): r for r in work_rois} + for fut in as_completed(fut_map): + reg = fut.result() + if reg is not None: + results.append(reg) + return results + + def _detect_text_azure(self, image_data: bytes, ocr_settings: dict) -> List[TextRegion]: + """Detect text using Azure Computer Vision""" + import io + from azure.cognitiveservices.vision.computervision.models import OperationStatusCodes + + stream = io.BytesIO(image_data) + + # Use Read API for better manga text detection + read_result = self.vision_client.read_in_stream( + stream, + raw=True, + language='ja' # or from ocr_settings + ) + + # Get operation ID from headers + operation_location = read_result.headers["Operation-Location"] + operation_id = operation_location.split("/")[-1] + + # Wait for completion + import time + while True: + result = self.vision_client.get_read_result(operation_id) + if result.status not in [OperationStatusCodes.running, OperationStatusCodes.not_started]: + break + time.sleep(0.1) # Brief pause for stability + logger.debug("💤 Azure text detection pausing briefly for stability") + + regions = [] + confidence_threshold = ocr_settings.get('confidence_threshold', 0.6) + + if result.status == OperationStatusCodes.succeeded: + for page in result.analyze_result.read_results: + for line in page.lines: + # Azure returns bounding box as 8 coordinates + bbox = line.bounding_box + vertices = [ + (bbox[0], bbox[1]), + (bbox[2], bbox[3]), + (bbox[4], bbox[5]), + (bbox[6], bbox[7]) + ] + + xs = [v[0] for v in vertices] + ys = [v[1] for v in vertices] + x_min, x_max = min(xs), max(xs) + y_min, y_max = min(ys), max(ys) + + # Azure doesn't provide per-line confidence in Read API + confidence = 0.95 # Default high confidence + + if confidence >= confidence_threshold: + region = TextRegion( + text=line.text, + vertices=vertices, + bounding_box=(x_min, y_min, x_max - x_min, y_max - y_min), + confidence=confidence, + region_type='text_line' + ) + regions.append(region) + + return regions + + def _load_image_with_compression_only(self, 
image_path: str, comp: Dict) -> bytes: + """Load image and apply compression settings only (no enhancements/resizing).""" + from io import BytesIO + pil = Image.open(image_path) + if pil.mode != 'RGB': + pil = pil.convert('RGB') + buf = BytesIO() + try: + fmt = str(comp.get('format', 'jpeg')).lower() + if fmt in ('jpeg', 'jpg'): + q = max(1, min(95, int(comp.get('jpeg_quality', 85)))) + pil.save(buf, format='JPEG', quality=q, optimize=True, progressive=True) + elif fmt == 'png': + lvl = max(0, min(9, int(comp.get('png_compress_level', 6)))) + pil.save(buf, format='PNG', optimize=True, compress_level=lvl) + elif fmt == 'webp': + wq = max(1, min(100, int(comp.get('webp_quality', 85)))) + pil.save(buf, format='WEBP', quality=wq) + else: + pil.save(buf, format='PNG', optimize=True) + except Exception: + pil.save(buf, format='PNG', optimize=True) + return buf.getvalue() + + def _preprocess_image(self, image_path: str, preprocessing_settings: Dict) -> bytes: + """Preprocess image for better OCR results + - Enhancements/resizing controlled by preprocessing_settings + - Compression controlled by manga_settings['compression'] independently + """ + try: + # Open image with PIL + pil_image = Image.open(image_path) + + # Convert to RGB if necessary + if pil_image.mode != 'RGB': + pil_image = pil_image.convert('RGB') + + # Auto-detect quality issues if enabled + if preprocessing_settings.get('auto_detect_quality', True): + needs_enhancement = self._detect_quality_issues(pil_image, preprocessing_settings) + if needs_enhancement: + self._log(" Auto-detected quality issues - applying enhancements") + else: + needs_enhancement = True + + if needs_enhancement: + # Apply contrast enhancement + contrast_threshold = preprocessing_settings.get('contrast_threshold', 0.4) + enhancer = ImageEnhance.Contrast(pil_image) + pil_image = enhancer.enhance(1 + contrast_threshold) + + # Apply sharpness enhancement + sharpness_threshold = preprocessing_settings.get('sharpness_threshold', 0.3) + enhancer = ImageEnhance.Sharpness(pil_image) + pil_image = enhancer.enhance(1 + sharpness_threshold) + + # Apply general enhancement strength + enhancement_strength = preprocessing_settings.get('enhancement_strength', 1.5) + if enhancement_strength != 1.0: + # Brightness adjustment + enhancer = ImageEnhance.Brightness(pil_image) + pil_image = enhancer.enhance(enhancement_strength) + + # Resize if too large + max_dimension = preprocessing_settings.get('max_image_dimension', 2000) + if pil_image.width > max_dimension or pil_image.height > max_dimension: + ratio = min(max_dimension / pil_image.width, max_dimension / pil_image.height) + new_size = (int(pil_image.width * ratio), int(pil_image.height * ratio)) + pil_image = pil_image.resize(new_size, Image.Resampling.LANCZOS) + self._log(f" Resized image to {new_size[0]}x{new_size[1]}") + + # Convert back to bytes with compression settings from global config + from io import BytesIO + buffered = BytesIO() + comp = {} + try: + comp = (self.main_gui.config.get('manga_settings', {}) or {}).get('compression', {}) + except Exception: + comp = {} + try: + if comp.get('enabled', False): + fmt = str(comp.get('format', 'jpeg')).lower() + if fmt in ('jpeg', 'jpg'): + if pil_image.mode != 'RGB': + pil_image = pil_image.convert('RGB') + quality = max(1, min(95, int(comp.get('jpeg_quality', 85)))) + pil_image.save(buffered, format='JPEG', quality=quality, optimize=True, progressive=True) + self._log(f" Compressed image as JPEG (q={quality})") + elif fmt == 'png': + level = max(0, min(9, 
int(comp.get('png_compress_level', 6)))) + pil_image.save(buffered, format='PNG', optimize=True, compress_level=level) + self._log(f" Compressed image as PNG (level={level})") + elif fmt == 'webp': + q = max(1, min(100, int(comp.get('webp_quality', 85)))) + pil_image.save(buffered, format='WEBP', quality=q) + self._log(f" Compressed image as WEBP (q={q})") + else: + pil_image.save(buffered, format='PNG', optimize=True) + self._log(" Unknown compression format; saved as optimized PNG") + else: + pil_image.save(buffered, format='PNG', optimize=True) + except Exception as _e: + self._log(f" ⚠️ Compression failed ({_e}); saved as optimized PNG", "warning") + pil_image.save(buffered, format='PNG', optimize=True) + return buffered.getvalue() + + except Exception as e: + self._log(f"⚠️ Preprocessing failed: {str(e)}, using original image", "warning") + with open(image_path, 'rb') as f: + return f.read() + + def _detect_quality_issues(self, image: Image.Image, settings: Dict) -> bool: + """Auto-detect if image needs quality enhancement""" + # Convert to grayscale for analysis + gray = image.convert('L') + + # Get histogram + hist = gray.histogram() + + # Calculate contrast (simplified) + pixels = sum(hist) + mean = sum(i * hist[i] for i in range(256)) / pixels + variance = sum(hist[i] * (i - mean) ** 2 for i in range(256)) / pixels + std_dev = variance ** 0.5 + + # Low contrast if std deviation is low + contrast_threshold = settings.get('contrast_threshold', 0.4) * 100 + if std_dev < contrast_threshold: + self._log(" Low contrast detected") + return True + + # Check for blur using Laplacian variance + import numpy as np + gray_array = np.array(gray) + laplacian = cv2.Laplacian(gray_array, cv2.CV_64F) + variance = laplacian.var() + + sharpness_threshold = settings.get('sharpness_threshold', 0.3) * 100 + if variance < sharpness_threshold: + self._log(" Blur detected") + return True + + return False + + def _save_debug_image(self, image_path: str, regions: List[TextRegion], debug_base_dir: str = None): + """Save debug image with detected regions highlighted, respecting save_intermediate toggle. 
+ All files are written under <translated_images>/debug (or provided debug_base_dir).""" + advanced_settings = self.manga_settings.get('advanced', {}) + # Skip debug images in batch mode unless explicitly requested + if self.batch_mode and not advanced_settings.get('force_debug_batch', False): + return + # Respect the 'Save intermediate images' toggle only + if not advanced_settings.get('save_intermediate', False): + return + # Compute debug directory under translated_images + if debug_base_dir is None: + translated_dir = os.path.join(os.path.dirname(image_path), 'translated_images') + debug_dir = os.path.join(translated_dir, 'debug') + else: + debug_dir = os.path.join(debug_base_dir, 'debug') + os.makedirs(debug_dir, exist_ok=True) + base_name = os.path.splitext(os.path.basename(image_path))[0] + + try: + import cv2 + import numpy as np + from PIL import Image as PILImage + + # Handle Unicode paths + try: + img = cv2.imread(image_path) + if img is None: + # Fallback to PIL for Unicode paths + pil_image = PILImage.open(image_path) + img = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR) + except Exception as e: + self._log(f" Failed to load image for debug: {str(e)}", "warning") + return + + # Debug directory prepared earlier; compute base name + # base_name already computed above + + # Draw rectangles around detected text regions + overlay = img.copy() + + # Calculate statistics + total_chars = sum(len(r.text) for r in regions) + avg_confidence = np.mean([r.confidence for r in regions]) if regions else 0 + + for i, region in enumerate(regions): + # Convert to int to avoid OpenCV type errors + x, y, w, h = map(int, region.bounding_box) + + # Color based on confidence + if region.confidence > 0.95: + color = (0, 255, 0) # Green - high confidence + elif region.confidence > 0.8: + color = (0, 165, 255) # Orange - medium confidence + else: + color = (0, 0, 255) # Red - low confidence + + # Draw rectangle + cv2.rectangle(overlay, (x, y), (x + w, y + h), color, 2) + + # Add region info + info_text = f"#{i} ({region.confidence:.2f})" + cv2.putText(overlay, info_text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, + 0.5, color, 1, cv2.LINE_AA) + + # Add character count + char_count = len(region.text.strip()) + cv2.putText(overlay, f"{char_count} chars", (x, y + h + 15), + cv2.FONT_HERSHEY_SIMPLEX, 0.4, color, 1, cv2.LINE_AA) + + # Add detected text preview if in verbose debug mode + if self.manga_settings.get('advanced', {}).get('save_intermediate', False): + text_preview = region.text[:20] + "..." 
if len(region.text) > 20 else region.text + cv2.putText(overlay, text_preview, (x, y + h + 30), cv2.FONT_HERSHEY_SIMPLEX, + 0.4, color, 1, cv2.LINE_AA) + + # Add overall statistics to the image + stats_bg = overlay.copy() + cv2.rectangle(stats_bg, (10, 10), (300, 90), (0, 0, 0), -1) + cv2.addWeighted(stats_bg, 0.7, overlay, 0.3, 0, overlay) + + stats_text = [ + f"Regions: {len(regions)}", + f"Total chars: {total_chars}", + f"Avg confidence: {avg_confidence:.2f}" + ] + + for i, text in enumerate(stats_text): + cv2.putText(overlay, text, (20, 35 + i*20), cv2.FONT_HERSHEY_SIMPLEX, + 0.5, (255, 255, 255), 1, cv2.LINE_AA) + + # Save main debug image (always under translated_images/debug when enabled) + debug_path = os.path.join(debug_dir, f"{base_name}_debug_regions.png") + cv2.imwrite(debug_path, overlay) + self._log(f" 📸 Saved debug image: {debug_path}") + + # Save text mask + mask = self.create_text_mask(img, regions) + mask_debug_path = debug_path.replace('_debug', '_mask') + cv2.imwrite(mask_debug_path, mask) + mask_percentage = ((mask > 0).sum() / mask.size) * 100 + self._log(f" 🎭 Saved mask image: {mask_debug_path}", "info") + self._log(f" 📊 Mask coverage: {mask_percentage:.1f}% of image", "info") + + # If save_intermediate is enabled, save additional debug images + if self.manga_settings.get('advanced', {}).get('save_intermediate', False): + # Save confidence heatmap + heatmap = self._create_confidence_heatmap(img, regions) + heatmap_path = os.path.join(debug_dir, f"{base_name}_confidence_heatmap.png") + cv2.imwrite(heatmap_path, heatmap) + self._log(f" 🌡️ Saved confidence heatmap: {heatmap_path}") + + # Save polygon visualization with safe text areas + if any(hasattr(r, 'vertices') and r.vertices for r in regions): + polygon_img = img.copy() + for region in regions: + if hasattr(region, 'vertices') and region.vertices: + # Draw polygon + pts = np.array(region.vertices, np.int32) + pts = pts.reshape((-1, 1, 2)) + + # Fill with transparency + overlay_poly = polygon_img.copy() + cv2.fillPoly(overlay_poly, [pts], (0, 255, 255)) + cv2.addWeighted(overlay_poly, 0.2, polygon_img, 0.8, 0, polygon_img) + + # Draw outline + cv2.polylines(polygon_img, [pts], True, (255, 0, 0), 2) + + # Draw safe text area + try: + safe_x, safe_y, safe_w, safe_h = self.get_safe_text_area(region) + # Convert to int for OpenCV + safe_x, safe_y, safe_w, safe_h = map(int, (safe_x, safe_y, safe_w, safe_h)) + cv2.rectangle(polygon_img, (safe_x, safe_y), + (safe_x + safe_w, safe_y + safe_h), + (0, 255, 0), 1) + except: + pass # Skip if get_safe_text_area fails + + # Add legend to explain colors + legend_bg = polygon_img.copy() + legend_height = 140 + legend_width = 370 + cv2.rectangle(legend_bg, (10, 10), (10 + legend_width, 10 + legend_height), (0, 0, 0), -1) + cv2.addWeighted(legend_bg, 0.8, polygon_img, 0.2, 0, polygon_img) + + # Add legend items + # Note: OpenCV uses BGR format, so (255, 0, 0) = Blue, (0, 0, 255) = Red + legend_items = [ + ("Blue outline: OCR polygon (detected text)", (255, 0, 0)), + ("Yellow fill: Mask area (will be inpainted)", (0, 255, 255)), + ("Green rect: Safe text area (algorithm-based)", (0, 255, 0)), + ("Magenta rect: Mask bounds (actual render area)", (255, 0, 255)) + ] + + for i, (text, color) in enumerate(legend_items): + y_pos = 30 + i * 30 + # Draw color sample + if i == 1: # Yellow fill + cv2.rectangle(polygon_img, (20, y_pos - 8), (35, y_pos + 8), color, -1) + else: + cv2.rectangle(polygon_img, (20, y_pos - 8), (35, y_pos + 8), color, 2) + # Draw text + cv2.putText(polygon_img, text, 
(45, y_pos + 5), cv2.FONT_HERSHEY_SIMPLEX, + 0.45, (255, 255, 255), 1, cv2.LINE_AA) + + polygon_path = os.path.join(debug_dir, f"{base_name}_polygons.png") + cv2.imwrite(polygon_path, polygon_img) + self._log(f" 🔷 Saved polygon visualization: {polygon_path}") + + # Save individual region crops with more info + regions_dir = os.path.join(debug_dir, 'regions') + os.makedirs(regions_dir, exist_ok=True) + + for i, region in enumerate(regions[:10]): # Limit to first 10 regions + # Convert to int to avoid OpenCV type errors + x, y, w, h = map(int, region.bounding_box) + # Add padding + pad = 10 + x1 = max(0, x - pad) + y1 = max(0, y - pad) + x2 = min(img.shape[1], x + w + pad) + y2 = min(img.shape[0], y + h + pad) + + region_crop = img[y1:y2, x1:x2].copy() + + # Draw bounding box on crop + cv2.rectangle(region_crop, (pad, pad), + (pad + w, pad + h), (0, 255, 0), 2) + + # Add text info on the crop + info = f"Conf: {region.confidence:.2f} | Chars: {len(region.text)}" + cv2.putText(region_crop, info, (5, 15), cv2.FONT_HERSHEY_SIMPLEX, + 0.4, (255, 255, 255), 1, cv2.LINE_AA) + + # Save with meaningful filename + safe_text = region.text[:20].replace('/', '_').replace('\\', '_').strip() + region_path = os.path.join(regions_dir, f"region_{i:03d}_{safe_text}.png") + cv2.imwrite(region_path, region_crop) + + self._log(f" 📁 Saved individual region crops to: {regions_dir}") + + except Exception as e: + self._log(f" ❌ Failed to save debug image: {str(e)}", "warning") + if self.manga_settings.get('advanced', {}).get('debug_mode', False): + # If debug mode is on, log the full traceback + import traceback + self._log(traceback.format_exc(), "warning") + + def _create_confidence_heatmap(self, img, regions): + """Create a heatmap showing OCR confidence levels""" + heatmap = np.zeros_like(img[:, :, 0], dtype=np.float32) + + for region in regions: + # Convert to int for array indexing + x, y, w, h = map(int, region.bounding_box) + confidence = region.confidence + heatmap[y:y+h, x:x+w] = confidence + + # Convert to color heatmap + heatmap_normalized = (heatmap * 255).astype(np.uint8) + heatmap_colored = cv2.applyColorMap(heatmap_normalized, cv2.COLORMAP_JET) + + # Blend with original image + result = cv2.addWeighted(img, 0.7, heatmap_colored, 0.3, 0) + return result + + def _get_translation_history_context(self) -> List[Dict[str, str]]: + """Get translation history context from HistoryManager""" + if not self.history_manager or not self.contextual_enabled: + return [] + + try: + # Load full history + full_history = self.history_manager.load_history() + + if not full_history: + return [] + + # Extract only the contextual messages up to the limit + context = [] + exchange_count = 0 + + # Process history in pairs (user + assistant messages) + for i in range(0, len(full_history), 2): + if i + 1 < len(full_history): + user_msg = full_history[i] + assistant_msg = full_history[i + 1] + + if user_msg.get("role") == "user" and assistant_msg.get("role") == "assistant": + context.extend([user_msg, assistant_msg]) + exchange_count += 1 + + # Only keep up to the history limit + if exchange_count >= self.translation_history_limit: + # Get only the most recent exchanges + context = context[-(self.translation_history_limit * 2):] + break + + return context + + except Exception as e: + self._log(f"⚠️ Error loading history context: {str(e)}", "warning") + return [] + + def translate_text(self, text: str, context: Optional[List[Dict]] = None, image_path: str = None, region: TextRegion = None) -> str: + """Translate text using API 
with GUI system prompt and full image context""" + try: + # Build per-request log prefix for clearer parallel logs + try: + import threading + thread_name = threading.current_thread().name + except Exception: + thread_name = "MainThread" + bbox_info = "" + try: + if region and hasattr(region, 'bounding_box') and region.bounding_box: + x, y, w, h = region.bounding_box + bbox_info = f" [bbox={x},{y},{w}x{h}]" + except Exception: + pass + prefix = f"[{thread_name}]{bbox_info}" + + self._log(f"\n{prefix} 🌐 Starting translation for text: '{text[:50]}...'") + # CHECK 1: Before starting + if self._check_stop(): + self._log("⏹️ Translation stopped before full page context processing", "warning") + return {} + + # Get system prompt from GUI profile + profile_name = self.main_gui.profile_var.get() + + # Get the prompt from prompt_profiles dictionary + system_prompt = '' + if hasattr(self.main_gui, 'prompt_profiles') and profile_name in self.main_gui.prompt_profiles: + system_prompt = self.main_gui.prompt_profiles[profile_name] + self._log(f"📋 Using profile: {profile_name}") + else: + self._log(f"⚠️ Profile '{profile_name}' not found in prompt_profiles", "warning") + + self._log(f"{prefix} 📝 System prompt: {system_prompt[:100]}..." if system_prompt else f"{prefix} 📝 No system prompt configured") + + if system_prompt: + messages = [{"role": "system", "content": system_prompt}] + else: + messages = [] + + + # Add contextual translations if enabled + if self.contextual_enabled and self.history_manager: + # Get history from HistoryManager + history_context = self._get_translation_history_context() + + if history_context: + context_count = len(history_context) // 2 # Each exchange is 2 messages + self._log(f"🔗 Adding {context_count} previous exchanges from history (limit: {self.translation_history_limit})") + messages.extend(history_context) + else: + self._log(f"🔗 Contextual enabled but no history available yet") + else: + self._log(f"{prefix} 🔗 Contextual: {'Disabled' if not self.contextual_enabled else 'No HistoryManager'}") + + # Add full image context if available AND visual context is enabled + if image_path and self.visual_context_enabled: + try: + import base64 + from PIL import Image as PILImage + + self._log(f"{prefix} 📷 Adding full page visual context for translation") + + # Read and encode the full image + with open(image_path, 'rb') as img_file: + img_data = img_file.read() + + # Check image size + img_size_mb = len(img_data) / (1024 * 1024) + self._log(f"{prefix} 📊 Image size: {img_size_mb:.2f} MB") + + # Optionally resize if too large (Gemini has limits) + if img_size_mb > 10: # If larger than 10MB + self._log(f"📉 Resizing large image for API limits...") + pil_image = PILImage.open(image_path) + + # Calculate new size (max 2048px on longest side) + max_size = 2048 + ratio = min(max_size / pil_image.width, max_size / pil_image.height) + if ratio < 1: + new_size = (int(pil_image.width * ratio), int(pil_image.height * ratio)) + pil_image = pil_image.resize(new_size, PILImage.Resampling.LANCZOS) + + # Re-encode + from io import BytesIO + buffered = BytesIO() + pil_image.save(buffered, format="PNG", optimize=True) + img_data = buffered.getvalue() + self._log(f"{prefix} ✅ Resized to {new_size[0]}x{new_size[1]}px ({len(img_data)/(1024*1024):.2f} MB)") + + # Encode to base64 + img_base64 = base64.b64encode(img_data).decode('utf-8') + + # Build the message with image and text location info + location_description = "" + if region: + x, y, w, h = region.bounding_box + # Describe where on the page this 
text is located
+                        page_width = PILImage.open(image_path).width
+                        page_height = PILImage.open(image_path).height
+
+                        # Determine position
+                        h_pos = "left" if x < page_width/3 else "center" if x < 2*page_width/3 else "right"
+                        v_pos = "top" if y < page_height/3 else "middle" if y < 2*page_height/3 else "bottom"
+
+                        location_description = f"\n\nThe text to translate is located in the {v_pos}-{h_pos} area of the page, "
+                        location_description += f"at coordinates ({x}, {y}) with size {w}x{h} pixels."
+
+                    # Add image and text to translate
+                    messages.append({
+                        "role": "user",
+                        "content": [
+                            {
+                                "type": "image_url",
+                                "image_url": {
+                                    "url": f"data:image/png;base64,{img_base64}"
+                                }
+                            },
+                            {
+                                "type": "text",
+                                "text": f"Looking at this full manga page, translate the following text: '{text}'{location_description}"
+                            }
+                        ]
+                    })
+
+                    self._log(f"{prefix} ✅ Added full page image as visual context")
+
+                except Exception as e:
+                    self._log(f"⚠️ Failed to add image context: {str(e)}", "warning")
+                    self._log(f"   Error type: {type(e).__name__}", "warning")
+                    import traceback
+                    self._log(traceback.format_exc(), "warning")
+                    # Fall back to text-only translation
+                    messages.append({"role": "user", "content": text})
+            elif image_path and not self.visual_context_enabled:
+                # Visual context disabled - text-only mode
+                self._log(f"{prefix} 📝 Text-only mode (visual context disabled)")
+                messages.append({"role": "user", "content": text})
+            else:
+                # No image path provided - text-only translation
+                messages.append({"role": "user", "content": text})
+
+            # Check input token limit
+            text_tokens = 0
+            image_tokens = 0
+
+            for msg in messages:
+                if isinstance(msg.get("content"), str):
+                    # Simple text message
+                    text_tokens += len(msg["content"]) // 4
+                elif isinstance(msg.get("content"), list):
+                    # Message with mixed content (text + image)
+                    for content_part in msg["content"]:
+                        if content_part.get("type") == "text":
+                            text_tokens += len(content_part.get("text", "")) // 4
+                        elif content_part.get("type") == "image_url":
+                            # Only count image tokens if visual context is enabled
+                            if self.visual_context_enabled:
+                                image_tokens += 258
+
+            estimated_tokens = text_tokens + image_tokens
+
+            # Check token limit only if it's enabled
+            if self.input_token_limit is None:
+                self._log(f"{prefix} 📊 Token estimate - Text: {text_tokens}, Images: {image_tokens} (Total: {estimated_tokens} / unlimited)")
+            else:
+                self._log(f"{prefix} 📊 Token estimate - Text: {text_tokens}, Images: {image_tokens} (Total: {estimated_tokens} / {self.input_token_limit})")
+
+            # Only enforce the limit when one is configured (comparing against None would raise a TypeError)
+            if self.input_token_limit is not None and estimated_tokens > self.input_token_limit:
+                self._log(f"⚠️ Token limit exceeded, trimming context", "warning")
+                # Keep system prompt, image, and current text only
+                if image_path:
+                    messages = [messages[0], messages[-1]]
+                else:
+                    messages = [messages[0], {"role": "user", "content": text}]
+                # Recalculate tokens after trimming
+                text_tokens = len(messages[0]["content"]) // 4
+                if isinstance(messages[-1].get("content"), str):
+                    text_tokens += len(messages[-1]["content"]) // 4
+                else:
+                    text_tokens += len(messages[-1]["content"][0]["text"]) // 4
+                estimated_tokens = text_tokens + image_tokens
+                self._log(f"📊 Trimmed token estimate: {estimated_tokens}")
+
+            start_time = time.time()
+            api_time = 0  # Initialize to avoid NameError
+
+            try:
+                response = send_with_interrupt(
+                    messages=messages,
+                    client=self.client,
+                    temperature=self.temperature,
+                    max_tokens=self.max_tokens,
+                    stop_check_fn=self._check_stop
+                )
+                api_time = time.time() - start_time
+                self._log(f"{prefix} ✅ API responded 
in {api_time:.2f} seconds") + + # Normalize response to plain text (handle tuples and bytes) + if hasattr(response, 'content'): + response_text = response.content + else: + response_text = response + + # Handle tuple response like (text, 'stop') from some clients + if isinstance(response_text, tuple): + response_text = response_text[0] + + # Decode bytes/bytearray + if isinstance(response_text, (bytes, bytearray)): + try: + response_text = response_text.decode('utf-8', errors='replace') + except Exception: + response_text = str(response_text) + + # Ensure string + if not isinstance(response_text, str): + response_text = str(response_text) + + response_text = response_text.strip() + + # If it's a stringified tuple like "('text', 'stop')", extract the first element + if response_text.startswith("('") or response_text.startswith('("'): + import ast, re + try: + parsed_tuple = ast.literal_eval(response_text) + if isinstance(parsed_tuple, tuple) and parsed_tuple: + response_text = str(parsed_tuple[0]) + self._log("📦 Extracted response from tuple literal", "debug") + except Exception: + match = re.match(r"^\('(.+?)',\s*'.*'\)$", response_text, re.DOTALL) + if match: + tmp = match.group(1) + tmp = tmp.replace('\\n', '\n').replace("\\'", "'").replace('\\\"', '"').replace('\\\\', '\\') + response_text = tmp + self._log("📦 Extracted response using regex from tuple literal", "debug") + + self._log(f"{prefix} 📥 Received response ({len(response_text)} chars)") + + except Exception as api_error: + api_time = time.time() - start_time + error_str = str(api_error).lower() + error_type = type(api_error).__name__ + + # Check for specific error types + if "429" in error_str or "rate limit" in error_str: + self._log(f"⚠️ RATE LIMIT ERROR (429) after {api_time:.2f}s", "error") + self._log(f" The API rate limit has been exceeded", "error") + self._log(f" Please wait before retrying or reduce request frequency", "error") + self._log(f" Error details: {str(api_error)}", "error") + raise Exception(f"Rate limit exceeded (429): {str(api_error)}") + + elif "401" in error_str or "unauthorized" in error_str: + self._log(f"❌ AUTHENTICATION ERROR (401) after {api_time:.2f}s", "error") + self._log(f" Invalid API key or authentication failed", "error") + self._log(f" Please check your API key in settings", "error") + self._log(f" Error details: {str(api_error)}", "error") + raise Exception(f"Authentication failed (401): {str(api_error)}") + + elif "403" in error_str or "forbidden" in error_str: + self._log(f"❌ FORBIDDEN ERROR (403) after {api_time:.2f}s", "error") + self._log(f" Access denied - check API permissions", "error") + self._log(f" Error details: {str(api_error)}", "error") + raise Exception(f"Access forbidden (403): {str(api_error)}") + + elif "400" in error_str or "bad request" in error_str: + self._log(f"❌ BAD REQUEST ERROR (400) after {api_time:.2f}s", "error") + self._log(f" Invalid request format or parameters", "error") + self._log(f" Error details: {str(api_error)}", "error") + raise Exception(f"Bad request (400): {str(api_error)}") + + elif "timeout" in error_str: + self._log(f"⏱️ TIMEOUT ERROR after {api_time:.2f}s", "error") + self._log(f" API request timed out", "error") + self._log(f" Consider increasing timeout or retry", "error") + self._log(f" Error details: {str(api_error)}", "error") + raise Exception(f"Request timeout: {str(api_error)}") + + else: + # Generic API error + self._log(f"❌ API ERROR ({error_type}) after {api_time:.2f}s", "error") + self._log(f" Error details: {str(api_error)}", 
"error") + self._log(f" Full traceback:", "error") + self._log(traceback.format_exc(), "error") + raise + + + + # Initialize translated with extracted response text to avoid UnboundLocalError + if response_text is None: + translated = "" + elif isinstance(response_text, str): + translated = response_text + elif isinstance(response_text, (bytes, bytearray)): + try: + translated = response_text.decode('utf-8', errors='replace') + except Exception: + translated = str(response_text) + else: + translated = str(response_text) + + # ADD THIS DEBUG CODE: + self._log(f"🔍 RAW API RESPONSE DEBUG:", "debug") + self._log(f" Type: {type(translated)}", "debug") + #self._log(f" Raw content length: {len(translated)}", "debug") + #self._log(f" First 200 chars: {translated[:200]}", "debug") + #self._log(f" Last 200 chars: {translated[-200:]}", "debug") + + # Check if both Japanese and English are present + has_japanese = any('\u3040' <= c <= '\u9fff' or '\uac00' <= c <= '\ud7af' for c in translated) + has_english = any('a' <= c.lower() <= 'z' for c in translated) + + if has_japanese and has_english: + self._log(f" ⚠️ WARNING: Response contains BOTH Japanese AND English!", "warning") + self._log(f" This might be causing the duplicate text issue", "warning") + + # Check if response looks like JSON (contains both { and } and : characters) + if '{' in translated and '}' in translated and ':' in translated: + try: + # It might be JSON, try to fix and parse it + fixed_json = self._fix_json_response(translated) + import json + parsed = json.loads(fixed_json) + + # If it's a dict with a single translation, extract it + if isinstance(parsed, dict) and len(parsed) == 1: + translated = list(parsed.values())[0] + translated = self._clean_translation_text(translated) + self._log("📦 Extracted translation from JSON response", "debug") + except: + # Not JSON or failed to parse, use as-is + pass + + self._log(f"{prefix} 🔍 Raw response type: {type(translated)}") + self._log(f"{prefix} 🔍 Raw response content: '{translated[:5000]}...'") + + # Check if the response looks like a Python literal (tuple/string representation) + if translated.startswith("('") or translated.startswith('("') or translated.startswith("('''"): + self._log(f"⚠️ Detected Python literal in response, attempting to extract actual text", "warning") + original = translated + try: + # Try to evaluate it as a Python literal + import ast + evaluated = ast.literal_eval(translated) + self._log(f"📦 Evaluated type: {type(evaluated)}") + + if isinstance(evaluated, tuple): + # Take the first element of the tuple + translated = str(evaluated[0]) + self._log(f"📦 Extracted from tuple: '{translated[:50]}...'") + elif isinstance(evaluated, str): + translated = evaluated + self._log(f"📦 Extracted string: '{translated[:50]}...'") + else: + self._log(f"⚠️ Unexpected type after eval: {type(evaluated)}", "warning") + + except Exception as e: + self._log(f"⚠️ Failed to parse Python literal: {e}", "warning") + self._log(f"⚠️ Original content: {original[:200]}", "warning") + + # Try multiple levels of unescaping + temp = translated + for i in range(5): # Try up to 5 levels of unescaping + if temp.startswith("('") or temp.startswith('("'): + # Try regex as fallback + import re + match = re.search(r"^\(['\"](.+)['\"]\)$", temp, re.DOTALL) + if match: + temp = match.group(1) + self._log(f"📦 Regex extracted (level {i+1}): '{temp[:50]}...'") + else: + break + else: + break + translated = temp + + # Additional check for escaped content + #if '\\\\' in translated or '\\n' in translated or 
"\\'" in translated or '\\"' in translated: + # self._log(f"⚠️ Detected escaped content, unescaping...", "warning") + # try: + # before = translated + # + # # Handle quotes and apostrophes + # translated = translated.replace("\\'", "'") + # translated = translated.replace('\\"', '"') + # translated = translated.replace("\\`", "`") + + # DON'T UNESCAPE NEWLINES BEFORE JSON PARSING! + # translated = translated.replace('\\n', '\n') # COMMENT THIS OUT + + # translated = translated.replace('\\\\', '\\') + # translated = translated.replace('\\/', '/') + # translated = translated.replace('\\t', '\t') # COMMENT THIS OUT TOO + # translated = translated.replace('\\r', '\r') # AND THIS + + # self._log(f"📦 Unescaped safely: '{before[:50]}...' -> '{translated[:50]}...'") + # except Exception as e: + # self._log(f"⚠️ Failed to unescape: {e}", "warning") + + # Clean up unwanted trailing apostrophes/quotes + import re + response_text = translated + response_text = re.sub(r"['''\"`]$", "", response_text.strip()) # Remove trailing + response_text = re.sub(r"^['''\"`]", "", response_text.strip()) # Remove leading + response_text = re.sub(r"\s+['''\"`]\s+", " ", response_text) # Remove isolated + translated = response_text + translated = self._clean_translation_text(translated) + + # Apply glossary if available + if hasattr(self.main_gui, 'manual_glossary') and self.main_gui.manual_glossary: + glossary_count = len(self.main_gui.manual_glossary) + self._log(f"📚 Applying glossary with {glossary_count} entries") + + replacements = 0 + for entry in self.main_gui.manual_glossary: + if 'source' in entry and 'target' in entry: + if entry['source'] in translated: + translated = translated.replace(entry['source'], entry['target']) + replacements += 1 + + if replacements > 0: + self._log(f" ✏️ Made {replacements} glossary replacements") + + translated = self._clean_translation_text(translated) + + # Store in history if HistoryManager is available + if self.history_manager and self.contextual_enabled: + try: + # Append to history with proper limit handling + self.history_manager.append_to_history( + user_content=text, + assistant_content=translated, + hist_limit=self.translation_history_limit, + reset_on_limit=not self.rolling_history_enabled, + rolling_window=self.rolling_history_enabled + ) + + # Check if we're about to hit the limit + if self.history_manager.will_reset_on_next_append( + self.translation_history_limit, + self.rolling_history_enabled + ): + mode = "roll over" if self.rolling_history_enabled else "reset" + self._log(f"📚 History will {mode} on next translation (at limit: {self.translation_history_limit})") + + except Exception as e: + self._log(f"⚠️ Failed to save to history: {str(e)}", "warning") + + # Also store in legacy context for compatibility + self.translation_context.append({ + "original": text, + "translated": translated + }) + + return translated + + except Exception as e: + self._log(f"❌ Translation error: {str(e)}", "error") + self._log(f" Error type: {type(e).__name__}", "error") + import traceback + self._log(f" Traceback: {traceback.format_exc()}", "error") + return text + + def translate_full_page_context(self, regions: List[TextRegion], image_path: str, _in_fallback=False) -> Dict[str, str]: + """Translate all text regions with full page context in a single request + + Args: + regions: List of text regions to translate + image_path: Path to the manga page image + _in_fallback: Internal flag to prevent infinite recursion during fallback attempts + """ + try: + import time + import 
traceback + import json + + # Initialize response_text at the start + response_text = "" + + self._log(f"\n📄 Full page context translation of {len(regions)} text regions") + + # Get system prompt from GUI profile + profile_name = self.main_gui.profile_var.get() + + # Ensure visual_context_enabled exists (temporary fix) + if not hasattr(self, 'visual_context_enabled'): + self.visual_context_enabled = self.main_gui.config.get('manga_visual_context_enabled', True) + + # Try to get the prompt from prompt_profiles dictionary (for all profiles including custom ones) + system_prompt = '' + if hasattr(self.main_gui, 'prompt_profiles') and profile_name in self.main_gui.prompt_profiles: + system_prompt = self.main_gui.prompt_profiles[profile_name] + self._log(f"📋 Using profile: {profile_name}") + else: + # Fallback to check if it's stored as a direct attribute (legacy support) + system_prompt = getattr(self.main_gui, profile_name.replace(' ', '_'), '') + if system_prompt: + self._log(f"📋 Using profile (legacy): {profile_name}") + else: + self._log(f"⚠️ Profile '{profile_name}' not found, using empty prompt", "warning") + + # Combine with full page context instructions + if system_prompt: + system_prompt = f"{system_prompt}\n\n{self.full_page_context_prompt}" + else: + system_prompt = self.full_page_context_prompt + + messages = [{"role": "system", "content": system_prompt}] + + # CHECK 2: Before adding context + if self._check_stop(): + self._log("⏹️ Translation stopped during context preparation", "warning") + return {} + + # Add contextual translations if enabled + if self.contextual_enabled and self.history_manager: + history_context = self._get_translation_history_context() + if history_context: + context_count = len(history_context) // 2 + self._log(f"🔗 Adding {context_count} previous exchanges from history") + messages.extend(history_context) + + # Prepare text segments with indices + all_texts = {} + text_list = [] + for i, region in enumerate(regions): + # Use index-based key to handle duplicate texts + key = f"[{i}] {region.text}" + all_texts[key] = region.text + text_list.append(f"{key}") + + # CHECK 3: Before image processing + if self._check_stop(): + self._log("⏹️ Translation stopped before image processing", "warning") + return {} + + # Create the full context message text + context_text = "\n".join(text_list) + + # Log text content info + total_chars = sum(len(region.text) for region in regions) + self._log(f"📝 Text content: {len(regions)} regions, {total_chars} total characters") + + # Process image if visual context is enabled + if self.visual_context_enabled: + try: + import base64 + from PIL import Image as PILImage + + self._log(f"📷 Adding full page visual context for translation") + + # Read and encode the image + with open(image_path, 'rb') as img_file: + img_data = img_file.read() + + # Check image size + img_size_mb = len(img_data) / (1024 * 1024) + self._log(f"📊 Image size: {img_size_mb:.2f} MB") + + # Get image dimensions + pil_image = PILImage.open(image_path) + self._log(f" Image dimensions: {pil_image.width}x{pil_image.height}") + + # CHECK 4: Before resizing (which can take time) + if self._check_stop(): + self._log("⏹️ Translation stopped during image preparation", "warning") + return {} + + # Resize if needed + if img_size_mb > 10: + self._log(f"📉 Resizing large image for API limits...") + max_size = 2048 + ratio = min(max_size / pil_image.width, max_size / pil_image.height) + if ratio < 1: + new_size = (int(pil_image.width * ratio), int(pil_image.height * ratio)) + 
pil_image = pil_image.resize(new_size, PILImage.Resampling.LANCZOS)
+                        from io import BytesIO
+                        buffered = BytesIO()
+                        pil_image.save(buffered, format="PNG", optimize=True)
+                        img_data = buffered.getvalue()
+                        self._log(f"✅ Resized to {new_size[0]}x{new_size[1]}px ({len(img_data)/(1024*1024):.2f} MB)")
+
+                    # Convert to base64
+                    img_b64 = base64.b64encode(img_data).decode('utf-8')
+
+                    # Create message with both text and image
+                    messages.append({
+                        "role": "user",
+                        "content": [
+                            {"type": "text", "text": context_text},
+                            {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{img_b64}"}}
+                        ]
+                    })
+
+                    self._log(f"✅ Added full page image as visual context")
+
+                except Exception as e:
+                    self._log(f"⚠️ Failed to add image context: {str(e)}", "warning")
+                    self._log(f"   Error type: {type(e).__name__}", "warning")
+                    import traceback
+                    self._log(traceback.format_exc(), "warning")
+                    self._log(f"   Falling back to text-only translation", "warning")
+
+                    # Fall back to text-only translation
+                    messages.append({"role": "user", "content": context_text})
+            else:
+                # Visual context disabled - send text only
+                self._log(f"📝 Text-only mode (visual context disabled for non-vision models)")
+                messages.append({"role": "user", "content": context_text})
+
+            # CHECK 5: Before API call
+            if self._check_stop():
+                self._log("⏹️ Translation stopped before API call", "warning")
+                return {}
+
+            # Store original model for fallback
+            original_model = self.client.model if hasattr(self.client, 'model') else None
+
+            # Check input token limit
+            text_tokens = 0
+            image_tokens = 0
+
+            for msg in messages:
+                if isinstance(msg.get("content"), str):
+                    # Simple text message
+                    text_tokens += len(msg["content"]) // 4
+                elif isinstance(msg.get("content"), list):
+                    # Message with mixed content (text + image)
+                    for content_part in msg["content"]:
+                        if content_part.get("type") == "text":
+                            text_tokens += len(content_part.get("text", "")) // 4
+                        elif content_part.get("type") == "image_url":
+                            # Only count image tokens if visual context is enabled
+                            if self.visual_context_enabled:
+                                image_tokens += 258
+
+            estimated_tokens = text_tokens + image_tokens
+
+            # Check token limit only if it's enabled
+            if self.input_token_limit is None:
+                self._log(f"📊 Token estimate - Text: {text_tokens}, Images: {image_tokens} (Total: {estimated_tokens} / unlimited)")
+            else:
+                self._log(f"📊 Token estimate - Text: {text_tokens}, Images: {image_tokens} (Total: {estimated_tokens} / {self.input_token_limit})")
+
+            # Only enforce the limit when one is configured (comparing against None would raise a TypeError)
+            if self.input_token_limit is not None and estimated_tokens > self.input_token_limit:
+                self._log(f"⚠️ Token limit exceeded, trimming context", "warning")
+                # Keep system prompt and current message only
+                messages = [messages[0], messages[-1]]
+                # Recalculate tokens
+                text_tokens = len(messages[0]["content"]) // 4
+                if isinstance(messages[-1]["content"], str):
+                    text_tokens += len(messages[-1]["content"]) // 4
+                else:
+                    for content_part in messages[-1]["content"]:
+                        if content_part.get("type") == "text":
+                            text_tokens += len(content_part.get("text", "")) // 4
+                estimated_tokens = text_tokens + image_tokens
+                self._log(f"📊 Trimmed token estimate: {estimated_tokens}")
+
+            # Make API call using the client's send method (matching translate_text)
+            self._log(f"🌐 Sending full page context to API...")
+            self._log(f"   API Model: {self.client.model if hasattr(self.client, 'model') else 'unknown'}")
+            self._log(f"   Temperature: {self.temperature}")
+            self._log(f"   Max Output Tokens: {self.max_tokens}")
+
+            start_time = time.time()
+            api_time = 0  # Initialize to avoid NameError
+
+            try:
+                response = 
send_with_interrupt( + messages=messages, + client=self.client, + temperature=self.temperature, + max_tokens=self.max_tokens, + stop_check_fn=self._check_stop + ) + api_time = time.time() - start_time + + # Extract content from response + if hasattr(response, 'content'): + response_text = response.content + # Check if it's a tuple representation + if isinstance(response_text, tuple): + response_text = response_text[0] # Get first element of tuple + response_text = response_text.strip() + elif hasattr(response, 'text'): + # Gemini responses have .text attribute + response_text = response.text.strip() + elif hasattr(response, 'candidates') and response.candidates: + # Handle Gemini GenerateContentResponse structure + try: + response_text = response.candidates[0].content.parts[0].text.strip() + except (IndexError, AttributeError): + response_text = str(response).strip() + else: + # If response is a string or other format + response_text = str(response).strip() + + # Check if it's a stringified tuple + if response_text.startswith("('") or response_text.startswith('("'): + # It's a tuple converted to string, extract the JSON part + import ast + try: + parsed_tuple = ast.literal_eval(response_text) + if isinstance(parsed_tuple, tuple): + response_text = parsed_tuple[0] # Get first element + self._log("📦 Extracted response from tuple format", "debug") + except: + # If literal_eval fails, try regex + import re + match = re.match(r"^\('(.+)', '.*'\)$", response_text, re.DOTALL) + if match: + response_text = match.group(1) + # Unescape the string + response_text = response_text.replace('\\n', '\n') + response_text = response_text.replace("\\'", "'") + response_text = response_text.replace('\\"', '"') + response_text = response_text.replace('\\\\', '\\') + self._log("📦 Extracted response using regex from tuple string", "debug") + + # CHECK 6: Immediately after API response + if self._check_stop(): + self._log(f"⏹️ Translation stopped after API call ({api_time:.2f}s)", "warning") + return {} + + self._log(f"✅ API responded in {api_time:.2f} seconds") + self._log(f"📥 Received response ({len(response_text)} chars)") + + except Exception as api_error: + api_time = time.time() - start_time + + # CHECK 7: After API error + if self._check_stop(): + self._log(f"⏹️ Translation stopped during API error handling", "warning") + return {} + + error_str = str(api_error).lower() + error_type = type(api_error).__name__ + + # Check for specific error types + if "429" in error_str or "rate limit" in error_str: + self._log(f"⚠️ RATE LIMIT ERROR (429) after {api_time:.2f}s", "error") + self._log(f" The API rate limit has been exceeded", "error") + self._log(f" Please wait before retrying or reduce request frequency", "error") + self._log(f" Error details: {str(api_error)}", "error") + raise Exception(f"Rate limit exceeded (429): {str(api_error)}") + + elif "401" in error_str or "unauthorized" in error_str: + self._log(f"❌ AUTHENTICATION ERROR (401) after {api_time:.2f}s", "error") + self._log(f" Invalid API key or authentication failed", "error") + self._log(f" Please check your API key in settings", "error") + self._log(f" Error details: {str(api_error)}", "error") + raise Exception(f"Authentication failed (401): {str(api_error)}") + + elif "403" in error_str or "forbidden" in error_str: + self._log(f"❌ FORBIDDEN ERROR (403) after {api_time:.2f}s", "error") + self._log(f" Access denied - check API permissions", "error") + self._log(f" Error details: {str(api_error)}", "error") + raise Exception(f"Access forbidden 
(403): {str(api_error)}") + + elif "400" in error_str or "bad request" in error_str: + self._log(f"❌ BAD REQUEST ERROR (400) after {api_time:.2f}s", "error") + self._log(f" Invalid request format or parameters", "error") + self._log(f" Error details: {str(api_error)}", "error") + raise Exception(f"Bad request (400): {str(api_error)}") + + elif "timeout" in error_str: + self._log(f"⏱️ TIMEOUT ERROR after {api_time:.2f}s", "error") + self._log(f" API request timed out", "error") + self._log(f" Consider increasing timeout or retry", "error") + self._log(f" Error details: {str(api_error)}", "error") + raise Exception(f"Request timeout: {str(api_error)}") + + else: + # Generic API error + self._log(f"❌ API ERROR ({error_type}) after {api_time:.2f}s", "error") + self._log(f" Error details: {str(api_error)}", "error") + self._log(f" Full traceback:", "error") + self._log(traceback.format_exc(), "error") + raise + + # CHECK 8: Before parsing response + if self._check_stop(): + self._log("⏹️ Translation stopped before parsing response", "warning") + return {} + + # Check if we got a response + if not response_text: + self._log("❌ Empty response from API", "error") + return {} + + self._log(f"🔍 Raw response type: {type(response_text)}") + self._log(f"🔍 Raw response preview: '{response_text[:2000]}...'") + + # Clean up response_text (handle Python literals, escapes, etc.) + if response_text.startswith("('") or response_text.startswith('("') or response_text.startswith("('''"): + self._log(f"⚠️ Detected Python literal in response, attempting to extract actual text", "warning") + try: + import ast + evaluated = ast.literal_eval(response_text) + if isinstance(evaluated, tuple): + response_text = str(evaluated[0]) + elif isinstance(evaluated, str): + response_text = evaluated + except Exception as e: + self._log(f"⚠️ Failed to parse Python literal: {e}", "warning") + + # Handle escaped content + #if '\\\\' in response_text or '\\n' in response_text or "\\'" in response_text or '\\"' in response_text: + # self._log(f"⚠️ Detected escaped content, unescaping...", "warning") + # response_text = response_text.replace("\\'", "'") + # response_text = response_text.replace('\\"', '"') + # response_text = response_text.replace('\\n', '\n') + # response_text = response_text.replace('\\\\', '\\') + # response_text = response_text.replace('\\/', '/') + # response_text = response_text.replace('\\t', '\t') + # response_text = response_text.replace('\\r', '\r') + + # Clean up quotes + import re + response_text = re.sub(r"['''\"`]$", "", response_text.strip()) + response_text = re.sub(r"^['''\"`]", "", response_text.strip()) + response_text = re.sub(r"\s+['''\"`]\s+", " ", response_text) + + # Try to parse as JSON + translations = {} + try: + # Strip markdown blocks more aggressively + import re + import json + + # Method 1: Find JSON object directly (most reliable) + json_match = re.search(r'\{.*\}', response_text, re.DOTALL) + if json_match: + json_text = json_match.group(0) + try: + translations = json.loads(json_text) + self._log(f"✅ Successfully parsed {len(translations)} translations (direct extraction)") + except json.JSONDecodeError: + # Try to fix the extracted JSON + json_text = self._fix_json_response(json_text) + translations = json.loads(json_text) + self._log(f"✅ Successfully parsed {len(translations)} translations (after fix)") + else: + # Method 2: Try stripping markdown if no JSON found + cleaned = response_text + + # Remove markdown code blocks + if '```' in cleaned: + # This pattern handles ```json, 
``json, ``` or `` + patterns = [ + r'```json\s*\n?(.*?)```', + r'``json\s*\n?(.*?)``', + r'```\s*\n?(.*?)```', + r'``\s*\n?(.*?)``' + ] + + for pattern in patterns: + match = re.search(pattern, cleaned, re.DOTALL) + if match: + cleaned = match.group(1).strip() + break + + # Try to parse the cleaned text + translations = json.loads(cleaned) + self._log(f"✅ Successfully parsed {len(translations)} translations (after markdown strip)") + + # Handle different response formats + if isinstance(translations, list): + # Array of translations only - map by position + temp = {} + for i, region in enumerate(regions): + if i < len(translations): + temp[region.text] = translations[i] + translations = temp + + self._log(f"📊 Total translations: {len(translations)}") + + except Exception as e: + self._log(f"❌ Failed to parse JSON: {str(e)}", "error") + self._log(f"Response preview: {response_text[:500]}...", "warning") + + # CRITICAL: Check if this is a refusal message BEFORE regex fallback + # OpenAI and other APIs refuse certain content with text responses instead of JSON + # ONLY check if response looks like plain text refusal (not malformed JSON with translations) + import re + response_lower = response_text.lower() + + # Quick check: if response starts with refusal keywords, it's definitely a refusal + refusal_starts = ['sorry', 'i cannot', "i can't", 'i apologize', 'i am unable', "i'm unable"] + if any(response_lower.strip().startswith(start) for start in refusal_starts): + # Very likely a refusal - raise immediately + from unified_api_client import UnifiedClientError + raise UnifiedClientError( + f"Content refused by API", + error_type="prohibited_content", + details={"refusal_message": response_text[:500]} + ) + + # Skip refusal check if response contains valid-looking JSON structure with translations + # (indicates malformed JSON that should go to regex fallback, not a refusal) + has_json_structure = ( + (response_text.strip().startswith('{') and ':' in response_text and '"' in response_text) or + (response_text.strip().startswith('[') and ':' in response_text and '"' in response_text) + ) + + # Also check if response contains short translations (not refusal paragraphs) + # Refusals are typically long paragraphs, translations are short + avg_value_length = 0 + if has_json_structure: + # Quick estimate: count chars between quotes + import re + values = re.findall(r'"([^"]{1,200})"\s*[,}]', response_text) + if values: + avg_value_length = sum(len(v) for v in values) / len(values) + + # If looks like JSON with short values, skip refusal check (go to regex fallback) + if has_json_structure and avg_value_length > 0 and avg_value_length < 150: + self._log(f"🔍 Detected malformed JSON with translations (avg len: {avg_value_length:.0f}), trying regex fallback", "debug") + # Skip refusal detection, go straight to regex fallback + pass + else: + # Check for refusal patterns + # Refusal patterns - both simple strings and regex patterns + # Must be strict to avoid false positives on valid translations + refusal_patterns = [ + "i cannot assist", + "i can't assist", + "i cannot help", + "i can't help", + r"sorry.{0,10}i can't (assist|help|translate)", # OpenAI specific + "i'm unable to translate", + "i am unable to translate", + "i apologize, but i cannot", + "i'm sorry, but i cannot", + "i don't have the ability to", + "this request cannot be", + "unable to process this", + "cannot complete this", + r"against.{0,20}(content )?policy", # "against policy" or "against content policy" + "violates.*policy", + 
r"(can't|cannot).{0,30}(sexual|explicit|inappropriate)", # "can't translate sexual" + "appears to sexualize", + "who appear to be", + "prohibited content", + "content blocked", + ] + + # Check both simple string matching and regex patterns + is_refusal = False + for pattern in refusal_patterns: + if '.*' in pattern or r'.{' in pattern: + # It's a regex pattern + if re.search(pattern, response_lower): + is_refusal = True + break + else: + # Simple string match + if pattern in response_lower: + is_refusal = True + break + + if is_refusal: + # Raise UnifiedClientError with prohibited_content type + # Fallback mechanism will handle this automatically + from unified_api_client import UnifiedClientError + raise UnifiedClientError( + f"Content refused by API", + error_type="prohibited_content", + details={"refusal_message": response_text[:500]} + ) + + # Fallback: try regex extraction (handles both quoted and unquoted keys) + try: + import re + translations = {} + + # Try 1: Standard quoted keys and values + pattern1 = r'"([^"]+)"\s*:\s*"([^"]*(?:\\.[^"]*)*)"' + matches = re.findall(pattern1, response_text) + + if matches: + for key, value in matches: + value = value.replace('\\n', '\n').replace('\\"', '"').replace('\\\\', '\\') + translations[key] = value + self._log(f"✅ Recovered {len(translations)} translations using regex (quoted keys)") + else: + # Try 2: Unquoted keys (for invalid JSON like: key: "value") + pattern2 = r'([^\s:{}]+)\s*:\s*([^\n}]+)' + matches = re.findall(pattern2, response_text) + + for key, value in matches: + # Clean up key and value + key = key.strip() + value = value.strip().rstrip(',') + # Remove quotes from value if present + if value.startswith('"') and value.endswith('"'): + value = value[1:-1] + elif value.startswith("'") and value.endswith("'"): + value = value[1:-1] + translations[key] = value + + if translations: + self._log(f"✅ Recovered {len(translations)} translations using regex (unquoted keys)") + + if not translations: + self._log("❌ All parsing attempts failed", "error") + return {} + except Exception as e: + self._log(f"❌ Failed to recover JSON: {e}", "error") + return {} + + # Map translations back to regions + result = {} + all_originals = [] + all_translations = [] + + # Extract translation values in order + translation_values = list(translations.values()) if translations else [] + + # DEBUG: Log what we extracted + self._log(f"📊 Extracted {len(translation_values)} translation values", "debug") + for i, val in enumerate(translation_values[:1000]): # First 1000 for debugging + # Safely handle None values + val_str = str(val) if val is not None else "" + self._log(f" Translation {i}: '{val_str[:1000]}...'", "debug") + + # Clean all translation values to remove quotes + # CRITICAL: Also clean the keys in the dictionary to maintain correct mapping + cleaned_translations = {} + for key, value in translations.items(): + cleaned_key = key + cleaned_value = self._clean_translation_text(value) + # Only add if the cleaned value is not empty (avoid misalignment) + if cleaned_value: + cleaned_translations[cleaned_key] = cleaned_value + else: + self._log(f"🔍 Skipping empty translation after cleaning: '{key}' → ''", "debug") + + # Replace original dict with cleaned version + translations = cleaned_translations + translation_values = list(translations.values()) if translations else [] + + self._log(f"🔍 DEBUG: translation_values after cleaning:", "debug") + for i, val in enumerate(translation_values): + self._log(f" [{i}]: {repr(val)}", "debug") + + # CRITICAL: Check 
if translation values are actually refusal messages + # API sometimes returns valid JSON where each "translation" is a refusal + if translation_values: + # Check first few translations for refusal patterns + import re + refusal_patterns = [ + "i cannot", + "i can't", + r"sorry.{0,5}i can't help", + r"sorry.{0,5}i can't", + "sexually explicit", + "content policy", + "prohibited content", + "appears to be", + "who appear to be", + ] + + # Sample first 3 translations (or all if fewer) + sample_size = min(3, len(translation_values)) + refusal_count = 0 + + for sample_val in translation_values[:sample_size]: + if sample_val: + val_lower = sample_val.lower() + for pattern in refusal_patterns: + if '.*' in pattern or r'.{' in pattern: + if re.search(pattern, val_lower): + refusal_count += 1 + break + else: + if pattern in val_lower: + refusal_count += 1 + break + + # If most translations are refusals, treat as refusal + if refusal_count >= sample_size * 0.5: # 50% threshold + # Raise UnifiedClientError with prohibited_content type + # Fallback mechanism will handle this automatically + from unified_api_client import UnifiedClientError + raise UnifiedClientError( + f"Content refused by API", + error_type="prohibited_content", + details={"refusal_message": translation_values[0][:500]} + ) + + # Key-based mapping (prioritize indexed format as requested in prompt) + self._log(f"📋 Mapping {len(translations)} translations to {len(regions)} regions") + + for i, region in enumerate(regions): + if i % 10 == 0 and self._check_stop(): + self._log(f"⏹️ Translation stopped during mapping (processed {i}/{len(regions)} regions)", "warning") + return result + + # Get translation using multiple strategies (indexed format is most reliable) + translated = "" + + # Strategy 1: Indexed key format "[N] original_text" (NEW STANDARD - most reliable) + key = f"[{i}] {region.text}" + if key in translations: + translated = translations[key] + self._log(f" ✅ Matched indexed key: '{key[:40]}...'", "debug") + # Strategy 2: Direct key match without index (backward compatibility) + elif region.text in translations: + translated = translations[region.text] + self._log(f" ✅ Matched direct key: '{region.text[:40]}...'", "debug") + # Strategy 3: Position-based fallback (least reliable, only if counts match exactly) + elif i < len(translation_values) and len(translation_values) == len(regions): + translated = translation_values[i] + self._log(f" ⚠️ Using position-based fallback for region {i}", "debug") + + # Only mark as missing if we genuinely have no translation + # NOTE: Keep translation even if it matches original (e.g., numbers, names, SFX) + if not translated: + self._log(f" ⚠️ No translation for region {i}, leaving empty", "warning") + translated = "" + + # Apply glossary if we have a translation + if translated and hasattr(self.main_gui, 'manual_glossary') and self.main_gui.manual_glossary: + for entry in self.main_gui.manual_glossary: + if 'source' in entry and 'target' in entry: + if entry['source'] in translated: + translated = translated.replace(entry['source'], entry['target']) + + result[region.text] = translated + region.translated_text = translated + + if translated: + all_originals.append(f"[{i+1}] {region.text}") + all_translations.append(f"[{i+1}] {translated}") + self._log(f" ✅ Translated: '{region.text[:30]}...' 
→ '{translated[:30]}...'", "debug") + + # Save history if enabled + if self.history_manager and self.contextual_enabled and all_originals: + try: + combined_original = "\n".join(all_originals) + combined_translation = "\n".join(all_translations) + + self.history_manager.append_to_history( + user_content=combined_original, + assistant_content=combined_translation, + hist_limit=self.translation_history_limit, + reset_on_limit=not self.rolling_history_enabled, + rolling_window=self.rolling_history_enabled + ) + + self._log(f"📚 Saved {len(all_originals)} translations as 1 combined history entry", "success") + except Exception as e: + self._log(f"⚠️ Failed to save page to history: {str(e)}", "warning") + + return result + + except Exception as e: + if self._check_stop(): + self._log("⏹️ Translation stopped due to user request", "warning") + return {} + + # Check if this is a prohibited_content error + from unified_api_client import UnifiedClientError + if isinstance(e, UnifiedClientError) and getattr(e, "error_type", None) == "prohibited_content": + # Check if USE_FALLBACK_KEYS is enabled and we're not already in a fallback attempt + use_fallback = os.getenv('USE_FALLBACK_KEYS', '0') == '1' + + if use_fallback and not _in_fallback: + self._log(f"⛔ Content refused by primary model, trying fallback keys...", "warning") + + # Store original credentials to restore after fallback attempts + original_api_key = self.client.api_key + original_model = self.client.model + + # Try to get fallback keys from environment + try: + fallback_keys_json = os.getenv('FALLBACK_KEYS', '[]') + fallback_keys = json.loads(fallback_keys_json) if fallback_keys_json != '[]' else [] + + if fallback_keys: + for idx, fallback in enumerate(fallback_keys, 1): + if self._check_stop(): + self._log("⏹️ Translation stopped during fallback", "warning") + return {} + + fallback_model = fallback.get('model') + fallback_key = fallback.get('api_key') + + if not fallback_model or not fallback_key: + continue + + self._log(f"🔄 Trying fallback {idx}/{len(fallback_keys)}: {fallback_model}", "info") + + try: + # Temporarily switch to fallback model + old_key = self.client.api_key + old_model = self.client.model + + self.client.api_key = fallback_key + self.client.model = fallback_model + + # Re-setup client with new credentials + if hasattr(self.client, '_setup_client'): + self.client._setup_client() + + # Retry the translation with fallback model (mark as in_fallback to prevent recursion) + return self.translate_full_page_context(regions, image_path, _in_fallback=True) + + except UnifiedClientError as fallback_err: + if getattr(fallback_err, "error_type", None) == "prohibited_content": + self._log(f" ⛔ Fallback {idx} also refused", "warning") + # Restore original credentials and try next fallback + self.client.api_key = old_key + self.client.model = old_model + if hasattr(self.client, '_setup_client'): + self.client._setup_client() + continue + else: + # Other error, restore and raise + self.client.api_key = old_key + self.client.model = old_model + if hasattr(self.client, '_setup_client'): + self.client._setup_client() + raise + except Exception as fallback_err: + self._log(f" ❌ Fallback {idx} error: {str(fallback_err)[:100]}", "error") + # Restore original credentials and try next fallback + self.client.api_key = old_key + self.client.model = old_model + if hasattr(self.client, '_setup_client'): + self.client._setup_client() + continue + + self._log(f"❌ All fallback keys refused content", "error") + else: + self._log(f"⚠️ No fallback 
keys configured", "warning") + except Exception as fallback_error: + self._log(f"❌ Error processing fallback keys: {str(fallback_error)}", "error") + finally: + # Always restore original credentials after fallback attempts + try: + self.client.api_key = original_api_key + self.client.model = original_model + if hasattr(self.client, '_setup_client'): + self.client._setup_client() + except Exception: + pass # Ignore errors during credential restoration + + # If we get here, all fallbacks failed or weren't configured + self._log(f"❌ Content refused by API", "error") + return {} + + self._log(f"❌ Full page context translation error: {str(e)}", "error") + self._log(traceback.format_exc(), "error") + return {} + + def _fix_json_response(self, response_text: str) -> str: + import re + import json + + # Debug: Show what we received + self._log(f"DEBUG: Original length: {len(response_text)}", "debug") + self._log(f"DEBUG: First 50 chars: [{response_text[:50]}]", "debug") + + cleaned = response_text + if "```json" in cleaned: + match = re.search(r'```json\s*(.*?)```', cleaned, re.DOTALL) + if match: + cleaned = match.group(1).strip() + self._log(f"DEBUG: Extracted {len(cleaned)} chars from markdown", "debug") + else: + self._log("DEBUG: Regex didn't match!", "warning") + + # Try to parse + try: + result = json.loads(cleaned) + self._log(f"✅ Parsed JSON with {len(result)} entries", "info") + return cleaned + except json.JSONDecodeError as e: + self._log(f"⚠️ JSON invalid: {str(e)}", "warning") + self._log(f"DEBUG: Cleaned text starts with: [{cleaned[:20]}]", "debug") + return cleaned + + def _clean_translation_text(self, text: str) -> str: + """Remove unnecessary quotation marks, dots, and invalid characters from translated text""" + if not text: + return text + + # Log what we're cleaning + original = text + + # First, fix encoding issues + text = self._fix_encoding_issues(text) + + # Normalize width/compatibility (e.g., fullwidth → ASCII, circled numbers → digits) + text = self._normalize_unicode_width(text) + + # Remove Unicode replacement characters and invalid symbols + text = self._sanitize_unicode_characters(text) + + # Remove leading and trailing whitespace + text = text.strip() + + # Remove ALL types of quotes and dots from start/end + # Keep removing until no more quotes/dots at edges + while len(text) > 0: + old_len = len(text) + + # Remove from start + text = text.lstrip('"\'`''""「」『』【】《》〈〉.·•°') + + # Remove from end (but preserve ... and !!) + if not text.endswith('...') and not text.endswith('!!'): + text = text.rstrip('"\'`''""「」『』【】《》〈〉.·•°') + + # If nothing changed, we're done + if len(text) == old_len: + break + + # Final strip + text = text.strip() + + # Log if we made changes + if text != original: + self._log(f"🧹 Cleaned text: '{original}' → '{text}'", "debug") + + return text + + def _sanitize_unicode_characters(self, text: str) -> str: + """Remove invalid Unicode characters, replacement characters, and box symbols. + Also more aggressively exclude square-like glyphs that leak as 'cubes' in some fonts. 
+ """ + if not text: + return text + + import re + original = text + + + # Remove Unicode replacement character (�) and similar invalid symbols + text = text.replace('\ufffd', '') # Unicode replacement character + + # Geometric squares and variants (broad sweep) + geo_squares = [ + '□','■','▢','▣','▤','▥','▦','▧','▨','▩','◻','⬛','⬜', + '\u25a1','\u25a0','\u2b1c','\u2b1b' + ] + for s in geo_squares: + text = text.replace(s, '') + + # Extra cube-like CJK glyphs commonly misrendered in non-CJK fonts + # (unconditionally removed per user request) + cube_likes = [ + '口', # U+53E3 + '囗', # U+56D7 + '日', # U+65E5 (often boxy) + '曰', # U+66F0 + '田', # U+7530 + '回', # U+56DE + 'ロ', # U+30ED (Katakana RO) + 'ロ', # U+FF9B (Halfwidth RO) + 'ㅁ', # U+3141 (Hangul MIEUM) + '丨', # U+4E28 (CJK radical two) tall bar + ] + for s in cube_likes: + text = text.replace(s, '') + + # Remove entire ranges that commonly render as boxes/blocks + # Box Drawing, Block Elements, Geometric Shapes (full range), plus a common white/black large square range already handled + text = re.sub(r'[\u2500-\u257F\u2580-\u259F\u25A0-\u25FF]', '', text) + + # Optional debug: log culprits found in original text (before removal) + try: + culprits = re.findall(r'[\u2500-\u257F\u2580-\u259F\u25A0-\u25FF\u2B1B\u2B1C\u53E3\u56D7\u65E5\u66F0\u7530\u56DE\u30ED\uFF9B\u3141\u4E28]', original) + if culprits: + as_codes = [f'U+{ord(c):04X}' for c in culprits] + self._log(f"🧊 Removed box-like glyphs: {', '.join(as_codes)}", "debug") + except Exception: + pass + + # If line is mostly ASCII, strip any remaining single CJK ideographs that stand alone + try: + ascii_count = sum(1 for ch in text if ord(ch) < 128) + ratio = ascii_count / max(1, len(text)) + if ratio >= 0.8: + text = re.sub(r'(?:(?<=\s)|^)[\u3000-\u303F\u3040-\u30FF\u3400-\u9FFF\uFF00-\uFFEF](?=(?:\s)|$)', '', text) + except Exception: + pass + + # Remove invisible and zero-width characters + text = re.sub(r'[\u200b-\u200f\u2028-\u202f\u205f-\u206f\ufeff]', '', text) + + # Remove remaining control characters (except common ones like newline, tab) + text = re.sub(r'[\x00-\x08\x0B-\x0C\x0E-\x1F\x7F-\x9F]', '', text) + + # Remove any remaining characters that can't be properly encoded + try: + text = text.encode('utf-8', errors='ignore').decode('utf-8') + except UnicodeError: + pass + + if text != original: + try: + self._log(f"🔧 Sanitized Unicode: '{original}' → '{text}'", "debug") + except Exception: + pass + + return text + + def _normalize_unicode_width(self, text: str) -> str: + """Normalize Unicode to NFKC to 'unsquare' fullwidth/stylized forms while preserving CJK text""" + if not text: + return text + try: + import unicodedata + original = text + # NFKC folds compatibility characters (fullwidth forms, circled digits, etc.) to standard forms + text = unicodedata.normalize('NFKC', text) + if text != original: + try: + self._log(f"🔤 Normalized width/compat: '{original[:30]}...' 
→ '{text[:30]}...'", "debug") + except Exception: + pass + return text + except Exception: + return text + + def _fix_encoding_issues(self, text: str) -> str: + """Fix common encoding issues in text, especially for Korean""" + if not text: + return text + + # Check for mojibake indicators (UTF-8 misinterpreted as Latin-1) + mojibake_indicators = ['ë', 'ì', 'ê°', 'ã', 'Ã', 'â', 'ä', 'ð', 'í', 'ë­', 'ì´'] + + if any(indicator in text for indicator in mojibake_indicators): + self._log("🔧 Detected mojibake encoding issue, attempting fixes...", "debug") + + # Try multiple encoding fixes + encodings_to_try = [ + ('latin-1', 'utf-8'), + ('windows-1252', 'utf-8'), + ('iso-8859-1', 'utf-8'), + ('cp1252', 'utf-8') + ] + + for from_enc, to_enc in encodings_to_try: + try: + fixed = text.encode(from_enc, errors='ignore').decode(to_enc, errors='ignore') + + # Check if the fix actually improved things + # Should have Korean characters (Hangul range) or be cleaner + if any('\uAC00' <= c <= '\uD7AF' for c in fixed) or fixed.count('�') < text.count('�'): + self._log(f"✅ Fixed encoding using {from_enc} -> {to_enc}", "debug") + return fixed + except: + continue + + # Clean up any remaining control characters and replacement characters + import re + text = re.sub(r'[\x00-\x08\x0B-\x0C\x0E-\x1F\x7F]', '', text) + + # Additional cleanup for common encoding artifacts + # Remove sequences that commonly appear from encoding errors + text = re.sub(r'\ufffd+', '', text) # Remove multiple replacement characters + text = re.sub(r'[\u25a0-\u25ff]+', '', text) # Remove geometric shapes (common fallbacks) + + # Clean up double spaces and normalize whitespace + text = re.sub(r'\s+', ' ', text).strip() + + return text + + def create_text_mask(self, image: np.ndarray, regions: List[TextRegion]) -> np.ndarray: + """Create mask with comprehensive per-text-type dilation settings""" + mask = np.zeros(image.shape[:2], dtype=np.uint8) + + regions_masked = 0 + regions_skipped = 0 + + self._log(f"🎭 Creating text mask for {len(regions)} regions", "info") + + # Get manga settings + manga_settings = self.main_gui.config.get('manga_settings', {}) + + # Get dilation settings + base_dilation_size = manga_settings.get('mask_dilation', 15) + + # If Auto Iterations is enabled, auto-set dilation by OCR provider and RT-DETR guide status + auto_iterations = manga_settings.get('auto_iterations', True) + if auto_iterations: + try: + ocr_settings = manga_settings.get('ocr', {}) + use_rtdetr_guide = ocr_settings.get('use_rtdetr_for_ocr_regions', True) + bubble_detection_enabled = ocr_settings.get('bubble_detection_enabled', False) + + # If RT-DETR guide is enabled for Google/Azure, force dilation to 0 + if (getattr(self, 'ocr_provider', '').lower() in ('azure', 'google') and + bubble_detection_enabled and use_rtdetr_guide): + base_dilation_size = 0 + self._log(f"📏 Auto dilation (RT-DETR guided): 0px (using iterations only)", "info") + elif getattr(self, 'ocr_provider', '').lower() in ('azure', 'google'): + # CRITICAL: Without RT-DETR, Azure/Google OCR is very conservative + # Use base dilation to expand masks to actual bubble size + base_dilation_size = 15 # Base expansion for Azure/Google without RT-DETR + self._log(f"📏 Auto dilation by provider ({self.ocr_provider}, no RT-DETR): {base_dilation_size}px", "info") + else: + base_dilation_size = 0 + self._log(f"📏 Auto dilation by provider ({self.ocr_provider}): {base_dilation_size}px", "info") + except Exception: + pass + + # Auto iterations: decide by image color vs B&W + auto_iterations = 
manga_settings.get('auto_iterations', True) + if auto_iterations: + try: + # Heuristic: consider image B&W if RGB channels are near-equal + if len(image.shape) < 3 or image.shape[2] == 1: + is_bw = True + else: + # Compute mean absolute differences between channels + ch0 = image[:, :, 0].astype(np.int16) + ch1 = image[:, :, 1].astype(np.int16) + ch2 = image[:, :, 2].astype(np.int16) + diff01 = np.mean(np.abs(ch0 - ch1)) + diff12 = np.mean(np.abs(ch1 - ch2)) + diff02 = np.mean(np.abs(ch0 - ch2)) + # If channels are essentially the same, treat as B&W + is_bw = max(diff01, diff12, diff02) < 2.0 + if is_bw: + text_bubble_iterations = 2 + empty_bubble_iterations = 2 + free_text_iterations = 0 + self._log("📏 Auto iterations (B&W): text=2, empty=2, free=0", "info") + else: + text_bubble_iterations = 4 + empty_bubble_iterations = 4 + free_text_iterations = 4 + self._log("📏 Auto iterations (Color): all=4", "info") + except Exception: + # Fallback to configured behavior on any error + auto_iterations = False + + if not auto_iterations: + # Check if using uniform iterations for all text types + use_all_iterations = manga_settings.get('use_all_iterations', False) + + if use_all_iterations: + # Use the same iteration count for all text types + all_iterations = manga_settings.get('all_iterations', 2) + text_bubble_iterations = all_iterations + empty_bubble_iterations = all_iterations + free_text_iterations = all_iterations + self._log(f"📏 Using uniform iterations: {all_iterations} for all text types", "info") + else: + # Use individual iteration settings + text_bubble_iterations = manga_settings.get('text_bubble_dilation_iterations', + manga_settings.get('bubble_dilation_iterations', 2)) + empty_bubble_iterations = manga_settings.get('empty_bubble_dilation_iterations', 3) + free_text_iterations = manga_settings.get('free_text_dilation_iterations', 0) + self._log(f"📏 Using individual iterations - Text bubbles: {text_bubble_iterations}, " + f"Empty bubbles: {empty_bubble_iterations}, Free text: {free_text_iterations}", "info") + + # Create separate masks for different text types + text_bubble_mask = np.zeros(image.shape[:2], dtype=np.uint8) + empty_bubble_mask = np.zeros(image.shape[:2], dtype=np.uint8) + free_text_mask = np.zeros(image.shape[:2], dtype=np.uint8) + + text_bubble_count = 0 + empty_bubble_count = 0 + free_text_count = 0 + + for i, region in enumerate(regions): + # CHECK: Should this region be inpainted? 
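+ # Regions whose should_inpaint flag was cleared by the filter settings keep their OCR text but are excluded from every mask.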
+ if not getattr(region, 'should_inpaint', True): + # Skip this region - it shouldn't be inpainted + regions_skipped += 1 + self._log(f" Region {i+1}: SKIPPED (filtered by settings)", "debug") + continue + + regions_masked += 1 + + # Determine text type + text_type = 'free_text' # default + + # Check if region has bubble_type attribute (from bubble detection) + if hasattr(region, 'bubble_type'): + # RT-DETR classifications + if region.bubble_type == 'empty_bubble': + text_type = 'empty_bubble' + elif region.bubble_type == 'text_bubble': + text_type = 'text_bubble' + else: # 'free_text' or others + text_type = 'free_text' + else: + # Fallback: use simple heuristics if no bubble detection + x, y, w, h = region.bounding_box + x, y, w, h = int(x), int(y), int(w), int(h) + aspect_ratio = w / h if h > 0 else 1 + + # Check if region has text + has_text = hasattr(region, 'text') and region.text and len(region.text.strip()) > 0 + + # Heuristic: bubbles tend to be more square-ish or tall + # Free text tends to be wide and short + if aspect_ratio < 2.5 and w > 50 and h > 50: + if has_text: + text_type = 'text_bubble' + else: + # Could be empty bubble if it's round/oval shaped + text_type = 'empty_bubble' + else: + text_type = 'free_text' + + # Select appropriate mask and increment counter + if text_type == 'text_bubble': + target_mask = text_bubble_mask + text_bubble_count += 1 + mask_type = "TEXT BUBBLE" + elif text_type == 'empty_bubble': + target_mask = empty_bubble_mask + empty_bubble_count += 1 + mask_type = "EMPTY BUBBLE" + else: + target_mask = free_text_mask + free_text_count += 1 + mask_type = "FREE TEXT" + + # Check if this is a merged region with original regions + if hasattr(region, 'original_regions') and region.original_regions: + # Use original regions for precise masking + self._log(f" Region {i+1} ({mask_type}): Using {len(region.original_regions)} original regions", "debug") + + for orig_region in region.original_regions: + if hasattr(orig_region, 'vertices') and orig_region.vertices: + pts = np.array(orig_region.vertices, np.int32) + pts = pts.reshape((-1, 1, 2)) + cv2.fillPoly(target_mask, [pts], 255) + else: + x, y, w, h = orig_region.bounding_box + x, y, w, h = int(x), int(y), int(w), int(h) + cv2.rectangle(target_mask, (x, y), (x + w, y + h), 255, -1) + else: + # Normal region + if hasattr(region, 'vertices') and region.vertices and len(region.vertices) <= 8: + pts = np.array(region.vertices, np.int32) + pts = pts.reshape((-1, 1, 2)) + cv2.fillPoly(target_mask, [pts], 255) + self._log(f" Region {i+1} ({mask_type}): Using polygon", "debug") + else: + x, y, w, h = region.bounding_box + x, y, w, h = int(x), int(y), int(w), int(h) + cv2.rectangle(target_mask, (x, y), (x + w, y + h), 255, -1) + self._log(f" Region {i+1} ({mask_type}): Using bounding box", "debug") + + self._log(f"📊 Mask breakdown: {text_bubble_count} text bubbles, {empty_bubble_count} empty bubbles, " + f"{free_text_count} free text regions, {regions_skipped} skipped", "info") + + # Apply different dilation settings to each mask type + if base_dilation_size > 0: + kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (base_dilation_size, base_dilation_size)) + + # Apply dilation to text bubble mask + if text_bubble_count > 0 and text_bubble_iterations > 0: + self._log(f"📏 Applying text bubble dilation: {base_dilation_size}px, {text_bubble_iterations} iterations", "info") + text_bubble_mask = cv2.dilate(text_bubble_mask, kernel, iterations=text_bubble_iterations) + + # Apply dilation to empty bubble mask + if 
empty_bubble_count > 0 and empty_bubble_iterations > 0: + self._log(f"📏 Applying empty bubble dilation: {base_dilation_size}px, {empty_bubble_iterations} iterations", "info") + empty_bubble_mask = cv2.dilate(empty_bubble_mask, kernel, iterations=empty_bubble_iterations) + + # Apply dilation to free text mask + if free_text_count > 0 and free_text_iterations > 0: + self._log(f"📏 Applying free text dilation: {base_dilation_size}px, {free_text_iterations} iterations", "info") + free_text_mask = cv2.dilate(free_text_mask, kernel, iterations=free_text_iterations) + elif free_text_count > 0 and free_text_iterations == 0: + self._log(f"📏 No dilation for free text (iterations=0, perfect for B&W panels)", "info") + + # Combine all masks + mask = cv2.bitwise_or(text_bubble_mask, empty_bubble_mask) + mask = cv2.bitwise_or(mask, free_text_mask) + + coverage_percent = (np.sum(mask > 0) / mask.size) * 100 + self._log(f"📊 Final mask coverage: {coverage_percent:.1f}% of image", "info") + + return mask + + def _get_or_init_shared_local_inpainter(self, local_method: str, model_path: str, force_reload: bool = False): + """Return a shared LocalInpainter for (local_method, model_path) with minimal locking. + If another thread is loading the same model, wait on its event instead of competing. + Set force_reload=True only when the method or model_path actually changed. + + If spare instances are available in the pool, check one out for use. + The instance will stay assigned to this translator until cleanup. + """ + from local_inpainter import LocalInpainter + key = (local_method, model_path or '') + + # FIRST: Try to check out a spare instance if available (for true parallelism) + # Don't pop it - instead mark it as 'in use' so it stays in memory + with MangaTranslator._inpaint_pool_lock: + rec = MangaTranslator._inpaint_pool.get(key) + if rec and rec.get('spares'): + spares = rec.get('spares') or [] + # Initialize checked_out list if it doesn't exist + if 'checked_out' not in rec: + rec['checked_out'] = [] + checked_out = rec['checked_out'] + + # Look for an available spare (not checked out) + for spare in spares: + if spare not in checked_out and spare and getattr(spare, 'model_loaded', False): + # Mark as checked out + checked_out.append(spare) + self._log(f"🧰 Checked out spare inpainter ({len(checked_out)}/{len(spares)} in use)", "debug") + # Store reference for later return + self._checked_out_inpainter = spare + self._inpainter_pool_key = key + return spare + + # FALLBACK: Use the shared instance + rec = MangaTranslator._inpaint_pool.get(key) + if rec and rec.get('loaded') and rec.get('inpainter'): + # Already loaded - do NOT force reload! + return rec['inpainter'] + # Create or wait for loader + with MangaTranslator._inpaint_pool_lock: + rec = MangaTranslator._inpaint_pool.get(key) + if rec and rec.get('loaded') and rec.get('inpainter'): + # Already loaded - do NOT force reload! 
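+ # Double-checked under the pool lock: another thread may have finished loading between the unlocked fast path above and acquiring the lock.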
+ return rec['inpainter'] + if not rec: + # Register loading record + rec = {'inpainter': None, 'loaded': False, 'event': threading.Event()} + MangaTranslator._inpaint_pool[key] = rec + is_loader = True + else: + is_loader = False + event = rec['event'] + # Loader performs heavy work without holding the lock + if is_loader: + try: + inp = LocalInpainter() + # Apply tiling settings once to the shared instance + tiling_settings = self.manga_settings.get('tiling', {}) + inp.tiling_enabled = tiling_settings.get('enabled', False) + inp.tile_size = tiling_settings.get('tile_size', 512) + inp.tile_overlap = tiling_settings.get('tile_overlap', 64) + # Ensure model path + if not model_path or not os.path.exists(model_path): + try: + model_path = inp.download_jit_model(local_method) + except Exception as e: + self._log(f"⚠️ JIT download failed: {e}", "warning") + model_path = None + # Load model - NEVER force reload for first-time shared pool loading + loaded_ok = False + if model_path and os.path.exists(model_path): + try: + self._log(f"📦 Loading inpainter model...", "debug") + self._log(f" Method: {local_method}", "debug") + self._log(f" Path: {model_path}", "debug") + # Only force reload if explicitly requested AND this is not the first load + # For shared pool, we should never force reload on initial load + loaded_ok = inp.load_model_with_retry(local_method, model_path, force_reload=force_reload) + if not loaded_ok: + # Retry with force_reload if initial load failed + self._log(f"🔄 Initial load failed, retrying with force_reload=True", "warning") + loaded_ok = inp.load_model_with_retry(local_method, model_path, force_reload=True) + if not loaded_ok: + self._log(f"❌ Both load attempts failed", "error") + # Check file validity + try: + size_mb = os.path.getsize(model_path) / (1024 * 1024) + self._log(f" File size: {size_mb:.2f} MB", "info") + if size_mb < 1: + self._log(f" ⚠️ File may be corrupted (too small)", "warning") + except Exception: + self._log(f" ⚠️ Could not read model file", "warning") + except Exception as e: + self._log(f"⚠️ Inpainter load exception: {e}", "warning") + import traceback + self._log(traceback.format_exc(), "debug") + loaded_ok = False + elif not model_path: + self._log(f"⚠️ No model path configured for {local_method}", "warning") + elif not os.path.exists(model_path): + self._log(f"⚠️ Model file does not exist: {model_path}", "warning") + # Publish result + with MangaTranslator._inpaint_pool_lock: + rec = MangaTranslator._inpaint_pool.get(key) or rec + rec['inpainter'] = inp + rec['loaded'] = bool(loaded_ok) + rec['event'].set() + return inp + except Exception as e: + with MangaTranslator._inpaint_pool_lock: + rec = MangaTranslator._inpaint_pool.get(key) or rec + rec['inpainter'] = None + rec['loaded'] = False + rec['event'].set() + self._log(f"⚠️ Shared inpainter setup failed: {e}", "warning") + return None + else: + # Wait for loader to finish (without holding the lock) + success = event.wait(timeout=120) + if not success: + self._log(f"⏱️ Timeout waiting for inpainter to load (120s)", "warning") + return None + + # Check if load was successful + rec2 = MangaTranslator._inpaint_pool.get(key) + if not rec2: + self._log(f"⚠️ Inpainter pool record disappeared after load", "warning") + return None + + inp = rec2.get('inpainter') + loaded = rec2.get('loaded', False) + + if inp and loaded: + # Successfully loaded by another thread + return inp + elif inp and not loaded: + # Inpainter created but model failed to load + # Try to load it ourselves + self._log(f"⚠️ Inpainter 
exists but model not loaded, attempting to load", "debug") + if model_path and os.path.exists(model_path): + try: + loaded_ok = inp.load_model_with_retry(local_method, model_path, force_reload=True) + if loaded_ok: + # Update the pool record + with MangaTranslator._inpaint_pool_lock: + rec2['loaded'] = True + self._log(f"✅ Successfully loaded model on retry in waiting thread", "info") + return inp + except Exception as e: + self._log(f"❌ Failed to load in waiting thread: {e}", "warning") + return inp # Return anyway, inpaint will no-op + else: + self._log(f"⚠️ Loader thread failed to create inpainter", "warning") + return None + + @classmethod + def _count_preloaded_inpainters(cls) -> int: + try: + with cls._inpaint_pool_lock: + total = 0 + for rec in cls._inpaint_pool.values(): + try: + total += len(rec.get('spares') or []) + except Exception: + pass + return total + except Exception: + return 0 + + def preload_local_inpainters(self, local_method: str, model_path: str, count: int) -> int: + """Preload N local inpainting instances sequentially into the shared pool for parallel panel translation. + Returns the number of instances successfully preloaded. + """ + # Respect singleton mode: do not create extra instances/spares + if getattr(self, 'use_singleton_models', False): + try: + self._log("🧰 Skipping local inpainting preload (singleton mode)", "debug") + except Exception: + pass + return 0 + try: + from local_inpainter import LocalInpainter + except Exception: + self._log("❌ Local inpainter module not available for preloading", "error") + return 0 + key = (local_method, model_path or '') + created = 0 + + # FIRST: Ensure the shared instance is initialized and ready + # This prevents race conditions when spare instances run out + with MangaTranslator._inpaint_pool_lock: + rec = MangaTranslator._inpaint_pool.get(key) + if not rec or not rec.get('loaded') or not rec.get('inpainter'): + # Need to create the shared instance + if not rec: + rec = {'inpainter': None, 'loaded': False, 'event': threading.Event(), 'spares': []} + MangaTranslator._inpaint_pool[key] = rec + need_init_shared = True + else: + need_init_shared = not (rec.get('loaded') and rec.get('inpainter')) + else: + need_init_shared = False + + if need_init_shared: + self._log(f"📦 Initializing shared inpainter instance first...", "info") + try: + shared_inp = self._get_or_init_shared_local_inpainter(local_method, model_path, force_reload=False) + if shared_inp and getattr(shared_inp, 'model_loaded', False): + self._log(f"✅ Shared instance initialized and model loaded", "info") + # Verify the pool record is updated + with MangaTranslator._inpaint_pool_lock: + rec_check = MangaTranslator._inpaint_pool.get(key) + if rec_check: + self._log(f" Pool record: loaded={rec_check.get('loaded')}, has_inpainter={rec_check.get('inpainter') is not None}", "debug") + else: + self._log(f"⚠️ Shared instance initialization returned but model not loaded", "warning") + if shared_inp: + self._log(f" Instance exists but model_loaded={getattr(shared_inp, 'model_loaded', 'ATTR_MISSING')}", "debug") + except Exception as e: + self._log(f"⚠️ Shared instance initialization failed: {e}", "warning") + import traceback + self._log(traceback.format_exc(), "debug") + + # Ensure pool record and spares list exist + with MangaTranslator._inpaint_pool_lock: + rec = MangaTranslator._inpaint_pool.get(key) + if not rec: + rec = {'inpainter': None, 'loaded': False, 'event': threading.Event(), 'spares': []} + MangaTranslator._inpaint_pool[key] = rec + if 'spares' not in 
rec or rec['spares'] is None: + rec['spares'] = [] + spares = rec.get('spares') + # Prepare tiling settings + tiling_settings = self.manga_settings.get('tiling', {}) if hasattr(self, 'manga_settings') else {} + desired = max(0, int(count) - len(spares)) + if desired <= 0: + return 0 + ctx = " for parallel panels" if int(count) > 1 else "" + self._log(f"🧰 Preloading {desired} local inpainting instance(s){ctx}", "info") + for i in range(desired): + try: + inp = LocalInpainter() + inp.tiling_enabled = tiling_settings.get('enabled', False) + inp.tile_size = tiling_settings.get('tile_size', 512) + inp.tile_overlap = tiling_settings.get('tile_overlap', 64) + # Resolve model path if needed + resolved = model_path + if not resolved or not os.path.exists(resolved): + try: + resolved = inp.download_jit_model(local_method) + except Exception as e: + self._log(f"⚠️ Preload JIT download failed: {e}", "warning") + resolved = None + if resolved and os.path.exists(resolved): + ok = inp.load_model_with_retry(local_method, resolved, force_reload=False) + if ok and getattr(inp, 'model_loaded', False): + with MangaTranslator._inpaint_pool_lock: + rec = MangaTranslator._inpaint_pool.get(key) or {'spares': []} + if 'spares' not in rec or rec['spares'] is None: + rec['spares'] = [] + rec['spares'].append(inp) + MangaTranslator._inpaint_pool[key] = rec + created += 1 + elif ok and not getattr(inp, 'model_loaded', False): + self._log(f"⚠️ Preload: load_model_with_retry returned True but model_loaded is False", "warning") + elif not ok: + self._log(f"⚠️ Preload: load_model_with_retry returned False", "warning") + else: + self._log("⚠️ Preload skipped: no model path available", "warning") + except Exception as e: + self._log(f"⚠️ Preload error: {e}", "warning") + self._log(f"✅ Preloaded {created} local inpainting instance(s)", "info") + return created + + def preload_local_inpainters_concurrent(self, local_method: str, model_path: str, count: int, max_parallel: int = None) -> int: + """Preload N local inpainting instances concurrently into the shared pool. + Honors advanced toggles for panel/region parallelism to pick a reasonable parallelism. + Returns number of instances successfully preloaded. 
+ """ + # Respect singleton mode: do not create extra instances/spares + if getattr(self, 'use_singleton_models', False): + try: + self._log("🧰 Skipping concurrent local inpainting preload (singleton mode)", "debug") + except Exception: + pass + return 0 + try: + from local_inpainter import LocalInpainter + except Exception: + self._log("❌ Local inpainter module not available for preloading", "error") + return 0 + key = (local_method, model_path or '') + # Determine desired number based on existing spares + with MangaTranslator._inpaint_pool_lock: + rec = MangaTranslator._inpaint_pool.get(key) + if not rec: + rec = {'inpainter': None, 'loaded': False, 'event': threading.Event(), 'spares': []} + MangaTranslator._inpaint_pool[key] = rec + spares = (rec.get('spares') or []) + desired = max(0, int(count) - len(spares)) + if desired <= 0: + return 0 + # Determine max_parallel from advanced settings if not provided + if max_parallel is None: + adv = {} + try: + adv = self.main_gui.config.get('manga_settings', {}).get('advanced', {}) if hasattr(self, 'main_gui') else {} + except Exception: + adv = {} + if adv.get('parallel_panel_translation', False): + try: + max_parallel = max(1, int(adv.get('panel_max_workers', 2))) + except Exception: + max_parallel = 2 + elif adv.get('parallel_processing', False): + try: + max_parallel = max(1, int(adv.get('max_workers', 4))) + except Exception: + max_parallel = 2 + else: + max_parallel = 1 + max_parallel = max(1, min(int(max_parallel), int(desired))) + ctx = " for parallel panels" if int(count) > 1 else "" + self._log(f"🧰 Preloading {desired} local inpainting instance(s){ctx} (parallel={max_parallel})", "info") + # Resolve model path once + resolved_path = model_path + if not resolved_path or not os.path.exists(resolved_path): + try: + probe_inp = LocalInpainter() + resolved_path = probe_inp.download_jit_model(local_method) + except Exception as e: + self._log(f"⚠️ JIT download failed for concurrent preload: {e}", "warning") + resolved_path = None + tiling_settings = self.manga_settings.get('tiling', {}) if hasattr(self, 'manga_settings') else {} + from concurrent.futures import ThreadPoolExecutor, as_completed + created = 0 + def _one(): + try: + inp = LocalInpainter() + inp.tiling_enabled = tiling_settings.get('enabled', False) + inp.tile_size = tiling_settings.get('tile_size', 512) + inp.tile_overlap = tiling_settings.get('tile_overlap', 64) + if resolved_path and os.path.exists(resolved_path): + ok = inp.load_model_with_retry(local_method, resolved_path, force_reload=False) + if ok and getattr(inp, 'model_loaded', False): + with MangaTranslator._inpaint_pool_lock: + rec2 = MangaTranslator._inpaint_pool.get(key) or {'spares': []} + if 'spares' not in rec2 or rec2['spares'] is None: + rec2['spares'] = [] + rec2['spares'].append(inp) + MangaTranslator._inpaint_pool[key] = rec2 + return True + except Exception as e: + self._log(f"⚠️ Concurrent preload error: {e}", "warning") + return False + with ThreadPoolExecutor(max_workers=max_parallel) as ex: + futs = [ex.submit(_one) for _ in range(desired)] + for f in as_completed(futs): + try: + if f.result(): + created += 1 + except Exception: + pass + self._log(f"✅ Preloaded {created} local inpainting instance(s)", "info") + return created + return created + + @classmethod + def _count_preloaded_detectors(cls) -> int: + try: + with cls._detector_pool_lock: + return sum(len((rec or {}).get('spares') or []) for rec in cls._detector_pool.values()) + except Exception: + return 0 + + @classmethod + def 
get_preload_counters(cls) -> Dict[str, int]: + """Return current counters for preloaded instances (for diagnostics/logging).""" + try: + with cls._inpaint_pool_lock: + inpaint_spares = sum(len((rec or {}).get('spares') or []) for rec in cls._inpaint_pool.values()) + inpaint_keys = len(cls._inpaint_pool) + with cls._detector_pool_lock: + detector_spares = sum(len((rec or {}).get('spares') or []) for rec in cls._detector_pool.values()) + detector_keys = len(cls._detector_pool) + return { + 'inpaint_spares': inpaint_spares, + 'inpaint_keys': inpaint_keys, + 'detector_spares': detector_spares, + 'detector_keys': detector_keys, + } + except Exception: + return {'inpaint_spares': 0, 'inpaint_keys': 0, 'detector_spares': 0, 'detector_keys': 0} + + def preload_bubble_detectors(self, ocr_settings: Dict[str, Any], count: int) -> int: + """Preload N bubble detector instances (non-singleton) for panel parallelism. + Only applies when not using singleton models. + """ + try: + from bubble_detector import BubbleDetector + except Exception: + self._log("❌ BubbleDetector module not available for preloading", "error") + return 0 + # Skip if singleton mode + if getattr(self, 'use_singleton_models', False): + return 0 + det_type = (ocr_settings or {}).get('detector_type', 'rtdetr_onnx') + model_id = (ocr_settings or {}).get('rtdetr_model_url') or (ocr_settings or {}).get('bubble_model_path') or '' + key = (det_type, model_id) + created = 0 + with MangaTranslator._detector_pool_lock: + rec = MangaTranslator._detector_pool.get(key) + if not rec: + rec = {'spares': []} + MangaTranslator._detector_pool[key] = rec + spares = rec.get('spares') + if spares is None: + spares = [] + rec['spares'] = spares + desired = max(0, int(count) - len(spares)) + if desired <= 0: + return 0 + self._log(f"🧰 Preloading {desired} bubble detector instance(s) [{det_type}]", "info") + for i in range(desired): + try: + bd = BubbleDetector() + ok = False + if det_type == 'rtdetr_onnx': + ok = bool(bd.load_rtdetr_onnx_model(model_id=model_id)) + elif det_type == 'rtdetr': + ok = bool(bd.load_rtdetr_model(model_id=model_id)) + elif det_type == 'yolo': + if model_id: + ok = bool(bd.load_model(model_id)) + else: + # auto: prefer RT-DETR + ok = bool(bd.load_rtdetr_model(model_id=model_id)) + if ok: + with MangaTranslator._detector_pool_lock: + rec = MangaTranslator._detector_pool.get(key) or {'spares': []} + if 'spares' not in rec or rec['spares'] is None: + rec['spares'] = [] + rec['spares'].append(bd) + MangaTranslator._detector_pool[key] = rec + created += 1 + except Exception as e: + self._log(f"⚠️ Bubble detector preload error: {e}", "warning") + self._log(f"✅ Preloaded {created} bubble detector instance(s)", "info") + return created + + def _initialize_local_inpainter(self): + """Initialize local inpainting if configured""" + try: + from local_inpainter import LocalInpainter, HybridInpainter, AnimeMangaInpaintModel + + # LOAD THE SETTINGS FROM CONFIG FIRST + # The dialog saves it as 'manga_local_inpaint_model' at root level + saved_local_method = self.main_gui.config.get('manga_local_inpaint_model', 'anime') + saved_inpaint_method = self.main_gui.config.get('manga_inpaint_method', 'cloud') + + # MIGRATION: Ensure manga_ prefixed model path keys exist for ONNX methods + # This fixes compatibility where model paths were saved without manga_ prefix + for method_variant in ['anime', 'anime_onnx', 'lama', 'lama_onnx', 'aot', 'aot_onnx']: + non_prefixed_key = f'{method_variant}_model_path' + prefixed_key = 
f'manga_{method_variant}_model_path' + # If we have the non-prefixed but not the prefixed, migrate it + if non_prefixed_key in self.main_gui.config and prefixed_key not in self.main_gui.config: + self.main_gui.config[prefixed_key] = self.main_gui.config[non_prefixed_key] + self._log(f"🔄 Migrated model path config: {non_prefixed_key} → {prefixed_key}", "debug") + + # Update manga_settings with the saved values + # ALWAYS use the top-level saved config to ensure correct model is loaded + if 'inpainting' not in self.manga_settings: + self.manga_settings['inpainting'] = {} + + # Always override with saved values from top-level config + # This ensures the user's model selection in the settings dialog is respected + self.manga_settings['inpainting']['method'] = saved_inpaint_method + self.manga_settings['inpainting']['local_method'] = saved_local_method + + # Now get the values (they'll be correct now) + inpaint_method = self.manga_settings.get('inpainting', {}).get('method', 'cloud') + + if inpaint_method == 'local': + # This will now get the correct saved value + local_method = self.manga_settings.get('inpainting', {}).get('local_method', 'anime') + + # Model path is saved with manga_ prefix - try both key formats for compatibility + model_path = self.main_gui.config.get(f'manga_{local_method}_model_path', '') + if not model_path: + # Fallback to non-prefixed key (older format) + model_path = self.main_gui.config.get(f'{local_method}_model_path', '') + + self._log(f"Using local method: {local_method} (loaded from config)", "info") + + # Check if we already have a loaded instance in the shared pool + # This avoids unnecessary tracking and reloading + inp_shared = self._get_or_init_shared_local_inpainter(local_method, model_path, force_reload=False) + + # Only track changes AFTER getting the shared instance + # This prevents spurious reloads on first initialization + if not hasattr(self, '_last_local_method'): + self._last_local_method = local_method + self._last_local_model_path = model_path + else: + # Check if settings actually changed and we need to force reload + need_reload = False + if self._last_local_method != local_method: + self._log(f"🔄 Local method changed from {self._last_local_method} to {local_method}", "info") + need_reload = True + # If method changed, we need a different model - get it with force_reload + inp_shared = self._get_or_init_shared_local_inpainter(local_method, model_path, force_reload=True) + elif self._last_local_model_path != model_path: + self._log(f"🔄 Model path changed", "info") + if self._last_local_model_path: + self._log(f" Old: {os.path.basename(self._last_local_model_path)}", "debug") + if model_path: + self._log(f" New: {os.path.basename(model_path)}", "debug") + need_reload = True + # If path changed, reload the model + inp_shared = self._get_or_init_shared_local_inpainter(local_method, model_path, force_reload=True) + + # Update tracking only if changes were made + if need_reload: + self._last_local_method = local_method + self._last_local_model_path = model_path + if inp_shared is not None: + self.local_inpainter = inp_shared + if getattr(self.local_inpainter, 'model_loaded', False): + self._log(f"✅ Using shared {local_method.upper()} inpainting model", "info") + return True + else: + self._log(f"⚠️ Shared inpainter created but model not loaded", "warning") + self._log(f"🔄 Attempting to retry model loading...", "info") + + # Retry loading the model + if model_path and os.path.exists(model_path): + self._log(f"📦 Model path: {model_path}", "info") + 
self._log(f"📋 Method: {local_method}", "info") + try: + loaded_ok = inp_shared.load_model_with_retry(local_method, model_path, force_reload=True) + if loaded_ok and getattr(inp_shared, 'model_loaded', False): + self._log(f"✅ Model loaded successfully on retry", "info") + return True + else: + self._log(f"❌ Model still not loaded after retry", "error") + # Check if model file exists and is valid + try: + size_mb = os.path.getsize(model_path) / (1024 * 1024) + self._log(f"📊 Model file size: {size_mb:.2f} MB", "info") + if size_mb < 1: + self._log(f"⚠️ Model file seems too small (< 1 MB) - may be corrupted", "warning") + except Exception: + pass + except Exception as e: + self._log(f"❌ Retry load failed: {e}", "error") + import traceback + self._log(traceback.format_exc(), "debug") + elif not model_path: + self._log(f"❌ No model path provided", "error") + elif not os.path.exists(model_path): + self._log(f"❌ Model path does not exist: {model_path}", "error") + self._log(f"📥 Tip: Try downloading the model from the Manga Settings dialog", "info") + + # If retry failed, fall through to fallback logic below + + # Fall back to instance-level init only if shared init completely failed + self._log("⚠️ Shared inpainter init failed, falling back to instance creation", "warning") + try: + from local_inpainter import LocalInpainter + + # Create local inpainter instance + self.local_inpainter = LocalInpainter() + tiling_settings = self.manga_settings.get('tiling', {}) + self.local_inpainter.tiling_enabled = tiling_settings.get('enabled', False) + self.local_inpainter.tile_size = tiling_settings.get('tile_size', 512) + self.local_inpainter.tile_overlap = tiling_settings.get('tile_overlap', 64) + self._log(f"✅ Set tiling: enabled={self.local_inpainter.tiling_enabled}, size={self.local_inpainter.tile_size}, overlap={self.local_inpainter.tile_overlap}", "info") + + # If no model path or doesn't exist, try to find or download one + if not model_path or not os.path.exists(model_path): + self._log(f"⚠️ Model path not found: {model_path}", "warning") + self._log("📥 Attempting to download JIT model...", "info") + try: + downloaded_path = self.local_inpainter.download_jit_model(local_method) + except Exception as e: + self._log(f"⚠️ JIT download failed: {e}", "warning") + downloaded_path = None + if downloaded_path: + model_path = downloaded_path + self._log(f"✅ Downloaded JIT model to: {model_path}") + else: + self._log("⚠️ JIT model download did not return a path", "warning") + + # Load model with retry to avoid transient file/JSON issues under parallel init + loaded_ok = False + if model_path and os.path.exists(model_path): + for attempt in range(2): + try: + self._log(f"📥 Loading {local_method} model... 
(attempt {attempt+1})", "info") + if self.local_inpainter.load_model(local_method, model_path, force_reload=need_reload): + loaded_ok = True + break + except Exception as e: + self._log(f"⚠️ Load attempt {attempt+1} failed: {e}", "warning") + time.sleep(0.5) + if loaded_ok: + self._log(f"✅ Local inpainter loaded with {local_method.upper()} (fallback instance)") + else: + self._log(f"⚠️ Failed to load model, but inpainter is ready", "warning") + else: + self._log(f"⚠️ No model available, but inpainter is initialized", "warning") + + return True + + except Exception as e: + self._log(f"❌ Local inpainter module not available: {e}", "error") + return False + + elif inpaint_method == 'hybrid': + # Track hybrid settings changes + if not hasattr(self, '_last_hybrid_config'): + self._last_hybrid_config = None + + # Set tiling from tiling section + tiling_settings = self.manga_settings.get('tiling', {}) + self.local_inpainter.tiling_enabled = tiling_settings.get('enabled', False) + self.local_inpainter.tile_size = tiling_settings.get('tile_size', 512) + self.local_inpainter.tile_overlap = tiling_settings.get('tile_overlap', 64) + + self._log(f"✅ Set tiling: enabled={self.local_inpainter.tiling_enabled}, size={self.local_inpainter.tile_size}, overlap={self.local_inpainter.tile_overlap}", "info") + + current_hybrid_config = self.manga_settings.get('inpainting', {}).get('hybrid_methods', []) + + # Check if hybrid config changed + need_reload = self._last_hybrid_config != current_hybrid_config + if need_reload: + self._log("🔄 Hybrid configuration changed, reloading...", "info") + self.hybrid_inpainter = None # Clear old instance + + self._last_hybrid_config = current_hybrid_config.copy() if current_hybrid_config else [] + + if self.hybrid_inpainter is None: + self.hybrid_inpainter = HybridInpainter() + # REMOVED: No longer override tiling settings for HybridInpainter + + # Load multiple methods + methods = self.manga_settings.get('inpainting', {}).get('hybrid_methods', []) + loaded = 0 + + for method_config in methods: + method = method_config.get('method') + model_path = method_config.get('model_path') + + if method and model_path: + if self.hybrid_inpainter.add_method(method, method, model_path): + loaded += 1 + self._log(f"✅ Added {method.upper()} to hybrid inpainter") + + if loaded > 0: + self._log(f"✅ Hybrid inpainter ready with {loaded} methods") + else: + self._log("⚠️ Hybrid inpainter initialized but no methods loaded", "warning") + + return True + + return False + + except ImportError: + self._log("❌ Local inpainter module not available", "error") + return False + except Exception as e: + self._log(f"❌ Error initializing inpainter: {e}", "error") + return False + + + def inpaint_regions(self, image: np.ndarray, mask: np.ndarray) -> np.ndarray: + """Inpaint using configured method (cloud, local, or hybrid)""" + # Primary source of truth is the runtime flags set by the UI. 
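+        # Selection order implemented below: skip entirely if requested, then cloud,
+        # then hybrid, then local (thread-local instance -> shared instance -> retry
+        # reloads). If every local attempt fails, the original image is returned
+        # unmodified rather than risking a corrupted inpaint.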
+ if getattr(self, 'skip_inpainting', False): + self._log(" ⏭️ Skipping inpainting (preserving original art)", "info") + return image.copy() + + # Cloud mode explicitly selected in UI + if getattr(self, 'use_cloud_inpainting', False): + return self._cloud_inpaint(image, mask) + + # Hybrid mode if UI requested it (fallback to settings key if present) + mode = getattr(self, 'inpaint_mode', None) or self.manga_settings.get('inpainting', {}).get('method') + if mode == 'hybrid' and hasattr(self, 'hybrid_inpainter'): + self._log(" 🔄 Using hybrid ensemble inpainting", "info") + return self.hybrid_inpainter.inpaint_ensemble(image, mask) + + # If a background preload is running, wait until it's finished before inpainting + try: + if hasattr(self, '_inpaint_preload_event') and self._inpaint_preload_event and not self._inpaint_preload_event.is_set(): + self._log(" ⏳ Waiting for local inpainting models to finish preloading...", "info") + # Wait with a generous timeout, but proceed afterward regardless + self._inpaint_preload_event.wait(timeout=300) + except Exception: + pass + + # Default to local inpainting + local_method = self.manga_settings.get('inpainting', {}).get('local_method', 'anime') + model_path = self.main_gui.config.get(f'manga_{local_method}_model_path', '') + + # Use a thread-local inpainter instance + inp = self._get_thread_local_inpainter(local_method, model_path) + if inp and getattr(inp, 'model_loaded', False): + self._log(" 🧽 Using local inpainting", "info") + return inp.inpaint(image, mask) + else: + # Conservative fallback: try shared instance only; do not attempt risky reloads that can corrupt output + try: + shared_inp = self._get_or_init_shared_local_inpainter(local_method, model_path) + if shared_inp and getattr(shared_inp, 'model_loaded', False): + self._log(" ✅ Using shared inpainting instance", "info") + return shared_inp.inpaint(image, mask) + except Exception: + pass + + # RETRY LOGIC: Attempt to reload model with multiple strategies + self._log(" ⚠️ Local inpainting model not loaded; attempting retry...", "warning") + + retry_attempts = [ + {'force_reload': True, 'desc': 'force reload'}, + {'force_reload': True, 'desc': 'force reload with delay', 'delay': 1.0}, + {'force_reload': False, 'desc': 'standard reload'}, + ] + + for attempt_num, retry_config in enumerate(retry_attempts, 1): + try: + self._log(f" 🔄 Retry attempt {attempt_num}/{len(retry_attempts)}: {retry_config['desc']}", "info") + + # Apply delay if specified + if retry_config.get('delay'): + import time + time.sleep(retry_config['delay']) + + # Try to get or create a fresh inpainter instance + retry_inp = self._get_or_init_shared_local_inpainter( + local_method, + model_path, + force_reload=retry_config['force_reload'] + ) + + if retry_inp: + # Check if model is loaded + if getattr(retry_inp, 'model_loaded', False): + self._log(f" ✅ Model loaded successfully on retry attempt {attempt_num}", "info") + return retry_inp.inpaint(image, mask) + else: + # Model exists but not loaded - try loading it directly + self._log(f" 🔧 Model not loaded, attempting direct load...", "info") + if model_path and os.path.exists(model_path): + try: + loaded_ok = retry_inp.load_model_with_retry( + local_method, + model_path, + force_reload=True + ) + if loaded_ok and getattr(retry_inp, 'model_loaded', False): + self._log(f" ✅ Direct load successful on attempt {attempt_num}", "info") + return retry_inp.inpaint(image, mask) + else: + self._log(f" ⚠️ Direct load returned {loaded_ok}, model_loaded={getattr(retry_inp, 'model_loaded', 
False)}", "warning") + except Exception as load_err: + self._log(f" ⚠️ Direct load failed: {load_err}", "warning") + else: + if not model_path: + self._log(f" ⚠️ No model path configured", "warning") + elif not os.path.exists(model_path): + self._log(f" ⚠️ Model file does not exist: {model_path}", "warning") + else: + self._log(f" ⚠️ Failed to get inpainter instance on attempt {attempt_num}", "warning") + + except Exception as retry_err: + self._log(f" ⚠️ Retry attempt {attempt_num} failed: {retry_err}", "warning") + import traceback + self._log(traceback.format_exc(), "debug") + + # All retries exhausted - provide detailed diagnostic information + self._log(" ❌ All retry attempts exhausted. Diagnostics:", "error") + self._log(f" Method: {local_method}", "error") + if model_path: + self._log(f" Model path: {model_path}", "error") + if os.path.exists(model_path): + try: + size_mb = os.path.getsize(model_path) / (1024 * 1024) + self._log(f" File size: {size_mb:.2f} MB", "error") + if size_mb < 1: + self._log(f" ⚠️ File may be corrupted (too small)", "error") + except Exception: + self._log(f" ⚠️ Cannot read model file", "error") + else: + self._log(f" ⚠️ Model file does not exist", "error") + else: + self._log(f" ⚠️ No model path configured", "error") + + self._log(" 💡 Suggestion: Check Manga Settings and download the model if needed", "error") + self._log(" ⚠️ Returning original image without inpainting", "warning") + return image.copy() + + def _cloud_inpaint(self, image: np.ndarray, mask: np.ndarray) -> np.ndarray: + """Use Replicate API for inpainting""" + try: + import requests + import base64 + from io import BytesIO + from PIL import Image as PILImage + import cv2 + + self._log(" ☁️ Cloud inpainting via Replicate API", "info") + + # Convert to PIL + image_pil = PILImage.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB)) + mask_pil = PILImage.fromarray(mask).convert('L') + + # Convert to base64 + img_buffer = BytesIO() + image_pil.save(img_buffer, format='PNG') + img_base64 = base64.b64encode(img_buffer.getvalue()).decode() + + mask_buffer = BytesIO() + mask_pil.save(mask_buffer, format='PNG') + mask_base64 = base64.b64encode(mask_buffer.getvalue()).decode() + + # Get cloud settings + cloud_settings = self.main_gui.config.get('manga_settings', {}) + model_type = cloud_settings.get('cloud_inpaint_model', 'ideogram-v2') + timeout = cloud_settings.get('cloud_timeout', 60) + + # Determine model identifier based on model type + if model_type == 'ideogram-v2': + model = 'ideogram-ai/ideogram-v2' + self._log(f" Using Ideogram V2 inpainting model", "info") + elif model_type == 'sd-inpainting': + model = 'stability-ai/stable-diffusion-inpainting' + self._log(f" Using Stable Diffusion inpainting model", "info") + elif model_type == 'flux-inpainting': + model = 'zsxkib/flux-dev-inpainting' + self._log(f" Using FLUX inpainting model", "info") + elif model_type == 'custom': + model = cloud_settings.get('cloud_custom_version', '') + if not model: + raise Exception("No custom model identifier specified") + self._log(f" Using custom model: {model}", "info") + else: + # Default to Ideogram V2 + model = 'ideogram-ai/ideogram-v2' + self._log(f" Using default Ideogram V2 model", "info") + + # Build input data based on model type + input_data = { + 'image': f'data:image/png;base64,{img_base64}', + 'mask': f'data:image/png;base64,{mask_base64}' + } + + # Add prompt settings for models that support them + if model_type in ['ideogram-v2', 'sd-inpainting', 'flux-inpainting', 'custom']: + prompt = 
cloud_settings.get('cloud_inpaint_prompt', 'clean background, smooth surface') + input_data['prompt'] = prompt + self._log(f" Prompt: {prompt}", "info") + + # SD-specific parameters + if model_type == 'sd-inpainting': + negative_prompt = cloud_settings.get('cloud_negative_prompt', 'text, writing, letters') + input_data['negative_prompt'] = negative_prompt + input_data['num_inference_steps'] = cloud_settings.get('cloud_inference_steps', 20) + self._log(f" Negative prompt: {negative_prompt}", "info") + + # Get the latest version of the model + headers = { + 'Authorization': f'Token {self.replicate_api_key}', + 'Content-Type': 'application/json' + } + + # First, get the latest version of the model + model_response = requests.get( + f'https://api.replicate.com/v1/models/{model}', + headers=headers + ) + + if model_response.status_code != 200: + # If model lookup fails, try direct prediction with model identifier + self._log(f" Model lookup returned {model_response.status_code}, trying direct prediction", "warning") + version = None + else: + model_info = model_response.json() + version = model_info.get('latest_version', {}).get('id') + if not version: + raise Exception(f"Could not get version for model {model}") + + # Create prediction + prediction_data = { + 'input': input_data + } + + if version: + prediction_data['version'] = version + else: + # For custom models, try extracting version from model string + if ':' in model: + # Format: owner/model:version + model_name, version_id = model.split(':', 1) + prediction_data['version'] = version_id + else: + raise Exception(f"Could not determine version for model {model}. Try using format: owner/model:version") + + response = requests.post( + 'https://api.replicate.com/v1/predictions', + headers=headers, + json=prediction_data + ) + + if response.status_code != 201: + raise Exception(f"API error: {response.text}") + + # Get prediction URL + prediction = response.json() + prediction_url = prediction.get('urls', {}).get('get') or prediction.get('id') + + if not prediction_url: + raise Exception("No prediction URL returned") + + # If we only got an ID, construct the URL + if not prediction_url.startswith('http'): + prediction_url = f'https://api.replicate.com/v1/predictions/{prediction_url}' + + # Poll for result with configured timeout + import time + for i in range(timeout): + response = requests.get(prediction_url, headers=headers) + result = response.json() + + # Log progress every 5 seconds + if i % 5 == 0 and i > 0: + self._log(f" ⏳ Still processing... 
({i}s elapsed)", "info") + + if result['status'] == 'succeeded': + # Download result image (handle both single URL and list) + output = result.get('output') + if not output: + raise Exception("No output returned from model") + + if isinstance(output, list): + output_url = output[0] if output else None + else: + output_url = output + + if not output_url: + raise Exception("No output URL in result") + + img_response = requests.get(output_url) + + # Convert back to numpy + result_pil = PILImage.open(BytesIO(img_response.content)) + result_bgr = cv2.cvtColor(np.array(result_pil), cv2.COLOR_RGB2BGR) + + self._log(" ✅ Cloud inpainting completed", "success") + return result_bgr + + elif result['status'] == 'failed': + error_msg = result.get('error', 'Unknown error') + # Check for common errors + if 'version' in error_msg.lower(): + error_msg += f" (Try using the model identifier '{model}' in the custom field)" + raise Exception(f"Inpainting failed: {error_msg}") + + time.sleep(1) + + raise Exception(f"Timeout waiting for inpainting (>{timeout}s)") + + except Exception as e: + self._log(f" ❌ Cloud inpainting failed: {str(e)}", "error") + return image.copy() + + + def _regions_overlap(self, region1: TextRegion, region2: TextRegion) -> bool: + """Check if two regions overlap""" + x1, y1, w1, h1 = region1.bounding_box + x2, y2, w2, h2 = region2.bounding_box + + # Check if rectangles overlap + if (x1 + w1 < x2 or x2 + w2 < x1 or + y1 + h1 < y2 or y2 + h2 < y1): + return False + + return True + + def _calculate_overlap_area(self, region1: TextRegion, region2: TextRegion) -> float: + """Calculate the area of overlap between two regions""" + x1, y1, w1, h1 = region1.bounding_box + x2, y2, w2, h2 = region2.bounding_box + + # Calculate intersection + x_left = max(x1, x2) + y_top = max(y1, y2) + x_right = min(x1 + w1, x2 + w2) + y_bottom = min(y1 + h1, y2 + h2) + + if x_right < x_left or y_bottom < y_top: + return 0.0 + + return (x_right - x_left) * (y_bottom - y_top) + + def _adjust_overlapping_regions(self, regions: List[TextRegion], image_width: int, image_height: int) -> List[TextRegion]: + """Adjust positions of overlapping regions to prevent overlap while preserving text mapping""" + if len(regions) <= 1: + return regions + + # Create a copy of regions with preserved indices + adjusted_regions = [] + for idx, region in enumerate(regions): + # Create a new TextRegion with copied values + adjusted_region = TextRegion( + text=region.text, + vertices=list(region.vertices), + bounding_box=list(region.bounding_box), + confidence=region.confidence, + region_type=region.region_type + ) + if hasattr(region, 'translated_text'): + adjusted_region.translated_text = region.translated_text + + # IMPORTANT: Preserve original index to maintain text mapping + adjusted_region.original_index = idx + adjusted_region.original_bbox = tuple(region.bounding_box) # Store original position + + adjusted_regions.append(adjusted_region) + + # DON'T SORT - This breaks the text-to-region mapping! 
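+        # The position tweaks below only move bounding boxes; the sort by
+        # original_index at the end of this method restores the input order.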
+ # Process in original order to maintain associations + + # Track which regions have been moved to avoid cascade effects + moved_regions = set() + + # Adjust overlapping regions + for i in range(len(adjusted_regions)): + if i in moved_regions: + continue # Skip if already moved + + for j in range(i + 1, len(adjusted_regions)): + if j in moved_regions: + continue # Skip if already moved + + region1 = adjusted_regions[i] + region2 = adjusted_regions[j] + + if self._regions_overlap(region1, region2): + x1, y1, w1, h1 = region1.bounding_box + x2, y2, w2, h2 = region2.bounding_box + + # Calculate centers using ORIGINAL positions for better logic + orig_x1, orig_y1, _, _ = region1.original_bbox + orig_x2, orig_y2, _, _ = region2.original_bbox + + # Determine which region to move based on original positions + # Move the one that's naturally "later" in reading order + if orig_y2 > orig_y1 + h1/2: # region2 is below + # Move region2 down slightly + min_gap = 10 + new_y2 = y1 + h1 + min_gap + if new_y2 + h2 <= image_height: + region2.bounding_box = (x2, new_y2, w2, h2) + moved_regions.add(j) + self._log(f" 📍 Adjusted region {j} down (preserving order)", "debug") + elif orig_y1 > orig_y2 + h2/2: # region1 is below + # Move region1 down slightly + min_gap = 10 + new_y1 = y2 + h2 + min_gap + if new_y1 + h1 <= image_height: + region1.bounding_box = (x1, new_y1, w1, h1) + moved_regions.add(i) + self._log(f" 📍 Adjusted region {i} down (preserving order)", "debug") + elif orig_x2 > orig_x1 + w1/2: # region2 is to the right + # Move region2 right slightly + min_gap = 10 + new_x2 = x1 + w1 + min_gap + if new_x2 + w2 <= image_width: + region2.bounding_box = (new_x2, y2, w2, h2) + moved_regions.add(j) + self._log(f" 📍 Adjusted region {j} right (preserving order)", "debug") + else: + # Minimal adjustment - just separate them slightly + # without changing their relative order + min_gap = 5 + if y2 >= y1: # region2 is lower or same level + new_y2 = y2 + min_gap + if new_y2 + h2 <= image_height: + region2.bounding_box = (x2, new_y2, w2, h2) + moved_regions.add(j) + else: # region1 is lower + new_y1 = y1 + min_gap + if new_y1 + h1 <= image_height: + region1.bounding_box = (x1, new_y1, w1, h1) + moved_regions.add(i) + + # IMPORTANT: Return in ORIGINAL order to preserve text mapping + # Sort by original_index to restore the original order + adjusted_regions.sort(key=lambda r: r.original_index) + + return adjusted_regions + + # Emote-only mixed font fallback (Meiryo) — primary font remains unchanged + def _get_emote_fallback_font(self, font_size: int): + """Return a Meiryo Bold fallback font if available (preferred), else Meiryo. + Does not change the primary font; used only for emote glyphs. 
+ """ + try: + from PIL import ImageFont as _ImageFont + import os as _os + # Prefer Meiryo Bold TTC first; try common face indices, then regular Meiryo + candidates = [ + ("C:/Windows/Fonts/meiryob.ttc", [0,1,2,3]), # Meiryo Bold (and variants) TTC + ("C:/Windows/Fonts/meiryo.ttc", [1,0,2,3]), # Try bold-ish index first if present + ] + for path, idxs in candidates: + if _os.path.exists(path): + for idx in idxs: + try: + return _ImageFont.truetype(path, font_size, index=idx) + except Exception: + continue + return None + except Exception: + return None + + def _is_emote_char(self, ch: str) -> bool: + # Strict whitelist of emote-like symbols to render with Meiryo + EMOTES = set([ + '\u2661', # ♡ + '\u2665', # ♥ + '\u2764', # ❤ + '\u2605', # ★ + '\u2606', # ☆ + '\u266A', # ♪ + '\u266B', # ♫ + '\u203B', # ※ + ]) + return ch in EMOTES + + def _line_width_emote_mixed(self, draw, text: str, primary_font, emote_font) -> int: + if not emote_font: + bbox = draw.textbbox((0, 0), text, font=primary_font) + return (bbox[2] - bbox[0]) + w = 0 + i = 0 + while i < len(text): + ch = text[i] + # Treat VS16/VS15 as zero-width modifiers + if ch in ('\ufe0f', '\ufe0e'): + i += 1 + continue + f = emote_font if self._is_emote_char(ch) else primary_font + try: + bbox = draw.textbbox((0, 0), ch, font=f) + w += (bbox[2] - bbox[0]) + except Exception: + w += max(1, int(getattr(primary_font, 'size', 12) * 0.6)) + i += 1 + return w + + def _draw_text_line_emote_mixed(self, draw, line: str, x: int, y: int, primary_font, emote_font, + fill_rgba, outline_rgba, outline_width: int, + shadow_enabled: bool, shadow_color_rgba, shadow_off): + cur_x = x + i = 0 + while i < len(line): + ch = line[i] + if ch in ('\ufe0f', '\ufe0e'): + i += 1 + continue + f = emote_font if (emote_font and self._is_emote_char(ch)) else primary_font + # measure + try: + bbox = draw.textbbox((0, 0), ch, font=f) + cw = bbox[2] - bbox[0] + except Exception: + cw = max(1, int(getattr(primary_font, 'size', 12) * 0.6)) + # shadow + if shadow_enabled: + sx, sy = shadow_off + draw.text((cur_x + sx, y + sy), ch, font=f, fill=shadow_color_rgba) + # outline + if outline_width > 0: + for dx in range(-outline_width, outline_width + 1): + for dy in range(-outline_width, outline_width + 1): + if dx == 0 and dy == 0: + continue + draw.text((cur_x + dx, y + dy), ch, font=f, fill=outline_rgba) + # main + draw.text((cur_x, y), ch, font=f, fill=fill_rgba) + cur_x += cw + i += 1 + + + def render_translated_text(self, image: np.ndarray, regions: List[TextRegion]) -> np.ndarray: + """Enhanced text rendering with customizable backgrounds and styles""" + self._log(f"\n🎨 Starting ENHANCED text rendering with custom settings:", "info") + self._log(f" ✅ Using ENHANCED renderer (not the simple version)", "info") + self._log(f" Background: {self.text_bg_style} @ {int(self.text_bg_opacity/255*100)}% opacity", "info") + self._log(f" Text color: RGB{self.text_color}", "info") + self._log(f" Shadow: {'Enabled' if self.shadow_enabled else 'Disabled'}", "info") + self._log(f" Font: {os.path.basename(self.selected_font_style) if self.selected_font_style else 'Default'}", "info") + if self.force_caps_lock: + self._log(f" Force Caps Lock: ENABLED", "info") + + # Convert to PIL for text rendering + import cv2 + pil_image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB)) + + # Get image dimensions for boundary checking + image_height, image_width = image.shape[:2] + + # Create text mask to get accurate render boundaries + # This represents what will actually be inpainted + try: + 
text_mask = self.create_text_mask(image, regions) + use_mask_for_rendering = True + self._log(f" 🎭 Created text mask for accurate render boundaries", "info") + except Exception as e: + text_mask = None + use_mask_for_rendering = False + if not getattr(self, 'concise_logs', False): + self._log(f" ⚠️ Failed to create mask, using polygon bounds: {e}", "warning") + + # Only adjust overlapping regions if constraining to bubbles + if self.constrain_to_bubble: + adjusted_regions = self._adjust_overlapping_regions(regions, image_width, image_height) + else: + # Skip adjustment when not constraining (allows overflow) + adjusted_regions = regions + self._log(" 📝 Using original regions (overflow allowed)", "info") + + # Check if any regions still overlap after adjustment (shouldn't happen, but let's verify) + has_overlaps = False + for i, region1 in enumerate(adjusted_regions): + for region2 in adjusted_regions[i+1:]: + if self._regions_overlap(region1, region2): + has_overlaps = True + self._log(" ⚠️ Regions still overlap after adjustment", "warning") + break + if has_overlaps: + break + + # Handle transparency settings based on overlaps + if has_overlaps and self.text_bg_opacity < 255 and self.text_bg_opacity > 0: + self._log(" ⚠️ Overlapping regions detected with partial transparency", "warning") + self._log(" ℹ️ Rendering with requested transparency level", "info") + + region_count = 0 + + # Decide rendering path based on transparency needs + # For full transparency (opacity = 0) or no overlaps, use RGBA rendering + # For overlaps with partial transparency, we still use RGBA to honor user settings + use_rgba_rendering = True # Always use RGBA for consistent transparency support + + if use_rgba_rendering: + # Transparency-enabled rendering path + pil_image = pil_image.convert('RGBA') + + # Decide parallel rendering from advanced settings + try: + adv = getattr(self, 'manga_settings', {}).get('advanced', {}) if hasattr(self, 'manga_settings') else {} + except Exception: + adv = {} + render_parallel = bool(adv.get('render_parallel', True)) + max_workers = None + try: + max_workers = int(adv.get('max_workers', 4)) + except Exception: + max_workers = 4 + + def _render_one(region, idx): + # Build a separate overlay for this region + from PIL import Image as _PIL + overlay = _PIL.new('RGBA', pil_image.size, (0,0,0,0)) + draw = ImageDraw.Draw(overlay) + # Work on local copy of text for caps lock + tr_text = region.translated_text or '' + if self.force_caps_lock: + tr_text = tr_text.upper() + + # Get original bounding box + x, y, w, h = region.bounding_box + + # CRITICAL: Always prefer mask bounds when available (most accurate) + # Mask bounds are especially important for Azure/Google without RT-DETR, + # where OCR polygons are unreliable. 
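+                # Render-area priority: 1) inpainting mask bounds (most accurate),
+                # 2) polygon-based safe area from region vertices, 3) raw bounding box.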
+ if use_mask_for_rendering and text_mask is not None: + # Use mask bounds directly - most accurate method + safe_x, safe_y, safe_w, safe_h = self.get_safe_text_area( + region, + use_mask_bounds=True, + full_mask=text_mask + ) + render_x, render_y, render_w, render_h = safe_x, safe_y, safe_w, safe_h + elif hasattr(region, 'vertices') and region.vertices: + # Fallback: use polygon-based safe area (for RT-DETR regions) + safe_x, safe_y, safe_w, safe_h = self.get_safe_text_area(region, use_mask_bounds=False) + render_x, render_y, render_w, render_h = safe_x, safe_y, safe_w, safe_h + else: + # Last resort: use simple bounding box + render_x, render_y, render_w, render_h = x, y, w, h + + # Fit text - use render dimensions for proper sizing + if self.custom_font_size: + font_size = self.custom_font_size + lines = self._wrap_text(tr_text, self._get_font(font_size), render_w, draw) + elif self.font_size_mode == 'multiplier': + # Pass use_as_is=True since render dimensions are already safe area + font_size, lines = self._fit_text_to_region(tr_text, render_w, render_h, draw, region, use_as_is=True) + else: + # Pass use_as_is=True since render dimensions are already safe area + font_size, lines = self._fit_text_to_region(tr_text, render_w, render_h, draw, region, use_as_is=True) + # Fonts + font = self._get_font(font_size) + emote_font = self._get_emote_fallback_font(font_size) + # Layout - use render dimensions (safe area if available) + # CRITICAL: Use actual text bbox height for accurate positioning + line_height = font_size * 1.2 + + # Calculate actual total height using text bbox for first line as reference + if lines: + sample_bbox = draw.textbbox((0, 0), lines[0] if lines[0] else "Ay", font=font) + actual_line_height = sample_bbox[3] - sample_bbox[1] + # Use the larger of: computed line_height or actual_line_height + line_height = max(line_height, actual_line_height * 1.1) + + total_height = len(lines) * line_height + + # Ensure text doesn't overflow vertically - constrain start_y + ideal_start_y = render_y + (render_h - total_height) // 2 + # Make sure text starts within render area and doesn't extend past bottom + max_start_y = render_y + render_h - total_height + start_y = max(render_y, min(ideal_start_y, max_start_y)) + + # Debug logging for vertical constraint + if not getattr(self, 'concise_logs', False): + end_y = start_y + total_height + render_end_y = render_y + render_h + overflow = max(0, end_y - render_end_y) + if overflow > 0: + self._log(f" ⚠️ Text would overflow by {overflow}px, constrained to render area", "debug") + self._log(f" 📏 Render area: y={render_y}-{render_end_y} (h={render_h}), Text: y={start_y}-{end_y} (h={total_height:.0f})", "debug") + # BG - use render dimensions + draw_bg = self.text_bg_opacity > 0 + try: + if draw_bg and getattr(self, 'free_text_only_bg_opacity', False): + draw_bg = self._is_free_text_region(region) + except Exception: + pass + if draw_bg: + self._draw_text_background(draw, render_x, render_y, render_w, render_h, lines, font, font_size, start_y, emote_font) + # Text - use render dimensions for centering + for i, line in enumerate(lines): + if emote_font is not None: + text_width = self._line_width_emote_mixed(draw, line, font, emote_font) + else: + tb = draw.textbbox((0,0), line, font=font) + text_width = tb[2]-tb[0] + tx = render_x + (render_w - text_width)//2 + ty = start_y + i*line_height + ow = max(1, font_size // self.outline_width_factor) + if emote_font is not None: + self._draw_text_line_emote_mixed(draw, line, tx, ty, font, emote_font, 
+ self.text_color + (255,), self.outline_color + (255,), ow, + self.shadow_enabled, + self.shadow_color + (255,) if isinstance(self.shadow_color, tuple) and len(self.shadow_color)==3 else (0,0,0,255), + (self.shadow_offset_x, self.shadow_offset_y)) + else: + if self.shadow_enabled: + self._draw_text_shadow(draw, tx, ty, line, font) + for dx in range(-ow, ow+1): + for dy in range(-ow, ow+1): + if dx!=0 or dy!=0: + draw.text((tx+dx, ty+dy), line, font=font, fill=self.outline_color + (255,)) + draw.text((tx, ty), line, font=font, fill=self.text_color + (255,)) + return overlay + + overlays = [] + if render_parallel and len(adjusted_regions) > 1: + from concurrent.futures import ThreadPoolExecutor, as_completed + workers = max(1, min(max_workers, len(adjusted_regions))) + with ThreadPoolExecutor(max_workers=workers) as ex: + fut_to_idx = {ex.submit(_render_one, r, i): i for i, r in enumerate(adjusted_regions) if r.translated_text} + # Collect in order + temp = {} + for fut in as_completed(fut_to_idx): + i = fut_to_idx[fut] + try: + temp[i] = fut.result() + except Exception: + temp[i] = None + overlays = [temp.get(i) for i in range(len(adjusted_regions))] + else: + for i, r in enumerate(adjusted_regions): + if not r.translated_text: + overlays.append(None) + continue + overlays.append(_render_one(r, i)) + + # Composite overlays sequentially + for ov in overlays: + if ov is not None: + pil_image = Image.alpha_composite(pil_image, ov) + region_count += 1 + + # Convert back to RGB + pil_image = pil_image.convert('RGB') + + else: + # This path is now deprecated but kept for backwards compatibility + # Direct rendering without transparency layers + draw = ImageDraw.Draw(pil_image) + + for region in adjusted_regions: + if not region.translated_text: + continue + + self._log(f"DEBUG: Rendering - Original: '{region.text[:30]}...' -> Translated: '{region.translated_text[:30]}...'", "debug") + + + # APPLY CAPS LOCK TRANSFORMATION HERE + if self.force_caps_lock: + region.translated_text = region.translated_text.upper() + + region_count += 1 + self._log(f" Rendering region {region_count}: {region.translated_text[:30]}...", "info") + + # Get original bounding box + x, y, w, h = region.bounding_box + + # CRITICAL: Always prefer mask bounds when available (most accurate) + # Mask bounds are especially important for Azure/Google without RT-DETR, + # where OCR polygons are unreliable. 
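+                # Same render-area priority as the RGBA path above: mask bounds,
+                # then polygon safe area, then the raw bounding box.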
+ if use_mask_for_rendering and text_mask is not None: + # Use mask bounds directly - most accurate method + safe_x, safe_y, safe_w, safe_h = self.get_safe_text_area( + region, + use_mask_bounds=True, + full_mask=text_mask + ) + render_x, render_y, render_w, render_h = safe_x, safe_y, safe_w, safe_h + elif hasattr(region, 'vertices') and region.vertices: + # Fallback: use polygon-based safe area (for RT-DETR regions) + safe_x, safe_y, safe_w, safe_h = self.get_safe_text_area(region, use_mask_bounds=False) + render_x, render_y, render_w, render_h = safe_x, safe_y, safe_w, safe_h + else: + # Last resort: use simple bounding box + render_x, render_y, render_w, render_h = x, y, w, h + + # Find optimal font size - use render dimensions for proper sizing + if self.custom_font_size: + font_size = self.custom_font_size + lines = self._wrap_text(region.translated_text, + self._get_font(font_size), + render_w, draw) + else: + # Pass use_as_is=True since render dimensions are already safe area + font_size, lines = self._fit_text_to_region( + region.translated_text, render_w, render_h, draw, region, use_as_is=True + ) + + # Load font + font = self._get_font(font_size) + + # Calculate text layout - use render dimensions + # CRITICAL: Use actual text bbox height for accurate positioning + line_height = font_size * 1.2 + + # Calculate actual total height using text bbox for first line as reference + if lines: + sample_bbox = draw.textbbox((0, 0), lines[0] if lines[0] else "Ay", font=font) + actual_line_height = sample_bbox[3] - sample_bbox[1] + # Use the larger of: computed line_height or actual_line_height + line_height = max(line_height, actual_line_height * 1.1) + + total_height = len(lines) * line_height + + # Ensure text doesn't overflow vertically - constrain start_y + ideal_start_y = render_y + (render_h - total_height) // 2 + # Make sure text starts within render area and doesn't extend past bottom + max_start_y = render_y + render_h - total_height + start_y = max(render_y, min(ideal_start_y, max_start_y)) + + # Draw opaque background (optionally only for free text) - use render dimensions + draw_bg = self.text_bg_opacity > 0 + try: + if draw_bg and getattr(self, 'free_text_only_bg_opacity', False): + draw_bg = self._is_free_text_region(region) + except Exception: + pass + if draw_bg: + self._draw_text_background(draw, render_x, render_y, render_w, render_h, lines, font, + font_size, start_y) + + # Draw text - use render dimensions + for i, line in enumerate(lines): + # Mixed fallback not supported in legacy path; keep primary measurement + text_bbox = draw.textbbox((0, 0), line, font=font) + text_width = text_bbox[2] - text_bbox[0] + + text_x = render_x + (render_w - text_width) // 2 + text_y = start_y + i * line_height + + if self.shadow_enabled: + self._draw_text_shadow(draw, text_x, text_y, line, font) + + outline_width = max(1, font_size // self.outline_width_factor) + + # Draw outline + for dx in range(-outline_width, outline_width + 1): + for dy in range(-outline_width, outline_width + 1): + if dx != 0 or dy != 0: + draw.text((text_x + dx, text_y + dy), line, + font=font, fill=self.outline_color) + + # Draw main text + draw.text((text_x, text_y), line, font=font, fill=self.text_color) + + # Convert back to numpy array + result = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR) + self._log(f"✅ ENHANCED text rendering complete - rendered {region_count} regions", "info") + return result + + def _is_free_text_region(self, region) -> bool: + """Heuristic: determine if the region is free 
text (not a bubble). + Uses bubble_type when available; otherwise falls back to aspect ratio heuristics. + """ + try: + if hasattr(region, 'bubble_type') and region.bubble_type: + return region.bubble_type == 'free_text' + # Fallback heuristic + x, y, w, h = region.bounding_box + w, h = int(w), int(h) + if h <= 0: + return True + aspect = w / max(1, h) + # Wider, shorter regions are often free text + return aspect >= 2.5 or h < 50 + except Exception: + return False + + def _draw_text_background(self, draw: ImageDraw, x: int, y: int, w: int, h: int, + lines: List[str], font: ImageFont, font_size: int, + start_y: int, emote_font: ImageFont = None): + """Draw background behind text with selected style. + If emote_font is provided, measure lines with emote-only mixing. + """ + # Early return if opacity is 0 (fully transparent) + if self.text_bg_opacity == 0: + return + + # Calculate actual text bounds + line_height = font_size * 1.2 + max_width = 0 + + for line in lines: + if emote_font is not None: + line_width = self._line_width_emote_mixed(draw, line, font, emote_font) + else: + bbox = draw.textbbox((0, 0), line, font=font) + line_width = bbox[2] - bbox[0] + max_width = max(max_width, line_width) + + # Apply size reduction + padding = int(font_size * 0.3) + bg_width = int((max_width + padding * 2) * self.text_bg_reduction) + bg_height = int((len(lines) * line_height + padding * 2) * self.text_bg_reduction) + + # Center background + bg_x = x + (w - bg_width) // 2 + bg_y = int(start_y - padding) + + # Create semi-transparent color + bg_color = (255, 255, 255, self.text_bg_opacity) + + if self.text_bg_style == 'box': + # Rounded rectangle + radius = min(20, bg_width // 10, bg_height // 10) + self._draw_rounded_rectangle(draw, bg_x, bg_y, bg_x + bg_width, + bg_y + bg_height, radius, bg_color) + + elif self.text_bg_style == 'circle': + # Ellipse that encompasses the text + center_x = bg_x + bg_width // 2 + center_y = bg_y + bg_height // 2 + # Make it slightly wider to look more natural + ellipse_width = int(bg_width * 1.2) + ellipse_height = bg_height + + draw.ellipse([center_x - ellipse_width // 2, center_y - ellipse_height // 2, + center_x + ellipse_width // 2, center_y + ellipse_height // 2], + fill=bg_color) + + elif self.text_bg_style == 'wrap': + # Individual background for each line + for i, line in enumerate(lines): + bbox = draw.textbbox((0, 0), line, font=font) + line_width = bbox[2] - bbox[0] + + line_bg_width = int((line_width + padding) * self.text_bg_reduction) + line_bg_x = x + (w - line_bg_width) // 2 + line_bg_y = int(start_y + i * line_height - padding // 2) + line_bg_height = int(line_height + padding // 2) + + # Draw rounded rectangle for each line + radius = min(10, line_bg_width // 10, line_bg_height // 10) + self._draw_rounded_rectangle(draw, line_bg_x, line_bg_y, + line_bg_x + line_bg_width, + line_bg_y + line_bg_height, radius, bg_color) + + def _draw_text_shadow(self, draw: ImageDraw, x: int, y: int, text: str, font: ImageFont): + """Draw text shadow with optional blur effect""" + if self.shadow_blur == 0: + # Simple sharp shadow + shadow_x = x + self.shadow_offset_x + shadow_y = y + self.shadow_offset_y + draw.text((shadow_x, shadow_y), text, font=font, fill=self.shadow_color) + else: + # Blurred shadow (simulated with multiple layers) + blur_range = self.shadow_blur + opacity_step = 80 // (blur_range + 1) # Distribute opacity across blur layers + + for blur_offset in range(blur_range, 0, -1): + layer_opacity = opacity_step * (blur_range - blur_offset + 1) + 
shadow_color_with_opacity = self.shadow_color + (layer_opacity,) + + # Draw shadow at multiple positions for blur effect + for dx in range(-blur_offset, blur_offset + 1): + for dy in range(-blur_offset, blur_offset + 1): + if dx*dx + dy*dy <= blur_offset*blur_offset: # Circular blur + shadow_x = x + self.shadow_offset_x + dx + shadow_y = y + self.shadow_offset_y + dy + draw.text((shadow_x, shadow_y), text, font=font, + fill=shadow_color_with_opacity) + + def _draw_rounded_rectangle(self, draw: ImageDraw, x1: int, y1: int, + x2: int, y2: int, radius: int, fill): + """Draw a rounded rectangle""" + # Draw the main rectangle + draw.rectangle([x1 + radius, y1, x2 - radius, y2], fill=fill) + draw.rectangle([x1, y1 + radius, x2, y2 - radius], fill=fill) + + # Draw the corners + draw.pieslice([x1, y1, x1 + 2 * radius, y1 + 2 * radius], 180, 270, fill=fill) + draw.pieslice([x2 - 2 * radius, y1, x2, y1 + 2 * radius], 270, 360, fill=fill) + draw.pieslice([x1, y2 - 2 * radius, x1 + 2 * radius, y2], 90, 180, fill=fill) + draw.pieslice([x2 - 2 * radius, y2 - 2 * radius, x2, y2], 0, 90, fill=fill) + + def _get_font(self, font_size: int) -> ImageFont: + """Get font with specified size, using selected style if available""" + font_path = self.selected_font_style or self.font_path + + if font_path: + try: + return ImageFont.truetype(font_path, font_size) + except: + pass + + return ImageFont.load_default() + + def _pil_word_wrap(self, text: str, font_path: str, roi_width: int, roi_height: int, + init_font_size: int, min_font_size: int, draw: ImageDraw) -> Tuple[str, int]: + """Comic-translate's pil_word_wrap algorithm - top-down font sizing with column wrapping. + + Break long text to multiple lines, and reduce point size until all text fits within bounds. + This is a direct port from comic-translate for better text fitting. + """ + from hyphen_textwrap import wrap as hyphen_wrap + + mutable_message = text + font_size = init_font_size + + def eval_metrics(txt, font): + """Calculate width/height of multiline text. + + CRITICAL: Must match the rendering logic exactly to prevent overflow. + Rendering uses font_size * 1.2 as line_height, so we must do the same here. 
+ """ + lines = txt.split('\n') + if not lines: + return (0, 0) + + max_width = 0 + + for line in lines: + bbox = draw.textbbox((0, 0), line if line else "A", font=font) + line_width = bbox[2] - bbox[0] + max_width = max(max_width, line_width) + + # Calculate height using same logic as rendering: + # line_height = max(font_size * 1.2, actual_bbox_height * 1.1) + sample_bbox = draw.textbbox((0, 0), lines[0] if lines[0] else "Ay", font=font) + actual_line_height = sample_bbox[3] - sample_bbox[1] + line_height = max(font_size * 1.2, actual_line_height * 1.1) + total_height = len(lines) * line_height + + return (max_width, total_height) + + # Get initial font + try: + if font_path: + font = ImageFont.truetype(font_path, font_size) + else: + font = ImageFont.load_default() + except Exception: + font = ImageFont.load_default() + + # Top-down algorithm: start with large font, shrink until it fits + while font_size > min_font_size: + try: + if font_path: + font = ImageFont.truetype(font_path, font_size) + else: + font = ImageFont.load_default() + except Exception: + font = ImageFont.load_default() + + width, height = eval_metrics(mutable_message, font) + + if height > roi_height: + # Text is too tall, reduce font size + font_size -= 0.75 + mutable_message = text # Restore original text + elif width > roi_width: + # Text is too wide, try wrapping with column optimization + columns = len(mutable_message) + + # Search for optimal column width + while columns > 0: + columns -= 1 + if columns == 0: + break + + # Use hyphen_wrap for smart wrapping + try: + wrapped = '\n'.join(hyphen_wrap( + text, columns, + break_on_hyphens=False, + break_long_words=False, + hyphenate_broken_words=True + )) + wrapped_width, _ = eval_metrics(wrapped, font) + if wrapped_width <= roi_width: + mutable_message = wrapped + break + except Exception: + # Fallback to simple wrapping if hyphen_wrap fails + break + + if columns < 1: + # Couldn't find good column width, reduce font size + font_size -= 0.75 + mutable_message = text # Restore original text + else: + # Text fits! + break + + # If we hit minimum font size, do brute-force optimization + if font_size <= min_font_size: + font_size = min_font_size + mutable_message = text + + try: + if font_path: + font = ImageFont.truetype(font_path, font_size) + else: + font = ImageFont.load_default() + except Exception: + font = ImageFont.load_default() + + # Brute force: minimize cost function (width - roi_width)^2 + (height - roi_height)^2 + min_cost = 1e9 + min_text = text + + for columns in range(1, min(len(text) + 1, 100)): # Limit iterations for performance + try: + wrapped_text = '\n'.join(hyphen_wrap( + text, columns, + break_on_hyphens=False, + break_long_words=False, + hyphenate_broken_words=True + )) + wrapped_width, wrapped_height = eval_metrics(wrapped_text, font) + cost = (wrapped_width - roi_width)**2 + (wrapped_height - roi_height)**2 + + if cost < min_cost: + min_cost = cost + min_text = wrapped_text + except Exception: + continue + + mutable_message = min_text + + return mutable_message, int(font_size) + + def get_mask_bounds(self, region: TextRegion, full_mask: np.ndarray) -> Tuple[int, int, int, int]: + """Extract the actual mask boundaries for a region. + + For non-Azure/Google OCR providers (manga-ocr, etc.), use RT-DETR bubble_bounds directly. + For Azure/Google, extract from the mask overlap to handle full-page OCR. 
+ """ + # PRIORITY 1: For manga-ocr and other RT-DETR-guided OCR providers, use bubble_bounds directly + # These providers already OCR within RT-DETR bubbles, so bubble_bounds IS the correct render area + is_azure_google = getattr(self, 'ocr_provider', '').lower() in ('azure', 'google') + if not is_azure_google and hasattr(region, 'bubble_bounds') and region.bubble_bounds: + # Use the RT-DETR bubble bounds directly - this is the full bubble area + bx, by, bw, bh = region.bubble_bounds + if not getattr(self, 'concise_logs', False): + self._log(f" ✅ Using RT-DETR bubble_bounds for mask: {int(bw)}×{int(bh)} at ({int(bx)}, {int(by)})", "debug") + return int(bx), int(by), int(bw), int(bh) + elif not is_azure_google: + # Debug: Why are we not using bubble_bounds? + if not getattr(self, 'concise_logs', False): + has_attr = hasattr(region, 'bubble_bounds') + is_none = getattr(region, 'bubble_bounds', None) is None if has_attr else True + #self._log(f" ⚠️ manga-ocr but NO bubble_bounds (has_attr={has_attr}, is_none={is_none})", "warning") + + # PRIORITY 2: For Azure/Google or when bubble_bounds not available, extract from mask + if full_mask is not None: + try: + import cv2 + import numpy as np + + # Create a blank mask for this region + region_mask = np.zeros(full_mask.shape, dtype=np.uint8) + + # Fill the region's area in the mask + if hasattr(region, 'vertices') and region.vertices: + vertices_np = np.array(region.vertices, dtype=np.int32) + cv2.fillPoly(region_mask, [vertices_np], 255) + else: + x, y, w, h = region.bounding_box + cv2.rectangle(region_mask, (int(x), int(y)), (int(x+w), int(y+h)), 255, -1) + + # Find where this region overlaps with the full mask + overlap = cv2.bitwise_and(region_mask, full_mask) + + # Get bounding box of the overlap + contours, _ = cv2.findContours(overlap, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + if contours: + # Get the largest contour (should be the main text region) + largest_contour = max(contours, key=cv2.contourArea) + x, y, w, h = cv2.boundingRect(largest_contour) + + if w > 0 and h > 0: + return x, y, w, h + except Exception as e: + if not getattr(self, 'concise_logs', False): + self._log(f" ⚠️ Failed to extract mask bounds: {e}, falling back", "debug") + + # Fallback to original bounding box + x, y, w, h = region.bounding_box + return int(x), int(y), int(w), int(h) + + def get_safe_text_area(self, region: TextRegion, use_mask_bounds: bool = False, full_mask: np.ndarray = None) -> Tuple[int, int, int, int]: + """Get safe text area with algorithm-aware shrink strategy. + + Respects font_algorithm and auto_fit_style settings: + - conservative: Comic-translate's 15% shrink (85% usable) + - smart: Adaptive 10-20% shrink based on bubble shape + - aggressive: Minimal 5% shrink (95% usable) + + Also applies OCR-specific adjustments for Azure/Google without RT-DETR guidance. 
+ + Args: + region: The text region to calculate safe area for + use_mask_bounds: If True, use actual mask boundaries instead of shrinking from polygon + full_mask: The complete mask image (required if use_mask_bounds=True) + """ + # Get font sizing settings from config + try: + manga_settings = self.main_gui.config.get('manga_settings', {}) + font_sizing = manga_settings.get('font_sizing', {}) + rendering = manga_settings.get('rendering', {}) + + font_algorithm = font_sizing.get('algorithm', 'smart') + auto_fit_style = rendering.get('auto_fit_style', 'balanced') + + # Check if using Azure/Google without RT-DETR guidance + ocr_settings = manga_settings.get('ocr', {}) + use_rtdetr_guide = ocr_settings.get('use_rtdetr_for_ocr_regions', True) + is_azure_google = getattr(self, 'ocr_provider', '').lower() in ('azure', 'google') + needs_aggressive = is_azure_google and not use_rtdetr_guide + except Exception: + font_algorithm = 'smart' + auto_fit_style = 'balanced' + needs_aggressive = False + + # Base margin factor by algorithm + if font_algorithm == 'conservative': + # Comic-translate default: 15% shrink = 85% usable + base_margin = 0.85 + elif font_algorithm == 'aggressive': + # Aggressive: 5% shrink = 95% usable + base_margin = 0.95 + else: # 'smart' + # Smart: adaptive based on auto_fit_style + if auto_fit_style == 'compact': + base_margin = 0.82 # 18% shrink - tight fit + elif auto_fit_style == 'readable': + base_margin = 0.92 # 8% shrink - loose fit + else: # 'balanced' + base_margin = 0.87 # 13% shrink - balanced + + # SPECIAL CASE: Azure/Google without RT-DETR guidance + # Their OCR is too conservative, so we need more aggressive sizing + if needs_aggressive: + # Boost margin by 5-8% to compensate for conservative OCR bounds + base_margin = min(0.98, base_margin + 0.08) + self._log(f" 🎯 Azure/Google non-RT-DETR mode: Using aggressive {int(base_margin*100)}% margin", "debug") + + # OPTION 1: Use mask boundaries directly (most accurate) + if use_mask_bounds and full_mask is not None: + mask_x, mask_y, mask_w, mask_h = self.get_mask_bounds(region, full_mask) + # Use the FULL mask bounds directly - the mask already represents the accurate + # inpainted area from the inpainting process. The inpainting itself already includes + # padding/margins, so we don't need to shrink further. Using 100% maximizes text + # utilization and prevents the "text too small" issue. 
+ + # CRITICAL: Use 100% of mask area for maximum text utilization + # The inpainting mask already has built-in margins from the mask generation process + safe_x, safe_y, safe_w, safe_h = mask_x, mask_y, mask_w, mask_h + + if not getattr(self, 'concise_logs', False): + self._log(f" 📐 Using FULL mask bounds: {mask_w}×{mask_h} (100% utilization)", "debug") + self._log(f" Mask position: ({mask_x}, {mask_y})", "debug") + if hasattr(region, 'bounding_box'): + orig_x, orig_y, orig_w, orig_h = region.bounding_box + self._log(f" Original bbox: {orig_w}×{orig_h} at ({orig_x}, {orig_y})", "debug") + return safe_x, safe_y, safe_w, safe_h + + # OPTION 2: Handle regions without vertices (simple bounding box) + if not hasattr(region, 'vertices') or not region.vertices: + x, y, w, h = region.bounding_box + safe_width = int(w * base_margin) + safe_height = int(h * base_margin) + safe_x = x + (w - safe_width) // 2 + safe_y = y + (h - safe_height) // 2 + return safe_x, safe_y, safe_width, safe_height + + # Calculate convexity for shape-aware adjustment (only for 'smart' algorithm) + margin_factor = base_margin + if font_algorithm == 'smart': + try: + # Convert vertices to numpy array with correct dtype + vertices = np.array(region.vertices, dtype=np.int32) + hull = cv2.convexHull(vertices) + hull_area = cv2.contourArea(hull) + poly_area = cv2.contourArea(vertices) + + if poly_area > 0: + convexity = hull_area / poly_area + else: + convexity = 1.0 + + # Adjust margin based on bubble shape + if convexity < 0.85: # Speech bubble with tail + # More aggressive shrink for tailed bubbles (avoid the tail) + margin_factor = base_margin - 0.10 + if not getattr(self, 'concise_logs', False): + self._log(f" Speech bubble with tail: {int(margin_factor*100)}% usable area", "debug") + elif convexity > 0.98: # Rectangular/square + # Less shrink for rectangular regions + margin_factor = base_margin + 0.05 + if not getattr(self, 'concise_logs', False): + self._log(f" Rectangular region: {int(margin_factor*100)}% usable area", "debug") + else: # Regular oval bubble + # Use base margin + margin_factor = base_margin + if not getattr(self, 'concise_logs', False): + self._log(f" Regular bubble: {int(margin_factor*100)}% usable area", "debug") + + # Clamp margin factor + margin_factor = max(0.70, min(0.98, margin_factor)) + except Exception: + margin_factor = base_margin + + # Convert vertices to numpy array for boundingRect + vertices_np = np.array(region.vertices, dtype=np.int32) + x, y, w, h = cv2.boundingRect(vertices_np) + + safe_width = int(w * margin_factor) + safe_height = int(h * margin_factor) + safe_x = x + (w - safe_width) // 2 + safe_y = y + (h - safe_height) // 2 + + return safe_x, safe_y, safe_width, safe_height + + def _fit_text_to_region(self, text: str, max_width: int, max_height: int, draw: ImageDraw, region: TextRegion = None, use_as_is: bool = False) -> Tuple[int, List[str]]: + """Find optimal font size using comic-translate's pil_word_wrap algorithm with algorithm-aware adjustments + + Args: + text: Text to fit + max_width: Maximum width available + max_height: Maximum height available + draw: PIL ImageDraw object + region: Optional TextRegion for safe area calculation + use_as_is: If True, use max_width/max_height directly without further shrinking + """ + + # Get font sizing settings + try: + manga_settings = self.main_gui.config.get('manga_settings', {}) + font_sizing = manga_settings.get('font_sizing', {}) + font_algorithm = font_sizing.get('algorithm', 'smart') + prefer_larger = 
font_sizing.get('prefer_larger', True) + except Exception: + font_algorithm = 'smart' + prefer_larger = True + + # Get usable area + if use_as_is: + # Dimensions are already safe area - use them directly (no double shrinking) + usable_width = max_width + usable_height = max_height + elif region and hasattr(region, 'vertices') and region.vertices: + # Calculate safe area from region + safe_x, safe_y, safe_width, safe_height = self.get_safe_text_area(region) + usable_width = safe_width + usable_height = safe_height + else: + # Fallback: use algorithm-aware margin + if font_algorithm == 'conservative': + margin = 0.85 # Comic-translate default + elif font_algorithm == 'aggressive': + margin = 0.95 + else: # smart + margin = 0.87 + usable_width = int(max_width * margin) + usable_height = int(max_height * margin) + + # Font size limits (GUI settings with algorithm adjustments) + min_font_size = max(10, self.min_readable_size) + + # Adjust initial font size based on algorithm and prefer_larger + base_init = min(40, self.max_font_size_limit) + if font_algorithm == 'aggressive' and prefer_larger: + # Start higher for aggressive mode + init_font_size = min(int(base_init * 1.2), self.max_font_size_limit) + elif font_algorithm == 'conservative': + # Start lower for conservative mode + init_font_size = int(base_init * 0.9) + else: + init_font_size = base_init + + # Use comic-translate's pil_word_wrap algorithm + wrapped_text, final_font_size = self._pil_word_wrap( + text=text, + font_path=self.selected_font_style or self.font_path, + roi_width=usable_width, + roi_height=usable_height, + init_font_size=init_font_size, + min_font_size=min_font_size, + draw=draw + ) + + # Convert wrapped text to lines + lines = wrapped_text.split('\n') if wrapped_text else [text] + + # Log font algorithm used (debug) + if not getattr(self, 'concise_logs', False): + self._log(f" Font algorithm: {font_algorithm}, init_size: {init_font_size}, final_size: {final_font_size}", "debug") + + # Apply multiplier if in multiplier mode + if self.font_size_mode == 'multiplier': + target_size = int(final_font_size * self.font_size_multiplier) + + # Check if multiplied size still fits (if constrained) + if self.constrain_to_bubble: + # Re-wrap at target size to check fit + test_wrapped, _ = self._pil_word_wrap( + text=text, + font_path=self.selected_font_style or self.font_path, + roi_width=usable_width, + roi_height=usable_height, + init_font_size=target_size, + min_font_size=target_size, # Force this size + draw=draw + ) + test_lines = test_wrapped.split('\n') if test_wrapped else [text] + test_height = len(test_lines) * target_size * 1.2 + + if test_height <= usable_height: + final_font_size = target_size + lines = test_lines + else: + self._log(f" Multiplier {self.font_size_multiplier}x would exceed bubble", "debug") + else: + # Not constrained, use multiplied size + final_font_size = target_size + lines = wrapped_text.split('\n') if wrapped_text else [text] + + self._log(f" Font sizing: text_len={len(text)}, size={final_font_size}, lines={len(lines)}", "debug") + + return final_font_size, lines + + def _fit_text_simple_topdown(self, text: str, usable_width: int, usable_height: int, + draw: ImageDraw, min_size: int, max_size: int) -> Tuple[int, List[str]]: + """Simple top-down approach - start large and shrink only if needed""" + # Start from a reasonable large size + start_size = int(max_size * 0.8) + + for font_size in range(start_size, min_size - 1, -2): # Step by 2 for speed + font = self._get_font(font_size) + lines = 
self._wrap_text(text, font, usable_width, draw) + + line_height = font_size * 1.2 # Tighter for overlaps + total_height = len(lines) * line_height + + if total_height <= usable_height: + return font_size, lines + + # If nothing fits, use minimum + font = self._get_font(min_size) + lines = self._wrap_text(text, font, usable_width, draw) + return min_size, lines + + def _check_potential_overlap(self, region: TextRegion) -> bool: + """Check if this region might overlap with others based on position""" + if not region or not hasattr(region, 'bounding_box'): + return False + + x, y, w, h = region.bounding_box + + # Simple heuristic: small regions or regions at edges might overlap + # You can make this smarter based on your needs + if w < 100 or h < 50: # Small bubbles often overlap + return True + + # Add more overlap detection logic here if needed + # For now, default to no overlap for larger bubbles + return False + + def _wrap_text(self, text: str, font: ImageFont, max_width: int, draw: ImageDraw) -> List[str]: + """Wrap text to fit within max_width with optional strict wrapping""" + # Handle empty text + if not text.strip(): + return [] + + # Only enforce width check if constrain_to_bubble is enabled + if self.constrain_to_bubble and max_width <= 0: + self._log(f" ⚠️ Invalid max_width: {max_width}, using fallback", "warning") + return [text[:20] + "..."] if len(text) > 20 else [text] + + words = text.split() + lines = [] + current_line = [] + + for word in words: + # Check if word alone is too long + word_bbox = draw.textbbox((0, 0), word, font=font) + word_width = word_bbox[2] - word_bbox[0] + + if word_width > max_width and len(word) > 1: + # Word is too long for the bubble + if current_line: + # Save current line first + lines.append(' '.join(current_line)) + current_line = [] + + if self.strict_text_wrapping: + # STRICT MODE: Force break the word to fit within bubble + # This is the original behavior that ensures text stays within bounds + broken_parts = self._force_break_word(word, font, max_width, draw) + lines.extend(broken_parts) + else: + # RELAXED MODE: Keep word whole (may exceed bubble) + lines.append(word) + # self._log(f" ⚠️ Word '{word}' exceeds bubble width, keeping whole", "warning") + else: + # Normal word processing + if current_line: + test_line = ' '.join(current_line + [word]) + else: + test_line = word + + text_bbox = draw.textbbox((0, 0), test_line, font=font) + text_width = text_bbox[2] - text_bbox[0] + + if text_width <= max_width: + current_line.append(word) + else: + if current_line: + lines.append(' '.join(current_line)) + current_line = [word] + else: + # Single word that fits + lines.append(word) + + if current_line: + lines.append(' '.join(current_line)) + + return lines + + # Keep the existing _force_break_word method as is (the complete version from earlier): + def _force_break_word(self, word: str, font: ImageFont, max_width: int, draw: ImageDraw) -> List[str]: + """Force break a word that's too long to fit""" + lines = [] + + # Binary search to find how many characters fit + low = 1 + high = len(word) + chars_that_fit = 1 + + while low <= high: + mid = (low + high) // 2 + test_text = word[:mid] + bbox = draw.textbbox((0, 0), test_text, font=font) + width = bbox[2] - bbox[0] + + if width <= max_width: + chars_that_fit = mid + low = mid + 1 + else: + high = mid - 1 + + # Break the word into pieces + remaining = word + while remaining: + if len(remaining) <= chars_that_fit: + # Last piece + lines.append(remaining) + break + else: + # Find the best break 
point + break_at = chars_that_fit + + # Try to break at a more natural point if possible + # Look for vowel-consonant boundaries for better hyphenation + for i in range(min(chars_that_fit, len(remaining) - 1), max(1, chars_that_fit - 5), -1): + if i < len(remaining) - 1: + current_char = remaining[i].lower() + next_char = remaining[i + 1].lower() + + # Good hyphenation points: + # - Between consonant and vowel + # - After prefix (un-, re-, pre-, etc.) + # - Before suffix (-ing, -ed, -er, etc.) + if (current_char in 'bcdfghjklmnpqrstvwxyz' and next_char in 'aeiou') or \ + (current_char in 'aeiou' and next_char in 'bcdfghjklmnpqrstvwxyz'): + break_at = i + 1 + break + + # Add hyphen if we're breaking in the middle of a word + if break_at < len(remaining): + # Check if adding hyphen still fits + test_with_hyphen = remaining[:break_at] + '-' + bbox = draw.textbbox((0, 0), test_with_hyphen, font=font) + width = bbox[2] - bbox[0] + + if width <= max_width: + lines.append(remaining[:break_at] + '-') + else: + # Hyphen doesn't fit, break without it + lines.append(remaining[:break_at]) + else: + lines.append(remaining[:break_at]) + + remaining = remaining[break_at:] + + return lines + + def _estimate_font_size_for_region(self, region: TextRegion) -> int: + """Estimate the likely font size for a text region based on its dimensions and text content""" + x, y, w, h = region.bounding_box + text_length = len(region.text.strip()) + + if text_length == 0: + return self.max_font_size // 2 # Default middle size + + # Calculate area per character + area = w * h + area_per_char = area / text_length + + # Estimate font size based on area per character + # These ratios are approximate and based on typical manga text + if area_per_char > 800: + estimated_size = int(self.max_font_size * 0.8) + elif area_per_char > 400: + estimated_size = int(self.max_font_size * 0.6) + elif area_per_char > 200: + estimated_size = int(self.max_font_size * 0.4) + elif area_per_char > 100: + estimated_size = int(self.max_font_size * 0.3) + else: + estimated_size = int(self.max_font_size * 0.2) + + # Clamp to reasonable bounds + return max(self.min_font_size, min(estimated_size, self.max_font_size)) + + + def _split_bubble_if_needed(self, bubble_regions: List[TextRegion]) -> List[List[TextRegion]]: + """Split a detected bubble if it actually contains multiple separate speech bubbles + + This happens when RT-DETR detects one large bounding box over vertically or + horizontally stacked speech bubbles. We detect this by checking if text regions + within the bubble have LARGE gaps between them. + + For manga-ocr and other non-Google/Azure OCR providers, RT-DETR detection is trusted + completely and splitting is disabled. 
+ + Returns: + List of region groups - each group represents a separate bubble + """ + # For manga-ocr and other providers that use RT-DETR regions directly, trust RT-DETR + # Splitting is only needed for Google/Azure which do full-page OCR + if hasattr(self, 'ocr_provider') and self.ocr_provider not in ('google', 'azure'): + return [bubble_regions] # Trust RT-DETR completely for these providers + + if len(bubble_regions) <= 1: + return [bubble_regions] # Single region, no splitting needed + + # Sort regions by position (top-to-bottom, left-to-right) + sorted_regions = sorted(bubble_regions, key=lambda r: (r.bounding_box[1], r.bounding_box[0])) + + # Group regions that should be together + groups = [[sorted_regions[0]]] + + for i in range(1, len(sorted_regions)): + current_region = sorted_regions[i] + cx, cy, cw, ch = current_region.bounding_box + placed = False + + # Try to place in an existing group + for group in groups: + # Check if current region should be in this group + # We look at the closest region in the group + min_gap = float('inf') + min_vertical_gap = float('inf') + min_horizontal_gap = float('inf') + closest_region = None + + for group_region in group: + gx, gy, gw, gh = group_region.bounding_box + + # Calculate gap between regions + horizontal_gap = 0 + if gx + gw < cx: + horizontal_gap = cx - (gx + gw) + elif cx + cw < gx: + horizontal_gap = gx - (cx + cw) + + vertical_gap = 0 + if gy + gh < cy: + vertical_gap = cy - (gy + gh) + elif cy + ch < gy: + vertical_gap = gy - (cy + ch) + + # Use Euclidean distance as overall gap measure + gap = (horizontal_gap ** 2 + vertical_gap ** 2) ** 0.5 + + if gap < min_gap: + min_gap = gap + closest_region = group_region + # Store individual gaps for aggressive vertical splitting + min_vertical_gap = vertical_gap + min_horizontal_gap = horizontal_gap + + # AGGRESSIVE SPLIT for MANGA: Check for large vertical gaps first + # Manga often has vertically stacked speech bubbles that RT-DETR detects as one + if closest_region and min_vertical_gap > 50: + # Large vertical gap (>50px) - likely separate bubbles stacked vertically + # Check if there's NO vertical overlap (completely separate) + gx, gy, gw, gh = closest_region.bounding_box + vertical_overlap = min(gy + gh, cy + ch) - max(gy, cy) + + if vertical_overlap <= 0: + # No vertical overlap at all - definitely separate bubbles + # Create new group (don't merge) + pass # Will create new group below + else: + # Some overlap despite gap - check other criteria + horizontal_overlap = min(gx + gw, cx + cw) - max(gx, cx) + min_width = min(gw, cw) + min_height = min(gh, ch) + + # Only merge if there's very strong overlap (>75%) + if (horizontal_overlap > min_width * 0.75 or + vertical_overlap > min_height * 0.75): + group.append(current_region) + placed = True + break + # BALANCED SPLIT CRITERIA: + # Split if gap is > 21px unless there's strong overlap (>62%) + elif closest_region and min_gap < 21: # Within 21px - likely same bubble + group.append(current_region) + placed = True + break + elif closest_region: + # Check if they have significant overlap despite the gap + gx, gy, gw, gh = closest_region.bounding_box + + horizontal_overlap = min(gx + gw, cx + cw) - max(gx, cx) + vertical_overlap = min(gy + gh, cy + ch) - max(gy, cy) + + min_width = min(gw, cw) + min_height = min(gh, ch) + + # If they have strong overlap (>62%) in either direction, keep together + if (horizontal_overlap > min_width * 0.62 or + vertical_overlap > min_height * 0.62): + group.append(current_region) + placed = True + break + 
+ # If not placed in any existing group, create a new group + if not placed: + groups.append([current_region]) + + # Log if we split the bubble + if len(groups) > 1: + self._log(f" 🔪 SPLIT: Detected bubble actually contains {len(groups)} separate bubbles", "warning") + for idx, group in enumerate(groups): + group_texts = [r.text[:15] + '...' for r in group] + self._log(f" Sub-bubble {idx + 1}: {len(group)} regions - {group_texts}", "info") + + return groups + + def _likely_different_bubbles(self, region1: TextRegion, region2: TextRegion) -> bool: + """Detect if regions are likely in different speech bubbles based on spatial patterns""" + x1, y1, w1, h1 = region1.bounding_box + x2, y2, w2, h2 = region2.bounding_box + + # Calculate gaps and positions + horizontal_gap = 0 + if x1 + w1 < x2: + horizontal_gap = x2 - (x1 + w1) + elif x2 + w2 < x1: + horizontal_gap = x1 - (x2 + w2) + + vertical_gap = 0 + if y1 + h1 < y2: + vertical_gap = y2 - (y1 + h1) + elif y2 + h2 < y1: + vertical_gap = y1 - (y2 + h2) + + # Calculate relative positions + center_x1 = x1 + w1 / 2 + center_x2 = x2 + w2 / 2 + center_y1 = y1 + h1 / 2 + center_y2 = y2 + h2 / 2 + + horizontal_center_diff = abs(center_x1 - center_x2) + avg_width = (w1 + w2) / 2 + + # FIRST CHECK: Very small gaps always indicate same bubble + if horizontal_gap < 15 and vertical_gap < 15: + return False # Definitely same bubble + + # STRICTER CHECK: For regions that are horizontally far apart + # Even if they pass the gap threshold, check if they're likely different bubbles + if horizontal_gap > 40: # Significant horizontal gap + # Unless they're VERY well aligned vertically, they're different bubbles + vertical_overlap = min(y1 + h1, y2 + h2) - max(y1, y2) + min_height = min(h1, h2) + + if vertical_overlap < min_height * 0.8: # Need 80% overlap to be same bubble + return True + + # SPECIFIC FIX: Check for multi-line text pattern + # If regions are well-aligned horizontally, they're likely in the same bubble + if horizontal_center_diff < avg_width * 0.35: # Relaxed from 0.2 to 0.35 + # Additional checks for multi-line text: + # 1. Similar widths (common in speech bubbles) + width_ratio = max(w1, w2) / min(w1, w2) if min(w1, w2) > 0 else 999 + + # 2. 
Reasonable vertical spacing (not too far apart) + avg_height = (h1 + h2) / 2 + + if width_ratio < 2.0 and vertical_gap < avg_height * 1.5: + # This is very likely multi-line text in the same bubble + return False + + # Pattern 1: Side-by-side bubbles (common in manga) + # Characteristics: Significant horizontal gap, similar vertical position + if horizontal_gap > 50: # Increased from 25 to avoid false positives + vertical_overlap = min(y1 + h1, y2 + h2) - max(y1, y2) + min_height = min(h1, h2) + + # If they have good vertical overlap, they're likely side-by-side bubbles + if vertical_overlap > min_height * 0.5: + return True + + # Pattern 2: Stacked bubbles + # Characteristics: Significant vertical gap, similar horizontal position + # CRITICAL: Lower threshold to catch vertically stacked bubbles in manga + if vertical_gap > 15: # Reduced from 25 to catch closer stacked bubbles + horizontal_overlap = min(x1 + w1, x2 + w2) - max(x1, x2) + min_width = min(w1, w2) + + # If they have good horizontal overlap, they're likely stacked bubbles + if horizontal_overlap > min_width * 0.5: + return True + + # Pattern 3: Diagonal arrangement (different speakers) + # If regions are separated both horizontally and vertically + if horizontal_gap > 20 and vertical_gap > 20: + return True + + # Pattern 4: Large gap relative to region size + avg_height = (h1 + h2) / 2 + + if horizontal_gap > avg_width * 0.6 or vertical_gap > avg_height * 0.6: + return True + + return False + + def _regions_should_merge(self, region1: TextRegion, region2: TextRegion, threshold: int = 50) -> bool: + """Determine if two regions should be merged - with bubble detection""" + + # First check if they're close enough spatially + if not self._regions_are_nearby(region1, region2, threshold): + return False + + x1, y1, w1, h1 = region1.bounding_box + x2, y2, w2, h2 = region2.bounding_box + + # ONLY apply special handling if regions are from Azure + if hasattr(region1, 'from_azure') and region1.from_azure: + # Azure lines are typically small - be more lenient + avg_height = (h1 + h2) / 2 + if avg_height < 50: # Likely single lines + self._log(f" Azure lines detected, using lenient merge criteria", "info") + + center_x1 = x1 + w1 / 2 + center_x2 = x2 + w2 / 2 + horizontal_center_diff = abs(center_x1 - center_x2) + avg_width = (w1 + w2) / 2 + + # If horizontally aligned and nearby, merge them + if horizontal_center_diff < avg_width * 0.7: + return True + + # GOOGLE LOGIC - unchanged from your original + # SPECIAL CASE: If one region is very small, bypass strict checks + area1 = w1 * h1 + area2 = w2 * h2 + if area1 < 500 or area2 < 500: + self._log(f" Small text region (area: {min(area1, area2)}), bypassing strict alignment checks", "info") + return True + + # Calculate actual gaps between regions + horizontal_gap = 0 + if x1 + w1 < x2: + horizontal_gap = x2 - (x1 + w1) + elif x2 + w2 < x1: + horizontal_gap = x1 - (x2 + w2) + + vertical_gap = 0 + if y1 + h1 < y2: + vertical_gap = y2 - (y1 + h1) + elif y2 + h2 < y1: + vertical_gap = y1 - (y2 + h2) + + # Calculate centers for alignment checks + center_x1 = x1 + w1 / 2 + center_x2 = x2 + w2 / 2 + center_y1 = y1 + h1 / 2 + center_y2 = y2 + h2 / 2 + + horizontal_center_diff = abs(center_x1 - center_x2) + vertical_center_diff = abs(center_y1 - center_y2) + + avg_width = (w1 + w2) / 2 + avg_height = (h1 + h2) / 2 + + # Determine text orientation and layout + is_horizontal_text = horizontal_gap > vertical_gap or (horizontal_center_diff < avg_width * 0.5) + is_vertical_text = vertical_gap > 
horizontal_gap or (vertical_center_diff < avg_height * 0.5) + + # PRELIMINARY CHECK: If regions overlap or are extremely close, merge them + # This handles text that's clearly in the same bubble + + # Check for overlap + overlap_x = max(0, min(x1 + w1, x2 + w2) - max(x1, x2)) + overlap_y = max(0, min(y1 + h1, y2 + h2) - max(y1, y2)) + has_overlap = overlap_x > 0 and overlap_y > 0 + + if has_overlap: + self._log(f" Regions overlap - definitely same bubble, merging", "info") + return True + + # If gaps are tiny (< 10 pixels), merge regardless of other factors + if horizontal_gap < 10 and vertical_gap < 10: + self._log(f" Very small gaps ({horizontal_gap}, {vertical_gap}) - merging", "info") + return True + + # BUBBLE BOUNDARY CHECK: Use spatial patterns to detect different bubbles + # But be less aggressive if gaps are small + # CRITICAL: Reduced threshold to allow bubble boundary detection for stacked bubbles + if horizontal_gap < 12 and vertical_gap < 12: + # Very close regions are almost certainly in the same bubble + self._log(f" Regions very close, skipping bubble boundary check", "info") + elif self._likely_different_bubbles(region1, region2): + self._log(f" Regions likely in different speech bubbles", "info") + return False + + # CHECK 1: For well-aligned text with small gaps, merge immediately + # This catches multi-line text in the same bubble + if is_horizontal_text and vertical_center_diff < avg_height * 0.4: + # Horizontal text that's well-aligned vertically + if horizontal_gap <= threshold and vertical_gap <= threshold * 0.5: + self._log(f" Well-aligned horizontal text with acceptable gaps, merging", "info") + return True + + if is_vertical_text and horizontal_center_diff < avg_width * 0.4: + # Vertical text that's well-aligned horizontally + if vertical_gap <= threshold and horizontal_gap <= threshold * 0.5: + self._log(f" Well-aligned vertical text with acceptable gaps, merging", "info") + return True + + # ADDITIONAL CHECK: Multi-line text in speech bubbles + # Even if not perfectly aligned, check for typical multi-line patterns + if horizontal_center_diff < avg_width * 0.5 and vertical_gap <= threshold: + # Lines that are reasonably centered and within threshold should merge + self._log(f" Multi-line text pattern detected, merging", "info") + return True + + # CHECK 2: Check alignment quality + # Poor alignment often indicates different bubbles + if is_horizontal_text: + # For horizontal text, check vertical alignment + if vertical_center_diff > avg_height * 0.6: + self._log(f" Poor vertical alignment for horizontal text", "info") + return False + elif is_vertical_text: + # For vertical text, check horizontal alignment + if horizontal_center_diff > avg_width * 0.6: + self._log(f" Poor horizontal alignment for vertical text", "info") + return False + + # CHECK 3: Font size check (but be reasonable) + font_size1 = self._estimate_font_size_for_region(region1) + font_size2 = self._estimate_font_size_for_region(region2) + size_ratio = max(font_size1, font_size2) / max(min(font_size1, font_size2), 1) + + # Allow some variation for emphasis or stylistic choices + if size_ratio > 2.0: + self._log(f" Font sizes too different ({font_size1} vs {font_size2})", "info") + return False + + # CHECK 4: Final sanity check on merged area + merged_width = max(x1 + w1, x2 + w2) - min(x1, x2) + merged_height = max(y1 + h1, y2 + h2) - min(y1, y2) + merged_area = merged_width * merged_height + combined_area = (w1 * h1) + (w2 * h2) + + # If merged area is way larger than combined areas, they're 
probably far apart + if merged_area > combined_area * 2.5: + self._log(f" Merged area indicates regions are too far apart", "info") + return False + + # If we get here, apply standard threshold checks + if horizontal_gap <= threshold and vertical_gap <= threshold: + self._log(f" Standard threshold check passed, merging", "info") + return True + + self._log(f" No merge conditions met", "info") + return False + + def _merge_nearby_regions(self, regions: List[TextRegion], threshold: int = 50) -> List[TextRegion]: + """Merge text regions that are likely part of the same speech bubble - with debug logging""" + if len(regions) <= 1: + return regions + + self._log(f"\n=== MERGE DEBUG: Starting merge analysis ===", "info") + self._log(f" Total regions: {len(regions)}", "info") + self._log(f" Threshold: {threshold}px", "info") + + # First, let's log what regions we have + for i, region in enumerate(regions): + x, y, w, h = region.bounding_box + self._log(f" Region {i}: pos({x},{y}) size({w}x{h}) text='{region.text[:20]}...'", "info") + + # Sort regions by area (largest first) to handle contained regions properly + sorted_indices = sorted(range(len(regions)), + key=lambda i: regions[i].bounding_box[2] * regions[i].bounding_box[3], + reverse=True) + + merged = [] + used = set() + + # Process each region in order of size (largest first) + for idx in sorted_indices: + i = idx + if i in used: + continue + + region1 = regions[i] + + # Start with this region + merged_text = region1.text + merged_vertices = list(region1.vertices) if hasattr(region1, 'vertices') else [] + regions_merged = [i] # Track which regions were merged + + self._log(f"\n Checking region {i} for merges:", "info") + + # Check against all other unused regions + for j in range(len(regions)): + if j == i or j in used: + continue + + region2 = regions[j] + self._log(f" Testing merge with region {j}:", "info") + + # Check if region2 is contained within region1 + x1, y1, w1, h1 = region1.bounding_box + x2, y2, w2, h2 = region2.bounding_box + + # Check if region2 is fully contained within region1 + if (x2 >= x1 and y2 >= y1 and + x2 + w2 <= x1 + w1 and y2 + h2 <= y1 + h1): + self._log(f" ✓ Region {j} is INSIDE region {i} - merging!", "success") + merged_text += " " + region2.text + if hasattr(region2, 'vertices'): + merged_vertices.extend(region2.vertices) + used.add(j) + regions_merged.append(j) + continue + + # Check if region1 is contained within region2 (shouldn't happen due to sorting, but be safe) + if (x1 >= x2 and y1 >= y2 and + x1 + w1 <= x2 + w2 and y1 + h1 <= y2 + h2): + self._log(f" ✓ Region {i} is INSIDE region {j} - merging!", "success") + merged_text += " " + region2.text + if hasattr(region2, 'vertices'): + merged_vertices.extend(region2.vertices) + used.add(j) + regions_merged.append(j) + # Update region1's bounding box to the larger region + region1 = TextRegion( + text=merged_text, + vertices=merged_vertices, + bounding_box=region2.bounding_box, + confidence=region1.confidence, + region_type='temp_merge' + ) + continue + + # FIX: Always check proximity against ORIGINAL regions, not the expanded one + # This prevents cascade merging across bubble boundaries + if self._regions_are_nearby(regions[i], region2, threshold): # Use regions[i] not region1 + #self._log(f" ✓ Regions are nearby", "info") + + # Then check if they should merge (also use original region) + if self._regions_should_merge(regions[i], region2, threshold): # Use regions[i] not region1 + #self._log(f" ✓ Regions should merge!", "success") + + # Actually perform 
the merge + merged_text += " " + region2.text + if hasattr(region2, 'vertices'): + merged_vertices.extend(region2.vertices) + used.add(j) + regions_merged.append(j) + + # DON'T update region1 for proximity checks - keep using original regions + else: + self._log(f" ✗ Regions should not merge", "warning") + else: + self._log(f" ✗ Regions not nearby", "warning") + + # Log if we merged multiple regions + if len(regions_merged) > 1: + self._log(f" ✅ MERGED regions {regions_merged} into one bubble", "success") + else: + self._log(f" ℹ️ Region {i} not merged with any other", "info") + + # Create final merged region with all the merged vertices + if merged_vertices: + xs = [v[0] for v in merged_vertices] + ys = [v[1] for v in merged_vertices] + else: + # Fallback: calculate from all merged regions + all_xs = [] + all_ys = [] + for idx in regions_merged: + x, y, w, h = regions[idx].bounding_box + all_xs.extend([x, x + w]) + all_ys.extend([y, y + h]) + xs = all_xs + ys = all_ys + + min_x, max_x = min(xs), max(xs) + min_y, max_y = min(ys), max(ys) + merged_bbox = (min_x, min_y, max_x - min_x, max_y - min_y) + + merged_region = TextRegion( + text=merged_text, + vertices=merged_vertices, + bounding_box=merged_bbox, + confidence=regions[i].confidence, + region_type='merged_text_block' if len(regions_merged) > 1 else regions[i].region_type + ) + + # Copy over any additional attributes + if hasattr(regions[i], 'translated_text'): + merged_region.translated_text = regions[i].translated_text + + merged.append(merged_region) + used.add(i) + + self._log(f"\n=== MERGE DEBUG: Complete ===", "info") + self._log(f" Final region count: {len(merged)} (was {len(regions)})", "info") + + # Verify the merge worked + if len(merged) == len(regions): + self._log(f" ⚠️ WARNING: No regions were actually merged!", "warning") + + return merged + + def _regions_are_nearby(self, region1: TextRegion, region2: TextRegion, threshold: int = 50) -> bool: + """Check if two regions are close enough to be in the same bubble - WITH DEBUG""" + x1, y1, w1, h1 = region1.bounding_box + x2, y2, w2, h2 = region2.bounding_box + + #self._log(f"\n === NEARBY CHECK DEBUG ===", "info") + #self._log(f" Region 1: pos({x1},{y1}) size({w1}x{h1})", "info") + #self._log(f" Region 2: pos({x2},{y2}) size({w2}x{h2})", "info") + #self._log(f" Threshold: {threshold}", "info") + + # Calculate gaps between closest edges + horizontal_gap = 0 + if x1 + w1 < x2: # region1 is to the left + horizontal_gap = x2 - (x1 + w1) + elif x2 + w2 < x1: # region2 is to the left + horizontal_gap = x1 - (x2 + w2) + + vertical_gap = 0 + if y1 + h1 < y2: # region1 is above + vertical_gap = y2 - (y1 + h1) + elif y2 + h2 < y1: # region2 is above + vertical_gap = y1 - (y2 + h2) + + #self._log(f" Horizontal gap: {horizontal_gap}", "info") + #self._log(f" Vertical gap: {vertical_gap}", "info") + + # Detect if regions are likely vertical text based on aspect ratio + aspect1 = w1 / max(h1, 1) + aspect2 = w2 / max(h2, 1) + + # More permissive vertical text detection + # Vertical text typically has aspect ratio < 1.0 (taller than wide) + is_vertical_text = (aspect1 < 1.0 and aspect2 < 1.0) or (aspect1 < 0.5 or aspect2 < 0.5) + + # Also check if text is arranged vertically (one above the other with minimal horizontal offset) + center_x1 = x1 + w1 / 2 + center_x2 = x2 + w2 / 2 + horizontal_center_diff = abs(center_x1 - center_x2) + avg_width = (w1 + w2) / 2 + + # If regions are vertically stacked with aligned centers, treat as vertical text + is_vertically_stacked = (horizontal_center_diff 
< avg_width * 1.5) and (vertical_gap >= 0) + + #self._log(f" Is vertical text: {is_vertical_text}", "info") + #self._log(f" Is vertically stacked: {is_vertically_stacked}", "info") + #self._log(f" Horizontal center diff: {horizontal_center_diff:.1f}", "info") + + # SIMPLE APPROACH: Just check if gaps are within threshold + # Don't overthink it + if horizontal_gap <= threshold and vertical_gap <= threshold: + #self._log(f" ✅ NEARBY: Both gaps within threshold", "success") + return True + + # SPECIAL CASE: Vertically stacked text with good alignment + # This is specifically for multi-line text in bubbles + if horizontal_center_diff < avg_width * 0.8 and vertical_gap <= threshold * 1.5: + #self._log(f" ✅ NEARBY: Vertically aligned text in same bubble", "success") + return True + + # If one gap is small and the other is slightly over, still consider nearby + if (horizontal_gap <= threshold * 0.5 and vertical_gap <= threshold * 1.5) or \ + (vertical_gap <= threshold * 0.5 and horizontal_gap <= threshold * 1.5): + #self._log(f" ✅ NEARBY: One small gap, other slightly over", "success") + return True + + # Special case: Wide bubbles with text on sides + # If regions are at nearly the same vertical position, they might be in a wide bubble + if abs(y1 - y2) < 10: # Nearly same vertical position + # Check if this could be a wide bubble spanning both regions + if horizontal_gap <= threshold * 3: # Allow up to 3x threshold for wide bubbles + #self._log(f" ✅ NEARBY: Same vertical level, possibly wide bubble", "success") + return True + + #self._log(f" ❌ NOT NEARBY: Gaps exceed threshold", "warning") + return False + + def _find_font(self) -> str: + """Find a suitable font for text rendering""" + font_candidates = [ + "C:/Windows/Fonts/comicbd.ttf", # Comic Sans MS Bold as first choice + "C:/Windows/Fonts/arial.ttf", + "C:/Windows/Fonts/calibri.ttf", + "C:/Windows/Fonts/tahoma.ttf", + "/System/Library/Fonts/Helvetica.ttc", + "/usr/share/fonts/truetype/liberation/LiberationSans-Regular.ttf", + "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf" + ] + + for font_path in font_candidates: + if os.path.exists(font_path): + return font_path + + return None # Will use default font + + def _get_singleton_bubble_detector(self): + """Get or initialize the singleton bubble detector instance with load coordination.""" + start_time = None + with MangaTranslator._singleton_lock: + if MangaTranslator._singleton_bubble_detector is not None: + self._log("🤖 Using bubble detector (already loaded)", "info") + MangaTranslator._singleton_refs += 1 + return MangaTranslator._singleton_bubble_detector + # If another thread is loading, wait for it + if MangaTranslator._singleton_bd_loading: + self._log("⏳ Waiting for bubble detector to finish loading (singleton)", "debug") + evt = MangaTranslator._singleton_bd_event + # Drop the lock while waiting + pass + else: + # Mark as loading and proceed to load outside lock + MangaTranslator._singleton_bd_loading = True + MangaTranslator._singleton_bd_event.clear() + start_time = time.time() + # Release lock and perform heavy load + pass + # Outside the lock: perform load or wait + if start_time is None: + # We are a waiter + try: + MangaTranslator._singleton_bd_event.wait(timeout=300) + except Exception: + pass + with MangaTranslator._singleton_lock: + if MangaTranslator._singleton_bubble_detector is not None: + MangaTranslator._singleton_refs += 1 + return MangaTranslator._singleton_bubble_detector + else: + # We are the loader + try: + from bubble_detector import BubbleDetector + bd = 
None + + # First, try to get a preloaded detector from the pool + try: + ocr_settings = self.main_gui.config.get('manga_settings', {}).get('ocr', {}) if hasattr(self, 'main_gui') else {} + det_type = ocr_settings.get('detector_type', 'rtdetr_onnx') + model_id = ocr_settings.get('rtdetr_model_url') or ocr_settings.get('bubble_model_path') or '' + key = (det_type, model_id) + self._log(f"[DEBUG] Looking for detector in pool with key: {key}", "debug") + with MangaTranslator._detector_pool_lock: + self._log(f"[DEBUG] Pool keys available: {list(MangaTranslator._detector_pool.keys())}", "debug") + rec = MangaTranslator._detector_pool.get(key) + if rec and isinstance(rec, dict): + spares = rec.get('spares') or [] + self._log(f"[DEBUG] Found pool record with {len(spares)} spares", "debug") + # For singleton mode, we can use a pool instance without checking it out + # since the singleton will keep it loaded permanently + if spares: + # Just use the first spare (don't pop or check out) + # Singleton will keep it loaded, pool can still track it + bd = spares[0] + self._log(f"🤖 Using pool bubble detector for singleton (no check-out needed)", "info") + else: + self._log(f"[DEBUG] No pool record found for key: {key}", "debug") + except Exception as e: + self._log(f"Could not fetch preloaded detector: {e}", "debug") + + # If no preloaded detector, create a new one + if bd is None: + bd = BubbleDetector() + self._log("🤖 Created new bubble detector instance", "info") + + # Optionally: defer model load until first actual call inside BD; keeping instance resident + with MangaTranslator._singleton_lock: + MangaTranslator._singleton_bubble_detector = bd + MangaTranslator._singleton_refs += 1 + MangaTranslator._singleton_bd_loading = False + try: + MangaTranslator._singleton_bd_event.set() + except Exception: + pass + elapsed = time.time() - start_time + self._log(f"🤖 Singleton bubble detector ready (took {elapsed:.2f}s)", "info") + return bd + except Exception as e: + with MangaTranslator._singleton_lock: + MangaTranslator._singleton_bd_loading = False + try: + MangaTranslator._singleton_bd_event.set() + except Exception: + pass + self._log(f"Failed to create singleton bubble detector: {e}", "error") + return None + + def _initialize_singleton_local_inpainter(self): + """Initialize singleton local inpainter instance""" + with MangaTranslator._singleton_lock: + was_existing = MangaTranslator._singleton_local_inpainter is not None + if MangaTranslator._singleton_local_inpainter is None: + try: + from local_inpainter import LocalInpainter + local_method = self.manga_settings.get('inpainting', {}).get('local_method', 'anime') + # LocalInpainter only accepts config_path, not method + MangaTranslator._singleton_local_inpainter = LocalInpainter() + # Now load the model with the specified method + if local_method: + # Try to load the model + model_path = self.manga_settings.get('inpainting', {}).get('local_model_path') + if not model_path: + # Try to download if no path specified + try: + model_path = MangaTranslator._singleton_local_inpainter.download_jit_model(local_method) + except Exception as e: + self._log(f"⚠️ Failed to download model for {local_method}: {e}", "warning") + + if model_path and os.path.exists(model_path): + success = MangaTranslator._singleton_local_inpainter.load_model_with_retry(local_method, model_path) + if success: + self._log(f"🎨 Created singleton local inpainter with {local_method} model", "info") + else: + self._log(f"⚠️ Failed to load {local_method} model", "warning") + else: + 
self._log(f"🎨 Created singleton local inpainter (no model loaded yet)", "info") + else: + self._log(f"🎨 Created singleton local inpainter (default)", "info") + except Exception as e: + self._log(f"Failed to create singleton local inpainter: {e}", "error") + return + # Use the singleton instance + self.local_inpainter = MangaTranslator._singleton_local_inpainter + self.inpainter = self.local_inpainter + MangaTranslator._singleton_refs += 1 + if was_existing: + self._log("🎨 Using local inpainter (already loaded)", "info") + + def _get_thread_bubble_detector(self): + """Get or initialize bubble detector (singleton or thread-local based on settings). + Will consume a preloaded detector if available for current settings. + """ + if getattr(self, 'use_singleton_bubble_detector', False) or (hasattr(self, 'use_singleton_models') and self.use_singleton_models): + # Use singleton instance (preferred) + if self.bubble_detector is None: + self.bubble_detector = self._get_singleton_bubble_detector() + return self.bubble_detector + else: + # Use thread-local instance (original behavior for parallel processing) + if not hasattr(self, '_thread_local') or getattr(self, '_thread_local', None) is None: + self._thread_local = threading.local() + if not hasattr(self._thread_local, 'bubble_detector') or self._thread_local.bubble_detector is None: + from bubble_detector import BubbleDetector + # Try to check out a preloaded spare for the current detector settings + try: + ocr_settings = self.main_gui.config.get('manga_settings', {}).get('ocr', {}) if hasattr(self, 'main_gui') else {} + det_type = ocr_settings.get('detector_type', 'rtdetr_onnx') + model_id = ocr_settings.get('rtdetr_model_url') or ocr_settings.get('bubble_model_path') or '' + key = (det_type, model_id) + with MangaTranslator._detector_pool_lock: + rec = MangaTranslator._detector_pool.get(key) + if rec and isinstance(rec, dict): + spares = rec.get('spares') or [] + # Initialize checked_out list if it doesn't exist + if 'checked_out' not in rec: + rec['checked_out'] = [] + checked_out = rec['checked_out'] + + # Look for an available spare (not checked out) + if spares: + for spare in spares: + if spare not in checked_out and spare: + # Check out this spare instance + checked_out.append(spare) + self._thread_local.bubble_detector = spare + # Store references for later return + self._checked_out_bubble_detector = spare + self._bubble_detector_pool_key = key + self._log(f"🤖 Checked out bubble detector from pool ({len(checked_out)}/{len(spares)} in use)", "info") + break + except Exception: + pass + # If still not set, create a fresh detector and store it for future use + if not hasattr(self._thread_local, 'bubble_detector') or self._thread_local.bubble_detector is None: + self._thread_local.bubble_detector = BubbleDetector() + self._log("🤖 Created thread-local bubble detector", "debug") + + # Store this new detector in the pool for future reuse + try: + with MangaTranslator._detector_pool_lock: + if key not in MangaTranslator._detector_pool: + MangaTranslator._detector_pool[key] = {'spares': [], 'checked_out': []} + # Add this new detector to spares and immediately check it out + rec = MangaTranslator._detector_pool[key] + if 'spares' not in rec: + rec['spares'] = [] + if 'checked_out' not in rec: + rec['checked_out'] = [] + rec['spares'].append(self._thread_local.bubble_detector) + rec['checked_out'].append(self._thread_local.bubble_detector) + # Store references for later return + self._checked_out_bubble_detector = 
self._thread_local.bubble_detector + self._bubble_detector_pool_key = key + except Exception: + pass + return self._thread_local.bubble_detector + + def _get_thread_local_inpainter(self, local_method: str, model_path: str): + """Get or create a LocalInpainter (singleton or thread-local based on settings). + Loads the requested model if needed. + """ + if hasattr(self, 'use_singleton_models') and self.use_singleton_models: + # Use singleton instance + if self.local_inpainter is None: + self._initialize_singleton_local_inpainter() + return self.local_inpainter + + # Use thread-local instance (original behavior for parallel processing) + # Ensure thread-local storage exists and has a dict + tl = getattr(self, '_thread_local', None) + if tl is None: + self._thread_local = threading.local() + tl = self._thread_local + if not hasattr(tl, 'local_inpainters') or getattr(tl, 'local_inpainters', None) is None: + tl.local_inpainters = {} + key = (local_method or 'anime', model_path or '') + if key not in tl.local_inpainters or tl.local_inpainters[key] is None: + # First, try to use a preloaded spare instance from the shared pool + try: + rec = MangaTranslator._inpaint_pool.get(key) + if rec and isinstance(rec, dict): + spares = rec.get('spares') or [] + if spares: + tl.local_inpainters[key] = spares.pop(0) + self._log("🎨 Using preloaded local inpainting instance", "info") + return tl.local_inpainters[key] + # If there's a fully loaded shared instance but no spares, use it as a last resort + if rec.get('loaded') and rec.get('inpainter') is not None: + tl.local_inpainters[key] = rec.get('inpainter') + self._log("🎨 Using shared preloaded inpainting instance", "info") + return tl.local_inpainters[key] + except Exception: + pass + + # No preloaded instance available: create and load thread-local instance + try: + from local_inpainter import LocalInpainter + # Use a per-thread config path to avoid concurrent JSON writes + try: + import tempfile + thread_cfg = os.path.join(tempfile.gettempdir(), f"gl_inpainter_{threading.get_ident()}.json") + except Exception: + thread_cfg = "config_thread_local.json" + inp = LocalInpainter(config_path=thread_cfg) + # Apply tiling settings + tiling_settings = self.manga_settings.get('tiling', {}) if hasattr(self, 'manga_settings') else {} + inp.tiling_enabled = tiling_settings.get('enabled', False) + inp.tile_size = tiling_settings.get('tile_size', 512) + inp.tile_overlap = tiling_settings.get('tile_overlap', 64) + + # Ensure model is available + resolved_model_path = model_path + if not resolved_model_path or not os.path.exists(resolved_model_path): + try: + resolved_model_path = inp.download_jit_model(local_method) + except Exception as e: + self._log(f"⚠️ JIT model download failed for {local_method}: {e}", "warning") + resolved_model_path = None + + # Load model for this thread's instance + if resolved_model_path and os.path.exists(resolved_model_path): + try: + self._log(f"📥 Loading {local_method} inpainting model (thread-local)", "info") + inp.load_model_with_retry(local_method, resolved_model_path, force_reload=False) + except Exception as e: + self._log(f"⚠️ Thread-local inpainter load error: {e}", "warning") + else: + self._log("⚠️ No model path available for thread-local inpainter", "warning") + + # Re-check thread-local and publish ONLY if model loaded successfully + tl2 = getattr(self, '_thread_local', None) + if tl2 is None: + self._thread_local = threading.local() + tl2 = self._thread_local + if not hasattr(tl2, 'local_inpainters') or getattr(tl2, 
'local_inpainters', None) is None: + tl2.local_inpainters = {} + if getattr(inp, 'model_loaded', False): + tl2.local_inpainters[key] = inp + + # Store this loaded instance info in the pool for future reuse + try: + with MangaTranslator._inpaint_pool_lock: + if key not in MangaTranslator._inpaint_pool: + MangaTranslator._inpaint_pool[key] = {'inpainter': None, 'loaded': False, 'event': threading.Event(), 'spares': []} + # Mark that we have a loaded instance available + MangaTranslator._inpaint_pool[key]['loaded'] = True + MangaTranslator._inpaint_pool[key]['inpainter'] = inp # Store reference + if MangaTranslator._inpaint_pool[key].get('event'): + MangaTranslator._inpaint_pool[key]['event'].set() + except Exception: + pass + else: + # Ensure future calls will attempt a fresh init instead of using a half-initialized instance + tl2.local_inpainters[key] = None + except Exception as e: + self._log(f"❌ Failed to create thread-local inpainter: {e}", "error") + try: + tl3 = getattr(self, '_thread_local', None) + if tl3 is None: + self._thread_local = threading.local() + tl3 = self._thread_local + if not hasattr(tl3, 'local_inpainters') or getattr(tl3, 'local_inpainters', None) is None: + tl3.local_inpainters = {} + tl3.local_inpainters[key] = None + except Exception: + pass + return getattr(self._thread_local, 'local_inpainters', {}).get(key) + + def translate_regions(self, regions: List[TextRegion], image_path: str) -> List[TextRegion]: + """Translate all text regions with API delay""" + self._log(f"\n📝 Translating {len(regions)} text regions...") + + # Check stop before even starting + if self._check_stop(): + self._log(f"\n⏹️ Translation stopped before processing any regions", "warning") + return regions + + # Check if parallel processing OR batch translation is enabled + parallel_enabled = self.manga_settings.get('advanced', {}).get('parallel_processing', False) + batch_enabled = getattr(self, 'batch_mode', False) + max_workers = self.manga_settings.get('advanced', {}).get('max_workers', 4) + + # Batch translation (parallel API calls) should work independently of parallel processing + if batch_enabled: + max_workers = getattr(self, 'batch_size', max_workers) + self._log(f"📦 Using BATCH TRANSLATION with {max_workers} concurrent API calls") + return self._translate_regions_parallel(regions, image_path, max_workers) + elif parallel_enabled and len(regions) > 1: + self._log(f"🚀 Using PARALLEL processing with {max_workers} workers") + return self._translate_regions_parallel(regions, image_path, max_workers) + else: + # SEQUENTIAL CODE + for i, region in enumerate(regions): + if self._check_stop(): + self._log(f"\n⏹️ Translation stopped by user after {i}/{len(regions)} regions", "warning") + break + if region.text.strip(): + self._log(f"\n[{i+1}/{len(regions)}] Original: {region.text}") + + # Get context for translation + context = self.translation_context[-5:] if self.contextual_enabled else None + + # Translate with image context + translated = self.translate_text( + region.text, + context, + image_path=image_path, + region=region + ) + region.translated_text = translated + + self._log(f"Translated: {translated}") + + # SAVE TO HISTORY HERE + if self.history_manager and self.contextual_enabled and translated: + try: + self.history_manager.append_to_history( + user_content=region.text, + assistant_content=translated, + hist_limit=self.translation_history_limit, + reset_on_limit=not self.rolling_history_enabled, + rolling_window=self.rolling_history_enabled + ) + self._log(f"📚 Saved to history 
(exchange {i+1})") + except Exception as e: + self._log(f"⚠️ Failed to save history: {e}", "warning") + + # Apply API delay + if i < len(regions) - 1: # Don't delay after last translation + self._log(f"⏳ Waiting {self.api_delay}s before next translation...") + # Check stop flag every 0.1 seconds during delay + for _ in range(int(self.api_delay * 10)): + if self._check_stop(): + self._log(f"\n⏹️ Translation stopped during delay", "warning") + return regions + time.sleep(0.1) + + return regions + + # parallel processing: + + def _wait_for_api_slot(self, min_interval=None, jitter_max=0.25): + """Global, thread-safe front-edge rate limiter for API calls. + Ensures parallel requests are spaced out before dispatch, avoiding tail latency. + """ + import time + import random + import threading + + if min_interval is None: + try: + min_interval = float(getattr(self, "api_delay", 0.0)) + except Exception: + min_interval = 0.0 + if min_interval < 0: + min_interval = 0.0 + + # Lazy init shared state + if not hasattr(self, "_api_rl_lock"): + self._api_rl_lock = threading.Lock() + self._api_next_allowed = 0.0 # monotonic seconds + + while True: + now = time.monotonic() + with self._api_rl_lock: + # If we're allowed now, book the next slot and proceed + if now >= self._api_next_allowed: + jitter = random.uniform(0.0, max(jitter_max, 0.0)) if jitter_max else 0.0 + self._api_next_allowed = now + min_interval + jitter + return + + # Otherwise compute wait time (don’t hold the lock while sleeping) + wait = self._api_next_allowed - now + + # Sleep outside the lock in short increments so stop flags can be honored + if wait > 0: + try: + if self._check_stop(): + return + except Exception: + pass + time.sleep(min(wait, 0.05)) + + def _translate_regions_parallel(self, regions: List[TextRegion], image_path: str, max_workers: int = None) -> List[TextRegion]: + """Translate regions using parallel processing""" + # Get max_workers from settings if not provided + if max_workers is None: + max_workers = self.manga_settings.get('advanced', {}).get('max_workers', 4) + + # Override with API batch size when batch mode is enabled — these are API calls. 
+ try: + if getattr(self, 'batch_mode', False): + bs = int(getattr(self, 'batch_size', 0) or int(os.getenv('BATCH_SIZE', '0'))) + if bs and bs > 0: + max_workers = bs + except Exception: + pass + # Bound to number of regions + max_workers = max(1, min(max_workers, len(regions))) + + # Thread-safe storage for results + results_lock = threading.Lock() + translated_regions = {} + failed_indices = [] + + # Filter out empty regions + valid_regions = [(i, region) for i, region in enumerate(regions) if region.text.strip()] + + if not valid_regions: + return regions + + # Create a thread pool + with ThreadPoolExecutor(max_workers=max_workers) as executor: + # Submit all translation tasks + future_to_data = {} + + for i, region in valid_regions: + # Check for stop signal before submitting + if self._check_stop(): + self._log(f"\n⏹️ Translation stopped before submitting region {i+1}", "warning") + break + + # Submit translation task + future = executor.submit( + self._translate_single_region_parallel, + region, + i, + len(valid_regions), + image_path + ) + future_to_data[future] = (i, region) + + # Process completed translations + completed = 0 + for future in as_completed(future_to_data): + i, region = future_to_data[future] + + # Check for stop signal + if self._check_stop(): + self._log(f"\n⏹️ Translation stopped at {completed}/{len(valid_regions)} completed", "warning") + # Cancel remaining futures + for f in future_to_data: + f.cancel() + break + + try: + translated_text = future.result() + if translated_text: + with results_lock: + translated_regions[i] = translated_text + completed += 1 + self._log(f"✅ [{completed}/{len(valid_regions)}] Completed region {i+1}") + else: + with results_lock: + failed_indices.append(i) + self._log(f"❌ [{completed}/{len(valid_regions)}] Failed region {i+1}", "error") + + except Exception as e: + with results_lock: + failed_indices.append(i) + self._log(f"❌ Error in region {i+1}: {str(e)}", "error") + + # Apply translations back to regions + for i, region in enumerate(regions): + if i in translated_regions: + region.translated_text = translated_regions[i] + + # Report summary + success_count = len(translated_regions) + fail_count = len(failed_indices) + self._log(f"\n📊 Parallel translation complete: {success_count} succeeded, {fail_count} failed") + + return regions + + def reset_for_new_image(self): + """Reset internal state for processing a new image""" + # ============================================================ + # CRITICAL: COMPREHENSIVE CACHE CLEARING FOR NEW IMAGE + # This ensures NO text data leaks between images + # ============================================================ + + # Clear any cached detection results + if hasattr(self, 'last_detection_results'): + del self.last_detection_results + + # FORCE clear OCR ROI cache (main text contamination source) + # THREAD-SAFE: Use lock for parallel panel translation + if hasattr(self, 'ocr_roi_cache'): + with self._cache_lock: + self.ocr_roi_cache.clear() + self._current_image_hash = None + + # Clear OCR manager and ALL provider caches + if hasattr(self, 'ocr_manager') and self.ocr_manager: + if hasattr(self.ocr_manager, 'last_results'): + self.ocr_manager.last_results = None + if hasattr(self.ocr_manager, 'cache'): + self.ocr_manager.cache.clear() + # Clear ALL provider-level caches + if hasattr(self.ocr_manager, 'providers'): + for provider_name, provider in self.ocr_manager.providers.items(): + if hasattr(provider, 'last_results'): + provider.last_results = None + if hasattr(provider, 'cache'): + 
provider.cache.clear() + + # Clear bubble detector cache + if hasattr(self, 'bubble_detector') and self.bubble_detector: + if hasattr(self.bubble_detector, 'last_detections'): + self.bubble_detector.last_detections = None + if hasattr(self.bubble_detector, 'cache'): + self.bubble_detector.cache.clear() + + # Don't clear translation context if using rolling history + if not self.rolling_history_enabled: + self.translation_context = [] + + # Clear any cached regions + if hasattr(self, '_cached_regions'): + del self._cached_regions + + self._log("🔄 Reset translator state for new image (ALL text caches cleared)", "debug") + + def _translate_single_region_parallel(self, region: TextRegion, index: int, total: int, image_path: str) -> Optional[str]: + """Translate a single region for parallel processing""" + try: + thread_name = threading.current_thread().name + self._log(f"\n[{thread_name}] [{index+1}/{total}] Original: {region.text}") + + # Note: Context is not used in parallel mode to avoid race conditions + # Pass None for context to maintain compatibility with your translate_text method + # Front-edge rate limiting across threads + self._wait_for_api_slot() + + translated = self.translate_text( + region.text, + None, # No context in parallel mode + image_path=image_path, + region=region + ) + + if translated: + self._log(f"[{thread_name}] Translated: {translated}") + return translated + else: + self._log(f"[{thread_name}] Translation failed", "error") + return None + + except Exception as e: + self._log(f"[{thread_name}] Error: {str(e)}", "error") + return None + + + def _is_bubble_detector_loaded(self, ocr_settings: Dict[str, Any]) -> Tuple[bool, str]: + """Check if the configured bubble detector's model is already loaded. + Returns (loaded, detector_type). Safe: does not trigger a load. + """ + try: + bd = self._get_thread_bubble_detector() + except Exception: + return False, ocr_settings.get('detector_type', 'rtdetr_onnx') + det = ocr_settings.get('detector_type', 'rtdetr_onnx') + try: + if det == 'rtdetr_onnx': + return bool(getattr(bd, 'rtdetr_onnx_loaded', False)), det + elif det == 'rtdetr': + return bool(getattr(bd, 'rtdetr_loaded', False)), det + elif det == 'yolo': + return bool(getattr(bd, 'model_loaded', False)), det + else: + # Auto or unknown – consider any ready model as loaded + ready = bool(getattr(bd, 'rtdetr_loaded', False) or getattr(bd, 'rtdetr_onnx_loaded', False) or getattr(bd, 'model_loaded', False)) + return ready, det + except Exception: + return False, det + + def _is_local_inpainter_loaded(self) -> Tuple[bool, Optional[str]]: + """Check if a local inpainter model is already loaded for current settings. + Returns (loaded, local_method) or (False, None). + This respects UI flags: skip_inpainting / use_cloud_inpainting. 
+ """ + try: + # If skipping or using cloud, this does not apply + if getattr(self, 'skip_inpainting', False) or getattr(self, 'use_cloud_inpainting', False): + return False, None + except Exception: + pass + inpaint_cfg = self.manga_settings.get('inpainting', {}) if hasattr(self, 'manga_settings') else {} + local_method = inpaint_cfg.get('local_method', 'anime') + try: + model_path = self.main_gui.config.get(f'manga_{local_method}_model_path', '') if hasattr(self, 'main_gui') else '' + except Exception: + model_path = '' + # Singleton path + if getattr(self, 'use_singleton_models', False): + inp = getattr(MangaTranslator, '_singleton_local_inpainter', None) + return (bool(getattr(inp, 'model_loaded', False)), local_method) + # Thread-local/pooled path + inp = getattr(self, 'local_inpainter', None) + if inp is not None and getattr(inp, 'model_loaded', False): + return True, local_method + try: + key = (local_method, model_path or '') + rec = MangaTranslator._inpaint_pool.get(key) + # Consider the shared 'inpainter' loaded or any spare that is model_loaded + if rec: + if rec.get('loaded') and rec.get('inpainter') is not None and getattr(rec['inpainter'], 'model_loaded', False): + return True, local_method + for spare in rec.get('spares') or []: + if getattr(spare, 'model_loaded', False): + return True, local_method + except Exception: + pass + return False, local_method + + def _log_model_status(self): + """Emit concise status lines for already-loaded heavy models to avoid confusing 'loading' logs.""" + try: + ocr_settings = self.manga_settings.get('ocr', {}) if hasattr(self, 'manga_settings') else {} + if ocr_settings.get('bubble_detection_enabled', False): + loaded, det = self._is_bubble_detector_loaded(ocr_settings) + det_name = 'YOLO' if det == 'yolo' else ('RT-DETR' if det == 'rtdetr' else 'RTEDR_onnx') + if loaded: + self._log("🤖 Using bubble detector (already loaded)", "info") + else: + self._log("🤖 Bubble detector will load on first use", "debug") + except Exception: + pass + try: + loaded, local_method = self._is_local_inpainter_loaded() + if local_method: + label = local_method.upper() + if loaded: + self._log("🎨 Using local inpainter (already loaded)", "info") + else: + self._log("🎨 Local inpainter will load on first use", "debug") + except Exception: + pass + + def process_image(self, image_path: str, output_path: Optional[str] = None, + batch_index: int = None, batch_total: int = None) -> Dict[str, Any]: + """Process a single manga image through the full pipeline""" + # Ensure local references exist for cleanup in finally + image = None + inpainted = None + final_image = None + mask = None + mask_viz = None + pil_image = None + heatmap = None + + # Set batch tracking if provided + if batch_index is not None and batch_total is not None: + self.batch_current = batch_index + self.batch_size = batch_total + self.batch_mode = True + + # Simplified header for batch mode + if not self.batch_mode: + self._log(f"\n{'='*60}") + self._log(f"📷 STARTING MANGA TRANSLATION PIPELINE") + self._log(f"📁 Input: {image_path}") + self._log(f"📁 Output: {output_path or 'Auto-generated'}") + self._log(f"{'='*60}\n") + else: + self._log(f"\n[{batch_index}/{batch_total}] Processing: {os.path.basename(image_path)}") + + # Before heavy work, report model status to avoid confusing 'loading' logs later + try: + self._log_model_status() + except Exception: + pass + + result = { + 'success': False, + 'input_path': image_path, + 'output_path': output_path, + 'regions': [], + 'errors': [], + 'interrupted': 
False, + 'format_info': {} + } + + try: + # RAM cap gating before heavy processing + try: + self._block_if_over_cap("processing image") + except Exception: + pass + + # Determine the output directory from output_path + if output_path: + output_dir = os.path.dirname(output_path) + else: + # If no output path specified, use default + output_dir = os.path.join(os.path.dirname(image_path), "translated_images") + + # Ensure output directory exists + os.makedirs(output_dir, exist_ok=True) + + # Initialize HistoryManager with the output directory + if self.contextual_enabled and not self.history_manager_initialized: + # Only initialize if we're in a new output directory + if output_dir != getattr(self, 'history_output_dir', None): + try: + self.history_manager = HistoryManager(output_dir) + self.history_manager_initialized = True + self.history_output_dir = output_dir + self._log(f"📚 Initialized HistoryManager in output directory: {output_dir}") + except Exception as e: + self._log(f"⚠️ Failed to initialize history manager: {str(e)}", "warning") + self.history_manager = None + + # Check for stop signal + if self._check_stop(): + result['interrupted'] = True + self._log("⏹️ Translation stopped before processing", "warning") + return result + + # Format detection if enabled + if self.manga_settings.get('advanced', {}).get('format_detection', False): + self._log("🔍 Analyzing image format...") + img = Image.open(image_path) + width, height = img.size + aspect_ratio = height / width + + # Detect format type + format_info = { + 'width': width, + 'height': height, + 'aspect_ratio': aspect_ratio, + 'is_webtoon': aspect_ratio > 3.0, + 'is_spread': width > height * 1.3, + 'format': 'unknown' + } + + if format_info['is_webtoon']: + format_info['format'] = 'webtoon' + self._log("📱 Detected WEBTOON format - vertical scroll manga") + elif format_info['is_spread']: + format_info['format'] = 'spread' + self._log("📖 Detected SPREAD format - two-page layout") + else: + format_info['format'] = 'single_page' + self._log("📄 Detected SINGLE PAGE format") + + result['format_info'] = format_info + + # Handle webtoon mode if detected and enabled + webtoon_mode = self.manga_settings.get('advanced', {}).get('webtoon_mode', 'auto') + if format_info['is_webtoon'] and webtoon_mode != 'disabled': + if webtoon_mode == 'auto' or webtoon_mode == 'force': + self._log("🔄 Webtoon mode active - will process in chunks for better OCR") + # Process webtoon in chunks + return self._process_webtoon_chunks(image_path, output_path, result) + + # Step 1: Detect text regions using Google Cloud Vision + self._log(f"📍 [STEP 1] Text Detection Phase") + regions = self.detect_text_regions(image_path) + + if not regions: + error_msg = "No text regions detected by Cloud Vision" + self._log(f"⚠️ {error_msg}", "warning") + result['errors'].append(error_msg) + # Still save the original image as "translated" if no text found + if output_path: + import shutil + shutil.copy2(image_path, output_path) + result['output_path'] = output_path + result['success'] = True + return result + + self._log(f"\n✅ Detection complete: {len(regions)} regions found") + + # Save debug outputs only if 'Save intermediate images' is enabled + if self.manga_settings.get('advanced', {}).get('save_intermediate', False): + self._save_debug_image(image_path, regions, debug_base_dir=output_dir) + + # Step 2: Translation & Inpainting (concurrent) + self._log(f"\n📍 [STEP 2] Translation & Inpainting Phase (concurrent)") + + # Load image once (used by inpainting task); keep PIL fallback 
for Unicode paths + import cv2 + self._log(f"🖼️ Loading image with OpenCV...") + try: + image = cv2.imread(image_path) + if image is None: + self._log(f" Using PIL to handle Unicode path...", "info") + from PIL import Image as PILImage + import numpy as np + pil_image = PILImage.open(image_path) + image = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR) + self._log(f" ✅ Successfully loaded with PIL", "info") + except Exception as e: + error_msg = f"Failed to load image: {image_path} - {str(e)}" + self._log(f"❌ {error_msg}", "error") + result['errors'].append(error_msg) + return result + + self._log(f" Image dimensions: {image.shape[1]}x{image.shape[0]}") + + # Save intermediate original image if enabled + if self.manga_settings.get('advanced', {}).get('save_intermediate', False): + self._save_intermediate_image(image_path, image, "original", debug_base_dir=output_dir) + + # Check if we should continue before kicking off tasks + if self._check_stop(): + result['interrupted'] = True + self._log("⏹️ Translation stopped before concurrent phase", "warning") + return result + + # Helper tasks + def _task_translate(): + try: + if self.full_page_context_enabled: + # Full page context translation mode + self._log(f"\n📄 Using FULL PAGE CONTEXT mode") + self._log(" This mode sends all text together for more consistent translations", "info") + if self._check_stop(): + return False + translations = self.translate_full_page_context(regions, image_path) + if translations: + translated_count = sum(1 for r in regions if getattr(r, 'translated_text', None) and r.translated_text and r.translated_text != r.text) + self._log(f"\n📊 Full page context translation complete: {translated_count}/{len(regions)} regions translated") + return True + else: + self._log("❌ Full page context translation failed", "error") + result['errors'].append("Full page context translation failed") + return False + else: + # Individual translation mode with parallel processing support + self._log(f"\n📝 Using INDIVIDUAL translation mode") + if self.manga_settings.get('advanced', {}).get('parallel_processing', False): + self._log("⚡ Parallel processing ENABLED") + _ = self._translate_regions_parallel(regions, image_path) + else: + _ = self.translate_regions(regions, image_path) + return True + except Exception as te: + self._log(f"❌ Translation task error: {te}", "error") + return False + + def _task_inpaint(): + try: + if getattr(self, 'skip_inpainting', False): + self._log(f"🎨 Skipping inpainting (preserving original art)", "info") + return image.copy() + + self._log(f"🎭 Creating text mask...") + try: + self._block_if_over_cap("mask creation") + except Exception: + pass + mask_local = self.create_text_mask(image, regions) + + # Save mask and overlay only if 'Save intermediate images' is enabled + if self.manga_settings.get('advanced', {}).get('save_intermediate', False): + try: + debug_dir = os.path.join(output_dir, 'debug') + os.makedirs(debug_dir, exist_ok=True) + base_name = os.path.splitext(os.path.basename(image_path))[0] + mask_path = os.path.join(debug_dir, f"{base_name}_mask.png") + cv2.imwrite(mask_path, mask_local) + mask_percentage = ((mask_local > 0).sum() / mask_local.size) * 100 + self._log(f" 🎭 DEBUG: Saved mask to {mask_path}", "info") + self._log(f" 📊 Mask coverage: {mask_percentage:.1f}% of image", "info") + + # Save mask overlay visualization + mask_viz_local = image.copy() + mask_viz_local[mask_local > 0] = [0, 0, 255] + viz_path = os.path.join(debug_dir, f"{base_name}_mask_overlay.png") + cv2.imwrite(viz_path, 
mask_viz_local) + self._log(f" 🎭 DEBUG: Saved mask overlay to {viz_path}", "info") + except Exception as e: + self._log(f" ❌ Failed to save mask debug: {str(e)}", "error") + + # Also save intermediate copies + try: + self._save_intermediate_image(image_path, mask_local, "mask", debug_base_dir=output_dir) + except Exception: + pass + + self._log(f"🎨 Inpainting to remove original text") + try: + self._block_if_over_cap("inpainting") + except Exception: + pass + inpainted_local = self.inpaint_regions(image, mask_local) + + if self.manga_settings.get('advanced', {}).get('save_intermediate', False): + try: + self._save_intermediate_image(image_path, inpainted_local, "inpainted", debug_base_dir=output_dir) + except Exception: + pass + return inpainted_local + except Exception as ie: + self._log(f"❌ Inpainting task error: {ie}", "error") + return image.copy() + + # Gate on advanced setting (default enabled) + adv = self.manga_settings.get('advanced', {}) + run_concurrent = adv.get('concurrent_inpaint_translate', True) + + if run_concurrent: + self._log("🔀 Running translation and inpainting concurrently", "info") + with ThreadPoolExecutor(max_workers=2) as _executor: + fut_translate = _executor.submit(_task_translate) + fut_inpaint = _executor.submit(_task_inpaint) + # Wait for completion + try: + translate_ok = fut_translate.result() + except Exception: + translate_ok = False + try: + inpainted = fut_inpaint.result() + except Exception: + inpainted = image.copy() + else: + self._log("↪️ Concurrent mode disabled — running sequentially", "info") + translate_ok = _task_translate() + inpainted = _task_inpaint() + + # After concurrent phase, validate translation + if self._check_stop(): + result['interrupted'] = True + self._log("⏹️ Translation cancelled before rendering", "warning") + result['regions'] = [r.to_dict() for r in regions] + return result + + if not any(getattr(region, 'translated_text', None) for region in regions): + result['interrupted'] = True + self._log("⏹️ No regions were translated - translation was interrupted", "warning") + result['regions'] = [r.to_dict() for r in regions] + return result + + # Render translated text + self._log(f"✍️ Rendering translated text...") + self._log(f" Using enhanced renderer with custom settings", "info") + final_image = self.render_translated_text(inpainted, regions) + + # Save output + try: + if not output_path: + base, ext = os.path.splitext(image_path) + output_path = f"{base}_translated{ext}" + + success = cv2.imwrite(output_path, final_image) + + if not success: + self._log(f" Using PIL to save with Unicode path...", "info") + from PIL import Image as PILImage + + rgb_image = cv2.cvtColor(final_image, cv2.COLOR_BGR2RGB) + pil_image = PILImage.fromarray(rgb_image) + pil_image.save(output_path) + self._log(f" ✅ Successfully saved with PIL", "info") + + result['output_path'] = output_path + self._log(f"\n💾 Saved output to: {output_path}") + + except Exception as e: + error_msg = f"Failed to save output image: {str(e)}" + self._log(f"❌ {error_msg}", "error") + result['errors'].append(error_msg) + result['success'] = False + return result + + # Update result + result['regions'] = [r.to_dict() for r in regions] + if not result.get('interrupted', False): + result['success'] = True + self._log(f"\n✅ TRANSLATION PIPELINE COMPLETE", "success") + else: + self._log(f"\n⚠️ TRANSLATION INTERRUPTED - Partial output saved", "warning") + + self._log(f"{'='*60}\n") + + except Exception as e: + error_msg = f"Error processing image: 
{str(e)}\n{traceback.format_exc()}" + self._log(f"\n❌ PIPELINE ERROR:", "error") + self._log(f" {str(e)}", "error") + self._log(f" Type: {type(e).__name__}", "error") + self._log(traceback.format_exc(), "error") + result['errors'].append(error_msg) + finally: + # Per-image memory cleanup to reduce RAM growth across pages + try: + # Clear self-held large attributes + try: + self.current_image = None + self.current_mask = None + self.final_image = None + self.text_regions = [] + self.translated_regions = [] + except Exception: + pass + + # Drop references to large local objects so they can be garbage collected + # (deleting from the locals() snapshot has no effect in CPython, so rebind to None instead) + try: + image = None + inpainted = None + final_image = None + mask = None + mask_viz = None + pil_image = None + heatmap = None + except Exception: + pass + + # Reset caches for the next image (non-destructive to loaded models) + try: + self.reset_for_new_image() + except Exception: + pass + + # Encourage release of native resources + try: + import cv2 as _cv2 + try: + _cv2.destroyAllWindows() + except Exception: + pass + except Exception: + pass + + # Free CUDA memory if torch is available + try: + import torch + if torch.cuda.is_available(): + torch.cuda.empty_cache() + except Exception: + pass + + # Release thread-local heavy objects to curb RAM growth across runs + try: + self._cleanup_thread_locals() + except Exception: + pass + + # Deep cleanup control - respects user settings and parallel processing + try: + # Check if auto cleanup is enabled in settings (disabled by default) + auto_cleanup_enabled = False + try: + if hasattr(self, 'manga_settings'): + auto_cleanup_enabled = self.manga_settings.get('advanced', {}).get('auto_cleanup_models', False) + except Exception: + pass + + if not auto_cleanup_enabled: + # User has disabled automatic cleanup + self._log("🔑 Auto cleanup disabled - models will remain in RAM", "debug") + else: + # Determine if we should cleanup now + should_cleanup_now = True + + # Check if we're in batch mode + is_last_in_batch = False + try: + if getattr(self, 'batch_mode', False): + bc = getattr(self, 'batch_current', None) + bt = getattr(self, 'batch_size', None) + if bc is not None and bt is not None: + is_last_in_batch = (bc >= bt) + # In batch mode, only cleanup at the end + should_cleanup_now = is_last_in_batch + except Exception: + pass + + # For parallel panel translation, cleanup is handled differently + # (it's handled in manga_integration.py after all panels complete) + is_parallel_panel = False + try: + if hasattr(self, 'manga_settings'): + is_parallel_panel = self.manga_settings.get('advanced', {}).get('parallel_panel_translation', False) + except Exception: + pass + + if is_parallel_panel: + # Don't cleanup here - let manga_integration handle it after all panels + self._log("🎯 Deferring cleanup until all parallel panels complete", "debug") + should_cleanup_now = False + + if should_cleanup_now: + # Perform the cleanup + self._deep_cleanup_models() + + # Also clear HF cache for RT-DETR (best-effort) + if is_last_in_batch or not getattr(self, 'batch_mode', False): + try: + self._clear_hf_cache() + except Exception: + pass + except Exception: + pass + + # Force a garbage collection cycle + try: + import gc + gc.collect() + except Exception: + pass + + # Aggressively trim process working set (Windows) or libc heap (Linux) + try: + self._trim_working_set() + except Exception: + pass + except Exception: + # Never let cleanup fail the pipeline + pass + + return result + + def 
reset_history_manager(self): + """Reset history manager for new translation batch""" + self.history_manager = None + self.history_manager_initialized = False + self.history_output_dir = None + self.translation_context = [] + self._log("📚 Reset history manager for new batch", "debug") + + def cleanup_all_models(self): + """Public method to force cleanup of all models - call this after translation! + This ensures all models (YOLO, RT-DETR, inpainters, OCR) are unloaded from RAM. + """ + self._log("🧹 Forcing cleanup of all models to free RAM...", "info") + + # Call the comprehensive cleanup + self._deep_cleanup_models() + + # Also cleanup thread locals + try: + self._cleanup_thread_locals() + except Exception: + pass + + # Clear HF cache + try: + self._clear_hf_cache() + except Exception: + pass + + # Trim working set + try: + self._trim_working_set() + except Exception: + pass + + self._log("✅ All models cleaned up - RAM freed!", "info") + + def clear_internal_state(self): + """Clear all internal state and cached data to free memory. + This is called when the translator instance is being reset. + Ensures OCR manager, inpainters, and bubble detector are also cleaned. + """ + try: + # Clear image data + self.current_image = None + self.current_mask = None + self.final_image = None + + # Clear text regions + if hasattr(self, 'text_regions'): + self.text_regions = [] + if hasattr(self, 'translated_regions'): + self.translated_regions = [] + + # Clear ALL caches (including text caches) + # THREAD-SAFE: Use lock for parallel panel translation + if hasattr(self, 'cache'): + self.cache.clear() + if hasattr(self, 'ocr_roi_cache'): + with self._cache_lock: + self.ocr_roi_cache.clear() + self._current_image_hash = None + + # Clear history and context + if hasattr(self, 'translation_context'): + self.translation_context = [] + if hasattr(self, 'history_manager'): + self.history_manager = None + self.history_manager_initialized = False + self.history_output_dir = None + + # IMPORTANT: Properly unload OCR manager + if hasattr(self, 'ocr_manager') and self.ocr_manager: + try: + ocr = self.ocr_manager + if hasattr(ocr, 'providers'): + for provider_name, provider in ocr.providers.items(): + # Clear all model references + if hasattr(provider, 'model'): + provider.model = None + if hasattr(provider, 'processor'): + provider.processor = None + if hasattr(provider, 'tokenizer'): + provider.tokenizer = None + if hasattr(provider, 'reader'): + provider.reader = None + if hasattr(provider, 'client'): + provider.client = None + if hasattr(provider, 'is_loaded'): + provider.is_loaded = False + ocr.providers.clear() + self.ocr_manager = None + self._log(" ✓ OCR manager cleared", "debug") + except Exception as e: + self._log(f" Warning: OCR cleanup failed: {e}", "debug") + + # IMPORTANT: Handle local inpainter cleanup carefully + # DO NOT unload if it's a shared/checked-out instance from the pool + if hasattr(self, 'local_inpainter') and self.local_inpainter: + try: + # Only unload if this is NOT a checked-out or shared instance + is_from_pool = hasattr(self, '_checked_out_inpainter') or hasattr(self, '_inpainter_pool_key') + if not is_from_pool and hasattr(self.local_inpainter, 'unload'): + self.local_inpainter.unload() + self._log(" ✓ Local inpainter unloaded", "debug") + else: + self._log(" ✓ Local inpainter reference cleared (pool instance preserved)", "debug") + self.local_inpainter = None + except Exception as e: + self._log(f" Warning: Inpainter cleanup failed: {e}", "debug") + + # Also clear hybrid and generic 
inpainter references + if hasattr(self, 'hybrid_inpainter'): + if self.hybrid_inpainter and hasattr(self.hybrid_inpainter, 'unload'): + try: + self.hybrid_inpainter.unload() + except Exception: + pass + self.hybrid_inpainter = None + + if hasattr(self, 'inpainter'): + if self.inpainter and hasattr(self.inpainter, 'unload'): + try: + self.inpainter.unload() + except Exception: + pass + self.inpainter = None + + # IMPORTANT: Handle bubble detector cleanup carefully + # DO NOT unload if it's a singleton or from a preloaded pool + if hasattr(self, 'bubble_detector') and self.bubble_detector: + try: + is_singleton = getattr(self, 'use_singleton_bubble_detector', False) + # Check if it's from thread-local which might have gotten it from the pool + is_from_pool = hasattr(self, '_thread_local') and hasattr(self._thread_local, 'bubble_detector') + + if not is_singleton and not is_from_pool: + if hasattr(self.bubble_detector, 'unload'): + self.bubble_detector.unload(release_shared=True) + self._log(" ✓ Bubble detector unloaded", "debug") + else: + self._log(" ✓ Bubble detector reference cleared (pool/singleton instance preserved)", "debug") + # In all cases, clear our instance reference + self.bubble_detector = None + except Exception as e: + self._log(f" Warning: Bubble detector cleanup failed: {e}", "debug") + + # Clear any file handles or temp data + if hasattr(self, '_thread_local'): + try: + self._cleanup_thread_locals() + except Exception: + pass + + # Clear processing flags + self.is_processing = False + self.cancel_requested = False + + self._log("🧹 Internal state and all components cleared", "debug") + + except Exception as e: + self._log(f"⚠️ Warning: Failed to clear internal state: {e}", "warning") + + def _process_webtoon_chunks(self, image_path: str, output_path: str, result: Dict) -> Dict: + """Process webtoon in chunks for better OCR""" + import cv2 + import numpy as np + from PIL import Image as PILImage + + try: + self._log("📱 Processing webtoon in chunks for better OCR", "info") + + # Load the image + image = cv2.imread(image_path) + if image is None: + pil_image = PILImage.open(image_path) + image = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR) + + height, width = image.shape[:2] + + # Get chunk settings from config + chunk_height = self.manga_settings.get('preprocessing', {}).get('chunk_height', 1000) + chunk_overlap = self.manga_settings.get('preprocessing', {}).get('chunk_overlap', 100) + + self._log(f" Image dimensions: {width}x{height}", "info") + self._log(f" Chunk height: {chunk_height}px, Overlap: {chunk_overlap}px", "info") + + # Calculate number of chunks needed + effective_chunk_height = chunk_height - chunk_overlap + num_chunks = max(1, (height - chunk_overlap) // effective_chunk_height + 1) + + self._log(f" Will process in {num_chunks} chunks", "info") + + # Process each chunk + all_regions = [] + chunk_offsets = [] + + for i in range(num_chunks): + # Calculate chunk boundaries + start_y = i * effective_chunk_height + end_y = min(start_y + chunk_height, height) + + # Make sure we don't miss the bottom part + if i == num_chunks - 1: + end_y = height + + self._log(f"\n 📄 Processing chunk {i+1}/{num_chunks} (y: {start_y}-{end_y})", "info") + + # Extract chunk + chunk = image[start_y:end_y, 0:width] + + # Save chunk temporarily for OCR + import tempfile + with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp: + chunk_path = tmp.name + cv2.imwrite(chunk_path, chunk) + + try: + # Detect text in this chunk + chunk_regions = 
self.detect_text_regions(chunk_path) + + # Adjust region coordinates to full image space + for region in chunk_regions: + # Adjust bounding box + x, y, w, h = region.bounding_box + region.bounding_box = (x, y + start_y, w, h) + + # Adjust vertices if present + if hasattr(region, 'vertices') and region.vertices: + adjusted_vertices = [] + for vx, vy in region.vertices: + adjusted_vertices.append((vx, vy + start_y)) + region.vertices = adjusted_vertices + + # Mark which chunk this came from (for deduplication) + region.chunk_index = i + region.chunk_y_range = (start_y, end_y) + + all_regions.extend(chunk_regions) + chunk_offsets.append(start_y) + + self._log(f" Found {len(chunk_regions)} text regions in chunk {i+1}", "info") + + finally: + # Clean up temp file + import os + if os.path.exists(chunk_path): + os.remove(chunk_path) + + # Remove duplicate regions from overlapping areas + self._log(f"\n 🔍 Deduplicating regions from overlaps...", "info") + unique_regions = self._deduplicate_chunk_regions(all_regions, chunk_overlap) + + self._log(f" Total regions: {len(all_regions)} → {len(unique_regions)} after deduplication", "info") + + if not unique_regions: + self._log("⚠️ No text regions detected in webtoon", "warning") + result['errors'].append("No text regions detected") + return result + + # Now process the regions as normal + self._log(f"\n📍 Translating {len(unique_regions)} unique regions", "info") + + # Translate regions + if self.full_page_context_enabled: + translations = self.translate_full_page_context(unique_regions, image_path) + for region in unique_regions: + if region.text in translations: + region.translated_text = translations[region.text] + else: + unique_regions = self.translate_regions(unique_regions, image_path) + + # Create mask and inpaint + self._log(f"\n🎨 Creating mask and inpainting...", "info") + mask = self.create_text_mask(image, unique_regions) + + if self.skip_inpainting: + inpainted = image.copy() + else: + inpainted = self.inpaint_regions(image, mask) + + # Render translated text + self._log(f"✍️ Rendering translated text...", "info") + final_image = self.render_translated_text(inpainted, unique_regions) + + # Save output + if not output_path: + base, ext = os.path.splitext(image_path) + output_path = f"{base}_translated{ext}" + + cv2.imwrite(output_path, final_image) + + result['output_path'] = output_path + result['regions'] = [r.to_dict() for r in unique_regions] + result['success'] = True + result['format_info']['chunks_processed'] = num_chunks + + self._log(f"\n✅ Webtoon processing complete: {output_path}", "success") + + return result + + except Exception as e: + error_msg = f"Error processing webtoon chunks: {str(e)}" + self._log(f"❌ {error_msg}", "error") + result['errors'].append(error_msg) + return result + + def _deduplicate_chunk_regions(self, regions: List, overlap_height: int) -> List: + """Remove duplicate regions from overlapping chunk areas""" + if not regions: + return regions + + # Sort regions by y position + regions.sort(key=lambda r: r.bounding_box[1]) + + unique_regions = [] + used_indices = set() + + for i, region1 in enumerate(regions): + if i in used_indices: + continue + + # Check if this region is in an overlap zone + x1, y1, w1, h1 = region1.bounding_box + chunk_idx = region1.chunk_index if hasattr(region1, 'chunk_index') else 0 + chunk_y_start, chunk_y_end = region1.chunk_y_range if hasattr(region1, 'chunk_y_range') else (0, float('inf')) + + # Check if region is near chunk boundary (in overlap zone) + in_overlap_zone = (y1 < 
chunk_y_start + overlap_height) and chunk_idx > 0 + + if in_overlap_zone: + # Look for duplicate in previous chunk's regions + found_duplicate = False + + for j, region2 in enumerate(regions): + if j >= i or j in used_indices: + continue + + if hasattr(region2, 'chunk_index') and region2.chunk_index == chunk_idx - 1: + x2, y2, w2, h2 = region2.bounding_box + + # Check if regions are the same (similar position and size) + if (abs(x1 - x2) < 20 and + abs(y1 - y2) < 20 and + abs(w1 - w2) < 20 and + abs(h1 - h2) < 20): + + # Check text similarity + if region1.text == region2.text: + # This is a duplicate + found_duplicate = True + used_indices.add(i) + self._log(f" Removed duplicate: '{region1.text[:30]}...'", "debug") + break + + if not found_duplicate: + unique_regions.append(region1) + used_indices.add(i) + else: + # Not in overlap zone, keep it + unique_regions.append(region1) + used_indices.add(i) + + return unique_regions + + def _save_intermediate_image(self, original_path: str, image, stage: str, debug_base_dir: str = None): + """Save intermediate processing stages under translated_images/debug or provided base dir""" + if debug_base_dir is None: + translated_dir = os.path.join(os.path.dirname(original_path), 'translated_images') + debug_dir = os.path.join(translated_dir, 'debug') + else: + debug_dir = os.path.join(debug_base_dir, 'debug') + os.makedirs(debug_dir, exist_ok=True) + + base_name = os.path.splitext(os.path.basename(original_path))[0] + output_path = os.path.join(debug_dir, f"{base_name}_{stage}.png") + + cv2.imwrite(output_path, image) + self._log(f" 💾 Saved {stage} image: {output_path}") diff --git a/ocr_manager.py b/ocr_manager.py new file mode 100644 index 0000000000000000000000000000000000000000..53ed6e28755a64a79d5388df7d90bd5c19388f7e --- /dev/null +++ b/ocr_manager.py @@ -0,0 +1,1904 @@ +# ocr_manager.py +""" +OCR Manager for handling multiple OCR providers +Handles installation, model downloading, and OCR processing +Updated with HuggingFace donut model and proper bubble detection integration +""" +import os +import sys +import cv2 +import json +import subprocess +import threading +import traceback +from typing import List, Dict, Optional, Tuple, Any +import numpy as np +from dataclasses import dataclass +from PIL import Image +import logging +import time +import random +import base64 +import io +import requests + +try: + import gptqmodel + HAS_GPTQ = True +except ImportError: + try: + import auto_gptq + HAS_GPTQ = True + except ImportError: + HAS_GPTQ = False + +try: + import optimum + HAS_OPTIMUM = True +except ImportError: + HAS_OPTIMUM = False + +try: + import accelerate + HAS_ACCELERATE = True +except ImportError: + HAS_ACCELERATE = False + +logger = logging.getLogger(__name__) + +@dataclass +class OCRResult: + """Unified OCR result format with built-in sanitization to prevent data corruption.""" + text: str + bbox: Tuple[int, int, int, int] # x, y, w, h + confidence: float + vertices: Optional[List[Tuple[int, int]]] = None + + def __post_init__(self): + """ + This special method is called automatically after the object is created. + It acts as a final safeguard to ensure the 'text' attribute is ALWAYS a clean string. + """ + # --- THIS IS THE DEFINITIVE FIX --- + # If the text we received is a tuple, we extract the first element. + # This makes it impossible for a tuple to exist in a finished object. + if isinstance(self.text, tuple): + # Log that we are fixing a critical data error. + print(f"CRITICAL WARNING: Corrupted tuple detected in OCRResult. 
Sanitizing '{self.text}' to '{self.text[0]}'.") + self.text = self.text[0] + + # Ensure the final result is always a stripped string. + self.text = str(self.text).strip() + +class OCRProvider: + """Base class for OCR providers""" + + def __init__(self, log_callback=None): + # Set thread limits early if environment indicates single-threaded mode + try: + if os.environ.get('OMP_NUM_THREADS') == '1': + # Already in single-threaded mode, ensure it's applied to this process + try: + import sys + if 'torch' in sys.modules: + import torch + torch.set_num_threads(1) + except (ImportError, RuntimeError, AttributeError): + pass + try: + import cv2 + cv2.setNumThreads(1) + except (ImportError, AttributeError): + pass + except Exception: + pass + + self.log_callback = log_callback + self.is_installed = False + self.is_loaded = False + self.model = None + self.stop_flag = None + self._stopped = False + + def _log(self, message: str, level: str = "info"): + """Log message with stop suppression""" + # Suppress logs when stopped (allow only essential stop confirmation messages) + if self._check_stop(): + essential_stop_keywords = [ + "⏹️ Translation stopped by user", + "⏹️ OCR processing stopped", + "cleanup", "🧹" + ] + if not any(keyword in message for keyword in essential_stop_keywords): + return + + if self.log_callback: + self.log_callback(message, level) + else: + print(f"[{level.upper()}] {message}") + + def set_stop_flag(self, stop_flag): + """Set the stop flag for checking interruptions""" + self.stop_flag = stop_flag + self._stopped = False + + def _check_stop(self) -> bool: + """Check if stop has been requested""" + if self._stopped: + return True + if self.stop_flag and self.stop_flag.is_set(): + self._stopped = True + return True + # Check global manga translator cancellation + try: + from manga_translator import MangaTranslator + if MangaTranslator.is_globally_cancelled(): + self._stopped = True + return True + except Exception: + pass + return False + + def reset_stop_flags(self): + """Reset stop flags when starting new processing""" + self._stopped = False + + def check_installation(self) -> bool: + """Check if provider is installed""" + raise NotImplementedError + + def install(self, progress_callback=None) -> bool: + """Install the provider""" + raise NotImplementedError + + def load_model(self, **kwargs) -> bool: + """Load the OCR model""" + raise NotImplementedError + + def detect_text(self, image: np.ndarray, **kwargs) -> List[OCRResult]: + """Detect text in image""" + raise NotImplementedError + +class CustomAPIProvider(OCRProvider): + """Custom API OCR provider that uses existing GUI variables""" + + def __init__(self, log_callback=None): + super().__init__(log_callback) + + # Use EXISTING environment variables from TranslatorGUI + self.api_url = os.environ.get('OPENAI_CUSTOM_BASE_URL', '') + self.api_key = os.environ.get('API_KEY', '') or os.environ.get('OPENAI_API_KEY', '') + self.model_name = os.environ.get('MODEL', 'gpt-4o-mini') + + # OCR prompt - use system prompt or a dedicated OCR prompt variable + self.ocr_prompt = os.environ.get('OCR_SYSTEM_PROMPT', + os.environ.get('SYSTEM_PROMPT', + "YOU ARE AN OCR SYSTEM. YOUR ONLY JOB IS TEXT EXTRACTION.\n\n" + "CRITICAL RULES:\n" + "1. DO NOT TRANSLATE ANYTHING\n" + "2. DO NOT MODIFY THE TEXT\n" + "3. DO NOT EXPLAIN OR COMMENT\n" + "4. ONLY OUTPUT THE EXACT TEXT YOU SEE\n" + "5. 
PRESERVE NATURAL TEXT FLOW - DO NOT ADD UNNECESSARY LINE BREAKS\n\n" + "If you see Korean text, output it in Korean.\n" + "If you see Japanese text, output it in Japanese.\n" + "If you see Chinese text, output it in Chinese.\n" + "If you see English text, output it in English.\n\n" + "IMPORTANT: Only use line breaks where they naturally occur in the original text " + "(e.g., between dialogue lines or paragraphs). Do not break text mid-sentence or " + "between every word/character.\n\n" + "For vertical text common in manga/comics, transcribe it as a continuous line unless " + "there are clear visual breaks.\n\n" + "NEVER translate. ONLY extract exactly what is written.\n" + "Output ONLY the raw text, nothing else." + )) + + # Use existing temperature and token settings + self.temperature = float(os.environ.get('TRANSLATION_TEMPERATURE', '0.01')) + # Don't hardcode to 8192 - get fresh value when actually used + self.max_tokens = int(os.environ.get('MAX_OUTPUT_TOKENS', '4096')) + + # Image settings from existing compression variables + self.image_format = 'jpeg' if os.environ.get('IMAGE_COMPRESSION_FORMAT', 'auto') != 'png' else 'png' + self.image_quality = int(os.environ.get('JPEG_QUALITY', '100')) + + # Simple defaults + self.api_format = 'openai' # Most custom endpoints are OpenAI-compatible + self.timeout = int(os.environ.get('CHUNK_TIMEOUT', '30')) + self.api_headers = {} # Additional custom headers + + # Retry configuration for Custom API OCR calls + self.max_retries = int(os.environ.get('CUSTOM_OCR_MAX_RETRIES', '3')) + self.retry_initial_delay = float(os.environ.get('CUSTOM_OCR_RETRY_INITIAL_DELAY', '0.8')) + self.retry_backoff = float(os.environ.get('CUSTOM_OCR_RETRY_BACKOFF', '1.8')) + self.retry_jitter = float(os.environ.get('CUSTOM_OCR_RETRY_JITTER', '0.4')) + self.retry_on_empty = os.environ.get('CUSTOM_OCR_RETRY_ON_EMPTY', '1') == '1' + + def check_installation(self) -> bool: + """Always installed - uses UnifiedClient""" + self.is_installed = True + return True + + def install(self, progress_callback=None) -> bool: + """No installation needed for API-based provider""" + return self.check_installation() + + def load_model(self, **kwargs) -> bool: + """Initialize UnifiedClient with current settings""" + try: + from unified_api_client import UnifiedClient + + # Support passing API key from GUI if available + if 'api_key' in kwargs: + api_key = kwargs['api_key'] + else: + api_key = os.environ.get('API_KEY', '') or os.environ.get('OPENAI_API_KEY', '') + + if 'model' in kwargs: + model = kwargs['model'] + else: + model = os.environ.get('MODEL', 'gpt-4o-mini') + + if not api_key: + self._log("❌ No API key configured", "error") + return False + + # Create UnifiedClient just like translations do + self.client = UnifiedClient(model=model, api_key=api_key) + + #self._log(f"✅ Using {model} for OCR via UnifiedClient") + self.is_loaded = True + return True + + except Exception as e: + self._log(f"❌ Failed to initialize UnifiedClient: {str(e)}", "error") + return False + + def _test_connection(self) -> bool: + """Test API connection with a simple request""" + try: + # Create a small test image + test_image = np.ones((100, 100, 3), dtype=np.uint8) * 255 + cv2.putText(test_image, "TEST", (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 2) + + # Encode image + image_base64 = self._encode_image(test_image) + + # Prepare test request based on API format + if self.api_format == 'openai': + test_payload = { + "model": self.model_name, + "messages": [ + { + "role": "user", + "content": [ + {"type": 
"text", "text": "What text do you see?"}, + {"type": "image_url", "image_url": {"url": f"data:image/{self.image_format};base64,{image_base64}"}} + ] + } + ], + "max_tokens": 50 + } + else: + # For other formats, just try a basic health check + return True + + headers = self._prepare_headers() + response = requests.post( + self.api_url, + headers=headers, + json=test_payload, + timeout=10 + ) + + return response.status_code == 200 + + except Exception: + return False + + def _encode_image(self, image: np.ndarray) -> str: + """Encode numpy array to base64 string""" + # Convert BGR to RGB if needed + if len(image.shape) == 3 and image.shape[2] == 3: + image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + else: + image_rgb = image + + # Convert to PIL Image + pil_image = Image.fromarray(image_rgb) + + # Save to bytes buffer + buffer = io.BytesIO() + if self.image_format.lower() == 'png': + pil_image.save(buffer, format='PNG') + else: + pil_image.save(buffer, format='JPEG', quality=self.image_quality) + + # Encode to base64 + buffer.seek(0) + image_base64 = base64.b64encode(buffer.read()).decode('utf-8') + + return image_base64 + + def _prepare_headers(self) -> dict: + """Prepare request headers""" + headers = { + "Content-Type": "application/json" + } + + # Add API key if configured + if self.api_key: + if self.api_format == 'anthropic': + headers["x-api-key"] = self.api_key + else: + headers["Authorization"] = f"Bearer {self.api_key}" + + # Add any custom headers + headers.update(self.api_headers) + + return headers + + def _prepare_request_payload(self, image_base64: str) -> dict: + """Prepare request payload based on API format""" + if self.api_format == 'openai': + return { + "model": self.model_name, + "messages": [ + { + "role": "user", + "content": [ + {"type": "text", "text": self.ocr_prompt}, + { + "type": "image_url", + "image_url": { + "url": f"data:image/{self.image_format};base64,{image_base64}" + } + } + ] + } + ], + "max_tokens": self.max_tokens, + "temperature": self.temperature + } + + elif self.api_format == 'anthropic': + return { + "model": self.model_name, + "max_tokens": self.max_tokens, + "temperature": self.temperature, + "messages": [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": self.ocr_prompt + }, + { + "type": "image", + "source": { + "type": "base64", + "media_type": f"image/{self.image_format}", + "data": image_base64 + } + } + ] + } + ] + } + + else: + # Custom format - use environment variable for template + template = os.environ.get('CUSTOM_OCR_REQUEST_TEMPLATE', '{}') + payload = json.loads(template) + + # Replace placeholders + payload_str = json.dumps(payload) + payload_str = payload_str.replace('{{IMAGE_BASE64}}', image_base64) + payload_str = payload_str.replace('{{PROMPT}}', self.ocr_prompt) + payload_str = payload_str.replace('{{MODEL}}', self.model_name) + payload_str = payload_str.replace('{{MAX_TOKENS}}', str(self.max_tokens)) + payload_str = payload_str.replace('{{TEMPERATURE}}', str(self.temperature)) + + return json.loads(payload_str) + + def _extract_text_from_response(self, response_data: dict) -> str: + """Extract text from API response based on format""" + try: + if self.api_format == 'openai': + # OpenAI format: response.choices[0].message.content + return response_data.get('choices', [{}])[0].get('message', {}).get('content', '') + + elif self.api_format == 'anthropic': + # Anthropic format: response.content[0].text + content = response_data.get('content', []) + if content and isinstance(content, list): + return 
content[0].get('text', '') + return '' + + else: + # Custom format - use environment variable for path + response_path = os.environ.get('CUSTOM_OCR_RESPONSE_PATH', 'text') + + # Navigate through the response using the path + result = response_data + for key in response_path.split('.'): + if isinstance(result, dict): + result = result.get(key, '') + elif isinstance(result, list) and key.isdigit(): + idx = int(key) + result = result[idx] if idx < len(result) else '' + else: + result = '' + break + + return str(result) + + except Exception as e: + self._log(f"Failed to extract text from response: {e}", "error") + return '' + + def detect_text(self, image: np.ndarray, **kwargs) -> List[OCRResult]: + """Process image using UnifiedClient.send_image()""" + results = [] + + try: + # Get fresh max_tokens from environment - GUI will have set this + max_tokens = int(os.environ.get('MAX_OUTPUT_TOKENS', '4096')) + if not self.is_loaded: + if not self.load_model(): + return results + + import cv2 + from PIL import Image + import base64 + import io + + # Convert numpy array to PIL Image + image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + pil_image = Image.fromarray(image_rgb) + h, w = image.shape[:2] + + # Convert PIL Image to base64 string + buffer = io.BytesIO() + + # Use the image format from settings + if self.image_format.lower() == 'png': + pil_image.save(buffer, format='PNG') + else: + pil_image.save(buffer, format='JPEG', quality=self.image_quality) + + buffer.seek(0) + image_base64 = base64.b64encode(buffer.read()).decode('utf-8') + + # For OpenAI vision models, we need BOTH: + # 1. System prompt with instructions + # 2. User message that includes the image + messages = [ + { + "role": "system", + "content": self.ocr_prompt # The OCR instruction as system prompt + }, + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Image:" # Minimal text, just to have something + }, + { + "type": "image_url", + "image_url": { + "url": f"data:image/jpeg;base64,{image_base64}" + } + } + ] + } + ] + + # Now send this properly formatted message + # The UnifiedClient should handle this correctly + # But we're NOT using send_image, we're using regular send + + # Retry-aware call + from unified_api_client import UnifiedClientError # local import to avoid hard dependency at module import time + max_attempts = max(1, self.max_retries) + attempt = 0 + last_error = None + + # Common refusal/error phrases that indicate a non-OCR response + refusal_phrases = [ + "I can't extract", "I cannot extract", + "I'm sorry", "I am sorry", + "I'm unable", "I am unable", + "cannot process images", + "I can't help with that", + "cannot view images", + "no text in the image" + ] + + while attempt < max_attempts: + # Check for stop before each attempt + if self._check_stop(): + self._log("⏹️ OCR processing stopped by user", "warning") + return results + + try: + response = self.client.send( + messages=messages, + temperature=self.temperature, + max_tokens=max_tokens + ) + + # Extract content from response object + content, finish_reason = response + + # Validate content + has_content = bool(content and str(content).strip()) + refused = False + if has_content: + # Filter out explicit failure markers + if "[" in content and "FAILED]" in content: + refused = True + elif any(phrase.lower() in content.lower() for phrase in refusal_phrases): + refused = True + + # Decide success or retry + if has_content and not refused: + text = str(content).strip() + results.append(OCRResult( + text=text, + bbox=(0, 0, w, h), + 
confidence=kwargs.get('confidence', 0.85), + vertices=[(0, 0), (w, 0), (w, h), (0, h)] + )) + self._log(f"✅ Detected: {text[:50]}...") + break # success + else: + reason = "empty result" if not has_content else "refusal/non-OCR response" + last_error = f"{reason} (finish_reason: {finish_reason})" + # Check if we should retry on empty or refusal + should_retry = (not has_content and self.retry_on_empty) or refused + attempt += 1 + if attempt >= max_attempts or not should_retry: + # No more retries or shouldn't retry + if not has_content: + self._log(f"⚠️ No text detected (finish_reason: {finish_reason})") + else: + self._log(f"❌ Model returned non-OCR response: {str(content)[:120]}", "warning") + break + # Backoff before retrying + delay = self.retry_initial_delay * (self.retry_backoff ** (attempt - 1)) + random.uniform(0, self.retry_jitter) + self._log(f"🔄 Retry {attempt}/{max_attempts - 1} after {delay:.1f}s due to {reason}...", "warning") + time.sleep(delay) + time.sleep(0.1) # Brief pause for stability + self._log("💤 OCR retry pausing briefly for stability", "debug") + continue + + except UnifiedClientError as ue: + msg = str(ue) + last_error = msg + # Do not retry on explicit user cancellation + if 'cancelled' in msg.lower() or 'stopped by user' in msg.lower(): + self._log(f"❌ OCR cancelled: {msg}", "error") + break + attempt += 1 + if attempt >= max_attempts: + self._log(f"❌ OCR failed after {attempt} attempts: {msg}", "error") + break + delay = self.retry_initial_delay * (self.retry_backoff ** (attempt - 1)) + random.uniform(0, self.retry_jitter) + self._log(f"🔄 API error, retry {attempt}/{max_attempts - 1} after {delay:.1f}s: {msg}", "warning") + time.sleep(delay) + time.sleep(0.1) # Brief pause for stability + self._log("💤 OCR API error retry pausing briefly for stability", "debug") + continue + except Exception as e_inner: + last_error = str(e_inner) + attempt += 1 + if attempt >= max_attempts: + self._log(f"❌ OCR exception after {attempt} attempts: {last_error}", "error") + break + delay = self.retry_initial_delay * (self.retry_backoff ** (attempt - 1)) + random.uniform(0, self.retry_jitter) + self._log(f"🔄 Exception, retry {attempt}/{max_attempts - 1} after {delay:.1f}s: {last_error}", "warning") + time.sleep(delay) + time.sleep(0.1) # Brief pause for stability + self._log("💤 OCR exception retry pausing briefly for stability", "debug") + continue + + except Exception as e: + self._log(f"❌ Error: {str(e)}", "error") + import traceback + self._log(traceback.format_exc(), "debug") + + return results + +class MangaOCRProvider(OCRProvider): + """Manga OCR provider using HuggingFace model directly""" + + def __init__(self, log_callback=None): + super().__init__(log_callback) + self.processor = None + self.model = None + self.tokenizer = None + + def check_installation(self) -> bool: + """Check if transformers is installed""" + try: + import transformers + import torch + self.is_installed = True + return True + except ImportError: + return False + + def install(self, progress_callback=None) -> bool: + """Install transformers and torch""" + pass + + def _is_valid_local_model_dir(self, path: str) -> bool: + """Check that a local HF model directory has required files.""" + try: + if not path or not os.path.isdir(path): + return False + needed_any_weights = any( + os.path.exists(os.path.join(path, name)) for name in ( + 'pytorch_model.bin', + 'model.safetensors' + ) + ) + has_config = os.path.exists(os.path.join(path, 'config.json')) + has_processor = ( + os.path.exists(os.path.join(path, 
'preprocessor_config.json')) or + os.path.exists(os.path.join(path, 'processor_config.json')) + ) + has_tokenizer = ( + os.path.exists(os.path.join(path, 'tokenizer.json')) or + os.path.exists(os.path.join(path, 'tokenizer_config.json')) + ) + return has_config and needed_any_weights and has_processor and has_tokenizer + except Exception: + return False + + def load_model(self, **kwargs) -> bool: + """Load the manga-ocr model, preferring a local directory to avoid re-downloading""" + print("\n>>> MangaOCRProvider.load_model() called") + try: + if not self.is_installed and not self.check_installation(): + print("ERROR: Transformers not installed") + self._log("❌ Transformers not installed", "error") + return False + + # Always disable progress bars to avoid tqdm issues in some environments + import os + os.environ.setdefault("HF_HUB_DISABLE_PROGRESS_BARS", "1") + + from transformers import VisionEncoderDecoderModel, AutoTokenizer, AutoImageProcessor + import torch + + # Prefer a local model directory if present to avoid any Hub access + candidates = [] + env_local = os.environ.get("MANGA_OCR_LOCAL_DIR") + if env_local: + candidates.append(env_local) + + # Project root one level up from this file + root_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) + candidates.append(os.path.join(root_dir, 'models', 'manga-ocr-base')) + candidates.append(os.path.join(root_dir, 'models', 'kha-white', 'manga-ocr-base')) + + model_source = None + local_only = False + # Find a valid local dir + for cand in candidates: + if self._is_valid_local_model_dir(cand): + model_source = cand + local_only = True + break + + # If no valid local dir, use Hub + if not model_source: + model_source = "kha-white/manga-ocr-base" + # Make sure we are not forcing offline mode + if os.environ.get("HF_HUB_OFFLINE") == "1": + try: + del os.environ["HF_HUB_OFFLINE"] + except Exception: + pass + self._log("🔥 Loading manga-ocr model from Hugging Face Hub") + self._log(f" Repo: {model_source}") + else: + # Only set offline when local dir is fully valid + os.environ.setdefault("HF_HUB_OFFLINE", "1") + self._log("🔥 Loading manga-ocr model from local directory") + self._log(f" Local path: {model_source}") + + # Decide target device once; we will move after full CPU load to avoid meta tensors + use_cuda = torch.cuda.is_available() + + # Try loading components, falling back to Hub if local-only fails + def _load_components(source: str, local_flag: bool): + self._log(" Loading tokenizer...") + tok = AutoTokenizer.from_pretrained(source, local_files_only=local_flag) + + self._log(" Loading image processor...") + try: + from transformers import AutoProcessor + except Exception: + AutoProcessor = None + try: + proc = AutoImageProcessor.from_pretrained(source, local_files_only=local_flag) + except Exception as e_proc: + if AutoProcessor is not None: + self._log(f" ⚠️ AutoImageProcessor failed: {e_proc}. 
Trying AutoProcessor...", "warning") + proc = AutoProcessor.from_pretrained(source, local_files_only=local_flag) + else: + raise + + self._log(" Loading model...") + # Prevent meta tensors by forcing full materialization on CPU at load time + os.environ.setdefault('TORCHDYNAMO_DISABLE', '1') + mdl = VisionEncoderDecoderModel.from_pretrained( + source, + local_files_only=local_flag, + low_cpu_mem_usage=False, + device_map=None, + torch_dtype=torch.float32 # Use torch_dtype instead of dtype + ) + return tok, proc, mdl + + try: + self.tokenizer, self.processor, self.model = _load_components(model_source, local_only) + except Exception as e_local: + if local_only: + # Fallback to Hub once if local fails + self._log(f" ⚠️ Local model load failed: {e_local}", "warning") + try: + if os.environ.get("HF_HUB_OFFLINE") == "1": + del os.environ["HF_HUB_OFFLINE"] + except Exception: + pass + model_source = "kha-white/manga-ocr-base" + local_only = False + self._log(" Retrying from Hugging Face Hub...") + self.tokenizer, self.processor, self.model = _load_components(model_source, local_only) + else: + raise + + # Move to CUDA only after full CPU materialization + target_device = 'cpu' + if use_cuda: + try: + self.model = self.model.to('cuda') + target_device = 'cuda' + except Exception as move_err: + self._log(f" ⚠️ Could not move model to CUDA: {move_err}", "warning") + target_device = 'cpu' + + # Finalize eval mode + self.model.eval() + + # Sanity-check: ensure no parameter remains on 'meta' device + try: + for n, p in self.model.named_parameters(): + dev = getattr(p, 'device', None) + if dev is not None and getattr(dev, 'type', '') == 'meta': + raise RuntimeError(f"Parameter {n} is on 'meta' after load") + except Exception as sanity_err: + self._log(f"❌ Manga-OCR model load sanity check failed: {sanity_err}", "error") + return False + + print(f"SUCCESS: Model loaded on {target_device.upper()}") + self._log(f" ✅ Model loaded on {target_device.upper()}") + self.is_loaded = True + self._log("✅ Manga OCR model ready") + print(">>> Returning True from load_model()") + return True + + except Exception as e: + print(f"\nEXCEPTION in load_model: {e}") + import traceback + print(traceback.format_exc()) + self._log(f"❌ Failed to load manga-ocr model: {str(e)}", "error") + self._log(traceback.format_exc(), "error") + try: + if 'local_only' in locals() and local_only: + self._log("Hint: Local load failed. Ensure your models/manga-ocr-base contains required files (config.json, preprocessor_config.json, tokenizer.json or tokenizer_config.json, and model weights).", "warning") + except Exception: + pass + return False + + def _run_ocr(self, pil_image): + """Run OCR on a PIL image using the HuggingFace model""" + import torch + + # Process image (keyword arg for broader compatibility across transformers versions) + inputs = self.processor(images=pil_image, return_tensors="pt") + pixel_values = inputs["pixel_values"] + + # Move to same device as model + try: + model_device = next(self.model.parameters()).device + except StopIteration: + model_device = torch.device('cpu') + pixel_values = pixel_values.to(model_device) + + # Generate text + with torch.no_grad(): + generated_ids = self.model.generate(pixel_values) + + # Decode + generated_text = self.tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0] + + return generated_text + + def detect_text(self, image: np.ndarray, **kwargs) -> List[OCRResult]: + """ + Process the image region passed to it. + This could be a bubble region or the full image. 
+ """ + results = [] + + # Check for stop at start + if self._check_stop(): + self._log("⏹️ Manga-OCR processing stopped by user", "warning") + return results + + try: + if not self.is_loaded: + if not self.load_model(): + return results + + import cv2 + from PIL import Image + + # Get confidence from kwargs + confidence = kwargs.get('confidence', 0.7) + + # Convert numpy array to PIL + image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + pil_image = Image.fromarray(image_rgb) + h, w = image.shape[:2] + + self._log("🔍 Processing region with manga-ocr...") + + # Check for stop before inference + if self._check_stop(): + self._log("⏹️ Manga-OCR inference stopped by user", "warning") + return results + + # Run OCR on the image region + text = self._run_ocr(pil_image) + + if text and text.strip(): + # Return result for this region with its actual bbox + results.append(OCRResult( + text=text.strip(), + bbox=(0, 0, w, h), # Relative to the region passed in + confidence=confidence, + vertices=[(0, 0), (w, 0), (w, h), (0, h)] + )) + self._log(f"✅ Detected text: {text[:50]}...") + + except Exception as e: + self._log(f"❌ Error in manga-ocr: {str(e)}", "error") + + return results + +class Qwen2VL(OCRProvider): + """OCR using Qwen2-VL - Vision Language Model that can read Korean text""" + + def __init__(self, log_callback=None): + super().__init__(log_callback) + self.processor = None + self.model = None + self.tokenizer = None + + # Get OCR prompt from environment or use default + self.ocr_prompt = os.environ.get('OCR_SYSTEM_PROMPT', + "YOU ARE AN OCR SYSTEM. YOUR ONLY JOB IS TEXT EXTRACTION.\n\n" + "CRITICAL RULES:\n" + "1. DO NOT TRANSLATE ANYTHING\n" + "2. DO NOT MODIFY THE TEXT\n" + "3. DO NOT EXPLAIN OR COMMENT\n" + "4. ONLY OUTPUT THE EXACT TEXT YOU SEE\n" + "5. PRESERVE NATURAL TEXT FLOW - DO NOT ADD UNNECESSARY LINE BREAKS\n\n" + "If you see Korean text, output it in Korean.\n" + "If you see Japanese text, output it in Japanese.\n" + "If you see Chinese text, output it in Chinese.\n" + "If you see English text, output it in English.\n\n" + "IMPORTANT: Only use line breaks where they naturally occur in the original text " + "(e.g., between dialogue lines or paragraphs). Do not break text mid-sentence or " + "between every word/character.\n\n" + "For vertical text common in manga/comics, transcribe it as a continuous line unless " + "there are clear visual breaks.\n\n" + "NEVER translate. ONLY extract exactly what is written.\n" + "Output ONLY the raw text, nothing else." 
+ ) + + def set_ocr_prompt(self, prompt: str): + """Allow setting the OCR prompt dynamically""" + self.ocr_prompt = prompt + + def check_installation(self) -> bool: + """Check if required packages are installed""" + try: + import transformers + import torch + self.is_installed = True + return True + except ImportError: + return False + + def install(self, progress_callback=None) -> bool: + """Install requirements for Qwen2-VL""" + pass + + def load_model(self, model_size=None, **kwargs) -> bool: + """Load Qwen2-VL model with size selection""" + self._log(f"DEBUG: load_model called with model_size={model_size}") + + try: + if not self.is_installed and not self.check_installation(): + self._log("❌ Not installed", "error") + return False + + self._log("🔥 Loading Qwen2-VL for Advanced OCR...") + + from transformers import AutoProcessor, AutoTokenizer + import torch + + # Model options + model_options = { + "1": "Qwen/Qwen2-VL-2B-Instruct", + "2": "Qwen/Qwen2-VL-7B-Instruct", + "3": "Qwen/Qwen2-VL-72B-Instruct", + "4": "custom" + } + # Check for a saved model-size preference first (defaults to the 2B model) + if model_size is None: + # Try to get from environment or config + import os + model_size = os.environ.get('QWEN2VL_MODEL_SIZE', '1') + + # Determine which model to load + if model_size and str(model_size).startswith("custom:"): + # Custom model passed with ID + model_id = str(model_size).replace("custom:", "") + self.loaded_model_size = "Custom" + self.model_id = model_id + self._log(f"Loading custom model: {model_id}") + elif model_size == "4": + # Custom option selected but no ID - shouldn't happen + self._log("❌ Custom model selected but no ID provided", "error") + return False + elif model_size and str(model_size) in model_options: + # Standard model option + option = model_options[str(model_size)] + if option == "custom": + self._log("❌ Custom model needs an ID", "error") + return False + model_id = option + # Set loaded_model_size for status display + if model_size == "1": + self.loaded_model_size = "2B" + elif model_size == "2": + self.loaded_model_size = "7B" + elif model_size == "3": + self.loaded_model_size = "72B" + else: + # Fall back to the 2B model (option "1") when no valid size is given + model_id = model_options["1"] + self.loaded_model_size = "2B" + self._log("No model size specified, defaulting to 2B") + + self._log(f"Loading model: {model_id}") + + # Load processor and tokenizer + self.processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True) + self.tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True) + + # Load the model - let it figure out the class dynamically + if torch.cuda.is_available(): + self._log(f"GPU: {torch.cuda.get_device_name(0)}") + # Use auto model class + from transformers import AutoModelForVision2Seq + self.model = AutoModelForVision2Seq.from_pretrained( + model_id, + dtype=torch.float16, + device_map="auto", + trust_remote_code=True + ) + self._log("✅ Model loaded on GPU") + else: + self._log("Loading on CPU...") + from transformers import AutoModelForVision2Seq + self.model = AutoModelForVision2Seq.from_pretrained( + model_id, + dtype=torch.float32, + trust_remote_code=True + ) + self._log("✅ Model loaded on CPU") + + self.model.eval() + self.is_loaded = True + self._log("✅ Qwen2-VL ready for Advanced OCR!") + return True + + except Exception as e: + self._log(f"❌ Failed to load: {str(e)}", "error") + import traceback + 
self._log(traceback.format_exc(), "debug") + return False + + def detect_text(self, image: np.ndarray, **kwargs) -> List[OCRResult]: + """Process image with Qwen2-VL for Korean text extraction""" + results = [] + if hasattr(self, 'model_id'): + self._log(f"DEBUG: Using model: {self.model_id}", "debug") + + # Check if OCR prompt was passed in kwargs (for dynamic updates) + if 'ocr_prompt' in kwargs: + self.ocr_prompt = kwargs['ocr_prompt'] + + try: + if not self.is_loaded: + if not self.load_model(): + return results + + import cv2 + from PIL import Image + import torch + + # Convert to PIL + image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + pil_image = Image.fromarray(image_rgb) + h, w = image.shape[:2] + + self._log(f"🔍 Processing with Qwen2-VL ({w}x{h} pixels)...") + + # Use the configurable OCR prompt + messages = [ + { + "role": "user", + "content": [ + { + "type": "image", + "image": pil_image, + }, + { + "type": "text", + "text": self.ocr_prompt # Use the configurable prompt + } + ] + } + ] + + # Alternative simpler prompt if the above still causes issues: + # "text": "OCR: Extract text as-is" + + # Process with Qwen2-VL + text = self.processor.apply_chat_template( + messages, + tokenize=False, + add_generation_prompt=True + ) + + inputs = self.processor( + text=[text], + images=[pil_image], + padding=True, + return_tensors="pt" + ) + + # Get the device and dtype the model is currently on + model_device = next(self.model.parameters()).device + model_dtype = next(self.model.parameters()).dtype + + # Move inputs to the same device as the model and cast float tensors to model dtype + try: + # Move first + inputs = inputs.to(model_device) + # Then align dtypes only for floating tensors (e.g., pixel_values) + for k, v in inputs.items(): + if isinstance(v, torch.Tensor) and torch.is_floating_point(v): + inputs[k] = v.to(model_dtype) + except Exception: + # Fallback: ensure at least pixel_values is correct if present + try: + if isinstance(inputs, dict) and "pixel_values" in inputs: + pv = inputs["pixel_values"].to(model_device) + if torch.is_floating_point(pv): + inputs["pixel_values"] = pv.to(model_dtype) + except Exception: + pass + + # Ensure pixel_values explicitly matches model dtype if present + try: + if isinstance(inputs, dict) and "pixel_values" in inputs: + inputs["pixel_values"] = inputs["pixel_values"].to(device=model_device, dtype=model_dtype) + except Exception: + pass + + # Generate text with stricter parameters to avoid creative responses + use_amp = (hasattr(torch, 'cuda') and model_device.type == 'cuda' and model_dtype in (torch.float16, torch.bfloat16)) + autocast_dev = 'cuda' if model_device.type == 'cuda' else 'cpu' + autocast_dtype = model_dtype if model_dtype in (torch.float16, torch.bfloat16) else None + + with torch.no_grad(): + if use_amp and autocast_dtype is not None: + with torch.autocast(autocast_dev, dtype=autocast_dtype): + generated_ids = self.model.generate( + **inputs, + max_new_tokens=128, # Reduced from 512 - manga bubbles are typically short + do_sample=False, # Keep deterministic + temperature=0.01, # Keep your very low temperature + top_p=1.0, # Keep no nucleus sampling + repetition_penalty=1.0, # Keep no repetition penalty + num_beams=1, # Ensure greedy decoding (faster than beam search) + use_cache=True, # Enable KV cache for speed + early_stopping=True, # Stop at EOS token + pad_token_id=self.tokenizer.pad_token_id, # Proper padding + eos_token_id=self.tokenizer.eos_token_id, # Proper stopping + ) + else: + generated_ids = self.model.generate( 
+ **inputs, + max_new_tokens=128, # Reduced from 512 - manga bubbles are typically short + do_sample=False, # Keep deterministic + temperature=0.01, # Keep your very low temperature + top_p=1.0, # Keep no nucleus sampling + repetition_penalty=1.0, # Keep no repetition penalty + num_beams=1, # Ensure greedy decoding (faster than beam search) + use_cache=True, # Enable KV cache for speed + early_stopping=True, # Stop at EOS token + pad_token_id=self.tokenizer.pad_token_id, # Proper padding + eos_token_id=self.tokenizer.eos_token_id, # Proper stopping + ) + + # Decode the output + generated_ids_trimmed = [ + out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids) + ] + output_text = self.processor.batch_decode( + generated_ids_trimmed, + skip_special_tokens=True, + clean_up_tokenization_spaces=False + )[0] + + if output_text and output_text.strip(): + text = output_text.strip() + + # ADDED: Filter out any response that looks like an explanation or apology + # Common patterns that indicate the model is being "helpful" instead of just extracting + unwanted_patterns = [ + "죄송합니다", # "I apologize" + "sorry", + "apologize", + "이미지에는", # "in this image" + "텍스트가 없습니다", # "there is no text" + "I cannot", + "I don't see", + "There is no", + "질문이 있으시면", # "if you have questions" + ] + + # Check if response contains unwanted patterns + text_lower = text.lower() + is_explanation = any(pattern.lower() in text_lower for pattern in unwanted_patterns) + + # Also check if the response is suspiciously long for a bubble + # Most manga bubbles are short, if we get 50+ chars it might be an explanation + is_too_long = len(text) > 100 and ('.' in text or ',' in text or '!' in text) + + if is_explanation or is_too_long: + self._log(f"⚠️ Model returned explanation instead of text, ignoring", "warning") + # Return empty result or just skip this region + return results + + # Check language + has_korean = any('\uAC00' <= c <= '\uD7AF' for c in text) + has_japanese = any('\u3040' <= c <= '\u309F' or '\u30A0' <= c <= '\u30FF' for c in text) + has_chinese = any('\u4E00' <= c <= '\u9FFF' for c in text) + + if has_korean: + self._log(f"✅ Korean detected: {text[:50]}...") + elif has_japanese: + self._log(f"✅ Japanese detected: {text[:50]}...") + elif has_chinese: + self._log(f"✅ Chinese detected: {text[:50]}...") + else: + self._log(f"✅ Text: {text[:50]}...") + + results.append(OCRResult( + text=text, + bbox=(0, 0, w, h), + confidence=0.9, + vertices=[(0, 0), (w, 0), (w, h), (0, h)] + )) + else: + self._log("⚠️ No text detected", "warning") + + except Exception as e: + self._log(f"❌ Error: {str(e)}", "error") + import traceback + self._log(traceback.format_exc(), "debug") + + return results + +class EasyOCRProvider(OCRProvider): + """EasyOCR provider for multiple languages""" + + def __init__(self, log_callback=None, languages=None): + super().__init__(log_callback) + # Default to safe language combination + self.languages = languages or ['ja', 'en'] # Safe default + self._validate_language_combination() + + def _validate_language_combination(self): + """Validate and fix EasyOCR language combinations""" + # EasyOCR language compatibility rules + incompatible_pairs = [ + (['ja', 'ko'], 'Japanese and Korean cannot be used together'), + (['ja', 'zh'], 'Japanese and Chinese cannot be used together'), + (['ko', 'zh'], 'Korean and Chinese cannot be used together') + ] + + for incompatible, reason in incompatible_pairs: + if all(lang in self.languages for lang in incompatible): + self._log(f"⚠️ EasyOCR: 
{reason}", "warning") + # Keep first language + English + self.languages = [self.languages[0], 'en'] + self._log(f"🔧 Auto-adjusted to: {self.languages}", "info") + break + + def check_installation(self) -> bool: + """Check if easyocr is installed""" + try: + import easyocr + self.is_installed = True + return True + except ImportError: + return False + + def install(self, progress_callback=None) -> bool: + """Install easyocr""" + pass + + def load_model(self, **kwargs) -> bool: + """Load easyocr model""" + try: + if not self.is_installed and not self.check_installation(): + self._log("❌ easyocr not installed", "error") + return False + + self._log(f"🔥 Loading easyocr model for languages: {self.languages}...") + import easyocr + + # This will download models on first run + self.model = easyocr.Reader(self.languages, gpu=True) + self.is_loaded = True + + self._log("✅ easyocr model loaded successfully") + return True + + except Exception as e: + self._log(f"❌ Failed to load easyocr: {str(e)}", "error") + # Try CPU mode if GPU fails + try: + import easyocr + self.model = easyocr.Reader(self.languages, gpu=False) + self.is_loaded = True + self._log("✅ easyocr loaded in CPU mode") + return True + except: + return False + + def detect_text(self, image: np.ndarray, **kwargs) -> List[OCRResult]: + """Detect text using easyocr""" + results = [] + + try: + if not self.is_loaded: + if not self.load_model(): + return results + + # EasyOCR can work directly with numpy arrays + ocr_results = self.model.readtext(image, detail=1) + + # Parse results + for (bbox, text, confidence) in ocr_results: + # bbox is a list of 4 points + xs = [point[0] for point in bbox] + ys = [point[1] for point in bbox] + x_min, x_max = min(xs), max(xs) + y_min, y_max = min(ys), max(ys) + + results.append(OCRResult( + text=text, + bbox=(int(x_min), int(y_min), int(x_max - x_min), int(y_max - y_min)), + confidence=confidence, + vertices=[(int(p[0]), int(p[1])) for p in bbox] + )) + + self._log(f"✅ Detected {len(results)} text regions") + + except Exception as e: + self._log(f"❌ Error in easyocr detection: {str(e)}", "error") + + return results + + +class PaddleOCRProvider(OCRProvider): + """PaddleOCR provider with memory safety measures""" + + def check_installation(self) -> bool: + """Check if paddleocr is installed""" + try: + from paddleocr import PaddleOCR + self.is_installed = True + return True + except ImportError: + return False + + def install(self, progress_callback=None) -> bool: + """Install paddleocr""" + pass + + def load_model(self, **kwargs) -> bool: + """Load paddleocr model with memory-safe configurations""" + try: + if not self.is_installed and not self.check_installation(): + self._log("❌ paddleocr not installed", "error") + return False + + self._log("🔥 Loading PaddleOCR model...") + + # Set memory-safe environment variables BEFORE importing + import os + os.environ['OMP_NUM_THREADS'] = '1' # Prevent OpenMP conflicts + os.environ['MKL_NUM_THREADS'] = '1' # Prevent MKL conflicts + os.environ['OPENBLAS_NUM_THREADS'] = '1' # Prevent OpenBLAS conflicts + os.environ['FLAGS_use_mkldnn'] = '0' # Disable MKL-DNN + + from paddleocr import PaddleOCR + + # Try memory-safe configurations + configs_to_try = [ + # Config 1: Most memory-safe configuration + { + 'use_angle_cls': False, # Disable angle to save memory + 'lang': 'ch', + 'rec_batch_num': 1, # Process one at a time + 'max_text_length': 100, # Limit text length + 'drop_score': 0.5, # Higher threshold to reduce detections + 'cpu_threads': 1, # Single thread to avoid 
conflicts + }, + # Config 2: Minimal memory footprint + { + 'lang': 'ch', + 'rec_batch_num': 1, + 'cpu_threads': 1, + }, + # Config 3: Absolute minimal + { + 'lang': 'ch' + }, + # Config 4: Empty config + {} + ] + + for i, config in enumerate(configs_to_try): + try: + self._log(f" Trying configuration {i+1}/{len(configs_to_try)}: {config}") + + # Force garbage collection before loading + import gc + gc.collect() + + self.model = PaddleOCR(**config) + self.is_loaded = True + self.current_config = config + self._log(f"✅ PaddleOCR loaded successfully with config: {config}") + return True + except Exception as e: + error_str = str(e) + self._log(f" Config {i+1} failed: {error_str}", "debug") + + # Clean up on failure + if hasattr(self, 'model'): + del self.model + gc.collect() + continue + + self._log(f"❌ PaddleOCR failed to load with any configuration", "error") + return False + + except Exception as e: + self._log(f"❌ Failed to load paddleocr: {str(e)}", "error") + import traceback + self._log(traceback.format_exc(), "debug") + return False + + def detect_text(self, image: np.ndarray, **kwargs) -> List[OCRResult]: + """Detect text with memory safety measures""" + results = [] + + try: + if not self.is_loaded: + if not self.load_model(): + return results + + import cv2 + import numpy as np + import gc + + # Memory safety: Ensure image isn't too large + h, w = image.shape[:2] if len(image.shape) >= 2 else (0, 0) + + # Limit image size to prevent memory issues + MAX_DIMENSION = 1500 + if h > MAX_DIMENSION or w > MAX_DIMENSION: + scale = min(MAX_DIMENSION/h, MAX_DIMENSION/w) + new_h, new_w = int(h*scale), int(w*scale) + self._log(f"⚠️ Resizing large image from {w}x{h} to {new_w}x{new_h} for memory safety", "warning") + image = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_AREA) + scale_factor = 1/scale + else: + scale_factor = 1.0 + + # Ensure correct format + if len(image.shape) == 2: # Grayscale + image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR) + elif len(image.shape) == 4: # Batch + image = image[0] + + # Ensure uint8 type + if image.dtype != np.uint8: + if image.max() <= 1.0: + image = (image * 255).astype(np.uint8) + else: + image = image.astype(np.uint8) + + # Make a copy to avoid memory corruption + image_copy = image.copy() + + # Force garbage collection before OCR + gc.collect() + + # Process with timeout protection + import signal + import threading + + ocr_results = None + ocr_error = None + + def run_ocr(): + nonlocal ocr_results, ocr_error + try: + ocr_results = self.model.ocr(image_copy) + except Exception as e: + ocr_error = e + + # Run OCR in a separate thread with timeout + ocr_thread = threading.Thread(target=run_ocr) + ocr_thread.daemon = True + ocr_thread.start() + ocr_thread.join(timeout=30) # 30 second timeout + + if ocr_thread.is_alive(): + self._log("❌ PaddleOCR timeout - taking too long", "error") + return results + + if ocr_error: + raise ocr_error + + # Parse results + results = self._parse_ocr_results(ocr_results) + + # Scale coordinates back if image was resized + if scale_factor != 1.0 and results: + for r in results: + x, y, width, height = r.bbox + r.bbox = (int(x*scale_factor), int(y*scale_factor), + int(width*scale_factor), int(height*scale_factor)) + r.vertices = [(int(v[0]*scale_factor), int(v[1]*scale_factor)) + for v in r.vertices] + + if results: + self._log(f"✅ Detected {len(results)} text regions", "info") + else: + self._log("No text regions found", "debug") + + # Clean up + del image_copy + gc.collect() + + except Exception as e: + error_msg 
= str(e) if str(e) else type(e).__name__ + + if "memory" in error_msg.lower() or "0x" in error_msg: + self._log("❌ Memory access violation in PaddleOCR", "error") + self._log(" This is a known Windows issue with PaddleOCR", "info") + self._log(" Please switch to EasyOCR or manga-ocr instead", "warning") + elif "trace_order.size()" in error_msg: + self._log("❌ PaddleOCR internal error", "error") + self._log(" Please switch to EasyOCR or manga-ocr", "warning") + else: + self._log(f"❌ Error in paddleocr detection: {error_msg}", "error") + + import traceback + self._log(traceback.format_exc(), "debug") + + return results + + def _parse_ocr_results(self, ocr_results) -> List[OCRResult]: + """Parse OCR results safely""" + results = [] + + if isinstance(ocr_results, bool) and ocr_results == False: + return results + + if ocr_results is None or not isinstance(ocr_results, list): + return results + + if len(ocr_results) == 0: + return results + + # Handle batch format + if isinstance(ocr_results[0], list) and len(ocr_results[0]) > 0: + first_item = ocr_results[0][0] + if isinstance(first_item, list) and len(first_item) > 0: + if isinstance(first_item[0], (list, tuple)) and len(first_item[0]) == 2: + ocr_results = ocr_results[0] + + # Parse detections + for detection in ocr_results: + if not detection or isinstance(detection, bool): + continue + + if not isinstance(detection, (list, tuple)) or len(detection) < 2: + continue + + try: + bbox_points = detection[0] + text_data = detection[1] + + if not isinstance(bbox_points, (list, tuple)) or len(bbox_points) != 4: + continue + + if not isinstance(text_data, (tuple, list)) or len(text_data) < 2: + continue + + text = str(text_data[0]).strip() + confidence = float(text_data[1]) + + if not text or confidence < 0.3: + continue + + xs = [float(p[0]) for p in bbox_points] + ys = [float(p[1]) for p in bbox_points] + x_min, x_max = min(xs), max(xs) + y_min, y_max = min(ys), max(ys) + + if (x_max - x_min) < 5 or (y_max - y_min) < 5: + continue + + results.append(OCRResult( + text=text, + bbox=(int(x_min), int(y_min), int(x_max - x_min), int(y_max - y_min)), + confidence=confidence, + vertices=[(int(p[0]), int(p[1])) for p in bbox_points] + )) + + except Exception: + continue + + return results + +class DocTROCRProvider(OCRProvider): + """DocTR OCR provider""" + + def check_installation(self) -> bool: + """Check if doctr is installed""" + try: + from doctr.models import ocr_predictor + self.is_installed = True + return True + except ImportError: + return False + + def install(self, progress_callback=None) -> bool: + """Install doctr""" + pass + + def load_model(self, **kwargs) -> bool: + """Load doctr model""" + try: + if not self.is_installed and not self.check_installation(): + self._log("❌ doctr not installed", "error") + return False + + self._log("🔥 Loading DocTR model...") + from doctr.models import ocr_predictor + + # Load pretrained model + self.model = ocr_predictor(pretrained=True) + self.is_loaded = True + + self._log("✅ DocTR model loaded successfully") + return True + + except Exception as e: + self._log(f"❌ Failed to load doctr: {str(e)}", "error") + return False + + def detect_text(self, image: np.ndarray, **kwargs) -> List[OCRResult]: + """Detect text using doctr""" + results = [] + + try: + if not self.is_loaded: + if not self.load_model(): + return results + + from doctr.io import DocumentFile + + # DocTR expects document format + # Convert numpy array to PIL and save temporarily + import tempfile + import cv2 + + with 
tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp: + cv2.imwrite(tmp.name, image) + doc = DocumentFile.from_images(tmp.name) + + # Run OCR + result = self.model(doc) + + # Parse results + h, w = image.shape[:2] + for page in result.pages: + for block in page.blocks: + for line in block.lines: + for word in line.words: + # Handle different geometry formats + geometry = word.geometry + + if len(geometry) == 4: + # Standard format: (x1, y1, x2, y2) + x1, y1, x2, y2 = geometry + elif len(geometry) == 2: + # Alternative format: ((x1, y1), (x2, y2)) + (x1, y1), (x2, y2) = geometry + else: + self._log(f"Unexpected geometry format: {geometry}", "warning") + continue + + # Convert relative coordinates to absolute + x1, x2 = int(x1 * w), int(x2 * w) + y1, y2 = int(y1 * h), int(y2 * h) + + results.append(OCRResult( + text=word.value, + bbox=(x1, y1, x2 - x1, y2 - y1), + confidence=word.confidence, + vertices=[(x1, y1), (x2, y1), (x2, y2), (x1, y2)] + )) + + # Clean up temp file + try: + os.unlink(tmp.name) + except: + pass + + self._log(f"DocTR detected {len(results)} text regions") + + except Exception as e: + self._log(f"Error in doctr detection: {str(e)}", "error") + import traceback + self._log(traceback.format_exc(), "error") + + return results + + +class RapidOCRProvider(OCRProvider): + """RapidOCR provider for fast local OCR""" + + def check_installation(self) -> bool: + """Check if rapidocr is installed""" + try: + import rapidocr_onnxruntime + self.is_installed = True + return True + except ImportError: + return False + + def install(self, progress_callback=None) -> bool: + """Install rapidocr (requires manual pip install)""" + # RapidOCR requires manual installation + if progress_callback: + progress_callback("RapidOCR requires manual pip installation") + self._log("Run: pip install rapidocr-onnxruntime", "info") + return False # Always return False since we can't auto-install + + def load_model(self, **kwargs) -> bool: + """Load RapidOCR model""" + try: + if not self.is_installed and not self.check_installation(): + self._log("RapidOCR not installed", "error") + return False + + self._log("Loading RapidOCR...") + from rapidocr_onnxruntime import RapidOCR + + self.model = RapidOCR() + self.is_loaded = True + + self._log("RapidOCR model loaded successfully") + return True + + except Exception as e: + self._log(f"Failed to load RapidOCR: {str(e)}", "error") + return False + + def detect_text(self, image: np.ndarray, **kwargs) -> List[OCRResult]: + """Detect text using RapidOCR""" + if not self.is_loaded: + self._log("RapidOCR model not loaded", "error") + return [] + + results = [] + + try: + # Convert numpy array to PIL Image for RapidOCR + if len(image.shape) == 3: + # BGR to RGB + image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + else: + image_rgb = image + + # RapidOCR expects PIL Image or numpy array + ocr_results, _ = self.model(image_rgb) + + if ocr_results: + for result in ocr_results: + # RapidOCR returns [bbox, text, confidence] + bbox_points = result[0] # 4 corner points + text = result[1] + confidence = float(result[2]) + + if not text or not text.strip(): + continue + + # Convert 4-point bbox to x,y,w,h format + xs = [point[0] for point in bbox_points] + ys = [point[1] for point in bbox_points] + x_min, x_max = min(xs), max(xs) + y_min, y_max = min(ys), max(ys) + + results.append(OCRResult( + text=text.strip(), + bbox=(int(x_min), int(y_min), int(x_max - x_min), int(y_max - y_min)), + confidence=confidence, + vertices=[(int(p[0]), int(p[1])) for p in bbox_points] 
+ )) + + self._log(f"Detected {len(results)} text regions") + + except Exception as e: + self._log(f"Error in RapidOCR detection: {str(e)}", "error") + + return results + +class OCRManager: + """Manager for multiple OCR providers""" + + def __init__(self, log_callback=None): + self.log_callback = log_callback + self.providers = { + 'custom-api': CustomAPIProvider(log_callback) , + 'manga-ocr': MangaOCRProvider(log_callback), + 'easyocr': EasyOCRProvider(log_callback), + 'paddleocr': PaddleOCRProvider(log_callback), + 'doctr': DocTROCRProvider(log_callback), + 'rapidocr': RapidOCRProvider(log_callback), + 'Qwen2-VL': Qwen2VL(log_callback) + } + self.current_provider = None + self.stop_flag = None + + def get_provider(self, name: str) -> Optional[OCRProvider]: + """Get OCR provider by name""" + return self.providers.get(name) + + def set_current_provider(self, name: str): + """Set current active provider""" + if name in self.providers: + self.current_provider = name + return True + return False + + def check_provider_status(self, name: str) -> Dict[str, bool]: + """Check installation and loading status of provider""" + provider = self.providers.get(name) + if not provider: + return {'installed': False, 'loaded': False} + + result = { + 'installed': provider.check_installation(), + 'loaded': provider.is_loaded + } + if self.log_callback: + self.log_callback(f"DEBUG: check_provider_status({name}) returning loaded={result['loaded']}", "debug") + return result + + def install_provider(self, name: str, progress_callback=None) -> bool: + """Install a provider""" + provider = self.providers.get(name) + if not provider: + return False + + return provider.install(progress_callback) + + def load_provider(self, name: str, **kwargs) -> bool: + """Load a provider's model with optional parameters""" + provider = self.providers.get(name) + if not provider: + return False + + return provider.load_model(**kwargs) # <-- Passes model_size and any other kwargs + + def shutdown(self): + """Release models/processors/tokenizers for all providers and clear caches.""" + try: + import gc + for name, provider in list(self.providers.items()): + try: + if hasattr(provider, 'model'): + provider.model = None + if hasattr(provider, 'processor'): + provider.processor = None + if hasattr(provider, 'tokenizer'): + provider.tokenizer = None + if hasattr(provider, 'reader'): + provider.reader = None + if hasattr(provider, 'is_loaded'): + provider.is_loaded = False + except Exception: + pass + gc.collect() + try: + import torch + torch.cuda.empty_cache() + except Exception: + pass + except Exception: + pass + + def detect_text(self, image: np.ndarray, provider_name: str = None, **kwargs) -> List[OCRResult]: + """Detect text using specified or current provider""" + provider_name = provider_name or self.current_provider + if not provider_name: + return [] + + provider = self.providers.get(provider_name) + if not provider: + return [] + + return provider.detect_text(image, **kwargs) + + def set_stop_flag(self, stop_flag): + """Set stop flag for all providers""" + self.stop_flag = stop_flag + for provider in self.providers.values(): + if hasattr(provider, 'set_stop_flag'): + provider.set_stop_flag(stop_flag) + + def reset_stop_flags(self): + """Reset stop flags for all providers""" + for provider in self.providers.values(): + if hasattr(provider, 'reset_stop_flags'): + provider.reset_stop_flags()
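+
+# --- Usage sketch (illustrative only, not part of the original module) ------
+# A minimal, hedged example of how the OCR stack above could be driven end to
+# end through OCRManager: check a provider's status, load it, run detection on
+# an image, and read the OCRResult fields (text, bbox as x/y/w/h, confidence).
+# The helper name `_example_ocr_manager_usage`, the 'easyocr' choice and the
+# default image path are assumptions made purely for illustration.
+def _example_ocr_manager_usage(image_path: str = "page.png") -> None:
+    """Illustrative sketch of the OCRManager API defined in this module."""
+    import cv2
+
+    # Simple (message, level) logger matching the calls made by the manager
+    # and its providers.
+    manager = OCRManager(log_callback=lambda msg, level="info": print(f"[{level}] {msg}"))
+
+    provider_name = 'easyocr'  # any key of manager.providers works here
+    status = manager.check_provider_status(provider_name)
+    if not status['installed']:
+        print(f"{provider_name} is not installed")
+        return
+    if not status['loaded'] and not manager.load_provider(provider_name):
+        print(f"failed to load {provider_name}")
+        return
+    manager.set_current_provider(provider_name)
+
+    image = cv2.imread(image_path)  # BGR ndarray, the format detect_text() expects
+    if image is None:
+        print(f"could not read {image_path}")
+        return
+
+    for result in manager.detect_text(image):
+        x, y, w, h = result.bbox
+        print(f"{result.confidence:.2f} @ ({x},{y}) {w}x{h}: {result.text}")
+
+    # Release model weights and clear CUDA caches when finished.
+    manager.shutdown()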