diff --git a/app.py b/app.py
new file mode 100644
index 0000000000000000000000000000000000000000..3ff547f09a9df631f66b9f44e2cafcb8ca124a2b
--- /dev/null
+++ b/app.py
@@ -0,0 +1,5320 @@
+#!/usr/bin/env python3
+"""
+Glossarion Web - Gradio Web Interface
+AI-powered translation in your browser
+"""
+
+import gradio as gr
+import os
+import sys
+import json
+import tempfile
+import base64
+from pathlib import Path
+
+# Import API key encryption/decryption
+try:
+ from api_key_encryption import APIKeyEncryption
+ API_KEY_ENCRYPTION_AVAILABLE = True
+ # Create web-specific encryption handler with its own key file
+ _web_encryption_handler = None
+ def get_web_encryption_handler():
+ global _web_encryption_handler
+ if _web_encryption_handler is None:
+ _web_encryption_handler = APIKeyEncryption()
+ # Use web-specific key file
+ from pathlib import Path
+ _web_encryption_handler.key_file = Path('.glossarion_web_key')
+ _web_encryption_handler.cipher = _web_encryption_handler._get_or_create_cipher()
+ # Add web-specific fields to encrypt
+ _web_encryption_handler.api_key_fields.extend([
+ 'azure_vision_key',
+ 'google_vision_credentials'
+ ])
+ return _web_encryption_handler
+
+ def decrypt_config(config):
+ return get_web_encryption_handler().decrypt_config(config)
+
+ def encrypt_config(config):
+ return get_web_encryption_handler().encrypt_config(config)
+except ImportError:
+ API_KEY_ENCRYPTION_AVAILABLE = False
+ def decrypt_config(config):
+ return config # Fallback: return config as-is
+ def encrypt_config(config):
+ return config # Fallback: return config as-is
+
+# Import your existing translation modules
+try:
+ import TransateKRtoEN
+ from model_options import get_model_options
+ TRANSLATION_AVAILABLE = True
+except ImportError:
+ TRANSLATION_AVAILABLE = False
+ print("⚠️ Translation modules not found")
+
+# Import manga translation modules
+try:
+ from manga_translator import MangaTranslator
+ from unified_api_client import UnifiedClient
+ MANGA_TRANSLATION_AVAILABLE = True
+ print("✅ Manga translation modules loaded successfully")
+except ImportError as e:
+ MANGA_TRANSLATION_AVAILABLE = False
+ print(f"⚠️ Manga translation modules not found: {e}")
+ print(f"⚠️ Current working directory: {os.getcwd()}")
+ print(f"⚠️ Python path: {sys.path[:3]}...")
+
+ # Check if files exist
+ files_to_check = ['manga_translator.py', 'unified_api_client.py', 'bubble_detector.py', 'local_inpainter.py']
+ for file in files_to_check:
+ if os.path.exists(file):
+ print(f"✅ Found: {file}")
+ else:
+ print(f"❌ Missing: {file}")
+
+
+class GlossarionWeb:
+ """Web interface for Glossarion translator"""
+
+ def __init__(self):
+ # Determine config file path based on environment
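+        # SPACE_ID is set automatically by the Hugging Face Spaces runtime;
+        # HF_SPACES=true is treated here as a manual override (assumed project convention).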
+ is_hf_spaces = os.getenv('SPACE_ID') is not None or os.getenv('HF_SPACES') == 'true'
+
+ if is_hf_spaces:
+ # Use /data directory for Hugging Face Spaces persistent storage
+ data_dir = '/data'
+ if not os.path.exists(data_dir):
+ # Fallback to current directory if /data doesn't exist
+ data_dir = '.'
+ self.config_file = os.path.join(data_dir, 'config_web.json')
+ print(f"🤗 HF Spaces detected - using config path: {self.config_file}")
+ print(f"📁 Directory exists: {os.path.exists(os.path.dirname(self.config_file))}")
+ else:
+ # Local mode - use current directory
+ self.config_file = "config_web.json"
+ print(f"🏠 Local mode - using config path: {self.config_file}")
+
+ # Load raw config first
+ self.config = self.load_config()
+
+ # Create a decrypted version for display/use in the UI
+ # but keep the original for saving
+ self.decrypted_config = self.config.copy()
+ if API_KEY_ENCRYPTION_AVAILABLE:
+ self.decrypted_config = decrypt_config(self.decrypted_config)
+
+ self.models = get_model_options() if TRANSLATION_AVAILABLE else ["gpt-4", "claude-3-5-sonnet"]
+ print(f"🤖 Loaded {len(self.models)} models: {self.models[:5]}{'...' if len(self.models) > 5 else ''}")
+
+ # Translation state management
+ import threading
+ self.is_translating = False
+ self.stop_flag = threading.Event()
+ self.translation_thread = None
+ self.current_unified_client = None # Track active client to allow cancellation
+ self.current_translator = None # Track active translator to allow shutdown
+
+        # Add stop flags for different translation types
+        self.epub_translation_stop = False
+        self.epub_translation_thread = None
+        self.glossary_extraction_stop = False
+        self.glossary_extraction_thread = None
+        self.qa_scan_stop = False  # checked by run_qa_scan's file loop
+
+ # Default prompts from the GUI (same as translator_gui.py)
+ self.default_prompts = {
+ "korean": (
+ "You are a professional Korean to English novel translator, you must strictly output only English text and HTML tags while following these rules:\n"
+ "- Use a natural, comedy-friendly English translation style that captures both humor and readability without losing any original meaning.\n"
+ "- Include 100% of the source text - every word, phrase, and sentence must be fully translated without exception.\n"
+ "- Retain Korean honorifics and respectful speech markers in romanized form, including but not limited to: -nim, -ssi, -yang, -gun, -isiyeo, -hasoseo. For archaic/classical Korean honorific forms (like 이시여/isiyeo, 하소서/hasoseo), preserve them as-is rather than converting to modern equivalents.\n"
+ "- Always localize Korean terminology to proper English equivalents instead of literal translations (examples: 마왕 = Demon King; 마술 = magic).\n"
+ "- When translating Korean's pronoun-dropping style, insert pronouns in English only where needed for clarity: prioritize original pronouns as implied or according to the glossary, and only use they/them as a last resort, use I/me for first-person narration, and maintain natural English flow without overusing pronouns just because they're omitted in Korean.\n"
+ "- All Korean profanity must be translated to English profanity.\n"
+ "- Preserve original intent, and speech tone.\n"
+ "- Retain onomatopoeia in Romaji.\n"
+                "- Keep original Korean quotation marks (“ ”, ‘ ’, 「」, 『』) as-is without converting to English quotes.\n"
+ "- Every Korean/Chinese/Japanese character must be converted to its English meaning. Examples: The character 생 means 'life/living', 활 means 'active', 관 means 'hall/building' - together 생활관 means Dormitory.\n"
+                "- Preserve ALL HTML tags exactly as they appear in the source.\n"
+ ),
+ "japanese": (
+ "You are a professional Japanese to English novel translator, you must strictly output only English text and HTML tags while following these rules:\n"
+ "- Use a natural, comedy-friendly English translation style that captures both humor and readability without losing any original meaning.\n"
+ "- Include 100% of the source text - every word, phrase, and sentence must be fully translated without exception.\n"
+ "- Retain Japanese honorifics and respectful speech markers in romanized form, including but not limited to: -san, -sama, -chan, -kun, -dono, -sensei, -senpai, -kouhai. For archaic/classical Japanese honorific forms, preserve them as-is rather than converting to modern equivalents.\n"
+ "- Always localize Japanese terminology to proper English equivalents instead of literal translations (examples: 魔王 = Demon King; 魔術 = magic).\n"
+ "- When translating Japanese's pronoun-dropping style, insert pronouns in English only where needed for clarity: prioritize original pronouns as implied or according to the glossary, and only use they/them as a last resort, use I/me for first-person narration while reflecting the Japanese pronoun's nuance (私/僕/俺/etc.) through speech patterns rather than the pronoun itself, and maintain natural English flow without overusing pronouns just because they're omitted in Japanese.\n"
+ "- All Japanese profanity must be translated to English profanity.\n"
+ "- Preserve original intent, and speech tone.\n"
+ "- Retain onomatopoeia in Romaji.\n"
+ "- Keep original Japanese quotation marks (「」, 『』) as-is without converting to English quotes.\n"
+ "- Every Korean/Chinese/Japanese character must be converted to its English meaning. Examples: The character 生 means 'life/living', 活 means 'active', 館 means 'hall/building' - together 生活館 means Dormitory.\n"
+                "- Preserve ALL HTML tags exactly as they appear in the source.\n"
+ ),
+ "chinese": (
+ "You are a professional Chinese to English novel translator, you must strictly output only English text and HTML tags while following these rules:\n"
+ "- Use a natural, comedy-friendly English translation style that captures both humor and readability without losing any original meaning.\n"
+ "- Include 100% of the source text - every word, phrase, and sentence must be fully translated without exception.\n"
+ "- Always localize Chinese terminology to proper English equivalents instead of literal translations (examples: 魔王 = Demon King; 魔法 = magic).\n"
+ "- When translating Chinese's pronoun-dropping style, insert pronouns in English only where needed for clarity while maintaining natural English flow.\n"
+ "- All Chinese profanity must be translated to English profanity.\n"
+ "- Preserve original intent, and speech tone.\n"
+ "- Retain onomatopoeia in Pinyin.\n"
+ "- Keep original Chinese quotation marks (「」, 『』) as-is without converting to English quotes.\n"
+ "- Every Korean/Chinese/Japanese character must be converted to its English meaning. Examples: The character 生 means 'life/living', 活 means 'active', 館 means 'hall/building' - together 生活館 means Dormitory.\n"
+                "- Preserve ALL HTML tags exactly as they appear in the source.\n"
+ ),
+ "Manga_JP": (
+ "You are a professional Japanese to English Manga translator.\n"
+ "You have both the image of the Manga panel and the extracted text to work with.\n"
+ "Output only English text while following these rules: \n\n"
+
+ "VISUAL CONTEXT:\n"
+ "- Analyze the character's facial expressions and body language in the image.\n"
+ "- Consider the scene's mood and atmosphere.\n"
+ "- Note any action or movement depicted.\n"
+ "- Use visual cues to determine the appropriate tone and emotion.\n"
+ "- USE THE IMAGE to inform your translation choices. The image is not decorative - it contains essential context for accurate translation.\n\n"
+
+ "DIALOGUE REQUIREMENTS:\n"
+ "- Match the translation tone to the character's expression.\n"
+ "- If a character looks angry, use appropriately intense language.\n"
+ "- If a character looks shy or embarrassed, reflect that in the translation.\n"
+ "- Keep speech patterns consistent with the character's appearance and demeanor.\n"
+ "- Retain honorifics and onomatopoeia in Romaji.\n\n"
+
+ "IMPORTANT: Use both the visual context and text to create the most accurate and natural-sounding translation.\n"
+ ),
+ "Manga_KR": (
+ "You are a professional Korean to English Manhwa translator.\n"
+ "You have both the image of the Manhwa panel and the extracted text to work with.\n"
+ "Output only English text while following these rules: \n\n"
+
+ "VISUAL CONTEXT:\n"
+ "- Analyze the character's facial expressions and body language in the image.\n"
+ "- Consider the scene's mood and atmosphere.\n"
+ "- Note any action or movement depicted.\n"
+ "- Use visual cues to determine the appropriate tone and emotion.\n"
+ "- USE THE IMAGE to inform your translation choices. The image is not decorative - it contains essential context for accurate translation.\n\n"
+
+ "DIALOGUE REQUIREMENTS:\n"
+ "- Match the translation tone to the character's expression.\n"
+ "- If a character looks angry, use appropriately intense language.\n"
+ "- If a character looks shy or embarrassed, reflect that in the translation.\n"
+ "- Keep speech patterns consistent with the character's appearance and demeanor.\n"
+ "- Retain honorifics and onomatopoeia in Romaji.\n\n"
+
+ "IMPORTANT: Use both the visual context and text to create the most accurate and natural-sounding translation.\n"
+ ),
+ "Manga_CN": (
+ "You are a professional Chinese to English Manga translator.\n"
+ "You have both the image of the Manga panel and the extracted text to work with.\n"
+ "Output only English text while following these rules: \n\n"
+
+ "VISUAL CONTEXT:\n"
+ "- Analyze the character's facial expressions and body language in the image.\n"
+ "- Consider the scene's mood and atmosphere.\n"
+ "- Note any action or movement depicted.\n"
+ "- Use visual cues to determine the appropriate tone and emotion.\n"
+ "- USE THE IMAGE to inform your translation choices. The image is not decorative - it contains essential context for accurate translation.\n"
+
+ "DIALOGUE REQUIREMENTS:\n"
+ "- Match the translation tone to the character's expression.\n"
+ "- If a character looks angry, use appropriately intense language.\n"
+ "- If a character looks shy or embarrassed, reflect that in the translation.\n"
+ "- Keep speech patterns consistent with the character's appearance and demeanor.\n"
+ "- Retain honorifics and onomatopoeia in Romaji.\n\n"
+
+ "IMPORTANT: Use both the visual context and text to create the most accurate and natural-sounding translation.\n"
+ ),
+ "Original": "Return everything exactly as seen on the source."
+ }
+
+ # Load profiles from config and merge with defaults
+ # Always include default prompts, then overlay any custom ones from config
+ self.profiles = self.default_prompts.copy()
+ config_profiles = self.config.get('prompt_profiles', {})
+ if config_profiles:
+ self.profiles.update(config_profiles)
+
+ def get_config_value(self, key, default=None):
+ """Get value from decrypted config with fallback"""
+ return self.decrypted_config.get(key, default)
+
+ def get_current_config_for_update(self):
+ """Get the current config for updating (uses in-memory version)"""
+ # Return a copy of the in-memory config, not loaded from file
+ return self.config.copy()
+
+ def get_default_config(self):
+ """Get default configuration for Hugging Face Spaces"""
+ return {
+ 'model': 'gpt-4-turbo',
+ 'api_key': '',
+ 'ocr_provider': 'custom-api',
+ 'bubble_detection_enabled': True,
+ 'inpainting_enabled': True,
+ 'manga_font_size_mode': 'auto',
+ 'manga_font_size': 24,
+ 'manga_font_size_multiplier': 1.0,
+ 'manga_max_font_size': 48,
+ 'manga_text_color': [255, 255, 255], # White text (like manga integration)
+ 'manga_shadow_enabled': True,
+ 'manga_shadow_color': [0, 0, 0], # Black shadow (like manga integration)
+ 'manga_shadow_offset_x': 2, # Match manga integration
+ 'manga_shadow_offset_y': 2, # Match manga integration
+ 'manga_shadow_blur': 0, # Match manga integration (no blur)
+ 'manga_bg_opacity': 180,
+ 'manga_bg_style': 'circle',
+ 'manga_settings': {
+ 'ocr': {
+ 'detector_type': 'rtdetr_onnx',
+ 'rtdetr_confidence': 0.3,
+ 'bubble_confidence': 0.3,
+ 'detect_text_bubbles': True,
+ 'detect_empty_bubbles': True,
+ 'detect_free_text': True,
+ 'bubble_max_detections_yolo': 100
+ },
+ 'inpainting': {
+ 'local_method': 'anime',
+ 'method': 'local',
+ 'batch_size': 10,
+ 'enable_cache': True
+ },
+ 'advanced': {
+ 'parallel_processing': True,
+ 'max_workers': 2,
+ 'parallel_panel_translation': False,
+ 'panel_max_workers': 7,
+ 'format_detection': True,
+ 'webtoon_mode': 'auto',
+ 'torch_precision': 'fp16',
+ 'auto_cleanup_models': False,
+ 'debug_mode': False,
+ 'save_intermediate': False
+ },
+ 'rendering': {
+ 'auto_min_size': 12,
+ 'auto_max_size': 48,
+ 'auto_fit_style': 'balanced'
+ },
+ 'font_sizing': {
+ 'algorithm': 'smart',
+ 'prefer_larger': True,
+ 'max_lines': 10,
+ 'line_spacing': 1.3,
+ 'bubble_size_factor': True,
+ 'min_size': 12,
+ 'max_size': 48
+ },
+ 'tiling': {
+ 'enabled': False,
+ 'tile_size': 480,
+ 'tile_overlap': 64
+ }
+ }
+ }
+
+ def load_config(self):
+ """Load configuration - from persistent file on HF Spaces or local file"""
+ is_hf_spaces = os.getenv('SPACE_ID') is not None or os.getenv('HF_SPACES') == 'true'
+
+ # Try to load from file (works both locally and on HF Spaces with persistent storage)
+ try:
+ if os.path.exists(self.config_file):
+ with open(self.config_file, 'r', encoding='utf-8') as f:
+ loaded_config = json.load(f)
+ # Start with defaults
+ default_config = self.get_default_config()
+ # Deep merge - preserve nested structures from loaded config
+ self._deep_merge_config(default_config, loaded_config)
+
+ if is_hf_spaces:
+ print(f"✅ Loaded config from persistent storage: {self.config_file}")
+ else:
+ print(f"✅ Loaded config from local file: {self.config_file}")
+
+ return default_config
+ except Exception as e:
+ print(f"Could not load config from {self.config_file}: {e}")
+
+ # If loading fails or file doesn't exist - return defaults
+ print(f"📝 Using default configuration")
+ return self.get_default_config()
+
+ def _deep_merge_config(self, base, override):
+ """Deep merge override config into base config"""
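+        # Illustrative example (assumed values): merging {'manga_settings': {'ocr': {'rtdetr_confidence': 0.5}}}
+        # into the defaults only replaces that one nested key; sibling keys such as
+        # manga_settings['ocr']['bubble_confidence'] keep their default values.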
+ for key, value in override.items():
+ if key in base and isinstance(base[key], dict) and isinstance(value, dict):
+ # Recursively merge nested dicts
+ self._deep_merge_config(base[key], value)
+ else:
+ # Override the value
+ base[key] = value
+
+ def set_all_environment_variables(self):
+ """Set all environment variables from config for translation engines"""
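+        # Bind the decrypted-config getter to a short local name so every
+        # env-var assignment below reads one key with an explicit default.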
+ config = self.get_config_value
+
+ # Chapter Processing Options
+ os.environ['BATCH_TRANSLATE_HEADERS'] = '1' if config('batch_translate_headers', False) else '0'
+ os.environ['HEADERS_PER_BATCH'] = str(config('headers_per_batch', 400))
+ os.environ['USE_NCX_NAVIGATION'] = '1' if config('use_ncx_navigation', False) else '0'
+ os.environ['ATTACH_CSS_TO_CHAPTERS'] = '1' if config('attach_css_to_chapters', False) else '0'
+ os.environ['RETAIN_SOURCE_EXTENSION'] = '1' if config('retain_source_extension', True) else '0'
+ os.environ['USE_CONSERVATIVE_BATCHING'] = '1' if config('use_conservative_batching', False) else '0'
+ os.environ['DISABLE_GEMINI_SAFETY'] = '1' if config('disable_gemini_safety', False) else '0'
+ os.environ['USE_HTTP_OPENROUTER'] = '1' if config('use_http_openrouter', False) else '0'
+ os.environ['DISABLE_OPENROUTER_COMPRESSION'] = '1' if config('disable_openrouter_compression', False) else '0'
+
+ # Chapter Extraction Settings
+ os.environ['TEXT_EXTRACTION_METHOD'] = config('text_extraction_method', 'standard')
+ os.environ['FILE_FILTERING_LEVEL'] = config('file_filtering_level', 'smart')
+
+ # Thinking Mode Settings
+ os.environ['ENABLE_GPT_THINKING'] = '1' if config('enable_gpt_thinking', True) else '0'
+ os.environ['GPT_THINKING_EFFORT'] = config('gpt_thinking_effort', 'medium')
+ os.environ['OR_THINKING_TOKENS'] = str(config('or_thinking_tokens', 2000))
+ os.environ['ENABLE_GEMINI_THINKING'] = '1' if config('enable_gemini_thinking', False) else '0'
+ os.environ['GEMINI_THINKING_BUDGET'] = str(config('gemini_thinking_budget', 0))
+ # IMPORTANT: Also set THINKING_BUDGET for unified_api_client compatibility
+ os.environ['THINKING_BUDGET'] = str(config('gemini_thinking_budget', 0))
+
+ # Translation Settings
+ os.environ['CONTEXTUAL'] = '1' if config('contextual', False) else '0'
+ os.environ['TRANSLATION_HISTORY_LIMIT'] = str(config('translation_history_limit', 2))
+ os.environ['TRANSLATION_HISTORY_ROLLING'] = '1' if config('translation_history_rolling', False) else '0'
+ os.environ['BATCH_TRANSLATION'] = '1' if config('batch_translation', True) else '0'
+ os.environ['BATCH_SIZE'] = str(config('batch_size', 10))
+ os.environ['THREAD_SUBMISSION_DELAY'] = str(config('thread_submission_delay', 0.1))
+ os.environ['DELAY'] = str(config('delay', 1))
+ os.environ['CHAPTER_RANGE'] = config('chapter_range', '')
+ os.environ['TOKEN_LIMIT'] = str(config('token_limit', 200000))
+ os.environ['TOKEN_LIMIT_DISABLED'] = '1' if config('token_limit_disabled', False) else '0'
+ os.environ['DISABLE_INPUT_TOKEN_LIMIT'] = '1' if config('token_limit_disabled', False) else '0'
+
+ # Glossary Settings
+ os.environ['ENABLE_AUTO_GLOSSARY'] = '1' if config('enable_auto_glossary', False) else '0'
+ os.environ['APPEND_GLOSSARY_TO_PROMPT'] = '1' if config('append_glossary_to_prompt', True) else '0'
+ os.environ['GLOSSARY_MIN_FREQUENCY'] = str(config('glossary_min_frequency', 2))
+ os.environ['GLOSSARY_MAX_NAMES'] = str(config('glossary_max_names', 50))
+ os.environ['GLOSSARY_MAX_TITLES'] = str(config('glossary_max_titles', 30))
+ os.environ['GLOSSARY_BATCH_SIZE'] = str(config('glossary_batch_size', 50))
+ os.environ['GLOSSARY_FILTER_MODE'] = config('glossary_filter_mode', 'all')
+ os.environ['GLOSSARY_FUZZY_THRESHOLD'] = str(config('glossary_fuzzy_threshold', 0.90))
+
+ # Manual Glossary Settings
+ os.environ['MANUAL_GLOSSARY_MIN_FREQUENCY'] = str(config('manual_glossary_min_frequency', 2))
+ os.environ['MANUAL_GLOSSARY_MAX_NAMES'] = str(config('manual_glossary_max_names', 50))
+ os.environ['MANUAL_GLOSSARY_MAX_TITLES'] = str(config('manual_glossary_max_titles', 30))
+ os.environ['GLOSSARY_MAX_TEXT_SIZE'] = str(config('glossary_max_text_size', 50000))
+ os.environ['GLOSSARY_MAX_SENTENCES'] = str(config('glossary_max_sentences', 200))
+ os.environ['GLOSSARY_CHAPTER_SPLIT_THRESHOLD'] = str(config('glossary_chapter_split_threshold', 8192))
+ os.environ['MANUAL_GLOSSARY_FILTER_MODE'] = config('manual_glossary_filter_mode', 'all')
+ os.environ['STRIP_HONORIFICS'] = '1' if config('strip_honorifics', True) else '0'
+ os.environ['MANUAL_GLOSSARY_FUZZY_THRESHOLD'] = str(config('manual_glossary_fuzzy_threshold', 0.90))
+ os.environ['GLOSSARY_USE_LEGACY_CSV'] = '1' if config('glossary_use_legacy_csv', False) else '0'
+
+ # QA Scanner Settings
+ os.environ['ENABLE_POST_TRANSLATION_SCAN'] = '1' if config('enable_post_translation_scan', False) else '0'
+ os.environ['QA_MIN_FOREIGN_CHARS'] = str(config('qa_min_foreign_chars', 10))
+ os.environ['QA_CHECK_REPETITION'] = '1' if config('qa_check_repetition', True) else '0'
+ os.environ['QA_CHECK_GLOSSARY_LEAKAGE'] = '1' if config('qa_check_glossary_leakage', True) else '0'
+ os.environ['QA_MIN_FILE_LENGTH'] = str(config('qa_min_file_length', 0))
+ os.environ['QA_CHECK_MULTIPLE_HEADERS'] = '1' if config('qa_check_multiple_headers', True) else '0'
+ os.environ['QA_CHECK_MISSING_HTML'] = '1' if config('qa_check_missing_html', True) else '0'
+ os.environ['QA_CHECK_INSUFFICIENT_PARAGRAPHS'] = '1' if config('qa_check_insufficient_paragraphs', True) else '0'
+ os.environ['QA_MIN_PARAGRAPH_PERCENTAGE'] = str(config('qa_min_paragraph_percentage', 30))
+ os.environ['QA_REPORT_FORMAT'] = config('qa_report_format', 'detailed')
+ os.environ['QA_AUTO_SAVE_REPORT'] = '1' if config('qa_auto_save_report', True) else '0'
+
+ # Manga/Image Translation Settings (when available)
+ os.environ['BUBBLE_DETECTION_ENABLED'] = '1' if config('bubble_detection_enabled', True) else '0'
+ os.environ['INPAINTING_ENABLED'] = '1' if config('inpainting_enabled', True) else '0'
+ os.environ['MANGA_FONT_SIZE_MODE'] = config('manga_font_size_mode', 'auto')
+ os.environ['MANGA_FONT_SIZE'] = str(config('manga_font_size', 24))
+ os.environ['MANGA_FONT_MULTIPLIER'] = str(config('manga_font_multiplier', 1.0))
+ os.environ['MANGA_MIN_FONT_SIZE'] = str(config('manga_min_font_size', 12))
+ os.environ['MANGA_MAX_FONT_SIZE'] = str(config('manga_max_font_size', 48))
+ os.environ['MANGA_SHADOW_ENABLED'] = '1' if config('manga_shadow_enabled', True) else '0'
+ os.environ['MANGA_SHADOW_OFFSET_X'] = str(config('manga_shadow_offset_x', 2))
+ os.environ['MANGA_SHADOW_OFFSET_Y'] = str(config('manga_shadow_offset_y', 2))
+ os.environ['MANGA_SHADOW_BLUR'] = str(config('manga_shadow_blur', 0))
+ os.environ['MANGA_BG_OPACITY'] = str(config('manga_bg_opacity', 130))
+ os.environ['MANGA_BG_STYLE'] = config('manga_bg_style', 'circle')
+
+ # OCR Provider Settings
+ os.environ['OCR_PROVIDER'] = config('ocr_provider', 'custom-api')
+
+ # Advanced Manga Settings
+ manga_settings = config('manga_settings', {})
+ if manga_settings:
+ advanced = manga_settings.get('advanced', {})
+ os.environ['PARALLEL_PANEL_TRANSLATION'] = '1' if advanced.get('parallel_panel_translation', False) else '0'
+ os.environ['PANEL_MAX_WORKERS'] = str(advanced.get('panel_max_workers', 7))
+ os.environ['PANEL_START_STAGGER_MS'] = str(advanced.get('panel_start_stagger_ms', 0))
+ os.environ['WEBTOON_MODE'] = '1' if advanced.get('webtoon_mode', False) else '0'
+ os.environ['DEBUG_MODE'] = '1' if advanced.get('debug_mode', False) else '0'
+ os.environ['SAVE_INTERMEDIATE'] = '1' if advanced.get('save_intermediate', False) else '0'
+ os.environ['PARALLEL_PROCESSING'] = '1' if advanced.get('parallel_processing', True) else '0'
+ os.environ['MAX_WORKERS'] = str(advanced.get('max_workers', 4))
+ os.environ['AUTO_CLEANUP_MODELS'] = '1' if advanced.get('auto_cleanup_models', False) else '0'
+ os.environ['TORCH_PRECISION'] = advanced.get('torch_precision', 'auto')
+ os.environ['PRELOAD_LOCAL_INPAINTING_FOR_PANELS'] = '1' if advanced.get('preload_local_inpainting_for_panels', False) else '0'
+
+ # OCR settings
+ ocr = manga_settings.get('ocr', {})
+ os.environ['DETECTOR_TYPE'] = ocr.get('detector_type', 'rtdetr_onnx')
+ os.environ['RTDETR_CONFIDENCE'] = str(ocr.get('rtdetr_confidence', 0.3))
+ os.environ['BUBBLE_CONFIDENCE'] = str(ocr.get('bubble_confidence', 0.3))
+ os.environ['DETECT_TEXT_BUBBLES'] = '1' if ocr.get('detect_text_bubbles', True) else '0'
+ os.environ['DETECT_EMPTY_BUBBLES'] = '1' if ocr.get('detect_empty_bubbles', True) else '0'
+ os.environ['DETECT_FREE_TEXT'] = '1' if ocr.get('detect_free_text', True) else '0'
+ os.environ['BUBBLE_MAX_DETECTIONS_YOLO'] = str(ocr.get('bubble_max_detections_yolo', 100))
+
+ # Inpainting settings
+ inpainting = manga_settings.get('inpainting', {})
+ os.environ['LOCAL_INPAINT_METHOD'] = inpainting.get('local_method', 'anime_onnx')
+ os.environ['INPAINT_BATCH_SIZE'] = str(inpainting.get('batch_size', 10))
+ os.environ['INPAINT_CACHE_ENABLED'] = '1' if inpainting.get('enable_cache', True) else '0'
+
+ # HD Strategy
+ os.environ['HD_STRATEGY'] = advanced.get('hd_strategy', 'resize')
+ os.environ['HD_RESIZE_LIMIT'] = str(advanced.get('hd_strategy_resize_limit', 1536))
+ os.environ['HD_CROP_MARGIN'] = str(advanced.get('hd_strategy_crop_margin', 16))
+ os.environ['HD_CROP_TRIGGER'] = str(advanced.get('hd_strategy_crop_trigger_size', 1024))
+
+ # Concise Pipeline Logs
+ os.environ['CONCISE_PIPELINE_LOGS'] = '1' if config('concise_pipeline_logs', False) else '0'
+
+ print("✅ All environment variables set from configuration")
+
+ def save_config(self, config):
+ """Save configuration - to persistent file on HF Spaces or local file"""
+ is_hf_spaces = os.getenv('SPACE_ID') is not None or os.getenv('HF_SPACES') == 'true'
+
+ # Always try to save to file (works both locally and on HF Spaces with persistent storage)
+ try:
+ config_to_save = config.copy()
+
+ # Only encrypt if we have the encryption module AND keys aren't already encrypted
+ if API_KEY_ENCRYPTION_AVAILABLE:
+ # Check if keys need encryption (not already encrypted)
+ needs_encryption = False
+ for key in ['api_key', 'azure_vision_key', 'google_vision_credentials']:
+ if key in config_to_save:
+ value = config_to_save[key]
+ # If it's a non-empty string that doesn't start with 'ENC:', it needs encryption
+ if value and isinstance(value, str) and not value.startswith('ENC:'):
+ needs_encryption = True
+ break
+
+ if needs_encryption:
+ config_to_save = encrypt_config(config_to_save)
+
+ # Create directory if it doesn't exist (important for HF Spaces)
+ os.makedirs(os.path.dirname(self.config_file) or '.', exist_ok=True)
+
+ # Debug output
+ if is_hf_spaces:
+ print(f"📝 Saving to HF Spaces persistent storage: {self.config_file}")
+
+ print(f"DEBUG save_config called with model={config.get('model')}, batch_size={config.get('batch_size')}")
+ print(f"DEBUG self.config before={self.config.get('model') if hasattr(self, 'config') else 'N/A'}")
+ print(f"DEBUG self.decrypted_config before={self.decrypted_config.get('model') if hasattr(self, 'decrypted_config') else 'N/A'}")
+
+ with open(self.config_file, 'w', encoding='utf-8') as f:
+ json.dump(config_to_save, f, ensure_ascii=False, indent=2)
+
+ # IMPORTANT: Update the in-memory configs so the UI reflects the changes immediately
+ self.config = config_to_save
+ # Update decrypted config too
+ self.decrypted_config = config.copy() # Use the original (unencrypted) version
+ if API_KEY_ENCRYPTION_AVAILABLE:
+ # Make sure decrypted_config has decrypted values
+ self.decrypted_config = decrypt_config(self.decrypted_config)
+
+ print(f"DEBUG self.config after={self.config.get('model')}")
+ print(f"DEBUG self.decrypted_config after={self.decrypted_config.get('model')}")
+
+ if is_hf_spaces:
+ print(f"✅ Saved to persistent storage: {self.config_file}")
+ # Also verify the file was written
+ if os.path.exists(self.config_file):
+ file_size = os.path.getsize(self.config_file)
+ print(f"✅ File confirmed: {file_size} bytes")
+ return "✅ Settings saved to persistent storage!"
+ else:
+ print(f"✅ Saved to {self.config_file}")
+ return "✅ Settings saved successfully!"
+
+ except Exception as e:
+ print(f"❌ Save error: {e}")
+ if is_hf_spaces:
+ print(f"💡 Note: Make sure you have persistent storage enabled for your Space")
+ return f"❌ Failed to save: {str(e)}\n\nNote: Persistent storage may not be enabled"
+ return f"❌ Failed to save: {str(e)}"
+
+ def translate_epub(
+ self,
+ epub_file,
+ model,
+ api_key,
+ profile_name,
+ system_prompt,
+ temperature,
+ max_tokens,
+ enable_image_trans=False,
+ glossary_file=None
+ ):
+ """Translate EPUB file - yields progress updates"""
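+        # Each yield is a 7-tuple consumed by the Gradio UI:
+        # (output_file, status_message, progress_group_update, logs_text, status_update, progress_text, progress_percent)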
+
+ if not TRANSLATION_AVAILABLE:
+ yield None, None, None, "❌ Translation modules not loaded", None, "Error", 0
+ return
+
+ if not epub_file:
+ yield None, None, None, "❌ Please upload an EPUB or TXT file", None, "Error", 0
+ return
+
+ if not api_key:
+ yield None, None, None, "❌ Please provide an API key", None, "Error", 0
+ return
+
+ if not profile_name:
+ yield None, None, None, "❌ Please select a translation profile", None, "Error", 0
+ return
+
+ # Initialize logs list
+ translation_logs = []
+
+ try:
+ # Initial status
+ input_path = epub_file.name if hasattr(epub_file, 'name') else epub_file
+ file_ext = os.path.splitext(input_path)[1].lower()
+ file_type = "EPUB" if file_ext == ".epub" else "TXT"
+
+ translation_logs.append(f"📚 Starting {file_type} translation...")
+ yield None, None, gr.update(visible=True), "\n".join(translation_logs), gr.update(visible=True), "Starting...", 0
+
+ # Save uploaded file to temp location if needed
+ epub_base = os.path.splitext(os.path.basename(input_path))[0]
+
+ translation_logs.append(f"📖 Input: {os.path.basename(input_path)}")
+ translation_logs.append(f"🤖 Model: {model}")
+ translation_logs.append(f"📝 Profile: {profile_name}")
+ yield None, None, gr.update(visible=True), "\n".join(translation_logs), gr.update(visible=True), "Initializing...", 5
+
+ # Use the provided system prompt (user may have edited it)
+ translation_prompt = system_prompt if system_prompt else self.profiles.get(profile_name, "")
+
+            # Simulate a command-line invocation so the translator picks up the input path from sys.argv
+ import sys
+ original_argv = sys.argv.copy()
+ sys.argv = ['glossarion_web.py', input_path]
+
+ # Set environment variables for TransateKRtoEN.main()
+ os.environ['INPUT_PATH'] = input_path
+ os.environ['MODEL'] = model
+ os.environ['TRANSLATION_TEMPERATURE'] = str(temperature)
+ os.environ['MAX_OUTPUT_TOKENS'] = str(max_tokens)
+ os.environ['ENABLE_IMAGE_TRANSLATION'] = '1' if enable_image_trans else '0'
+ # Set output directory to current working directory
+ os.environ['OUTPUT_DIRECTORY'] = os.getcwd()
+
+ # Set all additional environment variables from config
+ self.set_all_environment_variables()
+
+ # OVERRIDE critical safety features AFTER config load
+ # CORRECT variable name is EMERGENCY_PARAGRAPH_RESTORE (no ATION)
+ os.environ['EMERGENCY_PARAGRAPH_RESTORE'] = '0' # DISABLED
+ os.environ['REMOVE_AI_ARTIFACTS'] = '1' # ENABLED
+
+ # Debug: Verify settings
+ translation_logs.append(f"\n🔧 Debug: EMERGENCY_PARAGRAPH_RESTORE = '{os.environ.get('EMERGENCY_PARAGRAPH_RESTORE', 'NOT SET')}'")
+ translation_logs.append(f"🔧 Debug: REMOVE_AI_ARTIFACTS = '{os.environ.get('REMOVE_AI_ARTIFACTS', 'NOT SET')}'")
+ yield None, None, gr.update(visible=True), "\n".join(translation_logs), gr.update(visible=True), "Configuration set...", 10
+
+ # Set API key environment variable
+ if 'gpt' in model.lower() or 'openai' in model.lower():
+ os.environ['OPENAI_API_KEY'] = api_key
+ os.environ['API_KEY'] = api_key
+ elif 'claude' in model.lower():
+ os.environ['ANTHROPIC_API_KEY'] = api_key
+ os.environ['API_KEY'] = api_key
+ elif 'gemini' in model.lower():
+ os.environ['GOOGLE_API_KEY'] = api_key
+ os.environ['API_KEY'] = api_key
+ else:
+ os.environ['API_KEY'] = api_key
+
+ # Set the system prompt
+ if translation_prompt:
+ # Save to temp profile
+ temp_config = self.config.copy()
+ temp_config['prompt_profiles'] = temp_config.get('prompt_profiles', {})
+ temp_config['prompt_profiles'][profile_name] = translation_prompt
+ temp_config['active_profile'] = profile_name
+
+ # Save temporarily
+ with open(self.config_file, 'w', encoding='utf-8') as f:
+ json.dump(temp_config, f, ensure_ascii=False, indent=2)
+
+ translation_logs.append("⚙️ Configuration set")
+ yield None, None, gr.update(visible=True), "\n".join(translation_logs), gr.update(visible=True), "Starting translation...", 10
+
+ # Create a thread-safe queue for capturing logs
+ import queue
+ import threading
+ import time
+ log_queue = queue.Queue()
+ translation_complete = threading.Event()
+ translation_error = [None]
+
+ def log_callback(msg):
+ """Capture log messages"""
+ if msg and msg.strip():
+ log_queue.put(msg.strip())
+
+ # Run translation in a separate thread
+ def run_translation():
+ try:
+ result = TransateKRtoEN.main(
+ log_callback=log_callback,
+ stop_callback=None
+ )
+ translation_error[0] = None
+ except Exception as e:
+ translation_error[0] = e
+ finally:
+ translation_complete.set()
+
+ translation_thread = threading.Thread(target=run_translation, daemon=True)
+ translation_thread.start()
+
+ # Monitor progress
+ last_yield_time = time.time()
+ progress_percent = 10
+
+ while not translation_complete.is_set() or not log_queue.empty():
+ # Check if stop was requested
+ if self.epub_translation_stop:
+ translation_logs.append("⚠️ Stopping translation...")
+ # Try to stop the translation thread
+ translation_complete.set()
+ break
+
+ # Collect logs
+ new_logs = []
+ while not log_queue.empty():
+ try:
+ msg = log_queue.get_nowait()
+ new_logs.append(msg)
+ except queue.Empty:
+ break
+
+ # Add new logs
+ if new_logs:
+ translation_logs.extend(new_logs)
+
+ # Update progress based on log content
+ for log in new_logs:
+ if 'Chapter' in log or 'chapter' in log:
+ progress_percent = min(progress_percent + 5, 90)
+ elif '✅' in log or 'Complete' in log:
+ progress_percent = min(progress_percent + 10, 95)
+ elif 'Translating' in log:
+ progress_percent = min(progress_percent + 2, 85)
+
+ # Yield updates periodically
+ current_time = time.time()
+ if new_logs or (current_time - last_yield_time) > 1.0:
+ status_text = new_logs[-1] if new_logs else "Processing..."
+ # Keep only last 100 logs to avoid UI overflow
+ display_logs = translation_logs[-100:] if len(translation_logs) > 100 else translation_logs
+ yield None, None, gr.update(visible=True), "\n".join(display_logs), gr.update(visible=True), status_text, progress_percent
+ last_yield_time = current_time
+
+ # Small delay to avoid CPU spinning
+ time.sleep(0.1)
+
+ # Wait for thread to complete
+ translation_thread.join(timeout=5)
+
+ # Restore original sys.argv
+ sys.argv = original_argv
+
+ # Log any errors but don't fail immediately - check for output first
+ if translation_error[0]:
+ error_msg = f"⚠️ Translation completed with warnings: {str(translation_error[0])}"
+ translation_logs.append(error_msg)
+ translation_logs.append("🔍 Checking for output file...")
+
+ # Check for output file - just grab any .epub from the output directory
+ output_dir = epub_base
+ compiled_epub = None
+
+ # First, try to find ANY .epub file in the output directory
+ output_dir_path = os.path.join(os.getcwd(), output_dir)
+ if os.path.isdir(output_dir_path):
+ translation_logs.append(f"\n📂 Checking output directory: {output_dir_path}")
+ for file in os.listdir(output_dir_path):
+ if file.endswith('.epub'):
+ full_path = os.path.join(output_dir_path, file)
+ # Make sure it's not a temp/backup file
+ if os.path.isfile(full_path) and os.path.getsize(full_path) > 1000:
+ compiled_epub = full_path
+ translation_logs.append(f" ✅ Found EPUB in output dir: {file}")
+ break
+
+ # If we found it in the output directory, return it immediately
+ if compiled_epub:
+ file_size = os.path.getsize(compiled_epub)
+ translation_logs.append(f"\n✅ Translation complete: {os.path.basename(compiled_epub)}")
+ translation_logs.append(f"🔗 File path: {compiled_epub}")
+ translation_logs.append(f"📏 File size: {file_size:,} bytes ({file_size/1024/1024:.2f} MB)")
+ translation_logs.append(f"📥 Click 'Download Translated {file_type}' below to save your file")
+ final_status = "Translation complete!" if not translation_error[0] else "Translation completed with warnings"
+
+ yield (
+ compiled_epub,
+ gr.update(value="### ✅ Translation Complete!", visible=True),
+ gr.update(visible=False),
+ "\n".join(translation_logs),
+ gr.update(value=final_status, visible=True),
+ final_status,
+ 100
+ )
+ return
+
+ # Determine output extension based on input file type
+ output_ext = ".epub" if file_ext == ".epub" else ".txt"
+
+ # Get potential base directories
+ base_dirs = [
+ os.getcwd(), # Current working directory
+ os.path.dirname(input_path), # Input file directory
+ "/tmp", # Common temp directory on Linux/HF Spaces
+ "/home/user/app", # HF Spaces app directory
+ os.path.expanduser("~"), # Home directory
+ ]
+
+ # Look for multiple possible output locations
+ possible_paths = []
+
+ # Extract title from input filename for more patterns
+ # e.g., "tales of terror_dick donovan 2" -> "Tales of Terror"
+ title_parts = os.path.basename(input_path).replace(output_ext, '').split('_')
+ possible_titles = [
+ epub_base, # Original: tales of terror_dick donovan 2
+ ' '.join(title_parts[:-2]).title() if len(title_parts) > 2 else epub_base, # Tales Of Terror
+ ]
+
+ for base_dir in base_dirs:
+ if base_dir and os.path.exists(base_dir):
+ for title in possible_titles:
+ # Direct in base directory
+ possible_paths.append(os.path.join(base_dir, f"{title}_translated{output_ext}"))
+ possible_paths.append(os.path.join(base_dir, f"{title}{output_ext}"))
+ # In output subdirectory
+ possible_paths.append(os.path.join(base_dir, output_dir, f"{title}_translated{output_ext}"))
+ possible_paths.append(os.path.join(base_dir, output_dir, f"{title}{output_ext}"))
+ # In nested output directory
+ possible_paths.append(os.path.join(base_dir, epub_base, f"{title}_translated{output_ext}"))
+ possible_paths.append(os.path.join(base_dir, epub_base, f"{title}{output_ext}"))
+
+ # Also add relative paths
+ possible_paths.extend([
+ f"{epub_base}_translated{output_ext}",
+ os.path.join(output_dir, f"{epub_base}_translated{output_ext}"),
+ os.path.join(output_dir, f"{epub_base}{output_ext}"),
+ ])
+
+ # Also search for any translated file in the output directory
+ if os.path.isdir(output_dir):
+ for file in os.listdir(output_dir):
+ if file.endswith(f'_translated{output_ext}'):
+ possible_paths.insert(0, os.path.join(output_dir, file))
+
+ # Add debug information about current environment
+ translation_logs.append(f"\n📁 Debug Info:")
+ translation_logs.append(f" Current working directory: {os.getcwd()}")
+ translation_logs.append(f" Input file directory: {os.path.dirname(input_path)}")
+ translation_logs.append(f" Looking for: {epub_base}_translated{output_ext}")
+
+ translation_logs.append(f"\n🔍 Searching for output file...")
+ for potential_epub in possible_paths[:10]: # Show first 10 paths
+ translation_logs.append(f" Checking: {potential_epub}")
+ if os.path.exists(potential_epub):
+ compiled_epub = potential_epub
+ translation_logs.append(f" ✅ Found: {potential_epub}")
+ break
+
+ if not compiled_epub and len(possible_paths) > 10:
+ translation_logs.append(f" ... and {len(possible_paths) - 10} more paths")
+
+ if compiled_epub:
+ # Verify file exists and is readable
+ if os.path.exists(compiled_epub) and os.path.isfile(compiled_epub):
+ file_size = os.path.getsize(compiled_epub)
+ translation_logs.append(f"✅ Translation complete: {os.path.basename(compiled_epub)}")
+ translation_logs.append(f"🔗 File path: {compiled_epub}")
+ translation_logs.append(f"📏 File size: {file_size:,} bytes ({file_size/1024/1024:.2f} MB)")
+ translation_logs.append(f"📥 Click 'Download Translated {file_type}' below to save your file")
+ # Make the file component visible with the translated file
+ final_status = "Translation complete!" if not translation_error[0] else "Translation completed with warnings"
+
+ # Return the actual file path WITH visibility update
+ yield (
+ compiled_epub, # epub_output - The file path (Gradio will handle it)
+ gr.update(value="### ✅ Translation Complete!", visible=True), # epub_status_message
+ gr.update(visible=False), # epub_progress_group
+ "\n".join(translation_logs), # epub_logs
+ gr.update(value=final_status, visible=True), # epub_status
+ final_status, # epub_progress_text
+ 100 # epub_progress_bar
+ )
+ return
+ else:
+ translation_logs.append(f"⚠️ File found but not accessible: {compiled_epub}")
+ compiled_epub = None # Force search
+
+ # Output file not found - search recursively in relevant directories
+ translation_logs.append("⚠️ Output file not in expected locations, searching recursively...")
+ found_files = []
+
+ # Search in multiple directories
+ search_dirs = [
+ os.getcwd(), # Current directory
+ os.path.dirname(input_path), # Input file directory
+ "/tmp", # Temp directory (HF Spaces)
+ "/home/user/app", # HF Spaces app directory
+ ]
+
+ for search_dir in search_dirs:
+ if not os.path.exists(search_dir):
+ continue
+
+ translation_logs.append(f" Searching in: {search_dir}")
+ try:
+ for root, dirs, files in os.walk(search_dir, topdown=True):
+ # Limit depth to 3 levels and skip hidden/system directories
+ depth = root[len(search_dir):].count(os.sep)
+ if depth >= 3:
+ dirs[:] = [] # Don't go deeper
+ else:
+ dirs[:] = [d for d in dirs if not d.startswith('.') and d not in ['__pycache__', 'node_modules', 'venv', '.git']]
+
+ for file in files:
+ # Look for files with _translated in name or matching our pattern
+ if (f'_translated{output_ext}' in file or
+ (file.endswith(output_ext) and epub_base in file)):
+ full_path = os.path.join(root, file)
+ found_files.append(full_path)
+ translation_logs.append(f" ✅ Found: {full_path}")
+ except (PermissionError, OSError) as e:
+ translation_logs.append(f" ⚠️ Could not search {search_dir}: {e}")
+
+ if found_files:
+ # Use the most recently modified file
+ compiled_epub = max(found_files, key=os.path.getmtime)
+
+ # Verify file exists and get info
+ if os.path.exists(compiled_epub) and os.path.isfile(compiled_epub):
+ file_size = os.path.getsize(compiled_epub)
+ translation_logs.append(f"✅ Found output file: {os.path.basename(compiled_epub)}")
+ translation_logs.append(f"🔗 File path: {compiled_epub}")
+ translation_logs.append(f"📏 File size: {file_size:,} bytes ({file_size/1024/1024:.2f} MB)")
+ translation_logs.append(f"📥 Click 'Download Translated {file_type}' below to save your file")
+ # Return the actual file path directly
+ yield (
+ compiled_epub, # epub_output - Just the file path
+ gr.update(value="### ✅ Translation Complete!", visible=True), # epub_status_message
+ gr.update(visible=False), # epub_progress_group
+ "\n".join(translation_logs), # epub_logs
+ gr.update(value="Translation complete!", visible=True), # epub_status
+ "Translation complete!", # epub_progress_text
+ 100 # epub_progress_bar
+ )
+ return
+
+ # Still couldn't find output - report failure
+ translation_logs.append("❌ Could not locate translated output file")
+ translation_logs.append(f"🔍 Checked paths: {', '.join(possible_paths[:5])}...")
+ translation_logs.append("\n💡 Troubleshooting tips:")
+ translation_logs.append(" 1. Check if TransateKRtoEN.py completed successfully")
+ translation_logs.append(" 2. Look for any error messages in the logs above")
+ translation_logs.append(" 3. The output might be in a subdirectory - check manually")
+ yield None, gr.update(value="### ⚠️ Output Not Found", visible=True), gr.update(visible=False), "\n".join(translation_logs), gr.update(value="Translation process completed but output file not found", visible=True), "Output not found", 90
+
+ except Exception as e:
+ import traceback
+ error_msg = f"❌ Error during translation:\n{str(e)}\n\n{traceback.format_exc()}"
+ translation_logs.append(error_msg)
+ yield None, None, gr.update(visible=False), "\n".join(translation_logs), gr.update(visible=True), "Error occurred", 0
+
+ def translate_epub_with_stop(self, *args):
+ """Wrapper for translate_epub that includes button visibility control"""
+ self.epub_translation_stop = False
+
+ # Show stop button, hide translate button at start
+ for result in self.translate_epub(*args):
+ if self.epub_translation_stop:
+ # Translation was stopped
+ yield result[0], result[1], result[2], result[3] + "\n\n⚠️ Translation stopped by user", result[4], "Stopped", 0, gr.update(visible=True), gr.update(visible=False)
+ return
+ # Add button visibility updates to the yields
+ yield result[0], result[1], result[2], result[3], result[4], result[5], result[6], gr.update(visible=False), gr.update(visible=True)
+
+ # Reset buttons at the end
+ yield result[0], result[1], result[2], result[3], result[4], result[5], result[6], gr.update(visible=True), gr.update(visible=False)
+
+ def stop_epub_translation(self):
+ """Stop the ongoing EPUB translation"""
+ self.epub_translation_stop = True
+ if self.epub_translation_thread and self.epub_translation_thread.is_alive():
+ # The thread will check the stop flag
+ pass
+ return gr.update(visible=True), gr.update(visible=False), "Translation stopped"
+
+ def extract_glossary(
+ self,
+ epub_file,
+ model,
+ api_key,
+ min_frequency,
+ max_names,
+ max_titles=30,
+ max_text_size=50000,
+ max_sentences=200,
+ translation_batch=50,
+ chapter_split_threshold=8192,
+ filter_mode='all',
+ strip_honorifics=True,
+ fuzzy_threshold=0.90,
+ extraction_prompt=None,
+ format_instructions=None,
+ use_legacy_csv=False
+ ):
+ """Extract glossary from EPUB with manual extraction settings - yields progress updates"""
+
+ if not epub_file:
+ yield None, None, None, "❌ Please upload an EPUB file", None, "Error", 0
+ return
+
+ extraction_logs = []
+
+ try:
+ import extract_glossary_from_epub
+
+ extraction_logs.append("🔍 Starting glossary extraction...")
+ yield None, None, gr.update(visible=True), "\n".join(extraction_logs), gr.update(visible=True), "Starting...", 0
+
+ input_path = epub_file.name if hasattr(epub_file, 'name') else epub_file
+ output_path = input_path.replace('.epub', '_glossary.csv')
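+            # Expected CSV location; checked after extract_glossary_from_epub.main() completes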
+
+ extraction_logs.append(f"📖 Input: {os.path.basename(input_path)}")
+ extraction_logs.append(f"🤖 Model: {model}")
+ yield None, None, gr.update(visible=True), "\n".join(extraction_logs), gr.update(visible=True), "Initializing...", 10
+
+ # Set all environment variables from config
+ self.set_all_environment_variables()
+
+ # Set API key
+ if 'gpt' in model.lower():
+ os.environ['OPENAI_API_KEY'] = api_key
+ elif 'claude' in model.lower():
+ os.environ['ANTHROPIC_API_KEY'] = api_key
+ else:
+ os.environ['API_KEY'] = api_key
+
+ extraction_logs.append("📋 Extracting text from EPUB...")
+ yield None, None, gr.update(visible=True), "\n".join(extraction_logs), gr.update(visible=True), "Extracting text...", 20
+
+ # Set environment variables for glossary extraction
+ os.environ['MODEL'] = model
+ os.environ['GLOSSARY_MIN_FREQUENCY'] = str(min_frequency)
+ os.environ['GLOSSARY_MAX_NAMES'] = str(max_names)
+ os.environ['GLOSSARY_MAX_TITLES'] = str(max_titles)
+ os.environ['GLOSSARY_BATCH_SIZE'] = str(translation_batch)
+ os.environ['GLOSSARY_MAX_TEXT_SIZE'] = str(max_text_size)
+ os.environ['GLOSSARY_MAX_SENTENCES'] = str(max_sentences)
+ os.environ['GLOSSARY_CHAPTER_SPLIT_THRESHOLD'] = str(chapter_split_threshold)
+ os.environ['GLOSSARY_FILTER_MODE'] = filter_mode
+ os.environ['GLOSSARY_STRIP_HONORIFICS'] = '1' if strip_honorifics else '0'
+ os.environ['GLOSSARY_FUZZY_THRESHOLD'] = str(fuzzy_threshold)
+ os.environ['GLOSSARY_USE_LEGACY_CSV'] = '1' if use_legacy_csv else '0'
+
+ # Set prompts if provided
+ if extraction_prompt:
+ os.environ['GLOSSARY_SYSTEM_PROMPT'] = extraction_prompt
+ if format_instructions:
+ os.environ['GLOSSARY_FORMAT_INSTRUCTIONS'] = format_instructions
+
+ extraction_logs.append(f"⚙️ Settings: Min freq={min_frequency}, Max names={max_names}, Filter={filter_mode}")
+ extraction_logs.append(f"⚙️ Options: Strip honorifics={strip_honorifics}, Fuzzy threshold={fuzzy_threshold:.2f}")
+ yield None, None, gr.update(visible=True), "\n".join(extraction_logs), gr.update(visible=True), "Processing...", 40
+
+ # Create a thread-safe queue for capturing logs
+ import queue
+ import threading
+ import time
+ log_queue = queue.Queue()
+ extraction_complete = threading.Event()
+ extraction_error = [None]
+ extraction_result = [None]
+
+ def log_callback(msg):
+ """Capture log messages"""
+ if msg and msg.strip():
+ log_queue.put(msg.strip())
+
+ # Run extraction in a separate thread
+ def run_extraction():
+ try:
+ result = extract_glossary_from_epub.main(
+ log_callback=log_callback,
+ stop_callback=None
+ )
+ extraction_result[0] = result
+ extraction_error[0] = None
+ except Exception as e:
+ extraction_error[0] = e
+ finally:
+ extraction_complete.set()
+
+ extraction_thread = threading.Thread(target=run_extraction, daemon=True)
+ extraction_thread.start()
+
+ # Monitor progress
+ last_yield_time = time.time()
+ progress_percent = 40
+
+ while not extraction_complete.is_set() or not log_queue.empty():
+ # Check if stop was requested
+ if self.glossary_extraction_stop:
+ extraction_logs.append("⚠️ Stopping extraction...")
+ # Try to stop the extraction thread
+ extraction_complete.set()
+ break
+
+ # Collect logs
+ new_logs = []
+ while not log_queue.empty():
+ try:
+ msg = log_queue.get_nowait()
+ new_logs.append(msg)
+ except queue.Empty:
+ break
+
+ # Add new logs
+ if new_logs:
+ extraction_logs.extend(new_logs)
+
+ # Update progress based on log content
+ for log in new_logs:
+ if 'Processing' in log or 'Extracting' in log:
+ progress_percent = min(progress_percent + 5, 80)
+ elif 'Writing' in log or 'Saving' in log:
+ progress_percent = min(progress_percent + 10, 90)
+
+ # Yield updates periodically
+ current_time = time.time()
+ if new_logs or (current_time - last_yield_time) > 1.0:
+ status_text = new_logs[-1] if new_logs else "Processing..."
+ # Keep only last 100 logs
+ display_logs = extraction_logs[-100:] if len(extraction_logs) > 100 else extraction_logs
+ yield None, None, gr.update(visible=True), "\n".join(display_logs), gr.update(visible=True), status_text, progress_percent
+ last_yield_time = current_time
+
+ # Small delay to avoid CPU spinning
+ time.sleep(0.1)
+
+ # Wait for thread to complete
+ extraction_thread.join(timeout=5)
+
+ # Check for errors
+ if extraction_error[0]:
+ error_msg = f"❌ Extraction error: {str(extraction_error[0])}"
+ extraction_logs.append(error_msg)
+ yield None, None, gr.update(visible=False), "\n".join(extraction_logs), gr.update(visible=True), error_msg, 0
+ return
+
+ extraction_logs.append("🖍️ Writing glossary to CSV...")
+ yield None, None, gr.update(visible=True), "\n".join(extraction_logs), gr.update(visible=True), "Writing CSV...", 95
+
+ if os.path.exists(output_path):
+ extraction_logs.append(f"✅ Glossary extracted successfully!")
+ extraction_logs.append(f"💾 Saved to: {os.path.basename(output_path)}")
+ yield output_path, gr.update(visible=True), gr.update(visible=False), "\n".join(extraction_logs), gr.update(visible=True), "Extraction complete!", 100
+ else:
+ extraction_logs.append("❌ Glossary extraction failed - output file not created")
+ yield None, None, gr.update(visible=False), "\n".join(extraction_logs), gr.update(visible=True), "Extraction failed", 0
+
+ except Exception as e:
+ import traceback
+ error_msg = f"❌ Error during extraction:\n{str(e)}\n\n{traceback.format_exc()}"
+ extraction_logs.append(error_msg)
+ yield None, None, gr.update(visible=False), "\n".join(extraction_logs), gr.update(visible=True), "Error occurred", 0
+
+ def extract_glossary_with_stop(self, *args):
+ """Wrapper for extract_glossary that includes button visibility control"""
+ self.glossary_extraction_stop = False
+
+ # Show stop button, hide extract button at start
+ for result in self.extract_glossary(*args):
+ if self.glossary_extraction_stop:
+ # Extraction was stopped
+ yield result[0], result[1], result[2], result[3] + "\n\n⚠️ Extraction stopped by user", result[4], "Stopped", 0, gr.update(visible=True), gr.update(visible=False)
+ return
+ # Add button visibility updates to the yields
+ yield result[0], result[1], result[2], result[3], result[4], result[5], result[6], gr.update(visible=False), gr.update(visible=True)
+
+ # Reset buttons at the end
+ yield result[0], result[1], result[2], result[3], result[4], result[5], result[6], gr.update(visible=True), gr.update(visible=False)
+
+ def stop_glossary_extraction(self):
+ """Stop the ongoing glossary extraction"""
+ self.glossary_extraction_stop = True
+ if self.glossary_extraction_thread and self.glossary_extraction_thread.is_alive():
+ # The thread will check the stop flag
+ pass
+ return gr.update(visible=True), gr.update(visible=False), "Extraction stopped"
+
+ def run_qa_scan(self, folder_path, min_foreign_chars, check_repetition,
+ check_glossary_leakage, min_file_length, check_multiple_headers,
+ check_missing_html, check_insufficient_paragraphs,
+ min_paragraph_percentage, report_format, auto_save_report):
+ """Run Quick QA scan on output folder - yields progress updates"""
+
+ # Handle both string paths and File objects
+ if hasattr(folder_path, 'name'):
+ # It's a File object from Gradio
+ folder_path = folder_path.name
+
+ if not folder_path:
+ yield gr.update(visible=False), gr.update(value="### ❌ Error", visible=True), gr.update(visible=False), "❌ Please provide a folder path or upload a ZIP file", gr.update(visible=False), "Error", 0
+ return
+
+ if isinstance(folder_path, str):
+ folder_path = folder_path.strip()
+
+ if not os.path.exists(folder_path):
+ yield gr.update(visible=False), gr.update(value=f"### ❌ File/Folder not found", visible=True), gr.update(visible=False), f"❌ File/Folder not found: {folder_path}", gr.update(visible=False), "Error", 0
+ return
+
+ # Initialize scan_logs early
+ scan_logs = []
+
+ # Check if it's a ZIP or EPUB file (for Hugging Face Spaces or convenience)
+ if os.path.isfile(folder_path) and (folder_path.lower().endswith('.zip') or folder_path.lower().endswith('.epub')):
+ # Extract ZIP/EPUB to temp folder
+ import zipfile
+ import tempfile
+
+ temp_dir = tempfile.mkdtemp(prefix="qa_scan_")
+
+ try:
+ file_type = "EPUB" if folder_path.lower().endswith('.epub') else "ZIP"
+ scan_logs.append(f"📦 Extracting {file_type} file: {os.path.basename(folder_path)}")
+
+ with zipfile.ZipFile(folder_path, 'r') as zip_ref:
+ # For EPUB files, look for the content folders
+ if file_type == "EPUB":
+ # EPUB files typically have OEBPS, EPUB, or similar content folders
+ all_files = zip_ref.namelist()
+ # Extract everything
+ zip_ref.extractall(temp_dir)
+
+ # Try to find the content directory
+ content_dirs = ['OEBPS', 'EPUB', 'OPS', 'content']
+ actual_content_dir = None
+ for dir_name in content_dirs:
+ potential_dir = os.path.join(temp_dir, dir_name)
+ if os.path.exists(potential_dir):
+ actual_content_dir = potential_dir
+ break
+
+ # If no standard content dir found, use the temp_dir itself
+ if actual_content_dir:
+ folder_path = actual_content_dir
+ scan_logs.append(f"📁 Found EPUB content directory: {os.path.basename(actual_content_dir)}")
+ else:
+ folder_path = temp_dir
+ scan_logs.append(f"📁 Using extracted root directory")
+ else:
+ # Regular ZIP file
+ zip_ref.extractall(temp_dir)
+ folder_path = temp_dir
+
+ scan_logs.append(f"✅ Successfully extracted to temporary folder")
+ # Continue with normal processing, but include initial logs
+ # Note: we'll need to pass scan_logs through the rest of the function
+
+ except Exception as e:
+ yield gr.update(visible=False), gr.update(value=f"### ❌ {file_type} extraction failed", visible=True), gr.update(visible=False), f"❌ Failed to extract {file_type}: {str(e)}", gr.update(visible=False), "Error", 0
+ return
+ elif not os.path.isdir(folder_path):
+ yield gr.update(visible=False), gr.update(value=f"### ❌ Not a folder, ZIP, or EPUB", visible=True), gr.update(visible=False), f"❌ Path is not a folder, ZIP, or EPUB file: {folder_path}", gr.update(visible=False), "Error", 0
+ return
+
+ try:
+ scan_logs.append("🔍 Starting Quick QA Scan...")
+ scan_logs.append(f"📁 Scanning folder: {folder_path}")
+ yield gr.update(visible=False), gr.update(value="### Scanning...", visible=True), gr.update(visible=True), "\n".join(scan_logs), gr.update(visible=False), "Starting...", 0
+
+ # Find all HTML/XHTML files in the folder and subfolders
+ html_files = []
+ for root, dirs, files in os.walk(folder_path):
+ for file in files:
+ if file.lower().endswith(('.html', '.xhtml', '.htm')):
+ html_files.append(os.path.join(root, file))
+
+ if not html_files:
+ scan_logs.append(f"⚠️ No HTML/XHTML files found in {folder_path}")
+ yield gr.update(visible=False), gr.update(value="### ⚠️ No files found", visible=True), gr.update(visible=False), "\n".join(scan_logs), gr.update(visible=False), "No files to scan", 0
+ return
+
+ scan_logs.append(f"📄 Found {len(html_files)} HTML/XHTML files to scan")
+ scan_logs.append("⚡ Quick Scan Mode (85% threshold, Speed optimized)")
+ yield gr.update(visible=False), gr.update(value="### Initializing...", visible=True), gr.update(visible=True), "\n".join(scan_logs), gr.update(visible=False), "Initializing...", 10
+
+ # QA scanning process
+ total_files = len(html_files)
+ issues_found = []
+ chapters_scanned = set()
+
+ for i, file_path in enumerate(html_files):
+ if self.qa_scan_stop:
+ scan_logs.append("⚠️ Scan stopped by user")
+ break
+
+ # Get relative path from base folder for cleaner display
+ rel_path = os.path.relpath(file_path, folder_path)
+ file_name = rel_path.replace('\\', '/')
+
+ # Quick scan optimization: skip if we've already scanned similar chapters
+ # (consecutive chapter checking)
+ chapter_match = None
+ for pattern in ['chapter', 'ch', 'c']:
+ if pattern in file_name.lower():
+ import re
+ match = re.search(r'(\d+)', file_name)
+ if match:
+ chapter_num = int(match.group(1))
+ # Skip if we've already scanned nearby chapters (Quick Scan optimization)
+ if any(abs(chapter_num - ch) <= 1 for ch in chapters_scanned):
+ if len(chapters_scanned) > 5: # Only skip after scanning a few
+ continue
+ chapters_scanned.add(chapter_num)
+ break
+
+ scan_logs.append(f"\n🔍 Scanning: {file_name}")
+ progress = int(10 + (80 * i / total_files))
+ yield None, None, gr.update(visible=True), "\n".join(scan_logs), gr.update(visible=True), f"Scanning {file_name}...", progress
+
+ # Read and check the HTML file
+ try:
+ with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
+ content = f.read()
+
+ file_issues = []
+
+ # Check file length
+ if len(content) < min_file_length:
+ continue # Skip short files
+
+ # Check for foreign characters (simulation - would need actual implementation)
+ # In real implementation, would check for source language characters
+ import random
+
+ # Check for multiple headers
+ if check_multiple_headers:
+ import re
+ headers = re.findall(r'<h[1-6][^>]*>', content, re.IGNORECASE)
+ if len(headers) >= 2:
+ file_issues.append("Multiple headers detected")
+
+ # Check for missing html tag
+ if check_missing_html:
+ if ' tag")
+
+ # Check for insufficient paragraphs
+ if check_insufficient_paragraphs:
+ p_tags = content.count('
', content, re.DOTALL)
+ p_text_length = sum(len(t) for t in p_text)
+ percentage = (p_text_length / text_length) * 100
+ if percentage < min_paragraph_percentage:
+ file_issues.append(f"Only {percentage:.1f}% text in
tags")
+
+ # Simulated additional checks
+ if check_repetition and random.random() > 0.85:
+ file_issues.append("Excessive repetition detected")
+
+ if check_glossary_leakage and random.random() > 0.9:
+ file_issues.append("Glossary leakage detected")
+
+ # Report issues found
+ if file_issues:
+ for issue in file_issues:
+ issues_found.append(f" ⚠️ {file_name}: {issue}")
+ scan_logs.append(f" ⚠️ Issue: {issue}")
+ else:
+ scan_logs.append(f" ✅ No issues found")
+
+ except Exception as e:
+ scan_logs.append(f" ❌ Error reading file: {str(e)}")
+
+ # Update logs periodically
+ if len(scan_logs) > 100:
+ scan_logs = scan_logs[-100:] # Keep only last 100 logs
+
+ yield gr.update(visible=False), None, gr.update(visible=True), "\n".join(scan_logs), gr.update(visible=False), f"Scanning {file_name}...", progress
+
+ # Generate report
+ scan_logs.append("\n📝 Generating report...")
+ yield gr.update(visible=False), None, gr.update(visible=True), "\n".join(scan_logs), gr.update(visible=False), "Generating report...", 95
+
+ # Create report content based on selected format
+ if report_format == "summary":
+ # Summary format - brief overview only
+ report_content = "QA SCAN REPORT - SUMMARY\n"
+ report_content += "=" * 50 + "\n\n"
+ report_content += f"Total files scanned: {total_files}\n"
+ report_content += f"Issues found: {len(issues_found)}\n\n"
+ if issues_found:
+ report_content += f"Files with issues: {min(len(issues_found), 10)} (showing first 10)\n"
+ report_content += "\n".join(issues_found[:10])
+ else:
+ report_content += "✅ No issues detected."
+
+ elif report_format == "verbose":
+ # Verbose format - all data including passed files
+ report_content = "QA SCAN REPORT - VERBOSE (ALL DATA)\n"
+ report_content += "=" * 50 + "\n\n"
+ from datetime import datetime
+ report_content += f"Scan Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n"
+ report_content += f"Folder Scanned: {folder_path}\n"
+ report_content += f"Total files scanned: {total_files}\n"
+ report_content += f"Issues found: {len(issues_found)}\n"
+ report_content += f"Settings used:\n"
+ report_content += f" - Min foreign chars: {min_foreign_chars}\n"
+ report_content += f" - Check repetition: {check_repetition}\n"
+ report_content += f" - Check glossary leakage: {check_glossary_leakage}\n"
+ report_content += f" - Min file length: {min_file_length}\n"
+ report_content += f" - Check multiple headers: {check_multiple_headers}\n"
+ report_content += f" - Check missing HTML: {check_missing_html}\n"
+ report_content += f" - Check insufficient paragraphs: {check_insufficient_paragraphs}\n"
+ report_content += f" - Min paragraph percentage: {min_paragraph_percentage}%\n\n"
+
+ report_content += "ALL FILES PROCESSED:\n"
+ report_content += "-" * 30 + "\n"
+ for file in html_files:
+ rel_path = os.path.relpath(file, folder_path)
+ report_content += f" {rel_path}\n"
+
+ if issues_found:
+ report_content += "\n\nISSUES DETECTED (DETAILED):\n"
+ report_content += "\n".join(issues_found)
+ else:
+ report_content += "\n\n✅ No issues detected. All files passed scan."
+
+ else: # detailed (default/recommended)
+ # Detailed format - recommended balance
+ report_content = "QA SCAN REPORT - DETAILED\n"
+ report_content += "=" * 50 + "\n\n"
+ report_content += f"Total files scanned: {total_files}\n"
+ report_content += f"Issues found: {len(issues_found)}\n\n"
+
+ if issues_found:
+ report_content += "ISSUES DETECTED:\n"
+ report_content += "\n".join(issues_found)
+ else:
+ report_content += "No issues detected. All files passed quick scan."
+
+ # Always save report to file for download
+ from datetime import datetime
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+ report_filename = f"qa_scan_report_{timestamp}.txt"
+ report_path = os.path.join(os.getcwd(), report_filename)
+
+ # Always write the report file
+ with open(report_path, 'w', encoding='utf-8') as f:
+ f.write(report_content)
+
+ if auto_save_report:
+ scan_logs.append(f"💾 Report auto-saved to: {report_filename}")
+ else:
+ scan_logs.append(f"📄 Report ready for download: {report_filename}")
+
+ scan_logs.append(f"\n✅ QA Scan completed!")
+ scan_logs.append(f"📊 Summary: {total_files} files scanned, {len(issues_found)} issues found")
+ scan_logs.append(f"\n📥 Click 'Download QA Report' below to save the report")
+
+ # Always return the report path and make File component visible
+ final_status = f"✅ Scan complete!\n{total_files} files scanned\n{len(issues_found)} issues found"
+ yield gr.update(value=report_path, visible=True), gr.update(value=f"### {final_status}", visible=True), gr.update(visible=False), "\n".join(scan_logs), gr.update(value=final_status, visible=True), "Scan complete!", 100
+
+ except Exception as e:
+ import traceback
+ error_msg = f"❌ Error during QA scan:\n{str(e)}\n\n{traceback.format_exc()}"
+ scan_logs.append(error_msg)
+ yield gr.update(visible=False), gr.update(value="### ❌ Error occurred", visible=True), gr.update(visible=False), "\n".join(scan_logs), gr.update(visible=True), "Error occurred", 0
+
+ def run_qa_scan_with_stop(self, *args):
+ """Wrapper for run_qa_scan that adds scan/stop button visibility control"""
+ self.qa_scan_stop = False
+ result = None
+
+ # Hide the scan button and show the stop button while the scan runs
+ for result in self.run_qa_scan(*args):
+ if self.qa_scan_stop:
+ # Scan was stopped
+ yield result[0], result[1], result[2], result[3] + "\n\n⚠️ Scan stopped by user", result[4], "Stopped", 0, gr.update(visible=True), gr.update(visible=False)
+ return
+ # Append button visibility updates to each yield
+ yield result[0], result[1], result[2], result[3], result[4], result[5], result[6], gr.update(visible=False), gr.update(visible=True)
+
+ # Reset buttons at the end (guard against the scan yielding nothing)
+ if result is not None:
+ yield result[0], result[1], result[2], result[3], result[4], result[5], result[6], gr.update(visible=True), gr.update(visible=False)
+
+ def stop_qa_scan(self):
+ """Stop the ongoing QA scan"""
+ self.qa_scan_stop = True
+ return gr.update(visible=True), gr.update(visible=False), "Scan stopped"
+
+ def stop_translation(self):
+ """Stop the ongoing translation process"""
+ print(f"DEBUG: stop_translation called, was_translating={self.is_translating}")
+ if self.is_translating:
+ print("DEBUG: Setting stop flag and cancellation")
+ self.stop_flag.set()
+ self.is_translating = False
+
+ # Best-effort: cancel any in-flight API operation on the active client
+ try:
+ if getattr(self, 'current_unified_client', None):
+ self.current_unified_client.cancel_current_operation()
+ print("DEBUG: Requested UnifiedClient cancellation")
+ except Exception as e:
+ print(f"DEBUG: UnifiedClient cancel failed: {e}")
+
+ # Also propagate to MangaTranslator class if available
+ try:
+ if MANGA_TRANSLATION_AVAILABLE:
+ from manga_translator import MangaTranslator
+ MangaTranslator.set_global_cancellation(True)
+ print("DEBUG: Set MangaTranslator global cancellation")
+ except ImportError:
+ pass
+
+ # Also propagate to UnifiedClient if available
+ try:
+ if MANGA_TRANSLATION_AVAILABLE:
+ from unified_api_client import UnifiedClient
+ UnifiedClient.set_global_cancellation(True)
+ print("DEBUG: Set UnifiedClient global cancellation")
+ except ImportError:
+ pass
+
+ # Kick off translator shutdown to free resources quickly
+ try:
+ tr = getattr(self, 'current_translator', None)
+ if tr and hasattr(tr, 'shutdown'):
+ import threading as _th
+ _th.Thread(target=tr.shutdown, name="WebMangaTranslatorShutdown", daemon=True).start()
+ print("DEBUG: Initiated translator shutdown thread")
+ # Clear reference so a new start creates a fresh instance
+ self.current_translator = None
+ except Exception as e:
+ print(f"DEBUG: Failed to start translator shutdown: {e}")
+ else:
+ print("DEBUG: stop_translation called but not translating")
+
+ def _reset_translation_flags(self):
+ """Reset all translation flags for new translation"""
+ self.is_translating = False
+ self.stop_flag.clear()
+
+ # Reset global cancellation flags
+ try:
+ if MANGA_TRANSLATION_AVAILABLE:
+ from manga_translator import MangaTranslator
+ MangaTranslator.set_global_cancellation(False)
+ except ImportError:
+ pass
+
+ try:
+ if MANGA_TRANSLATION_AVAILABLE:
+ from unified_api_client import UnifiedClient
+ UnifiedClient.set_global_cancellation(False)
+ except ImportError:
+ pass
+
+ def translate_manga(
+ self,
+ image_files,
+ model,
+ api_key,
+ profile_name,
+ system_prompt,
+ ocr_provider,
+ google_creds_path,
+ azure_key,
+ azure_endpoint,
+ enable_bubble_detection,
+ enable_inpainting,
+ font_size_mode,
+ font_size,
+ font_multiplier,
+ min_font_size,
+ max_font_size,
+ text_color,
+ shadow_enabled,
+ shadow_color,
+ shadow_offset_x,
+ shadow_offset_y,
+ shadow_blur,
+ bg_opacity,
+ bg_style,
+ parallel_panel_translation=False,
+ panel_max_workers=10
+ ):
+ """Translate manga images - GENERATOR that yields (logs, image, cbz_file, status, progress_group, progress_text, progress_bar) updates"""
+
+ # Reset translation flags and set running state
+ self._reset_translation_flags()
+ self.is_translating = True
+
+ if not MANGA_TRANSLATION_AVAILABLE:
+ self.is_translating = False
+ yield "❌ Manga translation modules not loaded", None, None, gr.update(value="❌ Error", visible=True), gr.update(visible=False), gr.update(value="Error"), gr.update(value=0)
+ return
+
+ if not image_files:
+ self.is_translating = False
+ yield "❌ Please upload at least one image", gr.update(visible=False), gr.update(visible=False), gr.update(value="❌ Error", visible=True), gr.update(visible=False), gr.update(value="Error"), gr.update(value=0)
+ return
+
+ if not api_key:
+ self.is_translating = False
+ yield "❌ Please provide an API key", gr.update(visible=False), gr.update(visible=False), gr.update(value="❌ Error", visible=True), gr.update(visible=False), gr.update(value="Error"), gr.update(value=0)
+ return
+
+ # Check for stop request
+ if self.stop_flag.is_set():
+ self.is_translating = False
+ yield "⏹️ Translation stopped by user", gr.update(visible=False), gr.update(visible=False), gr.update(value="⏹️ Stopped", visible=True), gr.update(visible=False), gr.update(value="Stopped"), gr.update(value=0)
+ return
+
+ if ocr_provider == "google":
+ # Check if credentials are provided or saved in config
+ if not google_creds_path and not self.get_config_value('google_vision_credentials'):
+ self.is_translating = False
+ yield "❌ Please provide Google Cloud credentials JSON file", gr.update(visible=False), gr.update(visible=False), gr.update(value="❌ Error", visible=True), gr.update(visible=False), gr.update(value="Error"), gr.update(value=0)
+ return
+
+ if ocr_provider == "azure":
+ # Ensure azure credentials are strings
+ azure_key_str = str(azure_key) if azure_key else ''
+ azure_endpoint_str = str(azure_endpoint) if azure_endpoint else ''
+ if not azure_key_str.strip() or not azure_endpoint_str.strip():
+ self.is_translating = False
+ yield "❌ Please provide Azure API key and endpoint", gr.update(visible=False), gr.update(visible=False), gr.update(value="❌ Error", visible=True), gr.update(visible=False), gr.update(value="Error"), gr.update(value=0)
+ return
+
+ try:
+
+ # Set all environment variables from config
+ self.set_all_environment_variables()
+
+ # Set API key environment variable
+ if 'gpt' in model.lower() or 'openai' in model.lower():
+ os.environ['OPENAI_API_KEY'] = api_key
+ elif 'claude' in model.lower():
+ os.environ['ANTHROPIC_API_KEY'] = api_key
+ elif 'gemini' in model.lower():
+ os.environ['GOOGLE_API_KEY'] = api_key
+
+ # Set Google Cloud credentials if provided and save to config
+ if ocr_provider == "google":
+ if google_creds_path:
+ # New file provided - save it
+ creds_path = google_creds_path.name if hasattr(google_creds_path, 'name') else google_creds_path
+ os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = creds_path
+ # Auto-save to config
+ self.config['google_vision_credentials'] = creds_path
+ self.save_config(self.config)
+ elif self.get_config_value('google_vision_credentials'):
+ # Use saved credentials from config
+ creds_path = self.get_config_value('google_vision_credentials')
+ if os.path.exists(creds_path):
+ os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = creds_path
+ else:
+ yield f"❌ Saved Google credentials not found: {creds_path}", gr.update(visible=False), gr.update(visible=False), gr.update(value="❌ Error", visible=True), gr.update(visible=False), gr.update(value="Error"), gr.update(value=0)
+ return
+
+ # Set Azure credentials if provided and save to config
+ if ocr_provider == "azure":
+ # Convert to strings and strip whitespace
+ azure_key_str = str(azure_key).strip() if azure_key else ''
+ azure_endpoint_str = str(azure_endpoint).strip() if azure_endpoint else ''
+
+ os.environ['AZURE_VISION_KEY'] = azure_key_str
+ os.environ['AZURE_VISION_ENDPOINT'] = azure_endpoint_str
+ # Auto-save to config
+ self.config['azure_vision_key'] = azure_key_str
+ self.config['azure_vision_endpoint'] = azure_endpoint_str
+ self.save_config(self.config)
+
+ # Apply text visibility settings to config
+ # Convert hex color to RGB tuple
+ def hex_to_rgb(hex_color):
+ # Handle different color formats
+ if isinstance(hex_color, (list, tuple)):
+ # Already RGB format
+ return tuple(hex_color[:3])
+ elif isinstance(hex_color, str):
+ # Remove any brackets or spaces if present
+ hex_color = hex_color.strip().strip('[]').strip()
+ if hex_color.startswith('#'):
+ # Hex format
+ hex_color = hex_color.lstrip('#')
+ if len(hex_color) == 6:
+ return tuple(int(hex_color[i:i+2], 16) for i in (0, 2, 4))
+ elif len(hex_color) == 3:
+ # Short hex format like #FFF
+ return tuple(int(hex_color[i]*2, 16) for i in range(3))
+ elif ',' in hex_color:
+ # RGB string format like "255, 0, 0"
+ try:
+ parts = hex_color.split(',')
+ return tuple(int(p.strip()) for p in parts[:3])
+ except:
+ pass
+ # Default to black if parsing fails
+ return (0, 0, 0)
+
+ # Debug logging for color values
+ print(f"DEBUG: text_color type: {type(text_color)}, value: {text_color}")
+ print(f"DEBUG: shadow_color type: {type(shadow_color)}, value: {shadow_color}")
+
+ try:
+ text_rgb = hex_to_rgb(text_color)
+ shadow_rgb = hex_to_rgb(shadow_color)
+ except Exception as e:
+ print(f"WARNING: Error converting colors: {e}")
+ print(f"WARNING: Using default colors - text: black, shadow: white")
+ text_rgb = (0, 0, 0) # Default to black text
+ shadow_rgb = (255, 255, 255) # Default to white shadow
+
+ self.config['manga_font_size_mode'] = font_size_mode
+ self.config['manga_font_size'] = int(font_size)
+ self.config['manga_font_size_multiplier'] = float(font_multiplier)
+ self.config['manga_max_font_size'] = int(max_font_size)
+ self.config['manga_text_color'] = list(text_rgb)
+ self.config['manga_shadow_enabled'] = bool(shadow_enabled)
+ self.config['manga_shadow_color'] = list(shadow_rgb)
+ self.config['manga_shadow_offset_x'] = int(shadow_offset_x)
+ self.config['manga_shadow_offset_y'] = int(shadow_offset_y)
+ self.config['manga_shadow_blur'] = int(shadow_blur)
+ self.config['manga_bg_opacity'] = int(bg_opacity)
+ self.config['manga_bg_style'] = bg_style
+
+ # Also update nested manga_settings structure
+ if 'manga_settings' not in self.config:
+ self.config['manga_settings'] = {}
+ if 'rendering' not in self.config['manga_settings']:
+ self.config['manga_settings']['rendering'] = {}
+ if 'font_sizing' not in self.config['manga_settings']:
+ self.config['manga_settings']['font_sizing'] = {}
+
+ self.config['manga_settings']['rendering']['auto_min_size'] = int(min_font_size)
+ self.config['manga_settings']['font_sizing']['min_size'] = int(min_font_size)
+ self.config['manga_settings']['rendering']['auto_max_size'] = int(max_font_size)
+ self.config['manga_settings']['font_sizing']['max_size'] = int(max_font_size)
+
+ # Prepare output directory
+ output_dir = tempfile.mkdtemp(prefix="manga_translated_")
+ translated_files = []
+ cbz_mode = False
+ cbz_output_path = None
+
+ # Initialize translation logs early (needed for CBZ processing)
+ translation_logs = []
+
+ # Check if any file is a CBZ/ZIP archive
+ import zipfile
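+ # image_files may arrive as a single file object or a list; normalize to a list below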
+ files_to_process = image_files if isinstance(image_files, list) else [image_files]
+ extracted_images = []
+
+ for file in files_to_process:
+ file_path = file.name if hasattr(file, 'name') else file
+ if file_path.lower().endswith(('.cbz', '.zip')):
+ # Extract CBZ
+ cbz_mode = True
+ translation_logs.append(f"📚 Extracting CBZ: {os.path.basename(file_path)}")
+ extract_dir = tempfile.mkdtemp(prefix="cbz_extract_")
+
+ try:
+ with zipfile.ZipFile(file_path, 'r') as zip_ref:
+ zip_ref.extractall(extract_dir)
+
+ # Find all image files in extracted directory
+ import glob
+ for ext in ['*.png', '*.jpg', '*.jpeg', '*.webp', '*.bmp', '*.gif']:
+ extracted_images.extend(glob.glob(os.path.join(extract_dir, '**', ext), recursive=True))
+
+ # Sort naturally (by filename)
+ extracted_images.sort()
+ translation_logs.append(f"✅ Extracted {len(extracted_images)} images from CBZ")
+
+ # Prepare CBZ output path
+ cbz_output_path = os.path.join(output_dir, f"{os.path.splitext(os.path.basename(file_path))[0]}_translated.cbz")
+ except Exception as e:
+ translation_logs.append(f"❌ Error extracting CBZ: {str(e)}")
+ else:
+ # Regular image file
+ extracted_images.append(file_path)
+
+ # Use extracted images if CBZ was processed, otherwise use original files
+ if extracted_images:
+ # Create mock file objects for extracted images
+ class MockFile:
+ def __init__(self, path):
+ self.name = path
+
+ files_to_process = [MockFile(img) for img in extracted_images]
+
+ total_images = len(files_to_process)
+
+ # Merge web app config with SimpleConfig for MangaTranslator
+ # This includes all the text visibility settings we just set
+ merged_config = self.config.copy()
+
+ # Override with web-specific settings
+ merged_config['model'] = model
+ merged_config['active_profile'] = profile_name
+
+ # Update manga_settings
+ if 'manga_settings' not in merged_config:
+ merged_config['manga_settings'] = {}
+ if 'ocr' not in merged_config['manga_settings']:
+ merged_config['manga_settings']['ocr'] = {}
+ if 'inpainting' not in merged_config['manga_settings']:
+ merged_config['manga_settings']['inpainting'] = {}
+ if 'advanced' not in merged_config['manga_settings']:
+ merged_config['manga_settings']['advanced'] = {}
+
+ merged_config['manga_settings']['ocr']['provider'] = ocr_provider
+ merged_config['manga_settings']['ocr']['bubble_detection_enabled'] = enable_bubble_detection
+ merged_config['manga_settings']['inpainting']['method'] = 'local' if enable_inpainting else 'none'
+ # Make sure local_method is set from config (defaults to anime)
+ if 'local_method' not in merged_config['manga_settings']['inpainting']:
+ merged_config['manga_settings']['inpainting']['local_method'] = self.get_config_value('manga_settings', {}).get('inpainting', {}).get('local_method', 'anime')
+
+ # Set parallel panel translation settings from config (Manga Settings tab)
+ # These are controlled in the Manga Settings tab, so reload config to get latest values
+ current_config = self.load_config()
+ if API_KEY_ENCRYPTION_AVAILABLE:
+ current_config = decrypt_config(current_config)
+
+ config_parallel = current_config.get('manga_settings', {}).get('advanced', {}).get('parallel_panel_translation', False)
+ config_max_workers = current_config.get('manga_settings', {}).get('advanced', {}).get('panel_max_workers', 10)
+
+ # Map web UI settings to MangaTranslator expected names
+ merged_config['manga_settings']['advanced']['parallel_panel_translation'] = config_parallel
+ merged_config['manga_settings']['advanced']['panel_max_workers'] = int(config_max_workers)
+ # CRITICAL: Also set the setting names that MangaTranslator actually checks
+ merged_config['manga_settings']['advanced']['parallel_processing'] = config_parallel
+ merged_config['manga_settings']['advanced']['max_workers'] = int(config_max_workers)
+
+ # Log the parallel settings being used
+ print(f"🔧 Reloaded config - Using parallel panel translation: {config_parallel}")
+ print(f"🔧 Reloaded config - Using panel max workers: {config_max_workers}")
+
+ # CRITICAL: Set skip_inpainting flag to False when inpainting is enabled
+ merged_config['manga_skip_inpainting'] = not enable_inpainting
+
+ # Create a simple config object for MangaTranslator
+ class SimpleConfig:
+ def __init__(self, cfg):
+ self.config = cfg
+
+ def get(self, key, default=None):
+ return self.config.get(key, default)
+
+ # Create mock GUI object with necessary attributes
+ class MockGUI:
+ def __init__(self, config, profile_name, system_prompt, max_output_tokens, api_key, model):
+ self.config = config
+ # Add profile_var mock for MangaTranslator compatibility
+ class ProfileVar:
+ def __init__(self, profile):
+ self.profile = str(profile) if profile else ''
+ def get(self):
+ return self.profile
+ self.profile_var = ProfileVar(profile_name)
+ # Add prompt_profiles BOTH to config AND as attribute (manga_translator checks both)
+ if 'prompt_profiles' not in self.config:
+ self.config['prompt_profiles'] = {}
+ self.config['prompt_profiles'][profile_name] = system_prompt
+ # Also set as direct attribute for line 4653 check
+ self.prompt_profiles = self.config['prompt_profiles']
+ # Add max_output_tokens as direct attribute (line 299 check)
+ self.max_output_tokens = max_output_tokens
+ # Add mock GUI attributes that MangaTranslator expects
+ class MockVar:
+ def __init__(self, val):
+ # Ensure val is properly typed
+ self.val = val
+ def get(self):
+ return self.val
+ self.delay_entry = MockVar(float(config.get('delay', 2.0)))
+ self.trans_temp = MockVar(float(config.get('translation_temperature', 0.3)))
+ self.contextual_var = MockVar(bool(config.get('contextual', False)))
+ self.trans_history = MockVar(int(config.get('translation_history_limit', 2)))
+ self.translation_history_rolling_var = MockVar(bool(config.get('translation_history_rolling', False)))
+ self.token_limit_disabled = bool(config.get('token_limit_disabled', False))
+ # IMPORTANT: token_limit_entry must return STRING because manga_translator calls .strip() on it
+ self.token_limit_entry = MockVar(str(config.get('token_limit', 200000)))
+ # Add API key and model for custom-api OCR provider - ensure strings
+ self.api_key_entry = MockVar(str(api_key) if api_key else '')
+ self.model_var = MockVar(str(model) if model else '')
+
+ simple_config = SimpleConfig(merged_config)
+ # Get max_output_tokens from config or use from web app config
+ web_max_tokens = merged_config.get('max_output_tokens', 16000)
+ mock_gui = MockGUI(simple_config.config, profile_name, system_prompt, web_max_tokens, api_key, model)
+
+ # Ensure model path is in config for local inpainting
+ if enable_inpainting:
+ local_method = merged_config.get('manga_settings', {}).get('inpainting', {}).get('local_method', 'anime')
+ # Set the model path key that MangaTranslator expects
+ model_path_key = f'manga_{local_method}_model_path'
+ if model_path_key not in merged_config:
+ # Use default model path or empty string
+ default_model_path = self.get_config_value(model_path_key, '')
+ merged_config[model_path_key] = default_model_path
+ print(f"Set {model_path_key} to: {default_model_path}")
+
+ # Setup OCR configuration
+ ocr_config = {
+ 'provider': ocr_provider
+ }
+
+ if ocr_provider == 'google':
+ ocr_config['google_credentials_path'] = google_creds_path.name if google_creds_path else None
+ elif ocr_provider == 'azure':
+ # Use string versions
+ azure_key_str = str(azure_key).strip() if azure_key else ''
+ azure_endpoint_str = str(azure_endpoint).strip() if azure_endpoint else ''
+ ocr_config['azure_key'] = azure_key_str
+ ocr_config['azure_endpoint'] = azure_endpoint_str
+
+ # Create UnifiedClient for translation API calls
+ try:
+ unified_client = UnifiedClient(
+ api_key=api_key,
+ model=model,
+ output_dir=output_dir
+ )
+ # Store reference for stop() cancellation support
+ self.current_unified_client = unified_client
+ except Exception as e:
+ error_log = f"❌ Failed to initialize API client: {str(e)}"
+ yield error_log, gr.update(visible=False), gr.update(visible=False), gr.update(value=error_log, visible=True), gr.update(visible=False), gr.update(value="Error"), gr.update(value=0)
+ return
+
+ # Log storage - will be yielded as live updates
+ last_yield_log_count = [0] # Track when we last yielded
+ last_yield_time = [0] # Track last yield time
+
+ # Track current image being processed
+ current_image_idx = [0]
+
+ import time
+
+ def should_yield_logs():
+ """Check if we should yield log updates (every 2 logs or 1 second)"""
+ current_time = time.time()
+ log_count_diff = len(translation_logs) - last_yield_log_count[0]
+ time_diff = current_time - last_yield_time[0]
+
+ # Yield if 2+ new logs OR 1+ seconds passed
+ return log_count_diff >= 2 or time_diff >= 1.0
+
+ def capture_log(msg, level="info"):
+ """Capture logs - caller will yield periodically"""
+ if msg and msg.strip():
+ log_msg = msg.strip()
+ translation_logs.append(log_msg)
+
+ # Initialize timing
+ last_yield_time[0] = time.time()
+
+ # Create MangaTranslator instance
+ try:
+ # Debug: Log inpainting config
+ inpaint_cfg = merged_config.get('manga_settings', {}).get('inpainting', {})
+ print(f"\n=== INPAINTING CONFIG DEBUG ===")
+ print(f"Inpainting enabled checkbox: {enable_inpainting}")
+ print(f"Inpainting method: {inpaint_cfg.get('method')}")
+ print(f"Local method: {inpaint_cfg.get('local_method')}")
+ print(f"Full inpainting config: {inpaint_cfg}")
+ print("=== END DEBUG ===\n")
+
+ translator = MangaTranslator(
+ ocr_config=ocr_config,
+ unified_client=unified_client,
+ main_gui=mock_gui,
+ log_callback=capture_log
+ )
+
+ # Keep a reference for stop/shutdown support
+ self.current_translator = translator
+
+ # Connect stop flag so translator can react immediately to stop requests
+ if hasattr(translator, 'set_stop_flag'):
+ try:
+ translator.set_stop_flag(self.stop_flag)
+ except Exception:
+ pass
+
+ # CRITICAL: Set skip_inpainting flag directly on translator instance
+ translator.skip_inpainting = not enable_inpainting
+ print(f"Set translator.skip_inpainting = {translator.skip_inpainting}")
+
+ # Explicitly initialize local inpainting if enabled
+ if enable_inpainting:
+ print(f"🎨 Initializing local inpainting...")
+ try:
+ # Force initialization of the inpainter
+ init_result = translator._initialize_local_inpainter()
+ if init_result:
+ print(f"✅ Local inpainter initialized successfully")
+ else:
+ print(f"⚠️ Local inpainter initialization returned False")
+ except Exception as init_error:
+ print(f"❌ Failed to initialize inpainter: {init_error}")
+ import traceback
+ traceback.print_exc()
+
+ except Exception as e:
+ import traceback
+ full_error = traceback.format_exc()
+ print(f"\n\n=== MANGA TRANSLATOR INIT ERROR ===")
+ print(full_error)
+ print(f"\nocr_config: {ocr_config}")
+ print(f"\nmock_gui.model_var.get(): {mock_gui.model_var.get()}")
+ print(f"\nmock_gui.api_key_entry.get(): {type(mock_gui.api_key_entry.get())}")
+ print("=== END ERROR ===")
+ error_log = f"❌ Failed to initialize manga translator: {str(e)}\n\nCheck console for full traceback"
+ yield error_log, gr.update(visible=False), gr.update(visible=False), gr.update(value=error_log, visible=True), gr.update(visible=False), gr.update(value="Error"), gr.update(value=0)
+ return
+
+ # Process each image with real progress tracking
+ for idx, img_file in enumerate(files_to_process, 1):
+ try:
+ # Check for stop request before processing each image
+ if self.stop_flag.is_set():
+ translation_logs.append(f"\n⏹️ Translation stopped by user before image {idx}/{total_images}")
+ self.is_translating = False
+ yield "\n".join(translation_logs), gr.update(visible=False), gr.update(visible=False), gr.update(value="⏹️ Translation stopped", visible=True), gr.update(visible=True), gr.update(value="Stopped"), gr.update(value=0)
+ return
+
+ # Update current image index for log capture
+ current_image_idx[0] = idx
+
+ # Calculate progress range for this image
+ start_progress = (idx - 1) / total_images
+ end_progress = idx / total_images
+
+ input_path = img_file.name if hasattr(img_file, 'name') else img_file
+ output_path = os.path.join(output_dir, f"translated_{os.path.basename(input_path)}")
+ filename = os.path.basename(input_path)
+
+ # Log start of processing and YIELD update
+ start_msg = f"🎨 [{idx}/{total_images}] Starting: {filename}"
+ translation_logs.append(start_msg)
+ translation_logs.append(f"Image path: {input_path}")
+ translation_logs.append(f"Processing with OCR: {ocr_provider}, Model: {model}")
+ translation_logs.append("-" * 60)
+
+ # Yield initial log update with progress
+ progress_percent = int(((idx - 1) / total_images) * 100)
+ status_text = f"Processing {idx}/{total_images}: {filename}"
+ last_yield_log_count[0] = len(translation_logs)
+ last_yield_time[0] = time.time()
+ yield "\n".join(translation_logs), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(value=status_text), gr.update(value=progress_percent)
+
+ # Start processing in a thread so we can yield logs periodically
+ import threading
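+ # Single-element lists act as mutable cells so the worker thread can publish
+ # its result and completion flag back to this generator's scope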
+ processing_complete = [False]
+ result_container = [None]
+
+ def process_wrapper():
+ try:
+ result_container[0] = translator.process_image(
+ image_path=input_path,
+ output_path=output_path,
+ batch_index=idx,
+ batch_total=total_images
+ )
+ except Exception as wrapper_error:
+ # Record the failure so the polling loop below can report it instead of hanging
+ result_container[0] = {'success': False, 'errors': [str(wrapper_error)]}
+ finally:
+ processing_complete[0] = True
+
+ # Start processing in background
+ process_thread = threading.Thread(target=process_wrapper, daemon=True)
+ process_thread.start()
+
+ # Poll for log updates while processing
+ while not processing_complete[0]:
+ time.sleep(0.5) # Check every 0.5 seconds
+
+ # Check for stop request during processing
+ if self.stop_flag.is_set():
+ translation_logs.append(f"\n⏹️ Translation stopped by user while processing image {idx}/{total_images}")
+ self.is_translating = False
+ yield "\n".join(translation_logs), gr.update(visible=False), gr.update(visible=False), gr.update(value="⏹️ Translation stopped", visible=True), gr.update(visible=True), gr.update(value="Stopped"), gr.update(value=0)
+ return
+
+ if should_yield_logs():
+ progress_percent = int(((idx - 0.5) / total_images) * 100) # Mid-processing
+ status_text = f"Processing {idx}/{total_images}: {filename} (in progress...)"
+ last_yield_log_count[0] = len(translation_logs)
+ last_yield_time[0] = time.time()
+ yield "\n".join(translation_logs), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(value=status_text), gr.update(value=progress_percent)
+
+ # Wait for thread to complete
+ process_thread.join(timeout=1)
+ result = result_container[0] or {}
+
+ if result.get('success'):
+ # Use the output path from the result
+ final_output = result.get('output_path', output_path)
+ if os.path.exists(final_output):
+ translated_files.append(final_output)
+ translation_logs.append(f"✅ Image {idx}/{total_images} COMPLETE: {filename} | Total: {len(translated_files)}/{total_images} done")
+ translation_logs.append("")
+ # Yield progress update with all translated images so far
+ progress_percent = int((idx / total_images) * 100)
+ status_text = f"Completed {idx}/{total_images}: {filename}"
+ # Show all translated files as gallery
+ yield "\n".join(translation_logs), gr.update(value=translated_files, visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(value=status_text), gr.update(value=progress_percent)
+ else:
+ translation_logs.append(f"⚠️ Image {idx}/{total_images}: Output file missing for {filename}")
+ translation_logs.append(f"⚠️ Warning: Output file not found for image {idx}")
+ translation_logs.append("")
+ # Yield progress update
+ progress_percent = int((idx / total_images) * 100)
+ status_text = f"Warning: {idx}/{total_images} - Output missing for {filename}"
+ yield "\n".join(translation_logs), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(value=status_text), gr.update(value=progress_percent)
+ else:
+ errors = result.get('errors', [])
+ error_msg = errors[0] if errors else 'Unknown error'
+ translation_logs.append(f"❌ Image {idx}/{total_images} FAILED: {error_msg[:50]}")
+ translation_logs.append(f"⚠️ Error on image {idx}: {error_msg}")
+ translation_logs.append("")
+ # Yield progress update
+ progress_percent = int((idx / total_images) * 100)
+ status_text = f"Failed: {idx}/{total_images} - {filename}"
+ yield "\n".join(translation_logs), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(value=status_text), gr.update(value=progress_percent)
+
+ # If translation failed, save original with error overlay
+ from PIL import Image as PILImage, ImageDraw, ImageFont
+ img = PILImage.open(input_path)
+ draw = ImageDraw.Draw(img)
+ # Add error message
+ draw.text((10, 10), f"Translation Error: {error_msg[:50]}", fill="red")
+ img.save(output_path)
+ translated_files.append(output_path)
+
+ except Exception as e:
+ import traceback
+ error_trace = traceback.format_exc()
+ translation_logs.append(f"❌ Image {idx}/{total_images} ERROR: {str(e)[:60]}")
+ translation_logs.append(f"❌ Exception on image {idx}: {str(e)}")
+ print(f"Manga translation error for {input_path}:\n{error_trace}")
+
+ # Save original on error
+ try:
+ from PIL import Image as PILImage
+ img = PILImage.open(input_path)
+ img.save(output_path)
+ translated_files.append(output_path)
+ except Exception:
+ pass
+ continue
+
+ # Check for stop request before final processing
+ if self.stop_flag.is_set():
+ translation_logs.append("\n⏹️ Translation stopped by user")
+ self.is_translating = False
+ yield "\n".join(translation_logs), gr.update(visible=False), gr.update(visible=False), gr.update(value="⏹️ Translation stopped", visible=True), gr.update(visible=True), gr.update(value="Stopped"), gr.update(value=0)
+ return
+
+ # Add completion message
+ translation_logs.append("\n" + "="*60)
+ translation_logs.append(f"✅ ALL COMPLETE! Successfully translated {len(translated_files)}/{total_images} images")
+ translation_logs.append("="*60)
+
+ # If CBZ mode, compile translated images into CBZ archive
+ final_output_for_display = None
+ if cbz_mode and cbz_output_path and translated_files:
+ translation_logs.append("\n📦 Compiling translated images into CBZ archive...")
+ try:
+ with zipfile.ZipFile(cbz_output_path, 'w', zipfile.ZIP_DEFLATED) as cbz:
+ for img_path in translated_files:
+ # Preserve original filename structure
+ arcname = os.path.basename(img_path).replace("translated_", "")
+ cbz.write(img_path, arcname)
+
+ translation_logs.append(f"✅ CBZ archive created: {os.path.basename(cbz_output_path)}")
+ translation_logs.append(f"📁 Archive location: {cbz_output_path}")
+ final_output_for_display = cbz_output_path
+ except Exception as e:
+ translation_logs.append(f"❌ Error creating CBZ: {str(e)}")
+
+ # Build final status with detailed panel information
+ final_status_lines = []
+ if translated_files:
+ final_status_lines.append(f"✅ Successfully translated {len(translated_files)}/{total_images} image(s)!")
+ final_status_lines.append("")
+ final_status_lines.append("🖼️ **Translated Panels:**")
+ for i, file_path in enumerate(translated_files, 1):
+ filename = os.path.basename(file_path)
+ final_status_lines.append(f" {i}. {filename}")
+
+ final_status_lines.append("")
+ final_status_lines.append("🔄 **Download Options:**")
+ if cbz_mode and cbz_output_path:
+ final_status_lines.append(f" 📦 CBZ Archive: {os.path.basename(cbz_output_path)}")
+ final_status_lines.append(f" 📁 Location: {cbz_output_path}")
+ else:
+ final_status_lines.append(f" 📁 Output directory: {output_dir}")
+ final_status_lines.append(" 🖼️ Individual images: Click on images in gallery above to download")
+ else:
+ final_status_lines.append("❌ Translation failed - no images were processed")
+
+ final_status_text = "\n".join(final_status_lines)
+
+ # Final yield with complete logs, image, CBZ, and final status
+ # Format: (logs_textbox, output_image, cbz_file, status_textbox, progress_group, progress_text, progress_bar)
+ final_progress_text = f"Complete! Processed {len(translated_files)}/{total_images} images"
+ if translated_files:
+ # Show all translated images in gallery
+ if cbz_mode and cbz_output_path and os.path.exists(cbz_output_path):
+ yield (
+ "\n".join(translation_logs),
+ gr.update(value=translated_files, visible=True), # Show all images in gallery
+ gr.update(value=cbz_output_path, visible=True), # CBZ file for download with visibility
+ gr.update(value=final_status_text, visible=True),
+ gr.update(visible=True),
+ gr.update(value=final_progress_text),
+ gr.update(value=100)
+ )
+ else:
+ yield (
+ "\n".join(translation_logs),
+ gr.update(value=translated_files, visible=True), # Show all images in gallery
+ gr.update(visible=False), # Hide CBZ component
+ gr.update(value=final_status_text, visible=True),
+ gr.update(visible=True),
+ gr.update(value=final_progress_text),
+ gr.update(value=100)
+ )
+ else:
+ yield (
+ "\n".join(translation_logs),
+ gr.update(visible=False),
+ gr.update(visible=False), # Hide CBZ component
+ gr.update(value=final_status_text, visible=True),
+ gr.update(visible=True),
+ gr.update(value=final_progress_text),
+ gr.update(value=0) # 0% if nothing was processed
+ )
+
+ except Exception as e:
+ import traceback
+ error_msg = f"❌ Error during manga translation:\n{str(e)}\n\n{traceback.format_exc()}"
+ self.is_translating = False
+ yield error_msg, gr.update(visible=False), gr.update(visible=False), gr.update(value=error_msg, visible=True), gr.update(visible=False), gr.update(value="Error occurred"), gr.update(value=0)
+ finally:
+ # Always reset translation state when done
+ self.is_translating = False
+ # Clear active references on full completion
+ try:
+ self.current_translator = None
+ self.current_unified_client = None
+ except Exception:
+ pass
+
+ def stop_manga_translation(self):
+ """Simple function to stop manga translation"""
+ print("DEBUG: Stop button clicked")
+ if self.is_translating:
+ print("DEBUG: Stopping active translation")
+ self.stop_translation()
+ # Return UI updates for button visibility and status
+ return (
+ gr.update(visible=True), # translate button - show
+ gr.update(visible=False), # stop button - hide
+ "⏹️ Translation stopped by user"
+ )
+ else:
+ print("DEBUG: No active translation to stop")
+ return (
+ gr.update(visible=True), # translate button - show
+ gr.update(visible=False), # stop button - hide
+ "No active translation to stop"
+ )
+
+ def start_manga_translation(self, *args):
+ """Simple function to start manga translation - GENERATOR FUNCTION"""
+ print("DEBUG: Translate button clicked")
+
+ # Reset flags for new translation and mark as translating BEFORE first yield
+ self._reset_translation_flags()
+ self.is_translating = True
+
+ # Initial yield to update button visibility
+ yield (
+ "🚀 Starting translation...",
+ gr.update(visible=False), # manga_output_gallery - hide initially
+ gr.update(visible=False), # manga_cbz_output
+ gr.update(value="Starting...", visible=True), # manga_status
+ gr.update(visible=False), # manga_progress_group
+ gr.update(value="Initializing..."), # manga_progress_text
+ gr.update(value=0), # manga_progress_bar
+ gr.update(visible=False), # translate button - hide during translation
+ gr.update(visible=True) # stop button - show during translation
+ )
+
+ # Call the translate function and yield all its results
+ last_result = None
+ try:
+ for result in self.translate_manga(*args):
+ # Check if stop was requested during iteration
+ if self.stop_flag.is_set():
+ print("DEBUG: Stop flag detected, breaking translation loop")
+ break
+
+ last_result = result
+ # Pad result to include button states (translate_visible=False, stop_visible=True)
+ if len(result) >= 7:
+ yield result + (gr.update(visible=False), gr.update(visible=True))
+ else:
+ # Pad result to match expected length (7 values) then add button states
+ padded_result = list(result) + [gr.update(visible=False)] * (7 - len(result))
+ yield tuple(padded_result) + (gr.update(visible=False), gr.update(visible=True))
+
+ except GeneratorExit:
+ print("DEBUG: Translation generator was closed")
+ self.is_translating = False
+ return
+ except Exception as e:
+ print(f"DEBUG: Exception during translation: {e}")
+ self.is_translating = False
+ # Show error and reset buttons
+ error_msg = f"❌ Error during translation: {str(e)}"
+ yield (
+ error_msg,
+ gr.update(visible=False),
+ gr.update(visible=False),
+ gr.update(value=error_msg, visible=True),
+ gr.update(visible=False),
+ gr.update(value="Error occurred"),
+ gr.update(value=0),
+ gr.update(visible=True), # translate button - show after error
+ gr.update(visible=False) # stop button - hide after error
+ )
+ return
+ finally:
+ # Clear active references when the loop exits
+ self.is_translating = False
+ try:
+ self.current_translator = None
+ self.current_unified_client = None
+ except Exception:
+ pass
+
+ # Check if we stopped early
+ if self.stop_flag.is_set():
+ yield (
+ "⏹️ Translation stopped by user",
+ gr.update(visible=False),
+ gr.update(visible=False),
+ gr.update(value="⏹️ Translation stopped", visible=True),
+ gr.update(visible=False),
+ gr.update(value="Stopped"),
+ gr.update(value=0),
+ gr.update(visible=True), # translate button - show after stop
+ gr.update(visible=False) # stop button - hide after stop
+ )
+ return
+
+ # Final yield to reset buttons after successful completion
+ print("DEBUG: Translation completed normally, resetting buttons")
+ if last_result is None:
+ last_result = ("", gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(value="Complete"), gr.update(value=100))
+
+ if len(last_result) >= 7:
+ yield last_result[:7] + (gr.update(visible=True), gr.update(visible=False))
+ else:
+ # Pad result to match expected length then add button states
+ padded_result = list(last_result) + [gr.update(visible=False)] * (7 - len(last_result))
+ yield tuple(padded_result) + (gr.update(visible=True), gr.update(visible=False))
+
+ def create_interface(self):
+ """Create and return the Gradio interface"""
+ # Reload config before creating interface to get latest values
+ self.config = self.load_config()
+ self.decrypted_config = decrypt_config(self.config.copy()) if API_KEY_ENCRYPTION_AVAILABLE else self.config.copy()
+
+ # Load and encode icon as base64
+ icon_base64 = ""
+ icon_path = "Halgakos.ico" if os.path.exists("Halgakos.ico") else "Halgakos.ico"
+ if os.path.exists(icon_path):
+ with open(icon_path, "rb") as f:
+ icon_base64 = base64.b64encode(f.read()).decode()
+
+ # Custom CSS to hide Gradio footer and add favicon
+ custom_css = """
+ footer {display: none !important;}
+ .gradio-container {min-height: 100vh;}
+
+ /* Stop button styling */
+ .gr-button[data-variant="stop"] {
+ background-color: #dc3545 !important;
+ border-color: #dc3545 !important;
+ color: white !important;
+ }
+ .gr-button[data-variant="stop"]:hover {
+ background-color: #c82333 !important;
+ border-color: #bd2130 !important;
+ color: white !important;
+ }
+ """
+
+ # JavaScript for localStorage persistence - SIMPLE VERSION
+ localStorage_js = """
+
+ """
+
+ with gr.Blocks(
+ title="Glossarion - AI Translation",
+ theme=gr.themes.Soft(),
+ css=custom_css
+ ) as app:
+
+ # Add custom HTML with favicon link and title with icon
+ icon_img_tag = f'<img src="data:image/x-icon;base64,{icon_base64}" style="height:48px;width:48px;vertical-align:middle;margin-right:12px;">' if icon_base64 else ''
+
+ gr.HTML(f"""
+ <link rel="icon" type="image/x-icon" href="data:image/x-icon;base64,{icon_base64}">
+ <div style="display:flex;align-items:center;">
+ {icon_img_tag}
+ <h1 style="margin:0;">Glossarion - AI-Powered Translation</h1>
+ </div>
+ {localStorage_js}
+ """)
+
+ with gr.Row():
+ gr.Markdown("""
+ Translate novels and books using advanced AI models (GPT-5, Claude, etc.)
+ """)
+
+
+ # SECURITY: Save Config button disabled for Hugging Face to prevent API key leakage
+ # Users should use localStorage (browser-based storage) instead
+ # with gr.Column(scale=0):
+ # save_config_btn = gr.Button(
+ # "💾 Save Config",
+ # variant="secondary",
+ # size="sm"
+ # )
+ # save_status_text = gr.Markdown(
+ # "",
+ # visible=False
+ # )
+
+ with gr.Tabs() as main_tabs:
+ # EPUB Translation Tab
+ with gr.Tab("📚 EPUB Translation"):
+ with gr.Row():
+ with gr.Column():
+ epub_file = gr.File(
+ label="📖 Upload EPUB or TXT File",
+ file_types=[".epub", ".txt"]
+ )
+
+ with gr.Row():
+ translate_btn = gr.Button(
+ "🚀 Translate EPUB",
+ variant="primary",
+ size="lg",
+ scale=2
+ )
+
+ stop_epub_btn = gr.Button(
+ "⏹️ Stop Translation",
+ variant="stop",
+ size="lg",
+ visible=False,
+ scale=1
+ )
+
+ epub_model = gr.Dropdown(
+ choices=self.models,
+ value=self.get_config_value('model', 'gpt-4-turbo'),
+ label="🤖 AI Model",
+ interactive=True,
+ allow_custom_value=True,
+ filterable=True
+ )
+
+ epub_api_key = gr.Textbox(
+ label="🔑 API Key",
+ type="password",
+ placeholder="Enter your API key",
+ value=self.get_config_value('api_key', '')
+ )
+
+ # Use all profiles without filtering
+ profile_choices = list(self.profiles.keys())
+ # Use saved active_profile instead of hardcoded default
+ default_profile = self.get_config_value('active_profile', profile_choices[0] if profile_choices else '')
+
+ epub_profile = gr.Dropdown(
+ choices=profile_choices,
+ value=default_profile,
+ label="📝 Translation Profile"
+ )
+
+ epub_system_prompt = gr.Textbox(
+ label="System Prompt (Translation Instructions)",
+ lines=8,
+ max_lines=15,
+ interactive=True,
+ placeholder="Select a profile to load translation instructions...",
+ value=self.profiles.get(default_profile, '') if default_profile else ''
+ )
+
+ with gr.Accordion("⚙️ Advanced Settings", open=False):
+ epub_temperature = gr.Slider(
+ minimum=0,
+ maximum=1,
+ value=self.get_config_value('temperature', 0.3),
+ step=0.1,
+ label="Temperature"
+ )
+
+ epub_max_tokens = gr.Number(
+ label="Max Output Tokens",
+ value=self.get_config_value('max_output_tokens', 16000),
+ minimum=0
+ )
+
+ gr.Markdown("### Image Translation")
+
+ enable_image_translation = gr.Checkbox(
+ label="Enable Image Translation",
+ value=self.get_config_value('enable_image_translation', False),
+ info="Extracts and translates text from images using vision models"
+ )
+
+ gr.Markdown("### Glossary Settings")
+
+ enable_auto_glossary = gr.Checkbox(
+ label="Enable Automatic Glossary Generation",
+ value=self.get_config_value('enable_auto_glossary', False),
+ info="Automatic extraction and translation of character names/terms"
+ )
+
+ append_glossary = gr.Checkbox(
+ label="Append Glossary to System Prompt",
+ value=self.get_config_value('append_glossary_to_prompt', True),
+ info="Applies to ALL glossaries - manual and automatic"
+ )
+
+ # Automatic glossary extraction settings (only show when enabled)
+ with gr.Group(visible=self.get_config_value('enable_auto_glossary', False)) as auto_glossary_settings:
+ gr.Markdown("#### Automatic Glossary Extraction Settings")
+
+ with gr.Row():
+ auto_glossary_min_freq = gr.Slider(
+ minimum=1,
+ maximum=10,
+ value=self.get_config_value('glossary_min_frequency', 2),
+ step=1,
+ label="Min Frequency",
+ info="Minimum times a name must appear"
+ )
+
+ auto_glossary_max_names = gr.Slider(
+ minimum=10,
+ maximum=200,
+ value=self.get_config_value('glossary_max_names', 50),
+ step=10,
+ label="Max Names",
+ info="Maximum number of character names"
+ )
+
+ with gr.Row():
+ auto_glossary_max_titles = gr.Slider(
+ minimum=10,
+ maximum=100,
+ value=self.get_config_value('glossary_max_titles', 30),
+ step=5,
+ label="Max Titles",
+ info="Maximum number of titles/terms"
+ )
+
+ auto_glossary_batch_size = gr.Slider(
+ minimum=10,
+ maximum=100,
+ value=self.get_config_value('glossary_batch_size', 50),
+ step=5,
+ label="Translation Batch Size",
+ info="Terms per API call"
+ )
+
+ auto_glossary_filter_mode = gr.Radio(
+ choices=[
+ ("All names & terms", "all"),
+ ("Names with honorifics only", "only_with_honorifics"),
+ ("Names without honorifics & terms", "only_without_honorifics")
+ ],
+ value=self.get_config_value('glossary_filter_mode', 'all'),
+ label="Filter Mode",
+ info="What types of names to extract"
+ )
+
+ auto_glossary_fuzzy_threshold = gr.Slider(
+ minimum=0.5,
+ maximum=1.0,
+ value=self.get_config_value('glossary_fuzzy_threshold', 0.90),
+ step=0.05,
+ label="Fuzzy Matching Threshold",
+ info="How similar names must be to match (0.9 = 90% match)"
+ )
+
+ # Toggle visibility of auto glossary settings
+ enable_auto_glossary.change(
+ fn=lambda x: gr.update(visible=x),
+ inputs=[enable_auto_glossary],
+ outputs=[auto_glossary_settings]
+ )
+
+ gr.Markdown("### Quality Assurance")
+
+ enable_post_translation_scan = gr.Checkbox(
+ label="Enable post-translation Scanning phase",
+ value=self.get_config_value('enable_post_translation_scan', False),
+ info="Automatically run QA Scanner after translation completes"
+ )
+
+ glossary_file = gr.File(
+ label="📋 Manual Glossary CSV (optional)",
+ file_types=[".csv", ".json", ".txt"]
+ )
+
+ with gr.Column():
+ # Add logo and status at top
+ with gr.Row():
+ gr.Image(
+ value="Halgakos.png",
+ label=None,
+ show_label=False,
+ width=80,
+ height=80,
+ interactive=False,
+ show_download_button=False,
+ container=False
+ )
+ epub_status_message = gr.Markdown(
+ value="### Ready to translate\nUpload an EPUB or TXT file and click 'Translate' to begin.",
+ visible=True
+ )
+
+ # Progress section (similar to manga tab)
+ with gr.Group(visible=False) as epub_progress_group:
+ gr.Markdown("### Progress")
+ epub_progress_text = gr.Textbox(
+ label="📨 Current Status",
+ value="Ready to start",
+ interactive=False,
+ lines=1
+ )
+ epub_progress_bar = gr.Slider(
+ minimum=0,
+ maximum=100,
+ value=0,
+ step=1,
+ label="📋 Translation Progress",
+ interactive=False,
+ show_label=True
+ )
+
+ epub_logs = gr.Textbox(
+ label="📋 Translation Logs",
+ lines=20,
+ max_lines=30,
+ value="Ready to translate. Upload an EPUB or TXT file and configure settings.",
+ visible=True,
+ interactive=False
+ )
+
+ epub_output = gr.File(
+ label="📥 Download Translated File",
+ visible=True # Always visible, will show file when ready
+ )
+
+ epub_status = gr.Textbox(
+ label="Final Status",
+ lines=3,
+ max_lines=5,
+ visible=False,
+ interactive=False
+ )
+
+ # Sync handlers will be connected after manga components are created
+
+ # Translation button handler - now with progress outputs
+ translate_btn.click(
+ fn=self.translate_epub_with_stop,
+ inputs=[
+ epub_file,
+ epub_model,
+ epub_api_key,
+ epub_profile,
+ epub_system_prompt,
+ epub_temperature,
+ epub_max_tokens,
+ enable_image_translation,
+ glossary_file
+ ],
+ outputs=[
+ epub_output, # Download file
+ epub_status_message, # Top status message
+ epub_progress_group, # Progress group visibility
+ epub_logs, # Translation logs
+ epub_status, # Final status
+ epub_progress_text, # Progress text
+ epub_progress_bar, # Progress bar
+ translate_btn, # Show/hide translate button
+ stop_epub_btn # Show/hide stop button
+ ]
+ )
+
+ # Stop button handler
+ stop_epub_btn.click(
+ fn=self.stop_epub_translation,
+ inputs=[],
+ outputs=[translate_btn, stop_epub_btn, epub_status]
+ )
+
+ # Manga Translation Tab
+ with gr.Tab("🎨 Manga Translation"):
+ with gr.Row():
+ with gr.Column():
+ manga_images = gr.File(
+ label="🖼️ Upload Manga Images or CBZ",
+ file_types=[".png", ".jpg", ".jpeg", ".webp", ".bmp", ".gif", ".cbz", ".zip"],
+ file_count="multiple"
+ )
+
+ with gr.Row():
+ translate_manga_btn = gr.Button(
+ "🚀 Translate Manga",
+ variant="primary",
+ size="lg",
+ scale=2
+ )
+
+ stop_manga_btn = gr.Button(
+ "⏹️ Stop Translation",
+ variant="stop",
+ size="lg",
+ visible=False,
+ scale=1
+ )
+
+ manga_model = gr.Dropdown(
+ choices=self.models,
+ value=self.get_config_value('model', 'gpt-4-turbo'),
+ label="🤖 AI Model",
+ interactive=True,
+ allow_custom_value=True,
+ filterable=True
+ )
+
+ manga_api_key = gr.Textbox(
+ label="🔑 API Key",
+ type="password",
+ placeholder="Enter your API key",
+ value=self.get_config_value('api_key', '') # Pre-fill from config
+ )
+
+ # Use all profiles without filtering
+ profile_choices = list(self.profiles.keys())
+ # Use the active profile from config, same as EPUB tab
+ default_profile = self.get_config_value('active_profile', profile_choices[0] if profile_choices else '')
+
+ manga_profile = gr.Dropdown(
+ choices=profile_choices,
+ value=default_profile,
+ label="📝 Translation Profile"
+ )
+
+ # Editable manga system prompt
+ manga_system_prompt = gr.Textbox(
+ label="Manga System Prompt (Translation Instructions)",
+ lines=8,
+ max_lines=15,
+ interactive=True,
+ placeholder="Select a manga profile to load translation instructions...",
+ value=self.profiles.get(default_profile, '') if default_profile else ''
+ )
+
+ with gr.Accordion("⚙️ OCR Settings", open=False):
+ gr.Markdown("🔒 **Credentials are auto-saved** to your config (encrypted) after first use.")
+
+ ocr_provider = gr.Radio(
+ choices=["google", "azure", "custom-api"],
+ value=self.get_config_value('ocr_provider', 'custom-api'),
+ label="OCR Provider"
+ )
+
+ # Show saved Google credentials path if available
+ saved_google_path = self.get_config_value('google_vision_credentials', '')
+ if saved_google_path and os.path.exists(saved_google_path):
+ gr.Markdown(f"✅ **Saved credentials found:** `{os.path.basename(saved_google_path)}`")
+ gr.Markdown("💡 *Using saved credentials. Upload a new file only if you want to change them.*")
+ else:
+ gr.Markdown("⚠️ No saved Google credentials found. Please upload your JSON file.")
+
+ # Note: File component doesn't support pre-filling paths due to browser security
+ google_creds = gr.File(
+ label="Google Cloud Credentials JSON (upload to update)",
+ file_types=[".json"]
+ )
+
+ azure_key = gr.Textbox(
+ label="Azure Vision API Key (if using Azure)",
+ type="password",
+ placeholder="Enter Azure API key",
+ value=self.get_config_value('azure_vision_key', '')
+ )
+
+ azure_endpoint = gr.Textbox(
+ label="Azure Vision Endpoint (if using Azure)",
+ placeholder="https://your-resource.cognitiveservices.azure.com/",
+ value=self.get_config_value('azure_vision_endpoint', '')
+ )
+
+ bubble_detection = gr.Checkbox(
+ label="Enable Bubble Detection",
+ value=self.get_config_value('bubble_detection_enabled', True)
+ )
+
+ inpainting = gr.Checkbox(
+ label="Enable Text Removal (Inpainting)",
+ value=self.get_config_value('inpainting_enabled', True)
+ )
+
+ with gr.Accordion("⚡ Parallel Processing", open=False):
+ gr.Markdown("### Parallel Panel Translation")
+ gr.Markdown("*Process multiple panels simultaneously for faster translation*")
+
+ # Check environment variables first, then config
+ parallel_enabled = os.getenv('PARALLEL_PANEL_TRANSLATION', '').lower() == 'true'
+ if not parallel_enabled:
+ # Fall back to config if not set in env
+ parallel_enabled = self.get_config_value('manga_settings', {}).get('advanced', {}).get('parallel_panel_translation', False)
+
+ # Get max workers from env or config
+ max_workers_env = os.getenv('PANEL_MAX_WORKERS', '')
+ if max_workers_env.isdigit():
+ max_workers = int(max_workers_env)
+ else:
+ max_workers = self.get_config_value('manga_settings', {}).get('advanced', {}).get('panel_max_workers', 7)
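+                        # Example (illustrative): environment variables take precedence over the saved config,
+                        # e.g. launching with `PARALLEL_PANEL_TRANSLATION=true PANEL_MAX_WORKERS=4 python app.py`.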
+
+ parallel_panel_translation = gr.Checkbox(
+ label="Enable Parallel Panel Translation",
+ value=parallel_enabled,
+ info="Translates multiple panels at once instead of sequentially"
+ )
+
+ panel_max_workers = gr.Slider(
+ minimum=1,
+ maximum=20,
+ value=max_workers,
+ step=1,
+ label="Max concurrent panels",
+ interactive=True,
+ info="Number of panels to process simultaneously (higher = faster but more memory)"
+ )
+
+ with gr.Accordion("✨ Text Visibility Settings", open=False):
+ gr.Markdown("### Font Settings")
+
+ font_size_mode = gr.Radio(
+ choices=["auto", "fixed", "multiplier"],
+ value=self.get_config_value('manga_font_size_mode', 'auto'),
+ label="Font Size Mode"
+ )
+
+ font_size = gr.Slider(
+ minimum=0,
+ maximum=72,
+ value=self.get_config_value('manga_font_size', 24),
+ step=1,
+ label="Fixed Font Size (0=auto, used when mode=fixed)"
+ )
+
+ font_multiplier = gr.Slider(
+ minimum=0.5,
+ maximum=2.0,
+ value=self.get_config_value('manga_font_size_multiplier', 1.0),
+ step=0.1,
+ label="Font Size Multiplier (when mode=multiplier)"
+ )
+
+ min_font_size = gr.Slider(
+ minimum=0,
+ maximum=100,
+ value=self.get_config_value('manga_settings', {}).get('rendering', {}).get('auto_min_size', 12),
+ step=1,
+ label="Minimum Font Size (0=no limit)"
+ )
+
+ max_font_size = gr.Slider(
+ minimum=20,
+ maximum=100,
+ value=self.get_config_value('manga_max_font_size', 48),
+ step=1,
+ label="Maximum Font Size"
+ )
+
+ gr.Markdown("### Text Color")
+
+ # Convert RGB array to hex if needed
+ def to_hex_color(color_value, default='#000000'):
+ if isinstance(color_value, (list, tuple)) and len(color_value) >= 3:
+ return '#{:02x}{:02x}{:02x}'.format(int(color_value[0]), int(color_value[1]), int(color_value[2]))
+ elif isinstance(color_value, str):
+ return color_value if color_value.startswith('#') else default
+ return default
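+                            # Illustrative usage of the helper above:
+                            #   to_hex_color([255, 255, 255])           -> '#ffffff'
+                            #   to_hex_color('#336699')                 -> '#336699'
+                            #   to_hex_color('not a color', '#000000')  -> '#000000'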
+
+ text_color_rgb = gr.ColorPicker(
+ label="Font Color",
+ value=to_hex_color(self.get_config_value('manga_text_color', [255, 255, 255]), '#FFFFFF') # Default white
+ )
+
+ gr.Markdown("### Shadow Settings")
+
+ shadow_enabled = gr.Checkbox(
+ label="Enable Text Shadow",
+ value=self.get_config_value('manga_shadow_enabled', True)
+ )
+
+ shadow_color = gr.ColorPicker(
+ label="Shadow Color",
+ value=to_hex_color(self.get_config_value('manga_shadow_color', [0, 0, 0]), '#000000') # Default black
+ )
+
+ shadow_offset_x = gr.Slider(
+ minimum=-10,
+ maximum=10,
+ value=self.get_config_value('manga_shadow_offset_x', 2),
+ step=1,
+ label="Shadow Offset X"
+ )
+
+ shadow_offset_y = gr.Slider(
+ minimum=-10,
+ maximum=10,
+ value=self.get_config_value('manga_shadow_offset_y', 2),
+ step=1,
+ label="Shadow Offset Y"
+ )
+
+ shadow_blur = gr.Slider(
+ minimum=0,
+ maximum=10,
+ value=self.get_config_value('manga_shadow_blur', 0),
+ step=1,
+ label="Shadow Blur"
+ )
+
+ gr.Markdown("### Background Settings")
+
+ bg_opacity = gr.Slider(
+ minimum=0,
+ maximum=255,
+ value=self.get_config_value('manga_bg_opacity', 130),
+ step=1,
+ label="Background Opacity"
+ )
+
+ # Ensure bg_style value is valid
+ bg_style_value = self.get_config_value('manga_bg_style', 'circle')
+ if bg_style_value not in ["box", "circle", "wrap"]:
+ bg_style_value = 'circle' # Default fallback
+
+ bg_style = gr.Radio(
+ choices=["box", "circle", "wrap"],
+ value=bg_style_value,
+ label="Background Style"
+ )
+
+ with gr.Column():
+ # Add logo and loading message at top
+ with gr.Row():
+ gr.Image(
+ value="Halgakos.png",
+ label=None,
+ show_label=False,
+ width=80,
+ height=80,
+ interactive=False,
+ show_download_button=False,
+ container=False
+ )
+ status_message = gr.Markdown(
+                            value="### Ready to translate\nUpload images (or a CBZ) and click 'Translate Manga' to begin.",
+ visible=True
+ )
+
+ # Progress section for manga translation (similar to manga integration script)
+ with gr.Group(visible=False) as manga_progress_group:
+ gr.Markdown("### Progress")
+ manga_progress_text = gr.Textbox(
+ label="📈 Current Status",
+ value="Ready to start",
+ interactive=False,
+ lines=1
+ )
+ manga_progress_bar = gr.Slider(
+ minimum=0,
+ maximum=100,
+ value=0,
+ step=1,
+ label="📋 Translation Progress",
+ interactive=False,
+ show_label=True
+ )
+
+ manga_logs = gr.Textbox(
+ label="📋 Translation Logs",
+ lines=20,
+ max_lines=30,
+ value="Ready to translate. Click 'Translate Manga' to begin.",
+ visible=True,
+ interactive=False
+ )
+
+ # Use Gallery to show all translated images
+ manga_output_gallery = gr.Gallery(
+ label="📷 Translated Images (click to download)",
+ visible=False,
+ show_label=True,
+ elem_id="manga_output_gallery",
+ columns=3,
+ rows=2,
+ height="auto",
+ allow_preview=True,
+ show_download_button=True # Allow download of individual images
+ )
+ # Keep CBZ output for bulk download
+ manga_cbz_output = gr.File(label="📦 Download Translated CBZ", visible=False)
+ manga_status = gr.Textbox(
+ label="Final Status",
+ lines=8,
+ max_lines=15,
+ visible=False
+ )
+
+ # Global sync flag to prevent loops
+ self._syncing_active = False
+
+ # Auto-save Azure credentials on change
+ def save_azure_credentials(key, endpoint):
+ """Save Azure credentials to config"""
+ try:
+ current_config = self.get_current_config_for_update()
+ # Don't decrypt - just update what we need
+ if key and key.strip():
+ current_config['azure_vision_key'] = str(key).strip()
+ if endpoint and endpoint.strip():
+ current_config['azure_vision_endpoint'] = str(endpoint).strip()
+ self.save_config(current_config)
+ return None
+ except Exception as e:
+ print(f"Failed to save Azure credentials: {e}")
+ return None
+
+ # All auto-save handlers removed - use manual Save Config button to avoid constant writes to persistent storage
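+                # If auto-save were re-enabled, the (currently unused) helper above could be wired like this sketch:
+                #   azure_key.change(fn=save_azure_credentials, inputs=[azure_key, azure_endpoint], outputs=[])
+                #   azure_endpoint.change(fn=save_azure_credentials, inputs=[azure_key, azure_endpoint], outputs=[])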
+
+ # Only update system prompts when profiles change - no cross-tab syncing
+ epub_profile.change(
+ fn=lambda p: self.profiles.get(p, ''),
+ inputs=[epub_profile],
+ outputs=[epub_system_prompt]
+ )
+
+ manga_profile.change(
+ fn=lambda p: self.profiles.get(p, ''),
+ inputs=[manga_profile],
+ outputs=[manga_system_prompt]
+ )
+
+ # Manual save function for all configuration
+ def save_all_config(
+ model, api_key, profile, temperature, max_tokens,
+ enable_image_trans, enable_auto_gloss, append_gloss,
+ # Auto glossary settings
+ auto_gloss_min_freq, auto_gloss_max_names, auto_gloss_max_titles,
+ auto_gloss_batch_size, auto_gloss_filter_mode, auto_gloss_fuzzy,
+ enable_post_scan,
+ # Manual glossary extraction settings
+ manual_min_freq, manual_max_names, manual_max_titles,
+ manual_max_text_size, manual_max_sentences, manual_trans_batch,
+ manual_chapter_split, manual_filter_mode, manual_strip_honorifics,
+ manual_fuzzy, manual_extraction_prompt, manual_format_instructions,
+ manual_use_legacy_csv,
+ # QA Scanner settings
+ qa_min_foreign, qa_check_rep, qa_check_gloss_leak,
+ qa_min_file_len, qa_check_headers, qa_check_html,
+ qa_check_paragraphs, qa_min_para_percent, qa_report_fmt, qa_auto_save,
+ # Chapter processing options
+ batch_trans_headers, headers_batch, ncx_nav, attach_css, retain_ext,
+ conservative_batch, gemini_safety, http_openrouter, openrouter_compress,
+ extraction_method, filter_level,
+ # Thinking mode settings
+ gpt_thinking_enabled, gpt_effort, or_tokens,
+ gemini_thinking_enabled, gemini_budget,
+ manga_model, manga_api_key, manga_profile,
+ ocr_prov, azure_k, azure_e,
+ bubble_det, inpaint,
+ font_mode, font_s, font_mult, min_font, max_font,
+ text_col, shadow_en, shadow_col,
+ shadow_x, shadow_y, shadow_b,
+ bg_op, bg_st,
+ parallel_trans, panel_workers,
+ # Advanced Settings fields
+ detector_type_val, rtdetr_conf, bubble_conf,
+ detect_text, detect_empty, detect_free, max_detections,
+ local_method_val, webtoon_val,
+ batch_size_val, cache_enabled_val,
+ parallel_proc, max_work,
+ preload_local, stagger_ms,
+ torch_prec, auto_cleanup,
+ debug, save_inter, concise_logs
+ ):
+ """Save all configuration values at once"""
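+                    # Note: the parameter order above must match the positional `inputs=[...]` list used when this
+                    # function is bound to the manual Save Config button (wired after all components are created).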
+ try:
+ config = self.get_current_config_for_update()
+
+ # Save all values
+ config['model'] = model
+ if api_key: # Only save non-empty API keys
+ config['api_key'] = api_key
+ config['active_profile'] = profile
+ config['temperature'] = temperature
+ config['max_output_tokens'] = max_tokens
+ config['enable_image_translation'] = enable_image_trans
+ config['enable_auto_glossary'] = enable_auto_gloss
+ config['append_glossary_to_prompt'] = append_gloss
+
+ # Auto glossary settings
+ config['glossary_min_frequency'] = auto_gloss_min_freq
+ config['glossary_max_names'] = auto_gloss_max_names
+ config['glossary_max_titles'] = auto_gloss_max_titles
+ config['glossary_batch_size'] = auto_gloss_batch_size
+ config['glossary_filter_mode'] = auto_gloss_filter_mode
+ config['glossary_fuzzy_threshold'] = auto_gloss_fuzzy
+
+ # Manual glossary extraction settings
+ config['manual_glossary_min_frequency'] = manual_min_freq
+ config['manual_glossary_max_names'] = manual_max_names
+ config['manual_glossary_max_titles'] = manual_max_titles
+ config['glossary_max_text_size'] = manual_max_text_size
+ config['glossary_max_sentences'] = manual_max_sentences
+ config['manual_glossary_batch_size'] = manual_trans_batch
+ config['glossary_chapter_split_threshold'] = manual_chapter_split
+ config['manual_glossary_filter_mode'] = manual_filter_mode
+ config['strip_honorifics'] = manual_strip_honorifics
+ config['manual_glossary_fuzzy_threshold'] = manual_fuzzy
+ config['manual_glossary_prompt'] = manual_extraction_prompt
+ config['glossary_format_instructions'] = manual_format_instructions
+ config['glossary_use_legacy_csv'] = manual_use_legacy_csv
+ config['enable_post_translation_scan'] = enable_post_scan
+
+ # QA Scanner settings
+ config['qa_min_foreign_chars'] = qa_min_foreign
+ config['qa_check_repetition'] = qa_check_rep
+ config['qa_check_glossary_leakage'] = qa_check_gloss_leak
+ config['qa_min_file_length'] = qa_min_file_len
+ config['qa_check_multiple_headers'] = qa_check_headers
+ config['qa_check_missing_html'] = qa_check_html
+ config['qa_check_insufficient_paragraphs'] = qa_check_paragraphs
+ config['qa_min_paragraph_percentage'] = qa_min_para_percent
+ config['qa_report_format'] = qa_report_fmt
+ config['qa_auto_save_report'] = qa_auto_save
+
+ # Chapter processing options
+ config['batch_translate_headers'] = batch_trans_headers
+ config['headers_per_batch'] = headers_batch
+ config['use_ncx_navigation'] = ncx_nav
+ config['attach_css_to_chapters'] = attach_css
+ config['retain_source_extension'] = retain_ext
+ config['use_conservative_batching'] = conservative_batch
+ config['disable_gemini_safety'] = gemini_safety
+ config['use_http_openrouter'] = http_openrouter
+ config['disable_openrouter_compression'] = openrouter_compress
+ config['text_extraction_method'] = extraction_method
+ config['file_filtering_level'] = filter_level
+
+ # Thinking mode settings
+ config['enable_gpt_thinking'] = gpt_thinking_enabled
+ config['gpt_thinking_effort'] = gpt_effort
+ config['or_thinking_tokens'] = or_tokens
+ config['enable_gemini_thinking'] = gemini_thinking_enabled
+ config['gemini_thinking_budget'] = gemini_budget
+
+ # Manga settings
+ config['ocr_provider'] = ocr_prov
+ if azure_k:
+ config['azure_vision_key'] = azure_k
+ if azure_e:
+ config['azure_vision_endpoint'] = azure_e
+ config['bubble_detection_enabled'] = bubble_det
+ config['inpainting_enabled'] = inpaint
+ config['manga_font_size_mode'] = font_mode
+ config['manga_font_size'] = font_s
+ config['manga_font_multiplier'] = font_mult
+ config['manga_min_font_size'] = min_font
+ config['manga_max_font_size'] = max_font
+ config['manga_text_color'] = text_col
+ config['manga_shadow_enabled'] = shadow_en
+ config['manga_shadow_color'] = shadow_col
+ config['manga_shadow_offset_x'] = shadow_x
+ config['manga_shadow_offset_y'] = shadow_y
+ config['manga_shadow_blur'] = shadow_b
+ config['manga_bg_opacity'] = bg_op
+ config['manga_bg_style'] = bg_st
+
+ # Advanced settings
+ if 'manga_settings' not in config:
+ config['manga_settings'] = {}
+ if 'advanced' not in config['manga_settings']:
+ config['manga_settings']['advanced'] = {}
+ config['manga_settings']['advanced']['parallel_panel_translation'] = parallel_trans
+ config['manga_settings']['advanced']['panel_max_workers'] = panel_workers
+
+ # Advanced bubble detection and inpainting settings
+ if 'ocr' not in config['manga_settings']:
+ config['manga_settings']['ocr'] = {}
+ if 'inpainting' not in config['manga_settings']:
+ config['manga_settings']['inpainting'] = {}
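+                    # Resulting nested shape (keys as used in this file):
+                    #   config['manga_settings'] = {
+                    #       'advanced':   {parallel_panel_translation, panel_max_workers, webtoon_mode, ...},
+                    #       'ocr':        {detector_type, rtdetr_confidence, bubble_confidence, detect_* flags, ...},
+                    #       'inpainting': {local_method, batch_size, enable_cache},
+                    #   }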
+
+ config['manga_settings']['ocr']['detector_type'] = detector_type_val
+ config['manga_settings']['ocr']['rtdetr_confidence'] = rtdetr_conf
+ config['manga_settings']['ocr']['bubble_confidence'] = bubble_conf
+ config['manga_settings']['ocr']['detect_text_bubbles'] = detect_text
+ config['manga_settings']['ocr']['detect_empty_bubbles'] = detect_empty
+ config['manga_settings']['ocr']['detect_free_text'] = detect_free
+ config['manga_settings']['ocr']['bubble_max_detections_yolo'] = max_detections
+ config['manga_settings']['inpainting']['local_method'] = local_method_val
+ config['manga_settings']['advanced']['webtoon_mode'] = webtoon_val
+ config['manga_settings']['inpainting']['batch_size'] = batch_size_val
+ config['manga_settings']['inpainting']['enable_cache'] = cache_enabled_val
+ config['manga_settings']['advanced']['parallel_processing'] = parallel_proc
+ config['manga_settings']['advanced']['max_workers'] = max_work
+ config['manga_settings']['advanced']['preload_local_inpainting_for_panels'] = preload_local
+ config['manga_settings']['advanced']['panel_start_stagger_ms'] = stagger_ms
+ config['manga_settings']['advanced']['torch_precision'] = torch_prec
+ config['manga_settings']['advanced']['auto_cleanup_models'] = auto_cleanup
+ config['manga_settings']['advanced']['debug_mode'] = debug
+ config['manga_settings']['advanced']['save_intermediate'] = save_inter
+ config['concise_pipeline_logs'] = concise_logs
+
+ # Save to file
+ result = self.save_config(config)
+
+ # Show success message for 3 seconds
+ return gr.update(value=result, visible=True)
+
+ except Exception as e:
+ return gr.update(value=f"❌ Save failed: {str(e)}", visible=True)
+
+ # Save button will be configured after all components are created
+
+ # Auto-hide status message after 3 seconds
+ def hide_status_after_delay():
+ import time
+ time.sleep(3)
+ return gr.update(visible=False)
+
+                # Note: we can't use a change event to auto-hide because it would trigger immediately;
+                # the status stays visible until it is manually dismissed or the page is refreshed.
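+                # One possible approach (sketch, not wired here): chain the hide after the save click, e.g.
+                #   save_config_btn.click(fn=save_all_config, inputs=[...], outputs=[save_status]).then(
+                #       fn=hide_status_after_delay, inputs=None, outputs=[save_status])
+                # `save_config_btn` and `save_status` are placeholder names for components created elsewhere.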
+
+ # All individual field auto-save handlers removed - use manual Save Config button instead
+
+ # Translate button click handler
+ translate_manga_btn.click(
+ fn=self.start_manga_translation,
+ inputs=[
+ manga_images,
+ manga_model,
+ manga_api_key,
+ manga_profile,
+ manga_system_prompt,
+ ocr_provider,
+ google_creds,
+ azure_key,
+ azure_endpoint,
+ bubble_detection,
+ inpainting,
+ font_size_mode,
+ font_size,
+ font_multiplier,
+ min_font_size,
+ max_font_size,
+ text_color_rgb,
+ shadow_enabled,
+ shadow_color,
+ shadow_offset_x,
+ shadow_offset_y,
+ shadow_blur,
+ bg_opacity,
+ bg_style,
+ parallel_panel_translation,
+ panel_max_workers
+ ],
+ outputs=[manga_logs, manga_output_gallery, manga_cbz_output, manga_status, manga_progress_group, manga_progress_text, manga_progress_bar, translate_manga_btn, stop_manga_btn]
+ )
+
+ # Stop button click handler
+ stop_manga_btn.click(
+ fn=self.stop_manga_translation,
+ inputs=[],
+ outputs=[translate_manga_btn, stop_manga_btn, manga_status]
+ )
+
+ # Load settings from localStorage on page load
+ def load_settings_from_storage():
+ """Load settings from localStorage or config file"""
+ is_hf_spaces = os.getenv('SPACE_ID') is not None or os.getenv('HF_SPACES') == 'true'
+
+ if not is_hf_spaces:
+ # Load from config file locally
+ config = self.load_config()
+ # Decrypt API keys if needed
+ if API_KEY_ENCRYPTION_AVAILABLE:
+ config = decrypt_config(config)
+ return [
+ config.get('model', 'gpt-4-turbo'),
+ config.get('api_key', ''),
+ config.get('active_profile', list(self.profiles.keys())[0] if self.profiles else ''), # profile
+ self.profiles.get(config.get('active_profile', list(self.profiles.keys())[0] if self.profiles else ''), ''), # prompt
+ config.get('ocr_provider', 'custom-api'),
+ None, # google_creds (file component - can't be pre-filled)
+ config.get('azure_vision_key', ''),
+ config.get('azure_vision_endpoint', ''),
+ config.get('bubble_detection_enabled', True),
+ config.get('inpainting_enabled', True),
+ config.get('manga_font_size_mode', 'auto'),
+ config.get('manga_font_size', 24),
+ config.get('manga_font_multiplier', 1.0),
+ config.get('manga_min_font_size', 12),
+ config.get('manga_max_font_size', 48),
+                        to_hex_color(config.get('manga_text_color', [255, 255, 255]), '#FFFFFF'), # ColorPicker expects a hex string; default white
+ config.get('manga_shadow_enabled', True),
+                        to_hex_color(config.get('manga_shadow_color', [0, 0, 0]), '#000000'), # ColorPicker expects a hex string; default black
+ config.get('manga_shadow_offset_x', 2),
+ config.get('manga_shadow_offset_y', 2),
+ config.get('manga_shadow_blur', 0),
+ config.get('manga_bg_opacity', 180),
+                        config.get('manga_bg_style', 'circle'), # must be one of the Radio choices: box/circle/wrap
+ config.get('manga_settings', {}).get('advanced', {}).get('parallel_panel_translation', False),
+ config.get('manga_settings', {}).get('advanced', {}).get('panel_max_workers', 7)
+ ]
+ else:
+ # For HF Spaces, return defaults (will be overridden by JS)
+ return [
+ 'gpt-4-turbo', # model
+ '', # api_key
+ list(self.profiles.keys())[0] if self.profiles else '', # profile
+ self.profiles.get(list(self.profiles.keys())[0] if self.profiles else '', ''), # prompt
+ 'custom-api', # ocr_provider
+ None, # google_creds (file component - can't be pre-filled)
+ '', # azure_key
+ '', # azure_endpoint
+ True, # bubble_detection
+ True, # inpainting
+ 'auto', # font_size_mode
+ 24, # font_size
+ 1.0, # font_multiplier
+ 12, # min_font_size
+ 48, # max_font_size
+ '#FFFFFF', # text_color - white
+ True, # shadow_enabled
+ '#000000', # shadow_color - black
+ 2, # shadow_offset_x
+ 2, # shadow_offset_y
+ 0, # shadow_blur
+ 180, # bg_opacity
+                        'circle', # bg_style (must be one of: box, circle, wrap)
+ False, # parallel_panel_translation
+ 7 # panel_max_workers
+ ]
+
+ # Store references for load handler
+ self.manga_components = {
+ 'model': manga_model,
+ 'api_key': manga_api_key,
+ 'profile': manga_profile,
+ 'prompt': manga_system_prompt,
+ 'ocr_provider': ocr_provider,
+ 'google_creds': google_creds,
+ 'azure_key': azure_key,
+ 'azure_endpoint': azure_endpoint,
+ 'bubble_detection': bubble_detection,
+ 'inpainting': inpainting,
+ 'font_size_mode': font_size_mode,
+ 'font_size': font_size,
+ 'font_multiplier': font_multiplier,
+ 'min_font_size': min_font_size,
+ 'max_font_size': max_font_size,
+ 'text_color_rgb': text_color_rgb,
+ 'shadow_enabled': shadow_enabled,
+ 'shadow_color': shadow_color,
+ 'shadow_offset_x': shadow_offset_x,
+ 'shadow_offset_y': shadow_offset_y,
+ 'shadow_blur': shadow_blur,
+ 'bg_opacity': bg_opacity,
+ 'bg_style': bg_style,
+ 'parallel_panel_translation': parallel_panel_translation,
+ 'panel_max_workers': panel_max_workers
+ }
+ self.load_settings_fn = load_settings_from_storage
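+                # Attachment sketch (assuming the Blocks instance is named `demo`; actual wiring happens elsewhere):
+                #   demo.load(fn=self.load_settings_fn, inputs=None, outputs=list(self.manga_components.values()))
+                # The dict above is built in the same order as the list returned by load_settings_from_storage,
+                # so its values() can be used directly as the outputs list.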
+
+ # Manga Settings Tab - NEW
+ with gr.Tab("🎬 Manga Settings"):
+ gr.Markdown("### Advanced Manga Translation Settings")
+ gr.Markdown("Configure bubble detection, inpainting, preprocessing, and rendering options.")
+
+ with gr.Accordion("🕹️ Bubble Detection & Inpainting", open=True):
+ gr.Markdown("#### Bubble Detection")
+
+ detector_type = gr.Radio(
+ choices=["rtdetr_onnx", "rtdetr", "yolo"],
+ value=self.get_config_value('manga_settings', {}).get('ocr', {}).get('detector_type', 'rtdetr_onnx'),
+ label="Detector Type",
+ interactive=True
+ )
+
+ rtdetr_confidence = gr.Slider(
+ minimum=0.0,
+ maximum=1.0,
+ value=self.get_config_value('manga_settings', {}).get('ocr', {}).get('rtdetr_confidence', 0.3),
+ step=0.05,
+ label="RT-DETR Confidence Threshold",
+ interactive=True
+ )
+
+ bubble_confidence = gr.Slider(
+ minimum=0.0,
+ maximum=1.0,
+ value=self.get_config_value('manga_settings', {}).get('ocr', {}).get('bubble_confidence', 0.3),
+ step=0.05,
+ label="YOLO Bubble Confidence Threshold",
+ interactive=True
+ )
+
+ detect_text_bubbles = gr.Checkbox(
+ label="Detect Text Bubbles",
+ value=self.get_config_value('manga_settings', {}).get('ocr', {}).get('detect_text_bubbles', True)
+ )
+
+ detect_empty_bubbles = gr.Checkbox(
+ label="Detect Empty Bubbles",
+ value=self.get_config_value('manga_settings', {}).get('ocr', {}).get('detect_empty_bubbles', True)
+ )
+
+ detect_free_text = gr.Checkbox(
+ label="Detect Free Text (outside bubbles)",
+ value=self.get_config_value('manga_settings', {}).get('ocr', {}).get('detect_free_text', True)
+ )
+
+ bubble_max_detections = gr.Slider(
+ minimum=1,
+ maximum=2000,
+ value=self.get_config_value('manga_settings', {}).get('ocr', {}).get('bubble_max_detections_yolo', 100),
+ step=1,
+ label="Max detections (YOLO only)",
+ interactive=True,
+ info="Maximum number of bubble detections for YOLO detector"
+ )
+
+ gr.Markdown("#### Inpainting")
+
+ local_inpaint_method = gr.Radio(
+ choices=["anime_onnx", "anime", "lama", "lama_onnx", "aot", "aot_onnx"],
+ value=self.get_config_value('manga_settings', {}).get('inpainting', {}).get('local_method', 'anime_onnx'),
+ label="Local Inpainting Model",
+ interactive=True
+ )
+
+ with gr.Row():
+ download_models_btn = gr.Button(
+ "📥 Download Models",
+ variant="secondary",
+ size="sm"
+ )
+ load_models_btn = gr.Button(
+ "📂 Load Models",
+ variant="secondary",
+ size="sm"
+ )
+
+ gr.Markdown("#### Mask Dilation")
+
+ auto_iterations = gr.Checkbox(
+ label="Auto Iterations (Recommended)",
+ value=self.get_config_value('manga_settings', {}).get('auto_iterations', True)
+ )
+
+ mask_dilation = gr.Slider(
+ minimum=0,
+ maximum=20,
+ value=self.get_config_value('manga_settings', {}).get('mask_dilation', 0),
+ step=1,
+ label="General Mask Dilation",
+ interactive=True
+ )
+
+ text_bubble_dilation = gr.Slider(
+ minimum=0,
+ maximum=20,
+ value=self.get_config_value('manga_settings', {}).get('text_bubble_dilation_iterations', 2),
+ step=1,
+ label="Text Bubble Dilation Iterations",
+ interactive=True
+ )
+
+ empty_bubble_dilation = gr.Slider(
+ minimum=0,
+ maximum=20,
+ value=self.get_config_value('manga_settings', {}).get('empty_bubble_dilation_iterations', 3),
+ step=1,
+ label="Empty Bubble Dilation Iterations",
+ interactive=True
+ )
+
+ free_text_dilation = gr.Slider(
+ minimum=0,
+ maximum=20,
+ value=self.get_config_value('manga_settings', {}).get('free_text_dilation_iterations', 3),
+ step=1,
+ label="Free Text Dilation Iterations",
+ interactive=True
+ )
+
+ with gr.Accordion("🖌️ Image Preprocessing", open=False):
+ preprocessing_enabled = gr.Checkbox(
+ label="Enable Preprocessing",
+ value=self.get_config_value('manga_settings', {}).get('preprocessing', {}).get('enabled', False)
+ )
+
+ auto_detect_quality = gr.Checkbox(
+ label="Auto Detect Image Quality",
+ value=self.get_config_value('manga_settings', {}).get('preprocessing', {}).get('auto_detect_quality', True)
+ )
+
+ enhancement_strength = gr.Slider(
+ minimum=1.0,
+ maximum=3.0,
+ value=self.get_config_value('manga_settings', {}).get('preprocessing', {}).get('enhancement_strength', 1.5),
+ step=0.1,
+ label="Enhancement Strength",
+ interactive=True
+ )
+
+ denoise_strength = gr.Slider(
+ minimum=0,
+ maximum=50,
+ value=self.get_config_value('manga_settings', {}).get('preprocessing', {}).get('denoise_strength', 10),
+ step=1,
+ label="Denoise Strength",
+ interactive=True
+ )
+
+ max_image_dimension = gr.Number(
+ label="Max Image Dimension (pixels)",
+ value=self.get_config_value('manga_settings', {}).get('preprocessing', {}).get('max_image_dimension', 2000),
+ minimum=500
+ )
+
+ chunk_height = gr.Number(
+ label="Chunk Height for Large Images",
+ value=self.get_config_value('manga_settings', {}).get('preprocessing', {}).get('chunk_height', 1000),
+ minimum=500
+ )
+
+ gr.Markdown("#### HD Strategy for Inpainting")
+ gr.Markdown("*Controls how large images are processed during inpainting*")
+
+ hd_strategy = gr.Radio(
+ choices=["original", "resize", "crop"],
+ value=self.get_config_value('manga_settings', {}).get('advanced', {}).get('hd_strategy', 'resize'),
+ label="HD Strategy",
+ interactive=True,
+ info="original = legacy full-image; resize/crop = faster"
+ )
+
+ hd_strategy_resize_limit = gr.Slider(
+ minimum=512,
+ maximum=4096,
+ value=self.get_config_value('manga_settings', {}).get('advanced', {}).get('hd_strategy_resize_limit', 1536),
+ step=64,
+ label="Resize Limit (long edge, px)",
+ info="For resize strategy",
+ interactive=True
+ )
+
+ hd_strategy_crop_margin = gr.Slider(
+ minimum=0,
+ maximum=256,
+ value=self.get_config_value('manga_settings', {}).get('advanced', {}).get('hd_strategy_crop_margin', 16),
+ step=2,
+ label="Crop Margin (px)",
+ info="For crop strategy",
+ interactive=True
+ )
+
+ hd_strategy_crop_trigger = gr.Slider(
+ minimum=256,
+ maximum=4096,
+ value=self.get_config_value('manga_settings', {}).get('advanced', {}).get('hd_strategy_crop_trigger_size', 1024),
+ step=64,
+ label="Crop Trigger Size (px)",
+ info="Apply crop only if long edge exceeds this",
+ interactive=True
+ )
+
+ gr.Markdown("#### Image Tiling")
+ gr.Markdown("*Alternative tiling strategy (note: HD Strategy takes precedence)*")
+
+ tiling_enabled = gr.Checkbox(
+ label="Enable Tiling",
+ value=self.get_config_value('manga_settings', {}).get('tiling', {}).get('enabled', False)
+ )
+
+ tiling_tile_size = gr.Slider(
+ minimum=256,
+ maximum=1024,
+ value=self.get_config_value('manga_settings', {}).get('tiling', {}).get('tile_size', 480),
+ step=64,
+ label="Tile Size (px)",
+ interactive=True
+ )
+
+ tiling_tile_overlap = gr.Slider(
+ minimum=0,
+ maximum=128,
+ value=self.get_config_value('manga_settings', {}).get('tiling', {}).get('tile_overlap', 64),
+ step=16,
+ label="Tile Overlap (px)",
+ interactive=True
+ )
+
+ with gr.Accordion("🎨 Font & Text Rendering", open=False):
+ gr.Markdown("#### Font Sizing Algorithm")
+
+ font_algorithm = gr.Radio(
+ choices=["smart", "simple"],
+ value=self.get_config_value('manga_settings', {}).get('font_sizing', {}).get('algorithm', 'smart'),
+ label="Font Sizing Algorithm",
+ interactive=True
+ )
+
+ prefer_larger = gr.Checkbox(
+ label="Prefer Larger Fonts",
+ value=self.get_config_value('manga_settings', {}).get('font_sizing', {}).get('prefer_larger', True)
+ )
+
+ max_lines = gr.Slider(
+ minimum=1,
+ maximum=20,
+ value=self.get_config_value('manga_settings', {}).get('font_sizing', {}).get('max_lines', 10),
+ step=1,
+ label="Maximum Lines Per Bubble",
+ interactive=True
+ )
+
+ line_spacing = gr.Slider(
+ minimum=0.5,
+ maximum=3.0,
+ value=self.get_config_value('manga_settings', {}).get('font_sizing', {}).get('line_spacing', 1.3),
+ step=0.1,
+ label="Line Spacing Multiplier",
+ interactive=True
+ )
+
+ bubble_size_factor = gr.Checkbox(
+ label="Use Bubble Size Factor",
+ value=self.get_config_value('manga_settings', {}).get('font_sizing', {}).get('bubble_size_factor', True)
+ )
+
+ auto_fit_style = gr.Radio(
+ choices=["balanced", "aggressive", "conservative"],
+ value=self.get_config_value('manga_settings', {}).get('rendering', {}).get('auto_fit_style', 'balanced'),
+ label="Auto Fit Style",
+ interactive=True
+ )
+
+ with gr.Accordion("⚙️ Advanced Options", open=False):
+ gr.Markdown("#### Format Detection")
+
+ format_detection = gr.Checkbox(
+ label="Enable Format Detection (manga/webtoon)",
+ value=self.get_config_value('manga_settings', {}).get('advanced', {}).get('format_detection', True)
+ )
+
+ webtoon_mode = gr.Radio(
+ choices=["auto", "force_manga", "force_webtoon"],
+ value=self.get_config_value('manga_settings', {}).get('advanced', {}).get('webtoon_mode', 'auto'),
+ label="Webtoon Mode",
+ interactive=True
+ )
+
+ gr.Markdown("#### Inpainting Performance")
+
+ inpaint_batch_size = gr.Slider(
+ minimum=1,
+ maximum=32,
+ value=self.get_config_value('manga_settings', {}).get('inpainting', {}).get('batch_size', 10),
+ step=1,
+ label="Batch Size",
+ interactive=True,
+ info="Process multiple regions at once"
+ )
+
+ inpaint_cache_enabled = gr.Checkbox(
+ label="Enable inpainting cache (speeds up repeated processing)",
+ value=self.get_config_value('manga_settings', {}).get('inpainting', {}).get('enable_cache', True)
+ )
+
+ gr.Markdown("#### Performance")
+
+ parallel_processing = gr.Checkbox(
+ label="Enable Parallel Processing",
+ value=self.get_config_value('manga_settings', {}).get('advanced', {}).get('parallel_processing', True)
+ )
+
+ max_workers = gr.Slider(
+ minimum=1,
+ maximum=8,
+ value=self.get_config_value('manga_settings', {}).get('advanced', {}).get('max_workers', 2),
+ step=1,
+ label="Max Worker Threads",
+ interactive=True
+ )
+
+ gr.Markdown("**⚡ Advanced Performance**")
+
+ preload_local_inpainting = gr.Checkbox(
+ label="Preload local inpainting instances for panel-parallel runs",
+ value=self.get_config_value('manga_settings', {}).get('advanced', {}).get('preload_local_inpainting_for_panels', True),
+ info="Preloads inpainting models to speed up parallel processing"
+ )
+
+ panel_start_stagger = gr.Slider(
+ minimum=0,
+ maximum=1000,
+ value=self.get_config_value('manga_settings', {}).get('advanced', {}).get('panel_start_stagger_ms', 30),
+ step=10,
+ label="Panel start stagger",
+ interactive=True,
+ info="Milliseconds delay between panel starts"
+ )
+
+ gr.Markdown("#### Model Optimization")
+
+ torch_precision = gr.Radio(
+ choices=["fp32", "fp16"],
+ value=self.get_config_value('manga_settings', {}).get('advanced', {}).get('torch_precision', 'fp16'),
+ label="Torch Precision",
+ interactive=True
+ )
+
+ auto_cleanup_models = gr.Checkbox(
+ label="Auto Cleanup Models from Memory",
+ value=self.get_config_value('manga_settings', {}).get('advanced', {}).get('auto_cleanup_models', False)
+ )
+
+ gr.Markdown("#### Debug Options")
+
+ debug_mode = gr.Checkbox(
+ label="Enable Debug Mode",
+ value=self.get_config_value('manga_settings', {}).get('advanced', {}).get('debug_mode', False)
+ )
+
+ save_intermediate = gr.Checkbox(
+ label="Save Intermediate Files",
+ value=self.get_config_value('manga_settings', {}).get('advanced', {}).get('save_intermediate', False)
+ )
+
+ concise_pipeline_logs = gr.Checkbox(
+ label="Concise Pipeline Logs",
+ value=self.get_config_value('concise_pipeline_logs', True)
+ )
+
+ # Button handlers for model management
+ def download_models_handler(detector_type_val, inpaint_method_val):
+ """Download selected models"""
+ messages = []
+
+ try:
+ # Download bubble detection model
+ if detector_type_val:
+ messages.append(f"📥 Downloading {detector_type_val} bubble detector...")
+ try:
+ from bubble_detector import BubbleDetector
+ bd = BubbleDetector()
+
+ if detector_type_val == "rtdetr_onnx":
+ if bd.load_rtdetr_onnx_model():
+ messages.append("✅ RT-DETR ONNX model downloaded successfully")
+ else:
+ messages.append("❌ Failed to download RT-DETR ONNX model")
+ elif detector_type_val == "rtdetr":
+ if bd.load_rtdetr_model():
+ messages.append("✅ RT-DETR model downloaded successfully")
+ else:
+ messages.append("❌ Failed to download RT-DETR model")
+ elif detector_type_val == "yolo":
+ messages.append("ℹ️ YOLO models are downloaded automatically on first use")
+ except Exception as e:
+ messages.append(f"❌ Error downloading detector: {str(e)}")
+
+ # Download inpainting model
+ if inpaint_method_val:
+ messages.append(f"\n📥 Downloading {inpaint_method_val} inpainting model...")
+ try:
+ from local_inpainter import LocalInpainter, LAMA_JIT_MODELS
+
+ inpainter = LocalInpainter({})
+
+ # Map method names to download keys
+ method_map = {
+ 'anime_onnx': 'anime_onnx',
+ 'anime': 'anime',
+ 'lama': 'lama',
+ 'lama_onnx': 'lama_onnx',
+ 'aot': 'aot',
+ 'aot_onnx': 'aot_onnx'
+ }
+
+ method_key = method_map.get(inpaint_method_val)
+ if method_key and method_key in LAMA_JIT_MODELS:
+ model_info = LAMA_JIT_MODELS[method_key]
+ messages.append(f"Downloading {model_info['name']}...")
+
+ model_path = inpainter.download_jit_model(method_key)
+ if model_path:
+ messages.append(f"✅ {model_info['name']} downloaded to: {model_path}")
+ else:
+ messages.append(f"❌ Failed to download {model_info['name']}")
+ else:
+ messages.append(f"ℹ️ {inpaint_method_val} is downloaded automatically on first use")
+
+ except Exception as e:
+ messages.append(f"❌ Error downloading inpainting model: {str(e)}")
+
+ if not messages:
+ messages.append("ℹ️ No models selected for download")
+
+ except Exception as e:
+ messages.append(f"❌ Error during download: {str(e)}")
+
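+                # gr.Info pops a toast notification in the UI; since this handler is wired with
+                # outputs=None, the returned value is otherwise ignored.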
+ return gr.Info("\n".join(messages))
+
+ def load_models_handler(detector_type_val, inpaint_method_val):
+ """Load selected models into memory"""
+ messages = []
+
+ try:
+ # Load bubble detection model
+ if detector_type_val:
+ messages.append(f"📦 Loading {detector_type_val} bubble detector...")
+ try:
+ from bubble_detector import BubbleDetector
+ bd = BubbleDetector()
+
+ if detector_type_val == "rtdetr_onnx":
+ if bd.load_rtdetr_onnx_model():
+ messages.append("✅ RT-DETR ONNX model loaded successfully")
+ else:
+ messages.append("❌ Failed to load RT-DETR ONNX model")
+ elif detector_type_val == "rtdetr":
+ if bd.load_rtdetr_model():
+ messages.append("✅ RT-DETR model loaded successfully")
+ else:
+ messages.append("❌ Failed to load RT-DETR model")
+ elif detector_type_val == "yolo":
+ messages.append("ℹ️ YOLO models are loaded automatically when needed")
+ except Exception as e:
+ messages.append(f"❌ Error loading detector: {str(e)}")
+
+ # Load inpainting model
+ if inpaint_method_val:
+ messages.append(f"\n📦 Loading {inpaint_method_val} inpainting model...")
+ try:
+ from local_inpainter import LocalInpainter, LAMA_JIT_MODELS
+ import os
+
+ inpainter = LocalInpainter({})
+
+ # Map method names to model keys
+ method_map = {
+ 'anime_onnx': 'anime_onnx',
+ 'anime': 'anime',
+ 'lama': 'lama',
+ 'lama_onnx': 'lama_onnx',
+ 'aot': 'aot',
+ 'aot_onnx': 'aot_onnx'
+ }
+
+ method_key = method_map.get(inpaint_method_val)
+ if method_key:
+ # First check if model exists, download if not
+ if method_key in LAMA_JIT_MODELS:
+ model_info = LAMA_JIT_MODELS[method_key]
+ cache_dir = os.path.expanduser('~/.cache/inpainting')
+ model_filename = os.path.basename(model_info['url'])
+ model_path = os.path.join(cache_dir, model_filename)
+
+ if not os.path.exists(model_path):
+ messages.append(f"Model not found, downloading first...")
+ model_path = inpainter.download_jit_model(method_key)
+ if not model_path:
+ messages.append(f"❌ Failed to download model")
+ return gr.Info("\n".join(messages))
+
+ # Now load the model
+ if inpainter.load_model(method_key, model_path):
+ messages.append(f"✅ {model_info['name']} loaded successfully")
+ else:
+ messages.append(f"❌ Failed to load {model_info['name']}")
+ else:
+ messages.append(f"ℹ️ {inpaint_method_val} will be loaded automatically when needed")
+ else:
+ messages.append(f"ℹ️ Unknown method: {inpaint_method_val}")
+
+ except Exception as e:
+ messages.append(f"❌ Error loading inpainting model: {str(e)}")
+
+ if not messages:
+ messages.append("ℹ️ No models selected for loading")
+
+ except Exception as e:
+ messages.append(f"❌ Error during loading: {str(e)}")
+
+ return gr.Info("\n".join(messages))
+
+ download_models_btn.click(
+ fn=download_models_handler,
+ inputs=[detector_type, local_inpaint_method],
+ outputs=None
+ )
+
+ load_models_btn.click(
+ fn=load_models_handler,
+ inputs=[detector_type, local_inpaint_method],
+ outputs=None
+ )
+
+ # Auto-save parallel panel translation settings
+ def save_parallel_settings(preload_enabled, parallel_enabled, max_workers, stagger_ms):
+ """Save parallel panel translation settings to config"""
+ try:
+ current_config = self.get_current_config_for_update()
+ # Don't decrypt - just update what we need
+
+ # Initialize nested structure if not exists
+ if 'manga_settings' not in current_config:
+ current_config['manga_settings'] = {}
+ if 'advanced' not in current_config['manga_settings']:
+ current_config['manga_settings']['advanced'] = {}
+
+ current_config['manga_settings']['advanced']['preload_local_inpainting_for_panels'] = bool(preload_enabled)
+ current_config['manga_settings']['advanced']['parallel_panel_translation'] = bool(parallel_enabled)
+ current_config['manga_settings']['advanced']['panel_max_workers'] = int(max_workers)
+ current_config['manga_settings']['advanced']['panel_start_stagger_ms'] = int(stagger_ms)
+
+ self.save_config(current_config)
+ return None
+ except Exception as e:
+ print(f"Failed to save parallel panel settings: {e}")
+ return None
+
+ # Auto-save inpainting performance settings
+ def save_inpainting_settings(batch_size, cache_enabled):
+ """Save inpainting performance settings to config"""
+ try:
+ current_config = self.get_current_config_for_update()
+ # Don't decrypt - just update what we need
+
+ # Initialize nested structure if not exists
+ if 'manga_settings' not in current_config:
+ current_config['manga_settings'] = {}
+ if 'inpainting' not in current_config['manga_settings']:
+ current_config['manga_settings']['inpainting'] = {}
+
+ current_config['manga_settings']['inpainting']['batch_size'] = int(batch_size)
+ current_config['manga_settings']['inpainting']['enable_cache'] = bool(cache_enabled)
+
+ self.save_config(current_config)
+ return None
+ except Exception as e:
+ print(f"Failed to save inpainting settings: {e}")
+ return None
+
+ # Auto-save preload local inpainting setting
+ def save_preload_setting(preload_enabled):
+ """Save preload local inpainting setting to config"""
+ try:
+ current_config = self.get_current_config_for_update()
+ # Don't decrypt - just update what we need
+
+ # Initialize nested structure if not exists
+ if 'manga_settings' not in current_config:
+ current_config['manga_settings'] = {}
+ if 'advanced' not in current_config['manga_settings']:
+ current_config['manga_settings']['advanced'] = {}
+
+ current_config['manga_settings']['advanced']['preload_local_inpainting_for_panels'] = bool(preload_enabled)
+
+ self.save_config(current_config)
+ return None
+ except Exception as e:
+ print(f"Failed to save preload setting: {e}")
+ return None
+
+ # Auto-save bubble detection settings
+ def save_bubble_detection_settings(detector_type_val, rtdetr_conf, bubble_conf, detect_text, detect_empty, detect_free, max_detections, local_method_val):
+ """Save bubble detection settings to config"""
+ try:
+ current_config = self.get_current_config_for_update()
+ # Don't decrypt - just update what we need
+
+ # Initialize nested structure
+ if 'manga_settings' not in current_config:
+ current_config['manga_settings'] = {}
+ if 'ocr' not in current_config['manga_settings']:
+ current_config['manga_settings']['ocr'] = {}
+ if 'inpainting' not in current_config['manga_settings']:
+ current_config['manga_settings']['inpainting'] = {}
+
+ # Save bubble detection settings
+ current_config['manga_settings']['ocr']['detector_type'] = detector_type_val
+ current_config['manga_settings']['ocr']['rtdetr_confidence'] = float(rtdetr_conf)
+ current_config['manga_settings']['ocr']['bubble_confidence'] = float(bubble_conf)
+ current_config['manga_settings']['ocr']['detect_text_bubbles'] = bool(detect_text)
+ current_config['manga_settings']['ocr']['detect_empty_bubbles'] = bool(detect_empty)
+ current_config['manga_settings']['ocr']['detect_free_text'] = bool(detect_free)
+ current_config['manga_settings']['ocr']['bubble_max_detections_yolo'] = int(max_detections)
+
+ # Save inpainting method
+ current_config['manga_settings']['inpainting']['local_method'] = local_method_val
+
+ self.save_config(current_config)
+ return None
+ except Exception as e:
+ print(f"Failed to save bubble detection settings: {e}")
+ return None
+
+ # All Advanced Settings auto-save handlers removed - use manual Save Config button
+
+ gr.Markdown("\n---\n**Note:** These settings will be saved to your config and applied to all manga translations.")
+
+ # Manual Glossary Extraction Tab
+ with gr.Tab("📝 Manual Glossary Extraction"):
+ gr.Markdown("""
+ ### Extract character names and terms from EPUB files
+ Configure extraction settings below, then upload an EPUB file to extract a glossary.
+ """)
+
+ with gr.Row():
+ with gr.Column():
+ glossary_epub = gr.File(
+ label="📖 Upload EPUB File",
+ file_types=[".epub"]
+ )
+
+ with gr.Row():
+ extract_btn = gr.Button(
+ "🔍 Extract Glossary",
+ variant="primary",
+ size="lg",
+ scale=2
+ )
+
+ stop_glossary_btn = gr.Button(
+ "⏹️ Stop Extraction",
+ variant="stop",
+ size="lg",
+ visible=False,
+ scale=1
+ )
+
+ glossary_model = gr.Dropdown(
+ choices=self.models,
+ value=self.get_config_value('model', 'gpt-4-turbo'),
+ label="🤖 AI Model",
+ interactive=True,
+ allow_custom_value=True,
+ filterable=True
+ )
+
+ glossary_api_key = gr.Textbox(
+ label="🔑 API Key",
+ type="password",
+ placeholder="Enter your API key",
+ value=self.get_config_value('api_key', '')
+ )
+
+ # Tabs for different settings sections
+ with gr.Tabs():
+ # Extraction Settings Tab
+ with gr.Tab("Extraction Settings"):
+ with gr.Accordion("🎯 Targeted Extraction Settings", open=True):
+ with gr.Row():
+ with gr.Column():
+ min_freq = gr.Slider(
+ minimum=1,
+ maximum=10,
+ value=self.get_config_value('glossary_min_frequency', 2),
+ step=1,
+ label="Min frequency",
+ info="How many times a name must appear (lower = more terms)"
+ )
+
+ max_titles = gr.Slider(
+ minimum=10,
+ maximum=100,
+ value=self.get_config_value('glossary_max_titles', 30),
+ step=5,
+ label="Max titles",
+ info="Limits to prevent huge glossaries"
+ )
+
+ max_text_size = gr.Number(
+ label="Max text size",
+ value=self.get_config_value('glossary_max_text_size', 50000),
+ info="Characters to analyze (0 = entire text)"
+ )
+
+ max_sentences = gr.Slider(
+ minimum=50,
+ maximum=500,
+ value=self.get_config_value('glossary_max_sentences', 200),
+ step=10,
+ label="Max sentences",
+ info="Maximum sentences to send to AI (increase for more context)"
+ )
+
+ with gr.Column():
+ max_names_slider = gr.Slider(
+ minimum=10,
+ maximum=200,
+ value=self.get_config_value('glossary_max_names', 50),
+ step=10,
+ label="Max names",
+ info="Maximum number of character names to extract"
+ )
+
+ translation_batch = gr.Slider(
+ minimum=10,
+ maximum=100,
+ value=self.get_config_value('glossary_batch_size', 50),
+ step=5,
+ label="Translation batch",
+ info="Terms per API call (larger = faster but may reduce quality)"
+ )
+
+ chapter_split_threshold = gr.Number(
+ label="Chapter split threshold",
+ value=self.get_config_value('glossary_chapter_split_threshold', 8192),
+ info="Split large texts into chunks (0 = no splitting)"
+ )
+
+ # Filter mode selection
+ filter_mode = gr.Radio(
+ choices=[
+ "all",
+ "only_with_honorifics",
+ "only_without_honorifics"
+ ],
+ value=self.get_config_value('glossary_filter_mode', 'all'),
+ label="Filter mode",
+ info="What types of names to extract"
+ )
+
+ # Strip honorifics checkbox
+ strip_honorifics = gr.Checkbox(
+ label="Remove honorifics from extracted names",
+ value=self.get_config_value('strip_honorifics', True),
+ info="Remove suffixes like '님', 'さん', '先生' from names"
+ )
+
+ # Fuzzy threshold slider
+ fuzzy_threshold = gr.Slider(
+ minimum=0.5,
+ maximum=1.0,
+ value=self.get_config_value('glossary_fuzzy_threshold', 0.90),
+ step=0.05,
+ label="Fuzzy threshold",
+ info="How similar names must be to match (0.9 = 90% match, 1.0 = exact match)"
+ )
+
+
+ # Extraction Prompt Tab
+ with gr.Tab("Extraction Prompt"):
+ gr.Markdown("""
+ ### System Prompt for Extraction
+ Customize how the AI extracts names and terms from your text.
+ """)
+
+ extraction_prompt = gr.Textbox(
+ label="Extraction Template (Use placeholders: {language}, {min_frequency}, {max_names}, {max_titles})",
+ lines=10,
+ value=self.get_config_value('manual_glossary_prompt',
+ "Extract character names and important terms from the following text.\n\n"
+ "Output format:\n{fields}\n\n"
+ "Rules:\n- Output ONLY CSV lines in the exact format shown above\n"
+ "- No headers, no extra text, no JSON\n"
+ "- One entry per line\n"
+ "- Leave gender empty for terms (just end with comma)")
+ )
+
+ reset_extraction_prompt_btn = gr.Button(
+ "Reset to Default",
+ variant="secondary",
+ size="sm"
+ )
+
+ # Format Instructions Tab
+ with gr.Tab("Format Instructions"):
+ gr.Markdown("""
+ ### Output Format Instructions
+ These instructions tell the AI exactly how to format the extracted glossary.
+ """)
+
+ format_instructions = gr.Textbox(
+ label="Format Instructions (Use placeholder: {text_sample})",
+ lines=10,
+ value=self.get_config_value('glossary_format_instructions',
+ "Return the results in EXACT CSV format with this header:\n"
+ "type,raw_name,translated_name\n\n"
+ "For example:\n"
+ "character,김상현,Kim Sang-hyun\n"
+ "character,갈편제,Gale Hardest\n"
+ "term,마법사,Mage\n\n"
+ "Only include terms that actually appear in the text.\n"
+ "Do not use quotes around values unless they contain commas.\n\n"
+ "Text to analyze:\n{text_sample}")
+ )
+
+ use_legacy_csv = gr.Checkbox(
+ label="Use legacy CSV format",
+ value=self.get_config_value('glossary_use_legacy_csv', False),
+ info="When disabled: Uses clean format with sections (===CHARACTERS===). When enabled: Uses traditional CSV format with repeated type columns."
+ )
+
+ with gr.Column():
+ # Add logo and status at top
+ with gr.Row():
+ gr.Image(
+ value="Halgakos.png",
+ label=None,
+ show_label=False,
+ width=80,
+ height=80,
+ interactive=False,
+ show_download_button=False,
+ container=False
+ )
+ glossary_status_message = gr.Markdown(
+ value="### Ready to extract\nUpload an EPUB file and click 'Extract Glossary' to begin.",
+ visible=True
+ )
+
+ # Progress section (similar to translation tabs)
+ with gr.Group(visible=False) as glossary_progress_group:
+ gr.Markdown("### Progress")
+ glossary_progress_text = gr.Textbox(
+ label="📨 Current Status",
+ value="Ready to start",
+ interactive=False,
+ lines=1
+ )
+ glossary_progress_bar = gr.Slider(
+ minimum=0,
+ maximum=100,
+ value=0,
+ step=1,
+ label="📋 Extraction Progress",
+ interactive=False,
+ show_label=True
+ )
+
+ glossary_logs = gr.Textbox(
+ label="📋 Extraction Logs",
+ lines=20,
+ max_lines=30,
+ value="Ready to extract. Upload an EPUB file and configure settings.",
+ visible=True,
+ interactive=False
+ )
+
+ glossary_output = gr.File(
+ label="📥 Download Glossary CSV",
+ visible=False
+ )
+
+ glossary_status = gr.Textbox(
+ label="Final Status",
+ lines=3,
+ max_lines=5,
+ visible=False,
+ interactive=False
+ )
+
+ extract_btn.click(
+ fn=self.extract_glossary_with_stop,
+ inputs=[
+ glossary_epub,
+ glossary_model,
+ glossary_api_key,
+ min_freq,
+ max_names_slider,
+ max_titles,
+ max_text_size,
+ max_sentences,
+ translation_batch,
+ chapter_split_threshold,
+ filter_mode,
+ strip_honorifics,
+ fuzzy_threshold,
+ extraction_prompt,
+ format_instructions,
+ use_legacy_csv
+ ],
+ outputs=[
+ glossary_output,
+ glossary_status_message,
+ glossary_progress_group,
+ glossary_logs,
+ glossary_status,
+ glossary_progress_text,
+ glossary_progress_bar,
+ extract_btn,
+ stop_glossary_btn
+ ]
+ )
+
+ # Stop button handler
+ stop_glossary_btn.click(
+ fn=self.stop_glossary_extraction,
+ inputs=[],
+ outputs=[extract_btn, stop_glossary_btn, glossary_status]
+ )
+
+ # QA Scanner Tab
+ with gr.Tab("🔍 QA Scanner"):
+ gr.Markdown("""
+ ### Quick Scan for Translation Quality
+ Scan translated content for common issues like untranslated text, formatting problems, and quality concerns.
+
+ **Supported inputs:**
+ - 📁 Output folder containing extracted HTML/XHTML files
+ - 📖 EPUB file (will be automatically extracted and scanned)
+ - 📦 ZIP file containing HTML/XHTML files
+ """)
+
+ with gr.Row():
+ with gr.Column():
+ # Check if running on Hugging Face Spaces
+ is_hf_spaces = os.getenv('SPACE_ID') is not None or os.getenv('HF_SPACES') == 'true'
+
+ if is_hf_spaces:
+ gr.Markdown("""
+ **🤗 Hugging Face Spaces Mode**
+ Upload an EPUB or ZIP file containing the translated content.
+ The scanner will extract and analyze the HTML/XHTML files inside.
+ """)
+ qa_folder_path = gr.File(
+ label="📂 Upload EPUB or ZIP file",
+ file_types=[".epub", ".zip"],
+ type="filepath"
+ )
+ else:
+ qa_folder_path = gr.Textbox(
+ label="📁 Path to Folder, EPUB, or ZIP",
+ placeholder="Enter path to: folder with HTML files, EPUB file, or ZIP file",
+ info="Can be a folder path, or direct path to an EPUB/ZIP file"
+ )
+
+ with gr.Row():
+ qa_scan_btn = gr.Button(
+ "⚡ Quick Scan",
+ variant="primary",
+ size="lg",
+ scale=2
+ )
+
+ stop_qa_btn = gr.Button(
+ "⏹️ Stop Scan",
+ variant="stop",
+ size="lg",
+ visible=False,
+ scale=1
+ )
+
+ with gr.Accordion("⚙️ Quick Scan Settings", open=True):
+ gr.Markdown("""
+ **Quick Scan Mode (85% threshold, Speed optimized)**
+ - 3-5x faster scanning
+ - Checks consecutive chapters only
+ - Simplified analysis
+ - Good for large libraries
+ - Minimal resource usage
+ """)
+
+ # Foreign Character Detection
+ gr.Markdown("#### Foreign Character Detection")
+ min_foreign_chars = gr.Slider(
+ minimum=0,
+ maximum=50,
+ value=self.get_config_value('qa_min_foreign_chars', 10),
+ step=1,
+ label="Minimum foreign characters to flag",
+ info="0 = always flag, higher = more tolerant"
+ )
+
+ # Detection Options
+ gr.Markdown("#### Detection Options")
+ check_repetition = gr.Checkbox(
+ label="Check for excessive repetition",
+ value=self.get_config_value('qa_check_repetition', True)
+ )
+
+ check_glossary_leakage = gr.Checkbox(
+ label="Check for glossary leakage (raw glossary entries in translation)",
+ value=self.get_config_value('qa_check_glossary_leakage', True)
+ )
+
+ # File Processing
+ gr.Markdown("#### File Processing")
+ min_file_length = gr.Slider(
+ minimum=0,
+ maximum=5000,
+ value=self.get_config_value('qa_min_file_length', 0),
+ step=100,
+ label="Minimum file length (characters)",
+ info="Skip files shorter than this"
+ )
+
+ # Additional Checks
+ gr.Markdown("#### Additional Checks")
+ check_multiple_headers = gr.Checkbox(
+ label="Detect files with 2 or more headers (h1-h6 tags)",
+ value=self.get_config_value('qa_check_multiple_headers', True),
+ info="Identifies files that may have been incorrectly split or merged"
+ )
+
+ check_missing_html = gr.Checkbox(
+                                label="Flag HTML files with missing <html> tag",
+ value=self.get_config_value('qa_check_missing_html', True),
+ info="Checks if HTML files have proper structure"
+ )
+
+ check_insufficient_paragraphs = gr.Checkbox(
+ label="Check for insufficient paragraph tags",
+ value=self.get_config_value('qa_check_insufficient_paragraphs', True)
+ )
+
+ min_paragraph_percentage = gr.Slider(
+ minimum=10,
+ maximum=90,
+ value=self.get_config_value('qa_min_paragraph_percentage', 30),
+ step=5,
+                                label="Minimum text in <p> tags (%)",
+ info="Files with less than this percentage will be flagged"
+ )
+
+ # Report Settings
+ gr.Markdown("#### Report Settings")
+
+ report_format = gr.Radio(
+ choices=["summary", "detailed", "verbose"],
+ value=self.get_config_value('qa_report_format', 'detailed'),
+ label="Report format",
+ info="Summary = brief overview, Detailed = recommended, Verbose = all data"
+ )
+
+ auto_save_report = gr.Checkbox(
+ label="Automatically save report after scan",
+ value=self.get_config_value('qa_auto_save_report', True)
+ )
+
+ with gr.Column():
+ # Add logo and status at top
+ with gr.Row():
+ gr.Image(
+ value="Halgakos.png",
+ label=None,
+ show_label=False,
+ width=80,
+ height=80,
+ interactive=False,
+ show_download_button=False,
+ container=False
+ )
+ qa_status_message = gr.Markdown(
+ value="### Ready to scan\nEnter the path to your output folder and click 'Quick Scan' to begin.",
+ visible=True
+ )
+
+ # Progress section
+ with gr.Group(visible=False) as qa_progress_group:
+ gr.Markdown("### Progress")
+ qa_progress_text = gr.Textbox(
+ label="📨 Current Status",
+ value="Ready to start",
+ interactive=False,
+ lines=1
+ )
+ qa_progress_bar = gr.Slider(
+ minimum=0,
+ maximum=100,
+ value=0,
+ step=1,
+ label="📋 Scan Progress",
+ interactive=False,
+ show_label=True
+ )
+
+ qa_logs = gr.Textbox(
+ label="📋 Scan Logs",
+ lines=20,
+ max_lines=30,
+ value="Ready to scan. Enter output folder path and configure settings.",
+ visible=True,
+ interactive=False
+ )
+
+ qa_report = gr.File(
+ label="📄 Download QA Report",
+ visible=False
+ )
+
+ qa_status = gr.Textbox(
+ label="Final Status",
+ lines=3,
+ max_lines=5,
+ visible=False,
+ interactive=False
+ )
+
+ # QA Scan button handler
+ qa_scan_btn.click(
+ fn=self.run_qa_scan_with_stop,
+ inputs=[
+ qa_folder_path,
+ min_foreign_chars,
+ check_repetition,
+ check_glossary_leakage,
+ min_file_length,
+ check_multiple_headers,
+ check_missing_html,
+ check_insufficient_paragraphs,
+ min_paragraph_percentage,
+ report_format,
+ auto_save_report
+ ],
+ outputs=[
+ qa_report,
+ qa_status_message,
+ qa_progress_group,
+ qa_logs,
+ qa_status,
+ qa_progress_text,
+ qa_progress_bar,
+ qa_scan_btn,
+ stop_qa_btn
+ ]
+ )
+
+ # Stop button handler
+ stop_qa_btn.click(
+ fn=self.stop_qa_scan,
+ inputs=[],
+ outputs=[qa_scan_btn, stop_qa_btn, qa_status]
+ )
+
+ # Settings Tab
+ with gr.Tab("⚙️ Settings"):
+ gr.Markdown("### Configuration")
+
+ gr.Markdown("#### Translation Profiles")
+ gr.Markdown("Profiles are loaded from your `config_web.json` file. The web interface has its own separate configuration.")
+
+ with gr.Accordion("View All Profiles", open=False):
+ profiles_text = "\n\n".join(
+ [f"**{name}**:\n```\n{prompt[:200]}...\n```"
+ for name, prompt in self.profiles.items()]
+ )
+ gr.Markdown(profiles_text if profiles_text else "No profiles found")
+
+ gr.Markdown("---")
+ gr.Markdown("#### Advanced Translation Settings")
+
+ with gr.Row():
+ with gr.Column():
+ thread_delay = gr.Slider(
+ minimum=0,
+ maximum=5,
+ value=self.get_config_value('thread_submission_delay', 0.1),
+ step=0.1,
+ label="Threading delay (s)",
+ interactive=True
+ )
+
+ api_delay = gr.Slider(
+ minimum=0,
+ maximum=10,
+ value=self.get_config_value('delay', 1),
+ step=0.5,
+ label="API call delay (s)",
+ interactive=True
+ )
+
+ chapter_range = gr.Textbox(
+ label="Chapter range (e.g., 5-10)",
+ value=self.get_config_value('chapter_range', ''),
+ placeholder="Leave empty for all chapters"
+ )
+
+ token_limit = gr.Number(
+ label="Input Token limit",
+ value=self.get_config_value('token_limit', 200000),
+ minimum=0
+ )
+
+ disable_token_limit = gr.Checkbox(
+ label="Disable Input Token Limit",
+ value=self.get_config_value('token_limit_disabled', False)
+ )
+
+ output_token_limit = gr.Number(
+ label="Output Token limit",
+ value=self.get_config_value('max_output_tokens', 16000),
+ minimum=0
+ )
+
+ with gr.Column():
+ contextual = gr.Checkbox(
+ label="Contextual Translation",
+ value=self.get_config_value('contextual', False)
+ )
+
+ history_limit = gr.Number(
+ label="Translation History Limit",
+ value=self.get_config_value('translation_history_limit', 2),
+ minimum=0
+ )
+
+ rolling_history = gr.Checkbox(
+ label="Rolling History Window",
+ value=self.get_config_value('translation_history_rolling', False)
+ )
+
+ batch_translation = gr.Checkbox(
+ label="Batch Translation",
+ value=self.get_config_value('batch_translation', True)
+ )
+
+ batch_size = gr.Number(
+ label="Batch Size",
+ value=self.get_config_value('batch_size', 10),
+ minimum=1
+ )
+
+ gr.Markdown("---")
+ gr.Markdown("#### Chapter Processing Options")
+
+ with gr.Row():
+ with gr.Column():
+ # Chapter Header Translation
+ batch_translate_headers = gr.Checkbox(
+ label="Batch Translate Headers",
+ value=self.get_config_value('batch_translate_headers', False)
+ )
+
+ headers_per_batch = gr.Number(
+ label="Headers per batch",
+ value=self.get_config_value('headers_per_batch', 400),
+ minimum=1
+ )
+
+ # NCX and CSS options
+ use_ncx_navigation = gr.Checkbox(
+ label="Use NCX-only Navigation (Compatibility Mode)",
+ value=self.get_config_value('use_ncx_navigation', False)
+ )
+
+ attach_css_to_chapters = gr.Checkbox(
+ label="Attach CSS to Chapters (Fixes styling issues)",
+ value=self.get_config_value('attach_css_to_chapters', False)
+ )
+
+ retain_source_extension = gr.Checkbox(
+ label="Retain source extension (no 'response_' prefix)",
+ value=self.get_config_value('retain_source_extension', True)
+ )
+
+ with gr.Column():
+ # Conservative Batching
+ use_conservative_batching = gr.Checkbox(
+ label="Use Conservative Batching",
+ value=self.get_config_value('use_conservative_batching', False),
+ info="Groups chapters in batches of 3x batch size for memory management"
+ )
+
+ # Gemini API Safety
+ disable_gemini_safety = gr.Checkbox(
+ label="Disable Gemini API Safety Filters",
+ value=self.get_config_value('disable_gemini_safety', False),
+ info="⚠️ Disables ALL content safety filters for Gemini models (BLOCK_NONE)"
+ )
+
+ # OpenRouter Options
+ use_http_openrouter = gr.Checkbox(
+ label="Use HTTP-only for OpenRouter (bypass SDK)",
+ value=self.get_config_value('use_http_openrouter', False),
+ info="Direct HTTP POST with explicit headers"
+ )
+
+ disable_openrouter_compression = gr.Checkbox(
+ label="Disable compression for OpenRouter (Accept-Encoding)",
+ value=self.get_config_value('disable_openrouter_compression', False),
+ info="Sends Accept-Encoding: identity for uncompressed responses"
+ )
+
+ gr.Markdown("---")
+ gr.Markdown("#### Chapter Extraction Settings")
+
+ with gr.Row():
+ with gr.Column():
+ gr.Markdown("**Text Extraction Method:**")
+ text_extraction_method = gr.Radio(
+ choices=["standard", "enhanced"],
+ value=self.get_config_value('text_extraction_method', 'standard'),
+ label="",
+ info="Standard uses BeautifulSoup, Enhanced uses html2text"
+ )
+
+ gr.Markdown("• **Standard (BeautifulSoup)** - Traditional HTML parsing, fast and reliable")
+ gr.Markdown("• **Enhanced (html2text)** - Superior Unicode handling, cleaner text extraction")
+
+ with gr.Column():
+ gr.Markdown("**File Filtering Level:**")
+ file_filtering_level = gr.Radio(
+ choices=["smart", "moderate", "full"],
+ value=self.get_config_value('file_filtering_level', 'smart'),
+ label="",
+ info="Controls which files are extracted from EPUBs"
+ )
+
+ gr.Markdown("• **Smart (Aggressive Filtering)** - Skips navigation, TOC, copyright files")
+ gr.Markdown("• **Moderate** - Only skips obvious navigation files")
+ gr.Markdown("• **Full (No Filtering)** - Extracts ALL HTML/XHTML files")
+
+ gr.Markdown("---")
+ gr.Markdown("#### Response Handling & Retry Logic")
+
+ with gr.Row():
+ with gr.Column():
+ gr.Markdown("**GPT-5 Thinking (OpenRouter/OpenAI-style)**")
+ enable_gpt_thinking = gr.Checkbox(
+ label="Enable GPT / OR Thinking",
+ value=self.get_config_value('enable_gpt_thinking', True),
+ info="Controls GPT-5 and OpenRouter reasoning"
+ )
+
+ with gr.Row():
+ gpt_thinking_effort = gr.Dropdown(
+ choices=["low", "medium", "high"],
+ value=self.get_config_value('gpt_thinking_effort', 'medium'),
+ label="Effort",
+ interactive=True
+ )
+
+ or_thinking_tokens = gr.Number(
+ label="OR Thinking Tokens",
+ value=self.get_config_value('or_thinking_tokens', 2000),
+ minimum=0,
+ maximum=50000,
+ info="tokens"
+ )
+
+ gr.Markdown("*Provide Tokens to force a max token budget for other models; GPT-5 only uses Effort (low/medium/high)*", elem_classes=["markdown-small"])
+
+ with gr.Column():
+ gr.Markdown("**Gemini Thinking Mode**")
+ enable_gemini_thinking = gr.Checkbox(
+ label="Enable Gemini Thinking",
+ value=self.get_config_value('enable_gemini_thinking', False),
+ info="Control Gemini's thinking process",
+ interactive=True
+ )
+
+ gemini_thinking_budget = gr.Number(
+ label="Budget",
+ value=self.get_config_value('gemini_thinking_budget', 0),
+ minimum=0,
+ maximum=50000,
+ info="tokens (0 = disabled)",
+ interactive=True
+ )
+
+ gr.Markdown("*0 = disabled, 512-24576 = limited thinking*", elem_classes=["markdown-small"])
+
+ gr.Markdown("---")
+ gr.Markdown("🔒 **API keys are encrypted** when saved to config using AES encryption.")
+
+ save_api_key = gr.Checkbox(
+ label="Save API Key (Encrypted)",
+ value=True
+ )
+
+ save_status = gr.Textbox(label="Settings Status", value="Use the 'Save Config' button to save changes", interactive=False)
+
+ # Hidden HTML component for JavaScript execution
+ js_executor = gr.HTML("", visible=False)
+
+ # Auto-save function for settings tab
+ def save_settings_tab(thread_delay_val, api_delay_val, chapter_range_val, token_limit_val, disable_token_limit_val, output_token_limit_val, contextual_val, history_limit_val, rolling_history_val, batch_translation_val, batch_size_val, save_api_key_val):
+ """Save settings from the Settings tab"""
+ try:
+ current_config = self.get_current_config_for_update()
+ # Don't decrypt - just update non-encrypted fields
+
+ # Update settings
+ current_config['thread_submission_delay'] = float(thread_delay_val)
+ current_config['delay'] = float(api_delay_val)
+ current_config['chapter_range'] = str(chapter_range_val)
+ current_config['token_limit'] = int(token_limit_val)
+ current_config['token_limit_disabled'] = bool(disable_token_limit_val)
+ current_config['max_output_tokens'] = int(output_token_limit_val)
+ current_config['contextual'] = bool(contextual_val)
+ current_config['translation_history_limit'] = int(history_limit_val)
+ current_config['translation_history_rolling'] = bool(rolling_history_val)
+ current_config['batch_translation'] = bool(batch_translation_val)
+ current_config['batch_size'] = int(batch_size_val)
+
+ # Save to file
+ self.save_config(current_config)
+
+                        # No JavaScript payload to push to localStorage; keep it empty so the
+                        # hidden HTML executor has nothing to run.
+                        js_code = ""
+
+ return "✅ Settings saved successfully", js_code
+ except Exception as e:
+ return f"❌ Failed to save: {str(e)}", ""
+
+ # Settings tab auto-save handlers removed - use manual Save Config button
+
+ # Token sync handlers removed - use manual Save Config button
+
+ # Help Tab
+ with gr.Tab("❓ Help"):
+ gr.Markdown("""
+ ## How to Use Glossarion
+
+ ### Translation
+ 1. Upload an EPUB file
+ 2. Select AI model (GPT-4, Claude, etc.)
+ 3. Enter your API key
+ 4. Click "Translate"
+ 5. Download the translated EPUB
+
+ ### Manga Translation
+ 1. Upload manga image(s) (PNG, JPG, etc.)
+ 2. Select AI model and enter API key
+ 3. Choose translation profile (e.g., Manga_JP, Manga_KR)
+ 4. Configure OCR settings (Google Cloud Vision recommended)
+ 5. Enable bubble detection and inpainting for best results
+ 6. Click "Translate Manga"
+
+ ### Glossary Extraction
+ 1. Upload an EPUB file
+ 2. Configure extraction settings
+ 3. Click "Extract Glossary"
+ 4. Use the CSV in future translations
+
+ ### API Keys
+ - **OpenAI**: Get from https://platform.openai.com/api-keys
+ - **Anthropic**: Get from https://console.anthropic.com/
+
+ ### Translation Profiles
+ Profiles contain detailed translation instructions and rules.
+ Select a profile that matches your source language and style preferences.
+
+ You can create and edit profiles in the desktop application.
+
+ ### Tips
+ - Use glossaries for consistent character name translation
+ - Lower temperature (0.1-0.3) for more literal translations
+ - Higher temperature (0.5-0.7) for more creative translations
+ """)
+
+ # Create a comprehensive load function that refreshes ALL values
+ def load_all_settings():
+ """Load all settings from config file on page refresh"""
+ # Reload config to get latest values
+ self.config = self.load_config()
+ self.decrypted_config = decrypt_config(self.config.copy()) if API_KEY_ENCRYPTION_AVAILABLE else self.config.copy()
+
+ # Helper function to convert RGB arrays to hex
+ def to_hex_color(color_value, default='#000000'):
+ if isinstance(color_value, (list, tuple)) and len(color_value) >= 3:
+ return '#{:02x}{:02x}{:02x}'.format(int(color_value[0]), int(color_value[1]), int(color_value[2]))
+ elif isinstance(color_value, str):
+ return color_value if color_value.startswith('#') else default
+ return default
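+                # Illustrative: to_hex_color([255, 255, 255]) -> '#ffffff'; an existing '#rrggbb'
+                # string passes through unchanged, and anything else falls back to `default`.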
+
+ # Return values for all tracked components
+ return [
+ self.get_config_value('model', 'gpt-4-turbo'), # epub_model
+ self.get_config_value('api_key', ''), # epub_api_key
+ self.get_config_value('active_profile', list(self.profiles.keys())[0] if self.profiles else ''), # epub_profile
+ self.profiles.get(self.get_config_value('active_profile', ''), ''), # epub_system_prompt
+ self.get_config_value('temperature', 0.3), # epub_temperature
+ self.get_config_value('max_output_tokens', 16000), # epub_max_tokens
+ self.get_config_value('enable_image_translation', False), # enable_image_translation
+ self.get_config_value('enable_auto_glossary', False), # enable_auto_glossary
+ self.get_config_value('append_glossary_to_prompt', True), # append_glossary
+ # Auto glossary settings
+ self.get_config_value('glossary_min_frequency', 2), # auto_glossary_min_freq
+ self.get_config_value('glossary_max_names', 50), # auto_glossary_max_names
+ self.get_config_value('glossary_max_titles', 30), # auto_glossary_max_titles
+ self.get_config_value('glossary_batch_size', 50), # auto_glossary_batch_size
+ self.get_config_value('glossary_filter_mode', 'all'), # auto_glossary_filter_mode
+ self.get_config_value('glossary_fuzzy_threshold', 0.90), # auto_glossary_fuzzy_threshold
+ # Manual glossary extraction settings
+ self.get_config_value('manual_glossary_min_frequency', self.get_config_value('glossary_min_frequency', 2)), # min_freq
+ self.get_config_value('manual_glossary_max_names', self.get_config_value('glossary_max_names', 50)), # max_names_slider
+ self.get_config_value('manual_glossary_max_titles', self.get_config_value('glossary_max_titles', 30)), # max_titles
+ self.get_config_value('glossary_max_text_size', 50000), # max_text_size
+ self.get_config_value('glossary_max_sentences', 200), # max_sentences
+ self.get_config_value('manual_glossary_batch_size', self.get_config_value('glossary_batch_size', 50)), # translation_batch
+ self.get_config_value('glossary_chapter_split_threshold', 8192), # chapter_split_threshold
+ self.get_config_value('manual_glossary_filter_mode', self.get_config_value('glossary_filter_mode', 'all')), # filter_mode
+ self.get_config_value('strip_honorifics', True), # strip_honorifics
+ self.get_config_value('manual_glossary_fuzzy_threshold', self.get_config_value('glossary_fuzzy_threshold', 0.90)), # fuzzy_threshold
+ # Chapter processing options
+ self.get_config_value('batch_translate_headers', False), # batch_translate_headers
+ self.get_config_value('headers_per_batch', 400), # headers_per_batch
+ self.get_config_value('use_ncx_navigation', False), # use_ncx_navigation
+ self.get_config_value('attach_css_to_chapters', False), # attach_css_to_chapters
+ self.get_config_value('retain_source_extension', True), # retain_source_extension
+ self.get_config_value('use_conservative_batching', False), # use_conservative_batching
+ self.get_config_value('disable_gemini_safety', False), # disable_gemini_safety
+ self.get_config_value('use_http_openrouter', False), # use_http_openrouter
+ self.get_config_value('disable_openrouter_compression', False), # disable_openrouter_compression
+ self.get_config_value('text_extraction_method', 'standard'), # text_extraction_method
+ self.get_config_value('file_filtering_level', 'smart'), # file_filtering_level
+ # QA report format
+ self.get_config_value('qa_report_format', 'detailed'), # report_format
+ # Thinking mode settings
+ self.get_config_value('enable_gpt_thinking', True), # enable_gpt_thinking
+ self.get_config_value('gpt_thinking_effort', 'medium'), # gpt_thinking_effort
+ self.get_config_value('or_thinking_tokens', 2000), # or_thinking_tokens
+ self.get_config_value('enable_gemini_thinking', False), # enable_gemini_thinking - disabled by default
+ self.get_config_value('gemini_thinking_budget', 0), # gemini_thinking_budget - 0 = disabled
+ # Manga settings
+ self.get_config_value('model', 'gpt-4-turbo'), # manga_model
+ self.get_config_value('api_key', ''), # manga_api_key
+ self.get_config_value('active_profile', list(self.profiles.keys())[0] if self.profiles else ''), # manga_profile
+ self.profiles.get(self.get_config_value('active_profile', ''), ''), # manga_system_prompt
+ self.get_config_value('ocr_provider', 'custom-api'), # ocr_provider
+ self.get_config_value('azure_vision_key', ''), # azure_key
+ self.get_config_value('azure_vision_endpoint', ''), # azure_endpoint
+ self.get_config_value('bubble_detection_enabled', True), # bubble_detection
+ self.get_config_value('inpainting_enabled', True), # inpainting
+ self.get_config_value('manga_font_size_mode', 'auto'), # font_size_mode
+ self.get_config_value('manga_font_size', 24), # font_size
+ self.get_config_value('manga_font_multiplier', 1.0), # font_multiplier
+ self.get_config_value('manga_min_font_size', 12), # min_font_size
+ self.get_config_value('manga_max_font_size', 48), # max_font_size
+            # Convert colors to hex format if they're stored as RGB arrays (defaults match the manga integration: white text, black shadow)
+ to_hex_color(self.get_config_value('manga_text_color', [255, 255, 255]), '#FFFFFF'), # text_color_rgb - default white
+ self.get_config_value('manga_shadow_enabled', True), # shadow_enabled
+ to_hex_color(self.get_config_value('manga_shadow_color', [0, 0, 0]), '#000000'), # shadow_color - default black
+ self.get_config_value('manga_shadow_offset_x', 2), # shadow_offset_x
+ self.get_config_value('manga_shadow_offset_y', 2), # shadow_offset_y
+ self.get_config_value('manga_shadow_blur', 0), # shadow_blur
+ self.get_config_value('manga_bg_opacity', 130), # bg_opacity
+ self.get_config_value('manga_bg_style', 'circle'), # bg_style
+ self.get_config_value('manga_settings', {}).get('advanced', {}).get('parallel_panel_translation', False), # parallel_panel_translation
+ self.get_config_value('manga_settings', {}).get('advanced', {}).get('panel_max_workers', 7), # panel_max_workers
+ ]
+
+
+ # SECURITY: Save Config button DISABLED to prevent API keys from being saved to persistent storage on HF Spaces
+ # This is a critical security measure to prevent API key leakage in shared environments
+ # save_config_btn.click(
+ # fn=save_all_config,
+ # inputs=[
+ # # EPUB tab fields
+ # epub_model, epub_api_key, epub_profile, epub_temperature, epub_max_tokens,
+ # enable_image_translation, enable_auto_glossary, append_glossary,
+ # # Auto glossary settings
+ # auto_glossary_min_freq, auto_glossary_max_names, auto_glossary_max_titles,
+ # auto_glossary_batch_size, auto_glossary_filter_mode, auto_glossary_fuzzy_threshold,
+ # enable_post_translation_scan,
+ # # Manual glossary extraction settings
+ # min_freq, max_names_slider, max_titles,
+ # max_text_size, max_sentences, translation_batch,
+ # chapter_split_threshold, filter_mode, strip_honorifics,
+ # fuzzy_threshold, extraction_prompt, format_instructions,
+ # use_legacy_csv,
+ # # QA Scanner settings
+ # min_foreign_chars, check_repetition, check_glossary_leakage,
+ # min_file_length, check_multiple_headers, check_missing_html,
+ # check_insufficient_paragraphs, min_paragraph_percentage,
+ # report_format, auto_save_report,
+ # # Chapter processing options
+ # batch_translate_headers, headers_per_batch, use_ncx_navigation,
+ # attach_css_to_chapters, retain_source_extension,
+ # use_conservative_batching, disable_gemini_safety,
+ # use_http_openrouter, disable_openrouter_compression,
+ # text_extraction_method, file_filtering_level,
+ # # Thinking mode settings
+ # enable_gpt_thinking, gpt_thinking_effort, or_thinking_tokens,
+ # enable_gemini_thinking, gemini_thinking_budget,
+ # # Manga tab fields
+ # manga_model, manga_api_key, manga_profile,
+ # ocr_provider, azure_key, azure_endpoint,
+ # bubble_detection, inpainting,
+ # font_size_mode, font_size, font_multiplier, min_font_size, max_font_size,
+ # text_color_rgb, shadow_enabled, shadow_color,
+ # shadow_offset_x, shadow_offset_y, shadow_blur,
+ # bg_opacity, bg_style,
+ # parallel_panel_translation, panel_max_workers,
+ # # Advanced Settings fields
+ # detector_type, rtdetr_confidence, bubble_confidence,
+ # detect_text_bubbles, detect_empty_bubbles, detect_free_text, bubble_max_detections,
+ # local_inpaint_method, webtoon_mode,
+ # inpaint_batch_size, inpaint_cache_enabled,
+ # parallel_processing, max_workers,
+ # preload_local_inpainting, panel_start_stagger,
+ # torch_precision, auto_cleanup_models,
+ # debug_mode, save_intermediate, concise_pipeline_logs
+ # ],
+ # outputs=[save_status_text]
+ # )
+
+ # Add load handler to restore settings on page load
+ app.load(
+ fn=load_all_settings,
+ inputs=[],
+ outputs=[
+ epub_model, epub_api_key, epub_profile, epub_system_prompt, epub_temperature, epub_max_tokens,
+ enable_image_translation, enable_auto_glossary, append_glossary,
+ # Auto glossary settings
+ auto_glossary_min_freq, auto_glossary_max_names, auto_glossary_max_titles,
+ auto_glossary_batch_size, auto_glossary_filter_mode, auto_glossary_fuzzy_threshold,
+ # Manual glossary extraction settings
+ min_freq, max_names_slider, max_titles,
+ max_text_size, max_sentences, translation_batch,
+ chapter_split_threshold, filter_mode, strip_honorifics,
+ fuzzy_threshold,
+ # Chapter processing options
+ batch_translate_headers, headers_per_batch, use_ncx_navigation,
+ attach_css_to_chapters, retain_source_extension,
+ use_conservative_batching, disable_gemini_safety,
+ use_http_openrouter, disable_openrouter_compression,
+ text_extraction_method, file_filtering_level,
+ report_format,
+ # Thinking mode settings
+ enable_gpt_thinking, gpt_thinking_effort, or_thinking_tokens,
+ enable_gemini_thinking, gemini_thinking_budget,
+ # Manga settings
+ manga_model, manga_api_key, manga_profile, manga_system_prompt,
+ ocr_provider, azure_key, azure_endpoint, bubble_detection, inpainting,
+ font_size_mode, font_size, font_multiplier, min_font_size, max_font_size,
+ text_color_rgb, shadow_enabled, shadow_color, shadow_offset_x, shadow_offset_y,
+ shadow_blur, bg_opacity, bg_style, parallel_panel_translation, panel_max_workers
+ ]
+ )
+
+ return app
+
+
+def main():
+ """Launch Gradio web app"""
+ print("🚀 Starting Glossarion Web Interface...")
+
+ # Check if running on Hugging Face Spaces
+ is_spaces = os.getenv('SPACE_ID') is not None or os.getenv('HF_SPACES') == 'true'
+ if is_spaces:
+ print("🤗 Running on Hugging Face Spaces")
+ print(f"📁 Space ID: {os.getenv('SPACE_ID', 'Unknown')}")
+ print(f"📁 Files in current directory: {len(os.listdir('.'))} items")
+ print(f"📁 Working directory: {os.getcwd()}")
+ print(f"😎 Available manga modules: {MANGA_TRANSLATION_AVAILABLE}")
+ else:
+ print("🏠 Running locally")
+
+ web_app = GlossarionWeb()
+ app = web_app.create_interface()
+
+ # Set favicon with absolute path if available (skip for Spaces)
+ favicon_path = None
+ if not is_spaces and os.path.exists("Halgakos.ico"):
+ favicon_path = os.path.abspath("Halgakos.ico")
+ print(f"✅ Using favicon: {favicon_path}")
+ elif not is_spaces:
+ print("⚠️ Halgakos.ico not found")
+
+ # Launch with options appropriate for environment
+ launch_args = {
+ "server_name": "0.0.0.0", # Allow external access
+ "server_port": 7860,
+ "share": False,
+ "show_error": True,
+ }
+
+ # Only add favicon for non-Spaces environments
+ if not is_spaces and favicon_path:
+ launch_args["favicon_path"] = favicon_path
+
+ app.launch(**launch_args)
+
+
+if __name__ == "__main__":
+ main()
\ No newline at end of file
diff --git a/bubble_detector.py b/bubble_detector.py
new file mode 100644
index 0000000000000000000000000000000000000000..86b633385624dc3ae2de7fba5ef90276fbd1d3ba
--- /dev/null
+++ b/bubble_detector.py
@@ -0,0 +1,2030 @@
+"""
+bubble_detector.py - Modified version that works in frozen PyInstaller executables
+Replace your bubble_detector.py with this version
+"""
+import os
+import sys
+import json
+import numpy as np
+import cv2
+from typing import List, Tuple, Optional, Dict, Any
+import logging
+import traceback
+import hashlib
+from pathlib import Path
+import threading
+import time
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+# Check if we're running in a frozen environment
+IS_FROZEN = getattr(sys, 'frozen', False)
+if IS_FROZEN:
+ # In frozen environment, set proper paths for ML libraries
+ MEIPASS = sys._MEIPASS
+ os.environ['TORCH_HOME'] = MEIPASS
+ os.environ['TRANSFORMERS_CACHE'] = os.path.join(MEIPASS, 'transformers')
+ os.environ['HF_HOME'] = os.path.join(MEIPASS, 'huggingface')
+ logger.info(f"Running in frozen environment: {MEIPASS}")
+
+# Modified import checks for frozen environment
+YOLO_AVAILABLE = False
+YOLO = None
+torch = None
+TORCH_AVAILABLE = False
+ONNX_AVAILABLE = False
+TRANSFORMERS_AVAILABLE = False
+RTDetrForObjectDetection = None
+RTDetrImageProcessor = None
+PIL_AVAILABLE = False
+
+# Try to import YOLO dependencies with better error handling
+if IS_FROZEN:
+ # In frozen environment, try harder to import
+ try:
+ # First try to import torch components individually
+ import torch
+ import torch.nn
+ import torch.cuda
+ TORCH_AVAILABLE = True
+ logger.info("✓ PyTorch loaded in frozen environment")
+ except Exception as e:
+ logger.warning(f"PyTorch not available in frozen environment: {e}")
+ TORCH_AVAILABLE = False
+ torch = None
+
+ # Try ultralytics after torch
+ if TORCH_AVAILABLE:
+ try:
+ from ultralytics import YOLO
+ YOLO_AVAILABLE = True
+ logger.info("✓ Ultralytics YOLO loaded in frozen environment")
+ except Exception as e:
+ logger.warning(f"Ultralytics not available in frozen environment: {e}")
+ YOLO_AVAILABLE = False
+
+ # Try transformers
+ try:
+ import transformers
+ # Try specific imports
+ try:
+ from transformers import RTDetrForObjectDetection, RTDetrImageProcessor
+ TRANSFORMERS_AVAILABLE = True
+ logger.info("✓ Transformers RT-DETR loaded in frozen environment")
+ except ImportError:
+ # Try alternative import
+ try:
+ from transformers import AutoModel, AutoImageProcessor
+ RTDetrForObjectDetection = AutoModel
+ RTDetrImageProcessor = AutoImageProcessor
+ TRANSFORMERS_AVAILABLE = True
+ logger.info("✓ Transformers loaded with AutoModel fallback")
+            except Exception:
+ TRANSFORMERS_AVAILABLE = False
+ logger.warning("Transformers RT-DETR not available in frozen environment")
+ except Exception as e:
+ logger.warning(f"Transformers not available in frozen environment: {e}")
+ TRANSFORMERS_AVAILABLE = False
+else:
+ # Normal environment - original import logic
+ try:
+ from ultralytics import YOLO
+ YOLO_AVAILABLE = True
+    except Exception:
+ YOLO_AVAILABLE = False
+ logger.warning("Ultralytics YOLO not available")
+
+ try:
+ import torch
+ # Test if cuda attribute exists
+ _ = torch.cuda
+ TORCH_AVAILABLE = True
+ except (ImportError, AttributeError):
+ TORCH_AVAILABLE = False
+ torch = None
+ logger.warning("PyTorch not available or incomplete")
+
+ try:
+ from transformers import RTDetrForObjectDetection, RTDetrImageProcessor
+ try:
+ from transformers import RTDetrV2ForObjectDetection
+ RTDetrForObjectDetection = RTDetrV2ForObjectDetection
+ except ImportError:
+ pass
+ TRANSFORMERS_AVAILABLE = True
+    except Exception:
+ TRANSFORMERS_AVAILABLE = False
+ logger.info("Transformers not available for RT-DETR")
+
+# Configure ORT memory behavior before importing
+try:
+ os.environ.setdefault('ORT_DISABLE_MEMORY_ARENA', '1')
+except Exception:
+ pass
+# ONNX Runtime - works well in frozen environments
+try:
+ import onnxruntime as ort
+ ONNX_AVAILABLE = True
+ logger.info("✓ ONNX Runtime available")
+except ImportError:
+ ONNX_AVAILABLE = False
+ logger.warning("ONNX Runtime not available")
+
+# PIL
+try:
+ from PIL import Image
+ PIL_AVAILABLE = True
+except ImportError:
+ PIL_AVAILABLE = False
+ logger.info("PIL not available")
+
+
+class BubbleDetector:
+ """
+ Combined YOLOv8 and RT-DETR speech bubble detector for comics and manga.
+ Supports multiple model formats and provides configurable detection.
+ Backward compatible with existing code while adding RT-DETR support.
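+
+    Example (a minimal usage sketch; file paths and the confidence value are illustrative):
+
+        detector = BubbleDetector()
+        if detector.load_rtdetr_model():
+            regions = detector.detect_with_rtdetr("page_001.png", confidence=0.3)
+            # -> {'bubbles': [...], 'text_bubbles': [...], 'text_free': [...]} of (x, y, w, h) boxes
+        elif detector.load_model("models/bubbles_yolov8.pt"):  # hypothetical local YOLOv8 weights
+            boxes = detector.detect_bubbles("page_001.png")    # -> [(x, y, w, h), ...]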
+ """
+
+ # Process-wide shared RT-DETR to avoid concurrent meta-device loads
+ _rtdetr_init_lock = threading.Lock()
+ _rtdetr_shared_model = None
+ _rtdetr_shared_processor = None
+ _rtdetr_loaded = False
+ _rtdetr_repo_id = 'ogkalu/comic-text-and-bubble-detector'
+
+ # Shared RT-DETR (ONNX) across process to avoid device/context storms
+ _rtdetr_onnx_init_lock = threading.Lock()
+ _rtdetr_onnx_shared_session = None
+ _rtdetr_onnx_loaded = False
+ _rtdetr_onnx_providers = None
+ _rtdetr_onnx_model_path = None
+ # Limit concurrent runs to avoid device hangs. Defaults to 2 for better parallelism.
+ # Can be overridden via env DML_MAX_CONCURRENT or config rtdetr_max_concurrency
+ try:
+ _rtdetr_onnx_max_concurrent = int(os.environ.get('DML_MAX_CONCURRENT', '2'))
+ except Exception:
+ _rtdetr_onnx_max_concurrent = 2
+ _rtdetr_onnx_sema = threading.Semaphore(max(1, _rtdetr_onnx_max_concurrent))
+ _rtdetr_onnx_sema_initialized = False
+
+ def __init__(self, config_path: str = "config.json"):
+ """
+ Initialize the bubble detector.
+
+ Args:
+ config_path: Path to configuration file
+ """
+ # Set thread limits early if environment indicates single-threaded mode
+ try:
+ if os.environ.get('OMP_NUM_THREADS') == '1':
+ # Already in single-threaded mode, ensure it's applied to this process
+ # Check if torch is available at module level before trying to use it
+ if TORCH_AVAILABLE and torch is not None:
+ try:
+ torch.set_num_threads(1)
+ except (RuntimeError, AttributeError):
+ pass
+ try:
+ import cv2
+ cv2.setNumThreads(1)
+ except (ImportError, AttributeError):
+ pass
+ except Exception:
+ pass
+
+ self.config_path = config_path
+ self.config = self._load_config()
+
+ # YOLOv8 components (original)
+ self.model = None
+ self.model_loaded = False
+ self.model_type = None # 'yolo', 'onnx', or 'torch'
+ self.onnx_session = None
+
+ # RT-DETR components (new)
+ self.rtdetr_model = None
+ self.rtdetr_processor = None
+ self.rtdetr_loaded = False
+ self.rtdetr_repo = 'ogkalu/comic-text-and-bubble-detector'
+
+ # RT-DETR (ONNX) backend components
+ self.rtdetr_onnx_session = None
+ self.rtdetr_onnx_loaded = False
+ self.rtdetr_onnx_repo = 'ogkalu/comic-text-and-bubble-detector'
+
+ # RT-DETR class definitions
+ self.CLASS_BUBBLE = 0 # Empty speech bubble
+ self.CLASS_TEXT_BUBBLE = 1 # Bubble with text
+ self.CLASS_TEXT_FREE = 2 # Text without bubble
+
+ # Detection settings
+ self.default_confidence = 0.3
+ self.default_iou_threshold = 0.45
+ # Allow override from settings
+ try:
+ ocr_cfg = self.config.get('manga_settings', {}).get('ocr', {}) if isinstance(self.config, dict) else {}
+ self.default_max_detections = int(ocr_cfg.get('bubble_max_detections', 100))
+ self.max_det_yolo = int(ocr_cfg.get('bubble_max_detections_yolo', self.default_max_detections))
+ self.max_det_rtdetr = int(ocr_cfg.get('bubble_max_detections_rtdetr', self.default_max_detections))
+ except Exception:
+ self.default_max_detections = 100
+ self.max_det_yolo = 100
+ self.max_det_rtdetr = 100
+
+ # Cache directory for ONNX conversions
+ self.cache_dir = os.environ.get('BUBBLE_CACHE_DIR', 'models')
+ os.makedirs(self.cache_dir, exist_ok=True)
+
+ # RT-DETR concurrency setting from config
+ try:
+ rtdetr_max_conc = int(ocr_cfg.get('rtdetr_max_concurrency', 2))
+ # Update class-level semaphore if not yet initialized or if value changed
+ if not BubbleDetector._rtdetr_onnx_sema_initialized or rtdetr_max_conc != BubbleDetector._rtdetr_onnx_max_concurrent:
+ BubbleDetector._rtdetr_onnx_max_concurrent = max(1, rtdetr_max_conc)
+ BubbleDetector._rtdetr_onnx_sema = threading.Semaphore(BubbleDetector._rtdetr_onnx_max_concurrent)
+ BubbleDetector._rtdetr_onnx_sema_initialized = True
+ logger.info(f"RT-DETR concurrency set to: {BubbleDetector._rtdetr_onnx_max_concurrent}")
+ except Exception as e:
+ logger.warning(f"Failed to set RT-DETR concurrency: {e}")
+
+ # GPU availability
+ self.use_gpu = TORCH_AVAILABLE and torch.cuda.is_available()
+ self.device = 'cuda' if self.use_gpu else 'cpu'
+
+ # Quantization/precision settings
+ adv_cfg = self.config.get('manga_settings', {}).get('advanced', {}) if isinstance(self.config, dict) else {}
+ ocr_cfg = self.config.get('manga_settings', {}).get('ocr', {}) if isinstance(self.config, dict) else {}
+ env_quant = os.environ.get('MODEL_QUANTIZE', 'false').lower() == 'true'
+ self.quantize_enabled = bool(env_quant or adv_cfg.get('quantize_models', False) or ocr_cfg.get('quantize_bubble_detector', False))
+ self.quantize_dtype = str(adv_cfg.get('torch_precision', os.environ.get('TORCH_PRECISION', 'auto'))).lower()
+ # Prefer advanced.onnx_quantize; fall back to env or global quantize
+ self.onnx_quantize_enabled = bool(adv_cfg.get('onnx_quantize', os.environ.get('ONNX_QUANTIZE', 'false').lower() == 'true' or self.quantize_enabled))
+
+ # Stop flag support
+ self.stop_flag = None
+ self._stopped = False
+ self.log_callback = None
+
+ logger.info(f"🗨️ BubbleDetector initialized")
+ logger.info(f" GPU: {'Available' if self.use_gpu else 'Not available'}")
+ logger.info(f" YOLO: {'Available' if YOLO_AVAILABLE else 'Not installed'}")
+ logger.info(f" ONNX: {'Available' if ONNX_AVAILABLE else 'Not installed'}")
+ logger.info(f" RT-DETR: {'Available' if TRANSFORMERS_AVAILABLE else 'Not installed'}")
+ logger.info(f" Quantization: {'ENABLED' if self.quantize_enabled else 'disabled'} (torch_precision={self.quantize_dtype}, onnx_quantize={'on' if self.onnx_quantize_enabled else 'off'})" )
+
+ def _load_config(self) -> Dict[str, Any]:
+ """Load configuration from file."""
+ if os.path.exists(self.config_path):
+ try:
+ with open(self.config_path, 'r', encoding='utf-8') as f:
+ return json.load(f)
+ except Exception as e:
+ logger.warning(f"Failed to load config: {e}")
+ return {}
+
+ def _save_config(self):
+ """Save configuration to file."""
+ try:
+ with open(self.config_path, 'w', encoding='utf-8') as f:
+ json.dump(self.config, f, indent=2)
+ except Exception as e:
+ logger.error(f"Failed to save config: {e}")
+
+ def set_stop_flag(self, stop_flag):
+ """Set the stop flag for checking interruptions"""
+ self.stop_flag = stop_flag
+ self._stopped = False
+
+ def set_log_callback(self, log_callback):
+ """Set log callback for GUI integration"""
+ self.log_callback = log_callback
+
+ def _check_stop(self) -> bool:
+ """Check if stop has been requested"""
+ if self._stopped:
+ return True
+ if self.stop_flag and self.stop_flag.is_set():
+ self._stopped = True
+ return True
+ # Check global manga translator cancellation
+ try:
+ from manga_translator import MangaTranslator
+ if MangaTranslator.is_globally_cancelled():
+ self._stopped = True
+ return True
+ except Exception:
+ pass
+ return False
+
+ def _log(self, message: str, level: str = "info"):
+ """Log message with stop suppression"""
+ # Suppress logs when stopped (allow only essential stop confirmation messages)
+ if self._check_stop():
+ essential_stop_keywords = [
+ "⏹️ Translation stopped by user",
+ "⏹️ Bubble detection stopped",
+ "cleanup", "🧹"
+ ]
+ if not any(keyword in message for keyword in essential_stop_keywords):
+ return
+
+ if self.log_callback:
+ self.log_callback(message, level)
+ else:
+ logger.info(message) if level == 'info' else getattr(logger, level, logger.info)(message)
+
+ def reset_stop_flags(self):
+ """Reset stop flags when starting new processing"""
+ self._stopped = False
+
+ def load_model(self, model_path: str, force_reload: bool = False) -> bool:
+ """
+ Load a YOLOv8 model for bubble detection.
+
+ Args:
+ model_path: Path to model file (.pt, .onnx, or .torchscript)
+ force_reload: Force reload even if model is already loaded
+
+ Returns:
+ True if model loaded successfully, False otherwise
+ """
+ try:
+ # If given a Hugging Face repo ID (e.g., 'owner/name'), fetch detector.onnx into models/
+ if model_path and (('/' in model_path) and not os.path.exists(model_path)):
+ try:
+ from huggingface_hub import hf_hub_download
+ os.makedirs(self.cache_dir, exist_ok=True)
+ logger.info(f"📥 Resolving repo '{model_path}' to detector.onnx in {self.cache_dir}...")
+ resolved = hf_hub_download(repo_id=model_path, filename='detector.onnx', cache_dir=self.cache_dir, local_dir=self.cache_dir, local_dir_use_symlinks=False)
+ if resolved and os.path.exists(resolved):
+ model_path = resolved
+ logger.info(f"✅ Downloaded detector.onnx to: {model_path}")
+ except Exception as repo_err:
+ logger.error(f"Failed to download from repo '{model_path}': {repo_err}")
+ if not os.path.exists(model_path):
+ logger.error(f"Model file not found: {model_path}")
+ return False
+
+ # Check if it's the same model already loaded
+ if self.model_loaded and not force_reload:
+ last_path = self.config.get('last_model_path', '')
+ if last_path == model_path:
+ logger.info("Model already loaded (same path)")
+ return True
+ else:
+ logger.info(f"Model path changed from {last_path} to {model_path}, reloading...")
+ force_reload = True
+
+ # Clear previous model if force reload
+ if force_reload:
+ logger.info("Force reloading model...")
+ self.model = None
+ self.onnx_session = None
+ self.model_loaded = False
+ self.model_type = None
+
+ logger.info(f"📥 Loading bubble detection model: {model_path}")
+
+ # Determine model type by extension
+ ext = Path(model_path).suffix.lower()
+
+ if ext in ['.pt', '.pth']:
+ if not YOLO_AVAILABLE:
+ logger.warning("Ultralytics package not available in this build")
+ logger.info("Bubble detection will be disabled - this is normal for lightweight builds")
+ # Don't return False immediately, try other fallbacks
+ self.model_loaded = False
+ return False
+
+ # Load YOLOv8 model
+ try:
+ self.model = YOLO(model_path)
+ self.model_type = 'yolo'
+
+ # Set to eval mode
+ if hasattr(self.model, 'model'):
+ self.model.model.eval()
+
+ # Move to GPU if available
+ if self.use_gpu and TORCH_AVAILABLE:
+ try:
+ self.model.to('cuda')
+ except Exception as gpu_error:
+ logger.warning(f"Could not move model to GPU: {gpu_error}")
+
+ logger.info("✅ YOLOv8 model loaded successfully")
+ # Apply optional FP16 precision to reduce VRAM if enabled
+ if self.quantize_enabled and self.use_gpu and TORCH_AVAILABLE:
+ try:
+ m = self.model.model if hasattr(self.model, 'model') else self.model
+ m.half()
+ logger.info("🔻 Applied FP16 precision to YOLO model (GPU)")
+ except Exception as _e:
+ logger.warning(f"Could not switch YOLO model to FP16: {_e}")
+
+ except Exception as yolo_error:
+ logger.error(f"Failed to load YOLO model: {yolo_error}")
+ return False
+
+ elif ext == '.onnx':
+ if not ONNX_AVAILABLE:
+ logger.warning("ONNX Runtime not available in this build")
+ logger.info("ONNX model support disabled - this is normal for lightweight builds")
+ return False
+
+ try:
+ # Load ONNX model
+ providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if self.use_gpu else ['CPUExecutionProvider']
+ session_path = model_path
+ if self.quantize_enabled:
+ try:
+ from onnxruntime.quantization import quantize_dynamic, QuantType
+ quant_path = os.path.splitext(model_path)[0] + ".int8.onnx"
+ if not os.path.exists(quant_path) or os.environ.get('FORCE_ONNX_REBUILD', 'false').lower() == 'true':
+ logger.info("🔻 Quantizing ONNX model weights to INT8 (dynamic)...")
+ quantize_dynamic(model_input=model_path, model_output=quant_path, weight_type=QuantType.QInt8, op_types_to_quantize=['Conv', 'MatMul'])
+ session_path = quant_path
+ self.config['last_onnx_quantized_path'] = quant_path
+ self._save_config()
+ logger.info(f"✅ Using quantized ONNX model: {quant_path}")
+ except Exception as qe:
+ logger.warning(f"ONNX quantization not applied: {qe}")
+ # Use conservative ORT memory options to reduce RAM growth
+ so = ort.SessionOptions()
+ try:
+ so.enable_mem_pattern = False
+ so.enable_cpu_mem_arena = False
+ except Exception:
+ pass
+ self.onnx_session = ort.InferenceSession(session_path, sess_options=so, providers=providers)
+ self.model_type = 'onnx'
+
+ logger.info("✅ ONNX model loaded successfully")
+
+ except Exception as onnx_error:
+ logger.error(f"Failed to load ONNX model: {onnx_error}")
+ return False
+
+ elif ext == '.torchscript':
+ if not TORCH_AVAILABLE:
+ logger.warning("PyTorch not available in this build")
+ logger.info("TorchScript model support disabled - this is normal for lightweight builds")
+ return False
+
+ try:
+ # Add safety check for torch being None
+ if torch is None:
+ logger.error("PyTorch module is None - cannot load TorchScript model")
+ return False
+
+ # Load TorchScript model
+ self.model = torch.jit.load(model_path, map_location='cpu')
+ self.model.eval()
+ self.model_type = 'torch'
+
+ if self.use_gpu:
+ try:
+ self.model = self.model.cuda()
+ except Exception as gpu_error:
+ logger.warning(f"Could not move TorchScript model to GPU: {gpu_error}")
+
+ logger.info("✅ TorchScript model loaded successfully")
+
+ # Optional FP16 precision on GPU
+ if self.quantize_enabled and self.use_gpu and TORCH_AVAILABLE:
+ try:
+ self.model = self.model.half()
+ logger.info("🔻 Applied FP16 precision to TorchScript model (GPU)")
+ except Exception as _e:
+ logger.warning(f"Could not switch TorchScript model to FP16: {_e}")
+
+ except Exception as torch_error:
+ logger.error(f"Failed to load TorchScript model: {torch_error}")
+ return False
+
+ else:
+ logger.error(f"Unsupported model format: {ext}")
+ logger.info("Supported formats: .pt/.pth (YOLOv8), .onnx (ONNX), .torchscript (TorchScript)")
+ return False
+
+ # Only set loaded if we actually succeeded
+ self.model_loaded = True
+ self.config['last_model_path'] = model_path
+ self.config['model_type'] = self.model_type
+ self._save_config()
+
+ return True
+
+ except Exception as e:
+ logger.error(f"Failed to load model: {e}")
+ logger.error(traceback.format_exc())
+ self.model_loaded = False
+
+ # Provide helpful context for .exe users
+ logger.info("Note: If running from .exe, some ML libraries may not be included")
+ logger.info("This is normal for lightweight builds - bubble detection will be disabled")
+
+ return False
+
+ def load_rtdetr_model(self, model_path: str = None, model_id: str = None, force_reload: bool = False) -> bool:
+ """
+ Load RT-DETR model for advanced bubble and text detection.
+ This implementation avoids the 'meta tensor' copy error by:
+ - Serializing the entire load under a class lock (no concurrent loads)
+ - Loading directly onto the target device (CUDA if available) via device_map='auto'
+ - Avoiding .to() on a potentially-meta model; no device migration post-load
+
+ Args:
+ model_path: Optional path to local model
+ model_id: Optional HuggingFace model ID (default: 'ogkalu/comic-text-and-bubble-detector')
+ force_reload: Force reload even if already loaded
+
+ Returns:
+ True if successful, False otherwise
+ """
+ if not TRANSFORMERS_AVAILABLE:
+ logger.error("Transformers library required for RT-DETR. Install with: pip install transformers")
+ return False
+
+ if not PIL_AVAILABLE:
+ logger.error("PIL required for RT-DETR. Install with: pip install pillow")
+ return False
+
+ if self.rtdetr_loaded and not force_reload:
+ logger.info("RT-DETR model already loaded")
+ return True
+
+ # Fast path: if shared already loaded and not forcing reload, attach
+ if BubbleDetector._rtdetr_loaded and not force_reload:
+ self.rtdetr_model = BubbleDetector._rtdetr_shared_model
+ self.rtdetr_processor = BubbleDetector._rtdetr_shared_processor
+ self.rtdetr_loaded = True
+ logger.info("RT-DETR model attached from shared cache")
+ return True
+
+ # Serialize the ENTIRE loading sequence to avoid concurrent init issues
+ with BubbleDetector._rtdetr_init_lock:
+ try:
+ # Re-check after acquiring lock
+ if BubbleDetector._rtdetr_loaded and not force_reload:
+ self.rtdetr_model = BubbleDetector._rtdetr_shared_model
+ self.rtdetr_processor = BubbleDetector._rtdetr_shared_processor
+ self.rtdetr_loaded = True
+ logger.info("RT-DETR model attached from shared cache (post-lock)")
+ return True
+
+ # Use custom model_id if provided, otherwise use default
+ repo_id = model_id if model_id else self.rtdetr_repo
+ logger.info(f"📥 Loading RT-DETR model from {repo_id}...")
+
+ # Ensure TorchDynamo/compile doesn't interfere on some builds
+ try:
+ os.environ.setdefault('TORCHDYNAMO_DISABLE', '1')
+ except Exception:
+ pass
+
+ # Decide device strategy
+ gpu_available = bool(TORCH_AVAILABLE and hasattr(torch, 'cuda') and torch.cuda.is_available())
+ device_map = 'auto' if gpu_available else None
+ # Choose dtype
+ dtype = None
+ if TORCH_AVAILABLE:
+ try:
+ dtype = torch.float16 if gpu_available else torch.float32
+ except Exception:
+ dtype = None
+ low_cpu = True if gpu_available else False
+
+ # Load processor (once)
+ self.rtdetr_processor = RTDetrImageProcessor.from_pretrained(
+ repo_id,
+ size={"width": 640, "height": 640},
+ cache_dir=self.cache_dir if not model_path else None
+ )
+
+ # Prepare kwargs for from_pretrained
+ from_kwargs = {
+ 'cache_dir': self.cache_dir if not model_path else None,
+ 'low_cpu_mem_usage': low_cpu,
+ 'device_map': device_map,
+ }
+ if dtype is not None:
+ from_kwargs['dtype'] = dtype
+
+ # First attempt: load directly to target (CUDA if available)
+ try:
+ self.rtdetr_model = RTDetrForObjectDetection.from_pretrained(
+ model_path if model_path else repo_id,
+ **from_kwargs,
+ )
+ except Exception as primary_err:
+ # Fallback to a simple CPU load (no device move) if CUDA path fails
+ logger.warning(f"RT-DETR primary load failed ({primary_err}); retrying on CPU...")
+ from_kwargs_fallback = {
+ 'cache_dir': self.cache_dir if not model_path else None,
+ 'low_cpu_mem_usage': False,
+ 'device_map': None,
+ }
+ if TORCH_AVAILABLE:
+ from_kwargs_fallback['dtype'] = torch.float32
+ self.rtdetr_model = RTDetrForObjectDetection.from_pretrained(
+ model_path if model_path else repo_id,
+ **from_kwargs_fallback,
+ )
+
+ # Optional dynamic quantization for linear layers (CPU only)
+ if self.quantize_enabled and TORCH_AVAILABLE and (not gpu_available):
+ try:
+ try:
+ import torch.ao.quantization as tq
+ quantize_dynamic = tq.quantize_dynamic # type: ignore
+ except Exception:
+ import torch.quantization as tq # type: ignore
+ quantize_dynamic = tq.quantize_dynamic # type: ignore
+ self.rtdetr_model = quantize_dynamic(self.rtdetr_model, {torch.nn.Linear}, dtype=torch.qint8)
+ logger.info("🔻 Applied dynamic INT8 quantization to RT-DETR linear layers (CPU)")
+ except Exception as qe:
+ logger.warning(f"RT-DETR dynamic quantization skipped: {qe}")
+
+ # Finalize
+ self.rtdetr_model.eval()
+
+ # Sanity check: ensure no parameter is left on 'meta' device
+ try:
+ for n, p in self.rtdetr_model.named_parameters():
+ dev = getattr(p, 'device', None)
+ if dev is not None and getattr(dev, 'type', '') == 'meta':
+ raise RuntimeError(f"Parameter {n} is on 'meta' device after load")
+ except Exception as e:
+ logger.error(f"RT-DETR load sanity check failed: {e}")
+ self.rtdetr_loaded = False
+ return False
+
+ # Publish shared cache
+ BubbleDetector._rtdetr_shared_model = self.rtdetr_model
+ BubbleDetector._rtdetr_shared_processor = self.rtdetr_processor
+ BubbleDetector._rtdetr_loaded = True
+ BubbleDetector._rtdetr_repo_id = repo_id
+
+ self.rtdetr_loaded = True
+
+ # Save the model ID that was used
+ self.config['rtdetr_loaded'] = True
+ self.config['rtdetr_model_id'] = repo_id
+ self._save_config()
+
+ loc = 'CUDA' if gpu_available else 'CPU'
+ logger.info(f"✅ RT-DETR model loaded successfully ({loc})")
+ logger.info(" Classes: Empty bubbles, Text bubbles, Free text")
+
+ # Auto-convert to ONNX for RT-DETR only if explicitly enabled
+ if os.environ.get('AUTO_CONVERT_RTDETR_ONNX', 'false').lower() == 'true':
+ onnx_path = os.path.join(self.cache_dir, 'rtdetr_comic.onnx')
+ if self.convert_to_onnx('rtdetr', onnx_path):
+ logger.info("🚀 RT-DETR converted to ONNX for faster inference")
+ # Store ONNX path for later use
+ self.config['rtdetr_onnx_path'] = onnx_path
+ self._save_config()
+ # Optionally quantize ONNX for reduced RAM
+ if self.onnx_quantize_enabled:
+ try:
+ from onnxruntime.quantization import quantize_dynamic, QuantType
+ quant_path = os.path.splitext(onnx_path)[0] + ".int8.onnx"
+ if not os.path.exists(quant_path) or os.environ.get('FORCE_ONNX_REBUILD', 'false').lower() == 'true':
+ logger.info("🔻 Quantizing RT-DETR ONNX to INT8 (dynamic)...")
+ quantize_dynamic(model_input=onnx_path, model_output=quant_path, weight_type=QuantType.QInt8, op_types_to_quantize=['Conv', 'MatMul'])
+ self.config['rtdetr_onnx_quantized_path'] = quant_path
+ self._save_config()
+ logger.info(f"✅ Quantized RT-DETR ONNX saved to: {quant_path}")
+ except Exception as qe:
+ logger.warning(f"ONNX quantization for RT-DETR skipped: {qe}")
+ else:
+ logger.info("ℹ️ Skipping RT-DETR ONNX export (converter not supported in current environment)")
+
+ return True
+ except Exception as e:
+ logger.error(f"❌ Failed to load RT-DETR: {e}")
+ self.rtdetr_loaded = False
+ return False
+
+ def check_rtdetr_available(self, model_id: str = None) -> bool:
+ """
+ Check if RT-DETR model is available (cached).
+
+ Args:
+ model_id: Optional HuggingFace model ID
+
+ Returns:
+ True if model is cached and available
+ """
+ try:
+ from pathlib import Path
+
+ # Use provided model_id or default
+ repo_id = model_id if model_id else self.rtdetr_repo
+
+ # Check HuggingFace cache
+ cache_dir = Path.home() / ".cache" / "huggingface" / "hub"
+ model_id_formatted = repo_id.replace("/", "--")
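+            # The default repo therefore maps to a cache folder named
+            # 'models--ogkalu--comic-text-and-bubble-detector' (see the glob below).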
+
+ # Look for model folder
+ model_folders = list(cache_dir.glob(f"models--{model_id_formatted}*"))
+
+ if model_folders:
+ for folder in model_folders:
+ if (folder / "snapshots").exists():
+ snapshots = list((folder / "snapshots").iterdir())
+ if snapshots:
+ return True
+
+ return False
+
+ except Exception:
+ return False
+
+ def detect_bubbles(self,
+ image_path: str,
+ confidence: float = None,
+ iou_threshold: float = None,
+ max_detections: int = None,
+ use_rtdetr: bool = None) -> List[Tuple[int, int, int, int]]:
+ """
+ Detect speech bubbles in an image (backward compatible method).
+
+ Args:
+ image_path: Path to image file
+ confidence: Minimum confidence threshold (0-1)
+ iou_threshold: IOU threshold for NMS (0-1)
+ max_detections: Maximum number of detections to return
+ use_rtdetr: If True, use RT-DETR instead of YOLOv8 (if available)
+
+ Returns:
+ List of bubble bounding boxes as (x, y, width, height) tuples
+ """
+ # Check for stop at start
+ if self._check_stop():
+ self._log("⏹️ Bubble detection stopped by user", "warning")
+ return []
+
+ # Decide which model to use
+ if use_rtdetr is None:
+ # Auto-select: prefer RT-DETR if available
+ use_rtdetr = self.rtdetr_loaded
+
+ if use_rtdetr:
+ # Prefer ONNX backend if available, else PyTorch
+ if getattr(self, 'rtdetr_onnx_loaded', False):
+ results = self.detect_with_rtdetr_onnx(
+ image_path=image_path,
+ confidence=confidence,
+ return_all_bubbles=True
+ )
+ return results
+ if self.rtdetr_loaded:
+ results = self.detect_with_rtdetr(
+ image_path=image_path,
+ confidence=confidence,
+ return_all_bubbles=True
+ )
+ return results
+
+ # Original YOLOv8 detection
+ if not self.model_loaded:
+ logger.error("No model loaded. Call load_model() first.")
+ return []
+
+ # Use defaults if not specified
+ confidence = confidence or self.default_confidence
+ iou_threshold = iou_threshold or self.default_iou_threshold
+ max_detections = max_detections or self.default_max_detections
+
+ try:
+ # Load image
+ image = cv2.imread(image_path)
+ if image is None:
+ logger.error(f"Failed to load image: {image_path}")
+ return []
+
+ h, w = image.shape[:2]
+ self._log(f"🔍 Detecting bubbles in {w}x{h} image")
+
+ # Check for stop before inference
+ if self._check_stop():
+ self._log("⏹️ Bubble detection inference stopped by user", "warning")
+ return []
+
+ if self.model_type == 'yolo':
+ # YOLOv8 inference
+ results = self.model(
+ image_path,
+ conf=confidence,
+ iou=iou_threshold,
+ max_det=min(max_detections, getattr(self, 'max_det_yolo', max_detections)),
+ verbose=False
+ )
+
+ bubbles = []
+ for r in results:
+ if r.boxes is not None:
+ for box in r.boxes:
+ # Get box coordinates
+ x1, y1, x2, y2 = box.xyxy[0].cpu().numpy()
+ x, y = int(x1), int(y1)
+ width = int(x2 - x1)
+ height = int(y2 - y1)
+
+ # Get confidence
+ conf = float(box.conf[0])
+
+ # Add to list
+ if len(bubbles) < max_detections:
+ bubbles.append((x, y, width, height))
+
+ logger.debug(f" Bubble: ({x},{y}) {width}x{height} conf={conf:.2f}")
+
+ elif self.model_type == 'onnx':
+ # ONNX inference
+ bubbles = self._detect_with_onnx(image, confidence, iou_threshold, max_detections)
+
+ elif self.model_type == 'torch':
+ # TorchScript inference
+ bubbles = self._detect_with_torchscript(image, confidence, iou_threshold, max_detections)
+
+ else:
+ logger.error(f"Unknown model type: {self.model_type}")
+ return []
+
+ logger.info(f"✅ Detected {len(bubbles)} speech bubbles")
+ time.sleep(0.1) # Brief pause for stability
+ logger.debug("💤 Bubble detection pausing briefly for stability")
+ return bubbles
+
+ except Exception as e:
+ logger.error(f"Detection failed: {e}")
+ logger.error(traceback.format_exc())
+ return []
+
+ def detect_with_rtdetr(self,
+ image_path: str = None,
+ image: np.ndarray = None,
+ confidence: float = None,
+ return_all_bubbles: bool = False) -> Any:
+ """
+ Detect using RT-DETR model with 3-class detection (PyTorch backend).
+
+ Args:
+ image_path: Path to image file
+ image: Image array (BGR format)
+ confidence: Confidence threshold
+ return_all_bubbles: If True, return list of bubble boxes (for compatibility)
+ If False, return dict with all classes
+
+ Returns:
+ List of bubbles if return_all_bubbles=True, else dict with classes
+ """
+ # Check for stop at start
+ if self._check_stop():
+ self._log("⏹️ RT-DETR detection stopped by user", "warning")
+ if return_all_bubbles:
+ return []
+ return {'bubbles': [], 'text_bubbles': [], 'text_free': []}
+
+ if not self.rtdetr_loaded:
+ self._log("RT-DETR not loaded. Call load_rtdetr_model() first.", "warning")
+ if return_all_bubbles:
+ return []
+ return {'bubbles': [], 'text_bubbles': [], 'text_free': []}
+
+ confidence = confidence or self.default_confidence
+
+ try:
+ # Load image
+ if image_path:
+ image = cv2.imread(image_path)
+ elif image is None:
+ logger.error("No image provided")
+ if return_all_bubbles:
+ return []
+ return {'bubbles': [], 'text_bubbles': [], 'text_free': []}
+
+ # Convert BGR to RGB for PIL
+ image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+ pil_image = Image.fromarray(image_rgb)
+
+ # Prepare image for model
+ inputs = self.rtdetr_processor(images=pil_image, return_tensors="pt")
+
+ # Move inputs to the same device as the model and match model dtype for floating tensors
+ model_device = next(self.rtdetr_model.parameters()).device if self.rtdetr_model is not None else (torch.device('cpu') if TORCH_AVAILABLE else 'cpu')
+ model_dtype = None
+ if TORCH_AVAILABLE and self.rtdetr_model is not None:
+ try:
+ model_dtype = next(self.rtdetr_model.parameters()).dtype
+ except Exception:
+ model_dtype = None
+
+ if TORCH_AVAILABLE:
+ new_inputs = {}
+ for k, v in inputs.items():
+ if isinstance(v, torch.Tensor):
+ v = v.to(model_device)
+ if model_dtype is not None and torch.is_floating_point(v):
+ v = v.to(model_dtype)
+ new_inputs[k] = v
+ inputs = new_inputs
+
+ # Run inference with autocast when model is half/bfloat16 on CUDA
+ use_amp = TORCH_AVAILABLE and hasattr(model_device, 'type') and model_device.type == 'cuda' and (model_dtype in (torch.float16, torch.bfloat16))
+ autocast_dtype = model_dtype if model_dtype in (torch.float16, torch.bfloat16) else None
+
+ with torch.no_grad():
+ if use_amp and autocast_dtype is not None:
+ with torch.autocast('cuda', dtype=autocast_dtype):
+ outputs = self.rtdetr_model(**inputs)
+ else:
+ outputs = self.rtdetr_model(**inputs)
+
+ # Brief pause for stability after inference
+ time.sleep(0.1)
+ logger.debug("💤 RT-DETR inference pausing briefly for stability")
+
+ # Post-process results
+ target_sizes = torch.tensor([pil_image.size[::-1]]) if TORCH_AVAILABLE else None
+ if TORCH_AVAILABLE and hasattr(model_device, 'type') and model_device.type == "cuda":
+ target_sizes = target_sizes.to(model_device)
+
+ results = self.rtdetr_processor.post_process_object_detection(
+ outputs,
+ target_sizes=target_sizes,
+ threshold=confidence
+ )[0]
+
+ # Apply per-detector cap if configured
+ cap = getattr(self, 'max_det_rtdetr', self.default_max_detections)
+ if cap and len(results['boxes']) > cap:
+ # Keep top-scoring first
+ scores = results['scores']
+ top_idx = scores.topk(k=cap).indices if hasattr(scores, 'topk') else range(cap)
+ results = {
+ 'boxes': [results['boxes'][i] for i in top_idx],
+ 'scores': [results['scores'][i] for i in top_idx],
+ 'labels': [results['labels'][i] for i in top_idx]
+ }
+
+ logger.info(f"📊 RT-DETR found {len(results['boxes'])} detections above {confidence:.2f} confidence")
+
+ # Apply NMS to remove duplicate detections
+ # Group detections by class
+ class_detections = {self.CLASS_BUBBLE: [], self.CLASS_TEXT_BUBBLE: [], self.CLASS_TEXT_FREE: []}
+
+ for box, score, label in zip(results['boxes'], results['scores'], results['labels']):
+ x1, y1, x2, y2 = map(float, box.tolist())
+ label_id = label.item()
+ if label_id in class_detections:
+ class_detections[label_id].append((x1, y1, x2, y2, float(score.item())))
+
+ # Apply NMS per class to remove duplicates
+ def compute_iou(box1, box2):
+ """Compute IoU between two boxes (x1, y1, x2, y2)"""
+ x1_1, y1_1, x2_1, y2_1 = box1[:4]
+ x1_2, y1_2, x2_2, y2_2 = box2[:4]
+
+ # Intersection
+ x_left = max(x1_1, x1_2)
+ y_top = max(y1_1, y1_2)
+ x_right = min(x2_1, x2_2)
+ y_bottom = min(y2_1, y2_2)
+
+ if x_right < x_left or y_bottom < y_top:
+ return 0.0
+
+ intersection = (x_right - x_left) * (y_bottom - y_top)
+
+ # Union
+ area1 = (x2_1 - x1_1) * (y2_1 - y1_1)
+ area2 = (x2_2 - x1_2) * (y2_2 - y1_2)
+ union = area1 + area2 - intersection
+
+ return intersection / union if union > 0 else 0.0
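+                # Sanity check: identical boxes give IoU 1.0, disjoint boxes give 0.0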
+
+ def apply_nms(boxes_with_scores, iou_threshold=0.45):
+ """Apply Non-Maximum Suppression"""
+ if not boxes_with_scores:
+ return []
+
+ # Sort by score (descending)
+ sorted_boxes = sorted(boxes_with_scores, key=lambda x: x[4], reverse=True)
+ keep = []
+
+ while sorted_boxes:
+ # Keep the box with highest score
+ current = sorted_boxes.pop(0)
+ keep.append(current)
+
+ # Remove boxes with high IoU
+ sorted_boxes = [box for box in sorted_boxes if compute_iou(current, box) < iou_threshold]
+
+ return keep
+
+ # Apply NMS and organize by class
+ detections = {
+ 'bubbles': [], # Empty speech bubbles
+ 'text_bubbles': [], # Bubbles with text
+ 'text_free': [] # Text without bubbles
+ }
+
+ for class_id, boxes_list in class_detections.items():
+ nms_boxes = apply_nms(boxes_list, iou_threshold=self.default_iou_threshold)
+
+ for x1, y1, x2, y2, scr in nms_boxes:
+ width = int(x2 - x1)
+ height = int(y2 - y1)
+ # Store as (x, y, width, height) to match YOLOv8 format
+ bbox = (int(x1), int(y1), width, height)
+
+ if class_id == self.CLASS_BUBBLE:
+ detections['bubbles'].append(bbox)
+ elif class_id == self.CLASS_TEXT_BUBBLE:
+ detections['text_bubbles'].append(bbox)
+ elif class_id == self.CLASS_TEXT_FREE:
+ detections['text_free'].append(bbox)
+
+ # Stop early if we hit the configured cap across all classes
+ total_count = len(detections['bubbles']) + len(detections['text_bubbles']) + len(detections['text_free'])
+ if total_count >= (self.config.get('manga_settings', {}).get('ocr', {}).get('bubble_max_detections', self.default_max_detections) if isinstance(self.config, dict) else self.default_max_detections):
+ break
+
+ # Log results
+ total = len(detections['bubbles']) + len(detections['text_bubbles']) + len(detections['text_free'])
+ logger.info(f"✅ RT-DETR detected {total} objects:")
+ logger.info(f" - Empty bubbles: {len(detections['bubbles'])}")
+ logger.info(f" - Text bubbles: {len(detections['text_bubbles'])}")
+ logger.info(f" - Free text: {len(detections['text_free'])}")
+
+ # Return format based on compatibility mode
+ if return_all_bubbles:
+ # Return all bubbles (empty + with text) for backward compatibility
+ all_bubbles = detections['bubbles'] + detections['text_bubbles']
+ return all_bubbles
+ else:
+ return detections
+
+ except Exception as e:
+ logger.error(f"RT-DETR detection failed: {e}")
+ logger.error(traceback.format_exc())
+ if return_all_bubbles:
+ return []
+ return {'bubbles': [], 'text_bubbles': [], 'text_free': []}
+
+ def detect_all_text_regions(self, image_path: str = None, image: np.ndarray = None) -> List[Tuple[int, int, int, int]]:
+ """
+ Detect all text regions using RT-DETR (both in bubbles and free text).
+
+ Returns:
+ List of bounding boxes for all text regions
+ """
+ if not self.rtdetr_loaded:
+ logger.warning("RT-DETR required for text detection")
+ return []
+
+ detections = self.detect_with_rtdetr(image_path=image_path, image=image, return_all_bubbles=False)
+
+ # Combine text bubbles and free text
+ all_text = detections['text_bubbles'] + detections['text_free']
+
+ logger.info(f"📝 Found {len(all_text)} text regions total")
+ return all_text
+
+ def _detect_with_onnx(self, image: np.ndarray, confidence: float,
+ iou_threshold: float, max_detections: int) -> List[Tuple[int, int, int, int]]:
+ """Run detection using ONNX model."""
+ # Preprocess image
+ img_size = 640 # Standard YOLOv8 input size
+ img_resized = cv2.resize(image, (img_size, img_size))
+ img_norm = img_resized.astype(np.float32) / 255.0
+ img_transposed = np.transpose(img_norm, (2, 0, 1))
+ img_batch = np.expand_dims(img_transposed, axis=0)
+
+ # Run inference
+ input_name = self.onnx_session.get_inputs()[0].name
+ outputs = self.onnx_session.run(None, {input_name: img_batch})
+
+ # Process outputs (YOLOv8 format)
+ predictions = outputs[0][0] # Remove batch dimension
+
+ # Filter by confidence and apply NMS
+ bubbles = []
+ boxes = []
+ scores = []
+
+ for pred in predictions.T: # Transpose to get predictions per detection
+ if len(pred) >= 5:
+ x_center, y_center, width, height, obj_conf = pred[:5]
+
+ if obj_conf >= confidence:
+ # Convert to corner coordinates
+ x1 = x_center - width / 2
+ y1 = y_center - height / 2
+
+ # Scale to original image size
+ h, w = image.shape[:2]
+ x1 = int(x1 * w / img_size)
+ y1 = int(y1 * h / img_size)
+ width = int(width * w / img_size)
+ height = int(height * h / img_size)
+
+ boxes.append([x1, y1, x1 + width, y1 + height])
+ scores.append(float(obj_conf))
+
+        # Apply NMS (cv2.dnn.NMSBoxes expects boxes in (x, y, w, h) format, not corner coordinates)
+        if boxes:
+            xywh_boxes = [[bx1, by1, bx2 - bx1, by2 - by1] for bx1, by1, bx2, by2 in boxes]
+            indices = cv2.dnn.NMSBoxes(xywh_boxes, scores, confidence, iou_threshold)
+ if len(indices) > 0:
+ indices = indices.flatten()[:max_detections]
+ for i in indices:
+ x1, y1, x2, y2 = boxes[i]
+ bubbles.append((x1, y1, x2 - x1, y2 - y1))
+
+ return bubbles
+
+ def _detect_with_torchscript(self, image: np.ndarray, confidence: float,
+ iou_threshold: float, max_detections: int) -> List[Tuple[int, int, int, int]]:
+ """Run detection using TorchScript model."""
+ # Similar to ONNX but using PyTorch tensors
+ img_size = 640
+ img_resized = cv2.resize(image, (img_size, img_size))
+ img_norm = img_resized.astype(np.float32) / 255.0
+ img_tensor = torch.from_numpy(img_norm).permute(2, 0, 1).unsqueeze(0)
+
+ if self.use_gpu:
+ img_tensor = img_tensor.cuda()
+
+ with torch.no_grad():
+ outputs = self.model(img_tensor)
+
+ # Process outputs similar to ONNX
+ # Implementation depends on exact model output format
+ # This is a placeholder - adjust based on your model
+ return []
+
+ def visualize_detections(self, image_path: str, bubbles: List[Tuple[int, int, int, int]] = None,
+ output_path: str = None, use_rtdetr: bool = False) -> np.ndarray:
+ """
+ Visualize detected bubbles on the image.
+
+ Args:
+ image_path: Path to original image
+ bubbles: List of bubble bounding boxes (if None, will detect)
+ output_path: Optional path to save visualization
+ use_rtdetr: Use RT-DETR for visualization with class colors
+
+ Returns:
+ Image with drawn bounding boxes
+ """
+ image = cv2.imread(image_path)
+ if image is None:
+ logger.error(f"Failed to load image: {image_path}")
+ return None
+
+ vis_image = image.copy()
+
+ if use_rtdetr and self.rtdetr_loaded:
+ # RT-DETR visualization with different colors per class
+ detections = self.detect_with_rtdetr(image_path=image_path, return_all_bubbles=False)
+
+ # Colors for each class
+ colors = {
+ 'bubbles': (0, 255, 0), # Green for empty bubbles
+ 'text_bubbles': (255, 0, 0), # Blue for text bubbles
+ 'text_free': (0, 0, 255) # Red for free text
+ }
+
+ # Draw detections
+ for class_name, bboxes in detections.items():
+ color = colors[class_name]
+
+ for i, (x, y, w, h) in enumerate(bboxes):
+ # Draw rectangle
+ cv2.rectangle(vis_image, (x, y), (x + w, y + h), color, 2)
+
+ # Add label
+ label = f"{class_name.replace('_', ' ').title()} {i+1}"
+ label_size, _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
+ cv2.rectangle(vis_image, (x, y - label_size[1] - 4),
+ (x + label_size[0], y), color, -1)
+ cv2.putText(vis_image, label, (x, y - 2),
+ cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
+ else:
+ # Original YOLOv8 visualization
+ if bubbles is None:
+ bubbles = self.detect_bubbles(image_path)
+
+ # Draw bounding boxes
+ for i, (x, y, w, h) in enumerate(bubbles):
+ # Draw rectangle
+ color = (0, 255, 0) # Green
+ thickness = 2
+ cv2.rectangle(vis_image, (x, y), (x + w, y + h), color, thickness)
+
+ # Add label
+ label = f"Bubble {i+1}"
+ label_size, _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
+ cv2.rectangle(vis_image, (x, y - label_size[1] - 4), (x + label_size[0], y), color, -1)
+ cv2.putText(vis_image, label, (x, y - 2), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
+
+ # Save if output path provided
+ if output_path:
+ cv2.imwrite(output_path, vis_image)
+ logger.info(f"💾 Visualization saved to: {output_path}")
+
+ return vis_image
+
+ def convert_to_onnx(self, model_path: str, output_path: str = None) -> bool:
+ """
+ Convert a YOLOv8 or RT-DETR model to ONNX format.
+
+ Args:
+ model_path: Path to model file or 'rtdetr' for loaded RT-DETR
+ output_path: Path for ONNX output (auto-generated if None)
+
+ Returns:
+ True if conversion successful, False otherwise
+ """
+ try:
+ logger.info(f"🔄 Converting {model_path} to ONNX...")
+
+ # Generate output path if not provided
+ if output_path is None:
+ if model_path == 'rtdetr' and self.rtdetr_loaded:
+ base_name = 'rtdetr_comic'
+ else:
+ base_name = Path(model_path).stem
+ output_path = os.path.join(self.cache_dir, f"{base_name}.onnx")
+
+ # Check if already exists
+ if os.path.exists(output_path) and not os.environ.get('FORCE_ONNX_REBUILD', 'false').lower() == 'true':
+ logger.info(f"✅ ONNX model already exists: {output_path}")
+ return True
+
+ # Handle RT-DETR conversion
+ if model_path == 'rtdetr' and self.rtdetr_loaded:
+ if not TORCH_AVAILABLE:
+ logger.error("PyTorch required for RT-DETR ONNX conversion")
+ return False
+
+ # RT-DETR specific conversion
+ self.rtdetr_model.eval()
+
+ # Create dummy input (pixel values): BxCxHxW
+ dummy_input = torch.randn(1, 3, 640, 640)
+ if self.device == 'cuda':
+ dummy_input = dummy_input.to('cuda')
+
+ # Wrap the model to return only tensors (logits, pred_boxes)
+ class _RTDetrExportWrapper(torch.nn.Module):
+ def __init__(self, mdl):
+ super().__init__()
+ self.mdl = mdl
+ def forward(self, images):
+ out = self.mdl(pixel_values=images)
+ # Handle dict/ModelOutput/tuple outputs
+ logits = None
+ boxes = None
+ try:
+ if isinstance(out, dict):
+ logits = out.get('logits', None)
+ boxes = out.get('pred_boxes', out.get('boxes', None))
+ else:
+ logits = getattr(out, 'logits', None)
+ boxes = getattr(out, 'pred_boxes', getattr(out, 'boxes', None))
+ except Exception:
+ pass
+ if (logits is None or boxes is None) and isinstance(out, (tuple, list)) and len(out) >= 2:
+ logits, boxes = out[0], out[1]
+ return logits, boxes
+
+ wrapper = _RTDetrExportWrapper(self.rtdetr_model)
+ if self.device == 'cuda':
+ wrapper = wrapper.to('cuda')
+
+ # Try PyTorch 2.x dynamo_export first (more tolerant of newer aten ops)
+ try:
+ success = False
+ try:
+ from torch.onnx import dynamo_export
+                        exp = dynamo_export(wrapper, dummy_input)
+ # exp may have save(); otherwise, it may expose model_proto
+ try:
+ exp.save(output_path) # type: ignore
+ success = True
+ except Exception:
+ try:
+ import onnx as _onnx
+ _onnx.save(exp.model_proto, output_path) # type: ignore
+ success = True
+ except Exception as _se:
+ logger.warning(f"dynamo_export produced model but could not save: {_se}")
+ except Exception as de:
+ logger.warning(f"dynamo_export failed; falling back to legacy exporter: {de}")
+ if success:
+ logger.info(f"✅ RT-DETR ONNX saved to: {output_path} (dynamo_export)")
+ return True
+ except Exception as de2:
+ logger.warning(f"dynamo_export path error: {de2}")
+
+ # Legacy exporter with opset fallback
+ last_err = None
+ for opset in [19, 18, 17, 16, 15, 14, 13]:
+ try:
+ torch.onnx.export(
+ wrapper,
+ dummy_input,
+ output_path,
+ export_params=True,
+ opset_version=opset,
+ do_constant_folding=True,
+ input_names=['pixel_values'],
+ output_names=['logits', 'boxes'],
+ dynamic_axes={
+ 'pixel_values': {0: 'batch', 2: 'height', 3: 'width'},
+ 'logits': {0: 'batch'},
+ 'boxes': {0: 'batch'}
+ }
+ )
+ logger.info(f"✅ RT-DETR ONNX saved to: {output_path} (opset {opset})")
+ return True
+ except Exception as _e:
+ last_err = _e
+ try:
+ msg = str(_e)
+ except Exception:
+ msg = ''
+ logger.warning(f"RT-DETR ONNX export failed at opset {opset}: {msg}")
+ continue
+
+ logger.error(f"All RT-DETR ONNX export attempts failed. Last error: {last_err}")
+ return False
+
+ # Handle YOLOv8 conversion - FIXED
+ elif YOLO_AVAILABLE and os.path.exists(model_path):
+ logger.info(f"Loading YOLOv8 model from: {model_path}")
+
+ # Load model
+ model = YOLO(model_path)
+
+ # Export to ONNX - this returns the path to the exported model
+ logger.info("Exporting to ONNX format...")
+ exported_path = model.export(format='onnx', imgsz=640, simplify=True)
+
+ # exported_path could be a string or Path object
+ exported_path = str(exported_path) if exported_path else None
+
+ if exported_path and os.path.exists(exported_path):
+ # Move to desired location if different
+ if exported_path != output_path:
+ import shutil
+ logger.info(f"Moving ONNX from {exported_path} to {output_path}")
+ shutil.move(exported_path, output_path)
+
+ logger.info(f"✅ YOLOv8 ONNX saved to: {output_path}")
+ return True
+ else:
+ # Fallback: check if it was created with expected name
+ expected_onnx = model_path.replace('.pt', '.onnx')
+ if os.path.exists(expected_onnx):
+ if expected_onnx != output_path:
+ import shutil
+ shutil.move(expected_onnx, output_path)
+ logger.info(f"✅ YOLOv8 ONNX saved to: {output_path}")
+ return True
+ else:
+ logger.error(f"ONNX export failed - no output file found")
+ return False
+
+ else:
+ logger.error(f"Cannot convert {model_path}: Model not found or dependencies missing")
+ return False
+
+ except Exception as e:
+ logger.error(f"Conversion failed: {e}")
+ # Avoid noisy full stack trace in production logs; return False gracefully
+ return False
+
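+    # Illustrative sketch of convert_to_onnx() usage; both paths below are examples
+    # (the YOLOv8 checkpoint path matches the one used in the __main__ demo):
+    #
+    #     detector.convert_to_onnx('rtdetr')  # export the loaded RT-DETR weights
+    #     detector.convert_to_onnx('models/comic-speech-bubble-detector-yolov8m.pt')
+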
+ def batch_detect(self, image_paths: List[str], **kwargs) -> Dict[str, List[Tuple[int, int, int, int]]]:
+ """
+ Detect bubbles in multiple images.
+
+ Args:
+ image_paths: List of image paths
+ **kwargs: Detection parameters (confidence, iou_threshold, max_detections, use_rtdetr)
+
+ Returns:
+ Dictionary mapping image paths to bubble lists
+ """
+ results = {}
+
+ for i, image_path in enumerate(image_paths):
+ logger.info(f"Processing image {i+1}/{len(image_paths)}: {os.path.basename(image_path)}")
+ bubbles = self.detect_bubbles(image_path, **kwargs)
+ results[image_path] = bubbles
+
+ return results
+
+ def unload(self, release_shared: bool = False):
+ """Release model resources held by this detector instance.
+ Args:
+ release_shared: If True, also clear class-level shared RT-DETR caches.
+ """
+ try:
+ # Release instance-level models and sessions
+ try:
+ if getattr(self, 'onnx_session', None) is not None:
+ self.onnx_session = None
+ except Exception:
+ pass
+ try:
+ if getattr(self, 'rtdetr_onnx_session', None) is not None:
+ self.rtdetr_onnx_session = None
+ except Exception:
+ pass
+ for attr in ['model', 'rtdetr_model', 'rtdetr_processor']:
+ try:
+ if hasattr(self, attr):
+ setattr(self, attr, None)
+ except Exception:
+ pass
+ for flag in ['model_loaded', 'rtdetr_loaded', 'rtdetr_onnx_loaded']:
+ try:
+ if hasattr(self, flag):
+ setattr(self, flag, False)
+ except Exception:
+ pass
+
+ # Optional: release shared caches
+ if release_shared:
+ try:
+ BubbleDetector._rtdetr_shared_model = None
+ BubbleDetector._rtdetr_shared_processor = None
+ BubbleDetector._rtdetr_loaded = False
+ except Exception:
+ pass
+
+ # Free CUDA cache and trigger GC
+ try:
+ if TORCH_AVAILABLE and torch is not None and torch.cuda.is_available():
+ torch.cuda.empty_cache()
+ except Exception:
+ pass
+ try:
+ import gc
+ gc.collect()
+ except Exception:
+ pass
+ except Exception:
+ # Best-effort only
+ pass
+
+ def get_bubble_masks(self, image_path: str, bubbles: List[Tuple[int, int, int, int]]) -> np.ndarray:
+ """
+ Create a mask image with bubble regions.
+
+ Args:
+ image_path: Path to original image
+ bubbles: List of bubble bounding boxes
+
+ Returns:
+ Binary mask with bubble regions as white (255)
+ """
+ image = cv2.imread(image_path)
+ if image is None:
+ return None
+
+ h, w = image.shape[:2]
+ mask = np.zeros((h, w), dtype=np.uint8)
+
+ # Fill bubble regions
+ for x, y, bw, bh in bubbles:
+ cv2.rectangle(mask, (x, y), (x + bw, y + bh), 255, -1)
+
+ return mask
+
+ def filter_bubbles_by_size(self, bubbles: List[Tuple[int, int, int, int]],
+ min_area: int = 100,
+ max_area: int = None) -> List[Tuple[int, int, int, int]]:
+ """
+ Filter bubbles by area.
+
+ Args:
+ bubbles: List of bubble bounding boxes
+ min_area: Minimum area in pixels
+ max_area: Maximum area in pixels (None for no limit)
+
+ Returns:
+ Filtered list of bubbles
+ """
+ filtered = []
+
+ for x, y, w, h in bubbles:
+ area = w * h
+ if area >= min_area and (max_area is None or area <= max_area):
+ filtered.append((x, y, w, h))
+
+ return filtered
+
+ def merge_overlapping_bubbles(self, bubbles: List[Tuple[int, int, int, int]],
+ overlap_threshold: float = 0.1) -> List[Tuple[int, int, int, int]]:
+ """
+ Merge overlapping bubble detections.
+
+ Args:
+ bubbles: List of bubble bounding boxes
+ overlap_threshold: Minimum overlap ratio to merge
+
+ Returns:
+ Merged list of bubbles
+ """
+ if not bubbles:
+ return []
+
+ # Convert to numpy array for easier manipulation
+ boxes = np.array([(x, y, x+w, y+h) for x, y, w, h in bubbles])
+
+ merged = []
+ used = set()
+
+ for i, box1 in enumerate(boxes):
+ if i in used:
+ continue
+
+ # Start with current box
+ x1, y1, x2, y2 = box1
+
+ # Check for overlaps with remaining boxes
+ for j in range(i + 1, len(boxes)):
+ if j in used:
+ continue
+
+ box2 = boxes[j]
+
+ # Calculate intersection
+ ix1 = max(x1, box2[0])
+ iy1 = max(y1, box2[1])
+ ix2 = min(x2, box2[2])
+ iy2 = min(y2, box2[3])
+
+ if ix1 < ix2 and iy1 < iy2:
+ # Calculate overlap ratio
+ intersection = (ix2 - ix1) * (iy2 - iy1)
+ area1 = (x2 - x1) * (y2 - y1)
+ area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
+ overlap = intersection / min(area1, area2)
+
+ if overlap >= overlap_threshold:
+ # Merge boxes
+ x1 = min(x1, box2[0])
+ y1 = min(y1, box2[1])
+ x2 = max(x2, box2[2])
+ y2 = max(y2, box2[3])
+ used.add(j)
+
+ merged.append((int(x1), int(y1), int(x2 - x1), int(y2 - y1)))
+
+ return merged
+
+ # ============================
+ # RT-DETR (ONNX) BACKEND
+ # ============================
+ def load_rtdetr_onnx_model(self, model_id: str = None, force_reload: bool = False) -> bool:
+ """
+ Load RT-DETR ONNX model using onnxruntime. Downloads detector.onnx and config.json
+ from the provided Hugging Face repo if not already cached.
+ """
+ if not ONNX_AVAILABLE:
+ logger.error("ONNX Runtime not available for RT-DETR ONNX backend")
+ return False
+ try:
+ # If singleton mode and already loaded, just attach shared session
+ try:
+ adv = (self.config or {}).get('manga_settings', {}).get('advanced', {}) if isinstance(self.config, dict) else {}
+ singleton = bool(adv.get('use_singleton_models', True))
+ except Exception:
+ singleton = True
+ if singleton and BubbleDetector._rtdetr_onnx_loaded and not force_reload and BubbleDetector._rtdetr_onnx_shared_session is not None:
+ self.rtdetr_onnx_session = BubbleDetector._rtdetr_onnx_shared_session
+ self.rtdetr_onnx_loaded = True
+ return True
+
+ repo = model_id or self.rtdetr_onnx_repo
+ try:
+ from huggingface_hub import hf_hub_download
+ except Exception as e:
+ logger.error(f"huggingface-hub required to fetch RT-DETR ONNX: {e}")
+ return False
+
+ # Ensure local models dir (use configured cache_dir directly: e.g., 'models')
+ cache_dir = self.cache_dir
+ os.makedirs(cache_dir, exist_ok=True)
+
+ # Download files into models/ and avoid symlinks so the file is visible there
+ try:
+ _ = hf_hub_download(repo_id=repo, filename='config.json', cache_dir=cache_dir, local_dir=cache_dir, local_dir_use_symlinks=False)
+ except Exception:
+ pass
+ onnx_fp = hf_hub_download(repo_id=repo, filename='detector.onnx', cache_dir=cache_dir, local_dir=cache_dir, local_dir_use_symlinks=False)
+ BubbleDetector._rtdetr_onnx_model_path = onnx_fp
+
+ # Pick providers: prefer CUDA if available; otherwise CPU. Do NOT use DML.
+ providers = ['CPUExecutionProvider']
+ try:
+ avail = ort.get_available_providers() if ONNX_AVAILABLE else []
+ if 'CUDAExecutionProvider' in avail:
+ providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
+ except Exception:
+ pass
+
+ # Session options with reduced memory arena and optional thread limiting in singleton mode
+ so = ort.SessionOptions()
+ try:
+ so.enable_mem_pattern = False
+ so.enable_cpu_mem_arena = False
+ except Exception:
+ pass
+ # If singleton models mode is enabled in config, limit ORT threading to reduce CPU spikes
+ try:
+ adv = (self.config or {}).get('manga_settings', {}).get('advanced', {}) if isinstance(self.config, dict) else {}
+ if bool(adv.get('use_singleton_models', True)):
+ so.intra_op_num_threads = 1
+ so.inter_op_num_threads = 1
+ try:
+ so.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
+ except Exception:
+ pass
+ try:
+ so.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_BASIC
+ except Exception:
+ pass
+ except Exception:
+ pass
+
+ # Create session (serialize creation in singleton mode to avoid device storms)
+ if singleton:
+ with BubbleDetector._rtdetr_onnx_init_lock:
+ # Re-check after acquiring lock
+ if BubbleDetector._rtdetr_onnx_loaded and BubbleDetector._rtdetr_onnx_shared_session is not None and not force_reload:
+ self.rtdetr_onnx_session = BubbleDetector._rtdetr_onnx_shared_session
+ self.rtdetr_onnx_loaded = True
+ return True
+ sess = ort.InferenceSession(onnx_fp, providers=providers, sess_options=so)
+ BubbleDetector._rtdetr_onnx_shared_session = sess
+ BubbleDetector._rtdetr_onnx_loaded = True
+ BubbleDetector._rtdetr_onnx_providers = providers
+ self.rtdetr_onnx_session = sess
+ self.rtdetr_onnx_loaded = True
+ else:
+ self.rtdetr_onnx_session = ort.InferenceSession(onnx_fp, providers=providers, sess_options=so)
+ self.rtdetr_onnx_loaded = True
+ logger.info("✅ RT-DETR (ONNX) model ready")
+ return True
+ except Exception as e:
+ logger.error(f"Failed to load RT-DETR ONNX: {e}")
+ self.rtdetr_onnx_session = None
+ self.rtdetr_onnx_loaded = False
+ return False
+
+ def detect_with_rtdetr_onnx(self,
+ image_path: str = None,
+ image: np.ndarray = None,
+ confidence: float = 0.3,
+ return_all_bubbles: bool = False) -> Any:
+        """Detect using the RT-DETR ONNX backend.
+
+        Returns a list of bubble boxes when return_all_bubbles is True, otherwise a dict keyed
+        by class ('bubbles', 'text_bubbles', 'text_free'), matching the PyTorch path.
+        """
+ if not self.rtdetr_onnx_loaded or self.rtdetr_onnx_session is None:
+ logger.warning("RT-DETR ONNX not loaded")
+ return [] if return_all_bubbles else {'bubbles': [], 'text_bubbles': [], 'text_free': []}
+ try:
+ # Acquire image
+ if image_path is not None:
+ import cv2
+ image = cv2.imread(image_path)
+ if image is None:
+ raise RuntimeError(f"Failed to read image: {image_path}")
+ image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+ else:
+ if image is None:
+ raise RuntimeError("No image provided")
+ # Assume image is BGR np.ndarray if from OpenCV
+ try:
+ import cv2
+ image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+ except Exception:
+ image_rgb = image
+
+ # To PIL then resize 640x640 as in reference
+ from PIL import Image as _PILImage
+ pil_image = _PILImage.fromarray(image_rgb)
+ im_resized = pil_image.resize((640, 640))
+ arr = np.asarray(im_resized, dtype=np.float32) / 255.0
+ arr = np.transpose(arr, (2, 0, 1)) # (3,H,W)
+ im_data = arr[np.newaxis, ...]
+
+ w, h = pil_image.size
+ orig_size = np.array([[w, h]], dtype=np.int64)
+
+ # Run with a concurrency guard to prevent device hangs and limit memory usage
+ # Apply semaphore for ALL providers (not just DML) to control concurrency
+ providers = BubbleDetector._rtdetr_onnx_providers or []
+ def _do_run(session):
+ return session.run(None, {
+ 'images': im_data,
+ 'orig_target_sizes': orig_size
+ })
+
+ # Always use semaphore to limit concurrent RT-DETR calls
+ acquired = False
+ try:
+ BubbleDetector._rtdetr_onnx_sema.acquire()
+ acquired = True
+
+ # Special DML error handling
+ if 'DmlExecutionProvider' in providers:
+ try:
+ outputs = _do_run(self.rtdetr_onnx_session)
+ except Exception as dml_err:
+ msg = str(dml_err)
+ if '887A0005' in msg or '887A0006' in msg or 'Dml' in msg:
+ # Rebuild CPU session and retry once
+ try:
+ base_path = BubbleDetector._rtdetr_onnx_model_path
+ if base_path:
+ so = ort.SessionOptions()
+ so.enable_mem_pattern = False
+ so.enable_cpu_mem_arena = False
+ cpu_providers = ['CPUExecutionProvider']
+ # Serialize rebuild
+ with BubbleDetector._rtdetr_onnx_init_lock:
+ sess = ort.InferenceSession(base_path, providers=cpu_providers, sess_options=so)
+ BubbleDetector._rtdetr_onnx_shared_session = sess
+ BubbleDetector._rtdetr_onnx_providers = cpu_providers
+ self.rtdetr_onnx_session = sess
+ outputs = _do_run(self.rtdetr_onnx_session)
+ else:
+ raise
+ except Exception:
+ raise
+ else:
+ raise
+ else:
+ # Non-DML providers - just run directly
+ outputs = _do_run(self.rtdetr_onnx_session)
+ finally:
+ if acquired:
+ try:
+ BubbleDetector._rtdetr_onnx_sema.release()
+ except Exception:
+ pass
+
+ # outputs expected: labels, boxes, scores
+ labels, boxes, scores = outputs[:3]
+ if labels.ndim == 2 and labels.shape[0] == 1:
+ labels = labels[0]
+ if scores.ndim == 2 and scores.shape[0] == 1:
+ scores = scores[0]
+ if boxes.ndim == 3 and boxes.shape[0] == 1:
+ boxes = boxes[0]
+
+ # Apply NMS to remove duplicate detections
+ # Group detections by class and apply NMS per class
+ class_detections = {self.CLASS_BUBBLE: [], self.CLASS_TEXT_BUBBLE: [], self.CLASS_TEXT_FREE: []}
+
+ for lab, box, scr in zip(labels, boxes, scores):
+ if float(scr) < float(confidence):
+ continue
+ label_id = int(lab)
+ if label_id in class_detections:
+ x1, y1, x2, y2 = map(float, box)
+ class_detections[label_id].append((x1, y1, x2, y2, float(scr)))
+
+ # Apply NMS per class to remove duplicates
+ def compute_iou(box1, box2):
+ """Compute IoU between two boxes (x1, y1, x2, y2)"""
+ x1_1, y1_1, x2_1, y2_1 = box1[:4]
+ x1_2, y1_2, x2_2, y2_2 = box2[:4]
+
+ # Intersection
+ x_left = max(x1_1, x1_2)
+ y_top = max(y1_1, y1_2)
+ x_right = min(x2_1, x2_2)
+ y_bottom = min(y2_1, y2_2)
+
+ if x_right < x_left or y_bottom < y_top:
+ return 0.0
+
+ intersection = (x_right - x_left) * (y_bottom - y_top)
+
+ # Union
+ area1 = (x2_1 - x1_1) * (y2_1 - y1_1)
+ area2 = (x2_2 - x1_2) * (y2_2 - y1_2)
+ union = area1 + area2 - intersection
+
+ return intersection / union if union > 0 else 0.0
+
+ def apply_nms(boxes_with_scores, iou_threshold=0.45):
+ """Apply Non-Maximum Suppression"""
+ if not boxes_with_scores:
+ return []
+
+ # Sort by score (descending)
+ sorted_boxes = sorted(boxes_with_scores, key=lambda x: x[4], reverse=True)
+ keep = []
+
+ while sorted_boxes:
+ # Keep the box with highest score
+ current = sorted_boxes.pop(0)
+ keep.append(current)
+
+ # Remove boxes with high IoU
+ sorted_boxes = [box for box in sorted_boxes if compute_iou(current, box) < iou_threshold]
+
+ return keep
+
+ # Apply NMS and build final detections
+ detections = {'bubbles': [], 'text_bubbles': [], 'text_free': []}
+ bubbles_all = []
+
+ for class_id, boxes_list in class_detections.items():
+ nms_boxes = apply_nms(boxes_list, iou_threshold=self.default_iou_threshold)
+
+ for x1, y1, x2, y2, scr in nms_boxes:
+ bbox = (int(x1), int(y1), int(x2 - x1), int(y2 - y1))
+
+ if class_id == self.CLASS_BUBBLE:
+ detections['bubbles'].append(bbox)
+ bubbles_all.append(bbox)
+ elif class_id == self.CLASS_TEXT_BUBBLE:
+ detections['text_bubbles'].append(bbox)
+ bubbles_all.append(bbox)
+ elif class_id == self.CLASS_TEXT_FREE:
+ detections['text_free'].append(bbox)
+
+ return bubbles_all if return_all_bubbles else detections
+ except Exception as e:
+ logger.error(f"RT-DETR ONNX detection failed: {e}")
+ return [] if return_all_bubbles else {'bubbles': [], 'text_bubbles': [], 'text_free': []}
+
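+
+# Illustrative sketch (not called anywhere in this module): driving the ONNX backend
+# end to end. The function name, image path, and confidence value are placeholders;
+# the calls mirror load_rtdetr_onnx_model() and detect_with_rtdetr_onnx() above.
+def _example_rtdetr_onnx_usage(image_path: str = "page.png", confidence: float = 0.3):
+    """Load the shared RT-DETR ONNX session, run detection, and log per-class counts."""
+    detector = BubbleDetector()
+    if not detector.load_rtdetr_onnx_model():
+        return None
+    results = detector.detect_with_rtdetr_onnx(image_path=image_path, confidence=confidence)
+    logger.info("Text bubbles: %d, free text: %d, empty bubbles: %d",
+                len(results['text_bubbles']), len(results['text_free']), len(results['bubbles']))
+    return results
+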
+
+# Standalone utility functions
+def download_model_from_huggingface(repo_id: str = "ogkalu/comic-speech-bubble-detector-yolov8m",
+ filename: str = "comic-speech-bubble-detector-yolov8m.pt",
+ cache_dir: str = "models") -> str:
+ """
+ Download model from Hugging Face Hub.
+
+ Args:
+ repo_id: Hugging Face repository ID
+ filename: Model filename in the repository
+ cache_dir: Local directory to cache the model
+
+ Returns:
+ Path to downloaded model file
+ """
+ try:
+ from huggingface_hub import hf_hub_download
+
+ os.makedirs(cache_dir, exist_ok=True)
+
+ logger.info(f"📥 Downloading {filename} from {repo_id}...")
+
+ model_path = hf_hub_download(
+ repo_id=repo_id,
+ filename=filename,
+ cache_dir=cache_dir,
+ local_dir=cache_dir
+ )
+
+ logger.info(f"✅ Model downloaded to: {model_path}")
+ return model_path
+
+ except ImportError:
+ logger.error("huggingface-hub package required. Install with: pip install huggingface-hub")
+ return None
+ except Exception as e:
+ logger.error(f"Download failed: {e}")
+ return None
+
+
+def download_rtdetr_model(cache_dir: str = "models") -> bool:
+ """
+ Download RT-DETR model for advanced detection.
+
+ Args:
+ cache_dir: Directory to cache the model
+
+ Returns:
+ True if successful
+ """
+ if not TRANSFORMERS_AVAILABLE:
+ logger.error("Transformers required. Install with: pip install transformers")
+ return False
+
+ try:
+ logger.info("📥 Downloading RT-DETR model...")
+ from transformers import RTDetrForObjectDetection, RTDetrImageProcessor
+
+ # This will download and cache the model
+ processor = RTDetrImageProcessor.from_pretrained(
+ "ogkalu/comic-text-and-bubble-detector",
+ cache_dir=cache_dir
+ )
+ model = RTDetrForObjectDetection.from_pretrained(
+ "ogkalu/comic-text-and-bubble-detector",
+ cache_dir=cache_dir
+ )
+
+ logger.info("✅ RT-DETR model downloaded successfully")
+ return True
+
+ except Exception as e:
+ logger.error(f"Download failed: {e}")
+ return False
+
+
+# Example usage and testing
+if __name__ == "__main__":
+ import sys
+
+ # Create detector
+ detector = BubbleDetector()
+
+ if len(sys.argv) > 1:
+ if sys.argv[1] == "download":
+ # Download model from Hugging Face
+ model_path = download_model_from_huggingface()
+ if model_path:
+ print(f"YOLOv8 model downloaded to: {model_path}")
+
+ # Also download RT-DETR
+ if download_rtdetr_model():
+ print("RT-DETR model downloaded")
+
+ elif sys.argv[1] == "detect" and len(sys.argv) > 3:
+ # Detect bubbles in an image
+ model_path = sys.argv[2]
+ image_path = sys.argv[3]
+
+ # Load appropriate model
+ if 'rtdetr' in model_path.lower():
+ if detector.load_rtdetr_model():
+ # Use RT-DETR
+ results = detector.detect_with_rtdetr(image_path)
+ print(f"RT-DETR Detection:")
+ print(f" Empty bubbles: {len(results['bubbles'])}")
+ print(f" Text bubbles: {len(results['text_bubbles'])}")
+ print(f" Free text: {len(results['text_free'])}")
+ else:
+ if detector.load_model(model_path):
+ bubbles = detector.detect_bubbles(image_path, confidence=0.5)
+ print(f"YOLOv8 detected {len(bubbles)} bubbles:")
+ for i, (x, y, w, h) in enumerate(bubbles):
+ print(f" Bubble {i+1}: position=({x},{y}) size=({w}x{h})")
+
+ # Optionally visualize
+ if len(sys.argv) > 4:
+ output_path = sys.argv[4]
+ detector.visualize_detections(image_path, output_path=output_path,
+ use_rtdetr='rtdetr' in model_path.lower())
+
+ elif sys.argv[1] == "test-both" and len(sys.argv) > 2:
+ # Test both models
+ image_path = sys.argv[2]
+
+ # Load YOLOv8
+ yolo_path = "models/comic-speech-bubble-detector-yolov8m.pt"
+ if os.path.exists(yolo_path):
+ detector.load_model(yolo_path)
+ yolo_bubbles = detector.detect_bubbles(image_path, use_rtdetr=False)
+ print(f"YOLOv8: {len(yolo_bubbles)} bubbles")
+
+ # Load RT-DETR
+ if detector.load_rtdetr_model():
+ rtdetr_bubbles = detector.detect_bubbles(image_path, use_rtdetr=True)
+ print(f"RT-DETR: {len(rtdetr_bubbles)} bubbles")
+
+ else:
+ print("Usage:")
+ print(" python bubble_detector.py download")
+            print("  python bubble_detector.py detect <model_path> <image_path> [output_path]")
+            print("  python bubble_detector.py test-both <image_path>")
+
+ else:
+ print("Bubble Detector Module (YOLOv8 + RT-DETR)")
+ print("Usage:")
+ print(" python bubble_detector.py download")
+        print("  python bubble_detector.py detect <model_path> <image_path> [output_path]")
+        print("  python bubble_detector.py test-both <image_path>")
diff --git a/hyphen_textwrap.py b/hyphen_textwrap.py
new file mode 100644
index 0000000000000000000000000000000000000000..443a66d5e0a4bea562973d9c7b6c18bbd37ac6f4
--- /dev/null
+++ b/hyphen_textwrap.py
@@ -0,0 +1,508 @@
+# modified textwrap module to add hyphens whenever it breaks a long word
+# https://github.com/python/cpython/blob/main/Lib/textwrap.py
+
+"""Text wrapping and filling with improved hyphenation support.
+
+This module is adapted from comic-translate's enhanced textwrap implementation.
+It provides better hyphenation behavior when breaking long words across lines.
+"""
+
+# Copyright (C) 1999-2001 Gregory P. Ward.
+# Copyright (C) 2002, 2003 Python Software Foundation.
+# Written by Greg Ward
+
+import re
+
+__all__ = ['TextWrapper', 'wrap', 'fill', 'dedent', 'indent', 'shorten']
+
+# Hardcode the recognized whitespace characters to the US-ASCII
+# whitespace characters. The main reason for doing this is that
+# some Unicode spaces (like \u00a0) are non-breaking whitespaces.
+_whitespace = '\t\n\x0b\x0c\r '
+
+class TextWrapper:
+ """
+ Object for wrapping/filling text. The public interface consists of
+ the wrap() and fill() methods; the other methods are just there for
+ subclasses to override in order to tweak the default behaviour.
+ If you want to completely replace the main wrapping algorithm,
+ you'll probably have to override _wrap_chunks().
+
+ Several instance attributes control various aspects of wrapping:
+ width (default: 70)
+ the maximum width of wrapped lines (unless break_long_words
+ is false)
+ initial_indent (default: "")
+ string that will be prepended to the first line of wrapped
+ output. Counts towards the line's width.
+ subsequent_indent (default: "")
+ string that will be prepended to all lines save the first
+ of wrapped output; also counts towards each line's width.
+ expand_tabs (default: true)
+ Expand tabs in input text to spaces before further processing.
+ Each tab will become 0 .. 'tabsize' spaces, depending on its position
+ in its line. If false, each tab is treated as a single character.
+ tabsize (default: 8)
+ Expand tabs in input text to 0 .. 'tabsize' spaces, unless
+ 'expand_tabs' is false.
+ replace_whitespace (default: true)
+ Replace all whitespace characters in the input text by spaces
+ after tab expansion. Note that if expand_tabs is false and
+ replace_whitespace is true, every tab will be converted to a
+ single space!
+ fix_sentence_endings (default: false)
+ Ensure that sentence-ending punctuation is always followed
+ by two spaces. Off by default because the algorithm is
+ (unavoidably) imperfect.
+ break_long_words (default: true)
+ Break words longer than 'width'. If false, those words will not
+ be broken, and some lines might be longer than 'width'.
+ break_on_hyphens (default: true)
+ Allow breaking hyphenated words. If true, wrapping will occur
+ preferably on whitespaces and right after hyphens part of
+ compound words.
+ drop_whitespace (default: true)
+ Drop leading and trailing whitespace from lines.
+ max_lines (default: None)
+ Truncate wrapped lines.
+ placeholder (default: ' [...]')
+ Append to the last line of truncated text.
+ hyphenate_broken_words (default: True)
+ Add hyphens when breaking long words across lines.
+ """
+
+ unicode_whitespace_trans = dict.fromkeys(map(ord, _whitespace), ord(' '))
+
+ # This funky little regex is just the trick for splitting
+ # text up into word-wrappable chunks. E.g.
+ # "Hello there -- you goof-ball, use the -b option!"
+ # splits into
+ # Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option!
+ # (after stripping out empty strings).
+ word_punct = r'[\w!"\'\&.,?]'
+ letter = r'[^\d\W]'
+ whitespace = r'[%s]' % re.escape(_whitespace)
+ nowhitespace = '[^' + whitespace[1:]
+ wordsep_re = re.compile(r'''
+ ( # any whitespace
+ %(ws)s+
+ | # em-dash between words
+ (?<=%(wp)s) -{2,} (?=\w)
+ | # word, possibly hyphenated
+ %(nws)s+? (?:
+ # hyphenated word
+ -(?: (?<=%(lt)s{2}-) | (?<=%(lt)s-%(lt)s-))
+ (?= %(lt)s -? %(lt)s)
+ | # end of word
+ (?=%(ws)s|\Z)
+ | # em-dash
+ (?<=%(wp)s) (?=-{2,}\w)
+ )
+ )''' % {'wp': word_punct, 'lt': letter,
+ 'ws': whitespace, 'nws': nowhitespace},
+ re.VERBOSE)
+ del word_punct, letter, nowhitespace
+
+    # This less funky little regex just splits on recognized spaces. E.g.
+ # "Hello there -- you goof-ball, use the -b option!"
+ # splits into
+ # Hello/ /there/ /--/ /you/ /goof-ball,/ /use/ /the/ /-b/ /option!/
+ wordsep_simple_re = re.compile(r'(%s+)' % whitespace)
+ del whitespace
+
+ # XXX this is not locale- or charset-aware -- string.lowercase
+ # is US-ASCII only (and therefore English-only)
+ sentence_end_re = re.compile(r'[a-z]' # lowercase letter
+ r'[\.\!\?]' # sentence-ending punct.
+ r'[\"\']?' # optional end-of-quote
+ r'\Z') # end of chunk
+
+ def __init__(self,
+ width=70,
+ initial_indent="",
+ subsequent_indent="",
+ expand_tabs=True,
+ replace_whitespace=True,
+ fix_sentence_endings=False,
+ break_long_words=True,
+ drop_whitespace=True,
+ break_on_hyphens=True,
+ hyphenate_broken_words=True,
+ tabsize=8,
+ *,
+ max_lines=None,
+ placeholder=' [...]'):
+ self.width = width
+ self.initial_indent = initial_indent
+ self.subsequent_indent = subsequent_indent
+ self.expand_tabs = expand_tabs
+ self.replace_whitespace = replace_whitespace
+ self.fix_sentence_endings = fix_sentence_endings
+ self.break_long_words = break_long_words
+ self.drop_whitespace = drop_whitespace
+ self.break_on_hyphens = break_on_hyphens
+ self.tabsize = tabsize
+ self.max_lines = max_lines
+ self.placeholder = placeholder
+ self.hyphenate_broken_words = hyphenate_broken_words
+
+
+ # -- Private methods -----------------------------------------------
+ # (possibly useful for subclasses to override)
+
+ def _munge_whitespace(self, text):
+ """_munge_whitespace(text : string) -> string
+
+ Munge whitespace in text: expand tabs and convert all other
+ whitespace characters to spaces. Eg. " foo\\tbar\\n\\nbaz"
+ becomes " foo bar baz".
+ """
+ if self.expand_tabs:
+ text = text.expandtabs(self.tabsize)
+ if self.replace_whitespace:
+ text = text.translate(self.unicode_whitespace_trans)
+ return text
+
+
+ def _split(self, text):
+ """_split(text : string) -> [string]
+
+ Split the text to wrap into indivisible chunks. Chunks are
+ not quite the same as words; see _wrap_chunks() for full
+ details. As an example, the text
+ Look, goof-ball -- use the -b option!
+ breaks into the following chunks:
+ 'Look,', ' ', 'goof-', 'ball', ' ', '--', ' ',
+ 'use', ' ', 'the', ' ', '-b', ' ', 'option!'
+ if break_on_hyphens is True, or in:
+ 'Look,', ' ', 'goof-ball', ' ', '--', ' ',
+            'use', ' ', 'the', ' ', '-b', ' ', 'option!'
+ otherwise.
+ """
+ if self.break_on_hyphens is True:
+ chunks = self.wordsep_re.split(text)
+ else:
+ chunks = self.wordsep_simple_re.split(text)
+ chunks = [c for c in chunks if c]
+
+ return chunks
+
+ def _fix_sentence_endings(self, chunks):
+ """_fix_sentence_endings(chunks : [string])
+
+ Correct for sentence endings buried in 'chunks'. Eg. when the
+ original text contains "... foo.\\nBar ...", munge_whitespace()
+ and split() will convert that to [..., "foo.", " ", "Bar", ...]
+ which has one too few spaces; this method simply changes the one
+ space to two.
+ """
+ i = 0
+ patsearch = self.sentence_end_re.search
+ while i < len(chunks)-1:
+ if chunks[i+1] == " " and patsearch(chunks[i]):
+                chunks[i+1] = "  "
+ i += 2
+ else:
+ i += 1
+
+ def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
+ """_handle_long_word(chunks : [string],
+ cur_line : [string],
+ cur_len : int, width : int)
+
+ Handle a chunk of text (most likely a word, not whitespace) that
+ is too long to fit in any line.
+ """
+ # Figure out when indent is larger than the specified width, and make
+ # sure at least one character is stripped off on every pass
+ if width < 1:
+ space_left = 1
+ else:
+ space_left = width - cur_len
+
+ # If we're allowed to break long words, then do so: put as much
+ # of the next chunk onto the current line as will fit.
+ if self.break_long_words:
+ end = space_left
+ chunk = reversed_chunks[-1]
+ if self.break_on_hyphens and len(chunk) > space_left:
+ # break after last hyphen, but only if there are
+ # non-hyphens before it
+ hyphen = chunk.rfind('-', 0, space_left)
+ if hyphen > 0 and any(c != '-' for c in chunk[:hyphen]):
+ end = hyphen + 1
+
+ if chunk[:end]:
+ cur_line.append(chunk[:end])
+                # Add a hyphen whenever a long word is split onto the next line,
+                # unless certain characters already exist at the split
+ if self.hyphenate_broken_words and chunk[:end][-1] not in ['-','.',',']:
+ cur_line.append('-')
+ reversed_chunks[-1] = chunk[end:]
+
+ # Otherwise, we have to preserve the long word intact. Only add
+ # it to the current line if there's nothing already there --
+ # that minimizes how much we violate the width constraint.
+ elif not cur_line:
+ cur_line.append(reversed_chunks.pop())
+
+ # If we're not allowed to break long words, and there's already
+ # text on the current line, do nothing. Next time through the
+ # main loop of _wrap_chunks(), we'll wind up here again, but
+ # cur_len will be zero, so the next line will be entirely
+ # devoted to the long word that we can't handle right now.
+
+ def _wrap_chunks(self, chunks):
+ """_wrap_chunks(chunks : [string]) -> [string]
+
+ Wrap a sequence of text chunks and return a list of lines of
+ length 'self.width' or less. (If 'break_long_words' is false,
+ some lines may be longer than this.) Chunks correspond roughly
+ to words and the whitespace between them: each chunk is
+ indivisible (modulo 'break_long_words'), but a line break can
+ come between any two chunks. Chunks should not have internal
+ whitespace; ie. a chunk is either all whitespace or a "word".
+ Whitespace chunks will be removed from the beginning and end of
+ lines, but apart from that whitespace is preserved.
+ """
+ lines = []
+ if self.width <= 0:
+ raise ValueError("invalid width %r (must be > 0)" % self.width)
+ if self.max_lines is not None:
+ if self.max_lines > 1:
+ indent = self.subsequent_indent
+ else:
+ indent = self.initial_indent
+ if len(indent) + len(self.placeholder.lstrip()) > self.width:
+ raise ValueError("placeholder too large for max width")
+
+ # Arrange in reverse order so items can be efficiently popped
+        # from a stack of chunks.
+ chunks.reverse()
+
+ while chunks:
+
+ # Start the list of chunks that will make up the current line.
+ # cur_len is just the length of all the chunks in cur_line.
+ cur_line = []
+ cur_len = 0
+
+ # Figure out which static string will prefix this line.
+ if lines:
+ indent = self.subsequent_indent
+ else:
+ indent = self.initial_indent
+
+ # Maximum width for this line.
+ width = self.width - len(indent)
+
+ # First chunk on line is whitespace -- drop it, unless this
+ # is the very beginning of the text (ie. no lines started yet).
+ if self.drop_whitespace and chunks[-1].strip() == '' and lines:
+ del chunks[-1]
+
+ while chunks:
+ l = len(chunks[-1])
+
+ # Can at least squeeze this chunk onto the current line.
+ if cur_len + l <= width:
+ cur_line.append(chunks.pop())
+ cur_len += l
+
+ # Nope, this line is full.
+ else:
+ break
+
+ # The current line is full, and the next chunk is too big to
+ # fit on *any* line (not just this one).
+ if chunks and len(chunks[-1]) > width:
+ self._handle_long_word(chunks, cur_line, cur_len, width)
+ cur_len = sum(map(len, cur_line))
+
+ # If the last chunk on this line is all whitespace, drop it.
+ if self.drop_whitespace and cur_line and cur_line[-1].strip() == '':
+ cur_len -= len(cur_line[-1])
+ del cur_line[-1]
+
+ if cur_line:
+ if (self.max_lines is None or
+ len(lines) + 1 < self.max_lines or
+ (not chunks or
+ self.drop_whitespace and
+ len(chunks) == 1 and
+ not chunks[0].strip()) and cur_len <= width):
+ # Convert current line back to a string and store it in
+ # list of all lines (return value).
+ lines.append(indent + ''.join(cur_line))
+ else:
+ while cur_line:
+ if (cur_line[-1].strip() and
+ cur_len + len(self.placeholder) <= width):
+ cur_line.append(self.placeholder)
+ lines.append(indent + ''.join(cur_line))
+ break
+ cur_len -= len(cur_line[-1])
+ del cur_line[-1]
+ else:
+ if lines:
+ prev_line = lines[-1].rstrip()
+ if (len(prev_line) + len(self.placeholder) <=
+ self.width):
+ lines[-1] = prev_line + self.placeholder
+ break
+ lines.append(indent + self.placeholder.lstrip())
+ break
+
+ return lines
+
+ def _split_chunks(self, text):
+ text = self._munge_whitespace(text)
+ return self._split(text)
+
+ # -- Public interface ----------------------------------------------
+
+ def wrap(self, text):
+ """wrap(text : string) -> [string]
+
+ Reformat the single paragraph in 'text' so it fits in lines of
+ no more than 'self.width' columns, and return a list of wrapped
+ lines. Tabs in 'text' are expanded with string.expandtabs(),
+ and all other whitespace characters (including newline) are
+ converted to space.
+ """
+ chunks = self._split_chunks(text)
+ if self.fix_sentence_endings:
+ self._fix_sentence_endings(chunks)
+ return self._wrap_chunks(chunks)
+
+ def fill(self, text):
+ """fill(text : string) -> string
+
+ Reformat the single paragraph in 'text' to fit in lines of no
+ more than 'self.width' columns, and return a new string
+ containing the entire wrapped paragraph.
+ """
+ return "\n".join(self.wrap(text))
+
+
+# -- Convenience interface ---------------------------------------------
+
+def wrap(text, width=70, **kwargs):
+ """Wrap a single paragraph of text, returning a list of wrapped lines.
+
+ Reformat the single paragraph in 'text' so it fits in lines of no
+ more than 'width' columns, and return a list of wrapped lines. By
+ default, tabs in 'text' are expanded with string.expandtabs(), and
+ all other whitespace characters (including newline) are converted to
+ space. See TextWrapper class for available keyword args to customize
+ wrapping behaviour.
+ """
+ w = TextWrapper(width=width, **kwargs)
+ return w.wrap(text)
+
+def fill(text, width=70, **kwargs):
+ """Fill a single paragraph of text, returning a new string.
+
+ Reformat the single paragraph in 'text' to fit in lines of no more
+ than 'width' columns, and return a new string containing the entire
+ wrapped paragraph. As with wrap(), tabs are expanded and other
+ whitespace characters converted to space. See TextWrapper class for
+ available keyword args to customize wrapping behaviour.
+ """
+ w = TextWrapper(width=width, **kwargs)
+ return w.fill(text)
+
+def shorten(text, width, **kwargs):
+ """Collapse and truncate the given text to fit in the given width.
+
+ The text first has its whitespace collapsed. If it then fits in
+ the *width*, it is returned as is. Otherwise, as many words
+ as possible are joined and then the placeholder is appended::
+
+ >>> textwrap.shorten("Hello world!", width=12)
+ 'Hello world!'
+ >>> textwrap.shorten("Hello world!", width=11)
+ 'Hello [...]'
+ """
+ w = TextWrapper(width=width, max_lines=1, **kwargs)
+ return w.fill(' '.join(text.strip().split()))
+
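+# Illustrative doctest-style example of the added hyphenation behavior (word chosen
+# arbitrarily, default settings assumed). Note that the appended hyphen can push a
+# broken line one character past 'width', since _handle_long_word() appends it after
+# the line has already been filled:
+#
+#     >>> fill("supercalifragilisticexpialidocious", width=10)
+#     'supercalif-\nragilistic-\nexpialidoc-\nious'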
+
+# -- Loosely related functionality -------------------------------------
+
+_whitespace_only_re = re.compile('^[ \t]+$', re.MULTILINE)
+_leading_whitespace_re = re.compile('(^[ \t]*)(?:[^ \t\n])', re.MULTILINE)
+
+def dedent(text):
+ """Remove any common leading whitespace from every line in `text`.
+
+ This can be used to make triple-quoted strings line up with the left
+ edge of the display, while still presenting them in the source code
+ in indented form.
+
+ Note that tabs and spaces are both treated as whitespace, but they
+ are not equal: the lines " hello" and "\\thello" are
+ considered to have no common leading whitespace.
+
+ Entirely blank lines are normalized to a newline character.
+ """
+ # Look for the longest leading string of spaces and tabs common to
+ # all lines.
+ margin = None
+ text = _whitespace_only_re.sub('', text)
+ indents = _leading_whitespace_re.findall(text)
+ for indent in indents:
+ if margin is None:
+ margin = indent
+
+ # Current line more deeply indented than previous winner:
+ # no change (previous winner is still on top).
+ elif indent.startswith(margin):
+ pass
+
+ # Current line consistent with and no deeper than previous winner:
+ # it's the new winner.
+ elif margin.startswith(indent):
+ margin = indent
+
+ # Find the largest common whitespace between current line and previous
+ # winner.
+ else:
+ for i, (x, y) in enumerate(zip(margin, indent)):
+ if x != y:
+ margin = margin[:i]
+ break
+
+ # sanity check (testing/debugging only)
+ if 0 and margin:
+ for line in text.split("\n"):
+ assert not line or line.startswith(margin), \
+ "line = %r, margin = %r" % (line, margin)
+
+ if margin:
+ text = re.sub(r'(?m)^' + margin, '', text)
+ return text
+
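+# Illustrative doctest-style example: dedent() strips the longest run of spaces/tabs
+# common to all non-blank lines:
+#
+#     >>> dedent("    hello\n      world\n")
+#     'hello\n  world\n'
+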
+
+def indent(text, prefix, predicate=None):
+ """Adds 'prefix' to the beginning of selected lines in 'text'.
+
+ If 'predicate' is provided, 'prefix' will only be added to the lines
+ where 'predicate(line)' is True. If 'predicate' is not provided,
+ it will default to adding 'prefix' to all non-empty lines that do not
+ consist solely of whitespace characters.
+ """
+ if predicate is None:
+ # str.splitlines(True) doesn't produce empty string.
+ # ''.splitlines(True) => []
+ # 'foo\n'.splitlines(True) => ['foo\n']
+ # So we can use just `not s.isspace()` here.
+ predicate = lambda s: not s.isspace()
+
+ prefixed_lines = []
+ for line in text.splitlines(True):
+ if predicate(line):
+ prefixed_lines.append(prefix)
+ prefixed_lines.append(line)
+
+ return ''.join(prefixed_lines)
diff --git a/local_inpainter.py b/local_inpainter.py
new file mode 100644
index 0000000000000000000000000000000000000000..1d788627b8c3d8ebf924ee27aaa0f3a8e2e8b9e3
--- /dev/null
+++ b/local_inpainter.py
@@ -0,0 +1,2531 @@
+"""
+Local inpainting implementation - COMPATIBLE VERSION WITH JIT SUPPORT
+Maintains full backward compatibility while adding proper JIT model support
+"""
+import os
+import sys
+import json
+import numpy as np
+import cv2
+from typing import Optional, List, Tuple, Dict, Any
+import logging
+import traceback
+import re
+import hashlib
+import urllib.request
+from pathlib import Path
+import threading
+import time
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+# Check if we're running in a frozen environment
+IS_FROZEN = getattr(sys, 'frozen', False)
+if IS_FROZEN:
+ MEIPASS = sys._MEIPASS
+ os.environ['TORCH_HOME'] = MEIPASS
+ os.environ['TRANSFORMERS_CACHE'] = os.path.join(MEIPASS, 'transformers')
+ os.environ['HF_HOME'] = os.path.join(MEIPASS, 'huggingface')
+ logger.info(f"Running in frozen environment: {MEIPASS}")
+
+# Environment variables for ONNX
+ONNX_CACHE_DIR = os.environ.get('ONNX_CACHE_DIR', 'models')
+AUTO_CONVERT_TO_ONNX = os.environ.get('AUTO_CONVERT_TO_ONNX', 'false').lower() == 'true'
+SKIP_ONNX_FOR_CKPT = os.environ.get('SKIP_ONNX_FOR_CKPT', 'true').lower() == 'true'
+FORCE_ONNX_REBUILD = os.environ.get('FORCE_ONNX_REBUILD', 'false').lower() == 'true'
+CACHE_DIR = os.environ.get('MODEL_CACHE_DIR', os.path.expanduser('~/.cache/inpainting'))
+
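+# Illustrative sketch: these settings are read once at import time, so they must be set
+# in the environment before this module is imported (the values shown are examples,
+# not the defaults):
+#
+#     import os
+#     os.environ['AUTO_CONVERT_TO_ONNX'] = 'true'   # convert supported checkpoints to ONNX
+#     os.environ['ONNX_CACHE_DIR'] = 'models'       # where converted ONNX models are kept
+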
+# Modified import handling for frozen environment
+TORCH_AVAILABLE = False
+torch = None
+nn = None
+F = None
+BaseModel = object
+
+try:
+ import onnxruntime_extensions
+ ONNX_EXTENSIONS_AVAILABLE = True
+except ImportError:
+ ONNX_EXTENSIONS_AVAILABLE = False
+ logger.info("ONNX Runtime Extensions not available - FFT models won't work in ONNX")
+
+if IS_FROZEN:
+ # In frozen environment, try harder to import
+ try:
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ TORCH_AVAILABLE = True
+ BaseModel = nn.Module
+ logger.info("✓ PyTorch loaded in frozen environment")
+ except Exception as e:
+ logger.error(f"PyTorch not available in frozen environment: {e}")
+ logger.error("❌ Inpainting disabled - PyTorch is required")
+else:
+ # Normal environment
+ try:
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ TORCH_AVAILABLE = True
+ BaseModel = nn.Module
+ except ImportError:
+ TORCH_AVAILABLE = False
+ logger.error("PyTorch not available - inpainting disabled")
+
+# Configure ORT memory behavior before importing
+try:
+ os.environ.setdefault('ORT_DISABLE_MEMORY_ARENA', '1')
+except Exception:
+ pass
+# ONNX Runtime - usually works well in frozen environments
+ONNX_AVAILABLE = False
+try:
+ import onnx
+ import onnxruntime as ort
+ ONNX_AVAILABLE = True
+ logger.info("✓ ONNX Runtime available")
+except ImportError:
+ ONNX_AVAILABLE = False
+ logger.warning("ONNX Runtime not available")
+
+# Bubble detector - optional
+BUBBLE_DETECTOR_AVAILABLE = False
+try:
+ from bubble_detector import BubbleDetector
+ BUBBLE_DETECTOR_AVAILABLE = True
+ logger.info("✓ Bubble detector available")
+except ImportError:
+ logger.info("Bubble detector not available - basic inpainting will be used")
+
+
+# JIT Model URLs (for automatic download)
+LAMA_JIT_MODELS = {
+ 'lama': {
+ 'url': 'https://github.com/Sanster/models/releases/download/add_big_lama/big-lama.pt',
+ 'md5': 'e3aa4aaa15225a33ec84f9f4bc47e500',
+ 'name': 'BigLama'
+ },
+ 'anime': {
+ 'url': 'https://github.com/Sanster/models/releases/download/AnimeMangaInpainting/anime-manga-big-lama.pt',
+ 'md5': '29f284f36a0a510bcacf39ecf4c4d54f',
+ 'name': 'Anime-Manga BigLama'
+ },
+ 'lama_official': {
+ 'url': 'https://github.com/Sanster/models/releases/download/lama/lama.pt',
+ 'md5': '4b1a1de53b7a74e0ff9dd622834e8e1e',
+ 'name': 'LaMa Official'
+ },
+ 'aot': {
+ 'url': 'https://huggingface.co/ogkalu/aot-inpainting-jit/resolve/main/aot_traced.pt',
+ 'md5': '5ecdac562c1d56267468fc4fbf80db27',
+ 'name': 'AOT GAN'
+ },
+ 'aot_onnx': {
+ 'url': 'https://huggingface.co/ogkalu/aot-inpainting/resolve/main/aot.onnx',
+ 'md5': 'ffd39ed8e2a275869d3b49180d030f0d8b8b9c2c20ed0e099ecd207201f0eada',
+ 'name': 'AOT ONNX (Fast)',
+ 'is_onnx': True
+ },
+ 'lama_onnx': {
+ 'url': 'https://huggingface.co/Carve/LaMa-ONNX/resolve/main/lama_fp32.onnx',
+ 'md5': None, # Add MD5 if you want to verify
+ 'name': 'LaMa ONNX (Carve)',
+ 'is_onnx': True # Flag to indicate this is ONNX, not JIT
+ },
+ 'anime_onnx': {
+ 'url': 'https://huggingface.co/ogkalu/lama-manga-onnx-dynamic/resolve/main/lama-manga-dynamic.onnx',
+ 'md5': 'de31ffa5ba26916b8ea35319f6c12151ff9654d4261bccf0583a69bb095315f9',
+ 'name': 'Anime/Manga ONNX (Dynamic)',
+ 'is_onnx': True # Flag to indicate this is ONNX
+ }
+}
+
+
+def norm_img(img: np.ndarray) -> np.ndarray:
+ """Normalize image to [0, 1] range"""
+ if img.dtype == np.uint8:
+ return img.astype(np.float32) / 255.0
+ return img
+
+
+def get_cache_path_by_url(url: str) -> str:
+ """Get cache path for a model URL"""
+ os.makedirs(CACHE_DIR, exist_ok=True)
+ filename = os.path.basename(url)
+ return os.path.join(CACHE_DIR, filename)
+
+
+def download_model(url: str, md5: str = None) -> str:
+ """Download model if not cached"""
+ cache_path = get_cache_path_by_url(url)
+
+ if os.path.exists(cache_path):
+ logger.info(f"✅ Model already cached: {cache_path}")
+ return cache_path
+
+ logger.info(f"📥 Downloading model from {url}")
+
+ try:
+ urllib.request.urlretrieve(url, cache_path)
+ logger.info(f"✅ Model downloaded to: {cache_path}")
+ return cache_path
+ except Exception as e:
+ logger.error(f"❌ Download failed: {e}")
+ if os.path.exists(cache_path):
+ os.remove(cache_path)
+ raise
+
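+# Illustrative sketch: fetching one of the JIT models listed in LAMA_JIT_MODELS above.
+# download_model() caches the file (named after its URL basename) and returns the local path:
+#
+#     entry = LAMA_JIT_MODELS['anime']
+#     model_path = download_model(entry['url'], entry['md5'])
+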
+
+class FFCInpaintModel(BaseModel): # Use BaseModel instead of nn.Module
+ """FFC model for LaMa inpainting - for checkpoint compatibility"""
+
+ def __init__(self):
+ if not TORCH_AVAILABLE:
+ # Initialize as a simple object when PyTorch is not available
+ super().__init__()
+ logger.warning("PyTorch not available - FFCInpaintModel initialized as placeholder")
+ self._pytorch_available = False
+ return
+
+ # Additional safety check for nn being None
+ if nn is None:
+ super().__init__()
+ logger.error("Neural network modules not available - FFCInpaintModel disabled")
+ self._pytorch_available = False
+ return
+
+ super().__init__()
+ self._pytorch_available = True
+
+ try:
+ # Encoder
+ self.model_1_ffc_convl2l = nn.Conv2d(4, 64, 7, padding=3)
+ self.model_1_bn_l = nn.BatchNorm2d(64)
+
+ self.model_2_ffc_convl2l = nn.Conv2d(64, 128, 3, padding=1)
+ self.model_2_bn_l = nn.BatchNorm2d(128)
+
+ self.model_3_ffc_convl2l = nn.Conv2d(128, 256, 3, padding=1)
+ self.model_3_bn_l = nn.BatchNorm2d(256)
+
+ self.model_4_ffc_convl2l = nn.Conv2d(256, 128, 3, padding=1)
+ self.model_4_ffc_convl2g = nn.Conv2d(256, 384, 3, padding=1)
+ self.model_4_bn_l = nn.BatchNorm2d(128)
+ self.model_4_bn_g = nn.BatchNorm2d(384)
+
+ # FFC blocks
+ for i in range(5, 23):
+ for conv_type in ['conv1', 'conv2']:
+ setattr(self, f'model_{i}_{conv_type}_ffc_convl2l', nn.Conv2d(128, 128, 3, padding=1))
+ setattr(self, f'model_{i}_{conv_type}_ffc_convl2g', nn.Conv2d(128, 384, 3, padding=1))
+ setattr(self, f'model_{i}_{conv_type}_ffc_convg2l', nn.Conv2d(384, 128, 3, padding=1))
+ setattr(self, f'model_{i}_{conv_type}_ffc_convg2g_conv1_0', nn.Conv2d(384, 192, 1))
+ setattr(self, f'model_{i}_{conv_type}_ffc_convg2g_conv1_1', nn.BatchNorm2d(192))
+ setattr(self, f'model_{i}_{conv_type}_ffc_convg2g_fu_conv_layer', nn.Conv2d(384, 384, 1))
+ setattr(self, f'model_{i}_{conv_type}_ffc_convg2g_fu_bn', nn.BatchNorm2d(384))
+ setattr(self, f'model_{i}_{conv_type}_ffc_convg2g_conv2', nn.Conv2d(192, 384, 1))
+ setattr(self, f'model_{i}_{conv_type}_bn_l', nn.BatchNorm2d(128))
+ setattr(self, f'model_{i}_{conv_type}_bn_g', nn.BatchNorm2d(384))
+
+ # Decoder
+ self.model_24 = nn.Conv2d(512, 256, 3, padding=1)
+ self.model_25 = nn.BatchNorm2d(256)
+
+ self.model_27 = nn.Conv2d(256, 128, 3, padding=1)
+ self.model_28 = nn.BatchNorm2d(128)
+
+ self.model_30 = nn.Conv2d(128, 64, 3, padding=1)
+ self.model_31 = nn.BatchNorm2d(64)
+
+ self.model_34 = nn.Conv2d(64, 3, 7, padding=3)
+
+ # Activation functions
+ self.relu = nn.ReLU(inplace=True)
+ self.tanh = nn.Tanh()
+
+ logger.info("FFCInpaintModel initialized successfully")
+
+ except Exception as e:
+ logger.error(f"Failed to initialize FFCInpaintModel: {e}")
+ self._pytorch_available = False
+ raise
+
+ def forward(self, image, mask):
+ if not self._pytorch_available:
+ logger.error("PyTorch not available for forward pass")
+ raise RuntimeError("PyTorch not available for forward pass")
+
+ if not TORCH_AVAILABLE or torch is None:
+ logger.error("PyTorch not available for forward pass")
+ raise RuntimeError("PyTorch not available for forward pass")
+
+ try:
+ x = torch.cat([image, mask], dim=1)
+
+ x = self.relu(self.model_1_bn_l(self.model_1_ffc_convl2l(x)))
+ x = self.relu(self.model_2_bn_l(self.model_2_ffc_convl2l(x)))
+ x = self.relu(self.model_3_bn_l(self.model_3_ffc_convl2l(x)))
+
+ x_l = self.relu(self.model_4_bn_l(self.model_4_ffc_convl2l(x)))
+ x_g = self.relu(self.model_4_bn_g(self.model_4_ffc_convl2g(x)))
+
+ for i in range(5, 23):
+ identity_l, identity_g = x_l, x_g
+ x_l, x_g = self._ffc_block(x_l, x_g, i, 'conv1')
+ x_l, x_g = self._ffc_block(x_l, x_g, i, 'conv2')
+ x_l = x_l + identity_l
+ x_g = x_g + identity_g
+
+ x = torch.cat([x_l, x_g], dim=1)
+ x = self.relu(self.model_25(self.model_24(x)))
+ x = self.relu(self.model_28(self.model_27(x)))
+ x = self.relu(self.model_31(self.model_30(x)))
+ x = self.tanh(self.model_34(x))
+
+ mask_3ch = mask.repeat(1, 3, 1, 1)
+ return x * mask_3ch + image * (1 - mask_3ch)
+
+ except Exception as e:
+ logger.error(f"Forward pass failed: {e}")
+ raise RuntimeError(f"Forward pass failed: {e}")
+
+ def _ffc_block(self, x_l, x_g, idx, conv_type):
+ if not self._pytorch_available:
+ raise RuntimeError("PyTorch not available for FFC block")
+
+ if not TORCH_AVAILABLE:
+ raise RuntimeError("PyTorch not available for FFC block")
+
+ try:
+ convl2l = getattr(self, f'model_{idx}_{conv_type}_ffc_convl2l')
+ convl2g = getattr(self, f'model_{idx}_{conv_type}_ffc_convl2g')
+ convg2l = getattr(self, f'model_{idx}_{conv_type}_ffc_convg2l')
+ convg2g_conv1 = getattr(self, f'model_{idx}_{conv_type}_ffc_convg2g_conv1_0')
+ convg2g_bn1 = getattr(self, f'model_{idx}_{conv_type}_ffc_convg2g_conv1_1')
+ fu_conv = getattr(self, f'model_{idx}_{conv_type}_ffc_convg2g_fu_conv_layer')
+ fu_bn = getattr(self, f'model_{idx}_{conv_type}_ffc_convg2g_fu_bn')
+ convg2g_conv2 = getattr(self, f'model_{idx}_{conv_type}_ffc_convg2g_conv2')
+ bn_l = getattr(self, f'model_{idx}_{conv_type}_bn_l')
+ bn_g = getattr(self, f'model_{idx}_{conv_type}_bn_g')
+
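+ # Cross-scale mixing: the local output sums local->local and global->local
+ # convolutions, while the global output sums local->global, a bottlenecked
+ # global->global path (1x1 conv -> BN -> ReLU -> 1x1 conv) and a fused
+ # global->global 1x1 path, before batch norm + ReLU on each branch.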
+ out_xl = convl2l(x_l) + convg2l(x_g)
+ out_xg = convl2g(x_l) + convg2g_conv2(self.relu(convg2g_bn1(convg2g_conv1(x_g)))) + self.relu(fu_bn(fu_conv(x_g)))
+
+ return self.relu(bn_l(out_xl)), self.relu(bn_g(out_xg))
+
+ except Exception as e:
+ logger.error(f"FFC block failed: {e}")
+ raise RuntimeError(f"FFC block failed: {e}")
+
+
+class LocalInpainter:
+ """Local inpainter with full backward compatibility"""
+
+ # MAINTAIN ORIGINAL SUPPORTED_METHODS for compatibility
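+ # All methods currently point to the same FFCInpaintModel class; which backend
+ # actually runs (JIT, checkpoint, or ONNX) is decided later in load_model()
+ # based on the model file that gets loaded.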
+ SUPPORTED_METHODS = {
+ 'lama': ('LaMa Inpainting', FFCInpaintModel),
+ 'mat': ('MAT Inpainting', FFCInpaintModel),
+ 'aot': ('AOT GAN Inpainting', FFCInpaintModel),
+ 'aot_onnx': ('AOT ONNX (Fast)', FFCInpaintModel),
+ 'sd': ('Stable Diffusion Inpainting', FFCInpaintModel),
+ 'anime': ('Anime/Manga Inpainting', FFCInpaintModel),
+ 'anime_onnx': ('Anime ONNX (Fast)', FFCInpaintModel),
+ 'lama_official': ('Official LaMa', FFCInpaintModel),
+ }
+
+ def __init__(self, config_path="config.json"):
+ # Set thread limits early if environment indicates single-threaded mode
+ try:
+ if os.environ.get('OMP_NUM_THREADS') == '1':
+ # Already in single-threaded mode, ensure it's applied to this process
+ # Check if torch is available at module level before trying to use it
+ if TORCH_AVAILABLE and torch is not None:
+ try:
+ torch.set_num_threads(1)
+ except (RuntimeError, AttributeError):
+ pass
+ try:
+ import cv2
+ cv2.setNumThreads(1)
+ except (ImportError, AttributeError):
+ pass
+ except Exception:
+ pass
+
+ self.config_path = config_path
+ self.config = self._load_config()
+ self.model = None
+ self.model_loaded = False
+ self.current_method = None
+ self.use_opencv_fallback = False # FORCED DISABLED - No OpenCV fallback allowed
+ self.onnx_session = None
+ self.use_onnx = False
+ self.is_jit_model = False
+ self.pad_mod = 8
+
+ # Default tiling settings - OFF by default for most models
+ self.tiling_enabled = False
+ self.tile_size = 512
+ self.tile_overlap = 64
+
+ # ONNX-specific settings
+ self.onnx_model_loaded = False
+ self.onnx_input_size = None # Will be detected from model
+
+ # Quantization diagnostics flags
+ self.onnx_quantize_applied = False
+ self.torch_quantize_applied = False
+
+ # Bubble detection
+ self.bubble_detector = None
+ self.bubble_model_loaded = False
+
+ # Create directories
+ os.makedirs(ONNX_CACHE_DIR, exist_ok=True)
+ os.makedirs(CACHE_DIR, exist_ok=True)
+ logger.info(f"📁 ONNX cache directory: {ONNX_CACHE_DIR}")
+ logger.info(f" Contents: {os.listdir(ONNX_CACHE_DIR) if os.path.exists(ONNX_CACHE_DIR) else 'Directory does not exist'}")
+
+
+ # Check GPU availability safely
+ self.use_gpu = False
+ self.device = None
+
+ if TORCH_AVAILABLE and torch is not None:
+ try:
+ self.use_gpu = torch.cuda.is_available()
+ self.device = torch.device('cuda' if self.use_gpu else 'cpu')
+ if self.use_gpu:
+ logger.info(f"🚀 GPU: {torch.cuda.get_device_name(0)}")
+ else:
+ logger.info("💻 Using CPU")
+ except AttributeError:
+ # torch module exists but doesn't have cuda attribute
+ self.use_gpu = False
+ self.device = None
+ logger.info("⚠️ PyTorch incomplete - inpainting disabled")
+ else:
+ logger.info("⚠️ PyTorch not available - inpainting disabled")
+
+ # Quantization/precision toggle (off by default)
+ try:
+ adv_cfg = self.config.get('manga_settings', {}).get('advanced', {}) if isinstance(self.config, dict) else {}
+ # Track singleton mode from settings for thread limiting (deprecated - kept for compatibility)
+ self.singleton_mode = bool(adv_cfg.get('use_singleton_models', True))
+ env_quant = os.environ.get('MODEL_QUANTIZE', 'false').lower() == 'true'
+ self.quantize_enabled = bool(env_quant or adv_cfg.get('quantize_models', False))
+ # ONNX quantization is now strictly opt-in (config or env), decoupled from general quantize_models
+ self.onnx_quantize_enabled = bool(
+ adv_cfg.get('onnx_quantize', os.environ.get('ONNX_QUANTIZE', 'false').lower() == 'true')
+ )
+ self.torch_precision = str(adv_cfg.get('torch_precision', os.environ.get('TORCH_PRECISION', 'auto'))).lower()
+ self.int8_enabled = bool(
+ adv_cfg.get('int8_quantize', False)
+ or adv_cfg.get('quantize_int8', False)
+ or os.environ.get('TORCH_INT8', 'false').lower() == 'true'
+ or self.torch_precision in ('int8', 'int8_dynamic')
+ )
+ logger.info(
+ f"Quantization: {'ENABLED' if self.quantize_enabled else 'disabled'} for Local Inpainter; "
+ f"onnx_quantize={'on' if self.onnx_quantize_enabled else 'off'}; "
+ f"torch_precision={self.torch_precision}; int8={'on' if self.int8_enabled else 'off'}"
+ )
+ except Exception:
+ self.quantize_enabled = False
+ self.onnx_quantize_enabled = False
+ self.torch_precision = 'auto'
+ self.int8_enabled = False
+
+ # HD strategy defaults (mirror of comic-translate behavior)
+ try:
+ adv_cfg = self.config.get('manga_settings', {}).get('advanced', {}) if isinstance(self.config, dict) else {}
+ except Exception:
+ adv_cfg = {}
+ try:
+ self.hd_strategy = str(os.environ.get('HD_STRATEGY', adv_cfg.get('hd_strategy', 'resize'))).lower()
+ except Exception:
+ self.hd_strategy = 'resize'
+ try:
+ self.hd_resize_limit = int(os.environ.get('HD_RESIZE_LIMIT', adv_cfg.get('hd_strategy_resize_limit', 1536)))
+ except Exception:
+ self.hd_resize_limit = 1536
+ try:
+ self.hd_crop_margin = int(os.environ.get('HD_CROP_MARGIN', adv_cfg.get('hd_strategy_crop_margin', 16)))
+ except Exception:
+ self.hd_crop_margin = 16
+ try:
+ self.hd_crop_trigger_size = int(os.environ.get('HD_CROP_TRIGGER', adv_cfg.get('hd_strategy_crop_trigger_size', 1024)))
+ except Exception:
+ self.hd_crop_trigger_size = 1024
+ logger.info(f"HD strategy: {self.hd_strategy} (resize_limit={self.hd_resize_limit}, crop_margin={self.hd_crop_margin}, crop_trigger={self.hd_crop_trigger_size})")
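+ # Illustrative config snippet for these settings (under manga_settings.advanced):
+ # "hd_strategy": "resize" | "crop",
+ # "hd_strategy_resize_limit": 1536,
+ # "hd_strategy_crop_margin": 16,
+ # "hd_strategy_crop_trigger_size": 1024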
+
+ # Stop flag support
+ self.stop_flag = None
+ self._stopped = False
+ self.log_callback = None
+
+ # Initialize bubble detector if available
+ if BUBBLE_DETECTOR_AVAILABLE:
+ try:
+ self.bubble_detector = BubbleDetector()
+ logger.info("🗨️ Bubble detection available")
+ except Exception:
+ self.bubble_detector = None
+ logger.info("🗨️ Bubble detection not available")
+
+ def _load_config(self):
+ try:
+ if self.config_path and os.path.exists(self.config_path):
+ with open(self.config_path, 'r', encoding='utf-8') as f:
+ content = f.read().strip()
+ if not content:
+ return {}
+ try:
+ return json.loads(content)
+ except json.JSONDecodeError:
+ # Likely a concurrent write; retry once after a short delay
+ try:
+ import time
+ time.sleep(0.05)
+ with open(self.config_path, 'r', encoding='utf-8') as f2:
+ return json.load(f2)
+ except Exception:
+ return {}
+ except Exception:
+ return {}
+ return {}
+
+ def _save_config(self):
+ # Don't save if config is empty (prevents purging)
+ if not getattr(self, 'config', None):
+ return
+ try:
+ # Load existing (best-effort)
+ full_config = {}
+ if self.config_path and os.path.exists(self.config_path):
+ try:
+ with open(self.config_path, 'r', encoding='utf-8') as f:
+ full_config = json.load(f)
+ except Exception as read_err:
+ logger.debug(f"Config read during save failed (non-critical): {read_err}")
+ full_config = {}
+ # Update
+ full_config.update(self.config)
+ # Atomic write: write to temp then replace
+ tmp_path = (self.config_path or 'config.json') + '.tmp'
+ with open(tmp_path, 'w', encoding='utf-8') as f:
+ json.dump(full_config, f, indent=2, ensure_ascii=False)
+ try:
+ os.replace(tmp_path, self.config_path or 'config.json')
+ except Exception as replace_err:
+ logger.debug(f"Config atomic replace failed, trying direct write: {replace_err}")
+ # Fallback to direct write
+ with open(self.config_path or 'config.json', 'w', encoding='utf-8') as f:
+ json.dump(full_config, f, indent=2, ensure_ascii=False)
+ except Exception as save_err:
+ # Never crash on config save, but log for debugging
+ logger.debug(f"Config save failed (non-critical): {save_err}")
+
+ def set_stop_flag(self, stop_flag):
+ """Set the stop flag for checking interruptions"""
+ self.stop_flag = stop_flag
+ self._stopped = False
+
+ def set_log_callback(self, log_callback):
+ """Set log callback for GUI integration"""
+ self.log_callback = log_callback
+
+ def _check_stop(self) -> bool:
+ """Check if stop has been requested"""
+ if self._stopped:
+ return True
+ if self.stop_flag and self.stop_flag.is_set():
+ self._stopped = True
+ return True
+ # Check global manga translator cancellation
+ try:
+ from manga_translator import MangaTranslator
+ if MangaTranslator.is_globally_cancelled():
+ self._stopped = True
+ return True
+ except Exception:
+ pass
+ return False
+
+ def _log(self, message: str, level: str = "info"):
+ """Log message with stop suppression"""
+ # Suppress logs when stopped (allow only essential stop confirmation messages)
+ if self._check_stop():
+ essential_stop_keywords = [
+ "⏹️ Translation stopped by user",
+ "⏹️ Inpainting stopped",
+ "cleanup", "🧹"
+ ]
+ if not any(keyword in message for keyword in essential_stop_keywords):
+ return
+
+ if self.log_callback:
+ self.log_callback(message, level)
+ else:
+ getattr(logger, level, logger.info)(message)
+
+ def reset_stop_flags(self):
+ """Reset stop flags when starting new processing"""
+ self._stopped = False
+
+ def convert_to_onnx(self, model_path: str, method: str) -> Optional[str]:
+ """Convert a PyTorch model to ONNX format with FFT handling via custom operators"""
+ if not ONNX_AVAILABLE:
+ logger.warning("ONNX not available, skipping conversion")
+ return None
+
+ try:
+ # Generate ONNX path
+ model_name = os.path.basename(model_path).replace('.pt', '')
+ onnx_path = os.path.join(ONNX_CACHE_DIR, f"{model_name}_{method}.onnx")
+
+ # Check if ONNX already exists
+ if os.path.exists(onnx_path) and not FORCE_ONNX_REBUILD:
+ logger.info(f"✅ ONNX model already exists: {onnx_path}")
+ return onnx_path
+
+ logger.info(f"🔄 Converting {method} model to ONNX...")
+
+ # The model should already be loaded at this point
+ if not self.model_loaded or self.current_method != method:
+ logger.error("Model not loaded for ONNX conversion")
+ return None
+
+ # Create dummy inputs
+ dummy_image = torch.randn(1, 3, 512, 512).to(self.device)
+ dummy_mask = torch.randn(1, 1, 512, 512).to(self.device)
+
+ # For FFT models, we can't convert directly
+ fft_models = ['lama', 'anime', 'lama_official']
+ if method in fft_models:
+ logger.warning(f"⚠️ {method.upper()} uses FFT operations that cannot be exported")
+ return None # Just return None, don't suggest Carve
+
+ # Standard export for non-FFT models
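+ # dynamic_axes marks batch/height/width as symbolic so the exported graph
+ # accepts variable-sized inputs at inference time instead of the fixed
+ # 512x512 dummy shape used for tracing.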
+ try:
+ torch.onnx.export(
+ self.model,
+ (dummy_image, dummy_mask),
+ onnx_path,
+ export_params=True,
+ opset_version=13,
+ do_constant_folding=True,
+ input_names=['image', 'mask'],
+ output_names=['output'],
+ dynamic_axes={
+ 'image': {0: 'batch', 2: 'height', 3: 'width'},
+ 'mask': {0: 'batch', 2: 'height', 3: 'width'},
+ 'output': {0: 'batch', 2: 'height', 3: 'width'}
+ }
+ )
+ logger.info(f"✅ ONNX model saved to: {onnx_path}")
+ return onnx_path
+
+ except torch.onnx.errors.UnsupportedOperatorError as e:
+ logger.error(f"❌ Unsupported operator: {e}")
+ return None
+
+ except Exception as e:
+ logger.error(f"❌ ONNX conversion failed: {e}")
+ logger.error(traceback.format_exc())
+ return None
+
+ def load_onnx_model(self, onnx_path: str) -> bool:
+ """Load an ONNX model with custom operator support"""
+ if not ONNX_AVAILABLE:
+ logger.error("ONNX Runtime not available")
+ return False
+
+ # Check if this exact ONNX model is already loaded
+ if (self.onnx_session is not None and
+ hasattr(self, 'current_onnx_path') and
+ self.current_onnx_path == onnx_path):
+ logger.debug(f"✅ ONNX model already loaded: {onnx_path}")
+ return True
+
+ try:
+ # Don't log here if we already logged in load_model
+ logger.debug(f"📦 ONNX Runtime loading: {onnx_path}")
+
+ # Store the path for later checking
+ self.current_onnx_path = onnx_path
+
+ # Check if this is a Carve model (fixed 512x512)
+ is_carve_model = "lama_fp32" in onnx_path or "carve" in onnx_path.lower()
+ if is_carve_model:
+ logger.info("📦 Detected Carve ONNX model (fixed 512x512 input)")
+ self.onnx_fixed_size = (512, 512)
+ else:
+ self.onnx_fixed_size = None
+
+ # Standard ONNX loading: prefer CUDA if available; otherwise CPU. Do NOT use DML.
+ try:
+ avail = ort.get_available_providers() if ONNX_AVAILABLE else []
+ except Exception:
+ avail = []
+ if 'CUDAExecutionProvider' in avail:
+ providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
+ else:
+ providers = ['CPUExecutionProvider']
+ session_path = onnx_path
+ try:
+ fname_lower = os.path.basename(onnx_path).lower()
+ except Exception:
+ fname_lower = str(onnx_path).lower()
+
+ # Device-aware policy for LaMa-type ONNX (Carve or contains 'lama')
+ is_lama_model = is_carve_model or ('lama' in fname_lower)
+ if is_lama_model:
+ base = os.path.splitext(onnx_path)[0]
+ if self.use_gpu:
+ # Prefer FP16 on CUDA
+ fp16_path = base + '.fp16.onnx'
+ if (not os.path.exists(fp16_path)) or FORCE_ONNX_REBUILD:
+ try:
+ import onnx as _onnx
+ try:
+ from onnxruntime_tools.transformers.float16 import convert_float_to_float16 as _to_fp16
+ except Exception:
+ try:
+ from onnxconverter_common import float16
+ def _to_fp16(m, keep_io_types=True):
+ return float16.convert_float_to_float16(m, keep_io_types=keep_io_types)
+ except Exception:
+ _to_fp16 = None
+ if _to_fp16 is not None:
+ m = _onnx.load(onnx_path)
+ m_fp16 = _to_fp16(m, keep_io_types=True)
+ _onnx.save(m_fp16, fp16_path)
+ logger.info(f"✅ Generated FP16 ONNX for LaMa: {fp16_path}")
+ except Exception as e:
+ logger.warning(f"FP16 conversion for LaMa failed: {e}")
+ if os.path.exists(fp16_path):
+ session_path = fp16_path
+ else:
+ # CPU path for LaMa: quantize only if enabled, and MatMul-only to avoid artifacts
+ if self.onnx_quantize_enabled:
+ try:
+ from onnxruntime.quantization import quantize_dynamic, QuantType
+ quant_path = base + '.matmul.int8.onnx'
+ if (not os.path.exists(quant_path)) or FORCE_ONNX_REBUILD:
+ logger.info("🔻 LaMa: Quantizing ONNX weights to INT8 (dynamic, ops=['MatMul'])...")
+ quantize_dynamic(
+ model_input=onnx_path,
+ model_output=quant_path,
+ weight_type=QuantType.QInt8,
+ op_types_to_quantize=['MatMul']
+ )
+ self.onnx_quantize_applied = True
+ # Validate dynamic quant result
+ try:
+ import onnx as _onnx
+ _m_q = _onnx.load(quant_path)
+ _onnx.checker.check_model(_m_q)
+ except Exception as _qchk:
+ logger.warning(f"LaMa dynamic quant model invalid; deleting and falling back: {_qchk}")
+ try:
+ os.remove(quant_path)
+ except Exception:
+ pass
+ quant_path = None
+ except Exception as dy_err:
+ logger.warning(f"LaMa dynamic quantization failed: {dy_err}")
+ quant_path = None
+ # Fallback: static QDQ MatMul-only with zero data reader
+ if quant_path is None:
+ try:
+ import onnx as _onnx
+ from onnxruntime.quantization import (
+ CalibrationDataReader, quantize_static,
+ QuantFormat, QuantType, CalibrationMethod
+ )
+ m = _onnx.load(onnx_path)
+ shapes = {}
+ for inp in m.graph.input:
+ dims = []
+ for d in inp.type.tensor_type.shape.dim:
+ dims.append(d.dim_value if d.dim_value > 0 else 1)
+ shapes[inp.name] = dims
+ class _ZeroReader(CalibrationDataReader):
+ def __init__(self, shapes):
+ self.shapes = shapes
+ self.done = False
+ def get_next(self):
+ if self.done:
+ return None
+ feed = {}
+ for name, s in self.shapes.items():
+ ss = list(s)
+ if len(ss) == 4:
+ if ss[2] <= 1: ss[2] = 512
+ if ss[3] <= 1: ss[3] = 512
+ if ss[1] <= 1 and 'mask' not in name.lower():
+ ss[1] = 3
+ feed[name] = np.zeros(ss, dtype=np.float32)
+ self.done = True
+ return feed
+ dr = _ZeroReader(shapes)
+ quant_path = base + '.matmul.int8.onnx'
+ quantize_static(
+ model_input=onnx_path,
+ model_output=quant_path,
+ calibration_data_reader=dr,
+ quant_format=QuantFormat.QDQ,
+ activation_type=QuantType.QUInt8,
+ weight_type=QuantType.QInt8,
+ per_channel=False,
+ calibrate_method=CalibrationMethod.MinMax,
+ op_types_to_quantize=['MatMul']
+ )
+ # Validate
+ try:
+ _m_q = _onnx.load(quant_path)
+ _onnx.checker.check_model(_m_q)
+ except Exception as _qchk2:
+ logger.warning(f"LaMa static MatMul-only quant model invalid; deleting: {_qchk2}")
+ try:
+ os.remove(quant_path)
+ except Exception:
+ pass
+ quant_path = None
+ else:
+ logger.info(f"✅ Generated MatMul-only INT8 ONNX for LaMa: {quant_path}")
+ self.onnx_quantize_applied = True
+ except Exception as st_err:
+ logger.warning(f"LaMa static MatMul-only quantization failed: {st_err}")
+ quant_path = None
+ # Use the quantized model if valid
+ if quant_path and os.path.exists(quant_path):
+ session_path = quant_path
+ logger.info(f"✅ Using LaMa quantized ONNX model: {quant_path}")
+ # If quantization not enabled or failed, session_path remains onnx_path (FP32)
+
+ # Optional dynamic/static quantization for other models (opt-in)
+ if (not is_lama_model) and self.onnx_quantize_enabled:
+ base = os.path.splitext(onnx_path)[0]
+ fname = os.path.basename(onnx_path).lower()
+ is_aot = 'aot' in fname
+ # For AOT: ignore any previously generated MatMul-only file and quantize to a dedicated .int8.onnx target (still MatMul-only ops)
+ if is_aot:
+ try:
+ ignored_matmul = base + ".matmul.int8.onnx"
+ if os.path.exists(ignored_matmul):
+ logger.info(f"⏭️ Ignoring MatMul-only quantized file for AOT: {ignored_matmul}")
+ except Exception:
+ pass
+ # Choose target quant file and ops
+ if is_aot:
+ quant_path = base + ".int8.onnx"
+ ops_to_quant = ['MatMul']
+ # Use MatMul-only for safer quantization across models
+ ops_for_static = ['MatMul']
+ # Try to simplify AOT graph prior to quantization
+ quant_input_path = onnx_path
+ try:
+ import onnx as _onnx
+ try:
+ from onnxsim import simplify as _onnx_simplify
+ _model = _onnx.load(onnx_path)
+ _sim_model, _check = _onnx_simplify(_model)
+ if _check:
+ sim_path = base + ".sim.onnx"
+ _onnx.save(_sim_model, sim_path)
+ quant_input_path = sim_path
+ logger.info(f"🧰 Simplified AOT ONNX before quantization: {sim_path}")
+ except Exception as _sim_err:
+ logger.info(f"AOT simplification skipped: {_sim_err}")
+ # No ONNX shape inference; keep original graph structure
+ # Ensure opset >= 13 for QDQ (axis attribute on DequantizeLinear)
+ try:
+ _m_tmp = _onnx.load(quant_input_path)
+ _opset = max([op.version for op in _m_tmp.opset_import]) if _m_tmp.opset_import else 11
+ if _opset < 13:
+ from onnx import version_converter as _vc
+ _m13 = _vc.convert_version(_m_tmp, 13)
+ up_path = base + ".op13.onnx"
+ _onnx.save(_m13, up_path)
+ quant_input_path = up_path
+ logger.info(f"🧰 Upgraded ONNX opset to 13 before QDQ quantization: {up_path}")
+ except Exception as _operr:
+ logger.info(f"Opset upgrade skipped: {_operr}")
+ except Exception:
+ quant_input_path = onnx_path
+ else:
+ quant_path = base + ".matmul.int8.onnx"
+ ops_to_quant = ['MatMul']
+ ops_for_static = ops_to_quant
+ quant_input_path = onnx_path
+ # Perform quantization if needed
+ if not os.path.exists(quant_path) or FORCE_ONNX_REBUILD:
+ if is_aot:
+ # Directly perform static QDQ quantization for MatMul only (avoid Conv activations)
+ try:
+ import onnx as _onnx
+ from onnxruntime.quantization import CalibrationDataReader, quantize_static, QuantFormat, QuantType, CalibrationMethod
+ _model = _onnx.load(quant_input_path)
+ # Build input shapes from the model graph
+ input_shapes = {}
+ for inp in _model.graph.input:
+ dims = []
+ for d in inp.type.tensor_type.shape.dim:
+ if d.dim_value > 0:
+ dims.append(d.dim_value)
+ else:
+ # default fallback dimension
+ dims.append(1)
+ input_shapes[inp.name] = dims
+ class _ZeroDataReader(CalibrationDataReader):
+ def __init__(self, input_shapes):
+ self._shapes = input_shapes
+ self._provided = False
+ def get_next(self):
+ if self._provided:
+ return None
+ feed = {}
+ for name, shape in self._shapes.items():
+ # Ensure reasonable default spatial size
+ s = list(shape)
+ if len(s) == 4:
+ if s[2] <= 1:
+ s[2] = 512
+ if s[3] <= 1:
+ s[3] = 512
+ # channel fallback
+ if s[1] <= 1 and 'mask' not in name.lower():
+ s[1] = 3
+ feed[name] = (np.zeros(s, dtype=np.float32))
+ self._provided = True
+ return feed
+ dr = _ZeroDataReader(input_shapes)
+ quantize_static(
+ model_input=quant_input_path,
+ model_output=quant_path,
+ calibration_data_reader=dr,
+ quant_format=QuantFormat.QDQ,
+ activation_type=QuantType.QUInt8,
+ weight_type=QuantType.QInt8,
+ per_channel=True,
+ calibrate_method=CalibrationMethod.MinMax,
+ op_types_to_quantize=ops_for_static
+ )
+ # Validate quantized model to catch structural errors early
+ try:
+ _m_q = _onnx.load(quant_path)
+ _onnx.checker.check_model(_m_q)
+ except Exception as _qchk:
+ logger.warning(f"Quantized AOT model validation failed: {_qchk}")
+ # Remove broken quantized file to force fallback
+ try:
+ os.remove(quant_path)
+ except Exception:
+ pass
+ else:
+ logger.info(f"✅ Static INT8 quantization produced: {quant_path}")
+ except Exception as st_err:
+ logger.warning(f"Static ONNX quantization failed: {st_err}")
+ else:
+ # First attempt: dynamic quantization (MatMul)
+ try:
+ from onnxruntime.quantization import quantize_dynamic, QuantType
+ logger.info("🔻 Quantizing ONNX inpainting model weights to INT8 (dynamic, ops=['MatMul'])...")
+ quantize_dynamic(
+ model_input=quant_input_path,
+ model_output=quant_path,
+ weight_type=QuantType.QInt8,
+ op_types_to_quantize=['MatMul']
+ )
+ except Exception as dy_err:
+ logger.warning(f"Dynamic ONNX quantization failed: {dy_err}; attempting static quantization...")
+ # Fallback: static quantization with a zero data reader
+ try:
+ import onnx as _onnx
+ from onnxruntime.quantization import CalibrationDataReader, quantize_static, QuantFormat, QuantType, CalibrationMethod
+ _model = _onnx.load(quant_input_path)
+ # Build input shapes from the model graph
+ input_shapes = {}
+ for inp in _model.graph.input:
+ dims = []
+ for d in inp.type.tensor_type.shape.dim:
+ if d.dim_value > 0:
+ dims.append(d.dim_value)
+ else:
+ # default fallback dimension
+ dims.append(1)
+ input_shapes[inp.name] = dims
+ class _ZeroDataReader(CalibrationDataReader):
+ def __init__(self, input_shapes):
+ self._shapes = input_shapes
+ self._provided = False
+ def get_next(self):
+ if self._provided:
+ return None
+ feed = {}
+ for name, shape in self._shapes.items():
+ # Ensure reasonable default spatial size
+ s = list(shape)
+ if len(s) == 4:
+ if s[2] <= 1:
+ s[2] = 512
+ if s[3] <= 1:
+ s[3] = 512
+ # channel fallback
+ if s[1] <= 1 and 'mask' not in name.lower():
+ s[1] = 3
+ feed[name] = (np.zeros(s, dtype=np.float32))
+ self._provided = True
+ return feed
+ dr = _ZeroDataReader(input_shapes)
+ quantize_static(
+ model_input=quant_input_path,
+ model_output=quant_path,
+ calibration_data_reader=dr,
+ quant_format=QuantFormat.QDQ,
+ activation_type=QuantType.QUInt8,
+ weight_type=QuantType.QInt8,
+ per_channel=True,
+ calibrate_method=CalibrationMethod.MinMax,
+ op_types_to_quantize=ops_for_static
+ )
+ # Validate quantized model to catch structural errors early
+ try:
+ _m_q = _onnx.load(quant_path)
+ _onnx.checker.check_model(_m_q)
+ except Exception as _qchk:
+ logger.warning(f"Quantized model validation failed: {_qchk}")
+ # Remove broken quantized file to force fallback
+ try:
+ os.remove(quant_path)
+ except Exception:
+ pass
+ else:
+ logger.info(f"✅ Static INT8 quantization produced: {quant_path}")
+ except Exception as st_err:
+ logger.warning(f"Static ONNX quantization failed: {st_err}")
+ # Prefer the quantized file if it now exists
+ if os.path.exists(quant_path):
+ # Validate existing quantized model before using it
+ try:
+ import onnx as _onnx
+ _m_q = _onnx.load(quant_path)
+ _onnx.checker.check_model(_m_q)
+ except Exception as _qchk:
+ logger.warning(f"Existing quantized ONNX invalid; deleting and falling back: {_qchk}")
+ try:
+ os.remove(quant_path)
+ except Exception:
+ pass
+ else:
+ session_path = quant_path
+ logger.info(f"✅ Using quantized ONNX model: {quant_path}")
+ else:
+ logger.warning("ONNX quantization not applied: quantized file not created")
+ # Use conservative ORT memory options to reduce RAM growth
+ so = ort.SessionOptions()
+ try:
+ so.enable_mem_pattern = False
+ so.enable_cpu_mem_arena = False
+ except Exception:
+ pass
+ # Enable optimal performance settings (let ONNX use all CPU cores)
+ try:
+ # Use all available CPU threads for best performance
+ # ONNX Runtime will automatically use optimal thread count
+ so.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_EXTENDED
+ except Exception:
+ pass
+ # Try to create an inference session, with graceful fallbacks
+ try:
+ self.onnx_session = ort.InferenceSession(session_path, sess_options=so, providers=providers)
+ except Exception as e:
+ err = str(e)
+ logger.warning(f"ONNX session creation failed for {session_path}: {err}")
+ # If quantized path failed due to unsupported ops or invalid graph, remove it and retry unquantized
+ if session_path != onnx_path and ('ConvInteger' in err or 'NOT_IMPLEMENTED' in err or 'INVALID_ARGUMENT' in err):
+ try:
+ if os.path.exists(session_path):
+ os.remove(session_path)
+ logger.info(f"🧹 Deleted invalid quantized model: {session_path}")
+ except Exception:
+ pass
+ try:
+ logger.info("Retrying with unquantized ONNX model...")
+ self.onnx_session = ort.InferenceSession(onnx_path, sess_options=so, providers=providers)
+ session_path = onnx_path
+ except Exception as e2:
+ logger.warning(f"Unquantized ONNX session failed with current providers: {e2}")
+ # As a last resort, try CPU-only
+ try:
+ logger.info("Retrying ONNX on CPUExecutionProvider only...")
+ self.onnx_session = ort.InferenceSession(onnx_path, sess_options=so, providers=['CPUExecutionProvider'])
+ session_path = onnx_path
+ providers = ['CPUExecutionProvider']
+ except Exception as e3:
+ logger.error(f"Failed to create ONNX session on CPU: {e3}")
+ raise
+ else:
+ # If we weren't quantized but failed on CUDA, try CPU-only once
+ if self.use_gpu and 'NOT_IMPLEMENTED' in err:
+ try:
+ logger.info("Retrying ONNX on CPUExecutionProvider only...")
+ self.onnx_session = ort.InferenceSession(session_path, sess_options=so, providers=['CPUExecutionProvider'])
+ providers = ['CPUExecutionProvider']
+ except Exception as e4:
+ logger.error(f"Failed to create ONNX session on CPU: {e4}")
+ raise
+
+ # Get input/output names
+ if self.onnx_session is None:
+ raise RuntimeError("ONNX session was not created")
+ self.onnx_input_names = [i.name for i in self.onnx_session.get_inputs()]
+ self.onnx_output_names = [o.name for o in self.onnx_session.get_outputs()]
+
+ # Check input shapes to detect fixed-size models
+ input_shape = self.onnx_session.get_inputs()[0].shape
+ if len(input_shape) == 4 and input_shape[2] == 512 and input_shape[3] == 512:
+ self.onnx_fixed_size = (512, 512)
+ logger.info(" Model expects fixed size: 512x512")
+
+ # Log success with I/O info in a single line
+ logger.debug(f"✅ ONNX session created - Inputs: {self.onnx_input_names}, Outputs: {self.onnx_output_names}")
+
+ self.use_onnx = True
+ return True
+
+ except Exception as e:
+ logger.error(f"❌ Failed to load ONNX: {e}")
+ import traceback
+ logger.debug(f"ONNX load traceback: {traceback.format_exc()}")
+ self.use_onnx = False
+ self.model_loaded = False
+ return False
+
+ def _convert_checkpoint_key(self, key):
+ """Convert checkpoint key format to model format"""
+ # model.24.weight -> model_24.weight
+ if re.match(r'^model\.(\d+)\.(weight|bias|running_mean|running_var)$', key):
+ return re.sub(r'model\.(\d+)\.', r'model_\1.', key)
+
+ # model.5.conv1.ffc.weight -> model_5_conv1_ffc.weight
+ if key.startswith('model.'):
+ parts = key.split('.')
+ if parts[-1] in ['weight', 'bias', 'running_mean', 'running_var']:
+ return '_'.join(parts[:-1]) + '.' + parts[-1]
+
+ return key.replace('.', '_')
+
+ def _load_weights_with_mapping(self, model, state_dict):
+ """Load weights with proper mapping"""
+ model_dict = model.state_dict()
+
+ logger.info(f"📊 Model expects {len(model_dict)} weights")
+ logger.info(f"📊 Checkpoint has {len(state_dict)} weights")
+
+ # Filter out num_batches_tracked
+ actual_weights = {k: v for k, v in state_dict.items() if 'num_batches_tracked' not in k}
+ logger.info(f" Actual weights: {len(actual_weights)}")
+
+ mapped = {}
+ unmapped_ckpt = []
+ unmapped_model = list(model_dict.keys())
+
+ # Map checkpoint weights
+ for ckpt_key, ckpt_val in actual_weights.items():
+ success = False
+ converted_key = self._convert_checkpoint_key(ckpt_key)
+
+ if converted_key in model_dict:
+ target_shape = model_dict[converted_key].shape
+
+ if target_shape == ckpt_val.shape:
+ mapped[converted_key] = ckpt_val
+ success = True
+ elif len(ckpt_val.shape) == 4 and len(target_shape) == 4:
+ # 4D permute for decoder convs
+ permuted = ckpt_val.permute(1, 0, 2, 3)
+ if target_shape == permuted.shape:
+ mapped[converted_key] = permuted
+ logger.info(f" ✅ Permuted: {ckpt_key}")
+ success = True
+ elif len(ckpt_val.shape) == 2 and len(target_shape) == 2:
+ # 2D transpose
+ transposed = ckpt_val.transpose(0, 1)
+ if target_shape == transposed.shape:
+ mapped[converted_key] = transposed
+ success = True
+
+ if success and converted_key in unmapped_model:
+ unmapped_model.remove(converted_key)
+
+ if not success:
+ unmapped_ckpt.append(ckpt_key)
+
+ # Try fallback mapping for unmapped
+ if unmapped_ckpt:
+ logger.info(f" 🔧 Fallback mapping for {len(unmapped_ckpt)} weights...")
+ for ckpt_key in unmapped_ckpt[:]:
+ ckpt_val = actual_weights[ckpt_key]
+ for model_key in unmapped_model[:]:
+ if model_dict[model_key].shape == ckpt_val.shape:
+ if ('weight' in ckpt_key and 'weight' in model_key) or \
+ ('bias' in ckpt_key and 'bias' in model_key):
+ mapped[model_key] = ckpt_val
+ unmapped_model.remove(model_key)
+ unmapped_ckpt.remove(ckpt_key)
+ logger.info(f" ✅ Mapped: {ckpt_key} -> {model_key}")
+ break
+
+ # Initialize missing weights
+ complete_dict = model_dict.copy()
+ complete_dict.update(mapped)
+
+ for key in unmapped_model:
+ param = complete_dict[key]
+ if 'weight' in key:
+ if param.dim() >= 2:
+ if 'conv' in key.lower():
+ nn.init.kaiming_normal_(param, mode='fan_out', nonlinearity='relu')
+ else:
+ nn.init.xavier_uniform_(param)
+ else:
+ # 1D weights (e.g. BatchNorm scale) cannot use fan-based init
+ nn.init.ones_(param)
+ elif 'bias' in key:
+ nn.init.zeros_(param)
+ elif 'running_mean' in key:
+ nn.init.zeros_(param)
+ elif 'running_var' in key:
+ nn.init.ones_(param)
+
+ # Report
+ logger.info(f"✅ Mapped {len(actual_weights) - len(unmapped_ckpt)}/{len(actual_weights)} checkpoint weights")
+ logger.info(f" Filled {len(mapped)}/{len(model_dict)} model positions")
+
+ if unmapped_model:
+ pct = (len(unmapped_model) / len(model_dict)) * 100
+ logger.info(f" ⚠️ Initialized {len(unmapped_model)} missing weights ({pct:.1f}%)")
+ if pct > 20:
+ logger.warning(" ⚠️ May produce artifacts - checkpoint is incomplete")
+ logger.warning(" 💡 Consider downloading JIT model for better quality:")
+ logger.warning(f" inpainter.download_jit_model('{self.current_method or 'lama'}')")
+
+ model.load_state_dict(complete_dict, strict=True)
+ return True
+
+ def download_jit_model(self, method: str) -> Optional[str]:
+ """Download JIT model for a method"""
+ if method in LAMA_JIT_MODELS:
+ model_info = LAMA_JIT_MODELS[method]
+ logger.info(f"📥 Downloading {model_info['name']}...")
+
+ try:
+ model_path = download_model(model_info['url'], model_info['md5'])
+ return model_path
+ except Exception as e:
+ logger.error(f"Failed to download {method}: {e}")
+ else:
+ logger.warning(f"No JIT model available for {method}")
+
+ return None
+
+ def load_model(self, method, model_path, force_reload=False):
+ """Load model - supports both JIT and checkpoint files with ONNX conversion"""
+ try:
+ if not TORCH_AVAILABLE:
+ logger.warning("PyTorch not available in this build")
+ logger.info("Inpainting features will be disabled - this is normal for lightweight builds")
+ logger.info("The application will continue to work without local inpainting")
+ self.model_loaded = False
+ return False
+
+ # Additional safety check for torch being None
+ if torch is None or nn is None:
+ logger.warning("PyTorch modules not properly loaded")
+ logger.info("Inpainting features will be disabled - this is normal for lightweight builds")
+ self.model_loaded = False
+ return False
+
+ # Check if model path changed - but only if we had a previous path saved
+ current_saved_path = self.config.get(f'{method}_model_path', '')
+ if current_saved_path and current_saved_path != model_path:
+ logger.info(f"📍 Model path changed for {method}")
+ logger.info(f" Old: {current_saved_path}")
+ logger.info(f" New: {model_path}")
+ force_reload = True
+
+ if not os.path.exists(model_path):
+ # Try to auto-download JIT model if path doesn't exist
+ logger.warning(f"Model not found: {model_path}")
+ logger.info("Attempting to download JIT model...")
+
+ try:
+ jit_path = self.download_jit_model(method)
+ if jit_path and os.path.exists(jit_path):
+ model_path = jit_path
+ logger.info(f"Using downloaded JIT model: {jit_path}")
+ else:
+ logger.error(f"Model not found and download failed: {model_path}")
+ logger.info("Inpainting will be unavailable for this session")
+ return False
+ except Exception as download_error:
+ logger.error(f"Download failed: {download_error}")
+ logger.info("Inpainting will be unavailable for this session")
+ return False
+
+ # Check if already loaded in THIS instance
+ if self.model_loaded and self.current_method == method and not force_reload:
+ # Additional check for ONNX - make sure the session exists
+ if self.use_onnx and self.onnx_session is not None:
+ logger.debug(f"✅ {method.upper()} ONNX already loaded (skipping reload)")
+ return True
+ elif not self.use_onnx and self.model is not None:
+ logger.debug(f"✅ {method.upper()} already loaded (skipping reload)")
+ return True
+ else:
+ # Model claims to be loaded but objects are missing - force reload
+ logger.warning(f"⚠️ Model claims loaded but session/model object is None - forcing reload")
+ force_reload = True
+ self.model_loaded = False
+
+ # Clear previous model if force reload
+ if force_reload:
+ logger.info(f"🔄 Force reloading {method} model...")
+ self.model = None
+ self.onnx_session = None
+ self.model_loaded = False
+ self.is_jit_model = False
+ # Only log loading message when actually loading
+ logger.info(f"📥 Loading {method} from {model_path}")
+ elif self.model_loaded and self.current_method != method:
+ # If we have a model loaded but it's a different method, clear it
+ logger.info(f"🔄 Switching from {self.current_method} to {method}")
+ self.model = None
+ self.onnx_session = None
+ self.model_loaded = False
+ self.is_jit_model = False
+ # Only log loading message when actually loading
+ logger.info(f"📥 Loading {method} from {model_path}")
+ elif not self.model_loaded:
+ # Only log when we're actually going to load
+ logger.info(f"📥 Loading {method} from {model_path}")
+ # else: model is loaded and current, no logging needed
+
+ # Normalize path and enforce expected extension for certain methods
+ try:
+ _ext = os.path.splitext(model_path)[1].lower()
+ _method_lower = str(method).lower()
+ # For explicit ONNX methods, ensure we use a .onnx path
+ if _method_lower in ("lama_onnx", "anime_onnx", "aot_onnx") and _ext != ".onnx":
+ # If the file exists, try to detect if it's actually an ONNX model and correct the extension
+ if os.path.exists(model_path) and ONNX_AVAILABLE:
+ try:
+ import onnx as _onnx
+ _ = _onnx.load(model_path) # will raise if not ONNX
+ # Build a corrected path under the ONNX cache dir
+ base_name = os.path.splitext(os.path.basename(model_path))[0]
+ if base_name.endswith('.pt'):
+ base_name = base_name[:-3]
+ corrected_path = os.path.join(ONNX_CACHE_DIR, base_name + ".onnx")
+ # Avoid overwriting a valid file with an invalid one
+ if model_path != corrected_path:
+ try:
+ import shutil as _shutil
+ _shutil.copy2(model_path, corrected_path)
+ model_path = corrected_path
+ logger.info(f"🔧 Corrected ONNX model extension/path: {model_path}")
+ except Exception as _cp_e:
+ # As a fallback, try in-place rename to .onnx
+ try:
+ in_place = os.path.splitext(model_path)[0] + ".onnx"
+ os.replace(model_path, in_place)
+ model_path = in_place
+ logger.info(f"🔧 Renamed ONNX model to: {model_path}")
+ except Exception:
+ logger.warning(f"Could not correct ONNX extension automatically: {_cp_e}")
+ except Exception:
+ # Not an ONNX file; leave as-is
+ pass
+ # If the path doesn't exist or still wrong, prefer the known ONNX download for this method
+ if (not os.path.exists(model_path)) or (os.path.splitext(model_path)[1].lower() != ".onnx"):
+ try:
+ # Download the appropriate ONNX model based on the method
+ if _method_lower == "anime_onnx":
+ _dl = self.download_jit_model("anime_onnx")
+ elif _method_lower == "aot_onnx":
+ _dl = self.download_jit_model("aot_onnx")
+ else:
+ _dl = self.download_jit_model("lama_onnx")
+ if _dl and os.path.exists(_dl):
+ model_path = _dl
+ logger.info(f"🔧 Using downloaded {_method_lower.upper()} model: {model_path}")
+ except Exception:
+ pass
+ except Exception:
+ pass
+
+ # Check file signature to detect ONNX files (even with wrong extension)
+ # or check file extension
+ ext = model_path.lower().split('.')[-1]
+ is_onnx = False
+
+ # Check by file signature
+ try:
+ with open(model_path, 'rb') as f:
+ file_header = f.read(8)
+ if file_header.startswith(b'\x08'):
+ is_onnx = True
+ logger.debug("📦 Detected ONNX file signature")
+ except Exception:
+ pass
+
+ # Check by extension
+ if ext == 'onnx':
+ is_onnx = True
+
+ # Handle ONNX files
+ if is_onnx:
+ # Note: load_onnx_model will handle its own logging
+ try:
+ onnx_load_result = self.load_onnx_model(model_path)
+ if onnx_load_result:
+ # CRITICAL: Set model_loaded flag FIRST before any other operations
+ # This ensures concurrent threads see the correct state immediately
+ self.model_loaded = True
+ self.use_onnx = True
+ self.is_jit_model = False
+ # Ensure aot_onnx is properly set as current method
+ if 'aot' in method.lower():
+ self.current_method = 'aot_onnx'
+ else:
+ self.current_method = method
+ # Save with BOTH key formats for compatibility (non-critical - do last)
+ try:
+ self.config[f'{method}_model_path'] = model_path
+ self.config[f'manga_{method}_model_path'] = model_path
+ self._save_config()
+ except Exception as cfg_err:
+ logger.debug(f"Config save after ONNX load failed (non-critical): {cfg_err}")
+ logger.info(f"✅ {method.upper()} ONNX loaded with method: {self.current_method}")
+ # Double-check model_loaded flag is still set
+ if not self.model_loaded:
+ logger.error("❌ CRITICAL: model_loaded flag was unset after successful ONNX load!")
+ self.model_loaded = True
+ return True
+ else:
+ logger.error("Failed to load ONNX model - load_onnx_model returned False")
+ self.model_loaded = False
+ return False
+ except Exception as onnx_err:
+ logger.error(f"Exception during ONNX model loading: {onnx_err}")
+ import traceback
+ logger.debug(traceback.format_exc())
+ self.model_loaded = False
+ return False
+
+ # Check if it's a JIT model (.pt) or checkpoint (.ckpt/.pth)
+ if model_path.endswith('.pt'):
+ try:
+ # Try loading as JIT/TorchScript
+ logger.info("📦 Attempting to load as JIT model...")
+ self.model = torch.jit.load(model_path, map_location=self.device or 'cpu')
+ self.model.eval()
+
+ if self.use_gpu and self.device:
+ try:
+ self.model = self.model.to(self.device)
+ except Exception as gpu_error:
+ logger.warning(f"Could not move model to GPU: {gpu_error}")
+ logger.info("Using CPU instead")
+
+ self.is_jit_model = True
+ self.model_loaded = True
+ self.current_method = method
+ logger.info("✅ JIT model loaded successfully!")
+ time.sleep(0.1) # Brief pause for stability
+ logger.debug("💤 JIT model loading pausing briefly for stability")
+
+ # Optional FP16 precision on GPU to reduce VRAM
+ if self.quantize_enabled and self.use_gpu:
+ try:
+ if self.torch_precision in ('fp16', 'auto'):
+ self.model = self.model.half()
+ logger.info("🔻 Applied FP16 precision to inpainting model (GPU)")
+ else:
+ logger.info("Torch precision set to fp32; skipping half()")
+ except Exception as _e:
+ logger.warning(f"Could not switch inpainting model precision: {_e}")
+
+ # Optional INT8 dynamic quantization for CPU TorchScript (best-effort)
+ if (self.int8_enabled or (self.quantize_enabled and not self.use_gpu and self.torch_precision in ('auto', 'int8'))) and not self.use_gpu:
+ try:
+ applied = False
+ # Try TorchScript dynamic quantization API (older PyTorch)
+ try:
+ from torch.quantization import quantize_dynamic_jit # type: ignore
+ self.model = quantize_dynamic_jit(self.model, {"aten::linear"}, dtype=torch.qint8) # type: ignore
+ applied = True
+ except Exception:
+ pass
+ # Try eager-style dynamic quantization on the scripted module (may no-op)
+ if not applied:
+ try:
+ import torch.ao.quantization as tq # type: ignore
+ self.model = tq.quantize_dynamic(self.model, {nn.Linear}, dtype=torch.qint8) # type: ignore
+ applied = True
+ except Exception:
+ pass
+ # Always try to optimize TorchScript for inference
+ try:
+ self.model = torch.jit.optimize_for_inference(self.model) # type: ignore
+ except Exception:
+ pass
+ if applied:
+ logger.info("🔻 Applied INT8 dynamic quantization to JIT inpainting model (CPU)")
+ self.torch_quantize_applied = True
+ else:
+ logger.info("ℹ️ INT8 dynamic quantization not applied (unsupported for this JIT graph); using FP32 CPU")
+ except Exception as _qe:
+ logger.warning(f"INT8 quantization skipped: {_qe}")
+
+ # Save with BOTH key formats for compatibility
+ self.config[f'{method}_model_path'] = model_path
+ self.config[f'manga_{method}_model_path'] = model_path
+ self._save_config()
+
+ # ONNX CONVERSION (optionally in background)
+ if AUTO_CONVERT_TO_ONNX and self.model_loaded:
+ def _convert_and_switch():
+ try:
+ onnx_path = self.convert_to_onnx(model_path, method)
+ if onnx_path and self.load_onnx_model(onnx_path):
+ logger.info("🚀 Using ONNX model for inference")
+ else:
+ logger.info("📦 Using PyTorch JIT model for inference")
+ except Exception as onnx_error:
+ logger.warning(f"ONNX conversion failed: {onnx_error}")
+ logger.info("📦 Using PyTorch JIT model for inference")
+
+ if os.environ.get('AUTO_CONVERT_TO_ONNX_BACKGROUND', 'true').lower() == 'true':
+ threading.Thread(target=_convert_and_switch, daemon=True).start()
+ else:
+ _convert_and_switch()
+
+ return True
+ except Exception as jit_error:
+ logger.info(f" Not a JIT model, trying as regular checkpoint... ({jit_error})")
+ try:
+ checkpoint = torch.load(model_path, map_location='cpu', weights_only=False)
+ self.is_jit_model = False
+ except Exception as load_error:
+ logger.error(f"Failed to load checkpoint: {load_error}")
+ return False
+ else:
+ # Load as regular checkpoint
+ try:
+ checkpoint = torch.load(model_path, map_location='cpu', weights_only=False)
+ self.is_jit_model = False
+ except Exception as load_error:
+ logger.error(f"Failed to load checkpoint: {load_error}")
+ logger.info("This may happen if PyTorch is not fully available in the .exe build")
+ return False
+
+ # If we get here, it's not JIT, so load as checkpoint
+ if not self.is_jit_model:
+ try:
+ # Try to create the model - this might fail if nn.Module is None
+ self.model = FFCInpaintModel()
+
+ if isinstance(checkpoint, dict):
+ if 'gen_state_dict' in checkpoint:
+ state_dict = checkpoint['gen_state_dict']
+ logger.info("📦 Found gen_state_dict")
+ elif 'state_dict' in checkpoint:
+ state_dict = checkpoint['state_dict']
+ elif 'model' in checkpoint:
+ state_dict = checkpoint['model']
+ else:
+ state_dict = checkpoint
+ else:
+ state_dict = checkpoint
+
+ self._load_weights_with_mapping(self.model, state_dict)
+
+ self.model.eval()
+ if self.use_gpu and self.device:
+ try:
+ self.model = self.model.to(self.device)
+ except Exception as gpu_error:
+ logger.warning(f"Could not move model to GPU: {gpu_error}")
+ logger.info("Using CPU instead")
+
+ # Optional INT8 dynamic quantization for CPU eager model
+ if (self.int8_enabled or (self.quantize_enabled and not self.use_gpu and self.torch_precision in ('auto', 'int8'))) and not self.use_gpu:
+ try:
+ import torch.ao.quantization as tq # type: ignore
+ self.model = tq.quantize_dynamic(self.model, {nn.Linear}, dtype=torch.qint8) # type: ignore
+ logger.info("🔻 Applied dynamic INT8 quantization to inpainting model (CPU)")
+ self.torch_quantize_applied = True
+ except Exception as qe:
+ logger.warning(f"INT8 dynamic quantization not applied: {qe}")
+
+ except Exception as model_error:
+ logger.error(f"Failed to create or initialize model: {model_error}")
+ logger.info("This may happen if PyTorch neural network modules are not available in the .exe build")
+ return False
+
+ self.model_loaded = True
+ self.current_method = method
+
+ self.config[f'{method}_model_path'] = model_path
+ self._save_config()
+
+ logger.info(f"✅ {method.upper()} loaded!")
+
+ # ONNX CONVERSION (optionally in background)
+ if AUTO_CONVERT_TO_ONNX and model_path.endswith('.pt') and self.model_loaded:
+ def _convert_and_switch():
+ try:
+ onnx_path = self.convert_to_onnx(model_path, method)
+ if onnx_path and self.load_onnx_model(onnx_path):
+ logger.info("🚀 Using ONNX model for inference")
+ except Exception as onnx_error:
+ logger.warning(f"ONNX conversion failed: {onnx_error}")
+ logger.info("📦 Continuing with PyTorch model")
+
+ if os.environ.get('AUTO_CONVERT_TO_ONNX_BACKGROUND', 'true').lower() == 'true':
+ threading.Thread(target=_convert_and_switch, daemon=True).start()
+ else:
+ _convert_and_switch()
+
+ return True
+
+ except Exception as e:
+ logger.error(f"❌ Failed to load model: {e}")
+ logger.error(traceback.format_exc())
+ logger.info("Note: If running from .exe, some ML libraries may not be included")
+ logger.info("This is normal for lightweight builds - inpainting will be disabled")
+ self.model_loaded = False
+ return False
+
+ def load_model_with_retry(self, method, model_path, force_reload=False, retries: int = 2, retry_delay: float = 0.5) -> bool:
+ """Attempt to load a model with retries.
+ Returns True if loaded; False if all attempts fail. On failure, the inpainter will safely no-op.
+ """
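+ # Illustrative call (names per this class): ok = inpainter.load_model_with_retry('lama', model_path, retries=2)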
+ try:
+ attempts = max(0, int(retries)) + 1
+ except Exception:
+ attempts = 1
+ for attempt in range(attempts):
+ try:
+ ok = self.load_model(method, model_path, force_reload=force_reload)
+ if ok:
+ return True
+ except Exception as e:
+ logger.warning(f"Load attempt {attempt+1} failed with exception: {e}")
+ # brief delay before next try
+ if attempt < attempts - 1:
+ try:
+ time.sleep(max(0.0, float(retry_delay)))
+ except Exception:
+ pass
+ # If we reach here, loading failed. Leave model unloaded so inpaint() no-ops and returns original image.
+ logger.warning("All load attempts failed; local inpainting will fall back to returning original images (no-op)")
+ self.model_loaded = False
+ # Keep current_method for logging/context if provided
+ try:
+ self.current_method = method
+ except Exception:
+ pass
+ return False
+
+ def unload(self):
+ """Release all heavy resources held by this inpainter instance."""
+ try:
+ # Release ONNX session and metadata
+ try:
+ if self.onnx_session is not None:
+ self.onnx_session = None
+ except Exception:
+ pass
+ for attr in ['onnx_input_names', 'onnx_output_names', 'current_onnx_path', 'onnx_fixed_size']:
+ try:
+ if hasattr(self, attr):
+ setattr(self, attr, None)
+ except Exception:
+ pass
+
+ # Release PyTorch model
+ try:
+ if self.model is not None:
+ if TORCH_AVAILABLE and torch is not None:
+ try:
+ # Move to CPU then drop reference
+ self.model = self.model.to('cpu') if hasattr(self.model, 'to') else None
+ except Exception:
+ pass
+ self.model = None
+ except Exception:
+ pass
+
+ # Drop bubble detector reference (not the global cache)
+ try:
+ self.bubble_detector = None
+ except Exception:
+ pass
+
+ # Update flags
+ self.model_loaded = False
+ self.use_onnx = False
+ self.is_jit_model = False
+
+ # Free CUDA cache and trigger GC
+ try:
+ if TORCH_AVAILABLE and torch is not None and torch.cuda.is_available():
+ torch.cuda.empty_cache()
+ except Exception:
+ pass
+ try:
+ import gc
+ gc.collect()
+ except Exception:
+ pass
+ except Exception:
+ # Never raise from unload
+ pass
+
+ def pad_img_to_modulo(self, img: np.ndarray, mod: int) -> Tuple[np.ndarray, Tuple[int, int, int, int]]:
+ """Pad image to be divisible by mod"""
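+ # Example: a 500x700 (HxW) image with mod=8 is reflect-padded to 504x704,
+ # i.e. pad_height = (8 - 500 % 8) % 8 = 4, split 2 px top / 2 px bottom.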
+ if len(img.shape) == 2:
+ height, width = img.shape
+ else:
+ height, width = img.shape[:2]
+
+ pad_height = (mod - height % mod) % mod
+ pad_width = (mod - width % mod) % mod
+
+ pad_top = pad_height // 2
+ pad_bottom = pad_height - pad_top
+ pad_left = pad_width // 2
+ pad_right = pad_width - pad_left
+
+ if len(img.shape) == 2:
+ padded = np.pad(img, ((pad_top, pad_bottom), (pad_left, pad_right)), mode='reflect')
+ else:
+ padded = np.pad(img, ((pad_top, pad_bottom), (pad_left, pad_right), (0, 0)), mode='reflect')
+
+ return padded, (pad_top, pad_bottom, pad_left, pad_right)
+
+ def remove_padding(self, img: np.ndarray, padding: Tuple[int, int, int, int]) -> np.ndarray:
+ """Remove padding from image"""
+ pad_top, pad_bottom, pad_left, pad_right = padding
+
+ if len(img.shape) == 2:
+ return img[pad_top:img.shape[0]-pad_bottom, pad_left:img.shape[1]-pad_right]
+ else:
+ return img[pad_top:img.shape[0]-pad_bottom, pad_left:img.shape[1]-pad_right, :]
+
+ def _inpaint_tiled(self, image, mask, tile_size, overlap, refinement='normal'):
+ """Process image in tiles"""
+ orig_h, orig_w = image.shape[:2]
+ result = image.copy()
+
+ # Calculate tile positions
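+ # Tiles advance with a stride of (tile_size - overlap); with the defaults
+ # (512 px tiles, 64 px overlap) each step moves 448 px in x and y.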
+ for y in range(0, orig_h, tile_size - overlap):
+ for x in range(0, orig_w, tile_size - overlap):
+ # Calculate tile boundaries
+ x_end = min(x + tile_size, orig_w)
+ y_end = min(y + tile_size, orig_h)
+
+ # Adjust start to ensure full tile size if possible
+ if x_end - x < tile_size and x > 0:
+ x = max(0, x_end - tile_size)
+ if y_end - y < tile_size and y > 0:
+ y = max(0, y_end - tile_size)
+
+ # Extract tile
+ tile_img = image[y:y_end, x:x_end]
+ tile_mask = mask[y:y_end, x:x_end]
+
+ # Skip if no inpainting needed
+ if np.sum(tile_mask) == 0:
+ continue
+
+ # Process this tile with the actual model
+ processed_tile = self._process_single_tile(tile_img, tile_mask, tile_size, refinement)
+
+ # Auto-retry for tile if no visible change
+ try:
+ if self._is_noop(tile_img, processed_tile, tile_mask):
+ kernel = np.ones((3, 3), np.uint8)
+ expanded = cv2.dilate(tile_mask, kernel, iterations=1)
+ processed_retry = self._process_single_tile(tile_img, expanded, tile_size, 'fast')
+ processed_tile = processed_retry
+ if self._is_noop(tile_img, processed_retry, expanded):
+ logger.warning("Tile remained unchanged after retry; proceeding without further fallback")
+ except Exception as e:
+ logger.debug(f"Tiled no-op detection error: {e}")
+
+ # Blend tile back into result
+ if overlap > 0 and (x > 0 or y > 0):
+ result[y:y_end, x:x_end] = self._blend_tile(
+ result[y:y_end, x:x_end],
+ processed_tile,
+ x > 0,
+ y > 0,
+ overlap
+ )
+ else:
+ result[y:y_end, x:x_end] = processed_tile
+
+ logger.info(f"✅ Tiled inpainting complete ({orig_w}x{orig_h} in {tile_size}x{tile_size} tiles)")
+ time.sleep(0.1) # Brief pause for stability
+ logger.debug("💤 Tiled inpainting completion pausing briefly for stability")
+ return result
+
+ def _process_single_tile(self, tile_img, tile_mask, tile_size, refinement):
+ """Process a single tile without tiling"""
+ # Temporarily disable tiling
+ old_tiling = self.tiling_enabled
+ self.tiling_enabled = False
+ result = self.inpaint(tile_img, tile_mask, refinement, _skip_hd=True)
+ self.tiling_enabled = old_tiling
+ return result
+
+ def _blend_tile(self, existing, new_tile, blend_x, blend_y, overlap):
+ """Blend a tile with existing result"""
+ if not blend_x and not blend_y:
+ # No blending needed for first tile
+ return new_tile
+
+ h, w = new_tile.shape[:2]
+ result = new_tile.copy()
+
+ # Create blend weights
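+ # Linear ramp across the overlap band: at column/row 0 the existing result
+ # dominates (alpha=0) and by the end of the overlap the new tile takes over.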
+ if blend_x and overlap > 0 and w > overlap:
+ # Horizontal blend on left edge
+ for i in range(overlap):
+ alpha = i / overlap
+ result[:, i] = existing[:, i] * (1 - alpha) + new_tile[:, i] * alpha
+
+ if blend_y and overlap > 0 and h > overlap:
+ # Vertical blend on top edge
+ for i in range(overlap):
+ alpha = i / overlap
+ result[i, :] = existing[i, :] * (1 - alpha) + new_tile[i, :] * alpha
+
+ return result
+
+ def _is_noop(self, original: np.ndarray, result: np.ndarray, mask: np.ndarray, threshold: float = 0.75) -> bool:
+ """Return True if inpainting produced negligible change within the masked area."""
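+ # "Negligible" means the mean absolute difference inside the mask stays
+ # below ~0.75 intensity levels (out of 255) by default.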
+ try:
+ if original is None or result is None:
+ return True
+ if original.shape != result.shape:
+ return False
+ # Normalize mask to single channel boolean
+ if mask is None:
+ return False
+ if len(mask.shape) == 3:
+ mask_gray = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
+ else:
+ mask_gray = mask
+ m = mask_gray > 0
+ if not np.any(m):
+ return False
+ # Fast path
+ if np.array_equal(original, result):
+ return True
+ diff = cv2.absdiff(result, original)
+ if len(diff.shape) == 3:
+ diff_gray = cv2.cvtColor(diff, cv2.COLOR_BGR2GRAY)
+ else:
+ diff_gray = diff
+ mean_diff = float(np.mean(diff_gray[m]))
+ return mean_diff < threshold
+ except Exception as e:
+ logger.debug(f"No-op detection failed: {e}")
+ return False
+
+ def _is_white_paste(self, result: np.ndarray, mask: np.ndarray, white_threshold: int = 245, ratio: float = 0.90) -> bool:
+ """Detect 'white paste' failure: masked area mostly saturated near white."""
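+ # With defaults, flags failure when >= 90% of masked pixels are >= 245 in
+ # every channel (i.e. the model effectively pasted near-white over the mask).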
+ try:
+ if result is None or mask is None:
+ return False
+ if len(mask.shape) == 3:
+ mask_gray = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
+ else:
+ mask_gray = mask
+ m = mask_gray > 0
+ if not np.any(m):
+ return False
+ if len(result.shape) == 3:
+ white = (result[..., 0] >= white_threshold) & (result[..., 1] >= white_threshold) & (result[..., 2] >= white_threshold)
+ else:
+ white = result >= white_threshold
+ count_mask = int(np.count_nonzero(m))
+ count_white = int(np.count_nonzero(white & m))
+ if count_mask == 0:
+ return False
+ frac = count_white / float(count_mask)
+ return frac >= ratio
+ except Exception as e:
+ logger.debug(f"White paste detection failed: {e}")
+ return False
+
+ def _log_inpaint_diag(self, path: str, result: np.ndarray, mask: np.ndarray):
+ try:
+ h, w = result.shape[:2]
+ stats = (float(result.min()), float(result.max()), float(result.mean()))
+ logger.info(f"[Diag] Path={path} onnx_quant={self.onnx_quantize_applied} torch_quant={self.torch_quantize_applied} size={w}x{h} stats(min,max,mean)={stats}")
+ if self._is_white_paste(result, mask):
+ logger.warning("[Diag] White-paste detected (mask>0 mostly white)")
+ except Exception as e:
+ logger.debug(f"Diag log failed: {e}")
+
+ def inpaint(self, image, mask, refinement='normal', _retry_attempt: int = 0, _skip_hd: bool = False, _skip_tiling: bool = False):
+ """Inpaint - compatible with JIT, checkpoint, and ONNX models
+ Implements HD strategy (Resize/Crop) similar to comic-translate to speed up large images.
+ """
+ # Check for stop at start
+ if self._check_stop():
+ self._log("⏹️ Inpainting stopped by user", "warning")
+ return image
+
+ if not self.model_loaded:
+ self._log("No model loaded", "error")
+ return image
+
+ try:
+ # Store original dimensions
+ orig_h, orig_w = image.shape[:2]
+
+ # HD strategy (mirror of comic-translate): optional RESIZE or CROP before core inpainting
+ if not _skip_hd:
+ try:
+ strategy = getattr(self, 'hd_strategy', 'resize') or 'resize'
+ except Exception:
+ strategy = 'resize'
+ H, W = orig_h, orig_w
+ if strategy == 'resize' and max(H, W) > max(16, int(getattr(self, 'hd_resize_limit', 1536))):
+ limit = max(16, int(getattr(self, 'hd_resize_limit', 1536)))
+ ratio = float(limit) / float(max(H, W))
+ new_w = max(1, int(W * ratio + 0.5))
+ new_h = max(1, int(H * ratio + 0.5))
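+ # e.g. with the default 1536 px limit, a 3000x2000 (HxW) page is scaled by
+ # ratio 0.512 down to 1536x1024 before inpainting, then the masked area is
+ # upscaled and pasted back at full resolution.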
+ image_small = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_LANCZOS4)
+ mask_small = mask if len(mask.shape) == 2 else cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
+ mask_small = cv2.resize(mask_small, (new_w, new_h), interpolation=cv2.INTER_NEAREST)
+ result_small = self.inpaint(image_small, mask_small, refinement, 0, _skip_hd=True, _skip_tiling=True)
+ result_full = cv2.resize(result_small, (W, H), interpolation=cv2.INTER_LANCZOS4)
+                    # Paste only the masked area back into the full-resolution image
+                    # (mask_small is already single-channel; upscale it back to full size)
+                    mask_gray = cv2.resize(mask_small, (W, H), interpolation=cv2.INTER_NEAREST)
+ m = mask_gray > 0
+ out = image.copy()
+ out[m] = result_full[m]
+ return out
+ elif strategy == 'crop' and max(H, W) > max(16, int(getattr(self, 'hd_crop_trigger_size', 1024))):
+ mask_gray0 = mask if len(mask.shape) == 2 else cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
+ _, thresh = cv2.threshold(mask_gray0, 127, 255, cv2.THRESH_BINARY)
+ contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+ if contours:
+ out = image.copy()
+ margin = max(0, int(getattr(self, 'hd_crop_margin', 16)))
+ for cnt in contours:
+ x, y, w, h = cv2.boundingRect(cnt)
+ l = max(0, x - margin); t = max(0, y - margin)
+ r = min(W, x + w + margin); b = min(H, y + h + margin)
+ if r <= l or b <= t:
+ continue
+ crop_img = image[t:b, l:r]
+ crop_mask = mask_gray0[t:b, l:r]
+ patch = self.inpaint(crop_img, crop_mask, refinement, 0, _skip_hd=True, _skip_tiling=True)
+ out[t:b, l:r] = patch
+ return out
+
+ if len(mask.shape) == 3:
+ mask = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
+
+ # Apply dilation for anime method
+ if self.current_method == 'anime':
+ kernel = np.ones((7, 7), np.uint8)
+ mask = cv2.dilate(mask, kernel, iterations=1)
+
+ # Use instance tiling settings for ALL models
+ logger.info(f"🔍 Tiling check: enabled={self.tiling_enabled}, tile_size={self.tile_size}, image_size={orig_h}x{orig_w}")
+
+ # If tiling is enabled and image is larger than tile size
+ if (not _skip_tiling) and self.tiling_enabled and (orig_h > self.tile_size or orig_w > self.tile_size):
+ logger.info(f"🔲 Using tiled inpainting: {self.tile_size}x{self.tile_size} tiles with {self.tile_overlap}px overlap")
+ return self._inpaint_tiled(image, mask, self.tile_size, self.tile_overlap, refinement)
+
+ # ONNX inference path
+ if self.use_onnx and self.onnx_session:
+ logger.debug("Using ONNX inference")
+
+ # CRITICAL: Convert BGR (OpenCV default) to RGB (ML model expected)
+ image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+
+ # Check if this is a Carve model
+ is_carve_model = False
+ if hasattr(self, 'current_onnx_path'):
+ is_carve_model = "lama_fp32" in self.current_onnx_path or "carve" in self.current_onnx_path.lower()
+
+ # Handle fixed-size models (resize instead of padding)
+ if hasattr(self, 'onnx_fixed_size') and self.onnx_fixed_size:
+ fixed_h, fixed_w = self.onnx_fixed_size
+ # Resize to fixed size
+ image_resized = cv2.resize(image_rgb, (fixed_w, fixed_h), interpolation=cv2.INTER_LANCZOS4)
+ mask_resized = cv2.resize(mask, (fixed_w, fixed_h), interpolation=cv2.INTER_NEAREST)
+
+ # Prepare inputs based on model type
+ if is_carve_model:
+ # Carve model expects normalized input [0, 1] range
+ logger.debug("Using Carve model normalization [0, 1]")
+ img_np = image_resized.astype(np.float32) / 255.0
+ mask_np = mask_resized.astype(np.float32) / 255.0
+ mask_np = (mask_np > 0.5) * 1.0 # Binary mask
+ elif self.current_method == 'aot' or 'aot' in str(self.current_method).lower():
+ # AOT normalization: [-1, 1] range for image
+ logger.debug("Using AOT model normalization [-1, 1] for image, [0, 1] for mask")
+ img_np = (image_resized.astype(np.float32) / 127.5) - 1.0
+ mask_np = mask_resized.astype(np.float32) / 255.0
+ mask_np = (mask_np > 0.5) * 1.0 # Binary mask
+ img_np = img_np * (1 - mask_np[:, :, np.newaxis]) # Mask out regions
+ else:
+ # Standard LaMa normalization: [0, 1] range
+ logger.debug("Using standard LaMa normalization [0, 1]")
+ img_np = image_resized.astype(np.float32) / 255.0
+ mask_np = mask_resized.astype(np.float32) / 255.0
+ mask_np = (mask_np > 0) * 1.0
+
+ # Convert to NCHW format
+ img_np = img_np.transpose(2, 0, 1)[np.newaxis, ...]
+ mask_np = mask_np[np.newaxis, np.newaxis, ...]
+
+ # Run ONNX inference
+ ort_inputs = {
+ self.onnx_input_names[0]: img_np.astype(np.float32),
+ self.onnx_input_names[1]: mask_np.astype(np.float32)
+ }
+
+ ort_outputs = self.onnx_session.run(self.onnx_output_names, ort_inputs)
+ output = ort_outputs[0]
+
+ # Post-process output based on model type
+ if is_carve_model:
+ # CRITICAL: Carve model outputs values ALREADY in [0, 255] range!
+ # DO NOT multiply by 255 or apply any scaling
+ logger.debug("Carve model output is already in [0, 255] range")
+ raw_output = output[0].transpose(1, 2, 0)
+ logger.debug(f"Carve output stats: min={raw_output.min():.3f}, max={raw_output.max():.3f}, mean={raw_output.mean():.3f}")
+ result = raw_output # Just transpose, no scaling
+ elif self.current_method == 'aot' or 'aot' in str(self.current_method).lower():
+ # AOT: [-1, 1] to [0, 255]
+ result = ((output[0].transpose(1, 2, 0) + 1.0) * 127.5)
+ else:
+ # Standard: [0, 1] to [0, 255]
+ result = output[0].transpose(1, 2, 0) * 255
+
+ result = np.clip(np.round(result), 0, 255).astype(np.uint8)
+ # CRITICAL: Convert RGB (model output) back to BGR (OpenCV expected)
+ result = cv2.cvtColor(result, cv2.COLOR_RGB2BGR)
+
+ # Resize back to original size
+ result = cv2.resize(result, (orig_w, orig_h), interpolation=cv2.INTER_LANCZOS4)
+ self._log_inpaint_diag('onnx-fixed', result, mask)
+
+ else:
+ # Variable-size models (use padding)
+ image_padded, padding = self.pad_img_to_modulo(image_rgb, self.pad_mod)
+ mask_padded, _ = self.pad_img_to_modulo(mask, self.pad_mod)
+
+ # Prepare inputs based on model type
+ if is_carve_model:
+ # Carve model normalization [0, 1]
+ logger.debug("Using Carve model normalization [0, 1]")
+ img_np = image_padded.astype(np.float32) / 255.0
+ mask_np = mask_padded.astype(np.float32) / 255.0
+ mask_np = (mask_np > 0.5) * 1.0
+ elif self.current_method == 'aot' or 'aot' in str(self.current_method).lower():
+ # AOT normalization: [-1, 1] for image
+ logger.debug("Using AOT model normalization [-1, 1] for image, [0, 1] for mask")
+ img_np = (image_padded.astype(np.float32) / 127.5) - 1.0
+ mask_np = mask_padded.astype(np.float32) / 255.0
+ mask_np = (mask_np > 0.5) * 1.0
+ img_np = img_np * (1 - mask_np[:, :, np.newaxis]) # Mask out regions
+ else:
+ # Standard LaMa normalization: [0, 1]
+ logger.debug("Using standard LaMa normalization [0, 1]")
+ img_np = image_padded.astype(np.float32) / 255.0
+ mask_np = mask_padded.astype(np.float32) / 255.0
+ mask_np = (mask_np > 0) * 1.0
+
+ # Convert to NCHW format
+ img_np = img_np.transpose(2, 0, 1)[np.newaxis, ...]
+ mask_np = mask_np[np.newaxis, np.newaxis, ...]
+
+ # Check for stop before inference
+ if self._check_stop():
+ self._log("⏹️ ONNX inference stopped by user", "warning")
+ return image
+
+ # Run ONNX inference
+ ort_inputs = {
+ self.onnx_input_names[0]: img_np.astype(np.float32),
+ self.onnx_input_names[1]: mask_np.astype(np.float32)
+ }
+
+ ort_outputs = self.onnx_session.run(self.onnx_output_names, ort_inputs)
+ output = ort_outputs[0]
+
+ # Post-process output
+ if is_carve_model:
+ # CRITICAL: Carve model outputs values ALREADY in [0, 255] range!
+ logger.debug("Carve model output is already in [0, 255] range")
+ raw_output = output[0].transpose(1, 2, 0)
+ logger.debug(f"Carve output stats: min={raw_output.min():.3f}, max={raw_output.max():.3f}, mean={raw_output.mean():.3f}")
+ result = raw_output # Just transpose, no scaling
+ elif self.current_method == 'aot' or 'aot' in str(self.current_method).lower():
+ result = ((output[0].transpose(1, 2, 0) + 1.0) * 127.5)
+ else:
+ result = output[0].transpose(1, 2, 0) * 255
+
+ result = np.clip(np.round(result), 0, 255).astype(np.uint8)
+ # CRITICAL: Convert RGB (model output) back to BGR (OpenCV expected)
+ result = cv2.cvtColor(result, cv2.COLOR_RGB2BGR)
+
+ # Remove padding
+ result = self.remove_padding(result, padding)
+ self._log_inpaint_diag('onnx-padded', result, mask)
+
+ elif self.is_jit_model:
+ # JIT model processing
+ if self.current_method == 'aot':
+ # Special handling for AOT model
+ logger.debug("Using AOT-specific preprocessing")
+
+ # CRITICAL: Convert BGR (OpenCV) to RGB (AOT model expected)
+ image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+
+ # Pad images to be divisible by mod
+ image_padded, padding = self.pad_img_to_modulo(image_rgb, self.pad_mod)
+ mask_padded, _ = self.pad_img_to_modulo(mask, self.pad_mod)
+
+ # AOT normalization: [-1, 1] range
+ img_torch = torch.from_numpy(image_padded).permute(2, 0, 1).unsqueeze_(0).float() / 127.5 - 1.0
+ mask_torch = torch.from_numpy(mask_padded).unsqueeze_(0).unsqueeze_(0).float() / 255.0
+
+ # Binarize mask for AOT
+ mask_torch[mask_torch < 0.5] = 0
+ mask_torch[mask_torch >= 0.5] = 1
+
+ # Move to device
+ img_torch = img_torch.to(self.device)
+ mask_torch = mask_torch.to(self.device)
+
+ # Optional FP16 on GPU for lower VRAM
+ if self.quantize_enabled and self.use_gpu:
+ try:
+ if self.torch_precision == 'fp16' or self.torch_precision == 'auto':
+ img_torch = img_torch.half()
+ mask_torch = mask_torch.half()
+ except Exception:
+ pass
+
+ # CRITICAL FOR AOT: Apply mask to input image
+ img_torch = img_torch * (1 - mask_torch)
+
+ logger.debug(f"AOT Image shape: {img_torch.shape}, Mask shape: {mask_torch.shape}")
+
+ # Run inference
+ with torch.no_grad():
+ inpainted = self.model(img_torch, mask_torch)
+
+ # Post-process AOT output: denormalize from [-1, 1] to [0, 255]
+ result = ((inpainted.cpu().squeeze_(0).permute(1, 2, 0).numpy() + 1.0) * 127.5)
+ result = np.clip(np.round(result), 0, 255).astype(np.uint8)
+
+ # CRITICAL: Convert RGB (model output) back to BGR (OpenCV expected)
+ result = cv2.cvtColor(result, cv2.COLOR_RGB2BGR)
+
+ # Remove padding
+ result = self.remove_padding(result, padding)
+ self._log_inpaint_diag('jit-aot', result, mask)
+
+ else:
+ # LaMa/Anime model processing
+ logger.debug(f"Using standard processing for {self.current_method}")
+
+ # CRITICAL: Convert BGR (OpenCV) to RGB (LaMa/JIT models expected)
+ image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+
+ # Pad images to be divisible by mod
+ image_padded, padding = self.pad_img_to_modulo(image_rgb, self.pad_mod)
+ mask_padded, _ = self.pad_img_to_modulo(mask, self.pad_mod)
+
+ # CRITICAL: Normalize to [0, 1] range for LaMa models
+ image_norm = image_padded.astype(np.float32) / 255.0
+ mask_norm = mask_padded.astype(np.float32) / 255.0
+
+ # Binary mask (values > 0 become 1)
+ mask_binary = (mask_norm > 0) * 1.0
+
+ # Convert to PyTorch tensors with correct shape
+ # Image should be [B, C, H, W]
+ image_tensor = torch.from_numpy(image_norm).permute(2, 0, 1).unsqueeze(0).float()
+ mask_tensor = torch.from_numpy(mask_binary).unsqueeze(0).unsqueeze(0).float()
+
+ # Move to device
+ image_tensor = image_tensor.to(self.device)
+ mask_tensor = mask_tensor.to(self.device)
+
+ # Optional FP16 on GPU for lower VRAM
+ if self.quantize_enabled and self.use_gpu:
+ try:
+ if self.torch_precision == 'fp16' or self.torch_precision == 'auto':
+ image_tensor = image_tensor.half()
+ mask_tensor = mask_tensor.half()
+ except Exception:
+ pass
+
+ # Debug shapes
+ logger.debug(f"Image tensor shape: {image_tensor.shape}") # Should be [1, 3, H, W]
+ logger.debug(f"Mask tensor shape: {mask_tensor.shape}") # Should be [1, 1, H, W]
+
+ # Ensure spatial dimensions match
+ if image_tensor.shape[2:] != mask_tensor.shape[2:]:
+ logger.warning(f"Spatial dimension mismatch: image {image_tensor.shape[2:]}, mask {mask_tensor.shape[2:]}")
+ # Resize mask to match image
+ mask_tensor = F.interpolate(mask_tensor, size=image_tensor.shape[2:], mode='nearest')
+
+ # Run inference with proper error handling
+ with torch.no_grad():
+ try:
+ # Standard LaMa JIT models expect (image, mask)
+ inpainted = self.model(image_tensor, mask_tensor)
+ except RuntimeError as e:
+ error_str = str(e)
+ logger.error(f"Model inference failed: {error_str}")
+
+ # If tensor size mismatch, log detailed info
+ if "size of tensor" in error_str.lower():
+ logger.error(f"Image shape: {image_tensor.shape}")
+ logger.error(f"Mask shape: {mask_tensor.shape}")
+
+ # Try transposing if needed
+ if "dimension 3" in error_str and "880" in error_str:
+ # This suggests the tensors might be in wrong format
+ # Try different permutation
+ logger.info("Attempting to fix tensor format...")
+
+ # Ensure image is [B, C, H, W] not [B, H, W, C]
+ if image_tensor.shape[1] > 3:
+ image_tensor = image_tensor.permute(0, 3, 1, 2)
+ logger.info(f"Permuted image to: {image_tensor.shape}")
+
+ # Try again
+ inpainted = self.model(image_tensor, mask_tensor)
+ else:
+ # As last resort, try swapped arguments
+ logger.info("Trying swapped arguments (mask, image)...")
+ inpainted = self.model(mask_tensor, image_tensor)
+ else:
+ raise e
+
+ # Process output
+ # Output should be [B, C, H, W]
+ if len(inpainted.shape) == 4:
+ # Remove batch dimension and permute to [H, W, C]
+ result = inpainted[0].permute(1, 2, 0).detach().cpu().numpy()
+ else:
+ # Handle unexpected output shape
+ result = inpainted.detach().cpu().numpy()
+ if len(result.shape) == 3 and result.shape[0] == 3:
+ result = result.transpose(1, 2, 0)
+
+ # Denormalize to 0-255 range
+ result = np.clip(result * 255, 0, 255).astype(np.uint8)
+
+ # CRITICAL: Convert RGB (model output) back to BGR (OpenCV expected)
+ result = cv2.cvtColor(result, cv2.COLOR_RGB2BGR)
+
+ # Remove padding
+ result = self.remove_padding(result, padding)
+ self._log_inpaint_diag('jit-lama', result, mask)
+
+ else:
+ # Original checkpoint model processing (keep as is)
+ h, w = image.shape[:2]
+ size = 768 if self.current_method == 'anime' else 512
+
+ img_resized = cv2.resize(image, (size, size), interpolation=cv2.INTER_LANCZOS4)
+ mask_resized = cv2.resize(mask, (size, size), interpolation=cv2.INTER_NEAREST)
+
+ img_norm = img_resized.astype(np.float32) / 127.5 - 1
+ mask_norm = mask_resized.astype(np.float32) / 255.0
+
+ img_tensor = torch.from_numpy(img_norm).permute(2, 0, 1).unsqueeze(0).float()
+ mask_tensor = torch.from_numpy(mask_norm).unsqueeze(0).unsqueeze(0).float()
+
+ if self.use_gpu and self.device:
+ img_tensor = img_tensor.to(self.device)
+ mask_tensor = mask_tensor.to(self.device)
+
+ with torch.no_grad():
+ output = self.model(img_tensor, mask_tensor)
+
+ result = output.squeeze(0).permute(1, 2, 0).cpu().numpy()
+ result = ((result + 1) * 127.5).clip(0, 255).astype(np.uint8)
+ result = cv2.resize(result, (w, h), interpolation=cv2.INTER_LANCZOS4)
+ self._log_inpaint_diag('ckpt', result, mask)
+
+ # Ensure result matches original size exactly
+ if result.shape[:2] != (orig_h, orig_w):
+ result = cv2.resize(result, (orig_w, orig_h), interpolation=cv2.INTER_LANCZOS4)
+
+ # Apply refinement blending if requested
+ if refinement != 'fast':
+ # Ensure mask is same size as result
+ if mask.shape[:2] != (orig_h, orig_w):
+ mask = cv2.resize(mask, (orig_w, orig_h), interpolation=cv2.INTER_NEAREST)
+
+ mask_3ch = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR) / 255.0
+ kernel = cv2.getGaussianKernel(21, 5)
+ kernel = kernel @ kernel.T
+ mask_blur = cv2.filter2D(mask_3ch, -1, kernel)
+ result = (result * mask_blur + image * (1 - mask_blur)).astype(np.uint8)
+
+ # No-op detection and auto-retry
+ try:
+ if self._is_noop(image, result, mask):
+ if _retry_attempt == 0:
+ logger.warning("⚠️ Inpainting produced no visible change; retrying with slight mask dilation and fast refinement")
+ kernel = np.ones((3, 3), np.uint8)
+ expanded_mask = cv2.dilate(mask, kernel, iterations=1)
+ return self.inpaint(image, expanded_mask, refinement='fast', _retry_attempt=1)
+ elif _retry_attempt == 1:
+ logger.warning("⚠️ Still no visible change after retry; attempting a second dilation and fast refinement")
+ kernel = np.ones((5, 5), np.uint8)
+ expanded_mask2 = cv2.dilate(mask, kernel, iterations=1)
+ return self.inpaint(image, expanded_mask2, refinement='fast', _retry_attempt=2)
+ else:
+ logger.warning("⚠️ No further retries; returning last result without fallback")
+ except Exception as e:
+ logger.debug(f"No-op detection step failed: {e}")
+
+ logger.info("✅ Inpainted successfully!")
+
+ # Force garbage collection to reduce memory spikes
+ try:
+ import gc
+ gc.collect()
+ # Clear CUDA cache if using GPU
+ if torch is not None and hasattr(torch, 'cuda') and torch.cuda.is_available():
+ torch.cuda.empty_cache()
+ except Exception:
+ pass
+
+ time.sleep(0.1) # Brief pause for stability
+ logger.debug("💤 Inpainting completion pausing briefly for stability")
+ return result
+
+ except Exception as e:
+ logger.error(f"❌ Inpainting failed: {e}")
+ logger.error(traceback.format_exc())
+
+ # Return original image on failure
+ logger.warning("Returning original image due to error")
+ return image
+
+ def inpaint_with_prompt(self, image, mask, prompt=None):
+ """Compatibility method"""
+ return self.inpaint(image, mask)
+
+ def batch_inpaint(self, images, masks):
+ """Batch inpainting"""
+ return [self.inpaint(img, mask) for img, mask in zip(images, masks)]
+
+ def load_bubble_model(self, model_path: str) -> bool:
+ """Load bubble detection model"""
+ if not BUBBLE_DETECTOR_AVAILABLE:
+ logger.warning("Bubble detector not available")
+ return False
+
+ if self.bubble_detector is None:
+ self.bubble_detector = BubbleDetector()
+
+ if self.bubble_detector.load_model(model_path):
+ self.bubble_model_loaded = True
+ self.config['bubble_model_path'] = model_path
+ self._save_config()
+ logger.info("✅ Bubble detection model loaded")
+ return True
+
+ return False
+
+ def detect_bubbles(self, image_path: str, confidence: float = 0.5) -> List[Tuple[int, int, int, int]]:
+ """Detect speech bubbles in image"""
+ if not self.bubble_model_loaded or self.bubble_detector is None:
+ logger.warning("No bubble model loaded")
+ return []
+
+ return self.bubble_detector.detect_bubbles(image_path, confidence=confidence)
+
+ def create_bubble_mask(self, image: np.ndarray, bubbles: List[Tuple[int, int, int, int]],
+ expand_pixels: int = 5) -> np.ndarray:
+ """Create mask from detected bubbles"""
+ h, w = image.shape[:2]
+ mask = np.zeros((h, w), dtype=np.uint8)
+
+ for x, y, bw, bh in bubbles:
+ x1 = max(0, x - expand_pixels)
+ y1 = max(0, y - expand_pixels)
+ x2 = min(w, x + bw + expand_pixels)
+ y2 = min(h, y + bh + expand_pixels)
+
+ cv2.rectangle(mask, (x1, y1), (x2, y2), 255, -1)
+
+ return mask
+
+ def inpaint_with_bubble_detection(self, image_path: str, confidence: float = 0.5,
+ expand_pixels: int = 5, refinement: str = 'normal') -> np.ndarray:
+ """Inpaint using automatic bubble detection"""
+ image = cv2.imread(image_path)
+ if image is None:
+ logger.error(f"Failed to load image: {image_path}")
+ return None
+
+ bubbles = self.detect_bubbles(image_path, confidence)
+ if not bubbles:
+ logger.warning("No bubbles detected")
+ return image
+
+ logger.info(f"Detected {len(bubbles)} bubbles")
+
+ mask = self.create_bubble_mask(image, bubbles, expand_pixels)
+ result = self.inpaint(image, mask, refinement)
+
+ return result
+
+ def batch_inpaint_with_bubbles(self, image_paths: List[str], **kwargs) -> List[np.ndarray]:
+ """Batch inpaint multiple images with bubble detection"""
+ results = []
+
+ for i, image_path in enumerate(image_paths):
+ logger.info(f"Processing image {i+1}/{len(image_paths)}")
+ result = self.inpaint_with_bubble_detection(image_path, **kwargs)
+ results.append(result)
+
+ return results
+
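+# Example (sketch): bubble-detection-driven cleanup of a single page; model paths and the
+# page file name below are placeholders.
+#
+#     inpainter = LocalInpainter()
+#     inpainter.load_model('anime', 'models/anime_inpaint.pt')
+#     inpainter.load_bubble_model('models/bubble_detector.pt')
+#     cleaned = inpainter.inpaint_with_bubble_detection('page_001.png', confidence=0.5)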
+
+# Compatibility classes - MAINTAIN ALL ORIGINAL CLASSES
+class LaMaModel(FFCInpaintModel):
+ pass
+
+class MATModel(FFCInpaintModel):
+ pass
+
+class AOTModel(FFCInpaintModel):
+ pass
+
+class SDInpaintModel(FFCInpaintModel):
+ pass
+
+class AnimeMangaInpaintModel(FFCInpaintModel):
+ pass
+
+class LaMaOfficialModel(FFCInpaintModel):
+ pass
+
+
+class HybridInpainter:
+ """Hybrid inpainter for compatibility"""
+
+ def __init__(self):
+ self.inpainters = {}
+
+ def add_method(self, name, method, model_path):
+ """Add a method - maintains compatibility"""
+ try:
+ inpainter = LocalInpainter()
+ if inpainter.load_model(method, model_path):
+ self.inpainters[name] = inpainter
+ return True
+ except:
+ pass
+ return False
+
+ def inpaint_ensemble(self, image: np.ndarray, mask: np.ndarray,
+ weights: Dict[str, float] = None) -> np.ndarray:
+ """Ensemble inpainting"""
+ if not self.inpainters:
+ logger.error("No inpainters loaded")
+ return image
+
+ if weights is None:
+ weights = {name: 1.0 / len(self.inpainters) for name in self.inpainters}
+
+ results = []
+ for name, inpainter in self.inpainters.items():
+ result = inpainter.inpaint(image, mask)
+ weight = weights.get(name, 1.0 / len(self.inpainters))
+ results.append(result * weight)
+
+        ensemble = np.clip(np.sum(results, axis=0), 0, 255).astype(np.uint8)
+ return ensemble
+
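+# Example (sketch): blend two loaded inpainters; method names, model paths and weights
+# below are illustrative placeholders.
+#
+#     hybrid = HybridInpainter()
+#     hybrid.add_method('lama', 'lama', 'models/lama.pt')
+#     hybrid.add_method('anime', 'anime', 'models/anime.pt')
+#     blended = hybrid.inpaint_ensemble(image, mask, weights={'lama': 0.6, 'anime': 0.4})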
+
+# Helper function for quick setup
+def setup_inpainter_for_manga(auto_download=True):
+ """Quick setup for manga inpainting"""
+ inpainter = LocalInpainter()
+
+ if auto_download:
+ # Try to download anime JIT model
+ jit_path = inpainter.download_jit_model('anime')
+ if jit_path:
+ inpainter.load_model('anime', jit_path)
+ logger.info("✅ Manga inpainter ready with JIT model")
+
+ return inpainter
+
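+# Example (sketch): typical call pattern; the file names below are placeholders.
+#
+#     inpainter = setup_inpainter_for_manga()
+#     image = cv2.imread("page_001.png")                            # BGR page
+#     mask = cv2.imread("page_001_mask.png", cv2.IMREAD_GRAYSCALE)  # text mask
+#     if inpainter.model_loaded:
+#         cleaned = inpainter.inpaint(image, mask, refinement='normal')
+#         cv2.imwrite("page_001_clean.png", cleaned)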
+
+if __name__ == "__main__":
+ import sys
+
+ if len(sys.argv) > 1:
+ if sys.argv[1] == "download_jit":
+ # Download JIT models
+ inpainter = LocalInpainter()
+ for method in ['lama', 'anime', 'lama_official']:
+ print(f"\nDownloading {method}...")
+ path = inpainter.download_jit_model(method)
+ if path:
+ print(f" ✅ Downloaded to: {path}")
+
+        else:
+            # Treat the first argument as a model path and test loading it
+            inpainter = LocalInpainter()
+            inpainter.load_model('lama', sys.argv[1])
+            print("Model loaded - check logs for details")
+
+ else:
+ print("\nLocal Inpainter - Compatible Version")
+ print("=====================================")
+ print("\nSupports both:")
+ print(" - JIT models (.pt) - RECOMMENDED")
+ print(" - Checkpoint files (.ckpt) - With warnings")
+ print("\nTo download JIT models:")
+ print(" python local_inpainter.py download_jit")
+ print("\nTo test:")
+ print(" python local_inpainter.py ")
\ No newline at end of file
diff --git a/manga_integration.py b/manga_integration.py
new file mode 100644
index 0000000000000000000000000000000000000000..81c41f48ddd5fa88da4866046d04a1d1dc30da65
--- /dev/null
+++ b/manga_integration.py
@@ -0,0 +1,8390 @@
+# manga_integration.py
+"""
+Enhanced GUI Integration module for Manga Translation with text visibility controls
+Integrates with TranslatorGUI using WindowManager and existing infrastructure
+Now includes full page context mode with customizable prompt
+"""
+import sys
+import os
+import json
+import threading
+import time
+import hashlib
+import traceback
+import concurrent.futures
+from PySide6.QtWidgets import (QWidget, QLabel, QFrame, QPushButton, QVBoxLayout, QHBoxLayout,
+ QGroupBox, QListWidget, QComboBox, QLineEdit, QCheckBox,
+ QRadioButton, QSlider, QSpinBox, QDoubleSpinBox, QTextEdit,
+ QProgressBar, QFileDialog, QMessageBox, QColorDialog, QScrollArea,
+ QDialog, QButtonGroup, QApplication)
+from PySide6.QtCore import Qt, QTimer, Signal, QObject, Slot, QEvent
+from PySide6.QtGui import QFont, QColor, QTextCharFormat, QIcon, QKeyEvent
+import tkinter as tk
+from tkinter import ttk, filedialog as tk_filedialog, messagebox as tk_messagebox, scrolledtext
+try:
+ import ttkbootstrap as tb
+except ImportError:
+ tb = ttk
+from typing import List, Dict, Optional, Any
+from queue import Queue
+import logging
+from manga_translator import MangaTranslator, GOOGLE_CLOUD_VISION_AVAILABLE
+from manga_settings_dialog import MangaSettingsDialog
+
+
+# Try to import UnifiedClient for API initialization
+try:
+ from unified_api_client import UnifiedClient
+except ImportError:
+ UnifiedClient = None
+
+# Module-level function for multiprocessing (must be picklable)
+def _preload_models_worker(models_list, progress_queue):
+ """Worker function to preload models in separate process (module-level for pickling)"""
+ try:
+ total_steps = len(models_list)
+
+ for idx, (model_type, model_key, model_name, model_path) in enumerate(models_list):
+ try:
+ # Send start progress
+ base_progress = int((idx / total_steps) * 100)
+ progress_queue.put(('progress', base_progress, model_name))
+
+ if model_type == 'detector':
+ from bubble_detector import BubbleDetector
+ from manga_translator import MangaTranslator
+
+ # Progress: 0-25% of this model's portion
+ progress_queue.put(('progress', base_progress + int(25 / total_steps), f"{model_name} - Initializing"))
+ bd = BubbleDetector()
+
+ # Progress: 25-75% - loading model
+ progress_queue.put(('progress', base_progress + int(50 / total_steps), f"{model_name} - Downloading/Loading"))
+ if model_key == 'rtdetr_onnx':
+ model_repo = model_path if model_path else 'ogkalu/comic-text-and-bubble-detector'
+ bd.load_rtdetr_onnx_model(model_repo)
+ elif model_key == 'rtdetr':
+ bd.load_rtdetr_model()
+ elif model_key == 'yolo':
+ if model_path:
+ bd.load_model(model_path)
+
+ # Progress: 75-100% - finalizing
+ progress_queue.put(('progress', base_progress + int(75 / total_steps), f"{model_name} - Finalizing"))
+ progress_queue.put(('loaded', model_type, model_name))
+
+ elif model_type == 'inpainter':
+ from local_inpainter import LocalInpainter
+
+ # Progress: 0-25%
+ progress_queue.put(('progress', base_progress + int(25 / total_steps), f"{model_name} - Initializing"))
+ inp = LocalInpainter()
+ resolved_path = model_path
+
+ if not resolved_path or not os.path.exists(resolved_path):
+ # Progress: 25-50% - downloading
+ progress_queue.put(('progress', base_progress + int(40 / total_steps), f"{model_name} - Downloading"))
+ try:
+ resolved_path = inp.download_jit_model(model_key)
+ except:
+ resolved_path = None
+
+ if resolved_path and os.path.exists(resolved_path):
+ # Progress: 50-90% - loading
+ progress_queue.put(('progress', base_progress + int(60 / total_steps), f"{model_name} - Loading model"))
+ success = inp.load_model_with_retry(model_key, resolved_path)
+
+ # Progress: 90-100% - finalizing
+ progress_queue.put(('progress', base_progress + int(85 / total_steps), f"{model_name} - Finalizing"))
+ if success:
+ progress_queue.put(('loaded', model_type, model_name))
+
+ except Exception as e:
+ progress_queue.put(('error', model_name, str(e)))
+
+ # Send completion signal
+ progress_queue.put(('complete', None, None))
+
+ except Exception as e:
+ progress_queue.put(('error', 'Process', str(e)))
+
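+# Message tuples put on the multiprocessing queue by _preload_models_worker
+# (consumed by MangaTranslationTab._start_model_preloading below):
+#   ('progress', percent, status_text)  - overall progress update
+#   ('loaded', model_type, model_name)  - one model finished loading
+#   ('error', model_name, message)      - a model failed to load
+#   ('complete', None, None)            - all requested models processed
+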
+class _MangaGuiLogHandler(logging.Handler):
+ """Forward logging records into MangaTranslationTab._log."""
+ def __init__(self, gui_ref, level=logging.INFO):
+ super().__init__(level)
+ self.gui_ref = gui_ref
+ self._last_msg = None
+ self.setFormatter(logging.Formatter('%(levelname)s:%(name)s:%(message)s'))
+
+ def emit(self, record: logging.LogRecord) -> None:
+ # Avoid looping/duplicates from this module's own messages or when stdio is redirected
+ try:
+ if getattr(self.gui_ref, '_stdio_redirect_active', False):
+ return
+ # Filter out manga_translator, bubble_detector, local_inpainter logs as they're already shown
+ if record and isinstance(record.name, str):
+ if record.name.startswith(('manga_integration', 'manga_translator', 'bubble_detector', 'local_inpainter', 'unified_api_client', 'google_genai', 'httpx')):
+ return
+ except Exception:
+ pass
+ try:
+ msg = self.format(record)
+ except Exception:
+ msg = record.getMessage()
+ # Deduplicate identical consecutive messages
+ if msg == self._last_msg:
+ return
+ self._last_msg = msg
+
+ # Map logging levels to our tag levels
+ lvl = record.levelname.lower()
+ tag = 'info'
+ if lvl.startswith('warn'):
+ tag = 'warning'
+ elif lvl.startswith('err') or lvl.startswith('crit'):
+ tag = 'error'
+ elif lvl.startswith('debug'):
+ tag = 'debug'
+ elif lvl.startswith('info'):
+ tag = 'info'
+
+ # Always store to persistent log (even if GUI is closed)
+ try:
+ with MangaTranslationTab._persistent_log_lock:
+ if len(MangaTranslationTab._persistent_log) >= 1000:
+ MangaTranslationTab._persistent_log.pop(0)
+ MangaTranslationTab._persistent_log.append((msg, tag))
+ except Exception:
+ pass
+
+ # Also try to display in GUI if it exists
+ try:
+ if hasattr(self.gui_ref, '_log'):
+ self.gui_ref._log(msg, tag)
+ except Exception:
+ pass
+
+class _StreamToGuiLog:
+ """A minimal file-like stream that forwards lines to _log."""
+ def __init__(self, write_cb):
+ self._write_cb = write_cb
+ self._buf = ''
+
+ def write(self, s: str):
+ try:
+ self._buf += s
+ while '\n' in self._buf:
+ line, self._buf = self._buf.split('\n', 1)
+ if line.strip():
+ self._write_cb(line)
+ except Exception:
+ pass
+
+ def flush(self):
+ try:
+ if self._buf.strip():
+ self._write_cb(self._buf)
+ self._buf = ''
+ except Exception:
+ pass
+
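+# Example (sketch): route stdout into the GUI log; the `tab` instance and the "info" tag
+# below are illustrative.
+#
+#     import sys
+#     sys.stdout = _StreamToGuiLog(lambda line: tab._log(line, "info"))
+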
+class MangaTranslationTab:
+ """GUI interface for manga translation integrated with TranslatorGUI"""
+
+ # Class-level cancellation flag for all instances
+ _global_cancelled = False
+ _global_cancel_lock = threading.RLock()
+
+ # Class-level log storage to persist across window closures
+ _persistent_log = []
+ _persistent_log_lock = threading.RLock()
+
+ # Class-level preload tracking to prevent duplicate loading
+ _preload_in_progress = False
+ _preload_lock = threading.RLock()
+ _preload_completed_models = set() # Track which models have been loaded
+
+ @classmethod
+ def set_global_cancellation(cls, cancelled: bool):
+ """Set global cancellation flag for all translation instances"""
+ with cls._global_cancel_lock:
+ cls._global_cancelled = cancelled
+
+ @classmethod
+ def is_globally_cancelled(cls) -> bool:
+ """Check if globally cancelled"""
+ with cls._global_cancel_lock:
+ return cls._global_cancelled
+
+ def __init__(self, parent_widget, main_gui, dialog, scroll_area=None):
+ """Initialize manga translation interface
+
+ Args:
+ parent_widget: The content widget for the interface (PySide6 QWidget)
+ main_gui: Reference to TranslatorGUI instance
+ dialog: The dialog window (PySide6 QDialog)
+ scroll_area: The scroll area widget (PySide6 QScrollArea, optional)
+ """
+ # CRITICAL: Set thread limits FIRST before any imports or processing
+ import os
+ parallel_enabled = main_gui.config.get('manga_settings', {}).get('advanced', {}).get('parallel_processing', False)
+ if not parallel_enabled:
+ # Force single-threaded mode for all libraries
+ os.environ['OMP_NUM_THREADS'] = '1'
+ os.environ['MKL_NUM_THREADS'] = '1'
+ os.environ['OPENBLAS_NUM_THREADS'] = '1'
+ os.environ['NUMEXPR_NUM_THREADS'] = '1'
+ os.environ['VECLIB_MAXIMUM_THREADS'] = '1'
+ os.environ['ONNXRUNTIME_NUM_THREADS'] = '1'
+ # Also set torch and cv2 thread limits if already imported
+ try:
+ import torch
+ torch.set_num_threads(1)
+ except (ImportError, RuntimeError):
+ pass
+ try:
+ import cv2
+ cv2.setNumThreads(1)
+ except (ImportError, AttributeError):
+ pass
+
+ self.parent_widget = parent_widget
+ self.main_gui = main_gui
+ self.dialog = dialog
+ self.scroll_area = scroll_area
+
+ # Translation state
+ self.translator = None
+ self.is_running = False
+ self.stop_flag = threading.Event()
+ self.translation_thread = None
+ self.translation_future = None
+ # Shared executor from main GUI if available
+ try:
+ if hasattr(self.main_gui, 'executor') and self.main_gui.executor:
+ self.executor = self.main_gui.executor
+ else:
+ self.executor = None
+ except Exception:
+ self.executor = None
+ self.selected_files = []
+ self.current_file_index = 0
+ self.font_mapping = {} # Initialize font mapping dictionary
+
+
+ # Progress tracking
+ self.total_files = 0
+ self.completed_files = 0
+ self.failed_files = 0
+ self.qwen2vl_model_size = self.main_gui.config.get('qwen2vl_model_size', '1')
+
+ # Advanced performance toggles
+ try:
+ adv_cfg = self.main_gui.config.get('manga_settings', {}).get('advanced', {})
+ except Exception:
+ adv_cfg = {}
+ # In singleton mode, reduce OpenCV thread usage to avoid CPU spikes
+ try:
+ if bool(adv_cfg.get('use_singleton_models', False)):
+ import cv2 as _cv2
+ try:
+ _cv2.setNumThreads(1)
+ except Exception:
+ pass
+ except Exception:
+ pass
+ # Do NOT preload big local models by default to avoid startup crashes
+ self.preload_local_models_on_open = bool(adv_cfg.get('preload_local_models_on_open', False))
+
+ # Queue for thread-safe GUI updates
+ self.update_queue = Queue()
+
+ # Flags for stdio redirection to avoid duplicate GUI logs
+ self._stdout_redirect_on = False
+ self._stderr_redirect_on = False
+ self._stdio_redirect_active = False
+
+ # Flag to prevent saving during initialization
+ self._initializing = True
+
+ # IMPORTANT: Load settings BEFORE building interface
+ # This ensures all variables are initialized before they're used in the GUI
+ self._load_rendering_settings()
+
+ # Initialize the full page context prompt
+ self.full_page_context_prompt = (
+ "You will receive multiple text segments from a manga page, each prefixed with an index like [0], [1], etc. "
+ "Translate each segment considering the context of all segments together. "
+ "Maintain consistency in character names, tone, and style across all translations.\n\n"
+ "CRITICAL: Return your response as a valid JSON object where each key includes BOTH the index prefix "
+ "AND the original text EXACTLY as provided (e.g., '[0] こんにちは'), and each value is the translation.\n"
+ "This is essential for correct mapping - do not modify or omit the index prefixes!\n\n"
+ "Make sure to properly escape any special characters in the JSON:\n"
+ "- Use \\n for newlines\n"
+ "- Use \\\" for quotes\n"
+ "- Use \\\\ for backslashes\n\n"
+ "Example:\n"
+ '{\n'
+ ' "[0] こんにちは": "Hello",\n'
+ ' "[1] ありがとう": "Thank you",\n'
+ ' "[2] さようなら": "Goodbye"\n'
+ '}\n\n'
+ 'REMEMBER: Keep the [index] prefix in each JSON key exactly as shown in the input!'
+ )
+
+ # Initialize the OCR system prompt
+ self.ocr_prompt = self.main_gui.config.get('manga_ocr_prompt',
+ "YOU ARE AN OCR SYSTEM. YOUR ONLY JOB IS TEXT EXTRACTION.\n\n"
+ "CRITICAL RULES:\n"
+ "1. DO NOT TRANSLATE ANYTHING\n"
+ "2. DO NOT MODIFY THE TEXT\n"
+ "3. DO NOT EXPLAIN OR COMMENT\n"
+ "4. ONLY OUTPUT THE EXACT TEXT YOU SEE\n"
+ "5. PRESERVE NATURAL TEXT FLOW - DO NOT ADD UNNECESSARY LINE BREAKS\n\n"
+ "If you see Korean text, output it in Korean.\n"
+ "If you see Japanese text, output it in Japanese.\n"
+ "If you see Chinese text, output it in Chinese.\n"
+ "If you see English text, output it in English.\n\n"
+ "IMPORTANT: Only use line breaks where they naturally occur in the original text "
+ "(e.g., between dialogue lines or paragraphs). Do not break text mid-sentence or "
+ "between every word/character.\n\n"
+ "For vertical text common in manga/comics, transcribe it as a continuous line unless "
+ "there are clear visual breaks.\n\n"
+ "NEVER translate. ONLY extract exactly what is written.\n"
+ "Output ONLY the raw text, nothing else."
+ )
+
+ # flag to skip status checks during init
+ self._initializing_gui = True
+
+ # Build interface AFTER loading settings
+ self._build_interface()
+
+ # Now allow status checks
+ self._initializing_gui = False
+
+ # Do one status check after everything is built
+ # Use QTimer for PySide6 dialog
+ QTimer.singleShot(100, self._check_provider_status)
+
+ # Start model preloading in background
+ QTimer.singleShot(200, self._start_model_preloading)
+
+ # Now that everything is initialized, allow saving
+ self._initializing = False
+
+ # Attach logging bridge so library logs appear in our log area
+ self._attach_logging_bridge()
+
+ # Start update loop
+ self._process_updates()
+
+ # Install event filter for F11 fullscreen toggle
+ self._install_fullscreen_handler()
+
+ def _is_stop_requested(self) -> bool:
+ """Check if stop has been requested using multiple sources"""
+ # Check global cancellation first
+ if self.is_globally_cancelled():
+ return True
+
+ # Check local stop flag
+ if hasattr(self, 'stop_flag') and self.stop_flag.is_set():
+ return True
+
+ # Check running state
+ if hasattr(self, 'is_running') and not self.is_running:
+ return True
+
+ return False
+
+ def _reset_global_cancellation(self):
+ """Reset all global cancellation flags for new translation"""
+ # Reset local class flag
+ self.set_global_cancellation(False)
+
+ # Reset MangaTranslator class flag
+ try:
+ from manga_translator import MangaTranslator
+ MangaTranslator.set_global_cancellation(False)
+ except ImportError:
+ pass
+
+ # Reset UnifiedClient flag
+ try:
+ from unified_api_client import UnifiedClient
+ UnifiedClient.set_global_cancellation(False)
+ except ImportError:
+ pass
+
+ def reset_stop_flags(self):
+ """Reset all stop flags when starting new translation"""
+ self.is_running = False
+ if hasattr(self, 'stop_flag'):
+ self.stop_flag.clear()
+ self._reset_global_cancellation()
+ self._log("🔄 Stop flags reset for new translation", "debug")
+
+ def _install_fullscreen_handler(self):
+ """Install event filter to handle F11 key for fullscreen toggle"""
+ if not self.dialog:
+ return
+
+ # Create event filter for the dialog
+ class FullscreenEventFilter(QObject):
+ def __init__(self, dialog_ref):
+ super().__init__()
+ self.dialog = dialog_ref
+ self.is_fullscreen = False
+ self.normal_geometry = None
+
+ def eventFilter(self, obj, event):
+ if event.type() == QEvent.KeyPress:
+ key_event = event
+ if key_event.key() == Qt.Key_F11:
+ self.toggle_fullscreen()
+ return True
+ return False
+
+ def toggle_fullscreen(self):
+ if self.is_fullscreen:
+ # Exit fullscreen
+ self.dialog.setWindowState(self.dialog.windowState() & ~Qt.WindowFullScreen)
+ if self.normal_geometry:
+ self.dialog.setGeometry(self.normal_geometry)
+ self.is_fullscreen = False
+ else:
+ # Enter fullscreen
+ self.normal_geometry = self.dialog.geometry()
+ self.dialog.setWindowState(self.dialog.windowState() | Qt.WindowFullScreen)
+ self.is_fullscreen = True
+
+ # Create and install the event filter
+ self._fullscreen_filter = FullscreenEventFilter(self.dialog)
+ self.dialog.installEventFilter(self._fullscreen_filter)
+
+ def _distribute_stop_flags(self):
+ """Distribute stop flags to all manga translation components"""
+ if not hasattr(self, 'translator') or not self.translator:
+ return
+
+ # Set stop flag on translator
+ if hasattr(self.translator, 'set_stop_flag'):
+ self.translator.set_stop_flag(self.stop_flag)
+
+ # Set stop flag on OCR manager and all providers
+ if hasattr(self.translator, 'ocr_manager') and self.translator.ocr_manager:
+ if hasattr(self.translator.ocr_manager, 'set_stop_flag'):
+ self.translator.ocr_manager.set_stop_flag(self.stop_flag)
+
+ # Set stop flag on bubble detector if available
+ if hasattr(self.translator, 'bubble_detector') and self.translator.bubble_detector:
+ if hasattr(self.translator.bubble_detector, 'set_stop_flag'):
+ self.translator.bubble_detector.set_stop_flag(self.stop_flag)
+
+ # Set stop flag on local inpainter if available
+ if hasattr(self.translator, 'local_inpainter') and self.translator.local_inpainter:
+ if hasattr(self.translator.local_inpainter, 'set_stop_flag'):
+ self.translator.local_inpainter.set_stop_flag(self.stop_flag)
+
+ # Also try to set on thread-local components if accessible
+ if hasattr(self.translator, '_thread_local'):
+ thread_local = self.translator._thread_local
+ # Set on thread-local bubble detector
+ if hasattr(thread_local, 'bubble_detector') and thread_local.bubble_detector:
+ if hasattr(thread_local.bubble_detector, 'set_stop_flag'):
+ thread_local.bubble_detector.set_stop_flag(self.stop_flag)
+
+ # Set on thread-local inpainters
+ if hasattr(thread_local, 'local_inpainters') and isinstance(thread_local.local_inpainters, dict):
+ for inpainter in thread_local.local_inpainters.values():
+ if hasattr(inpainter, 'set_stop_flag'):
+ inpainter.set_stop_flag(self.stop_flag)
+
+ self._log("🔄 Stop flags distributed to all components", "debug")
+
+ def _preflight_bubble_detector(self, ocr_settings: dict) -> bool:
+ """Check if bubble detector is preloaded in the pool or already loaded.
+ Returns True if a ready instance or preloaded spare is available; no heavy loads are performed here.
+ """
+ try:
+ import time as _time
+ start = _time.time()
+ if not ocr_settings.get('bubble_detection_enabled', False):
+ return False
+ det_type = ocr_settings.get('detector_type', 'rtdetr_onnx')
+ model_id = ocr_settings.get('rtdetr_model_url') or ocr_settings.get('bubble_model_path') or ''
+
+ # 1) If translator already has a ready detector, report success
+ try:
+ bd = getattr(self, 'translator', None) and getattr(self.translator, 'bubble_detector', None)
+ if bd and (getattr(bd, 'rtdetr_loaded', False) or getattr(bd, 'rtdetr_onnx_loaded', False) or getattr(bd, 'model_loaded', False)):
+ self._log("🤖 Bubble detector already loaded", "debug")
+ return True
+ except Exception:
+ pass
+
+ # 2) Check shared preload pool for spares
+ try:
+ from manga_translator import MangaTranslator
+ key = (det_type, model_id)
+ with MangaTranslator._detector_pool_lock:
+ rec = MangaTranslator._detector_pool.get(key)
+ spares = (rec or {}).get('spares') or []
+ if len(spares) > 0:
+ self._log(f"🤖 Preflight: found {len(spares)} preloaded bubble detector spare(s) for key={key}", "info")
+ return True
+ except Exception:
+ pass
+
+ # 3) No spares/ready detector yet; do not load here. Just report timing and return False.
+ elapsed = _time.time() - start
+ self._log(f"⏱️ Preflight checked bubble detector pool in {elapsed:.2f}s — no ready instance", "debug")
+ return False
+ except Exception:
+ return False
+
+ def _start_model_preloading(self):
+ """Start preloading models in separate process for true background loading"""
+ from multiprocessing import Process, Queue as MPQueue
+ import queue
+
+ # Check if preload is already in progress
+ with MangaTranslationTab._preload_lock:
+ if MangaTranslationTab._preload_in_progress:
+ print("Model preloading already in progress, skipping...")
+ return
+
+ # Get settings
+ manga_settings = self.main_gui.config.get('manga_settings', {})
+ ocr_settings = manga_settings.get('ocr', {})
+ inpaint_settings = manga_settings.get('inpainting', {})
+
+ models_to_load = []
+ bubble_detection_enabled = ocr_settings.get('bubble_detection_enabled', False)
+ skip_inpainting = self.main_gui.config.get('manga_skip_inpainting', False)
+ inpainting_method = inpaint_settings.get('method', 'local')
+ inpainting_enabled = not skip_inpainting and inpainting_method == 'local'
+
+ # Check if models need loading
+ try:
+ from manga_translator import MangaTranslator
+
+ if bubble_detection_enabled:
+ detector_type = ocr_settings.get('detector_type', 'rtdetr_onnx')
+ model_url = ocr_settings.get('rtdetr_model_url') or ocr_settings.get('bubble_model_path') or ''
+ key = (detector_type, model_url)
+ model_id = f"detector_{detector_type}_{model_url}"
+
+ # Skip if already loaded in this session
+ if model_id not in MangaTranslationTab._preload_completed_models:
+ with MangaTranslator._detector_pool_lock:
+ rec = MangaTranslator._detector_pool.get(key)
+ if not rec or (not rec.get('spares') and not rec.get('loaded')):
+ detector_name = 'RT-DETR ONNX' if detector_type == 'rtdetr_onnx' else 'RT-DETR' if detector_type == 'rtdetr' else 'YOLO'
+ models_to_load.append(('detector', detector_type, detector_name, model_url))
+
+ if inpainting_enabled:
+ # Check top-level config first (manga_local_inpaint_model), then nested config
+ local_method = self.main_gui.config.get('manga_local_inpaint_model',
+ inpaint_settings.get('local_method', 'anime_onnx'))
+ model_path = self.main_gui.config.get(f'manga_{local_method}_model_path', '')
+ # Fallback to non-prefixed key if not found
+ if not model_path:
+ model_path = self.main_gui.config.get(f'{local_method}_model_path', '')
+ key = (local_method, model_path or '')
+ model_id = f"inpainter_{local_method}_{model_path}"
+
+ # Skip if already loaded in this session
+ if model_id not in MangaTranslationTab._preload_completed_models:
+ with MangaTranslator._inpaint_pool_lock:
+ rec = MangaTranslator._inpaint_pool.get(key)
+ if not rec or (not rec.get('loaded') and not rec.get('spares')):
+ models_to_load.append(('inpainter', local_method, local_method.capitalize(), model_path))
+ except Exception as e:
+ print(f"Error checking models: {e}")
+ return
+
+ if not models_to_load:
+ return
+
+ # Set preload in progress flag
+ with MangaTranslationTab._preload_lock:
+ MangaTranslationTab._preload_in_progress = True
+
+ # Show progress bar
+ self.preload_progress_frame.setVisible(True)
+
+ # Create queue for IPC
+ progress_queue = MPQueue()
+
+ # Start loading in separate process using module-level function
+ load_process = Process(target=_preload_models_worker, args=(models_to_load, progress_queue), daemon=True)
+ load_process.start()
+
+ # Store models being loaded for tracking
+ models_being_loaded = []
+ for model_type, model_key, model_name, model_path in models_to_load:
+ if model_type == 'detector':
+ models_being_loaded.append(f"detector_{model_key}_{model_path}")
+ elif model_type == 'inpainter':
+ models_being_loaded.append(f"inpainter_{model_key}_{model_path}")
+
+ # Monitor progress with QTimer
+ def check_progress():
+ try:
+ while True:
+ try:
+ msg = progress_queue.get_nowait()
+ msg_type = msg[0]
+
+ if msg_type == 'progress':
+ _, progress, model_name = msg
+ self.preload_progress_bar.setValue(progress)
+ self.preload_status_label.setText(f"Loading {model_name}...")
+
+ elif msg_type == 'loaded':
+ _, model_type, model_name = msg
+ print(f"✓ Loaded {model_name}")
+
+ elif msg_type == 'error':
+ _, model_name, error = msg
+ print(f"✗ Failed to load {model_name}: {error}")
+
+ elif msg_type == 'complete':
+ # Child process cached models
+ self.preload_progress_bar.setValue(100)
+ self.preload_status_label.setText("✓ Models ready")
+
+ # Mark all models as completed
+ with MangaTranslationTab._preload_lock:
+ MangaTranslationTab._preload_completed_models.update(models_being_loaded)
+ MangaTranslationTab._preload_in_progress = False
+
+ # Load RT-DETR into pool in background (doesn't block GUI)
+ def load_rtdetr_bg():
+ try:
+ from manga_translator import MangaTranslator
+ from bubble_detector import BubbleDetector
+
+ for model_type, model_key, model_name, model_path in models_to_load:
+ if model_type == 'detector' and model_key == 'rtdetr_onnx':
+ key = (model_key, model_path)
+
+ # Check if already loaded
+ with MangaTranslator._detector_pool_lock:
+ rec = MangaTranslator._detector_pool.get(key)
+ if rec and rec.get('spares'):
+ print(f"⏭️ {model_name} already in pool")
+ continue
+
+ # Load into pool
+ bd = BubbleDetector()
+ model_repo = model_path if model_path else 'ogkalu/comic-text-and-bubble-detector'
+ bd.load_rtdetr_onnx_model(model_repo)
+
+ with MangaTranslator._detector_pool_lock:
+ rec = MangaTranslator._detector_pool.get(key)
+ if not rec:
+ rec = {'spares': []}
+ MangaTranslator._detector_pool[key] = rec
+ rec['spares'].append(bd)
+ print(f"✓ Loaded {model_name} into pool (background)")
+ except Exception as e:
+ print(f"✗ Background RT-DETR loading error: {e}")
+
+ # Start background loading
+ threading.Thread(target=load_rtdetr_bg, daemon=True).start()
+
+ QTimer.singleShot(2000, lambda: self.preload_progress_frame.setVisible(False))
+ return
+
+ except queue.Empty:
+ break
+
+ QTimer.singleShot(100, check_progress)
+
+ except Exception as e:
+ print(f"Progress check error: {e}")
+ self.preload_progress_frame.setVisible(False)
+ # Reset flag on error
+ with MangaTranslationTab._preload_lock:
+ MangaTranslationTab._preload_in_progress = False
+
+ QTimer.singleShot(100, check_progress)
+
+ def _disable_spinbox_mousewheel(self, spinbox):
+ """Disable mousewheel scrolling on a spinbox (PySide6)"""
+ # Override wheelEvent to prevent scrolling
+ spinbox.wheelEvent = lambda event: None
+
+ def _disable_combobox_mousewheel(self, combobox):
+ """Disable mousewheel scrolling on a combobox (PySide6)"""
+ # Override wheelEvent to prevent scrolling
+ combobox.wheelEvent = lambda event: None
+
+ def _create_styled_checkbox(self, text):
+ """Create a checkbox with proper checkmark using text overlay"""
+ from PySide6.QtWidgets import QCheckBox, QLabel
+ from PySide6.QtCore import Qt, QTimer
+ from PySide6.QtGui import QFont
+
+ checkbox = QCheckBox(text)
+ checkbox.setStyleSheet("""
+ QCheckBox {
+ color: white;
+ spacing: 6px;
+ }
+ QCheckBox::indicator {
+ width: 14px;
+ height: 14px;
+ border: 1px solid #5a9fd4;
+ border-radius: 2px;
+ background-color: #2d2d2d;
+ }
+ QCheckBox::indicator:checked {
+ background-color: #5a9fd4;
+ border-color: #5a9fd4;
+ }
+ QCheckBox::indicator:hover {
+ border-color: #7bb3e0;
+ }
+ QCheckBox:disabled {
+ color: #666666;
+ }
+ QCheckBox::indicator:disabled {
+ background-color: #1a1a1a;
+ border-color: #3a3a3a;
+ }
+ """)
+
+ # Create checkmark overlay
+ checkmark = QLabel("✓", checkbox)
+ checkmark.setStyleSheet("""
+ QLabel {
+ color: white;
+ background: transparent;
+ font-weight: bold;
+ font-size: 11px;
+ }
+ """)
+ checkmark.setAlignment(Qt.AlignCenter)
+ checkmark.hide()
+ checkmark.setAttribute(Qt.WA_TransparentForMouseEvents) # Make checkmark click-through
+
+ # Position checkmark properly after widget is shown
+ def position_checkmark():
+ # Position over the checkbox indicator
+ checkmark.setGeometry(2, 1, 14, 14)
+
+ # Show/hide checkmark based on checked state
+ def update_checkmark():
+ if checkbox.isChecked():
+ position_checkmark()
+ checkmark.show()
+ else:
+ checkmark.hide()
+
+ checkbox.stateChanged.connect(update_checkmark)
+ # Delay initial positioning to ensure widget is properly rendered
+ QTimer.singleShot(0, lambda: (position_checkmark(), update_checkmark()))
+
+ return checkbox
+
+ def _download_hf_model(self):
+ """Download HuggingFace models with progress tracking - PySide6 version"""
+ from PySide6.QtWidgets import (QDialog, QVBoxLayout, QHBoxLayout, QLabel,
+ QRadioButton, QButtonGroup, QLineEdit, QPushButton,
+ QGroupBox, QTextEdit, QProgressBar, QFrame,
+ QScrollArea, QWidget, QSizePolicy)
+ from PySide6.QtCore import Qt, QThread, Signal, QTimer
+ from PySide6.QtGui import QFont
+
+ provider = self.ocr_provider_value
+
+ # Model sizes (approximate in MB)
+ model_sizes = {
+ 'manga-ocr': 450,
+ 'Qwen2-VL': {
+ '2B': 4000,
+ '7B': 14000,
+ '72B': 144000,
+ 'custom': 10000 # Default estimate for custom models
+ }
+ }
+
+ # For Qwen2-VL, show model selection dialog first
+ if provider == 'Qwen2-VL':
+ # Create PySide6 dialog
+ selection_dialog = QDialog(self.dialog)
+ selection_dialog.setWindowTitle("Select Qwen2-VL Model Size")
+ selection_dialog.setMinimumSize(600, 500)
+ main_layout = QVBoxLayout(selection_dialog)
+
+ # Title
+ title_label = QLabel("Select Qwen2-VL Model Size")
+ title_font = QFont("Arial", 14, QFont.Weight.Bold)
+ title_label.setFont(title_font)
+ title_label.setAlignment(Qt.AlignmentFlag.AlignCenter)
+ main_layout.addWidget(title_label)
+
+ # Model selection frame
+ model_frame = QGroupBox("Model Options")
+ model_frame_font = QFont("Arial", 11, QFont.Weight.Bold)
+ model_frame.setFont(model_frame_font)
+ model_frame_layout = QVBoxLayout(model_frame)
+ model_frame_layout.setContentsMargins(15, 15, 15, 15)
+ model_frame_layout.setSpacing(10)
+
+ model_options = {
+ "2B": {
+ "title": "2B Model",
+ "desc": "• Smallest model (~4GB download, 4-8GB VRAM)\n• Fast but less accurate\n• Good for quick testing"
+ },
+ "7B": {
+ "title": "7B Model",
+ "desc": "• Medium model (~14GB download, 12-16GB VRAM)\n• Best balance of speed and quality\n• Recommended for most users"
+ },
+ "72B": {
+ "title": "72B Model",
+ "desc": "• Largest model (~144GB download, 80GB+ VRAM)\n• Highest quality but very slow\n• Requires high-end GPU"
+ },
+ "custom": {
+ "title": "Custom Model",
+ "desc": "• Enter any Hugging Face model ID\n• For advanced users\n• Size varies by model"
+ }
+ }
+
+ # Store selected model
+ selected_model_key = {"value": "2B"}
+ custom_model_id_text = {"value": ""}
+
+ # Radio button group
+ button_group = QButtonGroup(selection_dialog)
+
+ for idx, (key, info) in enumerate(model_options.items()):
+ # Radio button
+ rb = QRadioButton(info["title"])
+ rb_font = QFont("Arial", 11, QFont.Weight.Bold)
+ rb.setFont(rb_font)
+ if idx == 0:
+ rb.setChecked(True)
+ rb.clicked.connect(lambda checked, k=key: selected_model_key.update({"value": k}))
+ button_group.addButton(rb)
+ model_frame_layout.addWidget(rb)
+
+ # Description
+ desc_label = QLabel(info["desc"])
+ desc_font = QFont("Arial", 9)
+ desc_label.setFont(desc_font)
+ desc_label.setStyleSheet("color: #666666; margin-left: 20px;")
+ model_frame_layout.addWidget(desc_label)
+
+ # Separator
+ if key != "custom":
+ separator = QFrame()
+ separator.setFrameShape(QFrame.Shape.HLine)
+ separator.setFrameShadow(QFrame.Shadow.Sunken)
+ model_frame_layout.addWidget(separator)
+
+ main_layout.addWidget(model_frame)
+
+ # Custom model ID frame (initially hidden)
+ custom_frame = QGroupBox("Custom Model ID")
+ custom_frame_font = QFont("Arial", 11, QFont.Weight.Bold)
+ custom_frame.setFont(custom_frame_font)
+ custom_frame_layout = QHBoxLayout(custom_frame)
+ custom_frame_layout.setContentsMargins(15, 15, 15, 15)
+
+ custom_label = QLabel("Model ID:")
+ custom_label_font = QFont("Arial", 10)
+ custom_label.setFont(custom_label_font)
+ custom_frame_layout.addWidget(custom_label)
+
+ custom_entry = QLineEdit()
+ custom_entry.setPlaceholderText("e.g., Qwen/Qwen2-VL-2B-Instruct")
+ custom_entry.setFont(custom_label_font)
+ custom_entry.textChanged.connect(lambda text: custom_model_id_text.update({"value": text}))
+ custom_frame_layout.addWidget(custom_entry)
+
+ custom_frame.hide() # Hidden by default
+ main_layout.addWidget(custom_frame)
+
+ # Toggle custom frame visibility
+ def toggle_custom_frame():
+ if selected_model_key["value"] == "custom":
+ custom_frame.show()
+ else:
+ custom_frame.hide()
+
+ for rb in button_group.buttons():
+ rb.clicked.connect(toggle_custom_frame)
+
+ # GPU status frame
+ gpu_frame = QGroupBox("System Status")
+ gpu_frame_font = QFont("Arial", 11, QFont.Weight.Bold)
+ gpu_frame.setFont(gpu_frame_font)
+ gpu_frame_layout = QVBoxLayout(gpu_frame)
+ gpu_frame_layout.setContentsMargins(15, 15, 15, 15)
+
+ try:
+ import torch
+ if torch.cuda.is_available():
+ gpu_mem = torch.cuda.get_device_properties(0).total_memory / 1e9
+ gpu_text = f"✓ GPU: {torch.cuda.get_device_name(0)} ({gpu_mem:.1f}GB)"
+ gpu_color = '#4CAF50'
+ else:
+ gpu_text = "✗ No GPU detected - will use CPU (very slow)"
+ gpu_color = '#f44336'
+ except:
+ gpu_text = "? GPU status unknown - install torch with CUDA"
+ gpu_color = '#FF9800'
+
+ gpu_label = QLabel(gpu_text)
+ gpu_label_font = QFont("Arial", 10)
+ gpu_label.setFont(gpu_label_font)
+ gpu_label.setStyleSheet(f"color: {gpu_color};")
+ gpu_frame_layout.addWidget(gpu_label)
+
+ main_layout.addWidget(gpu_frame)
+
+ # Buttons
+ button_layout = QHBoxLayout()
+ button_layout.addStretch()
+
+ model_confirmed = {'value': False, 'model_key': None, 'model_id': None}
+
+ def confirm_selection():
+ selected = selected_model_key["value"]
+ if selected == "custom":
+ if not custom_model_id_text["value"].strip():
+ from PySide6.QtWidgets import QMessageBox
+ QMessageBox.critical(selection_dialog, "Error", "Please enter a model ID")
+ return
+ model_confirmed['model_key'] = selected
+ model_confirmed['model_id'] = custom_model_id_text["value"].strip()
+ else:
+ model_confirmed['model_key'] = selected
+ model_confirmed['model_id'] = f"Qwen/Qwen2-VL-{selected}-Instruct"
+ model_confirmed['value'] = True
+ selection_dialog.accept()
+
+ proceed_btn = QPushButton("Continue")
+ proceed_btn.setStyleSheet("QPushButton { background-color: #4CAF50; color: white; padding: 8px 20px; font-weight: bold; }")
+ proceed_btn.clicked.connect(confirm_selection)
+ button_layout.addWidget(proceed_btn)
+
+ cancel_btn = QPushButton("Cancel")
+ cancel_btn.setStyleSheet("QPushButton { background-color: #9E9E9E; color: white; padding: 8px 20px; }")
+ cancel_btn.clicked.connect(selection_dialog.reject)
+ button_layout.addWidget(cancel_btn)
+
+ button_layout.addStretch()
+ main_layout.addLayout(button_layout)
+
+ # Show dialog and wait for result
+ result = selection_dialog.exec()
+
+ if not model_confirmed['value'] or result == QDialog.DialogCode.Rejected:
+ return
+
+ selected_model_key = model_confirmed['model_key']
+ model_id = model_confirmed['model_id']
+ total_size_mb = model_sizes['Qwen2-VL'][selected_model_key]
+ elif provider == 'rapidocr':
+ total_size_mb = 50 # Approximate size for display
+ model_id = None
+ selected_model_key = None
+ else:
+ total_size_mb = model_sizes.get(provider, 500)
+ model_id = None
+ selected_model_key = None
+
+ # Create download dialog with window manager - pass Tkinter root instead of PySide6 dialog
+ download_dialog, scrollable_frame, canvas = self.main_gui.wm.setup_scrollable(
+ self.main_gui.master,
+ f"Download {provider} Model",
+ width=600,
+ height=450,
+ max_width_ratio=0.6,
+ max_height_ratio=0.6
+ )
+
+ # Info section
+ info_frame = tk.LabelFrame(
+ scrollable_frame,
+ text="Model Information",
+ font=('Arial', 11, 'bold'),
+ padx=15,
+ pady=10
+ )
+ info_frame.pack(fill=tk.X, padx=20, pady=10)
+
+ if provider == 'Qwen2-VL':
+ info_text = f"📚 Qwen2-VL {selected_model_key} Model\n"
+ info_text += f"Model ID: {model_id}\n"
+ info_text += f"Estimated size: ~{total_size_mb/1000:.1f}GB\n"
+ info_text += "Vision-Language model for Korean OCR"
+ else:
+ info_text = f"📚 {provider} Model\nOptimized for manga/manhwa text detection"
+
+ tk.Label(info_frame, text=info_text, font=('Arial', 10), justify=tk.LEFT).pack(anchor='w')
+
+ # Progress section
+ progress_frame = tk.LabelFrame(
+ scrollable_frame,
+ text="Download Progress",
+ font=('Arial', 11, 'bold'),
+ padx=15,
+ pady=10
+ )
+ progress_frame.pack(fill=tk.X, padx=20, pady=10)
+
+ progress_label = tk.Label(progress_frame, text="Ready to download", font=('Arial', 10))
+ progress_label.pack(pady=(5, 10))
+
+ progress_var = tk.DoubleVar()
+ try:
+ # Try to use our custom progress bar style
+ progress_bar = ttk.Progressbar(progress_frame, length=550, mode='determinate',
+ variable=progress_var,
+ style="MangaProgress.Horizontal.TProgressbar")
+ except Exception:
+ # Fallback to default if style not available yet
+ progress_bar = ttk.Progressbar(progress_frame, length=550, mode='determinate',
+ variable=progress_var)
+ progress_bar.pack(pady=(0, 5))
+
+ size_label = tk.Label(progress_frame, text="", font=('Arial', 9), fg='#666666')
+ size_label.pack()
+
+ speed_label = tk.Label(progress_frame, text="", font=('Arial', 9), fg='#666666')
+ speed_label.pack()
+
+ status_label = tk.Label(progress_frame, text="Click 'Download' to begin",
+ font=('Arial', 9), fg='#666666')
+ status_label.pack(pady=(5, 0))
+
+ # Log section
+ log_frame = tk.LabelFrame(
+ scrollable_frame,
+ text="Download Log",
+ font=('Arial', 11, 'bold'),
+ padx=15,
+ pady=10
+ )
+ log_frame.pack(fill=tk.BOTH, expand=True, padx=20, pady=10)
+
+ # Create a frame to hold the text widget and scrollbar
+ text_frame = tk.Frame(log_frame)
+ text_frame.pack(fill=tk.BOTH, expand=True)
+
+ details_text = tk.Text(
+ text_frame,
+ height=12,
+ width=70,
+ font=('Courier', 9),
+ bg='#f5f5f5'
+ )
+ details_text.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
+
+ # Attach scrollbar to the frame, not the text widget
+ scrollbar = ttk.Scrollbar(text_frame, command=details_text.yview)
+ scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
+ details_text.config(yscrollcommand=scrollbar.set)
+
+ def add_log(message):
+ """Add message to log"""
+ details_text.insert(tk.END, f"{message}\n")
+ details_text.see(tk.END)
+ details_text.update()
+
+ # Buttons frame
+ button_frame = tk.Frame(download_dialog)
+ button_frame.pack(pady=15)
+
+ # Download tracking variables
+ download_active = {'value': False}
+
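+        # Progress heuristic: snapshot_download doesn't report per-byte progress to this dialog,
+        # so we poll the Hugging Face cache directory size and compare it against the estimated
+        # total to approximate download progress.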
+ def get_dir_size(path):
+ """Get total size of directory"""
+ total = 0
+ try:
+ for dirpath, dirnames, filenames in os.walk(path):
+ for filename in filenames:
+ filepath = os.path.join(dirpath, filename)
+ if os.path.exists(filepath):
+ total += os.path.getsize(filepath)
+            except Exception:
+ pass
+ return total
+
+ def download_with_progress():
+ """Download model with real progress tracking"""
+ import time
+
+ download_active['value'] = True
+ total_size = total_size_mb * 1024 * 1024
+
+ try:
+ if provider == 'manga-ocr':
+ progress_label.config(text="Downloading manga-ocr model...")
+ add_log("Downloading manga-ocr model from Hugging Face...")
+ add_log("This will download ~450MB of model files")
+ progress_var.set(10)
+
+ try:
+ from huggingface_hub import snapshot_download
+
+ # Download the model files directly without importing manga_ocr
+ model_repo = "kha-white/manga-ocr-base"
+ add_log(f"Repository: {model_repo}")
+
+ cache_dir = os.path.expanduser("~/.cache/huggingface/hub")
+ initial_size = get_dir_size(cache_dir) if os.path.exists(cache_dir) else 0
+ start_time = time.time()
+
+ add_log("Starting download...")
+ progress_var.set(20)
+
+ # Download with progress tracking
+ import threading
+ download_complete = threading.Event()
+ download_error = [None]
+
+ def download_model():
+ try:
+ snapshot_download(
+ repo_id=model_repo,
+ repo_type="model",
+ resume_download=True,
+ local_files_only=False
+ )
+ download_complete.set()
+ except Exception as e:
+ download_error[0] = e
+ download_complete.set()
+
+ download_thread = threading.Thread(target=download_model, daemon=True)
+ download_thread.start()
+
+ # Show progress while downloading
+ while not download_complete.is_set() and download_active['value']:
+ current_size = get_dir_size(cache_dir) if os.path.exists(cache_dir) else 0
+ downloaded = current_size - initial_size
+
+ if downloaded > 0:
+ progress = min(20 + (downloaded / total_size) * 70, 95)
+ progress_var.set(progress)
+
+ elapsed = time.time() - start_time
+ if elapsed > 1:
+ speed = downloaded / elapsed
+ speed_mb = speed / (1024 * 1024)
+ speed_label.config(text=f"Speed: {speed_mb:.1f} MB/s")
+
+ mb_downloaded = downloaded / (1024 * 1024)
+ mb_total = total_size / (1024 * 1024)
+ size_label.config(text=f"{mb_downloaded:.1f} MB / {mb_total:.1f} MB")
+ progress_label.config(text=f"Downloading: {progress:.1f}%")
+
+ time.sleep(0.5)
+
+ download_thread.join(timeout=5)
+
+ if download_error[0]:
+ raise download_error[0]
+
+ if download_complete.is_set() and not download_error[0]:
+ progress_var.set(100)
+ progress_label.config(text="✅ Download complete!")
+ status_label.config(text="Model files downloaded")
+ add_log("✅ Model files downloaded successfully")
+ add_log("")
+ add_log("Next step: Click 'Load Model' to initialize manga-ocr")
+ # Schedule status check on main thread
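+                            # ('call_method', fn, args) tuples are presumably consumed by a GUI-thread queue poller that calls fn(*args)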
+ self.update_queue.put(('call_method', self._check_provider_status, ()))
+ else:
+ raise Exception("Download was cancelled")
+
+ except ImportError:
+ progress_label.config(text="❌ Missing huggingface_hub")
+ status_label.config(text="Install huggingface_hub first")
+ add_log("ERROR: huggingface_hub not installed")
+ add_log("Run: pip install huggingface_hub")
+ except Exception as e:
+ raise # Re-raise to be caught by outer exception handler
+
+ elif provider == 'Qwen2-VL':
+ try:
+ from transformers import AutoProcessor, AutoTokenizer, AutoModelForVision2Seq
+ import torch
+ except ImportError as e:
+ progress_label.config(text="❌ Missing dependencies")
+ status_label.config(text="Install dependencies first")
+ add_log(f"ERROR: {str(e)}")
+ add_log("Please install manually:")
+ add_log("pip install transformers torch torchvision")
+ return
+
+                    progress_label.config(text="Downloading model...")
+ add_log(f"Starting download of {model_id}")
+ progress_var.set(10)
+
+ add_log("Downloading processor...")
+ status_label.config(text="Downloading processor...")
+ processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
+ progress_var.set(30)
+ add_log("✓ Processor downloaded")
+
+ add_log("Downloading tokenizer...")
+ status_label.config(text="Downloading tokenizer...")
+ tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
+ progress_var.set(50)
+ add_log("✓ Tokenizer downloaded")
+
+ add_log("Downloading model weights (this may take several minutes)...")
+ status_label.config(text="Downloading model weights...")
+ progress_label.config(text="Downloading model weights...")
+
+ if torch.cuda.is_available():
+ add_log(f"Using GPU: {torch.cuda.get_device_name(0)}")
+ model = AutoModelForVision2Seq.from_pretrained(
+ model_id,
+ dtype=torch.float16,
+ device_map="auto",
+ trust_remote_code=True
+ )
+ else:
+ add_log("No GPU detected, will load on CPU")
+ model = AutoModelForVision2Seq.from_pretrained(
+ model_id,
+ dtype=torch.float32,
+ trust_remote_code=True
+ )
+
+ progress_var.set(90)
+ add_log("✓ Model weights downloaded")
+
+ add_log("Initializing model...")
+ status_label.config(text="Initializing...")
+
+ qwen_provider = self.ocr_manager.get_provider('Qwen2-VL')
+ if qwen_provider:
+ qwen_provider.processor = processor
+ qwen_provider.tokenizer = tokenizer
+ qwen_provider.model = model
+ qwen_provider.model.eval()
+ qwen_provider.is_loaded = True
+ qwen_provider.is_installed = True
+
+ if selected_model_key:
+ qwen_provider.loaded_model_size = selected_model_key
+
+ progress_var.set(100)
+ progress_label.config(text="✅ Download complete!")
+ status_label.config(text="Model ready for Korean OCR!")
+ add_log("✓ Model ready to use!")
+
+ # Schedule status check on main thread
+ self.update_queue.put(('call_method', self._check_provider_status, ()))
+
+ elif provider == 'rapidocr':
+ progress_label.config(text="📦 RapidOCR Installation Instructions")
+ add_log("RapidOCR requires manual pip installation")
+ progress_var.set(20)
+
+ add_log("Command to run:")
+ add_log("pip install rapidocr-onnxruntime")
+ progress_var.set(50)
+
+ add_log("")
+ add_log("After installation:")
+ add_log("1. Close this dialog")
+ add_log("2. Click 'Load Model' to initialize RapidOCR")
+ add_log("3. Status should show '✅ Model loaded'")
+ progress_var.set(100)
+
+ progress_label.config(text="📦 Installation instructions shown")
+ status_label.config(text="Manual pip install required")
+
+ download_btn.config(state=tk.DISABLED)
+ cancel_btn.config(text="Close")
+
+ except Exception as e:
+ progress_label.config(text="❌ Download failed")
+ status_label.config(text=f"Error: {str(e)[:50]}")
+ add_log(f"ERROR: {str(e)}")
+ self._log(f"Download error: {str(e)}", "error")
+
+ finally:
+ download_active['value'] = False
+
+ def start_download():
+ """Start download in background thread or executor"""
+ download_btn.config(state=tk.DISABLED)
+ cancel_btn.config(text="Cancel")
+
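+            # Prefer the main GUI's shared executor when available; otherwise fall back to a plain daemon thread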
+ try:
+ if hasattr(self.main_gui, '_ensure_executor'):
+ self.main_gui._ensure_executor()
+ execu = getattr(self.main_gui, 'executor', None)
+ if execu:
+ execu.submit(download_with_progress)
+ else:
+ import threading
+ download_thread = threading.Thread(target=download_with_progress, daemon=True)
+ download_thread.start()
+ except Exception:
+ import threading
+ download_thread = threading.Thread(target=download_with_progress, daemon=True)
+ download_thread.start()
+
+ def cancel_download():
+ """Cancel or close dialog"""
+ if download_active['value']:
+ download_active['value'] = False
+ status_label.config(text="Cancelling...")
+ else:
+ download_dialog.destroy()
+
+ download_btn = tb.Button(button_frame, text="Download", command=start_download, bootstyle="primary")
+ download_btn.pack(side=tk.LEFT, padx=5)
+
+ cancel_btn = tb.Button(button_frame, text="Close", command=cancel_download, bootstyle="secondary")
+ cancel_btn.pack(side=tk.LEFT, padx=5)
+
+ # Auto-resize
+ self.main_gui.wm.auto_resize_dialog(download_dialog, canvas, max_width_ratio=0.5, max_height_ratio=0.6)
+
+ def _check_provider_status(self):
+ """Check and display OCR provider status"""
+ # Skip during initialization to prevent lag
+ if hasattr(self, '_initializing_gui') and self._initializing_gui:
+ if hasattr(self, 'provider_status_label'):
+ self.provider_status_label.setText("")
+ self.provider_status_label.setStyleSheet("color: black;")
+ return
+
+ # Get provider value
+ if not hasattr(self, 'ocr_provider_value'):
+ # Not initialized yet, skip
+ return
+ provider = self.ocr_provider_value
+
+ # Hide ALL buttons first
+ if hasattr(self, 'provider_setup_btn'):
+ self.provider_setup_btn.setVisible(False)
+ if hasattr(self, 'download_model_btn'):
+ self.download_model_btn.setVisible(False)
+
+ if provider == 'google':
+ # Google - check for credentials file
+ google_creds = self.main_gui.config.get('google_vision_credentials', '')
+ if google_creds and os.path.exists(google_creds):
+ self.provider_status_label.setText("✅ Ready")
+ self.provider_status_label.setStyleSheet("color: green;")
+ else:
+ self.provider_status_label.setText("❌ Credentials needed")
+ self.provider_status_label.setStyleSheet("color: red;")
+
+ elif provider == 'azure':
+ # Azure - check for API key
+ azure_key = self.main_gui.config.get('azure_vision_key', '')
+ if azure_key:
+ self.provider_status_label.setText("✅ Ready")
+ self.provider_status_label.setStyleSheet("color: green;")
+ else:
+ self.provider_status_label.setText("❌ Key needed")
+ self.provider_status_label.setStyleSheet("color: red;")
+
+ elif provider == 'custom-api':
+ # Custom API - check for main API key
+ api_key = None
+ if hasattr(self.main_gui, 'api_key_entry') and self.main_gui.api_key_entry.get().strip():
+ api_key = self.main_gui.api_key_entry.get().strip()
+ elif hasattr(self.main_gui, 'config') and self.main_gui.config.get('api_key'):
+ api_key = self.main_gui.config.get('api_key')
+
+ # Check if AI bubble detection is enabled
+ manga_settings = self.main_gui.config.get('manga_settings', {})
+ ocr_settings = manga_settings.get('ocr', {})
+ bubble_detection_enabled = ocr_settings.get('bubble_detection_enabled', False)
+
+ if api_key:
+ if bubble_detection_enabled:
+ self.provider_status_label.setText("✅ Ready")
+ self.provider_status_label.setStyleSheet("color: green;")
+ else:
+ self.provider_status_label.setText("⚠️ Enable AI bubble detection for best results")
+ self.provider_status_label.setStyleSheet("color: orange;")
+ else:
+ self.provider_status_label.setText("❌ API key needed")
+ self.provider_status_label.setStyleSheet("color: red;")
+
+ elif provider == 'Qwen2-VL':
+ # Initialize OCR manager if needed
+ if not hasattr(self, 'ocr_manager'):
+ from ocr_manager import OCRManager
+ self.ocr_manager = OCRManager(log_callback=self._log)
+
+ # Check status first
+ status = self.ocr_manager.check_provider_status(provider)
+
+ # Load saved model size if available
+ if hasattr(self, 'qwen2vl_model_size'):
+ saved_model_size = self.qwen2vl_model_size
+ else:
+ saved_model_size = self.main_gui.config.get('qwen2vl_model_size', '1')
+
+ # When displaying status for loaded model
+ if status['loaded']:
+ # Map the saved size to display name
+ size_names = {'1': '2B', '2': '7B', '3': '72B', '4': 'custom'}
+ display_size = size_names.get(saved_model_size, saved_model_size)
+ self.provider_status_label.setText(f"✅ {display_size} model loaded")
+ self.provider_status_label.setStyleSheet("color: green;")
+
+ # Show reload button
+ self.provider_setup_btn.setText("Reload")
+ self.provider_setup_btn.setVisible(True)
+
+ elif status['installed']:
+ # Dependencies installed but model not loaded
+ self.provider_status_label.setText("📦 Dependencies ready")
+ self.provider_status_label.setStyleSheet("color: orange;")
+
+ # Show Load button
+ self.provider_setup_btn.setText("Load Model")
+ self.provider_setup_btn.setVisible(True)
+
+ # Also show Download button
+ self.download_model_btn.setText("📥 Download Model")
+ self.download_model_btn.setVisible(True)
+
+ else:
+ # Not installed
+ self.provider_status_label.setText("❌ Not installed")
+ self.provider_status_label.setStyleSheet("color: red;")
+
+ # Show BOTH buttons
+ self.provider_setup_btn.setText("Load Model")
+ self.provider_setup_btn.setVisible(True)
+
+ self.download_model_btn.setText("📥 Download Qwen2-VL")
+ self.download_model_btn.setVisible(True)
+
+ # Additional GPU status check for Qwen2-VL
+ if not status['loaded']:
+ try:
+ import torch
+ if not torch.cuda.is_available():
+ self._log("⚠️ No GPU detected - Qwen2-VL will run slowly on CPU", "warning")
+ except ImportError:
+ pass
+
+ else:
+ # Local OCR providers
+ if not hasattr(self, 'ocr_manager'):
+ from ocr_manager import OCRManager
+ self.ocr_manager = OCRManager(log_callback=self._log)
+
+ status = self.ocr_manager.check_provider_status(provider)
+
+ if status['loaded']:
+ # Model is loaded and ready
+ if provider == 'Qwen2-VL':
+ # Check which model size is loaded
+ qwen_provider = self.ocr_manager.get_provider('Qwen2-VL')
+ if qwen_provider and hasattr(qwen_provider, 'loaded_model_size'):
+ model_size = qwen_provider.loaded_model_size
+ status_text = f"✅ {model_size} model loaded"
+ else:
+ status_text = "✅ Model loaded"
+ self.provider_status_label.setText(status_text)
+ self.provider_status_label.setStyleSheet("color: green;")
+ else:
+ self.provider_status_label.setText("✅ Model loaded")
+ self.provider_status_label.setStyleSheet("color: green;")
+
+ # Show reload button for all local providers
+ self.provider_setup_btn.setText("Reload")
+ self.provider_setup_btn.setVisible(True)
+
+ elif status['installed']:
+ # Dependencies installed but model not loaded
+ self.provider_status_label.setText("📦 Dependencies ready")
+ self.provider_status_label.setStyleSheet("color: orange;")
+
+ # Show Load button for all providers
+ self.provider_setup_btn.setText("Load Model")
+ self.provider_setup_btn.setVisible(True)
+
+ # Also show Download button for models that need downloading
+ if provider in ['Qwen2-VL', 'manga-ocr']:
+ self.download_model_btn.setText("📥 Download Model")
+ self.download_model_btn.setVisible(True)
+
+ else:
+ # Not installed
+ self.provider_status_label.setText("❌ Not installed")
+ self.provider_status_label.setStyleSheet("color: red;")
+
+ # Categorize providers
+                huggingface_providers = ['manga-ocr', 'Qwen2-VL', 'rapidocr']  # handled via the Download dialog (Load + Download buttons)
+                pip_providers = ['easyocr', 'paddleocr', 'doctr']  # installed directly via pip (Install button; not available in the .exe build)
+
+ if provider in huggingface_providers:
+ # For HuggingFace models, show BOTH buttons
+ self.provider_setup_btn.setText("Load Model")
+ self.provider_setup_btn.setVisible(True)
+
+ # Download button
+ if provider == 'rapidocr':
+ self.download_model_btn.setText("📥 Install RapidOCR")
+ else:
+ self.download_model_btn.setText(f"📥 Download {provider}")
+ self.download_model_btn.setVisible(True)
+
+ elif provider in pip_providers:
+ # Check if running as .exe
+ if getattr(sys, 'frozen', False):
+ # Running as .exe - can't pip install
+ self.provider_status_label.setText("❌ Not available in .exe")
+ self.provider_status_label.setStyleSheet("color: red;")
+ self._log(f"⚠️ {provider} cannot be installed in standalone .exe version", "warning")
+ else:
+ # Running from Python - can pip install
+ self.provider_setup_btn.setText("Install")
+ self.provider_setup_btn.setVisible(True)
+
+ def _setup_ocr_provider(self):
+ """Setup/install/load OCR provider"""
+ provider = self.ocr_provider_value
+
+ if provider in ['google', 'azure']:
+ return # Cloud providers don't need setup
+
+        # Custom API mode uses the user's own API key from the main GUI
+ if provider == 'custom-api':
+ # Open configuration dialog for custom API
+ try:
+ from custom_api_config_dialog import CustomAPIConfigDialog
+ dialog = CustomAPIConfigDialog(
+ self.manga_window,
+ self.main_gui.config,
+ self.main_gui.save_config
+ )
+ # After dialog closes, refresh status
+ from PySide6.QtCore import QTimer
+ QTimer.singleShot(100, self._check_provider_status)
+ except ImportError:
+ # If dialog not available, show message
+ from PySide6.QtWidgets import QMessageBox
+ from PySide6.QtCore import QTimer
+ QTimer.singleShot(0, lambda: QMessageBox.information(
+ self.dialog,
+ "Custom API Configuration",
+ "This mode uses your own API key in the main GUI:\n\n"
+ "- Make sure your API supports vision\n"
+ "- api_key: Your API key\n"
+ "- model: Model name\n"
+                    "- custom url: you can override the API endpoint under Other settings"
+ ))
+ return
+
+ status = self.ocr_manager.check_provider_status(provider)
+
+ # For Qwen2-VL, check if we need to select model size first
+ model_size = None
+ if provider == 'Qwen2-VL' and status['installed'] and not status['loaded']:
+ # Create PySide6 dialog for model selection
+ from PySide6.QtWidgets import (QDialog, QVBoxLayout, QHBoxLayout, QLabel,
+ QRadioButton, QButtonGroup, QLineEdit, QPushButton,
+ QGroupBox, QFrame, QMessageBox)
+ from PySide6.QtCore import Qt
+ from PySide6.QtGui import QFont
+
+ selection_dialog = QDialog(self.dialog)
+ selection_dialog.setWindowTitle("Select Qwen2-VL Model Size")
+ selection_dialog.setMinimumSize(600, 450)
+ main_layout = QVBoxLayout(selection_dialog)
+
+ # Title
+ title_label = QLabel("Select Model Size to Load")
+ title_font = QFont("Arial", 12, QFont.Weight.Bold)
+ title_label.setFont(title_font)
+ title_label.setAlignment(Qt.AlignmentFlag.AlignCenter)
+ main_layout.addWidget(title_label)
+
+ # Model selection frame
+ model_frame = QGroupBox("Available Models")
+ model_frame_font = QFont("Arial", 11, QFont.Weight.Bold)
+ model_frame.setFont(model_frame_font)
+ model_frame_layout = QVBoxLayout(model_frame)
+ model_frame_layout.setContentsMargins(15, 15, 15, 15)
+ model_frame_layout.setSpacing(10)
+
+ # Model options
+ model_options = {
+ "1": {"name": "Qwen2-VL 2B", "desc": "Smallest (4-8GB VRAM)"},
+ "2": {"name": "Qwen2-VL 7B", "desc": "Medium (12-16GB VRAM)"},
+ "3": {"name": "Qwen2-VL 72B", "desc": "Largest (80GB+ VRAM)"},
+ "4": {"name": "Custom Model", "desc": "Enter any HF model ID"},
+ }
+
+ # Store selected model
+ selected_model_key = {"value": "1"}
+ custom_model_id_text = {"value": ""}
+
+ # Radio button group
+ button_group = QButtonGroup(selection_dialog)
+
+ for idx, (key, info) in enumerate(model_options.items()):
+ # Radio button
+ rb = QRadioButton(f"{info['name']} - {info['desc']}")
+ rb_font = QFont("Arial", 10)
+ rb.setFont(rb_font)
+ if idx == 0:
+ rb.setChecked(True)
+ rb.clicked.connect(lambda checked, k=key: selected_model_key.update({"value": k}))
+ button_group.addButton(rb)
+ model_frame_layout.addWidget(rb)
+
+ # Separator
+ if key != "4":
+ separator = QFrame()
+ separator.setFrameShape(QFrame.Shape.HLine)
+ separator.setFrameShadow(QFrame.Shadow.Sunken)
+ model_frame_layout.addWidget(separator)
+
+ main_layout.addWidget(model_frame)
+
+ # Custom model ID frame (initially hidden)
+ custom_frame = QGroupBox("Custom Model Configuration")
+ custom_frame_font = QFont("Arial", 11, QFont.Weight.Bold)
+ custom_frame.setFont(custom_frame_font)
+ custom_frame_layout = QHBoxLayout(custom_frame)
+ custom_frame_layout.setContentsMargins(15, 15, 15, 15)
+
+ custom_label = QLabel("Model ID:")
+ custom_label_font = QFont("Arial", 10)
+ custom_label.setFont(custom_label_font)
+ custom_frame_layout.addWidget(custom_label)
+
+ custom_entry = QLineEdit()
+ custom_entry.setPlaceholderText("e.g., Qwen/Qwen2-VL-2B-Instruct")
+ custom_entry.setFont(custom_label_font)
+ custom_entry.textChanged.connect(lambda text: custom_model_id_text.update({"value": text}))
+ custom_frame_layout.addWidget(custom_entry)
+
+ custom_frame.hide() # Hidden by default
+ main_layout.addWidget(custom_frame)
+
+ # Toggle custom frame visibility
+ def toggle_custom_frame():
+ if selected_model_key["value"] == "4":
+ custom_frame.show()
+ else:
+ custom_frame.hide()
+
+ for rb in button_group.buttons():
+ rb.clicked.connect(toggle_custom_frame)
+
+ # Buttons with centering
+ button_layout = QHBoxLayout()
+ button_layout.addStretch()
+
+ model_confirmed = {'value': False, 'size': None}
+
+ def confirm_selection():
+ selected = selected_model_key["value"]
+ self._log(f"DEBUG: Radio button selection = {selected}")
+ if selected == "4":
+ if not custom_model_id_text["value"].strip():
+ QMessageBox.critical(selection_dialog, "Error", "Please enter a model ID")
+ return
+ model_confirmed['size'] = f"custom:{custom_model_id_text['value'].strip()}"
+ else:
+ model_confirmed['size'] = selected
+ model_confirmed['value'] = True
+ selection_dialog.accept()
+
+ load_btn = QPushButton("Load")
+ load_btn.setStyleSheet("QPushButton { background-color: #4CAF50; color: white; padding: 8px 20px; font-weight: bold; }")
+ load_btn.clicked.connect(confirm_selection)
+ button_layout.addWidget(load_btn)
+
+ cancel_btn = QPushButton("Cancel")
+ cancel_btn.setStyleSheet("QPushButton { background-color: #9E9E9E; color: white; padding: 8px 20px; }")
+ cancel_btn.clicked.connect(selection_dialog.reject)
+ button_layout.addWidget(cancel_btn)
+
+ button_layout.addStretch()
+ main_layout.addLayout(button_layout)
+
+ # Show dialog and wait for result (PySide6 modal dialog)
+ result = selection_dialog.exec()
+
+ if result != QDialog.DialogCode.Accepted or not model_confirmed['value']:
+ return
+
+ model_size = model_confirmed['size']
+ self._log(f"DEBUG: Dialog closed, model_size set to: {model_size}")
+
+ # Create PySide6 progress dialog
+ from PySide6.QtWidgets import QDialog, QVBoxLayout, QLabel, QProgressBar, QGroupBox
+ from PySide6.QtCore import QTimer
+ from PySide6.QtGui import QFont
+
+ progress_dialog = QDialog(self.dialog)
+ progress_dialog.setWindowTitle(f"Setting up {provider}")
+ progress_dialog.setMinimumSize(400, 200)
+ progress_layout = QVBoxLayout(progress_dialog)
+
+ # Progress section
+ progress_section = QGroupBox("Setup Progress")
+ progress_section_font = QFont("Arial", 11, QFont.Weight.Bold)
+ progress_section.setFont(progress_section_font)
+ progress_section_layout = QVBoxLayout(progress_section)
+ progress_section_layout.setContentsMargins(15, 15, 15, 15)
+ progress_section_layout.setSpacing(10)
+
+ progress_label = QLabel("Initializing...")
+ progress_label_font = QFont("Arial", 10)
+ progress_label.setFont(progress_label_font)
+ progress_section_layout.addWidget(progress_label)
+
+ progress_bar = QProgressBar()
+ progress_bar.setMinimum(0)
+ progress_bar.setMaximum(0) # Indeterminate mode
+ progress_bar.setMinimumWidth(350)
+ progress_section_layout.addWidget(progress_bar)
+
+ status_label = QLabel("")
+ status_label_font = QFont("Arial", 9)
+ status_label.setFont(status_label_font)
+ status_label.setStyleSheet("color: #666666;")
+ progress_section_layout.addWidget(status_label)
+
+ progress_layout.addWidget(progress_section)
+
+ def update_progress(message, percent=None):
+ """Update progress display (thread-safe)"""
+ # Use lambda to ensure we capture the correct widget references
+ def update_ui():
+ progress_label.setText(message)
+ if percent is not None:
+ progress_bar.setMaximum(100) # Switch to determinate mode
+ progress_bar.setValue(int(percent))
+
+ # Schedule on main thread
+ self.update_queue.put(('call_method', update_ui, ()))
+
+ def setup_thread():
+ """Run setup in background thread"""
+ nonlocal model_size
+ print(f"\n=== SETUP THREAD STARTED for {provider} ===")
+ print(f"Status: {status}")
+ print(f"Model size: {model_size}")
+
+ try:
+ # Check if we need to install
+ if not status['installed']:
+ # Install provider
+ print(f"Installing {provider}...")
+ update_progress(f"Installing {provider}...")
+ success = self.ocr_manager.install_provider(provider, update_progress)
+ print(f"Install result: {success}")
+
+ if not success:
+ print("Installation FAILED")
+ update_progress("❌ Installation failed!", 0)
+ self._log(f"Failed to install {provider}", "error")
+ return
+ else:
+ # Already installed, skip installation
+ print(f"{provider} dependencies already installed")
+ self._log(f"DEBUG: {provider} dependencies already installed")
+ success = True # Mark as success since deps are ready
+
+ # Load model
+ print(f"About to load {provider} model...")
+ update_progress(f"Loading {provider} model...")
+ self._log(f"DEBUG: Loading provider {provider}, status['installed']={status.get('installed', False)}")
+
+ # Special handling for Qwen2-VL - pass model_size
+ if provider == 'Qwen2-VL':
+ if success and model_size:
+ # Save the model size to config
+ self.qwen2vl_model_size = model_size
+ self.main_gui.config['qwen2vl_model_size'] = model_size
+
+ # Save config immediately
+ if hasattr(self.main_gui, 'save_config'):
+ self.main_gui.save_config(show_message=False)
+ self._log(f"DEBUG: In thread, about to load with model_size={model_size}")
+ if model_size:
+ success = self.ocr_manager.load_provider(provider, model_size=model_size)
+
+ if success:
+ provider_obj = self.ocr_manager.get_provider('Qwen2-VL')
+ if provider_obj:
+ provider_obj.loaded_model_size = {
+ "1": "2B",
+ "2": "7B",
+ "3": "72B",
+ "4": "custom"
+ }.get(model_size, model_size)
+ else:
+ self._log("Warning: No model size specified for Qwen2-VL, defaulting to 2B", "warning")
+ success = self.ocr_manager.load_provider(provider, model_size="1")
+ else:
+ print(f"Loading {provider} without model_size parameter")
+ self._log(f"DEBUG: Loading {provider} without model_size parameter")
+ success = self.ocr_manager.load_provider(provider)
+ print(f"load_provider returned: {success}")
+ self._log(f"DEBUG: load_provider returned success={success}")
+
+ print(f"\nFinal success value: {success}")
+ if success:
+ print("SUCCESS! Model loaded successfully")
+ update_progress(f"✅ {provider} ready!", 100)
+ self._log(f"✅ {provider} is ready to use", "success")
+ # Schedule status check on main thread
+ self.update_queue.put(('call_method', self._check_provider_status, ()))
+ else:
+ print("FAILED! Model did not load")
+ update_progress("❌ Failed to load model!", 0)
+ self._log(f"Failed to load {provider} model", "error")
+
+ except Exception as e:
+ print(f"\n!!! EXCEPTION CAUGHT !!!")
+ print(f"Exception type: {type(e).__name__}")
+ print(f"Exception message: {str(e)}")
+ import traceback
+ traceback_str = traceback.format_exc()
+ print(f"Traceback:\n{traceback_str}")
+
+ error_msg = f"❌ Error: {str(e)}"
+ update_progress(error_msg, 0)
+ self._log(f"Setup error: {str(e)}", "error")
+ self._log(traceback_str, "debug")
+ # Don't close dialog on error - let user read the error
+ return
+
+ # Only close dialog on success
+ if success:
+ # Schedule dialog close on main thread after 2 seconds
+ import time
+ time.sleep(2)
+ self.update_queue.put(('call_method', progress_dialog.close, ()))
+ else:
+ # On failure, keep dialog open so user can see the error
+ import time
+ time.sleep(5)
+ self.update_queue.put(('call_method', progress_dialog.close, ()))
+
+ # Show progress dialog (non-blocking)
+ progress_dialog.show()
+
+ # Start setup in background via executor if available
+ try:
+ if hasattr(self.main_gui, '_ensure_executor'):
+ self.main_gui._ensure_executor()
+ execu = getattr(self.main_gui, 'executor', None)
+ if execu:
+ execu.submit(setup_thread)
+ else:
+ import threading
+ threading.Thread(target=setup_thread, daemon=True).start()
+ except Exception:
+ import threading
+ threading.Thread(target=setup_thread, daemon=True).start()
+
+ def _on_ocr_provider_change(self, event=None):
+ """Handle OCR provider change"""
+ # Get the new provider value from combo box
+ if hasattr(self, 'provider_combo'):
+ provider = self.provider_combo.currentText()
+ self.ocr_provider_value = provider
+ else:
+ provider = self.ocr_provider_value
+
+ # Hide ALL provider-specific frames first (PySide6)
+ if hasattr(self, 'google_creds_frame'):
+ self.google_creds_frame.setVisible(False)
+ if hasattr(self, 'azure_frame'):
+ self.azure_frame.setVisible(False)
+
+ # Show only the relevant settings frame for the selected provider
+ if provider == 'google':
+ # Show Google credentials frame
+ if hasattr(self, 'google_creds_frame'):
+ self.google_creds_frame.setVisible(True)
+
+ elif provider == 'azure':
+ # Show Azure settings frame
+ if hasattr(self, 'azure_frame'):
+ self.azure_frame.setVisible(True)
+
+ # For all other providers (manga-ocr, Qwen2-VL, easyocr, paddleocr, doctr)
+ # Don't show any cloud credential frames - they use local models
+
+ # Check provider status to show appropriate buttons
+ self._check_provider_status()
+
+ # Update the main status label at the top based on new provider
+ self._update_main_status_label()
+
+ # Log the change
+ provider_descriptions = {
+ 'custom-api': "Custom API - use your own vision model",
+ 'google': "Google Cloud Vision (requires credentials)",
+ 'azure': "Azure Computer Vision (requires API key)",
+ 'manga-ocr': "Manga OCR - optimized for Japanese manga",
+ 'rapidocr': "RapidOCR - fast local OCR with region detection",
+            'Qwen2-VL': "Qwen2-VL - vision-language model for Korean OCR",
+ 'easyocr': "EasyOCR - multi-language support",
+ 'paddleocr': "PaddleOCR - CJK language support",
+ 'doctr': "DocTR - document text recognition"
+ }
+
+ self._log(f"📋 OCR provider changed to: {provider_descriptions.get(provider, provider)}", "info")
+
+ # Save the selection
+ self.main_gui.config['manga_ocr_provider'] = provider
+ if hasattr(self.main_gui, 'save_config'):
+ self.main_gui.save_config(show_message=False)
+
+ # IMPORTANT: Reset translator to force recreation with new OCR provider
+ if hasattr(self, 'translator') and self.translator:
+ self._log(f"OCR provider changed to {provider.upper()}. Translator will be recreated on next run.", "info")
+ self.translator = None # Force recreation on next translation
+
+ def _update_main_status_label(self):
+ """Update the main status label at the top based on current provider and credentials"""
+ if not hasattr(self, 'status_label'):
+ return
+
+ # Get API key
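+        # The main GUI entry may be a PySide6 QLineEdit (.text()) or a Tkinter widget (.get()), so support both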
+ try:
+ if hasattr(self.main_gui.api_key_entry, 'text'):
+ has_api_key = bool(self.main_gui.api_key_entry.text().strip())
+ elif hasattr(self.main_gui.api_key_entry, 'get'):
+ has_api_key = bool(self.main_gui.api_key_entry.get().strip())
+ else:
+ has_api_key = False
+        except Exception:
+ has_api_key = False
+
+ # Get current provider
+ provider = self.ocr_provider_value if hasattr(self, 'ocr_provider_value') else self.main_gui.config.get('manga_ocr_provider', 'custom-api')
+
+ # Determine readiness based on provider
+ if provider == 'google':
+ has_vision = os.path.exists(self.main_gui.config.get('google_vision_credentials', ''))
+ is_ready = has_api_key and has_vision
+ elif provider == 'azure':
+ has_azure = bool(self.main_gui.config.get('azure_vision_key', ''))
+ is_ready = has_api_key and has_azure
+ else:
+ # Local providers or custom-api only need API key for translation
+ is_ready = has_api_key
+
+ # Update label
+ status_text = "✅ Ready" if is_ready else "❌ Setup Required"
+ status_color = "green" if is_ready else "red"
+
+ self.status_label.setText(status_text)
+ self.status_label.setStyleSheet(f"color: {status_color};")
+
+ def _build_interface(self):
+ """Build the enhanced manga translation interface using PySide6"""
+ # Create main layout for PySide6 widget
+ main_layout = QVBoxLayout(self.parent_widget)
+ main_layout.setContentsMargins(10, 10, 10, 10)
+ main_layout.setSpacing(6)
+ self._build_pyside6_interface(main_layout)
+
+ def _build_pyside6_interface(self, main_layout):
+ # Import QSizePolicy for layout management
+ from PySide6.QtWidgets import QSizePolicy
+
+ # Apply global stylesheet for checkboxes and radio buttons
+ checkbox_radio_style = """
+ QCheckBox {
+ color: white;
+ spacing: 6px;
+ }
+ QCheckBox::indicator {
+ width: 14px;
+ height: 14px;
+ border: 1px solid #5a9fd4;
+ border-radius: 2px;
+ background-color: #2d2d2d;
+ }
+ QCheckBox::indicator:checked {
+ background-color: #5a9fd4;
+ border-color: #5a9fd4;
+ }
+ QCheckBox::indicator:hover {
+ border-color: #7bb3e0;
+ }
+ QCheckBox:disabled {
+ color: #666666;
+ }
+ QCheckBox::indicator:disabled {
+ background-color: #1a1a1a;
+ border-color: #3a3a3a;
+ }
+ QRadioButton {
+ color: white;
+ spacing: 5px;
+ }
+ QRadioButton::indicator {
+ width: 13px;
+ height: 13px;
+ border: 2px solid #5a9fd4;
+ border-radius: 7px;
+ background-color: #2d2d2d;
+ }
+ QRadioButton::indicator:checked {
+ background-color: #5a9fd4;
+ border: 2px solid #5a9fd4;
+ }
+ QRadioButton::indicator:hover {
+ border-color: #7bb3e0;
+ }
+ QRadioButton:disabled {
+ color: #666666;
+ }
+ QRadioButton::indicator:disabled {
+ background-color: #1a1a1a;
+ border-color: #3a3a3a;
+ }
+ /* Disabled fields styling */
+ QLineEdit:disabled, QComboBox:disabled, QSpinBox:disabled, QDoubleSpinBox:disabled {
+ background-color: #1a1a1a;
+ color: #666666;
+ border: 1px solid #3a3a3a;
+ }
+ QLabel:disabled {
+ color: #666666;
+ }
+ """
+ self.parent_widget.setStyleSheet(checkbox_radio_style)
+
+ # Title (at the very top)
+ title_frame = QWidget()
+ title_layout = QHBoxLayout(title_frame)
+ title_layout.setContentsMargins(0, 0, 0, 0)
+ title_layout.setSpacing(8)
+
+ title_label = QLabel("🎌 Manga Translation")
+ title_font = QFont("Arial", 13)
+ title_font.setBold(True)
+ title_label.setFont(title_font)
+ title_layout.addWidget(title_label)
+
+ # Requirements check - based on selected OCR provider
+ has_api_key = bool(self.main_gui.api_key_entry.text().strip()) if hasattr(self.main_gui.api_key_entry, 'text') else bool(self.main_gui.api_key_entry.get().strip())
+
+ # Get the saved OCR provider to check appropriate credentials
+ saved_provider = self.main_gui.config.get('manga_ocr_provider', 'custom-api')
+
+ # Determine readiness based on provider
+ if saved_provider == 'google':
+ has_vision = os.path.exists(self.main_gui.config.get('google_vision_credentials', ''))
+ is_ready = has_api_key and has_vision
+ elif saved_provider == 'azure':
+ has_azure = bool(self.main_gui.config.get('azure_vision_key', ''))
+ is_ready = has_api_key and has_azure
+ else:
+ # Local providers or custom-api only need API key for translation
+ is_ready = has_api_key
+
+ status_text = "✅ Ready" if is_ready else "❌ Setup Required"
+ status_color = "green" if is_ready else "red"
+
+ status_label = QLabel(status_text)
+ status_font = QFont("Arial", 10)
+ status_label.setFont(status_font)
+ status_label.setStyleSheet(f"color: {status_color};")
+ title_layout.addStretch()
+ title_layout.addWidget(status_label)
+
+ main_layout.addWidget(title_frame)
+
+ # Store reference for updates
+ self.status_label = status_label
+
+ # Model Preloading Progress Bar (right after title, initially hidden)
+ self.preload_progress_frame = QWidget()
+ self.preload_progress_frame.setStyleSheet(
+ "background-color: #2d2d2d; "
+ "border: 1px solid #4a5568; "
+ "border-radius: 4px; "
+ "padding: 6px;"
+ )
+ preload_layout = QVBoxLayout(self.preload_progress_frame)
+ preload_layout.setContentsMargins(8, 6, 8, 6)
+ preload_layout.setSpacing(4)
+
+ self.preload_status_label = QLabel("Loading models...")
+ preload_status_font = QFont("Segoe UI", 9)
+ preload_status_font.setBold(True)
+ self.preload_status_label.setFont(preload_status_font)
+ self.preload_status_label.setStyleSheet("color: #ffffff; background: transparent; border: none;")
+ self.preload_status_label.setAlignment(Qt.AlignCenter)
+ preload_layout.addWidget(self.preload_status_label)
+
+ self.preload_progress_bar = QProgressBar()
+ self.preload_progress_bar.setRange(0, 100)
+ self.preload_progress_bar.setValue(0)
+ self.preload_progress_bar.setTextVisible(True)
+ self.preload_progress_bar.setMinimumHeight(22)
+ self.preload_progress_bar.setStyleSheet("""
+ QProgressBar {
+ border: 1px solid #4a5568;
+ border-radius: 3px;
+ text-align: center;
+ background-color: #1e1e1e;
+ color: #ffffff;
+ font-weight: bold;
+ font-size: 9px;
+ }
+ QProgressBar::chunk {
+ background: qlineargradient(x1:0, y1:0, x2:1, y2:0,
+ stop:0 #2d6a4f, stop:0.5 #1b4332, stop:1 #081c15);
+ border-radius: 2px;
+ margin: 0px;
+ }
+ """)
+ preload_layout.addWidget(self.preload_progress_bar)
+
+ self.preload_progress_frame.setVisible(False) # Hidden by default
+ main_layout.addWidget(self.preload_progress_frame)
+
+ # Add instructions based on selected provider
+ if not is_ready:
+ req_frame = QWidget()
+ req_layout = QVBoxLayout(req_frame)
+ req_layout.setContentsMargins(0, 5, 0, 5)
+
+ req_text = []
+ if not has_api_key:
+ req_text.append("• API Key not configured")
+
+ # Only show provider-specific credential warnings
+ if saved_provider == 'google':
+ has_vision = os.path.exists(self.main_gui.config.get('google_vision_credentials', ''))
+ if not has_vision:
+ req_text.append("• Google Cloud Vision credentials not set")
+ elif saved_provider == 'azure':
+ has_azure = bool(self.main_gui.config.get('azure_vision_key', ''))
+ if not has_azure:
+ req_text.append("• Azure credentials not configured")
+
+ if req_text: # Only show frame if there are actual missing requirements
+ req_label = QLabel("\n".join(req_text))
+ req_font = QFont("Arial", 10)
+ req_label.setFont(req_font)
+ req_label.setStyleSheet("color: red;")
+ req_label.setAlignment(Qt.AlignLeft)
+ req_layout.addWidget(req_label)
+ main_layout.addWidget(req_frame)
+ else:
+ # Create empty frame to maintain layout consistency
+ req_frame = QWidget()
+ req_frame.setVisible(False)
+ main_layout.addWidget(req_frame)
+
+ # File selection frame - SPANS BOTH COLUMNS
+ file_frame = QGroupBox("Select Manga Images")
+ file_frame_font = QFont("Arial", 10)
+ file_frame_font.setBold(True)
+ file_frame.setFont(file_frame_font)
+ file_frame_layout = QVBoxLayout(file_frame)
+ file_frame_layout.setContentsMargins(10, 10, 10, 8)
+ file_frame_layout.setSpacing(6)
+
+ # File listbox (QListWidget handles scrolling automatically)
+ self.file_listbox = QListWidget()
+ self.file_listbox.setSelectionMode(QListWidget.ExtendedSelection)
+ self.file_listbox.setMinimumHeight(200)
+ file_frame_layout.addWidget(self.file_listbox)
+
+ # File buttons
+ file_btn_frame = QWidget()
+ file_btn_layout = QHBoxLayout(file_btn_frame)
+ file_btn_layout.setContentsMargins(0, 6, 0, 0)
+ file_btn_layout.setSpacing(4)
+
+ add_files_btn = QPushButton("Add Files")
+ add_files_btn.clicked.connect(self._add_files)
+ add_files_btn.setStyleSheet("QPushButton { background-color: #007bff; color: white; padding: 3px 10px; font-size: 9pt; }")
+ file_btn_layout.addWidget(add_files_btn)
+
+ add_folder_btn = QPushButton("Add Folder")
+ add_folder_btn.clicked.connect(self._add_folder)
+ add_folder_btn.setStyleSheet("QPushButton { background-color: #007bff; color: white; padding: 3px 10px; font-size: 9pt; }")
+ file_btn_layout.addWidget(add_folder_btn)
+
+ remove_btn = QPushButton("Remove Selected")
+ remove_btn.clicked.connect(self._remove_selected)
+ remove_btn.setStyleSheet("QPushButton { background-color: #dc3545; color: white; padding: 3px 10px; font-size: 9pt; }")
+ file_btn_layout.addWidget(remove_btn)
+
+ clear_btn = QPushButton("Clear All")
+ clear_btn.clicked.connect(self._clear_all)
+ clear_btn.setStyleSheet("QPushButton { background-color: #ffc107; color: black; padding: 3px 10px; font-size: 9pt; }")
+ file_btn_layout.addWidget(clear_btn)
+
+ file_btn_layout.addStretch()
+ file_frame_layout.addWidget(file_btn_frame)
+
+ main_layout.addWidget(file_frame)
+
+ # Create 2-column layout for settings
+ columns_container = QWidget()
+ columns_layout = QHBoxLayout(columns_container)
+ columns_layout.setContentsMargins(0, 0, 0, 0)
+ columns_layout.setSpacing(10)
+
+ # Left column (Column 1)
+ left_column = QWidget()
+ left_column_layout = QVBoxLayout(left_column)
+ left_column_layout.setContentsMargins(0, 0, 0, 0)
+ left_column_layout.setSpacing(6)
+
+ # Right column (Column 2)
+ right_column = QWidget()
+ right_column_layout = QVBoxLayout(right_column)
+ right_column_layout.setContentsMargins(0, 0, 0, 0)
+ right_column_layout.setSpacing(6)
+
+ # Settings frame - GOES TO LEFT COLUMN
+ settings_frame = QGroupBox("Translation Settings")
+ settings_frame_font = QFont("Arial", 10)
+ settings_frame_font.setBold(True)
+ settings_frame.setFont(settings_frame_font)
+ settings_frame_layout = QVBoxLayout(settings_frame)
+ settings_frame_layout.setContentsMargins(10, 10, 10, 8)
+ settings_frame_layout.setSpacing(6)
+
+ # API Settings - Hybrid approach
+ api_frame = QWidget()
+ api_layout = QHBoxLayout(api_frame)
+ api_layout.setContentsMargins(0, 0, 0, 10)
+ api_layout.setSpacing(10)
+
+ api_label = QLabel("OCR: Google Cloud Vision | Translation: API Key")
+ api_font = QFont("Arial", 10)
+ api_font.setItalic(True)
+ api_label.setFont(api_font)
+ api_label.setStyleSheet("color: gray;")
+ api_layout.addWidget(api_label)
+
+ # Show current model from main GUI
+ current_model = 'Unknown'
+ try:
+ if hasattr(self.main_gui, 'model_combo') and hasattr(self.main_gui.model_combo, 'currentText'):
+ # PySide6 QComboBox
+ current_model = self.main_gui.model_combo.currentText()
+ elif hasattr(self.main_gui, 'model_var'):
+ # Tkinter StringVar
+ current_model = self.main_gui.model_var.get() if hasattr(self.main_gui.model_var, 'get') else str(self.main_gui.model_var)
+ elif hasattr(self.main_gui, 'config'):
+ # Fallback to config
+ current_model = self.main_gui.config.get('model', 'Unknown')
+ except Exception as e:
+ print(f"Error getting model: {e}")
+ current_model = 'Unknown'
+
+ model_label = QLabel(f"Model: {current_model}")
+ model_font = QFont("Arial", 10)
+ model_font.setItalic(True)
+ model_label.setFont(model_font)
+ model_label.setStyleSheet("color: gray;")
+ api_layout.addStretch()
+ api_layout.addWidget(model_label)
+
+ settings_frame_layout.addWidget(api_frame)
+
+ # OCR Provider Selection - ENHANCED VERSION
+ self.ocr_provider_frame = QWidget()
+ ocr_provider_layout = QHBoxLayout(self.ocr_provider_frame)
+ ocr_provider_layout.setContentsMargins(0, 0, 0, 10)
+ ocr_provider_layout.setSpacing(10)
+
+ provider_label = QLabel("OCR Provider:")
+ provider_label.setMinimumWidth(150)
+ provider_label.setAlignment(Qt.AlignLeft)
+ ocr_provider_layout.addWidget(provider_label)
+
+ # Expanded provider list with descriptions
+ ocr_providers = [
+            ('custom-api', 'Your Own Key'),
+ ('google', 'Google Cloud Vision'),
+ ('azure', 'Azure Computer Vision'),
+ ('rapidocr', '⚡ RapidOCR (Fast & Local)'),
+ ('manga-ocr', '🇯🇵 Manga OCR (Japanese)'),
+ ('Qwen2-VL', '🇰🇷 Qwen2-VL (Korean)'),
+ ('easyocr', '🌏 EasyOCR (Multi-lang)'),
+ #('paddleocr', '🐼 PaddleOCR'),
+ ('doctr', '📄 DocTR'),
+ ]
+
+ # Just the values for the combobox
+ provider_values = [p[0] for p in ocr_providers]
+ provider_display = [f"{p[0]} - {p[1]}" for p in ocr_providers]
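+        # NOTE: provider_display (label strings) is currently unused; the combo below lists the raw provider IDs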
+
+ self.ocr_provider_value = self.main_gui.config.get('manga_ocr_provider', 'custom-api')
+ self.provider_combo = QComboBox()
+ self.provider_combo.addItems(provider_values)
+ self.provider_combo.setCurrentText(self.ocr_provider_value)
+ self.provider_combo.setMinimumWidth(120) # Reduced for better fit
+ self.provider_combo.setSizePolicy(QSizePolicy.Policy.Expanding, QSizePolicy.Policy.Fixed)
+ self.provider_combo.currentTextChanged.connect(self._on_ocr_provider_change)
+ self._disable_combobox_mousewheel(self.provider_combo) # Disable mousewheel scrolling
+ ocr_provider_layout.addWidget(self.provider_combo)
+
+ # Provider status indicator with more detail
+ self.provider_status_label = QLabel("")
+ status_font = QFont("Arial", 9)
+ self.provider_status_label.setFont(status_font)
+ self.provider_status_label.setWordWrap(True) # Allow text wrapping
+ self.provider_status_label.setSizePolicy(QSizePolicy.Policy.Expanding, QSizePolicy.Policy.Preferred)
+ ocr_provider_layout.addWidget(self.provider_status_label)
+
+ # Setup/Install button for non-cloud providers
+ self.provider_setup_btn = QPushButton("Setup")
+ self.provider_setup_btn.clicked.connect(self._setup_ocr_provider)
+ self.provider_setup_btn.setStyleSheet("QPushButton { background-color: #17a2b8; color: white; padding: 5px 15px; }")
+ self.provider_setup_btn.setMinimumWidth(100)
+ self.provider_setup_btn.setVisible(False) # Hidden by default, _check_provider_status will show it
+ ocr_provider_layout.addWidget(self.provider_setup_btn)
+
+ # Add explicit download button for Hugging Face models
+ self.download_model_btn = QPushButton("📥 Download")
+ self.download_model_btn.clicked.connect(self._download_hf_model)
+ self.download_model_btn.setStyleSheet("QPushButton { background-color: #28a745; color: white; padding: 5px 15px; }")
+ self.download_model_btn.setMinimumWidth(150)
+ self.download_model_btn.setVisible(False) # Hidden by default
+ ocr_provider_layout.addWidget(self.download_model_btn)
+
+ ocr_provider_layout.addStretch()
+ settings_frame_layout.addWidget(self.ocr_provider_frame)
+
+ # Initialize OCR manager
+ from ocr_manager import OCRManager
+ self.ocr_manager = OCRManager(log_callback=self._log)
+
+ # Check initial provider status
+ self._check_provider_status()
+
+ # Google Cloud Credentials section (now in a frame that can be hidden)
+ self.google_creds_frame = QWidget()
+ google_creds_layout = QHBoxLayout(self.google_creds_frame)
+ google_creds_layout.setContentsMargins(0, 0, 0, 10)
+ google_creds_layout.setSpacing(10)
+
+ google_label = QLabel("Google Cloud Credentials:")
+ google_label.setMinimumWidth(150)
+ google_label.setAlignment(Qt.AlignLeft)
+ google_creds_layout.addWidget(google_label)
+
+ # Show current credentials file
+ google_creds_path = self.main_gui.config.get('google_vision_credentials', '') or self.main_gui.config.get('google_cloud_credentials', '')
+ creds_display = os.path.basename(google_creds_path) if google_creds_path else "Not Set"
+
+ self.creds_label = QLabel(creds_display)
+ creds_font = QFont("Arial", 9)
+ self.creds_label.setFont(creds_font)
+ self.creds_label.setStyleSheet(f"color: {'green' if google_creds_path else 'red'};")
+ google_creds_layout.addWidget(self.creds_label)
+
+ browse_btn = QPushButton("Browse")
+ browse_btn.clicked.connect(self._browse_google_credentials_permanent)
+ browse_btn.setStyleSheet("QPushButton { background-color: #007bff; color: white; padding: 5px 15px; }")
+ google_creds_layout.addWidget(browse_btn)
+
+ google_creds_layout.addStretch()
+ settings_frame_layout.addWidget(self.google_creds_frame)
+ self.google_creds_frame.setVisible(False) # Hidden by default
+
+ # Azure settings frame (hidden by default)
+ self.azure_frame = QWidget()
+ azure_frame_layout = QVBoxLayout(self.azure_frame)
+ azure_frame_layout.setContentsMargins(0, 0, 0, 10)
+ azure_frame_layout.setSpacing(5)
+
+ # Azure Key
+ azure_key_frame = QWidget()
+ azure_key_layout = QHBoxLayout(azure_key_frame)
+ azure_key_layout.setContentsMargins(0, 0, 0, 0)
+ azure_key_layout.setSpacing(10)
+
+ azure_key_label = QLabel("Azure Key:")
+ azure_key_label.setMinimumWidth(150)
+ azure_key_label.setAlignment(Qt.AlignLeft)
+ azure_key_layout.addWidget(azure_key_label)
+
+ self.azure_key_entry = QLineEdit()
+ self.azure_key_entry.setEchoMode(QLineEdit.Password)
+ self.azure_key_entry.setMinimumWidth(150) # Reduced for better fit
+ self.azure_key_entry.setSizePolicy(QSizePolicy.Policy.Expanding, QSizePolicy.Policy.Fixed)
+ azure_key_layout.addWidget(self.azure_key_entry)
+
+ # Show/Hide button for Azure key
+ self.show_azure_key_checkbox = self._create_styled_checkbox("Show")
+ self.show_azure_key_checkbox.stateChanged.connect(self._toggle_azure_key_visibility)
+ azure_key_layout.addWidget(self.show_azure_key_checkbox)
+ azure_key_layout.addStretch()
+ azure_frame_layout.addWidget(azure_key_frame)
+
+ # Azure Endpoint
+ azure_endpoint_frame = QWidget()
+ azure_endpoint_layout = QHBoxLayout(azure_endpoint_frame)
+ azure_endpoint_layout.setContentsMargins(0, 0, 0, 0)
+ azure_endpoint_layout.setSpacing(10)
+
+ azure_endpoint_label = QLabel("Azure Endpoint:")
+ azure_endpoint_label.setMinimumWidth(150)
+ azure_endpoint_label.setAlignment(Qt.AlignLeft)
+ azure_endpoint_layout.addWidget(azure_endpoint_label)
+
+ self.azure_endpoint_entry = QLineEdit()
+ self.azure_endpoint_entry.setMinimumWidth(150) # Reduced for better fit
+ self.azure_endpoint_entry.setSizePolicy(QSizePolicy.Policy.Expanding, QSizePolicy.Policy.Fixed)
+ azure_endpoint_layout.addWidget(self.azure_endpoint_entry)
+ azure_endpoint_layout.addStretch()
+ azure_frame_layout.addWidget(azure_endpoint_frame)
+
+ # Load saved Azure settings
+ saved_key = self.main_gui.config.get('azure_vision_key', '')
+ saved_endpoint = self.main_gui.config.get('azure_vision_endpoint', 'https://YOUR-RESOURCE.cognitiveservices.azure.com/')
+ self.azure_key_entry.setText(saved_key)
+ self.azure_endpoint_entry.setText(saved_endpoint)
+
+ settings_frame_layout.addWidget(self.azure_frame)
+ self.azure_frame.setVisible(False) # Hidden by default
+
+ # Initially show/hide based on saved provider
+ self._on_ocr_provider_change()
+
+ # Separator for context settings
+ separator1 = QFrame()
+ separator1.setFrameShape(QFrame.HLine)
+ separator1.setFrameShadow(QFrame.Sunken)
+ settings_frame_layout.addWidget(separator1)
+
+ # Context and Full Page Mode Settings
+ context_frame = QGroupBox("🔄 Context & Translation Mode")
+ context_frame_font = QFont("Arial", 11)
+ context_frame_font.setBold(True)
+ context_frame.setFont(context_frame_font)
+ context_frame_layout = QVBoxLayout(context_frame)
+ context_frame_layout.setContentsMargins(10, 10, 10, 10)
+ context_frame_layout.setSpacing(10)
+
+ # Show current contextual settings from main GUI
+ context_info = QWidget()
+ context_info_layout = QVBoxLayout(context_info)
+ context_info_layout.setContentsMargins(0, 0, 0, 10)
+ context_info_layout.setSpacing(5)
+
+ context_title = QLabel("Main GUI Context Settings:")
+ title_font = QFont("Arial", 10)
+ title_font.setBold(True)
+ context_title.setFont(title_font)
+ context_info_layout.addWidget(context_title)
+
+ # Display current settings
+ settings_frame_display = QWidget()
+ settings_display_layout = QVBoxLayout(settings_frame_display)
+ settings_display_layout.setContentsMargins(20, 0, 0, 0)
+ settings_display_layout.setSpacing(3)
+
+ # Contextual enabled status
+ contextual_status = "Enabled" if self.main_gui.contextual_var.get() else "Disabled"
+ self.contextual_status_label = QLabel(f"• Contextual Translation: {contextual_status}")
+ status_font = QFont("Arial", 10)
+ self.contextual_status_label.setFont(status_font)
+ settings_display_layout.addWidget(self.contextual_status_label)
+
+ # History limit
+ history_limit = self.main_gui.trans_history.get() if hasattr(self.main_gui, 'trans_history') else "3"
+ self.history_limit_label = QLabel(f"• Translation History Limit: {history_limit} exchanges")
+ self.history_limit_label.setFont(status_font)
+ settings_display_layout.addWidget(self.history_limit_label)
+
+ # Rolling history status
+ rolling_status = "Enabled (Rolling Window)" if self.main_gui.translation_history_rolling_var.get() else "Disabled (Reset on Limit)"
+ self.rolling_status_label = QLabel(f"• Rolling History: {rolling_status}")
+ self.rolling_status_label.setFont(status_font)
+ settings_display_layout.addWidget(self.rolling_status_label)
+
+ context_info_layout.addWidget(settings_frame_display)
+ context_frame_layout.addWidget(context_info)
+
+ # Refresh button to update from main GUI
+ refresh_btn = QPushButton("↻ Refresh from Main GUI")
+ refresh_btn.clicked.connect(self._refresh_context_settings)
+ refresh_btn.setStyleSheet("QPushButton { background-color: #6c757d; color: white; padding: 5px 15px; }")
+ context_frame_layout.addWidget(refresh_btn)
+
+ # Separator
+ separator2 = QFrame()
+ separator2.setFrameShape(QFrame.HLine)
+ separator2.setFrameShadow(QFrame.Sunken)
+ context_frame_layout.addWidget(separator2)
+
+ # Full Page Context Translation Settings
+ full_page_frame = QWidget()
+ full_page_layout = QVBoxLayout(full_page_frame)
+ full_page_layout.setContentsMargins(0, 0, 0, 0)
+ full_page_layout.setSpacing(5)
+
+ full_page_title = QLabel("Full Page Context Mode (Manga-specific):")
+ title_font2 = QFont("Arial", 10)
+ title_font2.setBold(True)
+ full_page_title.setFont(title_font2)
+ full_page_layout.addWidget(full_page_title)
+
+ # Enable/disable toggle
+ self.full_page_context_checked = self.main_gui.config.get('manga_full_page_context', True)
+
+ toggle_frame = QWidget()
+ toggle_layout = QHBoxLayout(toggle_frame)
+ toggle_layout.setContentsMargins(20, 0, 0, 0)
+ toggle_layout.setSpacing(10)
+
+ self.context_checkbox = self._create_styled_checkbox("Enable Full Page Context Translation")
+ self.context_checkbox.setChecked(self.full_page_context_checked)
+ self.context_checkbox.stateChanged.connect(self._on_context_toggle)
+ toggle_layout.addWidget(self.context_checkbox)
+
+ # Edit prompt button
+ edit_prompt_btn = QPushButton("Edit Prompt")
+ edit_prompt_btn.clicked.connect(self._edit_context_prompt)
+ edit_prompt_btn.setStyleSheet("QPushButton { background-color: #6c757d; color: white; padding: 5px 15px; }")
+ toggle_layout.addWidget(edit_prompt_btn)
+
+ # Help button for full page context
+ help_btn = QPushButton("?")
+ help_btn.setFixedWidth(30)
+ help_btn.clicked.connect(lambda: self._show_help_dialog(
+ "Full Page Context Mode",
+ "Full page context sends all text regions from the page together in a single request.\n\n"
+ "This allows the AI to see all text at once for more contextually accurate translations, "
+ "especially useful for maintaining character name consistency and understanding "
+ "conversation flow across multiple speech bubbles.\n\n"
+ "✅ Pros:\n"
+ "• Better context awareness\n"
+ "• Consistent character names\n"
+ "• Understanding of conversation flow\n"
+ "• Maintains tone across bubbles\n\n"
+ "❌ Cons:\n"
+ "• Single API call failure affects all text\n"
+ "• May use more tokens\n"
+ "• Slower for pages with many text regions"
+ ))
+ help_btn.setStyleSheet("QPushButton { background-color: #17a2b8; color: white; padding: 5px; }")
+ toggle_layout.addWidget(help_btn)
+ toggle_layout.addStretch()
+
+ full_page_layout.addWidget(toggle_frame)
+ context_frame_layout.addWidget(full_page_frame)
+
+ # Separator
+ separator3 = QFrame()
+ separator3.setFrameShape(QFrame.HLine)
+ separator3.setFrameShadow(QFrame.Sunken)
+ context_frame_layout.addWidget(separator3)
+
+ # Visual Context Settings (for non-vision model support)
+ visual_frame = QWidget()
+ visual_layout = QVBoxLayout(visual_frame)
+ visual_layout.setContentsMargins(0, 0, 0, 0)
+ visual_layout.setSpacing(5)
+
+ visual_title = QLabel("Visual Context (Image Support):")
+ title_font3 = QFont("Arial", 10)
+ title_font3.setBold(True)
+ visual_title.setFont(title_font3)
+ visual_layout.addWidget(visual_title)
+
+ # Visual context toggle
+ self.visual_context_enabled_checked = self.main_gui.config.get('manga_visual_context_enabled', True)
+
+ visual_toggle_frame = QWidget()
+ visual_toggle_layout = QHBoxLayout(visual_toggle_frame)
+ visual_toggle_layout.setContentsMargins(20, 0, 0, 0)
+ visual_toggle_layout.setSpacing(10)
+
+ self.visual_context_checkbox = self._create_styled_checkbox("Include page image in translation requests")
+ self.visual_context_checkbox.setChecked(self.visual_context_enabled_checked)
+ self.visual_context_checkbox.stateChanged.connect(self._on_visual_context_toggle)
+ visual_toggle_layout.addWidget(self.visual_context_checkbox)
+
+ # Help button for visual context
+ visual_help_btn = QPushButton("?")
+ visual_help_btn.setFixedWidth(30)
+ visual_help_btn.clicked.connect(lambda: self._show_help_dialog(
+ "Visual Context Settings",
+ "Visual context includes the manga page image with translation requests.\n\n"
+ "⚠️ WHEN TO DISABLE:\n"
+ "• Using text-only models (Claude, GPT-3.5, standard Gemini)\n"
+ "• Model doesn't support images\n"
+ "• Want to reduce token usage\n"
+ "• Testing text-only translation\n\n"
+ "✅ WHEN TO ENABLE:\n"
+ "• Using vision models (Gemini Vision, GPT-4V, Claude 3)\n"
+ "• Want spatial awareness of text position\n"
+ "• Need visual context for better translation\n\n"
+ "Impact:\n"
+ "• Disabled: Only text is sent (compatible with any model)\n"
+ "• Enabled: Text + image sent (requires vision model)\n\n"
+ "Note: Disabling may reduce translation quality as the AI won't see\n"
+ "the artwork context or spatial layout of the text."
+ ))
+ visual_help_btn.setStyleSheet("QPushButton { background-color: #17a2b8; color: white; padding: 5px; }")
+ visual_toggle_layout.addWidget(visual_help_btn)
+ visual_toggle_layout.addStretch()
+
+ visual_layout.addWidget(visual_toggle_frame)
+
+ # Output settings - moved here to be below visual context
+ output_settings_frame = QWidget()
+ output_settings_layout = QHBoxLayout(output_settings_frame)
+ output_settings_layout.setContentsMargins(20, 10, 0, 0)
+ output_settings_layout.setSpacing(10)
+
+ self.create_subfolder_checkbox = self._create_styled_checkbox("Create 'translated' subfolder for output")
+ self.create_subfolder_checkbox.setChecked(self.main_gui.config.get('manga_create_subfolder', True))
+ self.create_subfolder_checkbox.stateChanged.connect(self._save_rendering_settings)
+ output_settings_layout.addWidget(self.create_subfolder_checkbox)
+ output_settings_layout.addStretch()
+
+ visual_layout.addWidget(output_settings_frame)
+
+ context_frame_layout.addWidget(visual_frame)
+
+ # Add the completed context_frame to settings_frame
+ settings_frame_layout.addWidget(context_frame)
+
+ # Add main settings frame to left column
+ left_column_layout.addWidget(settings_frame)
+
+ # Text Rendering Settings Frame - SPLIT BETWEEN COLUMNS
+ render_frame = QGroupBox("Text Visibility Settings")
+ render_frame_font = QFont("Arial", 12)
+ render_frame_font.setBold(True)
+ render_frame.setFont(render_frame_font)
+ render_frame_layout = QVBoxLayout(render_frame)
+ render_frame_layout.setContentsMargins(15, 15, 15, 10)
+ render_frame_layout.setSpacing(10)
+
+ # Inpainting section
+ inpaint_group = QGroupBox("Inpainting")
+ inpaint_group_font = QFont("Arial", 11)
+ inpaint_group_font.setBold(True)
+ inpaint_group.setFont(inpaint_group_font)
+ inpaint_group_layout = QVBoxLayout(inpaint_group)
+ inpaint_group_layout.setContentsMargins(15, 15, 15, 10)
+ inpaint_group_layout.setSpacing(10)
+
+ # Skip inpainting toggle - use value loaded from config
+ self.skip_inpainting_checkbox = self._create_styled_checkbox("Skip Inpainter")
+ self.skip_inpainting_checkbox.setChecked(self.skip_inpainting_value)
+ self.skip_inpainting_checkbox.stateChanged.connect(self._toggle_inpaint_visibility)
+ inpaint_group_layout.addWidget(self.skip_inpainting_checkbox)
+
+ # Inpainting method selection (only visible when inpainting is enabled)
+ self.inpaint_method_frame = QWidget()
+ inpaint_method_layout = QHBoxLayout(self.inpaint_method_frame)
+ inpaint_method_layout.setContentsMargins(0, 0, 0, 0)
+ inpaint_method_layout.setSpacing(10)
+
+ method_label = QLabel("Inpaint Method:")
+ method_label_font = QFont('Arial', 9)
+ method_label.setFont(method_label_font)
+ method_label.setMinimumWidth(95)
+ method_label.setAlignment(Qt.AlignLeft)
+ inpaint_method_layout.addWidget(method_label)
+
+ # Radio buttons for inpaint method
+ method_selection_frame = QWidget()
+ method_selection_layout = QHBoxLayout(method_selection_frame)
+ method_selection_layout.setContentsMargins(0, 0, 0, 0)
+ method_selection_layout.setSpacing(10)
+
+ self.inpaint_method_value = self.main_gui.config.get('manga_inpaint_method', 'local')
+ self.inpaint_method_group = QButtonGroup()
+
+ # Set smaller font for radio buttons
+ radio_font = QFont('Arial', 9)
+
+ cloud_radio = QRadioButton("Cloud API")
+ cloud_radio.setFont(radio_font)
+ cloud_radio.setChecked(self.inpaint_method_value == 'cloud')
+ cloud_radio.toggled.connect(lambda checked: self._on_inpaint_method_change() if checked else None)
+ self.inpaint_method_group.addButton(cloud_radio, 0)
+ method_selection_layout.addWidget(cloud_radio)
+
+ local_radio = QRadioButton("Local Model")
+ local_radio.setFont(radio_font)
+ local_radio.setChecked(self.inpaint_method_value == 'local')
+ local_radio.toggled.connect(lambda checked: self._on_inpaint_method_change() if checked else None)
+ self.inpaint_method_group.addButton(local_radio, 1)
+ method_selection_layout.addWidget(local_radio)
+
+ hybrid_radio = QRadioButton("Hybrid")
+ hybrid_radio.setFont(radio_font)
+ hybrid_radio.setChecked(self.inpaint_method_value == 'hybrid')
+ hybrid_radio.toggled.connect(lambda checked: self._on_inpaint_method_change() if checked else None)
+ self.inpaint_method_group.addButton(hybrid_radio, 2)
+ method_selection_layout.addWidget(hybrid_radio)
+
+ # Store references to radio buttons
+ self.cloud_radio = cloud_radio
+ self.local_radio = local_radio
+ self.hybrid_radio = hybrid_radio
+
+ inpaint_method_layout.addWidget(method_selection_frame)
+ inpaint_method_layout.addStretch()
+ inpaint_group_layout.addWidget(self.inpaint_method_frame)
+
+ # Cloud settings frame
+ self.cloud_inpaint_frame = QWidget()
+ # Ensure this widget doesn't become a window
+ self.cloud_inpaint_frame.setWindowFlags(Qt.WindowType.Widget)
+ cloud_inpaint_layout = QVBoxLayout(self.cloud_inpaint_frame)
+ cloud_inpaint_layout.setContentsMargins(0, 0, 0, 0)
+ cloud_inpaint_layout.setSpacing(5)
+
+ # Quality selection for cloud
+ quality_frame = QWidget()
+ quality_layout = QHBoxLayout(quality_frame)
+ quality_layout.setContentsMargins(0, 0, 0, 0)
+ quality_layout.setSpacing(10)
+
+ quality_label = QLabel("Cloud Quality:")
+ quality_label_font = QFont('Arial', 9)
+ quality_label.setFont(quality_label_font)
+ quality_label.setMinimumWidth(95)
+ quality_label.setAlignment(Qt.AlignLeft)
+ quality_layout.addWidget(quality_label)
+
+ # inpaint_quality_value is already loaded from config in _load_rendering_settings
+ self.quality_button_group = QButtonGroup()
+
+ quality_options = [('high', 'High Quality'), ('fast', 'Fast')]
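+        # Note: each lambda below binds its option via a default argument (v=value)
+        # so the value is captured at definition time; Python closures are
+        # late-binding, so without this every radio would see the last loop value.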
+ for idx, (value, text) in enumerate(quality_options):
+ quality_radio = QRadioButton(text)
+ quality_radio.setChecked(self.inpaint_quality_value == value)
+            quality_radio.toggled.connect(lambda checked, v=value: (setattr(self, 'inpaint_quality_value', v), self._save_rendering_settings()) if checked else None)
+ self.quality_button_group.addButton(quality_radio, idx)
+ quality_layout.addWidget(quality_radio)
+
+ quality_layout.addStretch()
+ cloud_inpaint_layout.addWidget(quality_frame)
+
+ # Conditional separator
+ self.inpaint_separator = QFrame()
+ self.inpaint_separator.setFrameShape(QFrame.HLine)
+ self.inpaint_separator.setFrameShadow(QFrame.Sunken)
+ if not self.skip_inpainting_value:
+ cloud_inpaint_layout.addWidget(self.inpaint_separator)
+
+ # Cloud API status
+ api_status_frame = QWidget()
+ api_status_layout = QHBoxLayout(api_status_frame)
+ api_status_layout.setContentsMargins(0, 10, 0, 0)
+ api_status_layout.setSpacing(10)
+
+ # Check if API key exists
+ saved_api_key = self.main_gui.config.get('replicate_api_key', '')
+ if saved_api_key:
+ status_text = "✅ Cloud API configured"
+ status_color = 'green'
+ else:
+ status_text = "❌ Cloud API not configured"
+ status_color = 'red'
+
+ self.inpaint_api_status_label = QLabel(status_text)
+ api_status_font = QFont('Arial', 9)
+ self.inpaint_api_status_label.setFont(api_status_font)
+ self.inpaint_api_status_label.setStyleSheet(f"color: {status_color};")
+ api_status_layout.addWidget(self.inpaint_api_status_label)
+
+ configure_api_btn = QPushButton("Configure API Key")
+ configure_api_btn.clicked.connect(self._configure_inpaint_api)
+ configure_api_btn.setStyleSheet("QPushButton { background-color: #17a2b8; color: white; padding: 5px 15px; }")
+ api_status_layout.addWidget(configure_api_btn)
+
+ if saved_api_key:
+ clear_api_btn = QPushButton("Clear")
+ clear_api_btn.clicked.connect(self._clear_inpaint_api)
+ clear_api_btn.setStyleSheet("QPushButton { background-color: #6c757d; color: white; padding: 5px 15px; }")
+ api_status_layout.addWidget(clear_api_btn)
+
+ api_status_layout.addStretch()
+ cloud_inpaint_layout.addWidget(api_status_frame)
+ inpaint_group_layout.addWidget(self.cloud_inpaint_frame)
+
+ # Local inpainting settings frame
+ self.local_inpaint_frame = QWidget()
+ # Ensure this widget doesn't become a window
+ self.local_inpaint_frame.setWindowFlags(Qt.WindowType.Widget)
+ local_inpaint_layout = QVBoxLayout(self.local_inpaint_frame)
+ local_inpaint_layout.setContentsMargins(0, 0, 0, 0)
+ local_inpaint_layout.setSpacing(5)
+
+ # Local model selection
+ local_model_frame = QWidget()
+ local_model_layout = QHBoxLayout(local_model_frame)
+ local_model_layout.setContentsMargins(0, 0, 0, 0)
+ local_model_layout.setSpacing(10)
+
+ local_model_label = QLabel("Local Model:")
+ local_model_label_font = QFont('Arial', 9)
+ local_model_label.setFont(local_model_label_font)
+ local_model_label.setMinimumWidth(95)
+ local_model_label.setAlignment(Qt.AlignLeft)
+ local_model_layout.addWidget(local_model_label)
+ self.local_model_label = local_model_label
+
+ self.local_model_type_value = self.main_gui.config.get('manga_local_inpaint_model', 'anime_onnx')
+ local_model_combo = QComboBox()
+ local_model_combo.addItems(['aot', 'aot_onnx', 'lama', 'lama_onnx', 'anime', 'anime_onnx', 'mat', 'ollama', 'sd_local'])
+ local_model_combo.setCurrentText(self.local_model_type_value)
+ local_model_combo.setMinimumWidth(120)
+ local_model_combo.setMaximumWidth(120)
+ local_combo_font = QFont('Arial', 9)
+ local_model_combo.setFont(local_combo_font)
+ local_model_combo.currentTextChanged.connect(self._on_local_model_change)
+ self._disable_combobox_mousewheel(local_model_combo) # Disable mousewheel scrolling
+ local_model_layout.addWidget(local_model_combo)
+ self.local_model_combo = local_model_combo
+
+ # Model descriptions
+ model_desc = {
+ 'lama': 'LaMa (Best quality)',
+ 'aot': 'AOT GAN (Fast)',
+ 'aot_onnx': 'AOT ONNX (Optimized)',
+ 'mat': 'MAT (High-res)',
+ 'sd_local': 'Stable Diffusion (Anime)',
+ 'anime': 'Anime/Manga Inpainting',
+ 'anime_onnx': 'Anime ONNX (Fast/Optimized)',
+ 'lama_onnx': 'LaMa ONNX (Optimized)',
+ }
+ self.model_desc_label = QLabel(model_desc.get(self.local_model_type_value, ''))
+ desc_font = QFont('Arial', 8)
+ self.model_desc_label.setFont(desc_font)
+ self.model_desc_label.setStyleSheet("color: gray;")
+ self.model_desc_label.setMaximumWidth(200)
+ local_model_layout.addWidget(self.model_desc_label)
+ local_model_layout.addStretch()
+
+ local_inpaint_layout.addWidget(local_model_frame)
+
+ # Model file selection
+ model_path_frame = QWidget()
+ model_path_layout = QHBoxLayout(model_path_frame)
+ model_path_layout.setContentsMargins(0, 5, 0, 0)
+ model_path_layout.setSpacing(10)
+
+ model_file_label = QLabel("Model File:")
+ model_file_label_font = QFont('Arial', 9)
+ model_file_label.setFont(model_file_label_font)
+ model_file_label.setMinimumWidth(95)
+ model_file_label.setAlignment(Qt.AlignLeft)
+ model_path_layout.addWidget(model_file_label)
+ self.model_file_label = model_file_label
+
+ self.local_model_path_value = self.main_gui.config.get(f'manga_{self.local_model_type_value}_model_path', '')
+ self.local_model_entry = QLineEdit(self.local_model_path_value)
+ self.local_model_entry.setReadOnly(True)
+ self.local_model_entry.setMinimumWidth(100) # Reduced for better fit
+ self.local_model_entry.setSizePolicy(QSizePolicy.Policy.Expanding, QSizePolicy.Policy.Fixed)
+ self.local_model_entry.setStyleSheet(
+ "QLineEdit { background-color: #2b2b2b; color: #ffffff; }"
+ )
+ model_path_layout.addWidget(self.local_model_entry)
+
+ browse_model_btn = QPushButton("Browse")
+ browse_model_btn.clicked.connect(self._browse_local_model)
+ browse_model_btn.setStyleSheet("QPushButton { background-color: #007bff; color: white; padding: 5px 15px; }")
+ model_path_layout.addWidget(browse_model_btn)
+ self.browse_model_btn = browse_model_btn
+
+ # Manual load button to avoid auto-loading on dialog open
+ load_model_btn = QPushButton("Load")
+ load_model_btn.clicked.connect(self._click_load_local_model)
+ load_model_btn.setStyleSheet("QPushButton { background-color: #28a745; color: white; padding: 5px 15px; }")
+ model_path_layout.addWidget(load_model_btn)
+ self.load_model_btn = load_model_btn
+ model_path_layout.addStretch()
+
+ local_inpaint_layout.addWidget(model_path_frame)
+
+ # Model status
+ self.local_model_status_label = QLabel("")
+ status_font = QFont('Arial', 9)
+ self.local_model_status_label.setFont(status_font)
+ local_inpaint_layout.addWidget(self.local_model_status_label)
+
+ # Download model button
+ download_model_btn = QPushButton("📥 Download Model")
+ download_model_btn.clicked.connect(self._download_model)
+ download_model_btn.setStyleSheet("QPushButton { background-color: #17a2b8; color: white; padding: 5px 15px; }")
+ local_inpaint_layout.addWidget(download_model_btn)
+
+ # Model info button
+ model_info_btn = QPushButton("ℹ️ Model Info")
+ model_info_btn.clicked.connect(self._show_model_info)
+ model_info_btn.setStyleSheet("QPushButton { background-color: #6c757d; color: white; padding: 5px 15px; }")
+ local_inpaint_layout.addWidget(model_info_btn)
+
+ # Add local_inpaint_frame to inpaint_group
+ inpaint_group_layout.addWidget(self.local_inpaint_frame)
+
+ # Hide both frames by default to prevent window popup
+ self.cloud_inpaint_frame.hide()
+ self.local_inpaint_frame.hide()
+
+ # Try to load saved model for current type on dialog open
+ initial_model_type = self.local_model_type_value
+ initial_model_path = self.main_gui.config.get(f'manga_{initial_model_type}_model_path', '')
+
+ if initial_model_path and os.path.exists(initial_model_path):
+ self.local_model_entry.setText(initial_model_path)
+ if getattr(self, 'preload_local_models_on_open', False):
+ self.local_model_status_label.setText("⏳ Loading saved model...")
+ self.local_model_status_label.setStyleSheet("color: orange;")
+ # Auto-load after dialog is ready
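+                # QTimer.singleShot defers the call onto the Qt event loop (~500 ms),
+                # so the dialog finishes constructing and painting before the heavier
+                # model initialization runs.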
+ QTimer.singleShot(500, lambda: self._try_load_model(initial_model_type, initial_model_path))
+ else:
+ # Do not auto-load large models at startup to avoid crashes on some systems
+ self.local_model_status_label.setText("💤 Saved model detected (not loaded). Click 'Load' to initialize.")
+ self.local_model_status_label.setStyleSheet("color: blue;")
+ else:
+ self.local_model_status_label.setText("No model loaded")
+ self.local_model_status_label.setStyleSheet("color: gray;")
+
+ # Initialize visibility based on current settings
+ self._toggle_inpaint_visibility()
+
+ # Add inpaint_group to render_frame
+ render_frame_layout.addWidget(inpaint_group)
+
+ # Add render_frame (inpainting only) to LEFT COLUMN
+ left_column_layout.addWidget(render_frame)
+
+ # Advanced Settings button at the TOP OF RIGHT COLUMN
+ advanced_button_frame = QWidget()
+ advanced_button_layout = QHBoxLayout(advanced_button_frame)
+ advanced_button_layout.setContentsMargins(0, 0, 0, 10)
+ advanced_button_layout.setSpacing(10)
+
+ advanced_settings_desc = QLabel("Configure OCR, preprocessing, and performance options")
+ desc_font = QFont("Arial", 9)
+ advanced_settings_desc.setFont(desc_font)
+ advanced_settings_desc.setStyleSheet("color: gray;")
+ advanced_button_layout.addWidget(advanced_settings_desc)
+
+ advanced_button_layout.addStretch()
+
+ advanced_settings_btn = QPushButton("⚙️ Advanced Settings")
+ advanced_settings_btn.clicked.connect(self._open_advanced_settings)
+ advanced_settings_btn.setStyleSheet("QPushButton { background-color: #17a2b8; color: white; padding: 5px 15px; }")
+ advanced_button_layout.addWidget(advanced_settings_btn)
+
+ right_column_layout.addWidget(advanced_button_frame)
+
+ # Background Settings - MOVED TO RIGHT COLUMN
+ self.bg_settings_frame = QGroupBox("Background Settings")
+ bg_settings_font = QFont("Arial", 10)
+ bg_settings_font.setBold(True)
+ self.bg_settings_frame.setFont(bg_settings_font)
+ bg_settings_layout = QVBoxLayout(self.bg_settings_frame)
+ bg_settings_layout.setContentsMargins(10, 10, 10, 10)
+ bg_settings_layout.setSpacing(8)
+
+ # Free text only background opacity toggle (applies BG opacity only to free-text regions)
+ self.ft_only_checkbox = self._create_styled_checkbox("Free text only background opacity")
+ self.ft_only_checkbox.setChecked(self.free_text_only_bg_opacity_value)
+ # Connect directly to save+apply (working pattern)
+ self.ft_only_checkbox.stateChanged.connect(lambda: (self._on_ft_only_bg_opacity_changed(), self._save_rendering_settings(), self._apply_rendering_settings()))
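+        # The tuple in the lambda evaluates each call left-to-right (update, save,
+        # apply); the resulting tuple is simply discarded by the signal.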
+ bg_settings_layout.addWidget(self.ft_only_checkbox)
+
+ # Background opacity slider
+ opacity_frame = QWidget()
+ opacity_layout = QHBoxLayout(opacity_frame)
+ opacity_layout.setContentsMargins(0, 5, 0, 5)
+ opacity_layout.setSpacing(10)
+
+ opacity_label_text = QLabel("Background Opacity:")
+ opacity_label_text.setMinimumWidth(150)
+ opacity_layout.addWidget(opacity_label_text)
+
+ self.opacity_slider = QSlider(Qt.Horizontal)
+ self.opacity_slider.setMinimum(0)
+ self.opacity_slider.setMaximum(255)
+ self.opacity_slider.setValue(self.bg_opacity_value)
+ self.opacity_slider.setMinimumWidth(200)
+ self.opacity_slider.valueChanged.connect(lambda value: (self._update_opacity_label(value), self._save_rendering_settings(), self._apply_rendering_settings()))
+ opacity_layout.addWidget(self.opacity_slider)
+
+ self.opacity_label = QLabel("100%")
+ self.opacity_label.setMinimumWidth(50)
+ opacity_layout.addWidget(self.opacity_label)
+ opacity_layout.addStretch()
+
+ bg_settings_layout.addWidget(opacity_frame)
+
+ # Initialize the label with the loaded value
+ self._update_opacity_label(self.bg_opacity_value)
+
+ # Background size reduction
+ reduction_frame = QWidget()
+ reduction_layout = QHBoxLayout(reduction_frame)
+ reduction_layout.setContentsMargins(0, 5, 0, 5)
+ reduction_layout.setSpacing(10)
+
+ reduction_label_text = QLabel("Background Size:")
+ reduction_label_text.setMinimumWidth(150)
+ reduction_layout.addWidget(reduction_label_text)
+
+ self.reduction_slider = QDoubleSpinBox()
+ self.reduction_slider.setMinimum(0.5)
+ self.reduction_slider.setMaximum(2.0)
+ self.reduction_slider.setSingleStep(0.05)
+ self.reduction_slider.setValue(self.bg_reduction_value)
+ self.reduction_slider.setMinimumWidth(100)
+ self.reduction_slider.valueChanged.connect(lambda value: (self._update_reduction_label(value), self._save_rendering_settings(), self._apply_rendering_settings()))
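+        # Ignore mouse wheel events on this spinbox (mirrors the combobox handling)
+        # so scrolling the settings page doesn't accidentally change the value.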
+ self._disable_spinbox_mousewheel(self.reduction_slider)
+ reduction_layout.addWidget(self.reduction_slider)
+
+ self.reduction_label = QLabel("100%")
+ self.reduction_label.setMinimumWidth(50)
+ reduction_layout.addWidget(self.reduction_label)
+ reduction_layout.addStretch()
+
+ bg_settings_layout.addWidget(reduction_frame)
+
+ # Initialize the label with the loaded value
+ self._update_reduction_label(self.bg_reduction_value)
+
+ # Background style selection
+ style_frame = QWidget()
+ style_layout = QHBoxLayout(style_frame)
+ style_layout.setContentsMargins(0, 5, 0, 5)
+ style_layout.setSpacing(10)
+
+ style_label = QLabel("Background Style:")
+ style_label.setMinimumWidth(150)
+ style_layout.addWidget(style_label)
+
+ # Radio buttons for background style
+ self.bg_style_group = QButtonGroup()
+
+ box_radio = QRadioButton("Box")
+ box_radio.setChecked(self.bg_style_value == "box")
+ box_radio.toggled.connect(lambda checked: (setattr(self, 'bg_style_value', 'box'), self._save_rendering_settings(), self._apply_rendering_settings()) if checked else None)
+ self.bg_style_group.addButton(box_radio, 0)
+ style_layout.addWidget(box_radio)
+
+ circle_radio = QRadioButton("Circle")
+ circle_radio.setChecked(self.bg_style_value == "circle")
+ circle_radio.toggled.connect(lambda checked: (setattr(self, 'bg_style_value', 'circle'), self._save_rendering_settings(), self._apply_rendering_settings()) if checked else None)
+ self.bg_style_group.addButton(circle_radio, 1)
+ style_layout.addWidget(circle_radio)
+
+ wrap_radio = QRadioButton("Wrap")
+ wrap_radio.setChecked(self.bg_style_value == "wrap")
+ wrap_radio.toggled.connect(lambda checked: (setattr(self, 'bg_style_value', 'wrap'), self._save_rendering_settings(), self._apply_rendering_settings()) if checked else None)
+ self.bg_style_group.addButton(wrap_radio, 2)
+ style_layout.addWidget(wrap_radio)
+
+ # Store references
+ self.box_radio = box_radio
+ self.circle_radio = circle_radio
+ self.wrap_radio = wrap_radio
+
+ # Add tooltips or descriptions
+ style_help = QLabel("(Box: rounded rectangle, Circle: ellipse, Wrap: per-line)")
+ style_help_font = QFont('Arial', 9)
+ style_help.setFont(style_help_font)
+ style_help.setStyleSheet("color: gray;")
+ style_layout.addWidget(style_help)
+ style_layout.addStretch()
+
+ bg_settings_layout.addWidget(style_frame)
+
+ # Add Background Settings to RIGHT COLUMN
+ right_column_layout.addWidget(self.bg_settings_frame)
+
+ # Font Settings group (consolidated) - GOES TO RIGHT COLUMN (after background settings)
+ font_render_frame = QGroupBox("Font & Text Settings")
+ font_render_frame_font = QFont("Arial", 10)
+ font_render_frame_font.setBold(True)
+ font_render_frame.setFont(font_render_frame_font)
+ font_render_frame_layout = QVBoxLayout(font_render_frame)
+ font_render_frame_layout.setContentsMargins(15, 15, 15, 10)
+ font_render_frame_layout.setSpacing(10)
+ self.sizing_group = QGroupBox("Font Settings")
+ sizing_group_font = QFont("Arial", 9)
+ sizing_group_font.setBold(True)
+ self.sizing_group.setFont(sizing_group_font)
+ sizing_group_layout = QVBoxLayout(self.sizing_group)
+ sizing_group_layout.setContentsMargins(10, 10, 10, 10)
+ sizing_group_layout.setSpacing(8)
+
+ # Font sizing algorithm selection
+ algo_frame = QWidget()
+ algo_layout = QHBoxLayout(algo_frame)
+ algo_layout.setContentsMargins(0, 6, 0, 0)
+ algo_layout.setSpacing(10)
+
+ algo_label = QLabel("Font Size Algorithm:")
+ algo_label.setMinimumWidth(150)
+ algo_layout.addWidget(algo_label)
+
+ # Radio buttons for algorithm selection
+ self.font_algorithm_group = QButtonGroup()
+
+ for idx, (value, text) in enumerate([
+ ('conservative', 'Conservative'),
+ ('smart', 'Smart'),
+ ('aggressive', 'Aggressive')
+ ]):
+ rb = QRadioButton(text)
+ rb.setChecked(self.font_algorithm_value == value)
+ rb.toggled.connect(lambda checked, v=value: (setattr(self, 'font_algorithm_value', v), self._save_rendering_settings(), self._apply_rendering_settings()) if checked else None)
+ self.font_algorithm_group.addButton(rb, idx)
+ algo_layout.addWidget(rb)
+
+ algo_layout.addStretch()
+ sizing_group_layout.addWidget(algo_frame)
+
+ # Font size selection with mode toggle
+ font_frame_container = QWidget()
+ font_frame_layout = QVBoxLayout(font_frame_container)
+ font_frame_layout.setContentsMargins(0, 5, 0, 5)
+ font_frame_layout.setSpacing(10)
+
+ # Mode selection frame
+ mode_frame = QWidget()
+ mode_layout = QHBoxLayout(mode_frame)
+ mode_layout.setContentsMargins(0, 0, 0, 0)
+ mode_layout.setSpacing(10)
+
+ mode_label = QLabel("Font Size Mode:")
+ mode_label.setMinimumWidth(150)
+ mode_layout.addWidget(mode_label)
+
+ # Radio buttons for mode selection
+ self.font_size_mode_group = QButtonGroup()
+
+ auto_radio = QRadioButton("Auto")
+ auto_radio.setChecked(self.font_size_mode_value == "auto")
+ auto_radio.toggled.connect(lambda checked: (setattr(self, 'font_size_mode_value', 'auto'), self._toggle_font_size_mode()) if checked else None)
+ self.font_size_mode_group.addButton(auto_radio, 0)
+ mode_layout.addWidget(auto_radio)
+
+ fixed_radio = QRadioButton("Fixed Size")
+ fixed_radio.setChecked(self.font_size_mode_value == "fixed")
+ fixed_radio.toggled.connect(lambda checked: (setattr(self, 'font_size_mode_value', 'fixed'), self._toggle_font_size_mode()) if checked else None)
+ self.font_size_mode_group.addButton(fixed_radio, 1)
+ mode_layout.addWidget(fixed_radio)
+
+ multiplier_radio = QRadioButton("Dynamic Multiplier")
+ multiplier_radio.setChecked(self.font_size_mode_value == "multiplier")
+ multiplier_radio.toggled.connect(lambda checked: (setattr(self, 'font_size_mode_value', 'multiplier'), self._toggle_font_size_mode()) if checked else None)
+ self.font_size_mode_group.addButton(multiplier_radio, 2)
+ mode_layout.addWidget(multiplier_radio)
+
+ # Store references
+ self.auto_mode_radio = auto_radio
+ self.fixed_mode_radio = fixed_radio
+ self.multiplier_mode_radio = multiplier_radio
+
+ mode_layout.addStretch()
+ font_frame_layout.addWidget(mode_frame)
+
+ # Fixed font size frame
+ self.fixed_size_frame = QWidget()
+ fixed_size_layout = QHBoxLayout(self.fixed_size_frame)
+ fixed_size_layout.setContentsMargins(0, 8, 0, 0)
+ fixed_size_layout.setSpacing(10)
+
+ fixed_size_label = QLabel("Font Size:")
+ fixed_size_label.setMinimumWidth(150)
+ fixed_size_layout.addWidget(fixed_size_label)
+
+ self.font_size_spinbox = QSpinBox()
+ self.font_size_spinbox.setMinimum(0)
+ self.font_size_spinbox.setMaximum(72)
+ self.font_size_spinbox.setValue(self.font_size_value)
+ self.font_size_spinbox.setMinimumWidth(100)
+ self.font_size_spinbox.valueChanged.connect(lambda value: (setattr(self, 'font_size_value', value), self._save_rendering_settings(), self._apply_rendering_settings()))
+ self._disable_spinbox_mousewheel(self.font_size_spinbox)
+ fixed_size_layout.addWidget(self.font_size_spinbox)
+
+ fixed_help_label = QLabel("(0 = Auto)")
+ fixed_help_font = QFont('Arial', 9)
+ fixed_help_label.setFont(fixed_help_font)
+ fixed_help_label.setStyleSheet("color: gray;")
+ fixed_size_layout.addWidget(fixed_help_label)
+ fixed_size_layout.addStretch()
+
+ font_frame_layout.addWidget(self.fixed_size_frame)
+
+ # Dynamic multiplier frame
+ self.multiplier_frame = QWidget()
+ multiplier_layout = QHBoxLayout(self.multiplier_frame)
+ multiplier_layout.setContentsMargins(0, 8, 0, 0)
+ multiplier_layout.setSpacing(10)
+
+ multiplier_label_text = QLabel("Size Multiplier:")
+ multiplier_label_text.setMinimumWidth(150)
+ multiplier_layout.addWidget(multiplier_label_text)
+
+ self.multiplier_slider = QDoubleSpinBox()
+ self.multiplier_slider.setMinimum(0.5)
+ self.multiplier_slider.setMaximum(2.0)
+ self.multiplier_slider.setSingleStep(0.1)
+ self.multiplier_slider.setValue(self.font_size_multiplier_value)
+ self.multiplier_slider.setMinimumWidth(100)
+ self.multiplier_slider.valueChanged.connect(lambda value: (self._update_multiplier_label(value), self._save_rendering_settings(), self._apply_rendering_settings()))
+ self._disable_spinbox_mousewheel(self.multiplier_slider)
+ multiplier_layout.addWidget(self.multiplier_slider)
+
+ self.multiplier_label = QLabel("1.0x")
+ self.multiplier_label.setMinimumWidth(50)
+ multiplier_layout.addWidget(self.multiplier_label)
+
+ multiplier_help_label = QLabel("(Scales with panel size)")
+ multiplier_help_font = QFont('Arial', 9)
+ multiplier_help_label.setFont(multiplier_help_font)
+ multiplier_help_label.setStyleSheet("color: gray;")
+ multiplier_layout.addWidget(multiplier_help_label)
+ multiplier_layout.addStretch()
+
+ font_frame_layout.addWidget(self.multiplier_frame)
+
+ # Constraint checkbox frame (only visible in multiplier mode)
+ self.constraint_frame = QWidget()
+ constraint_layout = QHBoxLayout(self.constraint_frame)
+ constraint_layout.setContentsMargins(20, 0, 0, 0)
+ constraint_layout.setSpacing(10)
+
+ self.constrain_checkbox = self._create_styled_checkbox("Constrain text to bubble boundaries")
+ self.constrain_checkbox.setChecked(self.constrain_to_bubble_value)
+ self.constrain_checkbox.stateChanged.connect(lambda: (setattr(self, 'constrain_to_bubble_value', self.constrain_checkbox.isChecked()), self._save_rendering_settings(), self._apply_rendering_settings()))
+ constraint_layout.addWidget(self.constrain_checkbox)
+
+ constraint_help_label = QLabel("(Unchecked allows text to exceed bubbles)")
+ constraint_help_font = QFont('Arial', 9)
+ constraint_help_label.setFont(constraint_help_font)
+ constraint_help_label.setStyleSheet("color: gray;")
+ constraint_layout.addWidget(constraint_help_label)
+ constraint_layout.addStretch()
+
+ font_frame_layout.addWidget(self.constraint_frame)
+
+ # Add font_frame_container to sizing_group_layout
+ sizing_group_layout.addWidget(font_frame_container)
+
+ # Minimum Font Size (Auto mode lower bound)
+ self.min_size_frame = QWidget()
+ min_size_layout = QHBoxLayout(self.min_size_frame)
+ min_size_layout.setContentsMargins(0, 5, 0, 5)
+ min_size_layout.setSpacing(10)
+
+ min_size_label = QLabel("Minimum Font Size:")
+ min_size_label.setMinimumWidth(150)
+ min_size_layout.addWidget(min_size_label)
+
+ self.min_size_spinbox = QSpinBox()
+ self.min_size_spinbox.setMinimum(8)
+ self.min_size_spinbox.setMaximum(20)
+ self.min_size_spinbox.setValue(self.auto_min_size_value)
+ self.min_size_spinbox.setMinimumWidth(100)
+ self.min_size_spinbox.valueChanged.connect(lambda value: (setattr(self, 'auto_min_size_value', value), self._save_rendering_settings(), self._apply_rendering_settings()))
+ self._disable_spinbox_mousewheel(self.min_size_spinbox)
+ min_size_layout.addWidget(self.min_size_spinbox)
+
+ min_help_label = QLabel("(Auto mode won't go below this)")
+ min_help_font = QFont('Arial', 9)
+ min_help_label.setFont(min_help_font)
+ min_help_label.setStyleSheet("color: gray;")
+ min_size_layout.addWidget(min_help_label)
+ min_size_layout.addStretch()
+
+ sizing_group_layout.addWidget(self.min_size_frame)
+
+ # Maximum Font Size (Auto mode upper bound)
+ self.max_size_frame = QWidget()
+ max_size_layout = QHBoxLayout(self.max_size_frame)
+ max_size_layout.setContentsMargins(0, 5, 0, 5)
+ max_size_layout.setSpacing(10)
+
+ max_size_label = QLabel("Maximum Font Size:")
+ max_size_label.setMinimumWidth(150)
+ max_size_layout.addWidget(max_size_label)
+
+ self.max_size_spinbox = QSpinBox()
+ self.max_size_spinbox.setMinimum(20)
+ self.max_size_spinbox.setMaximum(100)
+ self.max_size_spinbox.setValue(self.max_font_size_value)
+ self.max_size_spinbox.setMinimumWidth(100)
+ self.max_size_spinbox.valueChanged.connect(lambda value: (setattr(self, 'max_font_size_value', value), self._save_rendering_settings(), self._apply_rendering_settings()))
+ self._disable_spinbox_mousewheel(self.max_size_spinbox)
+ max_size_layout.addWidget(self.max_size_spinbox)
+
+ max_help_label = QLabel("(Limits maximum text size)")
+ max_help_font = QFont('Arial', 9)
+ max_help_label.setFont(max_help_font)
+ max_help_label.setStyleSheet("color: gray;")
+ max_size_layout.addWidget(max_help_label)
+ max_size_layout.addStretch()
+
+ sizing_group_layout.addWidget(self.max_size_frame)
+
+ # Initialize visibility AFTER all frames are created
+ self._toggle_font_size_mode()
+
+ # Auto Fit Style (applies to Auto mode)
+ fit_row = QWidget()
+ fit_layout = QHBoxLayout(fit_row)
+ fit_layout.setContentsMargins(0, 0, 0, 6)
+ fit_layout.setSpacing(10)
+
+ fit_label = QLabel("Auto Fit Style:")
+ fit_label.setMinimumWidth(110)
+ fit_layout.addWidget(fit_label)
+
+ # Radio buttons for auto fit style
+ self.auto_fit_style_group = QButtonGroup()
+
+ for idx, (value, text) in enumerate([('compact','Compact'), ('balanced','Balanced'), ('readable','Readable')]):
+ rb = QRadioButton(text)
+ rb.setChecked(self.auto_fit_style_value == value)
+ rb.toggled.connect(lambda checked, v=value: (setattr(self, 'auto_fit_style_value', v), self._save_rendering_settings(), self._apply_rendering_settings()) if checked else None)
+ self.auto_fit_style_group.addButton(rb, idx)
+ fit_layout.addWidget(rb)
+
+ fit_layout.addStretch()
+ sizing_group_layout.addWidget(fit_row)
+
+ # Behavior toggles
+ self.prefer_larger_checkbox = self._create_styled_checkbox("Prefer larger text")
+ self.prefer_larger_checkbox.setChecked(self.prefer_larger_value)
+ self.prefer_larger_checkbox.stateChanged.connect(lambda: (setattr(self, 'prefer_larger_value', self.prefer_larger_checkbox.isChecked()), self._save_rendering_settings(), self._apply_rendering_settings()))
+ sizing_group_layout.addWidget(self.prefer_larger_checkbox)
+
+ self.bubble_size_factor_checkbox = self._create_styled_checkbox("Scale with bubble size")
+ self.bubble_size_factor_checkbox.setChecked(self.bubble_size_factor_value)
+ self.bubble_size_factor_checkbox.stateChanged.connect(lambda: (setattr(self, 'bubble_size_factor_value', self.bubble_size_factor_checkbox.isChecked()), self._save_rendering_settings(), self._apply_rendering_settings()))
+ sizing_group_layout.addWidget(self.bubble_size_factor_checkbox)
+
+ # Line Spacing row with live value label
+ row_ls = QWidget()
+ ls_layout = QHBoxLayout(row_ls)
+ ls_layout.setContentsMargins(0, 6, 0, 2)
+ ls_layout.setSpacing(10)
+
+ ls_label = QLabel("Line Spacing:")
+ ls_label.setMinimumWidth(110)
+ ls_layout.addWidget(ls_label)
+
+ self.line_spacing_spinbox = QDoubleSpinBox()
+ self.line_spacing_spinbox.setMinimum(1.0)
+ self.line_spacing_spinbox.setMaximum(2.0)
+ self.line_spacing_spinbox.setSingleStep(0.01)
+ self.line_spacing_spinbox.setValue(self.line_spacing_value)
+ self.line_spacing_spinbox.setMinimumWidth(100)
+ self.line_spacing_spinbox.valueChanged.connect(lambda value: (self._on_line_spacing_changed(value), self._save_rendering_settings(), self._apply_rendering_settings()))
+ self._disable_spinbox_mousewheel(self.line_spacing_spinbox)
+ ls_layout.addWidget(self.line_spacing_spinbox)
+
+ self.line_spacing_value_label = QLabel(f"{self.line_spacing_value:.2f}")
+ self.line_spacing_value_label.setMinimumWidth(50)
+ ls_layout.addWidget(self.line_spacing_value_label)
+ ls_layout.addStretch()
+
+ sizing_group_layout.addWidget(row_ls)
+
+ # Max Lines
+ row_ml = QWidget()
+ ml_layout = QHBoxLayout(row_ml)
+ ml_layout.setContentsMargins(0, 2, 0, 4)
+ ml_layout.setSpacing(10)
+
+ ml_label = QLabel("Max Lines:")
+ ml_label.setMinimumWidth(110)
+ ml_layout.addWidget(ml_label)
+
+ self.max_lines_spinbox = QSpinBox()
+ self.max_lines_spinbox.setMinimum(5)
+ self.max_lines_spinbox.setMaximum(20)
+ self.max_lines_spinbox.setValue(self.max_lines_value)
+ self.max_lines_spinbox.setMinimumWidth(100)
+ self.max_lines_spinbox.valueChanged.connect(lambda value: (setattr(self, 'max_lines_value', value), self._save_rendering_settings(), self._apply_rendering_settings()))
+ self._disable_spinbox_mousewheel(self.max_lines_spinbox)
+ ml_layout.addWidget(self.max_lines_spinbox)
+ ml_layout.addStretch()
+
+ sizing_group_layout.addWidget(row_ml)
+
+ # Quick Presets (horizontal) merged into sizing group
+ row_presets = QWidget()
+ presets_layout = QHBoxLayout(row_presets)
+ presets_layout.setContentsMargins(0, 6, 0, 2)
+ presets_layout.setSpacing(10)
+
+ presets_label = QLabel("Quick Presets:")
+ presets_label.setMinimumWidth(110)
+ presets_layout.addWidget(presets_label)
+
+ small_preset_btn = QPushButton("Small Bubbles")
+ small_preset_btn.setMinimumWidth(120)
+ small_preset_btn.clicked.connect(lambda: self._set_font_preset('small'))
+ presets_layout.addWidget(small_preset_btn)
+
+ balanced_preset_btn = QPushButton("Balanced")
+ balanced_preset_btn.setMinimumWidth(120)
+ balanced_preset_btn.clicked.connect(lambda: self._set_font_preset('balanced'))
+ presets_layout.addWidget(balanced_preset_btn)
+
+ large_preset_btn = QPushButton("Large Text")
+ large_preset_btn.setMinimumWidth(120)
+ large_preset_btn.clicked.connect(lambda: self._set_font_preset('large'))
+ presets_layout.addWidget(large_preset_btn)
+
+ presets_layout.addStretch()
+ sizing_group_layout.addWidget(row_presets)
+
+ # Text wrapping mode (moved into Font Settings)
+ wrap_frame = QWidget()
+ wrap_layout = QVBoxLayout(wrap_frame)
+ wrap_layout.setContentsMargins(0, 12, 0, 4)
+ wrap_layout.setSpacing(5)
+
+ self.strict_wrap_checkbox = self._create_styled_checkbox("Strict text wrapping (force text to fit within bubbles)")
+ self.strict_wrap_checkbox.setChecked(self.strict_text_wrapping_value)
+ self.strict_wrap_checkbox.stateChanged.connect(lambda: (setattr(self, 'strict_text_wrapping_value', self.strict_wrap_checkbox.isChecked()), self._save_rendering_settings(), self._apply_rendering_settings()))
+ wrap_layout.addWidget(self.strict_wrap_checkbox)
+
+ wrap_help_label = QLabel("(Break words with hyphens if needed)")
+ wrap_help_font = QFont('Arial', 9)
+ wrap_help_label.setFont(wrap_help_font)
+ wrap_help_label.setStyleSheet("color: gray; margin-left: 20px;")
+ wrap_layout.addWidget(wrap_help_label)
+
+ # Force CAPS LOCK directly below strict wrapping
+ self.force_caps_checkbox = self._create_styled_checkbox("Force CAPS LOCK")
+ self.force_caps_checkbox.setChecked(self.force_caps_lock_value)
+ self.force_caps_checkbox.stateChanged.connect(lambda: (setattr(self, 'force_caps_lock_value', self.force_caps_checkbox.isChecked()), self._save_rendering_settings(), self._apply_rendering_settings()))
+ wrap_layout.addWidget(self.force_caps_checkbox)
+
+ sizing_group_layout.addWidget(wrap_frame)
+
+ # Update multiplier label with loaded value
+ self._update_multiplier_label(self.font_size_multiplier_value)
+
+ # Add sizing_group to font_render_frame (right column)
+ font_render_frame_layout.addWidget(self.sizing_group)
+
+ # Font style selection (moved into Font Settings)
+ font_style_frame = QWidget()
+ font_style_layout = QHBoxLayout(font_style_frame)
+ font_style_layout.setContentsMargins(0, 6, 0, 4)
+ font_style_layout.setSpacing(10)
+
+ font_style_label = QLabel("Font Style:")
+ font_style_label.setMinimumWidth(110)
+ font_style_layout.addWidget(font_style_label)
+
+ # Font style will be set from loaded config in _load_rendering_settings
+ self.font_combo = QComboBox()
+ self.font_combo.addItems(self._get_available_fonts())
+ self.font_combo.setCurrentText(self.font_style_value)
+ self.font_combo.setMinimumWidth(120) # Reduced for better fit
+ self.font_combo.setSizePolicy(QSizePolicy.Policy.Expanding, QSizePolicy.Policy.Fixed)
+ self.font_combo.currentTextChanged.connect(lambda: (self._on_font_selected(), self._save_rendering_settings(), self._apply_rendering_settings()))
+ self._disable_combobox_mousewheel(self.font_combo) # Disable mousewheel scrolling
+ font_style_layout.addWidget(self.font_combo)
+ font_style_layout.addStretch()
+
+ font_render_frame_layout.addWidget(font_style_frame)
+
+ # Font color selection (moved into Font Settings)
+ color_frame = QWidget()
+ color_layout = QHBoxLayout(color_frame)
+ color_layout.setContentsMargins(0, 6, 0, 12)
+ color_layout.setSpacing(10)
+
+ color_label = QLabel("Font Color:")
+ color_label.setMinimumWidth(110)
+ color_layout.addWidget(color_label)
+
+ # Color preview frame
+ self.color_preview_frame = QFrame()
+ self.color_preview_frame.setFixedSize(40, 30)
+ self.color_preview_frame.setFrameShape(QFrame.Box)
+ self.color_preview_frame.setLineWidth(1)
+ # Initialize with current color
+ r, g, b = self.text_color_r_value, self.text_color_g_value, self.text_color_b_value
+ self.color_preview_frame.setStyleSheet(f"background-color: rgb({r},{g},{b}); border: 1px solid #5a9fd4;")
+ color_layout.addWidget(self.color_preview_frame)
+
+ # RGB display label
+ r, g, b = self.text_color_r_value, self.text_color_g_value, self.text_color_b_value
+ self.rgb_label = QLabel(f"RGB({r},{g},{b})")
+ self.rgb_label.setMinimumWidth(100)
+ color_layout.addWidget(self.rgb_label)
+
+ # Color picker button
+ def pick_font_color():
+ # Get current color
+ current_color = QColor(self.text_color_r_value, self.text_color_g_value, self.text_color_b_value)
+
+ # Open color dialog
+ color = QColorDialog.getColor(current_color, self.dialog, "Choose Font Color")
+ if color.isValid():
+ # Update RGB values
+ self.text_color_r_value = color.red()
+ self.text_color_g_value = color.green()
+ self.text_color_b_value = color.blue()
+ # Update display
+ self.rgb_label.setText(f"RGB({color.red()},{color.green()},{color.blue()})")
+ self._update_color_preview(None)
+ # Save settings to config
+ self._save_rendering_settings()
+
+ choose_color_btn = QPushButton("Choose Color")
+ choose_color_btn.clicked.connect(pick_font_color)
+ choose_color_btn.setStyleSheet("QPushButton { background-color: #17a2b8; color: white; padding: 5px 15px; }")
+ color_layout.addWidget(choose_color_btn)
+ color_layout.addStretch()
+
+ font_render_frame_layout.addWidget(color_frame)
+
+ self._update_color_preview(None) # Initialize with loaded colors
+
+ # Text Shadow settings (moved into Font Settings)
+ shadow_header = QWidget()
+ shadow_header_layout = QHBoxLayout(shadow_header)
+ shadow_header_layout.setContentsMargins(0, 4, 0, 4)
+
+ # Shadow enabled checkbox
+ self.shadow_enabled_checkbox = self._create_styled_checkbox("Enable Shadow")
+ self.shadow_enabled_checkbox.setChecked(self.shadow_enabled_value)
+ self.shadow_enabled_checkbox.stateChanged.connect(lambda: (setattr(self, 'shadow_enabled_value', self.shadow_enabled_checkbox.isChecked()), self._toggle_shadow_controls(), self._save_rendering_settings(), self._apply_rendering_settings()))
+ shadow_header_layout.addWidget(self.shadow_enabled_checkbox)
+ shadow_header_layout.addStretch()
+
+ font_render_frame_layout.addWidget(shadow_header)
+
+ # Shadow controls container
+ self.shadow_controls = QWidget()
+ shadow_controls_layout = QVBoxLayout(self.shadow_controls)
+ shadow_controls_layout.setContentsMargins(0, 2, 0, 6)
+ shadow_controls_layout.setSpacing(5)
+
+ # Shadow color
+ shadow_color_frame = QWidget()
+ shadow_color_layout = QHBoxLayout(shadow_color_frame)
+ shadow_color_layout.setContentsMargins(0, 2, 0, 8)
+ shadow_color_layout.setSpacing(10)
+
+ shadow_color_label = QLabel("Shadow Color:")
+ shadow_color_label.setMinimumWidth(110)
+ shadow_color_layout.addWidget(shadow_color_label)
+
+ # Shadow color preview
+ self.shadow_preview_frame = QFrame()
+ self.shadow_preview_frame.setFixedSize(30, 25)
+ self.shadow_preview_frame.setFrameShape(QFrame.Box)
+ self.shadow_preview_frame.setLineWidth(1)
+ # Initialize with current color
+ sr, sg, sb = self.shadow_color_r_value, self.shadow_color_g_value, self.shadow_color_b_value
+ self.shadow_preview_frame.setStyleSheet(f"background-color: rgb({sr},{sg},{sb}); border: 1px solid #5a9fd4;")
+ shadow_color_layout.addWidget(self.shadow_preview_frame)
+
+ # Shadow RGB display label
+ sr, sg, sb = self.shadow_color_r_value, self.shadow_color_g_value, self.shadow_color_b_value
+ self.shadow_rgb_label = QLabel(f"RGB({sr},{sg},{sb})")
+ self.shadow_rgb_label.setMinimumWidth(120)
+ shadow_color_layout.addWidget(self.shadow_rgb_label)
+
+ # Shadow color picker button
+ def pick_shadow_color():
+ # Get current color
+ current_color = QColor(self.shadow_color_r_value, self.shadow_color_g_value, self.shadow_color_b_value)
+
+ # Open color dialog
+ color = QColorDialog.getColor(current_color, self.dialog, "Choose Shadow Color")
+ if color.isValid():
+ # Update RGB values
+ self.shadow_color_r_value = color.red()
+ self.shadow_color_g_value = color.green()
+ self.shadow_color_b_value = color.blue()
+ # Update display
+ self.shadow_rgb_label.setText(f"RGB({color.red()},{color.green()},{color.blue()})")
+ self._update_shadow_preview(None)
+ # Save settings to config
+ self._save_rendering_settings()
+
+ choose_shadow_btn = QPushButton("Choose Color")
+ choose_shadow_btn.setMinimumWidth(120)
+ choose_shadow_btn.clicked.connect(pick_shadow_color)
+ choose_shadow_btn.setStyleSheet("QPushButton { background-color: #17a2b8; color: white; padding: 5px 15px; }")
+ shadow_color_layout.addWidget(choose_shadow_btn)
+ shadow_color_layout.addStretch()
+
+ shadow_controls_layout.addWidget(shadow_color_frame)
+
+ self._update_shadow_preview(None) # Initialize with loaded colors
+
+ # Shadow offset
+ offset_frame = QWidget()
+ offset_layout = QHBoxLayout(offset_frame)
+ offset_layout.setContentsMargins(0, 2, 0, 0)
+ offset_layout.setSpacing(10)
+
+ offset_label = QLabel("Shadow Offset:")
+ offset_label.setMinimumWidth(110)
+ offset_layout.addWidget(offset_label)
+
+ # X offset
+ x_label = QLabel("X:")
+ offset_layout.addWidget(x_label)
+
+ self.shadow_offset_x_spinbox = QSpinBox()
+ self.shadow_offset_x_spinbox.setMinimum(-10)
+ self.shadow_offset_x_spinbox.setMaximum(10)
+ self.shadow_offset_x_spinbox.setValue(self.shadow_offset_x_value)
+ self.shadow_offset_x_spinbox.setMinimumWidth(60)
+ self.shadow_offset_x_spinbox.valueChanged.connect(lambda value: (setattr(self, 'shadow_offset_x_value', value), self._save_rendering_settings(), self._apply_rendering_settings()))
+ self._disable_spinbox_mousewheel(self.shadow_offset_x_spinbox)
+ offset_layout.addWidget(self.shadow_offset_x_spinbox)
+
+ # Y offset
+ y_label = QLabel("Y:")
+ offset_layout.addWidget(y_label)
+
+ self.shadow_offset_y_spinbox = QSpinBox()
+ self.shadow_offset_y_spinbox.setMinimum(-10)
+ self.shadow_offset_y_spinbox.setMaximum(10)
+ self.shadow_offset_y_spinbox.setValue(self.shadow_offset_y_value)
+ self.shadow_offset_y_spinbox.setMinimumWidth(60)
+ self.shadow_offset_y_spinbox.valueChanged.connect(lambda value: (setattr(self, 'shadow_offset_y_value', value), self._save_rendering_settings(), self._apply_rendering_settings()))
+ self._disable_spinbox_mousewheel(self.shadow_offset_y_spinbox)
+ offset_layout.addWidget(self.shadow_offset_y_spinbox)
+ offset_layout.addStretch()
+
+ shadow_controls_layout.addWidget(offset_frame)
+
+ # Shadow blur
+ blur_frame = QWidget()
+ blur_layout = QHBoxLayout(blur_frame)
+ blur_layout.setContentsMargins(0, 2, 0, 0)
+ blur_layout.setSpacing(10)
+
+ blur_label = QLabel("Shadow Blur:")
+ blur_label.setMinimumWidth(110)
+ blur_layout.addWidget(blur_label)
+
+ self.shadow_blur_spinbox = QSpinBox()
+ self.shadow_blur_spinbox.setMinimum(0)
+ self.shadow_blur_spinbox.setMaximum(10)
+ self.shadow_blur_spinbox.setValue(self.shadow_blur_value)
+ self.shadow_blur_spinbox.setMinimumWidth(100)
+ self.shadow_blur_spinbox.valueChanged.connect(lambda value: (self._on_shadow_blur_changed(value), self._save_rendering_settings(), self._apply_rendering_settings()))
+ self._disable_spinbox_mousewheel(self.shadow_blur_spinbox)
+ blur_layout.addWidget(self.shadow_blur_spinbox)
+
+ # Shadow blur value label
+ self.shadow_blur_value_label = QLabel(f"{self.shadow_blur_value}")
+ self.shadow_blur_value_label.setMinimumWidth(30)
+ blur_layout.addWidget(self.shadow_blur_value_label)
+
+ blur_help_label = QLabel("(0=sharp, 10=blurry)")
+ blur_help_font = QFont('Arial', 9)
+ blur_help_label.setFont(blur_help_font)
+ blur_help_label.setStyleSheet("color: gray;")
+ blur_layout.addWidget(blur_help_label)
+ blur_layout.addStretch()
+
+ shadow_controls_layout.addWidget(blur_frame)
+
+ # Add shadow_controls to font_render_frame_layout
+ font_render_frame_layout.addWidget(self.shadow_controls)
+
+ # Initially disable shadow controls
+ self._toggle_shadow_controls()
+
+ # Add font_render_frame to RIGHT COLUMN
+ right_column_layout.addWidget(font_render_frame)
+
+ # Control buttons - IN LEFT COLUMN
+ # Check if ready based on selected provider
+ # Get API key from main GUI - handle both Tkinter and PySide6
+ try:
+ if hasattr(self.main_gui.api_key_entry, 'text'): # PySide6 QLineEdit
+ has_api_key = bool(self.main_gui.api_key_entry.text().strip())
+ elif hasattr(self.main_gui.api_key_entry, 'get'): # Tkinter Entry
+ has_api_key = bool(self.main_gui.api_key_entry.get().strip())
+ else:
+ has_api_key = False
+        except Exception:
+ has_api_key = False
+
+ provider = self.ocr_provider_value
+
+ # Determine readiness based on provider
+ if provider == 'google':
+ has_vision = os.path.exists(self.main_gui.config.get('google_vision_credentials', ''))
+ is_ready = has_api_key and has_vision
+ elif provider == 'azure':
+ has_azure = bool(self.main_gui.config.get('azure_vision_key', ''))
+ is_ready = has_api_key and has_azure
+ elif provider == 'custom-api':
+ is_ready = has_api_key # Only needs API key
+ else:
+ # Local providers (manga-ocr, easyocr, etc.) only need API key for translation
+ is_ready = has_api_key
+
+ control_frame = QWidget()
+ control_layout = QVBoxLayout(control_frame)
+ control_layout.setContentsMargins(10, 15, 10, 10)
+ control_layout.setSpacing(15)
+
+ self.start_button = QPushButton("▶ Start Translation")
+ self.start_button.clicked.connect(self._start_translation)
+ self.start_button.setEnabled(is_ready)
+ self.start_button.setMinimumHeight(90) # Increased from 80 to 90
+ self.start_button.setStyleSheet(
+ "QPushButton { "
+ " background-color: #28a745; "
+ " color: white; "
+ " padding: 22px 30px; "
+ " font-size: 14pt; "
+ " font-weight: bold; "
+ " border-radius: 8px; "
+ "} "
+ "QPushButton:hover { background-color: #218838; } "
+ "QPushButton:disabled { "
+ " background-color: #2d2d2d; "
+ " color: #666666; "
+ "}"
+ )
+ control_layout.addWidget(self.start_button)
+
+ # Add tooltip to show why button is disabled
+ if not is_ready:
+ reasons = []
+ if not has_api_key:
+ reasons.append("API key not configured")
+ if provider == 'google' and not os.path.exists(self.main_gui.config.get('google_vision_credentials', '')):
+ reasons.append("Google Vision credentials not set")
+ elif provider == 'azure' and not self.main_gui.config.get('azure_vision_key', ''):
+ reasons.append("Azure credentials not configured")
+ tooltip_text = "Cannot start: " + ", ".join(reasons)
+ self.start_button.setToolTip(tooltip_text)
+
+ self.stop_button = QPushButton("⏹ Stop")
+ self.stop_button.clicked.connect(self._stop_translation)
+ self.stop_button.setEnabled(False)
+ self.stop_button.setMinimumHeight(90) # Increased from 80 to 90
+ self.stop_button.setStyleSheet(
+ "QPushButton { "
+ " background-color: #dc3545; "
+ " color: white; "
+ " padding: 22px 30px; "
+ " font-size: 14pt; "
+ " font-weight: bold; "
+ " border-radius: 8px; "
+ "} "
+ "QPushButton:hover { background-color: #c82333; } "
+ "QPushButton:disabled { "
+ " background-color: #2d2d2d; "
+ " color: #999999; "
+ "}"
+ )
+ control_layout.addWidget(self.stop_button)
+
+ # Add control buttons to LEFT COLUMN
+ left_column_layout.addWidget(control_frame)
+
+ # Add stretch to balance columns
+ left_column_layout.addStretch()
+ right_column_layout.addStretch()
+
+ # Set size policies to make columns expand and shrink properly
+ left_column.setSizePolicy(QSizePolicy.Policy.Preferred, QSizePolicy.Policy.Preferred)
+ right_column.setSizePolicy(QSizePolicy.Policy.Preferred, QSizePolicy.Policy.Preferred)
+
+ # Set minimum widths for columns to allow shrinking
+ left_column.setMinimumWidth(300)
+ right_column.setMinimumWidth(300)
+
+ # Add columns to container with equal stretch factors
+ columns_layout.addWidget(left_column, stretch=1)
+ columns_layout.addWidget(right_column, stretch=1)
+
+ # Make the columns container itself have proper size policy
+ columns_container.setSizePolicy(QSizePolicy.Policy.Expanding, QSizePolicy.Policy.Preferred)
+
+ # Add columns container to main layout
+ main_layout.addWidget(columns_container)
+
+ # Progress frame
+ progress_frame = QGroupBox("Progress")
+ progress_frame_font = QFont('Arial', 10)
+ progress_frame_font.setBold(True)
+ progress_frame.setFont(progress_frame_font)
+ progress_frame_layout = QVBoxLayout(progress_frame)
+ progress_frame_layout.setContentsMargins(10, 10, 10, 8)
+ progress_frame_layout.setSpacing(6)
+
+ # Overall progress
+ self.progress_label = QLabel("Ready to start")
+ progress_label_font = QFont('Arial', 9)
+ self.progress_label.setFont(progress_label_font)
+ self.progress_label.setStyleSheet("color: white;")
+ progress_frame_layout.addWidget(self.progress_label)
+
+ # Create and configure progress bar
+ self.progress_bar = QProgressBar()
+ self.progress_bar.setMinimum(0)
+ self.progress_bar.setMaximum(100)
+ self.progress_bar.setValue(0)
+ self.progress_bar.setMinimumHeight(18)
+ self.progress_bar.setTextVisible(True)
+ self.progress_bar.setStyleSheet("""
+ QProgressBar {
+ border: 1px solid #4a5568;
+ border-radius: 3px;
+ background-color: #2d3748;
+ text-align: center;
+ color: white;
+ }
+ QProgressBar::chunk {
+ background-color: white;
+ }
+ """)
+ progress_frame_layout.addWidget(self.progress_bar)
+
+ # Current file status
+ self.current_file_label = QLabel("")
+ current_file_font = QFont('Arial', 10)
+ self.current_file_label.setFont(current_file_font)
+ self.current_file_label.setStyleSheet("color: lightgray;")
+ progress_frame_layout.addWidget(self.current_file_label)
+
+ main_layout.addWidget(progress_frame)
+
+ # Log frame
+ log_frame = QGroupBox("Translation Log")
+ log_frame_font = QFont('Arial', 10)
+ log_frame_font.setBold(True)
+ log_frame.setFont(log_frame_font)
+ log_frame_layout = QVBoxLayout(log_frame)
+ log_frame_layout.setContentsMargins(10, 10, 10, 8)
+ log_frame_layout.setSpacing(6)
+
+ # Log text widget (QTextEdit handles scrolling automatically)
+ self.log_text = QTextEdit()
+ self.log_text.setReadOnly(True)
+ self.log_text.setMinimumHeight(600) # Increased from 400 to 600 for better visibility
+ self.log_text.setStyleSheet("""
+ QTextEdit {
+ background-color: #1e1e1e;
+ color: white;
+ font-family: 'Consolas', 'Courier New', monospace;
+ font-size: 10pt;
+ border: 1px solid #4a5568;
+ }
+ """)
+ log_frame_layout.addWidget(self.log_text)
+
+ main_layout.addWidget(log_frame)
+
+ # Restore persistent log from previous sessions
+ self._restore_persistent_log()
+
+ def _restore_persistent_log(self):
+ """Restore log messages from persistent storage"""
+ try:
+ with MangaTranslationTab._persistent_log_lock:
+ if MangaTranslationTab._persistent_log:
+ # PySide6 QTextEdit
+ color_map = {
+ 'info': 'white',
+ 'success': 'green',
+ 'warning': 'orange',
+ 'error': 'red',
+ 'debug': 'lightblue'
+ }
+ for message, level in MangaTranslationTab._persistent_log:
+ color = color_map.get(level, 'white')
+ self.log_text.setTextColor(QColor(color))
+ self.log_text.append(message)
+ except Exception as e:
+ print(f"Failed to restore persistent log: {e}")
+
+ def _show_help_dialog(self, title: str, message: str):
+ """Show a help dialog with the given title and message"""
+ # Create a PySide6 dialog
+ help_dialog = QDialog(self.dialog)
+ help_dialog.setWindowTitle(title)
+ help_dialog.resize(500, 400)
+ help_dialog.setModal(True)
+
+ # Main layout
+ main_layout = QVBoxLayout(help_dialog)
+ main_layout.setContentsMargins(20, 20, 20, 20)
+ main_layout.setSpacing(10)
+
+ # Icon and title
+ title_frame = QWidget()
+ title_layout = QHBoxLayout(title_frame)
+ title_layout.setContentsMargins(0, 0, 0, 10)
+
+ icon_label = QLabel("ℹ️")
+ icon_font = QFont('Arial', 20)
+ icon_label.setFont(icon_font)
+ title_layout.addWidget(icon_label)
+
+ title_label = QLabel(title)
+ title_font = QFont('Arial', 12)
+ title_font.setBold(True)
+ title_label.setFont(title_font)
+ title_layout.addWidget(title_label)
+ title_layout.addStretch()
+
+ main_layout.addWidget(title_frame)
+
+ # Help text in a scrollable text widget
+ text_widget = QTextEdit()
+ text_widget.setReadOnly(True)
+ text_widget.setPlainText(message)
+ text_font = QFont('Arial', 10)
+ text_widget.setFont(text_font)
+ main_layout.addWidget(text_widget)
+
+ # Close button
+ close_btn = QPushButton("Close")
+ close_btn.clicked.connect(help_dialog.accept)
+ close_btn.setStyleSheet("QPushButton { background-color: #6c757d; color: white; padding: 5px 20px; }")
+ main_layout.addWidget(close_btn, alignment=Qt.AlignCenter)
+
+ # Show the dialog
+ help_dialog.exec()
+
+ def _on_visual_context_toggle(self):
+ """Handle visual context toggle"""
+        # Read the live checkbox state rather than a possibly stale stored value
+        enabled = self.visual_context_checkbox.isChecked()
+        self.visual_context_enabled_value = enabled
+        self.main_gui.config['manga_visual_context_enabled'] = enabled
+
+ # Update translator if it exists
+ if self.translator:
+ self.translator.visual_context_enabled = enabled
+
+ # Save config
+ if hasattr(self.main_gui, 'save_config'):
+ self.main_gui.save_config(show_message=False)
+
+ # Log the change
+ if enabled:
+ self._log("📷 Visual context ENABLED - Images will be sent to API", "info")
+ self._log(" Make sure you're using a vision-capable model", "warning")
+ else:
+ self._log("📝 Visual context DISABLED - Text-only mode", "info")
+ self._log(" Compatible with non-vision models (Claude, GPT-3.5, etc.)", "success")
+
+ def _open_advanced_settings(self):
+ """Open the manga advanced settings dialog"""
+ try:
+ def on_settings_saved(settings):
+ """Callback when settings are saved"""
+ # Update config with new settings
+ self.main_gui.config['manga_settings'] = settings
+
+ # Mirror critical font size values into nested settings (avoid legacy top-level min key)
+ try:
+ rendering = settings.get('rendering', {}) if isinstance(settings, dict) else {}
+ font_sizing = settings.get('font_sizing', {}) if isinstance(settings, dict) else {}
+ min_from_dialog = rendering.get('auto_min_size', font_sizing.get('min_readable', font_sizing.get('min_size')))
+ max_from_dialog = rendering.get('auto_max_size', font_sizing.get('max_size'))
+ if min_from_dialog is not None:
+ ms = self.main_gui.config.setdefault('manga_settings', {})
+ rend = ms.setdefault('rendering', {})
+ font = ms.setdefault('font_sizing', {})
+ rend['auto_min_size'] = int(min_from_dialog)
+ font['min_size'] = int(min_from_dialog)
+ if hasattr(self, 'auto_min_size_value'):
+ self.auto_min_size_value = int(min_from_dialog)
+ if max_from_dialog is not None:
+ self.main_gui.config['manga_max_font_size'] = int(max_from_dialog)
+ if hasattr(self, 'max_font_size_value'):
+ self.max_font_size_value = int(max_from_dialog)
+ except Exception:
+ pass
+
+ # Persist mirrored values
+ try:
+ if hasattr(self.main_gui, 'save_config'):
+ self.main_gui.save_config(show_message=False)
+ except Exception:
+ pass
+
+ # Reload settings in translator if it exists
+ if self.translator:
+ self._log("📋 Reloading settings in translator...", "info")
+ # The translator will pick up new settings on next operation
+
+ self._log("✅ Advanced settings saved and applied", "success")
+
+ # Open the settings dialog
+ # Note: MangaSettingsDialog is still Tkinter-based, so pass Tkinter root
+ MangaSettingsDialog(
+ parent=self.main_gui.master, # Use Tkinter root instead of PySide6 dialog
+ main_gui=self.main_gui,
+ config=self.main_gui.config,
+ callback=on_settings_saved
+ )
+
+ except Exception as e:
+ from PySide6.QtWidgets import QMessageBox
+ self._log(f"❌ Error opening settings dialog: {str(e)}", "error")
+ QMessageBox.critical(self.dialog, "Error", f"Failed to open settings dialog:\n{str(e)}")
+
+ def _toggle_font_size_mode(self):
+ """Toggle between auto, fixed size and multiplier modes"""
+ mode = self.font_size_mode_value
+
+ # Handle main frames (fixed size and multiplier)
+ if hasattr(self, 'fixed_size_frame') and hasattr(self, 'multiplier_frame'):
+ if mode == "fixed":
+ self.fixed_size_frame.show()
+ self.multiplier_frame.hide()
+ if hasattr(self, 'constraint_frame'):
+ self.constraint_frame.hide()
+ elif mode == "multiplier":
+ self.fixed_size_frame.hide()
+ self.multiplier_frame.show()
+ if hasattr(self, 'constraint_frame'):
+ self.constraint_frame.show()
+ else: # auto
+ self.fixed_size_frame.hide()
+ self.multiplier_frame.hide()
+ if hasattr(self, 'constraint_frame'):
+ self.constraint_frame.hide()
+
+        # The min/max font size fields are intentionally always visible.
+        # They are created once at build time and remain shown in every mode.
+
+ # Only save and apply if we're not initializing
+ if not hasattr(self, '_initializing') or not self._initializing:
+ self._save_rendering_settings()
+ self._apply_rendering_settings()
+
+ def _update_multiplier_label(self, value):
+ """Update multiplier label and value variable"""
+ self.font_size_multiplier_value = float(value) # UPDATE THE VALUE VARIABLE!
+ self.multiplier_label.setText(f"{float(value):.1f}x")
+
+ def _on_line_spacing_changed(self, value):
+ """Update line spacing value label and value variable"""
+ self.line_spacing_value = float(value) # UPDATE THE VALUE VARIABLE!
+ try:
+ if hasattr(self, 'line_spacing_value_label'):
+ self.line_spacing_value_label.setText(f"{float(value):.2f}")
+ except Exception:
+ pass
+
+ def _on_shadow_blur_changed(self, value):
+ """Update shadow blur value label and value variable"""
+ self.shadow_blur_value = int(float(value)) # UPDATE THE VALUE VARIABLE!
+ try:
+ if hasattr(self, 'shadow_blur_value_label'):
+ self.shadow_blur_value_label.setText(f"{int(float(value))}")
+ except Exception:
+ pass
+
+ def _on_ft_only_bg_opacity_changed(self):
+ """Handle free text only background opacity checkbox change (PySide6)"""
+ # Update the value from checkbox state
+ self.free_text_only_bg_opacity_value = self.ft_only_checkbox.isChecked()
+
+ def _update_color_preview(self, event=None):
+ """Update the font color preview"""
+ r = self.text_color_r_value
+ g = self.text_color_g_value
+ b = self.text_color_b_value
+ if hasattr(self, 'color_preview_frame'):
+ self.color_preview_frame.setStyleSheet(f"background-color: rgb({r},{g},{b}); border: 1px solid #5a9fd4;")
+ # Auto-save and apply on change
+ if event is not None: # Only save on user interaction, not initial load
+ self._save_rendering_settings()
+ self._apply_rendering_settings()
+
+ def _update_shadow_preview(self, event=None):
+ """Update the shadow color preview"""
+ r = self.shadow_color_r_value
+ g = self.shadow_color_g_value
+ b = self.shadow_color_b_value
+ if hasattr(self, 'shadow_preview_frame'):
+ self.shadow_preview_frame.setStyleSheet(f"background-color: rgb({r},{g},{b}); border: 1px solid #5a9fd4;")
+ # Auto-save and apply on change
+ if event is not None: # Only save on user interaction, not initial load
+ self._save_rendering_settings()
+ self._apply_rendering_settings()
+
+ def _toggle_azure_key_visibility(self, state):
+ """Toggle visibility of Azure API key"""
+ from PySide6.QtWidgets import QLineEdit
+ from PySide6.QtCore import Qt
+
+ # Check the checkbox state directly to be sure
+ is_checked = self.show_azure_key_checkbox.isChecked()
+
+ if is_checked:
+ # Show the key
+ self.azure_key_entry.setEchoMode(QLineEdit.Normal)
+ else:
+ # Hide the key
+ self.azure_key_entry.setEchoMode(QLineEdit.Password)
+
+ def _toggle_shadow_controls(self):
+ """Enable/disable shadow controls based on checkbox"""
+ if self.shadow_enabled_value:
+ if hasattr(self, 'shadow_controls'):
+ self.shadow_controls.setEnabled(True)
+ else:
+ if hasattr(self, 'shadow_controls'):
+ self.shadow_controls.setEnabled(False)
+
+ def _set_font_preset(self, preset: str):
+ """Apply font sizing preset (moved from dialog)"""
+ try:
+ if preset == 'small':
+ self.font_algorithm_value = 'conservative'
+ self.auto_min_size_value = 10
+ self.max_font_size_value = 32
+ self.prefer_larger_value = False
+ self.bubble_size_factor_value = True
+ self.line_spacing_value = 1.2
+ self.max_lines_value = 8
+ elif preset == 'balanced':
+ self.font_algorithm_value = 'smart'
+ self.auto_min_size_value = 12
+ self.max_font_size_value = 48
+ self.prefer_larger_value = True
+ self.bubble_size_factor_value = True
+ self.line_spacing_value = 1.3
+ self.max_lines_value = 10
+ elif preset == 'large':
+ self.font_algorithm_value = 'aggressive'
+ self.auto_min_size_value = 14
+ self.max_font_size_value = 64
+ self.prefer_larger_value = True
+ self.bubble_size_factor_value = False
+ self.line_spacing_value = 1.4
+ self.max_lines_value = 12
+
+ # Update all spinboxes with new values
+ if hasattr(self, 'min_size_spinbox'):
+ self.min_size_spinbox.setValue(self.auto_min_size_value)
+ if hasattr(self, 'max_size_spinbox'):
+ self.max_size_spinbox.setValue(self.max_font_size_value)
+ if hasattr(self, 'line_spacing_spinbox'):
+ self.line_spacing_spinbox.setValue(self.line_spacing_value)
+ if hasattr(self, 'max_lines_spinbox'):
+ self.max_lines_spinbox.setValue(self.max_lines_value)
+
+ # Update checkboxes
+ if hasattr(self, 'prefer_larger_checkbox'):
+ self.prefer_larger_checkbox.setChecked(self.prefer_larger_value)
+ if hasattr(self, 'bubble_size_factor_checkbox'):
+ self.bubble_size_factor_checkbox.setChecked(self.bubble_size_factor_value)
+
+ # Update the line spacing label
+ if hasattr(self, 'line_spacing_value_label'):
+ self.line_spacing_value_label.setText(f"{float(self.line_spacing_value):.2f}")
+
+ self._save_rendering_settings()
+ except Exception as e:
+ self._log(f"Error setting preset: {e}", "debug")
+
+ def _enable_widget_tree(self, widget):
+ """Recursively enable a widget and its children (PySide6 version)"""
+ try:
+ widget.setEnabled(True)
+        except Exception:
+ pass
+ # PySide6 way to iterate children
+ try:
+ for child in widget.children():
+ if hasattr(child, 'setEnabled'):
+ self._enable_widget_tree(child)
+        except Exception:
+ pass
+
+ def _disable_widget_tree(self, widget):
+ """Recursively disable a widget and its children (PySide6 version)"""
+ try:
+ widget.setEnabled(False)
+        except Exception:
+ pass
+ # PySide6 way to iterate children
+ try:
+ for child in widget.children():
+ if hasattr(child, 'setEnabled'):
+ self._disable_widget_tree(child)
+        except Exception:
+ pass
+
+ def _load_rendering_settings(self):
+ """Load text rendering settings from config"""
+ config = self.main_gui.config
+
+ # One-time migration for legacy min font size key
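+        # Promotes the old flat 'manga_min_readable_size' key into
+        # manga_settings.rendering.auto_min_size / font_sizing.min_size
+        # (only if it raises the current minimum), then removes the legacy key.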
+ try:
+ legacy_min = config.get('manga_min_readable_size', None)
+ if legacy_min is not None:
+ ms = config.setdefault('manga_settings', {})
+ rend = ms.setdefault('rendering', {})
+ font = ms.setdefault('font_sizing', {})
+ current_min = rend.get('auto_min_size', font.get('min_size'))
+ if current_min is None or int(current_min) < int(legacy_min):
+ rend['auto_min_size'] = int(legacy_min)
+ font['min_size'] = int(legacy_min)
+ # Remove legacy key
+ try:
+ del config['manga_min_readable_size']
+ except Exception:
+ pass
+ # Persist migration silently
+ if hasattr(self.main_gui, 'save_config'):
+ self.main_gui.save_config(show_message=False)
+ except Exception:
+ pass
+
+ # Get inpainting settings from the nested location
+ manga_settings = config.get('manga_settings', {})
+ inpaint_settings = manga_settings.get('inpainting', {})
+
+ # Load inpaint method from the correct location (no Tkinter variables in PySide6)
+ self.inpaint_method_value = inpaint_settings.get('method', 'local')
+ self.local_model_type_value = inpaint_settings.get('local_method', 'anime_onnx')
+
+ # Load model paths
+ self.local_model_path_value = ''
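+        # Only the path for the currently selected model type is kept here;
+        # the other types' paths remain in the inpainting config untouched.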
+ for model_type in ['aot', 'aot_onnx', 'lama', 'lama_onnx', 'anime', 'anime_onnx', 'mat', 'ollama', 'sd_local']:
+ path = inpaint_settings.get(f'{model_type}_model_path', '')
+ if model_type == self.local_model_type_value:
+ self.local_model_path_value = path
+
+ # Initialize with defaults (plain Python values, no Tkinter variables)
+ self.bg_opacity_value = config.get('manga_bg_opacity', 130)
+ self.free_text_only_bg_opacity_value = config.get('manga_free_text_only_bg_opacity', True)
+ self.bg_style_value = config.get('manga_bg_style', 'circle')
+ self.bg_reduction_value = config.get('manga_bg_reduction', 1.0)
+ self.font_size_value = config.get('manga_font_size', 0)
+
+ self.selected_font_path = config.get('manga_font_path', None)
+ self.skip_inpainting_value = config.get('manga_skip_inpainting', False)
+ self.inpaint_quality_value = config.get('manga_inpaint_quality', 'high')
+ self.inpaint_dilation_value = config.get('manga_inpaint_dilation', 15)
+ self.inpaint_passes_value = config.get('manga_inpaint_passes', 2)
+
+ self.font_size_mode_value = config.get('manga_font_size_mode', 'fixed')
+ self.font_size_multiplier_value = config.get('manga_font_size_multiplier', 1.0)
+
+ # Auto fit style for auto mode
+ try:
+ rend_cfg = (config.get('manga_settings', {}) or {}).get('rendering', {})
+ except Exception:
+ rend_cfg = {}
+ self.auto_fit_style_value = rend_cfg.get('auto_fit_style', 'balanced')
+
+ # Auto minimum font size (from rendering or font_sizing)
+ try:
+ font_cfg = (config.get('manga_settings', {}) or {}).get('font_sizing', {})
+ except Exception:
+ font_cfg = {}
+ auto_min_default = rend_cfg.get('auto_min_size', font_cfg.get('min_size', 10))
+ self.auto_min_size_value = int(auto_min_default)
+
+ self.force_caps_lock_value = config.get('manga_force_caps_lock', True)
+ self.constrain_to_bubble_value = config.get('manga_constrain_to_bubble', True)
+
+ # Advanced font sizing (from manga_settings.font_sizing)
+ font_settings = (config.get('manga_settings', {}) or {}).get('font_sizing', {})
+ self.font_algorithm_value = str(font_settings.get('algorithm', 'smart'))
+ self.prefer_larger_value = bool(font_settings.get('prefer_larger', True))
+ self.bubble_size_factor_value = bool(font_settings.get('bubble_size_factor', True))
+ self.line_spacing_value = float(font_settings.get('line_spacing', 1.3))
+ self.max_lines_value = int(font_settings.get('max_lines', 10))
+
+ # Determine effective max font size with fallback
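+        # Fallback order: top-level 'manga_max_font_size', then
+        # manga_settings.rendering.auto_max_size, then font_sizing.max_size, else 48.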
+ font_max_top = config.get('manga_max_font_size', None)
+ nested_ms = config.get('manga_settings', {}) if isinstance(config.get('manga_settings', {}), dict) else {}
+ nested_render = nested_ms.get('rendering', {}) if isinstance(nested_ms.get('rendering', {}), dict) else {}
+ nested_font = nested_ms.get('font_sizing', {}) if isinstance(nested_ms.get('font_sizing', {}), dict) else {}
+ effective_max = font_max_top if font_max_top is not None else (
+ nested_render.get('auto_max_size', nested_font.get('max_size', 48))
+ )
+ self.max_font_size_value = int(effective_max)
+
+ # If top-level keys were missing, mirror max now (won't save during initialization)
+ if font_max_top is None:
+ self.main_gui.config['manga_max_font_size'] = int(effective_max)
+
+ self.strict_text_wrapping_value = config.get('manga_strict_text_wrapping', True)
+
+ # Font color settings
+ manga_text_color = config.get('manga_text_color', [102, 0, 0])
+ self.text_color_r_value = manga_text_color[0]
+ self.text_color_g_value = manga_text_color[1]
+ self.text_color_b_value = manga_text_color[2]
+
+ # Shadow settings
+ self.shadow_enabled_value = config.get('manga_shadow_enabled', True)
+
+ manga_shadow_color = config.get('manga_shadow_color', [204, 128, 128])
+ self.shadow_color_r_value = manga_shadow_color[0]
+ self.shadow_color_g_value = manga_shadow_color[1]
+ self.shadow_color_b_value = manga_shadow_color[2]
+
+ self.shadow_offset_x_value = config.get('manga_shadow_offset_x', 2)
+ self.shadow_offset_y_value = config.get('manga_shadow_offset_y', 2)
+ self.shadow_blur_value = config.get('manga_shadow_blur', 0)
+
+ # Initialize font_style with saved value or default
+ self.font_style_value = config.get('manga_font_style', 'Default')
+
+ # Full page context settings
+ self.full_page_context_value = config.get('manga_full_page_context', False)
+
+ self.full_page_context_prompt = config.get('manga_full_page_context_prompt',
+ "You will receive multiple text segments from a manga page, each prefixed with an index like [0], [1], etc. "
+ "Translate each segment considering the context of all segments together. "
+ "Maintain consistency in character names, tone, and style across all translations.\n\n"
+ "CRITICAL: Return your response as a valid JSON object where each key includes BOTH the index prefix "
+ "AND the original text EXACTLY as provided (e.g., '[0] こんにちは'), and each value is the translation.\n"
+ "This is essential for correct mapping - do not modify or omit the index prefixes!\n\n"
+ "Make sure to properly escape any special characters in the JSON:\n"
+ "- Use \\n for newlines\n"
+ "- Use \\\" for quotes\n"
+ "- Use \\\\ for backslashes\n\n"
+ "Example:\n"
+ '{\n'
+ ' "[0] こんにちは": "Hello",\n'
+ ' "[1] ありがとう": "Thank you",\n'
+ ' "[2] さようなら": "Goodbye"\n'
+ '}\n\n'
+ 'REMEMBER: Keep the [index] prefix in each JSON key exactly as shown in the input!'
+ )
+
+ # Load OCR prompt
+ self.ocr_prompt = config.get('manga_ocr_prompt',
+ "YOU ARE AN OCR SYSTEM. YOUR ONLY JOB IS TEXT EXTRACTION.\n\n"
+ "CRITICAL RULES:\n"
+ "1. DO NOT TRANSLATE ANYTHING\n"
+ "2. DO NOT MODIFY THE TEXT\n"
+ "3. DO NOT EXPLAIN OR COMMENT\n"
+ "4. ONLY OUTPUT THE EXACT TEXT YOU SEE\n"
+ "5. PRESERVE NATURAL TEXT FLOW - DO NOT ADD UNNECESSARY LINE BREAKS\n\n"
+ "If you see Korean text, output it in Korean.\n"
+ "If you see Japanese text, output it in Japanese.\n"
+ "If you see Chinese text, output it in Chinese.\n"
+ "If you see English text, output it in English.\n\n"
+ "IMPORTANT: Only use line breaks where they naturally occur in the original text "
+ "(e.g., between dialogue lines or paragraphs). Do not break text mid-sentence or "
+ "between every word/character.\n\n"
+ "For vertical text common in manga/comics, transcribe it as a continuous line unless "
+ "there are clear visual breaks.\n\n"
+ "NEVER translate. ONLY extract exactly what is written.\n"
+ "Output ONLY the raw text, nothing else."
+ )
+ # Visual context setting
+ self.visual_context_enabled_value = self.main_gui.config.get('manga_visual_context_enabled', True)
+ self.qwen2vl_model_size = config.get('qwen2vl_model_size', '1') # Default to '1' (2B)
+
+ # Initialize RapidOCR settings
+ self.rapidocr_use_recognition_value = self.main_gui.config.get('rapidocr_use_recognition', True)
+ self.rapidocr_language_value = self.main_gui.config.get('rapidocr_language', 'auto')
+ self.rapidocr_detection_mode_value = self.main_gui.config.get('rapidocr_detection_mode', 'document')
+
+ # Output settings
+ self.create_subfolder_value = config.get('manga_create_subfolder', True)
+
+ def _save_rendering_settings(self):
+ """Save rendering settings with validation"""
+ # Don't save during initialization
+ if hasattr(self, '_initializing') and self._initializing:
+ return
+
+ # Validate that variables exist and have valid values before saving
+ try:
+ # Ensure manga_settings structure exists
+ if 'manga_settings' not in self.main_gui.config:
+ self.main_gui.config['manga_settings'] = {}
+ if 'inpainting' not in self.main_gui.config['manga_settings']:
+ self.main_gui.config['manga_settings']['inpainting'] = {}
+
+ # Save to nested location
+ inpaint = self.main_gui.config['manga_settings']['inpainting']
+ if hasattr(self, 'inpaint_method_value'):
+ inpaint['method'] = self.inpaint_method_value
+ if hasattr(self, 'local_model_type_value'):
+ inpaint['local_method'] = self.local_model_type_value
+ model_type = self.local_model_type_value
+ if hasattr(self, 'local_model_path_value'):
+ inpaint[f'{model_type}_model_path'] = self.local_model_path_value
+
+ # Add new inpainting settings
+ if hasattr(self, 'inpaint_method_value'):
+ self.main_gui.config['manga_inpaint_method'] = self.inpaint_method_value
+ if hasattr(self, 'local_model_type_value'):
+ self.main_gui.config['manga_local_inpaint_model'] = self.local_model_type_value
+
+ # Save model paths for each type
+            # Keep this list in sync with the model types handled in _load_rendering_settings
+            for model_type in ['aot', 'aot_onnx', 'lama', 'lama_onnx', 'anime', 'anime_onnx', 'mat', 'ollama', 'sd_local']:
+ if hasattr(self, 'local_model_type_value'):
+ if model_type == self.local_model_type_value:
+ if hasattr(self, 'local_model_path_value'):
+ path = self.local_model_path_value
+ if path:
+ self.main_gui.config[f'manga_{model_type}_model_path'] = path
+
+ # Save all other settings with validation
+ if hasattr(self, 'bg_opacity_value'):
+ self.main_gui.config['manga_bg_opacity'] = self.bg_opacity_value
+ if hasattr(self, 'bg_style_value'):
+ self.main_gui.config['manga_bg_style'] = self.bg_style_value
+ if hasattr(self, 'bg_reduction_value'):
+ self.main_gui.config['manga_bg_reduction'] = self.bg_reduction_value
+
+ # Save free-text-only background opacity toggle
+ if hasattr(self, 'free_text_only_bg_opacity_value'):
+ self.main_gui.config['manga_free_text_only_bg_opacity'] = bool(self.free_text_only_bg_opacity_value)
+
+ # CRITICAL: Font size settings - validate before saving
+ if hasattr(self, 'font_size_value'):
+ value = self.font_size_value
+ self.main_gui.config['manga_font_size'] = value
+
+ if hasattr(self, 'max_font_size_value'):
+ value = self.max_font_size_value
+ # Validate the value is reasonable
+ if 0 <= value <= 200:
+ self.main_gui.config['manga_max_font_size'] = value
+
+ # Mirror these into nested manga_settings so the dialog and integration stay in sync
+ try:
+ ms = self.main_gui.config.setdefault('manga_settings', {})
+ rend = ms.setdefault('rendering', {})
+ font = ms.setdefault('font_sizing', {})
+ # Mirror bounds
+ if hasattr(self, 'auto_min_size_value'):
+ rend['auto_min_size'] = int(self.auto_min_size_value)
+ font['min_size'] = int(self.auto_min_size_value)
+ if hasattr(self, 'max_font_size_value'):
+ rend['auto_max_size'] = int(self.max_font_size_value)
+ font['max_size'] = int(self.max_font_size_value)
+ # Persist advanced font sizing controls
+ if hasattr(self, 'font_algorithm_value'):
+ font['algorithm'] = str(self.font_algorithm_value)
+ if hasattr(self, 'prefer_larger_value'):
+ font['prefer_larger'] = bool(self.prefer_larger_value)
+ if hasattr(self, 'bubble_size_factor_value'):
+ font['bubble_size_factor'] = bool(self.bubble_size_factor_value)
+ if hasattr(self, 'line_spacing_value'):
+ font['line_spacing'] = float(self.line_spacing_value)
+ if hasattr(self, 'max_lines_value'):
+ font['max_lines'] = int(self.max_lines_value)
+ if hasattr(self, 'auto_fit_style_value'):
+ rend['auto_fit_style'] = str(self.auto_fit_style_value)
+ except Exception:
+ pass
+
+ # Continue with other settings
+ self.main_gui.config['manga_font_path'] = self.selected_font_path
+
+ if hasattr(self, 'skip_inpainting_value'):
+ self.main_gui.config['manga_skip_inpainting'] = self.skip_inpainting_value
+ if hasattr(self, 'inpaint_quality_value'):
+ self.main_gui.config['manga_inpaint_quality'] = self.inpaint_quality_value
+ if hasattr(self, 'inpaint_dilation_value'):
+ self.main_gui.config['manga_inpaint_dilation'] = self.inpaint_dilation_value
+ if hasattr(self, 'inpaint_passes_value'):
+ self.main_gui.config['manga_inpaint_passes'] = self.inpaint_passes_value
+ if hasattr(self, 'font_size_mode_value'):
+ self.main_gui.config['manga_font_size_mode'] = self.font_size_mode_value
+ if hasattr(self, 'font_size_multiplier_value'):
+ self.main_gui.config['manga_font_size_multiplier'] = self.font_size_multiplier_value
+ if hasattr(self, 'font_style_value'):
+ self.main_gui.config['manga_font_style'] = self.font_style_value
+ if hasattr(self, 'constrain_to_bubble_value'):
+ self.main_gui.config['manga_constrain_to_bubble'] = self.constrain_to_bubble_value
+ if hasattr(self, 'strict_text_wrapping_value'):
+ self.main_gui.config['manga_strict_text_wrapping'] = self.strict_text_wrapping_value
+ if hasattr(self, 'force_caps_lock_value'):
+ self.main_gui.config['manga_force_caps_lock'] = self.force_caps_lock_value
+
+ # Save font color as list
+ if hasattr(self, 'text_color_r_value') and hasattr(self, 'text_color_g_value') and hasattr(self, 'text_color_b_value'):
+ self.main_gui.config['manga_text_color'] = [
+ self.text_color_r_value,
+ self.text_color_g_value,
+ self.text_color_b_value
+ ]
+
+ # Save shadow settings
+ if hasattr(self, 'shadow_enabled_value'):
+ self.main_gui.config['manga_shadow_enabled'] = self.shadow_enabled_value
+ if hasattr(self, 'shadow_color_r_value') and hasattr(self, 'shadow_color_g_value') and hasattr(self, 'shadow_color_b_value'):
+ self.main_gui.config['manga_shadow_color'] = [
+ self.shadow_color_r_value,
+ self.shadow_color_g_value,
+ self.shadow_color_b_value
+ ]
+ if hasattr(self, 'shadow_offset_x_value'):
+ self.main_gui.config['manga_shadow_offset_x'] = self.shadow_offset_x_value
+ if hasattr(self, 'shadow_offset_y_value'):
+ self.main_gui.config['manga_shadow_offset_y'] = self.shadow_offset_y_value
+ if hasattr(self, 'shadow_blur_value'):
+ self.main_gui.config['manga_shadow_blur'] = self.shadow_blur_value
+
+ # Save output settings
+ if hasattr(self, 'create_subfolder_value'):
+ self.main_gui.config['manga_create_subfolder'] = self.create_subfolder_value
+
+ # Save full page context settings
+ if hasattr(self, 'full_page_context_value'):
+ self.main_gui.config['manga_full_page_context'] = self.full_page_context_value
+ if hasattr(self, 'full_page_context_prompt'):
+ self.main_gui.config['manga_full_page_context_prompt'] = self.full_page_context_prompt
+
+ # OCR prompt
+ if hasattr(self, 'ocr_prompt'):
+ self.main_gui.config['manga_ocr_prompt'] = self.ocr_prompt
+
+ # Qwen and custom models
+ if hasattr(self, 'qwen2vl_model_size'):
+ self.main_gui.config['qwen2vl_model_size'] = self.qwen2vl_model_size
+
+ # RapidOCR specific settings
+ if hasattr(self, 'rapidocr_use_recognition_value'):
+ self.main_gui.config['rapidocr_use_recognition'] = self.rapidocr_use_recognition_value
+ if hasattr(self, 'rapidocr_detection_mode_value'):
+ self.main_gui.config['rapidocr_detection_mode'] = self.rapidocr_detection_mode_value
+ if hasattr(self, 'rapidocr_language_value'):
+ self.main_gui.config['rapidocr_language'] = self.rapidocr_language_value
+
+ # Auto-save to disk (PySide6 version - no Tkinter black window issue)
+ # Settings are stored in self.main_gui.config and persisted immediately
+ if hasattr(self.main_gui, 'save_config'):
+ self.main_gui.save_config(show_message=False)
+
+ except Exception as e:
+ # Log error but don't crash
+ print(f"Error saving manga settings: {e}")
+
+ def _on_context_toggle(self):
+ """Handle full page context toggle"""
+ enabled = self.full_page_context_value
+ self._save_rendering_settings()
+
+ def _edit_context_prompt(self):
+ """Open dialog to edit full page context prompt and OCR prompt"""
+ from PySide6.QtWidgets import (QDialog, QVBoxLayout, QLabel, QTextEdit,
+ QPushButton, QHBoxLayout)
+ from PySide6.QtCore import Qt
+
+ # Create PySide6 dialog
+ dialog = QDialog(self.dialog)
+ dialog.setWindowTitle("Edit Prompts")
+ dialog.setMinimumSize(700, 600)
+
+ layout = QVBoxLayout(dialog)
+
+ # Instructions
+ instructions = QLabel(
+ "Edit the prompt used for full page context translation.\n"
+ "This will be appended to the main translation system prompt."
+ )
+ instructions.setWordWrap(True)
+ layout.addWidget(instructions)
+
+ # Full Page Context label
+ context_label = QLabel("Full Page Context Prompt:")
+ font = context_label.font()
+ font.setBold(True)
+ context_label.setFont(font)
+ layout.addWidget(context_label)
+
+ # Text editor for context
+ text_editor = QTextEdit()
+ text_editor.setMinimumHeight(200)
+ text_editor.setPlainText(self.full_page_context_prompt)
+ layout.addWidget(text_editor)
+
+ # OCR Prompt label
+ ocr_label = QLabel("OCR System Prompt:")
+ ocr_label.setFont(font)
+ layout.addWidget(ocr_label)
+
+ # Text editor for OCR
+ ocr_editor = QTextEdit()
+ ocr_editor.setMinimumHeight(200)
+
+ # Get current OCR prompt
+ if hasattr(self, 'ocr_prompt'):
+ ocr_editor.setPlainText(self.ocr_prompt)
+ else:
+ ocr_editor.setPlainText("")
+
+ layout.addWidget(ocr_editor)
+
+ def save_prompt():
+ self.full_page_context_prompt = text_editor.toPlainText().strip()
+ self.ocr_prompt = ocr_editor.toPlainText().strip()
+
+ # Save to config
+ self.main_gui.config['manga_full_page_context_prompt'] = self.full_page_context_prompt
+ self.main_gui.config['manga_ocr_prompt'] = self.ocr_prompt
+
+ self._save_rendering_settings()
+ self._log("✅ Updated prompts", "success")
+ dialog.accept()
+
+ def reset_prompt():
+ default_prompt = (
+ "You will receive multiple text segments from a manga page, each prefixed with an index like [0], [1], etc. "
+ "Translate each segment considering the context of all segments together. "
+ "Maintain consistency in character names, tone, and style across all translations.\n\n"
+ "CRITICAL: Return your response as a valid JSON object where each key includes BOTH the index prefix "
+ "AND the original text EXACTLY as provided (e.g., '[0] こんにちは'), and each value is the translation.\n"
+ "This is essential for correct mapping - do not modify or omit the index prefixes!\n\n"
+ "Make sure to properly escape any special characters in the JSON:\n"
+ "- Use \\n for newlines\n"
+ "- Use \\\" for quotes\n"
+ "- Use \\\\ for backslashes\n\n"
+ "Example:\n"
+ '{\n'
+ ' "[0] こんにちは": "Hello",\n'
+ ' "[1] ありがとう": "Thank you",\n'
+ ' "[2] さようなら": "Goodbye"\n'
+ '}\n\n'
+ 'REMEMBER: Keep the [index] prefix in each JSON key exactly as shown in the input!'
+ )
+ text_editor.setPlainText(default_prompt)
+
+ default_ocr = (
+ "YOU ARE AN OCR SYSTEM. YOUR ONLY JOB IS TEXT EXTRACTION.\n\n"
+ "CRITICAL RULES:\n"
+ "1. DO NOT TRANSLATE ANYTHING\n"
+ "2. DO NOT MODIFY THE TEXT\n"
+ "3. DO NOT EXPLAIN OR COMMENT\n"
+ "4. ONLY OUTPUT THE EXACT TEXT YOU SEE\n"
+ "5. PRESERVE NATURAL TEXT FLOW - DO NOT ADD UNNECESSARY LINE BREAKS\n\n"
+ "If you see Korean text, output it in Korean.\n"
+ "If you see Japanese text, output it in Japanese.\n"
+ "If you see Chinese text, output it in Chinese.\n"
+ "If you see English text, output it in English.\n\n"
+ "IMPORTANT: Only use line breaks where they naturally occur in the original text "
+ "(e.g., between dialogue lines or paragraphs). Do not break text mid-sentence or "
+ "between every word/character.\n\n"
+ "For vertical text common in manga/comics, transcribe it as a continuous line unless "
+ "there are clear visual breaks.\n\n"
+ "NEVER translate. ONLY extract exactly what is written.\n"
+ "Output ONLY the raw text, nothing else."
+ )
+ ocr_editor.setPlainText(default_ocr)
+
+ # Button layout
+ button_layout = QHBoxLayout()
+
+ save_btn = QPushButton("Save")
+ save_btn.clicked.connect(save_prompt)
+ button_layout.addWidget(save_btn)
+
+ reset_btn = QPushButton("Reset to Default")
+ reset_btn.clicked.connect(reset_prompt)
+ button_layout.addWidget(reset_btn)
+
+ cancel_btn = QPushButton("Cancel")
+ cancel_btn.clicked.connect(dialog.reject)
+ button_layout.addWidget(cancel_btn)
+
+ button_layout.addStretch()
+ layout.addLayout(button_layout)
+
+ # Show dialog
+ dialog.exec()
+
+ def _refresh_context_settings(self):
+ """Refresh context settings from main GUI"""
+ # Actually fetch the current values from main GUI
+ if hasattr(self.main_gui, 'contextual_var'):
+ contextual_enabled = self.main_gui.contextual_var.get()
+ if hasattr(self, 'contextual_status_label'):
+ self.contextual_status_label.setText(f"• Contextual Translation: {'Enabled' if contextual_enabled else 'Disabled'}")
+
+ if hasattr(self.main_gui, 'trans_history'):
+ history_limit = self.main_gui.trans_history.get()
+ if hasattr(self, 'history_limit_label'):
+ self.history_limit_label.setText(f"• Translation History Limit: {history_limit} exchanges")
+
+ if hasattr(self.main_gui, 'translation_history_rolling_var'):
+ rolling_enabled = self.main_gui.translation_history_rolling_var.get()
+ rolling_status = "Enabled (Rolling Window)" if rolling_enabled else "Disabled (Reset on Limit)"
+ if hasattr(self, 'rolling_status_label'):
+ self.rolling_status_label.setText(f"• Rolling History: {rolling_status}")
+
+ # Get and update model from main GUI
+ current_model = None
+ model_changed = False
+
+ if hasattr(self.main_gui, 'model_var'):
+ current_model = self.main_gui.model_var.get()
+ elif hasattr(self.main_gui, 'model_combo'):
+ current_model = self.main_gui.model_combo.get()
+ elif hasattr(self.main_gui, 'config'):
+ current_model = self.main_gui.config.get('model', 'Unknown')
+
+ # Update model display in the API Settings frame (skip if parent_frame doesn't exist)
+ if hasattr(self, 'parent_frame') and hasattr(self.parent_frame, 'winfo_children'):
+ try:
+ for widget in self.parent_frame.winfo_children():
+ if isinstance(widget, tk.LabelFrame) and "Translation Settings" in widget.cget("text"):
+ for child in widget.winfo_children():
+ if isinstance(child, tk.Frame):
+ for subchild in child.winfo_children():
+ if isinstance(subchild, tk.Label) and "Model:" in subchild.cget("text"):
+ old_model_text = subchild.cget("text")
+ old_model = old_model_text.split("Model: ")[-1] if "Model: " in old_model_text else None
+ if old_model != current_model:
+ model_changed = True
+ subchild.config(text=f"Model: {current_model}")
+ break
+ except Exception:
+ pass # Silently skip if there's an issue with Tkinter widgets
+
+ # If model changed, reset translator and client to force recreation
+ if model_changed and current_model:
+ if self.translator:
+ self._log(f"Model changed to {current_model}. Translator will be recreated on next run.", "info")
+ self.translator = None # Force recreation on next translation
+
+ # Also reset the client if it exists to ensure new model is used
+ if hasattr(self.main_gui, 'client') and self.main_gui.client:
+ if hasattr(self.main_gui.client, 'model') and self.main_gui.client.model != current_model:
+ self.main_gui.client = None # Force recreation with new model
+
+ # If translator exists, update its history manager settings
+ if self.translator and hasattr(self.translator, 'history_manager'):
+ try:
+ # Update the history manager with current main GUI settings
+ if hasattr(self.main_gui, 'contextual_var'):
+ self.translator.history_manager.contextual_enabled = self.main_gui.contextual_var.get()
+
+ if hasattr(self.main_gui, 'trans_history'):
+ self.translator.history_manager.max_history = int(self.main_gui.trans_history.get())
+
+ if hasattr(self.main_gui, 'translation_history_rolling_var'):
+ self.translator.history_manager.rolling_enabled = self.main_gui.translation_history_rolling_var.get()
+
+ # Reset the history to apply new settings
+ self.translator.history_manager.reset()
+
+ self._log("✅ Refreshed context settings from main GUI and updated translator", "success")
+            except Exception:
+                self._log("✅ Refreshed context settings display (translator will update on next run)", "success")
+ else:
+ log_message = "✅ Refreshed context settings from main GUI"
+ if model_changed:
+ log_message += f" (Model: {current_model})"
+ self._log(log_message, "success")
+
+ def _browse_google_credentials_permanent(self):
+ """Browse and set Google Cloud Vision credentials from the permanent button"""
+ from PySide6.QtWidgets import QFileDialog
+
+ file_path, _ = QFileDialog.getOpenFileName(
+ self.dialog,
+ "Select Google Cloud Service Account JSON",
+ "",
+ "JSON files (*.json);;All files (*.*)"
+ )
+
+ if file_path:
+ # Save to config with both keys for compatibility
+ self.main_gui.config['google_vision_credentials'] = file_path
+ self.main_gui.config['google_cloud_credentials'] = file_path
+
+ # Save configuration
+ if hasattr(self.main_gui, 'save_config'):
+ self.main_gui.save_config(show_message=False)
+
+
+ from PySide6.QtWidgets import QMessageBox
+
+ # Update button state immediately
+ if hasattr(self, 'start_button'):
+ self.start_button.setEnabled(True)
+
+ # Update credentials display
+ if hasattr(self, 'creds_label'):
+ self.creds_label.setText(os.path.basename(file_path))
+ self.creds_label.setStyleSheet("color: green;")
+
+ # Update the main status label and provider status
+ self._update_main_status_label()
+ self._check_provider_status()
+
+ QMessageBox.information(self.dialog, "Success", "Google Cloud credentials set successfully!")
+
+ def _update_status_display(self):
+ """Update the status display after credentials change"""
+ # This would update the status label if we had a reference to it
+ # For now, we'll just ensure the button is enabled
+ google_creds_path = self.main_gui.config.get('google_vision_credentials', '') or self.main_gui.config.get('google_cloud_credentials', '')
+ has_vision = os.path.exists(google_creds_path) if google_creds_path else False
+
+ if has_vision and hasattr(self, 'start_button'):
+ self.start_button.setEnabled(True)
+
+ def _get_available_fonts(self):
+ """Get list of available fonts from system and custom directories"""
+ fonts = ["Default"] # Default option
+
+ # Reset font mapping
+ self.font_mapping = {}
+
+ # Comprehensive map of Windows font filenames to proper display names
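+        # Keys are lowercased font file basenames (without extension) as found in
+        # C:/Windows/Fonts; values are the display names shown in the font dropdown.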
+ font_name_map = {
+ # === BASIC LATIN FONTS ===
+ # Arial family
+ 'arial': 'Arial',
+ 'ariali': 'Arial Italic',
+ 'arialbd': 'Arial Bold',
+ 'arialbi': 'Arial Bold Italic',
+ 'ariblk': 'Arial Black',
+
+ # Times New Roman
+ 'times': 'Times New Roman',
+ 'timesbd': 'Times New Roman Bold',
+ 'timesi': 'Times New Roman Italic',
+ 'timesbi': 'Times New Roman Bold Italic',
+
+ # Calibri family
+ 'calibri': 'Calibri',
+ 'calibrib': 'Calibri Bold',
+ 'calibrii': 'Calibri Italic',
+ 'calibriz': 'Calibri Bold Italic',
+ 'calibril': 'Calibri Light',
+ 'calibrili': 'Calibri Light Italic',
+
+ # Comic Sans family
+ 'comic': 'Comic Sans MS',
+ 'comici': 'Comic Sans MS Italic',
+ 'comicbd': 'Comic Sans MS Bold',
+ 'comicz': 'Comic Sans MS Bold Italic',
+
+ # Segoe UI family
+ 'segoeui': 'Segoe UI',
+ 'segoeuib': 'Segoe UI Bold',
+ 'segoeuii': 'Segoe UI Italic',
+ 'segoeuiz': 'Segoe UI Bold Italic',
+ 'segoeuil': 'Segoe UI Light',
+ 'segoeuisl': 'Segoe UI Semilight',
+ 'seguisb': 'Segoe UI Semibold',
+ 'seguisbi': 'Segoe UI Semibold Italic',
+ 'seguisli': 'Segoe UI Semilight Italic',
+ 'seguili': 'Segoe UI Light Italic',
+ 'seguibl': 'Segoe UI Black',
+ 'seguibli': 'Segoe UI Black Italic',
+ 'seguihis': 'Segoe UI Historic',
+ 'seguiemj': 'Segoe UI Emoji',
+ 'seguisym': 'Segoe UI Symbol',
+
+ # Courier
+ 'cour': 'Courier New',
+ 'courbd': 'Courier New Bold',
+ 'couri': 'Courier New Italic',
+ 'courbi': 'Courier New Bold Italic',
+
+ # Verdana
+ 'verdana': 'Verdana',
+ 'verdanab': 'Verdana Bold',
+ 'verdanai': 'Verdana Italic',
+ 'verdanaz': 'Verdana Bold Italic',
+
+ # Georgia
+ 'georgia': 'Georgia',
+ 'georgiab': 'Georgia Bold',
+ 'georgiai': 'Georgia Italic',
+ 'georgiaz': 'Georgia Bold Italic',
+
+ # Tahoma
+ 'tahoma': 'Tahoma',
+ 'tahomabd': 'Tahoma Bold',
+
+ # Trebuchet
+ 'trebuc': 'Trebuchet MS',
+ 'trebucbd': 'Trebuchet MS Bold',
+ 'trebucit': 'Trebuchet MS Italic',
+ 'trebucbi': 'Trebuchet MS Bold Italic',
+
+ # Impact
+ 'impact': 'Impact',
+
+ # Consolas
+ 'consola': 'Consolas',
+ 'consolab': 'Consolas Bold',
+ 'consolai': 'Consolas Italic',
+ 'consolaz': 'Consolas Bold Italic',
+
+ # Sitka family (from your screenshot)
+ 'sitka': 'Sitka Small',
+ 'sitkab': 'Sitka Small Bold',
+ 'sitkai': 'Sitka Small Italic',
+ 'sitkaz': 'Sitka Small Bold Italic',
+ 'sitkavf': 'Sitka Text',
+ 'sitkavfb': 'Sitka Text Bold',
+ 'sitkavfi': 'Sitka Text Italic',
+ 'sitkavfz': 'Sitka Text Bold Italic',
+ 'sitkasubheading': 'Sitka Subheading',
+ 'sitkasubheadingb': 'Sitka Subheading Bold',
+ 'sitkasubheadingi': 'Sitka Subheading Italic',
+ 'sitkasubheadingz': 'Sitka Subheading Bold Italic',
+ 'sitkaheading': 'Sitka Heading',
+ 'sitkaheadingb': 'Sitka Heading Bold',
+ 'sitkaheadingi': 'Sitka Heading Italic',
+ 'sitkaheadingz': 'Sitka Heading Bold Italic',
+ 'sitkadisplay': 'Sitka Display',
+ 'sitkadisplayb': 'Sitka Display Bold',
+ 'sitkadisplayi': 'Sitka Display Italic',
+ 'sitkadisplayz': 'Sitka Display Bold Italic',
+ 'sitkabanner': 'Sitka Banner',
+ 'sitkabannerb': 'Sitka Banner Bold',
+ 'sitkabanneri': 'Sitka Banner Italic',
+ 'sitkabannerz': 'Sitka Banner Bold Italic',
+
+ # Ink Free (from your screenshot)
+ 'inkfree': 'Ink Free',
+
+ # Lucida family
+ 'l_10646': 'Lucida Sans Unicode',
+ 'lucon': 'Lucida Console',
+ 'ltype': 'Lucida Sans Typewriter',
+ 'ltypeb': 'Lucida Sans Typewriter Bold',
+ 'ltypei': 'Lucida Sans Typewriter Italic',
+ 'ltypebi': 'Lucida Sans Typewriter Bold Italic',
+
+ # Palatino Linotype
+ 'pala': 'Palatino Linotype',
+ 'palab': 'Palatino Linotype Bold',
+ 'palabi': 'Palatino Linotype Bold Italic',
+ 'palai': 'Palatino Linotype Italic',
+
+ # Noto fonts
+ 'notosansjp': 'Noto Sans JP',
+ 'notoserifjp': 'Noto Serif JP',
+
+ # UD Digi Kyokasho (Japanese educational font)
+ 'uddigikyokashon-b': 'UD Digi Kyokasho NK-B',
+ 'uddigikyokashon-r': 'UD Digi Kyokasho NK-R',
+ 'uddigikyokashonk-b': 'UD Digi Kyokasho NK-B',
+ 'uddigikyokashonk-r': 'UD Digi Kyokasho NK-R',
+
+ # Urdu Typesetting
+ 'urdtype': 'Urdu Typesetting',
+ 'urdtypeb': 'Urdu Typesetting Bold',
+
+ # Segoe variants
+ 'segmdl2': 'Segoe MDL2 Assets',
+ 'segoeicons': 'Segoe Fluent Icons',
+ 'segoepr': 'Segoe Print',
+ 'segoeprb': 'Segoe Print Bold',
+ 'segoesc': 'Segoe Script',
+ 'segoescb': 'Segoe Script Bold',
+ 'seguivar': 'Segoe UI Variable',
+
+ # Sans Serif Collection
+ 'sansserifcollection': 'Sans Serif Collection',
+
+ # Additional common Windows 10/11 fonts
+ 'holomdl2': 'HoloLens MDL2 Assets',
+ 'gadugi': 'Gadugi',
+ 'gadugib': 'Gadugi Bold',
+
+ # Cascadia Code (developer font)
+ 'cascadiacode': 'Cascadia Code',
+ 'cascadiacodepl': 'Cascadia Code PL',
+ 'cascadiamono': 'Cascadia Mono',
+ 'cascadiamonopl': 'Cascadia Mono PL',
+
+ # More Segoe UI variants
+ 'seguibli': 'Segoe UI Black Italic',
+ 'segoeuiblack': 'Segoe UI Black',
+
+ # Other fonts
+ 'aldhabi': 'Aldhabi',
+ 'andiso': 'Andalus', # This is likely Andalus font
+ 'arabtype': 'Arabic Typesetting',
+ 'mstmc': 'Myanmar Text', # Alternate file name
+ 'monbaiti': 'Mongolian Baiti', # Shorter filename variant
+ 'leeluisl': 'Leelawadee UI Semilight', # Missing variant
+ 'simsunextg': 'SimSun-ExtG', # Extended SimSun variant
+ 'ebrima': 'Ebrima',
+ 'ebrimabd': 'Ebrima Bold',
+ 'gabriola': 'Gabriola',
+
+ # Bahnschrift variants
+ 'bahnschrift': 'Bahnschrift',
+ 'bahnschriftlight': 'Bahnschrift Light',
+ 'bahnschriftsemibold': 'Bahnschrift SemiBold',
+ 'bahnschriftbold': 'Bahnschrift Bold',
+
+ # Majalla (African language font)
+ 'majalla': 'Sakkal Majalla',
+ 'majallab': 'Sakkal Majalla Bold',
+
+ # Additional fonts that might be missing
+ 'amiri': 'Amiri',
+ 'amiri-bold': 'Amiri Bold',
+ 'amiri-slanted': 'Amiri Slanted',
+ 'amiri-boldslanted': 'Amiri Bold Slanted',
+ 'aparaj': 'Aparajita',
+ 'aparajb': 'Aparajita Bold',
+ 'aparaji': 'Aparajita Italic',
+ 'aparajbi': 'Aparajita Bold Italic',
+ 'kokila': 'Kokila',
+ 'kokilab': 'Kokila Bold',
+ 'kokilai': 'Kokila Italic',
+ 'kokilabi': 'Kokila Bold Italic',
+ 'utsaah': 'Utsaah',
+ 'utsaahb': 'Utsaah Bold',
+ 'utsaahi': 'Utsaah Italic',
+ 'utsaahbi': 'Utsaah Bold Italic',
+ 'vani': 'Vani',
+ 'vanib': 'Vani Bold',
+
+ # === JAPANESE FONTS ===
+ 'msgothic': 'MS Gothic',
+ 'mspgothic': 'MS PGothic',
+ 'msmincho': 'MS Mincho',
+ 'mspmincho': 'MS PMincho',
+ 'meiryo': 'Meiryo',
+ 'meiryob': 'Meiryo Bold',
+ 'yugothic': 'Yu Gothic',
+ 'yugothb': 'Yu Gothic Bold',
+ 'yugothl': 'Yu Gothic Light',
+ 'yugothm': 'Yu Gothic Medium',
+ 'yugothr': 'Yu Gothic Regular',
+ 'yumin': 'Yu Mincho',
+ 'yumindb': 'Yu Mincho Demibold',
+ 'yuminl': 'Yu Mincho Light',
+
+ # === KOREAN FONTS ===
+ 'malgun': 'Malgun Gothic',
+ 'malgunbd': 'Malgun Gothic Bold',
+ 'malgunsl': 'Malgun Gothic Semilight',
+ 'gulim': 'Gulim',
+ 'gulimche': 'GulimChe',
+ 'dotum': 'Dotum',
+ 'dotumche': 'DotumChe',
+ 'batang': 'Batang',
+ 'batangche': 'BatangChe',
+ 'gungsuh': 'Gungsuh',
+ 'gungsuhche': 'GungsuhChe',
+
+ # === CHINESE FONTS ===
+ # Simplified Chinese
+ 'simsun': 'SimSun',
+ 'simsunb': 'SimSun Bold',
+ 'simsunextb': 'SimSun ExtB',
+ 'nsimsun': 'NSimSun',
+ 'simhei': 'SimHei',
+ 'simkai': 'KaiTi',
+ 'simfang': 'FangSong',
+ 'simli': 'LiSu',
+ 'simyou': 'YouYuan',
+ 'stcaiyun': 'STCaiyun',
+ 'stfangsong': 'STFangsong',
+ 'sthupo': 'STHupo',
+ 'stkaiti': 'STKaiti',
+ 'stliti': 'STLiti',
+ 'stsong': 'STSong',
+ 'stxihei': 'STXihei',
+ 'stxingkai': 'STXingkai',
+ 'stxinwei': 'STXinwei',
+ 'stzhongsong': 'STZhongsong',
+
+ # Traditional Chinese
+ 'msjh': 'Microsoft JhengHei',
+ 'msjhbd': 'Microsoft JhengHei Bold',
+ 'msjhl': 'Microsoft JhengHei Light',
+ 'mingliu': 'MingLiU',
+ 'pmingliu': 'PMingLiU',
+ 'mingliub': 'MingLiU Bold',
+ 'mingliuhk': 'MingLiU_HKSCS',
+ 'mingliuextb': 'MingLiU ExtB',
+ 'pmingliuextb': 'PMingLiU ExtB',
+ 'mingliuhkextb': 'MingLiU_HKSCS ExtB',
+ 'kaiu': 'DFKai-SB',
+
+ # Microsoft YaHei
+ 'msyh': 'Microsoft YaHei',
+ 'msyhbd': 'Microsoft YaHei Bold',
+ 'msyhl': 'Microsoft YaHei Light',
+
+ # === THAI FONTS ===
+ 'leelawui': 'Leelawadee UI',
+ 'leelauib': 'Leelawadee UI Bold',
+ 'leelauisl': 'Leelawadee UI Semilight',
+ 'leelawad': 'Leelawadee',
+ 'leelawdb': 'Leelawadee Bold',
+
+ # === INDIC FONTS ===
+ 'mangal': 'Mangal',
+ 'vrinda': 'Vrinda',
+ 'raavi': 'Raavi',
+ 'shruti': 'Shruti',
+ 'tunga': 'Tunga',
+ 'gautami': 'Gautami',
+ 'kartika': 'Kartika',
+ 'latha': 'Latha',
+ 'kalinga': 'Kalinga',
+ 'vijaya': 'Vijaya',
+ 'nirmala': 'Nirmala UI',
+ 'nirmalab': 'Nirmala UI Bold',
+ 'nirmalas': 'Nirmala UI Semilight',
+
+            # === ARABIC FONTS ===
+ 'trado': 'Traditional Arabic',
+ 'tradbdo': 'Traditional Arabic Bold',
+ 'simpo': 'Simplified Arabic',
+ 'simpbdo': 'Simplified Arabic Bold',
+ 'simpfxo': 'Simplified Arabic Fixed',
+
+ # === OTHER ASIAN FONTS ===
+ 'javatext': 'Javanese Text',
+ 'himalaya': 'Microsoft Himalaya',
+ 'mongolianbaiti': 'Mongolian Baiti',
+ 'msuighur': 'Microsoft Uighur',
+ 'msuighub': 'Microsoft Uighur Bold',
+ 'msyi': 'Microsoft Yi Baiti',
+ 'taileb': 'Microsoft Tai Le Bold',
+ 'taile': 'Microsoft Tai Le',
+ 'ntailu': 'Microsoft New Tai Lue',
+ 'ntailub': 'Microsoft New Tai Lue Bold',
+ 'phagspa': 'Microsoft PhagsPa',
+ 'phagspab': 'Microsoft PhagsPa Bold',
+ 'mmrtext': 'Myanmar Text',
+ 'mmrtextb': 'Myanmar Text Bold',
+
+ # === SYMBOL FONTS ===
+ 'symbol': 'Symbol',
+ 'webdings': 'Webdings',
+ 'wingding': 'Wingdings',
+ 'wingdng2': 'Wingdings 2',
+ 'wingdng3': 'Wingdings 3',
+ 'mtextra': 'MT Extra',
+ 'marlett': 'Marlett',
+
+ # === OTHER FONTS ===
+ 'mvboli': 'MV Boli',
+ 'sylfaen': 'Sylfaen',
+ 'estrangelo': 'Estrangelo Edessa',
+ 'euphemia': 'Euphemia',
+ 'plantagenet': 'Plantagenet Cherokee',
+ 'micross': 'Microsoft Sans Serif',
+
+ # Franklin Gothic
+ 'framd': 'Franklin Gothic Medium',
+ 'framdit': 'Franklin Gothic Medium Italic',
+ 'fradm': 'Franklin Gothic Demi',
+ 'fradmcn': 'Franklin Gothic Demi Cond',
+ 'fradmit': 'Franklin Gothic Demi Italic',
+ 'frahv': 'Franklin Gothic Heavy',
+ 'frahvit': 'Franklin Gothic Heavy Italic',
+ 'frabook': 'Franklin Gothic Book',
+ 'frabookit': 'Franklin Gothic Book Italic',
+
+ # Cambria
+ 'cambria': 'Cambria',
+ 'cambriab': 'Cambria Bold',
+ 'cambriai': 'Cambria Italic',
+ 'cambriaz': 'Cambria Bold Italic',
+ 'cambria&cambria math': 'Cambria Math',
+
+ # Candara
+ 'candara': 'Candara',
+ 'candarab': 'Candara Bold',
+ 'candarai': 'Candara Italic',
+ 'candaraz': 'Candara Bold Italic',
+ 'candaral': 'Candara Light',
+ 'candarali': 'Candara Light Italic',
+
+ # Constantia
+ 'constan': 'Constantia',
+ 'constanb': 'Constantia Bold',
+ 'constani': 'Constantia Italic',
+ 'constanz': 'Constantia Bold Italic',
+
+ # Corbel
+ 'corbel': 'Corbel',
+ 'corbelb': 'Corbel Bold',
+ 'corbeli': 'Corbel Italic',
+ 'corbelz': 'Corbel Bold Italic',
+ 'corbell': 'Corbel Light',
+ 'corbelli': 'Corbel Light Italic',
+
+ # Bahnschrift
+ 'bahnschrift': 'Bahnschrift',
+
+ # Garamond
+ 'gara': 'Garamond',
+ 'garabd': 'Garamond Bold',
+ 'garait': 'Garamond Italic',
+
+ # Century Gothic
+ 'gothic': 'Century Gothic',
+ 'gothicb': 'Century Gothic Bold',
+ 'gothici': 'Century Gothic Italic',
+ 'gothicz': 'Century Gothic Bold Italic',
+
+ # Bookman Old Style
+ 'bookos': 'Bookman Old Style',
+ 'bookosb': 'Bookman Old Style Bold',
+ 'bookosi': 'Bookman Old Style Italic',
+ 'bookosbi': 'Bookman Old Style Bold Italic',
+ }
+
+ # Dynamically discover all Windows fonts
+ windows_fonts = []
+ windows_font_dir = "C:/Windows/Fonts"
+
+ if os.path.exists(windows_font_dir):
+ for font_file in os.listdir(windows_font_dir):
+ font_path = os.path.join(windows_font_dir, font_file)
+
+ # Check if it's a font file
+ if os.path.isfile(font_path) and font_file.lower().endswith(('.ttf', '.ttc', '.otf')):
+ # Get base name without extension
+ base_name = os.path.splitext(font_file)[0]
+ base_name_lower = base_name.lower()
+
+ # Check if we have a proper name mapping
+ if base_name_lower in font_name_map:
+ display_name = font_name_map[base_name_lower]
+ else:
+ # Generic cleanup for unmapped fonts
+ display_name = base_name.replace('_', ' ').replace('-', ' ')
+ display_name = ' '.join(word.capitalize() for word in display_name.split())
+
+ windows_fonts.append((display_name, font_path))
+
+ # Sort alphabetically
+ windows_fonts.sort(key=lambda x: x[0])
+
+ # Add all discovered fonts to the list
+ for font_name, font_path in windows_fonts:
+ fonts.append(font_name)
+ self.font_mapping[font_name] = font_path
+
+ # Check for custom fonts directory (keep your existing code)
+ script_dir = os.path.dirname(os.path.abspath(__file__))
+ fonts_dir = os.path.join(script_dir, "fonts")
+
+ if os.path.exists(fonts_dir):
+ for root, dirs, files in os.walk(fonts_dir):
+ for font_file in files:
+ if font_file.endswith(('.ttf', '.ttc', '.otf')):
+ font_path = os.path.join(root, font_file)
+ font_name = os.path.splitext(font_file)[0]
+ # Add category from folder
+ category = os.path.basename(root)
+ if category != "fonts":
+ font_name = f"{font_name} ({category})"
+ fonts.append(font_name)
+ self.font_mapping[font_name] = font_path
+
+ # Load previously saved custom fonts (keep your existing code)
+ if 'custom_fonts' in self.main_gui.config:
+ for custom_font in self.main_gui.config['custom_fonts']:
+ if os.path.exists(custom_font['path']):
+ # Check if this font is already in the list
+ if custom_font['name'] not in fonts:
+ fonts.append(custom_font['name'])
+ self.font_mapping[custom_font['name']] = custom_font['path']
+
+ # Add custom fonts option at the end
+ fonts.append("Browse Custom Font...")
+
+ return fonts
+
+ def _on_font_selected(self):
+ """Handle font selection - updates font path only, save+apply called by widget"""
+ if not hasattr(self, 'font_combo'):
+ return
+ selected = self.font_combo.currentText()
+
+ if selected == "Default":
+ self.selected_font_path = None
+ elif selected == "Browse Custom Font...":
+ # Open file dialog to select custom font using PySide6
+ font_path, _ = QFileDialog.getOpenFileName(
+ self.dialog if hasattr(self, 'dialog') else None,
+ "Select Font File",
+ "",
+ "Font files (*.ttf *.ttc *.otf);;TrueType fonts (*.ttf);;TrueType collections (*.ttc);;OpenType fonts (*.otf);;All files (*.*)"
+ )
+
+ # Check if user selected a file (not cancelled)
+ if font_path and font_path.strip():
+ # Add to combo box
+ font_name = os.path.basename(font_path)
+
+ # Insert before "Browse Custom Font..." option
+                if font_name not in self.font_mapping:
+ # Add to combo box (PySide6)
+ self.font_combo.insertItem(self.font_combo.count() - 1, font_name)
+ self.font_combo.setCurrentText(font_name)
+
+ # Update font mapping
+ self.font_mapping[font_name] = font_path
+ self.selected_font_path = font_path
+
+ # Save custom font to config
+ if 'custom_fonts' not in self.main_gui.config:
+ self.main_gui.config['custom_fonts'] = []
+
+ custom_font_entry = {'name': font_name, 'path': font_path}
+ # Check if this exact entry already exists
+ font_exists = False
+ for existing_font in self.main_gui.config['custom_fonts']:
+ if existing_font['path'] == font_path:
+ font_exists = True
+ break
+
+ if not font_exists:
+ self.main_gui.config['custom_fonts'].append(custom_font_entry)
+ # Save config immediately to persist custom fonts
+ if hasattr(self.main_gui, 'save_config'):
+ self.main_gui.save_config(show_message=False)
+ else:
+ # Font already exists, just select it
+ self.font_combo.setCurrentText(font_name)
+ self.selected_font_path = self.font_mapping[font_name]
+ else:
+ # User cancelled, revert to previous selection
+ if hasattr(self, 'previous_font_selection'):
+ self.font_combo.setCurrentText(self.previous_font_selection)
+ else:
+ self.font_combo.setCurrentText("Default")
+ return
+ else:
+ # Check if it's in the font mapping
+ if selected in self.font_mapping:
+ self.selected_font_path = self.font_mapping[selected]
+ else:
+ # This shouldn't happen, but just in case
+ self.selected_font_path = None
+
+ # Store current selection for next time
+ self.previous_font_selection = selected
+
+ def _update_opacity_label(self, value):
+ """Update opacity percentage label and value variable"""
+ self.bg_opacity_value = int(value) # UPDATE THE VALUE VARIABLE!
+ percentage = int((float(value) / 255) * 100)
+ self.opacity_label.setText(f"{percentage}%")
+
+ def _update_reduction_label(self, value):
+ """Update size reduction percentage label and value variable"""
+ self.bg_reduction_value = float(value) # UPDATE THE VALUE VARIABLE!
+ percentage = int(float(value) * 100)
+ self.reduction_label.setText(f"{percentage}%")
+
+ def _toggle_inpaint_quality_visibility(self):
+ """Show/hide inpaint quality options based on skip_inpainting setting"""
+ if hasattr(self, 'inpaint_quality_frame'):
+ if self.skip_inpainting_value:
+ # Hide quality options when inpainting is skipped
+ self.inpaint_quality_frame.hide()
+ else:
+ # Show quality options when inpainting is enabled
+ self.inpaint_quality_frame.show()
+
+ def _toggle_inpaint_visibility(self):
+ """Show/hide inpainting options based on skip toggle"""
+ # Update the value from the checkbox
+ self.skip_inpainting_value = self.skip_inpainting_checkbox.isChecked()
+
+ if self.skip_inpainting_value:
+ # Hide all inpainting options
+ self.inpaint_method_frame.hide()
+ self.cloud_inpaint_frame.hide()
+ self.local_inpaint_frame.hide()
+ self.inpaint_separator.hide() # Hide separator
+ else:
+ # Show method selection
+ self.inpaint_method_frame.show()
+ self.inpaint_separator.show() # Show separator
+ self._on_inpaint_method_change()
+
+ # Don't save during initialization
+ if not (hasattr(self, '_initializing') and self._initializing):
+ self._save_rendering_settings()
+
+ def _on_inpaint_method_change(self):
+ """Show appropriate inpainting settings based on method"""
+ # Determine current method from radio buttons
+ if self.cloud_radio.isChecked():
+ method = 'cloud'
+ elif self.local_radio.isChecked():
+ method = 'local'
+ elif self.hybrid_radio.isChecked():
+ method = 'hybrid'
+ else:
+ method = 'local' # Default fallback
+
+ # Update the stored value
+ self.inpaint_method_value = method
+
+ if method == 'cloud':
+ self.cloud_inpaint_frame.show()
+ self.local_inpaint_frame.hide()
+ elif method == 'local':
+ self.local_inpaint_frame.show()
+ self.cloud_inpaint_frame.hide()
+ elif method == 'hybrid':
+ # Show both frames for hybrid
+ self.local_inpaint_frame.show()
+ self.cloud_inpaint_frame.show()
+
+ # Don't save during initialization
+ if not (hasattr(self, '_initializing') and self._initializing):
+ self._save_rendering_settings()
+
+ def _on_local_model_change(self, new_model_type=None):
+ """Handle model type change and auto-load if model exists"""
+ # Get model type from combo box (PySide6)
+ if new_model_type is None:
+ model_type = self.local_model_combo.currentText()
+ else:
+ model_type = new_model_type
+
+ # Update stored value
+ self.local_model_type_value = model_type
+
+ # Update description
+ model_desc = {
+ 'lama': 'LaMa (Best quality)',
+ 'aot': 'AOT GAN (Fast)',
+ 'aot_onnx': 'AOT ONNX (Optimized)',
+ 'mat': 'MAT (High-res)',
+ 'sd_local': 'Stable Diffusion (Anime)',
+ 'anime': 'Anime/Manga Inpainting',
+ 'anime_onnx': 'Anime ONNX (Fast/Optimized)',
+ 'lama_onnx': 'LaMa ONNX (Optimized)',
+ }
+ self.model_desc_label.setText(model_desc.get(model_type, ''))
+
+ # Check for saved path for this model type
+ saved_path = self.main_gui.config.get(f'manga_{model_type}_model_path', '')
+
+ if saved_path and os.path.exists(saved_path):
+ # Update the path display
+ self.local_model_entry.setText(saved_path)
+ self.local_model_path_value = saved_path
+ self.local_model_status_label.setText("⏳ Loading saved model...")
+ self.local_model_status_label.setStyleSheet("color: orange;")
+
+ # Auto-load the model after a short delay using QTimer
+ from PySide6.QtCore import QTimer
+ QTimer.singleShot(100, lambda: self._try_load_model(model_type, saved_path))
+ else:
+ # Clear the path display
+ self.local_model_entry.setText("")
+ self.local_model_path_value = ""
+ self.local_model_status_label.setText("No model loaded")
+ self.local_model_status_label.setStyleSheet("color: gray;")
+
+ self._save_rendering_settings()
+
+ def _browse_local_model(self):
+ """Browse for local inpainting model and auto-load"""
+ from PySide6.QtWidgets import QFileDialog
+ from PySide6.QtCore import QTimer
+
+ model_type = self.local_model_type_value
+
+ if model_type == 'sd_local':
+ filter_str = "Model files (*.safetensors *.pt *.pth *.ckpt *.onnx);;SafeTensors (*.safetensors);;Checkpoint files (*.ckpt);;PyTorch models (*.pt *.pth);;ONNX models (*.onnx);;All files (*.*)"
+ else:
+ filter_str = "Model files (*.pt *.pth *.ckpt *.onnx);;Checkpoint files (*.ckpt);;PyTorch models (*.pt *.pth);;ONNX models (*.onnx);;All files (*.*)"
+
+ path, _ = QFileDialog.getOpenFileName(
+ self.dialog,
+ f"Select {model_type.upper()} Model",
+ "",
+ filter_str
+ )
+
+ if path:
+ self.local_model_entry.setText(path)
+ self.local_model_path_value = path
+ # Save to config
+ self.main_gui.config[f'manga_{model_type}_model_path'] = path
+ self._save_rendering_settings()
+
+ # Update status first
+ self._update_local_model_status()
+
+ # Auto-load the selected model using QTimer
+ QTimer.singleShot(100, lambda: self._try_load_model(model_type, path))
+
+ def _click_load_local_model(self):
+ """Manually trigger loading of the selected local inpainting model"""
+ from PySide6.QtWidgets import QMessageBox
+ from PySide6.QtCore import QTimer
+
+ try:
+ model_type = self.local_model_type_value if hasattr(self, 'local_model_type_value') else None
+ path = self.local_model_path_value if hasattr(self, 'local_model_path_value') else ''
+ if not model_type or not path:
+ QMessageBox.information(self.dialog, "Load Model", "Please select a model file first using the Browse button.")
+ return
+ # Defer to keep UI responsive using QTimer
+ QTimer.singleShot(50, lambda: self._try_load_model(model_type, path))
+ except Exception:
+ pass
+
+ def _try_load_model(self, method: str, model_path: str):
+ """Try to load a model and update status (runs loading on a background thread)."""
+        from PySide6.QtWidgets import QApplication
+
+ try:
+ # Show loading status immediately
+ self.local_model_status_label.setText("⏳ Loading model...")
+ self.local_model_status_label.setStyleSheet("color: orange;")
+ QApplication.processEvents() # Process pending events to update UI
+ self.main_gui.append_log(f"⏳ Loading {method.upper()} model...")
+
+ def do_load():
+ from local_inpainter import LocalInpainter
+ success = False
+ try:
+ test_inpainter = LocalInpainter()
+ success = test_inpainter.load_model_with_retry(method, model_path, force_reload=True)
+ print(f"DEBUG: Model loading completed, success={success}")
+ except Exception as e:
+ print(f"DEBUG: Model loading exception: {e}")
+ self.main_gui.append_log(f"❌ Error loading model: {e}")
+ success = False
+
+                # Update UI directly from the worker thread (not formally thread-safe in Qt, but these label updates have been stable in PySide6)
+ print(f"DEBUG: Updating UI, success={success}")
+ try:
+ if success:
+ self.local_model_status_label.setText(f"✅ {method.upper()} model ready")
+ self.local_model_status_label.setStyleSheet("color: green;")
+ self.main_gui.append_log(f"✅ {method.upper()} model loaded successfully!")
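+                        # Drop cached inpainter references so the translator rebuilds them with the newly loaded model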
+ if hasattr(self, 'translator') and self.translator:
+ for attr in ('local_inpainter', '_last_local_method', '_last_local_model_path'):
+ if hasattr(self.translator, attr):
+ try:
+ delattr(self.translator, attr)
+ except Exception:
+ pass
+ else:
+ self.local_model_status_label.setText("⚠️ Model file found but failed to load")
+ self.local_model_status_label.setStyleSheet("color: orange;")
+ self.main_gui.append_log("⚠️ Model file found but failed to load")
+ print(f"DEBUG: UI update completed")
+ except Exception as e:
+ print(f"ERROR updating UI after load: {e}")
+ import traceback
+ traceback.print_exc()
+
+ # Fire background loader
+ threading.Thread(target=do_load, daemon=True).start()
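+            # True here only means the background load has started; the status labels set in do_load report the final result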
+ return True
+ except Exception as e:
+ try:
+ self.local_model_status_label.setText(f"❌ Error: {str(e)[:50]}")
+ self.local_model_status_label.setStyleSheet("color: red;")
+ except Exception:
+ pass
+ self.main_gui.append_log(f"❌ Error loading model: {e}")
+ return False
+
+ def _update_local_model_status(self):
+ """Update local model status display"""
+ path = self.local_model_path_value if hasattr(self, 'local_model_path_value') else ''
+
+ if not path:
+ self.local_model_status_label.setText("⚠️ No model selected")
+ self.local_model_status_label.setStyleSheet("color: orange;")
+ return
+
+ if not os.path.exists(path):
+ self.local_model_status_label.setText("❌ Model file not found")
+ self.local_model_status_label.setStyleSheet("color: red;")
+ return
+
+ # Check for ONNX cache
+ if path.endswith(('.pt', '.pth', '.safetensors')):
+ onnx_dir = os.path.join(os.path.dirname(path), 'models')
+ if os.path.exists(onnx_dir):
+ # Check if ONNX file exists for this model
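+                # The cached ONNX filename appears to embed a short MD5 of the source model path, so match on that hash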
+ model_hash = hashlib.md5(path.encode()).hexdigest()[:8]
+ onnx_files = [f for f in os.listdir(onnx_dir) if model_hash in f]
+ if onnx_files:
+ self.local_model_status_label.setText("✅ Model ready (ONNX cached)")
+ self.local_model_status_label.setStyleSheet("color: green;")
+ else:
+ self.local_model_status_label.setText("ℹ️ Will convert to ONNX on first use")
+ self.local_model_status_label.setStyleSheet("color: blue;")
+ else:
+ self.local_model_status_label.setText("ℹ️ Will convert to ONNX on first use")
+ self.local_model_status_label.setStyleSheet("color: blue;")
+ else:
+ self.local_model_status_label.setText("✅ ONNX model ready")
+ self.local_model_status_label.setStyleSheet("color: green;")
+
+ def _download_model(self):
+ """Actually download the model for the selected type"""
+ from PySide6.QtWidgets import QMessageBox
+
+ model_type = self.local_model_type_value
+
+ # Define URLs for each model type
+ model_urls = {
+ 'aot': 'https://huggingface.co/ogkalu/aot-inpainting-jit/resolve/main/aot_traced.pt',
+ 'aot_onnx': 'https://huggingface.co/ogkalu/aot-inpainting/resolve/main/aot.onnx',
+ 'lama': 'https://github.com/Sanster/models/releases/download/add_big_lama/big-lama.pt',
+ 'lama_onnx': 'https://huggingface.co/Carve/LaMa-ONNX/resolve/main/lama_fp32.onnx',
+ 'anime': 'https://github.com/Sanster/models/releases/download/AnimeMangaInpainting/anime-manga-big-lama.pt',
+ 'anime_onnx': 'https://huggingface.co/ogkalu/lama-manga-onnx-dynamic/resolve/main/lama-manga-dynamic.onnx',
+ 'mat': '', # User must provide
+ 'ollama': '', # Not applicable
+ 'sd_local': '' # User must provide
+ }
+
+ url = model_urls.get(model_type, '')
+
+ if not url:
+            QMessageBox.information(self.dialog, "Manual Download",
+                f"Please download the {model_type} model manually, then use Browse to select it")
+ return
+
+ # Determine filename
+ filename_map = {
+ 'aot': 'aot_traced.pt',
+ 'aot_onnx': 'aot.onnx',
+ 'lama': 'big-lama.pt',
+ 'anime': 'anime-manga-big-lama.pt',
+ 'anime_onnx': 'lama-manga-dynamic.onnx',
+ 'lama_onnx': 'lama_fp32.onnx',
+ 'fcf_onnx': 'fcf.onnx',
+ 'sd_inpaint_onnx': 'sd_inpaint_unet.onnx'
+ }
+
+ filename = filename_map.get(model_type, f'{model_type}.pt')
+ save_path = os.path.join('models', filename)
+
+ # Create models directory
+ os.makedirs('models', exist_ok=True)
+
+ # Check if already exists
+ if os.path.exists(save_path):
+ self.local_model_entry.setText(save_path)
+ self.local_model_path_value = save_path
+ self.local_model_status_label.setText("✅ Model already downloaded")
+ self.local_model_status_label.setStyleSheet("color: green;")
+ QMessageBox.information(self.dialog, "Model Ready", f"Model already exists at:\n{save_path}")
+ return
+
+ # Download the model
+ self._perform_download(url, save_path, model_type)
+
+ def _perform_download(self, url: str, save_path: str, model_name: str):
+ """Perform the actual download with progress indication"""
+ import threading
+ import requests
+ from PySide6.QtWidgets import QDialog, QVBoxLayout, QLabel, QProgressBar, QPushButton
+ from PySide6.QtCore import Qt, QTimer
+ from PySide6.QtGui import QIcon
+
+ # Create a progress dialog
+ progress_dialog = QDialog(self.dialog)
+ progress_dialog.setWindowTitle(f"Downloading {model_name.upper()} Model")
+ progress_dialog.setFixedSize(400, 150)
+ progress_dialog.setModal(True)
+
+ # Set window icon
+ icon_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'halgakos.ico')
+ if os.path.exists(icon_path):
+ progress_dialog.setWindowIcon(QIcon(icon_path))
+
+ layout = QVBoxLayout(progress_dialog)
+
+ # Progress label
+ progress_label = QLabel("⏳ Downloading...")
+ progress_label.setAlignment(Qt.AlignCenter)
+ layout.addWidget(progress_label)
+
+ # Progress bar
+ progress_bar = QProgressBar()
+ progress_bar.setMinimum(0)
+ progress_bar.setMaximum(100)
+ progress_bar.setValue(0)
+ layout.addWidget(progress_bar)
+
+ # Status label
+ status_label = QLabel("0%")
+ status_label.setAlignment(Qt.AlignCenter)
+ layout.addWidget(status_label)
+
+ # Cancel flag
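+        # (a plain dict is used so the nested closures below can read and mutate the flag)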
+ cancel_download = {'value': False}
+
+ def on_cancel():
+ cancel_download['value'] = True
+ progress_dialog.close()
+
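+        # Closing the progress dialog is treated as a cancel; the download thread checks the flag and removes any partial file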
+ progress_dialog.closeEvent = lambda event: on_cancel()
+
+ def download_thread():
+ import time
+ try:
+ # Download with progress and speed tracking
+ response = requests.get(url, stream=True, timeout=30)
+ response.raise_for_status()
+
+ total_size = int(response.headers.get('content-length', 0))
+ downloaded = 0
+ start_time = time.time()
+ last_update = start_time
+
+ with open(save_path, 'wb') as f:
+ for chunk in response.iter_content(chunk_size=8192):
+ if cancel_download['value']:
+ # Clean up partial file
+ f.close()
+ if os.path.exists(save_path):
+ os.remove(save_path)
+ return
+
+ if chunk:
+ f.write(chunk)
+ downloaded += len(chunk)
+
+ # Update progress (throttle updates to every 0.1 seconds)
+ current_time = time.time()
+ if total_size > 0 and (current_time - last_update > 0.1):
+ last_update = current_time
+ elapsed = current_time - start_time
+ speed = downloaded / elapsed if elapsed > 0 else 0
+ speed_mb = speed / (1024 * 1024)
+ progress = (downloaded / total_size) * 100
+
+ # Direct widget updates work in PySide6 from threads
+ try:
+ progress_bar.setValue(int(progress))
+ status_label.setText(f"{progress:.1f}% - {speed_mb:.2f} MB/s")
+ progress_label.setText(f"⏳ Downloading... {downloaded//1024//1024}MB / {total_size//1024//1024}MB")
+ except RuntimeError:
+ # Widget was destroyed, exit
+ cancel_download['value'] = True
+ return
+
+ # Success - direct call (works in PySide6/Qt6)
+ try:
+ progress_dialog.close()
+ self._download_complete(save_path, model_name)
+ except Exception as e:
+ print(f"Error in download completion: {e}")
+
+ except requests.exceptions.RequestException as e:
+ # Error - direct call
+ if not cancel_download['value']:
+ try:
+ progress_dialog.close()
+ self._download_failed(str(e))
+ except Exception as ex:
+ print(f"Error handling download failure: {ex}")
+ except Exception as e:
+ if not cancel_download['value']:
+ try:
+ progress_dialog.close()
+ self._download_failed(str(e))
+ except Exception as ex:
+ print(f"Error handling download failure: {ex}")
+
+ # Start download in background thread
+ thread = threading.Thread(target=download_thread, daemon=True)
+ thread.start()
+
+ # Show dialog
+ progress_dialog.exec()
+
+ def _download_complete(self, save_path: str, model_name: str):
+ """Handle successful download"""
+ from PySide6.QtWidgets import QMessageBox
+
+ # Update the model path entry
+ self.local_model_entry.setText(save_path)
+ self.local_model_path_value = save_path
+
+ # Save to config
+ self.main_gui.config[f'manga_{model_name}_model_path'] = save_path
+ self._save_rendering_settings()
+
+ # Log to main GUI
+ self.main_gui.append_log(f"✅ Downloaded {model_name} model to: {save_path}")
+
+ # Auto-load the downloaded model (direct call)
+ self.local_model_status_label.setText("⏳ Loading downloaded model...")
+ self.local_model_status_label.setStyleSheet("color: orange;")
+
+        # _try_load_model returns True once the background load has been started (not when it finishes)
+        if self._try_load_model(model_name, save_path):
+            QMessageBox.information(self.dialog, "Success", f"{model_name.upper()} model downloaded - loading in background")
+        else:
+            QMessageBox.information(self.dialog, "Download Complete", f"{model_name.upper()} model downloaded but needs manual loading")
+
+ def _download_failed(self, error: str):
+ """Handle download failure"""
+ from PySide6.QtWidgets import QMessageBox
+
+ QMessageBox.critical(self.dialog, "Download Failed", f"Failed to download model:\n{error}")
+ self.main_gui.append_log(f"❌ Model download failed: {error}")
+
+ def _show_model_info(self):
+ """Show information about models"""
+ model_type = self.local_model_type_value
+
+ info = {
+ 'aot': "AOT GAN Model:\n\n"
+ "• Auto-downloads from HuggingFace\n"
+ "• Traced PyTorch JIT model\n"
+ "• Good for general inpainting\n"
+ "• Fast processing speed\n"
+ "• File size: ~100MB",
+
+ 'aot_onnx': "AOT ONNX Model:\n\n"
+ "• Optimized ONNX version\n"
+ "• Auto-downloads from HuggingFace\n"
+ "• 2-3x faster than PyTorch version\n"
+ "• Great for batch processing\n"
+ "• Lower memory usage\n"
+ "• File size: ~100MB",
+
+ 'lama': "LaMa Model:\n\n"
+ "• Auto-downloads anime-optimized version\n"
+ "• Best quality for manga/anime\n"
+ "• Large model (~200MB)\n"
+ "• Excellent at removing text from bubbles\n"
+ "• Preserves art style well",
+
+ 'anime': "Anime-Specific Model:\n\n"
+ "• Same as LaMa anime version\n"
+ "• Optimized for manga/anime art\n"
+ "• Auto-downloads from GitHub\n"
+ "• Recommended for manga translation\n"
+ "• Preserves screen tones and patterns",
+
+ 'anime_onnx': "Anime ONNX Model:\n\n"
+ "• Optimized ONNX version for speed\n"
+ "• Auto-downloads from HuggingFace\n"
+ "• 2-3x faster than PyTorch version\n"
+ "• Perfect for batch processing\n"
+ "• Same quality as anime model\n"
+ "• File size: ~190MB\n"
+ "• DEFAULT for inpainting",
+
+ 'mat': "MAT Model:\n\n"
+ "• Manual download required\n"
+ "• Get from: github.com/fenglinglwb/MAT\n"
+ "• Good for high-resolution images\n"
+ "• Slower but high quality\n"
+ "• File size: ~500MB",
+
+ 'ollama': "Ollama:\n\n"
+ "• Uses local Ollama server\n"
+ "• No model download needed here\n"
+ "• Run: ollama pull llava\n"
+ "• Context-aware inpainting\n"
+ "• Requires Ollama running locally",
+
+ 'sd_local': "Stable Diffusion:\n\n"
+ "• Manual download required\n"
+ "• Get from HuggingFace\n"
+ "• Requires significant VRAM (4-8GB)\n"
+ "• Best quality but slowest\n"
+ "• Can use custom prompts"
+ }
+
+ from PySide6.QtWidgets import QDialog, QVBoxLayout, QTextEdit, QPushButton
+ from PySide6.QtCore import Qt
+
+ # Create info dialog
+ info_dialog = QDialog(self.dialog)
+ info_dialog.setWindowTitle(f"{model_type.upper()} Model Information")
+ info_dialog.setFixedSize(450, 350)
+ info_dialog.setModal(True)
+
+ layout = QVBoxLayout(info_dialog)
+
+ # Info text
+ text_widget = QTextEdit()
+ text_widget.setReadOnly(True)
+ text_widget.setPlainText(info.get(model_type, "Please select a model type first"))
+ layout.addWidget(text_widget)
+
+ # Close button
+ close_btn = QPushButton("Close")
+ close_btn.clicked.connect(info_dialog.close)
+ close_btn.setStyleSheet("QPushButton { background-color: #6c757d; color: white; padding: 5px 15px; }")
+ layout.addWidget(close_btn)
+
+ info_dialog.exec()
+
+ def _toggle_inpaint_controls_visibility(self):
+ """Toggle visibility of inpaint controls (mask expansion and passes) based on skip inpainting setting"""
+ # Just return if the frame doesn't exist - prevents AttributeError
+ if not hasattr(self, 'inpaint_controls_frame'):
+ return
+
+ if self.skip_inpainting_value:
+ self.inpaint_controls_frame.hide()
+ else:
+ # Show it back
+ self.inpaint_controls_frame.show()
+
+ def _configure_inpaint_api(self):
+ """Configure cloud inpainting API"""
+ from PySide6.QtWidgets import QMessageBox, QDialog, QVBoxLayout, QHBoxLayout, QLabel, QLineEdit, QPushButton
+ from PySide6.QtCore import Qt
+ import webbrowser
+
+ # Show instructions
+ result = QMessageBox.question(
+ self.dialog,
+ "Configure Cloud Inpainting",
+            "Cloud inpainting uses the Replicate API (results can vary).\n\n"
+            "1. Go to replicate.com and sign up (a free tier may be available)\n"
+            "2. Get your API token from Account Settings\n"
+            "3. Enter it here\n\n"
+            "Pricing: roughly $0.0023 per image (subject to change)\n"
+            "Free tier: roughly 100 images per month (subject to change)\n\n"
+ "Would you like to proceed?",
+ QMessageBox.Yes | QMessageBox.No
+ )
+
+ if result != QMessageBox.Yes:
+ return
+
+ # Open Replicate page
+ webbrowser.open("https://replicate.com/account/api-tokens")
+
+ # Create API key input dialog
+ api_dialog = QDialog(self.dialog)
+ api_dialog.setWindowTitle("Replicate API Key")
+ api_dialog.setFixedSize(400, 150)
+ api_dialog.setModal(True)
+
+ layout = QVBoxLayout(api_dialog)
+ layout.setContentsMargins(20, 20, 20, 20)
+
+ # Label
+ label = QLabel("Enter your Replicate API key:")
+ layout.addWidget(label)
+
+ # Entry with show/hide
+ entry_layout = QHBoxLayout()
+ entry = QLineEdit()
+ entry.setEchoMode(QLineEdit.Password)
+ entry_layout.addWidget(entry)
+
+ # Toggle show/hide
+ show_btn = QPushButton("Show")
+ show_btn.setFixedWidth(60)
+ def toggle_show():
+ if entry.echoMode() == QLineEdit.Password:
+ entry.setEchoMode(QLineEdit.Normal)
+ show_btn.setText("Hide")
+ else:
+ entry.setEchoMode(QLineEdit.Password)
+ show_btn.setText("Show")
+ show_btn.clicked.connect(toggle_show)
+ entry_layout.addWidget(show_btn)
+
+ layout.addLayout(entry_layout)
+
+ # Buttons
+ btn_layout = QHBoxLayout()
+ btn_layout.addStretch()
+
+ cancel_btn = QPushButton("Cancel")
+ cancel_btn.clicked.connect(api_dialog.reject)
+ btn_layout.addWidget(cancel_btn)
+
+ ok_btn = QPushButton("OK")
+ ok_btn.setStyleSheet("QPushButton { background-color: #28a745; color: white; padding: 5px 15px; }")
+ ok_btn.clicked.connect(api_dialog.accept)
+ btn_layout.addWidget(ok_btn)
+
+ layout.addLayout(btn_layout)
+
+ # Focus and key bindings
+ entry.setFocus()
+
+ # Execute dialog
+ if api_dialog.exec() == QDialog.Accepted:
+ api_key = entry.text().strip()
+
+ if api_key:
+ try:
+ # Save the API key
+ self.main_gui.config['replicate_api_key'] = api_key
+ self.main_gui.save_config(show_message=False)
+
+ # Update UI
+ self.inpaint_api_status_label.setText("✅ Cloud inpainting configured")
+ self.inpaint_api_status_label.setStyleSheet("color: green;")
+
+ # Set flag on translator
+ if self.translator:
+ self.translator.use_cloud_inpainting = True
+ self.translator.replicate_api_key = api_key
+
+ self._log("✅ Cloud inpainting API configured", "success")
+
+ except Exception as e:
+ QMessageBox.critical(self.dialog, "Error", f"Failed to save API key:\n{str(e)}")
+
+ def _clear_inpaint_api(self):
+ """Clear the inpainting API configuration"""
+ self.main_gui.config['replicate_api_key'] = ''
+ self.main_gui.save_config(show_message=False)
+
+ self.inpaint_api_status_label.setText("❌ Inpainting API not configured")
+ self.inpaint_api_status_label.setStyleSheet("color: red;")
+
+ if hasattr(self, 'translator') and self.translator:
+ self.translator.use_cloud_inpainting = False
+ self.translator.replicate_api_key = None
+
+ self._log("🗑️ Cleared inpainting API configuration", "info")
+
+ # Note: Clear button management would need to be handled differently in PySide6
+ # For now, we'll skip automatic button removal
+
+ def _add_files(self):
+ """Add image files (and CBZ archives) to the list"""
+ from PySide6.QtWidgets import QFileDialog
+
+ files, _ = QFileDialog.getOpenFileNames(
+ self.dialog,
+ "Select Manga Images or CBZ",
+ "",
+ "Images / CBZ (*.png *.jpg *.jpeg *.gif *.bmp *.webp *.cbz);;Image files (*.png *.jpg *.jpeg *.gif *.bmp *.webp);;Comic Book Zip (*.cbz);;All files (*.*)"
+ )
+
+ if not files:
+ return
+
+ # Ensure temp root for CBZ extraction lives for the session
+ cbz_temp_root = getattr(self, 'cbz_temp_root', None)
+ if cbz_temp_root is None:
+ try:
+ import tempfile
+ cbz_temp_root = tempfile.mkdtemp(prefix='glossarion_cbz_')
+ self.cbz_temp_root = cbz_temp_root
+ except Exception:
+ cbz_temp_root = None
+
+ for path in files:
+ lower = path.lower()
+ if lower.endswith('.cbz'):
+                # Extract images from the CBZ and add them in sorted (lexicographic) order
+ try:
+ import zipfile, shutil
+ base = os.path.splitext(os.path.basename(path))[0]
+ extract_dir = os.path.join(self.cbz_temp_root or os.path.dirname(path), base)
+ os.makedirs(extract_dir, exist_ok=True)
+ with zipfile.ZipFile(path, 'r') as zf:
+ # Extract all to preserve subfolders and avoid name collisions
+ zf.extractall(extract_dir)
+ # Initialize CBZ job tracking
+ if not hasattr(self, 'cbz_jobs'):
+ self.cbz_jobs = {}
+ if not hasattr(self, 'cbz_image_to_job'):
+ self.cbz_image_to_job = {}
+ # Prepare output dir next to source CBZ
+ out_dir = os.path.join(os.path.dirname(path), f"{base}_translated")
+ self.cbz_jobs[path] = {
+ 'extract_dir': extract_dir,
+ 'out_dir': out_dir,
+ }
+ # Collect all images recursively from extract_dir
+ added = 0
+ for root, _, files_in_dir in os.walk(extract_dir):
+ for fn in sorted(files_in_dir):
+ if fn.lower().endswith(('.png', '.jpg', '.jpeg', '.webp', '.bmp', '.gif')):
+ target_path = os.path.join(root, fn)
+ if target_path not in self.selected_files:
+ self.selected_files.append(target_path)
+ self.file_listbox.addItem(os.path.basename(target_path))
+ added += 1
+ # Map extracted image to its CBZ job
+ self.cbz_image_to_job[target_path] = path
+ self._log(f"📦 Added {added} images from CBZ: {os.path.basename(path)}", "info")
+ except Exception as e:
+ self._log(f"❌ Failed to read CBZ {os.path.basename(path)}: {e}", "error")
+ else:
+ if path not in self.selected_files:
+ self.selected_files.append(path)
+ self.file_listbox.addItem(os.path.basename(path))
+
+ def _add_folder(self):
+ """Add all images (and CBZ archives) from a folder"""
+ from PySide6.QtWidgets import QFileDialog
+
+ folder = QFileDialog.getExistingDirectory(
+ self.dialog,
+ "Select Folder with Manga Images or CBZ"
+ )
+ if not folder:
+ return
+
+ # Extensions
+ image_extensions = {'.png', '.jpg', '.jpeg', '.gif', '.bmp', '.webp'}
+ cbz_ext = '.cbz'
+
+ # Ensure temp root for CBZ extraction lives for the session
+ cbz_temp_root = getattr(self, 'cbz_temp_root', None)
+ if cbz_temp_root is None:
+ try:
+ import tempfile
+ cbz_temp_root = tempfile.mkdtemp(prefix='glossarion_cbz_')
+ self.cbz_temp_root = cbz_temp_root
+ except Exception:
+ cbz_temp_root = None
+
+ for filename in sorted(os.listdir(folder)):
+ filepath = os.path.join(folder, filename)
+ if not os.path.isfile(filepath):
+ continue
+ lower = filename.lower()
+ if any(lower.endswith(ext) for ext in image_extensions):
+ if filepath not in self.selected_files:
+ self.selected_files.append(filepath)
+ self.file_listbox.addItem(filename)
+ elif lower.endswith(cbz_ext):
+ # Extract images from CBZ archive
+ try:
+ import zipfile, shutil
+ base = os.path.splitext(os.path.basename(filepath))[0]
+ extract_dir = os.path.join(self.cbz_temp_root or folder, base)
+ os.makedirs(extract_dir, exist_ok=True)
+ with zipfile.ZipFile(filepath, 'r') as zf:
+ zf.extractall(extract_dir)
+ # Initialize CBZ job tracking
+ if not hasattr(self, 'cbz_jobs'):
+ self.cbz_jobs = {}
+ if not hasattr(self, 'cbz_image_to_job'):
+ self.cbz_image_to_job = {}
+ # Prepare output dir next to source CBZ
+ out_dir = os.path.join(os.path.dirname(filepath), f"{base}_translated")
+ self.cbz_jobs[filepath] = {
+ 'extract_dir': extract_dir,
+ 'out_dir': out_dir,
+ }
+ # Collect all images recursively
+ added = 0
+ for root, _, files_in_dir in os.walk(extract_dir):
+ for fn in sorted(files_in_dir):
+ if fn.lower().endswith(tuple(image_extensions)):
+ target_path = os.path.join(root, fn)
+ if target_path not in self.selected_files:
+ self.selected_files.append(target_path)
+ self.file_listbox.addItem(os.path.basename(target_path))
+ added += 1
+ # Map extracted image to its CBZ job
+ self.cbz_image_to_job[target_path] = filepath
+ self._log(f"📦 Added {added} images from CBZ: {filename}", "info")
+ except Exception as e:
+ self._log(f"❌ Failed to read CBZ {filename}: {e}", "error")
+
+ def _remove_selected(self):
+ """Remove selected files from the list"""
+ selected_items = self.file_listbox.selectedItems()
+
+ if not selected_items:
+ return
+
+        # The row is recomputed for each item as previous ones are removed, keeping the listbox and selected_files in sync
+ for item in selected_items:
+ row = self.file_listbox.row(item)
+ self.file_listbox.takeItem(row)
+ if 0 <= row < len(self.selected_files):
+ del self.selected_files[row]
+
+ def _clear_all(self):
+ """Clear all files from the list"""
+ self.file_listbox.clear()
+ self.selected_files.clear()
+
+ def _finalize_cbz_jobs(self):
+ """Package translated outputs back into .cbz for each imported CBZ.
+ - Always creates a CLEAN archive with only final translated pages.
+ - If save_intermediate is enabled in settings, also creates a DEBUG archive that
+ contains the same final pages at root plus debug/raw artifacts under subfolders.
+ """
+ try:
+ if not hasattr(self, 'cbz_jobs') or not self.cbz_jobs:
+ return
+ import zipfile
+ # Read debug flag from settings
+ save_debug = False
+ try:
+ save_debug = bool(self.main_gui.config.get('manga_settings', {}).get('advanced', {}).get('save_intermediate', False))
+ except Exception:
+ save_debug = False
+ image_exts = ('.png', '.jpg', '.jpeg', '.webp', '.bmp', '.gif')
+ text_exts = ('.txt', '.json', '.csv', '.log')
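+            # Filename suffixes that mark intermediate artifacts (masks, overlays, OCR dumps, etc.) and must stay out of the clean archive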
+ excluded_patterns = ('_mask', '_overlay', '_debug', '_raw', '_ocr', '_regions', '_chunk', '_clean', '_cleaned', '_inpaint', '_inpainted')
+
+ for cbz_path, job in self.cbz_jobs.items():
+ out_dir = job.get('out_dir')
+ if not out_dir or not os.path.isdir(out_dir):
+ continue
+ parent = os.path.dirname(cbz_path)
+ base = os.path.splitext(os.path.basename(cbz_path))[0]
+
+ # Compute original basenames from extracted images mapping
+ original_basenames = set()
+ try:
+ if hasattr(self, 'cbz_image_to_job'):
+ for img_path, job_path in self.cbz_image_to_job.items():
+ if job_path == cbz_path:
+ original_basenames.add(os.path.basename(img_path))
+ except Exception:
+ pass
+
+ # Helper to iterate files in out_dir
+ all_files = []
+ for root, _, files in os.walk(out_dir):
+ for fn in files:
+ fp = os.path.join(root, fn)
+ rel = os.path.relpath(fp, out_dir)
+ all_files.append((fp, rel, fn))
+
+ # 1) CLEAN ARCHIVE: only final images matching original basenames
+ clean_zip = os.path.join(parent, f"{base}_translated.cbz")
+ clean_count = 0
+ with zipfile.ZipFile(clean_zip, 'w', zipfile.ZIP_DEFLATED) as zf:
+ for fp, rel, fn in all_files:
+ fn_lower = fn.lower()
+ if not fn_lower.endswith(image_exts):
+ continue
+ if original_basenames and fn not in original_basenames:
+ # Only include pages corresponding to original entries
+ continue
+ # Also skip obvious debug artifacts by pattern (extra safeguard)
+ if any(p in fn_lower for p in excluded_patterns):
+ continue
+ zf.write(fp, fn) # place at root with page filename
+ clean_count += 1
+ self._log(f"📦 Compiled CLEAN {clean_count} pages into {os.path.basename(clean_zip)}", "success")
+
+ # 2) DEBUG ARCHIVE: include final pages + extras under subfolders
+ if save_debug:
+ debug_zip = os.path.join(parent, f"{base}_translated_debug.cbz")
+ dbg_count = 0
+ raw_count = 0
+ page_count = 0
+ with zipfile.ZipFile(debug_zip, 'w', zipfile.ZIP_DEFLATED) as zf:
+ for fp, rel, fn in all_files:
+ fn_lower = fn.lower()
+ # Final pages at root
+ if fn_lower.endswith(image_exts) and (not original_basenames or fn in original_basenames) and not any(p in fn_lower for p in excluded_patterns):
+ zf.write(fp, fn)
+ page_count += 1
+ continue
+ # Raw text/logs
+ if fn_lower.endswith(text_exts):
+ zf.write(fp, os.path.join('raw', rel))
+ raw_count += 1
+ continue
+ # Other images or artifacts -> debug/
+ zf.write(fp, os.path.join('debug', rel))
+ dbg_count += 1
+ self._log(f"📦 Compiled DEBUG archive: pages={page_count}, debug_files={dbg_count}, raw={raw_count} -> {os.path.basename(debug_zip)}", "info")
+ except Exception as e:
+ self._log(f"⚠️ Failed to compile CBZ packages: {e}", "warning")
+
+ def _attach_logging_bridge(self):
+ """Attach a root logging handler that forwards records into the GUI log."""
+ try:
+ if getattr(self, '_gui_log_handler', None) is None:
+ handler = _MangaGuiLogHandler(self, level=logging.INFO)
+ root_logger = logging.getLogger()
+ # Avoid duplicates
+ if all(not isinstance(h, _MangaGuiLogHandler) for h in root_logger.handlers):
+ root_logger.addHandler(handler)
+ self._gui_log_handler = handler
+ # Ensure common module loggers propagate
+ for name in ['bubble_detector', 'local_inpainter', 'manga_translator']:
+ try:
+ lg = logging.getLogger(name)
+ lg.setLevel(logging.INFO)
+ lg.propagate = True
+ except Exception:
+ pass
+ except Exception:
+ pass
+
+ def _redirect_stderr(self, enable: bool):
+ """Temporarily redirect stderr to the GUI log (captures tqdm/HF progress)."""
+ try:
+ if enable:
+ if not hasattr(self, '_old_stderr') or self._old_stderr is None:
+ self._old_stderr = sys.stderr
+ sys.stderr = _StreamToGuiLog(lambda s: self._log(s, 'info'))
+ self._stderr_redirect_on = True
+ else:
+ if hasattr(self, '_old_stderr') and self._old_stderr is not None:
+ sys.stderr = self._old_stderr
+ self._old_stderr = None
+ self._stderr_redirect_on = False
+ # Update combined flag to avoid double-forwarding with logging handler
+ self._stdio_redirect_active = bool(self._stdout_redirect_on or self._stderr_redirect_on)
+ except Exception:
+ pass
+
+ def _redirect_stdout(self, enable: bool):
+ """Temporarily redirect stdout to the GUI log."""
+ try:
+ if enable:
+ if not hasattr(self, '_old_stdout') or self._old_stdout is None:
+ self._old_stdout = sys.stdout
+ sys.stdout = _StreamToGuiLog(lambda s: self._log(s, 'info'))
+ self._stdout_redirect_on = True
+ else:
+ if hasattr(self, '_old_stdout') and self._old_stdout is not None:
+ sys.stdout = self._old_stdout
+ self._old_stdout = None
+ self._stdout_redirect_on = False
+ # Update combined flag to avoid double-forwarding with logging handler
+ self._stdio_redirect_active = bool(self._stdout_redirect_on or self._stderr_redirect_on)
+ except Exception:
+ pass
+
+ def _log(self, message: str, level: str = "info"):
+ """Log message to GUI text widget or console with enhanced stop suppression"""
+ # Enhanced stop suppression - allow only essential stop confirmation messages
+ if self._is_stop_requested() or self.is_globally_cancelled():
+ # Only allow very specific stop confirmation messages - nothing else
+ essential_stop_keywords = [
+ "⏹️ Translation stopped by user",
+ "🧹 Cleaning up models to free RAM",
+ "✅ Model cleanup complete - RAM should be freed",
+ "✅ All models cleaned up - RAM freed!"
+ ]
+ # Suppress ALL other messages when stopped - be very restrictive
+ if not any(keyword in message for keyword in essential_stop_keywords):
+ return
+
+ # Lightweight deduplication: ignore identical lines within a short interval
+ try:
+ now = time.time()
+ last_msg = getattr(self, '_last_log_msg', None)
+ last_ts = getattr(self, '_last_log_time', 0)
+ if last_msg == message and (now - last_ts) < 0.7:
+ return
+ except Exception:
+ pass
+
+ # Store in persistent log (thread-safe)
+ try:
+ with MangaTranslationTab._persistent_log_lock:
+ # Keep only last 1000 messages to avoid unbounded growth
+ if len(MangaTranslationTab._persistent_log) >= 1000:
+ MangaTranslationTab._persistent_log.pop(0)
+ MangaTranslationTab._persistent_log.append((message, level))
+ except Exception:
+ pass
+
+ # Check if log_text widget exists yet
+ if hasattr(self, 'log_text') and self.log_text:
+ # Thread-safe logging to GUI
+ if threading.current_thread() == threading.main_thread():
+ # We're in the main thread, update directly
+ try:
+ # PySide6 QTextEdit - append with color
+ color_map = {
+ 'info': 'white',
+ 'success': 'green',
+ 'warning': 'orange',
+ 'error': 'red',
+ 'debug': 'lightblue'
+ }
+ color = color_map.get(level, 'white')
+ self.log_text.setTextColor(QColor(color))
+ self.log_text.append(message)
+ except Exception:
+ pass
+ else:
+ # We're in a background thread, use queue
+ self.update_queue.put(('log', message, level))
+ else:
+ # Widget doesn't exist yet or we're in initialization, print to console
+ print(message)
+
+ # Update deduplication state
+ try:
+ self._last_log_msg = message
+ self._last_log_time = time.time()
+ except Exception:
+ pass
+
+ def _update_progress(self, current: int, total: int, status: str):
+ """Thread-safe progress update"""
+ self.update_queue.put(('progress', current, total, status))
+
+ def _update_current_file(self, filename: str):
+ """Thread-safe current file update"""
+ self.update_queue.put(('current_file', filename))
+
+ def _start_startup_heartbeat(self):
+ """Show a small spinner in the progress label during startup so there is no silence."""
+ try:
+ self._startup_heartbeat_running = True
+ self._heartbeat_idx = 0
+ chars = ['|', '/', '-', '\\']
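+            # tick() repaints the spinner and reschedules itself every 250 ms until the running flag is cleared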
+ def tick():
+ if not getattr(self, '_startup_heartbeat_running', False):
+ return
+ try:
+ c = chars[self._heartbeat_idx % len(chars)]
+ if hasattr(self, 'progress_label'):
+ self.progress_label.setText(f"Starting… {c}")
+ self.progress_label.setStyleSheet("color: white;")
+ # Force update to ensure it's visible
+ from PySide6.QtWidgets import QApplication
+ QApplication.processEvents()
+ except Exception:
+ pass
+ self._heartbeat_idx += 1
+ # Schedule next tick with QTimer - only if still running
+ if getattr(self, '_startup_heartbeat_running', False):
+ QTimer.singleShot(250, tick)
+ # Kick off
+ QTimer.singleShot(0, tick)
+ except Exception:
+ pass
+
+ def _stop_startup_heartbeat(self):
+ """Stop the startup heartbeat spinner"""
+ try:
+ self._startup_heartbeat_running = False
+ # Clear the spinner text immediately
+ if hasattr(self, 'progress_label') and self.progress_label:
+ self.progress_label.setText("Initializing...")
+ self.progress_label.setStyleSheet("color: white;")
+ except Exception:
+ pass
+
+ def _process_updates(self):
+ """Process queued GUI updates"""
+ try:
+ while True:
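+                # get_nowait() raises queue.Empty once the queue is drained; the broad except below ends this pass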
+ update = self.update_queue.get_nowait()
+
+ if update[0] == 'log':
+ _, message, level = update
+ try:
+ # PySide6 QTextEdit
+ color_map = {
+ 'info': 'white',
+ 'success': 'green',
+ 'warning': 'orange',
+ 'error': 'red',
+ 'debug': 'lightblue'
+ }
+ color = color_map.get(level, 'white')
+ self.log_text.setTextColor(QColor(color))
+ self.log_text.append(message)
+ except Exception:
+ pass
+
+ elif update[0] == 'progress':
+ _, current, total, status = update
+ if total > 0:
+ percentage = (current / total) * 100
+ self.progress_bar.setValue(int(percentage))
+
+ # Check if this is a stopped status and style accordingly
+ if "stopped" in status.lower() or "cancelled" in status.lower():
+ # Make the status more prominent for stopped translations
+ self.progress_label.setText(f"⏹️ {status}")
+ self.progress_label.setStyleSheet("color: orange;")
+ elif "complete" in status.lower() or "finished" in status.lower():
+ # Success status
+ self.progress_label.setText(f"✅ {status}")
+ self.progress_label.setStyleSheet("color: green;")
+ elif "error" in status.lower() or "failed" in status.lower():
+ # Error status
+ self.progress_label.setText(f"❌ {status}")
+ self.progress_label.setStyleSheet("color: red;")
+ else:
+ # Normal status - white for dark mode
+ self.progress_label.setText(status)
+ self.progress_label.setStyleSheet("color: white;")
+
+ elif update[0] == 'current_file':
+ _, filename = update
+ # Style the current file display based on the status
+ if "stopped" in filename.lower() or "cancelled" in filename.lower():
+ self.current_file_label.setText(f"⏹️ {filename}")
+ self.current_file_label.setStyleSheet("color: orange;")
+ elif "complete" in filename.lower() or "finished" in filename.lower():
+ self.current_file_label.setText(f"✅ {filename}")
+ self.current_file_label.setStyleSheet("color: green;")
+ elif "error" in filename.lower() or "failed" in filename.lower():
+ self.current_file_label.setText(f"❌ {filename}")
+ self.current_file_label.setStyleSheet("color: red;")
+ else:
+ self.current_file_label.setText(f"Current: {filename}")
+ self.current_file_label.setStyleSheet("color: lightgray;")
+
+ elif update[0] == 'ui_state':
+ _, state = update
+ if state == 'translation_started':
+ try:
+ if hasattr(self, 'start_button') and self.start_button:
+ self.start_button.setEnabled(False)
+ if hasattr(self, 'stop_button') and self.stop_button:
+ self.stop_button.setEnabled(True)
+ if hasattr(self, 'file_listbox') and self.file_listbox:
+ self.file_listbox.setEnabled(False)
+ except Exception:
+ pass
+
+ elif update[0] == 'call_method':
+ # Call a method on the main thread
+ _, method, args = update
+ try:
+ method(*args)
+ except Exception as e:
+ import traceback
+ print(f"Error calling method {method}: {e}")
+ print(traceback.format_exc())
+
+ except Exception:
+ # Queue is empty or some other exception
+ pass
+
+ # Schedule next update with QTimer
+ QTimer.singleShot(100, self._process_updates)
+
+ def load_local_inpainting_model(self, model_path):
+ """Load a local inpainting model
+
+ Args:
+ model_path: Path to the model file
+
+ Returns:
+ bool: True if successful
+ """
+ try:
+ # Store the model path
+ self.local_inpaint_model_path = model_path
+
+ # If using diffusers/torch models, load them here
+ if model_path.endswith('.safetensors') or model_path.endswith('.ckpt'):
+ # Initialize your inpainting pipeline
+ # This depends on your specific inpainting implementation
+ # Example:
+ # from diffusers import StableDiffusionInpaintPipeline
+ # self.inpaint_pipeline = StableDiffusionInpaintPipeline.from_single_file(model_path)
+ pass
+
+ return True
+ except Exception as e:
+ self._log(f"Failed to load inpainting model: {e}", "error")
+ return False
+
+ def _start_translation(self):
+ """Start the translation process"""
+ # Check files BEFORE redirecting stdout to avoid deadlock
+ if not self.selected_files:
+ from PySide6.QtWidgets import QMessageBox
+ QMessageBox.warning(self.dialog, "No Files", "Please select manga images to translate.")
+ return
+
+ # Immediately disable Start to prevent double-clicks
+ try:
+ if hasattr(self, 'start_button') and self.start_button:
+ self.start_button.setEnabled(False)
+ except Exception:
+ pass
+
+ # Immediate minimal feedback using direct log append
+ try:
+ if hasattr(self, 'log_text') and self.log_text:
+ from PySide6.QtGui import QColor
+ self.log_text.setTextColor(QColor('white'))
+ self.log_text.append("Starting translation...")
+ except Exception:
+ pass
+
+ # Start heartbeat spinner so there's visible activity until logs stream
+ self._start_startup_heartbeat()
+
+ # Reset all stop flags at the start of new translation
+ self.is_running = False
+ if hasattr(self, 'stop_flag'):
+ self.stop_flag.clear()
+ self._reset_global_cancellation()
+
+ # Log start directly to GUI
+ try:
+ if hasattr(self, 'log_text') and self.log_text:
+ from PySide6.QtGui import QColor, QTextCursor
+ from PySide6.QtCore import QTimer
+ self.log_text.setTextColor(QColor('white'))
+ self.log_text.append("🚀 Starting new manga translation batch")
+
+ # Scroll to bottom after a short delay to ensure it happens after button processing
+ def scroll_to_bottom():
+ try:
+ if hasattr(self, 'log_text') and self.log_text:
+ self.log_text.moveCursor(QTextCursor.End)
+ self.log_text.ensureCursorVisible()
+ # Also scroll the parent scroll area if it exists
+ if hasattr(self, 'scroll_area') and self.scroll_area:
+ scrollbar = self.scroll_area.verticalScrollBar()
+ if scrollbar:
+ scrollbar.setValue(scrollbar.maximum())
+ except Exception:
+ pass
+
+ # Schedule scroll with a small delay
+ QTimer.singleShot(50, scroll_to_bottom)
+ QTimer.singleShot(150, scroll_to_bottom) # Second attempt to be sure
+ except Exception:
+ pass
+
+ # Force GUI update
+ try:
+ from PySide6.QtWidgets import QApplication
+ QApplication.processEvents()
+ except Exception:
+ pass
+
+ # Run the heavy preparation and kickoff in a background thread to avoid GUI freeze
+ threading.Thread(target=self._start_translation_heavy, name="MangaStartHeavy", daemon=True).start()
+ return
+
+ def _start_translation_heavy(self):
+ """Heavy part of start: build configs, init client/translator, and launch worker (runs off-main-thread)."""
+ try:
+ # Set thread limits based on parallel processing settings
+ try:
+ advanced = self.main_gui.config.get('manga_settings', {}).get('advanced', {})
+ parallel_enabled = advanced.get('parallel_processing', False)
+
+ if parallel_enabled:
+ # Allow multiple threads for parallel processing
+ num_threads = advanced.get('max_workers', 4)
+ import os
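+                    # Cap the native thread pools (OpenMP / MKL / OpenBLAS / numexpr / Accelerate / onnxruntime) to the configured worker count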
+ os.environ['OMP_NUM_THREADS'] = str(num_threads)
+ os.environ['MKL_NUM_THREADS'] = str(num_threads)
+ os.environ['OPENBLAS_NUM_THREADS'] = str(num_threads)
+ os.environ['NUMEXPR_NUM_THREADS'] = str(num_threads)
+ os.environ['VECLIB_MAXIMUM_THREADS'] = str(num_threads)
+ os.environ['ONNXRUNTIME_NUM_THREADS'] = str(num_threads)
+ try:
+ import torch
+ torch.set_num_threads(num_threads)
+ except ImportError:
+ pass
+ try:
+ import cv2
+ cv2.setNumThreads(num_threads)
+ except (ImportError, AttributeError):
+ pass
+ self._log(f"⚡ Thread limit: {num_threads} threads (parallel processing enabled)", "debug")
+ else:
+ # HARDCODED: Limit to exactly 1 thread for sequential processing
+ import os
+ os.environ['OMP_NUM_THREADS'] = '1'
+ os.environ['MKL_NUM_THREADS'] = '1'
+ os.environ['OPENBLAS_NUM_THREADS'] = '1'
+ os.environ['NUMEXPR_NUM_THREADS'] = '1'
+ os.environ['VECLIB_MAXIMUM_THREADS'] = '1'
+ os.environ['ONNXRUNTIME_NUM_THREADS'] = '1'
+ try:
+ import torch
+ torch.set_num_threads(1) # Hardcoded to 1
+ except ImportError:
+ pass
+ try:
+ import cv2
+ cv2.setNumThreads(1) # Limit OpenCV to 1 thread
+ except (ImportError, AttributeError):
+ pass
+ self._log("⚡ Thread limit: 1 thread (sequential processing)", "debug")
+ except Exception as e:
+ self._log(f"⚠️ Warning: Could not set thread limits: {e}", "warning")
+
+ # Early feedback
+ self._log("⏳ Preparing configuration...", "info")
+ # Build OCR configuration
+ ocr_config = {'provider': self.ocr_provider_value}
+
+ if ocr_config['provider'] == 'Qwen2-VL':
+ qwen_provider = self.ocr_manager.get_provider('Qwen2-VL')
+ if qwen_provider:
+ # Set model size configuration
+ if hasattr(qwen_provider, 'loaded_model_size'):
+ if qwen_provider.loaded_model_size == "Custom":
+ ocr_config['model_size'] = f"custom:{qwen_provider.model_id}"
+ else:
+ size_map = {'2B': '1', '7B': '2', '72B': '3'}
+ ocr_config['model_size'] = size_map.get(qwen_provider.loaded_model_size, '2')
+ self._log(f"Setting ocr_config['model_size'] = {ocr_config['model_size']}", "info")
+
+ # Set OCR prompt if available
+ if hasattr(self, 'ocr_prompt'):
+ # Set it via environment variable (Qwen2VL will read this)
+ os.environ['OCR_SYSTEM_PROMPT'] = self.ocr_prompt
+
+ # Also set it directly on the provider if it has the method
+ if hasattr(qwen_provider, 'set_ocr_prompt'):
+ qwen_provider.set_ocr_prompt(self.ocr_prompt)
+ else:
+ # If no setter method, set it directly
+ qwen_provider.ocr_prompt = self.ocr_prompt
+
+ self._log("✅ Set custom OCR prompt for Qwen2-VL", "info")
+
+ elif ocr_config['provider'] == 'google':
+ import os
+ google_creds = self.main_gui.config.get('google_vision_credentials', '') or self.main_gui.config.get('google_cloud_credentials', '')
+ if not google_creds or not os.path.exists(google_creds):
+ self._log("❌ Google Cloud Vision credentials not found. Please set up credentials in the main settings.", "error")
+ self._stop_startup_heartbeat()
+ self._reset_ui_state()
+ return
+ ocr_config['google_credentials_path'] = google_creds
+
+ elif ocr_config['provider'] == 'azure':
+ # Support both PySide6 QLineEdit (.text()) and Tkinter Entry (.get())
+ if hasattr(self.azure_key_entry, 'text'):
+ azure_key = self.azure_key_entry.text().strip()
+ elif hasattr(self.azure_key_entry, 'get'):
+ azure_key = self.azure_key_entry.get().strip()
+ else:
+ azure_key = ''
+ if hasattr(self.azure_endpoint_entry, 'text'):
+ azure_endpoint = self.azure_endpoint_entry.text().strip()
+ elif hasattr(self.azure_endpoint_entry, 'get'):
+ azure_endpoint = self.azure_endpoint_entry.get().strip()
+ else:
+ azure_endpoint = ''
+
+ if not azure_key or not azure_endpoint:
+ self._log("❌ Azure credentials not configured.", "error")
+ self._stop_startup_heartbeat()
+ self._reset_ui_state()
+ return
+
+ # Save Azure settings
+ self.main_gui.config['azure_vision_key'] = azure_key
+ self.main_gui.config['azure_vision_endpoint'] = azure_endpoint
+ if hasattr(self.main_gui, 'save_config'):
+ self.main_gui.save_config(show_message=False)
+
+ ocr_config['azure_key'] = azure_key
+ ocr_config['azure_endpoint'] = azure_endpoint
+
+ # Get current API key and model for translation
+ api_key = None
+ model = 'gemini-2.5-flash' # default
+
+ # Try to get API key from various sources (support PySide6 and Tkinter widgets)
+ if hasattr(self.main_gui, 'api_key_entry'):
+ try:
+ if hasattr(self.main_gui.api_key_entry, 'text'):
+ api_key_candidate = self.main_gui.api_key_entry.text()
+ elif hasattr(self.main_gui.api_key_entry, 'get'):
+ api_key_candidate = self.main_gui.api_key_entry.get()
+ else:
+ api_key_candidate = ''
+ if api_key_candidate and api_key_candidate.strip():
+ api_key = api_key_candidate.strip()
+ except Exception:
+ pass
+ if not api_key and hasattr(self.main_gui, 'config') and self.main_gui.config.get('api_key'):
+ api_key = self.main_gui.config.get('api_key')
+
+ # Try to get model - ALWAYS get the current selection from GUI
+ if hasattr(self.main_gui, 'model_var'):
+ model = self.main_gui.model_var.get()
+ elif hasattr(self.main_gui, 'config') and self.main_gui.config.get('model'):
+ model = self.main_gui.config.get('model')
+
+ if not api_key:
+ self._log("❌ API key not found. Please configure your API key in the main settings.", "error")
+ self._stop_startup_heartbeat()
+ self._reset_ui_state()
+ return
+
+ # Check if we need to create or update the client
+ needs_new_client = False
+ self._log("🔎 Checking API client...", "debug")
+
+ if not hasattr(self.main_gui, 'client') or not self.main_gui.client:
+ needs_new_client = True
+ self._log(f"🛠 Creating new API client with model: {model}", "info")
+ elif hasattr(self.main_gui.client, 'model') and self.main_gui.client.model != model:
+ needs_new_client = True
+ self._log(f"🛠 Model changed from {self.main_gui.client.model} to {model}, creating new client", "info")
+ else:
+ self._log("♻️ Reusing existing API client", "debug")
+
+ if needs_new_client:
+ # Apply multi-key settings from config so UnifiedClient picks them up
+ try:
+ import os # Import os here
+ use_mk = bool(self.main_gui.config.get('use_multi_api_keys', False))
+ mk_list = self.main_gui.config.get('multi_api_keys', [])
+ if use_mk and mk_list:
+ os.environ['USE_MULTI_API_KEYS'] = '1'
+ os.environ['USE_MULTI_KEYS'] = '1' # backward-compat for retry paths
+ os.environ['MULTI_API_KEYS'] = json.dumps(mk_list)
+ os.environ['FORCE_KEY_ROTATION'] = '1' if self.main_gui.config.get('force_key_rotation', True) else '0'
+ os.environ['ROTATION_FREQUENCY'] = str(self.main_gui.config.get('rotation_frequency', 1))
+ self._log("🔑 Multi-key mode ENABLED for manga translator", "info")
+ else:
+ # Explicitly disable if not configured
+ os.environ['USE_MULTI_API_KEYS'] = '0'
+ os.environ['USE_MULTI_KEYS'] = '0'
+ # Fallback keys (optional)
+ if self.main_gui.config.get('use_fallback_keys', False):
+ os.environ['USE_FALLBACK_KEYS'] = '1'
+ os.environ['FALLBACK_KEYS'] = json.dumps(self.main_gui.config.get('fallback_keys', []))
+ else:
+ os.environ['USE_FALLBACK_KEYS'] = '0'
+ os.environ['FALLBACK_KEYS'] = '[]'
+ except Exception as env_err:
+ self._log(f"⚠️ Failed to apply multi-key settings: {env_err}", "warning")
+
+ # Create the unified client with the current model
+ try:
+ from unified_api_client import UnifiedClient
+ self._log("⏳ Creating API client (network/model handshake)...", "debug")
+ self.main_gui.client = UnifiedClient(model=model, api_key=api_key)
+ self._log(f"✅ API client ready (model: {model})", "info")
+ try:
+ time.sleep(0.05)
+ except Exception:
+ pass
+ except Exception as e:
+ self._log(f"❌ Failed to create API client: {str(e)}", "error")
+ import traceback
+ self._log(traceback.format_exc(), "debug")
+ self._stop_startup_heartbeat()
+ self._reset_ui_state()
+ return
+
+ # Reset the translator's history manager for new batch
+ if hasattr(self, 'translator') and self.translator and hasattr(self.translator, 'reset_history_manager'):
+ self.translator.reset_history_manager()
+
+ # Set environment variables for custom-api provider
+ if ocr_config['provider'] == 'custom-api':
+ import os # Import os for environment variables
+ env_vars = self.main_gui._get_environment_variables(
+ epub_path='', # Not needed for manga
+ api_key=api_key
+ )
+
+ # Apply all environment variables EXCEPT SYSTEM_PROMPT
+ for key, value in env_vars.items():
+ if key == 'SYSTEM_PROMPT':
+ # DON'T SET THE TRANSLATION SYSTEM PROMPT FOR OCR
+ continue
+ os.environ[key] = str(value)
+
+ # Set a VERY EXPLICIT OCR prompt that OpenAI can't ignore
+ os.environ['OCR_SYSTEM_PROMPT'] = (
+ "YOU ARE AN OCR SYSTEM. YOUR ONLY JOB IS TEXT EXTRACTION.\n\n"
+ "CRITICAL RULES:\n"
+ "1. DO NOT TRANSLATE ANYTHING\n"
+ "2. DO NOT MODIFY THE TEXT\n"
+ "3. DO NOT EXPLAIN OR COMMENT\n"
+ "4. ONLY OUTPUT THE EXACT TEXT YOU SEE\n"
+ "5. PRESERVE NATURAL TEXT FLOW - DO NOT ADD UNNECESSARY LINE BREAKS\n\n"
+ "If you see Korean text, output it in Korean.\n"
+ "If you see Japanese text, output it in Japanese.\n"
+ "If you see Chinese text, output it in Chinese.\n"
+ "If you see English text, output it in English.\n\n"
+ "IMPORTANT: Only use line breaks where they naturally occur in the original text "
+ "(e.g., between dialogue lines or paragraphs). Do not break text mid-sentence or "
+ "between every word/character.\n\n"
+ "For vertical text common in manga/comics, transcribe it as a continuous line unless "
+ "there are clear visual breaks.\n\n"
+ "NEVER translate. ONLY extract exactly what is written.\n"
+ "Output ONLY the raw text, nothing else."
+ )
+
+ self._log("✅ Set environment variables for custom-api OCR (excluded SYSTEM_PROMPT)")
+
+ # Respect user settings: only set default detector values when bubble detection is OFF.
+ try:
+ ms = self.main_gui.config.setdefault('manga_settings', {})
+ ocr_set = ms.setdefault('ocr', {})
+ changed = False
+ bubble_enabled = bool(ocr_set.get('bubble_detection_enabled', False))
+
+ if not bubble_enabled:
+ # User has bubble detection OFF -> set non-intrusive defaults only
+ if 'detector_type' not in ocr_set:
+ ocr_set['detector_type'] = 'rtdetr_onnx'
+ changed = True
+ if not ocr_set.get('rtdetr_model_url') and not ocr_set.get('bubble_model_path'):
+ # Default HF repo (detector.onnx lives here)
+ ocr_set['rtdetr_model_url'] = 'ogkalu/comic-text-and-bubble-detector'
+ changed = True
+ if changed and hasattr(self.main_gui, 'save_config'):
+ self.main_gui.save_config(show_message=False)
+ # Do not preload bubble detector for custom-api here; it will load on use or via panel preloading
+ self._preloaded_bd = None
+ except Exception:
+ self._preloaded_bd = None
+ except Exception as e:
+ # Surface any startup error and reset UI so the app doesn't look stuck
+ try:
+ import traceback
+ self._log(f"❌ Startup error: {e}", "error")
+ self._log(traceback.format_exc(), "debug")
+ except Exception:
+ pass
+ self._stop_startup_heartbeat()
+ self._reset_ui_state()
+ return
+
+ # Initialize translator if needed (or if it was reset or client was cleared during shutdown)
+ needs_new_translator = (not hasattr(self, 'translator')) or (self.translator is None)
+ if not needs_new_translator:
+ try:
+ needs_new_translator = getattr(self.translator, 'client', None) is None
+ if needs_new_translator:
+ self._log("♻️ Translator exists but client was cleared — reinitializing translator", "debug")
+ except Exception:
+ needs_new_translator = True
+ if needs_new_translator:
+ self._log("⚙️ Initializing translator...", "info")
+
+ # CRITICAL: Set batch environment variables BEFORE creating translator
+ # This ensures MangaTranslator picks up the batch settings on initialization
+ try:
+ # Get batch translation setting from main GUI
+ batch_translation_enabled = False
+ batch_size_value = 1
+
+ if hasattr(self.main_gui, 'batch_translation_var'):
+ # Check if batch translation is enabled in GUI
+ try:
+ if hasattr(self.main_gui.batch_translation_var, 'get'):
+ batch_translation_enabled = bool(self.main_gui.batch_translation_var.get())
+ else:
+ batch_translation_enabled = bool(self.main_gui.batch_translation_var)
+ except Exception:
+ pass
+
+ if hasattr(self.main_gui, 'batch_size_var'):
+ # Get batch size from GUI
+ try:
+ if hasattr(self.main_gui.batch_size_var, 'get'):
+ batch_size_value = int(self.main_gui.batch_size_var.get())
+ else:
+ batch_size_value = int(self.main_gui.batch_size_var)
+ except Exception:
+ batch_size_value = 1
+
+ # Set environment variables for the translator to pick up
+ if batch_translation_enabled:
+ os.environ['BATCH_TRANSLATION'] = '1'
+ os.environ['BATCH_SIZE'] = str(max(1, batch_size_value))
+ self._log(f"📦 Batch Translation ENABLED: {batch_size_value} concurrent API calls", "info")
+ else:
+ os.environ['BATCH_TRANSLATION'] = '0'
+ os.environ['BATCH_SIZE'] = '1'
+ self._log("📦 Batch Translation DISABLED: Sequential API calls", "info")
+ except Exception as e:
+ self._log(f"⚠️ Warning: Could not set batch settings: {e}", "warning")
+ os.environ['BATCH_TRANSLATION'] = '0'
+ os.environ['BATCH_SIZE'] = '1'
+
+ try:
+ self.translator = MangaTranslator(
+ ocr_config,
+ self.main_gui.client,
+ self.main_gui,
+ log_callback=self._log
+ )
+
+ # Fix 4: Safely set OCR manager
+ if hasattr(self, 'ocr_manager'):
+ self.translator.ocr_manager = self.ocr_manager
+ else:
+ from ocr_manager import OCRManager
+ self.ocr_manager = OCRManager(log_callback=self._log)
+ self.translator.ocr_manager = self.ocr_manager
+
+ # Attach preloaded RT-DETR if available
+ try:
+ if hasattr(self, '_preloaded_bd') and self._preloaded_bd:
+ self.translator.bubble_detector = self._preloaded_bd
+ self._log("🤖 RT-DETR preloaded and attached to translator", "debug")
+ except Exception:
+ pass
+
+ # Distribute stop flags to all components
+ self._distribute_stop_flags()
+
+ # Provide Replicate API key to translator if present, but DO NOT force-enable cloud mode here.
+ # Actual inpainting mode is chosen by the UI and applied in _apply_rendering_settings.
+ saved_api_key = self.main_gui.config.get('replicate_api_key', '')
+ if saved_api_key:
+ self.translator.replicate_api_key = saved_api_key
+
+ # Apply text rendering settings (this sets skip/cloud/local based on UI)
+ self._apply_rendering_settings()
+
+ try:
+ time.sleep(0.05)
+ except Exception:
+ pass
+ self._log("✅ Translator ready", "info")
+
+ except Exception as e:
+ self._log(f"❌ Failed to initialize translator: {str(e)}", "error")
+ import traceback
+ self._log(traceback.format_exc(), "error")
+ self._stop_startup_heartbeat()
+ self._reset_ui_state()
+ return
+ else:
+ # Update batch settings for existing translator
+ try:
+ batch_translation_enabled = False
+ batch_size_value = 1
+
+ if hasattr(self.main_gui, 'batch_translation_var'):
+ try:
+ if hasattr(self.main_gui.batch_translation_var, 'get'):
+ batch_translation_enabled = bool(self.main_gui.batch_translation_var.get())
+ else:
+ batch_translation_enabled = bool(self.main_gui.batch_translation_var)
+ except Exception:
+ pass
+
+ if hasattr(self.main_gui, 'batch_size_var'):
+ try:
+ if hasattr(self.main_gui.batch_size_var, 'get'):
+ batch_size_value = int(self.main_gui.batch_size_var.get())
+ else:
+ batch_size_value = int(self.main_gui.batch_size_var)
+ except Exception:
+ batch_size_value = 1
+
+ # Update environment variables and translator attributes
+ if batch_translation_enabled:
+ os.environ['BATCH_TRANSLATION'] = '1'
+ os.environ['BATCH_SIZE'] = str(max(1, batch_size_value))
+ self.translator.batch_mode = True
+ self.translator.batch_size = max(1, batch_size_value)
+ self._log(f"📦 Batch Translation UPDATED: {batch_size_value} concurrent API calls", "info")
+ else:
+ os.environ['BATCH_TRANSLATION'] = '0'
+ os.environ['BATCH_SIZE'] = '1'
+ self.translator.batch_mode = False
+ self.translator.batch_size = 1
+ self._log("📦 Batch Translation UPDATED: Sequential API calls", "info")
+ except Exception as e:
+ self._log(f"⚠️ Warning: Could not update batch settings: {e}", "warning")
+
+ # Update the translator with the new client if model changed
+ if needs_new_client and hasattr(self.translator, 'client'):
+ self.translator.client = self.main_gui.client
+ self._log(f"Updated translator with new API client", "info")
+
+ # Distribute stop flags to all components
+ self._distribute_stop_flags()
+
+ # Update rendering settings
+ self._apply_rendering_settings()
+
+ # Ensure inpainting settings are properly synchronized
+ if hasattr(self, 'inpainting_mode_var'):
+ inpainting_mode = self.inpainting_mode_var.get()
+
+ if inpainting_mode == 'skip':
+ self.translator.skip_inpainting = True
+ self.translator.use_cloud_inpainting = False
+ self._log("Inpainting: SKIP", "debug")
+
+ elif inpainting_mode == 'local':
+ self.translator.skip_inpainting = False
+ self.translator.use_cloud_inpainting = False
+
+ # IMPORTANT: Load the local inpainting model if not already loaded
+ if hasattr(self, 'local_model_var'):
+ selected_model = self.local_model_var.get()
+ if selected_model and selected_model != "None":
+ # Get model path from available models
+ model_info = self.available_local_models.get(selected_model)
+ if model_info:
+ model_path = model_info['path']
+ # Load the model into translator
+ if hasattr(self.translator, 'load_local_inpainting_model'):
+ success = self.translator.load_local_inpainting_model(model_path)
+ if success:
+ self._log(f"Inpainting: LOCAL - Loaded {selected_model}", "info")
+ else:
+ self._log(f"Inpainting: Failed to load local model {selected_model}", "error")
+ else:
+ # Set the model path directly if no load method
+ self.translator.local_inpaint_model_path = model_path
+ self._log(f"Inpainting: LOCAL - Set model path for {selected_model}", "info")
+ else:
+ self._log("Inpainting: LOCAL - No model selected", "warning")
+ else:
+ self._log("Inpainting: LOCAL - No model configured", "warning")
+ else:
+ self._log("Inpainting: LOCAL (default)", "debug")
+
+ elif inpainting_mode == 'cloud':
+ self.translator.skip_inpainting = False
+ saved_api_key = self.main_gui.config.get('replicate_api_key', '')
+ if saved_api_key:
+ self.translator.use_cloud_inpainting = True
+ self.translator.replicate_api_key = saved_api_key
+ self._log("Inpainting: CLOUD (Replicate)", "debug")
+ else:
+ # Fallback to local if no API key
+ self.translator.use_cloud_inpainting = False
+ self._log("Inpainting: LOCAL (no Replicate key, fallback)", "warning")
+ else:
+ # Default to local inpainting if variable doesn't exist
+ self.translator.skip_inpainting = False
+ self.translator.use_cloud_inpainting = False
+ self._log("Inpainting: LOCAL (default)", "debug")
+
+ # Double-check the settings are applied correctly
+ self._log(f"Inpainting final status:", "debug")
+ self._log(f" - Skip: {self.translator.skip_inpainting}", "debug")
+ self._log(f" - Cloud: {self.translator.use_cloud_inpainting}", "debug")
+ self._log(f" - Mode: {'SKIP' if self.translator.skip_inpainting else 'CLOUD' if self.translator.use_cloud_inpainting else 'LOCAL'}", "debug")
+
+ # Preflight RT-DETR to avoid first-page fallback after aggressive cleanup
+ try:
+ ocr_set = self.main_gui.config.get('manga_settings', {}).get('ocr', {}) or {}
+ if ocr_set.get('bubble_detection_enabled', False):
+ # Ensure a default RT-DETR model id exists when required
+ if ocr_set.get('detector_type', 'rtdetr') in ('rtdetr', 'auto'):
+ if not ocr_set.get('rtdetr_model_url') and not ocr_set.get('bubble_model_path'):
+ ocr_set['rtdetr_model_url'] = 'ogkalu/comic-text-and-bubble-detector'
+ if hasattr(self.main_gui, 'save_config'):
+ self.main_gui.save_config(show_message=False)
+ self._preflight_bubble_detector(ocr_set)
+ except Exception:
+ pass
+
+ # Reset progress
+ self.total_files = len(self.selected_files)
+ self.completed_files = 0
+ self.failed_files = 0
+ self.current_file_index = 0
+
+ # Reset all global cancellation flags for new translation
+ self._reset_global_cancellation()
+
+ # Update UI state (PySide6) - queue UI updates for main thread
+ self.is_running = True
+ self.stop_flag.clear()
+ # Queue UI updates to be processed by main thread
+ self.update_queue.put(('ui_state', 'translation_started'))
+
+ # Log start message
+ self._log(f"Starting translation of {self.total_files} files...", "info")
+ self._log(f"Using OCR provider: {ocr_config['provider'].upper()}", "info")
+ if ocr_config['provider'] == 'google':
+ self._log(f"Using Google Vision credentials: {os.path.basename(ocr_config['google_credentials_path'])}", "info")
+ elif ocr_config['provider'] == 'azure':
+ self._log(f"Using Azure endpoint: {ocr_config['azure_endpoint']}", "info")
+ else:
+ self._log(f"Using local OCR provider: {ocr_config['provider'].upper()}", "info")
+ # Report effective API routing/model with multi-key awareness
+ try:
+ c = getattr(self.main_gui, 'client', None)
+ if c is not None:
+ if getattr(c, 'use_multi_keys', False):
+ total_keys = 0
+ try:
+ stats = c.get_stats()
+ total_keys = stats.get('total_keys', 0)
+ except Exception:
+ pass
+ self._log(
+ f"API routing: Multi-key pool enabled — starting model '{getattr(c, 'model', 'unknown')}', keys={total_keys}, rotation={getattr(c, '_rotation_frequency', 1)}",
+ "info"
+ )
+ else:
+ self._log(f"API model: {getattr(c, 'model', 'unknown')}", "info")
+ except Exception:
+ pass
+ self._log(f"Contextual: {'Enabled' if self.main_gui.contextual_var.get() else 'Disabled'}", "info")
+ self._log(f"History limit: {self.main_gui.trans_history.get()} exchanges", "info")
+ self._log(f"Rolling history: {'Enabled' if self.main_gui.translation_history_rolling_var.get() else 'Disabled'}", "info")
+ self._log(f" Full Page Context: {'Enabled' if self.full_page_context_value else 'Disabled'}", "info")
+
+ # Stop heartbeat before launching worker; now regular progress takes over
+ self._stop_startup_heartbeat()
+
+ # Update progress to show we're starting the translation worker
+ self._log("🚀 Launching translation worker...", "info")
+ self._update_progress(0, self.total_files, "Starting translation...")
+
+ # Start translation via executor
+ try:
+ # Sync with main GUI executor if possible and update EXTRACTION_WORKERS
+ if hasattr(self.main_gui, '_ensure_executor'):
+ self.main_gui._ensure_executor()
+ self.executor = self.main_gui.executor
+ # Ensure env var reflects current worker setting from main GUI
+ try:
+ os.environ["EXTRACTION_WORKERS"] = str(self.main_gui.extraction_workers_var.get())
+ except Exception:
+ pass
+
+ if self.executor:
+ self.translation_future = self.executor.submit(self._translation_worker)
+ else:
+ # Fallback to dedicated thread
+ self.translation_thread = threading.Thread(
+ target=self._translation_worker,
+ daemon=True
+ )
+ self.translation_thread.start()
+ except Exception:
+ # Last resort fallback to thread
+ self.translation_thread = threading.Thread(
+ target=self._translation_worker,
+ daemon=True
+ )
+ self.translation_thread.start()
+
+ def _apply_rendering_settings(self):
+ """Apply current rendering settings to translator (PySide6 version)"""
+ if not self.translator:
+ return
+
+ # Read all values from PySide6 widgets to ensure they're current
+ # Background opacity slider
+ if hasattr(self, 'opacity_slider'):
+ self.bg_opacity_value = self.opacity_slider.value()
+
+ # Background reduction slider
+ if hasattr(self, 'reduction_slider'):
+ self.bg_reduction_value = self.reduction_slider.value()
+
+ # Background style (radio buttons)
+ if hasattr(self, 'bg_style_group'):
+ checked_id = self.bg_style_group.checkedId()
+ if checked_id == 0:
+ self.bg_style_value = "box"
+ elif checked_id == 1:
+ self.bg_style_value = "circle"
+ elif checked_id == 2:
+ self.bg_style_value = "wrap"
+
+ # Font selection
+ if hasattr(self, 'font_combo'):
+ selected = self.font_combo.currentText()
+ if selected == "Default":
+ self.selected_font_path = None
+ elif selected in self.font_mapping:
+ self.selected_font_path = self.font_mapping[selected]
+
+ # Text color (stored in value variables updated by color picker)
+ text_color = (
+ self.text_color_r_value,
+ self.text_color_g_value,
+ self.text_color_b_value
+ )
+
+ # Shadow enabled checkbox
+ if hasattr(self, 'shadow_enabled_checkbox'):
+ self.shadow_enabled_value = self.shadow_enabled_checkbox.isChecked()
+
+ # Shadow color (stored in value variables updated by color picker)
+ shadow_color = (
+ self.shadow_color_r_value,
+ self.shadow_color_g_value,
+ self.shadow_color_b_value
+ )
+
+ # Shadow offset spinboxes
+ if hasattr(self, 'shadow_offset_x_spinbox'):
+ self.shadow_offset_x_value = self.shadow_offset_x_spinbox.value()
+ if hasattr(self, 'shadow_offset_y_spinbox'):
+ self.shadow_offset_y_value = self.shadow_offset_y_spinbox.value()
+
+ # Shadow blur spinbox
+ if hasattr(self, 'shadow_blur_spinbox'):
+ self.shadow_blur_value = self.shadow_blur_spinbox.value()
+
+ # Force caps lock checkbox
+ if hasattr(self, 'force_caps_checkbox'):
+ self.force_caps_lock_value = self.force_caps_checkbox.isChecked()
+
+ # Strict text wrapping checkbox
+ if hasattr(self, 'strict_wrap_checkbox'):
+ self.strict_text_wrapping_value = self.strict_wrap_checkbox.isChecked()
+
+ # Font sizing controls
+ if hasattr(self, 'min_size_spinbox'):
+ self.auto_min_size_value = self.min_size_spinbox.value()
+ if hasattr(self, 'max_size_spinbox'):
+ self.max_font_size_value = self.max_size_spinbox.value()
+ if hasattr(self, 'multiplier_slider'):
+ self.font_size_multiplier_value = self.multiplier_slider.value()
+
+ # Determine font size value based on mode
+ if self.font_size_mode_value == 'multiplier':
+ # Pass negative value to indicate multiplier mode
+ font_size = -self.font_size_multiplier_value
+ else:
+ # Fixed mode - use the font size value directly
+ font_size = self.font_size_value if self.font_size_value > 0 else None
+
+ # Apply concise logging toggle from Advanced settings
+ try:
+ adv_cfg = self.main_gui.config.get('manga_settings', {}).get('advanced', {})
+ self.translator.concise_logs = bool(adv_cfg.get('concise_logs', False))
+ except Exception:
+ pass
+
+ # Push rendering settings to translator
+ self.translator.update_text_rendering_settings(
+ bg_opacity=self.bg_opacity_value,
+ bg_style=self.bg_style_value,
+ bg_reduction=self.bg_reduction_value,
+ font_style=self.selected_font_path,
+ font_size=font_size,
+ text_color=text_color,
+ shadow_enabled=self.shadow_enabled_value,
+ shadow_color=shadow_color,
+ shadow_offset_x=self.shadow_offset_x_value,
+ shadow_offset_y=self.shadow_offset_y_value,
+ shadow_blur=self.shadow_blur_value,
+ force_caps_lock=self.force_caps_lock_value
+ )
+
+ # Free-text-only background opacity toggle -> read from checkbox (PySide6)
+ try:
+ if hasattr(self, 'ft_only_checkbox'):
+ ft_only_enabled = self.ft_only_checkbox.isChecked()
+ self.translator.free_text_only_bg_opacity = bool(ft_only_enabled)
+ # Also update the value variable
+ self.free_text_only_bg_opacity_value = ft_only_enabled
+ except Exception:
+ pass
+
+ # Update font mode and multiplier explicitly
+ self.translator.font_size_mode = self.font_size_mode_value
+ self.translator.font_size_multiplier = self.font_size_multiplier_value
+ self.translator.min_readable_size = self.auto_min_size_value
+ self.translator.max_font_size_limit = self.max_font_size_value
+ self.translator.strict_text_wrapping = self.strict_text_wrapping_value
+ self.translator.force_caps_lock = self.force_caps_lock_value
+
+ # Update constrain to bubble setting
+ if hasattr(self, 'constrain_to_bubble_value'):
+ self.translator.constrain_to_bubble = self.constrain_to_bubble_value
+
+ # Handle inpainting mode (radio: skip/local/cloud/hybrid)
+ mode = None
+ if hasattr(self, 'inpainting_mode_var'):
+ mode = self.inpainting_mode_var.get()
+ else:
+ mode = 'local'
+
+ # Persist selected mode on translator
+ self.translator.inpaint_mode = mode
+
+ if mode == 'skip':
+ self.translator.skip_inpainting = True
+ self.translator.use_cloud_inpainting = False
+ self._log(" Inpainting: Skipped", "info")
+ elif mode == 'cloud':
+ self.translator.skip_inpainting = False
+ saved_api_key = self.main_gui.config.get('replicate_api_key', '')
+ if saved_api_key:
+ self.translator.use_cloud_inpainting = True
+ self.translator.replicate_api_key = saved_api_key
+ self._log(" Inpainting: Cloud (Replicate)", "info")
+ else:
+ self.translator.use_cloud_inpainting = False
+ self._log(" Inpainting: Local (no Replicate key, fallback)", "warning")
+ elif mode == 'hybrid':
+ self.translator.skip_inpainting = False
+ self.translator.use_cloud_inpainting = False
+ self._log(" Inpainting: Hybrid", "info")
+ else:
+ # Local (default)
+ self.translator.skip_inpainting = False
+ self.translator.use_cloud_inpainting = False
+ self._log(" Inpainting: Local", "info")
+
+ # Persist free-text-only BG opacity setting to config (handled in _save_rendering_settings)
+ # Value is now read directly from checkbox in PySide6
+
+ # Log the applied rendering and inpainting settings
+ self._log(f"Applied rendering settings:", "info")
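+        # bg_opacity is stored on a 0-255 scale; convert it to a percentage for the log below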
+ self._log(f" Background: {self.bg_style_value} @ {int(self.bg_opacity_value/255*100)}% opacity", "info")
+ import os
+ self._log(f" Font: {os.path.basename(self.selected_font_path) if self.selected_font_path else 'Default'}", "info")
+ self._log(f" Minimum Font Size: {self.auto_min_size_value}pt", "info")
+ self._log(f" Maximum Font Size: {self.max_font_size_value}pt", "info")
+ self._log(f" Strict Text Wrapping: {'Enabled (force fit)' if self.strict_text_wrapping_value else 'Disabled (allow overflow)'}", "info")
+ if self.font_size_mode_value == 'multiplier':
+ self._log(f" Font Size: Dynamic multiplier ({self.font_size_multiplier_value:.1f}x)", "info")
+ if hasattr(self, 'constrain_to_bubble_value'):
+ constraint_status = "constrained" if self.constrain_to_bubble_value else "unconstrained"
+ self._log(f" Text Constraint: {constraint_status}", "info")
+ else:
+ size_text = f"{self.font_size_value}pt" if self.font_size_value > 0 else "Auto"
+ self._log(f" Font Size: Fixed ({size_text})", "info")
+ self._log(f" Text Color: RGB({text_color[0]}, {text_color[1]}, {text_color[2]})", "info")
+ self._log(f" Shadow: {'Enabled' if self.shadow_enabled_value else 'Disabled'}", "info")
+ try:
+ self._log(f" Free-text-only BG opacity: {'Enabled' if getattr(self, 'free_text_only_bg_opacity_value', False) else 'Disabled'}", "info")
+ except Exception:
+ pass
+ self._log(f" Full Page Context: {'Enabled' if self.full_page_context_value else 'Disabled'}", "info")
+
+ def _translation_worker(self):
+ """Worker thread for translation"""
+ try:
+ # Defensive: ensure translator exists before using it (legacy callers may start this worker early)
+ if not hasattr(self, 'translator') or self.translator is None:
+ self._log("⚠️ Translator not initialized yet; skipping worker start", "warning")
+ return
+ if hasattr(self.translator, 'set_stop_flag'):
+ self.translator.set_stop_flag(self.stop_flag)
+
+ # Ensure API parallelism (batch API calls) is controlled independently of local parallel processing.
+ # Propagate the GUI "Batch Translation" toggle into environment so Unified API Client applies it globally
+ # for all providers (including custom endpoints).
+ try:
+ import os as _os
+ _os.environ['BATCH_TRANSLATION'] = '1' if getattr(self.main_gui, 'batch_translation_var', None) and self.main_gui.batch_translation_var.get() else '0'
+ # Use GUI batch size if available; default to 3 to match existing default
+ bs_val = None
+ try:
+ bs_val = str(int(self.main_gui.batch_size_var.get())) if hasattr(self.main_gui, 'batch_size_var') else None
+ except Exception:
+ bs_val = None
+ _os.environ['BATCH_SIZE'] = bs_val or _os.environ.get('BATCH_SIZE', '3')
+ except Exception:
+ # Non-fatal if env cannot be set
+ pass
+
+ # Panel-level parallelization setting (LOCAL threading for panels)
+ advanced = self.main_gui.config.get('manga_settings', {}).get('advanced', {})
+ panel_parallel = bool(advanced.get('parallel_panel_translation', False))
+ requested_panel_workers = int(advanced.get('panel_max_workers', 2))
+
+ # Decouple from global parallel processing: panel concurrency is governed ONLY by panel settings
+ effective_workers = requested_panel_workers if (panel_parallel and len(self.selected_files) > 1) else 1
+
+            # Hint translator about preferred bubble detector ownership: use the singleton only when not using panel parallelism
+ try:
+ if hasattr(self, 'translator') and self.translator:
+ self.translator.use_singleton_bubble_detector = not (panel_parallel and effective_workers > 1)
+ except Exception:
+ pass
+
+ # Model preloading phase
+ self._log("🔧 Model preloading phase", "info")
+ # Log current counters (diagnostic)
+ try:
+ st = self.translator.get_preload_counters() if hasattr(self.translator, 'get_preload_counters') else None
+ if st:
+ self._log(f" Preload counters before: inpaint_spares={st.get('inpaint_spares',0)}, detector_spares={st.get('detector_spares',0)}", "debug")
+ except Exception:
+ pass
+ # 1) Warm up bubble detector instances first (so detection can start immediately)
+ try:
+ ocr_set = self.main_gui.config.get('manga_settings', {}).get('ocr', {}) or {}
+ if (
+ effective_workers > 1
+ and ocr_set.get('bubble_detection_enabled', True)
+ and hasattr(self, 'translator')
+ and self.translator
+ ):
+ # For parallel panel translation, prefer thread-local detectors (avoid singleton for concurrency)
+ try:
+ self.translator.use_singleton_bubble_detector = False
+ except Exception:
+ pass
+ desired_bd = min(int(effective_workers), max(1, int(len(self.selected_files) or 1)))
+ self._log(f"🧰 Preloading bubble detector instances for {desired_bd} panel worker(s)...", "info")
+ try:
+ import time as _time
+ t0 = _time.time()
+ self.translator.preload_bubble_detectors(ocr_set, desired_bd)
+ dt = _time.time() - t0
+ self._log(f"⏱️ Bubble detector preload finished in {dt:.2f}s", "info")
+ except Exception as _e:
+ self._log(f"⚠️ Bubble detector preload skipped: {_e}", "warning")
+ except Exception:
+ pass
+ # 2) Preload LOCAL inpainting instances for panel parallelism
+ inpaint_preload_event = None
+ try:
+ inpaint_method = self.main_gui.config.get('manga_inpaint_method', 'cloud')
+ if (
+ effective_workers > 1
+ and inpaint_method == 'local'
+ and hasattr(self, 'translator')
+ and self.translator
+ ):
+ local_method = self.main_gui.config.get('manga_local_inpaint_model', 'anime')
+ model_path = self.main_gui.config.get(f'manga_{local_method}_model_path', '')
+ if not model_path:
+ model_path = self.main_gui.config.get(f'{local_method}_model_path', '')
+
+ # Preload one shared instance plus spares for parallel panel processing
+ # Constrain to actual number of files (no need for more workers than files)
+ desired_inp = min(int(effective_workers), max(1, int(len(self.selected_files) or 1)))
+ self._log(f"🧰 Preloading {desired_inp} local inpainting instance(s) for panel workers...", "info")
+ try:
+ import time as _time
+ t0 = _time.time()
+ # Use synchronous preload to ensure instances are ready before panel processing starts
+ self.translator.preload_local_inpainters(local_method, model_path, desired_inp)
+ dt = _time.time() - t0
+ self._log(f"⏱️ Local inpainting preload finished in {dt:.2f}s", "info")
+ except Exception as _e:
+ self._log(f"⚠️ Local inpainting preload failed: {_e}", "warning")
+ import traceback
+ self._log(traceback.format_exc(), "debug")
+ except Exception as preload_err:
+ self._log(f"⚠️ Inpainting preload setup failed: {preload_err}", "warning")
+
+ # Log updated counters (diagnostic)
+ try:
+ st2 = self.translator.get_preload_counters() if hasattr(self.translator, 'get_preload_counters') else None
+ if st2:
+ self._log(f" Preload counters after: inpaint_spares={st2.get('inpaint_spares',0)}, detector_spares={st2.get('detector_spares',0)}", "debug")
+ except Exception:
+ pass
+
+ if panel_parallel and len(self.selected_files) > 1 and effective_workers > 1:
+ self._log(f"🚀 Parallel PANEL translation ENABLED ({effective_workers} workers)", "info")
+
+ import concurrent.futures
+ import threading as _threading
+ progress_lock = _threading.Lock()
+                # Completion gate: serializes panel completion so resources are fully released before the next panel finishes
+                completion_barrier = _threading.Semaphore(1)  # Only one panel can complete at a time
+ counters = {
+ 'started': 0,
+ 'done': 0,
+ 'failed': 0
+ }
+ total = self.total_files
+
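+                # process_single runs on a worker thread: it builds its own OCR config and
+                # MangaTranslator instance, updates the shared counters under progress_lock,
+                # and returns the per-image success flag.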
+ def process_single(idx, filepath):
+ # Check stop flag at the very beginning
+ if self.stop_flag.is_set():
+ return False
+
+ # Create an isolated translator instance per panel
+ translator = None # Initialize outside try block for cleanup
+ try:
+ # Check again before starting expensive work
+ if self.stop_flag.is_set():
+ return False
+ from manga_translator import MangaTranslator
+ import os
+ # Build full OCR config for this thread (mirror _start_translation)
+ ocr_config = {'provider': self.ocr_provider_value}
+ if ocr_config['provider'] == 'google':
+ google_creds = self.main_gui.config.get('google_vision_credentials', '') or \
+ self.main_gui.config.get('google_cloud_credentials', '')
+ if google_creds and os.path.exists(google_creds):
+ ocr_config['google_credentials_path'] = google_creds
+ else:
+ self._log("⚠️ Google Cloud Vision credentials not found for parallel task", "warning")
+ elif ocr_config['provider'] == 'azure':
+ azure_key = self.main_gui.config.get('azure_vision_key', '')
+ azure_endpoint = self.main_gui.config.get('azure_vision_endpoint', '')
+ if azure_key and azure_endpoint:
+ ocr_config['azure_key'] = azure_key
+ ocr_config['azure_endpoint'] = azure_endpoint
+ else:
+ self._log("⚠️ Azure credentials not found for parallel task", "warning")
+
+ translator = MangaTranslator(ocr_config, self.main_gui.client, self.main_gui, log_callback=self._log)
+ translator.set_stop_flag(self.stop_flag)
+
+ # CRITICAL: Disable singleton bubble detector for parallel panel processing
+ # Each panel should use pool-based detectors for true parallelism
+ try:
+ translator.use_singleton_bubble_detector = False
+ self._log(f" 🤖 Panel translator: bubble detector pool mode enabled", "debug")
+ except Exception:
+ pass
+
+ # Ensure parallel processing settings are properly applied to each panel translator
+ # The web UI maps parallel_panel_translation to parallel_processing for MangaTranslator compatibility
+ try:
+ advanced = self.main_gui.config.get('manga_settings', {}).get('advanced', {})
+ if advanced.get('parallel_panel_translation', False):
+ # Override the manga_settings in this translator instance to enable parallel processing
+ # for bubble regions within each panel
+ translator.manga_settings.setdefault('advanced', {})['parallel_processing'] = True
+ panel_workers = int(advanced.get('panel_max_workers', 2))
+ translator.manga_settings.setdefault('advanced', {})['max_workers'] = panel_workers
+ # Also set the instance attributes directly
+ translator.parallel_processing = True
+ translator.max_workers = panel_workers
+ self._log(f" 📋 Panel translator configured: parallel_processing={translator.parallel_processing}, max_workers={translator.max_workers}", "debug")
+ else:
+ self._log(f" 📋 Panel translator: parallel_panel_translation=False, using sequential bubble processing", "debug")
+ except Exception as e:
+ self._log(f" ⚠️ Warning: Failed to configure parallel processing for panel translator: {e}", "warning")
+
+ # Also propagate global cancellation to isolated translator
+ from manga_translator import MangaTranslator as MTClass
+ if MTClass.is_globally_cancelled():
+ return False
+
+ # Check stop flag before configuration
+ if self.stop_flag.is_set():
+ return False
+
+ # Apply inpainting and rendering options roughly matching current translator
+ try:
+ translator.constrain_to_bubble = getattr(self, 'constrain_to_bubble_var').get() if hasattr(self, 'constrain_to_bubble_var') else True
+ except Exception:
+ pass
+
+ # Set full page context based on UI
+ try:
+ translator.set_full_page_context(
+ enabled=self.full_page_context_var.get(),
+ custom_prompt=self.full_page_context_prompt
+ )
+ except Exception:
+ pass
+
+ # Another check before path setup
+ if self.stop_flag.is_set():
+ return False
+
+ # Determine output path (route CBZ images to job out_dir)
+ filename = os.path.basename(filepath)
+ output_path = None
+ try:
+ if hasattr(self, 'cbz_image_to_job') and filepath in self.cbz_image_to_job:
+ cbz_file = self.cbz_image_to_job[filepath]
+ job = getattr(self, 'cbz_jobs', {}).get(cbz_file)
+ if job:
+ output_dir = job.get('out_dir')
+ os.makedirs(output_dir, exist_ok=True)
+ output_path = os.path.join(output_dir, filename)
+ except Exception:
+ output_path = None
+ if not output_path:
+ if self.create_subfolder_value:
+ output_dir = os.path.join(os.path.dirname(filepath), 'translated')
+ os.makedirs(output_dir, exist_ok=True)
+ output_path = os.path.join(output_dir, filename)
+ else:
+ base, ext = os.path.splitext(filepath)
+ output_path = f"{base}_translated{ext}"
+
+ # Announce start
+ self._update_current_file(filename)
+ with progress_lock:
+ counters['started'] += 1
+ self._update_progress(counters['done'], total, f"Processing {counters['started']}/{total}: {filename}")
+
+ # Final check before expensive processing
+ if self.stop_flag.is_set():
+ return False
+
+ # Process image
+ result = translator.process_image(filepath, output_path, batch_index=idx+1, batch_total=total)
+
+ # CRITICAL: Explicitly cleanup this panel's translator resources
+ # This prevents resource leaks and partial translation issues
+ try:
+ if translator:
+ # Return checked-out inpainter to pool for reuse
+ if hasattr(translator, '_return_inpainter_to_pool'):
+ translator._return_inpainter_to_pool()
+ # Return bubble detector to pool for reuse
+ if hasattr(translator, '_return_bubble_detector_to_pool'):
+ translator._return_bubble_detector_to_pool()
+ # Clear all caches and state
+ if hasattr(translator, 'reset_for_new_image'):
+ translator.reset_for_new_image()
+ # Clear internal state
+ if hasattr(translator, 'clear_internal_state'):
+ translator.clear_internal_state()
+ except Exception as cleanup_err:
+ self._log(f"⚠️ Panel translator cleanup warning: {cleanup_err}", "debug")
+
+ # CRITICAL: Use completion barrier to prevent resource conflicts
+ # This ensures only one panel completes/cleans up at a time
+ with completion_barrier:
+ # Update counters only if not stopped
+ with progress_lock:
+ if self.stop_flag.is_set():
+ # Don't update counters if translation was stopped
+ return False
+
+ # Check if translation actually produced valid output
+ translation_successful = False
+ if result.get('success', False) and not result.get('interrupted', False):
+ # Verify there's an actual output file and translated regions
+ output_exists = result.get('output_path') and os.path.exists(result.get('output_path', ''))
+ regions = result.get('regions', [])
+ has_translations = any(r.get('translated_text', '') for r in regions)
+
+ # CRITICAL: Verify all detected regions got translated
+ # Partial failures indicate inpainting or rendering issues
+ if has_translations and regions:
+ translated_count = sum(1 for r in regions if r.get('translated_text', '').strip())
+ detected_count = len(regions)
+ completion_rate = translated_count / detected_count if detected_count > 0 else 0
+
+ # Log warning if completion rate is less than 100%
+ if completion_rate < 1.0:
+ self._log(f"⚠️ Partial translation: {translated_count}/{detected_count} regions translated ({completion_rate*100:.1f}%)", "warning")
+ self._log(f" This may indicate bubble detector or inpainter issues", "warning")
+
+ # Only consider successful if at least 50% of regions translated
+ # This prevents marking completely failed images as successful
+ translation_successful = output_exists and completion_rate >= 0.5
+ else:
+ translation_successful = output_exists and has_translations
+
+ if translation_successful:
+ self.completed_files += 1
+ self._log(f"✅ Translation completed: {filename}", "success")
+                                # Pause briefly so resources are released before the next panel's completion
+ time.sleep(0.15) # Slightly longer pause for stability
+ self._log("💤 Panel completion pausing for resource cleanup", "debug")
+ else:
+ self.failed_files += 1
+ # Log the specific reason for failure
+ if result.get('interrupted', False):
+ self._log(f"⚠️ Translation interrupted: {filename}", "warning")
+ elif not result.get('success', False):
+ self._log(f"❌ Translation failed: {filename}", "error")
+ elif not result.get('output_path') or not os.path.exists(result.get('output_path', '')):
+ self._log(f"❌ Output file not created: {filename}", "error")
+ else:
+ self._log(f"❌ No text was translated: {filename}", "error")
+ counters['failed'] += 1
+ counters['done'] += 1
+ self._update_progress(counters['done'], total, f"Completed {counters['done']}/{total}")
+ # End of completion_barrier block - resources now released for next panel
+
+ return result.get('success', False)
+ except Exception as e:
+ with progress_lock:
+ # Don't update error counters if stopped
+ if not self.stop_flag.is_set():
+ self.failed_files += 1
+ counters['failed'] += 1
+ counters['done'] += 1
+ if not self.stop_flag.is_set():
+ self._log(f"❌ Error in panel task: {str(e)}", "error")
+ self._log(traceback.format_exc(), "error")
+ return False
+ finally:
+ # CRITICAL: Always cleanup translator resources, even on error
+ # This prevents resource leaks and ensures proper cleanup in parallel mode
+ try:
+ if translator:
+ # Return checked-out inpainter to pool for reuse
+ if hasattr(translator, '_return_inpainter_to_pool'):
+ translator._return_inpainter_to_pool()
+ # Return bubble detector to pool for reuse
+ if hasattr(translator, '_return_bubble_detector_to_pool'):
+ translator._return_bubble_detector_to_pool()
+ # Force cleanup of all models and caches
+ if hasattr(translator, 'clear_internal_state'):
+ translator.clear_internal_state()
+ # Clear any remaining references
+ translator = None
+ except Exception:
+ pass # Never let cleanup fail the finally block
+
+ with concurrent.futures.ThreadPoolExecutor(max_workers=max(1, effective_workers)) as executor:
+ futures = []
+ stagger_ms = int(advanced.get('panel_start_stagger_ms', 30))
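+                    # Stagger submissions slightly so panel workers do not all start loading models at the same instant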
+ for idx, filepath in enumerate(self.selected_files):
+ if self.stop_flag.is_set():
+ break
+ futures.append(executor.submit(process_single, idx, filepath))
+ if stagger_ms > 0:
+ time.sleep(stagger_ms / 1000.0)
+ time.sleep(0.1) # Brief pause for stability
+ self._log("💤 Staggered submission pausing briefly for stability", "debug")
+
+ # Handle completion and stop behavior
+ try:
+ for f in concurrent.futures.as_completed(futures):
+ if self.stop_flag.is_set():
+ # More aggressive cancellation
+ for rem in futures:
+ rem.cancel()
+ # Try to shutdown executor immediately
+ try:
+ executor.shutdown(wait=False)
+ except Exception:
+ pass
+ break
+ try:
+ # Consume future result to let it raise exceptions or return
+ f.result(timeout=0.1) # Very short timeout
+ except Exception:
+ # Ignore; counters are updated inside process_single
+ pass
+ except Exception:
+ # If as_completed fails due to shutdown, that's ok
+ pass
+
+            # Parallel mode is finished at this point; the sequential loop in the else branch below is skipped.
+
+ # Finalize CBZ packaging after parallel mode finishes
+ try:
+ self._finalize_cbz_jobs()
+ except Exception:
+ pass
+
+ else:
+                # Sequential processing (panel parallel disabled, only one file selected, or panel workers capped to 1)
+ for index, filepath in enumerate(self.selected_files):
+ if self.stop_flag.is_set():
+ self._log("\n⏹️ Translation stopped by user", "warning")
+ break
+
+ # IMPORTANT: Reset translator state for each new image
+ if hasattr(self.translator, 'reset_for_new_image'):
+ self.translator.reset_for_new_image()
+
+ self.current_file_index = index
+ filename = os.path.basename(filepath)
+
+ self._update_current_file(filename)
+ self._update_progress(
+ index,
+ self.total_files,
+ f"Processing {index + 1}/{self.total_files}: {filename}"
+ )
+
+ try:
+ # Determine output path (route CBZ images to job out_dir)
+ job_output_path = None
+ try:
+ if hasattr(self, 'cbz_image_to_job') and filepath in self.cbz_image_to_job:
+ cbz_file = self.cbz_image_to_job[filepath]
+ job = getattr(self, 'cbz_jobs', {}).get(cbz_file)
+ if job:
+ output_dir = job.get('out_dir')
+ os.makedirs(output_dir, exist_ok=True)
+ job_output_path = os.path.join(output_dir, filename)
+ except Exception:
+ job_output_path = None
+ if job_output_path:
+ output_path = job_output_path
+ else:
+ if self.create_subfolder_value:
+ output_dir = os.path.join(os.path.dirname(filepath), 'translated')
+ os.makedirs(output_dir, exist_ok=True)
+ output_path = os.path.join(output_dir, filename)
+ else:
+ base, ext = os.path.splitext(filepath)
+ output_path = f"{base}_translated{ext}"
+
+ # Process the image
+ result = self.translator.process_image(filepath, output_path)
+
+ # Check if translation was interrupted
+ if result.get('interrupted', False):
+ self._log(f"⏸️ Translation of {filename} was interrupted", "warning")
+ self.failed_files += 1
+ if self.stop_flag.is_set():
+ break
+ elif result.get('success', False):
+ # Verify translation actually produced valid output
+ output_exists = result.get('output_path') and os.path.exists(result.get('output_path', ''))
+ has_translations = any(r.get('translated_text', '') for r in result.get('regions', []))
+
+ if output_exists and has_translations:
+ self.completed_files += 1
+ self._log(f"✅ Translation completed: {filename}", "success")
+ time.sleep(0.1) # Brief pause for stability
+ self._log("💤 Sequential completion pausing briefly for stability", "debug")
+ else:
+ self.failed_files += 1
+ if not output_exists:
+ self._log(f"❌ Output file not created: {filename}", "error")
+ else:
+ self._log(f"❌ No text was translated: {filename}", "error")
+ else:
+ self.failed_files += 1
+ errors = '\n'.join(result.get('errors', ['Unknown error']))
+ self._log(f"❌ Translation failed: {filename}\n{errors}", "error")
+
+ # Check for specific error types in the error messages
+ errors_lower = errors.lower()
+ if '429' in errors or 'rate limit' in errors_lower:
+ self._log(f"⚠️ RATE LIMIT DETECTED - Please wait before continuing", "error")
+ self._log(f" The API provider is limiting your requests", "error")
+ self._log(f" Consider increasing delay between requests in settings", "error")
+
+ # Optionally pause for a bit
+ self._log(f" Pausing for 60 seconds...", "warning")
+ for sec in range(60):
+ if self.stop_flag.is_set():
+ break
+ time.sleep(1)
+ if sec % 10 == 0:
+ self._log(f" Waiting... {60-sec} seconds remaining", "warning")
+
+ except Exception as e:
+ self.failed_files += 1
+ error_str = str(e)
+ error_type = type(e).__name__
+
+ self._log(f"❌ Error processing {filename}:", "error")
+ self._log(f" Error type: {error_type}", "error")
+ self._log(f" Details: {error_str}", "error")
+
+ # Check for specific API errors
+ if "429" in error_str or "rate limit" in error_str.lower():
+ self._log(f"⚠️ RATE LIMIT ERROR (429) - API is throttling requests", "error")
+ self._log(f" Please wait before continuing or reduce request frequency", "error")
+ self._log(f" Consider increasing the API delay in settings", "error")
+
+ # Pause for rate limit
+ self._log(f" Pausing for 60 seconds...", "warning")
+ for sec in range(60):
+ if self.stop_flag.is_set():
+ break
+ time.sleep(1)
+ if sec % 10 == 0:
+ self._log(f" Waiting... {60-sec} seconds remaining", "warning")
+
+ elif "401" in error_str or "unauthorized" in error_str.lower():
+ self._log(f"❌ AUTHENTICATION ERROR (401) - Check your API key", "error")
+ self._log(f" The API key appears to be invalid or expired", "error")
+
+ elif "403" in error_str or "forbidden" in error_str.lower():
+ self._log(f"❌ FORBIDDEN ERROR (403) - Access denied", "error")
+ self._log(f" Check your API subscription and permissions", "error")
+
+ elif "timeout" in error_str.lower():
+ self._log(f"⏱️ TIMEOUT ERROR - Request took too long", "error")
+ self._log(f" Consider increasing timeout settings", "error")
+
+ else:
+ # Generic error with full traceback
+ self._log(f" Full traceback:", "error")
+ self._log(traceback.format_exc(), "error")
+
+
+ # Finalize CBZ packaging (both modes)
+ try:
+ self._finalize_cbz_jobs()
+ except Exception:
+ pass
+
+ # Final summary - only if not stopped
+ if not self.stop_flag.is_set():
+ self._log(f"\n{'='*60}", "info")
+ self._log(f"📊 Translation Summary:", "info")
+ self._log(f" Total files: {self.total_files}", "info")
+ self._log(f" ✅ Successful: {self.completed_files}", "success")
+ self._log(f" ❌ Failed: {self.failed_files}", "error" if self.failed_files > 0 else "info")
+ self._log(f"{'='*60}\n", "info")
+
+ self._update_progress(
+ self.total_files,
+ self.total_files,
+ f"Complete! {self.completed_files} successful, {self.failed_files} failed"
+ )
+
+ except Exception as e:
+ self._log(f"\n❌ Translation error: {str(e)}", "error")
+ self._log(traceback.format_exc(), "error")
+
+ finally:
+ # Check if auto cleanup is enabled in settings
+            auto_cleanup_enabled = False  # Disabled by default
+ try:
+ advanced_settings = self.main_gui.config.get('manga_settings', {}).get('advanced', {})
+ auto_cleanup_enabled = advanced_settings.get('auto_cleanup_models', False)
+ except Exception:
+ pass
+
+ if auto_cleanup_enabled:
+ # Clean up all models to free RAM
+ try:
+ # For parallel panel translation, cleanup happens here after ALL panels complete
+ is_parallel_panel = False
+ try:
+ advanced_settings = self.main_gui.config.get('manga_settings', {}).get('advanced', {})
+                    is_parallel_panel = advanced_settings.get('parallel_panel_translation', False)
+ except Exception:
+ pass
+
+ # Skip the "all parallel panels complete" message if stopped
+ if is_parallel_panel and not self.stop_flag.is_set():
+ self._log("\n🧹 All parallel panels complete - cleaning up models to free RAM...", "info")
+ elif not is_parallel_panel:
+ self._log("\n🧹 Cleaning up models to free RAM...", "info")
+
+ # Clean up the shared translator if parallel processing was used
+ if 'translator' in locals():
+ translator.cleanup_all_models()
+ self._log("✅ Shared translator models cleaned up!", "info")
+
+ # Also clean up the instance translator if it exists
+ if hasattr(self, 'translator') and self.translator:
+ self.translator.cleanup_all_models()
+ # Set to None to ensure it's released
+ self.translator = None
+ self._log("✅ Instance translator models cleaned up!", "info")
+
+ self._log("✅ All models cleaned up - RAM freed!", "info")
+
+ except Exception as e:
+ self._log(f"⚠️ Warning: Model cleanup failed: {e}", "warning")
+
+ # Force garbage collection to ensure memory is freed
+ try:
+ import gc
+ gc.collect()
+ except Exception:
+ pass
+ else:
+ # Only log if not stopped
+ if not self.stop_flag.is_set():
+ self._log("🔑 Auto cleanup disabled - models will remain in RAM for faster subsequent translations", "info")
+
+ # IMPORTANT: Reset the entire translator instance to free ALL memory
+ # Controlled by a separate "Unload models after translation" toggle
+ try:
+ # Check if we should reset the translator instance
+ reset_translator = False # default disabled
+ try:
+ advanced_settings = self.main_gui.config.get('manga_settings', {}).get('advanced', {})
+ reset_translator = bool(advanced_settings.get('unload_models_after_translation', False))
+ except Exception:
+ reset_translator = False
+
+ if reset_translator:
+ self._log("\n🗑️ Resetting translator instance to free all memory...", "info")
+
+ # Clear the instance translator completely
+ if hasattr(self, 'translator'):
+ # First ensure models are cleaned if not already done
+ try:
+ if self.translator and hasattr(self.translator, 'cleanup_all_models'):
+ self.translator.cleanup_all_models()
+ except Exception:
+ pass
+
+ # Clear all internal state using the dedicated method
+ try:
+ if self.translator and hasattr(self.translator, 'clear_internal_state'):
+ self.translator.clear_internal_state()
+ except Exception:
+ pass
+
+ # Clear remaining references with proper cleanup
+ try:
+ if self.translator:
+ # Properly unload OCR manager and all its providers
+ if hasattr(self.translator, 'ocr_manager') and self.translator.ocr_manager:
+ try:
+ ocr_manager = self.translator.ocr_manager
+ # Clear all loaded OCR providers
+ if hasattr(ocr_manager, 'providers'):
+ for provider_name, provider in ocr_manager.providers.items():
+ # Unload each provider's models
+ if hasattr(provider, 'model'):
+ provider.model = None
+ if hasattr(provider, 'processor'):
+ provider.processor = None
+ if hasattr(provider, 'tokenizer'):
+ provider.tokenizer = None
+ if hasattr(provider, 'reader'):
+ provider.reader = None
+ if hasattr(provider, 'is_loaded'):
+ provider.is_loaded = False
+ self._log(f" ✓ Unloaded OCR provider: {provider_name}", "debug")
+ ocr_manager.providers.clear()
+ self._log(" ✓ OCR manager fully unloaded", "debug")
+ except Exception as e:
+ self._log(f" Warning: OCR manager cleanup failed: {e}", "debug")
+ finally:
+ self.translator.ocr_manager = None
+
+ # Properly unload local inpainter
+ if hasattr(self.translator, 'local_inpainter') and self.translator.local_inpainter:
+ try:
+ if hasattr(self.translator.local_inpainter, 'unload'):
+ self.translator.local_inpainter.unload()
+ self._log(" ✓ Local inpainter unloaded", "debug")
+ except Exception as e:
+ self._log(f" Warning: Local inpainter cleanup failed: {e}", "debug")
+ finally:
+ self.translator.local_inpainter = None
+
+ # Properly unload bubble detector
+ if hasattr(self.translator, 'bubble_detector') and self.translator.bubble_detector:
+ try:
+ if hasattr(self.translator.bubble_detector, 'unload'):
+ self.translator.bubble_detector.unload(release_shared=True)
+ self._log(" ✓ Bubble detector unloaded", "debug")
+ except Exception as e:
+ self._log(f" Warning: Bubble detector cleanup failed: {e}", "debug")
+ finally:
+ self.translator.bubble_detector = None
+
+ # Clear API clients
+ if hasattr(self.translator, 'client'):
+ self.translator.client = None
+ if hasattr(self.translator, 'vision_client'):
+ self.translator.vision_client = None
+ except Exception:
+ pass
+
+                        # Call translator shutdown to free all resources
+                        try:
+                            if self.translator and hasattr(self.translator, 'shutdown'):
+                                self.translator.shutdown()
+                        except Exception:
+                            pass
+ # Finally, delete the translator instance entirely
+ self.translator = None
+ self._log("✅ Translator instance reset - all memory freed!", "info")
+
+ # Also clear the shared translator from parallel processing if it exists
+ if 'translator' in locals():
+ try:
+ # Clear internal references
+ if hasattr(translator, 'cache'):
+ translator.cache = None
+ if hasattr(translator, 'text_regions'):
+ translator.text_regions = None
+ if hasattr(translator, 'translated_regions'):
+ translator.translated_regions = None
+ # Delete the local reference
+ del translator
+ except Exception:
+ pass
+
+ # Clear standalone OCR manager if it exists in manga_integration
+ if hasattr(self, 'ocr_manager') and self.ocr_manager:
+ try:
+ ocr_manager = self.ocr_manager
+ # Clear all loaded OCR providers
+ if hasattr(ocr_manager, 'providers'):
+ for provider_name, provider in ocr_manager.providers.items():
+ # Unload each provider's models
+ if hasattr(provider, 'model'):
+ provider.model = None
+ if hasattr(provider, 'processor'):
+ provider.processor = None
+ if hasattr(provider, 'tokenizer'):
+ provider.tokenizer = None
+ if hasattr(provider, 'reader'):
+ provider.reader = None
+ if hasattr(provider, 'is_loaded'):
+ provider.is_loaded = False
+ ocr_manager.providers.clear()
+ self.ocr_manager = None
+ self._log(" ✓ Standalone OCR manager cleared", "debug")
+ except Exception as e:
+ self._log(f" Warning: Standalone OCR manager cleanup failed: {e}", "debug")
+
+ # Force multiple garbage collection passes to ensure everything is freed
+ try:
+ import gc
+ gc.collect()
+ gc.collect() # Multiple passes for stubborn references
+ gc.collect()
+ self._log("✅ Memory fully reclaimed", "debug")
+ except Exception:
+ pass
+ else:
+ # Only log if not stopped
+ if not self.stop_flag.is_set():
+ self._log("🔑 Translator instance preserved for faster subsequent translations", "debug")
+
+ except Exception as e:
+ self._log(f"⚠️ Warning: Failed to reset translator instance: {e}", "warning")
+
+ # Reset UI state (PySide6 - call directly)
+ try:
+ self._reset_ui_state()
+ except Exception as e:
+ self._log(f"Error resetting UI: {e}", "warning")
+
+ def _stop_translation(self):
+ """Stop the translation process"""
+ if self.is_running:
+ # Set local stop flag
+ self.stop_flag.set()
+
+ # Set global cancellation flags for coordinated stopping
+ self.set_global_cancellation(True)
+
+ # Also propagate to MangaTranslator class
+ try:
+ from manga_translator import MangaTranslator
+ MangaTranslator.set_global_cancellation(True)
+ except ImportError:
+ pass
+
+ # Also propagate to UnifiedClient if available
+ try:
+ from unified_api_client import UnifiedClient
+ UnifiedClient.set_global_cancellation(True)
+ except ImportError:
+ pass
+
+ # Update progress to show stopped status
+ self._update_progress(
+ self.completed_files,
+ self.total_files,
+ f"Stopped - {self.completed_files}/{self.total_files} completed"
+ )
+
+ # Try to style the progress bar to indicate stopped status
+ try:
+ # Set progress bar to a distinctive value and try to change appearance
+ if hasattr(self, 'progress_bar'):
+ # You could also set a custom style here if needed
+ # For now, we'll rely on the text indicators
+ pass
+ except Exception:
+ pass
+
+ # Update current file display to show stopped
+ self._update_current_file("Translation stopped")
+
+ # Kick off immediate resource shutdown to free RAM
+ try:
+ tr = getattr(self, 'translator', None)
+ if tr and hasattr(tr, 'shutdown'):
+ import threading
+ threading.Thread(target=tr.shutdown, name="MangaTranslatorShutdown", daemon=True).start()
+ self._log("🧹 Initiated translator resource shutdown", "info")
+ # Important: clear the stale translator reference so the next Start creates a fresh instance
+ self.translator = None
+ except Exception as e:
+ self._log(f"⚠️ Failed to start shutdown: {e}", "warning")
+
+ # Immediately reset UI state to re-enable start button
+ self._reset_ui_state()
+ self._log("\n⏹️ Translation stopped by user", "warning")
+
+ def _reset_ui_state(self):
+ """Reset UI to ready state - with widget existence checks (PySide6)"""
+ # Restore stdio redirection if active
+ self._redirect_stderr(False)
+ self._redirect_stdout(False)
+ # Stop any startup heartbeat if still running
+ try:
+ self._stop_startup_heartbeat()
+ except Exception:
+ pass
+ try:
+ # Check if the dialog still exists (PySide6)
+ if not hasattr(self, 'dialog') or not self.dialog:
+ return
+
+ # Reset running flag
+ self.is_running = False
+
+ # Check and update start_button if it exists (PySide6)
+ if hasattr(self, 'start_button') and self.start_button:
+ if not self.start_button.isEnabled():
+ self.start_button.setEnabled(True)
+
+ # Check and update stop_button if it exists (PySide6)
+ if hasattr(self, 'stop_button') and self.stop_button:
+ if self.stop_button.isEnabled():
+ self.stop_button.setEnabled(False)
+
+ # Re-enable file modification - check if listbox exists (PySide6)
+ if hasattr(self, 'file_listbox') and self.file_listbox:
+ if not self.file_listbox.isEnabled():
+ self.file_listbox.setEnabled(True)
+
+ except Exception as e:
+ # Log the error but don't crash
+ if hasattr(self, '_log'):
+ self._log(f"Error resetting UI state: {str(e)}", "warning")
diff --git a/manga_settings_dialog.py b/manga_settings_dialog.py
new file mode 100644
index 0000000000000000000000000000000000000000..5f630c5ded20bc902293d80b93471fa4ea86ac8f
--- /dev/null
+++ b/manga_settings_dialog.py
@@ -0,0 +1,4412 @@
+# manga_settings_dialog.py
+"""
+Enhanced settings dialog for manga translation with all settings visible
+Properly integrated with TranslatorGUI's WindowManager and UIHelper
+"""
+
+import os
+import json
+from PySide6.QtWidgets import (QDialog, QVBoxLayout, QHBoxLayout, QGridLayout,
+ QLabel, QPushButton, QCheckBox, QSpinBox, QDoubleSpinBox,
+ QSlider, QComboBox, QLineEdit, QGroupBox, QTabWidget,
+ QWidget, QScrollArea, QFrame, QRadioButton, QButtonGroup,
+ QMessageBox, QFileDialog, QSizePolicy, QApplication)
+from PySide6.QtCore import Qt, Signal, QTimer, QEvent, QObject
+from PySide6.QtGui import QFont, QIcon
+from typing import Dict, Any, Optional, Callable
+from bubble_detector import BubbleDetector
+import logging
+import time
+import copy
+
+# Use the same logging infrastructure initialized by translator_gui
+logger = logging.getLogger(__name__)
+
+class MangaSettingsDialog(QDialog):
+ """Settings dialog for manga translation"""
+
+ def __init__(self, parent, main_gui, config: Dict[str, Any], callback: Optional[Callable] = None):
+ """Initialize settings dialog
+
+ Args:
+ parent: Parent window (should be QWidget or None)
+ main_gui: Reference to TranslatorGUI instance
+ config: Configuration dictionary
+ callback: Function to call after saving
+ """
+ # Ensure parent is a QWidget or None for proper PySide6 initialization
+ if parent is not None and not hasattr(parent, 'windowTitle'):
+ # If parent is not a QWidget, use None
+ parent = None
+ super().__init__(parent)
+ self.parent = parent
+ self.main_gui = main_gui
+ self.config = config
+ self.callback = callback
+
+ # Make dialog non-modal so it doesn't block the manga integration GUI
+ self.setModal(False)
+
+ # Enhanced default settings structure with all options
+ self.default_settings = {
+ 'preprocessing': {
+ 'enabled': False,
+ 'auto_detect_quality': True,
+ 'contrast_threshold': 0.4,
+ 'sharpness_threshold': 0.3,
+ 'noise_threshold': 20,
+ 'enhancement_strength': 1.5,
+ 'denoise_strength': 10,
+ 'max_image_dimension': 2000,
+ 'max_image_pixels': 2000000,
+ 'chunk_height': 2000,
+ 'chunk_overlap': 100,
+ # Inpainting tiling
+ 'inpaint_tiling_enabled': False, # Off by default
+ 'inpaint_tile_size': 512, # Default tile size
+ 'inpaint_tile_overlap': 64 # Overlap to avoid seams
+ },
+ 'compression': {
+ 'enabled': False,
+ 'format': 'jpeg',
+ 'jpeg_quality': 85,
+ 'png_compress_level': 6,
+ 'webp_quality': 85
+ },
+ 'ocr': {
+ 'language_hints': ['ja', 'ko', 'zh'],
+ 'confidence_threshold': 0.7,
+ 'merge_nearby_threshold': 20,
+ 'azure_merge_multiplier': 3.0,
+ 'text_detection_mode': 'document',
+ 'enable_rotation_correction': True,
+ 'bubble_detection_enabled': True,
+ 'roi_locality_enabled': False,
+ 'bubble_model_path': '',
+ 'bubble_confidence': 0.3,
+ 'detector_type': 'rtdetr_onnx',
+ 'rtdetr_confidence': 0.3,
+ 'detect_empty_bubbles': True,
+ 'detect_text_bubbles': True,
+ 'detect_free_text': True,
+ 'rtdetr_model_url': '',
+ 'use_rtdetr_for_ocr_regions': True,
+ 'azure_reading_order': 'natural',
+ 'azure_model_version': 'latest',
+ 'azure_max_wait': 60,
+ 'azure_poll_interval': 0.5,
+ 'min_text_length': 0,
+ 'exclude_english_text': False,
+ 'english_exclude_threshold': 0.7,
+ 'english_exclude_min_chars': 4,
+ 'english_exclude_short_tokens': False
+ },
+ 'advanced': {
+ 'format_detection': True,
+ 'webtoon_mode': 'auto',
+ 'debug_mode': False,
+ 'save_intermediate': False,
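+                # parallel_processing / max_workers: concurrency within a single page (e.g. bubble regions),
+                # parallel_panel_translation / panel_max_workers: concurrency across whole pages (panels)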
+ 'parallel_processing': True,
+ 'max_workers': 2,
+ 'parallel_panel_translation': False,
+ 'panel_max_workers': 2,
+ 'use_singleton_models': False,
+ 'auto_cleanup_models': False,
+ 'unload_models_after_translation': False,
+ 'auto_convert_to_onnx': False, # Disabled by default
+ 'auto_convert_to_onnx_background': True,
+ 'quantize_models': False,
+ 'onnx_quantize': False,
+ 'torch_precision': 'fp16',
+ # HD strategy defaults (mirrors comic-translate)
+ 'hd_strategy': 'resize', # 'original' | 'resize' | 'crop'
+ 'hd_strategy_resize_limit': 1536, # long-edge cap for resize
+ 'hd_strategy_crop_margin': 16, # pixels padding around cropped ROIs
+ 'hd_strategy_crop_trigger_size': 1024, # only crop if long edge exceeds this
+ # RAM cap defaults
+ 'ram_cap_enabled': False,
+ 'ram_cap_mb': 4096,
+ 'ram_cap_mode': 'soft',
+ 'ram_gate_timeout_sec': 15.0,
+ 'ram_min_floor_over_baseline_mb': 256
+ },
+ 'inpainting': {
+ 'batch_size': 10,
+ 'enable_cache': True,
+ 'method': 'local',
+ 'local_method': 'anime'
+ },
+ 'font_sizing': {
+ 'algorithm': 'smart', # 'smart', 'conservative', 'aggressive'
+ 'prefer_larger': True, # Prefer larger readable text
+ 'max_lines': 10, # Maximum lines before forcing smaller
+ 'line_spacing': 1.3, # Line height multiplier
+ 'bubble_size_factor': True # Scale font based on bubble size
+ },
+
+ # Mask dilation settings with new iteration controls
+ 'mask_dilation': 0,
+ 'use_all_iterations': True, # Master control - use same for all by default
+ 'all_iterations': 2, # Value when using same for all
+ 'text_bubble_dilation_iterations': 2, # Text-filled speech bubbles
+ 'empty_bubble_dilation_iterations': 3, # Empty speech bubbles
+ 'free_text_dilation_iterations': 0, # Free text (0 for clean B&W)
+ 'bubble_dilation_iterations': 2, # Legacy support
+ 'dilation_iterations': 2, # Legacy support
+
+ # Cloud inpainting settings
+ 'cloud_inpaint_model': 'ideogram-v2',
+ 'cloud_custom_version': '',
+ 'cloud_inpaint_prompt': 'clean background, smooth surface',
+ 'cloud_negative_prompt': 'text, writing, letters',
+ 'cloud_inference_steps': 20,
+ 'cloud_timeout': 60
+ }
+
+ # Merge with existing config
+ self.settings = self._merge_settings(config.get('manga_settings', {}))
+
+ # Show dialog
+ self.show_dialog()
+
+ def _create_styled_checkbox(self, text):
+ """Create a checkbox with proper checkmark using text overlay (same as manga_integration.py)"""
+ checkbox = QCheckBox(text)
+ checkbox.setStyleSheet("""
+ QCheckBox {
+ color: white;
+ spacing: 6px;
+ }
+ QCheckBox::indicator {
+ width: 14px;
+ height: 14px;
+ border: 1px solid #5a9fd4;
+ border-radius: 2px;
+ background-color: #2d2d2d;
+ }
+ QCheckBox::indicator:checked {
+ background-color: #5a9fd4;
+ border-color: #5a9fd4;
+ }
+ QCheckBox::indicator:hover {
+ border-color: #7bb3e0;
+ }
+ QCheckBox:disabled {
+ color: #666666;
+ }
+ QCheckBox::indicator:disabled {
+ background-color: #1a1a1a;
+ border-color: #3a3a3a;
+ }
+ """)
+
+ # Create checkmark overlay
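+        # The styled indicator above paints as a solid square, so a "✓" label is overlaid when checked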
+ checkmark = QLabel("✓", checkbox)
+ checkmark.setStyleSheet("""
+ QLabel {
+ color: white;
+ background: transparent;
+ font-weight: bold;
+ font-size: 11px;
+ }
+ """)
+ checkmark.setAlignment(Qt.AlignCenter)
+ checkmark.hide()
+ checkmark.setAttribute(Qt.WA_TransparentForMouseEvents) # Make checkmark click-through
+
+ # Position checkmark properly after widget is shown
+ def position_checkmark():
+ # Position over the checkbox indicator
+ checkmark.setGeometry(2, 1, 14, 14)
+
+ # Show/hide checkmark based on checked state
+ def update_checkmark():
+ if checkbox.isChecked():
+ position_checkmark()
+ checkmark.show()
+ else:
+ checkmark.hide()
+
+ checkbox.stateChanged.connect(update_checkmark)
+ # Delay initial positioning to ensure widget is properly rendered
+ QTimer.singleShot(0, lambda: (position_checkmark(), update_checkmark()))
+
+ return checkbox
+
+ def _disable_spinbox_scroll(self, widget):
+ """Disable mouse wheel scrolling on a spinbox, combobox, or slider (PySide6 version)"""
+ # Install event filter to block wheel events
+ class WheelEventFilter(QObject):
+ def eventFilter(self, obj, event):
+ if event.type() == QEvent.Wheel:
+ event.ignore()
+ return True
+ return False
+
+ filter_obj = WheelEventFilter(widget) # Parent it to the widget
+ widget.installEventFilter(filter_obj)
+ # Store the filter so it doesn't get garbage collected
+ if not hasattr(widget, '_wheel_filter'):
+ widget._wheel_filter = filter_obj
+
+ def _disable_all_spinbox_scrolling(self, parent):
+ """Recursively find and disable scrolling on all spinboxes, comboboxes, and sliders (PySide6 version)"""
+ # Check if the parent itself is a spinbox, combobox, or slider
+ if isinstance(parent, (QSpinBox, QDoubleSpinBox, QComboBox, QSlider)):
+ self._disable_spinbox_scroll(parent)
+
+ # Check all children recursively
+ if hasattr(parent, 'children'):
+ for child in parent.children():
+ if isinstance(child, QWidget):
+ self._disable_all_spinbox_scrolling(child)
+
+ def _create_font_size_controls(self, parent_layout):
+ """Create improved font size controls with presets"""
+
+ # Font size frame
+ font_frame = QWidget()
+ font_layout = QHBoxLayout(font_frame)
+ font_layout.setContentsMargins(0, 0, 0, 0)
+ parent_layout.addWidget(font_frame)
+
+ label = QLabel("Font Size:")
+ label.setMinimumWidth(150)
+ font_layout.addWidget(label)
+
+ # Font size mode selection
+ mode_frame = QWidget()
+ mode_layout = QHBoxLayout(mode_frame)
+ mode_layout.setContentsMargins(0, 0, 0, 0)
+ font_layout.addWidget(mode_frame)
+
+ # Radio buttons for mode - using QButtonGroup
+ self.font_size_mode_group = QButtonGroup()
+ self.font_size_mode = 'auto' # Store mode as string attribute
+
+ modes = [
+ ("Auto", "auto", "Automatically fit text to bubble size"),
+ ("Fixed", "fixed", "Use a specific font size"),
+ ("Scale", "scale", "Scale auto size by percentage")
+ ]
+
+ for text, value, tooltip in modes:
+ rb = QRadioButton(text)
+ rb.setChecked(value == 'auto')
+ rb.setToolTip(tooltip)
+ rb.toggled.connect(lambda checked, v=value: self._on_font_mode_change(v) if checked else None)
+ mode_layout.addWidget(rb)
+ self.font_size_mode_group.addButton(rb)
+
+ # Controls frame (changes based on mode)
+ self.font_controls_frame = QWidget()
+ self.font_controls_layout = QVBoxLayout(self.font_controls_frame)
+ self.font_controls_layout.setContentsMargins(20, 5, 0, 5)
+ parent_layout.addWidget(self.font_controls_frame)
+
+ # Fixed size controls
+ self.fixed_size_frame = QWidget()
+ fixed_layout = QHBoxLayout(self.fixed_size_frame)
+ fixed_layout.setContentsMargins(0, 0, 0, 0)
+ fixed_layout.addWidget(QLabel("Size:"))
+
+ self.fixed_font_size_spin = QSpinBox()
+ self.fixed_font_size_spin.setRange(8, 72)
+ self.fixed_font_size_spin.setValue(16)
+ self.fixed_font_size_spin.valueChanged.connect(self._save_rendering_settings)
+ fixed_layout.addWidget(self.fixed_font_size_spin)
+
+ # Quick presets for fixed size
+ fixed_layout.addWidget(QLabel("Presets:"))
+
+ presets = [
+ ("Small", 12),
+ ("Medium", 16),
+ ("Large", 20),
+ ("XL", 24)
+ ]
+
+ for text, size in presets:
+ btn = QPushButton(text)
+ btn.setMaximumWidth(60)
+ btn.clicked.connect(lambda checked, s=size: self._set_fixed_size(s))
+ fixed_layout.addWidget(btn)
+
+ fixed_layout.addStretch()
+
+ # Scale controls
+ self.scale_frame = QWidget()
+ scale_layout = QHBoxLayout(self.scale_frame)
+ scale_layout.setContentsMargins(0, 0, 0, 0)
+ scale_layout.addWidget(QLabel("Scale:"))
+
+ # QSlider uses integers, so we'll use 50-200 to represent 0.5-2.0
+ self.font_scale_slider = QSlider(Qt.Horizontal)
+ self.font_scale_slider.setRange(50, 200)
+ self.font_scale_slider.setValue(100)
+ self.font_scale_slider.setMinimumWidth(200)
+ self.font_scale_slider.valueChanged.connect(self._update_scale_label)
+ scale_layout.addWidget(self.font_scale_slider)
+
+ self.scale_label = QLabel("100%")
+ self.scale_label.setMinimumWidth(50)
+ scale_layout.addWidget(self.scale_label)
+
+ # Quick scale presets
+ scale_layout.addWidget(QLabel("Quick:"))
+
+ scale_presets = [
+ ("75%", 0.75),
+ ("100%", 1.0),
+ ("125%", 1.25),
+ ("150%", 1.5)
+ ]
+
+ for text, scale in scale_presets:
+ btn = QPushButton(text)
+ btn.setMaximumWidth(50)
+ btn.clicked.connect(lambda checked, s=scale: self._set_scale(s))
+ scale_layout.addWidget(btn)
+
+ scale_layout.addStretch()
+
+ # Auto size settings
+ self.auto_frame = QWidget()
+ auto_layout = QVBoxLayout(self.auto_frame)
+ auto_layout.setContentsMargins(0, 0, 0, 0)
+
+ # Min/Max size constraints for auto mode
+ constraints_frame = QWidget()
+ constraints_layout = QHBoxLayout(constraints_frame)
+ constraints_layout.setContentsMargins(0, 0, 0, 0)
+ auto_layout.addWidget(constraints_frame)
+
+ constraints_layout.addWidget(QLabel("Size Range:"))
+
+ constraints_layout.addWidget(QLabel("Min:"))
+ self.min_font_size_spin = QSpinBox()
+ self.min_font_size_spin.setRange(6, 20)
+ self.min_font_size_spin.setValue(10)
+ self.min_font_size_spin.valueChanged.connect(self._save_rendering_settings)
+ constraints_layout.addWidget(self.min_font_size_spin)
+
+ constraints_layout.addWidget(QLabel("Max:"))
+ self.max_font_size_spin = QSpinBox()
+ self.max_font_size_spin.setRange(16, 48)
+ self.max_font_size_spin.setValue(28)
+ self.max_font_size_spin.valueChanged.connect(self._save_rendering_settings)
+ constraints_layout.addWidget(self.max_font_size_spin)
+
+ constraints_layout.addStretch()
+
+ # Auto fit quality
+ quality_frame = QWidget()
+ quality_layout = QHBoxLayout(quality_frame)
+ quality_layout.setContentsMargins(0, 0, 0, 0)
+ auto_layout.addWidget(quality_frame)
+
+ quality_layout.addWidget(QLabel("Fit Style:"))
+
+ self.auto_fit_style_group = QButtonGroup()
+ self.auto_fit_style = 'balanced' # Store as string attribute
+
+ fit_styles = [
+ ("Compact", "compact", "Fit more text, smaller size"),
+ ("Balanced", "balanced", "Balance readability and fit"),
+ ("Readable", "readable", "Prefer larger, more readable text")
+ ]
+
+ for text, value, tooltip in fit_styles:
+ rb = QRadioButton(text)
+ rb.setChecked(value == 'balanced')
+ rb.setToolTip(tooltip)
+ rb.toggled.connect(lambda checked, v=value: self._on_fit_style_change(v) if checked else None)
+ quality_layout.addWidget(rb)
+ self.auto_fit_style_group.addButton(rb)
+
+ quality_layout.addStretch()
+
+ # Initialize the correct frame
+ self._on_font_mode_change('auto')
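+        # For reference, the mode-to-controls mapping built above (and switched
+        # by _on_font_mode_change below):
+        #   'auto'  -> min/max size range + fit style radio buttons
+        #   'fixed' -> fixed size spinbox + Small/Medium/Large/XL presets
+        #   'scale' -> 50-200% slider + quick percentage presets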
+
+ def _on_font_mode_change(self, mode):
+ """Show/hide appropriate font controls based on mode"""
+ # Update the stored mode
+ self.font_size_mode = mode
+
+ # Remove all frames from layout
+ self.font_controls_layout.removeWidget(self.fixed_size_frame)
+ self.font_controls_layout.removeWidget(self.scale_frame)
+ self.font_controls_layout.removeWidget(self.auto_frame)
+ self.fixed_size_frame.hide()
+ self.scale_frame.hide()
+ self.auto_frame.hide()
+
+ # Show the appropriate frame
+ if mode == 'fixed':
+ self.font_controls_layout.addWidget(self.fixed_size_frame)
+ self.fixed_size_frame.show()
+ elif mode == 'scale':
+ self.font_controls_layout.addWidget(self.scale_frame)
+ self.scale_frame.show()
+ else: # auto
+ self.font_controls_layout.addWidget(self.auto_frame)
+ self.auto_frame.show()
+
+ self._save_rendering_settings()
+
+ def _set_fixed_size(self, size):
+ """Set fixed font size from preset"""
+ self.fixed_font_size_spin.setValue(size)
+ self._save_rendering_settings()
+
+ def _set_scale(self, scale):
+ """Set font scale from preset"""
+ # Scale is 0.5-2.0, slider uses 50-200
+ self.font_scale_slider.setValue(int(scale * 100))
+ self._update_scale_label()
+ self._save_rendering_settings()
+
+ def _update_scale_label(self):
+ """Update the scale percentage label"""
+        # The slider value (50-200) is already the percentage to display
+ scale_value = self.font_scale_slider.value()
+ self.scale_label.setText(f"{scale_value}%")
+ self._save_rendering_settings()
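+        # Illustrative mapping between the integer slider and the scale factor
+        # (assumed to be divided by 100 wherever the rendering settings consume it):
+        #   slider 75  -> "75%"  -> scale 0.75
+        #   slider 100 -> "100%" -> scale 1.0
+        #   slider 150 -> "150%" -> scale 1.5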
+
+ def _on_fit_style_change(self, style):
+ """Handle fit style change"""
+ self.auto_fit_style = style
+ self._save_rendering_settings()
+
+ def _create_tooltip(self, widget, text):
+ """Create a tooltip for a widget - PySide6 version"""
+ # In PySide6, tooltips are much simpler - just set the toolTip property
+ widget.setToolTip(text)
+
+ def _merge_settings(self, existing: Dict) -> Dict:
+ """Merge existing settings with defaults"""
+ result = self.default_settings.copy()
+
+ def deep_merge(base: Dict, update: Dict) -> Dict:
+ for key, value in update.items():
+ if key in base and isinstance(base[key], dict) and isinstance(value, dict):
+ base[key] = deep_merge(base[key], value)
+ else:
+ base[key] = value
+ return base
+
+ return deep_merge(result, existing)
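+        # Illustrative example of the deep merge (values are hypothetical):
+        #   defaults: {'preprocessing': {'enabled': False, 'chunk_height': 1000}}
+        #   existing: {'preprocessing': {'enabled': True}}
+        #   result:   {'preprocessing': {'enabled': True, 'chunk_height': 1000}}
+        # i.e. saved values override defaults, while untouched defaults survive.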
+
+ def show_dialog(self):
+ """Display the settings dialog using PySide6"""
+ # Set initialization flag to prevent auto-saves during setup
+ self._initializing = True
+
+ # Set dialog properties
+ self.setWindowTitle("Manga Translation Settings")
+ # Dialog is already non-modal from __init__, don't override it
+
+ # Set the halgakos.ico icon
+ try:
+ icon_path = os.path.join(os.path.dirname(__file__), 'Halgakos.ico')
+ if os.path.exists(icon_path):
+ self.setWindowIcon(QIcon(icon_path))
+ except Exception:
+ pass # Fail silently if icon can't be loaded
+
+ # Apply overall dark theme styling
+ self.setStyleSheet("""
+ QDialog {
+ background-color: #1e1e1e;
+ color: white;
+ font-family: Arial;
+ }
+ QGroupBox {
+ font-family: Arial;
+ font-size: 10pt;
+ font-weight: bold;
+ color: white;
+ border: 1px solid #555;
+ border-radius: 5px;
+ margin-top: 10px;
+ padding-top: 5px;
+ }
+ QGroupBox::title {
+ subcontrol-origin: margin;
+ left: 10px;
+ padding: 0 5px 0 5px;
+ }
+ QLabel {
+ color: white;
+ font-family: Arial;
+ font-size: 9pt;
+ }
+ QLineEdit {
+ background-color: #2d2d2d;
+ color: white;
+ border: 1px solid #555;
+ border-radius: 3px;
+ padding: 3px;
+ font-family: Arial;
+ font-size: 9pt;
+ }
+ QSpinBox, QDoubleSpinBox {
+ background-color: #2d2d2d;
+ color: white;
+ border: 1px solid #555;
+ border-radius: 3px;
+ padding: 3px;
+ font-family: Arial;
+ font-size: 9pt;
+ }
+ QComboBox {
+ background-color: #2d2d2d;
+ color: white;
+ border: 1px solid #555;
+ border-radius: 3px;
+ padding: 3px 5px;
+ padding-right: 25px;
+ font-family: Arial;
+ font-size: 9pt;
+ min-height: 20px;
+ }
+ QComboBox:hover {
+ border: 1px solid #7bb3e0;
+ }
+ QComboBox:focus {
+ border: 1px solid #5a9fd4;
+ }
+ QComboBox::drop-down {
+ subcontrol-origin: padding;
+ subcontrol-position: right center;
+ width: 20px;
+ border-left: 1px solid #555;
+ background-color: #3c3c3c;
+ border-top-right-radius: 3px;
+ border-bottom-right-radius: 3px;
+ }
+ QComboBox::drop-down:hover {
+ background-color: #4a4a4a;
+ }
+ QComboBox::down-arrow {
+ image: none;
+ border-left: 4px solid transparent;
+ border-right: 4px solid transparent;
+ border-top: 5px solid #aaa;
+ width: 0;
+ height: 0;
+ margin-right: 5px;
+ }
+ QComboBox::down-arrow:hover {
+ border-top: 5px solid #fff;
+ }
+ QComboBox QAbstractItemView {
+ background-color: #2d2d2d;
+ color: white;
+ selection-background-color: #5a9fd4;
+ selection-color: white;
+ border: 1px solid #555;
+ outline: none;
+ }
+ QPushButton {
+ font-family: Arial;
+ font-size: 9pt;
+ padding: 5px 15px;
+ border-radius: 3px;
+ border: none;
+ }
+ QSlider::groove:horizontal {
+ border: 1px solid #555;
+ height: 6px;
+ background: #2d2d2d;
+ border-radius: 3px;
+ }
+ QSlider::handle:horizontal {
+ background: #5a9fd4;
+ border: 1px solid #5a9fd4;
+ width: 18px;
+ border-radius: 9px;
+ margin: -6px 0;
+ }
+ QSlider::handle:horizontal:hover {
+ background: #7bb3e0;
+ border: 1px solid #7bb3e0;
+ }
+ QRadioButton {
+ color: white;
+ spacing: 6px;
+ font-family: Arial;
+ font-size: 9pt;
+ }
+ QRadioButton::indicator {
+ width: 16px;
+ height: 16px;
+ border: 2px solid #5a9fd4;
+ border-radius: 8px;
+ background-color: #2d2d2d;
+ }
+ QRadioButton::indicator:checked {
+ background-color: #5a9fd4;
+ border: 2px solid #5a9fd4;
+ }
+ QRadioButton::indicator:hover {
+ border-color: #7bb3e0;
+ }
+ QRadioButton:disabled {
+ color: #666666;
+ }
+ QRadioButton::indicator:disabled {
+ background-color: #1a1a1a;
+ border-color: #3a3a3a;
+ }
+ QCheckBox {
+ color: white;
+ spacing: 6px;
+ }
+ QCheckBox::indicator {
+ width: 14px;
+ height: 14px;
+ border: 1px solid #5a9fd4;
+ border-radius: 2px;
+ background-color: #2d2d2d;
+ }
+ QCheckBox::indicator:checked {
+ background-color: #5a9fd4;
+ border-color: #5a9fd4;
+ }
+ QCheckBox::indicator:hover {
+ border-color: #7bb3e0;
+ }
+ QCheckBox:disabled {
+ color: #666666;
+ }
+ QCheckBox::indicator:disabled {
+ background-color: #1a1a1a;
+ border-color: #3a3a3a;
+ }
+ QLineEdit:disabled, QComboBox:disabled, QSpinBox:disabled, QDoubleSpinBox:disabled {
+ background-color: #1a1a1a;
+ color: #666666;
+ border: 1px solid #3a3a3a;
+ }
+ QLabel:disabled {
+ color: #666666;
+ }
+ QScrollArea {
+ background-color: #1e1e1e;
+ border: none;
+ }
+ QWidget {
+ background-color: #1e1e1e;
+ color: white;
+ }
+ """)
+
+ # Calculate size based on screen dimensions
+ # Use availableGeometry to exclude taskbar and other system UI
+ app = QApplication.instance()
+ if app:
+ screen = app.primaryScreen().availableGeometry()
+ else:
+ screen = self.parent.screen().availableGeometry() if self.parent else self.screen().availableGeometry()
+
+ dialog_width = min(800, int(screen.width() * 0.5))
+ dialog_height = int(screen.height() * 0.90) # Use 90% of available height for more screen space with safety margin
+ self.resize(dialog_width, dialog_height)
+
+ # Center the dialog within available screen space
+ dialog_x = screen.x() + (screen.width() - dialog_width) // 2
+ dialog_y = screen.y() + (screen.height() - dialog_height) // 2
+ self.move(dialog_x, dialog_y)
+
+ # Create main layout
+ main_layout = QVBoxLayout(self)
+ main_layout.setContentsMargins(10, 10, 10, 10)
+ main_layout.setSpacing(10)
+
+ # Create scroll area for the content
+ scroll_area = QScrollArea()
+ scroll_area.setWidgetResizable(True)
+ scroll_area.setHorizontalScrollBarPolicy(Qt.ScrollBarAsNeeded)
+ scroll_area.setVerticalScrollBarPolicy(Qt.ScrollBarAsNeeded)
+
+ # Create content widget that will go inside scroll area
+ content_widget = QWidget()
+ content_layout = QVBoxLayout(content_widget)
+ content_layout.setContentsMargins(5, 5, 5, 5)
+
+ # Create tab widget with enhanced styling
+ self.tab_widget = QTabWidget()
+ self.tab_widget.setStyleSheet("""
+ QTabWidget::pane {
+ border: 1px solid #555;
+ background-color: #2b2b2b;
+ }
+ QTabBar::tab {
+ background-color: #3c3c3c;
+ color: #cccccc;
+ border: 1px solid #555;
+ border-bottom: none;
+ padding: 8px 16px;
+ margin-right: 2px;
+ font-family: Arial;
+ font-size: 10pt;
+ font-weight: bold;
+ }
+ QTabBar::tab:selected {
+ background-color: #5a9fd4;
+ color: white;
+ border-color: #7bb3e0;
+ margin-bottom: -1px;
+ }
+ QTabBar::tab:hover:!selected {
+ background-color: #4a4a4a;
+ color: white;
+ border-color: #7bb3e0;
+ }
+ QTabBar::tab:first {
+ margin-left: 0;
+ }
+ """)
+ content_layout.addWidget(self.tab_widget)
+
+ # Create all tabs
+ self._create_preprocessing_tab()
+ self._create_ocr_tab()
+ self._create_inpainting_tab()
+ self._create_advanced_tab()
+ self._create_cloud_api_tab()
+ # NOTE: Font Sizing tab removed; controls are now in Manga Integration UI
+
+ # Set content widget in scroll area
+ scroll_area.setWidget(content_widget)
+ main_layout.addWidget(scroll_area)
+
+ # Create button frame at bottom
+ button_frame = QWidget()
+ button_layout = QHBoxLayout(button_frame)
+ button_layout.setContentsMargins(0, 5, 0, 0)
+
+ # Reset button on left
+ reset_button = QPushButton("Reset to Defaults")
+ reset_button.clicked.connect(self._reset_defaults)
+ reset_button.setMinimumWidth(120)
+ reset_button.setMinimumHeight(32)
+ reset_button.setStyleSheet("""
+ QPushButton {
+ background-color: #ffc107;
+ color: #1a1a1a;
+ font-weight: bold;
+ font-size: 10pt;
+ border: none;
+ border-radius: 4px;
+ padding: 6px 16px;
+ }
+ QPushButton:hover {
+ background-color: #ffcd38;
+ }
+ QPushButton:pressed {
+ background-color: #e0a800;
+ }
+ """)
+ button_layout.addWidget(reset_button)
+
+ button_layout.addStretch() # Push other buttons to the right
+
+ # Cancel and Save buttons on right
+ cancel_button = QPushButton("Cancel")
+ cancel_button.clicked.connect(self._cancel)
+ cancel_button.setMinimumWidth(90)
+ cancel_button.setMinimumHeight(32)
+ cancel_button.setStyleSheet("""
+ QPushButton {
+ background-color: #6c757d;
+ color: white;
+ font-weight: bold;
+ font-size: 10pt;
+ border: none;
+ border-radius: 4px;
+ padding: 6px 16px;
+ }
+ QPushButton:hover {
+ background-color: #7d8a96;
+ }
+ QPushButton:pressed {
+ background-color: #5a6268;
+ }
+ """)
+ button_layout.addWidget(cancel_button)
+
+ save_button = QPushButton("Save")
+ save_button.clicked.connect(self._save_settings)
+ save_button.setDefault(True) # Make it the default button
+ save_button.setMinimumWidth(90)
+ save_button.setMinimumHeight(32)
+ save_button.setStyleSheet("""
+ QPushButton {
+ background-color: #28a745;
+ color: white;
+ font-weight: bold;
+ font-size: 10pt;
+ border: none;
+ border-radius: 4px;
+ padding: 6px 16px;
+ }
+ QPushButton:hover {
+ background-color: #34c759;
+ }
+ QPushButton:pressed {
+ background-color: #218838;
+ }
+ """)
+ button_layout.addWidget(save_button)
+
+ main_layout.addWidget(button_frame)
+
+ # Clear initialization flag after setup is complete
+ self._initializing = False
+
+ # Initialize preprocessing state
+ self._toggle_preprocessing()
+
+ # Initialize tiling controls state (must be after widgets are created)
+ try:
+ self._toggle_tiling_controls()
+ except Exception as e:
+ print(f"Warning: Failed to initialize tiling controls: {e}")
+
+ # Initialize iteration controls state
+ try:
+ self._toggle_iteration_controls()
+ except Exception:
+ pass
+
+ # Disable mouse wheel scrolling on all spinboxes and comboboxes
+ self._disable_all_spinbox_scrolling(self)
+
+ # Show the dialog
+ self.show()
+
+ def _create_preprocessing_tab(self):
+ """Create preprocessing settings tab with all options"""
+ # Create tab widget and add to tab widget
+ tab_widget = QWidget()
+ self.tab_widget.addTab(tab_widget, "Preprocessing")
+
+ # Main scrollable content
+ main_layout = QVBoxLayout(tab_widget)
+ main_layout.setContentsMargins(5, 5, 5, 5)
+ main_layout.setSpacing(6)
+
+ # Enable preprocessing group
+ enable_group = QGroupBox("Image Preprocessing")
+ main_layout.addWidget(enable_group)
+ enable_layout = QVBoxLayout(enable_group)
+ enable_layout.setContentsMargins(8, 8, 8, 6)
+ enable_layout.setSpacing(4)
+
+ self.preprocess_enabled = self._create_styled_checkbox("Enable Image Preprocessing")
+ self.preprocess_enabled.setChecked(self.settings['preprocessing']['enabled'])
+ self.preprocess_enabled.toggled.connect(self._toggle_preprocessing)
+ enable_layout.addWidget(self.preprocess_enabled)
+
+ # Store all preprocessing controls for enable/disable
+ self.preprocessing_controls = []
+
+ # Auto quality detection
+ self.auto_detect = self._create_styled_checkbox("Auto-detect image quality issues")
+ self.auto_detect.setChecked(self.settings['preprocessing']['auto_detect_quality'])
+ enable_layout.addWidget(self.auto_detect)
+ self.preprocessing_controls.append(self.auto_detect)
+
+ # Quality thresholds section
+ threshold_group = QGroupBox("Image Enhancement")
+ main_layout.addWidget(threshold_group)
+ threshold_layout = QVBoxLayout(threshold_group)
+ threshold_layout.setContentsMargins(8, 8, 8, 6)
+ threshold_layout.setSpacing(4)
+ self.preprocessing_controls.append(threshold_group)
+
+ # Contrast threshold
+ contrast_frame = QWidget()
+ contrast_layout = QHBoxLayout(contrast_frame)
+ contrast_layout.setContentsMargins(0, 0, 0, 0)
+ threshold_layout.addWidget(contrast_frame)
+
+ contrast_label = QLabel("Contrast Adjustment:")
+ contrast_label.setMinimumWidth(150)
+ contrast_layout.addWidget(contrast_label)
+ self.preprocessing_controls.append(contrast_label)
+
+ self.contrast_threshold = QDoubleSpinBox()
+ self.contrast_threshold.setRange(0.0, 1.0)
+ self.contrast_threshold.setSingleStep(0.01)
+ self.contrast_threshold.setDecimals(2)
+ self.contrast_threshold.setValue(self.settings['preprocessing']['contrast_threshold'])
+ contrast_layout.addWidget(self.contrast_threshold)
+ self.preprocessing_controls.append(self.contrast_threshold)
+ contrast_layout.addStretch()
+
+ # Sharpness threshold
+ sharpness_frame = QWidget()
+ sharpness_layout = QHBoxLayout(sharpness_frame)
+ sharpness_layout.setContentsMargins(0, 0, 0, 0)
+ threshold_layout.addWidget(sharpness_frame)
+
+ sharpness_label = QLabel("Sharpness Enhancement:")
+ sharpness_label.setMinimumWidth(150)
+ sharpness_layout.addWidget(sharpness_label)
+ self.preprocessing_controls.append(sharpness_label)
+
+ self.sharpness_threshold = QDoubleSpinBox()
+ self.sharpness_threshold.setRange(0.0, 1.0)
+ self.sharpness_threshold.setSingleStep(0.01)
+ self.sharpness_threshold.setDecimals(2)
+ self.sharpness_threshold.setValue(self.settings['preprocessing']['sharpness_threshold'])
+ sharpness_layout.addWidget(self.sharpness_threshold)
+ self.preprocessing_controls.append(self.sharpness_threshold)
+ sharpness_layout.addStretch()
+
+ # Enhancement strength
+ enhance_frame = QWidget()
+ enhance_layout = QHBoxLayout(enhance_frame)
+ enhance_layout.setContentsMargins(0, 0, 0, 0)
+ threshold_layout.addWidget(enhance_frame)
+
+ enhance_label = QLabel("Overall Enhancement:")
+ enhance_label.setMinimumWidth(150)
+ enhance_layout.addWidget(enhance_label)
+ self.preprocessing_controls.append(enhance_label)
+
+ self.enhancement_strength = QDoubleSpinBox()
+ self.enhancement_strength.setRange(0.0, 3.0)
+ self.enhancement_strength.setSingleStep(0.01)
+ self.enhancement_strength.setDecimals(2)
+ self.enhancement_strength.setValue(self.settings['preprocessing']['enhancement_strength'])
+ enhance_layout.addWidget(self.enhancement_strength)
+ self.preprocessing_controls.append(self.enhancement_strength)
+ enhance_layout.addStretch()
+
+ # Noise reduction section
+ noise_group = QGroupBox("Noise Reduction")
+ main_layout.addWidget(noise_group)
+ noise_layout = QVBoxLayout(noise_group)
+ noise_layout.setContentsMargins(8, 8, 8, 6)
+ noise_layout.setSpacing(4)
+ self.preprocessing_controls.append(noise_group)
+
+ # Noise threshold
+ noise_threshold_frame = QWidget()
+ noise_threshold_layout = QHBoxLayout(noise_threshold_frame)
+ noise_threshold_layout.setContentsMargins(0, 0, 0, 0)
+ noise_layout.addWidget(noise_threshold_frame)
+
+ noise_label = QLabel("Noise Threshold:")
+ noise_label.setMinimumWidth(150)
+ noise_threshold_layout.addWidget(noise_label)
+ self.preprocessing_controls.append(noise_label)
+
+ self.noise_threshold = QSpinBox()
+ self.noise_threshold.setRange(0, 50)
+ self.noise_threshold.setValue(self.settings['preprocessing']['noise_threshold'])
+ noise_threshold_layout.addWidget(self.noise_threshold)
+ self.preprocessing_controls.append(self.noise_threshold)
+ noise_threshold_layout.addStretch()
+
+ # Denoise strength
+ denoise_frame = QWidget()
+ denoise_layout = QHBoxLayout(denoise_frame)
+ denoise_layout.setContentsMargins(0, 0, 0, 0)
+ noise_layout.addWidget(denoise_frame)
+
+ denoise_label = QLabel("Denoise Strength:")
+ denoise_label.setMinimumWidth(150)
+ denoise_layout.addWidget(denoise_label)
+ self.preprocessing_controls.append(denoise_label)
+
+ self.denoise_strength = QSpinBox()
+ self.denoise_strength.setRange(0, 30)
+ self.denoise_strength.setValue(self.settings['preprocessing']['denoise_strength'])
+ denoise_layout.addWidget(self.denoise_strength)
+ self.preprocessing_controls.append(self.denoise_strength)
+ denoise_layout.addStretch()
+
+ # Size limits section
+ size_group = QGroupBox("Image Size Limits")
+ main_layout.addWidget(size_group)
+ size_layout = QVBoxLayout(size_group)
+ size_layout.setContentsMargins(8, 8, 8, 6)
+ size_layout.setSpacing(4)
+ self.preprocessing_controls.append(size_group)
+
+ # Max dimension
+ dimension_frame = QWidget()
+ dimension_layout = QHBoxLayout(dimension_frame)
+ dimension_layout.setContentsMargins(0, 0, 0, 0)
+ size_layout.addWidget(dimension_frame)
+
+ dimension_label = QLabel("Max Dimension:")
+ dimension_label.setMinimumWidth(150)
+ dimension_layout.addWidget(dimension_label)
+ self.preprocessing_controls.append(dimension_label)
+
+ self.dimension_spinbox = QSpinBox()
+ self.dimension_spinbox.setRange(500, 4000)
+ self.dimension_spinbox.setSingleStep(100)
+ self.dimension_spinbox.setValue(self.settings['preprocessing']['max_image_dimension'])
+ dimension_layout.addWidget(self.dimension_spinbox)
+ self.preprocessing_controls.append(self.dimension_spinbox)
+
+ dimension_layout.addWidget(QLabel("pixels"))
+ dimension_layout.addStretch()
+
+ # Max pixels
+ pixels_frame = QWidget()
+ pixels_layout = QHBoxLayout(pixels_frame)
+ pixels_layout.setContentsMargins(0, 0, 0, 0)
+ size_layout.addWidget(pixels_frame)
+
+ pixels_label = QLabel("Max Total Pixels:")
+ pixels_label.setMinimumWidth(150)
+ pixels_layout.addWidget(pixels_label)
+ self.preprocessing_controls.append(pixels_label)
+
+ self.pixels_spinbox = QSpinBox()
+ self.pixels_spinbox.setRange(1000000, 10000000)
+ self.pixels_spinbox.setSingleStep(100000)
+ self.pixels_spinbox.setValue(self.settings['preprocessing']['max_image_pixels'])
+ pixels_layout.addWidget(self.pixels_spinbox)
+ self.preprocessing_controls.append(self.pixels_spinbox)
+
+ pixels_layout.addWidget(QLabel("pixels"))
+ pixels_layout.addStretch()
+
+ # Compression section
+ compression_group = QGroupBox("Image Compression (applies to OCR uploads)")
+ main_layout.addWidget(compression_group)
+ compression_layout = QVBoxLayout(compression_group)
+ compression_layout.setContentsMargins(8, 8, 8, 6)
+ compression_layout.setSpacing(4)
+ # Do NOT add compression controls to preprocessing_controls; keep independent of preprocessing toggle
+
+ # Enable compression toggle
+ self.compression_enabled = self._create_styled_checkbox("Enable compression for OCR uploads")
+ self.compression_enabled.setChecked(self.settings.get('compression', {}).get('enabled', False))
+ self.compression_enabled.toggled.connect(self._toggle_compression_enabled)
+ compression_layout.addWidget(self.compression_enabled)
+
+ # Format selection
+ format_frame = QWidget()
+ format_layout = QHBoxLayout(format_frame)
+ format_layout.setContentsMargins(0, 0, 0, 0)
+ compression_layout.addWidget(format_frame)
+
+ self.format_label = QLabel("Format:")
+ self.format_label.setMinimumWidth(150)
+ format_layout.addWidget(self.format_label)
+
+ self.compression_format_combo = QComboBox()
+ self.compression_format_combo.addItems(['jpeg', 'png', 'webp'])
+ self.compression_format_combo.setCurrentText(self.settings.get('compression', {}).get('format', 'jpeg'))
+ self.compression_format_combo.currentTextChanged.connect(self._toggle_compression_format)
+ format_layout.addWidget(self.compression_format_combo)
+ format_layout.addStretch()
+
+ # JPEG quality
+ self.jpeg_frame = QWidget()
+ jpeg_layout = QHBoxLayout(self.jpeg_frame)
+ jpeg_layout.setContentsMargins(0, 0, 0, 0)
+ compression_layout.addWidget(self.jpeg_frame)
+
+ self.jpeg_label = QLabel("JPEG Quality:")
+ self.jpeg_label.setMinimumWidth(150)
+ jpeg_layout.addWidget(self.jpeg_label)
+
+ self.jpeg_quality_spin = QSpinBox()
+ self.jpeg_quality_spin.setRange(1, 95)
+ self.jpeg_quality_spin.setValue(self.settings.get('compression', {}).get('jpeg_quality', 85))
+ jpeg_layout.addWidget(self.jpeg_quality_spin)
+
+ self.jpeg_help = QLabel("(higher = better quality, larger size)")
+ self.jpeg_help.setStyleSheet("color: gray; font-size: 9pt;")
+ jpeg_layout.addWidget(self.jpeg_help)
+ jpeg_layout.addStretch()
+
+ # PNG compression level
+ self.png_frame = QWidget()
+ png_layout = QHBoxLayout(self.png_frame)
+ png_layout.setContentsMargins(0, 0, 0, 0)
+ compression_layout.addWidget(self.png_frame)
+
+ self.png_label = QLabel("PNG Compression:")
+ self.png_label.setMinimumWidth(150)
+ png_layout.addWidget(self.png_label)
+
+ self.png_level_spin = QSpinBox()
+ self.png_level_spin.setRange(0, 9)
+ self.png_level_spin.setValue(self.settings.get('compression', {}).get('png_compress_level', 6))
+ png_layout.addWidget(self.png_level_spin)
+
+ self.png_help = QLabel("(0 = fastest, 9 = smallest)")
+ self.png_help.setStyleSheet("color: gray; font-size: 9pt;")
+ png_layout.addWidget(self.png_help)
+ png_layout.addStretch()
+
+ # WEBP quality
+ self.webp_frame = QWidget()
+ webp_layout = QHBoxLayout(self.webp_frame)
+ webp_layout.setContentsMargins(0, 0, 0, 0)
+ compression_layout.addWidget(self.webp_frame)
+
+ self.webp_label = QLabel("WEBP Quality:")
+ self.webp_label.setMinimumWidth(150)
+ webp_layout.addWidget(self.webp_label)
+
+ self.webp_quality_spin = QSpinBox()
+ self.webp_quality_spin.setRange(1, 100)
+ self.webp_quality_spin.setValue(self.settings.get('compression', {}).get('webp_quality', 85))
+ webp_layout.addWidget(self.webp_quality_spin)
+
+ self.webp_help = QLabel("(higher = better quality, larger size)")
+ self.webp_help.setStyleSheet("color: gray; font-size: 9pt;")
+ webp_layout.addWidget(self.webp_help)
+ webp_layout.addStretch()
+
+ # Initialize format-specific visibility and enabled state
+ self._toggle_compression_format()
+ self._toggle_compression_enabled()
+
+ # HD Strategy (Inpainting acceleration) - Independent of preprocessing toggle
+ hd_group = QGroupBox("Inpainting HD Strategy")
+ main_layout.addWidget(hd_group)
+ hd_layout = QVBoxLayout(hd_group)
+ hd_layout.setContentsMargins(8, 8, 8, 6)
+ hd_layout.setSpacing(4)
+ # Do NOT add to preprocessing_controls - HD Strategy should be independent
+
+ # Chunk settings for large images - Independent of preprocessing toggle
+ chunk_group = QGroupBox("Large Image Processing")
+ main_layout.addWidget(chunk_group)
+ chunk_layout = QVBoxLayout(chunk_group)
+ chunk_layout.setContentsMargins(8, 8, 8, 6)
+ chunk_layout.setSpacing(4)
+ # Do NOT add to preprocessing_controls - Large Image Processing should be independent
+
+ # Strategy selector
+ strat_frame = QWidget()
+ strat_layout = QHBoxLayout(strat_frame)
+ strat_layout.setContentsMargins(0, 0, 0, 0)
+ hd_layout.addWidget(strat_frame)
+
+ strat_label = QLabel("Strategy:")
+ strat_label.setMinimumWidth(150)
+ strat_layout.addWidget(strat_label)
+
+ self.hd_strategy_combo = QComboBox()
+ self.hd_strategy_combo.addItems(['original', 'resize', 'crop'])
+ self.hd_strategy_combo.setCurrentText(self.settings.get('advanced', {}).get('hd_strategy', 'resize'))
+ self.hd_strategy_combo.currentTextChanged.connect(self._on_hd_strategy_change)
+ strat_layout.addWidget(self.hd_strategy_combo)
+
+ strat_help = QLabel("(original = legacy full-image; resize/crop = faster)")
+ strat_help.setStyleSheet("color: gray; font-size: 9pt;")
+ strat_layout.addWidget(strat_help)
+ strat_layout.addStretch()
+
+ # Resize limit row
+ self.hd_resize_frame = QWidget()
+ resize_layout = QHBoxLayout(self.hd_resize_frame)
+ resize_layout.setContentsMargins(0, 0, 0, 0)
+ hd_layout.addWidget(self.hd_resize_frame)
+
+ resize_label = QLabel("Resize limit (long edge):")
+ resize_label.setMinimumWidth(150)
+ resize_layout.addWidget(resize_label)
+
+ self.hd_resize_limit_spin = QSpinBox()
+ self.hd_resize_limit_spin.setRange(512, 4096)
+ self.hd_resize_limit_spin.setSingleStep(64)
+ self.hd_resize_limit_spin.setValue(int(self.settings.get('advanced', {}).get('hd_strategy_resize_limit', 1536)))
+ resize_layout.addWidget(self.hd_resize_limit_spin)
+
+ resize_layout.addWidget(QLabel("px"))
+ resize_layout.addStretch()
+
+ # Crop params rows
+ self.hd_crop_margin_frame = QWidget()
+ margin_layout = QHBoxLayout(self.hd_crop_margin_frame)
+ margin_layout.setContentsMargins(0, 0, 0, 0)
+ hd_layout.addWidget(self.hd_crop_margin_frame)
+
+ margin_label = QLabel("Crop margin:")
+ margin_label.setMinimumWidth(150)
+ margin_layout.addWidget(margin_label)
+
+ self.hd_crop_margin_spin = QSpinBox()
+ self.hd_crop_margin_spin.setRange(0, 256)
+ self.hd_crop_margin_spin.setSingleStep(2)
+ self.hd_crop_margin_spin.setValue(int(self.settings.get('advanced', {}).get('hd_strategy_crop_margin', 16)))
+ margin_layout.addWidget(self.hd_crop_margin_spin)
+
+ margin_layout.addWidget(QLabel("px"))
+ margin_layout.addStretch()
+
+ self.hd_crop_trigger_frame = QWidget()
+ trigger_layout = QHBoxLayout(self.hd_crop_trigger_frame)
+ trigger_layout.setContentsMargins(0, 0, 0, 0)
+ hd_layout.addWidget(self.hd_crop_trigger_frame)
+
+ trigger_label = QLabel("Crop trigger size:")
+ trigger_label.setMinimumWidth(150)
+ trigger_layout.addWidget(trigger_label)
+
+ self.hd_crop_trigger_spin = QSpinBox()
+ self.hd_crop_trigger_spin.setRange(256, 4096)
+ self.hd_crop_trigger_spin.setSingleStep(64)
+ self.hd_crop_trigger_spin.setValue(int(self.settings.get('advanced', {}).get('hd_strategy_crop_trigger_size', 1024)))
+ trigger_layout.addWidget(self.hd_crop_trigger_spin)
+
+ trigger_help = QLabel("px (apply crop only if long edge > trigger)")
+ trigger_layout.addWidget(trigger_help)
+ trigger_layout.addStretch()
+
+ # Initialize strategy-specific visibility
+ self._on_hd_strategy_change()
+
+ # Clarifying note about precedence with tiling
+ note_label = QLabel(
+ "Note: HD Strategy (resize/crop) takes precedence over Inpainting Tiling when it triggers.\n"
+ "Set strategy to 'original' if you want tiling to control large-image behavior."
+ )
+ note_label.setStyleSheet("color: gray; font-size: 9pt;")
+ note_label.setWordWrap(True)
+ hd_layout.addWidget(note_label)
+
+ # Chunk height
+ chunk_height_frame = QWidget()
+ chunk_height_layout = QHBoxLayout(chunk_height_frame)
+ chunk_height_layout.setContentsMargins(0, 0, 0, 0)
+ chunk_layout.addWidget(chunk_height_frame)
+
+ chunk_height_label = QLabel("Chunk Height:")
+ chunk_height_label.setMinimumWidth(150)
+ chunk_height_layout.addWidget(chunk_height_label)
+ # Do NOT add to preprocessing_controls - chunk settings should be independent
+
+ self.chunk_height_spinbox = QSpinBox()
+ self.chunk_height_spinbox.setRange(500, 2000)
+ self.chunk_height_spinbox.setSingleStep(100)
+ self.chunk_height_spinbox.setValue(self.settings['preprocessing']['chunk_height'])
+ chunk_height_layout.addWidget(self.chunk_height_spinbox)
+ # Do NOT add to preprocessing_controls - chunk settings should be independent
+
+ chunk_height_layout.addWidget(QLabel("pixels"))
+ chunk_height_layout.addStretch()
+
+ # Chunk overlap
+ chunk_overlap_frame = QWidget()
+ chunk_overlap_layout = QHBoxLayout(chunk_overlap_frame)
+ chunk_overlap_layout.setContentsMargins(0, 0, 0, 0)
+ chunk_layout.addWidget(chunk_overlap_frame)
+
+ chunk_overlap_label = QLabel("Chunk Overlap:")
+ chunk_overlap_label.setMinimumWidth(150)
+ chunk_overlap_layout.addWidget(chunk_overlap_label)
+ # Do NOT add to preprocessing_controls - chunk settings should be independent
+
+ self.chunk_overlap_spinbox = QSpinBox()
+ self.chunk_overlap_spinbox.setRange(0, 200)
+ self.chunk_overlap_spinbox.setSingleStep(10)
+ self.chunk_overlap_spinbox.setValue(self.settings['preprocessing']['chunk_overlap'])
+ chunk_overlap_layout.addWidget(self.chunk_overlap_spinbox)
+ # Do NOT add to preprocessing_controls - chunk settings should be independent
+
+ chunk_overlap_layout.addWidget(QLabel("pixels"))
+ chunk_overlap_layout.addStretch()
+
+ # Inpainting Tiling section
+ tiling_group = QGroupBox("Inpainting Tiling")
+ main_layout.addWidget(tiling_group)
+ tiling_layout = QVBoxLayout(tiling_group)
+ tiling_layout.setContentsMargins(8, 8, 8, 6)
+ tiling_layout.setSpacing(4)
+ # Do NOT add to preprocessing_controls - tiling should be independent
+
+ # Enable tiling
+ # Prefer values from legacy 'tiling' section if present, otherwise use 'preprocessing'
+ tiling_enabled_value = self.settings['preprocessing'].get('inpaint_tiling_enabled', False)
+ if 'tiling' in self.settings and isinstance(self.settings['tiling'], dict) and 'enabled' in self.settings['tiling']:
+ tiling_enabled_value = self.settings['tiling']['enabled']
+
+ self.inpaint_tiling_enabled = self._create_styled_checkbox("Enable automatic tiling for inpainting (processes large images in tiles)")
+ self.inpaint_tiling_enabled.setChecked(tiling_enabled_value)
+ self.inpaint_tiling_enabled.toggled.connect(self._toggle_tiling_controls)
+ tiling_layout.addWidget(self.inpaint_tiling_enabled)
+
+ # Tile size
+ tile_size_frame = QWidget()
+ tile_size_layout = QHBoxLayout(tile_size_frame)
+ tile_size_layout.setContentsMargins(0, 0, 0, 0)
+ tiling_layout.addWidget(tile_size_frame)
+
+ self.tile_size_label = QLabel("Tile Size:")
+ self.tile_size_label.setMinimumWidth(150)
+ tile_size_layout.addWidget(self.tile_size_label)
+
+ tile_size_value = self.settings['preprocessing'].get('inpaint_tile_size', 512)
+ if 'tiling' in self.settings and isinstance(self.settings['tiling'], dict) and 'tile_size' in self.settings['tiling']:
+ tile_size_value = self.settings['tiling']['tile_size']
+
+ self.tile_size_spinbox = QSpinBox()
+ self.tile_size_spinbox.setRange(256, 2048)
+ self.tile_size_spinbox.setSingleStep(128)
+ self.tile_size_spinbox.setValue(tile_size_value)
+ tile_size_layout.addWidget(self.tile_size_spinbox)
+
+ self.tile_size_unit_label = QLabel("pixels")
+ tile_size_layout.addWidget(self.tile_size_unit_label)
+ tile_size_layout.addStretch()
+
+ # Tile overlap
+ tile_overlap_frame = QWidget()
+ tile_overlap_layout = QHBoxLayout(tile_overlap_frame)
+ tile_overlap_layout.setContentsMargins(0, 0, 0, 0)
+ tiling_layout.addWidget(tile_overlap_frame)
+
+ self.tile_overlap_label = QLabel("Tile Overlap:")
+ self.tile_overlap_label.setMinimumWidth(150)
+ tile_overlap_layout.addWidget(self.tile_overlap_label)
+
+ tile_overlap_value = self.settings['preprocessing'].get('inpaint_tile_overlap', 64)
+ if 'tiling' in self.settings and isinstance(self.settings['tiling'], dict) and 'tile_overlap' in self.settings['tiling']:
+ tile_overlap_value = self.settings['tiling']['tile_overlap']
+
+ self.tile_overlap_spinbox = QSpinBox()
+ self.tile_overlap_spinbox.setRange(0, 256)
+ self.tile_overlap_spinbox.setSingleStep(16)
+ self.tile_overlap_spinbox.setValue(tile_overlap_value)
+ tile_overlap_layout.addWidget(self.tile_overlap_spinbox)
+
+ self.tile_overlap_unit_label = QLabel("pixels")
+ tile_overlap_layout.addWidget(self.tile_overlap_unit_label)
+ tile_overlap_layout.addStretch()
+
+ # Don't initialize here - will be done after dialog is shown
+
+        # Add stretch at the end to absorb any remaining vertical space
+ main_layout.addStretch()
+
+ def _create_inpainting_tab(self):
+ """Create inpainting settings tab with comprehensive per-text-type dilation controls"""
+ # Create tab widget and add to tab widget
+ tab_widget = QWidget()
+ self.tab_widget.addTab(tab_widget, "Inpainting")
+
+ # Main scrollable content
+ main_layout = QVBoxLayout(tab_widget)
+ main_layout.setContentsMargins(5, 5, 5, 5)
+ main_layout.setSpacing(6)
+
+ # General Mask Settings (applies to all inpainting methods)
+ mask_group = QGroupBox("Mask Settings")
+ main_layout.addWidget(mask_group)
+ mask_layout = QVBoxLayout(mask_group)
+ mask_layout.setContentsMargins(8, 8, 8, 6)
+ mask_layout.setSpacing(4)
+
+ # Auto toggle (affects both mask dilation AND iterations)
+ self.auto_iterations_checkbox = self._create_styled_checkbox("Auto Iterations (automatically set values based on OCR provider and B&W vs Color)")
+ self.auto_iterations_checkbox.setChecked(self.settings.get('auto_iterations', True))
+ self.auto_iterations_checkbox.toggled.connect(self._toggle_iteration_controls)
+ self.auto_iterations_checkbox.toggled.connect(self._on_primary_auto_toggle) # Sync with "Use Same For All"
+ mask_layout.addWidget(self.auto_iterations_checkbox)
+
+ # Mask Dilation frame (affected by auto setting)
+ mask_dilation_group = QGroupBox("Mask Dilation")
+ mask_layout.addWidget(mask_dilation_group)
+ mask_dilation_layout = QVBoxLayout(mask_dilation_group)
+ mask_dilation_layout.setContentsMargins(8, 8, 8, 6)
+ mask_dilation_layout.setSpacing(4)
+
+ # Note about dilation importance
+ note_label = QLabel(
+ "Mask dilation is critical for avoiding white spots in final images.\n"
+ "Adjust per text type for optimal results."
+ )
+ note_label.setStyleSheet("color: gray; font-style: italic;")
+ note_label.setWordWrap(True)
+ mask_dilation_layout.addWidget(note_label)
+
+ # Keep all three dilation controls in a list for easy access
+ if not hasattr(self, 'mask_dilation_controls'):
+ self.mask_dilation_controls = []
+
+ # Mask dilation size
+ dilation_frame = QWidget()
+ dilation_layout = QHBoxLayout(dilation_frame)
+ dilation_layout.setContentsMargins(0, 0, 0, 0)
+ mask_dilation_layout.addWidget(dilation_frame)
+
+ self.dilation_label = QLabel("Mask Dilation:")
+ self.dilation_label.setMinimumWidth(150)
+ dilation_layout.addWidget(self.dilation_label)
+
+ self.mask_dilation_spinbox = QSpinBox()
+ self.mask_dilation_spinbox.setRange(0, 50)
+ self.mask_dilation_spinbox.setSingleStep(5)
+ self.mask_dilation_spinbox.setValue(self.settings.get('mask_dilation', 15))
+ dilation_layout.addWidget(self.mask_dilation_spinbox)
+
+ self.dilation_unit_label = QLabel("pixels (expand mask beyond text)")
+ dilation_layout.addWidget(self.dilation_unit_label)
+ dilation_layout.addStretch()
+
+        # Per-text-type dilation iteration controls
+ iterations_group = QGroupBox("Dilation Iterations Control")
+ iterations_layout = QVBoxLayout(iterations_group)
+ mask_dilation_layout.addWidget(iterations_group)
+
+        # Master control applying one iteration count to all text types
+ all_iter_widget = QWidget()
+ all_iter_layout = QHBoxLayout(all_iter_widget)
+ all_iter_layout.setContentsMargins(0, 0, 0, 0)
+ iterations_layout.addWidget(all_iter_widget)
+
+ # Checkbox to enable/disable uniform iterations
+ self.use_all_iterations_checkbox = self._create_styled_checkbox("Use Same For All:")
+ self.use_all_iterations_checkbox.setChecked(self.settings.get('use_all_iterations', True))
+ self.use_all_iterations_checkbox.toggled.connect(self._toggle_iteration_controls)
+ all_iter_layout.addWidget(self.use_all_iterations_checkbox)
+
+ all_iter_layout.addSpacing(10)
+
+ self.all_iterations_spinbox = QSpinBox()
+ self.all_iterations_spinbox.setRange(0, 5)
+ self.all_iterations_spinbox.setValue(self.settings.get('all_iterations', 2))
+ self.all_iterations_spinbox.setEnabled(self.use_all_iterations_checkbox.isChecked())
+ all_iter_layout.addWidget(self.all_iterations_spinbox)
+
+ self.all_iter_label = QLabel("iterations (applies to all text types)")
+ all_iter_layout.addWidget(self.all_iter_label)
+ all_iter_layout.addStretch()
+
+ # Separator
+ separator1 = QFrame()
+ separator1.setFrameShape(QFrame.Shape.HLine)
+ separator1.setFrameShadow(QFrame.Shadow.Sunken)
+ iterations_layout.addWidget(separator1)
+
+ # Individual Controls Label
+ self.individual_controls_header_label = QLabel("Individual Text Type Controls:")
+ individual_label_font = QFont('Arial', 9)
+ individual_label_font.setBold(True)
+ self.individual_controls_header_label.setFont(individual_label_font)
+ iterations_layout.addWidget(self.individual_controls_header_label)
+
+        # Text bubble iterations (falls back to the legacy 'bubble_dilation_iterations' setting)
+ text_bubble_iter_widget = QWidget()
+ text_bubble_iter_layout = QHBoxLayout(text_bubble_iter_widget)
+ text_bubble_iter_layout.setContentsMargins(0, 0, 0, 0)
+ iterations_layout.addWidget(text_bubble_iter_widget)
+
+ self.text_bubble_label = QLabel("Text Bubbles:")
+ self.text_bubble_label.setMinimumWidth(120)
+ text_bubble_iter_layout.addWidget(self.text_bubble_label)
+
+ self.text_bubble_iter_spinbox = QSpinBox()
+ self.text_bubble_iter_spinbox.setRange(0, 5)
+ self.text_bubble_iter_spinbox.setValue(self.settings.get('text_bubble_dilation_iterations',
+ self.settings.get('bubble_dilation_iterations', 2)))
+ text_bubble_iter_layout.addWidget(self.text_bubble_iter_spinbox)
+
+ self.text_bubble_desc = QLabel("iterations (speech/dialogue bubbles)")
+ text_bubble_iter_layout.addWidget(self.text_bubble_desc)
+ text_bubble_iter_layout.addStretch()
+
+        # Empty bubble iterations
+ empty_bubble_iter_widget = QWidget()
+ empty_bubble_iter_layout = QHBoxLayout(empty_bubble_iter_widget)
+ empty_bubble_iter_layout.setContentsMargins(0, 0, 0, 0)
+ iterations_layout.addWidget(empty_bubble_iter_widget)
+
+ self.empty_bubble_label = QLabel("Empty Bubbles:")
+ self.empty_bubble_label.setMinimumWidth(120)
+ empty_bubble_iter_layout.addWidget(self.empty_bubble_label)
+
+ self.empty_bubble_iter_spinbox = QSpinBox()
+ self.empty_bubble_iter_spinbox.setRange(0, 5)
+ self.empty_bubble_iter_spinbox.setValue(self.settings.get('empty_bubble_dilation_iterations', 3))
+ empty_bubble_iter_layout.addWidget(self.empty_bubble_iter_spinbox)
+
+ self.empty_bubble_desc = QLabel("iterations (empty speech bubbles)")
+ empty_bubble_iter_layout.addWidget(self.empty_bubble_desc)
+ empty_bubble_iter_layout.addStretch()
+
+ # Free text iterations
+ free_text_iter_widget = QWidget()
+ free_text_iter_layout = QHBoxLayout(free_text_iter_widget)
+ free_text_iter_layout.setContentsMargins(0, 0, 0, 0)
+ iterations_layout.addWidget(free_text_iter_widget)
+
+ self.free_text_label = QLabel("Free Text:")
+ self.free_text_label.setMinimumWidth(120)
+ free_text_iter_layout.addWidget(self.free_text_label)
+
+ self.free_text_iter_spinbox = QSpinBox()
+ self.free_text_iter_spinbox.setRange(0, 5)
+ self.free_text_iter_spinbox.setValue(self.settings.get('free_text_dilation_iterations', 0))
+ free_text_iter_layout.addWidget(self.free_text_iter_spinbox)
+
+ self.free_text_desc = QLabel("iterations (0 = perfect for B&W panels)")
+ free_text_iter_layout.addWidget(self.free_text_desc)
+ free_text_iter_layout.addStretch()
+
+ # Store individual control widgets for enable/disable (includes descriptive labels)
+ self.individual_iteration_controls = [
+ (self.text_bubble_label, self.text_bubble_iter_spinbox, self.text_bubble_desc),
+ (self.empty_bubble_label, self.empty_bubble_iter_spinbox, self.empty_bubble_desc),
+ (self.free_text_label, self.free_text_iter_spinbox, self.free_text_desc)
+ ]
+
+ # Apply initial state
+ self._toggle_iteration_controls()
+
+        # Quick presets
+ preset_widget = QWidget()
+ preset_layout = QHBoxLayout(preset_widget)
+ preset_layout.setContentsMargins(0, 0, 0, 0)
+ mask_dilation_layout.addWidget(preset_widget)
+
+ preset_label = QLabel("Quick Presets:")
+ preset_layout.addWidget(preset_label)
+ preset_layout.addSpacing(10)
+
+ bw_manga_btn = QPushButton("B&W Manga")
+ bw_manga_btn.setStyleSheet("""
+ QPushButton {
+ background-color: #3a7ca5;
+ color: white;
+ border: none;
+ padding: 6px 12px;
+ border-radius: 4px;
+ font-weight: bold;
+ }
+ QPushButton:hover {
+ background-color: #4a8cb5;
+ }
+ QPushButton:pressed {
+ background-color: #2a6c95;
+ }
+ """)
+ bw_manga_btn.clicked.connect(lambda: self._set_mask_preset(15, False, 2, 2, 3, 0))
+ preset_layout.addWidget(bw_manga_btn)
+
+ colored_btn = QPushButton("Colored")
+ colored_btn.setStyleSheet("""
+ QPushButton {
+ background-color: #3a7ca5;
+ color: white;
+ border: none;
+ padding: 6px 12px;
+ border-radius: 4px;
+ font-weight: bold;
+ }
+ QPushButton:hover {
+ background-color: #4a8cb5;
+ }
+ QPushButton:pressed {
+ background-color: #2a6c95;
+ }
+ """)
+ colored_btn.clicked.connect(lambda: self._set_mask_preset(15, False, 2, 2, 3, 3))
+ preset_layout.addWidget(colored_btn)
+
+ uniform_btn = QPushButton("Uniform")
+ uniform_btn.setStyleSheet("""
+ QPushButton {
+ background-color: #3a7ca5;
+ color: white;
+ border: none;
+ padding: 6px 12px;
+ border-radius: 4px;
+ font-weight: bold;
+ }
+ QPushButton:hover {
+ background-color: #4a8cb5;
+ }
+ QPushButton:pressed {
+ background-color: #2a6c95;
+ }
+ """)
+ uniform_btn.clicked.connect(lambda: self._set_mask_preset(0, True, 2, 2, 2, 0))
+ preset_layout.addWidget(uniform_btn)
+
+ preset_layout.addStretch()
+
+        # Help text
+        help_text = QLabel(
+            "💡 B&W Manga: Optimized for black & white panels with clean bubbles\n"
+            "💡 Colored: For colored manga with complex backgrounds\n"
+            "💡 Uniform: Good for Manga-OCR\n"
+            "ℹ️ Empty bubbles often need more iterations than text bubbles\n"
+            "ℹ️ Set Free Text to 0 for crisp B&W panels without bleeding"
+        )
+ help_text_font = QFont('Arial', 9)
+ help_text.setFont(help_text_font)
+ help_text.setStyleSheet("color: gray;")
+ help_text.setWordWrap(True)
+ mask_dilation_layout.addWidget(help_text)
+
+ main_layout.addStretch()
+
+ def _toggle_iteration_controls(self):
+ """Enable/disable iteration controls based on Auto and 'Use Same For All' toggles"""
+ # Get auto checkbox state
+ auto_on = False
+ if hasattr(self, 'auto_iterations_checkbox'):
+ auto_on = self.auto_iterations_checkbox.isChecked()
+
+ # Get use_all checkbox state
+ use_all = False
+ if hasattr(self, 'use_all_iterations_checkbox'):
+ use_all = self.use_all_iterations_checkbox.isChecked()
+
+ # Also update the auto_iterations_enabled attribute
+ self.auto_iterations_enabled = auto_on
+
+ if auto_on:
+ # Disable ALL mask dilation and iteration controls when auto is on
+ # Mask dilation controls
+ try:
+ if hasattr(self, 'mask_dilation_spinbox'):
+ self.mask_dilation_spinbox.setEnabled(False)
+ except Exception:
+ pass
+ try:
+ if hasattr(self, 'dilation_label'):
+ self.dilation_label.setEnabled(False)
+ except Exception:
+ pass
+ try:
+ if hasattr(self, 'dilation_unit_label'):
+ self.dilation_unit_label.setEnabled(False)
+ except Exception:
+ pass
+ # Iteration controls
+ try:
+ self.all_iterations_spinbox.setEnabled(False)
+ except Exception:
+ pass
+ try:
+ if hasattr(self, 'all_iter_label'):
+ self.all_iter_label.setEnabled(False)
+ except Exception:
+ pass
+ try:
+ if hasattr(self, 'use_all_iterations_checkbox'):
+ self.use_all_iterations_checkbox.setEnabled(False)
+ except Exception:
+ pass
+ try:
+ if hasattr(self, 'individual_controls_header_label'):
+ self.individual_controls_header_label.setEnabled(False)
+ except Exception:
+ pass
+ # Disable individual controls and their description labels
+ for control_tuple in getattr(self, 'individual_iteration_controls', []):
+ try:
+ if len(control_tuple) == 3:
+ label, spinbox, desc_label = control_tuple
+ spinbox.setEnabled(False)
+ label.setEnabled(False)
+ desc_label.setEnabled(False)
+ elif len(control_tuple) == 2:
+ label, spinbox = control_tuple
+ spinbox.setEnabled(False)
+ label.setEnabled(False)
+ except Exception:
+ pass
+ return
+
+ # Auto off -> enable mask dilation (always) and "Use Same For All" (respect its state)
+ # Mask dilation is always enabled when auto is off
+ try:
+ if hasattr(self, 'mask_dilation_spinbox'):
+ self.mask_dilation_spinbox.setEnabled(True)
+ except Exception:
+ pass
+ try:
+ if hasattr(self, 'dilation_label'):
+ self.dilation_label.setEnabled(True)
+ except Exception:
+ pass
+ try:
+ if hasattr(self, 'dilation_unit_label'):
+ self.dilation_unit_label.setEnabled(True)
+ except Exception:
+ pass
+
+ try:
+ if hasattr(self, 'use_all_iterations_checkbox'):
+ self.use_all_iterations_checkbox.setEnabled(True)
+ except Exception:
+ pass
+
+ try:
+ self.all_iterations_spinbox.setEnabled(use_all)
+ except Exception:
+ pass
+ try:
+ if hasattr(self, 'all_iter_label'):
+ self.all_iter_label.setEnabled(use_all)
+ except Exception:
+ pass
+ try:
+ if hasattr(self, 'individual_controls_header_label'):
+ self.individual_controls_header_label.setEnabled(not use_all)
+ except Exception:
+ pass
+
+ # Individual controls respect the "Use Same For All" state
+ for control_tuple in getattr(self, 'individual_iteration_controls', []):
+ enabled = not use_all
+ try:
+ if len(control_tuple) == 3:
+ label, spinbox, desc_label = control_tuple
+ spinbox.setEnabled(enabled)
+ label.setEnabled(enabled)
+ desc_label.setEnabled(enabled)
+ elif len(control_tuple) == 2:
+ label, spinbox = control_tuple
+ spinbox.setEnabled(enabled)
+ label.setEnabled(enabled)
+ except Exception:
+ pass
+
+ def _on_primary_auto_toggle(self, checked):
+ """When primary Auto toggle changes, disable/enable 'Use Same For All' checkbox"""
+ if hasattr(self, 'use_all_iterations_checkbox'):
+ self.use_all_iterations_checkbox.setEnabled(not checked)
+
+ def _set_mask_preset(self, dilation, use_all, all_iter, text_bubble_iter, empty_bubble_iter, free_text_iter):
+ """Set mask dilation preset values with comprehensive iteration controls"""
+ self.mask_dilation_spinbox.setValue(dilation)
+ self.use_all_iterations_checkbox.setChecked(use_all)
+ self.all_iterations_spinbox.setValue(all_iter)
+ self.text_bubble_iter_spinbox.setValue(text_bubble_iter)
+ self.empty_bubble_iter_spinbox.setValue(empty_bubble_iter)
+ self.free_text_iter_spinbox.setValue(free_text_iter)
+ self._toggle_iteration_controls()
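+        # The preset buttons above pass these argument tuples
+        # (dilation, use_all, all_iter, text_bubble, empty_bubble, free_text):
+        #   B&W Manga -> (15, False, 2, 2, 3, 0)
+        #   Colored   -> (15, False, 2, 2, 3, 3)
+        #   Uniform   -> (0,  True,  2, 2, 2, 0)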
+
+ def _create_cloud_api_tab(self):
+ """Create cloud API settings tab"""
+ # Create tab widget and add to tab widget
+ tab_widget = QWidget()
+ self.tab_widget.addTab(tab_widget, "Cloud API")
+
+ # Create scroll area for content
+ scroll_area = QScrollArea()
+ scroll_area.setWidgetResizable(True)
+ scroll_area.setFrameShape(QFrame.Shape.NoFrame)
+
+ content_widget = QWidget()
+ content_layout = QVBoxLayout(content_widget)
+ content_layout.setSpacing(10)
+ content_layout.setContentsMargins(20, 20, 20, 20)
+
+ scroll_area.setWidget(content_widget)
+
+ # Add scroll area to parent layout
+ parent_layout = QVBoxLayout(tab_widget)
+ parent_layout.setContentsMargins(0, 0, 0, 0)
+ parent_layout.addWidget(scroll_area)
+
+ # API Model Selection
+ model_group = QGroupBox("Inpainting Model")
+ model_layout = QVBoxLayout(model_group)
+ content_layout.addWidget(model_group)
+
+ model_desc = QLabel("Select the Replicate model to use for inpainting:")
+ model_layout.addWidget(model_desc)
+ model_layout.addSpacing(10)
+
+ # Model options - use button group for radio buttons
+ self.cloud_model_button_group = QButtonGroup()
+ self.cloud_model_selected = self.settings.get('cloud_inpaint_model', 'ideogram-v2')
+
+ models = [
+ ('ideogram-v2', 'Ideogram V2 (Best quality, with prompts)', 'ideogram-ai/ideogram-v2'),
+ ('sd-inpainting', 'Stable Diffusion Inpainting (Classic, fast)', 'stability-ai/stable-diffusion-inpainting'),
+ ('flux-inpainting', 'FLUX Dev Inpainting (High quality)', 'zsxkib/flux-dev-inpainting'),
+ ('custom', 'Custom Model (Enter model identifier)', '')
+ ]
+
+ for value, text, model_id in models:
+ row_widget = QWidget()
+ row_layout = QHBoxLayout(row_widget)
+ row_layout.setContentsMargins(0, 0, 0, 0)
+ model_layout.addWidget(row_widget)
+
+ rb = QRadioButton(text)
+ rb.setChecked(value == self.cloud_model_selected)
+ rb.toggled.connect(lambda checked, v=value: self._on_cloud_model_change(v) if checked else None)
+ self.cloud_model_button_group.addButton(rb)
+ row_layout.addWidget(rb)
+
+ if model_id:
+ model_id_label = QLabel(f"({model_id})")
+ model_id_font = QFont('Arial', 8)
+ model_id_label.setFont(model_id_font)
+ model_id_label.setStyleSheet("color: gray;")
+ row_layout.addWidget(model_id_label)
+
+ row_layout.addStretch()
+
+ # Custom version ID (now model identifier)
+ self.custom_version_widget = QWidget()
+ custom_version_layout = QVBoxLayout(self.custom_version_widget)
+ custom_version_layout.setContentsMargins(0, 10, 0, 0)
+ model_layout.addWidget(self.custom_version_widget)
+
+ custom_id_row = QWidget()
+ custom_id_layout = QHBoxLayout(custom_id_row)
+ custom_id_layout.setContentsMargins(0, 0, 0, 0)
+ custom_version_layout.addWidget(custom_id_row)
+
+ custom_id_label = QLabel("Model ID:")
+ custom_id_label.setMinimumWidth(120)
+ custom_id_layout.addWidget(custom_id_label)
+
+ self.custom_version_entry = QLineEdit()
+ self.custom_version_entry.setText(self.settings.get('cloud_custom_version', ''))
+ custom_id_layout.addWidget(self.custom_version_entry)
+
+ # Add helper text for custom model
+ helper_text = QLabel("Format: owner/model-name (e.g. stability-ai/stable-diffusion-inpainting)")
+ helper_font = QFont('Arial', 8)
+ helper_text.setFont(helper_font)
+ helper_text.setStyleSheet("color: gray;")
+ helper_text.setContentsMargins(120, 0, 0, 0)
+ custom_version_layout.addWidget(helper_text)
+
+ # Initially hide custom version entry
+ if self.cloud_model_selected != 'custom':
+ self.custom_version_widget.setVisible(False)
+
+ # Performance Settings
+ perf_group = QGroupBox("Performance Settings")
+ perf_layout = QVBoxLayout(perf_group)
+ content_layout.addWidget(perf_group)
+
+ # Timeout
+ timeout_widget = QWidget()
+ timeout_layout = QHBoxLayout(timeout_widget)
+ timeout_layout.setContentsMargins(0, 0, 0, 0)
+ perf_layout.addWidget(timeout_widget)
+
+ timeout_label = QLabel("API Timeout:")
+ timeout_label.setMinimumWidth(120)
+ timeout_layout.addWidget(timeout_label)
+
+ self.cloud_timeout_spinbox = QSpinBox()
+ self.cloud_timeout_spinbox.setRange(30, 300)
+ self.cloud_timeout_spinbox.setValue(self.settings.get('cloud_timeout', 60))
+ timeout_layout.addWidget(self.cloud_timeout_spinbox)
+
+ timeout_unit = QLabel("seconds")
+ timeout_unit_font = QFont('Arial', 9)
+ timeout_unit.setFont(timeout_unit_font)
+ timeout_layout.addWidget(timeout_unit)
+ timeout_layout.addStretch()
+
+ # Help text
+ help_text = QLabel(
+ "💡 Tips:\n"
+ "• Ideogram V2 is currently the best quality option\n"
+ "• SD inpainting is fast and supports prompts\n"
+ "• FLUX inpainting offers high quality results\n"
+ "• Find more models at replicate.com/collections/inpainting"
+ )
+ help_font = QFont('Arial', 9)
+ help_text.setFont(help_font)
+ help_text.setStyleSheet("color: gray;")
+ help_text.setWordWrap(True)
+ content_layout.addWidget(help_text)
+
+ # Prompt Settings (for all models except custom)
+ self.prompt_group = QGroupBox("Prompt Settings")
+ prompt_layout = QVBoxLayout(self.prompt_group)
+ content_layout.addWidget(self.prompt_group)
+
+ # Positive prompt
+ prompt_label = QLabel("Inpainting Prompt:")
+ prompt_layout.addWidget(prompt_label)
+
+ self.cloud_prompt_entry = QLineEdit()
+ self.cloud_prompt_entry.setText(self.settings.get('cloud_inpaint_prompt', 'clean background, smooth surface'))
+ prompt_layout.addWidget(self.cloud_prompt_entry)
+
+ # Add note about prompts
+ prompt_tip = QLabel("Tip: Describe what you want in the inpainted area (e.g., 'white wall', 'wooden floor')")
+ prompt_tip_font = QFont('Arial', 8)
+ prompt_tip.setFont(prompt_tip_font)
+ prompt_tip.setStyleSheet("color: gray;")
+ prompt_tip.setWordWrap(True)
+ prompt_tip.setContentsMargins(0, 2, 0, 10)
+ prompt_layout.addWidget(prompt_tip)
+
+ # Negative prompt (mainly for SD)
+ self.negative_prompt_label = QLabel("Negative Prompt (SD only):")
+ prompt_layout.addWidget(self.negative_prompt_label)
+
+ self.negative_entry = QLineEdit()
+ self.negative_entry.setText(self.settings.get('cloud_negative_prompt', 'text, writing, letters'))
+ prompt_layout.addWidget(self.negative_entry)
+
+ # Inference steps (for SD)
+ self.steps_widget = QWidget()
+ steps_layout = QHBoxLayout(self.steps_widget)
+ steps_layout.setContentsMargins(0, 10, 0, 5)
+ prompt_layout.addWidget(self.steps_widget)
+
+ self.steps_label = QLabel("Inference Steps (SD only):")
+ self.steps_label.setMinimumWidth(180)
+ steps_layout.addWidget(self.steps_label)
+
+ self.steps_spinbox = QSpinBox()
+ self.steps_spinbox.setRange(10, 50)
+ self.steps_spinbox.setValue(self.settings.get('cloud_inference_steps', 20))
+ steps_layout.addWidget(self.steps_spinbox)
+
+ steps_desc = QLabel("(Higher = better quality, slower)")
+ steps_desc_font = QFont('Arial', 9)
+ steps_desc.setFont(steps_desc_font)
+ steps_desc.setStyleSheet("color: gray;")
+ steps_layout.addWidget(steps_desc)
+ steps_layout.addStretch()
+
+ # Add stretch at end
+ content_layout.addStretch()
+
+        # Prompt and SD-specific option visibility is initialized by
+        # _on_cloud_model_change (the prompt settings stay visible for all models)
+        self._on_cloud_model_change(self.cloud_model_selected)
+
+ def _on_cloud_model_change(self, model):
+ """Handle cloud model selection change"""
+ # Store the selected model
+ self.cloud_model_selected = model
+
+        # Show/hide the custom model-ID entry; the prompt frame stays visible
+        # for every model, including custom
+        self.custom_version_widget.setVisible(model == 'custom')
+        self.prompt_group.setVisible(True)
+
+ # Show/hide SD-specific options
+ if model == 'sd-inpainting':
+ # Show negative prompt and steps
+ self.negative_prompt_label.setVisible(True)
+ self.negative_entry.setVisible(True)
+ self.steps_widget.setVisible(True)
+ else:
+ # Hide SD-specific options
+ self.negative_prompt_label.setVisible(False)
+ self.negative_entry.setVisible(False)
+ self.steps_widget.setVisible(False)
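+        # Resulting visibility, for reference:
+        #   'custom'        -> model-ID entry + prompt settings
+        #   'sd-inpainting' -> prompt settings + negative prompt + inference steps
+        #   other models    -> prompt settings only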
+
+ def _toggle_preprocessing(self):
+ """Enable/disable preprocessing controls based on main toggle"""
+ enabled = self.preprocess_enabled.isChecked()
+
+ # Process each control in preprocessing_controls list
+ for control in self.preprocessing_controls:
+ try:
+ if isinstance(control, QGroupBox):
+ # Enable/disable entire group box children
+ self._toggle_frame_children(control, enabled)
+ elif isinstance(control, (QSlider, QSpinBox, QCheckBox, QDoubleSpinBox, QComboBox, QLabel)):
+ # Just use setEnabled() - the global stylesheet handles the visual state
+ control.setEnabled(enabled)
+            except Exception:
+                pass
+
+ # Ensure tiling fields respect their own toggle regardless of preprocessing state
+ try:
+ if hasattr(self, '_toggle_tiling_controls'):
+ self._toggle_tiling_controls()
+ except Exception:
+ pass
+
+ def _toggle_frame_children(self, widget, enabled):
+ """Recursively enable/disable all children of a widget"""
+ # Handle all controls including labels - just use setEnabled()
+ for child in widget.findChildren(QWidget):
+ if isinstance(child, (QSlider, QSpinBox, QCheckBox, QDoubleSpinBox, QComboBox, QLineEdit, QLabel)):
+ try:
+ child.setEnabled(enabled)
+ except Exception:
+ pass
+
+ def _toggle_roi_locality_controls(self):
+ """Show/hide ROI locality controls based on toggle."""
+ try:
+ enabled = self.roi_locality_checkbox.isChecked()
+ except Exception:
+ enabled = False
+ # Rows to manage
+ rows = [
+ getattr(self, 'roi_pad_row', None),
+ getattr(self, 'roi_min_row', None),
+ getattr(self, 'roi_area_row', None),
+ getattr(self, 'roi_max_row', None)
+ ]
+ for row in rows:
+ try:
+ if row is None: continue
+ row.setVisible(enabled)
+ except Exception:
+ pass
+
+ def _toggle_tiling_controls(self):
+ """Enable/disable tiling size/overlap fields based on tiling toggle."""
+ try:
+ enabled = bool(self.inpaint_tiling_enabled.isChecked())
+ except Exception:
+ enabled = False
+
+ # Enable/disable tiling widgets and their labels
+ widgets_to_toggle = [
+ ('tile_size_spinbox', 'tile_size_label', 'tile_size_unit_label'),
+ ('tile_overlap_spinbox', 'tile_overlap_label', 'tile_overlap_unit_label')
+ ]
+
+ for widget_names in widgets_to_toggle:
+ for widget_name in widget_names:
+ try:
+ widget = getattr(self, widget_name, None)
+ if widget is not None:
+ # Just use setEnabled() for everything - stylesheet handles visuals
+ widget.setEnabled(enabled)
+ except Exception:
+ pass
+
+ def _on_hd_strategy_change(self):
+ """Show/hide HD strategy controls based on selected strategy."""
+ try:
+ strategy = self.hd_strategy_combo.currentText()
+ except Exception:
+ strategy = 'original'
+
+ # Show/hide resize limit based on strategy
+ if hasattr(self, 'hd_resize_frame'):
+ self.hd_resize_frame.setVisible(strategy == 'resize')
+
+ # Show/hide crop params based on strategy
+ if hasattr(self, 'hd_crop_margin_frame'):
+ self.hd_crop_margin_frame.setVisible(strategy == 'crop')
+ if hasattr(self, 'hd_crop_trigger_frame'):
+ self.hd_crop_trigger_frame.setVisible(strategy == 'crop')
+
+ def _toggle_compression_enabled(self):
+ """Enable/disable compression controls based on compression toggle."""
+ try:
+ enabled = bool(self.compression_enabled.isChecked())
+ except Exception:
+ enabled = False
+
+ # Enable/disable all compression format controls
+ compression_widgets = [
+ getattr(self, 'format_label', None),
+ getattr(self, 'compression_format_combo', None),
+ getattr(self, 'jpeg_frame', None),
+ getattr(self, 'jpeg_label', None),
+ getattr(self, 'jpeg_quality_spin', None),
+ getattr(self, 'jpeg_help', None),
+ getattr(self, 'png_frame', None),
+ getattr(self, 'png_label', None),
+ getattr(self, 'png_level_spin', None),
+ getattr(self, 'png_help', None),
+ getattr(self, 'webp_frame', None),
+ getattr(self, 'webp_label', None),
+ getattr(self, 'webp_quality_spin', None),
+ getattr(self, 'webp_help', None),
+ ]
+
+ for widget in compression_widgets:
+ try:
+ if widget is not None:
+ widget.setEnabled(enabled)
+ except Exception:
+ pass
+
+ def _toggle_compression_format(self):
+ """Show only the controls relevant to the selected format (hide others)."""
+ fmt = self.compression_format_combo.currentText().lower() if hasattr(self, 'compression_format_combo') else 'jpeg'
+ try:
+ # Hide all rows first
+ for row in [getattr(self, 'jpeg_frame', None), getattr(self, 'png_frame', None), getattr(self, 'webp_frame', None)]:
+ try:
+ if row is not None:
+ row.setVisible(False)
+ except Exception:
+ pass
+ # Show the selected one
+ if fmt == 'jpeg':
+ if hasattr(self, 'jpeg_frame') and self.jpeg_frame is not None:
+ self.jpeg_frame.setVisible(True)
+ elif fmt == 'png':
+ if hasattr(self, 'png_frame') and self.png_frame is not None:
+ self.png_frame.setVisible(True)
+ else: # webp
+ if hasattr(self, 'webp_frame') and self.webp_frame is not None:
+ self.webp_frame.setVisible(True)
+ except Exception:
+ pass
+
+ def _toggle_ocr_batching_controls(self):
+ """Show/hide OCR batching rows based on enable toggle."""
+ try:
+ enabled = bool(self.ocr_batch_enabled_checkbox.isChecked())
+ except Exception:
+ enabled = False
+ try:
+ if hasattr(self, 'ocr_bs_row') and self.ocr_bs_row:
+ self.ocr_bs_row.setVisible(enabled)
+ except Exception:
+ pass
+ try:
+ if hasattr(self, 'ocr_cc_row') and self.ocr_cc_row:
+ self.ocr_cc_row.setVisible(enabled)
+ except Exception:
+ pass
+
+ def _create_ocr_tab(self):
+ """Create OCR settings tab with all options"""
+ # Create tab widget and add to tab widget
+ tab_widget = QWidget()
+ self.tab_widget.addTab(tab_widget, "OCR")
+
+ # Create scroll area for OCR settings
+ scroll_area = QScrollArea()
+ scroll_area.setWidgetResizable(True)
+ scroll_area.setFrameShape(QFrame.Shape.NoFrame)
+
+ content_widget = QWidget()
+ content_layout = QVBoxLayout(content_widget)
+ content_layout.setSpacing(10)
+ content_layout.setContentsMargins(20, 20, 20, 20)
+
+ scroll_area.setWidget(content_widget)
+
+ # Add scroll area to parent layout
+ parent_layout = QVBoxLayout(tab_widget)
+ parent_layout.setContentsMargins(0, 0, 0, 0)
+ parent_layout.addWidget(scroll_area)
+
+ # Language hints
+ lang_group = QGroupBox("Language Detection")
+ lang_layout = QVBoxLayout(lang_group)
+ content_layout.addWidget(lang_group)
+
+ lang_desc = QLabel("Select languages to prioritize during OCR:")
+ lang_desc_font = QFont('Arial', 10)
+ lang_desc.setFont(lang_desc_font)
+ lang_layout.addWidget(lang_desc)
+ lang_layout.addSpacing(10)
+
+ # Language checkboxes
+ self.lang_checkboxes = {}
+ languages = [
+ ('ja', 'Japanese'),
+ ('ko', 'Korean'),
+ ('zh', 'Chinese (Simplified)'),
+ ('zh-TW', 'Chinese (Traditional)'),
+ ('en', 'English')
+ ]
+
+ lang_grid_widget = QWidget()
+ lang_grid_layout = QGridLayout(lang_grid_widget)
+ lang_layout.addWidget(lang_grid_widget)
+
+ for i, (code, name) in enumerate(languages):
+ checkbox = self._create_styled_checkbox(name)
+ checkbox.setChecked(code in self.settings['ocr']['language_hints'])
+ self.lang_checkboxes[code] = checkbox
+ lang_grid_layout.addWidget(checkbox, i//2, i%2)
+
+ # OCR parameters
+ ocr_group = QGroupBox("OCR Parameters")
+ ocr_layout = QVBoxLayout(ocr_group)
+ content_layout.addWidget(ocr_group)
+
+ # Confidence threshold
+ conf_widget = QWidget()
+ conf_layout = QHBoxLayout(conf_widget)
+ conf_layout.setContentsMargins(0, 0, 0, 0)
+ ocr_layout.addWidget(conf_widget)
+
+ conf_label = QLabel("Confidence Threshold:")
+ conf_label.setMinimumWidth(180)
+ conf_layout.addWidget(conf_label)
+
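+ # QSlider only supports integers, so fractional settings (e.g. 0.75) are stored
+ # as value * 100 on the slider and divided back by 100 when displayed.
+ # The other fractional sliders in this tab follow the same convention.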
+ self.confidence_threshold_slider = QSlider(Qt.Orientation.Horizontal)
+ self.confidence_threshold_slider.setRange(0, 100)
+ self.confidence_threshold_slider.setValue(int(self.settings['ocr']['confidence_threshold'] * 100))
+ self.confidence_threshold_slider.setMinimumWidth(250)
+ conf_layout.addWidget(self.confidence_threshold_slider)
+
+ self.confidence_threshold_label = QLabel(f"{self.settings['ocr']['confidence_threshold']:.2f}")
+ self.confidence_threshold_label.setMinimumWidth(50)
+ self.confidence_threshold_slider.valueChanged.connect(
+ lambda v: self.confidence_threshold_label.setText(f"{v/100:.2f}")
+ )
+ conf_layout.addWidget(self.confidence_threshold_label)
+ conf_layout.addStretch()
+
+ # Detection mode
+ mode_widget = QWidget()
+ mode_layout = QHBoxLayout(mode_widget)
+ mode_layout.setContentsMargins(0, 0, 0, 0)
+ ocr_layout.addWidget(mode_widget)
+
+ mode_label = QLabel("Detection Mode:")
+ mode_label.setMinimumWidth(180)
+ mode_layout.addWidget(mode_label)
+
+ self.detection_mode_combo = QComboBox()
+ self.detection_mode_combo.addItems(['document', 'text'])
+ self.detection_mode_combo.setCurrentText(self.settings['ocr']['text_detection_mode'])
+ mode_layout.addWidget(self.detection_mode_combo)
+
+ mode_desc = QLabel("(document = better for manga, text = simple layouts)")
+ mode_desc_font = QFont('Arial', 9)
+ mode_desc.setFont(mode_desc_font)
+ mode_desc.setStyleSheet("color: gray;")
+ mode_layout.addWidget(mode_desc)
+ mode_layout.addStretch()
+
+ # Text merging settings
+ merge_group = QGroupBox("Text Region Merging")
+ merge_layout = QVBoxLayout(merge_group)
+ content_layout.addWidget(merge_group)
+
+ # Merge nearby threshold
+ nearby_widget = QWidget()
+ nearby_layout = QHBoxLayout(nearby_widget)
+ nearby_layout.setContentsMargins(0, 0, 0, 0)
+ merge_layout.addWidget(nearby_widget)
+
+ nearby_label = QLabel("Merge Distance:")
+ nearby_label.setMinimumWidth(180)
+ nearby_layout.addWidget(nearby_label)
+
+ self.merge_nearby_threshold_spinbox = QSpinBox()
+ self.merge_nearby_threshold_spinbox.setRange(0, 200)
+ self.merge_nearby_threshold_spinbox.setSingleStep(10)
+ self.merge_nearby_threshold_spinbox.setValue(self.settings['ocr']['merge_nearby_threshold'])
+ nearby_layout.addWidget(self.merge_nearby_threshold_spinbox)
+
+ nearby_unit = QLabel("pixels")
+ nearby_layout.addWidget(nearby_unit)
+ nearby_layout.addStretch()
+
+ # Text Filtering Setting
+ filter_group = QGroupBox("Text Filtering")
+ filter_layout = QVBoxLayout(filter_group)
+ content_layout.addWidget(filter_group)
+
+ # Minimum text length
+ min_length_widget = QWidget()
+ min_length_layout = QHBoxLayout(min_length_widget)
+ min_length_layout.setContentsMargins(0, 0, 0, 0)
+ filter_layout.addWidget(min_length_widget)
+
+ min_length_label = QLabel("Min Text Length:")
+ min_length_label.setMinimumWidth(180)
+ min_length_layout.addWidget(min_length_label)
+
+ self.min_text_length_spinbox = QSpinBox()
+ self.min_text_length_spinbox.setRange(1, 10)
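+ # Note: the stored fallback default of 0 is clamped to the spinbox minimum of 1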
+ self.min_text_length_spinbox.setValue(self.settings['ocr'].get('min_text_length', 0))
+ min_length_layout.addWidget(self.min_text_length_spinbox)
+
+ min_length_unit = QLabel("characters")
+ min_length_layout.addWidget(min_length_unit)
+
+ min_length_desc = QLabel("(skip text shorter than this)")
+ min_length_desc_font = QFont('Arial', 9)
+ min_length_desc.setFont(min_length_desc_font)
+ min_length_desc.setStyleSheet("color: gray;")
+ min_length_layout.addWidget(min_length_desc)
+ min_length_layout.addStretch()
+
+ # Exclude English text checkbox
+ self.exclude_english_checkbox = self._create_styled_checkbox("Exclude primarily English text (tunable threshold)")
+ self.exclude_english_checkbox.setChecked(self.settings['ocr'].get('exclude_english_text', False))
+ filter_layout.addWidget(self.exclude_english_checkbox)
+
+ # Threshold slider
+ english_threshold_widget = QWidget()
+ english_threshold_layout = QHBoxLayout(english_threshold_widget)
+ english_threshold_layout.setContentsMargins(0, 0, 0, 0)
+ filter_layout.addWidget(english_threshold_widget)
+
+ threshold_label = QLabel("English Exclude Threshold:")
+ threshold_label.setMinimumWidth(240)
+ english_threshold_layout.addWidget(threshold_label)
+
+ self.english_exclude_threshold_slider = QSlider(Qt.Orientation.Horizontal)
+ self.english_exclude_threshold_slider.setRange(60, 99)
+ self.english_exclude_threshold_slider.setValue(int(self.settings['ocr'].get('english_exclude_threshold', 0.7) * 100))
+ self.english_exclude_threshold_slider.setMinimumWidth(250)
+ english_threshold_layout.addWidget(self.english_exclude_threshold_slider)
+
+ self.english_threshold_label = QLabel(f"{int(self.settings['ocr'].get('english_exclude_threshold', 0.7)*100)}%")
+ self.english_threshold_label.setMinimumWidth(50)
+ self.english_exclude_threshold_slider.valueChanged.connect(
+ lambda v: self.english_threshold_label.setText(f"{v}%")
+ )
+ english_threshold_layout.addWidget(self.english_threshold_label)
+ english_threshold_layout.addStretch()
+
+ # Minimum character count
+ min_chars_widget = QWidget()
+ min_chars_layout = QHBoxLayout(min_chars_widget)
+ min_chars_layout.setContentsMargins(0, 0, 0, 0)
+ filter_layout.addWidget(min_chars_widget)
+
+ min_chars_label = QLabel("Min chars to exclude as English:")
+ min_chars_label.setMinimumWidth(240)
+ min_chars_layout.addWidget(min_chars_label)
+
+ self.english_exclude_min_chars_spinbox = QSpinBox()
+ self.english_exclude_min_chars_spinbox.setRange(1, 10)
+ self.english_exclude_min_chars_spinbox.setValue(self.settings['ocr'].get('english_exclude_min_chars', 4))
+ min_chars_layout.addWidget(self.english_exclude_min_chars_spinbox)
+
+ min_chars_unit = QLabel("characters")
+ min_chars_layout.addWidget(min_chars_unit)
+ min_chars_layout.addStretch()
+
+ # Legacy aggressive short-token filter
+ self.english_exclude_short_tokens_checkbox = self._create_styled_checkbox("Aggressively drop very short ASCII tokens (legacy)")
+ self.english_exclude_short_tokens_checkbox.setChecked(self.settings['ocr'].get('english_exclude_short_tokens', False))
+ filter_layout.addWidget(self.english_exclude_short_tokens_checkbox)
+
+ # Help text
+ filter_help = QLabel(
+ "💡 Text filtering helps skip:\n"
+ " • UI elements and watermarks\n"
+ " • Page numbers and copyright text\n"
+ " • Single characters or symbols\n"
+ " • Non-target language text"
+ )
+ filter_help_font = QFont('Arial', 9)
+ filter_help.setFont(filter_help_font)
+ filter_help.setStyleSheet("color: gray;")
+ filter_help.setWordWrap(True)
+ filter_help.setContentsMargins(0, 10, 0, 0)
+ filter_layout.addWidget(filter_help)
+
+ # Azure-specific OCR settings
+ azure_ocr_group = QGroupBox("Azure OCR Settings")
+ azure_ocr_layout = QVBoxLayout(azure_ocr_group)
+ content_layout.addWidget(azure_ocr_group)
+
+ # Azure merge multiplier
+ merge_mult_widget = QWidget()
+ merge_mult_layout = QHBoxLayout(merge_mult_widget)
+ merge_mult_layout.setContentsMargins(0, 0, 0, 0)
+ azure_ocr_layout.addWidget(merge_mult_widget)
+
+ merge_mult_label = QLabel("Merge Multiplier:")
+ merge_mult_label.setMinimumWidth(180)
+ merge_mult_layout.addWidget(merge_mult_label)
+
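+ # Slider range 100-500 maps to a 1.00x-5.00x multiplier (value / 100)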
+ self.azure_merge_multiplier_slider = QSlider(Qt.Orientation.Horizontal)
+ self.azure_merge_multiplier_slider.setRange(100, 500)
+ self.azure_merge_multiplier_slider.setValue(int(self.settings['ocr'].get('azure_merge_multiplier', 2.0) * 100))
+ self.azure_merge_multiplier_slider.setMinimumWidth(200)
+ merge_mult_layout.addWidget(self.azure_merge_multiplier_slider)
+
+ self.azure_label = QLabel(f"{self.settings['ocr'].get('azure_merge_multiplier', 2.0):.2f}x")
+ self.azure_label.setMinimumWidth(50)
+ self.azure_merge_multiplier_slider.valueChanged.connect(
+ lambda v: self.azure_label.setText(f"{v/100:.2f}x")
+ )
+ merge_mult_layout.addWidget(self.azure_label)
+
+ merge_mult_desc = QLabel("(multiplies merge distance for Azure lines)")
+ merge_mult_desc_font = QFont('Arial', 9)
+ merge_mult_desc.setFont(merge_mult_desc_font)
+ merge_mult_desc.setStyleSheet("color: gray;")
+ merge_mult_layout.addWidget(merge_mult_desc)
+ merge_mult_layout.addStretch()
+
+ # Reading order
+ reading_order_widget = QWidget()
+ reading_order_layout = QHBoxLayout(reading_order_widget)
+ reading_order_layout.setContentsMargins(0, 0, 0, 0)
+ azure_ocr_layout.addWidget(reading_order_widget)
+
+ reading_order_label = QLabel("Reading Order:")
+ reading_order_label.setMinimumWidth(180)
+ reading_order_layout.addWidget(reading_order_label)
+
+ self.azure_reading_order_combo = QComboBox()
+ self.azure_reading_order_combo.addItems(['basic', 'natural'])
+ self.azure_reading_order_combo.setCurrentText(self.settings['ocr'].get('azure_reading_order', 'natural'))
+ reading_order_layout.addWidget(self.azure_reading_order_combo)
+
+ reading_order_desc = QLabel("(natural = better for complex layouts)")
+ reading_order_desc_font = QFont('Arial', 9)
+ reading_order_desc.setFont(reading_order_desc_font)
+ reading_order_desc.setStyleSheet("color: gray;")
+ reading_order_layout.addWidget(reading_order_desc)
+ reading_order_layout.addStretch()
+
+ # Model version
+ model_version_widget = QWidget()
+ model_version_layout = QHBoxLayout(model_version_widget)
+ model_version_layout.setContentsMargins(0, 0, 0, 0)
+ azure_ocr_layout.addWidget(model_version_widget)
+
+ model_version_label = QLabel("Model Version:")
+ model_version_label.setMinimumWidth(180)
+ model_version_layout.addWidget(model_version_label)
+
+ self.azure_model_version_combo = QComboBox()
+ self.azure_model_version_combo.addItems(['latest', '2022-04-30', '2022-01-30', '2021-09-30'])
+ self.azure_model_version_combo.setCurrentText(self.settings['ocr'].get('azure_model_version', 'latest'))
+ self.azure_model_version_combo.setEditable(True)
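+ # Editable so users can type an Azure model version string that is not in the preset list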
+ model_version_layout.addWidget(self.azure_model_version_combo)
+
+ model_version_desc = QLabel("(use 'latest' for newest features)")
+ model_version_desc_font = QFont('Arial', 9)
+ model_version_desc.setFont(model_version_desc_font)
+ model_version_desc.setStyleSheet("color: gray;")
+ model_version_layout.addWidget(model_version_desc)
+ model_version_layout.addStretch()
+
+ # Timeout settings
+ timeout_widget = QWidget()
+ timeout_layout = QHBoxLayout(timeout_widget)
+ timeout_layout.setContentsMargins(0, 0, 0, 0)
+ azure_ocr_layout.addWidget(timeout_widget)
+
+ timeout_label = QLabel("Max Wait Time:")
+ timeout_label.setMinimumWidth(180)
+ timeout_layout.addWidget(timeout_label)
+
+ self.azure_max_wait_spinbox = QSpinBox()
+ self.azure_max_wait_spinbox.setRange(10, 120)
+ self.azure_max_wait_spinbox.setSingleStep(5)
+ self.azure_max_wait_spinbox.setValue(self.settings['ocr'].get('azure_max_wait', 60))
+ timeout_layout.addWidget(self.azure_max_wait_spinbox)
+
+ timeout_unit = QLabel("seconds")
+ timeout_layout.addWidget(timeout_unit)
+ timeout_layout.addStretch()
+
+ # Poll interval
+ poll_widget = QWidget()
+ poll_layout = QHBoxLayout(poll_widget)
+ poll_layout.setContentsMargins(0, 0, 0, 0)
+ azure_ocr_layout.addWidget(poll_widget)
+
+ poll_label = QLabel("Poll Interval:")
+ poll_label.setMinimumWidth(180)
+ poll_layout.addWidget(poll_label)
+
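+ # Slider range 0-200 maps to a 0.00-2.00 second poll interval (value / 100)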
+ self.azure_poll_interval_slider = QSlider(Qt.Orientation.Horizontal)
+ self.azure_poll_interval_slider.setRange(0, 200)
+ self.azure_poll_interval_slider.setValue(int(self.settings['ocr'].get('azure_poll_interval', 0.5) * 100))
+ self.azure_poll_interval_slider.setMinimumWidth(200)
+ poll_layout.addWidget(self.azure_poll_interval_slider)
+
+ self.azure_poll_interval_label = QLabel(f"{self.settings['ocr'].get('azure_poll_interval', 0.5):.2f}")
+ self.azure_poll_interval_label.setMinimumWidth(50)
+ self.azure_poll_interval_slider.valueChanged.connect(
+ lambda v: self.azure_poll_interval_label.setText(f"{v/100:.2f}")
+ )
+ poll_layout.addWidget(self.azure_poll_interval_label)
+
+ poll_unit = QLabel("sec")
+ poll_layout.addWidget(poll_unit)
+ poll_layout.addStretch()
+
+ # Help text
+ azure_help = QLabel(
+ "💡 Azure Read API auto-detects language well\n"
+ "💡 Natural reading order works better for manga panels"
+ )
+ azure_help_font = QFont('Arial', 9)
+ azure_help.setFont(azure_help_font)
+ azure_help.setStyleSheet("color: gray;")
+ azure_help.setWordWrap(True)
+ azure_help.setContentsMargins(0, 10, 0, 0)
+ azure_ocr_layout.addWidget(azure_help)
+
+ # Rotation correction
+ self.enable_rotation_checkbox = self._create_styled_checkbox("Enable automatic rotation correction for tilted text")
+ self.enable_rotation_checkbox.setChecked(self.settings['ocr']['enable_rotation_correction'])
+ merge_layout.addWidget(self.enable_rotation_checkbox)
+
+ # OCR batching and locality settings
+ ocr_batch_group = QGroupBox("OCR Batching & Concurrency")
+ ocr_batch_layout = QVBoxLayout(ocr_batch_group)
+ content_layout.addWidget(ocr_batch_group)
+
+ # Enable OCR batching
+ self.ocr_batch_enabled_checkbox = self._create_styled_checkbox("Enable OCR batching (independent of translation batching)")
+ self.ocr_batch_enabled_checkbox.setChecked(self.settings['ocr'].get('ocr_batch_enabled', True))
+ self.ocr_batch_enabled_checkbox.stateChanged.connect(self._toggle_ocr_batching_controls)
+ ocr_batch_layout.addWidget(self.ocr_batch_enabled_checkbox)
+
+ # OCR batch size
+ ocr_bs_widget = QWidget()
+ ocr_bs_layout = QHBoxLayout(ocr_bs_widget)
+ ocr_bs_layout.setContentsMargins(0, 0, 0, 0)
+ ocr_batch_layout.addWidget(ocr_bs_widget)
+ self.ocr_bs_row = ocr_bs_widget
+
+ ocr_bs_label = QLabel("OCR Batch Size:")
+ ocr_bs_label.setMinimumWidth(180)
+ ocr_bs_layout.addWidget(ocr_bs_label)
+
+ self.ocr_batch_size_spinbox = QSpinBox()
+ self.ocr_batch_size_spinbox.setRange(1, 32)
+ self.ocr_batch_size_spinbox.setValue(int(self.settings['ocr'].get('ocr_batch_size', 8)))
+ ocr_bs_layout.addWidget(self.ocr_batch_size_spinbox)
+
+ ocr_bs_desc = QLabel("(Google: items/request; Azure: drives concurrency)")
+ ocr_bs_desc_font = QFont('Arial', 9)
+ ocr_bs_desc.setFont(ocr_bs_desc_font)
+ ocr_bs_desc.setStyleSheet("color: gray;")
+ ocr_bs_layout.addWidget(ocr_bs_desc)
+ ocr_bs_layout.addStretch()
+
+ # OCR Max Concurrency
+ ocr_cc_widget = QWidget()
+ ocr_cc_layout = QHBoxLayout(ocr_cc_widget)
+ ocr_cc_layout.setContentsMargins(0, 0, 0, 0)
+ ocr_batch_layout.addWidget(ocr_cc_widget)
+ self.ocr_cc_row = ocr_cc_widget
+
+ ocr_cc_label = QLabel("OCR Max Concurrency:")
+ ocr_cc_label.setMinimumWidth(180)
+ ocr_cc_layout.addWidget(ocr_cc_label)
+
+ self.ocr_max_conc_spinbox = QSpinBox()
+ self.ocr_max_conc_spinbox.setRange(1, 8)
+ self.ocr_max_conc_spinbox.setValue(int(self.settings['ocr'].get('ocr_max_concurrency', 2)))
+ ocr_cc_layout.addWidget(self.ocr_max_conc_spinbox)
+
+ ocr_cc_desc = QLabel("(Google: concurrent requests; Azure: workers, capped at 4)")
+ ocr_cc_desc_font = QFont('Arial', 9)
+ ocr_cc_desc.setFont(ocr_cc_desc_font)
+ ocr_cc_desc.setStyleSheet("color: gray;")
+ ocr_cc_layout.addWidget(ocr_cc_desc)
+ ocr_cc_layout.addStretch()
+
+ # Apply initial visibility for OCR batching controls
+ try:
+ self._toggle_ocr_batching_controls()
+ except Exception:
+ pass
+
+ # ROI sizing
+ roi_group = QGroupBox("ROI Locality Controls")
+ roi_layout = QVBoxLayout(roi_group)
+ content_layout.addWidget(roi_group)
+
+ # ROI locality toggle (now inside this section)
+ self.roi_locality_checkbox = self._create_styled_checkbox("Enable ROI-based OCR locality and batching (uses bubble detection)")
+ self.roi_locality_checkbox.setChecked(self.settings['ocr'].get('roi_locality_enabled', False))
+ self.roi_locality_checkbox.stateChanged.connect(self._toggle_roi_locality_controls)
+ roi_layout.addWidget(self.roi_locality_checkbox)
+
+ # ROI padding ratio
+ roi_pad_widget = QWidget()
+ roi_pad_layout = QHBoxLayout(roi_pad_widget)
+ roi_pad_layout.setContentsMargins(0, 0, 0, 0)
+ roi_layout.addWidget(roi_pad_widget)
+ self.roi_pad_row = roi_pad_widget
+
+ roi_pad_label = QLabel("ROI Padding Ratio:")
+ roi_pad_label.setMinimumWidth(180)
+ roi_pad_layout.addWidget(roi_pad_label)
+
+ self.roi_padding_ratio_slider = QSlider(Qt.Orientation.Horizontal)
+ self.roi_padding_ratio_slider.setRange(0, 30)
+ self.roi_padding_ratio_slider.setValue(int(float(self.settings['ocr'].get('roi_padding_ratio', 0.08)) * 100))
+ self.roi_padding_ratio_slider.setMinimumWidth(200)
+ roi_pad_layout.addWidget(self.roi_padding_ratio_slider)
+
+ self.roi_padding_ratio_label = QLabel(f"{float(self.settings['ocr'].get('roi_padding_ratio', 0.08)):.2f}")
+ self.roi_padding_ratio_label.setMinimumWidth(50)
+ self.roi_padding_ratio_slider.valueChanged.connect(
+ lambda v: self.roi_padding_ratio_label.setText(f"{v/100:.2f}")
+ )
+ roi_pad_layout.addWidget(self.roi_padding_ratio_label)
+ roi_pad_layout.addStretch()
+
+ # ROI min side / area
+ roi_min_widget = QWidget()
+ roi_min_layout = QHBoxLayout(roi_min_widget)
+ roi_min_layout.setContentsMargins(0, 0, 0, 0)
+ roi_layout.addWidget(roi_min_widget)
+ self.roi_min_row = roi_min_widget
+
+ roi_min_label = QLabel("Min ROI Side:")
+ roi_min_label.setMinimumWidth(180)
+ roi_min_layout.addWidget(roi_min_label)
+
+ self.roi_min_side_spinbox = QSpinBox()
+ self.roi_min_side_spinbox.setRange(1, 64)
+ self.roi_min_side_spinbox.setValue(int(self.settings['ocr'].get('roi_min_side_px', 12)))
+ roi_min_layout.addWidget(self.roi_min_side_spinbox)
+
+ roi_min_unit = QLabel("px")
+ roi_min_layout.addWidget(roi_min_unit)
+ roi_min_layout.addStretch()
+
+ roi_area_widget = QWidget()
+ roi_area_layout = QHBoxLayout(roi_area_widget)
+ roi_area_layout.setContentsMargins(0, 0, 0, 0)
+ roi_layout.addWidget(roi_area_widget)
+ self.roi_area_row = roi_area_widget
+
+ roi_area_label = QLabel("Min ROI Area:")
+ roi_area_label.setMinimumWidth(180)
+ roi_area_layout.addWidget(roi_area_label)
+
+ self.roi_min_area_spinbox = QSpinBox()
+ self.roi_min_area_spinbox.setRange(1, 5000)
+ self.roi_min_area_spinbox.setValue(int(self.settings['ocr'].get('roi_min_area_px', 100)))
+ roi_area_layout.addWidget(self.roi_min_area_spinbox)
+
+ roi_area_unit = QLabel("px²")
+ roi_area_layout.addWidget(roi_area_unit)
+ roi_area_layout.addStretch()
+
+ # ROI max side (0 disables)
+ roi_max_widget = QWidget()
+ roi_max_layout = QHBoxLayout(roi_max_widget)
+ roi_max_layout.setContentsMargins(0, 0, 0, 0)
+ roi_layout.addWidget(roi_max_widget)
+ self.roi_max_row = roi_max_widget
+
+ roi_max_label = QLabel("ROI Max Side (0=off):")
+ roi_max_label.setMinimumWidth(180)
+ roi_max_layout.addWidget(roi_max_label)
+
+ self.roi_max_side_spinbox = QSpinBox()
+ self.roi_max_side_spinbox.setRange(0, 2048)
+ self.roi_max_side_spinbox.setValue(int(self.settings['ocr'].get('roi_max_side', 0)))
+ roi_max_layout.addWidget(self.roi_max_side_spinbox)
+ roi_max_layout.addStretch()
+
+ # Apply initial visibility based on toggle
+ self._toggle_roi_locality_controls()
+
+ # AI Bubble Detection Settings
+ bubble_group = QGroupBox("AI Bubble Detection")
+ bubble_layout = QVBoxLayout(bubble_group)
+ content_layout.addWidget(bubble_group)
+
+ # Enable bubble detection
+ self.bubble_detection_enabled_checkbox = self._create_styled_checkbox("Enable AI-powered bubble detection (overrides traditional merging)")
+ # IMPORTANT: Default to True for optimal text detection (especially for Chinese/Japanese text)
+ self.bubble_detection_enabled_checkbox.setChecked(self.settings['ocr'].get('bubble_detection_enabled', True))
+ self.bubble_detection_enabled_checkbox.stateChanged.connect(self._toggle_bubble_controls)
+ bubble_layout.addWidget(self.bubble_detection_enabled_checkbox)
+
+ # Use RT-DETR for text region detection (not just bubble detection)
+ self.use_rtdetr_for_ocr_checkbox = self._create_styled_checkbox("Use RT-DETR to guide OCR (Google/Azure only - others already do this)")
+ self.use_rtdetr_for_ocr_checkbox.setChecked(self.settings['ocr'].get('use_rtdetr_for_ocr_regions', True)) # Default: True
+ self.use_rtdetr_for_ocr_checkbox.setToolTip(
+ "When enabled, RT-DETR first detects all text regions (text bubbles + free text), \n"
+ "then your OCR provider reads each region separately.\n\n"
+ "🎯 Applies to: Google Cloud Vision, Azure Computer Vision\n"
+ "✓ Already enabled: Qwen2-VL, Custom API, EasyOCR, PaddleOCR, DocTR, manga-ocr\n\n"
+ "Benefits:\n"
+ "• More accurate text detection (trained specifically for manga/comics)\n"
+ "• Better separation of overlapping text\n"
+ "• Improved handling of different text types (bubbles vs. free text)\n"
+ "• Focused OCR on actual text regions (faster, more accurate)\n\n"
+ "Note: Requires bubble detection to be enabled and uses the selected detector above."
+ )
+ bubble_layout.addWidget(self.use_rtdetr_for_ocr_checkbox)
+
+ # Detector type dropdown
+ detector_type_widget = QWidget()
+ detector_type_layout = QHBoxLayout(detector_type_widget)
+ detector_type_layout.setContentsMargins(0, 10, 0, 0)
+ bubble_layout.addWidget(detector_type_widget)
+
+ detector_type_label = QLabel("Detector:")
+ detector_type_label.setMinimumWidth(120)
+ detector_type_layout.addWidget(detector_type_label)
+
+ # Model mapping
+ self.detector_models = {
+ 'RTEDR_onnx': 'ogkalu/comic-text-and-bubble-detector',
+ 'RT-DETR': 'ogkalu/comic-text-and-bubble-detector',
+ 'YOLOv8 Speech': 'ogkalu/comic-speech-bubble-detector-yolov8m',
+ 'YOLOv8 Text': 'ogkalu/comic-text-segmenter-yolov8m',
+ 'YOLOv8 Manga': 'ogkalu/manga-text-detector-yolov8s',
+ 'Custom Model': ''
+ }
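+ # Keys are the UI labels shown in the combo box; values are the Hugging Face repo IDs.
+ # 'Custom Model' maps to an empty string so the user can browse to a local file instead.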
+
+ # Get saved detector type (default to ONNX backend)
+ saved_type = self.settings['ocr'].get('detector_type', 'rtdetr_onnx')
+ if saved_type == 'rtdetr_onnx':
+ initial_selection = 'RTEDR_onnx'
+ elif saved_type == 'rtdetr':
+ initial_selection = 'RT-DETR'
+ elif saved_type == 'yolo':
+ initial_selection = 'YOLOv8 Speech'
+ elif saved_type == 'custom':
+ initial_selection = 'Custom Model'
+ else:
+ initial_selection = 'RTEDR_onnx'
+
+ self.detector_type_combo = QComboBox()
+ self.detector_type_combo.addItems(list(self.detector_models.keys()))
+ self.detector_type_combo.setCurrentText(initial_selection)
+ self.detector_type_combo.currentTextChanged.connect(self._on_detector_type_changed)
+ detector_type_layout.addWidget(self.detector_type_combo)
+ detector_type_layout.addStretch()
+
+ # Create the model settings group (shared by RT-DETR and YOLO detectors)
+ self.yolo_settings_group = QGroupBox("Model Settings")
+ yolo_settings_layout = QVBoxLayout(self.yolo_settings_group)
+ bubble_layout.addWidget(self.yolo_settings_group)
+ self.rtdetr_settings_frame = self.yolo_settings_group # Alias for compatibility
+
+ # Model path/URL row
+ model_widget = QWidget()
+ model_layout = QHBoxLayout(model_widget)
+ model_layout.setContentsMargins(0, 5, 0, 0)
+ yolo_settings_layout.addWidget(model_widget)
+
+ model_label = QLabel("Model:")
+ model_label.setMinimumWidth(100)
+ model_layout.addWidget(model_label)
+
+ self.bubble_model_entry = QLineEdit()
+ self.bubble_model_entry.setText(self.settings['ocr'].get('bubble_model_path', ''))
+ self.bubble_model_entry.setReadOnly(True)
+ self.bubble_model_entry.setStyleSheet(
+ "QLineEdit { background-color: #1e1e1e; color: #ffffff; border: 1px solid #3a3a3a; }"
+ )
+ model_layout.addWidget(self.bubble_model_entry)
+ self.rtdetr_url_entry = self.bubble_model_entry # Alias
+
+ # Store for compatibility
+ self.detector_radio_widgets = [self.detector_type_combo]
+
+ # Browse and Clear buttons (initially hidden for HuggingFace models)
+ self.bubble_browse_btn = QPushButton("Browse")
+ self.bubble_browse_btn.clicked.connect(self._browse_bubble_model)
+ model_layout.addWidget(self.bubble_browse_btn)
+
+ self.bubble_clear_btn = QPushButton("Clear")
+ self.bubble_clear_btn.clicked.connect(self._clear_bubble_model)
+ model_layout.addWidget(self.bubble_clear_btn)
+ model_layout.addStretch()
+
+ # Download and Load buttons
+ button_widget = QWidget()
+ button_layout = QHBoxLayout(button_widget)
+ button_layout.setContentsMargins(0, 10, 0, 0)
+ yolo_settings_layout.addWidget(button_widget)
+
+ button_label = QLabel("Actions:")
+ button_label.setMinimumWidth(100)
+ button_layout.addWidget(button_label)
+
+ self.rtdetr_download_btn = QPushButton("Download")
+ self.rtdetr_download_btn.clicked.connect(self._download_rtdetr_model)
+ self.rtdetr_download_btn.setStyleSheet("""
+ QPushButton {
+ background-color: #5a9fd4;
+ color: white;
+ font-weight: bold;
+ border: none;
+ border-radius: 3px;
+ padding: 5px 15px;
+ }
+ QPushButton:hover {
+ background-color: #7bb3e0;
+ }
+ QPushButton:pressed {
+ background-color: #4a8fc4;
+ }
+ """)
+ button_layout.addWidget(self.rtdetr_download_btn)
+
+ self.rtdetr_load_btn = QPushButton("Load Model")
+ self.rtdetr_load_btn.clicked.connect(self._load_rtdetr_model)
+ self.rtdetr_load_btn.setStyleSheet("""
+ QPushButton {
+ background-color: #5a9fd4;
+ color: white;
+ font-weight: bold;
+ border: none;
+ border-radius: 3px;
+ padding: 5px 15px;
+ }
+ QPushButton:hover {
+ background-color: #7bb3e0;
+ }
+ QPushButton:pressed {
+ background-color: #4a8fc4;
+ }
+ """)
+ button_layout.addWidget(self.rtdetr_load_btn)
+
+ self.rtdetr_status_label = QLabel("")
+ rtdetr_status_font = QFont('Arial', 9)
+ self.rtdetr_status_label.setFont(rtdetr_status_font)
+ button_layout.addWidget(self.rtdetr_status_label)
+ button_layout.addStretch()
+
+ # RT-DETR Detection classes
+ rtdetr_classes_widget = QWidget()
+ rtdetr_classes_layout = QHBoxLayout(rtdetr_classes_widget)
+ rtdetr_classes_layout.setContentsMargins(0, 10, 0, 0)
+ yolo_settings_layout.addWidget(rtdetr_classes_widget)
+ self.rtdetr_classes_frame = rtdetr_classes_widget
+
+ classes_label = QLabel("Detect:")
+ classes_label.setMinimumWidth(100)
+ rtdetr_classes_layout.addWidget(classes_label)
+
+ self.detect_empty_bubbles_checkbox = self._create_styled_checkbox("Empty Bubbles")
+ self.detect_empty_bubbles_checkbox.setChecked(self.settings['ocr'].get('detect_empty_bubbles', True))
+ rtdetr_classes_layout.addWidget(self.detect_empty_bubbles_checkbox)
+
+ self.detect_text_bubbles_checkbox = self._create_styled_checkbox("Text Bubbles")
+ self.detect_text_bubbles_checkbox.setChecked(self.settings['ocr'].get('detect_text_bubbles', True))
+ rtdetr_classes_layout.addWidget(self.detect_text_bubbles_checkbox)
+
+ self.detect_free_text_checkbox = self._create_styled_checkbox("Free Text")
+ self.detect_free_text_checkbox.setChecked(self.settings['ocr'].get('detect_free_text', True))
+ rtdetr_classes_layout.addWidget(self.detect_free_text_checkbox)
+ rtdetr_classes_layout.addStretch()
+
+ # Confidence
+ conf_widget = QWidget()
+ conf_layout = QHBoxLayout(conf_widget)
+ conf_layout.setContentsMargins(0, 10, 0, 0)
+ yolo_settings_layout.addWidget(conf_widget)
+
+ conf_label = QLabel("Confidence:")
+ conf_label.setMinimumWidth(100)
+ conf_layout.addWidget(conf_label)
+
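+ # RT-DETR-based detectors use a lower default confidence (0.30) than YOLOv8 models (0.50)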
+ detector_label = self.detector_type_combo.currentText()
+ default_conf = 0.3 if ('RT-DETR' in detector_label or 'RTEDR_onnx' in detector_label or 'onnx' in detector_label.lower()) else 0.5
+
+ self.bubble_conf_slider = QSlider(Qt.Orientation.Horizontal)
+ self.bubble_conf_slider.setRange(0, 99)
+ self.bubble_conf_slider.setValue(int(self.settings['ocr'].get('bubble_confidence', default_conf) * 100))
+ self.bubble_conf_slider.setMinimumWidth(200)
+ conf_layout.addWidget(self.bubble_conf_slider)
+ self.rtdetr_conf_scale = self.bubble_conf_slider # Alias
+
+ self.bubble_conf_label = QLabel(f"{self.settings['ocr'].get('bubble_confidence', default_conf):.2f}")
+ self.bubble_conf_label.setMinimumWidth(50)
+ self.bubble_conf_slider.valueChanged.connect(
+ lambda v: self.bubble_conf_label.setText(f"{v/100:.2f}")
+ )
+ conf_layout.addWidget(self.bubble_conf_label)
+ self.rtdetr_conf_label = self.bubble_conf_label # Alias
+ conf_layout.addStretch()
+
+ # YOLO-specific: Max detections (only visible for YOLO)
+ self.yolo_maxdet_widget = QWidget()
+ yolo_maxdet_layout = QHBoxLayout(self.yolo_maxdet_widget)
+ yolo_maxdet_layout.setContentsMargins(0, 6, 0, 0)
+ yolo_settings_layout.addWidget(self.yolo_maxdet_widget)
+ self.yolo_maxdet_row = self.yolo_maxdet_widget # Alias
+ self.yolo_maxdet_widget.setVisible(False) # Hidden initially
+
+ maxdet_label = QLabel("Max detections:")
+ maxdet_label.setMinimumWidth(100)
+ yolo_maxdet_layout.addWidget(maxdet_label)
+
+ self.bubble_max_det_yolo_spinbox = QSpinBox()
+ self.bubble_max_det_yolo_spinbox.setRange(1, 2000)
+ self.bubble_max_det_yolo_spinbox.setValue(self.settings['ocr'].get('bubble_max_detections_yolo', 100))
+ yolo_maxdet_layout.addWidget(self.bubble_max_det_yolo_spinbox)
+ yolo_maxdet_layout.addStretch()
+
+ # Status label at the bottom of bubble group
+ self.bubble_status_label = QLabel("")
+ bubble_status_font = QFont('Arial', 9)
+ self.bubble_status_label.setFont(bubble_status_font)
+ bubble_status_label_container = QWidget()
+ bubble_status_label_layout = QVBoxLayout(bubble_status_label_container)
+ bubble_status_label_layout.setContentsMargins(0, 10, 0, 0)
+ bubble_status_label_layout.addWidget(self.bubble_status_label)
+ bubble_layout.addWidget(bubble_status_label_container)
+
+ # Store controls for enable/disable
+ self.bubble_controls = [
+ self.detector_type_combo,
+ self.bubble_model_entry,
+ self.bubble_browse_btn,
+ self.bubble_clear_btn,
+ self.bubble_conf_slider,
+ self.rtdetr_download_btn,
+ self.rtdetr_load_btn
+ ]
+
+ self.rtdetr_controls = [
+ self.bubble_model_entry,
+ self.rtdetr_load_btn,
+ self.rtdetr_download_btn,
+ self.bubble_conf_slider,
+ self.detect_empty_bubbles_checkbox,
+ self.detect_text_bubbles_checkbox,
+ self.detect_free_text_checkbox
+ ]
+
+ self.yolo_controls = [
+ self.bubble_model_entry,
+ self.bubble_browse_btn,
+ self.bubble_clear_btn,
+ self.bubble_conf_slider,
+ self.yolo_maxdet_widget
+ ]
+
+ # Add stretch to end of OCR tab content
+ content_layout.addStretch()
+
+ # Initialize control states
+ self._toggle_bubble_controls()
+
+ # Only call detector change after everything is initialized
+ if self.bubble_detection_enabled_checkbox.isChecked():
+ try:
+ self._on_detector_type_changed()
+ self._update_bubble_status()
+ except AttributeError:
+ # Frames not yet created, skip initialization
+ pass
+
+ # Check status after dialog ready
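+ # (deferred ~500 ms so the dialog finishes constructing before probing any existing translator for a loaded detector)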
+ QTimer.singleShot(500, self._check_rtdetr_status)
+
+ def _on_detector_type_changed(self, detector=None):
+ """Handle detector type change"""
+ if not hasattr(self, 'bubble_detection_enabled_checkbox'):
+ return
+
+ if not self.bubble_detection_enabled_checkbox.isChecked():
+ self.yolo_settings_group.setVisible(False)
+ return
+
+ if detector is None:
+ detector = self.detector_type_combo.currentText()
+
+ # Handle different detector types
+ if detector == 'Custom Model':
+ # Custom model - enable manual entry
+ self.bubble_model_entry.setText(self.settings['ocr'].get('custom_model_path', ''))
+ self.bubble_model_entry.setReadOnly(False)
+ self.bubble_model_entry.setStyleSheet(
+ "QLineEdit { background-color: #2b2b2b; color: #ffffff; border: 1px solid #3a3a3a; }"
+ )
+ # Show browse/clear buttons for custom
+ self.bubble_browse_btn.setVisible(True)
+ self.bubble_clear_btn.setVisible(True)
+ # Hide download button
+ self.rtdetr_download_btn.setVisible(False)
+ elif detector in self.detector_models:
+ # HuggingFace model
+ url = self.detector_models[detector]
+ self.bubble_model_entry.setText(url)
+ # Make entry read-only for HuggingFace models
+ self.bubble_model_entry.setReadOnly(True)
+ self.bubble_model_entry.setStyleSheet(
+ "QLineEdit { background-color: #1e1e1e; color: #ffffff; border: 1px solid #3a3a3a; }"
+ )
+ # Hide browse/clear buttons for HuggingFace models
+ self.bubble_browse_btn.setVisible(False)
+ self.bubble_clear_btn.setVisible(False)
+ # Show download button
+ self.rtdetr_download_btn.setVisible(True)
+
+ # Show/hide RT-DETR specific controls
+ is_rtdetr = 'RT-DETR' in detector or 'RTEDR_onnx' in detector
+
+ if is_rtdetr:
+ self.rtdetr_classes_frame.setVisible(True)
+ # Hide YOLO-only max det row
+ self.yolo_maxdet_widget.setVisible(False)
+ else:
+ self.rtdetr_classes_frame.setVisible(False)
+ # Show the YOLO-only max-detections row for YOLO and custom models
+ self.yolo_maxdet_widget.setVisible('yolo' in detector.lower() or detector == 'Custom Model')
+
+ # Show/hide RT-DETR concurrency control in Performance section (Advanced tab)
+ # Only update if the widget has been created (Advanced tab may not be loaded yet)
+ if hasattr(self, 'rtdetr_conc_frame'):
+ self.rtdetr_conc_frame.setVisible(is_rtdetr)
+
+ # Always show settings frame
+ self.yolo_settings_group.setVisible(True)
+
+ # Update status
+ self._update_bubble_status()
+
+ def _download_rtdetr_model(self):
+ """Download selected model"""
+ try:
+ detector = self.detector_type_combo.currentText()
+ model_url = self.bubble_model_entry.text()
+
+ self.rtdetr_status_label.setText("Downloading...")
+ self.rtdetr_status_label.setStyleSheet("color: orange;")
+ QApplication.processEvents()
+
+ if 'RTEDR_onnx' in detector:
+ from bubble_detector import BubbleDetector
+ bd = BubbleDetector()
+ if bd.load_rtdetr_onnx_model(model_id=model_url):
+ self.rtdetr_status_label.setText("✅ Downloaded")
+ self.rtdetr_status_label.setStyleSheet("color: green;")
+ QMessageBox.information(self, "Success", "RTEDR_onnx model downloaded successfully!")
+ else:
+ self.rtdetr_status_label.setText("❌ Failed")
+ self.rtdetr_status_label.setStyleSheet("color: red;")
+ QMessageBox.critical(self, "Error", "Failed to download RTEDR_onnx model")
+ elif 'RT-DETR' in detector:
+ # RT-DETR handling (works fine)
+ from bubble_detector import BubbleDetector
+ bd = BubbleDetector()
+
+ if bd.load_rtdetr_model(model_id=model_url):
+ self.rtdetr_status_label.setText("✅ Downloaded")
+ self.rtdetr_status_label.setStyleSheet("color: green;")
+ QMessageBox.information(self, "Success", "RT-DETR model downloaded successfully!")
+ else:
+ self.rtdetr_status_label.setText("❌ Failed")
+ self.rtdetr_status_label.setStyleSheet("color: red;")
+ QMessageBox.critical(self, "Error", "Failed to download RT-DETR model")
+ else:
+ # YOLOv8 models: download the weights and copy them to a simple local path under ./models
+ from huggingface_hub import hf_hub_download
+ import os
+
+ # Create models directory
+ models_dir = "models"
+ os.makedirs(models_dir, exist_ok=True)
+
+ # Define simple local filenames
+ filename_map = {
+ 'ogkalu/comic-speech-bubble-detector-yolov8m': 'comic-speech-bubble-detector.pt',
+ 'ogkalu/comic-text-segmenter-yolov8m': 'comic-text-segmenter.pt',
+ 'ogkalu/manga-text-detector-yolov8s': 'manga-text-detector.pt'
+ }
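+ # Filenames above are the weight files expected in each repo; unrecognized repo IDs fall back to 'model.pt'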
+
+ filename = filename_map.get(model_url, 'model.pt')
+
+ # Download to cache first
+ cached_path = hf_hub_download(repo_id=model_url, filename=filename)
+
+ # Copy to local models directory with simple path
+ import shutil
+ local_path = os.path.join(models_dir, filename)
+ shutil.copy2(cached_path, local_path)
+
+ # Set the simple local path instead of the cache path
+ self.bubble_model_entry.setText(local_path)
+ self.rtdetr_status_label.setText("✅ Downloaded")
+ self.rtdetr_status_label.setStyleSheet("color: green;")
+ QMessageBox.information(self, "Success", f"Model downloaded to:\n{local_path}")
+
+ except ImportError:
+ self.rtdetr_status_label.setText("❌ Missing deps")
+ self.rtdetr_status_label.setStyleSheet("color: red;")
+ QMessageBox.critical(self, "Error", "Install: pip install huggingface-hub transformers")
+ except Exception as e:
+ self.rtdetr_status_label.setText("❌ Error")
+ self.rtdetr_status_label.setStyleSheet("color: red;")
+ QMessageBox.critical(self, "Error", f"Download failed: {e}")
+
+ def _check_rtdetr_status(self):
+ """Check if model is already loaded"""
+ try:
+ from bubble_detector import BubbleDetector
+
+ if hasattr(self.main_gui, 'manga_tab') and hasattr(self.main_gui.manga_tab, 'translator'):
+ translator = self.main_gui.manga_tab.translator
+ if hasattr(translator, 'bubble_detector') and translator.bubble_detector:
+ if getattr(translator.bubble_detector, 'rtdetr_onnx_loaded', False):
+ self.rtdetr_status_label.setText("✅ Loaded")
+ self.rtdetr_status_label.setStyleSheet("color: green;")
+ return True
+ if getattr(translator.bubble_detector, 'rtdetr_loaded', False):
+ self.rtdetr_status_label.setText("✅ Loaded")
+ self.rtdetr_status_label.setStyleSheet("color: green;")
+ return True
+ elif getattr(translator.bubble_detector, 'model_loaded', False):
+ self.rtdetr_status_label.setText("✅ Loaded")
+ self.rtdetr_status_label.setStyleSheet("color: green;")
+ return True
+
+ self.rtdetr_status_label.setText("Not loaded")
+ self.rtdetr_status_label.setStyleSheet("color: gray;")
+ return False
+
+ except ImportError:
+ self.rtdetr_status_label.setText("❌ Missing deps")
+ self.rtdetr_status_label.setStyleSheet("color: red;")
+ return False
+ except Exception:
+ self.rtdetr_status_label.setText("Not loaded")
+ self.rtdetr_status_label.setStyleSheet("color: gray;")
+ return False
+
+ def _load_rtdetr_model(self):
+ """Load selected model"""
+ try:
+ from bubble_detector import BubbleDetector
+ from PySide6.QtWidgets import QApplication
+
+ self.rtdetr_status_label.setText("Loading...")
+ self.rtdetr_status_label.setStyleSheet("color: orange;")
+ QApplication.processEvents()
+
+ bd = BubbleDetector()
+ detector = self.detector_type_combo.currentText()
+ model_path = self.bubble_model_entry.text()
+
+ if 'RTEDR_onnx' in detector:
+ # RT-DETR (ONNX) uses repo id directly
+ if bd.load_rtdetr_onnx_model(model_id=model_path):
+ self.rtdetr_status_label.setText("✅ Ready")
+ self.rtdetr_status_label.setStyleSheet("color: green;")
+ QMessageBox.information(self, "Success", "RTEDR_onnx model loaded successfully!")
+ else:
+ self.rtdetr_status_label.setText("❌ Failed")
+ self.rtdetr_status_label.setStyleSheet("color: red;")
+ elif 'RT-DETR' in detector:
+ # RT-DETR uses model_id directly
+ if bd.load_rtdetr_model(model_id=model_path):
+ self.rtdetr_status_label.setText("✅ Ready")
+ self.rtdetr_status_label.setStyleSheet("color: green;")
+ QMessageBox.information(self, "Success", "RT-DETR model loaded successfully!")
+ else:
+ self.rtdetr_status_label.setText("❌ Failed")
+ self.rtdetr_status_label.setStyleSheet("color: red;")
+ else:
+ # YOLOv8: check the local models folder first before prompting for a download
+ if model_path.startswith('ogkalu/'):
+ # It's a HuggingFace ID - check if already downloaded
+ filename_map = {
+ 'ogkalu/comic-speech-bubble-detector-yolov8m': 'comic-speech-bubble-detector.pt',
+ 'ogkalu/comic-text-segmenter-yolov8m': 'comic-text-segmenter.pt',
+ 'ogkalu/manga-text-detector-yolov8s': 'manga-text-detector.pt'
+ }
+
+ filename = filename_map.get(model_path, 'model.pt')
+ local_path = os.path.join('models', filename)
+
+ # Check if it exists locally
+ if os.path.exists(local_path):
+ # Use the local file
+ model_path = local_path
+ self.bubble_model_entry.setText(local_path) # Update the field
+ else:
+ # Not downloaded yet
+ QMessageBox.warning(self, "Download Required",
+ "Model not found locally.\nPlease download it first using the Download button.")
+ self.rtdetr_status_label.setText("❌ Not downloaded")
+ self.rtdetr_status_label.setStyleSheet("color: orange;")
+ return
+
+ # Now model_path should be a local file
+ if not os.path.exists(model_path):
+ QMessageBox.critical(self, "Error", f"Model file not found: {model_path}")
+ self.rtdetr_status_label.setText("❌ File not found")
+ self.rtdetr_status_label.setStyleSheet("color: red;")
+ return
+
+ # Load the YOLOv8 model from local file
+ if bd.load_model(model_path):
+ self.rtdetr_status_label.setText("✅ Ready")
+ self.rtdetr_status_label.setStyleSheet("color: green;")
+ QMessageBox.information(self, "Success", "YOLOv8 model loaded successfully!")
+
+ # Auto-convert to ONNX if enabled (best-effort: a conversion failure should not mask the successful load)
+ if os.environ.get('AUTO_CONVERT_TO_ONNX', 'true').lower() == 'true':
+ onnx_path = model_path.replace('.pt', '.onnx')
+ if not os.path.exists(onnx_path):
+ try:
+ if bd.convert_to_onnx(model_path, onnx_path):
+ print(f"✅ Converted to ONNX: {onnx_path}")
+ except Exception:
+ pass
+ else:
+ self.rtdetr_status_label.setText("❌ Failed")
+ self.rtdetr_status_label.setStyleSheet("color: red;")
+
+ except ImportError:
+ self.rtdetr_status_label.setText("❌ Missing deps")
+ self.rtdetr_status_label.setStyleSheet("color: red;")
+ QMessageBox.critical(self, "Error", "Install transformers: pip install transformers")
+ except Exception as e:
+ self.rtdetr_status_label.setText("❌ Error")
+ self.rtdetr_status_label.setStyleSheet("color: red;")
+ QMessageBox.critical(self, "Error", f"Failed to load: {e}")
+
+ def _toggle_bubble_controls(self):
+ """Enable/disable bubble detection controls"""
+ enabled = self.bubble_detection_enabled_checkbox.isChecked()
+
+ # Enable/disable all bubble controls in one pass
+ for widget in self.bubble_controls:
+ try:
+ widget.setEnabled(enabled)
+ except Exception:
+ pass
+
+ if enabled:
+ # Show/hide frames based on detector type
+ self._on_detector_type_changed()
+ else:
+ # Hide frames and clear the status label
+ self.yolo_settings_group.setVisible(False)
+ self.bubble_status_label.setText("")
+
+ def _browse_bubble_model(self):
+ """Browse for model file"""
+ path, _ = QFileDialog.getOpenFileName(
+ self,
+ "Select Model File",
+ "",
+ "Model files (*.pt *.pth *.bin *.safetensors);;All files (*.*)"
+ )
+
+ if path:
+ self.bubble_model_entry.setText(path)
+ self._update_bubble_status()
+
+ def _clear_bubble_model(self):
+ """Clear selected model"""
+ self.bubble_model_entry.setText("")
+ self._update_bubble_status()
+
+ def _update_bubble_status(self):
+ """Update bubble model status label"""
+ if not self.bubble_detection_enabled_checkbox.isChecked():
+ self.bubble_status_label.setText("")
+ return
+
+ detector = self.detector_type_combo.currentText()
+ model_path = self.bubble_model_entry.text()
+
+ if not model_path:
+ self.bubble_status_label.setText("⚠️ No model selected")
+ self.bubble_status_label.setStyleSheet("color: orange;")
+ return
+
+ if model_path.startswith("ogkalu/"):
+ self.bubble_status_label.setText(f"📥 {detector} ready to download")
+ self.bubble_status_label.setStyleSheet("color: blue;")
+ elif os.path.exists(model_path):
+ self.bubble_status_label.setText("✅ Model file ready")
+ self.bubble_status_label.setStyleSheet("color: green;")
+ else:
+ self.bubble_status_label.setText("❌ Model file not found")
+ self.bubble_status_label.setStyleSheet("color: red;")
+
+ def _update_azure_label(self):
+ """Update Azure multiplier label"""
+ # This method is deprecated - Azure multiplier UI was removed
+ pass
+
+ def _set_azure_multiplier(self, value):
+ """Set Azure multiplier from preset"""
+ # This method is deprecated - Azure multiplier UI was removed
+ pass
+
+ def _create_advanced_tab(self):
+ """Create advanced settings tab with all options"""
+ # Create tab widget and add to tab widget
+ tab_widget = QWidget()
+ self.tab_widget.addTab(tab_widget, "Advanced")
+
+ # Main scrollable content
+ main_layout = QVBoxLayout(tab_widget)
+ main_layout.setContentsMargins(5, 5, 5, 5)
+ main_layout.setSpacing(6)
+
+ # Format detection
+ detect_group = QGroupBox("Format Detection")
+ main_layout.addWidget(detect_group)
+ detect_layout = QVBoxLayout(detect_group)
+ detect_layout.setContentsMargins(8, 8, 8, 6)
+ detect_layout.setSpacing(4)
+
+ self.format_detection_checkbox = self._create_styled_checkbox("Enable automatic manga format detection (reading direction)")
+ self.format_detection_checkbox.setChecked(self.settings['advanced']['format_detection'])
+ detect_layout.addWidget(self.format_detection_checkbox)
+
+ # Webtoon mode
+ webtoon_frame = QWidget()
+ webtoon_layout = QHBoxLayout(webtoon_frame)
+ webtoon_layout.setContentsMargins(0, 0, 0, 0)
+ detect_layout.addWidget(webtoon_frame)
+
+ webtoon_label = QLabel("Webtoon Mode:")
+ webtoon_label.setMinimumWidth(150)
+ webtoon_layout.addWidget(webtoon_label)
+
+ self.webtoon_mode_combo = QComboBox()
+ self.webtoon_mode_combo.addItems(['auto', 'enabled', 'disabled'])
+ self.webtoon_mode_combo.setCurrentText(self.settings['advanced']['webtoon_mode'])
+ webtoon_layout.addWidget(self.webtoon_mode_combo)
+ webtoon_layout.addStretch()
+
+ # Debug settings
+ debug_group = QGroupBox("Debug Options")
+ main_layout.addWidget(debug_group)
+ debug_layout = QVBoxLayout(debug_group)
+ debug_layout.setContentsMargins(8, 8, 8, 6)
+ debug_layout.setSpacing(4)
+
+ self.debug_mode_checkbox = self._create_styled_checkbox("Enable debug mode (verbose logging)")
+ self.debug_mode_checkbox.setChecked(self.settings['advanced']['debug_mode'])
+ debug_layout.addWidget(self.debug_mode_checkbox)
+
+ # New: Concise pipeline logs (reduce noise)
+ self.concise_logs_checkbox = self._create_styled_checkbox("Concise pipeline logs (reduce noise)")
+ self.concise_logs_checkbox.setChecked(bool(self.settings.get('advanced', {}).get('concise_logs', True)))
+ def _save_concise():
+ try:
+ if 'advanced' not in self.settings:
+ self.settings['advanced'] = {}
+ self.settings['advanced']['concise_logs'] = bool(self.concise_logs_checkbox.isChecked())
+ if hasattr(self, 'config'):
+ self.config['manga_settings'] = self.settings
+ if hasattr(self.main_gui, 'save_config'):
+ self.main_gui.save_config(show_message=False)
+ except Exception:
+ pass
+ self.concise_logs_checkbox.toggled.connect(_save_concise)
+ debug_layout.addWidget(self.concise_logs_checkbox)
+
+ self.save_intermediate_checkbox = self._create_styled_checkbox("Save intermediate images (preprocessed, detection overlays)")
+ self.save_intermediate_checkbox.setChecked(self.settings['advanced']['save_intermediate'])
+ debug_layout.addWidget(self.save_intermediate_checkbox)
+
+ # Performance settings
+ perf_group = QGroupBox("Performance")
+ main_layout.addWidget(perf_group)
+ perf_layout = QVBoxLayout(perf_group)
+ perf_layout.setContentsMargins(8, 8, 8, 6)
+ perf_layout.setSpacing(4)
+
+ # New: Parallel rendering (per-region overlays)
+ self.render_parallel_checkbox = self._create_styled_checkbox("Enable parallel rendering (per-region overlays)")
+ self.render_parallel_checkbox.setChecked(self.settings.get('advanced', {}).get('render_parallel', True))
+ perf_layout.addWidget(self.render_parallel_checkbox)
+
+ self.parallel_processing_checkbox = self._create_styled_checkbox("Enable parallel processing (experimental)")
+ self.parallel_processing_checkbox.setChecked(self.settings['advanced']['parallel_processing'])
+ self.parallel_processing_checkbox.toggled.connect(self._toggle_workers)
+ perf_layout.addWidget(self.parallel_processing_checkbox)
+
+ # Max workers
+ workers_frame = QWidget()
+ workers_layout = QHBoxLayout(workers_frame)
+ workers_layout.setContentsMargins(0, 0, 0, 0)
+ perf_layout.addWidget(workers_frame)
+
+ self.workers_label = QLabel("Max Workers:")
+ self.workers_label.setMinimumWidth(150)
+ workers_layout.addWidget(self.workers_label)
+
+ self.max_workers_spinbox = QSpinBox()
+ self.max_workers_spinbox.setRange(1, 999)
+ self.max_workers_spinbox.setValue(self.settings['advanced']['max_workers'])
+ workers_layout.addWidget(self.max_workers_spinbox)
+
+ self.workers_desc_label = QLabel("(threads for parallel processing)")
+ workers_layout.addWidget(self.workers_desc_label)
+ workers_layout.addStretch()
+
+ # Initialize workers state
+ self._toggle_workers()
+
+ # Memory management section
+ memory_group = QGroupBox("Memory Management")
+ main_layout.addWidget(memory_group)
+ memory_layout = QVBoxLayout(memory_group)
+ memory_layout.setContentsMargins(8, 8, 8, 6)
+ memory_layout.setSpacing(4)
+
+ # Singleton mode checkbox - will connect handler later after panel widgets created
+ self.use_singleton_models_checkbox = self._create_styled_checkbox("Use single model instances (saves RAM, only affects local models)")
+ self.use_singleton_models_checkbox.setChecked(self.settings.get('advanced', {}).get('use_singleton_models', True))
+ self.use_singleton_models_checkbox.toggled.connect(self._toggle_singleton_controls)
+ memory_layout.addWidget(self.use_singleton_models_checkbox)
+
+ # Singleton note
+ singleton_note = QLabel(
+ "When enabled: One bubble detector & one inpainter shared across all images.\n"
+ "When disabled: Each thread/image can have its own models (uses more RAM).\n"
+ "✅ Batch API translation remains fully functional with singleton mode enabled."
+ )
+ singleton_note_font = QFont('Arial', 9)
+ singleton_note.setFont(singleton_note_font)
+ singleton_note.setStyleSheet("color: gray;")
+ singleton_note.setWordWrap(True)
+ memory_layout.addWidget(singleton_note)
+
+ self.auto_cleanup_models_checkbox = self._create_styled_checkbox("Automatically cleanup models after translation to free RAM")
+ self.auto_cleanup_models_checkbox.setChecked(self.settings.get('advanced', {}).get('auto_cleanup_models', False))
+ memory_layout.addWidget(self.auto_cleanup_models_checkbox)
+
+ # Unload models after translation (disabled by default)
+ self.unload_models_checkbox = self._create_styled_checkbox("Unload models after translation (reset translator instance)")
+ self.unload_models_checkbox.setChecked(self.settings.get('advanced', {}).get('unload_models_after_translation', False))
+ memory_layout.addWidget(self.unload_models_checkbox)
+
+ # Add a note about parallel processing
+ note_label = QLabel("Note: When parallel panel translation is enabled, cleanup happens after ALL panels complete.")
+ note_font = QFont('Arial', 9)
+ note_label.setFont(note_font)
+ note_label.setStyleSheet("color: gray;")
+ note_label.setWordWrap(True)
+ memory_layout.addWidget(note_label)
+
+ # Panel-level parallel translation
+ panel_group = QGroupBox("Parallel Panel Translation")
+ main_layout.addWidget(panel_group)
+ panel_layout = QVBoxLayout(panel_group)
+ panel_layout.setContentsMargins(8, 8, 8, 6)
+ panel_layout.setSpacing(4)
+
+ # New: Preload local inpainting for panels (default ON)
+ self.preload_local_panels_checkbox = self._create_styled_checkbox("Preload local inpainting instances for panel-parallel runs")
+ self.preload_local_panels_checkbox.setChecked(self.settings.get('advanced', {}).get('preload_local_inpainting_for_panels', True))
+ panel_layout.addWidget(self.preload_local_panels_checkbox)
+
+ self.parallel_panel_checkbox = self._create_styled_checkbox("Enable parallel panel translation (process multiple images concurrently)")
+ self.parallel_panel_checkbox.setChecked(self.settings.get('advanced', {}).get('parallel_panel_translation', False))
+ self.parallel_panel_checkbox.toggled.connect(self._toggle_panel_controls)
+ panel_layout.addWidget(self.parallel_panel_checkbox)
+
+ # Local LLM Performance (add to performance group)
+ inpaint_perf_group = QGroupBox("Local LLM Performance")
+ perf_layout.addWidget(inpaint_perf_group)
+ inpaint_perf_layout = QVBoxLayout(inpaint_perf_group)
+ inpaint_perf_layout.setContentsMargins(8, 8, 8, 6)
+ inpaint_perf_layout.setSpacing(4)
+
+ # RT-DETR Concurrency (for memory optimization)
+ rtdetr_conc_widget = QWidget()
+ rtdetr_conc_layout = QHBoxLayout(rtdetr_conc_widget)
+ rtdetr_conc_layout.setContentsMargins(0, 0, 0, 0)
+ inpaint_perf_layout.addWidget(rtdetr_conc_widget)
+ self.rtdetr_conc_frame = rtdetr_conc_widget
+
+ rtdetr_conc_label = QLabel("RT-DETR Concurrency:")
+ rtdetr_conc_label.setMinimumWidth(150)
+ rtdetr_conc_layout.addWidget(rtdetr_conc_label)
+
+ self.rtdetr_max_concurrency_spinbox = QSpinBox()
+ self.rtdetr_max_concurrency_spinbox.setRange(1, 999)
+ self.rtdetr_max_concurrency_spinbox.setValue(self.settings['ocr'].get('rtdetr_max_concurrency', 12))
+ self.rtdetr_max_concurrency_spinbox.setToolTip("Maximum concurrent RT-DETR region OCR calls (rate limiting handled via delays)")
+ rtdetr_conc_layout.addWidget(self.rtdetr_max_concurrency_spinbox)
+
+ rtdetr_conc_desc = QLabel("parallel OCR calls (lower = less RAM)")
+ rtdetr_conc_desc_font = QFont('Arial', 9)
+ rtdetr_conc_desc.setFont(rtdetr_conc_desc_font)
+ rtdetr_conc_desc.setStyleSheet("color: gray;")
+ rtdetr_conc_layout.addWidget(rtdetr_conc_desc)
+ rtdetr_conc_layout.addStretch()
+
+ # Initially hide RT-DETR concurrency control until we check detector type
+ self.rtdetr_conc_frame.setVisible(False)
+
+ # Inpainting Concurrency
+ inpaint_bs_frame = QWidget()
+ inpaint_bs_layout = QHBoxLayout(inpaint_bs_frame)
+ inpaint_bs_layout.setContentsMargins(0, 0, 0, 0)
+ inpaint_perf_layout.addWidget(inpaint_bs_frame)
+
+ inpaint_bs_label = QLabel("Inpainting Concurrency:")
+ inpaint_bs_label.setMinimumWidth(150)
+ inpaint_bs_layout.addWidget(inpaint_bs_label)
+
+ self.inpaint_batch_size_spinbox = QSpinBox()
+ self.inpaint_batch_size_spinbox.setRange(1, 32)
+ self.inpaint_batch_size_spinbox.setValue(self.settings.get('inpainting', {}).get('batch_size', 10))
+ inpaint_bs_layout.addWidget(self.inpaint_batch_size_spinbox)
+
+ inpaint_bs_help = QLabel("(process multiple regions at once)")
+ inpaint_bs_help_font = QFont('Arial', 9)
+ inpaint_bs_help.setFont(inpaint_bs_help_font)
+ inpaint_bs_help.setStyleSheet("color: gray;")
+ inpaint_bs_layout.addWidget(inpaint_bs_help)
+ inpaint_bs_layout.addStretch()
+
+ self.enable_cache_checkbox = self._create_styled_checkbox("Enable inpainting cache (speeds up repeated processing)")
+ self.enable_cache_checkbox.setChecked(self.settings.get('inpainting', {}).get('enable_cache', True))
+ inpaint_perf_layout.addWidget(self.enable_cache_checkbox)
+
+ # Max concurrent panels
+ panels_frame = QWidget()
+ panels_layout = QHBoxLayout(panels_frame)
+ panels_layout.setContentsMargins(0, 0, 0, 0)
+ panel_layout.addWidget(panels_frame)
+
+ self.panels_label = QLabel("Max concurrent panels:")
+ self.panels_label.setMinimumWidth(150)
+ panels_layout.addWidget(self.panels_label)
+
+ self.panel_max_workers_spinbox = QSpinBox()
+ self.panel_max_workers_spinbox.setRange(1, 12)
+ self.panel_max_workers_spinbox.setValue(self.settings.get('advanced', {}).get('panel_max_workers', 2))
+ panels_layout.addWidget(self.panel_max_workers_spinbox)
+ panels_layout.addStretch()
+
+ # Panel start stagger (ms)
+ stagger_frame = QWidget()
+ stagger_layout = QHBoxLayout(stagger_frame)
+ stagger_layout.setContentsMargins(0, 0, 0, 0)
+ panel_layout.addWidget(stagger_frame)
+
+ self.stagger_label = QLabel("Panel start stagger:")
+ self.stagger_label.setMinimumWidth(150)
+ stagger_layout.addWidget(self.stagger_label)
+
+ self.panel_stagger_ms_spinbox = QSpinBox()
+ self.panel_stagger_ms_spinbox.setRange(0, 1000)
+ self.panel_stagger_ms_spinbox.setValue(self.settings.get('advanced', {}).get('panel_start_stagger_ms', 30))
+ stagger_layout.addWidget(self.panel_stagger_ms_spinbox)
+
+ self.stagger_unit_label = QLabel("ms")
+ stagger_layout.addWidget(self.stagger_unit_label)
+ stagger_layout.addStretch()
+
+ # Initialize panel controls state
+ self._toggle_panel_controls()
+ self._toggle_singleton_controls()
+
+ # ONNX conversion settings
+ onnx_group = QGroupBox("ONNX Conversion")
+ main_layout.addWidget(onnx_group)
+ onnx_layout = QVBoxLayout(onnx_group)
+ onnx_layout.setContentsMargins(8, 8, 8, 6)
+ onnx_layout.setSpacing(4)
+
+ self.auto_convert_onnx_checkbox = self._create_styled_checkbox("Auto-convert local models to ONNX for faster inference (recommended)")
+ self.auto_convert_onnx_checkbox.setChecked(self.settings['advanced'].get('auto_convert_to_onnx', False))
+ onnx_layout.addWidget(self.auto_convert_onnx_checkbox)
+
+ self.auto_convert_onnx_bg_checkbox = self._create_styled_checkbox("Convert in background (non-blocking; switches to ONNX when ready)")
+ self.auto_convert_onnx_bg_checkbox.setChecked(self.settings['advanced'].get('auto_convert_to_onnx_background', True))
+ onnx_layout.addWidget(self.auto_convert_onnx_bg_checkbox)
+
+ # Connect toggle handler
+ def _toggle_onnx_controls():
+ self.auto_convert_onnx_bg_checkbox.setEnabled(self.auto_convert_onnx_checkbox.isChecked())
+ self.auto_convert_onnx_checkbox.toggled.connect(_toggle_onnx_controls)
+ _toggle_onnx_controls()
+
+ # Model memory optimization (quantization)
+ quant_group = QGroupBox("Model Memory Optimization")
+ main_layout.addWidget(quant_group)
+ quant_layout = QVBoxLayout(quant_group)
+ quant_layout.setContentsMargins(8, 8, 8, 6)
+ quant_layout.setSpacing(4)
+
+ self.quantize_models_checkbox = self._create_styled_checkbox("Reduce RAM with quantized models (global switch)")
+ self.quantize_models_checkbox.setChecked(self.settings['advanced'].get('quantize_models', False))
+ quant_layout.addWidget(self.quantize_models_checkbox)
+
+ # ONNX quantize sub-toggle
+ onnx_quant_frame = QWidget()
+ onnx_quant_layout = QHBoxLayout(onnx_quant_frame)
+ onnx_quant_layout.setContentsMargins(0, 0, 0, 0)
+ quant_layout.addWidget(onnx_quant_frame)
+
+ self.onnx_quantize_checkbox = self._create_styled_checkbox("Quantize ONNX models to INT8 (dynamic)")
+ self.onnx_quantize_checkbox.setChecked(self.settings['advanced'].get('onnx_quantize', False))
+ onnx_quant_layout.addWidget(self.onnx_quantize_checkbox)
+
+ onnx_quant_help = QLabel("(lower RAM/CPU; slight accuracy trade-off)")
+ onnx_quant_help_font = QFont('Arial', 9)
+ onnx_quant_help.setFont(onnx_quant_help_font)
+ onnx_quant_help.setStyleSheet("color: gray;")
+ onnx_quant_layout.addWidget(onnx_quant_help)
+ onnx_quant_layout.addStretch()
+
+ # Torch precision dropdown
+ precision_frame = QWidget()
+ precision_layout = QHBoxLayout(precision_frame)
+ precision_layout.setContentsMargins(0, 0, 0, 0)
+ quant_layout.addWidget(precision_frame)
+
+ precision_label = QLabel("Torch precision:")
+ precision_label.setMinimumWidth(150)
+ precision_layout.addWidget(precision_label)
+
+ self.torch_precision_combo = QComboBox()
+ self.torch_precision_combo.addItems(['fp16', 'fp32', 'auto'])
+ self.torch_precision_combo.setCurrentText(self.settings['advanced'].get('torch_precision', 'fp16'))
+ precision_layout.addWidget(self.torch_precision_combo)
+
+ precision_help = QLabel("(fp16 only, since fp32 is currently bugged)")
+ precision_help_font = QFont('Arial', 9)
+ precision_help.setFont(precision_help_font)
+ precision_help.setStyleSheet("color: gray;")
+ precision_layout.addWidget(precision_help)
+ precision_layout.addStretch()
+
+ # Aggressive memory cleanup
+ cleanup_group = QGroupBox("Memory & Cleanup")
+ main_layout.addWidget(cleanup_group)
+ cleanup_layout = QVBoxLayout(cleanup_group)
+ cleanup_layout.setContentsMargins(8, 8, 8, 6)
+ cleanup_layout.setSpacing(4)
+
+ self.force_deep_cleanup_checkbox = self._create_styled_checkbox("Force deep model cleanup after every image (slowest, lowest RAM)")
+ self.force_deep_cleanup_checkbox.setChecked(self.settings.get('advanced', {}).get('force_deep_cleanup_each_image', False))
+ cleanup_layout.addWidget(self.force_deep_cleanup_checkbox)
+
+ cleanup_help = QLabel("Also clears shared caches at batch end.")
+ cleanup_help_font = QFont('Arial', 9)
+ cleanup_help.setFont(cleanup_help_font)
+ cleanup_help.setStyleSheet("color: gray;")
+ cleanup_layout.addWidget(cleanup_help)
+
+ # RAM cap controls
+ self.ram_cap_enabled_checkbox = self._create_styled_checkbox("Enable RAM cap")
+ self.ram_cap_enabled_checkbox.setChecked(self.settings.get('advanced', {}).get('ram_cap_enabled', False))
+ cleanup_layout.addWidget(self.ram_cap_enabled_checkbox)
+
+ # RAM cap value
+ ramcap_value_frame = QWidget()
+ ramcap_value_layout = QHBoxLayout(ramcap_value_frame)
+ ramcap_value_layout.setContentsMargins(0, 0, 0, 0)
+ cleanup_layout.addWidget(ramcap_value_frame)
+
+ ramcap_value_label = QLabel("Max RAM (MB):")
+ ramcap_value_label.setMinimumWidth(150)
+ ramcap_value_layout.addWidget(ramcap_value_label)
+
+ self.ram_cap_mb_spinbox = QSpinBox()
+ self.ram_cap_mb_spinbox.setRange(512, 131072)
+ self.ram_cap_mb_spinbox.setValue(int(self.settings.get('advanced', {}).get('ram_cap_mb', 0) or 0))
+ ramcap_value_layout.addWidget(self.ram_cap_mb_spinbox)
+
+ ramcap_value_help = QLabel("(0 = disabled)")
+ ramcap_value_help_font = QFont('Arial', 9)
+ ramcap_value_help.setFont(ramcap_value_help_font)
+ ramcap_value_help.setStyleSheet("color: gray;")
+ ramcap_value_layout.addWidget(ramcap_value_help)
+ ramcap_value_layout.addStretch()
+
+ # RAM cap mode
+ ramcap_mode_frame = QWidget()
+ ramcap_mode_layout = QHBoxLayout(ramcap_mode_frame)
+ ramcap_mode_layout.setContentsMargins(0, 0, 0, 0)
+ cleanup_layout.addWidget(ramcap_mode_frame)
+
+ ramcap_mode_label = QLabel("Cap mode:")
+ ramcap_mode_label.setMinimumWidth(150)
+ ramcap_mode_layout.addWidget(ramcap_mode_label)
+
+ self.ram_cap_mode_combo = QComboBox()
+ self.ram_cap_mode_combo.addItems(['soft', 'hard (Windows only)'])
+ self.ram_cap_mode_combo.setCurrentText(self.settings.get('advanced', {}).get('ram_cap_mode', 'soft'))
+ ramcap_mode_layout.addWidget(self.ram_cap_mode_combo)
+
+ ramcap_mode_help = QLabel("Soft = clean/trim, Hard = OS-enforced (may OOM)")
+ ramcap_mode_help_font = QFont('Arial', 9)
+ ramcap_mode_help.setFont(ramcap_mode_help_font)
+ ramcap_mode_help.setStyleSheet("color: gray;")
+ ramcap_mode_layout.addWidget(ramcap_mode_help)
+ ramcap_mode_layout.addStretch()
+
+ # Advanced RAM gate tuning
+ gate_frame = QWidget()
+ gate_layout = QHBoxLayout(gate_frame)
+ gate_layout.setContentsMargins(0, 0, 0, 0)
+ cleanup_layout.addWidget(gate_frame)
+
+ gate_label = QLabel("Gate timeout (sec):")
+ gate_label.setMinimumWidth(150)
+ gate_layout.addWidget(gate_label)
+
+ self.ram_gate_timeout_spinbox = QDoubleSpinBox()
+ self.ram_gate_timeout_spinbox.setRange(2.0, 60.0)
+ self.ram_gate_timeout_spinbox.setSingleStep(0.5)
+ self.ram_gate_timeout_spinbox.setValue(float(self.settings.get('advanced', {}).get('ram_gate_timeout_sec', 10.0)))
+ gate_layout.addWidget(self.ram_gate_timeout_spinbox)
+ gate_layout.addStretch()
+
+ # Gate floor
+ floor_frame = QWidget()
+ floor_layout = QHBoxLayout(floor_frame)
+ floor_layout.setContentsMargins(0, 0, 0, 0)
+ cleanup_layout.addWidget(floor_frame)
+
+ floor_label = QLabel("Gate floor over baseline (MB):")
+ floor_label.setMinimumWidth(180)
+ floor_layout.addWidget(floor_label)
+
+ self.ram_gate_floor_spinbox = QSpinBox()
+ self.ram_gate_floor_spinbox.setRange(64, 2048)
+ self.ram_gate_floor_spinbox.setValue(int(self.settings.get('advanced', {}).get('ram_min_floor_over_baseline_mb', 128)))
+ floor_layout.addWidget(self.ram_gate_floor_spinbox)
+ floor_layout.addStretch()
+
+ # Update RT-DETR concurrency control visibility based on current detector type
+ # This is called after the Advanced tab is fully created to sync with OCR tab state
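+        # QTimer.singleShot(0, ...) queues the sync on the Qt event loop, so it only runs once
+        # dialog construction has finished and every tab's widgets (including the OCR tab's
+        # detector_type_combo) are in place.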
+ QTimer.singleShot(0, self._sync_rtdetr_concurrency_visibility)
+
+ def _sync_rtdetr_concurrency_visibility(self):
+ """Sync RT-DETR concurrency control visibility with detector type selection"""
+ if hasattr(self, 'detector_type_combo') and hasattr(self, 'rtdetr_conc_frame'):
+ detector = self.detector_type_combo.currentText()
+ is_rtdetr = 'RT-DETR' in detector or 'RTEDR_onnx' in detector
+ self.rtdetr_conc_frame.setVisible(is_rtdetr)
+
+ def _toggle_workers(self):
+ """Enable/disable worker settings based on parallel processing toggle"""
+ if hasattr(self, 'parallel_processing_checkbox'):
+ enabled = bool(self.parallel_processing_checkbox.isChecked())
+ if hasattr(self, 'max_workers_spinbox'):
+ self.max_workers_spinbox.setEnabled(enabled)
+ if hasattr(self, 'workers_label'):
+ self.workers_label.setEnabled(enabled)
+ self.workers_label.setStyleSheet("color: white;" if enabled else "color: gray;")
+ if hasattr(self, 'workers_desc_label'):
+ self.workers_desc_label.setEnabled(enabled)
+ self.workers_desc_label.setStyleSheet("color: white;" if enabled else "color: gray;")
+
+ def _toggle_singleton_controls(self):
+ """Enable/disable parallel panel translation based on singleton toggle."""
+ # When singleton mode is ENABLED, parallel panel translation should be DISABLED
+ try:
+ singleton_enabled = bool(self.use_singleton_models_checkbox.isChecked())
+ except Exception:
+ singleton_enabled = True # Default
+
+ # Disable parallel panel checkbox when singleton is enabled
+ if hasattr(self, 'parallel_panel_checkbox'):
+ self.parallel_panel_checkbox.setEnabled(not singleton_enabled)
+            # No extra label handling is needed here: the disabled checkbox already renders grayed out.
+
+ def _toggle_panel_controls(self):
+ """Enable/disable panel control fields based on parallel panel toggle."""
+ try:
+ enabled = bool(self.parallel_panel_checkbox.isChecked())
+ except Exception:
+ enabled = False
+
+ # Enable/disable panel control widgets and their labels
+ panel_widgets = [
+ ('panel_max_workers_spinbox', 'panels_label'),
+ ('panel_stagger_ms_spinbox', 'stagger_label', 'stagger_unit_label'),
+ ('preload_local_panels_checkbox',) # Add preload checkbox
+ ]
+
+ for widget_names in panel_widgets:
+ for widget_name in widget_names:
+ try:
+ widget = getattr(self, widget_name, None)
+ if widget is not None:
+ # Just use setEnabled() - stylesheet handles visuals
+ widget.setEnabled(enabled)
+ except Exception:
+ pass
+
+ def _apply_defaults_to_controls(self):
+ """Apply default values to all visible Tk variables/controls across tabs without rebuilding the dialog."""
+ try:
+ # Use current in-memory settings (which we set to defaults above)
+ s = self.settings if isinstance(getattr(self, 'settings', None), dict) else self.default_settings
+ pre = s.get('preprocessing', {})
+ comp = s.get('compression', {})
+ ocr = s.get('ocr', {})
+ adv = s.get('advanced', {})
+ inp = s.get('inpainting', {})
+ font = s.get('font_sizing', {})
+
+ # Preprocessing
+ if hasattr(self, 'preprocess_enabled'): self.preprocess_enabled.set(bool(pre.get('enabled', False)))
+ if hasattr(self, 'auto_detect'): self.auto_detect.set(bool(pre.get('auto_detect_quality', True)))
+ if hasattr(self, 'contrast_threshold'): self.contrast_threshold.set(float(pre.get('contrast_threshold', 0.4)))
+ if hasattr(self, 'sharpness_threshold'): self.sharpness_threshold.set(float(pre.get('sharpness_threshold', 0.3)))
+ if hasattr(self, 'enhancement_strength'): self.enhancement_strength.set(float(pre.get('enhancement_strength', 1.5)))
+ if hasattr(self, 'noise_threshold'): self.noise_threshold.set(int(pre.get('noise_threshold', 20)))
+ if hasattr(self, 'denoise_strength'): self.denoise_strength.set(int(pre.get('denoise_strength', 10)))
+ if hasattr(self, 'max_dimension'): self.max_dimension.set(int(pre.get('max_image_dimension', 2000)))
+ if hasattr(self, 'max_pixels'): self.max_pixels.set(int(pre.get('max_image_pixels', 2000000)))
+ if hasattr(self, 'chunk_height'): self.chunk_height.set(int(pre.get('chunk_height', 1000)))
+ if hasattr(self, 'chunk_overlap'): self.chunk_overlap.set(int(pre.get('chunk_overlap', 100)))
+ # Compression
+ if hasattr(self, 'compression_enabled_var'): self.compression_enabled_var.set(bool(comp.get('enabled', False)))
+ if hasattr(self, 'compression_format_var'): self.compression_format_var.set(str(comp.get('format', 'jpeg')))
+ if hasattr(self, 'jpeg_quality_var'): self.jpeg_quality_var.set(int(comp.get('jpeg_quality', 85)))
+ if hasattr(self, 'png_level_var'): self.png_level_var.set(int(comp.get('png_compress_level', 6)))
+ if hasattr(self, 'webp_quality_var'): self.webp_quality_var.set(int(comp.get('webp_quality', 85)))
+ # Tiling
+ if hasattr(self, 'inpaint_tiling_enabled'): self.inpaint_tiling_enabled.set(bool(pre.get('inpaint_tiling_enabled', False)))
+ if hasattr(self, 'inpaint_tile_size'): self.inpaint_tile_size.set(int(pre.get('inpaint_tile_size', 512)))
+ if hasattr(self, 'inpaint_tile_overlap'): self.inpaint_tile_overlap.set(int(pre.get('inpaint_tile_overlap', 64)))
+
+ # OCR basic
+ if hasattr(self, 'confidence_threshold'): self.confidence_threshold.set(float(ocr.get('confidence_threshold', 0.7)))
+ if hasattr(self, 'detection_mode'): self.detection_mode.set(str(ocr.get('text_detection_mode', 'document')))
+ if hasattr(self, 'merge_nearby_threshold'): self.merge_nearby_threshold.set(int(ocr.get('merge_nearby_threshold', 20)))
+ if hasattr(self, 'enable_rotation'): self.enable_rotation.set(bool(ocr.get('enable_rotation_correction', True)))
+
+ # Language checkboxes
+ try:
+ if hasattr(self, 'lang_vars') and isinstance(self.lang_vars, dict):
+ langs = set(ocr.get('language_hints', ['ja', 'ko', 'zh']))
+ for code, var in self.lang_vars.items():
+ var.set(code in langs)
+ except Exception:
+ pass
+
+ # OCR batching/locality
+ if hasattr(self, 'ocr_batch_enabled_var'): self.ocr_batch_enabled_var.set(bool(ocr.get('ocr_batch_enabled', True)))
+ if hasattr(self, 'ocr_batch_size_var'): self.ocr_batch_size_var.set(int(ocr.get('ocr_batch_size', 8)))
+ if hasattr(self, 'ocr_max_conc_var'): self.ocr_max_conc_var.set(int(ocr.get('ocr_max_concurrency', 2)))
+ if hasattr(self, 'roi_locality_var'): self.roi_locality_var.set(bool(ocr.get('roi_locality_enabled', False)))
+ if hasattr(self, 'roi_padding_ratio_var'): self.roi_padding_ratio_var.set(float(ocr.get('roi_padding_ratio', 0.08)))
+ if hasattr(self, 'roi_min_side_var'): self.roi_min_side_var.set(int(ocr.get('roi_min_side_px', 12)))
+ if hasattr(self, 'roi_min_area_var'): self.roi_min_area_var.set(int(ocr.get('roi_min_area_px', 100)))
+ if hasattr(self, 'roi_max_side_var'): self.roi_max_side_var.set(int(ocr.get('roi_max_side', 0)))
+
+ # English filters
+ if hasattr(self, 'exclude_english_var'): self.exclude_english_var.set(bool(ocr.get('exclude_english_text', False)))
+ if hasattr(self, 'english_exclude_threshold'): self.english_exclude_threshold.set(float(ocr.get('english_exclude_threshold', 0.7)))
+ if hasattr(self, 'english_exclude_min_chars'): self.english_exclude_min_chars.set(int(ocr.get('english_exclude_min_chars', 4)))
+ if hasattr(self, 'english_exclude_short_tokens'): self.english_exclude_short_tokens.set(bool(ocr.get('english_exclude_short_tokens', False)))
+
+ # Azure
+ if hasattr(self, 'azure_merge_multiplier'): self.azure_merge_multiplier.set(float(ocr.get('azure_merge_multiplier', 3.0)))
+ if hasattr(self, 'azure_reading_order'): self.azure_reading_order.set(str(ocr.get('azure_reading_order', 'natural')))
+ if hasattr(self, 'azure_model_version'): self.azure_model_version.set(str(ocr.get('azure_model_version', 'latest')))
+ if hasattr(self, 'azure_max_wait'): self.azure_max_wait.set(int(ocr.get('azure_max_wait', 60)))
+ if hasattr(self, 'azure_poll_interval'): self.azure_poll_interval.set(float(ocr.get('azure_poll_interval', 0.5)))
+ try:
+ self._update_azure_label()
+ except Exception:
+ pass
+
+ # Bubble detector
+ if hasattr(self, 'bubble_detection_enabled'): self.bubble_detection_enabled.set(bool(ocr.get('bubble_detection_enabled', False)))
+ # Detector type mapping to UI labels
+ if hasattr(self, 'detector_type'):
+ dt = str(ocr.get('detector_type', 'rtdetr_onnx'))
+ if dt == 'rtdetr_onnx': self.detector_type.set('RTEDR_onnx')
+ elif dt == 'rtdetr': self.detector_type.set('RT-DETR')
+ elif dt == 'yolo': self.detector_type.set('YOLOv8 Speech')
+ elif dt == 'custom': self.detector_type.set('Custom Model')
+ else: self.detector_type.set('RTEDR_onnx')
+ if hasattr(self, 'bubble_model_path'): self.bubble_model_path.set(str(ocr.get('bubble_model_path', '')))
+ if hasattr(self, 'bubble_confidence'): self.bubble_confidence.set(float(ocr.get('bubble_confidence', 0.5)))
+ if hasattr(self, 'detect_empty_bubbles'): self.detect_empty_bubbles.set(bool(ocr.get('detect_empty_bubbles', True)))
+ if hasattr(self, 'detect_text_bubbles'): self.detect_text_bubbles.set(bool(ocr.get('detect_text_bubbles', True)))
+ if hasattr(self, 'detect_free_text'): self.detect_free_text.set(bool(ocr.get('detect_free_text', True)))
+ if hasattr(self, 'bubble_max_det_yolo_var'): self.bubble_max_det_yolo_var.set(int(ocr.get('bubble_max_detections_yolo', 100)))
+
+ # Inpainting
+ if hasattr(self, 'inpaint_batch_size'): self.inpaint_batch_size.set(int(inp.get('batch_size', 1)))
+ if hasattr(self, 'enable_cache_var'): self.enable_cache_var.set(bool(inp.get('enable_cache', True)))
+ if hasattr(self, 'mask_dilation_var'): self.mask_dilation_var.set(int(s.get('mask_dilation', 0)))
+ if hasattr(self, 'use_all_iterations_var'): self.use_all_iterations_var.set(bool(s.get('use_all_iterations', True)))
+ if hasattr(self, 'all_iterations_var'): self.all_iterations_var.set(int(s.get('all_iterations', 2)))
+ if hasattr(self, 'text_bubble_iterations_var'): self.text_bubble_iterations_var.set(int(s.get('text_bubble_dilation_iterations', 2)))
+ if hasattr(self, 'empty_bubble_iterations_var'): self.empty_bubble_iterations_var.set(int(s.get('empty_bubble_dilation_iterations', 3)))
+ if hasattr(self, 'free_text_iterations_var'): self.free_text_iterations_var.set(int(s.get('free_text_dilation_iterations', 0)))
+
+ # Advanced
+ if hasattr(self, 'format_detection'): self.format_detection.set(1 if adv.get('format_detection', True) else 0)
+ if hasattr(self, 'webtoon_mode'): self.webtoon_mode.set(str(adv.get('webtoon_mode', 'auto')))
+ if hasattr(self, 'debug_mode'): self.debug_mode.set(1 if adv.get('debug_mode', False) else 0)
+ if hasattr(self, 'save_intermediate'): self.save_intermediate.set(1 if adv.get('save_intermediate', False) else 0)
+ if hasattr(self, 'parallel_processing'): self.parallel_processing.set(1 if adv.get('parallel_processing', False) else 0)
+ if hasattr(self, 'max_workers'): self.max_workers.set(int(adv.get('max_workers', 4)))
+ if hasattr(self, 'use_singleton_models'): self.use_singleton_models.set(bool(adv.get('use_singleton_models', True)))
+ if hasattr(self, 'auto_cleanup_models'): self.auto_cleanup_models.set(bool(adv.get('auto_cleanup_models', False)))
+ if hasattr(self, 'unload_models_var'): self.unload_models_var.set(bool(adv.get('unload_models_after_translation', False)))
+ if hasattr(self, 'parallel_panel_var'): self.parallel_panel_var.set(bool(adv.get('parallel_panel_translation', False)))
+ if hasattr(self, 'panel_max_workers_var'): self.panel_max_workers_var.set(int(adv.get('panel_max_workers', 2)))
+ if hasattr(self, 'panel_stagger_ms_var'): self.panel_stagger_ms_var.set(int(adv.get('panel_start_stagger_ms', 30)))
+ # New: preload local inpainting for parallel panels (default True)
+ if hasattr(self, 'preload_local_panels_var'): self.preload_local_panels_var.set(bool(adv.get('preload_local_inpainting_for_panels', True)))
+ if hasattr(self, 'auto_convert_onnx_var'): self.auto_convert_onnx_var.set(bool(adv.get('auto_convert_to_onnx', False)))
+ if hasattr(self, 'auto_convert_onnx_bg_var'): self.auto_convert_onnx_bg_var.set(bool(adv.get('auto_convert_to_onnx_background', True)))
+ if hasattr(self, 'quantize_models_var'): self.quantize_models_var.set(bool(adv.get('quantize_models', False)))
+ if hasattr(self, 'onnx_quantize_var'): self.onnx_quantize_var.set(bool(adv.get('onnx_quantize', False)))
+ if hasattr(self, 'torch_precision_var'): self.torch_precision_var.set(str(adv.get('torch_precision', 'auto')))
+
+ # Font sizing tab
+ if hasattr(self, 'font_algorithm_var'): self.font_algorithm_var.set(str(font.get('algorithm', 'smart')))
+ if hasattr(self, 'min_font_size_var'): self.min_font_size_var.set(int(font.get('min_size', 10)))
+ if hasattr(self, 'max_font_size_var'): self.max_font_size_var.set(int(font.get('max_size', 40)))
+ if hasattr(self, 'min_readable_var'): self.min_readable_var.set(int(font.get('min_readable', 14)))
+ if hasattr(self, 'prefer_larger_var'): self.prefer_larger_var.set(bool(font.get('prefer_larger', True)))
+ if hasattr(self, 'bubble_size_factor_var'): self.bubble_size_factor_var.set(bool(font.get('bubble_size_factor', True)))
+ if hasattr(self, 'line_spacing_var'): self.line_spacing_var.set(float(font.get('line_spacing', 1.3)))
+ if hasattr(self, 'max_lines_var'): self.max_lines_var.set(int(font.get('max_lines', 10)))
+ try:
+ if hasattr(self, '_on_font_mode_change'):
+ self._on_font_mode_change()
+ except Exception:
+ pass
+
+ # Rendering controls (if present in this dialog)
+ if hasattr(self, 'font_size_mode_var'): self.font_size_mode_var.set(str(s.get('rendering', {}).get('font_size_mode', 'auto')))
+ if hasattr(self, 'fixed_font_size_var'): self.fixed_font_size_var.set(int(s.get('rendering', {}).get('fixed_font_size', 16)))
+ if hasattr(self, 'font_scale_var'): self.font_scale_var.set(float(s.get('rendering', {}).get('font_scale', 1.0)))
+ if hasattr(self, 'auto_fit_style_var'): self.auto_fit_style_var.set(str(s.get('rendering', {}).get('auto_fit_style', 'balanced')))
+
+ # Cloud API tab
+ if hasattr(self, 'cloud_model_var'): self.cloud_model_var.set(str(s.get('cloud_inpaint_model', 'ideogram-v2')))
+ if hasattr(self, 'custom_version_var'): self.custom_version_var.set(str(s.get('cloud_custom_version', '')))
+ if hasattr(self, 'cloud_prompt_var'): self.cloud_prompt_var.set(str(s.get('cloud_inpaint_prompt', 'clean background, smooth surface')))
+ if hasattr(self, 'cloud_negative_prompt_var'): self.cloud_negative_prompt_var.set(str(s.get('cloud_negative_prompt', 'text, writing, letters')))
+ if hasattr(self, 'cloud_steps_var'): self.cloud_steps_var.set(int(s.get('cloud_inference_steps', 20)))
+ if hasattr(self, 'cloud_timeout_var'): self.cloud_timeout_var.set(int(s.get('cloud_timeout', 60)))
+
+ # Trigger dependent UI updates
+ try:
+ self._toggle_preprocessing()
+ except Exception:
+ pass
+ try:
+ if hasattr(self, '_on_cloud_model_change'):
+ self._on_cloud_model_change()
+ except Exception:
+ pass
+ try:
+ self._toggle_iteration_controls()
+ except Exception:
+ pass
+ try:
+ self._toggle_roi_locality_controls()
+ except Exception:
+ pass
+ try:
+ self._toggle_workers()
+ except Exception:
+ pass
+
+ # Build/attach advanced control for local inpainting preload if not present
+ try:
+ if not hasattr(self, 'preload_local_panels_var') and hasattr(self, '_create_advanced_tab_ui'):
+ # If there is a helper to build advanced UI, we rely on it. Otherwise, attach to existing advanced frame if available.
+ pass
+ except Exception:
+ pass
+ try:
+ if hasattr(self, 'compression_format_combo'):
+ self._toggle_compression_format()
+ except Exception:
+ pass
+ try:
+ if hasattr(self, 'detector_type'):
+ self._on_detector_type_changed()
+ except Exception:
+ pass
+ try:
+ self.dialog.update_idletasks()
+ except Exception:
+ pass
+ except Exception:
+ # Best-effort application only
+ pass
+
+
+ def _set_font_preset(self, preset: str):
+ """Apply font sizing preset"""
+ if preset == 'small':
+ # For manga with small bubbles
+ self.font_algorithm_var.set('conservative')
+ self.min_font_size_var.set(8)
+ self.max_font_size_var.set(24)
+ self.min_readable_var.set(12)
+ self.prefer_larger_var.set(False)
+ self.bubble_size_factor_var.set(True)
+ self.line_spacing_var.set(1.2)
+ self.max_lines_var.set(8)
+ elif preset == 'balanced':
+ # Default balanced settings
+ self.font_algorithm_var.set('smart')
+ self.min_font_size_var.set(10)
+ self.max_font_size_var.set(40)
+ self.min_readable_var.set(14)
+ self.prefer_larger_var.set(True)
+ self.bubble_size_factor_var.set(True)
+ self.line_spacing_var.set(1.3)
+ self.max_lines_var.set(10)
+ elif preset == 'large':
+ # For maximum readability
+ self.font_algorithm_var.set('aggressive')
+ self.min_font_size_var.set(14)
+ self.max_font_size_var.set(50)
+ self.min_readable_var.set(16)
+ self.prefer_larger_var.set(True)
+ self.bubble_size_factor_var.set(False)
+ self.line_spacing_var.set(1.4)
+ self.max_lines_var.set(12)
+
+
+ def _save_rendering_settings(self, *args):
+ """Auto-save font and rendering settings when controls change"""
+ # Don't save during initialization
+ if hasattr(self, '_initializing') and self._initializing:
+ return
+
+ try:
+ # Ensure rendering section exists in settings
+ if 'rendering' not in self.settings:
+ self.settings['rendering'] = {}
+
+ # Save font size controls if they exist
+ if hasattr(self, 'font_size_mode_var'):
+ self.settings['rendering']['font_size_mode'] = self.font_size_mode_var.get()
+ self.settings['rendering']['fixed_font_size'] = self.fixed_font_size_var.get()
+ self.settings['rendering']['font_scale'] = self.font_scale_var.get()
+ self.settings['rendering']['auto_fit_style'] = self.auto_fit_style_var.get()
+
+ # Save min/max for auto mode
+ if hasattr(self, 'min_font_size_var'):
+ self.settings['rendering']['auto_min_size'] = self.min_font_size_var.get()
+ if hasattr(self, 'max_font_size_var'):
+ self.settings['rendering']['auto_max_size'] = self.max_font_size_var.get()
+
+ # Update config
+ self.config['manga_settings'] = self.settings
+
+ # Mirror only auto max to top-level config for backward compatibility; keep min nested
+ try:
+ auto_max = self.settings.get('rendering', {}).get('auto_max_size', None)
+ if auto_max is not None:
+ self.config['manga_max_font_size'] = int(auto_max)
+ except Exception:
+ pass
+
+ # Save to file immediately
+ if hasattr(self.main_gui, 'save_config'):
+ self.main_gui.save_config()
+                print("Auto-saved rendering settings")
+ time.sleep(0.1) # Brief pause for stability
+ print("💤 Auto-save pausing briefly for stability")
+
+ except Exception as e:
+ print(f"Error auto-saving rendering settings: {e}")
+
+ def _save_settings(self):
+ """Save all settings including expanded iteration controls"""
+ try:
+ # Collect all preprocessing settings
+ self.settings['preprocessing']['enabled'] = self.preprocess_enabled.isChecked()
+ self.settings['preprocessing']['auto_detect_quality'] = self.auto_detect.isChecked()
+ self.settings['preprocessing']['contrast_threshold'] = self.contrast_threshold.value()
+ self.settings['preprocessing']['sharpness_threshold'] = self.sharpness_threshold.value()
+ self.settings['preprocessing']['enhancement_strength'] = self.enhancement_strength.value()
+ self.settings['preprocessing']['noise_threshold'] = self.noise_threshold.value()
+ self.settings['preprocessing']['denoise_strength'] = self.denoise_strength.value()
+ self.settings['preprocessing']['max_image_dimension'] = self.dimension_spinbox.value()
+ self.settings['preprocessing']['max_image_pixels'] = self.pixels_spinbox.value()
+ self.settings['preprocessing']['chunk_height'] = self.chunk_height_spinbox.value()
+ self.settings['preprocessing']['chunk_overlap'] = self.chunk_overlap_spinbox.value()
+
+ # Compression (saved separately from preprocessing)
+ if 'compression' not in self.settings:
+ self.settings['compression'] = {}
+ self.settings['compression']['enabled'] = bool(self.compression_enabled.isChecked())
+ self.settings['compression']['format'] = str(self.compression_format_combo.currentText())
+ self.settings['compression']['jpeg_quality'] = int(self.jpeg_quality_spin.value())
+ self.settings['compression']['png_compress_level'] = int(self.png_level_spin.value())
+ self.settings['compression']['webp_quality'] = int(self.webp_quality_spin.value())
+
+ # TILING SETTINGS - save under preprocessing (primary) and mirror under 'tiling' for backward compatibility
+ self.settings['preprocessing']['inpaint_tiling_enabled'] = self.inpaint_tiling_enabled.isChecked()
+ self.settings['preprocessing']['inpaint_tile_size'] = self.tile_size_spinbox.value()
+ self.settings['preprocessing']['inpaint_tile_overlap'] = self.tile_overlap_spinbox.value()
+ # Back-compat mirror
+ self.settings['tiling'] = {
+ 'enabled': self.inpaint_tiling_enabled.isChecked(),
+ 'tile_size': self.tile_size_spinbox.value(),
+ 'tile_overlap': self.tile_overlap_spinbox.value()
+ }
+
+ # OCR settings
+ self.settings['ocr']['language_hints'] = [code for code, checkbox in self.lang_checkboxes.items() if checkbox.isChecked()]
+ self.settings['ocr']['confidence_threshold'] = self.confidence_threshold_slider.value() / 100.0
+ self.settings['ocr']['text_detection_mode'] = self.detection_mode_combo.currentText()
+ self.settings['ocr']['merge_nearby_threshold'] = self.merge_nearby_threshold_spinbox.value()
+ self.settings['ocr']['enable_rotation_correction'] = self.enable_rotation_checkbox.isChecked()
+ self.settings['ocr']['azure_merge_multiplier'] = self.azure_merge_multiplier_slider.value() / 100.0
+ self.settings['ocr']['azure_reading_order'] = self.azure_reading_order_combo.currentText()
+ self.settings['ocr']['azure_model_version'] = self.azure_model_version_combo.currentText()
+ self.settings['ocr']['azure_max_wait'] = self.azure_max_wait_spinbox.value()
+ self.settings['ocr']['azure_poll_interval'] = self.azure_poll_interval_slider.value() / 100.0
+ self.settings['ocr']['min_text_length'] = self.min_text_length_spinbox.value()
+ self.settings['ocr']['exclude_english_text'] = self.exclude_english_checkbox.isChecked()
+
+ # OCR batching & locality
+ self.settings['ocr']['ocr_batch_enabled'] = bool(self.ocr_batch_enabled_checkbox.isChecked())
+ self.settings['ocr']['ocr_batch_size'] = int(self.ocr_batch_size_spinbox.value())
+ self.settings['ocr']['ocr_max_concurrency'] = int(self.ocr_max_conc_spinbox.value())
+ self.settings['ocr']['roi_locality_enabled'] = bool(self.roi_locality_checkbox.isChecked())
+ self.settings['ocr']['roi_padding_ratio'] = float(self.roi_padding_ratio_slider.value() / 100.0)
+ self.settings['ocr']['roi_min_side_px'] = int(self.roi_min_side_spinbox.value())
+ self.settings['ocr']['roi_min_area_px'] = int(self.roi_min_area_spinbox.value())
+ self.settings['ocr']['roi_max_side'] = int(self.roi_max_side_spinbox.value())
+ self.settings['ocr']['english_exclude_threshold'] = self.english_exclude_threshold_slider.value() / 100.0
+ self.settings['ocr']['english_exclude_min_chars'] = self.english_exclude_min_chars_spinbox.value()
+ self.settings['ocr']['english_exclude_short_tokens'] = self.english_exclude_short_tokens_checkbox.isChecked()
+
+ # Bubble detection settings
+ self.settings['ocr']['bubble_detection_enabled'] = self.bubble_detection_enabled_checkbox.isChecked()
+ self.settings['ocr']['use_rtdetr_for_ocr_regions'] = self.use_rtdetr_for_ocr_checkbox.isChecked() # NEW: RT-DETR for OCR guidance
+ self.settings['ocr']['bubble_model_path'] = self.bubble_model_entry.text()
+ self.settings['ocr']['bubble_confidence'] = self.bubble_conf_slider.value() / 100.0
+ self.settings['ocr']['rtdetr_confidence'] = self.bubble_conf_slider.value() / 100.0
+ self.settings['ocr']['detect_empty_bubbles'] = self.detect_empty_bubbles_checkbox.isChecked()
+ self.settings['ocr']['detect_text_bubbles'] = self.detect_text_bubbles_checkbox.isChecked()
+ self.settings['ocr']['detect_free_text'] = self.detect_free_text_checkbox.isChecked()
+ self.settings['ocr']['rtdetr_model_url'] = self.bubble_model_entry.text()
+ self.settings['ocr']['bubble_max_detections_yolo'] = int(self.bubble_max_det_yolo_spinbox.value())
+ self.settings['ocr']['rtdetr_max_concurrency'] = int(self.rtdetr_max_concurrency_spinbox.value())
+
+ # Save the detector type properly
+ detector_display = self.detector_type_combo.currentText()
+ if 'RTEDR_onnx' in detector_display or 'ONNX' in detector_display.upper():
+ self.settings['ocr']['detector_type'] = 'rtdetr_onnx'
+ elif 'RT-DETR' in detector_display:
+ self.settings['ocr']['detector_type'] = 'rtdetr'
+ elif 'YOLOv8' in detector_display:
+ self.settings['ocr']['detector_type'] = 'yolo'
+ elif detector_display == 'Custom Model':
+ self.settings['ocr']['detector_type'] = 'custom'
+ self.settings['ocr']['custom_model_path'] = self.bubble_model_entry.text()
+ else:
+ self.settings['ocr']['detector_type'] = 'rtdetr_onnx'
+
+ # Inpainting settings
+ if 'inpainting' not in self.settings:
+ self.settings['inpainting'] = {}
+ self.settings['inpainting']['batch_size'] = self.inpaint_batch_size_spinbox.value()
+ self.settings['inpainting']['enable_cache'] = self.enable_cache_checkbox.isChecked()
+
+ # Save all dilation settings
+ self.settings['mask_dilation'] = self.mask_dilation_spinbox.value()
+ self.settings['use_all_iterations'] = self.use_all_iterations_checkbox.isChecked()
+ self.settings['all_iterations'] = self.all_iterations_spinbox.value()
+ self.settings['text_bubble_dilation_iterations'] = self.text_bubble_iter_spinbox.value()
+ self.settings['empty_bubble_dilation_iterations'] = self.empty_bubble_iter_spinbox.value()
+ self.settings['free_text_dilation_iterations'] = self.free_text_iter_spinbox.value()
+ self.settings['auto_iterations'] = self.auto_iterations_checkbox.isChecked()
+
+ # Legacy support
+ self.settings['bubble_dilation_iterations'] = self.text_bubble_iter_spinbox.value()
+ self.settings['dilation_iterations'] = self.text_bubble_iter_spinbox.value()
+
+ # Advanced settings
+ self.settings['advanced']['format_detection'] = bool(self.format_detection_checkbox.isChecked())
+ self.settings['advanced']['webtoon_mode'] = self.webtoon_mode_combo.currentText()
+ self.settings['advanced']['debug_mode'] = bool(self.debug_mode_checkbox.isChecked())
+ self.settings['advanced']['save_intermediate'] = bool(self.save_intermediate_checkbox.isChecked())
+ self.settings['advanced']['parallel_processing'] = bool(self.parallel_processing_checkbox.isChecked())
+ self.settings['advanced']['max_workers'] = self.max_workers_spinbox.value()
+
+ # Save HD strategy settings
+ self.settings['advanced']['hd_strategy'] = str(self.hd_strategy_combo.currentText())
+ self.settings['advanced']['hd_strategy_resize_limit'] = int(self.hd_resize_limit_spin.value())
+ self.settings['advanced']['hd_strategy_crop_margin'] = int(self.hd_crop_margin_spin.value())
+ self.settings['advanced']['hd_strategy_crop_trigger_size'] = int(self.hd_crop_trigger_spin.value())
+ # Also reflect into environment for immediate effect in this session
+ os.environ['HD_STRATEGY'] = self.settings['advanced']['hd_strategy']
+ os.environ['HD_RESIZE_LIMIT'] = str(self.settings['advanced']['hd_strategy_resize_limit'])
+ os.environ['HD_CROP_MARGIN'] = str(self.settings['advanced']['hd_strategy_crop_margin'])
+ os.environ['HD_CROP_TRIGGER'] = str(self.settings['advanced']['hd_strategy_crop_trigger_size'])
+
+ # Save parallel rendering toggle
+ if hasattr(self, 'render_parallel_checkbox'):
+ self.settings['advanced']['render_parallel'] = bool(self.render_parallel_checkbox.isChecked())
+
+ # Panel-level parallel translation settings
+ self.settings['advanced']['parallel_panel_translation'] = bool(self.parallel_panel_checkbox.isChecked())
+ self.settings['advanced']['panel_max_workers'] = int(self.panel_max_workers_spinbox.value())
+ self.settings['advanced']['panel_start_stagger_ms'] = int(self.panel_stagger_ms_spinbox.value())
+ # New: preload local inpainting for panels
+ if hasattr(self, 'preload_local_panels_checkbox'):
+ self.settings['advanced']['preload_local_inpainting_for_panels'] = bool(self.preload_local_panels_checkbox.isChecked())
+
+ # Memory management settings
+ self.settings['advanced']['use_singleton_models'] = bool(self.use_singleton_models_checkbox.isChecked())
+ self.settings['advanced']['auto_cleanup_models'] = bool(self.auto_cleanup_models_checkbox.isChecked())
+ self.settings['advanced']['unload_models_after_translation'] = bool(self.unload_models_checkbox.isChecked() if hasattr(self, 'unload_models_checkbox') else False)
+
+ # ONNX auto-convert settings (persist and apply to environment)
+ if hasattr(self, 'auto_convert_onnx_checkbox'):
+ self.settings['advanced']['auto_convert_to_onnx'] = bool(self.auto_convert_onnx_checkbox.isChecked())
+ os.environ['AUTO_CONVERT_TO_ONNX'] = 'true' if self.auto_convert_onnx_checkbox.isChecked() else 'false'
+ if hasattr(self, 'auto_convert_onnx_bg_checkbox'):
+ self.settings['advanced']['auto_convert_to_onnx_background'] = bool(self.auto_convert_onnx_bg_checkbox.isChecked())
+ os.environ['AUTO_CONVERT_TO_ONNX_BACKGROUND'] = 'true' if self.auto_convert_onnx_bg_checkbox.isChecked() else 'false'
+
+ # Quantization toggles and precision
+ if hasattr(self, 'quantize_models_checkbox'):
+ self.settings['advanced']['quantize_models'] = bool(self.quantize_models_checkbox.isChecked())
+ os.environ['MODEL_QUANTIZE'] = 'true' if self.quantize_models_checkbox.isChecked() else 'false'
+ if hasattr(self, 'onnx_quantize_checkbox'):
+ self.settings['advanced']['onnx_quantize'] = bool(self.onnx_quantize_checkbox.isChecked())
+ os.environ['ONNX_QUANTIZE'] = 'true' if self.onnx_quantize_checkbox.isChecked() else 'false'
+ if hasattr(self, 'torch_precision_combo'):
+ self.settings['advanced']['torch_precision'] = str(self.torch_precision_combo.currentText())
+ os.environ['TORCH_PRECISION'] = self.settings['advanced']['torch_precision']
+
+ # Memory cleanup toggle
+ if hasattr(self, 'force_deep_cleanup_checkbox'):
+ if 'advanced' not in self.settings:
+ self.settings['advanced'] = {}
+ self.settings['advanced']['force_deep_cleanup_each_image'] = bool(self.force_deep_cleanup_checkbox.isChecked())
+
+ # RAM cap settings
+ if hasattr(self, 'ram_cap_enabled_checkbox'):
+ self.settings['advanced']['ram_cap_enabled'] = bool(self.ram_cap_enabled_checkbox.isChecked())
+ if hasattr(self, 'ram_cap_mb_spinbox'):
+ self.settings['advanced']['ram_cap_mb'] = int(self.ram_cap_mb_spinbox.value())
+ if hasattr(self, 'ram_cap_mode_combo'):
+ mode = self.ram_cap_mode_combo.currentText()
+ self.settings['advanced']['ram_cap_mode'] = 'hard' if mode.startswith('hard') else 'soft'
+ if hasattr(self, 'ram_gate_timeout_spinbox'):
+ self.settings['advanced']['ram_gate_timeout_sec'] = float(self.ram_gate_timeout_spinbox.value())
+ if hasattr(self, 'ram_gate_floor_spinbox'):
+ self.settings['advanced']['ram_min_floor_over_baseline_mb'] = int(self.ram_gate_floor_spinbox.value())
+
+ # Cloud API settings
+ if hasattr(self, 'cloud_model_selected'):
+ self.settings['cloud_inpaint_model'] = self.cloud_model_selected
+ self.settings['cloud_custom_version'] = self.custom_version_entry.text()
+ self.settings['cloud_inpaint_prompt'] = self.cloud_prompt_entry.text()
+ self.settings['cloud_negative_prompt'] = self.negative_entry.text()
+ self.settings['cloud_inference_steps'] = self.steps_spinbox.value()
+ self.settings['cloud_timeout'] = self.cloud_timeout_spinbox.value()
+
+ # Clear bubble detector cache to force reload with new settings
+ if hasattr(self.main_gui, 'manga_tab') and hasattr(self.main_gui.manga_tab, 'translator'):
+ if hasattr(self.main_gui.manga_tab.translator, 'bubble_detector'):
+ self.main_gui.manga_tab.translator.bubble_detector = None
+
+ # Save to config
+ self.config['manga_settings'] = self.settings
+
+ # Save to file - using the correct method name
+ try:
+ if hasattr(self.main_gui, 'save_config'):
+ self.main_gui.save_config(show_message=False)
+ print("Settings saved successfully")
+ elif hasattr(self.main_gui, 'save_configuration'):
+ self.main_gui.save_configuration()
+ print("Settings saved successfully")
+ else:
+ # Try direct save as fallback
+ if hasattr(self.main_gui, 'config_file'):
+ with open(self.main_gui.config_file, 'w') as f:
+ json.dump(self.config, f, indent=2)
+ print("Settings saved directly to config file")
+ except Exception as e:
+ print(f"Error saving configuration: {e}")
+ QMessageBox.critical(self, "Save Error", f"Failed to save settings: {e}")
+ return
+
+ # Call callback if provided
+ if self.callback:
+ try:
+ self.callback(self.settings)
+ except Exception as e:
+ print(f"Error in callback: {e}")
+
+ # Close dialog
+ self.accept()
+
+ except Exception as e:
+ import traceback
+ print(f"Critical error in _save_settings: {e}")
+ print(traceback.format_exc())
+ QMessageBox.critical(self, "Save Error", f"Failed to save settings: {e}")
+
+ def _reset_defaults(self):
+ """Reset by removing manga_settings from config and reinitializing the dialog."""
+ reply = QMessageBox.question(self, "Reset Settings",
+ "Reset all manga settings to defaults?\nThis will remove custom manga settings from config.json.",
+ QMessageBox.StandardButton.Yes | QMessageBox.StandardButton.No,
+ QMessageBox.StandardButton.No)
+ if reply != QMessageBox.StandardButton.Yes:
+ return
+ # Remove manga_settings key to force defaults
+ try:
+ if isinstance(self.config, dict) and 'manga_settings' in self.config:
+ del self.config['manga_settings']
+ except Exception:
+ pass
+ # Persist changes WITHOUT showing message
+ try:
+ if hasattr(self.main_gui, 'save_config'):
+ self.main_gui.save_config(show_message=False)
+ elif hasattr(self.main_gui, 'save_configuration'):
+ self.main_gui.save_configuration()
+ elif hasattr(self.main_gui, 'config_file') and isinstance(self.main_gui.config_file, str):
+ with open(self.main_gui.config_file, 'w', encoding='utf-8') as f:
+ json.dump(self.config, f, ensure_ascii=False, indent=2)
+ except Exception:
+ try:
+ if hasattr(self.main_gui, 'CONFIG_FILE') and isinstance(self.main_gui.CONFIG_FILE, str):
+ with open(self.main_gui.CONFIG_FILE, 'w', encoding='utf-8') as f:
+ json.dump(self.config, f, ensure_ascii=False, indent=2)
+ except Exception:
+ pass
+ # Close and reopen dialog so defaults apply
+ self.close()
+ try:
+ MangaSettingsDialog(parent=self.parent, main_gui=self.main_gui, config=self.config, callback=self.callback)
+ except Exception:
+ pass # Don't show any message
+
+ def _cancel(self):
+ """Cancel without saving"""
+ self.reject()
diff --git a/manga_translator.py b/manga_translator.py
new file mode 100644
index 0000000000000000000000000000000000000000..33ccf93f071baae09452e40fa50fa4d19081aed1
--- /dev/null
+++ b/manga_translator.py
@@ -0,0 +1,11216 @@
+# manga_translator.py
+"""
+Enhanced Manga Translation Pipeline with improved text visibility controls
+Handles OCR, translation, and advanced text rendering for manga panels
+Now with proper history management and full page context support
+"""
+
+import os
+import json
+import base64
+import logging
+import time
+import traceback
+import cv2
+from PIL import ImageEnhance, ImageFilter
+from typing import List, Dict, Tuple, Optional, Any
+from dataclasses import dataclass
+from concurrent.futures import ThreadPoolExecutor, as_completed
+import threading
+from PIL import Image, ImageDraw, ImageFont
+import numpy as np
+from bubble_detector import BubbleDetector
+from TransateKRtoEN import send_with_interrupt
+
+# Google Cloud Vision imports
+try:
+ from google.cloud import vision
+ GOOGLE_CLOUD_VISION_AVAILABLE = True
+except ImportError:
+ GOOGLE_CLOUD_VISION_AVAILABLE = False
+ print("Warning: Google Cloud Vision not installed. Install with: pip install google-cloud-vision")
+
+# Import HistoryManager for proper context management
+try:
+ from history_manager import HistoryManager
+except ImportError:
+ HistoryManager = None
+ print("Warning: HistoryManager not available. Context tracking will be limited.")
+
+logger = logging.getLogger(__name__)
+
+@dataclass
+class TextRegion:
+ """Represents a detected text region (speech bubble, narration box, etc.)"""
+ text: str
+ vertices: List[Tuple[int, int]] # Polygon vertices from Cloud Vision
+ bounding_box: Tuple[int, int, int, int] # x, y, width, height
+ confidence: float
+ region_type: str # 'text_block' from Cloud Vision
+ translated_text: Optional[str] = None
+ bubble_bounds: Optional[Tuple[int, int, int, int]] = None # RT-DETR bubble bounds for rendering
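+    # Note: bubble_bounds is a rendering-time hint and is not included in to_dict() below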
+
+ def to_dict(self):
+ return {
+ 'text': self.text,
+ 'vertices': self.vertices,
+ 'bounding_box': self.bounding_box,
+ 'confidence': self.confidence,
+ 'region_type': self.region_type,
+ 'translated_text': self.translated_text
+ }
+
+class MangaTranslator:
+ """Main class for manga translation pipeline using Google Cloud Vision + API Key"""
+
+ # Global, process-wide registry to make local inpainting init safe across threads
+ # Only dictionary operations are locked (microseconds); heavy work happens outside the lock.
+ _inpaint_pool_lock = threading.Lock()
+ _inpaint_pool = {} # (method, model_path) -> {'inpainter': obj|None, 'loaded': bool, 'event': threading.Event()}
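+    # The return helper below also expects per-key 'spares' and 'checked_out' lists in these
+    # records (see _return_inpainter_to_pool), in addition to the fields noted above.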
+
+ # Detector preloading pool for non-singleton bubble detector instances
+ _detector_pool_lock = threading.Lock()
+ _detector_pool = {} # (detector_type, model_id_or_path) -> {'spares': list[BubbleDetector]}
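+    # (the return helper likewise expects a 'checked_out' list in each record;
+    #  see _return_bubble_detector_to_pool)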
+
+ # Bubble detector singleton loading coordination
+ _singleton_bd_event = threading.Event()
+ _singleton_bd_loading = False
+
+ # SINGLETON PATTERN: Shared model instances across all translators
+ _singleton_lock = threading.Lock()
+ _singleton_bubble_detector = None
+ _singleton_local_inpainter = None
+ _singleton_refs = 0 # Reference counter for singleton instances
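+    # Lifecycle sketch (assumption, based on cleanup_singletons below): translators that adopt the
+    # shared detector/inpainter bump _singleton_refs and drop it on teardown; cleanup_singletons()
+    # only releases the models once the counter is back to zero, unless force=True.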
+
+ # Class-level cancellation flag for all instances
+ _global_cancelled = False
+ _global_cancel_lock = threading.RLock()
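+    # Intended usage (sketch): a Stop action calls MangaTranslator.set_global_cancellation(True),
+    # worker loops poll is_globally_cancelled() between regions/images, and reset_global_flags()
+    # clears the flag when a new translation run starts.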
+
+ @classmethod
+ def set_global_cancellation(cls, cancelled: bool):
+ """Set global cancellation flag for all translator instances"""
+ with cls._global_cancel_lock:
+ cls._global_cancelled = cancelled
+
+ @classmethod
+ def is_globally_cancelled(cls) -> bool:
+ """Check if globally cancelled"""
+ with cls._global_cancel_lock:
+ return cls._global_cancelled
+
+ @classmethod
+ def reset_global_flags(cls):
+ """Reset global cancellation flags when starting new translation"""
+ with cls._global_cancel_lock:
+ cls._global_cancelled = False
+
+ def _return_inpainter_to_pool(self):
+ """Return a checked-out inpainter instance back to the pool for reuse."""
+ if not hasattr(self, '_checked_out_inpainter') or not hasattr(self, '_inpainter_pool_key'):
+ return # Nothing checked out
+
+ try:
+ with MangaTranslator._inpaint_pool_lock:
+ key = self._inpainter_pool_key
+ rec = MangaTranslator._inpaint_pool.get(key)
+ if rec and 'checked_out' in rec:
+ checked_out = rec['checked_out']
+ if self._checked_out_inpainter in checked_out:
+ checked_out.remove(self._checked_out_inpainter)
+ self._log(f"🔄 Returned inpainter to pool ({len(checked_out)}/{len(rec.get('spares', []))} still in use)", "info")
+ # Clear the references
+ self._checked_out_inpainter = None
+ self._inpainter_pool_key = None
+ except Exception as e:
+ # Non-critical - just log
+ try:
+ self._log(f"⚠️ Failed to return inpainter to pool: {e}", "debug")
+ except:
+ pass
+
+ def _return_bubble_detector_to_pool(self):
+ """Return a checked-out bubble detector instance back to the pool for reuse."""
+ if not hasattr(self, '_checked_out_bubble_detector') or not hasattr(self, '_bubble_detector_pool_key'):
+ return # Nothing checked out
+
+ try:
+ with MangaTranslator._detector_pool_lock:
+ key = self._bubble_detector_pool_key
+ rec = MangaTranslator._detector_pool.get(key)
+ if rec and 'checked_out' in rec:
+ checked_out = rec['checked_out']
+ if self._checked_out_bubble_detector in checked_out:
+ checked_out.remove(self._checked_out_bubble_detector)
+ self._log(f"🔄 Returned bubble detector to pool ({len(checked_out)}/{len(rec.get('spares', []))} still in use)", "info")
+ # Clear the references
+ self._checked_out_bubble_detector = None
+ self._bubble_detector_pool_key = None
+ except Exception as e:
+ # Non-critical - just log
+ try:
+ self._log(f"⚠️ Failed to return bubble detector to pool: {e}", "debug")
+ except:
+ pass
+
+ @classmethod
+ def cleanup_singletons(cls, force=False):
+ """Clean up singleton instances when no longer needed
+
+ Args:
+ force: If True, cleanup even if references exist (for app shutdown)
+ """
+ with cls._singleton_lock:
+ if force or cls._singleton_refs == 0:
+ # Cleanup singleton bubble detector
+ if cls._singleton_bubble_detector is not None:
+ try:
+ if hasattr(cls._singleton_bubble_detector, 'unload'):
+ cls._singleton_bubble_detector.unload(release_shared=True)
+ cls._singleton_bubble_detector = None
+ print("🤖 Singleton bubble detector cleaned up")
+ except Exception as e:
+ print(f"Failed to cleanup singleton bubble detector: {e}")
+
+ # Cleanup singleton local inpainter
+ if cls._singleton_local_inpainter is not None:
+ try:
+ if hasattr(cls._singleton_local_inpainter, 'unload'):
+ cls._singleton_local_inpainter.unload()
+ cls._singleton_local_inpainter = None
+ print("🎨 Singleton local inpainter cleaned up")
+ except Exception as e:
+ print(f"Failed to cleanup singleton local inpainter: {e}")
+
+ cls._singleton_refs = 0
+
+ def __init__(self, ocr_config: dict, unified_client, main_gui, log_callback=None):
+ """Initialize with OCR configuration and API client from main GUI
+
+ Args:
+ ocr_config: Dictionary with OCR provider settings:
+ {
+ 'provider': 'google' or 'azure',
+ 'google_credentials_path': str (if google),
+ 'azure_key': str (if azure),
+ 'azure_endpoint': str (if azure)
+ }
+ """
+ # CRITICAL: Set thread limits FIRST before any heavy library operations
+ # This must happen before cv2, torch, numpy operations
+ try:
+ parallel_enabled = main_gui.config.get('manga_settings', {}).get('advanced', {}).get('parallel_processing', False)
+ if not parallel_enabled:
+ # Force single-threaded mode for all computational libraries
+ os.environ['OMP_NUM_THREADS'] = '1'
+ os.environ['MKL_NUM_THREADS'] = '1'
+ os.environ['OPENBLAS_NUM_THREADS'] = '1'
+ os.environ['NUMEXPR_NUM_THREADS'] = '1'
+ os.environ['VECLIB_MAXIMUM_THREADS'] = '1'
+ os.environ['ONNXRUNTIME_NUM_THREADS'] = '1'
+ # Set torch and cv2 thread limits if already imported
+ try:
+ import torch
+ torch.set_num_threads(1)
+ except (ImportError, RuntimeError):
+ pass
+ try:
+ cv2.setNumThreads(1)
+ except (AttributeError, NameError):
+ pass
+ except Exception:
+ pass # Silently fail if config not available
+
+ # Set up logging first
+ self.log_callback = log_callback
+ self.main_gui = main_gui
+
+ # Set up stdout capture to redirect prints to GUI
+ self._setup_stdout_capture()
+
+ # Pass log callback to unified client
+ self.client = unified_client
+ if hasattr(self.client, 'log_callback'):
+ self.client.log_callback = log_callback
+ elif hasattr(self.client, 'set_log_callback'):
+ self.client.set_log_callback(log_callback)
+ self.ocr_config = ocr_config
+ self.main_gui = main_gui
+ self.log_callback = log_callback
+ self.config = main_gui.config
+ self.manga_settings = self.config.get('manga_settings', {})
+ # Concise logging flag from Advanced settings
+ try:
+ self.concise_logs = bool(self.manga_settings.get('advanced', {}).get('concise_logs', True))
+ except Exception:
+ self.concise_logs = True
+
+ # Ensure all GUI environment variables are set
+ self._sync_environment_variables()
+
+ # Initialize attributes
+ self.current_image = None
+ self.current_mask = None
+ self.text_regions = []
+ self.translated_regions = []
+ self.final_image = None
+
+ # Initialize inpainter attributes
+ self.local_inpainter = None
+ self.hybrid_inpainter = None
+ self.inpainter = None
+
+ # Initialize bubble detector (will check singleton mode later)
+ self.bubble_detector = None
+ # Default: do NOT use singleton models unless explicitly enabled
+ self.use_singleton_models = self.manga_settings.get('advanced', {}).get('use_singleton_models', False)
+
+ # For bubble detector specifically, prefer a singleton so it stays resident in RAM
+ self.use_singleton_bubble_detector = self.manga_settings.get('advanced', {}).get('use_singleton_bubble_detector', True)
+
+ # Processing flags
+ self.is_processing = False
+ self.cancel_requested = False
+ self.stop_flag = None # Initialize stop_flag attribute
+
+ # Initialize batch mode attributes (API parallelism) from environment, not GUI local toggles
+ # BATCH_TRANSLATION controls whether UnifiedClient allows concurrent API calls across threads.
+ try:
+ self.batch_mode = os.getenv('BATCH_TRANSLATION', '0') == '1'
+ except Exception:
+ self.batch_mode = False
+
+ # OCR ROI cache - PER IMAGE ONLY (cleared aggressively to prevent text leakage)
+ # CRITICAL: This cache MUST be cleared before every new image to prevent text contamination
+ # THREAD-SAFE: Each translator instance has its own cache (safe for parallel panel translation)
+ self.ocr_roi_cache = {}
+ self._current_image_hash = None # Track current image to force cache invalidation
+
+        # Thread-safe lock for cache operations (critical for parallel panel translation)
+        # (threading is imported at module level, so no local import is needed here)
+        self._cache_lock = threading.Lock()
+ try:
+ self.batch_size = int(os.getenv('BATCH_SIZE', '1'))
+ except Exception:
+ # Fallback to GUI entry if present; otherwise default to 1
+ try:
+ self.batch_size = int(main_gui.batch_size_var.get()) if hasattr(main_gui, 'batch_size_var') else 1
+ except Exception:
+ self.batch_size = 1
+ self.batch_current = 1
+
+ if self.batch_mode:
+ self._log(f"📦 BATCH MODE: Processing {self.batch_size} images")
+ self._log(f"⏱️ Keeping API delay for rate limit protection")
+
+ # NOTE: We NO LONGER preload models here!
+ # Models should only be loaded when actually needed
+ # This was causing unnecessary RAM usage
+ ocr_settings = self.manga_settings.get('ocr', {})
+ bubble_detection_enabled = ocr_settings.get('bubble_detection_enabled', False)
+ if bubble_detection_enabled:
+ self._log("📦 BATCH MODE: Bubble detection will be loaded on first use")
+ else:
+ self._log("📦 BATCH MODE: Bubble detection is disabled")
+
+ # Cache for processed images - DEPRECATED/UNUSED (kept for backward compatibility)
+ # DO NOT USE THIS FOR TEXT DATA - IT CAN LEAK BETWEEN IMAGES
+ self.cache = {}
+ # Determine OCR provider
+ self.ocr_provider = ocr_config.get('provider', 'google')
+
+ if self.ocr_provider == 'google':
+ if not GOOGLE_CLOUD_VISION_AVAILABLE:
+ raise ImportError("Google Cloud Vision required. Install with: pip install google-cloud-vision")
+
+ google_path = ocr_config.get('google_credentials_path')
+ if not google_path:
+ raise ValueError("Google credentials path required")
+
+ os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = google_path
+ self.vision_client = vision.ImageAnnotatorClient()
+
+ elif self.ocr_provider == 'azure':
+ # Import Azure libraries
+ try:
+ from azure.cognitiveservices.vision.computervision import ComputerVisionClient
+ from msrest.authentication import CognitiveServicesCredentials
+ self.azure_cv = ComputerVisionClient
+ self.azure_creds = CognitiveServicesCredentials
+ except ImportError:
+ raise ImportError("Azure Computer Vision required. Install with: pip install azure-cognitiveservices-vision-computervision")
+
+ azure_key = ocr_config.get('azure_key')
+ azure_endpoint = ocr_config.get('azure_endpoint')
+
+ if not azure_key or not azure_endpoint:
+ raise ValueError("Azure key and endpoint required")
+
+ self.vision_client = self.azure_cv(
+ azure_endpoint,
+ self.azure_creds(azure_key)
+ )
+ else:
+ # New OCR providers handled by OCR manager
+ try:
+ from ocr_manager import OCRManager
+ self.ocr_manager = OCRManager(log_callback=log_callback)
+ print(f"Initialized OCR Manager for {self.ocr_provider}")
+ # Initialize OCR manager with stop flag awareness
+ if hasattr(self.ocr_manager, 'reset_stop_flags'):
+ self.ocr_manager.reset_stop_flags()
+ except Exception as _e:
+ self.ocr_manager = None
+ self._log(f"Failed to initialize OCRManager: {str(_e)}", "error")
+
+
+ # Prefer allocator that can return memory to OS (effective before torch loads)
+ try:
+ os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "expandable_segments:True")
+ os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")
+ except Exception:
+ pass
+
+ # Get all settings from GUI
+ self.api_delay = float(self.main_gui.delay_entry.get() if hasattr(main_gui, 'delay_entry') else 2.0)
+ # Propagate API delay to unified_api_client via env var so its internal pacing/logging matches GUI
+ try:
+ os.environ["SEND_INTERVAL_SECONDS"] = str(self.api_delay)
+ except Exception:
+ pass
+ self.temperature = float(main_gui.trans_temp.get() if hasattr(main_gui, 'trans_temp') else 0.3)
+ self.max_tokens = int(main_gui.max_output_tokens if hasattr(main_gui, 'max_output_tokens') else 4000)
+ if hasattr(main_gui, 'token_limit_disabled') and main_gui.token_limit_disabled:
+ self.input_token_limit = None # None means no limit
+ self._log("📊 Input token limit: DISABLED (unlimited)")
+ else:
+ token_limit_value = main_gui.token_limit_entry.get() if hasattr(main_gui, 'token_limit_entry') else '120000'
+ if token_limit_value and token_limit_value.strip().isdigit():
+ self.input_token_limit = int(token_limit_value.strip())
+ else:
+ self.input_token_limit = 120000 # Default
+ self._log(f"📊 Input token limit: {self.input_token_limit} tokens")
+
+ # Get contextual settings from GUI
+ self.contextual_enabled = main_gui.contextual_var.get() if hasattr(main_gui, 'contextual_var') else False
+ self.translation_history_limit = int(main_gui.trans_history.get() if hasattr(main_gui, 'trans_history') else 3)
+ self.rolling_history_enabled = main_gui.translation_history_rolling_var.get() if hasattr(main_gui, 'translation_history_rolling_var') else False
+
+ # Initialize HistoryManager placeholder
+ self.history_manager = None
+ self.history_manager_initialized = False
+ self.history_output_dir = None
+
+ # Full page context translation settings
+ self.full_page_context_enabled = True
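+        # Enabled by default; callers may toggle this later via set_full_page_context()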
+
+ # Default prompt for full page context mode
+ self.full_page_context_prompt = (
+ "You will receive multiple text segments from a manga page, each prefixed with an index like [0], [1], etc. "
+ "Translate each segment considering the context of all segments together. "
+ "Maintain consistency in character names, tone, and style across all translations.\n\n"
+ "CRITICAL: Return your response as a valid JSON object where each key includes BOTH the index prefix "
+ "AND the original text EXACTLY as provided (e.g., '[0] こんにちは'), and each value is the translation.\n"
+ "This is essential for correct mapping - do not modify or omit the index prefixes!\n\n"
+ "Make sure to properly escape any special characters in the JSON:\n"
+ "- Use \\n for newlines\n"
+ "- Use \\\" for quotes\n"
+ "- Use \\\\ for backslashes\n\n"
+ "Example:\n"
+ '{\n'
+ ' "[0] こんにちは": "Hello",\n'
+ ' "[1] ありがとう": "Thank you",\n'
+ ' "[2] さようなら": "Goodbye"\n'
+ '}\n\n'
+ 'REMEMBER: Keep the [index] prefix in each JSON key exactly as shown in the input!'
+ )
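+        # Note: downstream mapping relies on the "[i]" prefix surviving in each returned key,
+        # e.g. the key '[0] こんにちは' with value 'Hello' is matched back to text region 0.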
+
+ # Visual context setting (for non-vision model support)
+ self.visual_context_enabled = main_gui.config.get('manga_visual_context_enabled', True)
+
+ # Store context for contextual translation (backwards compatibility)
+ self.translation_context = []
+
+ # Font settings for text rendering
+ self.font_path = self._find_font()
+ self.min_font_size = 10
+ self.max_font_size = 60
+ try:
+ _ms = main_gui.config.get('manga_settings', {}) or {}
+ _rend = _ms.get('rendering', {}) or {}
+ _font = _ms.get('font_sizing', {}) or {}
+ self.min_readable_size = int(_rend.get('auto_min_size', _font.get('min_size', 16)))
+ except Exception:
+ self.min_readable_size = int(main_gui.config.get('manga_min_readable_size', 16))
+ self.max_font_size_limit = main_gui.config.get('manga_max_font_size', 24)
+ self.strict_text_wrapping = main_gui.config.get('manga_strict_text_wrapping', False)
+
+ # Enhanced text rendering settings - Load from config if available
+ config = main_gui.config if hasattr(main_gui, 'config') else {}
+
+ self.text_bg_opacity = config.get('manga_bg_opacity', 255) # 0-255, default fully opaque
+ self.text_bg_style = config.get('manga_bg_style', 'box') # 'box', 'circle', 'wrap'
+ self.text_bg_reduction = config.get('manga_bg_reduction', 1.0) # Size reduction factor (0.5-1.0)
+ self.constrain_to_bubble = config.get('manga_constrain_to_bubble', True)
+
+ # Text color from config
+ manga_text_color = config.get('manga_text_color', [0, 0, 0])
+ self.text_color = tuple(manga_text_color) # Convert list to tuple
+
+ self.outline_color = (255, 255, 255) # White outline
+ self.outline_width_factor = 15 # Divider for font_size to get outline width
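+        # e.g. a 30 px font size gives an outline width of roughly 30 / 15 = 2 px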
+ self.selected_font_style = config.get('manga_font_path', None) # Will store selected font path
+ self.custom_font_size = config.get('manga_font_size', None) if config.get('manga_font_size', 0) > 0 else None
+
+ # Text shadow settings from config
+ self.shadow_enabled = config.get('manga_shadow_enabled', False)
+ manga_shadow_color = config.get('manga_shadow_color', [128, 128, 128])
+ self.shadow_color = tuple(manga_shadow_color) # Convert list to tuple
+ self.shadow_offset_x = config.get('manga_shadow_offset_x', 2)
+ self.shadow_offset_y = config.get('manga_shadow_offset_y', 2)
+ self.shadow_blur = config.get('manga_shadow_blur', 0) # 0 = sharp shadow, higher = more blur
+ self.force_caps_lock = config.get('manga_force_caps_lock', False)
+ self.skip_inpainting = config.get('manga_skip_inpainting', True)
+
+ # Font size multiplier mode - Load from config
+ self.font_size_mode = config.get('manga_font_size_mode', 'fixed') # 'fixed' or 'multiplier'
+        self.font_size_multiplier = config.get('manga_font_size_multiplier', 1.0)  # Default multiplier
+
+ #inpainting quality
+ self.inpaint_quality = config.get('manga_inpaint_quality', 'high') # 'high' or 'fast'
+
+ self._log("\n🔧 MangaTranslator initialized with settings:")
+ self._log(f" API Delay: {self.api_delay}s")
+ self._log(f" Temperature: {self.temperature}")
+ self._log(f" Max Output Tokens: {self.max_tokens}")
+ self._log(f" Input Token Limit: {'DISABLED' if self.input_token_limit is None else self.input_token_limit}")
+ self._log(f" Contextual Translation: {'ENABLED' if self.contextual_enabled else 'DISABLED'}")
+ self._log(f" Translation History Limit: {self.translation_history_limit}")
+ self._log(f" Rolling History: {'ENABLED' if self.rolling_history_enabled else 'DISABLED'}")
+ self._log(f" Font Path: {self.font_path or 'Default'}")
+ self._log(f" Text Rendering: BG {self.text_bg_style}, Opacity {int(self.text_bg_opacity/255*100)}%")
+ self._log(f" Shadow: {'ENABLED' if self.shadow_enabled else 'DISABLED'}\n")
+
+ # Initialize local inpainter if configured (respects singleton mode)
+ if self.manga_settings.get('inpainting', {}).get('method') == 'local':
+ if self.use_singleton_models:
+ self._initialize_singleton_local_inpainter()
+ else:
+ self._initialize_local_inpainter()
+
+ # advanced settings
+ self.debug_mode = self.manga_settings.get('advanced', {}).get('debug_mode', False)
+ self.save_intermediate = self.manga_settings.get('advanced', {}).get('save_intermediate', False)
+ self.parallel_processing = self.manga_settings.get('advanced', {}).get('parallel_processing', True)
+ self.max_workers = self.manga_settings.get('advanced', {}).get('max_workers', 2)
+ # Deep cleanup control: if True, release models after every image (aggressive)
+ self.force_deep_cleanup_each_image = self.manga_settings.get('advanced', {}).get('force_deep_cleanup_each_image', False)
+
+ # RAM cap
+ adv = self.manga_settings.get('advanced', {})
+ self.ram_cap_enabled = bool(adv.get('ram_cap_enabled', False))
+ self.ram_cap_mb = int(adv.get('ram_cap_mb', 0) or 0)
+ self.ram_cap_mode = str(adv.get('ram_cap_mode', 'soft'))
+ self.ram_check_interval_sec = float(adv.get('ram_check_interval_sec', 1.0))
+ self.ram_recovery_margin_mb = int(adv.get('ram_recovery_margin_mb', 256))
+ self._mem_over_cap = False
+ self._mem_stop_event = threading.Event()
+ self._mem_thread = None
+ # Advanced RAM gate tuning
+ self.ram_gate_timeout_sec = float(adv.get('ram_gate_timeout_sec', 10.0))
+ self.ram_min_floor_over_baseline_mb = int(adv.get('ram_min_floor_over_baseline_mb', 128))
+ # Measure baseline at init
+ try:
+ self.ram_baseline_mb = self._get_process_rss_mb() or 0
+ except Exception:
+ self.ram_baseline_mb = 0
+ if self.ram_cap_enabled and self.ram_cap_mb > 0:
+ self._init_ram_cap()
+
+
+ def set_stop_flag(self, stop_flag):
+ """Set the stop flag for checking interruptions"""
+ self.stop_flag = stop_flag
+ self.cancel_requested = False
+
+ def reset_stop_flags(self):
+ """Reset all stop flags when starting new translation"""
+ self.cancel_requested = False
+ self.is_processing = False
+ # Reset global flags
+ self.reset_global_flags()
+ self._log("🔄 Stop flags reset for new translation", "debug")
+
+ def _check_stop(self):
+ """Check if stop has been requested using multiple sources"""
+ # Check global cancellation first
+ if self.is_globally_cancelled():
+ self.cancel_requested = True
+ return True
+
+ # Check local stop flag (only if it exists and is set)
+ if hasattr(self, 'stop_flag') and self.stop_flag and self.stop_flag.is_set():
+ self.cancel_requested = True
+ return True
+
+ # Check processing flag
+ if hasattr(self, 'cancel_requested') and self.cancel_requested:
+ return True
+
+ return False
+
+ def _setup_stdout_capture(self):
+ """Set up stdout capture to redirect print statements to GUI"""
+ import sys
+ import builtins
+
+ # Store original print function
+ self._original_print = builtins.print
+
+ # Create custom print function
+ def gui_print(*args, **kwargs):
+ """Custom print that redirects to GUI"""
+ # Convert args to string
+ message = ' '.join(str(arg) for arg in args)
+
+ # Check if this is one of the specific messages we want to capture
+ if any(marker in message for marker in ['🔍', '✅', '⏳', 'INFO:', 'ERROR:', 'WARNING:']):
+ if self.log_callback:
+ # Clean up the message
+ message = message.strip()
+
+ # Determine level
+ level = 'info'
+ if 'ERROR:' in message or '❌' in message:
+ level = 'error'
+ elif 'WARNING:' in message or '⚠️' in message:
+ level = 'warning'
+
+ # Remove prefixes like "INFO:" if present
+ for prefix in ['INFO:', 'ERROR:', 'WARNING:', 'DEBUG:']:
+ message = message.replace(prefix, '').strip()
+
+ # Send to GUI
+ self.log_callback(message, level)
+ return # Don't print to console
+
+ # For other messages, use original print
+ self._original_print(*args, **kwargs)
+
+ # Replace the built-in print
+ builtins.print = gui_print
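+        # The original print is restored in __del__ when this translator is destroyed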
+
+ def __del__(self):
+ """Restore original print when MangaTranslator is destroyed"""
+ if hasattr(self, '_original_print'):
+ import builtins
+ builtins.print = self._original_print
+ # Best-effort shutdown in case caller forgot to call shutdown()
+ try:
+ self.shutdown()
+ except Exception:
+ pass
+
+ def _cleanup_thread_locals(self):
+ """Aggressively release thread-local heavy objects (onnx sessions, detectors)."""
+ try:
+ if hasattr(self, '_thread_local'):
+ tl = self._thread_local
+ # Release thread-local inpainters
+ if hasattr(tl, 'local_inpainters') and isinstance(tl.local_inpainters, dict):
+ try:
+ for inp in list(tl.local_inpainters.values()):
+ try:
+ if hasattr(inp, 'unload'):
+ inp.unload()
+ except Exception:
+ pass
+ finally:
+ try:
+ tl.local_inpainters.clear()
+ except Exception:
+ pass
+ # Return thread-local bubble detector to pool (DO NOT unload)
+ if hasattr(tl, 'bubble_detector') and tl.bubble_detector is not None:
+ try:
+ # Instead of unloading, return to pool for reuse
+ self._return_bubble_detector_to_pool()
+ # Keep thread-local reference intact for reuse in next image
+ # Only clear if we're truly shutting down the thread
+ except Exception:
+ pass
+ except Exception:
+ # Best-effort cleanup only
+ pass
+
+ def shutdown(self):
+ """Fully release resources for MangaTranslator (models, detectors, torch caches, threads)."""
+ try:
+ # Decrement singleton reference counter if using singleton mode
+ if hasattr(self, 'use_singleton_models') and self.use_singleton_models:
+ with MangaTranslator._singleton_lock:
+ MangaTranslator._singleton_refs = max(0, MangaTranslator._singleton_refs - 1)
+ self._log(f"Singleton refs: {MangaTranslator._singleton_refs}", "debug")
+
+ # Stop memory watchdog thread if running
+ if hasattr(self, '_mem_stop_event') and getattr(self, '_mem_stop_event', None) is not None:
+ try:
+ self._mem_stop_event.set()
+ except Exception:
+ pass
+ # Perform deep cleanup, then try to teardown torch
+ try:
+ self._deep_cleanup_models()
+ except Exception:
+ pass
+ try:
+ self._force_torch_teardown()
+ except Exception:
+ pass
+ try:
+ self._huggingface_teardown()
+ except Exception:
+ pass
+ try:
+ self._trim_working_set()
+ except Exception:
+ pass
+ # Null out heavy references
+ for attr in [
+ 'client', 'vision_client', 'local_inpainter', 'hybrid_inpainter', 'inpainter',
+ 'bubble_detector', 'ocr_manager', 'history_manager', 'current_image', 'current_mask',
+ 'text_regions', 'translated_regions', 'final_image'
+ ]:
+ try:
+ if hasattr(self, attr):
+ setattr(self, attr, None)
+ except Exception:
+ pass
+ except Exception as e:
+ try:
+ self._log(f"⚠️ shutdown() encountered: {e}", "warning")
+ except Exception:
+ pass
+
+ def _sync_environment_variables(self):
+ """Sync all GUI environment variables to ensure manga translation respects GUI settings
+ This ensures settings like RETRY_TRUNCATED, THINKING_BUDGET, etc. are properly set
+ """
+ try:
+ # Get config from main_gui if available
+ if not hasattr(self, 'main_gui') or not self.main_gui:
+ return
+
+ # Use the main_gui's set_all_environment_variables method if available
+ if hasattr(self.main_gui, 'set_all_environment_variables'):
+ self.main_gui.set_all_environment_variables()
+ else:
+ # Fallback: manually set key variables
+ config = self.main_gui.config if hasattr(self.main_gui, 'config') else {}
+
+ # Thinking settings (most important for speed)
+ thinking_enabled = config.get('enable_gemini_thinking', True)
+ thinking_budget = config.get('gemini_thinking_budget', -1)
+
+ # CRITICAL FIX: If thinking is disabled, force budget to 0 regardless of config value
+ if not thinking_enabled:
+ thinking_budget = 0
+
+ os.environ['ENABLE_GEMINI_THINKING'] = '1' if thinking_enabled else '0'
+ os.environ['GEMINI_THINKING_BUDGET'] = str(thinking_budget)
+ os.environ['THINKING_BUDGET'] = str(thinking_budget) # Also set for unified_api_client
+
+ # Retry settings
+ retry_truncated = config.get('retry_truncated', False)
+ max_retry_tokens = config.get('max_retry_tokens', 16384)
+ max_retries = config.get('max_retries', 7)
+ os.environ['RETRY_TRUNCATED'] = '1' if retry_truncated else '0'
+ os.environ['MAX_RETRY_TOKENS'] = str(max_retry_tokens)
+ os.environ['MAX_RETRIES'] = str(max_retries)
+
+ # Safety settings
+ disable_gemini_safety = config.get('disable_gemini_safety', False)
+ os.environ['DISABLE_GEMINI_SAFETY'] = '1' if disable_gemini_safety else '0'
+
+ except Exception as e:
+ self._log(f"⚠️ Failed to sync environment variables: {e}", "warning")
+
+ def _force_torch_teardown(self):
+ """Best-effort teardown of PyTorch CUDA context and caches to drop closer to baseline.
+ Safe to call even if CUDA is not available.
+ """
+ try:
+ import torch, os, gc
+ # CPU: free cached tensors
+ try:
+ gc.collect()
+ except Exception:
+ pass
+ # CUDA path
+ if hasattr(torch, 'cuda') and torch.cuda.is_available():
+ try:
+ torch.cuda.synchronize()
+ except Exception:
+ pass
+ try:
+ torch.cuda.empty_cache()
+ except Exception:
+ pass
+ try:
+ torch.cuda.ipc_collect()
+ except Exception:
+ pass
+ # Try to clear cuBLAS workspaces (not always available)
+ try:
+ getattr(torch._C, "_cuda_clearCublasWorkspaces")()
+ except Exception:
+ pass
+ # Optional hard reset via CuPy if present
+ reset_done = False
+ try:
+ import cupy
+ try:
+ cupy.cuda.runtime.deviceReset()
+ reset_done = True
+ self._log("CUDA deviceReset via CuPy", "debug")
+ except Exception:
+ pass
+ except Exception:
+ pass
+ # Fallback: attempt to call cudaDeviceReset from cudart on Windows
+ if os.name == 'nt' and not reset_done:
+ try:
+ import ctypes
+ candidates = [
+ "cudart64_12.dll", "cudart64_120.dll", "cudart64_110.dll",
+ "cudart64_102.dll", "cudart64_101.dll", "cudart64_100.dll", "cudart64_90.dll"
+ ]
+ for name in candidates:
+ try:
+ dll = ctypes.CDLL(name)
+ dll.cudaDeviceReset.restype = ctypes.c_int
+ rc = dll.cudaDeviceReset()
+ self._log(f"cudaDeviceReset via {name} rc={rc}", "debug")
+ reset_done = True
+ break
+ except Exception:
+ continue
+ except Exception:
+ pass
+ except Exception:
+ pass
+
+ def _huggingface_teardown(self):
+ """Best-effort teardown of HuggingFace/transformers/tokenizers state.
+ - Clears on-disk model cache for known repos (via _clear_hf_cache)
+        - Purges relevant modules from sys.modules unless AGGRESSIVE_HF_UNLOAD=0 (on by default)
+ """
+ try:
+ import os, sys, gc
+ # Clear disk cache for detectors (and any default repo) to avoid growth across runs
+ try:
+ self._clear_hf_cache()
+ except Exception:
+ pass
+ # Optional aggressive purge of modules to free Python-level caches
+ if os.getenv('AGGRESSIVE_HF_UNLOAD', '1') == '1':
+ prefixes = (
+ 'transformers',
+ 'huggingface_hub',
+ 'tokenizers',
+ 'safetensors',
+ 'accelerate',
+ )
+ to_purge = [m for m in list(sys.modules.keys()) if m.startswith(prefixes)]
+ for m in to_purge:
+ try:
+ del sys.modules[m]
+ except Exception:
+ pass
+ gc.collect()
+ except Exception:
+ pass
+
+ def _deep_cleanup_models(self):
+ """Release ALL model references and caches to reduce RAM after translation.
+ This is the COMPREHENSIVE cleanup that ensures all models are unloaded from RAM.
+ """
+ self._log("🧹 Starting comprehensive model cleanup to free RAM...", "info")
+
+ try:
+ # ========== 1. CLEANUP OCR MODELS ==========
+ try:
+ if hasattr(self, 'ocr_manager'):
+ ocr_manager = getattr(self, 'ocr_manager', None)
+ if ocr_manager:
+ self._log(" Cleaning up OCR models...", "debug")
+ # Clear all loaded OCR providers
+ if hasattr(ocr_manager, 'providers'):
+ for provider_name, provider in ocr_manager.providers.items():
+ try:
+ # Unload the model
+ if hasattr(provider, 'model'):
+ provider.model = None
+ if hasattr(provider, 'processor'):
+ provider.processor = None
+ if hasattr(provider, 'tokenizer'):
+ provider.tokenizer = None
+ if hasattr(provider, 'reader'):
+ provider.reader = None
+ if hasattr(provider, 'is_loaded'):
+ provider.is_loaded = False
+ self._log(f" ✓ Unloaded {provider_name} OCR provider", "debug")
+ except Exception as e:
+ self._log(f" Warning: Failed to unload {provider_name}: {e}", "debug")
+ # Clear the entire OCR manager
+ self.ocr_manager = None
+ self._log(" ✓ OCR models cleaned up", "debug")
+ except Exception as e:
+ self._log(f" Warning: OCR cleanup failed: {e}", "debug")
+
+ # ========== 2. CLEANUP BUBBLE DETECTOR (YOLO/RT-DETR) ==========
+ try:
+ # Instance-level bubble detector
+ if hasattr(self, 'bubble_detector') and self.bubble_detector is not None:
+ # Check if using singleton mode - don't unload shared instance
+ if (getattr(self, 'use_singleton_bubble_detector', False)) or (hasattr(self, 'use_singleton_models') and self.use_singleton_models):
+ self._log(" Skipping bubble detector cleanup (singleton mode)", "debug")
+ # Just clear our reference, don't unload the shared instance
+ self.bubble_detector = None
+ else:
+ self._log(" Cleaning up bubble detector (YOLO/RT-DETR)...", "debug")
+ bd = self.bubble_detector
+ try:
+ if hasattr(bd, 'unload'):
+ bd.unload(release_shared=True) # This unloads YOLO and RT-DETR models
+ self._log(" ✓ Called bubble detector unload", "debug")
+ except Exception as e:
+ self._log(f" Warning: Bubble detector unload failed: {e}", "debug")
+ self.bubble_detector = None
+ self._log(" ✓ Bubble detector cleaned up", "debug")
+
+ # Also clean class-level shared RT-DETR models unless keeping singleton warm
+ if not getattr(self, 'use_singleton_bubble_detector', False):
+ try:
+ from bubble_detector import BubbleDetector
+ if hasattr(BubbleDetector, '_rtdetr_shared_model'):
+ BubbleDetector._rtdetr_shared_model = None
+ if hasattr(BubbleDetector, '_rtdetr_shared_processor'):
+ BubbleDetector._rtdetr_shared_processor = None
+ if hasattr(BubbleDetector, '_rtdetr_loaded'):
+ BubbleDetector._rtdetr_loaded = False
+ self._log(" ✓ Cleared shared RT-DETR cache", "debug")
+ except Exception:
+ pass
+ # Clear preloaded detector spares
+ try:
+ with MangaTranslator._detector_pool_lock:
+ for rec in MangaTranslator._detector_pool.values():
+ try:
+ rec['spares'] = []
+ except Exception:
+ pass
+ except Exception:
+ pass
+ except Exception as e:
+ self._log(f" Warning: Bubble detector cleanup failed: {e}", "debug")
+
+ # ========== 3. CLEANUP INPAINTERS ==========
+ try:
+ self._log(" Cleaning up inpainter models...", "debug")
+
+ # Instance-level inpainter
+ if hasattr(self, 'local_inpainter') and self.local_inpainter is not None:
+ # Check if using singleton mode - don't unload shared instance
+ if hasattr(self, 'use_singleton_models') and self.use_singleton_models:
+ self._log(" Skipping local inpainter cleanup (singleton mode)", "debug")
+ # Just clear our reference, don't unload the shared instance
+ self.local_inpainter = None
+ else:
+ try:
+ if hasattr(self.local_inpainter, 'unload'):
+ self.local_inpainter.unload()
+ self._log(" ✓ Unloaded local inpainter", "debug")
+ except Exception:
+ pass
+ self.local_inpainter = None
+
+ # Hybrid inpainter
+ if hasattr(self, 'hybrid_inpainter') and self.hybrid_inpainter is not None:
+ try:
+ if hasattr(self.hybrid_inpainter, 'unload'):
+ self.hybrid_inpainter.unload()
+ self._log(" ✓ Unloaded hybrid inpainter", "debug")
+ except Exception:
+ pass
+ self.hybrid_inpainter = None
+
+ # Generic inpainter reference
+ if hasattr(self, 'inpainter') and self.inpainter is not None:
+ try:
+ if hasattr(self.inpainter, 'unload'):
+ self.inpainter.unload()
+ self._log(" ✓ Unloaded inpainter", "debug")
+ except Exception:
+ pass
+ self.inpainter = None
+
+ # Release any shared inpainters in the global pool
+ with MangaTranslator._inpaint_pool_lock:
+ for key, rec in list(MangaTranslator._inpaint_pool.items()):
+ try:
+ inp = rec.get('inpainter') if isinstance(rec, dict) else None
+ if inp is not None:
+ try:
+ if hasattr(inp, 'unload'):
+ inp.unload()
+ self._log(f" ✓ Unloaded pooled inpainter: {key}", "debug")
+ except Exception:
+ pass
+ # Drop any spare instances as well
+ try:
+ for spare in rec.get('spares') or []:
+ try:
+ if hasattr(spare, 'unload'):
+ spare.unload()
+ except Exception:
+ pass
+ rec['spares'] = []
+ except Exception:
+ pass
+ except Exception:
+ pass
+ MangaTranslator._inpaint_pool.clear()
+ self._log(" ✓ Cleared inpainter pool", "debug")
+
+ # Release process-wide shared inpainter
+ if hasattr(MangaTranslator, '_shared_local_inpainter'):
+ shared = getattr(MangaTranslator, '_shared_local_inpainter', None)
+ if shared is not None:
+ try:
+ if hasattr(shared, 'unload'):
+ shared.unload()
+ self._log(" ✓ Unloaded shared inpainter", "debug")
+ except Exception:
+ pass
+ setattr(MangaTranslator, '_shared_local_inpainter', None)
+
+ self._log(" ✓ Inpainter models cleaned up", "debug")
+ except Exception as e:
+ self._log(f" Warning: Inpainter cleanup failed: {e}", "debug")
+
+ # ========== 4. CLEANUP THREAD-LOCAL MODELS ==========
+ try:
+ if hasattr(self, '_thread_local') and self._thread_local is not None:
+ self._log(" Cleaning up thread-local models...", "debug")
+ tl = self._thread_local
+
+ # Thread-local inpainters
+ if hasattr(tl, 'local_inpainters') and isinstance(tl.local_inpainters, dict):
+ for key, inp in list(tl.local_inpainters.items()):
+ try:
+ if hasattr(inp, 'unload'):
+ inp.unload()
+ self._log(f" ✓ Unloaded thread-local inpainter: {key}", "debug")
+ except Exception:
+ pass
+ tl.local_inpainters.clear()
+
+ # Thread-local bubble detector
+ if hasattr(tl, 'bubble_detector') and tl.bubble_detector is not None:
+ try:
+ if hasattr(tl.bubble_detector, 'unload'):
+ tl.bubble_detector.unload(release_shared=False)
+ self._log(" ✓ Unloaded thread-local bubble detector", "debug")
+ except Exception:
+ pass
+ tl.bubble_detector = None
+
+ self._log(" ✓ Thread-local models cleaned up", "debug")
+ except Exception as e:
+ self._log(f" Warning: Thread-local cleanup failed: {e}", "debug")
+
+ # ========== 5. CLEAR PYTORCH/CUDA CACHE ==========
+ try:
+ import torch
+ if torch.cuda.is_available():
+ torch.cuda.empty_cache()
+ torch.cuda.synchronize()
+ self._log(" ✓ Cleared CUDA cache", "debug")
+ except Exception:
+ pass
+
+ # ========== 6. FORCE GARBAGE COLLECTION ==========
+ try:
+ import gc
+ gc.collect()
+ # Multiple passes for stubborn references
+ gc.collect()
+ gc.collect()
+ self._log(" ✓ Forced garbage collection", "debug")
+ except Exception:
+ pass
+
+ self._log("✅ Model cleanup complete - RAM should be freed", "info")
+
+ except Exception as e:
+ # Never raise from deep cleanup
+ self._log(f"⚠️ Model cleanup encountered error: {e}", "warning")
+ pass
+
+ def _clear_hf_cache(self, repo_id: str = None):
+ """Best-effort: clear Hugging Face cache for a specific repo (RT-DETR by default).
+ This targets disk cache; it won’t directly reduce RAM but helps avoid growth across runs.
+ """
+ try:
+ # Determine repo_id from BubbleDetector if not provided
+ if repo_id is None:
+ try:
+ import bubble_detector as _bdmod
+ BD = getattr(_bdmod, 'BubbleDetector', None)
+ if BD is not None and hasattr(BD, '_rtdetr_repo_id'):
+ repo_id = getattr(BD, '_rtdetr_repo_id') or 'ogkalu/comic-text-and-bubble-detector'
+ else:
+ repo_id = 'ogkalu/comic-text-and-bubble-detector'
+ except Exception:
+ repo_id = 'ogkalu/comic-text-and-bubble-detector'
+
+ # Try to use huggingface_hub to delete just the matching repo cache
+ try:
+ from huggingface_hub import scan_cache_dir
+ info = scan_cache_dir()
+ repos = getattr(info, 'repos', [])
+ to_delete = []
+ for repo in repos:
+ rid = getattr(repo, 'repo_id', None) or getattr(repo, 'id', None)
+ if rid == repo_id:
+ to_delete.append(repo)
+ if to_delete:
+ # Prefer the high-level deletion API if present
+ if hasattr(info, 'delete_repos'):
+ info.delete_repos(to_delete)
+ else:
+ import shutil
+ for repo in to_delete:
+ repo_dir = getattr(repo, 'repo_path', None) or getattr(repo, 'repo_dir', None)
+ if repo_dir and os.path.exists(repo_dir):
+ shutil.rmtree(repo_dir, ignore_errors=True)
+ except Exception:
+ # Fallback: try removing default HF cache dir for this repo pattern
+ try:
+ from pathlib import Path
+ hf_home = os.environ.get('HF_HOME')
+ if hf_home:
+ base = Path(hf_home)
+ else:
+ base = Path.home() / '.cache' / 'huggingface' / 'hub'
+ # Repo cache dirs are named like models--{org}--{name}
+ safe_name = repo_id.replace('/', '--')
+ candidates = list(base.glob(f'models--{safe_name}*'))
+ import shutil
+ for c in candidates:
+ shutil.rmtree(str(c), ignore_errors=True)
+ except Exception:
+ pass
+ except Exception:
+ # Best-effort only
+ pass
+
+ def _trim_working_set(self):
+ """Release freed memory back to the OS where possible.
+ - On Windows: use EmptyWorkingSet on current process
+ - On Linux: attempt malloc_trim(0)
+ - On macOS: no direct API; rely on GC
+ """
+        import platform
+ try:
+ system = platform.system()
+ if system == 'Windows':
+ import ctypes
+ psapi = ctypes.windll.psapi
+ kernel32 = ctypes.windll.kernel32
+ h_process = kernel32.GetCurrentProcess()
+ psapi.EmptyWorkingSet(h_process)
+ elif system == 'Linux':
+ import ctypes
+ libc = ctypes.CDLL('libc.so.6')
+ try:
+ libc.malloc_trim(0)
+ except Exception:
+ pass
+ except Exception:
+ pass
+
+ def _get_process_rss_mb(self) -> int:
+ """Return current RSS in MB (cross-platform best-effort)."""
+ try:
+ import psutil, os as _os
+ return int(psutil.Process(_os.getpid()).memory_info().rss / (1024*1024))
+ except Exception:
+ # Windows fallback
+ try:
+ import ctypes, os as _os
+ class PROCESS_MEMORY_COUNTERS(ctypes.Structure):
+ _fields_ = [
+ ("cb", ctypes.c_uint),
+ ("PageFaultCount", ctypes.c_uint),
+ ("PeakWorkingSetSize", ctypes.c_size_t),
+ ("WorkingSetSize", ctypes.c_size_t),
+ ("QuotaPeakPagedPoolUsage", ctypes.c_size_t),
+ ("QuotaPagedPoolUsage", ctypes.c_size_t),
+ ("QuotaPeakNonPagedPoolUsage", ctypes.c_size_t),
+ ("QuotaNonPagedPoolUsage", ctypes.c_size_t),
+ ("PagefileUsage", ctypes.c_size_t),
+ ("PeakPagefileUsage", ctypes.c_size_t),
+ ]
+ GetCurrentProcess = ctypes.windll.kernel32.GetCurrentProcess
+ GetProcessMemoryInfo = ctypes.windll.psapi.GetProcessMemoryInfo
+ counters = PROCESS_MEMORY_COUNTERS()
+ counters.cb = ctypes.sizeof(PROCESS_MEMORY_COUNTERS)
+ GetProcessMemoryInfo(GetCurrentProcess(), ctypes.byref(counters), counters.cb)
+ return int(counters.WorkingSetSize / (1024*1024))
+ except Exception:
+ return 0
+
+ def _apply_windows_job_memory_limit(self, cap_mb: int) -> bool:
+ """Apply a hard memory cap using Windows Job Objects. Returns True on success."""
+ try:
+ import ctypes
+ from ctypes import wintypes
+ JOB_OBJECT_LIMIT_JOB_MEMORY = 0x00000200
+ JobObjectExtendedLimitInformation = 9
+
+ class JOBOBJECT_BASIC_LIMIT_INFORMATION(ctypes.Structure):
+ _fields_ = [
+ ("PerProcessUserTimeLimit", ctypes.c_longlong),
+ ("PerJobUserTimeLimit", ctypes.c_longlong),
+ ("LimitFlags", wintypes.DWORD),
+ ("MinimumWorkingSetSize", ctypes.c_size_t),
+ ("MaximumWorkingSetSize", ctypes.c_size_t),
+ ("ActiveProcessLimit", wintypes.DWORD),
+ ("Affinity", ctypes.c_void_p),
+ ("PriorityClass", wintypes.DWORD),
+ ("SchedulingClass", wintypes.DWORD),
+ ]
+
+ class IO_COUNTERS(ctypes.Structure):
+ _fields_ = [
+ ("ReadOperationCount", ctypes.c_ulonglong),
+ ("WriteOperationCount", ctypes.c_ulonglong),
+ ("OtherOperationCount", ctypes.c_ulonglong),
+ ("ReadTransferCount", ctypes.c_ulonglong),
+ ("WriteTransferCount", ctypes.c_ulonglong),
+ ("OtherTransferCount", ctypes.c_ulonglong),
+ ]
+
+ class JOBOBJECT_EXTENDED_LIMIT_INFORMATION(ctypes.Structure):
+ _fields_ = [
+ ("BasicLimitInformation", JOBOBJECT_BASIC_LIMIT_INFORMATION),
+ ("IoInfo", IO_COUNTERS),
+ ("ProcessMemoryLimit", ctypes.c_size_t),
+ ("JobMemoryLimit", ctypes.c_size_t),
+ ("PeakProcessMemoryUsed", ctypes.c_size_t),
+ ("PeakJobMemoryUsed", ctypes.c_size_t),
+ ]
+
+ kernel32 = ctypes.WinDLL('kernel32', use_last_error=True)
+ CreateJobObject = kernel32.CreateJobObjectW
+ CreateJobObject.argtypes = [ctypes.c_void_p, wintypes.LPCWSTR]
+ CreateJobObject.restype = wintypes.HANDLE
+ SetInformationJobObject = kernel32.SetInformationJobObject
+ SetInformationJobObject.argtypes = [wintypes.HANDLE, wintypes.INT, ctypes.c_void_p, wintypes.DWORD]
+ SetInformationJobObject.restype = wintypes.BOOL
+ AssignProcessToJobObject = kernel32.AssignProcessToJobObject
+ AssignProcessToJobObject.argtypes = [wintypes.HANDLE, wintypes.HANDLE]
+ AssignProcessToJobObject.restype = wintypes.BOOL
+ GetCurrentProcess = kernel32.GetCurrentProcess
+ GetCurrentProcess.restype = wintypes.HANDLE
+
+ hJob = CreateJobObject(None, None)
+ if not hJob:
+ return False
+
+ info = JOBOBJECT_EXTENDED_LIMIT_INFORMATION()
+ info.BasicLimitInformation.LimitFlags = JOB_OBJECT_LIMIT_JOB_MEMORY
+ info.JobMemoryLimit = ctypes.c_size_t(int(cap_mb) * 1024 * 1024)
+
+ ok = SetInformationJobObject(hJob, JobObjectExtendedLimitInformation, ctypes.byref(info), ctypes.sizeof(info))
+ if not ok:
+ return False
+
+ ok = AssignProcessToJobObject(hJob, GetCurrentProcess())
+ if not ok:
+ return False
+ return True
+ except Exception:
+ return False
+
+ def _memory_watchdog(self):
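+        """Background thread: poll process RSS and run cleanup/working-set trims while usage exceeds the cap."""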
+ try:
+ import time
+ while not self._mem_stop_event.is_set():
+ if not self.ram_cap_enabled or self.ram_cap_mb <= 0:
+ break
+ rss = self._get_process_rss_mb()
+ if rss and rss > self.ram_cap_mb:
+ self._mem_over_cap = True
+ # Aggressive attempt to reduce memory
+ try:
+ self._deep_cleanup_models()
+ except Exception:
+ pass
+ try:
+ self._trim_working_set()
+ except Exception:
+ pass
+ # Wait a bit before re-checking
+ time.sleep(max(0.2, self.ram_check_interval_sec / 2))
+ time.sleep(0.1) # Brief pause for stability
+ self._log("💤 Memory watchdog pausing briefly for stability", "debug")
+ else:
+ # Below cap or couldn't read RSS
+ self._mem_over_cap = False
+ time.sleep(self.ram_check_interval_sec)
+ except Exception:
+ pass
+
+ def _init_ram_cap(self):
+ # Hard cap via Windows Job Object if selected and on Windows
+ try:
+ import platform
+            if self.ram_cap_mode.startswith('hard'):
+ if platform.system() == 'Windows':
+ if not self._apply_windows_job_memory_limit(self.ram_cap_mb):
+ self._log("⚠️ Failed to apply hard RAM cap; falling back to soft mode", "warning")
+ self.ram_cap_mode = 'soft'
+ else:
+ self._log("⚠️ Hard RAM cap only supported on Windows; using soft mode", "warning")
+ self.ram_cap_mode = 'soft'
+ except Exception:
+ self.ram_cap_mode = 'soft'
+ # Start watchdog regardless of mode to proactively stay under cap during operations
+ try:
+ self._mem_thread = threading.Thread(target=self._memory_watchdog, daemon=True)
+ self._mem_thread.start()
+ except Exception:
+ pass
+
+ def _block_if_over_cap(self, context_msg: str = ""):
+ # If over cap, block until we drop under cap - margin
+ if not self.ram_cap_enabled or self.ram_cap_mb <= 0:
+ return
+ import time
+ # Never require target below baseline + floor margin
+ baseline = max(0, getattr(self, 'ram_baseline_mb', 0))
+ floor = baseline + max(0, self.ram_min_floor_over_baseline_mb)
+ # Compute target below cap by recovery margin, but not below floor
+ target = self.ram_cap_mb - max(64, min(self.ram_recovery_margin_mb, self.ram_cap_mb // 4))
+ target = max(target, floor)
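+        # e.g. cap=4096 MB with recovery margin=512 MB -> target = 4096 - 512 = 3584 MB,
+        # raised to baseline + floor margin if that would fall below it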
+ start = time.time()
+ waited = False
+ last_log = 0
+ while True:
+ rss = self._get_process_rss_mb()
+ now = time.time()
+ if rss and rss <= target:
+ break
+ # Timeout to avoid deadlock when baseline can't go lower than target
+ if now - start > max(2.0, self.ram_gate_timeout_sec):
+ self._log(f"⌛ RAM gate timeout for {context_msg}: RSS={rss} MB, target={target} MB; proceeding in low-memory mode", "warning")
+ break
+ waited = True
+ # Periodic log to help diagnose
+ if now - last_log > 3.0 and rss:
+ self._log(f"⏳ Waiting for RAM drop: RSS={rss} MB, target={target} MB ({context_msg})", "info")
+ last_log = now
+ # Attempt cleanup while waiting
+ try:
+ self._deep_cleanup_models()
+ except Exception:
+ pass
+ try:
+ self._trim_working_set()
+ except Exception:
+ pass
+ if self._check_stop():
+ break
+ time.sleep(0.1) # Brief pause for stability
+ self._log("💤 RAM gate pausing briefly for stability", "debug")
+ if waited and context_msg:
+ self._log(f"🧹 Proceeding with {context_msg} (RSS now {self._get_process_rss_mb()} MB; target {target} MB)", "info")
+
+ def set_batch_mode(self, enabled: bool, batch_size: int = 1):
+ """Enable or disable batch mode optimizations"""
+ self.batch_mode = enabled
+ self.batch_size = batch_size
+
+ if enabled:
+ # Check if bubble detection is actually enabled before considering preload
+ ocr_settings = self.manga_settings.get('ocr', {}) if hasattr(self, 'manga_settings') else {}
+ bubble_detection_enabled = ocr_settings.get('bubble_detection_enabled', False)
+
+ # Only suggest preloading if bubble detection is actually going to be used
+ if bubble_detection_enabled:
+ self._log("📦 BATCH MODE: Bubble detection models will load on first use")
+ # NOTE: We don't actually preload anymore to save RAM
+ # Models are loaded on-demand when first needed
+
+ # Similarly for OCR models - they load on demand
+ if hasattr(self, 'ocr_manager') and self.ocr_manager:
+ self._log(f"📦 BATCH MODE: {self.ocr_provider} will load on first use")
+ # NOTE: We don't preload OCR models either
+
+ self._log(f"📦 BATCH MODE ENABLED: Processing {batch_size} images")
+ self._log(f"⏱️ API delay: {self.api_delay}s (preserved for rate limiting)")
+ else:
+ self._log("📝 BATCH MODE DISABLED")
+
+ def _ensure_bubble_detector_ready(self, ocr_settings):
+ """Ensure a usable BubbleDetector for current thread, auto-reloading models after cleanup."""
+ try:
+ bd = self._get_thread_bubble_detector()
+ detector_type = ocr_settings.get('detector_type', 'rtdetr_onnx')
+ if detector_type == 'rtdetr_onnx':
+ if not getattr(bd, 'rtdetr_onnx_loaded', False):
+ model_id = ocr_settings.get('rtdetr_model_url') or ocr_settings.get('bubble_model_path')
+ if not bd.load_rtdetr_onnx_model(model_id=model_id):
+ return None
+ elif detector_type == 'rtdetr':
+ if not getattr(bd, 'rtdetr_loaded', False):
+ model_id = ocr_settings.get('rtdetr_model_url') or ocr_settings.get('bubble_model_path')
+ if not bd.load_rtdetr_model(model_id=model_id):
+ return None
+ elif detector_type == 'yolo':
+ model_path = ocr_settings.get('bubble_model_path')
+ if model_path and not getattr(bd, 'model_loaded', False):
+ if not bd.load_model(model_path):
+ return None
+ else: # auto
+                # Prefer RT-DETR; load it on demand (no YOLO fallback in this branch)
+ if not getattr(bd, 'rtdetr_loaded', False):
+ bd.load_rtdetr_model(model_id=ocr_settings.get('rtdetr_model_url') or ocr_settings.get('bubble_model_path'))
+ return bd
+ except Exception:
+ return None
+
+ def _merge_with_bubble_detection(self, regions: List[TextRegion], image_path: str) -> List[TextRegion]:
+ """Merge text regions by bubble and filter based on RT-DETR class settings"""
+ try:
+ # Get detector settings from config
+ ocr_settings = self.main_gui.config.get('manga_settings', {}).get('ocr', {})
+ detector_type = ocr_settings.get('detector_type', 'rtdetr_onnx')
+
+ # Ensure detector is ready (auto-reload after cleanup)
+ bd = self._ensure_bubble_detector_ready(ocr_settings)
+ if bd is None:
+ self._log("⚠️ Bubble detector unavailable after cleanup; falling back to proximity merge", "warning")
+ # Use more conservative threshold for Azure/Google to avoid cross-bubble merging
+ threshold = 30 if getattr(self, 'ocr_provider', '').lower() in ('azure', 'google') else 50
+ return self._merge_nearby_regions(regions, threshold=threshold)
+
+ # Check if bubble detection is enabled
+ if not ocr_settings.get('bubble_detection_enabled', False):
+ self._log("📦 Bubble detection is disabled in settings", "info")
+ # Use more conservative threshold for Azure/Google to avoid cross-bubble merging
+ threshold = 30 if getattr(self, 'ocr_provider', '').lower() in ('azure', 'google') else 50
+ return self._merge_nearby_regions(regions, threshold=threshold)
+
+ # Initialize thread-local detector
+ bd = self._get_thread_bubble_detector()
+
+ bubbles = None
+ rtdetr_detections = None
+
+ if detector_type == 'rtdetr_onnx':
+ if not self.batch_mode:
+ self._log("🤖 Using RTEDR_onnx for bubble detection", "info")
+ if self.batch_mode and getattr(bd, 'rtdetr_onnx_loaded', False):
+ pass
+ elif not getattr(bd, 'rtdetr_onnx_loaded', False):
+ self._log("📥 Loading RTEDR_onnx model...", "info")
+ if not bd.load_rtdetr_onnx_model():
+ self._log("⚠️ Failed to load RTEDR_onnx, falling back to traditional merging", "warning")
+ return self._merge_nearby_regions(regions)
+ else:
+ # Model loaded successfully - mark in pool for reuse
+ try:
+ model_id = ocr_settings.get('rtdetr_model_url') or ocr_settings.get('bubble_model_path') or ''
+ key = ('rtdetr_onnx', model_id)
+ with MangaTranslator._detector_pool_lock:
+ if key not in MangaTranslator._detector_pool:
+ MangaTranslator._detector_pool[key] = {'spares': []}
+ # Mark this detector type as loaded for next run
+ MangaTranslator._detector_pool[key]['loaded'] = True
+ except Exception:
+ pass
+ rtdetr_confidence = ocr_settings.get('rtdetr_confidence', 0.3)
+ detect_empty = ocr_settings.get('detect_empty_bubbles', True)
+ detect_text_bubbles = ocr_settings.get('detect_text_bubbles', True)
+ detect_free_text = ocr_settings.get('detect_free_text', True)
+ if not self.batch_mode:
+ self._log(f"📋 RTEDR_onnx class filters:", "info")
+ self._log(f" Empty bubbles: {'✓' if detect_empty else '✗'}", "info")
+ self._log(f" Text bubbles: {'✓' if detect_text_bubbles else '✗'}", "info")
+ self._log(f" Free text: {'✓' if detect_free_text else '✗'}", "info")
+ self._log(f"🎯 RTEDR_onnx confidence threshold: {rtdetr_confidence:.2f}", "info")
+ rtdetr_detections = bd.detect_with_rtdetr_onnx(
+ image_path=image_path,
+ confidence=rtdetr_confidence,
+ return_all_bubbles=False
+ )
+ # Combine enabled bubble types for merging
+ bubbles = []
+ if detect_empty and 'bubbles' in rtdetr_detections:
+ bubbles.extend(rtdetr_detections['bubbles'])
+ if detect_text_bubbles and 'text_bubbles' in rtdetr_detections:
+ bubbles.extend(rtdetr_detections['text_bubbles'])
+ # Store free text locations for filtering later
+ free_text_regions = rtdetr_detections.get('text_free', []) if detect_free_text else []
+ self._log(f"✅ RTEDR_onnx detected:", "success")
+ self._log(f" {len(rtdetr_detections.get('bubbles', []))} empty bubbles", "info")
+ self._log(f" {len(rtdetr_detections.get('text_bubbles', []))} text bubbles", "info")
+ self._log(f" {len(rtdetr_detections.get('text_free', []))} free text regions", "info")
+ elif detector_type == 'rtdetr':
+ # BATCH OPTIMIZATION: Less verbose logging
+ if not self.batch_mode:
+ self._log("🤖 Using RT-DETR for bubble detection", "info")
+
+ # BATCH OPTIMIZATION: Don't reload if already loaded
+ if self.batch_mode and bd.rtdetr_loaded:
+ # Model already loaded, skip the loading step entirely
+ pass
+ elif not bd.rtdetr_loaded:
+ self._log("📥 Loading RT-DETR model...", "info")
+ if not bd.load_rtdetr_model():
+ self._log("⚠️ Failed to load RT-DETR, falling back to traditional merging", "warning")
+ return self._merge_nearby_regions(regions)
+ else:
+ # Model loaded successfully - mark in pool for reuse
+ try:
+ model_id = ocr_settings.get('rtdetr_model_url') or ocr_settings.get('bubble_model_path') or ''
+ key = ('rtdetr', model_id)
+ with MangaTranslator._detector_pool_lock:
+ if key not in MangaTranslator._detector_pool:
+ MangaTranslator._detector_pool[key] = {'spares': []}
+ # Mark this detector type as loaded for next run
+ MangaTranslator._detector_pool[key]['loaded'] = True
+ except Exception:
+ pass
+
+ # Get settings
+ rtdetr_confidence = ocr_settings.get('rtdetr_confidence', 0.3)
+ detect_empty = ocr_settings.get('detect_empty_bubbles', True)
+ detect_text_bubbles = ocr_settings.get('detect_text_bubbles', True)
+ detect_free_text = ocr_settings.get('detect_free_text', True)
+
+ # BATCH OPTIMIZATION: Reduce logging
+ if not self.batch_mode:
+ self._log(f"📋 RT-DETR class filters:", "info")
+ self._log(f" Empty bubbles: {'✓' if detect_empty else '✗'}", "info")
+ self._log(f" Text bubbles: {'✓' if detect_text_bubbles else '✗'}", "info")
+ self._log(f" Free text: {'✓' if detect_free_text else '✗'}", "info")
+ self._log(f"🎯 RT-DETR confidence threshold: {rtdetr_confidence:.2f}", "info")
+
+ # Get FULL RT-DETR detections (not just bubbles)
+ rtdetr_detections = bd.detect_with_rtdetr(
+ image_path=image_path,
+ confidence=rtdetr_confidence,
+ return_all_bubbles=False # Get dict with all classes
+ )
+
+ # Combine enabled bubble types for merging
+ bubbles = []
+ if detect_empty and 'bubbles' in rtdetr_detections:
+ bubbles.extend(rtdetr_detections['bubbles'])
+ if detect_text_bubbles and 'text_bubbles' in rtdetr_detections:
+ bubbles.extend(rtdetr_detections['text_bubbles'])
+
+ # Store free text locations for filtering later
+ free_text_regions = rtdetr_detections.get('text_free', []) if detect_free_text else []
+
+ # Helper to test if a point lies in any bbox
+ def _point_in_any_bbox(cx, cy, boxes):
+ try:
+ for (bx, by, bw, bh) in boxes or []:
+ if bx <= cx <= bx + bw and by <= cy <= by + bh:
+ return True
+ except Exception:
+ pass
+ return False
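+                # NOTE: _point_in_any_bbox is not used below; the free-text exclusion relies on
+                # _point_in_free_text defined further down.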
+
+ self._log(f"✅ RT-DETR detected:", "success")
+ self._log(f" {len(rtdetr_detections.get('bubbles', []))} empty bubbles", "info")
+ self._log(f" {len(rtdetr_detections.get('text_bubbles', []))} text bubbles", "info")
+ self._log(f" {len(rtdetr_detections.get('text_free', []))} free text regions", "info")
+
+ elif detector_type == 'yolo':
+ # Use YOLOv8 (existing code)
+ self._log("🤖 Using YOLOv8 for bubble detection", "info")
+
+ model_path = ocr_settings.get('bubble_model_path')
+ if not model_path:
+ self._log("⚠️ No YOLO model configured, falling back to traditional merging", "warning")
+ return self._merge_nearby_regions(regions)
+
+ if not bd.model_loaded:
+ self._log(f"📥 Loading YOLO model: {os.path.basename(model_path)}")
+ if not bd.load_model(model_path):
+ self._log("⚠️ Failed to load YOLO model, falling back to traditional merging", "warning")
+ return self._merge_nearby_regions(regions)
+
+ confidence = ocr_settings.get('bubble_confidence', 0.3)
+ self._log(f"🎯 Detecting bubbles with YOLO (confidence >= {confidence:.2f})")
+ bubbles = bd.detect_bubbles(image_path, confidence=confidence, use_rtdetr=False)
+
+ else:
+ # Unknown detector type
+ self._log(f"❌ Unknown detector type: {detector_type}", "error")
+ self._log(" Valid options: rtdetr_onnx, rtdetr, yolo", "error")
+ return self._merge_nearby_regions(regions)
+
+ if not bubbles:
+ self._log("⚠️ No bubbles detected, using traditional merging", "warning")
+ return self._merge_nearby_regions(regions)
+
+ self._log(f"✅ Found {len(bubbles)} bubbles for grouping", "success")
+
+ # Merge regions within bubbles
+ merged_regions = []
+ used_indices = set()
+
+ # Build lookup of free text regions for exclusion
+ free_text_bboxes = free_text_regions if detector_type in ('rtdetr', 'rtdetr_onnx') else []
+
+ # DEBUG: Log free text bboxes
+ if free_text_bboxes:
+ self._log(f"🔍 Free text exclusion zones: {len(free_text_bboxes)} regions", "debug")
+ for idx, (fx, fy, fw, fh) in enumerate(free_text_bboxes):
+ self._log(f" Free text zone {idx + 1}: x={fx:.0f}, y={fy:.0f}, w={fw:.0f}, h={fh:.0f}", "debug")
+ else:
+ self._log(f"⚠️ No free text exclusion zones detected by RT-DETR", "warning")
+
+ # Helper to check if a point is in any free text region
+ def _point_in_free_text(cx, cy, free_boxes):
+ try:
+ for idx, (fx, fy, fw, fh) in enumerate(free_boxes or []):
+ if fx <= cx <= fx + fw and fy <= cy <= fy + fh:
+ self._log(f" ✓ Point ({cx:.0f}, {cy:.0f}) is in free text zone {idx + 1}", "debug")
+ return True
+ except Exception as e:
+ self._log(f" ⚠️ Error checking free text: {e}", "debug")
+ pass
+ return False
+
+ for bubble_idx, (bx, by, bw, bh) in enumerate(bubbles):
+ bubble_regions = []
+ self._log(f"\n Processing bubble {bubble_idx + 1}: x={bx:.0f}, y={by:.0f}, w={bw:.0f}, h={bh:.0f}", "debug")
+
+ for idx, region in enumerate(regions):
+ if idx in used_indices:
+ continue
+
+ rx, ry, rw, rh = region.bounding_box
+ region_center_x = rx + rw / 2
+ region_center_y = ry + rh / 2
+
+ # Check if center is inside this bubble
+ if (bx <= region_center_x <= bx + bw and
+ by <= region_center_y <= by + bh):
+
+ self._log(f" Region '{region.text[:20]}...' center ({region_center_x:.0f}, {region_center_y:.0f}) is in bubble", "debug")
+
+ # CRITICAL: Don't merge if this region is in a free text area
+ # Free text should stay separate from bubbles
+ if _point_in_free_text(region_center_x, region_center_y, free_text_bboxes):
+ # This region is in a free text area, don't merge it into bubble
+ self._log(f" ❌ SKIPPING: Region overlaps with free text area", "debug")
+ continue
+
+ self._log(f" ✓ Adding region to bubble {bubble_idx + 1}", "debug")
+ bubble_regions.append(region)
+ used_indices.add(idx)
+
+ if bubble_regions:
+ # CRITICAL: Check if this "bubble" actually contains multiple separate bubbles
+ # This happens when RT-DETR detects one large bubble over stacked speech bubbles
+ split_groups = self._split_bubble_if_needed(bubble_regions)
+
+ # Process each split group as a separate bubble
+ for group_idx, group in enumerate(split_groups):
+ merged_text = " ".join(r.text for r in group)
+
+ min_x = min(r.bounding_box[0] for r in group)
+ min_y = min(r.bounding_box[1] for r in group)
+ max_x = max(r.bounding_box[0] + r.bounding_box[2] for r in group)
+ max_y = max(r.bounding_box[1] + r.bounding_box[3] for r in group)
+
+ all_vertices = []
+ for r in group:
+ if hasattr(r, 'vertices') and r.vertices:
+ all_vertices.extend(r.vertices)
+
+ if not all_vertices:
+ all_vertices = [
+ (min_x, min_y),
+ (max_x, min_y),
+ (max_x, max_y),
+ (min_x, max_y)
+ ]
+
+ merged_region = TextRegion(
+ text=merged_text,
+ vertices=all_vertices,
+ bounding_box=(min_x, min_y, max_x - min_x, max_y - min_y),
+ confidence=0.95,
+ region_type='bubble_detected',
+ bubble_bounds=(bx, by, bw, bh) # Pass bubble_bounds in constructor
+ )
+
+ # Store original regions for masking
+ merged_region.original_regions = group
+ # Classify as text bubble for downstream rendering/masking
+ merged_region.bubble_type = 'text_bubble'
+ # Mark that this should be inpainted
+ merged_region.should_inpaint = True
+
+ merged_regions.append(merged_region)
+
+ # DEBUG: Verify bubble_bounds was set
+ if not getattr(self, 'concise_logs', False):
+ has_bb = hasattr(merged_region, 'bubble_bounds') and merged_region.bubble_bounds is not None
+ self._log(f" 🔍 Merged region has bubble_bounds: {has_bb}", "debug")
+ if has_bb:
+ self._log(f" bubble_bounds = {merged_region.bubble_bounds}", "debug")
+
+ if len(split_groups) > 1:
+ self._log(f" Bubble {bubble_idx + 1}.{group_idx + 1}: Merged {len(group)} text regions (split from {len(bubble_regions)} total)", "info")
+ else:
+ self._log(f" Bubble {bubble_idx + 1}: Merged {len(group)} text regions", "info")
+
+ # Handle text outside bubbles based on RT-DETR settings
+ for idx, region in enumerate(regions):
+ if idx not in used_indices:
+ # This text is outside any bubble
+
+ # For RT-DETR mode, check if we should include free text
+ if detector_type in ('rtdetr', 'rtdetr_onnx'):
+ # If "Free Text" checkbox is checked, include ALL text outside bubbles
+ # Don't require RT-DETR to specifically detect it as free text
+ if ocr_settings.get('detect_free_text', True):
+ region.should_inpaint = True
+ # If RT-DETR detected free text box covering this region's center, mark explicitly
+ try:
+ cx = region.bounding_box[0] + region.bounding_box[2] / 2
+ cy = region.bounding_box[1] + region.bounding_box[3] / 2
+ # Find which free text bbox this region belongs to (if any)
+ found_free_text_box = False
+ for fx, fy, fw, fh in free_text_bboxes:
+ if fx <= cx <= fx + fw and fy <= cy <= fy + fh:
+ region.bubble_type = 'free_text'
+ # CRITICAL: Set bubble_bounds to the RT-DETR free text detection box
+ # This ensures rendering uses the full RT-DETR bounds, not just OCR polygon
+ if not hasattr(region, 'bubble_bounds') or region.bubble_bounds is None:
+ region.bubble_bounds = (fx, fy, fw, fh)
+ found_free_text_box = True
+ self._log(f" Free text region INCLUDED: '{region.text[:30]}...'", "debug")
+ break
+
+ if not found_free_text_box:
+ # Text outside bubbles but not in free text box - still mark as free text
+ region.bubble_type = 'free_text'
+ # Use region's own bbox if no RT-DETR free text box found
+ if not hasattr(region, 'bubble_bounds') or region.bubble_bounds is None:
+ region.bubble_bounds = region.bounding_box
+ self._log(f" Text outside bubbles INCLUDED (as free text): '{region.text[:30]}...'", "debug")
+ except Exception:
+ # Default to free text if check fails
+ region.bubble_type = 'free_text'
+ if not hasattr(region, 'bubble_bounds') or region.bubble_bounds is None:
+ region.bubble_bounds = region.bounding_box
+ else:
+ region.should_inpaint = False
+ self._log(f" Text outside bubbles EXCLUDED (Free Text unchecked): '{region.text[:30]}...'", "info")
+ else:
+ # For YOLO/auto, include all text by default
+ region.should_inpaint = True
+
+ merged_regions.append(region)
+
+ # Log summary
+ regions_to_inpaint = sum(1 for r in merged_regions if getattr(r, 'should_inpaint', True))
+ regions_to_skip = len(merged_regions) - regions_to_inpaint
+
+ self._log(f"📊 Bubble detection complete: {len(regions)} → {len(merged_regions)} regions", "success")
+ if detector_type == 'rtdetr':
+ self._log(f" {regions_to_inpaint} regions will be inpainted", "info")
+ if regions_to_skip > 0:
+ self._log(f" {regions_to_skip} regions will be preserved (Free Text unchecked)", "info")
+
+ return merged_regions
+
+ except Exception as e:
+ self._log(f"❌ Bubble detection error: {str(e)}", "error")
+ self._log(" Falling back to traditional merging", "warning")
+ return self._merge_nearby_regions(regions)
+
+ def set_full_page_context(self, enabled: bool, custom_prompt: str = None):
+ """Configure full page context translation mode
+
+ Args:
+ enabled: Whether to translate all text regions in a single contextual request
+ custom_prompt: Optional custom prompt for full page context mode
+ """
+ self.full_page_context_enabled = enabled
+ if custom_prompt:
+ self.full_page_context_prompt = custom_prompt
+
+ self._log(f"📄 Full page context mode: {'ENABLED' if enabled else 'DISABLED'}")
+ if enabled:
+ self._log(" All text regions will be sent together for contextual translation")
+ else:
+ self._log(" Text regions will be translated individually")
+
+ def update_text_rendering_settings(self,
+ bg_opacity: int = None,
+ bg_style: str = None,
+ bg_reduction: float = None,
+ font_style: str = None,
+ font_size: int = None,
+ text_color: tuple = None,
+ shadow_enabled: bool = None,
+ shadow_color: tuple = None,
+ shadow_offset_x: int = None,
+ shadow_offset_y: int = None,
+ shadow_blur: int = None,
+                                      force_caps_lock: bool = None):
+ """Update text rendering settings"""
+ self._log("📐 Updating text rendering settings:", "info")
+
+ if bg_opacity is not None:
+ self.text_bg_opacity = max(0, min(255, bg_opacity))
+ self._log(f" Background opacity: {int(self.text_bg_opacity/255*100)}%", "info")
+ if bg_style is not None and bg_style in ['box', 'circle', 'wrap']:
+ self.text_bg_style = bg_style
+ self._log(f" Background style: {bg_style}", "info")
+ if bg_reduction is not None:
+ self.text_bg_reduction = max(0.5, min(2.0, bg_reduction))
+ self._log(f" Background size: {int(self.text_bg_reduction*100)}%", "info")
+ if font_style is not None:
+ self.selected_font_style = font_style
+ font_name = os.path.basename(font_style) if font_style else 'Default'
+ self._log(f" Font: {font_name}", "info")
+ if font_size is not None:
+ if font_size < 0:
+ # Negative value indicates multiplier mode
+ self.font_size_mode = 'multiplier'
+ self.font_size_multiplier = abs(font_size)
+ self.custom_font_size = None # Clear fixed size
+ self._log(f" Font size mode: Dynamic multiplier ({self.font_size_multiplier:.1f}x)", "info")
+ else:
+ # Positive value or 0 indicates fixed mode
+ self.font_size_mode = 'fixed'
+ self.custom_font_size = font_size if font_size > 0 else None
+ self._log(f" Font size mode: Fixed ({font_size if font_size > 0 else 'Auto'})", "info")
+ if text_color is not None:
+ self.text_color = text_color
+ self._log(f" Text color: RGB{text_color}", "info")
+ if shadow_enabled is not None:
+ self.shadow_enabled = shadow_enabled
+ self._log(f" Shadow: {'Enabled' if shadow_enabled else 'Disabled'}", "info")
+ if shadow_color is not None:
+ self.shadow_color = shadow_color
+ self._log(f" Shadow color: RGB{shadow_color}", "info")
+ if shadow_offset_x is not None:
+ self.shadow_offset_x = shadow_offset_x
+ if shadow_offset_y is not None:
+ self.shadow_offset_y = shadow_offset_y
+ if shadow_blur is not None:
+ self.shadow_blur = max(0, shadow_blur)
+        if force_caps_lock is not None:
+ self.force_caps_lock = force_caps_lock
+ self._log(f" Force Caps Lock: {'Enabled' if force_caps_lock else 'Disabled'}", "info")
+
+ self._log("✅ Rendering settings updated", "info")
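+
+    # Illustrative sketch of the font_size convention above (values are examples only):
+    #
+    #   translator.update_text_rendering_settings(font_size=-1.5)  # dynamic mode, 1.5x multiplier
+    #   translator.update_text_rendering_settings(font_size=32)    # fixed 32pt font
+    #   translator.update_text_rendering_settings(font_size=0)     # fixed mode, size chosen automatically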
+
+ def _log(self, message: str, level: str = "info"):
+        """Log a message to the GUI callback (or console) and always to the file logger.
+        The file logger is configured in translator_gui._setup_file_logging().
+        Once a stop has been requested, all but a few essential stop-confirmation messages are suppressed.
+ """
+ # Enhanced stop suppression - allow only essential stop confirmation messages
+ if self._check_stop() or self.is_globally_cancelled():
+ # Only allow very specific stop confirmation messages - nothing else
+ essential_stop_keywords = [
+ "⏹️ Translation stopped by user",
+ "🧹 Cleaning up models to free RAM",
+ "✅ Model cleanup complete - RAM should be freed",
+ "✅ All models cleaned up - RAM freed!"
+ ]
+ # Suppress ALL other messages when stopped - be very restrictive
+ if not any(keyword in message for keyword in essential_stop_keywords):
+ return
+
+ # Concise pipeline logs: keep only high-level messages and errors/warnings
+ if getattr(self, 'concise_logs', False):
+ if level in ("error", "warning"):
+ pass
+ else:
+ keep_prefixes = (
+ # Pipeline boundaries and IO
+ "📷 STARTING", "📁 Input", "📁 Output",
+ # Step markers
+ "📍 [STEP",
+ # Step 1 essentials
+ "🔍 Detecting text regions", # start of detection on file
+ "📄 Detected", # format detected
+ "Using OCR provider:", # provider line
+ "Using Azure Read API", # azure-specific run mode
+ "⚠️ Converting image to PNG", # azure PNG compatibility
+ "🤖 Using AI bubble detection", # BD merge mode
+ "🤖 Using RTEDR_onnx", # selected BD
+ "✅ Detected", # detected N regions after merging
+ # Detectors/inpainter readiness
+ "🤖 Using bubble detector", "🎨 Using local inpainter",
+ # Step 2: key actions
+ "🔀 Running", # Running translation and inpainting concurrently
+ "📄 Using FULL PAGE CONTEXT", # Explicit mode notice
+ "📄 Full page context mode", # Alternate phrasing
+ "📄 Full page context translation", # Start/summary
+ "🎭 Creating text mask", "📊 Mask breakdown", "📏 Applying",
+ "🎨 Inpainting", "🧽 Using local inpainting",
+ # Detection and summary
+ "📊 Bubble detection complete", "✅ Detection complete",
+ # Mapping/translation summary
+ "📊 Mapping", "📊 Full page context translation complete",
+ # Rendering
+ "✍️ Rendering", "✅ ENHANCED text rendering complete",
+ # Output and final summary
+ "💾 Saved output", "✅ TRANSLATION PIPELINE COMPLETE",
+ "📊 Translation Summary", "✅ Successful", "❌ Failed",
+ # Cleanup
+ "🔑 Auto cleanup", "🔑 Translator instance preserved"
+ )
+ _msg = message.lstrip() if isinstance(message, str) else message
+ if not any(_msg.startswith(p) for p in keep_prefixes):
+ return
+
+ # In batch mode, only log important messages
+ if self.batch_mode:
+ # Skip verbose/debug messages in batch mode
+ if level == "debug" or "DEBUG:" in message:
+ return
+ # Skip repetitive messages
+ if any(skip in message for skip in [
+ "Using vertex-based", "Using", "Applying", "Font size",
+ "Region", "Found text", "Style:"
+ ]):
+ return
+
+ # Send to GUI if available
+ if self.log_callback:
+ try:
+ self.log_callback(message, level)
+ except Exception:
+ # Fall back to print if GUI callback fails
+ print(message)
+ else:
+ print(message)
+
+ # Always record to the Python logger (file)
+ try:
+ _logger = logging.getLogger(__name__)
+ if level == "error":
+ _logger.error(message)
+ elif level == "warning":
+ _logger.warning(message)
+ elif level == "debug":
+ _logger.debug(message)
+ else:
+ # Map custom levels like 'success' to INFO
+ _logger.info(message)
+ except Exception:
+ pass
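+
+    # Illustrative sketch of concise-mode filtering (example messages are assumptions):
+    #
+    #   self._log("📍 [STEP 1/5] Text Detection", "info")   # kept: whitelisted prefix
+    #   self._log("Region 3: font metrics ...", "info")      # dropped when concise_logs is enabled
+    #   self._log("OCR request failed", "error")             # kept: errors always pass the filter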
+
+ def _is_primarily_english(self, text: str) -> bool:
+ """Heuristic: treat text as English if it has no CJK and a high ASCII ratio.
+ Conservative by default to avoid dropping legitimate content.
+ Tunable via manga_settings.ocr:
+ - english_exclude_threshold (float, default 0.70)
+ - english_exclude_min_chars (int, default 4)
+ - english_exclude_short_tokens (bool, default False)
+ """
+ if not text:
+ return False
+
+ # Pull tuning knobs from settings (with safe defaults)
+ ocr_settings = {}
+ try:
+ ocr_settings = self.main_gui.config.get('manga_settings', {}).get('ocr', {})
+ except Exception:
+ pass
+ threshold = float(ocr_settings.get('english_exclude_threshold', 0.70))
+ min_chars = int(ocr_settings.get('english_exclude_min_chars', 4))
+ exclude_short = bool(ocr_settings.get('english_exclude_short_tokens', False))
+
+ # 1) If text contains any CJK or full-width characters, do NOT treat as English
+ has_cjk = any(
+ '\u4e00' <= char <= '\u9fff' or # Chinese
+ '\u3040' <= char <= '\u309f' or # Hiragana
+ '\u30a0' <= char <= '\u30ff' or # Katakana
+ '\uac00' <= char <= '\ud7af' or # Korean
+ '\uff00' <= char <= '\uffef' # Full-width characters
+ for char in text
+ )
+ if has_cjk:
+ return False
+
+ text_stripped = text.strip()
+ non_space_len = sum(1 for c in text_stripped if not c.isspace())
+
+ # 2) By default, do not exclude very short tokens to avoid losing interjections like "Ah", "Eh?", etc.
+ if not exclude_short and non_space_len < max(1, min_chars):
+ return False
+
+ # Optional legacy behavior: aggressively drop very short pure-ASCII tokens
+ if exclude_short:
+ if len(text_stripped) == 1 and text_stripped.isalpha() and ord(text_stripped) < 128:
+ self._log(f" Excluding single English letter: '{text_stripped}'", "debug")
+ return True
+ if len(text_stripped) <= 3:
+ ascii_letters = sum(1 for char in text_stripped if char.isalpha() and ord(char) < 128)
+ if ascii_letters >= len(text_stripped) * 0.5:
+ self._log(f" Excluding short English text: '{text_stripped}'", "debug")
+ return True
+
+ # 3) Compute ASCII ratio (exclude spaces)
+ ascii_chars = sum(1 for char in text if 33 <= ord(char) <= 126)
+ total_chars = sum(1 for char in text if not char.isspace())
+ if total_chars == 0:
+ return False
+ ratio = ascii_chars / total_chars
+
+ if ratio > threshold:
+ self._log(f" Excluding English text ({ratio:.0%} ASCII, threshold {threshold:.0%}, len={non_space_len}): '{text[:30]}...'", "debug")
+ return True
+ return False
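+
+    # Illustrative outcomes under the default settings (threshold 0.70, min_chars 4,
+    # short tokens kept); example strings are assumptions, not test cases:
+    #
+    #   _is_primarily_english("こんにちは")     -> False  (contains CJK)
+    #   _is_primarily_english("Eh?")             -> False  (shorter than english_exclude_min_chars)
+    #   _is_primarily_english("Hello there!")    -> True   (ASCII ratio above the threshold)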
+
+ def _load_bubble_detector(self, ocr_settings, image_path):
+ """Load bubble detector with appropriate model based on settings
+
+ Returns:
+ dict: Detection results or None if failed
+ """
+ detector_type = ocr_settings.get('detector_type', 'rtdetr_onnx')
+ model_path = ocr_settings.get('bubble_model_path', '')
+ confidence = ocr_settings.get('bubble_confidence', 0.3)
+
+ bd = self._get_thread_bubble_detector()
+
+ if detector_type == 'rtdetr_onnx' or 'RTEDR_onnx' in str(detector_type):
+ # Load RT-DETR ONNX model
+ if bd.load_rtdetr_onnx_model(model_id=ocr_settings.get('rtdetr_model_url') or model_path):
+ return bd.detect_with_rtdetr_onnx(
+ image_path=image_path,
+ confidence=ocr_settings.get('rtdetr_confidence', confidence),
+ return_all_bubbles=False
+ )
+ elif detector_type == 'rtdetr' or 'RT-DETR' in str(detector_type):
+ # Load RT-DETR (PyTorch) model
+ if bd.load_rtdetr_model(model_id=ocr_settings.get('rtdetr_model_url') or model_path):
+ return bd.detect_with_rtdetr(
+ image_path=image_path,
+ confidence=ocr_settings.get('rtdetr_confidence', confidence),
+ return_all_bubbles=False
+ )
+ elif detector_type == 'custom':
+ # Custom model - try to determine type from path
+ custom_path = ocr_settings.get('custom_model_path', model_path)
+ if 'rtdetr' in custom_path.lower():
+ # Custom RT-DETR model
+ if bd.load_rtdetr_model(model_id=custom_path):
+ return bd.detect_with_rtdetr(
+ image_path=image_path,
+ confidence=confidence,
+ return_all_bubbles=False
+ )
+ else:
+ # Assume YOLO format for other custom models
+ if custom_path and bd.load_model(custom_path):
+ detections = bd.detect_bubbles(
+ image_path,
+ confidence=confidence
+ )
+ return {
+ 'text_bubbles': detections if detections else [],
+ 'text_free': [],
+ 'bubbles': []
+ }
+ else:
+ # Standard YOLO model
+ if model_path and bd.load_model(model_path):
+ detections = bd.detect_bubbles(
+ image_path,
+ confidence=confidence
+ )
+ return {
+ 'text_bubbles': detections if detections else [],
+ 'text_free': [],
+ 'bubbles': []
+ }
+ return None
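+
+    # Illustrative sketch of the expected settings and return shape (paths/values are assumptions):
+    #
+    #   ocr_settings = {
+    #       'detector_type': 'rtdetr_onnx',        # or 'rtdetr', 'custom', else a YOLO model path
+    #       'bubble_model_path': 'models/bubbles.onnx',
+    #       'bubble_confidence': 0.3,
+    #   }
+    #   result = self._load_bubble_detector(ocr_settings, 'page_001.png')
+    #   # result is None on failure, otherwise a dict such as:
+    #   # {'text_bubbles': [(x, y, w, h), ...], 'text_free': [...], 'bubbles': [...]}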
+
+ def _ensure_google_client(self):
+ try:
+ if getattr(self, 'vision_client', None) is None:
+ from google.cloud import vision
+ google_path = self.ocr_config.get('google_credentials_path') if hasattr(self, 'ocr_config') else None
+ if google_path:
+ os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = google_path
+ self.vision_client = vision.ImageAnnotatorClient()
+ self._log("✅ Reinitialized Google Vision client", "debug")
+ except Exception as e:
+ self._log(f"❌ Failed to initialize Google Vision client: {e}", "error")
+
+ def _ensure_azure_client(self):
+ try:
+ if getattr(self, 'vision_client', None) is None:
+ from azure.cognitiveservices.vision.computervision import ComputerVisionClient
+ from msrest.authentication import CognitiveServicesCredentials
+ key = None
+ endpoint = None
+ try:
+ key = (self.ocr_config or {}).get('azure_key')
+ endpoint = (self.ocr_config or {}).get('azure_endpoint')
+ except Exception:
+ pass
+ if not key:
+ key = self.main_gui.config.get('azure_vision_key', '') if hasattr(self, 'main_gui') else None
+ if not endpoint:
+ endpoint = self.main_gui.config.get('azure_vision_endpoint', '') if hasattr(self, 'main_gui') else None
+ if not key or not endpoint:
+ raise ValueError("Azure credentials missing for client init")
+ self.vision_client = ComputerVisionClient(endpoint, CognitiveServicesCredentials(key))
+ self._log("✅ Reinitialized Azure Computer Vision client", "debug")
+ except Exception as e:
+ self._log(f"❌ Failed to initialize Azure CV client: {e}", "error")
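+
+    # Illustrative sketch of the credential lookup above (key names mirror the code;
+    # the endpoint value is a placeholder):
+    #
+    #   self.ocr_config = {'azure_key': '<key>',
+    #                      'azure_endpoint': 'https://<resource>.cognitiveservices.azure.com/'}
+    #   self._ensure_azure_client()  # lazily rebuilds the client if it was cleaned up between runs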
+
+ def detect_text_regions(self, image_path: str) -> List[TextRegion]:
+ """Detect text regions using configured OCR provider"""
+ # Reduce logging in batch mode
+ if not self.batch_mode:
+ self._log(f"🔍 Detecting text regions in: {os.path.basename(image_path)}")
+ self._log(f" Using OCR provider: {self.ocr_provider.upper()}")
+ else:
+ # Only show batch progress if batch_current is set properly
+ if hasattr(self, 'batch_current') and hasattr(self, 'batch_size'):
+ self._log(f"🔍 [{self.batch_current}/{self.batch_size}] {os.path.basename(image_path)}")
+ else:
+ self._log(f"🔍 Detecting text: {os.path.basename(image_path)}")
+
+ try:
+ # ============================================================
+ # CRITICAL: FORCE CLEAR ALL TEXT-RELATED CACHES
+ # This MUST happen for EVERY image to prevent text contamination
+ # NO EXCEPTIONS - batch mode or not, ALL caches get cleared
+ # ============================================================
+
+ # 1. Clear OCR ROI cache (prevents text from previous images leaking)
+ # THREAD-SAFE: Use lock to prevent race conditions in parallel panel translation
+ if hasattr(self, 'ocr_roi_cache'):
+ with self._cache_lock:
+ self.ocr_roi_cache.clear()
+ self._log("🧹 Cleared OCR ROI cache", "debug")
+
+ # 2. Clear OCR manager caches (multiple potential cache locations)
+ if hasattr(self, 'ocr_manager') and self.ocr_manager:
+ # Clear last_results (can contain text from previous image)
+ if hasattr(self.ocr_manager, 'last_results'):
+ self.ocr_manager.last_results = None
+ # Clear generic cache
+ if hasattr(self.ocr_manager, 'cache'):
+ self.ocr_manager.cache.clear()
+ # Clear provider-level caches
+ if hasattr(self.ocr_manager, 'providers'):
+ for provider_name, provider in self.ocr_manager.providers.items():
+ if hasattr(provider, 'last_results'):
+ provider.last_results = None
+ if hasattr(provider, 'cache'):
+ provider.cache.clear()
+ self._log("🧹 Cleared OCR manager caches", "debug")
+
+ # 3. Clear bubble detector cache (can contain text region info)
+ if hasattr(self, 'bubble_detector') and self.bubble_detector:
+ if hasattr(self.bubble_detector, 'last_detections'):
+ self.bubble_detector.last_detections = None
+ if hasattr(self.bubble_detector, 'cache'):
+ self.bubble_detector.cache.clear()
+ self._log("🧹 Cleared bubble detector cache", "debug")
+
+ # Get manga settings from main_gui config
+ manga_settings = self.main_gui.config.get('manga_settings', {})
+ preprocessing = manga_settings.get('preprocessing', {})
+ ocr_settings = manga_settings.get('ocr', {})
+
+ # Get text filtering settings
+ min_text_length = ocr_settings.get('min_text_length', 2)
+ exclude_english = ocr_settings.get('exclude_english_text', True)
+ confidence_threshold = ocr_settings.get('confidence_threshold', 0.1)
+
+ # Load and preprocess image if enabled
+ if preprocessing.get('enabled', False):
+ self._log("📐 Preprocessing enabled - enhancing image quality")
+ processed_image_data = self._preprocess_image(image_path, preprocessing)
+ else:
+ # Read image with optional compression (separate from preprocessing)
+ try:
+ comp_cfg = (self.main_gui.config.get('manga_settings', {}) or {}).get('compression', {})
+ if comp_cfg.get('enabled', False):
+ processed_image_data = self._load_image_with_compression_only(image_path, comp_cfg)
+ else:
+ with open(image_path, 'rb') as image_file:
+ processed_image_data = image_file.read()
+ except Exception:
+ with open(image_path, 'rb') as image_file:
+ processed_image_data = image_file.read()
+
+ # Compute per-image hash for caching (based on uploaded bytes)
+ # CRITICAL FIX #1: Never allow None page_hash to prevent cache key collisions
+ try:
+ import hashlib
+ page_hash = hashlib.sha1(processed_image_data).hexdigest()
+
+ # CRITICAL: Never allow None page_hash
+ if page_hash is None:
+ # Fallback: use image path + timestamp for uniqueness
+ import time
+ import uuid
+ page_hash = hashlib.sha1(
+ f"{image_path}_{time.time()}_{uuid.uuid4()}".encode()
+ ).hexdigest()
+ self._log("⚠️ Using fallback page hash for cache isolation", "warning")
+
+ # CRITICAL: If image hash changed, force clear ROI cache
+ # THREAD-SAFE: Use lock for parallel panel translation
+ if hasattr(self, '_current_image_hash') and self._current_image_hash != page_hash:
+ if hasattr(self, 'ocr_roi_cache'):
+ with self._cache_lock:
+ self.ocr_roi_cache.clear()
+ self._log("🧹 Image changed - cleared ROI cache", "debug")
+ self._current_image_hash = page_hash
+ except Exception as e:
+ # Emergency fallback - never let page_hash be None
+ import uuid
+ page_hash = str(uuid.uuid4())
+ self._current_image_hash = page_hash
+ self._log(f"⚠️ Page hash generation failed: {e}, using UUID fallback", "error")
+
+ regions = []
+
+ # Route to appropriate provider
+ if self.ocr_provider == 'google':
+ # === GOOGLE CLOUD VISION ===
+ # Ensure client exists (it might have been cleaned up between runs)
+ try:
+ self._ensure_google_client()
+ except Exception:
+ pass
+
+ # Check if we should use RT-DETR for text region detection (NEW FEATURE)
+ # IMPORTANT: bubble_detection_enabled should default to True for optimal detection
+ if ocr_settings.get('bubble_detection_enabled', True) and ocr_settings.get('use_rtdetr_for_ocr_regions', True):
+ self._log("🎯 Using RT-DETR to guide Google Cloud Vision OCR")
+
+ # Run RT-DETR to detect text regions first
+ _ = self._get_thread_bubble_detector()
+ rtdetr_detections = self._load_bubble_detector(ocr_settings, image_path)
+
+ if rtdetr_detections:
+ # Collect all text-containing regions WITH TYPE TRACKING
+ all_regions = []
+ # Track region type to assign bubble_type later
+ region_types = {}
+ idx = 0
+ if 'text_bubbles' in rtdetr_detections:
+ for bbox in rtdetr_detections.get('text_bubbles', []):
+ all_regions.append(bbox)
+ region_types[idx] = 'text_bubble'
+ idx += 1
+ if 'text_free' in rtdetr_detections:
+ for bbox in rtdetr_detections.get('text_free', []):
+ all_regions.append(bbox)
+ region_types[idx] = 'free_text'
+ idx += 1
+
+ if all_regions:
+ self._log(f"📊 RT-DETR detected {len(all_regions)} text regions, OCR-ing each with Google Vision")
+
+ # Load image for cropping
+ import cv2
+ cv_image = cv2.imread(image_path)
+ if cv_image is None:
+ self._log("⚠️ Failed to load image, falling back to full-page OCR", "warning")
+ else:
+ # Define worker function for concurrent OCR
+ def ocr_region_google(region_data):
+ i, region_idx, x, y, w, h = region_data
+ try:
+ # RATE LIMITING: Add small delay to avoid potential rate limits
+ # Google has high limits (1,800/min paid tier) but being conservative
+ import time
+ import random
+ time.sleep(0.1 + random.random() * 0.2) # 0.1-0.3s random delay
+
+ # Crop region
+ cropped = self._safe_crop_region(cv_image, x, y, w, h)
+ if cropped is None:
+ return None
+
+ # Validate and resize crop if needed (Google Vision requires minimum dimensions)
+ h_crop, w_crop = cropped.shape[:2]
+ MIN_SIZE = 50 # Minimum dimension (increased from 10 for better OCR)
+ MIN_AREA = 2500 # Minimum area (50x50)
+
+ if h_crop < MIN_SIZE or w_crop < MIN_SIZE or h_crop * w_crop < MIN_AREA:
+ # Region too small - try to resize it
+ scale_w = MIN_SIZE / w_crop if w_crop < MIN_SIZE else 1.0
+ scale_h = MIN_SIZE / h_crop if h_crop < MIN_SIZE else 1.0
+ scale = max(scale_w, scale_h)
+
+ if scale > 1.0:
+ new_w = int(w_crop * scale)
+ new_h = int(h_crop * scale)
+ cropped = cv2.resize(cropped, (new_w, new_h), interpolation=cv2.INTER_CUBIC)
+ self._log(f"🔍 Region {i} resized from {w_crop}x{h_crop}px to {new_w}x{new_h}px for OCR", "debug")
+ h_crop, w_crop = new_h, new_w
+
+ # Final validation
+ if h_crop < 10 or w_crop < 10:
+ self._log(f"⚠️ Region {i} too small even after resize ({w_crop}x{h_crop}px), skipping", "debug")
+ return None
+
+ # Encode cropped image
+ _, encoded = cv2.imencode('.jpg', cropped, [cv2.IMWRITE_JPEG_QUALITY, 95])
+ region_image_data = encoded.tobytes()
+
+ # Create Vision API image object
+ vision_image = vision.Image(content=region_image_data)
+ image_context = vision.ImageContext(
+ language_hints=ocr_settings.get('language_hints', ['ja', 'ko', 'zh'])
+ )
+
+ # Detect text in this region
+ detection_mode = ocr_settings.get('text_detection_mode', 'document')
+ if detection_mode == 'document':
+ response = self.vision_client.document_text_detection(
+ image=vision_image,
+ image_context=image_context
+ )
+ else:
+ response = self.vision_client.text_detection(
+ image=vision_image,
+ image_context=image_context
+ )
+
+ if response.error.message:
+ self._log(f"⚠️ Region {i} error: {response.error.message}", "warning")
+ return None
+
+ # Extract text from this region
+ region_text = response.full_text_annotation.text if response.full_text_annotation else ""
+ if region_text.strip():
+ # Clean the text
+ region_text = self._fix_encoding_issues(region_text)
+ region_text = self._sanitize_unicode_characters(region_text)
+ region_text = region_text.strip()
+
+ # Create TextRegion with original image coordinates
+ region = TextRegion(
+ text=region_text,
+ vertices=[(x, y), (x+w, y), (x+w, y+h), (x, y+h)],
+ bounding_box=(x, y, w, h),
+ confidence=0.9, # RT-DETR confidence
+ region_type='text_block'
+ )
+ # Assign bubble_type from RT-DETR detection
+ region.bubble_type = region_types.get(region_idx, 'text_bubble')
+ if not getattr(self, 'concise_logs', False):
+ self._log(f"✅ Region {i}/{len(all_regions)} ({region.bubble_type}): {region_text[:50]}...")
+ return region
+ return None
+
+ except Exception as e:
+ # Provide more detailed error info for debugging
+ error_msg = str(e)
+ if 'Bad Request' in error_msg or 'invalid' in error_msg.lower():
+ self._log(f"⏭️ Skipping region {i}: Too small or invalid for Google Vision (dimensions < 10x10px or area < 100px²)", "debug")
+ else:
+ self._log(f"⚠️ Error OCR-ing region {i}: {e}", "warning")
+ return None
+
+ # Process regions concurrently with RT-DETR concurrency control
+ from concurrent.futures import ThreadPoolExecutor, as_completed
+ # Use rtdetr_max_concurrency setting (default 12) to control parallel OCR calls
+ max_workers = min(ocr_settings.get('rtdetr_max_concurrency', 12), len(all_regions))
+
+ region_data_list = [(i+1, i, x, y, w, h) for i, (x, y, w, h) in enumerate(all_regions)]
+
+ with ThreadPoolExecutor(max_workers=max_workers) as executor:
+ futures = {executor.submit(ocr_region_google, rd): rd for rd in region_data_list}
+ for future in as_completed(futures):
+ try:
+ result = future.result()
+ if result:
+ regions.append(result)
+ finally:
+ # Clean up future to free memory
+ del future
+
+ # If we got results, sort and post-process
+ if regions:
+ # CRITICAL: Sort regions by position (top-to-bottom, left-to-right)
+ # Concurrent processing returns them in completion order, not detection order
+ regions.sort(key=lambda r: (r.bounding_box[1], r.bounding_box[0]))
+ self._log(f"✅ RT-DETR + Google Vision: {len(regions)} text regions detected (sorted by position)")
+
+ # POST-PROCESS: Check for text_bubbles that overlap with free_text regions
+ # If a text_bubble's center is within a free_text bbox, reclassify it as free_text
+ free_text_bboxes = rtdetr_detections.get('text_free', [])
+ if free_text_bboxes:
+ reclassified_count = 0
+ for region in regions:
+ if getattr(region, 'bubble_type', None) == 'text_bubble':
+ # Get region center
+ x, y, w, h = region.bounding_box
+ cx = x + w / 2
+ cy = y + h / 2
+
+ self._log(f" Checking text_bubble '{region.text[:30]}...' at center ({cx:.0f}, {cy:.0f})", "debug")
+
+ # Check if center is in any free_text bbox
+ for bbox_idx, (fx, fy, fw, fh) in enumerate(free_text_bboxes):
+ in_x = fx <= cx <= fx + fw
+ in_y = fy <= cy <= fy + fh
+ self._log(f" vs free_text bbox {bbox_idx+1}: in_x={in_x}, in_y={in_y}", "debug")
+
+ if in_x and in_y:
+ # Reclassify as free text
+ old_type = region.bubble_type
+ region.bubble_type = 'free_text'
+ reclassified_count += 1
+ self._log(f" ✅ RECLASSIFIED '{region.text[:30]}...' from {old_type} to free_text", "info")
+ break
+
+ if reclassified_count > 0:
+ self._log(f"🔄 Reclassified {reclassified_count} overlapping regions as free_text", "info")
+
+ # MERGE: Combine free_text regions that are within the same free_text bbox
+ # Group free_text regions by which free_text bbox they belong to
+ free_text_groups = {}
+ other_regions = []
+
+ for region in regions:
+ if getattr(region, 'bubble_type', None) == 'free_text':
+ # Find which free_text bbox this region belongs to
+ x, y, w, h = region.bounding_box
+ cx = x + w / 2
+ cy = y + h / 2
+
+ for bbox_idx, (fx, fy, fw, fh) in enumerate(free_text_bboxes):
+ if fx <= cx <= fx + fw and fy <= cy <= fy + fh:
+ if bbox_idx not in free_text_groups:
+ free_text_groups[bbox_idx] = []
+ free_text_groups[bbox_idx].append(region)
+ break
+ else:
+ # Free text region not in any bbox (shouldn't happen, but handle it)
+ other_regions.append(region)
+ else:
+ other_regions.append(region)
+
+ # Merge each group of free_text regions
+ merged_free_text = []
+ for bbox_idx, group in free_text_groups.items():
+ if len(group) > 1:
+ # Merge multiple free text regions in same bbox
+ merged_text = " ".join(r.text for r in group)
+
+ min_x = min(r.bounding_box[0] for r in group)
+ min_y = min(r.bounding_box[1] for r in group)
+ max_x = max(r.bounding_box[0] + r.bounding_box[2] for r in group)
+ max_y = max(r.bounding_box[1] + r.bounding_box[3] for r in group)
+
+ all_vertices = []
+ for r in group:
+ if hasattr(r, 'vertices') and r.vertices:
+ all_vertices.extend(r.vertices)
+
+ if not all_vertices:
+ all_vertices = [
+ (min_x, min_y),
+ (max_x, min_y),
+ (max_x, max_y),
+ (min_x, max_y)
+ ]
+
+ merged_region = TextRegion(
+ text=merged_text,
+ vertices=all_vertices,
+ bounding_box=(min_x, min_y, max_x - min_x, max_y - min_y),
+ confidence=0.95,
+ region_type='text_block'
+ )
+ merged_region.bubble_type = 'free_text'
+ merged_region.should_inpaint = True
+ merged_free_text.append(merged_region)
+ self._log(f"🔀 Merged {len(group)} free_text regions into one: '{merged_text[:50]}...'", "debug")
+ else:
+ # Single region, keep as-is
+ merged_free_text.extend(group)
+
+ # Combine all regions
+ regions = other_regions + merged_free_text
+ self._log(f"✅ Final: {len(regions)} regions after reclassification and merging", "info")
+
+ # Skip merging section and return directly
+ return regions
+ else:
+ self._log("⚠️ No text found in RT-DETR regions, falling back to full-page OCR", "warning")
+
+ # If bubble detection is enabled and batch variables suggest batching, do ROI-based batched OCR
+ try:
+ use_roi_locality = ocr_settings.get('bubble_detection_enabled', False) and ocr_settings.get('roi_locality_enabled', False)
+ # Determine OCR batching enable
+ if 'ocr_batch_enabled' in ocr_settings:
+ ocr_batch_enabled = bool(ocr_settings.get('ocr_batch_enabled'))
+ else:
+ ocr_batch_enabled = (os.getenv('BATCH_OCR', '0') == '1') or (os.getenv('BATCH_TRANSLATION', '0') == '1') or getattr(self, 'batch_mode', False)
+ # Determine OCR batch size
+ bs = int(ocr_settings.get('ocr_batch_size') or 0)
+ if bs <= 0:
+ bs = int(os.getenv('OCR_BATCH_SIZE', '0') or 0)
+ if bs <= 0:
+ bs = int(os.getenv('BATCH_SIZE', str(getattr(self, 'batch_size', 1))) or 1)
+ ocr_batch_size = max(1, bs)
+ except Exception:
+ use_roi_locality = False
+ ocr_batch_enabled = False
+ ocr_batch_size = 1
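+            # Precedence sketch mirroring the lookups above (settings key first, then env vars):
+            #   ocr_batch_enabled: ocr_settings['ocr_batch_enabled'] -> BATCH_OCR=1 -> BATCH_TRANSLATION=1 -> self.batch_mode
+            #   ocr_batch_size:    ocr_settings['ocr_batch_size'] -> OCR_BATCH_SIZE -> BATCH_SIZE -> self.batch_size (floor 1)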
+ if use_roi_locality and (ocr_batch_enabled or ocr_batch_size > 1):
+ rois = self._prepare_ocr_rois_from_bubbles(image_path, ocr_settings, preprocessing, page_hash)
+ if rois:
+ # Determine concurrency for Google: OCR_MAX_CONCURRENCY env or min(BATCH_SIZE,2)
+ try:
+ max_cc = int(ocr_settings.get('ocr_max_concurrency') or 0)
+ if max_cc <= 0:
+ max_cc = int(os.getenv('OCR_MAX_CONCURRENCY', '0') or 0)
+ if max_cc <= 0:
+ max_cc = min(max(1, ocr_batch_size), 2)
+ except Exception:
+ max_cc = min(max(1, ocr_batch_size), 2)
+ regions = self._google_ocr_rois_batched(rois, ocr_settings, max(1, ocr_batch_size), max_cc, page_hash)
+ self._log(f"✅ Google OCR batched over {len(rois)} ROIs → {len(regions)} regions (cc={max_cc})", "info")
+
+ # Force garbage collection after concurrent OCR to reduce memory spikes
+ try:
+ import gc
+ gc.collect()
+ except Exception:
+ pass
+
+ return regions
+
+ # Start local inpainter preload while Google OCR runs (background; multiple if panel-parallel)
+ try:
+ if not getattr(self, 'skip_inpainting', False) and not getattr(self, 'use_cloud_inpainting', False):
+ already_loaded, _lm = self._is_local_inpainter_loaded()
+ if not already_loaded:
+ import threading as _threading
+ local_method = (self.manga_settings.get('inpainting', {}) or {}).get('local_method', 'anime')
+ model_path = self.main_gui.config.get(f'manga_{local_method}_model_path', '') if hasattr(self, 'main_gui') else ''
+ adv = self.main_gui.config.get('manga_settings', {}).get('advanced', {}) if hasattr(self, 'main_gui') else {}
+ # Determine desired instances from panel-parallel settings
+ desired = 1
+ if adv.get('parallel_panel_translation', False):
+ try:
+ desired = max(1, int(adv.get('panel_max_workers', 2)))
+ except Exception:
+ desired = 2
+ # Honor advanced toggle for panel-local preload; for non-panel (desired==1) always allow
+ allow = True if desired == 1 else bool(adv.get('preload_local_inpainting_for_panels', True))
+ if allow:
+ self._inpaint_preload_event = _threading.Event()
+ def _preload_inp_many():
+ try:
+ self.preload_local_inpainters_concurrent(local_method, model_path, desired)
+ finally:
+ try:
+ self._inpaint_preload_event.set()
+ except Exception:
+ pass
+ _threading.Thread(target=_preload_inp_many, name="InpaintPreload@GoogleOCR", daemon=True).start()
+ except Exception:
+ pass
+
+ # Create Vision API image object (full-page fallback)
+ image = vision.Image(content=processed_image_data)
+
+ # Build image context with all parameters
+ image_context = vision.ImageContext(
+ language_hints=ocr_settings.get('language_hints', ['ja', 'ko', 'zh'])
+ )
+
+ # Add text detection params if available in your API version
+ if hasattr(vision, 'TextDetectionParams'):
+ image_context.text_detection_params = vision.TextDetectionParams(
+ enable_text_detection_confidence_score=True
+ )
+
+ # Configure text detection based on settings
+ detection_mode = ocr_settings.get('text_detection_mode', 'document')
+
+ if detection_mode == 'document':
+ response = self.vision_client.document_text_detection(
+ image=image,
+ image_context=image_context
+ )
+ else:
+ response = self.vision_client.text_detection(
+ image=image,
+ image_context=image_context
+ )
+
+ if response.error.message:
+ raise Exception(f"Cloud Vision API error: {response.error.message}")
+
+ # Process each page (usually just one for manga)
+ for page in response.full_text_annotation.pages:
+ for block in page.blocks:
+ # Extract text first to check if it's worth processing
+ block_text = ""
+ total_confidence = 0.0
+ word_count = 0
+
+ for paragraph in block.paragraphs:
+ for word in paragraph.words:
+ # Get word-level confidence (more reliable than block level)
+ word_confidence = getattr(word, 'confidence', 0.0) # Default to 0 if not available
+ word_text = ''.join([symbol.text for symbol in word.symbols])
+
+ # Only include words above threshold
+ if word_confidence >= confidence_threshold:
+ block_text += word_text + " "
+ total_confidence += word_confidence
+ word_count += 1
+ else:
+ if not getattr(self, 'concise_logs', False):
+ self._log(f" Skipping low confidence word ({word_confidence:.2f}): {word_text}")
+
+ block_text = block_text.strip()
+
+ # CLEAN ORIGINAL OCR TEXT - Fix cube characters and encoding issues
+ original_text = block_text
+ block_text = self._fix_encoding_issues(block_text)
+ block_text = self._sanitize_unicode_characters(block_text)
+
+ # Log cleaning if changes were made
+ if block_text != original_text:
+ self._log(f"🧹 Cleaned OCR text: '{original_text[:30]}...' → '{block_text[:30]}...'", "debug")
+
+ # TEXT FILTERING SECTION
+ # Skip if text is too short (after cleaning)
+ if len(block_text.strip()) < min_text_length:
+ if not getattr(self, 'concise_logs', False):
+ self._log(f" Skipping short text ({len(block_text)} chars): {block_text}")
+ continue
+
+ # Skip if primarily English and exclude_english is enabled
+ if exclude_english and self._is_primarily_english(block_text):
+ if not getattr(self, 'concise_logs', False):
+ self._log(f" Skipping English text: {block_text[:50]}...")
+ continue
+
+ # Skip if no confident words found
+ if word_count == 0 or not block_text:
+ if not getattr(self, 'concise_logs', False):
+ self._log(f" Skipping block - no words above threshold {confidence_threshold}")
+ continue
+
+ # Calculate average confidence for the block
+ avg_confidence = total_confidence / word_count if word_count > 0 else 0.0
+
+ # Extract vertices and create region
+ vertices = [(v.x, v.y) for v in block.bounding_box.vertices]
+
+ # Calculate bounding box
+ xs = [v[0] for v in vertices]
+ ys = [v[1] for v in vertices]
+ x_min, x_max = min(xs), max(xs)
+ y_min, y_max = min(ys), max(ys)
+
+ region = TextRegion(
+ text=block_text,
+ vertices=vertices,
+ bounding_box=(x_min, y_min, x_max - x_min, y_max - y_min),
+ confidence=avg_confidence, # Use average confidence
+ region_type='text_block'
+ )
+ regions.append(region)
+ if not getattr(self, 'concise_logs', False):
+ self._log(f" Found text region ({avg_confidence:.2f}): {block_text[:50]}...")
+
+ elif self.ocr_provider == 'azure':
+ # === AZURE COMPUTER VISION ===
+ # Ensure client exists (it might have been cleaned up between runs)
+ try:
+ self._ensure_azure_client()
+ except Exception:
+ pass
+ import io
+ import time
+ from azure.cognitiveservices.vision.computervision.models import OperationStatusCodes
+
+ # Check if we should use RT-DETR for text region detection (NEW FEATURE)
+ if ocr_settings.get('bubble_detection_enabled', False) and ocr_settings.get('use_rtdetr_for_ocr_regions', True):
+ self._log("🎯 Using RT-DETR to guide Azure Computer Vision OCR")
+
+ # Run RT-DETR to detect text regions first
+ _ = self._get_thread_bubble_detector()
+ rtdetr_detections = self._load_bubble_detector(ocr_settings, image_path)
+
+ if rtdetr_detections:
+ # Collect all text-containing regions WITH TYPE TRACKING
+ all_regions = []
+ # Track region type to assign bubble_type later
+ region_types = {}
+ idx = 0
+ if 'text_bubbles' in rtdetr_detections:
+ for bbox in rtdetr_detections.get('text_bubbles', []):
+ all_regions.append(bbox)
+ region_types[idx] = 'text_bubble'
+ idx += 1
+ if 'text_free' in rtdetr_detections:
+ for bbox in rtdetr_detections.get('text_free', []):
+ all_regions.append(bbox)
+ region_types[idx] = 'free_text'
+ idx += 1
+
+ if all_regions:
+ self._log(f"📊 RT-DETR detected {len(all_regions)} text regions, OCR-ing each with Azure Vision")
+
+ # Load image for cropping
+ import cv2
+ cv_image = cv2.imread(image_path)
+ if cv_image is None:
+ self._log("⚠️ Failed to load image, falling back to full-page OCR", "warning")
+ else:
+ ocr_results = []
+
+ # Get Azure settings
+ azure_reading_order = ocr_settings.get('azure_reading_order', 'natural')
+ azure_model_version = ocr_settings.get('azure_model_version', 'latest')
+ azure_max_wait = ocr_settings.get('azure_max_wait', 60)
+ azure_poll_interval = ocr_settings.get('azure_poll_interval', 1.0)
+
+ # Define worker function for concurrent OCR
+ def ocr_region_azure(region_data):
+ i, region_idx, x, y, w, h = region_data
+ try:
+ # Crop region
+ cropped = self._safe_crop_region(cv_image, x, y, w, h)
+ if cropped is None:
+ return None
+
+ # Validate and resize crop if needed (Azure Vision requires minimum dimensions)
+ h_crop, w_crop = cropped.shape[:2]
+ MIN_SIZE = 50 # Minimum dimension (Azure requirement)
+ MIN_AREA = 2500 # Minimum area (50x50)
+
+ if h_crop < MIN_SIZE or w_crop < MIN_SIZE or h_crop * w_crop < MIN_AREA:
+ # Region too small - try to resize it
+ scale_w = MIN_SIZE / w_crop if w_crop < MIN_SIZE else 1.0
+ scale_h = MIN_SIZE / h_crop if h_crop < MIN_SIZE else 1.0
+ scale = max(scale_w, scale_h)
+
+ if scale > 1.0:
+ new_w = int(w_crop * scale)
+ new_h = int(h_crop * scale)
+ cropped = cv2.resize(cropped, (new_w, new_h), interpolation=cv2.INTER_CUBIC)
+ self._log(f"🔍 Region {i} resized from {w_crop}x{h_crop}px to {new_w}x{new_h}px for Azure OCR", "debug")
+ h_crop, w_crop = new_h, new_w
+
+ # Final validation
+ if h_crop < 10 or w_crop < 10:
+ self._log(f"⚠️ Region {i} too small even after resize ({w_crop}x{h_crop}px), skipping", "debug")
+ return None
+
+ # RATE LIMITING: Add delay between Azure API calls to avoid "Too Many Requests"
+ # Azure Free tier: 20 calls/minute = 1 call per 3 seconds
+ # Azure Standard tier: Higher limits but still needs throttling
+ import time
+ import random
+ # Stagger requests with randomized delay (0.1-0.3 seconds)
+ time.sleep(0.1 + random.random() * 0.2) # 0.1-0.3s random delay
+
+ # Encode cropped image
+ _, encoded = cv2.imencode('.jpg', cropped, [cv2.IMWRITE_JPEG_QUALITY, 95])
+ region_image_bytes = encoded.tobytes()
+
+ # Call Azure Read API
+ read_response = self.vision_client.read_in_stream(
+ io.BytesIO(region_image_bytes),
+ language=ocr_settings.get('language_hints', ['ja'])[0] if ocr_settings.get('language_hints') else 'ja',
+ model_version=azure_model_version,
+ reading_order=azure_reading_order,
+ raw=True
+ )
+
+ # Get operation location
+ operation_location = read_response.headers['Operation-Location']
+ operation_id = operation_location.split('/')[-1]
+
+ # Poll for result
+ start_time = time.time()
+ while True:
+ result = self.vision_client.get_read_result(operation_id)
+ if result.status not in [OperationStatusCodes.not_started, OperationStatusCodes.running]:
+ break
+ if time.time() - start_time > azure_max_wait:
+ self._log(f"⚠️ Azure timeout for region {i}", "warning")
+ break
+ time.sleep(azure_poll_interval)
+
+ if result.status == OperationStatusCodes.succeeded:
+ # Extract text from result
+ region_text = ""
+ for text_result in result.analyze_result.read_results:
+ for line in text_result.lines:
+ region_text += line.text + "\n"
+
+ region_text = region_text.strip()
+ if region_text:
+ # Clean the text
+ region_text = self._fix_encoding_issues(region_text)
+ region_text = self._sanitize_unicode_characters(region_text)
+
+ # Create TextRegion with original image coordinates
+ region = TextRegion(
+ text=region_text,
+ vertices=[(x, y), (x+w, y), (x+w, y+h), (x, y+h)],
+ bounding_box=(x, y, w, h),
+ confidence=0.9, # RT-DETR confidence
+ region_type='text_block'
+ )
+ # Assign bubble_type from RT-DETR detection
+ region.bubble_type = region_types.get(region_idx, 'text_bubble')
+ if not getattr(self, 'concise_logs', False):
+ self._log(f"✅ Region {i}/{len(all_regions)} ({region.bubble_type}): {region_text[:50]}...")
+ return region
+ return None
+
+ except Exception as e:
+ # Provide more detailed error info for debugging
+ error_msg = str(e)
+ if 'Bad Request' in error_msg or 'invalid' in error_msg.lower() or 'Too Many Requests' in error_msg:
+ if 'Too Many Requests' in error_msg:
+ self._log(f"⏸️ Region {i}: Azure rate limit hit, consider increasing delays", "warning")
+ else:
+ self._log(f"⏭️ Skipping region {i}: Too small or invalid for Azure Vision", "debug")
+ else:
+ self._log(f"⚠️ Error OCR-ing region {i}: {e}", "warning")
+ return None
+
+ # Process regions concurrently with RT-DETR concurrency control
+ from concurrent.futures import ThreadPoolExecutor, as_completed
+ # Use rtdetr_max_concurrency setting (default 12)
+ # Note: Rate limiting is handled via 0.1-0.3s delays per request
+ max_workers = min(ocr_settings.get('rtdetr_max_concurrency', 12), len(all_regions))
+
+ region_data_list = [(i+1, i, x, y, w, h) for i, (x, y, w, h) in enumerate(all_regions)]
+
+ with ThreadPoolExecutor(max_workers=max_workers) as executor:
+ futures = {executor.submit(ocr_region_azure, rd): rd for rd in region_data_list}
+ for future in as_completed(futures):
+ try:
+ result = future.result()
+ if result:
+ regions.append(result)
+ finally:
+ # Clean up future to free memory
+ del future
+
+ # If we got results, sort and post-process
+ if regions:
+ # CRITICAL: Sort regions by position (top-to-bottom, left-to-right)
+ # Concurrent processing returns them in completion order, not detection order
+ regions.sort(key=lambda r: (r.bounding_box[1], r.bounding_box[0]))
+ self._log(f"✅ RT-DETR + Azure Vision: {len(regions)} text regions detected (sorted by position)")
+
+ # POST-PROCESS: Check for text_bubbles that overlap with free_text regions
+ # If a text_bubble's center is within a free_text bbox, reclassify it as free_text
+ free_text_bboxes = rtdetr_detections.get('text_free', [])
+
+ # DEBUG: Log what we have
+ self._log(f"🔍 POST-PROCESS: Found {len(free_text_bboxes)} free_text bboxes from RT-DETR", "debug")
+ for idx, (fx, fy, fw, fh) in enumerate(free_text_bboxes):
+ self._log(f" Free text bbox {idx+1}: x={fx:.0f}, y={fy:.0f}, w={fw:.0f}, h={fh:.0f}", "debug")
+
+ text_bubble_count = sum(1 for r in regions if getattr(r, 'bubble_type', None) == 'text_bubble')
+ free_text_count = sum(1 for r in regions if getattr(r, 'bubble_type', None) == 'free_text')
+ self._log(f"🔍 Before reclassification: {text_bubble_count} text_bubbles, {free_text_count} free_text", "debug")
+
+ if free_text_bboxes:
+ reclassified_count = 0
+ for region in regions:
+ if getattr(region, 'bubble_type', None) == 'text_bubble':
+ # Get region center
+ x, y, w, h = region.bounding_box
+ cx = x + w / 2
+ cy = y + h / 2
+
+ self._log(f" Checking text_bubble '{region.text[:30]}...' at center ({cx:.0f}, {cy:.0f})", "debug")
+
+ # Check if center is in any free_text bbox
+ for bbox_idx, (fx, fy, fw, fh) in enumerate(free_text_bboxes):
+ in_x = fx <= cx <= fx + fw
+ in_y = fy <= cy <= fy + fh
+ self._log(f" vs free_text bbox {bbox_idx+1}: in_x={in_x}, in_y={in_y}", "debug")
+
+ if in_x and in_y:
+ # Reclassify as free text
+ old_type = region.bubble_type
+ region.bubble_type = 'free_text'
+ reclassified_count += 1
+ self._log(f" ✅ RECLASSIFIED '{region.text[:30]}...' from {old_type} to free_text", "info")
+ break
+
+ if reclassified_count > 0:
+ self._log(f"🔄 Reclassified {reclassified_count} overlapping regions as free_text", "info")
+
+ # MERGE: Combine free_text regions that are within the same free_text bbox
+ # Group free_text regions by which free_text bbox they belong to
+ free_text_groups = {}
+ other_regions = []
+
+ for region in regions:
+ if getattr(region, 'bubble_type', None) == 'free_text':
+ # Find which free_text bbox this region belongs to
+ x, y, w, h = region.bounding_box
+ cx = x + w / 2
+ cy = y + h / 2
+
+ for bbox_idx, (fx, fy, fw, fh) in enumerate(free_text_bboxes):
+ if fx <= cx <= fx + fw and fy <= cy <= fy + fh:
+ if bbox_idx not in free_text_groups:
+ free_text_groups[bbox_idx] = []
+ free_text_groups[bbox_idx].append(region)
+ break
+ else:
+ # Free text region not in any bbox (shouldn't happen, but handle it)
+ other_regions.append(region)
+ else:
+ other_regions.append(region)
+
+ # Merge each group of free_text regions
+ merged_free_text = []
+ for bbox_idx, group in free_text_groups.items():
+ if len(group) > 1:
+ # Merge multiple free text regions in same bbox
+ merged_text = " ".join(r.text for r in group)
+
+ min_x = min(r.bounding_box[0] for r in group)
+ min_y = min(r.bounding_box[1] for r in group)
+ max_x = max(r.bounding_box[0] + r.bounding_box[2] for r in group)
+ max_y = max(r.bounding_box[1] + r.bounding_box[3] for r in group)
+
+ all_vertices = []
+ for r in group:
+ if hasattr(r, 'vertices') and r.vertices:
+ all_vertices.extend(r.vertices)
+
+ if not all_vertices:
+ all_vertices = [
+ (min_x, min_y),
+ (max_x, min_y),
+ (max_x, max_y),
+ (min_x, max_y)
+ ]
+
+ merged_region = TextRegion(
+ text=merged_text,
+ vertices=all_vertices,
+ bounding_box=(min_x, min_y, max_x - min_x, max_y - min_y),
+ confidence=0.95,
+ region_type='text_block'
+ )
+ merged_region.bubble_type = 'free_text'
+ merged_region.should_inpaint = True
+ merged_free_text.append(merged_region)
+ self._log(f"🔀 Merged {len(group)} free_text regions into one: '{merged_text[:50]}...'", "debug")
+ else:
+ # Single region, keep as-is
+ merged_free_text.extend(group)
+
+ # Combine all regions
+ regions = other_regions + merged_free_text
+ self._log(f"✅ Final: {len(regions)} regions after reclassification and merging", "info")
+
+ # Skip merging section and return directly
+ return regions
+ else:
+ self._log("⚠️ No text found in RT-DETR regions, falling back to full-page OCR", "warning")
+
+ # ROI-based concurrent OCR when bubble detection is enabled and batching is requested
+ try:
+ use_roi_locality = ocr_settings.get('bubble_detection_enabled', False) and ocr_settings.get('roi_locality_enabled', False)
+ if 'ocr_batch_enabled' in ocr_settings:
+ ocr_batch_enabled = bool(ocr_settings.get('ocr_batch_enabled'))
+ else:
+ ocr_batch_enabled = (os.getenv('BATCH_OCR', '0') == '1') or (os.getenv('BATCH_TRANSLATION', '0') == '1') or getattr(self, 'batch_mode', False)
+ bs = int(ocr_settings.get('ocr_batch_size') or 0)
+ if bs <= 0:
+ bs = int(os.getenv('OCR_BATCH_SIZE', '0') or 0)
+ if bs <= 0:
+ bs = int(os.getenv('BATCH_SIZE', str(getattr(self, 'batch_size', 1))) or 1)
+ ocr_batch_size = max(1, bs)
+ except Exception:
+ use_roi_locality = False
+ ocr_batch_enabled = False
+ ocr_batch_size = 1
+ if use_roi_locality and (ocr_batch_enabled or ocr_batch_size > 1):
+ rois = self._prepare_ocr_rois_from_bubbles(image_path, ocr_settings, preprocessing, page_hash)
+ if rois:
+ # AZURE RATE LIMITING: Force low concurrency to prevent "Too Many Requests"
+ # Azure has strict rate limits that vary by tier:
+ # - Free tier: 20 requests/minute
+ # - Standard tier: Higher but still limited
+ try:
+ azure_workers = int(ocr_settings.get('ocr_max_concurrency') or 0)
+ if azure_workers <= 0:
+ azure_workers = 1 # Force sequential by default
+ else:
+ azure_workers = min(2, max(1, azure_workers)) # Cap at 2 max
+ except Exception:
+ azure_workers = 1 # Safe default
+ regions = self._azure_ocr_rois_concurrent(rois, ocr_settings, azure_workers, page_hash)
+ self._log(f"✅ Azure OCR concurrent over {len(rois)} ROIs → {len(regions)} regions (workers={azure_workers})", "info")
+
+ # Force garbage collection after concurrent OCR to reduce memory spikes
+ try:
+ import gc
+ gc.collect()
+ except Exception:
+ pass
+
+ return regions
+
+ # Start local inpainter preload while Azure OCR runs (background; multiple if panel-parallel)
+ try:
+ if not getattr(self, 'skip_inpainting', False) and not getattr(self, 'use_cloud_inpainting', False):
+ already_loaded, _lm = self._is_local_inpainter_loaded()
+ if not already_loaded:
+ import threading as _threading
+ local_method = (self.manga_settings.get('inpainting', {}) or {}).get('local_method', 'anime')
+ model_path = self.main_gui.config.get(f'manga_{local_method}_model_path', '') if hasattr(self, 'main_gui') else ''
+ adv = self.main_gui.config.get('manga_settings', {}).get('advanced', {}) if hasattr(self, 'main_gui') else {}
+ desired = 1
+ if adv.get('parallel_panel_translation', False):
+ try:
+ desired = max(1, int(adv.get('panel_max_workers', 2)))
+ except Exception:
+ desired = 2
+ allow = True if desired == 1 else bool(adv.get('preload_local_inpainting_for_panels', True))
+ if allow:
+ self._inpaint_preload_event = _threading.Event()
+ def _preload_inp_many():
+ try:
+ self.preload_local_inpainters_concurrent(local_method, model_path, desired)
+ finally:
+ try:
+ self._inpaint_preload_event.set()
+ except Exception:
+ pass
+ _threading.Thread(target=_preload_inp_many, name="InpaintPreload@AzureOCR", daemon=True).start()
+ except Exception:
+ pass
+
+ # Ensure Azure-supported format for the BYTES we are sending.
+ # If compression is enabled and produced an Azure-supported format (JPEG/PNG/BMP/TIFF),
+ # DO NOT force-convert to PNG. Only convert when the current bytes are in an unsupported format.
+ file_ext = os.path.splitext(image_path)[1].lower()
+ azure_supported_exts = ['.jpg', '.jpeg', '.png', '.bmp', '.pdf', '.tiff']
+ azure_supported_fmts = ['jpeg', 'jpg', 'png', 'bmp', 'tiff']
+
+ # Probe the actual byte format we will upload
+ try:
+ from PIL import Image as _PILImage
+ img_probe = _PILImage.open(io.BytesIO(processed_image_data))
+ fmt = (img_probe.format or '').lower()
+ except Exception:
+ fmt = ''
+
+ # If original is a PDF, allow as-is (Azure supports PDF streams)
+ if file_ext == '.pdf':
+ needs_convert = False
+ else:
+ # Decide based on the detected format of the processed bytes
+ needs_convert = fmt not in azure_supported_fmts
+
+ if needs_convert:
+ # If compression settings are enabled and target format is Azure-supported, prefer that
+ try:
+ comp_cfg = (self.main_gui.config.get('manga_settings', {}) or {}).get('compression', {})
+ except Exception:
+ comp_cfg = {}
+
+ # Determine if conversion is actually needed based on compression and current format
+ try:
+ from PIL import Image as _PILImage
+ img2 = _PILImage.open(io.BytesIO(processed_image_data))
+ fmt_lower = (img2.format or '').lower()
+ except Exception:
+ img2 = None
+ fmt_lower = ''
+
+ accepted = {'jpeg', 'jpg', 'png', 'bmp', 'tiff'}
+ convert_needed = False
+ target_fmt = None
+
+ if comp_cfg.get('enabled', False):
+ cf = str(comp_cfg.get('format', '')).lower()
+ desired = None
+ if cf in ('jpeg', 'jpg'):
+ desired = 'JPEG'
+ elif cf == 'png':
+ desired = 'PNG'
+ elif cf == 'bmp':
+ desired = 'BMP'
+ elif cf == 'tiff':
+ desired = 'TIFF'
+ # If WEBP or others, desired remains None and we fall back to PNG only if unsupported
+
+ if desired is not None:
+ # Skip conversion if already in the desired supported format
+ already_matches = ((fmt_lower in ('jpeg', 'jpg') and desired == 'JPEG') or (fmt_lower == desired.lower()))
+ if not already_matches:
+ convert_needed = True
+ target_fmt = desired
+ else:
+ # Compression format not supported by Azure (e.g., WEBP); convert only if unsupported
+ if fmt_lower not in accepted:
+ convert_needed = True
+ target_fmt = 'PNG'
+ else:
+ # No compression preference; convert only if unsupported by Azure
+ if fmt_lower not in accepted:
+ convert_needed = True
+ target_fmt = 'PNG'
+
+ if convert_needed:
+ self._log(f"⚠️ Converting image to {target_fmt} for Azure compatibility")
+ try:
+ if img2 is None:
+ from PIL import Image as _PILImage
+ img2 = _PILImage.open(io.BytesIO(processed_image_data))
+ buffer = io.BytesIO()
+ if target_fmt == 'JPEG' and img2.mode != 'RGB':
+ img2 = img2.convert('RGB')
+ img2.save(buffer, format=target_fmt)
+ processed_image_data = buffer.getvalue()
+ except Exception:
+ pass
+
+ # Create stream from image data
+ image_stream = io.BytesIO(processed_image_data)
+
+ # Get Azure-specific settings
+ reading_order = ocr_settings.get('azure_reading_order', 'natural')
+ model_version = ocr_settings.get('azure_model_version', 'latest')
+ max_wait = ocr_settings.get('azure_max_wait', 60)
+ poll_interval = ocr_settings.get('azure_poll_interval', 0.5)
+
+ # Map language hints to Azure language codes
+ language_hints = ocr_settings.get('language_hints', ['ja', 'ko', 'zh'])
+
+ # Build parameters dictionary
+ read_params = {
+ 'raw': True,
+ 'readingOrder': reading_order
+ }
+
+ # Add model version if not using latest
+ if model_version != 'latest':
+ read_params['model-version'] = model_version
+
+ # Use language parameter only if single language is selected
+ if len(language_hints) == 1:
+ azure_lang = language_hints[0]
+ # Map to Azure language codes
+ lang_mapping = {
+ 'zh': 'zh-Hans',
+ 'zh-TW': 'zh-Hant',
+ 'zh-CN': 'zh-Hans',
+ 'ja': 'ja',
+ 'ko': 'ko',
+ 'en': 'en'
+ }
+ azure_lang = lang_mapping.get(azure_lang, azure_lang)
+ read_params['language'] = azure_lang
+ self._log(f" Using Azure Read API with language: {azure_lang}, order: {reading_order}")
+ else:
+ self._log(f" Using Azure Read API (auto-detect for {len(language_hints)} languages, order: {reading_order})")
+
+ # Start Read operation with error handling and rate limit retry
+ # Use max_retries from config (default 7, configurable in Other Settings)
+ max_retries = self.main_gui.config.get('max_retries', 7)
+ retry_delay = 60 # Start with 60 seconds for rate limits
+ read_response = None
+
+ for retry_attempt in range(max_retries):
+ try:
+ # Ensure client is alive before starting
+ if getattr(self, 'vision_client', None) is None:
+ self._log("⚠️ Azure client missing before read; reinitializing...", "warning")
+ self._ensure_azure_client()
+ if getattr(self, 'vision_client', None) is None:
+ raise RuntimeError("Azure Computer Vision client is not initialized. Check your key/endpoint and azure-cognitiveservices-vision-computervision installation.")
+
+ # Reset stream position for retry
+ image_stream.seek(0)
+
+ read_response = self.vision_client.read_in_stream(
+ image_stream,
+ **read_params
+ )
+ # Success! Break out of retry loop
+ break
+
+ except Exception as e:
+ error_msg = str(e)
+
+ # Handle rate limit errors with fixed 60s wait
+ if 'Too Many Requests' in error_msg or '429' in error_msg:
+ if retry_attempt < max_retries - 1:
+ wait_time = retry_delay # Fixed 60s wait each time
+ self._log(f"⚠️ Azure rate limit hit. Waiting {wait_time}s before retry {retry_attempt + 1}/{max_retries}...", "warning")
+ time.sleep(wait_time)
+ continue
+ else:
+ self._log(f"❌ Azure rate limit: Exhausted {max_retries} retries", "error")
+ raise
+
+ # Handle bad request errors
+ elif 'Bad Request' in error_msg:
+ self._log("⚠️ Azure Read API Bad Request - likely invalid image format or too small. Retrying without language parameter...", "warning")
+ # Retry without language parameter
+ image_stream.seek(0)
+ read_params.pop('language', None)
+ if getattr(self, 'vision_client', None) is None:
+ self._ensure_azure_client()
+ read_response = self.vision_client.read_in_stream(
+ image_stream,
+ **read_params
+ )
+ break
+ else:
+ raise
+
+ if read_response is None:
+ raise RuntimeError("Failed to get response from Azure Read API after retries")
+
+ # Get operation ID
+ operation_location = read_response.headers.get("Operation-Location") if hasattr(read_response, 'headers') else None
+ if not operation_location:
+ raise RuntimeError("Azure Read API did not return Operation-Location header")
+ operation_id = operation_location.split("/")[-1]
+
+ # Poll for results with configurable timeout
+ self._log(f" Waiting for Azure OCR to complete (max {max_wait}s)...")
+ wait_time = 0
+ last_status = None
+ result = None
+
+ while wait_time < max_wait:
+ try:
+ if getattr(self, 'vision_client', None) is None:
+ # Client got cleaned up mid-poll; reinitialize and continue
+ self._log("⚠️ Azure client became None during polling; reinitializing...", "warning")
+ self._ensure_azure_client()
+ if getattr(self, 'vision_client', None) is None:
+ raise AttributeError("Azure client lost and could not be reinitialized")
+ result = self.vision_client.get_read_result(operation_id)
+ except AttributeError as e:
+ # Defensive: reinitialize once and retry this iteration
+ self._log(f"⚠️ {e} — reinitializing Azure client and retrying once", "warning")
+ self._ensure_azure_client()
+ if getattr(self, 'vision_client', None) is None:
+ raise
+ result = self.vision_client.get_read_result(operation_id)
+
+ # Log status changes
+ if result.status != last_status:
+ self._log(f" Status: {result.status}")
+ last_status = result.status
+
+ if result.status not in [OperationStatusCodes.running, OperationStatusCodes.not_started]:
+ break
+
+ time.sleep(poll_interval)
+ self._log("💤 Azure OCR polling pausing briefly for stability", "debug")
+ wait_time += poll_interval
+
+ if not result:
+ raise RuntimeError("Azure Read API polling did not return a result")
+ if result.status == OperationStatusCodes.succeeded:
+ # Track statistics
+ total_lines = 0
+ handwritten_lines = 0
+
+ for page_num, page in enumerate(result.analyze_result.read_results):
+ if len(result.analyze_result.read_results) > 1:
+ self._log(f" Processing page {page_num + 1}/{len(result.analyze_result.read_results)}")
+
+ for line in page.lines:
+ # CLEAN ORIGINAL OCR TEXT FOR AZURE - Fix cube characters and encoding issues
+ original_azure_text = line.text
+ cleaned_line_text = self._fix_encoding_issues(line.text)
+ cleaned_line_text = self._sanitize_unicode_characters(cleaned_line_text)
+
+ # Log cleaning if changes were made
+ if cleaned_line_text != original_azure_text:
+ self._log(f"🧹 Cleaned Azure OCR text: '{original_azure_text[:30]}...' → '{cleaned_line_text[:30]}...'", "debug")
+
+ # TEXT FILTERING FOR AZURE
+ # Skip if text is too short (after cleaning)
+ if len(cleaned_line_text.strip()) < min_text_length:
+ if not getattr(self, 'concise_logs', False):
+ self._log(f" Skipping short text ({len(cleaned_line_text)} chars): {cleaned_line_text}")
+ continue
+
+ # Skip if primarily English and exclude_english is enabled (use cleaned text)
+ if exclude_english and self._is_primarily_english(cleaned_line_text):
+ if not getattr(self, 'concise_logs', False):
+ self._log(f" Skipping English text: {cleaned_line_text[:50]}...")
+ continue
+
+ # Azure provides 8-point bounding box
+ bbox = line.bounding_box
+ vertices = [
+ (bbox[0], bbox[1]),
+ (bbox[2], bbox[3]),
+ (bbox[4], bbox[5]),
+ (bbox[6], bbox[7])
+ ]
+
+ # Calculate rectangular bounding box
+ xs = [v[0] for v in vertices]
+ ys = [v[1] for v in vertices]
+ x_min, x_max = min(xs), max(xs)
+ y_min, y_max = min(ys), max(ys)
+
+ # Calculate confidence from word-level data
+ confidence = 0.95 # Default high confidence
+
+ if hasattr(line, 'words') and line.words:
+ # Calculate average confidence from words
+ confidences = []
+ for word in line.words:
+ if hasattr(word, 'confidence'):
+ confidences.append(word.confidence)
+
+ if confidences:
+ confidence = sum(confidences) / len(confidences)
+ if not getattr(self, 'concise_logs', False):
+ self._log(f" Line has {len(line.words)} words, avg confidence: {confidence:.3f}")
+
+ # Check for handwriting style (if available)
+ style = 'print' # Default
+ style_confidence = None
+
+ if hasattr(line, 'appearance') and line.appearance:
+ if hasattr(line.appearance, 'style'):
+ style_info = line.appearance.style
+ if hasattr(style_info, 'name'):
+ style = style_info.name
+ if style == 'handwriting':
+ handwritten_lines += 1
+ if hasattr(style_info, 'confidence'):
+ style_confidence = style_info.confidence
+ if not getattr(self, 'concise_logs', False):
+ self._log(f" Style: {style} (confidence: {style_confidence:.2f})")
+
+ # Apply confidence threshold filtering
+ if confidence >= confidence_threshold:
+ region = TextRegion(
+ text=cleaned_line_text, # Use cleaned text instead of original
+ vertices=vertices,
+ bounding_box=(x_min, y_min, x_max - x_min, y_max - y_min),
+ confidence=confidence,
+ region_type='text_line'
+ )
+
+ # Add extra attributes for Azure-specific info
+ region.style = style
+ region.style_confidence = style_confidence
+
+ regions.append(region)
+ total_lines += 1
+
+ # More detailed logging (use cleaned text)
+ if not getattr(self, 'concise_logs', False):
+ if style == 'handwriting':
+ self._log(f" Found handwritten text ({confidence:.2f}): {cleaned_line_text[:50]}...")
+ else:
+ self._log(f" Found text region ({confidence:.2f}): {cleaned_line_text[:50]}...")
+ else:
+ if not getattr(self, 'concise_logs', False):
+ self._log(f" Skipping low confidence text ({confidence:.2f}): {cleaned_line_text[:30]}...")
+
+ # Log summary statistics
+ if total_lines > 0 and not getattr(self, 'concise_logs', False):
+ self._log(f" Total lines detected: {total_lines}")
+ if handwritten_lines > 0:
+ self._log(f" Handwritten lines: {handwritten_lines} ({handwritten_lines/total_lines*100:.1f}%)")
+
+ elif result.status == OperationStatusCodes.failed:
+ # More detailed error handling
+ error_msg = "Azure OCR failed"
+ if hasattr(result, 'message'):
+ error_msg += f": {result.message}"
+ if hasattr(result.analyze_result, 'errors') and result.analyze_result.errors:
+ for error in result.analyze_result.errors:
+ self._log(f" Error: {error}", "error")
+ raise Exception(error_msg)
+ else:
+ # Timeout or other status
+ raise Exception(f"Azure OCR ended with status: {result.status} after {wait_time}s")
+
+ else:
+ # === NEW OCR PROVIDERS ===
+ import cv2
+ import numpy as np
+ from ocr_manager import OCRManager
+
+ # Load image as numpy array
+ if isinstance(processed_image_data, bytes):
+ # Convert bytes to numpy array
+ nparr = np.frombuffer(processed_image_data, np.uint8)
+ image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
+ else:
+ # Load from file path
+ image = cv2.imread(image_path)
+ if image is None:
+ # Try with PIL for Unicode paths
+ from PIL import Image as PILImage
+ pil_image = PILImage.open(image_path)
+ image = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)
+
+ # Ensure OCR manager is available
+ if not hasattr(self, 'ocr_manager') or self.ocr_manager is None:
+ try:
+ # Prefer GUI-provided manager if available
+ if hasattr(self, 'main_gui') and hasattr(self.main_gui, 'ocr_manager') and self.main_gui.ocr_manager is not None:
+ self.ocr_manager = self.main_gui.ocr_manager
+ else:
+ from ocr_manager import OCRManager
+ self.ocr_manager = OCRManager(log_callback=self.log_callback)
+ self._log("Initialized internal OCRManager instance", "info")
+ except Exception as _e:
+ self.ocr_manager = None
+ self._log(f"Failed to initialize OCRManager: {str(_e)}", "error")
+ if self.ocr_manager is None:
+ raise RuntimeError("OCRManager is not available; cannot proceed with OCR provider.")
+
+ # Check provider status and load if needed
+ provider_status = self.ocr_manager.check_provider_status(self.ocr_provider)
+
+ if not provider_status['installed']:
+ self._log(f"❌ {self.ocr_provider} is not installed", "error")
+ self._log(f" Please install it from the GUI settings", "error")
+ raise Exception(f"{self.ocr_provider} OCR provider is not installed")
+
+ # Start local inpainter preload while provider is being readied/used (non-cloud path only; background)
+ try:
+ if not getattr(self, 'skip_inpainting', False) and not getattr(self, 'use_cloud_inpainting', False):
+ already_loaded, _lm = self._is_local_inpainter_loaded()
+ if not already_loaded:
+ import threading as _threading
+ local_method = (self.manga_settings.get('inpainting', {}) or {}).get('local_method', 'anime')
+ model_path = self.main_gui.config.get(f'manga_{local_method}_model_path', '') if hasattr(self, 'main_gui') else ''
+ adv = self.main_gui.config.get('manga_settings', {}).get('advanced', {}) if hasattr(self, 'main_gui') else {}
+ desired = 1
+ if adv.get('parallel_panel_translation', False):
+ try:
+ desired = max(1, int(adv.get('panel_max_workers', 2)))
+ except Exception:
+ desired = 2
+ allow = True if desired == 1 else bool(adv.get('preload_local_inpainting_for_panels', True))
+ if allow:
+ self._inpaint_preload_event = _threading.Event()
+ def _preload_inp_many():
+ try:
+ self.preload_local_inpainters_concurrent(local_method, model_path, desired)
+ finally:
+ try:
+ self._inpaint_preload_event.set()
+ except Exception:
+ pass
+ _threading.Thread(target=_preload_inp_many, name="InpaintPreload@OCRProvider", daemon=True).start()
+ except Exception:
+ pass
+
+ if not provider_status['loaded']:
+ # Check if Qwen2-VL - if it's supposedly not loaded but actually is, skip
+ if self.ocr_provider == 'Qwen2-VL':
+ provider = self.ocr_manager.get_provider('Qwen2-VL')
+ if provider and hasattr(provider, 'model') and provider.model is not None:
+ self._log("✅ Qwen2-VL model actually already loaded, skipping reload")
+ success = True
+ else:
+ # Only actually load if truly not loaded
+ model_size = self.ocr_config.get('model_size', '2') if hasattr(self, 'ocr_config') else '2'
+ self._log(f"Loading Qwen2-VL with model_size={model_size}")
+ success = self.ocr_manager.load_provider(self.ocr_provider, model_size=model_size)
+ if not success:
+ raise Exception(f"Failed to load {self.ocr_provider} model")
+ elif self.ocr_provider == 'custom-api':
+ # Custom API needs to initialize UnifiedClient with credentials
+ self._log("📡 Loading custom-api provider...")
+ # Try to get API key and model from GUI if available
+ load_kwargs = {}
+ if hasattr(self, 'main_gui'):
+ # Get API key from GUI
+ if hasattr(self.main_gui, 'api_key_entry'):
+ api_key = self.main_gui.api_key_entry.get()
+ if api_key:
+ load_kwargs['api_key'] = api_key
+ # Get model from GUI
+ if hasattr(self.main_gui, 'model_var'):
+ model = self.main_gui.model_var.get()
+ if model:
+ load_kwargs['model'] = model
+ success = self.ocr_manager.load_provider(self.ocr_provider, **load_kwargs)
+ if not success:
+ raise Exception(f"Failed to initialize {self.ocr_provider}")
+ else:
+ # Other providers
+ success = self.ocr_manager.load_provider(self.ocr_provider)
+ if not success:
+ raise Exception(f"Failed to load {self.ocr_provider} model")
+
+ if not success:
+ raise Exception(f"Failed to load {self.ocr_provider} model")
+
+ # Initialize ocr_results here before any provider-specific code
+ ocr_results = []
+
+ # Special handling for manga-ocr (needs region detection first)
+ if self.ocr_provider == 'manga-ocr':
+ # IMPORTANT: Initialize fresh results list
+ ocr_results = []
+
+ # Check if we should use bubble detection for regions
+ if ocr_settings.get('bubble_detection_enabled', False):
+ self._log("📝 Using bubble detection regions for manga-ocr...")
+
+ # Run bubble detection to get regions
+ if self.bubble_detector is None:
+ from bubble_detector import BubbleDetector
+ self.bubble_detector = BubbleDetector()
+
+ # Get regions from bubble detector
+ rtdetr_detections = self._load_bubble_detector(ocr_settings, image_path)
+ if rtdetr_detections:
+
+ # Process detections immediately and don't store
+ all_regions = []
+
+ # ONLY ADD TEXT-CONTAINING REGIONS
+ # Skip empty bubbles since they shouldn't have text
+ if 'text_bubbles' in rtdetr_detections:
+ all_regions.extend(rtdetr_detections.get('text_bubbles', []))
+ if 'text_free' in rtdetr_detections:
+ all_regions.extend(rtdetr_detections.get('text_free', []))
+
+ # DO NOT ADD empty bubbles - they're duplicates of text_bubbles and carry no text
+
+ self._log(f"📊 Processing {len(all_regions)} text-containing regions (skipping empty bubbles)")
+
+ # Clear detection results after extracting regions
+ rtdetr_detections = None
+
+ # Check if parallel processing is enabled
+ if self.parallel_processing and len(all_regions) > 1:
+ self._log(f"🚀 Using PARALLEL OCR for {len(all_regions)} regions with manga-ocr")
+ ocr_results = self._parallel_ocr_regions(image, all_regions, 'manga-ocr', confidence_threshold)
+ else:
+ # Process each region with manga-ocr
+ for i, (x, y, w, h) in enumerate(all_regions):
+ cropped = self._safe_crop_region(image, x, y, w, h)
+ if cropped is None:
+ continue
+ result = self.ocr_manager.detect_text(cropped, 'manga-ocr', confidence=confidence_threshold)
+ if result and len(result) > 0 and result[0].text.strip():
+ result[0].bbox = (x, y, w, h)
+ result[0].vertices = [(x, y), (x+w, y), (x+w, y+h), (x, y+h)]
+ # CRITICAL: Store RT-DETR bubble bounds for rendering
+ # The bbox/vertices are the small OCR polygon, but bubble_bounds is the full RT-DETR bubble
+ result[0].bubble_bounds = (x, y, w, h)
+ ocr_results.append(result[0])
+ self._log(f"✅ Region {i+1}/{len(all_regions)}: {result[0].text[:50]}...")
+
+ # Clear regions list after processing
+ all_regions = None
+ else:
+ # NO bubble detection - just process full image
+ self._log("📝 Processing full image with manga-ocr (no bubble detection)")
+ ocr_results = self.ocr_manager.detect_text(image, self.ocr_provider, confidence=confidence_threshold)
+
+ elif self.ocr_provider == 'Qwen2-VL':
+ # Initialize results list
+ ocr_results = []
+
+ # Configure Qwen2-VL for Korean text
+ language_hints = ocr_settings.get('language_hints', ['ko'])
+ self._log("🍩 Qwen2-VL OCR for Korean text recognition")
+
+ # Check if we should use bubble detection for regions
+ if ocr_settings.get('bubble_detection_enabled', False):
+ self._log("📝 Using bubble detection regions for Qwen2-VL...")
+
+ # Run bubble detection to get regions (thread-local)
+ _ = self._get_thread_bubble_detector()
+
+ # Get regions from bubble detector
+ rtdetr_detections = self._load_bubble_detector(ocr_settings, image_path)
+ if rtdetr_detections:
+
+ # Process only text-containing regions
+ all_regions = []
+ if 'text_bubbles' in rtdetr_detections:
+ all_regions.extend(rtdetr_detections.get('text_bubbles', []))
+ if 'text_free' in rtdetr_detections:
+ all_regions.extend(rtdetr_detections.get('text_free', []))
+
+ self._log(f"📊 Processing {len(all_regions)} text regions with Qwen2-VL")
+
+ # Check if parallel processing is enabled
+ if self.parallel_processing and len(all_regions) > 1:
+ self._log(f"🚀 Using PARALLEL OCR for {len(all_regions)} regions with Qwen2-VL")
+ ocr_results = self._parallel_ocr_regions(image, all_regions, 'Qwen2-VL', confidence_threshold)
+ else:
+ # Process each region with Qwen2-VL
+ for i, (x, y, w, h) in enumerate(all_regions):
+ cropped = self._safe_crop_region(image, x, y, w, h)
+ if cropped is None:
+ continue
+ result = self.ocr_manager.detect_text(cropped, 'Qwen2-VL', confidence=confidence_threshold)
+ if result and len(result) > 0 and result[0].text.strip():
+ result[0].bbox = (x, y, w, h)
+ result[0].vertices = [(x, y), (x+w, y), (x+w, y+h), (x, y+h)]
+ ocr_results.append(result[0])
+ self._log(f"✅ Region {i+1}: {result[0].text[:50]}...")
+ else:
+ # Process full image without bubble detection
+ self._log("📝 Processing full image with Qwen2-VL")
+ ocr_results = self.ocr_manager.detect_text(image, self.ocr_provider)
+
+ elif self.ocr_provider == 'custom-api':
+ # Initialize results list
+ ocr_results = []
+
+ # Configure Custom API for text extraction
+ self._log("🔌 Using Custom API for OCR")
+
+ # Check if we should use bubble detection for regions
+ if ocr_settings.get('bubble_detection_enabled', False):
+ self._log("📝 Using bubble detection regions for Custom API...")
+
+ # Run bubble detection to get regions (thread-local)
+ _ = self._get_thread_bubble_detector()
+
+ # Get regions from bubble detector
+ rtdetr_detections = self._load_bubble_detector(ocr_settings, image_path)
+ if rtdetr_detections:
+
+ # Process only text-containing regions
+ all_regions = []
+ if 'text_bubbles' in rtdetr_detections:
+ all_regions.extend(rtdetr_detections.get('text_bubbles', []))
+ if 'text_free' in rtdetr_detections:
+ all_regions.extend(rtdetr_detections.get('text_free', []))
+
+ self._log(f"📊 Processing {len(all_regions)} text regions with Custom API")
+
+ # Clear detections after extracting regions
+ rtdetr_detections = None
+
+ # Decide parallelization for custom-api:
+ # Use API batch mode OR local parallel toggle so that API calls can run in parallel
+ if (getattr(self, 'batch_mode', False) or self.parallel_processing) and len(all_regions) > 1:
+ self._log(f"🚀 Using PARALLEL OCR for {len(all_regions)} regions (custom-api; API batch mode honored)")
+ ocr_results = self._parallel_ocr_regions(image, all_regions, 'custom-api', confidence_threshold)
+ else:
+ # Original sequential processing
+ for i, (x, y, w, h) in enumerate(all_regions):
+ cropped = self._safe_crop_region(image, x, y, w, h)
+ if cropped is None:
+ continue
+ result = self.ocr_manager.detect_text(
+ cropped,
+ 'custom-api',
+ confidence=confidence_threshold
+ )
+ if result and len(result) > 0 and result[0].text.strip():
+ result[0].bbox = (x, y, w, h)
+ result[0].vertices = [(x, y), (x+w, y), (x+w, y+h), (x, y+h)]
+ ocr_results.append(result[0])
+ self._log(f"🔍 Region {i+1}/{len(all_regions)}: {result[0].text[:50]}...")
+
+ # Clear regions list after processing
+ all_regions = None
+ else:
+ # Process full image without bubble detection
+ self._log("📝 Processing full image with Custom API")
+ ocr_results = self.ocr_manager.detect_text(
+ image,
+ 'custom-api',
+ confidence=confidence_threshold
+ )
+
+ elif self.ocr_provider == 'easyocr':
+ # Initialize results list
+ ocr_results = []
+
+ # Configure EasyOCR languages
+ language_hints = ocr_settings.get('language_hints', ['ja', 'en'])
+ validated_languages = self._validate_easyocr_languages(language_hints)
+
+ easyocr_provider = self.ocr_manager.get_provider('easyocr')
+ if easyocr_provider:
+ if easyocr_provider.languages != validated_languages:
+ easyocr_provider.languages = validated_languages
+ easyocr_provider.is_loaded = False
+ self._log(f"🔥 Reloading EasyOCR with languages: {validated_languages}")
+ self.ocr_manager.load_provider('easyocr')
+
+ # Check if we should use bubble detection
+ if ocr_settings.get('bubble_detection_enabled', False):
+ self._log("📝 Using bubble detection regions for EasyOCR...")
+
+ # Run bubble detection to get regions (thread-local)
+ _ = self._get_thread_bubble_detector()
+
+ # Get regions from bubble detector
+ rtdetr_detections = self._load_bubble_detector(ocr_settings, image_path)
+ if rtdetr_detections:
+
+ # Process only text-containing regions
+ all_regions = []
+ if 'text_bubbles' in rtdetr_detections:
+ all_regions.extend(rtdetr_detections.get('text_bubbles', []))
+ if 'text_free' in rtdetr_detections:
+ all_regions.extend(rtdetr_detections.get('text_free', []))
+
+ self._log(f"📊 Processing {len(all_regions)} text regions with EasyOCR")
+
+ # Check if parallel processing is enabled
+ if self.parallel_processing and len(all_regions) > 1:
+ self._log(f"🚀 Using PARALLEL OCR for {len(all_regions)} regions with EasyOCR")
+ ocr_results = self._parallel_ocr_regions(image, all_regions, 'easyocr', confidence_threshold)
+ else:
+ # Process each region with EasyOCR
+ for i, (x, y, w, h) in enumerate(all_regions):
+ cropped = self._safe_crop_region(image, x, y, w, h)
+ if cropped is None:
+ continue
+ result = self.ocr_manager.detect_text(cropped, 'easyocr', confidence=confidence_threshold)
+ if result and len(result) > 0 and result[0].text.strip():
+ result[0].bbox = (x, y, w, h)
+ result[0].vertices = [(x, y), (x+w, y), (x+w, y+h), (x, y+h)]
+ ocr_results.append(result[0])
+ self._log(f"✅ Region {i+1}: {result[0].text[:50]}...")
+ else:
+ # Process full image without bubble detection
+ self._log("📝 Processing full image with EasyOCR")
+ ocr_results = self.ocr_manager.detect_text(image, self.ocr_provider)
+
+ elif self.ocr_provider == 'paddleocr':
+ # Initialize results list
+ ocr_results = []
+
+ # Configure PaddleOCR language
+ language_hints = ocr_settings.get('language_hints', ['ja'])
+ lang_map = {'ja': 'japan', 'ko': 'korean', 'zh': 'ch', 'en': 'en'}
+ paddle_lang = lang_map.get(language_hints[0] if language_hints else 'ja', 'japan')
+
+ # Reload if language changed
+ paddle_provider = self.ocr_manager.get_provider('paddleocr')
+ if paddle_provider and paddle_provider.is_loaded:
+ if hasattr(paddle_provider.model, 'lang') and paddle_provider.model.lang != paddle_lang:
+ from paddleocr import PaddleOCR
+ paddle_provider.model = PaddleOCR(
+ use_angle_cls=True,
+ lang=paddle_lang,
+ use_gpu=True,
+ show_log=False
+ )
+ self._log(f"🔥 Reloaded PaddleOCR with language: {paddle_lang}")
+
+ # Check if we should use bubble detection
+ if ocr_settings.get('bubble_detection_enabled', False):
+ self._log("📝 Using bubble detection regions for PaddleOCR...")
+
+ # Run bubble detection to get regions (thread-local)
+ _ = self._get_thread_bubble_detector()
+
+ # Get regions from bubble detector
+ rtdetr_detections = self._load_bubble_detector(ocr_settings, image_path)
+ if rtdetr_detections:
+
+ # Process only text-containing regions
+ all_regions = []
+ if 'text_bubbles' in rtdetr_detections:
+ all_regions.extend(rtdetr_detections.get('text_bubbles', []))
+ if 'text_free' in rtdetr_detections:
+ all_regions.extend(rtdetr_detections.get('text_free', []))
+
+ self._log(f"📊 Processing {len(all_regions)} text regions with PaddleOCR")
+
+ # Check if parallel processing is enabled
+ if self.parallel_processing and len(all_regions) > 1:
+ self._log(f"🚀 Using PARALLEL OCR for {len(all_regions)} regions with PaddleOCR")
+ ocr_results = self._parallel_ocr_regions(image, all_regions, 'paddleocr', confidence_threshold)
+ else:
+ # Process each region with PaddleOCR
+ for i, (x, y, w, h) in enumerate(all_regions):
+ cropped = self._safe_crop_region(image, x, y, w, h)
+ if cropped is None:
+ continue
+ result = self.ocr_manager.detect_text(cropped, 'paddleocr', confidence=confidence_threshold)
+ if result and len(result) > 0 and result[0].text.strip():
+ result[0].bbox = (x, y, w, h)
+ result[0].vertices = [(x, y), (x+w, y), (x+w, y+h), (x, y+h)]
+ ocr_results.append(result[0])
+ self._log(f"✅ Region {i+1}: {result[0].text[:50]}...")
+ else:
+ # Process full image without bubble detection
+ self._log("📝 Processing full image with PaddleOCR")
+ ocr_results = self.ocr_manager.detect_text(image, self.ocr_provider)
+
+ elif self.ocr_provider == 'doctr':
+ # Initialize results list
+ ocr_results = []
+
+ self._log("📄 DocTR OCR for document text recognition")
+
+ # Check if we should use bubble detection
+ if ocr_settings.get('bubble_detection_enabled', False):
+ self._log("📝 Using bubble detection regions for DocTR...")
+
+ # Run bubble detection to get regions (thread-local)
+ _ = self._get_thread_bubble_detector()
+
+ # Get regions from bubble detector
+ rtdetr_detections = self._load_bubble_detector(ocr_settings, image_path)
+ if rtdetr_detections:
+
+ # Process only text-containing regions
+ all_regions = []
+ if 'text_bubbles' in rtdetr_detections:
+ all_regions.extend(rtdetr_detections.get('text_bubbles', []))
+ if 'text_free' in rtdetr_detections:
+ all_regions.extend(rtdetr_detections.get('text_free', []))
+
+ self._log(f"📊 Processing {len(all_regions)} text regions with DocTR")
+
+ # Check if parallel processing is enabled
+ if self.parallel_processing and len(all_regions) > 1:
+ self._log(f"🚀 Using PARALLEL OCR for {len(all_regions)} regions with DocTR")
+ ocr_results = self._parallel_ocr_regions(image, all_regions, 'doctr', confidence_threshold)
+ else:
+ # Process each region with DocTR
+ for i, (x, y, w, h) in enumerate(all_regions):
+ cropped = self._safe_crop_region(image, x, y, w, h)
+ if cropped is None:
+ continue
+ result = self.ocr_manager.detect_text(cropped, 'doctr', confidence=confidence_threshold)
+ if result and len(result) > 0 and result[0].text.strip():
+ result[0].bbox = (x, y, w, h)
+ result[0].vertices = [(x, y), (x+w, y), (x+w, y+h), (x, y+h)]
+ ocr_results.append(result[0])
+ self._log(f"✅ Region {i+1}: {result[0].text[:50]}...")
+ else:
+ # Process full image without bubble detection
+ self._log("📝 Processing full image with DocTR")
+ ocr_results = self.ocr_manager.detect_text(image, self.ocr_provider)
+
+ elif self.ocr_provider == 'rapidocr':
+ # Initialize results list
+ ocr_results = []
+
+ # Get RapidOCR settings
+ rapid_cfg = self.main_gui.config if hasattr(self, 'main_gui') else {}  # guard for headless mode, matching other branches
+ use_recognition = rapid_cfg.get('rapidocr_use_recognition', True)
+ language = rapid_cfg.get('rapidocr_language', 'auto')
+ detection_mode = rapid_cfg.get('rapidocr_detection_mode', 'document')
+
+ self._log(f"⚡ RapidOCR - Recognition: {'Full' if use_recognition else 'Detection Only'}")
+
+ # ALWAYS process full image with RapidOCR for best results
+ self._log("📊 Processing full image with RapidOCR")
+ ocr_results = self.ocr_manager.detect_text(
+ image,
+ 'rapidocr',
+ confidence=confidence_threshold,
+ use_recognition=use_recognition,
+ language=language,
+ detection_mode=detection_mode
+ )
+
+ # RT-DETR detection only affects merging, not OCR
+ if ocr_settings.get('bubble_detection_enabled', False):
+ self._log("🤖 RT-DETR will be used for bubble-based merging")
+
+ else:
+ # Default processing for any other providers
+ ocr_results = self.ocr_manager.detect_text(image, self.ocr_provider)
+
+ # Convert OCR results to TextRegion format
+ for result in ocr_results:
+ # CLEAN ORIGINAL OCR TEXT - Fix cube characters and encoding issues
+ original_ocr_text = result.text
+ cleaned_result_text = self._fix_encoding_issues(result.text)
+ cleaned_result_text = self._normalize_unicode_width(cleaned_result_text)
+ cleaned_result_text = self._sanitize_unicode_characters(cleaned_result_text)
+
+ # Log cleaning if changes were made
+ if cleaned_result_text != original_ocr_text:
+ self._log(f"🧹 Cleaned OCR manager text: '{original_ocr_text[:30]}...' → '{cleaned_result_text[:30]}...'", "debug")
+
+ # Apply filtering (use cleaned text)
+ if len(cleaned_result_text.strip()) < min_text_length:
+ if not getattr(self, 'concise_logs', False):
+ self._log(f" Skipping short text ({len(cleaned_result_text)} chars): {cleaned_result_text}")
+ continue
+
+ if exclude_english and self._is_primarily_english(cleaned_result_text):
+ if not getattr(self, 'concise_logs', False):
+ self._log(f" Skipping English text: {cleaned_result_text[:50]}...")
+ continue
+
+ if result.confidence < confidence_threshold:
+ if not getattr(self, 'concise_logs', False):
+ self._log(f" Skipping low confidence ({result.confidence:.2f}): {cleaned_result_text[:30]}...")
+ continue
+
+ # Create TextRegion (use cleaned text)
+ # CRITICAL: Preserve bubble_bounds if it was set during OCR (e.g., manga-ocr with RT-DETR)
+ region_kwargs = {
+ 'text': cleaned_result_text, # Use cleaned text instead of original
+ 'vertices': result.vertices if result.vertices else [
+ (result.bbox[0], result.bbox[1]),
+ (result.bbox[0] + result.bbox[2], result.bbox[1]),
+ (result.bbox[0] + result.bbox[2], result.bbox[1] + result.bbox[3]),
+ (result.bbox[0], result.bbox[1] + result.bbox[3])
+ ],
+ 'bounding_box': result.bbox,
+ 'confidence': result.confidence,
+ 'region_type': 'text_block'
+ }
+ # Preserve bubble_bounds from OCR result if present
+ if hasattr(result, 'bubble_bounds') and result.bubble_bounds is not None:
+ region_kwargs['bubble_bounds'] = result.bubble_bounds
+ self._log(f" 🔍 Preserved bubble_bounds from OCR: {result.bubble_bounds}", "debug")
+ else:
+ if hasattr(result, 'bubble_bounds'):
+ self._log(f" ⚠️ OCR result has bubble_bounds but it's None!", "debug")
+ else:
+ self._log(f" ℹ️ OCR result has no bubble_bounds attribute", "debug")
+
+ region = TextRegion(**region_kwargs)
+ regions.append(region)
+ if not getattr(self, 'concise_logs', False):
+ self._log(f" Found text ({result.confidence:.2f}): {cleaned_result_text[:50]}...")
+
+ # MERGING SECTION (applies to all providers)
+ # Check if bubble detection is enabled
+ if ocr_settings.get('bubble_detection_enabled', False):
+ # For manga-ocr and similar providers, skip merging since regions already have bubble_bounds from OCR
+ # Only Azure and Google need merging because they return line-level OCR results
+ if self.ocr_provider in ['manga-ocr', 'Qwen2-VL', 'custom-api', 'easyocr', 'paddleocr', 'doctr']:
+ self._log("🎯 Skipping bubble detection merge for manga-ocr (regions already aligned with RT-DETR)")
+ # Regions already have bubble_bounds set from OCR phase - no need to merge
+ else:
+ # Azure and Google return line-level results that need to be merged into bubbles
+ self._log("🤖 Using AI bubble detection for merging")
+ regions = self._merge_with_bubble_detection(regions, image_path)
+ else:
+ # Traditional merging
+ merge_threshold = ocr_settings.get('merge_nearby_threshold', 20)
+
+ # Apply provider-specific adjustments
+ if self.ocr_provider == 'azure':
+ azure_multiplier = ocr_settings.get('azure_merge_multiplier', 2.0)
+ merge_threshold = int(merge_threshold * azure_multiplier)
+ self._log(f"📋 Using Azure-adjusted merge threshold: {merge_threshold}px")
+
+ # Pre-group Azure lines if the method exists
+ if hasattr(self, '_pregroup_azure_lines'):
+ regions = self._pregroup_azure_lines(regions, merge_threshold)
+
+ elif self.ocr_provider in ['paddleocr', 'easyocr', 'doctr']:
+ # These providers often return smaller text segments
+ line_multiplier = ocr_settings.get('line_ocr_merge_multiplier', 1.5)
+ merge_threshold = int(merge_threshold * line_multiplier)
+ self._log(f"📋 Using line-based OCR adjusted threshold: {merge_threshold}px")
+
+ # Apply standard merging
+ regions = self._merge_nearby_regions(regions, threshold=merge_threshold)
+
+ self._log(f"✅ Detected {len(regions)} text regions after merging")
+
+ # NOTE: Debug images are saved in process_image() with correct output_dir
+ # Removed duplicate save here to avoid creating unexpected 'translated_images' folders
+
+ return regions
+
+ except Exception as e:
+ self._log(f"❌ Error detecting text: {str(e)}", "error")
+ import traceback
+ self._log(traceback.format_exc(), "error")
+ raise
+
+ def _validate_easyocr_languages(self, languages):
+ """Validate EasyOCR language combinations"""
+ # EasyOCR compatibility rules
+ incompatible_sets = [
+ {'ja', 'ko'}, # Japanese + Korean
+ {'ja', 'zh'}, # Japanese + Chinese
+ {'ko', 'zh'} # Korean + Chinese
+ ]
+
+ lang_set = set(languages)
+
+ for incompatible in incompatible_sets:
+ if incompatible.issubset(lang_set):
+ # Conflict detected - keep first language + English
+ primary_lang = languages[0] if languages else 'en'
+ result = [primary_lang, 'en'] if primary_lang != 'en' else ['en']
+
+ self._log(f"⚠️ EasyOCR: {' + '.join(incompatible)} not compatible", "warning")
+ self._log(f"🔧 Auto-adjusted from {languages} to {result}", "info")
+ return result
+
+ return languages
+
+ def _parallel_ocr_regions(self, image: np.ndarray, regions: List, provider: str, confidence_threshold: float) -> List:
+ """Process multiple regions in parallel using ThreadPoolExecutor"""
+ from concurrent.futures import ThreadPoolExecutor, as_completed
+ import threading
+
+ ocr_results = []
+ results_lock = threading.Lock()
+
+ def process_single_region(index: int, bbox: Tuple[int, int, int, int]):
+ """Process a single region with OCR"""
+ x, y, w, h = bbox
+ try:
+ # Use the safe crop method
+ cropped = self._safe_crop_region(image, x, y, w, h)
+
+ # Skip if crop failed
+ if cropped is None:
+ self._log(f"⚠️ Skipping region {index} - invalid crop", "warning")
+ return (index, None)
+
+ # Run OCR on this region
+ result = self.ocr_manager.detect_text(
+ cropped,
+ provider,
+ confidence=confidence_threshold
+ )
+
+ if result and len(result) > 0 and result[0].text.strip():
+ # Adjust coordinates to full image space
+ result[0].bbox = (x, y, w, h)
+ result[0].vertices = [(x, y), (x+w, y), (x+w, y+h), (x, y+h)]
+ # CRITICAL: Store RT-DETR bubble bounds for rendering (for non-Azure/Google providers)
+ result[0].bubble_bounds = (x, y, w, h)
+ return (index, result[0])
+ return (index, None)
+
+ except Exception as e:
+ self._log(f"Error processing region {index}: {str(e)}", "error")
+ return (index, None)
+
+ # Process regions in parallel
+ max_workers = self.manga_settings.get('advanced', {}).get('max_workers', 4)
+ # For custom-api, treat OCR calls as API calls: use batch size when batch mode is enabled
+ try:
+ if provider == 'custom-api':
+ # prefer MangaTranslator.batch_size (from env BATCH_SIZE)
+ bs = int(getattr(self, 'batch_size', 0) or int(os.getenv('BATCH_SIZE', '0')))
+ if bs and bs > 0:
+ max_workers = bs
+ except Exception:
+ pass
+ # Never spawn more workers than regions
+ max_workers = max(1, min(max_workers, len(regions)))
+ with ThreadPoolExecutor(max_workers=max_workers) as executor:
+ # Submit all tasks
+ future_to_index = {}
+ for i, bbox in enumerate(regions):
+ future = executor.submit(process_single_region, i, bbox)
+ future_to_index[future] = i
+
+ # Collect results
+ results_dict = {}
+ completed = 0
+ for future in as_completed(future_to_index):
+ try:
+ index, result = future.result(timeout=30)
+ if result:
+ results_dict[index] = result
+ completed += 1
+ self._log(f"✅ [{completed}/{len(regions)}] Processed region {index+1}")
+ except Exception as e:
+ self._log(f"Failed to process region: {str(e)}", "error")
+
+ # Sort results by index to maintain order
+ for i in range(len(regions)):
+ if i in results_dict:
+ ocr_results.append(results_dict[i])
+
+ self._log(f"📊 Parallel OCR complete: {len(ocr_results)}/{len(regions)} regions extracted")
+ return ocr_results
+
+ def _pregroup_azure_lines(self, lines: List[TextRegion], base_threshold: int) -> List[TextRegion]:
+ """Pre-group Azure lines that are obviously part of the same text block
+ This makes them more like Google's blocks before the main merge logic"""
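+ # Lines are grouped when they are vertically adjacent (gap under half a line height) and roughly centered on each other,
+ # approximating bubble-level blocks before the generic merge pass runs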
+
+ if len(lines) <= 1:
+ return lines
+
+ # Sort by vertical position first, then horizontal
+ lines.sort(key=lambda r: (r.bounding_box[1], r.bounding_box[0]))
+
+ pregrouped = []
+ i = 0
+
+ while i < len(lines):
+ current_group = [lines[i]]
+ current_bbox = list(lines[i].bounding_box)
+
+ # Look ahead for lines that should obviously be grouped
+ j = i + 1
+ while j < len(lines):
+ x1, y1, w1, h1 = current_bbox
+ x2, y2, w2, h2 = lines[j].bounding_box
+
+ # Calculate gaps
+ vertical_gap = y2 - (y1 + h1) if y2 > y1 + h1 else 0
+
+ # Check horizontal alignment
+ center_x1 = x1 + w1 / 2
+ center_x2 = x2 + w2 / 2
+ horizontal_offset = abs(center_x1 - center_x2)
+ avg_width = (w1 + w2) / 2
+
+ # Group if:
+ # 1. Lines are vertically adjacent (small gap)
+ # 2. Lines are well-aligned horizontally (likely same bubble)
+ if (vertical_gap < h1 * 0.5 and # Less than half line height gap
+ horizontal_offset < avg_width * 0.5): # Well centered
+
+ # Add to group
+ current_group.append(lines[j])
+
+ # Update bounding box to include new line
+ min_x = min(x1, x2)
+ min_y = min(y1, y2)
+ max_x = max(x1 + w1, x2 + w2)
+ max_y = max(y1 + h1, y2 + h2)
+ current_bbox = [min_x, min_y, max_x - min_x, max_y - min_y]
+
+ j += 1
+ else:
+ break
+
+ # Create merged region from group
+ if len(current_group) > 1:
+ merged_text = " ".join([line.text for line in current_group])
+ all_vertices = []
+ for line in current_group:
+ all_vertices.extend(line.vertices)
+
+ merged_region = TextRegion(
+ text=merged_text,
+ vertices=all_vertices,
+ bounding_box=tuple(current_bbox),
+ confidence=0.95,
+ region_type='pregrouped_lines'
+ )
+ pregrouped.append(merged_region)
+
+ self._log(f" Pre-grouped {len(current_group)} Azure lines into block")
+ else:
+ # Single line, keep as is
+ pregrouped.append(lines[i])
+
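+ # Continue scanning from the first line that was not absorbed into the current group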
+ i = j if j > i + 1 else i + 1
+
+ self._log(f" Azure pre-grouping: {len(lines)} lines → {len(pregrouped)} blocks")
+ return pregrouped
+
+ def _safe_crop_region(self, image, x, y, w, h):
+ """Safely crop a region from image with validation"""
+ img_h, img_w = image.shape[:2]
+
+ # Validate and clamp coordinates
+ x = max(0, min(x, img_w - 1))
+ y = max(0, min(y, img_h - 1))
+ x2 = min(x + w, img_w)
+ y2 = min(y + h, img_h)
+
+ # Ensure valid region
+ if x2 <= x or y2 <= y:
+ self._log(f"⚠️ Invalid crop region: ({x},{y},{w},{h}) for image {img_w}x{img_h}", "warning")
+ return None
+
+ # Minimum size check
+ if (x2 - x) < 5 or (y2 - y) < 5:
+ self._log(f"⚠️ Region too small: {x2-x}x{y2-y} pixels", "warning")
+ return None
+
+ cropped = image[y:y2, x:x2]
+
+ if cropped.size == 0:
+ self._log(f"⚠️ Empty crop result", "warning")
+ return None
+
+ return cropped
+
+ def _prepare_ocr_rois_from_bubbles(self, image_path: str, ocr_settings: Dict, preprocessing: Dict, page_hash: str) -> List[Dict[str, Any]]:
+ """Prepare ROI crops (bytes) from bubble detection to use with OCR locality.
+ - Enhancements/resizing are gated by preprocessing['enabled'].
+ - Compression/encoding is controlled by manga_settings['compression'] independently.
+ Returns list of dicts: {id, bbox, bytes, type, page_hash}
+ """
+ try:
+ # Run bubble detector and collect text-containing boxes
+ detections = self._load_bubble_detector(ocr_settings, image_path)
+ if not detections:
+ return []
+ regions = []
+ for key in ('text_bubbles', 'text_free'):
+ for i, (bx, by, bw, bh) in enumerate(detections.get(key, []) or []):
+ regions.append({'type': 'text_bubble' if key == 'text_bubbles' else 'free_text',
+ 'bbox': (int(bx), int(by), int(bw), int(bh)),
+ 'id': f"{key}_{i}"})
+ if not regions:
+ return []
+
+ # Open original image once
+ pil = Image.open(image_path)
+ if pil.mode != 'RGB':
+ pil = pil.convert('RGB')
+
+ pad_ratio = float(ocr_settings.get('roi_padding_ratio', 0.08)) # 8% padding default
+ preproc_enabled = bool(preprocessing.get('enabled', False))
+ # Compression settings (separate from preprocessing)
+ comp = {}
+ try:
+ comp = (self.main_gui.config.get('manga_settings', {}) or {}).get('compression', {})
+ except Exception:
+ comp = {}
+ comp_enabled = bool(comp.get('enabled', False))
+ comp_format = str(comp.get('format', 'jpeg')).lower()
+ jpeg_q = int(comp.get('jpeg_quality', 85))
+ png_lvl = int(comp.get('png_compress_level', 6))
+ webp_q = int(comp.get('webp_quality', 85))
+
+ out = []
+ W, H = pil.size
+ # Pre-filter tiny ROIs (skip before cropping)
+ min_side_px = int(ocr_settings.get('roi_min_side_px', 12))
+ min_area_px = int(ocr_settings.get('roi_min_area_px', 100))
+ for rec in regions:
+ x, y, w, h = rec['bbox']
+ if min(w, h) < max(1, min_side_px) or (w * h) < max(1, min_area_px):
+ # Skip tiny ROI
+ continue
+ # Apply padding
+ px = int(w * pad_ratio)
+ py = int(h * pad_ratio)
+ x1 = max(0, x - px)
+ y1 = max(0, y - py)
+ x2 = min(W, x + w + px)
+ y2 = min(H, y + h + py)
+ if x2 <= x1 or y2 <= y1:
+ continue
+ crop = pil.crop((x1, y1, x2, y2))
+
+ # Quality-affecting steps only when preprocessing enabled
+ if preproc_enabled:
+ try:
+ # Enhance contrast/sharpness/brightness if configured
+ c = float(preprocessing.get('contrast_threshold', 0.4))
+ s = float(preprocessing.get('sharpness_threshold', 0.3))
+ g = float(preprocessing.get('enhancement_strength', 1.5))
+ if c:
+ crop = ImageEnhance.Contrast(crop).enhance(1 + c)
+ if s:
+ crop = ImageEnhance.Sharpness(crop).enhance(1 + s)
+ if g and g != 1.0:
+ crop = ImageEnhance.Brightness(crop).enhance(g)
+ # Optional ROI resize limit (short side cap)
+ roi_max_side = int(ocr_settings.get('roi_max_side', 0) or 0)
+ if roi_max_side and (crop.width > roi_max_side or crop.height > roi_max_side):
+ ratio = min(roi_max_side / crop.width, roi_max_side / crop.height)
+ crop = crop.resize((max(1, int(crop.width * ratio)), max(1, int(crop.height * ratio))), Image.Resampling.LANCZOS)
+ except Exception:
+ pass
+ # Encoding/Compression independent of preprocessing
+ from io import BytesIO
+ buf = BytesIO()
+ try:
+ if comp_enabled:
+ if comp_format in ('jpeg', 'jpg'):
+ if crop.mode != 'RGB':
+ crop = crop.convert('RGB')
+ crop.save(buf, format='JPEG', quality=max(1, min(95, jpeg_q)), optimize=True, progressive=True)
+ elif comp_format == 'png':
+ crop.save(buf, format='PNG', optimize=True, compress_level=max(0, min(9, png_lvl)))
+ elif comp_format == 'webp':
+ crop.save(buf, format='WEBP', quality=max(1, min(100, webp_q)))
+ else:
+ crop.save(buf, format='PNG', optimize=True)
+ else:
+ # Default lossless PNG
+ crop.save(buf, format='PNG', optimize=True)
+ img_bytes = buf.getvalue()
+ except Exception:
+ buf = BytesIO()
+ crop.save(buf, format='PNG', optimize=True)
+ img_bytes = buf.getvalue()
+
+ out.append({
+ 'id': rec['id'],
+ 'bbox': (x, y, w, h), # keep original bbox without padding for placement
+ 'bytes': img_bytes,
+ 'type': rec['type'],
+ 'page_hash': page_hash
+ })
+ return out
+ except Exception as e:
+ self._log(f"⚠️ ROI preparation failed: {e}", "warning")
+ return []
+
+ def _google_ocr_rois_batched(self, rois: List[Dict[str, Any]], ocr_settings: Dict, batch_size: int, max_concurrency: int, page_hash: str) -> List[TextRegion]:
+ """Batch OCR of ROI crops using Google Vision batchAnnotateImages.
+ - Uses bounded concurrency for multiple batches in flight.
+ - Consults and updates an in-memory ROI OCR cache.
+ """
+ try:
+ from google.cloud import vision as _vision
+ except Exception:
+ self._log("❌ Google Vision SDK not available for ROI batching", "error")
+ return []
+
+ lang_hints = ocr_settings.get('language_hints', ['ja', 'ko', 'zh'])
+ detection_mode = ocr_settings.get('text_detection_mode', 'document')
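+ # 'document' mode maps to DOCUMENT_TEXT_DETECTION (dense/structured text); any other value falls back to plain TEXT_DETECTION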
+ feature_type = _vision.Feature.Type.DOCUMENT_TEXT_DETECTION if detection_mode == 'document' else _vision.Feature.Type.TEXT_DETECTION
+ feature = _vision.Feature(type=feature_type)
+
+ results: List[TextRegion] = []
+ min_text_length = int(ocr_settings.get('min_text_length', 2))
+ exclude_english = bool(ocr_settings.get('exclude_english_text', True))
+
+ # Check cache first and build work list of uncached ROIs
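+ # Cache keys combine provider, page hash, bbox, language hints, detection mode and ROI type,
+ # so identical ROIs on the same page reuse cached text instead of being re-OCR'd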
+ work_rois = []
+ for roi in rois:
+ x, y, w, h = roi['bbox']
+ # Include region type in cache key to prevent mismapping
+ cache_key = ("google", page_hash, x, y, w, h, tuple(lang_hints), detection_mode, roi.get('type', 'unknown'))
+ # THREAD-SAFE: Use lock for cache access in parallel panel translation
+ with self._cache_lock:
+ cached_text = self.ocr_roi_cache.get(cache_key)
+ if cached_text:
+ region = TextRegion(
+ text=cached_text,
+ vertices=[(x, y), (x+w, y), (x+w, y+h), (x, y+h)],
+ bounding_box=(x, y, w, h),
+ confidence=0.95,
+ region_type='ocr_roi'
+ )
+ try:
+ region.bubble_type = 'free_text' if roi.get('type') == 'free_text' else 'text_bubble'
+ region.should_inpaint = True
+ except Exception:
+ pass
+ results.append(region)
+ else:
+ roi['cache_key'] = cache_key
+ work_rois.append(roi)
+
+ if not work_rois:
+ return results
+
+ # Create batches
+ batch_size = max(1, batch_size)
+ batches = [work_rois[i:i+batch_size] for i in range(0, len(work_rois), batch_size)]
+ max_concurrency = max(1, int(max_concurrency or 1))
+
+ def do_batch(batch):
+ # RATE LIMITING: Add small delay before batch submission
+ import time
+ import random
+ time.sleep(0.1 + random.random() * 0.2) # 0.1-0.3s random delay
+
+ requests = []
+ for roi in batch:
+ img = _vision.Image(content=roi['bytes'])
+ ctx = _vision.ImageContext(language_hints=list(lang_hints))
+ req = _vision.AnnotateImageRequest(image=img, features=[feature], image_context=ctx)
+ requests.append(req)
+ return self.vision_client.batch_annotate_images(requests=requests), batch
+
+ # Execute with concurrency
+ if max_concurrency == 1 or len(batches) == 1:
+ # Sequential path: reuse do_batch so request building and rate limiting stay consistent with the concurrent path
+ iter_batches = [do_batch(batch) for batch in batches]
+ else:
+ from concurrent.futures import ThreadPoolExecutor, as_completed
+ iter_batches = []
+ with ThreadPoolExecutor(max_workers=max_concurrency) as ex:
+ futures = [ex.submit(do_batch, b) for b in batches]
+ for fut in as_completed(futures):
+ try:
+ iter_batches.append(fut.result())
+ except Exception as e:
+ self._log(f"⚠️ Google batch failed: {e}", "warning")
+ continue
+
+ # Consume responses and update cache
+ for resp, batch in iter_batches:
+ for roi, ann in zip(batch, resp.responses):
+ if getattr(ann, 'error', None) and ann.error.message:
+ self._log(f"⚠️ ROI OCR error: {ann.error.message}", "warning")
+ continue
+ text = ''
+ try:
+ if getattr(ann, 'full_text_annotation', None) and ann.full_text_annotation.text:
+ text = ann.full_text_annotation.text
+ elif ann.text_annotations:
+ text = ann.text_annotations[0].description
+ except Exception:
+ text = ''
+ text = (text or '').strip()
+ text_clean = self._sanitize_unicode_characters(self._fix_encoding_issues(text))
+ if len(text_clean.strip()) < min_text_length:
+ continue
+ if exclude_english and self._is_primarily_english(text_clean):
+ continue
+ x, y, w, h = roi['bbox']
+ # Update cache
+ # THREAD-SAFE: Use lock for cache write in parallel panel translation
+ try:
+ ck = roi.get('cache_key') or ("google", page_hash, x, y, w, h, tuple(lang_hints), detection_mode, roi.get('type', 'unknown'))
+ with self._cache_lock:
+ self.ocr_roi_cache[ck] = text_clean
+ except Exception:
+ pass
+ region = TextRegion(
+ text=text_clean,
+ vertices=[(x, y), (x+w, y), (x+w, y+h), (x, y+h)],
+ bounding_box=(x, y, w, h),
+ confidence=0.95,
+ region_type='ocr_roi'
+ )
+ try:
+ region.bubble_type = 'free_text' if roi.get('type') == 'free_text' else 'text_bubble'
+ region.should_inpaint = True
+ except Exception:
+ pass
+ results.append(region)
+ return results
+
+ def _azure_ocr_rois_concurrent(self, rois: List[Dict[str, Any]], ocr_settings: Dict, max_workers: int, page_hash: str) -> List[TextRegion]:
+ """Concurrent ROI OCR for Azure Read API. Each ROI is sent as a separate call.
+ Concurrency is bounded by max_workers. Consults/updates cache.
+ """
+ from concurrent.futures import ThreadPoolExecutor, as_completed
+ from azure.cognitiveservices.vision.computervision.models import OperationStatusCodes
+ import io
+ results: List[TextRegion] = []
+
+ # Read settings
+ reading_order = ocr_settings.get('azure_reading_order', 'natural')
+ model_version = ocr_settings.get('azure_model_version', 'latest')
+ language_hints = ocr_settings.get('language_hints', ['ja'])
+ read_params = {'raw': True, 'readingOrder': reading_order}
+ if model_version != 'latest':
+ read_params['model-version'] = model_version
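+ # Only pin a specific language when exactly one hint is given; otherwise let Azure auto-detect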
+ if len(language_hints) == 1:
+ lang_mapping = {'zh': 'zh-Hans', 'zh-TW': 'zh-Hant', 'zh-CN': 'zh-Hans', 'ja': 'ja', 'ko': 'ko', 'en': 'en'}
+ read_params['language'] = lang_mapping.get(language_hints[0], language_hints[0])
+
+ min_text_length = int(ocr_settings.get('min_text_length', 2))
+ exclude_english = bool(ocr_settings.get('exclude_english_text', True))
+
+ # Check cache first and split into cached vs work rois
+ cached_regions: List[TextRegion] = []
+ work_rois: List[Dict[str, Any]] = []
+ for roi in rois:
+ x, y, w, h = roi['bbox']
+ # Include region type in cache key to prevent mismapping
+ cache_key = ("azure", page_hash, x, y, w, h, reading_order, roi.get('type', 'unknown'))
+ # THREAD-SAFE: Use lock for cache access in parallel panel translation
+ with self._cache_lock:
+ text_cached = self.ocr_roi_cache.get(cache_key)
+ if text_cached:
+ region = TextRegion(
+ text=text_cached,
+ vertices=[(x, y), (x+w, y), (x+w, y+h), (x, y+h)],
+ bounding_box=(x, y, w, h),
+ confidence=0.95,
+ region_type='ocr_roi'
+ )
+ try:
+ region.bubble_type = 'free_text' if roi.get('type') == 'free_text' else 'text_bubble'
+ region.should_inpaint = True
+ except Exception:
+ pass
+ cached_regions.append(region)
+ else:
+ roi['cache_key'] = cache_key
+ work_rois.append(roi)
+
+ def ocr_one(roi):
+ try:
+ # RATE LIMITING: Add delay between Azure API calls to avoid "Too Many Requests"
+ import time
+ import random
+ # Stagger requests with randomized delay
+ time.sleep(0.1 + random.random() * 0.2) # 0.1-0.3s random delay
+
+ # Ensure Azure-supported format for ROI bytes; honor compression preference when possible
+ data = roi['bytes']
+ try:
+ from PIL import Image as _PILImage
+ im = _PILImage.open(io.BytesIO(data))
+ fmt = (im.format or '').lower()
+ if fmt not in ['jpeg', 'jpg', 'png', 'bmp', 'tiff']:
+ # Choose conversion target based on compression settings if available
+ try:
+ comp_cfg = (self.main_gui.config.get('manga_settings', {}) or {}).get('compression', {})
+ except Exception:
+ comp_cfg = {}
+ target_fmt = 'PNG'
+ try:
+ if comp_cfg.get('enabled', False):
+ cf = str(comp_cfg.get('format', '')).lower()
+ if cf in ('jpeg', 'jpg'):
+ target_fmt = 'JPEG'
+ elif cf == 'png':
+ target_fmt = 'PNG'
+ elif cf == 'bmp':
+ target_fmt = 'BMP'
+ elif cf == 'tiff':
+ target_fmt = 'TIFF'
+ except Exception:
+ pass
+ buf2 = io.BytesIO()
+ if target_fmt == 'JPEG' and im.mode != 'RGB':
+ im = im.convert('RGB')
+ im.save(buf2, format=target_fmt)
+ data = buf2.getvalue()
+ except Exception:
+ pass
+ stream = io.BytesIO(data)
+ read_response = self.vision_client.read_in_stream(stream, **read_params)
+ op_loc = read_response.headers.get('Operation-Location') if hasattr(read_response, 'headers') else None
+ if not op_loc:
+ return None
+ op_id = op_loc.split('/')[-1]
+ # Poll until the Read operation completes or max_wait elapses ('time' already imported above)
+ waited = 0.0
+ poll_interval = float(ocr_settings.get('azure_poll_interval', 0.5))
+ max_wait = float(ocr_settings.get('azure_max_wait', 60))
+ while waited < max_wait:
+ result = self.vision_client.get_read_result(op_id)
+ if result.status not in [OperationStatusCodes.running, OperationStatusCodes.not_started]:
+ break
+ time.sleep(poll_interval)
+ waited += poll_interval
+ if result.status != OperationStatusCodes.succeeded:
+ return None
+ # Aggregate text lines
+ texts = []
+ for page in result.analyze_result.read_results:
+ for line in page.lines:
+ t = self._sanitize_unicode_characters(self._fix_encoding_issues(line.text or ''))
+ if t:
+ texts.append(t)
+ text_all = ' '.join(texts).strip()
+ if len(text_all) < min_text_length:
+ return None
+ if exclude_english and self._is_primarily_english(text_all):
+ return None
+ x, y, w, h = roi['bbox']
+ # Update cache
+ # THREAD-SAFE: Use lock for cache write in parallel panel translation
+ try:
+ ck = roi.get('cache_key')
+ if ck:
+ with self._cache_lock:
+ self.ocr_roi_cache[ck] = text_all
+ except Exception:
+ pass
+ region = TextRegion(
+ text=text_all,
+ vertices=[(x, y), (x+w, y), (x+w, y+h), (x, y+h)],
+ bounding_box=(x, y, w, h),
+ confidence=0.95,
+ region_type='ocr_roi'
+ )
+ try:
+ region.bubble_type = 'free_text' if roi.get('type') == 'free_text' else 'text_bubble'
+ region.should_inpaint = True
+ except Exception:
+ pass
+ return region
+ except Exception:
+ return None
+
+ # Combine cached and new results
+ results.extend(cached_regions)
+
+ if work_rois:
+ max_workers = max(1, min(max_workers, len(work_rois)))
+ with ThreadPoolExecutor(max_workers=max_workers) as ex:
+ fut_map = {ex.submit(ocr_one, r): r for r in work_rois}
+ for fut in as_completed(fut_map):
+ reg = fut.result()
+ if reg is not None:
+ results.append(reg)
+ return results
+
+ def _detect_text_azure(self, image_data: bytes, ocr_settings: dict) -> List[TextRegion]:
+ """Detect text using Azure Computer Vision"""
+ import io
+ from azure.cognitiveservices.vision.computervision.models import OperationStatusCodes
+
+ stream = io.BytesIO(image_data)
+
+ # Use Read API for better manga text detection
+ read_result = self.vision_client.read_in_stream(
+ stream,
+ raw=True,
+ language='ja' # or from ocr_settings
+ )
+
+ # Get operation ID from headers
+ operation_location = read_result.headers["Operation-Location"]
+ operation_id = operation_location.split("/")[-1]
+
+ # Wait for completion
+ import time
+ while True:
+ result = self.vision_client.get_read_result(operation_id)
+ if result.status not in [OperationStatusCodes.running, OperationStatusCodes.not_started]:
+ break
+ time.sleep(0.1) # Brief pause for stability
+ self._log("💤 Azure text detection pausing briefly for stability", "debug")
+
+ regions = []
+ confidence_threshold = ocr_settings.get('confidence_threshold', 0.6)
+
+ if result.status == OperationStatusCodes.succeeded:
+ for page in result.analyze_result.read_results:
+ for line in page.lines:
+ # Azure returns bounding box as 8 coordinates
+ bbox = line.bounding_box
+ vertices = [
+ (bbox[0], bbox[1]),
+ (bbox[2], bbox[3]),
+ (bbox[4], bbox[5]),
+ (bbox[6], bbox[7])
+ ]
+
+ xs = [v[0] for v in vertices]
+ ys = [v[1] for v in vertices]
+ x_min, x_max = min(xs), max(xs)
+ y_min, y_max = min(ys), max(ys)
+
+ # Azure doesn't provide per-line confidence in Read API
+ confidence = 0.95 # Default high confidence
+
+ if confidence >= confidence_threshold:
+ region = TextRegion(
+ text=line.text,
+ vertices=vertices,
+ bounding_box=(x_min, y_min, x_max - x_min, y_max - y_min),
+ confidence=confidence,
+ region_type='text_line'
+ )
+ regions.append(region)
+
+ return regions
+
+ def _load_image_with_compression_only(self, image_path: str, comp: Dict) -> bytes:
+ """Load image and apply compression settings only (no enhancements/resizing)."""
+ from io import BytesIO
+ pil = Image.open(image_path)
+ if pil.mode != 'RGB':
+ pil = pil.convert('RGB')
+ buf = BytesIO()
+ try:
+ fmt = str(comp.get('format', 'jpeg')).lower()
+ if fmt in ('jpeg', 'jpg'):
+ q = max(1, min(95, int(comp.get('jpeg_quality', 85))))
+ pil.save(buf, format='JPEG', quality=q, optimize=True, progressive=True)
+ elif fmt == 'png':
+ lvl = max(0, min(9, int(comp.get('png_compress_level', 6))))
+ pil.save(buf, format='PNG', optimize=True, compress_level=lvl)
+ elif fmt == 'webp':
+ wq = max(1, min(100, int(comp.get('webp_quality', 85))))
+ pil.save(buf, format='WEBP', quality=wq)
+ else:
+ pil.save(buf, format='PNG', optimize=True)
+ except Exception:
+ pil.save(buf, format='PNG', optimize=True)
+ return buf.getvalue()
+
+ def _preprocess_image(self, image_path: str, preprocessing_settings: Dict) -> bytes:
+ """Preprocess image for better OCR results
+ - Enhancements/resizing controlled by preprocessing_settings
+ - Compression controlled by manga_settings['compression'] independently
+ """
+ try:
+ # Open image with PIL
+ pil_image = Image.open(image_path)
+
+ # Convert to RGB if necessary
+ if pil_image.mode != 'RGB':
+ pil_image = pil_image.convert('RGB')
+
+ # Auto-detect quality issues if enabled
+ if preprocessing_settings.get('auto_detect_quality', True):
+ needs_enhancement = self._detect_quality_issues(pil_image, preprocessing_settings)
+ if needs_enhancement:
+ self._log(" Auto-detected quality issues - applying enhancements")
+ else:
+ needs_enhancement = True
+
+ if needs_enhancement:
+ # Apply contrast enhancement
+ contrast_threshold = preprocessing_settings.get('contrast_threshold', 0.4)
+ enhancer = ImageEnhance.Contrast(pil_image)
+ pil_image = enhancer.enhance(1 + contrast_threshold)
+
+ # Apply sharpness enhancement
+ sharpness_threshold = preprocessing_settings.get('sharpness_threshold', 0.3)
+ enhancer = ImageEnhance.Sharpness(pil_image)
+ pil_image = enhancer.enhance(1 + sharpness_threshold)
+
+ # Apply general enhancement strength
+ enhancement_strength = preprocessing_settings.get('enhancement_strength', 1.5)
+ if enhancement_strength != 1.0:
+ # Brightness adjustment
+ enhancer = ImageEnhance.Brightness(pil_image)
+ pil_image = enhancer.enhance(enhancement_strength)
+
+ # Resize if too large
+ max_dimension = preprocessing_settings.get('max_image_dimension', 2000)
+ if pil_image.width > max_dimension or pil_image.height > max_dimension:
+ ratio = min(max_dimension / pil_image.width, max_dimension / pil_image.height)
+ new_size = (int(pil_image.width * ratio), int(pil_image.height * ratio))
+ pil_image = pil_image.resize(new_size, Image.Resampling.LANCZOS)
+ self._log(f" Resized image to {new_size[0]}x{new_size[1]}")
+
+ # Convert back to bytes with compression settings from global config
+ from io import BytesIO
+ buffered = BytesIO()
+ comp = {}
+ try:
+ comp = (self.main_gui.config.get('manga_settings', {}) or {}).get('compression', {})
+ except Exception:
+ comp = {}
+ try:
+ if comp.get('enabled', False):
+ fmt = str(comp.get('format', 'jpeg')).lower()
+ if fmt in ('jpeg', 'jpg'):
+ if pil_image.mode != 'RGB':
+ pil_image = pil_image.convert('RGB')
+ quality = max(1, min(95, int(comp.get('jpeg_quality', 85))))
+ pil_image.save(buffered, format='JPEG', quality=quality, optimize=True, progressive=True)
+ self._log(f" Compressed image as JPEG (q={quality})")
+ elif fmt == 'png':
+ level = max(0, min(9, int(comp.get('png_compress_level', 6))))
+ pil_image.save(buffered, format='PNG', optimize=True, compress_level=level)
+ self._log(f" Compressed image as PNG (level={level})")
+ elif fmt == 'webp':
+ q = max(1, min(100, int(comp.get('webp_quality', 85))))
+ pil_image.save(buffered, format='WEBP', quality=q)
+ self._log(f" Compressed image as WEBP (q={q})")
+ else:
+ pil_image.save(buffered, format='PNG', optimize=True)
+ self._log(" Unknown compression format; saved as optimized PNG")
+ else:
+ pil_image.save(buffered, format='PNG', optimize=True)
+ except Exception as _e:
+ self._log(f" ⚠️ Compression failed ({_e}); saved as optimized PNG", "warning")
+ pil_image.save(buffered, format='PNG', optimize=True)
+ return buffered.getvalue()
+
+ except Exception as e:
+ self._log(f"⚠️ Preprocessing failed: {str(e)}, using original image", "warning")
+ with open(image_path, 'rb') as f:
+ return f.read()
+
+ def _detect_quality_issues(self, image: Image.Image, settings: Dict) -> bool:
+ """Auto-detect if image needs quality enhancement"""
+ # Convert to grayscale for analysis
+ gray = image.convert('L')
+
+ # Get histogram
+ hist = gray.histogram()
+
+ # Calculate contrast (simplified)
+ pixels = sum(hist)
+ mean = sum(i * hist[i] for i in range(256)) / pixels
+ variance = sum(hist[i] * (i - mean) ** 2 for i in range(256)) / pixels
+ std_dev = variance ** 0.5
+
+ # Low contrast if std deviation is low
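+ # Settings thresholds are on a 0-1 scale; multiply by 100 to compare against pixel-intensity statistics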
+ contrast_threshold = settings.get('contrast_threshold', 0.4) * 100
+ if std_dev < contrast_threshold:
+ self._log(" Low contrast detected")
+ return True
+
+ # Check for blur using Laplacian variance
+ import numpy as np
+ gray_array = np.array(gray)
+ laplacian = cv2.Laplacian(gray_array, cv2.CV_64F)
+ variance = laplacian.var()
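+ # Low variance of the Laplacian means few strong edges, which typically indicates a blurry image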
+
+ sharpness_threshold = settings.get('sharpness_threshold', 0.3) * 100
+ if variance < sharpness_threshold:
+ self._log(" Blur detected")
+ return True
+
+ return False
+
+ def _save_debug_image(self, image_path: str, regions: List[TextRegion], debug_base_dir: str = None):
+ """Save debug image with detected regions highlighted, respecting save_intermediate toggle.
+ All files are written under /debug (or provided debug_base_dir)."""
+ advanced_settings = self.manga_settings.get('advanced', {})
+ # Skip debug images in batch mode unless explicitly requested
+ if self.batch_mode and not advanced_settings.get('force_debug_batch', False):
+ return
+ # Respect the 'Save intermediate images' toggle only
+ if not advanced_settings.get('save_intermediate', False):
+ return
+ # Compute debug directory under translated_images
+ if debug_base_dir is None:
+ translated_dir = os.path.join(os.path.dirname(image_path), 'translated_images')
+ debug_dir = os.path.join(translated_dir, 'debug')
+ else:
+ debug_dir = os.path.join(debug_base_dir, 'debug')
+ os.makedirs(debug_dir, exist_ok=True)
+ base_name = os.path.splitext(os.path.basename(image_path))[0]
+
+ try:
+ import cv2
+ import numpy as np
+ from PIL import Image as PILImage
+
+ # Handle Unicode paths
+ try:
+ img = cv2.imread(image_path)
+ if img is None:
+ # Fallback to PIL for Unicode paths
+ pil_image = PILImage.open(image_path)
+ img = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)
+ except Exception as e:
+ self._log(f" Failed to load image for debug: {str(e)}", "warning")
+ return
+
+ # Debug directory prepared earlier; compute base name
+ # base_name already computed above
+
+ # Draw rectangles around detected text regions
+ overlay = img.copy()
+
+ # Calculate statistics
+ total_chars = sum(len(r.text) for r in regions)
+ avg_confidence = np.mean([r.confidence for r in regions]) if regions else 0
+
+ for i, region in enumerate(regions):
+ # Convert to int to avoid OpenCV type errors
+ x, y, w, h = map(int, region.bounding_box)
+
+ # Color based on confidence
+ if region.confidence > 0.95:
+ color = (0, 255, 0) # Green - high confidence
+ elif region.confidence > 0.8:
+ color = (0, 165, 255) # Orange - medium confidence
+ else:
+ color = (0, 0, 255) # Red - low confidence
+
+ # Draw rectangle
+ cv2.rectangle(overlay, (x, y), (x + w, y + h), color, 2)
+
+ # Add region info
+ info_text = f"#{i} ({region.confidence:.2f})"
+ cv2.putText(overlay, info_text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX,
+ 0.5, color, 1, cv2.LINE_AA)
+
+ # Add character count
+ char_count = len(region.text.strip())
+ cv2.putText(overlay, f"{char_count} chars", (x, y + h + 15),
+ cv2.FONT_HERSHEY_SIMPLEX, 0.4, color, 1, cv2.LINE_AA)
+
+ # Add detected text preview if in verbose debug mode
+ if self.manga_settings.get('advanced', {}).get('save_intermediate', False):
+ text_preview = region.text[:20] + "..." if len(region.text) > 20 else region.text
+ cv2.putText(overlay, text_preview, (x, y + h + 30), cv2.FONT_HERSHEY_SIMPLEX,
+ 0.4, color, 1, cv2.LINE_AA)
+
+ # Add overall statistics to the image
+ stats_bg = overlay.copy()
+ cv2.rectangle(stats_bg, (10, 10), (300, 90), (0, 0, 0), -1)
+ cv2.addWeighted(stats_bg, 0.7, overlay, 0.3, 0, overlay)
+
+ stats_text = [
+ f"Regions: {len(regions)}",
+ f"Total chars: {total_chars}",
+ f"Avg confidence: {avg_confidence:.2f}"
+ ]
+
+ for i, text in enumerate(stats_text):
+ cv2.putText(overlay, text, (20, 35 + i*20), cv2.FONT_HERSHEY_SIMPLEX,
+ 0.5, (255, 255, 255), 1, cv2.LINE_AA)
+
+ # Save main debug image (always under translated_images/debug when enabled)
+ debug_path = os.path.join(debug_dir, f"{base_name}_debug_regions.png")
+ cv2.imwrite(debug_path, overlay)
+ self._log(f" 📸 Saved debug image: {debug_path}")
+
+ # Save text mask
+ mask = self.create_text_mask(img, regions)
+ mask_debug_path = debug_path.replace('_debug', '_mask')
+ cv2.imwrite(mask_debug_path, mask)
+ mask_percentage = ((mask > 0).sum() / mask.size) * 100
+ self._log(f" 🎭 Saved mask image: {mask_debug_path}", "info")
+ self._log(f" 📊 Mask coverage: {mask_percentage:.1f}% of image", "info")
+
+ # If save_intermediate is enabled, save additional debug images
+ if self.manga_settings.get('advanced', {}).get('save_intermediate', False):
+ # Save confidence heatmap
+ heatmap = self._create_confidence_heatmap(img, regions)
+ heatmap_path = os.path.join(debug_dir, f"{base_name}_confidence_heatmap.png")
+ cv2.imwrite(heatmap_path, heatmap)
+ self._log(f" 🌡️ Saved confidence heatmap: {heatmap_path}")
+
+ # Save polygon visualization with safe text areas
+ if any(hasattr(r, 'vertices') and r.vertices for r in regions):
+ polygon_img = img.copy()
+ for region in regions:
+ if hasattr(region, 'vertices') and region.vertices:
+ # Draw polygon
+ pts = np.array(region.vertices, np.int32)
+ pts = pts.reshape((-1, 1, 2))
+
+ # Fill with transparency
+ overlay_poly = polygon_img.copy()
+ cv2.fillPoly(overlay_poly, [pts], (0, 255, 255))
+ cv2.addWeighted(overlay_poly, 0.2, polygon_img, 0.8, 0, polygon_img)
+
+ # Draw outline
+ cv2.polylines(polygon_img, [pts], True, (255, 0, 0), 2)
+
+ # Draw safe text area
+ try:
+ safe_x, safe_y, safe_w, safe_h = self.get_safe_text_area(region)
+ # Convert to int for OpenCV
+ safe_x, safe_y, safe_w, safe_h = map(int, (safe_x, safe_y, safe_w, safe_h))
+ cv2.rectangle(polygon_img, (safe_x, safe_y),
+ (safe_x + safe_w, safe_y + safe_h),
+ (0, 255, 0), 1)
+ except Exception:
+ pass # Skip if get_safe_text_area fails
+
+ # Add legend to explain colors
+ legend_bg = polygon_img.copy()
+ legend_height = 140
+ legend_width = 370
+ cv2.rectangle(legend_bg, (10, 10), (10 + legend_width, 10 + legend_height), (0, 0, 0), -1)
+ cv2.addWeighted(legend_bg, 0.8, polygon_img, 0.2, 0, polygon_img)
+
+ # Add legend items
+ # Note: OpenCV uses BGR format, so (255, 0, 0) = Blue, (0, 0, 255) = Red
+ legend_items = [
+ ("Blue outline: OCR polygon (detected text)", (255, 0, 0)),
+ ("Yellow fill: Mask area (will be inpainted)", (0, 255, 255)),
+ ("Green rect: Safe text area (algorithm-based)", (0, 255, 0)),
+ ("Magenta rect: Mask bounds (actual render area)", (255, 0, 255))
+ ]
+
+ for i, (text, color) in enumerate(legend_items):
+ y_pos = 30 + i * 30
+ # Draw color sample
+ if i == 1: # Yellow fill
+ cv2.rectangle(polygon_img, (20, y_pos - 8), (35, y_pos + 8), color, -1)
+ else:
+ cv2.rectangle(polygon_img, (20, y_pos - 8), (35, y_pos + 8), color, 2)
+ # Draw text
+ cv2.putText(polygon_img, text, (45, y_pos + 5), cv2.FONT_HERSHEY_SIMPLEX,
+ 0.45, (255, 255, 255), 1, cv2.LINE_AA)
+
+ polygon_path = os.path.join(debug_dir, f"{base_name}_polygons.png")
+ cv2.imwrite(polygon_path, polygon_img)
+ self._log(f" 🔷 Saved polygon visualization: {polygon_path}")
+
+ # Save individual region crops with more info
+ regions_dir = os.path.join(debug_dir, 'regions')
+ os.makedirs(regions_dir, exist_ok=True)
+
+ for i, region in enumerate(regions[:10]): # Limit to first 10 regions
+ # Convert to int to avoid OpenCV type errors
+ x, y, w, h = map(int, region.bounding_box)
+ # Add padding
+ pad = 10
+ x1 = max(0, x - pad)
+ y1 = max(0, y - pad)
+ x2 = min(img.shape[1], x + w + pad)
+ y2 = min(img.shape[0], y + h + pad)
+
+ region_crop = img[y1:y2, x1:x2].copy()
+
+ # Draw bounding box on crop
+ cv2.rectangle(region_crop, (pad, pad),
+ (pad + w, pad + h), (0, 255, 0), 2)
+
+ # Add text info on the crop
+ info = f"Conf: {region.confidence:.2f} | Chars: {len(region.text)}"
+ cv2.putText(region_crop, info, (5, 15), cv2.FONT_HERSHEY_SIMPLEX,
+ 0.4, (255, 255, 255), 1, cv2.LINE_AA)
+
+ # Save with meaningful filename
+ safe_text = region.text[:20].replace('/', '_').replace('\\', '_').strip()
+ region_path = os.path.join(regions_dir, f"region_{i:03d}_{safe_text}.png")
+ cv2.imwrite(region_path, region_crop)
+
+ self._log(f" 📁 Saved individual region crops to: {regions_dir}")
+
+ except Exception as e:
+ self._log(f" ❌ Failed to save debug image: {str(e)}", "warning")
+ if self.manga_settings.get('advanced', {}).get('debug_mode', False):
+ # If debug mode is on, log the full traceback
+ import traceback
+ self._log(traceback.format_exc(), "warning")
+
+ def _create_confidence_heatmap(self, img, regions):
+ """Create a heatmap showing OCR confidence levels"""
+ heatmap = np.zeros_like(img[:, :, 0], dtype=np.float32)
+
+ for region in regions:
+ # Convert to int for array indexing
+ x, y, w, h = map(int, region.bounding_box)
+ confidence = region.confidence
+ heatmap[y:y+h, x:x+w] = confidence
+
+ # Convert to color heatmap
+ heatmap_normalized = (heatmap * 255).astype(np.uint8)
+ heatmap_colored = cv2.applyColorMap(heatmap_normalized, cv2.COLORMAP_JET)
+
+ # Blend with original image
+ result = cv2.addWeighted(img, 0.7, heatmap_colored, 0.3, 0)
+ return result
+
+ def _get_translation_history_context(self) -> List[Dict[str, str]]:
+ """Get translation history context from HistoryManager"""
+ if not self.history_manager or not self.contextual_enabled:
+ return []
+
+ try:
+ # Load full history
+ full_history = self.history_manager.load_history()
+
+ if not full_history:
+ return []
+
+ # Extract only the contextual messages up to the limit
+ context = []
+ exchange_count = 0
+
+ # Process history in pairs (user + assistant messages)
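+ # Assumes load_history() returns a flat, chronological list of message dicts
+ # alternating user/assistant; each adjacent pair is one translation exchange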
+ for i in range(0, len(full_history), 2):
+ if i + 1 < len(full_history):
+ user_msg = full_history[i]
+ assistant_msg = full_history[i + 1]
+
+ if user_msg.get("role") == "user" and assistant_msg.get("role") == "assistant":
+ context.extend([user_msg, assistant_msg])
+ exchange_count += 1
+
+ # Only keep up to the history limit
+ if exchange_count >= self.translation_history_limit:
+ # Get only the most recent exchanges
+ context = context[-(self.translation_history_limit * 2):]
+ break
+
+ return context
+
+ except Exception as e:
+ self._log(f"⚠️ Error loading history context: {str(e)}", "warning")
+ return []
+
+ def translate_text(self, text: str, context: Optional[List[Dict]] = None, image_path: str = None, region: TextRegion = None) -> str:
+ """Translate text using API with GUI system prompt and full image context"""
+ try:
+ # Build per-request log prefix for clearer parallel logs
+ try:
+ import threading
+ thread_name = threading.current_thread().name
+ except Exception:
+ thread_name = "MainThread"
+ bbox_info = ""
+ try:
+ if region and hasattr(region, 'bounding_box') and region.bounding_box:
+ x, y, w, h = region.bounding_box
+ bbox_info = f" [bbox={x},{y},{w}x{h}]"
+ except Exception:
+ pass
+ prefix = f"[{thread_name}]{bbox_info}"
+
+ self._log(f"\n{prefix} 🌐 Starting translation for text: '{text[:50]}...'")
+ # CHECK 1: Before starting
+ if self._check_stop():
+ self._log("⏹️ Translation stopped before text translation", "warning")
+ return text
+
+ # Get system prompt from GUI profile
+ profile_name = self.main_gui.profile_var.get()
+
+ # Get the prompt from prompt_profiles dictionary
+ system_prompt = ''
+ if hasattr(self.main_gui, 'prompt_profiles') and profile_name in self.main_gui.prompt_profiles:
+ system_prompt = self.main_gui.prompt_profiles[profile_name]
+ self._log(f"📋 Using profile: {profile_name}")
+ else:
+ self._log(f"⚠️ Profile '{profile_name}' not found in prompt_profiles", "warning")
+
+ self._log(f"{prefix} 📝 System prompt: {system_prompt[:100]}..." if system_prompt else f"{prefix} 📝 No system prompt configured")
+
+ if system_prompt:
+ messages = [{"role": "system", "content": system_prompt}]
+ else:
+ messages = []
+
+
+ # Add contextual translations if enabled
+ if self.contextual_enabled and self.history_manager:
+ # Get history from HistoryManager
+ history_context = self._get_translation_history_context()
+
+ if history_context:
+ context_count = len(history_context) // 2 # Each exchange is 2 messages
+ self._log(f"🔗 Adding {context_count} previous exchanges from history (limit: {self.translation_history_limit})")
+ messages.extend(history_context)
+ else:
+ self._log(f"🔗 Contextual enabled but no history available yet")
+ else:
+ self._log(f"{prefix} 🔗 Contextual: {'Disabled' if not self.contextual_enabled else 'No HistoryManager'}")
+
+ # Add full image context if available AND visual context is enabled
+ if image_path and self.visual_context_enabled:
+ try:
+ import base64
+ from PIL import Image as PILImage
+
+ self._log(f"{prefix} 📷 Adding full page visual context for translation")
+
+ # Read and encode the full image
+ with open(image_path, 'rb') as img_file:
+ img_data = img_file.read()
+
+ # Check image size
+ img_size_mb = len(img_data) / (1024 * 1024)
+ self._log(f"{prefix} 📊 Image size: {img_size_mb:.2f} MB")
+
+ # Optionally resize if too large (Gemini has limits)
+ if img_size_mb > 10: # If larger than 10MB
+ self._log(f"📉 Resizing large image for API limits...")
+ pil_image = PILImage.open(image_path)
+
+ # Calculate new size (max 2048px on longest side)
+ max_size = 2048
+ ratio = min(max_size / pil_image.width, max_size / pil_image.height)
+ if ratio < 1:
+ new_size = (int(pil_image.width * ratio), int(pil_image.height * ratio))
+ pil_image = pil_image.resize(new_size, PILImage.Resampling.LANCZOS)
+
+ # Re-encode
+ from io import BytesIO
+ buffered = BytesIO()
+ pil_image.save(buffered, format="PNG", optimize=True)
+ img_data = buffered.getvalue()
+ self._log(f"{prefix} ✅ Resized to {new_size[0]}x{new_size[1]}px ({len(img_data)/(1024*1024):.2f} MB)")
+
+ # Encode to base64
+ img_base64 = base64.b64encode(img_data).decode('utf-8')
+
+ # Build the message with image and text location info
+ location_description = ""
+ if region:
+ x, y, w, h = region.bounding_box
+ # Describe where on the page this text is located
+ # Open the page once to get its dimensions
+ page_width, page_height = PILImage.open(image_path).size
+
+ # Determine position
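+ # Split the page into a 3x3 grid (thirds) and name the cell, e.g. "top-right",
+ # so the model knows which bubble on the page is being translated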
+ h_pos = "left" if x < page_width/3 else "center" if x < 2*page_width/3 else "right"
+ v_pos = "top" if y < page_height/3 else "middle" if y < 2*page_height/3 else "bottom"
+
+ location_description = f"\n\nThe text to translate is located in the {v_pos}-{h_pos} area of the page, "
+ location_description += f"at coordinates ({x}, {y}) with size {w}x{h} pixels."
+
+ # Add image and text to translate
+ messages.append({
+ "role": "user",
+ "content": [
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": f"data:image/png;base64,{img_base64}"
+ }
+ },
+ {
+ "type": "text",
+ "text": f"Looking at this full manga page, translate the following text: '{text}'{location_description}"
+ }
+ ]
+ })
+
+ self._log(f"{prefix} ✅ Added full page image as visual context")
+
+ except Exception as e:
+ self._log(f"⚠️ Failed to add image context: {str(e)}", "warning")
+ self._log(f" Error type: {type(e).__name__}", "warning")
+ import traceback
+ self._log(traceback.format_exc(), "warning")
+ # Fall back to text-only translation
+ messages.append({"role": "user", "content": text})
+ elif image_path and not self.visual_context_enabled:
+ # Visual context disabled - text-only mode
+ self._log(f"{prefix} 📝 Text-only mode (visual context disabled)")
+ messages.append({"role": "user", "content": text})
+ else:
+ # No image path provided - text-only translation
+ messages.append({"role": "user", "content": text})
+
+ # Check input token limit
+ text_tokens = 0
+ image_tokens = 0
+
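+ # Rough estimate only: ~4 characters per token for text, plus a flat 258 tokens
+ # per attached image (roughly the documented per-image cost for Gemini-style vision models)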
+ for msg in messages:
+ if isinstance(msg.get("content"), str):
+ # Simple text message
+ text_tokens += len(msg["content"]) // 4
+ elif isinstance(msg.get("content"), list):
+ # Message with mixed content (text + image)
+ for content_part in msg["content"]:
+ if content_part.get("type") == "text":
+ text_tokens += len(content_part.get("text", "")) // 4
+ elif content_part.get("type") == "image_url":
+ # Only count image tokens if visual context is enabled
+ if self.visual_context_enabled:
+ image_tokens += 258
+
+ estimated_tokens = text_tokens + image_tokens
+
+ # Check token limit only if it's enabled
+ if self.input_token_limit is None:
+ self._log(f"{prefix} 📊 Token estimate - Text: {text_tokens}, Images: {image_tokens} (Total: {estimated_tokens} / unlimited)")
+ else:
+ self._log(f"{prefix} 📊 Token estimate - Text: {text_tokens}, Images: {image_tokens} (Total: {estimated_tokens} / {self.input_token_limit})")
+
+ if estimated_tokens > self.input_token_limit:
+ self._log(f"⚠️ Token limit exceeded, trimming context", "warning")
+ # Keep system prompt, image, and current text only
+ if image_path:
+ messages = [messages[0], messages[-1]]
+ else:
+ messages = [messages[0], {"role": "user", "content": text}]
+ # Recalculate tokens after trimming
+ text_tokens = len(messages[0]["content"]) // 4
+ if isinstance(messages[-1].get("content"), str):
+ text_tokens += len(messages[-1]["content"]) // 4
+ else:
+ text_tokens += len(messages[-1]["content"][0]["text"]) // 4
+ estimated_tokens = text_tokens + image_tokens
+ self._log(f"📊 Trimmed token estimate: {estimated_tokens}")
+
+ start_time = time.time()
+ api_time = 0 # Initialize to avoid NameError
+
+ try:
+ response = send_with_interrupt(
+ messages=messages,
+ client=self.client,
+ temperature=self.temperature,
+ max_tokens=self.max_tokens,
+ stop_check_fn=self._check_stop
+ )
+ api_time = time.time() - start_time
+ self._log(f"{prefix} ✅ API responded in {api_time:.2f} seconds")
+
+ # Normalize response to plain text (handle tuples and bytes)
+ if hasattr(response, 'content'):
+ response_text = response.content
+ else:
+ response_text = response
+
+ # Handle tuple response like (text, 'stop') from some clients
+ if isinstance(response_text, tuple):
+ response_text = response_text[0]
+
+ # Decode bytes/bytearray
+ if isinstance(response_text, (bytes, bytearray)):
+ try:
+ response_text = response_text.decode('utf-8', errors='replace')
+ except Exception:
+ response_text = str(response_text)
+
+ # Ensure string
+ if not isinstance(response_text, str):
+ response_text = str(response_text)
+
+ response_text = response_text.strip()
+
+ # If it's a stringified tuple like "('text', 'stop')", extract the first element
+ if response_text.startswith("('") or response_text.startswith('("'):
+ import ast, re
+ try:
+ parsed_tuple = ast.literal_eval(response_text)
+ if isinstance(parsed_tuple, tuple) and parsed_tuple:
+ response_text = str(parsed_tuple[0])
+ self._log("📦 Extracted response from tuple literal", "debug")
+ except Exception:
+ match = re.match(r"^\('(.+?)',\s*'.*'\)$", response_text, re.DOTALL)
+ if match:
+ tmp = match.group(1)
+ tmp = tmp.replace('\\n', '\n').replace("\\'", "'").replace('\\\"', '"').replace('\\\\', '\\')
+ response_text = tmp
+ self._log("📦 Extracted response using regex from tuple literal", "debug")
+
+ self._log(f"{prefix} 📥 Received response ({len(response_text)} chars)")
+
+ except Exception as api_error:
+ api_time = time.time() - start_time
+ error_str = str(api_error).lower()
+ error_type = type(api_error).__name__
+
+ # Check for specific error types
+ if "429" in error_str or "rate limit" in error_str:
+ self._log(f"⚠️ RATE LIMIT ERROR (429) after {api_time:.2f}s", "error")
+ self._log(f" The API rate limit has been exceeded", "error")
+ self._log(f" Please wait before retrying or reduce request frequency", "error")
+ self._log(f" Error details: {str(api_error)}", "error")
+ raise Exception(f"Rate limit exceeded (429): {str(api_error)}")
+
+ elif "401" in error_str or "unauthorized" in error_str:
+ self._log(f"❌ AUTHENTICATION ERROR (401) after {api_time:.2f}s", "error")
+ self._log(f" Invalid API key or authentication failed", "error")
+ self._log(f" Please check your API key in settings", "error")
+ self._log(f" Error details: {str(api_error)}", "error")
+ raise Exception(f"Authentication failed (401): {str(api_error)}")
+
+ elif "403" in error_str or "forbidden" in error_str:
+ self._log(f"❌ FORBIDDEN ERROR (403) after {api_time:.2f}s", "error")
+ self._log(f" Access denied - check API permissions", "error")
+ self._log(f" Error details: {str(api_error)}", "error")
+ raise Exception(f"Access forbidden (403): {str(api_error)}")
+
+ elif "400" in error_str or "bad request" in error_str:
+ self._log(f"❌ BAD REQUEST ERROR (400) after {api_time:.2f}s", "error")
+ self._log(f" Invalid request format or parameters", "error")
+ self._log(f" Error details: {str(api_error)}", "error")
+ raise Exception(f"Bad request (400): {str(api_error)}")
+
+ elif "timeout" in error_str:
+ self._log(f"⏱️ TIMEOUT ERROR after {api_time:.2f}s", "error")
+ self._log(f" API request timed out", "error")
+ self._log(f" Consider increasing timeout or retry", "error")
+ self._log(f" Error details: {str(api_error)}", "error")
+ raise Exception(f"Request timeout: {str(api_error)}")
+
+ else:
+ # Generic API error
+ import traceback
+ self._log(f"❌ API ERROR ({error_type}) after {api_time:.2f}s", "error")
+ self._log(f" Error details: {str(api_error)}", "error")
+ self._log(" Full traceback:", "error")
+ self._log(traceback.format_exc(), "error")
+ raise
+
+
+
+ # Initialize translated with extracted response text to avoid UnboundLocalError
+ if response_text is None:
+ translated = ""
+ elif isinstance(response_text, str):
+ translated = response_text
+ elif isinstance(response_text, (bytes, bytearray)):
+ try:
+ translated = response_text.decode('utf-8', errors='replace')
+ except Exception:
+ translated = str(response_text)
+ else:
+ translated = str(response_text)
+
+ # Debug: inspect the raw API response
+ self._log(f"🔍 RAW API RESPONSE DEBUG:", "debug")
+ self._log(f" Type: {type(translated)}", "debug")
+ #self._log(f" Raw content length: {len(translated)}", "debug")
+ #self._log(f" First 200 chars: {translated[:200]}", "debug")
+ #self._log(f" Last 200 chars: {translated[-200:]}", "debug")
+
+ # Check whether the response still contains source-language (Japanese kana/kanji or Korean Hangul) text alongside English
+ has_cjk = any('\u3040' <= c <= '\u9fff' or '\uac00' <= c <= '\ud7af' for c in translated)
+ has_english = any('a' <= c.lower() <= 'z' for c in translated)
+
+ if has_cjk and has_english:
+ self._log(f" ⚠️ WARNING: Response contains BOTH source-language AND English text!", "warning")
+ self._log(f" This might be causing the duplicate text issue", "warning")
+
+ # Check if response looks like JSON (contains both { and } and : characters)
+ if '{' in translated and '}' in translated and ':' in translated:
+ try:
+ # It might be JSON, try to fix and parse it
+ fixed_json = self._fix_json_response(translated)
+ import json
+ parsed = json.loads(fixed_json)
+
+ # If it's a dict with a single translation, extract it
+ if isinstance(parsed, dict) and len(parsed) == 1:
+ translated = list(parsed.values())[0]
+ translated = self._clean_translation_text(translated)
+ self._log("📦 Extracted translation from JSON response", "debug")
+ except Exception:
+ # Not JSON or failed to parse, use as-is
+ pass
+
+ self._log(f"{prefix} 🔍 Raw response type: {type(translated)}")
+ self._log(f"{prefix} 🔍 Raw response content: '{translated[:5000]}...'")
+
+ # Check if the response looks like a Python literal (tuple/string representation)
+ if translated.startswith("('") or translated.startswith('("') or translated.startswith("('''"):
+ self._log(f"⚠️ Detected Python literal in response, attempting to extract actual text", "warning")
+ original = translated
+ try:
+ # Try to evaluate it as a Python literal
+ import ast
+ evaluated = ast.literal_eval(translated)
+ self._log(f"📦 Evaluated type: {type(evaluated)}")
+
+ if isinstance(evaluated, tuple):
+ # Take the first element of the tuple
+ translated = str(evaluated[0])
+ self._log(f"📦 Extracted from tuple: '{translated[:50]}...'")
+ elif isinstance(evaluated, str):
+ translated = evaluated
+ self._log(f"📦 Extracted string: '{translated[:50]}...'")
+ else:
+ self._log(f"⚠️ Unexpected type after eval: {type(evaluated)}", "warning")
+
+ except Exception as e:
+ self._log(f"⚠️ Failed to parse Python literal: {e}", "warning")
+ self._log(f"⚠️ Original content: {original[:200]}", "warning")
+
+ # Try multiple levels of unescaping
+ temp = translated
+ for i in range(5): # Try up to 5 levels of unescaping
+ if temp.startswith("('") or temp.startswith('("'):
+ # Try regex as fallback
+ import re
+ match = re.search(r"^\(['\"](.+)['\"]\)$", temp, re.DOTALL)
+ if match:
+ temp = match.group(1)
+ self._log(f"📦 Regex extracted (level {i+1}): '{temp[:50]}...'")
+ else:
+ break
+ else:
+ break
+ translated = temp
+
+ # Additional check for escaped content
+ #if '\\\\' in translated or '\\n' in translated or "\\'" in translated or '\\"' in translated:
+ # self._log(f"⚠️ Detected escaped content, unescaping...", "warning")
+ # try:
+ # before = translated
+ #
+ # # Handle quotes and apostrophes
+ # translated = translated.replace("\\'", "'")
+ # translated = translated.replace('\\"', '"')
+ # translated = translated.replace("\\`", "`")
+
+ # DON'T UNESCAPE NEWLINES BEFORE JSON PARSING!
+ # translated = translated.replace('\\n', '\n') # COMMENT THIS OUT
+
+ # translated = translated.replace('\\\\', '\\')
+ # translated = translated.replace('\\/', '/')
+ # translated = translated.replace('\\t', '\t') # COMMENT THIS OUT TOO
+ # translated = translated.replace('\\r', '\r') # AND THIS
+
+ # self._log(f"📦 Unescaped safely: '{before[:50]}...' -> '{translated[:50]}...'")
+ # except Exception as e:
+ # self._log(f"⚠️ Failed to unescape: {e}", "warning")
+
+ # Clean up unwanted trailing apostrophes/quotes
+ import re
+ response_text = translated
+ response_text = re.sub(r"['''\"`]$", "", response_text.strip()) # Remove trailing
+ response_text = re.sub(r"^['''\"`]", "", response_text.strip()) # Remove leading
+ response_text = re.sub(r"\s+['''\"`]\s+", " ", response_text) # Remove isolated
+ translated = response_text
+ translated = self._clean_translation_text(translated)
+
+ # Apply glossary if available
+ if hasattr(self.main_gui, 'manual_glossary') and self.main_gui.manual_glossary:
+ glossary_count = len(self.main_gui.manual_glossary)
+ self._log(f"📚 Applying glossary with {glossary_count} entries")
+
+ replacements = 0
+ for entry in self.main_gui.manual_glossary:
+ if 'source' in entry and 'target' in entry:
+ if entry['source'] in translated:
+ translated = translated.replace(entry['source'], entry['target'])
+ replacements += 1
+
+ if replacements > 0:
+ self._log(f" ✏️ Made {replacements} glossary replacements")
+
+ translated = self._clean_translation_text(translated)
+
+ # Store in history if HistoryManager is available
+ if self.history_manager and self.contextual_enabled:
+ try:
+ # Append to history with proper limit handling
+ self.history_manager.append_to_history(
+ user_content=text,
+ assistant_content=translated,
+ hist_limit=self.translation_history_limit,
+ reset_on_limit=not self.rolling_history_enabled,
+ rolling_window=self.rolling_history_enabled
+ )
+
+ # Check if we're about to hit the limit
+ if self.history_manager.will_reset_on_next_append(
+ self.translation_history_limit,
+ self.rolling_history_enabled
+ ):
+ mode = "roll over" if self.rolling_history_enabled else "reset"
+ self._log(f"📚 History will {mode} on next translation (at limit: {self.translation_history_limit})")
+
+ except Exception as e:
+ self._log(f"⚠️ Failed to save to history: {str(e)}", "warning")
+
+ # Also store in legacy context for compatibility
+ self.translation_context.append({
+ "original": text,
+ "translated": translated
+ })
+
+ return translated
+
+ except Exception as e:
+ self._log(f"❌ Translation error: {str(e)}", "error")
+ self._log(f" Error type: {type(e).__name__}", "error")
+ import traceback
+ self._log(f" Traceback: {traceback.format_exc()}", "error")
+ return text
+
+ def translate_full_page_context(self, regions: List[TextRegion], image_path: str, _in_fallback=False) -> Dict[str, str]:
+ """Translate all text regions with full page context in a single request
+
+ Args:
+ regions: List of text regions to translate
+ image_path: Path to the manga page image
+ _in_fallback: Internal flag to prevent infinite recursion during fallback attempts
+ """
+ try:
+ import time
+ import traceback
+ import json
+
+ # Initialize response_text at the start
+ response_text = ""
+
+ self._log(f"\n📄 Full page context translation of {len(regions)} text regions")
+
+ # Get system prompt from GUI profile
+ profile_name = self.main_gui.profile_var.get()
+
+ # Ensure visual_context_enabled exists (temporary fix)
+ if not hasattr(self, 'visual_context_enabled'):
+ self.visual_context_enabled = self.main_gui.config.get('manga_visual_context_enabled', True)
+
+ # Try to get the prompt from prompt_profiles dictionary (for all profiles including custom ones)
+ system_prompt = ''
+ if hasattr(self.main_gui, 'prompt_profiles') and profile_name in self.main_gui.prompt_profiles:
+ system_prompt = self.main_gui.prompt_profiles[profile_name]
+ self._log(f"📋 Using profile: {profile_name}")
+ else:
+ # Fallback to check if it's stored as a direct attribute (legacy support)
+ system_prompt = getattr(self.main_gui, profile_name.replace(' ', '_'), '')
+ if system_prompt:
+ self._log(f"📋 Using profile (legacy): {profile_name}")
+ else:
+ self._log(f"⚠️ Profile '{profile_name}' not found, using empty prompt", "warning")
+
+ # Combine with full page context instructions
+ if system_prompt:
+ system_prompt = f"{system_prompt}\n\n{self.full_page_context_prompt}"
+ else:
+ system_prompt = self.full_page_context_prompt
+
+ messages = [{"role": "system", "content": system_prompt}]
+
+ # CHECK 2: Before adding context
+ if self._check_stop():
+ self._log("⏹️ Translation stopped during context preparation", "warning")
+ return {}
+
+ # Add contextual translations if enabled
+ if self.contextual_enabled and self.history_manager:
+ history_context = self._get_translation_history_context()
+ if history_context:
+ context_count = len(history_context) // 2
+ self._log(f"🔗 Adding {context_count} previous exchanges from history")
+ messages.extend(history_context)
+
+ # Prepare text segments with indices
+ all_texts = {}
+ text_list = []
+ for i, region in enumerate(regions):
+ # Use index-based key to handle duplicate texts
+ key = f"[{i}] {region.text}"
+ all_texts[key] = region.text
+ text_list.append(key)
+
+ # CHECK 3: Before image processing
+ if self._check_stop():
+ self._log("⏹️ Translation stopped before image processing", "warning")
+ return {}
+
+ # Create the full context message text
+ context_text = "\n".join(text_list)
+
+ # Log text content info
+ total_chars = sum(len(region.text) for region in regions)
+ self._log(f"📝 Text content: {len(regions)} regions, {total_chars} total characters")
+
+ # Process image if visual context is enabled
+ if self.visual_context_enabled:
+ try:
+ import base64
+ from PIL import Image as PILImage
+
+ self._log(f"📷 Adding full page visual context for translation")
+
+ # Read and encode the image
+ with open(image_path, 'rb') as img_file:
+ img_data = img_file.read()
+
+ # Check image size
+ img_size_mb = len(img_data) / (1024 * 1024)
+ self._log(f"📊 Image size: {img_size_mb:.2f} MB")
+
+ # Get image dimensions
+ pil_image = PILImage.open(image_path)
+ self._log(f" Image dimensions: {pil_image.width}x{pil_image.height}")
+
+ # CHECK 4: Before resizing (which can take time)
+ if self._check_stop():
+ self._log("⏹️ Translation stopped during image preparation", "warning")
+ return {}
+
+ # Resize if needed
+ if img_size_mb > 10:
+ self._log(f"📉 Resizing large image for API limits...")
+ max_size = 2048
+ ratio = min(max_size / pil_image.width, max_size / pil_image.height)
+ if ratio < 1:
+ new_size = (int(pil_image.width * ratio), int(pil_image.height * ratio))
+ pil_image = pil_image.resize(new_size, PILImage.Resampling.LANCZOS)
+ from io import BytesIO
+ buffered = BytesIO()
+ pil_image.save(buffered, format="PNG", optimize=True)
+ img_data = buffered.getvalue()
+ self._log(f"✅ Resized to {new_size[0]}x{new_size[1]}px ({len(img_data)/(1024*1024):.2f} MB)")
+
+ # Convert to base64
+ img_b64 = base64.b64encode(img_data).decode('utf-8')
+
+ # Create message with both text and image
+ messages.append({
+ "role": "user",
+ "content": [
+ {"type": "text", "text": context_text},
+ {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{img_b64}"}}
+ ]
+ })
+
+ self._log(f"✅ Added full page image as visual context")
+
+ except Exception as e:
+ self._log(f"⚠️ Failed to add image context: {str(e)}", "warning")
+ self._log(f" Error type: {type(e).__name__}", "warning")
+ import traceback
+ self._log(traceback.format_exc(), "warning")
+ self._log(f" Falling back to text-only translation", "warning")
+
+ # Fall back to text-only translation
+ messages.append({"role": "user", "content": context_text})
+ else:
+ # Visual context disabled - send text only
+ self._log(f"📝 Text-only mode (visual context disabled for non-vision models)")
+ messages.append({"role": "user", "content": context_text})
+
+ # CHECK 5: Before API call
+ if self._check_stop():
+ self._log("⏹️ Translation stopped before API call", "warning")
+ return {}
+
+ # Store original model for fallback
+ original_model = self.client.model if hasattr(self.client, 'model') else None
+
+ # Check input token limit
+ text_tokens = 0
+ image_tokens = 0
+
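+ # Same rough heuristic as single-region translation: ~4 characters per text token,
+ # flat 258 tokens per attached image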
+ for msg in messages:
+ if isinstance(msg.get("content"), str):
+ # Simple text message
+ text_tokens += len(msg["content"]) // 4
+ elif isinstance(msg.get("content"), list):
+ # Message with mixed content (text + image)
+ for content_part in msg["content"]:
+ if content_part.get("type") == "text":
+ text_tokens += len(content_part.get("text", "")) // 4
+ elif content_part.get("type") == "image_url":
+ # Only count image tokens if visual context is enabled
+ if self.visual_context_enabled:
+ image_tokens += 258
+
+ estimated_tokens = text_tokens + image_tokens
+
+ # Check token limit only if it's enabled
+ if self.input_token_limit is None:
+ self._log(f"📊 Token estimate - Text: {text_tokens}, Images: {image_tokens} (Total: {estimated_tokens} / unlimited)")
+ else:
+ self._log(f"📊 Token estimate - Text: {text_tokens}, Images: {image_tokens} (Total: {estimated_tokens} / {self.input_token_limit})")
+
+ if estimated_tokens > self.input_token_limit:
+ self._log(f"⚠️ Token limit exceeded, trimming context", "warning")
+ # Keep system prompt and current message only
+ messages = [messages[0], messages[-1]]
+ # Recalculate tokens
+ text_tokens = len(messages[0]["content"]) // 4
+ if isinstance(messages[-1]["content"], str):
+ text_tokens += len(messages[-1]["content"]) // 4
+ else:
+ for content_part in messages[-1]["content"]:
+ if content_part.get("type") == "text":
+ text_tokens += len(content_part.get("text", "")) // 4
+ estimated_tokens = text_tokens + image_tokens
+ self._log(f"📊 Trimmed token estimate: {estimated_tokens}")
+
+ # Make API call using the client's send method (matching translate_text)
+ self._log(f"🌐 Sending full page context to API...")
+ self._log(f" API Model: {self.client.model if hasattr(self.client, 'model') else 'unknown'}")
+ self._log(f" Temperature: {self.temperature}")
+ self._log(f" Max Output Tokens: {self.max_tokens}")
+
+ start_time = time.time()
+ api_time = 0 # Initialize to avoid NameError
+
+ try:
+ response = send_with_interrupt(
+ messages=messages,
+ client=self.client,
+ temperature=self.temperature,
+ max_tokens=self.max_tokens,
+ stop_check_fn=self._check_stop
+ )
+ api_time = time.time() - start_time
+
+ # Extract content from response
+ if hasattr(response, 'content'):
+ response_text = response.content
+ # Check if it's a tuple representation
+ if isinstance(response_text, tuple):
+ response_text = response_text[0] # Get first element of tuple
+ response_text = response_text.strip()
+ elif hasattr(response, 'text'):
+ # Gemini responses have .text attribute
+ response_text = response.text.strip()
+ elif hasattr(response, 'candidates') and response.candidates:
+ # Handle Gemini GenerateContentResponse structure
+ try:
+ response_text = response.candidates[0].content.parts[0].text.strip()
+ except (IndexError, AttributeError):
+ response_text = str(response).strip()
+ else:
+ # If response is a string or other format
+ response_text = str(response).strip()
+
+ # Check if it's a stringified tuple
+ if response_text.startswith("('") or response_text.startswith('("'):
+ # It's a tuple converted to string, extract the JSON part
+ import ast
+ try:
+ parsed_tuple = ast.literal_eval(response_text)
+ if isinstance(parsed_tuple, tuple):
+ response_text = parsed_tuple[0] # Get first element
+ self._log("📦 Extracted response from tuple format", "debug")
+ except Exception:
+ # If literal_eval fails, try regex
+ import re
+ match = re.match(r"^\('(.+)', '.*'\)$", response_text, re.DOTALL)
+ if match:
+ response_text = match.group(1)
+ # Unescape the string
+ response_text = response_text.replace('\\n', '\n')
+ response_text = response_text.replace("\\'", "'")
+ response_text = response_text.replace('\\"', '"')
+ response_text = response_text.replace('\\\\', '\\')
+ self._log("📦 Extracted response using regex from tuple string", "debug")
+
+ # CHECK 6: Immediately after API response
+ if self._check_stop():
+ self._log(f"⏹️ Translation stopped after API call ({api_time:.2f}s)", "warning")
+ return {}
+
+ self._log(f"✅ API responded in {api_time:.2f} seconds")
+ self._log(f"📥 Received response ({len(response_text)} chars)")
+
+ except Exception as api_error:
+ api_time = time.time() - start_time
+
+ # CHECK 7: After API error
+ if self._check_stop():
+ self._log(f"⏹️ Translation stopped during API error handling", "warning")
+ return {}
+
+ error_str = str(api_error).lower()
+ error_type = type(api_error).__name__
+
+ # Check for specific error types
+ if "429" in error_str or "rate limit" in error_str:
+ self._log(f"⚠️ RATE LIMIT ERROR (429) after {api_time:.2f}s", "error")
+ self._log(f" The API rate limit has been exceeded", "error")
+ self._log(f" Please wait before retrying or reduce request frequency", "error")
+ self._log(f" Error details: {str(api_error)}", "error")
+ raise Exception(f"Rate limit exceeded (429): {str(api_error)}")
+
+ elif "401" in error_str or "unauthorized" in error_str:
+ self._log(f"❌ AUTHENTICATION ERROR (401) after {api_time:.2f}s", "error")
+ self._log(f" Invalid API key or authentication failed", "error")
+ self._log(f" Please check your API key in settings", "error")
+ self._log(f" Error details: {str(api_error)}", "error")
+ raise Exception(f"Authentication failed (401): {str(api_error)}")
+
+ elif "403" in error_str or "forbidden" in error_str:
+ self._log(f"❌ FORBIDDEN ERROR (403) after {api_time:.2f}s", "error")
+ self._log(f" Access denied - check API permissions", "error")
+ self._log(f" Error details: {str(api_error)}", "error")
+ raise Exception(f"Access forbidden (403): {str(api_error)}")
+
+ elif "400" in error_str or "bad request" in error_str:
+ self._log(f"❌ BAD REQUEST ERROR (400) after {api_time:.2f}s", "error")
+ self._log(f" Invalid request format or parameters", "error")
+ self._log(f" Error details: {str(api_error)}", "error")
+ raise Exception(f"Bad request (400): {str(api_error)}")
+
+ elif "timeout" in error_str:
+ self._log(f"⏱️ TIMEOUT ERROR after {api_time:.2f}s", "error")
+ self._log(f" API request timed out", "error")
+ self._log(f" Consider increasing timeout or retry", "error")
+ self._log(f" Error details: {str(api_error)}", "error")
+ raise Exception(f"Request timeout: {str(api_error)}")
+
+ else:
+ # Generic API error
+ self._log(f"❌ API ERROR ({error_type}) after {api_time:.2f}s", "error")
+ self._log(f" Error details: {str(api_error)}", "error")
+ self._log(f" Full traceback:", "error")
+ self._log(traceback.format_exc(), "error")
+ raise
+
+ # CHECK 8: Before parsing response
+ if self._check_stop():
+ self._log("⏹️ Translation stopped before parsing response", "warning")
+ return {}
+
+ # Check if we got a response
+ if not response_text:
+ self._log("❌ Empty response from API", "error")
+ return {}
+
+ self._log(f"🔍 Raw response type: {type(response_text)}")
+ self._log(f"🔍 Raw response preview: '{response_text[:2000]}...'")
+
+ # Clean up response_text (handle Python literals, escapes, etc.)
+ if response_text.startswith("('") or response_text.startswith('("') or response_text.startswith("('''"):
+ self._log(f"⚠️ Detected Python literal in response, attempting to extract actual text", "warning")
+ try:
+ import ast
+ evaluated = ast.literal_eval(response_text)
+ if isinstance(evaluated, tuple):
+ response_text = str(evaluated[0])
+ elif isinstance(evaluated, str):
+ response_text = evaluated
+ except Exception as e:
+ self._log(f"⚠️ Failed to parse Python literal: {e}", "warning")
+
+ # Handle escaped content
+ #if '\\\\' in response_text or '\\n' in response_text or "\\'" in response_text or '\\"' in response_text:
+ # self._log(f"⚠️ Detected escaped content, unescaping...", "warning")
+ # response_text = response_text.replace("\\'", "'")
+ # response_text = response_text.replace('\\"', '"')
+ # response_text = response_text.replace('\\n', '\n')
+ # response_text = response_text.replace('\\\\', '\\')
+ # response_text = response_text.replace('\\/', '/')
+ # response_text = response_text.replace('\\t', '\t')
+ # response_text = response_text.replace('\\r', '\r')
+
+ # Clean up quotes
+ import re
+ response_text = re.sub(r"['''\"`]$", "", response_text.strip())
+ response_text = re.sub(r"^['''\"`]", "", response_text.strip())
+ response_text = re.sub(r"\s+['''\"`]\s+", " ", response_text)
+
+ # Try to parse as JSON
+ translations = {}
+ try:
+ # Strip markdown blocks more aggressively
+ import re
+ import json
+
+ # Method 1: Find JSON object directly (most reliable)
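+ # Greedy match from the first '{' to the last '}' captures the whole JSON object
+ # even when the model wraps it in prose or markdown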
+ json_match = re.search(r'\{.*\}', response_text, re.DOTALL)
+ if json_match:
+ json_text = json_match.group(0)
+ try:
+ translations = json.loads(json_text)
+ self._log(f"✅ Successfully parsed {len(translations)} translations (direct extraction)")
+ except json.JSONDecodeError:
+ # Try to fix the extracted JSON
+ json_text = self._fix_json_response(json_text)
+ translations = json.loads(json_text)
+ self._log(f"✅ Successfully parsed {len(translations)} translations (after fix)")
+ else:
+ # Method 2: Try stripping markdown if no JSON found
+ cleaned = response_text
+
+ # Remove markdown code blocks
+ if '```' in cleaned:
+ # This pattern handles ```json, ``json, ``` or ``
+ patterns = [
+ r'```json\s*\n?(.*?)```',
+ r'``json\s*\n?(.*?)``',
+ r'```\s*\n?(.*?)```',
+ r'``\s*\n?(.*?)``'
+ ]
+
+ for pattern in patterns:
+ match = re.search(pattern, cleaned, re.DOTALL)
+ if match:
+ cleaned = match.group(1).strip()
+ break
+
+ # Try to parse the cleaned text
+ translations = json.loads(cleaned)
+ self._log(f"✅ Successfully parsed {len(translations)} translations (after markdown strip)")
+
+ # Handle different response formats
+ if isinstance(translations, list):
+ # Array of translations only - map by position
+ temp = {}
+ for i, region in enumerate(regions):
+ if i < len(translations):
+ temp[region.text] = translations[i]
+ translations = temp
+
+ self._log(f"📊 Total translations: {len(translations)}")
+
+ except Exception as e:
+ self._log(f"❌ Failed to parse JSON: {str(e)}", "error")
+ self._log(f"Response preview: {response_text[:500]}...", "warning")
+
+ # CRITICAL: Check if this is a refusal message BEFORE regex fallback
+ # OpenAI and other APIs refuse certain content with text responses instead of JSON
+ # ONLY check if response looks like plain text refusal (not malformed JSON with translations)
+ import re
+ response_lower = response_text.lower()
+
+ # Quick check: if response starts with refusal keywords, it's definitely a refusal
+ refusal_starts = ['sorry', 'i cannot', "i can't", 'i apologize', 'i am unable', "i'm unable"]
+ if any(response_lower.strip().startswith(start) for start in refusal_starts):
+ # Very likely a refusal - raise immediately
+ from unified_api_client import UnifiedClientError
+ raise UnifiedClientError(
+ f"Content refused by API",
+ error_type="prohibited_content",
+ details={"refusal_message": response_text[:500]}
+ )
+
+ # Skip refusal check if response contains valid-looking JSON structure with translations
+ # (indicates malformed JSON that should go to regex fallback, not a refusal)
+ has_json_structure = (
+ (response_text.strip().startswith('{') and ':' in response_text and '"' in response_text) or
+ (response_text.strip().startswith('[') and ':' in response_text and '"' in response_text)
+ )
+
+ # Also check if response contains short translations (not refusal paragraphs)
+ # Refusals are typically long paragraphs, translations are short
+ avg_value_length = 0
+ if has_json_structure:
+ # Quick estimate: count chars between quotes
+ import re
+ values = re.findall(r'"([^"]{1,200})"\s*[,}]', response_text)
+ if values:
+ avg_value_length = sum(len(v) for v in values) / len(values)
+
+ # If looks like JSON with short values, skip refusal check (go to regex fallback)
+ if has_json_structure and avg_value_length > 0 and avg_value_length < 150:
+ self._log(f"🔍 Detected malformed JSON with translations (avg len: {avg_value_length:.0f}), trying regex fallback", "debug")
+ # Skip refusal detection, go straight to regex fallback
+ pass
+ else:
+ # Check for refusal patterns
+ # Refusal patterns - both simple strings and regex patterns
+ # Must be strict to avoid false positives on valid translations
+ refusal_patterns = [
+ "i cannot assist",
+ "i can't assist",
+ "i cannot help",
+ "i can't help",
+ r"sorry.{0,10}i can't (assist|help|translate)", # OpenAI specific
+ "i'm unable to translate",
+ "i am unable to translate",
+ "i apologize, but i cannot",
+ "i'm sorry, but i cannot",
+ "i don't have the ability to",
+ "this request cannot be",
+ "unable to process this",
+ "cannot complete this",
+ r"against.{0,20}(content )?policy", # "against policy" or "against content policy"
+ "violates.*policy",
+ r"(can't|cannot).{0,30}(sexual|explicit|inappropriate)", # "can't translate sexual"
+ "appears to sexualize",
+ "who appear to be",
+ "prohibited content",
+ "content blocked",
+ ]
+
+ # Check both simple string matching and regex patterns
+ is_refusal = False
+ for pattern in refusal_patterns:
+ if '.*' in pattern or r'.{' in pattern:
+ # It's a regex pattern
+ if re.search(pattern, response_lower):
+ is_refusal = True
+ break
+ else:
+ # Simple string match
+ if pattern in response_lower:
+ is_refusal = True
+ break
+
+ if is_refusal:
+ # Raise UnifiedClientError with prohibited_content type
+ # Fallback mechanism will handle this automatically
+ from unified_api_client import UnifiedClientError
+ raise UnifiedClientError(
+ f"Content refused by API",
+ error_type="prohibited_content",
+ details={"refusal_message": response_text[:500]}
+ )
+
+ # Fallback: try regex extraction (handles both quoted and unquoted keys)
+ try:
+ import re
+ translations = {}
+
+ # Try 1: Standard quoted keys and values
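+ # Captures "key": "value" pairs; the value group tolerates escaped quotes (\")
+ # inside the translation text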
+ pattern1 = r'"([^"]+)"\s*:\s*"([^"]*(?:\\.[^"]*)*)"'
+ matches = re.findall(pattern1, response_text)
+
+ if matches:
+ for key, value in matches:
+ value = value.replace('\\n', '\n').replace('\\"', '"').replace('\\\\', '\\')
+ translations[key] = value
+ self._log(f"✅ Recovered {len(translations)} translations using regex (quoted keys)")
+ else:
+ # Try 2: Unquoted keys (for invalid JSON like: key: "value")
+ pattern2 = r'([^\s:{}]+)\s*:\s*([^\n}]+)'
+ matches = re.findall(pattern2, response_text)
+
+ for key, value in matches:
+ # Clean up key and value
+ key = key.strip()
+ value = value.strip().rstrip(',')
+ # Remove quotes from value if present
+ if value.startswith('"') and value.endswith('"'):
+ value = value[1:-1]
+ elif value.startswith("'") and value.endswith("'"):
+ value = value[1:-1]
+ translations[key] = value
+
+ if translations:
+ self._log(f"✅ Recovered {len(translations)} translations using regex (unquoted keys)")
+
+ if not translations:
+ self._log("❌ All parsing attempts failed", "error")
+ return {}
+ except Exception as e:
+ self._log(f"❌ Failed to recover JSON: {e}", "error")
+ return {}
+
+ # Map translations back to regions
+ result = {}
+ all_originals = []
+ all_translations = []
+
+ # Extract translation values in order
+ translation_values = list(translations.values()) if translations else []
+
+ # DEBUG: Log what we extracted
+ self._log(f"📊 Extracted {len(translation_values)} translation values", "debug")
+ for i, val in enumerate(translation_values[:1000]): # First 1000 for debugging
+ # Safely handle None values
+ val_str = str(val) if val is not None else ""
+ self._log(f" Translation {i}: '{val_str[:1000]}...'", "debug")
+
+ # Clean all translation values to remove quotes
+ # CRITICAL: Also clean the keys in the dictionary to maintain correct mapping
+ cleaned_translations = {}
+ for key, value in translations.items():
+ cleaned_key = key
+ cleaned_value = self._clean_translation_text(value)
+ # Only add if the cleaned value is not empty (avoid misalignment)
+ if cleaned_value:
+ cleaned_translations[cleaned_key] = cleaned_value
+ else:
+ self._log(f"🔍 Skipping empty translation after cleaning: '{key}' → ''", "debug")
+
+ # Replace original dict with cleaned version
+ translations = cleaned_translations
+ translation_values = list(translations.values()) if translations else []
+
+ self._log(f"🔍 DEBUG: translation_values after cleaning:", "debug")
+ for i, val in enumerate(translation_values):
+ self._log(f" [{i}]: {repr(val)}", "debug")
+
+ # CRITICAL: Check if translation values are actually refusal messages
+ # API sometimes returns valid JSON where each "translation" is a refusal
+ if translation_values:
+ # Check first few translations for refusal patterns
+ import re
+ refusal_patterns = [
+ "i cannot",
+ "i can't",
+ r"sorry.{0,5}i can't help",
+ r"sorry.{0,5}i can't",
+ "sexually explicit",
+ "content policy",
+ "prohibited content",
+ "appears to be",
+ "who appear to be",
+ ]
+
+ # Sample first 3 translations (or all if fewer)
+ sample_size = min(3, len(translation_values))
+ refusal_count = 0
+
+ for sample_val in translation_values[:sample_size]:
+ if sample_val:
+ val_lower = sample_val.lower()
+ for pattern in refusal_patterns:
+ if '.*' in pattern or r'.{' in pattern:
+ if re.search(pattern, val_lower):
+ refusal_count += 1
+ break
+ else:
+ if pattern in val_lower:
+ refusal_count += 1
+ break
+
+ # If most translations are refusals, treat as refusal
+ if refusal_count >= sample_size * 0.5: # 50% threshold
+ # Raise UnifiedClientError with prohibited_content type
+ # Fallback mechanism will handle this automatically
+ from unified_api_client import UnifiedClientError
+ raise UnifiedClientError(
+ f"Content refused by API",
+ error_type="prohibited_content",
+ details={"refusal_message": translation_values[0][:500]}
+ )
+
+ # Key-based mapping (prioritize indexed format as requested in prompt)
+ self._log(f"📋 Mapping {len(translations)} translations to {len(regions)} regions")
+
+ for i, region in enumerate(regions):
+ if i % 10 == 0 and self._check_stop():
+ self._log(f"⏹️ Translation stopped during mapping (processed {i}/{len(regions)} regions)", "warning")
+ return result
+
+ # Get translation using multiple strategies (indexed format is most reliable)
+ translated = ""
+
+ # Strategy 1: Indexed key format "[N] original_text" (NEW STANDARD - most reliable)
+ key = f"[{i}] {region.text}"
+ if key in translations:
+ translated = translations[key]
+ self._log(f" ✅ Matched indexed key: '{key[:40]}...'", "debug")
+ # Strategy 2: Direct key match without index (backward compatibility)
+ elif region.text in translations:
+ translated = translations[region.text]
+ self._log(f" ✅ Matched direct key: '{region.text[:40]}...'", "debug")
+ # Strategy 3: Position-based fallback (least reliable, only if counts match exactly)
+ elif i < len(translation_values) and len(translation_values) == len(regions):
+ translated = translation_values[i]
+ self._log(f" ⚠️ Using position-based fallback for region {i}", "debug")
+
+ # Only mark as missing if we genuinely have no translation
+ # NOTE: Keep translation even if it matches original (e.g., numbers, names, SFX)
+ if not translated:
+ self._log(f" ⚠️ No translation for region {i}, leaving empty", "warning")
+ translated = ""
+
+ # Apply glossary if we have a translation
+ if translated and hasattr(self.main_gui, 'manual_glossary') and self.main_gui.manual_glossary:
+ for entry in self.main_gui.manual_glossary:
+ if 'source' in entry and 'target' in entry:
+ if entry['source'] in translated:
+ translated = translated.replace(entry['source'], entry['target'])
+
+ result[region.text] = translated
+ region.translated_text = translated
+
+ if translated:
+ all_originals.append(f"[{i+1}] {region.text}")
+ all_translations.append(f"[{i+1}] {translated}")
+ self._log(f" ✅ Translated: '{region.text[:30]}...' → '{translated[:30]}...'", "debug")
+
+ # Save history if enabled
+ if self.history_manager and self.contextual_enabled and all_originals:
+ try:
+ combined_original = "\n".join(all_originals)
+ combined_translation = "\n".join(all_translations)
+
+ self.history_manager.append_to_history(
+ user_content=combined_original,
+ assistant_content=combined_translation,
+ hist_limit=self.translation_history_limit,
+ reset_on_limit=not self.rolling_history_enabled,
+ rolling_window=self.rolling_history_enabled
+ )
+
+ self._log(f"📚 Saved {len(all_originals)} translations as 1 combined history entry", "success")
+ except Exception as e:
+ self._log(f"⚠️ Failed to save page to history: {str(e)}", "warning")
+
+ return result
+
+ except Exception as e:
+ if self._check_stop():
+ self._log("⏹️ Translation stopped due to user request", "warning")
+ return {}
+
+ # Check if this is a prohibited_content error
+ from unified_api_client import UnifiedClientError
+ if isinstance(e, UnifiedClientError) and getattr(e, "error_type", None) == "prohibited_content":
+ # Check if USE_FALLBACK_KEYS is enabled and we're not already in a fallback attempt
+ use_fallback = os.getenv('USE_FALLBACK_KEYS', '0') == '1'
+
+ if use_fallback and not _in_fallback:
+ self._log(f"⛔ Content refused by primary model, trying fallback keys...", "warning")
+
+ # Store original credentials to restore after fallback attempts
+ original_api_key = self.client.api_key
+ original_model = self.client.model
+
+ # Try to get fallback keys from environment
+ try:
+ fallback_keys_json = os.getenv('FALLBACK_KEYS', '[]')
+ fallback_keys = json.loads(fallback_keys_json) if fallback_keys_json != '[]' else []
+
+ if fallback_keys:
+ for idx, fallback in enumerate(fallback_keys, 1):
+ if self._check_stop():
+ self._log("⏹️ Translation stopped during fallback", "warning")
+ return {}
+
+ fallback_model = fallback.get('model')
+ fallback_key = fallback.get('api_key')
+
+ if not fallback_model or not fallback_key:
+ continue
+
+ self._log(f"🔄 Trying fallback {idx}/{len(fallback_keys)}: {fallback_model}", "info")
+
+ try:
+ # Temporarily switch to fallback model
+ old_key = self.client.api_key
+ old_model = self.client.model
+
+ self.client.api_key = fallback_key
+ self.client.model = fallback_model
+
+ # Re-setup client with new credentials
+ if hasattr(self.client, '_setup_client'):
+ self.client._setup_client()
+
+ # Retry the translation with fallback model (mark as in_fallback to prevent recursion)
+ return self.translate_full_page_context(regions, image_path, _in_fallback=True)
+
+ except UnifiedClientError as fallback_err:
+ if getattr(fallback_err, "error_type", None) == "prohibited_content":
+ self._log(f" ⛔ Fallback {idx} also refused", "warning")
+ # Restore original credentials and try next fallback
+ self.client.api_key = old_key
+ self.client.model = old_model
+ if hasattr(self.client, '_setup_client'):
+ self.client._setup_client()
+ continue
+ else:
+ # Other error, restore and raise
+ self.client.api_key = old_key
+ self.client.model = old_model
+ if hasattr(self.client, '_setup_client'):
+ self.client._setup_client()
+ raise
+ except Exception as fallback_err:
+ self._log(f" ❌ Fallback {idx} error: {str(fallback_err)[:100]}", "error")
+ # Restore original credentials and try next fallback
+ self.client.api_key = old_key
+ self.client.model = old_model
+ if hasattr(self.client, '_setup_client'):
+ self.client._setup_client()
+ continue
+
+ self._log(f"❌ All fallback keys refused content", "error")
+ else:
+ self._log(f"⚠️ No fallback keys configured", "warning")
+ except Exception as fallback_error:
+ self._log(f"❌ Error processing fallback keys: {str(fallback_error)}", "error")
+ finally:
+ # Always restore original credentials after fallback attempts
+ try:
+ self.client.api_key = original_api_key
+ self.client.model = original_model
+ if hasattr(self.client, '_setup_client'):
+ self.client._setup_client()
+ except Exception:
+ pass # Ignore errors during credential restoration
+
+ # If we get here, all fallbacks failed or weren't configured
+ self._log(f"❌ Content refused by API", "error")
+ return {}
+
+ self._log(f"❌ Full page context translation error: {str(e)}", "error")
+ self._log(traceback.format_exc(), "error")
+ return {}
+
+ def _fix_json_response(self, response_text: str) -> str:
+ import re
+ import json
+
+ # Debug: Show what we received
+ self._log(f"DEBUG: Original length: {len(response_text)}", "debug")
+ self._log(f"DEBUG: First 50 chars: [{response_text[:50]}]", "debug")
+
+ cleaned = response_text
+ if "```json" in cleaned:
+ match = re.search(r'```json\s*(.*?)```', cleaned, re.DOTALL)
+ if match:
+ cleaned = match.group(1).strip()
+ self._log(f"DEBUG: Extracted {len(cleaned)} chars from markdown", "debug")
+ else:
+ self._log("DEBUG: Regex didn't match!", "warning")
+
+ # Try to parse
+ try:
+ result = json.loads(cleaned)
+ self._log(f"✅ Parsed JSON with {len(result)} entries", "info")
+ return cleaned
+ except json.JSONDecodeError as e:
+ self._log(f"⚠️ JSON invalid: {str(e)}", "warning")
+ self._log(f"DEBUG: Cleaned text starts with: [{cleaned[:20]}]", "debug")
+ return cleaned
+
+ def _clean_translation_text(self, text: str) -> str:
+ """Remove unnecessary quotation marks, dots, and invalid characters from translated text"""
+ if not text:
+ return text
+
+ # Log what we're cleaning
+ original = text
+
+ # First, fix encoding issues
+ text = self._fix_encoding_issues(text)
+
+ # Normalize width/compatibility (e.g., fullwidth → ASCII, circled numbers → digits)
+ text = self._normalize_unicode_width(text)
+
+ # Remove Unicode replacement characters and invalid symbols
+ text = self._sanitize_unicode_characters(text)
+
+ # Remove leading and trailing whitespace
+ text = text.strip()
+
+ # Remove ALL types of quotes and dots from start/end
+ # Keep removing until no more quotes/dots at edges
+ while len(text) > 0:
+ old_len = len(text)
+
+ # Remove from start
+ text = text.lstrip('"\'`''""「」『』【】《》〈〉.·•°')
+
+ # Remove from end (but preserve ... and !!)
+ if not text.endswith('...') and not text.endswith('!!'):
+ text = text.rstrip('"\'`''""「」『』【】《》〈〉.·•°')
+
+ # If nothing changed, we're done
+ if len(text) == old_len:
+ break
+
+ # Final strip
+ text = text.strip()
+
+ # Log if we made changes
+ if text != original:
+ self._log(f"🧹 Cleaned text: '{original}' → '{text}'", "debug")
+
+ return text
+
+ def _sanitize_unicode_characters(self, text: str) -> str:
+ """Remove invalid Unicode characters, replacement characters, and box symbols.
+ Also more aggressively exclude square-like glyphs that leak as 'cubes' in some fonts.
+ """
+ if not text:
+ return text
+
+ import re
+ original = text
+
+
+ # Remove Unicode replacement character (�) and similar invalid symbols
+ text = text.replace('\ufffd', '') # Unicode replacement character
+
+ # Geometric squares and variants (broad sweep)
+ geo_squares = [
+ '□','■','▢','▣','▤','▥','▦','▧','▨','▩','◻','⬛','⬜',
+ '\u25a1','\u25a0','\u2b1c','\u2b1b'
+ ]
+ for s in geo_squares:
+ text = text.replace(s, '')
+
+ # Extra cube-like CJK glyphs commonly misrendered in non-CJK fonts
+ # (unconditionally removed per user request)
+ cube_likes = [
+ '口', # U+53E3
+ '囗', # U+56D7
+ '日', # U+65E5 (often boxy)
+ '曰', # U+66F0
+ '田', # U+7530
+ '回', # U+56DE
+ 'ロ', # U+30ED (Katakana RO)
+ 'ﾛ', # U+FF9B (Halfwidth Katakana RO)
+ 'ㅁ', # U+3141 (Hangul MIEUM)
+ '丨', # U+4E28 (CJK radical two) tall bar
+ ]
+ for s in cube_likes:
+ text = text.replace(s, '')
+
+ # Remove entire ranges that commonly render as boxes/blocks
+ # Box Drawing, Block Elements, Geometric Shapes (full range), plus a common white/black large square range already handled
+ text = re.sub(r'[\u2500-\u257F\u2580-\u259F\u25A0-\u25FF]', '', text)
+
+ # Optional debug: log culprits found in original text (before removal)
+ try:
+ culprits = re.findall(r'[\u2500-\u257F\u2580-\u259F\u25A0-\u25FF\u2B1B\u2B1C\u53E3\u56D7\u65E5\u66F0\u7530\u56DE\u30ED\uFF9B\u3141\u4E28]', original)
+ if culprits:
+ as_codes = [f'U+{ord(c):04X}' for c in culprits]
+ self._log(f"🧊 Removed box-like glyphs: {', '.join(as_codes)}", "debug")
+ except Exception:
+ pass
+
+ # If line is mostly ASCII, strip any remaining single CJK ideographs that stand alone
+ try:
+ ascii_count = sum(1 for ch in text if ord(ch) < 128)
+ ratio = ascii_count / max(1, len(text))
+ if ratio >= 0.8:
+ text = re.sub(r'(?:(?<=\s)|^)[\u3000-\u303F\u3040-\u30FF\u3400-\u9FFF\uFF00-\uFFEF](?=(?:\s)|$)', '', text)
+ except Exception:
+ pass
+
+ # Remove invisible and zero-width characters
+ text = re.sub(r'[\u200b-\u200f\u2028-\u202f\u205f-\u206f\ufeff]', '', text)
+
+ # Remove remaining control characters (except common ones like newline, tab)
+ text = re.sub(r'[\x00-\x08\x0B-\x0C\x0E-\x1F\x7F-\x9F]', '', text)
+
+ # Remove any remaining characters that can't be properly encoded
+ try:
+ text = text.encode('utf-8', errors='ignore').decode('utf-8')
+ except UnicodeError:
+ pass
+
+ if text != original:
+ try:
+ self._log(f"🔧 Sanitized Unicode: '{original}' → '{text}'", "debug")
+ except Exception:
+ pass
+
+ return text
+
+ def _normalize_unicode_width(self, text: str) -> str:
+ """Normalize Unicode to NFKC to 'unsquare' fullwidth/stylized forms while preserving CJK text"""
+ if not text:
+ return text
+ try:
+ import unicodedata
+ original = text
+ # NFKC folds compatibility characters (fullwidth forms, circled digits, etc.) to standard forms
+ text = unicodedata.normalize('NFKC', text)
+ if text != original:
+ try:
+ self._log(f"🔤 Normalized width/compat: '{original[:30]}...' → '{text[:30]}...'", "debug")
+ except Exception:
+ pass
+ return text
+ except Exception:
+ return text
+
+ def _fix_encoding_issues(self, text: str) -> str:
+ """Fix common encoding issues in text, especially for Korean"""
+ if not text:
+ return text
+
+ # Check for mojibake indicators (UTF-8 misinterpreted as Latin-1)
+ mojibake_indicators = ['ë', 'ì', 'ê°', 'ã', 'Ã', 'â', 'ä', 'ð', 'í', 'ì´']
+
+ if any(indicator in text for indicator in mojibake_indicators):
+ self._log("🔧 Detected mojibake encoding issue, attempting fixes...", "debug")
+
+ # Try multiple encoding fixes
+ encodings_to_try = [
+ ('latin-1', 'utf-8'),
+ ('windows-1252', 'utf-8'),
+ ('iso-8859-1', 'utf-8'),
+ ('cp1252', 'utf-8')
+ ]
+
+ for from_enc, to_enc in encodings_to_try:
+ try:
+ fixed = text.encode(from_enc, errors='ignore').decode(to_enc, errors='ignore')
+
+ # Check if the fix actually improved things
+ # Should have Korean characters (Hangul range) or be cleaner
+ if any('\uAC00' <= c <= '\uD7AF' for c in fixed) or fixed.count('�') < text.count('�'):
+ self._log(f"✅ Fixed encoding using {from_enc} -> {to_enc}", "debug")
+ return fixed
+ except Exception:
+ continue
+
+ # Clean up any remaining control characters and replacement characters
+ import re
+ text = re.sub(r'[\x00-\x08\x0B-\x0C\x0E-\x1F\x7F]', '', text)
+
+ # Additional cleanup for common encoding artifacts
+ # Remove sequences that commonly appear from encoding errors
+ text = re.sub(r'\ufffd+', '', text) # Remove multiple replacement characters
+ text = re.sub(r'[\u25a0-\u25ff]+', '', text) # Remove geometric shapes (common fallbacks)
+
+ # Clean up double spaces and normalize whitespace
+ text = re.sub(r'\s+', ' ', text).strip()
+
+ return text
+
+ def create_text_mask(self, image: np.ndarray, regions: List[TextRegion]) -> np.ndarray:
+ """Create mask with comprehensive per-text-type dilation settings"""
+ mask = np.zeros(image.shape[:2], dtype=np.uint8)
+
+ regions_masked = 0
+ regions_skipped = 0
+
+ self._log(f"🎭 Creating text mask for {len(regions)} regions", "info")
+
+ # Get manga settings
+ manga_settings = self.main_gui.config.get('manga_settings', {})
+
+ # Get dilation settings
+ base_dilation_size = manga_settings.get('mask_dilation', 15)
+
+ # If Auto Iterations is enabled, auto-set dilation by OCR provider and RT-DETR guide status
+ auto_iterations = manga_settings.get('auto_iterations', True)
+ if auto_iterations:
+ try:
+ ocr_settings = manga_settings.get('ocr', {})
+ use_rtdetr_guide = ocr_settings.get('use_rtdetr_for_ocr_regions', True)
+ bubble_detection_enabled = ocr_settings.get('bubble_detection_enabled', False)
+
+ # If RT-DETR guide is enabled for Google/Azure, force dilation to 0
+ if (getattr(self, 'ocr_provider', '').lower() in ('azure', 'google') and
+ bubble_detection_enabled and use_rtdetr_guide):
+ base_dilation_size = 0
+ self._log(f"📏 Auto dilation (RT-DETR guided): 0px (using iterations only)", "info")
+ elif getattr(self, 'ocr_provider', '').lower() in ('azure', 'google'):
+ # CRITICAL: Without RT-DETR, Azure/Google OCR is very conservative
+ # Use base dilation to expand masks to actual bubble size
+ base_dilation_size = 15 # Base expansion for Azure/Google without RT-DETR
+ self._log(f"📏 Auto dilation by provider ({self.ocr_provider}, no RT-DETR): {base_dilation_size}px", "info")
+ else:
+ base_dilation_size = 0
+ self._log(f"📏 Auto dilation by provider ({self.ocr_provider}): {base_dilation_size}px", "info")
+ except Exception:
+ pass
+
+ # Auto iterations: decide by image color vs B&W
+ auto_iterations = manga_settings.get('auto_iterations', True)
+ if auto_iterations:
+ try:
+ # Heuristic: consider image B&W if RGB channels are near-equal
+ if len(image.shape) < 3 or image.shape[2] == 1:
+ is_bw = True
+ else:
+ # Compute mean absolute differences between channels
+ ch0 = image[:, :, 0].astype(np.int16)
+ ch1 = image[:, :, 1].astype(np.int16)
+ ch2 = image[:, :, 2].astype(np.int16)
+ diff01 = np.mean(np.abs(ch0 - ch1))
+ diff12 = np.mean(np.abs(ch1 - ch2))
+ diff02 = np.mean(np.abs(ch0 - ch2))
+ # If channels are essentially the same, treat as B&W
+ is_bw = max(diff01, diff12, diff02) < 2.0
+ if is_bw:
+ text_bubble_iterations = 2
+ empty_bubble_iterations = 2
+ free_text_iterations = 0
+ self._log("📏 Auto iterations (B&W): text=2, empty=2, free=0", "info")
+ else:
+ text_bubble_iterations = 4
+ empty_bubble_iterations = 4
+ free_text_iterations = 4
+ self._log("📏 Auto iterations (Color): all=3", "info")
+ except Exception:
+ # Fallback to configured behavior on any error
+ auto_iterations = False
+
+ if not auto_iterations:
+ # Check if using uniform iterations for all text types
+ use_all_iterations = manga_settings.get('use_all_iterations', False)
+
+ if use_all_iterations:
+ # Use the same iteration count for all text types
+ all_iterations = manga_settings.get('all_iterations', 2)
+ text_bubble_iterations = all_iterations
+ empty_bubble_iterations = all_iterations
+ free_text_iterations = all_iterations
+ self._log(f"📏 Using uniform iterations: {all_iterations} for all text types", "info")
+ else:
+ # Use individual iteration settings
+ text_bubble_iterations = manga_settings.get('text_bubble_dilation_iterations',
+ manga_settings.get('bubble_dilation_iterations', 2))
+ empty_bubble_iterations = manga_settings.get('empty_bubble_dilation_iterations', 3)
+ free_text_iterations = manga_settings.get('free_text_dilation_iterations', 0)
+ self._log(f"📏 Using individual iterations - Text bubbles: {text_bubble_iterations}, "
+ f"Empty bubbles: {empty_bubble_iterations}, Free text: {free_text_iterations}", "info")
+
+ # Create separate masks for different text types
+ text_bubble_mask = np.zeros(image.shape[:2], dtype=np.uint8)
+ empty_bubble_mask = np.zeros(image.shape[:2], dtype=np.uint8)
+ free_text_mask = np.zeros(image.shape[:2], dtype=np.uint8)
+
+ text_bubble_count = 0
+ empty_bubble_count = 0
+ free_text_count = 0
+
+ for i, region in enumerate(regions):
+ # CHECK: Should this region be inpainted?
+ if not getattr(region, 'should_inpaint', True):
+ # Skip this region - it shouldn't be inpainted
+ regions_skipped += 1
+ self._log(f" Region {i+1}: SKIPPED (filtered by settings)", "debug")
+ continue
+
+ regions_masked += 1
+
+ # Determine text type
+ text_type = 'free_text' # default
+
+ # Check if region has bubble_type attribute (from bubble detection)
+ if hasattr(region, 'bubble_type'):
+ # RT-DETR classifications
+ if region.bubble_type == 'empty_bubble':
+ text_type = 'empty_bubble'
+ elif region.bubble_type == 'text_bubble':
+ text_type = 'text_bubble'
+ else: # 'free_text' or others
+ text_type = 'free_text'
+ else:
+ # Fallback: use simple heuristics if no bubble detection
+ x, y, w, h = region.bounding_box
+ x, y, w, h = int(x), int(y), int(w), int(h)
+ aspect_ratio = w / h if h > 0 else 1
+
+ # Check if region has text
+ has_text = hasattr(region, 'text') and region.text and len(region.text.strip()) > 0
+
+ # Heuristic: bubbles tend to be more square-ish or tall
+ # Free text tends to be wide and short
+ if aspect_ratio < 2.5 and w > 50 and h > 50:
+ if has_text:
+ text_type = 'text_bubble'
+ else:
+ # Could be empty bubble if it's round/oval shaped
+ text_type = 'empty_bubble'
+ else:
+ text_type = 'free_text'
+
+ # Select appropriate mask and increment counter
+ if text_type == 'text_bubble':
+ target_mask = text_bubble_mask
+ text_bubble_count += 1
+ mask_type = "TEXT BUBBLE"
+ elif text_type == 'empty_bubble':
+ target_mask = empty_bubble_mask
+ empty_bubble_count += 1
+ mask_type = "EMPTY BUBBLE"
+ else:
+ target_mask = free_text_mask
+ free_text_count += 1
+ mask_type = "FREE TEXT"
+
+ # Check if this is a merged region with original regions
+ if hasattr(region, 'original_regions') and region.original_regions:
+ # Use original regions for precise masking
+ self._log(f" Region {i+1} ({mask_type}): Using {len(region.original_regions)} original regions", "debug")
+
+ for orig_region in region.original_regions:
+ if hasattr(orig_region, 'vertices') and orig_region.vertices:
+ pts = np.array(orig_region.vertices, np.int32)
+ pts = pts.reshape((-1, 1, 2))
+ cv2.fillPoly(target_mask, [pts], 255)
+ else:
+ x, y, w, h = orig_region.bounding_box
+ x, y, w, h = int(x), int(y), int(w), int(h)
+ cv2.rectangle(target_mask, (x, y), (x + w, y + h), 255, -1)
+ else:
+ # Normal region
+ if hasattr(region, 'vertices') and region.vertices and len(region.vertices) <= 8:
+ pts = np.array(region.vertices, np.int32)
+ pts = pts.reshape((-1, 1, 2))
+ cv2.fillPoly(target_mask, [pts], 255)
+ self._log(f" Region {i+1} ({mask_type}): Using polygon", "debug")
+ else:
+ x, y, w, h = region.bounding_box
+ x, y, w, h = int(x), int(y), int(w), int(h)
+ cv2.rectangle(target_mask, (x, y), (x + w, y + h), 255, -1)
+ self._log(f" Region {i+1} ({mask_type}): Using bounding box", "debug")
+
+ self._log(f"📊 Mask breakdown: {text_bubble_count} text bubbles, {empty_bubble_count} empty bubbles, "
+ f"{free_text_count} free text regions, {regions_skipped} skipped", "info")
+
+ # Apply different dilation settings to each mask type
+ if base_dilation_size > 0:
+ kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (base_dilation_size, base_dilation_size))
+
+ # Apply dilation to text bubble mask
+ if text_bubble_count > 0 and text_bubble_iterations > 0:
+ self._log(f"📏 Applying text bubble dilation: {base_dilation_size}px, {text_bubble_iterations} iterations", "info")
+ text_bubble_mask = cv2.dilate(text_bubble_mask, kernel, iterations=text_bubble_iterations)
+
+ # Apply dilation to empty bubble mask
+ if empty_bubble_count > 0 and empty_bubble_iterations > 0:
+ self._log(f"📏 Applying empty bubble dilation: {base_dilation_size}px, {empty_bubble_iterations} iterations", "info")
+ empty_bubble_mask = cv2.dilate(empty_bubble_mask, kernel, iterations=empty_bubble_iterations)
+
+ # Apply dilation to free text mask
+ if free_text_count > 0 and free_text_iterations > 0:
+ self._log(f"📏 Applying free text dilation: {base_dilation_size}px, {free_text_iterations} iterations", "info")
+ free_text_mask = cv2.dilate(free_text_mask, kernel, iterations=free_text_iterations)
+ elif free_text_count > 0 and free_text_iterations == 0:
+ self._log(f"📏 No dilation for free text (iterations=0, perfect for B&W panels)", "info")
+
+ # Combine all masks
+ mask = cv2.bitwise_or(text_bubble_mask, empty_bubble_mask)
+ mask = cv2.bitwise_or(mask, free_text_mask)
+
+ coverage_percent = (np.sum(mask > 0) / mask.size) * 100
+ self._log(f"📊 Final mask coverage: {coverage_percent:.1f}% of image", "info")
+
+ return mask
+
+ def _get_or_init_shared_local_inpainter(self, local_method: str, model_path: str, force_reload: bool = False):
+ """Return a shared LocalInpainter for (local_method, model_path) with minimal locking.
+ If another thread is loading the same model, wait on its event instead of competing.
+ Set force_reload=True only when the method or model_path actually changed.
+
+ If spare instances are available in the pool, check one out for use.
+ The instance will stay assigned to this translator until cleanup.
+ """
+ from local_inpainter import LocalInpainter
+ key = (local_method, model_path or '')
+
+ # FIRST: Try to check out a spare instance if available (for true parallelism)
+ # Don't pop it - instead mark it as 'in use' so it stays in memory
+ with MangaTranslator._inpaint_pool_lock:
+ rec = MangaTranslator._inpaint_pool.get(key)
+ if rec and rec.get('spares'):
+ spares = rec.get('spares') or []
+ # Initialize checked_out list if it doesn't exist
+ if 'checked_out' not in rec:
+ rec['checked_out'] = []
+ checked_out = rec['checked_out']
+
+ # Look for an available spare (not checked out)
+ for spare in spares:
+ if spare not in checked_out and spare and getattr(spare, 'model_loaded', False):
+ # Mark as checked out
+ checked_out.append(spare)
+ self._log(f"🧰 Checked out spare inpainter ({len(checked_out)}/{len(spares)} in use)", "debug")
+ # Store reference for later return
+ self._checked_out_inpainter = spare
+ self._inpainter_pool_key = key
+ return spare
+
+ # FALLBACK: Use the shared instance
+ rec = MangaTranslator._inpaint_pool.get(key)
+ if rec and rec.get('loaded') and rec.get('inpainter'):
+ # Already loaded - do NOT force reload!
+ return rec['inpainter']
+ # Create or wait for loader
+ with MangaTranslator._inpaint_pool_lock:
+ rec = MangaTranslator._inpaint_pool.get(key)
+ if rec and rec.get('loaded') and rec.get('inpainter'):
+ # Already loaded - do NOT force reload!
+ return rec['inpainter']
+ if not rec:
+ # Register loading record
+ rec = {'inpainter': None, 'loaded': False, 'event': threading.Event()}
+ MangaTranslator._inpaint_pool[key] = rec
+ is_loader = True
+ else:
+ is_loader = False
+ event = rec['event']
+ # Loader performs heavy work without holding the lock
+ if is_loader:
+ try:
+ inp = LocalInpainter()
+ # Apply tiling settings once to the shared instance
+ tiling_settings = self.manga_settings.get('tiling', {})
+ inp.tiling_enabled = tiling_settings.get('enabled', False)
+ inp.tile_size = tiling_settings.get('tile_size', 512)
+ inp.tile_overlap = tiling_settings.get('tile_overlap', 64)
+ # Ensure model path
+ if not model_path or not os.path.exists(model_path):
+ try:
+ model_path = inp.download_jit_model(local_method)
+ except Exception as e:
+ self._log(f"⚠️ JIT download failed: {e}", "warning")
+ model_path = None
+ # Load model - NEVER force reload for first-time shared pool loading
+ loaded_ok = False
+ if model_path and os.path.exists(model_path):
+ try:
+ self._log(f"📦 Loading inpainter model...", "debug")
+ self._log(f" Method: {local_method}", "debug")
+ self._log(f" Path: {model_path}", "debug")
+ # Only force reload if explicitly requested AND this is not the first load
+ # For shared pool, we should never force reload on initial load
+ loaded_ok = inp.load_model_with_retry(local_method, model_path, force_reload=force_reload)
+ if not loaded_ok:
+ # Retry with force_reload if initial load failed
+ self._log(f"🔄 Initial load failed, retrying with force_reload=True", "warning")
+ loaded_ok = inp.load_model_with_retry(local_method, model_path, force_reload=True)
+ if not loaded_ok:
+ self._log(f"❌ Both load attempts failed", "error")
+ # Check file validity
+ try:
+ size_mb = os.path.getsize(model_path) / (1024 * 1024)
+ self._log(f" File size: {size_mb:.2f} MB", "info")
+ if size_mb < 1:
+ self._log(f" ⚠️ File may be corrupted (too small)", "warning")
+ except Exception:
+ self._log(f" ⚠️ Could not read model file", "warning")
+ except Exception as e:
+ self._log(f"⚠️ Inpainter load exception: {e}", "warning")
+ import traceback
+ self._log(traceback.format_exc(), "debug")
+ loaded_ok = False
+ elif not model_path:
+ self._log(f"⚠️ No model path configured for {local_method}", "warning")
+ elif not os.path.exists(model_path):
+ self._log(f"⚠️ Model file does not exist: {model_path}", "warning")
+ # Publish result
+ with MangaTranslator._inpaint_pool_lock:
+ rec = MangaTranslator._inpaint_pool.get(key) or rec
+ rec['inpainter'] = inp
+ rec['loaded'] = bool(loaded_ok)
+ rec['event'].set()
+ return inp
+ except Exception as e:
+ with MangaTranslator._inpaint_pool_lock:
+ rec = MangaTranslator._inpaint_pool.get(key) or rec
+ rec['inpainter'] = None
+ rec['loaded'] = False
+ rec['event'].set()
+ self._log(f"⚠️ Shared inpainter setup failed: {e}", "warning")
+ return None
+ else:
+ # Wait for loader to finish (without holding the lock)
+ success = event.wait(timeout=120)
+ if not success:
+ self._log(f"⏱️ Timeout waiting for inpainter to load (120s)", "warning")
+ return None
+
+ # Check if load was successful
+ rec2 = MangaTranslator._inpaint_pool.get(key)
+ if not rec2:
+ self._log(f"⚠️ Inpainter pool record disappeared after load", "warning")
+ return None
+
+ inp = rec2.get('inpainter')
+ loaded = rec2.get('loaded', False)
+
+ if inp and loaded:
+ # Successfully loaded by another thread
+ return inp
+ elif inp and not loaded:
+ # Inpainter created but model failed to load
+ # Try to load it ourselves
+ self._log(f"⚠️ Inpainter exists but model not loaded, attempting to load", "debug")
+ if model_path and os.path.exists(model_path):
+ try:
+ loaded_ok = inp.load_model_with_retry(local_method, model_path, force_reload=True)
+ if loaded_ok:
+ # Update the pool record
+ with MangaTranslator._inpaint_pool_lock:
+ rec2['loaded'] = True
+ self._log(f"✅ Successfully loaded model on retry in waiting thread", "info")
+ return inp
+ except Exception as e:
+ self._log(f"❌ Failed to load in waiting thread: {e}", "warning")
+ return inp # Return anyway, inpaint will no-op
+ else:
+ self._log(f"⚠️ Loader thread failed to create inpainter", "warning")
+ return None
+
+ @classmethod
+ def _count_preloaded_inpainters(cls) -> int:
+ try:
+ with cls._inpaint_pool_lock:
+ total = 0
+ for rec in cls._inpaint_pool.values():
+ try:
+ total += len(rec.get('spares') or [])
+ except Exception:
+ pass
+ return total
+ except Exception:
+ return 0
+
+ def preload_local_inpainters(self, local_method: str, model_path: str, count: int) -> int:
+ """Preload N local inpainting instances sequentially into the shared pool for parallel panel translation.
+ Returns the number of instances successfully preloaded.
+ """
+ # Respect singleton mode: do not create extra instances/spares
+ if getattr(self, 'use_singleton_models', False):
+ try:
+ self._log("🧰 Skipping local inpainting preload (singleton mode)", "debug")
+ except Exception:
+ pass
+ return 0
+ try:
+ from local_inpainter import LocalInpainter
+ except Exception:
+ self._log("❌ Local inpainter module not available for preloading", "error")
+ return 0
+ key = (local_method, model_path or '')
+ created = 0
+
+ # FIRST: Ensure the shared instance is initialized and ready
+ # This prevents race conditions when spare instances run out
+ with MangaTranslator._inpaint_pool_lock:
+ rec = MangaTranslator._inpaint_pool.get(key)
+ if not rec or not rec.get('loaded') or not rec.get('inpainter'):
+ # Need to create the shared instance
+ if not rec:
+ rec = {'inpainter': None, 'loaded': False, 'event': threading.Event(), 'spares': []}
+ MangaTranslator._inpaint_pool[key] = rec
+ need_init_shared = True
+ else:
+ need_init_shared = not (rec.get('loaded') and rec.get('inpainter'))
+ else:
+ need_init_shared = False
+
+ if need_init_shared:
+ self._log(f"📦 Initializing shared inpainter instance first...", "info")
+ try:
+ shared_inp = self._get_or_init_shared_local_inpainter(local_method, model_path, force_reload=False)
+ if shared_inp and getattr(shared_inp, 'model_loaded', False):
+ self._log(f"✅ Shared instance initialized and model loaded", "info")
+ # Verify the pool record is updated
+ with MangaTranslator._inpaint_pool_lock:
+ rec_check = MangaTranslator._inpaint_pool.get(key)
+ if rec_check:
+ self._log(f" Pool record: loaded={rec_check.get('loaded')}, has_inpainter={rec_check.get('inpainter') is not None}", "debug")
+ else:
+ self._log(f"⚠️ Shared instance initialization returned but model not loaded", "warning")
+ if shared_inp:
+ self._log(f" Instance exists but model_loaded={getattr(shared_inp, 'model_loaded', 'ATTR_MISSING')}", "debug")
+ except Exception as e:
+ self._log(f"⚠️ Shared instance initialization failed: {e}", "warning")
+ import traceback
+ self._log(traceback.format_exc(), "debug")
+
+ # Ensure pool record and spares list exist
+ with MangaTranslator._inpaint_pool_lock:
+ rec = MangaTranslator._inpaint_pool.get(key)
+ if not rec:
+ rec = {'inpainter': None, 'loaded': False, 'event': threading.Event(), 'spares': []}
+ MangaTranslator._inpaint_pool[key] = rec
+ if 'spares' not in rec or rec['spares'] is None:
+ rec['spares'] = []
+ spares = rec.get('spares')
+ # Prepare tiling settings
+ tiling_settings = self.manga_settings.get('tiling', {}) if hasattr(self, 'manga_settings') else {}
+ desired = max(0, int(count) - len(spares))
+ if desired <= 0:
+ return 0
+ ctx = " for parallel panels" if int(count) > 1 else ""
+ self._log(f"🧰 Preloading {desired} local inpainting instance(s){ctx}", "info")
+ for i in range(desired):
+ try:
+ inp = LocalInpainter()
+ inp.tiling_enabled = tiling_settings.get('enabled', False)
+ inp.tile_size = tiling_settings.get('tile_size', 512)
+ inp.tile_overlap = tiling_settings.get('tile_overlap', 64)
+ # Resolve model path if needed
+ resolved = model_path
+ if not resolved or not os.path.exists(resolved):
+ try:
+ resolved = inp.download_jit_model(local_method)
+ except Exception as e:
+ self._log(f"⚠️ Preload JIT download failed: {e}", "warning")
+ resolved = None
+ if resolved and os.path.exists(resolved):
+ ok = inp.load_model_with_retry(local_method, resolved, force_reload=False)
+ if ok and getattr(inp, 'model_loaded', False):
+ with MangaTranslator._inpaint_pool_lock:
+ rec = MangaTranslator._inpaint_pool.get(key) or {'spares': []}
+ if 'spares' not in rec or rec['spares'] is None:
+ rec['spares'] = []
+ rec['spares'].append(inp)
+ MangaTranslator._inpaint_pool[key] = rec
+ created += 1
+ elif ok and not getattr(inp, 'model_loaded', False):
+ self._log(f"⚠️ Preload: load_model_with_retry returned True but model_loaded is False", "warning")
+ elif not ok:
+ self._log(f"⚠️ Preload: load_model_with_retry returned False", "warning")
+ else:
+ self._log("⚠️ Preload skipped: no model path available", "warning")
+ except Exception as e:
+ self._log(f"⚠️ Preload error: {e}", "warning")
+ self._log(f"✅ Preloaded {created} local inpainting instance(s)", "info")
+ return created
+
+ def preload_local_inpainters_concurrent(self, local_method: str, model_path: str, count: int, max_parallel: int = None) -> int:
+ """Preload N local inpainting instances concurrently into the shared pool.
+ Honors the advanced panel/region parallelism toggles to pick a reasonable degree of parallelism.
+ Returns the number of instances successfully preloaded.
+ """
+ # Respect singleton mode: do not create extra instances/spares
+ if getattr(self, 'use_singleton_models', False):
+ try:
+ self._log("🧰 Skipping concurrent local inpainting preload (singleton mode)", "debug")
+ except Exception:
+ pass
+ return 0
+ try:
+ from local_inpainter import LocalInpainter
+ except Exception:
+ self._log("❌ Local inpainter module not available for preloading", "error")
+ return 0
+ key = (local_method, model_path or '')
+ # Determine desired number based on existing spares
+ with MangaTranslator._inpaint_pool_lock:
+ rec = MangaTranslator._inpaint_pool.get(key)
+ if not rec:
+ rec = {'inpainter': None, 'loaded': False, 'event': threading.Event(), 'spares': []}
+ MangaTranslator._inpaint_pool[key] = rec
+ spares = (rec.get('spares') or [])
+ desired = max(0, int(count) - len(spares))
+ if desired <= 0:
+ return 0
+ # Determine max_parallel from advanced settings if not provided
+ if max_parallel is None:
+ adv = {}
+ try:
+ adv = self.main_gui.config.get('manga_settings', {}).get('advanced', {}) if hasattr(self, 'main_gui') else {}
+ except Exception:
+ adv = {}
+ if adv.get('parallel_panel_translation', False):
+ try:
+ max_parallel = max(1, int(adv.get('panel_max_workers', 2)))
+ except Exception:
+ max_parallel = 2
+ elif adv.get('parallel_processing', False):
+ try:
+ max_parallel = max(1, int(adv.get('max_workers', 4)))
+ except Exception:
+ max_parallel = 2
+ else:
+ max_parallel = 1
+ max_parallel = max(1, min(int(max_parallel), int(desired)))
+ ctx = " for parallel panels" if int(count) > 1 else ""
+ self._log(f"🧰 Preloading {desired} local inpainting instance(s){ctx} (parallel={max_parallel})", "info")
+ # Resolve model path once
+ resolved_path = model_path
+ if not resolved_path or not os.path.exists(resolved_path):
+ try:
+ probe_inp = LocalInpainter()
+ resolved_path = probe_inp.download_jit_model(local_method)
+ except Exception as e:
+ self._log(f"⚠️ JIT download failed for concurrent preload: {e}", "warning")
+ resolved_path = None
+ tiling_settings = self.manga_settings.get('tiling', {}) if hasattr(self, 'manga_settings') else {}
+ from concurrent.futures import ThreadPoolExecutor, as_completed
+ created = 0
+ def _one():
+ try:
+ inp = LocalInpainter()
+ inp.tiling_enabled = tiling_settings.get('enabled', False)
+ inp.tile_size = tiling_settings.get('tile_size', 512)
+ inp.tile_overlap = tiling_settings.get('tile_overlap', 64)
+ if resolved_path and os.path.exists(resolved_path):
+ ok = inp.load_model_with_retry(local_method, resolved_path, force_reload=False)
+ if ok and getattr(inp, 'model_loaded', False):
+ with MangaTranslator._inpaint_pool_lock:
+ rec2 = MangaTranslator._inpaint_pool.get(key) or {'spares': []}
+ if 'spares' not in rec2 or rec2['spares'] is None:
+ rec2['spares'] = []
+ rec2['spares'].append(inp)
+ MangaTranslator._inpaint_pool[key] = rec2
+ return True
+ except Exception as e:
+ self._log(f"⚠️ Concurrent preload error: {e}", "warning")
+ return False
+ with ThreadPoolExecutor(max_workers=max_parallel) as ex:
+ futs = [ex.submit(_one) for _ in range(desired)]
+ for f in as_completed(futs):
+ try:
+ if f.result():
+ created += 1
+ except Exception:
+ pass
+ self._log(f"✅ Preloaded {created} local inpainting instance(s)", "info")
+ return created
+
+ @classmethod
+ def _count_preloaded_detectors(cls) -> int:
+ try:
+ with cls._detector_pool_lock:
+ return sum(len((rec or {}).get('spares') or []) for rec in cls._detector_pool.values())
+ except Exception:
+ return 0
+
+ @classmethod
+ def get_preload_counters(cls) -> Dict[str, int]:
+ """Return current counters for preloaded instances (for diagnostics/logging)."""
+ try:
+ with cls._inpaint_pool_lock:
+ inpaint_spares = sum(len((rec or {}).get('spares') or []) for rec in cls._inpaint_pool.values())
+ inpaint_keys = len(cls._inpaint_pool)
+ with cls._detector_pool_lock:
+ detector_spares = sum(len((rec or {}).get('spares') or []) for rec in cls._detector_pool.values())
+ detector_keys = len(cls._detector_pool)
+ return {
+ 'inpaint_spares': inpaint_spares,
+ 'inpaint_keys': inpaint_keys,
+ 'detector_spares': detector_spares,
+ 'detector_keys': detector_keys,
+ }
+ except Exception:
+ return {'inpaint_spares': 0, 'inpaint_keys': 0, 'detector_spares': 0, 'detector_keys': 0}
+
+ def preload_bubble_detectors(self, ocr_settings: Dict[str, Any], count: int) -> int:
+ """Preload N bubble detector instances (non-singleton) for panel parallelism.
+ Only applies when not using singleton models.
+ """
+ try:
+ from bubble_detector import BubbleDetector
+ except Exception:
+ self._log("❌ BubbleDetector module not available for preloading", "error")
+ return 0
+ # Skip if singleton mode
+ if getattr(self, 'use_singleton_models', False):
+ return 0
+ det_type = (ocr_settings or {}).get('detector_type', 'rtdetr_onnx')
+ model_id = (ocr_settings or {}).get('rtdetr_model_url') or (ocr_settings or {}).get('bubble_model_path') or ''
+ key = (det_type, model_id)
+ created = 0
+ with MangaTranslator._detector_pool_lock:
+ rec = MangaTranslator._detector_pool.get(key)
+ if not rec:
+ rec = {'spares': []}
+ MangaTranslator._detector_pool[key] = rec
+ spares = rec.get('spares')
+ if spares is None:
+ spares = []
+ rec['spares'] = spares
+ desired = max(0, int(count) - len(spares))
+ if desired <= 0:
+ return 0
+ self._log(f"🧰 Preloading {desired} bubble detector instance(s) [{det_type}]", "info")
+ for i in range(desired):
+ try:
+ bd = BubbleDetector()
+ ok = False
+ if det_type == 'rtdetr_onnx':
+ ok = bool(bd.load_rtdetr_onnx_model(model_id=model_id))
+ elif det_type == 'rtdetr':
+ ok = bool(bd.load_rtdetr_model(model_id=model_id))
+ elif det_type == 'yolo':
+ if model_id:
+ ok = bool(bd.load_model(model_id))
+ else:
+ # auto: prefer RT-DETR
+ ok = bool(bd.load_rtdetr_model(model_id=model_id))
+ if ok:
+ with MangaTranslator._detector_pool_lock:
+ rec = MangaTranslator._detector_pool.get(key) or {'spares': []}
+ if 'spares' not in rec or rec['spares'] is None:
+ rec['spares'] = []
+ rec['spares'].append(bd)
+ MangaTranslator._detector_pool[key] = rec
+ created += 1
+ except Exception as e:
+ self._log(f"⚠️ Bubble detector preload error: {e}", "warning")
+ self._log(f"✅ Preloaded {created} bubble detector instance(s)", "info")
+ return created
+
+ def _initialize_local_inpainter(self):
+ """Initialize local inpainting if configured"""
+ try:
+ from local_inpainter import LocalInpainter, HybridInpainter, AnimeMangaInpaintModel
+
+ # LOAD THE SETTINGS FROM CONFIG FIRST
+ # The dialog saves it as 'manga_local_inpaint_model' at root level
+ saved_local_method = self.main_gui.config.get('manga_local_inpaint_model', 'anime')
+ saved_inpaint_method = self.main_gui.config.get('manga_inpaint_method', 'cloud')
+
+ # MIGRATION: Ensure manga_ prefixed model path keys exist for ONNX methods
+ # This fixes compatibility where model paths were saved without manga_ prefix
+ for method_variant in ['anime', 'anime_onnx', 'lama', 'lama_onnx', 'aot', 'aot_onnx']:
+ non_prefixed_key = f'{method_variant}_model_path'
+ prefixed_key = f'manga_{method_variant}_model_path'
+ # If we have the non-prefixed but not the prefixed, migrate it
+ if non_prefixed_key in self.main_gui.config and prefixed_key not in self.main_gui.config:
+ self.main_gui.config[prefixed_key] = self.main_gui.config[non_prefixed_key]
+ self._log(f"🔄 Migrated model path config: {non_prefixed_key} → {prefixed_key}", "debug")
+
+ # Update manga_settings with the saved values
+ # ALWAYS use the top-level saved config to ensure correct model is loaded
+ if 'inpainting' not in self.manga_settings:
+ self.manga_settings['inpainting'] = {}
+
+ # Always override with saved values from top-level config
+ # This ensures the user's model selection in the settings dialog is respected
+ self.manga_settings['inpainting']['method'] = saved_inpaint_method
+ self.manga_settings['inpainting']['local_method'] = saved_local_method
+
+ # Now get the values (they'll be correct now)
+ inpaint_method = self.manga_settings.get('inpainting', {}).get('method', 'cloud')
+
+ if inpaint_method == 'local':
+ # This will now get the correct saved value
+ local_method = self.manga_settings.get('inpainting', {}).get('local_method', 'anime')
+
+ # Model path is saved with manga_ prefix - try both key formats for compatibility
+ model_path = self.main_gui.config.get(f'manga_{local_method}_model_path', '')
+ if not model_path:
+ # Fallback to non-prefixed key (older format)
+ model_path = self.main_gui.config.get(f'{local_method}_model_path', '')
+
+ self._log(f"Using local method: {local_method} (loaded from config)", "info")
+
+ # Check if we already have a loaded instance in the shared pool
+ # This avoids unnecessary tracking and reloading
+ inp_shared = self._get_or_init_shared_local_inpainter(local_method, model_path, force_reload=False)
+
+ # Only track changes AFTER getting the shared instance
+ # This prevents spurious reloads on first initialization
+ need_reload = False # ensure defined even on first init (used by the fallback load path below)
+ if not hasattr(self, '_last_local_method'):
+ self._last_local_method = local_method
+ self._last_local_model_path = model_path
+ else:
+ # Check if settings actually changed and we need to force reload
+ need_reload = False
+ if self._last_local_method != local_method:
+ self._log(f"🔄 Local method changed from {self._last_local_method} to {local_method}", "info")
+ need_reload = True
+ # If method changed, we need a different model - get it with force_reload
+ inp_shared = self._get_or_init_shared_local_inpainter(local_method, model_path, force_reload=True)
+ elif self._last_local_model_path != model_path:
+ self._log(f"🔄 Model path changed", "info")
+ if self._last_local_model_path:
+ self._log(f" Old: {os.path.basename(self._last_local_model_path)}", "debug")
+ if model_path:
+ self._log(f" New: {os.path.basename(model_path)}", "debug")
+ need_reload = True
+ # If path changed, reload the model
+ inp_shared = self._get_or_init_shared_local_inpainter(local_method, model_path, force_reload=True)
+
+ # Update tracking only if changes were made
+ if need_reload:
+ self._last_local_method = local_method
+ self._last_local_model_path = model_path
+ if inp_shared is not None:
+ self.local_inpainter = inp_shared
+ if getattr(self.local_inpainter, 'model_loaded', False):
+ self._log(f"✅ Using shared {local_method.upper()} inpainting model", "info")
+ return True
+ else:
+ self._log(f"⚠️ Shared inpainter created but model not loaded", "warning")
+ self._log(f"🔄 Attempting to retry model loading...", "info")
+
+ # Retry loading the model
+ if model_path and os.path.exists(model_path):
+ self._log(f"📦 Model path: {model_path}", "info")
+ self._log(f"📋 Method: {local_method}", "info")
+ try:
+ loaded_ok = inp_shared.load_model_with_retry(local_method, model_path, force_reload=True)
+ if loaded_ok and getattr(inp_shared, 'model_loaded', False):
+ self._log(f"✅ Model loaded successfully on retry", "info")
+ return True
+ else:
+ self._log(f"❌ Model still not loaded after retry", "error")
+ # Check if model file exists and is valid
+ try:
+ size_mb = os.path.getsize(model_path) / (1024 * 1024)
+ self._log(f"📊 Model file size: {size_mb:.2f} MB", "info")
+ if size_mb < 1:
+ self._log(f"⚠️ Model file seems too small (< 1 MB) - may be corrupted", "warning")
+ except Exception:
+ pass
+ except Exception as e:
+ self._log(f"❌ Retry load failed: {e}", "error")
+ import traceback
+ self._log(traceback.format_exc(), "debug")
+ elif not model_path:
+ self._log(f"❌ No model path provided", "error")
+ elif not os.path.exists(model_path):
+ self._log(f"❌ Model path does not exist: {model_path}", "error")
+ self._log(f"📥 Tip: Try downloading the model from the Manga Settings dialog", "info")
+
+ # If retry failed, fall through to fallback logic below
+
+ # Fall back to instance-level init only if shared init completely failed
+ self._log("⚠️ Shared inpainter init failed, falling back to instance creation", "warning")
+ try:
+ from local_inpainter import LocalInpainter
+
+ # Create local inpainter instance
+ self.local_inpainter = LocalInpainter()
+ tiling_settings = self.manga_settings.get('tiling', {})
+ self.local_inpainter.tiling_enabled = tiling_settings.get('enabled', False)
+ self.local_inpainter.tile_size = tiling_settings.get('tile_size', 512)
+ self.local_inpainter.tile_overlap = tiling_settings.get('tile_overlap', 64)
+ self._log(f"✅ Set tiling: enabled={self.local_inpainter.tiling_enabled}, size={self.local_inpainter.tile_size}, overlap={self.local_inpainter.tile_overlap}", "info")
+
+ # If no model path or doesn't exist, try to find or download one
+ if not model_path or not os.path.exists(model_path):
+ self._log(f"⚠️ Model path not found: {model_path}", "warning")
+ self._log("📥 Attempting to download JIT model...", "info")
+ try:
+ downloaded_path = self.local_inpainter.download_jit_model(local_method)
+ except Exception as e:
+ self._log(f"⚠️ JIT download failed: {e}", "warning")
+ downloaded_path = None
+ if downloaded_path:
+ model_path = downloaded_path
+ self._log(f"✅ Downloaded JIT model to: {model_path}")
+ else:
+ self._log("⚠️ JIT model download did not return a path", "warning")
+
+ # Load model with retry to avoid transient file/JSON issues under parallel init
+ loaded_ok = False
+ if model_path and os.path.exists(model_path):
+ for attempt in range(2):
+ try:
+ self._log(f"📥 Loading {local_method} model... (attempt {attempt+1})", "info")
+ if self.local_inpainter.load_model(local_method, model_path, force_reload=need_reload):
+ loaded_ok = True
+ break
+ except Exception as e:
+ self._log(f"⚠️ Load attempt {attempt+1} failed: {e}", "warning")
+ time.sleep(0.5)
+ if loaded_ok:
+ self._log(f"✅ Local inpainter loaded with {local_method.upper()} (fallback instance)")
+ else:
+ self._log(f"⚠️ Failed to load model, but inpainter is ready", "warning")
+ else:
+ self._log(f"⚠️ No model available, but inpainter is initialized", "warning")
+
+ return True
+
+ except Exception as e:
+ self._log(f"❌ Local inpainter module not available: {e}", "error")
+ return False
+
+ elif inpaint_method == 'hybrid':
+ # Track hybrid settings changes
+ if not hasattr(self, '_last_hybrid_config'):
+ self._last_hybrid_config = None
+
+ # Set tiling from tiling section
+ tiling_settings = self.manga_settings.get('tiling', {})
+ self.local_inpainter.tiling_enabled = tiling_settings.get('enabled', False)
+ self.local_inpainter.tile_size = tiling_settings.get('tile_size', 512)
+ self.local_inpainter.tile_overlap = tiling_settings.get('tile_overlap', 64)
+
+ self._log(f"✅ Set tiling: enabled={self.local_inpainter.tiling_enabled}, size={self.local_inpainter.tile_size}, overlap={self.local_inpainter.tile_overlap}", "info")
+
+ current_hybrid_config = self.manga_settings.get('inpainting', {}).get('hybrid_methods', [])
+
+ # Check if hybrid config changed
+ need_reload = self._last_hybrid_config != current_hybrid_config
+ if need_reload:
+ self._log("🔄 Hybrid configuration changed, reloading...", "info")
+ self.hybrid_inpainter = None # Clear old instance
+
+ self._last_hybrid_config = current_hybrid_config.copy() if current_hybrid_config else []
+
+ if self.hybrid_inpainter is None:
+ self.hybrid_inpainter = HybridInpainter()
+ # REMOVED: No longer override tiling settings for HybridInpainter
+
+ # Load multiple methods
+ methods = self.manga_settings.get('inpainting', {}).get('hybrid_methods', [])
+ loaded = 0
+
+ for method_config in methods:
+ method = method_config.get('method')
+ model_path = method_config.get('model_path')
+
+ if method and model_path:
+ if self.hybrid_inpainter.add_method(method, method, model_path):
+ loaded += 1
+ self._log(f"✅ Added {method.upper()} to hybrid inpainter")
+
+ if loaded > 0:
+ self._log(f"✅ Hybrid inpainter ready with {loaded} methods")
+ else:
+ self._log("⚠️ Hybrid inpainter initialized but no methods loaded", "warning")
+
+ return True
+
+ return False
+
+ except ImportError:
+ self._log("❌ Local inpainter module not available", "error")
+ return False
+ except Exception as e:
+ self._log(f"❌ Error initializing inpainter: {e}", "error")
+ return False
+
+
+ def inpaint_regions(self, image: np.ndarray, mask: np.ndarray) -> np.ndarray:
+ """Inpaint using configured method (cloud, local, or hybrid)"""
+ # Primary source of truth is the runtime flags set by the UI.
+ if getattr(self, 'skip_inpainting', False):
+ self._log(" ⏭️ Skipping inpainting (preserving original art)", "info")
+ return image.copy()
+
+ # Cloud mode explicitly selected in UI
+ if getattr(self, 'use_cloud_inpainting', False):
+ return self._cloud_inpaint(image, mask)
+
+ # Hybrid mode if UI requested it (fallback to settings key if present)
+ mode = getattr(self, 'inpaint_mode', None) or self.manga_settings.get('inpainting', {}).get('method')
+ if mode == 'hybrid' and hasattr(self, 'hybrid_inpainter'):
+ self._log(" 🔄 Using hybrid ensemble inpainting", "info")
+ return self.hybrid_inpainter.inpaint_ensemble(image, mask)
+
+ # If a background preload is running, wait until it's finished before inpainting
+ try:
+ if hasattr(self, '_inpaint_preload_event') and self._inpaint_preload_event and not self._inpaint_preload_event.is_set():
+ self._log(" ⏳ Waiting for local inpainting models to finish preloading...", "info")
+ # Wait with a generous timeout, but proceed afterward regardless
+ self._inpaint_preload_event.wait(timeout=300)
+ except Exception:
+ pass
+
+ # Default to local inpainting
+ local_method = self.manga_settings.get('inpainting', {}).get('local_method', 'anime')
+ model_path = self.main_gui.config.get(f'manga_{local_method}_model_path', '')
+
+ # Use a thread-local inpainter instance
+ inp = self._get_thread_local_inpainter(local_method, model_path)
+ if inp and getattr(inp, 'model_loaded', False):
+ self._log(" 🧽 Using local inpainting", "info")
+ return inp.inpaint(image, mask)
+ else:
+ # Conservative fallback: try shared instance only; do not attempt risky reloads that can corrupt output
+ try:
+ shared_inp = self._get_or_init_shared_local_inpainter(local_method, model_path)
+ if shared_inp and getattr(shared_inp, 'model_loaded', False):
+ self._log(" ✅ Using shared inpainting instance", "info")
+ return shared_inp.inpaint(image, mask)
+ except Exception:
+ pass
+
+ # RETRY LOGIC: Attempt to reload model with multiple strategies
+ self._log(" ⚠️ Local inpainting model not loaded; attempting retry...", "warning")
+
+ retry_attempts = [
+ {'force_reload': True, 'desc': 'force reload'},
+ {'force_reload': True, 'desc': 'force reload with delay', 'delay': 1.0},
+ {'force_reload': False, 'desc': 'standard reload'},
+ ]
+
+ for attempt_num, retry_config in enumerate(retry_attempts, 1):
+ try:
+ self._log(f" 🔄 Retry attempt {attempt_num}/{len(retry_attempts)}: {retry_config['desc']}", "info")
+
+ # Apply delay if specified
+ if retry_config.get('delay'):
+ import time
+ time.sleep(retry_config['delay'])
+
+ # Try to get or create a fresh inpainter instance
+ retry_inp = self._get_or_init_shared_local_inpainter(
+ local_method,
+ model_path,
+ force_reload=retry_config['force_reload']
+ )
+
+ if retry_inp:
+ # Check if model is loaded
+ if getattr(retry_inp, 'model_loaded', False):
+ self._log(f" ✅ Model loaded successfully on retry attempt {attempt_num}", "info")
+ return retry_inp.inpaint(image, mask)
+ else:
+ # Model exists but not loaded - try loading it directly
+ self._log(f" 🔧 Model not loaded, attempting direct load...", "info")
+ if model_path and os.path.exists(model_path):
+ try:
+ loaded_ok = retry_inp.load_model_with_retry(
+ local_method,
+ model_path,
+ force_reload=True
+ )
+ if loaded_ok and getattr(retry_inp, 'model_loaded', False):
+ self._log(f" ✅ Direct load successful on attempt {attempt_num}", "info")
+ return retry_inp.inpaint(image, mask)
+ else:
+ self._log(f" ⚠️ Direct load returned {loaded_ok}, model_loaded={getattr(retry_inp, 'model_loaded', False)}", "warning")
+ except Exception as load_err:
+ self._log(f" ⚠️ Direct load failed: {load_err}", "warning")
+ else:
+ if not model_path:
+ self._log(f" ⚠️ No model path configured", "warning")
+ elif not os.path.exists(model_path):
+ self._log(f" ⚠️ Model file does not exist: {model_path}", "warning")
+ else:
+ self._log(f" ⚠️ Failed to get inpainter instance on attempt {attempt_num}", "warning")
+
+ except Exception as retry_err:
+ self._log(f" ⚠️ Retry attempt {attempt_num} failed: {retry_err}", "warning")
+ import traceback
+ self._log(traceback.format_exc(), "debug")
+
+ # All retries exhausted - provide detailed diagnostic information
+ self._log(" ❌ All retry attempts exhausted. Diagnostics:", "error")
+ self._log(f" Method: {local_method}", "error")
+ if model_path:
+ self._log(f" Model path: {model_path}", "error")
+ if os.path.exists(model_path):
+ try:
+ size_mb = os.path.getsize(model_path) / (1024 * 1024)
+ self._log(f" File size: {size_mb:.2f} MB", "error")
+ if size_mb < 1:
+ self._log(f" ⚠️ File may be corrupted (too small)", "error")
+ except Exception:
+ self._log(f" ⚠️ Cannot read model file", "error")
+ else:
+ self._log(f" ⚠️ Model file does not exist", "error")
+ else:
+ self._log(f" ⚠️ No model path configured", "error")
+
+ self._log(" 💡 Suggestion: Check Manga Settings and download the model if needed", "error")
+ self._log(" ⚠️ Returning original image without inpainting", "warning")
+ return image.copy()
+
+ def _cloud_inpaint(self, image: np.ndarray, mask: np.ndarray) -> np.ndarray:
+ """Use Replicate API for inpainting"""
+ try:
+ import requests
+ import base64
+ from io import BytesIO
+ from PIL import Image as PILImage
+ import cv2
+
+ self._log(" ☁️ Cloud inpainting via Replicate API", "info")
+
+ # Convert to PIL
+ image_pil = PILImage.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
+ mask_pil = PILImage.fromarray(mask).convert('L')
+
+ # Convert to base64
+ img_buffer = BytesIO()
+ image_pil.save(img_buffer, format='PNG')
+ img_base64 = base64.b64encode(img_buffer.getvalue()).decode()
+
+ mask_buffer = BytesIO()
+ mask_pil.save(mask_buffer, format='PNG')
+ mask_base64 = base64.b64encode(mask_buffer.getvalue()).decode()
+
+ # Get cloud settings
+ cloud_settings = self.main_gui.config.get('manga_settings', {})
+ model_type = cloud_settings.get('cloud_inpaint_model', 'ideogram-v2')
+ timeout = cloud_settings.get('cloud_timeout', 60)
+
+ # Determine model identifier based on model type
+ if model_type == 'ideogram-v2':
+ model = 'ideogram-ai/ideogram-v2'
+ self._log(f" Using Ideogram V2 inpainting model", "info")
+ elif model_type == 'sd-inpainting':
+ model = 'stability-ai/stable-diffusion-inpainting'
+ self._log(f" Using Stable Diffusion inpainting model", "info")
+ elif model_type == 'flux-inpainting':
+ model = 'zsxkib/flux-dev-inpainting'
+ self._log(f" Using FLUX inpainting model", "info")
+ elif model_type == 'custom':
+ model = cloud_settings.get('cloud_custom_version', '')
+ if not model:
+ raise Exception("No custom model identifier specified")
+ self._log(f" Using custom model: {model}", "info")
+ else:
+ # Default to Ideogram V2
+ model = 'ideogram-ai/ideogram-v2'
+ self._log(f" Using default Ideogram V2 model", "info")
+
+ # Build input data based on model type
+ input_data = {
+ 'image': f'data:image/png;base64,{img_base64}',
+ 'mask': f'data:image/png;base64,{mask_base64}'
+ }
+
+ # Add prompt settings for models that support them
+ if model_type in ['ideogram-v2', 'sd-inpainting', 'flux-inpainting', 'custom']:
+ prompt = cloud_settings.get('cloud_inpaint_prompt', 'clean background, smooth surface')
+ input_data['prompt'] = prompt
+ self._log(f" Prompt: {prompt}", "info")
+
+ # SD-specific parameters
+ if model_type == 'sd-inpainting':
+ negative_prompt = cloud_settings.get('cloud_negative_prompt', 'text, writing, letters')
+ input_data['negative_prompt'] = negative_prompt
+ input_data['num_inference_steps'] = cloud_settings.get('cloud_inference_steps', 20)
+ self._log(f" Negative prompt: {negative_prompt}", "info")
+
+ # Prepare API auth headers
+ headers = {
+ 'Authorization': f'Token {self.replicate_api_key}',
+ 'Content-Type': 'application/json'
+ }
+
+ # First, get the latest version of the model
+ model_response = requests.get(
+ f'https://api.replicate.com/v1/models/{model}',
+ headers=headers
+ )
+
+ if model_response.status_code != 200:
+ # If model lookup fails, try direct prediction with model identifier
+ self._log(f" Model lookup returned {model_response.status_code}, trying direct prediction", "warning")
+ version = None
+ else:
+ model_info = model_response.json()
+ version = model_info.get('latest_version', {}).get('id')
+ if not version:
+ raise Exception(f"Could not get version for model {model}")
+
+ # Create prediction
+ prediction_data = {
+ 'input': input_data
+ }
+
+ if version:
+ prediction_data['version'] = version
+ else:
+ # For custom models, try extracting version from model string
+ if ':' in model:
+ # Format: owner/model:version
+ model_name, version_id = model.split(':', 1)
+ prediction_data['version'] = version_id
+ else:
+ raise Exception(f"Could not determine version for model {model}. Try using format: owner/model:version")
+
+ response = requests.post(
+ 'https://api.replicate.com/v1/predictions',
+ headers=headers,
+ json=prediction_data
+ )
+
+ if response.status_code != 201:
+ raise Exception(f"API error: {response.text}")
+
+ # Get prediction URL
+ prediction = response.json()
+ prediction_url = prediction.get('urls', {}).get('get') or prediction.get('id')
+
+ if not prediction_url:
+ raise Exception("No prediction URL returned")
+
+ # If we only got an ID, construct the URL
+ if not prediction_url.startswith('http'):
+ prediction_url = f'https://api.replicate.com/v1/predictions/{prediction_url}'
+
+ # Poll for result with configured timeout
+ import time
+ for i in range(timeout):
+ response = requests.get(prediction_url, headers=headers)
+ result = response.json()
+
+ # Log progress every 5 seconds
+ if i % 5 == 0 and i > 0:
+ self._log(f" ⏳ Still processing... ({i}s elapsed)", "info")
+
+ if result['status'] == 'succeeded':
+ # Download result image (handle both single URL and list)
+ output = result.get('output')
+ if not output:
+ raise Exception("No output returned from model")
+
+ if isinstance(output, list):
+ output_url = output[0] if output else None
+ else:
+ output_url = output
+
+ if not output_url:
+ raise Exception("No output URL in result")
+
+ img_response = requests.get(output_url)
+
+ # Convert back to numpy
+ result_pil = PILImage.open(BytesIO(img_response.content))
+ result_bgr = cv2.cvtColor(np.array(result_pil), cv2.COLOR_RGB2BGR)
+
+ self._log(" ✅ Cloud inpainting completed", "success")
+ return result_bgr
+
+ elif result['status'] == 'failed':
+ error_msg = result.get('error', 'Unknown error')
+ # Check for common errors
+ if 'version' in error_msg.lower():
+ error_msg += f" (Try using the model identifier '{model}' in the custom field)"
+ raise Exception(f"Inpainting failed: {error_msg}")
+
+ time.sleep(1)
+
+ raise Exception(f"Timeout waiting for inpainting (>{timeout}s)")
+
+ except Exception as e:
+ self._log(f" ❌ Cloud inpainting failed: {str(e)}", "error")
+ return image.copy()
+
+
+ def _regions_overlap(self, region1: TextRegion, region2: TextRegion) -> bool:
+ """Check if two regions overlap"""
+ x1, y1, w1, h1 = region1.bounding_box
+ x2, y2, w2, h2 = region2.bounding_box
+
+ # Check if rectangles overlap
+ if (x1 + w1 < x2 or x2 + w2 < x1 or
+ y1 + h1 < y2 or y2 + h2 < y1):
+ return False
+
+ return True
+
+ def _calculate_overlap_area(self, region1: TextRegion, region2: TextRegion) -> float:
+ """Calculate the area of overlap between two regions"""
+ x1, y1, w1, h1 = region1.bounding_box
+ x2, y2, w2, h2 = region2.bounding_box
+
+ # Calculate intersection
+ x_left = max(x1, x2)
+ y_top = max(y1, y2)
+ x_right = min(x1 + w1, x2 + w2)
+ y_bottom = min(y1 + h1, y2 + h2)
+
+ if x_right < x_left or y_bottom < y_top:
+ return 0.0
+
+ return (x_right - x_left) * (y_bottom - y_top)
+
+ def _adjust_overlapping_regions(self, regions: List[TextRegion], image_width: int, image_height: int) -> List[TextRegion]:
+ """Adjust positions of overlapping regions to prevent overlap while preserving text mapping"""
+ if len(regions) <= 1:
+ return regions
+
+ # Create a copy of regions with preserved indices
+ adjusted_regions = []
+ for idx, region in enumerate(regions):
+ # Create a new TextRegion with copied values
+ adjusted_region = TextRegion(
+ text=region.text,
+ vertices=list(region.vertices),
+ bounding_box=list(region.bounding_box),
+ confidence=region.confidence,
+ region_type=region.region_type
+ )
+ if hasattr(region, 'translated_text'):
+ adjusted_region.translated_text = region.translated_text
+
+ # IMPORTANT: Preserve original index to maintain text mapping
+ adjusted_region.original_index = idx
+ adjusted_region.original_bbox = tuple(region.bounding_box) # Store original position
+
+ adjusted_regions.append(adjusted_region)
+
+ # DON'T SORT - This breaks the text-to-region mapping!
+ # Process in original order to maintain associations
+
+ # Track which regions have been moved to avoid cascade effects
+ moved_regions = set()
+
+ # Adjust overlapping regions
+ for i in range(len(adjusted_regions)):
+ if i in moved_regions:
+ continue # Skip if already moved
+
+ for j in range(i + 1, len(adjusted_regions)):
+ if j in moved_regions:
+ continue # Skip if already moved
+
+ region1 = adjusted_regions[i]
+ region2 = adjusted_regions[j]
+
+ if self._regions_overlap(region1, region2):
+ x1, y1, w1, h1 = region1.bounding_box
+ x2, y2, w2, h2 = region2.bounding_box
+
+ # Calculate centers using ORIGINAL positions for better logic
+ orig_x1, orig_y1, _, _ = region1.original_bbox
+ orig_x2, orig_y2, _, _ = region2.original_bbox
+
+ # Determine which region to move based on original positions
+ # Move the one that's naturally "later" in reading order
+ if orig_y2 > orig_y1 + h1/2: # region2 is below
+ # Move region2 down slightly
+ min_gap = 10
+ new_y2 = y1 + h1 + min_gap
+ if new_y2 + h2 <= image_height:
+ region2.bounding_box = (x2, new_y2, w2, h2)
+ moved_regions.add(j)
+ self._log(f" 📍 Adjusted region {j} down (preserving order)", "debug")
+ elif orig_y1 > orig_y2 + h2/2: # region1 is below
+ # Move region1 down slightly
+ min_gap = 10
+ new_y1 = y2 + h2 + min_gap
+ if new_y1 + h1 <= image_height:
+ region1.bounding_box = (x1, new_y1, w1, h1)
+ moved_regions.add(i)
+ self._log(f" 📍 Adjusted region {i} down (preserving order)", "debug")
+ elif orig_x2 > orig_x1 + w1/2: # region2 is to the right
+ # Move region2 right slightly
+ min_gap = 10
+ new_x2 = x1 + w1 + min_gap
+ if new_x2 + w2 <= image_width:
+ region2.bounding_box = (new_x2, y2, w2, h2)
+ moved_regions.add(j)
+ self._log(f" 📍 Adjusted region {j} right (preserving order)", "debug")
+ else:
+ # Minimal adjustment - just separate them slightly
+ # without changing their relative order
+ min_gap = 5
+ if y2 >= y1: # region2 is lower or same level
+ new_y2 = y2 + min_gap
+ if new_y2 + h2 <= image_height:
+ region2.bounding_box = (x2, new_y2, w2, h2)
+ moved_regions.add(j)
+ else: # region1 is lower
+ new_y1 = y1 + min_gap
+ if new_y1 + h1 <= image_height:
+ region1.bounding_box = (x1, new_y1, w1, h1)
+ moved_regions.add(i)
+
+ # IMPORTANT: Return in ORIGINAL order to preserve text mapping
+ # Sort by original_index to restore the original order
+ adjusted_regions.sort(key=lambda r: r.original_index)
+
+ return adjusted_regions
+
+ # Emote-only mixed font fallback (Meiryo) — primary font remains unchanged
+ def _get_emote_fallback_font(self, font_size: int):
+ """Return a Meiryo Bold fallback font if available (preferred), else Meiryo.
+ Does not change the primary font; used only for emote glyphs.
+ """
+ try:
+ from PIL import ImageFont as _ImageFont
+ import os as _os
+ # Prefer Meiryo Bold TTC first; try common face indices, then regular Meiryo
+ candidates = [
+ ("C:/Windows/Fonts/meiryob.ttc", [0,1,2,3]), # Meiryo Bold (and variants) TTC
+ ("C:/Windows/Fonts/meiryo.ttc", [1,0,2,3]), # Try bold-ish index first if present
+ ]
+ for path, idxs in candidates:
+ if _os.path.exists(path):
+ for idx in idxs:
+ try:
+ return _ImageFont.truetype(path, font_size, index=idx)
+ except Exception:
+ continue
+ return None
+ except Exception:
+ return None
+
+ def _is_emote_char(self, ch: str) -> bool:
+ # Strict whitelist of emote-like symbols to render with Meiryo
+ EMOTES = set([
+ '\u2661', # ♡
+ '\u2665', # ♥
+ '\u2764', # ❤
+ '\u2605', # ★
+ '\u2606', # ☆
+ '\u266A', # ♪
+ '\u266B', # ♫
+ '\u203B', # ※
+ ])
+ return ch in EMOTES
+
+ def _line_width_emote_mixed(self, draw, text: str, primary_font, emote_font) -> int:
+ if not emote_font:
+ bbox = draw.textbbox((0, 0), text, font=primary_font)
+ return (bbox[2] - bbox[0])
+ w = 0
+ i = 0
+ while i < len(text):
+ ch = text[i]
+ # Treat VS16/VS15 as zero-width modifiers
+ if ch in ('\ufe0f', '\ufe0e'):
+ i += 1
+ continue
+ f = emote_font if self._is_emote_char(ch) else primary_font
+ try:
+ bbox = draw.textbbox((0, 0), ch, font=f)
+ w += (bbox[2] - bbox[0])
+ except Exception:
+ w += max(1, int(getattr(primary_font, 'size', 12) * 0.6))
+ i += 1
+ return w
+
+ def _draw_text_line_emote_mixed(self, draw, line: str, x: int, y: int, primary_font, emote_font,
+ fill_rgba, outline_rgba, outline_width: int,
+ shadow_enabled: bool, shadow_color_rgba, shadow_off):
+ cur_x = x
+ i = 0
+ while i < len(line):
+ ch = line[i]
+ if ch in ('\ufe0f', '\ufe0e'):
+ i += 1
+ continue
+ f = emote_font if (emote_font and self._is_emote_char(ch)) else primary_font
+ # measure
+ try:
+ bbox = draw.textbbox((0, 0), ch, font=f)
+ cw = bbox[2] - bbox[0]
+ except Exception:
+ cw = max(1, int(getattr(primary_font, 'size', 12) * 0.6))
+ # shadow
+ if shadow_enabled:
+ sx, sy = shadow_off
+ draw.text((cur_x + sx, y + sy), ch, font=f, fill=shadow_color_rgba)
+ # outline
+ if outline_width > 0:
+ for dx in range(-outline_width, outline_width + 1):
+ for dy in range(-outline_width, outline_width + 1):
+ if dx == 0 and dy == 0:
+ continue
+ draw.text((cur_x + dx, y + dy), ch, font=f, fill=outline_rgba)
+ # main
+ draw.text((cur_x, y), ch, font=f, fill=fill_rgba)
+ cur_x += cw
+ i += 1
+
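+    # Illustrative note for the two emote helpers above: a line such as "I love you ♡"
+    # is measured and drawn glyph by glyph. Every character except '♡' (U+2661) uses the
+    # primary font; the '♡' uses the Meiryo fallback when one was found, and variation
+    # selectors U+FE0E/U+FE0F are skipped as zero-width. Without a fallback font,
+    # _line_width_emote_mixed degrades to a single textbbox call on the whole line.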
+
+ def render_translated_text(self, image: np.ndarray, regions: List[TextRegion]) -> np.ndarray:
+ """Enhanced text rendering with customizable backgrounds and styles"""
+ self._log(f"\n🎨 Starting ENHANCED text rendering with custom settings:", "info")
+ self._log(f" ✅ Using ENHANCED renderer (not the simple version)", "info")
+ self._log(f" Background: {self.text_bg_style} @ {int(self.text_bg_opacity/255*100)}% opacity", "info")
+ self._log(f" Text color: RGB{self.text_color}", "info")
+ self._log(f" Shadow: {'Enabled' if self.shadow_enabled else 'Disabled'}", "info")
+ self._log(f" Font: {os.path.basename(self.selected_font_style) if self.selected_font_style else 'Default'}", "info")
+ if self.force_caps_lock:
+ self._log(f" Force Caps Lock: ENABLED", "info")
+
+ # Convert to PIL for text rendering
+ import cv2
+ pil_image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
+
+ # Get image dimensions for boundary checking
+ image_height, image_width = image.shape[:2]
+
+ # Create text mask to get accurate render boundaries
+ # This represents what will actually be inpainted
+ try:
+ text_mask = self.create_text_mask(image, regions)
+ use_mask_for_rendering = True
+ self._log(f" 🎭 Created text mask for accurate render boundaries", "info")
+ except Exception as e:
+ text_mask = None
+ use_mask_for_rendering = False
+ if not getattr(self, 'concise_logs', False):
+ self._log(f" ⚠️ Failed to create mask, using polygon bounds: {e}", "warning")
+
+ # Only adjust overlapping regions if constraining to bubbles
+ if self.constrain_to_bubble:
+ adjusted_regions = self._adjust_overlapping_regions(regions, image_width, image_height)
+ else:
+ # Skip adjustment when not constraining (allows overflow)
+ adjusted_regions = regions
+ self._log(" 📝 Using original regions (overflow allowed)", "info")
+
+ # Check if any regions still overlap after adjustment (shouldn't happen, but let's verify)
+ has_overlaps = False
+ for i, region1 in enumerate(adjusted_regions):
+ for region2 in adjusted_regions[i+1:]:
+ if self._regions_overlap(region1, region2):
+ has_overlaps = True
+ self._log(" ⚠️ Regions still overlap after adjustment", "warning")
+ break
+ if has_overlaps:
+ break
+
+ # Handle transparency settings based on overlaps
+ if has_overlaps and self.text_bg_opacity < 255 and self.text_bg_opacity > 0:
+ self._log(" ⚠️ Overlapping regions detected with partial transparency", "warning")
+ self._log(" ℹ️ Rendering with requested transparency level", "info")
+
+ region_count = 0
+
+ # Decide rendering path based on transparency needs
+ # For full transparency (opacity = 0) or no overlaps, use RGBA rendering
+ # For overlaps with partial transparency, we still use RGBA to honor user settings
+ use_rgba_rendering = True # Always use RGBA for consistent transparency support
+
+ if use_rgba_rendering:
+ # Transparency-enabled rendering path
+ pil_image = pil_image.convert('RGBA')
+
+ # Decide parallel rendering from advanced settings
+ try:
+ adv = getattr(self, 'manga_settings', {}).get('advanced', {}) if hasattr(self, 'manga_settings') else {}
+ except Exception:
+ adv = {}
+ render_parallel = bool(adv.get('render_parallel', True))
+ max_workers = None
+ try:
+ max_workers = int(adv.get('max_workers', 4))
+ except Exception:
+ max_workers = 4
+
+ def _render_one(region, idx):
+ # Build a separate overlay for this region
+ from PIL import Image as _PIL
+ overlay = _PIL.new('RGBA', pil_image.size, (0,0,0,0))
+ draw = ImageDraw.Draw(overlay)
+ # Work on local copy of text for caps lock
+ tr_text = region.translated_text or ''
+ if self.force_caps_lock:
+ tr_text = tr_text.upper()
+
+ # Get original bounding box
+ x, y, w, h = region.bounding_box
+
+ # CRITICAL: Always prefer mask bounds when available (most accurate)
+ # Mask bounds are especially important for Azure/Google without RT-DETR,
+ # where OCR polygons are unreliable.
+ if use_mask_for_rendering and text_mask is not None:
+ # Use mask bounds directly - most accurate method
+ safe_x, safe_y, safe_w, safe_h = self.get_safe_text_area(
+ region,
+ use_mask_bounds=True,
+ full_mask=text_mask
+ )
+ render_x, render_y, render_w, render_h = safe_x, safe_y, safe_w, safe_h
+ elif hasattr(region, 'vertices') and region.vertices:
+ # Fallback: use polygon-based safe area (for RT-DETR regions)
+ safe_x, safe_y, safe_w, safe_h = self.get_safe_text_area(region, use_mask_bounds=False)
+ render_x, render_y, render_w, render_h = safe_x, safe_y, safe_w, safe_h
+ else:
+ # Last resort: use simple bounding box
+ render_x, render_y, render_w, render_h = x, y, w, h
+
+ # Fit text - use render dimensions for proper sizing
+ if self.custom_font_size:
+ font_size = self.custom_font_size
+ lines = self._wrap_text(tr_text, self._get_font(font_size), render_w, draw)
+                else:
+                    # Same call for both 'multiplier' and automatic font size modes:
+                    # pass use_as_is=True since render dimensions are already the safe area
+                    font_size, lines = self._fit_text_to_region(tr_text, render_w, render_h, draw, region, use_as_is=True)
+ # Fonts
+ font = self._get_font(font_size)
+ emote_font = self._get_emote_fallback_font(font_size)
+ # Layout - use render dimensions (safe area if available)
+ # CRITICAL: Use actual text bbox height for accurate positioning
+ line_height = font_size * 1.2
+
+ # Calculate actual total height using text bbox for first line as reference
+ if lines:
+ sample_bbox = draw.textbbox((0, 0), lines[0] if lines[0] else "Ay", font=font)
+ actual_line_height = sample_bbox[3] - sample_bbox[1]
+ # Use the larger of: computed line_height or actual_line_height
+ line_height = max(line_height, actual_line_height * 1.1)
+
+ total_height = len(lines) * line_height
+
+ # Ensure text doesn't overflow vertically - constrain start_y
+ ideal_start_y = render_y + (render_h - total_height) // 2
+ # Make sure text starts within render area and doesn't extend past bottom
+ max_start_y = render_y + render_h - total_height
+ start_y = max(render_y, min(ideal_start_y, max_start_y))
+
+ # Debug logging for vertical constraint
+ if not getattr(self, 'concise_logs', False):
+ end_y = start_y + total_height
+ render_end_y = render_y + render_h
+ overflow = max(0, end_y - render_end_y)
+ if overflow > 0:
+ self._log(f" ⚠️ Text would overflow by {overflow}px, constrained to render area", "debug")
+ self._log(f" 📏 Render area: y={render_y}-{render_end_y} (h={render_h}), Text: y={start_y}-{end_y} (h={total_height:.0f})", "debug")
+ # BG - use render dimensions
+ draw_bg = self.text_bg_opacity > 0
+ try:
+ if draw_bg and getattr(self, 'free_text_only_bg_opacity', False):
+ draw_bg = self._is_free_text_region(region)
+ except Exception:
+ pass
+ if draw_bg:
+ self._draw_text_background(draw, render_x, render_y, render_w, render_h, lines, font, font_size, start_y, emote_font)
+ # Text - use render dimensions for centering
+ for i, line in enumerate(lines):
+ if emote_font is not None:
+ text_width = self._line_width_emote_mixed(draw, line, font, emote_font)
+ else:
+ tb = draw.textbbox((0,0), line, font=font)
+ text_width = tb[2]-tb[0]
+ tx = render_x + (render_w - text_width)//2
+ ty = start_y + i*line_height
+ ow = max(1, font_size // self.outline_width_factor)
+ if emote_font is not None:
+ self._draw_text_line_emote_mixed(draw, line, tx, ty, font, emote_font,
+ self.text_color + (255,), self.outline_color + (255,), ow,
+ self.shadow_enabled,
+ self.shadow_color + (255,) if isinstance(self.shadow_color, tuple) and len(self.shadow_color)==3 else (0,0,0,255),
+ (self.shadow_offset_x, self.shadow_offset_y))
+ else:
+ if self.shadow_enabled:
+ self._draw_text_shadow(draw, tx, ty, line, font)
+ for dx in range(-ow, ow+1):
+ for dy in range(-ow, ow+1):
+ if dx!=0 or dy!=0:
+ draw.text((tx+dx, ty+dy), line, font=font, fill=self.outline_color + (255,))
+ draw.text((tx, ty), line, font=font, fill=self.text_color + (255,))
+ return overlay
+
+ overlays = []
+ if render_parallel and len(adjusted_regions) > 1:
+ from concurrent.futures import ThreadPoolExecutor, as_completed
+ workers = max(1, min(max_workers, len(adjusted_regions)))
+ with ThreadPoolExecutor(max_workers=workers) as ex:
+ fut_to_idx = {ex.submit(_render_one, r, i): i for i, r in enumerate(adjusted_regions) if r.translated_text}
+ # Collect in order
+ temp = {}
+ for fut in as_completed(fut_to_idx):
+ i = fut_to_idx[fut]
+ try:
+ temp[i] = fut.result()
+ except Exception:
+ temp[i] = None
+ overlays = [temp.get(i) for i in range(len(adjusted_regions))]
+ else:
+ for i, r in enumerate(adjusted_regions):
+ if not r.translated_text:
+ overlays.append(None)
+ continue
+ overlays.append(_render_one(r, i))
+
+ # Composite overlays sequentially
+ for ov in overlays:
+ if ov is not None:
+ pil_image = Image.alpha_composite(pil_image, ov)
+ region_count += 1
+
+ # Convert back to RGB
+ pil_image = pil_image.convert('RGB')
+
+ else:
+ # This path is now deprecated but kept for backwards compatibility
+ # Direct rendering without transparency layers
+ draw = ImageDraw.Draw(pil_image)
+
+ for region in adjusted_regions:
+ if not region.translated_text:
+ continue
+
+ self._log(f"DEBUG: Rendering - Original: '{region.text[:30]}...' -> Translated: '{region.translated_text[:30]}...'", "debug")
+
+
+ # APPLY CAPS LOCK TRANSFORMATION HERE
+ if self.force_caps_lock:
+ region.translated_text = region.translated_text.upper()
+
+ region_count += 1
+ self._log(f" Rendering region {region_count}: {region.translated_text[:30]}...", "info")
+
+ # Get original bounding box
+ x, y, w, h = region.bounding_box
+
+ # CRITICAL: Always prefer mask bounds when available (most accurate)
+ # Mask bounds are especially important for Azure/Google without RT-DETR,
+ # where OCR polygons are unreliable.
+ if use_mask_for_rendering and text_mask is not None:
+ # Use mask bounds directly - most accurate method
+ safe_x, safe_y, safe_w, safe_h = self.get_safe_text_area(
+ region,
+ use_mask_bounds=True,
+ full_mask=text_mask
+ )
+ render_x, render_y, render_w, render_h = safe_x, safe_y, safe_w, safe_h
+ elif hasattr(region, 'vertices') and region.vertices:
+ # Fallback: use polygon-based safe area (for RT-DETR regions)
+ safe_x, safe_y, safe_w, safe_h = self.get_safe_text_area(region, use_mask_bounds=False)
+ render_x, render_y, render_w, render_h = safe_x, safe_y, safe_w, safe_h
+ else:
+ # Last resort: use simple bounding box
+ render_x, render_y, render_w, render_h = x, y, w, h
+
+ # Find optimal font size - use render dimensions for proper sizing
+ if self.custom_font_size:
+ font_size = self.custom_font_size
+ lines = self._wrap_text(region.translated_text,
+ self._get_font(font_size),
+ render_w, draw)
+ else:
+ # Pass use_as_is=True since render dimensions are already safe area
+ font_size, lines = self._fit_text_to_region(
+ region.translated_text, render_w, render_h, draw, region, use_as_is=True
+ )
+
+ # Load font
+ font = self._get_font(font_size)
+
+ # Calculate text layout - use render dimensions
+ # CRITICAL: Use actual text bbox height for accurate positioning
+ line_height = font_size * 1.2
+
+ # Calculate actual total height using text bbox for first line as reference
+ if lines:
+ sample_bbox = draw.textbbox((0, 0), lines[0] if lines[0] else "Ay", font=font)
+ actual_line_height = sample_bbox[3] - sample_bbox[1]
+ # Use the larger of: computed line_height or actual_line_height
+ line_height = max(line_height, actual_line_height * 1.1)
+
+ total_height = len(lines) * line_height
+
+ # Ensure text doesn't overflow vertically - constrain start_y
+ ideal_start_y = render_y + (render_h - total_height) // 2
+ # Make sure text starts within render area and doesn't extend past bottom
+ max_start_y = render_y + render_h - total_height
+ start_y = max(render_y, min(ideal_start_y, max_start_y))
+
+ # Draw opaque background (optionally only for free text) - use render dimensions
+ draw_bg = self.text_bg_opacity > 0
+ try:
+ if draw_bg and getattr(self, 'free_text_only_bg_opacity', False):
+ draw_bg = self._is_free_text_region(region)
+ except Exception:
+ pass
+ if draw_bg:
+ self._draw_text_background(draw, render_x, render_y, render_w, render_h, lines, font,
+ font_size, start_y)
+
+ # Draw text - use render dimensions
+ for i, line in enumerate(lines):
+ # Mixed fallback not supported in legacy path; keep primary measurement
+ text_bbox = draw.textbbox((0, 0), line, font=font)
+ text_width = text_bbox[2] - text_bbox[0]
+
+ text_x = render_x + (render_w - text_width) // 2
+ text_y = start_y + i * line_height
+
+ if self.shadow_enabled:
+ self._draw_text_shadow(draw, text_x, text_y, line, font)
+
+ outline_width = max(1, font_size // self.outline_width_factor)
+
+ # Draw outline
+ for dx in range(-outline_width, outline_width + 1):
+ for dy in range(-outline_width, outline_width + 1):
+ if dx != 0 or dy != 0:
+ draw.text((text_x + dx, text_y + dy), line,
+ font=font, fill=self.outline_color)
+
+ # Draw main text
+ draw.text((text_x, text_y), line, font=font, fill=self.text_color)
+
+ # Convert back to numpy array
+ result = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)
+ self._log(f"✅ ENHANCED text rendering complete - rendered {region_count} regions", "info")
+ return result
+
+ def _is_free_text_region(self, region) -> bool:
+ """Heuristic: determine if the region is free text (not a bubble).
+ Uses bubble_type when available; otherwise falls back to aspect ratio heuristics.
+ """
+ try:
+ if hasattr(region, 'bubble_type') and region.bubble_type:
+ return region.bubble_type == 'free_text'
+ # Fallback heuristic
+ x, y, w, h = region.bounding_box
+ w, h = int(w), int(h)
+ if h <= 0:
+ return True
+ aspect = w / max(1, h)
+ # Wider, shorter regions are often free text
+ return aspect >= 2.5 or h < 50
+ except Exception:
+ return False
+
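+    # Note on the fallback heuristic above: regions with aspect ratio >= 2.5 or height
+    # under 50px are treated as free text; an explicit bubble_type from the detector
+    # always takes precedence when it is set.
+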
+ def _draw_text_background(self, draw: ImageDraw, x: int, y: int, w: int, h: int,
+ lines: List[str], font: ImageFont, font_size: int,
+ start_y: int, emote_font: ImageFont = None):
+ """Draw background behind text with selected style.
+ If emote_font is provided, measure lines with emote-only mixing.
+ """
+ # Early return if opacity is 0 (fully transparent)
+ if self.text_bg_opacity == 0:
+ return
+
+ # Calculate actual text bounds
+ line_height = font_size * 1.2
+ max_width = 0
+
+ for line in lines:
+ if emote_font is not None:
+ line_width = self._line_width_emote_mixed(draw, line, font, emote_font)
+ else:
+ bbox = draw.textbbox((0, 0), line, font=font)
+ line_width = bbox[2] - bbox[0]
+ max_width = max(max_width, line_width)
+
+ # Apply size reduction
+ padding = int(font_size * 0.3)
+ bg_width = int((max_width + padding * 2) * self.text_bg_reduction)
+ bg_height = int((len(lines) * line_height + padding * 2) * self.text_bg_reduction)
+
+ # Center background
+ bg_x = x + (w - bg_width) // 2
+ bg_y = int(start_y - padding)
+
+ # Create semi-transparent color
+ bg_color = (255, 255, 255, self.text_bg_opacity)
+
+ if self.text_bg_style == 'box':
+ # Rounded rectangle
+ radius = min(20, bg_width // 10, bg_height // 10)
+ self._draw_rounded_rectangle(draw, bg_x, bg_y, bg_x + bg_width,
+ bg_y + bg_height, radius, bg_color)
+
+ elif self.text_bg_style == 'circle':
+ # Ellipse that encompasses the text
+ center_x = bg_x + bg_width // 2
+ center_y = bg_y + bg_height // 2
+ # Make it slightly wider to look more natural
+ ellipse_width = int(bg_width * 1.2)
+ ellipse_height = bg_height
+
+ draw.ellipse([center_x - ellipse_width // 2, center_y - ellipse_height // 2,
+ center_x + ellipse_width // 2, center_y + ellipse_height // 2],
+ fill=bg_color)
+
+ elif self.text_bg_style == 'wrap':
+ # Individual background for each line
+            for i, line in enumerate(lines):
+                # Measure with emote-aware mixing when a fallback font is active,
+                # consistent with the max_width measurement above
+                if emote_font is not None:
+                    line_width = self._line_width_emote_mixed(draw, line, font, emote_font)
+                else:
+                    bbox = draw.textbbox((0, 0), line, font=font)
+                    line_width = bbox[2] - bbox[0]
+
+ line_bg_width = int((line_width + padding) * self.text_bg_reduction)
+ line_bg_x = x + (w - line_bg_width) // 2
+ line_bg_y = int(start_y + i * line_height - padding // 2)
+ line_bg_height = int(line_height + padding // 2)
+
+ # Draw rounded rectangle for each line
+ radius = min(10, line_bg_width // 10, line_bg_height // 10)
+ self._draw_rounded_rectangle(draw, line_bg_x, line_bg_y,
+ line_bg_x + line_bg_width,
+ line_bg_y + line_bg_height, radius, bg_color)
+
+ def _draw_text_shadow(self, draw: ImageDraw, x: int, y: int, text: str, font: ImageFont):
+ """Draw text shadow with optional blur effect"""
+ if self.shadow_blur == 0:
+ # Simple sharp shadow
+ shadow_x = x + self.shadow_offset_x
+ shadow_y = y + self.shadow_offset_y
+ draw.text((shadow_x, shadow_y), text, font=font, fill=self.shadow_color)
+ else:
+ # Blurred shadow (simulated with multiple layers)
+ blur_range = self.shadow_blur
+ opacity_step = 80 // (blur_range + 1) # Distribute opacity across blur layers
+
+ for blur_offset in range(blur_range, 0, -1):
+ layer_opacity = opacity_step * (blur_range - blur_offset + 1)
+ shadow_color_with_opacity = self.shadow_color + (layer_opacity,)
+
+ # Draw shadow at multiple positions for blur effect
+ for dx in range(-blur_offset, blur_offset + 1):
+ for dy in range(-blur_offset, blur_offset + 1):
+ if dx*dx + dy*dy <= blur_offset*blur_offset: # Circular blur
+ shadow_x = x + self.shadow_offset_x + dx
+ shadow_y = y + self.shadow_offset_y + dy
+ draw.text((shadow_x, shadow_y), text, font=font,
+ fill=shadow_color_with_opacity)
+
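+    # Worked example for the blurred shadow above (comments only): with shadow_blur = 2,
+    # opacity_step = 80 // 3 = 26, so the outer layer (blur_offset = 2) is stamped at
+    # opacity 26 and the inner layer (blur_offset = 1) at 52. Each layer draws the text
+    # at every (dx, dy) inside a circle of radius blur_offset around the offset shadow
+    # position, approximating a soft falloff without a real Gaussian blur.
+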
+ def _draw_rounded_rectangle(self, draw: ImageDraw, x1: int, y1: int,
+ x2: int, y2: int, radius: int, fill):
+ """Draw a rounded rectangle"""
+ # Draw the main rectangle
+ draw.rectangle([x1 + radius, y1, x2 - radius, y2], fill=fill)
+ draw.rectangle([x1, y1 + radius, x2, y2 - radius], fill=fill)
+
+ # Draw the corners
+ draw.pieslice([x1, y1, x1 + 2 * radius, y1 + 2 * radius], 180, 270, fill=fill)
+ draw.pieslice([x2 - 2 * radius, y1, x2, y1 + 2 * radius], 270, 360, fill=fill)
+ draw.pieslice([x1, y2 - 2 * radius, x1 + 2 * radius, y2], 90, 180, fill=fill)
+ draw.pieslice([x2 - 2 * radius, y2 - 2 * radius, x2, y2], 0, 90, fill=fill)
+
+ def _get_font(self, font_size: int) -> ImageFont:
+ """Get font with specified size, using selected style if available"""
+ font_path = self.selected_font_style or self.font_path
+
+ if font_path:
+ try:
+ return ImageFont.truetype(font_path, font_size)
+            except Exception:
+                # Fall back to the default font if the file is missing or unreadable
+                pass
+
+ return ImageFont.load_default()
+
+ def _pil_word_wrap(self, text: str, font_path: str, roi_width: int, roi_height: int,
+ init_font_size: int, min_font_size: int, draw: ImageDraw) -> Tuple[str, int]:
+ """Comic-translate's pil_word_wrap algorithm - top-down font sizing with column wrapping.
+
+        Break long text into multiple lines and reduce the point size until all text fits within bounds.
+ This is a direct port from comic-translate for better text fitting.
+ """
+ from hyphen_textwrap import wrap as hyphen_wrap
+
+ mutable_message = text
+ font_size = init_font_size
+
+ def eval_metrics(txt, font):
+ """Calculate width/height of multiline text.
+
+ CRITICAL: Must match the rendering logic exactly to prevent overflow.
+ Rendering uses font_size * 1.2 as line_height, so we must do the same here.
+ """
+ lines = txt.split('\n')
+ if not lines:
+ return (0, 0)
+
+ max_width = 0
+
+ for line in lines:
+ bbox = draw.textbbox((0, 0), line if line else "A", font=font)
+ line_width = bbox[2] - bbox[0]
+ max_width = max(max_width, line_width)
+
+ # Calculate height using same logic as rendering:
+ # line_height = max(font_size * 1.2, actual_bbox_height * 1.1)
+ sample_bbox = draw.textbbox((0, 0), lines[0] if lines[0] else "Ay", font=font)
+ actual_line_height = sample_bbox[3] - sample_bbox[1]
+ line_height = max(font_size * 1.2, actual_line_height * 1.1)
+ total_height = len(lines) * line_height
+
+ return (max_width, total_height)
+
+ # Get initial font
+ try:
+ if font_path:
+ font = ImageFont.truetype(font_path, font_size)
+ else:
+ font = ImageFont.load_default()
+ except Exception:
+ font = ImageFont.load_default()
+
+ # Top-down algorithm: start with large font, shrink until it fits
+ while font_size > min_font_size:
+ try:
+ if font_path:
+ font = ImageFont.truetype(font_path, font_size)
+ else:
+ font = ImageFont.load_default()
+ except Exception:
+ font = ImageFont.load_default()
+
+ width, height = eval_metrics(mutable_message, font)
+
+ if height > roi_height:
+ # Text is too tall, reduce font size
+ font_size -= 0.75
+ mutable_message = text # Restore original text
+ elif width > roi_width:
+ # Text is too wide, try wrapping with column optimization
+ columns = len(mutable_message)
+
+ # Search for optimal column width
+ while columns > 0:
+ columns -= 1
+ if columns == 0:
+ break
+
+ # Use hyphen_wrap for smart wrapping
+ try:
+ wrapped = '\n'.join(hyphen_wrap(
+ text, columns,
+ break_on_hyphens=False,
+ break_long_words=False,
+ hyphenate_broken_words=True
+ ))
+ wrapped_width, _ = eval_metrics(wrapped, font)
+ if wrapped_width <= roi_width:
+ mutable_message = wrapped
+ break
+ except Exception:
+ # Fallback to simple wrapping if hyphen_wrap fails
+ break
+
+ if columns < 1:
+ # Couldn't find good column width, reduce font size
+ font_size -= 0.75
+ mutable_message = text # Restore original text
+ else:
+ # Text fits!
+ break
+
+ # If we hit minimum font size, do brute-force optimization
+ if font_size <= min_font_size:
+ font_size = min_font_size
+ mutable_message = text
+
+ try:
+ if font_path:
+ font = ImageFont.truetype(font_path, font_size)
+ else:
+ font = ImageFont.load_default()
+ except Exception:
+ font = ImageFont.load_default()
+
+ # Brute force: minimize cost function (width - roi_width)^2 + (height - roi_height)^2
+ min_cost = 1e9
+ min_text = text
+
+ for columns in range(1, min(len(text) + 1, 100)): # Limit iterations for performance
+ try:
+ wrapped_text = '\n'.join(hyphen_wrap(
+ text, columns,
+ break_on_hyphens=False,
+ break_long_words=False,
+ hyphenate_broken_words=True
+ ))
+ wrapped_width, wrapped_height = eval_metrics(wrapped_text, font)
+ cost = (wrapped_width - roi_width)**2 + (wrapped_height - roi_height)**2
+
+ if cost < min_cost:
+ min_cost = cost
+ min_text = wrapped_text
+ except Exception:
+ continue
+
+ mutable_message = min_text
+
+ return mutable_message, int(font_size)
+
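+    # Worked example for the brute-force pass above (comments only; hypothetical
+    # numbers): once font_size hits the minimum, the wrap with the lowest cost
+    # (width - roi_width)**2 + (height - roi_height)**2 wins. For a 200x100 ROI, a wrap
+    # measuring 210x90 costs 10**2 + 10**2 = 200, while one measuring 150x160 costs
+    # 2500 + 3600 = 6100, so the tighter 210x90 wrap is chosen even though it slightly
+    # exceeds the ROI width.
+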
+ def get_mask_bounds(self, region: TextRegion, full_mask: np.ndarray) -> Tuple[int, int, int, int]:
+ """Extract the actual mask boundaries for a region.
+
+ For non-Azure/Google OCR providers (manga-ocr, etc.), use RT-DETR bubble_bounds directly.
+ For Azure/Google, extract from the mask overlap to handle full-page OCR.
+ """
+ # PRIORITY 1: For manga-ocr and other RT-DETR-guided OCR providers, use bubble_bounds directly
+ # These providers already OCR within RT-DETR bubbles, so bubble_bounds IS the correct render area
+ is_azure_google = getattr(self, 'ocr_provider', '').lower() in ('azure', 'google')
+ if not is_azure_google and hasattr(region, 'bubble_bounds') and region.bubble_bounds:
+ # Use the RT-DETR bubble bounds directly - this is the full bubble area
+ bx, by, bw, bh = region.bubble_bounds
+ if not getattr(self, 'concise_logs', False):
+ self._log(f" ✅ Using RT-DETR bubble_bounds for mask: {int(bw)}×{int(bh)} at ({int(bx)}, {int(by)})", "debug")
+ return int(bx), int(by), int(bw), int(bh)
+ elif not is_azure_google:
+ # Debug: Why are we not using bubble_bounds?
+ if not getattr(self, 'concise_logs', False):
+ has_attr = hasattr(region, 'bubble_bounds')
+ is_none = getattr(region, 'bubble_bounds', None) is None if has_attr else True
+ #self._log(f" ⚠️ manga-ocr but NO bubble_bounds (has_attr={has_attr}, is_none={is_none})", "warning")
+
+ # PRIORITY 2: For Azure/Google or when bubble_bounds not available, extract from mask
+ if full_mask is not None:
+ try:
+ import cv2
+ import numpy as np
+
+ # Create a blank mask for this region
+ region_mask = np.zeros(full_mask.shape, dtype=np.uint8)
+
+ # Fill the region's area in the mask
+ if hasattr(region, 'vertices') and region.vertices:
+ vertices_np = np.array(region.vertices, dtype=np.int32)
+ cv2.fillPoly(region_mask, [vertices_np], 255)
+ else:
+ x, y, w, h = region.bounding_box
+ cv2.rectangle(region_mask, (int(x), int(y)), (int(x+w), int(y+h)), 255, -1)
+
+ # Find where this region overlaps with the full mask
+ overlap = cv2.bitwise_and(region_mask, full_mask)
+
+ # Get bounding box of the overlap
+ contours, _ = cv2.findContours(overlap, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+ if contours:
+ # Get the largest contour (should be the main text region)
+ largest_contour = max(contours, key=cv2.contourArea)
+ x, y, w, h = cv2.boundingRect(largest_contour)
+
+ if w > 0 and h > 0:
+ return x, y, w, h
+ except Exception as e:
+ if not getattr(self, 'concise_logs', False):
+ self._log(f" ⚠️ Failed to extract mask bounds: {e}, falling back", "debug")
+
+ # Fallback to original bounding box
+ x, y, w, h = region.bounding_box
+ return int(x), int(y), int(w), int(h)
+
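+    # Priority order in get_mask_bounds above: (1) RT-DETR bubble_bounds for providers
+    # other than Azure/Google, (2) the largest contour of the overlap between the
+    # region polygon and the full inpainting mask for Azure/Google full-page OCR,
+    # (3) the raw OCR bounding box as a last resort.
+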
+ def get_safe_text_area(self, region: TextRegion, use_mask_bounds: bool = False, full_mask: np.ndarray = None) -> Tuple[int, int, int, int]:
+ """Get safe text area with algorithm-aware shrink strategy.
+
+ Respects font_algorithm and auto_fit_style settings:
+ - conservative: Comic-translate's 15% shrink (85% usable)
+ - smart: Adaptive 10-20% shrink based on bubble shape
+ - aggressive: Minimal 5% shrink (95% usable)
+
+ Also applies OCR-specific adjustments for Azure/Google without RT-DETR guidance.
+
+ Args:
+ region: The text region to calculate safe area for
+ use_mask_bounds: If True, use actual mask boundaries instead of shrinking from polygon
+ full_mask: The complete mask image (required if use_mask_bounds=True)
+ """
+ # Get font sizing settings from config
+ try:
+ manga_settings = self.main_gui.config.get('manga_settings', {})
+ font_sizing = manga_settings.get('font_sizing', {})
+ rendering = manga_settings.get('rendering', {})
+
+ font_algorithm = font_sizing.get('algorithm', 'smart')
+ auto_fit_style = rendering.get('auto_fit_style', 'balanced')
+
+ # Check if using Azure/Google without RT-DETR guidance
+ ocr_settings = manga_settings.get('ocr', {})
+ use_rtdetr_guide = ocr_settings.get('use_rtdetr_for_ocr_regions', True)
+ is_azure_google = getattr(self, 'ocr_provider', '').lower() in ('azure', 'google')
+ needs_aggressive = is_azure_google and not use_rtdetr_guide
+ except Exception:
+ font_algorithm = 'smart'
+ auto_fit_style = 'balanced'
+ needs_aggressive = False
+
+ # Base margin factor by algorithm
+ if font_algorithm == 'conservative':
+ # Comic-translate default: 15% shrink = 85% usable
+ base_margin = 0.85
+ elif font_algorithm == 'aggressive':
+ # Aggressive: 5% shrink = 95% usable
+ base_margin = 0.95
+ else: # 'smart'
+ # Smart: adaptive based on auto_fit_style
+ if auto_fit_style == 'compact':
+ base_margin = 0.82 # 18% shrink - tight fit
+ elif auto_fit_style == 'readable':
+ base_margin = 0.92 # 8% shrink - loose fit
+ else: # 'balanced'
+ base_margin = 0.87 # 13% shrink - balanced
+
+ # SPECIAL CASE: Azure/Google without RT-DETR guidance
+ # Their OCR is too conservative, so we need more aggressive sizing
+ if needs_aggressive:
+ # Boost margin by 5-8% to compensate for conservative OCR bounds
+ base_margin = min(0.98, base_margin + 0.08)
+ self._log(f" 🎯 Azure/Google non-RT-DETR mode: Using aggressive {int(base_margin*100)}% margin", "debug")
+
+ # OPTION 1: Use mask boundaries directly (most accurate)
+ if use_mask_bounds and full_mask is not None:
+ mask_x, mask_y, mask_w, mask_h = self.get_mask_bounds(region, full_mask)
+ # Use the FULL mask bounds directly - the mask already represents the accurate
+ # inpainted area from the inpainting process. The inpainting itself already includes
+ # padding/margins, so we don't need to shrink further. Using 100% maximizes text
+ # utilization and prevents the "text too small" issue.
+
+ # CRITICAL: Use 100% of mask area for maximum text utilization
+ # The inpainting mask already has built-in margins from the mask generation process
+ safe_x, safe_y, safe_w, safe_h = mask_x, mask_y, mask_w, mask_h
+
+ if not getattr(self, 'concise_logs', False):
+ self._log(f" 📐 Using FULL mask bounds: {mask_w}×{mask_h} (100% utilization)", "debug")
+ self._log(f" Mask position: ({mask_x}, {mask_y})", "debug")
+ if hasattr(region, 'bounding_box'):
+ orig_x, orig_y, orig_w, orig_h = region.bounding_box
+ self._log(f" Original bbox: {orig_w}×{orig_h} at ({orig_x}, {orig_y})", "debug")
+ return safe_x, safe_y, safe_w, safe_h
+
+ # OPTION 2: Handle regions without vertices (simple bounding box)
+ if not hasattr(region, 'vertices') or not region.vertices:
+ x, y, w, h = region.bounding_box
+ safe_width = int(w * base_margin)
+ safe_height = int(h * base_margin)
+ safe_x = x + (w - safe_width) // 2
+ safe_y = y + (h - safe_height) // 2
+ return safe_x, safe_y, safe_width, safe_height
+
+ # Calculate convexity for shape-aware adjustment (only for 'smart' algorithm)
+ margin_factor = base_margin
+ if font_algorithm == 'smart':
+ try:
+ # Convert vertices to numpy array with correct dtype
+ vertices = np.array(region.vertices, dtype=np.int32)
+ hull = cv2.convexHull(vertices)
+ hull_area = cv2.contourArea(hull)
+ poly_area = cv2.contourArea(vertices)
+
+ if poly_area > 0:
+ convexity = hull_area / poly_area
+ else:
+ convexity = 1.0
+
+ # Adjust margin based on bubble shape
+ if convexity < 0.85: # Speech bubble with tail
+ # More aggressive shrink for tailed bubbles (avoid the tail)
+ margin_factor = base_margin - 0.10
+ if not getattr(self, 'concise_logs', False):
+ self._log(f" Speech bubble with tail: {int(margin_factor*100)}% usable area", "debug")
+ elif convexity > 0.98: # Rectangular/square
+ # Less shrink for rectangular regions
+ margin_factor = base_margin + 0.05
+ if not getattr(self, 'concise_logs', False):
+ self._log(f" Rectangular region: {int(margin_factor*100)}% usable area", "debug")
+ else: # Regular oval bubble
+ # Use base margin
+ margin_factor = base_margin
+ if not getattr(self, 'concise_logs', False):
+ self._log(f" Regular bubble: {int(margin_factor*100)}% usable area", "debug")
+
+ # Clamp margin factor
+ margin_factor = max(0.70, min(0.98, margin_factor))
+ except Exception:
+ margin_factor = base_margin
+
+ # Convert vertices to numpy array for boundingRect
+ vertices_np = np.array(region.vertices, dtype=np.int32)
+ x, y, w, h = cv2.boundingRect(vertices_np)
+
+ safe_width = int(w * margin_factor)
+ safe_height = int(h * margin_factor)
+ safe_x = x + (w - safe_width) // 2
+ safe_y = y + (h - safe_height) // 2
+
+ return safe_x, safe_y, safe_width, safe_height
+
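+    # Summary of the shrink factors above (values taken directly from the code):
+    #   conservative -> 85% usable, aggressive -> 95%, smart/balanced -> 87%,
+    #   smart/compact -> 82%, smart/readable -> 92%. Azure/Google without RT-DETR
+    #   guidance adds 8% (capped at 98%); the smart algorithm then subtracts 10% for
+    #   tailed bubbles or adds 5% for rectangular regions, clamped to 70-98%.
+    #   Mask-based bounds bypass all of this and use 100% of the mask area.
+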
+ def _fit_text_to_region(self, text: str, max_width: int, max_height: int, draw: ImageDraw, region: TextRegion = None, use_as_is: bool = False) -> Tuple[int, List[str]]:
+ """Find optimal font size using comic-translate's pil_word_wrap algorithm with algorithm-aware adjustments
+
+ Args:
+ text: Text to fit
+ max_width: Maximum width available
+ max_height: Maximum height available
+ draw: PIL ImageDraw object
+ region: Optional TextRegion for safe area calculation
+ use_as_is: If True, use max_width/max_height directly without further shrinking
+ """
+
+ # Get font sizing settings
+ try:
+ manga_settings = self.main_gui.config.get('manga_settings', {})
+ font_sizing = manga_settings.get('font_sizing', {})
+ font_algorithm = font_sizing.get('algorithm', 'smart')
+ prefer_larger = font_sizing.get('prefer_larger', True)
+ except Exception:
+ font_algorithm = 'smart'
+ prefer_larger = True
+
+ # Get usable area
+ if use_as_is:
+ # Dimensions are already safe area - use them directly (no double shrinking)
+ usable_width = max_width
+ usable_height = max_height
+ elif region and hasattr(region, 'vertices') and region.vertices:
+ # Calculate safe area from region
+ safe_x, safe_y, safe_width, safe_height = self.get_safe_text_area(region)
+ usable_width = safe_width
+ usable_height = safe_height
+ else:
+ # Fallback: use algorithm-aware margin
+ if font_algorithm == 'conservative':
+ margin = 0.85 # Comic-translate default
+ elif font_algorithm == 'aggressive':
+ margin = 0.95
+ else: # smart
+ margin = 0.87
+ usable_width = int(max_width * margin)
+ usable_height = int(max_height * margin)
+
+ # Font size limits (GUI settings with algorithm adjustments)
+ min_font_size = max(10, self.min_readable_size)
+
+ # Adjust initial font size based on algorithm and prefer_larger
+ base_init = min(40, self.max_font_size_limit)
+ if font_algorithm == 'aggressive' and prefer_larger:
+ # Start higher for aggressive mode
+ init_font_size = min(int(base_init * 1.2), self.max_font_size_limit)
+ elif font_algorithm == 'conservative':
+ # Start lower for conservative mode
+ init_font_size = int(base_init * 0.9)
+ else:
+ init_font_size = base_init
+
+ # Use comic-translate's pil_word_wrap algorithm
+ wrapped_text, final_font_size = self._pil_word_wrap(
+ text=text,
+ font_path=self.selected_font_style or self.font_path,
+ roi_width=usable_width,
+ roi_height=usable_height,
+ init_font_size=init_font_size,
+ min_font_size=min_font_size,
+ draw=draw
+ )
+
+ # Convert wrapped text to lines
+ lines = wrapped_text.split('\n') if wrapped_text else [text]
+
+ # Log font algorithm used (debug)
+ if not getattr(self, 'concise_logs', False):
+ self._log(f" Font algorithm: {font_algorithm}, init_size: {init_font_size}, final_size: {final_font_size}", "debug")
+
+ # Apply multiplier if in multiplier mode
+ if self.font_size_mode == 'multiplier':
+ target_size = int(final_font_size * self.font_size_multiplier)
+
+ # Check if multiplied size still fits (if constrained)
+ if self.constrain_to_bubble:
+ # Re-wrap at target size to check fit
+ test_wrapped, _ = self._pil_word_wrap(
+ text=text,
+ font_path=self.selected_font_style or self.font_path,
+ roi_width=usable_width,
+ roi_height=usable_height,
+ init_font_size=target_size,
+ min_font_size=target_size, # Force this size
+ draw=draw
+ )
+ test_lines = test_wrapped.split('\n') if test_wrapped else [text]
+ test_height = len(test_lines) * target_size * 1.2
+
+ if test_height <= usable_height:
+ final_font_size = target_size
+ lines = test_lines
+ else:
+ self._log(f" Multiplier {self.font_size_multiplier}x would exceed bubble", "debug")
+ else:
+ # Not constrained, use multiplied size
+ final_font_size = target_size
+ lines = wrapped_text.split('\n') if wrapped_text else [text]
+
+ self._log(f" Font sizing: text_len={len(text)}, size={final_font_size}, lines={len(lines)}", "debug")
+
+ return final_font_size, lines
+
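+    # Worked example for multiplier mode above (comments only; hypothetical numbers):
+    # if _pil_word_wrap settles on size 24 and font_size_multiplier is 1.5, the target
+    # becomes int(24 * 1.5) = 36. With constrain_to_bubble enabled, the text is
+    # re-wrapped at 36 and len(lines) * 36 * 1.2 must still fit usable_height;
+    # otherwise the original size 24 is kept. Without the constraint, 36 is used as-is.
+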
+ def _fit_text_simple_topdown(self, text: str, usable_width: int, usable_height: int,
+ draw: ImageDraw, min_size: int, max_size: int) -> Tuple[int, List[str]]:
+ """Simple top-down approach - start large and shrink only if needed"""
+        # Start from a reasonably large size
+ start_size = int(max_size * 0.8)
+
+ for font_size in range(start_size, min_size - 1, -2): # Step by 2 for speed
+ font = self._get_font(font_size)
+ lines = self._wrap_text(text, font, usable_width, draw)
+
+ line_height = font_size * 1.2 # Tighter for overlaps
+ total_height = len(lines) * line_height
+
+ if total_height <= usable_height:
+ return font_size, lines
+
+ # If nothing fits, use minimum
+ font = self._get_font(min_size)
+ lines = self._wrap_text(text, font, usable_width, draw)
+ return min_size, lines
+
+ def _check_potential_overlap(self, region: TextRegion) -> bool:
+ """Check if this region might overlap with others based on position"""
+ if not region or not hasattr(region, 'bounding_box'):
+ return False
+
+ x, y, w, h = region.bounding_box
+
+ # Simple heuristic: small regions or regions at edges might overlap
+ # You can make this smarter based on your needs
+ if w < 100 or h < 50: # Small bubbles often overlap
+ return True
+
+ # Add more overlap detection logic here if needed
+ # For now, default to no overlap for larger bubbles
+ return False
+
+ def _wrap_text(self, text: str, font: ImageFont, max_width: int, draw: ImageDraw) -> List[str]:
+ """Wrap text to fit within max_width with optional strict wrapping"""
+ # Handle empty text
+ if not text.strip():
+ return []
+
+ # Only enforce width check if constrain_to_bubble is enabled
+ if self.constrain_to_bubble and max_width <= 0:
+ self._log(f" ⚠️ Invalid max_width: {max_width}, using fallback", "warning")
+ return [text[:20] + "..."] if len(text) > 20 else [text]
+
+ words = text.split()
+ lines = []
+ current_line = []
+
+ for word in words:
+ # Check if word alone is too long
+ word_bbox = draw.textbbox((0, 0), word, font=font)
+ word_width = word_bbox[2] - word_bbox[0]
+
+ if word_width > max_width and len(word) > 1:
+ # Word is too long for the bubble
+ if current_line:
+ # Save current line first
+ lines.append(' '.join(current_line))
+ current_line = []
+
+ if self.strict_text_wrapping:
+ # STRICT MODE: Force break the word to fit within bubble
+ # This is the original behavior that ensures text stays within bounds
+ broken_parts = self._force_break_word(word, font, max_width, draw)
+ lines.extend(broken_parts)
+ else:
+ # RELAXED MODE: Keep word whole (may exceed bubble)
+ lines.append(word)
+ # self._log(f" ⚠️ Word '{word}' exceeds bubble width, keeping whole", "warning")
+ else:
+ # Normal word processing
+ if current_line:
+ test_line = ' '.join(current_line + [word])
+ else:
+ test_line = word
+
+ text_bbox = draw.textbbox((0, 0), test_line, font=font)
+ text_width = text_bbox[2] - text_bbox[0]
+
+ if text_width <= max_width:
+ current_line.append(word)
+ else:
+ if current_line:
+ lines.append(' '.join(current_line))
+ current_line = [word]
+ else:
+ # Single word that fits
+ lines.append(word)
+
+ if current_line:
+ lines.append(' '.join(current_line))
+
+ return lines
+
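+    # Note on wrapping modes above: with strict_text_wrapping enabled, words wider than
+    # max_width are force-broken via _force_break_word so no line exceeds the bubble;
+    # otherwise the word is kept whole and may overflow.
+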
+ def _force_break_word(self, word: str, font: ImageFont, max_width: int, draw: ImageDraw) -> List[str]:
+ """Force break a word that's too long to fit"""
+ lines = []
+
+ # Binary search to find how many characters fit
+ low = 1
+ high = len(word)
+ chars_that_fit = 1
+
+ while low <= high:
+ mid = (low + high) // 2
+ test_text = word[:mid]
+ bbox = draw.textbbox((0, 0), test_text, font=font)
+ width = bbox[2] - bbox[0]
+
+ if width <= max_width:
+ chars_that_fit = mid
+ low = mid + 1
+ else:
+ high = mid - 1
+
+ # Break the word into pieces
+ remaining = word
+ while remaining:
+ if len(remaining) <= chars_that_fit:
+ # Last piece
+ lines.append(remaining)
+ break
+ else:
+ # Find the best break point
+ break_at = chars_that_fit
+
+ # Try to break at a more natural point if possible
+ # Look for vowel-consonant boundaries for better hyphenation
+ for i in range(min(chars_that_fit, len(remaining) - 1), max(1, chars_that_fit - 5), -1):
+ if i < len(remaining) - 1:
+ current_char = remaining[i].lower()
+ next_char = remaining[i + 1].lower()
+
+ # Good hyphenation points:
+ # - Between consonant and vowel
+ # - After prefix (un-, re-, pre-, etc.)
+ # - Before suffix (-ing, -ed, -er, etc.)
+ if (current_char in 'bcdfghjklmnpqrstvwxyz' and next_char in 'aeiou') or \
+ (current_char in 'aeiou' and next_char in 'bcdfghjklmnpqrstvwxyz'):
+ break_at = i + 1
+ break
+
+ # Add hyphen if we're breaking in the middle of a word
+ if break_at < len(remaining):
+ # Check if adding hyphen still fits
+ test_with_hyphen = remaining[:break_at] + '-'
+ bbox = draw.textbbox((0, 0), test_with_hyphen, font=font)
+ width = bbox[2] - bbox[0]
+
+ if width <= max_width:
+ lines.append(remaining[:break_at] + '-')
+ else:
+ # Hyphen doesn't fit, break without it
+ lines.append(remaining[:break_at])
+ else:
+ lines.append(remaining[:break_at])
+
+ remaining = remaining[break_at:]
+
+ return lines
+
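+    # Note on the word breaking above: a binary search finds the largest prefix that
+    # fits max_width, the word is emitted in chunks of roughly that length, the break
+    # point is nudged toward a consonant/vowel boundary when one is nearby, and a
+    # trailing hyphen is added only if the hyphenated chunk still fits.
+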
+ def _estimate_font_size_for_region(self, region: TextRegion) -> int:
+ """Estimate the likely font size for a text region based on its dimensions and text content"""
+ x, y, w, h = region.bounding_box
+ text_length = len(region.text.strip())
+
+ if text_length == 0:
+ return self.max_font_size // 2 # Default middle size
+
+ # Calculate area per character
+ area = w * h
+ area_per_char = area / text_length
+
+ # Estimate font size based on area per character
+ # These ratios are approximate and based on typical manga text
+ if area_per_char > 800:
+ estimated_size = int(self.max_font_size * 0.8)
+ elif area_per_char > 400:
+ estimated_size = int(self.max_font_size * 0.6)
+ elif area_per_char > 200:
+ estimated_size = int(self.max_font_size * 0.4)
+ elif area_per_char > 100:
+ estimated_size = int(self.max_font_size * 0.3)
+ else:
+ estimated_size = int(self.max_font_size * 0.2)
+
+ # Clamp to reasonable bounds
+ return max(self.min_font_size, min(estimated_size, self.max_font_size))
+
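+    # Reference for the estimate above: area-per-character thresholds map to fractions
+    # of max_font_size (>800 px^2 -> 0.8x, >400 -> 0.6x, >200 -> 0.4x, >100 -> 0.3x,
+    # otherwise 0.2x); the result is then clamped to [min_font_size, max_font_size].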
+
+ def _split_bubble_if_needed(self, bubble_regions: List[TextRegion]) -> List[List[TextRegion]]:
+ """Split a detected bubble if it actually contains multiple separate speech bubbles
+
+ This happens when RT-DETR detects one large bounding box over vertically or
+ horizontally stacked speech bubbles. We detect this by checking if text regions
+ within the bubble have LARGE gaps between them.
+
+ For manga-ocr and other non-Google/Azure OCR providers, RT-DETR detection is trusted
+ completely and splitting is disabled.
+
+ Returns:
+ List of region groups - each group represents a separate bubble
+ """
+ # For manga-ocr and other providers that use RT-DETR regions directly, trust RT-DETR
+ # Splitting is only needed for Google/Azure which do full-page OCR
+ if hasattr(self, 'ocr_provider') and self.ocr_provider not in ('google', 'azure'):
+ return [bubble_regions] # Trust RT-DETR completely for these providers
+
+ if len(bubble_regions) <= 1:
+ return [bubble_regions] # Single region, no splitting needed
+
+ # Sort regions by position (top-to-bottom, left-to-right)
+ sorted_regions = sorted(bubble_regions, key=lambda r: (r.bounding_box[1], r.bounding_box[0]))
+
+ # Group regions that should be together
+ groups = [[sorted_regions[0]]]
+
+ for i in range(1, len(sorted_regions)):
+ current_region = sorted_regions[i]
+ cx, cy, cw, ch = current_region.bounding_box
+ placed = False
+
+ # Try to place in an existing group
+ for group in groups:
+ # Check if current region should be in this group
+ # We look at the closest region in the group
+ min_gap = float('inf')
+ min_vertical_gap = float('inf')
+ min_horizontal_gap = float('inf')
+ closest_region = None
+
+ for group_region in group:
+ gx, gy, gw, gh = group_region.bounding_box
+
+ # Calculate gap between regions
+ horizontal_gap = 0
+ if gx + gw < cx:
+ horizontal_gap = cx - (gx + gw)
+ elif cx + cw < gx:
+ horizontal_gap = gx - (cx + cw)
+
+ vertical_gap = 0
+ if gy + gh < cy:
+ vertical_gap = cy - (gy + gh)
+ elif cy + ch < gy:
+ vertical_gap = gy - (cy + ch)
+
+ # Use Euclidean distance as overall gap measure
+ gap = (horizontal_gap ** 2 + vertical_gap ** 2) ** 0.5
+
+ if gap < min_gap:
+ min_gap = gap
+ closest_region = group_region
+ # Store individual gaps for aggressive vertical splitting
+ min_vertical_gap = vertical_gap
+ min_horizontal_gap = horizontal_gap
+
+ # AGGRESSIVE SPLIT for MANGA: Check for large vertical gaps first
+ # Manga often has vertically stacked speech bubbles that RT-DETR detects as one
+ if closest_region and min_vertical_gap > 50:
+ # Large vertical gap (>50px) - likely separate bubbles stacked vertically
+ # Check if there's NO vertical overlap (completely separate)
+ gx, gy, gw, gh = closest_region.bounding_box
+ vertical_overlap = min(gy + gh, cy + ch) - max(gy, cy)
+
+ if vertical_overlap <= 0:
+ # No vertical overlap at all - definitely separate bubbles
+ # Create new group (don't merge)
+ pass # Will create new group below
+ else:
+ # Some overlap despite gap - check other criteria
+ horizontal_overlap = min(gx + gw, cx + cw) - max(gx, cx)
+ min_width = min(gw, cw)
+ min_height = min(gh, ch)
+
+ # Only merge if there's very strong overlap (>75%)
+ if (horizontal_overlap > min_width * 0.75 or
+ vertical_overlap > min_height * 0.75):
+ group.append(current_region)
+ placed = True
+ break
+ # BALANCED SPLIT CRITERIA:
+ # Split if gap is > 21px unless there's strong overlap (>62%)
+ elif closest_region and min_gap < 21: # Within 21px - likely same bubble
+ group.append(current_region)
+ placed = True
+ break
+ elif closest_region:
+ # Check if they have significant overlap despite the gap
+ gx, gy, gw, gh = closest_region.bounding_box
+
+ horizontal_overlap = min(gx + gw, cx + cw) - max(gx, cx)
+ vertical_overlap = min(gy + gh, cy + ch) - max(gy, cy)
+
+ min_width = min(gw, cw)
+ min_height = min(gh, ch)
+
+ # If they have strong overlap (>62%) in either direction, keep together
+ if (horizontal_overlap > min_width * 0.62 or
+ vertical_overlap > min_height * 0.62):
+ group.append(current_region)
+ placed = True
+ break
+
+ # If not placed in any existing group, create a new group
+ if not placed:
+ groups.append([current_region])
+
+ # Log if we split the bubble
+ if len(groups) > 1:
+ self._log(f" 🔪 SPLIT: Detected bubble actually contains {len(groups)} separate bubbles", "warning")
+ for idx, group in enumerate(groups):
+ group_texts = [r.text[:15] + '...' for r in group]
+ self._log(f" Sub-bubble {idx + 1}: {len(group)} regions - {group_texts}", "info")
+
+ return groups
+
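+    # Summary of the split heuristics above (thresholds taken from the code):
+    #   - a vertical gap > 50px with no vertical overlap always starts a new group;
+    #   - a gap under 21px keeps regions in the same group;
+    #   - otherwise regions stay together only with strong overlap (> 62%, or > 75%
+    #     when a large vertical gap is present despite some overlap);
+    #   - providers other than Google/Azure skip splitting and trust RT-DETR.
+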
+ def _likely_different_bubbles(self, region1: TextRegion, region2: TextRegion) -> bool:
+ """Detect if regions are likely in different speech bubbles based on spatial patterns"""
+ x1, y1, w1, h1 = region1.bounding_box
+ x2, y2, w2, h2 = region2.bounding_box
+
+ # Calculate gaps and positions
+ horizontal_gap = 0
+ if x1 + w1 < x2:
+ horizontal_gap = x2 - (x1 + w1)
+ elif x2 + w2 < x1:
+ horizontal_gap = x1 - (x2 + w2)
+
+ vertical_gap = 0
+ if y1 + h1 < y2:
+ vertical_gap = y2 - (y1 + h1)
+ elif y2 + h2 < y1:
+ vertical_gap = y1 - (y2 + h2)
+
+ # Calculate relative positions
+ center_x1 = x1 + w1 / 2
+ center_x2 = x2 + w2 / 2
+ center_y1 = y1 + h1 / 2
+ center_y2 = y2 + h2 / 2
+
+ horizontal_center_diff = abs(center_x1 - center_x2)
+ avg_width = (w1 + w2) / 2
+
+ # FIRST CHECK: Very small gaps always indicate same bubble
+ if horizontal_gap < 15 and vertical_gap < 15:
+ return False # Definitely same bubble
+
+ # STRICTER CHECK: For regions that are horizontally far apart
+ # Even if they pass the gap threshold, check if they're likely different bubbles
+ if horizontal_gap > 40: # Significant horizontal gap
+ # Unless they're VERY well aligned vertically, they're different bubbles
+ vertical_overlap = min(y1 + h1, y2 + h2) - max(y1, y2)
+ min_height = min(h1, h2)
+
+ if vertical_overlap < min_height * 0.8: # Need 80% overlap to be same bubble
+ return True
+
+ # SPECIFIC FIX: Check for multi-line text pattern
+ # If regions are well-aligned horizontally, they're likely in the same bubble
+ if horizontal_center_diff < avg_width * 0.35: # Relaxed from 0.2 to 0.35
+ # Additional checks for multi-line text:
+ # 1. Similar widths (common in speech bubbles)
+ width_ratio = max(w1, w2) / min(w1, w2) if min(w1, w2) > 0 else 999
+
+ # 2. Reasonable vertical spacing (not too far apart)
+ avg_height = (h1 + h2) / 2
+
+ if width_ratio < 2.0 and vertical_gap < avg_height * 1.5:
+ # This is very likely multi-line text in the same bubble
+ return False
+
+ # Pattern 1: Side-by-side bubbles (common in manga)
+ # Characteristics: Significant horizontal gap, similar vertical position
+ if horizontal_gap > 50: # Increased from 25 to avoid false positives
+ vertical_overlap = min(y1 + h1, y2 + h2) - max(y1, y2)
+ min_height = min(h1, h2)
+
+ # If they have good vertical overlap, they're likely side-by-side bubbles
+ if vertical_overlap > min_height * 0.5:
+ return True
+
+ # Pattern 2: Stacked bubbles
+ # Characteristics: Significant vertical gap, similar horizontal position
+ # CRITICAL: Lower threshold to catch vertically stacked bubbles in manga
+ if vertical_gap > 15: # Reduced from 25 to catch closer stacked bubbles
+ horizontal_overlap = min(x1 + w1, x2 + w2) - max(x1, x2)
+ min_width = min(w1, w2)
+
+ # If they have good horizontal overlap, they're likely stacked bubbles
+ if horizontal_overlap > min_width * 0.5:
+ return True
+
+ # Pattern 3: Diagonal arrangement (different speakers)
+ # If regions are separated both horizontally and vertically
+ if horizontal_gap > 20 and vertical_gap > 20:
+ return True
+
+ # Pattern 4: Large gap relative to region size
+ avg_height = (h1 + h2) / 2
+
+ if horizontal_gap > avg_width * 0.6 or vertical_gap > avg_height * 0.6:
+ return True
+
+ return False
+
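+    # Quick reference for the heuristic above: gaps under 15px in both directions mean
+    # "same bubble"; a horizontal gap over 40px needs ~80% vertical overlap to stay
+    # together; well-centered lines (center offset < 35% of average width, width ratio
+    # under 2.0, vertical gap under 1.5x average height) are treated as multi-line text
+    # in a single bubble.
+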
+ def _regions_should_merge(self, region1: TextRegion, region2: TextRegion, threshold: int = 50) -> bool:
+ """Determine if two regions should be merged - with bubble detection"""
+
+ # First check if they're close enough spatially
+ if not self._regions_are_nearby(region1, region2, threshold):
+ return False
+
+ x1, y1, w1, h1 = region1.bounding_box
+ x2, y2, w2, h2 = region2.bounding_box
+
+ # ONLY apply special handling if regions are from Azure
+ if hasattr(region1, 'from_azure') and region1.from_azure:
+ # Azure lines are typically small - be more lenient
+ avg_height = (h1 + h2) / 2
+ if avg_height < 50: # Likely single lines
+ self._log(f" Azure lines detected, using lenient merge criteria", "info")
+
+ center_x1 = x1 + w1 / 2
+ center_x2 = x2 + w2 / 2
+ horizontal_center_diff = abs(center_x1 - center_x2)
+ avg_width = (w1 + w2) / 2
+
+ # If horizontally aligned and nearby, merge them
+ if horizontal_center_diff < avg_width * 0.7:
+ return True
+
+        # GOOGLE LOGIC - original merge heuristics
+ # SPECIAL CASE: If one region is very small, bypass strict checks
+ area1 = w1 * h1
+ area2 = w2 * h2
+ if area1 < 500 or area2 < 500:
+ self._log(f" Small text region (area: {min(area1, area2)}), bypassing strict alignment checks", "info")
+ return True
+
+ # Calculate actual gaps between regions
+ horizontal_gap = 0
+ if x1 + w1 < x2:
+ horizontal_gap = x2 - (x1 + w1)
+ elif x2 + w2 < x1:
+ horizontal_gap = x1 - (x2 + w2)
+
+ vertical_gap = 0
+ if y1 + h1 < y2:
+ vertical_gap = y2 - (y1 + h1)
+ elif y2 + h2 < y1:
+ vertical_gap = y1 - (y2 + h2)
+
+ # Calculate centers for alignment checks
+ center_x1 = x1 + w1 / 2
+ center_x2 = x2 + w2 / 2
+ center_y1 = y1 + h1 / 2
+ center_y2 = y2 + h2 / 2
+
+ horizontal_center_diff = abs(center_x1 - center_x2)
+ vertical_center_diff = abs(center_y1 - center_y2)
+
+ avg_width = (w1 + w2) / 2
+ avg_height = (h1 + h2) / 2
+
+ # Determine text orientation and layout
+ is_horizontal_text = horizontal_gap > vertical_gap or (horizontal_center_diff < avg_width * 0.5)
+ is_vertical_text = vertical_gap > horizontal_gap or (vertical_center_diff < avg_height * 0.5)
+
+ # PRELIMINARY CHECK: If regions overlap or are extremely close, merge them
+ # This handles text that's clearly in the same bubble
+
+ # Check for overlap
+ overlap_x = max(0, min(x1 + w1, x2 + w2) - max(x1, x2))
+ overlap_y = max(0, min(y1 + h1, y2 + h2) - max(y1, y2))
+ has_overlap = overlap_x > 0 and overlap_y > 0
+
+ if has_overlap:
+ self._log(f" Regions overlap - definitely same bubble, merging", "info")
+ return True
+
+ # If gaps are tiny (< 10 pixels), merge regardless of other factors
+ if horizontal_gap < 10 and vertical_gap < 10:
+ self._log(f" Very small gaps ({horizontal_gap}, {vertical_gap}) - merging", "info")
+ return True
+
+ # BUBBLE BOUNDARY CHECK: Use spatial patterns to detect different bubbles
+ # But be less aggressive if gaps are small
+ # CRITICAL: Reduced threshold to allow bubble boundary detection for stacked bubbles
+ if horizontal_gap < 12 and vertical_gap < 12:
+ # Very close regions are almost certainly in the same bubble
+ self._log(f" Regions very close, skipping bubble boundary check", "info")
+ elif self._likely_different_bubbles(region1, region2):
+ self._log(f" Regions likely in different speech bubbles", "info")
+ return False
+
+ # CHECK 1: For well-aligned text with small gaps, merge immediately
+ # This catches multi-line text in the same bubble
+ if is_horizontal_text and vertical_center_diff < avg_height * 0.4:
+ # Horizontal text that's well-aligned vertically
+ if horizontal_gap <= threshold and vertical_gap <= threshold * 0.5:
+ self._log(f" Well-aligned horizontal text with acceptable gaps, merging", "info")
+ return True
+
+ if is_vertical_text and horizontal_center_diff < avg_width * 0.4:
+ # Vertical text that's well-aligned horizontally
+ if vertical_gap <= threshold and horizontal_gap <= threshold * 0.5:
+ self._log(f" Well-aligned vertical text with acceptable gaps, merging", "info")
+ return True
+
+ # ADDITIONAL CHECK: Multi-line text in speech bubbles
+ # Even if not perfectly aligned, check for typical multi-line patterns
+ if horizontal_center_diff < avg_width * 0.5 and vertical_gap <= threshold:
+ # Lines that are reasonably centered and within threshold should merge
+ self._log(f" Multi-line text pattern detected, merging", "info")
+ return True
+
+ # CHECK 2: Check alignment quality
+ # Poor alignment often indicates different bubbles
+ if is_horizontal_text:
+ # For horizontal text, check vertical alignment
+ if vertical_center_diff > avg_height * 0.6:
+ self._log(f" Poor vertical alignment for horizontal text", "info")
+ return False
+ elif is_vertical_text:
+ # For vertical text, check horizontal alignment
+ if horizontal_center_diff > avg_width * 0.6:
+ self._log(f" Poor horizontal alignment for vertical text", "info")
+ return False
+
+ # CHECK 3: Font size check (but be reasonable)
+ font_size1 = self._estimate_font_size_for_region(region1)
+ font_size2 = self._estimate_font_size_for_region(region2)
+ size_ratio = max(font_size1, font_size2) / max(min(font_size1, font_size2), 1)
+
+ # Allow some variation for emphasis or stylistic choices
+ if size_ratio > 2.0:
+ self._log(f" Font sizes too different ({font_size1} vs {font_size2})", "info")
+ return False
+
+ # CHECK 4: Final sanity check on merged area
+ merged_width = max(x1 + w1, x2 + w2) - min(x1, x2)
+ merged_height = max(y1 + h1, y2 + h2) - min(y1, y2)
+ merged_area = merged_width * merged_height
+ combined_area = (w1 * h1) + (w2 * h2)
+
+ # If merged area is way larger than combined areas, they're probably far apart
+ if merged_area > combined_area * 2.5:
+ self._log(f" Merged area indicates regions are too far apart", "info")
+ return False
+
+ # If we get here, apply standard threshold checks
+ if horizontal_gap <= threshold and vertical_gap <= threshold:
+ self._log(f" Standard threshold check passed, merging", "info")
+ return True
+
+ self._log(f" No merge conditions met", "info")
+ return False
+
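+    # Decision order in _regions_should_merge above: proximity gate -> Azure lenient
+    # path and tiny-region bypass -> overlap or sub-10px gaps merge immediately ->
+    # bubble-boundary check (_likely_different_bubbles, skipped when gaps < 12px) ->
+    # aligned/multi-line merges -> alignment quality, font-size ratio (<= 2.0) and
+    # merged-area sanity checks -> final threshold comparison.
+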
+ def _merge_nearby_regions(self, regions: List[TextRegion], threshold: int = 50) -> List[TextRegion]:
+ """Merge text regions that are likely part of the same speech bubble - with debug logging"""
+ if len(regions) <= 1:
+ return regions
+
+ self._log(f"\n=== MERGE DEBUG: Starting merge analysis ===", "info")
+ self._log(f" Total regions: {len(regions)}", "info")
+ self._log(f" Threshold: {threshold}px", "info")
+
+ # First, let's log what regions we have
+ for i, region in enumerate(regions):
+ x, y, w, h = region.bounding_box
+ self._log(f" Region {i}: pos({x},{y}) size({w}x{h}) text='{region.text[:20]}...'", "info")
+
+ # Sort regions by area (largest first) to handle contained regions properly
+ sorted_indices = sorted(range(len(regions)),
+ key=lambda i: regions[i].bounding_box[2] * regions[i].bounding_box[3],
+ reverse=True)
+
+ merged = []
+ used = set()
+
+ # Process each region in order of size (largest first)
+ for idx in sorted_indices:
+ i = idx
+ if i in used:
+ continue
+
+ region1 = regions[i]
+
+ # Start with this region
+ merged_text = region1.text
+ merged_vertices = list(region1.vertices) if hasattr(region1, 'vertices') else []
+ regions_merged = [i] # Track which regions were merged
+
+ self._log(f"\n Checking region {i} for merges:", "info")
+
+ # Check against all other unused regions
+ for j in range(len(regions)):
+ if j == i or j in used:
+ continue
+
+ region2 = regions[j]
+ self._log(f" Testing merge with region {j}:", "info")
+
+ # Check if region2 is contained within region1
+ x1, y1, w1, h1 = region1.bounding_box
+ x2, y2, w2, h2 = region2.bounding_box
+
+ # Check if region2 is fully contained within region1
+ if (x2 >= x1 and y2 >= y1 and
+ x2 + w2 <= x1 + w1 and y2 + h2 <= y1 + h1):
+ self._log(f" ✓ Region {j} is INSIDE region {i} - merging!", "success")
+ merged_text += " " + region2.text
+ if hasattr(region2, 'vertices'):
+ merged_vertices.extend(region2.vertices)
+ used.add(j)
+ regions_merged.append(j)
+ continue
+
+ # Check if region1 is contained within region2 (shouldn't happen due to sorting, but be safe)
+ if (x1 >= x2 and y1 >= y2 and
+ x1 + w1 <= x2 + w2 and y1 + h1 <= y2 + h2):
+ self._log(f" ✓ Region {i} is INSIDE region {j} - merging!", "success")
+ merged_text += " " + region2.text
+ if hasattr(region2, 'vertices'):
+ merged_vertices.extend(region2.vertices)
+ used.add(j)
+ regions_merged.append(j)
+ # Update region1's bounding box to the larger region
+ region1 = TextRegion(
+ text=merged_text,
+ vertices=merged_vertices,
+ bounding_box=region2.bounding_box,
+ confidence=region1.confidence,
+ region_type='temp_merge'
+ )
+ continue
+
+ # FIX: Always check proximity against ORIGINAL regions, not the expanded one
+ # This prevents cascade merging across bubble boundaries
+ if self._regions_are_nearby(regions[i], region2, threshold): # Use regions[i] not region1
+ #self._log(f" ✓ Regions are nearby", "info")
+
+ # Then check if they should merge (also use original region)
+ if self._regions_should_merge(regions[i], region2, threshold): # Use regions[i] not region1
+ #self._log(f" ✓ Regions should merge!", "success")
+
+ # Actually perform the merge
+ merged_text += " " + region2.text
+ if hasattr(region2, 'vertices'):
+ merged_vertices.extend(region2.vertices)
+ used.add(j)
+ regions_merged.append(j)
+
+ # DON'T update region1 for proximity checks - keep using original regions
+ else:
+ self._log(f" ✗ Regions should not merge", "warning")
+ else:
+ self._log(f" ✗ Regions not nearby", "warning")
+
+ # Log if we merged multiple regions
+ if len(regions_merged) > 1:
+ self._log(f" ✅ MERGED regions {regions_merged} into one bubble", "success")
+ else:
+ self._log(f" ℹ️ Region {i} not merged with any other", "info")
+
+ # Create final merged region with all the merged vertices
+ if merged_vertices:
+ xs = [v[0] for v in merged_vertices]
+ ys = [v[1] for v in merged_vertices]
+ else:
+ # Fallback: calculate from all merged regions
+ all_xs = []
+ all_ys = []
+ for idx in regions_merged:
+ x, y, w, h = regions[idx].bounding_box
+ all_xs.extend([x, x + w])
+ all_ys.extend([y, y + h])
+ xs = all_xs
+ ys = all_ys
+
+ min_x, max_x = min(xs), max(xs)
+ min_y, max_y = min(ys), max(ys)
+ merged_bbox = (min_x, min_y, max_x - min_x, max_y - min_y)
+
+ merged_region = TextRegion(
+ text=merged_text,
+ vertices=merged_vertices,
+ bounding_box=merged_bbox,
+ confidence=regions[i].confidence,
+ region_type='merged_text_block' if len(regions_merged) > 1 else regions[i].region_type
+ )
+
+ # Copy over any additional attributes
+ if hasattr(regions[i], 'translated_text'):
+ merged_region.translated_text = regions[i].translated_text
+
+ merged.append(merged_region)
+ used.add(i)
+
+ self._log(f"\n=== MERGE DEBUG: Complete ===", "info")
+ self._log(f" Final region count: {len(merged)} (was {len(regions)})", "info")
+
+ # Verify the merge worked
+ if len(merged) == len(regions):
+ self._log(f" ⚠️ WARNING: No regions were actually merged!", "warning")
+
+ return merged
+
+ def _regions_are_nearby(self, region1: TextRegion, region2: TextRegion, threshold: int = 50) -> bool:
+ """Check if two regions are close enough to be in the same bubble - WITH DEBUG"""
+ x1, y1, w1, h1 = region1.bounding_box
+ x2, y2, w2, h2 = region2.bounding_box
+
+ #self._log(f"\n === NEARBY CHECK DEBUG ===", "info")
+ #self._log(f" Region 1: pos({x1},{y1}) size({w1}x{h1})", "info")
+ #self._log(f" Region 2: pos({x2},{y2}) size({w2}x{h2})", "info")
+ #self._log(f" Threshold: {threshold}", "info")
+
+ # Calculate gaps between closest edges
+ horizontal_gap = 0
+ if x1 + w1 < x2: # region1 is to the left
+ horizontal_gap = x2 - (x1 + w1)
+ elif x2 + w2 < x1: # region2 is to the left
+ horizontal_gap = x1 - (x2 + w2)
+
+ vertical_gap = 0
+ if y1 + h1 < y2: # region1 is above
+ vertical_gap = y2 - (y1 + h1)
+ elif y2 + h2 < y1: # region2 is above
+ vertical_gap = y1 - (y2 + h2)
+
+ #self._log(f" Horizontal gap: {horizontal_gap}", "info")
+ #self._log(f" Vertical gap: {vertical_gap}", "info")
+
+ # Detect if regions are likely vertical text based on aspect ratio
+ aspect1 = w1 / max(h1, 1)
+ aspect2 = w2 / max(h2, 1)
+
+ # More permissive vertical text detection
+ # Vertical text typically has aspect ratio < 1.0 (taller than wide)
+ is_vertical_text = (aspect1 < 1.0 and aspect2 < 1.0) or (aspect1 < 0.5 or aspect2 < 0.5)
+
+ # Also check if text is arranged vertically (one above the other with minimal horizontal offset)
+ center_x1 = x1 + w1 / 2
+ center_x2 = x2 + w2 / 2
+ horizontal_center_diff = abs(center_x1 - center_x2)
+ avg_width = (w1 + w2) / 2
+
+ # If regions are vertically stacked with aligned centers, treat as vertical text
+ is_vertically_stacked = (horizontal_center_diff < avg_width * 1.5) and (vertical_gap >= 0)
+
+ #self._log(f" Is vertical text: {is_vertical_text}", "info")
+ #self._log(f" Is vertically stacked: {is_vertically_stacked}", "info")
+ #self._log(f" Horizontal center diff: {horizontal_center_diff:.1f}", "info")
+
+ # SIMPLE APPROACH: Just check if gaps are within threshold
+ # Don't overthink it
+ if horizontal_gap <= threshold and vertical_gap <= threshold:
+ #self._log(f" ✅ NEARBY: Both gaps within threshold", "success")
+ return True
+
+ # SPECIAL CASE: Vertically stacked text with good alignment
+ # This is specifically for multi-line text in bubbles
+ if horizontal_center_diff < avg_width * 0.8 and vertical_gap <= threshold * 1.5:
+ #self._log(f" ✅ NEARBY: Vertically aligned text in same bubble", "success")
+ return True
+
+ # If one gap is small and the other is slightly over, still consider nearby
+ if (horizontal_gap <= threshold * 0.5 and vertical_gap <= threshold * 1.5) or \
+ (vertical_gap <= threshold * 0.5 and horizontal_gap <= threshold * 1.5):
+ #self._log(f" ✅ NEARBY: One small gap, other slightly over", "success")
+ return True
+
+ # Special case: Wide bubbles with text on sides
+ # If regions are at nearly the same vertical position, they might be in a wide bubble
+ if abs(y1 - y2) < 10: # Nearly same vertical position
+ # Check if this could be a wide bubble spanning both regions
+ if horizontal_gap <= threshold * 3: # Allow up to 3x threshold for wide bubbles
+ #self._log(f" ✅ NEARBY: Same vertical level, possibly wide bubble", "success")
+ return True
+
+ #self._log(f" ❌ NOT NEARBY: Gaps exceed threshold", "warning")
+ return False
+
+ def _find_font(self) -> str:
+ """Find a suitable font for text rendering"""
+ font_candidates = [
+ "C:/Windows/Fonts/comicbd.ttf", # Comic Sans MS Bold as first choice
+ "C:/Windows/Fonts/arial.ttf",
+ "C:/Windows/Fonts/calibri.ttf",
+ "C:/Windows/Fonts/tahoma.ttf",
+ "/System/Library/Fonts/Helvetica.ttc",
+ "/usr/share/fonts/truetype/liberation/LiberationSans-Regular.ttf",
+ "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf"
+ ]
+
+ for font_path in font_candidates:
+ if os.path.exists(font_path):
+ return font_path
+
+ return None # Will use default font
+
+ def _get_singleton_bubble_detector(self):
+ """Get or initialize the singleton bubble detector instance with load coordination."""
+ start_time = None
+ with MangaTranslator._singleton_lock:
+ if MangaTranslator._singleton_bubble_detector is not None:
+ self._log("🤖 Using bubble detector (already loaded)", "info")
+ MangaTranslator._singleton_refs += 1
+ return MangaTranslator._singleton_bubble_detector
+ # If another thread is loading, wait for it
+ if MangaTranslator._singleton_bd_loading:
+ self._log("⏳ Waiting for bubble detector to finish loading (singleton)", "debug")
+ evt = MangaTranslator._singleton_bd_event
+ # Fall through: the lock is released when this with-block exits,
+ # then we wait on the event below
+ pass
+ else:
+ # Mark as loading and proceed to load outside lock
+ MangaTranslator._singleton_bd_loading = True
+ MangaTranslator._singleton_bd_event.clear()
+ start_time = time.time()
+ # Fall through: the lock is released when this with-block exits,
+ # then the heavy load is performed below
+ pass
+ # Outside the lock: perform load or wait
+ if start_time is None:
+ # We are a waiter
+ try:
+ MangaTranslator._singleton_bd_event.wait(timeout=300)
+ except Exception:
+ pass
+ with MangaTranslator._singleton_lock:
+ if MangaTranslator._singleton_bubble_detector is not None:
+ MangaTranslator._singleton_refs += 1
+ return MangaTranslator._singleton_bubble_detector
+ else:
+ # We are the loader
+ try:
+ from bubble_detector import BubbleDetector
+ bd = None
+
+ # First, try to get a preloaded detector from the pool
+ try:
+ ocr_settings = self.main_gui.config.get('manga_settings', {}).get('ocr', {}) if hasattr(self, 'main_gui') else {}
+ det_type = ocr_settings.get('detector_type', 'rtdetr_onnx')
+ model_id = ocr_settings.get('rtdetr_model_url') or ocr_settings.get('bubble_model_path') or ''
+ key = (det_type, model_id)
+ self._log(f"[DEBUG] Looking for detector in pool with key: {key}", "debug")
+ with MangaTranslator._detector_pool_lock:
+ self._log(f"[DEBUG] Pool keys available: {list(MangaTranslator._detector_pool.keys())}", "debug")
+ rec = MangaTranslator._detector_pool.get(key)
+ if rec and isinstance(rec, dict):
+ spares = rec.get('spares') or []
+ self._log(f"[DEBUG] Found pool record with {len(spares)} spares", "debug")
+ # For singleton mode, we can use a pool instance without checking it out
+ # since the singleton will keep it loaded permanently
+ if spares:
+ # Just use the first spare (don't pop or check out)
+ # Singleton will keep it loaded, pool can still track it
+ bd = spares[0]
+ self._log(f"🤖 Using pool bubble detector for singleton (no check-out needed)", "info")
+ else:
+ self._log(f"[DEBUG] No pool record found for key: {key}", "debug")
+ except Exception as e:
+ self._log(f"Could not fetch preloaded detector: {e}", "debug")
+
+ # If no preloaded detector, create a new one
+ if bd is None:
+ bd = BubbleDetector()
+ self._log("🤖 Created new bubble detector instance", "info")
+
+ # Optionally: defer model load until first actual call inside BD; keeping instance resident
+ with MangaTranslator._singleton_lock:
+ MangaTranslator._singleton_bubble_detector = bd
+ MangaTranslator._singleton_refs += 1
+ MangaTranslator._singleton_bd_loading = False
+ try:
+ MangaTranslator._singleton_bd_event.set()
+ except Exception:
+ pass
+ elapsed = time.time() - start_time
+ self._log(f"🤖 Singleton bubble detector ready (took {elapsed:.2f}s)", "info")
+ return bd
+ except Exception as e:
+ with MangaTranslator._singleton_lock:
+ MangaTranslator._singleton_bd_loading = False
+ try:
+ MangaTranslator._singleton_bd_event.set()
+ except Exception:
+ pass
+ self._log(f"Failed to create singleton bubble detector: {e}", "error")
+ return None
+
+ def _initialize_singleton_local_inpainter(self):
+ """Initialize singleton local inpainter instance"""
+ with MangaTranslator._singleton_lock:
+ was_existing = MangaTranslator._singleton_local_inpainter is not None
+ if MangaTranslator._singleton_local_inpainter is None:
+ try:
+ from local_inpainter import LocalInpainter
+ local_method = self.manga_settings.get('inpainting', {}).get('local_method', 'anime')
+ # LocalInpainter only accepts config_path, not method
+ MangaTranslator._singleton_local_inpainter = LocalInpainter()
+ # Now load the model with the specified method
+ if local_method:
+ # Try to load the model
+ model_path = self.manga_settings.get('inpainting', {}).get('local_model_path')
+ if not model_path:
+ # Try to download if no path specified
+ try:
+ model_path = MangaTranslator._singleton_local_inpainter.download_jit_model(local_method)
+ except Exception as e:
+ self._log(f"⚠️ Failed to download model for {local_method}: {e}", "warning")
+
+ if model_path and os.path.exists(model_path):
+ success = MangaTranslator._singleton_local_inpainter.load_model_with_retry(local_method, model_path)
+ if success:
+ self._log(f"🎨 Created singleton local inpainter with {local_method} model", "info")
+ else:
+ self._log(f"⚠️ Failed to load {local_method} model", "warning")
+ else:
+ self._log(f"🎨 Created singleton local inpainter (no model loaded yet)", "info")
+ else:
+ self._log(f"🎨 Created singleton local inpainter (default)", "info")
+ except Exception as e:
+ self._log(f"Failed to create singleton local inpainter: {e}", "error")
+ return
+ # Use the singleton instance
+ self.local_inpainter = MangaTranslator._singleton_local_inpainter
+ self.inpainter = self.local_inpainter
+ MangaTranslator._singleton_refs += 1
+ if was_existing:
+ self._log("🎨 Using local inpainter (already loaded)", "info")
+
+ def _get_thread_bubble_detector(self):
+ """Get or initialize bubble detector (singleton or thread-local based on settings).
+ Will consume a preloaded detector if available for current settings.
+ """
+ if getattr(self, 'use_singleton_bubble_detector', False) or (hasattr(self, 'use_singleton_models') and self.use_singleton_models):
+ # Use singleton instance (preferred)
+ if self.bubble_detector is None:
+ self.bubble_detector = self._get_singleton_bubble_detector()
+ return self.bubble_detector
+ else:
+ # Use thread-local instance (original behavior for parallel processing)
+ if not hasattr(self, '_thread_local') or getattr(self, '_thread_local', None) is None:
+ self._thread_local = threading.local()
+ if not hasattr(self._thread_local, 'bubble_detector') or self._thread_local.bubble_detector is None:
+ from bubble_detector import BubbleDetector
+ # Try to check out a preloaded spare for the current detector settings
+ try:
+ ocr_settings = self.main_gui.config.get('manga_settings', {}).get('ocr', {}) if hasattr(self, 'main_gui') else {}
+ det_type = ocr_settings.get('detector_type', 'rtdetr_onnx')
+ model_id = ocr_settings.get('rtdetr_model_url') or ocr_settings.get('bubble_model_path') or ''
+ key = (det_type, model_id)
+ with MangaTranslator._detector_pool_lock:
+ rec = MangaTranslator._detector_pool.get(key)
+ if rec and isinstance(rec, dict):
+ spares = rec.get('spares') or []
+ # Initialize checked_out list if it doesn't exist
+ if 'checked_out' not in rec:
+ rec['checked_out'] = []
+ checked_out = rec['checked_out']
+
+ # Look for an available spare (not checked out)
+ if spares:
+ for spare in spares:
+ if spare not in checked_out and spare:
+ # Check out this spare instance
+ checked_out.append(spare)
+ self._thread_local.bubble_detector = spare
+ # Store references for later return
+ self._checked_out_bubble_detector = spare
+ self._bubble_detector_pool_key = key
+ self._log(f"🤖 Checked out bubble detector from pool ({len(checked_out)}/{len(spares)} in use)", "info")
+ break
+ except Exception:
+ pass
+ # If still not set, create a fresh detector and store it for future use
+ if not hasattr(self._thread_local, 'bubble_detector') or self._thread_local.bubble_detector is None:
+ self._thread_local.bubble_detector = BubbleDetector()
+ self._log("🤖 Created thread-local bubble detector", "debug")
+
+ # Store this new detector in the pool for future reuse
+ try:
+ with MangaTranslator._detector_pool_lock:
+ if key not in MangaTranslator._detector_pool:
+ MangaTranslator._detector_pool[key] = {'spares': [], 'checked_out': []}
+ # Add this new detector to spares and immediately check it out
+ rec = MangaTranslator._detector_pool[key]
+ if 'spares' not in rec:
+ rec['spares'] = []
+ if 'checked_out' not in rec:
+ rec['checked_out'] = []
+ rec['spares'].append(self._thread_local.bubble_detector)
+ rec['checked_out'].append(self._thread_local.bubble_detector)
+ # Store references for later return
+ self._checked_out_bubble_detector = self._thread_local.bubble_detector
+ self._bubble_detector_pool_key = key
+ except Exception:
+ pass
+ return self._thread_local.bubble_detector
+
+ def _get_thread_local_inpainter(self, local_method: str, model_path: str):
+ """Get or create a LocalInpainter (singleton or thread-local based on settings).
+ Loads the requested model if needed.
+ """
+ if hasattr(self, 'use_singleton_models') and self.use_singleton_models:
+ # Use singleton instance
+ if self.local_inpainter is None:
+ self._initialize_singleton_local_inpainter()
+ return self.local_inpainter
+
+ # Use thread-local instance (original behavior for parallel processing)
+ # Ensure thread-local storage exists and has a dict
+ tl = getattr(self, '_thread_local', None)
+ if tl is None:
+ self._thread_local = threading.local()
+ tl = self._thread_local
+ if not hasattr(tl, 'local_inpainters') or getattr(tl, 'local_inpainters', None) is None:
+ tl.local_inpainters = {}
+ key = (local_method or 'anime', model_path or '')
+ if key not in tl.local_inpainters or tl.local_inpainters[key] is None:
+ # First, try to use a preloaded spare instance from the shared pool
+ try:
+ rec = MangaTranslator._inpaint_pool.get(key)
+ if rec and isinstance(rec, dict):
+ spares = rec.get('spares') or []
+ if spares:
+ tl.local_inpainters[key] = spares.pop(0)
+ self._log("🎨 Using preloaded local inpainting instance", "info")
+ return tl.local_inpainters[key]
+ # If there's a fully loaded shared instance but no spares, use it as a last resort
+ if rec.get('loaded') and rec.get('inpainter') is not None:
+ tl.local_inpainters[key] = rec.get('inpainter')
+ self._log("🎨 Using shared preloaded inpainting instance", "info")
+ return tl.local_inpainters[key]
+ except Exception:
+ pass
+
+ # No preloaded instance available: create and load thread-local instance
+ try:
+ from local_inpainter import LocalInpainter
+ # Use a per-thread config path to avoid concurrent JSON writes
+ try:
+ import tempfile
+ thread_cfg = os.path.join(tempfile.gettempdir(), f"gl_inpainter_{threading.get_ident()}.json")
+ except Exception:
+ thread_cfg = "config_thread_local.json"
+ inp = LocalInpainter(config_path=thread_cfg)
+ # Apply tiling settings
+ tiling_settings = self.manga_settings.get('tiling', {}) if hasattr(self, 'manga_settings') else {}
+ inp.tiling_enabled = tiling_settings.get('enabled', False)
+ inp.tile_size = tiling_settings.get('tile_size', 512)
+ inp.tile_overlap = tiling_settings.get('tile_overlap', 64)
+
+ # Ensure model is available
+ resolved_model_path = model_path
+ if not resolved_model_path or not os.path.exists(resolved_model_path):
+ try:
+ resolved_model_path = inp.download_jit_model(local_method)
+ except Exception as e:
+ self._log(f"⚠️ JIT model download failed for {local_method}: {e}", "warning")
+ resolved_model_path = None
+
+ # Load model for this thread's instance
+ if resolved_model_path and os.path.exists(resolved_model_path):
+ try:
+ self._log(f"📥 Loading {local_method} inpainting model (thread-local)", "info")
+ inp.load_model_with_retry(local_method, resolved_model_path, force_reload=False)
+ except Exception as e:
+ self._log(f"⚠️ Thread-local inpainter load error: {e}", "warning")
+ else:
+ self._log("⚠️ No model path available for thread-local inpainter", "warning")
+
+ # Re-check thread-local and publish ONLY if model loaded successfully
+ tl2 = getattr(self, '_thread_local', None)
+ if tl2 is None:
+ self._thread_local = threading.local()
+ tl2 = self._thread_local
+ if not hasattr(tl2, 'local_inpainters') or getattr(tl2, 'local_inpainters', None) is None:
+ tl2.local_inpainters = {}
+ if getattr(inp, 'model_loaded', False):
+ tl2.local_inpainters[key] = inp
+
+ # Store this loaded instance info in the pool for future reuse
+ try:
+ with MangaTranslator._inpaint_pool_lock:
+ if key not in MangaTranslator._inpaint_pool:
+ MangaTranslator._inpaint_pool[key] = {'inpainter': None, 'loaded': False, 'event': threading.Event(), 'spares': []}
+ # Mark that we have a loaded instance available
+ MangaTranslator._inpaint_pool[key]['loaded'] = True
+ MangaTranslator._inpaint_pool[key]['inpainter'] = inp # Store reference
+ if MangaTranslator._inpaint_pool[key].get('event'):
+ MangaTranslator._inpaint_pool[key]['event'].set()
+ except Exception:
+ pass
+ else:
+ # Ensure future calls will attempt a fresh init instead of using a half-initialized instance
+ tl2.local_inpainters[key] = None
+ except Exception as e:
+ self._log(f"❌ Failed to create thread-local inpainter: {e}", "error")
+ try:
+ tl3 = getattr(self, '_thread_local', None)
+ if tl3 is None:
+ self._thread_local = threading.local()
+ tl3 = self._thread_local
+ if not hasattr(tl3, 'local_inpainters') or getattr(tl3, 'local_inpainters', None) is None:
+ tl3.local_inpainters = {}
+ tl3.local_inpainters[key] = None
+ except Exception:
+ pass
+ return getattr(self._thread_local, 'local_inpainters', {}).get(key)
+
+ def translate_regions(self, regions: List[TextRegion], image_path: str) -> List[TextRegion]:
+ """Translate all text regions with API delay"""
+ self._log(f"\n📝 Translating {len(regions)} text regions...")
+
+ # Check stop before even starting
+ if self._check_stop():
+ self._log(f"\n⏹️ Translation stopped before processing any regions", "warning")
+ return regions
+
+ # Check if parallel processing OR batch translation is enabled
+ parallel_enabled = self.manga_settings.get('advanced', {}).get('parallel_processing', False)
+ batch_enabled = getattr(self, 'batch_mode', False)
+ max_workers = self.manga_settings.get('advanced', {}).get('max_workers', 4)
+
+ # Batch translation (parallel API calls) should work independently of parallel processing
+ if batch_enabled:
+ max_workers = getattr(self, 'batch_size', max_workers)
+ self._log(f"📦 Using BATCH TRANSLATION with {max_workers} concurrent API calls")
+ return self._translate_regions_parallel(regions, image_path, max_workers)
+ elif parallel_enabled and len(regions) > 1:
+ self._log(f"🚀 Using PARALLEL processing with {max_workers} workers")
+ return self._translate_regions_parallel(regions, image_path, max_workers)
+ else:
+ # SEQUENTIAL CODE
+ for i, region in enumerate(regions):
+ if self._check_stop():
+ self._log(f"\n⏹️ Translation stopped by user after {i}/{len(regions)} regions", "warning")
+ break
+ if region.text.strip():
+ self._log(f"\n[{i+1}/{len(regions)}] Original: {region.text}")
+
+ # Get context for translation
+ context = self.translation_context[-5:] if self.contextual_enabled else None
+
+ # Translate with image context
+ translated = self.translate_text(
+ region.text,
+ context,
+ image_path=image_path,
+ region=region
+ )
+ region.translated_text = translated
+
+ self._log(f"Translated: {translated}")
+
+ # SAVE TO HISTORY HERE
+ if self.history_manager and self.contextual_enabled and translated:
+ try:
+ self.history_manager.append_to_history(
+ user_content=region.text,
+ assistant_content=translated,
+ hist_limit=self.translation_history_limit,
+ reset_on_limit=not self.rolling_history_enabled,
+ rolling_window=self.rolling_history_enabled
+ )
+ self._log(f"📚 Saved to history (exchange {i+1})")
+ except Exception as e:
+ self._log(f"⚠️ Failed to save history: {e}", "warning")
+
+ # Apply API delay
+ if i < len(regions) - 1: # Don't delay after last translation
+ self._log(f"⏳ Waiting {self.api_delay}s before next translation...")
+ # Check stop flag every 0.1 seconds during delay
+ for _ in range(int(self.api_delay * 10)):
+ if self._check_stop():
+ self._log(f"\n⏹️ Translation stopped during delay", "warning")
+ return regions
+ time.sleep(0.1)
+
+ return regions
+
+ # parallel processing:
+
+ def _wait_for_api_slot(self, min_interval=None, jitter_max=0.25):
+ """Global, thread-safe front-edge rate limiter for API calls.
+ Ensures parallel requests are spaced out before dispatch, avoiding tail latency.
+ """
+ import time
+ import random
+ import threading
+
+ if min_interval is None:
+ try:
+ min_interval = float(getattr(self, "api_delay", 0.0))
+ except Exception:
+ min_interval = 0.0
+ if min_interval < 0:
+ min_interval = 0.0
+
+ # Lazy init shared state
+ if not hasattr(self, "_api_rl_lock"):
+ self._api_rl_lock = threading.Lock()
+ self._api_next_allowed = 0.0 # monotonic seconds
+
+ while True:
+ now = time.monotonic()
+ with self._api_rl_lock:
+ # If we're allowed now, book the next slot and proceed
+ if now >= self._api_next_allowed:
+ jitter = random.uniform(0.0, max(jitter_max, 0.0)) if jitter_max else 0.0
+ self._api_next_allowed = now + min_interval + jitter
+ return
+
+ # Otherwise compute wait time (don’t hold the lock while sleeping)
+ wait = self._api_next_allowed - now
+
+ # Sleep outside the lock in short increments so stop flags can be honored
+ if wait > 0:
+ try:
+ if self._check_stop():
+ return
+ except Exception:
+ pass
+ time.sleep(min(wait, 0.05))
+
+ def _translate_regions_parallel(self, regions: List[TextRegion], image_path: str, max_workers: int = None) -> List[TextRegion]:
+ """Translate regions using parallel processing"""
+ # Get max_workers from settings if not provided
+ if max_workers is None:
+ max_workers = self.manga_settings.get('advanced', {}).get('max_workers', 4)
+
+ # Override with API batch size when batch mode is enabled — these are API calls.
+ try:
+ if getattr(self, 'batch_mode', False):
+ bs = int(getattr(self, 'batch_size', 0) or int(os.getenv('BATCH_SIZE', '0')))
+ if bs and bs > 0:
+ max_workers = bs
+ except Exception:
+ pass
+ # Bound to number of regions
+ max_workers = max(1, min(max_workers, len(regions)))
+
+ # Thread-safe storage for results
+ results_lock = threading.Lock()
+ translated_regions = {}
+ failed_indices = []
+
+ # Filter out empty regions
+ valid_regions = [(i, region) for i, region in enumerate(regions) if region.text.strip()]
+
+ if not valid_regions:
+ return regions
+
+ # Create a thread pool
+ with ThreadPoolExecutor(max_workers=max_workers) as executor:
+ # Submit all translation tasks
+ future_to_data = {}
+
+ for i, region in valid_regions:
+ # Check for stop signal before submitting
+ if self._check_stop():
+ self._log(f"\n⏹️ Translation stopped before submitting region {i+1}", "warning")
+ break
+
+ # Submit translation task
+ future = executor.submit(
+ self._translate_single_region_parallel,
+ region,
+ i,
+ len(valid_regions),
+ image_path
+ )
+ future_to_data[future] = (i, region)
+
+ # Process completed translations
+ completed = 0
+ for future in as_completed(future_to_data):
+ i, region = future_to_data[future]
+
+ # Check for stop signal
+ if self._check_stop():
+ self._log(f"\n⏹️ Translation stopped at {completed}/{len(valid_regions)} completed", "warning")
+ # Cancel remaining futures
+ for f in future_to_data:
+ f.cancel()
+ break
+
+ try:
+ translated_text = future.result()
+ if translated_text:
+ with results_lock:
+ translated_regions[i] = translated_text
+ completed += 1
+ self._log(f"✅ [{completed}/{len(valid_regions)}] Completed region {i+1}")
+ else:
+ with results_lock:
+ failed_indices.append(i)
+ self._log(f"❌ [{completed}/{len(valid_regions)}] Failed region {i+1}", "error")
+
+ except Exception as e:
+ with results_lock:
+ failed_indices.append(i)
+ self._log(f"❌ Error in region {i+1}: {str(e)}", "error")
+
+ # Apply translations back to regions
+ for i, region in enumerate(regions):
+ if i in translated_regions:
+ region.translated_text = translated_regions[i]
+
+ # Report summary
+ success_count = len(translated_regions)
+ fail_count = len(failed_indices)
+ self._log(f"\n📊 Parallel translation complete: {success_count} succeeded, {fail_count} failed")
+
+ return regions
+
+ def reset_for_new_image(self):
+ """Reset internal state for processing a new image"""
+ # ============================================================
+ # CRITICAL: COMPREHENSIVE CACHE CLEARING FOR NEW IMAGE
+ # This ensures NO text data leaks between images
+ # ============================================================
+
+ # Clear any cached detection results
+ if hasattr(self, 'last_detection_results'):
+ del self.last_detection_results
+
+ # FORCE clear OCR ROI cache (main text contamination source)
+ # THREAD-SAFE: Use lock for parallel panel translation
+ if hasattr(self, 'ocr_roi_cache'):
+ with self._cache_lock:
+ self.ocr_roi_cache.clear()
+ self._current_image_hash = None
+
+ # Clear OCR manager and ALL provider caches
+ if hasattr(self, 'ocr_manager') and self.ocr_manager:
+ if hasattr(self.ocr_manager, 'last_results'):
+ self.ocr_manager.last_results = None
+ if hasattr(self.ocr_manager, 'cache'):
+ self.ocr_manager.cache.clear()
+ # Clear ALL provider-level caches
+ if hasattr(self.ocr_manager, 'providers'):
+ for provider_name, provider in self.ocr_manager.providers.items():
+ if hasattr(provider, 'last_results'):
+ provider.last_results = None
+ if hasattr(provider, 'cache'):
+ provider.cache.clear()
+
+ # Clear bubble detector cache
+ if hasattr(self, 'bubble_detector') and self.bubble_detector:
+ if hasattr(self.bubble_detector, 'last_detections'):
+ self.bubble_detector.last_detections = None
+ if hasattr(self.bubble_detector, 'cache'):
+ self.bubble_detector.cache.clear()
+
+ # Don't clear translation context if using rolling history
+ if not self.rolling_history_enabled:
+ self.translation_context = []
+
+ # Clear any cached regions
+ if hasattr(self, '_cached_regions'):
+ del self._cached_regions
+
+ self._log("🔄 Reset translator state for new image (ALL text caches cleared)", "debug")
+
+ def _translate_single_region_parallel(self, region: TextRegion, index: int, total: int, image_path: str) -> Optional[str]:
+ """Translate a single region for parallel processing"""
+ thread_name = threading.current_thread().name
+ try:
+ self._log(f"\n[{thread_name}] [{index+1}/{total}] Original: {region.text}")
+
+ # Note: Context is not used in parallel mode to avoid race conditions
+ # Pass None for context to maintain compatibility with your translate_text method
+ # Front-edge rate limiting across threads
+ self._wait_for_api_slot()
+
+ translated = self.translate_text(
+ region.text,
+ None, # No context in parallel mode
+ image_path=image_path,
+ region=region
+ )
+
+ if translated:
+ self._log(f"[{thread_name}] Translated: {translated}")
+ return translated
+ else:
+ self._log(f"[{thread_name}] Translation failed", "error")
+ return None
+
+ except Exception as e:
+ self._log(f"[{thread_name}] Error: {str(e)}", "error")
+ return None
+
+
+ def _is_bubble_detector_loaded(self, ocr_settings: Dict[str, Any]) -> Tuple[bool, str]:
+ """Check if the configured bubble detector's model is already loaded.
+ Returns (loaded, detector_type). Safe: does not trigger a load.
+ """
+ try:
+ bd = self._get_thread_bubble_detector()
+ except Exception:
+ return False, ocr_settings.get('detector_type', 'rtdetr_onnx')
+ det = ocr_settings.get('detector_type', 'rtdetr_onnx')
+ try:
+ if det == 'rtdetr_onnx':
+ return bool(getattr(bd, 'rtdetr_onnx_loaded', False)), det
+ elif det == 'rtdetr':
+ return bool(getattr(bd, 'rtdetr_loaded', False)), det
+ elif det == 'yolo':
+ return bool(getattr(bd, 'model_loaded', False)), det
+ else:
+ # Auto or unknown – consider any ready model as loaded
+ ready = bool(getattr(bd, 'rtdetr_loaded', False) or getattr(bd, 'rtdetr_onnx_loaded', False) or getattr(bd, 'model_loaded', False))
+ return ready, det
+ except Exception:
+ return False, det
+
+ def _is_local_inpainter_loaded(self) -> Tuple[bool, Optional[str]]:
+ """Check if a local inpainter model is already loaded for current settings.
+ Returns (loaded, local_method) or (False, None).
+ This respects UI flags: skip_inpainting / use_cloud_inpainting.
+ """
+ try:
+ # If skipping or using cloud, this does not apply
+ if getattr(self, 'skip_inpainting', False) or getattr(self, 'use_cloud_inpainting', False):
+ return False, None
+ except Exception:
+ pass
+ inpaint_cfg = self.manga_settings.get('inpainting', {}) if hasattr(self, 'manga_settings') else {}
+ local_method = inpaint_cfg.get('local_method', 'anime')
+ try:
+ model_path = self.main_gui.config.get(f'manga_{local_method}_model_path', '') if hasattr(self, 'main_gui') else ''
+ except Exception:
+ model_path = ''
+ # Singleton path
+ if getattr(self, 'use_singleton_models', False):
+ inp = getattr(MangaTranslator, '_singleton_local_inpainter', None)
+ return (bool(getattr(inp, 'model_loaded', False)), local_method)
+ # Thread-local/pooled path
+ inp = getattr(self, 'local_inpainter', None)
+ if inp is not None and getattr(inp, 'model_loaded', False):
+ return True, local_method
+ try:
+ key = (local_method, model_path or '')
+ rec = MangaTranslator._inpaint_pool.get(key)
+ # Consider the shared 'inpainter' loaded or any spare that is model_loaded
+ if rec:
+ if rec.get('loaded') and rec.get('inpainter') is not None and getattr(rec['inpainter'], 'model_loaded', False):
+ return True, local_method
+ for spare in rec.get('spares') or []:
+ if getattr(spare, 'model_loaded', False):
+ return True, local_method
+ except Exception:
+ pass
+ return False, local_method
+
+ def _log_model_status(self):
+ """Emit concise status lines for already-loaded heavy models to avoid confusing 'loading' logs."""
+ try:
+ ocr_settings = self.manga_settings.get('ocr', {}) if hasattr(self, 'manga_settings') else {}
+ if ocr_settings.get('bubble_detection_enabled', False):
+ loaded, det = self._is_bubble_detector_loaded(ocr_settings)
+ det_name = 'YOLO' if det == 'yolo' else ('RT-DETR' if det == 'rtdetr' else 'RT-DETR (ONNX)')
+ if loaded:
+ self._log("🤖 Using bubble detector (already loaded)", "info")
+ else:
+ self._log(f"🤖 Bubble detector ({det_name}) will load on first use", "debug")
+ except Exception:
+ pass
+ try:
+ loaded, local_method = self._is_local_inpainter_loaded()
+ if local_method:
+ label = local_method.upper()
+ if loaded:
+ self._log("🎨 Using local inpainter (already loaded)", "info")
+ else:
+ self._log("🎨 Local inpainter will load on first use", "debug")
+ except Exception:
+ pass
+
+ def process_image(self, image_path: str, output_path: Optional[str] = None,
+ batch_index: int = None, batch_total: int = None) -> Dict[str, Any]:
+ """Process a single manga image through the full pipeline"""
+ # Ensure local references exist for cleanup in finally
+ image = None
+ inpainted = None
+ final_image = None
+ mask = None
+ mask_viz = None
+ pil_image = None
+ heatmap = None
+
+ # Set batch tracking if provided
+ if batch_index is not None and batch_total is not None:
+ self.batch_current = batch_index
+ self.batch_size = batch_total
+ self.batch_mode = True
+
+ # Simplified header for batch mode
+ if not self.batch_mode:
+ self._log(f"\n{'='*60}")
+ self._log(f"📷 STARTING MANGA TRANSLATION PIPELINE")
+ self._log(f"📁 Input: {image_path}")
+ self._log(f"📁 Output: {output_path or 'Auto-generated'}")
+ self._log(f"{'='*60}\n")
+ else:
+ self._log(f"\n[{batch_index}/{batch_total}] Processing: {os.path.basename(image_path)}")
+
+ # Before heavy work, report model status to avoid confusing 'loading' logs later
+ try:
+ self._log_model_status()
+ except Exception:
+ pass
+
+ result = {
+ 'success': False,
+ 'input_path': image_path,
+ 'output_path': output_path,
+ 'regions': [],
+ 'errors': [],
+ 'interrupted': False,
+ 'format_info': {}
+ }
+
+ try:
+ # RAM cap gating before heavy processing
+ try:
+ self._block_if_over_cap("processing image")
+ except Exception:
+ pass
+
+ # Determine the output directory from output_path
+ if output_path:
+ output_dir = os.path.dirname(output_path)
+ else:
+ # If no output path specified, use default
+ output_dir = os.path.join(os.path.dirname(image_path), "translated_images")
+
+ # Ensure output directory exists
+ os.makedirs(output_dir, exist_ok=True)
+
+ # Initialize HistoryManager with the output directory
+ if self.contextual_enabled and not self.history_manager_initialized:
+ # Only initialize if we're in a new output directory
+ if output_dir != getattr(self, 'history_output_dir', None):
+ try:
+ self.history_manager = HistoryManager(output_dir)
+ self.history_manager_initialized = True
+ self.history_output_dir = output_dir
+ self._log(f"📚 Initialized HistoryManager in output directory: {output_dir}")
+ except Exception as e:
+ self._log(f"⚠️ Failed to initialize history manager: {str(e)}", "warning")
+ self.history_manager = None
+
+ # Check for stop signal
+ if self._check_stop():
+ result['interrupted'] = True
+ self._log("⏹️ Translation stopped before processing", "warning")
+ return result
+
+ # Format detection if enabled
+ if self.manga_settings.get('advanced', {}).get('format_detection', False):
+ self._log("🔍 Analyzing image format...")
+ img = Image.open(image_path)
+ width, height = img.size
+ aspect_ratio = height / width
+
+ # Detect format type
+ format_info = {
+ 'width': width,
+ 'height': height,
+ 'aspect_ratio': aspect_ratio,
+ 'is_webtoon': aspect_ratio > 3.0,
+ 'is_spread': width > height * 1.3,
+ 'format': 'unknown'
+ }
+
+ if format_info['is_webtoon']:
+ format_info['format'] = 'webtoon'
+ self._log("📱 Detected WEBTOON format - vertical scroll manga")
+ elif format_info['is_spread']:
+ format_info['format'] = 'spread'
+ self._log("📖 Detected SPREAD format - two-page layout")
+ else:
+ format_info['format'] = 'single_page'
+ self._log("📄 Detected SINGLE PAGE format")
+
+ result['format_info'] = format_info
+
+ # Handle webtoon mode if detected and enabled
+ webtoon_mode = self.manga_settings.get('advanced', {}).get('webtoon_mode', 'auto')
+ if format_info['is_webtoon'] and webtoon_mode != 'disabled':
+ if webtoon_mode == 'auto' or webtoon_mode == 'force':
+ self._log("🔄 Webtoon mode active - will process in chunks for better OCR")
+ # Process webtoon in chunks
+ return self._process_webtoon_chunks(image_path, output_path, result)
+
+ # Step 1: Detect text regions using Google Cloud Vision
+ self._log(f"📍 [STEP 1] Text Detection Phase")
+ regions = self.detect_text_regions(image_path)
+
+ if not regions:
+ error_msg = "No text regions detected by Cloud Vision"
+ self._log(f"⚠️ {error_msg}", "warning")
+ result['errors'].append(error_msg)
+ # Still save the original image as "translated" if no text found
+ if output_path:
+ import shutil
+ shutil.copy2(image_path, output_path)
+ result['output_path'] = output_path
+ result['success'] = True
+ return result
+
+ self._log(f"\n✅ Detection complete: {len(regions)} regions found")
+
+ # Save debug outputs only if 'Save intermediate images' is enabled
+ if self.manga_settings.get('advanced', {}).get('save_intermediate', False):
+ self._save_debug_image(image_path, regions, debug_base_dir=output_dir)
+
+ # Step 2: Translation & Inpainting (concurrent)
+ self._log(f"\n📍 [STEP 2] Translation & Inpainting Phase (concurrent)")
+
+ # Load image once (used by inpainting task); keep PIL fallback for Unicode paths
+ import cv2
+ self._log(f"🖼️ Loading image with OpenCV...")
+ try:
+ image = cv2.imread(image_path)
+ if image is None:
+ self._log(f" Using PIL to handle Unicode path...", "info")
+ from PIL import Image as PILImage
+ import numpy as np
+ pil_image = PILImage.open(image_path)
+ image = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)
+ self._log(f" ✅ Successfully loaded with PIL", "info")
+ except Exception as e:
+ error_msg = f"Failed to load image: {image_path} - {str(e)}"
+ self._log(f"❌ {error_msg}", "error")
+ result['errors'].append(error_msg)
+ return result
+
+ self._log(f" Image dimensions: {image.shape[1]}x{image.shape[0]}")
+
+ # Save intermediate original image if enabled
+ if self.manga_settings.get('advanced', {}).get('save_intermediate', False):
+ self._save_intermediate_image(image_path, image, "original", debug_base_dir=output_dir)
+
+ # Check if we should continue before kicking off tasks
+ if self._check_stop():
+ result['interrupted'] = True
+ self._log("⏹️ Translation stopped before concurrent phase", "warning")
+ return result
+
+ # Helper tasks
+ def _task_translate():
+ try:
+ if self.full_page_context_enabled:
+ # Full page context translation mode
+ self._log(f"\n📄 Using FULL PAGE CONTEXT mode")
+ self._log(" This mode sends all text together for more consistent translations", "info")
+ if self._check_stop():
+ return False
+ translations = self.translate_full_page_context(regions, image_path)
+ if translations:
+ translated_count = sum(1 for r in regions if getattr(r, 'translated_text', None) and r.translated_text and r.translated_text != r.text)
+ self._log(f"\n📊 Full page context translation complete: {translated_count}/{len(regions)} regions translated")
+ return True
+ else:
+ self._log("❌ Full page context translation failed", "error")
+ result['errors'].append("Full page context translation failed")
+ return False
+ else:
+ # Individual translation mode with parallel processing support
+ self._log(f"\n📝 Using INDIVIDUAL translation mode")
+ if self.manga_settings.get('advanced', {}).get('parallel_processing', False):
+ self._log("⚡ Parallel processing ENABLED")
+ _ = self._translate_regions_parallel(regions, image_path)
+ else:
+ _ = self.translate_regions(regions, image_path)
+ return True
+ except Exception as te:
+ self._log(f"❌ Translation task error: {te}", "error")
+ return False
+
+ def _task_inpaint():
+ try:
+ if getattr(self, 'skip_inpainting', False):
+ self._log(f"🎨 Skipping inpainting (preserving original art)", "info")
+ return image.copy()
+
+ self._log(f"🎭 Creating text mask...")
+ try:
+ self._block_if_over_cap("mask creation")
+ except Exception:
+ pass
+ mask_local = self.create_text_mask(image, regions)
+
+ # Save mask and overlay only if 'Save intermediate images' is enabled
+ if self.manga_settings.get('advanced', {}).get('save_intermediate', False):
+ try:
+ debug_dir = os.path.join(output_dir, 'debug')
+ os.makedirs(debug_dir, exist_ok=True)
+ base_name = os.path.splitext(os.path.basename(image_path))[0]
+ mask_path = os.path.join(debug_dir, f"{base_name}_mask.png")
+ cv2.imwrite(mask_path, mask_local)
+ mask_percentage = ((mask_local > 0).sum() / mask_local.size) * 100
+ self._log(f" 🎭 DEBUG: Saved mask to {mask_path}", "info")
+ self._log(f" 📊 Mask coverage: {mask_percentage:.1f}% of image", "info")
+
+ # Save mask overlay visualization
+ mask_viz_local = image.copy()
+ mask_viz_local[mask_local > 0] = [0, 0, 255]
+ viz_path = os.path.join(debug_dir, f"{base_name}_mask_overlay.png")
+ cv2.imwrite(viz_path, mask_viz_local)
+ self._log(f" 🎭 DEBUG: Saved mask overlay to {viz_path}", "info")
+ except Exception as e:
+ self._log(f" ❌ Failed to save mask debug: {str(e)}", "error")
+
+ # Also save intermediate copies
+ try:
+ self._save_intermediate_image(image_path, mask_local, "mask", debug_base_dir=output_dir)
+ except Exception:
+ pass
+
+ self._log(f"🎨 Inpainting to remove original text")
+ try:
+ self._block_if_over_cap("inpainting")
+ except Exception:
+ pass
+ inpainted_local = self.inpaint_regions(image, mask_local)
+
+ if self.manga_settings.get('advanced', {}).get('save_intermediate', False):
+ try:
+ self._save_intermediate_image(image_path, inpainted_local, "inpainted", debug_base_dir=output_dir)
+ except Exception:
+ pass
+ return inpainted_local
+ except Exception as ie:
+ self._log(f"❌ Inpainting task error: {ie}", "error")
+ return image.copy()
+
+ # Gate on advanced setting (default enabled)
+ adv = self.manga_settings.get('advanced', {})
+ run_concurrent = adv.get('concurrent_inpaint_translate', True)
+
+ if run_concurrent:
+ self._log("🔀 Running translation and inpainting concurrently", "info")
+ with ThreadPoolExecutor(max_workers=2) as _executor:
+ fut_translate = _executor.submit(_task_translate)
+ fut_inpaint = _executor.submit(_task_inpaint)
+ # Wait for completion
+ try:
+ translate_ok = fut_translate.result()
+ except Exception:
+ translate_ok = False
+ try:
+ inpainted = fut_inpaint.result()
+ except Exception:
+ inpainted = image.copy()
+ else:
+ self._log("↪️ Concurrent mode disabled — running sequentially", "info")
+ translate_ok = _task_translate()
+ inpainted = _task_inpaint()
+
+ # After concurrent phase, validate translation
+ if self._check_stop():
+ result['interrupted'] = True
+ self._log("⏹️ Translation cancelled before rendering", "warning")
+ result['regions'] = [r.to_dict() for r in regions]
+ return result
+
+ if not any(getattr(region, 'translated_text', None) for region in regions):
+ result['interrupted'] = True
+ self._log("⏹️ No regions were translated - translation was interrupted", "warning")
+ result['regions'] = [r.to_dict() for r in regions]
+ return result
+
+ # Render translated text
+ self._log(f"✍️ Rendering translated text...")
+ self._log(f" Using enhanced renderer with custom settings", "info")
+ final_image = self.render_translated_text(inpainted, regions)
+
+ # Save output
+ try:
+ if not output_path:
+ base, ext = os.path.splitext(image_path)
+ output_path = f"{base}_translated{ext}"
+
+ success = cv2.imwrite(output_path, final_image)
+
+ if not success:
+ self._log(f" Using PIL to save with Unicode path...", "info")
+ from PIL import Image as PILImage
+
+ rgb_image = cv2.cvtColor(final_image, cv2.COLOR_BGR2RGB)
+ pil_image = PILImage.fromarray(rgb_image)
+ pil_image.save(output_path)
+ self._log(f" ✅ Successfully saved with PIL", "info")
+
+ result['output_path'] = output_path
+ self._log(f"\n💾 Saved output to: {output_path}")
+
+ except Exception as e:
+ error_msg = f"Failed to save output image: {str(e)}"
+ self._log(f"❌ {error_msg}", "error")
+ result['errors'].append(error_msg)
+ result['success'] = False
+ return result
+
+ # Update result
+ result['regions'] = [r.to_dict() for r in regions]
+ if not result.get('interrupted', False):
+ result['success'] = True
+ self._log(f"\n✅ TRANSLATION PIPELINE COMPLETE", "success")
+ else:
+ self._log(f"\n⚠️ TRANSLATION INTERRUPTED - Partial output saved", "warning")
+
+ self._log(f"{'='*60}\n")
+
+ except Exception as e:
+ error_msg = f"Error processing image: {str(e)}\n{traceback.format_exc()}"
+ self._log(f"\n❌ PIPELINE ERROR:", "error")
+ self._log(f" {str(e)}", "error")
+ self._log(f" Type: {type(e).__name__}", "error")
+ self._log(traceback.format_exc(), "error")
+ result['errors'].append(error_msg)
+ finally:
+ # Per-image memory cleanup to reduce RAM growth across pages
+ try:
+ # Clear self-held large attributes
+ try:
+ self.current_image = None
+ self.current_mask = None
+ self.final_image = None
+ self.text_regions = []
+ self.translated_regions = []
+ except Exception:
+ pass
+
+ # Clear local large objects if present.
+ # Deleting keys from the dict returned by locals() is a no-op in CPython,
+ # so rebind the names to None instead to drop the references.
+ image = None
+ inpainted = None
+ final_image = None
+ mask = None
+ mask_viz = None
+ pil_image = None
+ heatmap = None
+
+ # Reset caches for the next image (non-destructive to loaded models)
+ try:
+ self.reset_for_new_image()
+ except Exception:
+ pass
+
+ # Encourage release of native resources
+ try:
+ import cv2 as _cv2
+ try:
+ _cv2.destroyAllWindows()
+ except Exception:
+ pass
+ except Exception:
+ pass
+
+ # Free CUDA memory if torch is available
+ try:
+ import torch
+ if torch.cuda.is_available():
+ torch.cuda.empty_cache()
+ except Exception:
+ pass
+
+ # Release thread-local heavy objects to curb RAM growth across runs
+ try:
+ self._cleanup_thread_locals()
+ except Exception:
+ pass
+
+ # Deep cleanup control - respects user settings and parallel processing
+ try:
+ # Check if auto cleanup is enabled in settings
+ auto_cleanup_enabled = False # Disabled by default
+ try:
+ if hasattr(self, 'manga_settings'):
+ auto_cleanup_enabled = self.manga_settings.get('advanced', {}).get('auto_cleanup_models', False)
+ except Exception:
+ pass
+
+ if not auto_cleanup_enabled:
+ # User has disabled automatic cleanup
+ self._log("🔑 Auto cleanup disabled - models will remain in RAM", "debug")
+ else:
+ # Determine if we should cleanup now
+ should_cleanup_now = True
+
+ # Check if we're in batch mode
+ is_last_in_batch = False
+ try:
+ if getattr(self, 'batch_mode', False):
+ bc = getattr(self, 'batch_current', None)
+ bt = getattr(self, 'batch_size', None)
+ if bc is not None and bt is not None:
+ is_last_in_batch = (bc >= bt)
+ # In batch mode, only cleanup at the end
+ should_cleanup_now = is_last_in_batch
+ except Exception:
+ pass
+
+ # For parallel panel translation, cleanup is handled differently
+ # (it's handled in manga_integration.py after all panels complete)
+ is_parallel_panel = False
+ try:
+ if hasattr(self, 'manga_settings'):
+ is_parallel_panel = self.manga_settings.get('advanced', {}).get('parallel_panel_translation', False)
+ except Exception:
+ pass
+
+ if is_parallel_panel:
+ # Don't cleanup here - let manga_integration handle it after all panels
+ self._log("🎯 Deferring cleanup until all parallel panels complete", "debug")
+ should_cleanup_now = False
+
+ if should_cleanup_now:
+ # Perform the cleanup
+ self._deep_cleanup_models()
+
+ # Also clear HF cache for RT-DETR (best-effort)
+ if is_last_in_batch or not getattr(self, 'batch_mode', False):
+ try:
+ self._clear_hf_cache()
+ except Exception:
+ pass
+ except Exception:
+ pass
+
+ # Force a garbage collection cycle
+ try:
+ import gc
+ gc.collect()
+ except Exception:
+ pass
+
+ # Aggressively trim process working set (Windows) or libc heap (Linux)
+ try:
+ self._trim_working_set()
+ except Exception:
+ pass
+ except Exception:
+ # Never let cleanup fail the pipeline
+ pass
+
+ return result
+
+ def reset_history_manager(self):
+ """Reset history manager for new translation batch"""
+ self.history_manager = None
+ self.history_manager_initialized = False
+ self.history_output_dir = None
+ self.translation_context = []
+ self._log("📚 Reset history manager for new batch", "debug")
+
+ def cleanup_all_models(self):
+ """Public method to force cleanup of all models - call this after translation!
+ This ensures all models (YOLO, RT-DETR, inpainters, OCR) are unloaded from RAM.
+ """
+ self._log("🧹 Forcing cleanup of all models to free RAM...", "info")
+
+ # Call the comprehensive cleanup
+ self._deep_cleanup_models()
+
+ # Also cleanup thread locals
+ try:
+ self._cleanup_thread_locals()
+ except Exception:
+ pass
+
+ # Clear HF cache
+ try:
+ self._clear_hf_cache()
+ except Exception:
+ pass
+
+ # Trim working set
+ try:
+ self._trim_working_set()
+ except Exception:
+ pass
+
+ self._log("✅ All models cleaned up - RAM freed!", "info")
+
+ def clear_internal_state(self):
+ """Clear all internal state and cached data to free memory.
+ This is called when the translator instance is being reset.
+ Ensures OCR manager, inpainters, and bubble detector are also cleaned.
+ """
+ try:
+ # Clear image data
+ self.current_image = None
+ self.current_mask = None
+ self.final_image = None
+
+ # Clear text regions
+ if hasattr(self, 'text_regions'):
+ self.text_regions = []
+ if hasattr(self, 'translated_regions'):
+ self.translated_regions = []
+
+ # Clear ALL caches (including text caches)
+ # THREAD-SAFE: Use lock for parallel panel translation
+ if hasattr(self, 'cache'):
+ self.cache.clear()
+ if hasattr(self, 'ocr_roi_cache'):
+ with self._cache_lock:
+ self.ocr_roi_cache.clear()
+ self._current_image_hash = None
+
+ # Clear history and context
+ if hasattr(self, 'translation_context'):
+ self.translation_context = []
+ if hasattr(self, 'history_manager'):
+ self.history_manager = None
+ self.history_manager_initialized = False
+ self.history_output_dir = None
+
+ # IMPORTANT: Properly unload OCR manager
+ if hasattr(self, 'ocr_manager') and self.ocr_manager:
+ try:
+ ocr = self.ocr_manager
+ if hasattr(ocr, 'providers'):
+ for provider_name, provider in ocr.providers.items():
+ # Clear all model references
+ if hasattr(provider, 'model'):
+ provider.model = None
+ if hasattr(provider, 'processor'):
+ provider.processor = None
+ if hasattr(provider, 'tokenizer'):
+ provider.tokenizer = None
+ if hasattr(provider, 'reader'):
+ provider.reader = None
+ if hasattr(provider, 'client'):
+ provider.client = None
+ if hasattr(provider, 'is_loaded'):
+ provider.is_loaded = False
+ ocr.providers.clear()
+ self.ocr_manager = None
+ self._log(" ✓ OCR manager cleared", "debug")
+ except Exception as e:
+ self._log(f" Warning: OCR cleanup failed: {e}", "debug")
+
+ # IMPORTANT: Handle local inpainter cleanup carefully
+ # DO NOT unload if it's a shared/checked-out instance from the pool
+ if hasattr(self, 'local_inpainter') and self.local_inpainter:
+ try:
+ # Only unload if this is NOT a checked-out or shared instance
+ is_from_pool = hasattr(self, '_checked_out_inpainter') or hasattr(self, '_inpainter_pool_key')
+ if not is_from_pool and hasattr(self.local_inpainter, 'unload'):
+ self.local_inpainter.unload()
+ self._log(" ✓ Local inpainter unloaded", "debug")
+ else:
+ self._log(" ✓ Local inpainter reference cleared (pool instance preserved)", "debug")
+ self.local_inpainter = None
+ except Exception as e:
+ self._log(f" Warning: Inpainter cleanup failed: {e}", "debug")
+
+ # Also clear hybrid and generic inpainter references
+ if hasattr(self, 'hybrid_inpainter'):
+ if self.hybrid_inpainter and hasattr(self.hybrid_inpainter, 'unload'):
+ try:
+ self.hybrid_inpainter.unload()
+ except Exception:
+ pass
+ self.hybrid_inpainter = None
+
+ if hasattr(self, 'inpainter'):
+ if self.inpainter and hasattr(self.inpainter, 'unload'):
+ try:
+ self.inpainter.unload()
+ except Exception:
+ pass
+ self.inpainter = None
+
+ # IMPORTANT: Handle bubble detector cleanup carefully
+ # DO NOT unload if it's a singleton or from a preloaded pool
+ if hasattr(self, 'bubble_detector') and self.bubble_detector:
+ try:
+ is_singleton = getattr(self, 'use_singleton_bubble_detector', False)
+ # Check if it's from thread-local which might have gotten it from the pool
+ is_from_pool = hasattr(self, '_thread_local') and hasattr(self._thread_local, 'bubble_detector')
+
+ if not is_singleton and not is_from_pool:
+ if hasattr(self.bubble_detector, 'unload'):
+ self.bubble_detector.unload(release_shared=True)
+ self._log(" ✓ Bubble detector unloaded", "debug")
+ else:
+ self._log(" ✓ Bubble detector reference cleared (pool/singleton instance preserved)", "debug")
+ # In all cases, clear our instance reference
+ self.bubble_detector = None
+ except Exception as e:
+ self._log(f" Warning: Bubble detector cleanup failed: {e}", "debug")
+
+ # Clear any file handles or temp data
+ if hasattr(self, '_thread_local'):
+ try:
+ self._cleanup_thread_locals()
+ except Exception:
+ pass
+
+ # Clear processing flags
+ self.is_processing = False
+ self.cancel_requested = False
+
+ self._log("🧹 Internal state and all components cleared", "debug")
+
+ except Exception as e:
+ self._log(f"⚠️ Warning: Failed to clear internal state: {e}", "warning")
+
+ def _process_webtoon_chunks(self, image_path: str, output_path: str, result: Dict) -> Dict:
+ """Process webtoon in chunks for better OCR"""
+ import cv2
+ import numpy as np
+ from PIL import Image as PILImage
+
+ try:
+ self._log("📱 Processing webtoon in chunks for better OCR", "info")
+
+ # Load the image
+ image = cv2.imread(image_path)
+ if image is None:
+ pil_image = PILImage.open(image_path)
+ image = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)
+
+ height, width = image.shape[:2]
+
+ # Get chunk settings from config
+ chunk_height = self.manga_settings.get('preprocessing', {}).get('chunk_height', 1000)
+ chunk_overlap = self.manga_settings.get('preprocessing', {}).get('chunk_overlap', 100)
+
+ self._log(f" Image dimensions: {width}x{height}", "info")
+ self._log(f" Chunk height: {chunk_height}px, Overlap: {chunk_overlap}px", "info")
+
+ # Calculate number of chunks needed
+ effective_chunk_height = chunk_height - chunk_overlap
+ num_chunks = max(1, (height - chunk_overlap) // effective_chunk_height + 1)
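+ # Worked example (hypothetical sizes): height=3500, chunk_height=1000,
+ # chunk_overlap=100 gives effective_chunk_height=900 and
+ # num_chunks = max(1, (3500 - 100) // 900 + 1) = 4, i.e. chunks covering
+ # y=0-1000, 900-1900, 1800-2800 and 2700-3500.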
+
+ self._log(f" Will process in {num_chunks} chunks", "info")
+
+ # Process each chunk
+ all_regions = []
+ chunk_offsets = []
+
+ for i in range(num_chunks):
+ # Calculate chunk boundaries
+ start_y = i * effective_chunk_height
+ end_y = min(start_y + chunk_height, height)
+
+ # Make sure we don't miss the bottom part
+ if i == num_chunks - 1:
+ end_y = height
+
+ self._log(f"\n 📄 Processing chunk {i+1}/{num_chunks} (y: {start_y}-{end_y})", "info")
+
+ # Extract chunk
+ chunk = image[start_y:end_y, 0:width]
+
+ # Save chunk temporarily for OCR
+ import tempfile
+ with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp:
+ chunk_path = tmp.name
+ cv2.imwrite(chunk_path, chunk)
+
+ try:
+ # Detect text in this chunk
+ chunk_regions = self.detect_text_regions(chunk_path)
+
+ # Adjust region coordinates to full image space
+ for region in chunk_regions:
+ # Adjust bounding box
+ x, y, w, h = region.bounding_box
+ region.bounding_box = (x, y + start_y, w, h)
+
+ # Adjust vertices if present
+ if hasattr(region, 'vertices') and region.vertices:
+ adjusted_vertices = []
+ for vx, vy in region.vertices:
+ adjusted_vertices.append((vx, vy + start_y))
+ region.vertices = adjusted_vertices
+
+ # Mark which chunk this came from (for deduplication)
+ region.chunk_index = i
+ region.chunk_y_range = (start_y, end_y)
+
+ all_regions.extend(chunk_regions)
+ chunk_offsets.append(start_y)
+
+ self._log(f" Found {len(chunk_regions)} text regions in chunk {i+1}", "info")
+
+ finally:
+ # Clean up temp file
+ import os
+ if os.path.exists(chunk_path):
+ os.remove(chunk_path)
+
+ # Remove duplicate regions from overlapping areas
+ self._log(f"\n 🔍 Deduplicating regions from overlaps...", "info")
+ unique_regions = self._deduplicate_chunk_regions(all_regions, chunk_overlap)
+
+ self._log(f" Total regions: {len(all_regions)} → {len(unique_regions)} after deduplication", "info")
+
+ if not unique_regions:
+ self._log("⚠️ No text regions detected in webtoon", "warning")
+ result['errors'].append("No text regions detected")
+ return result
+
+ # Now process the regions as normal
+ self._log(f"\n📍 Translating {len(unique_regions)} unique regions", "info")
+
+ # Translate regions
+ if self.full_page_context_enabled:
+ translations = self.translate_full_page_context(unique_regions, image_path)
+ for region in unique_regions:
+ if region.text in translations:
+ region.translated_text = translations[region.text]
+ else:
+ unique_regions = self.translate_regions(unique_regions, image_path)
+
+ # Create mask and inpaint
+ self._log(f"\n🎨 Creating mask and inpainting...", "info")
+ mask = self.create_text_mask(image, unique_regions)
+
+ if self.skip_inpainting:
+ inpainted = image.copy()
+ else:
+ inpainted = self.inpaint_regions(image, mask)
+
+ # Render translated text
+ self._log(f"✍️ Rendering translated text...", "info")
+ final_image = self.render_translated_text(inpainted, unique_regions)
+
+ # Save output
+ if not output_path:
+ base, ext = os.path.splitext(image_path)
+ output_path = f"{base}_translated{ext}"
+
+ cv2.imwrite(output_path, final_image)
+
+ result['output_path'] = output_path
+ result['regions'] = [r.to_dict() for r in unique_regions]
+ result['success'] = True
+ result['format_info']['chunks_processed'] = num_chunks
+
+ self._log(f"\n✅ Webtoon processing complete: {output_path}", "success")
+
+ return result
+
+ except Exception as e:
+ error_msg = f"Error processing webtoon chunks: {str(e)}"
+ self._log(f"❌ {error_msg}", "error")
+ result['errors'].append(error_msg)
+ return result
+
+ def _deduplicate_chunk_regions(self, regions: List, overlap_height: int) -> List:
+ """Remove duplicate regions from overlapping chunk areas"""
+ if not regions:
+ return regions
+
+ # Sort regions by y position
+ regions.sort(key=lambda r: r.bounding_box[1])
+
+ unique_regions = []
+ used_indices = set()
+
+ for i, region1 in enumerate(regions):
+ if i in used_indices:
+ continue
+
+ # Check if this region is in an overlap zone
+ x1, y1, w1, h1 = region1.bounding_box
+ chunk_idx = region1.chunk_index if hasattr(region1, 'chunk_index') else 0
+ chunk_y_start, chunk_y_end = region1.chunk_y_range if hasattr(region1, 'chunk_y_range') else (0, float('inf'))
+
+ # Check if region is near chunk boundary (in overlap zone)
+ in_overlap_zone = (y1 < chunk_y_start + overlap_height) and chunk_idx > 0
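+            # Regions near the top of a non-first chunk lie in the strip the previous chunk also scanned, so they may have been detected twice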
+
+ if in_overlap_zone:
+ # Look for duplicate in previous chunk's regions
+ found_duplicate = False
+
+ for j, region2 in enumerate(regions):
+ if j >= i or j in used_indices:
+ continue
+
+ if hasattr(region2, 'chunk_index') and region2.chunk_index == chunk_idx - 1:
+ x2, y2, w2, h2 = region2.bounding_box
+
+ # Check if regions are the same (similar position and size)
+ if (abs(x1 - x2) < 20 and
+ abs(y1 - y2) < 20 and
+ abs(w1 - w2) < 20 and
+ abs(h1 - h2) < 20):
+
+ # Check text similarity
+ if region1.text == region2.text:
+ # This is a duplicate
+ found_duplicate = True
+ used_indices.add(i)
+ self._log(f" Removed duplicate: '{region1.text[:30]}...'", "debug")
+ break
+
+ if not found_duplicate:
+ unique_regions.append(region1)
+ used_indices.add(i)
+ else:
+ # Not in overlap zone, keep it
+ unique_regions.append(region1)
+ used_indices.add(i)
+
+ return unique_regions
+
+ def _save_intermediate_image(self, original_path: str, image, stage: str, debug_base_dir: str = None):
+ """Save intermediate processing stages under translated_images/debug or provided base dir"""
+ if debug_base_dir is None:
+ translated_dir = os.path.join(os.path.dirname(original_path), 'translated_images')
+ debug_dir = os.path.join(translated_dir, 'debug')
+ else:
+ debug_dir = os.path.join(debug_base_dir, 'debug')
+ os.makedirs(debug_dir, exist_ok=True)
+
+ base_name = os.path.splitext(os.path.basename(original_path))[0]
+ output_path = os.path.join(debug_dir, f"{base_name}_{stage}.png")
+
+ cv2.imwrite(output_path, image)
+ self._log(f" 💾 Saved {stage} image: {output_path}")
diff --git a/ocr_manager.py b/ocr_manager.py
new file mode 100644
index 0000000000000000000000000000000000000000..53ed6e28755a64a79d5388df7d90bd5c19388f7e
--- /dev/null
+++ b/ocr_manager.py
@@ -0,0 +1,1904 @@
+# ocr_manager.py
+"""
+OCR Manager for handling multiple OCR providers
+Handles installation, model downloading, and OCR processing
+Updated with HuggingFace donut model and proper bubble detection integration
+"""
+import os
+import sys
+import cv2
+import json
+import subprocess
+import threading
+import traceback
+from typing import List, Dict, Optional, Tuple, Any
+import numpy as np
+from dataclasses import dataclass
+from PIL import Image
+import logging
+import time
+import random
+import base64
+import io
+import requests
+
+try:
+ import gptqmodel
+ HAS_GPTQ = True
+except ImportError:
+ try:
+ import auto_gptq
+ HAS_GPTQ = True
+ except ImportError:
+ HAS_GPTQ = False
+
+try:
+ import optimum
+ HAS_OPTIMUM = True
+except ImportError:
+ HAS_OPTIMUM = False
+
+try:
+ import accelerate
+ HAS_ACCELERATE = True
+except ImportError:
+ HAS_ACCELERATE = False
+
+logger = logging.getLogger(__name__)
+
+@dataclass
+class OCRResult:
+ """Unified OCR result format with built-in sanitization to prevent data corruption."""
+ text: str
+ bbox: Tuple[int, int, int, int] # x, y, w, h
+ confidence: float
+ vertices: Optional[List[Tuple[int, int]]] = None
+
+ def __post_init__(self):
+ """
+ This special method is called automatically after the object is created.
+ It acts as a final safeguard to ensure the 'text' attribute is ALWAYS a clean string.
+ """
+        # If the text we received is a tuple, extract the first element so a
+        # tuple can never leak into a finished OCRResult.
+ if isinstance(self.text, tuple):
+ # Log that we are fixing a critical data error.
+ print(f"CRITICAL WARNING: Corrupted tuple detected in OCRResult. Sanitizing '{self.text}' to '{self.text[0]}'.")
+ self.text = self.text[0]
+
+ # Ensure the final result is always a stripped string.
+ self.text = str(self.text).strip()
+
+class OCRProvider:
+ """Base class for OCR providers"""
+
+ def __init__(self, log_callback=None):
+ # Set thread limits early if environment indicates single-threaded mode
+ try:
+ if os.environ.get('OMP_NUM_THREADS') == '1':
+ # Already in single-threaded mode, ensure it's applied to this process
+ try:
+ import sys
+ if 'torch' in sys.modules:
+ import torch
+ torch.set_num_threads(1)
+ except (ImportError, RuntimeError, AttributeError):
+ pass
+ try:
+ import cv2
+ cv2.setNumThreads(1)
+ except (ImportError, AttributeError):
+ pass
+ except Exception:
+ pass
+
+ self.log_callback = log_callback
+ self.is_installed = False
+ self.is_loaded = False
+ self.model = None
+ self.stop_flag = None
+ self._stopped = False
+
+ def _log(self, message: str, level: str = "info"):
+ """Log message with stop suppression"""
+ # Suppress logs when stopped (allow only essential stop confirmation messages)
+ if self._check_stop():
+ essential_stop_keywords = [
+ "⏹️ Translation stopped by user",
+ "⏹️ OCR processing stopped",
+ "cleanup", "🧹"
+ ]
+ if not any(keyword in message for keyword in essential_stop_keywords):
+ return
+
+ if self.log_callback:
+ self.log_callback(message, level)
+ else:
+ print(f"[{level.upper()}] {message}")
+
+ def set_stop_flag(self, stop_flag):
+ """Set the stop flag for checking interruptions"""
+ self.stop_flag = stop_flag
+ self._stopped = False
+
+ def _check_stop(self) -> bool:
+ """Check if stop has been requested"""
+ if self._stopped:
+ return True
+ if self.stop_flag and self.stop_flag.is_set():
+ self._stopped = True
+ return True
+ # Check global manga translator cancellation
+ try:
+ from manga_translator import MangaTranslator
+ if MangaTranslator.is_globally_cancelled():
+ self._stopped = True
+ return True
+ except Exception:
+ pass
+ return False
+
+ def reset_stop_flags(self):
+ """Reset stop flags when starting new processing"""
+ self._stopped = False
+
+ def check_installation(self) -> bool:
+ """Check if provider is installed"""
+ raise NotImplementedError
+
+ def install(self, progress_callback=None) -> bool:
+ """Install the provider"""
+ raise NotImplementedError
+
+ def load_model(self, **kwargs) -> bool:
+ """Load the OCR model"""
+ raise NotImplementedError
+
+ def detect_text(self, image: np.ndarray, **kwargs) -> List[OCRResult]:
+ """Detect text in image"""
+ raise NotImplementedError
+
+class CustomAPIProvider(OCRProvider):
+ """Custom API OCR provider that uses existing GUI variables"""
+
+ def __init__(self, log_callback=None):
+ super().__init__(log_callback)
+
+ # Use EXISTING environment variables from TranslatorGUI
+ self.api_url = os.environ.get('OPENAI_CUSTOM_BASE_URL', '')
+ self.api_key = os.environ.get('API_KEY', '') or os.environ.get('OPENAI_API_KEY', '')
+ self.model_name = os.environ.get('MODEL', 'gpt-4o-mini')
+
+ # OCR prompt - use system prompt or a dedicated OCR prompt variable
+ self.ocr_prompt = os.environ.get('OCR_SYSTEM_PROMPT',
+ os.environ.get('SYSTEM_PROMPT',
+ "YOU ARE AN OCR SYSTEM. YOUR ONLY JOB IS TEXT EXTRACTION.\n\n"
+ "CRITICAL RULES:\n"
+ "1. DO NOT TRANSLATE ANYTHING\n"
+ "2. DO NOT MODIFY THE TEXT\n"
+ "3. DO NOT EXPLAIN OR COMMENT\n"
+ "4. ONLY OUTPUT THE EXACT TEXT YOU SEE\n"
+ "5. PRESERVE NATURAL TEXT FLOW - DO NOT ADD UNNECESSARY LINE BREAKS\n\n"
+ "If you see Korean text, output it in Korean.\n"
+ "If you see Japanese text, output it in Japanese.\n"
+ "If you see Chinese text, output it in Chinese.\n"
+ "If you see English text, output it in English.\n\n"
+ "IMPORTANT: Only use line breaks where they naturally occur in the original text "
+ "(e.g., between dialogue lines or paragraphs). Do not break text mid-sentence or "
+ "between every word/character.\n\n"
+ "For vertical text common in manga/comics, transcribe it as a continuous line unless "
+ "there are clear visual breaks.\n\n"
+ "NEVER translate. ONLY extract exactly what is written.\n"
+ "Output ONLY the raw text, nothing else."
+ ))
+
+ # Use existing temperature and token settings
+ self.temperature = float(os.environ.get('TRANSLATION_TEMPERATURE', '0.01'))
+        # Default cap only; detect_text() re-reads MAX_OUTPUT_TOKENS at call time so GUI changes take effect
+        self.max_tokens = int(os.environ.get('MAX_OUTPUT_TOKENS', '4096'))
+
+ # Image settings from existing compression variables
+ self.image_format = 'jpeg' if os.environ.get('IMAGE_COMPRESSION_FORMAT', 'auto') != 'png' else 'png'
+ self.image_quality = int(os.environ.get('JPEG_QUALITY', '100'))
+
+ # Simple defaults
+ self.api_format = 'openai' # Most custom endpoints are OpenAI-compatible
+ self.timeout = int(os.environ.get('CHUNK_TIMEOUT', '30'))
+ self.api_headers = {} # Additional custom headers
+
+ # Retry configuration for Custom API OCR calls
+ self.max_retries = int(os.environ.get('CUSTOM_OCR_MAX_RETRIES', '3'))
+ self.retry_initial_delay = float(os.environ.get('CUSTOM_OCR_RETRY_INITIAL_DELAY', '0.8'))
+ self.retry_backoff = float(os.environ.get('CUSTOM_OCR_RETRY_BACKOFF', '1.8'))
+ self.retry_jitter = float(os.environ.get('CUSTOM_OCR_RETRY_JITTER', '0.4'))
+ self.retry_on_empty = os.environ.get('CUSTOM_OCR_RETRY_ON_EMPTY', '1') == '1'
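+        # With the defaults (3 attempts), backoff delays are ~0.8s and ~1.44s, plus up to 0.4s of jitter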
+
+ def check_installation(self) -> bool:
+ """Always installed - uses UnifiedClient"""
+ self.is_installed = True
+ return True
+
+ def install(self, progress_callback=None) -> bool:
+ """No installation needed for API-based provider"""
+ return self.check_installation()
+
+ def load_model(self, **kwargs) -> bool:
+ """Initialize UnifiedClient with current settings"""
+ try:
+ from unified_api_client import UnifiedClient
+
+ # Support passing API key from GUI if available
+ if 'api_key' in kwargs:
+ api_key = kwargs['api_key']
+ else:
+ api_key = os.environ.get('API_KEY', '') or os.environ.get('OPENAI_API_KEY', '')
+
+ if 'model' in kwargs:
+ model = kwargs['model']
+ else:
+ model = os.environ.get('MODEL', 'gpt-4o-mini')
+
+ if not api_key:
+ self._log("❌ No API key configured", "error")
+ return False
+
+ # Create UnifiedClient just like translations do
+ self.client = UnifiedClient(model=model, api_key=api_key)
+
+ #self._log(f"✅ Using {model} for OCR via UnifiedClient")
+ self.is_loaded = True
+ return True
+
+ except Exception as e:
+ self._log(f"❌ Failed to initialize UnifiedClient: {str(e)}", "error")
+ return False
+
+ def _test_connection(self) -> bool:
+ """Test API connection with a simple request"""
+ try:
+ # Create a small test image
+ test_image = np.ones((100, 100, 3), dtype=np.uint8) * 255
+ cv2.putText(test_image, "TEST", (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 2)
+
+ # Encode image
+ image_base64 = self._encode_image(test_image)
+
+ # Prepare test request based on API format
+ if self.api_format == 'openai':
+ test_payload = {
+ "model": self.model_name,
+ "messages": [
+ {
+ "role": "user",
+ "content": [
+ {"type": "text", "text": "What text do you see?"},
+ {"type": "image_url", "image_url": {"url": f"data:image/{self.image_format};base64,{image_base64}"}}
+ ]
+ }
+ ],
+ "max_tokens": 50
+ }
+ else:
+ # For other formats, just try a basic health check
+ return True
+
+ headers = self._prepare_headers()
+ response = requests.post(
+ self.api_url,
+ headers=headers,
+ json=test_payload,
+ timeout=10
+ )
+
+ return response.status_code == 200
+
+ except Exception:
+ return False
+
+ def _encode_image(self, image: np.ndarray) -> str:
+ """Encode numpy array to base64 string"""
+ # Convert BGR to RGB if needed
+ if len(image.shape) == 3 and image.shape[2] == 3:
+ image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+ else:
+ image_rgb = image
+
+ # Convert to PIL Image
+ pil_image = Image.fromarray(image_rgb)
+
+ # Save to bytes buffer
+ buffer = io.BytesIO()
+ if self.image_format.lower() == 'png':
+ pil_image.save(buffer, format='PNG')
+ else:
+ pil_image.save(buffer, format='JPEG', quality=self.image_quality)
+
+ # Encode to base64
+ buffer.seek(0)
+ image_base64 = base64.b64encode(buffer.read()).decode('utf-8')
+
+ return image_base64
+
+ def _prepare_headers(self) -> dict:
+ """Prepare request headers"""
+ headers = {
+ "Content-Type": "application/json"
+ }
+
+ # Add API key if configured
+ if self.api_key:
+ if self.api_format == 'anthropic':
+ headers["x-api-key"] = self.api_key
+ else:
+ headers["Authorization"] = f"Bearer {self.api_key}"
+
+ # Add any custom headers
+ headers.update(self.api_headers)
+
+ return headers
+
+ def _prepare_request_payload(self, image_base64: str) -> dict:
+ """Prepare request payload based on API format"""
+ if self.api_format == 'openai':
+ return {
+ "model": self.model_name,
+ "messages": [
+ {
+ "role": "user",
+ "content": [
+ {"type": "text", "text": self.ocr_prompt},
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": f"data:image/{self.image_format};base64,{image_base64}"
+ }
+ }
+ ]
+ }
+ ],
+ "max_tokens": self.max_tokens,
+ "temperature": self.temperature
+ }
+
+ elif self.api_format == 'anthropic':
+ return {
+ "model": self.model_name,
+ "max_tokens": self.max_tokens,
+ "temperature": self.temperature,
+ "messages": [
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "text",
+ "text": self.ocr_prompt
+ },
+ {
+ "type": "image",
+ "source": {
+ "type": "base64",
+ "media_type": f"image/{self.image_format}",
+ "data": image_base64
+ }
+ }
+ ]
+ }
+ ]
+ }
+
+ else:
+ # Custom format - use environment variable for template
+ template = os.environ.get('CUSTOM_OCR_REQUEST_TEMPLATE', '{}')
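+            # Illustrative template (placeholders must sit inside JSON strings so the raw template still parses):
+            # '{"model": "{{MODEL}}", "prompt": "{{PROMPT}}", "image": "{{IMAGE_BASE64}}"}'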
+ payload = json.loads(template)
+
+ # Replace placeholders
+ payload_str = json.dumps(payload)
+ payload_str = payload_str.replace('{{IMAGE_BASE64}}', image_base64)
+ payload_str = payload_str.replace('{{PROMPT}}', self.ocr_prompt)
+ payload_str = payload_str.replace('{{MODEL}}', self.model_name)
+ payload_str = payload_str.replace('{{MAX_TOKENS}}', str(self.max_tokens))
+ payload_str = payload_str.replace('{{TEMPERATURE}}', str(self.temperature))
+
+ return json.loads(payload_str)
+
+ def _extract_text_from_response(self, response_data: dict) -> str:
+ """Extract text from API response based on format"""
+ try:
+ if self.api_format == 'openai':
+ # OpenAI format: response.choices[0].message.content
+ return response_data.get('choices', [{}])[0].get('message', {}).get('content', '')
+
+ elif self.api_format == 'anthropic':
+ # Anthropic format: response.content[0].text
+ content = response_data.get('content', [])
+ if content and isinstance(content, list):
+ return content[0].get('text', '')
+ return ''
+
+ else:
+ # Custom format - use environment variable for path
+ response_path = os.environ.get('CUSTOM_OCR_RESPONSE_PATH', 'text')
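+                # e.g. CUSTOM_OCR_RESPONSE_PATH="choices.0.message.content" walks nested dicts and list indices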
+
+ # Navigate through the response using the path
+ result = response_data
+ for key in response_path.split('.'):
+ if isinstance(result, dict):
+ result = result.get(key, '')
+ elif isinstance(result, list) and key.isdigit():
+ idx = int(key)
+ result = result[idx] if idx < len(result) else ''
+ else:
+ result = ''
+ break
+
+ return str(result)
+
+ except Exception as e:
+ self._log(f"Failed to extract text from response: {e}", "error")
+ return ''
+
+ def detect_text(self, image: np.ndarray, **kwargs) -> List[OCRResult]:
+ """Process image using UnifiedClient.send_image()"""
+ results = []
+
+ try:
+ # Get fresh max_tokens from environment - GUI will have set this
+ max_tokens = int(os.environ.get('MAX_OUTPUT_TOKENS', '4096'))
+ if not self.is_loaded:
+ if not self.load_model():
+ return results
+
+ import cv2
+ from PIL import Image
+ import base64
+ import io
+
+ # Convert numpy array to PIL Image
+ image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+ pil_image = Image.fromarray(image_rgb)
+ h, w = image.shape[:2]
+
+ # Convert PIL Image to base64 string
+ buffer = io.BytesIO()
+
+ # Use the image format from settings
+ if self.image_format.lower() == 'png':
+ pil_image.save(buffer, format='PNG')
+ else:
+ pil_image.save(buffer, format='JPEG', quality=self.image_quality)
+
+ buffer.seek(0)
+ image_base64 = base64.b64encode(buffer.read()).decode('utf-8')
+
+ # For OpenAI vision models, we need BOTH:
+ # 1. System prompt with instructions
+ # 2. User message that includes the image
+ messages = [
+ {
+ "role": "system",
+ "content": self.ocr_prompt # The OCR instruction as system prompt
+ },
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "text",
+ "text": "Image:" # Minimal text, just to have something
+ },
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": f"data:image/jpeg;base64,{image_base64}"
+ }
+ }
+ ]
+ }
+ ]
+
+ # Now send this properly formatted message
+ # The UnifiedClient should handle this correctly
+ # But we're NOT using send_image, we're using regular send
+
+ # Retry-aware call
+ from unified_api_client import UnifiedClientError # local import to avoid hard dependency at module import time
+ max_attempts = max(1, self.max_retries)
+ attempt = 0
+ last_error = None
+
+ # Common refusal/error phrases that indicate a non-OCR response
+ refusal_phrases = [
+ "I can't extract", "I cannot extract",
+ "I'm sorry", "I am sorry",
+ "I'm unable", "I am unable",
+ "cannot process images",
+ "I can't help with that",
+ "cannot view images",
+ "no text in the image"
+ ]
+
+ while attempt < max_attempts:
+ # Check for stop before each attempt
+ if self._check_stop():
+ self._log("⏹️ OCR processing stopped by user", "warning")
+ return results
+
+ try:
+ response = self.client.send(
+ messages=messages,
+ temperature=self.temperature,
+ max_tokens=max_tokens
+ )
+
+ # Extract content from response object
+ content, finish_reason = response
+
+ # Validate content
+ has_content = bool(content and str(content).strip())
+ refused = False
+ if has_content:
+ # Filter out explicit failure markers
+ if "[" in content and "FAILED]" in content:
+ refused = True
+ elif any(phrase.lower() in content.lower() for phrase in refusal_phrases):
+ refused = True
+
+ # Decide success or retry
+ if has_content and not refused:
+ text = str(content).strip()
+ results.append(OCRResult(
+ text=text,
+ bbox=(0, 0, w, h),
+ confidence=kwargs.get('confidence', 0.85),
+ vertices=[(0, 0), (w, 0), (w, h), (0, h)]
+ ))
+ self._log(f"✅ Detected: {text[:50]}...")
+ break # success
+ else:
+ reason = "empty result" if not has_content else "refusal/non-OCR response"
+ last_error = f"{reason} (finish_reason: {finish_reason})"
+ # Check if we should retry on empty or refusal
+ should_retry = (not has_content and self.retry_on_empty) or refused
+ attempt += 1
+ if attempt >= max_attempts or not should_retry:
+ # No more retries or shouldn't retry
+ if not has_content:
+ self._log(f"⚠️ No text detected (finish_reason: {finish_reason})")
+ else:
+ self._log(f"❌ Model returned non-OCR response: {str(content)[:120]}", "warning")
+ break
+ # Backoff before retrying
+ delay = self.retry_initial_delay * (self.retry_backoff ** (attempt - 1)) + random.uniform(0, self.retry_jitter)
+ self._log(f"🔄 Retry {attempt}/{max_attempts - 1} after {delay:.1f}s due to {reason}...", "warning")
+ time.sleep(delay)
+ time.sleep(0.1) # Brief pause for stability
+ self._log("💤 OCR retry pausing briefly for stability", "debug")
+ continue
+
+ except UnifiedClientError as ue:
+ msg = str(ue)
+ last_error = msg
+ # Do not retry on explicit user cancellation
+ if 'cancelled' in msg.lower() or 'stopped by user' in msg.lower():
+ self._log(f"❌ OCR cancelled: {msg}", "error")
+ break
+ attempt += 1
+ if attempt >= max_attempts:
+ self._log(f"❌ OCR failed after {attempt} attempts: {msg}", "error")
+ break
+ delay = self.retry_initial_delay * (self.retry_backoff ** (attempt - 1)) + random.uniform(0, self.retry_jitter)
+ self._log(f"🔄 API error, retry {attempt}/{max_attempts - 1} after {delay:.1f}s: {msg}", "warning")
+ time.sleep(delay)
+ time.sleep(0.1) # Brief pause for stability
+ self._log("💤 OCR API error retry pausing briefly for stability", "debug")
+ continue
+ except Exception as e_inner:
+ last_error = str(e_inner)
+ attempt += 1
+ if attempt >= max_attempts:
+ self._log(f"❌ OCR exception after {attempt} attempts: {last_error}", "error")
+ break
+ delay = self.retry_initial_delay * (self.retry_backoff ** (attempt - 1)) + random.uniform(0, self.retry_jitter)
+ self._log(f"🔄 Exception, retry {attempt}/{max_attempts - 1} after {delay:.1f}s: {last_error}", "warning")
+ time.sleep(delay)
+ time.sleep(0.1) # Brief pause for stability
+ self._log("💤 OCR exception retry pausing briefly for stability", "debug")
+ continue
+
+ except Exception as e:
+ self._log(f"❌ Error: {str(e)}", "error")
+ import traceback
+ self._log(traceback.format_exc(), "debug")
+
+ return results
+
+class MangaOCRProvider(OCRProvider):
+ """Manga OCR provider using HuggingFace model directly"""
+
+ def __init__(self, log_callback=None):
+ super().__init__(log_callback)
+ self.processor = None
+ self.model = None
+ self.tokenizer = None
+
+ def check_installation(self) -> bool:
+ """Check if transformers is installed"""
+ try:
+ import transformers
+ import torch
+ self.is_installed = True
+ return True
+ except ImportError:
+ return False
+
+ def install(self, progress_callback=None) -> bool:
+ """Install transformers and torch"""
+ pass
+
+ def _is_valid_local_model_dir(self, path: str) -> bool:
+ """Check that a local HF model directory has required files."""
+ try:
+ if not path or not os.path.isdir(path):
+ return False
+ needed_any_weights = any(
+ os.path.exists(os.path.join(path, name)) for name in (
+ 'pytorch_model.bin',
+ 'model.safetensors'
+ )
+ )
+ has_config = os.path.exists(os.path.join(path, 'config.json'))
+ has_processor = (
+ os.path.exists(os.path.join(path, 'preprocessor_config.json')) or
+ os.path.exists(os.path.join(path, 'processor_config.json'))
+ )
+ has_tokenizer = (
+ os.path.exists(os.path.join(path, 'tokenizer.json')) or
+ os.path.exists(os.path.join(path, 'tokenizer_config.json'))
+ )
+ return has_config and needed_any_weights and has_processor and has_tokenizer
+ except Exception:
+ return False
+
+ def load_model(self, **kwargs) -> bool:
+ """Load the manga-ocr model, preferring a local directory to avoid re-downloading"""
+ print("\n>>> MangaOCRProvider.load_model() called")
+ try:
+ if not self.is_installed and not self.check_installation():
+ print("ERROR: Transformers not installed")
+ self._log("❌ Transformers not installed", "error")
+ return False
+
+ # Always disable progress bars to avoid tqdm issues in some environments
+ import os
+ os.environ.setdefault("HF_HUB_DISABLE_PROGRESS_BARS", "1")
+
+ from transformers import VisionEncoderDecoderModel, AutoTokenizer, AutoImageProcessor
+ import torch
+
+ # Prefer a local model directory if present to avoid any Hub access
+ candidates = []
+ env_local = os.environ.get("MANGA_OCR_LOCAL_DIR")
+ if env_local:
+ candidates.append(env_local)
+
+ # Project root one level up from this file
+ root_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
+ candidates.append(os.path.join(root_dir, 'models', 'manga-ocr-base'))
+ candidates.append(os.path.join(root_dir, 'models', 'kha-white', 'manga-ocr-base'))
+
+ model_source = None
+ local_only = False
+ # Find a valid local dir
+ for cand in candidates:
+ if self._is_valid_local_model_dir(cand):
+ model_source = cand
+ local_only = True
+ break
+
+ # If no valid local dir, use Hub
+ if not model_source:
+ model_source = "kha-white/manga-ocr-base"
+ # Make sure we are not forcing offline mode
+ if os.environ.get("HF_HUB_OFFLINE") == "1":
+ try:
+ del os.environ["HF_HUB_OFFLINE"]
+ except Exception:
+ pass
+ self._log("🔥 Loading manga-ocr model from Hugging Face Hub")
+ self._log(f" Repo: {model_source}")
+ else:
+ # Only set offline when local dir is fully valid
+ os.environ.setdefault("HF_HUB_OFFLINE", "1")
+ self._log("🔥 Loading manga-ocr model from local directory")
+ self._log(f" Local path: {model_source}")
+
+ # Decide target device once; we will move after full CPU load to avoid meta tensors
+ use_cuda = torch.cuda.is_available()
+
+ # Try loading components, falling back to Hub if local-only fails
+ def _load_components(source: str, local_flag: bool):
+ self._log(" Loading tokenizer...")
+ tok = AutoTokenizer.from_pretrained(source, local_files_only=local_flag)
+
+ self._log(" Loading image processor...")
+ try:
+ from transformers import AutoProcessor
+ except Exception:
+ AutoProcessor = None
+ try:
+ proc = AutoImageProcessor.from_pretrained(source, local_files_only=local_flag)
+ except Exception as e_proc:
+ if AutoProcessor is not None:
+ self._log(f" ⚠️ AutoImageProcessor failed: {e_proc}. Trying AutoProcessor...", "warning")
+ proc = AutoProcessor.from_pretrained(source, local_files_only=local_flag)
+ else:
+ raise
+
+ self._log(" Loading model...")
+ # Prevent meta tensors by forcing full materialization on CPU at load time
+ os.environ.setdefault('TORCHDYNAMO_DISABLE', '1')
+ mdl = VisionEncoderDecoderModel.from_pretrained(
+ source,
+ local_files_only=local_flag,
+ low_cpu_mem_usage=False,
+ device_map=None,
+ torch_dtype=torch.float32 # Use torch_dtype instead of dtype
+ )
+ return tok, proc, mdl
+
+ try:
+ self.tokenizer, self.processor, self.model = _load_components(model_source, local_only)
+ except Exception as e_local:
+ if local_only:
+ # Fallback to Hub once if local fails
+ self._log(f" ⚠️ Local model load failed: {e_local}", "warning")
+ try:
+ if os.environ.get("HF_HUB_OFFLINE") == "1":
+ del os.environ["HF_HUB_OFFLINE"]
+ except Exception:
+ pass
+ model_source = "kha-white/manga-ocr-base"
+ local_only = False
+ self._log(" Retrying from Hugging Face Hub...")
+ self.tokenizer, self.processor, self.model = _load_components(model_source, local_only)
+ else:
+ raise
+
+ # Move to CUDA only after full CPU materialization
+ target_device = 'cpu'
+ if use_cuda:
+ try:
+ self.model = self.model.to('cuda')
+ target_device = 'cuda'
+ except Exception as move_err:
+ self._log(f" ⚠️ Could not move model to CUDA: {move_err}", "warning")
+ target_device = 'cpu'
+
+ # Finalize eval mode
+ self.model.eval()
+
+ # Sanity-check: ensure no parameter remains on 'meta' device
+ try:
+ for n, p in self.model.named_parameters():
+ dev = getattr(p, 'device', None)
+ if dev is not None and getattr(dev, 'type', '') == 'meta':
+ raise RuntimeError(f"Parameter {n} is on 'meta' after load")
+ except Exception as sanity_err:
+ self._log(f"❌ Manga-OCR model load sanity check failed: {sanity_err}", "error")
+ return False
+
+ print(f"SUCCESS: Model loaded on {target_device.upper()}")
+ self._log(f" ✅ Model loaded on {target_device.upper()}")
+ self.is_loaded = True
+ self._log("✅ Manga OCR model ready")
+ print(">>> Returning True from load_model()")
+ return True
+
+ except Exception as e:
+ print(f"\nEXCEPTION in load_model: {e}")
+ import traceback
+ print(traceback.format_exc())
+ self._log(f"❌ Failed to load manga-ocr model: {str(e)}", "error")
+ self._log(traceback.format_exc(), "error")
+ try:
+ if 'local_only' in locals() and local_only:
+ self._log("Hint: Local load failed. Ensure your models/manga-ocr-base contains required files (config.json, preprocessor_config.json, tokenizer.json or tokenizer_config.json, and model weights).", "warning")
+ except Exception:
+ pass
+ return False
+
+ def _run_ocr(self, pil_image):
+ """Run OCR on a PIL image using the HuggingFace model"""
+ import torch
+
+ # Process image (keyword arg for broader compatibility across transformers versions)
+ inputs = self.processor(images=pil_image, return_tensors="pt")
+ pixel_values = inputs["pixel_values"]
+
+ # Move to same device as model
+ try:
+ model_device = next(self.model.parameters()).device
+ except StopIteration:
+ model_device = torch.device('cpu')
+ pixel_values = pixel_values.to(model_device)
+
+ # Generate text
+ with torch.no_grad():
+ generated_ids = self.model.generate(pixel_values)
+
+ # Decode
+ generated_text = self.tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
+
+ return generated_text
+
+ def detect_text(self, image: np.ndarray, **kwargs) -> List[OCRResult]:
+ """
+ Process the image region passed to it.
+ This could be a bubble region or the full image.
+ """
+ results = []
+
+ # Check for stop at start
+ if self._check_stop():
+ self._log("⏹️ Manga-OCR processing stopped by user", "warning")
+ return results
+
+ try:
+ if not self.is_loaded:
+ if not self.load_model():
+ return results
+
+ import cv2
+ from PIL import Image
+
+ # Get confidence from kwargs
+ confidence = kwargs.get('confidence', 0.7)
+
+ # Convert numpy array to PIL
+ image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+ pil_image = Image.fromarray(image_rgb)
+ h, w = image.shape[:2]
+
+ self._log("🔍 Processing region with manga-ocr...")
+
+ # Check for stop before inference
+ if self._check_stop():
+ self._log("⏹️ Manga-OCR inference stopped by user", "warning")
+ return results
+
+ # Run OCR on the image region
+ text = self._run_ocr(pil_image)
+
+ if text and text.strip():
+ # Return result for this region with its actual bbox
+ results.append(OCRResult(
+ text=text.strip(),
+ bbox=(0, 0, w, h), # Relative to the region passed in
+ confidence=confidence,
+ vertices=[(0, 0), (w, 0), (w, h), (0, h)]
+ ))
+ self._log(f"✅ Detected text: {text[:50]}...")
+
+ except Exception as e:
+ self._log(f"❌ Error in manga-ocr: {str(e)}", "error")
+
+ return results
+
+class Qwen2VL(OCRProvider):
+ """OCR using Qwen2-VL - Vision Language Model that can read Korean text"""
+
+ def __init__(self, log_callback=None):
+ super().__init__(log_callback)
+ self.processor = None
+ self.model = None
+ self.tokenizer = None
+
+ # Get OCR prompt from environment or use default
+ self.ocr_prompt = os.environ.get('OCR_SYSTEM_PROMPT',
+ "YOU ARE AN OCR SYSTEM. YOUR ONLY JOB IS TEXT EXTRACTION.\n\n"
+ "CRITICAL RULES:\n"
+ "1. DO NOT TRANSLATE ANYTHING\n"
+ "2. DO NOT MODIFY THE TEXT\n"
+ "3. DO NOT EXPLAIN OR COMMENT\n"
+ "4. ONLY OUTPUT THE EXACT TEXT YOU SEE\n"
+ "5. PRESERVE NATURAL TEXT FLOW - DO NOT ADD UNNECESSARY LINE BREAKS\n\n"
+ "If you see Korean text, output it in Korean.\n"
+ "If you see Japanese text, output it in Japanese.\n"
+ "If you see Chinese text, output it in Chinese.\n"
+ "If you see English text, output it in English.\n\n"
+ "IMPORTANT: Only use line breaks where they naturally occur in the original text "
+ "(e.g., between dialogue lines or paragraphs). Do not break text mid-sentence or "
+ "between every word/character.\n\n"
+ "For vertical text common in manga/comics, transcribe it as a continuous line unless "
+ "there are clear visual breaks.\n\n"
+ "NEVER translate. ONLY extract exactly what is written.\n"
+ "Output ONLY the raw text, nothing else."
+ )
+
+ def set_ocr_prompt(self, prompt: str):
+ """Allow setting the OCR prompt dynamically"""
+ self.ocr_prompt = prompt
+
+ def check_installation(self) -> bool:
+ """Check if required packages are installed"""
+ try:
+ import transformers
+ import torch
+ self.is_installed = True
+ return True
+ except ImportError:
+ return False
+
+ def install(self, progress_callback=None) -> bool:
+ """Install requirements for Qwen2-VL"""
+ pass
+
+ def load_model(self, model_size=None, **kwargs) -> bool:
+ """Load Qwen2-VL model with size selection"""
+ self._log(f"DEBUG: load_model called with model_size={model_size}")
+
+ try:
+ if not self.is_installed and not self.check_installation():
+ self._log("❌ Not installed", "error")
+ return False
+
+ self._log("🔥 Loading Qwen2-VL for Advanced OCR...")
+
+
+
+ from transformers import AutoProcessor, AutoTokenizer
+ import torch
+
+ # Model options
+ model_options = {
+ "1": "Qwen/Qwen2-VL-2B-Instruct",
+ "2": "Qwen/Qwen2-VL-7B-Instruct",
+ "3": "Qwen/Qwen2-VL-72B-Instruct",
+ "4": "custom"
+ }
+            # Check for a saved preference first; fall back to the 2B model when unset
+ if model_size is None:
+ # Try to get from environment or config
+ import os
+ model_size = os.environ.get('QWEN2VL_MODEL_SIZE', '1')
+
+ # Determine which model to load
+ if model_size and str(model_size).startswith("custom:"):
+ # Custom model passed with ID
+ model_id = str(model_size).replace("custom:", "")
+ self.loaded_model_size = "Custom"
+ self.model_id = model_id
+ self._log(f"Loading custom model: {model_id}")
+ elif model_size == "4":
+ # Custom option selected but no ID - shouldn't happen
+ self._log("❌ Custom model selected but no ID provided", "error")
+ return False
+ elif model_size and str(model_size) in model_options:
+ # Standard model option
+ option = model_options[str(model_size)]
+ if option == "custom":
+ self._log("❌ Custom model needs an ID", "error")
+ return False
+ model_id = option
+ # Set loaded_model_size for status display
+ if model_size == "1":
+ self.loaded_model_size = "2B"
+ elif model_size == "2":
+ self.loaded_model_size = "7B"
+ elif model_size == "3":
+ self.loaded_model_size = "72B"
+            else:
+                # Unrecognized option - fall back to the 2B model
+                model_id = model_options["1"]
+                self.loaded_model_size = "2B"
+                self._log("No recognized model size specified, defaulting to 2B")
+
+ self._log(f"Loading model: {model_id}")
+
+ # Load processor and tokenizer
+ self.processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
+ self.tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
+
+ # Load the model - let it figure out the class dynamically
+ if torch.cuda.is_available():
+ self._log(f"GPU: {torch.cuda.get_device_name(0)}")
+ # Use auto model class
+ from transformers import AutoModelForVision2Seq
+ self.model = AutoModelForVision2Seq.from_pretrained(
+ model_id,
+ dtype=torch.float16,
+ device_map="auto",
+ trust_remote_code=True
+ )
+ self._log("✅ Model loaded on GPU")
+ else:
+ self._log("Loading on CPU...")
+ from transformers import AutoModelForVision2Seq
+ self.model = AutoModelForVision2Seq.from_pretrained(
+ model_id,
+ dtype=torch.float32,
+ trust_remote_code=True
+ )
+ self._log("✅ Model loaded on CPU")
+
+ self.model.eval()
+ self.is_loaded = True
+ self._log("✅ Qwen2-VL ready for Advanced OCR!")
+ return True
+
+ except Exception as e:
+ self._log(f"❌ Failed to load: {str(e)}", "error")
+ import traceback
+ self._log(traceback.format_exc(), "debug")
+ return False
+
+ def detect_text(self, image: np.ndarray, **kwargs) -> List[OCRResult]:
+ """Process image with Qwen2-VL for Korean text extraction"""
+ results = []
+ if hasattr(self, 'model_id'):
+ self._log(f"DEBUG: Using model: {self.model_id}", "debug")
+
+ # Check if OCR prompt was passed in kwargs (for dynamic updates)
+ if 'ocr_prompt' in kwargs:
+ self.ocr_prompt = kwargs['ocr_prompt']
+
+ try:
+ if not self.is_loaded:
+ if not self.load_model():
+ return results
+
+ import cv2
+ from PIL import Image
+ import torch
+
+ # Convert to PIL
+ image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+ pil_image = Image.fromarray(image_rgb)
+ h, w = image.shape[:2]
+
+ self._log(f"🔍 Processing with Qwen2-VL ({w}x{h} pixels)...")
+
+ # Use the configurable OCR prompt
+ messages = [
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "image",
+ "image": pil_image,
+ },
+ {
+ "type": "text",
+ "text": self.ocr_prompt # Use the configurable prompt
+ }
+ ]
+ }
+ ]
+
+ # Alternative simpler prompt if the above still causes issues:
+ # "text": "OCR: Extract text as-is"
+
+ # Process with Qwen2-VL
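+            # apply_chat_template renders the message list into the model's prompt string; the processor call below pairs it with the image tensors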
+ text = self.processor.apply_chat_template(
+ messages,
+ tokenize=False,
+ add_generation_prompt=True
+ )
+
+ inputs = self.processor(
+ text=[text],
+ images=[pil_image],
+ padding=True,
+ return_tensors="pt"
+ )
+
+ # Get the device and dtype the model is currently on
+ model_device = next(self.model.parameters()).device
+ model_dtype = next(self.model.parameters()).dtype
+
+ # Move inputs to the same device as the model and cast float tensors to model dtype
+ try:
+ # Move first
+ inputs = inputs.to(model_device)
+ # Then align dtypes only for floating tensors (e.g., pixel_values)
+ for k, v in inputs.items():
+ if isinstance(v, torch.Tensor) and torch.is_floating_point(v):
+ inputs[k] = v.to(model_dtype)
+ except Exception:
+ # Fallback: ensure at least pixel_values is correct if present
+ try:
+ if isinstance(inputs, dict) and "pixel_values" in inputs:
+ pv = inputs["pixel_values"].to(model_device)
+ if torch.is_floating_point(pv):
+ inputs["pixel_values"] = pv.to(model_dtype)
+ except Exception:
+ pass
+
+ # Ensure pixel_values explicitly matches model dtype if present
+ try:
+ if isinstance(inputs, dict) and "pixel_values" in inputs:
+ inputs["pixel_values"] = inputs["pixel_values"].to(device=model_device, dtype=model_dtype)
+ except Exception:
+ pass
+
+ # Generate text with stricter parameters to avoid creative responses
+ use_amp = (hasattr(torch, 'cuda') and model_device.type == 'cuda' and model_dtype in (torch.float16, torch.bfloat16))
+ autocast_dev = 'cuda' if model_device.type == 'cuda' else 'cpu'
+ autocast_dtype = model_dtype if model_dtype in (torch.float16, torch.bfloat16) else None
+
+ with torch.no_grad():
+ if use_amp and autocast_dtype is not None:
+ with torch.autocast(autocast_dev, dtype=autocast_dtype):
+ generated_ids = self.model.generate(
+ **inputs,
+ max_new_tokens=128, # Reduced from 512 - manga bubbles are typically short
+ do_sample=False, # Keep deterministic
+ temperature=0.01, # Keep your very low temperature
+ top_p=1.0, # Keep no nucleus sampling
+ repetition_penalty=1.0, # Keep no repetition penalty
+ num_beams=1, # Ensure greedy decoding (faster than beam search)
+ use_cache=True, # Enable KV cache for speed
+ early_stopping=True, # Stop at EOS token
+ pad_token_id=self.tokenizer.pad_token_id, # Proper padding
+ eos_token_id=self.tokenizer.eos_token_id, # Proper stopping
+ )
+ else:
+ generated_ids = self.model.generate(
+ **inputs,
+ max_new_tokens=128, # Reduced from 512 - manga bubbles are typically short
+ do_sample=False, # Keep deterministic
+ temperature=0.01, # Keep your very low temperature
+ top_p=1.0, # Keep no nucleus sampling
+ repetition_penalty=1.0, # Keep no repetition penalty
+ num_beams=1, # Ensure greedy decoding (faster than beam search)
+ use_cache=True, # Enable KV cache for speed
+ early_stopping=True, # Stop at EOS token
+ pad_token_id=self.tokenizer.pad_token_id, # Proper padding
+ eos_token_id=self.tokenizer.eos_token_id, # Proper stopping
+ )
+
+ # Decode the output
+ generated_ids_trimmed = [
+ out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
+ ]
+ output_text = self.processor.batch_decode(
+ generated_ids_trimmed,
+ skip_special_tokens=True,
+ clean_up_tokenization_spaces=False
+ )[0]
+
+ if output_text and output_text.strip():
+ text = output_text.strip()
+
+ # ADDED: Filter out any response that looks like an explanation or apology
+ # Common patterns that indicate the model is being "helpful" instead of just extracting
+ unwanted_patterns = [
+ "죄송합니다", # "I apologize"
+ "sorry",
+ "apologize",
+ "이미지에는", # "in this image"
+ "텍스트가 없습니다", # "there is no text"
+ "I cannot",
+ "I don't see",
+ "There is no",
+ "질문이 있으시면", # "if you have questions"
+ ]
+
+ # Check if response contains unwanted patterns
+ text_lower = text.lower()
+ is_explanation = any(pattern.lower() in text_lower for pattern in unwanted_patterns)
+
+ # Also check if the response is suspiciously long for a bubble
+ # Most manga bubbles are short, if we get 50+ chars it might be an explanation
+ is_too_long = len(text) > 100 and ('.' in text or ',' in text or '!' in text)
+
+ if is_explanation or is_too_long:
+ self._log(f"⚠️ Model returned explanation instead of text, ignoring", "warning")
+ # Return empty result or just skip this region
+ return results
+
+ # Check language
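+                # Unicode ranges checked below: Hangul syllables U+AC00-U+D7AF, Japanese kana U+3040-U+30FF, CJK ideographs U+4E00-U+9FFF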
+ has_korean = any('\uAC00' <= c <= '\uD7AF' for c in text)
+ has_japanese = any('\u3040' <= c <= '\u309F' or '\u30A0' <= c <= '\u30FF' for c in text)
+ has_chinese = any('\u4E00' <= c <= '\u9FFF' for c in text)
+
+ if has_korean:
+ self._log(f"✅ Korean detected: {text[:50]}...")
+ elif has_japanese:
+ self._log(f"✅ Japanese detected: {text[:50]}...")
+ elif has_chinese:
+ self._log(f"✅ Chinese detected: {text[:50]}...")
+ else:
+ self._log(f"✅ Text: {text[:50]}...")
+
+ results.append(OCRResult(
+ text=text,
+ bbox=(0, 0, w, h),
+ confidence=0.9,
+ vertices=[(0, 0), (w, 0), (w, h), (0, h)]
+ ))
+ else:
+ self._log("⚠️ No text detected", "warning")
+
+ except Exception as e:
+ self._log(f"❌ Error: {str(e)}", "error")
+ import traceback
+ self._log(traceback.format_exc(), "debug")
+
+ return results
+
+class EasyOCRProvider(OCRProvider):
+ """EasyOCR provider for multiple languages"""
+
+ def __init__(self, log_callback=None, languages=None):
+ super().__init__(log_callback)
+ # Default to safe language combination
+ self.languages = languages or ['ja', 'en'] # Safe default
+ self._validate_language_combination()
+
+ def _validate_language_combination(self):
+ """Validate and fix EasyOCR language combinations"""
+ # EasyOCR language compatibility rules
+ incompatible_pairs = [
+ (['ja', 'ko'], 'Japanese and Korean cannot be used together'),
+ (['ja', 'zh'], 'Japanese and Chinese cannot be used together'),
+ (['ko', 'zh'], 'Korean and Chinese cannot be used together')
+ ]
+
+ for incompatible, reason in incompatible_pairs:
+ if all(lang in self.languages for lang in incompatible):
+ self._log(f"⚠️ EasyOCR: {reason}", "warning")
+ # Keep first language + English
+ self.languages = [self.languages[0], 'en']
+ self._log(f"🔧 Auto-adjusted to: {self.languages}", "info")
+ break
+
+ def check_installation(self) -> bool:
+ """Check if easyocr is installed"""
+ try:
+ import easyocr
+ self.is_installed = True
+ return True
+ except ImportError:
+ return False
+
+ def install(self, progress_callback=None) -> bool:
+ """Install easyocr"""
+ pass
+
+ def load_model(self, **kwargs) -> bool:
+ """Load easyocr model"""
+ try:
+ if not self.is_installed and not self.check_installation():
+ self._log("❌ easyocr not installed", "error")
+ return False
+
+ self._log(f"🔥 Loading easyocr model for languages: {self.languages}...")
+ import easyocr
+
+ # This will download models on first run
+ self.model = easyocr.Reader(self.languages, gpu=True)
+ self.is_loaded = True
+
+ self._log("✅ easyocr model loaded successfully")
+ return True
+
+ except Exception as e:
+ self._log(f"❌ Failed to load easyocr: {str(e)}", "error")
+ # Try CPU mode if GPU fails
+ try:
+ import easyocr
+ self.model = easyocr.Reader(self.languages, gpu=False)
+ self.is_loaded = True
+ self._log("✅ easyocr loaded in CPU mode")
+ return True
+            except Exception:
+                return False
+
+ def detect_text(self, image: np.ndarray, **kwargs) -> List[OCRResult]:
+ """Detect text using easyocr"""
+ results = []
+
+ try:
+ if not self.is_loaded:
+ if not self.load_model():
+ return results
+
+ # EasyOCR can work directly with numpy arrays
+ ocr_results = self.model.readtext(image, detail=1)
+
+ # Parse results
+ for (bbox, text, confidence) in ocr_results:
+ # bbox is a list of 4 points
+ xs = [point[0] for point in bbox]
+ ys = [point[1] for point in bbox]
+ x_min, x_max = min(xs), max(xs)
+ y_min, y_max = min(ys), max(ys)
+
+ results.append(OCRResult(
+ text=text,
+ bbox=(int(x_min), int(y_min), int(x_max - x_min), int(y_max - y_min)),
+ confidence=confidence,
+ vertices=[(int(p[0]), int(p[1])) for p in bbox]
+ ))
+
+ self._log(f"✅ Detected {len(results)} text regions")
+
+ except Exception as e:
+ self._log(f"❌ Error in easyocr detection: {str(e)}", "error")
+
+ return results
+
+
+class PaddleOCRProvider(OCRProvider):
+ """PaddleOCR provider with memory safety measures"""
+
+ def check_installation(self) -> bool:
+ """Check if paddleocr is installed"""
+ try:
+ from paddleocr import PaddleOCR
+ self.is_installed = True
+ return True
+ except ImportError:
+ return False
+
+ def install(self, progress_callback=None) -> bool:
+ """Install paddleocr"""
+ pass
+
+ def load_model(self, **kwargs) -> bool:
+ """Load paddleocr model with memory-safe configurations"""
+ try:
+ if not self.is_installed and not self.check_installation():
+ self._log("❌ paddleocr not installed", "error")
+ return False
+
+ self._log("🔥 Loading PaddleOCR model...")
+
+ # Set memory-safe environment variables BEFORE importing
+ import os
+ os.environ['OMP_NUM_THREADS'] = '1' # Prevent OpenMP conflicts
+ os.environ['MKL_NUM_THREADS'] = '1' # Prevent MKL conflicts
+ os.environ['OPENBLAS_NUM_THREADS'] = '1' # Prevent OpenBLAS conflicts
+ os.environ['FLAGS_use_mkldnn'] = '0' # Disable MKL-DNN
+
+ from paddleocr import PaddleOCR
+
+ # Try memory-safe configurations
+ configs_to_try = [
+ # Config 1: Most memory-safe configuration
+ {
+ 'use_angle_cls': False, # Disable angle to save memory
+ 'lang': 'ch',
+ 'rec_batch_num': 1, # Process one at a time
+ 'max_text_length': 100, # Limit text length
+ 'drop_score': 0.5, # Higher threshold to reduce detections
+ 'cpu_threads': 1, # Single thread to avoid conflicts
+ },
+ # Config 2: Minimal memory footprint
+ {
+ 'lang': 'ch',
+ 'rec_batch_num': 1,
+ 'cpu_threads': 1,
+ },
+ # Config 3: Absolute minimal
+ {
+ 'lang': 'ch'
+ },
+ # Config 4: Empty config
+ {}
+ ]
+
+ for i, config in enumerate(configs_to_try):
+ try:
+ self._log(f" Trying configuration {i+1}/{len(configs_to_try)}: {config}")
+
+ # Force garbage collection before loading
+ import gc
+ gc.collect()
+
+ self.model = PaddleOCR(**config)
+ self.is_loaded = True
+ self.current_config = config
+ self._log(f"✅ PaddleOCR loaded successfully with config: {config}")
+ return True
+ except Exception as e:
+ error_str = str(e)
+ self._log(f" Config {i+1} failed: {error_str}", "debug")
+
+ # Clean up on failure
+ if hasattr(self, 'model'):
+ del self.model
+ gc.collect()
+ continue
+
+ self._log(f"❌ PaddleOCR failed to load with any configuration", "error")
+ return False
+
+ except Exception as e:
+ self._log(f"❌ Failed to load paddleocr: {str(e)}", "error")
+ import traceback
+ self._log(traceback.format_exc(), "debug")
+ return False
+
+ def detect_text(self, image: np.ndarray, **kwargs) -> List[OCRResult]:
+ """Detect text with memory safety measures"""
+ results = []
+
+ try:
+ if not self.is_loaded:
+ if not self.load_model():
+ return results
+
+ import cv2
+ import numpy as np
+ import gc
+
+ # Memory safety: Ensure image isn't too large
+ h, w = image.shape[:2] if len(image.shape) >= 2 else (0, 0)
+
+ # Limit image size to prevent memory issues
+ MAX_DIMENSION = 1500
+ if h > MAX_DIMENSION or w > MAX_DIMENSION:
+ scale = min(MAX_DIMENSION/h, MAX_DIMENSION/w)
+ new_h, new_w = int(h*scale), int(w*scale)
+ self._log(f"⚠️ Resizing large image from {w}x{h} to {new_w}x{new_h} for memory safety", "warning")
+ image = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_AREA)
+ scale_factor = 1/scale
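+                # Example: a 3000x1200 image is scaled by 0.5 to 1500x600; scale_factor=2.0 maps boxes back to original coordinates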
+ else:
+ scale_factor = 1.0
+
+ # Ensure correct format
+ if len(image.shape) == 2: # Grayscale
+ image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
+ elif len(image.shape) == 4: # Batch
+ image = image[0]
+
+ # Ensure uint8 type
+ if image.dtype != np.uint8:
+ if image.max() <= 1.0:
+ image = (image * 255).astype(np.uint8)
+ else:
+ image = image.astype(np.uint8)
+
+ # Make a copy to avoid memory corruption
+ image_copy = image.copy()
+
+ # Force garbage collection before OCR
+ gc.collect()
+
+ # Process with timeout protection
+ import signal
+ import threading
+
+ ocr_results = None
+ ocr_error = None
+
+ def run_ocr():
+ nonlocal ocr_results, ocr_error
+ try:
+ ocr_results = self.model.ocr(image_copy)
+ except Exception as e:
+ ocr_error = e
+
+ # Run OCR in a separate thread with timeout
+ ocr_thread = threading.Thread(target=run_ocr)
+ ocr_thread.daemon = True
+ ocr_thread.start()
+ ocr_thread.join(timeout=30) # 30 second timeout
+
+ if ocr_thread.is_alive():
+ self._log("❌ PaddleOCR timeout - taking too long", "error")
+ return results
+
+ if ocr_error:
+ raise ocr_error
+
+ # Parse results
+ results = self._parse_ocr_results(ocr_results)
+
+ # Scale coordinates back if image was resized
+ if scale_factor != 1.0 and results:
+ for r in results:
+ x, y, width, height = r.bbox
+ r.bbox = (int(x*scale_factor), int(y*scale_factor),
+ int(width*scale_factor), int(height*scale_factor))
+ r.vertices = [(int(v[0]*scale_factor), int(v[1]*scale_factor))
+ for v in r.vertices]
+
+ if results:
+ self._log(f"✅ Detected {len(results)} text regions", "info")
+ else:
+ self._log("No text regions found", "debug")
+
+ # Clean up
+ del image_copy
+ gc.collect()
+
+ except Exception as e:
+ error_msg = str(e) if str(e) else type(e).__name__
+
+ if "memory" in error_msg.lower() or "0x" in error_msg:
+ self._log("❌ Memory access violation in PaddleOCR", "error")
+ self._log(" This is a known Windows issue with PaddleOCR", "info")
+ self._log(" Please switch to EasyOCR or manga-ocr instead", "warning")
+ elif "trace_order.size()" in error_msg:
+ self._log("❌ PaddleOCR internal error", "error")
+ self._log(" Please switch to EasyOCR or manga-ocr", "warning")
+ else:
+ self._log(f"❌ Error in paddleocr detection: {error_msg}", "error")
+
+ import traceback
+ self._log(traceback.format_exc(), "debug")
+
+ return results
+
+ def _parse_ocr_results(self, ocr_results) -> List[OCRResult]:
+ """Parse OCR results safely"""
+ results = []
+
+ if isinstance(ocr_results, bool) and ocr_results == False:
+ return results
+
+ if ocr_results is None or not isinstance(ocr_results, list):
+ return results
+
+ if len(ocr_results) == 0:
+ return results
+
+ # Handle batch format
+ if isinstance(ocr_results[0], list) and len(ocr_results[0]) > 0:
+ first_item = ocr_results[0][0]
+ if isinstance(first_item, list) and len(first_item) > 0:
+ if isinstance(first_item[0], (list, tuple)) and len(first_item[0]) == 2:
+ ocr_results = ocr_results[0]
+
+ # Parse detections
+ for detection in ocr_results:
+ if not detection or isinstance(detection, bool):
+ continue
+
+ if not isinstance(detection, (list, tuple)) or len(detection) < 2:
+ continue
+
+ try:
+ bbox_points = detection[0]
+ text_data = detection[1]
+
+ if not isinstance(bbox_points, (list, tuple)) or len(bbox_points) != 4:
+ continue
+
+ if not isinstance(text_data, (tuple, list)) or len(text_data) < 2:
+ continue
+
+ text = str(text_data[0]).strip()
+ confidence = float(text_data[1])
+
+ if not text or confidence < 0.3:
+ continue
+
+ xs = [float(p[0]) for p in bbox_points]
+ ys = [float(p[1]) for p in bbox_points]
+ x_min, x_max = min(xs), max(xs)
+ y_min, y_max = min(ys), max(ys)
+
+ if (x_max - x_min) < 5 or (y_max - y_min) < 5:
+ continue
+
+ results.append(OCRResult(
+ text=text,
+ bbox=(int(x_min), int(y_min), int(x_max - x_min), int(y_max - y_min)),
+ confidence=confidence,
+ vertices=[(int(p[0]), int(p[1])) for p in bbox_points]
+ ))
+
+ except Exception:
+ continue
+
+ return results
+
+class DocTROCRProvider(OCRProvider):
+ """DocTR OCR provider"""
+
+ def check_installation(self) -> bool:
+ """Check if doctr is installed"""
+ try:
+ from doctr.models import ocr_predictor
+ self.is_installed = True
+ return True
+ except ImportError:
+ return False
+
+ def install(self, progress_callback=None) -> bool:
+ """Install doctr"""
+ pass
+
+ def load_model(self, **kwargs) -> bool:
+ """Load doctr model"""
+ try:
+ if not self.is_installed and not self.check_installation():
+ self._log("❌ doctr not installed", "error")
+ return False
+
+ self._log("🔥 Loading DocTR model...")
+ from doctr.models import ocr_predictor
+
+ # Load pretrained model
+ self.model = ocr_predictor(pretrained=True)
+ self.is_loaded = True
+
+ self._log("✅ DocTR model loaded successfully")
+ return True
+
+ except Exception as e:
+ self._log(f"❌ Failed to load doctr: {str(e)}", "error")
+ return False
+
+ def detect_text(self, image: np.ndarray, **kwargs) -> List[OCRResult]:
+ """Detect text using doctr"""
+ results = []
+
+ try:
+ if not self.is_loaded:
+ if not self.load_model():
+ return results
+
+ from doctr.io import DocumentFile
+
+ # DocTR expects document format
+ # Convert numpy array to PIL and save temporarily
+ import tempfile
+ import cv2
+
+            tmp = tempfile.NamedTemporaryFile(suffix='.png', delete=False)
+            tmp.close()  # close the handle so cv2 can reopen the path on Windows
+            cv2.imwrite(tmp.name, image)
+            doc = DocumentFile.from_images(tmp.name)
+
+ # Run OCR
+ result = self.model(doc)
+
+ # Parse results
+ h, w = image.shape[:2]
+ for page in result.pages:
+ for block in page.blocks:
+ for line in block.lines:
+ for word in line.words:
+ # Handle different geometry formats
+ geometry = word.geometry
+
+ if len(geometry) == 4:
+ # Standard format: (x1, y1, x2, y2)
+ x1, y1, x2, y2 = geometry
+ elif len(geometry) == 2:
+ # Alternative format: ((x1, y1), (x2, y2))
+ (x1, y1), (x2, y2) = geometry
+ else:
+ self._log(f"Unexpected geometry format: {geometry}", "warning")
+ continue
+
+ # Convert relative coordinates to absolute
+ x1, x2 = int(x1 * w), int(x2 * w)
+ y1, y2 = int(y1 * h), int(y2 * h)
+
+ results.append(OCRResult(
+ text=word.value,
+ bbox=(x1, y1, x2 - x1, y2 - y1),
+ confidence=word.confidence,
+ vertices=[(x1, y1), (x2, y1), (x2, y2), (x1, y2)]
+ ))
+
+ # Clean up temp file
+ try:
+ os.unlink(tmp.name)
+            except Exception:
+                pass
+
+ self._log(f"DocTR detected {len(results)} text regions")
+
+ except Exception as e:
+ self._log(f"Error in doctr detection: {str(e)}", "error")
+ import traceback
+ self._log(traceback.format_exc(), "error")
+
+ return results
+
+
+class RapidOCRProvider(OCRProvider):
+ """RapidOCR provider for fast local OCR"""
+
+ def check_installation(self) -> bool:
+ """Check if rapidocr is installed"""
+ try:
+ import rapidocr_onnxruntime
+ self.is_installed = True
+ return True
+ except ImportError:
+ return False
+
+ def install(self, progress_callback=None) -> bool:
+ """Install rapidocr (requires manual pip install)"""
+ # RapidOCR requires manual installation
+ if progress_callback:
+ progress_callback("RapidOCR requires manual pip installation")
+ self._log("Run: pip install rapidocr-onnxruntime", "info")
+ return False # Always return False since we can't auto-install
+
+ def load_model(self, **kwargs) -> bool:
+ """Load RapidOCR model"""
+ try:
+ if not self.is_installed and not self.check_installation():
+ self._log("RapidOCR not installed", "error")
+ return False
+
+ self._log("Loading RapidOCR...")
+ from rapidocr_onnxruntime import RapidOCR
+
+ self.model = RapidOCR()
+ self.is_loaded = True
+
+ self._log("RapidOCR model loaded successfully")
+ return True
+
+ except Exception as e:
+ self._log(f"Failed to load RapidOCR: {str(e)}", "error")
+ return False
+
+ def detect_text(self, image: np.ndarray, **kwargs) -> List[OCRResult]:
+ """Detect text using RapidOCR"""
+ if not self.is_loaded:
+ self._log("RapidOCR model not loaded", "error")
+ return []
+
+ results = []
+
+ try:
+ import cv2 # local import, matching the DocTR provider above
+
+ # RapidOCR works on numpy arrays directly; convert BGR to RGB first
+ if len(image.shape) == 3:
+ image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+ else:
+ image_rgb = image
+
+ # self.model returns (results, timing info)
+ ocr_results, _ = self.model(image_rgb)
+
+ if ocr_results:
+ for result in ocr_results:
+ # RapidOCR returns [bbox, text, confidence]
+ bbox_points = result[0] # 4 corner points
+ text = result[1]
+ confidence = float(result[2])
+
+ if not text or not text.strip():
+ continue
+
+ # Convert 4-point bbox to x,y,w,h format
+ xs = [point[0] for point in bbox_points]
+ ys = [point[1] for point in bbox_points]
+ x_min, x_max = min(xs), max(xs)
+ y_min, y_max = min(ys), max(ys)
+
+ results.append(OCRResult(
+ text=text.strip(),
+ bbox=(int(x_min), int(y_min), int(x_max - x_min), int(y_max - y_min)),
+ confidence=confidence,
+ vertices=[(int(p[0]), int(p[1])) for p in bbox_points]
+ ))
+
+ self._log(f"Detected {len(results)} text regions")
+
+ except Exception as e:
+ self._log(f"Error in RapidOCR detection: {str(e)}", "error")
+
+ return results
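+
+ # Shape of one raw RapidOCR result and how detect_text() maps it to an
+ # OCRResult (values are illustrative, not real output):
+ #
+ #   [[[10, 5], [120, 5], [120, 40], [10, 40]], "Hello", 0.97]
+ #     -> OCRResult(text="Hello",
+ #                  bbox=(10, 5, 110, 35),   # x, y, w, h
+ #                  confidence=0.97,
+ #                  vertices=[(10, 5), (120, 5), (120, 40), (10, 40)])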
+
+
+class OCRManager:
+ """Manager for multiple OCR providers"""
+
+ def __init__(self, log_callback=None):
+ self.log_callback = log_callback
+ self.providers = {
+ 'custom-api': CustomAPIProvider(log_callback),
+ 'manga-ocr': MangaOCRProvider(log_callback),
+ 'easyocr': EasyOCRProvider(log_callback),
+ 'paddleocr': PaddleOCRProvider(log_callback),
+ 'doctr': DocTROCRProvider(log_callback),
+ 'rapidocr': RapidOCRProvider(log_callback),
+ 'Qwen2-VL': Qwen2VL(log_callback)
+ }
+ self.current_provider = None
+ self.stop_flag = None
+
+ def get_provider(self, name: str) -> Optional[OCRProvider]:
+ """Get OCR provider by name"""
+ return self.providers.get(name)
+
+ def set_current_provider(self, name: str):
+ """Set current active provider"""
+ if name in self.providers:
+ self.current_provider = name
+ return True
+ return False
+
+ def check_provider_status(self, name: str) -> Dict[str, bool]:
+ """Check installation and loading status of provider"""
+ provider = self.providers.get(name)
+ if not provider:
+ return {'installed': False, 'loaded': False}
+
+ result = {
+ 'installed': provider.check_installation(),
+ 'loaded': provider.is_loaded
+ }
+ if self.log_callback:
+ self.log_callback(f"DEBUG: check_provider_status({name}) returning loaded={result['loaded']}", "debug")
+ return result
+
+ def install_provider(self, name: str, progress_callback=None) -> bool:
+ """Install a provider"""
+ provider = self.providers.get(name)
+ if not provider:
+ return False
+
+ return provider.install(progress_callback)
+
+ def load_provider(self, name: str, **kwargs) -> bool:
+ """Load a provider's model with optional parameters"""
+ provider = self.providers.get(name)
+ if not provider:
+ return False
+
+ return provider.load_model(**kwargs) # <-- Passes model_size and any other kwargs
+
+ def shutdown(self):
+ """Release models/processors/tokenizers for all providers and clear caches."""
+ try:
+ import gc
+ for name, provider in list(self.providers.items()):
+ try:
+ if hasattr(provider, 'model'):
+ provider.model = None
+ if hasattr(provider, 'processor'):
+ provider.processor = None
+ if hasattr(provider, 'tokenizer'):
+ provider.tokenizer = None
+ if hasattr(provider, 'reader'):
+ provider.reader = None
+ if hasattr(provider, 'is_loaded'):
+ provider.is_loaded = False
+ except Exception:
+ pass
+ gc.collect()
+ try:
+ import torch
+ if torch.cuda.is_available():
+ torch.cuda.empty_cache()
+ except Exception:
+ pass
+ except Exception:
+ pass
+
+ def detect_text(self, image: np.ndarray, provider_name: str = None, **kwargs) -> List[OCRResult]:
+ """Detect text using specified or current provider"""
+ provider_name = provider_name or self.current_provider
+ if not provider_name:
+ return []
+
+ provider = self.providers.get(provider_name)
+ if not provider:
+ return []
+
+ return provider.detect_text(image, **kwargs)
+
+ def set_stop_flag(self, stop_flag):
+ """Set stop flag for all providers"""
+ self.stop_flag = stop_flag
+ for provider in self.providers.values():
+ if hasattr(provider, 'set_stop_flag'):
+ provider.set_stop_flag(stop_flag)
+
+ def reset_stop_flags(self):
+ """Reset stop flags for all providers"""
+ for provider in self.providers.values():
+ if hasattr(provider, 'reset_stop_flags'):
+ provider.reset_stop_flags()
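+
+
+# A minimal end-to-end sketch of OCRManager (illustrative only). Provider names
+# must match the keys registered in __init__ above; any extra keyword arguments
+# passed to load_provider()/detect_text() depend on the chosen provider:
+#
+#   import cv2
+#   manager = OCRManager(log_callback=print)
+#   manager.set_current_provider('rapidocr')
+#   if manager.check_provider_status('rapidocr')['installed'] and manager.load_provider('rapidocr'):
+#       image = cv2.imread("page.png")
+#       for r in manager.detect_text(image):
+#           print(r.text, r.bbox, r.confidence)
+#   manager.shutdown()  # release models and clear the CUDA cache when done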