# Glossarion / bubble_detector.py
# (upload-page metadata removed: "Shirochi's picture / Upload 7 files / f66ccd1 verified")
"""
bubble_detector.py - Modified version that works in frozen PyInstaller executables
Replace your bubble_detector.py with this version
"""
import os
import sys
import json
import numpy as np
import cv2
from typing import List, Tuple, Optional, Dict, Any
import logging
import traceback
import hashlib
from pathlib import Path
import threading
import time
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Detect whether we run inside a PyInstaller-frozen executable
# (the bootloader sets sys.frozen on startup).
IS_FROZEN = getattr(sys, 'frozen', False)
if IS_FROZEN:
    # Redirect the ML-framework cache directories into the PyInstaller
    # extraction dir so torch/transformers resolve their files in the bundle.
    MEIPASS = sys._MEIPASS
    for _env_name, _env_value in (
        ('TORCH_HOME', MEIPASS),
        ('TRANSFORMERS_CACHE', os.path.join(MEIPASS, 'transformers')),
        ('HF_HOME', os.path.join(MEIPASS, 'huggingface')),
    ):
        os.environ[_env_name] = _env_value
    logger.info(f"Running in frozen environment: {MEIPASS}")
# Modified import checks for frozen environment.
# Availability flags for every optional backend; the import probes further
# down flip these to True when the matching package loads successfully.
YOLO_AVAILABLE = False
TORCH_AVAILABLE = False
ONNX_AVAILABLE = False
TRANSFORMERS_AVAILABLE = False
PIL_AVAILABLE = False
# Late-bound module/class handles; stay None until the import succeeds.
YOLO = None
torch = None
RTDetrForObjectDetection = None
RTDetrImageProcessor = None
# Try to import YOLO dependencies with better error handling.
# Fix: the original used bare `except:` clauses in three spots, which also
# swallow SystemExit/KeyboardInterrupt; narrowed to `except Exception:`.
if IS_FROZEN:
    # In a frozen (PyInstaller) build, probe each dependency individually so
    # a partially bundled build degrades gracefully instead of failing hard.
    try:
        # Import torch submodules explicitly; frozen bundles sometimes omit them.
        import torch
        import torch.nn
        import torch.cuda
        TORCH_AVAILABLE = True
        logger.info("✓ PyTorch loaded in frozen environment")
    except Exception as e:
        logger.warning(f"PyTorch not available in frozen environment: {e}")
        TORCH_AVAILABLE = False
        torch = None
    # Ultralytics depends on torch, so only attempt it when torch loaded.
    if TORCH_AVAILABLE:
        try:
            from ultralytics import YOLO
            YOLO_AVAILABLE = True
            logger.info("✓ Ultralytics YOLO loaded in frozen environment")
        except Exception as e:
            logger.warning(f"Ultralytics not available in frozen environment: {e}")
            YOLO_AVAILABLE = False
    # Transformers probe: prefer the dedicated RT-DETR classes, fall back to
    # the generic Auto* classes for older/newer transformers versions.
    try:
        import transformers
        try:
            from transformers import RTDetrForObjectDetection, RTDetrImageProcessor
            TRANSFORMERS_AVAILABLE = True
            logger.info("✓ Transformers RT-DETR loaded in frozen environment")
        except ImportError:
            # Try alternative import
            try:
                from transformers import AutoModel, AutoImageProcessor
                RTDetrForObjectDetection = AutoModel
                RTDetrImageProcessor = AutoImageProcessor
                TRANSFORMERS_AVAILABLE = True
                logger.info("✓ Transformers loaded with AutoModel fallback")
            except Exception:  # was a bare except
                TRANSFORMERS_AVAILABLE = False
                logger.warning("Transformers RT-DETR not available in frozen environment")
    except Exception as e:
        logger.warning(f"Transformers not available in frozen environment: {e}")
        TRANSFORMERS_AVAILABLE = False
else:
    # Normal environment - original import logic.
    try:
        from ultralytics import YOLO
        YOLO_AVAILABLE = True
    except Exception:  # was a bare except
        YOLO_AVAILABLE = False
        logger.warning("Ultralytics YOLO not available")
    try:
        import torch
        # Test if cuda attribute exists (guards against stripped/partial torch builds)
        _ = torch.cuda
        TORCH_AVAILABLE = True
    except (ImportError, AttributeError):
        TORCH_AVAILABLE = False
        torch = None
        logger.warning("PyTorch not available or incomplete")
    try:
        from transformers import RTDetrForObjectDetection, RTDetrImageProcessor
        try:
            # Prefer the V2 detection head when this transformers version ships it.
            from transformers import RTDetrV2ForObjectDetection
            RTDetrForObjectDetection = RTDetrV2ForObjectDetection
        except ImportError:
            pass
        TRANSFORMERS_AVAILABLE = True
    except Exception:  # was a bare except
        TRANSFORMERS_AVAILABLE = False
        logger.info("Transformers not available for RT-DETR")
# Configure ORT memory behavior before importing onnxruntime: disabling the
# arena keeps long-running processes from holding freed memory.
try:
    os.environ.setdefault('ORT_DISABLE_MEMORY_ARENA', '1')
except Exception:
    pass

# ONNX Runtime - works well in frozen environments
try:
    import onnxruntime as ort
except ImportError:
    ONNX_AVAILABLE = False
    logger.warning("ONNX Runtime not available")
else:
    ONNX_AVAILABLE = True
    logger.info("✓ ONNX Runtime available")

# Pillow (needed by the RT-DETR preprocessing path)
try:
    from PIL import Image
except ImportError:
    PIL_AVAILABLE = False
    logger.info("PIL not available")
else:
    PIL_AVAILABLE = True
class BubbleDetector:
    """
    Combined YOLOv8 and RT-DETR speech bubble detector for comics and manga.
    Supports multiple model formats and provides configurable detection.
    Backward compatible with existing code while adding RT-DETR support.
    """
    # Process-wide shared RT-DETR to avoid concurrent meta-device loads.
    # All instances attach to the same loaded model/processor once one
    # instance has finished loading (see load_rtdetr_model).
    _rtdetr_init_lock = threading.Lock()
    _rtdetr_shared_model = None
    _rtdetr_shared_processor = None
    _rtdetr_loaded = False
    _rtdetr_repo_id = 'ogkalu/comic-text-and-bubble-detector'
    # Shared RT-DETR (ONNX) across process to avoid device/context storms
    _rtdetr_onnx_init_lock = threading.Lock()
    _rtdetr_onnx_shared_session = None
    _rtdetr_onnx_loaded = False
    _rtdetr_onnx_providers = None
    _rtdetr_onnx_model_path = None
    # Limit concurrent runs to avoid device hangs. Defaults to 2 for better parallelism.
    # Can be overridden via env DML_MAX_CONCURRENT or config rtdetr_max_concurrency
    try:
        _rtdetr_onnx_max_concurrent = int(os.environ.get('DML_MAX_CONCURRENT', '2'))
    except Exception:
        # Non-numeric env value: fall back to the default of 2.
        _rtdetr_onnx_max_concurrent = 2
    # Gate on concurrent ONNX inference; may be re-created in __init__ when the
    # config requests a different concurrency.
    _rtdetr_onnx_sema = threading.Semaphore(max(1, _rtdetr_onnx_max_concurrent))
    _rtdetr_onnx_sema_initialized = False
def __init__(self, config_path: str = "config.json"):
    """
    Initialize the bubble detector.

    Reads settings from the JSON config, prepares the model cache directory
    and probes GPU availability. No model weights are loaded here; call
    load_model() / load_rtdetr_model() afterwards.

    Args:
        config_path: Path to configuration file
    """
    # Set thread limits early if environment indicates single-threaded mode
    try:
        if os.environ.get('OMP_NUM_THREADS') == '1':
            # Already in single-threaded mode, ensure it's applied to this process
            # Check if torch is available at module level before trying to use it
            if TORCH_AVAILABLE and torch is not None:
                try:
                    torch.set_num_threads(1)
                except (RuntimeError, AttributeError):
                    pass
            try:
                import cv2
                cv2.setNumThreads(1)
            except (ImportError, AttributeError):
                pass
    except Exception:
        pass
    self.config_path = config_path
    self.config = self._load_config()
    # YOLOv8 components (original)
    self.model = None
    self.model_loaded = False
    self.model_type = None  # 'yolo', 'onnx', or 'torch'
    self.onnx_session = None
    # RT-DETR components (new)
    self.rtdetr_model = None
    self.rtdetr_processor = None
    self.rtdetr_loaded = False
    self.rtdetr_repo = 'ogkalu/comic-text-and-bubble-detector'
    # RT-DETR (ONNX) backend components
    self.rtdetr_onnx_session = None
    self.rtdetr_onnx_loaded = False
    self.rtdetr_onnx_repo = 'ogkalu/comic-text-and-bubble-detector'
    # RT-DETR class definitions (label ids emitted by the detector)
    self.CLASS_BUBBLE = 0  # Empty speech bubble
    self.CLASS_TEXT_BUBBLE = 1  # Bubble with text
    self.CLASS_TEXT_FREE = 2  # Text without bubble
    # Detection settings
    self.default_confidence = 0.3
    self.default_iou_threshold = 0.45
    # Allow override from settings
    try:
        ocr_cfg = self.config.get('manga_settings', {}).get('ocr', {}) if isinstance(self.config, dict) else {}
        self.default_max_detections = int(ocr_cfg.get('bubble_max_detections', 100))
        self.max_det_yolo = int(ocr_cfg.get('bubble_max_detections_yolo', self.default_max_detections))
        self.max_det_rtdetr = int(ocr_cfg.get('bubble_max_detections_rtdetr', self.default_max_detections))
    except Exception:
        self.default_max_detections = 100
        self.max_det_yolo = 100
        self.max_det_rtdetr = 100
    # Cache directory for ONNX conversions
    self.cache_dir = os.environ.get('BUBBLE_CACHE_DIR', 'models')
    os.makedirs(self.cache_dir, exist_ok=True)
    # RT-DETR concurrency setting from config.
    # NOTE(review): 'ocr_cfg' is bound inside the try-block above; if that
    # block raised before binding it, the NameError here is silently caught
    # by this except — confirm this fallback is intended.
    try:
        rtdetr_max_conc = int(ocr_cfg.get('rtdetr_max_concurrency', 2))
        # Update class-level semaphore if not yet initialized or if value changed
        if not BubbleDetector._rtdetr_onnx_sema_initialized or rtdetr_max_conc != BubbleDetector._rtdetr_onnx_max_concurrent:
            BubbleDetector._rtdetr_onnx_max_concurrent = max(1, rtdetr_max_conc)
            BubbleDetector._rtdetr_onnx_sema = threading.Semaphore(BubbleDetector._rtdetr_onnx_max_concurrent)
            BubbleDetector._rtdetr_onnx_sema_initialized = True
            logger.info(f"RT-DETR concurrency set to: {BubbleDetector._rtdetr_onnx_max_concurrent}")
    except Exception as e:
        logger.warning(f"Failed to set RT-DETR concurrency: {e}")
    # GPU availability
    self.use_gpu = TORCH_AVAILABLE and torch.cuda.is_available()
    self.device = 'cuda' if self.use_gpu else 'cpu'
    # Quantization/precision settings (env vars take precedence over config)
    adv_cfg = self.config.get('manga_settings', {}).get('advanced', {}) if isinstance(self.config, dict) else {}
    ocr_cfg = self.config.get('manga_settings', {}).get('ocr', {}) if isinstance(self.config, dict) else {}
    env_quant = os.environ.get('MODEL_QUANTIZE', 'false').lower() == 'true'
    self.quantize_enabled = bool(env_quant or adv_cfg.get('quantize_models', False) or ocr_cfg.get('quantize_bubble_detector', False))
    self.quantize_dtype = str(adv_cfg.get('torch_precision', os.environ.get('TORCH_PRECISION', 'auto'))).lower()
    # Prefer advanced.onnx_quantize; fall back to env or global quantize
    self.onnx_quantize_enabled = bool(adv_cfg.get('onnx_quantize', os.environ.get('ONNX_QUANTIZE', 'false').lower() == 'true' or self.quantize_enabled))
    # Stop flag support (cooperative cancellation; see set_stop_flag/_check_stop)
    self.stop_flag = None
    self._stopped = False
    self.log_callback = None
    logger.info(f"🗨️ BubbleDetector initialized")
    logger.info(f" GPU: {'Available' if self.use_gpu else 'Not available'}")
    logger.info(f" YOLO: {'Available' if YOLO_AVAILABLE else 'Not installed'}")
    logger.info(f" ONNX: {'Available' if ONNX_AVAILABLE else 'Not installed'}")
    logger.info(f" RT-DETR: {'Available' if TRANSFORMERS_AVAILABLE else 'Not installed'}")
    logger.info(f" Quantization: {'ENABLED' if self.quantize_enabled else 'disabled'} (torch_precision={self.quantize_dtype}, onnx_quantize={'on' if self.onnx_quantize_enabled else 'off'})" )
def _load_config(self) -> Dict[str, Any]:
"""Load configuration from file."""
if os.path.exists(self.config_path):
try:
with open(self.config_path, 'r', encoding='utf-8') as f:
return json.load(f)
except Exception as e:
logger.warning(f"Failed to load config: {e}")
return {}
def _save_config(self):
"""Save configuration to file."""
try:
with open(self.config_path, 'w', encoding='utf-8') as f:
json.dump(self.config, f, indent=2)
except Exception as e:
logger.error(f"Failed to save config: {e}")
def set_stop_flag(self, stop_flag):
"""Set the stop flag for checking interruptions"""
self.stop_flag = stop_flag
self._stopped = False
def set_log_callback(self, log_callback):
"""Set log callback for GUI integration"""
self.log_callback = log_callback
def _check_stop(self) -> bool:
"""Check if stop has been requested"""
if self._stopped:
return True
if self.stop_flag and self.stop_flag.is_set():
self._stopped = True
return True
# Check global manga translator cancellation
try:
from manga_translator import MangaTranslator
if MangaTranslator.is_globally_cancelled():
self._stopped = True
return True
except Exception:
pass
return False
def _log(self, message: str, level: str = "info"):
"""Log message with stop suppression"""
# Suppress logs when stopped (allow only essential stop confirmation messages)
if self._check_stop():
essential_stop_keywords = [
"⏹️ Translation stopped by user",
"⏹️ Bubble detection stopped",
"cleanup", "🧹"
]
if not any(keyword in message for keyword in essential_stop_keywords):
return
if self.log_callback:
self.log_callback(message, level)
else:
logger.info(message) if level == 'info' else getattr(logger, level, logger.info)(message)
def reset_stop_flags(self):
"""Reset stop flags when starting new processing"""
self._stopped = False
def load_model(self, model_path: str, force_reload: bool = False) -> bool:
    """
    Load a YOLOv8 model for bubble detection.

    Accepts a local file (.pt/.pth, .onnx, .torchscript) or a Hugging Face
    repo id ('owner/name'); in the latter case 'detector.onnx' is downloaded
    into the cache directory first.

    Args:
        model_path: Path to model file (.pt, .onnx, or .torchscript)
        force_reload: Force reload even if model is already loaded
    Returns:
        True if model loaded successfully, False otherwise
    """
    try:
        # If given a Hugging Face repo ID (e.g., 'owner/name'), fetch detector.onnx into models/
        if model_path and (('/' in model_path) and not os.path.exists(model_path)):
            try:
                from huggingface_hub import hf_hub_download
                os.makedirs(self.cache_dir, exist_ok=True)
                logger.info(f"📥 Resolving repo '{model_path}' to detector.onnx in {self.cache_dir}...")
                resolved = hf_hub_download(repo_id=model_path, filename='detector.onnx', cache_dir=self.cache_dir, local_dir=self.cache_dir, local_dir_use_symlinks=False)
                if resolved and os.path.exists(resolved):
                    model_path = resolved
                    logger.info(f"✅ Downloaded detector.onnx to: {model_path}")
            except Exception as repo_err:
                # Download failed: fall through, the existence check below reports it
                logger.error(f"Failed to download from repo '{model_path}': {repo_err}")
        if not os.path.exists(model_path):
            logger.error(f"Model file not found: {model_path}")
            return False
        # Check if it's the same model already loaded
        if self.model_loaded and not force_reload:
            last_path = self.config.get('last_model_path', '')
            if last_path == model_path:
                logger.info("Model already loaded (same path)")
                return True
            else:
                logger.info(f"Model path changed from {last_path} to {model_path}, reloading...")
                force_reload = True
        # Clear previous model if force reload
        if force_reload:
            logger.info("Force reloading model...")
            self.model = None
            self.onnx_session = None
            self.model_loaded = False
            self.model_type = None
        logger.info(f"📥 Loading bubble detection model: {model_path}")
        # Determine model type by extension
        ext = Path(model_path).suffix.lower()
        if ext in ['.pt', '.pth']:
            if not YOLO_AVAILABLE:
                logger.warning("Ultralytics package not available in this build")
                logger.info("Bubble detection will be disabled - this is normal for lightweight builds")
                # Don't return False immediately, try other fallbacks
                self.model_loaded = False
                return False
            # Load YOLOv8 model
            try:
                self.model = YOLO(model_path)
                self.model_type = 'yolo'
                # Set to eval mode
                if hasattr(self.model, 'model'):
                    self.model.model.eval()
                # Move to GPU if available
                if self.use_gpu and TORCH_AVAILABLE:
                    try:
                        self.model.to('cuda')
                    except Exception as gpu_error:
                        logger.warning(f"Could not move model to GPU: {gpu_error}")
                logger.info("✅ YOLOv8 model loaded successfully")
                # Apply optional FP16 precision to reduce VRAM if enabled
                if self.quantize_enabled and self.use_gpu and TORCH_AVAILABLE:
                    try:
                        m = self.model.model if hasattr(self.model, 'model') else self.model
                        m.half()
                        logger.info("🔻 Applied FP16 precision to YOLO model (GPU)")
                    except Exception as _e:
                        logger.warning(f"Could not switch YOLO model to FP16: {_e}")
            except Exception as yolo_error:
                logger.error(f"Failed to load YOLO model: {yolo_error}")
                return False
        elif ext == '.onnx':
            if not ONNX_AVAILABLE:
                logger.warning("ONNX Runtime not available in this build")
                logger.info("ONNX model support disabled - this is normal for lightweight builds")
                return False
            try:
                # Load ONNX model
                providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if self.use_gpu else ['CPUExecutionProvider']
                session_path = model_path
                # NOTE(review): gated on quantize_enabled (the global switch),
                # not onnx_quantize_enabled — confirm this is intended.
                if self.quantize_enabled:
                    try:
                        from onnxruntime.quantization import quantize_dynamic, QuantType
                        quant_path = os.path.splitext(model_path)[0] + ".int8.onnx"
                        if not os.path.exists(quant_path) or os.environ.get('FORCE_ONNX_REBUILD', 'false').lower() == 'true':
                            logger.info("🔻 Quantizing ONNX model weights to INT8 (dynamic)...")
                            quantize_dynamic(model_input=model_path, model_output=quant_path, weight_type=QuantType.QInt8, op_types_to_quantize=['Conv', 'MatMul'])
                        session_path = quant_path
                        self.config['last_onnx_quantized_path'] = quant_path
                        self._save_config()
                        logger.info(f"✅ Using quantized ONNX model: {quant_path}")
                    except Exception as qe:
                        logger.warning(f"ONNX quantization not applied: {qe}")
                # Use conservative ORT memory options to reduce RAM growth
                so = ort.SessionOptions()
                try:
                    so.enable_mem_pattern = False
                    so.enable_cpu_mem_arena = False
                except Exception:
                    pass
                self.onnx_session = ort.InferenceSession(session_path, sess_options=so, providers=providers)
                self.model_type = 'onnx'
                logger.info("✅ ONNX model loaded successfully")
            except Exception as onnx_error:
                logger.error(f"Failed to load ONNX model: {onnx_error}")
                return False
        elif ext == '.torchscript':
            if not TORCH_AVAILABLE:
                logger.warning("PyTorch not available in this build")
                logger.info("TorchScript model support disabled - this is normal for lightweight builds")
                return False
            try:
                # Add safety check for torch being None
                if torch is None:
                    logger.error("PyTorch module is None - cannot load TorchScript model")
                    return False
                # Load TorchScript model (always to CPU first, then migrate)
                self.model = torch.jit.load(model_path, map_location='cpu')
                self.model.eval()
                self.model_type = 'torch'
                if self.use_gpu:
                    try:
                        self.model = self.model.cuda()
                    except Exception as gpu_error:
                        logger.warning(f"Could not move TorchScript model to GPU: {gpu_error}")
                logger.info("✅ TorchScript model loaded successfully")
                # Optional FP16 precision on GPU
                if self.quantize_enabled and self.use_gpu and TORCH_AVAILABLE:
                    try:
                        self.model = self.model.half()
                        logger.info("🔻 Applied FP16 precision to TorchScript model (GPU)")
                    except Exception as _e:
                        logger.warning(f"Could not switch TorchScript model to FP16: {_e}")
            except Exception as torch_error:
                logger.error(f"Failed to load TorchScript model: {torch_error}")
                return False
        else:
            logger.error(f"Unsupported model format: {ext}")
            logger.info("Supported formats: .pt/.pth (YOLOv8), .onnx (ONNX), .torchscript (TorchScript)")
            return False
        # Only set loaded if we actually succeeded
        self.model_loaded = True
        self.config['last_model_path'] = model_path
        self.config['model_type'] = self.model_type
        self._save_config()
        return True
    except Exception as e:
        logger.error(f"Failed to load model: {e}")
        logger.error(traceback.format_exc())
        self.model_loaded = False
        # Provide helpful context for .exe users
        logger.info("Note: If running from .exe, some ML libraries may not be included")
        logger.info("This is normal for lightweight builds - bubble detection will be disabled")
        return False
def load_rtdetr_model(self, model_path: str = None, model_id: str = None, force_reload: bool = False) -> bool:
    """
    Load RT-DETR model for advanced bubble and text detection.
    This implementation avoids the 'meta tensor' copy error by:
    - Serializing the entire load under a class lock (no concurrent loads)
    - Loading directly onto the target device (CUDA if available) via device_map='auto'
    - Avoiding .to() on a potentially-meta model; no device migration post-load
    Args:
        model_path: Optional path to local model
        model_id: Optional HuggingFace model ID (default: 'ogkalu/comic-text-and-bubble-detector')
        force_reload: Force reload even if already loaded
    Returns:
        True if successful, False otherwise
    """
    if not TRANSFORMERS_AVAILABLE:
        logger.error("Transformers library required for RT-DETR. Install with: pip install transformers")
        return False
    if not PIL_AVAILABLE:
        logger.error("PIL required for RT-DETR. Install with: pip install pillow")
        return False
    if self.rtdetr_loaded and not force_reload:
        logger.info("RT-DETR model already loaded")
        return True
    # Fast path: if shared already loaded and not forcing reload, attach
    if BubbleDetector._rtdetr_loaded and not force_reload:
        self.rtdetr_model = BubbleDetector._rtdetr_shared_model
        self.rtdetr_processor = BubbleDetector._rtdetr_shared_processor
        self.rtdetr_loaded = True
        logger.info("RT-DETR model attached from shared cache")
        return True
    # Serialize the ENTIRE loading sequence to avoid concurrent init issues
    with BubbleDetector._rtdetr_init_lock:
        try:
            # Re-check after acquiring lock (another thread may have loaded it)
            if BubbleDetector._rtdetr_loaded and not force_reload:
                self.rtdetr_model = BubbleDetector._rtdetr_shared_model
                self.rtdetr_processor = BubbleDetector._rtdetr_shared_processor
                self.rtdetr_loaded = True
                logger.info("RT-DETR model attached from shared cache (post-lock)")
                return True
            # Use custom model_id if provided, otherwise use default
            repo_id = model_id if model_id else self.rtdetr_repo
            logger.info(f"📥 Loading RT-DETR model from {repo_id}...")
            # Ensure TorchDynamo/compile doesn't interfere on some builds
            try:
                os.environ.setdefault('TORCHDYNAMO_DISABLE', '1')
            except Exception:
                pass
            # Decide device strategy
            gpu_available = bool(TORCH_AVAILABLE and hasattr(torch, 'cuda') and torch.cuda.is_available())
            device_map = 'auto' if gpu_available else None
            # Choose dtype: fp16 on GPU, fp32 on CPU
            dtype = None
            if TORCH_AVAILABLE:
                try:
                    dtype = torch.float16 if gpu_available else torch.float32
                except Exception:
                    dtype = None
            low_cpu = True if gpu_available else False
            # Load processor (once); force a fixed 640x640 input size
            self.rtdetr_processor = RTDetrImageProcessor.from_pretrained(
                repo_id,
                size={"width": 640, "height": 640},
                cache_dir=self.cache_dir if not model_path else None
            )
            # Prepare kwargs for from_pretrained
            from_kwargs = {
                'cache_dir': self.cache_dir if not model_path else None,
                'low_cpu_mem_usage': low_cpu,
                'device_map': device_map,
            }
            # Note: dtype is handled via torch_dtype parameter in newer transformers
            if dtype is not None:
                from_kwargs['torch_dtype'] = dtype
            # First attempt: load directly to target (CUDA if available)
            try:
                self.rtdetr_model = RTDetrForObjectDetection.from_pretrained(
                    model_path if model_path else repo_id,
                    **from_kwargs,
                )
            except Exception as primary_err:
                # Fallback to a simple CPU load (no device move) if CUDA path fails
                logger.warning(f"RT-DETR primary load failed ({primary_err}); retrying on CPU...")
                from_kwargs_fallback = {
                    'cache_dir': self.cache_dir if not model_path else None,
                    'low_cpu_mem_usage': False,
                    'device_map': None,
                }
                if TORCH_AVAILABLE:
                    from_kwargs_fallback['torch_dtype'] = torch.float32
                self.rtdetr_model = RTDetrForObjectDetection.from_pretrained(
                    model_path if model_path else repo_id,
                    **from_kwargs_fallback,
                )
            # Optional dynamic quantization for linear layers (CPU only)
            if self.quantize_enabled and TORCH_AVAILABLE and (not gpu_available):
                try:
                    # torch.ao.quantization is the modern path; older torch
                    # versions expose the same API under torch.quantization
                    try:
                        import torch.ao.quantization as tq
                        quantize_dynamic = tq.quantize_dynamic  # type: ignore
                    except Exception:
                        import torch.quantization as tq  # type: ignore
                        quantize_dynamic = tq.quantize_dynamic  # type: ignore
                    self.rtdetr_model = quantize_dynamic(self.rtdetr_model, {torch.nn.Linear}, dtype=torch.qint8)
                    logger.info("🔻 Applied dynamic INT8 quantization to RT-DETR linear layers (CPU)")
                except Exception as qe:
                    logger.warning(f"RT-DETR dynamic quantization skipped: {qe}")
            # Finalize
            self.rtdetr_model.eval()
            # Sanity check: ensure no parameter is left on 'meta' device
            try:
                for n, p in self.rtdetr_model.named_parameters():
                    dev = getattr(p, 'device', None)
                    if dev is not None and getattr(dev, 'type', '') == 'meta':
                        raise RuntimeError(f"Parameter {n} is on 'meta' device after load")
            except Exception as e:
                logger.error(f"RT-DETR load sanity check failed: {e}")
                self.rtdetr_loaded = False
                return False
            # Publish shared cache so other instances can attach cheaply
            BubbleDetector._rtdetr_shared_model = self.rtdetr_model
            BubbleDetector._rtdetr_shared_processor = self.rtdetr_processor
            BubbleDetector._rtdetr_loaded = True
            BubbleDetector._rtdetr_repo_id = repo_id
            self.rtdetr_loaded = True
            # Save the model ID that was used
            self.config['rtdetr_loaded'] = True
            self.config['rtdetr_model_id'] = repo_id
            self._save_config()
            loc = 'CUDA' if gpu_available else 'CPU'
            logger.info(f"✅ RT-DETR model loaded successfully ({loc})")
            logger.info(" Classes: Empty bubbles, Text bubbles, Free text")
            # Auto-convert to ONNX for RT-DETR only if explicitly enabled
            if os.environ.get('AUTO_CONVERT_RTDETR_ONNX', 'false').lower() == 'true':
                onnx_path = os.path.join(self.cache_dir, 'rtdetr_comic.onnx')
                if self.convert_to_onnx('rtdetr', onnx_path):
                    logger.info("🚀 RT-DETR converted to ONNX for faster inference")
                    # Store ONNX path for later use
                    self.config['rtdetr_onnx_path'] = onnx_path
                    self._save_config()
                    # Optionally quantize ONNX for reduced RAM
                    if self.onnx_quantize_enabled:
                        try:
                            from onnxruntime.quantization import quantize_dynamic, QuantType
                            quant_path = os.path.splitext(onnx_path)[0] + ".int8.onnx"
                            if not os.path.exists(quant_path) or os.environ.get('FORCE_ONNX_REBUILD', 'false').lower() == 'true':
                                logger.info("🔻 Quantizing RT-DETR ONNX to INT8 (dynamic)...")
                                quantize_dynamic(model_input=onnx_path, model_output=quant_path, weight_type=QuantType.QInt8, op_types_to_quantize=['Conv', 'MatMul'])
                                self.config['rtdetr_onnx_quantized_path'] = quant_path
                                self._save_config()
                                logger.info(f"✅ Quantized RT-DETR ONNX saved to: {quant_path}")
                        except Exception as qe:
                            logger.warning(f"ONNX quantization for RT-DETR skipped: {qe}")
                else:
                    logger.info("ℹ️ Skipping RT-DETR ONNX export (converter not supported in current environment)")
            return True
        except Exception as e:
            logger.error(f"❌ Failed to load RT-DETR: {e}")
            self.rtdetr_loaded = False
            return False
def check_rtdetr_available(self, model_id: str = None) -> bool:
"""
Check if RT-DETR model is available (cached).
Args:
model_id: Optional HuggingFace model ID
Returns:
True if model is cached and available
"""
try:
from pathlib import Path
# Use provided model_id or default
repo_id = model_id if model_id else self.rtdetr_repo
# Check HuggingFace cache
cache_dir = Path.home() / ".cache" / "huggingface" / "hub"
model_id_formatted = repo_id.replace("/", "--")
# Look for model folder
model_folders = list(cache_dir.glob(f"models--{model_id_formatted}*"))
if model_folders:
for folder in model_folders:
if (folder / "snapshots").exists():
snapshots = list((folder / "snapshots").iterdir())
if snapshots:
return True
return False
except Exception:
return False
def detect_bubbles(self,
                   image_path: str,
                   confidence: float = None,
                   iou_threshold: float = None,
                   max_detections: int = None,
                   use_rtdetr: bool = None) -> List[Tuple[int, int, int, int]]:
    """
    Detect speech bubbles in an image (backward compatible method).
    Args:
        image_path: Path to image file
        confidence: Minimum confidence threshold (0-1)
        iou_threshold: IOU threshold for NMS (0-1)
        max_detections: Maximum number of detections to return
        use_rtdetr: If True, use RT-DETR instead of YOLOv8 (if available)
    Returns:
        List of bubble bounding boxes as (x, y, width, height) tuples
    """
    # Check for stop at start
    if self._check_stop():
        self._log("⏹️ Bubble detection stopped by user", "warning")
        return []
    # Decide which model to use
    if use_rtdetr is None:
        # Auto-select: prefer RT-DETR if available
        use_rtdetr = self.rtdetr_loaded
    if use_rtdetr:
        # Prefer ONNX backend if available, else PyTorch
        if getattr(self, 'rtdetr_onnx_loaded', False):
            results = self.detect_with_rtdetr_onnx(
                image_path=image_path,
                confidence=confidence,
                return_all_bubbles=True
            )
            return results
        if self.rtdetr_loaded:
            results = self.detect_with_rtdetr(
                image_path=image_path,
                confidence=confidence,
                return_all_bubbles=True
            )
            return results
    # Original YOLOv8 detection
    if not self.model_loaded:
        logger.error("No model loaded. Call load_model() first.")
        return []
    # Use defaults if not specified.
    # NOTE(review): falsy explicit values (confidence=0, max_detections=0)
    # silently fall back to the defaults because of 'or' — confirm intended.
    confidence = confidence or self.default_confidence
    iou_threshold = iou_threshold or self.default_iou_threshold
    max_detections = max_detections or self.default_max_detections
    try:
        # Load image (only to report dimensions; YOLO re-reads the path itself)
        image = cv2.imread(image_path)
        if image is None:
            logger.error(f"Failed to load image: {image_path}")
            return []
        h, w = image.shape[:2]
        self._log(f"🔍 Detecting bubbles in {w}x{h} image")
        # Check for stop before inference
        if self._check_stop():
            self._log("⏹️ Bubble detection inference stopped by user", "warning")
            return []
        if self.model_type == 'yolo':
            # YOLOv8 inference
            results = self.model(
                image_path,
                conf=confidence,
                iou=iou_threshold,
                max_det=min(max_detections, getattr(self, 'max_det_yolo', max_detections)),
                verbose=False
            )
            bubbles = []
            for r in results:
                if r.boxes is not None:
                    for box in r.boxes:
                        # Get box coordinates (xyxy) and convert to x/y/w/h
                        x1, y1, x2, y2 = box.xyxy[0].cpu().numpy()
                        x, y = int(x1), int(y1)
                        width = int(x2 - x1)
                        height = int(y2 - y1)
                        # Get confidence
                        conf = float(box.conf[0])
                        # Add to list (hard cap at max_detections)
                        if len(bubbles) < max_detections:
                            bubbles.append((x, y, width, height))
                            logger.debug(f" Bubble: ({x},{y}) {width}x{height} conf={conf:.2f}")
        elif self.model_type == 'onnx':
            # ONNX inference
            bubbles = self._detect_with_onnx(image, confidence, iou_threshold, max_detections)
        elif self.model_type == 'torch':
            # TorchScript inference
            bubbles = self._detect_with_torchscript(image, confidence, iou_threshold, max_detections)
        else:
            logger.error(f"Unknown model type: {self.model_type}")
            return []
        logger.info(f"✅ Detected {len(bubbles)} speech bubbles")
        time.sleep(0.1)  # Brief pause for stability
        logger.debug("💤 Bubble detection pausing briefly for stability")
        return bubbles
    except Exception as e:
        logger.error(f"Detection failed: {e}")
        logger.error(traceback.format_exc())
        return []
def detect_with_rtdetr(self,
image_path: str = None,
image: np.ndarray = None,
confidence: float = None,
return_all_bubbles: bool = False) -> Any:
"""
Detect using RT-DETR model with 3-class detection (PyTorch backend).
Args:
image_path: Path to image file
image: Image array (BGR format)
confidence: Confidence threshold
return_all_bubbles: If True, return list of bubble boxes (for compatibility)
If False, return dict with all classes
Returns:
List of bubbles if return_all_bubbles=True, else dict with classes
"""
# Check for stop at start
if self._check_stop():
self._log("⏹️ RT-DETR detection stopped by user", "warning")
if return_all_bubbles:
return []
return {'bubbles': [], 'text_bubbles': [], 'text_free': []}
if not self.rtdetr_loaded:
self._log("RT-DETR not loaded. Call load_rtdetr_model() first.", "warning")
if return_all_bubbles:
return []
return {'bubbles': [], 'text_bubbles': [], 'text_free': []}
confidence = confidence or self.default_confidence
try:
# Load image
if image_path:
image = cv2.imread(image_path)
elif image is None:
logger.error("No image provided")
if return_all_bubbles:
return []
return {'bubbles': [], 'text_bubbles': [], 'text_free': []}
# Convert BGR to RGB for PIL
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
pil_image = Image.fromarray(image_rgb)
# Prepare image for model
inputs = self.rtdetr_processor(images=pil_image, return_tensors="pt")
# Move inputs to the same device as the model and match model dtype for floating tensors
model_device = next(self.rtdetr_model.parameters()).device if self.rtdetr_model is not None else (torch.device('cpu') if TORCH_AVAILABLE else 'cpu')
model_dtype = None
if TORCH_AVAILABLE and self.rtdetr_model is not None:
try:
model_dtype = next(self.rtdetr_model.parameters()).dtype
except Exception:
model_dtype = None
if TORCH_AVAILABLE:
new_inputs = {}
for k, v in inputs.items():
if isinstance(v, torch.Tensor):
v = v.to(model_device)
if model_dtype is not None and torch.is_floating_point(v):
v = v.to(model_dtype)
new_inputs[k] = v
inputs = new_inputs
# Run inference with autocast when model is half/bfloat16 on CUDA
use_amp = TORCH_AVAILABLE and hasattr(model_device, 'type') and model_device.type == 'cuda' and (model_dtype in (torch.float16, torch.bfloat16))
autocast_dtype = model_dtype if model_dtype in (torch.float16, torch.bfloat16) else None
with torch.no_grad():
if use_amp and autocast_dtype is not None:
with torch.autocast('cuda', dtype=autocast_dtype):
outputs = self.rtdetr_model(**inputs)
else:
outputs = self.rtdetr_model(**inputs)
# Brief pause for stability after inference
time.sleep(0.1)
logger.debug("💤 RT-DETR inference pausing briefly for stability")
# Post-process results
target_sizes = torch.tensor([pil_image.size[::-1]]) if TORCH_AVAILABLE else None
if TORCH_AVAILABLE and hasattr(model_device, 'type') and model_device.type == "cuda":
target_sizes = target_sizes.to(model_device)
results = self.rtdetr_processor.post_process_object_detection(
outputs,
target_sizes=target_sizes,
threshold=confidence
)[0]
# Apply per-detector cap if configured
cap = getattr(self, 'max_det_rtdetr', self.default_max_detections)
if cap and len(results['boxes']) > cap:
# Keep top-scoring first
scores = results['scores']
top_idx = scores.topk(k=cap).indices if hasattr(scores, 'topk') else range(cap)
results = {
'boxes': [results['boxes'][i] for i in top_idx],
'scores': [results['scores'][i] for i in top_idx],
'labels': [results['labels'][i] for i in top_idx]
}
logger.info(f"📊 RT-DETR found {len(results['boxes'])} detections above {confidence:.2f} confidence")
# Apply NMS to remove duplicate detections
# Group detections by class
class_detections = {self.CLASS_BUBBLE: [], self.CLASS_TEXT_BUBBLE: [], self.CLASS_TEXT_FREE: []}
for box, score, label in zip(results['boxes'], results['scores'], results['labels']):
x1, y1, x2, y2 = map(float, box.tolist())
label_id = label.item()
if label_id in class_detections:
class_detections[label_id].append((x1, y1, x2, y2, float(score.item())))
# Apply NMS per class to remove duplicates
def compute_iou(box1, box2):
"""Compute IoU between two boxes (x1, y1, x2, y2)"""
x1_1, y1_1, x2_1, y2_1 = box1[:4]
x1_2, y1_2, x2_2, y2_2 = box2[:4]
# Intersection
x_left = max(x1_1, x1_2)
y_top = max(y1_1, y1_2)
x_right = min(x2_1, x2_2)
y_bottom = min(y2_1, y2_2)
if x_right < x_left or y_bottom < y_top:
return 0.0
intersection = (x_right - x_left) * (y_bottom - y_top)
# Union
area1 = (x2_1 - x1_1) * (y2_1 - y1_1)
area2 = (x2_2 - x1_2) * (y2_2 - y1_2)
union = area1 + area2 - intersection
return intersection / union if union > 0 else 0.0
def apply_nms(boxes_with_scores, iou_threshold=0.45):
"""Apply Non-Maximum Suppression"""
if not boxes_with_scores:
return []
# Sort by score (descending)
sorted_boxes = sorted(boxes_with_scores, key=lambda x: x[4], reverse=True)
keep = []
while sorted_boxes:
# Keep the box with highest score
current = sorted_boxes.pop(0)
keep.append(current)
# Remove boxes with high IoU
sorted_boxes = [box for box in sorted_boxes if compute_iou(current, box) < iou_threshold]
return keep
# Apply NMS and organize by class
detections = {
'bubbles': [], # Empty speech bubbles
'text_bubbles': [], # Bubbles with text
'text_free': [] # Text without bubbles
}
for class_id, boxes_list in class_detections.items():
nms_boxes = apply_nms(boxes_list, iou_threshold=self.default_iou_threshold)
for x1, y1, x2, y2, scr in nms_boxes:
width = int(x2 - x1)
height = int(y2 - y1)
# Store as (x, y, width, height) to match YOLOv8 format
bbox = (int(x1), int(y1), width, height)
if class_id == self.CLASS_BUBBLE:
detections['bubbles'].append(bbox)
elif class_id == self.CLASS_TEXT_BUBBLE:
detections['text_bubbles'].append(bbox)
elif class_id == self.CLASS_TEXT_FREE:
detections['text_free'].append(bbox)
# Stop early if we hit the configured cap across all classes
total_count = len(detections['bubbles']) + len(detections['text_bubbles']) + len(detections['text_free'])
if total_count >= (self.config.get('manga_settings', {}).get('ocr', {}).get('bubble_max_detections', self.default_max_detections) if isinstance(self.config, dict) else self.default_max_detections):
break
# Log results
total = len(detections['bubbles']) + len(detections['text_bubbles']) + len(detections['text_free'])
logger.info(f"✅ RT-DETR detected {total} objects:")
logger.info(f" - Empty bubbles: {len(detections['bubbles'])}")
logger.info(f" - Text bubbles: {len(detections['text_bubbles'])}")
logger.info(f" - Free text: {len(detections['text_free'])}")
# Return format based on compatibility mode
if return_all_bubbles:
# Return all bubbles (empty + with text) for backward compatibility
all_bubbles = detections['bubbles'] + detections['text_bubbles']
return all_bubbles
else:
return detections
except Exception as e:
logger.error(f"RT-DETR detection failed: {e}")
logger.error(traceback.format_exc())
if return_all_bubbles:
return []
return {'bubbles': [], 'text_bubbles': [], 'text_free': []}
def detect_all_text_regions(self, image_path: str = None, image: np.ndarray = None) -> List[Tuple[int, int, int, int]]:
    """
    Detect every text region via RT-DETR (text inside bubbles plus free text).

    Args:
        image_path: Path to the image on disk (alternative to ``image``)
        image: Image as a numpy array (BGR, as produced by cv2)

    Returns:
        List of (x, y, w, h) bounding boxes covering all text regions;
        empty list when the RT-DETR backend is not loaded
    """
    if not self.rtdetr_loaded:
        logger.warning("RT-DETR required for text detection")
        return []
    result = self.detect_with_rtdetr(image_path=image_path, image=image, return_all_bubbles=False)
    # Text lives both inside bubbles and as free-floating text
    combined = list(result['text_bubbles'])
    combined.extend(result['text_free'])
    logger.info(f"📝 Found {len(combined)} text regions total")
    return combined
def _detect_with_onnx(self, image: np.ndarray, confidence: float,
                     iou_threshold: float, max_detections: int) -> List[Tuple[int, int, int, int]]:
    """Run detection using ONNX model.

    Args:
        image: Input image as a BGR numpy array (OpenCV convention).
        confidence: Minimum per-detection score to keep a box.
        iou_threshold: IoU threshold passed to OpenCV's NMS.
        max_detections: Maximum number of boxes returned after NMS.

    Returns:
        List of (x, y, width, height) boxes in original-image coordinates.
    """
    # Preprocess image
    img_size = 640  # Standard YOLOv8 input size
    img_resized = cv2.resize(image, (img_size, img_size))
    img_norm = img_resized.astype(np.float32) / 255.0
    # HWC -> CHW, then add a batch dimension: (1, 3, 640, 640)
    img_transposed = np.transpose(img_norm, (2, 0, 1))
    img_batch = np.expand_dims(img_transposed, axis=0)
    # Run inference
    input_name = self.onnx_session.get_inputs()[0].name
    outputs = self.onnx_session.run(None, {input_name: img_batch})
    # Process outputs (YOLOv8 format)
    predictions = outputs[0][0]  # Remove batch dimension
    # Filter by confidence and apply NMS
    bubbles = []
    boxes = []
    scores = []
    for pred in predictions.T:  # Transpose to get predictions per detection
        if len(pred) >= 5:
            # NOTE(review): index 4 is treated as a single confidence score.
            # Raw YOLOv8 exports emit per-class scores starting at index 4,
            # so this is valid for a single-class (bubble) model — confirm
            # before reusing with a multi-class export.
            x_center, y_center, width, height, obj_conf = pred[:5]
            if obj_conf >= confidence:
                # Convert to corner coordinates (still in 640x640 space)
                x1 = x_center - width / 2
                y1 = y_center - height / 2
                # Scale to original image size
                h, w = image.shape[:2]
                x1 = int(x1 * w / img_size)
                y1 = int(y1 * h / img_size)
                width = int(width * w / img_size)
                height = int(height * h / img_size)
                boxes.append([x1, y1, x1 + width, y1 + height])
                scores.append(float(obj_conf))
    # Apply NMS
    if boxes:
        indices = cv2.dnn.NMSBoxes(boxes, scores, confidence, iou_threshold)
        if len(indices) > 0:
            indices = indices.flatten()[:max_detections]
            for i in indices:
                x1, y1, x2, y2 = boxes[i]
                # Back to (x, y, w, h) for the caller
                bubbles.append((x1, y1, x2 - x1, y2 - y1))
    return bubbles
def _detect_with_torchscript(self, image: np.ndarray, confidence: float,
                            iou_threshold: float, max_detections: int) -> List[Tuple[int, int, int, int]]:
    """Run detection using TorchScript model.

    NOTE(review): this is an acknowledged placeholder — the image is
    preprocessed and the model is invoked, but the raw outputs are
    discarded and an empty list is always returned. Output decoding
    depends on the exact TorchScript export format and is unimplemented.

    Args:
        image: Input image as a BGR numpy array.
        confidence: Minimum score (currently unused).
        iou_threshold: NMS threshold (currently unused).
        max_detections: Cap on returned boxes (currently unused).

    Returns:
        Always [] until output decoding is implemented.
    """
    # Similar to ONNX but using PyTorch tensors
    img_size = 640
    img_resized = cv2.resize(image, (img_size, img_size))
    img_norm = img_resized.astype(np.float32) / 255.0
    # HWC -> CHW with a leading batch dimension
    img_tensor = torch.from_numpy(img_norm).permute(2, 0, 1).unsqueeze(0)
    if self.use_gpu:
        img_tensor = img_tensor.cuda()
    with torch.no_grad():
        outputs = self.model(img_tensor)
    # Process outputs similar to ONNX
    # Implementation depends on exact model output format
    # This is a placeholder - adjust based on your model
    return []
def visualize_detections(self, image_path: str, bubbles: List[Tuple[int, int, int, int]] = None,
                        output_path: str = None, use_rtdetr: bool = False) -> np.ndarray:
    """
    Draw detected bubbles onto a copy of the image.

    Args:
        image_path: Path to original image
        bubbles: List of bubble bounding boxes (if None, will detect)
        output_path: Optional path to save visualization
        use_rtdetr: Use RT-DETR for visualization with class colors

    Returns:
        Image with drawn bounding boxes, or None if the image can't be read
    """
    source = cv2.imread(image_path)
    if source is None:
        logger.error(f"Failed to load image: {image_path}")
        return None
    canvas = source.copy()

    def _draw_box(x, y, w, h, color, label):
        # Rectangle, then a filled label background with white text on top
        cv2.rectangle(canvas, (x, y), (x + w, y + h), color, 2)
        label_size, _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
        cv2.rectangle(canvas, (x, y - label_size[1] - 4),
                      (x + label_size[0], y), color, -1)
        cv2.putText(canvas, label, (x, y - 2),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)

    if use_rtdetr and self.rtdetr_loaded:
        # RT-DETR visualization: one color per detection class
        detections = self.detect_with_rtdetr(image_path=image_path, return_all_bubbles=False)
        colors = {
            'bubbles': (0, 255, 0),       # Green for empty bubbles
            'text_bubbles': (255, 0, 0),  # Blue for text bubbles
            'text_free': (0, 0, 255)      # Red for free text
        }
        for class_name, bboxes in detections.items():
            color = colors[class_name]
            for i, (x, y, w, h) in enumerate(bboxes):
                _draw_box(x, y, w, h, color,
                          f"{class_name.replace('_', ' ').title()} {i+1}")
    else:
        # Original YOLOv8 visualization: single green color
        if bubbles is None:
            bubbles = self.detect_bubbles(image_path)
        for i, (x, y, w, h) in enumerate(bubbles):
            _draw_box(x, y, w, h, (0, 255, 0), f"Bubble {i+1}")

    # Save if output path provided
    if output_path:
        cv2.imwrite(output_path, canvas)
        logger.info(f"💾 Visualization saved to: {output_path}")
    return canvas
def convert_to_onnx(self, model_path: str, output_path: str = None) -> bool:
    """
    Convert a YOLOv8 or RT-DETR model to ONNX format.

    Args:
        model_path: Path to model file or 'rtdetr' for loaded RT-DETR
        output_path: Path for ONNX output (auto-generated if None)

    Returns:
        True if conversion successful, False otherwise
    """
    try:
        logger.info(f"🔄 Converting {model_path} to ONNX...")
        # Generate output path if not provided
        if output_path is None:
            if model_path == 'rtdetr' and self.rtdetr_loaded:
                base_name = 'rtdetr_comic'
            else:
                base_name = Path(model_path).stem
            output_path = os.path.join(self.cache_dir, f"{base_name}.onnx")
        # Check if already exists (set FORCE_ONNX_REBUILD=true to re-export)
        if os.path.exists(output_path) and not os.environ.get('FORCE_ONNX_REBUILD', 'false').lower() == 'true':
            logger.info(f"✅ ONNX model already exists: {output_path}")
            return True
        # Handle RT-DETR conversion
        if model_path == 'rtdetr' and self.rtdetr_loaded:
            if not TORCH_AVAILABLE:
                logger.error("PyTorch required for RT-DETR ONNX conversion")
                return False
            # RT-DETR specific conversion
            self.rtdetr_model.eval()
            # Create dummy input (pixel values): BxCxHxW
            dummy_input = torch.randn(1, 3, 640, 640)
            if self.device == 'cuda':
                dummy_input = dummy_input.to('cuda')
            # Wrap the model to return only tensors (logits, pred_boxes),
            # since the exporters can't trace HF ModelOutput objects directly
            class _RTDetrExportWrapper(torch.nn.Module):
                def __init__(self, mdl):
                    super().__init__()
                    self.mdl = mdl
                def forward(self, images):
                    out = self.mdl(pixel_values=images)
                    # Handle dict/ModelOutput/tuple outputs
                    logits = None
                    boxes = None
                    try:
                        if isinstance(out, dict):
                            logits = out.get('logits', None)
                            boxes = out.get('pred_boxes', out.get('boxes', None))
                        else:
                            logits = getattr(out, 'logits', None)
                            boxes = getattr(out, 'pred_boxes', getattr(out, 'boxes', None))
                    except Exception:
                        pass
                    # Fall back to positional outputs when attribute access failed
                    if (logits is None or boxes is None) and isinstance(out, (tuple, list)) and len(out) >= 2:
                        logits, boxes = out[0], out[1]
                    return logits, boxes
            wrapper = _RTDetrExportWrapper(self.rtdetr_model)
            if self.device == 'cuda':
                wrapper = wrapper.to('cuda')
            # Try PyTorch 2.x dynamo_export first (more tolerant of newer aten ops)
            try:
                success = False
                try:
                    from torch.onnx import dynamo_export
                    try:
                        exp = dynamo_export(wrapper, dummy_input)
                    except TypeError:
                        # Older PyTorch dynamo_export may not support this calling convention
                        # NOTE(review): this retry is byte-identical to the first
                        # call — presumably a different calling convention was
                        # intended here; confirm and fix the fallback.
                        exp = dynamo_export(wrapper, dummy_input)
                    # exp may have save(); otherwise, it may expose model_proto
                    try:
                        exp.save(output_path)  # type: ignore
                        success = True
                    except Exception:
                        try:
                            import onnx as _onnx
                            _onnx.save(exp.model_proto, output_path)  # type: ignore
                            success = True
                        except Exception as _se:
                            logger.warning(f"dynamo_export produced model but could not save: {_se}")
                except Exception as de:
                    logger.warning(f"dynamo_export failed; falling back to legacy exporter: {de}")
                if success:
                    logger.info(f"✅ RT-DETR ONNX saved to: {output_path} (dynamo_export)")
                    return True
            except Exception as de2:
                logger.warning(f"dynamo_export path error: {de2}")
            # Legacy exporter with opset fallback (newest opset first)
            last_err = None
            for opset in [19, 18, 17, 16, 15, 14, 13]:
                try:
                    torch.onnx.export(
                        wrapper,
                        dummy_input,
                        output_path,
                        export_params=True,
                        opset_version=opset,
                        do_constant_folding=True,
                        input_names=['pixel_values'],
                        output_names=['logits', 'boxes'],
                        dynamic_axes={
                            'pixel_values': {0: 'batch', 2: 'height', 3: 'width'},
                            'logits': {0: 'batch'},
                            'boxes': {0: 'batch'}
                        }
                    )
                    logger.info(f"✅ RT-DETR ONNX saved to: {output_path} (opset {opset})")
                    return True
                except Exception as _e:
                    last_err = _e
                    try:
                        msg = str(_e)
                    except Exception:
                        msg = ''
                    logger.warning(f"RT-DETR ONNX export failed at opset {opset}: {msg}")
                    continue
            logger.error(f"All RT-DETR ONNX export attempts failed. Last error: {last_err}")
            return False
        # Handle YOLOv8 conversion - FIXED
        elif YOLO_AVAILABLE and os.path.exists(model_path):
            logger.info(f"Loading YOLOv8 model from: {model_path}")
            # Load model
            model = YOLO(model_path)
            # Export to ONNX - this returns the path to the exported model
            logger.info("Exporting to ONNX format...")
            exported_path = model.export(format='onnx', imgsz=640, simplify=True)
            # exported_path could be a string or Path object
            exported_path = str(exported_path) if exported_path else None
            if exported_path and os.path.exists(exported_path):
                # Move to desired location if different
                if exported_path != output_path:
                    import shutil
                    logger.info(f"Moving ONNX from {exported_path} to {output_path}")
                    shutil.move(exported_path, output_path)
                logger.info(f"✅ YOLOv8 ONNX saved to: {output_path}")
                return True
            else:
                # Fallback: check if it was created with expected name
                expected_onnx = model_path.replace('.pt', '.onnx')
                if os.path.exists(expected_onnx):
                    if expected_onnx != output_path:
                        import shutil
                        shutil.move(expected_onnx, output_path)
                    logger.info(f"✅ YOLOv8 ONNX saved to: {output_path}")
                    return True
                else:
                    logger.error(f"ONNX export failed - no output file found")
                    return False
        else:
            logger.error(f"Cannot convert {model_path}: Model not found or dependencies missing")
            return False
    except Exception as e:
        logger.error(f"Conversion failed: {e}")
        # Avoid noisy full stack trace in production logs; return False gracefully
        return False
def batch_detect(self, image_paths: List[str], **kwargs) -> Dict[str, List[Tuple[int, int, int, int]]]:
    """
    Detect bubbles across several images sequentially.

    Args:
        image_paths: List of image paths
        **kwargs: Detection parameters forwarded to detect_bubbles
                  (confidence, iou_threshold, max_detections, use_rtdetr)

    Returns:
        Dictionary mapping each image path to its list of bubbles
    """
    results = {}
    total = len(image_paths)
    for idx, path in enumerate(image_paths, start=1):
        logger.info(f"Processing image {idx}/{total}: {os.path.basename(path)}")
        results[path] = self.detect_bubbles(path, **kwargs)
    return results
def unload(self, release_shared: bool = False):
    """Release model resources held by this detector instance.

    Best-effort cleanup: every step is individually guarded so a failure
    in one step never prevents the remaining ones from running.

    Args:
        release_shared: If True, also clear class-level shared RT-DETR caches.
    """
    try:
        # Drop ONNX runtime sessions first
        try:
            if getattr(self, 'onnx_session', None) is not None:
                self.onnx_session = None
        except Exception:
            pass
        try:
            if getattr(self, 'rtdetr_onnx_session', None) is not None:
                self.rtdetr_onnx_session = None
        except Exception:
            pass
        # Null out instance-level model references and reset the loaded flags
        for name, cleared in (('model', None),
                              ('rtdetr_model', None),
                              ('rtdetr_processor', None),
                              ('model_loaded', False),
                              ('rtdetr_loaded', False),
                              ('rtdetr_onnx_loaded', False)):
            try:
                if hasattr(self, name):
                    setattr(self, name, cleared)
            except Exception:
                pass
        # Optional: release class-level shared caches too
        if release_shared:
            try:
                BubbleDetector._rtdetr_shared_model = None
                BubbleDetector._rtdetr_shared_processor = None
                BubbleDetector._rtdetr_loaded = False
            except Exception:
                pass
        # Free CUDA cache and trigger GC
        try:
            if TORCH_AVAILABLE and torch is not None and torch.cuda.is_available():
                torch.cuda.empty_cache()
        except Exception:
            pass
        try:
            import gc
            gc.collect()
        except Exception:
            pass
    except Exception:
        # Best-effort only
        pass
def get_bubble_masks(self, image_path: str, bubbles: List[Tuple[int, int, int, int]]) -> np.ndarray:
    """
    Build a binary mask marking bubble regions.

    Args:
        image_path: Path to original image (used only for its dimensions)
        bubbles: List of bubble bounding boxes (x, y, w, h)

    Returns:
        uint8 mask with the image's height/width, bubble rectangles filled
        white (255) on black (0), or None if the image cannot be read
    """
    image = cv2.imread(image_path)
    if image is None:
        return None
    height, width = image.shape[:2]
    mask = np.zeros((height, width), dtype=np.uint8)
    # Paint each bubble rectangle solid white (-1 thickness = filled)
    for bx, by, bw, bh in bubbles:
        cv2.rectangle(mask, (bx, by), (bx + bw, by + bh), 255, -1)
    return mask
def filter_bubbles_by_size(self, bubbles: List[Tuple[int, int, int, int]],
                          min_area: int = 100,
                          max_area: int = None) -> List[Tuple[int, int, int, int]]:
    """
    Keep only bubbles whose area lies within [min_area, max_area].

    Args:
        bubbles: List of bubble bounding boxes (x, y, w, h)
        min_area: Minimum area in pixels
        max_area: Maximum area in pixels (None for no upper limit)

    Returns:
        Filtered list of bubbles, original order preserved
    """
    def _within_bounds(box):
        area = box[2] * box[3]
        if area < min_area:
            return False
        return max_area is None or area <= max_area

    return [box for box in bubbles if _within_bounds(box)]
def merge_overlapping_bubbles(self, bubbles: List[Tuple[int, int, int, int]],
                             overlap_threshold: float = 0.1) -> List[Tuple[int, int, int, int]]:
    """
    Merge overlapping bubble detections into single boxes.

    Greedy single pass: each box absorbs any later box whose overlap
    (intersection over the *smaller* area) reaches the threshold; the
    absorbing box grows as it scans forward.

    Args:
        bubbles: List of bubble bounding boxes (x, y, w, h)
        overlap_threshold: Minimum overlap ratio to merge

    Returns:
        Merged list of bubbles
    """
    if not bubbles:
        return []
    # Work in corner coordinates (x1, y1, x2, y2)
    corners = np.array([(x, y, x + w, y + h) for x, y, w, h in bubbles])
    consumed = set()
    merged = []
    for i in range(len(corners)):
        if i in consumed:
            continue
        x1, y1, x2, y2 = corners[i]
        for j in range(i + 1, len(corners)):
            if j in consumed:
                continue
            other = corners[j]
            # Intersection of the current (possibly grown) box and `other`
            ix1, iy1 = max(x1, other[0]), max(y1, other[1])
            ix2, iy2 = min(x2, other[2]), min(y2, other[3])
            if ix1 >= ix2 or iy1 >= iy2:
                continue
            inter = (ix2 - ix1) * (iy2 - iy1)
            area_cur = (x2 - x1) * (y2 - y1)
            area_other = (other[2] - other[0]) * (other[3] - other[1])
            # Ratio relative to the smaller box, so a small box fully
            # contained in a larger one always merges
            if inter / min(area_cur, area_other) >= overlap_threshold:
                x1 = min(x1, other[0])
                y1 = min(y1, other[1])
                x2 = max(x2, other[2])
                y2 = max(y2, other[3])
                consumed.add(j)
        merged.append((int(x1), int(y1), int(x2 - x1), int(y2 - y1)))
    return merged
# ============================
# RT-DETR (ONNX) BACKEND
# ============================
def load_rtdetr_onnx_model(self, model_id: str = None, force_reload: bool = False) -> bool:
    """
    Load RT-DETR ONNX model using onnxruntime. Downloads detector.onnx and config.json
    from the provided Hugging Face repo if not already cached.

    Args:
        model_id: Optional Hugging Face repo id; defaults to self.rtdetr_onnx_repo.
        force_reload: If True, bypass the shared singleton session and rebuild.

    Returns:
        True if a usable session is attached to this instance, False otherwise.
    """
    if not ONNX_AVAILABLE:
        logger.error("ONNX Runtime not available for RT-DETR ONNX backend")
        return False
    try:
        # If singleton mode and already loaded, just attach shared session
        try:
            adv = (self.config or {}).get('manga_settings', {}).get('advanced', {}) if isinstance(self.config, dict) else {}
            singleton = bool(adv.get('use_singleton_models', True))
        except Exception:
            singleton = True
        if singleton and BubbleDetector._rtdetr_onnx_loaded and not force_reload and BubbleDetector._rtdetr_onnx_shared_session is not None:
            self.rtdetr_onnx_session = BubbleDetector._rtdetr_onnx_shared_session
            self.rtdetr_onnx_loaded = True
            return True
        repo = model_id or self.rtdetr_onnx_repo
        try:
            from huggingface_hub import hf_hub_download
        except Exception as e:
            logger.error(f"huggingface-hub required to fetch RT-DETR ONNX: {e}")
            return False
        # Ensure local models dir (use configured cache_dir directly: e.g., 'models')
        cache_dir = self.cache_dir
        os.makedirs(cache_dir, exist_ok=True)
        # Download files into models/ and avoid symlinks so the file is visible there
        try:
            # config.json is optional for inference, so failure here is non-fatal
            _ = hf_hub_download(repo_id=repo, filename='config.json', cache_dir=cache_dir, local_dir=cache_dir, local_dir_use_symlinks=False)
        except Exception:
            pass
        onnx_fp = hf_hub_download(repo_id=repo, filename='detector.onnx', cache_dir=cache_dir, local_dir=cache_dir, local_dir_use_symlinks=False)
        # Remember the on-disk path so detect_with_rtdetr_onnx can rebuild a CPU session on DML failure
        BubbleDetector._rtdetr_onnx_model_path = onnx_fp
        # Pick providers: prefer CUDA if available; otherwise CPU. Do NOT use DML.
        providers = ['CPUExecutionProvider']
        try:
            avail = ort.get_available_providers() if ONNX_AVAILABLE else []
            if 'CUDAExecutionProvider' in avail:
                providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
        except Exception:
            pass
        # Session options with reduced memory arena and optional thread limiting in singleton mode
        so = ort.SessionOptions()
        try:
            # Trade some speed for lower steady-state memory usage
            so.enable_mem_pattern = False
            so.enable_cpu_mem_arena = False
        except Exception:
            pass
        # If singleton models mode is enabled in config, limit ORT threading to reduce CPU spikes
        try:
            adv = (self.config or {}).get('manga_settings', {}).get('advanced', {}) if isinstance(self.config, dict) else {}
            if bool(adv.get('use_singleton_models', True)):
                so.intra_op_num_threads = 1
                so.inter_op_num_threads = 1
                try:
                    so.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
                except Exception:
                    pass
                try:
                    so.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_BASIC
                except Exception:
                    pass
        except Exception:
            pass
        # Create session (serialize creation in singleton mode to avoid device storms)
        if singleton:
            with BubbleDetector._rtdetr_onnx_init_lock:
                # Re-check after acquiring lock (double-checked locking pattern)
                if BubbleDetector._rtdetr_onnx_loaded and BubbleDetector._rtdetr_onnx_shared_session is not None and not force_reload:
                    self.rtdetr_onnx_session = BubbleDetector._rtdetr_onnx_shared_session
                    self.rtdetr_onnx_loaded = True
                    return True
                sess = ort.InferenceSession(onnx_fp, providers=providers, sess_options=so)
                BubbleDetector._rtdetr_onnx_shared_session = sess
                BubbleDetector._rtdetr_onnx_loaded = True
                BubbleDetector._rtdetr_onnx_providers = providers
                self.rtdetr_onnx_session = sess
                self.rtdetr_onnx_loaded = True
        else:
            self.rtdetr_onnx_session = ort.InferenceSession(onnx_fp, providers=providers, sess_options=so)
            self.rtdetr_onnx_loaded = True
        logger.info("✅ RT-DETR (ONNX) model ready")
        return True
    except Exception as e:
        logger.error(f"Failed to load RT-DETR ONNX: {e}")
        self.rtdetr_onnx_session = None
        self.rtdetr_onnx_loaded = False
        return False
def detect_with_rtdetr_onnx(self,
                            image_path: str = None,
                            image: np.ndarray = None,
                            confidence: float = 0.3,
                            return_all_bubbles: bool = False) -> Any:
    """Detect using RT-DETR ONNX backend.

    Args:
        image_path: Path to image on disk (takes precedence over ``image``).
        image: BGR numpy array (OpenCV convention) when no path is given.
        confidence: Minimum score for a detection to be kept.
        return_all_bubbles: If True, return a flat list of bubble boxes
            (empty bubbles + text bubbles); otherwise a dict keyed by class.

    Returns bubbles list if return_all_bubbles else dict by classes similar to PyTorch path.
    """
    if not self.rtdetr_onnx_loaded or self.rtdetr_onnx_session is None:
        logger.warning("RT-DETR ONNX not loaded")
        return [] if return_all_bubbles else {'bubbles': [], 'text_bubbles': [], 'text_free': []}
    try:
        # Acquire image
        if image_path is not None:
            import cv2
            image = cv2.imread(image_path)
            if image is None:
                raise RuntimeError(f"Failed to read image: {image_path}")
            image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        else:
            if image is None:
                raise RuntimeError("No image provided")
            # Assume image is BGR np.ndarray if from OpenCV
            try:
                import cv2
                image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            except Exception:
                image_rgb = image
        # To PIL then resize 640x640 as in reference
        from PIL import Image as _PILImage
        pil_image = _PILImage.fromarray(image_rgb)
        im_resized = pil_image.resize((640, 640))
        arr = np.asarray(im_resized, dtype=np.float32) / 255.0
        arr = np.transpose(arr, (2, 0, 1))  # (3,H,W)
        im_data = arr[np.newaxis, ...]
        # The exported model scales boxes back itself, so pass the
        # pre-resize dimensions as orig_target_sizes
        w, h = pil_image.size
        orig_size = np.array([[w, h]], dtype=np.int64)
        # Run with a concurrency guard to prevent device hangs and limit memory usage
        # Apply semaphore for ALL providers (not just DML) to control concurrency
        providers = BubbleDetector._rtdetr_onnx_providers or []
        def _do_run(session):
            return session.run(None, {
                'images': im_data,
                'orig_target_sizes': orig_size
            })
        # Always use semaphore to limit concurrent RT-DETR calls
        acquired = False
        try:
            BubbleDetector._rtdetr_onnx_sema.acquire()
            acquired = True
            # Special DML error handling
            if 'DmlExecutionProvider' in providers:
                try:
                    outputs = _do_run(self.rtdetr_onnx_session)
                except Exception as dml_err:
                    msg = str(dml_err)
                    # DXGI device-removed/hung codes -> rebuild on CPU
                    if '887A0005' in msg or '887A0006' in msg or 'Dml' in msg:
                        # Rebuild CPU session and retry once
                        try:
                            base_path = BubbleDetector._rtdetr_onnx_model_path
                            if base_path:
                                so = ort.SessionOptions()
                                so.enable_mem_pattern = False
                                so.enable_cpu_mem_arena = False
                                cpu_providers = ['CPUExecutionProvider']
                                # Serialize rebuild
                                with BubbleDetector._rtdetr_onnx_init_lock:
                                    sess = ort.InferenceSession(base_path, providers=cpu_providers, sess_options=so)
                                    BubbleDetector._rtdetr_onnx_shared_session = sess
                                    BubbleDetector._rtdetr_onnx_providers = cpu_providers
                                    self.rtdetr_onnx_session = sess
                                outputs = _do_run(self.rtdetr_onnx_session)
                            else:
                                raise
                        except Exception:
                            raise
                    else:
                        raise
            else:
                # Non-DML providers - just run directly
                outputs = _do_run(self.rtdetr_onnx_session)
        finally:
            if acquired:
                try:
                    BubbleDetector._rtdetr_onnx_sema.release()
                except Exception:
                    pass
        # outputs expected: labels, boxes, scores
        labels, boxes, scores = outputs[:3]
        # Squeeze away a leading batch dimension when present
        if labels.ndim == 2 and labels.shape[0] == 1:
            labels = labels[0]
        if scores.ndim == 2 and scores.shape[0] == 1:
            scores = scores[0]
        if boxes.ndim == 3 and boxes.shape[0] == 1:
            boxes = boxes[0]
        # Apply NMS to remove duplicate detections
        # Group detections by class and apply NMS per class
        class_detections = {self.CLASS_BUBBLE: [], self.CLASS_TEXT_BUBBLE: [], self.CLASS_TEXT_FREE: []}
        for lab, box, scr in zip(labels, boxes, scores):
            if float(scr) < float(confidence):
                continue
            label_id = int(lab)
            if label_id in class_detections:
                x1, y1, x2, y2 = map(float, box)
                class_detections[label_id].append((x1, y1, x2, y2, float(scr)))
        # Apply NMS per class to remove duplicates
        # NOTE(review): these helpers duplicate the torch-path NMS code;
        # consider extracting to module level in a follow-up.
        def compute_iou(box1, box2):
            """Compute IoU between two boxes (x1, y1, x2, y2)"""
            x1_1, y1_1, x2_1, y2_1 = box1[:4]
            x1_2, y1_2, x2_2, y2_2 = box2[:4]
            # Intersection
            x_left = max(x1_1, x1_2)
            y_top = max(y1_1, y1_2)
            x_right = min(x2_1, x2_2)
            y_bottom = min(y2_1, y2_2)
            if x_right < x_left or y_bottom < y_top:
                return 0.0
            intersection = (x_right - x_left) * (y_bottom - y_top)
            # Union
            area1 = (x2_1 - x1_1) * (y2_1 - y1_1)
            area2 = (x2_2 - x1_2) * (y2_2 - y1_2)
            union = area1 + area2 - intersection
            return intersection / union if union > 0 else 0.0
        def apply_nms(boxes_with_scores, iou_threshold=0.45):
            """Apply Non-Maximum Suppression"""
            if not boxes_with_scores:
                return []
            # Sort by score (descending)
            sorted_boxes = sorted(boxes_with_scores, key=lambda x: x[4], reverse=True)
            keep = []
            while sorted_boxes:
                # Keep the box with highest score
                current = sorted_boxes.pop(0)
                keep.append(current)
                # Remove boxes with high IoU
                sorted_boxes = [box for box in sorted_boxes if compute_iou(current, box) < iou_threshold]
            return keep
        # Apply NMS and build final detections
        detections = {'bubbles': [], 'text_bubbles': [], 'text_free': []}
        bubbles_all = []
        for class_id, boxes_list in class_detections.items():
            nms_boxes = apply_nms(boxes_list, iou_threshold=self.default_iou_threshold)
            for x1, y1, x2, y2, scr in nms_boxes:
                # Convert corners to (x, y, w, h) to match the YOLOv8 format
                bbox = (int(x1), int(y1), int(x2 - x1), int(y2 - y1))
                if class_id == self.CLASS_BUBBLE:
                    detections['bubbles'].append(bbox)
                    bubbles_all.append(bbox)
                elif class_id == self.CLASS_TEXT_BUBBLE:
                    detections['text_bubbles'].append(bbox)
                    bubbles_all.append(bbox)
                elif class_id == self.CLASS_TEXT_FREE:
                    detections['text_free'].append(bbox)
        return bubbles_all if return_all_bubbles else detections
    except Exception as e:
        logger.error(f"RT-DETR ONNX detection failed: {e}")
        return [] if return_all_bubbles else {'bubbles': [], 'text_bubbles': [], 'text_free': []}
# Standalone utility functions
def download_model_from_huggingface(repo_id: str = "ogkalu/comic-speech-bubble-detector-yolov8m",
                                   filename: str = "comic-speech-bubble-detector-yolov8m.pt",
                                   cache_dir: str = "models") -> str:
    """
    Download model from Hugging Face Hub.

    Args:
        repo_id: Hugging Face repository ID
        filename: Model filename in the repository
        cache_dir: Local directory to cache the model

    Returns:
        Path to downloaded model file, or None on failure
    """
    try:
        from huggingface_hub import hf_hub_download
        os.makedirs(cache_dir, exist_ok=True)
        # Fix: log the actual file being fetched (was a literal "(unknown)" placeholder)
        logger.info(f"📥 Downloading {filename} from {repo_id}...")
        model_path = hf_hub_download(
            repo_id=repo_id,
            filename=filename,
            cache_dir=cache_dir,
            local_dir=cache_dir
        )
        logger.info(f"✅ Model downloaded to: {model_path}")
        return model_path
    except ImportError:
        logger.error("huggingface-hub package required. Install with: pip install huggingface-hub")
        return None
    except Exception as e:
        logger.error(f"Download failed: {e}")
        return None
def download_rtdetr_model(cache_dir: str = "models") -> bool:
    """
    Download and cache the RT-DETR comic detection model.

    Args:
        cache_dir: Directory to cache the model

    Returns:
        True if both processor and model downloaded successfully
    """
    if not TRANSFORMERS_AVAILABLE:
        logger.error("Transformers required. Install with: pip install transformers")
        return False
    try:
        logger.info("📥 Downloading RT-DETR model...")
        from transformers import RTDetrForObjectDetection, RTDetrImageProcessor
        repo = "ogkalu/comic-text-and-bubble-detector"
        # from_pretrained fetches and caches the weights on first use
        processor = RTDetrImageProcessor.from_pretrained(repo, cache_dir=cache_dir)
        model = RTDetrForObjectDetection.from_pretrained(repo, cache_dir=cache_dir)
        logger.info("✅ RT-DETR model downloaded successfully")
        return True
    except Exception as e:
        logger.error(f"Download failed: {e}")
        return False
# Example usage and testing
if __name__ == "__main__":
    # Simple CLI for downloading models and running ad-hoc detections.
    import sys
    # Create detector
    detector = BubbleDetector()
    if len(sys.argv) > 1:
        if sys.argv[1] == "download":
            # Download model from Hugging Face
            model_path = download_model_from_huggingface()
            if model_path:
                print(f"YOLOv8 model downloaded to: {model_path}")
            # Also download RT-DETR
            if download_rtdetr_model():
                print("RT-DETR model downloaded")
        elif sys.argv[1] == "detect" and len(sys.argv) > 3:
            # Detect bubbles in an image
            model_path = sys.argv[2]
            image_path = sys.argv[3]
            # Load appropriate model (backend chosen from the model path name)
            if 'rtdetr' in model_path.lower():
                if detector.load_rtdetr_model():
                    # Use RT-DETR
                    results = detector.detect_with_rtdetr(image_path)
                    print(f"RT-DETR Detection:")
                    print(f" Empty bubbles: {len(results['bubbles'])}")
                    print(f" Text bubbles: {len(results['text_bubbles'])}")
                    print(f" Free text: {len(results['text_free'])}")
            else:
                if detector.load_model(model_path):
                    bubbles = detector.detect_bubbles(image_path, confidence=0.5)
                    print(f"YOLOv8 detected {len(bubbles)} bubbles:")
                    for i, (x, y, w, h) in enumerate(bubbles):
                        print(f" Bubble {i+1}: position=({x},{y}) size=({w}x{h})")
            # Optionally visualize
            if len(sys.argv) > 4:
                output_path = sys.argv[4]
                detector.visualize_detections(image_path, output_path=output_path,
                                              use_rtdetr='rtdetr' in model_path.lower())
        elif sys.argv[1] == "test-both" and len(sys.argv) > 2:
            # Test both models on the same image for comparison
            image_path = sys.argv[2]
            # Load YOLOv8
            yolo_path = "models/comic-speech-bubble-detector-yolov8m.pt"
            if os.path.exists(yolo_path):
                detector.load_model(yolo_path)
                yolo_bubbles = detector.detect_bubbles(image_path, use_rtdetr=False)
                print(f"YOLOv8: {len(yolo_bubbles)} bubbles")
            # Load RT-DETR
            if detector.load_rtdetr_model():
                rtdetr_bubbles = detector.detect_bubbles(image_path, use_rtdetr=True)
                print(f"RT-DETR: {len(rtdetr_bubbles)} bubbles")
        else:
            print("Usage:")
            print(" python bubble_detector.py download")
            print(" python bubble_detector.py detect <model_path> <image_path> [output_path]")
            print(" python bubble_detector.py test-both <image_path>")
    else:
        print("Bubble Detector Module (YOLOv8 + RT-DETR)")
        print("Usage:")
        print(" python bubble_detector.py download")
        print(" python bubble_detector.py detect <model_path> <image_path> [output_path]")
        print(" python bubble_detector.py test-both <image_path>")