import cv2 import numpy as np import easyocr from typing import List, Tuple, Optional class TextResizer: def __init__(self, languages=['en', 'ch_sim'], gpu=False): """ 初始化文字缩放器 Args: languages: OCR支持的语言列表 gpu: 是否使用GPU """ self.reader = easyocr.Reader(languages, gpu=gpu) def read_text(self, image: np.ndarray) -> List[Tuple]: """ 从图像中识别文字 Args: image: RGB格式的图像数组 Returns: OCR结果列表,每个元素为(bbox, text, confidence) """ return self.reader.readtext(image) def extract_text_mask_by_content(self, image: np.ndarray, results: List[Tuple], target_text: str) -> np.ndarray: """ 根据目标文字内容提取文字mask Args: image: RGB格式的图像数组 results: OCR识别结果 target_text: 目标文字内容 Returns: 文字mask,白色为文字区域 """ h, w = image.shape[:2] mask = np.zeros((h, w), dtype=np.uint8) for (bbox, text, _) in results: if text.strip() != target_text: continue x_min = int(min([pt[0] for pt in bbox])) x_max = int(max([pt[0] for pt in bbox])) y_min = int(min([pt[1] for pt in bbox])) y_max = int(max([pt[1] for pt in bbox])) roi = image[y_min:y_max, x_min:x_max] gray = cv2.cvtColor(roi, cv2.COLOR_RGB2GRAY) thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2) contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) mask_roi = np.zeros_like(thresh) cv2.drawContours(mask_roi, contours, -1, 255, -1) mask[y_min:y_max, x_min:x_max] = np.maximum(mask[y_min:y_max, x_min:x_max], mask_roi) return mask def inpaint_image(self, image: np.ndarray, mask: np.ndarray) -> np.ndarray: """ 使用mask对图像进行修复 Args: image: RGB格式的图像数组 mask: 需要修复的区域mask Returns: 修复后的图像 """ return cv2.inpaint(image, mask, 3, cv2.INPAINT_TELEA) def find_target_bbox(self, results: List[Tuple], target_text: str) -> Optional[List]: """ 查找目标文字的边界框 Args: results: OCR识别结果 target_text: 目标文字内容 Returns: 目标文字的边界框,如果未找到则返回None """ for (bbox, text, _) in results: if text.strip() == target_text: return bbox return None def create_resized_text_patch(self, image: np.ndarray, bbox: List, scale_factor: float) -> Tuple[np.ndarray, int, int]: """ 创建缩放后的文字补丁 Args: image: RGB格式的图像数组 bbox: 文字边界框 scale_factor: 缩放因子 Returns: (RGBA格式的缩放后文字补丁, 原始中心x坐标, 原始中心y坐标) """ # 提取ROI x_min = int(min(pt[0] for pt in bbox)) x_max = int(max(pt[0] for pt in bbox)) y_min = int(min(pt[1] for pt in bbox)) y_max = int(max(pt[1] for pt in bbox)) roi = image[y_min:y_max, x_min:x_max] # 创建文字mask gray = cv2.cvtColor(roi, cv2.COLOR_RGB2GRAY) thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2) contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) mask_roi = np.zeros_like(thresh) cv2.drawContours(mask_roi, contours, -1, 255, -1) # 创建RGBA补丁 rgba_patch = cv2.cvtColor(roi, cv2.COLOR_RGB2RGBA) rgba_patch[:, :, 3] = mask_roi # 缩放 h, w = rgba_patch.shape[:2] new_size = (int(w * scale_factor), int(h * scale_factor)) resized_patch = cv2.resize(rgba_patch, new_size, interpolation=cv2.INTER_LINEAR) # 计算原始中心点 cx = (x_min + x_max) // 2 cy = (y_min + y_max) // 2 return resized_patch, cx, cy def blend_text_patch(self, canvas: np.ndarray, patch: np.ndarray, center_x: int, center_y: int) -> np.ndarray: """ 将文字补丁混合到画布上 Args: canvas: 目标画布(RGB格式) patch: RGBA格式的文字补丁 center_x: 放置的中心x坐标 center_y: 放置的中心y坐标 Returns: 混合后的图像 """ result = canvas.copy() new_h, new_w = patch.shape[:2] top_left_x = max(0, center_x - new_w // 2) top_left_y = max(0, center_y - new_h // 2) for y in range(new_h): for x in range(new_w): if patch[y, x, 3] > 0: # 如果alpha > 0 yy = top_left_y + y xx = top_left_x + x if 0 <= yy < result.shape[0] and 0 <= xx < result.shape[1]: alpha = patch[y, x, 3] / 255.0 result[yy, xx] = ( (1 - alpha) * result[yy, xx] + alpha * patch[y, x, :3] ).astype(np.uint8) return result def resize_text(self, image: np.ndarray, target_text: str, scale_factor: float) -> np.ndarray: """ 完整的文字缩放流程 Args: image: RGB格式的图像数组 target_text: 目标文字内容 scale_factor: 缩放因子 Returns: 处理后的图像 """ # 1. OCR识别 results = self.read_text(image) # 2. 查找目标文字 target_bbox = self.find_target_bbox(results, target_text) if target_bbox is None: raise ValueError(f"未找到目标文字: {target_text}") # 3. 提取文字mask text_mask = self.extract_text_mask_by_content(image, results, target_text) # 4. 图像修复 inpainted = self.inpaint_image(image, text_mask) # 5. 创建缩放后的文字补丁 resized_patch, cx, cy = self.create_resized_text_patch(image, target_bbox, scale_factor) # 6. 混合文字补丁 result = self.blend_text_patch(inpainted, resized_patch, cx, cy) return result