Spaces:
Sleeping
Sleeping
File size: 6,981 Bytes
838e8f6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 |
import cv2
import numpy as np
import easyocr
from typing import List, Tuple, Optional
class TextResizer:
def __init__(self, languages=['en', 'ch_sim'], gpu=False):
"""
初始化文字缩放器
Args:
languages: OCR支持的语言列表
gpu: 是否使用GPU
"""
self.reader = easyocr.Reader(languages, gpu=gpu)
def read_text(self, image: np.ndarray) -> List[Tuple]:
"""
从图像中识别文字
Args:
image: RGB格式的图像数组
Returns:
OCR结果列表,每个元素为(bbox, text, confidence)
"""
return self.reader.readtext(image)
def extract_text_mask_by_content(self, image: np.ndarray, results: List[Tuple], target_text: str) -> np.ndarray:
"""
根据目标文字内容提取文字mask
Args:
image: RGB格式的图像数组
results: OCR识别结果
target_text: 目标文字内容
Returns:
文字mask,白色为文字区域
"""
h, w = image.shape[:2]
mask = np.zeros((h, w), dtype=np.uint8)
for (bbox, text, _) in results:
if text.strip() != target_text:
continue
x_min = int(min([pt[0] for pt in bbox]))
x_max = int(max([pt[0] for pt in bbox]))
y_min = int(min([pt[1] for pt in bbox]))
y_max = int(max([pt[1] for pt in bbox]))
roi = image[y_min:y_max, x_min:x_max]
gray = cv2.cvtColor(roi, cv2.COLOR_RGB2GRAY)
thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
cv2.THRESH_BINARY_INV, 11, 2)
contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
mask_roi = np.zeros_like(thresh)
cv2.drawContours(mask_roi, contours, -1, 255, -1)
mask[y_min:y_max, x_min:x_max] = np.maximum(mask[y_min:y_max, x_min:x_max], mask_roi)
return mask
def inpaint_image(self, image: np.ndarray, mask: np.ndarray) -> np.ndarray:
"""
使用mask对图像进行修复
Args:
image: RGB格式的图像数组
mask: 需要修复的区域mask
Returns:
修复后的图像
"""
return cv2.inpaint(image, mask, 3, cv2.INPAINT_TELEA)
def find_target_bbox(self, results: List[Tuple], target_text: str) -> Optional[List]:
"""
查找目标文字的边界框
Args:
results: OCR识别结果
target_text: 目标文字内容
Returns:
目标文字的边界框,如果未找到则返回None
"""
for (bbox, text, _) in results:
if text.strip() == target_text:
return bbox
return None
def create_resized_text_patch(self, image: np.ndarray, bbox: List, scale_factor: float) -> Tuple[np.ndarray, int, int]:
"""
创建缩放后的文字补丁
Args:
image: RGB格式的图像数组
bbox: 文字边界框
scale_factor: 缩放因子
Returns:
(RGBA格式的缩放后文字补丁, 原始中心x坐标, 原始中心y坐标)
"""
# 提取ROI
x_min = int(min(pt[0] for pt in bbox))
x_max = int(max(pt[0] for pt in bbox))
y_min = int(min(pt[1] for pt in bbox))
y_max = int(max(pt[1] for pt in bbox))
roi = image[y_min:y_max, x_min:x_max]
# 创建文字mask
gray = cv2.cvtColor(roi, cv2.COLOR_RGB2GRAY)
thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
cv2.THRESH_BINARY_INV, 11, 2)
contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
mask_roi = np.zeros_like(thresh)
cv2.drawContours(mask_roi, contours, -1, 255, -1)
# 创建RGBA补丁
rgba_patch = cv2.cvtColor(roi, cv2.COLOR_RGB2RGBA)
rgba_patch[:, :, 3] = mask_roi
# 缩放
h, w = rgba_patch.shape[:2]
new_size = (int(w * scale_factor), int(h * scale_factor))
resized_patch = cv2.resize(rgba_patch, new_size, interpolation=cv2.INTER_LINEAR)
# 计算原始中心点
cx = (x_min + x_max) // 2
cy = (y_min + y_max) // 2
return resized_patch, cx, cy
def blend_text_patch(self, canvas: np.ndarray, patch: np.ndarray, center_x: int, center_y: int) -> np.ndarray:
"""
将文字补丁混合到画布上
Args:
canvas: 目标画布(RGB格式)
patch: RGBA格式的文字补丁
center_x: 放置的中心x坐标
center_y: 放置的中心y坐标
Returns:
混合后的图像
"""
result = canvas.copy()
new_h, new_w = patch.shape[:2]
top_left_x = max(0, center_x - new_w // 2)
top_left_y = max(0, center_y - new_h // 2)
for y in range(new_h):
for x in range(new_w):
if patch[y, x, 3] > 0: # 如果alpha > 0
yy = top_left_y + y
xx = top_left_x + x
if 0 <= yy < result.shape[0] and 0 <= xx < result.shape[1]:
alpha = patch[y, x, 3] / 255.0
result[yy, xx] = (
(1 - alpha) * result[yy, xx] + alpha * patch[y, x, :3]
).astype(np.uint8)
return result
def resize_text(self, image: np.ndarray, target_text: str, scale_factor: float) -> np.ndarray:
"""
完整的文字缩放流程
Args:
image: RGB格式的图像数组
target_text: 目标文字内容
scale_factor: 缩放因子
Returns:
处理后的图像
"""
# 1. OCR识别
results = self.read_text(image)
# 2. 查找目标文字
target_bbox = self.find_target_bbox(results, target_text)
if target_bbox is None:
raise ValueError(f"未找到目标文字: {target_text}")
# 3. 提取文字mask
text_mask = self.extract_text_mask_by_content(image, results, target_text)
# 4. 图像修复
inpainted = self.inpaint_image(image, text_mask)
# 5. 创建缩放后的文字补丁
resized_patch, cx, cy = self.create_resized_text_patch(image, target_bbox, scale_factor)
# 6. 混合文字补丁
result = self.blend_text_patch(inpainted, resized_patch, cx, cy)
return result |