Spaces:

yingzhac
/

text_editing

Sleeping

App Files Files Community

text_editing / app.py

yingzhac

去除多余的黑框样式 - 改为普通文字显示

845f960 4 months ago

raw

history blame contribute delete

15 kB

	import gradio as gr
	import numpy as np
	import os
	from PIL import Image
	import cv2
	import spaces

	# Import our custom modules
	from core import TextResizer
	from prompt_handler import PromptHandler
	from utils import (
	load_image,
	save_image,
	validate_scale_factor,
	parse_percentage_to_scale_factor,
	create_output_filename
	)

	# Shared TextResizer instance, created once at import time and used by both
	# process_image and get_ocr_info. It is configured for English only
	# (languages=['en']) with gpu=True.
	text_resizer = TextResizer(languages=['en'], gpu=True)

	def find_target_text_in_prompt(user_prompt, ocr_results):
	    """
	    Pick the OCR-detected text that the user's instruction refers to.

	    Matching is case-insensitive and is attempted in three stages; the first
	    hit is returned:

	    1. Text enclosed in single or double quotes in the prompt. For each
	       quoted fragment an exact match against an OCR text wins over a
	       substring match (in either direction).
	    2. An OCR text that appears verbatim inside the prompt.
	    3. A prompt word longer than two characters that appears inside an OCR
	       text.

	    OCR entries whose text is empty after stripping are ignored, because an
	    empty string is a substring of every string and would otherwise be
	    returned as a bogus match.

	    Args:
	        user_prompt: Instruction typed by the user.
	        ocr_results: Iterable of 3-item entries whose second item is the
	            recognized text.

	    Returns:
	        The matching OCR text with surrounding whitespace stripped, or None
	        when nothing matches (including an empty or missing prompt).
	    """
	    import re

	    if not user_prompt:
	        return None

	    # Keep each usable OCR text next to its lowercase form so the
	    # case-folding is done once instead of on every comparison.
	    candidates = []
	    for _, text, _ in ocr_results:
	        stripped = text.strip()
	        if stripped:
	            candidates.append((stripped, stripped.lower()))
	    if not candidates:
	        return None

	    # 1. Quoted fragments (single or double quotes) take priority.
	    for quoted_text in re.findall(r'["\']([^"\']+)["\']', user_prompt):
	        wanted = quoted_text.lower()
	        # An exact match must beat a partial match on an earlier OCR entry.
	        for original, lowered in candidates:
	            if wanted == lowered:
	                return original
	        for original, lowered in candidates:
	            if wanted in lowered or lowered in wanted:
	                return original

	    # 2. No usable quotes: look for an OCR text contained in the prompt.
	    prompt_lower = user_prompt.lower()
	    for original, lowered in candidates:
	        if lowered in prompt_lower:
	            return original

	    # 3. Fall back to single prompt words contained in an OCR text.
	    for word in re.findall(r'\b\w+\b', prompt_lower):
	        if len(word) > 2:  # very short words match far too much
	            for original, lowered in candidates:
	                if word in lowered:
	                    return original

	    return None

	@spaces.GPU
	def process_image(input_image, user_prompt, use_ai_parsing=True, api_key=None):
	    """
	    Resize one piece of text in an uploaded image according to an instruction.

	    OCR is run on the image, the instruction is turned into a target text and
	    a scale factor (through PromptHandler when AI parsing is enabled and an
	    API key is supplied, otherwise through the local fallback parser), and the
	    shared text resizer is asked to resize that text.

	    Args:
	        input_image: Uploaded PIL image, or None when nothing was uploaded.
	        user_prompt: Instruction naming the text and the size change.
	        use_ai_parsing: Whether PromptHandler-based parsing should be tried.
	        api_key: Key handed to PromptHandler; AI parsing is skipped when falsy.

	    Returns:
	        A ``(image, message)`` pair: the processed PIL image with a status
	        line on success, or ``None`` with an error message on failure.
	    """
	    try:
	        if input_image is None:
	            return None, "❌ 错误: 请上传一张图片"

	        # OCR and resizing both operate on an RGB numpy array.
	        rgb_pixels = np.array(input_image.convert('RGB'))
	        detections = text_resizer.read_text(rgb_pixels)
	        if not detections:
	            return None, "❌ 错误: 未在图像中识别到任何文字"

	        # Stage 1: work out which text to resize and by what factor.
	        try:
	            if not (use_ai_parsing and api_key):
	                # Local fallback: percentage parsing plus text lookup.
	                scale_factor = parse_percentage_to_scale_factor(user_prompt)
	                if scale_factor == 1.0:
	                    return None, "❌ 错误: 无法从用户指令中解析出缩放信息"

	                target_text = find_target_text_in_prompt(user_prompt, detections)
	                if not target_text:
	                    # Nothing in the prompt could be tied to an OCR text, so
	                    # list what is available and let the user be explicit.
	                    recognized = [label.strip() for _box, label, _score in detections]
	                    return None, f"❌ 错误: 无法确定要调整的文字。请在指令中明确指定文字，如 'enlarge \"具体文字\" by 50%'\n\n📝 可用的文字: {recognized}"

	                status_msg = f"✅ 备用解析: 目标文字='{target_text}', 缩放因子={scale_factor}"
	            else:
	                # AI-based parsing through PromptHandler.
	                handler = PromptHandler(api_key=api_key)
	                parsed = handler.parse_user_request(detections, user_prompt)
	                if not handler.validate_parsed_result(parsed, detections):
	                    raise Exception("AI解析结果验证失败")

	                target_text = parsed["target_text"]
	                scale_factor = validate_scale_factor(parsed["scale_factor"])
	                status_msg = f"✅ AI解析成功: 目标文字='{target_text}', 缩放因子={scale_factor}"
	        except Exception as parse_error:
	            return None, f"❌ 错误: 指令解析失败: {parse_error!s}"

	        # Stage 2: apply the resize and hand back a PIL image.
	        try:
	            resized_pixels = text_resizer.resize_text(rgb_pixels, target_text, scale_factor)
	            return Image.fromarray(resized_pixels), status_msg
	        except ValueError as lookup_error:
	            # Report the failure together with the texts OCR did find.
	            recognized = [label.strip() for _box, label, _score in detections]
	            return None, f"❌ 错误: {lookup_error!s}\n\n📝 可用的文字: {recognized}"

	    except Exception as unexpected:
	        return None, f"❌ 处理过程中出现错误: {unexpected!s}"

	@spaces.GPU
	def get_ocr_info(input_image):
	    """
	    Build a human-readable listing of the text regions OCR finds in an image.

	    Args:
	        input_image: Uploaded PIL image, or None when nothing was uploaded.

	    Returns:
	        A string: a numbered list of recognized texts with their confidence,
	        or a short message when there is no image, no text, or OCR failed.
	    """
	    if input_image is None:
	        return "请先上传图片"

	    try:
	        # OCR operates on an RGB numpy array.
	        rgb_pixels = np.array(input_image.convert('RGB'))
	        detections = text_resizer.read_text(rgb_pixels)
	        if not detections:
	            return "未识别到任何文字"

	        # Header, ruler, one numbered line per region, closing ruler.
	        ruler = "=" * 50
	        pieces = [f"📝 识别到 {len(detections)} 个文字区域:\n", ruler + "\n"]
	        pieces.extend(
	            f"{position:2d}. '{text}' (置信度: {conf:.2f})\n"
	            for position, (bbox, text, conf) in enumerate(detections, start=1)
	        )
	        pieces.append(ruler)
	        return "".join(pieces)

	    except Exception as ocr_error:
	        return f"❌ OCR识别失败: {ocr_error!s}"

	# Define CSS for styling
	# Custom stylesheet handed to gr.Blocks(css=css). The id selectors
	# (#col-container, #title, #description, #input-section, #output-section)
	# and the .process-button class correspond to the elem_id / elem_classes
	# values set on the components in the interface definition. Rules set a
	# dark text colour globally and white text inside the two gradient sections.
	css = """
	/* Global text color fixes - high priority */
	body, .gradio-container, .gradio-container * {
	color: #333 !important;
	}

	/* Force all text elements to have good contrast */
	p, div, span, label, input, textarea, button, h1, h2, h3, h4, h5, h6 {
	color: #333 !important;
	}

	#col-container {
	margin: 0 auto;
	max-width: 1000px;
	color: #333 !important;
	}

	#input-section {
	background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
	padding: 20px;
	border-radius: 15px;
	margin-bottom: 20px;
	}

	#output-section {
	background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);
	padding: 20px;
	border-radius: 15px;
	}

	.gradio-container {
	background: linear-gradient(135deg, #ffecd2 0%, #fcb69f 100%);
	}

	#title {
	text-align: center;
	background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
	-webkit-background-clip: text;
	-webkit-text-fill-color: transparent;
	font-size: 2.5em;
	font-weight: bold;
	margin-bottom: 20px;
	}

	#description {
	text-align: center;
	color: #222 !important;
	font-size: 1.1em;
	margin-bottom: 30px;
	line-height: 1.6;
	}

	.process-button {
	background: linear-gradient(135deg, #4CAF50 0%, #45a049 100%);
	color: white !important;
	border: none;
	padding: 15px 30px;
	font-size: 16px;
	border-radius: 10px;
	cursor: pointer;
	transition: all 0.3s ease;
	}

	.process-button:hover {
	transform: translateY(-2px);
	box-shadow: 0 5px 15px rgba(0,0,0,0.2);
	}

	/* White text for dark gradient sections */
	#input-section, #input-section * {
	color: white !important;
	}

	#output-section, #output-section * {
	color: white !important;
	}

	/* Override Gradio's default text colors */
	.gradio-container .gr-markdown p,
	.gradio-container .gr-markdown div,
	.gradio-container .gr-markdown span,
	.gradio-container .gr-markdown li {
	color: #333 !important;
	}

	#input-section .gr-markdown p,
	#input-section .gr-markdown div,
	#input-section .gr-markdown span,
	#input-section .gr-markdown li {
	color: white !important;
	}

	#output-section .gr-markdown p,
	#output-section .gr-markdown div,
	#output-section .gr-markdown span,
	#output-section .gr-markdown li {
	color: white !important;
	}

	/* Force all labels and form elements to have proper contrast */
	label, .gr-form label, .gr-textbox label, .gr-button, .gr-checkbox label {
	color: #333 !important;
	font-weight: 500;
	}

	#input-section label,
	#input-section .gr-form label,
	#input-section .gr-textbox label,
	#input-section .gr-button,
	#input-section .gr-checkbox label {
	color: white !important;
	}

	#output-section label,
	#output-section .gr-form label,
	#output-section .gr-textbox label,
	#output-section .gr-button,
	#output-section .gr-checkbox label {
	color: white !important;
	}

	/* Additional fallback for any missed text elements */
	.gradio-container [class*="text"],
	.gradio-container [class*="label"],
	.gradio-container [class*="markdown"] {
	color: #333 !important;
	}

	#input-section [class*="text"],
	#input-section [class*="label"],
	#input-section [class*="markdown"] {
	color: white !important;
	}

	#output-section [class*="text"],
	#output-section [class*="label"],
	#output-section [class*="markdown"] {
	color: white !important;
	}
	"""

	# Create the Gradio interface
	# The Blocks app is bound to `demo` and styled with the `css` string above.
	# The page opens with a title and a usage description, both rendered as
	# Markdown and tagged with the elem_ids the stylesheet targets.
	# NOTE(review): the nesting indentation of this section is missing in this
	# copy of the file and must be restored before the module can run.
	with gr.Blocks(css=css, title="智能文字缩放工具") as demo:
	with gr.Column(elem_id="col-container"):
	gr.Markdown("# 🎨 智能文字缩放工具", elem_id="title")
	gr.Markdown(
	"""
	🚀 使用AI技术智能调整图片中的文字大小

	📝 支持自然语言指令，如：
	- enlarge 'Hello' by 50% - 将'Hello'放大50%
	- make the title bigger - 让标题变大
	- shrink the footer text - 缩小页脚文字

	🎯 使用方法：
	1. 上传包含文字的图片
	2. 输入文字调整指令
	3. 点击处理按钮
	4. 查看处理结果
	""",
	elem_id="description"
	)

	# Input side of the layout: image upload, instruction textbox, OCR preview
	# button, advanced settings and the main process button. The group carries
	# elem_id="input-section", which the stylesheet targets.
	with gr.Row():
	with gr.Column(scale=1):
	with gr.Group(elem_id="input-section"):
	gr.Markdown("### 📤 输入设置")

	# Image input
	# type="pil" because the handlers call input_image.convert('RGB').
	input_image = gr.Image(
	label="上传图片",
	type="pil",
	height=300,
	sources=["upload", "clipboard", "webcam"]
	)

	# Prompt input
	user_prompt = gr.Textbox(
	label="文字调整指令",
	placeholder="例如: enlarge 'Hello' by 50%",
	lines=2,
	info="支持自然语言描述，如 make XX bigger 或 enlarge XX by 50%"
	)

	# OCR info button
	ocr_button = gr.Button(
	"🔍 查看图片中的文字",
	variant="secondary",
	size="sm"
	)

	# Advanced settings
	# Both values are passed to process_image, which only uses AI parsing
	# when the checkbox is ticked and a key is present.
	with gr.Accordion("⚙️ 高级设置", open=False):
	use_ai_parsing = gr.Checkbox(
	label="🤖 使用AI解析 (推荐，需要OpenAI API密钥)",
	value=True,
	info="使用GPT-4.1-nano模型智能理解自然语言指令"
	)

	api_key = gr.Textbox(
	label="🔑 OpenAI API密钥 (可选)",
	placeholder="sk-...",
	type="password",
	info="仅在使用AI解析时需要"
	)

	# Process button
	# elem_classes ties this button to the .process-button CSS rules.
	process_button = gr.Button(
	"🎯 开始处理",
	variant="primary",
	size="lg",
	elem_classes="process-button"
	)

	# Output side of the layout: processed image, status text and the OCR
	# listing. The group carries elem_id="output-section", which the stylesheet
	# targets.
	with gr.Column(scale=1):
	with gr.Group(elem_id="output-section"):
	gr.Markdown("### 📤 处理结果")

	# Output image
	# Receives the first value returned by process_image.
	output_image = gr.Image(
	label="处理后的图片",
	height=300,
	show_download_button=True
	)

	# Status message
	# Receives the second value returned by process_image; read-only.
	status_message = gr.Textbox(
	label="💬 状态信息",
	lines=4,
	max_lines=8,
	interactive=False
	)

	# OCR info display
	# Receives the string returned by get_ocr_info; read-only.
	ocr_info = gr.Textbox(
	label="📝 OCR识别结果",
	lines=6,
	max_lines=10,
	interactive=False
	)

	# Examples section
	# Static Markdown only: sample instruction formats and usage tips. No
	# component here is wired to an event handler.
	gr.Markdown("### 📚 示例用法")
	gr.Markdown(
	"""
	示例指令格式：

	🔍 指定文字 + 具体比例：
	- enlarge 'Hello' by 50% - 将'Hello'放大50%
	- shrink 'Title' by 30% - 将'Title'缩小30%

	🎯 自然语言描述：
	- make the title bigger - 让标题变大
	- make the text smaller - 让文字变小
	- enlarge the heading - 放大标题

	💡 使用提示：
	1. 上传包含文字的图片
	2. 先点击"查看图片中的文字"了解可用文字
	3. 输入调整指令
	4. 点击"开始处理"
	"""
	)

	# Event handlers
	# Process button: run process_image on the image, prompt and advanced
	# settings; its two return values fill the output image and status box.
	process_button.click(
	fn=process_image,
	inputs=[input_image, user_prompt, use_ai_parsing, api_key],
	outputs=[output_image, status_message]
	)

	# OCR button: run get_ocr_info on the current image and show the listing.
	ocr_button.click(
	fn=get_ocr_info,
	inputs=[input_image],
	outputs=[ocr_info]
	)

	# Auto-run OCR when image is uploaded
	# Same handler as the OCR button, triggered by the image's change event.
	input_image.change(
	fn=get_ocr_info,
	inputs=[input_image],
	outputs=[ocr_info]
	)

	# Footer
	# Static Markdown shown at the bottom of the page. The backslash before the
	# pipe is written as "\\|" so the literal still contains backslash + pipe
	# without being an invalid escape sequence.
	gr.Markdown(
	"""
	---

	🎨 智能文字缩放工具 \\| 基于OCR和AI技术的智能图像文字处理

	📧 如有问题或建议，请联系开发者
	"""
	)

	if __name__ == "__main__":
	    # Start the app only when the file is run as a script: listen on all
	    # interfaces at port 7860 and request a Gradio share link.
	    launch_options = {
	        "share": True,
	        "server_name": "0.0.0.0",
	        "server_port": 7860,
	    }
	    demo.launch(**launch_options)