text_editing / app.py
yingzhac's picture
去除多余的黑框样式 - 改为普通文字显示
845f960
import gradio as gr
import numpy as np
import os
from PIL import Image
import cv2
import spaces
# Import our custom modules
from core import TextResizer
from prompt_handler import PromptHandler
from utils import (
load_image,
save_image,
validate_scale_factor,
parse_percentage_to_scale_factor,
create_output_filename
)
# Module-level TextResizer shared by process_image and get_ocr_info.
# It is constructed once at import time for English text with gpu=True.
text_resizer = TextResizer(
    gpu=True,
    languages=["en"],
)
def find_target_text_in_prompt(user_prompt, ocr_results):
    """
    Pick the OCR-detected text that a user instruction refers to.

    Matching is case-insensitive and tried in this order:

    1. Fragments wrapped in single or double quotes in the prompt. For each
       fragment, an exact match against any OCR text wins; only when no
       exact match exists is a substring match (in either direction)
       accepted.
    2. An OCR text that appears verbatim somewhere in the prompt.
    3. A prompt word longer than two characters that appears inside an OCR
       text.

    Args:
        user_prompt: Instruction typed by the user. ``None`` or an empty
            string yields ``None``.
        ocr_results: Sequence of 3-item entries whose second item is the
            recognized text; the other two items are ignored here.

    Returns:
        The matching OCR text with surrounding whitespace stripped, or
        ``None`` when nothing matches.
    """
    import re

    # Blank detections are dropped: an empty string is a substring of every
    # string, so it would otherwise "match" any prompt and hide real hits.
    candidates = []
    for _, text, _ in ocr_results:
        cleaned = text.strip()
        if cleaned:
            candidates.append((cleaned, cleaned.lower()))

    if not user_prompt or not candidates:
        return None

    # 1. Quoted fragments. Exact matches are checked across ALL candidates
    #    before any partial match, so 'Hello' selects "Hello" even when
    #    "Hello World" was detected earlier.
    for quoted in re.findall(r'["\']([^"\']+)["\']', user_prompt):
        needle = quoted.strip().lower()
        if not needle:
            continue
        for original, lowered in candidates:
            if needle == lowered:
                return original
        for original, lowered in candidates:
            if needle in lowered or lowered in needle:
                return original

    # 2. No usable quotes: look for an OCR text contained in the prompt.
    prompt_lower = user_prompt.lower()
    for original, lowered in candidates:
        if lowered in prompt_lower:
            return original

    # 3. Fall back to single prompt words contained in an OCR text.
    for word in re.findall(r'\b\w+\b', prompt_lower):
        if len(word) <= 2:  # very short words are too ambiguous
            continue
        for original, lowered in candidates:
            if word in lowered:
                return original

    return None
@spaces.GPU
def process_image(input_image, user_prompt, use_ai_parsing=True, api_key=None):
    """
    Resize one piece of text in an image according to a user instruction.

    The image is run through OCR, the instruction is parsed into a target
    text and a scale factor, and the resizer is asked to apply the change.
    Parsing goes through PromptHandler when both ``use_ai_parsing`` and
    ``api_key`` are truthy; otherwise the percentage is extracted with
    parse_percentage_to_scale_factor and the target is located with
    find_target_text_in_prompt.

    Returns:
        A ``(image, message)`` pair. On success ``image`` is a PIL image and
        ``message`` describes what was parsed; on any failure ``image`` is
        ``None`` and ``message`` holds the error text shown to the user.
    """
    try:
        if input_image is None:
            return None, "❌ 错误: 请上传一张图片"

        # OCR works on an RGB numpy array built from the PIL input.
        rgb_array = np.array(input_image.convert('RGB'))
        detections = text_resizer.read_text(rgb_array)
        if not detections:
            return None, "❌ 错误: 未在图像中识别到任何文字"

        # Step 1: turn the instruction into (target_text, scale_factor).
        try:
            if not (use_ai_parsing and api_key):
                # Fallback parsing without the OpenAI API.
                scale_factor = parse_percentage_to_scale_factor(user_prompt)
                if scale_factor == 1.0:
                    # A factor of exactly 1.0 is reported as "no scaling
                    # information found" in the instruction.
                    return None, "❌ 错误: 无法从用户指令中解析出缩放信息"

                target_text = find_target_text_in_prompt(user_prompt, detections)
                if not target_text:
                    # Nothing in the prompt matched: list what OCR found so
                    # the user can name one of the texts explicitly.
                    detected_texts = [found.strip() for _, found, _ in detections]
                    return None, f"❌ 错误: 无法确定要调整的文字。请在指令中明确指定文字,如 'enlarge \"具体文字\" by 50%'\n\n📝 可用的文字: {detected_texts}"

                status_msg = f"✅ 备用解析: 目标文字='{target_text}', 缩放因子={scale_factor}"
            else:
                # AI parsing through PromptHandler.
                handler = PromptHandler(api_key=api_key)
                parsed = handler.parse_user_request(detections, user_prompt)
                if not handler.validate_parsed_result(parsed, detections):
                    raise Exception("AI解析结果验证失败")

                target_text = parsed["target_text"]
                scale_factor = validate_scale_factor(parsed["scale_factor"])
                status_msg = f"✅ AI解析成功: 目标文字='{target_text}', 缩放因子={scale_factor}"
        except Exception as parse_error:
            return None, f"❌ 错误: 指令解析失败: {str(parse_error)}"

        # Step 2: apply the resize and hand back a PIL image.
        try:
            resized_array = text_resizer.resize_text(rgb_array, target_text, scale_factor)
            return Image.fromarray(resized_array), status_msg
        except ValueError as resize_error:
            # ValueError gets a friendlier message that lists the OCR texts.
            detected_texts = [found.strip() for _, found, _ in detections]
            return None, f"❌ 错误: {str(resize_error)}\n\n📝 可用的文字: {detected_texts}"

    except Exception as unexpected_error:
        return None, f"❌ 处理过程中出现错误: {str(unexpected_error)}"
@spaces.GPU
def get_ocr_info(input_image):
    """
    Build a human-readable listing of the text regions OCR finds in an image.

    Returns a prompt to upload an image when ``input_image`` is None, a
    "nothing recognized" message when OCR yields no results, a numbered list
    of texts with their confidence otherwise, or an error message if
    anything raises along the way.
    """
    if input_image is None:
        return "请先上传图片"

    try:
        # OCR works on an RGB numpy array built from the PIL input.
        rgb_array = np.array(input_image.convert('RGB'))
        detections = text_resizer.read_text(rgb_array)
        if not detections:
            return "未识别到任何文字"

        # Assemble the report from pieces and join once at the end.
        rule = "=" * 50
        pieces = [f"📝 识别到 {len(detections)} 个文字区域:\n", rule + "\n"]
        for number, (_, text, conf) in enumerate(detections, start=1):
            pieces.append(f"{number:2d}. '{text}' (置信度: {conf:.2f})\n")
        pieces.append(rule)
        return "".join(pieces)
    except Exception as ocr_error:
        return f"❌ OCR识别失败: {str(ocr_error)}"
# Stylesheet handed to gr.Blocks(css=...) when the interface is built.
# Its id/class selectors correspond to the elem_id / elem_classes values
# given to components in the layout: "col-container", "input-section",
# "output-section", "title", "description" and "process-button".
# The text inside the literal is runtime data and is left untouched.
css = """
/* Global text color fixes - high priority */
body, .gradio-container, .gradio-container * {
color: #333 !important;
}
/* Force all text elements to have good contrast */
p, div, span, label, input, textarea, button, h1, h2, h3, h4, h5, h6 {
color: #333 !important;
}
#col-container {
margin: 0 auto;
max-width: 1000px;
color: #333 !important;
}
#input-section {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
padding: 20px;
border-radius: 15px;
margin-bottom: 20px;
}
#output-section {
background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);
padding: 20px;
border-radius: 15px;
}
.gradio-container {
background: linear-gradient(135deg, #ffecd2 0%, #fcb69f 100%);
}
#title {
text-align: center;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
font-size: 2.5em;
font-weight: bold;
margin-bottom: 20px;
}
#description {
text-align: center;
color: #222 !important;
font-size: 1.1em;
margin-bottom: 30px;
line-height: 1.6;
}
.process-button {
background: linear-gradient(135deg, #4CAF50 0%, #45a049 100%);
color: white !important;
border: none;
padding: 15px 30px;
font-size: 16px;
border-radius: 10px;
cursor: pointer;
transition: all 0.3s ease;
}
.process-button:hover {
transform: translateY(-2px);
box-shadow: 0 5px 15px rgba(0,0,0,0.2);
}
/* White text for dark gradient sections */
#input-section, #input-section * {
color: white !important;
}
#output-section, #output-section * {
color: white !important;
}
/* Override Gradio's default text colors */
.gradio-container .gr-markdown p,
.gradio-container .gr-markdown div,
.gradio-container .gr-markdown span,
.gradio-container .gr-markdown li {
color: #333 !important;
}
#input-section .gr-markdown p,
#input-section .gr-markdown div,
#input-section .gr-markdown span,
#input-section .gr-markdown li {
color: white !important;
}
#output-section .gr-markdown p,
#output-section .gr-markdown div,
#output-section .gr-markdown span,
#output-section .gr-markdown li {
color: white !important;
}
/* Force all labels and form elements to have proper contrast */
label, .gr-form label, .gr-textbox label, .gr-button, .gr-checkbox label {
color: #333 !important;
font-weight: 500;
}
#input-section label,
#input-section .gr-form label,
#input-section .gr-textbox label,
#input-section .gr-button,
#input-section .gr-checkbox label {
color: white !important;
}
#output-section label,
#output-section .gr-form label,
#output-section .gr-textbox label,
#output-section .gr-button,
#output-section .gr-checkbox label {
color: white !important;
}
/* Additional fallback for any missed text elements */
.gradio-container [class*="text"],
.gradio-container [class*="label"],
.gradio-container [class*="markdown"] {
color: #333 !important;
}
#input-section [class*="text"],
#input-section [class*="label"],
#input-section [class*="markdown"] {
color: white !important;
}
#output-section [class*="text"],
#output-section [class*="label"],
#output-section [class*="markdown"] {
color: white !important;
}
"""
# Create the Gradio interface: a two-column layout (inputs on the left,
# results on the right) followed by usage examples, the event wiring and a
# footer. The resulting Blocks object is bound to `demo`.
# NOTE(review): the nesting below was reconstructed from the statement order;
# confirm that the examples, event handlers and footer are meant to live
# inside the "col-container" column rather than directly under gr.Blocks.
# The multi-line Markdown literals are kept flush-left so their contents are
# unchanged.
with gr.Blocks(css=css, title="智能文字缩放工具") as demo:
    with gr.Column(elem_id="col-container"):
        # Page title and introductory description (styled via #title /
        # #description in the stylesheet).
        gr.Markdown("# 🎨 智能文字缩放工具", elem_id="title")
        gr.Markdown(
            """
🚀 **使用AI技术智能调整图片中的文字大小**
📝 支持自然语言指令,如:
- enlarge 'Hello' by 50% - 将'Hello'放大50%
- make the title bigger - 让标题变大
- shrink the footer text - 缩小页脚文字
🎯 **使用方法**:
1. 上传包含文字的图片
2. 输入文字调整指令
3. 点击处理按钮
4. 查看处理结果
""",
            elem_id="description"
        )
        with gr.Row():
            # Left column: everything the user provides.
            with gr.Column(scale=1):
                with gr.Group(elem_id="input-section"):
                    gr.Markdown("### 📤 输入设置")
                    # Image input (delivered to the handlers as a PIL image)
                    input_image = gr.Image(
                        label="上传图片",
                        type="pil",
                        height=300,
                        sources=["upload", "clipboard", "webcam"]
                    )
                    # Prompt input
                    user_prompt = gr.Textbox(
                        label="文字调整指令",
                        placeholder="例如: enlarge 'Hello' by 50%",
                        lines=2,
                        info="支持自然语言描述,如 make XX bigger 或 enlarge XX by 50%"
                    )
                    # OCR info button
                    ocr_button = gr.Button(
                        "🔍 查看图片中的文字",
                        variant="secondary",
                        size="sm"
                    )
                    # Advanced settings (collapsed by default)
                    with gr.Accordion("⚙️ 高级设置", open=False):
                        use_ai_parsing = gr.Checkbox(
                            label="🤖 使用AI解析 (推荐,需要OpenAI API密钥)",
                            value=True,
                            info="使用GPT-4.1-nano模型智能理解自然语言指令"
                        )
                        # Masked input; process_image only uses AI parsing
                        # when this value is non-empty.
                        api_key = gr.Textbox(
                            label="🔑 OpenAI API密钥 (可选)",
                            placeholder="sk-...",
                            type="password",
                            info="仅在使用AI解析时需要"
                        )
                    # Process button (styled via .process-button)
                    process_button = gr.Button(
                        "🎯 开始处理",
                        variant="primary",
                        size="lg",
                        elem_classes="process-button"
                    )
            # Right column: everything the app reports back.
            with gr.Column(scale=1):
                with gr.Group(elem_id="output-section"):
                    gr.Markdown("### 📤 处理结果")
                    # Output image
                    output_image = gr.Image(
                        label="处理后的图片",
                        height=300,
                        show_download_button=True
                    )
                    # Status message (read-only)
                    status_message = gr.Textbox(
                        label="💬 状态信息",
                        lines=4,
                        max_lines=8,
                        interactive=False
                    )
                    # OCR info display (read-only)
                    ocr_info = gr.Textbox(
                        label="📝 OCR识别结果",
                        lines=6,
                        max_lines=10,
                        interactive=False
                    )
        # Examples section
        gr.Markdown("### 📚 示例用法")
        gr.Markdown(
            """
**示例指令格式:**
🔍 **指定文字 + 具体比例:**
- enlarge 'Hello' by 50% - 将'Hello'放大50%
- shrink 'Title' by 30% - 将'Title'缩小30%
🎯 **自然语言描述:**
- make the title bigger - 让标题变大
- make the text smaller - 让文字变小
- enlarge the heading - 放大标题
💡 **使用提示:**
1. 上传包含文字的图片
2. 先点击"查看图片中的文字"了解可用文字
3. 输入调整指令
4. 点击"开始处理"
"""
        )
        # Event handlers
        # The process button feeds all four inputs to process_image and
        # shows its (image, message) result.
        process_button.click(
            fn=process_image,
            inputs=[input_image, user_prompt, use_ai_parsing, api_key],
            outputs=[output_image, status_message]
        )
        # Manual OCR listing on demand.
        ocr_button.click(
            fn=get_ocr_info,
            inputs=[input_image],
            outputs=[ocr_info]
        )
        # Auto-run OCR when image is uploaded
        input_image.change(
            fn=get_ocr_info,
            inputs=[input_image],
            outputs=[ocr_info]
        )
        # Footer
        gr.Markdown(
            """
---
🎨 **智能文字缩放工具** | 基于OCR和AI技术的智能图像文字处理
📧 如有问题或建议,请联系开发者
"""
        )
if __name__ == "__main__":
    # Start the Gradio server only when this file is executed as a script:
    # listen on every interface at port 7860 and request a share link.
    demo.launch(
        server_port=7860,
        server_name="0.0.0.0",
        share=True,
    )