"""Gradio front end for resizing a piece of text inside an image.

The app runs OCR on an uploaded image, works out which recognised text the
user is referring to (via an OpenAI-backed parser or a local fallback
heuristic) and asks ``TextResizer`` to scale that text.
"""

import os
import re

import cv2
import gradio as gr
import numpy as np
import spaces  # NOTE: kept ahead of ``core`` so it is imported before any GPU library
from PIL import Image

# Import our custom modules
from core import TextResizer
from prompt_handler import PromptHandler
from utils import (
    load_image,
    save_image,
    validate_scale_factor,
    parse_percentage_to_scale_factor,
    create_output_filename,
)

# Initialize the text resizer with GPU support (English only)
text_resizer = TextResizer(languages=['en'], gpu=True)

# Text wrapped in single or double quotes, e.g. enlarge 'Hello' by 50%.
_QUOTED_TEXT_RE = re.compile(r'["\']([^"\']+)["\']')
# Individual words of the prompt, used for the last-resort partial match.
_WORD_RE = re.compile(r'\b\w+\b')


def _ocr_texts(ocr_results):
    """Return the stripped text of every ``(bbox, text, confidence)`` entry."""
    return [text.strip() for _, text, _ in ocr_results]


def find_target_text_in_prompt(user_prompt, ocr_results):
    """Guess which OCR-recognised text the user's prompt refers to.

    Matching is case-insensitive and tried in this order:

    1. Text the user put in quotes: an exact match against any OCR text wins,
       otherwise the first OCR text that contains / is contained in it.
    2. The first OCR text that appears verbatim inside the prompt.
    3. The first OCR text containing any prompt word longer than two
       characters.

    Args:
        user_prompt: The instruction typed by the user.
        ocr_results: Iterable of ``(bbox, text, confidence)`` tuples.

    Returns:
        The matching OCR text (stripped), or ``None`` when nothing matches or
        the prompt is empty.
    """
    if not user_prompt:
        return None

    # An empty string is a substring of every string, so an empty OCR entry
    # would "match" any prompt; drop those before matching.
    candidates = [(text, text.lower()) for text in _ocr_texts(ocr_results) if text]

    # 1. Quoted text in the prompt.
    for quoted in _QUOTED_TEXT_RE.findall(user_prompt):
        quoted_lower = quoted.lower()
        # Exact matches take priority over partial ones regardless of the
        # order in which OCR returned the regions.
        for text, text_lower in candidates:
            if quoted_lower == text_lower:
                return text
        for text, text_lower in candidates:
            if quoted_lower in text_lower or text_lower in quoted_lower:
                return text

    # 2. No usable quotes: look for OCR text contained in the prompt.
    prompt_lower = user_prompt.lower()
    for text, text_lower in candidates:
        if text_lower in prompt_lower:
            return text

    # 3. Fall back to single-word partial matches.
    for word in _WORD_RE.findall(prompt_lower):
        if len(word) > 2:  # ignore very short words
            for text, text_lower in candidates:
                if word in text_lower:
                    return text

    return None


@spaces.GPU
def process_image(input_image, user_prompt, use_ai_parsing=True, api_key=None):
    """Resize the text named in ``user_prompt`` inside ``input_image``.

    Args:
        input_image: PIL image supplied by the Gradio image component, or
            ``None`` when nothing was uploaded.
        user_prompt: Natural-language instruction from the user.
        use_ai_parsing: When true and an API key is given, parse the prompt
            with ``PromptHandler``; otherwise use the local fallback parser.
        api_key: OpenAI API key for the AI parser (optional).

    Returns:
        A ``(image, status_message)`` tuple. ``image`` is a PIL image on
        success and ``None`` on any failure; errors are reported through the
        status message instead of being raised.
    """
    try:
        if input_image is None:
            return None, "❌ 错误: 请上传一张图片"

        # Convert PIL to RGB numpy array
        image_rgb = np.array(input_image.convert('RGB'))

        # Perform OCR
        ocr_results = text_resizer.read_text(image_rgb)
        if not ocr_results:
            return None, "❌ 错误: 未在图像中识别到任何文字"

        # Pasted keys often carry stray whitespace; a blank key means "no key".
        api_key = (api_key or "").strip()

        # Parse user prompt
        try:
            if use_ai_parsing and api_key:
                # Use OpenAI API parsing
                prompt_handler = PromptHandler(api_key=api_key)
                parsed_result = prompt_handler.parse_user_request(ocr_results, user_prompt)
                if not prompt_handler.validate_parsed_result(parsed_result, ocr_results):
                    raise ValueError("AI解析结果验证失败")
                target_text = parsed_result["target_text"]
                scale_factor = validate_scale_factor(parsed_result["scale_factor"])
                status_msg = f"✅ AI解析成功: 目标文字='{target_text}', 缩放因子={scale_factor}"
            else:
                # Use fallback parsing
                scale_factor = parse_percentage_to_scale_factor(user_prompt)
                if scale_factor == 1.0:
                    return None, "❌ 错误: 无法从用户指令中解析出缩放信息"

                # Try to find target text from user prompt
                target_text = find_target_text_in_prompt(user_prompt, ocr_results)
                if not target_text:
                    # If no specific text found in prompt, ask user to specify
                    available_texts = _ocr_texts(ocr_results)
                    return None, f"❌ 错误: 无法确定要调整的文字。请在指令中明确指定文字,如 'enlarge \"具体文字\" by 50%'\n\n📝 可用的文字: {available_texts}"

                status_msg = f"✅ 备用解析: 目标文字='{target_text}', 缩放因子={scale_factor}"
        except Exception as e:
            return None, f"❌ 错误: 指令解析失败: {e}"

        # Process the image
        try:
            result_image = text_resizer.resize_text(image_rgb, target_text, scale_factor)
            # Convert back to PIL Image
            result_pil = Image.fromarray(result_image)
            return result_pil, status_msg
        except ValueError as e:
            # Show available texts so the user can correct the instruction
            available_texts = _ocr_texts(ocr_results)
            error_msg = f"❌ 错误: {e}\n\n📝 可用的文字: {available_texts}"
            return None, error_msg

    except Exception as e:
        # UI boundary: report the failure in the status box rather than crash.
        return None, f"❌ 处理过程中出现错误: {e}"


@spaces.GPU
def get_ocr_info(input_image):
    """Return a human-readable summary of the text OCR finds in the image.

    Args:
        input_image: PIL image from the Gradio image component, or ``None``.

    Returns:
        A multi-line string listing each recognised text with its confidence,
        or a short message when there is no image, no text, or OCR fails.
    """
    if input_image is None:
        return "请先上传图片"

    try:
        # Convert PIL to RGB numpy array
        image_rgb = np.array(input_image.convert('RGB'))

        # Perform OCR
        ocr_results = text_resizer.read_text(image_rgb)
        if not ocr_results:
            return "未识别到任何文字"

        # Format results: header, separator, one line per region, separator
        separator = "=" * 50
        lines = [f"📝 识别到 {len(ocr_results)} 个文字区域:", separator]
        lines.extend(
            f"{index:2d}. '{text}' (置信度: {conf:.2f})"
            for index, (_bbox, text, conf) in enumerate(ocr_results, start=1)
        )
        lines.append(separator)
        return "\n".join(lines)

    except Exception as e:
        return f"❌ OCR识别失败: {e}"


# Define CSS for styling
css = """
/* Global text color fixes - high priority */
body, .gradio-container, .gradio-container * {
    color: #333 !important;
}

/* Force all text elements to have good contrast */
p, div, span, label, input, textarea, button, h1, h2, h3, h4, h5, h6 {
    color: #333 !important;
}

#col-container {
    margin: 0 auto;
    max-width: 1000px;
    color: #333 !important;
}

#input-section {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    padding: 20px;
    border-radius: 15px;
    margin-bottom: 20px;
}

#output-section {
    background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);
    padding: 20px;
    border-radius: 15px;
}

.gradio-container {
    background: linear-gradient(135deg, #ffecd2 0%, #fcb69f 100%);
}

#title {
    text-align: center;
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    font-size: 2.5em;
    font-weight: bold;
    margin-bottom: 20px;
}

#description {
    text-align: center;
    color: #222 !important;
    font-size: 1.1em;
    margin-bottom: 30px;
    line-height: 1.6;
}

.process-button {
    background: linear-gradient(135deg, #4CAF50 0%, #45a049 100%);
    color: white !important;
    border: none;
    padding: 15px 30px;
    font-size: 16px;
    border-radius: 10px;
    cursor: pointer;
    transition: all 0.3s ease;
}

.process-button:hover {
    transform: translateY(-2px);
    box-shadow: 0 5px 15px rgba(0,0,0,0.2);
}

/* White text for dark gradient sections */
#input-section, #input-section * {
    color: white !important;
}

#output-section, #output-section * {
    color: white !important;
}

/* Override Gradio's default text colors */
.gradio-container .gr-markdown p,
.gradio-container .gr-markdown div,
.gradio-container .gr-markdown span,
.gradio-container .gr-markdown li {
    color: #333 !important;
}

#input-section .gr-markdown p,
#input-section .gr-markdown div,
#input-section .gr-markdown span,
#input-section .gr-markdown li {
    color: white !important;
}

#output-section .gr-markdown p,
#output-section .gr-markdown div,
#output-section .gr-markdown span,
#output-section .gr-markdown li {
    color: white !important;
}

/* Force all labels and form elements to have proper contrast */
label, .gr-form label, .gr-textbox label, .gr-button, .gr-checkbox label {
    color: #333 !important;
    font-weight: 500;
}

#input-section label,
#input-section .gr-form label,
#input-section .gr-textbox label,
#input-section .gr-button,
#input-section .gr-checkbox label {
    color: white !important;
}

#output-section label,
#output-section .gr-form label,
#output-section .gr-textbox label,
#output-section .gr-button,
#output-section .gr-checkbox label {
    color: white !important;
}

/* Additional fallback for any missed text elements */
.gradio-container [class*="text"],
.gradio-container [class*="label"],
.gradio-container [class*="markdown"] {
    color: #333 !important;
}

#input-section [class*="text"],
#input-section [class*="label"],
#input-section [class*="markdown"] {
    color: white !important;
}

#output-section [class*="text"],
#output-section [class*="label"],
#output-section [class*="markdown"] {
    color: white !important;
}
"""

# Create the Gradio interface
with gr.Blocks(css=css, title="智能文字缩放工具") as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown("# 🎨 智能文字缩放工具", elem_id="title")

        gr.Markdown(
            """
            🚀 **使用AI技术智能调整图片中的文字大小**

            📝 支持自然语言指令,如:
            - enlarge 'Hello' by 50% - 将'Hello'放大50%
            - make the title bigger - 让标题变大
            - shrink the footer text - 缩小页脚文字

            🎯 **使用方法**:
            1. 上传包含文字的图片
            2. 输入文字调整指令
            3. 点击处理按钮
            4. 查看处理结果
            """,
            elem_id="description"
        )

        with gr.Row():
            with gr.Column(scale=1):
                with gr.Group(elem_id="input-section"):
                    gr.Markdown("### 📤 输入设置")

                    # Image input
                    input_image = gr.Image(
                        label="上传图片",
                        type="pil",
                        height=300,
                        sources=["upload", "clipboard", "webcam"]
                    )

                    # Prompt input
                    user_prompt = gr.Textbox(
                        label="文字调整指令",
                        placeholder="例如: enlarge 'Hello' by 50%",
                        lines=2,
                        info="支持自然语言描述,如 make XX bigger 或 enlarge XX by 50%"
                    )

                    # OCR info button
                    ocr_button = gr.Button(
                        "🔍 查看图片中的文字",
                        variant="secondary",
                        size="sm"
                    )

                    # Advanced settings
                    with gr.Accordion("⚙️ 高级设置", open=False):
                        use_ai_parsing = gr.Checkbox(
                            label="🤖 使用AI解析 (推荐,需要OpenAI API密钥)",
                            value=True,
                            info="使用GPT-4.1-nano模型智能理解自然语言指令"
                        )

                        api_key = gr.Textbox(
                            label="🔑 OpenAI API密钥 (可选)",
                            placeholder="sk-...",
                            type="password",
                            info="仅在使用AI解析时需要"
                        )

                    # Process button
                    process_button = gr.Button(
                        "🎯 开始处理",
                        variant="primary",
                        size="lg",
                        elem_classes="process-button"
                    )

            with gr.Column(scale=1):
                with gr.Group(elem_id="output-section"):
                    gr.Markdown("### 📤 处理结果")

                    # Output image
                    output_image = gr.Image(
                        label="处理后的图片",
                        height=300,
                        show_download_button=True
                    )

                    # Status message
                    status_message = gr.Textbox(
                        label="💬 状态信息",
                        lines=4,
                        max_lines=8,
                        interactive=False
                    )

                    # OCR info display
                    ocr_info = gr.Textbox(
                        label="📝 OCR识别结果",
                        lines=6,
                        max_lines=10,
                        interactive=False
                    )

        # Examples section
        gr.Markdown("### 📚 示例用法")

        gr.Markdown(
            """
            **示例指令格式:**

            🔍 **指定文字 + 具体比例:**
            - enlarge 'Hello' by 50% - 将'Hello'放大50%
            - shrink 'Title' by 30% - 将'Title'缩小30%

            🎯 **自然语言描述:**
            - make the title bigger - 让标题变大
            - make the text smaller - 让文字变小
            - enlarge the heading - 放大标题

            💡 **使用提示:**
            1. 上传包含文字的图片
            2. 先点击"查看图片中的文字"了解可用文字
            3. 输入调整指令
            4. 点击"开始处理"
            """
        )

        # Event handlers
        process_button.click(
            fn=process_image,
            inputs=[input_image, user_prompt, use_ai_parsing, api_key],
            outputs=[output_image, status_message]
        )

        ocr_button.click(
            fn=get_ocr_info,
            inputs=[input_image],
            outputs=[ocr_info]
        )

        # Auto-run OCR when image is uploaded
        input_image.change(
            fn=get_ocr_info,
            inputs=[input_image],
            outputs=[ocr_info]
        )

        # Footer
        gr.Markdown(
            """
            ---
            🎨 **智能文字缩放工具** | 基于OCR和AI技术的智能图像文字处理

            📧 如有问题或建议,请联系开发者
            """
        )

if __name__ == "__main__":
    # Fixed text contrast issues - force redeploy
    demo.launch(share=True, server_name="0.0.0.0", server_port=7860)