Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import numpy as np | |
| import os | |
| from PIL import Image | |
| import cv2 | |
| import spaces | |
| # Import our custom modules | |
| from core import TextResizer | |
| from prompt_handler import PromptHandler | |
| from utils import ( | |
| load_image, | |
| save_image, | |
| validate_scale_factor, | |
| parse_percentage_to_scale_factor, | |
| create_output_filename | |
| ) | |
# Shared text-resizing engine, created once at import time and used by both
# process_image() and get_ocr_info(). Configured for English only, with GPU
# support requested.
text_resizer = TextResizer(languages=['en'], gpu=True)
def find_target_text_in_prompt(user_prompt, ocr_results):
    """Pick the OCR text that the user's instruction most likely refers to.

    Matching is case-insensitive and tried in decreasing order of confidence:

    1. Text the user wrapped in single or double quotes. An exact match
       against any OCR text wins; otherwise the first OCR text that contains,
       or is contained in, the quoted text is used.
    2. The first OCR text that appears verbatim somewhere in the prompt.
    3. The first OCR text containing any prompt word longer than two
       characters.

    Args:
        user_prompt: Instruction typed by the user. ``None`` or an empty
            string yields ``None``.
        ocr_results: Iterable of ``(bbox, text, confidence)`` tuples; only
            the text element is used.

    Returns:
        The matching OCR text with surrounding whitespace stripped, or
        ``None`` when nothing matches.
    """
    import re

    if not user_prompt:
        return None

    # Blank OCR strings are dropped: "" is a substring of every string, so a
    # blank entry would "match" any prompt and shadow the real candidates.
    candidates = []
    for _, text, _ in ocr_results:
        stripped = text.strip()
        if stripped:
            candidates.append((stripped, stripped.lower()))
    if not candidates:
        return None

    # 1. Quoted text (single or double quotes).
    for quoted in re.findall(r'["\']([^"\']+)["\']', user_prompt):
        needle = quoted.strip().lower()
        if not needle:
            continue
        # Scan every candidate for an exact match before accepting a partial
        # one, so 'Hello' selects "Hello" even if "Hello World" comes first.
        for original, lowered in candidates:
            if needle == lowered:
                return original
        for original, lowered in candidates:
            if needle in lowered or lowered in needle:
                return original

    # 2. No usable quotes: look for an OCR text embedded in the prompt.
    prompt_lower = user_prompt.lower()
    for original, lowered in candidates:
        if lowered in prompt_lower:
            return original

    # 3. Last resort: any sufficiently long prompt word found inside an OCR text.
    for word in re.findall(r'\b\w+\b', prompt_lower):
        if len(word) <= 2:  # very short words are too ambiguous to match on
            continue
        for original, lowered in candidates:
            if word in lowered:
                return original

    return None
def process_image(input_image, user_prompt, use_ai_parsing=True, api_key=None):
    """Resize one piece of text in an image according to a user instruction.

    The image is OCR'd, the instruction is turned into a target text and a
    scale factor (through ``PromptHandler`` when AI parsing is enabled and an
    API key is supplied, otherwise through the local fallback parser), and the
    target text is then resized by ``text_resizer``.

    Args:
        input_image: PIL image to process, or ``None`` when nothing was
            uploaded.
        user_prompt: Natural-language instruction, e.g.
            ``enlarge 'Hello' by 50%``.
        use_ai_parsing: Whether to try the API-based parser first.
        api_key: API key for the AI parser; blank or ``None`` selects the
            fallback parser.

    Returns:
        A ``(result_image, status_message)`` tuple. ``result_image`` is a PIL
        image on success and ``None`` on any failure, in which case
        ``status_message`` explains what went wrong. No exception propagates
        to the caller.
    """
    try:
        if input_image is None:
            return None, "❌ 错误: 请上传一张图片"

        # Reject a blank instruction before doing any work, so neither the
        # OCR pass nor a remote API call is spent on a request that cannot
        # succeed.
        if not user_prompt or not user_prompt.strip():
            return None, "❌ 错误: 请输入文字调整指令"

        # A whitespace-only key counts as "no key" and selects the fallback
        # parser instead of an API call that is bound to fail.
        api_key = (api_key or "").strip()

        # Convert PIL to RGB numpy array
        image_rgb = np.array(input_image.convert('RGB'))

        # Perform OCR
        ocr_results = text_resizer.read_text(image_rgb)
        if not ocr_results:
            return None, "❌ 错误: 未在图像中识别到任何文字"

        # Parse user prompt
        try:
            if use_ai_parsing and api_key:
                # Use the API-based parser
                prompt_handler = PromptHandler(api_key=api_key)
                parsed_result = prompt_handler.parse_user_request(ocr_results, user_prompt)
                if not prompt_handler.validate_parsed_result(parsed_result, ocr_results):
                    raise ValueError("AI解析结果验证失败")
                target_text = parsed_result["target_text"]
                scale_factor = validate_scale_factor(parsed_result["scale_factor"])
                status_msg = f"✅ AI解析成功: 目标文字='{target_text}', 缩放因子={scale_factor}"
            else:
                # Use fallback parsing
                scale_factor = parse_percentage_to_scale_factor(user_prompt)
                # A factor of exactly 1.0 is treated as "no scaling
                # information found" (presumably the parser's default).
                if scale_factor == 1.0:
                    return None, "❌ 错误: 无法从用户指令中解析出缩放信息"

                # Try to find target text from user prompt
                target_text = find_target_text_in_prompt(user_prompt, ocr_results)
                if not target_text:
                    # Nothing identifiable: list what OCR found so the user
                    # can name the text explicitly.
                    available_texts = [text.strip() for _, text, _ in ocr_results]
                    return None, f"❌ 错误: 无法确定要调整的文字。请在指令中明确指定文字,如 'enlarge \"具体文字\" by 50%'\n\n📝 可用的文字: {available_texts}"
                status_msg = f"✅ 备用解析: 目标文字='{target_text}', 缩放因子={scale_factor}"
        except Exception as e:
            return None, f"❌ 错误: 指令解析失败: {str(e)}"

        # Process the image
        try:
            result_image = text_resizer.resize_text(image_rgb, target_text, scale_factor)
            # Convert back to PIL Image
            result_pil = Image.fromarray(result_image)
            return result_pil, status_msg
        except ValueError as e:
            # Report the error together with the texts OCR actually found
            available_texts = [text.strip() for _, text, _ in ocr_results]
            error_msg = f"❌ 错误: {str(e)}\n\n📝 可用的文字: {available_texts}"
            return None, error_msg
    except Exception as e:
        # Outermost boundary of the UI callback: report instead of raising.
        return None, f"❌ 处理过程中出现错误: {str(e)}"
def get_ocr_info(input_image):
    """Run OCR on an image and describe every detected text region.

    Args:
        input_image: PIL image to scan, or ``None`` when nothing is uploaded.

    Returns:
        A text report with one numbered line per detected region (the text
        and its confidence) framed by separator lines, or a short message
        when there is no image, no text was found, or OCR failed.
    """
    if input_image is None:
        return "请先上传图片"

    try:
        # OCR runs on an RGB numpy array built from the PIL image.
        rgb_pixels = np.array(input_image.convert('RGB'))
        regions = text_resizer.read_text(rgb_pixels)
        if not regions:
            return "未识别到任何文字"

        # Assemble the report piece by piece and join once at the end.
        separator = "=" * 50
        report = [f"📝 识别到 {len(regions)} 个文字区域:\n", separator + "\n"]
        for position, (_, text, conf) in enumerate(regions, start=1):
            report.append(f"{position:2d}. '{text}' (置信度: {conf:.2f})\n")
        report.append(separator)
        return "".join(report)
    except Exception as exc:
        return f"❌ OCR识别失败: {exc!s}"
# Custom stylesheet passed to gr.Blocks(css=css).
# - Forces dark (#333) text across the Gradio container.
# - Switches to white text inside the #input-section and #output-section
#   gradient panels.
# - Styles the page background, the #title and #description blocks, and the
#   .process-button class (including its hover effect).
# Colour rules are marked !important.
css = """
/* Global text color fixes - high priority */
body, .gradio-container, .gradio-container * {
color: #333 !important;
}
/* Force all text elements to have good contrast */
p, div, span, label, input, textarea, button, h1, h2, h3, h4, h5, h6 {
color: #333 !important;
}
#col-container {
margin: 0 auto;
max-width: 1000px;
color: #333 !important;
}
#input-section {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
padding: 20px;
border-radius: 15px;
margin-bottom: 20px;
}
#output-section {
background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);
padding: 20px;
border-radius: 15px;
}
.gradio-container {
background: linear-gradient(135deg, #ffecd2 0%, #fcb69f 100%);
}
#title {
text-align: center;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
font-size: 2.5em;
font-weight: bold;
margin-bottom: 20px;
}
#description {
text-align: center;
color: #222 !important;
font-size: 1.1em;
margin-bottom: 30px;
line-height: 1.6;
}
.process-button {
background: linear-gradient(135deg, #4CAF50 0%, #45a049 100%);
color: white !important;
border: none;
padding: 15px 30px;
font-size: 16px;
border-radius: 10px;
cursor: pointer;
transition: all 0.3s ease;
}
.process-button:hover {
transform: translateY(-2px);
box-shadow: 0 5px 15px rgba(0,0,0,0.2);
}
/* White text for dark gradient sections */
#input-section, #input-section * {
color: white !important;
}
#output-section, #output-section * {
color: white !important;
}
/* Override Gradio's default text colors */
.gradio-container .gr-markdown p,
.gradio-container .gr-markdown div,
.gradio-container .gr-markdown span,
.gradio-container .gr-markdown li {
color: #333 !important;
}
#input-section .gr-markdown p,
#input-section .gr-markdown div,
#input-section .gr-markdown span,
#input-section .gr-markdown li {
color: white !important;
}
#output-section .gr-markdown p,
#output-section .gr-markdown div,
#output-section .gr-markdown span,
#output-section .gr-markdown li {
color: white !important;
}
/* Force all labels and form elements to have proper contrast */
label, .gr-form label, .gr-textbox label, .gr-button, .gr-checkbox label {
color: #333 !important;
font-weight: 500;
}
#input-section label,
#input-section .gr-form label,
#input-section .gr-textbox label,
#input-section .gr-button,
#input-section .gr-checkbox label {
color: white !important;
}
#output-section label,
#output-section .gr-form label,
#output-section .gr-textbox label,
#output-section .gr-button,
#output-section .gr-checkbox label {
color: white !important;
}
/* Additional fallback for any missed text elements */
.gradio-container [class*="text"],
.gradio-container [class*="label"],
.gradio-container [class*="markdown"] {
color: #333 !important;
}
#input-section [class*="text"],
#input-section [class*="label"],
#input-section [class*="markdown"] {
color: white !important;
}
#output-section [class*="text"],
#output-section [class*="label"],
#output-section [class*="markdown"] {
color: white !important;
}
"""
# Create the Gradio interface.
# Layout: a title and description, then a row with two columns: input
# controls (image, instruction, OCR button, advanced settings, process button)
# and outputs (result image, status text, OCR report). Usage examples, event
# wiring and a footer follow.
with gr.Blocks(css=css, title="智能文字缩放工具") as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown("# 🎨 智能文字缩放工具", elem_id="title")
        gr.Markdown(
            """
🚀 **使用AI技术智能调整图片中的文字大小**
📝 支持自然语言指令,如:
- enlarge 'Hello' by 50% - 将'Hello'放大50%
- make the title bigger - 让标题变大
- shrink the footer text - 缩小页脚文字
🎯 **使用方法**:
1. 上传包含文字的图片
2. 输入文字调整指令
3. 点击处理按钮
4. 查看处理结果
""",
            elem_id="description"
        )
        with gr.Row():
            with gr.Column(scale=1):
                with gr.Group(elem_id="input-section"):
                    gr.Markdown("### 📤 输入设置")
                    # Image input (delivered to the handlers as a PIL image)
                    input_image = gr.Image(
                        label="上传图片",
                        type="pil",
                        height=300,
                        sources=["upload", "clipboard", "webcam"]
                    )
                    # Prompt input
                    user_prompt = gr.Textbox(
                        label="文字调整指令",
                        placeholder="例如: enlarge 'Hello' by 50%",
                        lines=2,
                        info="支持自然语言描述,如 make XX bigger 或 enlarge XX by 50%"
                    )
                    # OCR info button
                    ocr_button = gr.Button(
                        "🔍 查看图片中的文字",
                        variant="secondary",
                        size="sm"
                    )
                    # Advanced settings (collapsed by default)
                    with gr.Accordion("⚙️ 高级设置", open=False):
                        use_ai_parsing = gr.Checkbox(
                            label="🤖 使用AI解析 (推荐,需要OpenAI API密钥)",
                            value=True,
                            info="使用GPT-4.1-nano模型智能理解自然语言指令"
                        )
                        api_key = gr.Textbox(
                            label="🔑 OpenAI API密钥 (可选)",
                            placeholder="sk-...",
                            type="password",
                            info="仅在使用AI解析时需要"
                        )
                    # Process button
                    process_button = gr.Button(
                        "🎯 开始处理",
                        variant="primary",
                        size="lg",
                        elem_classes="process-button"
                    )
            with gr.Column(scale=1):
                with gr.Group(elem_id="output-section"):
                    gr.Markdown("### 📤 处理结果")
                    # Output image
                    output_image = gr.Image(
                        label="处理后的图片",
                        height=300,
                        show_download_button=True
                    )
                    # Status message (read-only)
                    status_message = gr.Textbox(
                        label="💬 状态信息",
                        lines=4,
                        max_lines=8,
                        interactive=False
                    )
                    # OCR info display (read-only)
                    ocr_info = gr.Textbox(
                        label="📝 OCR识别结果",
                        lines=6,
                        max_lines=10,
                        interactive=False
                    )
        # Examples section
        gr.Markdown("### 📚 示例用法")
        gr.Markdown(
            """
**示例指令格式:**
🔍 **指定文字 + 具体比例:**
- enlarge 'Hello' by 50% - 将'Hello'放大50%
- shrink 'Title' by 30% - 将'Title'缩小30%
🎯 **自然语言描述:**
- make the title bigger - 让标题变大
- make the text smaller - 让文字变小
- enlarge the heading - 放大标题
💡 **使用提示:**
1. 上传包含文字的图片
2. 先点击"查看图片中的文字"了解可用文字
3. 输入调整指令
4. 点击"开始处理"
"""
        )
        # Event handlers
        # The process button feeds the four inputs to process_image and shows
        # its (image, status) result.
        process_button.click(
            fn=process_image,
            inputs=[input_image, user_prompt, use_ai_parsing, api_key],
            outputs=[output_image, status_message]
        )
        # The OCR button fills the OCR report box on demand.
        ocr_button.click(
            fn=get_ocr_info,
            inputs=[input_image],
            outputs=[ocr_info]
        )
        # Auto-run OCR when image is uploaded
        input_image.change(
            fn=get_ocr_info,
            inputs=[input_image],
            outputs=[ocr_info]
        )
        # Footer
        gr.Markdown(
            """
---
🎨 **智能文字缩放工具** | 基于OCR和AI技术的智能图像文字处理
📧 如有问题或建议,请联系开发者
"""
        )
if __name__ == "__main__":
    # Script entry point: bind to every network interface on port 7860 and
    # pass share=True to Gradio's launcher.
    launch_options = {
        "share": True,
        "server_name": "0.0.0.0",
        "server_port": 7860,
    }
    demo.launch(**launch_options)