# Hugging Face Space: yingzhac/text_editing (status: Sleeping)
# File size: 14,985 Bytes

import gradio as gr
import numpy as np
import os
from PIL import Image
import cv2
import spaces

# Import our custom modules
from core import TextResizer
from prompt_handler import PromptHandler
from utils import (
    load_image,
    save_image,
    validate_scale_factor,
    parse_percentage_to_scale_factor,
    create_output_filename
)

# Module-level TextResizer instance shared by process_image and get_ocr_info;
# configured for English-only text with GPU enabled.
text_resizer = TextResizer(languages=['en'], gpu=True)

def find_target_text_in_prompt(user_prompt, ocr_results):
    """
    Pick the OCR-detected text that the user's instruction refers to.

    Matching is case-insensitive and tried in decreasing order of confidence:

    1. Text wrapped in single or double quotes in the prompt. For each quoted
       fragment an exact match against any OCR text wins; only if none exists
       is a substring match (in either direction) accepted.
    2. An OCR text that appears verbatim somewhere in the prompt.
    3. A prompt word longer than two characters that appears inside an OCR
       text.

    OCR entries that are empty after stripping are ignored, because an empty
    string is a substring of everything and would otherwise shadow every
    real match.

    Args:
        user_prompt: The instruction typed by the user.
        ocr_results: Iterable of (bbox, text, confidence) tuples.

    Returns:
        The matching OCR text (whitespace-stripped), or None when nothing
        matches or when the prompt / OCR results are empty.
    """
    import re

    if not user_prompt or not ocr_results:
        return None

    # (stripped text, lowercase form) pairs; lowercase once instead of on
    # every comparison, and drop blank detections.
    candidates = []
    for _, text, _ in ocr_results:
        stripped = text.strip()
        if stripped:
            candidates.append((stripped, stripped.lower()))

    if not candidates:
        return None

    # 1. Quoted fragments: exact match first, then partial match.
    for quoted in re.findall(r'["\']([^"\']+)["\']', user_prompt):
        quoted_lower = quoted.strip().lower()
        if not quoted_lower:
            continue
        for original, lowered in candidates:
            if quoted_lower == lowered:
                return original
        for original, lowered in candidates:
            if quoted_lower in lowered or lowered in quoted_lower:
                return original

    # 2. No usable quotes: look for an OCR text contained in the prompt.
    prompt_lower = user_prompt.lower()
    for original, lowered in candidates:
        if lowered in prompt_lower:
            return original

    # 3. Fall back to single prompt words found inside an OCR text.
    for word in re.findall(r'\b\w+\b', prompt_lower):
        if len(word) <= 2:  # too short to be a meaningful match
            continue
        for original, lowered in candidates:
            if word in lowered:
                return original

    return None

@spaces.GPU
def process_image(input_image, user_prompt, use_ai_parsing=True, api_key=None):
    """
    Resize one piece of text in an uploaded image according to an instruction.

    The image is OCR'd, the instruction is turned into a (target text, scale
    factor) pair, and the shared text resizer is asked to apply the change.
    The instruction is parsed by PromptHandler when AI parsing is enabled and
    an API key is supplied; otherwise a local fallback parser is used.

    Args:
        input_image: PIL image from the UI, or None when nothing was uploaded.
        user_prompt: The user's resize instruction.
        use_ai_parsing: Whether AI-based parsing was requested.
        api_key: API key handed to PromptHandler; AI parsing is skipped when
            it is empty.

    Returns:
        A (result, message) tuple: the processed PIL image and a status
        message on success, or None and an error message on failure.
    """
    try:
        # Nothing to process without an upload.
        if input_image is None:
            return None, "❌ 错误: 请上传一张图片"

        # The resizer works on an RGB numpy array.
        rgb_pixels = np.array(input_image.convert('RGB'))

        detections = text_resizer.read_text(rgb_pixels)
        if not detections:
            return None, "❌ 错误: 未在图像中识别到任何文字"

        def visible_texts():
            # Stripped OCR texts, shown to the user as a hint in error messages.
            return [text.strip() for _, text, _ in detections]

        # Step 1: work out which text to resize and by how much.
        try:
            wants_ai = use_ai_parsing and api_key
            if not wants_ai:
                # Local fallback parser: percentage first, then target text.
                scale_factor = parse_percentage_to_scale_factor(user_prompt)
                if scale_factor == 1.0:
                    return None, "❌ 错误: 无法从用户指令中解析出缩放信息"

                target_text = find_target_text_in_prompt(user_prompt, detections)
                if not target_text:
                    # Could not tell which text was meant; list the options.
                    known = visible_texts()
                    return None, f"❌ 错误: 无法确定要调整的文字。请在指令中明确指定文字，如 'enlarge \"具体文字\" by 50%'\n\n📝 可用的文字: {known}"

                status_msg = f"✅ 备用解析: 目标文字='{target_text}', 缩放因子={scale_factor}"
            else:
                # AI parser: delegate to PromptHandler and validate its answer.
                handler = PromptHandler(api_key=api_key)
                parsed = handler.parse_user_request(detections, user_prompt)

                if not handler.validate_parsed_result(parsed, detections):
                    raise Exception("AI解析结果验证失败")

                target_text = parsed["target_text"]
                scale_factor = validate_scale_factor(parsed["scale_factor"])
                status_msg = f"✅ AI解析成功: 目标文字='{target_text}', 缩放因子={scale_factor}"
        except Exception as parse_error:
            return None, f"❌ 错误: 指令解析失败: {str(parse_error)}"

        # Step 2: apply the resize and hand a PIL image back to the UI.
        try:
            resized = text_resizer.resize_text(rgb_pixels, target_text, scale_factor)
            return Image.fromarray(resized), status_msg
        except ValueError as resize_error:
            # Include the detected texts so the user can correct the prompt.
            known = visible_texts()
            return None, f"❌ 错误: {str(resize_error)}\n\n📝 可用的文字: {known}"

    except Exception as unexpected:
        return None, f"❌ 处理过程中出现错误: {str(unexpected)}"

@spaces.GPU
def get_ocr_info(input_image):
    """
    Build a human-readable report of the text regions OCR finds in an image.

    Args:
        input_image: PIL image from the UI, or None when nothing was uploaded.

    Returns:
        A string: a numbered list of detected texts with their confidence,
        a short notice when there is no image or no text, or an error
        message if OCR raised.
    """
    if input_image is None:
        return "请先上传图片"

    try:
        # The resizer's OCR works on an RGB numpy array.
        rgb_pixels = np.array(input_image.convert('RGB'))
        detections = text_resizer.read_text(rgb_pixels)

        if not detections:
            return "未识别到任何文字"

        # Header, rule, one line per detection, closing rule.
        rule = "=" * 50
        report = [f"📝 识别到 {len(detections)} 个文字区域:", rule]
        for position, (_, text, conf) in enumerate(detections, start=1):
            report.append(f"{position:2d}. '{text}' (置信度: {conf:.2f})")
        report.append(rule)

        return "\n".join(report)

    except Exception as ocr_error:
        return f"❌ OCR识别失败: {str(ocr_error)}"

# Custom stylesheet handed to gr.Blocks(css=...) below.
# It forces dark (#333) text across the app, then overrides it to white inside
# the #input-section / #output-section gradient panels. The id and class
# selectors (#col-container, #title, #description, #input-section,
# #output-section, .process-button) correspond to the elem_id / elem_classes
# values assigned to components in the layout.
css = """
/* Global text color fixes - high priority */
body, .gradio-container, .gradio-container * {
    color: #333 !important;
}

/* Force all text elements to have good contrast */
p, div, span, label, input, textarea, button, h1, h2, h3, h4, h5, h6 {
    color: #333 !important;
}

#col-container {
    margin: 0 auto;
    max-width: 1000px;
    color: #333 !important;
}

#input-section {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    padding: 20px;
    border-radius: 15px;
    margin-bottom: 20px;
}

#output-section {
    background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);
    padding: 20px;
    border-radius: 15px;
}

.gradio-container {
    background: linear-gradient(135deg, #ffecd2 0%, #fcb69f 100%);
}

#title {
    text-align: center;
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    font-size: 2.5em;
    font-weight: bold;
    margin-bottom: 20px;
}

#description {
    text-align: center;
    color: #222 !important;
    font-size: 1.1em;
    margin-bottom: 30px;
    line-height: 1.6;
}

.process-button {
    background: linear-gradient(135deg, #4CAF50 0%, #45a049 100%);
    color: white !important;
    border: none;
    padding: 15px 30px;
    font-size: 16px;
    border-radius: 10px;
    cursor: pointer;
    transition: all 0.3s ease;
}

.process-button:hover {
    transform: translateY(-2px);
    box-shadow: 0 5px 15px rgba(0,0,0,0.2);
}

/* White text for dark gradient sections */
#input-section, #input-section * {
    color: white !important;
}

#output-section, #output-section * {
    color: white !important;
}

/* Override Gradio's default text colors */
.gradio-container .gr-markdown p,
.gradio-container .gr-markdown div,
.gradio-container .gr-markdown span,
.gradio-container .gr-markdown li {
    color: #333 !important;
}

#input-section .gr-markdown p,
#input-section .gr-markdown div,
#input-section .gr-markdown span,
#input-section .gr-markdown li {
    color: white !important;
}

#output-section .gr-markdown p,
#output-section .gr-markdown div,
#output-section .gr-markdown span,
#output-section .gr-markdown li {
    color: white !important;
}

/* Force all labels and form elements to have proper contrast */
label, .gr-form label, .gr-textbox label, .gr-button, .gr-checkbox label {
    color: #333 !important;
    font-weight: 500;
}

#input-section label,
#input-section .gr-form label,
#input-section .gr-textbox label,
#input-section .gr-button,
#input-section .gr-checkbox label {
    color: white !important;
}

#output-section label,
#output-section .gr-form label,
#output-section .gr-textbox label,
#output-section .gr-button,
#output-section .gr-checkbox label {
    color: white !important;
}

/* Additional fallback for any missed text elements */
.gradio-container [class*="text"], 
.gradio-container [class*="label"], 
.gradio-container [class*="markdown"] {
    color: #333 !important;
}

#input-section [class*="text"], 
#input-section [class*="label"], 
#input-section [class*="markdown"] {
    color: white !important;
}

#output-section [class*="text"], 
#output-section [class*="label"], 
#output-section [class*="markdown"] {
    color: white !important;
}
"""

# Build the Gradio interface: a title and description, an input panel and an
# output panel side by side, a usage-examples section, the event wiring, and
# a footer. Components appear in the order they are created here.
with gr.Blocks(css=css, title="智能文字缩放工具") as demo:
    with gr.Column(elem_id="col-container"):
        # Page title and introductory description.
        gr.Markdown("# 🎨 智能文字缩放工具", elem_id="title")
        gr.Markdown(
            """
            🚀 **使用AI技术智能调整图片中的文字大小** 
            
            📝 支持自然语言指令，如：
            - enlarge 'Hello' by 50% - 将'Hello'放大50%
            - make the title bigger - 让标题变大
            - shrink the footer text - 缩小页脚文字
            
            🎯 **使用方法**：
            1. 上传包含文字的图片
            2. 输入文字调整指令
            3. 点击处理按钮
            4. 查看处理结果
            """,
            elem_id="description"
        )
        
        with gr.Row():
            # Left column: everything the user provides.
            with gr.Column(scale=1):
                with gr.Group(elem_id="input-section"):
                    gr.Markdown("### 📤 输入设置")
                    
                    # Image input; type="pil" so the handlers receive a PIL image
                    input_image = gr.Image(
                        label="上传图片",
                        type="pil",
                        height=300,
                        sources=["upload", "clipboard", "webcam"]
                    )
                    
                    # Free-text resize instruction
                    user_prompt = gr.Textbox(
                        label="文字调整指令",
                        placeholder="例如: enlarge 'Hello' by 50%",
                        lines=2,
                        info="支持自然语言描述，如 make XX bigger 或 enlarge XX by 50%"
                    )
                    
                    # Button that lists the text OCR found in the image
                    ocr_button = gr.Button(
                        "🔍 查看图片中的文字",
                        variant="secondary",
                        size="sm"
                    )
                    
                    # Advanced settings (collapsed by default): AI parsing toggle and API key
                    with gr.Accordion("⚙️ 高级设置", open=False):
                        use_ai_parsing = gr.Checkbox(
                            label="🤖 使用AI解析 (推荐，需要OpenAI API密钥)",
                            value=True,
                            info="使用GPT-4.1-nano模型智能理解自然语言指令"
                        )
                        
                        api_key = gr.Textbox(
                            label="🔑 OpenAI API密钥 (可选)",
                            placeholder="sk-...",
                            type="password",
                            info="仅在使用AI解析时需要"
                        )
                    
                    # Process button; styled through the .process-button CSS class
                    process_button = gr.Button(
                        "🎯 开始处理",
                        variant="primary",
                        size="lg",
                        elem_classes="process-button"
                    )
            
            # Right column: results shown to the user.
            with gr.Column(scale=1):
                with gr.Group(elem_id="output-section"):
                    gr.Markdown("### 📤 处理结果")
                    
                    # Output image
                    output_image = gr.Image(
                        label="处理后的图片",
                        height=300,
                        show_download_button=True
                    )
                    
                    # Status message returned by process_image (read-only)
                    status_message = gr.Textbox(
                        label="💬 状态信息",
                        lines=4,
                        max_lines=8,
                        interactive=False
                    )
                    
                    # OCR report returned by get_ocr_info (read-only)
                    ocr_info = gr.Textbox(
                        label="📝 OCR识别结果",
                        lines=6,
                        max_lines=10,
                        interactive=False
                    )
        
        # Examples section (static help text)
        gr.Markdown("### 📚 示例用法")
        gr.Markdown(
            """
            **示例指令格式：**
            
            🔍 **指定文字 + 具体比例：**
            - enlarge 'Hello' by 50% - 将'Hello'放大50%
            - shrink 'Title' by 30% - 将'Title'缩小30%
            
            🎯 **自然语言描述：**
            - make the title bigger - 让标题变大
            - make the text smaller - 让文字变小
            - enlarge the heading - 放大标题
            
            💡 **使用提示：**
            1. 上传包含文字的图片
            2. 先点击"查看图片中的文字"了解可用文字
            3. 输入调整指令
            4. 点击"开始处理"
            """
        )
        
        # Event handlers
        # Run the resize pipeline; its (image, message) result fills the two outputs.
        process_button.click(
            fn=process_image,
            inputs=[input_image, user_prompt, use_ai_parsing, api_key],
            outputs=[output_image, status_message]
        )
        
        # Show the OCR report on demand.
        ocr_button.click(
            fn=get_ocr_info,
            inputs=[input_image],
            outputs=[ocr_info]
        )
        
        # Auto-run OCR whenever the image input changes
        input_image.change(
            fn=get_ocr_info,
            inputs=[input_image],
            outputs=[ocr_info]
        )
        
        # Footer
        gr.Markdown(
            """
            ---
            
            🎨 **智能文字缩放工具** | 基于OCR和AI技术的智能图像文字处理
            
            📧 如有问题或建议，请联系开发者
            """
        )

if __name__ == "__main__":
    # Start the Gradio server when the file is run as a script, listening on
    # 0.0.0.0:7860 with share=True.
    # NOTE(review): share=True is reportedly ignored (with a warning) on
    # Hugging Face Spaces and opens a public link when run locally — confirm
    # it is still wanted.
    demo.launch(share=True, server_name="0.0.0.0", server_port=7860)