Spaces:

yingzhac
/

text_editing

Sleeping

yingzhac committed on Jul 3

Commit

e219479

1 Parent(s): 986650d

🔧 Fix major issues in text processing

1. 🌍 OCR: Switch to English-only recognition
2. 🤖 AI: Use gpt-4.1-nano model instead of gpt-4o-mini
3. 🎯 Smart Text Selection: Fix fallback parsing that always picked first result
- Add intelligent text matching from user prompts
- Support quoted text detection ('text' or "text")
- Add fuzzy matching for better text selection
- Provide clear error messages when text not found
4. 🔄 UI: Default enable AI parsing for better user experience

Files changed (2) hide show

app.py +54 -7
prompt_handler.py +1 -1

app.py CHANGED Viewed

@@ -16,8 +16,50 @@ from utils import (
     create_output_filename
 )
-# Initialize the text resizer with GPU support
-text_resizer = TextResizer(languages=['en', 'ch_sim'], gpu=True)
 @spaces.GPU
 def process_image(input_image, user_prompt, use_ai_parsing=True, api_key=None):
@@ -57,8 +99,13 @@ def process_image(input_image, user_prompt, use_ai_parsing=True, api_key=None):
                 if scale_factor == 1.0:
                     return None, "❌ 错误: 无法从用户指令中解析出缩放信息"
-                # Use the first detected text as target
-                target_text = ocr_results[0][1].strip()
                 status_msg = f"✅ 备用解析: 目标文字='{target_text}', 缩放因子={scale_factor}"
         except Exception as e:
@@ -224,9 +271,9 @@ with gr.Blocks(css=css, title="智能文字缩放工具") as demo:
                     # Advanced settings
                     with gr.Accordion("⚙️ 高级设置", open=False):
                         use_ai_parsing = gr.Checkbox(
-                            label="🤖 使用AI解析 (需要OpenAI API密钥)",
-                            value=False,
-                            info="使用GPT模型理解自然语言指令"
                         )
                         api_key = gr.Textbox(

     create_output_filename
 )
+# Initialize the text resizer with GPU support (English only)
+text_resizer = TextResizer(languages=['en'], gpu=True)
def find_target_text_in_prompt(user_prompt, ocr_results):
    """
    Pick the OCR-recognized text that the user's prompt refers to.

    Matching is case-insensitive and is attempted in decreasing order of
    confidence:

    1. Text wrapped in single or double quotes in the prompt. For each
       quoted phrase, an exact match against any OCR text wins over a
       substring match, regardless of the order of the OCR results.
    2. An OCR text that appears verbatim inside the prompt.
    3. A prompt word longer than two characters that appears inside an
       OCR text.

    Args:
        user_prompt: The instruction typed by the user.
        ocr_results: Iterable of 3-item OCR entries whose second item is
            the recognized text (the first and third items are ignored).
    Returns:
        The matched OCR text with surrounding whitespace stripped, or None
        when the prompt is empty or nothing matches.
    """
    import re

    if not user_prompt:
        return None

    # Keep (original, lowercased) pairs. Blank OCR texts are dropped because
    # the empty string is a substring of everything and would always "match".
    candidates = []
    for _, text, _ in ocr_results:
        cleaned = text.strip()
        if cleaned:
            candidates.append((cleaned, cleaned.lower()))
    if not candidates:
        return None

    # 1. Quoted phrases (single or double quotes).
    for quoted_text in re.findall(r'["\']([^"\']+)["\']', user_prompt):
        needle = quoted_text.strip().lower()
        if not needle:
            continue
        # Exact matches are checked across ALL candidates first so that a
        # partial hit on an earlier OCR entry cannot shadow an exact one.
        for original, lowered in candidates:
            if needle == lowered:
                return original
        for original, lowered in candidates:
            if needle in lowered or lowered in needle:
                return original

    # 2. No usable quotes: look for an OCR text contained in the prompt.
    user_prompt_lower = user_prompt.lower()
    for original, lowered in candidates:
        if lowered in user_prompt_lower:
            return original

    # 3. Fall back to single prompt words contained in an OCR text.
    for word in re.findall(r'\b\w+\b', user_prompt_lower):
        if len(word) <= 2:  # very short words match almost anything
            continue
        for original, lowered in candidates:
            if word in lowered:
                return original

    return None
 @spaces.GPU
 def process_image(input_image, user_prompt, use_ai_parsing=True, api_key=None):
                 if scale_factor == 1.0:
                     return None, "❌ 错误: 无法从用户指令中解析出缩放信息"
+                # Try to find target text from user prompt
+                target_text = find_target_text_in_prompt(user_prompt, ocr_results)
+                if not target_text:
+                    # If no specific text found in prompt, ask user to specify
+                    available_texts = [text.strip() for _, text, _ in ocr_results]
+                    return None, f"❌ 错误: 无法确定要调整的文字。请在指令中明确指定文字，如 'enlarge \"具体文字\" by 50%'\n\n📝 可用的文字: {available_texts}"
                 status_msg = f"✅ 备用解析: 目标文字='{target_text}', 缩放因子={scale_factor}"
         except Exception as e:
                     # Advanced settings
                     with gr.Accordion("⚙️ 高级设置", open=False):
                         use_ai_parsing = gr.Checkbox(
+                            label="🤖 使用AI解析 (推荐，需要OpenAI API密钥)",
+                            value=True,
+                            info="使用GPT-4.1-nano模型智能理解自然语言指令"
                         )
                         api_key = gr.Textbox(

prompt_handler.py CHANGED Viewed

@@ -5,7 +5,7 @@ from utils import format_ocr_results_for_prompt, robust_parse_reply
 class PromptHandler:
-    def __init__(self, api_key: str = None, model: str = "gpt-4o-mini"):
         """
         初始化Prompt处理器

 class PromptHandler:
+    def __init__(self, api_key: str = None, model: str = "gpt-4.1-nano"):
         """
         初始化Prompt处理器