Spaces:
Sleeping
Sleeping
🔧 Fix major issues in text processing
Browse files
1. 🌍 OCR: Switch to English-only recognition
2. 🤖 AI: Use gpt-4.1-nano model instead of gpt-4o-mini
3. 🎯 Smart Text Selection: Fix fallback parsing that always picked first result
- Add intelligent text matching from user prompts
- Support quoted text detection ('text' or "text")
- Add fuzzy matching for better text selection
- Provide clear error messages when text not found
4. 🔄 UI: Default enable AI parsing for better user experience
- app.py +54 -7
- prompt_handler.py +1 -1
app.py
CHANGED
|
@@ -16,8 +16,50 @@ from utils import (
|
|
| 16 |
create_output_filename
|
| 17 |
)
|
| 18 |
|
| 19 |
-
# Initialize the text resizer with GPU support
|
| 20 |
-
text_resizer = TextResizer(languages=['en'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
@spaces.GPU
|
| 23 |
def process_image(input_image, user_prompt, use_ai_parsing=True, api_key=None):
|
|
@@ -57,8 +99,13 @@ def process_image(input_image, user_prompt, use_ai_parsing=True, api_key=None):
|
|
| 57 |
if scale_factor == 1.0:
|
| 58 |
return None, "❌ 错误: 无法从用户指令中解析出缩放信息"
|
| 59 |
|
| 60 |
-
#
|
| 61 |
-
target_text = ocr_results
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
status_msg = f"✅ 备用解析: 目标文字='{target_text}', 缩放因子={scale_factor}"
|
| 63 |
|
| 64 |
except Exception as e:
|
|
@@ -224,9 +271,9 @@ with gr.Blocks(css=css, title="智能文字缩放工具") as demo:
|
|
| 224 |
# Advanced settings
|
| 225 |
with gr.Accordion("⚙️ 高级设置", open=False):
|
| 226 |
use_ai_parsing = gr.Checkbox(
|
| 227 |
-
label="🤖 使用AI解析 (
|
| 228 |
-
value=
|
| 229 |
-
info="使用GPT
|
| 230 |
)
|
| 231 |
|
| 232 |
api_key = gr.Textbox(
|
|
|
|
| 16 |
create_output_filename
|
| 17 |
)
|
| 18 |
|
| 19 |
+
# Initialize the text resizer with GPU support (English only)
|
| 20 |
+
text_resizer = TextResizer(languages=['en'], gpu=True)
|
| 21 |
+
|
| 22 |
+
def find_target_text_in_prompt(user_prompt, ocr_results):
    """
    Pick the OCR-recognized text that the user's prompt refers to.

    Matching is case-insensitive and tried in decreasing order of confidence:

    1. Text wrapped in single or double quotes in the prompt. An exact match
       against any OCR text wins over a partial (substring) match, regardless
       of the order of the OCR results.
    2. An OCR text that appears verbatim somewhere in the prompt.
    3. A prompt word longer than two characters that appears inside an OCR
       text.

    Args:
        user_prompt: The instruction typed by the user.
        ocr_results: Iterable of 3-item entries whose middle item is the
            recognized text (presumably ``(bbox, text, confidence)`` as
            produced by the OCR engine -- confirm against the caller).

    Returns:
        The matching OCR text (stripped, original casing), or ``None`` when
        nothing in the prompt can be tied to an OCR text.
    """
    import re

    if not user_prompt or not ocr_results:
        return None

    # Keep each non-empty OCR text with its lowercase form. Blank entries are
    # dropped because "" is a substring of every string and would otherwise
    # "match" any prompt, returning a falsy "" that hides real matches.
    candidates = []
    for _, text, _ in ocr_results:
        stripped = text.strip()
        if stripped:
            candidates.append((stripped, stripped.lower()))
    if not candidates:
        return None

    # 1. Quoted text in the prompt (single or double quotes).
    for quoted in re.findall(r'["\']([^"\']+)["\']', user_prompt):
        needle = quoted.strip().lower()
        if not needle:
            continue
        # Exact matches are checked across ALL candidates first so that an
        # earlier partial match cannot shadow a later exact one.
        for original, lowered in candidates:
            if lowered == needle:
                return original
        for original, lowered in candidates:
            if needle in lowered or lowered in needle:
                return original

    # 2. No usable quotes: look for an OCR text contained in the prompt.
    prompt_lower = user_prompt.lower()
    for original, lowered in candidates:
        if lowered in prompt_lower:
            return original

    # 3. Last resort: a prompt word that occurs inside an OCR text.
    for word in re.findall(r'\b\w+\b', prompt_lower):
        if len(word) <= 2:  # very short words match almost anything
            continue
        for original, lowered in candidates:
            if word in lowered:
                return original

    return None
|
| 63 |
|
| 64 |
@spaces.GPU
|
| 65 |
def process_image(input_image, user_prompt, use_ai_parsing=True, api_key=None):
|
|
|
|
| 99 |
if scale_factor == 1.0:
|
| 100 |
return None, "❌ 错误: 无法从用户指令中解析出缩放信息"
|
| 101 |
|
| 102 |
+
# Try to find target text from user prompt
|
| 103 |
+
target_text = find_target_text_in_prompt(user_prompt, ocr_results)
|
| 104 |
+
if not target_text:
|
| 105 |
+
# If no specific text found in prompt, ask user to specify
|
| 106 |
+
available_texts = [text.strip() for _, text, _ in ocr_results]
|
| 107 |
+
return None, f"❌ 错误: 无法确定要调整的文字。请在指令中明确指定文字,如 'enlarge \"具体文字\" by 50%'\n\n📝 可用的文字: {available_texts}"
|
| 108 |
+
|
| 109 |
status_msg = f"✅ 备用解析: 目标文字='{target_text}', 缩放因子={scale_factor}"
|
| 110 |
|
| 111 |
except Exception as e:
|
|
|
|
| 271 |
# Advanced settings
|
| 272 |
with gr.Accordion("⚙️ 高级设置", open=False):
|
| 273 |
use_ai_parsing = gr.Checkbox(
|
| 274 |
+
label="🤖 使用AI解析 (推荐,需要OpenAI API密钥)",
|
| 275 |
+
value=True,
|
| 276 |
+
info="使用GPT-4.1-nano模型智能理解自然语言指令"
|
| 277 |
)
|
| 278 |
|
| 279 |
api_key = gr.Textbox(
|
prompt_handler.py
CHANGED
|
@@ -5,7 +5,7 @@ from utils import format_ocr_results_for_prompt, robust_parse_reply
|
|
| 5 |
|
| 6 |
|
| 7 |
class PromptHandler:
|
| 8 |
-
def __init__(self, api_key: str = None, model: str = "gpt-
|
| 9 |
"""
|
| 10 |
初始化Prompt处理器
|
| 11 |
|
|
|
|
| 5 |
|
| 6 |
|
| 7 |
class PromptHandler:
|
| 8 |
+
def __init__(self, api_key: str = None, model: str = "gpt-4.1-nano"):
|
| 9 |
"""
|
| 10 |
初始化Prompt处理器
|
| 11 |
|