CassianK's picture
Update app.py
5c7e360 verified
raw
history blame
5.44 kB
# app.py — DeepSeek-OCR (HF Space, Gradio-only stable)
# - Provides a Gradio UI (the Claude Skill calls it through the Gradio /run/predict API)
# - Loads deepseek_ocr.py or run_dpsk_ocr_image.py directly by file path
import io, os, sys, base64, importlib.util, tempfile, traceback
from typing import Optional
from PIL import Image
import numpy as np
import gradio as gr
# Directory that contains this file. The path is made absolute so that the
# candidate lookups below keep working even when `__file__` is relative and
# the process later changes its working directory.
ROOT = os.path.dirname(os.path.abspath(__file__))

# Candidate directories (relative to ROOT), probed in this order, e.g.
# ROOT/DeepSeek-OCR-master, ROOT/DeepSeek-OCR-main/DeepSeek-OCR-master,
# ROOT/DeepSeek-OCR-hf, ...
DIR_CANDIDATES = [
    "DeepSeek-OCR-master",
    os.path.join("DeepSeek-OCR-main", "DeepSeek-OCR-master"),
    "DeepSeek-OCR-hf",
    os.path.join("DeepSeek-OCR-main", "DeepSeek-OCR-hf"),
]

# Candidate script names, probed in this order inside each directory.
FILE_CANDIDATES = [
    "deepseek_ocr.py",  # expected to expose a function- or class-style entry
    "run_dpsk_ocr_image.py",  # may expose a CLI-style entry
    "run_dpsk_ocr.py",  # HF script
]
def _find_file():
for d in DIR_CANDIDATES:
absd = os.path.join(ROOT, d)
if not os.path.isdir(absd):
continue
for fname in FILE_CANDIDATES:
path = os.path.join(absd, fname)
if os.path.isfile(path):
return path
return None
def _load_module_from_path(path: str):
name = os.path.splitext(os.path.basename(path))[0]
spec = importlib.util.spec_from_file_location(name, path)
if spec is None or spec.loader is None:
raise ImportError(f"Cannot load module from {path}")
mod = importlib.util.module_from_spec(spec)
sys.modules[name] = mod
spec.loader.exec_module(mod)
return mod
class OCRAdapter:
    """Expose whichever OCR entry point can be found behind ``recognize``.

    On construction the adapter asks ``_find_file()`` for a candidate script,
    loads it with ``_load_module_from_path()`` and selects the first usable
    entry point, in this order:

    1. a module-level ``ocr_image(image, lang=...)`` callable,
    2. a ``DeepSeekOCR`` class whose instance has ``recognize(image, lang=...)``,
    3. a module-level ``run`` / ``infer`` / ``main`` / ``predict`` callable,
       which receives the path of a temporary PNG file.

    If no script is found, no entry point matches, or loading fails, a demo
    entry returning a fixed placeholder string is used, so ``recognize`` is
    always callable.

    Attributes:
        entry: Callable ``(image, lang) -> str`` that performs the work.
        mode: Name of the selected strategy (``"demo"``, ``"func_ocr_image"``,
            ``"class_recognize"`` or ``"script_<name>"``).
        path: Path of the candidate script, or ``None`` when none was found.
    """

    # Placeholder returned in demo mode (Korean: "Connection OK - the real
    # inference function still needs to be checked.").
    _DEMO_MESSAGE = "[DEMO] 연결 성공 — 실제 추론 함수 확인 필요."

    # Names tried, in order, for script/CLI-style entry points.
    _SCRIPT_ENTRY_NAMES = ("run", "infer", "main", "predict")

    def __init__(self):
        # Install the demo fallback first so that every early return below
        # (including "no candidate file") leaves the adapter usable.
        self.entry = self._demo_entry
        self.mode = "demo"
        self.path = _find_file()
        print(f"[Adapter] candidate path: {self.path}")
        if not self.path:
            return
        try:
            mod = _load_module_from_path(self.path)

            # 1) Function-style entry: ocr_image(image, lang="auto")
            if callable(getattr(mod, "ocr_image", None)):
                self.entry = lambda img, lang="auto": mod.ocr_image(img, lang=lang)
                self.mode = "func_ocr_image"
                print("[Adapter] using ocr_image(image, lang)")
                return

            # 2) Class-style entry: DeepSeekOCR().recognize(image, lang)
            if callable(getattr(mod, "DeepSeekOCR", None)):
                inst = mod.DeepSeekOCR()
                if callable(getattr(inst, "recognize", None)):
                    self.entry = lambda img, lang="auto": inst.recognize(img, lang=lang)
                    self.mode = "class_recognize"
                    print("[Adapter] using DeepSeekOCR().recognize(image, lang)")
                    return

            # 3) Script/CLI-style entry: run() / infer() / main() / predict(),
            #    which may require a file path instead of an image object.
            for cand in self._SCRIPT_ENTRY_NAMES:
                fn = getattr(mod, cand, None)
                if callable(fn):
                    self.entry = self._wrap_path_entry(fn)
                    self.mode = f"script_{cand}"
                    print(f"[Adapter] using {os.path.basename(self.path)}.{cand}(...) via temp file")
                    return
        except Exception as e:
            # Best effort: a broken OCR script must not prevent the UI from
            # starting, so log the failure and stay in demo mode.
            print("[Adapter] load failed:", e)
            print(traceback.format_exc())
        # Fallback: nothing usable was found (or loading failed).
        self.entry = self._demo_entry
        self.mode = "demo"

    @staticmethod
    def _demo_entry(img, lang="auto"):
        """Return the fixed placeholder text used when no real OCR is wired up."""
        return OCRAdapter._DEMO_MESSAGE

    @staticmethod
    def _wrap_path_entry(fn):
        """Adapt a path-taking callable to the ``(image, lang)`` entry signature."""

        def _call(img, lang="auto"):
            # The image is written into a temporary directory rather than an
            # open NamedTemporaryFile: reopening a still-open temp file by
            # name is not possible on every platform.
            with tempfile.TemporaryDirectory() as tmp_dir:
                tmp_path = os.path.join(tmp_dir, "input.png")
                img.save(tmp_path)
                try:
                    return str(fn(tmp_path))
                except TypeError:
                    # The callable may use a different signature (e.g. it
                    # needs `lang`); retry once with it.
                    return str(fn(tmp_path, lang=lang))

        return _call

    def recognize(self, image: "Image.Image", lang="auto") -> str:
        """Run the selected entry point on *image* (converted to RGB).

        Args:
            image: Input image; only its ``convert("RGB")`` method is used here.
            lang: Language hint passed through to the entry point.

        Returns:
            Whatever the selected entry point returns for the image.
        """
        return self.entry(image.convert("RGB"), lang)
# Module-level adapter instance, created once at import time; the Gradio
# callback and the UI header below both read from it.
ADAPTER = OCRAdapter()
def _to_pil(x) -> Image.Image:
    """Coerce *x* into an RGB PIL image.

    Accepted inputs are a PIL image, raw encoded image bytes
    (``bytes`` / ``bytearray``) or a NumPy array.

    Raises:
        TypeError: If *x* is none of the supported types.
    """
    if isinstance(x, Image.Image):
        picture = x
    elif isinstance(x, (bytes, bytearray)):
        # Encoded image data: let PIL decode it from an in-memory buffer.
        picture = Image.open(io.BytesIO(x))
    elif isinstance(x, np.ndarray):
        picture = Image.fromarray(x)
    else:
        raise TypeError("Unsupported image type")
    return picture.convert("RGB")
def _b64_to_image(image_b64: str) -> Image.Image:
import base64
return _to_pil(base64.b64decode(image_b64))
# ── Gradio UI (the Claude Skill uses the /run/predict API) ──
def gradio_predict(image, lang):
    """Gradio callback: run OCR on *image* and return the recognised text.

    Returns the fixed message "No image provided." when *image* is None;
    otherwise the image is normalised with ``_to_pil`` and handed to the
    module-level ADAPTER together with *lang*.
    """
    if image is not None:
        return ADAPTER.recognize(_to_pil(image), lang)
    return "No image provided."
# Build the Gradio UI and bind it to the module-level name `demo`.
with gr.Blocks(title="DeepSeek-OCR (HF Gradio)") as demo:
    # Header shows the active adapter mode and the script path that was found.
    # The labels are Korean: "현재 모드" = "current mode", "경로" = "path".
    gr.Markdown(
        "### DeepSeek-OCR (HF Space, Gradio)\n현재 모드: **"
        + ADAPTER.mode
        + "** \n경로: "
        + str(ADAPTER.path)
    )
    # NOTE(review): the original indentation was lost; the input image and the
    # result box are assumed to share the row, with the remaining controls
    # below it. Confirm against the deployed layout.
    with gr.Row():
        img = gr.Image(type="pil", label="Input Image")
        out = gr.Textbox(label="OCR Result", lines=8)
    lang = gr.Radio(["auto", "en", "ko", "ja", "zh"], value="auto", label="Language")
    btn = gr.Button("Run OCR")
    # Clicking the button sends (image, language) to gradio_predict and shows
    # the returned text in the result box.
    btn.click(gradio_predict, inputs=[img, lang], outputs=[out])
# Hugging Face (sdk: gradio) automatically runs the global variable `demo`.
# demo.queue()  # enable if needed (call without version-specific arguments)