File size: 5,440 Bytes
5c7e360
 
 
9889d2d
5c7e360
b94ccd9
9889d2d
 
 
 
 
5c7e360
 
 
b94ccd9
 
5c7e360
b94ccd9
 
9889d2d
5c7e360
 
 
 
 
9889d2d
5c7e360
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9889d2d
5c7e360
 
 
 
 
 
b94ccd9
5c7e360
 
 
 
 
 
 
 
 
 
 
 
b94ccd9
5c7e360
 
b94ccd9
5c7e360
 
 
 
 
 
 
 
9889d2d
 
5c7e360
 
9889d2d
b94ccd9
5c7e360
 
9889d2d
5c7e360
 
9889d2d
5c7e360
9889d2d
 
 
 
 
 
 
 
 
 
 
5c7e360
b94ccd9
9889d2d
5c7e360
 
 
b94ccd9
5c7e360
9889d2d
5c7e360
 
9889d2d
b94ccd9
9889d2d
 
 
b94ccd9
9889d2d
5c7e360
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
# app.py — DeepSeek-OCR (HF Space, Gradio-only stable)
# - Provides the Gradio UI (the Claude Skill calls it through the Gradio /run/predict API)
# - Loads deepseek_ocr.py or run_dpsk_ocr_image.py directly by file path

import io, os, sys, base64, importlib.util, tempfile, traceback
from typing import Optional
from PIL import Image
import numpy as np
import gradio as gr

# Directory that contains this file; candidate directories are joined onto it.
ROOT = os.path.dirname(__file__)

# Candidate directories, relative to ROOT, listed as path components so the
# separator is chosen by os.path.join.
DIR_CANDIDATES = [
    os.path.join(*parts)
    for parts in (
        ("DeepSeek-OCR-master",),
        ("DeepSeek-OCR-main", "DeepSeek-OCR-master"),
        ("DeepSeek-OCR-hf",),
        ("DeepSeek-OCR-main", "DeepSeek-OCR-hf"),
    )
]

# Candidate entry files looked up inside each candidate directory.
FILE_CANDIDATES = [
    # expected to expose a function-style or class-style entry point
    "deepseek_ocr.py",
    # may expose a CLI-style entry point
    "run_dpsk_ocr_image.py",
    # HF script
    "run_dpsk_ocr.py",
]

def _find_file():
    for d in DIR_CANDIDATES:
        absd = os.path.join(ROOT, d)
        if not os.path.isdir(absd):
            continue
        for fname in FILE_CANDIDATES:
            path = os.path.join(absd, fname)
            if os.path.isfile(path):
                return path
    return None

def _load_module_from_path(path: str):
    name = os.path.splitext(os.path.basename(path))[0]
    spec = importlib.util.spec_from_file_location(name, path)
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot load module from {path}")
    mod = importlib.util.module_from_spec(spec)
    sys.modules[name] = mod
    spec.loader.exec_module(mod)
    return mod

class OCRAdapter:
    """Expose one ``recognize(image, lang)`` call over whichever OCR entry point exists.

    On construction the adapter asks ``_find_file()`` for a candidate script,
    loads it with ``_load_module_from_path()`` and probes the module, in order, for:

    1. a module-level ``ocr_image(image, lang=...)`` function,
    2. a ``DeepSeekOCR`` class whose instance has ``recognize(image, lang=...)``,
    3. a callable named ``run``, ``infer``, ``main`` or ``predict``, which is
       handed the path of a temporary PNG file.

    If no file is found, loading fails, or none of these entry points exist,
    the adapter stays in demo mode and returns a fixed placeholder message.
    """

    # Placeholder returned in demo mode (user-facing, intentionally Korean).
    _DEMO_MESSAGE = "[DEMO] 연결 성공 — 실제 추론 함수 확인 필요."

    def __init__(self):
        # entry: callable (img, lang="auto") -> OCR result. It starts as the
        #        demo fallback so it is callable on every exit path below,
        #        including the early return when no candidate file exists.
        # mode:  short label describing which entry point was selected.
        # path:  candidate script path, or None when nothing was found.
        self.entry = self._demo_entry
        self.mode = "demo"
        self.path = _find_file()
        print(f"[Adapter] candidate path: {self.path}")
        if not self.path:
            return
        try:
            mod = _load_module_from_path(self.path)
            resolved = self._resolve_entry(mod)
        except Exception as e:
            # Best effort: any load/probe failure leaves the adapter in demo mode.
            print("[Adapter] load failed:", e)
            print(traceback.format_exc())
            return
        if resolved is not None:
            self.entry, self.mode = resolved

    @staticmethod
    def _demo_entry(img, lang="auto"):
        """Fallback entry point: ignore the input and return the demo message."""
        return OCRAdapter._DEMO_MESSAGE

    def _resolve_entry(self, mod):
        """Return ``(entry, mode)`` for the first entry point found in *mod*, else None."""
        # 1) Function-style entry: ocr_image(image, lang="auto")
        if hasattr(mod, "ocr_image"):
            print("[Adapter] using ocr_image(image, lang)")
            return (lambda img, lang="auto": mod.ocr_image(img, lang=lang)), "func_ocr_image"

        # 2) Class-style entry: DeepSeekOCR().recognize(image, lang)
        if hasattr(mod, "DeepSeekOCR"):
            inst = mod.DeepSeekOCR()
            if hasattr(inst, "recognize"):
                print("[Adapter] using DeepSeekOCR().recognize(image, lang)")
                return (lambda img, lang="auto": inst.recognize(img, lang=lang)), "class_recognize"

        # 3) Script/CLI-style entry: run() / infer() / main() / predict(),
        #    which may require a file path. Non-callable attributes that
        #    merely share one of these names are skipped.
        for cand in ("run", "infer", "main", "predict"):
            fn = getattr(mod, cand, None)
            if callable(fn):
                print(f"[Adapter] using {os.path.basename(self.path)}.{cand}(...) via temp file")
                return self._path_entry(fn), f"script_{cand}"
        return None

    @staticmethod
    def _path_entry(fn):
        """Wrap *fn*, which takes an image path, as an ``(img, lang)`` entry point."""
        def _call(img, lang="auto"):
            # The target may require a file path, so write the image to disk.
            # A temporary directory is used instead of an open
            # NamedTemporaryFile because an open temporary file cannot be
            # reopened by name on Windows.
            with tempfile.TemporaryDirectory() as tmp_dir:
                tmp_path = os.path.join(tmp_dir, "input.png")
                img.save(tmp_path)
                try:
                    return str(fn(tmp_path))
                except TypeError:
                    # Retry in case the callable expects extra arguments such as lang.
                    return str(fn(tmp_path, lang=lang))
        return _call

    def recognize(self, image: "Image.Image", lang="auto") -> str:
        """Convert *image* to RGB and pass it, with *lang*, to the selected entry point."""
        return self.entry(image.convert("RGB"), lang)

# Module-level adapter instance, constructed once when this module is imported.
ADAPTER = OCRAdapter()

def _to_pil(x) -> Image.Image:
    """Return *x* as an RGB PIL image.

    Accepts a PIL image, raw encoded image bytes (``bytes``/``bytearray``) or
    a NumPy array.

    Raises:
        TypeError: If *x* is none of the supported types.
    """
    if isinstance(x, Image.Image):
        picture = x
    elif isinstance(x, (bytes, bytearray)):
        picture = Image.open(io.BytesIO(x))
    elif isinstance(x, np.ndarray):
        picture = Image.fromarray(x)
    else:
        raise TypeError("Unsupported image type")
    return picture.convert("RGB")

def _b64_to_image(image_b64: str) -> Image.Image:
    """Decode a base64-encoded image string and return it as an RGB PIL image."""
    raw_bytes = base64.b64decode(image_b64)
    return _to_pil(raw_bytes)

# ── Gradio UI (the Claude Skill uses the /run/predict API) ──
def gradio_predict(image, lang):
    """Gradio click handler: run OCR on *image* with language hint *lang*.

    Returns the adapter's result, or a fixed notice when no image was supplied.
    """
    if image is not None:
        return ADAPTER.recognize(_to_pil(image), lang)
    return "No image provided."

# Gradio page: image input and result box side by side, a language selector,
# and a button that sends both inputs to gradio_predict.
with gr.Blocks(title="DeepSeek-OCR (HF Gradio)") as demo:
    # The header shows the adapter's selected mode and the script path it found.
    # The labels are user-facing Korean text; the two spaces before "\n" are a
    # Markdown hard line break.
    gr.Markdown(
        "### DeepSeek-OCR (HF Space, Gradio)\n"
        f"현재 모드: **{ADAPTER.mode}**  \n"
        f"경로: {ADAPTER.path}"
    )
    with gr.Row():
        img = gr.Image(type="pil", label="Input Image")
        out = gr.Textbox(label="OCR Result", lines=8)
    lang = gr.Radio(["auto","en","ko","ja","zh"], value="auto", label="Language")
    btn = gr.Button("Run OCR")
    btn.click(gradio_predict, inputs=[img, lang], outputs=[out])

# Hugging Face (sdk: gradio) automatically launches the global variable `demo`.
# demo.queue()  # use if needed (call without version-specific arguments)