# app.py - DeepSeek-OCR (HF Space, Claude Skill-ready)
# - /ocr : REST API (POST); supports file / image_b64 / image_url
# - Gradio UI : upload and test directly in the browser
# Assumed folder layout:
#   /app.py
#   /DeepSeek-OCR-master/   (repo as-is)
#   /requirements.txt
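# Example request against /ocr (illustrative only: the Space URL is a placeholder,
# and the JSON field names simply mirror the endpoint parameters defined below):
#
#   curl -X POST https://<your-space>.hf.space/ocr \
#        -H "Content-Type: application/json" \
#        -d '{"image_b64": "<base64-encoded image>", "lang": "auto"}'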
import io
import os
import sys
import base64
import traceback
from typing import Optional
from PIL import Image
import numpy as np
import gradio as gr
from fastapi import FastAPI, UploadFile, File, Body
from fastapi.responses import JSONResponse
# ─────────────────────────────────────────────
# 0) Add the repo directory to sys.path
# ─────────────────────────────────────────────
ROOT = os.path.dirname(__file__)
DEEPSEEK_ROOT = os.path.join(ROOT, "DeepSeek-OCR-master")
if DEEPSEEK_ROOT not in sys.path:
    sys.path.append(DEEPSEEK_ROOT)
# ─────────────────────────────────────────────
# 1) DeepSeek-OCR adapter
#    - The entry-point names the repo actually provides may differ,
#      so this adapter tries several patterns.
#    - If needed, replace the "TODO" parts with the real function names.
# ─────────────────────────────────────────────
class DeepSeekOCRAdapter:
    def __init__(self):
        """
        Possible entry-point scenarios:
          A) deepseek_ocr.py exposes a class/function
             - class DeepSeekOCR with .recognize(Image) returning text
             - def ocr_image(Image, lang="auto") returning text
          B) run_dpsk_ocr_image.py exposes a function
             - def infer(Image) or def run(Image, ...) etc.
        """
        self.backend = None
        self.fn = None  # callable(image, lang='auto') -> str
        # A-1) Try class DeepSeekOCR
        try:
            import deepseek_ocr as dso  # DeepSeek-OCR-master/deepseek_ocr.py
            if hasattr(dso, "DeepSeekOCR"):
                self.backend = dso.DeepSeekOCR()

                def _call(image: Image.Image, lang="auto"):
                    # Assume the class provides recognize(image, lang)
                    if hasattr(self.backend, "recognize"):
                        return self.backend.recognize(image, lang=lang)
                    # It may also be named run/image_to_text/etc.
                    for cand in ("run", "infer", "image_to_text", "predict"):
                        if hasattr(self.backend, cand):
                            return getattr(self.backend, cand)(image)
                    raise AttributeError("DeepSeekOCR class found but no callable method.")

                self.fn = _call
                print("[DeepSeekOCRAdapter] Using deepseek_ocr.DeepSeekOCR")
                return
        except Exception as e:
            print("[DeepSeekOCRAdapter] A-1 fallback:", e)
        # A-2) Try a function-style ocr_image
        try:
            import deepseek_ocr as dso
            if hasattr(dso, "ocr_image"):
                def _call(image: Image.Image, lang="auto"):
                    return dso.ocr_image(image, lang=lang)  # TODO: match the actual argument names if needed

                self.fn = _call
                print("[DeepSeekOCRAdapter] Using deepseek_ocr.ocr_image")
                return
        except Exception as e:
            print("[DeepSeekOCRAdapter] A-2 fallback:", e)
        # B) Try the run_dpsk_ocr_image.py script-style entry points
        try:
            import run_dpsk_ocr_image as runner
            for cand in ("infer", "run", "predict", "main"):
                if hasattr(runner, cand) and callable(getattr(runner, cand)):
                    def _call(image: Image.Image, lang="auto", _fn=getattr(runner, cand)):
                        # NOTE: the function may expect a file path rather than a PIL.Image.
                        # In that case we save the image to a temporary file and pass the path.
                        try:
                            return _fn(image)  # case: accepts a PIL.Image directly
                        except Exception:
                            import tempfile
                            with tempfile.NamedTemporaryFile(suffix=".png", delete=True) as tmp:
                                image.save(tmp.name)
                                # Most common CLI styles: (path) only, or (path, config)
                                try:
                                    return _fn(tmp.name)
                                except Exception:
                                    # Cast whatever is returned (dict/text/...) to str
                                    return str(_fn(tmp.name))

                    self.fn = _call
                    print(f"[DeepSeekOCRAdapter] Using run_dpsk_ocr_image.{cand}")
                    return
        except Exception as e:
            print("[DeepSeekOCRAdapter] B fallback:", e)
        # Last safety net: demo stub
        print("[DeepSeekOCRAdapter] No concrete entry found. Falling back to DEMO.")

        def _demo(image: Image.Image, lang="auto"):
            return "[DEMO] Wiring is in place - just swap in the real function name in app.py."

        self.fn = _demo

    def recognize(self, image: Image.Image, lang: str = "auto") -> str:
        return self.fn(image, lang=lang)
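# Illustrative only: once the repo's real entry point is known, the adapter's
# trial-and-error above can be bypassed with a one-line wrapper like the sketch
# below. The module name `deepseek_ocr`, the function `ocr_image`, and its `lang`
# keyword are assumptions carried over from the patterns tried above, not the
# verified DeepSeek-OCR API.
#
#   import deepseek_ocr
#
#   adapter = DeepSeekOCRAdapter()
#   adapter.fn = lambda image, lang="auto": deepseek_ocr.ocr_image(image, lang=lang)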
# ─────────────────────────────────────────────
# 2) Utilities
# ─────────────────────────────────────────────
def _to_pil(x) -> Image.Image:
    if isinstance(x, Image.Image):
        return x.convert("RGB")
    if isinstance(x, (bytes, bytearray)):
        return Image.open(io.BytesIO(x)).convert("RGB")
    if isinstance(x, np.ndarray):
        return Image.fromarray(x).convert("RGB")
    raise TypeError("Unsupported image type")
def _b64_to_image(image_b64: str) -> Image.Image:
    raw = base64.b64decode(image_b64)
    return _to_pil(raw)

def _url_to_image(url: str) -> Image.Image:
    import requests
    r = requests.get(url, timeout=20)
    r.raise_for_status()
    return _to_pil(r.content)
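# Illustrative counterpart to _b64_to_image: how a client could base64-encode a
# local file before sending it as `image_b64`. The helper name is ours and the
# Space never calls it; it is only a usage sketch.
def _encode_image_b64(path: str) -> str:
    with open(path, "rb") as f:
        return base64.b64encode(f.read()).decode("ascii")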
# ─────────────────────────────────────────────
# 3) FastAPI (REST)
# ─────────────────────────────────────────────
api = FastAPI(title="DeepSeek-OCR API")
_engine = DeepSeekOCRAdapter()
@api.post("/ocr")
async def ocr(
    image_b64: Optional[str] = Body(default=None),
    image_url: Optional[str] = Body(default=None),
    lang: str = Body(default="auto"),
    file: Optional[UploadFile] = File(default=None),
):
    try:
        if file is not None:
            image = _to_pil(await file.read())
        elif image_b64:
            image = _b64_to_image(image_b64)
        elif image_url:
            image = _url_to_image(image_url)
        else:
            return JSONResponse(status_code=400, content={
                "ok": False, "error": "Provide one of: file | image_b64 | image_url"
            })
        text = _engine.recognize(image, lang=lang)
        return {"ok": True, "text": text}
    except Exception as e:
        return JSONResponse(status_code=500, content={
            "ok": False, "error": str(e), "trace": traceback.format_exc()
        })
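# Minimal example client for the endpoint above (a sketch; the Space never calls
# it). It sends `image_b64` as JSON and assumes a localhost port; the real Space
# URL and port will differ. FastAPI routes mixing UploadFile with JSON Body
# fields can be picky about content types, so this sketch sticks to the JSON path.
def _example_ocr_request(image_path: str, endpoint: str = "http://localhost:7860/ocr") -> str:
    import requests
    with open(image_path, "rb") as f:
        payload = {"image_b64": base64.b64encode(f.read()).decode("ascii"), "lang": "auto"}
    r = requests.post(endpoint, json=payload, timeout=60)
    r.raise_for_status()
    return r.json().get("text", "")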
# ─────────────────────────────────────────────
# 4) Gradio UI (for testing)
# ─────────────────────────────────────────────
def _predict(image, lang):
    if image is None:
        return "No image."
    pil = _to_pil(image)
    return _engine.recognize(pil, lang=lang)
with gr.Blocks(title="DeepSeek-OCR (Claude-ready)") as demo:
    gr.Markdown("### DeepSeek-OCR (HF Space)\nUpload an image to extract its text.")
    with gr.Row():
        img = gr.Image(type="pil", label="Input image")
        out = gr.Textbox(label="OCR Result", lines=8)
    lang = gr.Radio(["auto", "en", "ko", "ja", "zh"], value="auto", label="Language")
    btn = gr.Button("Run OCR")
    btn.click(_predict, inputs=[img, lang], outputs=[out])
# HF Spaces usually launches the Gradio app as the default entry point, but to
# expose the FastAPI endpoints as well, we keep an alias as below.
app = api

demo.queue(concurrency_count=1)
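# Local testing sketch (not executed here): the FastAPI routes and the Gradio UI
# can be served together by mounting the Blocks app onto the FastAPI instance.
# `gr.mount_gradio_app` exists in recent Gradio releases, but the exact call and
# the port below are illustrative, not settings this Space relies on.
#
#   app = gr.mount_gradio_app(api, demo, path="/")
#   # then run:  uvicorn app:app --host 0.0.0.0 --port 7860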