# app.py - DeepSeek-OCR (HF Space, Claude Skill-ready)
# - /ocr      : REST API (POST) - accepts file / image_b64 / image_url
# - Gradio UI : upload-and-test from the browser
# Suggested folder layout:
# /app.py
# /DeepSeek-OCR-master/   (the upstream repo, as-is)
# /requirements.txt
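#
# /ocr accepts exactly one of: an uploaded "file", an "image_b64" string
# (base64-encoded image bytes), or an "image_url", and answers with
# {"ok": true, "text": "..."} on success (see the endpoint in section 3).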
import io
import os
import sys
import base64
import traceback
from typing import Optional

from PIL import Image
import numpy as np
import gradio as gr
from fastapi import FastAPI, UploadFile, File, Body
from fastapi.responses import JSONResponse
# ─────────────────────────────────────────────
# 0) Add the repo to the import path
# ─────────────────────────────────────────────
ROOT = os.path.dirname(__file__)
DEEPSEEK_ROOT = os.path.join(ROOT, "DeepSeek-OCR-master")
if DEEPSEEK_ROOT not in sys.path:
    sys.path.append(DEEPSEEK_ROOT)

# ─────────────────────────────────────────────
# 1) DeepSeek-OCR adapter
#    - The repository's actual entry-point names may differ, so this tries
#      several patterns in order.
#    - If needed, replace the "TODO" spots below with the real function names.
# ─────────────────────────────────────────────
class DeepSeekOCRAdapter:
    def __init__(self):
        """
        Possible entry-point scenarios:
        A) deepseek_ocr.py exposes a class/function
           - class DeepSeekOCR  -> .recognize(Image) returns text
           - def ocr_image(Image, lang="auto") returns text
        B) run_dpsk_ocr_image.py exposes a function
           - def infer(Image) or def run(Image, ...) etc.
        """
        self.backend = None
        self.fn = None  # callable(image, lang='auto') -> str
        # A-1) Try the DeepSeekOCR class
        try:
            import deepseek_ocr as dso  # DeepSeek-OCR-master/deepseek_ocr.py
            if hasattr(dso, "DeepSeekOCR"):
                self.backend = dso.DeepSeekOCR()

                def _call(image: Image.Image, lang="auto"):
                    # Assume the class exposes recognize(image, lang)
                    if hasattr(self.backend, "recognize"):
                        return self.backend.recognize(image, lang=lang)
                    # It might instead be named run / infer / image_to_text / predict
                    for cand in ("run", "infer", "image_to_text", "predict"):
                        if hasattr(self.backend, cand):
                            return getattr(self.backend, cand)(image)
                    raise AttributeError("DeepSeekOCR class found but no callable method.")

                self.fn = _call
                print("[DeepSeekOCRAdapter] Using deepseek_ocr.DeepSeekOCR")
                return
        except Exception as e:
            print("[DeepSeekOCRAdapter] A-1 fallback:", e)
        # A-2) Try a function-style ocr_image
        try:
            import deepseek_ocr as dso
            if hasattr(dso, "ocr_image"):
                def _call(image: Image.Image, lang="auto"):
                    return dso.ocr_image(image, lang=lang)  # TODO: adjust argument names if needed

                self.fn = _call
                print("[DeepSeekOCRAdapter] Using deepseek_ocr.ocr_image")
                return
        except Exception as e:
            print("[DeepSeekOCRAdapter] A-2 fallback:", e)
        # B) Try the script-style run_dpsk_ocr_image.py
        try:
            import run_dpsk_ocr_image as runner
            for cand in ("infer", "run", "predict", "main"):
                if hasattr(runner, cand) and callable(getattr(runner, cand)):
                    def _call(image: Image.Image, lang="auto", _fn=getattr(runner, cand)):
                        # NOTE: the function may expect a file path rather than a PIL.Image.
                        # In that case, save the image to a temporary file and pass the path.
                        try:
                            return _fn(image)  # case: takes a PIL.Image directly
                        except Exception:
                            import tempfile
                            with tempfile.NamedTemporaryFile(suffix=".png", delete=True) as tmp:
                                image.save(tmp.name)
                                # Most common CLI style: (path) only, or (path, config)
                                try:
                                    return _fn(tmp.name)
                                except Exception:
                                    # Cast whatever comes back (dict/text/...) to str
                                    return str(_fn(tmp.name))

                    self.fn = _call
                    print(f"[DeepSeekOCRAdapter] Using run_dpsk_ocr_image.{cand}")
                    return
        except Exception as e:
            print("[DeepSeekOCRAdapter] B fallback:", e)
        # Last-resort safety net: demo mode
        print("[DeepSeekOCRAdapter] No concrete entry found. Falling back to DEMO.")

        def _demo(image: Image.Image, lang="auto"):
            return "[DEMO] Wiring is in place - point app.py at the real entry point (a one-line change)."

        self.fn = _demo

    def recognize(self, image: Image.Image, lang: str = "auto") -> str:
        return self.fn(image, lang=lang)
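
# Local smoke test for the adapter (illustrative sketch, not from the upstream repo;
# assumes a local "sample.png"). Kept commented out so importing app.py has no side effects:
#
#   if __name__ == "__main__":
#       _ocr = DeepSeekOCRAdapter()
#       print(_ocr.recognize(Image.open("sample.png")))
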
# ─────────────────────────────────────────────
# 2) Utilities
# ─────────────────────────────────────────────
def _to_pil(x) -> Image.Image:
    if isinstance(x, Image.Image):
        return x.convert("RGB")
    if isinstance(x, (bytes, bytearray)):
        return Image.open(io.BytesIO(x)).convert("RGB")
    if isinstance(x, np.ndarray):
        return Image.fromarray(x).convert("RGB")
    raise TypeError("Unsupported image type")


def _b64_to_image(image_b64: str) -> Image.Image:
    raw = base64.b64decode(image_b64)
    return _to_pil(raw)


def _url_to_image(url: str) -> Image.Image:
    import requests
    r = requests.get(url, timeout=20)
    r.raise_for_status()
    return _to_pil(r.content)
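

# Companion client-side helper (an illustrative addition, not in the upstream repo):
# turn a local image file into the base64 string that the /ocr "image_b64" field expects.
def _image_path_to_b64(path: str) -> str:
    with open(path, "rb") as f:
        return base64.b64encode(f.read()).decode("ascii")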
# ─────────────────────────────────────────────
# 3) FastAPI (REST)
# ─────────────────────────────────────────────
api = FastAPI(title="DeepSeek-OCR API")
_engine = DeepSeekOCRAdapter()
@api.post("/ocr")
async def ocr(
    image_b64: Optional[str] = Body(default=None),
    image_url: Optional[str] = Body(default=None),
    lang: str = Body(default="auto"),
    file: Optional[UploadFile] = File(default=None),
):
    try:
        if file is not None:
            image = _to_pil(await file.read())
        elif image_b64:
            image = _b64_to_image(image_b64)
        elif image_url:
            image = _url_to_image(image_url)
        else:
            return JSONResponse(status_code=400, content={
                "ok": False, "error": "Provide one of: file | image_b64 | image_url"
            })
        text = _engine.recognize(image, lang=lang)
        return {"ok": True, "text": text}
    except Exception as e:
        return JSONResponse(status_code=500, content={
            "ok": False, "error": str(e), "trace": traceback.format_exc()
        })
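
# Client-side usage sketch (not from the upstream repo; the Space URL is a placeholder
# and the call needs the `requests` package). Shown for the multipart file-upload path,
# which the endpoint's UploadFile parameter accepts:
#
#   import requests
#   resp = requests.post(
#       "https://<your-space>.hf.space/ocr",
#       files={"file": ("sample.png", open("sample.png", "rb"), "image/png")},
#   )
#   print(resp.json())   # -> {"ok": true, "text": "..."}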
# ─────────────────────────────────────────────
# 4) Gradio UI (manual testing)
# ─────────────────────────────────────────────
def _predict(image, lang):
    if image is None:
        return "No image."
    pil = _to_pil(image)
    return _engine.recognize(pil, lang=lang)


with gr.Blocks(title="DeepSeek-OCR (Claude-ready)") as demo:
    gr.Markdown("### DeepSeek-OCR (HF Space)\nUpload an image to extract its text.")
    with gr.Row():
        img = gr.Image(type="pil", label="Input image")
        out = gr.Textbox(label="OCR Result", lines=8)
    lang = gr.Radio(["auto", "en", "ko", "ja", "zh"], value="auto", label="Language")
    btn = gr.Button("Run OCR")
    btn.click(_predict, inputs=[img, lang], outputs=[out])
# HF Spaces normally serves the Gradio app as the default entry point, but to also
# expose the FastAPI endpoints, keep an alias like the one below.
app = api

demo.queue(concurrency_count=1)
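
# Alternative wiring sketch (not from the upstream repo): the bare `app = api` alias above
# only exposes the REST route when this file is served as a plain FastAPI/uvicorn app.
# Assuming a Gradio version that provides gr.mount_gradio_app, the UI and the REST route
# can be served from a single ASGI app instead:
#
#   app = gr.mount_gradio_app(api, demo, path="/ui")
#   # uvicorn app:app --host 0.0.0.0 --port 7860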