Spaces:

CassianK
/

deepseek-ocr-test

Running

App Files Files Community

deepseek-ocr-test / app.py

CassianK

Create app.py

9889d2d verified 8 days ago

raw

history blame

8.82 kB

	# app.py — DeepSeek-OCR (HF Space, Claude Skill-ready)
	# - /ocr : REST API (POST) → file / image_b64 / image_url 지원
	# - Gradio UI : 브라우저에서 업로드 테스트
	# 폴더 구조 전제:
	# /app.py
	# /DeepSeek-OCR-master/ (repo 그대로)
	# /requirements.txt

	import io
	import os
	import sys
	import base64
	import traceback
	from typing import Optional

	from PIL import Image
	import numpy as np

	import gradio as gr
	from fastapi import FastAPI, UploadFile, File, Body
	from fastapi.responses import JSONResponse

	# ─────────────────────────────────────────────
	# 0) Repo 경로 추가
	# ─────────────────────────────────────────────
	# Directory containing this file. abspath() keeps the value independent of
	# the working directory even when __file__ is a relative path.
	ROOT = os.path.dirname(os.path.abspath(__file__))
	# Checkout of the DeepSeek-OCR repository that sits next to this file.
	DEEPSEEK_ROOT = os.path.join(ROOT, "DeepSeek-OCR-master")
	# Make the repository's modules importable; appended (not prepended) so
	# already-installed packages keep precedence.
	if DEEPSEEK_ROOT not in sys.path:
	    sys.path.append(DEEPSEEK_ROOT)

	# ─────────────────────────────────────────────
	# 1) DeepSeek-OCR 어댑터
	# - 저장소가 제공하는 실제 진입점 이름이 다를 수 있어
	# 여러 패턴을 시도하도록 구성했습니다.
	# - 필요 시 아래 "TODO" 부분을 실제 함수명으로 바꾸세요.
	# ─────────────────────────────────────────────
	class DeepSeekOCRAdapter:
	    """Uniform ``recognize(image, lang)`` front end over the DeepSeek-OCR repo.

	    The repository's real entry point is not known in advance, so the
	    constructor probes several candidates in order and keeps the first one
	    that loads:

	    A-1) a ``DeepSeekOCR`` class in ``deepseek_ocr`` (``recognize`` or one of
	         a few alternative method names),
	    A-2) an ``ocr_image(image, lang=...)`` function in ``deepseek_ocr``,
	    B)   a callable named ``infer`` / ``run`` / ``predict`` / ``main`` in the
	         ``run_dpsk_ocr_image`` module.

	    When none of them is available a demo stub that returns a fixed message
	    is installed, so the application still starts.

	    Attributes:
	        backend: the ``DeepSeekOCR`` instance when scenario A-1 was chosen,
	            otherwise ``None``.
	        fn: the selected callable, invoked as ``fn(image, lang=...)``.
	    """

	    # Method names tried, in order, on a backend that has no ``recognize``.
	    _CLASS_METHODS = ("run", "infer", "image_to_text", "predict")
	    # Function names tried, in order, on the run_dpsk_ocr_image module.
	    _RUNNER_FUNCS = ("infer", "run", "predict", "main")

	    def __init__(self):
	        """Probe the known entry points and bind the first usable one."""
	        self.backend = None
	        self.fn = None  # callable(image, lang='auto') -> str

	        probes = (
	            ("A-1", self._load_class_backend),
	            ("A-2", self._load_function_backend),
	            ("B", self._load_runner_backend),
	        )
	        for label, probe in probes:
	            try:
	                fn = probe()
	            except Exception as e:
	                # Best effort: a missing or broken candidate must not stop
	                # the remaining probes; report it and move on.
	                print(f"[DeepSeekOCRAdapter] {label} fallback:", e)
	                continue
	            if fn is not None:
	                self.fn = fn
	                return

	        # Last resort: demo stub.
	        print("[DeepSeekOCRAdapter] No concrete entry found. Falling back to DEMO.")

	        def _demo(image: "Image.Image", lang="auto"):
	            return "[DEMO] 연결 완료 — 실제 함수명을 app.py에서 한 줄만 바꿔주세요."

	        self.fn = _demo

	    def _load_class_backend(self):
	        """Scenario A-1: instantiate ``deepseek_ocr.DeepSeekOCR`` if present."""
	        import deepseek_ocr as dso  # DeepSeek-OCR-master/deepseek_ocr.py

	        if not hasattr(dso, "DeepSeekOCR"):
	            return None
	        self.backend = dso.DeepSeekOCR()
	        print("[DeepSeekOCRAdapter] Using deepseek_ocr.DeepSeekOCR")
	        return self._call_backend

	    def _call_backend(self, image: "Image.Image", lang="auto"):
	        """Run OCR through ``self.backend``, whichever method name it offers."""
	        if hasattr(self.backend, "recognize"):
	            return self.backend.recognize(image, lang=lang)
	        # The alternative names are called with the image only (no lang).
	        for cand in self._CLASS_METHODS:
	            if hasattr(self.backend, cand):
	                return getattr(self.backend, cand)(image)
	        raise AttributeError("DeepSeekOCR class found but no callable method.")

	    def _load_function_backend(self):
	        """Scenario A-2: wrap the module-level ``deepseek_ocr.ocr_image``."""
	        import deepseek_ocr as dso

	        if not hasattr(dso, "ocr_image"):
	            return None

	        def _call(image: "Image.Image", lang="auto"):
	            # TODO: adjust the keyword name if the real signature differs.
	            return dso.ocr_image(image, lang=lang)

	        print("[DeepSeekOCRAdapter] Using deepseek_ocr.ocr_image")
	        return _call

	    def _load_runner_backend(self):
	        """Scenario B: wrap the first callable found in ``run_dpsk_ocr_image``."""
	        import run_dpsk_ocr_image as runner

	        for cand in self._RUNNER_FUNCS:
	            target = getattr(runner, cand, None)
	            if callable(target):
	                print(f"[DeepSeekOCRAdapter] Using run_dpsk_ocr_image.{cand}")
	                return self._wrap_runner(target)
	        return None

	    @staticmethod
	    def _wrap_runner(target):
	        """Adapt a script-style function that may expect a file path."""

	        def _call(image: "Image.Image", lang="auto"):
	            try:
	                # Case 1: the function accepts the image object directly.
	                return target(image)
	            except Exception:
	                # Case 2: assume it wants a path; hand it a temporary PNG.
	                # The call is made exactly once; if it fails too, the error
	                # propagates with the first failure attached as context.
	                import tempfile

	                with tempfile.NamedTemporaryFile(suffix=".png", delete=True) as tmp:
	                    image.save(tmp.name)
	                    return target(tmp.name)

	        return _call

	    def recognize(self, image: "Image.Image", lang: str = "auto") -> str:
	        """Run OCR on ``image`` with the entry point chosen at construction.

	        Args:
	            image: the image to read.
	            lang: language hint forwarded to the backend as ``lang=``.

	        Returns:
	            Whatever the selected backend callable returns.
	        """
	        return self.fn(image, lang=lang)


	# ─────────────────────────────────────────────
	# 2) 유틸
	# ─────────────────────────────────────────────
	def _to_pil(x) -> Image.Image:
	    """Coerce a supported input into an RGB ``PIL.Image``.

	    Accepts a PIL image, encoded image bytes (``bytes`` / ``bytearray``) or
	    a NumPy array; anything else raises ``TypeError``.
	    """
	    if isinstance(x, Image.Image):
	        picture = x
	    elif isinstance(x, (bytes, bytearray)):
	        # Encoded file contents: let PIL sniff the format from the stream.
	        picture = Image.open(io.BytesIO(x))
	    elif isinstance(x, np.ndarray):
	        picture = Image.fromarray(x)
	    else:
	        raise TypeError("Unsupported image type")
	    # Every accepted input leaves through the same RGB conversion.
	    return picture.convert("RGB")

	def _b64_to_image(image_b64: str) -> Image.Image:
	    """Decode a base64 payload into an RGB image.

	    Both a bare base64 string and a data URL of the form
	    ``data:<mime>;base64,<payload>`` are accepted.

	    Args:
	        image_b64: base64 text, optionally with a ``data:`` URL prefix.

	    Returns:
	        The decoded image, converted to RGB by ``_to_pil``.

	    Raises:
	        binascii.Error: if the payload is incorrectly padded.
	    """
	    payload = image_b64
	    # ":" is not a base64 character, so a leading "data:" can only be a data
	    # URL header. Without stripping it, the non-validating decoder would fold
	    # the header's letters into the output and corrupt the image bytes.
	    if isinstance(payload, str) and payload.lstrip().startswith("data:"):
	        payload = payload.partition(",")[2]
	    raw = base64.b64decode(payload)
	    return _to_pil(raw)

	def _url_to_image(url: str) -> Image.Image:
	    """Download ``url`` and return its body as an RGB image.

	    The request uses a 20 second timeout, and a non-success HTTP status is
	    raised as an exception by ``raise_for_status()``.

	    NOTE(review): nothing here restricts the target host or caps the
	    response size, and ``url`` comes straight from the caller.
	    """
	    # Imported lazily so the dependency is only needed for URL inputs.
	    import requests

	    response = requests.get(url, timeout=20)
	    response.raise_for_status()
	    payload = response.content
	    return _to_pil(payload)


	# ─────────────────────────────────────────────
	# 3) FastAPI (REST)
	# ─────────────────────────────────────────────
	# FastAPI application that carries the REST route(s) defined below.
	api = FastAPI(title="DeepSeek-OCR API")
	# Single adapter instance, built once at import time and shared by the
	# REST handler and the Gradio callback.
	_engine = DeepSeekOCRAdapter()

	@api.post("/ocr")
	async def ocr(
	    image_b64: Optional[str] = Body(default=None),
	    image_url: Optional[str] = Body(default=None),
	    lang: str = Body(default="auto"),
	    file: Optional[UploadFile] = File(default=None),
	):
	    """Run OCR on one image supplied as an upload, base64 text or URL.

	    Exactly one source is used, in this order of precedence: ``file``,
	    then ``image_b64``, then ``image_url``.

	    Returns:
	        ``{"ok": True, "text": ...}`` on success;
	        HTTP 400 with ``{"ok": False, "error": ...}`` when no source was
	        given;
	        HTTP 500 with ``{"ok": False, "error": ..., "trace": ...}`` when
	        anything raises while loading or recognising the image.

	    NOTE(review): a ``File`` parameter is declared next to ``Body`` fields,
	    so the scalar fields presumably have to be sent as form fields rather
	    than as a JSON document; confirm against the FastAPI version in use.
	    NOTE(review): the 500 response exposes the full traceback to the
	    client; fine for a test deployment, worth removing for a public one.
	    """
	    try:
	        if file is not None:
	            image = _to_pil(await file.read())
	        elif image_b64:
	            image = _b64_to_image(image_b64)
	        elif image_url:
	            image = _url_to_image(image_url)
	        else:
	            return JSONResponse(status_code=400, content={
	                "ok": False,
	                "error": "Provide one of: file | image_b64 | image_url",
	            })
	        text = _engine.recognize(image, lang=lang)
	        return {"ok": True, "text": text}
	    except Exception as e:
	        # Boundary handler: report every failure as JSON instead of letting
	        # the framework produce its default error page.
	        return JSONResponse(status_code=500, content={
	            "ok": False, "error": str(e), "trace": traceback.format_exc(),
	        })

	# ─────────────────────────────────────────────
	# 4) Gradio UI (테스트)
	# ─────────────────────────────────────────────
	def _predict(image, lang):
	    """Gradio callback: OCR the submitted image, or report that none was given."""
	    if image is not None:
	        return _engine.recognize(_to_pil(image), lang=lang)
	    return "No image."

	# Browser UI for manual testing: image in, recognised text out.
	# NOTE(review): the original nesting was lost; the image and the result box
	# are assumed to share the row, with the language picker and button below.
	with gr.Blocks(title="DeepSeek-OCR (Claude-ready)") as demo:
	    gr.Markdown("### DeepSeek-OCR (HF Space)\n이미지를 업로드하면 텍스트를 추출합니다.")
	    with gr.Row():
	        img = gr.Image(type="pil", label="Input image")
	        out = gr.Textbox(label="OCR Result", lines=8)
	    lang = gr.Radio(["auto","en","ko","ja","zh"], value="auto", label="Language")
	    btn = gr.Button("Run OCR")
	    btn.click(_predict, inputs=[img, lang], outputs=[out])

	# Process one OCR request at a time. Gradio 4+ rejects ``concurrency_count``
	# and uses ``default_concurrency_limit`` instead; Gradio 3.x only knows the
	# old keyword, which surfaces here as a TypeError.
	try:
	    demo.queue(default_concurrency_limit=1)
	except TypeError:
	    demo.queue(concurrency_count=1)

	# Serve the UI and the REST endpoint from one ASGI application. A plain
	# ``app = api`` alias would leave ``demo`` unattached. The /ocr route was
	# registered before this mount, so it is still matched first.
	# mount_gradio_app returns the FastAPI instance it was given, so ``app``
	# remains the same object as ``api``.
	app = gr.mount_gradio_app(api, demo, path="/")