akhaliq HF Staff committed on
Commit
086e346
·
verified ·
1 Parent(s): a39355b

Deploy Gradio app with multiple files

Browse files
Files changed (2) hide show
  1. app.py +180 -0
  2. requirements.txt +7 -0
app.py ADDED
@@ -0,0 +1,180 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from transformers import AutoModel, AutoTokenizer
4
+ from PIL import Image
5
+ import io
6
+ import os
7
+ from typing import Optional
8
+
9
# Device selection: expose only the first GPU to this process and fall back
# to CPU when CUDA is not available.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
device = "cuda" if torch.cuda.is_available() else "cpu"

# FlashAttention-2 kernels are CUDA-only, so only request them when a GPU is
# actually in use; on CPU ask for the plain "eager" attention path instead of
# failing at load time.
# NOTE(review): confirm the model's remote code accepts "eager" and that its
# inference path runs on CPU at all.
attn_implementation = "flash_attention_2" if device == "cuda" else "eager"

# Load model and tokenizer.
# trust_remote_code=True executes Python shipped in the model repository, so
# the repository name below must stay pinned to a trusted source.
model_name = "deepseek-ai/DeepSeek-OCR"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModel.from_pretrained(
    model_name,
    _attn_implementation=attn_implementation,
    trust_remote_code=True,
    use_safetensors=True,
)
model = model.eval().to(device)
if device == "cuda":
    # Cast to bfloat16 on GPU only; the CPU path keeps the loaded dtype.
    model = model.to(torch.bfloat16)
25
+
26
+
27
def ocr_process(
    image_input: "Optional[Image.Image]",
    task_type: str = "ocr",
    base_size: int = 1024,
    image_size: int = 640,
    crop_mode: bool = True,
) -> str:
    """
    Process image and extract text using DeepSeek-OCR model.

    The image is written into a private, per-call temporary directory that is
    removed when the call finishes (whether inference succeeds or raises), so
    concurrent requests never share or overwrite each other's input file.

    Args:
        image_input: Input image, or None when nothing has been uploaded
        task_type: Type of task - "markdown" for document conversion; any
            other value (default "ocr") selects plain text extraction
        base_size: Base size for model processing
        image_size: Target image size
        crop_mode: Whether to use crop mode

    Returns:
        Extracted text or markdown content. User-facing messages are returned
        instead when no image was supplied, when the model yields an empty
        result, or when processing raises (the error text is included).
    """
    # Local import keeps this block self-contained; the module's import list
    # does not include tempfile.
    import tempfile

    if image_input is None:
        return "Please upload an image first."

    # Set prompt based on task type
    if task_type == "markdown":
        prompt = "<image>\n<|grounding|>Convert the document to markdown. "
    else:
        prompt = "<image>\nFree OCR. "

    try:
        with tempfile.TemporaryDirectory(prefix="deepseek_ocr_") as work_dir:
            temp_image_path = os.path.join(work_dir, "input.png")

            # Uploads and clipboard pastes are often RGBA or palette images;
            # normalise to RGB and store losslessly so saving cannot fail on
            # the image mode and no compression artefacts are added.
            rgb_image = image_input
            if rgb_image.mode != "RGB":
                rgb_image = rgb_image.convert("RGB")
            rgb_image.save(temp_image_path)

            # Run inference. Sliders may deliver floats, so sizes are coerced
            # to int. output_path points at the scratch directory so that any
            # file the model writes lands somewhere valid and gets cleaned up.
            # NOTE(review): DeepSeek-OCR's remote `infer` appears to return
            # the decoded text only when called with eval_mode=True (otherwise
            # it streams to stdout and returns None) - confirm against the
            # model repo; if so, this call always ends in the
            # "No text detected" message below.
            output = model.infer(
                tokenizer,
                prompt=prompt,
                image_file=temp_image_path,
                output_path=work_dir,
                base_size=int(base_size),
                image_size=int(image_size),
                crop_mode=bool(crop_mode),
                save_results=False,
                test_compress=False,
            )
    except Exception as e:
        # UI boundary: surface the failure in the output box rather than
        # letting the exception escape into the Gradio event handler.
        return f"Error processing image: {str(e)}"

    return output if output else "No text detected in image."
82
+
83
+
84
# Create Gradio interface: a two-column layout with the image and settings on
# the left and the extracted text on the right.
with gr.Blocks(title="DeepSeek OCR") as demo:
    gr.HTML(
        """
        <div style="text-align: center; margin-bottom: 20px;">
            <h1>🔍 DeepSeek OCR</h1>
            <p>Extract text and convert documents to markdown using DeepSeek-OCR</p>
            <p>Built with <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" style="color: #0066cc; text-decoration: none;">anycoder</a></p>
        </div>
        """
    )

    with gr.Row():
        # Left column: input image and inference settings.
        with gr.Column(scale=1):
            gr.Markdown("### Upload Image")
            image_input = gr.Image(
                label="Input Image",
                type="pil",
                sources=["upload", "webcam", "clipboard"],
            )

            gr.Markdown("### Settings")
            task_type = gr.Radio(
                choices=["ocr", "markdown"],
                value="ocr",
                label="Task Type",
                info="OCR: Extract text | Markdown: Convert document to markdown",
            )

            base_size = gr.Slider(
                minimum=512,
                maximum=1280,
                step=128,
                value=1024,
                label="Base Size",
                info="Model processing size (larger = better quality, slower)",
            )

            image_size = gr.Slider(
                minimum=512,
                maximum=1280,
                step=128,
                value=640,
                label="Image Size",
                info="Target image size",
            )

            crop_mode = gr.Checkbox(
                value=True,
                label="Crop Mode",
                info="Enable crop mode for better processing",
            )

            submit_btn = gr.Button("🚀 Extract Text", variant="primary", size="lg")

        # Right column: read-only result box plus a copy-to-clipboard button.
        with gr.Column(scale=1):
            gr.Markdown("### Output")
            output_text = gr.Textbox(
                label="Extracted Text",
                lines=10,
                interactive=False,
                placeholder="Text will appear here...",
            )

            copy_btn = gr.Button("📋 Copy Output")

    # Event handlers
    # The input order must match ocr_process's positional parameters.
    submit_btn.click(
        fn=ocr_process,
        inputs=[image_input, task_type, base_size, image_size, crop_mode],
        outputs=output_text,
    )

    # The clipboard write happens in the browser via `js`; the Python side is
    # an identity function that leaves the textbox content unchanged.
    copy_btn.click(
        fn=lambda text: text,
        inputs=output_text,
        outputs=output_text,
        js="(text) => { navigator.clipboard.writeText(text); alert('Copied to clipboard!'); return text; }",
    )

    # Examples section: each row pre-fills the image and the task type only;
    # the remaining settings keep their current values.
    gr.Markdown("### Examples")
    gr.Examples(
        examples=[
            ["https://images.unsplash.com/photo-1507003211169-0a1dd7228f2d?w=500", "ocr"],
            [
                "https://images.unsplash.com/photo-1481627834876-b7833e8f5570?w=500",
                "markdown",
            ],
        ],
        inputs=[image_input, task_type],
        label="Try these examples",
    )


if __name__ == "__main__":
    demo.launch(share=False)
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ gradio>=4.0.0
2
+ torch>=2.0.0
3
+ transformers>=4.40.0
4
+ Pillow>=10.0.0
5
+ deepseek-ai
6
+ safetensors>=0.4.0
7
+ flash-attn>=2.5.0