akhaliq HF Staff committed on
Commit
bd0cfb9
·
verified ·
1 Parent(s): 35d8939

Update Gradio app with multiple files

Browse files
Files changed (2) hide show
  1. app.py +12 -5
  2. requirements.txt +2 -2
app.py CHANGED
@@ -7,6 +7,9 @@ import os
7
  from typing import Optional
8
  import spaces
9
 
 
 
 
10
  # Load model and tokenizer
11
  model_name = "deepseek-ai/DeepSeek-OCR"
12
  tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
@@ -45,11 +48,15 @@ def ocr_process(
45
 
46
  try:
47
  # Move model to GPU and set dtype
48
- model.to("cuda")
49
- model.to(torch.bfloat16)
50
  # Save image temporarily
51
  temp_image_path = "/tmp/temp_ocr_image.jpg"
52
  image_input.save(temp_image_path)
 
 
 
 
53
 
54
  # Set prompt based on task type
55
  if task_type == "markdown":
@@ -62,7 +69,7 @@ def ocr_process(
62
  tokenizer,
63
  prompt=prompt,
64
  image_file=temp_image_path,
65
- output_path="",
66
  base_size=base_size,
67
  image_size=image_size,
68
  crop_mode=crop_mode,
@@ -122,7 +129,7 @@ with gr.Blocks(title="DeepSeek OCR") as demo:
122
  step=128,
123
  value=1024,
124
  label="Base Size",
125
- info="Model processing size (larger = better quality, slower)",
126
  )
127
 
128
  image_size = gr.Slider(
@@ -131,7 +138,7 @@ with gr.Blocks(title="DeepSeek OCR") as demo:
131
  step=128,
132
  value=640,
133
  label="Image Size",
134
- info="Target image size",
135
  )
136
 
137
  crop_mode = gr.Checkbox(
 
7
  from typing import Optional
8
  import spaces
9
 
10
+ # Set CUDA device
11
+ os.environ["CUDA_VISIBLE_DEVICES"] = '0'
12
+
13
  # Load model and tokenizer
14
  model_name = "deepseek-ai/DeepSeek-OCR"
15
  tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
 
48
 
49
  try:
50
  # Move model to GPU and set dtype
51
+ model.cuda().to(torch.bfloat16)
52
+
53
  # Save image temporarily
54
  temp_image_path = "/tmp/temp_ocr_image.jpg"
55
  image_input.save(temp_image_path)
56
+
57
+ # Create output directory
58
+ output_path = "/tmp/ocr_output"
59
+ os.makedirs(output_path, exist_ok=True)
60
 
61
  # Set prompt based on task type
62
  if task_type == "markdown":
 
69
  tokenizer,
70
  prompt=prompt,
71
  image_file=temp_image_path,
72
+ output_path=output_path,
73
  base_size=base_size,
74
  image_size=image_size,
75
  crop_mode=crop_mode,
 
129
  step=128,
130
  value=1024,
131
  label="Base Size",
132
+ info="Model processing size - Tiny: 512, Small: 640, Base: 1024, Large: 1280",
133
  )
134
 
135
  image_size = gr.Slider(
 
138
  step=128,
139
  value=640,
140
  label="Image Size",
141
+ info="Target image size - Gundam mode: 640 with crop, others match base_size",
142
  )
143
 
144
  crop_mode = gr.Checkbox(
requirements.txt CHANGED
@@ -2,7 +2,7 @@ torch==2.6.0
2
  transformers==4.46.3
3
  tokenizers==0.20.3
4
  einops
5
- addict
6
  easydict
7
  gradio>=4.0.0
8
  spaces>=0.20.0
@@ -12,4 +12,4 @@ accelerate>=0.24.0
12
  sentencepiece>=0.1.99
13
  protobuf>=3.20.0
14
  torchvision
15
- flash-attn @ https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu12torch2.6cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
 
2
  transformers==4.46.3
3
  tokenizers==0.20.3
4
  einops
5
+ addict
6
  easydict
7
  gradio>=4.0.0
8
  spaces>=0.20.0
 
12
  sentencepiece>=0.1.99
13
  protobuf>=3.20.0
14
  torchvision
15
+ flash-attn @ https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu12torch2.6cxx11abiFALSE-cp310-cp310-linux_x86_64.whl