# TODO: change modes
# Tiny:   base_size = 512,  image_size = 512,  crop_mode = False
# Small:  base_size = 640,  image_size = 640,  crop_mode = False
# Base:   base_size = 1024, image_size = 1024, crop_mode = False
# Large:  base_size = 1280, image_size = 1280, crop_mode = False
# Gundam: base_size = 1024, image_size = 640,  crop_mode = True

BASE_SIZE = 1024
IMAGE_SIZE = 640
CROP_MODE = True
MIN_CROPS = 2
MAX_CROPS = 6  # max: 9; if your GPU memory is small, 6 is recommended

MAX_CONCURRENCY = 100  # if you have limited GPU memory, lower the concurrency count
NUM_WORKERS = 64  # image pre-processing (resize/padding) workers

PRINT_NUM_VIS_TOKENS = False
SKIP_REPEAT = True

MODEL_PATH = 'deepseek-ai/DeepSeek-OCR'  # change to your model path

# TODO: change INPUT_PATH
# .pdf:                     run_dpsk_ocr_pdf.py
# .jpg, .png, .jpeg:        run_dpsk_ocr_image.py
# OmniDocBench images path: run_dpsk_ocr_eval_batch.py
INPUT_PATH = ''
OUTPUT_PATH = ''

PROMPT = '\n<|grounding|>Convert the document to markdown.'
# PROMPT = '\nFree OCR.'

# TODO: commonly used prompts
# document:            \n<|grounding|>Convert the document to markdown.
# other image:         \n<|grounding|>OCR this image.
# without layouts:     \nFree OCR.
# figures in document: \nParse the figure.
# general:             \nDescribe this image in detail.
# rec:                 \nLocate <|ref|>xxxx<|/ref|> in the image.  (e.g. '先天下之忧而忧' as the text to locate)
# .......

from transformers import AutoTokenizer

TOKENIZER = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)
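
# A minimal, optional sanity check (an addition, not part of the original pipeline):
# running this config directly encodes the active PROMPT with the tokenizer loaded
# above and prints its text-token count. It uses only names defined in this file and
# is skipped entirely when the config is imported by the run_dpsk_ocr_* scripts.
if __name__ == '__main__':
    prompt_ids = TOKENIZER.encode(PROMPT, add_special_tokens=False)
    print(f'Active prompt: {PROMPT!r}')
    print(f'Text tokens in prompt: {len(prompt_ids)}')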