YNS-Elaine committed on
Commit
b6538da
·
verified ·
1 Parent(s): 8fb7b24

Create simple_test.py

Browse files
Files changed (1) hide show
  1. simple_test.py +42 -0
simple_test.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoProcessor, AutoModelForImageTextToText
2
+ from PIL import Image
3
+ import torch
4
+
5
+
6
+
7
_SMOLDOCLING_MODEL_ID = "ds4sd/SmolDocling-256M-preview"

# Maps model id -> (processor, model) so the weights are loaded once per
# process instead of on every call.
_smoldocling_cache = {}


def _load_smoldocling(model_id=_SMOLDOCLING_MODEL_ID):
    """Return the ``(processor, model)`` pair for *model_id*, loading on first use."""
    if model_id not in _smoldocling_cache:
        processor = AutoProcessor.from_pretrained(model_id)
        model = AutoModelForImageTextToText.from_pretrained(model_id)
        _smoldocling_cache[model_id] = (processor, model)
    return _smoldocling_cache[model_id]


def smoldocling_readimage(
    image,
    prompt_text="Convert this page to docling.",
    max_new_tokens=1024,
):
    """Run SmolDocling on a single image and return the decoded model output.

    Args:
        image: The image handed to the processor as ``images=[image]``
            (presumably a ``PIL.Image.Image`` -- confirm against callers).
        prompt_text: Instruction text placed after the image in the user turn.
        max_new_tokens: Upper bound on generated tokens, forwarded to
            ``model.generate``. Defaults to the original hard-coded 1024.

    Returns:
        The decoded text of the newly generated tokens only, with every
        ``"<end_of_utterance>"`` marker removed and surrounding whitespace
        stripped. Decoding uses ``skip_special_tokens=False``.
    """
    processor, model = _load_smoldocling()

    # Single user turn: one image placeholder followed by the instruction.
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image"},
                {"type": "text", "text": prompt_text},
            ],
        },
    ]

    prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
    inputs = processor(text=prompt, images=[image], return_tensors="pt")

    generated_ids = model.generate(**inputs, max_new_tokens=max_new_tokens)

    # generate() returns prompt + continuation; keep only the continuation.
    prompt_length = inputs.input_ids.shape[1]
    trimmed_generated_ids = generated_ids[:, prompt_length:]

    doctags = processor.batch_decode(
        trimmed_generated_ids,
        skip_special_tokens=False,
    )[0]

    # Drop the end-of-turn marker and trim whitespace on both sides.
    return doctags.replace("<end_of_utterance>", "").strip()