PauloFN committed on
Commit
6a6918c
·
1 Parent(s): bf27b3b
Files changed (44) hide show
  1. .gitignore +61 -0
  2. Dockerfile +47 -0
  3. draft_computation/__init__.py +9 -0
  4. draft_computation/draft_computation_app/__init__.py +3 -0
  5. draft_computation/draft_computation_app/app.py +164 -0
  6. draft_computation/draft_computation_app/constants.py +4 -0
  7. draft_computation/draft_computation_app/dummy_data/__init__.py +0 -0
  8. draft_computation/draft_computation_app/dummy_data/results.py +127 -0
  9. draft_computation/draft_computation_app/dummy_data/results2.py +164 -0
  10. draft_computation/draft_computation_app/dummy_test/test2_app.py +34 -0
  11. draft_computation/draft_computation_app/dummy_test/test_app.py +34 -0
  12. draft_computation/draft_computation_app/main.py +29 -0
  13. draft_computation/draft_computation_app/ocr_placeholder.py +31 -0
  14. draft_computation/draft_computation_app/utils.py +40 -0
  15. draft_computation/endpoint.py +49 -0
  16. draft_computation/models/pose.pt +3 -0
  17. draft_computation/models/seg.pt +3 -0
  18. draft_computation/ocr/0.26.0 +27 -0
  19. draft_computation/ocr/inference.py +113 -0
  20. draft_computation/ocr/ocr_model_output/checkpoint-441/config.json +65 -0
  21. draft_computation/ocr/ocr_model_output/checkpoint-441/generation_config.json +10 -0
  22. draft_computation/ocr/ocr_model_output/checkpoint-441/model.safetensors +3 -0
  23. draft_computation/ocr/ocr_model_output/checkpoint-441/preprocessor_config.json +23 -0
  24. draft_computation/ocr/ocr_model_output/checkpoint-441/rng_state.pth +3 -0
  25. draft_computation/ocr/ocr_model_output/checkpoint-441/scheduler.pt +3 -0
  26. draft_computation/ocr/ocr_model_output/checkpoint-441/special_tokens_map.json +7 -0
  27. draft_computation/ocr/ocr_model_output/checkpoint-441/tokenizer.json +0 -0
  28. draft_computation/ocr/ocr_model_output/checkpoint-441/tokenizer_config.json +58 -0
  29. draft_computation/ocr/ocr_model_output/checkpoint-441/trainer_state.json +2828 -0
  30. draft_computation/ocr/ocr_model_output/checkpoint-441/training_args.bin +3 -0
  31. draft_computation/ocr/ocr_model_output/checkpoint-441/vocab.txt +0 -0
  32. draft_computation/ocr/text_dataset/annotations.json +295 -0
  33. draft_computation/ocr/train_ocr.py +162 -0
  34. integration_test.py +86 -0
  35. main.py +125 -0
  36. models/__init__.py +0 -0
  37. models/database.py +35 -0
  38. models/schemas.py +16 -0
  39. requirements.txt +74 -0
  40. services/__init__.py +0 -0
  41. services/measurement_service.py +30 -0
  42. services/ml_results_example.txt +724 -0
  43. services/pdf_service.py +128 -0
  44. services/persistence_service.py +148 -0
.gitignore ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ __pycache__/
3
+ *.pyc
4
+ *.pyo
5
+ *.pyd
6
+ .Python/
7
+ build/
8
+ develop-eggs/
9
+ dist/
10
+ egg-info/
11
+ .eggs/
12
+ lib/
13
+ lib64/
14
+ parts/
15
+ sbin/
16
+ share/
17
+ venv/
18
+ .venv/
19
+ *.egg
20
+ .coverage
21
+ .tox/
22
+ .mypy_cache/
23
+ .pytest_cache/
24
+
25
+ # Jupyter Notebook
26
+ .ipynb_checkpoints
27
+
28
+ # React / Node.js
29
+ node_modules/
30
+ dist/
31
+ build/
32
+ .env
33
+ .env.local
34
+ .env.development.local
35
+ .env.test.local
36
+ .env.production.local
37
+ npm-debug.log*
38
+ yarn-debug.log*
39
+ yarn-error.log*
40
+ .pnpm-debug.log*
41
+ .parcel-cache/
42
+ .vite/
43
+
44
+ # macOS
45
+ .DS_Store
46
+ .Trashes
47
+ ._*
48
+
49
+ # Windows
50
+ Thumbs.db
51
+ ehthumbs.db
52
+ Desktop.ini
53
+
54
+ # IDEs
55
+ .idea/
56
+ .vscode/
57
+ *.iml
58
+
59
+ # Database
60
+ *.db
61
+ reports/*
Dockerfile ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use an official Python runtime as a parent image.
# The "slim" variant is a good choice as it's smaller than the full version.
FROM python:3.11-slim

# Prevent Python from writing .pyc files to disc and from buffering
# stdout/stderr. (key=value form; the space-separated legacy ENV form
# is deprecated by Docker.)
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1

# Set the working directory in the container to /app.
# This is where the application's code will live.
WORKDIR /app

# Copy the requirements file first to take advantage of Docker's layer
# caching: if requirements don't change, this layer (and the pip install
# below) won't be rebuilt, speeding up future builds.
COPY requirements.txt .

# Install dependencies. --no-cache-dir keeps the image size down.
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the application's code into the container at /app.
COPY . .

# Document the port the container listens on.
# NOTE: this must match the port uvicorn binds below (7860, the default
# expected by Hugging Face Spaces); it previously advertised 8000.
EXPOSE 7860

# Run the FastAPI application with uvicorn.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]

# For a generic Python script:
# CMD ["python", "main.py"]

# For a Flask application (using the development server):
# NOTE: For production, you should use a proper WSGI server like Gunicorn.
# CMD ["flask", "run", "--host=0.0.0.0", "--port=7860"]

# For a Flask application with Gunicorn:
# CMD ["gunicorn", "--bind", "0.0.0.0:7860", "main:app"]
draft_computation/__init__.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
import sys

# Make the package's internal directories importable by bare module name
# (submodules use flat imports such as ``from ocr.inference import ...``).
# NOTE(review): these paths are relative to the process CWD, so imports only
# resolve when the app is launched from the repository root — confirm the
# deployment working directory.
sys.path.append('./draft_computation')
sys.path.append('./draft_computation/models')
sys.path.append('./draft_computation/draft_computation_app')
sys.path.append('./draft_computation/ocr')
from .endpoint import run

# Public package API: the single pipeline entry point.
__all__ = ['run']
draft_computation/draft_computation_app/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
# Public package API: expose the draft-calculation entry point.
from .app import calculate_draft

__all__ = ["calculate_draft"]
draft_computation/draft_computation_app/app.py ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import numpy as np
3
+ import cv2
4
+ import os
5
+ from datetime import datetime
6
+ import uuid
7
+ from . import constants
8
+ from . import utils
9
+ from . import ocr_placeholder
10
+
11
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
12
+
13
def calculate_draft(pose_results, segment_data, original_image):
    """
    Calculate the vessel draft from pose keypoints and a water-segmentation polygon.

    Args:
        pose_results: Iterable of mark groups; each group holds five keypoints
            (x, y, confidence) ordered top (meter mark) to bottom (20cm mark).
        segment_data: Polygon point list(s) describing the water region. When
            several polygons are present, the one with the most points is used.
        original_image: Source image, HxWxC array (converted if not ndarray).

    Returns:
        Tuple ``(final_draft, mid_results)``: the draft in meters plus a dict of
        intermediate values. On failure returns ``(-1, {})`` so callers can
        always unpack two values (previously a bare ``-1`` was returned, which
        broke callers that unpack the tuple).
    """
    # Ensure original_image is a NumPy array.
    if not isinstance(original_image, np.ndarray):
        original_image = np.array(original_image)  # Attempt to convert if not already

    # Rasterise the water polygon into a binary mask internally.
    # NOTE(review): this branch assumes segment_data is a list of polygons; a
    # single (N, 2) polygon array would also have len > 1 and be mishandled
    # (argmax over per-point lengths) — confirm callers always pass masks.xy.
    mask = np.zeros(original_image.shape[:2], dtype=np.uint8)
    if len(segment_data) > 1:
        r = [len(i) for i in segment_data]
        segment_data = segment_data[np.argmax(r)]
    pts = np.array(segment_data, dtype=np.int32)
    cv2.fillPoly(mask, [pts], 1)
    segment_mask = mask

    mark_names = ["meter mark", "80cm mark", "60cm mark", "40cm mark", "20cm mark"]

    def find_lowest_mark_group(pose_results):
        # The "lowest" group is the one whose last confident keypoint has the
        # largest y (lowest on the hull, i.e. closest to the water).
        lowest_mark_group = None
        max_y = -1

        for mark_group in pose_results:
            last_valid_keypoint = None
            for keypoint in reversed(mark_group):
                if keypoint[2] >= constants.CONF_THRESHOLD:
                    last_valid_keypoint = keypoint
                    break

            if last_valid_keypoint is not None:
                _, y, _ = last_valid_keypoint
                if y > max_y:
                    max_y = y
                    lowest_mark_group = mark_group

        return lowest_mark_group

    def extract_meter_mark(image, mark_group, group_index):
        # Crop a square around the top keypoint (the meter-number mark),
        # sized from the pixel length of 20 cm plus a 20% margin.
        first_keypoint = mark_group[0]
        x, y, _ = first_keypoint

        twenty_cm_in_pixels = utils.calc_distance(mark_group[0], mark_group[1])
        square_size = utils.calc_sqr_size(constants.DEFAULT_SQUARE_SIZE_CM, twenty_cm_in_pixels)
        square_size *= 1.2

        half_size = int(square_size / 2)

        h, w, _ = image.shape

        # Shift the crop left by 20% of the square: the digits sit left of the keypoint.
        x1 = int(x - half_size - (square_size * 0.2))
        y1 = int(y - half_size)
        x2 = int(x + half_size)
        y2 = int(y + half_size)

        # Clamp the crop rectangle to the image bounds.
        if x1 < 0: x1 = 0
        if y1 < 0: y1 = 0
        if x2 > w: x2 = w
        if y2 > h: y2 = h

        cropped_image = image[y1:y2, x1:x2]

        if constants.SAVE_CROPPED_IMAGE:
            output_folder = os.path.join(os.path.dirname(__file__), constants.CROP_OUTPUT_FOLDER)
            if not os.path.exists(output_folder):
                os.makedirs(output_folder)

            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
            filename = f"{timestamp}_{group_index}_{uuid.uuid4()}.png"
            cv2.imwrite(os.path.join(output_folder, filename), cropped_image)

        return cropped_image

    # Crop (and optionally save) meter marks for all groups with high confidence.
    for i, mark_group in enumerate(pose_results):
        if mark_group[0][2] >= constants.CONF_THRESHOLD:
            extract_meter_mark(original_image, mark_group, i)

    lowest_mark_group = find_lowest_mark_group(pose_results)
    if lowest_mark_group is None:
        logging.error("No lowest mark group found.")
        return -1, {}
    logging.info(f"Lowest mark group found: {lowest_mark_group}")

    # Read the meter number (e.g. "7m") from the crop around the top keypoint.
    meter_mark_image = extract_meter_mark(original_image, lowest_mark_group, -1)
    meter_value_str = ocr_placeholder.perform_ocr(meter_mark_image)
    meter_value = int(meter_value_str.replace('m', ''))
    logging.info(f"Meter value from OCR: {meter_value}m")

    # Locate the lowest confident keypoint within the chosen group.
    last_valid_keypoint = None
    last_valid_keypoint_index = -1
    for i, keypoint in reversed(list(enumerate(lowest_mark_group))):
        if keypoint[2] >= constants.CONF_THRESHOLD:
            last_valid_keypoint = keypoint
            last_valid_keypoint_index = i
            break

    if last_valid_keypoint is None:
        logging.error("No last valid keypoint found.")
        return -1, {}
    logging.info(f"Last valid keypoint found: {mark_names[last_valid_keypoint_index]} ({last_valid_keypoint_index}) at coordinates {last_valid_keypoint[:2]}")

    x, y, _ = last_valid_keypoint

    # Find the water line in the segment mask at the keypoint's x column.
    column = segment_mask[:, int(x)]
    water_line_indices = np.where(column > 0)

    if len(water_line_indices[0]) > 0:
        water_line_top_y = water_line_indices[0][0]
        water_line_bottom_y = water_line_indices[0][-1]

        # Treat the waterline as a vertical segment within the keypoint's column.
        segment_start = (x, water_line_top_y)
        segment_end = (x, water_line_bottom_y)

        pixel_distance = utils.distance_point_to_segment((x, y), segment_start, segment_end)
    else:
        logging.error("No water line found.")
        return -1, {}

    logging.info(f"Pixel distance between keypoint and water line: {pixel_distance}")

    # Pixel length of 20 cm: mean gap between consecutive confident keypoints.
    distances = []
    for i in range(len(lowest_mark_group) - 1):
        if lowest_mark_group[i][2] >= constants.CONF_THRESHOLD and lowest_mark_group[i+1][2] >= constants.CONF_THRESHOLD:
            distances.append(utils.calc_distance(lowest_mark_group[i], lowest_mark_group[i+1]))
    if not distances:
        logging.error("No valid consecutive keypoints found to calculate 20cm in pixels.")
        return -1, {}
    twenty_cm_in_pixels = np.mean(distances)
    logging.info(f"20cm in pixels: {twenty_cm_in_pixels}")

    cm_distance = (pixel_distance / twenty_cm_in_pixels) * 20
    logging.info(f"Distance in cm between keypoint and water line: {cm_distance}")

    # The i-th keypoint sits at (100 - 20*i) cm within the current meter band.
    last_valid_keypoint_cm = (100 - (last_valid_keypoint_index * 20))
    logging.info(f"Last valid keypoint cm value: {last_valid_keypoint_cm}")

    # NOTE(review): the +5 cm offset appears to be an empirical correction for
    # where the keypoint sits on the painted mark — confirm with the model team.
    final_draft_cm = (last_valid_keypoint_cm + 5) - cm_distance
    logging.info(f"Final draft cm value: {final_draft_cm}")

    final_draft = (meter_value - 1) + (final_draft_cm / 100)
    logging.info(f"Final calculated draft: {final_draft}")

    mid_results = {
        'meter_value': meter_value,
        'last_valid_keypoint_cm': last_valid_keypoint_cm,
        'cm_distance': cm_distance,
        'final_draft_cm': final_draft_cm,
    }

    return final_draft, mid_results
draft_computation/draft_computation_app/constants.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
# Minimum keypoint confidence for a detection to be considered valid.
CONF_THRESHOLD = 0.25
# Approximate physical size (cm) of the square crop taken around a meter mark.
DEFAULT_SQUARE_SIZE_CM = 17
# Folder (relative to this package) where cropped mark images are written.
CROP_OUTPUT_FOLDER = "output"
# Toggle saving cropped meter-mark images (debugging / dataset building).
SAVE_CROPPED_IMAGE = False
draft_computation/draft_computation_app/dummy_data/__init__.py ADDED
File without changes
draft_computation/draft_computation_app/dummy_data/results.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ pose = np.array([[[6.5433e+02, 5.8361e+02, 9.9946e-01],
3
+ [6.8423e+02, 6.5411e+02, 9.9998e-01],
4
+ [6.9117e+02, 7.2399e+02, 9.9999e-01],
5
+ [6.9718e+02, 7.9327e+02, 9.9987e-01],
6
+ [7.0293e+02, 8.5917e+02, 9.3761e-01]],
7
+
8
+ [[6.3452e+02, 2.8308e+02, 9.9482e-01],
9
+ [6.5906e+02, 3.4467e+02, 9.9938e-01],
10
+ [6.5794e+02, 4.0197e+02, 1.0000e+00],
11
+ [6.6237e+02, 4.6714e+02, 9.9999e-01],
12
+ [6.6390e+02, 5.2631e+02, 9.9611e-01]],
13
+
14
+ [[6.3164e+02, 4.3033e+00, 9.6437e-01],
15
+ [6.4993e+02, 5.6203e+01, 9.9290e-01],
16
+ [6.5003e+02, 1.1126e+02, 9.9973e-01],
17
+ [6.5038e+02, 1.6832e+02, 9.9971e-01],
18
+ [6.4932e+02, 2.2213e+02, 9.7749e-01]],
19
+
20
+ [[6.9001e+02, 9.0983e+02, 9.5246e-01],
21
+ [7.1195e+02, 9.7251e+02, 9.9073e-01],
22
+ [7.1940e+02, 1.0361e+03, 9.9944e-01],
23
+ [7.2875e+02, 1.1032e+03, 9.9775e-01],
24
+ [7.3482e+02, 1.1613e+03, 2.4290e-01]],
25
+
26
+ [[6.9527e+02, 8.9296e+02, 9.9578e-01],
27
+ [7.2340e+02, 9.6668e+02, 9.9980e-01],
28
+ [7.2624e+02, 1.0355e+03, 9.9973e-01],
29
+ [7.3433e+02, 1.1097e+03, 9.9814e-01],
30
+ [7.4265e+02, 1.1769e+03, 5.6901e-01]]])
31
+
32
+ segment = np.array(
33
+ [[ 0.22812, 1159.4],
34
+ [ 0.22812, 1366.9],
35
+ [ 1224, 1366.9],
36
+ [ 1224, 1257.8],
37
+ [ 1217.4, 1257.8],
38
+ [ 1215.2, 1255.6],
39
+ [ 1200.2, 1255.6],
40
+ [ 1198.1, 1253.5],
41
+ [ 1183.1, 1253.5],
42
+ [ 1181, 1251.4],
43
+ [ 1163.9, 1251.4],
44
+ [ 1161.7, 1249.2],
45
+ [ 1144.6, 1249.2],
46
+ [ 1142.5, 1247.1],
47
+ [ 1121.1, 1247.1],
48
+ [ 1119, 1244.9],
49
+ [ 1116.8, 1247.1],
50
+ [ 1110.4, 1247.1],
51
+ [ 1108.3, 1244.9],
52
+ [ 1069.8, 1244.9],
53
+ [ 1067.6, 1242.8],
54
+ [ 1044.1, 1242.8],
55
+ [ 1042, 1240.7],
56
+ [ 1027, 1240.7],
57
+ [ 1024.8, 1238.5],
58
+ [ 1009.9, 1238.5],
59
+ [ 1007.7, 1236.4],
60
+ [ 994.89, 1236.4],
61
+ [ 992.75, 1234.2],
62
+ [ 975.64, 1234.2],
63
+ [ 973.5, 1232.1],
64
+ [ 952.11, 1232.1],
65
+ [ 949.97, 1230],
66
+ [ 924.3, 1230],
67
+ [ 922.16, 1227.8],
68
+ [ 902.91, 1227.8],
69
+ [ 900.77, 1225.7],
70
+ [ 883.66, 1225.7],
71
+ [ 881.52, 1223.5],
72
+ [ 860.13, 1223.5],
73
+ [ 857.99, 1221.4],
74
+ [ 843.02, 1221.4],
75
+ [ 840.88, 1219.3],
76
+ [ 825.91, 1219.3],
77
+ [ 823.77, 1217.1],
78
+ [ 798.1, 1217.1],
79
+ [ 795.96, 1215],
80
+ [ 751.04, 1215],
81
+ [ 748.9, 1212.8],
82
+ [ 714.68, 1212.8],
83
+ [ 712.54, 1210.7],
84
+ [ 706.12, 1210.7],
85
+ [ 703.98, 1208.6],
86
+ [ 697.56, 1208.6],
87
+ [ 695.42, 1206.4],
88
+ [ 678.31, 1206.4],
89
+ [ 676.17, 1204.3],
90
+ [ 663.34, 1204.3],
91
+ [ 661.2, 1202.2],
92
+ [ 639.81, 1202.2],
93
+ [ 637.67, 1200],
94
+ [ 603.44, 1200],
95
+ [ 601.3, 1197.9],
96
+ [ 590.61, 1197.9],
97
+ [ 588.47, 1195.7],
98
+ [ 577.78, 1195.7],
99
+ [ 575.64, 1193.6],
100
+ [ 562.8, 1193.6],
101
+ [ 560.66, 1191.5],
102
+ [ 547.83, 1191.5],
103
+ [ 545.69, 1189.3],
104
+ [ 532.85, 1189.3],
105
+ [ 530.72, 1187.2],
106
+ [ 479.38, 1187.2],
107
+ [ 477.24, 1185],
108
+ [ 447.29, 1185],
109
+ [ 445.15, 1182.9],
110
+ [ 406.65, 1182.9],
111
+ [ 404.51, 1180.8],
112
+ [ 357.45, 1180.8],
113
+ [ 355.31, 1178.6],
114
+ [ 323.23, 1178.6],
115
+ [ 321.09, 1176.5],
116
+ [ 306.11, 1176.5],
117
+ [ 303.98, 1174.3],
118
+ [ 289, 1174.3],
119
+ [ 286.86, 1172.2],
120
+ [ 269.75, 1172.2],
121
+ [ 267.61, 1170.1],
122
+ [ 244.08, 1170.1],
123
+ [ 241.94, 1167.9],
124
+ [ 233.39, 1167.9],
125
+ [ 231.25, 1165.8],
126
+ [ 231.25, 1159.4]]
127
+ )
draft_computation/draft_computation_app/dummy_data/results2.py ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ pose = np.array(
3
+ [[[2.6978e+02, 5.4799e+01, 9.9702e-01],
4
+ [2.7735e+02, 8.0360e+01, 9.9991e-01],
5
+ [2.7792e+02, 1.0644e+02, 9.9998e-01],
6
+ [2.7866e+02, 1.3190e+02, 9.9988e-01],
7
+ [2.8056e+02, 1.5711e+02, 8.7975e-01]],
8
+
9
+ [[2.7160e+02, 1.8630e+02, 9.9943e-01],
10
+ [2.7907e+02, 2.1047e+02, 9.9792e-01],
11
+ [2.7965e+02, 2.3873e+02, 9.4452e-01],
12
+ [2.8155e+02, 2.6565e+02, 9.4245e-02],
13
+ [2.8508e+02, 2.8900e+02, 4.7978e-05]],
14
+
15
+ [[2.7191e+02, 1.8308e+02, 9.7766e-01],
16
+ [2.7931e+02, 2.0942e+02, 9.9106e-01],
17
+ [2.8075e+02, 2.3690e+02, 9.9677e-01],
18
+ [2.8196e+02, 2.6651e+02, 8.2379e-01],
19
+ [2.8113e+02, 2.8766e+02, 6.3545e-05]]]
20
+ )
21
+ segment = np.array(
22
+ [[ 81.934, 247.37],
23
+ [ 81.934, 248.93],
24
+ [ 80.369, 250.5],
25
+ [ 79.847, 250.5],
26
+ [ 79.325, 251.02],
27
+ [ 78.803, 251.02],
28
+ [ 78.281, 251.54],
29
+ [ 77.759, 251.54],
30
+ [ 77.238, 252.07],
31
+ [ 76.194, 252.07],
32
+ [ 75.672, 252.59],
33
+ [ 74.106, 252.59],
34
+ [ 73.584, 253.11],
35
+ [ 72.541, 253.11],
36
+ [ 72.019, 253.63],
37
+ [ 70.975, 253.63],
38
+ [ 70.453, 254.15],
39
+ [ 69.409, 254.15],
40
+ [ 68.888, 254.68],
41
+ [ 68.366, 254.68],
42
+ [ 67.844, 255.2],
43
+ [ 65.756, 255.2],
44
+ [ 65.234, 255.72],
45
+ [ 64.191, 255.72],
46
+ [ 63.669, 256.24],
47
+ [ 61.581, 256.24],
48
+ [ 61.059, 256.76],
49
+ [ 58.45, 256.76],
50
+ [ 57.928, 257.28],
51
+ [ 55.319, 257.28],
52
+ [ 54.797, 257.81],
53
+ [ 52.188, 257.81],
54
+ [ 51.666, 258.33],
55
+ [ 48.534, 258.33],
56
+ [ 48.013, 258.85],
57
+ [ 44.881, 258.85],
58
+ [ 44.359, 259.37],
59
+ [ 38.097, 259.37],
60
+ [ 37.575, 259.89],
61
+ [ 1.0438, 259.89],
62
+ [ 1.0438, 333.48],
63
+ [ 333.48, 333.48],
64
+ [ 333.48, 269.29],
65
+ [ 332.43, 269.29],
66
+ [ 331.91, 268.77],
67
+ [ 322.52, 268.77],
68
+ [ 322, 268.24],
69
+ [ 315.21, 268.24],
70
+ [ 314.69, 267.72],
71
+ [ 311.04, 267.72],
72
+ [ 310.52, 268.24],
73
+ [ 302.69, 268.24],
74
+ [ 302.17, 268.77],
75
+ [ 288.6, 268.77],
76
+ [ 288.08, 268.24],
77
+ [ 284.42, 268.24],
78
+ [ 283.9, 267.72],
79
+ [ 280.77, 267.72],
80
+ [ 280.25, 267.2],
81
+ [ 278.16, 267.2],
82
+ [ 277.64, 266.68],
83
+ [ 276.07, 266.68],
84
+ [ 275.55, 266.16],
85
+ [ 274.51, 266.16],
86
+ [ 273.98, 265.63],
87
+ [ 270.85, 265.63],
88
+ [ 270.33, 265.11],
89
+ [ 264.59, 265.11],
90
+ [ 264.07, 264.59],
91
+ [ 258.33, 264.59],
92
+ [ 257.81, 264.07],
93
+ [ 254.68, 264.07],
94
+ [ 254.15, 263.55],
95
+ [ 251.54, 263.55],
96
+ [ 251.02, 263.02],
97
+ [ 249.46, 263.02],
98
+ [ 248.93, 262.5],
99
+ [ 246.85, 262.5],
100
+ [ 246.33, 261.98],
101
+ [ 244.76, 261.98],
102
+ [ 244.24, 261.46],
103
+ [ 237.45, 261.46],
104
+ [ 236.93, 260.94],
105
+ [ 236.41, 261.46],
106
+ [ 231.71, 261.46],
107
+ [ 231.19, 261.98],
108
+ [ 228.06, 261.98],
109
+ [ 227.54, 262.5],
110
+ [ 223.36, 262.5],
111
+ [ 222.84, 263.02],
112
+ [ 217.1, 263.02],
113
+ [ 216.58, 262.5],
114
+ [ 212.4, 262.5],
115
+ [ 211.88, 261.98],
116
+ [ 210.84, 261.98],
117
+ [ 210.32, 261.46],
118
+ [ 208.75, 261.46],
119
+ [ 208.23, 260.94],
120
+ [ 206.66, 260.94],
121
+ [ 206.14, 260.42],
122
+ [ 203.53, 260.42],
123
+ [ 203.01, 259.89],
124
+ [ 198.31, 259.89],
125
+ [ 197.79, 259.37],
126
+ [ 192.05, 259.37],
127
+ [ 191.53, 258.85],
128
+ [ 187.88, 258.85],
129
+ [ 187.35, 258.33],
130
+ [ 185.79, 258.33],
131
+ [ 185.27, 257.81],
132
+ [ 183.7, 257.81],
133
+ [ 183.18, 257.28],
134
+ [ 180.57, 257.28],
135
+ [ 180.05, 256.76],
136
+ [ 174.83, 256.76],
137
+ [ 174.31, 256.24],
138
+ [ 165.96, 256.24],
139
+ [ 165.43, 255.72],
140
+ [ 158.13, 255.72],
141
+ [ 157.61, 255.2],
142
+ [ 152.39, 255.2],
143
+ [ 151.87, 254.68],
144
+ [ 130.99, 254.68],
145
+ [ 130.47, 255.2],
146
+ [ 129.95, 254.68],
147
+ [ 125.25, 254.68],
148
+ [ 124.73, 254.15],
149
+ [ 123.16, 254.15],
150
+ [ 122.64, 253.63],
151
+ [ 122.12, 253.63],
152
+ [ 121.6, 253.11],
153
+ [ 120.55, 253.11],
154
+ [ 120.03, 252.59],
155
+ [ 119.51, 252.59],
156
+ [ 118.99, 252.07],
157
+ [ 118.47, 252.07],
158
+ [ 117.94, 251.54],
159
+ [ 117.42, 251.54],
160
+ [ 115.86, 249.98],
161
+ [ 115.33, 249.98],
162
+ [ 114.81, 249.46],
163
+ [ 114.81, 247.37]]
164
+ )
draft_computation/draft_computation_app/dummy_test/test2_app.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1

import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..')))

import numpy as np
import cv2
from draft_computation_app import calculate_draft
from draft_computation_app.dummy_data import results2 as results

def test_calculate_draft():
    """End-to-end check of calculate_draft against recorded model outputs."""
    pose_results = results.pose
    segment_data = results.segment

    # Load the test image the dummy pose/segment data was produced from.
    image_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "dummy_data", "test2.jpg")
    original_image = cv2.imread(image_path)

    # Stub out the OCR so the test does not depend on the trained model.
    from draft_computation_app import ocr_placeholder
    ocr_placeholder.perform_ocr = lambda image: "12m"

    # BUG FIX: calculate_draft returns (draft, mid_results); the previous code
    # bound the whole tuple to `draft` and then `abs(tuple - float)` raised.
    draft, mid_results = calculate_draft(pose_results, segment_data, original_image)

    print(f"The calculated draft is: {draft}")

    # Expected value derived from the recorded inputs; the tolerance absorbs
    # minor floating-point differences.
    expected_draft = 11.41
    assert abs(draft - expected_draft) < 0.1

if __name__ == "__main__":
    test_calculate_draft()
draft_computation/draft_computation_app/dummy_test/test_app.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1

import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..')))

import numpy as np
import cv2
from draft_computation_app import calculate_draft
from draft_computation_app.dummy_data import results

def test_calculate_draft():
    """End-to-end check of calculate_draft against recorded model outputs."""
    pose_results = results.pose
    segment_data = results.segment

    # Load the test image the dummy pose/segment data was produced from.
    image_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "dummy_data", "test.jpg")
    original_image = cv2.imread(image_path)

    # Stub out the OCR so the test does not depend on the trained model.
    from draft_computation_app import ocr_placeholder
    ocr_placeholder.perform_ocr = lambda image: "7m"

    # BUG FIX: calculate_draft returns (draft, mid_results); the previous code
    # bound the whole tuple to `draft` and then `abs(tuple - float)` raised.
    draft, mid_results = calculate_draft(pose_results, segment_data, original_image)

    print(f"The calculated draft is: {draft}")

    # Expected value derived from the recorded inputs; the tolerance absorbs
    # minor floating-point differences.
    expected_draft = 6.15
    assert abs(draft - expected_draft) < 0.1

if __name__ == "__main__":
    test_calculate_draft()
draft_computation/draft_computation_app/main.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import sys
3
+ import os
4
+ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
5
+
6
+ import numpy as np
7
+ import cv2
8
+ from draft_computation_app import calculate_draft
9
+
10
def create_dummy_data():
    """
    Creates dummy data for testing.

    Returns:
        Tuple of ``(pose_results, segment_data, original_image)``. The second
        element is a polygon (array of [x, y] points) outlining the water
        region — calculate_draft rasterises the polygon into a mask itself.
    """
    # Two mark groups of five fully-confident keypoints each.
    pose_results = np.array([
        [[100, 200, 1], [100, 220, 1], [100, 240, 1], [100, 260, 1], [100, 280, 1]],
        [[200, 300, 1], [200, 320, 1], [200, 340, 1], [200, 360, 1], [200, 380, 1]],
    ])

    # Water occupies the bottom of the frame (waterline at y=400).
    # BUG FIX: calculate_draft expects polygon points, not a raster mask;
    # previously a 500x500 binary mask was returned here, which the current
    # calculate_draft cannot consume.
    segment_data = np.array([[0, 400], [499, 400], [499, 499], [0, 499]])

    original_image = np.zeros((500, 500, 3), dtype=np.uint8)

    return pose_results, segment_data, original_image
25
+
26
if __name__ == "__main__":
    pose_results, segment_data, original_image = create_dummy_data()
    # calculate_draft returns (draft, mid_results); unpack it rather than
    # printing the raw tuple as before.
    draft, mid_results = calculate_draft(pose_results, segment_data, original_image)
    print(f"The draft is: {draft} meters")
draft_computation/draft_computation_app/ocr_placeholder.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from ocr.inference import OCRInference
import os

# Resolve the OCR checkpoint path relative to this file so it works regardless
# of the current working directory at import time.
current_script_dir = os.path.dirname(os.path.abspath(__file__))
# One level up from draft_computation_app is the draft_computation package root.
project_root = os.path.abspath(os.path.join(current_script_dir, ".."))
# Absolute path to the trained OCR checkpoint.
OCR_MODEL_PATH = os.path.join(project_root, "ocr", "ocr_model_output", "checkpoint-441")

print(f"OCR Model Path: {OCR_MODEL_PATH}")
# Module-level singleton: the OCR model is loaded once at import time.
# NOTE(review): consider lazy loading — any import of this module pays the
# full model-load cost, including contexts that never perform OCR.
ocr_engine = OCRInference(model_path=OCR_MODEL_PATH)

def perform_ocr(image_input):
    """
    Performs OCR using the integrated OCRInference engine.
    Args:
        image_input: Path to the image file or a NumPy array representing the image.
    Returns:
        The predicted text from the image.
    """
    return ocr_engine.perform_inference(image_input)
draft_computation/draft_computation_app/utils.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+
3
def calc_distance(a, b=None, c=None):
    """Mean Euclidean distance from point ``a`` to the provided neighbour(s).

    Args:
        a: Reference point (any array-like of coordinates).
        b: Optional neighbour point.
        c: Optional second neighbour point.

    Returns:
        The average of the distances from ``a`` to each neighbour given.

    Raises:
        ValueError: If neither ``b`` nor ``c`` is provided.
    """
    neighbours = [p for p in (b, c) if p is not None]
    if not neighbours:
        raise ValueError("At least one of 'b' or 'c' must be provided.")

    reference = np.array(a)
    return np.mean([np.linalg.norm(reference - np.array(p)) for p in neighbours])
14
+
15
def calc_sqr_size(cm_size, twenty_cm_in_pixels):
    """Convert ``cm_size`` (centimetres) to pixels, given the pixel length of 20 cm."""
    return (cm_size / 20) * twenty_cm_in_pixels
17
+
18
def distance_point_to_segment(point, segment_start, segment_end):
    """Shortest Euclidean distance from ``point`` to the segment [start, end].

    The point is projected onto the segment's supporting line; the projection
    parameter is clamped to the segment, so endpoints are handled correctly.
    """
    p = np.array(point)
    start = np.array(segment_start)
    end = np.array(segment_end)

    direction = end - start
    length_sq = np.dot(direction, direction)

    # Degenerate segment: both endpoints coincide, fall back to point distance.
    if length_sq == 0:
        return np.linalg.norm(p - start)

    # Fraction along the segment of the orthogonal projection, clamped to [0, 1].
    t = np.dot(p - start, direction) / length_sq
    t = max(0, min(1, t))

    nearest = start + t * direction
    return np.linalg.norm(p - nearest)
draft_computation/endpoint.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .draft_computation_app import calculate_draft
2
+ import os
3
+ import cv2
4
+
5
+ from ultralytics import YOLO
6
+
7
# Load both YOLO models once at import time.
# NOTE(review): paths are relative to the process CWD — they resolve only when
# the app is launched from the repository root; confirm deployment CWD.
model1 = YOLO("./draft_computation/models/pose.pt")  # keypoint (pose) model for draft marks
model2 = YOLO("./draft_computation/models/seg.pt")   # segmentation model for the water region
9
+
10
def combine_plots(original_image, plot1, plot2):
    """Blend two annotated overlays onto the original image.

    Each overlay is mixed in at 30% opacity on top of the running result
    (first ``plot1``, then ``plot2``).
    """
    blended = cv2.addWeighted(original_image, 0.7, plot1, 0.3, 0)
    return cv2.addWeighted(blended, 0.7, plot2, 0.3, 0)
14
+
15
def run(img_path_or_array):
    """
    Run the full draft-reading pipeline on one image.

    Args:
        img_path_or_array: Image path or array accepted by ultralytics YOLO.

    Returns:
        Dict with the computed draft, raw model outputs, annotated images and
        the intermediate values from calculate_draft — or ``None`` when
        post-processing fails (e.g. no keypoints or masks were detected).
    """

    # image_filename = "create_ocr_dataset/images/IMG_0044_01_jpg.rf.c4e4413436401ee76e86bd92e736b908.jpg"

    results1 = model1(img_path_or_array)
    results2 = model2(img_path_or_array, conf=0.15)

    try:
        # Either attribute access can raise (e.g. .masks is None when nothing
        # was segmented); any failure is treated as "no result".
        pose_results = results1[0].keypoints.data
        segment_results = results2[0].masks.xy

        # NOTE(review): if calculate_draft signals failure with a non-tuple
        # return, the unpacking below raises and is swallowed by this handler,
        # silently returning None — confirm that is the intended behaviour.
        draft, mid_results = calculate_draft(pose_results, segment_results, results1[0].orig_img)
        print(draft)
    except Exception as e:
        print(f"Error processing image: {e}")
        return

    output = {
        "draft": draft,
        "pose_results": pose_results,
        "segment_results": segment_results,
        "original_image": results1[0].orig_img,
        "pose_image_result": results1[0].plot(),
        "segment_image_result": results2[0].plot(),
    }
    # Blend the pose and segmentation overlays onto the original image.
    output['final_image_result'] = combine_plots(output['original_image'],
                                    output['pose_image_result'],
                                    output['segment_image_result']
                                    )
    output.update(mid_results)
    return output
46
+
47
if __name__ == "__main__":
    # Smoke test: run the full pipeline on a sample image and dump the output dict.
    print(run("create_ocr_dataset/images/IMG_0044_01_jpg.rf.c4e4413436401ee76e86bd92e736b908.jpg"))
49
+
draft_computation/models/pose.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ddeff68ebdcb7399594c7b9527bee099ecd8f62f3ac068af1d98cd628db102e
3
+ size 5697217
draft_computation/models/seg.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c09bf2df17498bf258f26584fa5442ecad621a9f187e3472b8bf43cf83df2db6
3
+ size 6052445
draft_computation/ocr/0.26.0 ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Collecting accelerate
2
+ Downloading accelerate-1.10.1-py3-none-any.whl.metadata (19 kB)
3
+ Requirement already satisfied: numpy<3.0.0,>=1.17 in c:\users\dev-n\miniconda3\envs\dji-ml\lib\site-packages (from accelerate) (2.2.6)
4
+ Requirement already satisfied: packaging>=20.0 in c:\users\dev-n\miniconda3\envs\dji-ml\lib\site-packages (from accelerate) (25.0)
5
+ Requirement already satisfied: psutil in c:\users\dev-n\miniconda3\envs\dji-ml\lib\site-packages (from accelerate) (7.0.0)
6
+ Requirement already satisfied: pyyaml in c:\users\dev-n\miniconda3\envs\dji-ml\lib\site-packages (from accelerate) (6.0.2)
7
+ Requirement already satisfied: torch>=2.0.0 in c:\users\dev-n\miniconda3\envs\dji-ml\lib\site-packages (from accelerate) (2.8.0)
8
+ Requirement already satisfied: huggingface_hub>=0.21.0 in c:\users\dev-n\miniconda3\envs\dji-ml\lib\site-packages (from accelerate) (0.34.4)
9
+ Requirement already satisfied: safetensors>=0.4.3 in c:\users\dev-n\miniconda3\envs\dji-ml\lib\site-packages (from accelerate) (0.6.2)
10
+ Requirement already satisfied: filelock in c:\users\dev-n\miniconda3\envs\dji-ml\lib\site-packages (from huggingface_hub>=0.21.0->accelerate) (3.19.1)
11
+ Requirement already satisfied: fsspec>=2023.5.0 in c:\users\dev-n\miniconda3\envs\dji-ml\lib\site-packages (from huggingface_hub>=0.21.0->accelerate) (2025.7.0)
12
+ Requirement already satisfied: requests in c:\users\dev-n\miniconda3\envs\dji-ml\lib\site-packages (from huggingface_hub>=0.21.0->accelerate) (2.32.5)
13
+ Requirement already satisfied: tqdm>=4.42.1 in c:\users\dev-n\miniconda3\envs\dji-ml\lib\site-packages (from huggingface_hub>=0.21.0->accelerate) (4.67.1)
14
+ Requirement already satisfied: typing-extensions>=3.7.4.3 in c:\users\dev-n\miniconda3\envs\dji-ml\lib\site-packages (from huggingface_hub>=0.21.0->accelerate) (4.14.1)
15
+ Requirement already satisfied: sympy>=1.13.3 in c:\users\dev-n\miniconda3\envs\dji-ml\lib\site-packages (from torch>=2.0.0->accelerate) (1.14.0)
16
+ Requirement already satisfied: networkx in c:\users\dev-n\miniconda3\envs\dji-ml\lib\site-packages (from torch>=2.0.0->accelerate) (3.5)
17
+ Requirement already satisfied: jinja2 in c:\users\dev-n\miniconda3\envs\dji-ml\lib\site-packages (from torch>=2.0.0->accelerate) (3.1.6)
18
+ Requirement already satisfied: mpmath<1.4,>=1.1.0 in c:\users\dev-n\miniconda3\envs\dji-ml\lib\site-packages (from sympy>=1.13.3->torch>=2.0.0->accelerate) (1.3.0)
19
+ Requirement already satisfied: colorama in c:\users\dev-n\miniconda3\envs\dji-ml\lib\site-packages (from tqdm>=4.42.1->huggingface_hub>=0.21.0->accelerate) (0.4.6)
20
+ Requirement already satisfied: MarkupSafe>=2.0 in c:\users\dev-n\miniconda3\envs\dji-ml\lib\site-packages (from jinja2->torch>=2.0.0->accelerate) (3.0.2)
21
+ Requirement already satisfied: charset_normalizer<4,>=2 in c:\users\dev-n\miniconda3\envs\dji-ml\lib\site-packages (from requests->huggingface_hub>=0.21.0->accelerate) (3.4.3)
22
+ Requirement already satisfied: idna<4,>=2.5 in c:\users\dev-n\miniconda3\envs\dji-ml\lib\site-packages (from requests->huggingface_hub>=0.21.0->accelerate) (3.10)
23
+ Requirement already satisfied: urllib3<3,>=1.21.1 in c:\users\dev-n\miniconda3\envs\dji-ml\lib\site-packages (from requests->huggingface_hub>=0.21.0->accelerate) (2.5.0)
24
+ Requirement already satisfied: certifi>=2017.4.17 in c:\users\dev-n\miniconda3\envs\dji-ml\lib\site-packages (from requests->huggingface_hub>=0.21.0->accelerate) (2025.8.3)
25
+ Downloading accelerate-1.10.1-py3-none-any.whl (374 kB)
26
+ Installing collected packages: accelerate
27
+ Successfully installed accelerate-1.10.1
draft_computation/ocr/inference.py ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from PIL import Image
3
+ from transformers import VisionEncoderDecoderModel, AutoImageProcessor, AutoTokenizer
4
+ import os
5
+ import numpy as np
6
+ from typing import Union
7
+
8
+ # --- Configuration ---
9
+ MODEL_PATH = "./ocr_model_output/checkpoint-441"
10
+
11
class OCRInference:
    """Perform OCR inference with a trained VisionEncoderDecoder checkpoint.

    The model is loaded from a local checkpoint directory. If the checkpoint
    is missing its image processor or tokenizer, they are pulled from the
    original encoder/decoder hub IDs and saved back into the checkpoint so
    subsequent loads are fully local.
    """

    def __init__(self, model_path: str, encoder_id: str = "google/vit-base-patch16-224-in21k", decoder_id: str = "prajjwal1/bert-tiny"):
        """
        Initializes the OCRInference class by loading the model, image processor, and tokenizer.

        Args:
            model_path (str): The path to the trained model checkpoint.
            encoder_id (str): The encoder ID to load the image processor from.
            decoder_id (str): The decoder ID to load the tokenizer from.
        """
        print(f"Loading model from: {model_path}")
        self.model = VisionEncoderDecoderModel.from_pretrained(model_path)
        # from_pretrained already returns the model in eval mode, but make it
        # explicit so dropout can never be active during inference.
        self.model.eval()

        # Load image processor; fall back to the encoder hub ID and persist
        # it into the checkpoint when it is not present locally.
        try:
            self.image_processor = AutoImageProcessor.from_pretrained(model_path)
        except OSError:
            print("Image processor not found locally. Loading from encoder ID and saving.")
            self.image_processor = AutoImageProcessor.from_pretrained(encoder_id)
            self.image_processor.save_pretrained(model_path)

        # Same fallback strategy for the tokenizer.
        try:
            self.tokenizer = AutoTokenizer.from_pretrained(model_path)
        except (KeyError, OSError):
            print("Tokenizer not found locally. Loading from decoder ID and saving.")
            self.tokenizer = AutoTokenizer.from_pretrained(decoder_id)
            self.tokenizer.save_pretrained(model_path)

        # --- Special tokens on the model config (required for the
        # encoder-decoder forward pass) ---
        self.model.config.decoder_start_token_id = self.tokenizer.cls_token_id
        self.model.config.pad_token_id = self.tokenizer.pad_token_id
        self.model.config.vocab_size = self.tokenizer.vocab_size
        self.model.config.eos_token_id = self.tokenizer.sep_token_id

        # --- Generation defaults belong on generation_config ---
        # Setting generation parameters (max_length, num_beams, ...) on
        # model.config is deprecated in recent transformers releases and
        # triggers warnings; generate() reads them from generation_config.
        gen_cfg = self.model.generation_config
        gen_cfg.decoder_start_token_id = self.tokenizer.cls_token_id
        gen_cfg.pad_token_id = self.tokenizer.pad_token_id
        gen_cfg.eos_token_id = self.tokenizer.sep_token_id
        gen_cfg.max_length = 64
        gen_cfg.early_stopping = True
        gen_cfg.no_repeat_ngram_size = 3
        gen_cfg.length_penalty = 2.0
        gen_cfg.num_beams = 4

        print("Model, image processor, and tokenizer loaded.")

    def perform_inference(self, image_input: Union[str, np.ndarray]) -> str:
        """
        Performs inference on a single image, which can be a file path or a NumPy array.

        Args:
            image_input (Union[str, np.ndarray]): Path to the input image or a NumPy array representing the image.

        Returns:
            str: The predicted text.

        Raises:
            FileNotFoundError: If a path is given and the file does not exist.
            TypeError: If the input is neither a string path nor a NumPy array.
        """
        if isinstance(image_input, str):
            if not os.path.exists(image_input):
                raise FileNotFoundError(f"Image file not found at: {image_input}")
            image = Image.open(image_input).convert("RGB")
        elif isinstance(image_input, np.ndarray):
            # NOTE(review): Image.fromarray expects an integer (typically
            # uint8) array; float arrays raise — confirm callers pass uint8.
            image = Image.fromarray(image_input).convert("RGB")
        else:
            raise TypeError("image_input must be a file path (str) or a NumPy array.")

        # Preprocess to the model's expected pixel tensor (1, 3, 224, 224).
        pixel_values = self.image_processor(images=image, return_tensors="pt").pixel_values

        # Beam-search decode without building autograd graphs. The explicit
        # kwargs match the generation_config defaults set in __init__.
        with torch.no_grad():
            output_ids = self.model.generate(pixel_values, max_length=64, num_beams=4, early_stopping=True)

        # Decode the generated ids to text, dropping [CLS]/[SEP]/[PAD].
        return self.tokenizer.decode(output_ids[0], skip_special_tokens=True)
85
+
86
if __name__ == '__main__':
    # Example image from the dataset, used as a quick smoke test.
    sample_image = "../ai_augment_output/20250901_115123_336458_ccd9d646-fc99-4d27-8076-0c17d0dba784.png"

    # Build the inference engine once and reuse it for both demos.
    engine = OCRInference(model_path=MODEL_PATH)

    # Demo 1: inference straight from a file path.
    try:
        text_from_path = engine.perform_inference(sample_image)
        print(f"\n--- Inference from file path ---")
        print(f"Image: {sample_image}")
        print(f"Predicted Text: {text_from_path}")
    except FileNotFoundError as err:
        print(err)
        print("Please update the 'image_path' variable in the script with a valid image path.")

    # Demo 2: inference from an in-memory NumPy array.
    try:
        if os.path.exists(sample_image):
            pixels = np.array(Image.open(sample_image))
            text_from_array = engine.perform_inference(pixels)
            print(f"\n--- Inference from NumPy array ---")
            print(f"Predicted Text: {text_from_array}")
    except Exception as err:
        print(f"An error occurred during inference from NumPy array: {err}")
draft_computation/ocr/ocr_model_output/checkpoint-441/config.json ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "VisionEncoderDecoderModel"
4
+ ],
5
+ "decoder": {
6
+ "_name_or_path": "prajjwal1/bert-tiny",
7
+ "add_cross_attention": true,
8
+ "attention_probs_dropout_prob": 0.1,
9
+ "classifier_dropout": null,
10
+ "dtype": "float32",
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 128,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 512,
16
+ "is_decoder": true,
17
+ "layer_norm_eps": 1e-12,
18
+ "max_position_embeddings": 512,
19
+ "model_type": "bert",
20
+ "num_attention_heads": 2,
21
+ "num_hidden_layers": 2,
22
+ "position_embedding_type": "absolute",
23
+ "type_vocab_size": 2,
24
+ "use_cache": true,
25
+ "vocab_size": 30522
26
+ },
27
+ "decoder_start_token_id": 101,
28
+ "dtype": "float32",
29
+ "early_stopping": null,
30
+ "encoder": {
31
+ "_name_or_path": "google/vit-base-patch16-224-in21k",
32
+ "architectures": [
33
+ "ViTModel"
34
+ ],
35
+ "attention_probs_dropout_prob": 0.0,
36
+ "dtype": "float32",
37
+ "encoder_stride": 16,
38
+ "hidden_act": "gelu",
39
+ "hidden_dropout_prob": 0.0,
40
+ "hidden_size": 768,
41
+ "image_size": 224,
42
+ "initializer_range": 0.02,
43
+ "intermediate_size": 3072,
44
+ "layer_norm_eps": 1e-12,
45
+ "model_type": "vit",
46
+ "num_attention_heads": 12,
47
+ "num_channels": 3,
48
+ "num_hidden_layers": 12,
49
+ "patch_size": 16,
50
+ "pooler_act": "tanh",
51
+ "pooler_output_size": 768,
52
+ "qkv_bias": true
53
+ },
54
+ "eos_token_id": 102,
55
+ "is_encoder_decoder": true,
56
+ "length_penalty": null,
57
+ "max_length": null,
58
+ "model_type": "vision-encoder-decoder",
59
+ "no_repeat_ngram_size": null,
60
+ "num_beams": null,
61
+ "pad_token_id": 0,
62
+ "tie_word_embeddings": false,
63
+ "transformers_version": "4.56.0",
64
+ "vocab_size": 30522
65
+ }
draft_computation/ocr/ocr_model_output/checkpoint-441/generation_config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "early_stopping": true,
4
+ "length_penalty": 2.0,
5
+ "max_length": 64,
6
+ "no_repeat_ngram_size": 3,
7
+ "num_beams": 4,
8
+ "pad_token_id": 0,
9
+ "transformers_version": "4.56.0"
10
+ }
draft_computation/ocr/ocr_model_output/checkpoint-441/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03e0036a9723a25ce80e9d3194ccc557b38644b508aa539aa17fcf1db56abde8
3
+ size 364179888
draft_computation/ocr/ocr_model_output/checkpoint-441/preprocessor_config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_convert_rgb": null,
3
+ "do_normalize": true,
4
+ "do_rescale": true,
5
+ "do_resize": true,
6
+ "image_mean": [
7
+ 0.5,
8
+ 0.5,
9
+ 0.5
10
+ ],
11
+ "image_processor_type": "ViTImageProcessor",
12
+ "image_std": [
13
+ 0.5,
14
+ 0.5,
15
+ 0.5
16
+ ],
17
+ "resample": 2,
18
+ "rescale_factor": 0.00392156862745098,
19
+ "size": {
20
+ "height": 224,
21
+ "width": 224
22
+ }
23
+ }
draft_computation/ocr/ocr_model_output/checkpoint-441/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b51fe74d1587712e04878e1f3aaa0d29dc3d63c924734ddb29bd9d7ceb8928f
3
+ size 14645
draft_computation/ocr/ocr_model_output/checkpoint-441/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05d65119eb0fddac420e99f8c71db31930fb82a064e4c445ec9ba3b643474893
3
+ size 1465
draft_computation/ocr/ocr_model_output/checkpoint-441/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
draft_computation/ocr/ocr_model_output/checkpoint-441/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
draft_computation/ocr/ocr_model_output/checkpoint-441/tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "extra_special_tokens": {},
49
+ "mask_token": "[MASK]",
50
+ "model_max_length": 1000000000000000019884624838656,
51
+ "never_split": null,
52
+ "pad_token": "[PAD]",
53
+ "sep_token": "[SEP]",
54
+ "strip_accents": null,
55
+ "tokenize_chinese_chars": true,
56
+ "tokenizer_class": "BertTokenizer",
57
+ "unk_token": "[UNK]"
58
+ }
draft_computation/ocr/ocr_model_output/checkpoint-441/trainer_state.json ADDED
@@ -0,0 +1,2828 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 2793,
3
+ "best_metric": 0.2159090909090909,
4
+ "best_model_checkpoint": "./ocr_model_output/checkpoint-2793",
5
+ "epoch": 25.0,
6
+ "eval_steps": 500,
7
+ "global_step": 3675,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.06802721088435375,
14
+ "grad_norm": 65.23433685302734,
15
+ "learning_rate": 4.9877551020408165e-05,
16
+ "loss": 8.6208,
17
+ "step": 10
18
+ },
19
+ {
20
+ "epoch": 0.1360544217687075,
21
+ "grad_norm": 70.44102478027344,
22
+ "learning_rate": 4.974149659863946e-05,
23
+ "loss": 4.5903,
24
+ "step": 20
25
+ },
26
+ {
27
+ "epoch": 0.20408163265306123,
28
+ "grad_norm": 35.348358154296875,
29
+ "learning_rate": 4.960544217687075e-05,
30
+ "loss": 1.9034,
31
+ "step": 30
32
+ },
33
+ {
34
+ "epoch": 0.272108843537415,
35
+ "grad_norm": 22.223546981811523,
36
+ "learning_rate": 4.9469387755102045e-05,
37
+ "loss": 1.3092,
38
+ "step": 40
39
+ },
40
+ {
41
+ "epoch": 0.3401360544217687,
42
+ "grad_norm": 35.46092987060547,
43
+ "learning_rate": 4.933333333333334e-05,
44
+ "loss": 1.0594,
45
+ "step": 50
46
+ },
47
+ {
48
+ "epoch": 0.40816326530612246,
49
+ "grad_norm": 22.021345138549805,
50
+ "learning_rate": 4.9197278911564624e-05,
51
+ "loss": 1.0721,
52
+ "step": 60
53
+ },
54
+ {
55
+ "epoch": 0.47619047619047616,
56
+ "grad_norm": 16.65888214111328,
57
+ "learning_rate": 4.9061224489795924e-05,
58
+ "loss": 0.825,
59
+ "step": 70
60
+ },
61
+ {
62
+ "epoch": 0.54421768707483,
63
+ "grad_norm": 13.323294639587402,
64
+ "learning_rate": 4.892517006802722e-05,
65
+ "loss": 0.8588,
66
+ "step": 80
67
+ },
68
+ {
69
+ "epoch": 0.6122448979591837,
70
+ "grad_norm": 16.98369598388672,
71
+ "learning_rate": 4.87891156462585e-05,
72
+ "loss": 0.6796,
73
+ "step": 90
74
+ },
75
+ {
76
+ "epoch": 0.6802721088435374,
77
+ "grad_norm": 14.879609107971191,
78
+ "learning_rate": 4.8653061224489796e-05,
79
+ "loss": 0.715,
80
+ "step": 100
81
+ },
82
+ {
83
+ "epoch": 0.7482993197278912,
84
+ "grad_norm": 13.820905685424805,
85
+ "learning_rate": 4.8517006802721096e-05,
86
+ "loss": 0.6773,
87
+ "step": 110
88
+ },
89
+ {
90
+ "epoch": 0.8163265306122449,
91
+ "grad_norm": 14.802565574645996,
92
+ "learning_rate": 4.838095238095238e-05,
93
+ "loss": 0.6992,
94
+ "step": 120
95
+ },
96
+ {
97
+ "epoch": 0.8843537414965986,
98
+ "grad_norm": 15.70506763458252,
99
+ "learning_rate": 4.8244897959183675e-05,
100
+ "loss": 0.7254,
101
+ "step": 130
102
+ },
103
+ {
104
+ "epoch": 0.9523809523809523,
105
+ "grad_norm": 16.910625457763672,
106
+ "learning_rate": 4.810884353741497e-05,
107
+ "loss": 0.8015,
108
+ "step": 140
109
+ },
110
+ {
111
+ "epoch": 1.0,
112
+ "eval_cer": 0.8118686868686869,
113
+ "eval_loss": 0.8795642256736755,
114
+ "eval_runtime": 3.4405,
115
+ "eval_samples_per_second": 85.161,
116
+ "eval_steps_per_second": 42.726,
117
+ "step": 147
118
+ },
119
+ {
120
+ "epoch": 1.0204081632653061,
121
+ "grad_norm": 12.733670234680176,
122
+ "learning_rate": 4.797278911564626e-05,
123
+ "loss": 0.6511,
124
+ "step": 150
125
+ },
126
+ {
127
+ "epoch": 1.08843537414966,
128
+ "grad_norm": 13.70749568939209,
129
+ "learning_rate": 4.7836734693877554e-05,
130
+ "loss": 0.7377,
131
+ "step": 160
132
+ },
133
+ {
134
+ "epoch": 1.1564625850340136,
135
+ "grad_norm": 15.44306468963623,
136
+ "learning_rate": 4.770068027210885e-05,
137
+ "loss": 0.6797,
138
+ "step": 170
139
+ },
140
+ {
141
+ "epoch": 1.2244897959183674,
142
+ "grad_norm": 19.31881332397461,
143
+ "learning_rate": 4.756462585034014e-05,
144
+ "loss": 0.6719,
145
+ "step": 180
146
+ },
147
+ {
148
+ "epoch": 1.2925170068027212,
149
+ "grad_norm": 12.049680709838867,
150
+ "learning_rate": 4.742857142857143e-05,
151
+ "loss": 0.6491,
152
+ "step": 190
153
+ },
154
+ {
155
+ "epoch": 1.3605442176870748,
156
+ "grad_norm": 14.28982925415039,
157
+ "learning_rate": 4.729251700680272e-05,
158
+ "loss": 0.7678,
159
+ "step": 200
160
+ },
161
+ {
162
+ "epoch": 1.4285714285714286,
163
+ "grad_norm": 25.51521110534668,
164
+ "learning_rate": 4.715646258503402e-05,
165
+ "loss": 0.7534,
166
+ "step": 210
167
+ },
168
+ {
169
+ "epoch": 1.4965986394557822,
170
+ "grad_norm": 14.158224105834961,
171
+ "learning_rate": 4.7020408163265306e-05,
172
+ "loss": 0.5653,
173
+ "step": 220
174
+ },
175
+ {
176
+ "epoch": 1.564625850340136,
177
+ "grad_norm": 10.563309669494629,
178
+ "learning_rate": 4.68843537414966e-05,
179
+ "loss": 0.6038,
180
+ "step": 230
181
+ },
182
+ {
183
+ "epoch": 1.6326530612244898,
184
+ "grad_norm": 14.436025619506836,
185
+ "learning_rate": 4.67482993197279e-05,
186
+ "loss": 0.5794,
187
+ "step": 240
188
+ },
189
+ {
190
+ "epoch": 1.7006802721088436,
191
+ "grad_norm": 14.293322563171387,
192
+ "learning_rate": 4.6612244897959185e-05,
193
+ "loss": 0.6102,
194
+ "step": 250
195
+ },
196
+ {
197
+ "epoch": 1.7687074829931972,
198
+ "grad_norm": 6.588962078094482,
199
+ "learning_rate": 4.647619047619048e-05,
200
+ "loss": 0.6853,
201
+ "step": 260
202
+ },
203
+ {
204
+ "epoch": 1.836734693877551,
205
+ "grad_norm": 16.505081176757812,
206
+ "learning_rate": 4.634013605442177e-05,
207
+ "loss": 0.554,
208
+ "step": 270
209
+ },
210
+ {
211
+ "epoch": 1.9047619047619047,
212
+ "grad_norm": 16.433975219726562,
213
+ "learning_rate": 4.6204081632653064e-05,
214
+ "loss": 0.5481,
215
+ "step": 280
216
+ },
217
+ {
218
+ "epoch": 1.9727891156462585,
219
+ "grad_norm": 14.68355655670166,
220
+ "learning_rate": 4.606802721088436e-05,
221
+ "loss": 0.4728,
222
+ "step": 290
223
+ },
224
+ {
225
+ "epoch": 2.0,
226
+ "eval_cer": 0.571969696969697,
227
+ "eval_loss": 0.6962071061134338,
228
+ "eval_runtime": 3.8738,
229
+ "eval_samples_per_second": 75.637,
230
+ "eval_steps_per_second": 37.948,
231
+ "step": 294
232
+ },
233
+ {
234
+ "epoch": 2.0408163265306123,
235
+ "grad_norm": 9.795095443725586,
236
+ "learning_rate": 4.593197278911564e-05,
237
+ "loss": 0.4635,
238
+ "step": 300
239
+ },
240
+ {
241
+ "epoch": 2.108843537414966,
242
+ "grad_norm": 17.107149124145508,
243
+ "learning_rate": 4.579591836734694e-05,
244
+ "loss": 0.4794,
245
+ "step": 310
246
+ },
247
+ {
248
+ "epoch": 2.17687074829932,
249
+ "grad_norm": 11.940792083740234,
250
+ "learning_rate": 4.5659863945578236e-05,
251
+ "loss": 0.677,
252
+ "step": 320
253
+ },
254
+ {
255
+ "epoch": 2.2448979591836733,
256
+ "grad_norm": 8.351872444152832,
257
+ "learning_rate": 4.552380952380952e-05,
258
+ "loss": 0.6732,
259
+ "step": 330
260
+ },
261
+ {
262
+ "epoch": 2.312925170068027,
263
+ "grad_norm": 2.6622140407562256,
264
+ "learning_rate": 4.538775510204082e-05,
265
+ "loss": 0.3907,
266
+ "step": 340
267
+ },
268
+ {
269
+ "epoch": 2.380952380952381,
270
+ "grad_norm": 10.555298805236816,
271
+ "learning_rate": 4.5251700680272115e-05,
272
+ "loss": 0.5443,
273
+ "step": 350
274
+ },
275
+ {
276
+ "epoch": 2.4489795918367347,
277
+ "grad_norm": 7.857567310333252,
278
+ "learning_rate": 4.51156462585034e-05,
279
+ "loss": 0.5688,
280
+ "step": 360
281
+ },
282
+ {
283
+ "epoch": 2.5170068027210886,
284
+ "grad_norm": 15.215005874633789,
285
+ "learning_rate": 4.4979591836734694e-05,
286
+ "loss": 0.5548,
287
+ "step": 370
288
+ },
289
+ {
290
+ "epoch": 2.5850340136054424,
291
+ "grad_norm": 8.438752174377441,
292
+ "learning_rate": 4.484353741496599e-05,
293
+ "loss": 0.3828,
294
+ "step": 380
295
+ },
296
+ {
297
+ "epoch": 2.6530612244897958,
298
+ "grad_norm": 9.824787139892578,
299
+ "learning_rate": 4.470748299319728e-05,
300
+ "loss": 0.3945,
301
+ "step": 390
302
+ },
303
+ {
304
+ "epoch": 2.7210884353741496,
305
+ "grad_norm": 13.708135604858398,
306
+ "learning_rate": 4.4571428571428574e-05,
307
+ "loss": 0.3751,
308
+ "step": 400
309
+ },
310
+ {
311
+ "epoch": 2.7891156462585034,
312
+ "grad_norm": 10.32359790802002,
313
+ "learning_rate": 4.4435374149659867e-05,
314
+ "loss": 0.4604,
315
+ "step": 410
316
+ },
317
+ {
318
+ "epoch": 2.857142857142857,
319
+ "grad_norm": 4.256906509399414,
320
+ "learning_rate": 4.429931972789116e-05,
321
+ "loss": 0.4558,
322
+ "step": 420
323
+ },
324
+ {
325
+ "epoch": 2.925170068027211,
326
+ "grad_norm": 12.131647109985352,
327
+ "learning_rate": 4.416326530612245e-05,
328
+ "loss": 0.4292,
329
+ "step": 430
330
+ },
331
+ {
332
+ "epoch": 2.9931972789115644,
333
+ "grad_norm": 14.156941413879395,
334
+ "learning_rate": 4.4027210884353746e-05,
335
+ "loss": 0.3713,
336
+ "step": 440
337
+ },
338
+ {
339
+ "epoch": 3.0,
340
+ "eval_cer": 0.5454545454545454,
341
+ "eval_loss": 0.5351251363754272,
342
+ "eval_runtime": 3.9015,
343
+ "eval_samples_per_second": 75.099,
344
+ "eval_steps_per_second": 37.678,
345
+ "step": 441
346
+ },
347
+ {
348
+ "epoch": 3.061224489795918,
349
+ "grad_norm": 5.429965496063232,
350
+ "learning_rate": 4.389115646258504e-05,
351
+ "loss": 0.6159,
352
+ "step": 450
353
+ },
354
+ {
355
+ "epoch": 3.129251700680272,
356
+ "grad_norm": 11.695433616638184,
357
+ "learning_rate": 4.3755102040816325e-05,
358
+ "loss": 0.4156,
359
+ "step": 460
360
+ },
361
+ {
362
+ "epoch": 3.197278911564626,
363
+ "grad_norm": 14.333072662353516,
364
+ "learning_rate": 4.361904761904762e-05,
365
+ "loss": 0.4372,
366
+ "step": 470
367
+ },
368
+ {
369
+ "epoch": 3.2653061224489797,
370
+ "grad_norm": 14.760481834411621,
371
+ "learning_rate": 4.348299319727892e-05,
372
+ "loss": 0.4279,
373
+ "step": 480
374
+ },
375
+ {
376
+ "epoch": 3.3333333333333335,
377
+ "grad_norm": 12.052332878112793,
378
+ "learning_rate": 4.3346938775510204e-05,
379
+ "loss": 0.3854,
380
+ "step": 490
381
+ },
382
+ {
383
+ "epoch": 3.4013605442176873,
384
+ "grad_norm": 14.331747055053711,
385
+ "learning_rate": 4.32108843537415e-05,
386
+ "loss": 0.3839,
387
+ "step": 500
388
+ },
389
+ {
390
+ "epoch": 3.4693877551020407,
391
+ "grad_norm": 21.911863327026367,
392
+ "learning_rate": 4.307482993197279e-05,
393
+ "loss": 0.4815,
394
+ "step": 510
395
+ },
396
+ {
397
+ "epoch": 3.5374149659863945,
398
+ "grad_norm": 6.438183307647705,
399
+ "learning_rate": 4.293877551020408e-05,
400
+ "loss": 0.2597,
401
+ "step": 520
402
+ },
403
+ {
404
+ "epoch": 3.6054421768707483,
405
+ "grad_norm": 13.568741798400879,
406
+ "learning_rate": 4.2802721088435376e-05,
407
+ "loss": 0.3735,
408
+ "step": 530
409
+ },
410
+ {
411
+ "epoch": 3.673469387755102,
412
+ "grad_norm": 0.18484297394752502,
413
+ "learning_rate": 4.266666666666667e-05,
414
+ "loss": 0.3343,
415
+ "step": 540
416
+ },
417
+ {
418
+ "epoch": 3.741496598639456,
419
+ "grad_norm": 0.7180817127227783,
420
+ "learning_rate": 4.253061224489796e-05,
421
+ "loss": 0.1867,
422
+ "step": 550
423
+ },
424
+ {
425
+ "epoch": 3.8095238095238093,
426
+ "grad_norm": 0.15185348689556122,
427
+ "learning_rate": 4.2394557823129255e-05,
428
+ "loss": 0.4185,
429
+ "step": 560
430
+ },
431
+ {
432
+ "epoch": 3.877551020408163,
433
+ "grad_norm": 10.095551490783691,
434
+ "learning_rate": 4.225850340136054e-05,
435
+ "loss": 0.1698,
436
+ "step": 570
437
+ },
438
+ {
439
+ "epoch": 3.945578231292517,
440
+ "grad_norm": 15.650616645812988,
441
+ "learning_rate": 4.212244897959184e-05,
442
+ "loss": 0.4307,
443
+ "step": 580
444
+ },
445
+ {
446
+ "epoch": 4.0,
447
+ "eval_cer": 0.4911616161616162,
448
+ "eval_loss": 0.4221580922603607,
449
+ "eval_runtime": 3.8592,
450
+ "eval_samples_per_second": 75.922,
451
+ "eval_steps_per_second": 38.091,
452
+ "step": 588
453
+ },
454
+ {
455
+ "epoch": 4.01360544217687,
456
+ "grad_norm": 8.345415115356445,
457
+ "learning_rate": 4.1986394557823134e-05,
458
+ "loss": 0.2111,
459
+ "step": 590
460
+ },
461
+ {
462
+ "epoch": 4.081632653061225,
463
+ "grad_norm": 4.688040256500244,
464
+ "learning_rate": 4.185034013605442e-05,
465
+ "loss": 0.3211,
466
+ "step": 600
467
+ },
468
+ {
469
+ "epoch": 4.149659863945578,
470
+ "grad_norm": 19.075292587280273,
471
+ "learning_rate": 4.1714285714285714e-05,
472
+ "loss": 0.3103,
473
+ "step": 610
474
+ },
475
+ {
476
+ "epoch": 4.217687074829932,
477
+ "grad_norm": 7.607833385467529,
478
+ "learning_rate": 4.1578231292517014e-05,
479
+ "loss": 0.2308,
480
+ "step": 620
481
+ },
482
+ {
483
+ "epoch": 4.285714285714286,
484
+ "grad_norm": 8.69675350189209,
485
+ "learning_rate": 4.14421768707483e-05,
486
+ "loss": 0.2981,
487
+ "step": 630
488
+ },
489
+ {
490
+ "epoch": 4.35374149659864,
491
+ "grad_norm": 2.994023323059082,
492
+ "learning_rate": 4.130612244897959e-05,
493
+ "loss": 0.2922,
494
+ "step": 640
495
+ },
496
+ {
497
+ "epoch": 4.421768707482993,
498
+ "grad_norm": 6.535342693328857,
499
+ "learning_rate": 4.1170068027210886e-05,
500
+ "loss": 0.3441,
501
+ "step": 650
502
+ },
503
+ {
504
+ "epoch": 4.489795918367347,
505
+ "grad_norm": 0.3404290974140167,
506
+ "learning_rate": 4.103401360544218e-05,
507
+ "loss": 0.3523,
508
+ "step": 660
509
+ },
510
+ {
511
+ "epoch": 4.557823129251701,
512
+ "grad_norm": 10.70188045501709,
513
+ "learning_rate": 4.089795918367347e-05,
514
+ "loss": 0.4148,
515
+ "step": 670
516
+ },
517
+ {
518
+ "epoch": 4.625850340136054,
519
+ "grad_norm": 14.900300979614258,
520
+ "learning_rate": 4.0761904761904765e-05,
521
+ "loss": 0.4041,
522
+ "step": 680
523
+ },
524
+ {
525
+ "epoch": 4.6938775510204085,
526
+ "grad_norm": 14.758822441101074,
527
+ "learning_rate": 4.062585034013606e-05,
528
+ "loss": 0.2453,
529
+ "step": 690
530
+ },
531
+ {
532
+ "epoch": 4.761904761904762,
533
+ "grad_norm": 0.7187572717666626,
534
+ "learning_rate": 4.048979591836735e-05,
535
+ "loss": 0.3703,
536
+ "step": 700
537
+ },
538
+ {
539
+ "epoch": 4.829931972789115,
540
+ "grad_norm": 9.603391647338867,
541
+ "learning_rate": 4.035374149659864e-05,
542
+ "loss": 0.3354,
543
+ "step": 710
544
+ },
545
+ {
546
+ "epoch": 4.8979591836734695,
547
+ "grad_norm": 1.358739972114563,
548
+ "learning_rate": 4.021768707482994e-05,
549
+ "loss": 0.3297,
550
+ "step": 720
551
+ },
552
+ {
553
+ "epoch": 4.965986394557823,
554
+ "grad_norm": 0.37522122263908386,
555
+ "learning_rate": 4.008163265306122e-05,
556
+ "loss": 0.2072,
557
+ "step": 730
558
+ },
559
+ {
560
+ "epoch": 5.0,
561
+ "eval_cer": 0.5921717171717171,
562
+ "eval_loss": 0.358783096075058,
563
+ "eval_runtime": 3.5411,
564
+ "eval_samples_per_second": 82.742,
565
+ "eval_steps_per_second": 41.512,
566
+ "step": 735
567
+ },
568
+ {
569
+ "epoch": 5.034013605442177,
570
+ "grad_norm": 3.8917222023010254,
571
+ "learning_rate": 3.9945578231292516e-05,
572
+ "loss": 0.2488,
573
+ "step": 740
574
+ },
575
+ {
576
+ "epoch": 5.1020408163265305,
577
+ "grad_norm": 16.159465789794922,
578
+ "learning_rate": 3.9809523809523816e-05,
579
+ "loss": 0.3769,
580
+ "step": 750
581
+ },
582
+ {
583
+ "epoch": 5.170068027210885,
584
+ "grad_norm": 17.226072311401367,
585
+ "learning_rate": 3.96734693877551e-05,
586
+ "loss": 0.2559,
587
+ "step": 760
588
+ },
589
+ {
590
+ "epoch": 5.238095238095238,
591
+ "grad_norm": 0.8567410707473755,
592
+ "learning_rate": 3.9537414965986396e-05,
593
+ "loss": 0.248,
594
+ "step": 770
595
+ },
596
+ {
597
+ "epoch": 5.3061224489795915,
598
+ "grad_norm": 1.5087652206420898,
599
+ "learning_rate": 3.940136054421769e-05,
600
+ "loss": 0.2737,
601
+ "step": 780
602
+ },
603
+ {
604
+ "epoch": 5.374149659863946,
605
+ "grad_norm": 3.331455945968628,
606
+ "learning_rate": 3.926530612244898e-05,
607
+ "loss": 0.1665,
608
+ "step": 790
609
+ },
610
+ {
611
+ "epoch": 5.442176870748299,
612
+ "grad_norm": 11.933197021484375,
613
+ "learning_rate": 3.9129251700680275e-05,
614
+ "loss": 0.322,
615
+ "step": 800
616
+ },
617
+ {
618
+ "epoch": 5.510204081632653,
619
+ "grad_norm": 13.032449722290039,
620
+ "learning_rate": 3.899319727891156e-05,
621
+ "loss": 0.239,
622
+ "step": 810
623
+ },
624
+ {
625
+ "epoch": 5.578231292517007,
626
+ "grad_norm": 5.0442047119140625,
627
+ "learning_rate": 3.885714285714286e-05,
628
+ "loss": 0.157,
629
+ "step": 820
630
+ },
631
+ {
632
+ "epoch": 5.646258503401361,
633
+ "grad_norm": 1.6076925992965698,
634
+ "learning_rate": 3.8721088435374154e-05,
635
+ "loss": 0.1926,
636
+ "step": 830
637
+ },
638
+ {
639
+ "epoch": 5.714285714285714,
640
+ "grad_norm": 22.085569381713867,
641
+ "learning_rate": 3.858503401360544e-05,
642
+ "loss": 0.3699,
643
+ "step": 840
644
+ },
645
+ {
646
+ "epoch": 5.782312925170068,
647
+ "grad_norm": 10.831768989562988,
648
+ "learning_rate": 3.844897959183674e-05,
649
+ "loss": 0.291,
650
+ "step": 850
651
+ },
652
+ {
653
+ "epoch": 5.850340136054422,
654
+ "grad_norm": 0.5946142077445984,
655
+ "learning_rate": 3.831292517006803e-05,
656
+ "loss": 0.2154,
657
+ "step": 860
658
+ },
659
+ {
660
+ "epoch": 5.918367346938775,
661
+ "grad_norm": 1.5171136856079102,
662
+ "learning_rate": 3.817687074829932e-05,
663
+ "loss": 0.212,
664
+ "step": 870
665
+ },
666
+ {
667
+ "epoch": 5.986394557823129,
668
+ "grad_norm": 18.592945098876953,
669
+ "learning_rate": 3.804081632653061e-05,
670
+ "loss": 0.2963,
671
+ "step": 880
672
+ },
673
+ {
674
+ "epoch": 6.0,
675
+ "eval_cer": 0.5151515151515151,
676
+ "eval_loss": 0.2578863501548767,
677
+ "eval_runtime": 3.7328,
678
+ "eval_samples_per_second": 78.493,
679
+ "eval_steps_per_second": 39.38,
680
+ "step": 882
681
+ },
682
+ {
683
+ "epoch": 6.054421768707483,
684
+ "grad_norm": 0.10681638866662979,
685
+ "learning_rate": 3.7904761904761905e-05,
686
+ "loss": 0.1761,
687
+ "step": 890
688
+ },
689
+ {
690
+ "epoch": 6.122448979591836,
691
+ "grad_norm": 7.661993503570557,
692
+ "learning_rate": 3.77687074829932e-05,
693
+ "loss": 0.2027,
694
+ "step": 900
695
+ },
696
+ {
697
+ "epoch": 6.190476190476191,
698
+ "grad_norm": 12.70997428894043,
699
+ "learning_rate": 3.763265306122449e-05,
700
+ "loss": 0.1828,
701
+ "step": 910
702
+ },
703
+ {
704
+ "epoch": 6.258503401360544,
705
+ "grad_norm": 0.06931126117706299,
706
+ "learning_rate": 3.7496598639455784e-05,
707
+ "loss": 0.1242,
708
+ "step": 920
709
+ },
710
+ {
711
+ "epoch": 6.326530612244898,
712
+ "grad_norm": 17.21261215209961,
713
+ "learning_rate": 3.736054421768708e-05,
714
+ "loss": 0.3785,
715
+ "step": 930
716
+ },
717
+ {
718
+ "epoch": 6.394557823129252,
719
+ "grad_norm": 0.10957188904285431,
720
+ "learning_rate": 3.722448979591837e-05,
721
+ "loss": 0.1184,
722
+ "step": 940
723
+ },
724
+ {
725
+ "epoch": 6.462585034013605,
726
+ "grad_norm": 14.104228019714355,
727
+ "learning_rate": 3.7088435374149663e-05,
728
+ "loss": 0.2253,
729
+ "step": 950
730
+ },
731
+ {
732
+ "epoch": 6.530612244897959,
733
+ "grad_norm": 0.2823491394519806,
734
+ "learning_rate": 3.6952380952380956e-05,
735
+ "loss": 0.1337,
736
+ "step": 960
737
+ },
738
+ {
739
+ "epoch": 6.598639455782313,
740
+ "grad_norm": 0.17527176439762115,
741
+ "learning_rate": 3.681632653061224e-05,
742
+ "loss": 0.1745,
743
+ "step": 970
744
+ },
745
+ {
746
+ "epoch": 6.666666666666667,
747
+ "grad_norm": 12.178832054138184,
748
+ "learning_rate": 3.6680272108843536e-05,
749
+ "loss": 0.3322,
750
+ "step": 980
751
+ },
752
+ {
753
+ "epoch": 6.73469387755102,
754
+ "grad_norm": 18.27240753173828,
755
+ "learning_rate": 3.6544217687074836e-05,
756
+ "loss": 0.245,
757
+ "step": 990
758
+ },
759
+ {
760
+ "epoch": 6.802721088435375,
761
+ "grad_norm": 8.510261535644531,
762
+ "learning_rate": 3.640816326530612e-05,
763
+ "loss": 0.2928,
764
+ "step": 1000
765
+ },
766
+ {
767
+ "epoch": 6.870748299319728,
768
+ "grad_norm": 8.47603988647461,
769
+ "learning_rate": 3.6272108843537415e-05,
770
+ "loss": 0.2048,
771
+ "step": 1010
772
+ },
773
+ {
774
+ "epoch": 6.938775510204081,
775
+ "grad_norm": 9.21681022644043,
776
+ "learning_rate": 3.6136054421768715e-05,
777
+ "loss": 0.1416,
778
+ "step": 1020
779
+ },
780
+ {
781
+ "epoch": 7.0,
782
+ "eval_cer": 0.4166666666666667,
783
+ "eval_loss": 0.1848345547914505,
784
+ "eval_runtime": 4.0307,
785
+ "eval_samples_per_second": 72.692,
786
+ "eval_steps_per_second": 36.47,
787
+ "step": 1029
788
+ },
789
+ {
790
+ "epoch": 7.006802721088436,
791
+ "grad_norm": 8.593953132629395,
792
+ "learning_rate": 3.6e-05,
793
+ "loss": 0.1307,
794
+ "step": 1030
795
+ },
796
+ {
797
+ "epoch": 7.074829931972789,
798
+ "grad_norm": 6.022789001464844,
799
+ "learning_rate": 3.5863945578231294e-05,
800
+ "loss": 0.1292,
801
+ "step": 1040
802
+ },
803
+ {
804
+ "epoch": 7.142857142857143,
805
+ "grad_norm": 10.953206062316895,
806
+ "learning_rate": 3.572789115646259e-05,
807
+ "loss": 0.0853,
808
+ "step": 1050
809
+ },
810
+ {
811
+ "epoch": 7.210884353741497,
812
+ "grad_norm": 5.332366943359375,
813
+ "learning_rate": 3.559183673469388e-05,
814
+ "loss": 0.112,
815
+ "step": 1060
816
+ },
817
+ {
818
+ "epoch": 7.27891156462585,
819
+ "grad_norm": 12.159287452697754,
820
+ "learning_rate": 3.545578231292517e-05,
821
+ "loss": 0.1007,
822
+ "step": 1070
823
+ },
824
+ {
825
+ "epoch": 7.346938775510204,
826
+ "grad_norm": 0.3854842483997345,
827
+ "learning_rate": 3.531972789115646e-05,
828
+ "loss": 0.2318,
829
+ "step": 1080
830
+ },
831
+ {
832
+ "epoch": 7.414965986394558,
833
+ "grad_norm": 11.151751518249512,
834
+ "learning_rate": 3.518367346938776e-05,
835
+ "loss": 0.1768,
836
+ "step": 1090
837
+ },
838
+ {
839
+ "epoch": 7.482993197278912,
840
+ "grad_norm": 20.415531158447266,
841
+ "learning_rate": 3.504761904761905e-05,
842
+ "loss": 0.1839,
843
+ "step": 1100
844
+ },
845
+ {
846
+ "epoch": 7.551020408163265,
847
+ "grad_norm": 0.7234401106834412,
848
+ "learning_rate": 3.491156462585034e-05,
849
+ "loss": 0.2944,
850
+ "step": 1110
851
+ },
852
+ {
853
+ "epoch": 7.619047619047619,
854
+ "grad_norm": 0.522950291633606,
855
+ "learning_rate": 3.477551020408164e-05,
856
+ "loss": 0.087,
857
+ "step": 1120
858
+ },
859
+ {
860
+ "epoch": 7.687074829931973,
861
+ "grad_norm": 8.606940269470215,
862
+ "learning_rate": 3.463945578231293e-05,
863
+ "loss": 0.1253,
864
+ "step": 1130
865
+ },
866
+ {
867
+ "epoch": 7.755102040816326,
868
+ "grad_norm": 22.922000885009766,
869
+ "learning_rate": 3.450340136054422e-05,
870
+ "loss": 0.2799,
871
+ "step": 1140
872
+ },
873
+ {
874
+ "epoch": 7.8231292517006805,
875
+ "grad_norm": 21.035017013549805,
876
+ "learning_rate": 3.436734693877551e-05,
877
+ "loss": 0.1441,
878
+ "step": 1150
879
+ },
880
+ {
881
+ "epoch": 7.891156462585034,
882
+ "grad_norm": 5.825491905212402,
883
+ "learning_rate": 3.4231292517006804e-05,
884
+ "loss": 0.0218,
885
+ "step": 1160
886
+ },
887
+ {
888
+ "epoch": 7.959183673469388,
889
+ "grad_norm": 12.801454544067383,
890
+ "learning_rate": 3.40952380952381e-05,
891
+ "loss": 0.2319,
892
+ "step": 1170
893
+ },
894
+ {
895
+ "epoch": 8.0,
896
+ "eval_cer": 0.42424242424242425,
897
+ "eval_loss": 0.13813678920269012,
898
+ "eval_runtime": 4.039,
899
+ "eval_samples_per_second": 72.542,
900
+ "eval_steps_per_second": 36.395,
901
+ "step": 1176
902
+ },
903
+ {
904
+ "epoch": 8.02721088435374,
905
+ "grad_norm": 1.3456509113311768,
906
+ "learning_rate": 3.395918367346939e-05,
907
+ "loss": 0.1622,
908
+ "step": 1180
909
+ },
910
+ {
911
+ "epoch": 8.095238095238095,
912
+ "grad_norm": 0.21156376600265503,
913
+ "learning_rate": 3.382312925170068e-05,
914
+ "loss": 0.0447,
915
+ "step": 1190
916
+ },
917
+ {
918
+ "epoch": 8.16326530612245,
919
+ "grad_norm": 0.08529641479253769,
920
+ "learning_rate": 3.3687074829931976e-05,
921
+ "loss": 0.3241,
922
+ "step": 1200
923
+ },
924
+ {
925
+ "epoch": 8.231292517006803,
926
+ "grad_norm": 8.225408554077148,
927
+ "learning_rate": 3.355102040816327e-05,
928
+ "loss": 0.1136,
929
+ "step": 1210
930
+ },
931
+ {
932
+ "epoch": 8.299319727891156,
933
+ "grad_norm": 0.0486108660697937,
934
+ "learning_rate": 3.341496598639456e-05,
935
+ "loss": 0.2109,
936
+ "step": 1220
937
+ },
938
+ {
939
+ "epoch": 8.36734693877551,
940
+ "grad_norm": 0.3150612413883209,
941
+ "learning_rate": 3.3278911564625855e-05,
942
+ "loss": 0.0678,
943
+ "step": 1230
944
+ },
945
+ {
946
+ "epoch": 8.435374149659864,
947
+ "grad_norm": 2.7837414741516113,
948
+ "learning_rate": 3.314285714285714e-05,
949
+ "loss": 0.0582,
950
+ "step": 1240
951
+ },
952
+ {
953
+ "epoch": 8.503401360544217,
954
+ "grad_norm": 0.08137867599725723,
955
+ "learning_rate": 3.3006802721088434e-05,
956
+ "loss": 0.2023,
957
+ "step": 1250
958
+ },
959
+ {
960
+ "epoch": 8.571428571428571,
961
+ "grad_norm": 0.12031784653663635,
962
+ "learning_rate": 3.2870748299319734e-05,
963
+ "loss": 0.2334,
964
+ "step": 1260
965
+ },
966
+ {
967
+ "epoch": 8.639455782312925,
968
+ "grad_norm": 0.7248769998550415,
969
+ "learning_rate": 3.273469387755102e-05,
970
+ "loss": 0.0562,
971
+ "step": 1270
972
+ },
973
+ {
974
+ "epoch": 8.70748299319728,
975
+ "grad_norm": 9.878806114196777,
976
+ "learning_rate": 3.259863945578231e-05,
977
+ "loss": 0.1711,
978
+ "step": 1280
979
+ },
980
+ {
981
+ "epoch": 8.775510204081632,
982
+ "grad_norm": 11.397530555725098,
983
+ "learning_rate": 3.2462585034013606e-05,
984
+ "loss": 0.2418,
985
+ "step": 1290
986
+ },
987
+ {
988
+ "epoch": 8.843537414965986,
989
+ "grad_norm": 13.183833122253418,
990
+ "learning_rate": 3.23265306122449e-05,
991
+ "loss": 0.056,
992
+ "step": 1300
993
+ },
994
+ {
995
+ "epoch": 8.91156462585034,
996
+ "grad_norm": 0.8917256593704224,
997
+ "learning_rate": 3.219047619047619e-05,
998
+ "loss": 0.0436,
999
+ "step": 1310
1000
+ },
1001
+ {
1002
+ "epoch": 8.979591836734693,
1003
+ "grad_norm": 7.448833465576172,
1004
+ "learning_rate": 3.2054421768707485e-05,
1005
+ "loss": 0.0476,
1006
+ "step": 1320
1007
+ },
1008
+ {
1009
+ "epoch": 9.0,
1010
+ "eval_cer": 0.37752525252525254,
1011
+ "eval_loss": 0.11764977127313614,
1012
+ "eval_runtime": 3.9588,
1013
+ "eval_samples_per_second": 74.013,
1014
+ "eval_steps_per_second": 37.133,
1015
+ "step": 1323
1016
+ },
1017
+ {
1018
+ "epoch": 9.047619047619047,
1019
+ "grad_norm": 7.476083278656006,
1020
+ "learning_rate": 3.191836734693878e-05,
1021
+ "loss": 0.098,
1022
+ "step": 1330
1023
+ },
1024
+ {
1025
+ "epoch": 9.115646258503402,
1026
+ "grad_norm": 0.2827729880809784,
1027
+ "learning_rate": 3.178231292517007e-05,
1028
+ "loss": 0.1332,
1029
+ "step": 1340
1030
+ },
1031
+ {
1032
+ "epoch": 9.183673469387756,
1033
+ "grad_norm": 1.02791166305542,
1034
+ "learning_rate": 3.164625850340136e-05,
1035
+ "loss": 0.0211,
1036
+ "step": 1350
1037
+ },
1038
+ {
1039
+ "epoch": 9.251700680272108,
1040
+ "grad_norm": 14.376386642456055,
1041
+ "learning_rate": 3.151020408163266e-05,
1042
+ "loss": 0.1141,
1043
+ "step": 1360
1044
+ },
1045
+ {
1046
+ "epoch": 9.319727891156463,
1047
+ "grad_norm": 0.9921436309814453,
1048
+ "learning_rate": 3.137414965986395e-05,
1049
+ "loss": 0.0931,
1050
+ "step": 1370
1051
+ },
1052
+ {
1053
+ "epoch": 9.387755102040817,
1054
+ "grad_norm": 0.1799956113100052,
1055
+ "learning_rate": 3.123809523809524e-05,
1056
+ "loss": 0.0095,
1057
+ "step": 1380
1058
+ },
1059
+ {
1060
+ "epoch": 9.45578231292517,
1061
+ "grad_norm": 0.05473727360367775,
1062
+ "learning_rate": 3.110204081632653e-05,
1063
+ "loss": 0.1565,
1064
+ "step": 1390
1065
+ },
1066
+ {
1067
+ "epoch": 9.523809523809524,
1068
+ "grad_norm": 0.5269390344619751,
1069
+ "learning_rate": 3.096598639455782e-05,
1070
+ "loss": 0.0674,
1071
+ "step": 1400
1072
+ },
1073
+ {
1074
+ "epoch": 9.591836734693878,
1075
+ "grad_norm": 2.706407070159912,
1076
+ "learning_rate": 3.0829931972789116e-05,
1077
+ "loss": 0.0966,
1078
+ "step": 1410
1079
+ },
1080
+ {
1081
+ "epoch": 9.65986394557823,
1082
+ "grad_norm": 0.16416242718696594,
1083
+ "learning_rate": 3.069387755102041e-05,
1084
+ "loss": 0.0667,
1085
+ "step": 1420
1086
+ },
1087
+ {
1088
+ "epoch": 9.727891156462585,
1089
+ "grad_norm": 15.035467147827148,
1090
+ "learning_rate": 3.05578231292517e-05,
1091
+ "loss": 0.1272,
1092
+ "step": 1430
1093
+ },
1094
+ {
1095
+ "epoch": 9.795918367346939,
1096
+ "grad_norm": 0.19053949415683746,
1097
+ "learning_rate": 3.0421768707482995e-05,
1098
+ "loss": 0.2847,
1099
+ "step": 1440
1100
+ },
1101
+ {
1102
+ "epoch": 9.863945578231293,
1103
+ "grad_norm": 0.12774477899074554,
1104
+ "learning_rate": 3.0285714285714288e-05,
1105
+ "loss": 0.1103,
1106
+ "step": 1450
1107
+ },
1108
+ {
1109
+ "epoch": 9.931972789115646,
1110
+ "grad_norm": 0.474401593208313,
1111
+ "learning_rate": 3.0149659863945578e-05,
1112
+ "loss": 0.0972,
1113
+ "step": 1460
1114
+ },
1115
+ {
1116
+ "epoch": 10.0,
1117
+ "grad_norm": 0.0236662644892931,
1118
+ "learning_rate": 3.0013605442176874e-05,
1119
+ "loss": 0.0626,
1120
+ "step": 1470
1121
+ },
1122
+ {
1123
+ "epoch": 10.0,
1124
+ "eval_cer": 0.4305555555555556,
1125
+ "eval_loss": 0.09693024307489395,
1126
+ "eval_runtime": 4.1253,
1127
+ "eval_samples_per_second": 71.025,
1128
+ "eval_steps_per_second": 35.633,
1129
+ "step": 1470
1130
+ },
1131
+ {
1132
+ "epoch": 10.068027210884354,
1133
+ "grad_norm": 0.03369349241256714,
1134
+ "learning_rate": 2.987755102040816e-05,
1135
+ "loss": 0.0542,
1136
+ "step": 1480
1137
+ },
1138
+ {
1139
+ "epoch": 10.136054421768707,
1140
+ "grad_norm": 0.2802339792251587,
1141
+ "learning_rate": 2.9741496598639457e-05,
1142
+ "loss": 0.1356,
1143
+ "step": 1490
1144
+ },
1145
+ {
1146
+ "epoch": 10.204081632653061,
1147
+ "grad_norm": 0.48600488901138306,
1148
+ "learning_rate": 2.960544217687075e-05,
1149
+ "loss": 0.1529,
1150
+ "step": 1500
1151
+ },
1152
+ {
1153
+ "epoch": 10.272108843537415,
1154
+ "grad_norm": 2.2791683673858643,
1155
+ "learning_rate": 2.946938775510204e-05,
1156
+ "loss": 0.1232,
1157
+ "step": 1510
1158
+ },
1159
+ {
1160
+ "epoch": 10.34013605442177,
1161
+ "grad_norm": 6.162140369415283,
1162
+ "learning_rate": 2.9333333333333336e-05,
1163
+ "loss": 0.1948,
1164
+ "step": 1520
1165
+ },
1166
+ {
1167
+ "epoch": 10.408163265306122,
1168
+ "grad_norm": 1.365488886833191,
1169
+ "learning_rate": 2.919727891156463e-05,
1170
+ "loss": 0.0417,
1171
+ "step": 1530
1172
+ },
1173
+ {
1174
+ "epoch": 10.476190476190476,
1175
+ "grad_norm": 14.345423698425293,
1176
+ "learning_rate": 2.906122448979592e-05,
1177
+ "loss": 0.0859,
1178
+ "step": 1540
1179
+ },
1180
+ {
1181
+ "epoch": 10.54421768707483,
1182
+ "grad_norm": 0.1597384810447693,
1183
+ "learning_rate": 2.892517006802721e-05,
1184
+ "loss": 0.0376,
1185
+ "step": 1550
1186
+ },
1187
+ {
1188
+ "epoch": 10.612244897959183,
1189
+ "grad_norm": 0.24928878247737885,
1190
+ "learning_rate": 2.87891156462585e-05,
1191
+ "loss": 0.5477,
1192
+ "step": 1560
1193
+ },
1194
+ {
1195
+ "epoch": 10.680272108843537,
1196
+ "grad_norm": 0.3629794418811798,
1197
+ "learning_rate": 2.8653061224489798e-05,
1198
+ "loss": 0.0102,
1199
+ "step": 1570
1200
+ },
1201
+ {
1202
+ "epoch": 10.748299319727892,
1203
+ "grad_norm": 17.98668098449707,
1204
+ "learning_rate": 2.851700680272109e-05,
1205
+ "loss": 0.0821,
1206
+ "step": 1580
1207
+ },
1208
+ {
1209
+ "epoch": 10.816326530612244,
1210
+ "grad_norm": 5.073668479919434,
1211
+ "learning_rate": 2.838095238095238e-05,
1212
+ "loss": 0.053,
1213
+ "step": 1590
1214
+ },
1215
+ {
1216
+ "epoch": 10.884353741496598,
1217
+ "grad_norm": 12.528962135314941,
1218
+ "learning_rate": 2.8244897959183673e-05,
1219
+ "loss": 0.0463,
1220
+ "step": 1600
1221
+ },
1222
+ {
1223
+ "epoch": 10.952380952380953,
1224
+ "grad_norm": 1.0358648300170898,
1225
+ "learning_rate": 2.810884353741497e-05,
1226
+ "loss": 0.0124,
1227
+ "step": 1610
1228
+ },
1229
+ {
1230
+ "epoch": 11.0,
1231
+ "eval_cer": 0.39646464646464646,
1232
+ "eval_loss": 0.08722148090600967,
1233
+ "eval_runtime": 3.8388,
1234
+ "eval_samples_per_second": 76.326,
1235
+ "eval_steps_per_second": 38.293,
1236
+ "step": 1617
1237
+ },
1238
+ {
1239
+ "epoch": 11.020408163265307,
1240
+ "grad_norm": 0.19600285589694977,
1241
+ "learning_rate": 2.797278911564626e-05,
1242
+ "loss": 0.118,
1243
+ "step": 1620
1244
+ },
1245
+ {
1246
+ "epoch": 11.08843537414966,
1247
+ "grad_norm": 26.618637084960938,
1248
+ "learning_rate": 2.7836734693877553e-05,
1249
+ "loss": 0.3147,
1250
+ "step": 1630
1251
+ },
1252
+ {
1253
+ "epoch": 11.156462585034014,
1254
+ "grad_norm": 0.05680645629763603,
1255
+ "learning_rate": 2.7700680272108842e-05,
1256
+ "loss": 0.2198,
1257
+ "step": 1640
1258
+ },
1259
+ {
1260
+ "epoch": 11.224489795918368,
1261
+ "grad_norm": 13.787897109985352,
1262
+ "learning_rate": 2.7564625850340135e-05,
1263
+ "loss": 0.2091,
1264
+ "step": 1650
1265
+ },
1266
+ {
1267
+ "epoch": 11.29251700680272,
1268
+ "grad_norm": 1.339880108833313,
1269
+ "learning_rate": 2.742857142857143e-05,
1270
+ "loss": 0.0221,
1271
+ "step": 1660
1272
+ },
1273
+ {
1274
+ "epoch": 11.360544217687075,
1275
+ "grad_norm": 0.18231110274791718,
1276
+ "learning_rate": 2.729251700680272e-05,
1277
+ "loss": 0.0567,
1278
+ "step": 1670
1279
+ },
1280
+ {
1281
+ "epoch": 11.428571428571429,
1282
+ "grad_norm": 0.15762682259082794,
1283
+ "learning_rate": 2.7156462585034014e-05,
1284
+ "loss": 0.0118,
1285
+ "step": 1680
1286
+ },
1287
+ {
1288
+ "epoch": 11.496598639455783,
1289
+ "grad_norm": 0.19339053332805634,
1290
+ "learning_rate": 2.702040816326531e-05,
1291
+ "loss": 0.0082,
1292
+ "step": 1690
1293
+ },
1294
+ {
1295
+ "epoch": 11.564625850340136,
1296
+ "grad_norm": 11.242050170898438,
1297
+ "learning_rate": 2.6884353741496597e-05,
1298
+ "loss": 0.0398,
1299
+ "step": 1700
1300
+ },
1301
+ {
1302
+ "epoch": 11.63265306122449,
1303
+ "grad_norm": 0.104960598051548,
1304
+ "learning_rate": 2.6748299319727893e-05,
1305
+ "loss": 0.0745,
1306
+ "step": 1710
1307
+ },
1308
+ {
1309
+ "epoch": 11.700680272108844,
1310
+ "grad_norm": 0.4059283137321472,
1311
+ "learning_rate": 2.6612244897959187e-05,
1312
+ "loss": 0.0854,
1313
+ "step": 1720
1314
+ },
1315
+ {
1316
+ "epoch": 11.768707482993197,
1317
+ "grad_norm": 0.11370517313480377,
1318
+ "learning_rate": 2.6476190476190476e-05,
1319
+ "loss": 0.084,
1320
+ "step": 1730
1321
+ },
1322
+ {
1323
+ "epoch": 11.83673469387755,
1324
+ "grad_norm": 6.6930365562438965,
1325
+ "learning_rate": 2.6340136054421773e-05,
1326
+ "loss": 0.082,
1327
+ "step": 1740
1328
+ },
1329
+ {
1330
+ "epoch": 11.904761904761905,
1331
+ "grad_norm": 0.035495854914188385,
1332
+ "learning_rate": 2.620408163265306e-05,
1333
+ "loss": 0.0704,
1334
+ "step": 1750
1335
+ },
1336
+ {
1337
+ "epoch": 11.972789115646258,
1338
+ "grad_norm": 0.19433455169200897,
1339
+ "learning_rate": 2.6068027210884355e-05,
1340
+ "loss": 0.0755,
1341
+ "step": 1760
1342
+ },
1343
+ {
1344
+ "epoch": 12.0,
1345
+ "eval_cer": 0.4090909090909091,
1346
+ "eval_loss": 0.06354419887065887,
1347
+ "eval_runtime": 3.8826,
1348
+ "eval_samples_per_second": 75.466,
1349
+ "eval_steps_per_second": 37.862,
1350
+ "step": 1764
1351
+ },
1352
+ {
1353
+ "epoch": 12.040816326530612,
1354
+ "grad_norm": 0.22618860006332397,
1355
+ "learning_rate": 2.593197278911565e-05,
1356
+ "loss": 0.0842,
1357
+ "step": 1770
1358
+ },
1359
+ {
1360
+ "epoch": 12.108843537414966,
1361
+ "grad_norm": 0.2526053786277771,
1362
+ "learning_rate": 2.5795918367346938e-05,
1363
+ "loss": 0.0257,
1364
+ "step": 1780
1365
+ },
1366
+ {
1367
+ "epoch": 12.17687074829932,
1368
+ "grad_norm": 5.3073649406433105,
1369
+ "learning_rate": 2.5659863945578234e-05,
1370
+ "loss": 0.0723,
1371
+ "step": 1790
1372
+ },
1373
+ {
1374
+ "epoch": 12.244897959183673,
1375
+ "grad_norm": 0.07300838083028793,
1376
+ "learning_rate": 2.5523809523809527e-05,
1377
+ "loss": 0.0447,
1378
+ "step": 1800
1379
+ },
1380
+ {
1381
+ "epoch": 12.312925170068027,
1382
+ "grad_norm": 1.9569120407104492,
1383
+ "learning_rate": 2.5387755102040817e-05,
1384
+ "loss": 0.0437,
1385
+ "step": 1810
1386
+ },
1387
+ {
1388
+ "epoch": 12.380952380952381,
1389
+ "grad_norm": 4.616933822631836,
1390
+ "learning_rate": 2.525170068027211e-05,
1391
+ "loss": 0.0802,
1392
+ "step": 1820
1393
+ },
1394
+ {
1395
+ "epoch": 12.448979591836734,
1396
+ "grad_norm": 0.09218256920576096,
1397
+ "learning_rate": 2.51156462585034e-05,
1398
+ "loss": 0.0345,
1399
+ "step": 1830
1400
+ },
1401
+ {
1402
+ "epoch": 12.517006802721088,
1403
+ "grad_norm": 0.10980120301246643,
1404
+ "learning_rate": 2.4979591836734696e-05,
1405
+ "loss": 0.0167,
1406
+ "step": 1840
1407
+ },
1408
+ {
1409
+ "epoch": 12.585034013605442,
1410
+ "grad_norm": 1.9402213096618652,
1411
+ "learning_rate": 2.4843537414965986e-05,
1412
+ "loss": 0.0189,
1413
+ "step": 1850
1414
+ },
1415
+ {
1416
+ "epoch": 12.653061224489797,
1417
+ "grad_norm": 0.28110960125923157,
1418
+ "learning_rate": 2.4707482993197282e-05,
1419
+ "loss": 0.0939,
1420
+ "step": 1860
1421
+ },
1422
+ {
1423
+ "epoch": 12.72108843537415,
1424
+ "grad_norm": 14.07165813446045,
1425
+ "learning_rate": 2.4571428571428572e-05,
1426
+ "loss": 0.1164,
1427
+ "step": 1870
1428
+ },
1429
+ {
1430
+ "epoch": 12.789115646258503,
1431
+ "grad_norm": 0.09300262480974197,
1432
+ "learning_rate": 2.4435374149659865e-05,
1433
+ "loss": 0.0511,
1434
+ "step": 1880
1435
+ },
1436
+ {
1437
+ "epoch": 12.857142857142858,
1438
+ "grad_norm": 0.08766383677721024,
1439
+ "learning_rate": 2.4299319727891158e-05,
1440
+ "loss": 0.0821,
1441
+ "step": 1890
1442
+ },
1443
+ {
1444
+ "epoch": 12.92517006802721,
1445
+ "grad_norm": 4.915824890136719,
1446
+ "learning_rate": 2.416326530612245e-05,
1447
+ "loss": 0.0425,
1448
+ "step": 1900
1449
+ },
1450
+ {
1451
+ "epoch": 12.993197278911564,
1452
+ "grad_norm": 0.25317126512527466,
1453
+ "learning_rate": 2.4027210884353744e-05,
1454
+ "loss": 0.048,
1455
+ "step": 1910
1456
+ },
1457
+ {
1458
+ "epoch": 13.0,
1459
+ "eval_cer": 0.43813131313131315,
1460
+ "eval_loss": 0.044823333621025085,
1461
+ "eval_runtime": 3.9907,
1462
+ "eval_samples_per_second": 73.42,
1463
+ "eval_steps_per_second": 36.835,
1464
+ "step": 1911
1465
+ },
1466
+ {
1467
+ "epoch": 13.061224489795919,
1468
+ "grad_norm": 11.498345375061035,
1469
+ "learning_rate": 2.3891156462585034e-05,
1470
+ "loss": 0.0573,
1471
+ "step": 1920
1472
+ },
1473
+ {
1474
+ "epoch": 13.129251700680273,
1475
+ "grad_norm": 9.521465301513672,
1476
+ "learning_rate": 2.3755102040816327e-05,
1477
+ "loss": 0.0186,
1478
+ "step": 1930
1479
+ },
1480
+ {
1481
+ "epoch": 13.197278911564625,
1482
+ "grad_norm": 5.737509250640869,
1483
+ "learning_rate": 2.361904761904762e-05,
1484
+ "loss": 0.0189,
1485
+ "step": 1940
1486
+ },
1487
+ {
1488
+ "epoch": 13.26530612244898,
1489
+ "grad_norm": 0.3337570130825043,
1490
+ "learning_rate": 2.3482993197278913e-05,
1491
+ "loss": 0.0249,
1492
+ "step": 1950
1493
+ },
1494
+ {
1495
+ "epoch": 13.333333333333334,
1496
+ "grad_norm": 0.21208225190639496,
1497
+ "learning_rate": 2.3346938775510206e-05,
1498
+ "loss": 0.0266,
1499
+ "step": 1960
1500
+ },
1501
+ {
1502
+ "epoch": 13.401360544217686,
1503
+ "grad_norm": 0.034205980598926544,
1504
+ "learning_rate": 2.3210884353741495e-05,
1505
+ "loss": 0.0062,
1506
+ "step": 1970
1507
+ },
1508
+ {
1509
+ "epoch": 13.46938775510204,
1510
+ "grad_norm": 6.113767623901367,
1511
+ "learning_rate": 2.3074829931972792e-05,
1512
+ "loss": 0.1134,
1513
+ "step": 1980
1514
+ },
1515
+ {
1516
+ "epoch": 13.537414965986395,
1517
+ "grad_norm": 1.1116629838943481,
1518
+ "learning_rate": 2.293877551020408e-05,
1519
+ "loss": 0.0873,
1520
+ "step": 1990
1521
+ },
1522
+ {
1523
+ "epoch": 13.60544217687075,
1524
+ "grad_norm": 8.336064338684082,
1525
+ "learning_rate": 2.2802721088435375e-05,
1526
+ "loss": 0.0285,
1527
+ "step": 2000
1528
+ },
1529
+ {
1530
+ "epoch": 13.673469387755102,
1531
+ "grad_norm": 0.6493708491325378,
1532
+ "learning_rate": 2.2666666666666668e-05,
1533
+ "loss": 0.0107,
1534
+ "step": 2010
1535
+ },
1536
+ {
1537
+ "epoch": 13.741496598639456,
1538
+ "grad_norm": 4.806843280792236,
1539
+ "learning_rate": 2.253061224489796e-05,
1540
+ "loss": 0.0163,
1541
+ "step": 2020
1542
+ },
1543
+ {
1544
+ "epoch": 13.80952380952381,
1545
+ "grad_norm": 0.049966610968112946,
1546
+ "learning_rate": 2.2394557823129254e-05,
1547
+ "loss": 0.01,
1548
+ "step": 2030
1549
+ },
1550
+ {
1551
+ "epoch": 13.877551020408163,
1552
+ "grad_norm": 0.1961314082145691,
1553
+ "learning_rate": 2.2258503401360543e-05,
1554
+ "loss": 0.107,
1555
+ "step": 2040
1556
+ },
1557
+ {
1558
+ "epoch": 13.945578231292517,
1559
+ "grad_norm": 0.04836405813694,
1560
+ "learning_rate": 2.2122448979591836e-05,
1561
+ "loss": 0.0518,
1562
+ "step": 2050
1563
+ },
1564
+ {
1565
+ "epoch": 14.0,
1566
+ "eval_cer": 0.37752525252525254,
1567
+ "eval_loss": 0.04542345553636551,
1568
+ "eval_runtime": 3.7709,
1569
+ "eval_samples_per_second": 77.701,
1570
+ "eval_steps_per_second": 38.983,
1571
+ "step": 2058
1572
+ },
1573
+ {
1574
+ "epoch": 14.013605442176871,
1575
+ "grad_norm": 0.12112589925527573,
1576
+ "learning_rate": 2.1986394557823133e-05,
1577
+ "loss": 0.007,
1578
+ "step": 2060
1579
+ },
1580
+ {
1581
+ "epoch": 14.081632653061224,
1582
+ "grad_norm": 0.18053178489208221,
1583
+ "learning_rate": 2.1850340136054422e-05,
1584
+ "loss": 0.1291,
1585
+ "step": 2070
1586
+ },
1587
+ {
1588
+ "epoch": 14.149659863945578,
1589
+ "grad_norm": 0.2066822201013565,
1590
+ "learning_rate": 2.1714285714285715e-05,
1591
+ "loss": 0.0257,
1592
+ "step": 2080
1593
+ },
1594
+ {
1595
+ "epoch": 14.217687074829932,
1596
+ "grad_norm": 0.11014904081821442,
1597
+ "learning_rate": 2.1578231292517005e-05,
1598
+ "loss": 0.0535,
1599
+ "step": 2090
1600
+ },
1601
+ {
1602
+ "epoch": 14.285714285714286,
1603
+ "grad_norm": 12.59344482421875,
1604
+ "learning_rate": 2.14421768707483e-05,
1605
+ "loss": 0.1287,
1606
+ "step": 2100
1607
+ },
1608
+ {
1609
+ "epoch": 14.353741496598639,
1610
+ "grad_norm": 0.045586470514535904,
1611
+ "learning_rate": 2.1306122448979595e-05,
1612
+ "loss": 0.0348,
1613
+ "step": 2110
1614
+ },
1615
+ {
1616
+ "epoch": 14.421768707482993,
1617
+ "grad_norm": 0.16031715273857117,
1618
+ "learning_rate": 2.1170068027210884e-05,
1619
+ "loss": 0.0664,
1620
+ "step": 2120
1621
+ },
1622
+ {
1623
+ "epoch": 14.489795918367347,
1624
+ "grad_norm": 2.561408281326294,
1625
+ "learning_rate": 2.1034013605442177e-05,
1626
+ "loss": 0.04,
1627
+ "step": 2130
1628
+ },
1629
+ {
1630
+ "epoch": 14.5578231292517,
1631
+ "grad_norm": 0.2670551538467407,
1632
+ "learning_rate": 2.089795918367347e-05,
1633
+ "loss": 0.0092,
1634
+ "step": 2140
1635
+ },
1636
+ {
1637
+ "epoch": 14.625850340136054,
1638
+ "grad_norm": 0.12540097534656525,
1639
+ "learning_rate": 2.0761904761904763e-05,
1640
+ "loss": 0.0111,
1641
+ "step": 2150
1642
+ },
1643
+ {
1644
+ "epoch": 14.693877551020408,
1645
+ "grad_norm": 1.2808445692062378,
1646
+ "learning_rate": 2.0625850340136056e-05,
1647
+ "loss": 0.0595,
1648
+ "step": 2160
1649
+ },
1650
+ {
1651
+ "epoch": 14.761904761904763,
1652
+ "grad_norm": 0.03977341949939728,
1653
+ "learning_rate": 2.0489795918367346e-05,
1654
+ "loss": 0.0437,
1655
+ "step": 2170
1656
+ },
1657
+ {
1658
+ "epoch": 14.829931972789115,
1659
+ "grad_norm": 0.40472060441970825,
1660
+ "learning_rate": 2.0353741496598642e-05,
1661
+ "loss": 0.0268,
1662
+ "step": 2180
1663
+ },
1664
+ {
1665
+ "epoch": 14.89795918367347,
1666
+ "grad_norm": 0.08766451478004456,
1667
+ "learning_rate": 2.0217687074829932e-05,
1668
+ "loss": 0.0072,
1669
+ "step": 2190
1670
+ },
1671
+ {
1672
+ "epoch": 14.965986394557824,
1673
+ "grad_norm": 0.34755828976631165,
1674
+ "learning_rate": 2.0081632653061225e-05,
1675
+ "loss": 0.0482,
1676
+ "step": 2200
1677
+ },
1678
+ {
1679
+ "epoch": 15.0,
1680
+ "eval_cer": 0.38257575757575757,
1681
+ "eval_loss": 0.03393391892313957,
1682
+ "eval_runtime": 3.9624,
1683
+ "eval_samples_per_second": 73.945,
1684
+ "eval_steps_per_second": 37.098,
1685
+ "step": 2205
1686
+ },
1687
+ {
1688
+ "epoch": 15.034013605442176,
1689
+ "grad_norm": 0.07853918522596359,
1690
+ "learning_rate": 1.9945578231292518e-05,
1691
+ "loss": 0.0218,
1692
+ "step": 2210
1693
+ },
1694
+ {
1695
+ "epoch": 15.10204081632653,
1696
+ "grad_norm": 0.08187614381313324,
1697
+ "learning_rate": 1.980952380952381e-05,
1698
+ "loss": 0.0059,
1699
+ "step": 2220
1700
+ },
1701
+ {
1702
+ "epoch": 15.170068027210885,
1703
+ "grad_norm": 0.5369409918785095,
1704
+ "learning_rate": 1.9673469387755104e-05,
1705
+ "loss": 0.0105,
1706
+ "step": 2230
1707
+ },
1708
+ {
1709
+ "epoch": 15.238095238095237,
1710
+ "grad_norm": 0.46228042244911194,
1711
+ "learning_rate": 1.9537414965986394e-05,
1712
+ "loss": 0.0028,
1713
+ "step": 2240
1714
+ },
1715
+ {
1716
+ "epoch": 15.306122448979592,
1717
+ "grad_norm": 0.1829945147037506,
1718
+ "learning_rate": 1.940136054421769e-05,
1719
+ "loss": 0.0355,
1720
+ "step": 2250
1721
+ },
1722
+ {
1723
+ "epoch": 15.374149659863946,
1724
+ "grad_norm": 0.08586379885673523,
1725
+ "learning_rate": 1.926530612244898e-05,
1726
+ "loss": 0.0096,
1727
+ "step": 2260
1728
+ },
1729
+ {
1730
+ "epoch": 15.4421768707483,
1731
+ "grad_norm": 0.13552436232566833,
1732
+ "learning_rate": 1.9129251700680273e-05,
1733
+ "loss": 0.1455,
1734
+ "step": 2270
1735
+ },
1736
+ {
1737
+ "epoch": 15.510204081632653,
1738
+ "grad_norm": 0.3371906876564026,
1739
+ "learning_rate": 1.8993197278911566e-05,
1740
+ "loss": 0.0229,
1741
+ "step": 2280
1742
+ },
1743
+ {
1744
+ "epoch": 15.578231292517007,
1745
+ "grad_norm": 0.03920818492770195,
1746
+ "learning_rate": 1.885714285714286e-05,
1747
+ "loss": 0.0224,
1748
+ "step": 2290
1749
+ },
1750
+ {
1751
+ "epoch": 15.646258503401361,
1752
+ "grad_norm": 0.8000497817993164,
1753
+ "learning_rate": 1.8721088435374152e-05,
1754
+ "loss": 0.0681,
1755
+ "step": 2300
1756
+ },
1757
+ {
1758
+ "epoch": 15.714285714285714,
1759
+ "grad_norm": 0.15928244590759277,
1760
+ "learning_rate": 1.8585034013605442e-05,
1761
+ "loss": 0.0338,
1762
+ "step": 2310
1763
+ },
1764
+ {
1765
+ "epoch": 15.782312925170068,
1766
+ "grad_norm": 2.3006467819213867,
1767
+ "learning_rate": 1.8448979591836735e-05,
1768
+ "loss": 0.0085,
1769
+ "step": 2320
1770
+ },
1771
+ {
1772
+ "epoch": 15.850340136054422,
1773
+ "grad_norm": 0.3312649726867676,
1774
+ "learning_rate": 1.8312925170068028e-05,
1775
+ "loss": 0.0015,
1776
+ "step": 2330
1777
+ },
1778
+ {
1779
+ "epoch": 15.918367346938776,
1780
+ "grad_norm": 11.605361938476562,
1781
+ "learning_rate": 1.817687074829932e-05,
1782
+ "loss": 0.0616,
1783
+ "step": 2340
1784
+ },
1785
+ {
1786
+ "epoch": 15.986394557823129,
1787
+ "grad_norm": 0.06783591210842133,
1788
+ "learning_rate": 1.8040816326530614e-05,
1789
+ "loss": 0.0061,
1790
+ "step": 2350
1791
+ },
1792
+ {
1793
+ "epoch": 16.0,
1794
+ "eval_cer": 0.36742424242424243,
1795
+ "eval_loss": 0.01737603358924389,
1796
+ "eval_runtime": 3.7608,
1797
+ "eval_samples_per_second": 77.91,
1798
+ "eval_steps_per_second": 39.088,
1799
+ "step": 2352
1800
+ },
1801
+ {
1802
+ "epoch": 16.05442176870748,
1803
+ "grad_norm": 0.03763847053050995,
1804
+ "learning_rate": 1.7904761904761904e-05,
1805
+ "loss": 0.018,
1806
+ "step": 2360
1807
+ },
1808
+ {
1809
+ "epoch": 16.122448979591837,
1810
+ "grad_norm": 1.681272268295288,
1811
+ "learning_rate": 1.77687074829932e-05,
1812
+ "loss": 0.0158,
1813
+ "step": 2370
1814
+ },
1815
+ {
1816
+ "epoch": 16.19047619047619,
1817
+ "grad_norm": 0.31114622950553894,
1818
+ "learning_rate": 1.763265306122449e-05,
1819
+ "loss": 0.0045,
1820
+ "step": 2380
1821
+ },
1822
+ {
1823
+ "epoch": 16.258503401360546,
1824
+ "grad_norm": 3.3073906898498535,
1825
+ "learning_rate": 1.7496598639455783e-05,
1826
+ "loss": 0.0266,
1827
+ "step": 2390
1828
+ },
1829
+ {
1830
+ "epoch": 16.3265306122449,
1831
+ "grad_norm": 0.05872774124145508,
1832
+ "learning_rate": 1.7360544217687076e-05,
1833
+ "loss": 0.0094,
1834
+ "step": 2400
1835
+ },
1836
+ {
1837
+ "epoch": 16.39455782312925,
1838
+ "grad_norm": 0.36872556805610657,
1839
+ "learning_rate": 1.722448979591837e-05,
1840
+ "loss": 0.0031,
1841
+ "step": 2410
1842
+ },
1843
+ {
1844
+ "epoch": 16.462585034013607,
1845
+ "grad_norm": 0.18953841924667358,
1846
+ "learning_rate": 1.7088435374149662e-05,
1847
+ "loss": 0.0072,
1848
+ "step": 2420
1849
+ },
1850
+ {
1851
+ "epoch": 16.53061224489796,
1852
+ "grad_norm": 0.053531669080257416,
1853
+ "learning_rate": 1.695238095238095e-05,
1854
+ "loss": 0.0214,
1855
+ "step": 2430
1856
+ },
1857
+ {
1858
+ "epoch": 16.598639455782312,
1859
+ "grad_norm": 0.9419485926628113,
1860
+ "learning_rate": 1.6816326530612244e-05,
1861
+ "loss": 0.0311,
1862
+ "step": 2440
1863
+ },
1864
+ {
1865
+ "epoch": 16.666666666666668,
1866
+ "grad_norm": 0.10363417118787766,
1867
+ "learning_rate": 1.668027210884354e-05,
1868
+ "loss": 0.0145,
1869
+ "step": 2450
1870
+ },
1871
+ {
1872
+ "epoch": 16.73469387755102,
1873
+ "grad_norm": 11.27441692352295,
1874
+ "learning_rate": 1.654421768707483e-05,
1875
+ "loss": 0.0174,
1876
+ "step": 2460
1877
+ },
1878
+ {
1879
+ "epoch": 16.802721088435373,
1880
+ "grad_norm": 0.07533001154661179,
1881
+ "learning_rate": 1.6408163265306124e-05,
1882
+ "loss": 0.0043,
1883
+ "step": 2470
1884
+ },
1885
+ {
1886
+ "epoch": 16.87074829931973,
1887
+ "grad_norm": 0.09791432321071625,
1888
+ "learning_rate": 1.6272108843537413e-05,
1889
+ "loss": 0.0192,
1890
+ "step": 2480
1891
+ },
1892
+ {
1893
+ "epoch": 16.93877551020408,
1894
+ "grad_norm": 1.773054838180542,
1895
+ "learning_rate": 1.613605442176871e-05,
1896
+ "loss": 0.0846,
1897
+ "step": 2490
1898
+ },
1899
+ {
1900
+ "epoch": 17.0,
1901
+ "eval_cer": 0.22853535353535354,
1902
+ "eval_loss": 0.016882039606571198,
1903
+ "eval_runtime": 3.9198,
1904
+ "eval_samples_per_second": 74.749,
1905
+ "eval_steps_per_second": 37.502,
1906
+ "step": 2499
1907
+ },
1908
+ {
1909
+ "epoch": 17.006802721088434,
1910
+ "grad_norm": 0.08635395020246506,
1911
+ "learning_rate": 1.6000000000000003e-05,
1912
+ "loss": 0.0117,
1913
+ "step": 2500
1914
+ },
1915
+ {
1916
+ "epoch": 17.07482993197279,
1917
+ "grad_norm": 0.04216604679822922,
1918
+ "learning_rate": 1.5863945578231292e-05,
1919
+ "loss": 0.0075,
1920
+ "step": 2510
1921
+ },
1922
+ {
1923
+ "epoch": 17.142857142857142,
1924
+ "grad_norm": 0.3129735589027405,
1925
+ "learning_rate": 1.5727891156462585e-05,
1926
+ "loss": 0.0024,
1927
+ "step": 2520
1928
+ },
1929
+ {
1930
+ "epoch": 17.2108843537415,
1931
+ "grad_norm": 0.0337909497320652,
1932
+ "learning_rate": 1.559183673469388e-05,
1933
+ "loss": 0.0032,
1934
+ "step": 2530
1935
+ },
1936
+ {
1937
+ "epoch": 17.27891156462585,
1938
+ "grad_norm": 0.3642733097076416,
1939
+ "learning_rate": 1.545578231292517e-05,
1940
+ "loss": 0.1344,
1941
+ "step": 2540
1942
+ },
1943
+ {
1944
+ "epoch": 17.346938775510203,
1945
+ "grad_norm": 0.06059624254703522,
1946
+ "learning_rate": 1.5319727891156464e-05,
1947
+ "loss": 0.0039,
1948
+ "step": 2550
1949
+ },
1950
+ {
1951
+ "epoch": 17.41496598639456,
1952
+ "grad_norm": 0.5465549826622009,
1953
+ "learning_rate": 1.5183673469387754e-05,
1954
+ "loss": 0.0395,
1955
+ "step": 2560
1956
+ },
1957
+ {
1958
+ "epoch": 17.482993197278912,
1959
+ "grad_norm": 0.048258326947689056,
1960
+ "learning_rate": 1.5047619047619049e-05,
1961
+ "loss": 0.0226,
1962
+ "step": 2570
1963
+ },
1964
+ {
1965
+ "epoch": 17.551020408163264,
1966
+ "grad_norm": 0.5764261484146118,
1967
+ "learning_rate": 1.4911564625850342e-05,
1968
+ "loss": 0.0348,
1969
+ "step": 2580
1970
+ },
1971
+ {
1972
+ "epoch": 17.61904761904762,
1973
+ "grad_norm": 1.802079439163208,
1974
+ "learning_rate": 1.4775510204081633e-05,
1975
+ "loss": 0.004,
1976
+ "step": 2590
1977
+ },
1978
+ {
1979
+ "epoch": 17.687074829931973,
1980
+ "grad_norm": 0.03979931399226189,
1981
+ "learning_rate": 1.4639455782312925e-05,
1982
+ "loss": 0.004,
1983
+ "step": 2600
1984
+ },
1985
+ {
1986
+ "epoch": 17.755102040816325,
1987
+ "grad_norm": 0.25388839840888977,
1988
+ "learning_rate": 1.450340136054422e-05,
1989
+ "loss": 0.0039,
1990
+ "step": 2610
1991
+ },
1992
+ {
1993
+ "epoch": 17.82312925170068,
1994
+ "grad_norm": 0.44963565468788147,
1995
+ "learning_rate": 1.436734693877551e-05,
1996
+ "loss": 0.006,
1997
+ "step": 2620
1998
+ },
1999
+ {
2000
+ "epoch": 17.891156462585034,
2001
+ "grad_norm": 0.0887552797794342,
2002
+ "learning_rate": 1.4231292517006804e-05,
2003
+ "loss": 0.0042,
2004
+ "step": 2630
2005
+ },
2006
+ {
2007
+ "epoch": 17.959183673469386,
2008
+ "grad_norm": 0.11289983987808228,
2009
+ "learning_rate": 1.4095238095238095e-05,
2010
+ "loss": 0.0034,
2011
+ "step": 2640
2012
+ },
2013
+ {
2014
+ "epoch": 18.0,
2015
+ "eval_cer": 0.24242424242424243,
2016
+ "eval_loss": 0.013762996532022953,
2017
+ "eval_runtime": 3.9761,
2018
+ "eval_samples_per_second": 73.691,
2019
+ "eval_steps_per_second": 36.971,
2020
+ "step": 2646
2021
+ },
2022
+ {
2023
+ "epoch": 18.027210884353742,
2024
+ "grad_norm": 0.06268062442541122,
2025
+ "learning_rate": 1.395918367346939e-05,
2026
+ "loss": 0.0031,
2027
+ "step": 2650
2028
+ },
2029
+ {
2030
+ "epoch": 18.095238095238095,
2031
+ "grad_norm": 0.03095332533121109,
2032
+ "learning_rate": 1.3823129251700681e-05,
2033
+ "loss": 0.0356,
2034
+ "step": 2660
2035
+ },
2036
+ {
2037
+ "epoch": 18.163265306122447,
2038
+ "grad_norm": 0.6670628786087036,
2039
+ "learning_rate": 1.3687074829931972e-05,
2040
+ "loss": 0.004,
2041
+ "step": 2670
2042
+ },
2043
+ {
2044
+ "epoch": 18.231292517006803,
2045
+ "grad_norm": 0.09079564362764359,
2046
+ "learning_rate": 1.3551020408163265e-05,
2047
+ "loss": 0.0036,
2048
+ "step": 2680
2049
+ },
2050
+ {
2051
+ "epoch": 18.299319727891156,
2052
+ "grad_norm": 0.17814789712429047,
2053
+ "learning_rate": 1.3414965986394558e-05,
2054
+ "loss": 0.0016,
2055
+ "step": 2690
2056
+ },
2057
+ {
2058
+ "epoch": 18.367346938775512,
2059
+ "grad_norm": 0.053088486194610596,
2060
+ "learning_rate": 1.3278911564625852e-05,
2061
+ "loss": 0.0037,
2062
+ "step": 2700
2063
+ },
2064
+ {
2065
+ "epoch": 18.435374149659864,
2066
+ "grad_norm": 0.05287722125649452,
2067
+ "learning_rate": 1.3142857142857143e-05,
2068
+ "loss": 0.0031,
2069
+ "step": 2710
2070
+ },
2071
+ {
2072
+ "epoch": 18.503401360544217,
2073
+ "grad_norm": 0.22168047726154327,
2074
+ "learning_rate": 1.3006802721088434e-05,
2075
+ "loss": 0.003,
2076
+ "step": 2720
2077
+ },
2078
+ {
2079
+ "epoch": 18.571428571428573,
2080
+ "grad_norm": 0.2615916430950165,
2081
+ "learning_rate": 1.2870748299319729e-05,
2082
+ "loss": 0.0022,
2083
+ "step": 2730
2084
+ },
2085
+ {
2086
+ "epoch": 18.639455782312925,
2087
+ "grad_norm": 0.04484458267688751,
2088
+ "learning_rate": 1.273469387755102e-05,
2089
+ "loss": 0.083,
2090
+ "step": 2740
2091
+ },
2092
+ {
2093
+ "epoch": 18.707482993197278,
2094
+ "grad_norm": 0.4530847370624542,
2095
+ "learning_rate": 1.2598639455782313e-05,
2096
+ "loss": 0.0034,
2097
+ "step": 2750
2098
+ },
2099
+ {
2100
+ "epoch": 18.775510204081634,
2101
+ "grad_norm": 0.11792109161615372,
2102
+ "learning_rate": 1.2462585034013606e-05,
2103
+ "loss": 0.0295,
2104
+ "step": 2760
2105
+ },
2106
+ {
2107
+ "epoch": 18.843537414965986,
2108
+ "grad_norm": 0.049426767975091934,
2109
+ "learning_rate": 1.2326530612244898e-05,
2110
+ "loss": 0.0019,
2111
+ "step": 2770
2112
+ },
2113
+ {
2114
+ "epoch": 18.91156462585034,
2115
+ "grad_norm": 0.04500193893909454,
2116
+ "learning_rate": 1.219047619047619e-05,
2117
+ "loss": 0.0134,
2118
+ "step": 2780
2119
+ },
2120
+ {
2121
+ "epoch": 18.979591836734695,
2122
+ "grad_norm": 0.14980462193489075,
2123
+ "learning_rate": 1.2054421768707484e-05,
2124
+ "loss": 0.0032,
2125
+ "step": 2790
2126
+ },
2127
+ {
2128
+ "epoch": 19.0,
2129
+ "eval_cer": 0.2159090909090909,
2130
+ "eval_loss": 0.012968610972166061,
2131
+ "eval_runtime": 3.6907,
2132
+ "eval_samples_per_second": 79.389,
2133
+ "eval_steps_per_second": 39.83,
2134
+ "step": 2793
2135
+ },
2136
+ {
2137
+ "epoch": 19.047619047619047,
2138
+ "grad_norm": 0.5596031546592712,
2139
+ "learning_rate": 1.1918367346938777e-05,
2140
+ "loss": 0.004,
2141
+ "step": 2800
2142
+ },
2143
+ {
2144
+ "epoch": 19.1156462585034,
2145
+ "grad_norm": 0.09450047463178635,
2146
+ "learning_rate": 1.178231292517007e-05,
2147
+ "loss": 0.0176,
2148
+ "step": 2810
2149
+ },
2150
+ {
2151
+ "epoch": 19.183673469387756,
2152
+ "grad_norm": 0.030270878225564957,
2153
+ "learning_rate": 1.1646258503401361e-05,
2154
+ "loss": 0.0031,
2155
+ "step": 2820
2156
+ },
2157
+ {
2158
+ "epoch": 19.25170068027211,
2159
+ "grad_norm": 0.5096073746681213,
2160
+ "learning_rate": 1.1510204081632654e-05,
2161
+ "loss": 0.0231,
2162
+ "step": 2830
2163
+ },
2164
+ {
2165
+ "epoch": 19.31972789115646,
2166
+ "grad_norm": 0.2736698389053345,
2167
+ "learning_rate": 1.1374149659863946e-05,
2168
+ "loss": 0.0016,
2169
+ "step": 2840
2170
+ },
2171
+ {
2172
+ "epoch": 19.387755102040817,
2173
+ "grad_norm": 0.06535348296165466,
2174
+ "learning_rate": 1.1238095238095239e-05,
2175
+ "loss": 0.0013,
2176
+ "step": 2850
2177
+ },
2178
+ {
2179
+ "epoch": 19.45578231292517,
2180
+ "grad_norm": 0.0859360322356224,
2181
+ "learning_rate": 1.1102040816326532e-05,
2182
+ "loss": 0.003,
2183
+ "step": 2860
2184
+ },
2185
+ {
2186
+ "epoch": 19.523809523809526,
2187
+ "grad_norm": 0.037795525044202805,
2188
+ "learning_rate": 1.0965986394557825e-05,
2189
+ "loss": 0.0026,
2190
+ "step": 2870
2191
+ },
2192
+ {
2193
+ "epoch": 19.591836734693878,
2194
+ "grad_norm": 1.3488638401031494,
2195
+ "learning_rate": 1.0829931972789116e-05,
2196
+ "loss": 0.004,
2197
+ "step": 2880
2198
+ },
2199
+ {
2200
+ "epoch": 19.65986394557823,
2201
+ "grad_norm": 0.05746370553970337,
2202
+ "learning_rate": 1.0693877551020409e-05,
2203
+ "loss": 0.0192,
2204
+ "step": 2890
2205
+ },
2206
+ {
2207
+ "epoch": 19.727891156462587,
2208
+ "grad_norm": 0.025979384779930115,
2209
+ "learning_rate": 1.05578231292517e-05,
2210
+ "loss": 0.0091,
2211
+ "step": 2900
2212
+ },
2213
+ {
2214
+ "epoch": 19.79591836734694,
2215
+ "grad_norm": 4.972421646118164,
2216
+ "learning_rate": 1.0421768707482993e-05,
2217
+ "loss": 0.0082,
2218
+ "step": 2910
2219
+ },
2220
+ {
2221
+ "epoch": 19.86394557823129,
2222
+ "grad_norm": 0.49525704979896545,
2223
+ "learning_rate": 1.0285714285714286e-05,
2224
+ "loss": 0.0034,
2225
+ "step": 2920
2226
+ },
2227
+ {
2228
+ "epoch": 19.931972789115648,
2229
+ "grad_norm": 0.02950323186814785,
2230
+ "learning_rate": 1.014965986394558e-05,
2231
+ "loss": 0.07,
2232
+ "step": 2930
2233
+ },
2234
+ {
2235
+ "epoch": 20.0,
2236
+ "grad_norm": 0.047243040055036545,
2237
+ "learning_rate": 1.001360544217687e-05,
2238
+ "loss": 0.0019,
2239
+ "step": 2940
2240
+ },
2241
+ {
2242
+ "epoch": 20.0,
2243
+ "eval_cer": 0.30176767676767674,
2244
+ "eval_loss": 0.011288419365882874,
2245
+ "eval_runtime": 4.0348,
2246
+ "eval_samples_per_second": 72.618,
2247
+ "eval_steps_per_second": 36.433,
2248
+ "step": 2940
2249
+ },
2250
+ {
2251
+ "epoch": 20.068027210884352,
2252
+ "grad_norm": 8.58004093170166,
2253
+ "learning_rate": 9.877551020408164e-06,
2254
+ "loss": 0.02,
2255
+ "step": 2950
2256
+ },
2257
+ {
2258
+ "epoch": 20.13605442176871,
2259
+ "grad_norm": 0.2544482946395874,
2260
+ "learning_rate": 9.741496598639455e-06,
2261
+ "loss": 0.0156,
2262
+ "step": 2960
2263
+ },
2264
+ {
2265
+ "epoch": 20.20408163265306,
2266
+ "grad_norm": 0.5715163350105286,
2267
+ "learning_rate": 9.60544217687075e-06,
2268
+ "loss": 0.0027,
2269
+ "step": 2970
2270
+ },
2271
+ {
2272
+ "epoch": 20.272108843537413,
2273
+ "grad_norm": 0.134610116481781,
2274
+ "learning_rate": 9.469387755102041e-06,
2275
+ "loss": 0.0394,
2276
+ "step": 2980
2277
+ },
2278
+ {
2279
+ "epoch": 20.34013605442177,
2280
+ "grad_norm": 0.23469507694244385,
2281
+ "learning_rate": 9.333333333333334e-06,
2282
+ "loss": 0.0142,
2283
+ "step": 2990
2284
+ },
2285
+ {
2286
+ "epoch": 20.408163265306122,
2287
+ "grad_norm": 0.19277207553386688,
2288
+ "learning_rate": 9.197278911564626e-06,
2289
+ "loss": 0.0214,
2290
+ "step": 3000
2291
+ },
2292
+ {
2293
+ "epoch": 20.476190476190474,
2294
+ "grad_norm": 0.04216855764389038,
2295
+ "learning_rate": 9.061224489795919e-06,
2296
+ "loss": 0.0014,
2297
+ "step": 3010
2298
+ },
2299
+ {
2300
+ "epoch": 20.54421768707483,
2301
+ "grad_norm": 0.02860959619283676,
2302
+ "learning_rate": 8.925170068027212e-06,
2303
+ "loss": 0.0049,
2304
+ "step": 3020
2305
+ },
2306
+ {
2307
+ "epoch": 20.612244897959183,
2308
+ "grad_norm": 0.38055145740509033,
2309
+ "learning_rate": 8.789115646258505e-06,
2310
+ "loss": 0.0193,
2311
+ "step": 3030
2312
+ },
2313
+ {
2314
+ "epoch": 20.68027210884354,
2315
+ "grad_norm": 0.034134916961193085,
2316
+ "learning_rate": 8.653061224489796e-06,
2317
+ "loss": 0.0015,
2318
+ "step": 3040
2319
+ },
2320
+ {
2321
+ "epoch": 20.74829931972789,
2322
+ "grad_norm": 0.6501132845878601,
2323
+ "learning_rate": 8.517006802721089e-06,
2324
+ "loss": 0.003,
2325
+ "step": 3050
2326
+ },
2327
+ {
2328
+ "epoch": 20.816326530612244,
2329
+ "grad_norm": 0.26927316188812256,
2330
+ "learning_rate": 8.38095238095238e-06,
2331
+ "loss": 0.0064,
2332
+ "step": 3060
2333
+ },
2334
+ {
2335
+ "epoch": 20.8843537414966,
2336
+ "grad_norm": 0.308063805103302,
2337
+ "learning_rate": 8.244897959183674e-06,
2338
+ "loss": 0.0171,
2339
+ "step": 3070
2340
+ },
2341
+ {
2342
+ "epoch": 20.952380952380953,
2343
+ "grad_norm": 0.0912749320268631,
2344
+ "learning_rate": 8.108843537414967e-06,
2345
+ "loss": 0.0034,
2346
+ "step": 3080
2347
+ },
2348
+ {
2349
+ "epoch": 21.0,
2350
+ "eval_cer": 0.23863636363636365,
2351
+ "eval_loss": 0.009314554743468761,
2352
+ "eval_runtime": 3.7562,
2353
+ "eval_samples_per_second": 78.005,
2354
+ "eval_steps_per_second": 39.135,
2355
+ "step": 3087
2356
+ },
2357
+ {
2358
+ "epoch": 21.020408163265305,
2359
+ "grad_norm": 0.031063944101333618,
2360
+ "learning_rate": 7.97278911564626e-06,
2361
+ "loss": 0.0025,
2362
+ "step": 3090
2363
+ },
2364
+ {
2365
+ "epoch": 21.08843537414966,
2366
+ "grad_norm": 0.45678919553756714,
2367
+ "learning_rate": 7.836734693877551e-06,
2368
+ "loss": 0.0017,
2369
+ "step": 3100
2370
+ },
2371
+ {
2372
+ "epoch": 21.156462585034014,
2373
+ "grad_norm": 0.06373850256204605,
2374
+ "learning_rate": 7.700680272108844e-06,
2375
+ "loss": 0.0203,
2376
+ "step": 3110
2377
+ },
2378
+ {
2379
+ "epoch": 21.224489795918366,
2380
+ "grad_norm": 0.04051206260919571,
2381
+ "learning_rate": 7.564625850340136e-06,
2382
+ "loss": 0.0028,
2383
+ "step": 3120
2384
+ },
2385
+ {
2386
+ "epoch": 21.292517006802722,
2387
+ "grad_norm": 0.20778831839561462,
2388
+ "learning_rate": 7.428571428571429e-06,
2389
+ "loss": 0.0032,
2390
+ "step": 3130
2391
+ },
2392
+ {
2393
+ "epoch": 21.360544217687075,
2394
+ "grad_norm": 0.23982657492160797,
2395
+ "learning_rate": 7.292517006802721e-06,
2396
+ "loss": 0.0097,
2397
+ "step": 3140
2398
+ },
2399
+ {
2400
+ "epoch": 21.428571428571427,
2401
+ "grad_norm": 0.30359897017478943,
2402
+ "learning_rate": 7.1564625850340144e-06,
2403
+ "loss": 0.002,
2404
+ "step": 3150
2405
+ },
2406
+ {
2407
+ "epoch": 21.496598639455783,
2408
+ "grad_norm": 0.844930112361908,
2409
+ "learning_rate": 7.020408163265306e-06,
2410
+ "loss": 0.064,
2411
+ "step": 3160
2412
+ },
2413
+ {
2414
+ "epoch": 21.564625850340136,
2415
+ "grad_norm": 0.2660425305366516,
2416
+ "learning_rate": 6.884353741496599e-06,
2417
+ "loss": 0.011,
2418
+ "step": 3170
2419
+ },
2420
+ {
2421
+ "epoch": 21.632653061224488,
2422
+ "grad_norm": 0.1279953122138977,
2423
+ "learning_rate": 6.748299319727891e-06,
2424
+ "loss": 0.0027,
2425
+ "step": 3180
2426
+ },
2427
+ {
2428
+ "epoch": 21.700680272108844,
2429
+ "grad_norm": 0.05603710934519768,
2430
+ "learning_rate": 6.612244897959184e-06,
2431
+ "loss": 0.0012,
2432
+ "step": 3190
2433
+ },
2434
+ {
2435
+ "epoch": 21.768707482993197,
2436
+ "grad_norm": 0.05168928578495979,
2437
+ "learning_rate": 6.476190476190476e-06,
2438
+ "loss": 0.0176,
2439
+ "step": 3200
2440
+ },
2441
+ {
2442
+ "epoch": 21.836734693877553,
2443
+ "grad_norm": 0.046198636293411255,
2444
+ "learning_rate": 6.340136054421769e-06,
2445
+ "loss": 0.0013,
2446
+ "step": 3210
2447
+ },
2448
+ {
2449
+ "epoch": 21.904761904761905,
2450
+ "grad_norm": 0.042502377182245255,
2451
+ "learning_rate": 6.2040816326530614e-06,
2452
+ "loss": 0.0015,
2453
+ "step": 3220
2454
+ },
2455
+ {
2456
+ "epoch": 21.972789115646258,
2457
+ "grad_norm": 0.24547749757766724,
2458
+ "learning_rate": 6.0680272108843545e-06,
2459
+ "loss": 0.0023,
2460
+ "step": 3230
2461
+ },
2462
+ {
2463
+ "epoch": 22.0,
2464
+ "eval_cer": 0.23106060606060605,
2465
+ "eval_loss": 0.009045995771884918,
2466
+ "eval_runtime": 3.8812,
2467
+ "eval_samples_per_second": 75.492,
2468
+ "eval_steps_per_second": 37.875,
2469
+ "step": 3234
2470
+ },
2471
+ {
2472
+ "epoch": 22.040816326530614,
2473
+ "grad_norm": 1.8699299097061157,
2474
+ "learning_rate": 5.931972789115647e-06,
2475
+ "loss": 0.0095,
2476
+ "step": 3240
2477
+ },
2478
+ {
2479
+ "epoch": 22.108843537414966,
2480
+ "grad_norm": 0.028658084571361542,
2481
+ "learning_rate": 5.795918367346939e-06,
2482
+ "loss": 0.002,
2483
+ "step": 3250
2484
+ },
2485
+ {
2486
+ "epoch": 22.17687074829932,
2487
+ "grad_norm": 0.11185970157384872,
2488
+ "learning_rate": 5.659863945578232e-06,
2489
+ "loss": 0.0139,
2490
+ "step": 3260
2491
+ },
2492
+ {
2493
+ "epoch": 22.244897959183675,
2494
+ "grad_norm": 0.08078885078430176,
2495
+ "learning_rate": 5.523809523809524e-06,
2496
+ "loss": 0.0093,
2497
+ "step": 3270
2498
+ },
2499
+ {
2500
+ "epoch": 22.312925170068027,
2501
+ "grad_norm": 0.033784542232751846,
2502
+ "learning_rate": 5.387755102040816e-06,
2503
+ "loss": 0.0035,
2504
+ "step": 3280
2505
+ },
2506
+ {
2507
+ "epoch": 22.38095238095238,
2508
+ "grad_norm": 0.04999591037631035,
2509
+ "learning_rate": 5.251700680272109e-06,
2510
+ "loss": 0.0158,
2511
+ "step": 3290
2512
+ },
2513
+ {
2514
+ "epoch": 22.448979591836736,
2515
+ "grad_norm": 0.47869572043418884,
2516
+ "learning_rate": 5.1156462585034015e-06,
2517
+ "loss": 0.002,
2518
+ "step": 3300
2519
+ },
2520
+ {
2521
+ "epoch": 22.517006802721088,
2522
+ "grad_norm": 0.5939333438873291,
2523
+ "learning_rate": 4.9795918367346945e-06,
2524
+ "loss": 0.0025,
2525
+ "step": 3310
2526
+ },
2527
+ {
2528
+ "epoch": 22.58503401360544,
2529
+ "grad_norm": 0.02597820572555065,
2530
+ "learning_rate": 4.843537414965987e-06,
2531
+ "loss": 0.017,
2532
+ "step": 3320
2533
+ },
2534
+ {
2535
+ "epoch": 22.653061224489797,
2536
+ "grad_norm": 0.06343343108892441,
2537
+ "learning_rate": 4.707482993197279e-06,
2538
+ "loss": 0.0414,
2539
+ "step": 3330
2540
+ },
2541
+ {
2542
+ "epoch": 22.72108843537415,
2543
+ "grad_norm": 0.40962278842926025,
2544
+ "learning_rate": 4.571428571428572e-06,
2545
+ "loss": 0.0114,
2546
+ "step": 3340
2547
+ },
2548
+ {
2549
+ "epoch": 22.7891156462585,
2550
+ "grad_norm": 0.29765334725379944,
2551
+ "learning_rate": 4.435374149659864e-06,
2552
+ "loss": 0.0023,
2553
+ "step": 3350
2554
+ },
2555
+ {
2556
+ "epoch": 22.857142857142858,
2557
+ "grad_norm": 0.17841386795043945,
2558
+ "learning_rate": 4.299319727891156e-06,
2559
+ "loss": 0.0016,
2560
+ "step": 3360
2561
+ },
2562
+ {
2563
+ "epoch": 22.92517006802721,
2564
+ "grad_norm": 0.44667163491249084,
2565
+ "learning_rate": 4.163265306122449e-06,
2566
+ "loss": 0.0021,
2567
+ "step": 3370
2568
+ },
2569
+ {
2570
+ "epoch": 22.993197278911566,
2571
+ "grad_norm": 0.19756975769996643,
2572
+ "learning_rate": 4.0272108843537416e-06,
2573
+ "loss": 0.0073,
2574
+ "step": 3380
2575
+ },
2576
+ {
2577
+ "epoch": 23.0,
2578
+ "eval_cer": 0.23737373737373738,
2579
+ "eval_loss": 0.008361349813640118,
2580
+ "eval_runtime": 3.9088,
2581
+ "eval_samples_per_second": 74.958,
2582
+ "eval_steps_per_second": 37.607,
2583
+ "step": 3381
2584
+ },
2585
+ {
2586
+ "epoch": 23.06122448979592,
2587
+ "grad_norm": 4.8710503578186035,
2588
+ "learning_rate": 3.891156462585034e-06,
2589
+ "loss": 0.0383,
2590
+ "step": 3390
2591
+ },
2592
+ {
2593
+ "epoch": 23.12925170068027,
2594
+ "grad_norm": 0.05327881500124931,
2595
+ "learning_rate": 3.7551020408163268e-06,
2596
+ "loss": 0.0026,
2597
+ "step": 3400
2598
+ },
2599
+ {
2600
+ "epoch": 23.197278911564627,
2601
+ "grad_norm": 0.4828534722328186,
2602
+ "learning_rate": 3.619047619047619e-06,
2603
+ "loss": 0.0024,
2604
+ "step": 3410
2605
+ },
2606
+ {
2607
+ "epoch": 23.26530612244898,
2608
+ "grad_norm": 0.03481818363070488,
2609
+ "learning_rate": 3.4829931972789116e-06,
2610
+ "loss": 0.0068,
2611
+ "step": 3420
2612
+ },
2613
+ {
2614
+ "epoch": 23.333333333333332,
2615
+ "grad_norm": 0.19071730971336365,
2616
+ "learning_rate": 3.346938775510204e-06,
2617
+ "loss": 0.0171,
2618
+ "step": 3430
2619
+ },
2620
+ {
2621
+ "epoch": 23.401360544217688,
2622
+ "grad_norm": 0.08115135878324509,
2623
+ "learning_rate": 3.210884353741497e-06,
2624
+ "loss": 0.0015,
2625
+ "step": 3440
2626
+ },
2627
+ {
2628
+ "epoch": 23.46938775510204,
2629
+ "grad_norm": 0.13966763019561768,
2630
+ "learning_rate": 3.074829931972789e-06,
2631
+ "loss": 0.0309,
2632
+ "step": 3450
2633
+ },
2634
+ {
2635
+ "epoch": 23.537414965986393,
2636
+ "grad_norm": 0.08014482259750366,
2637
+ "learning_rate": 2.9387755102040816e-06,
2638
+ "loss": 0.0028,
2639
+ "step": 3460
2640
+ },
2641
+ {
2642
+ "epoch": 23.60544217687075,
2643
+ "grad_norm": 0.7266091108322144,
2644
+ "learning_rate": 2.8027210884353742e-06,
2645
+ "loss": 0.0019,
2646
+ "step": 3470
2647
+ },
2648
+ {
2649
+ "epoch": 23.6734693877551,
2650
+ "grad_norm": 0.763943076133728,
2651
+ "learning_rate": 2.666666666666667e-06,
2652
+ "loss": 0.0029,
2653
+ "step": 3480
2654
+ },
2655
+ {
2656
+ "epoch": 23.741496598639454,
2657
+ "grad_norm": 0.20366428792476654,
2658
+ "learning_rate": 2.5306122448979594e-06,
2659
+ "loss": 0.0077,
2660
+ "step": 3490
2661
+ },
2662
+ {
2663
+ "epoch": 23.80952380952381,
2664
+ "grad_norm": 0.06424231082201004,
2665
+ "learning_rate": 2.394557823129252e-06,
2666
+ "loss": 0.0048,
2667
+ "step": 3500
2668
+ },
2669
+ {
2670
+ "epoch": 23.877551020408163,
2671
+ "grad_norm": 0.0471784844994545,
2672
+ "learning_rate": 2.2585034013605447e-06,
2673
+ "loss": 0.0027,
2674
+ "step": 3510
2675
+ },
2676
+ {
2677
+ "epoch": 23.94557823129252,
2678
+ "grad_norm": 0.6746675968170166,
2679
+ "learning_rate": 2.122448979591837e-06,
2680
+ "loss": 0.0022,
2681
+ "step": 3520
2682
+ },
2683
+ {
2684
+ "epoch": 24.0,
2685
+ "eval_cer": 0.2702020202020202,
2686
+ "eval_loss": 0.008024842478334904,
2687
+ "eval_runtime": 3.9491,
2688
+ "eval_samples_per_second": 74.195,
2689
+ "eval_steps_per_second": 37.224,
2690
+ "step": 3528
2691
+ },
2692
+ {
2693
+ "epoch": 24.01360544217687,
2694
+ "grad_norm": 0.248748779296875,
2695
+ "learning_rate": 1.9863945578231295e-06,
2696
+ "loss": 0.0013,
2697
+ "step": 3530
2698
+ },
2699
+ {
2700
+ "epoch": 24.081632653061224,
2701
+ "grad_norm": 5.284445762634277,
2702
+ "learning_rate": 1.8503401360544219e-06,
2703
+ "loss": 0.0097,
2704
+ "step": 3540
2705
+ },
2706
+ {
2707
+ "epoch": 24.14965986394558,
2708
+ "grad_norm": 0.07442311942577362,
2709
+ "learning_rate": 1.7142857142857145e-06,
2710
+ "loss": 0.011,
2711
+ "step": 3550
2712
+ },
2713
+ {
2714
+ "epoch": 24.217687074829932,
2715
+ "grad_norm": 0.1644800305366516,
2716
+ "learning_rate": 1.5782312925170069e-06,
2717
+ "loss": 0.0014,
2718
+ "step": 3560
2719
+ },
2720
+ {
2721
+ "epoch": 24.285714285714285,
2722
+ "grad_norm": 0.05962975695729256,
2723
+ "learning_rate": 1.4421768707482995e-06,
2724
+ "loss": 0.0018,
2725
+ "step": 3570
2726
+ },
2727
+ {
2728
+ "epoch": 24.35374149659864,
2729
+ "grad_norm": 0.039682451635599136,
2730
+ "learning_rate": 1.306122448979592e-06,
2731
+ "loss": 0.0025,
2732
+ "step": 3580
2733
+ },
2734
+ {
2735
+ "epoch": 24.421768707482993,
2736
+ "grad_norm": 0.0817071795463562,
2737
+ "learning_rate": 1.1700680272108845e-06,
2738
+ "loss": 0.0032,
2739
+ "step": 3590
2740
+ },
2741
+ {
2742
+ "epoch": 24.489795918367346,
2743
+ "grad_norm": 0.07747264206409454,
2744
+ "learning_rate": 1.034013605442177e-06,
2745
+ "loss": 0.015,
2746
+ "step": 3600
2747
+ },
2748
+ {
2749
+ "epoch": 24.5578231292517,
2750
+ "grad_norm": 0.04547140747308731,
2751
+ "learning_rate": 8.979591836734694e-07,
2752
+ "loss": 0.0015,
2753
+ "step": 3610
2754
+ },
2755
+ {
2756
+ "epoch": 24.625850340136054,
2757
+ "grad_norm": 0.04727374389767647,
2758
+ "learning_rate": 7.619047619047619e-07,
2759
+ "loss": 0.0023,
2760
+ "step": 3620
2761
+ },
2762
+ {
2763
+ "epoch": 24.693877551020407,
2764
+ "grad_norm": 0.14271779358386993,
2765
+ "learning_rate": 6.258503401360544e-07,
2766
+ "loss": 0.0025,
2767
+ "step": 3630
2768
+ },
2769
+ {
2770
+ "epoch": 24.761904761904763,
2771
+ "grad_norm": 0.4749351441860199,
2772
+ "learning_rate": 4.897959183673469e-07,
2773
+ "loss": 0.0026,
2774
+ "step": 3640
2775
+ },
2776
+ {
2777
+ "epoch": 24.829931972789115,
2778
+ "grad_norm": 10.92783260345459,
2779
+ "learning_rate": 3.537414965986395e-07,
2780
+ "loss": 0.0217,
2781
+ "step": 3650
2782
+ },
2783
+ {
2784
+ "epoch": 24.897959183673468,
2785
+ "grad_norm": 0.14225248992443085,
2786
+ "learning_rate": 2.1768707482993197e-07,
2787
+ "loss": 0.0016,
2788
+ "step": 3660
2789
+ },
2790
+ {
2791
+ "epoch": 24.965986394557824,
2792
+ "grad_norm": 0.06080883741378784,
2793
+ "learning_rate": 8.16326530612245e-08,
2794
+ "loss": 0.0391,
2795
+ "step": 3670
2796
+ },
2797
+ {
2798
+ "epoch": 25.0,
2799
+ "eval_cer": 0.26515151515151514,
2800
+ "eval_loss": 0.007971594110131264,
2801
+ "eval_runtime": 3.9963,
2802
+ "eval_samples_per_second": 73.317,
2803
+ "eval_steps_per_second": 36.784,
2804
+ "step": 3675
2805
+ }
2806
+ ],
2807
+ "logging_steps": 10,
2808
+ "max_steps": 3675,
2809
+ "num_input_tokens_seen": 0,
2810
+ "num_train_epochs": 25,
2811
+ "save_steps": 500,
2812
+ "stateful_callbacks": {
2813
+ "TrainerControl": {
2814
+ "args": {
2815
+ "should_epoch_stop": false,
2816
+ "should_evaluate": false,
2817
+ "should_log": false,
2818
+ "should_save": true,
2819
+ "should_training_stop": true
2820
+ },
2821
+ "attributes": {}
2822
+ }
2823
+ },
2824
+ "total_flos": 5.759922930951168e+17,
2825
+ "train_batch_size": 2,
2826
+ "trial_name": null,
2827
+ "trial_params": null
2828
+ }
draft_computation/ocr/ocr_model_output/checkpoint-441/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8d8686ca1447cb934c17499be896c358a24d6e284b3ea5081fb360b7d8be171
3
+ size 5713
draft_computation/ocr/ocr_model_output/checkpoint-441/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
draft_computation/ocr/text_dataset/annotations.json ADDED
@@ -0,0 +1,295 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "20250901_110721_523742_0_77883344-c895-435f-ad6a-d53f4e90fdcd.png": "14m",
3
+ "20250901_110721_523742_1_d135a15f-406a-41e1-aa2e-1075310998f1.png": "13m",
4
+ "20250901_110721_531767_2_62f4702c-e9a8-461d-9bf0-33141bc68dc5.png": "12m",
5
+ "20250901_110721_539904_-1_7e51008c-1efe-474e-9622-bb7bc67faf22.png": "12m",
6
+ "20250901_110722_323657_0_e4f10b40-1499-4797-8231-4d473f96e6d7.png": "14m",
7
+ "20250901_110722_327428_1_8f722716-1f47-43ed-9699-af34b59b02ea.png": "13m",
8
+ "20250901_110722_327428_3_ea8bf041-6248-4211-81b7-0b0fcfe66c60.png": "12m",
9
+ "20250901_110722_335742_-1_278ac5a9-360a-400f-b90d-a5ed8720e5c8.png": "12m",
10
+ "20250901_110723_106464_0_671f5b06-ea61-4772-887f-3c877568a98c.png": "14m",
11
+ "20250901_110723_106464_1_ddf150d8-c885-44cd-a263-006516f56810.png": "13m",
12
+ "20250901_110723_114490_2_7fba45f1-6a56-423d-b26c-3e5f510e8fcc.png": "12m",
13
+ "20250901_110723_122149_-1_8d6f8f16-712b-422b-93e1-0a2f01f6bd34.png": "12m",
14
+ "20250901_110724_038727_0_097ae6d1-966c-48c5-a4d0-5dbc565defc1.png": "14m",
15
+ "20250901_110724_046664_1_7aa3f37d-abc6-44db-a7c1-d572c0568842.png": "13m",
16
+ "20250901_110724_048915_3_65c67748-4408-481f-9570-4665ac851578.png": "12m",
17
+ "20250901_110724_057310_-1_7add8a33-51be-46bb-be74-eb5d763f6b53.png": "12m",
18
+ "20250901_110724_910973_0_5808a60e-a0cc-497a-9fad-4720aa3e06a9.png": "14m",
19
+ "20250901_110724_910973_1_f867928e-3317-4123-a48d-3966489eefac.png": "13m",
20
+ "20250901_110724_919454_2_0184cc25-dd62-4505-ae21-aacfe8e60661.png": "12m",
21
+ "20250901_110724_927618_-1_c8be306e-7800-49c8-b237-2704f9f68dca.png": "12m",
22
+ "20250901_110725_861849_0_277ca2b6-7962-48a4-a7b1-a81f0c1aaa66.png": "14m",
23
+ "20250901_110725_868310_1_17f2e9ed-ec58-4c28-ab78-96dd5952bed5.png": "13m",
24
+ "20250901_110725_872233_2_34a6cee8-67a5-44a5-9a57-f0eedd987627.png": "12m",
25
+ "20250901_110725_876757_-1_56cdf980-683e-4068-b9a5-38b253addc2b.png": "12m",
26
+ "20250901_110726_631739_0_be1d9991-2c0e-4681-a7de-db23c3807ef5.png": "13m",
27
+ "20250901_110726_631739_1_cb01fdd1-0590-4e74-81e3-494c2164652c.png": "14m",
28
+ "20250901_110726_640094_2_ac70a2c8-e23c-44bb-8ca9-1e3a49016ee5.png": "12m",
29
+ "20250901_110726_650578_-1_f55a4311-325a-4c2c-93b4-ad5a5312795b.png": "12m",
30
+ "20250901_110727_472075_0_ace3d469-c961-4b01-811b-2a1ee3cc5c21.png": "14m",
31
+ "20250901_110727_474716_1_68403705-fd34-4d6e-8130-525080aee7ef.png": "13m",
32
+ "20250901_110727_474716_2_31822346-99d4-4a9a-bd59-c14986f9bf2f.png": "12m",
33
+ "20250901_110727_488163_-1_4e5bbe3e-eaca-4275-88b7-74065bafc709.png": "12m",
34
+ "20250901_110728_317026_0_151a2d4b-c0ba-43f1-9d77-733b861d681b.png": "14m",
35
+ "20250901_110728_322467_1_147b49c5-0542-4f33-bbd0-41891793af34.png": "13m",
36
+ "20250901_110728_325279_-1_9cb3fc8e-e496-4db2-ae3f-11bb06b247a1.png": "12m",
37
+ "20250901_110728_325279_3_115476a8-52be-4b71-bcfd-d2b22754690f.png": "12m",
38
+ "20250901_110729_197626_0_5aee19e2-4c3f-4b91-8d78-8c4ef826d486.png": "14m",
39
+ "20250901_110729_203715_1_0f33f548-1612-4829-ba0b-c5f0bcb17fc1.png": "13m",
40
+ "20250901_110729_209774_3_4884ac53-ec55-4041-a74c-3fca9521dc4d.png": "12m",
41
+ "20250901_110729_215945_-1_951127a0-b8dd-44d9-92ca-2ea54ae0786f.png": "12m",
42
+ "20250901_110730_092452_0_08be0e55-6bca-4f07-8922-b6b78d1ec9e9.png": "13m",
43
+ "20250901_110730_101845_1_762d8228-85ee-473f-8801-e21c98d942c7.png": "14m",
44
+ "20250901_110730_102862_3_4353ff04-9147-4a9b-aa9c-c9175fc5f124.png": "12m",
45
+ "20250901_110730_110994_-1_f6a9c263-67ea-421c-9e78-e287a4f965fc.png": "12m",
46
+ "20250901_110731_050376_0_03785b0d-806e-4340-a594-3ea44eb51765.png": "11m",
47
+ "20250901_110731_055882_1_7e50c5eb-bba9-4c3d-99ef-c0ebe6756f21.png": "12m",
48
+ "20250901_110731_060667_2_73483a9e-7ea8-4968-a6bb-842c7c2de02b.png": "4m",
49
+ "20250901_110731_060667_3_e3641cdd-124b-4be5-9a66-45cc7544d3c4.png": "4m",
50
+ "20250901_110731_066292_4_c86b169c-37a8-4738-938e-5384059a9e36.png": "3m",
51
+ "20250901_110731_074607_-1_c4bb3843-7c38-4462-8789-a7524182768a.png": "11m",
52
+ "20250901_110732_049485_0_5856ece6-24a7-4722-a1e0-e0c6de8392bc.png": "11m",
53
+ "20250901_110732_049485_1_1335e62c-2aca-44d9-97a4-e29ef27a09c9.png": "15m",
54
+ "20250901_110732_059977_2_49015f80-4a95-4ddb-983b-f3177896e083.png": "12m",
55
+ "20250901_110732_061995_3_c1fe47d1-6b42-4000-b862-11cfe103eb0a.png": "4m",
56
+ "20250901_110732_064019_4_c3f4bb89-6746-413a-a9ba-b3193d5a1b0c.png": "13m",
57
+ "20250901_110732_066048_-1_19231220-cfd3-4216-a917-5e04001ece6a.png": "11m",
58
+ "20250901_110732_883670_0_c4970682-dd5e-4a53-bfa9-2ef91828b6c9.png": "11m",
59
+ "20250901_110732_891787_1_cd09526a-b768-4436-8b29-fc3c06b31521.png": "15m",
60
+ "20250901_110732_895838_2_a89e56a5-5576-4a42-9da5-172f72912d31.png": "12m",
61
+ "20250901_110732_900155_3_e02f7adc-0d0d-44ec-9409-0add9ddfd08e.png": "4m",
62
+ "20250901_110732_908363_4_f61656b8-71dc-4638-8302-45e7626ac3bb.png": "13m",
63
+ "20250901_110732_916488_-1_c9cceaf7-3a72-4fe4-ba73-f9c86e88ec1d.png": "11m",
64
+ "20250901_110733_715690_0_482373d1-79f0-4322-bdcb-1c55a8e7e4a8.png": "11m",
65
+ "20250901_110733_715690_1_2181016f-9bbf-40bc-ab56-9a2286bbd48f.png": "15m",
66
+ "20250901_110733_715690_2_648ea41d-2391-4624-a83c-1894b98a8b7f.png": "12m",
67
+ "20250901_110733_723931_-1_d28f13f7-f986-4478-a4ed-2e7c45086446.png": "11m",
68
+ "20250901_110733_723931_3_a951ad78-ad8b-49bc-aa1e-d179ec1aefae.png": "4m",
69
+ "20250901_110733_723931_4_1265c702-15f3-44eb-ab8e-cb9683ac3d7c.png": "13m",
70
+ "20250901_110734_562670_0_02c7bad7-b6e6-472d-b0ac-9d393675753a.png": "11m",
71
+ "20250901_110734_562670_1_2557caa2-fb93-4e3b-b3a7-c5f7076026e9.png": "5m",
72
+ "20250901_110734_566582_2_be069710-3d9c-4630-bd66-e282928835a4.png": "12m",
73
+ "20250901_110734_571028_3_473e0d35-954d-4a19-a834-a0f478f3744b.png": "4m",
74
+ "20250901_110734_579119_-1_3d97881b-e5e9-40d2-be5a-3dfa8626e5e5.png": "11m",
75
+ "20250901_110734_579119_4_eccd3c39-a2e7-48b4-91b7-6584e3b9ef04.png": "13m",
76
+ "20250901_110735_466481_0_d5a6eddf-9059-423b-a041-87ec3a0e549f.png": "11m",
77
+ "20250901_110735_474821_1_07da98c0-1a04-45f0-919e-74fe522136b7.png": "5m",
78
+ "20250901_110735_483069_2_cbba91c1-48b3-48a8-8dde-a7c147aa8119.png": "12m",
79
+ "20250901_110735_483069_3_1d0c150e-91a4-4071-8de2-5de551ec6b61.png": "4m",
80
+ "20250901_110735_483069_4_dc7ceb73-593d-4026-8f54-c06cec3b32fd.png": "13m",
81
+ "20250901_110735_495264_-1_79c11c6a-9045-45ae-b526-8c09eef652e4.png": "11m",
82
+ "20250901_110736_257943_0_c175b9a1-6373-4ac2-9938-77a77d0aacb4.png": "11m",
83
+ "20250901_110736_261137_1_8ffe93ed-d02a-456e-9b4b-95777d963bfc.png": "12m",
84
+ "20250901_110736_272241_2_39fe9e71-a6f8-4e42-b89b-62f07a43f981.png": "5m",
85
+ "20250901_110736_274783_3_96acee8a-182d-4129-949a-924c4ec1b540.png": "4m",
86
+ "20250901_110736_279880_4_4e61a14c-7778-4e4e-acea-72b95b43bae6.png": "3m",
87
+ "20250901_110736_286713_-1_72fc2f51-8f5a-4674-8dd2-794fbab658b4.png": "11m",
88
+ "20250901_110737_058311_0_23958389-f9cc-4faf-862d-ab5cd4a29487.png": "11m",
89
+ "20250901_110737_072241_1_63a9f873-4d8b-4737-b349-1d6c31fb9f8d.png": "5m",
90
+ "20250901_110737_074415_2_7cb9cbfa-2385-4841-b665-569e37bbd043.png": "12m",
91
+ "20250901_110737_074415_3_684107d2-7dd5-453d-bc2f-0cba2d152a2c.png": "4m",
92
+ "20250901_110737_074415_4_782a3de6-9aea-4b81-a892-1126fb64dfdc.png": "3m",
93
+ "20250901_110737_082811_-1_62b596ba-3d33-45ea-bb66-cca17068c7b1.png": "11m",
94
+ "20250901_110738_032886_0_5b96512a-bcb1-4a13-9bbe-4e8b53148c3f.png": "11m",
95
+ "20250901_110738_036560_1_2cb56eed-8eea-46fd-9c61-e83ff108eeb4.png": "15m",
96
+ "20250901_110738_040963_2_09032b3c-3ab1-4d3b-a190-00e0afcda277.png": "12m",
97
+ "20250901_110738_040963_3_bdbb4222-19e9-4326-a8dd-f3cf571fdf97.png": "4m",
98
+ "20250901_110738_040963_4_1e9d31de-2c6c-43f0-aef1-9d900c2a0606.png": "3m",
99
+ "20250901_110738_049420_-1_fbcfb155-c160-4ebc-a331-4b5e2d9d0197.png": "11m",
100
+ "20250901_110738_995134_0_d094f29b-fee1-4118-b269-4f7c6f276d19.png": "11m",
101
+ "20250901_110738_996909_1_1fa69b29-db32-4aca-84b3-710b0b5c8fdc.png": "5m",
102
+ "20250901_110739_005168_2_57b87de9-30d6-44cc-ae02-f767ddee7e97.png": "12m",
103
+ "20250901_110739_009221_3_90c1e27d-b575-4446-b7f4-f2b10c57ad61.png": "4m",
104
+ "20250901_110739_014821_4_41e19b85-c195-442d-9b28-75fe69dfedde.png": "3m",
105
+ "20250901_110739_023831_-1_31e43da7-5488-4cfe-a255-c074c19e20b2.png": "11m",
106
+ "20250901_110740_242363_0_fe93b4fb-10b1-4154-98e5-6c595c996121.png": "11m",
107
+ "20250901_110740_252412_1_35de4ee5-140b-4074-a5ba-2a18ba46f88d.png": "5m",
108
+ "20250901_110740_262564_2_a9ef2bdb-5e44-46ac-bf8c-b53dcfb5b68a.png": "12m",
109
+ "20250901_110740_266621_3_0e892df9-b216-4541-99f0-cffde487de6a.png": "4m",
110
+ "20250901_110740_276833_4_a3960e53-a8fd-4be5-8df4-1788a5082817.png": "13m",
111
+ "20250901_110740_289309_-1_22ee6f80-6913-40e9-b15c-622ec319c9c3.png": "11m",
112
+ "20250901_110741_527428_0_58c1774c-b1d1-4b79-a897-fa1112427abe.png": "11m",
113
+ "20250901_110741_531483_1_737e89d4-0422-47b7-8c9a-01f40c3772f4.png": "5m",
114
+ "20250901_110741_533503_2_70c58d86-1339-424d-8464-21e4bc1284c0.png": "12m",
115
+ "20250901_110741_539589_3_d234e321-048d-4257-af42-ecbb1145f8ec.png": "4m",
116
+ "20250901_110741_543639_4_5ff6b8ed-6972-4313-9b62-1cfa3a06ff7d.png": "13m",
117
+ "20250901_110741_553778_-1_27d2a153-40a5-4ebf-a487-2e70a1e07bdc.png": "11m",
118
+ "20250901_110742_724929_0_9569b0f3-0701-42f8-a815-f46b106c0eb5.png": "11m",
119
+ "20250901_110742_733425_1_a39c72a4-c2e3-4a48-9101-5a0aacf3f0a0.png": "12m",
120
+ "20250901_110742_737468_2_a0894a4d-c879-4597-8f60-979fc66fab15.png": "5m",
121
+ "20250901_110742_741507_3_f7da96f6-7075-4fdf-8c1d-f0192edaa2b0.png": "4m",
122
+ "20250901_110742_745549_4_8a62a416-6687-42f0-991f-474ce04e9fcd.png": "13m",
123
+ "20250901_110742_753624_-1_298c0eaa-89dd-4881-b137-a2819e1f30d5.png": "11m",
124
+ "20250901_110743_957712_0_cee9d66c-0b90-4574-86d3-7a3265ed8f3e.png": "11m",
125
+ "20250901_110743_968174_1_ad9fee8b-0227-4bca-b66e-8e7573a5fd71.png": "5m",
126
+ "20250901_110743_973070_2_7f7149cc-94d5-42e3-a39d-7f2f7f60938a.png": "12m",
127
+ "20250901_110743_979304_3_7e513aae-0002-45f1-a01d-9008e04d7bb5.png": "4m",
128
+ "20250901_110743_985974_4_4380849d-c89e-4fa9-9da9-826b7918d9bf.png": "13m",
129
+ "20250901_110743_997815_-1_832af38e-3c09-4be9-b5bf-d0735d86ce99.png": "11m",
130
+ "20250901_110745_110026_0_2c37ea99-dd7e-4299-8d0b-cc3894158feb.png": "11m",
131
+ "20250901_110745_110026_1_bd0308da-537d-4d2e-8cc6-998f9942c5f9.png": "12m",
132
+ "20250901_110745_120117_2_4fa422cf-658b-46f9-99f3-b0e88a026495.png": "5m",
133
+ "20250901_110745_125193_3_bcd6e997-b0e7-46dc-8aad-03da74a0d20e.png": "4m",
134
+ "20250901_110745_130276_4_53b8e4a7-3caf-423f-ba98-a25a18118256.png": "3m",
135
+ "20250901_110745_142766_-1_32d5cccd-9b18-4f3f-bbb5-dfad403ad1c6.png": "11m",
136
+ "20250901_110746_148180_0_381d9ce6-ecc6-41c5-9e3d-9ba805095218.png": "11m",
137
+ "20250901_110746_148180_1_37296257-866e-4548-82bf-54acc1956f47.png": "12m",
138
+ "20250901_110746_156314_2_dfafce54-5ff2-49e0-a0d5-2b4e078a8f05.png": "5m",
139
+ "20250901_110746_157033_3_3c855392-6bc8-48b1-9008-ed0f0da699e9.png": "4m",
140
+ "20250901_110746_157033_4_d5f421b5-ea01-430f-ac5a-6ae5a8ee44d2.png": "3m",
141
+ "20250901_110746_164717_-1_4b96ef1a-dde1-4f16-a363-7e7c3d5ed87c.png": "11m",
142
+ "20250901_110747_166409_0_58fa60a7-6120-42fa-be42-d5f940301762.png": "11m",
143
+ "20250901_110747_166409_1_5d7f9bfe-7cb0-48cb-8caa-6c4e9347c602.png": "5m",
144
+ "20250901_110747_174983_2_b262a5e5-8638-4d6e-a7ec-cfeec75037db.png": "12m",
145
+ "20250901_110747_181051_3_e44ec2af-62c1-4b43-9896-f3974952fb54.png": "4m",
146
+ "20250901_110747_185094_4_cbf92ded-dd6c-4773-9948-b22b9e272c8c.png": "13m",
147
+ "20250901_110747_191153_-1_021b9c8e-4860-4977-975e-fe1360a0a881.png": "11m",
148
+ "20250901_110748_127606_0_ded1fddb-fd17-478c-9f9f-b3b701d418ed.png": "11m",
149
+ "20250901_110748_132370_1_e1afb38f-5df9-4d13-bf35-385ecfd75679.png": "5m",
150
+ "20250901_110748_142259_2_327bef8a-9e8a-44c8-8198-39db8b0d0b9d.png": "12m",
151
+ "20250901_110748_142259_3_b4facba1-5774-4434-949c-069a950050d2.png": "4m",
152
+ "20250901_110748_149489_4_335e6a53-9e14-4589-a70f-7c9b3c902d6f.png": "3m",
153
+ "20250901_110748_158347_-1_5a393b1d-6ce6-493f-ba84-d44ec3a46b0c.png": "11m",
154
+ "20250901_110749_140486_0_1af9b7de-62e4-4363-b23a-f15ae4985be9.png": "11m",
155
+ "20250901_110749_192045_1_a2290d9e-453a-4908-8017-d2d13a93cfbc.png": "5m",
156
+ "20250901_110749_204155_2_abe40318-0e1e-43b9-ba43-b06a27c19566.png": "12m",
157
+ "20250901_110749_208427_3_b2d8e0a0-7f14-4cec-a795-17b948ef2b9d.png": "4m",
158
+ "20250901_110749_214215_4_3fcdc32a-ba64-4eac-94d4-fd75bdb93652.png": "13m",
159
+ "20250901_110749_233420_-1_35ed8eb0-e649-4c28-9d17-38a9fe7eb676.png": "11m",
160
+ "20250901_110750_453401_0_341855af-1896-4416-a8d0-7050eec096d6.png": "11m",
161
+ "20250901_110750_459457_1_78799c59-d8ff-46e7-9261-db89745437ac.png": "12m",
162
+ "20250901_110750_463490_2_724a2147-98c5-49bf-80c8-b2d570de7780.png": "5m",
163
+ "20250901_110750_463490_3_f14a3e50-231d-4d8f-8cdb-0954b99bae7f.png": "4m",
164
+ "20250901_110750_478548_4_6c6d9317-befe-46b0-aa52-e1bc565c816b.png": "13m",
165
+ "20250901_110750_488132_-1_2025df41-fd2f-4aac-bb2e-b79e6d019067.png": "11m",
166
+ "20250901_110751_593471_0_d3ab5357-f24c-4d59-b055-21f422d31a77.png": "11m",
167
+ "20250901_110751_593471_1_e85b8e3c-606f-45af-82e7-1fbd1cf6ef4d.png": "12m",
168
+ "20250901_110752_763214_-1_63e934d0-29b5-4507-afdc-b1088f76935f.png": "11m",
169
+ "20250901_110753_607346_0_f0da8e18-1b5b-43ce-8d7f-737e4a5bc733.png": "12m",
170
+ "20250901_110753_622867_-1_a5122d01-1269-47c7-84b1-5dc3081861c1.png": "12m",
171
+ "20250901_110755_181386_0_da01b9dd-6b80-4aaa-b799-d7c9f394cb14.png": "12m",
172
+ "20250901_110755_193684_-1_54562da4-15d0-420f-b46a-0afb6f1b65f0.png": "12m",
173
+ "20250901_110756_024068_0_6d8377c0-000e-4aed-931d-bd57fb66fc98.png": "12m",
174
+ "20250901_110756_031124_-1_aaebd6fa-a7e7-455e-86c4-b3e49107bbe3.png": "12m",
175
+ "20250901_110756_842576_-1_2f0ead39-678f-4894-a53f-62c5c51f9d6f.png": "12m",
176
+ "20250901_110756_842576_0_d2b1cbd6-9769-4f78-abff-dc8c81b0a080.png": "12m",
177
+ "20250901_110757_624176_0_3b696edd-e970-4bc4-84e7-e30ad65e9540.png": "12m",
178
+ "20250901_110757_632413_-1_ee80972b-3c66-44a7-93dd-d2a396cbb87d.png": "12m",
179
+ "20250901_110758_418102_0_c469f442-c7a0-447b-90ea-d8bcc00f5813.png": "12m",
180
+ "20250901_110758_426280_-1_b4925713-b825-4d58-8275-7dfe3becc2f4.png": "12m",
181
+ "20250901_110759_190922_0_2f075c68-b503-4810-b031-9eaa2328cf4a.png": "12m",
182
+ "20250901_110759_201042_-1_8cd71c1d-fcd8-4e4b-8262-8e809bb14cf7.png": "12m",
183
+ "20250901_115122_737088_b60ca42b-e36a-4857-b291-f4ab2706bbff.png": "4m",
184
+ "20250901_115122_737088_e30cc112-520f-4b23-a4a8-06ceebf40210.png": "2m",
185
+ "20250901_115122_757359_5b96e73f-08e9-4652-8a16-bc821dd834cf.png": "6m",
186
+ "20250901_115122_757359_6ba69a37-718f-4655-b385-3a24784c8117.png": "5m",
187
+ "20250901_115122_773083_af85514c-974e-4843-82ad-c101c7dc8846.png": "6m",
188
+ "20250901_115122_784786_239a3775-551a-4245-9092-aa4a182a0cd5.png": "9m",
189
+ "20250901_115122_784786_45b77755-762b-4d73-8992-16687c18ec58.png": "12m",
190
+ "20250901_115122_800586_67f0c472-8644-4961-bf4a-e4a4d9edc6a7.png": "15m",
191
+ "20250901_115122_803834_c5fe9435-dc49-4627-9e16-5913ba6386d2.png": "10m",
192
+ "20250901_115122_816423_21d2435f-32ea-4059-ad79-9a9b09a597b7.png": "18m",
193
+ "20250901_115122_816423_cf4f837a-c885-401f-b1be-5c3f7d3c76d1.png": "11m",
194
+ "20250901_115122_837783_03e39e3c-65ad-4605-8e08-78732a58a92d.png": "19m",
195
+ "20250901_115122_837783_fcd4612d-9e10-47d6-9f9f-f8e589716830.png": "10m",
196
+ "20250901_115122_880373_79086f98-b512-4fbf-b5d7-fda77a55297b.png": "20m",
197
+ "20250901_115122_888832_3378a1b0-3663-4007-8479-c1a05d63b7cf.png": "21m",
198
+ "20250901_115122_905730_ca2407dd-3139-4fd8-8958-b7bf520cb892.png": "24m",
199
+ "20250901_115122_966748_65d3c518-ae86-4e0b-af9e-ec3a140aab25.png": "2m",
200
+ "20250901_115122_976855_0235186d-74a8-4bf5-8be8-d019f11d4da1.png": "4m",
201
+ "20250901_115122_985780_c1426c76-02d6-4ad4-81fe-17af6b419e11.png": "6m",
202
+ "20250901_115122_993430_374dcfc5-34fd-4632-bdba-ae556e7ec6e2.png": "5m",
203
+ "20250901_115123_004276_14ed5806-472a-4922-b34f-a02c160d5e5e.png": "9m",
204
+ "20250901_115123_004276_a0f76936-e44d-47b0-89bc-4afbbdcac4d1.png": "6m",
205
+ "20250901_115123_017493_09be0601-d644-4f68-8915-d0ffaa4b005d.png": "15m",
206
+ "20250901_115123_017493_cb933601-78bf-401a-b8b8-c330304ba62b.png": "12m",
207
+ "20250901_115123_033463_6726df96-8aab-47d7-b328-987c162b5def.png": "10m",
208
+ "20250901_115123_049374_3384425a-99c7-4230-944b-172ff6cbc2d0.png": "18m",
209
+ "20250901_115123_049374_eb973787-0bbc-4592-a690-971b781a8f4f.png": "11m",
210
+ "20250901_115123_065175_00f6a787-a457-4752-8c87-1be56d27345d.png": "10m",
211
+ "20250901_115123_065175_07db25ae-5b31-4dfe-a6b2-435e750bbc1f.png": "9m",
212
+ "20250901_115123_104291_d1a8a407-1a73-408f-b22a-ed280be1c3c6.png": "22m",
213
+ "20250901_115123_112956_addbafda-8e4e-421a-8f98-d4bbd21015e5.png": "20m",
214
+ "20250901_115123_112956_ca4bdd2d-a486-4d65-bbd9-b6575642af2f.png": "21m",
215
+ "20250901_115123_129022_0c442bb8-e30f-442e-8d6a-700b3ba8a838.png": "24m",
216
+ "20250901_115123_185798_f22a9190-2e38-4127-87de-26ab2759101f.png": "2m",
217
+ "20250901_115123_203280_bcfc599b-bc34-4d7a-aa2c-ae8e071afa17.png": "4m",
218
+ "20250901_115123_211439_d6776585-3c03-469c-adf1-525674511898.png": "6m",
219
+ "20250901_115123_218436_a8760228-8a47-4ecd-bc0c-0e49afad6a9c.png": "5m",
220
+ "20250901_115123_229794_5496edda-37c3-4a5e-95fa-1ad243e57426.png": "6m",
221
+ "20250901_115123_241536_137e093b-436c-4504-b5f1-5e5e116e0024.png": "12m",
222
+ "20250901_115123_241536_67dda1c8-665c-447d-803e-c986c46b2633.png": "9m",
223
+ "20250901_115123_257521_4e8af324-5ed5-413d-b768-c356a9f74505.png": "10m",
224
+ "20250901_115123_257521_eae41e7c-3f17-4458-ba13-5c57655d6168.png": "15m",
225
+ "20250901_115123_273307_d86e7a9e-ba57-4ddb-bd69-6e6a9076df3e.png": "11m",
226
+ "20250901_115123_289126_3341f07a-fcdd-40a0-8a92-6833323b8ee8.png": "18m",
227
+ "20250901_115123_289126_a4ad73e9-404e-4e85-9974-d6fc09328a14.png": "9m",
228
+ "20250901_115123_304293_666ca2a2-3c9a-4208-838a-311b7d72b5a6.png": "10m",
229
+ "20250901_115123_336458_ccd9d646-fc99-4d27-8076-0c17d0dba784.png": "20m",
230
+ "20250901_115123_352471_eb97e5c2-d7a9-430b-8d27-991b279f45aa.png": "21m",
231
+ "20250901_115123_370505_07463c4d-b6d3-4097-ba93-07bd0688e660.png": "24m",
232
+ "20250901_115123_439065_a90eb93a-3cfa-4719-a9a9-e2604f15e30d.png": "2m",
233
+ "20250901_115123_451797_1e34678b-34dc-4069-8c38-cdf5bd64ffd0.png": "4m",
234
+ "20250901_115123_464998_8ef984aa-c0cb-4994-972b-ddd59f9ea102.png": "6m",
235
+ "20250901_115123_474046_fded311f-f5c6-409b-a475-7dc4fdecd9a1.png": "5m",
236
+ "20250901_115123_489859_2675e9d2-ba97-43ac-96d3-a936b4cc5dd4.png": "6m",
237
+ "20250901_115123_504456_9c24ae1b-8ea8-4f08-86e4-28ffd2a78e59.png": "9m",
238
+ "20250901_115123_524380_5ad58da2-1dee-4170-952b-41ecbbed99e0.png": "12m",
239
+ "20250901_115123_543594_8c8665a1-87ec-479b-a220-5e86bbb64796.png": "15m",
240
+ "20250901_115123_556927_b39709ae-1050-48b1-afd3-c0104153e96b.png": "10m",
241
+ "20250901_115123_568476_27577bae-79f9-4b00-9372-961cb4884adf.png": "11m",
242
+ "20250901_115123_589175_39e9b685-3465-4694-afd8-7d906047916d.png": "18m",
243
+ "20250901_115123_604957_a8075d2a-a7a9-415c-8da8-f532d64b9b78.png": "19m",
244
+ "20250901_115123_621023_88ad3804-c7bf-4d68-998b-900f6227a7dd.png": "10m",
245
+ "20250901_115123_667595_546e7b12-116b-43c9-aeca-2fc12d679b93.png": "20m",
246
+ "20250901_115123_681572_210448b8-0a07-4db7-8ae3-ac78a17c1a98.png": "21m",
247
+ "20250901_115123_705358_aa484c23-6785-4769-8a2c-2c6de1e41aa0.png": "24m",
248
+ "20250901_115123_785623_be871ea8-2775-4dbc-a375-a4ee3d2bd1fc.png": "3m",
249
+ "20250901_115123_797630_632e0e57-5dfe-4bd6-a46e-8f07e72c8603.png": "17m",
250
+ "20250901_115123_805521_6b001405-a652-47a2-a103-e3e693fedc4f.png": "9m",
251
+ "20250901_115123_821506_8e8a254d-6ee9-4854-a8c2-d21596a8422c.png": "5m",
252
+ "20250901_115123_835334_ac8a0fbd-635f-4696-9e47-a8840c6351ae.png": "6m",
253
+ "20250901_115123_849150_1b48853e-9379-4495-98db-4db823ec0500.png": "9m",
254
+ "20250901_115123_864830_20a63037-fce4-40dc-b7b4-0b47c1be4bea.png": "12m",
255
+ "20250901_115123_870854_3b9dff5f-fcea-4757-8cba-82c39a3b126a.png": "20m",
256
+ "20250901_115123_886502_3d8a48e5-fc90-4c44-97f0-e6ecf61be99e.png": "111m",
257
+ "20250901_115123_905675_fb46adf9-ba37-4f99-8cc6-9c0a92072476.png": "5m",
258
+ "20250901_115123_919629_a3d629b2-de72-4732-87ed-3830b8a21ad2.png": "18m",
259
+ "20250901_115123_937797_8b4b1164-2c09-41aa-9268-07667de40944.png": "10m",
260
+ "20250901_115123_987695_1746c889-9414-44ac-8e91-9102538abe22.png": "22m",
261
+ "20250901_115124_005885_8f7a730c-38a5-41d5-bee7-f5e7775a33fd.png": "20m",
262
+ "20250901_115124_021137_847c1e86-a2d6-4a78-a13b-0a5b879cbfd9.png": "11m",
263
+ "20250901_115124_052035_412f7fdb-91d8-427b-a8c6-1b99e3e4ae72.png": "24m",
264
+ "20250901_115124_127127_ac246a2e-df6d-4816-bcb2-e63b47c483e2.png": "2m",
265
+ "20250901_115124_141403_d5304ac9-c1ea-4a9b-949f-b684417ee1fe.png": "4m",
266
+ "20250901_115124_160100_a4ae6720-11dd-4a1e-9078-1b6d859b010d.png": "6m",
267
+ "20250901_115124_172402_09e4467c-fba4-466c-a223-69dc9a76fc86.png": "5m",
268
+ "20250901_115124_190034_1ccd5c91-e282-415d-805a-d8e75eef9f1d.png": "6m",
269
+ "20250901_115124_206181_e17ce8f6-2187-44aa-94ba-08d428a1e29c.png": "9m",
270
+ "20250901_115124_222412_720b86b9-f3b6-4c15-897e-8b6342b877f8.png": "12m",
271
+ "20250901_115124_239513_c7883dd5-9bd6-48e4-83f3-5f33f46d39c4.png": "15m",
272
+ "20250901_115124_257685_6f98bb03-0f37-4fab-a431-375a583d392e.png": "10m",
273
+ "20250901_115124_273432_c40866ae-8972-4aaa-8fae-578a0be1d079.png": "11m",
274
+ "20250901_115124_300408_8111398a-cfe2-44a6-8664-97c997c236c3.png": "18m",
275
+ "20250901_115124_316275_183f3d54-e669-4a95-9114-232bc02e204c.png": "19m",
276
+ "20250901_115124_338303_69e2d01b-0c08-450e-9f67-c289f2e65601.png": "10m",
277
+ "20250901_115124_444477_300fb1ba-4091-41bc-b50b-3e600ecdd5ff.png": "20m",
278
+ "20250901_115124_469974_96d323c0-2fee-4232-abbf-27f3a02397ff.png": "21m",
279
+ "20250901_115124_535793_32ef3807-daea-4133-8e9c-3b1834af5edb.png": "25m",
280
+ "20250901_115124_662183_957fb87b-31e1-429c-9b30-74c6fd0b33ae.png": "3m",
281
+ "20250901_115124_688589_873184fb-08eb-4e86-8a4f-19a21cb4200b.png": "17m",
282
+ "20250901_115124_706491_8121ff6c-059e-4cfd-88ce-78ed85b81d77.png": "9m",
283
+ "20250901_115124_736644_adf67776-b838-4122-9389-1fb2809a20c5.png": "5m",
284
+ "20250901_115124_759518_086d9708-c195-4195-ba8b-4f2a2a192b12.png": "6m",
285
+ "20250901_115124_778600_d6cc87d2-c3a1-4b46-9cf8-ac7b6566c623.png": "9m",
286
+ "20250901_115124_805454_96ca12ed-f023-4699-ba55-16046056a449.png": "12m",
287
+ "20250901_115124_828357_ebe93008-d691-4ebc-ae57-dd81dd9779e5.png": "20m",
288
+ "20250901_115124_878237_96f48286-1f16-4f23-b1fd-054d26918d54.png": "5m",
289
+ "20250901_115124_910340_979f06e1-7cf9-4858-900a-e519ca70fe7d.png": "18m",
290
+ "20250901_115124_958182_f9045a1a-0c12-427c-863a-f3dbcdf1d1a4.png": "10m",
291
+ "20250901_115125_035647_f1277afb-e842-4f15-b028-e9333d6613fb.png": "22m",
292
+ "20250901_115125_070685_0aacbb16-ea5c-4965-b36b-fdbbecdc801f.png": "20m",
293
+ "20250901_115125_081033_4353bcf3-a3d0-41d3-8e5e-bd486b8170fa.png": "11m",
294
+ "20250901_115125_153835_f2fb5958-5e82-4481-9f4b-377b0441f0cd.png": "24m"
295
+ }
draft_computation/ocr/train_ocr.py ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import os
3
+ import json
4
+ from PIL import Image
5
+ import torch
6
+ from torch.utils.data import Dataset, DataLoader
7
+ import math # For ceil in input_lengths calculation
8
+ import shutil # For cleaning up dummy data
9
+ import logging
10
+ import numpy as np
11
+
12
+ from transformers import VisionEncoderDecoderModel, AutoImageProcessor, AutoTokenizer, TrainingArguments, Trainer
13
+ from jiwer import cer # For CER calculation
14
+
15
+ # --- Setup Logging ---
16
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
17
+
18
+ # --- OCRDataset (Adapted for Hugging Face) ---
19
class OCRDataset(Dataset):
    """Image-to-text dataset backed by a JSON filename -> transcription map.

    Expects the layout ``<root_dir>/images/*.png`` plus
    ``<root_dir>/annotations.json`` mapping each image filename to its
    ground-truth string.
    """

    def __init__(self, root_dir):
        self.root_dir = root_dir
        self.image_dir = os.path.join(root_dir, "images")

        # The annotation file maps image filenames to their target strings.
        mapping_file_path = os.path.join(root_dir, "annotations.json")  # Assuming the JSON is named annotations.json
        logging.info(f"Loading mapping file from: {mapping_file_path}")
        with open(mapping_file_path, 'r', encoding='utf-8') as f:
            self.data = json.load(f)
        logging.info(f"Loaded {len(self.data)} entries from mapping file.")

        # Freeze an iteration order so integer indices are stable.
        self.image_filenames = list(self.data.keys())

    def __len__(self):
        return len(self.image_filenames)

    def __getitem__(self, idx):
        filename = self.image_filenames[idx]
        target_text = self.data[filename]

        # Decode lazily and normalize to 3 channels for the vision encoder.
        image = Image.open(os.path.join(self.image_dir, filename)).convert("RGB")

        # Return the raw (PIL.Image, str) pair; batching/tensorization is
        # deferred to the collate function.
        return image, target_text
46
+
47
+ # --- Custom Collate Function for Hugging Face Processors ---
48
+ # This function will be passed to the DataLoader
49
def collate_fn_hf(batch, image_processor, tokenizer):
    """Collate (PIL.Image, str) pairs into the dict the HF Trainer expects.

    Returns {"pixel_values": FloatTensor, "labels": LongTensor} where padding
    positions in ``labels`` are replaced by -100 so the cross-entropy loss
    ignores them.
    """
    images, texts = zip(*batch)

    # Process images using AutoImageProcessor: resizing, normalization,
    # stacking into a single batch tensor.
    pixel_values = image_processor(images=list(images), return_tensors="pt").pixel_values

    # Tokenize texts, padding to the longest sequence in the batch.
    labels = tokenizer(text=list(texts), padding="longest", return_tensors="pt").input_ids

    # BUGFIX: mask padding tokens with -100. Previously the real PAD ids were
    # left in the labels, so the model was trained to predict padding and the
    # loss was diluted. compute_metrics in this file already maps -100 back to
    # pad_token_id before decoding, so the rest of the pipeline expects this.
    labels[labels == tokenizer.pad_token_id] = -100

    # Dictionary layout expected by the Hugging Face Trainer.
    return {"pixel_values": pixel_values, "labels": labels}
62
+
63
+ # --- Define compute_metrics for Trainer ---
64
def compute_metrics(pred):
    """Compute the character error rate (CER) for a Trainer evaluation step.

    NOTE(review): relies on the module-level ``tokenizer`` bound in the
    ``__main__`` block below; only usable after that script section has run.
    """
    # BUGFIX: copy before editing — the original wrote into pred.label_ids in
    # place, mutating an array owned by the Trainer.
    labels_ids = np.array(pred.label_ids)
    pred_logits = pred.predictions[0]
    # Greedy decode: highest-scoring token at each position (eval here runs
    # with teacher forcing, not model.generate()).
    pred_ids = np.argmax(pred_logits, axis=-1)

    # -100 marks ignored (padding) label positions and cannot be decoded;
    # map back to PAD, which is then dropped by skip_special_tokens.
    labels_ids[labels_ids == -100] = tokenizer.pad_token_id

    pred_str = tokenizer.batch_decode(pred_ids, skip_special_tokens=True)
    label_str = tokenizer.batch_decode(labels_ids, skip_special_tokens=True)

    # Calculate CER (jiwer): lower is better.
    cer_score = cer(label_str, pred_str)
    logging.info(f"Validation CER: {cer_score}")
    return {"cer": cer_score}
79
+
80
+ # --- Main Training Script ---
81
if __name__ == '__main__':
    logging.info("Starting OCR training script.")
    # Dataset root: expects <root>/images/ plus <root>/annotations.json.
    data_root_dir = "text_dataset"
    logging.info(f"Using dataset at: {os.path.abspath(data_root_dir)}")

    # --- Hugging Face Model and Processor Loading ---
    # ViT image encoder + tiny BERT decoder, glued into a VisionEncoderDecoder.
    #encoder_id = "google/mobilenet_v3_small_100_224"
    encoder_id = "google/vit-base-patch16-224-in21k"
    decoder_id = "prajjwal1/bert-tiny"

    logging.info(f"Loading encoder: {encoder_id}")
    logging.info(f"Loading decoder: {decoder_id}")

    model = VisionEncoderDecoderModel.from_encoder_decoder_pretrained(
        encoder_pretrained_model_name_or_path=encoder_id,
        decoder_pretrained_model_name_or_path=decoder_id,
    )

    image_processor = AutoImageProcessor.from_pretrained(encoder_id)
    # NOTE: `tokenizer` is also read as a module-level global by
    # compute_metrics above.
    tokenizer = AutoTokenizer.from_pretrained(decoder_id)
    logging.info("Model, image processor, and tokenizer loaded.")

    # --- Set special tokens and generation parameters ---
    # BERT has no dedicated BOS/EOS; reuse [CLS] as decoder start and [SEP]
    # as end-of-sequence.
    model.config.decoder_start_token_id = tokenizer.cls_token_id
    model.config.pad_token_id = tokenizer.pad_token_id
    model.config.vocab_size = tokenizer.vocab_size  # Ensure model knows decoder vocab size

    model.config.eos_token_id = tokenizer.sep_token_id
    # Generation-time settings (used by model.generate(), e.g. at inference).
    model.config.max_length = 64
    model.config.early_stopping = True
    model.config.no_repeat_ngram_size = 3
    model.config.length_penalty = 2.0
    model.config.num_beams = 4
    logging.info("Model configuration set.")

    # --- Dataset and DataLoader Setup ---
    logging.info("Setting up datasets.")
    train_dataset = OCRDataset(root_dir=data_root_dir)
    # For a real project, you'd split your data into train/val/test.
    # Here train and validation deliberately share the same data, so the
    # reported eval metrics are optimistic.
    val_dataset = OCRDataset(root_dir=data_root_dir)
    logging.info(f"Training dataset size: {len(train_dataset)}")
    logging.info(f"Validation dataset size: {len(val_dataset)}")

    # --- Training Arguments ---
    training_args = TrainingArguments(
        output_dir="./ocr_model_output",  # Checkpoints and logs land here
        per_device_train_batch_size=2,
        per_device_eval_batch_size=2,
        num_train_epochs=3,  # Small number for quick demo
        logging_dir="./logs",
        logging_steps=10,
        # save_steps=500,  # (unused: save_strategy="epoch" takes precedence)
        eval_strategy ="epoch",  # Evaluate at the end of each epoch
        save_strategy ="epoch",  # Save a checkpoint at the end of each epoch
        save_total_limit=2,  # Only keep the last 2 checkpoints
        report_to="none",  # Disable reporting to W&B, MLflow etc.
        # predict_with_generate=True,  # NOTE(review): left disabled, so eval
        # predictions are teacher-forced logits; compute_metrics argmaxes them
        # instead of calling generate() — confirm this is intended.
        load_best_model_at_end=True,  # Restore best checkpoint after training
        metric_for_best_model="cer",  # "cer" comes from compute_metrics
        greater_is_better=False,  # Lower CER is better
    )
    logging.info("Training arguments set.")

    # --- Trainer Initialization ---
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        # Bind image_processor and tokenizer into the collator via a lambda.
        data_collator=lambda batch: collate_fn_hf(batch, image_processor, tokenizer),
        compute_metrics=compute_metrics,
    )
    logging.info("Trainer initialized.")

    # --- Start Training ---
    logging.info("--- Starting Training ---")
    trainer.train()
    logging.info("--- Training finished! ---")
161
+
162
+
integration_test.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import json
3
+ import random
4
+ import os
5
+ from datetime import datetime, timedelta
6
+
7
+ # --- Configuration ---
8
+ API_URL = "http://127.0.0.1:8000/measure"
9
+ IMAGE_DIR = "test_data"
10
+ IMAGE_NAME = "test1_post.png" # The name of the image file you will provide
11
+
12
+ # --- Pre-defined list of sample metadata ---
13
+ METADATA_SAMPLES = [
14
+ {
15
+ "ship_id": "IMO9321483",
16
+ "timestamp": (datetime.utcnow() - timedelta(hours=2)).isoformat() + "Z",
17
+ "latitude": 1.2646,
18
+ "longitude": 103.8357,
19
+ "camera_id": "CAM-04"
20
+ },
21
+ {
22
+ "ship_id": "IMO9839272",
23
+ "timestamp": (datetime.utcnow() - timedelta(minutes=45)).isoformat() + "Z",
24
+ "latitude": 51.9432,
25
+ "longitude": 4.1497,
26
+ "camera_id": "JETTY-7B"
27
+ },
28
+ {
29
+ "ship_id": "IMO9450259",
30
+ "timestamp": (datetime.utcnow() - timedelta(days=1)).isoformat() + "Z",
31
+ "latitude": 31.2244,
32
+ "longitude": 121.4737,
33
+ "camera_id": "FIXED-PIER-3"
34
+ },
35
+ {
36
+ "ship_id": "IMO9226788",
37
+ "timestamp": datetime.utcnow().isoformat() + "Z",
38
+ "latitude": 33.7542,
39
+ "longitude": -118.2165,
40
+ "camera_id": "DRONE-ALPHA"
41
+ }
42
+ ]
43
+
44
def run_test():
    """Run a single end-to-end test against the /measure endpoint.

    Sends the sample image plus a randomly chosen metadata payload and
    prints the server's response.  All outcomes (missing image, connection
    failure, HTTP error) are reported on stdout; returns None.
    """
    image_path = os.path.join(IMAGE_DIR, IMAGE_NAME)

    # 1. The image must exist locally before we attempt an upload.
    if not os.path.exists(image_path):
        print(f"Error: Test image not found at '{image_path}'")
        print("Please place your test image there before running the script.")
        return

    # 2. Randomly select a metadata object so repeated runs vary.
    metadata = random.choice(METADATA_SAMPLES)
    print(f"Selected metadata for this test run:\n{json.dumps(metadata, indent=2)}\n")

    # 3. Open the image file and send the multipart request.
    try:
        with open(image_path, "rb") as image_file:
            files = {"image": (IMAGE_NAME, image_file, "image/png")}
            form_data = {"metadata_json": json.dumps(metadata)}

            print(f"Sending request to {API_URL}...")
            # Fix: requests has NO default timeout -- without one a hung
            # server blocks this script forever.  60s covers model inference.
            response = requests.post(API_URL, files=files, data=form_data, timeout=60)

        # 4. Print the server's response.
        print("\n--- Server Response ---")
        print(f"Status Code: {response.status_code}")
        if response.status_code == 200:
            print("Response JSON:")
            print(response.json())
        else:
            print("Error Response Text:")
            print(response.text)
        print("-----------------------")

    except requests.exceptions.ConnectionError as e:
        print("\nError: Connection to the API server failed.")
        print("Please ensure the main application is running (`python main.py`).")
        print(f"Details: {e}")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")


if __name__ == "__main__":
    run_test()
main.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, File, UploadFile, Form, Depends
2
+ import uvicorn
3
+ import json
4
+ from contextlib import asynccontextmanager
5
+ from fastapi.staticfiles import StaticFiles # Add this import
6
+ from PIL import Image
7
+ import io
8
+
9
+ from services.measurement_service import MeasurementService
10
+ from services.pdf_service import PdfService
11
+ from services.persistence_service import PersistenceService
12
+ from models.schemas import MeasurementMetadata
13
+ from models.database import create_db_and_tables
14
+
15
+
16
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan hook: build the SQLite schema before serving.

    Runs create_db_and_tables() once at startup; there is no shutdown
    cleanup, so the generator simply yields for the app's lifetime.
    """
    # create database and tables on startup
    create_db_and_tables()
    yield
21
+
22
# NOTE(review): mid-file import -- consider moving into the top import block.
from fastapi.middleware.cors import CORSMiddleware

app = FastAPI(title="Ship Draft Measurement API", lifespan=lifespan)

# CORS is wide open so any frontend origin can call the API.
# NOTE(review): allow_origins=["*"] is fine for development but should be
# restricted to known origins before production deployment.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Allows all origins
    allow_credentials=True,
    allow_methods=["*"],  # Allows all methods
    allow_headers=["*"],  # Allows all headers
)

# Mount static files directory: generated PDFs saved under ./reports become
# downloadable at /static/<filename>.
app.mount("/static", StaticFiles(directory="./reports"), name="static")  # Add this line

# Initialize services as module-level singletons shared by all requests.
measurement_service = MeasurementService()
pdf_service = PdfService()
persistence_service = PersistenceService()
41
+
42
+ @app.get("/", tags=["Health Check"])
43
def read_root():
    """Health check: confirm the service is up and responding."""
    status_payload = {"status": "API is running"}
    return status_payload
46
+
47
+ @app.post("/measure", tags=["Measurement"])
48
+ async def measure_draft(
49
+ image: UploadFile = File(...),
50
+ metadata_json: str = Form(...)
51
+ ):
52
+ """
53
+ Receives an image and metadata, performs draft measurement,
54
+ generates a PDF report, and saves it.
55
+ """
56
+ # 1. Read image content
57
+ image_bytes = await image.read()
58
+
59
+ # 2. Deserialize and validate metadata
60
+ metadata_dict = json.loads(metadata_json)
61
+ metadata = MeasurementMetadata(**metadata_dict)
62
+
63
+ # 3. Perform measurement (using placeholder service)
64
+ image_pil = Image.open(io.BytesIO(image_bytes))
65
+ measurement_results, ml_results = measurement_service.measure(image_pil)
66
+
67
+ # 4. Generate PDF report
68
+ pdf_report_bytes = pdf_service.create_report(
69
+ image_bytes=image_bytes,
70
+ metadata=metadata,
71
+ results=measurement_results,
72
+ ml_results=ml_results
73
+ )
74
+
75
+ # 5. Persist the report
76
+ report_path = persistence_service.save_to_disk(pdf_report_bytes, metadata)
77
+ persistence_service.save_to_db(
78
+ report_path,
79
+ metadata,
80
+ measurement_results.draft_measurement,
81
+ measurement_results.confidence_score,
82
+ image_bytes
83
+ )
84
+
85
+ # Optional: Send email
86
+ persistence_service.send_by_email(report_path, "pfnfcat@gmail.com")
87
+
88
+ return {
89
+ "message": "Measurement complete and report generated.",
90
+ "report_path": report_path,
91
+ "results": measurement_results
92
+ }
93
+
94
+ from datetime import datetime
95
+ from typing import Optional
96
+
97
+ @app.get("/reports", tags=["Reports"])
98
+ def get_reports(
99
+ skip: int = 0,
100
+ limit: int = 10,
101
+ search: Optional[str] = None,
102
+ start_date: Optional[datetime] = None,
103
+ end_date: Optional[datetime] = None,
104
+ ):
105
+ """Returns a list of all reports."""
106
+ return persistence_service.get_all_reports(
107
+ skip=skip,
108
+ limit=limit,
109
+ search=search,
110
+ start_date=start_date,
111
+ end_date=end_date,
112
+ )
113
+
114
+
115
+ @app.get("/reports/{report_id}", tags=["Reports"])
116
+ def get_report(report_id: int):
117
+ """Returns a single report by its ID."""
118
+ report = persistence_service.get_report_by_id(report_id)
119
+ if report is None:
120
+ return {"error": "Report not found"}
121
+ return report
122
+
123
+
124
+ if __name__ == "__main__":
125
+ uvicorn.run(app, host="0.0.0.0", port=8000)
models/__init__.py ADDED
File without changes
models/database.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from sqlalchemy import create_engine, Column, Integer, String, Float, DateTime, LargeBinary
# Fix: declarative_base moved out of sqlalchemy.ext.declarative in SQLAlchemy
# 1.4 and the old location is deprecated under the pinned SQLAlchemy 2.x --
# import it from sqlalchemy.orm instead.
from sqlalchemy.orm import declarative_base, sessionmaker

# Define the database connection URL (SQLite file in the working directory)
DATABASE_URL = "sqlite:///./ship_draft_reports.db"

# Create the SQLAlchemy engine.
# check_same_thread=False is SQLite-specific: it allows the connection to be
# used from the web server's worker threads.
engine = create_engine(
    DATABASE_URL, connect_args={"check_same_thread": False}
)

# Create a session factory
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)

# Create a base class for declarative models
Base = declarative_base()
18
+
19
+ # Define the Report model
20
# Define the Report model
class Report(Base):
    """ORM row for one generated draft-measurement report."""
    __tablename__ = "reports"

    id = Column(Integer, primary_key=True, index=True)
    ship_id = Column(String, index=True)    # vessel identifier, e.g. "IMO9321483"
    timestamp = Column(DateTime)            # measurement time from request metadata
    latitude = Column(Float)
    longitude = Column(Float)
    draft_measurement = Column(Float)       # measured draft (meters)
    confidence_score = Column(Float)  # New field
    pdf_path = Column(String, unique=True)  # on-disk path of the generated PDF
    image_bytes = Column(LargeBinary)  # New field; raw uploaded image stored inline
32
+
33
def create_db_and_tables():
    """Function to create the database and tables."""
    # Idempotent: create_all only creates tables that do not already exist,
    # so it is safe to run on every application startup.
    Base.metadata.create_all(bind=engine)
models/schemas.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel
2
+ from datetime import datetime
3
+ from typing import Optional
4
+
5
class MeasurementMetadata(BaseModel):
    """Schema for the metadata received with the image."""
    ship_id: str                     # vessel identifier, e.g. "IMO9321483"
    timestamp: datetime              # when the image was captured
    latitude: float
    longitude: float
    camera_id: Optional[str] = None  # optional identifier of the capturing camera
12
+
13
class MeasurementResult(BaseModel):
    """Schema for the results from the measurement service."""
    draft_measurement: float  # The measured draft in meters
    confidence_score: float   # presumably in [0, 1] -- verify against producer
requirements.txt ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# NOTE: the previously unpinned entries here (fastapi, uvicorn[standard],
# python-multipart, SQLAlchemy, reportlab, Pillow) all duplicated pinned
# entries below (fastapi==0.116.1, uvicorn==0.35.0, python-multipart==0.0.20,
# SQLAlchemy==2.0.43, reportlab==4.4.3, pillow==11.3.0); the duplicates are
# removed to avoid redundant/conflicting requirements.  uvicorn's "standard"
# extras (httptools, watchfiles, websockets, python-dotenv) are already
# listed explicitly below.
7
+ accelerate==1.10.1
8
+ annotated-types==0.7.0
9
+ anyio==4.10.0
10
+ av==15.0.0
11
+ certifi==2025.8.3
12
+ charset-normalizer==3.4.3
13
+ click==8.2.1
14
+ colorama==0.4.6
15
+ contourpy==1.3.3
16
+ cycler==0.12.1
17
+ fastapi==0.116.1
18
+ filelock==3.19.1
19
+ fonttools==4.59.2
20
+ fsspec==2025.7.0
21
+ greenlet==3.2.4
22
+ grpcio==1.74.0
23
+ grpcio-tools==1.74.0
24
+ h11==0.16.0
25
+ hf-xet==1.1.9
26
+ httptools==0.6.4
27
+ huggingface-hub==0.34.4
28
+ idna==3.10
29
+ Jinja2==3.1.6
30
+ jiwer==4.0.0
31
+ kiwisolver==1.4.9
32
+ MarkupSafe==3.0.2
33
+ matplotlib==3.10.5
34
+ mpmath==1.3.0
35
+ networkx==3.5
36
+ numpy==2.2.6
37
+ opencv-python==4.12.0.88
38
+ packaging==25.0
39
+ pillow==11.3.0
40
+ polars==1.32.3
41
+ protobuf==6.32.0
42
+ psutil==7.0.0
43
+ py-cpuinfo==9.0.0
44
+ pydantic==2.11.7
45
+ pydantic_core==2.33.2
46
+ pyparsing==3.2.3
47
+ python-dateutil==2.9.0.post0
48
+ python-dotenv==1.1.1
49
+ python-multipart==0.0.20
50
+ PyYAML==6.0.2
51
+ RapidFuzz==3.14.0
52
+ regex==2025.8.29
53
+ reportlab==4.4.3
54
+ requests==2.32.5
55
+ safetensors==0.6.2
56
+ scipy==1.16.1
57
+ six==1.17.0
58
+ sniffio==1.3.1
59
+ SQLAlchemy==2.0.43
60
+ starlette==0.47.3
61
+ sympy==1.14.0
62
+ tokenizers==0.22.0
63
+ torch==2.8.0
64
+ torchvision==0.23.0
65
+ tqdm==4.67.1
66
+ transformers==4.56.0
67
+ typing-inspection==0.4.1
68
+ typing_extensions==4.14.1
69
+ ultralytics==8.3.189
70
+ ultralytics-thop==2.0.16
71
+ urllib3==2.5.0
72
+ uvicorn==0.35.0
73
+ watchfiles==1.1.0
74
+ websockets==15.0.1
services/__init__.py ADDED
File without changes
services/measurement_service.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from models.schemas import MeasurementResult
2
+ import time
3
+
4
+ from draft_computation import run
5
+
6
class MeasurementService:
    """
    Service wrapping the draft-measurement pipeline.

    The summary draft/confidence numbers returned are still hard-coded
    placeholder values; the real per-image results come from
    draft_computation.run() and are returned alongside for the PDF report.
    """
    def measure(self, image_bytes):
        """
        Run the draft-computation pipeline on one image.

        Args:
            image_bytes: the input image.  NOTE(review): despite the name,
                the caller in main.py passes a PIL.Image, not raw bytes --
                the wrong `bytes` annotation was removed; confirm what
                draft_computation.run() actually expects.

        Returns:
            A (MeasurementResult, dict) tuple: placeholder summary metrics
            plus the raw ML pipeline results.  (The previous
            `-> MeasurementResult` annotation was wrong: a tuple is returned.)
        """
        # Fix: removed the leftover time.sleep(2) "simulated processing"
        # delay and the debug print -- run() performs the real inference.
        results = run(image_bytes)

        return MeasurementResult(
            draft_measurement=7.85,  # placeholder measurement in meters
            confidence_score=0.958   # placeholder confidence
        ), results
services/ml_results_example.txt ADDED
@@ -0,0 +1,724 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {'draft': np.float64(11.85), 'pose_results': tensor([[[3.8799e+02, 2.3064e+02, 9.9727e-01],
2
+ [4.3032e+02, 2.9595e+02, 9.9999e-01],
3
+ [4.5217e+02, 3.5503e+02, 1.0000e+00],
4
+ [4.7843e+02, 4.1626e+02, 1.0000e+00],
5
+ [5.0423e+02, 4.7074e+02, 9.9908e-01]],
6
+
7
+ [[5.1116e+02, 5.2845e+02, 9.9898e-01],
8
+ [5.5497e+02, 5.8498e+02, 1.0000e+00],
9
+ [5.8281e+02, 6.3961e+02, 1.0000e+00],
10
+ [6.1391e+02, 6.9308e+02, 1.0000e+00],
11
+ [6.4442e+02, 7.4322e+02, 9.9942e-01]],
12
+
13
+ [[3.3734e+02, 0.0000e+00, 1.9305e-03],
14
+ [3.5891e+02, 2.2499e+01, 6.0478e-03],
15
+ [3.4428e+02, 4.3506e+01, 9.6756e-01],
16
+ [3.6704e+02, 1.1002e+02, 9.9514e-01],
17
+ [3.8660e+02, 1.7183e+02, 9.8539e-01]],
18
+
19
+ [[6.5912e+02, 7.8940e+02, 9.9636e-01],
20
+ [7.1144e+02, 8.3895e+02, 9.9728e-01],
21
+ [6.7609e+02, 8.3011e+02, 1.3168e-04],
22
+ [6.7713e+02, 8.5438e+02, 5.6889e-07],
23
+ [6.6890e+02, 8.6667e+02, 4.8566e-10]]]), 'segment_results': [array([[ 666.56, 698.62],
24
+ [ 666.56, 705.38],
25
+ [ 669.94, 708.75],
26
+ [ 681.75, 708.75],
27
+ [ 685.12, 712.12],
28
+ [ 683.44, 713.81],
29
+ [ 669.94, 713.81],
30
+ [ 666.56, 717.19],
31
+ [ 668.25, 718.88],
32
+ [ 676.69, 718.88],
33
+ [ 678.38, 720.56],
34
+ [ 685.12, 720.56],
35
+ [ 686.81, 722.25],
36
+ [ 688.5, 722.25],
37
+ [ 690.19, 723.94],
38
+ [ 691.88, 723.94],
39
+ [ 693.56, 725.62],
40
+ [ 696.94, 725.62],
41
+ [ 700.31, 729],
42
+ [ 702, 729],
43
+ [ 703.69, 730.69],
44
+ [ 707.06, 730.69],
45
+ [ 708.75, 732.38],
46
+ [ 715.5, 732.38],
47
+ [ 717.19, 734.06],
48
+ [ 722.25, 734.06],
49
+ [ 723.94, 735.75],
50
+ [ 727.31, 735.75],
51
+ [ 729, 737.44],
52
+ [ 739.12, 737.44],
53
+ [ 740.81, 739.12],
54
+ [ 749.25, 739.12],
55
+ [ 750.94, 737.44],
56
+ [ 754.31, 737.44],
57
+ [ 756, 739.12],
58
+ [ 762.75, 739.12],
59
+ [ 764.44, 737.44],
60
+ [ 767.81, 737.44],
61
+ [ 769.5, 739.12],
62
+ [ 772.88, 739.12],
63
+ [ 774.56, 737.44],
64
+ [ 776.25, 737.44],
65
+ [ 777.94, 735.75],
66
+ [ 781.31, 735.75],
67
+ [ 784.69, 732.38],
68
+ [ 796.5, 732.38],
69
+ [ 796.5, 730.69],
70
+ [ 794.81, 730.69],
71
+ [ 793.12, 729],
72
+ [ 789.75, 729],
73
+ [ 788.06, 730.69],
74
+ [ 784.69, 730.69],
75
+ [ 783, 729],
76
+ [ 781.31, 729],
77
+ [ 779.62, 727.31],
78
+ [ 769.5, 727.31],
79
+ [ 766.12, 723.94],
80
+ [ 767.81, 722.25],
81
+ [ 781.31, 722.25],
82
+ [ 783, 720.56],
83
+ [ 806.62, 720.56],
84
+ [ 808.31, 722.25],
85
+ [ 815.06, 722.25],
86
+ [ 818.44, 725.62],
87
+ [ 816.75, 727.31],
88
+ [ 815.06, 727.31],
89
+ [ 813.38, 729],
90
+ [ 804.94, 729],
91
+ [ 803.25, 730.69],
92
+ [ 801.56, 730.69],
93
+ [ 803.25, 732.38],
94
+ [ 813.38, 732.38],
95
+ [ 815.06, 734.06],
96
+ [ 828.56, 734.06],
97
+ [ 830.25, 735.75],
98
+ [ 837, 735.75],
99
+ [ 840.38, 739.12],
100
+ [ 838.69, 740.81],
101
+ [ 835.31, 740.81],
102
+ [ 833.62, 742.5],
103
+ [ 831.94, 742.5],
104
+ [ 830.25, 744.19],
105
+ [ 831.94, 745.88],
106
+ [ 838.69, 745.88],
107
+ [ 840.38, 747.56],
108
+ [ 843.75, 747.56],
109
+ [ 845.44, 749.25],
110
+ [ 848.81, 749.25],
111
+ [ 850.5, 750.94],
112
+ [ 852.19, 750.94],
113
+ [ 853.88, 752.62],
114
+ [ 853.88, 754.31],
115
+ [ 855.56, 756],
116
+ [ 855.56, 757.69],
117
+ [ 857.25, 759.38],
118
+ [ 862.31, 759.38],
119
+ [ 864, 761.06],
120
+ [ 874.12, 761.06],
121
+ [ 875.81, 762.75],
122
+ [ 879.19, 762.75],
123
+ [ 880.88, 764.44],
124
+ [ 887.62, 764.44],
125
+ [ 889.31, 766.12],
126
+ [ 892.69, 766.12],
127
+ [ 894.38, 767.81],
128
+ [ 901.12, 767.81],
129
+ [ 902.81, 769.5],
130
+ [ 906.19, 769.5],
131
+ [ 907.88, 771.19],
132
+ [ 911.25, 771.19],
133
+ [ 912.94, 772.88],
134
+ [ 919.69, 772.88],
135
+ [ 921.38, 774.56],
136
+ [ 926.44, 774.56],
137
+ [ 928.12, 776.25],
138
+ [ 934.88, 776.25],
139
+ [ 936.56, 777.94],
140
+ [ 943.31, 777.94],
141
+ [ 945, 779.62],
142
+ [ 956.81, 779.62],
143
+ [ 958.5, 781.31],
144
+ [ 970.31, 781.31],
145
+ [ 972, 783],
146
+ [ 975.38, 783],
147
+ [ 977.06, 784.69],
148
+ [ 983.81, 784.69],
149
+ [ 985.5, 786.38],
150
+ [ 995.62, 786.38],
151
+ [ 997.31, 788.06],
152
+ [ 1024.3, 788.06],
153
+ [ 1026, 789.75],
154
+ [ 1039.5, 789.75],
155
+ [ 1041.2, 791.44],
156
+ [ 1044.6, 791.44],
157
+ [ 1046.2, 789.75],
158
+ [ 1053, 789.75],
159
+ [ 1054.7, 791.44],
160
+ [ 1056.4, 791.44],
161
+ [ 1058.1, 789.75],
162
+ [ 1059.8, 789.75],
163
+ [ 1061.4, 788.06],
164
+ [ 1066.5, 788.06],
165
+ [ 1068.2, 786.38],
166
+ [ 1069.9, 786.38],
167
+ [ 1069.9, 784.69],
168
+ [ 1064.8, 779.62],
169
+ [ 1064.8, 776.25],
170
+ [ 1066.5, 774.56],
171
+ [ 1066.5, 769.5],
172
+ [ 1064.8, 767.81],
173
+ [ 1064.8, 762.75],
174
+ [ 1066.5, 761.06],
175
+ [ 1066.5, 759.38],
176
+ [ 1068.2, 757.69],
177
+ [ 1068.2, 747.56],
178
+ [ 1069.9, 745.88],
179
+ [ 1068.2, 744.19],
180
+ [ 1068.2, 742.5],
181
+ [ 1066.5, 740.81],
182
+ [ 1066.5, 739.12],
183
+ [ 1064.8, 737.44],
184
+ [ 1064.8, 734.06],
185
+ [ 1063.1, 732.38],
186
+ [ 1063.1, 730.69],
187
+ [ 1061.4, 729],
188
+ [ 1061.4, 725.62],
189
+ [ 1058.1, 722.25],
190
+ [ 1056.4, 722.25],
191
+ [ 1054.7, 720.56],
192
+ [ 1041.2, 720.56],
193
+ [ 1039.5, 718.88],
194
+ [ 1029.4, 718.88],
195
+ [ 1027.7, 717.19],
196
+ [ 1026, 717.19],
197
+ [ 1022.6, 713.81],
198
+ [ 1019.2, 713.81],
199
+ [ 1017.6, 712.12],
200
+ [ 1014.2, 712.12],
201
+ [ 1012.5, 710.44],
202
+ [ 1012.5, 708.75],
203
+ [ 1010.8, 708.75],
204
+ [ 1005.8, 703.69],
205
+ [ 1005.8, 698.62],
206
+ [ 666.56, 698.62],
207
+ [ 631.12, 698.62],
208
+ [ 631.12, 703.69],
209
+ [ 629.44, 705.38],
210
+ [ 627.75, 703.69],
211
+ [ 627.75, 698.62],
212
+ [ 617.62, 707.06],
213
+ [ 615.94, 707.06],
214
+ [ 614.25, 708.75],
215
+ [ 612.56, 708.75],
216
+ [ 610.88, 710.44],
217
+ [ 607.5, 710.44],
218
+ [ 605.81, 708.75],
219
+ [ 595.69, 708.75],
220
+ [ 594, 707.06],
221
+ [ 588.94, 707.06],
222
+ [ 587.25, 705.38],
223
+ [ 575.44, 705.38],
224
+ [ 573.75, 703.69],
225
+ [ 573.75, 698.62],
226
+ [ 545.06, 823.5],
227
+ [ 540, 823.5],
228
+ [ 538.31, 821.81],
229
+ [ 533.25, 821.81],
230
+ [ 531.56, 820.12],
231
+ [ 526.5, 820.12],
232
+ [ 524.81, 818.44],
233
+ [ 518.06, 818.44],
234
+ [ 516.38, 816.75],
235
+ [ 509.62, 816.75],
236
+ [ 507.94, 815.06],
237
+ [ 501.19, 815.06],
238
+ [ 499.5, 813.38],
239
+ [ 494.44, 813.38],
240
+ [ 492.75, 811.69],
241
+ [ 487.69, 811.69],
242
+ [ 486, 810],
243
+ [ 480.94, 810],
244
+ [ 479.25, 808.31],
245
+ [ 474.19, 808.31],
246
+ [ 472.5, 806.62],
247
+ [ 467.44, 806.62],
248
+ [ 465.75, 804.94],
249
+ [ 460.69, 804.94],
250
+ [ 459, 803.25],
251
+ [ 452.25, 803.25],
252
+ [ 450.56, 801.56],
253
+ [ 443.81, 801.56],
254
+ [ 442.12, 799.88],
255
+ [ 433.69, 799.88],
256
+ [ 432, 798.19],
257
+ [ 425.25, 798.19],
258
+ [ 423.56, 796.5],
259
+ [ 418.5, 796.5],
260
+ [ 416.81, 794.81],
261
+ [ 410.06, 794.81],
262
+ [ 408.38, 793.12],
263
+ [ 401.62, 793.12],
264
+ [ 399.94, 791.44],
265
+ [ 394.88, 791.44],
266
+ [ 393.19, 789.75],
267
+ [ 389.81, 789.75],
268
+ [ 388.12, 788.06],
269
+ [ 381.38, 788.06],
270
+ [ 379.69, 786.38],
271
+ [ 376.31, 786.38],
272
+ [ 374.62, 784.69],
273
+ [ 369.56, 784.69],
274
+ [ 367.88, 783],
275
+ [ 362.81, 783],
276
+ [ 361.12, 781.31],
277
+ [ 357.75, 781.31],
278
+ [ 356.06, 779.62],
279
+ [ 352.69, 779.62],
280
+ [ 351, 777.94],
281
+ [ 344.25, 777.94],
282
+ [ 342.56, 776.25],
283
+ [ 337.5, 776.25],
284
+ [ 335.81, 774.56],
285
+ [ 329.06, 774.56],
286
+ [ 327.38, 772.88],
287
+ [ 322.31, 772.88],
288
+ [ 320.62, 771.19],
289
+ [ 315.56, 771.19],
290
+ [ 313.88, 769.5],
291
+ [ 310.5, 769.5],
292
+ [ 308.81, 767.81],
293
+ [ 303.75, 767.81],
294
+ [ 302.06, 766.12],
295
+ [ 297, 766.12],
296
+ [ 295.31, 764.44],
297
+ [ 290.25, 764.44],
298
+ [ 288.56, 762.75],
299
+ [ 285.19, 762.75],
300
+ [ 283.5, 761.06],
301
+ [ 278.44, 761.06],
302
+ [ 276.75, 759.38],
303
+ [ 271.69, 759.38],
304
+ [ 270, 757.69],
305
+ [ 264.94, 757.69],
306
+ [ 263.25, 756],
307
+ [ 256.5, 756],
308
+ [ 254.81, 754.31],
309
+ [ 249.75, 754.31],
310
+ [ 248.06, 752.62],
311
+ [ 243, 752.62],
312
+ [ 241.31, 750.94],
313
+ [ 234.56, 750.94],
314
+ [ 232.88, 749.25],
315
+ [ 226.12, 749.25],
316
+ [ 224.44, 747.56],
317
+ [ 217.69, 747.56],
318
+ [ 216, 745.88],
319
+ [ 210.94, 745.88],
320
+ [ 209.25, 744.19],
321
+ [ 204.19, 744.19],
322
+ [ 202.5, 742.5],
323
+ [ 197.44, 742.5],
324
+ [ 195.75, 740.81],
325
+ [ 189, 740.81],
326
+ [ 187.31, 739.12],
327
+ [ 180.56, 739.12],
328
+ [ 178.88, 737.44],
329
+ [ 173.81, 737.44],
330
+ [ 172.12, 735.75],
331
+ [ 168.75, 735.75],
332
+ [ 167.06, 734.06],
333
+ [ 162, 734.06],
334
+ [ 160.31, 732.38],
335
+ [ 153.56, 732.38],
336
+ [ 151.88, 730.69],
337
+ [ 146.81, 730.69],
338
+ [ 145.12, 729],
339
+ [ 140.06, 729],
340
+ [ 138.38, 727.31],
341
+ [ 133.31, 727.31],
342
+ [ 131.62, 725.62],
343
+ [ 126.56, 725.62],
344
+ [ 124.88, 723.94],
345
+ [ 119.81, 723.94],
346
+ [ 118.12, 722.25],
347
+ [ 113.06, 722.25],
348
+ [ 111.38, 720.56],
349
+ [ 108, 720.56],
350
+ [ 106.31, 718.88],
351
+ [ 101.25, 718.88],
352
+ [ 99.562, 717.19],
353
+ [ 94.5, 717.19],
354
+ [ 92.812, 715.5],
355
+ [ 89.438, 715.5],
356
+ [ 87.75, 713.81],
357
+ [ 84.375, 713.81],
358
+ [ 82.688, 712.12],
359
+ [ 79.312, 712.12],
360
+ [ 77.625, 710.44],
361
+ [ 74.25, 710.44],
362
+ [ 72.562, 708.75],
363
+ [ 70.875, 708.75],
364
+ [ 69.188, 707.06],
365
+ [ 65.812, 707.06],
366
+ [ 64.125, 705.38],
367
+ [ 62.438, 705.38],
368
+ [ 60.75, 703.69],
369
+ [ 60.75, 698.62],
370
+ [ 10.125, 698.62],
371
+ [ 10.125, 1078.3],
372
+ [ 1078.3, 1078.3],
373
+ [ 1078.3, 936.56],
374
+ [ 1066.5, 936.56],
375
+ [ 1064.8, 934.88],
376
+ [ 1056.4, 934.88],
377
+ [ 1054.7, 933.19],
378
+ [ 1049.6, 933.19],
379
+ [ 1047.9, 931.5],
380
+ [ 1042.9, 931.5],
381
+ [ 1041.2, 929.81],
382
+ [ 1034.4, 929.81],
383
+ [ 1032.8, 928.12],
384
+ [ 1027.7, 928.12],
385
+ [ 1026, 926.44],
386
+ [ 1017.6, 926.44],
387
+ [ 1015.9, 924.75],
388
+ [ 1007.4, 924.75],
389
+ [ 1005.8, 923.06],
390
+ [ 999, 923.06],
391
+ [ 997.31, 921.38],
392
+ [ 992.25, 921.38],
393
+ [ 990.56, 919.69],
394
+ [ 985.5, 919.69],
395
+ [ 983.81, 918],
396
+ [ 978.75, 918],
397
+ [ 977.06, 916.31],
398
+ [ 972, 916.31],
399
+ [ 970.31, 914.62],
400
+ [ 963.56, 914.62],
401
+ [ 961.88, 912.94],
402
+ [ 955.12, 912.94],
403
+ [ 953.44, 911.25],
404
+ [ 941.62, 911.25],
405
+ [ 939.94, 909.56],
406
+ [ 929.81, 909.56],
407
+ [ 928.12, 907.88],
408
+ [ 919.69, 907.88],
409
+ [ 918, 906.19],
410
+ [ 912.94, 906.19],
411
+ [ 911.25, 904.5],
412
+ [ 907.88, 904.5],
413
+ [ 906.19, 902.81],
414
+ [ 902.81, 902.81],
415
+ [ 901.12, 901.12],
416
+ [ 896.06, 901.12],
417
+ [ 894.38, 899.44],
418
+ [ 889.31, 899.44],
419
+ [ 887.62, 897.75],
420
+ [ 879.19, 897.75],
421
+ [ 877.5, 896.06],
422
+ [ 872.44, 896.06],
423
+ [ 870.75, 894.38],
424
+ [ 864, 894.38],
425
+ [ 862.31, 892.69],
426
+ [ 855.56, 892.69],
427
+ [ 853.88, 891],
428
+ [ 848.81, 891],
429
+ [ 847.12, 889.31],
430
+ [ 843.75, 889.31],
431
+ [ 842.06, 887.62],
432
+ [ 837, 887.62],
433
+ [ 835.31, 885.94],
434
+ [ 826.88, 885.94],
435
+ [ 825.19, 884.25],
436
+ [ 818.44, 884.25],
437
+ [ 816.75, 882.56],
438
+ [ 810, 882.56],
439
+ [ 808.31, 880.88],
440
+ [ 801.56, 880.88],
441
+ [ 799.88, 879.19],
442
+ [ 794.81, 879.19],
443
+ [ 793.12, 877.5],
444
+ [ 788.06, 877.5],
445
+ [ 786.38, 875.81],
446
+ [ 779.62, 875.81],
447
+ [ 777.94, 874.12],
448
+ [ 769.5, 874.12],
449
+ [ 767.81, 872.44],
450
+ [ 759.38, 872.44],
451
+ [ 757.69, 870.75],
452
+ [ 752.62, 870.75],
453
+ [ 750.94, 869.06],
454
+ [ 744.19, 869.06],
455
+ [ 742.5, 867.38],
456
+ [ 737.44, 867.38],
457
+ [ 735.75, 865.69],
458
+ [ 732.38, 865.69],
459
+ [ 730.69, 864],
460
+ [ 727.31, 864],
461
+ [ 725.62, 862.31],
462
+ [ 722.25, 862.31],
463
+ [ 720.56, 860.62],
464
+ [ 715.5, 860.62],
465
+ [ 713.81, 858.94],
466
+ [ 703.69, 858.94],
467
+ [ 702, 857.25],
468
+ [ 691.88, 857.25],
469
+ [ 690.19, 855.56],
470
+ [ 685.12, 855.56],
471
+ [ 683.44, 853.88],
472
+ [ 676.69, 853.88],
473
+ [ 675, 852.19],
474
+ [ 669.94, 852.19],
475
+ [ 668.25, 850.5],
476
+ [ 661.5, 850.5],
477
+ [ 659.81, 848.81],
478
+ [ 653.06, 848.81],
479
+ [ 651.38, 847.12],
480
+ [ 646.31, 847.12],
481
+ [ 644.62, 845.44],
482
+ [ 639.56, 845.44],
483
+ [ 637.88, 843.75],
484
+ [ 632.81, 843.75],
485
+ [ 631.12, 842.06],
486
+ [ 627.75, 842.06],
487
+ [ 626.06, 840.38],
488
+ [ 621, 840.38],
489
+ [ 619.31, 838.69],
490
+ [ 615.94, 838.69],
491
+ [ 614.25, 837],
492
+ [ 609.19, 837],
493
+ [ 607.5, 835.31],
494
+ [ 602.44, 835.31],
495
+ [ 600.75, 833.62],
496
+ [ 594, 833.62],
497
+ [ 592.31, 831.94],
498
+ [ 580.5, 831.94],
499
+ [ 578.81, 830.25],
500
+ [ 568.69, 830.25],
501
+ [ 567, 828.56],
502
+ [ 560.25, 828.56],
503
+ [ 558.56, 826.88],
504
+ [ 553.5, 826.88],
505
+ [ 551.81, 825.19],
506
+ [ 546.75, 825.19],
507
+ [ 545.06, 823.5],
508
+ [ 573.75, 698.62],
509
+ [ 621, 698.62],
510
+ [ 621, 703.69],
511
+ [ 617.62, 707.06],
512
+ [ 627.75, 698.62],
513
+ [ 631.12, 698.62]], dtype=float32)], 'original_image': array([[[255, 236, 71],
514
+ [251, 229, 63],
515
+ [233, 207, 41],
516
+ ...,
517
+ [241, 184, 1],
518
+ [241, 184, 1],
519
+ [241, 184, 1]],
520
+
521
+ [[255, 235, 70],
522
+ [251, 229, 63],
523
+ [237, 210, 42],
524
+ ...,
525
+ [241, 184, 1],
526
+ [241, 184, 1],
527
+ [241, 184, 1]],
528
+
529
+ [[255, 231, 65],
530
+ [253, 229, 63],
531
+ [243, 214, 46],
532
+ ...,
533
+ [241, 184, 1],
534
+ [241, 184, 1],
535
+ [241, 184, 1]],
536
+
537
+ ...,
538
+
539
+ [[ 88, 116, 50],
540
+ [ 88, 116, 50],
541
+ [ 88, 116, 50],
542
+ ...,
543
+ [ 61, 81, 28],
544
+ [ 61, 81, 28],
545
+ [ 61, 81, 28]],
546
+
547
+ [[ 88, 116, 50],
548
+ [ 88, 116, 50],
549
+ [ 88, 116, 50],
550
+ ...,
551
+ [ 61, 81, 28],
552
+ [ 61, 81, 28],
553
+ [ 61, 81, 28]],
554
+
555
+ [[ 88, 116, 50],
556
+ [ 88, 116, 50],
557
+ [ 88, 116, 50],
558
+ ...,
559
+ [ 61, 81, 28],
560
+ [ 61, 81, 28],
561
+ [ 61, 81, 28]]], shape=(1080, 1080, 3), dtype=uint8), 'pose_image_result': array([[[255, 236, 71],
562
+ [251, 229, 63],
563
+ [233, 207, 41],
564
+ ...,
565
+ [241, 184, 1],
566
+ [241, 184, 1],
567
+ [241, 184, 1]],
568
+
569
+ [[255, 235, 70],
570
+ [251, 229, 63],
571
+ [237, 210, 42],
572
+ ...,
573
+ [241, 184, 1],
574
+ [241, 184, 1],
575
+ [241, 184, 1]],
576
+
577
+ [[255, 231, 65],
578
+ [253, 229, 63],
579
+ [243, 214, 46],
580
+ ...,
581
+ [241, 184, 1],
582
+ [241, 184, 1],
583
+ [241, 184, 1]],
584
+
585
+ ...,
586
+
587
+ [[ 88, 116, 50],
588
+ [ 88, 116, 50],
589
+ [ 88, 116, 50],
590
+ ...,
591
+ [ 61, 81, 28],
592
+ [ 61, 81, 28],
593
+ [ 61, 81, 28]],
594
+
595
+ [[ 88, 116, 50],
596
+ [ 88, 116, 50],
597
+ [ 88, 116, 50],
598
+ ...,
599
+ [ 61, 81, 28],
600
+ [ 61, 81, 28],
601
+ [ 61, 81, 28]],
602
+
603
+ [[ 88, 116, 50],
604
+ [ 88, 116, 50],
605
+ [ 88, 116, 50],
606
+ ...,
607
+ [ 61, 81, 28],
608
+ [ 61, 81, 28],
609
+ [ 61, 81, 28]]], shape=(1080, 1080, 3), dtype=uint8), 'segment_image_result': array([[[253, 232, 67],
610
+ [245, 222, 56],
611
+ [232, 206, 39],
612
+ ...,
613
+ [241, 184, 0],
614
+ [241, 184, 0],
615
+ [241, 184, 0]],
616
+
617
+ [[253, 231, 65],
618
+ [247, 222, 56],
619
+ [237, 209, 42],
620
+ ...,
621
+ [241, 184, 0],
622
+ [241, 184, 0],
623
+ [241, 184, 0]],
624
+
625
+ [[254, 229, 63],
626
+ [250, 223, 56],
627
+ [243, 213, 45],
628
+ ...,
629
+ [241, 184, 0],
630
+ [241, 184, 0],
631
+ [241, 184, 0]],
632
+
633
+ ...,
634
+
635
+ [[ 88, 115, 49],
636
+ [ 88, 115, 49],
637
+ [ 88, 115, 49],
638
+ ...,
639
+ [158, 61, 15],
640
+ [158, 61, 15],
641
+ [158, 61, 15]],
642
+
643
+ [[ 88, 115, 49],
644
+ [ 88, 115, 49],
645
+ [ 88, 115, 49],
646
+ ...,
647
+ [158, 61, 15],
648
+ [158, 61, 15],
649
+ [158, 61, 15]],
650
+
651
+ [[ 88, 115, 49],
652
+ [ 88, 115, 49],
653
+ [ 88, 115, 49],
654
+ ...,
655
+ [158, 61, 15],
656
+ [158, 61, 15],
657
+ [158, 61, 15]]], shape=(1080, 1080, 3), dtype=uint8), 'final_image_result': array([[[254, 235, 70],
658
+ [249, 227, 61],
659
+ [233, 207, 40],
660
+ ...,
661
+ [241, 184, 1],
662
+ [241, 184, 1],
663
+ [241, 184, 1]],
664
+
665
+ [[254, 234, 68],
666
+ [250, 227, 61],
667
+ [237, 210, 42],
668
+ ...,
669
+ [241, 184, 1],
670
+ [241, 184, 1],
671
+ [241, 184, 1]],
672
+
673
+ [[255, 230, 64],
674
+ [252, 227, 61],
675
+ [243, 214, 46],
676
+ ...,
677
+ [241, 184, 1],
678
+ [241, 184, 1],
679
+ [241, 184, 1]],
680
+
681
+ ...,
682
+
683
+ [[ 88, 116, 50],
684
+ [ 88, 116, 50],
685
+ [ 88, 116, 50],
686
+ ...,
687
+ [ 90, 75, 24],
688
+ [ 90, 75, 24],
689
+ [ 90, 75, 24]],
690
+
691
+ [[ 88, 116, 50],
692
+ [ 88, 116, 50],
693
+ [ 88, 116, 50],
694
+ ...,
695
+ [ 90, 75, 24],
696
+ [ 90, 75, 24],
697
+ [ 90, 75, 24]],
698
+
699
+ [[ 88, 116, 50],
700
+ [ 88, 116, 50],
701
+ [ 88, 116, 50],
702
+ ...,
703
+ [ 90, 75, 24],
704
+ [ 90, 75, 24],
705
+ [ 90, 75, 24]]], shape=(1080, 1080, 3), dtype=uint8), 'meter_value': 12, 'last_valid_keypoint_cm': 80, 'cm_distance': np.float64(0.0), 'final_draft_cm': np.float64(85.0)}
706
+ ...,
707
+ [ 90, 75, 24],
708
+ [ 90, 75, 24],
709
+ ...,
710
+ [ 90, 75, 24],
711
+ ...,
712
+ [ 90, 75, 24],
713
+ ...,
714
+ ...,
715
+ ...,
716
+ ...,
717
+ [ 90, 75, 24],
718
+ ...,
719
+ [ 90, 75, 24],
720
+ ...,
721
+ [ 90, 75, 24],
722
+ [ 90, 75, 24],
723
+ [ 90, 75, 24],
724
+ [ 90, 75, 24]]], shape=(1080, 1080, 3), dtype=uint8), 'meter_value': 12, 'last_valid_keypoint_cm': 80, 'cm_distance': np.float64(0.0), 'final_draft_cm': np.float64(85.0)}
services/pdf_service.py ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from reportlab.pdfgen import canvas
2
+ from reportlab.lib.pagesizes import letter
3
+ from reportlab.lib.utils import ImageReader
4
+ from io import BytesIO
5
+ from models.schemas import MeasurementMetadata, MeasurementResult
6
+ from PIL import Image, ImageDraw
7
+ import logging
8
+
9
+ logging.basicConfig(level=logging.INFO)
10
+ logger = logging.getLogger(__name__)
11
+
12
class PdfService:
    """
    A service to generate single-page PDF measurement reports.
    """

    # ml_results keys that hold numpy arrays / images, not printable scalars.
    _NON_PRINTABLE_KEYS = frozenset([
        'pose_results', 'segment_results', 'original_image',
        'pose_image_result', 'segment_image_result', 'final_image_result',
    ])

    def create_report(
        self,
        image_bytes: bytes,
        metadata: MeasurementMetadata,
        results: MeasurementResult,
        ml_results: dict
    ) -> bytes:
        """
        Generate a PDF report with the measurement data.

        Args:
            image_bytes: Raw uploaded image bytes (currently unused; kept for
                interface compatibility with callers).
            metadata: Ship id, timestamp and GPS coordinates.
            results: Measurement results (currently not rendered — the
                corresponding section is intentionally disabled).
            ml_results: ML pipeline output; scalar entries are printed as
                text, image arrays are embedded as pictures.

        Returns:
            The generated PDF document as raw bytes.
        """
        # Lazy %-style args avoid formatting cost when INFO is disabled.
        logger.info("Creating PDF with metadata: %s", metadata)
        logger.info("Creating PDF with results: %s", results)

        buffer = BytesIO()
        p = canvas.Canvas(buffer, pagesize=letter)
        width, height = letter

        # Title
        p.setFont("Helvetica-Bold", 16)
        p.drawString(72, height - 72, "Ship Draft Measurement Report")

        # Metadata
        p.setFont("Helvetica", 12)
        p.drawString(72, height - 108, f"Ship ID: {metadata.ship_id}")
        p.drawString(72, height - 126, f"Timestamp: {metadata.timestamp.strftime('%Y-%m-%d %H:%M:%S')}")
        p.drawString(72, height - 144, f"Latitude: {metadata.latitude}")
        p.drawString(72, height - 162, f"Longitude: {metadata.longitude}")

        # ML Results section
        p.setFont("Helvetica-Bold", 14)
        y_position = height - 270
        p.drawString(72, y_position, "ML Results")
        y_position -= 18
        p.setFont("Helvetica", 12)

        # Highlight the draft value from the ML results, if present.
        if 'draft' in ml_results:
            p.setFont("Helvetica-Bold", 12)  # Highlight
            p.drawString(90, y_position, f"ML Draft: {ml_results['draft']:.2f} meters")
            p.setFont("Helvetica", 12)  # Reset font
            y_position -= 18

        # Print remaining scalar ML results (arrays and images are skipped).
        for key, value in ml_results.items():
            if key in self._NON_PRINTABLE_KEYS:
                continue
            p.drawString(90, y_position, f"{key.replace('_', ' ').title()}: {value}")
            y_position -= 18

        # Images section header
        p.setFont("Helvetica-Bold", 14)
        y_position -= 18
        p.drawString(72, y_position, "Images")
        y_position -= 18
        p.setFont("Helvetica", 12)

        # Horizontal row layout: fixed-size thumbnails, left to right.
        x_start = 72
        image_width = 150
        image_height = 150  # Assuming square or adjust as needed
        y_image_row = y_position - image_height - 20  # bottom edge of the images

        current_x = x_start
        # Draw each available pipeline image with its label above it.
        for key, label in (
            ('original_image', "Original Image:"),
            ('pose_image_result', "Pose Image Result:"),
            ('segment_image_result', "Segment Image Result:"),
        ):
            png_bytes = self._array_to_png_bytes(ml_results.get(key))
            if png_bytes is None:
                continue
            p.drawString(current_x, y_image_row + image_height + 5, label)
            p.drawImage(
                ImageReader(BytesIO(png_bytes)),
                current_x,
                y_image_row,
                width=image_width,
                height=image_height,
                preserveAspectRatio=True,
            )
            current_x += image_width + 20  # advance to the next slot

        p.showPage()
        p.save()

        pdf_bytes = buffer.getvalue()
        buffer.close()
        return pdf_bytes

    @staticmethod
    def _array_to_png_bytes(np_array):
        """Convert a numpy image array to PNG bytes; return None for None input."""
        if np_array is None:
            return None
        img = Image.fromarray(np_array.astype('uint8'))
        img_byte_arr = BytesIO()
        img.save(img_byte_arr, format='PNG')
        return img_byte_arr.getvalue()
services/persistence_service.py ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import os
3
+ from models.database import SessionLocal, Report
4
+ from models.schemas import MeasurementMetadata
5
+ import smtplib
6
+ from email.mime.multipart import MIMEMultipart
7
+ from email.mime.base import MIMEBase
8
+ from email import encoders
9
+
10
+ from typing import Optional
11
+ from datetime import datetime
12
+
13
+
14
+ REPORTS_DIR = "reports"
15
+
16
class PersistenceService:
    """
    A service to handle persistence of reports: disk, database, and email.
    """

    def __init__(self):
        # exist_ok avoids the check-then-create race of the previous
        # os.path.exists() + os.makedirs() sequence.
        os.makedirs(REPORTS_DIR, exist_ok=True)

    def save_to_disk(self, pdf_bytes: bytes, metadata: MeasurementMetadata) -> str:
        """Save the PDF report to disk and return its full file path."""
        filename = f"report_{metadata.ship_id}_{metadata.timestamp.strftime('%Y%m%d%H%M%S')}.pdf"
        file_path = os.path.join(REPORTS_DIR, filename)
        with open(file_path, "wb") as f:
            f.write(pdf_bytes)
        return file_path  # Return the full file_path

    def save_to_db(self, file_path: str, metadata: MeasurementMetadata, draft_measurement: float, confidence_score: float, image_bytes: bytes):
        """Save the report metadata (and raw image) to the database."""
        db = SessionLocal()
        try:
            db_report = Report(
                ship_id=metadata.ship_id,
                timestamp=metadata.timestamp,
                latitude=metadata.latitude,
                longitude=metadata.longitude,
                draft_measurement=draft_measurement,
                confidence_score=confidence_score,
                pdf_path=file_path,
                image_bytes=image_bytes
            )
            db.add(db_report)
            db.commit()
            db.refresh(db_report)
        finally:
            # Ensure the session is released even if commit raises.
            db.close()

    def get_all_reports(
        self,
        skip: int = 0,
        limit: int = 10,
        search: Optional[str] = None,
        start_date: Optional[datetime] = None,
        end_date: Optional[datetime] = None,
    ):
        """Retrieve reports from the database with pagination and filtering.

        Returns a list of dicts; ``image_bytes`` is Base64-encoded so the
        result is JSON-serializable.
        """
        db = SessionLocal()
        try:
            # Explicitly select columns including image_bytes.
            query = db.query(
                Report.id,
                Report.ship_id,
                Report.timestamp,
                Report.latitude,
                Report.longitude,
                Report.draft_measurement,
                Report.confidence_score,
                Report.pdf_path,
                Report.image_bytes  # Include image_bytes
            )

            if search:
                query = query.filter(Report.ship_id.contains(search))
            if start_date:
                query = query.filter(Report.timestamp >= start_date)
            if end_date:
                query = query.filter(Report.timestamp <= end_date)

            reports = query.offset(skip).limit(limit).all()
        finally:
            # Session is released even if the query raises.
            db.close()

        # Convert SQLAlchemy Row objects to dicts; Base64-encode image bytes.
        column_names = [
            "id", "ship_id", "timestamp", "latitude", "longitude",
            "draft_measurement", "confidence_score", "pdf_path", "image_bytes"
        ]
        reports_as_dicts = []
        for report_row in reports:
            report_dict = dict(zip(column_names, report_row))
            if report_dict["image_bytes"]:
                report_dict["image_bytes"] = base64.b64encode(report_dict["image_bytes"]).decode('utf-8')
            reports_as_dicts.append(report_dict)

        return reports_as_dicts

    def get_report_by_id(self, report_id: int):
        """Retrieve a single report by its ID, or None if not found."""
        db = SessionLocal()
        try:
            return db.query(Report).filter(Report.id == report_id).first()
        finally:
            db.close()

    def send_by_email(self, file_path: str, recipient_email: str):
        """
        Send the PDF report as an email attachment to ``recipient_email``.

        SMTP settings are read from environment variables so no credentials
        live in source control:
            SMTP_SERVER (default "smtp.gmail.com"),
            SMTP_PORT (default 587),
            SMTP_SENDER_EMAIL, SMTP_SENDER_PASSWORD.
        """
        # SECURITY: a previous revision hard-coded a Gmail address and
        # password here and overwrote the recipient_email argument. Those
        # credentials must be rotated; configuration now comes from the
        # environment and the caller-supplied recipient is honored.
        smtp_server = os.environ.get("SMTP_SERVER", "smtp.gmail.com")
        smtp_port = int(os.environ.get("SMTP_PORT", "587"))
        sender_email = os.environ.get("SMTP_SENDER_EMAIL", "")
        sender_password = os.environ.get("SMTP_SENDER_PASSWORD", "")

        msg = MIMEMultipart()
        msg["From"] = sender_email
        msg["To"] = recipient_email
        msg["Subject"] = f"Ship Draft Report: {os.path.basename(file_path)}"

        with open(file_path, "rb") as attachment:
            part = MIMEBase("application", "octet-stream")
            part.set_payload(attachment.read())

        encoders.encode_base64(part)
        part.add_header(
            "Content-Disposition",
            f"attachment; filename= {os.path.basename(file_path)}",
        )
        msg.attach(part)

        print(f"\n--- EMAIL SIMULATION ---")
        print(f"Sending email to {recipient_email} from {sender_email}")
        print(f"Attaching file: {file_path}")
        print(f"--- END EMAIL SIMULATION ---")

        # Best-effort delivery: report failures without crashing the request.
        try:
            server = smtplib.SMTP(smtp_server, smtp_port)
            server.starttls()
            server.login(sender_email, sender_password)
            server.sendmail(sender_email, recipient_email, msg.as_string())
            server.quit()
            print("Email sent successfully!")
        except Exception as e:
            print(f"Failed to send email: {e}")