Spaces:
Running
Running
Update updated_word.py
Browse files- updated_word.py +80 -14
updated_word.py
CHANGED
|
@@ -603,6 +603,59 @@ def split_sentences_keep(text: str) -> List[str]:
|
|
| 603 |
_sent_split = re.compile(r'(?<=[.!?])\s+|\n+')
|
| 604 |
_date_pat = re.compile(r'\b(?:\d{1,2}(?:st|nd|rd|th)\s+[A-Za-z]+\s+\d{4}|\d{1,2}/\d{1,2}/\d{2,4}|[A-Za-z]+\s+\d{1,2},\s*\d{4})\b')
|
| 605 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 606 |
def extract_summary_snippets(desired_text: str):
|
| 607 |
sents = _sentences(desired_text)
|
| 608 |
dates = [m.group(0) for m in _date_pat.finditer(desired_text)]
|
|
@@ -1349,12 +1402,18 @@ def map_driver_cols(table: Table) -> Dict[str,int]:
|
|
| 1349 |
if all(n in t for n in needles):
|
| 1350 |
return j
|
| 1351 |
return None
|
| 1352 |
-
|
| 1353 |
-
|
| 1354 |
-
idx["
|
| 1355 |
-
|
| 1356 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1357 |
if wd is not None: idx["wd"] = wd
|
|
|
|
| 1358 |
return {k:v for k,v in idx.items() if v is not None}
|
| 1359 |
|
| 1360 |
def fill_driver_table(table: Table, arrays: Dict[str, List[str]]):
|
|
@@ -1362,12 +1421,15 @@ def fill_driver_table(table: Table, arrays: Dict[str, List[str]]):
|
|
| 1362 |
if not colmap:
|
| 1363 |
return
|
| 1364 |
|
| 1365 |
-
names
|
| 1366 |
-
|
| 1367 |
-
|
| 1368 |
-
|
|
|
|
|
|
|
|
|
|
| 1369 |
|
| 1370 |
-
n = max(len(
|
| 1371 |
clear_data_rows_keep_headers(table, header_rows=1)
|
| 1372 |
ensure_rows(table, n)
|
| 1373 |
|
|
@@ -1377,14 +1439,18 @@ def fill_driver_table(table: Table, arrays: Dict[str, List[str]]):
|
|
| 1377 |
row = table.rows[i+1]
|
| 1378 |
if "name" in colmap and has_any_name:
|
| 1379 |
replace_red_in_cell(row.cells[colmap["name"]], names[i] if i < len(names) else "")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1380 |
if "roster" in colmap:
|
| 1381 |
replace_red_in_cell(row.cells[colmap["roster"]], rosters[i] if i < len(rosters) else "")
|
| 1382 |
if "fit" in colmap:
|
| 1383 |
-
replace_red_in_cell(row.cells[colmap["fit"]],
|
| 1384 |
if "wd" in colmap:
|
| 1385 |
-
replace_red_in_cell(row.cells[colmap["wd"]],
|
| 1386 |
-
|
| 1387 |
-
|
| 1388 |
|
| 1389 |
# ----------------------------- main mapping -----------------------------
|
| 1390 |
def flatten_simple_sections(data: Dict) -> Dict[str, str]:
|
|
|
|
| 603 |
# Sentence boundary: whitespace that follows '.', '!' or '?', or any run of newlines.
_sent_split = re.compile(r'(?<=[.!?])\s+|\n+')
# Dates in three shapes: "21st October 2022" (the ordinal suffix is required),
# "21/10/2022" (2-4 digit year) and "October 21, 2022".
_date_pat = re.compile(r'\b(?:\d{1,2}(?:st|nd|rd|th)\s+[A-Za-z]+\s+\d{4}|\d{1,2}/\d{1,2}/\d{2,4}|[A-Za-z]+\s+\d{1,2},\s*\d{4})\b')
|
| 605 |
|
| 606 |
+
def _sentences(text: str) -> List[str]:
    """Split *text* into sentences.

    The text is cut wherever the module-level ``_sent_split`` pattern matches.
    Each piece is stripped of surrounding whitespace and empty pieces are
    dropped.

    Args:
        text: The text to split. Any falsy value (``""``, ``None``) is accepted.

    Returns:
        The non-empty, stripped pieces in their original order; an empty list
        when *text* is falsy.
    """
    if not text:
        return []
    pieces = (piece.strip() for piece in _sent_split.split(text))
    return [piece for piece in pieces if piece]
|
| 611 |
+
|
| 612 |
+
def _extract_sheet_phrase_from_desired(text: str) -> str:
    """Return the first sentence-like fragment of *text* that mentions "sheet".

    The fragment is the stretch of text around the first whole-word,
    case-insensitive occurrence of ``sheet``. It is bounded on both sides by
    the nearest ``.``, ``!``, ``?`` or newline, the same boundaries that
    ``_sent_split`` uses, so the phrase never runs into a neighbouring
    sentence or paragraph.

    Args:
        text: The text to search. Any falsy value (``""``, ``None``) is accepted.

    Returns:
        The matched fragment with surrounding whitespace removed, or ``""``
        when *text* is falsy or contains no whole-word "sheet".
    """
    if not text:
        return ""
    # The lazy prefix makes the fragment start right after the previous
    # boundary; the greedy suffix extends it up to the next one.
    sheet_match = re.search(r'[^.!?\n]*?\bsheet\b[^.!?\n]*', text, re.IGNORECASE)
    if sheet_match is None:
        return ""
    return sheet_match.group(0).strip()
|
| 619 |
+
|
| 620 |
+
def find_all_summary_tables(doc):
    """Collect the summary tables of *doc* together with their column layout.

    A table qualifies when the joined, lower-cased text of its first row
    contains "summary" or "details" and also names one of the known sections.
    Tables without rows, or without a recognised section name, are ignored.

    Args:
        doc: An object exposing ``tables``; each table must expose ``rows``
            whose items expose ``cells``.
            NOTE(review): presumably a python-docx ``Document`` - confirm.

    Returns:
        A list of ``(section_key, table, lcol, dcol)`` tuples in document
        order. ``section_key`` is ``'maintenance'``, ``'mass'`` or
        ``'fatigue'``; ``lcol`` is always 0; ``dcol`` is the index of the
        first header cell containing "details", or 1 when no single cell does.
    """
    # Checked in this order: the first name found in the header wins.
    section_names = ('maintenance', 'mass', 'fatigue')
    summary_tables = []
    for table in doc.tables:
        if not table.rows:
            continue

        # Read each header cell once; the texts drive both the table
        # classification and the details-column lookup below.
        header_cells = [cell_text(cell).lower() for cell in table.rows[0].cells]
        header_text = ' '.join(header_cells)
        if 'summary' not in header_text and 'details' not in header_text:
            continue

        section_key = next((name for name in section_names if name in header_text), None)
        if section_key is None:
            continue

        # Default layout: labels in column 0, details in column 1.
        # NOTE(review): lcol stays 0 even when "details" is found in column 0,
        # and the default dcol of 1 is not checked against the column count.
        lcol = 0
        dcol = next((i for i, text in enumerate(header_cells) if 'details' in text), 1)
        summary_tables.append((section_key, table, lcol, dcol))
    return summary_tables
|
| 639 |
+
|
| 640 |
+
def patch_details_cell_from_json(cell, value):
    """Replace the text of *cell* with ``str(value)``, coloured black.

    Every existing run in every paragraph of the cell is cleared, then a
    single new run holding the value is appended to the first paragraph
    (a paragraph is added first if the cell has none).

    Args:
        cell: An object exposing ``paragraphs`` and ``add_paragraph()``.
            NOTE(review): presumably a python-docx table cell - confirm.
        value: The value to write. Any falsy value (``None``, ``""``, ``0``,
            empty containers) leaves the cell untouched.

    Returns:
        None. The cell is modified in place.
    """
    # NOTE(review): this also skips meaningful falsy values such as 0.
    if not value:
        return

    paragraphs = cell.paragraphs

    # Clear existing content. The paragraphs and runs themselves are kept.
    for paragraph in paragraphs:
        for run in paragraph.runs:
            run.clear()

    # Add new content to the first paragraph.
    target = paragraphs[0] if paragraphs else cell.add_paragraph()
    new_run = target.add_run(str(value))
    new_run.font.color.rgb = RGBColor(0, 0, 0)  # Set to black
|
| 658 |
+
|
| 659 |
def extract_summary_snippets(desired_text: str):
|
| 660 |
sents = _sentences(desired_text)
|
| 661 |
dates = [m.group(0) for m in _date_pat.finditer(desired_text)]
|
|
|
|
| 1402 |
if all(n in t for n in needles):
|
| 1403 |
return j
|
| 1404 |
return None
|
| 1405 |
+
|
| 1406 |
+
# Enhanced column detection
|
| 1407 |
+
idx["name"] = first_col("driver", "name") or first_col("scheduler", "name")
|
| 1408 |
+
idx["driver_tlif"] = first_col("driver", "tlif") or first_col("driver", "course")
|
| 1409 |
+
idx["scheduler_tlif"] = first_col("scheduler", "tlif") or first_col("scheduler", "course")
|
| 1410 |
+
idx["medical"] = first_col("medical", "certificates") or first_col("medical")
|
| 1411 |
+
idx["roster"] = first_col("roster", "safe") or first_col("roster") or first_col("schedule")
|
| 1412 |
+
idx["fit"] = first_col("fit for duty") or first_col("fit", "duty")
|
| 1413 |
+
# Work diary might be split across two headers
|
| 1414 |
+
wd = first_col("work diary") or first_col("electronic work diary") or first_col("diary")
|
| 1415 |
if wd is not None: idx["wd"] = wd
|
| 1416 |
+
|
| 1417 |
return {k:v for k,v in idx.items() if v is not None}
|
| 1418 |
|
| 1419 |
def fill_driver_table(table: Table, arrays: Dict[str, List[str]]):
|
|
|
|
| 1421 |
if not colmap:
|
| 1422 |
return
|
| 1423 |
|
| 1424 |
+
names = arrays.get("Driver / Scheduler Name", [])
|
| 1425 |
+
driver_tlif = arrays.get("Driver TLIF Course # Completed", [])
|
| 1426 |
+
scheduler_tlif = arrays.get("Scheduler TLIF Course # Completed", [])
|
| 1427 |
+
medical = arrays.get("Medical Certificates (Current Yes/No) Date of expiry", [])
|
| 1428 |
+
rosters = arrays.get("Roster / Schedule / Safe Driving Plan (Date Range)", [])
|
| 1429 |
+
fit = arrays.get("Fit for Duty Statement Completed (Yes/No)", [])
|
| 1430 |
+
wd = arrays.get("Work Diary Pages (Page Numbers) Electronic Work Diary Records (Date Range)", [])
|
| 1431 |
|
| 1432 |
+
n = max(len(names), len(driver_tlif), len(scheduler_tlif), len(medical), len(rosters), len(fit), len(wd))
|
| 1433 |
clear_data_rows_keep_headers(table, header_rows=1)
|
| 1434 |
ensure_rows(table, n)
|
| 1435 |
|
|
|
|
| 1439 |
row = table.rows[i+1]
|
| 1440 |
if "name" in colmap and has_any_name:
|
| 1441 |
replace_red_in_cell(row.cells[colmap["name"]], names[i] if i < len(names) else "")
|
| 1442 |
+
if "driver_tlif" in colmap:
|
| 1443 |
+
replace_red_in_cell(row.cells[colmap["driver_tlif"]], driver_tlif[i] if i < len(driver_tlif) else "")
|
| 1444 |
+
if "scheduler_tlif" in colmap:
|
| 1445 |
+
replace_red_in_cell(row.cells[colmap["scheduler_tlif"]], scheduler_tlif[i] if i < len(scheduler_tlif) else "")
|
| 1446 |
+
if "medical" in colmap:
|
| 1447 |
+
replace_red_in_cell(row.cells[colmap["medical"]], medical[i] if i < len(medical) else "")
|
| 1448 |
if "roster" in colmap:
|
| 1449 |
replace_red_in_cell(row.cells[colmap["roster"]], rosters[i] if i < len(rosters) else "")
|
| 1450 |
if "fit" in colmap:
|
| 1451 |
+
replace_red_in_cell(row.cells[colmap["fit"]], fit[i] if i < len(fit) else "")
|
| 1452 |
if "wd" in colmap:
|
| 1453 |
+
replace_red_in_cell(row.cells[colmap["wd"]], wd[i] if i < len(wd) else "")
|
|
|
|
|
|
|
| 1454 |
|
| 1455 |
# ----------------------------- main mapping -----------------------------
|
| 1456 |
def flatten_simple_sections(data: Dict) -> Dict[str, str]:
|