Shami96 committed on
Commit
274cd20
·
verified ·
1 Parent(s): d053da2

Update updated_word.py

Browse files
Files changed (1) hide show
  1. updated_word.py +80 -14
updated_word.py CHANGED
@@ -603,6 +603,59 @@ def split_sentences_keep(text: str) -> List[str]:
603
  _sent_split = re.compile(r'(?<=[.!?])\s+|\n+')
604
  _date_pat = re.compile(r'\b(?:\d{1,2}(?:st|nd|rd|th)\s+[A-Za-z]+\s+\d{4}|\d{1,2}/\d{1,2}/\d{2,4}|[A-Za-z]+\s+\d{1,2},\s*\d{4})\b')
605
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
606
  def extract_summary_snippets(desired_text: str):
607
  sents = _sentences(desired_text)
608
  dates = [m.group(0) for m in _date_pat.finditer(desired_text)]
@@ -1349,12 +1402,18 @@ def map_driver_cols(table: Table) -> Dict[str,int]:
1349
  if all(n in t for n in needles):
1350
  return j
1351
  return None
1352
- idx["name"] = first_col("driver", "name")
1353
- idx["roster"]= first_col("roster", "safe")
1354
- idx["fit"] = first_col("fit for duty")
1355
- # Work diary might be split across two headers; match "work diary" OR "electronic work diary"
1356
- wd = first_col("work diary") or first_col("electronic work diary")
 
 
 
 
 
1357
  if wd is not None: idx["wd"] = wd
 
1358
  return {k:v for k,v in idx.items() if v is not None}
1359
 
1360
  def fill_driver_table(table: Table, arrays: Dict[str, List[str]]):
@@ -1362,12 +1421,15 @@ def fill_driver_table(table: Table, arrays: Dict[str, List[str]]):
1362
  if not colmap:
1363
  return
1364
 
1365
- names = arrays.get("Driver / Scheduler Name", [])
1366
- rosters = arrays.get("Roster / Schedule / Safe Driving Plan (Date Range)", [])
1367
- fit = arrays.get("Fit for Duty Statement Completed (Yes/No)", [])
1368
- wd = arrays.get("Work Diary Pages (Page Numbers) Electronic Work Diary Records (Date Range)", [])
 
 
 
1369
 
1370
- n = max(len(rosters), len(fit), len(wd), len(names))
1371
  clear_data_rows_keep_headers(table, header_rows=1)
1372
  ensure_rows(table, n)
1373
 
@@ -1377,14 +1439,18 @@ def fill_driver_table(table: Table, arrays: Dict[str, List[str]]):
1377
  row = table.rows[i+1]
1378
  if "name" in colmap and has_any_name:
1379
  replace_red_in_cell(row.cells[colmap["name"]], names[i] if i < len(names) else "")
 
 
 
 
 
 
1380
  if "roster" in colmap:
1381
  replace_red_in_cell(row.cells[colmap["roster"]], rosters[i] if i < len(rosters) else "")
1382
  if "fit" in colmap:
1383
- replace_red_in_cell(row.cells[colmap["fit"]], fit[i] if i < len(fit) else "")
1384
  if "wd" in colmap:
1385
- replace_red_in_cell(row.cells[colmap["wd"]], wd[i] if i < len(wd) else "")
1386
-
1387
-
1388
 
1389
  # ----------------------------- main mapping -----------------------------
1390
  def flatten_simple_sections(data: Dict) -> Dict[str, str]:
 
603
  # Sentence delimiter: whitespace that follows '.', '!' or '?', or a run of one or more newlines.
  _sent_split = re.compile(r'(?<=[.!?])\s+|\n+')
604
  # Date matcher with three alternatives:
  #   * day with a mandatory ordinal suffix, a word, then a 4-digit year  (e.g. "1st March 2024")
  #   * slash-separated numbers with a 2-4 digit last part                (e.g. "1/3/24")
  #   * a word, a 1-2 digit day, a comma, then a 4-digit year             (e.g. "March 1, 2024")
  # The "word" parts are any run of ASCII letters; they are not validated as month names.
  _date_pat = re.compile(r'\b(?:\d{1,2}(?:st|nd|rd|th)\s+[A-Za-z]+\s+\d{4}|\d{1,2}/\d{1,2}/\d{2,4}|[A-Za-z]+\s+\d{1,2},\s*\d{4})\b')
605
 
606
def _sentences(text: str) -> list:
    """Break *text* into trimmed, non-empty sentence strings.

    Splitting is delegated to the module-level ``_sent_split`` pattern.
    A falsy input (empty string or ``None``) yields an empty list.
    """
    if not text:
        return []

    pieces = []
    for chunk in _sent_split.split(text):
        trimmed = chunk.strip()
        # Drop fragments that are empty or whitespace-only after trimming.
        if trimmed:
            pieces.append(trimmed)
    return pieces
611
+
612
def _extract_sheet_phrase_from_desired(text: str) -> str:
    """Return the first period-free stretch of *text* containing the word "sheet".

    The match is case-insensitive and requires "sheet" as a whole word
    (so "spreadsheet" does not count). The result is stripped of
    surrounding whitespace. An empty string is returned when *text* is
    falsy or no such stretch exists.
    """
    if text:
        # The span may not cross a '.', so it stays within one period-delimited segment.
        found = re.search(r'([^.]*?\bsheet\b[^.]*)', text, re.I)
        if found is not None:
            return found.group(1).strip()
    return ""
619
+
620
def find_all_summary_tables(doc):
    """Collect the tables in *doc* whose header row marks them as a summary table.

    A table qualifies when the lower-cased text of its first row contains
    'summary' or 'details' AND names one of the known sections
    ('maintenance', 'mass', 'fatigue' - checked in that order).

    Returns a list of ``(section_key, table, label_col, details_col)`` tuples.
    ``label_col`` is always 0; ``details_col`` is the index of the first
    header cell containing 'details', or 1 if none does.
    """
    section_names = ('maintenance', 'mass', 'fatigue')
    found = []

    for tbl in doc.tables:
        # Tables with no rows have no header to inspect.
        if not tbl.rows:
            continue

        heading = ' '.join(cell_text(c) for c in tbl.rows[0].cells).lower()
        if 'summary' not in heading and 'details' not in heading:
            continue

        # First section name present in the header wins.
        kind = next((name for name in section_names if name in heading), None)
        if not kind:
            continue

        # Locate the details column; fall back to column 1 if no header cell mentions it.
        details_idx = next(
            (
                pos
                for pos, c in enumerate(tbl.rows[0].cells)
                if 'details' in cell_text(c).lower()
            ),
            1,
        )
        found.append((kind, tbl, 0, details_idx))

    return found
639
+
640
def patch_details_cell_from_json(cell, value):
    """Overwrite the text of *cell* with ``str(value)`` in black.

    Does nothing when *value* is falsy (``None``, ``""``, ``0``, empty
    containers). Otherwise every run in every paragraph of the cell is
    cleared, and a single new run holding the value is appended to the
    first paragraph (a paragraph is added if the cell has none).

    NOTE(review): assumes *cell* is a python-docx table cell and that
    ``RGBColor`` comes from ``docx.shared`` - confirm against the file's imports.
    """
    if not value:
        return

    # Blank out whatever text the cell currently shows.
    for para in cell.paragraphs:
        for existing in para.runs:
            existing.clear()

    # Write into the first paragraph, creating one only if the cell is empty.
    paragraphs = cell.paragraphs
    target = paragraphs[0] if paragraphs else cell.add_paragraph()

    new_run = target.add_run(str(value))
    new_run.font.color.rgb = RGBColor(0, 0, 0)  # black
658
+
659
  def extract_summary_snippets(desired_text: str):
660
  sents = _sentences(desired_text)
661
  dates = [m.group(0) for m in _date_pat.finditer(desired_text)]
 
1402
  if all(n in t for n in needles):
1403
  return j
1404
  return None
1405
+
1406
+ # Enhanced column detection
1407
+ idx["name"] = first_col("driver", "name") or first_col("scheduler", "name")
1408
+ idx["driver_tlif"] = first_col("driver", "tlif") or first_col("driver", "course")
1409
+ idx["scheduler_tlif"] = first_col("scheduler", "tlif") or first_col("scheduler", "course")
1410
+ idx["medical"] = first_col("medical", "certificates") or first_col("medical")
1411
+ idx["roster"] = first_col("roster", "safe") or first_col("roster") or first_col("schedule")
1412
+ idx["fit"] = first_col("fit for duty") or first_col("fit", "duty")
1413
+ # Work diary might be split across two headers
1414
+ wd = first_col("work diary") or first_col("electronic work diary") or first_col("diary")
1415
  if wd is not None: idx["wd"] = wd
1416
+
1417
  return {k:v for k,v in idx.items() if v is not None}
1418
 
1419
  def fill_driver_table(table: Table, arrays: Dict[str, List[str]]):
 
1421
  if not colmap:
1422
  return
1423
 
1424
+ names = arrays.get("Driver / Scheduler Name", [])
1425
+ driver_tlif = arrays.get("Driver TLIF Course # Completed", [])
1426
+ scheduler_tlif = arrays.get("Scheduler TLIF Course # Completed", [])
1427
+ medical = arrays.get("Medical Certificates (Current Yes/No) Date of expiry", [])
1428
+ rosters = arrays.get("Roster / Schedule / Safe Driving Plan (Date Range)", [])
1429
+ fit = arrays.get("Fit for Duty Statement Completed (Yes/No)", [])
1430
+ wd = arrays.get("Work Diary Pages (Page Numbers) Electronic Work Diary Records (Date Range)", [])
1431
 
1432
+ n = max(len(names), len(driver_tlif), len(scheduler_tlif), len(medical), len(rosters), len(fit), len(wd))
1433
  clear_data_rows_keep_headers(table, header_rows=1)
1434
  ensure_rows(table, n)
1435
 
 
1439
  row = table.rows[i+1]
1440
  if "name" in colmap and has_any_name:
1441
  replace_red_in_cell(row.cells[colmap["name"]], names[i] if i < len(names) else "")
1442
+ if "driver_tlif" in colmap:
1443
+ replace_red_in_cell(row.cells[colmap["driver_tlif"]], driver_tlif[i] if i < len(driver_tlif) else "")
1444
+ if "scheduler_tlif" in colmap:
1445
+ replace_red_in_cell(row.cells[colmap["scheduler_tlif"]], scheduler_tlif[i] if i < len(scheduler_tlif) else "")
1446
+ if "medical" in colmap:
1447
+ replace_red_in_cell(row.cells[colmap["medical"]], medical[i] if i < len(medical) else "")
1448
  if "roster" in colmap:
1449
  replace_red_in_cell(row.cells[colmap["roster"]], rosters[i] if i < len(rosters) else "")
1450
  if "fit" in colmap:
1451
+ replace_red_in_cell(row.cells[colmap["fit"]], fit[i] if i < len(fit) else "")
1452
  if "wd" in colmap:
1453
+ replace_red_in_cell(row.cells[colmap["wd"]], wd[i] if i < len(wd) else "")
 
 
1454
 
1455
  # ----------------------------- main mapping -----------------------------
1456
  def flatten_simple_sections(data: Dict) -> Dict[str, str]: