Shami96 committed on
Commit
878a622
·
verified ·
1 Parent(s): 575fdf9

Update updated_word.py

Browse files
Files changed (1) hide show
  1. updated_word.py +135 -50
updated_word.py CHANGED
@@ -570,9 +570,10 @@ def handle_attendance_list_table_enhanced(table, flat_json):
570
  return replacements_made
571
 
572
  def fix_management_summary_details_column(table, flat_json):
573
- """DEBUG VERSION: Enhanced management summary processing with detailed debugging - FIXED FOR FLATTENED JSON"""
574
  replacements_made = 0
575
  print(f" 🎯 FIX: Management Summary DETAILS column processing")
 
576
 
577
  # Determine which type of management summary this is
578
  table_text = ""
@@ -600,24 +601,39 @@ def fix_management_summary_details_column(table, flat_json):
600
  for mgmt_type in mgmt_types:
601
  print(f" βœ… Confirmed {mgmt_type} table processing")
602
 
603
- # Build management data dict from flattened keys
604
  mgmt_data = {}
605
 
606
  # Look for flattened keys like "Mass Management Summary.Std 5. Verification"
 
607
  for key, value in flat_json.items():
608
  if key.startswith(mgmt_type + "."):
609
  # Extract the standard part (after the management type)
610
  std_key = key[len(mgmt_type) + 1:] # Remove "Mass Management Summary." prefix
611
- mgmt_data[std_key] = value
612
- print(f" βœ… Found flattened standard: '{std_key}' = {value}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
613
 
614
  if not mgmt_data:
615
- print(f" ⚠️ No JSON data found for {mgmt_type}")
616
  continue
617
 
618
- print(f" πŸ“‹ Processing {mgmt_type} with standards: {list(mgmt_data.keys())}")
619
 
620
- # DEBUG: Check every row in the table
621
  print(f" πŸ” Analyzing all {len(table.rows)} rows in table:")
622
 
623
  for row_idx, row in enumerate(table.rows):
@@ -630,59 +646,94 @@ def fix_management_summary_details_column(table, flat_json):
630
 
631
  print(f" πŸ“‹ Row {row_idx + 1}:")
632
  print(f" πŸ“„ Standard: '{standard_text}'")
633
- print(f" πŸ“„ Details: '{details_text[:50]}...' (length: {len(details_text)})")
634
- print(f" πŸ”΄ Has red text: {has_red_text(details_cell)}")
635
 
636
- # Skip header rows
637
- if any(header in standard_text_lower for header in ["standard", "requirement", "details", "management"]):
 
638
  print(f" ⏭️ Skipping header row")
639
  continue
640
 
641
- # Check if this row has red text
 
642
  if not has_red_text(details_cell):
643
- print(f" ⏭️ No red text found, skipping")
644
  continue
645
 
646
- print(f" 🎯 PROCESSING row {row_idx + 1}: '{standard_text}'")
647
 
648
- # Extract standard number and match
 
 
 
 
 
 
 
 
649
  replacement_value = None
650
  matched_std = None
651
 
 
652
  std_match = re.search(r'std\s*(\d+)', standard_text_lower)
653
  if std_match:
654
  std_num = std_match.group(1)
655
- print(f" 🎯 Looking for Standard {std_num}")
656
 
657
- # Look for matching standard in mgmt_data
658
  for std_key, std_value in mgmt_data.items():
659
  if f"std {std_num}" in std_key.lower():
660
  replacement_value = std_value
661
  matched_std = std_key
662
- print(f" βœ… Found match by std number: '{std_key}'")
663
  break
664
 
665
- # Keyword-based matching if std number doesn't work
666
  if not replacement_value:
667
- print(f" πŸ” No std number match, trying keyword matching...")
668
- if "daily" in standard_text_lower and "check" in standard_text_lower:
669
- replacement_value = find_best_standard_value(mgmt_data, ["Std 1. Daily Check", "Daily Check"])
670
- matched_std = "Daily Check related"
671
- elif "verification" in standard_text_lower:
672
- replacement_value = find_best_standard_value(mgmt_data, ["Std 5. Verification", "Verification"])
673
- matched_std = "Verification related"
674
- elif "internal review" in standard_text_lower:
675
- replacement_value = find_best_standard_value(mgmt_data, ["Std 6. Internal Review", "Std 7. Internal Review", "Std 5. Internal Review", "Internal Review"])
676
- matched_std = "Internal Review related"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
677
 
678
- # Last resort: use any available standard
679
- if not replacement_value and mgmt_data:
680
- print(f" πŸ” No specific match, using first available standard...")
 
 
 
681
  for std_key, std_value in mgmt_data.items():
682
- replacement_value = std_value
683
- matched_std = std_key
684
- print(f" ⚑ Using available standard: '{std_key}'")
685
- break
 
 
 
686
 
687
  # Apply replacement if found
688
  if replacement_value:
@@ -695,36 +746,70 @@ def fix_management_summary_details_column(table, flat_json):
695
  else:
696
  replacement_text = str(replacement_value)
697
 
698
- print(f" 🎯 About to replace red text with: '{replacement_text[:100]}...'")
699
-
700
- # DEBUG: Show red text segments before replacement
701
- red_segments = extract_red_text_segments(details_cell)
702
- print(f" πŸ” Found {len(red_segments)} red text segments:")
703
- for i, segment in enumerate(red_segments):
704
- print(f" Segment {i+1}: '{segment['text'][:50]}...'")
705
 
 
706
  cell_replacements = replace_red_text_in_cell(details_cell, replacement_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
707
  replacements_made += cell_replacements
708
 
709
  if cell_replacements > 0:
710
- print(f" βœ… SUCCESSFULLY replaced '{standard_text}' details in {mgmt_type}")
711
- print(f" πŸ“‹ Used data from: '{matched_std}'")
712
 
713
  # Verify the replacement worked
714
  new_details_text = get_clean_text(details_cell).strip()
715
- print(f" πŸ” New details text: '{new_details_text[:100]}...'")
 
716
  else:
717
  print(f" ❌ Failed to replace red text in cell")
718
- print(f" πŸ” Cell still contains: '{get_clean_text(details_cell)[:100]}...'")
719
  else:
720
- print(f" ⚠️ No replacement found for '{standard_text}' in {mgmt_type}")
721
- print(f" πŸ“‹ Available standards: {list(mgmt_data.keys())}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
722
  else:
723
  print(f" ⚠️ Row {row_idx + 1} has insufficient columns ({len(row.cells)})")
724
 
725
- print(f" πŸ“Š Total management summary replacements: {replacements_made}")
726
  return replacements_made
727
 
 
728
  def find_best_standard_value(mgmt_data, candidate_keys):
729
  """ENHANCED: Find the best matching value for a standard from management data"""
730
  print(f" πŸ” Searching for candidates: {candidate_keys}")
 
570
  return replacements_made
571
 
572
  def fix_management_summary_details_column(table, flat_json):
573
+ """CORRECTED VERSION: Replace red text with UPDATED values from JSON (not old extracted values)"""
574
  replacements_made = 0
575
  print(f" 🎯 FIX: Management Summary DETAILS column processing")
576
+ print(f" πŸ“‹ NOTE: JSON contains UPDATED values to replace red text with")
577
 
578
  # Determine which type of management summary this is
579
  table_text = ""
 
601
  for mgmt_type in mgmt_types:
602
  print(f" βœ… Confirmed {mgmt_type} table processing")
603
 
604
+ # Build management data dict from flattened keys - these contain UPDATED values
605
  mgmt_data = {}
606
 
607
  # Look for flattened keys like "Mass Management Summary.Std 5. Verification"
608
+ # IMPORTANT: Prioritize longer, more detailed values over shorter ones
609
  for key, value in flat_json.items():
610
  if key.startswith(mgmt_type + "."):
611
  # Extract the standard part (after the management type)
612
  std_key = key[len(mgmt_type) + 1:] # Remove "Mass Management Summary." prefix
613
+
614
+ # Check if this is a longer, more detailed version than what we already have
615
+ if std_key in mgmt_data:
616
+ # Compare value lengths - prefer longer, more detailed content
617
+ existing_value = mgmt_data[std_key]
618
+ existing_length = len(str(existing_value)) if not isinstance(existing_value, list) else len(str(existing_value[0]) if existing_value else "")
619
+ new_length = len(str(value)) if not isinstance(value, list) else len(str(value[0]) if value else "")
620
+
621
+ if new_length > existing_length:
622
+ mgmt_data[std_key] = value
623
+ print(f" βœ… UPDATED to longer standard: '{std_key}' = {value}")
624
+ else:
625
+ print(f" ⏭️ Keeping existing longer standard: '{std_key}'")
626
+ else:
627
+ mgmt_data[std_key] = value
628
+ print(f" βœ… Found UPDATED standard: '{std_key}' = {value}")
629
 
630
  if not mgmt_data:
631
+ print(f" ⚠️ No UPDATED JSON data found for {mgmt_type}")
632
  continue
633
 
634
+ print(f" πŸ“‹ Processing {mgmt_type} with {len(mgmt_data)} updated standards: {list(mgmt_data.keys())}")
635
 
636
+ # Process each row looking for red text in details column
637
  print(f" πŸ” Analyzing all {len(table.rows)} rows in table:")
638
 
639
  for row_idx, row in enumerate(table.rows):
 
646
 
647
  print(f" πŸ“‹ Row {row_idx + 1}:")
648
  print(f" πŸ“„ Standard: '{standard_text}'")
649
+ print(f" πŸ“„ Current Details: '{details_text[:50]}...' (length: {len(details_text)})")
650
+ print(f" πŸ”΄ Has red text (OLD data): {has_red_text(details_cell)}")
651
 
652
+ # Skip header rows - be more specific about what constitutes a header
653
+ header_indicators = ["standard", "requirement", "details", mgmt_type.lower().split()[0]]
654
+ if any(header in standard_text_lower for header in header_indicators) and len(standard_text) < 50:
655
  print(f" ⏭️ Skipping header row")
656
  continue
657
 
658
+ # IMPORTANT: We want to replace red text (old data) with updated data from JSON
659
+ # Check if this row has red text in details cell - this is what we need to replace
660
  if not has_red_text(details_cell):
661
+ print(f" ⏭️ No red text found in details cell (already updated?), skipping")
662
  continue
663
 
664
+ print(f" 🎯 PROCESSING row {row_idx + 1} - REPLACING OLD red text with NEW data")
665
 
666
+ # Extract current red text (this is the OLD data we're replacing)
667
+ red_segments = extract_red_text_segments(details_cell)
668
+ current_red_text = ""
669
+ for segment in red_segments:
670
+ current_red_text += segment['text']
671
+
672
+ print(f" πŸ”΄ Current red text (OLD): '{current_red_text[:100]}...'")
673
+
674
+ # Find the UPDATED replacement value from JSON
675
  replacement_value = None
676
  matched_std = None
677
 
678
+ # Strategy 1: Extract standard number and match
679
  std_match = re.search(r'std\s*(\d+)', standard_text_lower)
680
  if std_match:
681
  std_num = std_match.group(1)
682
+ print(f" 🎯 Looking for UPDATED Standard {std_num} data")
683
 
684
+ # Look for matching standard in mgmt_data (contains UPDATED values)
685
  for std_key, std_value in mgmt_data.items():
686
  if f"std {std_num}" in std_key.lower():
687
  replacement_value = std_value
688
  matched_std = std_key
689
+ print(f" βœ… Found UPDATED data for std {std_num}: '{std_key}'")
690
  break
691
 
692
+ # Strategy 2: Keyword-based matching if std number doesn't work
693
  if not replacement_value:
694
+ print(f" πŸ” No std number match, trying keyword matching for UPDATED data...")
695
+
696
+ # More comprehensive keyword matching
697
+ keyword_mappings = {
698
+ "daily check": ["Std 1. Daily Check", "Daily Check"],
699
+ "verification": ["Std 5. Verification", "Verification"],
700
+ "internal review": ["Std 6. Internal Review", "Std 7. Internal Review", "Std 5. Internal Review", "Internal Review"],
701
+ "fault recording": ["Std 2. Fault Recording", "Fault Recording/ Reporting"],
702
+ "fault repair": ["Std 3. Fault Repair", "Fault Repair"],
703
+ "maintenance schedules": ["Std 4. Maintenance Schedules", "Maintenance Schedules"],
704
+ "responsibilities": ["Std 1. Responsibilities", "Std 6. Responsibilities"],
705
+ "vehicle control": ["Std 2. Vehicle Control", "Vehicle Control"],
706
+ "vehicle use": ["Std 3. Vehicle Use", "Vehicle Use"],
707
+ "records and documentation": ["Std 4. Records", "Std 5. Records", "Records and Documentation"],
708
+ "training": ["Std 8. Training", "Std 3. Training", "Training"],
709
+ "suspension": ["Std 8. Maintenance of Suspension", "Suspension"],
710
+ "scheduling": ["Std 1. Scheduling", "Scheduling"],
711
+ "health and wellbeing": ["Std 2. Health", "Health and wellbeing"],
712
+ "workplace conditions": ["Std 7. Workplace", "Workplace conditions"]
713
+ }
714
+
715
+ for keyword, candidates in keyword_mappings.items():
716
+ if keyword in standard_text_lower:
717
+ replacement_value = find_best_standard_value(mgmt_data, candidates)
718
+ if replacement_value:
719
+ matched_std = f"{keyword} related"
720
+ print(f" βœ… Found UPDATED data for keyword '{keyword}'")
721
+ break
722
 
723
+ # Strategy 3: Try exact standard name matching
724
+ if not replacement_value:
725
+ print(f" πŸ” Trying exact standard name matching for UPDATED data...")
726
+ # Clean the standard text for better matching
727
+ clean_standard = re.sub(r'\([^)]*\)', '', standard_text).strip()
728
+
729
  for std_key, std_value in mgmt_data.items():
730
+ # Try partial matching
731
+ if (clean_standard.lower() in std_key.lower() or
732
+ std_key.lower() in clean_standard.lower()):
733
+ replacement_value = std_value
734
+ matched_std = std_key
735
+ print(f" βœ… Found UPDATED data via partial match: '{std_key}'")
736
+ break
737
 
738
  # Apply replacement if found
739
  if replacement_value:
 
746
  else:
747
  replacement_text = str(replacement_value)
748
 
749
+ print(f" 🎯 REPLACING old red text with UPDATED data: '{replacement_text[:100]}...'")
 
 
 
 
 
 
750
 
751
+ # Use robust red text replacement
752
  cell_replacements = replace_red_text_in_cell(details_cell, replacement_text)
753
+
754
+ # FALLBACK: If replace_red_text_in_cell fails, try manual replacement
755
+ if cell_replacements == 0:
756
+ print(f" ⚠️ Standard replacement failed, trying manual approach...")
757
+
758
+ # Try to replace red text manually
759
+ for paragraph in details_cell.paragraphs:
760
+ for run in paragraph.runs:
761
+ if is_red(run) and run.text.strip():
762
+ print(f" πŸ”§ Manually replacing red run: '{run.text[:50]}...'")
763
+ run.text = replacement_text
764
+ run.font.color.rgb = RGBColor(0, 0, 0)
765
+ cell_replacements = 1
766
+ break
767
+ if cell_replacements > 0:
768
+ break
769
+
770
  replacements_made += cell_replacements
771
 
772
  if cell_replacements > 0:
773
+ print(f" βœ… SUCCESSFULLY UPDATED '{standard_text}' with NEW data in {mgmt_type}")
774
+ print(f" πŸ“‹ Used UPDATED data from: '{matched_std}'")
775
 
776
  # Verify the replacement worked
777
  new_details_text = get_clean_text(details_cell).strip()
778
+ print(f" πŸ” NEW details text: '{new_details_text[:100]}...'")
779
+ print(f" πŸŽ‰ OLD red text replaced with UPDATED data!")
780
  else:
781
  print(f" ❌ Failed to replace red text in cell")
782
+ print(f" πŸ” Cell still contains OLD data: '{get_clean_text(details_cell)[:100]}...'")
783
  else:
784
+ print(f" ⚠️ No UPDATED replacement found for '{standard_text}' in {mgmt_type}")
785
+ print(f" πŸ“‹ Available UPDATED standards: {list(mgmt_data.keys())}")
786
+
787
+ # FALLBACK: Try to find ANY available standard that might fit
788
+ if mgmt_data and current_red_text:
789
+ print(f" πŸ”„ Trying fallback - any available UPDATED standard...")
790
+ # Use the first available standard as a fallback
791
+ first_std_key = list(mgmt_data.keys())[0]
792
+ fallback_value = mgmt_data[first_std_key]
793
+
794
+ if isinstance(fallback_value, list):
795
+ fallback_text = "\n".join(str(item) for item in fallback_value)
796
+ else:
797
+ fallback_text = str(fallback_value)
798
+
799
+ print(f" πŸ”„ Using fallback UPDATED data: '{fallback_text[:100]}...'")
800
+
801
+ cell_replacements = replace_red_text_in_cell(details_cell, fallback_text)
802
+ if cell_replacements > 0:
803
+ replacements_made += cell_replacements
804
+ print(f" βœ… Applied fallback UPDATED data successfully")
805
+
806
  else:
807
  print(f" ⚠️ Row {row_idx + 1} has insufficient columns ({len(row.cells)})")
808
 
809
+ print(f" πŸ“Š Total management summary UPDATES: {replacements_made}")
810
  return replacements_made
811
 
812
+
813
  def find_best_standard_value(mgmt_data, candidate_keys):
814
  """ENHANCED: Find the best matching value for a standard from management data"""
815
  print(f" πŸ” Searching for candidates: {candidate_keys}")