Spaces:
Running
Running
Update updated_word.py
Browse files- updated_word.py +135 -50
updated_word.py
CHANGED
|
@@ -570,9 +570,10 @@ def handle_attendance_list_table_enhanced(table, flat_json):
|
|
| 570 |
return replacements_made
|
| 571 |
|
| 572 |
def fix_management_summary_details_column(table, flat_json):
|
| 573 |
-
"""
|
| 574 |
replacements_made = 0
|
| 575 |
print(f" 🎯 FIX: Management Summary DETAILS column processing")
|
|
|
|
| 576 |
|
| 577 |
# Determine which type of management summary this is
|
| 578 |
table_text = ""
|
|
@@ -600,24 +601,39 @@ def fix_management_summary_details_column(table, flat_json):
|
|
| 600 |
for mgmt_type in mgmt_types:
|
| 601 |
print(f" ✅ Confirmed {mgmt_type} table processing")
|
| 602 |
|
| 603 |
-
# Build management data dict from flattened keys
|
| 604 |
mgmt_data = {}
|
| 605 |
|
| 606 |
# Look for flattened keys like "Mass Management Summary.Std 5. Verification"
|
|
|
|
| 607 |
for key, value in flat_json.items():
|
| 608 |
if key.startswith(mgmt_type + "."):
|
| 609 |
# Extract the standard part (after the management type)
|
| 610 |
std_key = key[len(mgmt_type) + 1:] # Remove "Mass Management Summary." prefix
|
| 611 |
-
|
| 612 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 613 |
|
| 614 |
if not mgmt_data:
|
| 615 |
-
print(f" ⚠️ No JSON data found for {mgmt_type}")
|
| 616 |
continue
|
| 617 |
|
| 618 |
-
print(f" π Processing {mgmt_type} with standards: {list(mgmt_data.keys())}")
|
| 619 |
|
| 620 |
-
#
|
| 621 |
print(f" π Analyzing all {len(table.rows)} rows in table:")
|
| 622 |
|
| 623 |
for row_idx, row in enumerate(table.rows):
|
|
@@ -630,59 +646,94 @@ def fix_management_summary_details_column(table, flat_json):
|
|
| 630 |
|
| 631 |
print(f" π Row {row_idx + 1}:")
|
| 632 |
print(f" π Standard: '{standard_text}'")
|
| 633 |
-
print(f" π Details: '{details_text[:50]}...' (length: {len(details_text)})")
|
| 634 |
-
print(f" 🔴 Has red text: {has_red_text(details_cell)}")
|
| 635 |
|
| 636 |
-
# Skip header rows
|
| 637 |
-
|
|
|
|
| 638 |
print(f" ⏭️ Skipping header row")
|
| 639 |
continue
|
| 640 |
|
| 641 |
-
#
|
|
|
|
| 642 |
if not has_red_text(details_cell):
|
| 643 |
-
print(f" ⏭️ No red text found, skipping")
|
| 644 |
continue
|
| 645 |
|
| 646 |
-
print(f" π― PROCESSING row {row_idx + 1}
|
| 647 |
|
| 648 |
-
# Extract
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 649 |
replacement_value = None
|
| 650 |
matched_std = None
|
| 651 |
|
|
|
|
| 652 |
std_match = re.search(r'std\s*(\d+)', standard_text_lower)
|
| 653 |
if std_match:
|
| 654 |
std_num = std_match.group(1)
|
| 655 |
-
print(f" 🎯 Looking for Standard {std_num}")
|
| 656 |
|
| 657 |
-
# Look for matching standard in mgmt_data
|
| 658 |
for std_key, std_value in mgmt_data.items():
|
| 659 |
if f"std {std_num}" in std_key.lower():
|
| 660 |
replacement_value = std_value
|
| 661 |
matched_std = std_key
|
| 662 |
-
print(f" β
Found
|
| 663 |
break
|
| 664 |
|
| 665 |
-
# Keyword-based matching if std number doesn't work
|
| 666 |
if not replacement_value:
|
| 667 |
-
print(f" π No std number match, trying keyword matching...")
|
| 668 |
-
|
| 669 |
-
|
| 670 |
-
|
| 671 |
-
|
| 672 |
-
|
| 673 |
-
|
| 674 |
-
|
| 675 |
-
|
| 676 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 677 |
|
| 678 |
-
#
|
| 679 |
-
if not replacement_value
|
| 680 |
-
print(f" π
|
|
|
|
|
|
|
|
|
|
| 681 |
for std_key, std_value in mgmt_data.items():
|
| 682 |
-
|
| 683 |
-
|
| 684 |
-
|
| 685 |
-
|
|
|
|
|
|
|
|
|
|
| 686 |
|
| 687 |
# Apply replacement if found
|
| 688 |
if replacement_value:
|
|
@@ -695,36 +746,70 @@ def fix_management_summary_details_column(table, flat_json):
|
|
| 695 |
else:
|
| 696 |
replacement_text = str(replacement_value)
|
| 697 |
|
| 698 |
-
print(f" π―
|
| 699 |
-
|
| 700 |
-
# DEBUG: Show red text segments before replacement
|
| 701 |
-
red_segments = extract_red_text_segments(details_cell)
|
| 702 |
-
print(f" π Found {len(red_segments)} red text segments:")
|
| 703 |
-
for i, segment in enumerate(red_segments):
|
| 704 |
-
print(f" Segment {i+1}: '{segment['text'][:50]}...'")
|
| 705 |
|
|
|
|
| 706 |
cell_replacements = replace_red_text_in_cell(details_cell, replacement_text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 707 |
replacements_made += cell_replacements
|
| 708 |
|
| 709 |
if cell_replacements > 0:
|
| 710 |
-
print(f" β
SUCCESSFULLY
|
| 711 |
-
print(f" π Used data from: '{matched_std}'")
|
| 712 |
|
| 713 |
# Verify the replacement worked
|
| 714 |
new_details_text = get_clean_text(details_cell).strip()
|
| 715 |
-
print(f" π
|
|
|
|
| 716 |
else:
|
| 717 |
print(f" β Failed to replace red text in cell")
|
| 718 |
-
print(f" π Cell still contains: '{get_clean_text(details_cell)[:100]}...'")
|
| 719 |
else:
|
| 720 |
-
print(f" ⚠️ No replacement found for '{standard_text}' in {mgmt_type}")
|
| 721 |
-
print(f" π Available standards: {list(mgmt_data.keys())}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 722 |
else:
|
| 723 |
print(f" ⚠️ Row {row_idx + 1} has insufficient columns ({len(row.cells)})")
|
| 724 |
|
| 725 |
-
print(f" π Total management summary
|
| 726 |
return replacements_made
|
| 727 |
|
|
|
|
| 728 |
def find_best_standard_value(mgmt_data, candidate_keys):
|
| 729 |
"""ENHANCED: Find the best matching value for a standard from management data"""
|
| 730 |
print(f" π Searching for candidates: {candidate_keys}")
|
|
|
|
| 570 |
return replacements_made
|
| 571 |
|
| 572 |
def fix_management_summary_details_column(table, flat_json):
|
| 573 |
+
"""CORRECTED VERSION: Replace red text with UPDATED values from JSON (not old extracted values)"""
|
| 574 |
replacements_made = 0
|
| 575 |
print(f" 🎯 FIX: Management Summary DETAILS column processing")
|
| 576 |
+
print(f" π NOTE: JSON contains UPDATED values to replace red text with")
|
| 577 |
|
| 578 |
# Determine which type of management summary this is
|
| 579 |
table_text = ""
|
|
|
|
| 601 |
for mgmt_type in mgmt_types:
|
| 602 |
print(f" ✅ Confirmed {mgmt_type} table processing")
|
| 603 |
|
| 604 |
+
# Build management data dict from flattened keys - these contain UPDATED values
|
| 605 |
mgmt_data = {}
|
| 606 |
|
| 607 |
# Look for flattened keys like "Mass Management Summary.Std 5. Verification"
|
| 608 |
+
# IMPORTANT: Prioritize longer, more detailed values over shorter ones
|
| 609 |
for key, value in flat_json.items():
|
| 610 |
if key.startswith(mgmt_type + "."):
|
| 611 |
# Extract the standard part (after the management type)
|
| 612 |
std_key = key[len(mgmt_type) + 1:] # Remove "Mass Management Summary." prefix
|
| 613 |
+
|
| 614 |
+
# Check if this is a longer, more detailed version than what we already have
|
| 615 |
+
if std_key in mgmt_data:
|
| 616 |
+
# Compare value lengths - prefer longer, more detailed content
|
| 617 |
+
existing_value = mgmt_data[std_key]
|
| 618 |
+
existing_length = len(str(existing_value)) if not isinstance(existing_value, list) else len(str(existing_value[0]) if existing_value else "")
|
| 619 |
+
new_length = len(str(value)) if not isinstance(value, list) else len(str(value[0]) if value else "")
|
| 620 |
+
|
| 621 |
+
if new_length > existing_length:
|
| 622 |
+
mgmt_data[std_key] = value
|
| 623 |
+
print(f" ✅ UPDATED to longer standard: '{std_key}' = {value}")
|
| 624 |
+
else:
|
| 625 |
+
print(f" ⏭️ Keeping existing longer standard: '{std_key}'")
|
| 626 |
+
else:
|
| 627 |
+
mgmt_data[std_key] = value
|
| 628 |
+
print(f" ✅ Found UPDATED standard: '{std_key}' = {value}")
|
| 629 |
|
| 630 |
if not mgmt_data:
|
| 631 |
+
print(f" ⚠️ No UPDATED JSON data found for {mgmt_type}")
|
| 632 |
continue
|
| 633 |
|
| 634 |
+
print(f" π Processing {mgmt_type} with {len(mgmt_data)} updated standards: {list(mgmt_data.keys())}")
|
| 635 |
|
| 636 |
+
# Process each row looking for red text in details column
|
| 637 |
print(f" π Analyzing all {len(table.rows)} rows in table:")
|
| 638 |
|
| 639 |
for row_idx, row in enumerate(table.rows):
|
|
|
|
| 646 |
|
| 647 |
print(f" π Row {row_idx + 1}:")
|
| 648 |
print(f" π Standard: '{standard_text}'")
|
| 649 |
+
print(f" π Current Details: '{details_text[:50]}...' (length: {len(details_text)})")
|
| 650 |
+
print(f" 🔴 Has red text (OLD data): {has_red_text(details_cell)}")
|
| 651 |
|
| 652 |
+
# Skip header rows - be more specific about what constitutes a header
|
| 653 |
+
header_indicators = ["standard", "requirement", "details", mgmt_type.lower().split()[0]]
|
| 654 |
+
if any(header in standard_text_lower for header in header_indicators) and len(standard_text) < 50:
|
| 655 |
print(f" ⏭️ Skipping header row")
|
| 656 |
continue
|
| 657 |
|
| 658 |
+
# IMPORTANT: We want to replace red text (old data) with updated data from JSON
|
| 659 |
+
# Check if this row has red text in details cell - this is what we need to replace
|
| 660 |
if not has_red_text(details_cell):
|
| 661 |
+
print(f" ⏭️ No red text found in details cell (already updated?), skipping")
|
| 662 |
continue
|
| 663 |
|
| 664 |
+
print(f" 🎯 PROCESSING row {row_idx + 1} - REPLACING OLD red text with NEW data")
|
| 665 |
|
| 666 |
+
# Extract current red text (this is the OLD data we're replacing)
|
| 667 |
+
red_segments = extract_red_text_segments(details_cell)
|
| 668 |
+
current_red_text = ""
|
| 669 |
+
for segment in red_segments:
|
| 670 |
+
current_red_text += segment['text']
|
| 671 |
+
|
| 672 |
+
print(f" 🔴 Current red text (OLD): '{current_red_text[:100]}...'")
|
| 673 |
+
|
| 674 |
+
# Find the UPDATED replacement value from JSON
|
| 675 |
replacement_value = None
|
| 676 |
matched_std = None
|
| 677 |
|
| 678 |
+
# Strategy 1: Extract standard number and match
|
| 679 |
std_match = re.search(r'std\s*(\d+)', standard_text_lower)
|
| 680 |
if std_match:
|
| 681 |
std_num = std_match.group(1)
|
| 682 |
+
print(f" 🎯 Looking for UPDATED Standard {std_num} data")
|
| 683 |
|
| 684 |
+
# Look for matching standard in mgmt_data (contains UPDATED values)
|
| 685 |
for std_key, std_value in mgmt_data.items():
|
| 686 |
if f"std {std_num}" in std_key.lower():
|
| 687 |
replacement_value = std_value
|
| 688 |
matched_std = std_key
|
| 689 |
+
print(f" ✅ Found UPDATED data for std {std_num}: '{std_key}'")
|
| 690 |
break
|
| 691 |
|
| 692 |
+
# Strategy 2: Keyword-based matching if std number doesn't work
|
| 693 |
if not replacement_value:
|
| 694 |
+
print(f" π No std number match, trying keyword matching for UPDATED data...")
|
| 695 |
+
|
| 696 |
+
# More comprehensive keyword matching
|
| 697 |
+
keyword_mappings = {
|
| 698 |
+
"daily check": ["Std 1. Daily Check", "Daily Check"],
|
| 699 |
+
"verification": ["Std 5. Verification", "Verification"],
|
| 700 |
+
"internal review": ["Std 6. Internal Review", "Std 7. Internal Review", "Std 5. Internal Review", "Internal Review"],
|
| 701 |
+
"fault recording": ["Std 2. Fault Recording", "Fault Recording/ Reporting"],
|
| 702 |
+
"fault repair": ["Std 3. Fault Repair", "Fault Repair"],
|
| 703 |
+
"maintenance schedules": ["Std 4. Maintenance Schedules", "Maintenance Schedules"],
|
| 704 |
+
"responsibilities": ["Std 1. Responsibilities", "Std 6. Responsibilities"],
|
| 705 |
+
"vehicle control": ["Std 2. Vehicle Control", "Vehicle Control"],
|
| 706 |
+
"vehicle use": ["Std 3. Vehicle Use", "Vehicle Use"],
|
| 707 |
+
"records and documentation": ["Std 4. Records", "Std 5. Records", "Records and Documentation"],
|
| 708 |
+
"training": ["Std 8. Training", "Std 3. Training", "Training"],
|
| 709 |
+
"suspension": ["Std 8. Maintenance of Suspension", "Suspension"],
|
| 710 |
+
"scheduling": ["Std 1. Scheduling", "Scheduling"],
|
| 711 |
+
"health and wellbeing": ["Std 2. Health", "Health and wellbeing"],
|
| 712 |
+
"workplace conditions": ["Std 7. Workplace", "Workplace conditions"]
|
| 713 |
+
}
|
| 714 |
+
|
| 715 |
+
for keyword, candidates in keyword_mappings.items():
|
| 716 |
+
if keyword in standard_text_lower:
|
| 717 |
+
replacement_value = find_best_standard_value(mgmt_data, candidates)
|
| 718 |
+
if replacement_value:
|
| 719 |
+
matched_std = f"{keyword} related"
|
| 720 |
+
print(f" ✅ Found UPDATED data for keyword '{keyword}'")
|
| 721 |
+
break
|
| 722 |
|
| 723 |
+
# Strategy 3: Try exact standard name matching
|
| 724 |
+
if not replacement_value:
|
| 725 |
+
print(f" π Trying exact standard name matching for UPDATED data...")
|
| 726 |
+
# Clean the standard text for better matching
|
| 727 |
+
clean_standard = re.sub(r'\([^)]*\)', '', standard_text).strip()
|
| 728 |
+
|
| 729 |
for std_key, std_value in mgmt_data.items():
|
| 730 |
+
# Try partial matching
|
| 731 |
+
if (clean_standard.lower() in std_key.lower() or
|
| 732 |
+
std_key.lower() in clean_standard.lower()):
|
| 733 |
+
replacement_value = std_value
|
| 734 |
+
matched_std = std_key
|
| 735 |
+
print(f" ✅ Found UPDATED data via partial match: '{std_key}'")
|
| 736 |
+
break
|
| 737 |
|
| 738 |
# Apply replacement if found
|
| 739 |
if replacement_value:
|
|
|
|
| 746 |
else:
|
| 747 |
replacement_text = str(replacement_value)
|
| 748 |
|
| 749 |
+
print(f" 🎯 REPLACING old red text with UPDATED data: '{replacement_text[:100]}...'")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 750 |
|
| 751 |
+
# Use robust red text replacement
|
| 752 |
cell_replacements = replace_red_text_in_cell(details_cell, replacement_text)
|
| 753 |
+
|
| 754 |
+
# FALLBACK: If replace_red_text_in_cell fails, try manual replacement
|
| 755 |
+
if cell_replacements == 0:
|
| 756 |
+
print(f" ⚠️ Standard replacement failed, trying manual approach...")
|
| 757 |
+
|
| 758 |
+
# Try to replace red text manually
|
| 759 |
+
for paragraph in details_cell.paragraphs:
|
| 760 |
+
for run in paragraph.runs:
|
| 761 |
+
if is_red(run) and run.text.strip():
|
| 762 |
+
print(f" π§ Manually replacing red run: '{run.text[:50]}...'")
|
| 763 |
+
run.text = replacement_text
|
| 764 |
+
run.font.color.rgb = RGBColor(0, 0, 0)
|
| 765 |
+
cell_replacements = 1
|
| 766 |
+
break
|
| 767 |
+
if cell_replacements > 0:
|
| 768 |
+
break
|
| 769 |
+
|
| 770 |
replacements_made += cell_replacements
|
| 771 |
|
| 772 |
if cell_replacements > 0:
|
| 773 |
+
print(f" ✅ SUCCESSFULLY UPDATED '{standard_text}' with NEW data in {mgmt_type}")
|
| 774 |
+
print(f" π Used UPDATED data from: '{matched_std}'")
|
| 775 |
|
| 776 |
# Verify the replacement worked
|
| 777 |
new_details_text = get_clean_text(details_cell).strip()
|
| 778 |
+
print(f" π NEW details text: '{new_details_text[:100]}...'")
|
| 779 |
+
print(f" π OLD red text replaced with UPDATED data!")
|
| 780 |
else:
|
| 781 |
print(f" β Failed to replace red text in cell")
|
| 782 |
+
print(f" π Cell still contains OLD data: '{get_clean_text(details_cell)[:100]}...'")
|
| 783 |
else:
|
| 784 |
+
print(f" ⚠️ No UPDATED replacement found for '{standard_text}' in {mgmt_type}")
|
| 785 |
+
print(f" π Available UPDATED standards: {list(mgmt_data.keys())}")
|
| 786 |
+
|
| 787 |
+
# FALLBACK: Try to find ANY available standard that might fit
|
| 788 |
+
if mgmt_data and current_red_text:
|
| 789 |
+
print(f" π Trying fallback - any available UPDATED standard...")
|
| 790 |
+
# Use the first available standard as a fallback
|
| 791 |
+
first_std_key = list(mgmt_data.keys())[0]
|
| 792 |
+
fallback_value = mgmt_data[first_std_key]
|
| 793 |
+
|
| 794 |
+
if isinstance(fallback_value, list):
|
| 795 |
+
fallback_text = "\n".join(str(item) for item in fallback_value)
|
| 796 |
+
else:
|
| 797 |
+
fallback_text = str(fallback_value)
|
| 798 |
+
|
| 799 |
+
print(f" π Using fallback UPDATED data: '{fallback_text[:100]}...'")
|
| 800 |
+
|
| 801 |
+
cell_replacements = replace_red_text_in_cell(details_cell, fallback_text)
|
| 802 |
+
if cell_replacements > 0:
|
| 803 |
+
replacements_made += cell_replacements
|
| 804 |
+
print(f" ✅ Applied fallback UPDATED data successfully")
|
| 805 |
+
|
| 806 |
else:
|
| 807 |
print(f" ⚠️ Row {row_idx + 1} has insufficient columns ({len(row.cells)})")
|
| 808 |
|
| 809 |
+
print(f" π Total management summary UPDATES: {replacements_made}")
|
| 810 |
return replacements_made
|
| 811 |
|
| 812 |
+
|
| 813 |
def find_best_standard_value(mgmt_data, candidate_keys):
|
| 814 |
"""ENHANCED: Find the best matching value for a standard from management data"""
|
| 815 |
print(f" π Searching for candidates: {candidate_keys}")
|