Spaces:
Running
Running
Update updated_word.py
Browse files- updated_word.py +75 -69
updated_word.py
CHANGED
|
@@ -570,7 +570,7 @@ def handle_attendance_list_table_enhanced(table, flat_json):
|
|
| 570 |
return replacements_made
|
| 571 |
|
| 572 |
def fix_management_summary_details_column(table, flat_json):
|
| 573 |
-
"""
|
| 574 |
replacements_made = 0
|
| 575 |
print(f" 🎯 FIX: Management Summary DETAILS column processing")
|
| 576 |
|
|
@@ -600,96 +600,86 @@ def fix_management_summary_details_column(table, flat_json):
|
|
| 600 |
for mgmt_type in mgmt_types:
|
| 601 |
print(f" ✅ Confirmed {mgmt_type} table processing")
|
| 602 |
|
| 603 |
-
# FIXED: Build management data dict from
|
| 604 |
mgmt_data = {}
|
| 605 |
|
| 606 |
-
#
|
| 607 |
-
|
| 608 |
-
|
| 609 |
-
|
| 610 |
-
|
| 611 |
-
|
| 612 |
-
|
| 613 |
-
# Strategy 2: Look for flattened keys like "Mass Management Summary.Std 5. Verification"
|
| 614 |
-
if not mgmt_data:
|
| 615 |
-
for key, value in flat_json.items():
|
| 616 |
-
if key.startswith(mgmt_type + "."):
|
| 617 |
-
# Extract the standard part (after the management type)
|
| 618 |
-
std_key = key[len(mgmt_type) + 1:] # Remove "Mass Management Summary." prefix
|
| 619 |
-
mgmt_data[std_key] = value
|
| 620 |
-
print(f" ✅ Found flattened standard: '{std_key}' = {value}")
|
| 621 |
-
|
| 622 |
-
if mgmt_data:
|
| 623 |
-
print(f" ✅ Collected {len(mgmt_data)} standards from flattened keys for {mgmt_type}")
|
| 624 |
-
|
| 625 |
-
# Strategy 3: Search for keys that contain the management type
|
| 626 |
-
if not mgmt_data:
|
| 627 |
-
for key, value in flat_json.items():
|
| 628 |
-
if mgmt_type.lower().replace(" ", "") in key.lower().replace(" ", ""):
|
| 629 |
-
if isinstance(value, dict):
|
| 630 |
-
mgmt_data = value
|
| 631 |
-
print(f" ✅ Found data using key variation: '{key}'")
|
| 632 |
-
break
|
| 633 |
|
| 634 |
if not mgmt_data:
|
| 635 |
print(f" ⚠️ No JSON data found for {mgmt_type}")
|
| 636 |
continue
|
| 637 |
-
|
| 638 |
print(f" π Processing {mgmt_type} with standards: {list(mgmt_data.keys())}")
|
| 639 |
|
| 640 |
-
# Process the table rows
|
| 641 |
for row_idx, row in enumerate(table.rows):
|
| 642 |
if len(row.cells) >= 2:
|
| 643 |
standard_cell = row.cells[0]
|
| 644 |
details_cell = row.cells[1]
|
| 645 |
-
standard_text = get_clean_text(standard_cell).strip()
|
|
|
|
| 646 |
|
| 647 |
# Skip header rows
|
| 648 |
-
if
|
| 649 |
continue
|
| 650 |
|
|
|
|
| 651 |
if not has_red_text(details_cell):
|
| 652 |
continue
|
| 653 |
|
| 654 |
-
print(f" π Processing
|
| 655 |
|
| 656 |
-
# FIXED:
|
| 657 |
replacement_value = None
|
|
|
|
| 658 |
|
| 659 |
-
# Strategy 1:
|
| 660 |
-
|
| 661 |
-
|
| 662 |
-
|
|
|
|
| 663 |
|
| 664 |
-
|
| 665 |
-
|
| 666 |
-
|
| 667 |
-
|
| 668 |
-
|
| 669 |
-
|
| 670 |
-
|
| 671 |
-
|
| 672 |
-
elif "std 6" in standard_text or "internal review" in standard_text:
|
| 673 |
-
replacement_value = find_best_standard_value(mgmt_data, ["Std 6. Internal Review", "Std 6", "Internal Review"])
|
| 674 |
-
print(f" 🎯 Looking for Std 6 Internal Review")
|
| 675 |
-
|
| 676 |
-
elif "std 7" in standard_text:
|
| 677 |
-
replacement_value = find_best_standard_value(mgmt_data, ["Std 7. Internal Review", "Std 7", "Internal Review"])
|
| 678 |
-
print(f" 🎯 Looking for Std 7 Internal Review")
|
| 679 |
|
| 680 |
-
# Strategy 2:
|
| 681 |
if not replacement_value:
|
| 682 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 683 |
for std_key, std_value in mgmt_data.items():
|
| 684 |
-
|
| 685 |
-
|
| 686 |
-
|
| 687 |
-
|
| 688 |
-
key_match = re.search(r'std\s*(\d+)', std_key_lower)
|
| 689 |
-
if std_match and key_match and std_match.group(1) == key_match.group(1):
|
| 690 |
-
replacement_value = std_value
|
| 691 |
-
print(f" ✅ Fuzzy matched by std number: {std_key}")
|
| 692 |
-
break
|
| 693 |
|
| 694 |
# Apply replacement if found
|
| 695 |
if replacement_value:
|
|
@@ -702,18 +692,24 @@ def fix_management_summary_details_column(table, flat_json):
|
|
| 702 |
else:
|
| 703 |
replacement_text = str(replacement_value)
|
| 704 |
|
|
|
|
| 705 |
cell_replacements = replace_red_text_in_cell(details_cell, replacement_text)
|
| 706 |
replacements_made += cell_replacements
|
| 707 |
-
|
| 708 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 709 |
else:
|
| 710 |
print(f" ⚠️ No replacement found for '{standard_text}' in {mgmt_type}")
|
| 711 |
print(f" π Available standards: {list(mgmt_data.keys())}")
|
| 712 |
|
|
|
|
| 713 |
return replacements_made
|
| 714 |
|
| 715 |
def find_best_standard_value(mgmt_data, candidate_keys):
|
| 716 |
-
"""
|
| 717 |
print(f" π Searching for candidates: {candidate_keys}")
|
| 718 |
print(f" π In available keys: {list(mgmt_data.keys())}")
|
| 719 |
|
|
@@ -730,13 +726,23 @@ def find_best_standard_value(mgmt_data, candidate_keys):
|
|
| 730 |
print(f" ✅ Case-insensitive match found: '{key}' for '{candidate}'")
|
| 731 |
return value
|
| 732 |
|
| 733 |
-
# Partial match
|
| 734 |
for candidate in candidate_keys:
|
| 735 |
for key, value in mgmt_data.items():
|
| 736 |
if candidate.lower() in key.lower() or key.lower() in candidate.lower():
|
| 737 |
print(f" ✅ Partial match found: '{key}' for '{candidate}'")
|
| 738 |
return value
|
| 739 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 740 |
print(f" ❌ No match found for any candidate")
|
| 741 |
return None
|
| 742 |
|
|
|
|
| 570 |
return replacements_made
|
| 571 |
|
| 572 |
def fix_management_summary_details_column(table, flat_json):
|
| 573 |
+
"""FINAL FIX: Enhanced management summary processing that processes ALL standards correctly"""
|
| 574 |
replacements_made = 0
|
| 575 |
print(f" 🎯 FIX: Management Summary DETAILS column processing")
|
| 576 |
|
|
|
|
| 600 |
for mgmt_type in mgmt_types:
|
| 601 |
print(f" ✅ Confirmed {mgmt_type} table processing")
|
| 602 |
|
| 603 |
+
# FIXED: Build management data dict from flattened keys
|
| 604 |
mgmt_data = {}
|
| 605 |
|
| 606 |
+
# Look for flattened keys like "Mass Management Summary.Std 5. Verification"
|
| 607 |
+
for key, value in flat_json.items():
|
| 608 |
+
if key.startswith(mgmt_type + "."):
|
| 609 |
+
# Extract the standard part (after the management type)
|
| 610 |
+
std_key = key[len(mgmt_type) + 1:] # Remove "Mass Management Summary." prefix
|
| 611 |
+
mgmt_data[std_key] = value
|
| 612 |
+
print(f" ✅ Found flattened standard: '{std_key}' = {value}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 613 |
|
| 614 |
if not mgmt_data:
|
| 615 |
print(f" ⚠️ No JSON data found for {mgmt_type}")
|
| 616 |
continue
|
| 617 |
+
|
| 618 |
print(f" π Processing {mgmt_type} with standards: {list(mgmt_data.keys())}")
|
| 619 |
|
| 620 |
+
# Process the table rows - FIXED: Better row processing
|
| 621 |
for row_idx, row in enumerate(table.rows):
|
| 622 |
if len(row.cells) >= 2:
|
| 623 |
standard_cell = row.cells[0]
|
| 624 |
details_cell = row.cells[1]
|
| 625 |
+
standard_text = get_clean_text(standard_cell).strip()
|
| 626 |
+
standard_text_lower = standard_text.lower()
|
| 627 |
|
| 628 |
# Skip header rows
|
| 629 |
+
if any(header in standard_text_lower for header in ["standard", "requirement", "details", "management"]):
|
| 630 |
continue
|
| 631 |
|
| 632 |
+
# Only process cells with red text in details column
|
| 633 |
if not has_red_text(details_cell):
|
| 634 |
continue
|
| 635 |
|
| 636 |
+
print(f" π Processing row {row_idx + 1}: '{standard_text}'")
|
| 637 |
|
| 638 |
+
# FIXED: Comprehensive standard matching
|
| 639 |
replacement_value = None
|
| 640 |
+
matched_std = None
|
| 641 |
|
| 642 |
+
# Strategy 1: Extract standard number and match
|
| 643 |
+
std_match = re.search(r'std\s*(\d+)', standard_text_lower)
|
| 644 |
+
if std_match:
|
| 645 |
+
std_num = std_match.group(1)
|
| 646 |
+
print(f" 🎯 Looking for Standard {std_num}")
|
| 647 |
|
| 648 |
+
# Look for matching standard in mgmt_data
|
| 649 |
+
for std_key, std_value in mgmt_data.items():
|
| 650 |
+
if f"std {std_num}" in std_key.lower():
|
| 651 |
+
replacement_value = std_value
|
| 652 |
+
matched_std = std_key
|
| 653 |
+
print(f" ✅ Found match by std number: '{std_key}'")
|
| 654 |
+
break
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 655 |
|
| 656 |
+
# Strategy 2: Keyword-based matching if std number doesn't work
|
| 657 |
if not replacement_value:
|
| 658 |
+
if "daily" in standard_text_lower and "check" in standard_text_lower:
|
| 659 |
+
replacement_value = find_best_standard_value(mgmt_data, ["Std 1. Daily Check", "Daily Check"])
|
| 660 |
+
matched_std = "Daily Check related"
|
| 661 |
+
elif "verification" in standard_text_lower:
|
| 662 |
+
replacement_value = find_best_standard_value(mgmt_data, ["Std 5. Verification", "Verification"])
|
| 663 |
+
matched_std = "Verification related"
|
| 664 |
+
elif "internal review" in standard_text_lower:
|
| 665 |
+
replacement_value = find_best_standard_value(mgmt_data, ["Std 6. Internal Review", "Std 7. Internal Review", "Std 5. Internal Review", "Internal Review"])
|
| 666 |
+
matched_std = "Internal Review related"
|
| 667 |
+
elif "fault" in standard_text_lower and "recording" in standard_text_lower:
|
| 668 |
+
replacement_value = find_best_standard_value(mgmt_data, ["Std 2. Fault Recording", "Fault Recording"])
|
| 669 |
+
matched_std = "Fault Recording related"
|
| 670 |
+
elif "fault" in standard_text_lower and "repair" in standard_text_lower:
|
| 671 |
+
replacement_value = find_best_standard_value(mgmt_data, ["Std 3. Fault Repair", "Fault Repair"])
|
| 672 |
+
matched_std = "Fault Repair related"
|
| 673 |
+
|
| 674 |
+
# Strategy 3: Try all available standards if nothing specific matches
|
| 675 |
+
if not replacement_value and mgmt_data:
|
| 676 |
+
print(f" π No specific match, trying all available standards...")
|
| 677 |
+
# Just take the first available standard for this row
|
| 678 |
for std_key, std_value in mgmt_data.items():
|
| 679 |
+
replacement_value = std_value
|
| 680 |
+
matched_std = std_key
|
| 681 |
+
print(f" ⚡ Using available standard: '{std_key}'")
|
| 682 |
+
break
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 683 |
|
| 684 |
# Apply replacement if found
|
| 685 |
if replacement_value:
|
|
|
|
| 692 |
else:
|
| 693 |
replacement_text = str(replacement_value)
|
| 694 |
|
| 695 |
+
print(f" 🎯 About to replace red text with: '{replacement_text[:100]}...'")
|
| 696 |
cell_replacements = replace_red_text_in_cell(details_cell, replacement_text)
|
| 697 |
replacements_made += cell_replacements
|
| 698 |
+
|
| 699 |
+
if cell_replacements > 0:
|
| 700 |
+
print(f" ✅ SUCCESSFULLY replaced '{standard_text}' details in {mgmt_type}")
|
| 701 |
+
print(f" π Used data from: '{matched_std}'")
|
| 702 |
+
else:
|
| 703 |
+
print(f" ❌ Failed to replace red text in cell")
|
| 704 |
else:
|
| 705 |
print(f" ⚠️ No replacement found for '{standard_text}' in {mgmt_type}")
|
| 706 |
print(f" π Available standards: {list(mgmt_data.keys())}")
|
| 707 |
|
| 708 |
+
print(f" π Total management summary replacements: {replacements_made}")
|
| 709 |
return replacements_made
|
| 710 |
|
| 711 |
def find_best_standard_value(mgmt_data, candidate_keys):
|
| 712 |
+
"""ENHANCED: Find the best matching value for a standard from management data"""
|
| 713 |
print(f" π Searching for candidates: {candidate_keys}")
|
| 714 |
print(f" π In available keys: {list(mgmt_data.keys())}")
|
| 715 |
|
|
|
|
| 726 |
print(f" ✅ Case-insensitive match found: '{key}' for '{candidate}'")
|
| 727 |
return value
|
| 728 |
|
| 729 |
+
# Partial match (contains)
|
| 730 |
for candidate in candidate_keys:
|
| 731 |
for key, value in mgmt_data.items():
|
| 732 |
if candidate.lower() in key.lower() or key.lower() in candidate.lower():
|
| 733 |
print(f" ✅ Partial match found: '{key}' for '{candidate}'")
|
| 734 |
return value
|
| 735 |
|
| 736 |
+
# Extract number and match by number
|
| 737 |
+
for candidate in candidate_keys:
|
| 738 |
+
candidate_num = re.search(r'(\d+)', candidate)
|
| 739 |
+
if candidate_num:
|
| 740 |
+
for key, value in mgmt_data.items():
|
| 741 |
+
key_num = re.search(r'(\d+)', key)
|
| 742 |
+
if key_num and candidate_num.group(1) == key_num.group(1):
|
| 743 |
+
print(f" ✅ Number match found: '{key}' for '{candidate}'")
|
| 744 |
+
return value
|
| 745 |
+
|
| 746 |
print(f" ❌ No match found for any candidate")
|
| 747 |
return None
|
| 748 |
|