Shami96 committed on
Commit
4e326f4
·
verified ·
1 Parent(s): 1f4d3cf

Update updated_word.py

Browse files
Files changed (1) hide show
  1. updated_word.py +107 -31
updated_word.py CHANGED
@@ -878,6 +878,7 @@ def handle_management_summary_fix(cell, flat_json):
878
  def handle_operator_declaration_fix(table, flat_json):
879
  """Wrapper for small declaration tables. Delegate to canonical fix first.
880
  If canonical did not change anything, fall back to the small-table auditor handling.
 
881
  """
882
  replacements_made = 0
883
 
@@ -886,7 +887,7 @@ def handle_operator_declaration_fix(table, flat_json):
886
  print(f" ⏭️ Skipping - Operator Declaration table already processed")
887
  return 0
888
 
889
- # only intended for small tables; if large, skip (your original condition)
890
  if len(table.rows) > 4:
891
  return 0
892
 
@@ -897,46 +898,121 @@ def handle_operator_declaration_fix(table, flat_json):
897
  # canonical handled it and set the processed flag
898
  return replacements_made
899
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
900
  # fallback: original small-table behaviour (auditor declaration etc.)
901
- # (This mirrors your earlier auditor-specific logic but will not run if canonical updated table)
902
  print(f" 🎯 Processing other declaration table (fallback small-table behavior)")
903
 
904
  for row_idx, row in enumerate(table.rows):
905
  for cell_idx, cell in enumerate(row.cells):
906
- if has_red_text(cell):
907
- declaration_fields = [
908
- "NHVAS Approved Auditor Declaration.Print Name",
909
- "Auditor name",
910
- "Signature",
911
- "Date"
912
- ]
913
 
914
- replaced = False
915
- for field in declaration_fields:
916
- field_value = find_matching_json_value(field, flat_json)
917
- if field_value is not None:
918
- replacement_text = get_value_as_string(field_value, field)
919
- if replacement_text.strip():
920
- cell_replacements = replace_red_text_in_cell(cell, replacement_text)
921
- if cell_replacements > 0:
922
- replacements_made += cell_replacements
923
- print(f" ✅ Fixed declaration field: {field}")
924
- replaced = True
925
- break
 
926
 
927
- if not replaced:
928
- red_text = ""
929
- for paragraph in cell.paragraphs:
930
- for run in paragraph.runs:
931
- if is_red(run):
932
- red_text += run.text
933
 
934
- if "signature" in red_text.lower():
935
- cell_replacements = replace_red_text_in_cell(cell, "[Signature]")
 
 
 
 
 
 
 
 
 
 
 
 
 
936
  replacements_made += cell_replacements
937
- elif "date" in red_text.lower():
938
- cell_replacements = replace_red_text_in_cell(cell, "[Date]")
 
 
 
 
 
 
 
 
 
 
 
 
939
  replacements_made += cell_replacements
 
 
 
 
 
 
 
 
 
 
 
 
 
 
940
 
941
  # if any replacements made here, mark processed
942
  if replacements_made > 0:
 
878
  def handle_operator_declaration_fix(table, flat_json):
879
  """Wrapper for small declaration tables. Delegate to canonical fix first.
880
  If canonical did not change anything, fall back to the small-table auditor handling.
881
+ Safeguards: do not replace with date-like values; prefer person/role candidates.
882
  """
883
  replacements_made = 0
884
 
 
887
  print(f" ⏭️ Skipping - Operator Declaration table already processed")
888
  return 0
889
 
890
+ # only intended for small tables; if large, skip
891
  if len(table.rows) > 4:
892
  return 0
893
 
 
898
  # canonical handled it and set the processed flag
899
  return replacements_made
900
 
901
+ # --- Helper validators (local, minimal, safe) ---
902
+ def is_date_like(s: str) -> bool:
903
+ if not s:
904
+ return False
905
+ s = s.strip()
906
+ # common tokens that indicate a date string
907
+ month_names = r"(jan|feb|mar|apr|may|jun|jul|aug|sep|sept|oct|nov|dec|january|february|march|april|may|june|july|august|september|october|november|december)"
908
+ # patterns: "2nd November 2023", "02/11/2023", "2023-11-02", "November 2023", "Date"
909
+ if re.search(r"\bDate\b", s, re.IGNORECASE):
910
+ return True
911
+ if re.search(r"\b\d{1,2}(?:st|nd|rd|th)?\b\s+" + month_names, s, re.IGNORECASE):
912
+ return True
913
+ if re.search(month_names + r".*\b\d{4}\b", s, re.IGNORECASE):
914
+ return True
915
+ if re.search(r"\b\d{1,2}[\/\.\-]\d{1,2}[\/\.\-]\d{2,4}\b", s):
916
+ return True
917
+ if re.search(r"\b\d{4}[\/\.\-]\d{1,2}[\/\.\-]\d{1,2}\b", s):
918
+ return True
919
+ # single 4-digit year alone
920
+ if re.fullmatch(r"\d{4}", s):
921
+ return True
922
+ return False
923
+
924
+ def looks_like_person_name(s: str) -> bool:
925
+ if not s:
926
+ return False
927
+ low = s.lower().strip()
928
+ # reject org/company terms
929
+ bad_terms = ["pty ltd", "p/l", "plc", "company", "farming", "farm", "trust", "ltd"]
930
+ if any(bt in low for bt in bad_terms):
931
+ return False
932
+ # minimal length check and presence of alphabetic characters
933
+ if len(low) < 3:
934
+ return False
935
+ return bool(re.search(r"[a-zA-Z]", low))
936
+
937
+ def looks_like_position(s: str) -> bool:
938
+ if not s:
939
+ return False
940
+ low = s.lower()
941
+ roles = ["manager", "auditor", "owner", "director", "supervisor", "coordinator", "driver", "operator", "representative", "chief"]
942
+ return any(r in low for r in roles)
943
+
944
  # fallback: original small-table behaviour (auditor declaration etc.)
 
945
  print(f" 🎯 Processing other declaration table (fallback small-table behavior)")
946
 
947
  for row_idx, row in enumerate(table.rows):
948
  for cell_idx, cell in enumerate(row.cells):
949
+ if not has_red_text(cell):
950
+ # do not overwrite non-red content in fallback
951
+ continue
 
 
 
 
952
 
953
+ # Try auditor-specific fields first
954
+ declaration_fields = [
955
+ "NHVAS Approved Auditor Declaration.Print Name",
956
+ "Auditor name",
957
+ "Signature",
958
+ "Date"
959
+ ]
960
+
961
+ replaced_this_cell = False
962
+ for field in declaration_fields:
963
+ field_value = find_matching_json_value(field, flat_json)
964
+ if field_value is None:
965
+ continue
966
 
967
+ replacement_text = get_value_as_string(field_value, field).strip()
968
+ if not replacement_text:
969
+ continue
 
 
 
970
 
971
+ # SAFEGUARD: do not replace with date-like text for name/position cells
972
+ if is_date_like(replacement_text):
973
+ # allow genuinely date-targeted cells (if red text explicitly contains 'date')
974
+ # but skip using a date string to fill 'name' or 'position' slots
975
+ # check the red text in the cell to see if it expects a date
976
+ red_text = "".join(run.text for p in cell.paragraphs for run in p.runs if is_red(run)).strip()
977
+ if "date" not in red_text.lower():
978
+ print(f" ⚠️ Skipping date-like replacement for field '{field}' -> '{replacement_text[:30]}...'")
979
+ continue
980
+
981
+ # Further safeguard: if replacement looks like a person or role, only then write into name/position cells
982
+ if (looks_like_person_name(replacement_text) or looks_like_position(replacement_text) or "signature" in field.lower() or "date" in field.lower()):
983
+ # Replace only red runs (safe)
984
+ cell_replacements = replace_red_text_in_cell(cell, replacement_text)
985
+ if cell_replacements > 0:
986
  replacements_made += cell_replacements
987
+ replaced_this_cell = True
988
+ print(f" ✅ Fixed declaration field: {field} -> '{replacement_text}'")
989
+ break
990
+ else:
991
+ # Not a person or role-looking text — skip to avoid clobbering name/position with unrelated content
992
+ print(f" ⚠️ Replacement for field '{field}' does not look like name/role, skipping: '{replacement_text[:30]}...'")
993
+ continue
994
+
995
+ # If not replaced by the declared fields, try to infer from the cell's red text (date/signature fallback)
996
+ if not replaced_this_cell:
997
+ red_text = "".join(run.text for p in cell.paragraphs for run in p.runs if is_red(run)).strip().lower()
998
+ if "signature" in red_text:
999
+ cell_replacements = replace_red_text_in_cell(cell, "[Signature]")
1000
+ if cell_replacements > 0:
1001
  replacements_made += cell_replacements
1002
+ print(f" ✅ Inserted placeholder [Signature]")
1003
+ elif "date" in red_text:
1004
+ # Try to find a date value in JSON for an explicit date slot else skip
1005
+ date_value = find_matching_json_value("Date", flat_json) or find_matching_json_value("Date of Audit", flat_json) or find_matching_json_value("Audit was conducted on", flat_json)
1006
+ if date_value is not None:
1007
+ date_text = get_value_as_string(date_value)
1008
+ if not is_date_like(date_text):
1009
+ # defensive: if the date value is not date-like, skip
1010
+ print(f" ⚠️ Found date-value but not date-like, skipping: '{date_text}'")
1011
+ else:
1012
+ cell_replacements = replace_red_text_in_cell(cell, date_text)
1013
+ if cell_replacements > 0:
1014
+ replacements_made += cell_replacements
1015
+ print(f" ✅ Inserted date value: '{date_text}'")
1016
 
1017
  # if any replacements made here, mark processed
1018
  if replacements_made > 0: