Shami96 committed on
Commit
d9eda51
·
verified ·
1 Parent(s): 560885b

Update updated_word.py

Browse files
Files changed (1) hide show
  1. updated_word.py +58 -39
updated_word.py CHANGED
@@ -1434,60 +1434,80 @@ def process_headings(document, flat_json):
1434
 
1435
  return replacements_made
1436
 
1437
-
1438
def process_red_text_in_paragraph(paragraph, context_text, flat_json):
    """Swap the red text of one paragraph for a value looked up in ``flat_json``.

    The text of every non-blank red run is joined with single spaces and
    used as the lookup query.  If that yields no value, context-specific
    field names are tried (auditor or operator declarations), followed by
    queries that combine ``context_text`` with the red text.

    Args:
        paragraph: Object exposing ``runs``; each run has ``text`` and
            ``font.color.rgb`` (presumably a python-docx paragraph --
            confirm against the caller).
        context_text: Text describing where the paragraph sits; matched
            case-insensitively against the declaration titles.
        flat_json: Data handed to ``find_matching_json_key_and_value``.

    Returns:
        1 when the red text was replaced, otherwise 0.
    """
    # Only red runs that carry visible text take part in the replacement.
    red_runs = [r for r in paragraph.runs if is_red(r) and r.text.strip()]
    if not red_runs:
        return 0

    combined_red_text = " ".join(r.text.strip() for r in red_runs).strip()
    print(f" πŸ” Red text found: '{combined_red_text}'")

    match = find_matching_json_key_and_value(combined_red_text, flat_json)
    json_value = match[1] if match else None

    # Declaration sections: fall back to well-known field names.
    if json_value is None:
        if "NHVAS APPROVED AUDITOR" in context_text.upper():
            for candidate in ("auditor name", "auditor", "nhvas auditor", "approved auditor", "print name"):
                match = find_matching_json_key_and_value(candidate, flat_json)
                if match:
                    print(f" βœ… Found auditor match with field: '{match[0]}'")
                    json_value = match[1]
                    break
        elif "OPERATOR DECLARATION" in context_text.upper():
            for candidate in ("operator name", "operator", "company name", "organisation name", "print name"):
                match = find_matching_json_key_and_value(candidate, flat_json)
                if match:
                    print(f" βœ… Found operator match with field: '{match[0]}'")
                    json_value = match[1]
                    break

    # Last resort: queries built from the surrounding context.
    if json_value is None:
        for query in (f"{context_text} {combined_red_text}", combined_red_text, context_text):
            match = find_matching_json_key_and_value(query, flat_json)
            if match:
                print(f" βœ… Found match with combined query -> {match[0]}")
                json_value = match[1]
                break

    if json_value is None:
        print(f" ❌ No match found for red text: '{combined_red_text}'")
        return 0

    replacement_text = get_value_as_string(json_value, combined_red_text)

    # The first red run receives the new text in black; the rest are blanked
    # so the replacement is not duplicated across runs.
    first_run, *other_runs = red_runs
    first_run.text = replacement_text
    first_run.font.color.rgb = RGBColor(0, 0, 0)
    for leftover in other_runs:
        leftover.text = ''
    print(f" βœ… Replaced with: '{replacement_text}'")
    return 1
1492
 
1493
  def process_red_text_in_context_paragraph(paragraph, heading_text, flat_json, operator_name):
@@ -1593,8 +1613,7 @@ def process_hf(json_file, docx_file, output_file):
1593
  table_replacements = process_tables(doc, flat_json)
1594
  paragraph_replacements = process_paragraphs(doc, flat_json)
1595
  heading_replacements = process_headings(doc, flat_json)
1596
- red_text_para = process_red_text_in_paragraph(paragraph, context_text, flat_json)
1597
- total_replacements = table_replacements + paragraph_replacements + heading_replacements + red_text_para
1598
 
1599
  # Save unmatched headers for iterative improvement
1600
  if _unmatched_headers:
 
1434
 
1435
  return replacements_made
1436
 
1437
def process_red_text_in_heading_paragraph(paragraph, paragraph_text, flat_json, operator_name):
    """Replace the red placeholder text of a heading paragraph.

    The replacement value depends on the heading text:

    * management headings (maintenance / mass / fatigue) and
      "OPERATOR DECLARATION" headings use ``operator_name``;
    * "NHVAS APPROVED AUDITOR DECLARATION" headings use the first non-empty
      value in ``flat_json`` whose key contains both "auditor" and "name";
    * any other heading is resolved through
      ``find_matching_json_key_and_value``, first with the red text alone,
      then with the heading text as extra context.

    Args:
        paragraph: Object exposing ``runs``; each run has ``text`` and
            ``font.color.rgb`` (presumably a python-docx paragraph --
            confirm against the caller).
        paragraph_text: Full text of the heading; matched case-insensitively.
        flat_json: Mapping of keys to values (scalars or lists).
        operator_name: Operator name to insert for operator-related
            headings; a falsy value means "unknown".

    Returns:
        1 when the red text was replaced, otherwise 0.
    """
    # Collect the red runs once; they drive both the query and the rewrite.
    red_runs = [run for run in paragraph.runs if is_red(run) and run.text.strip()]
    if not red_runs:
        return 0

    combined_red_text = " ".join(run.text.strip() for run in red_runs).strip()
    print(f" πŸ” Red text found in heading: '{combined_red_text}'")

    heading_upper = paragraph_text.upper()
    management_headings = ("MAINTENANCE MANAGEMENT", "MASS MANAGEMENT", "FATIGUE MANAGEMENT")
    replacement_value = None

    if any(title in heading_upper for title in management_headings):
        # Management section headings carry the operator name.
        if operator_name:
            replacement_value = operator_name
            print(f" βœ… Using operator name for management section: '{operator_name}'")

    elif "NHVAS APPROVED AUDITOR DECLARATION" in heading_upper:
        auditor_name = None
        for key, value in flat_json.items():
            key_lower = key.lower()
            if "auditor" not in key_lower or "name" not in key_lower:
                continue
            if isinstance(value, list) and value:
                candidate = str(value[0]).strip()
            elif value:
                candidate = str(value).strip()
            else:
                candidate = ""
            # Keep scanning when a matching key holds an empty value, so a
            # later populated auditor-name key can still be used.
            if candidate:
                auditor_name = candidate
                break

        if auditor_name:
            replacement_value = auditor_name
            print(f" βœ… Using auditor name: '{auditor_name}'")

    elif "OPERATOR DECLARATION" in heading_upper:
        if operator_name:
            replacement_value = operator_name
            print(f" βœ… Using operator name for operator declaration: '{operator_name}'")

    else:
        # Direct match on the red text first.
        kv = find_matching_json_key_and_value(combined_red_text, flat_json)
        if kv:
            replacement_value = get_value_as_string(kv[1], combined_red_text)
        else:
            # The bare red text has just failed, so only the heading-based
            # queries are tried here (assumes the lookup gives the same
            # answer for the same input -- TODO confirm).
            for query in (f"{paragraph_text} {combined_red_text}", paragraph_text):
                kv = find_matching_json_key_and_value(query, flat_json)
                if kv:
                    replacement_value = get_value_as_string(kv[1], combined_red_text)
                    print(f" βœ… Found match with combined query: {kv[0]}")
                    break

    if not replacement_value:
        print(f" ❌ No suitable replacement found for: '{combined_red_text}'")
        return 0

    # The first red run receives the new text in black; the rest are blanked
    # so the replacement is not duplicated across runs.
    red_runs[0].text = replacement_value
    red_runs[0].font.color.rgb = RGBColor(0, 0, 0)
    for run in red_runs[1:]:
        run.text = ''
    print(f" βœ… Replaced with: '{replacement_value}'")
    return 1
1512
 
1513
  def process_red_text_in_context_paragraph(paragraph, heading_text, flat_json, operator_name):
 
1613
  table_replacements = process_tables(doc, flat_json)
1614
  paragraph_replacements = process_paragraphs(doc, flat_json)
1615
  heading_replacements = process_headings(doc, flat_json)
1616
+ total_replacements = table_replacements + paragraph_replacements + heading_replacements
 
1617
 
1618
  # Save unmatched headers for iterative improvement
1619
  if _unmatched_headers: