Spaces:
Running
Running
Update updated_word.py
Browse files- updated_word.py +58 -39
updated_word.py
CHANGED
|
@@ -1434,60 +1434,80 @@ def process_headings(document, flat_json):
|
|
| 1434 |
|
| 1435 |
return replacements_made
|
| 1436 |
|
| 1437 |
-
|
| 1438 |
-
|
| 1439 |
replacements_made = 0
|
| 1440 |
red_text_segments = []
|
|
|
|
| 1441 |
for run in paragraph.runs:
|
| 1442 |
if is_red(run) and run.text.strip():
|
| 1443 |
red_text_segments.append(run.text.strip())
|
|
|
|
| 1444 |
if not red_text_segments:
|
| 1445 |
return 0
|
|
|
|
| 1446 |
combined_red_text = " ".join(red_text_segments).strip()
|
| 1447 |
-
print(f" π Red text found: '{combined_red_text}'")
|
| 1448 |
-
|
| 1449 |
-
|
| 1450 |
-
|
| 1451 |
-
|
| 1452 |
-
|
| 1453 |
-
|
| 1454 |
-
|
| 1455 |
-
|
| 1456 |
-
|
| 1457 |
-
|
| 1458 |
-
|
| 1459 |
-
|
| 1460 |
-
|
| 1461 |
-
|
| 1462 |
-
|
| 1463 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1464 |
if kv:
|
| 1465 |
-
|
| 1466 |
-
|
| 1467 |
break
|
| 1468 |
-
|
| 1469 |
-
if
|
| 1470 |
-
|
| 1471 |
-
for query in context_queries:
|
| 1472 |
-
kv = find_matching_json_key_and_value(query, flat_json)
|
| 1473 |
-
if kv:
|
| 1474 |
-
print(f" β
Found match with combined query -> {kv[0]}")
|
| 1475 |
-
json_value = kv[1]
|
| 1476 |
-
break
|
| 1477 |
-
|
| 1478 |
-
if json_value is not None:
|
| 1479 |
-
replacement_text = get_value_as_string(json_value, combined_red_text)
|
| 1480 |
red_runs = [run for run in paragraph.runs if is_red(run) and run.text.strip()]
|
| 1481 |
if red_runs:
|
| 1482 |
-
red_runs[0].text =
|
| 1483 |
red_runs[0].font.color.rgb = RGBColor(0, 0, 0)
|
| 1484 |
for run in red_runs[1:]:
|
| 1485 |
run.text = ''
|
| 1486 |
replacements_made = 1
|
| 1487 |
-
print(f" β
Replaced with: '{
|
| 1488 |
else:
|
| 1489 |
-
print(f" β No
|
| 1490 |
-
|
| 1491 |
return replacements_made
|
| 1492 |
|
| 1493 |
def process_red_text_in_context_paragraph(paragraph, heading_text, flat_json, operator_name):
|
|
@@ -1593,8 +1613,7 @@ def process_hf(json_file, docx_file, output_file):
|
|
| 1593 |
table_replacements = process_tables(doc, flat_json)
|
| 1594 |
paragraph_replacements = process_paragraphs(doc, flat_json)
|
| 1595 |
heading_replacements = process_headings(doc, flat_json)
|
| 1596 |
-
|
| 1597 |
-
total_replacements = table_replacements + paragraph_replacements + heading_replacements + red_text_para
|
| 1598 |
|
| 1599 |
# Save unmatched headers for iterative improvement
|
| 1600 |
if _unmatched_headers:
|
|
|
|
| 1434 |
|
| 1435 |
return replacements_made
|
| 1436 |
|
| 1437 |
+
def process_red_text_in_heading_paragraph(paragraph, paragraph_text, flat_json, operator_name):
    """Replace the red placeholder text of a heading paragraph.

    All non-blank red runs of ``paragraph`` are treated as one placeholder.
    The replacement value is chosen from the wording of the heading:

    * headings containing "MAINTENANCE MANAGEMENT", "MASS MANAGEMENT" or
      "FATIGUE MANAGEMENT", and headings containing "OPERATOR DECLARATION",
      use ``operator_name``;
    * headings containing "NHVAS APPROVED AUDITOR DECLARATION" use the first
      non-empty value in ``flat_json`` whose key contains both "auditor" and
      "name" (for a list value, its first element);
    * any other heading is looked up with
      ``find_matching_json_key_and_value``: first the red text on its own,
      then the heading plus the red text, then the heading alone.

    When a value is found, the first red run receives it and is recoloured
    to RGB (0, 0, 0); every other red run is emptied.

    Args:
        paragraph: Object exposing ``runs``; each run must provide ``text``
            and ``font.color.rgb``.
        paragraph_text: Heading text used to pick the replacement strategy.
        flat_json: Mapping of keys to values searched for the replacement.
        operator_name: Value used for management and operator-declaration
            headings; a falsy value means no replacement in those branches.

    Returns:
        1 if the red text was replaced, otherwise 0.
    """
    # Collect the red runs once; they drive both the lookup and the rewrite.
    red_runs = [run for run in paragraph.runs if is_red(run) and run.text.strip()]
    if not red_runs:
        return 0

    combined_red_text = " ".join(run.text.strip() for run in red_runs).strip()
    print(f" 🔍 Red text found in heading: '{combined_red_text}'")

    replacement_value = None
    heading_upper = paragraph_text.upper()
    management_headings = ("MAINTENANCE MANAGEMENT", "MASS MANAGEMENT", "FATIGUE MANAGEMENT")

    # Determine what to replace based on heading context
    if any(mgmt_type in heading_upper for mgmt_type in management_headings):
        # For management section headings, replace with operator name
        if operator_name:
            replacement_value = operator_name
            print(f" ✅ Using operator name for management section: '{operator_name}'")

    elif "NHVAS APPROVED AUDITOR DECLARATION" in heading_upper:
        # For auditor declarations, look for auditor name
        auditor_name = None
        for key, value in flat_json.items():
            key_lower = key.lower()
            if "auditor" not in key_lower or "name" not in key_lower:
                continue
            if isinstance(value, list) and value:
                auditor_name = str(value[0]).strip()
            elif value:
                auditor_name = str(value).strip()
            # Keep scanning past matching keys whose value is empty, so a
            # later populated auditor-name key can still be used.
            if auditor_name:
                break

        if auditor_name:
            replacement_value = auditor_name
            print(f" ✅ Using auditor name: '{auditor_name}'")

    elif "OPERATOR DECLARATION" in heading_upper:
        # For operator declarations, use operator name
        if operator_name:
            replacement_value = operator_name
            print(f" ✅ Using operator name for operator declaration: '{operator_name}'")

    else:
        # For other headings, try a direct match on the red text first
        kv = find_matching_json_key_and_value(combined_red_text, flat_json)
        if kv:
            replacement_value = get_value_as_string(kv[1], combined_red_text)
        else:
            # Contextual search with the heading. The red text alone has
            # just failed above, so it is not queried a second time.
            context_queries = (f"{paragraph_text} {combined_red_text}", paragraph_text)
            for query in context_queries:
                kv = find_matching_json_key_and_value(query, flat_json)
                if kv:
                    replacement_value = get_value_as_string(kv[1], combined_red_text)
                    print(f" ✅ Found match with combined query: {kv[0]}")
                    break

    if not replacement_value:
        print(f" ❌ No suitable replacement found for: '{combined_red_text}'")
        return 0

    # Apply the replacement: the first red run carries the new text in
    # black, the remaining red runs are blanked so the value appears once.
    first_run, *other_runs = red_runs
    first_run.text = replacement_value
    first_run.font.color.rgb = RGBColor(0, 0, 0)
    for run in other_runs:
        run.text = ''
    print(f" ✅ Replaced with: '{replacement_value}'")
    return 1
|
| 1512 |
|
| 1513 |
def process_red_text_in_context_paragraph(paragraph, heading_text, flat_json, operator_name):
|
|
|
|
| 1613 |
table_replacements = process_tables(doc, flat_json)
|
| 1614 |
paragraph_replacements = process_paragraphs(doc, flat_json)
|
| 1615 |
heading_replacements = process_headings(doc, flat_json)
|
| 1616 |
+
total_replacements = table_replacements + paragraph_replacements + heading_replacements
|
|
|
|
| 1617 |
|
| 1618 |
# Save unmatched headers for iterative improvement
|
| 1619 |
if _unmatched_headers:
|