Shami96 committed on
Commit
6f54dab
·
verified ·
1 Parent(s): 364a368

Update extract_red_text.py

Browse files
Files changed (1) hide show
  1. extract_red_text.py +0 -90
extract_red_text.py CHANGED
@@ -279,96 +279,6 @@ def extract_red_text(input_doc):
279
  out["paragraphs"] = paras
280
  return out
281
 
282
- def handle_management_summary_table(table, flat_json):
283
- """Enhanced function to handle Management Summary tables specifically"""
284
- replacements_made = 0
285
-
286
- # Check if this is a Management Summary table
287
- table_text = ""
288
- for row in table.rows[:3]:
289
- for cell in row.cells:
290
- table_text += get_clean_text(cell).lower() + " "
291
-
292
- # Detect which type of management summary
293
- management_type = None
294
- if "mass management" in table_text and "details" in table_text:
295
- management_type = "Mass Management"
296
- elif "maintenance management" in table_text and "details" in table_text:
297
- management_type = "Maintenance Management"
298
- elif "fatigue management" in table_text and "details" in table_text:
299
- management_type = "Fatigue Management"
300
-
301
- if not management_type:
302
- return 0
303
-
304
- print(f" 📋 Detected {management_type} Summary table with DETAILS column")
305
-
306
- # Process each row to find standards and update DETAILS column
307
- for row_idx, row in enumerate(table.rows):
308
- if len(row.cells) < 2:
309
- continue
310
-
311
- # Skip header row
312
- if row_idx == 0:
313
- continue
314
-
315
- standard_cell = row.cells[0]
316
- details_cell = row.cells[1]
317
-
318
- standard_text = get_clean_text(standard_cell).strip()
319
-
320
- # Check if this row contains a standard (Std 1., Std 2., etc.)
321
- if not re.match(r'Std \d+\.', standard_text):
322
- continue
323
-
324
- print(f" 📌 Processing {standard_text}")
325
-
326
- # Only process if DETAILS cell has red text
327
- if not has_red_text(details_cell):
328
- continue
329
-
330
- # Try multiple approaches to find matching data
331
- json_value = None
332
-
333
- # Approach 1: Try direct standard match in the base management section
334
- base_management_data = flat_json.get(management_type, {})
335
- if isinstance(base_management_data, dict):
336
- for key, value in base_management_data.items():
337
- if standard_text in key and isinstance(value, list) and len(value) > 0:
338
- json_value = value
339
- print(f" ✅ Found match in {management_type}: '{key}'")
340
- break
341
-
342
- # Approach 2: Try the summary section
343
- if json_value is None:
344
- summary_section = flat_json.get(f"{management_type} Summary", {})
345
- if isinstance(summary_section, dict):
346
- for key, value in summary_section.items():
347
- if standard_text in key and isinstance(value, list) and len(value) > 0:
348
- json_value = value
349
- print(f" ✅ Found match in {management_type} Summary: '{key}'")
350
- break
351
-
352
- # Approach 3: Try fuzzy matching with all keys
353
- if json_value is None:
354
- json_value = find_matching_json_value(standard_text, flat_json)
355
-
356
- # Replace red text if we found data
357
- if json_value is not None:
358
- replacement_text = get_value_as_string(json_value, standard_text)
359
- if isinstance(json_value, list):
360
- replacement_text = "\n".join(str(item) for item in json_value if str(item).strip())
361
-
362
- cell_replacements = replace_red_text_in_cell(details_cell, replacement_text)
363
- replacements_made += cell_replacements
364
-
365
- if cell_replacements > 0:
366
- print(f" ✅ Updated DETAILS for {standard_text}")
367
- else:
368
- print(f" ❌ No data found for {standard_text}")
369
-
370
- return replacements_made
371
-
372
  def extract_red_text_filelike(input_file, output_file):
373
  """
374
  Accepts:
 
279
  out["paragraphs"] = paras
280
  return out
281
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
282
  def extract_red_text_filelike(input_file, output_file):
283
  """
284
  Accepts: