LiamKhoaLe committed on
Commit
53f192f
·
1 Parent(s): fb18cd9

Upd code rendering from pdf.py

Browse files
Files changed (1) hide show
  1. utils/service/pdf.py +30 -26
utils/service/pdf.py CHANGED
@@ -454,52 +454,56 @@ def _apply_syntax_highlight(escaped_code: str, language: str) -> str:
454
  Apply lightweight syntax highlighting on XML-escaped code text.
455
  Works with escaped entities (&lt; &gt; &amp;), so regexes should not rely on raw quotes.
456
  """
457
- # Generic number highlighting
458
- out = re.sub(r"\b(\d+\.?\d*)\b", r"<font color='#d19a66'>\1</font>", escaped_code)
459
-
 
 
 
 
 
460
  lang = (language or 'text').lower()
461
 
462
  if lang in ('python', 'py'):
 
 
463
  keywords = (
464
  'def|class|if|else|elif|for|while|try|except|finally|import|from|as|with|return|yield|lambda|and|or|not|in|is|True|False|None|pass|break|continue|raise|assert'
465
  )
466
- out = re.sub(rf"\b({keywords})\b", r"<font color='#c678dd'><b>\1</b></font>", out)
467
- # Comments starting with # to end of line
468
- out = re.sub(r"(#[^\n]*)", r"<font color='#5c6370'>\1</font>", out)
469
 
470
  elif lang in ('javascript', 'js', 'typescript', 'ts'):
 
 
471
  keywords = (
472
  'function|var|let|const|if|else|for|while|do|switch|case|break|continue|return|try|catch|finally|throw|new|this|typeof|instanceof|true|false|null|undefined|async|await'
473
  )
474
- out = re.sub(rf"\b({keywords})\b", r"<font color='#c678dd'><b>\1</b></font>", out)
475
- # Line comments
476
- out = re.sub(r"(//[^\n]*)", r"<font color='#5c6370'>\1</font>", out)
477
- # Block comments (escaped form still contains /* */)
478
- out = re.sub(r"/\*[\s\S]*?\*/", lambda m: f"<font color='#5c6370'>{m.group(0)}</font>", out)
479
-
480
- elif lang in ('json'):
481
- # true|false|null
482
- out = re.sub(r"\b(true|false|null)\b", r"<font color='#56b6c2'><b>\1</b></font>", out)
483
- # Keys "key": stay as &quot;key&quot; after escaping; highlight inside quotes followed by :
484
- out = re.sub(r"(&quot;[^&]*?&quot;)(\s*:)", r"<font color='#61afef'>\1</font>\2", out)
485
 
486
  elif lang in ('bash', 'sh', 'shell'):
487
- out = re.sub(r"(^|\n)(\s*)([a-zA-Z_][a-zA-Z0-9_-]*)", r"\1\2<font color='#c678dd'><b>\3</b></font>", out)
488
- out = re.sub(r"(#[^\n]*)", r"<font color='#5c6370'>\1</font>", out)
489
 
490
  elif lang in ('yaml', 'yml'):
491
- out = re.sub(r"(^|\n)(\s*)([^:\n]+)(:)", r"\1\2<font color='#61afef'>\3</font>\4", out)
492
- out = re.sub(r"\b(true|false|yes|no|on|off)\b", r"<font color='#56b6c2'><b>\1</b></font>", out, flags=re.IGNORECASE)
493
 
494
- elif lang in ('sql'):
495
  keywords = (
496
  'SELECT|FROM|WHERE|INSERT|UPDATE|DELETE|CREATE|DROP|ALTER|TABLE|INDEX|VIEW|DATABASE|SCHEMA|JOIN|LEFT|RIGHT|INNER|OUTER|ON|GROUP|BY|ORDER|HAVING|UNION|DISTINCT|COUNT|SUM|AVG|MAX|MIN|AND|OR|NOT|IN|BETWEEN|LIKE|IS|NULL|ASC|DESC|LIMIT|OFFSET'
497
  )
498
- out = re.sub(rf"\b({keywords})\b", r"<font color='#c678dd'><b>\1</b></font>", out, flags=re.IGNORECASE)
 
 
 
 
499
 
500
- # Strings: handle common forms using escaped quotes &quot; and &#x27;
501
- out = re.sub(r"(&quot;.*?&quot;)", r"<font color='#98c379'>\1</font>", out)
502
- out = re.sub(r"(&#x27;.*?&#x27;)", r"<font color='#98c379'>\1</font>", out)
503
 
504
  return out
505
 
 
454
  Apply lightweight syntax highlighting on XML-escaped code text.
455
  Works with escaped entities (&lt; &gt; &amp;), so regexes should not rely on raw quotes.
456
  """
457
def sub_outside_tags(pattern, repl, text, flags=0):
    """Apply ``re.sub`` only to text that is not already inside markup.

    ``text`` is split on tag tokens (``<...>`` / ``</...>``). A segment is
    rewritten only when it lies outside every element, so neither the tag
    markup itself (e.g. ``color='#5c6370'``) nor text that an earlier pass
    already wrapped (e.g. a highlighted comment) is touched again.

    Args:
        pattern: Regular expression passed to ``re.sub``.
        repl: Replacement string or callable passed to ``re.sub``.
        text: Partially highlighted text; tags are assumed to be balanced.
        flags: Optional ``re`` flags forwarded to ``re.sub``.

    Returns:
        ``text`` with the substitution applied to top-level segments only.
    """
    # With a capturing group, re.split alternates: even indexes hold plain
    # text, odd indexes hold the tag tokens themselves.
    parts = re.split(r'(</?[^>]+>)', text)
    depth = 0  # number of currently open elements
    for idx, part in enumerate(parts):
        if idx % 2:
            if part.startswith('</'):
                # Clamp at zero so a stray closing tag cannot disable
                # highlighting for the rest of the text.
                depth = max(depth - 1, 0)
            elif not part.endswith('/>'):
                # Self-closing tags do not open a new nesting level.
                depth += 1
        elif depth == 0:
            parts[idx] = re.sub(pattern, repl, part, flags=flags)
    return ''.join(parts)
463
+
464
+ out = escaped_code
465
  lang = (language or 'text').lower()
466
 
467
  if lang in ('python', 'py'):
468
+ # Comments first
469
+ out = sub_outside_tags(r"(#[^\n]*)", r"<font color='#5c6370'>\1</font>", out)
470
  keywords = (
471
  'def|class|if|else|elif|for|while|try|except|finally|import|from|as|with|return|yield|lambda|and|or|not|in|is|True|False|None|pass|break|continue|raise|assert'
472
  )
473
+ out = sub_outside_tags(rf"\b({keywords})\b", r"<font color='#c678dd'><b>\1</b></font>", out)
 
 
474
 
475
  elif lang in ('javascript', 'js', 'typescript', 'ts'):
476
+ out = sub_outside_tags(r"(//[^\n]*)", r"<font color='#5c6370'>\1</font>", out)
477
+ out = sub_outside_tags(r"/\*[\s\S]*?\*/", lambda m: f"<font color='#5c6370'>{m.group(0)}</font>", out)
478
  keywords = (
479
  'function|var|let|const|if|else|for|while|do|switch|case|break|continue|return|try|catch|finally|throw|new|this|typeof|instanceof|true|false|null|undefined|async|await'
480
  )
481
+ out = sub_outside_tags(rf"\b({keywords})\b", r"<font color='#c678dd'><b>\1</b></font>", out)
482
+
483
+ elif lang in ('json',):
484
+ out = sub_outside_tags(r"\b(true|false|null)\b", r"<font color='#56b6c2'><b>\1</b></font>", out)
485
+ out = sub_outside_tags(r"(&quot;[^&]*?&quot;)(\s*:)", r"<font color='#61afef'>\1</font>\2", out)
 
 
 
 
 
 
486
 
487
  elif lang in ('bash', 'sh', 'shell'):
488
+ out = sub_outside_tags(r"(#[^\n]*)", r"<font color='#5c6370'>\1</font>", out)
489
+ out = sub_outside_tags(r"(^|\n)(\s*)([a-zA-Z_][a-zA-Z0-9_-]*)", r"\1\2<font color='#c678dd'><b>\3</b></font>", out)
490
 
491
  elif lang in ('yaml', 'yml'):
492
+ out = sub_outside_tags(r"(^|\n)(\s*)([^:\n]+)(:)", r"\1\2<font color='#61afef'>\3</font>\4", out)
493
+ out = sub_outside_tags(r"\b(true|false|yes|no|on|off)\b", r"<font color='#56b6c2'><b>\1</b></font>", out, flags=re.IGNORECASE)
494
 
495
+ elif lang in ('sql',):
496
  keywords = (
497
  'SELECT|FROM|WHERE|INSERT|UPDATE|DELETE|CREATE|DROP|ALTER|TABLE|INDEX|VIEW|DATABASE|SCHEMA|JOIN|LEFT|RIGHT|INNER|OUTER|ON|GROUP|BY|ORDER|HAVING|UNION|DISTINCT|COUNT|SUM|AVG|MAX|MIN|AND|OR|NOT|IN|BETWEEN|LIKE|IS|NULL|ASC|DESC|LIMIT|OFFSET'
498
  )
499
+ out = sub_outside_tags(rf"\b({keywords})\b", r"<font color='#c678dd'><b>\1</b></font>", out, flags=re.IGNORECASE)
500
+
501
+ # Strings
502
+ out = sub_outside_tags(r"(&quot;.*?&quot;)", r"<font color='#98c379'>\1</font>", out)
503
+ out = sub_outside_tags(r"(&#x27;.*?&#x27;)", r"<font color='#98c379'>\1</font>", out)
504
 
505
+ # Numbers last
506
+ out = sub_outside_tags(r"\b(\d+\.?\d*)\b", r"<font color='#d19a66'>\1</font>", out)
 
507
 
508
  return out
509