File size: 2,678 Bytes
fcaa164 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 |
import re
from collections import OrderedDict
from ProjectPageAgent.html_finder import HtmlFinder
import os
# Matches an HTML <link ...> tag whose href value ends in ".css", optionally
# followed by a "?query" suffix. Inline flags: i = case-insensitive,
# s = dot matches newline, x = verbose (whitespace inside the pattern is
# ignored, which is why the pattern can be spread over several lines).
# Exactly one of the three capture groups is non-empty per match:
#   group 1 - double-quoted href value
#   group 2 - single-quoted href value
#   group 3 - unquoted href value
# NOTE(review): the pattern does not look at the rel attribute, so any <link>
# whose href ends in .css matches, not only rel="stylesheet".
_LINK_CSS_RE = re.compile(
r'''(?isx)
<link[^>]*?
href\s*=\s*
(?:
"([^"]+?\.css(?:\?[^"]*)?)" |
'([^']+?\.css(?:\?[^']*)?)' |
([^\s"'=<>`]+?\.css(?:\?[^\s"'=<>`]*)?)
)
[^>]*?>
'''
)
# Matches a CSS "@import" rule that references a ".css" file, with or without
# a url(...) wrapper and with an optional "?query" suffix. Inline flags:
# i = case-insensitive, s = dot matches newline, x = verbose.
# At least one whitespace character is required directly after "@import".
# Exactly one of the three capture groups is non-empty per match:
#   group 1 - double-quoted path
#   group 2 - single-quoted path
#   group 3 - unquoted path (stops at quote, ")", whitespace or ";")
_IMPORT_CSS_RE = re.compile(
r'''(?isx)
@import
\s+(?:url\()?
\s*
(?:
"([^"]+?\.css(?:\?[^"]*)?)" |
'([^']+?\.css(?:\?[^']*)?)' |
([^'")\s;]+?\.css(?:\?[^'")\s;]+)?)
)
\s*
\)?
'''
)
def _first_nonempty(groups_list):
    """Return the first non-empty entry of every group tuple.

    ``re.findall`` on a pattern with several alternative capture groups
    yields one tuple per match in which the groups that did not take part
    are empty strings; this helper collapses each tuple to the value that
    was actually captured.

    Args:
        groups_list: Iterable of iterables of strings (typically the list
            of tuples returned by ``re.findall``).

    Returns:
        A list holding, for each inner iterable that has at least one
        truthy item, its first truthy item, in input order. Inner iterables
        without any truthy item contribute nothing.
    """
    out = []
    for groups in groups_list:
        # Truthy items are never None, so None is a safe "nothing found" marker.
        first = next((g for g in groups if g), None)
        if first is not None:
            out.append(first)
    return out
def extract_css_paths(html: str):
    """Collect the CSS paths referenced by an HTML document.

    Paths are taken from ``<link ... href=...>`` tags first and from
    ``@import`` rules second (as recognised by ``_LINK_CSS_RE`` and
    ``_IMPORT_CSS_RE``).

    Args:
        html: Source text of the page to scan.

    Returns:
        A list of unique, whitespace-stripped, non-empty CSS paths in order
        of first appearance (all link paths before all import paths).
    """
    links = _first_nonempty(_LINK_CSS_RE.findall(html))
    imports = _first_nonempty(_IMPORT_CSS_RE.findall(html))
    stripped = (u.strip() for u in links + imports)
    # fromkeys de-duplicates while keeping first-seen order.
    return list(OrderedDict.fromkeys(u for u in stripped if u))
def check_css(generated_html: str, template_html: str):
    """Rewrite local CSS paths in a generated page to match its template.

    Every CSS path found in ``generated_html`` that is not remote and does
    not appear verbatim in ``template_html`` is looked up by file name (the
    part after the last ``/``) among the template's CSS paths. When a
    template path with the same file name exists, the generated path is
    replaced by it; otherwise a warning is printed and the path is left
    untouched.

    Args:
        generated_html: HTML text of the generated page.
        template_html: HTML text of the template page it was derived from.

    Returns:
        ``generated_html`` with the mismatched CSS paths substituted. The
        input is returned unchanged when nothing needs fixing.
    """
    generated_css = extract_css_paths(generated_html)
    template_css = extract_css_paths(template_html)
    print(f'num of css in generated page: {len(generated_css)}')
    print(f'num of css in template page: {len(template_css)}')

    template_paths = set(template_css)
    # If two template paths share a file name, the later one wins.
    template_css_name = {css.strip().split('/')[-1]: css for css in template_css}

    fixes = {}
    for css in generated_css:
        # Remote stylesheets (including protocol-relative ones) are never
        # rewritten. Requiring the "://" avoids skipping local directories
        # whose name merely starts with "http".
        if css.lower().startswith(('http://', 'https://', '//')):
            continue
        if css in template_paths:
            continue
        match = template_css_name.get(css.strip().split('/')[-1])
        if match is not None:
            fixes[css] = match
        else:
            print(f"[⚠️ Warning] Missing CSS match for {css}")

    if not fixes:
        return generated_html

    # Substitute everything in ONE pass: chained str.replace calls would
    # re-process text inserted by an earlier replacement (e.g. "main.css" ->
    # "static/main.css" would mangle a separate "css/main.css" reference).
    # Longest alternatives come first so a path that is a prefix of another
    # cannot shadow it, and the look-around assertions restrict matches to
    # whole path tokens so that an already-correct longer path containing
    # the wrong one as a suffix/prefix is left alone.
    alternatives = '|'.join(
        re.escape(path) for path in sorted(fixes, key=len, reverse=True)
    )
    pattern = re.compile(r'(?<![\w./-])(?:' + alternatives + r')(?![\w?./-])')
    return pattern.sub(lambda m: fixes[m.group(0)], generated_html)
if __name__ == "__main__":
    # Ad-hoc driver: run check_css on every generated page against the
    # template directory of the same name. Paths are machine-specific.
    templates_root = '/home/jimu/Project_resources/project_page/page_assets/'
    # presumably find_html() returns the path of the index.html below the
    # given directory - confirm against HtmlFinder.
    html_finder = HtmlFinder(specific_name='index.html')
    count = 0
    for page in os.listdir('generated_FastVGGT'):
        print(page)
        count += 1
        # Explicit encoding so the result does not depend on the locale.
        with open(html_finder.find_html(os.path.join('generated_FastVGGT', page)), 'r', encoding='utf-8') as f:
            generated_html = f.read()
        with open(html_finder.find_html(os.path.join(templates_root, page)), 'r', encoding='utf-8') as f:
            template_html = f.read()
        # check_css accepts exactly two arguments; passing `page` as a third
        # one raised TypeError on the first iteration.
        _ = check_css(generated_html, template_html)
    # Number of pages processed.
    print(count)
|