File size: 2,678 Bytes
fcaa164
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import re
from collections import OrderedDict
from ProjectPageAgent.html_finder import HtmlFinder
import os



_LINK_CSS_RE = re.compile(
    r'''(?isx)
    <link[^>]*?                 
    href\s*=\s*                 
    (?:
        "([^"]+?\.css(?:\?[^"]*)?)" |
        '([^']+?\.css(?:\?[^']*)?)' |
        ([^\s"'=<>`]+?\.css(?:\?[^\s"'=<>`]*)?)
    )
    [^>]*?>
    '''
)


_IMPORT_CSS_RE = re.compile(
    r'''(?isx)
    @import
    \s+(?:url\()?
    \s*
    (?:
        "([^"]+?\.css(?:\?[^"]*)?)" |
        '([^']+?\.css(?:\?[^']*)?)' |
        ([^'")\s;]+?\.css(?:\?[^'")\s;]+)?)
    )        
    \s*
    \)?
    '''
)


def _first_nonempty(groups_list):
    out = []
    for groups in groups_list:
        for g in groups:
            if g:
                out.append(g)
                break
    return out

def extract_css_paths(html: str):

    links = _first_nonempty(_LINK_CSS_RE.findall(html))
    imports = _first_nonempty(_IMPORT_CSS_RE.findall(html))
    seen = OrderedDict()
    for u in links + imports:
        u = u.strip()
        if u and u not in seen:
            seen[u] = True
    return list(seen.keys())

def check_css(generated_html: str, template_html: str):
    generated_css = extract_css_paths(generated_html)
    template_css = extract_css_paths(template_html)
    print(f'num of css in generated page: {len(generated_css)}')
    print(f'num of css in template page: {len(template_css)}')
    template_css_name = {css.strip().split('/')[-1]: css for css in template_css}
    
    errors = {}
    for css in generated_css:
        if css.startswith('http'):
            continue
        if css not in template_css:
            match = template_css_name.get(css.strip().split('/')[-1], None)
            if match is not None:
                errors[css] = match
            else:
                print(f"[⚠️ Warning] Missing CSS match for {css}")
    
    new_html = generated_html
    for css, new_css in errors.items():
        if new_css:
            new_html = new_html.replace(css, new_css)
    
    return new_html





if __name__ == "__main__":

    templates_root = '/home/jimu/Project_resources/project_page/page_assets/'
    html_finder = HtmlFinder(specific_name='index.html')

    count = 0
    for page in os.listdir('generated_FastVGGT'):
        print(page)
        count += 1
        with open(html_finder.find_html(os.path.join('generated_FastVGGT', page)), 'r') as f:
            generated_html = f.read()

        with open(html_finder.find_html(os.path.join(templates_root, page)), 'r') as f:
            template_html = f.read()


        _ = check_css(generated_html, template_html, page)
    print(count)