File size: 5,338 Bytes
457b8fd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
import os
import re

def diagnose_epub_directory(directory: str = ".") -> None:
    """Print a diagnostic report for an EPUB output directory.

    The report is written to stdout and covers:

    * whether ``directory`` exists, is a directory and can be listed;
    * counts of HTML, ``response_*`` HTML, CSS, image, sub-directory and
      other entries found directly inside it (no recursion);
    * presence of ``metadata.json``;
    * the chapter-number range parsed from ``response_<digits>_*`` file
      names, plus any gaps in that range;
    * item counts for the ``css``, ``images`` and ``fonts`` sub-directories;
    * a readability / basic-markup check of one sample chapter file.

    Args:
        directory: Path of the directory to inspect. Defaults to the
            current working directory.

    Returns:
        None. All findings are printed; problems are reported in the
        output rather than raised.
    """
    banner = '=' * 60
    print(f"\n{banner}")
    print("EPUB Directory Diagnostic Tool")
    print(f"{banner}\n")

    # Get absolute path
    abs_path = os.path.abspath(directory)
    print(f"πŸ“ Checking directory: {abs_path}")

    # Check if directory exists
    if not os.path.exists(abs_path):
        print("❌ ERROR: Directory does not exist!")
        return

    if not os.path.isdir(abs_path):
        print("❌ ERROR: Path is not a directory!")
        return

    # List contents. os.listdir returns entries in arbitrary order, so sort
    # them to make the report (previews, sample file) reproducible.
    try:
        contents = sorted(os.listdir(abs_path))
    except OSError as e:
        print(f"❌ ERROR: Cannot read directory: {e}")
        return
    print("βœ… Directory is accessible")
    print(f"πŸ“Š Total items: {len(contents)}\n")

    # Categorize files
    html_files = []
    response_files = []
    css_files = []
    image_files = []
    directories = []
    other_files = []

    for item in contents:
        item_path = os.path.join(abs_path, item)

        if os.path.isdir(item_path):
            directories.append(item)
        elif item.endswith('.html'):
            html_files.append(item)
            # Response files are a subset of the HTML files.
            if item.startswith('response_'):
                response_files.append(item)
        elif item.endswith('.css'):
            css_files.append(item)
        elif item.lower().endswith(('.jpg', '.jpeg', '.png', '.gif', '.svg')):
            image_files.append(item)
        else:
            other_files.append(item)

    # Report findings
    print("πŸ“‹ Directory Contents Summary:")
    print(f"   β€’ HTML files: {len(html_files)}")
    print(f"   β€’ Response files (translated chapters): {len(response_files)}")
    print(f"   β€’ CSS files: {len(css_files)}")
    print(f"   β€’ Image files: {len(image_files)}")
    print(f"   β€’ Subdirectories: {len(directories)}")
    print(f"   β€’ Other files: {len(other_files)}")

    # Check for required items
    print("\nπŸ“ Checking Required Items:")

    # Check for metadata.json
    if 'metadata.json' in contents:
        print("   βœ… metadata.json found")
    else:
        print("   ❌ metadata.json NOT FOUND")

    # Check for response files
    if response_files:
        print(f"   βœ… {len(response_files)} translated chapter files found")

        # Analyze chapter numbers; names that do not follow the
        # response_<digits>_ pattern are simply ignored here.
        chapter_pattern = re.compile(r'response_(\d+)_')
        chapter_nums = set()
        for name in response_files:
            m = chapter_pattern.match(name)
            if m:
                chapter_nums.add(int(m.group(1)))

        if chapter_nums:
            first_chapter = min(chapter_nums)
            last_chapter = max(chapter_nums)
            print(f"   πŸ“– Chapter range: {first_chapter} to {last_chapter}")

            # Check for missing chapters inside the observed range
            missing = set(range(first_chapter, last_chapter + 1)) - chapter_nums
            if missing:
                print(f"   ⚠️ Missing chapters: {sorted(missing)}")
    else:
        print("   ❌ No response_*.html files found!")

        if html_files:
            # Show a short preview so the user can spot a naming mismatch.
            print(f"\n   πŸ” Found {len(html_files)} HTML files with different names:")
            for i, name in enumerate(html_files[:5]):
                print(f"      {i+1}. {name}")
            if len(html_files) > 5:
                print(f"      ... and {len(html_files) - 5} more")

    # Check subdirectories
    if directories:
        print("\nπŸ“‚ Subdirectories found:")
        for d in directories:
            print(f"   β€’ {d}/")

            # Check contents of important subdirectories
            if d in ('css', 'images', 'fonts'):
                try:
                    sub_contents = os.listdir(os.path.join(abs_path, d))
                except OSError:
                    # Best effort: an unreadable sub-directory must not
                    # abort the rest of the report.
                    print("     Cannot read contents")
                else:
                    print(f"     Contains {len(sub_contents)} items")

    # Sample file check
    if response_files:
        print("\nπŸ” Checking a sample chapter file...")
        # The listing is sorted, so this is the first response file by name.
        sample_file = response_files[0]
        sample_path = os.path.join(abs_path, sample_file)

        try:
            with open(sample_path, 'r', encoding='utf-8') as f:
                content = f.read()
        except (OSError, UnicodeError) as e:
            print(f"   ❌ Cannot read {sample_file}: {e}")
        else:
            print(f"   βœ… {sample_file} is readable")
            print(f"   πŸ“ File size: {len(content):,} characters")

            # Check for basic HTML structure. All tag checks are
            # case-insensitive, so lower-case the text once.
            lowered = content.lower()
            if '<html' in lowered:
                print("   βœ… Contains HTML tag")
            if '<body' in lowered:
                print("   βœ… Contains BODY tag")
            if '<p>' in lowered or '<p ' in lowered:
                print("   βœ… Contains paragraph tags")

    print(f"\n{banner}")
    print("Diagnostic complete!")
    print(f"{banner}\n")

if __name__ == "__main__":
    import sys

    # Inspect the directory given as the first command-line argument,
    # falling back to the current working directory when none is supplied.
    target_directory = sys.argv[1] if len(sys.argv) > 1 else "."
    diagnose_epub_directory(target_directory)