File size: 9,839 Bytes
e903a32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
#!/usr/bin/env node

import { Client } from '@notionhq/client';

/**
 * Notion Metadata Extractor
 * Extracts document metadata from Notion pages for frontmatter generation
 */

/**
 * Format a date for the `published` metadata field: en-US locale with a
 * numeric year, short month name and two-digit day.
 * @param {Date} date - Date to format
 * @returns {string} - Locale-formatted date string
 */
function formatPublishedDate(date) {
    return date.toLocaleDateString('en-US', {
        year: 'numeric',
        month: 'short',
        day: '2-digit'
    });
}

/**
 * Tell whether an extracted property value carries no usable author data
 * (null/undefined, empty string or empty array).
 * @param {any} value - Value returned by extractPropertyValue
 * @returns {boolean} - True when the value should not be used as authors
 */
function isEmptyAuthorValue(value) {
    return value == null || value === '' || (Array.isArray(value) && value.length === 0);
}

/**
 * Extract metadata from Notion page
 *
 * Retrieves the page, then collects title, dates, user ids, URL, parent,
 * cover, icon, authors, remaining custom properties, a description taken
 * from the first non-empty paragraph block, and tags from select /
 * multi-select properties. The function never rejects for API failures:
 * if extraction fails, the error is logged and placeholder `title` /
 * `published` values are filled in for whichever of the two is missing.
 *
 * @param {string} pageId - Notion page ID
 * @param {string} notionToken - Notion API token
 * @returns {Promise<object>} - Extracted metadata object
 */
export async function extractNotionMetadata(pageId, notionToken) {
    const notion = new Client({
        auth: notionToken,
    });

    const metadata = {};

    try {
        // Get page information
        const page = await notion.pages.retrieve({ page_id: pageId });
        const propertyEntries = Object.entries(page.properties);

        // Locate the title property by its type: the property key is only
        // guaranteed to be "title" for plain pages, while database pages use
        // a user-chosen name. Fall back to the literal "title" key.
        const titleEntry = propertyEntries.find(([, property]) => property.type === 'title');
        const [titleKey, titleProperty] = titleEntry ?? ['title', page.properties.title];
        const titleSegments = titleProperty?.title;
        if (Array.isArray(titleSegments) && titleSegments.length > 0) {
            // A title can be split into several rich-text segments; join them all.
            metadata.title = titleSegments.map(segment => segment.plain_text).join('');
        }

        // Extract creation date
        if (page.created_time) {
            metadata.published = formatPublishedDate(new Date(page.created_time));
            metadata.created_time = page.created_time;
        }

        // Extract last edited date
        if (page.last_edited_time) {
            metadata.last_edited_time = page.last_edited_time;
        }

        // Extract created by
        if (page.created_by && page.created_by.id) {
            metadata.created_by = page.created_by.id;
        }

        // Extract last edited by
        if (page.last_edited_by && page.last_edited_by.id) {
            metadata.last_edited_by = page.last_edited_by.id;
        }

        // Extract page URL
        metadata.notion_url = page.url;

        // Extract page ID
        metadata.notion_id = page.id;

        // Extract parent information. The value stored under the parent's
        // type key is used directly unless it is an object exposing an `id`.
        if (page.parent) {
            const parentValue = page.parent[page.parent.type];
            metadata.parent = {
                type: page.parent.type,
                id: parentValue?.id || parentValue
            };
        }

        // Extract cover image if available
        if (page.cover) {
            const coverValue = page.cover[page.cover.type];
            metadata.cover = {
                type: page.cover.type,
                url: coverValue?.url || coverValue
            };
        }

        // Extract icon if available
        if (page.icon) {
            metadata.icon = {
                type: page.icon.type,
                emoji: page.icon.emoji,
                url: page.icon.external?.url || page.icon.file?.url
            };
        }

        // Extract authors and custom properties
        const customProperties = {};
        for (const [key, value] of propertyEntries) {
            if (key === titleKey) {
                continue; // Title is handled separately above
            }

            const extractedValue = extractPropertyValue(value);
            const normalizedKey = key.toLowerCase();
            const isAuthorProperty = value.type === 'people' ||
                normalizedKey.includes('author') ||
                normalizedKey.includes('writer') ||
                normalizedKey.includes('creator');

            if (!isAuthorProperty) {
                customProperties[key] = extractedValue;
            } else if (!isEmptyAuthorValue(extractedValue)) {
                // Ignore empty author-like properties so they neither wipe a
                // previously found author nor block the created_by fallback.
                metadata.authors = extractedValue;
            }
        }

        // If no authors found in properties, try to get from created_by
        if (!metadata.authors && page.created_by) {
            try {
                const user = await notion.users.retrieve({ user_id: page.created_by.id });
                metadata.authors = [{
                    name: user.name || user.id,
                    id: user.id
                }];
            } catch (error) {
                console.log('    ⚠️  Could not fetch author from created_by:', error.message);
                // Fallback to basic info
                metadata.authors = [{
                    name: page.created_by.name || page.created_by.id,
                    id: page.created_by.id
                }];
            }
        }

        if (Object.keys(customProperties).length > 0) {
            metadata.properties = customProperties;
        }

        // Try to extract description from page content (first paragraph).
        // Best effort: a failure here only logs and leaves description unset.
        try {
            const blocks = await notion.blocks.children.list({ block_id: pageId });
            const firstParagraph = blocks.results.find(block =>
                block.type === 'paragraph' &&
                block.paragraph.rich_text &&
                block.paragraph.rich_text.length > 0
            );

            if (firstParagraph) {
                const description = firstParagraph.paragraph.rich_text
                    .map(text => text.plain_text)
                    .join('')
                    .trim();

                if (description.length > 0) {
                    // Cap at 200 characters and mark truncation with an ellipsis.
                    metadata.description = description.substring(0, 200) + (description.length > 200 ? '...' : '');
                }
            }
        } catch (error) {
            console.log('  ⚠️  Could not extract description from page content');
        }

        // Generate tags from page properties
        const tags = [];
        for (const value of Object.values(page.properties)) {
            if (value.type === 'multi_select' && value.multi_select) {
                for (const option of value.multi_select) {
                    tags.push(option.name);
                }
            } else if (value.type === 'select' && value.select) {
                tags.push(value.select.name);
            }
        }

        if (tags.length > 0) {
            metadata.tags = tags;
        }

    } catch (error) {
        console.error('Error extracting Notion metadata:', error.message);
        // Fill in basic metadata if extraction fails, but keep any real
        // values that were already extracted before the failure.
        if (!metadata.title) {
            metadata.title = "Notion Article";
        }
        if (!metadata.published) {
            metadata.published = formatPublishedDate(new Date());
        }
    }

    return metadata;
}

// Concatenate the plain_text of every rich-text segment in order.
const joinPlainText = (segments) => segments.map((segment) => segment.plain_text).join('');

// One reader per supported Notion property type. A Map is used so that
// type strings such as "constructor" can never match an inherited member.
const PROPERTY_READERS = new Map([
    ['rich_text', (prop) => joinPlainText(prop.rich_text)],
    ['title', (prop) => joinPlainText(prop.title)],
    ['number', (prop) => prop.number],
    ['select', (prop) => prop.select?.name || null],
    ['multi_select', (prop) => prop.multi_select.map((choice) => choice.name)],
    ['date', (prop) => prop.date?.start || null],
    ['checkbox', (prop) => prop.checkbox],
    ['url', (prop) => prop.url],
    ['email', (prop) => prop.email],
    ['phone_number', (prop) => prop.phone_number],
    ['created_time', (prop) => prop.created_time],
    ['created_by', (prop) => prop.created_by?.id || null],
    ['last_edited_time', (prop) => prop.last_edited_time],
    ['last_edited_by', (prop) => prop.last_edited_by?.id || null],
    ['people', (prop) => prop.people.map((member) => ({
        name: member.name || member.id,
        id: member.id
    }))],
]);

/**
 * Reduce a Notion property object to a plain JavaScript value.
 *
 * Text-like properties become strings, multi-selects become arrays of option
 * names, people become `{ name, id }` objects (name falls back to the id),
 * user references become ids, and dates yield their start value. Property
 * types without a reader produce null.
 *
 * @param {object} property - Notion property object
 * @returns {any} - Extracted value, or null for unsupported types
 */
function extractPropertyValue(property) {
    const readValue = PROPERTY_READERS.get(property.type);
    if (!readValue) {
        return null;
    }
    return readValue(property);
}

/**
 * Render a value as a YAML double-quoted scalar.
 * JSON string syntax is used because its escapes (\" \\ \n \uXXXX ...) are
 * also valid inside YAML double-quoted scalars, so quotes, backslashes and
 * line breaks in the value cannot break the frontmatter.
 * @param {any} value - Value to quote (coerced to string)
 * @returns {string} - Quoted, escaped scalar including the surrounding quotes
 */
function quoteYamlString(value) {
    return JSON.stringify(String(value));
}

/**
 * Generate YAML frontmatter from metadata object
 *
 * Emits, in order and only when present: title, description, published,
 * authors, tags, cover and icon, followed by the fixed
 * `tableOfContentsAutoCollapse: true` entry. Notion-specific fields and
 * custom properties are intentionally left out. All string values are
 * escaped so that special characters yield valid YAML.
 *
 * @param {object} metadata - Metadata object
 * @returns {string} - YAML frontmatter string, terminated by a blank line
 */
export function generateNotionFrontmatter(metadata) {
    const lines = ['---'];

    // Title
    if (metadata.title) {
        lines.push(`title: ${quoteYamlString(metadata.title)}`);
    }

    // Description
    if (metadata.description) {
        lines.push(`description: ${quoteYamlString(metadata.description)}`);
    }

    // Publication date
    if (metadata.published) {
        lines.push(`published: ${quoteYamlString(metadata.published)}`);
    }

    // Authors: accept a single author given as a plain string (e.g. taken
    // from a text property) as well as an array of strings / { name } objects.
    const authorList = typeof metadata.authors === 'string' && metadata.authors !== ''
        ? [metadata.authors]
        : metadata.authors;
    if (Array.isArray(authorList)) {
        const authorNames = [];
        for (const author of authorList) {
            if (typeof author === 'string') {
                authorNames.push(author);
            } else if (author?.name) {
                authorNames.push(author.name);
            }
        }
        // Only write the key when at least one entry follows it; a bare
        // "authors:" line would otherwise parse as a null value.
        if (authorNames.length > 0) {
            lines.push('authors:');
            for (const name of authorNames) {
                lines.push(`  - name: ${quoteYamlString(name)}`);
            }
        }
    }

    // Tags
    if (metadata.tags && metadata.tags.length > 0) {
        lines.push('tags:');
        metadata.tags.forEach(tag => {
            lines.push(`  - ${quoteYamlString(tag)}`);
        });
    }

    // Notion metadata removed - keeping only standard frontmatter fields

    // Cover image
    if (metadata.cover && metadata.cover.url) {
        lines.push(`cover: ${quoteYamlString(metadata.cover.url)}`);
    }

    // Icon: prefer the emoji, otherwise fall back to the icon URL
    if (metadata.icon) {
        if (metadata.icon.emoji) {
            lines.push(`icon: ${quoteYamlString(metadata.icon.emoji)}`);
        } else if (metadata.icon.url) {
            lines.push(`icon: ${quoteYamlString(metadata.icon.url)}`);
        }
    }

    // Custom properties removed - keeping frontmatter clean and standard

    // Default Astro configuration
    lines.push('tableOfContentsAutoCollapse: true');
    lines.push('---');

    return `${lines.join('\n')}\n\n`;
}

/**
 * Convenience wrapper: fetch a Notion page's metadata and render it straight
 * to YAML frontmatter.
 * @param {string} pageId - Notion page ID
 * @param {string} notionToken - Notion API token
 * @returns {Promise<string>} - Complete YAML frontmatter
 */
export async function extractAndGenerateNotionFrontmatter(pageId, notionToken) {
    return generateNotionFrontmatter(await extractNotionMetadata(pageId, notionToken));
}