#!/usr/bin/env node

import { readFileSync, writeFileSync, existsSync } from 'fs';
import { join, dirname, basename } from 'path';
import { fileURLToPath, pathToFileURL } from 'url';

// ES modules do not get the CommonJS `__filename` / `__dirname` globals,
// so derive both from this module's own URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);

/**
 * Run every clean-up pass over a Markdown document exported from Notion.
 *
 * The passes run in a fixed order and each one receives the output of the
 * previous one:
 *   cleanNotionArtifacts -> fixNotionLinks -> optimizeImages ->
 *   shiftHeadingLevels -> cleanEmptyLines -> fixCodeBlocks ->
 *   fixCodeBlockEndings -> optimizeTables
 *
 * NOTE(review): cleanNotionArtifacts already replaces every
 * `https://www.notion.so/` link with its text, so fixNotionLinks has nothing
 * left to match when called from here. Confirm which of the two behaviours is
 * wanted before reordering.
 *
 * @param {string} content - Raw Markdown source.
 * @returns {string} The processed Markdown.
 * @throws {TypeError} If `content` is not a string.
 */
export function postProcessMarkdown(content) {
  if (typeof content !== 'string') {
    throw new TypeError('postProcessMarkdown expects the Markdown source as a string');
  }

  console.log('🔧 Post-processing Notion Markdown for MDX compatibility...');

  // Order matters: fixCodeBlocks must run before fixCodeBlockEndings, and the
  // empty-line clean-up runs after the passes that blank whole lines.
  const passes = [
    cleanNotionArtifacts,
    fixNotionLinks,
    optimizeImages,
    shiftHeadingLevels,
    cleanEmptyLines,
    fixCodeBlocks,
    fixCodeBlockEndings,
    optimizeTables,
  ];

  return passes.reduce((markdown, pass) => pass(markdown), content);
}

/**
 * Strip Notion-specific leftovers from exported Markdown.
 *
 * Four clean-ups are applied, in order:
 *   1. links whose target starts with `https://www.notion.so/` are replaced
 *      by their link text;
 *   2. a blockquote line that holds only a bold title (`> **Title**`) gets
 *      exactly one blank line after it;
 *   3. lines made only of three or more dashes are blanked;
 *   4. blockquote lines that contain nothing but `>` are blanked.
 *
 * @param {string} content - Markdown source.
 * @returns {string} The cleaned Markdown.
 */
function cleanNotionArtifacts(content) {
  console.log('  🧹 Cleaning Notion artifacts...');

  // Counts everything that is removed (links, rules, empty quote lines) so
  // the summary line reflects all of the work done, not just the links.
  let cleanedCount = 0;
  const blankOut = () => {
    cleanedCount++;
    return '';
  };

  const cleaned = content
    .replace(/\[([^\]]+)\]\(https:\/\/www\.notion\.so\/[^)]+\)/g, (match, text) => {
      cleanedCount++;
      return text;
    })
    .replace(/^> \*\*([^*]+)\*\*\s*\n/gm, '> **$1**\n\n')
    .replace(/^---+\s*$/gm, blankOut)
    .replace(/^>\s*$/gm, blankOut);

  if (cleanedCount > 0) {
    console.log(`    ✅ Cleaned ${cleanedCount} Notion artifact(s)`);
  }

  return cleaned;
}

/**
 * Rewrite links that point at `https://www.notion.so/`.
 *
 * - `https://www.notion.so/<workspace>/<page>` becomes an in-page anchor
 *   `[text](#<page>)`; a trailing `?query` or `#fragment` is discarded.
 * - Any other `https://www.notion.so/` link is replaced by its link text.
 *
 * @param {string} content - Markdown source.
 * @returns {string} Markdown with Notion links rewritten.
 */
function fixNotionLinks(content) {
  console.log('  🔗 Fixing Notion internal links...');

  let fixedCount = 0;

  // The workspace segment must stop at `)`; otherwise a one-segment Notion
  // link followed by another link on the same line is fused into one match.
  // The optional tail consumes `?query` / `#fragment` so such links are still
  // turned into anchors instead of falling through to the text-only rule.
  const pageLink =
    /\[([^\]]+)\]\(https:\/\/www\.notion\.so\/[^/?#)]+\/([^?#)]+)(?:[?#][^)]*)?\)/g;
  const anyNotionLink = /\[([^\]]+)\]\(https:\/\/www\.notion\.so\/[^)]*\)/g;

  const fixed = content
    .replace(pageLink, (match, text, pageId) => {
      fixedCount++;
      return `[${text}](#${pageId})`;
    })
    .replace(anyNotionLink, (match, text) => {
      fixedCount++;
      return text;
    });

  if (fixedCount > 0) {
    console.log(`    ✅ Fixed ${fixedCount} Notion link(s)`);
  }

  return fixed;
}

/**
 * Tidy Markdown image references.
 *
 * - Images with empty alt text get the file name of their target as alt text
 *   (query string and fragment are ignored when deriving the name).
 * - Query strings are removed from image targets.
 *
 * @param {string} content - Markdown source.
 * @returns {string} Markdown with image references rewritten.
 */
function optimizeImages(content) {
  console.log('  🖼️ Optimizing images...');

  let optimizedCount = 0;

  const optimized = content
    // Empty alt text -> use the file name. The image itself must be emitted
    // again, otherwise the replacement silently deletes it.
    .replace(/!\[\]\(([^)]+)\)/g, (match, src) => {
      optimizedCount++;
      const [pathOnly] = src.split(/[?#]/);
      return `![${basename(pathOnly)}](${src})`;
    })
    // Drop the query string; `[^)?]+` stops at the first `?`.
    .replace(/!\[([^\]]*)\]\(([^)?]+)\?[^)]*\)/g, (match, alt, src) => {
      optimizedCount++;
      return `![${alt}](${src})`;
    });

  if (optimizedCount > 0) {
    console.log(`    ✅ Optimized ${optimizedCount} image(s)`);
  }

  return optimized;
}

/**
 * Demote every ATX heading by one level (`#` -> `##`, ... `#####` -> `######`).
 *
 * Level-6 headings are left alone because Markdown has no deeper level.
 * Lines inside fenced code blocks (``` ... ```) are not touched, so
 * `# comment` lines in shell or Python snippets keep their meaning.
 *
 * @param {string} content - Markdown source.
 * @returns {string} Markdown with headings shifted.
 */
function shiftHeadingLevels(content) {
  console.log('  📝 Shifting heading levels down by one...');

  // Splitting on a regex with one capture group yields
  // [text, fencedBlock, text, fencedBlock, ...]: odd indexes are code.
  const fencedBlock = /(^```[^\n]*\n(?:[\s\S]*?\n)?```[ \t\r]*$)/m;

  let shiftedCount = 0;
  const shifted = content
    .split(fencedBlock)
    .map((segment, index) => {
      if (index % 2 === 1) {
        return segment;
      }
      // One pass over levels 1-5 is equivalent to demoting 5, 4, 3, 2, 1 in
      // turn and cannot shift the same heading twice.
      return segment.replace(/^(#{1,5}) /gm, (match, hashes) => {
        shiftedCount++;
        return `#${hashes} `;
      });
    })
    .join('');

  console.log(`    ✅ Shifted ${shiftedCount} heading level(s)`);
  return shifted;
}

/**
 * Turn ```` ```text ```` fences that end a line back into bare ```` ``` ```` fences.
 *
 * Every occurrence of ```` ```text ```` directly followed by a line break (LF
 * or CRLF) or by the end of the input is rewritten.
 *
 * @param {string} content - Markdown source.
 * @returns {string} Markdown with the fences rewritten.
 */
function fixCodeBlockEndings(content) {
  console.log('  💻 Fixing code block endings...');

  // Count inside the replace callback: searching for the pattern after the
  // replacement has run can never find anything.
  let fixedCount = 0;
  const fixed = content.replace(/```text(?=\r?\n|$)/g, () => {
    fixedCount++;
    return '```';
  });

  if (fixedCount > 0) {
    console.log(`    ✅ Fixed ${fixedCount} code block ending(s)`);
  }

  return fixed;
}

/**
 * Collapse runs of three or more newlines into a single blank line.
 *
 * Fenced code blocks (``` ... ```) are copied through unchanged so that
 * intentional vertical spacing inside code samples is preserved.
 *
 * @param {string} content - Markdown source.
 * @returns {string} Markdown without excessive blank lines.
 */
function cleanEmptyLines(content) {
  console.log('  📝 Cleaning excessive empty lines...');

  // Splitting on a regex with one capture group yields
  // [text, fencedBlock, text, fencedBlock, ...]: odd indexes are code.
  const fencedBlock = /(^```[^\n]*\n(?:[\s\S]*?\n)?```[ \t\r]*$)/m;

  const cleanedContent = content
    .split(fencedBlock)
    .map((segment, index) => (index % 2 === 1 ? segment : segment.replace(/\n{3,}/g, '\n\n')))
    .join('');

  const removedLines = content.split('\n').length - cleanedContent.split('\n').length;
  if (removedLines > 0) {
    console.log(`    ✅ Removed ${removedLines} excessive empty line(s)`);
  }

  return cleanedContent;
}

/**
 * Normalise fenced code blocks.
 *
 * - An opening fence without a language becomes ```` ```text ````.
 * - Non-breaking spaces (U+00A0) inside a code block are replaced by regular
 *   spaces.
 *
 * Closing fences are left as bare ```` ``` ````. Text outside code blocks is
 * not modified. Only fences that start in column 0 are recognised.
 *
 * NOTE(review): fixCodeBlockEndings, which runs after this pass in
 * postProcessMarkdown, rewrites ```` ```text ```` at end of line back to a bare
 * fence, so the `text` tag does not survive the full pipeline.
 *
 * @param {string} content - Markdown source.
 * @returns {string} Markdown with code blocks normalised.
 */
function fixCodeBlocks(content) {
  console.log('  💻 Fixing code blocks...');

  let fixedCount = 0;
  let insideBlock = false;

  // Walk line by line so opening and closing fences can be told apart; a
  // single regex over the whole document cannot do that reliably.
  const lines = content.split('\n').map((line) => {
    if (!insideBlock) {
      // An opening fence is ``` plus an optional info string without backticks.
      if (/^```[^`]*$/.test(line)) {
        insideBlock = true;
        if (/^```\s*$/.test(line)) {
          fixedCount++;
          return line.endsWith('\r') ? '```text\r' : '```text';
        }
      }
      return line;
    }

    if (/^```\s*$/.test(line)) {
      insideBlock = false;
      return line;
    }

    if (line.includes('\u00A0')) {
      fixedCount++;
      return line.replace(/\u00A0/g, ' ');
    }
    return line;
  });

  if (fixedCount > 0) {
    console.log(`    ✅ Fixed ${fixedCount} code block(s)`);
  }

  return lines.join('\n');
}

/**
 * Normalise pipe tables.
 *
 * - Every row (`| ... |`) is rewritten with single-space padding around each
 *   cell. Empty cells are kept so columns stay aligned, and escaped pipes
 *   (`\|`) are treated as cell content, not as separators.
 * - When a header row is followed by a separator row with a different number
 *   of columns, the separator is rebuilt as `---` per header column.
 *
 * @param {string} content - Markdown source.
 * @returns {string} Markdown with tables normalised.
 */
function optimizeTables(content) {
  console.log('  📊 Optimizing tables...');

  let optimizedCount = 0;

  // Split on unescaped pipes only.
  const splitCells = (row) => row.split(/(?<!\\)\|/).map((cell) => cell.trim());
  const isSeparatorCell = (cell) => /^:?-+:?$/.test(cell);

  // `[ \t]*$` instead of `\s*$`: `\s` also matches newlines and would eat
  // the blank line that terminates the table.
  const padded = content.replace(/^\|(.+)\|[ \t]*$/gm, (match, row) => {
    const cells = splitCells(row);
    if (cells.every((cell) => cell.length === 0)) {
      return match;
    }
    optimizedCount++;
    return `| ${cells.join(' | ')} |`;
  });

  const repaired = padded.replace(
    /^\|(.+)\|[ \t]*(\r?\n)\|([-: \t|]+)\|[ \t]*$/gm,
    (match, header, eol, separator) => {
      const headerCells = splitCells(header);
      const separatorCells = splitCells(separator);

      // Only touch rows that really are separators (at least one dash cell).
      if (!separatorCells.some(isSeparatorCell)) {
        return match;
      }
      const isWellFormed =
        separatorCells.length === headerCells.length && separatorCells.every(isSeparatorCell);
      if (isWellFormed) {
        return match;
      }

      optimizedCount++;
      const newSeparator = headerCells.map(() => '---').join(' | ');
      return `| ${headerCells.join(' | ')} |${eol}| ${newSeparator} |`;
    }
  );

  if (optimizedCount > 0) {
    console.log(`    ✅ Optimized ${optimizedCount} table(s)`);
  }

  return repaired;
}

/**
 * Build a YAML frontmatter block from Notion page properties.
 *
 * Always emitted: `title` (falls back to "Untitled"), `published` (today's
 * date, UTC, `YYYY-MM-DD`) and `tableOfContentsAutoCollapse: true`.
 * Emitted when present and truthy: `description`, `tags`, `author`.
 *
 * Strings are written as double-quoted scalars with quotes, backslashes and
 * control characters escaped; booleans are written bare; arrays are written
 * as block sequences (or `[]` when empty).
 *
 * @param {object} [pageProperties={}] - Page properties.
 * @param {string} [pageProperties.title]
 * @param {string} [pageProperties.description]
 * @param {string[]|string} [pageProperties.tags]
 * @param {string} [pageProperties.author]
 * @returns {string} Frontmatter delimited by `---` lines and followed by a
 *   blank line.
 */
export function generateFrontmatter(pageProperties = {}) {
  console.log('  📄 Generating frontmatter from Notion properties...');

  const { title, description, tags, author } = pageProperties ?? {};

  const frontmatter = {
    title: title || 'Untitled',
    published: new Date().toISOString().split('T')[0],
    tableOfContentsAutoCollapse: true,
  };
  if (description) {
    frontmatter.description = description;
  }
  if (tags) {
    frontmatter.tags = tags;
  }
  if (author) {
    frontmatter.author = author;
  }

  // A JSON string literal is also a valid YAML double-quoted scalar, so
  // JSON.stringify provides the escaping.
  const toScalar = (value) =>
    typeof value === 'boolean' ? String(value) : JSON.stringify(String(value));

  const yamlLines = Object.entries(frontmatter).map(([key, value]) => {
    if (!Array.isArray(value)) {
      return `${key}: ${toScalar(value)}`;
    }
    if (value.length === 0) {
      return `${key}: []`;
    }
    return `${key}:\n${value.map((item) => `  - ${toScalar(item)}`).join('\n')}`;
  });

  return `---\n${yamlLines.join('\n')}\n---\n\n`;
}

/**
 * Command-line entry point.
 *
 * Usage: `node post-processor.mjs [options] [input-file] [output-file]`
 *
 * The first argument that is not an option is the input file and the second
 * one, if any, is the output file. Without an output file the result is
 * written next to the input as `<name>.processed.md`. The process exits with
 * status 1 when the input is missing or processing fails, and with status 0
 * after printing the help text.
 */
function main() {
  const args = process.argv.slice(2);

  if (args.includes('--help') || args.includes('-h')) {
    console.log(`
🔧 Notion Markdown Post-Processor

Usage:
  node post-processor.mjs [options] [input-file] [output-file]

Options:
  --verbose        Show detailed processing information
  --help, -h       Show this help

Examples:
  # Process a single file
  node post-processor.mjs input.md output.md

  # Process with verbose output
  node post-processor.mjs --verbose input.md output.md
`);
    process.exit(0);
  }

  const verbose = args.includes('--verbose');

  // Positional arguments: anything that is not an option. Taking them by
  // position lets the output use any extension (e.g. `.mdx`).
  const [inputFile, outputFile] = args.filter((arg) => !arg.startsWith('-'));

  if (!inputFile) {
    console.error('❌ Please provide an input markdown file');
    process.exit(1);
  }

  if (!existsSync(inputFile)) {
    console.error(`❌ Input file not found: ${inputFile}`);
    process.exit(1);
  }

  try {
    console.log(`📖 Reading: ${inputFile}`);
    const content = readFileSync(inputFile, 'utf8');

    const processedContent = postProcessMarkdown(content);

    // Only rewrite a trailing `.md`; a plain string replace would hit the
    // first `.md` anywhere in the path. Never fall back to the input path.
    const defaultOutputFile = /\.md$/i.test(inputFile)
      ? inputFile.replace(/\.md$/i, '.processed.md')
      : `${inputFile}.processed.md`;
    const finalOutputFile = outputFile || defaultOutputFile;
    writeFileSync(finalOutputFile, processedContent);

    console.log(`✅ Processed: ${finalOutputFile}`);

    if (verbose) {
      console.log(`📊 Input: ${content.length} chars → Output: ${processedContent.length} chars`);
    }
  } catch (error) {
    console.error('❌ Processing failed:', error.message);
    process.exit(1);
  }
}

// Run the CLI only when this file is executed directly (not when imported).
// pathToFileURL builds the same kind of URL Node uses for import.meta.url,
// including percent-encoding and Windows drive letters, which a hand-built
// `file://${path}` string does not.
if (process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href) {
  main();
}