|
|
#!/usr/bin/env node |
|
|
|
|
|
import { readFileSync, writeFileSync, existsSync, mkdirSync, readdirSync, statSync } from 'fs'; |
|
|
import { join, dirname, basename, extname } from 'path'; |
|
|
import { fileURLToPath } from 'url'; |
|
|
import matter from 'gray-matter'; |
|
|
import { extractAndGenerateNotionFrontmatter } from './notion-metadata-extractor.mjs'; |
|
|
|
|
|
// Recreate CommonJS-style __filename/__dirname (not available in ES modules).
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);

// Default input and output both point at ./output next to this script, so
// converted .mdx files are written alongside the source .md files.
const DEFAULT_INPUT = join(__dirname, 'output');
const DEFAULT_OUTPUT = join(__dirname, 'output');
|
|
|
|
|
/**
 * Parse CLI arguments into an { input, output } config.
 *
 * Supports `--input=PATH`, `--output=PATH`, `--help`/`-h`, and up to two
 * positional arguments (input first, then output). Unrecognized extra
 * arguments are ignored. Falls back to DEFAULT_INPUT / DEFAULT_OUTPUT.
 *
 * @returns {{input: string, output: string}} resolved input/output paths
 */
function parseArgs() {
  const args = process.argv.slice(2);
  const config = {
    input: DEFAULT_INPUT,
    output: DEFAULT_OUTPUT,
  };

  // Count of positional (non-flag) arguments consumed so far.
  // BUG FIX: the original tested `!config.input` / `!config.output`, which
  // was always false because the defaults are pre-filled above, so
  // positional arguments were silently ignored.
  let positionals = 0;

  for (const arg of args) {
    if (arg.startsWith('--input=')) {
      config.input = arg.substring('--input='.length);
    } else if (arg.startsWith('--output=')) {
      config.output = arg.substring('--output='.length);
    } else if (arg === '--help' || arg === '-h') {
      console.log(`
π Notion Markdown to MDX Converter

Usage:
  node mdx-converter.mjs [options]

Options:
  --input=PATH    Input directory or file (default: ${DEFAULT_INPUT})
  --output=PATH   Output directory (default: ${DEFAULT_OUTPUT})
  --help, -h      Show this help

Examples:
  # Convert all markdown files in output directory
  node mdx-converter.mjs

  # Convert specific file
  node mdx-converter.mjs --input=article.md --output=converted/

  # Convert directory
  node mdx-converter.mjs --input=markdown-files/ --output=mdx-files/
`);
      process.exit(0);
    } else if (positionals === 0) {
      config.input = arg;
      positionals += 1;
    } else if (positionals === 1) {
      config.output = arg;
      positionals += 1;
    }
  }

  return config;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Per-document accumulator: names of Astro components (e.g. 'Figure',
// 'Note') referenced while transforming the current file. Populated by the
// transform passes, consumed by addComponentImports(), reset by
// processMdxContent().
const usedComponents = new Set();

// Per-document accumulator: image source path -> generated import variable
// name. Populated by transformImages(), consumed by addComponentImports(),
// reset by processMdxContent().
const imageImports = new Map();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Derive a JavaScript identifier from an image path, e.g.
 * './media/my-photo.png' -> 'my_photo' and '3chart.png' -> 'img_3chart'.
 *
 * @param {string} src - image source path
 * @returns {string} a variable name safe to use in a generated import
 */
function generateImageVarName(src) {
  const segments = src.split('/');
  const lastSegment = segments[segments.length - 1];
  // Drop the file extension, then normalise every other character.
  const stem = lastSegment.replace(/\.[^.]+$/, '');
  let name = stem.replace(/[^a-zA-Z0-9]/g, '_');
  // Identifiers cannot start with a digit; prefix such names with 'img_'.
  name = name.replace(/^[0-9]/, 'img_$&');
  return name;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Prepend the import statements required by the transformed document.
 *
 * Component imports come from the module-level `usedComponents` set and
 * image imports from the `imageImports` map (both populated by the earlier
 * transform passes). The import block is inserted immediately after the
 * closing `---` of the frontmatter when present, otherwise at the top.
 *
 * @param {string} content - transformed MDX body
 * @returns {string} content with the import block added (or unchanged)
 */
function addComponentImports(content) {
  console.log('  π¦ Adding component and image imports...');

  const imports = [];

  if (usedComponents.size > 0) {
    const componentImports = Array.from(usedComponents)
      .map(component => `import ${component} from '../components/${component}.astro';`);
    imports.push(...componentImports);
    console.log(`  β Importing components: ${Array.from(usedComponents).join(', ')}`);
  }

  if (imageImports.size > 0) {
    const imageImportStatements = Array.from(imageImports.entries())
      .map(([src, varName]) => `import ${varName} from '${src}';`);
    imports.push(...imageImportStatements);
    console.log(`  β Importing ${imageImports.size} image(s)`);
  }

  if (imports.length === 0) {
    console.log('  βΉοΈ No imports needed');
    return content;
  }

  const importBlock = imports.join('\n');

  // BUG FIX: the old code ran `content.indexOf('---', 3)` unconditionally,
  // so when the document had no leading frontmatter but contained a '---'
  // horizontal rule in the body, the imports were spliced mid-document.
  // Only search for the closing delimiter when the content actually starts
  // with a frontmatter fence.
  if (content.startsWith('---')) {
    const closing = content.indexOf('---', 3);
    if (closing !== -1) {
      const frontmatterEnd = closing + 3;
      return content.slice(0, frontmatterEnd) + '\n\n' + importBlock + '\n' + content.slice(frontmatterEnd);
    }
  }

  // No frontmatter: put the import block at the very top.
  return importBlock + '\n\n' + content;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Replace markdown images with <Figure> components.
 *
 * Two passes: first, an image directly followed (on the next line) by a
 * text line becomes a captioned figure; then any remaining image becomes a
 * plain figure. External URLs and PDFs are dropped entirely. Populates the
 * module-level `imageImports` and `usedComponents` as a side effect.
 *
 * @param {string} content
 * @returns {string}
 */
function transformImages(content) {
  console.log('  πΌοΈ Transforming images to Figure components...');

  let hasImages = false;

  // Normalise media paths so they are relative to the generated MDX file.
  const cleanSrcPath = (src) => {
    return src.replace(/^\/media\//, './media/')
      .replace(/^\.\/media\//, './media/');
  };

  // Captions end up inside a single-quoted JSX expression, so strip markup
  // and newlines and escape single quotes.
  const cleanCaption = (caption) => {
    return caption
      .replace(/<[^>]*>/g, '')
      .replace(/\n/g, ' ')
      .replace(/\r/g, ' ')
      .replace(/\s+/g, ' ')
      .replace(/'/g, "\\'")
      .trim();
  };

  // Alt text goes into a double-quoted JSX attribute; strip markup and
  // truncate overly long values.
  const cleanAltText = (alt, maxLength = 100) => {
    const cleaned = alt
      .replace(/<[^>]*>/g, '')
      .replace(/\n/g, ' ')
      .replace(/\r/g, ' ')
      .replace(/\s+/g, ' ')
      .trim();

    return cleaned.length > maxLength
      ? cleaned.substring(0, maxLength) + '...'
      : cleaned;
  };

  const createFigureComponent = (src, alt = '', caption = '') => {
    const cleanSrc = cleanSrcPath(src);

    // Skip anything we cannot import locally: PDFs and external URLs.
    if (cleanSrc.includes('.pdf') || cleanSrc.includes('arxiv.org/pdf') ||
        (cleanSrc.startsWith('http') && !cleanSrc.includes('/media/'))) {
      console.log(`  β οΈ Skipping external/PDF URL: ${cleanSrc}`);
      return ``;
    }

    const varName = generateImageVarName(cleanSrc);
    imageImports.set(cleanSrc, varName);
    usedComponents.add('Figure');

    const props = [];
    props.push(`src={${varName}}`);
    props.push('zoomable');
    props.push('downloadable');
    props.push('layout="fixed"');
    if (alt) props.push(`alt="${alt}"`);
    if (caption) props.push(`caption={'${caption}'}`);

    return `<Figure\n  ${props.join('\n  ')}\n/>`;
  };

  // Pass 1: image immediately followed by a text line -> captioned figure.
  // BUG FIX: this pass must run BEFORE the plain-image pass; the original
  // ran it second, by which point every image had already been rewritten,
  // so captions were never applied (dead code). `[ \t]*\n[ \t]*` (instead
  // of the original `\s*\n\s*`) stops a blank-line-separated paragraph
  // from being swallowed as a caption.
  // NOTE(review): a non-blank line right after an image is always treated
  // as its caption — confirm this matches Notion export samples.
  content = content.replace(/!\[([^\]]*)\]\(([^)]+)\)[ \t]*\n[ \t]*([^\n]+)/g, (match, alt, src, caption) => {
    const cleanSrc = cleanSrcPath(src);
    const cleanAlt = cleanAltText(alt || 'Figure');
    const cleanCap = cleanCaption(caption);
    hasImages = true;

    return createFigureComponent(cleanSrc, cleanAlt, cleanCap);
  });

  // Pass 2: any remaining images -> plain figures without captions.
  content = content.replace(/!\[([^\]]*)\]\(([^)]+)\)/g, (match, alt, src) => {
    const cleanSrc = cleanSrcPath(src);
    const cleanAlt = cleanAltText(alt || 'Figure');
    hasImages = true;

    return createFigureComponent(cleanSrc, cleanAlt);
  });

  if (hasImages) {
    console.log('  β Figure components with imports will be created');
  }

  return content;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Convert Notion-style blockquote callouts (`> **Title**` followed by
 * quoted lines) into <Note> components. Registers 'Note' in the
 * module-level `usedComponents` set when at least one callout is found.
 *
 * @param {string} content
 * @returns {string}
 */
function transformCallouts(content) {
  console.log('  π Transforming callouts to Note components...');

  let transformedCount = 0;

  // A callout is a bold title line inside a blockquote, then the quoted
  // body up to the next callout title, a blank line, or end of block.
  const calloutPattern = /^> \*\*([^*]+)\*\*\s*\n> (.+?)(?=\n> \*\*|\n\n|\n$)/gms;

  const result = content.replace(calloutPattern, (whole, title, body) => {
    transformedCount += 1;
    usedComponents.add('Note');

    // Drop the '> ' quote markers and collapse runs of newlines.
    const cleanContent = body
      .replace(/^> /gm, '')
      .replace(/\n+/g, '\n')
      .trim();

    return `<Note type="${title.toLowerCase()}" title="${title}">\n${cleanContent}\n</Note>\n\n`;
  });

  if (transformedCount > 0) {
    console.log(`  β Transformed ${transformedCount} callout(s) to Note components`);
  }

  return result;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Wrap multi-row markdown tables (a `|`-delimited block of 3+ lines) in a
 * `.table-container` div so they can be styled/scrolled independently.
 *
 * @param {string} content
 * @returns {string}
 */
function transformTables(content) {
  console.log('  π Enhancing tables...');

  let enhancedCount = 0;

  // A table block starts with a pipe row and runs until a blank line or a
  // trailing newline at the end of the block.
  const tablePattern = /^(\|[^|\n]+\|[\s\S]*?)(?=\n\n|\n$)/gm;

  const result = content.replace(tablePattern, (table) => {
    const isMultiRow = table.includes('|') && table.split('\n').length > 2;
    if (!isMultiRow) {
      return table;
    }
    enhancedCount += 1;
    return `<div class="table-container">\n\n${table}\n\n</div>`;
  });

  if (enhancedCount > 0) {
    console.log(`  β Enhanced ${enhancedCount} table(s)`);
  }

  return result;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Add the `copy` modifier to fenced code blocks that declare a language,
 * enabling the copy button in the rendered output. Fences without a
 * language tag are left untouched.
 *
 * @param {string} content
 * @returns {string}
 */
function transformCodeBlocks(content) {
  console.log('  π» Enhancing code blocks...');

  let enhancedCount = 0;

  const fencePattern = /^```(\w+)\n([\s\S]*?)\n```$/gm;

  const result = content.replace(fencePattern, (fence, lang, code) => {
    enhancedCount += 1;
    return '```' + lang + ' copy\n' + code + '\n```';
  });

  if (enhancedCount > 0) {
    console.log(`  β Enhanced ${enhancedCount} code block(s)`);
  }

  return result;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Fix common Notion markdown export quirks:
 *   - `β’` bullet characters -> standard `-` list markers
 *   - fragmented bold spans (`**a**b**c**`) -> one bold span (`**abc**`)
 *
 * @param {string} content
 * @returns {string}
 */
function fixNotionFormatting(content) {
  console.log('  π§ Fixing Notion formatting issues...');

  let fixedCount = 0;

  // Convert Notion bullet characters to markdown list markers, preserving
  // indentation.
  content = content.replace(/^(\s*)β’\s*(.+)$/gm, (match, indent, text) => {
    fixedCount++;
    return `${indent}- ${text}`;
  });

  // NOTE: the original version also had an ordered-list renumbering pass
  // here, but its guard (`!text.includes('\n')`) was always true — the
  // capture group `(.+)` cannot span lines under the /m flag — so the pass
  // always returned the match unchanged. It has been removed as dead code;
  // behavior is identical.

  // Merge fragmented bold runs into a single span.
  // NOTE(review): this also merges two distinct bold spans separated by
  // plain text ('**a** x **b**' -> '**a x b**'); presumably intended for
  // broken Notion output — confirm against real export samples.
  content = content.replace(/\*\*([^*]+)\*\*([^*]+)\*\*([^*]+)\*\*/g, (match, part1, part2, part3) => {
    fixedCount++;
    return `**${part1}${part2}${part3}**`;
  });

  if (fixedCount > 0) {
    console.log(`  β Fixed ${fixedCount} formatting issue(s)`);
  }

  return content;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Guarantee the document has usable YAML frontmatter.
 *
 * - No frontmatter: generate one (rich Notion metadata when pageId/token
 *   are supplied, otherwise a basic block) and prepend it.
 * - Existing frontmatter: optionally merge in fresh Notion metadata when
 *   the stored notion_id is missing or stale, then fill in default
 *   title/published/tableOfContentsAutoCollapse fields.
 *
 * Never throws: metadata or parse failures fall back to the existing
 * content or basic frontmatter.
 *
 * @param {string} content - markdown document
 * @param {?string} pageId - Notion page id for metadata extraction
 * @param {?string} notionToken - Notion API token
 * @returns {Promise<string>} content with frontmatter ensured
 */
async function ensureFrontmatter(content, pageId = null, notionToken = null) {
  console.log('  π Ensuring proper frontmatter...');

  if (!content.startsWith('---')) {
    let frontmatter;

    if (pageId && notionToken) {
      try {
        console.log('  π Extracting Notion metadata...');
        frontmatter = await extractAndGenerateNotionFrontmatter(pageId, notionToken);
        console.log('  β Generated rich frontmatter from Notion');
      } catch (error) {
        // Best-effort: fall back rather than abort the conversion.
        console.log('  β οΈ Failed to extract Notion metadata, using basic frontmatter');
        frontmatter = generateBasicFrontmatter();
      }
    } else {
      frontmatter = generateBasicFrontmatter();
      console.log('  β Generated basic frontmatter');
    }

    return frontmatter + content;
  }

  try {
    const { data, content: body } = matter(content);

    // Re-extract Notion metadata when the stored notion_id is absent or
    // does not match the requested page.
    if (pageId && notionToken && (!data.notion_id || data.notion_id !== pageId)) {
      try {
        console.log('  π Enhancing frontmatter with Notion metadata...');
        const notionFrontmatter = await extractAndGenerateNotionFrontmatter(pageId, notionToken);
        const { data: notionData } = matter(notionFrontmatter);

        // Notion metadata wins over existing fields on key conflicts.
        const enhancedData = { ...data, ...notionData };
        const enhancedContent = matter.stringify(body, enhancedData);
        console.log('  β Enhanced frontmatter with Notion metadata');
        return enhancedContent;
      } catch (error) {
        console.log('  β οΈ Could not enhance with Notion metadata, keeping existing');
      }
    }

    // Fill in required defaults.
    if (!data.title) data.title = 'Notion Article';
    if (!data.published) data.published = new Date().toISOString().split('T')[0];
    // BUG FIX: nullish assignment, not a truthiness test — the previous
    // `if (!data.tableOfContentsAutoCollapse)` forced an explicit `false`
    // back to `true`.
    data.tableOfContentsAutoCollapse ??= true;

    const enhancedContent = matter.stringify(body, data);
    console.log('  β Enhanced existing frontmatter');
    return enhancedContent;
  } catch (error) {
    console.log('  β οΈ Could not parse frontmatter, keeping as is');
    return content;
  }
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Build a minimal YAML frontmatter block used when no Notion metadata is
 * available.
 *
 * `published` is an ISO date (YYYY-MM-DD), matching the default that
 * ensureFrontmatter() applies to existing frontmatter; the previous
 * locale-formatted date ("Jan 05, 2025") was inconsistent with it.
 *
 * @returns {string} frontmatter block terminated by a blank line
 */
function generateBasicFrontmatter() {
  const currentDate = new Date().toISOString().split('T')[0];
  return `---
title: "Notion Article"
published: "${currentDate}"
tableOfContentsAutoCollapse: true
---

`;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Run the full markdown -> MDX transformation pipeline on one document.
 *
 * @param {string} content - raw markdown
 * @param {?string} pageId - optional Notion page id for metadata
 * @param {?string} notionToken - optional Notion API token
 * @returns {Promise<string>} Astro-compatible MDX content
 */
async function processMdxContent(content, pageId = null, notionToken = null) {
  console.log('π§ Processing for Astro MDX compatibility...');

  // Per-document state: forget components/images from the previous file.
  usedComponents.clear();
  imageImports.clear();

  // Frontmatter first (async), then the ordered synchronous passes. The
  // import pass runs last so it sees every component/image registered by
  // the earlier passes.
  let processedContent = await ensureFrontmatter(content, pageId, notionToken);

  const passes = [
    fixNotionFormatting,
    transformCallouts,
    transformImages,
    transformTables,
    transformCodeBlocks,
    addComponentImports,
  ];

  for (const pass of passes) {
    processedContent = pass(processedContent);
  }

  return processedContent;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Convert a single markdown file to a .mdx file in outputDir.
 *
 * Errors are logged rather than thrown, so a batch run continues past
 * individual bad files.
 *
 * @param {string} inputFile - path to the source .md file
 * @param {string} outputDir - directory for the generated .mdx file
 * @param {?string} pageId - optional Notion page id for metadata
 * @param {?string} notionToken - optional Notion API token
 */
async function convertFileToMdx(inputFile, outputDir, pageId = null, notionToken = null) {
  const filename = basename(inputFile, '.md');
  // BUG FIX: the output name was the literal `$(unknown).mdx` (shell-style
  // interpolation typo) and `filename` was never used, so every file in a
  // batch overwrote the same output file.
  const outputFile = join(outputDir, `${filename}.mdx`);

  console.log(`π Converting: ${basename(inputFile)} β ${basename(outputFile)}`);

  try {
    const markdownContent = readFileSync(inputFile, 'utf8');
    const mdxContent = await processMdxContent(markdownContent, pageId, notionToken);
    writeFileSync(outputFile, mdxContent);

    console.log(`  β Converted: ${outputFile}`);

    // Rough size report (KB) for the conversion log.
    const inputSize = Math.round(markdownContent.length / 1024);
    const outputSize = Math.round(mdxContent.length / 1024);
    console.log(`  π Input: ${inputSize}KB β Output: ${outputSize}KB`);
  } catch (error) {
    console.error(`  β Failed to convert ${inputFile}: ${error.message}`);
  }
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Convert a markdown file, or every .md file in a directory, to MDX.
 *
 * Exits the process (code 1) when the input is missing, is not markdown,
 * or conversion throws; creates the output directory on demand.
 *
 * @param {string} inputPath - a .md file or a directory of .md files
 * @param {string} outputDir - destination directory for .mdx files
 * @param {?string} pageId - optional Notion page id for metadata
 * @param {?string} notionToken - optional Notion API token
 */
async function convertToMdx(inputPath, outputDir, pageId = null, notionToken = null) {
  console.log('π Notion Markdown to Astro MDX Converter');
  console.log(`π Input: ${inputPath}`);
  console.log(`π Output: ${outputDir}`);

  if (!existsSync(inputPath)) {
    console.error(`β Input not found: ${inputPath}`);
    process.exit(1);
  }

  try {
    // Create the destination directory lazily.
    if (!existsSync(outputDir)) {
      mkdirSync(outputDir, { recursive: true });
    }

    // Build the worklist: every .md file in a directory, or one .md file.
    let filesToConvert = [];
    if (statSync(inputPath).isDirectory()) {
      filesToConvert = readdirSync(inputPath)
        .filter((file) => file.endsWith('.md'))
        .map((file) => join(inputPath, file));
    } else if (inputPath.endsWith('.md')) {
      filesToConvert = [inputPath];
    } else {
      console.error('β Input must be a .md file or directory containing .md files');
      process.exit(1);
    }

    if (filesToConvert.length === 0) {
      console.log('βΉοΈ No .md files found to convert');
      return;
    }

    console.log(`π Found ${filesToConvert.length} file(s) to convert`);

    // Sequential on purpose: the transform passes share module-level state
    // (usedComponents / imageImports) that is reset per document.
    for (const file of filesToConvert) {
      await convertFileToMdx(file, outputDir, pageId, notionToken);
    }

    console.log(`β Conversion completed! ${filesToConvert.length} file(s) processed`);
  } catch (error) {
    console.error('β Conversion failed:', error.message);
    process.exit(1);
  }
}
|
|
|
|
|
export { convertToMdx }; |
|
|
|
|
|
/**
 * CLI entry point: parse arguments and run the conversion.
 *
 * BUG FIX: awaits convertToMdx before printing the completion message —
 * the original fired the async call without awaiting, so "completed" was
 * logged before any file had been converted and rejections were lost.
 */
async function main() {
  const config = parseArgs();
  await convertToMdx(config.input, config.output);
  console.log('π MDX conversion completed!');
}

// Run only when executed directly (not when imported as a module).
if (import.meta.url === `file://${process.argv[1]}`) {
  main().catch((error) => {
    // Surface unexpected rejections instead of leaving a floating promise.
    console.error('β MDX conversion failed:', error);
    process.exit(1);
  });
}
|
|
|