|
|
#!/usr/bin/env node |
|
|
|
|
|
import { readFileSync, writeFileSync, existsSync } from 'fs'; |
|
|
import { join, dirname, basename, extname } from 'path'; |
|
|
import { fileURLToPath } from 'url'; |
|
|
import { extractAndGenerateFrontmatter } from './metadata-extractor.mjs'; |
|
|
|
|
|
// ES modules have no built-in __filename/__dirname, so both are rebuilt from import.meta.url.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);

// Default input/output files, both inside the `output` folder next to this script.
const [DEFAULT_INPUT, DEFAULT_OUTPUT] = ['main.md', 'main.mdx'].map(
  (fileName) => join(__dirname, 'output', fileName),
);
|
|
|
|
|
/**
 * Parse the command line (`process.argv`) into converter settings.
 *
 * Recognised forms:
 *   --input=PATH   explicit input file
 *   --output=PATH  explicit output file
 *   --help, -h     print usage and exit with code 0
 *   PATH [PATH]    positional input, then output, used only for whichever of
 *                  the two has not been given explicitly yet
 *
 * @returns {{input: string, output: string}} Paths to read from and write to;
 *   each falls back to DEFAULT_INPUT / DEFAULT_OUTPUT when not supplied.
 */
function parseArgs() {
  const args = process.argv.slice(2);
  const config = {
    input: DEFAULT_INPUT,
    output: DEFAULT_OUTPUT,
  };

  // The config starts out holding the defaults, so "was it provided?" has to
  // be tracked separately; testing `!config.input` could never be true.
  let hasInput = false;
  let hasOutput = false;

  for (const arg of args) {
    if (arg.startsWith('--input=')) {
      config.input = arg.substring('--input='.length);
      hasInput = true;
    } else if (arg.startsWith('--output=')) {
      config.output = arg.substring('--output='.length);
      hasOutput = true;
    } else if (arg === '--help' || arg === '-h') {
      console.log(`
π Markdown to MDX Converter

Usage:
node mdx-converter.mjs [options]

Options:
--input=PATH Input Markdown file (default: ${DEFAULT_INPUT})
--output=PATH Output MDX file (default: ${DEFAULT_OUTPUT})
--help, -h Show this help

Examples:
# Basic conversion
node mdx-converter.mjs

# Custom paths
node mdx-converter.mjs --input=article.md --output=article.mdx
`);
      process.exit(0);
    } else if (arg.startsWith('-')) {
      // Unknown option: ignore it rather than mistaking it for a file path.
      continue;
    } else if (!hasInput) {
      config.input = arg;
      hasInput = true;
    } else if (!hasOutput) {
      config.output = arg;
      hasOutput = true;
    }
  }

  return config;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Names of the components referenced by the generated MDX so far.
 * Filled by the transform functions, turned into import lines by
 * addComponentImports(), and cleared at the start of processMdxContent().
 */
const usedComponents = new Set();

/**
 * Image imports collected so far: cleaned source path -> import variable name.
 * Shares the same fill / read / clear lifecycle as usedComponents.
 */
const imageImports = new Map();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Words that cannot be used as the binding name of `import X from '...'`.
const RESERVED_IMPORT_NAMES = new Set([
  'arguments', 'await', 'break', 'case', 'catch', 'class', 'const', 'continue',
  'debugger', 'default', 'delete', 'do', 'else', 'enum', 'eval', 'export',
  'extends', 'false', 'finally', 'for', 'function', 'if', 'implements',
  'import', 'in', 'instanceof', 'interface', 'let', 'new', 'null', 'package',
  'private', 'protected', 'public', 'return', 'static', 'super', 'switch',
  'this', 'throw', 'true', 'try', 'typeof', 'var', 'void', 'while', 'with',
  'yield',
]);

/**
 * Derive a JavaScript identifier from an image path, for use as an import name.
 *
 * The last path segment is taken, its final extension removed, every
 * character outside [a-zA-Z0-9] replaced by `_`, and a leading digit prefixed
 * with `img_`. If nothing is left the result is `img`, and a reserved word is
 * prefixed with `img_`, so the result is always a usable import binding.
 *
 * @param {string} src - Image path using `/` separators.
 * @returns {string} Identifier derived from the file name.
 */
function generateImageVarName(src) {
  const baseName = src.split('/').pop().replace(/\.[^.]+$/, '');
  const identifier = baseName
    .replace(/[^a-zA-Z0-9]/g, '_')
    .replace(/^[0-9]/, 'img_$&');

  if (identifier === '') {
    // e.g. "./assets/" or "./assets/.png": there is no base name to build on.
    return 'img';
  }
  return RESERVED_IMPORT_NAMES.has(identifier) ? `img_${identifier}` : identifier;
}
|
|
|
|
|
/**
 * Insert the import statements for every component and image collected in
 * `usedComponents` / `imageImports`.
 *
 * The imports go directly after the YAML frontmatter when the document starts
 * with one, otherwise at the very top. A `---` that appears later in a
 * document without frontmatter (table separator, horizontal rule) is no
 * longer mistaken for the end of frontmatter.
 *
 * @param {string} content - MDX document text.
 * @returns {string} The document with imports added, or unchanged when there
 *   is nothing to import.
 */
function addComponentImports(content) {
  console.log(' π¦ Adding component and image imports...');

  const imports = [];

  if (usedComponents.size > 0) {
    const componentNames = Array.from(usedComponents);
    imports.push(
      ...componentNames.map((component) => `import ${component} from '../components/${component}.astro';`),
    );
    console.log(` ✅ Importing components: ${componentNames.join(', ')}`);
  }

  if (imageImports.size > 0) {
    imports.push(
      ...Array.from(imageImports.entries()).map(([src, varName]) => `import ${varName} from '${src}';`),
    );
    console.log(` ✅ Importing ${imageImports.size} image(s)`);
  }

  if (imports.length === 0) {
    console.log(' βΉοΈ No imports needed');
    return content;
  }

  const importBlock = imports.join('\n');

  if (content.startsWith('---')) {
    // Preferred: opening and closing `---` each on a line of their own.
    const strict = /^---[ \t]*\r?\n(?:[\s\S]*?\r?\n)?---[ \t]*(?=\r?\n|$)/.exec(content);
    let frontmatterEnd = strict ? strict[0].length : -1;

    if (frontmatterEnd === -1) {
      // Fallback: the next `---` anywhere after the opening delimiter.
      const closingIndex = content.indexOf('---', 3);
      if (closingIndex !== -1) {
        frontmatterEnd = closingIndex + 3;
      }
    }

    if (frontmatterEnd !== -1) {
      return `${content.slice(0, frontmatterEnd)}\n\n${importBlock}\n${content.slice(frontmatterEnd)}`;
    }
  }

  // No frontmatter: imports lead the document.
  return `${importBlock}\n\n${content}`;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Convert grouped sub-figures into a single `<MultiFigure>` component.
 *
 * A group is an outer `<figure>` that wraps two or more inner `<figure>`
 * elements followed by a shared `<figcaption>`. Every inner figure is expected
 * to hold an `<img>`, a `<p><span id="..."></span></p>` anchor and its own
 * `<figcaption>`.
 *
 * Differences from a plain text substitution:
 * - A group is left untouched unless every inner figure could be parsed, so
 *   images are never silently dropped.
 * - Captions, alt texts and ids are emitted with JSON.stringify, which keeps
 *   the generated MDX valid when they contain quotes or backslashes.
 *
 * Side effects: registers each image in `imageImports` and adds
 * `MultiFigure` to `usedComponents` for every converted group.
 *
 * @param {string} content - Document text.
 * @returns {string} Document with convertible groups replaced.
 */
function convertSubfiguresToMultiFigure(content) {
  console.log(' πΌοΈβ¨ Converting subfigures to MultiFigure components...');

  let convertedCount = 0;

  const subfigureGroupPattern = /<figure>\s*((?:<figure>[\s\S]*?<\/figure>\s*){2,})<figcaption>([\s\S]*?)<\/figcaption>\s*<\/figure>/g;

  // Drop markup and collapse all whitespace so a caption fits on one line.
  const toPlainText = (html) => html.replace(/<[^>]*>/g, '').replace(/\s+/g, ' ').trim();

  const convertedContent = content.replace(subfigureGroupPattern, (match, figuresMatch, globalCaption) => {
    const individualFigurePattern = /<figure>\s*<img src="([^"]*)"[^>]*\/>\s*<p><span id="([^"]*)"[^&]*><\/span><\/p>\s*<figcaption>([\s\S]*?)<\/figcaption>\s*<\/figure>/g;

    const images = [...figuresMatch.matchAll(individualFigurePattern)].map(([, src, id, caption]) => {
      // Anything up to and including ".../output/assets/" becomes "./assets/".
      const cleanSrc = src.replace(/.*\/output\/assets\//, './assets/');
      const cleanCaption = toPlainText(caption);
      const altText = cleanCaption.length > 100
        ? `${cleanCaption.substring(0, 100)}...`
        : cleanCaption;

      return {
        cleanSrc,
        src: generateImageVarName(cleanSrc),
        alt: altText,
        caption: cleanCaption,
        id,
      };
    });

    // Convert only when every inner figure was understood; otherwise keep the
    // original markup so later passes can still handle it.
    const innerFigureCount = (figuresMatch.match(/<figure\b/g) || []).length;
    if (images.length === 0 || images.length !== innerFigureCount) {
      return match;
    }

    convertedCount++;
    usedComponents.add('MultiFigure');
    for (const image of images) {
      imageImports.set(image.cleanSrc, image.src);
    }

    // Two to four images get a fixed column layout; anything else is 'auto'.
    const layout = images.length >= 2 && images.length <= 4
      ? `${images.length}-column`
      : 'auto';

    const imagesJson = images
      .map((img) =>
        ` {\n src: ${img.src},\n alt: ${JSON.stringify(img.alt)},\n caption: ${JSON.stringify(img.caption)},\n id: ${JSON.stringify(img.id)}\n }`,
      )
      .join(',\n');

    return [
      '<MultiFigure',
      'images={[',
      imagesJson,
      ']}',
      `layout="${layout}"`,
      'zoomable',
      'downloadable',
      `caption={${JSON.stringify(toPlainText(globalCaption))}}`,
      '/>',
    ].join('\n');
  });

  if (convertedCount > 0) {
    console.log(` ✅ Converted ${convertedCount} subfigure group(s) to MultiFigure component(s)`);
  } else {
    console.log(' βΉοΈ No subfigure groups found');
  }

  return convertedContent;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Build the MDX markup for one `<Figure>` component.
 *
 * Side effects: registers `src` in `imageImports` under the name returned by
 * generateImageVarName(), and adds `Figure` to `usedComponents`.
 *
 * @param {string} src - Image path; also the key stored in `imageImports`.
 * @param {string} [alt=''] - Alt text, emitted as `alt="..."` when non-empty.
 * @param {string} [id=''] - Element id, emitted as `id="..."` when non-empty.
 * @param {string} [caption=''] - Caption, emitted inside a single-quoted
 *   literal, so callers must already have escaped single quotes.
 * @param {string} [width=''] - Accepted but not used by this function.
 * @returns {string} The `<Figure ... />` markup, one prop per line.
 */
function createFigureComponent(src, alt = '', id = '', caption = '', width = '') {
  const importName = generateImageVarName(src);
  imageImports.set(src, importName);
  usedComponents.add('Figure');

  // Optional props evaluate to a falsy value when absent and are filtered out.
  const propLines = [
    `src={${importName}}`,
    'zoomable',
    'downloadable',
    id && `id="${id}"`,
    'layout="fixed"',
    alt && `alt="${alt}"`,
    caption && `caption={'${caption}'}`,
  ].filter(Boolean);

  return '<Figure\n ' + propLines.join('\n ') + '\n/>';
}
|
|
|
|
|
/**
 * Replace every recognised image form with a `<Figure>` component.
 *
 * Passes run from most specific to most generic, so that an `<img>` inside a
 * larger structure is consumed together with that structure:
 *   1. `<figure id>` + `<img>` + `<figcaption>`
 *   2. `<figure id>` + `<div class="minipage">` + `<img>` + `<figcaption>`
 *   3. `<div class="wrapfigure">` + `<img>`
 *   4. a bare `<img>`
 *   5. Markdown images `![alt](src){attrs}`
 *
 * If the bare-`<img>` pass ran before passes 2 and 3 it would rewrite their
 * inner image first and stop them from matching, losing the caption and id
 * (minipage) or leaving the wrapper text behind (wrapfigure).
 *
 * Side effects happen through createFigureComponent().
 *
 * @param {string} content - Document text.
 * @returns {string} Document with images replaced by `<Figure>` markup.
 */
function transformImages(content) {
  console.log(' πΌοΈ Transforming images to Figure components with imports...');

  let hasImages = false;

  // Anything up to and including ".../output/assets/" becomes "./assets/".
  const cleanSrcPath = (src) => src.replace(/.*\/output\/assets\//, './assets/');

  // Caption for a single-quoted literal: no markup, one line, quotes escaped.
  const cleanCaption = (caption) => caption
    .replace(/<[^>]*>/g, '')
    .replace(/\s+/g, ' ')
    .replace(/'/g, "\\'")
    .trim();

  // Alt text: no markup, one line, truncated to maxLength characters.
  const cleanAltText = (alt, maxLength = 100) => {
    const cleaned = alt
      .replace(/<[^>]*>/g, '')
      .replace(/\s+/g, ' ')
      .trim();

    return cleaned.length > maxLength
      ? `${cleaned.substring(0, maxLength)}...`
      : cleaned;
  };

  // Shared by the two captioned-figure passes. The alt text comes from the raw
  // caption so it does not inherit the backslash escapes added for the caption.
  const captionedFigure = (id, src, caption) => {
    hasImages = true;
    return createFigureComponent(cleanSrcPath(src), cleanAltText(caption), id, cleanCaption(caption));
  };

  // 1. Figure with id, image and caption.
  content = content.replace(
    /<figure id="([^"]*)">\s*<img src="([^"]*)"(?:\s+style="([^"]*)")?\s*\/>\s*<figcaption>\s*(.*?)\s*<\/figcaption>\s*<\/figure>/gs,
    (match, id, src, style, caption) => captionedFigure(id, src, caption),
  );

  // 2. Figure whose image is wrapped in a minipage div.
  content = content.replace(
    /<figure id="([^"]*)">\s*<div class="minipage">\s*<img src="([^"]*)"[^>]*\/>\s*<\/div>\s*<figcaption[^>]*>(.*?)<\/figcaption>\s*<\/figure>/gs,
    (match, id, src, caption) => captionedFigure(id, src, caption),
  );

  // 3. Wrapfigure div: "r<number>" followed by the image.
  content = content.replace(
    /<div class="wrapfigure">\s*r[\d.]+\s*<img src="([^"]*)"[^>]*\/>\s*<\/div>/gs,
    (match, src) => {
      hasImages = true;
      return createFigureComponent(cleanSrcPath(src), 'Figure');
    },
  );

  // 4. Any remaining bare <img>.
  content = content.replace(
    /<img src="([^"]*)"(?:\s+style="([^"]*)")?\s*(?:alt="([^"]*)")?\s*\/>/g,
    (match, src, style, alt) => {
      hasImages = true;
      return createFigureComponent(cleanSrcPath(src), cleanAltText(alt || 'Figure'));
    },
  );

  // 5. Markdown images, with an optional {#id ...} attribute block.
  content = content.replace(
    /!\[([^\]]*)\]\(([^)]+)\)(?:\{([^}]+)\})?/g,
    (match, alt, src, attributes) => {
      hasImages = true;
      const idMatch = attributes ? attributes.match(/#([\w-]+)/) : null;
      const id = idMatch ? idMatch[1] : '';
      return createFigureComponent(cleanSrcPath(src), cleanAltText(alt || 'Figure'), id);
    },
  );

  if (hasImages) {
    console.log(' ✅ Figure components with imports will be created');
  }

  return content;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Turn colour-styled text into spans that carry a CSS class.
 *
 * Two input forms are handled, in this order:
 *   `<span style="color: NAME">text</span>`
 *   `[text]{style="color: NAME"}`
 * `hf1` and `hf2` map to `text-hf-primary` / `text-hf-secondary`; any other
 * name becomes `text-NAME`.
 *
 * @param {string} content - Document text.
 * @returns {string} Document with both forms rewritten.
 */
function transformStyledSpans(content) {
  console.log(' π¨ Transforming styled spans...');

  // One builder for both syntaxes; only the capture order differs.
  const buildSpan = (color, text) => {
    const colorMap = {
      'hf2': 'text-hf-secondary',
      'hf1': 'text-hf-primary'
    };
    const className = colorMap[color] || `text-${color}`;
    return `<span class="${className}">${text}</span>`;
  };

  const afterHtmlSpans = content.replace(
    /<span style="color: ([^"]+)">(.*?)<\/span>/g,
    (_whole, color, text) => buildSpan(color, text),
  );

  return afterHtmlSpans.replace(
    /\[([^\]]+)\]\{style="color: ([^"]+)"\}/g,
    (_whole, text, color) => buildSpan(color, text),
  );
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Repair span markup, in four passes run in this order:
 *   1. backslash-escaped empty `\<span id style\>\</span\>`
 *   2. backslash-escaped `\<span class\>text\</span\>`
 *   3. `<p><span id style></span></p>`, where the `<p>` wrapper is dropped
 *   4. `<p><span class>text</span></p>`, where the `<p>` wrapper is dropped
 * Anchor spans get `position- absolute;` corrected to `position: absolute;`;
 * `highlight` spans lose a leading "(N) " number.
 *
 * @param {string} content - Document text.
 * @returns {string} Document with the matching spans rewritten.
 */
function fixHtmlEscaping(content) {
  console.log(' π§ Fixing HTML escaping in spans...');

  let fixedCount = 0;

  // Rebuild an empty anchor span, repairing the mangled position declaration.
  const rebuildAnchorSpan = (_whole, id, style) => {
    fixedCount += 1;
    const repairedStyle = style.replace('position- absolute;', 'position: absolute;');
    return `<span id="${id}" style="${repairedStyle}"></span>`;
  };

  // Rebuild a class span; highlight spans drop their "(N) " prefix.
  const rebuildClassSpan = (_whole, className, text) => {
    fixedCount += 1;
    const body = className === 'highlight'
      ? text.replace(/^\(\d+\)\s*/, '')
      : text;
    return `<span class="${className}">${body}</span>`;
  };

  const passes = [
    [/\\<span id="([^"]*)" style="([^"]*)"\\>\\<\/span\\>/g, rebuildAnchorSpan],
    [/\\<span class="([^"]*)"\\>([^\\]+)\\<\/span\\>/g, rebuildClassSpan],
    [/<p><span id="([^"]*)" style="([^"]*)"><\/span><\/p>/g, rebuildAnchorSpan],
    [/<p><span class="([^"]*)">([^&]*)<\/span><\/p>/g, rebuildClassSpan],
  ];

  const repaired = passes.reduce(
    (text, [pattern, rebuild]) => text.replace(pattern, rebuild),
    content,
  );

  if (fixedCount > 0) {
    console.log(` ✅ Fixed ${fixedCount} escaped span(s)`);
  }

  return repaired;
}
|
|
|
|
|
/**
 * Strip a leading "(N) " marker from the text of `highlight` spans.
 *
 * @param {string} content - Document text.
 * @returns {string} Document with numbered highlight spans cleaned.
 */
function cleanHighlightNumbering(content) {
  console.log(' π’ Removing numbering from highlight spans...');

  const numberedHighlight = /<span class="highlight">(\(\d+\)\s*)([^<]+)<\/span>/g;
  let cleanedCount = 0;

  const result = content.replace(numberedHighlight, (_whole, _marker, text) => {
    cleanedCount += 1;
    return '<span class="highlight">' + text + '</span>';
  });

  if (cleanedCount > 0) {
    console.log(` ✅ Removed numbering from ${cleanedCount} highlight span(s)`);
  }

  return result;
}
|
|
|
|
|
/**
 * Reduce `[text](#anchor){...reference...}` to a plain `[text](#anchor)` link
 * by dropping the trailing attribute block.
 *
 * @param {string} content - Document text.
 * @returns {string} Document with the attribute blocks removed.
 */
function transformReferenceLinks(content) {
  console.log(' π Transforming reference links...');

  const referenceLink = /\[([^\]]+)\]\((#[^)]+)\)\{[^}]*reference[^}]*\}/g;

  // $1 is the link text, $2 the "#anchor" target.
  return content.replace(referenceLink, '[$1]($2)');
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Make sure the document begins with a frontmatter block.
 *
 * Content that already starts with `---` is returned untouched. Otherwise
 * frontmatter is prepended: produced by extractAndGenerateFrontmatter() when
 * LaTeX source is supplied, or a minimal block with a generic title and
 * today's date when it is not.
 *
 * @param {string} content - Document text.
 * @param {string} [latexContent=''] - LaTeX source to take metadata from.
 * @returns {string} Document that starts with frontmatter.
 */
function ensureFrontmatter(content, latexContent = '') {
  console.log(' π Ensuring proper frontmatter...');

  if (content.startsWith('---')) {
    return content;
  }

  if (latexContent) {
    const generated = extractAndGenerateFrontmatter(latexContent);
    console.log(' ✅ Generated frontmatter from LaTeX metadata');
    return generated + content;
  }

  const today = new Date().toLocaleDateString('en-US', {
    year: 'numeric',
    month: 'short',
    day: '2-digit'
  });

  // The two trailing empty entries leave one blank line after the closing '---'.
  const basicFrontmatter = [
    '---',
    'title: "Research Article"',
    `published: "${today}"`,
    'tableOfContentsAutoCollapse: true',
    '---',
    '',
    '',
  ].join('\n');
  console.log(' ✅ Generated basic frontmatter');

  return basicFrontmatter + content;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Normalise inline math whose `$` delimiters are mixed with backticks.
 *
 * Three shapes are rewritten to `$...$`, in this order:
 *   $`...`$   both dollars present
 *   `...`$    only the closing dollar
 *   $`...`    only the opening dollar
 *
 * @param {string} content - Document text.
 * @returns {string} Document with the mixed delimiters replaced.
 */
function fixMixedMathDelimiters(content) {
  console.log(' π§ Fixing mixed math delimiters...');

  let fixedCount = 0;

  const toInlineMath = (_whole, mathContent) => {
    fixedCount += 1;
    return '$' + mathContent + '$';
  };

  // Order matters: the fully delimited form must be consumed first.
  const mixedForms = [
    /\$`([^`]*)`\$/g,
    /`([^`]*)`\$/g,
    /\$`([^`]*)`(?!\$)/g,
  ];

  let result = content;
  for (const form of mixedForms) {
    result = result.replace(form, toInlineMath);
  }

  if (fixedCount > 0) {
    console.log(` ✅ Fixed ${fixedCount} mixed math delimiter(s)`);
  }

  return result;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Clean up stray delimiters around display math.
 *
 * Steps:
 * 1. When the document holds an odd number of `$$` delimiters, remove the
 *    last one if it stands alone on its line. With an even count every
 *    delimiter is assumed to be paired, so the closing `$$` of the final
 *    block is never deleted.
 * 2. Remove backticks found inside `$$...$$` blocks.
 * 3. When text sits on the same line between a closing code fence and a new
 *    "``` math" fence, move that text into its own paragraph.
 *
 * @param {string} content - Document text.
 * @returns {string} Cleaned document.
 */
function cleanOrphanedMathDelimiters(content) {
  console.log(' π§Ή Cleaning orphaned math delimiters...');
  console.log(' π Content length:', content.length, 'chars');

  let fixedCount = 0;

  // Step 1: only an odd delimiter count can leave one unpaired.
  const delimiterCount = (content.match(/\$\$/g) || []).length;
  if (delimiterCount % 2 === 1) {
    content = content.replace(/^\$\$\s*$(?!\s*[\s\S]*?\$\$)/gm, () => {
      fixedCount++;
      return '';
    });
  }

  // Step 2: backticks inside display math.
  const mathMatches = content.match(/\$\$([\s\S]*?)\$\$/g);
  console.log(` π Found ${mathMatches ? mathMatches.length : 0} math blocks`);

  content = content.replace(/\$\$([\s\S]*?)\$\$/g, (match, mathContent) => {
    const backtickCount = (mathContent.match(/`/g) || []).length;
    if (backtickCount === 0) {
      return match;
    }

    console.log(` π§ Found math block with ${backtickCount} backtick(s)`);
    fixedCount++;
    console.log(` π§ Removed ${backtickCount} backtick(s) from math block`);
    return `$$${mathContent.replace(/`/g, '')}$$`;
  });

  // Step 3: text squeezed between two math fences.
  content = content.replace(/``` math\s*\n([\s\S]*?)\n```\s*([^`\n]*?)\s*``` math/g, (match, math1, text) => {
    if (text.trim().length > 0 && !text.includes('```')) {
      fixedCount++;
      return '```' + ' math\n' + math1 + '\n```\n\n' + text.trim() + '\n\n```' + ' math';
    }
    return match;
  });

  if (fixedCount > 0) {
    console.log(` ✅ Fixed ${fixedCount} orphaned math delimiter(s)`);
  }

  return content;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Put every `$...$` span that contains a line feed onto a single line.
 *
 * Inside such a span all whitespace runs collapse to one space and the ends
 * are trimmed. Spans without a line feed are returned exactly as found.
 *
 * @param {string} content - Document text.
 * @returns {string} Document with multi-line single-dollar spans flattened.
 */
function cleanSingleLineMathNewlines(content) {
  console.log(' π’ Cleaning newlines in single-dollar math blocks ($...$)...');

  const singleDollarSpan = /\$(?!\$)([\s\S]*?)\$(?!\$)/g;
  let cleanedCount = 0;

  const cleanedContent = content.replace(singleDollarSpan, (match, mathContent) => {
    if (!mathContent.includes('\n')) {
      return match;
    }

    cleanedCount += 1;
    // Line feeds and carriage returns are whitespace, so one collapse covers them.
    const flattened = mathContent.replace(/\s+/g, ' ').trim();
    return '$' + flattened + '$';
  });

  if (cleanedCount > 0) {
    console.log(` ✅ Cleaned ${cleanedCount} single-dollar math block(s) with newlines`);
  }

  return cleanedContent;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Rewrite every `$$...$$` block so that each delimiter sits on its own line.
 *
 * The math body is trimmed and the block is emitted as
 * "\n$$\n<body>\n$$\n".
 *
 * @param {string} content - Document text.
 * @returns {string} Document with display math reformatted.
 */
function formatDisplayMathBlocks(content) {
  console.log(' π Formatting display math blocks with proper spacing...');

  const displayMath = /\$\$([\s\S]*?)\$\$/g;
  let formattedCount = 0;

  const formattedContent = content.replace(displayMath, (_whole, mathContent) => {
    formattedCount += 1;
    return ['', '$$', mathContent.trim(), '$$', ''].join('\n');
  });

  if (formattedCount > 0) {
    console.log(` ✅ Formatted ${formattedCount} display math block(s) with proper spacing`);
  }

  return formattedContent;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Flatten `<figcaption>` bodies that contain line feeds.
 *
 * In such captions whitespace runs collapse to one space and the ends are
 * trimmed; the tag's attributes are kept. Captions without a line feed are
 * left exactly as they are.
 *
 * @param {string} content - Document text.
 * @returns {string} Document with multi-line captions flattened.
 */
function cleanFigcaptionNewlines(content) {
  console.log(' π Cleaning newlines in figcaption elements...');

  const figcaption = /<figcaption([^>]*)>([\s\S]*?)<\/figcaption>/g;
  let cleanedCount = 0;

  const cleanedContent = content.replace(figcaption, (whole, attributes, captionContent) => {
    if (!captionContent.includes('\n')) {
      return whole;
    }

    cleanedCount += 1;
    const singleLine = captionContent.replace(/\s+/g, ' ').trim();
    return '<figcaption' + attributes + '>' + singleLine + '</figcaption>';
  });

  if (cleanedCount > 0) {
    console.log(` ✅ Cleaned ${cleanedCount} figcaption element(s)`);
  } else {
    console.log(` βΉοΈ No figcaption elements with newlines found`);
  }

  return cleanedContent;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Delete every `<!-- ... -->` comment, including comments spanning lines.
 *
 * @param {string} content - Document text.
 * @returns {string} Document without HTML comments.
 */
function removeHtmlComments(content) {
  console.log(' ποΈ Removing HTML comments...');

  const htmlComment = /<!--[\s\S]*?-->/g;

  // Count first, then strip: both scans find the same non-overlapping matches.
  const removedCount = (content.match(htmlComment) || []).length;
  const cleanedContent = content.replace(htmlComment, '');

  if (removedCount > 0) {
    console.log(` ✅ Removed ${removedCount} HTML comment(s)`);
  }

  return cleanedContent;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Apply three small syntax clean-ups, in this order:
 *   1. put exactly one newline between a `>` and a following `<` that are
 *      separated only by optional whitespace;
 *   2. remove a trailing `{...}` attribute block from heading lines;
 *   3. unescape `\"` and `\'`.
 *
 * @param {string} content - Document text.
 * @returns {string} Cleaned document.
 */
function cleanMdxSyntax(content) {
  console.log(' π§Ή Cleaning MDX syntax...');

  const tagsOnOwnLines = content.replace(/>\s*</g, '>\n<');
  const headingsWithoutAttributes = tagsOnOwnLines.replace(/^(#{1,6}\s+[^{#\n]+)\{[^}]+\}$/gm, '$1');
  const quotesUnescaped = headingsWithoutAttributes.replace(/\\("|')/g, '$1');

  return quotesUnescaped;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Run the complete Markdown -> MDX pipeline over one document.
 *
 * The shared `usedComponents` and `imageImports` collections are cleared
 * first, so each call starts clean. Imports are added last because they
 * depend on what the earlier steps registered.
 *
 * @param {string} content - Markdown text.
 * @param {string} [latexContent=''] - LaTeX source passed to ensureFrontmatter().
 * @returns {string} MDX text.
 */
function processMdxContent(content, latexContent = '') {
  console.log('π§ Processing for Astro MDX compatibility...');

  usedComponents.clear();
  imageImports.clear();

  // The first two steps run separately: one takes an extra argument, and a
  // diagnostic count is logged between them and the rest.
  const withFrontmatter = ensureFrontmatter(content, latexContent);
  const withFixedDelimiters = fixMixedMathDelimiters(withFrontmatter);

  const mathBlocksAfterMixed = (withFixedDelimiters.match(/\$\$([\s\S]*?)\$\$/g) || []).length;
  console.log(` π Math blocks after mixed delimiters fix: ${mathBlocksAfterMixed}`);

  // Remaining steps, each taking and returning the document text.
  const remainingSteps = [
    cleanOrphanedMathDelimiters,
    cleanSingleLineMathNewlines,
    formatDisplayMathBlocks,
    removeHtmlComments,
    cleanMdxSyntax,
    convertSubfiguresToMultiFigure,
    transformImages,
    transformStyledSpans,
    transformReferenceLinks,
    fixHtmlEscaping,
    cleanHighlightNumbering,
    cleanFigcaptionNewlines,
    addComponentImports,
  ];

  return remainingSteps.reduce((text, step) => step(text), withFixedDelimiters);
}
|
|
|
|
|
/**
 * Convert a Markdown file to an Astro-compatible MDX file.
 *
 * Reads `inputFile`, optionally reads `../input/main.tex` relative to the
 * input's folder for metadata, runs processMdxContent(), and writes the
 * result to `outputFile`.
 *
 * The process exits with code 1 when the input file is missing or when the
 * conversion throws. A failure to read the optional LaTeX file is reported as
 * a warning and the conversion continues without LaTeX metadata.
 *
 * @param {string} inputFile - Path of the Markdown file to read.
 * @param {string} outputFile - Path of the MDX file to write.
 * @returns {void}
 */
function convertToMdx(inputFile, outputFile) {
  console.log('π Modular Markdown to Astro MDX Converter');
  console.log(`π Input: ${inputFile}`);
  console.log(`π Output: ${outputFile}`);

  if (!existsSync(inputFile)) {
    console.error(`β Input file not found: ${inputFile}`);
    process.exit(1);
  }

  try {
    console.log('π Reading Markdown file...');
    const markdownContent = readFileSync(inputFile, 'utf8');

    // Best effort: the LaTeX source only improves the generated frontmatter.
    let latexContent = '';
    const latexFile = join(dirname(inputFile), '..', 'input', 'main.tex');
    try {
      if (existsSync(latexFile)) {
        latexContent = readFileSync(latexFile, 'utf8');
      }
    } catch (error) {
      console.warn(`Could not read LaTeX source ${latexFile}: ${error.message}`);
    }

    const mdxContent = processMdxContent(markdownContent, latexContent);

    console.log('πΎ Writing MDX file...');
    writeFileSync(outputFile, mdxContent, 'utf8');

    console.log(`✅ Conversion completed: ${outputFile}`);

    // Sizes in bytes, not UTF-16 code units, so the KB figures match the files.
    const inputSize = Math.round(Buffer.byteLength(markdownContent, 'utf8') / 1024);
    const outputSize = Math.round(Buffer.byteLength(mdxContent, 'utf8') / 1024);
    console.log(`π Input: ${inputSize}KB β Output: ${outputSize}KB`);
  } catch (error) {
    console.error('β Conversion failed:');
    console.error(error.message);
    process.exit(1);
  }
}

export { convertToMdx };
|
|
|
|
|
/**
 * Command-line entry point: parse the arguments, run the conversion, and
 * report completion. convertToMdx() exits the process itself on failure, so
 * the final message is printed only after a successful conversion.
 */
function main() {
  const { input, output } = parseArgs();
  convertToMdx(input, output);
  console.log('π MDX conversion completed!');
}

// Run main() only when this file is the script Node was started with.
// File-system paths are compared instead of URL strings: a hand-built
// `file://` URL does not match import.meta.url on Windows or when the path
// contains characters that URLs percent-encode.
if (process.argv[1] === fileURLToPath(import.meta.url)) {
  main();
}
|
|
|