|
|
#!/usr/bin/env node |
|
|
|
|
|
import { readFileSync, writeFileSync, existsSync, readdirSync } from 'fs'; |
|
|
import { join, dirname } from 'path'; |
|
|
import { fileURLToPath } from 'url'; |
|
|
|
|
|
// Derive this module's file path and containing directory from its own URL;
// `__dirname` is used below to build the default input path.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Delete TeX grouping helper commands from the text.
 *
 * @param {string} content - Text to clean.
 * @returns {string} The text with every listed grouping command removed.
 */
function removeTexGroupingCommands(content) {
  // NOTE(review): the log prefix looks like a mis-decoded emoji — confirm the file's encoding.
  console.log(' π§Ή Removing TeX grouping commands...');

  // Order matters: the compound sequences are removed before the bare
  // \bgroup / \egroup patterns would otherwise eat part of them.
  const groupingPatterns = [
    /\\mathopen\{\}\\mathclose\\bgroup/g,
    /\\aftergroup\\egroup/g,
    /\\bgroup/g,
    /\\egroup/g,
  ];

  let stripped = content;
  for (const pattern of groupingPatterns) {
    stripped = stripped.replace(pattern, '');
  }
  return stripped;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Replace `\left[` and `\right]` with plain square brackets.
 *
 * Whitespace directly after `\left[` and directly before `\right]` is dropped
 * together with the command.
 *
 * @param {string} content - Text to rewrite.
 * @returns {string} The text with plain `[` / `]` delimiters.
 */
function simplifyLatexDelimiters(content) {
  // NOTE(review): the log prefix looks like a mis-decoded emoji — confirm the file's encoding.
  console.log(' π§ Simplifying LaTeX delimiters...');

  const openersSimplified = content.replace(/\\left\[\s*/g, '[');
  const closersSimplified = openersSimplified.replace(/\s*\\right\]/g, ']');
  return closersSimplified;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Remove every `\label{...}` command from the text.
 *
 * @param {string} content - Text to clean.
 * @returns {string} The text without `\label{...}` commands.
 */
function removeOrphanedLabels(content) {
  // NOTE(review): the log prefix looks like a mis-decoded emoji — confirm the file's encoding.
  console.log(' π·οΈ Removing orphaned labels...');

  // Pass 1: a label alone on its line is removed together with the whitespace
  // around it, so no stray indentation is left behind.
  const labelOnlyLine = /^\s*\\label\{[^}]+\}\s*$/gm;
  const withoutLabelLines = content.replace(labelOnlyLine, '');

  // Pass 2: any label still embedded in other text.
  const anyLabel = /\\label\{[^}]+\}/g;
  return withoutLabelLines.replace(anyLabel, '');
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Rewrite math commands into the form this pipeline targets for KaTeX.
 *
 * Currently this maps `\hdots` to `\ldots`. The previous `\vdots` -> `\vdots`
 * replacement was a no-op and has been dropped.
 *
 * @param {string} content - Text to rewrite.
 * @returns {string} The text with `\hdots` replaced by `\ldots`.
 */
function fixMathCommands(content) {
  // NOTE(review): the log prefix looks like a mis-decoded emoji — confirm the file's encoding.
  console.log(' π Fixing KaTeX-incompatible math commands...');

  // The negative lookahead keeps longer command names that merely start with
  // "\hdots" (e.g. \hdotsfor) from being rewritten into a non-existent command.
  return content.replace(/\\hdots(?![A-Za-z])/g, '\\ldots');
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Convert brace-style matrix commands (`\pmatrix{...}`, `\bmatrix{...}`,
 * `\vmatrix{...}`) into `\begin{...} ... \end{...}` environments.
 *
 * The matrix body may contain one level of nested braces. Rows are split on
 * `\\`, trimmed, and empty rows are dropped.
 *
 * @param {string} content - Text to rewrite.
 * @returns {string} The text with matching matrix commands converted.
 */
function fixMatrixCommands(content) {
  // NOTE(review): the log prefix looks like a mis-decoded emoji — confirm the file's encoding.
  console.log(' π’ Converting matrix commands to KaTeX format...');

  // A braced argument that allows a single level of nested `{...}` groups.
  const bracedBody = /\{([^{}]*(?:\{[^{}]*\}[^{}]*)*)\}/.source;

  let fixedCount = 0;
  let converted = content;

  // One pass per environment, in this order, so that a matrix nested inside
  // an already-converted outer matrix is still picked up by a later pass.
  for (const environment of ['pmatrix', 'bmatrix', 'vmatrix']) {
    const command = new RegExp(`\\\\${environment}${bracedBody}`, 'g');

    converted = converted.replace(command, (match, matrixContent) => {
      fixedCount++;
      const rows = matrixContent
        .split('\\\\')
        .map((row) => row.trim())
        .filter(Boolean);
      return `\\begin{${environment}}\n${rows.join(' \\\\\n')}\n\\end{${environment}}`;
    });
  }

  if (fixedCount > 0) {
    // NOTE(review): "β" + line break looks like a mis-decoded status emoji.
    console.log(` β\nFixed ${fixedCount} matrix command(s)`);
  }

  return converted;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Normalize typographic Unicode characters to plain equivalents.
 *
 * - Inside `$...$` and `$$...$$` spans, every middle dot (U+00B7) becomes
 *   `\cdot`; a space is appended when a letter follows so the command name
 *   does not run into it (`a·b` -> `a\cdot b`).
 * - Curly double/single quotes become straight quotes.
 * - Horizontal ellipsis becomes `...`, en dash `-`, em dash `--`.
 *
 * All non-ASCII characters are written as `\u` escapes so the patterns do not
 * depend on how this source file is encoded or decoded.
 *
 * @param {string} content - Text to normalize.
 * @returns {string} The normalized text.
 */
function fixUnicodeIssues(content) {
  // NOTE(review): the log prefix looks like a mis-decoded emoji — confirm the file's encoding.
  console.log(' π Fixing Unicode characters for MDX compatibility...');

  const middleDotToCdot = (mathSpan) =>
    mathSpan
      .replace(/\u00B7(?=[A-Za-z])/g, '\\cdot ')
      .replace(/\u00B7/g, '\\cdot');

  return content
    // Match whole math spans left to right (display first, then inline) so the
    // `$` delimiters pair up and prose lying between two spans is never
    // mistaken for math.
    .replace(/\$\$[^$]*\$\$|\$[^$]*\$/g, (mathSpan) =>
      mathSpan.includes('\u00B7') ? middleDotToCdot(mathSpan) : mathSpan
    )
    .replace(/[\u201C\u201D]/g, '"')
    .replace(/[\u2018\u2019]/g, "'")
    .replace(/\u2026/g, '...')
    .replace(/\u2013/g, '-')
    .replace(/\u2014/g, '--');
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Reshape math that contains row breaks so it sits in its own display block.
 *
 * 1. A single-line inline span `$...$` that contains `\\` and at least one of
 *    `= + - * / ^ _ { }` is promoted to a `$$` block with one row per line.
 * 2. Every `$$` block whose delimiters are on their own lines is re-emitted
 *    with trimmed content and surrounded by newlines.
 *
 * @param {string} content - Text to rewrite.
 * @returns {string} The rewritten text.
 */
function fixMultilineMath(content) {
  // NOTE(review): the log prefix looks like a mis-decoded emoji — confirm the file's encoding.
  console.log(' π Fixing multiline math expressions for MDX...');

  // The lookbehind/lookahead restrict this to single-`$` delimiters. Without
  // them the pattern also matched the inner `$...$` of a one-line `$$...$$`
  // block and wrapped it again, yielding `$$$ ... $$$`.
  const inlineWithRowBreak = /(?<!\$)\$([^$\n]*\\\\[^$\n]*)\$(?!\$)/g;
  const looksLikeEquation = /[=+\-*/^_{}]/;

  const promoted = content.replace(inlineWithRowBreak, (match, mathContent) => {
    if (!looksLikeEquation.test(mathContent)) {
      return match;
    }
    // Put each row on its own line, indenting continuation rows by one space.
    const cleanedMath = mathContent.trim().replace(/\s*\\\\\s*/g, '\\\\\n ');
    return `$$\n${cleanedMath}\n$$`;
  });

  const displayBlock = /\$\$\s*\n\s*([^$]+?)\s*\n\s*\$\$/g;
  return promoted.replace(
    displayBlock,
    (match, mathContent) => `\n$$\n${mathContent.trim()}\n$$\n`
  );
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Fill empty fenced code blocks with the contents of snippet files.
 *
 * For every empty fence that names a language, the first file (in sorted
 * order) inside `<inputDir>/snippets` whose extension matches that language
 * is read and inserted. The fence's language tag is kept as written. Blocks
 * with no matching file, or whose file cannot be read, are left unchanged.
 *
 * @param {string} content - Markdown text to process.
 * @param {string|null} [inputDir=null] - Directory that contains a `snippets`
 *   sub-directory. When falsy, the content is returned untouched.
 * @returns {string} The Markdown with snippets injected where possible.
 */
function injectCodeSnippets(content, inputDir = null) {
  // NOTE(review): the log prefixes in this function look like mis-decoded
  // emoji — confirm the file's encoding.
  console.log(' π» Injecting code snippets...');

  if (!inputDir) {
    console.log(' β οΈ No input directory provided, skipping code injection');
    return content;
  }

  const snippetsDir = join(inputDir, 'snippets');

  if (!existsSync(snippetsDir)) {
    console.log(' β οΈ Snippets directory not found, skipping code injection');
    return content;
  }

  let availableSnippets;
  try {
    // Sorted so the file chosen for a language does not depend on the order
    // in which the file system happens to list the directory.
    availableSnippets = readdirSync(snippetsDir).sort();
    console.log(` π Found ${availableSnippets.length} snippet file(s): ${availableSnippets.join(', ')}`);
  } catch (error) {
    console.log(` β Error reading snippets directory: ${error.message}`);
    return content;
  }

  // A Map (rather than an object literal) so fence languages such as
  // "constructor" cannot resolve to inherited Object properties.
  const extensionByLanguage = new Map([
    ['python', 'py'],
    ['javascript', 'js'],
    ['typescript', 'ts'],
    ['bash', 'sh'],
    ['shell', 'sh'],
  ]);

  const emptyCodeBlockPattern = /```\s*(\w+)\s*\n\s*```/g;
  let injectionCount = 0;

  const processedContent = content.replace(emptyCodeBlockPattern, (match, language) => {
    // Known language names are matched case-insensitively; anything else is
    // used verbatim as the file extension.
    const fileExtension = extensionByLanguage.get(language.toLowerCase()) || language;
    const selectedFile = availableSnippets.find((file) => file.endsWith(`.${fileExtension}`));

    if (selectedFile === undefined) {
      console.log(` β οΈ No ${language} snippet found (looking for .${fileExtension})`);
      return match;
    }

    try {
      const snippetContent = readFileSync(join(snippetsDir, selectedFile), 'utf8');
      injectionCount++;
      console.log(` β\nInjected: ${selectedFile}`);
      return `\`\`\`${language}\n${snippetContent.trim()}\n\`\`\``;
    } catch (error) {
      console.log(` β Error reading ${selectedFile}: ${error.message}`);
      return match;
    }
  });

  if (injectionCount > 0) {
    console.log(` π Injected ${injectionCount} code snippet(s)`);
  }

  return processedContent;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Replace colons with hyphens inside `href`, `data-reference` and `id`
 * attribute values (double-quoted only).
 *
 * Every colon in a value is replaced, so `id="eq:a:b"` and `href="#eq:a:b"`
 * end up pointing at the same `eq-a-b` target. `href` values that are
 * absolute URLs (`scheme://...`) or `mailto:` / `tel:` / `data:` links are
 * left untouched, because rewriting their scheme separator would break them.
 *
 * @param {string} content - Markup to rewrite.
 * @returns {string} The markup with colons replaced in the targeted attributes.
 */
function fixAllAttributes(content) {
  // NOTE(review): the log prefix looks like a mis-decoded emoji — confirm the file's encoding.
  console.log(' π Fixing all attributes with colons...');

  const absoluteUrl = /^(?:[a-z][a-z0-9+.-]*:\/\/|mailto:|tel:|data:)/i;
  const colonAttribute = /(href|data-reference|id)="([^"]*:[^"]*)"/g;

  let fixedCount = 0;

  const fixedContent = content.replace(colonAttribute, (match, name, value) => {
    if (name === 'href' && absoluteUrl.test(value)) {
      return match;
    }
    fixedCount++;
    return `${name}="${value.replace(/:/g, '-')}"`;
  });

  if (fixedCount > 0) {
    // NOTE(review): "β" + line break looks like a mis-decoded status emoji.
    console.log(` β\nFixed ${fixedCount} attribute(s) with colons`);
  }

  return fixedContent;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Replace colons with hyphens in bracketed link text of the form
 * `<a ...>[label]</a>`.
 *
 * Only the text between `[` and `]` of a single anchor is inspected, and every
 * colon in it is replaced. Anchors whose label has no colon are left as-is.
 *
 * @param {string} content - Markup to rewrite.
 * @returns {string} The markup with colons removed from bracketed link labels.
 */
function fixLinkTextContent(content) {
  // NOTE(review): the log prefix looks like a mis-decoded emoji — confirm the file's encoding.
  console.log(' π Fixing link text content with colons...');

  let fixedCount = 0;

  // The label is limited to `[^\]]*` so a match can never run past the closing
  // bracket of one anchor and swallow a colon from the prose that follows it.
  const bracketedAnchor = /<a([^>]*)>\[([^\]]*)\]<\/a>/g;

  const cleanedContent = content.replace(bracketedAnchor, (match, attributes, label) => {
    if (!label.includes(':')) {
      return match;
    }
    fixedCount++;
    return `<a${attributes}>[${label.replace(/:/g, '-')}]</a>`;
  });

  if (fixedCount > 0) {
    // NOTE(review): "β" + line break looks like a mis-decoded status emoji.
    console.log(` β\nFixed ${fixedCount} link text(s) with colons`);
  }

  return cleanedContent;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Turn `%%ALIGN_ANCHOR_ID{...}%%` marker lines at the top of "``` math" fences
 * into an empty `<span id="...">` placed in front of the fence.
 *
 * The marker line is removed from the fence; the rest of the fence body is
 * kept unchanged.
 *
 * @param {string} content - Markdown text to rewrite.
 * @returns {string} The Markdown with anchor markers converted to spans.
 */
function convertAlignAnchors(content) {
  // NOTE(review): the log prefix looks like a mis-decoded emoji — confirm the file's encoding.
  console.log(' π·οΈ Converting align anchor markers to HTML spans...');

  const anchoredMathFence = /``` math\n%%ALIGN_ANCHOR_ID\{([^}]+)\}%%\n([\s\S]*?)\n```/g;
  let convertedCount = 0;

  const withSpans = content.replace(anchoredMathFence, (match, anchorId, mathContent) => {
    convertedCount += 1;
    const outputLines = [
      '<span id="' + anchorId + '" style="position: absolute;"></span>',
      '',
      '``` math',
      mathContent,
      '```',
    ];
    return outputLines.join('\n');
  });

  if (convertedCount > 0) {
    // NOTE(review): "β" + line break looks like a mis-decoded status emoji.
    console.log(` β\nConverted ${convertedCount} align anchor marker(s) to spans`);
  }

  return withSpans;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Run the full clean-up pipeline over a Markdown document.
 *
 * The steps run in a fixed order; code-snippet injection runs last and only
 * when `inputDir` is provided.
 *
 * @param {string} content - Raw Markdown text.
 * @param {string|null} [inputDir=null] - Directory passed on to
 *   `injectCodeSnippets`; snippet injection is skipped when falsy.
 * @returns {string} The post-processed Markdown.
 */
export function postProcessMarkdown(content, inputDir = null) {
  // NOTE(review): the log prefix looks like a mis-decoded emoji — confirm the file's encoding.
  console.log('π§ Post-processing for KaTeX compatibility...');

  // Each step takes the text produced by the previous one.
  const pipeline = [
    removeTexGroupingCommands,
    simplifyLatexDelimiters,
    removeOrphanedLabels,
    convertAlignAnchors,
    fixMathCommands,
    fixMatrixCommands,
    fixUnicodeIssues,
    fixMultilineMath,
    fixAllAttributes,
    fixLinkTextContent,
  ];

  const cleaned = pipeline.reduce((text, step) => step(text), content);

  return inputDir ? injectCodeSnippets(cleaned, inputDir) : cleaned;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Parse the command-line arguments into a configuration object.
 *
 * Recognized arguments: `--input=PATH`, `--output=PATH`, `--verbose`, and
 * `--help` / `-h` (prints usage and exits with status 0). Any other argument
 * is reported on stderr and ignored, so a mistyped `--output=` does not
 * silently turn into an in-place overwrite of the input file.
 *
 * @returns {{input: string, output: string, verbose: boolean}} `input`
 *   defaults to `output/main.md` next to this script; `output` defaults to
 *   the value of `input`.
 */
function parseArgs() {
  const args = process.argv.slice(2);
  const config = {
    input: join(__dirname, 'output', 'main.md'),
    output: null,
    verbose: false,
  };

  for (const arg of args) {
    if (arg.startsWith('--input=')) {
      config.input = arg.substring('--input='.length);
    } else if (arg.startsWith('--output=')) {
      config.output = arg.substring('--output='.length);
    } else if (arg === '--verbose') {
      config.verbose = true;
    } else if (arg === '--help' || arg === '-h') {
      // NOTE(review): the title prefix looks like a mis-decoded emoji and the
      // column spacing appears collapsed — confirm against the intended text.
      console.log(`
π§ Markdown Post-Processor

Usage:
node post-processor.mjs [options]

Options:
--input=PATH Input Markdown file (default: output/main.md)
--output=PATH Output file (default: overwrites input)
--verbose Verbose output
--help, -h Show this help

Examples:
# Process main.md in-place
node post-processor.mjs

# Process with custom paths
node post-processor.mjs --input=raw.md --output=clean.md
`);
      process.exit(0);
    } else {
      console.warn(`Ignoring unrecognized argument: ${arg}`);
    }
  }

  // Without an explicit output path the input file is overwritten in place.
  if (!config.output) {
    config.output = config.input;
  }

  return config;
}
|
|
|
|
|
/**
 * CLI entry point: read the input file, post-process it, and write the result.
 *
 * Any error raised while reading, processing or writing is reported on stderr
 * and the process exits with status 1.
 */
function main() {
  const { input, output, verbose } = parseArgs();

  // NOTE(review): the log prefixes in this function look like mis-decoded
  // emoji — confirm the file's encoding.
  console.log('π§ Markdown Post-Processor');
  console.log(`π Input: ${input}`);
  console.log(`π Output: ${output}`);

  try {
    const rawMarkdown = readFileSync(input, 'utf8');
    const cleanedMarkdown = postProcessMarkdown(rawMarkdown);

    writeFileSync(output, cleanedMarkdown);

    console.log(`β\nPost-processing completed: ${output}`);

    if (verbose) {
      // Report how the line count changed.
      const countLines = (text) => text.split('\n').length;
      const originalLines = countLines(rawMarkdown);
      const processedLines = countLines(cleanedMarkdown);
      console.log(`π Lines: ${originalLines} β ${processedLines}`);
    }
  } catch (error) {
    console.error('β Post-processing failed:');
    console.error(error.message);
    process.exit(1);
  }
}
|
|
|
|
|
|
|
|
// Run the CLI only when this file is the script Node was started with, not
// when it is imported as a module. The module URL is converted to a
// file-system path before comparing: a hand-built `file://${path}` string does
// not match `import.meta.url` when the path needs URL-encoding (e.g. contains
// spaces) or uses Windows drive letters and backslashes.
if (fileURLToPath(import.meta.url) === process.argv[1]) {
  main();
}
|
|
|