|
|
#!/usr/bin/env node |
|
|
|
|
|
import { execFileSync, execSync } from 'child_process';
import { existsSync, mkdirSync, readdirSync, readFileSync, rmSync, writeFileSync } from 'fs';
import { join, dirname, basename } from 'path';
import { fileURLToPath } from 'url';
import { cleanBibliography } from './bib-cleaner.mjs';
import { postProcessMarkdown } from './post-processor.mjs';
import { preprocessLatexReferences } from './reference-preprocessor.mjs';
|
|
|
|
|
// ES modules do not provide __filename/__dirname, so derive both from the module URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);

// Default locations, resolved relative to the directory containing this script.
const DEFAULT_INPUT = join(__dirname, 'input', 'main.tex');
const DEFAULT_OUTPUT = join(__dirname, 'output');
|
|
|
|
|
/**
 * Build the run configuration from the command-line arguments
 * (everything in `process.argv` after the node binary and the script path).
 *
 * Recognised flags:
 *   --input=PATH   LaTeX file to convert
 *   --output=PATH  directory that receives the result
 *   --clean        empty the output directory first
 * Any other argument is ignored.
 *
 * @returns {{input: string, output: string, clean: boolean}} the parsed
 *   configuration; unspecified values fall back to DEFAULT_INPUT / DEFAULT_OUTPUT / false.
 */
function parseArgs() {
  const inputFlag = '--input=';
  const outputFlag = '--output=';

  const config = {
    input: DEFAULT_INPUT,
    output: DEFAULT_OUTPUT,
    clean: false,
  };

  for (const arg of process.argv.slice(2)) {
    if (arg.startsWith(inputFlag)) {
      // Keep everything after the flag prefix: splitting on '=' and taking
      // element [1] would cut off paths that themselves contain '='.
      config.input = arg.slice(inputFlag.length);
    } else if (arg.startsWith(outputFlag)) {
      config.output = arg.slice(outputFlag.length);
    } else if (arg === '--clean') {
      config.clean = true;
    }
  }

  return config;
}
|
|
|
|
|
/**
 * Make sure something exists at `dir`, creating the directory (and any
 * missing parents) when the path is absent.
 *
 * @param {string} dir - path of the directory to create if missing.
 * @returns {void}
 */
function ensureDirectory(dir) {
  const alreadyPresent = existsSync(dir);
  if (alreadyPresent) {
    return;
  }
  mkdirSync(dir, { recursive: true });
}
|
|
|
|
|
/**
 * Remove the contents of `dir` while leaving the directory itself in place.
 *
 * Done with the fs API instead of spawning `rm -rf "<dir>"/*`, so paths
 * containing quotes, `$` or backticks cannot break (or inject into) a shell
 * command, and the function also works where no `rm` binary exists.
 *
 * Entries whose name starts with "." are kept, matching what the previous
 * shell glob `*` did. A missing path, or a path that is not a directory, is
 * silently ignored.
 *
 * @param {string} dir - directory to empty.
 * @returns {void}
 */
function cleanDirectory(dir) {
  if (!existsSync(dir)) {
    return;
  }

  let entries;
  try {
    entries = readdirSync(dir);
  } catch (error) {
    // `dir` exists but is a regular file: nothing to clean.
    if (error.code === 'ENOTDIR') {
      return;
    }
    throw error;
  }

  for (const entry of entries) {
    if (entry.startsWith('.')) {
      continue;
    }
    rmSync(join(dir, entry), { recursive: true, force: true });
  }
}
|
|
|
|
|
/**
 * Rewrite `\textsc{...}` (with at most one level of nested braces) as
 * `\text{...}`, replacing inline `\( ... \)` math inside it with the literal
 * word MATHEXPR. Occurrences that appear within 50 characters after a
 * `\newcommand`, `\renewcommand` or `\def` are left untouched so macro
 * definitions keep working.
 */
function replaceTextsc(text) {
  return text.replace(/\\textsc\{([^{}]*(?:\{[^{}]*\}[^{}]*)*)\}/g, (match, inner, offset, whole) => {
    const before = whole.substring(Math.max(0, offset - 50), offset);
    if (before.includes('\\newcommand') || before.includes('\\renewcommand') || before.includes('\\def')) {
      return match;
    }
    const simplified = inner.replace(/\\\([^)]+\\\)/g, 'MATHEXPR');
    return `\\text{${simplified}}`;
  });
}

/**
 * Apply the construct clean-up shared by the main file and every included file.
 * `p0Pattern` is the (global) regex used to turn `$p_0$` into `p0`; the main
 * file and included files historically use slightly different patterns.
 */
function cleanLatexConstructs(text, p0Pattern) {
  let cleaned = text.replace(p0Pattern, 'p0');

  // equation* -> $$ ... $$. Function replacers are required here: in a
  // replacement *string*, "$$" is an escape that inserts a single "$".
  const displayMath = () => '$$';
  cleaned = cleaned
    .replace(/\$\$\\begin\{equation\*\}/g, displayMath)
    .replace(/\\end\{equation\*\}\$\$/g, displayMath)
    .replace(/\\begin\{equation\*\}/g, displayMath)
    .replace(/\\end\{equation\*\}/g, displayMath);

  // Park align environments behind placeholders so their alignment
  // ampersands survive the "&" stripping below.
  const alignBlocks = [];
  cleaned = cleaned.replace(/\\begin\{align\}[\s\S]*?\\end\{align\}/g, (block) => {
    alignBlocks.push(block);
    return `__ALIGN_BLOCK_${alignBlocks.length - 1}__`;
  });

  cleaned = cleaned.replace(/&=/g, '=').replace(/&/g, '');

  // Restore through a function replacer so "$" sequences inside the align
  // block are inserted verbatim rather than treated as replacement patterns.
  cleaned = cleaned.replace(/__ALIGN_BLOCK_(\d+)__/g, (placeholder, digits) => {
    const index = Number(digits);
    return index < alignBlocks.length ? alignBlocks[index] : placeholder;
  });

  // \cite{a,b}, \citet{...}, \citep{...} -> "@a, @b"
  cleaned = cleaned.replace(/\\cite[tp]?\{([^}]+)\}/g, (match, citations) =>
    citations.split(',').map((cite) => `@${cite.trim()}`).join(', '));

  cleaned = replaceTextsc(cleaned);

  return cleaned.replace(/\\input\{snippets\/[^}]+\}/g, '% Code snippet removed');
}

/**
 * Inline every `\input{...}` that is not preceded by "%" on its line.
 * Paths are resolved against `inputDir` (".tex" is appended when missing).
 * Included files are cleaned and then resolved recursively; `activeFiles`
 * holds the chain of files currently being expanded so a cycle is cut off
 * instead of recursing forever.
 */
function resolveLatexInputs(text, inputDir, activeFiles = []) {
  // The replacer rewrites exactly the matched occurrence, so a commented-out
  // "\input{x}" earlier in the text can never be replaced by mistake.
  const inputPattern = /^([^%]*?)\\input\{([^}]+)\}/gm;

  return text.replace(inputPattern, (match, beforeInput, inputPath) => {
    if (inputPath.includes('snippets/')) {
      console.log(` Skipping: ${inputPath}`);
      return `${beforeInput}% Skipped: ${inputPath}`;
    }

    const fileName = inputPath.endsWith('.tex') ? inputPath : `${inputPath}.tex`;
    const fullPath = join(inputDir, fileName);

    if (!existsSync(fullPath)) {
      console.log(` β οΈ File not found: ${fullPath} (skipping)`);
      return `${beforeInput}% File not found: ${inputPath}`;
    }

    if (activeFiles.includes(fullPath)) {
      console.log(` Circular \\input skipped: ${inputPath}`);
      return `${beforeInput}% Circular input skipped: ${inputPath}`;
    }

    console.log(` Including: ${inputPath}`);
    const includedContent = cleanLatexConstructs(readFileSync(fullPath, 'utf8'), /\$p_0\$/g);
    return beforeInput + resolveLatexInputs(includedContent, inputDir, [...activeFiles, fullPath]);
  });
}

/**
 * Produce a single, Pandoc-friendly LaTeX file from `inputFile`.
 *
 * Steps: read the file, clean constructs that are problematic for the
 * conversion, inline `\input{...}` files (relative to the input file's
 * directory), run `preprocessLatexReferences` on the result and write it to
 * `<outputDir>/temp_main.tex`.
 *
 * @param {string} inputFile - path of the main LaTeX file.
 * @param {string} outputDir - existing directory that receives the temp file.
 * @returns {string} path of the written temporary file.
 * @throws if the input cannot be read or the temp file cannot be written.
 */
function preprocessLatexFile(inputFile, outputDir) {
  const inputDir = dirname(inputFile);
  const tempFile = join(outputDir, 'temp_main.tex');

  console.log('π Preprocessing LaTeX file to resolve \\input commands...');
  const source = readFileSync(inputFile, 'utf8');

  console.log('π§Ή Cleaning problematic LaTeX constructs...');
  let content = cleanLatexConstructs(source, /\$p_0\$(?![A-Za-z])/g);
  content = resolveLatexInputs(content, inputDir);

  console.log('π§ Preprocessing LaTeX references for MDX compatibility...');
  const referenceResult = preprocessLatexReferences(content);
  content = referenceResult.content;

  writeFileSync(tempFile, content);
  return tempFile;
}
|
|
|
|
|
/**
 * Clean the bibliography that sits next to the LaTeX input.
 *
 * Looks for `main.bib` in the directory of `inputFile` and, when present,
 * hands it to `cleanBibliography` together with the target path
 * `<outputDir>/main.bib`.
 *
 * @param {string} inputFile - path of the main LaTeX file.
 * @param {string} outputDir - directory that receives the cleaned .bib file.
 * @returns {string|null} path of the cleaned bibliography, or null when no
 *   bibliography exists or `cleanBibliography` reports a falsy result.
 */
function processBibliography(inputFile, outputDir) {
  const sourceDir = dirname(inputFile);
  const bibFile = join(sourceDir, 'main.bib');
  const outputBibFile = join(outputDir, 'main.bib');

  if (existsSync(bibFile)) {
    const cleaned = cleanBibliography(bibFile, outputBibFile);
    if (cleaned) {
      return outputBibFile;
    }
    return null;
  }

  console.log(' β οΈ No bibliography file found');
  return null;
}
|
|
|
|
|
/**
 * Convert a LaTeX document to GitHub-flavoured Markdown using Pandoc.
 *
 * Pipeline: verify the input and the Pandoc binary, clean the bibliography,
 * preprocess the LaTeX into a temporary file, run Pandoc (media is extracted
 * to `<outputDir>/assets/image`), delete the temporary file, post-process the
 * Markdown and write it to `<outputDir>/<input basename>.md`.
 *
 * Pandoc is started with an argument array (no shell), so paths containing
 * spaces, quotes or `$` are passed through unchanged; the temp-file removal
 * and the line count use Node APIs instead of the `rm` / `wc` binaries.
 *
 * The process is terminated with exit code 1 when the input file is missing,
 * Pandoc is not installed, or any step after preprocessing fails.
 *
 * @param {string} inputFile - path of the main LaTeX file.
 * @param {string} outputDir - directory that receives the Markdown and assets
 *   (created when missing).
 * @returns {void}
 */
export function convertLatexToMarkdown(inputFile, outputDir) {
  console.log('π Simple LaTeX to Markdown Converter');
  console.log(`π Input: ${inputFile}`);
  console.log(`π Output: ${outputDir}`);

  if (!existsSync(inputFile)) {
    console.error(`β Input file not found: ${inputFile}`);
    process.exit(1);
  }

  ensureDirectory(outputDir);

  try {
    execFileSync('pandoc', ['--version'], { stdio: 'pipe' });
  } catch (error) {
    console.error('β Pandoc not found. Please install it: brew install pandoc');
    process.exit(1);
  }

  const cleanBibFile = processBibliography(inputFile, outputDir);
  const preprocessedFile = preprocessLatexFile(inputFile, outputDir);

  const inputFileName = basename(inputFile, '.tex');
  const outputFile = join(outputDir, `${inputFileName}.md`);

  try {
    console.log('π Converting with Pandoc...');

    const mediaDir = join(outputDir, 'assets', 'image');
    ensureDirectory(mediaDir);
    const inputDir = dirname(inputFile);
    const equationFilterPath = join(__dirname, 'filters', 'equation-ids.lua');

    const pandocArgs = [
      preprocessedFile,
      '-f', 'latex+latex_macros',
      '-t', 'gfm+tex_math_dollars+raw_html',
      '--shift-heading-level-by=1',
      '--wrap=none',
      // Only pass a bibliography when one was found and cleaned.
      ...(cleanBibFile ? [`--bibliography=${cleanBibFile}`] : []),
      `--extract-media=${mediaDir}`,
      `--resource-path=${inputDir}`,
      `--lua-filter=${equationFilterPath}`,
      '-o', outputFile,
    ];

    console.log(` Running: pandoc ${pandocArgs.join(' ')}`);
    execFileSync('pandoc', pandocArgs, { stdio: 'pipe' });

    // The preprocessed copy is only an intermediate artefact.
    rmSync(preprocessedFile, { force: true });

    let markdownContent = readFileSync(outputFile, 'utf8');
    markdownContent = postProcessMarkdown(markdownContent, inputDir);
    writeFileSync(outputFile, markdownContent);

    console.log(`β\nConversion completed: ${outputFile}`);

    // Same figure `wc -l` reports: the number of newline characters written.
    const lines = markdownContent.split('\n').length - 1;
    console.log(`π Result: ${lines} lines written`);
  } catch (error) {
    console.error('β Pandoc conversion failed:');
    console.error(error.message);

    try {
      rmSync(preprocessedFile, { force: true });
    } catch {
      // Best-effort cleanup: the conversion failure above is what gets reported.
    }
    process.exit(1);
  }
}
|
|
|
|
|
/**
 * CLI entry point: read the command-line configuration, optionally empty the
 * output directory, then run the conversion and report completion.
 *
 * @returns {void}
 */
function main() {
  const { input, output, clean } = parseArgs();

  if (clean) {
    console.log('π§Ή Cleaning output directory...');
    cleanDirectory(output);
  }

  convertLatexToMarkdown(input, output);

  console.log('π Simple conversion completed!');
}
|
|
|
|
|
|
|
|
// Script entry: print usage and exit when help is requested, otherwise run
// the conversion. The defaults shown in the help text are interpolated from
// DEFAULT_INPUT / DEFAULT_OUTPUT so the documentation cannot drift from the
// values parseArgs actually falls back to.
// NOTE(review): main() also runs when this module is imported for its
// exported function — confirm whether that is intended.
if (process.argv.includes('--help') || process.argv.includes('-h')) {
  console.log(`
π Simple LaTeX to Markdown Converter

Usage:
node scripts/simple-latex-to-markdown.mjs [options]

Options:
--input=PATH Input LaTeX file (default: ${DEFAULT_INPUT})
--output=PATH Output directory (default: ${DEFAULT_OUTPUT})
--clean Clean output directory before conversion
--help, -h Show this help

Examples:
# Basic conversion
node scripts/simple-latex-to-markdown.mjs

# Custom paths
node scripts/simple-latex-to-markdown.mjs --input=my-paper.tex --output=converted/

# Clean output first
node scripts/simple-latex-to-markdown.mjs --clean
`);
  process.exit(0);
}

main();
|
|
|