|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Reads the balanced `{...}` group that opens at `openIndex`.
 *
 * A backslash escapes the character after it, so `\{` and `\}` do not affect
 * the nesting depth.
 *
 * @param {string} text - Text to scan.
 * @param {number} openIndex - Index of the opening `{` in `text`.
 * @returns {string|null} The text between the outer braces, or `null` when the
 *   group is never closed.
 */
function readBracedGroup(text, openIndex) {
  let depth = 0;
  for (let i = openIndex; i < text.length; i += 1) {
    const ch = text[i];
    if (ch === '\\') {
      i += 1; // Skip the escaped character so `\{` / `\}` are not counted.
    } else if (ch === '{') {
      depth += 1;
    } else if (ch === '}') {
      depth -= 1;
      if (depth === 0) {
        return text.slice(openIndex + 1, i);
      }
    }
  }
  return null;
}

/**
 * Finds the first non-blank braced argument that follows a command opener.
 *
 * @param {string} text - Text to search.
 * @param {RegExp} opener - Global (`g`) pattern whose match ends on the
 *   opening `{` of the argument.
 * @returns {string|null} The trimmed argument, or `null` when no occurrence
 *   has a closed, non-blank argument.
 */
function firstBracedArgument(text, opener) {
  for (const hit of text.matchAll(opener)) {
    const openIndex = hit.index + hit[0].length - 1;
    const content = readBracedGroup(text, openIndex);
    if (content !== null && content.trim() !== '') {
      return content.trim();
    }
  }
  return null;
}

/**
 * Extracts frontmatter metadata from LaTeX source.
 *
 * - `title` comes from the first non-blank `\title{...}`; nested brace groups
 *   are kept intact and all whitespace runs are collapsed to single spaces.
 * - `authors` comes from every `\authorOne[...]{...}`. A `\ensps` marker maps
 *   to affiliation 1 and `\hf` to affiliation 2; an author with neither marker
 *   defaults to affiliation 2. The placeholder name `...` is skipped.
 * - `affiliations` is always the fixed two-entry list below.
 * - `published` comes from `\date{...}` or `\newcommand{\date}{...}`. When no
 *   date is found, or the date is the `\today` macro, the current date
 *   formatted for `en-US` is used instead.
 *
 * @param {string} latexContent - LaTeX source. `null`/`undefined` is treated
 *   as empty input; other values are converted with `String()`.
 * @returns {{title?: string,
 *   authors?: Array<{name: string, affiliations: number[]}>,
 *   affiliations: Array<{name: string}>,
 *   published: string}} The extracted metadata.
 */
export function extractLatexMetadata(latexContent) {
  const source = String(latexContent ?? '');
  const metadata = {};

  const rawTitle = firstBracedArgument(source, /\\title\s*\{/g);
  if (rawTitle) {
    // Collapse line breaks and indentation the same way author names are.
    metadata.title = rawTitle.replace(/\s+/g, ' ');
  }

  const authors = [];
  const authorMatches = source.matchAll(/\\authorOne\[[^\]]*\]\{([^}]+)\}/g);
  for (const match of authorMatches) {
    const fullAuthorInfo = match[1];

    const affiliations = [];
    if (fullAuthorInfo.includes('\\ensps')) {
      affiliations.push(1);
    }
    if (fullAuthorInfo.includes('\\hf')) {
      affiliations.push(2);
    }

    // Strip the affiliation markers, then normalise the remaining whitespace.
    const authorName = fullAuthorInfo
      .replace(/\\ensps/g, '')
      .replace(/\\hf/g, '')
      .replace(/\s+/g, ' ')
      .trim();

    if (authorName && authorName !== '...') {
      authors.push({
        name: authorName,
        affiliations: affiliations.length > 0 ? affiliations : [2],
      });
    }
  }
  if (authors.length > 0) {
    metadata.authors = authors;
  }

  metadata.affiliations = [
    { name: 'École Normale Supérieure Paris-Saclay' },
    { name: 'Hugging Face' },
  ];

  const dateOpeners = [
    /\\date\s*\{/g,
    /\\newcommand\s*\{\\date\}\s*\{/g,
  ];
  for (const opener of dateOpeners) {
    const date = firstBracedArgument(source, opener);
    // `\today` is a macro, not a date; let the fallback below resolve it.
    if (date && date !== '\\today') {
      metadata.published = date;
      break;
    }
  }

  if (!metadata.published) {
    metadata.published = new Date().toLocaleDateString('en-US', {
      year: 'numeric',
      month: 'short',
      day: '2-digit',
    });
  }

  return metadata;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Renders a value as a YAML double-quoted scalar.
 *
 * `JSON.stringify` escapes `"`, `\` and control characters, and a JSON string
 * is also a valid YAML double-quoted scalar.
 *
 * @param {*} value - Value to render; converted with `String()` first.
 * @returns {string} The quoted, escaped scalar (quotes included).
 */
function quoteYamlString(value) {
  return JSON.stringify(String(value));
}

/**
 * Builds a YAML frontmatter block from a metadata object.
 *
 * Keys are written in this order, each only when present: `title`, `authors`,
 * `affiliations`, `published`, `doi`, `description`, `licence`, `tags`. The
 * block always ends with `tableOfContentsAutoCollapse: true`, the closing
 * `---` and one blank line.
 *
 * String values are emitted as escaped double-quoted scalars so quotes and
 * backslashes (common in LaTeX-derived text) cannot break the YAML. `licence`
 * is emitted as a folded block scalar with every line indented. Tags are
 * emitted as plain (unquoted) list items.
 *
 * @param {object} metadata - Metadata to render.
 * @param {string} [metadata.title]
 * @param {Array<{name: string, url?: string, affiliations?: number[]}>} [metadata.authors]
 * @param {Array<{name: string, url?: string}>} [metadata.affiliations]
 * @param {string} [metadata.published]
 * @param {string} [metadata.doi]
 * @param {string} [metadata.description]
 * @param {string} [metadata.licence]
 * @param {string[]} [metadata.tags]
 * @returns {string} The frontmatter text, starting with `---\n` and ending
 *   with `---\n\n`.
 */
export function generateFrontmatter(metadata) {
  const lines = ['---'];

  if (metadata.title) {
    lines.push(`title: ${quoteYamlString(metadata.title)}`);
  }

  if (metadata.authors && metadata.authors.length > 0) {
    lines.push('authors:');
    for (const author of metadata.authors) {
      // Keys after the first must align with `name`, two columns past the dash.
      lines.push(`  - name: ${quoteYamlString(author.name)}`);
      if (author.url) {
        lines.push(`    url: ${quoteYamlString(author.url)}`);
      }
      const affiliationIds = author.affiliations ?? [];
      lines.push(`    affiliations: [${affiliationIds.join(', ')}]`);
    }
  }

  if (metadata.affiliations && metadata.affiliations.length > 0) {
    lines.push('affiliations:');
    for (const affiliation of metadata.affiliations) {
      lines.push(`  - name: ${quoteYamlString(affiliation.name)}`);
      if (affiliation.url) {
        lines.push(`    url: ${quoteYamlString(affiliation.url)}`);
      }
    }
  }

  if (metadata.published) {
    lines.push(`published: ${quoteYamlString(metadata.published)}`);
  }

  if (metadata.doi) {
    lines.push(`doi: ${quoteYamlString(metadata.doi)}`);
  }

  if (metadata.description) {
    lines.push(`description: ${quoteYamlString(metadata.description)}`);
  }

  if (metadata.licence) {
    lines.push('licence: >');
    // Every line of a block scalar must be indented, not only the first.
    for (const licenceLine of String(metadata.licence).split(/\r?\n/)) {
      lines.push(`  ${licenceLine}`);
    }
  }

  if (metadata.tags && metadata.tags.length > 0) {
    lines.push('tags:');
    for (const tag of metadata.tags) {
      lines.push(`  - ${tag}`);
    }
  }

  // The two trailing empty entries produce the final "\n\n" after `---`.
  lines.push('tableOfContentsAutoCollapse: true', '---', '', '');
  return lines.join('\n');
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Convenience wrapper that turns LaTeX source straight into frontmatter text.
 *
 * Passes `latexContent` to `extractLatexMetadata` and feeds the resulting
 * metadata object to `generateFrontmatter`.
 *
 * @param {string} latexContent - LaTeX source to read metadata from.
 * @returns {string} The value returned by `generateFrontmatter`.
 */
export function extractAndGenerateFrontmatter(latexContent) {
  return generateFrontmatter(extractLatexMetadata(latexContent));
}
|
|
|