Spaces:
Running
Running
/**
 * Main Alpine.js application for the OCR Text Explorer.
 */
| document.addEventListener('alpine:init', () => { | |
| Alpine.data('ocrExplorer', () => ({ | |
// Dataset state: the dataset id plus the config/split passed to every API call
datasetId: 'davanstrien/exams-ocr',
datasetConfig: 'default',
datasetSplit: 'train',
// Example datasets (id, display name, short description)
exampleDatasets: [
{ id: 'davanstrien/exams-ocr', name: 'Exams OCR', description: 'Historical exam papers with VLM corrections' },
{ id: 'davanstrien/rolm-test', name: 'ROLM Test', description: 'Documents processed with RolmOCR model' },
{ id: 'davanstrien/india-medical-ocr-test', name: 'India Medical OCR', description: 'Medical documents with NuMarkdown reasoning traces' },
{ id: 'davanstrien/handbooks-lighton-ocr-32k-test', name: 'Handbooks Lighton OCR', description: 'National Library of Scotland handbooks processed with LightOnOCR-0.9B (32k vocab)' }
],
// Navigation state: currentIndex is 0-based; jumpToPage holds the 1-based page number typed by the user
currentIndex: 0,
totalSamples: null,
currentSample: null,
jumpToPage: '',
// UI state
loading: false,
error: null,
activeTab: 'comparison',
diffMode: 'char',
darkMode: false,
showAbout: false,
showFlowView: false,
showDock: false,
renderMarkdown: false,
hasMarkdown: false,
showShareSuccess: false,
// Reasoning trace state (all reset and repopulated by parseReasoningTrace)
hasReasoningTrace: false,
showReasoning: false,
reasoningContent: null,
answerContent: null,
reasoningStats: null,
formattedReasoning: null,
// Flow view state: flowStartIndex is the first row shown, flowVisibleCount the number of rows loaded per page
flowItems: [],
flowStartIndex: 0,
flowVisibleCount: 7,
flowOffset: 0,
// Dock state: dockHideTimeout holds the timer handle for the delayed hide
dockItems: [],
dockHideTimeout: null,
dockStartIndex: 0,
dockVisibleCount: 10,
// Computed diff HTML (written by updateDiff)
diffHtml: '',
// Statistics (written by calculateStatistics)
similarity: 0,
charStats: { total: 0, added: 0, removed: 0 },
wordStats: { original: 0, improved: 0 },
// API instance (a DatasetAPI, created in init)
api: null,
// Markdown cache: rendered HTML, cleared whenever a dataset is loaded
markdownCache: new Map(),
// Model info: formatted inference metadata and the column info returned with the current row
modelInfo: null,
columnInfo: null,
| async init() { | |
| // Initialize API | |
| this.api = new DatasetAPI(); | |
| // Read URL parameters for deep linking | |
| const urlParams = new URLSearchParams(window.location.search); | |
| const urlDataset = urlParams.get('dataset'); | |
| const urlIndex = urlParams.get('index'); | |
| const urlView = urlParams.get('view'); | |
| const urlDiff = urlParams.get('diff'); | |
| const urlMarkdown = urlParams.get('markdown'); | |
| const urlReasoning = urlParams.get('reasoning'); | |
| // Apply URL parameters if present | |
| if (urlDataset) { | |
| this.datasetId = urlDataset; | |
| } | |
| if (urlView && ['comparison', 'diff', 'improved'].includes(urlView)) { | |
| this.activeTab = urlView; | |
| } | |
| if (urlDiff && ['char', 'word', 'line', 'markdown'].includes(urlDiff)) { | |
| this.diffMode = urlDiff; | |
| } | |
| if (urlMarkdown !== null) { | |
| this.renderMarkdown = urlMarkdown === 'true'; | |
| } | |
| if (urlReasoning !== null) { | |
| this.showReasoning = urlReasoning === 'true'; | |
| } | |
| // Apply dark mode from localStorage | |
| this.darkMode = localStorage.getItem('darkMode') === 'true'; | |
| this.$watch('darkMode', value => { | |
| localStorage.setItem('darkMode', value); | |
| document.documentElement.classList.toggle('dark', value); | |
| }); | |
| document.documentElement.classList.toggle('dark', this.darkMode); | |
| // Setup keyboard navigation | |
| this.setupKeyboardNavigation(); | |
| // Setup watchers for URL updates | |
| this.initWatchers(); | |
| // Check if we have a specific index to load | |
| const hasUrlIndex = urlIndex !== null; | |
| // Load initial dataset (skip initial load if we have a URL index) | |
| await this.loadDataset(hasUrlIndex); | |
| // Jump to specific index if provided in URL | |
| if (hasUrlIndex) { | |
| const index = parseInt(urlIndex); | |
| if (!isNaN(index) && index >= 0 && index < this.totalSamples) { | |
| await this.loadSample(index); | |
| } else { | |
| // If invalid index, load the first sample | |
| await this.loadSample(0); | |
| } | |
| } | |
| }, | |
| setupKeyboardNavigation() { | |
| document.addEventListener('keydown', (e) => { | |
| // Ignore if user is typing in input | |
| if (e.target.tagName === 'INPUT') return; | |
| switch(e.key) { | |
| case 'ArrowLeft': | |
| e.preventDefault(); | |
| if (e.shiftKey && this.showDock) { | |
| this.scrollDockLeft(); | |
| } else { | |
| this.previousSample(); | |
| } | |
| break; | |
| case 'ArrowRight': | |
| e.preventDefault(); | |
| if (e.shiftKey && this.showDock) { | |
| this.scrollDockRight(); | |
| } else { | |
| this.nextSample(); | |
| } | |
| break; | |
| case 'k': | |
| case 'K': | |
| e.preventDefault(); | |
| this.previousSample(); | |
| break; | |
| case 'j': | |
| case 'J': | |
| e.preventDefault(); | |
| this.nextSample(); | |
| break; | |
| case '1': | |
| this.activeTab = 'comparison'; | |
| break; | |
| case '2': | |
| this.activeTab = 'diff'; | |
| break; | |
| case '3': | |
| this.activeTab = 'improved'; | |
| break; | |
| case 'v': | |
| case 'V': | |
| // Toggle dock with V key | |
| if (this.showDock) { | |
| this.hideDockPreview(); | |
| } else { | |
| this.showDockPreview(); | |
| } | |
| break; | |
| } | |
| }); | |
| }, | |
| async loadDataset(skipInitialLoad = false) { | |
| this.loading = true; | |
| this.error = null; | |
| // Clear markdown cache when loading new dataset | |
| this.markdownCache.clear(); | |
| try { | |
| // Validate dataset | |
| await this.api.validateDataset(this.datasetId); | |
| // Get dataset info | |
| const info = await this.api.getDatasetInfo(this.datasetId); | |
| this.datasetConfig = info.defaultConfig; | |
| this.datasetSplit = info.defaultSplit; | |
| // Get total rows | |
| this.totalSamples = await this.api.getTotalRows( | |
| this.datasetId, | |
| this.datasetConfig, | |
| this.datasetSplit | |
| ); | |
| // Load first sample only if not skipping | |
| if (!skipInitialLoad) { | |
| this.currentIndex = 0; | |
| await this.loadSample(0); | |
| } | |
| } catch (error) { | |
| this.error = error.message; | |
| } finally { | |
| this.loading = false; | |
| } | |
| }, | |
| async loadSample(index) { | |
| try { | |
| const data = await this.api.getRow( | |
| this.datasetId, | |
| this.datasetConfig, | |
| this.datasetSplit, | |
| index | |
| ); | |
| this.currentSample = data.row; | |
| this.currentIndex = index; | |
| this.columnInfo = data.columns; | |
| // Extract model info if available | |
| this.extractModelInfo(); | |
| // Debug: Log column info | |
| console.log('Column info:', this.columnInfo); | |
| console.log('Current sample keys:', Object.keys(this.currentSample)); | |
| // Check if improved text contains markdown and reasoning traces | |
| const improvedText = this.getImprovedText(); | |
| this.parseReasoningTrace(improvedText); | |
| this.hasMarkdown = this.detectMarkdown(this.answerContent || improvedText); | |
| // Update diff when sample changes | |
| this.updateDiff(); | |
| // Update URL without triggering navigation | |
| this.updateURL(); | |
| } catch (error) { | |
| this.error = `Failed to load sample: ${error.message}`; | |
| } | |
| }, | |
| async nextSample() { | |
| if (this.currentIndex < this.totalSamples - 1) { | |
| await this.loadSample(this.currentIndex + 1); | |
| } | |
| }, | |
| async previousSample() { | |
| if (this.currentIndex > 0) { | |
| await this.loadSample(this.currentIndex - 1); | |
| } | |
| }, | |
| async jumpToSample() { | |
| const pageNum = parseInt(this.jumpToPage); | |
| if (!isNaN(pageNum) && pageNum >= 1 && pageNum <= this.totalSamples) { | |
| // Convert 1-based page number to 0-based index | |
| await this.loadSample(pageNum - 1); | |
| // Clear the input after jumping | |
| this.jumpToPage = ''; | |
| } else { | |
| // Show error or just reset | |
| this.jumpToPage = ''; | |
| } | |
| }, | |
| async selectDataset(datasetId) { | |
| this.datasetId = datasetId; | |
| await this.loadDataset(); | |
| }, | |
| extractModelInfo() { | |
| this.modelInfo = null; | |
| if (!this.currentSample || !this.columnInfo || !this.columnInfo.inferenceInfo) { | |
| console.log('No inference info column detected'); | |
| return; | |
| } | |
| const inferenceData = this.currentSample[this.columnInfo.inferenceInfo]; | |
| if (!inferenceData) { | |
| console.log('No inference data in current sample'); | |
| return; | |
| } | |
| console.log('Raw inference data:', inferenceData); | |
| const parsed = this.api.parseInferenceInfo(inferenceData); | |
| console.log('Parsed inference data:', parsed); | |
| if (parsed) { | |
| const formattedInfo = this.formatModelInfo(parsed); | |
| // Ensure it's a plain object, not a proxy | |
| this.modelInfo = formattedInfo ? {...formattedInfo} : null; | |
| console.log('Formatted model info:', this.modelInfo); | |
| } | |
| }, | |
| formatModelInfo(info) { | |
| if (!info) return null; | |
| return { | |
| modelId: info.model_id || 'Unknown', | |
| modelName: info.model_id ? info.model_id.split('/').pop() : 'Unknown', | |
| processingDate: info.processing_date ? new Date(info.processing_date).toLocaleDateString() : null, | |
| scriptVersion: info.script_version || null, | |
| batchSize: info.batch_size || null, | |
| maxTokens: info.max_tokens || null, | |
| scriptUrl: info.script_url || null, | |
| columnName: info.column_name || null | |
| }; | |
| }, | |
| parseReasoningTrace(text) { | |
| // Reset reasoning state | |
| this.hasReasoningTrace = false; | |
| this.reasoningContent = null; | |
| this.answerContent = null; | |
| this.reasoningStats = null; | |
| this.formattedReasoning = null; | |
| if (!text || !window.ReasoningParser) return; | |
| // Check if text contains reasoning trace | |
| if (ReasoningParser.detectReasoningTrace(text)) { | |
| const parsed = ReasoningParser.parseReasoningContent(text); | |
| if (parsed.hasReasoning) { | |
| this.hasReasoningTrace = true; | |
| this.reasoningContent = parsed.reasoning; | |
| this.answerContent = parsed.answer; | |
| this.formattedReasoning = ReasoningParser.formatReasoningSteps(parsed.reasoning); | |
| this.reasoningStats = ReasoningParser.getReasoningStats(parsed); | |
| console.log('Reasoning trace detected:', this.reasoningStats); | |
| } else { | |
| // No reasoning found, use original text as answer | |
| this.answerContent = text; | |
| } | |
| } else { | |
| // No reasoning markers, use original text | |
| this.answerContent = text; | |
| } | |
| }, | |
| getOriginalText() { | |
| if (!this.currentSample) return ''; | |
| const columns = this.api.detectColumns(null, this.currentSample); | |
| return this.currentSample[columns.originalText] || 'No original text found'; | |
| }, | |
| getImprovedText() { | |
| if (!this.currentSample) return ''; | |
| const columns = this.api.detectColumns(null, this.currentSample); | |
| const rawText = this.currentSample[columns.improvedText] || 'No improved text found'; | |
| // If we have parsed answer content from reasoning trace, use that | |
| // Otherwise return the raw text | |
| return this.hasReasoningTrace && this.answerContent ? this.answerContent : rawText; | |
| }, | |
| getRawImprovedText() { | |
| // Get the raw improved text without parsing reasoning traces | |
| if (!this.currentSample) return ''; | |
| const columns = this.api.detectColumns(null, this.currentSample); | |
| return this.currentSample[columns.improvedText] || 'No improved text found'; | |
| }, | |
| detectMarkdown(text) { | |
| // Check for common markdown patterns | |
| const markdownPatterns = [ | |
| /^#{1,6}\s/m, // Headers | |
| /\*\*[^*]+\*\*/, // Bold | |
| /\*[^*]+\*/, // Italic | |
| /\[[^\]]+\]\([^)]+\)/, // Links | |
| /^[-*+]\s/m, // Lists | |
| /^\d+\.\s/m, // Numbered lists | |
| /^>/m, // Blockquotes | |
| /```[\s\S]*?```/, // Code blocks | |
| /`[^`]+`/, // Inline code | |
| /\|.*\|.*\|/m, // Tables (basic detection) | |
| /<table>/i, // HTML tables | |
| /<thead>/i // HTML table headers | |
| ]; | |
| return markdownPatterns.some(pattern => pattern.test(text)); | |
| }, | |
| escapeHtml(text) { | |
| const div = document.createElement('div'); | |
| div.textContent = text; | |
| return div.innerHTML; | |
| }, | |
| renderMarkdownText(text) { | |
| if (!text || !this.renderMarkdown) return text; | |
| // Check cache first | |
| const cacheKey = `${this.currentIndex}_${text.substring(0, 100)}`; | |
| if (this.markdownCache.has(cacheKey)) { | |
| return this.markdownCache.get(cacheKey); | |
| } | |
| try { | |
| // Configure marked options for security | |
| const renderer = new marked.Renderer(); | |
| // Override link rendering to open in new tab and sanitize | |
| const self = this; | |
| renderer.link = function(href, title, text) { | |
| // Basic URL sanitization | |
| const safeHref = href.replace(/javascript:/gi, '').replace(/data:/gi, ''); | |
| const safeTitle = (title || '').replace(/"/g, '"'); | |
| const safeText = self.escapeHtml(text); | |
| return `<a href="${safeHref}" title="${safeTitle}" target="_blank" rel="noopener noreferrer" class="text-blue-600 dark:text-blue-400 underline">${safeText}</a>`; | |
| }; | |
| // Override image rendering for safety | |
| renderer.image = function(href, title, text) { | |
| const safeHref = href.replace(/javascript:/gi, '').replace(/data:/gi, ''); | |
| const safeTitle = (title || '').replace(/"/g, '"'); | |
| const safeAlt = self.escapeHtml(text); | |
| return `<img src="${safeHref}" alt="${safeAlt}" title="${safeTitle}" class="max-w-full h-auto rounded">`; | |
| }; | |
| // Override HTML rendering to prevent XSS but allow safe table elements | |
| renderer.html = function(html) { | |
| // Allow specific safe HTML tags for tables | |
| const allowedTags = ['table', 'thead', 'tbody', 'tr', 'th', 'td']; | |
| const tagPattern = new RegExp(`</?(?:${allowedTags.join('|')})(?:\\s[^>]*)?>`, 'gi'); | |
| // Check if the HTML contains only allowed tags | |
| const strippedHtml = html.replace(tagPattern, ''); | |
| const hasDisallowedTags = /<[^>]+>/.test(strippedHtml); | |
| if (!hasDisallowedTags) { | |
| // Return the HTML if it only contains allowed table tags | |
| return html; | |
| } | |
| // Strip all HTML by default | |
| return ''; | |
| }; | |
| marked.setOptions({ | |
| renderer: renderer, | |
| breaks: true, // Convert \n to <br> | |
| gfm: true, // GitHub Flavored Markdown | |
| pedantic: false, | |
| smartLists: true, | |
| smartypants: true, | |
| headerIds: false, // Disable header IDs for security | |
| mangle: false, // Don't mangle email addresses | |
| sanitize: false, // We handle sanitization ourselves | |
| tables: true // Enable table parsing | |
| }); | |
| // Render markdown | |
| let html = marked.parse(text); | |
| // Add Tailwind classes to common elements | |
| html = html.replace(/<h1>/g, '<h1 class="text-2xl font-bold mb-4 text-gray-900 dark:text-gray-100">'); | |
| html = html.replace(/<h2>/g, '<h2 class="text-xl font-bold mb-3 text-gray-900 dark:text-gray-100">'); | |
| html = html.replace(/<h3>/g, '<h3 class="text-lg font-bold mb-2 text-gray-900 dark:text-gray-100">'); | |
| html = html.replace(/<h4>/g, '<h4 class="text-base font-bold mb-2 text-gray-900 dark:text-gray-100">'); | |
| html = html.replace(/<p>/g, '<p class="mb-4 text-gray-900 dark:text-gray-100">'); | |
| html = html.replace(/<ul>/g, '<ul class="list-disc list-inside mb-4 text-gray-900 dark:text-gray-100">'); | |
| html = html.replace(/<ol>/g, '<ol class="list-decimal list-inside mb-4 text-gray-900 dark:text-gray-100">'); | |
| html = html.replace(/<li>/g, '<li class="mb-1">'); | |
| html = html.replace(/<blockquote>/g, '<blockquote class="border-l-4 border-gray-300 dark:border-gray-600 pl-4 my-4 italic text-gray-700 dark:text-gray-300">'); | |
| html = html.replace(/<code>/g, '<code class="bg-gray-100 dark:bg-gray-800 px-1 py-0.5 rounded text-sm font-mono">'); | |
| html = html.replace(/<pre>/g, '<pre class="bg-gray-100 dark:bg-gray-800 p-4 rounded-lg overflow-x-auto mb-4">'); | |
| html = html.replace(/<table>/g, '<table class="min-w-full divide-y divide-gray-300 dark:divide-gray-600 mb-4 border border-gray-300 dark:border-gray-600">'); | |
| html = html.replace(/<thead>/g, '<thead class="bg-gray-50 dark:bg-gray-800">'); | |
| html = html.replace(/<tbody>/g, '<tbody class="bg-white dark:bg-gray-900 divide-y divide-gray-200 dark:divide-gray-700">'); | |
| html = html.replace(/<tr>/g, '<tr class="hover:bg-gray-50 dark:hover:bg-gray-800">'); | |
| html = html.replace(/<th>/g, '<th class="px-3 py-3.5 text-left text-sm font-semibold text-gray-900 dark:text-gray-100 border border-gray-300 dark:border-gray-600">'); | |
| html = html.replace(/<td>/g, '<td class="px-3 py-4 text-sm text-gray-900 dark:text-gray-100 border border-gray-300 dark:border-gray-600">'); | |
| const result = `<div class="prose prose-sm dark:prose-invert max-w-none">${html}</div>`; | |
| // Cache the result (limit cache size to prevent memory issues) | |
| if (this.markdownCache.size > 50) { | |
| // Remove oldest entries | |
| const firstKey = this.markdownCache.keys().next().value; | |
| this.markdownCache.delete(firstKey); | |
| } | |
| this.markdownCache.set(cacheKey, result); | |
| return result; | |
| } catch (error) { | |
| console.error('Markdown rendering error:', error); | |
| return text; | |
| } | |
| }, | |
| getImprovedTextRendered() { | |
| const text = this.getImprovedText(); | |
| return this.renderMarkdownText(text); | |
| }, | |
| getImageData() { | |
| if (!this.currentSample) return null; | |
| const columns = this.api.detectColumns(null, this.currentSample); | |
| return columns.image ? this.currentSample[columns.image] : null; | |
| }, | |
| getImageSrc() { | |
| const imageData = this.getImageData(); | |
| return imageData?.src || ''; | |
| }, | |
| getImageDimensions() { | |
| const imageData = this.getImageData(); | |
| if (imageData?.width && imageData?.height) { | |
| return `${imageData.width}×${imageData.height}`; | |
| } | |
| return null; | |
| }, | |
| updateDiff() { | |
| const original = this.getOriginalText(); | |
| const improved = this.getImprovedText(); | |
| // Calculate statistics | |
| this.calculateStatistics(original, improved); | |
| // Use diff utility based on mode | |
| switch(this.diffMode) { | |
| case 'char': | |
| this.diffHtml = createCharacterDiff(original, improved); | |
| break; | |
| case 'word': | |
| this.diffHtml = createWordDiff(original, improved); | |
| break; | |
| case 'line': | |
| this.diffHtml = createLineDiff(original, improved); | |
| break; | |
| case 'markdown': | |
| // Pass the render function bound to this context | |
| this.diffHtml = createMarkdownDiff(original, improved, (text) => this.renderMarkdownText(text)); | |
| break; | |
| } | |
| }, | |
| calculateStatistics(original, improved) { | |
| // Calculate similarity | |
| this.similarity = calculateSimilarity(original, improved); | |
| // Character statistics | |
| const charDiff = this.getCharacterDiffStats(original, improved); | |
| this.charStats = charDiff; | |
| // Word statistics | |
| const originalWords = original.split(/\s+/).filter(w => w.length > 0); | |
| const improvedWords = improved.split(/\s+/).filter(w => w.length > 0); | |
| this.wordStats = { | |
| original: originalWords.length, | |
| improved: improvedWords.length | |
| }; | |
| }, | |
| getCharacterDiffStats(original, improved) { | |
| const dp = computeLCS(original, improved); | |
| const diff = buildDiff(original, improved, dp); | |
| let added = 0; | |
| let removed = 0; | |
| let unchanged = 0; | |
| for (const part of diff) { | |
| if (part.type === 'insert') { | |
| added += part.value.length; | |
| } else if (part.type === 'delete') { | |
| removed += part.value.length; | |
| } else { | |
| unchanged += part.value.length; | |
| } | |
| } | |
| return { | |
| total: original.length, | |
| added: added, | |
| removed: removed, | |
| unchanged: unchanged | |
| }; | |
| }, | |
| async handleImageError(event) { | |
| // Try to refresh the image URL | |
| console.log('Image failed to load, refreshing URL...'); | |
| try { | |
| const data = await this.api.refreshImageUrl( | |
| this.datasetId, | |
| this.datasetConfig, | |
| this.datasetSplit, | |
| this.currentIndex | |
| ); | |
| // Update the image source | |
| if (data.row && data.row[this.api.detectColumns(null, data.row).image]?.src) { | |
| event.target.src = data.row[this.api.detectColumns(null, data.row).image].src; | |
| } | |
| } catch (error) { | |
| console.error('Failed to refresh image URL:', error); | |
| // Set a placeholder image | |
| event.target.src = 'data:image/svg+xml;base64,PHN2ZyB3aWR0aD0iNDAwIiBoZWlnaHQ9IjMwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj48cmVjdCB3aWR0aD0iNDAwIiBoZWlnaHQ9IjMwMCIgZmlsbD0iI2VlZSIvPjx0ZXh0IHRleHQtYW5jaG9yPSJtaWRkbGUiIHg9IjIwMCIgeT0iMTUwIiBzdHlsZT0iZmlsbDojOTk5O2ZvbnQtZmFtaWx5OkFyaWFsLHNhbnMtc2VyaWY7Zm9udC1zaXplOjIwcHg7Zm9udC13ZWlnaHQ6Ym9sZCI+SW1hZ2UgVW5hdmFpbGFibGU8L3RleHQ+PC9zdmc+'; | |
| } | |
| }, | |
| exportComparison() { | |
| const original = this.getOriginalText(); | |
| const improved = this.getImprovedText(); | |
| const metadata = { | |
| dataset: this.datasetId, | |
| page: this.currentIndex + 1, | |
| totalPages: this.totalSamples, | |
| exportDate: new Date().toISOString(), | |
| similarity: `${this.similarity}%`, | |
| statistics: { | |
| characters: this.charStats, | |
| words: this.wordStats | |
| } | |
| }; | |
| // Create export content | |
| let content = `OCR Text Comparison Export\n`; | |
| content += `==========================\n\n`; | |
| content += `Dataset: ${metadata.dataset}\n`; | |
| content += `Page: ${metadata.page} of ${metadata.totalPages}\n`; | |
| content += `Export Date: ${new Date().toLocaleString()}\n`; | |
| content += `Similarity: ${metadata.similarity}\n`; | |
| content += `Characters: ${metadata.statistics.characters.total} total, `; | |
| content += `${metadata.statistics.characters.added} added, `; | |
| content += `${metadata.statistics.characters.removed} removed\n`; | |
| content += `Words: ${metadata.statistics.words.original} → ${metadata.statistics.words.improved}\n`; | |
| content += `\n${'='.repeat(50)}\n\n`; | |
| content += `ORIGINAL OCR:\n`; | |
| content += `${'='.repeat(50)}\n`; | |
| content += original; | |
| content += `\n\n${'='.repeat(50)}\n\n`; | |
| // Include reasoning trace if available | |
| if (this.hasReasoningTrace && this.reasoningContent) { | |
| content += `MODEL REASONING:\n`; | |
| content += `${'='.repeat(50)}\n`; | |
| content += this.reasoningContent; | |
| content += `\n\n${'='.repeat(50)}\n\n`; | |
| } | |
| content += `IMPROVED OCR:\n`; | |
| content += `${'='.repeat(50)}\n`; | |
| content += improved; | |
| // Download file | |
| const blob = new Blob([content], { type: 'text/plain' }); | |
| const url = URL.createObjectURL(blob); | |
| const a = document.createElement('a'); | |
| a.href = url; | |
| a.download = `ocr-comparison-${this.datasetId.replace('/', '-')}-page-${this.currentIndex + 1}.txt`; | |
| document.body.appendChild(a); | |
| a.click(); | |
| document.body.removeChild(a); | |
| URL.revokeObjectURL(url); | |
| }, | |
| // Flow view methods | |
| async toggleFlowView() { | |
| this.showFlowView = !this.showFlowView; | |
| if (this.showFlowView) { | |
| // Reset to center around current page when opening | |
| this.flowStartIndex = Math.max(0, this.currentIndex - Math.floor(this.flowVisibleCount / 2)); | |
| await this.loadFlowItems(); | |
| } | |
| }, | |
| async loadFlowItems() { | |
| // Load thumbnails from flowStartIndex | |
| const startIdx = this.flowStartIndex; | |
| this.flowItems = []; | |
| // Load visible items | |
| for (let i = 0; i < this.flowVisibleCount && (startIdx + i) < this.totalSamples; i++) { | |
| const idx = startIdx + i; | |
| try { | |
| const data = await this.api.getRow( | |
| this.datasetId, | |
| this.datasetConfig, | |
| this.datasetSplit, | |
| idx | |
| ); | |
| const columns = this.api.detectColumns(null, data.row); | |
| const imageData = columns.image ? data.row[columns.image] : null; | |
| this.flowItems.push({ | |
| index: idx, | |
| imageSrc: imageData?.src || '', | |
| row: data.row | |
| }); | |
| } catch (error) { | |
| console.error(`Failed to load flow item ${idx}:`, error); | |
| } | |
| } | |
| }, | |
| scrollFlowLeft() { | |
| if (this.flowStartIndex > 0) { | |
| this.flowStartIndex = Math.max(0, this.flowStartIndex - this.flowVisibleCount); | |
| this.loadFlowItems(); | |
| } | |
| }, | |
| scrollFlowRight() { | |
| if (this.flowStartIndex < this.totalSamples - this.flowVisibleCount) { | |
| this.flowStartIndex = Math.min( | |
| this.totalSamples - this.flowVisibleCount, | |
| this.flowStartIndex + this.flowVisibleCount | |
| ); | |
| this.loadFlowItems(); | |
| } | |
| }, | |
| async jumpToFlowPage(index) { | |
| this.showFlowView = false; | |
| await this.loadSample(index); | |
| }, | |
| async handleFlowImageError(event, index) { | |
| // Try to refresh the image URL for flow item | |
| try { | |
| const data = await this.api.refreshImageUrl( | |
| this.datasetId, | |
| this.datasetConfig, | |
| this.datasetSplit, | |
| index | |
| ); | |
| if (data.row) { | |
| const columns = this.api.detectColumns(null, data.row); | |
| const imageData = columns.image ? data.row[columns.image] : null; | |
| if (imageData?.src) { | |
| event.target.src = imageData.src; | |
| // Update the flow item | |
| const flowItem = this.flowItems.find(item => item.index === index); | |
| if (flowItem) { | |
| flowItem.imageSrc = imageData.src; | |
| } | |
| } | |
| } | |
| } catch (error) { | |
| console.error('Failed to refresh flow image URL:', error); | |
| } | |
| }, | |
| // Dock methods | |
| async showDockPreview() { | |
| // Clear any hide timeout | |
| if (this.dockHideTimeout) { | |
| clearTimeout(this.dockHideTimeout); | |
| this.dockHideTimeout = null; | |
| } | |
| this.showDock = true; | |
| // Center dock around current page | |
| this.dockStartIndex = Math.max(0, | |
| Math.min( | |
| this.currentIndex - Math.floor(this.dockVisibleCount / 2), | |
| this.totalSamples - this.dockVisibleCount | |
| ) | |
| ); | |
| // Always reload dock items to show current position | |
| await this.loadDockItems(); | |
| }, | |
| hideDockPreview() { | |
| // Add a small delay to prevent flickering | |
| this.dockHideTimeout = setTimeout(() => { | |
| this.showDock = false; | |
| }, 300); | |
| }, | |
| async loadDockItems() { | |
| // Load thumbnails based on dock start index | |
| const endIdx = Math.min(this.totalSamples, this.dockStartIndex + this.dockVisibleCount); | |
| this.dockItems = []; | |
| for (let i = this.dockStartIndex; i < endIdx; i++) { | |
| try { | |
| const data = await this.api.getRow( | |
| this.datasetId, | |
| this.datasetConfig, | |
| this.datasetSplit, | |
| i | |
| ); | |
| const columns = this.api.detectColumns(null, data.row); | |
| const imageData = columns.image ? data.row[columns.image] : null; | |
| this.dockItems.push({ | |
| index: i, | |
| imageSrc: imageData?.src || '', | |
| row: data.row | |
| }); | |
| } catch (error) { | |
| console.error(`Failed to load dock item ${i}:`, error); | |
| } | |
| } | |
| }, | |
| async scrollDockLeft() { | |
| if (this.dockStartIndex > 0) { | |
| this.dockStartIndex = Math.max(0, this.dockStartIndex - Math.floor(this.dockVisibleCount / 2)); | |
| await this.loadDockItems(); | |
| } | |
| }, | |
| async scrollDockRight() { | |
| if (this.dockStartIndex < this.totalSamples - this.dockVisibleCount) { | |
| this.dockStartIndex = Math.min( | |
| this.totalSamples - this.dockVisibleCount, | |
| this.dockStartIndex + Math.floor(this.dockVisibleCount / 2) | |
| ); | |
| await this.loadDockItems(); | |
| } | |
| }, | |
| async jumpToDockPage(index) { | |
| this.showDock = false; | |
| await this.loadSample(index); | |
| }, | |
| // Update URL with current state | |
| updateURL() { | |
| const url = new URL(window.location); | |
| url.searchParams.set('dataset', this.datasetId); | |
| url.searchParams.set('index', this.currentIndex); | |
| url.searchParams.set('view', this.activeTab); | |
| url.searchParams.set('diff', this.diffMode); | |
| url.searchParams.set('markdown', this.renderMarkdown); | |
| // Only add reasoning parameter if there's a reasoning trace | |
| if (this.hasReasoningTrace) { | |
| url.searchParams.set('reasoning', this.showReasoning); | |
| } | |
| window.history.replaceState({}, '', url); | |
| }, | |
| // Copy shareable link to clipboard | |
| async copyShareLink() { | |
| const url = new URL(window.location); | |
| url.searchParams.set('dataset', this.datasetId); | |
| url.searchParams.set('index', this.currentIndex); | |
| url.searchParams.set('view', this.activeTab); | |
| url.searchParams.set('diff', this.diffMode); | |
| url.searchParams.set('markdown', this.renderMarkdown); | |
| // Only add reasoning parameter if there's a reasoning trace | |
| if (this.hasReasoningTrace) { | |
| url.searchParams.set('reasoning', this.showReasoning); | |
| } | |
| const shareUrl = url.toString(); | |
| try { | |
| await navigator.clipboard.writeText(shareUrl); | |
| // Show success feedback | |
| this.showShareSuccess = true; | |
| setTimeout(() => { | |
| this.showShareSuccess = false; | |
| }, 2000); | |
| return true; | |
| } catch (err) { | |
| // Fallback for older browsers | |
| const textArea = document.createElement('textarea'); | |
| textArea.value = shareUrl; | |
| textArea.style.position = 'fixed'; | |
| textArea.style.opacity = '0'; | |
| document.body.appendChild(textArea); | |
| textArea.select(); | |
| try { | |
| document.execCommand('copy'); | |
| // Show success feedback | |
| this.showShareSuccess = true; | |
| setTimeout(() => { | |
| this.showShareSuccess = false; | |
| }, 2000); | |
| return true; | |
| } catch (err) { | |
| console.error('Failed to copy link:', err); | |
| return false; | |
| } finally { | |
| document.body.removeChild(textArea); | |
| } | |
| } | |
| }, | |
| // Watch for diff mode changes | |
| initWatchers() { | |
| this.$watch('diffMode', () => { | |
| this.updateDiff(); | |
| this.updateURL(); | |
| }); | |
| this.$watch('currentSample', () => this.updateDiff()); | |
| this.$watch('activeTab', () => this.updateURL()); | |
| this.$watch('renderMarkdown', () => this.updateURL()); | |
| this.$watch('showReasoning', () => this.updateURL()); | |
| } | |
| })); | |
| }); |