Spaces:
Running
Running
| /** | |
| * Main Alpine.js application for OCR Text Explorer | |
| */ | |
| document.addEventListener('alpine:init', () => { | |
| Alpine.data('ocrExplorer', () => ({ | |
| // Dataset state | |
| datasetId: 'davanstrien/exams-ocr', | |
| datasetConfig: 'default', | |
| datasetSplit: 'train', | |
| // Navigation state | |
| currentIndex: 0, | |
| totalSamples: null, | |
| currentSample: null, | |
| jumpToPage: '', | |
| // UI state | |
| loading: false, | |
| error: null, | |
| activeTab: 'comparison', | |
| diffMode: 'char', | |
| darkMode: false, | |
| showAbout: false, | |
| showFlowView: false, | |
| showDock: false, | |
| // Flow view state | |
| flowItems: [], | |
| flowStartIndex: 0, | |
| flowVisibleCount: 7, | |
| flowOffset: 0, | |
| // Dock state | |
| dockItems: [], | |
| dockHideTimeout: null, | |
| dockStartIndex: 0, | |
| dockVisibleCount: 10, | |
| // Computed diff HTML | |
| diffHtml: '', | |
| // Statistics | |
| similarity: 0, | |
| charStats: { total: 0, added: 0, removed: 0 }, | |
| wordStats: { original: 0, improved: 0 }, | |
| // API instance | |
| api: null, | |
| async init() { | |
| // Initialize API | |
| this.api = new DatasetAPI(); | |
| // Apply dark mode from localStorage | |
| this.darkMode = localStorage.getItem('darkMode') === 'true'; | |
| this.$watch('darkMode', value => { | |
| localStorage.setItem('darkMode', value); | |
| document.documentElement.classList.toggle('dark', value); | |
| }); | |
| document.documentElement.classList.toggle('dark', this.darkMode); | |
| // Setup keyboard navigation | |
| this.setupKeyboardNavigation(); | |
| // Load initial dataset | |
| await this.loadDataset(); | |
| }, | |
| setupKeyboardNavigation() { | |
| document.addEventListener('keydown', (e) => { | |
| // Ignore if user is typing in input | |
| if (e.target.tagName === 'INPUT') return; | |
| switch(e.key) { | |
| case 'ArrowLeft': | |
| e.preventDefault(); | |
| if (e.shiftKey && this.showDock) { | |
| this.scrollDockLeft(); | |
| } else { | |
| this.previousSample(); | |
| } | |
| break; | |
| case 'ArrowRight': | |
| e.preventDefault(); | |
| if (e.shiftKey && this.showDock) { | |
| this.scrollDockRight(); | |
| } else { | |
| this.nextSample(); | |
| } | |
| break; | |
| case 'k': | |
| case 'K': | |
| e.preventDefault(); | |
| this.previousSample(); | |
| break; | |
| case 'j': | |
| case 'J': | |
| e.preventDefault(); | |
| this.nextSample(); | |
| break; | |
| case '1': | |
| this.activeTab = 'comparison'; | |
| break; | |
| case '2': | |
| this.activeTab = 'diff'; | |
| break; | |
| case '3': | |
| this.activeTab = 'improved'; | |
| break; | |
| case 'v': | |
| case 'V': | |
| // Toggle dock with V key | |
| if (this.showDock) { | |
| this.hideDockPreview(); | |
| } else { | |
| this.showDockPreview(); | |
| } | |
| break; | |
| } | |
| }); | |
| }, | |
| async loadDataset() { | |
| this.loading = true; | |
| this.error = null; | |
| try { | |
| // Validate dataset | |
| await this.api.validateDataset(this.datasetId); | |
| // Get dataset info | |
| const info = await this.api.getDatasetInfo(this.datasetId); | |
| this.datasetConfig = info.defaultConfig; | |
| this.datasetSplit = info.defaultSplit; | |
| // Get total rows | |
| this.totalSamples = await this.api.getTotalRows( | |
| this.datasetId, | |
| this.datasetConfig, | |
| this.datasetSplit | |
| ); | |
| // Load first sample | |
| this.currentIndex = 0; | |
| await this.loadSample(0); | |
| } catch (error) { | |
| this.error = error.message; | |
| } finally { | |
| this.loading = false; | |
| } | |
| }, | |
| async loadSample(index) { | |
| try { | |
| const data = await this.api.getRow( | |
| this.datasetId, | |
| this.datasetConfig, | |
| this.datasetSplit, | |
| index | |
| ); | |
| this.currentSample = data.row; | |
| this.currentIndex = index; | |
| // Update diff when sample changes | |
| this.updateDiff(); | |
| // Update URL without triggering navigation | |
| const url = new URL(window.location); | |
| url.searchParams.set('dataset', this.datasetId); | |
| url.searchParams.set('index', index); | |
| window.history.replaceState({}, '', url); | |
| } catch (error) { | |
| this.error = `Failed to load sample: ${error.message}`; | |
| } | |
| }, | |
| async nextSample() { | |
| if (this.currentIndex < this.totalSamples - 1) { | |
| await this.loadSample(this.currentIndex + 1); | |
| } | |
| }, | |
| async previousSample() { | |
| if (this.currentIndex > 0) { | |
| await this.loadSample(this.currentIndex - 1); | |
| } | |
| }, | |
| async jumpToSample() { | |
| const pageNum = parseInt(this.jumpToPage); | |
| if (!isNaN(pageNum) && pageNum >= 1 && pageNum <= this.totalSamples) { | |
| // Convert 1-based page number to 0-based index | |
| await this.loadSample(pageNum - 1); | |
| // Clear the input after jumping | |
| this.jumpToPage = ''; | |
| } else { | |
| // Show error or just reset | |
| this.jumpToPage = ''; | |
| } | |
| }, | |
| getOriginalText() { | |
| if (!this.currentSample) return ''; | |
| const columns = this.api.detectColumns(null, this.currentSample); | |
| return this.currentSample[columns.originalText] || 'No original text found'; | |
| }, | |
| getImprovedText() { | |
| if (!this.currentSample) return ''; | |
| const columns = this.api.detectColumns(null, this.currentSample); | |
| return this.currentSample[columns.improvedText] || 'No improved text found'; | |
| }, | |
| getImageData() { | |
| if (!this.currentSample) return null; | |
| const columns = this.api.detectColumns(null, this.currentSample); | |
| return columns.image ? this.currentSample[columns.image] : null; | |
| }, | |
| getImageSrc() { | |
| const imageData = this.getImageData(); | |
| return imageData?.src || ''; | |
| }, | |
| getImageDimensions() { | |
| const imageData = this.getImageData(); | |
| if (imageData?.width && imageData?.height) { | |
| return `${imageData.width}×${imageData.height}`; | |
| } | |
| return null; | |
| }, | |
| updateDiff() { | |
| const original = this.getOriginalText(); | |
| const improved = this.getImprovedText(); | |
| // Calculate statistics | |
| this.calculateStatistics(original, improved); | |
| // Use diff utility based on mode | |
| switch(this.diffMode) { | |
| case 'char': | |
| this.diffHtml = createCharacterDiff(original, improved); | |
| break; | |
| case 'word': | |
| this.diffHtml = createWordDiff(original, improved); | |
| break; | |
| case 'line': | |
| this.diffHtml = createLineDiff(original, improved); | |
| break; | |
| } | |
| }, | |
| calculateStatistics(original, improved) { | |
| // Calculate similarity | |
| this.similarity = calculateSimilarity(original, improved); | |
| // Character statistics | |
| const charDiff = this.getCharacterDiffStats(original, improved); | |
| this.charStats = charDiff; | |
| // Word statistics | |
| const originalWords = original.split(/\s+/).filter(w => w.length > 0); | |
| const improvedWords = improved.split(/\s+/).filter(w => w.length > 0); | |
| this.wordStats = { | |
| original: originalWords.length, | |
| improved: improvedWords.length | |
| }; | |
| }, | |
| getCharacterDiffStats(original, improved) { | |
| const dp = computeLCS(original, improved); | |
| const diff = buildDiff(original, improved, dp); | |
| let added = 0; | |
| let removed = 0; | |
| let unchanged = 0; | |
| for (const part of diff) { | |
| if (part.type === 'insert') { | |
| added += part.value.length; | |
| } else if (part.type === 'delete') { | |
| removed += part.value.length; | |
| } else { | |
| unchanged += part.value.length; | |
| } | |
| } | |
| return { | |
| total: original.length, | |
| added: added, | |
| removed: removed, | |
| unchanged: unchanged | |
| }; | |
| }, | |
| async handleImageError(event) { | |
| // Try to refresh the image URL | |
| console.log('Image failed to load, refreshing URL...'); | |
| try { | |
| const data = await this.api.refreshImageUrl( | |
| this.datasetId, | |
| this.datasetConfig, | |
| this.datasetSplit, | |
| this.currentIndex | |
| ); | |
| // Update the image source | |
| if (data.row && data.row[this.api.detectColumns(null, data.row).image]?.src) { | |
| event.target.src = data.row[this.api.detectColumns(null, data.row).image].src; | |
| } | |
| } catch (error) { | |
| console.error('Failed to refresh image URL:', error); | |
| // Set a placeholder image | |
| event.target.src = 'data:image/svg+xml;base64,PHN2ZyB3aWR0aD0iNDAwIiBoZWlnaHQ9IjMwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj48cmVjdCB3aWR0aD0iNDAwIiBoZWlnaHQ9IjMwMCIgZmlsbD0iI2VlZSIvPjx0ZXh0IHRleHQtYW5jaG9yPSJtaWRkbGUiIHg9IjIwMCIgeT0iMTUwIiBzdHlsZT0iZmlsbDojOTk5O2ZvbnQtZmFtaWx5OkFyaWFsLHNhbnMtc2VyaWY7Zm9udC1zaXplOjIwcHg7Zm9udC13ZWlnaHQ6Ym9sZCI+SW1hZ2UgVW5hdmFpbGFibGU8L3RleHQ+PC9zdmc+'; | |
| } | |
| }, | |
| exportComparison() { | |
| const original = this.getOriginalText(); | |
| const improved = this.getImprovedText(); | |
| const metadata = { | |
| dataset: this.datasetId, | |
| page: this.currentIndex + 1, | |
| totalPages: this.totalSamples, | |
| exportDate: new Date().toISOString(), | |
| similarity: `${this.similarity}%`, | |
| statistics: { | |
| characters: this.charStats, | |
| words: this.wordStats | |
| } | |
| }; | |
| // Create export content | |
| let content = `OCR Text Comparison Export\n`; | |
| content += `==========================\n\n`; | |
| content += `Dataset: ${metadata.dataset}\n`; | |
| content += `Page: ${metadata.page} of ${metadata.totalPages}\n`; | |
| content += `Export Date: ${new Date().toLocaleString()}\n`; | |
| content += `Similarity: ${metadata.similarity}\n`; | |
| content += `Characters: ${metadata.statistics.characters.total} total, `; | |
| content += `${metadata.statistics.characters.added} added, `; | |
| content += `${metadata.statistics.characters.removed} removed\n`; | |
| content += `Words: ${metadata.statistics.words.original} → ${metadata.statistics.words.improved}\n`; | |
| content += `\n${'='.repeat(50)}\n\n`; | |
| content += `ORIGINAL OCR:\n`; | |
| content += `${'='.repeat(50)}\n`; | |
| content += original; | |
| content += `\n\n${'='.repeat(50)}\n\n`; | |
| content += `IMPROVED OCR:\n`; | |
| content += `${'='.repeat(50)}\n`; | |
| content += improved; | |
| // Download file | |
| const blob = new Blob([content], { type: 'text/plain' }); | |
| const url = URL.createObjectURL(blob); | |
| const a = document.createElement('a'); | |
| a.href = url; | |
| a.download = `ocr-comparison-${this.datasetId.replace('/', '-')}-page-${this.currentIndex + 1}.txt`; | |
| document.body.appendChild(a); | |
| a.click(); | |
| document.body.removeChild(a); | |
| URL.revokeObjectURL(url); | |
| }, | |
| // Flow view methods | |
| async toggleFlowView() { | |
| this.showFlowView = !this.showFlowView; | |
| if (this.showFlowView) { | |
| // Reset to center around current page when opening | |
| this.flowStartIndex = Math.max(0, this.currentIndex - Math.floor(this.flowVisibleCount / 2)); | |
| await this.loadFlowItems(); | |
| } | |
| }, | |
| async loadFlowItems() { | |
| // Load thumbnails from flowStartIndex | |
| const startIdx = this.flowStartIndex; | |
| this.flowItems = []; | |
| // Load visible items | |
| for (let i = 0; i < this.flowVisibleCount && (startIdx + i) < this.totalSamples; i++) { | |
| const idx = startIdx + i; | |
| try { | |
| const data = await this.api.getRow( | |
| this.datasetId, | |
| this.datasetConfig, | |
| this.datasetSplit, | |
| idx | |
| ); | |
| const columns = this.api.detectColumns(null, data.row); | |
| const imageData = columns.image ? data.row[columns.image] : null; | |
| this.flowItems.push({ | |
| index: idx, | |
| imageSrc: imageData?.src || '', | |
| row: data.row | |
| }); | |
| } catch (error) { | |
| console.error(`Failed to load flow item ${idx}:`, error); | |
| } | |
| } | |
| }, | |
| scrollFlowLeft() { | |
| if (this.flowStartIndex > 0) { | |
| this.flowStartIndex = Math.max(0, this.flowStartIndex - this.flowVisibleCount); | |
| this.loadFlowItems(); | |
| } | |
| }, | |
| scrollFlowRight() { | |
| if (this.flowStartIndex < this.totalSamples - this.flowVisibleCount) { | |
| this.flowStartIndex = Math.min( | |
| this.totalSamples - this.flowVisibleCount, | |
| this.flowStartIndex + this.flowVisibleCount | |
| ); | |
| this.loadFlowItems(); | |
| } | |
| }, | |
| async jumpToFlowPage(index) { | |
| this.showFlowView = false; | |
| await this.loadSample(index); | |
| }, | |
| async handleFlowImageError(event, index) { | |
| // Try to refresh the image URL for flow item | |
| try { | |
| const data = await this.api.refreshImageUrl( | |
| this.datasetId, | |
| this.datasetConfig, | |
| this.datasetSplit, | |
| index | |
| ); | |
| if (data.row) { | |
| const columns = this.api.detectColumns(null, data.row); | |
| const imageData = columns.image ? data.row[columns.image] : null; | |
| if (imageData?.src) { | |
| event.target.src = imageData.src; | |
| // Update the flow item | |
| const flowItem = this.flowItems.find(item => item.index === index); | |
| if (flowItem) { | |
| flowItem.imageSrc = imageData.src; | |
| } | |
| } | |
| } | |
| } catch (error) { | |
| console.error('Failed to refresh flow image URL:', error); | |
| } | |
| }, | |
| // Dock methods | |
| async showDockPreview() { | |
| // Clear any hide timeout | |
| if (this.dockHideTimeout) { | |
| clearTimeout(this.dockHideTimeout); | |
| this.dockHideTimeout = null; | |
| } | |
| this.showDock = true; | |
| // Center dock around current page | |
| this.dockStartIndex = Math.max(0, | |
| Math.min( | |
| this.currentIndex - Math.floor(this.dockVisibleCount / 2), | |
| this.totalSamples - this.dockVisibleCount | |
| ) | |
| ); | |
| // Always reload dock items to show current position | |
| await this.loadDockItems(); | |
| }, | |
| hideDockPreview() { | |
| // Add a small delay to prevent flickering | |
| this.dockHideTimeout = setTimeout(() => { | |
| this.showDock = false; | |
| }, 300); | |
| }, | |
| async loadDockItems() { | |
| // Load thumbnails based on dock start index | |
| const endIdx = Math.min(this.totalSamples, this.dockStartIndex + this.dockVisibleCount); | |
| this.dockItems = []; | |
| for (let i = this.dockStartIndex; i < endIdx; i++) { | |
| try { | |
| const data = await this.api.getRow( | |
| this.datasetId, | |
| this.datasetConfig, | |
| this.datasetSplit, | |
| i | |
| ); | |
| const columns = this.api.detectColumns(null, data.row); | |
| const imageData = columns.image ? data.row[columns.image] : null; | |
| this.dockItems.push({ | |
| index: i, | |
| imageSrc: imageData?.src || '', | |
| row: data.row | |
| }); | |
| } catch (error) { | |
| console.error(`Failed to load dock item ${i}:`, error); | |
| } | |
| } | |
| }, | |
| async scrollDockLeft() { | |
| if (this.dockStartIndex > 0) { | |
| this.dockStartIndex = Math.max(0, this.dockStartIndex - Math.floor(this.dockVisibleCount / 2)); | |
| await this.loadDockItems(); | |
| } | |
| }, | |
| async scrollDockRight() { | |
| if (this.dockStartIndex < this.totalSamples - this.dockVisibleCount) { | |
| this.dockStartIndex = Math.min( | |
| this.totalSamples - this.dockVisibleCount, | |
| this.dockStartIndex + Math.floor(this.dockVisibleCount / 2) | |
| ); | |
| await this.loadDockItems(); | |
| } | |
| }, | |
| async jumpToDockPage(index) { | |
| this.showDock = false; | |
| await this.loadSample(index); | |
| }, | |
| // Watch for diff mode changes | |
| initWatchers() { | |
| this.$watch('diffMode', () => this.updateDiff()); | |
| this.$watch('currentSample', () => this.updateDiff()); | |
| } | |
| })); | |
| }); | |
| // Initialize watchers after Alpine loads | |
// Once Alpine has finished initialising, call initWatchers() on the
// 'ocrExplorer' store if such a store exists and exposes that method.
// NOTE(review): in this file 'ocrExplorer' is registered with Alpine.data(),
// not Alpine.store(). Unless a store of the same name is registered
// elsewhere, the lookup yields undefined and the call is skipped — confirm
// whether the watchers are meant to be installed from the component instead.
document.addEventListener('alpine:initialized', () => {
  const explorerStore = Alpine.store('ocrExplorer');
  explorerStore?.initWatchers?.();
});