Spaces:

johko
/

in-browser-rag

Running

File size: 38,211 Bytes

// Import transformers.js 3.0.0 from CDN (new Hugging Face ownership)
import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.0';

// Publish the imported API on `window` and raise the "loaded" flag so that
// code running outside this module (see initTransformers) can find it.
Object.assign(window, {
    transformers: { pipeline, env },
    transformersLoaded: true,
});

console.log('✅ Transformers.js 3.0.0 loaded via ES modules (Hugging Face)');

// Handles to the Transformers.js library, all assigned by initTransformers().
// `pipeline` factory taken from the library object; null until resolved.
let transformersPipeline = null;
// `env` settings object taken from the library object; null until resolved.
let transformersEnv = null;
// Flipped to true once initTransformers() has resolved and configured the library.
let transformersReady = false;

// In-memory document store, pre-populated with three sample documents.
// Each entry carries: id (its index at creation time), title, content, and
// embedding, which stays null until initializeModels() (or an add/upload that
// runs after the models are loaded) fills it via generateEmbedding().
let documents = [
    {
        id: 0,
        title: "Artificial Intelligence Overview",
        content: "Artificial Intelligence (AI) is a branch of computer science that aims to create intelligent machines that work and react like humans. Some activities computers with AI are designed for include speech recognition, learning, planning, and problem-solving. AI is used in healthcare, finance, transportation, and entertainment. Machine learning enables computers to learn from experience without explicit programming. Deep learning uses neural networks to understand complex patterns in data.",
        embedding: null
    },
    {
        id: 1,
        title: "Space Exploration",
        content: "Space exploration is the ongoing discovery and exploration of celestial structures in outer space through evolving space technology. Physical exploration is conducted by unmanned robotic probes and human spaceflight. Space exploration has been used for geopolitical rivalries like the Cold War. The early era was driven by a Space Race between the Soviet Union and United States. Modern exploration includes Mars missions, the International Space Station, and satellite programs.",
        embedding: null
    },
    {
        id: 2,
        title: "Renewable Energy", 
        content: "Renewable energy comes from naturally replenished resources on a human timescale. It includes sunlight, wind, rain, tides, waves, and geothermal heat. Renewable energy contrasts with fossil fuels that are used faster than replenished. Most renewable sources are sustainable. Solar energy is abundant and promising. Wind energy and hydroelectric power are major contributors to renewable generation worldwide.",
        embedding: null
    }
];

// Pipeline instances created by initializeModels(); each stays null until loaded.
// 'feature-extraction' pipeline used by generateEmbedding().
let embeddingModel = null;
// 'question-answering' pipeline used by chatWithRAG().
let qaModel = null;
// 'text-generation' pipeline used by chatWithLLM() and chatWithLLMRAG().
let llmModel = null;
// Name of the text-generation model that actually loaded (e.g. 'Xenova/gpt2').
let loadedModelName = '';
// Set to true once initializeModels() has loaded every model and embedded the documents.
let modelsInitialized = false;

/**
 * Calculate the cosine similarity between two numeric vectors.
 *
 * @param {ArrayLike<number>} a - First vector.
 * @param {ArrayLike<number>} b - Second vector.
 * @returns {number} The cosine of the angle between the vectors. Returns 0
 *   when either vector has zero magnitude or when the vectors differ in
 *   length, because the similarity is undefined in both cases.
 */
function cosineSimilarity(a, b) {
    // Mismatched dimensions used to yield NaN (or silently ignore the tail of
    // `b`); a NaN score breaks the numeric sort comparators used by callers.
    if (a.length !== b.length) return 0;

    let dotProduct = 0;
    let squaredA = 0;
    let squaredB = 0;
    for (let i = 0; i < a.length; i++) {
        dotProduct += a[i] * b[i];
        squaredA += a[i] * a[i];
        squaredB += b[i] * b[i];
    }

    const magnitudeA = Math.sqrt(squaredA);
    const magnitudeB = Math.sqrt(squaredB);

    if (magnitudeA === 0 || magnitudeB === 0) return 0;
    return dotProduct / (magnitudeA * magnitudeB);
}

/**
 * Resolve the Transformers.js API from `window`, store its `pipeline` and
 * `env` in the module-level handles, and reflect the outcome in the UI.
 *
 * Lookup order: the ES-module build (`window.transformers`, guarded by
 * `window.transformersLoaded`), then the UMD build (`window.Transformers`).
 * If neither is present yet, polls every 200 ms for up to 50 attempts.
 * Failures are logged and shown in the status elements; nothing is rethrown.
 */
async function initTransformers() {
    // Whichever build is currently published on window, or null if none is.
    const locateLibrary = () => {
        if (window.transformers && window.transformersLoaded) {
            return {
                api: window.transformers,
                banner: '✅ Using ES modules version (Transformers.js 3.0.0)',
            };
        }
        if (window.Transformers) {
            return {
                api: window.Transformers,
                banner: '✅ Using UMD version (Transformers.js 3.0.0)',
            };
        }
        return null;
    };

    // Write coloured text into a status element, if that element exists.
    const paint = (elementId, text, color) => {
        const element = document.getElementById(elementId);
        if (element) {
            element.textContent = text;
            element.style.color = color;
        }
    };

    try {
        console.log('🔄 Initializing Transformers.js...');

        let library = locateLibrary();
        if (library) {
            console.log(library.banner);
        } else {
            console.log('⏳ Waiting for library to load...');
            for (
                let attempts = 0;
                !window.Transformers && !window.transformersLoaded && attempts < 50;
                attempts++
            ) {
                await new Promise(resolve => setTimeout(resolve, 200));
            }

            library = locateLibrary();
            if (!library) {
                throw new Error('Failed to load Transformers.js library');
            }
        }

        transformersPipeline = library.api.pipeline;
        transformersEnv = library.api.env;

        // Minimal configuration: remote models only, default WASM paths.
        if (transformersEnv) {
            transformersEnv.allowLocalModels = false;
            transformersEnv.allowRemoteModels = true;
        }

        transformersReady = true;
        console.log('✅ Transformers.js initialized successfully');

        updateStatus();
        paint('transformersStatus', '✅ Ready!', 'green');
    } catch (error) {
        console.error('❌ Error initializing Transformers.js:', error);

        paint('status', `❌ Failed to load Transformers.js: ${error.message}`, 'red');
        paint('transformersStatus', `❌ Failed: ${error.message}`, 'red');
    }
}

// Once the DOM is parsed, start resolving the library and wire the upload widgets.
document.addEventListener('DOMContentLoaded', () => {
    initTransformers();
    initFileUpload();
});

// UI Functions
/**
 * Show the tab panel with the given id and highlight the button that
 * triggered the switch.
 *
 * @param {string} tabName - Element id of the `.tab-content` panel to show.
 * @param {Event} [evt] - The triggering event. When omitted, the legacy
 *   `window.event` is used if the browser provides it, so existing inline
 *   `onclick="showTab('x')"` handlers keep working.
 */
function showTab(tabName, evt) {
    // Deactivate every panel and every tab button first.
    document.querySelectorAll('.tab-content').forEach(tab => {
        tab.classList.remove('active');
    });
    document.querySelectorAll('.tab').forEach(button => {
        button.classList.remove('active');
    });

    const panel = document.getElementById(tabName);
    if (panel) {
        panel.classList.add('active');
    } else {
        console.warn(`showTab: no element with id '${tabName}'`);
    }

    // The bare global `event` is deprecated and undefined when this function
    // is called programmatically, so resolve the trigger defensively.
    const sourceEvent = evt || (typeof window !== 'undefined' ? window.event : undefined);
    const trigger = sourceEvent ? sourceEvent.target : null;
    if (trigger && trigger.classList) {
        trigger.classList.add('active');
    }
}

/**
 * Mirror a slider's current value into its companion label, whose id is the
 * slider id followed by "Value".
 *
 * @param {string} sliderId - Element id of the slider input.
 */
function updateSliderValue(sliderId) {
    const { value } = document.getElementById(sliderId);
    document.getElementById(sliderId + 'Value').textContent = value;
}

/**
 * Refresh the one-line summary in the #status element: document count,
 * library readiness, and which of the three models are loaded.
 */
function updateStatus() {
    const statusElement = document.getElementById('status');
    const loadState = (model) => (model ? 'Loaded' : 'Not loaded');

    const segments = [
        `📊 Documents: ${documents.length}`,
        `🔧 Transformers.js: ${transformersReady ? 'Ready' : 'Not ready'}`,
        `🤖 QA: ${loadState(qaModel)}`,
        `🧠 Embedding: ${loadState(embeddingModel)}`,
        `🚀 LLM: ${loadState(llmModel)}`,
    ];

    statusElement.textContent = segments.join(' | ');
}

/**
 * Set the width of #progressBar and the caption in #progressText.
 *
 * @param {number} percent - Bar width as a percentage.
 * @param {string} text - Caption shown next to the bar.
 */
function updateProgress(percent, text) {
    const [bar, caption] = ['progressBar', 'progressText'].map((id) => document.getElementById(id));
    bar.style.width = percent + '%';
    caption.textContent = text;
}

// AI Functions
/**
 * Load the embedding, question-answering and text-generation pipelines, then
 * generate an embedding for every entry in `documents`.
 *
 * Progress and outcome are reported through #initStatus, #initProgress and the
 * progress bar. Errors are caught, logged and shown in #initStatus; nothing is
 * rethrown. The init button is disabled for the duration of the call and the
 * progress area is hidden two seconds after it finishes.
 *
 * @returns {Promise<void>}
 */
async function initializeModels() {
    const statusDiv = document.getElementById('initStatus');
    const progressDiv = document.getElementById('initProgress');
    const initBtn = document.getElementById('initBtn');
    
    statusDiv.style.display = 'block';
    progressDiv.style.display = 'block';
    initBtn.disabled = true;
    
    try {
        // Give initTransformers() up to 30 seconds to finish if it has not yet.
        if (!transformersReady || !transformersPipeline) {
            updateProgress(5, "Waiting for Transformers.js to initialize...");
            statusDiv.innerHTML = '🔄 Initializing Transformers.js library...';
            
            let attempts = 0;
            while (!transformersReady && attempts < 30) {
                await new Promise(resolve => setTimeout(resolve, 1000));
                attempts++;
            }
            
            // Also require the pipeline factory itself; otherwise the first
            // load below fails with an unhelpful "not a function" TypeError.
            if (!transformersReady || !transformersPipeline) {
                throw new Error('Transformers.js failed to initialize. Please refresh the page.');
            }
        }
        
        updateProgress(10, "Loading embedding model...");
        statusDiv.innerHTML = '🔄 Loading embedding model (Xenova/all-MiniLM-L6-v2)...';
        
        embeddingModel = await transformersPipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2', {
            progress_callback: makeDownloadProgressCallback(statusDiv, 'embedding model')
        });
        
        updateProgress(40, "Loading question-answering model...");
        statusDiv.innerHTML = '🔄 Loading QA model (Xenova/distilbert-base-cased-distilled-squad)...';
        
        qaModel = await transformersPipeline('question-answering', 'Xenova/distilbert-base-cased-distilled-squad', {
            progress_callback: makeDownloadProgressCallback(statusDiv, 'QA model')
        });
        
        updateProgress(70, "Loading language model...");
        statusDiv.innerHTML = '🔄 Loading LLM (trying SmolLM models)...';
        
        // Text-generation candidates, tried in order until one loads.
        const llmCandidates = ['Xenova/gpt2', 'Xenova/distilgpt2'];
        
        let modelLoaded = false;
        for (const modelName of llmCandidates) {
            try {
                console.log(`Trying to load ${modelName}...`);
                statusDiv.innerHTML = `🔄 Loading LLM (${modelName})...`;
                
                llmModel = await transformersPipeline('text-generation', modelName, {
                    progress_callback: makeDownloadProgressCallback(statusDiv, modelName)
                });
                
                console.log(`✅ Successfully loaded ${modelName}`);
                loadedModelName = modelName;
                modelLoaded = true;
                break;
            } catch (error) {
                // Best effort: fall through to the next candidate.
                console.warn(`${modelName} failed:`, error);
            }
        }
        
        if (!modelLoaded) {
            throw new Error('Failed to load any LLM model');
        }
        
        updateProgress(85, "Generating embeddings for documents...");
        statusDiv.innerHTML = '🔄 Generating embeddings for existing documents...';
        
        // Embed every stored document; this covers documents added before the models were loaded.
        for (let i = 0; i < documents.length; i++) {
            const doc = documents[i];
            updateProgress(85 + (i / documents.length) * 10, `Processing document ${i + 1}/${documents.length}...`);
            doc.embedding = await generateEmbedding(doc.content);
        }
        
        updateProgress(100, "Initialization complete!");
        modelsInitialized = true;
        
        statusDiv.innerHTML = `✅ AI Models initialized successfully!
🧠 Embedding Model: Xenova/all-MiniLM-L6-v2 (384 dimensions)
🤖 QA Model: Xenova/distilbert-base-cased-distilled-squad
🚀 LLM Model: ${loadedModelName} (Language model for text generation)
📚 Documents processed: ${documents.length}
🔮 Ready for semantic search, Q&A, and LLM chat!

📊 Model Info:
• Embedding model size: ~23MB
• QA model size: ~28MB
• LLM model size: ~15-50MB (depending on model loaded)
• Total memory usage: ~70-100MB
• Inference speed: ~2-8 seconds per operation`;
        
        updateStatus();
        
    } catch (error) {
        console.error('Error initializing models:', error);
        // textContent rather than innerHTML: the message comes from a thrown
        // error and must not be interpreted as markup.
        statusDiv.textContent = `❌ Error initializing models: ${error.message}
        
Please check your internet connection and try again.`;
        updateProgress(0, "Initialization failed");
    } finally {
        initBtn.disabled = false;
        setTimeout(() => {
            progressDiv.style.display = 'none';
        }, 2000);
    }
}

/**
 * Build a Transformers.js `progress_callback` that writes the download
 * percentage for one model into the given status element.
 *
 * Byte progress is reported with `status: 'progress'`; the earlier check for
 * 'downloading' never matched, so the percentage was never displayed. The old
 * value is still accepted in case a caller or library build emits it.
 *
 * @param {HTMLElement} statusDiv - Element that receives the status text.
 * @param {string} label - Model description inserted into the message.
 * @returns {(progress: object) => void} Callback to pass as `progress_callback`.
 */
function makeDownloadProgressCallback(statusDiv, label) {
    return (progress) => {
        if (progress.status !== 'progress' && progress.status !== 'downloading') {
            return;
        }
        let percent = 0;
        if (progress.loaded && progress.total) {
            percent = Math.round((progress.loaded / progress.total) * 100);
        } else if (typeof progress.progress === 'number') {
            // Some events carry only a precomputed percentage.
            percent = Math.round(progress.progress);
        }
        statusDiv.innerHTML = `🔄 Downloading ${label}: ${percent}%`;
    };
}

/**
 * Embed a piece of text with the loaded feature-extraction pipeline, using
 * mean pooling and normalisation.
 *
 * @param {string} text - Text to embed.
 * @returns {Promise<number[]>} The pipeline output's `data` copied into a plain array.
 * @throws {Error} If the library or the embedding model is not ready; errors
 *   raised by the pipeline are logged and rethrown unchanged.
 */
async function generateEmbedding(text) {
    const libraryReady = transformersReady && transformersPipeline;
    if (!libraryReady) {
        throw new Error('Transformers.js not initialized');
    }
    if (!embeddingModel) {
        throw new Error('Embedding model not loaded');
    }

    try {
        const { data } = await embeddingModel(text, { pooling: 'mean', normalize: true });
        return Array.from(data);
    } catch (error) {
        console.error('Error generating embedding:', error);
        throw error;
    }
}

/**
 * Rank the stored documents against the query in #searchQuery by cosine
 * similarity of their embeddings and print the top #maxResults matches into
 * #searchResults. Documents without an embedding are ignored.
 *
 * @returns {Promise<void>}
 */
async function searchDocumentsSemantic() {
    const query = document.getElementById('searchQuery').value;
    const maxResults = parseInt(document.getElementById('maxResults').value);
    const resultsDiv = document.getElementById('searchResults');
    const searchBtn = document.getElementById('searchBtn');

    // Reveal the results area and show a plain-text message in it.
    const showMessage = (message) => {
        resultsDiv.style.display = 'block';
        resultsDiv.textContent = message;
    };

    if (!query.trim()) {
        showMessage('❌ Please enter a search query');
        return;
    }

    const ready = transformersReady && modelsInitialized && embeddingModel;
    if (!ready) {
        showMessage('❌ Please initialize AI models first!');
        return;
    }

    resultsDiv.style.display = 'block';
    resultsDiv.innerHTML = '<div class="loading"></div> Generating query embedding and searching...';
    searchBtn.disabled = true;

    try {
        const queryEmbedding = await generateEmbedding(query);

        // Score every embedded document, best match first.
        const ranked = documents
            .filter((doc) => doc.embedding)
            .map((doc) => ({ doc, similarity: cosineSimilarity(queryEmbedding, doc.embedding) }))
            .sort((left, right) => right.similarity - left.similarity);

        if (ranked.length === 0) {
            resultsDiv.textContent = `❌ No documents with embeddings found for '${query}'`;
            return;
        }

        const entries = ranked.slice(0, maxResults).map(({ doc, similarity }, position) => {
            const excerpt = doc.content.length > 200 ? doc.content.substring(0, 200) + '...' : doc.content;
            return `**Result ${position + 1}** (similarity: ${similarity.toFixed(3)})\n📄 Title: ${doc.title}\n📝 Content: ${excerpt}\n\n`;
        });

        resultsDiv.textContent = `🔍 Semantic search results for '${query}':\n\n` + entries.join('');
    } catch (error) {
        console.error('Search error:', error);
        resultsDiv.textContent = `❌ Error during search: ${error.message}`;
    } finally {
        searchBtn.disabled = false;
    }
}

/**
 * Case-insensitive keyword search over the stored documents.
 *
 * Each whitespace-separated word of the query in #searchQuery is counted as a
 * literal substring in every document; title hits weigh twice as much as
 * content hits. The top #maxResults documents are printed into #searchResults
 * after a short delay that lets the loading indicator render.
 */
function searchDocumentsKeyword() {
    const query = document.getElementById('searchQuery').value;
    const maxResults = parseInt(document.getElementById('maxResults').value, 10);
    const resultsDiv = document.getElementById('searchResults');
    
    if (!query.trim()) {
        resultsDiv.style.display = 'block';
        resultsDiv.textContent = '❌ Please enter a search query';
        return;
    }
    
    resultsDiv.style.display = 'block';
    resultsDiv.innerHTML = '<div class="loading"></div> Searching keywords...';
    
    setTimeout(() => {
        // Escape regex metacharacters so input such as "c++" or "(" is matched
        // literally instead of throwing a SyntaxError inside the timer, and
        // drop the empty words produced by leading/trailing whitespace, which
        // would otherwise match at every position.
        const wordPatterns = query
            .toLowerCase()
            .split(/\s+/)
            .filter(word => word.length > 0)
            .map(word => new RegExp(word.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), 'g'));
        
        const results = [];
        documents.forEach(doc => {
            const contentLower = doc.content.toLowerCase();
            const titleLower = doc.title.toLowerCase();
            
            let matches = 0;
            wordPatterns.forEach(pattern => {
                matches += (contentLower.match(pattern) || []).length;
                matches += (titleLower.match(pattern) || []).length * 2;
            });
            
            if (matches > 0) {
                results.push({ doc, score: matches });
            }
        });
        
        results.sort((a, b) => b.score - a.score);
        
        if (results.length === 0) {
            resultsDiv.textContent = `❌ No documents found containing '${query}'`;
            return;
        }
        
        let output = `🔍 Keyword search results for '${query}':\n\n`;
        results.slice(0, maxResults).forEach((result, i) => {
            const doc = result.doc;
            const excerpt = doc.content.length > 200 ? doc.content.substring(0, 200) + '...' : doc.content;
            output += `**Result ${i + 1}**\n📄 Title: ${doc.title}\n📝 Content: ${excerpt}\n\n`;
        });
        
        resultsDiv.textContent = output;
    }, 500);
}

/**
 * Answer the question in #chatQuestion with the QA pipeline, using as context
 * the stored documents whose embedding similarity to the question exceeds 0.1
 * (best first, at most #maxContext of them, capped at 2000 characters).
 * The answer and its sources are printed into #chatResponse.
 *
 * @returns {Promise<void>}
 */
async function chatWithRAG() {
    const question = document.getElementById('chatQuestion').value;
    const maxContext = parseInt(document.getElementById('maxContext').value);
    const responseDiv = document.getElementById('chatResponse');
    const chatBtn = document.getElementById('chatBtn');

    // Reveal the response area and show a plain-text message in it.
    const showMessage = (message) => {
        responseDiv.style.display = 'block';
        responseDiv.textContent = message;
    };

    if (!question.trim()) {
        showMessage('❌ Please enter a question');
        return;
    }

    const ready = transformersReady && modelsInitialized && embeddingModel && qaModel;
    if (!ready) {
        showMessage('❌ AI models not loaded yet. Please initialize them first!');
        return;
    }

    responseDiv.style.display = 'block';
    responseDiv.innerHTML = '<div class="loading"></div> Generating answer with real AI...';
    chatBtn.disabled = true;

    try {
        const questionEmbedding = await generateEmbedding(question);

        // Retrieve: score embedded documents, keep those above the threshold, best first.
        const relevantDocs = documents
            .filter((doc) => doc.embedding)
            .map((doc) => ({ doc, similarity: cosineSimilarity(questionEmbedding, doc.embedding) }))
            .filter(({ similarity }) => similarity > 0.1)
            .sort((left, right) => right.similarity - left.similarity)
            .slice(0, maxContext);

        if (relevantDocs.length === 0) {
            responseDiv.textContent = '❌ No relevant context found in the documents for your question.';
            return;
        }

        const context = relevantDocs
            .map(({ doc }) => doc.content)
            .join(' ')
            .substring(0, 2000);

        const qaResult = await qaModel(question, context);
        const [best] = relevantDocs;

        responseDiv.textContent = [
            `🤖 AI Answer:\n${qaResult.answer}\n\n`,
            `📊 Confidence: ${(qaResult.score * 100).toFixed(1)}%\n\n`,
            `📚 Sources: ${relevantDocs.length} documents\n`,
            `🔍 Best match: "${best.doc.title}" (similarity: ${best.similarity.toFixed(3)})\n\n`,
            `📝 Context used:\n${context.substring(0, 300)}...`,
        ].join('');
    } catch (error) {
        console.error('Chat error:', error);
        responseDiv.textContent = `❌ Error generating response: ${error.message}`;
    } finally {
        chatBtn.disabled = false;
    }
}

/**
 * Generate a continuation of the prompt in #llmPrompt with the loaded
 * text-generation pipeline, using the #maxTokens and #temperature settings,
 * and print it into #llmResponse.
 *
 * @returns {Promise<void>}
 */
async function chatWithLLM() {
    const prompt = document.getElementById('llmPrompt').value;
    const maxTokens = parseInt(document.getElementById('maxTokens').value);
    const temperature = parseFloat(document.getElementById('temperature').value);
    const responseDiv = document.getElementById('llmResponse');
    const llmBtn = document.getElementById('llmBtn');

    // Reveal the response area and show a plain-text message in it.
    const showMessage = (message) => {
        responseDiv.style.display = 'block';
        responseDiv.textContent = message;
    };

    if (!prompt.trim()) {
        showMessage('❌ Please enter a prompt');
        return;
    }

    const ready = transformersReady && modelsInitialized && llmModel;
    if (!ready) {
        showMessage('❌ LLM model not loaded yet. Please initialize models first!');
        return;
    }

    responseDiv.style.display = 'block';
    responseDiv.innerHTML = '<div class="loading"></div> Generating text with LLM...';
    llmBtn.disabled = true;

    try {
        const generationOptions = {
            max_new_tokens: maxTokens,
            temperature: temperature,
            do_sample: true,
            return_full_text: false
        };
        const [firstCandidate] = await llmModel(prompt, generationOptions);
        const generatedText = firstCandidate.generated_text;

        // Show only the part after the organisation prefix, e.g. "gpt2".
        const modelLabel = loadedModelName ? loadedModelName.split('/')[1] : 'Language Model';

        responseDiv.textContent = [
            `🚀 LLM Generated Text:\n\n"${generatedText}"\n\n`,
            `📊 Settings: ${maxTokens} tokens, temperature ${temperature}\n`,
            `🤖 Model: ${modelLabel}\n`,
            `⏱️ Generated in real-time by your browser!`,
        ].join('');
    } catch (error) {
        console.error('LLM error:', error);
        responseDiv.textContent = `❌ Error generating text: ${error.message}`;
    } finally {
        llmBtn.disabled = false;
    }
}

/**
 * Generate text for the prompt in #llmPrompt after prepending context from
 * the stored documents: those whose embedding similarity to the prompt
 * exceeds 0.1, best first, at most three, 300 characters each. When nothing
 * qualifies, the bare prompt is sent. The result goes into #llmResponse.
 *
 * @returns {Promise<void>}
 */
async function chatWithLLMRAG() {
    const prompt = document.getElementById('llmPrompt').value;
    const maxTokens = parseInt(document.getElementById('maxTokens').value);
    const temperature = parseFloat(document.getElementById('temperature').value);
    const responseDiv = document.getElementById('llmResponse');
    const llmRagBtn = document.getElementById('llmRagBtn');

    // Reveal the response area and show a plain-text message in it.
    const showMessage = (message) => {
        responseDiv.style.display = 'block';
        responseDiv.textContent = message;
    };

    if (!prompt.trim()) {
        showMessage('❌ Please enter a prompt');
        return;
    }

    const ready = transformersReady && modelsInitialized && llmModel && embeddingModel;
    if (!ready) {
        showMessage('❌ Models not loaded yet. Please initialize all models first!');
        return;
    }

    responseDiv.style.display = 'block';
    responseDiv.innerHTML = '<div class="loading"></div> Finding relevant context and generating with LLM...';
    llmRagBtn.disabled = true;

    try {
        const queryEmbedding = await generateEmbedding(prompt);

        // Retrieve the three best-matching embedded documents above the threshold.
        const relevantDocs = documents
            .filter((doc) => doc.embedding)
            .map((doc) => ({ doc, similarity: cosineSimilarity(queryEmbedding, doc.embedding) }))
            .filter(({ similarity }) => similarity > 0.1)
            .sort((left, right) => right.similarity - left.similarity)
            .slice(0, 3);

        const hasContext = relevantDocs.length > 0;

        let enhancedPrompt = prompt;
        if (hasContext) {
            const context = relevantDocs.map(({ doc }) => doc.content.substring(0, 300)).join(' ');
            enhancedPrompt = `Context: ${context}\n\nQuestion: ${prompt}\n\nAnswer:`;
        }

        const [firstCandidate] = await llmModel(enhancedPrompt, {
            max_new_tokens: maxTokens,
            temperature: temperature,
            do_sample: true,
            return_full_text: false
        });
        const generatedText = firstCandidate.generated_text;

        const lines = [
            `🤖 LLM + RAG Generated Response:\n\n"${generatedText}"\n\n`,
            `📚 Context: ${relevantDocs.length} relevant documents used\n`,
        ];
        if (hasContext) {
            const [best] = relevantDocs;
            lines.push(`🔍 Best match: "${best.doc.title}" (similarity: ${best.similarity.toFixed(3)})\n`);
        }
        lines.push(`📊 Settings: ${maxTokens} tokens, temperature ${temperature}\n`);
        lines.push(`🚀 Model: ${loadedModelName ? loadedModelName.split('/')[1] : 'LLM'} enhanced with document retrieval`);

        responseDiv.textContent = lines.join('');
    } catch (error) {
        console.error('LLM+RAG error:', error);
        responseDiv.textContent = `❌ Error generating response: ${error.message}`;
    } finally {
        llmRagBtn.disabled = false;
    }
}

/**
 * Add the document typed into #docTitle / #docContent to the store.
 *
 * The content is trimmed before it is stored, embedded and previewed. A
 * missing or whitespace-only title falls back to "User Document N". An
 * embedding is generated right away when the models are loaded; otherwise it
 * stays null (initializeModels() embeds every stored document when it runs).
 * The outcome is reported in #addStatus and a preview in #docPreview.
 *
 * @returns {Promise<void>}
 */
async function addDocumentManual() {
    const titleInput = document.getElementById('docTitle');
    const contentInput = document.getElementById('docContent');
    const statusDiv = document.getElementById('addStatus');
    const previewDiv = document.getElementById('docPreview');
    const addBtn = document.getElementById('addBtn');
    
    const content = contentInput.value.trim();
    // Trim first so that a title consisting only of spaces is treated as
    // missing. The "- 2" numbers user documents from 1 after the three
    // built-in sample documents.
    const title = titleInput.value.trim() || `User Document ${documents.length - 2}`;
    
    if (!content) {
        statusDiv.style.display = 'block';
        statusDiv.textContent = '❌ Please enter document content';
        previewDiv.style.display = 'none';
        return;
    }
    
    statusDiv.style.display = 'block';
    statusDiv.innerHTML = '<div class="loading"></div> Adding document...';
    addBtn.disabled = true;
    
    try {
        const newDocument = {
            id: documents.length,
            title: title,
            content: content,
            embedding: null
        };
        
        // Embed exactly the text that is stored, when the models are available.
        if (transformersReady && modelsInitialized && embeddingModel) {
            statusDiv.innerHTML = '<div class="loading"></div> Generating AI embedding...';
            newDocument.embedding = await generateEmbedding(content);
        }
        
        // Take the id at insertion time: other documents may have been added
        // while the embedding was being computed.
        newDocument.id = documents.length;
        documents.push(newDocument);
        
        const preview = content.length > 300 ? content.substring(0, 300) + '...' : content;
        // Report what actually happened rather than what the flags suggest.
        const embeddingNote = newDocument.embedding
            ? '\n🧠 AI embedding generated automatically'
            : '\n⚠️ AI embedding will be generated when models are loaded';
        const status = `✅ Document added successfully!
📄 Title: ${title}
📊 Size: ${content.length.toLocaleString()} characters
📚 Total documents: ${documents.length}${embeddingNote}`;
        
        statusDiv.textContent = status;
        previewDiv.style.display = 'block';
        previewDiv.textContent = `📖 Preview:\n${preview}`;
        
        // Clear form
        titleInput.value = '';
        contentInput.value = '';
        
        updateStatus();
        
    } catch (error) {
        console.error('Error adding document:', error);
        statusDiv.textContent = `❌ Error adding document: ${error.message}`;
    } finally {
        addBtn.disabled = false;
    }
}

// File upload functionality
/**
 * Wire the upload widgets: clicking #uploadArea opens the file picker, files
 * dropped on it or chosen through #fileInput are passed to handleFiles(), and
 * the area carries the `dragover` class while something is dragged over it.
 * Does nothing when either element is missing.
 */
function initFileUpload() {
    const uploadArea = document.getElementById('uploadArea');
    const fileInput = document.getElementById('fileInput');

    if (!(uploadArea && fileInput)) return;

    // Suppress the browser default and switch the drag highlight on or off.
    const setHighlight = (event, highlighted) => {
        event.preventDefault();
        if (highlighted) {
            uploadArea.classList.add('dragover');
        } else {
            uploadArea.classList.remove('dragover');
        }
    };

    // Click to select files
    uploadArea.addEventListener('click', () => {
        fileInput.click();
    });

    // Drag and drop
    uploadArea.addEventListener('dragover', (event) => setHighlight(event, true));
    uploadArea.addEventListener('dragleave', (event) => setHighlight(event, false));
    uploadArea.addEventListener('drop', (event) => {
        setHighlight(event, false);
        handleFiles(event.dataTransfer.files);
    });

    // Files chosen through the picker
    fileInput.addEventListener('change', (event) => {
        handleFiles(event.target.files);
    });
}

/**
 * Process a batch of uploaded files one after another via processFile(),
 * updating the upload progress bar as it goes and printing a summary of
 * successes and failures into #uploadStatus. A file that fails is logged and
 * counted; it does not stop the batch.
 *
 * @param {FileList|File[]} files - Files selected or dropped by the user.
 * @returns {Promise<void>}
 */
async function handleFiles(files) {
    const uploadStatus = document.getElementById('uploadStatus');
    const uploadProgress = document.getElementById('uploadProgress');
    const uploadProgressBar = document.getElementById('uploadProgressBar');
    const uploadProgressText = document.getElementById('uploadProgressText');

    const total = files.length;
    if (total === 0) return;

    uploadStatus.style.display = 'block';
    uploadProgress.style.display = 'block';
    uploadStatus.textContent = '';

    const tally = { succeeded: 0, failed: 0 };

    for (const [index, file] of Array.from(files).entries()) {
        const position = index + 1;

        uploadProgressBar.style.width = (position / total) * 100 + '%';
        uploadProgressText.textContent = file.size > 10000
            ? `Processing large file: ${file.name} (${position}/${total}) - chunking for better search...`
            : `Processing ${file.name} (${position}/${total})...`;

        try {
            await processFile(file);
            tally.succeeded += 1;
        } catch (error) {
            console.error(`Error processing ${file.name}:`, error);
            tally.failed += 1;
        }
    }

    uploadProgress.style.display = 'none';

    const summary = ['✅ Upload complete!', `📁 ${tally.succeeded} files processed successfully`];
    if (tally.failed > 0) {
        summary.push(`❌ ${tally.failed} files failed to process`);
    }
    summary.push(`📊 Total documents: ${documents.length}`);
    summary.push('🧩 Large files automatically chunked for better search');

    uploadStatus.textContent = summary.join('\n');
    updateStatus();

    // Reset the picker
    document.getElementById('fileInput').value = '';
}

/**
 * Split text into chunks of roughly `maxChunkSize` characters, breaking only
 * between sentences.
 *
 * Sentences keep their original terminators ("?", "!", "...") and are joined
 * with a single space; the previous implementation discarded the terminators
 * and rejoined with ". ", which rewrote questions and exclamations as
 * statements and dropped the final punctuation mark. A single sentence longer
 * than `maxChunkSize` is kept whole as its own chunk.
 *
 * @param {string} content - Text to split.
 * @param {number} [maxChunkSize=1000] - Target maximum chunk length in characters.
 * @returns {string[]} The chunks in order; `[content]` when no sentence text is found.
 */
function chunkDocument(content, maxChunkSize = 1000) {
    // A sentence is a run of non-terminator characters plus any terminators
    // that follow it. Pieces holding only punctuation/whitespace are dropped.
    const sentences = (content.match(/[^.!?]+[.!?]*/g) || [])
        .map(piece => piece.trim())
        .filter(piece => /[^.!?\s]/.test(piece));
    
    const chunks = [];
    let currentChunk = '';
    
    for (const sentence of sentences) {
        // Length the chunk would reach, counting the joining space.
        const projectedLength = currentChunk
            ? currentChunk.length + 1 + sentence.length
            : sentence.length;
        
        if (currentChunk && projectedLength > maxChunkSize) {
            chunks.push(currentChunk);
            currentChunk = sentence;
        } else {
            currentChunk += (currentChunk ? ' ' : '') + sentence;
        }
    }
    
    if (currentChunk) {
        chunks.push(currentChunk);
    }
    
    return chunks.length > 0 ? chunks : [content];
}

/**
 * Read an uploaded file as text and add it to the global `documents` store.
 *
 * Content longer than 2000 characters is split with `chunkDocument` (1500
 * characters per chunk) and stored as one document per chunk, titled
 * "<name> (Part i/n)". Shorter content is stored as a single document. An
 * embedding is generated for each document when the embedding model is ready;
 * otherwise `embedding` stays `null`.
 *
 * All records for a file are prepared first and only then appended, so a
 * failure while embedding leaves `documents` untouched for that file.
 *
 * @param {File} file - File selected by the user.
 * @returns {Promise<void>} Resolves once every document has been stored.
 * @throws {Error} When the file cannot be read, is empty after trimming, or
 *   embedding generation fails.
 */
async function processFile(file) {
    // FileReader is callback-based; wrap only the read itself in a Promise.
    const rawText = await new Promise((resolve, reject) => {
        const reader = new FileReader();
        reader.onload = () => resolve(reader.result);
        reader.onerror = () => reject(new Error(`Failed to read file: ${file.name}`));
        reader.readAsText(file);
    });

    const content = rawText.trim();
    if (!content) {
        // An empty document can never match a query; report it to the caller instead.
        throw new Error(`File is empty: ${file.name}`);
    }

    const baseTitle = file.name.replace(/\.[^/.]+$/, ""); // Remove file extension

    let parts = [content];
    if (content.length > 2000) {
        parts = chunkDocument(content, 1500);
        console.log(`📄 Chunking large file: ${parts.length} chunks created from ${content.length} characters`);
    }

    // Build every record before touching `documents`, so an embedding error
    // cannot leave a partially ingested file behind.
    const prepared = [];
    for (let i = 0; i < parts.length; i++) {
        const title = parts.length > 1 ? `${baseTitle} (Part ${i + 1}/${parts.length})` : baseTitle;
        let embedding = null;

        // Generate embedding if models are loaded
        if (transformersReady && modelsInitialized && embeddingModel) {
            embedding = await generateEmbedding(parts[i]);
        }

        prepared.push({ title, content: parts[i], embedding });
    }

    // Assign ids at insertion time: `documents` may have grown while awaiting.
    for (const { title, content: text, embedding } of prepared) {
        documents.push({ id: documents.length, title, content: text, embedding });
    }
}

/**
 * Run a self-test of the loaded models and write a plain-text report into the
 * `#testOutput` element.
 *
 * The report lists the document count and which models are loaded. When the
 * embedding model is ready it also exercises embedding generation, a semantic
 * search over the documents that already have embeddings, and, if loaded, the
 * QA and LLM models. Steps that cannot run (no embedded documents, no
 * documents at all) are reported as skipped instead of failing the run.
 *
 * The `#testBtn` button is disabled while the test runs and re-enabled
 * afterwards, including on failure.
 *
 * @returns {Promise<void>}
 */
async function testSystem() {
    const outputDiv = document.getElementById('testOutput');
    const testBtn = document.getElementById('testBtn');

    outputDiv.style.display = 'block';
    outputDiv.innerHTML = '<div class="loading"></div> Running system tests...';
    testBtn.disabled = true;

    try {
        let output = `🧪 System Test Results:\n\n`;
        output += `📊 Documents: ${documents.length} loaded\n`;
        output += `🔧 Transformers.js: ${transformersReady ? '✅ Ready' : '❌ Not ready'}\n`;
        output += `🧠 Embedding Model: ${embeddingModel ? '✅ Loaded' : '❌ Not loaded'}\n`;
        output += `🤖 QA Model: ${qaModel ? '✅ Loaded' : '❌ Not loaded'}\n`;
        output += `🚀 LLM Model: ${llmModel ? '✅ Loaded' : '❌ Not loaded'}\n\n`;

        if (transformersReady && modelsInitialized && embeddingModel) {
            output += `🔍 Testing embedding generation...\n`;
            const testEmbedding = await generateEmbedding("test sentence");
            output += `✅ Embedding test: Generated ${testEmbedding.length}D vector\n\n`;

            output += `🔍 Testing semantic search...\n`;
            const testQuery = "artificial intelligence";
            const queryEmbedding = await generateEmbedding(testQuery);

            // Only documents that already carry an embedding can be ranked.
            const testResults = documents
                .filter((doc) => doc.embedding)
                .map((doc) => ({ doc, similarity: cosineSimilarity(queryEmbedding, doc.embedding) }))
                .sort((a, b) => b.similarity - a.similarity);

            if (testResults.length > 0) {
                output += `✅ Search test: Found ${testResults.length} results\n`;
                output += `📄 Top result: "${testResults[0].doc.title}" (similarity: ${testResults[0].similarity.toFixed(3)})\n\n`;
            } else {
                output += `⚠️ Search test skipped: no documents have embeddings yet\n\n`;
            }

            if (qaModel) {
                output += `🤖 Testing QA model...\n`;
                // The QA test needs a context passage; guard against an empty store.
                const sampleDoc = documents[0];
                if (sampleDoc) {
                    const context = sampleDoc.content.substring(0, 500);
                    const testQuestion = "What is artificial intelligence?";
                    const qaResult = await qaModel(testQuestion, context);
                    output += `✅ QA test: Generated answer with ${(qaResult.score * 100).toFixed(1)}% confidence\n`;
                    output += `💬 Answer: ${qaResult.answer.substring(0, 100)}...\n\n`;
                } else {
                    output += `⚠️ QA test skipped: no documents available for context\n\n`;
                }
            }

            if (llmModel) {
                output += `🚀 Testing LLM model...\n`;
                const testPrompt = "Explain artificial intelligence:";
                const llmResult = await llmModel(testPrompt, { max_new_tokens: 30, temperature: 0.7, do_sample: true, return_full_text: false });
                output += `✅ LLM test: Generated text completion\n`;
                output += `💬 Generated: "${llmResult[0].generated_text.substring(0, 100)}..."\n\n`;
            }

            output += `🎉 All tests passed! System is fully operational.`;
        } else {
            output += `⚠️ Models not initialized. Click "Initialize AI Models" first.`;
        }

        // textContent (not innerHTML): the report contains document titles and model output.
        outputDiv.textContent = output;

    } catch (error) {
        console.error('Test error:', error);
        outputDiv.textContent = `❌ Test failed: ${error.message}`;
    } finally {
        testBtn.disabled = false;
    }
}

// Render the initial status display
updateStatus();

// Startup banner for the browser console
[
    '🤖 AI-Powered RAG System with Transformers.js',
    'Models: Xenova/all-MiniLM-L6-v2, Xenova/distilbert-base-cased-distilled-squad'
].forEach((line) => console.log(line));

// Attach the UI entry points to `window` so they are reachable as globals
// (presumably for inline HTML event handlers — confirm against the markup).
Object.assign(window, {
    showTab,
    updateSliderValue,
    initializeModels,
    searchDocumentsSemantic,
    searchDocumentsKeyword,
    chatWithRAG,
    chatWithLLM,
    chatWithLLMRAG,
    addDocumentManual,
    testSystem
});