// Import transformers.js 3.0.0 from CDN (new Hugging Face ownership)
import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.0';

// Make available globally so non-module code (and the fallback path below) can find it.
window.transformers = { pipeline, env };
window.transformersLoaded = true;
console.log('โœ… Transformers.js 3.0.0 loaded via ES modules (Hugging Face)');

// Global handles onto the transformers.js library once initialized.
let transformersPipeline = null;
let transformersEnv = null;
let transformersReady = false;

// Document store. Each entry: { id, title, content, embedding } where
// `embedding` is filled in lazily (array of floats) once the embedding
// model has been loaded; `null` means "not yet embedded".
let documents = [
    {
        id: 0,
        title: "Artificial Intelligence Overview",
        content: "Artificial Intelligence (AI) is a branch of computer science that aims to create intelligent machines that work and react like humans. Some activities computers with AI are designed for include speech recognition, learning, planning, and problem-solving. AI is used in healthcare, finance, transportation, and entertainment. Machine learning enables computers to learn from experience without explicit programming. Deep learning uses neural networks to understand complex patterns in data.",
        embedding: null
    },
    {
        id: 1,
        title: "Space Exploration",
        content: "Space exploration is the ongoing discovery and exploration of celestial structures in outer space through evolving space technology. Physical exploration is conducted by unmanned robotic probes and human spaceflight. Space exploration has been used for geopolitical rivalries like the Cold War. The early era was driven by a Space Race between the Soviet Union and United States. Modern exploration includes Mars missions, the International Space Station, and satellite programs.",
        embedding: null
    },
    {
        id: 2,
        title: "Renewable Energy",
        content: "Renewable energy comes from naturally replenished resources on a human timescale. It includes sunlight, wind, rain, tides, waves, and geothermal heat. Renewable energy contrasts with fossil fuels that are used faster than replenished. Most renewable sources are sustainable. Solar energy is abundant and promising. Wind energy and hydroelectric power are major contributors to renewable generation worldwide.",
        embedding: null
    }
];

// Loaded model pipelines (null until initializeModels() succeeds).
let embeddingModel = null;
let qaModel = null;
let llmModel = null;
let loadedModelName = '';
let modelsInitialized = false;

/**
 * Cosine similarity between two equal-length numeric vectors.
 * @param {number[]} a - First vector.
 * @param {number[]} b - Second vector (same length as `a`).
 * @returns {number} Similarity in [-1, 1]; 0 when either vector is all zeros.
 */
function cosineSimilarity(a, b) {
    const dotProduct = a.reduce((sum, val, i) => sum + val * b[i], 0);
    const magnitudeA = Math.sqrt(a.reduce((sum, val) => sum + val * val, 0));
    const magnitudeB = Math.sqrt(b.reduce((sum, val) => sum + val * val, 0));
    if (magnitudeA === 0 || magnitudeB === 0) return 0;
    return dotProduct / (magnitudeA * magnitudeB);
}

/**
 * Escape regex metacharacters so user text can be embedded in a RegExp safely.
 * Prevents queries like "c++" or "(ai" from throwing a SyntaxError.
 * @param {string} text - Raw user input.
 * @returns {string} Escaped text safe for `new RegExp(...)`.
 */
function escapeRegExp(text) {
    return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/**
 * Initialize transformers.js, preferring the ES-module load above and falling
 * back to a UMD global, polling briefly if neither is ready yet.
 * Updates the status UI on success or failure.
 */
async function initTransformers() {
    try {
        console.log('๐Ÿ”„ Initializing Transformers.js...');
        // Try ES modules first (preferred method)
        if (window.transformers && window.transformersLoaded) {
            console.log('โœ… Using ES modules version (Transformers.js 3.0.0)');
            ({ pipeline: transformersPipeline, env: transformersEnv } = window.transformers);
        }
        // Fallback to UMD version
        else if (window.Transformers) {
            console.log('โœ… Using UMD version (Transformers.js 3.0.0)');
            ({ pipeline: transformersPipeline, env: transformersEnv } = window.Transformers);
        }
        // Wait for library to load (up to ~10s: 50 polls x 200ms)
        else {
            console.log('โณ Waiting for library to load...');
            let attempts = 0;
            while (!window.Transformers && !window.transformersLoaded && attempts < 50) {
                await new Promise(resolve => setTimeout(resolve, 200));
                attempts++;
            }
            if (window.transformers && window.transformersLoaded) {
                ({ pipeline: transformersPipeline, env: transformersEnv } = window.transformers);
            } else if (window.Transformers) {
                ({ pipeline: transformersPipeline, env: transformersEnv } = window.Transformers);
            } else {
                throw new Error('Failed to load Transformers.js library');
            }
        }

        // Configure transformers.js with minimal settings
        if (transformersEnv) {
            transformersEnv.allowLocalModels = false;
            transformersEnv.allowRemoteModels = true;
            // Let Transformers.js use default WASM paths for better compatibility
        }

        transformersReady = true;
        console.log('โœ… Transformers.js initialized successfully');

        // Update UI to show ready state
        updateStatus();
        const statusSpan = document.getElementById('transformersStatus');
        if (statusSpan) {
            statusSpan.textContent = 'โœ… Ready!';
            statusSpan.style.color = 'green';
        }
    } catch (error) {
        console.error('โŒ Error initializing Transformers.js:', error);
        // Surface the error in both status areas if they exist.
        const statusDiv = document.getElementById('status');
        if (statusDiv) {
            statusDiv.textContent = `โŒ Failed to load Transformers.js: ${error.message}`;
            statusDiv.style.color = 'red';
        }
        const statusSpan = document.getElementById('transformersStatus');
        if (statusSpan) {
            statusSpan.textContent = `โŒ Failed: ${error.message}`;
            statusSpan.style.color = 'red';
        }
    }
}

// Initialize when page loads
document.addEventListener('DOMContentLoaded', function() {
    initTransformers();
    initFileUpload();
});

/**
 * Show one tab panel and mark its tab button active.
 * Relies on the implicit global `event` when invoked from inline onclick
 * handlers; guarded so programmatic calls don't throw.
 * @param {string} tabName - The id of the .tab-content element to show.
 */
function showTab(tabName) {
    // Hide all tabs
    document.querySelectorAll('.tab-content').forEach(tab => {
        tab.classList.remove('active');
    });
    document.querySelectorAll('.tab').forEach(button => {
        button.classList.remove('active');
    });
    // Show selected tab
    document.getElementById(tabName).classList.add('active');
    if (typeof event !== 'undefined' && event && event.target) {
        event.target.classList.add('active');
    }
}

/**
 * Mirror a range slider's current value into its companion "<id>Value" span.
 * @param {string} sliderId - The id of the range input.
 */
function updateSliderValue(sliderId) {
    const slider = document.getElementById(sliderId);
    const valueSpan = document.getElementById(sliderId + 'Value');
    valueSpan.textContent = slider.value;
}

/** Refresh the global status line with document count and model states. */
function updateStatus() {
    const status = document.getElementById('status');
    const transformersStatus = transformersReady ? 'Ready' : 'Not ready';
    const embeddingStatus = embeddingModel ? 'Loaded' : 'Not loaded';
    const qaStatus = qaModel ? 'Loaded' : 'Not loaded';
    const llmStatus = llmModel ? 'Loaded' : 'Not loaded';
    status.textContent = `๐Ÿ“Š Documents: ${documents.length} | ๐Ÿ”ง Transformers.js: ${transformersStatus} | ๐Ÿค– QA: ${qaStatus} | ๐Ÿง  Embedding: ${embeddingStatus} | ๐Ÿš€ LLM: ${llmStatus}`;
}

/**
 * Update the initialization progress bar and label.
 * @param {number} percent - 0..100.
 * @param {string} text - Label to show under the bar.
 */
function updateProgress(percent, text) {
    const progressBar = document.getElementById('progressBar');
    const progressText = document.getElementById('progressText');
    progressBar.style.width = percent + '%';
    progressText.textContent = text;
}

/**
 * Download and initialize the embedding, QA, and LLM pipelines, then embed
 * all seed documents. Drives the progress UI throughout; re-enables the
 * init button in all cases.
 */
async function initializeModels() {
    const statusDiv = document.getElementById('initStatus');
    const progressDiv = document.getElementById('initProgress');
    const initBtn = document.getElementById('initBtn');

    statusDiv.style.display = 'block';
    progressDiv.style.display = 'block';
    initBtn.disabled = true;

    try {
        // Check if transformers.js is ready; poll up to 30s before giving up.
        if (!transformersReady || !transformersPipeline) {
            updateProgress(5, "Waiting for Transformers.js to initialize...");
            statusDiv.innerHTML = '๐Ÿ”„ Initializing Transformers.js library...';
            let attempts = 0;
            while (!transformersReady && attempts < 30) {
                await new Promise(resolve => setTimeout(resolve, 1000));
                attempts++;
            }
            if (!transformersReady) {
                throw new Error('Transformers.js failed to initialize. Please refresh the page.');
            }
        }

        updateProgress(10, "Loading embedding model...");
        statusDiv.innerHTML = '๐Ÿ”„ Loading embedding model (Xenova/all-MiniLM-L6-v2)...';
        embeddingModel = await transformersPipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2', {
            progress_callback: (progress) => {
                if (progress.status === 'downloading') {
                    const percent = progress.loaded && progress.total ? Math.round((progress.loaded / progress.total) * 100) : 0;
                    statusDiv.innerHTML = `๐Ÿ”„ Downloading embedding model: ${percent}%`;
                }
            }
        });

        updateProgress(40, "Loading question-answering model...");
        statusDiv.innerHTML = '๐Ÿ”„ Loading QA model (Xenova/distilbert-base-cased-distilled-squad)...';
        qaModel = await transformersPipeline('question-answering', 'Xenova/distilbert-base-cased-distilled-squad', {
            progress_callback: (progress) => {
                if (progress.status === 'downloading') {
                    const percent = progress.loaded && progress.total ? Math.round((progress.loaded / progress.total) * 100) : 0;
                    statusDiv.innerHTML = `๐Ÿ”„ Downloading QA model: ${percent}%`;
                }
            }
        });

        updateProgress(70, "Loading language model...");
        statusDiv.innerHTML = '๐Ÿ”„ Loading LLM (trying SmolLM models)...';
        // Try a list of text-generation models in order; first success wins.
        const modelsToTry = [
            { name: 'Xenova/gpt2', options: {} },
            { name: 'Xenova/distilgpt2', options: {} }
        ];
        let modelLoaded = false;
        for (const model of modelsToTry) {
            try {
                console.log(`Trying to load ${model.name}...`);
                statusDiv.innerHTML = `๐Ÿ”„ Loading LLM (${model.name})...`;
                llmModel = await transformersPipeline('text-generation', model.name, {
                    progress_callback: (progress) => {
                        if (progress.status === 'downloading') {
                            const percent = progress.loaded && progress.total ? Math.round((progress.loaded / progress.total) * 100) : 0;
                            statusDiv.innerHTML = `๐Ÿ”„ Downloading ${model.name}: ${percent}%`;
                        }
                    }
                });
                console.log(`โœ… Successfully loaded ${model.name}`);
                loadedModelName = model.name;
                modelLoaded = true;
                break;
            } catch (error) {
                // Fall through to the next candidate model.
                console.warn(`${model.name} failed:`, error);
            }
        }
        if (!modelLoaded) {
            throw new Error('Failed to load any LLM model');
        }

        updateProgress(85, "Generating embeddings for documents...");
        statusDiv.innerHTML = '๐Ÿ”„ Generating embeddings for existing documents...';
        for (let i = 0; i < documents.length; i++) {
            const doc = documents[i];
            updateProgress(85 + (i / documents.length) * 10, `Processing document ${i + 1}/${documents.length}...`);
            doc.embedding = await generateEmbedding(doc.content);
        }

        updateProgress(100, "Initialization complete!");
        modelsInitialized = true;

        statusDiv.innerHTML = `โœ… AI Models initialized successfully!
๐Ÿง  Embedding Model: Xenova/all-MiniLM-L6-v2 (384 dimensions)
๐Ÿค– QA Model: Xenova/distilbert-base-cased-distilled-squad
๐Ÿš€ LLM Model: ${loadedModelName} (Language model for text generation)
๐Ÿ“š Documents processed: ${documents.length}
๐Ÿ”ฎ Ready for semantic search, Q&A, and LLM chat!
๐Ÿ“Š Model Info:
โ€ข Embedding model size: ~23MB
โ€ข QA model size: ~28MB
โ€ข LLM model size: ~15-50MB (depending on model loaded)
โ€ข Total memory usage: ~70-100MB
โ€ข Inference speed: ~2-8 seconds per operation`;

        updateStatus();
    } catch (error) {
        console.error('Error initializing models:', error);
        statusDiv.innerHTML = `โŒ Error initializing models: ${error.message} Please check your internet connection and try again.`;
        updateProgress(0, "Initialization failed");
    } finally {
        initBtn.disabled = false;
        setTimeout(() => {
            progressDiv.style.display = 'none';
        }, 2000);
    }
}

/**
 * Generate a mean-pooled, normalized embedding for a piece of text.
 * @param {string} text - The text to embed.
 * @returns {Promise<number[]>} The embedding vector.
 * @throws {Error} If transformers.js or the embedding model is not ready.
 */
async function generateEmbedding(text) {
    if (!transformersReady || !transformersPipeline) {
        throw new Error('Transformers.js not initialized');
    }
    if (!embeddingModel) {
        throw new Error('Embedding model not loaded');
    }
    try {
        const output = await embeddingModel(text, { pooling: 'mean', normalize: true });
        return Array.from(output.data);
    } catch (error) {
        console.error('Error generating embedding:', error);
        throw error;
    }
}

/**
 * Semantic search over the document store: embed the query, rank all
 * embedded documents by cosine similarity, and render the top results.
 */
async function searchDocumentsSemantic() {
    const query = document.getElementById('searchQuery').value;
    const maxResults = parseInt(document.getElementById('maxResults').value);
    const resultsDiv = document.getElementById('searchResults');
    const searchBtn = document.getElementById('searchBtn');

    if (!query.trim()) {
        resultsDiv.style.display = 'block';
        resultsDiv.textContent = 'โŒ Please enter a search query';
        return;
    }
    if (!transformersReady || !modelsInitialized || !embeddingModel) {
        resultsDiv.style.display = 'block';
        resultsDiv.textContent = 'โŒ Please initialize AI models first!';
        return;
    }

    resultsDiv.style.display = 'block';
    resultsDiv.innerHTML = 'Generating query embedding and searching...';
    searchBtn.disabled = true;

    try {
        const queryEmbedding = await generateEmbedding(query);

        // Score every embedded document against the query.
        const results = [];
        documents.forEach(doc => {
            if (doc.embedding) {
                const similarity = cosineSimilarity(queryEmbedding, doc.embedding);
                results.push({ doc, similarity });
            }
        });
        results.sort((a, b) => b.similarity - a.similarity);

        if (results.length === 0) {
            resultsDiv.textContent = `โŒ No documents with embeddings found for '${query}'`;
            return;
        }

        let output = `๐Ÿ” Semantic search results for '${query}':\n\n`;
        results.slice(0, maxResults).forEach((result, i) => {
            const doc = result.doc;
            const similarity = result.similarity;
            const excerpt = doc.content.length > 200 ? doc.content.substring(0, 200) + '...' : doc.content;
            output += `**Result ${i + 1}** (similarity: ${similarity.toFixed(3)})\n๐Ÿ“„ Title: ${doc.title}\n๐Ÿ“ Content: ${excerpt}\n\n`;
        });
        resultsDiv.textContent = output;
    } catch (error) {
        console.error('Search error:', error);
        resultsDiv.textContent = `โŒ Error during search: ${error.message}`;
    } finally {
        searchBtn.disabled = false;
    }
}

/**
 * Simple keyword search: counts occurrences of each query word in every
 * document (title matches weighted x2) and renders the top results.
 * User input is regex-escaped so punctuation cannot break the search.
 */
function searchDocumentsKeyword() {
    const query = document.getElementById('searchQuery').value;
    const maxResults = parseInt(document.getElementById('maxResults').value);
    const resultsDiv = document.getElementById('searchResults');

    if (!query.trim()) {
        resultsDiv.style.display = 'block';
        resultsDiv.textContent = 'โŒ Please enter a search query';
        return;
    }

    resultsDiv.style.display = 'block';
    resultsDiv.innerHTML = 'Searching keywords...';

    setTimeout(() => {
        const results = [];
        const queryWords = query.toLowerCase().split(/\s+/);
        documents.forEach(doc => {
            const contentLower = doc.content.toLowerCase();
            const titleLower = doc.title.toLowerCase();
            let matches = 0;
            queryWords.forEach(word => {
                // Escape the word: raw input like "c++" would otherwise throw.
                const wordPattern = new RegExp(escapeRegExp(word), 'g');
                matches += (contentLower.match(wordPattern) || []).length;
                matches += (titleLower.match(wordPattern) || []).length * 2;
            });
            if (matches > 0) {
                results.push({ doc, score: matches });
            }
        });
        results.sort((a, b) => b.score - a.score);

        if (results.length === 0) {
            resultsDiv.textContent = `โŒ No documents found containing '${query}'`;
            return;
        }

        let output = `๐Ÿ” Keyword search results for '${query}':\n\n`;
        results.slice(0, maxResults).forEach((result, i) => {
            const doc = result.doc;
            const excerpt = doc.content.length > 200 ? doc.content.substring(0, 200) + '...' : doc.content;
            output += `**Result ${i + 1}**\n๐Ÿ“„ Title: ${doc.title}\n๐Ÿ“ Content: ${excerpt}\n\n`;
        });
        resultsDiv.textContent = output;
    }, 500);
}

/**
 * Retrieval-augmented QA: find documents semantically similar to the
 * question, concatenate their content as context (capped at 2000 chars),
 * and run the extractive QA model over it.
 */
async function chatWithRAG() {
    const question = document.getElementById('chatQuestion').value;
    const maxContext = parseInt(document.getElementById('maxContext').value);
    const responseDiv = document.getElementById('chatResponse');
    const chatBtn = document.getElementById('chatBtn');

    if (!question.trim()) {
        responseDiv.style.display = 'block';
        responseDiv.textContent = 'โŒ Please enter a question';
        return;
    }
    if (!transformersReady || !modelsInitialized || !embeddingModel || !qaModel) {
        responseDiv.style.display = 'block';
        responseDiv.textContent = 'โŒ AI models not loaded yet. Please initialize them first!';
        return;
    }

    responseDiv.style.display = 'block';
    responseDiv.innerHTML = 'Generating answer with real AI...';
    chatBtn.disabled = true;

    try {
        const questionEmbedding = await generateEmbedding(question);

        // Keep documents above a minimal similarity threshold, best first.
        const relevantDocs = [];
        documents.forEach(doc => {
            if (doc.embedding) {
                const similarity = cosineSimilarity(questionEmbedding, doc.embedding);
                if (similarity > 0.1) {
                    relevantDocs.push({ doc, similarity });
                }
            }
        });
        relevantDocs.sort((a, b) => b.similarity - a.similarity);
        relevantDocs.splice(maxContext); // keep only the top `maxContext`

        if (relevantDocs.length === 0) {
            responseDiv.textContent = 'โŒ No relevant context found in the documents for your question.';
            return;
        }

        // Combine context from top documents, truncated for the QA model.
        const context = relevantDocs.map(item => item.doc.content).join(' ').substring(0, 2000);
        const qaResult = await qaModel(question, context);

        let response = `๐Ÿค– AI Answer:\n${qaResult.answer}\n\n`;
        response += `๐Ÿ“Š Confidence: ${(qaResult.score * 100).toFixed(1)}%\n\n`;
        response += `๐Ÿ“š Sources: ${relevantDocs.length} documents\n`;
        response += `๐Ÿ” Best match: "${relevantDocs[0].doc.title}" (similarity: ${relevantDocs[0].similarity.toFixed(3)})\n\n`;
        response += `๐Ÿ“ Context used:\n${context.substring(0, 300)}...`;
        responseDiv.textContent = response;
    } catch (error) {
        console.error('Chat error:', error);
        responseDiv.textContent = `โŒ Error generating response: ${error.message}`;
    } finally {
        chatBtn.disabled = false;
    }
}

/** Free-form text generation with the loaded LLM (no retrieval). */
async function chatWithLLM() {
    const prompt = document.getElementById('llmPrompt').value;
    const maxTokens = parseInt(document.getElementById('maxTokens').value);
    const temperature = parseFloat(document.getElementById('temperature').value);
    const responseDiv = document.getElementById('llmResponse');
    const llmBtn = document.getElementById('llmBtn');

    if (!prompt.trim()) {
        responseDiv.style.display = 'block';
        responseDiv.textContent = 'โŒ Please enter a prompt';
        return;
    }
    if (!transformersReady || !modelsInitialized || !llmModel) {
        responseDiv.style.display = 'block';
        responseDiv.textContent = 'โŒ LLM model not loaded yet. Please initialize models first!';
        return;
    }

    responseDiv.style.display = 'block';
    responseDiv.innerHTML = 'Generating text with LLM...';
    llmBtn.disabled = true;

    try {
        const result = await llmModel(prompt, {
            max_new_tokens: maxTokens,
            temperature: temperature,
            do_sample: true,
            return_full_text: false
        });
        let generatedText = result[0].generated_text;

        let response = `๐Ÿš€ LLM Generated Text:\n\n"${generatedText}"\n\n`;
        response += `๐Ÿ“Š Settings: ${maxTokens} tokens, temperature ${temperature}\n`;
        response += `๐Ÿค– Model: ${loadedModelName ? loadedModelName.split('/')[1] : 'Language Model'}\n`;
        response += `โฑ๏ธ Generated in real-time by your browser!`;
        responseDiv.textContent = response;
    } catch (error) {
        console.error('LLM error:', error);
        responseDiv.textContent = `โŒ Error generating text: ${error.message}`;
    } finally {
        llmBtn.disabled = false;
    }
}

/**
 * Retrieval-augmented generation: prepend the top-3 semantically relevant
 * document snippets as context to the prompt, then generate with the LLM.
 */
async function chatWithLLMRAG() {
    const prompt = document.getElementById('llmPrompt').value;
    const maxTokens = parseInt(document.getElementById('maxTokens').value);
    const temperature = parseFloat(document.getElementById('temperature').value);
    const responseDiv = document.getElementById('llmResponse');
    const llmRagBtn = document.getElementById('llmRagBtn');

    if (!prompt.trim()) {
        responseDiv.style.display = 'block';
        responseDiv.textContent = 'โŒ Please enter a prompt';
        return;
    }
    if (!transformersReady || !modelsInitialized || !llmModel || !embeddingModel) {
        responseDiv.style.display = 'block';
        responseDiv.textContent = 'โŒ Models not loaded yet. Please initialize all models first!';
        return;
    }

    responseDiv.style.display = 'block';
    responseDiv.innerHTML = 'Finding relevant context and generating with LLM...';
    llmRagBtn.disabled = true;

    try {
        // Find relevant documents using semantic search
        const queryEmbedding = await generateEmbedding(prompt);
        const relevantDocs = [];
        documents.forEach(doc => {
            if (doc.embedding) {
                const similarity = cosineSimilarity(queryEmbedding, doc.embedding);
                if (similarity > 0.1) {
                    relevantDocs.push({ doc, similarity });
                }
            }
        });
        relevantDocs.sort((a, b) => b.similarity - a.similarity);
        relevantDocs.splice(3); // Limit to top 3 documents

        // Create enhanced prompt with context (300 chars per document).
        let enhancedPrompt = prompt;
        if (relevantDocs.length > 0) {
            const context = relevantDocs.map(item => item.doc.content.substring(0, 300)).join(' ');
            enhancedPrompt = `Context: ${context}\n\nQuestion: ${prompt}\n\nAnswer:`;
        }

        const result = await llmModel(enhancedPrompt, {
            max_new_tokens: maxTokens,
            temperature: temperature,
            do_sample: true,
            return_full_text: false
        });
        let generatedText = result[0].generated_text;

        let response = `๐Ÿค– LLM + RAG Generated Response:\n\n"${generatedText}"\n\n`;
        response += `๐Ÿ“š Context: ${relevantDocs.length} relevant documents used\n`;
        if (relevantDocs.length > 0) {
            response += `๐Ÿ” Best match: "${relevantDocs[0].doc.title}" (similarity: ${relevantDocs[0].similarity.toFixed(3)})\n`;
        }
        response += `๐Ÿ“Š Settings: ${maxTokens} tokens, temperature ${temperature}\n`;
        response += `๐Ÿš€ Model: ${loadedModelName ? loadedModelName.split('/')[1] : 'LLM'} enhanced with document retrieval`;
        responseDiv.textContent = response;
    } catch (error) {
        console.error('LLM+RAG error:', error);
        responseDiv.textContent = `โŒ Error generating response: ${error.message}`;
    } finally {
        llmRagBtn.disabled = false;
    }
}

/**
 * Add a document from the manual-entry form, embedding it immediately when
 * the models are already loaded. Clears the form on success.
 */
async function addDocumentManual() {
    const title = document.getElementById('docTitle').value || `User Document ${documents.length - 2}`;
    const content = document.getElementById('docContent').value;
    const statusDiv = document.getElementById('addStatus');
    const previewDiv = document.getElementById('docPreview');
    const addBtn = document.getElementById('addBtn');

    if (!content.trim()) {
        statusDiv.style.display = 'block';
        statusDiv.textContent = 'โŒ Please enter document content';
        previewDiv.style.display = 'none';
        return;
    }

    statusDiv.style.display = 'block';
    statusDiv.innerHTML = 'Adding document...';
    addBtn.disabled = true;

    try {
        const docId = documents.length;
        const trimmedContent = content.trim();
        const newDocument = {
            id: docId,
            title: title,
            content: trimmedContent,
            embedding: null
        };

        // Generate embedding if models are initialized.
        // Embed the same trimmed text that is stored on the document.
        if (transformersReady && modelsInitialized && embeddingModel) {
            statusDiv.innerHTML = 'Generating AI embedding...';
            newDocument.embedding = await generateEmbedding(trimmedContent);
        }
        documents.push(newDocument);

        const preview = content.length > 300 ? content.substring(0, 300) + '...' : content;
        const status = `โœ… Document added successfully! ๐Ÿ“„ Title: ${title} ๐Ÿ“Š Size: ${content.length.toLocaleString()} characters ๐Ÿ“š Total documents: ${documents.length}${(transformersReady && modelsInitialized) ? '\n๐Ÿง  AI embedding generated automatically' : '\nโš ๏ธ AI embedding will be generated when models are loaded'}`;
        statusDiv.textContent = status;
        previewDiv.style.display = 'block';
        previewDiv.textContent = `๐Ÿ“– Preview:\n${preview}`;

        // Clear form
        document.getElementById('docTitle').value = '';
        document.getElementById('docContent').value = '';
        updateStatus();
    } catch (error) {
        console.error('Error adding document:', error);
        statusDiv.textContent = `โŒ Error adding document: ${error.message}`;
    } finally {
        addBtn.disabled = false;
    }
}

/** Wire up click, drag-and-drop, and file-input handlers for uploads. */
function initFileUpload() {
    const uploadArea = document.getElementById('uploadArea');
    const fileInput = document.getElementById('fileInput');
    if (!uploadArea || !fileInput) return;

    // Click to select files
    uploadArea.addEventListener('click', () => {
        fileInput.click();
    });

    // Drag and drop functionality
    uploadArea.addEventListener('dragover', (e) => {
        e.preventDefault();
        uploadArea.classList.add('dragover');
    });
    uploadArea.addEventListener('dragleave', (e) => {
        e.preventDefault();
        uploadArea.classList.remove('dragover');
    });
    uploadArea.addEventListener('drop', (e) => {
        e.preventDefault();
        uploadArea.classList.remove('dragover');
        const files = e.dataTransfer.files;
        handleFiles(files);
    });

    // File input change
    fileInput.addEventListener('change', (e) => {
        handleFiles(e.target.files);
    });
}

/**
 * Process a FileList sequentially, showing per-file progress and a summary.
 * @param {FileList} files - Files dropped or selected by the user.
 */
async function handleFiles(files) {
    const uploadStatus = document.getElementById('uploadStatus');
    const uploadProgress = document.getElementById('uploadProgress');
    const uploadProgressBar = document.getElementById('uploadProgressBar');
    const uploadProgressText = document.getElementById('uploadProgressText');

    if (files.length === 0) return;

    uploadStatus.style.display = 'block';
    uploadProgress.style.display = 'block';
    uploadStatus.textContent = '';

    let successCount = 0;
    let errorCount = 0;
    for (let i = 0; i < files.length; i++) {
        const file = files[i];
        const progress = ((i + 1) / files.length) * 100;
        uploadProgressBar.style.width = progress + '%';
        if (file.size > 10000) {
            uploadProgressText.textContent = `Processing large file: ${file.name} (${i + 1}/${files.length}) - chunking for better search...`;
        } else {
            uploadProgressText.textContent = `Processing ${file.name} (${i + 1}/${files.length})...`;
        }
        try {
            await processFile(file);
            successCount++;
        } catch (error) {
            console.error(`Error processing ${file.name}:`, error);
            errorCount++;
        }
    }
    uploadProgress.style.display = 'none';

    let statusText = `โœ… Upload complete!\n๐Ÿ“ ${successCount} files processed successfully`;
    if (errorCount > 0) {
        statusText += `\nโŒ ${errorCount} files failed to process`;
    }
    statusText += `\n๐Ÿ“Š Total documents: ${documents.length}`;
    statusText += `\n๐Ÿงฉ Large files automatically chunked for better search`;
    uploadStatus.textContent = statusText;
    updateStatus();

    // Clear file input so selecting the same file again re-triggers change.
    document.getElementById('fileInput').value = '';
}

/**
 * Split a long text into sentence-aligned chunks no longer than
 * `maxChunkSize` characters (a single over-long sentence still becomes
 * its own chunk). Falls back to the whole text if no sentences are found.
 * @param {string} content - The text to split.
 * @param {number} [maxChunkSize=1000] - Soft maximum chunk length.
 * @returns {string[]} Non-empty chunks covering the input.
 */
function chunkDocument(content, maxChunkSize = 1000) {
    const sentences = content.split(/[.!?]+/).filter(s => s.trim().length > 0);
    const chunks = [];
    let currentChunk = '';
    for (let sentence of sentences) {
        sentence = sentence.trim();
        if (currentChunk.length + sentence.length > maxChunkSize && currentChunk.length > 0) {
            chunks.push(currentChunk.trim());
            currentChunk = sentence;
        } else {
            currentChunk += (currentChunk ? '. ' : '') + sentence;
        }
    }
    if (currentChunk.trim()) {
        chunks.push(currentChunk.trim());
    }
    return chunks.length > 0 ? chunks : [content];
}

/**
 * Read one uploaded file as text and add it to the document store,
 * chunking it when longer than 2000 characters. Embeds each resulting
 * document immediately if the models are loaded.
 * @param {File} file - The uploaded file.
 * @returns {Promise<void>} Resolves when all chunks are stored.
 */
async function processFile(file) {
    // FileReader is callback-based, so wrap it in a Promise.
    return new Promise((resolve, reject) => {
        const reader = new FileReader();
        reader.onload = async function(e) {
            try {
                const content = e.target.result.trim();
                const baseTitle = file.name.replace(/\.[^/.]+$/, ""); // Remove file extension

                if (content.length > 2000) {
                    // Chunk large documents for better retrieval granularity.
                    const chunks = chunkDocument(content, 1500);
                    console.log(`๐Ÿ“„ Chunking large file: ${chunks.length} chunks created from ${content.length} characters`);
                    for (let i = 0; i < chunks.length; i++) {
                        const chunkTitle = chunks.length > 1 ? `${baseTitle} (Part ${i + 1}/${chunks.length})` : baseTitle;
                        const newDocument = {
                            id: documents.length,
                            title: chunkTitle,
                            content: chunks[i],
                            embedding: null
                        };
                        if (transformersReady && modelsInitialized && embeddingModel) {
                            newDocument.embedding = await generateEmbedding(chunks[i]);
                        }
                        documents.push(newDocument);
                    }
                } else {
                    // Small document - process as single document
                    const newDocument = {
                        id: documents.length,
                        title: baseTitle,
                        content: content,
                        embedding: null
                    };
                    if (transformersReady && modelsInitialized && embeddingModel) {
                        newDocument.embedding = await generateEmbedding(content);
                    }
                    documents.push(newDocument);
                }
                resolve();
            } catch (error) {
                reject(error);
            }
        };
        reader.onerror = function() {
            reject(new Error(`Failed to read file: ${file.name}`));
        };
        reader.readAsText(file);
    });
}

/**
 * Smoke-test the whole system: report load state, then exercise embedding,
 * semantic search, QA, and LLM generation when the models are available.
 */
async function testSystem() {
    const outputDiv = document.getElementById('testOutput');
    const testBtn = document.getElementById('testBtn');

    outputDiv.style.display = 'block';
    outputDiv.innerHTML = 'Running system tests...';
    testBtn.disabled = true;

    try {
        let output = `๐Ÿงช System Test Results:\n\n`;
        output += `๐Ÿ“Š Documents: ${documents.length} loaded\n`;
        output += `๐Ÿ”ง Transformers.js: ${transformersReady ? 'โœ… Ready' : 'โŒ Not ready'}\n`;
        output += `๐Ÿง  Embedding Model: ${embeddingModel ? 'โœ… Loaded' : 'โŒ Not loaded'}\n`;
        output += `๐Ÿค– QA Model: ${qaModel ? 'โœ… Loaded' : 'โŒ Not loaded'}\n`;
        output += `๐Ÿš€ LLM Model: ${llmModel ? 'โœ… Loaded' : 'โŒ Not loaded'}\n\n`;

        if (transformersReady && modelsInitialized && embeddingModel) {
            output += `๐Ÿ” Testing embedding generation...\n`;
            const testEmbedding = await generateEmbedding("test sentence");
            output += `โœ… Embedding test: Generated ${testEmbedding.length}D vector\n\n`;

            output += `๐Ÿ” Testing semantic search...\n`;
            const testQuery = "artificial intelligence";
            const queryEmbedding = await generateEmbedding(testQuery);
            let testResults = [];
            documents.forEach(doc => {
                if (doc.embedding) {
                    const similarity = cosineSimilarity(queryEmbedding, doc.embedding);
                    testResults.push({ doc, similarity });
                }
            });
            testResults.sort((a, b) => b.similarity - a.similarity);
            if (testResults.length > 0) {
                output += `โœ… Search test: Found ${testResults.length} results\n`;
                output += `๐Ÿ“„ Top result: "${testResults[0].doc.title}" (similarity: ${testResults[0].similarity.toFixed(3)})\n\n`;
            }

            if (qaModel) {
                output += `๐Ÿค– Testing QA model...\n`;
                const context = documents[0].content.substring(0, 500);
                const testQuestion = "What is artificial intelligence?";
                const qaResult = await qaModel(testQuestion, context);
                output += `โœ… QA test: Generated answer with ${(qaResult.score * 100).toFixed(1)}% confidence\n`;
                output += `๐Ÿ’ฌ Answer: ${qaResult.answer.substring(0, 100)}...\n\n`;
            }

            if (llmModel) {
                output += `๐Ÿš€ Testing LLM model...\n`;
                const testPrompt = "Explain artificial intelligence:";
                const llmResult = await llmModel(testPrompt, {
                    max_new_tokens: 30,
                    temperature: 0.7,
                    do_sample: true,
                    return_full_text: false
                });
                output += `โœ… LLM test: Generated text completion\n`;
                output += `๐Ÿ’ฌ Generated: "${llmResult[0].generated_text.substring(0, 100)}..."\n\n`;
            }

            output += `๐ŸŽ‰ All tests passed! System is fully operational.`;
        } else {
            output += `โš ๏ธ Models not initialized. Click "Initialize AI Models" first.`;
        }
        outputDiv.textContent = output;
    } catch (error) {
        console.error('Test error:', error);
        outputDiv.textContent = `โŒ Test failed: ${error.message}`;
    } finally {
        testBtn.disabled = false;
    }
}

// Initialize UI (module scripts run deferred, so the DOM exists here).
updateStatus();

// Show version info in console
console.log('๐Ÿค– AI-Powered RAG System with Transformers.js');
console.log('Models: Xenova/all-MiniLM-L6-v2, Xenova/distilbert-base-cased-distilled-squad');

// Export functions for global access (inline onclick handlers in the HTML).
window.showTab = showTab;
window.updateSliderValue = updateSliderValue;
window.initializeModels = initializeModels;
window.searchDocumentsSemantic = searchDocumentsSemantic;
window.searchDocumentsKeyword = searchDocumentsKeyword;
window.chatWithRAG = chatWithRAG;
window.chatWithLLM = chatWithLLM;
window.chatWithLLMRAG = chatWithLLMRAG;
window.addDocumentManual = addDocumentManual;
window.testSystem = testSystem;