Spaces:
Running
Running
| // Import transformers.js 3.0.0 from CDN (new Hugging Face ownership) | |
| import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.0'; | |
// Publish the imported entry points on `window`, together with a flag that
// signals the ES-module build has finished loading.
Object.assign(window, {
  transformers: { pipeline, env },
  transformersLoaded: true,
});
console.log('✅ Transformers.js 3.0.0 loaded via ES modules (Hugging Face)');
// Handles into the Transformers.js library; all start out empty.
let transformersPipeline = null;
let transformersEnv = null;
let transformersReady = false;

// Seed corpus as [title, content] pairs. Each pair becomes a document record
// whose id is its position in the list and whose embedding is not yet computed.
let documents = [
  [
    "Artificial Intelligence Overview",
    "Artificial Intelligence (AI) is a branch of computer science that aims to create intelligent machines that work and react like humans. Some activities computers with AI are designed for include speech recognition, learning, planning, and problem-solving. AI is used in healthcare, finance, transportation, and entertainment. Machine learning enables computers to learn from experience without explicit programming. Deep learning uses neural networks to understand complex patterns in data.",
  ],
  [
    "Space Exploration",
    "Space exploration is the ongoing discovery and exploration of celestial structures in outer space through evolving space technology. Physical exploration is conducted by unmanned robotic probes and human spaceflight. Space exploration has been used for geopolitical rivalries like the Cold War. The early era was driven by a Space Race between the Soviet Union and United States. Modern exploration includes Mars missions, the International Space Station, and satellite programs.",
  ],
  [
    "Renewable Energy",
    "Renewable energy comes from naturally replenished resources on a human timescale. It includes sunlight, wind, rain, tides, waves, and geothermal heat. Renewable energy contrasts with fossil fuels that are used faster than replenished. Most renewable sources are sustainable. Solar energy is abundant and promising. Wind energy and hydroelectric power are major contributors to renewable generation worldwide.",
  ],
].map(([title, content], id) => ({ id, title, content, embedding: null }));

// Loaded model pipelines and bookkeeping about what has been initialized.
let embeddingModel = null;
let qaModel = null;
let llmModel = null;
let loadedModelName = '';
let modelsInitialized = false;
/**
 * Cosine similarity between two numeric vectors.
 *
 * @param {ArrayLike<number>} a - First vector; its length drives the dot product.
 * @param {ArrayLike<number>} b - Second vector.
 * @returns {number} dot(a, b) / (|a| * |b|), or 0 when either magnitude is 0.
 */
function cosineSimilarity(a, b) {
  let dot = 0;
  let squaresA = 0;
  a.forEach((value, index) => {
    dot += value * b[index];
    squaresA += value * value;
  });

  let squaresB = 0;
  b.forEach((value) => {
    squaresB += value * value;
  });

  const normA = Math.sqrt(squaresA);
  const normB = Math.sqrt(squaresB);
  return normA === 0 || normB === 0 ? 0 : dot / (normA * normB);
}
// Initialize transformers.js when the script loads
/**
 * Locates the Transformers.js library on `window`, stores its `pipeline` and
 * `env` exports in the module-level handles and reflects the outcome in the UI.
 *
 * Lookup order: the ES-module build (`window.transformers`, flagged by
 * `window.transformersLoaded`), then the UMD build (`window.Transformers`).
 * When neither is present yet, polls every 200 ms for at most 50 attempts.
 * Failures are logged and shown on the page; nothing is rethrown.
 *
 * @returns {Promise<void>}
 */
async function initTransformers() {
  const esBuildPresent = () => Boolean(window.transformers && window.transformersLoaded);

  const adopt = (library) => {
    ({ pipeline: transformersPipeline, env: transformersEnv } = library);
  };

  const setIndicator = (text, color) => {
    const indicator = document.getElementById('transformersStatus');
    if (indicator) {
      indicator.textContent = text;
      indicator.style.color = color;
    }
  };

  try {
    console.log('🔄 Initializing Transformers.js...');

    if (esBuildPresent()) {
      // Preferred: the ES-module build.
      console.log('✅ Using ES modules version (Transformers.js 3.0.0)');
      adopt(window.transformers);
    } else if (window.Transformers) {
      // Fallback: the UMD build.
      console.log('✅ Using UMD version (Transformers.js 3.0.0)');
      adopt(window.Transformers);
    } else {
      // Neither build is visible yet: poll until one shows up or we give up.
      console.log('⏳ Waiting for library to load...');
      for (let attempts = 0; attempts < 50; attempts++) {
        if (window.Transformers || window.transformersLoaded) {
          break;
        }
        await new Promise((resolve) => setTimeout(resolve, 200));
      }

      if (esBuildPresent()) {
        adopt(window.transformers);
      } else if (window.Transformers) {
        adopt(window.Transformers);
      } else {
        throw new Error('Failed to load Transformers.js library');
      }
    }

    // Minimal configuration; WASM paths are left at the library defaults.
    if (transformersEnv) {
      transformersEnv.allowLocalModels = false;
      transformersEnv.allowRemoteModels = true;
    }

    transformersReady = true;
    console.log('✅ Transformers.js initialized successfully');

    updateStatus();
    setIndicator('✅ Ready!', 'green');
  } catch (error) {
    console.error('❌ Error initializing Transformers.js:', error);

    // Surface the failure in the main status line when it exists.
    const statusLine = document.getElementById('status');
    if (statusLine) {
      statusLine.textContent = `❌ Failed to load Transformers.js: ${error.message}`;
      statusLine.style.color = 'red';
    }

    setIndicator(`❌ Failed: ${error.message}`, 'red');
  }
}
// Once the DOM has been parsed, start library initialization and wire up the
// file-upload controls.
document.addEventListener('DOMContentLoaded', () => {
  initTransformers();
  initFileUpload();
});
// UI Functions
/**
 * Activates the tab panel with the given id and highlights its button.
 *
 * @param {string} tabName - Element id of the `.tab-content` panel to show.
 * @param {Event} [evt] - The click event. Optional for backward compatibility:
 *   when omitted, the deprecated `window.event` is consulted, and when that is
 *   absent too (programmatic call) no button is highlighted.
 * @returns {void}
 */
function showTab(tabName, evt) {
  const panel = document.getElementById(tabName);
  if (!panel) {
    // Leave the current tab visible rather than blanking the page.
    console.warn(`showTab: no element with id "${tabName}"`);
    return;
  }

  // Deactivate every panel and every tab button.
  document.querySelectorAll('.tab-content').forEach((tab) => {
    tab.classList.remove('active');
  });
  document.querySelectorAll('.tab').forEach((button) => {
    button.classList.remove('active');
  });

  panel.classList.add('active');

  const trigger = evt ?? (typeof window !== 'undefined' ? window.event : undefined);
  const target = trigger?.target;
  // A click may land on a child node of the button; prefer the enclosing .tab.
  const button = target?.closest?.('.tab') ?? target;
  button?.classList?.add('active');
}
/**
 * Copies a slider's current value into its companion label, whose id is the
 * slider id followed by `Value`.
 *
 * @param {string} sliderId - Element id of the slider input.
 * @returns {void}
 */
function updateSliderValue(sliderId) {
  const { value } = document.getElementById(sliderId);
  const label = document.getElementById(`${sliderId}Value`);
  label.textContent = value;
}
/**
 * Refreshes the one-line summary in `#status`: document count plus the load
 * state of the library and of each model.
 *
 * Does nothing when `#status` is not in the DOM. This function is also invoked
 * at script top level, where a missing element would otherwise throw and abort
 * the rest of the script.
 *
 * @returns {void}
 */
function updateStatus() {
  const status = document.getElementById('status');
  if (!status) {
    return;
  }

  const transformersStatus = transformersReady ? 'Ready' : 'Not ready';
  const embeddingStatus = embeddingModel ? 'Loaded' : 'Not loaded';
  const qaStatus = qaModel ? 'Loaded' : 'Not loaded';
  const llmStatus = llmModel ? 'Loaded' : 'Not loaded';

  status.textContent = `📊 Documents: ${documents.length} | 🔧 Transformers.js: ${transformersStatus} | 🤖 QA: ${qaStatus} | 🧠 Embedding: ${embeddingStatus} | 🚀 LLM: ${llmStatus}`;
}
/**
 * Sets the initialization progress bar width and its caption.
 *
 * @param {number} percent - Bar width, written as `<percent>%`.
 * @param {string} text - Caption shown in `#progressText`.
 * @returns {void}
 */
function updateProgress(percent, text) {
  const bar = document.getElementById('progressBar');
  const caption = document.getElementById('progressText');
  bar.style.width = `${percent}%`;
  caption.textContent = text;
}
// AI Functions
/**
 * Loads the embedding, question-answering and text-generation pipelines, then
 * computes an embedding for every document currently in `documents`.
 *
 * Progress and errors are written to `#initStatus` and the progress bar; the
 * init button is disabled while running and re-enabled afterwards. Errors are
 * reported in the UI and logged, not rethrown.
 *
 * @returns {Promise<void>}
 */
async function initializeModels() {
  const statusDiv = document.getElementById('initStatus');
  const progressDiv = document.getElementById('initProgress');
  const initBtn = document.getElementById('initBtn');

  statusDiv.style.display = 'block';
  progressDiv.style.display = 'block';
  initBtn.disabled = true;

  // Builds a progress_callback that shows the download percentage for `label`.
  // Transformers.js delivers byte counts on events whose status is 'progress'
  // (the other statuses are 'initiate', 'download', 'done' and 'ready').
  const reportDownload = (label) => (progress) => {
    if (progress.status !== 'progress') {
      return;
    }
    const percent = progress.loaded && progress.total
      ? Math.round((progress.loaded / progress.total) * 100)
      : 0;
    statusDiv.innerHTML = `🔄 Downloading ${label}: ${percent}%`;
  };

  try {
    // Wait (up to 30 s) for the library itself if it is not ready yet.
    if (!transformersReady || !transformersPipeline) {
      updateProgress(5, "Waiting for Transformers.js to initialize...");
      statusDiv.innerHTML = '🔄 Initializing Transformers.js library...';

      let attempts = 0;
      while (!transformersReady && attempts < 30) {
        await new Promise((resolve) => setTimeout(resolve, 1000));
        attempts++;
      }
      if (!transformersReady) {
        throw new Error('Transformers.js failed to initialize. Please refresh the page.');
      }
    }

    updateProgress(10, "Loading embedding model...");
    statusDiv.innerHTML = '🔄 Loading embedding model (Xenova/all-MiniLM-L6-v2)...';
    embeddingModel = await transformersPipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2', {
      progress_callback: reportDownload('embedding model'),
    });

    updateProgress(40, "Loading question-answering model...");
    statusDiv.innerHTML = '🔄 Loading QA model (Xenova/distilbert-base-cased-distilled-squad)...';
    qaModel = await transformersPipeline('question-answering', 'Xenova/distilbert-base-cased-distilled-squad', {
      progress_callback: reportDownload('QA model'),
    });

    updateProgress(70, "Loading language model...");
    statusDiv.innerHTML = '🔄 Loading LLM (trying GPT-2 models)...';

    // Candidates are tried in order; the first one that loads wins.
    const modelsToTry = [
      { name: 'Xenova/gpt2', options: {} },
      { name: 'Xenova/distilgpt2', options: {} },
    ];

    let modelLoaded = false;
    for (const model of modelsToTry) {
      try {
        console.log(`Trying to load ${model.name}...`);
        statusDiv.innerHTML = `🔄 Loading LLM (${model.name})...`;

        llmModel = await transformersPipeline('text-generation', model.name, {
          progress_callback: reportDownload(model.name),
        });

        console.log(`✅ Successfully loaded ${model.name}`);
        loadedModelName = model.name;
        modelLoaded = true;
        break;
      } catch (error) {
        // Deliberate best-effort: fall through to the next candidate.
        console.warn(`${model.name} failed:`, error);
      }
    }
    if (!modelLoaded) {
      throw new Error('Failed to load any LLM model');
    }

    updateProgress(85, "Generating embeddings for documents...");
    statusDiv.innerHTML = '🔄 Generating embeddings for existing documents...';

    // Embed sequentially so the progress bar advances per document.
    for (let i = 0; i < documents.length; i++) {
      const doc = documents[i];
      updateProgress(85 + (i / documents.length) * 10, `Processing document ${i + 1}/${documents.length}...`);
      doc.embedding = await generateEmbedding(doc.content);
    }

    updateProgress(100, "Initialization complete!");
    modelsInitialized = true;

    statusDiv.innerHTML = `✅ AI Models initialized successfully!
🧠 Embedding Model: Xenova/all-MiniLM-L6-v2 (384 dimensions)
🤖 QA Model: Xenova/distilbert-base-cased-distilled-squad
🚀 LLM Model: ${loadedModelName} (Language model for text generation)
📚 Documents processed: ${documents.length}
🔮 Ready for semantic search, Q&A, and LLM chat!
📊 Model Info:
• Embedding model size: ~23MB
• QA model size: ~28MB
• LLM model size: ~15-50MB (depending on model loaded)
• Total memory usage: ~70-100MB
• Inference speed: ~2-8 seconds per operation`;

    updateStatus();
  } catch (error) {
    console.error('Error initializing models:', error);
    statusDiv.innerHTML = `❌ Error initializing models: ${error.message}
Please check your internet connection and try again.`;
    updateProgress(0, "Initialization failed");
  } finally {
    initBtn.disabled = false;
    // Keep the final progress state visible briefly before hiding the bar.
    setTimeout(() => {
      progressDiv.style.display = 'none';
    }, 2000);
  }
}
/**
 * Runs the embedding pipeline on `text` with mean pooling and normalization
 * and returns the result as a plain array.
 *
 * @param {string} text - Text to embed.
 * @returns {Promise<number[]>} Copy of the pipeline output's `data`.
 * @throws {Error} When the library or the embedding model is not loaded, or
 *   when the pipeline itself fails (logged, then rethrown unchanged).
 */
async function generateEmbedding(text) {
  const libraryReady = transformersReady && transformersPipeline;
  if (!libraryReady) {
    throw new Error('Transformers.js not initialized');
  }
  if (!embeddingModel) {
    throw new Error('Embedding model not loaded');
  }

  try {
    const { data } = await embeddingModel(text, { pooling: 'mean', normalize: true });
    return Array.from(data);
  } catch (error) {
    console.error('Error generating embedding:', error);
    throw error;
  }
}
/**
 * Embeds the query from `#searchQuery`, ranks every document that already has
 * an embedding by cosine similarity, and prints the top `#maxResults` entries
 * into `#searchResults`.
 *
 * @returns {Promise<void>}
 */
async function searchDocumentsSemantic() {
  const query = document.getElementById('searchQuery').value;
  const maxResults = parseInt(document.getElementById('maxResults').value);
  const resultsDiv = document.getElementById('searchResults');
  const searchBtn = document.getElementById('searchBtn');

  const showMessage = (message) => {
    resultsDiv.style.display = 'block';
    resultsDiv.textContent = message;
  };

  if (!query.trim()) {
    showMessage('❌ Please enter a search query');
    return;
  }

  const ready = transformersReady && modelsInitialized && embeddingModel;
  if (!ready) {
    showMessage('❌ Please initialize AI models first!');
    return;
  }

  resultsDiv.style.display = 'block';
  resultsDiv.innerHTML = '<div class="loading"></div> Generating query embedding and searching...';
  searchBtn.disabled = true;

  try {
    const queryEmbedding = await generateEmbedding(query);

    // Score only documents that have an embedding, best match first.
    const ranked = documents
      .filter((doc) => doc.embedding)
      .map((doc) => ({ doc, similarity: cosineSimilarity(queryEmbedding, doc.embedding) }))
      .sort((left, right) => right.similarity - left.similarity);

    if (ranked.length === 0) {
      resultsDiv.textContent = `❌ No documents with embeddings found for '${query}'`;
      return;
    }

    const entries = ranked.slice(0, maxResults).map(({ doc, similarity }, index) => {
      const excerpt = doc.content.length > 200 ? doc.content.substring(0, 200) + '...' : doc.content;
      return `**Result ${index + 1}** (similarity: ${similarity.toFixed(3)})\n📄 Title: ${doc.title}\n📝 Content: ${excerpt}\n\n`;
    });

    resultsDiv.textContent = `🔍 Semantic search results for '${query}':\n\n` + entries.join('');
  } catch (error) {
    console.error('Search error:', error);
    resultsDiv.textContent = `❌ Error during search: ${error.message}`;
  } finally {
    searchBtn.disabled = false;
  }
}
/**
 * Case-insensitive keyword search over `documents`.
 *
 * Each whitespace-separated query word is matched as a literal substring;
 * occurrences in the title count double. The top `#maxResults` documents are
 * printed into `#searchResults` after a short delay.
 *
 * Words are matched literally, never compiled into a regular expression, so
 * input such as `c++` or `(` cannot throw and `.` is not a wildcard. Empty
 * words produced by leading/trailing whitespace are ignored.
 *
 * @returns {void}
 */
function searchDocumentsKeyword() {
  const query = document.getElementById('searchQuery').value;
  const maxResults = parseInt(document.getElementById('maxResults').value, 10);
  const resultsDiv = document.getElementById('searchResults');

  if (!query.trim()) {
    resultsDiv.style.display = 'block';
    resultsDiv.textContent = '❌ Please enter a search query';
    return;
  }

  resultsDiv.style.display = 'block';
  resultsDiv.innerHTML = '<div class="loading"></div> Searching keywords...';

  // Non-overlapping literal occurrences of `needle` (non-empty) in `haystack`.
  const countOccurrences = (haystack, needle) => haystack.split(needle).length - 1;

  setTimeout(() => {
    const queryWords = query.toLowerCase().split(/\s+/).filter(Boolean);

    const results = [];
    for (const doc of documents) {
      const contentLower = doc.content.toLowerCase();
      const titleLower = doc.title.toLowerCase();

      let score = 0;
      for (const word of queryWords) {
        score += countOccurrences(contentLower, word);
        score += countOccurrences(titleLower, word) * 2; // title hits weigh double
      }
      if (score > 0) {
        results.push({ doc, score });
      }
    }

    results.sort((a, b) => b.score - a.score);

    if (results.length === 0) {
      resultsDiv.textContent = `❌ No documents found containing '${query}'`;
      return;
    }

    let output = `🔍 Keyword search results for '${query}':\n\n`;
    results.slice(0, maxResults).forEach((result, i) => {
      const { doc } = result;
      const excerpt = doc.content.length > 200 ? doc.content.substring(0, 200) + '...' : doc.content;
      output += `**Result ${i + 1}**\n📄 Title: ${doc.title}\n📝 Content: ${excerpt}\n\n`;
    });
    resultsDiv.textContent = output;
  }, 500);
}
/**
 * Answers the question in `#chatQuestion` with the QA model, using as context
 * the documents whose embedding similarity to the question exceeds 0.1
 * (best first, truncated to `#maxContext` documents and 2000 characters).
 * The answer and its provenance are written to `#chatResponse`.
 *
 * @returns {Promise<void>}
 */
async function chatWithRAG() {
  const question = document.getElementById('chatQuestion').value;
  const maxContext = parseInt(document.getElementById('maxContext').value);
  const responseDiv = document.getElementById('chatResponse');
  const chatBtn = document.getElementById('chatBtn');

  const showMessage = (message) => {
    responseDiv.style.display = 'block';
    responseDiv.textContent = message;
  };

  if (!question.trim()) {
    showMessage('❌ Please enter a question');
    return;
  }

  const ready = transformersReady && modelsInitialized && embeddingModel && qaModel;
  if (!ready) {
    showMessage('❌ AI models not loaded yet. Please initialize them first!');
    return;
  }

  responseDiv.style.display = 'block';
  responseDiv.innerHTML = '<div class="loading"></div> Generating answer with real AI...';
  chatBtn.disabled = true;

  try {
    const questionEmbedding = await generateEmbedding(question);

    // Retrieve: score embedded documents, keep those above the threshold.
    const relevantDocs = documents
      .filter((doc) => doc.embedding)
      .map((doc) => ({ doc, similarity: cosineSimilarity(questionEmbedding, doc.embedding) }))
      .filter(({ similarity }) => similarity > 0.1)
      .sort((left, right) => right.similarity - left.similarity);
    relevantDocs.splice(maxContext);

    if (relevantDocs.length === 0) {
      responseDiv.textContent = '❌ No relevant context found in the documents for your question.';
      return;
    }

    // Concatenate the retained documents into one bounded context string.
    const context = relevantDocs
      .map(({ doc }) => doc.content)
      .join(' ')
      .substring(0, 2000);

    const qaResult = await qaModel(question, context);
    const [best] = relevantDocs;

    responseDiv.textContent = [
      `🤖 AI Answer:\n${qaResult.answer}\n\n`,
      `📊 Confidence: ${(qaResult.score * 100).toFixed(1)}%\n\n`,
      `📚 Sources: ${relevantDocs.length} documents\n`,
      `🔍 Best match: "${best.doc.title}" (similarity: ${best.similarity.toFixed(3)})\n\n`,
      `📝 Context used:\n${context.substring(0, 300)}...`,
    ].join('');
  } catch (error) {
    console.error('Chat error:', error);
    responseDiv.textContent = `❌ Error generating response: ${error.message}`;
  } finally {
    chatBtn.disabled = false;
  }
}
/**
 * Sends the prompt in `#llmPrompt` to the text-generation model with the
 * sampling settings from `#maxTokens` and `#temperature`, and writes the
 * completion plus the settings used into `#llmResponse`.
 *
 * @returns {Promise<void>}
 */
async function chatWithLLM() {
  const prompt = document.getElementById('llmPrompt').value;
  const maxTokens = parseInt(document.getElementById('maxTokens').value);
  const temperature = parseFloat(document.getElementById('temperature').value);
  const responseDiv = document.getElementById('llmResponse');
  const llmBtn = document.getElementById('llmBtn');

  const showMessage = (message) => {
    responseDiv.style.display = 'block';
    responseDiv.textContent = message;
  };

  if (!prompt.trim()) {
    showMessage('❌ Please enter a prompt');
    return;
  }

  const ready = transformersReady && modelsInitialized && llmModel;
  if (!ready) {
    showMessage('❌ LLM model not loaded yet. Please initialize models first!');
    return;
  }

  responseDiv.style.display = 'block';
  responseDiv.innerHTML = '<div class="loading"></div> Generating text with LLM...';
  llmBtn.disabled = true;

  try {
    const generationOptions = {
      max_new_tokens: maxTokens,
      temperature: temperature,
      do_sample: true,
      return_full_text: false,
    };
    const [firstResult] = await llmModel(prompt, generationOptions);
    const generatedText = firstResult.generated_text;

    // Show only the part after the organization prefix of the model id.
    const modelLabel = loadedModelName ? loadedModelName.split('/')[1] : 'Language Model';

    responseDiv.textContent = [
      `🚀 LLM Generated Text:\n\n"${generatedText}"\n\n`,
      `📊 Settings: ${maxTokens} tokens, temperature ${temperature}\n`,
      `🤖 Model: ${modelLabel}\n`,
      `⏱️ Generated in real-time by your browser!`,
    ].join('');
  } catch (error) {
    console.error('LLM error:', error);
    responseDiv.textContent = `❌ Error generating text: ${error.message}`;
  } finally {
    llmBtn.disabled = false;
  }
}
/**
 * Retrieval-augmented generation: finds up to three documents whose embedding
 * similarity to the prompt exceeds 0.1, prepends 300-character excerpts of
 * them to the prompt as context, and runs the text-generation model. When no
 * document qualifies, the raw prompt is used. Output goes to `#llmResponse`.
 *
 * @returns {Promise<void>}
 */
async function chatWithLLMRAG() {
  const prompt = document.getElementById('llmPrompt').value;
  const maxTokens = parseInt(document.getElementById('maxTokens').value);
  const temperature = parseFloat(document.getElementById('temperature').value);
  const responseDiv = document.getElementById('llmResponse');
  const llmRagBtn = document.getElementById('llmRagBtn');

  const showMessage = (message) => {
    responseDiv.style.display = 'block';
    responseDiv.textContent = message;
  };

  if (!prompt.trim()) {
    showMessage('❌ Please enter a prompt');
    return;
  }

  const ready = transformersReady && modelsInitialized && llmModel && embeddingModel;
  if (!ready) {
    showMessage('❌ Models not loaded yet. Please initialize all models first!');
    return;
  }

  responseDiv.style.display = 'block';
  responseDiv.innerHTML = '<div class="loading"></div> Finding relevant context and generating with LLM...';
  llmRagBtn.disabled = true;

  try {
    const queryEmbedding = await generateEmbedding(prompt);

    // Retrieve: score embedded documents, keep the three best above threshold.
    const relevantDocs = documents
      .filter((doc) => doc.embedding)
      .map((doc) => ({ doc, similarity: cosineSimilarity(queryEmbedding, doc.embedding) }))
      .filter(({ similarity }) => similarity > 0.1)
      .sort((left, right) => right.similarity - left.similarity);
    relevantDocs.splice(3);

    const hasContext = relevantDocs.length > 0;

    // Augment: wrap the prompt with the retrieved excerpts when there are any.
    let enhancedPrompt = prompt;
    if (hasContext) {
      const context = relevantDocs.map(({ doc }) => doc.content.substring(0, 300)).join(' ');
      enhancedPrompt = `Context: ${context}\n\nQuestion: ${prompt}\n\nAnswer:`;
    }

    const [firstResult] = await llmModel(enhancedPrompt, {
      max_new_tokens: maxTokens,
      temperature: temperature,
      do_sample: true,
      return_full_text: false,
    });
    const generatedText = firstResult.generated_text;

    const parts = [
      `🤖 LLM + RAG Generated Response:\n\n"${generatedText}"\n\n`,
      `📚 Context: ${relevantDocs.length} relevant documents used\n`,
    ];
    if (hasContext) {
      const [best] = relevantDocs;
      parts.push(`🔍 Best match: "${best.doc.title}" (similarity: ${best.similarity.toFixed(3)})\n`);
    }
    parts.push(`📊 Settings: ${maxTokens} tokens, temperature ${temperature}\n`);
    parts.push(`🚀 Model: ${loadedModelName ? loadedModelName.split('/')[1] : 'LLM'} enhanced with document retrieval`);

    responseDiv.textContent = parts.join('');
  } catch (error) {
    console.error('LLM+RAG error:', error);
    responseDiv.textContent = `❌ Error generating response: ${error.message}`;
  } finally {
    llmRagBtn.disabled = false;
  }
}
/**
 * Adds the document typed into `#docTitle` / `#docContent` to `documents`.
 *
 * The content is trimmed once, and that trimmed text is what gets stored,
 * embedded, measured and previewed. A blank title falls back to
 * `User Document <n>`. An embedding is generated only when the embedding model
 * is loaded, and the status message reports whether one was actually produced.
 * Errors are shown in `#addStatus` and logged, not rethrown.
 *
 * @returns {Promise<void>}
 */
async function addDocumentManual() {
  const titleInput = document.getElementById('docTitle');
  const contentInput = document.getElementById('docContent');
  const statusDiv = document.getElementById('addStatus');
  const previewDiv = document.getElementById('docPreview');
  const addBtn = document.getElementById('addBtn');

  // The default numbering offsets the document count by 2, as before.
  const title = titleInput.value.trim() || `User Document ${documents.length - 2}`;
  const content = contentInput.value.trim();

  if (!content) {
    statusDiv.style.display = 'block';
    statusDiv.textContent = '❌ Please enter document content';
    previewDiv.style.display = 'none';
    return;
  }

  statusDiv.style.display = 'block';
  statusDiv.innerHTML = '<div class="loading"></div> Adding document...';
  addBtn.disabled = true;

  try {
    const newDocument = { id: null, title, content, embedding: null };

    if (transformersReady && modelsInitialized && embeddingModel) {
      statusDiv.innerHTML = '<div class="loading"></div> Generating AI embedding...';
      newDocument.embedding = await generateEmbedding(content);
    }

    // Assign the id only now: other documents may have been appended while
    // the embedding was being computed.
    newDocument.id = documents.length;
    documents.push(newDocument);

    const preview = content.length > 300 ? content.substring(0, 300) + '...' : content;
    const embeddingNote = newDocument.embedding
      ? '\n🧠 AI embedding generated automatically'
      : '\n⚠️ AI embedding will be generated when models are loaded';

    statusDiv.textContent = `✅ Document added successfully!
📄 Title: ${title}
📊 Size: ${content.length.toLocaleString()} characters
📚 Total documents: ${documents.length}${embeddingNote}`;

    previewDiv.style.display = 'block';
    previewDiv.textContent = `📖 Preview:\n${preview}`;

    // Clear the form for the next entry.
    titleInput.value = '';
    contentInput.value = '';

    updateStatus();
  } catch (error) {
    console.error('Error adding document:', error);
    statusDiv.textContent = `❌ Error adding document: ${error.message}`;
  } finally {
    addBtn.disabled = false;
  }
}
// File upload functionality
/**
 * Wires the upload drop zone: clicking it opens the hidden file picker,
 * dragging over it toggles the `dragover` class, and dropped or picked files
 * are handed to `handleFiles`. Does nothing when either element is missing.
 *
 * @returns {void}
 */
function initFileUpload() {
  const dropZone = document.getElementById('uploadArea');
  const picker = document.getElementById('fileInput');
  if (!(dropZone && picker)) {
    return;
  }

  // Toggle the visual drag highlight; preventDefault is needed on every drag
  // event so the browser does not handle the file itself.
  const setHighlight = (on) => (dragEvent) => {
    dragEvent.preventDefault();
    if (on) {
      dropZone.classList.add('dragover');
    } else {
      dropZone.classList.remove('dragover');
    }
  };

  dropZone.addEventListener('click', () => {
    picker.click();
  });
  dropZone.addEventListener('dragover', setHighlight(true));
  dropZone.addEventListener('dragleave', setHighlight(false));
  dropZone.addEventListener('drop', (dropEvent) => {
    setHighlight(false)(dropEvent);
    handleFiles(dropEvent.dataTransfer.files);
  });

  picker.addEventListener('change', (changeEvent) => {
    handleFiles(changeEvent.target.files);
  });
}
/**
 * Processes a batch of uploaded files one after another, updating the upload
 * progress bar as it goes, then reports how many succeeded and failed.
 * A failure in one file is logged and counted; it does not stop the batch.
 *
 * @param {FileList|File[]} files - Files from a drop or from the file input.
 * @returns {Promise<void>}
 */
async function handleFiles(files) {
  const uploadStatus = document.getElementById('uploadStatus');
  const uploadProgress = document.getElementById('uploadProgress');
  const uploadProgressBar = document.getElementById('uploadProgressBar');
  const uploadProgressText = document.getElementById('uploadProgressText');

  if (files.length === 0) {
    return;
  }

  uploadStatus.style.display = 'block';
  uploadProgress.style.display = 'block';
  uploadStatus.textContent = '';

  const total = files.length;
  const tally = { ok: 0, failed: 0 };

  for (let index = 0; index < total; index++) {
    const file = files[index];
    const position = index + 1;

    uploadProgressBar.style.width = (position / total) * 100 + '%';
    uploadProgressText.textContent = file.size > 10000
      ? `Processing large file: ${file.name} (${position}/${total}) - chunking for better search...`
      : `Processing ${file.name} (${position}/${total})...`;

    try {
      await processFile(file);
      tally.ok += 1;
    } catch (error) {
      console.error(`Error processing ${file.name}:`, error);
      tally.failed += 1;
    }
  }

  uploadProgress.style.display = 'none';

  const summary = [`✅ Upload complete!\n📁 ${tally.ok} files processed successfully`];
  if (tally.failed > 0) {
    summary.push(`\n❌ ${tally.failed} files failed to process`);
  }
  summary.push(`\n📊 Total documents: ${documents.length}`);
  summary.push(`\n🧩 Large files automatically chunked for better search`);
  uploadStatus.textContent = summary.join('');

  updateStatus();

  // Reset the picker so the same file can be selected again.
  document.getElementById('fileInput').value = '';
}
// Document chunking function for large files
/**
 * Splits text into chunks of at most `maxChunkSize` characters, breaking at
 * sentence boundaries where possible.
 *
 * A sentence boundary is a run of `.`, `!` or `?` followed by whitespace or
 * the end of the text, so decimals such as `3.14` stay intact. Sentences keep
 * their own terminators and are joined with a single space. A sentence longer
 * than the limit is cut at the last space before the limit (or hard-cut when
 * it contains none), so no chunk exceeds the limit.
 *
 * @param {string} content - Text to split.
 * @param {number} [maxChunkSize=1000] - Maximum chunk length in characters;
 *   values that are not finite or are below 1 fall back to 1000.
 * @returns {string[]} The chunks, or `[content]` when the text has no
 *   non-whitespace characters.
 */
function chunkDocument(content, maxChunkSize = 1000) {
  const limit = Number.isFinite(maxChunkSize) && maxChunkSize >= 1
    ? Math.floor(maxChunkSize)
    : 1000;

  // 1. Cut the text after each sentence terminator, keeping the terminator.
  const sentences = [];
  const boundary = /[.!?]+(?=\s|$)/g;
  let start = 0;
  let match;
  while ((match = boundary.exec(content)) !== null) {
    const end = match.index + match[0].length;
    sentences.push(content.slice(start, end));
    start = end;
  }
  sentences.push(content.slice(start)); // trailing text without a terminator

  // 2. Break any sentence that alone exceeds the limit.
  const pieces = [];
  for (const sentence of sentences) {
    let rest = sentence.trim();
    while (rest.length > limit) {
      const lastSpace = rest.lastIndexOf(' ', limit);
      const cut = lastSpace > 0 ? lastSpace : limit;
      pieces.push(rest.slice(0, cut).trim());
      rest = rest.slice(cut).trim();
    }
    if (rest) {
      pieces.push(rest);
    }
  }

  // 3. Pack pieces greedily, counting the joining space against the limit.
  const chunks = [];
  let current = '';
  for (const piece of pieces) {
    if (current && current.length + 1 + piece.length > limit) {
      chunks.push(current);
      current = piece;
    } else {
      current = current ? `${current} ${piece}` : piece;
    }
  }
  if (current) {
    chunks.push(current);
  }

  return chunks.length > 0 ? chunks : [content];
}
/**
 * Reads an uploaded file as text and adds it to `documents`, split into
 * chunks when the trimmed text is longer than 2000 characters.
 *
 * All documents for the file are prepared first and appended in one step, so
 * a failure part-way through (for example while embedding a chunk) leaves
 * `documents` untouched instead of holding a partial file.
 *
 * @param {File} file - The file to ingest; its name without the extension
 *   becomes the document title.
 * @returns {Promise<void>}
 * @throws {Error} When the file cannot be read, contains no text, or an
 *   embedding cannot be generated.
 */
async function processFile(file) {
  // FileReader is callback-based; adapt just the read step to a promise.
  const text = await new Promise((resolve, reject) => {
    const reader = new FileReader();
    reader.onload = (e) => resolve(e.target.result);
    reader.onerror = () => reject(new Error(`Failed to read file: ${file.name}`));
    reader.readAsText(file);
  });

  const content = text.trim();
  if (!content) {
    throw new Error(`File is empty: ${file.name}`);
  }

  const baseTitle = file.name.replace(/\.[^/.]+$/, ""); // Remove file extension

  let chunks = [content];
  if (content.length > 2000) {
    chunks = chunkDocument(content, 1500);
    console.log(`📄 Chunking large file: ${chunks.length} chunks created from ${content.length} characters`);
  }

  const canEmbed = Boolean(transformersReady && modelsInitialized && embeddingModel);
  const prepared = [];
  for (let i = 0; i < chunks.length; i++) {
    prepared.push({
      title: chunks.length > 1 ? `${baseTitle} (Part ${i + 1}/${chunks.length})` : baseTitle,
      content: chunks[i],
      embedding: canEmbed ? await generateEmbedding(chunks[i]) : null,
    });
  }

  // Commit in one synchronous step; ids continue from the current length.
  const firstId = documents.length;
  prepared.forEach(({ title, content: chunkText, embedding }, index) => {
    documents.push({ id: firstId + index, title, content: chunkText, embedding });
  });
}
/**
 * Runs a self-test and prints a report into `#testOutput`: the load state of
 * the library and models, followed, when the models are initialized, by
 * a sample embedding, a sample semantic search, and sample QA and LLM calls.
 * Any failure replaces the report with a single error line.
 *
 * @returns {Promise<void>}
 */
async function testSystem() {
  const outputDiv = document.getElementById('testOutput');
  const testBtn = document.getElementById('testBtn');

  outputDiv.style.display = 'block';
  outputDiv.innerHTML = '<div class="loading"></div> Running system tests...';
  testBtn.disabled = true;

  const flag = (isOn, onLabel, offLabel) => (isOn ? `✅ ${onLabel}` : `❌ ${offLabel}`);

  try {
    const report = [
      `🧪 System Test Results:\n\n`,
      `📊 Documents: ${documents.length} loaded\n`,
      `🔧 Transformers.js: ${flag(transformersReady, 'Ready', 'Not ready')}\n`,
      `🧠 Embedding Model: ${flag(embeddingModel, 'Loaded', 'Not loaded')}\n`,
      `🤖 QA Model: ${flag(qaModel, 'Loaded', 'Not loaded')}\n`,
      `🚀 LLM Model: ${flag(llmModel, 'Loaded', 'Not loaded')}\n\n`,
    ];

    const canRunModelTests = transformersReady && modelsInitialized && embeddingModel;
    if (!canRunModelTests) {
      report.push(`⚠️ Models not initialized. Click "Initialize AI Models" first.`);
    } else {
      // Embedding smoke test.
      report.push(`🔍 Testing embedding generation...\n`);
      const testEmbedding = await generateEmbedding("test sentence");
      report.push(`✅ Embedding test: Generated ${testEmbedding.length}D vector\n\n`);

      // Semantic search over the documents that already have embeddings.
      report.push(`🔍 Testing semantic search...\n`);
      const queryEmbedding = await generateEmbedding("artificial intelligence");
      const ranked = documents
        .filter((doc) => doc.embedding)
        .map((doc) => ({ doc, similarity: cosineSimilarity(queryEmbedding, doc.embedding) }))
        .sort((left, right) => right.similarity - left.similarity);
      if (ranked.length > 0) {
        const [top] = ranked;
        report.push(`✅ Search test: Found ${ranked.length} results\n`);
        report.push(`📄 Top result: "${top.doc.title}" (similarity: ${top.similarity.toFixed(3)})\n\n`);
      }

      // Question answering against the start of the first document.
      if (qaModel) {
        report.push(`🤖 Testing QA model...\n`);
        const context = documents[0].content.substring(0, 500);
        const qaResult = await qaModel("What is artificial intelligence?", context);
        report.push(`✅ QA test: Generated answer with ${(qaResult.score * 100).toFixed(1)}% confidence\n`);
        report.push(`💬 Answer: ${qaResult.answer.substring(0, 100)}...\n\n`);
      }

      // Short text generation.
      if (llmModel) {
        report.push(`🚀 Testing LLM model...\n`);
        const llmResult = await llmModel("Explain artificial intelligence:", {
          max_new_tokens: 30,
          temperature: 0.7,
          do_sample: true,
          return_full_text: false,
        });
        report.push(`✅ LLM test: Generated text completion\n`);
        report.push(`💬 Generated: "${llmResult[0].generated_text.substring(0, 100)}..."\n\n`);
      }

      report.push(`🎉 All tests passed! System is fully operational.`);
    }

    outputDiv.textContent = report.join('');
  } catch (error) {
    console.error('Test error:', error);
    outputDiv.textContent = `❌ Test failed: ${error.message}`;
  } finally {
    testBtn.disabled = false;
  }
}
// Paint the initial status line.
updateStatus();

// Announce the system and its fixed models in the console.
console.log('🤖 AI-Powered RAG System with Transformers.js');
console.log('Models: Xenova/all-MiniLM-L6-v2, Xenova/distilbert-base-cased-distilled-squad');

// Expose the UI entry points as properties of `window`.
Object.assign(window, {
  showTab,
  updateSliderValue,
  initializeModels,
  searchDocumentsSemantic,
  searchDocumentsKeyword,
  chatWithRAG,
  chatWithLLM,
  chatWithLLMRAG,
  addDocumentManual,
  testSystem,
});