Spaces:

johko
/

in-browser-rag

Running

App Files Files Community

in-browser-rag / index.js

Johannes

init

cca4a24 2 months ago

history blame contribute delete

38.2 kB

	// Import transformers.js 3.0.0 from CDN (new Hugging Face ownership)
	import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.0';

	// Expose the imported entry points on `window`. initTransformers() looks for
	// `window.transformers` together with the `window.transformersLoaded` flag to
	// detect that the ES-module build is available.
	window.transformers = { pipeline, env };
	window.transformersLoaded = true;

	console.log('✅ Transformers.js 3.0.0 loaded via ES modules (Hugging Face)');

	// Module-level handles to the library, filled in by initTransformers():
	//   transformersPipeline - the library's `pipeline` factory
	//   transformersEnv      - the library's `env` configuration object
	//   transformersReady    - set to true once initTransformers() has succeeded
	let transformersPipeline = null;
	let transformersEnv = null;
	let transformersReady = false;

	// In-memory document store, pre-seeded with three sample documents.
	// Each record has:
	//   id        - numeric id (the seed records use their array index)
	//   title     - display title
	//   content   - full text used for search and as model context
	//   embedding - null until initializeModels() / the add-document paths
	//               store a vector produced by generateEmbedding()
	let documents = [
	{
	id: 0,
	title: "Artificial Intelligence Overview",
	content: "Artificial Intelligence (AI) is a branch of computer science that aims to create intelligent machines that work and react like humans. Some activities computers with AI are designed for include speech recognition, learning, planning, and problem-solving. AI is used in healthcare, finance, transportation, and entertainment. Machine learning enables computers to learn from experience without explicit programming. Deep learning uses neural networks to understand complex patterns in data.",
	embedding: null
	},
	{
	id: 1,
	title: "Space Exploration",
	content: "Space exploration is the ongoing discovery and exploration of celestial structures in outer space through evolving space technology. Physical exploration is conducted by unmanned robotic probes and human spaceflight. Space exploration has been used for geopolitical rivalries like the Cold War. The early era was driven by a Space Race between the Soviet Union and United States. Modern exploration includes Mars missions, the International Space Station, and satellite programs.",
	embedding: null
	},
	{
	id: 2,
	title: "Renewable Energy",
	content: "Renewable energy comes from naturally replenished resources on a human timescale. It includes sunlight, wind, rain, tides, waves, and geothermal heat. Renewable energy contrasts with fossil fuels that are used faster than replenished. Most renewable sources are sustainable. Solar energy is abundant and promising. Wind energy and hydroelectric power are major contributors to renewable generation worldwide.",
	embedding: null
	}
	];

	// Loaded pipeline instances (null until initializeModels() assigns them).
	let embeddingModel = null;
	let qaModel = null;
	let llmModel = null;
	// Hub id of the text-generation model that actually loaded; '' until then.
	let loadedModelName = '';
	// Set to true by initializeModels() after all models are loaded and the
	// existing documents have been embedded.
	let modelsInitialized = false;

	/**
	 * Cosine similarity of two numeric vectors.
	 *
	 * Vectors of different lengths are compared as if the shorter one were
	 * padded with zeros, so a length mismatch can no longer produce NaN.
	 *
	 * @param {ArrayLike<number>} a - First vector.
	 * @param {ArrayLike<number>} b - Second vector.
	 * @returns {number} dot(a, b) / (|a| * |b|), or 0 when either vector has
	 *   zero magnitude (including empty vectors).
	 */
	function cosineSimilarity(a, b) {
	  const length = Math.max(a.length, b.length);
	  let dotProduct = 0;
	  let sumSquaresA = 0;
	  let sumSquaresB = 0;

	  // Single pass: accumulate the dot product and both squared norms together.
	  for (let i = 0; i < length; i++) {
	    const x = a[i] ?? 0;
	    const y = b[i] ?? 0;
	    dotProduct += x * y;
	    sumSquaresA += x * x;
	    sumSquaresB += y * y;
	  }

	  const magnitudeA = Math.sqrt(sumSquaresA);
	  const magnitudeB = Math.sqrt(sumSquaresB);

	  // A zero vector has no direction; report "not similar" instead of dividing by 0.
	  if (magnitudeA === 0 || magnitudeB === 0) return 0;
	  return dotProduct / (magnitudeA * magnitudeB);
	}

	/**
	 * Locate the Transformers.js library on `window`, store its `pipeline` and
	 * `env` entry points in the module-level handles, and mark the library as
	 * ready. Prefers the ES-module namespace (`window.transformers`), falls back
	 * to a UMD global (`window.Transformers`), and otherwise polls every 200 ms
	 * for up to 50 attempts before giving up.
	 *
	 * Never rejects: failures are logged and shown in the `#status` and
	 * `#transformersStatus` elements when those exist.
	 */
	async function initTransformers() {
	  // Returns whichever library namespace is currently exposed, or null.
	  const locateLibrary = () => {
	    if (window.transformers && window.transformersLoaded) {
	      return { source: 'esm', api: window.transformers };
	    }
	    if (window.Transformers) {
	      return { source: 'umd', api: window.Transformers };
	    }
	    return null;
	  };

	  // Writes a coloured message into an element, silently skipping absent ones.
	  const paintStatus = (elementId, message, colour) => {
	    const node = document.getElementById(elementId);
	    if (node) {
	      node.textContent = message;
	      node.style.color = colour;
	    }
	  };

	  try {
	    console.log('🔄 Initializing Transformers.js...');

	    let library = locateLibrary();
	    if (library !== null) {
	      console.log(
	        library.source === 'esm'
	          ? '✅ Using ES modules version (Transformers.js 3.0.0)'
	          : '✅ Using UMD version (Transformers.js 3.0.0)'
	      );
	    } else {
	      console.log('⏳ Waiting for library to load...');
	      for (let polls = 0; polls < 50; polls++) {
	        if (window.Transformers || window.transformersLoaded) break;
	        await new Promise((wake) => setTimeout(wake, 200));
	      }

	      library = locateLibrary();
	      if (library === null) {
	        throw new Error('Failed to load Transformers.js library');
	      }
	    }

	    transformersPipeline = library.api.pipeline;
	    transformersEnv = library.api.env;

	    // Models are always fetched remotely; WASM paths stay at library defaults.
	    if (transformersEnv) {
	      transformersEnv.allowLocalModels = false;
	      transformersEnv.allowRemoteModels = true;
	    }

	    transformersReady = true;
	    console.log('✅ Transformers.js initialized successfully');

	    updateStatus();
	    paintStatus('transformersStatus', '✅ Ready!', 'green');
	  } catch (error) {
	    console.error('❌ Error initializing Transformers.js:', error);
	    paintStatus('status', `❌ Failed to load Transformers.js: ${error.message}`, 'red');
	    paintStatus('transformersStatus', `❌ Failed: ${error.message}`, 'red');
	  }
	}

	// Once the DOM is parsed, start library detection and wire up the upload widgets.
	document.addEventListener('DOMContentLoaded', () => {
	  initTransformers();
	  initFileUpload();
	});

	// UI Functions
	/**
	 * Activate one tab panel and highlight the control that triggered it.
	 *
	 * @param {string} tabName - Element id of the `.tab-content` panel to show.
	 * @param {Event} [triggerEvent] - Event whose `target` is the clicked tab
	 *   control. Defaults to the legacy global `event`, which is what inline
	 *   `onclick="showTab('x')"` handlers rely on. When no event is available
	 *   (e.g. a programmatic call) only the panel is switched.
	 */
	function showTab(tabName, triggerEvent = globalThis.event) {
	  // Resolve the panel first so an unknown id leaves the current tab visible
	  // instead of hiding everything and then throwing.
	  const panel = document.getElementById(tabName);
	  if (!panel) {
	    console.warn(`showTab: no element with id "${tabName}"`);
	    return;
	  }

	  document.querySelectorAll('.tab-content').forEach((tab) => {
	    tab.classList.remove('active');
	  });
	  document.querySelectorAll('.tab').forEach((button) => {
	    button.classList.remove('active');
	  });

	  panel.classList.add('active');
	  // The event target may be missing or may not be an element with a classList.
	  triggerEvent?.target?.classList?.add('active');
	}

	/**
	 * Mirror a slider's current value into its companion label, whose element id
	 * is the slider id followed by "Value".
	 *
	 * @param {string} sliderId - Element id of the slider input.
	 */
	function updateSliderValue(sliderId) {
	  const { value } = document.getElementById(sliderId);
	  const label = document.getElementById(`${sliderId}Value`);
	  label.textContent = value;
	}

	/**
	 * Refresh the one-line summary in `#status`: document count plus the load
	 * state of the library and of each model.
	 *
	 * Does nothing when `#status` is absent. This function is called from inside
	 * initTransformers()' try block and at module top level, where a thrown
	 * TypeError would be misreported as a library failure or abort the script.
	 */
	function updateStatus() {
	  const status = document.getElementById('status');
	  if (!status) return;

	  const summary = [
	    `📊 Documents: ${documents.length}`,
	    `🔧 Transformers.js: ${transformersReady ? 'Ready' : 'Not ready'}`,
	    `🤖 QA: ${qaModel ? 'Loaded' : 'Not loaded'}`,
	    `🧠 Embedding: ${embeddingModel ? 'Loaded' : 'Not loaded'}`,
	    `🚀 LLM: ${llmModel ? 'Loaded' : 'Not loaded'}`,
	  ];
	  status.textContent = summary.join(' | ');
	}

	/**
	 * Update the model-initialisation progress bar and its caption.
	 *
	 * @param {number} percent - Bar width, written as a CSS percentage.
	 * @param {string} text - Caption shown in `#progressText`.
	 */
	function updateProgress(percent, text) {
	  const bar = document.getElementById('progressBar');
	  const caption = document.getElementById('progressText');
	  bar.style.width = `${percent}%`;
	  caption.textContent = text;
	}

	// AI Functions
	/**
	 * Load the embedding, question-answering and text-generation pipelines, then
	 * embed every document currently in `documents`.
	 *
	 * Progress and the final outcome are written to `#initStatus` and the
	 * progress bar; `#initBtn` is disabled while the work is running. Never
	 * rejects: failures are logged and reported in `#initStatus`.
	 *
	 * Side effects on success: sets `embeddingModel`, `qaModel`, `llmModel`,
	 * `loadedModelName`, every `documents[i].embedding`, and
	 * `modelsInitialized = true`.
	 */
	async function initializeModels() {
	  const statusDiv = document.getElementById('initStatus');
	  const progressDiv = document.getElementById('initProgress');
	  const initBtn = document.getElementById('initBtn');

	  // Builds the progress callback shared by all three pipeline downloads.
	  // NOTE(review): Transformers.js appears to report byte counts with status
	  // 'progress'; the original check for 'downloading' is kept as well so the
	  // callback works with either spelling - confirm against the library docs.
	  const reportDownload = (label) => (progress) => {
	    if (progress.status !== 'progress' && progress.status !== 'downloading') return;
	    const percent = progress.loaded && progress.total
	      ? Math.round((progress.loaded / progress.total) * 100)
	      : 0;
	    statusDiv.innerHTML = `🔄 Downloading ${label}: ${percent}%`;
	  };

	  statusDiv.style.display = 'block';
	  progressDiv.style.display = 'block';
	  initBtn.disabled = true;

	  try {
	    // initTransformers() may still be running; give it up to 30 seconds.
	    if (!transformersReady || !transformersPipeline) {
	      updateProgress(5, "Waiting for Transformers.js to initialize...");
	      statusDiv.innerHTML = '🔄 Initializing Transformers.js library...';

	      for (let attempts = 0; !transformersReady && attempts < 30; attempts++) {
	        await new Promise((resolve) => setTimeout(resolve, 1000));
	      }

	      // Also require the pipeline factory itself, so a half-initialised
	      // library fails with this message rather than an opaque TypeError.
	      if (!transformersReady || !transformersPipeline) {
	        throw new Error('Transformers.js failed to initialize. Please refresh the page.');
	      }
	    }

	    updateProgress(10, "Loading embedding model...");
	    statusDiv.innerHTML = '🔄 Loading embedding model (Xenova/all-MiniLM-L6-v2)...';
	    embeddingModel = await transformersPipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2', {
	      progress_callback: reportDownload('embedding model'),
	    });

	    updateProgress(40, "Loading question-answering model...");
	    statusDiv.innerHTML = '🔄 Loading QA model (Xenova/distilbert-base-cased-distilled-squad)...';
	    qaModel = await transformersPipeline('question-answering', 'Xenova/distilbert-base-cased-distilled-squad', {
	      progress_callback: reportDownload('QA model'),
	    });

	    updateProgress(70, "Loading language model...");
	    // Replaced immediately by the per-model message inside the loop below.
	    statusDiv.innerHTML = '🔄 Loading LLM (trying SmolLM models)...';

	    // Candidates are tried in order; the first one that loads is used.
	    const modelsToTry = [
	      { name: 'Xenova/gpt2', options: {} },
	      { name: 'Xenova/distilgpt2', options: {} },
	    ];

	    let modelLoaded = false;
	    for (const model of modelsToTry) {
	      try {
	        console.log(`Trying to load ${model.name}...`);
	        statusDiv.innerHTML = `🔄 Loading LLM (${model.name})...`;

	        llmModel = await transformersPipeline('text-generation', model.name, {
	          progress_callback: reportDownload(model.name),
	        });

	        console.log(`✅ Successfully loaded ${model.name}`);
	        loadedModelName = model.name;
	        modelLoaded = true;
	        break;
	      } catch (error) {
	        // Deliberate best effort: fall through to the next candidate.
	        console.warn(`${model.name} failed:`, error);
	      }
	    }

	    if (!modelLoaded) {
	      throw new Error('Failed to load any LLM model');
	    }

	    updateProgress(85, "Generating embeddings for documents...");
	    statusDiv.innerHTML = '🔄 Generating embeddings for existing documents...';

	    // Embed sequentially so the progress bar can advance per document.
	    for (let i = 0; i < documents.length; i++) {
	      const doc = documents[i];
	      updateProgress(85 + (i / documents.length) * 10, `Processing document ${i + 1}/${documents.length}...`);
	      doc.embedding = await generateEmbedding(doc.content);
	    }

	    updateProgress(100, "Initialization complete!");
	    modelsInitialized = true;

	    statusDiv.innerHTML = `✅ AI Models initialized successfully!
	🧠 Embedding Model: Xenova/all-MiniLM-L6-v2 (384 dimensions)
	🤖 QA Model: Xenova/distilbert-base-cased-distilled-squad
	🚀 LLM Model: ${loadedModelName} (Language model for text generation)
	📚 Documents processed: ${documents.length}
	🔮 Ready for semantic search, Q&A, and LLM chat!

	📊 Model Info:
	• Embedding model size: ~23MB
	• QA model size: ~28MB
	• LLM model size: ~15-50MB (depending on model loaded)
	• Total memory usage: ~70-100MB
	• Inference speed: ~2-8 seconds per operation`;

	    updateStatus();
	  } catch (error) {
	    console.error('Error initializing models:', error);
	    // textContent: the error message is not trusted markup.
	    statusDiv.textContent = `❌ Error initializing models: ${error.message}

	Please check your internet connection and try again.`;
	    updateProgress(0, "Initialization failed");
	  } finally {
	    initBtn.disabled = false;
	    // Leave the bar visible briefly so the final state can be read.
	    setTimeout(() => {
	      progressDiv.style.display = 'none';
	    }, 2000);
	  }
	}

	/**
	 * Embed a piece of text with the loaded feature-extraction pipeline.
	 *
	 * @param {string} text - Text to embed.
	 * @returns {Promise<number[]>} The pipeline's output data copied into a plain
	 *   array (requested with mean pooling and normalisation).
	 * @throws {Error} If the library or the embedding model is not loaded, or if
	 *   the pipeline call itself fails (logged, then rethrown unchanged).
	 */
	async function generateEmbedding(text) {
	  if (!transformersReady || !transformersPipeline) {
	    throw new Error('Transformers.js not initialized');
	  }

	  if (!embeddingModel) {
	    throw new Error('Embedding model not loaded');
	  }

	  try {
	    const output = await embeddingModel(text, { pooling: 'mean', normalize: true });
	    return Array.from(output.data);
	  } catch (error) {
	    console.error('Error generating embedding:', error);
	    throw error;
	  }
	}

	/**
	 * Embedding-based search: embed the query from `#searchQuery`, rank every
	 * document that has an embedding by cosine similarity, and write the top
	 * `#maxResults` hits to `#searchResults`.
	 *
	 * Never rejects; validation problems and errors are shown in the results
	 * panel. `#searchBtn` is disabled while the search runs.
	 */
	async function searchDocumentsSemantic() {
	  const query = document.getElementById('searchQuery').value;
	  const maxResults = Number.parseInt(document.getElementById('maxResults').value, 10);
	  const resultsDiv = document.getElementById('searchResults');
	  const searchBtn = document.getElementById('searchBtn');

	  resultsDiv.style.display = 'block';

	  if (!query.trim()) {
	    resultsDiv.textContent = '❌ Please enter a search query';
	    return;
	  }

	  if (!transformersReady || !modelsInitialized || !embeddingModel) {
	    resultsDiv.textContent = '❌ Please initialize AI models first!';
	    return;
	  }

	  resultsDiv.innerHTML = '<div class="loading"></div> Generating query embedding and searching...';
	  searchBtn.disabled = true;

	  try {
	    const queryEmbedding = await generateEmbedding(query);

	    // Documents added before the models were loaded have no embedding yet.
	    const results = documents
	      .filter((doc) => doc.embedding)
	      .map((doc) => ({ doc, similarity: cosineSimilarity(queryEmbedding, doc.embedding) }))
	      .sort((a, b) => b.similarity - a.similarity);

	    if (results.length === 0) {
	      resultsDiv.textContent = `❌ No documents with embeddings found for '${query}'`;
	      return;
	    }

	    // An unparsable limit would make slice() return nothing; show everything instead.
	    const limit = Number.isNaN(maxResults) ? results.length : maxResults;

	    let output = `🔍 Semantic search results for '${query}':\n\n`;
	    results.slice(0, limit).forEach(({ doc, similarity }, i) => {
	      const excerpt = doc.content.length > 200 ? `${doc.content.substring(0, 200)}...` : doc.content;
	      output += `Result ${i + 1} (similarity: ${similarity.toFixed(3)})\n📄 Title: ${doc.title}\n📝 Content: ${excerpt}\n\n`;
	    });

	    resultsDiv.textContent = output;
	  } catch (error) {
	    console.error('Search error:', error);
	    resultsDiv.textContent = `❌ Error during search: ${error.message}`;
	  } finally {
	    searchBtn.disabled = false;
	  }
	}

	/**
	 * Keyword search: score each document by how often the query's words occur
	 * in its content (weight 1) and title (weight 2), case-insensitively, and
	 * write the top `#maxResults` hits to `#searchResults`.
	 *
	 * Query words are matched literally. They are not compiled into regular
	 * expressions, so input such as "c++" or "(" cannot throw, and surrounding
	 * whitespace cannot produce an empty word that matches everywhere.
	 * The result is rendered after a 500 ms delay so the loading indicator is
	 * visible.
	 */
	function searchDocumentsKeyword() {
	  const query = document.getElementById('searchQuery').value;
	  const maxResults = Number.parseInt(document.getElementById('maxResults').value, 10);
	  const resultsDiv = document.getElementById('searchResults');

	  resultsDiv.style.display = 'block';

	  if (!query.trim()) {
	    resultsDiv.textContent = '❌ Please enter a search query';
	    return;
	  }

	  resultsDiv.innerHTML = '<div class="loading"></div> Searching keywords...';

	  // Number of non-overlapping literal occurrences of `needle` in `haystack`.
	  const countOccurrences = (haystack, needle) => haystack.split(needle).length - 1;

	  setTimeout(() => {
	    const queryWords = query.toLowerCase().split(/\s+/).filter(Boolean);

	    const results = [];
	    documents.forEach((doc) => {
	      const contentLower = doc.content.toLowerCase();
	      const titleLower = doc.title.toLowerCase();

	      let score = 0;
	      for (const word of queryWords) {
	        score += countOccurrences(contentLower, word);
	        score += countOccurrences(titleLower, word) * 2; // title hits count double
	      }

	      if (score > 0) {
	        results.push({ doc, score });
	      }
	    });

	    results.sort((a, b) => b.score - a.score);

	    if (results.length === 0) {
	      resultsDiv.textContent = `❌ No documents found containing '${query}'`;
	      return;
	    }

	    // An unparsable limit would make slice() return nothing; show everything instead.
	    const limit = Number.isNaN(maxResults) ? results.length : maxResults;

	    let output = `🔍 Keyword search results for '${query}':\n\n`;
	    results.slice(0, limit).forEach(({ doc }, i) => {
	      const excerpt = doc.content.length > 200 ? `${doc.content.substring(0, 200)}...` : doc.content;
	      output += `Result ${i + 1}\n📄 Title: ${doc.title}\n📝 Content: ${excerpt}\n\n`;
	    });

	    resultsDiv.textContent = output;
	  }, 500);
	}

	/**
	 * Retrieval-augmented question answering: embed the question from
	 * `#chatQuestion`, pick the most similar documents (similarity > 0.1, at
	 * most `#maxContext` of them), join their text into a context of up to
	 * 2000 characters and run the extractive QA model on it.
	 *
	 * The answer, its score and source details are written to `#chatResponse`.
	 * Never rejects; errors are shown in the response panel. `#chatBtn` is
	 * disabled while the request runs.
	 */
	async function chatWithRAG() {
	  const question = document.getElementById('chatQuestion').value;
	  const maxContext = Number.parseInt(document.getElementById('maxContext').value, 10);
	  const responseDiv = document.getElementById('chatResponse');
	  const chatBtn = document.getElementById('chatBtn');

	  responseDiv.style.display = 'block';

	  if (!question.trim()) {
	    responseDiv.textContent = '❌ Please enter a question';
	    return;
	  }

	  if (!transformersReady || !modelsInitialized || !embeddingModel || !qaModel) {
	    responseDiv.textContent = '❌ AI models not loaded yet. Please initialize them first!';
	    return;
	  }

	  responseDiv.innerHTML = '<div class="loading"></div> Generating answer with real AI...';
	  chatBtn.disabled = true;

	  try {
	    const questionEmbedding = await generateEmbedding(question);

	    // Rank embedded documents, dropping those that are essentially unrelated.
	    let relevantDocs = documents
	      .filter((doc) => doc.embedding)
	      .map((doc) => ({ doc, similarity: cosineSimilarity(questionEmbedding, doc.embedding) }))
	      .filter((item) => item.similarity > 0.1)
	      .sort((a, b) => b.similarity - a.similarity);

	    // Apply the limit only when it parsed to a usable number; a NaN limit
	    // would otherwise discard every document.
	    if (Number.isInteger(maxContext) && maxContext >= 0) {
	      relevantDocs = relevantDocs.slice(0, maxContext);
	    }

	    if (relevantDocs.length === 0) {
	      responseDiv.textContent = '❌ No relevant context found in the documents for your question.';
	      return;
	    }

	    const context = relevantDocs.map((item) => item.doc.content).join(' ').substring(0, 2000);

	    const qaResult = await qaModel(question, context);

	    let response = `🤖 AI Answer:\n${qaResult.answer}\n\n`;
	    response += `📊 Confidence: ${(qaResult.score * 100).toFixed(1)}%\n\n`;
	    response += `📚 Sources: ${relevantDocs.length} documents\n`;
	    response += `🔍 Best match: "${relevantDocs[0].doc.title}" (similarity: ${relevantDocs[0].similarity.toFixed(3)})\n\n`;
	    response += `📝 Context used:\n${context.substring(0, 300)}...`;

	    responseDiv.textContent = response;
	  } catch (error) {
	    console.error('Chat error:', error);
	    responseDiv.textContent = `❌ Error generating response: ${error.message}`;
	  } finally {
	    chatBtn.disabled = false;
	  }
	}

	/**
	 * Plain text generation: send the prompt from `#llmPrompt` to the loaded
	 * text-generation pipeline using the `#maxTokens` and `#temperature`
	 * settings, and write the continuation to `#llmResponse`.
	 *
	 * Never rejects; errors are shown in the response panel. `#llmBtn` is
	 * disabled while generation runs.
	 */
	async function chatWithLLM() {
	  const prompt = document.getElementById('llmPrompt').value;
	  const maxTokens = Number.parseInt(document.getElementById('maxTokens').value, 10);
	  const temperature = Number.parseFloat(document.getElementById('temperature').value);
	  const responseDiv = document.getElementById('llmResponse');
	  const llmBtn = document.getElementById('llmBtn');

	  responseDiv.style.display = 'block';

	  if (!prompt.trim()) {
	    responseDiv.textContent = '❌ Please enter a prompt';
	    return;
	  }

	  if (!transformersReady || !modelsInitialized || !llmModel) {
	    responseDiv.textContent = '❌ LLM model not loaded yet. Please initialize models first!';
	    return;
	  }

	  responseDiv.innerHTML = '<div class="loading"></div> Generating text with LLM...';
	  llmBtn.disabled = true;

	  try {
	    // return_full_text: false asks for the continuation only, without the prompt.
	    const result = await llmModel(prompt, {
	      max_new_tokens: maxTokens,
	      temperature,
	      do_sample: true,
	      return_full_text: false,
	    });

	    const generatedText = result[0].generated_text;

	    let response = `🚀 LLM Generated Text:\n\n"${generatedText}"\n\n`;
	    response += `📊 Settings: ${maxTokens} tokens, temperature ${temperature}\n`;
	    // Show only the model part of an "owner/model" hub id.
	    response += `🤖 Model: ${loadedModelName ? loadedModelName.split('/')[1] : 'Language Model'}\n`;
	    response += `⏱️ Generated in real-time by your browser!`;

	    responseDiv.textContent = response;
	  } catch (error) {
	    console.error('LLM error:', error);
	    responseDiv.textContent = `❌ Error generating text: ${error.message}`;
	  } finally {
	    llmBtn.disabled = false;
	  }
	}

	/**
	 * Retrieval-augmented generation: embed the prompt from `#llmPrompt`, take
	 * up to three documents with similarity > 0.1, prepend the first 300
	 * characters of each as context, and generate a continuation with the
	 * text-generation pipeline. When no document qualifies, the bare prompt is
	 * used.
	 *
	 * The result is written to `#llmResponse`. Never rejects; errors are shown
	 * in the response panel. `#llmRagBtn` is disabled while generation runs.
	 */
	async function chatWithLLMRAG() {
	  const prompt = document.getElementById('llmPrompt').value;
	  const maxTokens = Number.parseInt(document.getElementById('maxTokens').value, 10);
	  const temperature = Number.parseFloat(document.getElementById('temperature').value);
	  const responseDiv = document.getElementById('llmResponse');
	  const llmRagBtn = document.getElementById('llmRagBtn');

	  responseDiv.style.display = 'block';

	  if (!prompt.trim()) {
	    responseDiv.textContent = '❌ Please enter a prompt';
	    return;
	  }

	  if (!transformersReady || !modelsInitialized || !llmModel || !embeddingModel) {
	    responseDiv.textContent = '❌ Models not loaded yet. Please initialize all models first!';
	    return;
	  }

	  responseDiv.innerHTML = '<div class="loading"></div> Finding relevant context and generating with LLM...';
	  llmRagBtn.disabled = true;

	  try {
	    const queryEmbedding = await generateEmbedding(prompt);

	    // Top three embedded documents that are at least loosely related.
	    const relevantDocs = documents
	      .filter((doc) => doc.embedding)
	      .map((doc) => ({ doc, similarity: cosineSimilarity(queryEmbedding, doc.embedding) }))
	      .filter((item) => item.similarity > 0.1)
	      .sort((a, b) => b.similarity - a.similarity)
	      .slice(0, 3);

	    let enhancedPrompt = prompt;
	    if (relevantDocs.length > 0) {
	      // Each document contributes only its first 300 characters to the prompt.
	      const context = relevantDocs.map((item) => item.doc.content.substring(0, 300)).join(' ');
	      enhancedPrompt = `Context: ${context}\n\nQuestion: ${prompt}\n\nAnswer:`;
	    }

	    const result = await llmModel(enhancedPrompt, {
	      max_new_tokens: maxTokens,
	      temperature,
	      do_sample: true,
	      return_full_text: false,
	    });

	    const generatedText = result[0].generated_text;

	    let response = `🤖 LLM + RAG Generated Response:\n\n"${generatedText}"\n\n`;
	    response += `📚 Context: ${relevantDocs.length} relevant documents used\n`;
	    if (relevantDocs.length > 0) {
	      response += `🔍 Best match: "${relevantDocs[0].doc.title}" (similarity: ${relevantDocs[0].similarity.toFixed(3)})\n`;
	    }
	    response += `📊 Settings: ${maxTokens} tokens, temperature ${temperature}\n`;
	    response += `🚀 Model: ${loadedModelName ? loadedModelName.split('/')[1] : 'LLM'} enhanced with document retrieval`;

	    responseDiv.textContent = response;
	  } catch (error) {
	    console.error('LLM+RAG error:', error);
	    responseDiv.textContent = `❌ Error generating response: ${error.message}`;
	  } finally {
	    llmRagBtn.disabled = false;
	  }
	}

	/**
	 * Add the document typed into `#docTitle` / `#docContent` to `documents`.
	 *
	 * Title and content are trimmed; an empty title falls back to
	 * "User Document N". When the models are loaded the trimmed content is
	 * embedded before the record is stored, so the stored text and its
	 * embedding always correspond. The outcome and a preview are written to
	 * `#addStatus` / `#docPreview`, and the form is cleared on success.
	 *
	 * Never rejects; errors are shown in `#addStatus`. `#addBtn` is disabled
	 * while the document is being added.
	 */
	async function addDocumentManual() {
	  const title = document.getElementById('docTitle').value.trim() || `User Document ${documents.length - 2}`;
	  const content = document.getElementById('docContent').value.trim();
	  const statusDiv = document.getElementById('addStatus');
	  const previewDiv = document.getElementById('docPreview');
	  const addBtn = document.getElementById('addBtn');

	  statusDiv.style.display = 'block';

	  if (!content) {
	    statusDiv.textContent = '❌ Please enter document content';
	    previewDiv.style.display = 'none';
	    return;
	  }

	  statusDiv.innerHTML = '<div class="loading"></div> Adding document...';
	  addBtn.disabled = true;

	  try {
	    let embedding = null;
	    if (transformersReady && modelsInitialized && embeddingModel) {
	      statusDiv.innerHTML = '<div class="loading"></div> Generating AI embedding...';
	      embedding = await generateEmbedding(content);
	    }

	    // The id is taken only after the await above, so a document pushed in
	    // the meantime (e.g. by a file upload) cannot end up with the same id.
	    const newDocument = { id: documents.length, title, content, embedding };
	    documents.push(newDocument);

	    // Report what actually happened rather than re-deriving it from flags.
	    const embeddingNote = newDocument.embedding
	      ? '\n🧠 AI embedding generated automatically'
	      : '\n⚠️ AI embedding will be generated when models are loaded';

	    const preview = content.length > 300 ? `${content.substring(0, 300)}...` : content;
	    const status = `✅ Document added successfully!
	📄 Title: ${title}
	📊 Size: ${content.length.toLocaleString()} characters
	📚 Total documents: ${documents.length}${embeddingNote}`;

	    statusDiv.textContent = status;
	    previewDiv.style.display = 'block';
	    previewDiv.textContent = `📖 Preview:\n${preview}`;

	    document.getElementById('docTitle').value = '';
	    document.getElementById('docContent').value = '';

	    updateStatus();
	  } catch (error) {
	    console.error('Error adding document:', error);
	    statusDiv.textContent = `❌ Error adding document: ${error.message}`;
	  } finally {
	    addBtn.disabled = false;
	  }
	}

	// File upload functionality
	/**
	 * Wire up the upload widgets: clicking `#uploadArea` opens the file picker
	 * of `#fileInput`, and files arriving by drag-and-drop or through the
	 * picker are passed to handleFiles(). Does nothing when either element is
	 * missing from the page.
	 */
	function initFileUpload() {
	  const uploadArea = document.getElementById('uploadArea');
	  const fileInput = document.getElementById('fileInput');

	  if (!uploadArea || !fileInput) return;

	  // handleFiles() is async; log a rejection instead of leaving it unhandled.
	  const submit = (files) => {
	    handleFiles(files).catch((error) => {
	      console.error('Error handling uploaded files:', error);
	    });
	  };

	  uploadArea.addEventListener('click', () => {
	    fileInput.click();
	  });

	  // preventDefault() on dragover is what marks the area as a valid drop target.
	  uploadArea.addEventListener('dragover', (e) => {
	    e.preventDefault();
	    uploadArea.classList.add('dragover');
	  });

	  uploadArea.addEventListener('dragleave', (e) => {
	    e.preventDefault();
	    uploadArea.classList.remove('dragover');
	  });

	  uploadArea.addEventListener('drop', (e) => {
	    e.preventDefault();
	    uploadArea.classList.remove('dragover');
	    submit(e.dataTransfer.files);
	  });

	  fileInput.addEventListener('change', (e) => {
	    submit(e.target.files);
	  });
	}

	/**
	 * Process a batch of uploaded files one after another, updating the upload
	 * progress bar as it goes and writing a summary to `#uploadStatus` when the
	 * batch is finished. A file that fails is logged and counted; it does not
	 * stop the remaining files.
	 *
	 * @param {FileList|File[]} files - Files from a drop event or file input.
	 */
	async function handleFiles(files) {
	  const statusEl = document.getElementById('uploadStatus');
	  const progressEl = document.getElementById('uploadProgress');
	  const barEl = document.getElementById('uploadProgressBar');
	  const captionEl = document.getElementById('uploadProgressText');

	  if (files.length === 0) return;

	  statusEl.style.display = 'block';
	  progressEl.style.display = 'block';
	  statusEl.textContent = '';

	  const tally = { ok: 0, failed: 0 };
	  const total = files.length;

	  for (const [index, file] of Array.from(files).entries()) {
	    const position = index + 1;
	    barEl.style.width = `${(position / total) * 100}%`;

	    // Files above 10000 bytes get a caption that mentions chunking.
	    captionEl.textContent = file.size > 10000
	      ? `Processing large file: ${file.name} (${position}/${total}) - chunking for better search...`
	      : `Processing ${file.name} (${position}/${total})...`;

	    try {
	      await processFile(file);
	      tally.ok += 1;
	    } catch (error) {
	      console.error(`Error processing ${file.name}:`, error);
	      tally.failed += 1;
	    }
	  }

	  progressEl.style.display = 'none';

	  const summary = [`✅ Upload complete!\n📁 ${tally.ok} files processed successfully`];
	  if (tally.failed > 0) {
	    summary.push(`\n❌ ${tally.failed} files failed to process`);
	  }
	  summary.push(`\n📊 Total documents: ${documents.length}`);
	  summary.push(`\n🧩 Large files automatically chunked for better search`);

	  statusEl.textContent = summary.join('');
	  updateStatus();

	  // Reset the input so selecting the same file again fires `change`.
	  document.getElementById('fileInput').value = '';
	}

	/**
	 * Split text into chunks of at most `maxChunkSize` characters, breaking on
	 * sentence boundaries where possible.
	 *
	 * A sentence ends at a run of `.`, `!` or `?` that is followed by
	 * whitespace or the end of the text, so decimals such as "3.14" stay
	 * intact and each sentence keeps its original punctuation. Sentences are
	 * joined with a single space, and that separator counts towards the limit.
	 * A single sentence longer than the limit is split further, at a space
	 * when one is available.
	 *
	 * @param {string} content - Text to split.
	 * @param {number} [maxChunkSize=1000] - Maximum chunk length in characters.
	 *   Values that are not a number >= 1 disable the limit.
	 * @returns {string[]} The chunks, or `[content]` when the text contains no
	 *   sentence content at all (e.g. empty or punctuation only).
	 */
	function chunkDocument(content, maxChunkSize = 1000) {
	  const limit = maxChunkSize >= 1 ? Math.floor(maxChunkSize) : Infinity;

	  // Cuts one over-long sentence into pieces no longer than `limit`.
	  const splitOversized = (text) => {
	    const pieces = [];
	    let rest = text;
	    while (rest.length > limit) {
	      let cut = rest.lastIndexOf(' ', limit);
	      if (cut <= 0) cut = limit; // no usable space: hard cut
	      pieces.push(rest.slice(0, cut).trim());
	      rest = rest.slice(cut).trim();
	    }
	    if (rest) pieces.push(rest);
	    return pieces;
	  };

	  const sentences = (content.match(/\S[\s\S]*?(?:[.!?]+(?=\s|$)|$)/g) ?? [])
	    .map((sentence) => sentence.trim())
	    // Drop fragments that consist of punctuation only.
	    .filter((sentence) => /[^.!?\s]/.test(sentence));

	  const chunks = [];
	  let currentChunk = '';

	  for (const sentence of sentences) {
	    for (const piece of splitOversized(sentence)) {
	      const candidate = currentChunk ? `${currentChunk} ${piece}` : piece;
	      if (currentChunk && candidate.length > limit) {
	        chunks.push(currentChunk);
	        currentChunk = piece;
	      } else {
	        currentChunk = candidate;
	      }
	    }
	  }

	  if (currentChunk) {
	    chunks.push(currentChunk);
	  }

	  return chunks.length > 0 ? chunks : [content];
	}

	/**
	 * Read an uploaded file as text and append it to `documents`. Content
	 * longer than 2000 characters is first split with chunkDocument() (1500
	 * characters per chunk) and stored as one record per chunk, titled
	 * "name (Part i/n)" when there is more than one chunk. Each record is
	 * embedded before it is stored when the models are loaded.
	 *
	 * @param {File} file - The file to ingest.
	 * @returns {Promise<void>} Resolves once every record is stored; rejects if
	 *   the file cannot be read or a record cannot be processed.
	 */
	async function processFile(file) {
	  return new Promise((resolve, reject) => {
	    const reader = new FileReader();

	    // Builds one record, embeds it when possible, then appends it to the store.
	    const storeDocument = async (title, text) => {
	      const record = {
	        id: documents.length,
	        title,
	        content: text,
	        embedding: null,
	      };
	      if (transformersReady && modelsInitialized && embeddingModel) {
	        record.embedding = await generateEmbedding(text);
	      }
	      documents.push(record);
	    };

	    reader.onload = async (e) => {
	      try {
	        const content = e.target.result.trim();
	        // Title is the file name without its last extension.
	        const baseTitle = file.name.replace(/\.[^/.]+$/, "");

	        if (content.length > 2000) {
	          const chunks = chunkDocument(content, 1500);
	          console.log(`📄 Chunking large file: ${chunks.length} chunks created from ${content.length} characters`);

	          for (const [index, chunk] of chunks.entries()) {
	            const partTitle = chunks.length > 1
	              ? `${baseTitle} (Part ${index + 1}/${chunks.length})`
	              : baseTitle;
	            await storeDocument(partTitle, chunk);
	          }
	        } else {
	          await storeDocument(baseTitle, content);
	        }

	        resolve();
	      } catch (error) {
	        reject(error);
	      }
	    };

	    reader.onerror = () => {
	      reject(new Error(`Failed to read file: ${file.name}`));
	    };

	    reader.readAsText(file);
	  });
	}

	/**
	 * Run a self-test and write a report to `#testOutput`: first the load state
	 * of the library and each model, then (when the models are initialised) a
	 * live embedding call, a sample semantic search, and one call each to the
	 * QA and text-generation models if they are loaded.
	 *
	 * Never rejects; a failing step replaces the report with the error
	 * message. `#testBtn` is disabled while the tests run.
	 */
	async function testSystem() {
	  const outputDiv = document.getElementById('testOutput');
	  const testBtn = document.getElementById('testBtn');

	  outputDiv.style.display = 'block';
	  outputDiv.innerHTML = '<div class="loading"></div> Running system tests...';
	  testBtn.disabled = true;

	  try {
	    // The report is collected piecewise and joined once at the end.
	    const report = [
	      `🧪 System Test Results:\n\n`,
	      `📊 Documents: ${documents.length} loaded\n`,
	      `🔧 Transformers.js: ${transformersReady ? '✅ Ready' : '❌ Not ready'}\n`,
	      `🧠 Embedding Model: ${embeddingModel ? '✅ Loaded' : '❌ Not loaded'}\n`,
	      `🤖 QA Model: ${qaModel ? '✅ Loaded' : '❌ Not loaded'}\n`,
	      `🚀 LLM Model: ${llmModel ? '✅ Loaded' : '❌ Not loaded'}\n\n`,
	    ];

	    const canRunLiveTests = transformersReady && modelsInitialized && embeddingModel;

	    if (!canRunLiveTests) {
	      report.push(`⚠️ Models not initialized. Click "Initialize AI Models" first.`);
	    } else {
	      report.push(`🔍 Testing embedding generation...\n`);
	      const probeVector = await generateEmbedding("test sentence");
	      report.push(`✅ Embedding test: Generated ${probeVector.length}D vector\n\n`);

	      report.push(`🔍 Testing semantic search...\n`);
	      const probeQuery = "artificial intelligence";
	      const probeEmbedding = await generateEmbedding(probeQuery);

	      const ranked = documents
	        .filter((doc) => doc.embedding)
	        .map((doc) => ({ doc, similarity: cosineSimilarity(probeEmbedding, doc.embedding) }));
	      ranked.sort((a, b) => b.similarity - a.similarity);

	      if (ranked.length > 0) {
	        const [best] = ranked;
	        report.push(`✅ Search test: Found ${ranked.length} results\n`);
	        report.push(`📄 Top result: "${best.doc.title}" (similarity: ${best.similarity.toFixed(3)})\n\n`);
	      }

	      if (qaModel) {
	        report.push(`🤖 Testing QA model...\n`);
	        // The first stored document (truncated to 500 characters) is the QA context.
	        const sampleContext = documents[0].content.substring(0, 500);
	        const sampleQuestion = "What is artificial intelligence?";
	        const qaResult = await qaModel(sampleQuestion, sampleContext);
	        report.push(`✅ QA test: Generated answer with ${(qaResult.score * 100).toFixed(1)}% confidence\n`);
	        report.push(`💬 Answer: ${qaResult.answer.substring(0, 100)}...\n\n`);
	      }

	      if (llmModel) {
	        report.push(`🚀 Testing LLM model...\n`);
	        const samplePrompt = "Explain artificial intelligence:";
	        const generationSettings = {
	          max_new_tokens: 30,
	          temperature: 0.7,
	          do_sample: true,
	          return_full_text: false,
	        };
	        const llmResult = await llmModel(samplePrompt, generationSettings);
	        report.push(`✅ LLM test: Generated text completion\n`);
	        report.push(`💬 Generated: "${llmResult[0].generated_text.substring(0, 100)}..."\n\n`);
	      }

	      report.push(`🎉 All tests passed! System is fully operational.`);
	    }

	    outputDiv.textContent = report.join('');
	  } catch (error) {
	    console.error('Test error:', error);
	    outputDiv.textContent = `❌ Test failed: ${error.message}`;
	  } finally {
	    testBtn.disabled = false;
	  }
	}

	// Paint the initial status line as soon as the script is evaluated.
	updateStatus();

	// Identify the build in the console.
	console.log('🤖 AI-Powered RAG System with Transformers.js');
	console.log('Models: Xenova/all-MiniLM-L6-v2, Xenova/distilbert-base-cased-distilled-squad');

	// Publish the UI entry points on `window` so code outside this script
	// (such as inline event handlers in the page) can call them.
	Object.assign(window, {
	  showTab,
	  updateSliderValue,
	  initializeModels,
	  searchDocumentsSemantic,
	  searchDocumentsKeyword,
	  chatWithRAG,
	  chatWithLLM,
	  chatWithLLMRAG,
	  addDocumentManual,
	  testSystem,
	});