# pdf_chat / app.py
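"""Gradio app for chatting with a PDF on CPU.

Pipeline: extract text from the uploaded PDF, split it into sentence-aware
chunks, embed the chunks with a small SentenceTransformer, retrieve the most
similar chunks for each question, and answer with a small causal language model.
"""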
import os
os.environ['HF_HUB_ENABLE_HF_TRANSFER'] = '0'  # don't require the optional hf_transfer package for downloads
import gradio as gr
from sentence_transformers import SentenceTransformer
import numpy as np
from pypdf import PdfReader
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import re
# Global variables
chunks = []
embeddings = []
model = None
tokenizer = None
embed_model = None
text_cache = ""
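
# Note: both models below are intentionally small so the app stays responsive on
# CPU; swapping in a larger embedding model or a chat-tuned LLM should only
# require changing the model names in initialize_models().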
def initialize_models():
    """Initialize models on startup with optimizations"""
    global model, tokenizer, embed_model

    print("Loading models...")

    # Use smaller, faster embedding model
    embed_model = SentenceTransformer(
        'sentence-transformers/paraphrase-MiniLM-L3-v2',  # Faster, smaller model
        device='cpu'
    )

    # Use smaller, faster language model
    model_name = "microsoft/phi-1_5"  # Much faster than TinyLlama, better quality
    # Alternative: "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

    tokenizer = AutoTokenizer.from_pretrained(
        model_name,
        trust_remote_code=True
    )
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float32,
        low_cpu_mem_usage=True,
        trust_remote_code=True
    )

    # Set padding token
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    print("Models loaded successfully!")

def smart_chunk_text(text, chunk_size=500, overlap=100):
    """Smarter chunking that respects sentence boundaries"""
    # Split into sentences
    sentences = re.split(r'[.!?]+', text)

    chunks = []
    current_chunk = ""

    for sentence in sentences:
        sentence = sentence.strip()
        if not sentence:
            continue

        # If adding this sentence exceeds chunk size, save current chunk
        if len(current_chunk) + len(sentence) > chunk_size and current_chunk:
            chunks.append(current_chunk)
            # Start new chunk with overlap: the last ~20 words of the previous
            # chunk roughly approximate the requested `overlap` in characters
            words = current_chunk.split()
            current_chunk = " ".join(words[-20:]) + " " + sentence
        else:
            current_chunk += " " + sentence

    # Add the last chunk
    if current_chunk:
        chunks.append(current_chunk.strip())

    return chunks
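
# Rough behavior of smart_chunk_text with the defaults above: each chunk is about
# chunk_size characters of whole sentences, and every chunk after the first
# begins with up to the last 20 words of the previous chunk, so context carries
# across chunk boundaries.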

def process_pdf(pdf_file):
    """Process PDF and create embeddings - OPTIMIZED"""
    global chunks, embeddings, embed_model, text_cache

    if pdf_file is None:
        return "❌ Please upload a PDF file!", None

    try:
        # Read PDF
        pdf_reader = PdfReader(pdf_file.name)
        text = ""
        for page in pdf_reader.pages:
            text += page.extract_text() + "\n"

        if not text.strip():
            return "❌ Could not extract text from PDF!", None

        text_cache = text  # Cache for faster reprocessing

        # Smart chunking (smaller chunks = faster embedding)
        chunks = smart_chunk_text(text, chunk_size=500, overlap=100)

        # Batch encode for speed
        print(f"Creating embeddings for {len(chunks)} chunks...")
        embeddings = embed_model.encode(
            chunks,
            batch_size=32,  # Process multiple chunks at once
            show_progress_bar=False,
            convert_to_numpy=True
        )

        return f"✅ PDF processed! Created {len(chunks)} chunks. You can now ask questions!", None

    except Exception as e:
        print(f"Error processing PDF: {str(e)}")
        return f"❌ Error: {str(e)}", None
def find_relevant_chunks(query, top_k=2):  # Reduced from 3 to 2 for speed
    """Find most relevant chunks - OPTIMIZED"""
    global chunks, embeddings, embed_model

    if not chunks or len(embeddings) == 0:
        return []

    # Encode query
    query_embedding = embed_model.encode(
        [query],
        convert_to_numpy=True,
        show_progress_bar=False
    )[0]

    # Fast cosine similarity using numpy
    embeddings_norm = embeddings / np.linalg.norm(embeddings, axis=1, keepdims=True)
    query_norm = query_embedding / np.linalg.norm(query_embedding)
    similarities = np.dot(embeddings_norm, query_norm)

    # Get top k indices
    top_indices = np.argsort(similarities)[-top_k:][::-1]

    return [chunks[i] for i in top_indices]
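
# Note: microsoft/phi-1_5 is a base (not instruction-tuned) model, so the answer
# is produced by plain text completion of a "Context / Question / Answer:" prompt
# rather than a chat template.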
def generate_response(question, context):
    """Generate response - OPTIMIZED"""
    global model, tokenizer

    # Shorter, more efficient prompt
    prompt = f"""Context: {context[:800]}
Question: {question}
Answer:"""

    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        truncation=True,
        max_length=1024  # Reduced from 2048
    )

    # Faster generation settings
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=150,  # Reduced from 300
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
            num_beams=1  # Single sampled sequence, no beam search, for speed
        )

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Extract answer
    if "Answer:" in response:
        response = response.split("Answer:")[-1].strip()

    # Clean up response
    response = response.split("\n")[0].strip()  # Take first line

    return response
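
# Chat handler: retrieve the most relevant chunks for the question, generate an
# answer from them, and append the (question, answer) pair to the chat history.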
def chat(message, history):
    """Handle chat - OPTIMIZED"""
    global chunks

    if not chunks:
        return history + [[message, "⚠️ Please upload and process a PDF first!"]]

    if not message.strip():
        return history

    try:
        # Find relevant context (reduced chunks)
        relevant_chunks = find_relevant_chunks(message, top_k=2)
        context = " ".join(relevant_chunks)

        # Generate response
        response = generate_response(message, context)

        # Ensure response is not empty
        if not response or len(response) < 10:
            response = "I found relevant information but couldn't generate a clear answer. Please try rephrasing your question."

        return history + [[message, response]]

    except Exception as e:
        print(f"Error in chat: {str(e)}")
        return history + [[message, f"❌ Error: {str(e)}"]]

def clear_all():
    """Clear everything"""
    global chunks, embeddings, text_cache
    chunks = []
    embeddings = []
    text_cache = ""
    return None, "Ready to process a new PDF"

# Create UI with better styling
with gr.Blocks(title="Chat with PDF - Fast", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# ⚡ Chat with PDF - Optimized Fast Version")
    gr.Markdown("*Using lightweight models for faster responses*")

    with gr.Row():
        with gr.Column(scale=1):
            pdf_input = gr.File(
                label="📎 Upload PDF",
                file_types=[".pdf"]
            )
            process_btn = gr.Button(
                "🔄 Process PDF",
                variant="primary",
                size="lg"
            )
            status = gr.Textbox(
                label="Status",
                lines=2,
                interactive=False
            )
            gr.Markdown("### Tips:")
            gr.Markdown("""
- Processing is much faster now!
- Ask specific questions
- Keep questions concise
""")
            clear_all_btn = gr.Button("🗑️ Clear All", variant="stop")

        with gr.Column(scale=2):
            chatbot = gr.Chatbot(
                label="💬 Chat",
                height=450,
                bubble_full_width=False
            )
            msg = gr.Textbox(
                label="Question",
                placeholder="Ask a question about the PDF...",
                lines=2
            )
            with gr.Row():
                send_btn = gr.Button("📤 Send", variant="primary")
                clear_btn = gr.Button("Clear Chat")

    # Events
    process_btn.click(
        process_pdf,
        inputs=[pdf_input],
        outputs=[status, chatbot]
    )
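
    # Both pressing Enter in the question box and clicking Send run chat(), and a
    # chained .then() clears the input field afterwards.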
    msg.submit(
        chat,
        inputs=[msg, chatbot],
        outputs=[chatbot]
    ).then(
        lambda: "",
        None,
        [msg]
    )
    send_btn.click(
        chat,
        inputs=[msg, chatbot],
        outputs=[chatbot]
    ).then(
        lambda: "",
        None,
        [msg]
    )

    clear_btn.click(lambda: None, None, [chatbot])
    clear_all_btn.click(clear_all, None, [chatbot, status])

# Initialize on startup
initialize_models()

if __name__ == "__main__":
    demo.queue()  # Enable queuing for better performance
    demo.launch(share=False)