Update app.py

app.py CHANGED
@@ -6,7 +6,6 @@ from langchain_core.prompts import PromptTemplate
 from langchain.chains import LLMChain
 from pydantic import BaseModel, Field
 from typing import List
-from dotenv import load_dotenv
 import os
 import time
 from datetime import datetime
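With the dotenv import gone, the app now relies entirely on real environment variables; anyone running this file outside Spaces must export GOOGLE_API_KEY and HUGGINGFACE_ACCESS_TOKEN in the shell. If local .env support is still wanted, a small optional fallback (my suggestion, not part of this commit) keeps the Space requirements clean:

    # Optional local-dev fallback: load a .env file when python-dotenv happens
    # to be installed; silently skip it on the Space, where secrets arrive as
    # plain environment variables.
    try:
        from dotenv import load_dotenv
        load_dotenv()  # returns False (a no-op) when no .env file is present
    except ImportError:
        pass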
@@ -18,12 +17,36 @@ from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings
 from langchain_community.vectorstores import FAISS
 from langchain_text_splitters import RecursiveCharacterTextSplitter
 
-[… 6 removed lines (old 21-26) not legible in the page capture …]
+# ======================
+# SECRETS CONFIGURATION
+# ======================
+# Get API keys from Hugging Face Secrets
+GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
+HUGGINGFACE_ACCESS_TOKEN = os.environ.get("HUGGINGFACE_ACCESS_TOKEN")
+
+# Validate required secrets
+if not GOOGLE_API_KEY:
+    st.error("❌ GOOGLE_API_KEY not found. Please set it in Space Settings > Secrets.")
+    st.stop()
+
+if not HUGGINGFACE_ACCESS_TOKEN:
+    st.error("❌ HUGGINGFACE_ACCESS_TOKEN not found. Please set it in Space Settings > Secrets.")
+    st.stop()
+
+# Initialize LLM and embeddings with secrets
+llm = ChatGoogleGenerativeAI(
+    model="gemini-1.5-pro",
+    google_api_key=GOOGLE_API_KEY
+)
+
+embeddings = HuggingFaceInferenceAPIEmbeddings(
+    api_key=HUGGINGFACE_ACCESS_TOKEN,
+    model_name="BAAI/bge-small-en-v1.5"
+)
+
+# ======================
+# DOCUMENT ANALYSIS CLASSES
+# ======================
 class KeyPoint(BaseModel):
     point: str = Field(description="A key point extracted from the document.")
 
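A note on the new secrets block: Hugging Face Spaces exposes values saved under Settings > Secrets to the running app as environment variables, so os.environ.get is the right lookup. The validate-and-stop pattern is repeated per key; it could be folded into a small helper. A minimal sketch (the require_secret name is mine, not part of this commit):

    import os
    import streamlit as st

    def require_secret(name: str) -> str:
        # Fetch a required secret from the environment; halt the Streamlit
        # script with a visible error if it has not been configured.
        value = os.environ.get(name)
        if not value:
            st.error(f"❌ {name} not found. Please set it in Space Settings > Secrets.")
            st.stop()
        return value

    GOOGLE_API_KEY = require_secret("GOOGLE_API_KEY")
    HUGGINGFACE_ACCESS_TOKEN = require_secret("HUGGINGFACE_ACCESS_TOKEN")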
@@ -33,7 +56,10 @@ class Summary(BaseModel):
 class DocumentAnalysis(BaseModel):
     key_points: List[KeyPoint] = Field(description="List of key points from the document.")
     summary: Summary = Field(description="Summary of the document.")
-
+
+# ======================
+# CHAIN SETUP
+# ======================
 parser = PydanticOutputParser(pydantic_object=DocumentAnalysis)
 
 prompt_template = """
@@ -46,54 +72,51 @@ prompt = PromptTemplate(
     input_variables=["text"],
     partial_variables={"format_instructions": parser.get_format_instructions()}
 )
-
+
 chain = LLMChain(llm=llm, prompt=prompt, output_parser=parser)
-
+
+# ======================
+# UTILITY FUNCTIONS
+# ======================
 def analyze_text_structured(text):
-
-    return output
+    return chain.run(text=text)
 
-
 def extract_text_from_pdf(pdf_file):
     pdf_reader = PyPDF2.PdfReader(pdf_file)
-    text = ""
-    for page in pdf_reader.pages:
-        text += page.extract_text()
-    return text
+    return "".join(page.extract_text() for page in pdf_reader.pages)
 
-
 def json_to_text(analysis):
     text_output = "=== Summary ===\n" + f"{analysis.summary.summary}\n\n"
     text_output += "=== Key Points ===\n"
     for i, key_point in enumerate(analysis.key_points, start=1):
         text_output += f"{i}. {key_point.point}\n"
     return text_output
-
+
 def create_pdf_report(analysis):
     pdf = FPDF()
     pdf.add_page()
     pdf.set_font('Helvetica', '', 12)
     pdf.cell(200, 10, txt="PDF Analysis Report", ln=True, align='C')
     pdf.cell(200, 10, txt=f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", ln=True, align='C')
-    clean_text = json_to_text(analysis)
-    pdf.multi_cell(0, 10, txt=clean_text)
+    pdf.multi_cell(0, 10, txt=json_to_text(analysis))
     return pdf.output(dest='S')
-
+
 def create_word_report(analysis):
     doc = Document()
     doc.add_heading('PDF Analysis Report', 0)
     doc.add_paragraph(f'Generated on: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}')
-    clean_text = json_to_text(analysis)
     doc.add_heading('Analysis', level=1)
-    doc.add_paragraph(
+    doc.add_paragraph(json_to_text(analysis))
     docx_bytes = io.BytesIO()
     doc.save(docx_bytes)
     docx_bytes.seek(0)
     return docx_bytes.getvalue()
-
-st.set_page_config(page_title="Chat With PDF", page_icon="😒")
 
-
+# ======================
+# STREAMLIT UI
+# ======================
+st.set_page_config(page_title="Chat With PDF", page_icon="📄")
+
 def local_css():
     st.markdown("""
     <style>
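A migration note on the chain setup above: LLMChain and its run method are deprecated in recent LangChain releases in favor of LCEL composition. If this Space is later pinned to a newer langchain, a sketch of the equivalent wiring (reusing the same prompt, llm, and parser objects from the diff) would be:

    # LCEL-style pipeline: prompt -> chat model -> Pydantic parser.
    # Drop-in replacement (on newer LangChain) for
    # LLMChain(llm=llm, prompt=prompt, output_parser=parser).
    chain = prompt | llm | parser

    def analyze_text_structured(text):
        # invoke() takes a dict of prompt variables and returns a DocumentAnalysis.
        return chain.invoke({"text": text})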
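Also worth flagging in create_pdf_report: pdf.output(dest='S') returns a str under the classic PyFPDF package but a bytearray under fpdf2, while st.download_button expects bytes. A defensive ending for the function, assuming either package may be installed:

    raw = pdf.output(dest='S')
    # PyFPDF returns a latin-1 str; fpdf2 returns a bytearray.
    return raw.encode('latin-1') if isinstance(raw, str) else bytes(raw)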
@@ -177,7 +200,8 @@ def local_css():
     """, unsafe_allow_html=True)
 
 local_css()
-
+
+# Initialize session state
 if "current_file" not in st.session_state:
     st.session_state.current_file = None
 if "pdf_summary" not in st.session_state:
@@ -193,85 +217,94 @@ if "vectorstore" not in st.session_state:
 if "messages" not in st.session_state:
     st.session_state.messages = []
 
-
+# UI Components
 st.markdown('<div class="main-header">', unsafe_allow_html=True)
 st.markdown('<div class="flag-stripe"></div>', unsafe_allow_html=True)
-st.title("
-st.caption("Your AI-powered
+st.title("📄 Chat With PDF")
+st.caption("Your AI-powered Document Analyzer")
 st.markdown('</div>', unsafe_allow_html=True)
-[… 12 removed lines (old 202-213) not legible in the page capture …]
+
+# File Uploader
+with st.container():
+    st.markdown('<div class="card animate-fadeIn">', unsafe_allow_html=True)
+    uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")
+
+    if uploaded_file is not None:
+        if st.session_state.current_file != uploaded_file.name:
+            st.session_state.current_file = uploaded_file.name
+            st.session_state.pdf_summary = None
+            st.session_state.pdf_report = None
+            st.session_state.word_report = None
+            st.session_state.vectorstore = None
             st.session_state.messages = []
-[… 35 removed lines (old 215-249) not legible in the page capture …]
+
+        text = extract_text_from_pdf(uploaded_file)
+
+        if st.button("Analyze Text"):
+            start_time = time.time()
+            with st.spinner("Analyzing..."):
+                analysis = analyze_text_structured(text)
+                st.session_state.pdf_summary = analysis
+
+                text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
+                chunks = text_splitter.split_text(text)
+                st.session_state.vectorstore = FAISS.from_texts(chunks, embeddings)
+
+                st.session_state.pdf_report = create_pdf_report(analysis)
+                st.session_state.word_report = create_word_report(analysis)
+
+            st.session_state.analysis_time = time.time() - start_time
+            st.subheader("Analysis Results")
+            st.text(json_to_text(analysis))
+
+            col1, col2 = st.columns(2)
+            with col1:
+                st.download_button(
+                    label="Download PDF Report",
+                    data=st.session_state.pdf_report,
+                    file_name="analysis_report.pdf",
+                    mime="application/pdf"
+                )
+            with col2:
+                st.download_button(
+                    label="Download Word Report",
+                    data=st.session_state.word_report,
+                    file_name="analysis_report.docx",
+                    mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
+                )
+    st.markdown('</div>', unsafe_allow_html=True)
+
+# Chat Interface
 if "vectorstore" in st.session_state:
     st.subheader("Chat with the Document")
+
     for message in st.session_state.messages:
         with st.chat_message(message["role"]):
            st.markdown(message["content"])
 
     if prompt := st.chat_input("Ask a question about the document"):
         st.session_state.messages.append({"role": "user", "content": prompt})
+
         with st.chat_message("user"):
             st.markdown(prompt)
 
         with st.chat_message("assistant"):
             with st.spinner("Thinking..."):
-
                 docs = st.session_state.vectorstore.similarity_search(prompt, k=3)
                 context = "\n".join([doc.page_content for doc in docs])
-
+
                 messages = [
-                    SystemMessage(content="You are a
+                    SystemMessage(content="You are a helpful assistant. Answer the question based on the provided document context."),
                     HumanMessage(content=f"Context: {context}\n\nQuestion: {prompt}")
                 ]
 
                 response = llm.invoke(messages)
                 st.markdown(response.content)
+
                 st.session_state.messages.append({"role": "assistant", "content": response.content})
 
-
-st.markdown(
+# Footer
+st.markdown(
+    f'<div class="footer">Analysis Time: {st.session_state.analysis_time:.1f}s | Powered by Google Generative AI</div>',
+    unsafe_allow_html=True
+)
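One quirk survives this commit: the chat interface is gated on if "vectorstore" in st.session_state, but the session-state setup above (see the if "vectorstore" not in st.session_state context line) pre-creates that key, and the reset branch assigns it None, so the key test is always true and the chat UI can render before any document has been indexed. Checking the value instead of the key would be safer; a one-line sketch:

    # Render the chat only once a vectorstore actually exists, not merely the key.
    if st.session_state.get("vectorstore") is not None:
        st.subheader("Chat with the Document")

For a similar reason, the new footer reads st.session_state.analysis_time, which will raise on first page load unless that key is initialized alongside the others (the initialization block is not fully visible in this diff).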