Spaces:

Refat81
/

Social_Media_Data_Extractor_Chatbot

Sleeping

App Files Community

Refat81 committed on Oct 21

Commit

a72a934

verified ·

1 Parent(s): 069aef5

Update pages/linkedin_extractor.py

Browse files

Files changed (1) hide show

pages/linkedin_extractor.py +294 -190

pages/linkedin_extractor.py CHANGED Viewed

@@ -20,19 +20,36 @@ st.set_page_config(
 )
 def get_embeddings():
-    """Initialize HuggingFace embeddings with fallback"""
     try:
-        embeddings = HuggingFaceEmbeddings(
-            model_name="sentence-transformers/all-MiniLM-L6-v2"
-        )
-        return embeddings
     except Exception as e:
-        st.error(f"❌ Failed to load embeddings: {e}")
-        st.info("🔧 Please make sure 'sentence-transformers' is in requirements.txt")
         return None
 def get_llm():
-    """Initialize HuggingFace LLM"""
     try:
         api_key = os.getenv('HUGGINGFACEHUB_API_TOKEN')
         if not api_key:
@@ -43,40 +60,55 @@ def get_llm():
             1. Go to Space Settings → Variables and Secrets
             2. Add: HUGGINGFACEHUB_API_TOKEN = "your_hf_token_here"
             3. Restart the Space
             """)
             return None
         llm = HuggingFaceHub(
-            repo_id="google/flan-t5-large",
             huggingfacehub_api_token=api_key,
             model_kwargs={
                 "temperature": 0.7,
-                "max_length": 512,
-                "max_new_tokens": 256
             }
         )
         return llm
     except Exception as e:
-        st.error(f"❌ HuggingFace error: {e}")
         return None
 def extract_linkedin_data(url, data_type):
     """Extract data from LinkedIn URLs"""
     try:
         headers = {
-            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
         }
         st.info(f"🌐 Accessing: {url}")
-        response = requests.get(url, headers=headers, timeout=20)
         if response.status_code != 200:
-            return f"❌ Failed to access page (Status: {response.status_code})"
         soup = BeautifulSoup(response.text, 'html.parser')
         # Remove scripts and styles
-        for script in soup(["script", "style", "meta", "link"]):
             script.decompose()
         # Extract and clean text
@@ -86,75 +118,96 @@ def extract_linkedin_data(url, data_type):
         clean_text = ' '.join(chunk for chunk in chunks if chunk)
         # Extract meaningful content
-        paragraphs = [p.strip() for p in clean_text.split('.') if len(p.strip()) > 30]
         if not paragraphs:
-            return "❌ No meaningful content found. The page might require login."
-        # Structure the result
-        result = f"🔗 LINKEDIN DATA EXTRACTION\n"
-        result += "=" * 60 + "\n\n"
-        result += f"📄 URL: {url}\n"
-        result += f"📊 Type: {data_type.upper()}\n"
-        result += f"⏰ Extracted: {time.strftime('%Y-%m-%d %H:%M:%S')}\n"
-        result += f"📝 Content Blocks: {len(paragraphs)}\n"
-        result += "=" * 60 + "\n\n"
-        # Add extracted content
-        for i, content in enumerate(paragraphs[:15], 1):
-            result += f"📄 Block {i}:\n"
-            result += f"{content}\n"
-            result += "-" * 40 + "\n\n"
-        result += "=" * 60 + "\n"
-        result += f"✅ Successfully extracted {len(paragraphs)} content blocks\n"
-        result += f"📊 Total characters: {len(clean_text):,}\n"
-        return result
     except requests.exceptions.Timeout:
-        return "❌ Error: Request timed out. Please try again."
     except requests.exceptions.ConnectionError:
-        return "❌ Error: Connection failed. Please check the URL."
     except Exception as e:
-        return f"❌ Error: {str(e)}"
-def get_text_chunks(text):
-    """Split text into chunks"""
-    if not text.strip():
-        return []
     splitter = CharacterTextSplitter(
         separator="\n",
-        chunk_size=800,
-        chunk_overlap=150,
         length_function=len
     )
-    return splitter.split_text(text)
-def get_vectorstore(text_chunks):
-    """Create vector store from text chunks"""
-    if not text_chunks:
-        return None
     try:
-        documents = [Document(page_content=chunk) for chunk in text_chunks]
         embeddings = get_embeddings()
         if embeddings is None:
-            return None
         vectorstore = FAISS.from_documents(documents, embeddings)
-        return vectorstore
     except Exception as e:
-        st.error(f"❌ Vector store creation failed: {e}")
-        return None
-def get_conversation_chain(vectorstore):
-    """Create conversational chain"""
-    if vectorstore is None:
-        return None
     try:
         llm = get_llm()
         if llm is None:
@@ -168,53 +221,63 @@ def get_conversation_chain(vectorstore):
         chain = ConversationalRetrievalChain.from_llm(
             llm=llm,
-            retriever=vectorstore.as_retriever(search_kwargs={"k": 3}),
             memory=memory,
             return_source_documents=True,
             output_key="answer"
         )
         return chain
     except Exception as e:
-        st.error(f"❌ Conversation chain error: {e}")
         return None
 def clear_chat_history():
     """Clear chat history while keeping extracted data"""
     if "vectorstore" in st.session_state and st.session_state.vectorstore:
-        st.session_state.chatbot = get_conversation_chain(st.session_state.vectorstore)
         st.session_state.chat_history = []
         st.success("🔄 Chat history cleared! Starting fresh conversation.")
 def main():
     st.title("💼 LinkedIn AI Analyzer")
     if st.button("← Back to Main Dashboard"):
         st.switch_page("app.py")
-    # Check API key
-    if not os.getenv('HUGGINGFACEHUB_API_TOKEN'):
-        st.error("""
-        🔑 **HuggingFace API Key Required**
-        To enable AI features:
-        1. Go to **Space Settings** → **Variables and Secrets**
-        2. Add: `HUGGINGFACEHUB_API_TOKEN = "your_hf_token_here"`
-        3. **Restart** the Space
-        Get free API key from: https://huggingface.co/settings/tokens
-        """)
     # Initialize session state
-    if "conversation" not in st.session_state:
-        st.session_state.conversation = None
-    if "chat_history" not in st.session_state:
-        st.session_state.chat_history = []
-    if "processed" not in st.session_state:
-        st.session_state.processed = False
     if "extracted_data" not in st.session_state:
-        st.session_state.extracted_data = ""
     if "vectorstore" not in st.session_state:
         st.session_state.vectorstore = None
     if "current_url" not in st.session_state:
         st.session_state.current_url = ""
@@ -229,7 +292,7 @@ def main():
             help="Select the type of LinkedIn content"
         )
-        # URL input with examples
         url_placeholder = {
             "profile": "https://www.linkedin.com/in/username/",
             "company": "https://www.linkedin.com/company/companyname/",
@@ -243,11 +306,12 @@ def main():
         )
         # Suggested URLs
-        st.markdown("### 💡 Try These:")
         suggested_urls = {
             "Microsoft": "https://www.linkedin.com/company/microsoft/",
             "Google": "https://www.linkedin.com/company/google/",
-            "Apple": "https://www.linkedin.com/company/apple/"
         }
         for name, url in suggested_urls.items():
@@ -256,145 +320,185 @@ def main():
                 st.rerun()
         # Extract button
-        col1, col2 = st.columns(2)
-        with col1:
-            if st.button("🚀 Extract & Analyze", type="primary", use_container_width=True):
-                url_to_use = linkedin_url.strip() or st.session_state.current_url
-                if not url_to_use:
-                    st.warning("⚠️ Please enter a LinkedIn URL")
-                elif not url_to_use.startswith('https://www.linkedin.com/'):
-                    st.error("❌ Please enter a valid LinkedIn URL")
-                else:
-                    with st.spinner("🔄 Extracting data from LinkedIn..."):
-                        extracted_data = extract_linkedin_data(url_to_use, data_type)
-                        if extracted_data and not extracted_data.startswith("❌"):
-                            # Process for AI
-                            chunks = get_text_chunks(extracted_data)
-                            if chunks:
-                                vectorstore = get_vectorstore(chunks)
-                                conversation = get_conversation_chain(vectorstore)
-                                if conversation:
-                                    st.session_state.conversation = conversation
-                                    st.session_state.vectorstore = vectorstore
-                                    st.session_state.processed = True
-                                    st.session_state.extracted_data = extracted_data
-                                    st.session_state.chat_history = []
-                                    st.session_state.current_url = url_to_use
-                                    st.success(f"✅ Ready to analyze {len(chunks)} content chunks!")
-                                else:
-                                    st.error("❌ Failed to initialize AI")
-                            else:
-                                st.error("❌ No content extracted")
                         else:
-                            st.error(extracted_data)
-        with col2:
-            if st.session_state.processed:
-                if st.button("🗑️ Clear Chat", type="secondary", use_container_width=True):
-                    clear_chat_history()
-        # Display extraction info
-        if st.session_state.processed:
             st.markdown("---")
-            st.markdown("### 📊 Extraction Info")
-            st.write(f"**Type:** {data_type.title()}")
-            st.write(f"**URL:** {st.session_state.current_url[:50]}...")
-            if st.session_state.extracted_data:
-                chunks = get_text_chunks(st.session_state.extracted_data)
-                st.write(f"**Chunks:** {len(chunks)}")
-                st.write(f"**Characters:** {len(st.session_state.extracted_data):,}")
     # Main content area
-    col1, col2 = st.columns([2, 1])
     with col1:
-        st.markdown("### 💬 AI Conversation")
-        # Display chat history
-        for i, chat in enumerate(st.session_state.chat_history):
-            if chat["role"] == "user":
-                with st.chat_message("user"):
-                    st.write(chat["content"])
-            elif chat["role"] == "assistant":
-                with st.chat_message("assistant"):
-                    st.write(chat["content"])
-        # Chat input
-        if st.session_state.processed and st.session_state.conversation:
-            user_input = st.chat_input("Ask about the LinkedIn data...")
-            if user_input:
-                # Add user message
-                st.session_state.chat_history.append({"role": "user", "content": user_input})
-                with st.chat_message("user"):
-                    st.write(user_input)
-                # Generate AI response
-                with st.chat_message("assistant"):
-                    with st.spinner("🤔 Analyzing..."):
-                        try:
-                            response = st.session_state.conversation.invoke({"question": user_input})
-                            answer = response.get("answer", "I couldn't generate a response based on the available data.")
-                            st.write(answer)
-                            st.session_state.chat_history.append({"role": "assistant", "content": answer})
-                        except Exception as e:
-                            error_msg = f"❌ Error generating response: {str(e)}"
-                            st.write(error_msg)
-                            st.session_state.chat_history.append({"role": "assistant", "content": error_msg})
-        elif st.session_state.processed:
-            st.info("💬 Extract data first to start chatting with AI")
         else:
             st.info("""
             👋 **Welcome to LinkedIn AI Analyzer!**
             **To get started:**
-            1. Select content type in sidebar
             2. Enter a LinkedIn URL or click a suggested company
             3. Click "Extract & Analyze"
             4. Chat with AI about the extracted content
             **Supported URLs:**
-            - 👤 Profiles: `https://www.linkedin.com/in/username/`
-            - 🏢 Companies: `https://www.linkedin.com/company/companyname/`
-            - 📝 Posts: `https://www.linkedin.com/posts/username_postid/`
-            **Note:** Only public profiles and content are accessible.
             """)
     with col2:
-        st.markdown("### 📈 Analytics")
-        if st.session_state.processed:
-            data = st.session_state.extracted_data
-            chunks = get_text_chunks(data)
-            st.metric("Content Type", data_type.title())
-            st.metric("Content Chunks", len(chunks))
-            st.metric("Total Characters", f"{len(data):,}")
-            st.metric("Conversation Turns", len(st.session_state.chat_history) // 2)
             # Suggested questions
             if not st.session_state.chat_history:
-                st.markdown("### 💡 Suggested Questions")
                 suggestions = [
-                    "Summarize the main information",
-                    "What are the key skills or experiences mentioned?",
-                    "Tell me about the company overview",
-                    "What's the main content of this page?",
-                    "Extract important achievements"
                 ]
                 for suggestion in suggestions:
                     if st.button(suggestion, key=f"suggest_{suggestion}", use_container_width=True):
                         st.info(f"💡 Try asking: '{suggestion}'")
         else:
-            st.info("📊 Analytics will appear here after data extraction")
 if __name__ == "__main__":
     main()

 )
 def get_embeddings():
+    """Initialize embeddings with multiple fallback options"""
     try:
+        # Try multiple embedding models
+        model_options = [
+            "sentence-transformers/all-MiniLM-L6-v2",  # Default
+            "sentence-transformers/paraphrase-albert-small-v2",  # Smaller alternative
+            "sentence-transformers/all-mpnet-base-v2"  # Higher quality
+        ]
+        for model_name in model_options:
+            try:
+                embeddings = HuggingFaceEmbeddings(
+                    model_name=model_name,
+                    model_kwargs={'device': 'cpu'},
+                    encode_kwargs={'normalize_embeddings': True}
+                )
+                st.success(f"✅ Loaded embeddings: {model_name.split('/')[-1]}")
+                return embeddings
+            except Exception as e:
+                continue
+        st.error("❌ All embedding models failed to load")
+        return None
     except Exception as e:
+        st.error(f"❌ Embeddings error: {e}")
         return None
 def get_llm():
+    """Initialize Mistral 7B LLM - Best for analysis"""
     try:
         api_key = os.getenv('HUGGINGFACEHUB_API_TOKEN')
         if not api_key:
             1. Go to Space Settings → Variables and Secrets
             2. Add: HUGGINGFACEHUB_API_TOKEN = "your_hf_token_here"
             3. Restart the Space
+            Get free API key: https://huggingface.co/settings/tokens
             """)
             return None
+        # Using Mistral 7B - Best balance of quality and accessibility
         llm = HuggingFaceHub(
+            repo_id="mistralai/Mistral-7B-Instruct-v0.1",
             huggingfacehub_api_token=api_key,
             model_kwargs={
                 "temperature": 0.7,
+                "max_length": 2048,
+                "max_new_tokens": 512,
+                "top_p": 0.95,
+                "repetition_penalty": 1.1,
+                "do_sample": True
             }
         )
         return llm
     except Exception as e:
+        st.error(f"❌ AI Model error: {e}")
         return None
 def extract_linkedin_data(url, data_type):
     """Extract data from LinkedIn URLs"""
     try:
         headers = {
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
+            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
+            'Accept-Language': 'en-US,en;q=0.5',
+            'Accept-Encoding': 'gzip, deflate, br',
+            'DNT': '1',
+            'Connection': 'keep-alive',
+            'Upgrade-Insecure-Requests': '1',
         }
         st.info(f"🌐 Accessing: {url}")
+        response = requests.get(url, headers=headers, timeout=25)
         if response.status_code != 200:
+            return {
+                "error": f"Failed to access page (Status: {response.status_code})",
+                "status": "error"
+            }
         soup = BeautifulSoup(response.text, 'html.parser')
         # Remove scripts and styles
+        for script in soup(["script", "style", "meta", "link", "nav", "header", "footer"]):
             script.decompose()
         # Extract and clean text
         clean_text = ' '.join(chunk for chunk in chunks if chunk)
         # Extract meaningful content
+        paragraphs = [p.strip() for p in clean_text.split('.') if len(p.strip()) > 40]
         if not paragraphs:
+            return {
+                "error": "No meaningful content found. The page might require login or have restricted access.",
+                "status": "error"
+            }
+        # Extract page title
+        title = soup.find('title')
+        page_title = title.text.strip() if title else "LinkedIn Page"
+        # Structure the extracted data
+        extracted_data = {
+            "page_info": {
+                "title": page_title,
+                "url": url,
+                "response_code": response.status_code,
+                "content_length": len(clean_text)
+            },
+            "content_blocks": paragraphs,
+            "extraction_time": time.strftime('%Y-%m-%d %H:%M:%S'),
+            "data_type": data_type,
+            "status": "success"
+        }
+        return extracted_data
     except requests.exceptions.Timeout:
+        return {"error": "Request timed out. Please try again.", "status": "error"}
     except requests.exceptions.ConnectionError:
+        return {"error": "Connection failed. Please check the URL and try again.", "status": "error"}
     except Exception as e:
+        return {"error": f"Extraction error: {str(e)}", "status": "error"}
+def process_extracted_data(extracted_data):
+    """Process extracted data for AI analysis"""
+    if not extracted_data or extracted_data.get("status") != "success":
+        return None, []
+    page_info = extracted_data['page_info']
+    content_blocks = extracted_data['content_blocks']
+    # Structure the data for AI
+    all_text = f"LINKEDIN DATA ANALYSIS REPORT\n"
+    all_text += "=" * 70 + "\n\n"
+    all_text += f"📄 PAGE INFORMATION:\n"
+    all_text += f"Title: {page_info['title']}\n"
+    all_text += f"URL: {page_info['url']}\n"
+    all_text += f"Type: {extracted_data['data_type'].upper()}\n"
+    all_text += f"Extracted: {extracted_data['extraction_time']}\n"
+    all_text += f"Response Code: {page_info['response_code']}\n"
+    all_text += f"Content Length: {page_info['content_length']} characters\n\n"
+    all_text += f"📊 CONTENT ANALYSIS:\n"
+    all_text += f"Total Content Blocks: {len(content_blocks)}\n\n"
+    # Add content blocks
+    for i, block in enumerate(content_blocks[:20]):
+        all_text += f"--- CONTENT BLOCK {i+1} ---\n"
+        all_text += f"Words: {len(block.split())} | Characters: {len(block)}\n"
+        all_text += f"Content: {block}\n\n"
+    all_text += "=" * 70 + "\n"
+    all_text += "END OF EXTRACTION REPORT"
+    # Split into chunks
     splitter = CharacterTextSplitter(
         separator="\n",
+        chunk_size=1000,
+        chunk_overlap=200,
         length_function=len
     )
+    chunks = splitter.split_text(all_text)
+    documents = [Document(page_content=chunk) for chunk in chunks]
+    # Create vector store
     try:
         embeddings = get_embeddings()
         if embeddings is None:
+            return None, []
         vectorstore = FAISS.from_documents(documents, embeddings)
+        return vectorstore, chunks
     except Exception as e:
+        st.error(f"Vector store creation failed: {e}")
+        return None, []
+def create_chatbot(vectorstore):
+    """Create conversational chatbot with Mistral"""
     try:
         llm = get_llm()
         if llm is None:
         chain = ConversationalRetrievalChain.from_llm(
             llm=llm,
+            retriever=vectorstore.as_retriever(search_kwargs={"k": 4}),
             memory=memory,
             return_source_documents=True,
             output_key="answer"
         )
         return chain
     except Exception as e:
+        st.error(f"Failed to create chatbot: {str(e)}")
         return None
 def clear_chat_history():
     """Clear chat history while keeping extracted data"""
     if "vectorstore" in st.session_state and st.session_state.vectorstore:
+        st.session_state.chatbot = create_chatbot(st.session_state.vectorstore)
         st.session_state.chat_history = []
         st.success("🔄 Chat history cleared! Starting fresh conversation.")
+def display_metrics(extracted_data):
+    """Display extraction metrics"""
+    if not extracted_data:
+        return
+    page_info = extracted_data['page_info']
+    content_blocks = extracted_data['content_blocks']
+    col1, col2, col3, col4 = st.columns(4)
+    with col1:
+        st.metric("Content Blocks", len(content_blocks))
+    with col2:
+        total_words = sum(len(block.split()) for block in content_blocks)
+        st.metric("Total Words", total_words)
+    with col3:
+        st.metric("Characters", f"{page_info['content_length']:,}")
+    with col4:
+        st.metric("Response Code", page_info['response_code'])
 def main():
     st.title("💼 LinkedIn AI Analyzer")
     if st.button("← Back to Main Dashboard"):
         st.switch_page("app.py")
     # Initialize session state
     if "extracted_data" not in st.session_state:
+        st.session_state.extracted_data = None
     if "vectorstore" not in st.session_state:
         st.session_state.vectorstore = None
+    if "chatbot" not in st.session_state:
+        st.session_state.chatbot = None
+    if "chat_history" not in st.session_state:
+        st.session_state.chat_history = []
+    if "processing" not in st.session_state:
+        st.session_state.processing = False
     if "current_url" not in st.session_state:
         st.session_state.current_url = ""
             help="Select the type of LinkedIn content"
         )
+        # URL input
         url_placeholder = {
             "profile": "https://www.linkedin.com/in/username/",
             "company": "https://www.linkedin.com/company/companyname/",
         )
         # Suggested URLs
+        st.markdown("### 🚀 Quick Test")
         suggested_urls = {
             "Microsoft": "https://www.linkedin.com/company/microsoft/",
             "Google": "https://www.linkedin.com/company/google/",
+            "Apple": "https://www.linkedin.com/company/apple/",
+            "Amazon": "https://www.linkedin.com/company/amazon/"
         }
         for name, url in suggested_urls.items():
                 st.rerun()
         # Extract button
+        if st.button("🚀 Extract & Analyze", type="primary", use_container_width=True):
+            url_to_use = linkedin_url.strip() or st.session_state.current_url
+            if not url_to_use:
+                st.warning("⚠️ Please enter a LinkedIn URL")
+            elif not url_to_use.startswith('https://www.linkedin.com/'):
+                st.error("❌ Please enter a valid LinkedIn URL")
+            else:
+                st.session_state.processing = True
+                with st.spinner("🔄 Extracting and analyzing data..."):
+                    extracted_data = extract_linkedin_data(url_to_use, data_type)
+                    if extracted_data.get("status") == "success":
+                        st.session_state.extracted_data = extracted_data
+                        st.session_state.current_url = url_to_use
+                        # Process for AI
+                        vectorstore, chunks = process_extracted_data(extracted_data)
+                        if vectorstore:
+                            st.session_state.vectorstore = vectorstore
+                            st.session_state.chatbot = create_chatbot(vectorstore)
+                            st.session_state.chat_history = []
+                            st.success(f"✅ Successfully processed {len(chunks)} content chunks!")
+                            st.balloons()
                         else:
+                            st.error("❌ Failed to process data for AI analysis")
+                    else:
+                        error_msg = extracted_data.get("error", "Unknown error occurred")
+                        st.error(f"❌ Extraction failed: {error_msg}")
+                st.session_state.processing = False
+        # Chat management
+        if st.session_state.chatbot and st.session_state.extracted_data:
             st.markdown("---")
+            st.subheader("💬 Chat Management")
+            if st.button("🗑️ Clear Chat History", type="secondary", use_container_width=True):
+                clear_chat_history()
     # Main content area
+    col1, col2 = st.columns([1, 1])
     with col1:
+        st.markdown("### 📊 Extraction Results")
+        if st.session_state.processing:
+            st.info("🔄 Processing LinkedIn data...")
+        elif st.session_state.extracted_data:
+            data = st.session_state.extracted_data
+            page_info = data['page_info']
+            content_blocks = data['content_blocks']
+            st.success("✅ Extraction Complete")
+            # Display metrics
+            display_metrics(data)
+            # Display page info
+            st.markdown("#### 🏷️ Page Information")
+            st.write(f"**Title:** {page_info['title']}")
+            st.write(f"**URL:** {page_info['url']}")
+            st.write(f"**Data Type:** {data['data_type'].title()}")
+            st.write(f"**Content Blocks:** {len(content_blocks)}")
+            st.write(f"**Extraction Time:** {data['extraction_time']}")
+            # Display sample content
+            st.markdown("#### 📝 Sample Content")
+            for i, block in enumerate(content_blocks[:3]):
+                with st.expander(f"Content Block {i+1} ({len(block.split())} words)"):
+                    st.write(block)
+            if len(content_blocks) > 3:
+                st.info(f"📄 And {len(content_blocks) - 3} more content blocks...")
         else:
             st.info("""
             👋 **Welcome to LinkedIn AI Analyzer!**
+            **Powered by Mistral 7B AI**
             **To get started:**
+            1. Select content type
             2. Enter a LinkedIn URL or click a suggested company
             3. Click "Extract & Analyze"
             4. Chat with AI about the extracted content
             **Supported URLs:**
+            - 👤 Public Profiles
+            - 🏢 Company Pages
+            - 📝 Public Posts
+            **AI Features:**
+            - Smart content analysis
+            - Conversational chat
+            - Data insights
+            - Content summarization
             """)
     with col2:
+        st.markdown("### 💬 AI Chat Analysis")
+        if st.session_state.chatbot and st.session_state.extracted_data:
+            # Display chat history
+            for i, chat in enumerate(st.session_state.chat_history):
+                if chat["role"] == "user":
+                    st.markdown(f"**👤 You:** {chat['content']}")
+                elif chat["role"] == "assistant":
+                    st.markdown(f"**🤖 AI:** {chat['content']}")
+            # Chat input
+            user_input = st.chat_input("Ask about the LinkedIn data...")
+            if user_input:
+                # Add user message
+                st.session_state.chat_history.append({"role": "user", "content": user_input})
+                # Generate AI response
+                with st.spinner("🤔 Mistral AI is analyzing..."):
+                    try:
+                        response = st.session_state.chatbot.invoke({"question": user_input})
+                        answer = response.get("answer", "I couldn't generate a response based on the available data.")
+                        st.session_state.chat_history.append({"role": "assistant", "content": answer})
+                        st.rerun()
+                    except Exception as e:
+                        error_msg = f"❌ Error generating response: {str(e)}"
+                        st.session_state.chat_history.append({"role": "assistant", "content": error_msg})
+                        st.rerun()
             # Suggested questions
             if not st.session_state.chat_history:
+                st.markdown("#### 💡 Suggested Questions")
                 suggestions = [
+                    "Summarize the main information from this page",
+                    "What are the key highlights or achievements?",
+                    "Analyze the business or professional focus",
+                    "What insights can you extract from this content?",
+                    "Provide a comprehensive overview"
                 ]
                 for suggestion in suggestions:
                     if st.button(suggestion, key=f"suggest_{suggestion}", use_container_width=True):
                         st.info(f"💡 Try asking: '{suggestion}'")
+        elif st.session_state.extracted_data:
+            st.info("💬 Start a conversation with the AI assistant")
         else:
+            st.info("🔍 Extract LinkedIn data to enable AI analysis")
+    # Features section
+    st.markdown("---")
+    st.markdown("### 🚀 Powered by Mistral 7B AI")
+    feature_cols = st.columns(3)
+    with feature_cols[0]:
+        st.markdown("""
+        **🤖 Advanced AI**
+        - Mistral 7B Instruct model
+        - Intelligent text analysis
+        - Contextual understanding
+        """)
+    with feature_cols[1]:
+        st.markdown("""
+        **💬 Smart Chat**
+        - Conversational memory
+        - Relevant responses
+        - Data-driven insights
+        """)
+    with feature_cols[2]:
+        st.markdown("""
+        **🔍 Deep Analysis**
+        - Content summarization
+        - Pattern recognition
+        - Professional insights
+        """)
 if __name__ == "__main__":
     main()