Spaces:

Refat81
/

Social_Media_Data_Extractor_Chatbot

Sleeping

App Files Files Community

Refat81 committed on Oct 21

Commit

46566cb

verified ·

1 Parent(s): 044142b

Upload 5 files

Browse files

Files changed (5) hide show

facebook.py +858 -0
let_deploy.py +453 -0
linkdin_deploy.py +246 -0
main_dashboard.py +238 -0
requirements.txt +15 -3

facebook.py ADDED Viewed

	@@ -0,0 +1,858 @@

+import streamlit as st
+import time
+from bs4 import BeautifulSoup
+from langchain.text_splitter import CharacterTextSplitter
+from langchain.embeddings import SentenceTransformerEmbeddings
+from langchain.vectorstores import FAISS
+from langchain.memory import ConversationBufferMemory
+from langchain.chains import ConversationalRetrievalChain
+from langchain.schema import Document
+from selenium import webdriver
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.chrome.options import Options
+from langchain_community.llms.ollama import Ollama
+import re
+import requests
+import subprocess
+import os
+import json
+from datetime import datetime
+from typing import List
+import logging
+# Configure root logging at INFO level and create this module's logger,
+# which the extractor class below uses for error/warning/debug messages.
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+class FacebookGroupExtractor:
+    """Selenium-based extractor for Facebook group pages that relies on a manual login.
+    Typical flow: setup_driver() -> manual_login() -> check_login_status() ->
+    extract_group_data() -> close(). Progress and errors are reported through
+    Streamlit (st.info / st.error) and the module logger.
+    """
+    # Substrings that mark a text block as UI chrome rather than a post
+    _EXCLUDED_PHRASES = (
+        'facebook', 'login', 'sign up', 'password', 'email',
+        'cookie', 'privacy', 'terms', 'menu', 'navigation',
+        'home', 'search', 'notification', 'messenger', 'watch',
+        'marketplace', 'groups', 'pages', 'events'
+    )
+    # A number directly followed by "like"/"reaction" in lower-cased post text
+    _REACTION_PATTERN = re.compile(r'(\d+)\s*(like|reaction)')
+    def __init__(self):
+        self.driver = None  # Selenium WebDriver, created by setup_driver()
+        self.wait = None  # WebDriverWait bound to the driver
+        self.is_logged_in = False  # set by check_login_status()
+    def setup_driver(self):
+        """Create the Chrome driver used for manual login.
+        Returns True on success; on failure shows a Streamlit error and returns False.
+        """
+        chrome_options = Options()
+        chrome_options.add_argument("--start-maximized")
+        chrome_options.add_argument("--disable-gpu")
+        chrome_options.add_argument("--no-sandbox")
+        chrome_options.add_argument("--disable-dev-shm-usage")
+        chrome_options.add_argument("--disable-blink-features=AutomationControlled")
+        chrome_options.add_argument("--disable-extensions")
+        chrome_options.add_argument("--disable-infobars")
+        chrome_options.add_argument("--disable-popup-blocking")
+        chrome_options.add_argument("--ignore-certificate-errors")
+        chrome_options.add_argument("--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36")
+        try:
+            self.driver = webdriver.Chrome(options=chrome_options)
+            self.wait = WebDriverWait(self.driver, 25)
+            return True
+        except Exception as e:
+            st.error(f"Failed to setup driver: {str(e)}")
+            return False
+    def manual_login(self):
+        """Open facebook.com in the driver and show login instructions.
+        Returns True if the page was opened, False (with a Streamlit error) otherwise.
+        """
+        try:
+            st.info("🔓 Opening Facebook for manual login...")
+            self.driver.get("https://www.facebook.com")
+            time.sleep(3)
+            # Dismiss the cookie banner if one is shown
+            self._handle_cookies()
+            st.success("✅ Facebook opened successfully!")
+            st.info("""
+            **Please manually login to Facebook:**
+            1. Enter your email/phone and password
+            2. Complete any security checks if needed
+            3. Wait until you're fully logged in
+            4. Return to this app and click 'I'm Logged In'
+            """)
+            return True
+        except Exception as e:
+            st.error(f"Failed to open Facebook: {str(e)}")
+            return False
+    def check_login_status(self):
+        """Return True (and set is_logged_in) when the current page looks logged in.
+        A visible element matching one of the indicator XPaths, or a home-feed
+        style URL, counts as logged in. Any unexpected error is logged and
+        reported as not logged in.
+        """
+        try:
+            login_indicators = [
+                "//a[@aria-label='Profile']",
+                "//div[@aria-label='Account']",
+                "//span[contains(text(), 'Menu')]",
+                "//div[contains(@aria-label, 'Facebook')]"
+            ]
+            for indicator in login_indicators:
+                try:
+                    element = self.driver.find_element(By.XPATH, indicator)
+                    if element.is_displayed():
+                        self.is_logged_in = True
+                        return True
+                except Exception:
+                    # Indicator not present (or stale); try the next one
+                    continue
+            # Fall back to the URL of the current page
+            current_url = self.driver.current_url
+            if "facebook.com/home" in current_url or "facebook.com/?sk" in current_url:
+                self.is_logged_in = True
+                return True
+            return False
+        except Exception as e:
+            logger.error(f"Login check error: {str(e)}")
+            return False
+    def extract_group_data(self, group_url: str, max_scrolls: int = 10) -> dict:
+        """Open a group page and collect its info and posts.
+        Args:
+            group_url: URL of the group; any query string is stripped before loading.
+            max_scrolls: maximum number of scroll-and-extract passes.
+        Returns:
+            On success a dict with "group_info", "posts", "extraction_time",
+            "total_posts" and "status" == "success"; otherwise a dict with
+            "error" and "status" == "error".
+        """
+        try:
+            if not self.is_logged_in:
+                return {"error": "Not logged in. Please login first.", "status": "error"}
+            st.info(f"🌐 Accessing group: {group_url}")
+            # Drop the query string so only the bare group URL is loaded
+            if '?' in group_url:
+                group_url = group_url.split('?')[0]
+            self.driver.get(group_url)
+            time.sleep(5)
+            if not self._verify_group_access():
+                return {"error": "Cannot access group. Check if URL is correct and you have permissions.", "status": "error"}
+            group_info = self._extract_group_info()
+            posts_data = self._scroll_and_extract_posts(max_scrolls)
+            return {
+                "group_info": group_info,
+                "posts": posts_data,
+                "extraction_time": datetime.now().isoformat(),
+                "total_posts": len(posts_data),
+                "status": "success"
+            }
+        except Exception as e:
+            logger.error(f"Extraction error: {str(e)}")
+            return {"error": f"Extraction failed: {str(e)}", "status": "error"}
+    def _handle_cookies(self):
+        """Best-effort click on the first cookie-consent button that can be found."""
+        cookie_selectors = [
+            "button[data-testid='cookie-policy-manage-dialog-accept-button']",
+            "button[data-cookiebanner='accept_button']",
+            "button[title*='cookie' i]",
+            "button[title*='allow' i]",
+            "//button[contains(., 'Allow')]",
+            "//button[contains(., 'Accept')]"
+        ]
+        for selector in cookie_selectors:
+            # Selectors starting with "//" are XPath, the rest are CSS
+            by = By.XPATH if selector.startswith("//") else By.CSS_SELECTOR
+            try:
+                self.driver.find_element(by, selector).click()
+                time.sleep(2)
+                break
+            except Exception:
+                # No such button or not clickable; the banner is optional
+                continue
+    def _verify_group_access(self) -> bool:
+        """Return True when the loaded page looks like an accessible group."""
+        try:
+            # NOTE(review): "//div[@role='main']" is very generic and may also
+            # match non-group pages, making the later checks rarely reached - confirm.
+            group_indicators = [
+                "//div[contains(@data-pagelet, 'Group')]",
+                "//div[contains(@aria-label, 'Group')]",
+                "//h1[contains(., 'Group')]",
+                "//div[@role='main']"
+            ]
+            for indicator in group_indicators:
+                try:
+                    element = self.driver.find_element(By.XPATH, indicator)
+                    if element.is_displayed():
+                        return True
+                except Exception:
+                    continue
+            # No positive indicator: look for denial wording in the page source
+            page_text = self.driver.page_source.lower()
+            if any(indicator in page_text for indicator in ['not available', 'content unavailable', 'access denied']):
+                return False
+            return "groups" in self.driver.current_url
+        except Exception:
+            return False
+    def _first_text(self, xpaths, is_wanted):
+        """Return the raw text of the first element (in xpath order) accepted by is_wanted, else None."""
+        for xpath in xpaths:
+            try:
+                text = self.driver.find_element(By.XPATH, xpath).text
+            except Exception:
+                # Element missing or unreadable; try the next selector
+                continue
+            if is_wanted(text):
+                return text
+        return None
+    def _extract_group_info(self) -> dict:
+        """Collect the group's name, member count and description when they can be found.
+        Returns a dict containing only the keys ("name", "member_count",
+        "description") for which a value was found.
+        """
+        group_info = {}
+        try:
+            # Group name: first candidate with more than 3 non-blank characters
+            name = self._first_text(
+                [
+                    "//h1",
+                    "//div[contains(@class, 'groupName')]",
+                    "//span[contains(@class, 'groupName')]",
+                    "//title"
+                ],
+                lambda text: len(text.strip()) > 3
+            )
+            if name is not None:
+                group_info["name"] = name.strip()
+            # Member count: first candidate whose text mentions "members"
+            member_text = self._first_text(
+                [
+                    "//*[contains(text(), 'members')]",
+                    "//*[contains(text(), 'Members')]",
+                    "//div[contains(@class, 'memberCount')]"
+                ],
+                lambda text: 'members' in text.lower()
+            )
+            if member_text is not None:
+                group_info["member_count"] = member_text
+            # Description: first candidate with any non-blank text
+            desc = self._first_text(
+                [
+                    "//div[contains(@class, 'description')]",
+                    "//div[contains(@class, 'about')]",
+                    "//div[contains(@data-ad-comet-preview, 'message')]"
+                ],
+                lambda text: bool(text.strip())
+            )
+            if desc is not None:
+                group_info["description"] = desc.strip()
+        except Exception as e:
+            logger.warning(f"Group info extraction failed: {str(e)}")
+        return group_info
+    def _scroll_and_extract_posts(self, max_scrolls: int) -> List[dict]:
+        """Repeatedly extract posts and scroll down, up to max_scrolls times.
+        Stops early when the page height no longer grows after a scroll.
+        Returns the de-duplicated list of post dicts.
+        """
+        all_posts = []
+        last_height = self.driver.execute_script("return document.body.scrollHeight")
+        for scroll_iteration in range(max_scrolls):
+            st.info(f"📜 Scrolling... ({scroll_iteration + 1}/{max_scrolls})")
+            for post in self._extract_posts_from_current_page():
+                if not self._is_duplicate_post(post, all_posts):
+                    all_posts.append(post)
+            self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
+            time.sleep(4)
+            # An unchanged height after scrolling means no more content was loaded
+            new_height = self.driver.execute_script("return document.body.scrollHeight")
+            if new_height == last_height:
+                st.success("✅ Reached end of content")
+                break
+            last_height = new_height
+        return all_posts
+    def _extract_posts_from_current_page(self) -> List[dict]:
+        """Run every extraction strategy on the current DOM and concatenate the results."""
+        posts = []
+        # Strategy 1: article elements (main posts)
+        posts.extend(self._extract_by_xpath("//div[@role='article']", "article"))
+        # Strategy 2: divs inside feed pagelets
+        posts.extend(self._extract_by_xpath("//div[contains(@data-pagelet, 'Feed')]//div", "feed"))
+        # Strategy 3: user content containers
+        posts.extend(self._extract_by_xpath("//div[contains(@class, 'userContent')]", "userContent"))
+        # Strategy 4: any div with substantial direct text
+        posts.extend(self._extract_text_rich_elements())
+        return posts
+    def _extract_by_xpath(self, xpath: str, source: str) -> List[dict]:
+        """Return post dicts for every element matching xpath whose text is a valid post.
+        source is stored in each post dict to record which strategy produced it.
+        """
+        posts = []
+        try:
+            elements = self.driver.find_elements(By.XPATH, xpath)
+            for i, element in enumerate(elements):
+                try:
+                    post_text = element.text.strip()
+                    if self._is_valid_post(post_text):
+                        posts.append(self._parse_structured_post(element, post_text, source))
+                except Exception as e:
+                    logger.debug(f"Error extracting element {i}: {str(e)}")
+                    continue
+        except Exception as e:
+            logger.warning(f"XPath {source} failed: {str(e)}")
+        return posts
+    def _extract_text_rich_elements(self) -> List[dict]:
+        """Return post dicts for divs whose own text node is longer than 100 characters."""
+        posts = []
+        try:
+            elements = self.driver.find_elements(By.XPATH, "//div[string-length(text()) > 100]")
+            for element in elements:
+                try:
+                    text = element.text.strip()
+                    if self._is_valid_post(text):
+                        posts.append({
+                            "content": text,
+                            "source": "text_rich",
+                            "timestamp": datetime.now().isoformat(),
+                            "has_comments": "comment" in text.lower()[:200]
+                        })
+                except Exception:
+                    # Element went stale or is unreadable; skip it
+                    continue
+        except Exception as e:
+            logger.warning(f"Text-rich extraction failed: {str(e)}")
+        return posts
+    def _parse_structured_post(self, element, text: str, source: str) -> dict:
+        """Build a post dict from an element and its already-extracted text.
+        The dict has "content", "source", "timestamp" (time of extraction),
+        "has_comments" and "reactions" (first number followed by
+        "like"/"reaction" in the text, else 0).
+        """
+        post_data = {
+            "content": text,
+            "source": source,
+            "timestamp": datetime.now().isoformat(),
+            "has_comments": False,
+            "reactions": 0
+        }
+        try:
+            # The leading "." keeps the search inside this post element; a bare
+            # "//" would search the whole document and match on nearly every post.
+            comment_indicators = [
+                ".//*[contains(text(), 'comment')]",
+                ".//*[contains(text(), 'Comment')]"
+            ]
+            for indicator in comment_indicators:
+                try:
+                    if element.find_elements(By.XPATH, indicator):
+                        post_data["has_comments"] = True
+                        break
+                except Exception:
+                    continue
+            reaction_match = self._REACTION_PATTERN.search(text.lower())
+            if reaction_match:
+                post_data["reactions"] = int(reaction_match.group(1))
+        except Exception as e:
+            logger.debug(f"Structured parsing failed: {str(e)}")
+        return post_data
+    def _is_valid_post(self, text: str) -> bool:
+        """Return True when text looks like a post rather than UI chrome.
+        Requires at least 50 characters and 8 words, and none of the excluded
+        UI phrases as a (case-insensitive) substring.
+        """
+        if not text or len(text) < 50:
+            return False
+        text_lower = text.lower()
+        if any(phrase in text_lower for phrase in self._EXCLUDED_PHRASES):
+            return False
+        return len(text.split()) >= 8
+    def _is_duplicate_post(self, new_post: dict, existing_posts: List[dict]) -> bool:
+        """Return True when new_post is more than 80% similar to any existing post.
+        Only the first 150 characters of each post's "content" are compared.
+        """
+        new_content = new_post.get("content", "")[:150]
+        return any(
+            self._calculate_similarity(new_content, existing_post.get("content", "")[:150]) > 0.8
+            for existing_post in existing_posts
+        )
+    def _calculate_similarity(self, text1: str, text2: str) -> float:
+        """Return the Jaccard similarity of the lower-cased word sets (0.0 if either is empty)."""
+        words1 = set(text1.lower().split())
+        words2 = set(text2.lower().split())
+        if not words1 or not words2:
+            return 0.0
+        return len(words1 & words2) / len(words1 | words2)
+    def close(self):
+        """Quit the browser (if any) and reset the extractor to its initial state.
+        Safe to call more than once; a failing quit is logged, not raised.
+        """
+        if self.driver is None:
+            return
+        try:
+            self.driver.quit()
+        except Exception as e:
+            logger.warning(f"Failed to quit driver cleanly: {str(e)}")
+        finally:
+            # Drop the dead session so later calls cannot reuse or re-quit it
+            self.driver = None
+            self.wait = None
+            self.is_logged_in = False
+def check_ollama_running():
+    """Return True when the local Ollama API answers GET /api/tags with HTTP 200."""
+    try:
+        response = requests.get("http://localhost:11434/api/tags", timeout=5)
+    except requests.RequestException:
+        # Connection refused / timeout: the service is simply not reachable
+        return False
+    return response.status_code == 200
+def start_ollama():
+    """Launch `ollama serve` in the background and report whether it came up.
+    Waits five seconds after spawning the process, then returns the result of
+    check_ollama_running(). Any failure is shown as a Streamlit error and
+    reported as False.
+    """
+    command = ['ollama', 'serve']
+    on_windows = os.name == 'nt'
+    try:
+        if on_windows:
+            # Windows: keep the server from opening a console window
+            subprocess.Popen(command, creationflags=subprocess.CREATE_NO_WINDOW)
+        else:
+            # Linux/Mac: discard the server's output
+            subprocess.Popen(command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+        time.sleep(5)
+        is_up = check_ollama_running()
+    except Exception as e:
+        st.error(f"Failed to start Ollama: {e}")
+        return False
+    return is_up
+def get_available_models():
+    """Return the model names reported by the local Ollama API.
+    Always returns a non-empty list: when the API is unreachable, answers
+    with a non-200 status, returns malformed data, or lists no models, a
+    default list of common model names is returned instead.
+    """
+    default_models = ["llama2", "mistral", "gemma", "llama3"]
+    try:
+        response = requests.get("http://localhost:11434/api/tags", timeout=5)
+        if response.status_code == 200:
+            models = response.json().get('models', [])
+            names = [model['name'] for model in models]
+            if names:
+                return names
+    except Exception as e:
+        # Network failure or unexpected payload shape: fall back to defaults
+        logger.warning(f"Could not list Ollama models: {str(e)}")
+    return default_models
+def process_group_data(group_data: dict):
+    """Turn extracted group data into a FAISS vector store for the chatbot.
+    Returns (vectorstore, chunks); returns (None, []) when group_data is
+    empty or contains no posts.
+    """
+    if not group_data or "posts" not in group_data or not group_data["posts"]:
+        return None, []
+    post_list = group_data["posts"]
+    group_name = group_data.get('group_info', {}).get('name', 'Unknown')
+    # Assemble one text document: a header followed by one section per post
+    parts = [
+        f"Group: {group_name}\n\n",
+        f"Total Posts Extracted: {len(post_list)}\n\n",
+    ]
+    for number, entry in enumerate(post_list, start=1):
+        parts.append(f"--- Post {number} ---\n")
+        parts.append(f"Source: {entry.get('source', 'unknown')}\n")
+        parts.append(f"Reactions: {entry.get('reactions', 0)}\n")
+        parts.append(f"Has Comments: {entry.get('has_comments', False)}\n")
+        parts.append(f"Content: {entry.get('content', '')}\n\n")
+    all_text = "".join(parts)
+    # Split on newlines into overlapping chunks
+    text_splitter = CharacterTextSplitter(
+        separator="\n",
+        chunk_size=1000,
+        chunk_overlap=200,
+        length_function=len
+    )
+    chunks = text_splitter.split_text(all_text)
+    # Embed every chunk and index it
+    docs = [Document(page_content=piece) for piece in chunks]
+    embedder = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
+    return FAISS.from_documents(docs, embedder), chunks
+def create_chatbot(vectorstore, model_name: str):
+    """Build a ConversationalRetrievalChain over vectorstore using a local Ollama model.
+    Returns the chain, or None (after showing a Streamlit error) if any step fails.
+    """
+    try:
+        ollama_llm = Ollama(
+            model=model_name,
+            base_url="http://localhost:11434",
+            temperature=0.7,
+            top_k=40,
+            top_p=0.9,
+            num_predict=512
+        )
+        # Conversation memory; the chain's "answer" output is what gets stored
+        conversation_memory = ConversationBufferMemory(
+            memory_key="chat_history",
+            return_messages=True,
+            output_key="answer"
+        )
+        # Retrieve the 3 most similar chunks per question
+        top_k_retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
+        return ConversationalRetrievalChain.from_llm(
+            llm=ollama_llm,
+            retriever=top_k_retriever,
+            memory=conversation_memory,
+            return_source_documents=True,
+            output_key="answer"
+        )
+    except Exception as e:
+        st.error(f"Failed to create chatbot: {str(e)}")
+        return None
+def clear_chat_history():
+    """Reset the conversation: rebuild the chatbot with empty memory and clear the stored history.
+    Shows a Streamlit error instead when no vector store is in the session.
+    """
+    state = st.session_state
+    if not ("vectorstore" in state and state.vectorstore):
+        st.error("❌ No extracted data found. Please extract group data first.")
+        return
+    # A new chain is the way to obtain fresh conversation memory
+    selected_model = state.get("current_model", "llama2")
+    state.chatbot = create_chatbot(state.vectorstore, selected_model)
+    state.chat_history = []
+    st.success("🔄 Chat history cleared! You can now ask questions with a fresh conversation.")
+def main():
+    """Render the Streamlit app: sidebar controls, status column and chat column.
+    The sidebar shows Ollama status, model selection, the manual Facebook
+    login flow and group extraction; the left column shows login/extraction
+    status; the right column hosts the chat over the extracted data.
+    All cross-rerun state lives in st.session_state.
+    """
+    st.set_page_config(
+        page_title="Facebook Group Analyzer with Manual Login",
+        page_icon="📘",
+        layout="wide"
+    )
+    st.title("📘 Facebook Group Data Extractor & Chatbot")
+    st.markdown("Manual login required for private groups - Works with both public and private groups")
+    # Initialize session state (only on the first run; later reruns keep existing values)
+    if "extractor" not in st.session_state:
+        st.session_state.extractor = None
+    if "login_status" not in st.session_state:
+        st.session_state.login_status = "not_started"  # not_started, in_progress, completed, failed
+    if "group_data" not in st.session_state:
+        st.session_state.group_data = None
+    if "vectorstore" not in st.session_state:
+        st.session_state.vectorstore = None
+    if "chatbot" not in st.session_state:
+        st.session_state.chatbot = None
+    if "chat_history" not in st.session_state:
+        st.session_state.chat_history = []
+    if "current_model" not in st.session_state:
+        st.session_state.current_model = "llama2"
+    # Sidebar
+    with st.sidebar:
+        st.header("🔧 Configuration")
+        # Ollama status
+        st.subheader("🤖 Ollama Status")
+        if check_ollama_running():
+            st.success("✅ Ollama is running")
+        else:
+            st.error("❌ Ollama is not running")
+            if st.button("🔄 Start Ollama"):
+                if start_ollama():
+                    st.success("✅ Ollama started successfully")
+                    st.rerun()
+                else:
+                    st.error("❌ Failed to start Ollama")
+        # Model selection
+        available_models = get_available_models()
+        model_name = st.selectbox(
+            "Select AI Model",
+            available_models,
+            index=0 if available_models else 0,
+            key="model_selector"
+        )
+        # Store current model (read later by clear_chat_history)
+        st.session_state.current_model = model_name
+        # Login section: a three-state flow driven by session_state.login_status
+        st.subheader("🔐 Facebook Login")
+        if st.session_state.login_status == "not_started":
+            if st.button("🚪 Start Manual Login", type="primary", use_container_width=True):
+                extractor = FacebookGroupExtractor()
+                if extractor.setup_driver():
+                    st.session_state.extractor = extractor
+                    if extractor.manual_login():
+                        st.session_state.login_status = "in_progress"
+                        st.rerun()
+        elif st.session_state.login_status == "in_progress":
+            st.info("🔄 Login in progress...")
+            col1, col2 = st.columns(2)
+            with col1:
+                if st.button("✅ I'm Logged In", type="primary"):
+                    if st.session_state.extractor and st.session_state.extractor.check_login_status():
+                        st.session_state.login_status = "completed"
+                        st.success("✅ Login successful!")
+                        st.rerun()
+                    else:
+                        st.error("❌ Login not detected. Please make sure you're logged in.")
+            with col2:
+                if st.button("❌ Cancel Login"):
+                    if st.session_state.extractor:
+                        st.session_state.extractor.close()
+                    st.session_state.login_status = "not_started"
+                    st.rerun()
+        elif st.session_state.login_status == "completed":
+            st.success("✅ Logged in to Facebook")
+            if st.button("🚪 Logout & Restart"):
+                # Close the browser and drop everything derived from the session
+                if st.session_state.extractor:
+                    st.session_state.extractor.close()
+                st.session_state.login_status = "not_started"
+                st.session_state.group_data = None
+                st.session_state.vectorstore = None
+                st.session_state.chatbot = None
+                st.session_state.chat_history = []
+                st.rerun()
+        # Group extraction section
+        st.subheader("📝 Group Information")
+        group_url = st.text_input(
+            "Facebook Group URL",
+            placeholder="https://www.facebook.com/groups/groupname/",
+            help="Works with both public and private groups"
+        )
+        # Extraction settings
+        st.subheader("⚙️ Extraction Settings")
+        max_scrolls = st.slider("Number of scrolls", 5, 20, 10)
+        if st.button("🚀 Extract Group Data", type="primary", use_container_width=True):
+            # Preconditions: logged in, plausible group URL, Ollama reachable
+            if st.session_state.login_status != "completed":
+                st.error("❌ Please login to Facebook first")
+            elif not group_url or "facebook.com/groups/" not in group_url:
+                st.error("❌ Please enter a valid Facebook group URL")
+            elif not check_ollama_running():
+                st.error("❌ Ollama is not running")
+            else:
+                with st.spinner("🌐 Extracting group data... This may take a few minutes."):
+                    group_data = st.session_state.extractor.extract_group_data(group_url, max_scrolls)
+                    if group_data.get("status") == "success" and group_data.get("posts"):
+                        st.session_state.group_data = group_data
+                        # Process for chatbot
+                        vectorstore, chunks = process_group_data(group_data)
+                        if vectorstore:
+                            st.session_state.vectorstore = vectorstore
+                            st.session_state.chatbot = create_chatbot(vectorstore, model_name)
+                            st.session_state.chat_history = []
+                            st.success(f"✅ Successfully extracted {len(group_data['posts'])} posts!")
+                        else:
+                            st.error("❌ Failed to process group data")
+                    else:
+                        # Also reached when extraction succeeded but found no posts
+                        error_msg = group_data.get("error", "Unknown error")
+                        st.error(f"❌ Extraction failed: {error_msg}")
+        # Chat management section
+        if st.session_state.chatbot and st.session_state.group_data:
+            st.subheader("💬 Chat Management")
+            if st.button("🗑️ Clear Chat History", type="secondary", use_container_width=True):
+                clear_chat_history()
+                st.rerun()
+    # Main content area
+    col1, col2 = st.columns([1, 1])
+    with col1:
+        st.header("📊 Login & Extraction Status")
+        if st.session_state.login_status == "not_started":
+            st.info("""
+            ## 🔐 Manual Login Required
+            **How it works:**
+            1. Click 'Start Manual Login' in the sidebar
+            2. A browser window will open with Facebook
+            3. **Manually login** to your Facebook account
+            4. Complete any security checks if needed
+            5. Return here and click 'I'm Logged In'
+            **Benefits:**
+            - Works with both public and private groups
+            - No need to enter password in this app
+            - Handles 2FA and security checks
+            - More reliable than automated login
+            """)
+        elif st.session_state.login_status == "in_progress":
+            st.warning("""
+            ## 🔄 Login in Progress
+            **Please complete these steps:**
+            1. ✅ Browser window should be open with Facebook
+            2. 🔄 **Manually login** to your Facebook account
+            3. ✅ Wait until you see your Facebook home page
+            4. 🔄 Return here and click **'I'm Logged In'**
+            **Troubleshooting:**
+            - If browser didn't open, check popup blockers
+            - Make sure you're fully logged into Facebook
+            - If you see security checks, complete them first
+            """)
+        elif st.session_state.login_status == "completed":
+            st.success("""
+            ## ✅ Login Successful!
+            You can now:
+            1. Enter a Facebook group URL in the sidebar
+            2. Adjust extraction settings
+            3. Click 'Extract Group Data'
+            4. Chat with the extracted content
+            """)
+            if st.session_state.group_data:
+                group_info = st.session_state.group_data.get("group_info", {})
+                posts = st.session_state.group_data.get("posts", [])
+                st.subheader("🏷️ Group Information")
+                if group_info:
+                    for key, value in group_info.items():
+                        if value:
+                            st.write(f"**{key.replace('_', ' ').title()}:** {value}")
+                st.subheader(f"📝 Posts Extracted: {len(posts)}")
+                # Preview only the first three posts
+                for i, post in enumerate(posts[:3]):
+                    with st.expander(f"Post {i+1}"):
+                        content = post.get("content", "")
+                        st.text_area(f"Content {i+1}", content, height=150, key=f"post_{i}")
+                        st.caption(f"Source: {post.get('source', 'unknown')} | Reactions: {post.get('reactions', 0)}")
+    with col2:
+        st.header("💬 Chat with Group Data")
+        # Chat management button at the top
+        if st.session_state.chatbot and st.session_state.group_data:
+            col_clear, col_info = st.columns([1, 3])
+            with col_clear:
+                if st.button("🗑️ Clear History", key="clear_top"):
+                    clear_chat_history()
+                    st.rerun()
+            with col_info:
+                st.caption("Clear conversation history while keeping extracted data")
+        if st.session_state.chatbot and st.session_state.group_data:
+            # Display chat history
+            for i, chat in enumerate(st.session_state.chat_history):
+                with st.chat_message("user"):
+                    st.write(chat["question"])
+                with st.chat_message("assistant"):
+                    st.write(chat["answer"])
+            # Chat input
+            user_question = st.chat_input("Ask about the group content...")
+            if user_question:
+                with st.chat_message("user"):
+                    st.write(user_question)
+                with st.chat_message("assistant"):
+                    with st.spinner("🤔 Analyzing..."):
+                        try:
+                            response = st.session_state.chatbot.invoke({"question": user_question})
+                            answer = response.get("answer", "I couldn't generate a response.")
+                            st.write(answer)
+                            # Persist the exchange so it is re-rendered on the next rerun
+                            st.session_state.chat_history.append({
+                                "question": user_question,
+                                "answer": answer
+                            })
+                        except Exception as e:
+                            error_msg = f"Error: {str(e)}"
+                            st.error(error_msg)
+            # Suggestions are shown only while the conversation is still empty
+            if not st.session_state.chat_history:
+                st.subheader("💡 Suggested Questions")
+                suggestions = [
+                    "What are the main topics discussed in this group?",
+                    "Summarize the most active discussions",
+                    "What kind of content gets the most engagement?",
+                    "Are there any common questions or problems?",
+                    "What's the overall tone of the group?"
+                ]
+                for suggestion in suggestions:
+                    # Clicking a suggestion only shows a hint; it does not submit the question
+                    if st.button(suggestion, key=suggestion):
+                        st.info(f"Type: '{suggestion}' in the chat input above")
+        elif st.session_state.login_status == "completed":
+            st.info("📊 Extract group data first to start chatting")
+        else:
+            st.info("🔐 Login to Facebook to get started")
+# Entry point: call main() only when this file is executed directly, not when imported.
+if __name__ == "__main__":
+    main()

let_deploy.py ADDED Viewed

	@@ -0,0 +1,453 @@

+# let_deploy.py
+import streamlit as st
+import time
+from bs4 import BeautifulSoup
+from langchain_text_splitters import CharacterTextSplitter
+from langchain_community.embeddings import HuggingFaceEmbeddings
+from langchain.vectorstores import FAISS
+from langchain.memory import ConversationBufferMemory
+from langchain.chains import ConversationalRetrievalChain
+from langchain.schema import Document
+from selenium import webdriver
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.chrome.options import Options
+from selenium.webdriver.chrome.service import Service
+from webdriver_manager.chrome import ChromeDriverManager
+from langchain_community.llms import HuggingFaceHub
+import re
+import requests
+import os
+from datetime import datetime
+from typing import List
+import logging
+# Configure root logging at INFO level and create this module's logger.
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# Page title, icon and wide layout for this Streamlit page.
+st.set_page_config(page_title="Facebook Extractor 2.0", page_icon="📘", layout="wide")
+# Inject custom CSS: dark app background, orange gradient header banner, blue full-width buttons.
+st.markdown("""
+<style>
+    .stApp { background-color: #0e1117; color: white; }
+    .main-header { background: linear-gradient(135deg, #FF6B35, #FF8E53); color: white; padding: 1.5rem; border-radius: 8px; margin-bottom: 1.5rem; text-align: center; }
+    .stButton>button { background-color: #1877F2; color: white; border: none; border-radius: 4px; padding: 8px 16px; width: 100%; }
+</style>
+""", unsafe_allow_html=True)
+def get_embeddings():
+    try:
+        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+        return embeddings
+    except Exception as e:
+        st.error(f"❌ Failed to load embeddings: {e}")
+        return None
+def get_llm():
+    api_key = st.session_state.get('hf_api_key')
+    if not api_key:
+        st.error("❌ HuggingFace API Key not found")
+        return None
+    try:
+        llm = HuggingFaceHub(
+            repo_id="google/flan-t5-large",
+            huggingfacehub_api_token=api_key,
+            model_kwargs={"temperature": 0.7, "max_length": 512}
+        )
+        return llm
+    except Exception as e:
+        st.error(f"❌ HuggingFace error: {e}")
+        return None
+class FacebookGroupExtractor:
+    def __init__(self):
+        self.driver = None
+        self.wait = None
+        self.is_logged_in = False
+    def setup_driver(self):
+        try:
+            chrome_options = Options()
+            chrome_options.add_argument("--no-sandbox")
+            chrome_options.add_argument("--disable-dev-shm-usage")
+            chrome_options.add_argument("--disable-gpu")
+            chrome_options.add_argument("--start-maximized")
+            chrome_options.add_argument("--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36")
+            st.info("🔄 Setting up Chrome browser...")
+            try:
+                service = Service(ChromeDriverManager().install())
+                self.driver = webdriver.Chrome(service=service, options=chrome_options)
+            except Exception as e:
+                self.driver = webdriver.Chrome(options=chrome_options)
+            self.driver.set_page_load_timeout(30)
+            self.wait = WebDriverWait(self.driver, 25)
+            st.success("✅ Chrome browser setup completed!")
+            return True
+        except Exception as e:
+            st.error(f"❌ Failed to setup Chrome: {str(e)}")
+            return False
+    def manual_login(self):
+        try:
+            st.info("🔓 Opening Facebook for manual login...")
+            self.driver.get("https://www.facebook.com")
+            time.sleep(3)
+            self.wait.until(EC.presence_of_element_located((By.TAG_NAME, "body")))
+            st.success("✅ Facebook opened successfully!")
+            st.info("""
+            **📝 Manual Login Instructions:**
+            1. Browser window opened with Facebook
+            2. Manually login to your account
+            3. Complete any security checks
+            4. Return here and click 'I'm Logged In'
+            """)
+            return True
+        except Exception as e:
+            st.error(f"❌ Failed to open Facebook: {str(e)}")
+            return False
+    def check_login_status(self):
+        try:
+            current_url = self.driver.current_url.lower()
+            login_success_urls = ["facebook.com/home", "facebook.com/groups", "facebook.com/marketplace"]
+            if any(url in current_url for url in login_success_urls):
+                self.is_logged_in = True
+                return True
+            login_indicators = ["//a[@aria-label='Profile']", "//div[@aria-label='Account']", "//span[contains(text(), 'Menu')]"]
+            for indicator in login_indicators:
+                try:
+                    elements = self.driver.find_elements(By.XPATH, indicator)
+                    for element in elements:
+                        if element.is_displayed():
+                            self.is_logged_in = True
+                            return True
+                except:
+                    continue
+            return False
+        except Exception as e:
+            logger.error(f"Login check error: {str(e)}")
+            return False
+    def extract_group_data(self, group_url: str, max_scrolls: int = 10) -> dict:
+        try:
+            if not self.is_logged_in:
+                return {"error": "Not logged in. Please login first.", "status": "error"}
+            st.info(f"🌐 Accessing group: {group_url}")
+            self.driver.get(group_url)
+            time.sleep(5)
+            # Extract group info
+            group_info = self._extract_group_info()
+            posts_data = self._scroll_and_extract_posts(max_scrolls)
+            return {
+                "group_info": group_info,
+                "posts": posts_data,
+                "extraction_time": datetime.now().isoformat(),
+                "total_posts": len(posts_data),
+                "status": "success"
+            }
+        except Exception as e:
+            logger.error(f"Extraction error: {str(e)}")
+            return {"error": f"Extraction failed: {str(e)}", "status": "error"}
+    def _extract_group_info(self) -> dict:
+        group_info = {}
+        try:
+            name_selectors = ["//h1", "//h2", "//h3", "//title"]
+            for selector in name_selectors:
+                try:
+                    elements = self.driver.find_elements(By.XPATH, selector)
+                    for element in elements:
+                        name = element.text.strip()
+                        if name and len(name) > 3:
+                            group_info["name"] = name
+                            break
+                    if "name" in group_info:
+                        break
+                except:
+                    continue
+        except Exception as e:
+            logger.warning(f"Group info extraction failed: {str(e)}")
+        return group_info
+    def _scroll_and_extract_posts(self, max_scrolls: int) -> List[dict]:
+        all_posts = []
+        last_height = self.driver.execute_script("return document.body.scrollHeight")
+        for scroll_iteration in range(max_scrolls):
+            current_posts = self._extract_posts_from_current_page()
+            for post in current_posts:
+                if not self._is_duplicate_post(post, all_posts):
+                    all_posts.append(post)
+            self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
+            time.sleep(3)
+            new_height = self.driver.execute_script("return document.body.scrollHeight")
+            if new_height == last_height:
+                break
+            last_height = new_height
+        return all_posts
+    def _extract_posts_from_current_page(self) -> List[dict]:
+        posts = []
+        strategies = [
+            ("//div[@role='article']", "article"),
+            ("//div[contains(@data-pagelet, 'Feed')]//div", "feed"),
+            ("//div[contains(@class, 'userContent')]", "userContent")
+        ]
+        for xpath, source in strategies:
+            posts.extend(self._extract_by_xpath(xpath, source))
+        return posts
+    def _extract_by_xpath(self, xpath: str, source: str) -> List[dict]:
+        posts = []
+        try:
+            elements = self.driver.find_elements(By.XPATH, xpath)
+            for element in elements:
+                try:
+                    post_text = element.text.strip()
+                    if self._is_valid_post(post_text):
+                        post_data = {
+                            "content": post_text,
+                            "source": source,
+                            "timestamp": datetime.now().isoformat(),
+                            "has_comments": False,
+                            "reactions": 0
+                        }
+                        posts.append(post_data)
+                except:
+                    continue
+        except:
+            pass
+        return posts
+    def _is_valid_post(self, text: str) -> bool:
+        if not text or len(text) < 30:
+            return False
+        excluded_phrases = ['facebook', 'login', 'sign up', 'password', 'menu', 'navigation']
+        text_lower = text.lower()
+        if any(phrase in text_lower for phrase in excluded_phrases):
+            return False
+        words = text.split()
+        return len(words) >= 5
+    def _is_duplicate_post(self, new_post: dict, existing_posts: List[dict]) -> bool:
+        new_content = new_post.get("content", "")[:100]
+        for existing_post in existing_posts:
+            existing_content = existing_post.get("content", "")[:100]
+            similarity = self._calculate_similarity(new_content, existing_content)
+            if similarity > 0.7:
+                return True
+        return False
+    def _calculate_similarity(self, text1: str, text2: str) -> float:
+        if not text1 or not text2:
+            return 0.0
+        words1 = set(text1.lower().split())
+        words2 = set(text2.lower().split())
+        if not words1 or not words2:
+            return 0.0
+        intersection = words1.intersection(words2)
+        union = words1.union(words2)
+        return len(intersection) / len(union) if union else 0.0
+    def close(self):
+        if self.driver:
+            try:
+                self.driver.quit()
+            except:
+                pass
+def process_group_data(group_data: dict):
+    if not group_data or "posts" not in group_data or not group_data["posts"]:
+        return None, []
+    all_text = f"Group: {group_data.get('group_info', {}).get('name', 'Unknown')}\n\n"
+    all_text += f"Total Posts: {len(group_data['posts'])}\n\n"
+    for i, post in enumerate(group_data["posts"]):
+        content = post.get("content", "")
+        all_text += f"--- Post {i+1} ---\n"
+        all_text += f"Content: {content}\n\n"
+    splitter = CharacterTextSplitter(separator="\n", chunk_size=1000, chunk_overlap=200)
+    chunks = splitter.split_text(all_text)
+    documents = [Document(page_content=chunk) for chunk in chunks]
+    try:
+        embeddings = get_embeddings()
+        if embeddings is None:
+            return None, []
+        vectorstore = FAISS.from_documents(documents, embeddings)
+        return vectorstore, chunks
+    except Exception as e:
+        st.error(f"Vector store creation failed: {e}")
+        return None, []
+def create_chatbot(vectorstore):
+    try:
+        llm = get_llm()
+        if llm is None:
+            return None
+        memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
+        chain = ConversationalRetrievalChain.from_llm(
+            llm=llm,
+            retriever=vectorstore.as_retriever(search_kwargs={"k": 3}),
+            memory=memory,
+            return_source_documents=True
+        )
+        return chain
+    except Exception as e:
+        st.error(f"Failed to create chatbot: {str(e)}")
+        return None
+def main():
+    st.markdown("""
+    <div class="main-header">
+        <h1>🔥 Facebook Group Extractor 2.0</h1>
+        <p>Professional Version - Powered by HuggingFace</p>
+    </div>
+    """, unsafe_allow_html=True)
+    if st.button("← Back to Main Dashboard", use_container_width=True):
+        st.info("Return to main dashboard")
+        return
+    if not st.session_state.get('hf_api_key'):
+        st.error("❌ API Key not configured. Please go back to main dashboard.")
+        return
+    # Initialize session state
+    if "extractor" not in st.session_state:
+        st.session_state.extractor = None
+    if "login_status" not in st.session_state:
+        st.session_state.login_status = "not_started"
+    if "group_data" not in st.session_state:
+        st.session_state.group_data = None
+    if "chatbot" not in st.session_state:
+        st.session_state.chatbot = None
+    if "chat_history" not in st.session_state:
+        st.session_state.chat_history = []
+    # Sidebar
+    with st.sidebar:
+        st.success("✅ HuggingFace API Active")
+        # Login section
+        st.subheader("🔐 Facebook Login")
+        if st.session_state.login_status == "not_started":
+            if st.button("🚪 Start Manual Login", type="primary", use_container_width=True):
+                with st.spinner("Setting up browser..."):
+                    extractor = FacebookGroupExtractor()
+                    if extractor.setup_driver():
+                        st.session_state.extractor = extractor
+                        if extractor.manual_login():
+                            st.session_state.login_status = "in_progress"
+                            st.rerun()
+        elif st.session_state.login_status == "in_progress":
+            st.info("🔄 Login in progress...")
+            col1, col2 = st.columns(2)
+            with col1:
+                if st.button("✅ I'm Logged In", type="primary"):
+                    if st.session_state.extractor and st.session_state.extractor.check_login_status():
+                        st.session_state.login_status = "completed"
+                        st.success("✅ Login successful!")
+                        st.rerun()
+            with col2:
+                if st.button("❌ Cancel"):
+                    if st.session_state.extractor:
+                        st.session_state.extractor.close()
+                    st.session_state.login_status = "not_started"
+                    st.rerun()
+        elif st.session_state.login_status == "completed":
+            st.success("✅ Logged in to Facebook")
+        # Group extraction
+        st.subheader("📝 Group Information")
+        group_url = st.text_input("Facebook Group URL", placeholder="https://www.facebook.com/groups/groupname/")
+        max_scrolls = st.slider("Number of scrolls", 5, 20, 10)
+        if st.button("🚀 Extract Group Data", type="primary", use_container_width=True):
+            if st.session_state.login_status != "completed":
+                st.error("❌ Please login to Facebook first")
+            elif not group_url or "facebook.com/groups/" not in group_url:
+                st.error("❌ Please enter a valid Facebook group URL")
+            else:
+                with st.spinner("🌐 Extracting group data..."):
+                    group_data = st.session_state.extractor.extract_group_data(group_url, max_scrolls)
+                    if group_data.get("status") == "success":
+                        st.session_state.group_data = group_data
+                        vectorstore, chunks = process_group_data(group_data)
+                        if vectorstore:
+                            st.session_state.chatbot = create_chatbot(vectorstore)
+                            st.session_state.chat_history = []
+                            st.success(f"✅ Successfully extracted {len(group_data['posts'])} posts!")
+    # Main content
+    col1, col2 = st.columns([1, 1])
+    with col1:
+        st.header("📊 Status")
+        if st.session_state.login_status == "not_started":
+            st.info("🔐 Start manual login to begin")
+        elif st.session_state.login_status == "in_progress":
+            st.warning("🔄 Complete login in the browser")
+        elif st.session_state.login_status == "completed":
+            st.success("✅ Ready to extract group data")
+            if st.session_state.group_data:
+                group_info = st.session_state.group_data.get("group_info", {})
+                posts = st.session_state.group_data.get("posts", [])
+                st.subheader("🏷️ Group Info")
+                if group_info.get("name"):
+                    st.write(f"**Name:** {group_info['name']}")
+                st.write(f"**Posts Extracted:** {len(posts)}")
+    with col2:
+        st.header("💬 Chat")
+        if st.session_state.chatbot and st.session_state.group_data:
+            for i, chat in enumerate(st.session_state.chat_history):
+                with st.chat_message("user"):
+                    st.write(chat["question"])
+                with st.chat_message("assistant"):
+                    st.write(chat["answer"])
+            user_question = st.chat_input("Ask about the group...")
+            if user_question:
+                with st.chat_message("user"):
+                    st.write(user_question)
+                with st.chat_message("assistant"):
+                    with st.spinner("🤔 Analyzing..."):
+                        try:
+                            response = st.session_state.chatbot.invoke({"question": user_question})
+                            answer = response.get("answer", "No response generated.")
+                            st.write(answer)
+                            st.session_state.chat_history.append({
+                                "question": user_question,
+                                "answer": answer
+                            })
+                        except Exception as e:
+                            st.error(f"Error: {str(e)}")
+        else:
+            st.info("📊 Extract group data first to start chatting")
+# Entry point: call main() only when this file is executed directly, not when imported.
+if __name__ == "__main__":
+    main()

linkdin_deploy.py ADDED Viewed

	@@ -0,0 +1,246 @@

+# linkdin_deploy.py
+import streamlit as st
+import requests
+from bs4 import BeautifulSoup
+from langchain_text_splitters import CharacterTextSplitter
+from langchain_community.embeddings import HuggingFaceEmbeddings
+from langchain_community.vectorstores import FAISS
+from langchain.memory import ConversationBufferMemory
+from langchain.chains import ConversationalRetrievalChain
+from langchain_core.documents import Document
+from langchain_community.llms import HuggingFaceHub
+import re
+import time
+# Configure the page: title, icon and wide layout.
+st.set_page_config(
+    page_title="LinkedIn AI Analyzer",
+    page_icon="💼",
+    layout="wide"
+)
+# Inject custom CSS: dark app background, LinkedIn-blue header and buttons, dark input fields.
+st.markdown("""
+<style>
+    .stApp { background-color: #0e1117; color: white; }
+    .main-header { background: #0077B5; color: white; padding: 1.5rem; border-radius: 8px; margin-bottom: 1.5rem; text-align: center; }
+    .stButton>button { background-color: #0077b5; color: white; border: none; border-radius: 4px; padding: 8px 16px; width: 100%; }
+    .stTextInput>div>div>input { background-color: #262730; color: white; border: 1px solid #555; }
+    .stSelectbox>div>div>select { background-color: #262730; color: white; }
+    .stTextArea textarea { background-color: #262730; color: white; }
+</style>
+""", unsafe_allow_html=True)
+def get_embeddings():
+    try:
+        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+        return embeddings
+    except Exception as e:
+        st.error(f"❌ Failed to load embeddings: {e}")
+        return None
+def get_llm():
+    api_key = st.session_state.get('hf_api_key')
+    if not api_key:
+        st.error("❌ HuggingFace API Key not found")
+        return None
+    try:
+        llm = HuggingFaceHub(
+            repo_id="google/flan-t5-large",
+            huggingfacehub_api_token=api_key,
+            model_kwargs={"temperature": 0.7, "max_length": 500}
+        )
+        return llm
+    except Exception as e:
+        st.error(f"❌ HuggingFace error: {e}")
+        return None
+def extract_linkedin_data(url, data_type):
+    try:
+        headers = {
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
+        }
+        response = requests.get(url, headers=headers, timeout=15)
+        if response.status_code != 200:
+            return f"❌ Failed to access page (Status: {response.status_code})"
+        soup = BeautifulSoup(response.text, 'html.parser')
+        for script in soup(["script", "style"]):
+            script.decompose()
+        text = soup.get_text()
+        lines = (line.strip() for line in text.splitlines())
+        chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
+        text = ' '.join(chunk for chunk in chunks if chunk)
+        paragraphs = text.split('.')
+        meaningful_content = [p.strip() for p in paragraphs if len(p.strip()) > 50]
+        if not meaningful_content:
+            return "❌ No meaningful content found."
+        if data_type == "profile":
+            result = "👤 LINKEDIN PROFILE DATA\n\n"
+        elif data_type == "company":
+            result = "🏢 LINKEDIN COMPANY DATA\n\n"
+        else:
+            result = "📝 LINKEDIN POST DATA\n\n"
+        result += f"🔗 URL: {url}\n"
+        result += "="*50 + "\n\n"
+        for i, content in enumerate(meaningful_content[:10], 1):
+            result += f"{i}. {content}\n\n"
+        result += "="*50 + "\n"
+        result += f"✅ Extracted {len(meaningful_content)} content blocks\n"
+        return result
+    except Exception as e:
+        return f"❌ Error: {str(e)}"
+def get_text_chunks(text):
+    if not text.strip():
+        return []
+    splitter = CharacterTextSplitter(separator="\n", chunk_size=1000, chunk_overlap=200)
+    return splitter.split_text(text)
+def get_vectorstore(text_chunks):
+    if not text_chunks:
+        return None
+    documents = [Document(page_content=chunk) for chunk in text_chunks]
+    embeddings = get_embeddings()
+    if embeddings is None:
+        return None
+    vectorstore = FAISS.from_documents(documents, embeddings)
+    return vectorstore
+def get_conversation_chain(vectorstore):
+    if vectorstore is None:
+        return None
+    try:
+        llm = get_llm()
+        if llm is None:
+            return None
+        memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
+        chain = ConversationalRetrievalChain.from_llm(
+            llm=llm,
+            retriever=vectorstore.as_retriever(search_kwargs={"k": 3}),
+            memory=memory,
+            return_source_documents=True
+        )
+        return chain
+    except Exception as e:
+        st.error(f"❌ Error: {e}")
+        return None
+def main():
+    """Render the LinkedIn AI Analyzer page.
+
+    Requires the HuggingFace API key in session state, extracts text from a
+    LinkedIn URL chosen in the sidebar, builds a conversation chain over it,
+    and shows a chat column next to an overview column.
+    """
+    st.markdown("""
+    <div class="main-header">
+        <h1>💼 LinkedIn AI Analyzer</h1>
+        <p>Free Version - Powered by HuggingFace</p>
+    </div>
+    """, unsafe_allow_html=True)
+    # Stop rendering the rest of the page once the back button is pressed.
+    if st.button("← Back to Main Dashboard", use_container_width=True):
+        st.info("Return to main dashboard")
+        return
+    # The key is read from session state; nothing below works without it.
+    if not st.session_state.get('hf_api_key'):
+        st.error("❌ API Key not configured. Please go back to main dashboard.")
+        return
+    # Initialize session state
+    if "conversation" not in st.session_state:
+        st.session_state.conversation = None
+    if "chat_history" not in st.session_state:
+        st.session_state.chat_history = []
+    if "processed" not in st.session_state:
+        st.session_state.processed = False
+    if "extracted_data" not in st.session_state:
+        st.session_state.extracted_data = ""
+    # Sidebar
+    with st.sidebar:
+        st.success("✅ HuggingFace API Active")
+        data_type = st.selectbox("📊 Content Type", ["profile", "company", "post"])
+        # Example URL shown as placeholder for the selected content type.
+        url_placeholder = {
+            "profile": "https://www.linkedin.com/in/username/",
+            "company": "https://www.linkedin.com/company/companyname/",
+            "post": "https://www.linkedin.com/posts/username_postid/"
+        }
+        linkedin_url = st.text_input("🌐 LinkedIn URL", placeholder=url_placeholder[data_type])
+        if st.button("🚀 Extract & Analyze", type="primary"):
+            if not linkedin_url.strip():
+                st.warning("Please enter a LinkedIn URL")
+            else:
+                with st.spinner("🔄 Extracting data..."):
+                    extracted_data = extract_linkedin_data(linkedin_url, data_type)
+                    # extract_linkedin_data signals failure with a string starting with "❌".
+                    if extracted_data and not extracted_data.startswith("❌"):
+                        chunks = get_text_chunks(extracted_data)
+                        if chunks:
+                            vectorstore = get_vectorstore(chunks)
+                            conversation = get_conversation_chain(vectorstore)
+                            if conversation:
+                                # Store the new chain and start a fresh chat history.
+                                st.session_state.conversation = conversation
+                                st.session_state.processed = True
+                                st.session_state.extracted_data = extracted_data
+                                st.session_state.chat_history = []
+                                st.success(f"✅ Ready to analyze {len(chunks)} content chunks!")
+                            else:
+                                st.error("❌ Failed to initialize AI")
+                        else:
+                            st.error("❌ No content extracted")
+                    else:
+                        st.error(extracted_data)
+    # Main content
+    col1, col2 = st.columns([2, 1])
+    with col1:
+        st.markdown("### 💬 Chat")
+        # Replay the stored conversation; entries are {"role", "content"} dicts.
+        for i, chat in enumerate(st.session_state.chat_history):
+            if chat["role"] == "user":
+                st.markdown(f"**👤 You:** {chat['content']}")
+            elif chat["role"] == "assistant":
+                if chat["content"]:
+                    st.markdown(f"**🤖 Assistant:** {chat['content']}")
+        if st.session_state.processed:
+            user_input = st.chat_input("Ask about the LinkedIn data...")
+            if user_input:
+                st.session_state.chat_history.append({"role": "user", "content": user_input})
+                with st.spinner("🤔 Analyzing..."):
+                    try:
+                        if st.session_state.conversation:
+                            response = st.session_state.conversation.invoke({"question": user_input})
+                            answer = response.get("answer", "No response generated.")
+                            st.session_state.chat_history.append({"role": "assistant", "content": answer})
+                            # Rerun so the history loop above renders the new turn.
+                            st.rerun()
+                    except Exception as e:
+                        # Record the failure as the assistant's reply so it shows in the history.
+                        st.session_state.chat_history.append({"role": "assistant", "content": f"❌ Error: {str(e)}"})
+                        st.rerun()
+        else:
+            st.info("👋 Enter a LinkedIn URL and click 'Extract & Analyze' to start")
+    with col2:
+        if st.session_state.processed:
+            st.markdown("### 📊 Overview")
+            data = st.session_state.extracted_data
+            chunks = get_text_chunks(data)
+            # NOTE(review): this shows the currently selected type, which may differ
+            # from the type used when the data was extracted — confirm intent.
+            st.metric("Content Type", data_type.title())
+            st.metric("Text Chunks", len(chunks))
+            st.metric("Characters", f"{len(data):,}")
+# Entry point: call main() only when this file is executed directly, not when imported.
+if __name__ == "__main__":
+    main()

main_dashboard.py ADDED Viewed

	@@ -0,0 +1,238 @@

+# main_dashboard.py
+import streamlit as st
+import subprocess
+import sys
+import os
+import webbrowser
+import time
+import threading
+def check_port_in_use(port: int) -> bool:
+    import socket
+    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+        s.settimeout(1)
+        return s.connect_ex(('localhost', port)) == 0
+def get_available_port(start_port: int = 8601) -> int:
+    port = start_port
+    while check_port_in_use(port):
+        port += 1
+    return port
+def run_streamlit_app_in_thread(app_file: str, port: int):
+    def run_app():
+        try:
+            subprocess.run([
+                sys.executable, "-m", "streamlit", "run",
+                app_file,
+                "--server.port", str(port),
+                "--server.headless", "true",
+                "--browser.serverAddress", "localhost"
+            ], check=True)
+        except subprocess.CalledProcessError as e:
+            print(f"Error running {app_file}: {e}")
+    thread = threading.Thread(target=run_app, daemon=True)
+    thread.start()
+    return thread
+def main():
+    st.set_page_config(
+        page_title="Social Media Data Extractor",
+        page_icon="🔍",
+        layout="wide",
+        initial_sidebar_state="expanded"
+    )
+    st.markdown("""
+    <style>
+        .stApp { background-color: #0e1117; color: white; }
+        .main-header { background: linear-gradient(135deg, #1a2a6c, #b21f1f); color: white; padding: 2rem; border-radius: 10px; text-align: center; margin-bottom: 2rem; }
+        .platform-card { background-color: #262730; padding: 1.5rem; border-radius: 10px; border-left: 4px solid; margin: 1rem 0; height: 280px; }
+        .linkedin-card { border-left-color: #0077B5; }
+        .facebook-card { border-left-color: #1877F2; }
+        .facebook-pro-card { border-left-color: #FF6B35; }
+        .feature-list { margin: 1rem 0; padding-left: 1.5rem; flex-grow: 1; }
+        .api-key-section { background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); padding: 1.5rem; border-radius: 10px; margin-bottom: 2rem; }
+        .status-box { background-color: #1a1a2e; padding: 1rem; border-radius: 5px; margin: 0.5rem 0; min-height: 120px; }
+    </style>
+    """, unsafe_allow_html=True)
+    # API Key Section
+    st.markdown("""
+    <div class="api-key-section">
+        <h2 style="margin:0; color:white;">🔑 HuggingFace API Key Required</h2>
+        <p style="margin:0; color:white; opacity:0.9;">Get FREE API key from: <a href="https://huggingface.co/settings/tokens" target="_blank" style="color:white; text-decoration:underline;">huggingface.co/settings/tokens</a></p>
+    </div>
+    """, unsafe_allow_html=True)
+    # API Configuration
+    hf_api_key = st.text_input(
+        "🤗 Enter Your HuggingFace API Key",
+        type="password",
+        placeholder="hf_xxxxxxxxxxxxxxxx",
+        help="Get FREE API key from huggingface.co/settings/tokens"
+    )
+    # Store API key
+    if hf_api_key:
+        st.session_state.hf_api_key = hf_api_key
+        st.success("✅ HuggingFace API Key saved! You can now launch extractors.")
+    # Header
+    st.markdown("""
+    <div class="main-header">
+        <h1 style="margin:0;">🔍 Social Media Data Extractor</h1>
+        <p style="margin:0; opacity: 0.9;">100% Free - No Local Setup Required</p>
+    </div>
+    """, unsafe_allow_html=True)
+    # Initialize session state
+    if 'linkedin_port' not in st.session_state:
+        st.session_state.linkedin_port = None
+    if 'facebook_port' not in st.session_state:
+        st.session_state.facebook_port = None
+    if 'facebook_pro_port' not in st.session_state:
+        st.session_state.facebook_pro_port = None
+    # Platform selection
+    st.markdown("## 🚀 Launch Extractors")
+    col1, col2, col3 = st.columns(3)
+    with col1:
+        st.markdown("""
+        <div class="platform-card linkedin-card">
+            <h3>💼 LinkedIn Extractor</h3>
+            <ul class="feature-list">
+                <li>No login required</li>
+                <li>Profile, company, and post analysis</li>
+                <li>Quick data extraction</li>
+                <li>AI-powered insights</li>
+                <li>100% Free</li>
+            </ul>
+        </div>
+        """, unsafe_allow_html=True)
+        if st.button("🚀 Launch LinkedIn Extractor", key="linkedin_btn", use_container_width=True):
+            if not st.session_state.get('hf_api_key'):
+                st.error("❌ Please enter your HuggingFace API Key first")
+            else:
+                if os.path.exists("linkdin_deploy.py"):
+                    port = get_available_port(8601)
+                    st.session_state.linkedin_port = port
+                    with st.spinner(f"Starting LinkedIn extractor..."):
+                        run_streamlit_app_in_thread("linkdin_deploy.py", port)
+                        time.sleep(3)
+                        webbrowser.open_new_tab(f"http://localhost:{port}")
+                        st.success(f"✅ LinkedIn extractor launched!")
+                else:
+                    st.error("❌ linkdin_deploy.py file not found!")
+    with col2:
+        st.markdown("""
+        <div class="platform-card facebook-card">
+            <h3>📘 Facebook Extractor</h3>
+            <ul class="feature-list">
+                <li>Manual login required</li>
+                <li>Group post extraction</li>
+                <li>Works with private groups</li>
+                <li>AI conversation analysis</li>
+                <li>100% Free</li>
+            </ul>
+        </div>
+        """, unsafe_allow_html=True)
+        if st.button("🚀 Launch Facebook Extractor", key="facebook_btn", use_container_width=True):
+            if not st.session_state.get('hf_api_key'):
+                st.error("❌ Please enter your HuggingFace API Key first")
+            else:
+                if os.path.exists("facebook_deploy.py"):
+                    port = get_available_port(8701)
+                    st.session_state.facebook_port = port
+                    with st.spinner(f"Starting Facebook extractor..."):
+                        run_streamlit_app_in_thread("facebook_deploy.py", port)
+                        time.sleep(3)
+                        webbrowser.open_new_tab(f"http://localhost:{port}")
+                        st.success(f"✅ Facebook extractor launched!")
+                else:
+                    st.error("❌ facebook_deploy.py file not found!")
+    with col3:
+        st.markdown("""
+        <div class="platform-card facebook-pro-card">
+            <h3>🔥 Facebook Extractor 2.0</h3>
+            <ul class="feature-list">
+                <li>Enhanced Facebook data extraction</li>
+                <li>More powerful algorithms</li>
+                <li>Faster processing speed</li>
+                <li>Advanced AI analysis</li>
+                <li>100% Free</li>
+            </ul>
+        </div>
+        """, unsafe_allow_html=True)
+        if st.button("🚀 Launch Facebook Extractor 2.0", key="facebook_pro_btn", use_container_width=True):
+            if not st.session_state.get('hf_api_key'):
+                st.error("❌ Please enter your HuggingFace API Key first")
+            else:
+                if os.path.exists("let_deploy.py"):
+                    port = get_available_port(8801)
+                    st.session_state.facebook_pro_port = port
+                    with st.spinner(f"Starting Facebook Extractor 2.0..."):
+                        run_streamlit_app_in_thread("let_deploy.py", port)
+                        time.sleep(3)
+                        webbrowser.open_new_tab(f"http://localhost:{port}")
+                        st.success(f"✅ Facebook Extractor 2.0 launched!")
+                else:
+                    st.error("❌ let_deploy.py file not found!")
+    # Status
+    st.markdown("---")
+    st.subheader("🔄 Current Status")
+    status_col1, status_col2, status_col3 = st.columns(3)
+    with status_col1:
+        st.markdown("### 💼 LinkedIn")
+        if st.session_state.linkedin_port:
+            st.success(f"✅ Running on port {st.session_state.linkedin_port}")
+        else:
+            st.info("💤 Not running")
+    with status_col2:
+        st.markdown("### 📘 Facebook")
+        if st.session_state.facebook_port:
+            st.success(f"✅ Running on port {st.session_state.facebook_port}")
+        else:
+            st.info("💤 Not running")
+    with status_col3:
+        st.markdown("### 🔥 Facebook 2.0")
+        if st.session_state.facebook_pro_port:
+            st.success(f"✅ Running on port {st.session_state.facebook_pro_port}")
+        else:
+            st.info("💤 Not running")
+    # Instructions
+    with st.expander("📋 How to Use", expanded=True):
+        st.markdown("""
+        1. **Get FREE API Key:**
+           - Go to https://huggingface.co/settings/tokens
+           - Create account (FREE)
+           - Click "New token"
+           - Copy your token (starts with hf_)
+        2. **Enter API Key above**
+        3. **Click any extractor to launch**
+        4. **For Streamlit Cloud:**
+           - Add this to Secrets:
+           ```
+           HUGGINGFACEHUB_API_TOKEN = "your_token_here"
+           ```
+        """)
+if __name__ == "__main__":  # only when this module is the entry script, not when imported
+    main()  # build the dashboard page

requirements.txt CHANGED Viewed

@@ -1,3 +1,15 @@
-altair
-pandas
-streamlit

+streamlit>=1.28.0
+selenium>=4.15.0
+beautifulsoup4>=4.12.0
+requests>=2.31.0
+langchain>=0.0.350
+langchain-community>=0.0.10
+langchain-text-splitters>=0.0.1
+faiss-cpu>=1.7.0
+sentence-transformers>=2.2.0
+transformers>=4.35.0
+torch>=2.0.0
+accelerate>=0.24.0
+huggingface-hub>=0.19.0
+webdriver-manager>=4.0.0
+pydantic>=2.0.0