Refat81 committed on
Commit
46566cb
·
verified ·
1 Parent(s): 044142b

Upload 5 files

Browse files
Files changed (5) hide show
  1. facebook.py +858 -0
  2. let_deploy.py +453 -0
  3. linkdin_deploy.py +246 -0
  4. main_dashboard.py +238 -0
  5. requirements.txt +15 -3
facebook.py ADDED
@@ -0,0 +1,858 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import time
3
+ from bs4 import BeautifulSoup
4
+ from langchain.text_splitter import CharacterTextSplitter
5
+ from langchain.embeddings import SentenceTransformerEmbeddings
6
+ from langchain.vectorstores import FAISS
7
+ from langchain.memory import ConversationBufferMemory
8
+ from langchain.chains import ConversationalRetrievalChain
9
+ from langchain.schema import Document
10
+ from selenium import webdriver
11
+ from selenium.webdriver.common.by import By
12
+ from selenium.webdriver.support.ui import WebDriverWait
13
+ from selenium.webdriver.support import expected_conditions as EC
14
+ from selenium.webdriver.chrome.options import Options
15
+ from langchain_community.llms.ollama import Ollama
16
+ import re
17
+ import requests
18
+ import subprocess
19
+ import os
20
+ import json
21
+ from datetime import datetime
22
+ from typing import List
23
+ import logging
24
+
25
+ # Set up logging
26
+ logging.basicConfig(level=logging.INFO)
27
+ logger = logging.getLogger(__name__)
28
+
29
class FacebookGroupExtractor:
    """Scrape posts from a Facebook group through a Selenium-driven Chrome session.

    The user logs in by hand in the browser window this class opens; the
    extractor then loads a group URL, scrolls the page and collects text that
    passes the post heuristics below. Progress and failures are reported
    through Streamlit (``st``) calls and the module ``logger``.
    """

    # Substrings that mark a text as navigation/UI chrome rather than a post.
    # Matching is case-insensitive and substring-based (see _is_valid_post).
    _EXCLUDED_PHRASES = (
        'facebook', 'login', 'sign up', 'password', 'email',
        'cookie', 'privacy', 'terms', 'menu', 'navigation',
        'home', 'search', 'notification', 'messenger', 'watch',
        'marketplace', 'groups', 'pages', 'events',
    )

    def __init__(self):
        self.driver = None  # Chrome WebDriver; set by setup_driver()
        self.wait = None  # WebDriverWait bound to the driver (25 s timeout)
        self.is_logged_in = False  # set to True by check_login_status()

    def setup_driver(self):
        """Start the Chrome driver used for the manual login.

        Returns:
            True when the browser started; False (after ``st.error``) otherwise.
        """
        chrome_options = Options()
        for argument in (
            "--start-maximized",
            "--disable-gpu",
            "--no-sandbox",
            "--disable-dev-shm-usage",
            "--disable-blink-features=AutomationControlled",
            "--disable-extensions",
            "--disable-infobars",
            "--disable-popup-blocking",
            "--ignore-certificate-errors",
            "--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        ):
            chrome_options.add_argument(argument)

        try:
            self.driver = webdriver.Chrome(options=chrome_options)
            self.wait = WebDriverWait(self.driver, 25)
            return True
        except Exception as e:
            st.error(f"Failed to setup driver: {str(e)}")
            return False

    def manual_login(self):
        """Open facebook.com in the driver and show manual-login instructions.

        Returns:
            True when the page was opened; False (after ``st.error``) otherwise.
        """
        try:
            st.info("πŸ”“ Opening Facebook for manual login...")
            self.driver.get("https://www.facebook.com")
            time.sleep(3)

            # Dismiss the cookie banner, if one is shown, so it does not block login.
            self._handle_cookies()

            st.success("βœ… Facebook opened successfully!")
            st.info("""
            **Please manually login to Facebook:**
            1. Enter your email/phone and password
            2. Complete any security checks if needed
            3. Wait until you're fully logged in
            4. Return to this app and click 'I'm Logged In'
            """)

            return True

        except Exception as e:
            st.error(f"Failed to open Facebook: {str(e)}")
            return False

    def check_login_status(self):
        """Report whether the browser session looks logged in.

        Sets ``self.is_logged_in`` to True on success. Detection first looks
        for a visible element matching one of a few XPaths, then falls back to
        the current URL.

        Returns:
            True when a login indicator was found, False otherwise.
        """
        try:
            # NOTE(review): the last XPath is very broad and may also match
            # elements on the logged-out page - confirm against the live site.
            login_indicators = [
                "//a[@aria-label='Profile']",
                "//div[@aria-label='Account']",
                "//span[contains(text(), 'Menu')]",
                "//div[contains(@aria-label, 'Facebook')]",
            ]

            for indicator in login_indicators:
                try:
                    element = self.driver.find_element(By.XPATH, indicator)
                    if element.is_displayed():
                        self.is_logged_in = True
                        return True
                except Exception:
                    # Indicator absent or stale: try the next one.
                    continue

            # Fall back to the URL shape.
            current_url = self.driver.current_url
            if "facebook.com/home" in current_url or "facebook.com/?sk" in current_url:
                self.is_logged_in = True
                return True

            return False

        except Exception as e:
            logger.error("Login check error: %s", e)
            return False

    def extract_group_data(self, group_url: str, max_scrolls: int = 10) -> dict:
        """Load a group page and collect its info and posts.

        Args:
            group_url: Group URL; any query string is stripped before loading.
            max_scrolls: Upper bound on scroll iterations.

        Returns:
            On success a dict with ``group_info``, ``posts``,
            ``extraction_time``, ``total_posts`` and ``status == "success"``;
            otherwise ``{"error": ..., "status": "error"}``.
        """
        try:
            if not self.is_logged_in:
                return {"error": "Not logged in. Please login first.", "status": "error"}

            st.info(f"🌐 Accessing group: {group_url}")

            # Drop the query string.
            if '?' in group_url:
                group_url = group_url.split('?')[0]

            self.driver.get(group_url)
            time.sleep(5)

            if not self._verify_group_access():
                return {"error": "Cannot access group. Check if URL is correct and you have permissions.", "status": "error"}

            group_info = self._extract_group_info()
            posts_data = self._scroll_and_extract_posts(max_scrolls)

            return {
                "group_info": group_info,
                "posts": posts_data,
                "extraction_time": datetime.now().isoformat(),
                "total_posts": len(posts_data),
                "status": "success",
            }

        except Exception as e:
            logger.error("Extraction error: %s", e)
            return {"error": f"Extraction failed: {str(e)}", "status": "error"}

    def _handle_cookies(self):
        """Click the first cookie-consent button that can be found (best effort)."""
        cookie_selectors = [
            "button[data-testid='cookie-policy-manage-dialog-accept-button']",
            "button[data-cookiebanner='accept_button']",
            "button[title*='cookie' i]",
            "button[title*='allow' i]",
            "//button[contains(., 'Allow')]",
            "//button[contains(., 'Accept')]",
        ]

        for selector in cookie_selectors:
            try:
                # Selectors starting with "//" are XPath, the rest are CSS.
                strategy = By.XPATH if selector.startswith("//") else By.CSS_SELECTOR
                element = self.driver.find_element(strategy, selector)
                element.click()
                time.sleep(2)
                break
            except Exception:
                # No such button (or not clickable): try the next selector.
                continue

    def _verify_group_access(self) -> bool:
        """Return True when the loaded page looks like an accessible group."""
        try:
            group_indicators = [
                "//div[contains(@data-pagelet, 'Group')]",
                "//div[contains(@aria-label, 'Group')]",
                "//h1[contains(., 'Group')]",
                "//div[@role='main']",
            ]

            for indicator in group_indicators:
                try:
                    element = self.driver.find_element(By.XPATH, indicator)
                    if element.is_displayed():
                        return True
                except Exception:
                    continue

            # No positive indicator: look for access-denied wording in the page.
            page_text = self.driver.page_source.lower()
            if any(indicator in page_text for indicator in ['not available', 'content unavailable', 'access denied']):
                return False

            return "groups" in self.driver.current_url

        except Exception:
            return False

    def _first_text(self, selectors, accept):
        """Return the first element text among *selectors* for which ``accept(text)`` is true.

        Returns None when no selector yields an accepted text.
        """
        for selector in selectors:
            try:
                text = self.driver.find_element(By.XPATH, selector).text
                if accept(text):
                    return text
            except Exception:
                # Selector did not match: try the next one.
                continue
        return None

    def _extract_group_info(self) -> dict:
        """Collect the group's name, member-count text and description when present.

        Returns:
            Dict with any of the keys ``name``, ``member_count``, ``description``.
        """
        group_info = {}
        try:
            name = self._first_text(
                [
                    "//h1",
                    "//div[contains(@class, 'groupName')]",
                    "//span[contains(@class, 'groupName')]",
                    "//title",
                ],
                lambda text: len(text.strip()) > 3,
            )
            if name is not None:
                group_info["name"] = name.strip()

            # The member count is stored as the raw element text, unparsed.
            member_text = self._first_text(
                [
                    "//*[contains(text(), 'members')]",
                    "//*[contains(text(), 'Members')]",
                    "//div[contains(@class, 'memberCount')]",
                ],
                lambda text: 'members' in text.lower(),
            )
            if member_text is not None:
                group_info["member_count"] = member_text

            desc = self._first_text(
                [
                    "//div[contains(@class, 'description')]",
                    "//div[contains(@class, 'about')]",
                    "//div[contains(@data-ad-comet-preview, 'message')]",
                ],
                lambda text: bool(text.strip()),
            )
            if desc is not None:
                group_info["description"] = desc.strip()

        except Exception as e:
            logger.warning("Group info extraction failed: %s", e)

        return group_info

    def _scroll_and_extract_posts(self, max_scrolls: int) -> List[dict]:
        """Scroll the page up to *max_scrolls* times, collecting new posts each time.

        Stops early when the document height no longer grows after a scroll.
        """
        all_posts = []
        last_height = self.driver.execute_script("return document.body.scrollHeight")

        for scroll_iteration in range(max_scrolls):
            st.info(f"πŸ“œ Scrolling... ({scroll_iteration + 1}/{max_scrolls})")

            # Harvest what is currently rendered, skipping near-duplicates.
            for post in self._extract_posts_from_current_page():
                if not self._is_duplicate_post(post, all_posts):
                    all_posts.append(post)

            self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(4)

            # An unchanged height means no more content was loaded.
            new_height = self.driver.execute_script("return document.body.scrollHeight")
            if new_height == last_height:
                st.success("βœ… Reached end of content")
                break
            last_height = new_height

        return all_posts

    def _extract_posts_from_current_page(self) -> List[dict]:
        """Run every extraction strategy on the current page and concatenate the results."""
        posts = []

        # Strategy 1: article elements (main posts)
        posts.extend(self._extract_by_xpath("//div[@role='article']", "article"))

        # Strategy 2: divs nested in a feed pagelet
        posts.extend(self._extract_by_xpath("//div[contains(@data-pagelet, 'Feed')]//div", "feed"))

        # Strategy 3: user content containers
        posts.extend(self._extract_by_xpath("//div[contains(@class, 'userContent')]", "userContent"))

        # Strategy 4: any div with substantial direct text
        posts.extend(self._extract_text_rich_elements())

        return posts

    def _extract_by_xpath(self, xpath: str, source: str) -> List[dict]:
        """Build post dicts from every element matching *xpath* whose text is a valid post.

        Args:
            xpath: XPath evaluated against the whole document.
            source: Label stored in each post's ``source`` field.
        """
        posts = []
        try:
            elements = self.driver.find_elements(By.XPATH, xpath)

            for i, element in enumerate(elements):
                try:
                    post_text = element.text.strip()
                    if self._is_valid_post(post_text):
                        posts.append(self._parse_structured_post(element, post_text, source))
                except Exception as e:
                    logger.debug("Error extracting element %s: %s", i, e)
                    continue

        except Exception as e:
            logger.warning("XPath %s failed: %s", source, e)

        return posts

    def _extract_text_rich_elements(self) -> List[dict]:
        """Build post dicts from divs whose own text node is longer than 100 characters."""
        posts = []
        try:
            elements = self.driver.find_elements(By.XPATH, "//div[string-length(text()) > 100]")

            for element in elements:
                try:
                    text = element.text.strip()
                    if self._is_valid_post(text):
                        posts.append({
                            "content": text,
                            "source": "text_rich",
                            "timestamp": datetime.now().isoformat(),
                            # Only the first 200 characters are inspected.
                            "has_comments": "comment" in text.lower()[:200],
                        })
                except Exception:
                    continue

        except Exception as e:
            logger.warning("Text-rich extraction failed: %s", e)

        return posts

    def _parse_structured_post(self, element, text: str, source: str) -> dict:
        """Wrap *text* in a post dict and add comment/reaction hints.

        Args:
            element: Selenium element the text came from.
            text: Visible text of the element.
            source: Label of the strategy that found the element.

        Returns:
            Dict with ``content``, ``source``, ``timestamp`` (extraction time,
            not the post's own date), ``has_comments`` and ``reactions``.
        """
        post_data = {
            "content": text,
            "source": source,
            "timestamp": datetime.now().isoformat(),
            "has_comments": False,
            "reactions": 0,
        }

        try:
            # The leading "." scopes the XPath to this element; a bare "//"
            # would search the whole document and flag every post.
            comment_indicators = (
                ".//*[contains(text(), 'comment')]",
                ".//*[contains(text(), 'Comment')]",
            )

            for indicator in comment_indicators:
                try:
                    if element.find_elements(By.XPATH, indicator):
                        post_data["has_comments"] = True
                        break
                except Exception:
                    continue

            # Reaction count: first number directly followed by "like"/"reaction".
            reaction_text = text.lower()
            if 'like' in reaction_text or 'reaction' in reaction_text:
                reaction_match = re.search(r'(\d+)\s*(like|reaction)', reaction_text)
                if reaction_match:
                    post_data["reactions"] = int(reaction_match.group(1))

        except Exception as e:
            logger.debug("Structured parsing failed: %s", e)

        return post_data

    def _is_valid_post(self, text: str) -> bool:
        """Return True when *text* looks like post content rather than UI chrome.

        A text qualifies when it has at least 50 characters and 8
        whitespace-separated words and contains none of the excluded phrases.
        """
        if not text or len(text) < 50:
            return False

        text_lower = text.lower()
        if any(phrase in text_lower for phrase in self._EXCLUDED_PHRASES):
            return False

        return len(text.split()) >= 8

    def _is_duplicate_post(self, new_post: dict, existing_posts: List[dict]) -> bool:
        """Return True when *new_post* is more than 80% similar to an already collected post.

        Only the first 150 characters of each post's content are compared.
        """
        new_content = new_post.get("content", "")[:150]

        return any(
            self._calculate_similarity(new_content, existing_post.get("content", "")[:150]) > 0.8
            for existing_post in existing_posts
        )

    def _calculate_similarity(self, text1: str, text2: str) -> float:
        """Return the Jaccard similarity of the lower-cased word sets of the two texts.

        Returns 0.0 when either text has no words.
        """
        words1 = set(text1.lower().split())
        words2 = set(text2.lower().split())

        if not words1 or not words2:
            return 0.0

        return len(words1 & words2) / len(words1 | words2)

    def close(self):
        """Quit the browser (if any) and reset the extractor to its initial state."""
        driver = self.driver
        # Reset first so the instance is never left pointing at a dead session,
        # even when quit() fails.
        self.driver = None
        self.wait = None
        self.is_logged_in = False

        if driver is not None:
            try:
                driver.quit()
            except Exception as e:
                # The window may already have been closed by the user.
                logger.warning("Browser shutdown failed: %s", e)
469
+
470
def check_ollama_running(base_url: str = "http://localhost:11434", timeout: float = 5):
    """Return True when the Ollama HTTP API answers ``GET /api/tags`` with status 200.

    Args:
        base_url: Root URL of the Ollama server.
        timeout: Request timeout in seconds.

    Returns:
        False on any connection/HTTP-level failure or non-200 response.
    """
    try:
        response = requests.get(f"{base_url}/api/tags", timeout=timeout)
    except requests.RequestException:
        # Server down, refused or timed out: treat as "not running".
        return False
    return response.status_code == 200
477
+
478
def start_ollama(max_wait: float = 10.0, poll_interval: float = 0.5):
    """Launch ``ollama serve`` in the background and wait until its API responds.

    Args:
        max_wait: Maximum number of seconds to wait for the server to come up.
        poll_interval: Seconds between readiness checks.

    Returns:
        True once ``check_ollama_running()`` succeeds; False when the process
        could not be spawned (reported via ``st.error``) or the server did not
        answer within *max_wait*.
    """
    try:
        if os.name == 'nt':  # Windows
            subprocess.Popen(['ollama', 'serve'], creationflags=subprocess.CREATE_NO_WINDOW)
        else:  # Linux/Mac
            subprocess.Popen(['ollama', 'serve'], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    except (OSError, ValueError, subprocess.SubprocessError) as e:
        # Typically FileNotFoundError when the ollama binary is not on PATH.
        st.error(f"Failed to start Ollama: {e}")
        return False

    # Poll instead of sleeping a fixed time: return as soon as the server is
    # up, and give a slow start a little longer before giving up.
    deadline = time.monotonic() + max_wait
    while True:
        if check_ollama_running():
            return True
        if time.monotonic() >= deadline:
            return False
        time.sleep(poll_interval)
490
+
491
def get_available_models():
    """Return the names of the models reported by the local Ollama server.

    Queries ``GET /api/tags`` on ``http://localhost:11434``.

    Returns:
        A non-empty list of model names. When the server is unreachable,
        answers with a non-200 status, returns an unexpected payload or lists
        no models, a default list of common model names is returned so callers
        always have something to offer.
    """
    fallback = ["llama2", "mistral", "gemma", "llama3"]
    try:
        response = requests.get("http://localhost:11434/api/tags", timeout=5)
        if response.status_code == 200:
            names = [model['name'] for model in response.json().get('models', [])]
            if names:
                return names
    except (requests.RequestException, ValueError, KeyError, TypeError, AttributeError):
        # Network failure or a payload that is not the expected
        # {"models": [{"name": ...}, ...]} shape: use the defaults.
        pass
    return fallback
500
+
501
def process_group_data(group_data: dict):
    """Turn extracted group data into a FAISS vector store for the chatbot.

    Args:
        group_data: Extraction result holding a ``posts`` list and optionally
            a ``group_info`` dict.

    Returns:
        ``(vectorstore, chunks)`` where *chunks* are the text pieces that were
        embedded, or ``(None, [])`` when there are no posts to process.
    """
    if not group_data:
        return None, []
    posts = group_data.get("posts")
    if not posts:
        return None, []

    # Render a header followed by one section per post, then join once.
    group_name = group_data.get('group_info', {}).get('name', 'Unknown')
    sections = [
        f"Group: {group_name}\n\n",
        f"Total Posts Extracted: {len(posts)}\n\n",
    ]
    for number, post in enumerate(posts, start=1):
        sections.append(
            f"--- Post {number} ---\n"
            f"Source: {post.get('source', 'unknown')}\n"
            f"Reactions: {post.get('reactions', 0)}\n"
            f"Has Comments: {post.get('has_comments', False)}\n"
            f"Content: {post.get('content', '')}\n\n"
        )
    combined_text = "".join(sections)

    # Cut the text into overlapping, newline-aligned chunks.
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len
    )
    chunks = text_splitter.split_text(combined_text)
    documents = [Document(page_content=piece) for piece in chunks]

    # Embed the chunks and index them.
    embedder = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
    vectorstore = FAISS.from_documents(documents, embedder)

    return vectorstore, chunks
538
+
539
def create_chatbot(vectorstore, model_name: str):
    """Build a conversational retrieval chain over *vectorstore* backed by Ollama.

    Args:
        vectorstore: Store whose ``as_retriever`` supplies context documents.
        model_name: Name of the Ollama model to use.

    Returns:
        The chain, or None (after ``st.error``) when construction fails.
    """
    try:
        ollama_llm = Ollama(
            model=model_name,
            base_url="http://localhost:11434",
            temperature=0.7,
            top_k=40,
            top_p=0.9,
            num_predict=512
        )

        # Fresh buffer memory, so every chain starts with an empty conversation.
        conversation_memory = ConversationBufferMemory(
            memory_key="chat_history",
            return_messages=True,
            output_key="answer"
        )

        # Retrieve the three most similar chunks per question.
        retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

        return ConversationalRetrievalChain.from_llm(
            llm=ollama_llm,
            retriever=retriever,
            memory=conversation_memory,
            return_source_documents=True,
            output_key="answer"
        )
    except Exception as e:
        st.error(f"Failed to create chatbot: {str(e)}")
        return None
569
+
570
def clear_chat_history():
    """Reset the conversation while keeping the extracted data.

    Rebuilds the chatbot from the stored vector store (which gives it fresh
    memory) and empties the displayed chat history. Shows an error when no
    vector store is available yet.
    """
    store = st.session_state.vectorstore if "vectorstore" in st.session_state else None
    if not store:
        st.error("❌ No extracted data found. Please extract group data first.")
        return

    # A new chain comes with new memory.
    selected_model = st.session_state.get("current_model", "llama2")
    st.session_state.chatbot = create_chatbot(store, selected_model)
    st.session_state.chat_history = []
    st.success("πŸ”„ Chat history cleared! You can now ask questions with a fresh conversation.")
580
+
581
def _init_session_state():
    """Seed ``st.session_state`` with defaults for every key the app reads."""
    defaults = {
        "extractor": None,
        "login_status": "not_started",  # not_started, in_progress, completed, failed
        "group_data": None,
        "vectorstore": None,
        "chatbot": None,
        "chat_history": [],
        "current_model": "llama2",
    }
    for key, value in defaults.items():
        if key not in st.session_state:
            st.session_state[key] = value


def _release_extractor():
    """Close the browser held in session state (if any) and drop the reference."""
    extractor = st.session_state.extractor
    if extractor:
        extractor.close()
    st.session_state.extractor = None


def _render_sidebar():
    """Draw the sidebar: Ollama status, model choice, login, extraction, chat reset."""
    with st.sidebar:
        st.header("πŸ”§ Configuration")

        # Ollama status
        st.subheader("πŸ€– Ollama Status")
        if check_ollama_running():
            st.success("βœ… Ollama is running")
        else:
            st.error("❌ Ollama is not running")
            if st.button("πŸ”„ Start Ollama"):
                if start_ollama():
                    st.success("βœ… Ollama started successfully")
                    st.rerun()
                else:
                    st.error("❌ Failed to start Ollama")

        # Model selection; guard against an empty/None result so the
        # selectbox always has at least one option.
        available_models = get_available_models() or ["llama2"]
        model_name = st.selectbox(
            "Select AI Model",
            available_models,
            index=0,
            key="model_selector"
        )
        st.session_state.current_model = model_name

        # Login section
        st.subheader("πŸ” Facebook Login")

        if st.session_state.login_status == "not_started":
            if st.button("πŸšͺ Start Manual Login", type="primary", use_container_width=True):
                extractor = FacebookGroupExtractor()
                if extractor.setup_driver():
                    if extractor.manual_login():
                        st.session_state.extractor = extractor
                        st.session_state.login_status = "in_progress"
                        st.rerun()
                    else:
                        # Do not leave an orphaned browser behind on failure.
                        extractor.close()

        elif st.session_state.login_status == "in_progress":
            st.info("πŸ”„ Login in progress...")

            col1, col2 = st.columns(2)
            with col1:
                if st.button("βœ… I'm Logged In", type="primary"):
                    if st.session_state.extractor and st.session_state.extractor.check_login_status():
                        st.session_state.login_status = "completed"
                        st.success("βœ… Login successful!")
                        st.rerun()
                    else:
                        st.error("❌ Login not detected. Please make sure you're logged in.")
            with col2:
                if st.button("❌ Cancel Login"):
                    _release_extractor()
                    st.session_state.login_status = "not_started"
                    st.rerun()

        elif st.session_state.login_status == "completed":
            st.success("βœ… Logged in to Facebook")
            if st.button("πŸšͺ Logout & Restart"):
                _release_extractor()
                st.session_state.login_status = "not_started"
                st.session_state.group_data = None
                st.session_state.vectorstore = None
                st.session_state.chatbot = None
                st.session_state.chat_history = []
                st.rerun()

        # Group extraction section
        st.subheader("πŸ“ Group Information")
        group_url = st.text_input(
            "Facebook Group URL",
            placeholder="https://www.facebook.com/groups/groupname/",
            help="Works with both public and private groups"
        )

        # Extraction settings
        st.subheader("βš™οΈ Extraction Settings")
        max_scrolls = st.slider("Number of scrolls", 5, 20, 10)

        if st.button("πŸš€ Extract Group Data", type="primary", use_container_width=True):
            if st.session_state.login_status != "completed":
                st.error("❌ Please login to Facebook first")
            elif not group_url or "facebook.com/groups/" not in group_url:
                st.error("❌ Please enter a valid Facebook group URL")
            elif not check_ollama_running():
                st.error("❌ Ollama is not running")
            else:
                with st.spinner("🌐 Extracting group data... This may take a few minutes."):
                    group_data = st.session_state.extractor.extract_group_data(group_url, max_scrolls)

                    if group_data.get("status") == "success" and group_data.get("posts"):
                        st.session_state.group_data = group_data

                        # Index the posts and build a chatbot on top of them.
                        vectorstore, chunks = process_group_data(group_data)
                        if vectorstore:
                            st.session_state.vectorstore = vectorstore
                            st.session_state.chatbot = create_chatbot(vectorstore, model_name)
                            st.session_state.chat_history = []
                            st.success(f"βœ… Successfully extracted {len(group_data['posts'])} posts!")
                        else:
                            st.error("❌ Failed to process group data")
                    else:
                        error_msg = group_data.get("error", "Unknown error")
                        st.error(f"❌ Extraction failed: {error_msg}")

        # Chat management section
        if st.session_state.chatbot and st.session_state.group_data:
            st.subheader("πŸ’¬ Chat Management")
            if st.button("πŸ—‘οΈ Clear Chat History", type="secondary", use_container_width=True):
                clear_chat_history()
                st.rerun()


def _render_status_panel():
    """Draw the login guidance for the current state and a preview of extracted data."""
    st.header("πŸ“Š Login & Extraction Status")

    if st.session_state.login_status == "not_started":
        st.info("""
        ## πŸ” Manual Login Required

        **How it works:**
        1. Click 'Start Manual Login' in the sidebar
        2. A browser window will open with Facebook
        3. **Manually login** to your Facebook account
        4. Complete any security checks if needed
        5. Return here and click 'I'm Logged In'

        **Benefits:**
        - Works with both public and private groups
        - No need to enter password in this app
        - Handles 2FA and security checks
        - More reliable than automated login
        """)

    elif st.session_state.login_status == "in_progress":
        st.warning("""
        ## πŸ”„ Login in Progress

        **Please complete these steps:**
        1. βœ… Browser window should be open with Facebook
        2. πŸ”„ **Manually login** to your Facebook account
        3. βœ… Wait until you see your Facebook home page
        4. πŸ”„ Return here and click **'I'm Logged In'**

        **Troubleshooting:**
        - If browser didn't open, check popup blockers
        - Make sure you're fully logged into Facebook
        - If you see security checks, complete them first
        """)

    elif st.session_state.login_status == "completed":
        st.success("""
        ## βœ… Login Successful!

        You can now:
        1. Enter a Facebook group URL in the sidebar
        2. Adjust extraction settings
        3. Click 'Extract Group Data'
        4. Chat with the extracted content
        """)

    if st.session_state.group_data:
        group_info = st.session_state.group_data.get("group_info", {})
        posts = st.session_state.group_data.get("posts", [])

        st.subheader("🏷️ Group Information")
        if group_info:
            for key, value in group_info.items():
                if value:
                    st.write(f"**{key.replace('_', ' ').title()}:** {value}")

        st.subheader(f"πŸ“ Posts Extracted: {len(posts)}")

        # Preview only the first three posts.
        for i, post in enumerate(posts[:3]):
            with st.expander(f"Post {i+1}"):
                content = post.get("content", "")
                st.text_area(f"Content {i+1}", content, height=150, key=f"post_{i}")
                st.caption(f"Source: {post.get('source', 'unknown')} | Reactions: {post.get('reactions', 0)}")


def _render_chat_panel():
    """Draw the chat transcript, the question input and starter suggestions."""
    st.header("πŸ’¬ Chat with Group Data")

    chat_ready = st.session_state.chatbot and st.session_state.group_data

    # Chat management button at the top
    if chat_ready:
        col_clear, col_info = st.columns([1, 3])
        with col_clear:
            if st.button("πŸ—‘οΈ Clear History", key="clear_top"):
                clear_chat_history()
                st.rerun()
        with col_info:
            st.caption("Clear conversation history while keeping extracted data")

    if chat_ready:
        # Replay the stored conversation.
        for chat in st.session_state.chat_history:
            with st.chat_message("user"):
                st.write(chat["question"])
            with st.chat_message("assistant"):
                st.write(chat["answer"])

        user_question = st.chat_input("Ask about the group content...")

        if user_question:
            with st.chat_message("user"):
                st.write(user_question)

            with st.chat_message("assistant"):
                with st.spinner("πŸ€” Analyzing..."):
                    try:
                        response = st.session_state.chatbot.invoke({"question": user_question})
                        answer = response.get("answer", "I couldn't generate a response.")
                        st.write(answer)

                        st.session_state.chat_history.append({
                            "question": user_question,
                            "answer": answer
                        })

                    except Exception as e:
                        error_msg = f"Error: {str(e)}"
                        st.error(error_msg)

        if not st.session_state.chat_history:
            st.subheader("πŸ’‘ Suggested Questions")
            suggestions = [
                "What are the main topics discussed in this group?",
                "Summarize the most active discussions",
                "What kind of content gets the most engagement?",
                "Are there any common questions or problems?",
                "What's the overall tone of the group?"
            ]

            for suggestion in suggestions:
                if st.button(suggestion, key=suggestion):
                    st.info(f"Type: '{suggestion}' in the chat input above")

    elif st.session_state.login_status == "completed":
        st.info("πŸ“Š Extract group data first to start chatting")
    else:
        st.info("πŸ” Login to Facebook to get started")


def main():
    """Streamlit entry point: configure the page and render sidebar plus two panels."""
    st.set_page_config(
        page_title="Facebook Group Analyzer with Manual Login",
        page_icon="πŸ“˜",
        layout="wide"
    )

    st.title("πŸ“˜ Facebook Group Data Extractor & Chatbot")
    st.markdown("Manual login required for private groups - Works with both public and private groups")

    _init_session_state()
    _render_sidebar()

    # Main content area
    col1, col2 = st.columns([1, 1])
    with col1:
        _render_status_panel()
    with col2:
        _render_chat_panel()


if __name__ == "__main__":
    main()
let_deploy.py ADDED
@@ -0,0 +1,453 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # let_deploy.py
2
+ import streamlit as st
3
+ import time
4
+ from bs4 import BeautifulSoup
5
+ from langchain_text_splitters import CharacterTextSplitter
6
+ from langchain_community.embeddings import HuggingFaceEmbeddings
7
+ from langchain.vectorstores import FAISS
8
+ from langchain.memory import ConversationBufferMemory
9
+ from langchain.chains import ConversationalRetrievalChain
10
+ from langchain.schema import Document
11
+ from selenium import webdriver
12
+ from selenium.webdriver.common.by import By
13
+ from selenium.webdriver.support.ui import WebDriverWait
14
+ from selenium.webdriver.support import expected_conditions as EC
15
+ from selenium.webdriver.chrome.options import Options
16
+ from selenium.webdriver.chrome.service import Service
17
+ from webdriver_manager.chrome import ChromeDriverManager
18
+ from langchain_community.llms import HuggingFaceHub
19
+ import re
20
+ import requests
21
+ import os
22
+ from datetime import datetime
23
+ from typing import List
24
+ import logging
25
+
26
+ logging.basicConfig(level=logging.INFO)
27
+ logger = logging.getLogger(__name__)
28
+
29
+ st.set_page_config(page_title="Facebook Extractor 2.0", page_icon="πŸ“˜", layout="wide")
30
+
31
+ st.markdown("""
32
+ <style>
33
+ .stApp { background-color: #0e1117; color: white; }
34
+ .main-header { background: linear-gradient(135deg, #FF6B35, #FF8E53); color: white; padding: 1.5rem; border-radius: 8px; margin-bottom: 1.5rem; text-align: center; }
35
+ .stButton>button { background-color: #1877F2; color: white; border: none; border-radius: 4px; padding: 8px 16px; width: 100%; }
36
+ </style>
37
+ """, unsafe_allow_html=True)
38
+
39
def get_embeddings():
    """Create the sentence-transformer embedding model used for the vector store.

    Returns:
        The ``HuggingFaceEmbeddings`` instance, or ``None`` (after showing a
        Streamlit error) when the model cannot be constructed.
    """
    model_name = "sentence-transformers/all-MiniLM-L6-v2"
    try:
        return HuggingFaceEmbeddings(model_name=model_name)
    except Exception as exc:
        st.error(f"❌ Failed to load embeddings: {exc}")
    return None
46
+
47
def get_llm():
    """Build the HuggingFace Hub LLM used by the chatbot.

    The API token is taken from ``st.session_state['hf_api_key']`` and, when
    that is missing, from the ``HUGGINGFACEHUB_API_TOKEN`` environment
    variable. The fallback matters because this script can be started as its
    own Streamlit process, whose session state does not contain values set in
    another app.

    Returns:
        A ``HuggingFaceHub`` instance, or ``None`` (after showing a Streamlit
        error) when no token is available or construction fails.
    """
    api_key = st.session_state.get('hf_api_key') or os.environ.get("HUGGINGFACEHUB_API_TOKEN")
    if not api_key:
        st.error("❌ HuggingFace API Key not found")
        return None

    try:
        return HuggingFaceHub(
            repo_id="google/flan-t5-large",
            huggingfacehub_api_token=api_key,
            model_kwargs={"temperature": 0.7, "max_length": 512},
        )
    except Exception as e:
        st.error(f"❌ HuggingFace error: {e}")
        return None
63
+
64
class FacebookGroupExtractor:
    """Drive a Chrome session for a manual Facebook login and scrape group posts.

    Typical flow: ``setup_driver()`` -> ``manual_login()`` -> the user signs in
    by hand -> ``check_login_status()`` -> ``extract_group_data()`` -> ``close()``.
    """

    def __init__(self):
        self.driver = None  # selenium WebDriver, created by setup_driver()
        self.wait = None  # WebDriverWait bound to self.driver
        self.is_logged_in = False  # set by check_login_status()

    def setup_driver(self):
        """Start Chrome and store the driver on the instance.

        Returns:
            True when the browser started, False (after a Streamlit error) otherwise.
        """
        try:
            chrome_options = Options()
            for flag in (
                "--no-sandbox",
                "--disable-dev-shm-usage",
                "--disable-gpu",
                "--start-maximized",
                "--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
            ):
                chrome_options.add_argument(flag)

            st.info("πŸ”„ Setting up Chrome browser...")
            try:
                service = Service(ChromeDriverManager().install())
                self.driver = webdriver.Chrome(service=service, options=chrome_options)
            except Exception as exc:
                # webdriver-manager could not provide a driver; let Selenium
                # locate one on its own instead of giving up.
                logger.warning("ChromeDriverManager setup failed (%s); retrying without it", exc)
                self.driver = webdriver.Chrome(options=chrome_options)

            self.driver.set_page_load_timeout(30)
            self.wait = WebDriverWait(self.driver, 25)
            st.success("βœ… Chrome browser setup completed!")
            return True
        except Exception as e:
            st.error(f"❌ Failed to setup Chrome: {str(e)}")
            return False

    def manual_login(self):
        """Open facebook.com and show the manual-login instructions.

        Returns:
            True when the page loaded, False (after a Streamlit error) otherwise.
        """
        try:
            st.info("πŸ”“ Opening Facebook for manual login...")
            self.driver.get("https://www.facebook.com")
            time.sleep(3)
            self.wait.until(EC.presence_of_element_located((By.TAG_NAME, "body")))
            st.success("βœ… Facebook opened successfully!")
            st.info("""
            **πŸ“ Manual Login Instructions:**
            1. Browser window opened with Facebook
            2. Manually login to your account
            3. Complete any security checks
            4. Return here and click 'I'm Logged In'
            """)
            return True
        except Exception as e:
            st.error(f"❌ Failed to open Facebook: {str(e)}")
            return False

    def check_login_status(self):
        """Detect a logged-in session from the current URL or visible page elements.

        Sets ``self.is_logged_in`` to True on success.

        Returns:
            True when a logged-in indicator was found, False otherwise.
        """
        try:
            current_url = self.driver.current_url.lower()
            login_success_urls = ["facebook.com/home", "facebook.com/groups", "facebook.com/marketplace"]
            if any(url in current_url for url in login_success_urls):
                self.is_logged_in = True
                return True

            login_indicators = [
                "//a[@aria-label='Profile']",
                "//div[@aria-label='Account']",
                "//span[contains(text(), 'Menu')]",
            ]
            for indicator in login_indicators:
                try:
                    elements = self.driver.find_elements(By.XPATH, indicator)
                    for element in elements:
                        if element.is_displayed():
                            self.is_logged_in = True
                            return True
                except Exception as exc:
                    # One failing indicator must not hide the others.
                    logger.debug("Login indicator %s failed: %s", indicator, exc)
                    continue
            return False
        except Exception as e:
            logger.error(f"Login check error: {str(e)}")
            return False

    def extract_group_data(self, group_url: str, max_scrolls: int = 10) -> dict:
        """Open ``group_url`` and collect the group name and posts.

        Args:
            group_url: URL of the group page to open.
            max_scrolls: Maximum number of scroll steps while collecting posts.

        Returns:
            On success a dict with ``group_info``, ``posts``, ``extraction_time``,
            ``total_posts`` and ``status == "success"``; otherwise a dict with
            ``error`` and ``status == "error"``.
        """
        try:
            if not self.is_logged_in:
                return {"error": "Not logged in. Please login first.", "status": "error"}

            st.info(f"🌐 Accessing group: {group_url}")
            self.driver.get(group_url)
            time.sleep(5)

            # Extract group info
            group_info = self._extract_group_info()
            posts_data = self._scroll_and_extract_posts(max_scrolls)

            return {
                "group_info": group_info,
                "posts": posts_data,
                "extraction_time": datetime.now().isoformat(),
                "total_posts": len(posts_data),
                "status": "success"
            }
        except Exception as e:
            logger.error(f"Extraction error: {str(e)}")
            return {"error": f"Extraction failed: {str(e)}", "status": "error"}

    def _extract_group_info(self) -> dict:
        """Return ``{"name": ...}`` from the first heading/title text longer than 3 chars.

        Returns an empty dict when no candidate text is found.
        """
        group_info = {}
        try:
            name_selectors = ["//h1", "//h2", "//h3", "//title"]
            for selector in name_selectors:
                try:
                    elements = self.driver.find_elements(By.XPATH, selector)
                    for element in elements:
                        name = element.text.strip()
                        if name and len(name) > 3:
                            group_info["name"] = name
                            break
                    if "name" in group_info:
                        break
                except Exception as exc:
                    logger.debug("Group name selector %s failed: %s", selector, exc)
                    continue
        except Exception as e:
            logger.warning(f"Group info extraction failed: {str(e)}")
        return group_info

    def _collect_new_posts(self, all_posts: List[dict]) -> None:
        """Append posts visible on the current page that are not already in ``all_posts``."""
        for post in self._extract_posts_from_current_page():
            if not self._is_duplicate_post(post, all_posts):
                all_posts.append(post)

    def _scroll_and_extract_posts(self, max_scrolls: int) -> List[dict]:
        """Scroll the page up to ``max_scrolls`` times, harvesting posts along the way.

        Scrolling stops early when the document height no longer grows.
        """
        all_posts = []
        last_height = self.driver.execute_script("return document.body.scrollHeight")
        reached_end = False

        for _ in range(max_scrolls):
            self._collect_new_posts(all_posts)

            self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(3)

            new_height = self.driver.execute_script("return document.body.scrollHeight")
            if new_height == last_height:
                reached_end = True
                break
            last_height = new_height

        if not reached_end:
            # The last scroll grew the page (or no scroll was requested), so
            # there is content on the page that no pass has read yet.
            self._collect_new_posts(all_posts)

        return all_posts

    def _extract_posts_from_current_page(self) -> List[dict]:
        """Run every XPath strategy against the current page and return all candidate posts."""
        posts = []
        strategies = [
            ("//div[@role='article']", "article"),
            ("//div[contains(@data-pagelet, 'Feed')]//div", "feed"),
            ("//div[contains(@class, 'userContent')]", "userContent")
        ]

        for xpath, source in strategies:
            posts.extend(self._extract_by_xpath(xpath, source))

        return posts

    def _extract_by_xpath(self, xpath: str, source: str) -> List[dict]:
        """Return post dicts for elements matching ``xpath`` whose text passes ``_is_valid_post``.

        ``source`` is stored on each post to record which strategy produced it.
        Lookup failures are logged and yield an empty or partial list.
        """
        posts = []
        try:
            elements = self.driver.find_elements(By.XPATH, xpath)
        except Exception as exc:
            logger.debug("XPath lookup %s failed: %s", xpath, exc)
            return posts

        for element in elements:
            try:
                post_text = element.text.strip()
            except Exception as exc:
                # Reading text can fail for a single element; skip just that one.
                logger.debug("Could not read element text (%s): %s", source, exc)
                continue
            if self._is_valid_post(post_text):
                posts.append({
                    "content": post_text,
                    "source": source,
                    "timestamp": datetime.now().isoformat(),
                    "has_comments": False,
                    "reactions": 0
                })
        return posts

    def _is_valid_post(self, text: str) -> bool:
        """Heuristic filter: at least 30 chars, 5 words, and none of the excluded phrases."""
        if not text or len(text) < 30:
            return False
        excluded_phrases = ['facebook', 'login', 'sign up', 'password', 'menu', 'navigation']
        text_lower = text.lower()
        if any(phrase in text_lower for phrase in excluded_phrases):
            return False
        words = text.split()
        return len(words) >= 5

    def _is_duplicate_post(self, new_post: dict, existing_posts: List[dict]) -> bool:
        """Return True when the first 100 chars of ``new_post`` are > 0.7 similar to an existing post."""
        new_content = new_post.get("content", "")[:100]
        for existing_post in existing_posts:
            existing_content = existing_post.get("content", "")[:100]
            similarity = self._calculate_similarity(new_content, existing_content)
            if similarity > 0.7:
                return True
        return False

    def _calculate_similarity(self, text1: str, text2: str) -> float:
        """Return the Jaccard similarity of the lower-cased word sets (0.0 if either is empty)."""
        if not text1 or not text2:
            return 0.0
        words1 = set(text1.lower().split())
        words2 = set(text2.lower().split())
        if not words1 or not words2:
            return 0.0
        intersection = words1.intersection(words2)
        union = words1.union(words2)
        return len(intersection) / len(union) if union else 0.0

    def close(self):
        """Quit the browser (best effort) and reset the instance to its initial state."""
        driver = self.driver
        # Reset first so the instance never keeps a handle to a dead session.
        self.driver = None
        self.wait = None
        self.is_logged_in = False
        if driver:
            try:
                driver.quit()
            except Exception as exc:
                logger.debug("Ignoring error while quitting the browser: %s", exc)
271
+
272
def process_group_data(group_data: dict):
    """Turn extracted group data into a FAISS vector store plus its text chunks.

    Args:
        group_data: Extraction result; its ``posts`` list supplies the text.

    Returns:
        ``(vectorstore, chunks)`` on success, or ``(None, [])`` when there are
        no posts, embeddings are unavailable, or the store cannot be built.
    """
    posts = group_data.get("posts") if group_data else None
    if not posts:
        return None, []

    group_name = group_data.get('group_info', {}).get('name', 'Unknown')
    pieces = [f"Group: {group_name}\n\n", f"Total Posts: {len(posts)}\n\n"]
    for number, post in enumerate(posts, start=1):
        pieces.append(f"--- Post {number} ---\n")
        pieces.append(f"Content: {post.get('content', '')}\n\n")
    all_text = "".join(pieces)

    chunks = CharacterTextSplitter(
        separator="\n", chunk_size=1000, chunk_overlap=200
    ).split_text(all_text)
    documents = [Document(page_content=piece) for piece in chunks]

    try:
        embeddings = get_embeddings()
        if embeddings is not None:
            return FAISS.from_documents(documents, embeddings), chunks
        return None, []
    except Exception as e:
        st.error(f"Vector store creation failed: {e}")
        return None, []
297
+
298
def create_chatbot(vectorstore):
    """Build a conversational retrieval chain over ``vectorstore``.

    Args:
        vectorstore: Store whose ``as_retriever`` supplies the top-3 context chunks.

    Returns:
        The ``ConversationalRetrievalChain``, or ``None`` (after showing a
        Streamlit error) when the LLM is unavailable or construction fails.
    """
    try:
        llm = get_llm()
        if llm is None:
            return None

        # The chain returns both "answer" and "source_documents"; the memory
        # must be told which one to store or saving a turn raises ValueError.
        memory = ConversationBufferMemory(
            memory_key="chat_history",
            return_messages=True,
            output_key="answer",
        )
        chain = ConversationalRetrievalChain.from_llm(
            llm=llm,
            retriever=vectorstore.as_retriever(search_kwargs={"k": 3}),
            memory=memory,
            return_source_documents=True
        )
        return chain
    except Exception as e:
        st.error(f"Failed to create chatbot: {str(e)}")
        return None
315
+
316
def main():
    """Render the Facebook Group Extractor 2.0 page.

    The sidebar drives the manual login and the group extraction; the main
    area shows the current status and a chat over the extracted posts.
    """
    st.markdown("""
    <div class="main-header">
        <h1>πŸ”₯ Facebook Group Extractor 2.0</h1>
        <p>Professional Version - Powered by HuggingFace</p>
    </div>
    """, unsafe_allow_html=True)

    if st.button("β† Back to Main Dashboard", use_container_width=True):
        st.info("Return to main dashboard")
        return

    # When this script runs as its own Streamlit process, session state set in
    # another app is not available here, so also accept the token from the
    # environment.
    if not st.session_state.get('hf_api_key'):
        env_token = os.environ.get("HUGGINGFACEHUB_API_TOKEN")
        if env_token:
            st.session_state.hf_api_key = env_token

    if not st.session_state.get('hf_api_key'):
        st.error("❌ API Key not configured. Please go back to main dashboard.")
        return

    # Initialize session state
    session_defaults = {
        "extractor": None,
        "login_status": "not_started",
        "group_data": None,
        "chatbot": None,
        "chat_history": [],
    }
    for state_key, default_value in session_defaults.items():
        if state_key not in st.session_state:
            st.session_state[state_key] = default_value

    # Sidebar
    with st.sidebar:
        st.success("βœ… HuggingFace API Active")

        # Login section
        st.subheader("πŸ” Facebook Login")

        if st.session_state.login_status == "not_started":
            if st.button("πŸšͺ Start Manual Login", type="primary", use_container_width=True):
                with st.spinner("Setting up browser..."):
                    extractor = FacebookGroupExtractor()
                    if extractor.setup_driver():
                        st.session_state.extractor = extractor
                        if extractor.manual_login():
                            st.session_state.login_status = "in_progress"
                            st.rerun()
                        else:
                            # Do not leave an orphaned browser behind when
                            # the login page could not be opened.
                            extractor.close()
                            st.session_state.extractor = None

        elif st.session_state.login_status == "in_progress":
            st.info("πŸ”„ Login in progress...")
            col1, col2 = st.columns(2)
            with col1:
                if st.button("βœ… I'm Logged In", type="primary"):
                    if st.session_state.extractor and st.session_state.extractor.check_login_status():
                        st.session_state.login_status = "completed"
                        st.success("βœ… Login successful!")
                        st.rerun()
                    else:
                        st.warning("Login not detected yet. Finish signing in, then try again.")
            with col2:
                if st.button("❌ Cancel"):
                    if st.session_state.extractor:
                        st.session_state.extractor.close()
                    st.session_state.login_status = "not_started"
                    st.rerun()

        elif st.session_state.login_status == "completed":
            st.success("βœ… Logged in to Facebook")

        # Group extraction
        st.subheader("πŸ“ Group Information")
        group_url = st.text_input("Facebook Group URL", placeholder="https://www.facebook.com/groups/groupname/")
        max_scrolls = st.slider("Number of scrolls", 5, 20, 10)

        if st.button("πŸš€ Extract Group Data", type="primary", use_container_width=True):
            if st.session_state.login_status != "completed":
                st.error("❌ Please login to Facebook first")
            elif not group_url or "facebook.com/groups/" not in group_url:
                st.error("❌ Please enter a valid Facebook group URL")
            else:
                with st.spinner("🌐 Extracting group data..."):
                    group_data = st.session_state.extractor.extract_group_data(group_url, max_scrolls)
                    if group_data.get("status") == "success":
                        st.session_state.group_data = group_data
                        vectorstore, chunks = process_group_data(group_data)
                        if vectorstore:
                            st.session_state.chatbot = create_chatbot(vectorstore)
                            st.session_state.chat_history = []
                            st.success(f"βœ… Successfully extracted {len(group_data['posts'])} posts!")
                    else:
                        # Surface the reason instead of failing silently.
                        st.error(f"❌ {group_data.get('error', 'Extraction failed')}")

    # Main content
    col1, col2 = st.columns([1, 1])

    with col1:
        st.header("πŸ“Š Status")

        if st.session_state.login_status == "not_started":
            st.info("πŸ” Start manual login to begin")
        elif st.session_state.login_status == "in_progress":
            st.warning("πŸ”„ Complete login in the browser")
        elif st.session_state.login_status == "completed":
            st.success("βœ… Ready to extract group data")

        if st.session_state.group_data:
            group_info = st.session_state.group_data.get("group_info", {})
            posts = st.session_state.group_data.get("posts", [])

            st.subheader("🏷️ Group Info")
            if group_info.get("name"):
                st.write(f"**Name:** {group_info['name']}")
            st.write(f"**Posts Extracted:** {len(posts)}")

    with col2:
        st.header("πŸ’¬ Chat")

        if st.session_state.chatbot and st.session_state.group_data:
            # Replay the stored conversation before accepting a new question.
            for chat in st.session_state.chat_history:
                with st.chat_message("user"):
                    st.write(chat["question"])
                with st.chat_message("assistant"):
                    st.write(chat["answer"])

            user_question = st.chat_input("Ask about the group...")
            if user_question:
                with st.chat_message("user"):
                    st.write(user_question)
                with st.chat_message("assistant"):
                    with st.spinner("πŸ€” Analyzing..."):
                        try:
                            response = st.session_state.chatbot.invoke({"question": user_question})
                            answer = response.get("answer", "No response generated.")
                            st.write(answer)
                            st.session_state.chat_history.append({
                                "question": user_question,
                                "answer": answer
                            })
                        except Exception as e:
                            st.error(f"Error: {str(e)}")
        else:
            st.info("πŸ“Š Extract group data first to start chatting")
451
+
452
+ if __name__ == "__main__":
453
+ main()
linkdin_deploy.py ADDED
@@ -0,0 +1,246 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # linkdin_deploy.py
2
+ import streamlit as st
3
+ import requests
4
+ from bs4 import BeautifulSoup
5
+ from langchain_text_splitters import CharacterTextSplitter
6
+ from langchain_community.embeddings import HuggingFaceEmbeddings
7
+ from langchain_community.vectorstores import FAISS
8
+ from langchain.memory import ConversationBufferMemory
9
+ from langchain.chains import ConversationalRetrievalChain
10
+ from langchain_core.documents import Document
11
+ from langchain_community.llms import HuggingFaceHub
12
+ import re
13
+ import time
14
+
15
+ # Configure the page
16
+ st.set_page_config(
17
+ page_title="LinkedIn AI Analyzer",
18
+ page_icon="πŸ’Ό",
19
+ layout="wide"
20
+ )
21
+
22
+ st.markdown("""
23
+ <style>
24
+ .stApp { background-color: #0e1117; color: white; }
25
+ .main-header { background: #0077B5; color: white; padding: 1.5rem; border-radius: 8px; margin-bottom: 1.5rem; text-align: center; }
26
+ .stButton>button { background-color: #0077b5; color: white; border: none; border-radius: 4px; padding: 8px 16px; width: 100%; }
27
+ .stTextInput>div>div>input { background-color: #262730; color: white; border: 1px solid #555; }
28
+ .stSelectbox>div>div>select { background-color: #262730; color: white; }
29
+ .stTextArea textarea { background-color: #262730; color: white; }
30
+ </style>
31
+ """, unsafe_allow_html=True)
32
+
33
def get_embeddings():
    """Create the sentence-transformer embedding model used for the vector store.

    Returns:
        The ``HuggingFaceEmbeddings`` instance, or ``None`` (after showing a
        Streamlit error) when the model cannot be constructed.
    """
    model_name = "sentence-transformers/all-MiniLM-L6-v2"
    try:
        return HuggingFaceEmbeddings(model_name=model_name)
    except Exception as exc:
        st.error(f"❌ Failed to load embeddings: {exc}")
    return None
40
+
41
def get_llm():
    """Build the HuggingFace Hub LLM used by the conversation chain.

    The API token is taken from ``st.session_state['hf_api_key']`` and, when
    that is missing, from the ``HUGGINGFACEHUB_API_TOKEN`` environment
    variable. The fallback matters because this script can be started as its
    own Streamlit process, whose session state does not contain values set in
    another app.

    Returns:
        A ``HuggingFaceHub`` instance, or ``None`` (after showing a Streamlit
        error) when no token is available or construction fails.
    """
    import os  # local import: this module has no top-level ``import os``

    api_key = st.session_state.get('hf_api_key') or os.environ.get("HUGGINGFACEHUB_API_TOKEN")
    if not api_key:
        st.error("❌ HuggingFace API Key not found")
        return None

    try:
        return HuggingFaceHub(
            repo_id="google/flan-t5-large",
            huggingfacehub_api_token=api_key,
            model_kwargs={"temperature": 0.7, "max_length": 500},
        )
    except Exception as e:
        st.error(f"❌ HuggingFace error: {e}")
        return None
57
+
58
def extract_linkedin_data(url, data_type):
    """Fetch ``url`` and return a plain-text report of its longer text fragments.

    Args:
        url: Page to download.
        data_type: ``"profile"`` or ``"company"``; any other value is reported
            as post data.

    Returns:
        The formatted report, or a message starting with ``❌`` when the page
        cannot be fetched, contains no fragment longer than 50 characters, or
        any exception is raised.
    """
    try:
        request_headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
        }
        response = requests.get(url, headers=request_headers, timeout=15)
        if response.status_code != 200:
            return f"❌ Failed to access page (Status: {response.status_code})"

        soup = BeautifulSoup(response.text, 'html.parser')
        # Drop script and style elements so only visible text remains.
        for tag in soup(["script", "style"]):
            tag.decompose()

        stripped_lines = (raw.strip() for raw in soup.get_text().splitlines())
        tokens = (part.strip() for row in stripped_lines for part in row.split(" "))
        text = ' '.join(token for token in tokens if token)

        meaningful_content = []
        for fragment in text.split('.'):
            fragment = fragment.strip()
            if len(fragment) > 50:
                meaningful_content.append(fragment)

        if not meaningful_content:
            return "❌ No meaningful content found."

        headings = {
            "profile": "πŸ‘€ LINKEDIN PROFILE DATA\n\n",
            "company": "🏒 LINKEDIN COMPANY DATA\n\n",
        }
        rule = "=" * 50
        report = [
            headings.get(data_type, "πŸ“ LINKEDIN POST DATA\n\n"),
            f"πŸ”— URL: {url}\n",
            rule + "\n\n",
        ]
        # Only the first ten fragments are listed in the report body.
        for position, fragment in enumerate(meaningful_content[:10], 1):
            report.append(f"{position}. {fragment}\n\n")
        report.append(rule + "\n")
        report.append(f"βœ… Extracted {len(meaningful_content)} content blocks\n")
        return "".join(report)

    except Exception as e:
        return f"❌ Error: {str(e)}"
103
+
104
def get_text_chunks(text):
    """Split ``text`` on newlines into ~1000-character chunks with 200 overlap.

    Returns an empty list when ``text`` is empty or whitespace only.
    """
    if text.strip():
        chunker = CharacterTextSplitter(separator="\n", chunk_size=1000, chunk_overlap=200)
        return chunker.split_text(text)
    return []
109
+
110
def get_vectorstore(text_chunks):
    """Embed ``text_chunks`` into a FAISS vector store.

    Returns ``None`` when there are no chunks or the embedding model is
    unavailable.
    """
    if not text_chunks:
        return None
    docs = [Document(page_content=piece) for piece in text_chunks]
    embeddings = get_embeddings()
    if embeddings is not None:
        return FAISS.from_documents(docs, embeddings)
    return None
119
+
120
def get_conversation_chain(vectorstore):
    """Build a conversational retrieval chain over ``vectorstore``.

    Args:
        vectorstore: Store whose ``as_retriever`` supplies the top-3 context
            chunks; ``None`` short-circuits to ``None``.

    Returns:
        The ``ConversationalRetrievalChain``, or ``None`` (after showing a
        Streamlit error where applicable) when the store or the LLM is
        unavailable or construction fails.
    """
    if vectorstore is None:
        return None
    try:
        llm = get_llm()
        if llm is None:
            return None

        # The chain returns both "answer" and "source_documents"; the memory
        # must be told which one to store or saving a turn raises ValueError.
        memory = ConversationBufferMemory(
            memory_key="chat_history",
            return_messages=True,
            output_key="answer",
        )
        chain = ConversationalRetrievalChain.from_llm(
            llm=llm,
            retriever=vectorstore.as_retriever(search_kwargs={"k": 3}),
            memory=memory,
            return_source_documents=True
        )
        return chain
    except Exception as e:
        st.error(f"❌ Error: {e}")
        return None
139
+
140
def main():
    """Render the LinkedIn AI Analyzer page.

    The sidebar collects the content type and URL and runs the extraction;
    the main area shows the chat and a short overview of the extracted text.
    """
    import os  # local import: this module has no top-level ``import os``

    st.markdown("""
    <div class="main-header">
        <h1>πŸ’Ό LinkedIn AI Analyzer</h1>
        <p>Free Version - Powered by HuggingFace</p>
    </div>
    """, unsafe_allow_html=True)

    if st.button("β† Back to Main Dashboard", use_container_width=True):
        st.info("Return to main dashboard")
        return

    # When this script runs as its own Streamlit process, session state set in
    # another app is not available here, so also accept the token from the
    # environment.
    if not st.session_state.get('hf_api_key'):
        env_token = os.environ.get("HUGGINGFACEHUB_API_TOKEN")
        if env_token:
            st.session_state.hf_api_key = env_token

    if not st.session_state.get('hf_api_key'):
        st.error("❌ API Key not configured. Please go back to main dashboard.")
        return

    # Initialize session state
    session_defaults = {
        "conversation": None,
        "chat_history": [],
        "processed": False,
        "extracted_data": "",
    }
    for state_key, default_value in session_defaults.items():
        if state_key not in st.session_state:
            st.session_state[state_key] = default_value

    # Sidebar
    with st.sidebar:
        st.success("βœ… HuggingFace API Active")

        data_type = st.selectbox("πŸ“Š Content Type", ["profile", "company", "post"])

        url_placeholder = {
            "profile": "https://www.linkedin.com/in/username/",
            "company": "https://www.linkedin.com/company/companyname/",
            "post": "https://www.linkedin.com/posts/username_postid/"
        }

        linkedin_url = st.text_input("🌐 LinkedIn URL", placeholder=url_placeholder[data_type])

        if st.button("πŸš€ Extract & Analyze", type="primary"):
            if not linkedin_url.strip():
                st.warning("Please enter a LinkedIn URL")
            else:
                with st.spinner("πŸ”„ Extracting data..."):
                    extracted_data = extract_linkedin_data(linkedin_url, data_type)

                    # extract_linkedin_data reports failures as "❌ ..." strings.
                    if extracted_data and not extracted_data.startswith("❌"):
                        chunks = get_text_chunks(extracted_data)
                        if chunks:
                            vectorstore = get_vectorstore(chunks)
                            conversation = get_conversation_chain(vectorstore)
                            if conversation:
                                st.session_state.conversation = conversation
                                st.session_state.processed = True
                                st.session_state.extracted_data = extracted_data
                                st.session_state.chat_history = []
                                st.success(f"βœ… Ready to analyze {len(chunks)} content chunks!")
                            else:
                                st.error("❌ Failed to initialize AI")
                        else:
                            st.error("❌ No content extracted")
                    else:
                        st.error(extracted_data)

    # Main content
    col1, col2 = st.columns([2, 1])

    with col1:
        st.markdown("### πŸ’¬ Chat")

        for chat in st.session_state.chat_history:
            if chat["role"] == "user":
                st.markdown(f"**πŸ‘€ You:** {chat['content']}")
            elif chat["role"] == "assistant":
                if chat["content"]:
                    st.markdown(f"**πŸ€– Assistant:** {chat['content']}")

        if st.session_state.processed:
            user_input = st.chat_input("Ask about the LinkedIn data...")
            if user_input:
                st.session_state.chat_history.append({"role": "user", "content": user_input})
                with st.spinner("πŸ€” Analyzing..."):
                    try:
                        if st.session_state.conversation:
                            response = st.session_state.conversation.invoke({"question": user_input})
                            answer = response.get("answer", "No response generated.")
                            st.session_state.chat_history.append({"role": "assistant", "content": answer})
                            st.rerun()
                    except Exception as e:
                        st.session_state.chat_history.append({"role": "assistant", "content": f"❌ Error: {str(e)}"})
                        st.rerun()
        else:
            st.info("πŸ‘‹ Enter a LinkedIn URL and click 'Extract & Analyze' to start")

    with col2:
        if st.session_state.processed:
            st.markdown("### πŸ“Š Overview")
            data = st.session_state.extracted_data
            chunks = get_text_chunks(data)

            st.metric("Content Type", data_type.title())
            st.metric("Text Chunks", len(chunks))
            st.metric("Characters", f"{len(data):,}")
244
+
245
+ if __name__ == "__main__":
246
+ main()
main_dashboard.py ADDED
@@ -0,0 +1,238 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # main_dashboard.py
2
+ import streamlit as st
3
+ import subprocess
4
+ import sys
5
+ import os
6
+ import webbrowser
7
+ import time
8
+ import threading
9
+
10
def check_port_in_use(port: int) -> bool:
    """Return True when a TCP connection to ``localhost:port`` succeeds within 1 second."""
    import socket

    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    with probe:
        probe.settimeout(1)
        # connect_ex returns 0 on success instead of raising.
        status = probe.connect_ex(('localhost', port))
    return status == 0
15
+
16
def get_available_port(start_port: int = 8601) -> int:
    """Return the first port at or above ``start_port`` that ``check_port_in_use`` reports free."""
    candidate = start_port
    while True:
        if not check_port_in_use(candidate):
            return candidate
        candidate += 1
21
+
22
def run_streamlit_app_in_thread(app_file: str, port: int):
    """Run ``streamlit run app_file`` on ``port`` in a daemon thread.

    The API key stored in ``st.session_state['hf_api_key']`` (if any) is
    exported to the child process as ``HUGGINGFACEHUB_API_TOKEN``; a separate
    Streamlit process cannot see this app's session state.

    Args:
        app_file: Path of the Streamlit script to launch.
        port: Port the child server should listen on.

    Returns:
        The started ``threading.Thread``.
    """
    # Build the environment here, on the script thread: session state must not
    # be read from the worker thread.
    child_env = os.environ.copy()
    api_key = st.session_state.get('hf_api_key')
    if api_key:
        child_env["HUGGINGFACEHUB_API_TOKEN"] = api_key

    def run_app():
        try:
            subprocess.run([
                sys.executable, "-m", "streamlit", "run",
                app_file,
                "--server.port", str(port),
                "--server.headless", "true",
                "--browser.serverAddress", "localhost"
            ], check=True, env=child_env)
        except (subprocess.CalledProcessError, OSError) as e:
            print(f"Error running {app_file}: {e}")

    thread = threading.Thread(target=run_app, daemon=True)
    thread.start()
    return thread
38
+
39
def _launch_extractor(display_name, candidate_files, start_port, port_state_key):
    """Start one extractor app and record its port in session state.

    Args:
        display_name: Name used in the spinner and success messages.
        candidate_files: Script names to try in order; the first that exists is run.
        start_port: First port to try for the child server.
        port_state_key: ``st.session_state`` key that receives the chosen port.
    """
    if not st.session_state.get('hf_api_key'):
        st.error("❌ Please enter your HuggingFace API Key first")
        return

    app_file = next((name for name in candidate_files if os.path.exists(name)), None)
    if app_file is None:
        st.error(f"❌ {candidate_files[0]} file not found!")
        return

    port = get_available_port(start_port)
    st.session_state[port_state_key] = port
    with st.spinner(f"Starting {display_name}..."):
        run_streamlit_app_in_thread(app_file, port)
        time.sleep(3)  # give the child server a moment to start before opening the tab
        webbrowser.open_new_tab(f"http://localhost:{port}")
    st.success(f"βœ… {display_name} launched!")


def main():
    """Render the dashboard: API key input, extractor launch cards, status and usage notes."""
    st.set_page_config(
        page_title="Social Media Data Extractor",
        page_icon="πŸ”",
        layout="wide",
        initial_sidebar_state="expanded"
    )

    st.markdown("""
    <style>
    .stApp { background-color: #0e1117; color: white; }
    .main-header { background: linear-gradient(135deg, #1a2a6c, #b21f1f); color: white; padding: 2rem; border-radius: 10px; text-align: center; margin-bottom: 2rem; }
    .platform-card { background-color: #262730; padding: 1.5rem; border-radius: 10px; border-left: 4px solid; margin: 1rem 0; height: 280px; }
    .linkedin-card { border-left-color: #0077B5; }
    .facebook-card { border-left-color: #1877F2; }
    .facebook-pro-card { border-left-color: #FF6B35; }
    .feature-list { margin: 1rem 0; padding-left: 1.5rem; flex-grow: 1; }
    .api-key-section { background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); padding: 1.5rem; border-radius: 10px; margin-bottom: 2rem; }
    .status-box { background-color: #1a1a2e; padding: 1rem; border-radius: 5px; margin: 0.5rem 0; min-height: 120px; }
    </style>
    """, unsafe_allow_html=True)

    # API Key Section
    st.markdown("""
    <div class="api-key-section">
        <h2 style="margin:0; color:white;">πŸ”‘ HuggingFace API Key Required</h2>
        <p style="margin:0; color:white; opacity:0.9;">Get FREE API key from: <a href="https://huggingface.co/settings/tokens" target="_blank" style="color:white; text-decoration:underline;">huggingface.co/settings/tokens</a></p>
    </div>
    """, unsafe_allow_html=True)

    # API Configuration
    hf_api_key = st.text_input(
        "πŸ€— Enter Your HuggingFace API Key",
        type="password",
        placeholder="hf_xxxxxxxxxxxxxxxx",
        help="Get FREE API key from huggingface.co/settings/tokens"
    )

    # Store API key
    if hf_api_key:
        st.session_state.hf_api_key = hf_api_key
        st.success("βœ… HuggingFace API Key saved! You can now launch extractors.")

    # Header
    st.markdown("""
    <div class="main-header">
        <h1 style="margin:0;">πŸ” Social Media Data Extractor</h1>
        <p style="margin:0; opacity: 0.9;">100% Free - No Local Setup Required</p>
    </div>
    """, unsafe_allow_html=True)

    # Initialize session state
    for port_key in ('linkedin_port', 'facebook_port', 'facebook_pro_port'):
        if port_key not in st.session_state:
            st.session_state[port_key] = None

    # Platform selection
    st.markdown("## πŸš€ Launch Extractors")

    col1, col2, col3 = st.columns(3)

    with col1:
        st.markdown("""
        <div class="platform-card linkedin-card">
            <h3>πŸ’Ό LinkedIn Extractor</h3>
            <ul class="feature-list">
                <li>No login required</li>
                <li>Profile, company, and post analysis</li>
                <li>Quick data extraction</li>
                <li>AI-powered insights</li>
                <li>100% Free</li>
            </ul>
        </div>
        """, unsafe_allow_html=True)

        if st.button("πŸš€ Launch LinkedIn Extractor", key="linkedin_btn", use_container_width=True):
            _launch_extractor("LinkedIn extractor", ["linkdin_deploy.py"], 8601, "linkedin_port")

    with col2:
        st.markdown("""
        <div class="platform-card facebook-card">
            <h3>πŸ“˜ Facebook Extractor</h3>
            <ul class="feature-list">
                <li>Manual login required</li>
                <li>Group post extraction</li>
                <li>Works with private groups</li>
                <li>AI conversation analysis</li>
                <li>100% Free</li>
            </ul>
        </div>
        """, unsafe_allow_html=True)

        if st.button("πŸš€ Launch Facebook Extractor", key="facebook_btn", use_container_width=True):
            # NOTE(review): the script appears to ship as facebook.py, so that
            # name is tried when facebook_deploy.py is absent - confirm.
            _launch_extractor(
                "Facebook extractor",
                ["facebook_deploy.py", "facebook.py"],
                8701,
                "facebook_port",
            )

    with col3:
        st.markdown("""
        <div class="platform-card facebook-pro-card">
            <h3>πŸ”₯ Facebook Extractor 2.0</h3>
            <ul class="feature-list">
                <li>Enhanced Facebook data extraction</li>
                <li>More powerful algorithms</li>
                <li>Faster processing speed</li>
                <li>Advanced AI analysis</li>
                <li>100% Free</li>
            </ul>
        </div>
        """, unsafe_allow_html=True)

        if st.button("πŸš€ Launch Facebook Extractor 2.0", key="facebook_pro_btn", use_container_width=True):
            _launch_extractor("Facebook Extractor 2.0", ["let_deploy.py"], 8801, "facebook_pro_port")

    # Status
    st.markdown("---")
    st.subheader("πŸ”„ Current Status")

    status_panels = [
        ("### πŸ’Ό LinkedIn", "linkedin_port"),
        ("### πŸ“˜ Facebook", "facebook_port"),
        ("### πŸ”₯ Facebook 2.0", "facebook_pro_port"),
    ]
    for column, (title, port_key) in zip(st.columns(3), status_panels):
        with column:
            st.markdown(title)
            launched_port = st.session_state[port_key]
            if launched_port:
                st.success(f"βœ… Running on port {launched_port}")
            else:
                st.info("πŸ’€ Not running")

    # Instructions
    with st.expander("πŸ“‹ How to Use", expanded=True):
        st.markdown("""
        1. **Get FREE API Key:**
           - Go to https://huggingface.co/settings/tokens
           - Create account (FREE)
           - Click "New token"
           - Copy your token (starts with hf_)

        2. **Enter API Key above**

        3. **Click any extractor to launch**

        4. **For Streamlit Cloud:**
           - Add this to Secrets:
           ```
           HUGGINGFACEHUB_API_TOKEN = "your_token_here"
           ```
        """)
236
+
237
+ if __name__ == "__main__":
238
+ main()
requirements.txt CHANGED
@@ -1,3 +1,15 @@
1
- altair
2
- pandas
3
- streamlit
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ streamlit>=1.28.0
2
+ selenium>=4.15.0
3
+ beautifulsoup4>=4.12.0
4
+ requests>=2.31.0
5
+ langchain>=0.0.350
6
+ langchain-community>=0.0.10
7
+ langchain-text-splitters>=0.0.1
8
+ faiss-cpu>=1.7.0
9
+ sentence-transformers>=2.2.0
10
+ transformers>=4.35.0
11
+ torch>=2.0.0
12
+ accelerate>=0.24.0
13
+ huggingface-hub>=0.19.0
14
+ webdriver-manager>=4.0.0
15
+ pydantic>=2.0.0