Refat81 committed
Commit 6bf8f95 · verified · Parent(s): 77ccd8c

Update pages/facebook_extractor_pro.py

Files changed (1):
  pages/facebook_extractor_pro.py  +233 -83
pages/facebook_extractor_pro.py CHANGED
@@ -1,4 +1,4 @@
-# let_deploy.py
+# pages/facebook_extractor_pro.py
 import streamlit as st
 import time
 from bs4 import BeautifulSoup
@@ -81,6 +81,20 @@ st.markdown("""
         background: #374151;
         color: white;
     }
+    .extraction-card {
+        background: linear-gradient(135deg, #1e3c72, #2a5298);
+        padding: 1.5rem;
+        border-radius: 10px;
+        margin: 1rem 0;
+        border-left: 4px solid #FF6B35;
+    }
+    .metric-card {
+        background: #262730;
+        padding: 1rem;
+        border-radius: 8px;
+        text-align: center;
+        border: 1px solid #444;
+    }
     </style>
     """, unsafe_allow_html=True)
 
@@ -175,6 +189,10 @@ class FacebookDataExtractor:
         og_description = soup.find('meta', property='og:description')
         og_image = soup.find('meta', property='og:image')
 
+        # Extract additional metadata
+        keywords = soup.find('meta', attrs={'name': 'keywords'})
+        viewport = soup.find('meta', attrs={'name': 'viewport'})
+
         # Structure the extracted data
         extracted_data = {
             "page_info": {
@@ -183,7 +201,11 @@ class FacebookDataExtractor:
                 "og_title": og_title['content'] if og_title else "",
                 "og_description": og_description['content'] if og_description else "",
                 "og_image": og_image['content'] if og_image else "",
-                "url": url
+                "keywords": keywords['content'] if keywords else "",
+                "viewport": viewport['content'] if viewport else "",
+                "url": url,
+                "response_code": response.status_code,
+                "content_length": len(response.text)
             },
             "content_blocks": self._extract_content_blocks(clean_text),
             "extraction_time": datetime.now().isoformat(),
@@ -208,18 +230,42 @@ class FacebookDataExtractor:
         # Split into paragraphs/sentences
         paragraphs = [p.strip() for p in text.split('.') if p.strip()]
 
-        for i, paragraph in enumerate(paragraphs[:20]):  # Limit to first 20 paragraphs
+        for i, paragraph in enumerate(paragraphs[:25]):  # Limit to first 25 paragraphs
             if len(paragraph) > 30:  # Only include substantial content
+                # Analyze content type
+                content_type = self._analyze_content_type(paragraph)
+
                 block = {
                     "id": i + 1,
                     "content": paragraph,
                     "length": len(paragraph),
-                    "word_count": len(paragraph.split())
+                    "word_count": len(paragraph.split()),
+                    "content_type": content_type,
+                    "has_links": 'http' in paragraph.lower(),
+                    "has_mentions": '@' in paragraph,
+                    "has_hashtags": '#' in paragraph
                 }
                 blocks.append(block)
 
         return blocks
 
+    def _analyze_content_type(self, text: str) -> str:
+        """Analyze the type of content"""
+        text_lower = text.lower()
+
+        if any(word in text_lower for word in ['login', 'sign in', 'password', 'email']):
+            return "authentication"
+        elif any(word in text_lower for word in ['post', 'share', 'comment', 'like']):
+            return "social_interaction"
+        elif any(word in text_lower for word in ['group', 'community', 'member']):
+            return "community"
+        elif any(word in text_lower for word in ['event', 'calendar', 'date', 'time']):
+            return "event"
+        elif any(word in text_lower for word in ['marketplace', 'buy', 'sell', 'price']):
+            return "commerce"
+        else:
+            return "general"
+
     def analyze_facebook_url(self, url: str) -> str:
         """Analyze Facebook URL and return structured information"""
         url_lower = url.lower()
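
A note on the new `_analyze_content_type`: the first matching bucket wins, and matching is by substring, so e.g. 'date' also matches "update" and 'time' matches "sometimes". A standalone sketch of the same routing (the `classify` name is hypothetical, used only for illustration):

```python
# Hedged sketch: the commit's keyword routing as a standalone function,
# showing that bucket order decides ties ("Sign in to like this post"
# returns "authentication", not "social_interaction").
def classify(text: str) -> str:
    text_lower = text.lower()
    buckets = [
        ("authentication", ['login', 'sign in', 'password', 'email']),
        ("social_interaction", ['post', 'share', 'comment', 'like']),
        ("community", ['group', 'community', 'member']),
        ("event", ['event', 'calendar', 'date', 'time']),
        ("commerce", ['marketplace', 'buy', 'sell', 'price']),
    ]
    for label, words in buckets:
        if any(word in text_lower for word in words):
            return label
    return "general"

print(classify("Join our community group today"))  # community
print(classify("Sign in to like this post"))       # authentication
```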
@@ -232,6 +278,8 @@ class FacebookDataExtractor:
             return "Facebook Event (Limited access)"
         elif 'profile' in url_lower or 'user' in url_lower:
             return "Facebook Profile (Limited access - requires login)"
+        elif 'marketplace' in url_lower:
+            return "Facebook Marketplace (Limited access)"
         else:
             return "Facebook Content (General)"
 
@@ -241,29 +289,49 @@ def process_extracted_data(extracted_data: dict):
         return None, []
 
     # Combine all content into a single text
-    all_text = f"Page Title: {extracted_data['page_info']['title']}\n\n"
+    all_text = f"FACEBOOK DATA EXTRACTION REPORT\n"
+    all_text += "=" * 60 + "\n\n"
+
+    page_info = extracted_data['page_info']
+    all_text += f"📄 PAGE INFORMATION:\n"
+    all_text += f"Title: {page_info['title']}\n"
 
-    if extracted_data['page_info']['description']:
-        all_text += f"Description: {extracted_data['page_info']['description']}\n\n"
+    if page_info['description']:
+        all_text += f"Description: {page_info['description']}\n"
 
-    if extracted_data['page_info']['og_description']:
-        all_text += f"OpenGraph Description: {extracted_data['page_info']['og_description']}\n\n"
+    if page_info['og_description']:
+        all_text += f"OpenGraph: {page_info['og_description']}\n"
 
-    all_text += f"Data Type: {extracted_data['data_type']}\n"
+    all_text += f"URL: {page_info['url']}\n"
+    all_text += f"Data Type: {extracted_data['data_type'].upper()}\n"
     all_text += f"Extraction Time: {extracted_data['extraction_time']}\n"
-    all_text += f"Content Blocks: {len(extracted_data['content_blocks'])}\n\n"
+    all_text += f"Response Code: {page_info['response_code']}\n"
+    all_text += f"Content Length: {page_info['content_length']} characters\n\n"
 
-    # Add content blocks
+    all_text += f"📊 CONTENT ANALYSIS:\n"
+    all_text += f"Total Content Blocks: {len(extracted_data['content_blocks'])}\n\n"
+
+    # Add content blocks with enhanced information
     for i, block in enumerate(extracted_data['content_blocks']):
-        all_text += f"--- Content Block {i+1} ---\n"
-        all_text += f"Words: {block['word_count']} | Characters: {block['length']}\n"
-        all_text += f"Content: {block['content']}\n\n"
+        all_text += f"--- BLOCK {i+1} ---\n"
+        all_text += f"Type: {block['content_type'].upper()}\n"
+        all_text += f"Words: {block['word_count']} | Chars: {block['length']}\n"
+        all_text += f"Features: "
+        features = []
+        if block['has_links']: features.append("Links")
+        if block['has_mentions']: features.append("Mentions")
+        if block['has_hashtags']: features.append("Hashtags")
+        all_text += ", ".join(features) if features else "None"
+        all_text += f"\nContent: {block['content']}\n\n"
+
+    all_text += "=" * 60 + "\n"
+    all_text += "END OF EXTRACTION REPORT"
 
     # Split into chunks
     splitter = CharacterTextSplitter(
         separator="\n",
-        chunk_size=800,
-        chunk_overlap=150,
+        chunk_size=1000,
+        chunk_overlap=200,
         length_function=len
     )
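
The splitter settings move from 800/150 to 1000/200 characters. A hedged sketch of what that means in practice, assuming the file's existing LangChain import for `CharacterTextSplitter` (the sample `report` string is illustrative):

```python
# Hedged sketch: how the new chunking settings split the report text.
# With separator="\n" the splitter merges newline-separated pieces into
# chunks of at most ~1000 characters, with ~200 characters repeated
# between consecutive chunks to preserve context at the boundaries.
from langchain.text_splitter import CharacterTextSplitter

splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=1000,    # raised from 800
    chunk_overlap=200,  # raised from 150
    length_function=len,
)

report = "\n".join(f"--- BLOCK {i} ---\nSome extracted sentence." for i in range(200))
chunks = splitter.split_text(report)
print(len(chunks), max(len(c) for c in chunks))  # each chunk stays near the cap
```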
 
@@ -296,7 +364,7 @@ def create_chatbot(vectorstore):
 
     chain = ConversationalRetrievalChain.from_llm(
         llm=llm,
-        retriever=vectorstore.as_retriever(search_kwargs={"k": 3}),
+        retriever=vectorstore.as_retriever(search_kwargs={"k": 4}),
        memory=memory,
        return_source_documents=True,
        output_key="answer"
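
Raising `k` from 3 to 4 means one more retrieved chunk is stuffed into each prompt; with `chunk_size=1000` that is roughly 4,000 characters of context per question. A tiny sketch of what the setting controls, assuming the `vectorstore` from this function (`get_relevant_documents` is the classic retriever API; newer LangChain releases favor `retriever.invoke(query)`):

```python
# Hedged sketch: k only caps how many of the most similar chunks come back.
retriever = vectorstore.as_retriever(search_kwargs={"k": 4})
docs = retriever.get_relevant_documents("What is this page about?")
assert len(docs) <= 4  # at most the four nearest chunks
```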
@@ -327,6 +395,48 @@ def display_status_indicator(status: str, message: str):
     </div>
     """, unsafe_allow_html=True)
 
+def display_metrics(extracted_data):
+    """Display extraction metrics"""
+    if not extracted_data:
+        return
+
+    page_info = extracted_data['page_info']
+    content_blocks = extracted_data['content_blocks']
+
+    col1, col2, col3, col4 = st.columns(4)
+
+    with col1:
+        st.markdown("""
+        <div class="metric-card">
+            <h3>📄 Content Blocks</h3>
+            <h2>{}</h2>
+        </div>
+        """.format(len(content_blocks)), unsafe_allow_html=True)
+
+    with col2:
+        st.markdown("""
+        <div class="metric-card">
+            <h3>📊 Total Words</h3>
+            <h2>{}</h2>
+        </div>
+        """.format(sum(block['word_count'] for block in content_blocks)), unsafe_allow_html=True)
+
+    with col3:
+        st.markdown("""
+        <div class="metric-card">
+            <h3>🔗 Links Found</h3>
+            <h2>{}</h2>
+        </div>
+        """.format(sum(1 for block in content_blocks if block['has_links'])), unsafe_allow_html=True)
+
+    with col4:
+        st.markdown("""
+        <div class="metric-card">
+            <h3>⏱️ Response Code</h3>
+            <h2>{}</h2>
+        </div>
+        """.format(page_info['response_code']), unsafe_allow_html=True)
+
 def main():
     """Main application function"""
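
The new `display_metrics` builds its four cards from raw HTML. A hedged alternative using Streamlit's built-in `st.metric`, which renders the same numbers without `unsafe_allow_html` (the function name here is hypothetical; `extracted_data` is the dict produced by `extract_public_data`):

```python
import streamlit as st

def display_metrics_builtin(extracted_data: dict) -> None:
    """Alternative sketch, not in the commit: built-in metric widgets."""
    if not extracted_data:
        return
    blocks = extracted_data['content_blocks']
    col1, col2, col3, col4 = st.columns(4)
    col1.metric("📄 Content Blocks", len(blocks))
    col2.metric("📊 Total Words", sum(b['word_count'] for b in blocks))
    col3.metric("🔗 Links Found", sum(1 for b in blocks if b['has_links']))
    col4.metric("⏱️ Response Code", extracted_data['page_info']['response_code'])
```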
 
@@ -334,7 +444,7 @@ def main():
     st.markdown("""
     <div class="main-header">
         <h1 style="margin:0; font-size: 2.5rem;">🔥 Facebook Extractor 2.0</h1>
-        <p style="margin:0; opacity: 0.9; font-size: 1.2rem;">Enhanced Version - AI-Powered Analysis</p>
+        <p style="margin:0; opacity: 0.9; font-size: 1.2rem;">Professional Version - Enhanced AI-Powered Analysis</p>
     </div>
     """, unsafe_allow_html=True)
 
@@ -373,7 +483,7 @@ def main():
 
     # Sidebar
     with st.sidebar:
-        st.markdown("### ⚙️ Configuration")
+        st.markdown("### ⚙️ Professional Configuration")
 
         # URL input
         st.subheader("🔗 Facebook URL")
@@ -386,27 +496,35 @@ def main():
         # Data type selection
         data_type = st.selectbox(
             "Content Type",
-            ["page", "group", "profile", "event", "post"],
+            ["page", "group", "profile", "event", "post", "marketplace"],
             help="Select the type of Facebook content"
         )
 
         # Extraction settings
-        st.subheader("🔧 Settings")
+        st.subheader("🔧 Advanced Settings")
         analyze_depth = st.select_slider(
             "Analysis Depth",
-            options=["Basic", "Standard", "Detailed"],
-            value="Standard"
+            options=["Basic", "Standard", "Detailed", "Comprehensive"],
+            value="Detailed"
+        )
+
+        content_limit = st.slider(
+            "Max Content Blocks",
+            min_value=10,
+            max_value=50,
+            value=25,
+            help="Limit the number of content blocks extracted"
         )
 
         # Extract button
-        if st.button("🚀 Extract & Analyze", type="primary", use_container_width=True):
+        if st.button("🚀 Advanced Extraction", type="primary", use_container_width=True):
            if not facebook_url.strip():
                st.warning("⚠️ Please enter a Facebook URL")
            elif not facebook_url.startswith('https://www.facebook.com/'):
                st.error("❌ Please enter a valid Facebook URL")
            else:
                st.session_state.processing = True
-                with st.spinner("🔄 Extracting data from Facebook..."):
+                with st.spinner("🔄 Performing advanced data extraction..."):
                    extracted_data = st.session_state.extractor.extract_public_data(facebook_url, data_type)
 
                    if extracted_data.get("status") == "success":
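
One observation on this hunk: the new `content_limit` slider is read into a variable but never passed to the extractor, which still hard-caps blocks at 25 inside `_extract_content_blocks`. A hedged sketch of wiring it through; `max_blocks` is a hypothetical parameter, not in the commit:

```python
# Hypothetical wiring, not in the commit: pass the slider value down.
extracted_data = st.session_state.extractor.extract_public_data(
    facebook_url, data_type, max_blocks=content_limit
)

# ...and inside _extract_content_blocks, honor it instead of the fixed 25:
# for i, paragraph in enumerate(paragraphs[:max_blocks]):
```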
@@ -419,6 +537,7 @@ def main():
                            st.session_state.chatbot = create_chatbot(vectorstore)
                            st.session_state.chat_history = []
                            st.success(f"✅ Successfully processed {len(chunks)} content chunks!")
+                           st.balloons()
                        else:
                            st.error("❌ Failed to process extracted data")
                    else:
@@ -430,150 +549,181 @@ def main():
         # Chat management
         if st.session_state.chatbot and st.session_state.extracted_data:
             st.markdown("---")
-            st.subheader("💬 Chat Management")
-            if st.button("🗑️ Clear Chat History", type="secondary", use_container_width=True):
-                clear_chat_history()
+            st.subheader("💬 Professional Chat")
+            col1, col2 = st.columns(2)
+            with col1:
+                if st.button("🗑️ Clear History", type="secondary", use_container_width=True):
+                    clear_chat_history()
+            with col2:
+                if st.button("📊 Export Data", type="secondary", use_container_width=True):
+                    st.info("📄 Data export feature - Coming soon!")
 
     # Main content area
     col1, col2 = st.columns([1, 1])
 
     with col1:
-        st.markdown("### 📊 Extraction Results")
+        st.markdown("### 📊 Professional Analysis")
 
         if st.session_state.processing:
-            display_status_indicator("warning", "🔄 Processing...")
-            st.info("Extracting data from Facebook. This may take a few seconds.")
+            display_status_indicator("warning", "🔄 Advanced Processing...")
+            st.info("Performing comprehensive data extraction and analysis...")
 
         elif st.session_state.extracted_data:
             data = st.session_state.extracted_data
             page_info = data['page_info']
             content_blocks = data['content_blocks']
 
-            display_status_indicator("success", "✅ Extraction Complete")
+            display_status_indicator("success", "✅ Professional Extraction Complete")
+
+            # Display metrics
+            display_metrics(data)
 
             # Display page info
-            st.markdown("#### 🏷️ Page Information")
+            st.markdown("""
+            <div class="extraction-card">
+                <h4>🏷️ Page Information</h4>
+            </div>
+            """, unsafe_allow_html=True)
+
             st.write(f"**Title:** {page_info['title']}")
 
             if page_info['description']:
-                st.write(f"**Description:** {page_info['description'][:200]}...")
+                st.write(f"**Description:** {page_info['description']}")
 
             if page_info['og_description']:
-                st.write(f"**OG Description:** {page_info['og_description'][:200]}...")
+                st.write(f"**OpenGraph:** {page_info['og_description']}")
 
             st.write(f"**URL:** {page_info['url']}")
             st.write(f"**Data Type:** {data['data_type'].title()}")
             st.write(f"**Content Blocks:** {len(content_blocks)}")
             st.write(f"**Extraction Time:** {data['extraction_time'][:19]}")
+            st.write(f"**Response Code:** {page_info['response_code']}")
 
-            # Display sample content
-            st.markdown("#### 📝 Sample Content")
-            for i, block in enumerate(content_blocks[:3]):
-                with st.expander(f"Content Block {i+1} ({block['word_count']} words)"):
-                    st.write(block['content'])
+            # Display content analysis
+            st.markdown("#### 📝 Content Analysis")
+            for i, block in enumerate(content_blocks[:5]):
+                with st.expander(f"Block {i+1} - {block['content_type'].title()} ({block['word_count']} words)"):
+                    st.write(f"**Content:** {block['content']}")
+                    st.caption(f"Features: {', '.join(['Links' if block['has_links'] else '', 'Mentions' if block['has_mentions'] else '', 'Hashtags' if block['has_hashtags'] else '']).strip() or 'None'}")
 
-            if len(content_blocks) > 3:
-                st.info(f"📄 And {len(content_blocks) - 3} more content blocks...")
+            if len(content_blocks) > 5:
+                st.info(f"📄 And {len(content_blocks) - 5} more content blocks analyzed...")
 
         else:
-            display_status_indicator("warning", "⏳ Ready for Extraction")
+            display_status_indicator("warning", "⏳ Ready for Professional Extraction")
             st.info("""
-            **To get started:**
-            1. Enter a Facebook URL in the sidebar
-            2. Select content type
-            3. Click "Extract & Analyze"
+            **🚀 Professional Features:**
+
+            1. **Advanced URL Analysis** - Intelligent content type detection
+            2. **Enhanced Metadata Extraction** - OpenGraph, keywords, descriptions
+            3. **Content Classification** - Automatic content type categorization
+            4. **Comprehensive Analytics** - Word counts, link analysis, feature detection
+            5. **AI-Powered Insights** - Advanced conversational analysis
 
-            **Supported URLs:**
-            - 🏢 Facebook Pages (best results)
-            - 📘 Public Groups (limited)
-            - 👤 Public Profiles (limited)
-            - 🎉 Events (limited)
-            - 📝 Posts (limited)
+            **📊 Supported Content Types:**
+            - 🏢 Facebook Pages (optimal results)
+            - 📘 Public Groups (enhanced analysis)
+            - 👤 Public Profiles (comprehensive data)
+            - 🎉 Events (detailed extraction)
+            - 📝 Posts (advanced content analysis)
+            - 🛒 Marketplace (commerce detection)
 
-            **Note:** This version extracts public data only.
-            Private content requires manual login (available in local deployment).
+            **🔧 Professional Tools:**
+            - Multi-level analysis depth
+            - Content block limiting
+            - Real-time metrics
+            - Export capabilities
            """)
 
     with col2:
-        st.markdown("### 💬 AI Analysis")
+        st.markdown("### 💬 Professional AI Chat")
 
         if st.session_state.chatbot and st.session_state.extracted_data:
             # Display chat history
             for i, chat in enumerate(st.session_state.chat_history):
                 if chat["role"] == "user":
-                    st.markdown(f'<div class="chat-message user-message"><strong>👤 You:</strong> {chat["content"]}</div>',
+                    st.markdown(f'<div class="chat-message user-message"><strong>👤 Professional Analyst:</strong> {chat["content"]}</div>',
                                 unsafe_allow_html=True)
                 elif chat["role"] == "assistant":
-                    st.markdown(f'<div class="chat-message assistant-message"><strong>🤖 Assistant:</strong> {chat["content"]}</div>',
+                    st.markdown(f'<div class="chat-message assistant-message"><strong>🤖 AI Assistant:</strong> {chat["content"]}</div>',
                                 unsafe_allow_html=True)
 
             # Chat input
-            user_input = st.chat_input("Ask about the Facebook data...")
+            user_input = st.chat_input("Ask professional questions about the Facebook data...")
 
             if user_input:
                 # Add user message
                 st.session_state.chat_history.append({"role": "user", "content": user_input})
 
                 # Generate AI response
-                with st.spinner("🤔 Analyzing..."):
+                with st.spinner("🤔 Performing professional analysis..."):
                     try:
                         response = st.session_state.chatbot.invoke({"question": user_input})
-                        answer = response.get("answer", "I couldn't generate a response based on the available data.")
+                        answer = response.get("answer", "I couldn't generate a professional response based on the available data.")
 
                         st.session_state.chat_history.append({"role": "assistant", "content": answer})
                         st.rerun()
                     except Exception as e:
-                        error_msg = f"❌ Error generating response: {str(e)}"
+                        error_msg = f"❌ Professional analysis error: {str(e)}"
                         st.session_state.chat_history.append({"role": "assistant", "content": error_msg})
                         st.rerun()
 
-            # Suggested questions
+            # Professional suggested questions
             if not st.session_state.chat_history:
-                st.markdown("#### 💡 Suggested Questions")
+                st.markdown("#### 💡 Professional Questions")
                 suggestions = [
-                    "Summarize the main content of this page",
-                    "What is this page primarily about?",
-                    "Extract key information from the content",
-                    "What are the main topics discussed?",
-                    "Provide an overview of this Facebook content"
+                    "Provide a comprehensive analysis of this page",
+                    "What are the key content patterns and themes?",
+                    "Analyze the engagement potential of this content",
+                    "Extract business intelligence from this data",
+                    "What marketing insights can be derived?",
+                    "Perform competitor analysis based on this content"
                 ]
 
                 for suggestion in suggestions:
-                    if st.button(suggestion, key=f"suggest_{suggestion}", use_container_width=True):
-                        st.info(f"💡 Try asking: '{suggestion}'")
+                    if st.button(suggestion, key=f"pro_suggest_{suggestion}", use_container_width=True):
+                        st.info(f"💡 Professional question: '{suggestion}'")
 
         elif st.session_state.extracted_data:
-            st.info("💬 Extract data first to start chatting with AI")
+            st.info("💬 Start a professional conversation with the AI assistant")
         else:
-            st.info("🔍 Extract Facebook data to enable AI analysis")
+            st.info("🔍 Perform data extraction to enable professional AI analysis")
 
-    # Features section
+    # Professional features section
     st.markdown("---")
-    st.markdown("### 🚀 Enhanced Features")
+    st.markdown("### 🚀 Professional Features")
 
-    feature_cols = st.columns(3)
+    feature_cols = st.columns(4)
 
     with feature_cols[0]:
         st.markdown("""
         <div class="feature-card">
-            <h4>🔍 Smart Extraction</h4>
-            <p>Advanced algorithms for better content recognition and structure analysis</p>
+            <h4>🔍 Advanced Extraction</h4>
+            <p>Multi-layer content analysis with intelligent pattern recognition</p>
         </div>
         """, unsafe_allow_html=True)
 
     with feature_cols[1]:
         st.markdown("""
         <div class="feature-card">
-            <h4>🤖 AI-Powered Analysis</h4>
-            <p>HuggingFace integration for intelligent content understanding and Q&A</p>
+            <h4>🤖 AI Intelligence</h4>
+            <p>Professional-grade analysis with contextual understanding</p>
         </div>
         """, unsafe_allow_html=True)
 
     with feature_cols[2]:
         st.markdown("""
         <div class="feature-card">
-            <h4>💬 Contextual Memory</h4>
-            <p>Maintains conversation context for more meaningful interactions</p>
+            <h4>📊 Analytics Dashboard</h4>
+            <p>Comprehensive metrics and real-time data visualization</p>
+        </div>
+        """, unsafe_allow_html=True)
+
+    with feature_cols[3]:
+        st.markdown("""
+        <div class="feature-card">
+            <h4>💬 Professional Chat</h4>
+            <p>Advanced conversational AI for business insights</p>
         </div>
         """, unsafe_allow_html=True)
 