from langchain_community.document_loaders import FireCrawlLoader
from langchain_core.documents import Document
from config import FIRE_CRAWL_API_KEY
def scrape_with_firecrawl(url: str) -> list[Document]:
    """Load *url* through ``FireCrawlLoader`` and return the documents it yields.

    The loader is created with ``mode='scrape'`` and the API key taken from
    the project's ``config`` module.

    Args:
        url: Address handed to the loader.

    Returns:
        Every document produced by the loader, collected into a list. The
        list is empty when the loader yields nothing.

    Raises:
        Nothing is caught here: any exception raised while constructing the
        loader or while iterating it propagates to the caller.
    """
    loader = FireCrawlLoader(
        url=url,
        api_key=FIRE_CRAWL_API_KEY,
        mode='scrape',
    )
    # Drain the lazy iterator in one step rather than appending item by item.
    return list(loader.lazy_load())  # type: ignore
def get_markdown_from_documents(docs: list[Document]) -> str:
    """Render *docs* as one markdown string, one numbered section per document.

    Each document contributes a ``### Page N`` heading (N starts at 1 and
    follows the order of *docs*), then its ``page_content``, then a dashed
    separator line surrounded by blank lines.

    Args:
        docs: Documents to render; only ``page_content`` is read from each.

    Returns:
        The concatenated sections, or an empty string when *docs* is empty.
    """
    # Build every section first and join once, instead of growing a string
    # with repeated ``+=`` inside the loop.
    return "".join(
        f"### Page {page_number}\n{doc.page_content}\n\n--------------\n\n"
        for page_number, doc in enumerate(docs, start=1)
    )
def scrape_and_get_markdown_with_firecrawl(url: str) -> str:
    """Scrape *url* with FireCrawl and return the result as markdown text.

    This function always returns a string and does not raise for ordinary
    errors: failures are reported through the return value.

    Args:
        url: Address passed to ``scrape_with_firecrawl``.

    Returns:
        The markdown built by ``get_markdown_from_documents`` when at least
        one document came back; otherwise a message starting with "❌"
        saying either that no content was returned or that an exception
        occurred (the exception text is included).
    """
    try:
        documents = scrape_with_firecrawl(url)
        if documents:
            # Rendering stays inside the ``try`` so that a failure while
            # formatting is reported the same way as a scraping failure.
            return get_markdown_from_documents(documents)
        return "❌ FireCrawl completed but returned no content. The page might be empty or inaccessible."
    except Exception as error:
        # Deliberate catch-all: callers receive a readable message instead
        # of an exception.
        return f"❌ An error occurred while scraping with FireCrawl: {error}"