File size: 1,224 Bytes
9536c67
 
 
 
 
 
 
255e074
 
 
 
9536c67
 
 
 
 
 
 
 
 
 
255e074
 
b3c07b5
 
 
 
255e074
b3c07b5
 
 
255e074
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
"""
This module provides a client for interacting with the Crawl4AI library.

It encapsulates the logic for scraping a website using Crawl4AI and extracting
its content as a markdown string, handling potential errors during the process.
"""

from crawl4ai import AsyncWebCrawler


async def scrape_and_get_markdown_with_crawl4ai(url: str) -> str:
    """
    Asynchronously scrapes a given URL using Crawl4AI and returns its content as markdown.

    Args:
        url (str): The URL of the website to scrape.

    Returns:
        str: The scraped content in markdown format. If scraping fails or returns
             no content, a formatted error message string is returned.
    """
    try:
        async with AsyncWebCrawler() as crawler:
            result = await crawler.arun(url=url) 

            markdown_content = "❌ <span style='color:red;'>Crawl4AI completed but returned no content. The page might be empty or inaccessible.</span>"

            if result and result.markdown:
                markdown_content = result.markdown
            
            return markdown_content
    except Exception as e:
        return f"❌ <span style='color:red;'>An error occurred while scraping with Crawl4AI: {e}</span>"