Spaces:
Sleeping
Sleeping
| from smolagents import tool | |
| from youtube_transcript_api import YouTubeTranscriptApi | |
| from bs4 import BeautifulSoup | |
| from tools.fetch import fetch_webpage | |
# Retrieve the transcript text of a YouTube video, looked up by its video id.
def get_youtube_transcript(video_id: str) -> str:
    """
    Fetches the transcript of a YouTube video given its video ID.

    Args:
        video_id (str): The ID of the YouTube video. Pass in the video ID, NOT the video URL. For a video with the URL https://www.youtube.com/watch?v=12345 the ID is 12345.

    Returns:
        str: The transcript of the YouTube video as a single string, with each transcript entry on its own line (separated by a newline character).
    """
    # The raw data is a list of entries shaped like
    # {'text': 'Hey there', 'start': 0.0, 'duration': 1.54}; timing fields
    # are dropped and only the 'text' of each entry is kept.
    entries = YouTubeTranscriptApi().fetch(video_id).to_raw_data()
    return "\n".join(entry["text"] for entry in entries)
# Look up the title and description of a YouTube video from its page URL.
def get_youtube_title_description(video_url: str) -> str:
    """
    Fetches the title and description of a YouTube video given its URL.

    Args:
        video_url (str): The url of the YouTube video. Pass in the full video URL, NOT just the video ID.

    Returns:
        str: Two lines of text: "Title: <title>" followed by "Description: <description>". When the page has no matching meta tag (or the tag carries no content attribute), the placeholder "No title found" / "No description found" is used instead.
    """
    # Fetch the page with markdown conversion disabled and parse the result
    # as HTML so the <meta> tags can be queried.
    soup = BeautifulSoup(fetch_webpage(video_url, convert_to_markdown=False), "html.parser")

    def meta_content(name: str, fallback: str) -> str:
        # Return the content attribute of <meta name="..."> or the fallback.
        tag = soup.find("meta", {"name": name})
        if tag is None:
            return fallback
        # Tag.get avoids a KeyError when the tag exists without a content
        # attribute; an empty-but-present content value is kept as-is.
        content = tag.get("content")
        return fallback if content is None else content

    title = meta_content("title", "No title found")
    description = meta_content("description", "No description found")
    return f"Title: {title}\nDescription: {description}"
if __name__ == "__main__":
    # Manual smoke test: fetch the title/description and the transcript of a
    # sample video and print them, reporting (not raising) any failure.
    from dotenv import load_dotenv

    # load_dotenv must be *called* for the .env file to be read.
    load_dotenv()

    video_id = "1htKBjuUWec"  # Replace with your YouTube video ID
    video_url = "https://www.youtube.com/watch?v=" + video_id

    # Get the title and description
    try:
        title_description = get_youtube_title_description(video_url)
    except Exception as e:
        print(f"Error fetching title and description: {e}")
    else:
        print(title_description)

    # Get the transcript; only print it when the fetch succeeded, so a
    # failure never reaches an unbound `transcript` name.
    try:
        transcript = get_youtube_transcript(video_id)
    except Exception as e:
        print(f"Error fetching transcript: {e}")
    else:
        print(transcript)