Spaces:
Runtime error
Runtime error
class LangChainChunker:
    """Split a piece of text into chunks with LangChain's CharacterTextSplitter."""

    def __init__(self, text):
        """Store the text that ``chunker`` will split.

        Args:
            text: The full text to split; it is passed unchanged to
                ``CharacterTextSplitter.split_text``.
        """
        self.text = text

    def chunker(self, size=1000):
        """Split the stored text on single spaces.

        Args:
            size: Value passed to the splitter as ``chunk_size``.

        Returns:
            Whatever ``CharacterTextSplitter.split_text`` returns for the
            stored text.
        """
        # Imported inside the method, so langchain is only needed when
        # chunker() is actually called.
        from langchain.text_splitter import CharacterTextSplitter

        # TODO: attach the duration of the video to each chunk,
        # i.e. produce [[chunk, duration], ...].
        text_splitter = CharacterTextSplitter(
            separator=" ",
            chunk_size=size,
            # chunk_overlap is a number of characters, not a fraction. The
            # previous value of 0.9 could never yield a whole character of
            # overlap, so "no overlap" is now stated explicitly.
            chunk_overlap=0,
        )
        return text_splitter.split_text(self.text)

    def __sizeof__(self) -> int:
        """Return the number of characters in the stored text.

        NOTE(review): ``sys.getsizeof`` consults ``__sizeof__``, which is
        normally a size in bytes; this class reports a character count
        instead. Kept as-is because callers may rely on it.
        """
        return len(self.text)
def getSubsText(video_id="", getGenerated=False):
    """Return the transcript of a YouTube video as one line of text.

    The transcripts available for ``video_id`` are listed and the last one
    in that listing is fetched, formatted with ``TextFormatter``, and has
    its newlines replaced by spaces.

    Args:
        video_id: Identifier of the video, passed to
            ``YouTubeTranscriptApi.list_transcripts``.
        getGenerated: Currently has no effect (see the TODO below).

    Returns:
        The formatted transcript text with newlines replaced by spaces, or
        an empty string when the listing yields no transcripts.
    """
    from youtube_transcript_api import YouTubeTranscriptApi as ytapi
    from youtube_transcript_api.formatters import TextFormatter

    if getGenerated:
        # TODO: implement getGenerated
        pass

    # Only the last listed transcript is ever used, so select it first and
    # fetch once instead of downloading every transcript and discarding
    # all but the final one.
    chosen = None
    for transcript in ytapi.list_transcripts(video_id):
        chosen = transcript

    if chosen is None:
        return ""

    formatted = TextFormatter().format_transcript(chosen.fetch())
    return formatted.replace("\n", " ")