chinmayjha's picture
Deploy complete Second Brain AI Assistant with custom UI
b27eb78
raw
history blame
933 Bytes
from langchain_text_splitters import RecursiveCharacterTextSplitter
from loguru import logger
def get_splitter(chunk_size: int) -> RecursiveCharacterTextSplitter:
    """Return a token-based recursive text splitter with a 15% overlap.

    Chunk lengths are measured in tokens of the ``cl100k_base`` tiktoken
    encoding rather than in characters.

    Args:
        chunk_size: Target maximum number of tokens for each text chunk.

    Returns:
        RecursiveCharacterTextSplitter: A splitter configured with the given
            ``chunk_size`` and an overlap of ``int(0.15 * chunk_size)`` tokens
            between consecutive chunks.
    """
    # Overlap is 15% of the chunk size, truncated toward zero by int().
    chunk_overlap = int(0.15 * chunk_size)

    logger.info(
        f"Getting splitter with chunk size: {chunk_size} and overlap: {chunk_overlap}"
    )

    return RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        encoding_name="cl100k_base",
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )