Spaces:

ganesh3
/

rag-youtube-assistant

Sleeping

App Files Files Community

rag-youtube-assistant / app /utils.py

ganesh3

Update app/utils.py

2085a8b verified about 1 year ago

raw

history blame contribute delete

2.85 kB

	import streamlit as st
	from transcript_extractor import get_transcript
	import logging
	import sys

	# Route log records to stdout only; no other stream or file is configured here.
	logging.basicConfig(
	    stream=sys.stdout,
	    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
	    level=logging.INFO,
	)
	# Module-level logger, named after this module.
	logger = logging.getLogger(__name__)

	def _coerce_count(value, default=0):
	    """Return ``value`` as an ``int``, or ``default`` if it cannot be converted."""
	    try:
	        return int(value)
	    except (TypeError, ValueError):
	        # None or non-numeric text: fall back instead of aborting the video.
	        return default


	def process_single_video(db_handler, data_processor, video_id, embedding_model):
	    """Index one YouTube video and return the name of its search index.

	    An index already registered for ``video_id`` is reused. Otherwise the
	    transcript is fetched and processed, the video row is stored, an index
	    named ``video_<video_id>_<embedding_model>`` (lower-cased) is requested
	    from ``data_processor.build_index``, and the result is recorded through
	    ``db_handler``.

	    Args:
	        db_handler: Object providing ``get_elasticsearch_index_by_youtube_id``,
	            ``add_video``, ``add_embedding_model``, ``get_video_by_youtube_id``
	            and ``add_elasticsearch_index``.
	        data_processor: Object providing ``process_transcript`` and
	            ``build_index``.
	        video_id: YouTube video identifier.
	        embedding_model: Embedding model name; it becomes part of the index
	            name and is registered via ``db_handler.add_embedding_model``.

	    Returns:
	        The existing index value, or the name returned by ``build_index``,
	        on success; ``None`` when a step fails or raises.
	    """
	    try:
	        # Reuse an index that was already recorded for this video.
	        existing_index = db_handler.get_elasticsearch_index_by_youtube_id(video_id)
	        if existing_index:
	            logger.info(f"Video {video_id} already processed. Using existing index.")
	            return existing_index

	        # Get transcript data
	        transcript_data = get_transcript(video_id)
	        if not transcript_data:
	            logger.error(f"Failed to retrieve transcript for video {video_id}")
	            return None

	        # Process transcript
	        processed_data = data_processor.process_transcript(video_id, transcript_data)
	        if not processed_data:
	            logger.error(f"Failed to process transcript for video {video_id}")
	            return None

	        # Prepare video data. Counts go through _coerce_count so that a
	        # present-but-None (or non-numeric) statistic is stored as 0 instead
	        # of raising and discarding the whole video.
	        metadata = transcript_data['metadata']
	        video_data = {
	            'video_id': video_id,
	            'title': metadata.get('title', 'Unknown Title'),
	            'author': metadata.get('author', 'Unknown Author'),
	            'upload_date': metadata.get('upload_date', 'Unknown Date'),
	            'view_count': _coerce_count(metadata.get('view_count', 0)),
	            'like_count': _coerce_count(metadata.get('like_count', 0)),
	            'comment_count': _coerce_count(metadata.get('comment_count', 0)),
	            'video_duration': metadata.get('duration', 'Unknown Duration'),
	            'transcript_content': processed_data['content'],
	        }

	        # Save to database
	        db_handler.add_video(video_data)

	        # Build index; build_index's return value replaces the requested name.
	        index_name = f"video_{video_id}_{embedding_model}".lower()
	        index_name = data_processor.build_index(index_name)

	        if index_name:
	            # Save index information
	            embedding_model_id = db_handler.add_embedding_model(
	                embedding_model, "Description of the model"
	            )
	            video_record = db_handler.get_video_by_youtube_id(video_id)
	            if video_record:
	                # presumably video_record[0] is the row's primary key —
	                # verify against db_handler.
	                db_handler.add_elasticsearch_index(
	                    video_record[0], index_name, embedding_model_id
	                )
	                # NOTE(review): success is reported only once the index has
	                # been recorded against the video row — confirm this nesting
	                # matches the intended behaviour.
	                logger.info(f"Successfully processed video: {video_data['title']}")
	                return index_name

	        logger.error(f"Failed to process video {video_id}")
	        return None

	    except Exception as e:
	        # Boundary of the pipeline: the caller gets None rather than an
	        # exception. logger.exception logs at ERROR level and keeps the
	        # traceback, which the plain error() call dropped.
	        logger.exception(f"Error processing video {video_id}: {e}")
	        return None