Spaces:

datbkpro
/

voicebot

Running

App Files Files Community

voicebot / services /audio_service.py

datbkpro

Update services/audio_service.py

0370947 verified 16 days ago

raw

history blame contribute delete

8.04 kB


	import io
	import traceback

	import numpy as np
	import soundfile as sf
	from groq import Groq

	from config.settings import settings
	from core.rag_system import EnhancedRAGSystem
	from core.tts_service import EnhancedTTSService
	from core.multilingual_manager import MultilingualManager

	class AudioService:
	    """Voice pipeline: speech-to-text, RAG-backed answer, text-to-speech.

	    The service combines a Groq client (audio transcription and chat
	    completions), a RAG system used for semantic search, a TTS service and a
	    ``MultilingualManager`` used for language detection and LLM selection.
	    """

	    # Prompt templates. The continuation lines are deliberately not indented
	    # with the surrounding code: any extra indentation would become part of
	    # the text sent to the model.
	    _SYSTEM_PROMPT_VI = """Bạn là trợ lý AI thông minh chuyên về tiếng Việt. Hãy trả lời câu hỏi một cách tự nhiên và hữu ích.

	Thông tin tham khảo:
	{context}

	Nếu có thông tin tham khảo, hãy sử dụng nó. Nếu không, dựa vào kiến thức chung của bạn."""

	    _SYSTEM_PROMPT_DEFAULT = """You are a smart AI assistant. Please answer questions naturally and helpfully.

	Reference information:
	{context}

	If reference information is available, use it. Otherwise, rely on your general knowledge."""

	    # Placeholder inserted into the prompt when the search returns nothing.
	    _NO_CONTEXT = "Không có thông tin tham khảo cụ thể."

	    def __init__(self, groq_client: Groq, rag_system: EnhancedRAGSystem, tts_service: EnhancedTTSService):
	        """Store the collaborators and create a ``MultilingualManager``."""
	        self.groq_client = groq_client
	        self.rag_system = rag_system
	        self.tts_service = tts_service
	        self.multilingual_manager = MultilingualManager()

	    @staticmethod
	    def _failure(message: str, detail: str) -> tuple:
	        """Build the 4-tuple returned by ``transcribe_audio`` on any failure."""
	        return message, detail, None, "unknown"

	    @staticmethod
	    def _to_wav_bytes(samples, sample_rate):
	        """Encode samples as mono, peak-normalized 16-bit PCM WAV bytes.

	        Returns ``None`` when the signal has no positive peak (pure silence,
	        or a peak that is NaN), because it cannot be normalized.
	        """
	        if samples.ndim > 1:
	            # Average across axis 1 to down-mix multi-channel audio to mono.
	            samples = np.mean(samples, axis=1)
	        samples = samples.astype(np.float32)

	        peak = np.max(np.abs(samples))
	        if not peak > 0:
	            return None
	        samples = samples / peak

	        buffer = io.BytesIO()
	        sf.write(buffer, samples, sample_rate, format='WAV', subtype='PCM_16')
	        return buffer.getvalue()

	    def _request_transcription(self, wav_bytes: bytes) -> str:
	        """Send WAV bytes to the Groq transcription endpoint and return text.

	        Exceptions raised by the client propagate to the caller.
	        """
	        print("🔄 Đang gửi yêu cầu chuyển đổi giọng nói đến Groq API...")
	        transcription = self.groq_client.audio.transcriptions.create(
	            file=("audio.wav", wav_bytes, "audio/wav"),
	            model=settings.WHISPER_MODEL,
	            language="vi",
	            response_format="text",
	        )

	        # The result may be an object exposing ``.text`` or a plain string;
	        # anything else is stringified after logging its type.
	        if hasattr(transcription, 'text'):
	            return transcription.text
	        if isinstance(transcription, str):
	            return transcription
	        print(f"⚠️ Response structure không mong đợi: {type(transcription)}")
	        return str(transcription)

	    def _synthesize_speech(self, response: str, language: str):
	        """Return the saved TTS audio for ``response``, or ``None``.

	        Best effort: a TTS failure is logged and swallowed so that the text
	        answer is still delivered to the user.
	        """
	        try:
	            print(f"🔊 Đang tạo TTS cho response {len(response)} ký tự...")
	            tts_bytes = self.tts_service.text_to_speech(response, language)
	            if not tts_bytes:
	                return None
	            tts_audio = self.tts_service.save_tts_audio(tts_bytes)
	            print("✅ Đã tạo TTS thành công")
	            return tts_audio
	        except Exception as e:
	            print(f"⚠️ Lỗi TTS: {e}")
	            return None

	    def transcribe_audio(self, audio: tuple) -> tuple:
	        """Transcribe speech, generate an answer and, if possible, its audio.

	        Args:
	            audio: ``(sample_rate, samples)`` tuple as delivered by Gradio;
	                ``samples`` is used as a NumPy array.

	        Returns:
	            A 4-tuple ``(transcription, response, tts_audio, language)``.
	            ``tts_audio`` is the value returned by
	            ``tts_service.save_tts_audio`` or ``None`` when no speech was
	            produced. On failure the first two items are user-facing error
	            messages, ``tts_audio`` is ``None`` and ``language`` is
	            ``"unknown"``. Failures inside the ``try`` block are caught and
	            reported this way.
	        """
	        if not audio:
	            return self._failure("❌ Lỗi: Không có dữ liệu âm thanh", "❌ Vui lòng cung cấp file âm thanh")

	        try:
	            if not isinstance(audio, tuple):
	                return self._failure("❌ Lỗi: Định dạng âm thanh không hợp lệ", "❌ Định dạng âm thanh không được hỗ trợ")
	            sr, y = audio

	            if y.size == 0:
	                return self._failure("❌ Lỗi: Dữ liệu âm thanh trống", "❌ File âm thanh không có dữ liệu")

	            wav_bytes = self._to_wav_bytes(y, sr)
	            if wav_bytes is None:
	                return self._failure("❌ Lỗi: Âm thanh quá yếu", "❌ Không thể phát hiện âm thanh")

	            # API errors get their own message, separate from the generic
	            # "system error" reported by the outer handler.
	            try:
	                transcription_text = self._request_transcription(wav_bytes)
	            except Exception as e:
	                print(f"❌ Chi tiết lỗi: {e}")
	                return self._failure(f"❌ Lỗi API chuyển đổi giọng nói: {e}", "❌ Không thể chuyển đổi giọng nói thành văn bản")

	            if not transcription_text or not transcription_text.strip():
	                return self._failure("❌ Không thể nhận dạng giọng nói", "❌ Vui lòng thử lại với âm thanh rõ hơn")

	            print(f"✅ Đã chuyển đổi giọng nói: {transcription_text}")

	            language = self.multilingual_manager.detect_language(transcription_text)
	            response = self._generate_response_with_rag(transcription_text, language)

	            # Only speak real answers, never error messages.
	            tts_audio = None
	            if response and not response.startswith(("❌", "Error")):
	                tts_audio = self._synthesize_speech(response, language)

	            return transcription_text, response, tts_audio, language

	        except Exception as e:
	            # Requires the module-level ``import traceback``.
	            print(f"❌ Lỗi tổng hợp: {traceback.format_exc()}")
	            return self._failure(f"❌ Lỗi hệ thống xử lý âm thanh: {e}", "❌ Có lỗi xảy ra trong quá trình xử lý")

	    def _generate_response_with_rag(self, query: str, language: str) -> str:
	        """Answer ``query`` with the LLM, using RAG search results as context.

	        Args:
	            query: User question; empty, blank or "❌"-prefixed text is
	                rejected.
	            language: Language code; ``"vi"`` selects the Vietnamese system
	                prompt, any other value the English one.

	        Returns:
	            The stripped model answer, or a "❌"-prefixed error message.
	            This method does not raise; failures are reported in the
	            returned string.
	        """
	        if not query or not query.strip() or query.startswith("❌"):
	            return "❌ Truy vấn không hợp lệ để tạo phản hồi."

	        try:
	            rag_results = self.rag_system.semantic_search(query, top_k=3)
	            context_text = "".join(f"- {result.text}\n" for result in rag_results) if rag_results else ""

	            llm_model = self.multilingual_manager.get_llm_model(language)

	            system_prompt = self._SYSTEM_PROMPT_VI if language == "vi" else self._SYSTEM_PROMPT_DEFAULT
	            messages = [
	                {
	                    "role": "system",
	                    "content": system_prompt.format(context=context_text or self._NO_CONTEXT),
	                },
	                {
	                    "role": "user",
	                    "content": query,
	                },
	            ]

	            try:
	                completion = self.groq_client.chat.completions.create(
	                    model=llm_model,
	                    messages=messages,
	                    max_tokens=512,
	                    temperature=0.7,
	                )

	                # ``content`` can be present but None; checking the type
	                # avoids calling ``.strip()`` on None and misreporting it
	                # as an API failure.
	                content = getattr(completion.choices[0].message, "content", None)
	                if not isinstance(content, str):
	                    return "❌ Không thể tạo phản hồi từ AI"
	                return content.strip()

	            except Exception as e:
	                return f"❌ Lỗi khi gọi Groq API: {e}"

	        except Exception as e:
	            return f"❌ Lỗi tạo phản hồi: {e}"