Spaces:

datbkpro
/

voicebot

Running

App Files Community

datbkpro committed 22 days ago

Commit

1baee4c

verified ·

1 Parent(s): a42e45a

Update services/audio_service.py

Browse files

Files changed (1) hide show

services/audio_service.py +92 -50

services/audio_service.py CHANGED Viewed

@@ -11,83 +11,125 @@ class AudioService:
     def __init__(self, groq_client: Groq, rag_system: EnhancedRAGSystem, tts_service: EnhancedTTSService):
         # Keep references to the injected collaborators; only the MultilingualManager is created here.
         self.groq_client = groq_client
-        self.rag_system = rag_system  # Use the injected parameter instead of creating a new instance
-        self.tts_service = tts_service  # Use the injected parameter instead of creating a new instance
         self.multilingual_manager = MultilingualManager()
     def transcribe_audio(self, audio: tuple) -> tuple:
         """Convert speech to text using the Whisper model, then answer it and synthesize speech.
         Returns ``(transcription, response, tts_audio, language)``; ``tts_audio`` stays ``None`` unless a TTS file was saved.
         """
         if not audio:
-            return "Error: Audio input is empty.", "Error: Không có dữ liệu âm thanh", None, "unknown"
-        # Handle the audio input coming from Gradio
-        if isinstance(audio, tuple):
-            sr, y = audio
-        else:
-            return "Error: Invalid audio format", "Error: Định dạng âm thanh không hợp lệ", None, "unknown"
-        if y.ndim > 1:
-            y = np.mean(y, axis=1)  # Convert to mono if needed
-        y = y.astype(np.float32)
-        y /= np.max(np.abs(y))  # Normalize the audio (NOTE(review): a peak of 0 is not guarded against here)
-        buffer = io.BytesIO()
-        sf.write(buffer, y, sr, format='WAV')
-        buffer.seek(0)
-        try:
-            completion = self.groq_client.audio.transcriptions.create(
-                model=settings.WHISPER_MODEL,
-                file=buffer,
-                response_format="text"
-            )
-            transcription = completion.text
-        except Exception as e:
-            transcription = f"Error trong quá trình chuyển đổi giọng nói thành văn bản: {e}"  # the error text is still passed to the steps below
-        language = self.multilingual_manager.detect_language(transcription)
-        response = self._generate_response_with_rag(transcription, language)
-        tts_audio = None
-        if response and not response.startswith("Error"):
-            tts_bytes = self.tts_service.text_to_speech(response, language)
-            if tts_bytes:
-                tts_audio_path = self.tts_service.save_tts_audio(tts_bytes)
-                tts_audio = tts_audio_path
-        return transcription, response, tts_audio, language
     def _generate_response_with_rag(self, query: str, language: str) -> str:
         """Generate a response using the RAG system based on the query and language.
         Returns the stripped content of the first completion choice, or an "Error ..." string when the
         query is empty, already starts with "Error", or any step raises.
         """
-        if not query or query.startswith("Error"):
-            return "Error: Truy vấn không hợp lệ để tạo phản hồi."
         try:
             rag_results = self.rag_system.semantic_search(query, top_k=3)
             context_text = ""
             if rag_results:
                 for result in rag_results:
-                    context_text += result.document + "\n"
             llm_model = self.multilingual_manager.get_llm_model(language)
             if language == "vi":
-                system_prompt = """Bạn là trợ lý AI thông minh chuyên về tiếng Việt. Hãy sử dụng thông tin từ cơ sở kiến thức được cung cấp để trả lời câu hỏi một cách chính xác và hữu ích bằng tiếng Việt.
-                    Thông tin tham khảo từ cơ sở kiến thức:
-                    {context}
-                    Nếu thông tin từ cơ sở kiến thức không đủ để trả lời, hãy dựa vào kiến thức chung của bạn. Luôn trả lời bằng tiếng Việt tự nhiên và dễ hiểu."""
             else:
-                system_prompt = """You are a smart AI assistant. Please use the information from the provided knowledge base to answer questions accurately and helpfully in the same language as the user's question.
-                    Reference information from knowledge base:
-                    {context}
-                    If the information from the knowledge base is not sufficient to answer, rely on your general knowledge. Always respond in natural and easy-to-understand language matching the user's language."""
             messages = [
-                {"role": "system", "content": system_prompt.format(context=context_text)},
-                {"role": "user", "content": query}
             ]
             completion = self.groq_client.chat.completions.create(
                 model=llm_model,
                 messages=messages,
@@ -98,4 +140,4 @@ class AudioService:
             return completion.choices[0].message.content.strip()
         except Exception as e:
-            return f"Error trong quá trình tạo phản hồi với RAG: {e}"

     def __init__(self, groq_client: Groq, rag_system: EnhancedRAGSystem, tts_service: EnhancedTTSService):
         """Wire the service to its collaborators.

         The Groq client, RAG system and TTS service are injected by the
         caller and stored as-is; a MultilingualManager is created here.
         """
         self.multilingual_manager = MultilingualManager()
         self.tts_service = tts_service
         self.rag_system = rag_system
         self.groq_client = groq_client
     def transcribe_audio(self, audio: tuple) -> tuple:
         """Transcribe an audio clip, answer it through RAG and synthesize the reply.

         Args:
             audio: ``(sample_rate, samples)`` tuple (the Gradio audio format).
                 ``samples`` is array-like; input with more than one dimension
                 is averaged over axis 1 to obtain a mono signal.

         Returns:
             ``(transcription, response, tts_audio_path, language)``. On any
             failure the first two items are user-facing messages starting
             with "❌", the path is ``None`` and the language is ``"unknown"``.
             ``tts_audio_path`` is also ``None`` when speech synthesis fails
             or yields nothing; the text response is still returned then.
         """
         if not audio:
             return "❌ Lỗi: Không có dữ liệu âm thanh", "❌ Vui lòng cung cấp file âm thanh", None, "unknown"

         try:
             if not isinstance(audio, tuple):
                 return "❌ Lỗi: Định dạng âm thanh không hợp lệ", "❌ Định dạng âm thanh không được hỗ trợ", None, "unknown"
             sr, y = audio

             # Accept any array-like samples, not only ready-made ndarrays.
             y = np.asarray(y)
             if y.size == 0:
                 return "❌ Lỗi: Dữ liệu âm thanh trống", "❌ File âm thanh không có dữ liệu", None, "unknown"

             if y.ndim > 1:
                 y = np.mean(y, axis=1)  # down-mix to mono
             y = y.astype(np.float32)

             # Peak-normalize; the peak is computed once. A peak of 0 (pure
             # silence) or NaN cannot be normalized and is rejected.
             peak = np.max(np.abs(y))
             if not peak > 0:
                 return "❌ Lỗi: Âm thanh quá yếu", "❌ Không thể phát hiện âm thanh", None, "unknown"
             y /= peak

             # Encode as 16-bit PCM WAV in memory and upload it as a named file tuple.
             buffer = io.BytesIO()
             sf.write(buffer, y, sr, format='WAV', subtype='PCM_16')

             try:
                 transcription = self.groq_client.audio.transcriptions.create(
                     model=settings.WHISPER_MODEL,
                     file=("audio.wav", buffer.getvalue(), "audio/wav"),
                     response_format="text",
                 )
             except Exception as e:
                 error_msg = f"❌ Lỗi API chuyển đổi giọng nói: {e}"
                 return error_msg, "❌ Không thể chuyển đổi giọng nói thành văn bản", None, "unknown"

             # With response_format="text" the SDK may return the transcript as
             # a bare string instead of an object exposing ``.text``; reading
             # ``.text`` unconditionally would turn every successful call into
             # an AttributeError, so both shapes are accepted.
             if isinstance(transcription, str):
                 transcription_text = transcription
             else:
                 transcription_text = getattr(transcription, "text", None)

             if not transcription_text or not transcription_text.strip():
                 return "❌ Không thể nhận dạng giọng nói", "❌ Vui lòng thử lại với âm thanh rõ hơn", None, "unknown"

             language = self.multilingual_manager.detect_language(transcription_text)
             response = self._generate_response_with_rag(transcription_text, language)

             # Only synthesize speech for real answers, not for error messages.
             tts_audio = None
             if response and not response.startswith(("❌", "Error")):
                 try:
                     tts_bytes = self.tts_service.text_to_speech(response, language)
                     if tts_bytes:
                         tts_audio = self.tts_service.save_tts_audio(tts_bytes)
                 except Exception as e:
                     # Best effort: a TTS failure must not discard the text response.
                     print(f"⚠️ Lỗi TTS: {e}")

             return transcription_text, response, tts_audio, language
         except Exception as e:
             error_msg = f"❌ Lỗi hệ thống xử lý âm thanh: {e}"
             return error_msg, "❌ Có lỗi xảy ra trong quá trình xử lý", None, "unknown"
     def _generate_response_with_rag(self, query: str, language: str) -> str:
         """Generate a response using the RAG system based on the query and language.
         Returns the stripped content of the first completion choice, or a message starting with "❌"
         when the query is blank, already starts with "❌", or any step raises.
         """
+        if not query or query.strip() == "" or query.startswith("❌"):
+            return "❌ Truy vấn không hợp lệ để tạo phản hồi."
         try:
+            # Search the RAG system
             rag_results = self.rag_system.semantic_search(query, top_k=3)
             context_text = ""
             if rag_results:
                 for result in rag_results:
+                    context_text += f"- {result.text}\n"
+            # Pick the LLM model that matches the language
             llm_model = self.multilingual_manager.get_llm_model(language)
+            # Build the system prompt that matches the language
             if language == "vi":
+                system_prompt = """Bạn là trợ lý AI thông minh chuyên về tiếng Việt. Hãy trả lời câu hỏi một cách tự nhiên và hữu ích.
+                Thông tin tham khảo:
+                {context}
+                Nếu có thông tin tham khảo, hãy sử dụng nó. Nếu không, dựa vào kiến thức chung của bạn."""
             else:
+                system_prompt = """You are a smart AI assistant. Please answer questions naturally and helpfully.
+                Reference information:
+                {context}
+                If reference information is available, use it. Otherwise, rely on your general knowledge."""
+            # NOTE(review): the fallback context text below is Vietnamese even when the English prompt is selected.
             messages = [
+                {
+                    "role": "system",
+                    "content": system_prompt.format(context=context_text) if context_text else system_prompt.format(context="Không có thông tin tham khảo cụ thể.")
+                },
+                {
+                    "role": "user",
+                    "content": query
+                }
             ]
+            # Call the Groq API
             completion = self.groq_client.chat.completions.create(
                 model=llm_model,
                 messages=messages,
             # NOTE(review): the remaining arguments of this call are not shown in this diff view (hunk boundary).
             return completion.choices[0].message.content.strip()
         except Exception as e:
+            return f"❌ Lỗi tạo phản hồi: {str(e)}"