Spaces:

datbkpro
/

voicebot

Running

File size: 8,036 Bytes

3e3a497
dbf2148
 
 
 
 
 
 
a42e45a
dbf2148
 
 
 
1baee4c
 
a42e45a
dbf2148
a42e45a
dbf2148
 
1baee4c
a42e45a
1baee4c
 
 
 
 
 
dbf2148
1baee4c
 
 
dbf2148
1baee4c
 
 
 
 
 
 
 
 
 
dbf2148
1baee4c
 
 
 
dbf2148
3e3a497
1baee4c
3e3a497
 
 
1baee4c
3e3a497
1baee4c
3e3a497
1baee4c
 
3e3a497
 
 
 
 
 
 
 
 
 
1baee4c
 
3e3a497
1baee4c
dbf2148
1baee4c
 
 
 
3e3a497
 
1baee4c
 
 
 
 
 
 
 
3e3a497
1baee4c
 
 
 
3e3a497
1baee4c
 
 
 
 
 
 
 
3e3a497
1baee4c
a42e45a
dbf2148
 
1baee4c
 
 
dbf2148
1baee4c
dbf2148
 
1baee4c
dbf2148
 
1baee4c
a42e45a
1baee4c
dbf2148
a42e45a
1baee4c
dbf2148
1baee4c
 
 
 
 
 
dbf2148
1baee4c
 
 
 
 
 
a42e45a
 
1baee4c
 
 
 
 
 
 
 
dbf2148
a42e45a
3e3a497
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a42e45a
dbf2148
1baee4c


import numpy as np
import soundfile as sf
import io
from groq import Groq
from config.settings import settings
from core.rag_system import EnhancedRAGSystem
from core.tts_service import EnhancedTTSService
from core.multilingual_manager import MultilingualManager

class AudioService:
    """Voice pipeline: speech-to-text, RAG-backed answer generation, and TTS.

    The service wires together a Groq client (Whisper transcription and chat
    completions), a RAG system used for semantic search, a TTS service, and a
    ``MultilingualManager`` used for language detection and model selection.
    """

    def __init__(self, groq_client: Groq, rag_system: EnhancedRAGSystem, tts_service: EnhancedTTSService):
        """Store the collaborating services and create a MultilingualManager."""
        self.groq_client = groq_client
        self.rag_system = rag_system
        self.tts_service = tts_service
        self.multilingual_manager = MultilingualManager()

    @staticmethod
    def _failure(transcript_message: str, response_message: str) -> tuple:
        """Build the 4-tuple returned on every failure path."""
        return transcript_message, response_message, None, "unknown"

    def transcribe_audio(self, audio: tuple) -> tuple:
        """Transcribe speech, generate an answer for it, and synthesize the answer.

        Args:
            audio: A ``(sample_rate, samples)`` tuple. ``samples`` is used as
                a NumPy array (``.size``, ``.ndim``, ``.astype``); arrays with
                more than one dimension are averaged over axis 1 to get mono.

        Returns:
            A 4-tuple ``(transcription, response, tts_audio, language)``.
            ``tts_audio`` is whatever ``tts_service.save_tts_audio`` returned,
            or ``None`` when no audio was produced. On failure the first two
            items are user-facing error messages (prefixed with "❌"), the
            third is ``None`` and the language is ``"unknown"``.
        """
        if not audio:
            return self._failure("❌ Lỗi: Không có dữ liệu âm thanh", "❌ Vui lòng cung cấp file âm thanh")

        try:
            # Only the (sample_rate, samples) tuple form is accepted.
            if not isinstance(audio, tuple):
                return self._failure(
                    "❌ Lỗi: Định dạng âm thanh không hợp lệ",
                    "❌ Định dạng âm thanh không được hỗ trợ",
                )
            sr, y = audio

            if y.size == 0:
                return self._failure("❌ Lỗi: Dữ liệu âm thanh trống", "❌ File âm thanh không có dữ liệu")

            # Down-mix to mono and convert to float32 before normalizing.
            if y.ndim > 1:
                y = np.mean(y, axis=1)
            y = y.astype(np.float32)

            # Peak-normalize; an all-zero signal cannot be scaled.
            peak = np.max(np.abs(y))
            if peak > 0:
                y /= peak
            else:
                return self._failure("❌ Lỗi: Âm thanh quá yếu", "❌ Không thể phát hiện âm thanh")

            # Encode the samples as an in-memory 16-bit PCM WAV file.
            buffer = io.BytesIO()
            sf.write(buffer, y, sr, format='WAV', subtype='PCM_16')
            wav_bytes = buffer.getvalue()

            try:
                transcription_text = self._request_transcription(wav_bytes)
            except Exception as e:
                error_msg = f"❌ Lỗi API chuyển đổi giọng nói: {str(e)}"
                print(f"❌ Chi tiết lỗi: {e}")
                return self._failure(error_msg, "❌ Không thể chuyển đổi giọng nói thành văn bản")

            # Reject empty or whitespace-only transcriptions.
            if not transcription_text or not transcription_text.strip():
                return self._failure(
                    "❌ Không thể nhận dạng giọng nói",
                    "❌ Vui lòng thử lại với âm thanh rõ hơn",
                )

            print(f"✅ Đã chuyển đổi giọng nói: {transcription_text}")

            # Detect the language of the transcript and answer in it.
            language = self.multilingual_manager.detect_language(transcription_text)
            response = self._generate_response_with_rag(transcription_text, language)

            # Only synthesize speech for real answers, not for error messages.
            tts_audio = None
            if response and not response.startswith(("❌", "Error")):
                tts_audio = self._synthesize_reply(response, language)

            return transcription_text, response, tts_audio, language

        except Exception as e:
            # Imported here because the module does not import traceback at
            # the top; without it this handler itself raised NameError.
            import traceback

            error_msg = f"❌ Lỗi hệ thống xử lý âm thanh: {str(e)}"
            print(f"❌ Lỗi tổng hợp: {traceback.format_exc()}")
            return self._failure(error_msg, "❌ Có lỗi xảy ra trong quá trình xử lý")

    def _request_transcription(self, wav_bytes: bytes) -> str:
        """Send WAV bytes to the Groq transcription endpoint and return the text.

        The request is made with ``language="vi"`` and
        ``response_format="text"``. Exceptions from the client propagate to
        the caller.
        """
        print("🔄 Đang gửi yêu cầu chuyển đổi giọng nói đến Groq API...")

        transcription = self.groq_client.audio.transcriptions.create(
            file=("audio.wav", wav_bytes, "audio/wav"),
            model=settings.WHISPER_MODEL,
            language="vi",
            response_format="text",
        )

        # The result may be an object exposing ``.text`` or a plain string.
        if hasattr(transcription, 'text'):
            return transcription.text
        if isinstance(transcription, str):
            return transcription

        print(f"⚠️ Response structure không mong đợi: {type(transcription)}")
        return str(transcription)

    def _synthesize_reply(self, response: str, language: str):
        """Best-effort TTS: return the saved audio reference, or None.

        Any exception is logged and swallowed so that the text response is
        still returned to the user when speech synthesis fails.
        """
        tts_audio = None
        try:
            print(f"🔊 Đang tạo TTS cho response {len(response)} ký tự...")
            tts_bytes = self.tts_service.text_to_speech(response, language)
            if tts_bytes:
                tts_audio = self.tts_service.save_tts_audio(tts_bytes)
                print("✅ Đã tạo TTS thành công")
        except Exception as e:
            print(f"⚠️ Lỗi TTS: {e}")
        return tts_audio

    def _generate_response_with_rag(self, query: str, language: str) -> str:
        """Answer ``query`` with the chat model, using RAG search results as context.

        Args:
            query: The user's question. Empty, whitespace-only, or
                "❌"-prefixed text is rejected.
            language: Language code; ``"vi"`` selects the Vietnamese system
                prompt, anything else the English one.

        Returns:
            The stripped model answer, or a "❌"-prefixed error message. This
            method does not raise; failures are reported in the return value.
        """
        if not query or query.strip() == "" or query.startswith("❌"):
            return "❌ Truy vấn không hợp lệ để tạo phản hồi."

        try:
            # Collect the top search hits as a bullet list for the prompt.
            rag_results = self.rag_system.semantic_search(query, top_k=3)
            context_text = "".join(f"- {result.text}\n" for result in rag_results or [])

            # Pick the chat model for this language.
            llm_model = self.multilingual_manager.get_llm_model(language)

            # System prompt in the user's language, with a {context} slot.
            if language == "vi":
                system_prompt = """Bạn là trợ lý AI thông minh chuyên về tiếng Việt. Hãy trả lời câu hỏi một cách tự nhiên và hữu ích.
                
                Thông tin tham khảo:
                {context}
                
                Nếu có thông tin tham khảo, hãy sử dụng nó. Nếu không, dựa vào kiến thức chung của bạn."""
            else:
                system_prompt = """You are a smart AI assistant. Please answer questions naturally and helpfully.
                
                Reference information:
                {context}
                
                If reference information is available, use it. Otherwise, rely on your general knowledge."""

            # Fall back to a fixed note when the search produced no context.
            context = context_text or "Không có thông tin tham khảo cụ thể."
            messages = [
                {"role": "system", "content": system_prompt.format(context=context)},
                {"role": "user", "content": query},
            ]

            try:
                completion = self.groq_client.chat.completions.create(
                    model=llm_model,
                    messages=messages,
                    max_tokens=512,
                    temperature=0.7,
                )

                # Guard against an empty choice list and a missing/None
                # content (presumably possible for non-text completions —
                # confirm against the Groq SDK) instead of failing on
                # indexing or on ``None.strip()``.
                choices = getattr(completion, "choices", None)
                if not choices:
                    return "❌ Không thể tạo phản hồi từ AI"
                content = getattr(choices[0].message, "content", None)
                if content is None:
                    return "❌ Không thể tạo phản hồi từ AI"
                return content.strip()

            except Exception as e:
                return f"❌ Lỗi khi gọi Groq API: {str(e)}"

        except Exception as e:
            return f"❌ Lỗi tạo phản hồi: {str(e)}"