# import os
# from dotenv import load_dotenv

# load_dotenv()

# class Settings:
#     GROQ_API_KEY = os.getenv("GROQ_API_KEY")
    
#     # Multilingual Model Settings
#     VIETNAMESE_EMBEDDING_MODEL = 'keepitreal/vietnamese-sbert'
#     VIETNAMESE_LLM_MODEL = "llama-3.1-8b-instant"
    
#     MULTILINGUAL_EMBEDDING_MODEL = 'sentence-transformers/all-MiniLM-L6-v2'  # Use a lighter model
#     MULTILINGUAL_LLM_MODEL = "llama-3.1-8b-instant"
    
#     # Fallback models in case primary models fail
#     FALLBACK_MULTILINGUAL_EMBEDDING_MODEL = 'sentence-transformers/all-MiniLM-L6-v2'
    
#     # Default models (fallback)
#     DEFAULT_EMBEDDING_MODEL = 'sentence-transformers/all-MiniLM-L6-v2'
#     DEFAULT_LLM_MODEL = "llama-3.1-8b-instant"
    
#     # Whisper Settings - UPDATED
#     WHISPER_MODEL = "whisper-large-v3"
    
#     # TTS Settings
#     MAX_CHUNK_LENGTH = 200
#     SUPPORTED_LANGUAGES = {
#         'vi': 'vi', 'en': 'en', 'fr': 'fr', 'es': 'es', 
#         'de': 'de', 'ja': 'ja', 'ko': 'ko', 'zh': 'zh'
#     }
    
#     # RAG Settings - UPDATED
#     EMBEDDING_DIMENSION = 768  # Matches vietnamese-sbert; note all-MiniLM-L6-v2 outputs 384-dim vectors
#     TOP_K_RESULTS = 5
    
#     # Audio Processing Settings - IMPORTANT UPDATE
#     SAMPLE_RATE = 16000
#     AUDIO_CHUNK_SIZE = 1024
#     AUDIO_SILENCE_THRESHOLD = 0.005  # Lower volume threshold
#     MIN_AUDIO_DURATION = 0.5  # Seconds - minimum audio length
#     MAX_AUDIO_DURATION = 10.0  # Seconds - maximum audio length
    
#     # SpeechBrain VAD Settings - UPDATED
#     VAD_MODEL = "snakers4/silero-vad"
#     VAD_THRESHOLD = 0.2  # Lowered threshold for higher sensitivity
#     VAD_MIN_SILENCE_DURATION = 3  # Longer silence duration
#     VAD_SPEECH_PAD_DURATION = 0.3   # More padding

# settings = Settings()
import os
from dotenv import load_dotenv

load_dotenv()

class Settings:
    GROQ_API_KEY = os.getenv("GROQ_API_KEY")
    
    # Multilingual Model Settings
    VIETNAMESE_EMBEDDING_MODEL = 'keepitreal/vietnamese-sbert'
    VIETNAMESE_LLM_MODEL = "llama-3.1-8b-instant"
    
    MULTILINGUAL_EMBEDDING_MODEL = 'sentence-transformers/all-MiniLM-L6-v2'
    MULTILINGUAL_LLM_MODEL = "llama-3.1-8b-instant"
    
    # Fallback models
    FALLBACK_MULTILINGUAL_EMBEDDING_MODEL = 'sentence-transformers/all-MiniLM-L6-v2'
    
    # Default models
    DEFAULT_EMBEDDING_MODEL = 'sentence-transformers/all-MiniLM-L6-v2'
    DEFAULT_LLM_MODEL = "llama-3.1-8b-instant"
    
    # OCR Settings - LANGUAGE CODE FIX
    OCR_MODEL = "kha-white/manga-ocr-base"
    EASYOCR_LANGUAGES = ['vi', 'en']  # EasyOCR expects 'ch_sim' (not 'zh') if Simplified Chinese is added
    
    # Whisper Settings
    WHISPER_MODEL = "whisper-large-v3"
    
    # TTS Settings
    MAX_CHUNK_LENGTH = 200
    SUPPORTED_LANGUAGES = {
        'vi': 'vi', 'en': 'en', 'fr': 'fr', 'es': 'es', 
        'de': 'de', 'ja': 'ja', 'ko': 'ko', 'zh': 'zh'
    }
    
    # RAG Settings
    EMBEDDING_DIMENSION = 768  # Vector size of keepitreal/vietnamese-sbert (all-MiniLM-L6-v2 outputs 384)
    TOP_K_RESULTS = 5
    
    # Audio Processing Settings
    SAMPLE_RATE = 16000
    AUDIO_CHUNK_SIZE = 1024
    AUDIO_SILENCE_THRESHOLD = 0.003  # Amplitude below this is treated as silence
    MIN_AUDIO_DURATION = 0.8   # seconds
    MAX_AUDIO_DURATION = 15.0  # seconds
    
    # VAD Settings
    VAD_MODEL = "snakers4/silero-vad"
    VAD_THRESHOLD = 0.3
    VAD_MIN_SPEECH_DURATION = 1.0
    VAD_MIN_SILENCE_DURATION = 2.0
    VAD_SPEECH_PAD_DURATION = 0.5
    VAD_PRE_SPEECH_BUFFER = 0.3

settings = Settings()
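

# Usage sketch: a minimal example of how this settings singleton might be consumed
# elsewhere in the project, e.g. to pick an embedding model per detected language.
# The helper name `pick_embedding_model` is hypothetical (not part of the project's
# API) and only relies on the attributes defined above.
def pick_embedding_model(language_code: str) -> str:
    """Return the embedding model name to load for a detected language code."""
    if language_code == 'vi':
        return settings.VIETNAMESE_EMBEDDING_MODEL
    if language_code in settings.SUPPORTED_LANGUAGES:
        return settings.MULTILINGUAL_EMBEDDING_MODEL
    return settings.DEFAULT_EMBEDDING_MODEL


if __name__ == "__main__":
    # Basic sanity checks when the module is run directly.
    if not settings.GROQ_API_KEY:
        print("Warning: GROQ_API_KEY is not set in the environment or .env file")
    print(pick_embedding_model('vi'))  # keepitreal/vietnamese-sbert
    print(pick_embedding_model('fr'))  # sentence-transformers/all-MiniLM-L6-v2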