# config.py
"""
Centralized configuration for the Live Radio Karaoke application.
"""
import os
from performance_config import PERF_CONFIG

# ---------------------------------------------------------------------------
# Streaming ASR models (sherpa-onnx zipformer)
# Small checkpoints, chosen for the HF Spaces free CPU tier. Each language
# has a source repo ID and the local directory its files are expected in.
# ---------------------------------------------------------------------------
REPO_ID_EN = "csukuangfj/sherpa-onnx-streaming-zipformer-en-20M-2023-02-17"
REPO_ID_FR = "csukuangfj/sherpa-onnx-streaming-zipformer-fr-kroko-2025-08-06"
REPO_ID_ZH = "csukuangfj/sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23"

MODEL_DIR_EN = "./sherpa-onnx-streaming-zipformer-en-20M-2023-02-17"
MODEL_DIR_FR = "./sherpa-onnx-streaming-zipformer-fr-kroko-2025-08-06"
MODEL_DIR_ZH = "./sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23"

# Per-language lookup tables, keyed by language code.
MODEL_DIRS = {
    "en": MODEL_DIR_EN,
    "fr": MODEL_DIR_FR,
    "zh": MODEL_DIR_ZH,
}
REPO_IDS = {
    "en": REPO_ID_EN,
    "fr": REPO_ID_FR,
    "zh": REPO_ID_ZH,
}

# Language code of the ASR model in use; English is the default.
CURRENT_MODEL = "en"

# ---------------------------------------------------------------------------
# Summarizer model (optimized: smallest available model)
# ---------------------------------------------------------------------------
SUMMARIZER_REPO_ID = "bartowski/google_gemma-3-1b-it-qat-GGUF"
SUMMARIZER_FILENAME = "google_gemma-3-1b-it-qat-Q4_0.gguf"
# Despite the "_DIR" suffix, the value is the path of the .gguf file itself
# ("./" followed by SUMMARIZER_FILENAME).
SUMMARIZER_MODEL_DIR = "./google_gemma-3-1b-it-qat-Q4_0.gguf"

# Radio Station Configuration
# Built-in stations: display name -> stream URL. The display names are also
# the keys used by STATION_LANGUAGES, and the "NPR" entry supplies
# DEFAULT_RADIO_URL. Several entries use plain http:// rather than https://.
RADIO_URLS = {
    # English-language stations
    "KEXP (Seattle, 64 kbps)": "https://kexp.streamguys1.com/kexp64.aac",
    "KEXP (Seattle, 160 kbps)": "https://kexp.streamguys1.com/kexp160.aac",
    "NPR": "https://npr-ice.streamguys1.com/live.mp3",
    "WYPR 88.1 FM (Baltimore)": "https://wtmd-ice.streamguys1.com/wypr-1-mp3",
    "WAMU 88.5 FM (Washington DC)": "https://wamu.cdnstream1.com/wamu.mp3",
    "BBC World Service": "http://stream.live.vc.bbcmedia.co.uk/bbc_world_service",
    "BBC Radio 4 (UK)": "http://stream.live.vc.bbcmedia.co.uk/bbc_radio_fourfm",
    # NOTE(review): labelled "(UK)" but the path ends in "_online_nonuk" — confirm this is the intended stream.
    "BBC Radio 5 Live (UK)": "http://stream.live.vc.bbcmedia.co.uk/bbc_radio_five_live_online_nonuk",
    "BBC Radio 2 (UK)": "http://stream.live.vc.bbcmedia.co.uk/bbc_radio_two",
    "KQED NPR (San Francisco)": "https://streams.kqed.org/kqedradio",
    "WNYC 93.9 FM (New York)": "http://stream.wnyc.org/wnycfm",
    "WBUR 90.9 FM (Boston)": "http://icecast.wbur.org/wbur",
    "KPCC 89.3 FM (Los Angeles)": "http://kpcclive.streamguys1.com/kpcc64.aac",
    "WHYY 90.9 FM (Philadelphia)": "http://whyy.streamguys1.com/whyy-mp3",
    "ABC News Radio (Australia)": "http://live-radio01.mediahubaustralia.com/PBW/mp3/",
    "CBC Radio One (Toronto)": "http://cbc_r1_tor.akacast.akamaistream.net/7/15/451661/v1/rc.akacast.akamaistream.net/cbc_r1_tor",
    "Voice of America (VOA News Now)": "https://voa-18.akacast.akamaistream.net/7/983/437752/v1/ibb.akacast.akamaistream.net/voa-18",
    # NOTE(review): this URL is an HLS playlist (.m3u8), unlike the other entries — confirm the audio pipeline accepts it.
    "Al Jazeera English (Audio)": "https://live-hls-web-aje.getaj.net/AJE/01.m3u8",
    "PRI The World": "http://stream.pri.org:8000/pri.mp3",
    "Radio Paradise (USA, Mix)": "http://stream.radioparadise.com/mp3-128",
    "KCRW 89.9 FM (Santa Monica)": "http://kcrw.streamguys1.com/kcrw_192",

    # French-language stations (mapped to "fr" in STATION_LANGUAGES)
    "France Inter": "https://direct.franceinter.fr/live/franceinter-midfi.mp3",
    "France Info": "https://direct.franceinfo.fr/live/franceinfo-midfi.mp3",
    "France Culture": "https://direct.franceculture.fr/live/franceculture-midfi.mp3",
    "FIP": "https://direct.fip.fr/live/fip-midfi.mp3",
    "Radio Classique": "https://radioclassique.ice.infomaniak.ch/radioclassique-high.mp3",
    
    # Mandarin Chinese (Traditional) stations (mapped to "zh" in STATION_LANGUAGES)
    "中廣新聞網": "https://stream.rcs.revma.com/78fm9wyy2tzuv",
    "News98新聞網": "https://stream.rcs.revma.com/pntx1639ntzuv.m4a",
    "飛碟聯播網": "https://stream.rcs.revma.com/em90w4aeewzuv",
}

# Station name -> language code for the built-in stations that are not
# English. Stations from RADIO_URLS that are missing here are filled in as
# "en" by the defaulting loop further down in this module.
STATION_LANGUAGES = {
    # French stations
    **dict.fromkeys(
        ("France Inter", "France Info", "France Culture", "FIP", "Radio Classique"),
        "fr",
    ),
    # Mandarin stations
    **dict.fromkeys(("中廣新聞網", "News98新聞網", "飛碟聯播網"), "zh"),
}

# Language codes that have their own entry in MODEL_DIRS / REPO_IDS.
SUPPORTED_ASR_LANGUAGES = {"en", "fr", "zh"}

# Stand-in ASR language for detected languages outside the supported set.
# Entries are grouped by the model they borrow; codes that appear nowhere in
# this table are sent to "en" by get_asr_language().
LANGUAGE_FALLBACK = {
    # Romance languages -> French (similar phonetics):
    # Spanish, Italian, Portuguese, Romanian, Catalan
    **dict.fromkeys(("es", "it", "pt", "ro", "ca"), "fr"),

    # Germanic languages -> English (similar phonetics):
    # German, Dutch, Danish, Swedish, Norwegian
    **dict.fromkeys(("de", "nl", "da", "sv", "no"), "en"),

    # Other European languages -> English:
    # Polish, Hungarian, Czech, Slovak, Finnish
    **dict.fromkeys(("pl", "hu", "cs", "sk", "fi"), "en"),

    # Asian languages -> Chinese (better for Asian phonetics):
    # Japanese, Korean
    **dict.fromkeys(("ja", "ko"), "zh"),

    # Asian languages -> English:
    # Thai, Vietnamese
    **dict.fromkeys(("th", "vi"), "en"),

    # Arabic and others -> English:
    # Arabic, Turkish, Russian
    **dict.fromkeys(("ar", "tr", "ru"), "en"),
}

def get_asr_language(detected_language: str) -> tuple[str, bool]:
    """
    Map a detected language code onto a language the ASR models can handle.
    
    Args:
        detected_language: Detected language code (e.g., 'es', 'de', 'ja')
        
    Returns:
        tuple: (asr_language, is_fallback). is_fallback is False when
        detected_language is itself in SUPPORTED_ASR_LANGUAGES. Otherwise it
        is True and asr_language is the LANGUAGE_FALLBACK entry for the code,
        or "en" when the code has no entry.
    """
    is_fallback = detected_language not in SUPPORTED_ASR_LANGUAGES
    asr_language = (
        LANGUAGE_FALLBACK.get(detected_language, "en")
        if is_fallback
        else detected_language
    )
    return asr_language, is_fallback

def detect_station_language(station_name: str, country: str = "", metadata: str = "") -> str:
    """
    Work out the language code of a station.
    
    Stations listed in STATION_LANGUAGES are answered straight from that
    table. Any other station is passed to RadioBrowserAPI's language
    detection as a minimal station record assembled from the arguments.
    
    Args:
        station_name: Name of the station
        country: Country code (optional)
        metadata: Additional metadata like description or tags
        
    Returns:
        Language code
    """
    try:
        # Fast path: the station already has a recorded language.
        return STATION_LANGUAGES[station_name]
    except KeyError:
        pass
    
    # Imported inside the function rather than at module level
    # (presumably to avoid a circular import — confirm).
    from api.radio_browser import RadioBrowserAPI
    detector = RadioBrowserAPI()
    
    name_lower = station_name.lower()
    country_upper = country.upper()
    metadata_lower = metadata.lower()
    
    # Stand-in station record; the metadata text is supplied both as the
    # tags and as the language field.
    station_stub = {
        'name': name_lower,
        'country': country_upper,
        'tags': metadata_lower,
        'language': metadata_lower
    }
    
    # NOTE: this relies on RadioBrowserAPI's private _detect_language helper.
    return detector._detect_language(station_stub)

# Every built-in station without an explicit language entry counts as English.
for station in RADIO_URLS:
    STATION_LANGUAGES.setdefault(station, "en")

# Default station: the "NPR" entry of RADIO_URLS.
DEFAULT_RADIO_URL = RADIO_URLS["NPR"]

# Audio processing configuration.
BYTES_PER_SAMPLE = 2  # s16le format is 16-bit -> 2 bytes
SAMPLE_RATE = 16000
# Chunk size is not fixed here; it is read from the performance profile.
CHUNK_SIZE = PERF_CONFIG["chunk_size"]

def get_current_model_dir():
    """Return the model directory for CURRENT_MODEL, or the English one if it has no entry."""
    try:
        return MODEL_DIRS[CURRENT_MODEL]
    except KeyError:
        return MODEL_DIR_EN

def get_current_repo_id():
    """Return the repo ID for CURRENT_MODEL, or the English one if it has no entry."""
    try:
        return REPO_IDS[CURRENT_MODEL]
    except KeyError:
        return REPO_ID_EN

def get_asr_config() -> dict:
    """
    Build the settings dictionary for the sherpa-onnx ASR model.

    The model directory comes from get_current_model_dir(). Model files are
    not named the same way in every directory, so each component is located
    by trying a list of known file names in priority order.

    Returns:
        dict with the keys "tokens", "encoder", "decoder", "joiner",
        "enable_endpoint_detection", "num_threads" and
        "rule3_min_utterance_length".

    Raises:
        FileNotFoundError: if tokens.txt is missing from the model directory,
            or if no encoder, decoder or joiner file could be located.
    """
    model_dir = get_current_model_dir()
    tokens_path = os.path.join(model_dir, "tokens.txt")

    # A missing tokens.txt is treated as "the model is not there at all".
    if not os.path.exists(tokens_path):
        raise FileNotFoundError(
            f"ASR model not found in {model_dir}. "
            "Please run the download script or ensure the path is correct."
        )

    def first_existing(candidates):
        """Return the path of the first candidate found in model_dir, else None."""
        paths = (os.path.join(model_dir, filename) for filename in candidates)
        return next((path for path in paths if os.path.exists(path)), None)

    # Candidate names cover the different naming conventions; within each
    # convention the int8 file is listed first so it wins when both exist.
    encoder_candidates = [
        "encoder-epoch-99-avg-1.int8.onnx",
        "encoder-epoch-99-avg-1.onnx",
        "encoder-epoch-20-avg-1-chunk-16-left-128.int8.onnx",
        "encoder-epoch-20-avg-1-chunk-16-left-128.onnx",
        "encoder.int8.onnx",
        "encoder.onnx",
    ]
    # No int8 names are tried for the decoder.
    decoder_candidates = [
        "decoder-epoch-99-avg-1.onnx",
        "decoder-epoch-20-avg-1-chunk-16-left-128.onnx",
        "decoder.onnx",
    ]
    joiner_candidates = [
        "joiner-epoch-99-avg-1.int8.onnx",
        "joiner-epoch-99-avg-1.onnx",
        "joiner-epoch-20-avg-1-chunk-16-left-128.int8.onnx",
        "joiner-epoch-20-avg-1-chunk-16-left-128.onnx",
        "joiner.int8.onnx",
        "joiner.onnx",
    ]

    encoder_path = first_existing(encoder_candidates)
    decoder_path = first_existing(decoder_candidates)
    joiner_path = first_existing(joiner_candidates)

    # All three components are required; the message shows which were found.
    if not (encoder_path and decoder_path and joiner_path):
        raise FileNotFoundError(
            f"Required model files not found in {model_dir}. "
            f"Found: encoder={encoder_path}, decoder={decoder_path}, joiner={joiner_path}"
        )

    config = {
        "tokens": tokens_path,
        "encoder": encoder_path,
        "decoder": decoder_path,
        "joiner": joiner_path,
        "enable_endpoint_detection": True,
        "num_threads": PERF_CONFIG["asr_threads"],
        # Increased to reduce processing frequency.
        # NOTE(review): sherpa-onnx's endpointing docs describe this value in
        # seconds — confirm 500 is intended.
        "rule3_min_utterance_length": 500,
    }
    return config