Spaces:
Sleeping
Sleeping
Upload app.py
Browse files
app.py
CHANGED
|
@@ -15,12 +15,11 @@ from models import get_model_config, cleanup_all_models
|
|
| 15 |
from config import DEFAULT_ALPHA
|
| 16 |
from utils import clear_gpu_memory
|
| 17 |
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
"""Process uploaded ZIP file containing audio mixtures."""
|
| 21 |
|
| 22 |
if zip_file is None:
|
| 23 |
-
return None, "Please upload a ZIP file"
|
| 24 |
|
| 25 |
try:
|
| 26 |
# Use a fixed extraction path
|
|
@@ -56,18 +55,18 @@ def process_audio_files(zip_file, model_name, layer, alpha):
|
|
| 56 |
outs_dir = subitem
|
| 57 |
|
| 58 |
if refs_dir is None or outs_dir is None:
|
| 59 |
-
return None, "Could not find 'references' and 'outputs' directories in the ZIP file"
|
| 60 |
|
| 61 |
# Get audio files
|
| 62 |
ref_files = sorted([f for f in refs_dir.glob("*.wav")])
|
| 63 |
out_files = sorted([f for f in outs_dir.glob("*.wav")])
|
| 64 |
|
| 65 |
if len(ref_files) == 0:
|
| 66 |
-
return None, "No reference WAV files found"
|
| 67 |
if len(out_files) == 0:
|
| 68 |
-
return None, "No output WAV files found"
|
| 69 |
if len(ref_files) != len(out_files):
|
| 70 |
-
return None, f"Number of reference files ({len(ref_files)}) must match number of output files ({len(out_files)}). Files must be in the same order."
|
| 71 |
|
| 72 |
# Create manifest
|
| 73 |
manifest = [{
|
|
@@ -81,7 +80,7 @@ def process_audio_files(zip_file, model_name, layer, alpha):
|
|
| 81 |
# Validate model
|
| 82 |
allowed_models = set(get_model_config(0).keys())
|
| 83 |
if model_name not in allowed_models:
|
| 84 |
-
return None, f"Invalid model. Allowed: {', '.join(sorted(allowed_models))}"
|
| 85 |
|
| 86 |
# Set layer
|
| 87 |
if model_name == "raw":
|
|
@@ -94,17 +93,33 @@ def process_audio_files(zip_file, model_name, layer, alpha):
|
|
| 94 |
}
|
| 95 |
layer_final = layer if layer is not None else model_defaults.get(model_name, 12)
|
| 96 |
|
| 97 |
-
#
|
| 98 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 99 |
|
| 100 |
-
# Run experiment
|
| 101 |
results_dir = compute_mapss_measures(
|
| 102 |
models=[model_name],
|
| 103 |
mixtures=manifest,
|
| 104 |
layer=layer_final,
|
| 105 |
alpha=alpha,
|
| 106 |
verbose=True,
|
| 107 |
-
max_gpus=max_gpus,
|
| 108 |
add_ci=False # Disable CI for faster processing in demo
|
| 109 |
)
|
| 110 |
|
|
@@ -128,13 +143,30 @@ def process_audio_files(zip_file, model_name, layer, alpha):
|
|
| 128 |
return None, f"Processing completed but no output files were generated. Check if embeddings were computed."
|
| 129 |
|
| 130 |
except Exception as e:
|
| 131 |
-
error_msg = f"Error: {str(e)}\n{traceback.format_exc()}"
|
| 132 |
return None, error_msg
|
| 133 |
|
| 134 |
finally:
|
|
|
|
| 135 |
cleanup_all_models()
|
| 136 |
clear_gpu_memory()
|
| 137 |
gc.collect()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 138 |
|
| 139 |
def create_interface():
|
| 140 |
with gr.Blocks(title="MAPSS - Multi-source Audio Perceptual Separation Scores") as demo:
|
|
@@ -214,6 +246,7 @@ def create_interface():
|
|
| 214 |
- PS/PM scores are only computed between active speakers
|
| 215 |
- Processing time scales with number of sources and audio length
|
| 216 |
- GPU acceleration is automatically used when available
|
|
|
|
| 217 |
|
| 218 |
## Citation
|
| 219 |
|
|
|
|
| 15 |
from config import DEFAULT_ALPHA
|
| 16 |
from utils import clear_gpu_memory
|
| 17 |
|
| 18 |
+
def process_audio_files_cpu(zip_file, model_name, layer, alpha):
|
| 19 |
+
"""Process uploaded ZIP file containing audio mixtures - CPU part."""
|
|
|
|
| 20 |
|
| 21 |
if zip_file is None:
|
| 22 |
+
return None, "Please upload a ZIP file", None
|
| 23 |
|
| 24 |
try:
|
| 25 |
# Use a fixed extraction path
|
|
|
|
| 55 |
outs_dir = subitem
|
| 56 |
|
| 57 |
if refs_dir is None or outs_dir is None:
|
| 58 |
+
return None, "Could not find 'references' and 'outputs' directories in the ZIP file", None
|
| 59 |
|
| 60 |
# Get audio files
|
| 61 |
ref_files = sorted([f for f in refs_dir.glob("*.wav")])
|
| 62 |
out_files = sorted([f for f in outs_dir.glob("*.wav")])
|
| 63 |
|
| 64 |
if len(ref_files) == 0:
|
| 65 |
+
return None, "No reference WAV files found", None
|
| 66 |
if len(out_files) == 0:
|
| 67 |
+
return None, "No output WAV files found", None
|
| 68 |
if len(ref_files) != len(out_files):
|
| 69 |
+
return None, f"Number of reference files ({len(ref_files)}) must match number of output files ({len(out_files)}). Files must be in the same order.", None
|
| 70 |
|
| 71 |
# Create manifest
|
| 72 |
manifest = [{
|
|
|
|
| 80 |
# Validate model
|
| 81 |
allowed_models = set(get_model_config(0).keys())
|
| 82 |
if model_name not in allowed_models:
|
| 83 |
+
return None, f"Invalid model. Allowed: {', '.join(sorted(allowed_models))}", None
|
| 84 |
|
| 85 |
# Set layer
|
| 86 |
if model_name == "raw":
|
|
|
|
| 93 |
}
|
| 94 |
layer_final = layer if layer is not None else model_defaults.get(model_name, 12)
|
| 95 |
|
| 96 |
+
# Return preprocessed data for GPU processing
|
| 97 |
+
return manifest, layer_final, alpha
|
| 98 |
+
|
| 99 |
+
except Exception as e:
|
| 100 |
+
error_msg = f"Error in preprocessing: {str(e)}\n{traceback.format_exc()}"
|
| 101 |
+
return None, error_msg, None
|
| 102 |
+
|
| 103 |
+
@spaces.GPU(duration=300)
|
| 104 |
+
def process_audio_files_gpu(manifest, model_name, layer_final, alpha):
|
| 105 |
+
"""GPU processing part - only called when GPU is allocated."""
|
| 106 |
+
|
| 107 |
+
if manifest is None:
|
| 108 |
+
return None, "Invalid input data"
|
| 109 |
+
|
| 110 |
+
try:
|
| 111 |
+
# Force single GPU mode in Spaces environment
|
| 112 |
+
# The spaces decorator handles GPU allocation
|
| 113 |
+
max_gpus = 1 if torch.cuda.is_available() else 0
|
| 114 |
|
| 115 |
+
# Run experiment with forced single GPU
|
| 116 |
results_dir = compute_mapss_measures(
|
| 117 |
models=[model_name],
|
| 118 |
mixtures=manifest,
|
| 119 |
layer=layer_final,
|
| 120 |
alpha=alpha,
|
| 121 |
verbose=True,
|
| 122 |
+
max_gpus=max_gpus, # Force single GPU
|
| 123 |
add_ci=False # Disable CI for faster processing in demo
|
| 124 |
)
|
| 125 |
|
|
|
|
| 143 |
return None, f"Processing completed but no output files were generated. Check if embeddings were computed."
|
| 144 |
|
| 145 |
except Exception as e:
|
| 146 |
+
error_msg = f"Error in GPU processing: {str(e)}\n{traceback.format_exc()}"
|
| 147 |
return None, error_msg
|
| 148 |
|
| 149 |
finally:
|
| 150 |
+
# Cleanup is critical in Spaces environment
|
| 151 |
cleanup_all_models()
|
| 152 |
clear_gpu_memory()
|
| 153 |
gc.collect()
|
| 154 |
+
if torch.cuda.is_available():
|
| 155 |
+
torch.cuda.empty_cache()
|
| 156 |
+
|
| 157 |
+
def process_audio_files(zip_file, model_name, layer, alpha):
    """Run the full pipeline: CPU-side preparation, then the GPU-side stage.

    The arguments are forwarded unchanged to ``process_audio_files_cpu``,
    which returns a 3-tuple. When the first element of that tuple is
    ``None`` the preparation failed and the second element carries the
    error message instead of a layer value; ``(None, message)`` is then
    returned without invoking ``process_audio_files_gpu``. Otherwise the
    result of ``process_audio_files_gpu`` is returned as-is.
    """
    prepared = process_audio_files_cpu(zip_file, model_name, layer, alpha)
    manifest, layer_or_error, alpha_processed = prepared

    # Preparation succeeded: hand the prepared inputs to the GPU stage.
    if manifest is not None:
        return process_audio_files_gpu(
            manifest, model_name, layer_or_error, alpha_processed
        )

    # Preparation failed: the middle slot holds the error text, not a layer.
    return None, layer_or_error
|
| 170 |
|
| 171 |
def create_interface():
|
| 172 |
with gr.Blocks(title="MAPSS - Multi-source Audio Perceptual Separation Scores") as demo:
|
|
|
|
| 246 |
- PS/PM scores are only computed between active speakers
|
| 247 |
- Processing time scales with number of sources and audio length
|
| 248 |
- GPU acceleration is automatically used when available
|
| 249 |
+
- **Note**: This Hugging Face Space runs with a single GPU allocation
|
| 250 |
|
| 251 |
## Citation
|
| 252 |
|