AIvry committed
Commit 437e805 · verified · 1 Parent(s): 5b6a83c

Upload app.py

Files changed (1):
  1. app.py +47 -14
app.py CHANGED
@@ -15,12 +15,11 @@ from models import get_model_config, cleanup_all_models
 from config import DEFAULT_ALPHA
 from utils import clear_gpu_memory
 
-@spaces.GPU(duration=300)
-def process_audio_files(zip_file, model_name, layer, alpha):
-    """Process uploaded ZIP file containing audio mixtures."""
+def process_audio_files_cpu(zip_file, model_name, layer, alpha):
+    """Process uploaded ZIP file containing audio mixtures - CPU part."""
 
     if zip_file is None:
-        return None, "Please upload a ZIP file"
+        return None, "Please upload a ZIP file", None
 
     try:
         # Use a fixed extraction path
@@ -56,18 +55,18 @@ def process_audio_files(zip_file, model_name, layer, alpha):
                 outs_dir = subitem
 
         if refs_dir is None or outs_dir is None:
-            return None, "Could not find 'references' and 'outputs' directories in the ZIP file"
+            return None, "Could not find 'references' and 'outputs' directories in the ZIP file", None
 
         # Get audio files
         ref_files = sorted([f for f in refs_dir.glob("*.wav")])
         out_files = sorted([f for f in outs_dir.glob("*.wav")])
 
         if len(ref_files) == 0:
-            return None, "No reference WAV files found"
+            return None, "No reference WAV files found", None
         if len(out_files) == 0:
-            return None, "No output WAV files found"
+            return None, "No output WAV files found", None
         if len(ref_files) != len(out_files):
-            return None, f"Number of reference files ({len(ref_files)}) must match number of output files ({len(out_files)}). Files must be in the same order."
+            return None, f"Number of reference files ({len(ref_files)}) must match number of output files ({len(out_files)}). Files must be in the same order.", None
 
         # Create manifest
         manifest = [{
@@ -81,7 +80,7 @@ def process_audio_files(zip_file, model_name, layer, alpha):
         # Validate model
         allowed_models = set(get_model_config(0).keys())
         if model_name not in allowed_models:
-            return None, f"Invalid model. Allowed: {', '.join(sorted(allowed_models))}"
+            return None, f"Invalid model. Allowed: {', '.join(sorted(allowed_models))}", None
 
         # Set layer
         if model_name == "raw":
@@ -94,17 +93,33 @@ def process_audio_files(zip_file, model_name, layer, alpha):
         }
         layer_final = layer if layer is not None else model_defaults.get(model_name, 12)
 
-        # Check GPU availability - use all available GPUs on the space
-        max_gpus = torch.cuda.device_count() if torch.cuda.is_available() else 0
+        # Return preprocessed data for GPU processing
+        return manifest, layer_final, alpha
+
+    except Exception as e:
+        error_msg = f"Error in preprocessing: {str(e)}\n{traceback.format_exc()}"
+        return None, error_msg, None
+
+@spaces.GPU(duration=300)
+def process_audio_files_gpu(manifest, model_name, layer_final, alpha):
+    """GPU processing part - only called when GPU is allocated."""
+
+    if manifest is None:
+        return None, "Invalid input data"
+
+    try:
+        # Force single GPU mode in Spaces environment
+        # The spaces decorator handles GPU allocation
+        max_gpus = 1 if torch.cuda.is_available() else 0
 
-        # Run experiment
+        # Run experiment with forced single GPU
         results_dir = compute_mapss_measures(
             models=[model_name],
             mixtures=manifest,
             layer=layer_final,
             alpha=alpha,
             verbose=True,
-            max_gpus=max_gpus,
+            max_gpus=max_gpus,  # Force single GPU
             add_ci=False  # Disable CI for faster processing in demo
         )
 
@@ -128,13 +143,30 @@ def process_audio_files(zip_file, model_name, layer, alpha):
             return None, f"Processing completed but no output files were generated. Check if embeddings were computed."
 
     except Exception as e:
-        error_msg = f"Error: {str(e)}\n{traceback.format_exc()}"
+        error_msg = f"Error in GPU processing: {str(e)}\n{traceback.format_exc()}"
         return None, error_msg
 
     finally:
+        # Cleanup is critical in Spaces environment
         cleanup_all_models()
         clear_gpu_memory()
         gc.collect()
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+
+def process_audio_files(zip_file, model_name, layer, alpha):
+    """Main processing function that combines CPU and GPU parts."""
+
+    # First, do CPU preprocessing
+    manifest, layer_final, alpha_processed = process_audio_files_cpu(
+        zip_file, model_name, layer, alpha
+    )
+
+    if manifest is None:
+        return None, layer_final  # layer_final contains error message in this case
+
+    # Then do GPU processing
+    return process_audio_files_gpu(manifest, model_name, layer_final, alpha_processed)
 
 def create_interface():
     with gr.Blocks(title="MAPSS - Multi-source Audio Perceptual Separation Scores") as demo:
@@ -214,6 +246,7 @@ def create_interface():
         - PS/PM scores are only computed between active speakers
         - Processing time scales with number of sources and audio length
        - GPU acceleration is automatically used when available
+        - **Note**: This Hugging Face Space runs with a single GPU allocation
 
         ## Citation
 
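The diff follows the Spaces ZeroGPU pattern: keep file handling and validation on the CPU, wrap only the heavy computation in a `@spaces.GPU`-decorated function, and compose the two in a thin wrapper that the Gradio UI calls, so validation failures never consume GPU allocation time. Below is a minimal, self-contained sketch of that pattern; the function names (`preprocess`, `run_on_gpu`, `handler`) and the toy matmul workload are hypothetical stand-ins, not part of this repository.

```python
import spaces  # Hugging Face Spaces GPU decorator, as used in app.py
import torch


def preprocess(payload):
    """CPU-only validation; returns (data, error) so failures never request a GPU."""
    if payload is None:
        return None, "No input provided"
    return {"data": payload}, None


@spaces.GPU(duration=120)  # GPU is only allocated while this function runs
def run_on_gpu(prepared):
    """GPU stage; a toy matmul stands in for the real workload (e.g. compute_mapss_measures)."""
    device = "cuda" if torch.cuda.is_available() else "cpu"
    try:
        x = torch.randn(512, 512, device=device)
        score = (x @ x.T).mean().item()  # placeholder computation
        return {"score": score, "echo": prepared["data"]}, None
    finally:
        if torch.cuda.is_available():
            torch.cuda.empty_cache()  # release cached memory before the GPU is returned


def handler(payload):
    """Thin wrapper wired to the Gradio event handler."""
    prepared, err = preprocess(payload)
    if prepared is None:
        return None, err
    return run_on_gpu(prepared)
```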
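For exercising the demo, the uploaded archive must contain a `references/` directory and an `outputs/` directory holding the same number of WAV files, matched to each other by sorted order. A small sketch for assembling such a ZIP; the directory and file names in the usage comment are hypothetical.

```python
import zipfile
from pathlib import Path


def build_demo_zip(ref_dir: str, out_dir: str, zip_path: str = "mixtures.zip") -> str:
    """Pack reference and separated WAV files into the layout app.py expects."""
    refs = sorted(Path(ref_dir).glob("*.wav"))
    outs = sorted(Path(out_dir).glob("*.wav"))
    if not refs or not outs:
        raise ValueError("Both directories must contain at least one WAV file")
    if len(refs) != len(outs):
        raise ValueError(f"{len(refs)} reference files vs {len(outs)} output files")

    with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf:
        for f in refs:
            zf.write(f, arcname=f"references/{f.name}")  # paired with outputs/ by sorted order
        for f in outs:
            zf.write(f, arcname=f"outputs/{f.name}")
    return zip_path


# Hypothetical usage:
# build_demo_zip("my_references", "my_separations", "demo_input.zip")
```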