Spaces:
				
			
			
	
			
			
					
		Running
		
	
	
	
			
			
	
	
	
	
		
		
					
		Running
		
	Bug Fixes
Browse files
    	
        app.py
    CHANGED
    
    | @@ -1,7 +1,6 @@ | |
| 1 | 
             
            import os
         | 
| 2 | 
            -
            os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE'
         | 
| 3 |  | 
| 4 | 
            -
            import  | 
| 5 | 
             
            import torch
         | 
| 6 | 
             
            import torch.nn as nn
         | 
| 7 | 
             
            import torch.nn.functional as F
         | 
| @@ -9,9 +8,7 @@ import soundfile as sf | |
| 9 | 
             
            import torchaudio
         | 
| 10 | 
             
            from transformers import Wav2Vec2FeatureExtractor, Wav2Vec2Model
         | 
| 11 | 
             
            import numpy as np
         | 
| 12 | 
            -
            from pathlib import Path
         | 
| 13 | 
             
            import json
         | 
| 14 | 
            -
            import tempfile
         | 
| 15 |  | 
| 16 | 
             
            # ============================================================
         | 
| 17 | 
             
            # MODEL DEFINITION
         | 
| @@ -41,75 +38,22 @@ class Wav2Vec2ForSpeakerEmbedding(nn.Module): | |
| 41 |  | 
| 42 |  | 
| 43 | 
             
            # ============================================================
         | 
| 44 | 
            -
            #  | 
| 45 | 
             
            # ============================================================
         | 
| 46 |  | 
| 47 | 
            -
             | 
| 48 | 
            -
                """Process uploaded audio file"""
         | 
| 49 | 
            -
                try:
         | 
| 50 | 
            -
                    # Save uploaded file temporarily
         | 
| 51 | 
            -
                    with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmp_file:
         | 
| 52 | 
            -
                        tmp_file.write(audio_file.getvalue())
         | 
| 53 | 
            -
                        tmp_path = tmp_file.name
         | 
| 54 | 
            -
                    
         | 
| 55 | 
            -
                    # Load audio
         | 
| 56 | 
            -
                    waveform, sr = sf.read(tmp_path, dtype='float32')
         | 
| 57 | 
            -
                    waveform = torch.from_numpy(waveform)
         | 
| 58 | 
            -
                    
         | 
| 59 | 
            -
                    # Convert to mono
         | 
| 60 | 
            -
                    if len(waveform.shape) > 1:
         | 
| 61 | 
            -
                        waveform = torch.mean(waveform, dim=-1)
         | 
| 62 | 
            -
                    
         | 
| 63 | 
            -
                    # Resample to 16kHz
         | 
| 64 | 
            -
                    if sr != 16000:
         | 
| 65 | 
            -
                        resampler = torchaudio.transforms.Resample(sr, 16000)
         | 
| 66 | 
            -
                        waveform = resampler(waveform)
         | 
| 67 | 
            -
                    
         | 
| 68 | 
            -
                    # Take middle chunk
         | 
| 69 | 
            -
                    if len(waveform) > max_length:
         | 
| 70 | 
            -
                        start = (len(waveform) - max_length) // 2
         | 
| 71 | 
            -
                        waveform = waveform[start:start + max_length]
         | 
| 72 | 
            -
                    elif len(waveform) < max_length:
         | 
| 73 | 
            -
                        padding = max_length - len(waveform)
         | 
| 74 | 
            -
                        waveform = torch.nn.functional.pad(waveform, (0, padding))
         | 
| 75 | 
            -
                    
         | 
| 76 | 
            -
                    # Normalize
         | 
| 77 | 
            -
                    if waveform.abs().max() > 0:
         | 
| 78 | 
            -
                        waveform = waveform / waveform.abs().max()
         | 
| 79 | 
            -
                    
         | 
| 80 | 
            -
                    # Extract features
         | 
| 81 | 
            -
                    inputs = feature_extractor(
         | 
| 82 | 
            -
                        waveform.numpy(),
         | 
| 83 | 
            -
                        sampling_rate=16000,
         | 
| 84 | 
            -
                        return_tensors="pt"
         | 
| 85 | 
            -
                    )
         | 
| 86 | 
            -
                    
         | 
| 87 | 
            -
                    # Cleanup
         | 
| 88 | 
            -
                    os.unlink(tmp_path)
         | 
| 89 | 
            -
                    
         | 
| 90 | 
            -
                    return inputs.input_values, waveform.numpy(), sr
         | 
| 91 | 
            -
                
         | 
| 92 | 
            -
                except Exception as e:
         | 
| 93 | 
            -
                    st.error(f"Error processing audio: {e}")
         | 
| 94 | 
            -
                    return None, None, None
         | 
| 95 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
| 96 |  | 
| 97 | 
            -
             | 
| 98 | 
            -
                """Extract embedding from audio file"""
         | 
| 99 | 
            -
                inputs, waveform, sr = process_audio(audio_file, feature_extractor)
         | 
| 100 | 
            -
                if inputs is None:
         | 
| 101 | 
            -
                    return None
         | 
| 102 | 
            -
                
         | 
| 103 | 
            -
                model.eval()
         | 
| 104 | 
            -
                with torch.no_grad():
         | 
| 105 | 
            -
                    inputs = inputs.to(device)
         | 
| 106 | 
            -
                    embedding = model(inputs)
         | 
| 107 | 
            -
                
         | 
| 108 | 
            -
                return embedding.cpu().numpy()
         | 
| 109 |  | 
| 110 |  | 
| 111 | 
             
            # ============================================================
         | 
| 112 | 
            -
            #  | 
| 113 | 
             
            # ============================================================
         | 
| 114 |  | 
| 115 | 
             
            class EnrollmentDB:
         | 
| @@ -135,10 +79,6 @@ class EnrollmentDB: | |
| 135 | 
             
                    self.save_db()
         | 
| 136 |  | 
| 137 | 
             
                def verify(self, embedding, threshold=0.75):
         | 
| 138 | 
            -
                    """
         | 
| 139 | 
            -
                    Verify against all enrolled users
         | 
| 140 | 
            -
                    Returns: (best_match_name, similarity_score, is_verified)
         | 
| 141 | 
            -
                    """
         | 
| 142 | 
             
                    if not self.enrollments:
         | 
| 143 | 
             
                        return None, 0.0, False
         | 
| 144 |  | 
| @@ -156,12 +96,14 @@ class EnrollmentDB: | |
| 156 | 
             
                            best_match = name
         | 
| 157 |  | 
| 158 | 
             
                    is_verified = best_score >= threshold
         | 
| 159 | 
            -
                    
         | 
| 160 | 
             
                    return best_match, best_score, is_verified
         | 
| 161 |  | 
| 162 | 
             
                def get_all_users(self):
         | 
| 163 | 
             
                    return list(self.enrollments.keys())
         | 
| 164 |  | 
|  | |
|  | |
|  | |
| 165 | 
             
                def remove_user(self, name):
         | 
| 166 | 
             
                    if name in self.enrollments:
         | 
| 167 | 
             
                        del self.enrollments[name]
         | 
| @@ -169,326 +111,353 @@ class EnrollmentDB: | |
| 169 | 
             
                        return True
         | 
| 170 | 
             
                    return False
         | 
| 171 |  | 
|  | |
|  | |
| 172 |  | 
| 173 | 
             
            # ============================================================
         | 
| 174 | 
            -
            #  | 
| 175 | 
             
            # ============================================================
         | 
| 176 |  | 
| 177 | 
            -
             | 
| 178 | 
            -
             | 
| 179 | 
            -
                 | 
| 180 | 
            -
             | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 181 |  | 
| 182 | 
            -
                 | 
| 183 | 
            -
             | 
| 184 | 
            -
             | 
|  | |
|  | |
|  | |
| 185 | 
             
                model.eval()
         | 
| 186 | 
            -
                
         | 
| 187 | 
            -
             | 
| 188 | 
            -
             | 
| 189 | 
            -
             | 
|  | |
|  | |
| 190 |  | 
|  | |
|  | |
|  | |
| 191 |  | 
| 192 | 
            -
            def  | 
| 193 | 
            -
                 | 
| 194 | 
            -
             | 
| 195 | 
            -
                     | 
| 196 | 
            -
                    layout="wide"
         | 
| 197 | 
            -
                )
         | 
| 198 |  | 
| 199 | 
            -
                 | 
| 200 | 
            -
             | 
| 201 | 
            -
                    <style>
         | 
| 202 | 
            -
                    .big-font {
         | 
| 203 | 
            -
                        font-size:20px !important;
         | 
| 204 | 
            -
                        font-weight: bold;
         | 
| 205 | 
            -
                    }
         | 
| 206 | 
            -
                    .success-box {
         | 
| 207 | 
            -
                        padding: 20px;
         | 
| 208 | 
            -
                        border-radius: 10px;
         | 
| 209 | 
            -
                        background-color: #d4edda;
         | 
| 210 | 
            -
                        border: 2px solid #28a745;
         | 
| 211 | 
            -
                        color: #155724;
         | 
| 212 | 
            -
                    }
         | 
| 213 | 
            -
                    .failure-box {
         | 
| 214 | 
            -
                        padding: 20px;
         | 
| 215 | 
            -
                        border-radius: 10px;
         | 
| 216 | 
            -
                        background-color: #f8d7da;
         | 
| 217 | 
            -
                        border: 2px solid #dc3545;
         | 
| 218 | 
            -
                        color: #721c24;
         | 
| 219 | 
            -
                    }
         | 
| 220 | 
            -
                    .info-box {
         | 
| 221 | 
            -
                        padding: 20px;
         | 
| 222 | 
            -
                        border-radius: 10px;
         | 
| 223 | 
            -
                        background-color: #d1ecf1;
         | 
| 224 | 
            -
                        border: 2px solid #17a2b8;
         | 
| 225 | 
            -
                        color: #0c5460;
         | 
| 226 | 
            -
                    }
         | 
| 227 | 
            -
                    </style>
         | 
| 228 | 
            -
                """, unsafe_allow_html=True)
         | 
| 229 |  | 
| 230 | 
            -
                 | 
| 231 | 
            -
                st.title("Voice Biometry System - Proof of Concept")
         | 
| 232 | 
            -
                st.markdown("### Finetuned Wav2Vec 2.0")
         | 
| 233 |  | 
| 234 | 
            -
                 | 
| 235 | 
            -
             | 
| 236 | 
            -
                    model, feature_extractor, device = load_model()
         | 
| 237 |  | 
| 238 | 
            -
                 | 
| 239 | 
            -
             | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 240 |  | 
| 241 | 
            -
                 | 
| 242 | 
            -
             | 
| 243 | 
            -
                threshold = st.sidebar.slider(
         | 
| 244 | 
            -
                    "Verification Threshold",
         | 
| 245 | 
            -
                    min_value=0.5,
         | 
| 246 | 
            -
                    max_value=0.95,
         | 
| 247 | 
            -
                    value=0.75,
         | 
| 248 | 
            -
                    step=0.05,
         | 
| 249 | 
            -
                    help="Higher = more strict verification"
         | 
| 250 | 
            -
                )
         | 
| 251 |  | 
| 252 | 
            -
                 | 
| 253 | 
            -
             | 
| 254 | 
            -
             | 
| 255 | 
            -
             | 
| 256 | 
            -
             | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 257 |  | 
| 258 | 
            -
                 | 
| 259 | 
            -
             | 
| 260 | 
            -
             | 
| 261 | 
            -
                     | 
| 262 | 
            -
             | 
| 263 | 
            -
             | 
| 264 | 
            -
             | 
| 265 | 
            -
             | 
| 266 | 
            -
             | 
| 267 | 
            -
             | 
|  | |
| 268 |  | 
| 269 | 
            -
                 | 
| 270 | 
            -
                 | 
|  | |
|  | |
| 271 |  | 
| 272 | 
            -
                 | 
| 273 | 
            -
             | 
| 274 | 
            -
             | 
| 275 | 
            -
             | 
| 276 | 
            -
             | 
| 277 | 
            -
             | 
| 278 | 
            -
             | 
| 279 | 
            -
             | 
| 280 | 
            -
             | 
| 281 | 
            -
             | 
| 282 | 
            -
             | 
| 283 | 
            -
                            "User Name",
         | 
| 284 | 
            -
                            placeholder="Enter name (e.g., Abdou Diop)",
         | 
| 285 | 
            -
                            help="This name will be used to identify the speaker"
         | 
| 286 | 
             
                        )
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 287 |  | 
| 288 | 
            -
                         | 
| 289 | 
            -
             | 
| 290 | 
            -
             | 
| 291 | 
            -
                             | 
| 292 | 
            -
                             | 
|  | |
| 293 | 
             
                        )
         | 
| 294 |  | 
| 295 | 
            -
                     | 
| 296 | 
            -
             | 
| 297 | 
            -
                         | 
| 298 | 
            -
                        - Use clear audio
         | 
| 299 | 
            -
                        - 3-20 seconds long
         | 
| 300 | 
            -
                        - Minimal background noise
         | 
| 301 | 
            -
                        - Normal speaking voice
         | 
| 302 | 
            -
                        """)
         | 
| 303 | 
            -
                    
         | 
| 304 | 
            -
                    if st.button("🎯 Enroll User", type="primary", disabled=(not enroll_name or not enroll_audio)):
         | 
| 305 | 
            -
                        with st.spinner(f"Processing enrollment for {enroll_name}..."):
         | 
| 306 | 
            -
                            # Check if user already exists
         | 
| 307 | 
            -
                            if enroll_name in db.get_all_users():
         | 
| 308 | 
            -
                                st.warning(f"⚠️ User '{enroll_name}' already exists. Please use a different name or remove the existing user first.")
         | 
| 309 | 
            -
                            else:
         | 
| 310 | 
            -
                                # Get embedding
         | 
| 311 | 
            -
                                embedding = get_embedding(model, enroll_audio, feature_extractor, device)
         | 
| 312 | 
            -
                                
         | 
| 313 | 
            -
                                if embedding is not None:
         | 
| 314 | 
            -
                                    # Save enrollment
         | 
| 315 | 
            -
                                    db.enroll(enroll_name, embedding)
         | 
| 316 | 
            -
                                    
         | 
| 317 | 
            -
                                    st.markdown(f"""
         | 
| 318 | 
            -
                                    <div class="success-box">
         | 
| 319 | 
            -
                                        <h3>✅ Enrollment Successful!</h3>
         | 
| 320 | 
            -
                                        <p><strong>{enroll_name}</strong> has been enrolled in the system.</p>
         | 
| 321 | 
            -
                                        <p>Total enrolled users: {len(db.get_all_users())}</p>
         | 
| 322 | 
            -
                                    </div>
         | 
| 323 | 
            -
                                    """, unsafe_allow_html=True)
         | 
| 324 | 
            -
                                    
         | 
| 325 | 
            -
                                    #st.balloons()
         | 
| 326 | 
            -
                                else:
         | 
| 327 | 
            -
                                    st.error("❌ Failed to process audio. Please try again with a different recording.")
         | 
| 328 | 
            -
                
         | 
| 329 | 
            -
                # ============================================================
         | 
| 330 | 
            -
                # TAB 2: VERIFICATION
         | 
| 331 | 
            -
                # ============================================================
         | 
| 332 | 
            -
                with tab2:
         | 
| 333 | 
            -
                    st.header("Verify User Identity")
         | 
| 334 | 
            -
                    st.markdown("Upload a voice recording to verify against enrolled users.")
         | 
| 335 | 
            -
                    
         | 
| 336 | 
            -
                    if not db.get_all_users():
         | 
| 337 | 
            -
                        st.warning("⚠️ No users enrolled yet. Please enroll at least one user first.")
         | 
| 338 | 
            -
                    else:
         | 
| 339 | 
            -
                        col1, col2 = st.columns([2, 1])
         | 
| 340 |  | 
| 341 | 
            -
                        with  | 
| 342 | 
            -
                             | 
| 343 | 
            -
                                 | 
| 344 | 
            -
             | 
| 345 | 
            -
             | 
| 346 | 
            -
             | 
| 347 | 
            -
             | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 348 |  | 
| 349 | 
            -
                         | 
| 350 | 
            -
             | 
| 351 | 
            -
                            **Verification Info:**
         | 
| 352 | 
            -
                            - {len(db.get_all_users())} users enrolled
         | 
| 353 | 
            -
                            - Threshold: {threshold:.2f}
         | 
| 354 | 
            -
                            - Model: Wav2Vec 2.0
         | 
| 355 | 
            -
                            """)
         | 
| 356 |  | 
| 357 | 
            -
                         | 
| 358 | 
            -
                             | 
| 359 | 
            -
             | 
| 360 | 
            -
             | 
| 361 | 
            -
             | 
| 362 | 
            -
                                if embedding is not None:
         | 
| 363 | 
            -
                                    # Verify
         | 
| 364 | 
            -
                                    match_name, similarity, is_verified = db.verify(embedding, threshold)
         | 
| 365 | 
            -
                                    
         | 
| 366 | 
            -
                                    # Display results
         | 
| 367 | 
            -
                                    st.markdown("---")
         | 
| 368 | 
            -
                                    
         | 
| 369 | 
            -
                                    if is_verified:
         | 
| 370 | 
            -
                                        st.markdown(f"""
         | 
| 371 | 
            -
                                        <div class="success-box">
         | 
| 372 | 
            -
                                            <h2>✅ VERIFICATION SUCCESSFUL</h2>
         | 
| 373 | 
            -
                                            <h3>Identified as: {match_name}</h3>
         | 
| 374 | 
            -
                                            <p style="font-size: 18px;">Confidence Score: <strong>{similarity:.1%}</strong></p>
         | 
| 375 | 
            -
                                        </div>
         | 
| 376 | 
            -
                                        """, unsafe_allow_html=True)
         | 
| 377 | 
            -
                                        
         | 
| 378 | 
            -
                                        st.success(f"🎉 Welcome back, {match_name}!")
         | 
| 379 | 
            -
                                        
         | 
| 380 | 
            -
                                    else:
         | 
| 381 | 
            -
                                        st.markdown(f"""
         | 
| 382 | 
            -
                                        <div class="failure-box">
         | 
| 383 | 
            -
                                            <h2>❌ VERIFICATION FAILED</h2>
         | 
| 384 | 
            -
                                            <p>Closest match: <strong>{match_name}</strong></p>
         | 
| 385 | 
            -
                                            <p>Similarity: <strong>{similarity:.1%}</strong></p>
         | 
| 386 | 
            -
                                            <p>Threshold required: <strong>{threshold:.1%}</strong></p>
         | 
| 387 | 
            -
                                            <p><em>This speaker is not recognized in the system.</em></p>
         | 
| 388 | 
            -
                                        </div>
         | 
| 389 | 
            -
                                        """, unsafe_allow_html=True)
         | 
| 390 | 
            -
                                    
         | 
| 391 | 
            -
                                    # Show all scores
         | 
| 392 | 
            -
                                    with st.expander("📊 See detailed scores for all enrolled users"):
         | 
| 393 | 
            -
                                        st.markdown("### Similarity Scores")
         | 
| 394 | 
            -
                                        
         | 
| 395 | 
            -
                                        scores = []
         | 
| 396 | 
            -
                                        embedding_tensor = torch.from_numpy(embedding)
         | 
| 397 | 
            -
                                        
         | 
| 398 | 
            -
                                        for name, enrolled_emb in db.enrollments.items():
         | 
| 399 | 
            -
                                            enrolled_tensor = torch.from_numpy(enrolled_emb)
         | 
| 400 | 
            -
                                            sim = F.cosine_similarity(embedding_tensor, enrolled_tensor, dim=1).item()
         | 
| 401 | 
            -
                                            scores.append({
         | 
| 402 | 
            -
                                                'User': name,
         | 
| 403 | 
            -
                                                'Similarity': f"{sim:.1%}",
         | 
| 404 | 
            -
                                                'Status': '✅ Match' if sim >= threshold else '❌ No match'
         | 
| 405 | 
            -
                                            })
         | 
| 406 | 
            -
                                        
         | 
| 407 | 
            -
                                        # Sort by similarity
         | 
| 408 | 
            -
                                        scores.sort(key=lambda x: x['Similarity'], reverse=True)
         | 
| 409 | 
            -
                                        
         | 
| 410 | 
            -
                                        import pandas as pd
         | 
| 411 | 
            -
                                        df = pd.DataFrame(scores)
         | 
| 412 | 
            -
                                        st.dataframe(df, use_container_width=True, hide_index=True)
         | 
| 413 | 
            -
                                
         | 
| 414 | 
            -
                                else:
         | 
| 415 | 
            -
                                    st.error("❌ Failed to process audio. Please try again with a different recording.")
         | 
| 416 | 
            -
                
         | 
| 417 | 
            -
                # ============================================================
         | 
| 418 | 
            -
                # TAB 3: ABOUT
         | 
| 419 | 
            -
                # ============================================================
         | 
| 420 | 
            -
                with tab3:
         | 
| 421 | 
            -
                    st.header("About This System")
         | 
| 422 |  | 
| 423 | 
            -
                     | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 424 |  | 
| 425 | 
            -
                     | 
| 426 | 
            -
             | 
| 427 | 
            -
                         | 
|  | |
| 428 |  | 
| 429 | 
            -
                        ** | 
| 430 | 
             
                        - Base: Wav2Vec 2.0 (Facebook AI)
         | 
| 431 | 
            -
                        -  | 
| 432 | 
            -
                        - 1035  | 
| 433 | 
            -
                        -  | 
| 434 |  | 
| 435 | 
            -
                        ** | 
| 436 | 
             
                        - Loss: Supervised Contrastive Learning
         | 
| 437 | 
             
                        - Framework: PyTorch + Transformers
         | 
| 438 | 
            -
                        -  | 
| 439 | 
            -
                        -  | 
| 440 | 
            -
                         | 
| 441 | 
            -
             | 
| 442 | 
            -
             | 
| 443 | 
            -
                         | 
| 444 | 
            -
                         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 445 |  | 
| 446 | 
            -
                        ** | 
| 447 | 
            -
             | 
| 448 | 
            -
             | 
| 449 | 
            -
             | 
| 450 | 
            -
             | 
| 451 |  | 
| 452 | 
            -
                         | 
| 453 | 
            -
                         | 
| 454 | 
            -
                         | 
| 455 | 
            -
                        -  | 
|  | |
|  | |
|  | |
|  | |
| 456 | 
             
                        """)
         | 
| 457 | 
            -
             | 
| 458 | 
            -
             | 
| 459 | 
            -
                    
         | 
| 460 | 
            -
                    st.markdown("""
         | 
| 461 | 
            -
                    ### 🔧 How It Works
         | 
| 462 | 
            -
                    
         | 
| 463 | 
            -
                    1. **Enrollment Phase:**
         | 
| 464 | 
            -
                       - User uploads voice recording
         | 
| 465 | 
            -
                       - System extracts 256-dimensional embedding
         | 
| 466 | 
            -
                       - Embedding stored in database with user name
         | 
| 467 | 
            -
                    
         | 
| 468 | 
            -
                    2. **Verification Phase:**
         | 
| 469 | 
            -
                       - Unknown voice recording uploaded
         | 
| 470 | 
            -
                       - System extracts embedding
         | 
| 471 | 
            -
                       - Computes cosine similarity with all enrolled users
         | 
| 472 | 
            -
                       - Returns match if similarity exceeds threshold
         | 
| 473 | 
            -
                    
         | 
| 474 | 
            -
                    3. **Matching Algorithm:**
         | 
| 475 | 
            -
                       - Cosine similarity between embeddings
         | 
| 476 | 
            -
                       - Range: -1 (opposite) to +1 (identical)
         | 
| 477 | 
            -
                       - Typical same-speaker: 0.75-0.95
         | 
| 478 | 
            -
                       - Typical different-speaker: 0.30-0.70
         | 
| 479 | 
            -
                    """)
         | 
| 480 | 
            -
                    
         | 
| 481 | 
            -
                    st.markdown("---")
         | 
| 482 | 
            -
                    
         | 
| 483 | 
            -
                    st.info("""
         | 
| 484 | 
            -
                    **Note:** This is a proof of concept system. For production deployment, consider:
         | 
| 485 | 
            -
                    - Larger training dataset (10-20 samples per speaker)
         | 
| 486 | 
            -
                    - Better base model (WavLM for noisy conditions)
         | 
| 487 | 
            -
                    - Anti-spoofing measures
         | 
| 488 | 
            -
                    - Liveness detection
         | 
| 489 | 
            -
                    - Multi-enrollment (average multiple recordings per user)
         | 
| 490 | 
            -
                    """)
         | 
| 491 | 
            -
             | 
| 492 | 
            -
             | 
| 493 | 
            -
            if __name__ == "__main__":
         | 
| 494 | 
            -
                main()
         | 
|  | |
| 1 | 
             
            import os
         | 
|  | |
| 2 |  | 
| 3 | 
            +
            import gradio as gr
         | 
| 4 | 
             
            import torch
         | 
| 5 | 
             
            import torch.nn as nn
         | 
| 6 | 
             
            import torch.nn.functional as F
         | 
|  | |
| 8 | 
             
            import torchaudio
         | 
| 9 | 
             
            from transformers import Wav2Vec2FeatureExtractor, Wav2Vec2Model
         | 
| 10 | 
             
            import numpy as np
         | 
|  | |
| 11 | 
             
            import json
         | 
|  | |
| 12 |  | 
| 13 | 
             
            # ============================================================
         | 
| 14 | 
             
            # MODEL DEFINITION
         | 
|  | |
| 38 |  | 
| 39 |  | 
| 40 | 
             
            # ============================================================
         | 
| 41 | 
            +
            # GLOBAL SETUP
         | 
| 42 | 
             
            # ============================================================
         | 
| 43 |  | 
| 44 | 
            +
            # Run on the GPU when CUDA is available, otherwise fall back to the CPU.
            device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

            # Load model
            # The network is built with a 256-dimensional embedding head and moved to `device`.
            model = Wav2Vec2ForSpeakerEmbedding(embedding_size=256).to(device)
            # map_location places the stored tensors on `device` while loading.
            # NOTE(review): torch.load unpickles the file - only ship checkpoints from a trusted source.
            checkpoint = torch.load('best_embedding_model.pth', map_location=device)
            # The checkpoint is a dict; the weights live under the 'model_state_dict' key.
            model.load_state_dict(checkpoint['model_state_dict'])
            # The app only runs inference, so switch the model to evaluation mode once here.
            model.eval()

            # Pre-processing configuration comes from the public wav2vec2-base checkpoint.
            feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained("facebook/wav2vec2-base")
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 53 |  | 
| 54 |  | 
| 55 | 
             
            # ============================================================
         | 
| 56 | 
            +
            # DATABASE
         | 
| 57 | 
             
            # ============================================================
         | 
| 58 |  | 
| 59 | 
             
            class EnrollmentDB:
         | 
|  | |
| 79 | 
             
                    self.save_db()
         | 
| 80 |  | 
| 81 | 
             
                def verify(self, embedding, threshold=0.75):
         | 
|  | |
|  | |
|  | |
|  | |
| 82 | 
             
                    if not self.enrollments:
         | 
| 83 | 
             
                        return None, 0.0, False
         | 
| 84 |  | 
|  | |
| 96 | 
             
                            best_match = name
         | 
| 97 |  | 
| 98 | 
             
                    is_verified = best_score >= threshold
         | 
|  | |
| 99 | 
             
                    return best_match, best_score, is_verified
         | 
| 100 |  | 
| 101 | 
             
                def get_all_users(self):
                    """Return the names of all enrolled users as a new list."""
                    # Iterating the mapping yields its keys, i.e. the user names.
                    return list(self.enrollments)
         | 
| 103 |  | 
| 104 | 
            +
                def get_user_count(self):
                    """Return how many users are currently enrolled."""
                    enrolled = self.enrollments
                    return len(enrolled)
         | 
| 106 | 
            +
                
         | 
| 107 | 
             
                def remove_user(self, name):
         | 
| 108 | 
             
                    if name in self.enrollments:
         | 
| 109 | 
             
                        del self.enrollments[name]
         | 
|  | |
| 111 | 
             
                        return True
         | 
| 112 | 
             
                    return False
         | 
| 113 |  | 
| 114 | 
            +
            # Single module-level enrollment store used by the callback functions of this app.
            db = EnrollmentDB()
         | 
| 115 | 
            +
             | 
| 116 |  | 
| 117 | 
             
            # ============================================================
         | 
| 118 | 
            +
            # AUDIO PROCESSING
         | 
| 119 | 
             
            # ============================================================
         | 
| 120 |  | 
| 121 | 
            +
            def process_audio(audio_path, max_length=16000*3):
                """Load an audio file and convert it into Wav2Vec2 input values.

                The signal is down-mixed to mono, resampled to 16 kHz, centre-cropped or
                right-padded with zeros to exactly ``max_length`` samples, peak-normalised
                and finally passed through the module-level ``feature_extractor``.

                Args:
                    audio_path: Location of the recording, handed to ``soundfile.read``.
                    max_length: Target length in samples at 16 kHz (default: 3 seconds).

                Returns:
                    The ``input_values`` tensor produced by the feature extractor.

                Raises:
                    ValueError: If any step fails. The original exception is chained as
                        ``__cause__`` so the real traceback is not lost.
                """
                try:
                    waveform, sr = sf.read(audio_path, dtype='float32')
                    waveform = torch.from_numpy(waveform)

                    # Multi-channel audio arrives with a trailing channel axis;
                    # averaging over it yields a mono signal.
                    if len(waveform.shape) > 1:
                        waveform = torch.mean(waveform, dim=-1)

                    if sr != 16000:
                        resampler = torchaudio.transforms.Resample(sr, 16000)
                        waveform = resampler(waveform)

                    n_samples = len(waveform)
                    if n_samples > max_length:
                        # Keep the middle of the recording rather than its beginning.
                        start = (n_samples - max_length) // 2
                        waveform = waveform[start:start + max_length]
                    elif n_samples < max_length:
                        waveform = torch.nn.functional.pad(waveform, (0, max_length - n_samples))

                    # Peak-normalise; an all-zero (silent) signal is left untouched
                    # to avoid dividing by zero.
                    peak = waveform.abs().max()
                    if peak > 0:
                        waveform = waveform / peak

                    inputs = feature_extractor(
                        waveform.numpy(),
                        sampling_rate=16000,
                        return_tensors="pt"
                    )

                    return inputs.input_values

                except Exception as e:
                    raise ValueError(f"Error processing audio: {e}") from e
         | 
| 154 | 
            +
             | 
| 155 | 
            +
             | 
| 156 | 
            +
            def get_embedding(audio_path):
                """Compute the speaker embedding of one recording.

                The file is pre-processed with ``process_audio``, moved to ``device`` and
                fed through the module-level ``model`` with gradient tracking disabled.

                Returns:
                    The model output as a NumPy array on the CPU.
                """
                model.eval()
                with torch.no_grad():
                    batch = process_audio(audio_path).to(device)
                    vector = model(batch)
                return vector.cpu().numpy()
         | 
| 164 | 
            +
             | 
| 165 |  | 
| 166 | 
            +
            # ============================================================
         | 
| 167 | 
            +
            # GRADIO FUNCTIONS
         | 
| 168 | 
            +
            # ============================================================
         | 
| 169 |  | 
| 170 | 
            +
            def enroll_user(name, audio, threshold):
                """Register a new speaker from a name and a voice recording.

                Args:
                    name: Display name typed by the user; surrounding whitespace is ignored.
                    audio: File path of the recording (falsy when nothing was provided).
                    threshold: Unused here; accepted because the UI passes the slider value.

                Returns:
                    A ``(status message, user list text, statistics text)`` tuple.
                """
                def _reply(message):
                    # Every outcome refreshes the user list and the statistics panel.
                    return message, get_user_list(), get_stats()

                if not (name and name.strip()):
                    return _reply("❌ Veuillez entrer un nom.")

                if not audio:
                    return _reply("❌ Veuillez uploader un enregistrement audio.")

                name = name.strip()

                if name in db.get_all_users():
                    return _reply(f"⚠️ L'utilisateur '{name}' existe déjà.")

                try:
                    db.enroll(name, get_embedding(audio))
                    return _reply(
                        f"✅ Enregistrement réussi!\n\n👤 {name} a été enregistré dans le système.\n📊 Total utilisateurs: {db.get_user_count()}"
                    )
                except Exception as e:
                    # UI boundary: report the failure to the user instead of crashing the app.
                    return _reply(f"❌ Erreur: {e!s}")
         | 
| 189 | 
            +
             | 
| 190 | 
            +
             | 
| 191 | 
            +
            def verify_user(audio, threshold):
                """Identify the speaker of a recording against all enrolled users.

                Args:
                    audio: File path of the recording to check (falsy when nothing was provided).
                    threshold: Minimum cosine similarity for a match to count as verified.

                Returns:
                    A ``(report, details)`` tuple of Markdown strings. ``report`` is the
                    verdict followed by the per-user score table; ``details`` is the score
                    table alone. For input errors and failures ``details`` is ``""``.
                """
                if not audio:
                    return "❌ Veuillez uploader un enregistrement audio.", ""

                if db.get_user_count() == 0:
                    return "⚠️ Aucun utilisateur enregistré. Veuillez d'abord enregistrer des utilisateurs.", ""

                try:
                    embedding = get_embedding(audio)
                    match_name, similarity, is_verified = db.verify(embedding, threshold)

                    # Re-score the probe against every enrolled user so the report can list
                    # all candidates, best match first.
                    probe = torch.from_numpy(embedding)
                    scores = sorted(
                        (
                            (user, F.cosine_similarity(probe, torch.from_numpy(enrolled), dim=1).item())
                            for user, enrolled in db.enrollments.items()
                        ),
                        key=lambda pair: pair[1],
                        reverse=True,
                    )

                    # Build the table in one join instead of repeated string concatenation.
                    details = "📊 **Scores détaillés:**\n\n" + "".join(
                        f"{'✅' if sim >= threshold else '❌'} **{user}**: {sim:.1%}\n"
                        for user, sim in scores
                    )

                    # The headers are spelled with explicit newlines so their content does
                    # not depend on how this function is indented in the source file.
                    if is_verified:
                        header = (
                            "\n"
                            "# ✅ VÉRIFICATION RÉUSSIE\n"
                            "\n"
                            f"## Identifié comme: **{match_name}**\n"
                            f"### Score de confiance: **{similarity:.1%}**\n"
                            "\n"
                            "---\n"
                        )
                    else:
                        header = (
                            "\n"
                            "# ❌ VÉRIFICATION ÉCHOUÉE\n"
                            "\n"
                            f"Meilleure correspondance: **{match_name}**\n"
                            f"Similarité: **{similarity:.1%}**\n"
                            f"Seuil requis: **{threshold:.1%}**\n"
                            "\n"
                            "*Cette voix n'est pas reconnue dans le système.*\n"
                            "\n"
                            "---\n"
                        )

                    return header + details, details

                except Exception as e:
                    # UI boundary: surface the failure as a message instead of crashing the app.
                    return f"❌ Erreur: {str(e)}", ""
         | 
| 245 | 
            +
             | 
| 246 | 
            +
             | 
| 247 | 
            +
            def get_user_list():
                """Return the enrolled user names as bulleted text, sorted alphabetically.

                A fixed placeholder message is returned when nobody is enrolled.
                """
                names = db.get_all_users()
                if names:
                    return "\n".join(f"• {entry}" for entry in sorted(names))
                return "Aucun utilisateur enregistré"
         | 
| 253 | 
            +
             | 
| 254 | 
            +
             | 
| 255 | 
            +
            def get_stats():
                """Return the Markdown text of the statistics panel.

                Only the enrolled-user count is read live from ``db``; the accuracy, AUC
                and architecture lines are fixed text.
                """
                # The leading and trailing empty entries reproduce the newline that opens
                # and the newline that closes the text.
                rows = [
                    "",
                    "**📊 Statistiques du système:**",
                    f"- Utilisateurs enregistrés: {db.get_user_count()}",
                    "- Précision du modèle: 76%",
                    "- Score AUC: 0.82",
                    "- Architecture: Wav2Vec 2.0",
                    "",
                ]
                return "\n".join(rows)
         | 
| 264 | 
            +
             | 
| 265 | 
            +
             | 
| 266 | 
            +
            def delete_user(name):
                """Remove an enrolled user by name.

                Surrounding whitespace is stripped once, and the cleaned name is used both
                for the lookup and in the status message, so the message always shows the
                name that was actually searched for.

                Args:
                    name: Name typed by the user; may be ``None`` or blank.

                Returns:
                    A ``(status message, user list text, statistics text)`` tuple.
                """
                cleaned = name.strip() if name else ""

                if not cleaned:
                    message = "❌ Veuillez sélectionner un utilisateur."
                elif db.remove_user(cleaned):
                    message = f"✅ Utilisateur '{cleaned}' supprimé."
                else:
                    message = f"❌ Utilisateur '{cleaned}' non trouvé."

                # The list and statistics are read after the removal so they reflect it.
                return message, get_user_list(), get_stats()
         | 
| 275 | 
            +
             | 
| 276 | 
            +
             | 
| 277 | 
            +
            # ============================================================
         | 
| 278 | 
            +
            # GRADIO INTERFACE
         | 
| 279 | 
            +
            # ============================================================
         | 
| 280 | 
            +
             | 
| 281 | 
            +
            def _verification_info():
                """Return the Markdown info panel of the verification tab with the live user count."""
                return "\n".join([
                    "**ℹ️ Information:**",
                    f"- {db.get_user_count()} utilisateur(s) enregistré(s)",
                    "- Seuil: ajustable dans le slider ci-dessus",
                    "- Modèle: Wav2Vec 2.0",
                ])


            def _refresh_shared_views():
                """Return fresh values for every component that mirrors the enrollment database.

                The order matches the ``shared_views`` output list wired up in the interface:
                statistics panel, enrollment-tab user list, management-tab user list,
                verification-tab info panel.
                """
                users = get_user_list()
                return get_stats(), users, users, _verification_info()


            with gr.Blocks(title="Biométrie Vocale - POC", theme=gr.themes.Soft()) as demo:

                gr.Markdown("""
                # 🎤 Système de Biométrie Vocale
                ### Proof of Concept - Wav2Vec 2.0 Fine-tuné
                """)

                with gr.Row():
                    with gr.Column(scale=2):
                        stats_display = gr.Markdown(get_stats())
                    with gr.Column(scale=1):
                        threshold = gr.Slider(
                            minimum=0.5,
                            maximum=0.95,
                            value=0.75,
                            step=0.05,
                            label="Seuil de vérification",
                            info="Plus élevé = vérification plus stricte"
                        )

                with gr.Tabs():
                    # TAB 1: ENROLLMENT
                    with gr.Tab("📝 Enregistrement"):
                        gr.Markdown("### Enregistrer un nouvel utilisateur")

                        with gr.Row():
                            with gr.Column():
                                enroll_name_input = gr.Textbox(
                                    label="Nom de l'utilisateur",
                                    placeholder="Ex: Jean Dupont"
                                )
                                enroll_audio_input = gr.Audio(
                                    label="Enregistrement vocal",
                                    type="filepath",
                                    sources=["upload", "microphone"]
                                )
                                enroll_button = gr.Button("🎯 Enregistrer", variant="primary")

                            with gr.Column():
                                gr.Markdown("""
                                **💡 Conseils:**
                                - Audio clair et net
                                - 3-20 secondes recommandées
                                - Bruit de fond minimal
                                - Voix normale
                                """)
                                enrolled_users = gr.Textbox(
                                    label="Utilisateurs enregistrés",
                                    value=get_user_list(),
                                    lines=8,
                                    interactive=False
                                )

                        enroll_output = gr.Markdown()

                        # Keep the event handle: a follow-up refresh is chained to it once
                        # the components of the other tabs exist.
                        enroll_event = enroll_button.click(
                            fn=enroll_user,
                            inputs=[enroll_name_input, enroll_audio_input, threshold],
                            outputs=[enroll_output, enrolled_users, stats_display]
                        )

                    # TAB 2: VERIFICATION
                    with gr.Tab("✅ Vérification"):
                        gr.Markdown("### Vérifier l'identité d'un utilisateur")

                        with gr.Row():
                            with gr.Column():
                                verify_audio_input = gr.Audio(
                                    label="Enregistrement vocal à vérifier",
                                    type="filepath",
                                    sources=["upload", "microphone"]
                                )
                                verify_button = gr.Button("🔍 Vérifier", variant="primary")

                            with gr.Column():
                                # Held in a variable so the user count can be refreshed;
                                # an inline f-string would freeze it at build time.
                                verify_info = gr.Markdown(_verification_info())

                        verify_output = gr.Markdown()
                        verify_details = gr.Markdown()

                        verify_button.click(
                            fn=verify_user,
                            inputs=[verify_audio_input, threshold],
                            outputs=[verify_output, verify_details]
                        )

                    # TAB 3: MANAGEMENT
                    with gr.Tab("⚙️ Gestion"):
                        gr.Markdown("### Gérer les utilisateurs enregistrés")

                        with gr.Row():
                            with gr.Column():
                                delete_name_input = gr.Textbox(
                                    label="Nom de l'utilisateur à supprimer",
                                    placeholder="Ex: Jean Dupont"
                                )
                                delete_button = gr.Button("🗑️ Supprimer", variant="stop")

                            with gr.Column():
                                delete_users_list = gr.Textbox(
                                    label="Utilisateurs enregistrés",
                                    value=get_user_list(),
                                    lines=8,
                                    interactive=False
                                )

                        delete_output = gr.Markdown()

                        delete_event = delete_button.click(
                            fn=delete_user,
                            inputs=[delete_name_input],
                            outputs=[delete_output, delete_users_list, stats_display]
                        )

                    # TAB 4: ABOUT
                    with gr.Tab("ℹ️ À propos"):
                        gr.Markdown("""
                        ## 🎯 Technologie

                        **Architecture du modèle:**
                        - Base: Wav2Vec 2.0 (Facebook AI)
                        - Fine-tuné sur 247 locuteurs
                        - 1035 échantillons vocaux (qualité téléphonique, 8kHz)
                        - Dimension d'embedding: 256

                        **Détails d'entraînement:**
                        - Loss: Supervised Contrastive Learning
                        - Framework: PyTorch + Transformers
                        - Durée d'entraînement: ~50 epochs
                        - Matériel: NVIDIA RTX 3050

                        ---

                        ## 📊 Métriques de Performance

                        **Résultats d'évaluation:**
                        - **Précision:** 76%
                        - **Score AUC:** 0.82
                        - **Taux de vrais positifs:** 79%
                        - **Taux de faux positifs:** 27%

                        **Ensemble de test:**
                        - 1000 paires de vérification
                        - 500 paires même locuteur
                        - 500 paires locuteurs différents

                        ---

                        ## 🔧 Fonctionnement

                        1. **Phase d'enregistrement:**
                           - L'utilisateur uploade un enregistrement vocal
                           - Le système extrait un embedding de dimension 256
                           - L'embedding est stocké dans la base de données

                        2. **Phase de vérification:**
                           - Enregistrement vocal inconnu uploadé
                           - Le système extrait l'embedding
                           - Calcul de similarité cosinus avec tous les utilisateurs enregistrés
                           - Correspondance si similarité ≥ seuil

                        3. **Algorithme de correspondance:**
                           - Similarité cosinus entre embeddings
                           - Plage: -1 (opposé) à +1 (identique)
                           - Même locuteur typique: 0.75-0.95
                           - Locuteurs différents typique: 0.30-0.70

                        ---

                        **Note:** Ceci est un système proof of concept. Pour un déploiement en production, considérer:
                        - Dataset plus large (10-20 échantillons par locuteur)
                        - Meilleur modèle de base (WavLM pour conditions bruitées)
                        - Mesures anti-spoofing
                        - Détection de vivacité
                        - Multi-enregistrement (moyenne de plusieurs enregistrements par utilisateur)
                        """)

                # Several components mirror the enrollment database. Refresh all of them
                # after each enroll/delete, and on every page load, so that no tab shows
                # a stale user list or count.
                shared_views = [stats_display, enrolled_users, delete_users_list, verify_info]
                enroll_event.then(fn=_refresh_shared_views, inputs=None, outputs=shared_views)
                delete_event.then(fn=_refresh_shared_views, inputs=None, outputs=shared_views)
                demo.load(fn=_refresh_shared_views, inputs=None, outputs=shared_views)

            demo.launch(share=False)
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  |