import os import re from datetime import datetime import gradio as gr import torch import pandas as pd import soundfile as sf import torchaudio from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC from src.transcription import SpeechEncoder from src.sentiment import TextEncoder # Préchargement des modèles processor_ctc = Wav2Vec2Processor.from_pretrained( "jonatasgrosman/wav2vec2-large-xlsr-53-french", cache_dir="./models" #"alec228/audio-sentiment/tree/main/wav2vec2", cache_dir="./models" ) model_ctc = Wav2Vec2ForCTC.from_pretrained( "jonatasgrosman/wav2vec2-large-xlsr-53-french", cache_dir="./models" #"alec228/audio-sentiment/tree/main/wav2vec2", cache_dir="./models" ) speech_enc = SpeechEncoder() text_enc = TextEncoder() # Pipeline d’analyse def analyze_audio(audio_path): # Lecture et prétraitement data, sr = sf.read(audio_path) arr = data.T if data.ndim > 1 else data wav = torch.from_numpy(arr).unsqueeze(0).float() if sr != 16000: wav = torchaudio.transforms.Resample(sr, 16000)(wav) sr = 16000 if wav.size(0) > 1: wav = wav.mean(dim=0, keepdim=True) # Transcription inputs = processor_ctc(wav.squeeze().numpy(), sampling_rate=sr, return_tensors="pt") with torch.no_grad(): logits = model_ctc(**inputs).logits pred_ids = torch.argmax(logits, dim=-1) transcription = processor_ctc.batch_decode(pred_ids)[0].lower() # Sentiment principal sent_dict = TextEncoder.analyze_sentiment(transcription) label, conf = max(sent_dict.items(), key=lambda x: x[1]) emojis = {"positif": "😊", "neutre": "😐", "négatif": "☹️"} emoji = emojis.get(label, "") # Segmentation par phrase segments = [s.strip() for s in re.split(r'[.?!]', transcription) if s.strip()] seg_results = [] for seg in segments: sd = TextEncoder.analyze_sentiment(seg) l, c = max(sd.items(), key=lambda x: x[1]) seg_results.append({"Segment": seg, "Sentiment": l.capitalize(), "Confiance (%)": round(c*100,1)}) seg_df = pd.DataFrame(seg_results) # Historique entry timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S") history_entry = { "Horodatage": timestamp, "Transcription": transcription, "Sentiment": label.capitalize(), "Confiance (%)": round(conf*100,1) } # Rendu summary_html = ( f"
Confiance : {conf*100:.1f}%
" ) return transcription, summary_html, seg_df, history_entry # Export CSV def export_history_csv(history): df = pd.DataFrame(history) path = "history.csv" df.to_csv(path, index=False) return path # Interface Chat + historique demo = gr.Blocks(theme=gr.themes.Monochrome(primary_hue="purple")) with demo: gr.Markdown("# Chat & Analyse de Sentiment Audio") gr.HTML("""