"""Gradio app: French speech transcription followed by sentiment analysis.

An audio clip is transcribed with a Wav2Vec2 CTC model, the transcription is
scored by ``TextEncoder.analyze_sentiment`` (globally and per sentence), and
the results are shown in a chat-style interface with a CSV-exportable history.
"""

import os
import re
from datetime import datetime

import gradio as gr
import pandas as pd
import soundfile as sf
import torch
import torchaudio
from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC

from src.transcription import SpeechEncoder
from src.sentiment import TextEncoder

# Checkpoint used for CTC transcription and the local directory it is cached in.
# NOTE: "alec228/audio-sentiment/tree/main/wav2vec2" was present as a
# commented-out alternative source in the original code.
CTC_MODEL_NAME = "jonatasgrosman/wav2vec2-large-xlsr-53-french"
MODEL_CACHE_DIR = "./models"

# Sample rate every clip is resampled to before being handed to the processor.
TARGET_SAMPLE_RATE = 16000

# Emoji shown next to the dominant sentiment label (empty string if unknown).
SENTIMENT_EMOJIS = {"positif": "😊", "neutre": "😐", "négatif": "☹️"}

# Sentence delimiters used to cut the transcription into segments.
SENTENCE_SPLIT_RE = re.compile(r"[.?!]")

# Instruction text displayed at the top of the interface.
# NOTE(review): this text contains no HTML tags although it is rendered with
# gr.HTML; layout markup may have been lost upstream - confirm.
INSTRUCTIONS_HTML = """
Étape 1 : Enregistrez votre voix ou téléversez un fichier audio (format WAV recommandé).
Étape 2 : Cliquez sur le bouton Analyser pour lancer la transcription et l’analyse.
Étape 3 : Visualisez les résultats : transcription, sentiment, et analyse détaillée.
Étape 4 : Exportez l’historique des analyses au format CSV si besoin.
"""

# Models are loaded once at import time so that requests do not pay for it.
processor_ctc = Wav2Vec2Processor.from_pretrained(
    CTC_MODEL_NAME,
    cache_dir=MODEL_CACHE_DIR,
)
model_ctc = Wav2Vec2ForCTC.from_pretrained(
    CTC_MODEL_NAME,
    cache_dir=MODEL_CACHE_DIR,
)
speech_enc = SpeechEncoder()
text_enc = TextEncoder()


def _load_mono_waveform(audio_path):
    """Read *audio_path* and return a ``(1, samples)`` float32 tensor.

    Multi-channel audio is averaged down to mono and the result is resampled
    to ``TARGET_SAMPLE_RATE`` when the file uses another rate.
    """
    data, sr = sf.read(audio_path)
    wav = torch.from_numpy(data).float()
    if wav.ndim == 1:
        wav = wav.unsqueeze(0)
    else:
        # soundfile returns multi-channel audio as (frames, channels); the
        # channels are averaged here so that a single 1-D signal reaches the
        # processor (the previous mix-down check could never trigger).
        wav = wav.mean(dim=1).unsqueeze(0)
    if sr != TARGET_SAMPLE_RATE:
        wav = torchaudio.transforms.Resample(sr, TARGET_SAMPLE_RATE)(wav)
    return wav


def _transcribe(wav):
    """Return the lower-cased greedy CTC transcription of a mono waveform."""
    inputs = processor_ctc(
        wav.squeeze(0).numpy(),
        sampling_rate=TARGET_SAMPLE_RATE,
        return_tensors="pt",
    )
    with torch.no_grad():
        logits = model_ctc(**inputs).logits
    pred_ids = torch.argmax(logits, dim=-1)
    return processor_ctc.batch_decode(pred_ids)[0].lower()


def _top_sentiment(text):
    """Return the ``(label, score)`` pair with the highest score for *text*.

    Assumes ``TextEncoder.analyze_sentiment`` returns a mapping of label to
    score - TODO confirm against ``src.sentiment``.
    """
    scores = TextEncoder.analyze_sentiment(text)
    return max(scores.items(), key=lambda item: item[1])


def analyze_audio(audio_path):
    """Transcribe an audio file and analyse the sentiment of its content.

    Args:
        audio_path: Path of the audio file to analyse.

    Returns:
        A 4-tuple ``(transcription, summary_html, seg_df, history_entry)``:
        the lower-cased transcription, a text summary of the dominant
        sentiment, a DataFrame with one row per sentence segment, and a dict
        describing this analysis for the history table.
    """
    wav = _load_mono_waveform(audio_path)
    transcription = _transcribe(wav)

    # Dominant sentiment of the whole transcription.
    label, conf = _top_sentiment(transcription)
    emoji = SENTIMENT_EMOJIS.get(label, "")

    # Per-sentence breakdown; blank fragments are ignored.
    stripped = (part.strip() for part in SENTENCE_SPLIT_RE.split(transcription))
    seg_rows = []
    for seg in stripped:
        if not seg:
            continue
        seg_label, seg_conf = _top_sentiment(seg)
        seg_rows.append({
            "Segment": seg,
            "Sentiment": seg_label.capitalize(),
            "Confiance (%)": round(seg_conf*100, 1),
        })
    seg_df = pd.DataFrame(seg_rows)

    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    history_entry = {
        "Horodatage": timestamp,
        "Transcription": transcription,
        "Sentiment": label.capitalize(),
        "Confiance (%)": round(conf*100, 1),
    }

    # NOTE(review): the summary carries no HTML tags although it is rendered
    # by a gr.HTML component; the visible text is kept exactly as found -
    # confirm whether layout markup is missing.
    summary_html = (
        f"\n"
        f"{emoji}"
        f"\n\n{label.upper()}\n\n"
        f"\n"
        f"\n\nConfiance : {conf*100:.1f}%\n\n"
    )
    return transcription, summary_html, seg_df, history_entry


def export_history_csv(history):
    """Write the analysis history to ``history.csv`` and return that path.

    Args:
        history: List of history-entry dicts as produced by ``analyze_audio``.
    """
    df = pd.DataFrame(history)
    path = "history.csv"
    df.to_csv(path, index=False)
    return path


def chat_callback(audio_path, chat_history, hist_state):
    """Analyse one clip and return the refreshed values of all UI outputs.

    Args:
        audio_path: File path provided by the audio component (may be empty
            when nothing was recorded or uploaded).
        chat_history: List of ``(user, bot)`` message pairs accumulated so far.
        hist_state: List of history-entry dicts accumulated so far.

    Returns:
        Values for, in order: the chatbot, the chat state, the transcription
        box, the sentiment summary, the per-segment table, the history state
        and the history table.

    Raises:
        gr.Error: If no audio was provided.
    """
    if not audio_path:
        # Surface a readable message instead of a low-level file-read failure.
        raise gr.Error(
            "Veuillez enregistrer ou téléverser un fichier audio avant de lancer l’analyse."
        )

    transcription, summary, seg_df, hist_entry = analyze_audio(audio_path)
    user_msg = "[Audio reçu]"
    bot_msg = f"**Transcription :** {transcription}\n**Sentiment :** {summary}"
    # New lists are built rather than mutating the state objects in place.
    chat_history = chat_history + [(user_msg, bot_msg)]
    hist_state = hist_state + [hist_entry]
    return (
        chat_history,
        chat_history,
        transcription,
        summary,
        seg_df,
        hist_state,
        pd.DataFrame(hist_state),
    )


# Chat interface + history
demo = gr.Blocks(theme=gr.themes.Monochrome(primary_hue="purple"))
with demo:
    gr.Markdown("# Chat & Analyse de Sentiment Audio")
    gr.HTML(INSTRUCTIONS_HTML)

    with gr.Row():
        with gr.Column(scale=2):
            audio_in = gr.Audio(
                sources=["microphone", "upload"],
                type="filepath",
                label="Audio Input",
            )
            btn = gr.Button("Analyser")
            export_btn = gr.Button("Exporter CSV")
        with gr.Column(scale=3):
            chat = gr.Chatbot(label="Historique des échanges")
            transcription_out = gr.Textbox(label="Transcription", interactive=False)
            summary_out = gr.HTML(label="Sentiment")
            seg_out = gr.Dataframe(label="Détail par segment")
            hist_out = gr.Dataframe(label="Historique")

    state_chat = gr.State([])  # list of (user, bot)
    state_hist = gr.State([])  # list of dict entries
    csv_file = gr.File(label="Télécharger CSV")

    # state_chat and hist_out are listed as outputs so that the conversation
    # accumulates across clicks and the history table is actually refreshed.
    btn.click(
        fn=chat_callback,
        inputs=[audio_in, state_chat, state_hist],
        outputs=[
            chat,
            state_chat,
            transcription_out,
            summary_out,
            seg_out,
            state_hist,
            hist_out,
        ],
    )
    export_btn.click(
        fn=export_history_csv,
        inputs=[state_hist],
        outputs=[csv_file],
    )

if __name__ == "__main__":
    demo.launch()