### -----------------------------------------------------------------------
### Transkriber version_1.00
### app.py
### -----------------------------------------------------------------------
# -------------------------------------------------------------------------
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# -------------------------------------------------------------------------
import os
import time
import psutil
from tqdm import tqdm
from fpdf import FPDF
from pathlib import Path
import torch
from transformers import pipeline
from pydub import AudioSegment
import gradio as gr
###############################################################################
# Configuration
###############################################################################
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Load the ASR pipeline once at startup so the model is not re-initialised on
# every transcription request.
pipe = pipeline(
    "automatic-speech-recognition",
    model="NbAiLab/nb-whisper-large",
    chunk_length_s=30,
    device=device,
)
###############################################################################
# Function to detect leading silence
###############################################################################
def milliseconds_until_sound(sound, silence_threshold_in_decibels=-20.0, chunk_size=10):
    """Return the number of milliseconds before `sound` first rises above the
    silence threshold, scanning in `chunk_size`-millisecond steps."""
    assert chunk_size > 0
    trim_ms = 0
    while trim_ms < len(sound) and sound[trim_ms:trim_ms + chunk_size].dBFS < silence_threshold_in_decibels:
        trim_ms += chunk_size
    return trim_ms
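# pydub also ships an equivalent helper, pydub.silence.detect_leading_silence
# (default threshold -50.0 dBFS); the hand-rolled version above keeps the
# threshold used here explicit. A drop-in alternative would look like:
#
#     from pydub.silence import detect_leading_silence
#     start_trim = detect_leading_silence(audio, silence_threshold=-20.0, chunk_size=10)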
###############################################################################
# Trim the start of the audio file
###############################################################################
def trim_start(filepath):
    path = Path(filepath)
    directory = path.parent
    filename = path.name
    # Let pydub infer the container format; uploads are not guaranteed to be WAV.
    audio = AudioSegment.from_file(filepath)
    start_trim = milliseconds_until_sound(audio)
    trimmed = audio[start_trim:]
    new_filename = directory / f"trimmed_{filename}"
    trimmed.export(new_filename, format="wav")
    return trimmed, new_filename
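# Note: pydub delegates decoding to ffmpeg, so ffmpeg must be available on the
# host (on a Hugging Face Space, typically via an entry in packages.txt).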
###############################################################################
# -- segment the audio into smaller parts (1-minute segments for large files)
###############################################################################
def segment_audio(trimmed_audio, output_dir_trimmed):
    one_minute = 1 * 60 * 1000  # 1 minute in milliseconds
    output_dir = Path(output_dir_trimmed)
    output_dir.mkdir(parents=True, exist_ok=True)
    start_time = 0
    i = 0
    # -- iterate through trimmed audio, segment it
    segmented_files = []
    while start_time < len(trimmed_audio):
        segment = trimmed_audio[start_time:start_time + one_minute]
        # -- export each segment into the output directory
        file_path = output_dir / f"trimmed_{i:02d}.wav"
        segment.export(file_path, format="wav")
        segmented_files.append(str(file_path))
        start_time += one_minute
        i += 1
    return segmented_files
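###############################################################################
# Optional cleanup helper (a sketch, not part of the original app): segment
# files accumulate in the output directory between runs and can be removed
# after transcription.
###############################################################################
def cleanup_segments(segment_paths):
    # -- remove intermediate segment files, ignoring any that are already gone
    for seg_path in segment_paths:
        try:
            os.remove(seg_path)
        except FileNotFoundError:
            pass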
###############################################################################
# Transcription logic
###############################################################################
def transcribe(file_upload, progress=gr.Progress(track_tqdm=True)):
    file = file_upload
    start_time = time.time()
    # -- trim audio, segment it for processing
    trimmed_audio, trimmed_filename = trim_start(file)
    segmented_files = segment_audio(trimmed_audio, "trimmed_audio")
    # -- transcribe each segment with the pipeline loaded at startup; wrapping
    #    the loop in tqdm drives the Gradio progress bar (track_tqdm=True)
    transcriptions = [pipe(seg_file)["text"] for seg_file in tqdm(segmented_files)]
    text = " ".join(t.strip() for t in transcriptions)
    end_time = time.time()
    output_time = end_time - start_time
    # -- word count
    word_count = len(text.split())
    # -- CPU metric
    cpu_usage = psutil.cpu_percent(interval=1)
    # -- system info string
    system_info = f"""
    Processing time: {output_time:.2f} seconds.
    Number of words: {word_count}
    CPU Usage: {cpu_usage}%
    """
    return text, system_info
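###############################################################################
# FPDF is imported above but never wired into the app; below is a minimal
# sketch of how a transcript could be saved as a PDF. The helper name and
# default output path are illustrative, not part of the original app.
###############################################################################
def save_transcript_to_pdf(text, pdf_path="transcript.pdf"):
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Helvetica", size=12)
    # multi_cell wraps long transcript text across lines and pages
    pdf.multi_cell(0, 10, text)
    pdf.output(pdf_path)
    return pdf_path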
###############################################################################
# Interface
###############################################################################
HEADER_INFO = """
# This space uses the *Norwegian NB-Whisper Large* model by **NbAiLab** to transcribe long-form Norwegian audio uploads of arbitrary length.
""".strip()
| css = """ | |
| #transcription_output textarea { | |
| background-color: #000000; /* black */ | |
| color: #00FF00 !important; /* text color */ | |
| font-size: 18px; /* font size */ | |
| } | |
| #system_info_box textarea { | |
| background-color: #ffe0b3; /* orange */ | |
| color: black !important; /* text color */ | |
| font-size: 16px; /* font size */ | |
| font-weight: bold; /* bold font */ | |
| } | |
| """ | |
iface = gr.Blocks(css=css)
with iface:
    gr.Markdown(HEADER_INFO)
    with gr.Row():
        upload = gr.Audio(label="Upload audio", sources=["upload"], type="filepath")
        transcribe_btn = gr.Button("Transkriber")
    with gr.Row():
        with gr.Column(scale=3):
            text_output = gr.Textbox(label="Transkribert Tekst", placeholder="t r a n s c r i p t i o n", elem_id="transcription_output")
        with gr.Column(scale=1):
            system_info = gr.Textbox(label="Antall sekunder, ord, system data:", elem_id="system_info_box")
    with gr.Row():
        gr.Markdown('''
        <div style="text-align:center;">
            <a href="https://opensource.com/resources/what-open-source" style="display: inline-block;">
                <img src="https://badgen.net/badge/Open%20Source%20%3F/Yes%21/blue?icon=github" alt="Open Source? Yes!" style="vertical-align: middle;">
            </a>
            <span style="display:inline-block; width: 20px;"></span>  <!-- spacing between the badges -->
            <a href="https://opensource.org/licenses/Apache-2.0" style="display: inline-block;">
                <img src="https://img.shields.io/badge/License-Apache_2.0-blue.svg" alt="License: Apache 2.0" style="vertical-align: middle;">
            </a>
        </div>
        ''')
    transcribe_btn.click(
        fn=transcribe,
        inputs=[upload],
        outputs=[text_output, system_info]
    )
# -- queue requests so gr.Progress tracking works and long jobs don't block
iface.queue().launch(debug=True)