---
# Component descriptor for odtp-pyannote-whisper.
# Declares the component's identity, the wrapped tools, the secrets and
# parameters it reads, the files it consumes and produces, and the devices
# it needs.

# Schema version for tracking updates to the schema format
schema-version: "v0.5.0"

# Component Information
component-name: odtp-pyannote-whisper
component-version: "v0.1.1"
component-license: Apache 2.0
component-type: ephemeral
component-description: Transcribe or translate audio files using Whisper and Pyannote for speaker diarization
component-authors:
  - name: Carlos Vivar Rios
    orcid: null
component-repository:
  url: "https://github.com/odtp-org/odtp-pyannote-whisper"
  doi: null
component-docker-image: null
tags:
  - audio
  - transcription
  - translation
  - whisper
  - pyannote

# Tool Information
# Third-party tools wrapped by this component.
tools:
  - tool-name: whisper
    tool-authors:
      - name: OpenAI
        orcid: null
    tool-version: latest
    tool-repository:
      url: "https://github.com/openai/whisper"
      doi: null
    tool-license: MIT
  - tool-name: pyannote
    tool-authors:
      - name: Hervé Bredin
        orcid: null
    tool-version: latest
    tool-repository:
      url: "https://github.com/pyannote/pyannote-audio"
      doi: null
    tool-license: MIT

# Secrets (ENV variables)
# Only the variable name and description are declared here; the secret value
# itself must never be committed to this file.
secrets:
  - name: HF_TOKEN
    description: Hugging Face API token for accessing pyannote models
    type: str

# Build Arguments (if any)
build-args: null

# Exposed Ports
ports: null

# Parameters for the Component
parameters:
  - name: MODEL
    default-value: large-v3
    datatype: str
    description: Whisper model to use for transcription/translation
    parameter-bounds: null
    options:
      - tiny
      - base
      - small
      - medium
      - large
      - large-v2
      - large-v3
    allow-custom-value: false
  - name: TASK
    default-value: transcribe
    datatype: str
    description: Task to perform (transcribe or translate)
    parameter-bounds: null
    options:
      - transcribe
      - translate
    allow-custom-value: false
  - name: LANGUAGE
    default-value: auto
    datatype: str
    description: Source language code (use 'auto' for auto-detection)
    parameter-bounds: null
    options:
      - auto
      - en
      - es
      - fr
      - de
      - it
      - pt
      - nl
      - ja
      - zh
      - ru
    # Unlike MODEL and TASK, this parameter sets allow-custom-value to true;
    # presumably values outside the list above are accepted - TODO confirm.
    allow-custom-value: true

# Data Inputs
data-inputs:
  - name: INPUT_FILE
    type: .wav
    path: /odtp/odtp-input
    description: Input audio file in WAV format
    naming-convention: null

# Data Outputs
# Every output declared below uses the same path, /odtp/odtp-output.
data-outputs:
  - name: OUTPUT_FILE
    type: .srt
    path: /odtp/odtp-output
    description: Transcription/translation output in SRT format with speaker diarization
    naming-convention: null
  - name: OUTPUT_JSON_FILE
    type: .json
    path: /odtp/odtp-output
    description: Transcription/translation output in JSON format with speaker diarization
    naming-convention: null
  - name: OUTPUT_AUDIO_FILE
    type: .wav
    path: /odtp/odtp-output
    description: Audio in wav format
    naming-convention: null
  # NOTE(review): the description used to say "Markdown file" although the
  # declared type is .json; it now matches the type - confirm the real format.
  - name: OUTPUT_PARAGRAPHS_FILE
    type: .json
    path: /odtp/odtp-output
    description: JSON file with the paragraphs containing speaker diarization and transcription/translation
    naming-convention: null
  - name: OUTPUT_MD_FILE
    type: .md
    path: /odtp/odtp-output
    description: Markdown file with the speaker diarization and transcription/translation
    naming-convention: null
  - name: OUTPUT_PDF_FILE
    type: .pdf
    path: /odtp/odtp-output
    description: PDF file with the speaker diarization and transcription/translation
    naming-convention: null

# Validation Schemas (Future Development)
schema-input: null
schema-output: null

# Device Requirements
devices:
  - type: gpu
    required: true