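{# ttsfm/templates/docs.html — API documentation page. (File-viewer residue "Upload 20 files", commit bf90fc9, folded into this comment so nothing is rendered ahead of the extends tag.) #}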
{% extends "base.html" %}
{# Page title: the literal prefix "TTSFM" followed by whatever _('docs.title') returns. #}
{% block title %}TTSFM {{ _('docs.title') }}{% endblock %}
{% block extra_css %}
<style>
    /* Framed box for request bodies and code samples; long lines scroll sideways. */
    .code-block {
        margin: 1rem 0;
        padding: 1rem;
        overflow-x: auto;
        border: 1px solid #e9ecef;
        border-radius: 0.375rem;
        background-color: #f8f9fa;
    }

    /* Endpoint cards carry a blue accent stripe on their left edge. */
    .endpoint-card {
        margin-bottom: 2rem;
        border-left: 4px solid #007bff;
    }

    /* Small pill that shows the HTTP verb in front of an endpoint path. */
    .method-badge {
        margin-right: 0.5rem;
        padding: 0.25rem 0.5rem;
        border-radius: 0.25rem;
        font-size: 0.75rem;
        font-weight: 700;
    }

    /* One colour pair per HTTP verb. */
    .method-get    { color: #fff; background-color: #28a745; }
    .method-post   { color: #fff; background-color: #007bff; }
    .method-put    { color: #000; background-color: #ffc107; }
    .method-delete { color: #fff; background-color: #dc3545; }

    /* Slightly darker panel used for sample responses. */
    .response-example {
        margin-top: 1rem;
        padding: 1rem;
        border-radius: 0.375rem;
        background-color: #f1f3f4;
    }

    /* Table of contents: stays in view while scrolling and scrolls internally if taller than the viewport. */
    .toc {
        position: sticky;
        top: 2rem;
        max-height: calc(100vh - 4rem);
        overflow-y: auto;
    }

    /* TOC entries: stacked links with a transparent left border reserved for the active marker. */
    .toc a {
        display: block;
        padding: 0.25rem 0 0.25rem 1rem;
        border-left: 2px solid transparent;
        color: #6c757d;
        text-decoration: none;
    }

    /* Hovered entries and the entry flagged "active" by the page script share one highlight. */
    .toc a:hover,
    .toc a.active {
        border-left-color: #007bff;
        color: #007bff;
    }
</style>
{% endblock %}
{% block content %}
<div class="container py-5">
<div class="row">
    <div class="col-12 text-center mb-5">
        <h1 class="display-4 fw-bold">
            {# The book icon is decorative, so it is hidden from assistive technology and only the title text is announced. #}
            <i class="fas fa-book me-3 text-primary" aria-hidden="true"></i>{{ _('docs.title') }}
        </h1>
        <p class="lead text-muted">
            {{ _('docs.subtitle') }}
        </p>
    </div>
</div>
<div class="row">
<!-- Table of Contents: in-page navigation; the page script reads the ".toc a" links -->
<div class="col-lg-3">
    {# A labelled <nav> landmark lets assistive technology jump straight to this link list; the "toc" class is kept for the styles and script that select on it. #}
    <nav class="toc" aria-labelledby="toc-heading">
        <h2 class="h5 fw-bold mb-3" id="toc-heading">{{ _('docs.contents') }}</h2>
        <a href="#overview">{{ _('docs.overview') }}</a>
        <a href="#authentication">{{ _('docs.authentication') }}</a>
        <a href="#text-validation">{{ _('docs.text_validation') }}</a>
        <a href="#endpoints">{{ _('docs.endpoints') }}</a>
        <a href="#voices">{{ _('docs.voices') }}</a>
        <a href="#formats">{{ _('docs.formats') }}</a>
        <a href="#generate">{{ _('docs.generate') }}</a>
        <a href="#combined">{{ _('docs.combined') }}</a>
        {# NOTE(review): this template defines no element with id "status", "errors" or "examples", so the next three links have nowhere to scroll — add the sections or drop the links. #}
        <a href="#status">{{ _('docs.status') }}</a>
        <a href="#errors">{{ _('docs.errors') }}</a>
        <a href="#examples">{{ _('docs.examples') }}</a>
        <a href="#python-package">{{ _('docs.python_package') }}</a>
        {# NOTE(review): hard-coded English label, unlike the entries above that go through _() — confirm whether a translation key should be used. #}
        <a href="#websocket">WebSocket Streaming</a>
    </nav>
</div>
<!-- Documentation Content -->
<div class="col-lg-9">
<!-- Overview: description, base URL, feature list and version notice -->
<section id="overview" class="mb-5">
    <h2 class="fw-bold mb-3">{{ _('docs.overview_title') }}</h2>
    <p>
        {{ _('docs.overview_desc') }}
    </p>
    <div class="alert alert-info">
        <i class="fas fa-info-circle me-2" aria-hidden="true"></i>
        {# Base URL is built from the current request root, so it matches whichever host serves this page. #}
        <strong>{{ _('docs.base_url') }}</strong> <code>{{ request.url_root }}api/</code>
    </div>
    {# h3 keeps the heading outline contiguous under the section's h2; the "h4" class preserves the previous visual size. #}
    <h3 class="h4">{{ _('docs.key_features') }}</h3>
    <ul>
        <li><strong>🎤 {{ _('docs.feature_voices') }}</strong></li>
        <li><strong>🎵 {{ _('docs.feature_formats') }}</strong></li>
        <li><strong>🤖 {{ _('docs.feature_openai') }}</strong></li>
        <li><strong>✨ {{ _('docs.feature_auto_combine') }}</strong></li>
        <li><strong>📊 {{ _('docs.feature_validation') }}</strong></li>
        <li><strong>📈 {{ _('docs.feature_monitoring') }}</strong></li>
    </ul>
    <div class="alert alert-success">
        <i class="fas fa-star me-2" aria-hidden="true"></i>
        <strong>{{ _('docs.new_version') }}</strong> {{ _('docs.new_version_desc') }}
    </div>
</section>
<!-- Authentication: explanatory text plus the header line clients send -->
<section class="mb-5" id="authentication">
    <h2 class="fw-bold mb-3">{{ _('docs.authentication_title') }}</h2>
    <p>{{ _('docs.authentication_desc') }}</p>
    <div class="code-block">
        <pre><code>Authorization: Bearer YOUR_API_KEY</code></pre>
    </div>
</section>
<!-- Text Validation: description, warning and the three validation options -->
<section id="text-validation" class="mb-5">
    <h2 class="fw-bold mb-3">{{ _('docs.text_validation_title') }}</h2>
    <p>
        {{ _('docs.text_validation_desc') }}
    </p>
    <div class="alert alert-warning">
        <i class="fas fa-exclamation-triangle me-2" aria-hidden="true"></i>
        <strong>{{ _('docs.important') }}</strong> {{ _('docs.text_validation_warning') }}
    </div>
    {# h3 keeps the heading outline contiguous under the section's h2; the "h4" class preserves the previous visual size. #}
    <h3 class="h4">{{ _('docs.validation_options') }}</h3>
    <ul>
        <li><code>max_length</code>: {{ _('docs.max_length_option') }}</li>
        <li><code>validate_length</code>: {{ _('docs.validate_length_option') }}</li>
        <li><code>preserve_words</code>: {{ _('docs.preserve_words_option') }}</li>
    </ul>
</section>
<!-- API Endpoints -->
<section id="endpoints" class="mb-5">
<h2 class="fw-bold mb-3">{{ _('docs.endpoints_title') }}</h2>
<!-- Voices Endpoint: GET /api/voices -->
<div class="card endpoint-card" id="voices">
    <div class="card-body">
        {# Heading levels step h3 -> h4 with no gaps; the "h4"/"h6" classes keep the earlier visual sizes. #}
        <h3 class="h4 card-title">
            <span class="method-badge method-get">GET</span>
            /api/voices
        </h3>
        <p class="card-text">{{ _('docs.get_voices_desc') }}</p>
        <h4 class="h6">{{ _('docs.response_example') }}</h4>
        <div class="response-example">
            {# Sample lines start at column 0 because <pre> renders leading whitespace literally; only the JSON nesting is indented. #}
            <pre><code>{
  "voices": [
    {
      "id": "alloy",
      "name": "Alloy",
      "description": "Alloy voice"
    },
    {
      "id": "echo",
      "name": "Echo",
      "description": "Echo voice"
    }
  ],
  "count": 6
}</code></pre>
        </div>
    </div>
</div>
<!-- Formats Endpoint: GET /api/formats -->
<div class="card endpoint-card" id="formats">
    <div class="card-body">
        {# Heading levels step h3 -> h4 with no gaps; the "h4"/"h6" classes keep the earlier visual sizes. #}
        <h3 class="h4 card-title">
            <span class="method-badge method-get">GET</span>
            /api/formats
        </h3>
        {# NOTE(review): the copy in this card is hard-coded English, unlike the _() lookups used elsewhere on the page — confirm whether it should be translated. #}
        <p class="card-text">Get available audio formats for speech generation.</p>
        <h4 class="h6">Available Formats</h4>
        <p>We support multiple format requests, but internally:</p>
        <ul>
            <li><strong>mp3</strong> - Returns actual MP3 format</li>
            <li><strong>All other formats</strong> (opus, aac, flac, wav, pcm) - Mapped to WAV format</li>
        </ul>
        <div class="alert alert-info">
            <i class="fas fa-info-circle me-2" aria-hidden="true"></i>
            <strong>Note:</strong> When you request opus, aac, flac, wav, or pcm, you'll receive WAV audio data.
        </div>
        <h4 class="h6">{{ _('docs.response_example') }}</h4>
        <div class="response-example">
            {# Sample lines start at column 0 because <pre> renders leading whitespace literally; only the JSON nesting is indented. #}
            <pre><code>{
  "formats": [
    {
      "id": "mp3",
      "name": "MP3",
      "mime_type": "audio/mp3",
      "description": "MP3 audio format"
    },
    {
      "id": "opus",
      "name": "Opus",
      "mime_type": "audio/wav",
      "description": "Returns WAV format"
    },
    {
      "id": "aac",
      "name": "AAC",
      "mime_type": "audio/wav",
      "description": "Returns WAV format"
    },
    {
      "id": "flac",
      "name": "FLAC",
      "mime_type": "audio/wav",
      "description": "Returns WAV format"
    },
    {
      "id": "wav",
      "name": "WAV",
      "mime_type": "audio/wav",
      "description": "WAV audio format"
    },
    {
      "id": "pcm",
      "name": "PCM",
      "mime_type": "audio/wav",
      "description": "Returns WAV format"
    }
  ],
  "count": 6
}</code></pre>
        </div>
    </div>
</div>
<!-- Text Validation Endpoint: POST /api/validate-text -->
<div class="card endpoint-card">
    <div class="card-body">
        {# Heading levels step h3 -> h4 with no gaps; the "h4"/"h6" classes keep the earlier visual sizes. #}
        <h3 class="h4 card-title">
            <span class="method-badge method-post">POST</span>
            /api/validate-text
        </h3>
        <p class="card-text">{{ _('docs.validate_text_desc') }}</p>
        <h4 class="h6">{{ _('docs.request_body') }}</h4>
        <div class="code-block">
            {# Sample lines start at column 0 because <pre> renders leading whitespace literally; only the JSON nesting is indented. #}
            <pre><code>{
  "text": "Your text to validate",
  "max_length": 4096
}</code></pre>
        </div>
        <h4 class="h6">{{ _('docs.response_example') }}</h4>
        <div class="response-example">
            <pre><code>{
  "text_length": 5000,
  "max_length": 4096,
  "is_valid": false,
  "needs_splitting": true,
  "suggested_chunks": 2,
  "chunk_preview": [
    "First chunk preview...",
    "Second chunk preview..."
  ]
}</code></pre>
        </div>
    </div>
</div>
<!-- Generate Speech Endpoint: POST /api/generate -->
<div class="card endpoint-card" id="generate">
    <div class="card-body">
        {# Heading levels step h3 -> h4 with no gaps; the "h4"/"h6" classes keep the earlier visual sizes. #}
        <h3 class="h4 card-title">
            <span class="method-badge method-post">POST</span>
            /api/generate
        </h3>
        <p class="card-text">{{ _('docs.generate_speech_desc') }}</p>
        <h4 class="h6">{{ _('docs.request_body') }}</h4>
        <div class="code-block">
            {# Sample lines start at column 0 because <pre> renders leading whitespace literally; only the JSON nesting is indented. #}
            <pre><code>{
  "text": "Hello, world!",
  "voice": "alloy",
  "format": "mp3",
  "instructions": "Speak cheerfully",
  "max_length": 4096,
  "validate_length": true
}</code></pre>
        </div>
        <h4 class="h6">{{ _('docs.parameters') }}</h4>
        {# One entry per key of the request body above, each tagged required or optional. #}
        <ul>
            <li><code>text</code> ({{ _('docs.required') }}): {{ _('docs.text_param') }}</li>
            <li><code>voice</code> ({{ _('docs.optional') }}): {{ _('docs.voice_param') }}</li>
            <li><code>format</code> ({{ _('docs.optional') }}): {{ _('docs.format_param') }}</li>
            <li><code>instructions</code> ({{ _('docs.optional') }}): {{ _('docs.instructions_param') }}</li>
            <li><code>max_length</code> ({{ _('docs.optional') }}): {{ _('docs.max_length_param') }}</li>
            <li><code>validate_length</code> ({{ _('docs.optional') }}): {{ _('docs.validate_length_param') }}</li>
        </ul>
        <h4 class="h6">{{ _('docs.response') }}</h4>
        <p>{{ _('docs.response_audio') }}</p>
    </div>
</div>
</section>
<!-- Python Package -->
<section id="python-package" class="mb-5">
{# Section title uses h2 like the other top-level sections; the "h3" class keeps its previous visual size. The Python icon is decorative. #}
<h2 class="h3 fw-bold mb-4">
    <i class="fab fa-python me-2 text-warning" aria-hidden="true"></i>{{ _('docs.python_package_title') }}
</h2>
{# Python package card: long-text helper example followed by the developer feature list. #}
<div class="card">
    <div class="card-body">
        {# Heading levels step h3 -> h4 with no gaps; the "h5"/"h6" classes keep the earlier visual sizes. #}
        <h3 class="h5">{{ _('docs.long_text_support') }}</h3>
        <p>{{ _('docs.long_text_desc') }}</p>
        <div class="code-block">
            {# <pre> renders whitespace literally: the sample starts at column 0 and carries real Python indentation, without which the for-loop body is a syntax error for anyone copying it. #}
            <pre><code>from ttsfm import TTSClient, Voice, AudioFormat
# Create client
client = TTSClient()
# Generate speech from long text (automatically splits into separate files)
responses = client.generate_speech_long_text(
    text="Very long text that exceeds 4096 characters...",
    voice=Voice.ALLOY,
    response_format=AudioFormat.MP3,
    max_length=2000,
    preserve_words=True
)
# Save each chunk as separate files
for i, response in enumerate(responses, 1):
    response.save_to_file(f"part_{i:03d}.mp3")</code></pre>
        </div>
        <h4 class="h6 mt-4">{{ _('docs.developer_features') }}</h4>
        <ul>
            <li><strong>{{ _('docs.manual_splitting') }}</strong></li>
            <li><strong>{{ _('docs.word_preservation') }}</strong></li>
            <li><strong>{{ _('docs.separate_files') }}</strong></li>
            <li><strong>{{ _('docs.cli_support') }}</strong></li>
        </ul>
        <div class="alert alert-info">
            <i class="fas fa-info-circle me-2" aria-hidden="true"></i>
            <strong>{{ _('docs.note') }}</strong> {{ _('docs.auto_combine_note') }}
        </div>
    </div>
</div>
<!-- Combined Audio Endpoint: POST /api/generate-combined -->
<div class="card endpoint-card" id="combined">
    <div class="card-body">
        {# Heading levels step h3 -> h4 with no gaps; the "h4"/"h6" classes keep the earlier visual sizes. #}
        <h3 class="h4 card-title">
            <span class="method-badge method-post">POST</span>
            /api/generate-combined
        </h3>
        <p class="card-text">{{ _('docs.combined_audio_desc') }}</p>
        <h4 class="h6">{{ _('docs.request_body') }}</h4>
        <div class="code-block">
            {# Sample lines start at column 0 because <pre> renders leading whitespace literally; only the JSON nesting is indented. #}
            <pre><code>{
  "text": "Very long text that exceeds the limit...",
  "voice": "alloy",
  "format": "mp3",
  "instructions": "Optional voice instructions",
  "max_length": 4096,
  "preserve_words": true
}</code></pre>
        </div>
        <h4 class="h6">{{ _('docs.response') }}</h4>
        <p>{{ _('docs.response_combined_audio') }}</p>
        <h4 class="h6">{{ _('docs.response_headers') }}</h4>
        <ul>
            <li><code>X-Chunks-Combined</code>: {{ _('docs.chunks_combined_header') }}</li>
            <li><code>X-Original-Text-Length</code>: {{ _('docs.original_text_length_header') }}</li>
            <li><code>X-Audio-Size</code>: {{ _('docs.audio_size_header') }}</li>
        </ul>
    </div>
</div>
<!-- OpenAI Compatible Endpoint with Auto-Combine: POST /v1/audio/speech -->
<div class="card endpoint-card">
    <div class="card-body">
        {# Heading levels step h3 -> h4 with no gaps; the "h4"/"h6" classes keep the earlier visual sizes. #}
        <h3 class="h4 card-title">
            <span class="method-badge method-post">POST</span>
            /v1/audio/speech
        </h3>
        <p class="card-text">{{ _('docs.openai_compatible_desc') }}</p>
        <h4 class="h6">{{ _('docs.request_body') }}</h4>
        <div class="code-block">
            {# Every sample in this card starts at column 0 because <pre> renders leading whitespace literally; only the code's own nesting is indented. #}
            <pre><code>{
  "model": "gpt-4o-mini-tts",
  "input": "Text of any length...",
  "voice": "alloy",
  "response_format": "mp3",
  "instructions": "Optional voice instructions",
  "speed": 1.0,
  "auto_combine": true,
  "max_length": 4096
}</code></pre>
        </div>
        <h4 class="h6">{{ _('docs.enhanced_parameters') }}</h4>
        <ul>
            <li><strong>auto_combine</strong> (boolean, default: true):
                <ul>
                    <li><code>true</code>: {{ _('docs.auto_combine_param') }}</li>
                    <li><code>false</code>: {{ _('docs.auto_combine_false') }}</li>
                </ul>
            </li>
            <li><strong>max_length</strong> (integer, default: 4096): {{ _('docs.max_length_chunk_param') }}</li>
        </ul>
        <h4 class="h6">{{ _('docs.response_headers') }}</h4>
        <ul>
            <li><code>X-Auto-Combine</code>: {{ _('docs.auto_combine_header') }}</li>
            <li><code>X-Chunks-Combined</code>: {{ _('docs.chunks_combined_response') }}</li>
            <li><code>X-Original-Text-Length</code>: {{ _('docs.original_text_response') }}</li>
            <li><code>X-Audio-Format</code>: {{ _('docs.audio_format_header') }}</li>
            <li><code>X-Audio-Size</code>: {{ _('docs.audio_size_response') }}</li>
        </ul>
        <h4 class="h6">{{ _('docs.examples_title') }}</h4>
        <div class="code-block">
            {# The curl targets are built from the current request root so the commands can be pasted as shown. #}
            <pre><code># {{ _('docs.short_text_comment') }}
curl -X POST {{ request.url_root }}v1/audio/speech \
  -H "Content-Type: application/json" \
  -d '{
    "model": "gpt-4o-mini-tts",
    "input": "Hello world!",
    "voice": "alloy"
  }'
# {{ _('docs.long_text_auto_comment') }}
curl -X POST {{ request.url_root }}v1/audio/speech \
  -H "Content-Type: application/json" \
  -d '{
    "model": "gpt-4o-mini-tts",
    "input": "Very long text...",
    "voice": "alloy",
    "auto_combine": true
  }'
# {{ _('docs.long_text_no_auto_comment') }}
curl -X POST {{ request.url_root }}v1/audio/speech \
  -H "Content-Type: application/json" \
  -d '{
    "model": "gpt-4o-mini-tts",
    "input": "Very long text...",
    "voice": "alloy",
    "auto_combine": false
  }'</code></pre>
        </div>
        <div class="alert alert-info mt-3">
            <i class="fas fa-info-circle me-2" aria-hidden="true"></i>
            <strong>{{ _('docs.audio_combination') }}</strong> {{ _('docs.audio_combination_desc') }}
        </div>
        <h4 class="h6 mt-4">{{ _('docs.use_cases') }}</h4>
        <ul>
            <li><strong>{{ _('docs.use_case_articles') }}</strong></li>
            <li><strong>{{ _('docs.use_case_audiobooks') }}</strong></li>
            <li><strong>{{ _('docs.use_case_podcasts') }}</strong></li>
            <li><strong>{{ _('docs.use_case_education') }}</strong></li>
        </ul>
        <h4 class="h6 mt-4">{{ _('docs.example_usage') }}</h4>
        <div class="code-block">
            {# Python needs its indentation to be valid: the if/with bodies below are nested explicitly. This sample posts to api/generate-combined, not to the /v1/audio/speech path in the card title. #}
            <pre><code># {{ _('docs.python_example_comment') }}
import requests
response = requests.post(
    "{{ request.url_root }}api/generate-combined",
    json={
        "text": "Your very long text content here...",
        "voice": "nova",
        "format": "mp3",
        "max_length": 2000
    }
)
if response.status_code == 200:
    with open("combined_audio.mp3", "wb") as f:
        f.write(response.content)
    chunks = response.headers.get('X-Chunks-Combined')
    print(f"Combined {chunks} chunks into single file")</code></pre>
        </div>
    </div>
</div>
</section>
<!-- WebSocket Streaming -->
<section id="websocket" class="mb-5">
{# WebSocket section heading, lead paragraph and info note. Both icons are decorative and hidden from assistive technology. #}
<h2 class="mb-4">
    <i class="fas fa-bolt text-warning me-2" aria-hidden="true"></i>WebSocket Streaming
</h2>
<p class="lead">
    Real-time audio streaming for enhanced user experience. Get audio chunks as they're generated instead of waiting for the complete file.
</p>
<div class="alert alert-info">
    <i class="fas fa-info-circle me-2" aria-hidden="true"></i>
    WebSocket streaming provides lower perceived latency and real-time progress tracking for TTS generation.
</div>
<h3 class="mt-4">Connection</h3>
<div class="code-block">
    {# socketUrl is the request root with its last character sliced off by [:-1]. Sample lines start at column 0 because <pre> renders leading whitespace literally. #}
    <pre><code>// JavaScript WebSocket client
const client = new WebSocketTTSClient({
    socketUrl: '{{ request.url_root[:-1] }}',
    debug: true
});
// Connection events
client.onConnect = () => console.log('Connected');
client.onDisconnect = () => console.log('Disconnected');</code></pre>
</div>
<h3 class="mt-4">Streaming TTS Generation</h3>
<div class="code-block">
    {# Sample lines start at column 0 because <pre> renders leading whitespace literally; nesting inside the options object is indented so the callbacks read as a unit. #}
    <pre><code>// Generate speech with real-time streaming
const result = await client.generateSpeech('Hello, WebSocket world!', {
    voice: 'alloy',
    format: 'mp3',
    chunkSize: 1024, // Characters per chunk
    // Progress callback
    onProgress: (progress) => {
        console.log(`Progress: ${progress.progress}%`);
        console.log(`Chunks: ${progress.chunksCompleted}/${progress.totalChunks}`);
    },
    // Receive audio chunks in real-time
    onChunk: (chunk) => {
        console.log(`Received chunk ${chunk.chunkIndex + 1}`);
        // Process or play audio chunk immediately
        processAudioChunk(chunk.audioData);
    },
    // Completion callback
    onComplete: (result) => {
        console.log('Streaming complete!');
        // result.audioData contains the complete audio
    }
});</code></pre>
</div>
<h3 class="mt-4">WebSocket Events</h3>
<div class="endpoint-card card">
    <div class="card-body">
        {# Each table takes its accessible name from the heading above it, and scope="col" ties every data cell to its column header. h4 with the "h5" class keeps the outline contiguous without changing the visual size. #}
        <h4 class="h5" id="ws-client-events">Client → Server Events</h4>
        <table class="table table-sm" aria-labelledby="ws-client-events">
            <thead>
                <tr>
                    <th scope="col">Event</th>
                    <th scope="col">Description</th>
                    <th scope="col">Payload</th>
                </tr>
            </thead>
            <tbody>
                <tr>
                    <td><code>generate_stream</code></td>
                    <td>Start TTS generation</td>
                    <td><code>{text, voice, format, chunk_size}</code></td>
                </tr>
                <tr>
                    <td><code>cancel_stream</code></td>
                    <td>Cancel active stream</td>
                    <td><code>{request_id}</code></td>
                </tr>
            </tbody>
        </table>
        <h4 class="h5 mt-4" id="ws-server-events">Server → Client Events</h4>
        <table class="table table-sm" aria-labelledby="ws-server-events">
            <thead>
                <tr>
                    <th scope="col">Event</th>
                    <th scope="col">Description</th>
                    <th scope="col">Payload</th>
                </tr>
            </thead>
            <tbody>
                <tr>
                    <td><code>stream_started</code></td>
                    <td>Stream initiated</td>
                    <td><code>{request_id, timestamp}</code></td>
                </tr>
                <tr>
                    <td><code>audio_chunk</code></td>
                    <td>Audio chunk ready</td>
                    <td><code>{request_id, chunk_index, audio_data, duration}</code></td>
                </tr>
                <tr>
                    <td><code>stream_progress</code></td>
                    <td>Progress update</td>
                    <td><code>{progress, chunks_completed, total_chunks}</code></td>
                </tr>
                <tr>
                    <td><code>stream_complete</code></td>
                    <td>Generation complete</td>
                    <td><code>{request_id, total_chunks, status}</code></td>
                </tr>
                <tr>
                    <td><code>stream_error</code></td>
                    <td>Error occurred</td>
                    <td><code>{request_id, error, timestamp}</code></td>
                </tr>
            </tbody>
        </table>
    </div>
</div>
{# Benefits of streaming: four bold-labelled bullet points. #}
<h3 class="mt-4">Benefits</h3>
<ul>
    <li>
        <strong>Real-time feedback:</strong> Users see progress as audio generates
    </li>
    <li>
        <strong>Lower latency:</strong> First audio chunk arrives quickly
    </li>
    <li>
        <strong>Cancellable:</strong> Stop generation mid-stream if needed
    </li>
    <li>
        <strong>Efficient:</strong> Process chunks as they arrive
    </li>
</ul>
<h3 class="mt-4">Example: Streaming Audio Player</h3>
<div class="code-block">
    {# Sample lines start at column 0 because <pre> renders leading whitespace literally. The buffering comment now matches the code, which waits for three chunks before playback begins. #}
    <pre><code>// Create a streaming audio player
const audioChunks = [];
let isPlaying = false;
const streamingPlayer = await client.generateSpeech(longText, {
    voice: 'nova',
    format: 'mp3',
    onChunk: (chunk) => {
        // Store chunk
        audioChunks.push(chunk.audioData);
        // Start playing once three chunks are buffered
        if (!isPlaying && audioChunks.length >= 3) {
            startStreamingPlayback(audioChunks);
            isPlaying = true;
        }
    },
    onComplete: (result) => {
        // Ensure all chunks are played
        finishPlayback(result.audioData);
    }
});</code></pre>
</div>
{# Call-to-action linking to the demo and playground pages. The heading is h3 (sibling of the other sub-headings in this section) with the "h6" class to keep its previous size; the rocket icon is decorative. #}
<div class="alert alert-success mt-4">
    <h3 class="h6"><i class="fas fa-rocket me-2" aria-hidden="true"></i>Try It Out!</h3>
    <p class="mb-0">
        Experience WebSocket streaming in action at the
        <a href="/websocket-demo" class="alert-link">WebSocket Demo</a> or enable streaming mode in the
        <a href="/playground" class="alert-link">Playground</a>.
    </p>
</div>
</section>
</div>
</div>
</div>
{% endblock %}
{% block extra_js %}
<script>
// Smooth scrolling for TOC links
document.querySelectorAll('.toc a').forEach(link => {
link.addEventListener('click', function(e) {
e.preventDefault();
const target = document.querySelector(this.getAttribute('href'));
if (target) {
target.scrollIntoView({ behavior: 'smooth' });
// Update active link
document.querySelectorAll('.toc a').forEach(l => l.classList.remove('active'));
this.classList.add('active');
}
});
});
// Highlight current section in TOC
window.addEventListener('scroll', function() {
const sections = document.querySelectorAll('section[id]');
const scrollPos = window.scrollY + 100;
sections.forEach(section => {
const top = section.offsetTop;
const bottom = top + section.offsetHeight;
const id = section.getAttribute('id');
const link = document.querySelector(`.toc a[href="#${id}"]`);
if (scrollPos >= top && scrollPos < bottom) {
document.querySelectorAll('.toc a').forEach(l => l.classList.remove('active'));
if (link) link.classList.add('active');
}
});
});
</script>
{% endblock %}