Spaces:

NitinBot001
/

ttsfm

Runtime error

App Files Files Community

ttsfm / templates /docs.html

NitinBot001

Upload 20 files

bf90fc9 verified 2 months ago

raw

history blame contribute delete

29.7 kB

	{% extends "base.html" %}

	{# Page title: the fixed product name followed by the docs.title string looked up via _(). #}
	{% block title %}TTSFM {{ _('docs.title') }}{% endblock title %}

	{% block extra_css %}
	<style>
	  /* Framed container for code samples; scrolls sideways instead of wrapping. */
	  .code-block {
	    margin: 1rem 0;
	    padding: 1rem;
	    overflow-x: auto;
	    border: 1px solid #e9ecef;
	    border-radius: 0.375rem;
	    background-color: #f8f9fa;
	  }

	  /* Endpoint cards carry a blue accent stripe on their left edge. */
	  .endpoint-card {
	    margin-bottom: 2rem;
	    border-left: 4px solid #007bff;
	  }

	  /* Small pill showing the HTTP method next to an endpoint path. */
	  .method-badge {
	    margin-right: 0.5rem;
	    padding: 0.25rem 0.5rem;
	    border-radius: 0.25rem;
	    font-size: 0.75rem;
	    font-weight: 700;
	  }

	  /* One colour pair per HTTP method. */
	  .method-get {
	    color: #fff;
	    background-color: #28a745;
	  }
	  .method-post {
	    color: #fff;
	    background-color: #007bff;
	  }
	  .method-put {
	    color: #000;
	    background-color: #ffc107;
	  }
	  .method-delete {
	    color: #fff;
	    background-color: #dc3545;
	  }

	  /* Shaded panel for sample responses. */
	  .response-example {
	    margin-top: 1rem;
	    padding: 1rem;
	    border-radius: 0.375rem;
	    background-color: #f1f3f4;
	  }

	  /* Table of contents: sticks 2rem below the viewport top and scrolls
	     internally once it is taller than the viewport minus 4rem. */
	  .toc {
	    position: sticky;
	    top: 2rem;
	    max-height: calc(100vh - 4rem);
	    overflow-y: auto;
	  }

	  /* Each entry is a full-width row; the transparent left border reserves
	     the space that the hover/active marker colours in. */
	  .toc a {
	    display: block;
	    padding: 0.25rem 0 0.25rem 1rem;
	    border-left: 2px solid transparent;
	    color: #6c757d;
	    text-decoration: none;
	  }

	  .toc a.active,
	  .toc a:hover {
	    border-left-color: #007bff;
	    color: #007bff;
	  }
	</style>
	{% endblock extra_css %}

	{% block content %}
	<div class="container py-5">
	<!-- Page header: icon + title, followed by the subtitle line -->
	<div class="row">
	  <div class="col-12 text-center mb-5">
	    <h1 class="display-4 fw-bold">
	      <!-- The icon is decorative; hide it from assistive technology so only the title text is announced. -->
	      <i class="fas fa-book me-3 text-primary" aria-hidden="true"></i>{{ _('docs.title') }}
	    </h1>
	    <p class="lead text-muted">
	      {{ _('docs.subtitle') }}
	    </p>
	  </div>
	</div>

	<div class="row">
	<!-- Table of Contents
	     Every href below must match an id in the documentation column; add an
	     entry here only together with its target element. The .toc class is the
	     hook used by this template's CSS and scroll/highlight script. -->
	<div class="col-lg-3">
	  <!-- <nav> exposes the list as a navigation landmark, named by its heading. -->
	  <nav class="toc" aria-labelledby="docs-toc-heading">
	    <!-- h2 keeps the heading outline contiguous after the page h1; .h5 is
	         Bootstrap's heading-size class (assumes base.html loads Bootstrap — TODO confirm). -->
	    <h2 class="h5 fw-bold mb-3" id="docs-toc-heading">{{ _('docs.contents') }}</h2>
	    <a href="#overview">{{ _('docs.overview') }}</a>
	    <a href="#authentication">{{ _('docs.authentication') }}</a>
	    <a href="#text-validation">{{ _('docs.text_validation') }}</a>
	    <a href="#endpoints">{{ _('docs.endpoints') }}</a>
	    <a href="#voices">{{ _('docs.voices') }}</a>
	    <a href="#formats">{{ _('docs.formats') }}</a>
	    <a href="#generate">{{ _('docs.generate') }}</a>
	    <a href="#combined">{{ _('docs.combined') }}</a>
	    <a href="#python-package">{{ _('docs.python_package') }}</a>
	    <a href="#websocket">WebSocket Streaming</a>
	  </nav>
	</div>

	<!-- Documentation Content -->
	<div class="col-lg-9">
	<!-- Overview: description, API base URL, feature list and release note -->
	<section id="overview" class="mb-5">
	  <h2 class="fw-bold mb-3">{{ _('docs.overview_title') }}</h2>
	  <p>
	    {{ _('docs.overview_desc') }}
	  </p>

	  <div class="alert alert-info">
	    <!-- Decorative icon: hidden from assistive technology. -->
	    <i class="fas fa-info-circle me-2" aria-hidden="true"></i>
	    <strong>{{ _('docs.base_url') }}</strong> <code>{{ request.url_root }}api/</code>
	  </div>

	  <!-- h3 follows the section h2 without skipping a level; .h4 is Bootstrap's
	       heading-size class (assumes base.html loads Bootstrap — TODO confirm). -->
	  <h3 class="h4">{{ _('docs.key_features') }}</h3>
	  <ul>
	    <li><strong>🎤 {{ _('docs.feature_voices') }}</strong></li>
	    <li><strong>🎵 {{ _('docs.feature_formats') }}</strong></li>
	    <li><strong>🤖 {{ _('docs.feature_openai') }}</strong></li>
	    <li><strong>✨ {{ _('docs.feature_auto_combine') }}</strong></li>
	    <li><strong>📊 {{ _('docs.feature_validation') }}</strong></li>
	    <li><strong>📈 {{ _('docs.feature_monitoring') }}</strong></li>
	  </ul>

	  <div class="alert alert-success">
	    <i class="fas fa-star me-2" aria-hidden="true"></i>
	    <strong>{{ _('docs.new_version') }}</strong> {{ _('docs.new_version_desc') }}
	  </div>
	</section>

	<!-- Authentication: description plus the bearer-token header format -->
	<section class="mb-5" id="authentication">
	  <h2 class="fw-bold mb-3">{{ _('docs.authentication_title') }}</h2>
	  <p>{{ _('docs.authentication_desc') }}</p>

	  <div class="code-block">
	    <pre><code>Authorization: Bearer YOUR_API_KEY</code></pre>
	  </div>
	</section>

	<!-- Text Validation: description, warning and the three validation options -->
	<section id="text-validation" class="mb-5">
	  <h2 class="fw-bold mb-3">{{ _('docs.text_validation_title') }}</h2>
	  <p>
	    {{ _('docs.text_validation_desc') }}
	  </p>

	  <div class="alert alert-warning">
	    <!-- Decorative icon: hidden from assistive technology. -->
	    <i class="fas fa-exclamation-triangle me-2" aria-hidden="true"></i>
	    <strong>{{ _('docs.important') }}</strong> {{ _('docs.text_validation_warning') }}
	  </div>

	  <!-- h3 follows the section h2 without skipping a level; .h4 is Bootstrap's
	       heading-size class (assumes base.html loads Bootstrap — TODO confirm). -->
	  <h3 class="h4">{{ _('docs.validation_options') }}</h3>
	  <ul>
	    <li><code>max_length</code>: {{ _('docs.max_length_option') }}</li>
	    <li><code>validate_length</code>: {{ _('docs.validate_length_option') }}</li>
	    <li><code>preserve_words</code>: {{ _('docs.preserve_words_option') }}</li>
	  </ul>
	</section>

	<!-- API Endpoints -->
	<section id="endpoints" class="mb-5">
	<h2 class="fw-bold mb-3">{{ _('docs.endpoints_title') }}</h2>

	<!-- Voices Endpoint: GET /api/voices with a sample response -->
	<div class="card endpoint-card" id="voices">
	  <div class="card-body">
	    <!-- Heading levels step down one at a time (h3, then h4); the .h4/.h6
	         classes are Bootstrap's heading-size classes (assumes base.html
	         loads Bootstrap — TODO confirm). -->
	    <h3 class="card-title h4">
	      <span class="method-badge method-get">GET</span>
	      /api/voices
	    </h3>
	    <p class="card-text">{{ _('docs.get_voices_desc') }}</p>

	    <h4 class="h6">{{ _('docs.response_example') }}</h4>
	    <div class="response-example">
	      <!-- Sample text is flush-left: <pre> renders leading whitespace verbatim. -->
	      <pre><code>{
  "voices": [
    {
      "id": "alloy",
      "name": "Alloy",
      "description": "Alloy voice"
    },
    {
      "id": "echo",
      "name": "Echo",
      "description": "Echo voice"
    }
  ],
  "count": 6
}</code></pre>
	    </div>
	  </div>
	</div>

	<!-- Formats Endpoint: GET /api/formats, format mapping notes and a sample response -->
	<div class="card endpoint-card" id="formats">
	  <div class="card-body">
	    <!-- Heading levels step down one at a time (h3, then h4); the .h4/.h6
	         classes are Bootstrap's heading-size classes (assumes base.html
	         loads Bootstrap — TODO confirm). -->
	    <h3 class="card-title h4">
	      <span class="method-badge method-get">GET</span>
	      /api/formats
	    </h3>
	    <p class="card-text">Get available audio formats for speech generation.</p>

	    <h4 class="h6">Available Formats</h4>
	    <p>We support multiple format requests, but internally:</p>
	    <ul>
	      <li><strong>mp3</strong> - Returns actual MP3 format</li>
	      <li><strong>All other formats</strong> (opus, aac, flac, wav, pcm) - Mapped to WAV format</li>
	    </ul>

	    <div class="alert alert-info">
	      <!-- Decorative icon: hidden from assistive technology. -->
	      <i class="fas fa-info-circle me-2" aria-hidden="true"></i>
	      <strong>Note:</strong> When you request opus, aac, flac, wav, or pcm, you'll receive WAV audio data.
	    </div>

	    <h4 class="h6">{{ _('docs.response_example') }}</h4>
	    <div class="response-example">
	      <!-- Sample text is flush-left: <pre> renders leading whitespace verbatim. -->
	      <pre><code>{
  "formats": [
    {
      "id": "mp3",
      "name": "MP3",
      "mime_type": "audio/mp3",
      "description": "MP3 audio format"
    },
    {
      "id": "opus",
      "name": "Opus",
      "mime_type": "audio/wav",
      "description": "Returns WAV format"
    },
    {
      "id": "aac",
      "name": "AAC",
      "mime_type": "audio/wav",
      "description": "Returns WAV format"
    },
    {
      "id": "flac",
      "name": "FLAC",
      "mime_type": "audio/wav",
      "description": "Returns WAV format"
    },
    {
      "id": "wav",
      "name": "WAV",
      "mime_type": "audio/wav",
      "description": "WAV audio format"
    },
    {
      "id": "pcm",
      "name": "PCM",
      "mime_type": "audio/wav",
      "description": "Returns WAV format"
    }
  ],
  "count": 6
}</code></pre>
	    </div>
	  </div>
	</div>

	<!-- Text Validation Endpoint: POST /api/validate-text with sample request and response -->
	<div class="card endpoint-card">
	  <div class="card-body">
	    <!-- Heading levels step down one at a time (h3, then h4); the .h4/.h6
	         classes are Bootstrap's heading-size classes (assumes base.html
	         loads Bootstrap — TODO confirm). -->
	    <h3 class="card-title h4">
	      <span class="method-badge method-post">POST</span>
	      /api/validate-text
	    </h3>
	    <p class="card-text">{{ _('docs.validate_text_desc') }}</p>

	    <h4 class="h6">{{ _('docs.request_body') }}</h4>
	    <div class="code-block">
	      <!-- Sample text is flush-left: <pre> renders leading whitespace verbatim. -->
	      <pre><code>{
  "text": "Your text to validate",
  "max_length": 4096
}</code></pre>
	    </div>

	    <h4 class="h6">{{ _('docs.response_example') }}</h4>
	    <div class="response-example">
	      <pre><code>{
  "text_length": 5000,
  "max_length": 4096,
  "is_valid": false,
  "needs_splitting": true,
  "suggested_chunks": 2,
  "chunk_preview": [
    "First chunk preview...",
    "Second chunk preview..."
  ]
}</code></pre>
	    </div>
	  </div>
	</div>

	<!-- Generate Speech Endpoint: POST /api/generate with request body and parameter list -->
	<div class="card endpoint-card" id="generate">
	  <div class="card-body">
	    <!-- Heading levels step down one at a time (h3, then h4); the .h4/.h6
	         classes are Bootstrap's heading-size classes (assumes base.html
	         loads Bootstrap — TODO confirm). -->
	    <h3 class="card-title h4">
	      <span class="method-badge method-post">POST</span>
	      /api/generate
	    </h3>
	    <p class="card-text">{{ _('docs.generate_speech_desc') }}</p>

	    <h4 class="h6">{{ _('docs.request_body') }}</h4>
	    <div class="code-block">
	      <!-- Sample text is flush-left: <pre> renders leading whitespace verbatim. -->
	      <pre><code>{
  "text": "Hello, world!",
  "voice": "alloy",
  "format": "mp3",
  "instructions": "Speak cheerfully",
  "max_length": 4096,
  "validate_length": true
}</code></pre>
	    </div>

	    <h4 class="h6">{{ _('docs.parameters') }}</h4>
	    <ul>
	      <li><code>text</code> ({{ _('docs.required') }}): {{ _('docs.text_param') }}</li>
	      <li><code>voice</code> ({{ _('docs.optional') }}): {{ _('docs.voice_param') }}</li>
	      <li><code>format</code> ({{ _('docs.optional') }}): {{ _('docs.format_param') }}</li>
	      <li><code>instructions</code> ({{ _('docs.optional') }}): {{ _('docs.instructions_param') }}</li>
	      <li><code>max_length</code> ({{ _('docs.optional') }}): {{ _('docs.max_length_param') }}</li>
	      <li><code>validate_length</code> ({{ _('docs.optional') }}): {{ _('docs.validate_length_param') }}</li>
	    </ul>

	    <h4 class="h6">{{ _('docs.response') }}</h4>
	    <p>{{ _('docs.response_audio') }}</p>
	  </div>
	</div>

	</section>

	<!-- Python Package -->
	<section id="python-package" class="mb-5">
	<!-- Section heading is an h2 like the other top-level sections; the .h3/.h5/.h6
	     classes are Bootstrap's heading-size classes (assumes base.html loads
	     Bootstrap — TODO confirm). -->
	<h2 class="h3 fw-bold mb-4">
	  <!-- Decorative icon: hidden from assistive technology. -->
	  <i class="fab fa-python me-2 text-warning" aria-hidden="true"></i>{{ _('docs.python_package_title') }}
	</h2>

	<div class="card">
	  <div class="card-body">
	    <h3 class="h5">{{ _('docs.long_text_support') }}</h3>
	    <p>{{ _('docs.long_text_desc') }}</p>

	    <div class="code-block">
	      <!-- Sample text is flush-left: <pre> renders leading whitespace verbatim,
	           and the Python sample needs its own indentation to be valid. -->
	      <pre><code>from ttsfm import TTSClient, Voice, AudioFormat

# Create client
client = TTSClient()

# Generate speech from long text (automatically splits into separate files)
responses = client.generate_speech_long_text(
    text="Very long text that exceeds 4096 characters...",
    voice=Voice.ALLOY,
    response_format=AudioFormat.MP3,
    max_length=2000,
    preserve_words=True
)

# Save each chunk as separate files
for i, response in enumerate(responses, 1):
    response.save_to_file(f"part_{i:03d}.mp3")</code></pre>
	    </div>

	    <h4 class="h6 mt-4">{{ _('docs.developer_features') }}</h4>
	    <ul>
	      <li><strong>{{ _('docs.manual_splitting') }}</strong></li>
	      <li><strong>{{ _('docs.word_preservation') }}</strong></li>
	      <li><strong>{{ _('docs.separate_files') }}</strong></li>
	      <li><strong>{{ _('docs.cli_support') }}</strong></li>
	    </ul>

	    <div class="alert alert-info">
	      <i class="fas fa-info-circle me-2" aria-hidden="true"></i>
	      <strong>{{ _('docs.note') }}</strong> {{ _('docs.auto_combine_note') }}
	    </div>
	  </div>
	</div>

	<!-- Combined Audio Endpoint: POST /api/generate-combined with request body and response headers -->
	<div class="card endpoint-card" id="combined">
	  <div class="card-body">
	    <!-- Heading levels step down one at a time (h3, then h4); the .h4/.h6
	         classes are Bootstrap's heading-size classes (assumes base.html
	         loads Bootstrap — TODO confirm). -->
	    <h3 class="card-title h4">
	      <span class="method-badge method-post">POST</span>
	      /api/generate-combined
	    </h3>
	    <p class="card-text">{{ _('docs.combined_audio_desc') }}</p>

	    <h4 class="h6">{{ _('docs.request_body') }}</h4>
	    <div class="code-block">
	      <!-- Sample text is flush-left: <pre> renders leading whitespace verbatim. -->
	      <pre><code>{
  "text": "Very long text that exceeds the limit...",
  "voice": "alloy",
  "format": "mp3",
  "instructions": "Optional voice instructions",
  "max_length": 4096,
  "preserve_words": true
}</code></pre>
	    </div>

	    <h4 class="h6">{{ _('docs.response') }}</h4>
	    <p>{{ _('docs.response_combined_audio') }}</p>

	    <h4 class="h6">{{ _('docs.response_headers') }}</h4>
	    <ul>
	      <li><code>X-Chunks-Combined</code>: {{ _('docs.chunks_combined_header') }}</li>
	      <li><code>X-Original-Text-Length</code>: {{ _('docs.original_text_length_header') }}</li>
	      <li><code>X-Audio-Size</code>: {{ _('docs.audio_size_header') }}</li>
	    </ul>
	  </div>
	</div>

	<!-- OpenAI Compatible Endpoint with Auto-Combine: POST /v1/audio/speech,
	     its extra parameters, response headers and usage samples -->
	<div class="card endpoint-card">
	  <div class="card-body">
	    <!-- Heading levels step down one at a time (h3, then h4); the .h4/.h6
	         classes are Bootstrap's heading-size classes (assumes base.html
	         loads Bootstrap — TODO confirm). -->
	    <h3 class="card-title h4">
	      <span class="method-badge method-post">POST</span>
	      /v1/audio/speech
	    </h3>
	    <p class="card-text">{{ _('docs.openai_compatible_desc') }}</p>

	    <h4 class="h6">{{ _('docs.request_body') }}</h4>
	    <div class="code-block">
	      <!-- Sample text is flush-left: <pre> renders leading whitespace verbatim. -->
	      <pre><code>{
  "model": "gpt-4o-mini-tts",
  "input": "Text of any length...",
  "voice": "alloy",
  "response_format": "mp3",
  "instructions": "Optional voice instructions",
  "speed": 1.0,
  "auto_combine": true,
  "max_length": 4096
}</code></pre>
	    </div>

	    <h4 class="h6">{{ _('docs.enhanced_parameters') }}</h4>
	    <ul>
	      <li><strong>auto_combine</strong> (boolean, default: true):
	        <ul>
	          <li><code>true</code>: {{ _('docs.auto_combine_param') }}</li>
	          <li><code>false</code>: {{ _('docs.auto_combine_false') }}</li>
	        </ul>
	      </li>
	      <li><strong>max_length</strong> (integer, default: 4096): {{ _('docs.max_length_chunk_param') }}</li>
	    </ul>

	    <h4 class="h6">{{ _('docs.response_headers') }}</h4>
	    <ul>
	      <li><code>X-Auto-Combine</code>: {{ _('docs.auto_combine_header') }}</li>
	      <li><code>X-Chunks-Combined</code>: {{ _('docs.chunks_combined_response') }}</li>
	      <li><code>X-Original-Text-Length</code>: {{ _('docs.original_text_response') }}</li>
	      <li><code>X-Audio-Format</code>: {{ _('docs.audio_format_header') }}</li>
	      <li><code>X-Audio-Size</code>: {{ _('docs.audio_size_response') }}</li>
	    </ul>

	    <h4 class="h6">{{ _('docs.examples_title') }}</h4>
	    <div class="code-block">
	      <pre><code># {{ _('docs.short_text_comment') }}
curl -X POST {{ request.url_root }}v1/audio/speech \
  -H "Content-Type: application/json" \
  -d '{
    "model": "gpt-4o-mini-tts",
    "input": "Hello world!",
    "voice": "alloy"
  }'

# {{ _('docs.long_text_auto_comment') }}
curl -X POST {{ request.url_root }}v1/audio/speech \
  -H "Content-Type: application/json" \
  -d '{
    "model": "gpt-4o-mini-tts",
    "input": "Very long text...",
    "voice": "alloy",
    "auto_combine": true
  }'

# {{ _('docs.long_text_no_auto_comment') }}
curl -X POST {{ request.url_root }}v1/audio/speech \
  -H "Content-Type: application/json" \
  -d '{
    "model": "gpt-4o-mini-tts",
    "input": "Very long text...",
    "voice": "alloy",
    "auto_combine": false
  }'</code></pre>
	    </div>

	    <div class="alert alert-info mt-3">
	      <!-- Decorative icon: hidden from assistive technology. -->
	      <i class="fas fa-info-circle me-2" aria-hidden="true"></i>
	      <strong>{{ _('docs.audio_combination') }}</strong> {{ _('docs.audio_combination_desc') }}
	    </div>

	    <h4 class="h6 mt-4">{{ _('docs.use_cases') }}</h4>
	    <ul>
	      <li><strong>{{ _('docs.use_case_articles') }}</strong></li>
	      <li><strong>{{ _('docs.use_case_audiobooks') }}</strong></li>
	      <li><strong>{{ _('docs.use_case_podcasts') }}</strong></li>
	      <li><strong>{{ _('docs.use_case_education') }}</strong></li>
	    </ul>

	    <h4 class="h6 mt-4">{{ _('docs.example_usage') }}</h4>
	    <div class="code-block">
	      <!-- The Python sample needs its own indentation to be valid code. -->
	      <pre><code># {{ _('docs.python_example_comment') }}
import requests

response = requests.post(
    "{{ request.url_root }}api/generate-combined",
    json={
        "text": "Your very long text content here...",
        "voice": "nova",
        "format": "mp3",
        "max_length": 2000
    }
)

if response.status_code == 200:
    with open("combined_audio.mp3", "wb") as f:
        f.write(response.content)

    chunks = response.headers.get('X-Chunks-Combined')
    print(f"Combined {chunks} chunks into single file")</code></pre>
	    </div>
	  </div>
	</div>
	</section>

	<!-- WebSocket Streaming: client samples, event reference tables and demo links.
	     Heading levels step down one at a time; the .h5/.h6 classes are Bootstrap's
	     heading-size classes (assumes base.html loads Bootstrap — TODO confirm).
	     Sample text inside <pre> is flush-left because <pre> renders leading
	     whitespace verbatim. -->
	<section id="websocket" class="mb-5">
	  <h2 class="mb-4">
	    <!-- Decorative icon: hidden from assistive technology. -->
	    <i class="fas fa-bolt text-warning me-2" aria-hidden="true"></i>WebSocket Streaming
	  </h2>
	  <p class="lead">
	    Real-time audio streaming for enhanced user experience. Get audio chunks as they're generated instead of waiting for the complete file.
	  </p>

	  <div class="alert alert-info">
	    <i class="fas fa-info-circle me-2" aria-hidden="true"></i>
	    WebSocket streaming provides lower perceived latency and real-time progress tracking for TTS generation.
	  </div>

	  <h3 class="mt-4">Connection</h3>
	  <div class="code-block">
	    <pre><code>// JavaScript WebSocket client
const client = new WebSocketTTSClient({
  socketUrl: '{{ request.url_root[:-1] }}',
  debug: true
});

// Connection events
client.onConnect = () => console.log('Connected');
client.onDisconnect = () => console.log('Disconnected');</code></pre>
	  </div>

	  <h3 class="mt-4">Streaming TTS Generation</h3>
	  <div class="code-block">
	    <pre><code>// Generate speech with real-time streaming
const result = await client.generateSpeech('Hello, WebSocket world!', {
  voice: 'alloy',
  format: 'mp3',
  chunkSize: 1024, // Characters per chunk

  // Progress callback
  onProgress: (progress) => {
    console.log(`Progress: ${progress.progress}%`);
    console.log(`Chunks: ${progress.chunksCompleted}/${progress.totalChunks}`);
  },

  // Receive audio chunks in real-time
  onChunk: (chunk) => {
    console.log(`Received chunk ${chunk.chunkIndex + 1}`);
    // Process or play audio chunk immediately
    processAudioChunk(chunk.audioData);
  },

  // Completion callback
  onComplete: (result) => {
    console.log('Streaming complete!');
    // result.audioData contains the complete audio
  }
});</code></pre>
	  </div>

	  <h3 class="mt-4">WebSocket Events</h3>
	  <div class="endpoint-card card">
	    <div class="card-body">
	      <h4 class="h5">Client → Server Events</h4>
	      <table class="table table-sm">
	        <thead>
	          <tr>
	            <!-- scope="col" ties each header cell to its column for screen readers. -->
	            <th scope="col">Event</th>
	            <th scope="col">Description</th>
	            <th scope="col">Payload</th>
	          </tr>
	        </thead>
	        <tbody>
	          <tr>
	            <td><code>generate_stream</code></td>
	            <td>Start TTS generation</td>
	            <td><code>{text, voice, format, chunk_size}</code></td>
	          </tr>
	          <tr>
	            <td><code>cancel_stream</code></td>
	            <td>Cancel active stream</td>
	            <td><code>{request_id}</code></td>
	          </tr>
	        </tbody>
	      </table>

	      <h4 class="h5 mt-4">Server → Client Events</h4>
	      <table class="table table-sm">
	        <thead>
	          <tr>
	            <th scope="col">Event</th>
	            <th scope="col">Description</th>
	            <th scope="col">Payload</th>
	          </tr>
	        </thead>
	        <tbody>
	          <tr>
	            <td><code>stream_started</code></td>
	            <td>Stream initiated</td>
	            <td><code>{request_id, timestamp}</code></td>
	          </tr>
	          <tr>
	            <td><code>audio_chunk</code></td>
	            <td>Audio chunk ready</td>
	            <td><code>{request_id, chunk_index, audio_data, duration}</code></td>
	          </tr>
	          <tr>
	            <td><code>stream_progress</code></td>
	            <td>Progress update</td>
	            <td><code>{progress, chunks_completed, total_chunks}</code></td>
	          </tr>
	          <tr>
	            <td><code>stream_complete</code></td>
	            <td>Generation complete</td>
	            <td><code>{request_id, total_chunks, status}</code></td>
	          </tr>
	          <tr>
	            <td><code>stream_error</code></td>
	            <td>Error occurred</td>
	            <td><code>{request_id, error, timestamp}</code></td>
	          </tr>
	        </tbody>
	      </table>
	    </div>
	  </div>

	  <h3 class="mt-4">Benefits</h3>
	  <ul>
	    <li><strong>Real-time feedback:</strong> Users see progress as audio generates</li>
	    <li><strong>Lower latency:</strong> First audio chunk arrives quickly</li>
	    <li><strong>Cancellable:</strong> Stop generation mid-stream if needed</li>
	    <li><strong>Efficient:</strong> Process chunks as they arrive</li>
	  </ul>

	  <h3 class="mt-4">Example: Streaming Audio Player</h3>
	  <div class="code-block">
	    <pre><code>// Create a streaming audio player
const audioChunks = [];
let isPlaying = false;

const streamingPlayer = await client.generateSpeech(longText, {
  voice: 'nova',
  format: 'mp3',

  onChunk: (chunk) => {
    // Store chunk
    audioChunks.push(chunk.audioData);

    // Start playing once three chunks are buffered
    if (!isPlaying && audioChunks.length >= 3) {
      startStreamingPlayback(audioChunks);
      isPlaying = true;
    }
  },

  onComplete: (result) => {
    // Ensure all chunks are played
    finishPlayback(result.audioData);
  }
});</code></pre>
	  </div>

	  <div class="alert alert-success mt-4">
	    <h4 class="h6"><i class="fas fa-rocket me-2" aria-hidden="true"></i>Try It Out!</h4>
	    <p class="mb-0">
	      Experience WebSocket streaming in action at the
	      <a href="/websocket-demo" class="alert-link">WebSocket Demo</a> or enable streaming mode in the
	      <a href="/playground" class="alert-link">Playground</a>.
	    </p>
	  </div>
	</section>
	</div>
	</div>
	</div>
	{% endblock %}

	{% block extra_js %}
	<script>
	  // Table-of-contents behaviour: smooth scrolling when an entry is clicked and
	  // highlighting of the entry whose target is currently at the top of the page.
	  // The DOM is queried immediately, so this assumes base.html renders this
	  // block after the page content — TODO confirm.
	  (function () {
	    // Distance in px below the viewport top at which a target counts as reached.
	    const ACTIVATION_OFFSET = 100;

	    const tocLinks = Array.from(document.querySelectorAll('.toc a'));

	    // Pair each TOC link with the element its fragment points at. Links whose
	    // target does not exist are left out, so the browser handles them normally
	    // instead of having the click swallowed.
	    const entries = tocLinks
	      .map(link => {
	        const href = link.getAttribute('href') || '';
	        const target = href.startsWith('#') ? document.getElementById(href.slice(1)) : null;
	        return { link, target };
	      })
	      .filter(entry => entry.target !== null);

	    // Mark exactly one link as active and clear the marker from all others.
	    function setActive(activeLink) {
	      tocLinks.forEach(link => link.classList.toggle('active', link === activeLink));
	    }

	    // Smooth scrolling for TOC links
	    entries.forEach(({ link, target }) => {
	      link.addEventListener('click', event => {
	        event.preventDefault();
	        target.scrollIntoView({ behavior: 'smooth' });
	        setActive(link);
	      });
	    });

	    // Highlight current target in TOC: of all targets whose top edge has
	    // reached the activation line, pick the lowest one on the page. This
	    // covers nested targets (endpoint cards inside a section) as well as
	    // the sections themselves. When no target has been reached yet, the
	    // current highlight is left unchanged.
	    function highlightCurrent() {
	      let current = null;
	      let currentTop = -Infinity;

	      entries.forEach(entry => {
	        const top = entry.target.getBoundingClientRect().top;
	        if (top <= ACTIVATION_OFFSET && top >= currentTop) {
	          current = entry;
	          currentTop = top;
	        }
	      });

	      if (current) setActive(current.link);
	    }

	    // The handler never calls preventDefault, so it can be registered as passive.
	    window.addEventListener('scroll', highlightCurrent, { passive: true });
	  })();
	</script>
	{% endblock %}