Commit
·
1a7ea3c
1
Parent(s):
d1afbc8
transparency update
Browse files
app.py
CHANGED
|
@@ -17,6 +17,206 @@ from utils import (
|
|
| 17 |
from jam_worker import JamWorker, JamParams, JamChunk
|
| 18 |
import uuid, threading
|
| 19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
jam_registry: dict[str, JamWorker] = {}
|
| 21 |
jam_lock = threading.Lock()
|
| 22 |
|
|
@@ -433,4 +633,31 @@ def jam_status(session_id: str):
|
|
| 433 |
|
| 434 |
@app.get("/health")
|
| 435 |
def health():
|
| 436 |
-
return {"ok": True}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
from jam_worker import JamWorker, JamParams, JamChunk
|
| 18 |
import uuid, threading
|
| 19 |
|
| 20 |
+
import gradio as gr
|
| 21 |
+
|
| 22 |
+
def create_documentation_interface():
|
| 23 |
+
"""Create a Gradio interface for documentation and transparency"""
|
| 24 |
+
|
| 25 |
+
with gr.Blocks(title="MagentaRT Research API", theme=gr.themes.Soft()) as interface:
|
| 26 |
+
|
| 27 |
+
gr.Markdown("""
|
| 28 |
+
# 🎵 MagentaRT Live Music Generation Research API
|
| 29 |
+
|
| 30 |
+
**Research-only implementation for iOS app development**
|
| 31 |
+
|
| 32 |
+
This API uses Google's [MagentaRT](https://github.com/magenta/magenta-realtime) to generate
|
| 33 |
+
continuous music based on input audio loops for experimental iOS app development.
|
| 34 |
+
""")
|
| 35 |
+
|
| 36 |
+
with gr.Tabs():
|
| 37 |
+
with gr.Tab("📖 About This Research"):
|
| 38 |
+
gr.Markdown("""
|
| 39 |
+
## What This API Does
|
| 40 |
+
|
| 41 |
+
We're exploring AI-assisted loop-based music creation for mobile apps. WebSockets are notoriously awkward to work with in iOS Swift apps, so this is an HTTP-based alternative tailored to the loop-based nature of an existing Swift app. This API provides:
|
| 42 |
+
|
| 43 |
+
### 🎹 Single Generation (`/generate`)
|
| 44 |
+
- Upload audio loop + BPM + style parameters
|
| 45 |
+
- Returns 4-8 bars of AI-generated continuation
|
| 46 |
+
- **Performance**: 4 bars in ~9s, 8 bars in ~16s (L40S GPU)
|
| 47 |
+
|
| 48 |
+
### 🔄 Continuous Jamming (`/jam/*`)
|
| 49 |
+
- `/jam/start` - Begin continuous generation session
|
| 50 |
+
- `/jam/next` - Get next bar-aligned chunk
|
| 51 |
+
- `/jam/stop` - End session
|
| 52 |
+
- **Performance**: Real-time 8-bar chunks after warmup
|
| 53 |
+
|
| 54 |
+
## Technical Specs
|
| 55 |
+
- **Model**: MagentaRT (800M parameter transformer)
|
| 56 |
+
- **Quality**: 48kHz stereo output
|
| 57 |
+
- **Context**: 10-second audio analysis window
|
| 58 |
+
- **Styles**: Text descriptions (e.g., "acid house, techno")
|
| 59 |
+
|
| 60 |
+
## Research Goals
|
| 61 |
+
- Seamless AI music generation for loop-based composition
|
| 62 |
+
- Real-time parameter adjustment during generation
|
| 63 |
+
- Mobile-optimized music creation workflows
|
| 64 |
+
""")
|
| 65 |
+
|
| 66 |
+
with gr.Tab("🔧 API Documentation"):
|
| 67 |
+
gr.Markdown("""
|
| 68 |
+
## Single Generation Example
|
| 69 |
+
```bash
|
| 70 |
+
curl -X POST "/generate" \\
|
| 71 |
+
-F "loop_audio=@drum_loop.wav" \\
|
| 72 |
+
-F "bpm=120" \\
|
| 73 |
+
-F "bars=8" \\
|
| 74 |
+
-F "styles=acid house,techno" \\
|
| 75 |
+
-F "guidance_weight=5.0" \\
|
| 76 |
+
-F "temperature=1.1"
|
| 77 |
+
```
|
| 78 |
+
|
| 79 |
+
## Continuous Jamming Example
|
| 80 |
+
```bash
|
| 81 |
+
# 1. Start session
|
| 82 |
+
SESSION=$(curl -X POST "/jam/start" \\
|
| 83 |
+
-F "loop_audio=@loop.wav" \\
|
| 84 |
+
-F "bpm=120" \\
|
| 85 |
+
-F "bars_per_chunk=8" | jq -r .session_id)
|
| 86 |
+
|
| 87 |
+
# 2. Get chunks in real-time
|
| 88 |
+
curl "/jam/next?session_id=$SESSION"
|
| 89 |
+
|
| 90 |
+
# 3. Stop when done
|
| 91 |
+
curl -X POST "/jam/stop" \\
|
| 92 |
+
-H "Content-Type: application/json" \\
|
| 93 |
+
-d "{\\"session_id\\": \\"$SESSION\\"}"
|
| 94 |
+
```
|
| 95 |
+
|
| 96 |
+
## Key Parameters
|
| 97 |
+
- **bpm**: 60-200 (beats per minute)
|
| 98 |
+
- **bars**: 1-16 (bars to generate)
|
| 99 |
+
- **styles**: Text descriptions, comma-separated
|
| 100 |
+
- **guidance_weight**: 0.1-10.0 (style adherence)
|
| 101 |
+
- **temperature**: 0.1-2.0 (randomness)
|
| 102 |
+
- **intro_bars_to_drop**: Skip N bars from start
|
| 103 |
+
|
| 104 |
+
## Response Format
|
| 105 |
+
```json
|
| 106 |
+
{
|
| 107 |
+
"audio_base64": "...",
|
| 108 |
+
"metadata": {
|
| 109 |
+
"bpm": 120,
|
| 110 |
+
"bars": 8,
|
| 111 |
+
"sample_rate": 48000,
|
| 112 |
+
"loop_duration_seconds": 16.0
|
| 113 |
+
}
|
| 114 |
+
}
|
| 115 |
+
```
|
| 116 |
+
""")
|
| 117 |
+
|
| 118 |
+
with gr.Tab("📱 iOS App Integration"):
|
| 119 |
+
gr.Markdown("""
|
| 120 |
+
## How Our iOS App Uses This API
|
| 121 |
+
|
| 122 |
+
### User Flow
|
| 123 |
+
1. **Record/Import**: User provides drum or instrument loop
|
| 124 |
+
2. **Parameter Setup**: Set BPM, style, generation settings
|
| 125 |
+
3. **Continuous Generation**: App calls `/jam/start`
|
| 126 |
+
4. **Real-time Playback**: App fetches chunks via `/jam/next`
|
| 127 |
+
5. **Seamless Mixing**: Generated audio mixed into live stream
|
| 128 |
+
|
| 129 |
+
### Technical Implementation
|
| 130 |
+
- **Audio Format**: 48kHz WAV for consistency
|
| 131 |
+
- **Chunk Size**: 8 bars (~16 seconds at 120 BPM)
|
| 132 |
+
- **Buffer Management**: 3-5 chunks ahead for smooth playback
|
| 133 |
+
- **Style Updates**: Real-time parameter adjustment via `/jam/update`
|
| 134 |
+
|
| 135 |
+
### Networking Considerations
|
| 136 |
+
- **Latency**: ~2-3 seconds per chunk after warmup
|
| 137 |
+
- **Bandwidth**: ~500KB per 8-bar chunk (compressed)
|
| 138 |
+
- **Reliability**: Automatic retry with exponential backoff
|
| 139 |
+
- **Caching**: Local buffer for offline resilience
|
| 140 |
+
""")
|
| 141 |
+
|
| 142 |
+
with gr.Tab("⚖️ Licensing & Legal"):
|
| 143 |
+
gr.Markdown("""
|
| 144 |
+
## MagentaRT Licensing
|
| 145 |
+
|
| 146 |
+
This project uses Google's MagentaRT model under:
|
| 147 |
+
- **Source Code**: Apache License 2.0
|
| 148 |
+
- **Model Weights**: Creative Commons Attribution 4.0 International
|
| 149 |
+
- **Usage Terms**: [See MagentaRT repository](https://github.com/magenta/magenta-realtime)
|
| 150 |
+
|
| 151 |
+
### Key Requirements
|
| 152 |
+
- ✅ **Attribution**: Credit MagentaRT in derivative works
|
| 153 |
+
- ✅ **Responsible Use**: Don't infringe copyrights
|
| 154 |
+
- ✅ **No Warranties**: Use at your own risk
|
| 155 |
+
- ✅ **Patent License**: Explicit patent grants included
|
| 156 |
+
|
| 157 |
+
## Our Implementation
|
| 158 |
+
- **Purpose**: Research and development only
|
| 159 |
+
- **Non-Commercial**: Experimental iOS app development
|
| 160 |
+
- **Open Source**: Will release implementation under Apache 2.0
|
| 161 |
+
- **Attribution**: Proper credit to Google Research team
|
| 162 |
+
|
| 163 |
+
### Required Attribution
|
| 164 |
+
```
|
| 165 |
+
Generated using MagentaRT
|
| 166 |
+
Copyright 2024 Google LLC
|
| 167 |
+
Licensed under Apache 2.0 and CC-BY 4.0
|
| 168 |
+
Implementation for research purposes
|
| 169 |
+
```
|
| 170 |
+
""")
|
| 171 |
+
|
| 172 |
+
with gr.Tab("📊 Performance & Limits"):
|
| 173 |
+
gr.Markdown("""
|
| 174 |
+
## Current Performance (L40S 48GB)
|
| 175 |
+
|
| 176 |
+
### ⚡ Single Generation
|
| 177 |
+
- **4 bars @ 100 BPM**: ~9 seconds
|
| 178 |
+
- **8 bars @ 100 BPM**: ~16 seconds
|
| 179 |
+
- **Memory usage**: ~40GB VRAM during generation
|
| 180 |
+
|
| 181 |
+
### 🔄 Continuous Jamming
|
| 182 |
+
- **Warmup**: ~10-15 seconds first chunk
|
| 183 |
+
- **8-bar chunks @ 120 BPM**: Real-time delivery
|
| 184 |
+
- **Buffer ahead**: 3-5 chunks for smooth playback
|
| 185 |
+
|
| 186 |
+
## Known Limitations
|
| 187 |
+
|
| 188 |
+
### 🎵 Model Limitations (MagentaRT)
|
| 189 |
+
- **Context**: 10-second maximum memory
|
| 190 |
+
- **Training**: Primarily Western instrumental music
|
| 191 |
+
- **Vocals**: Non-lexical only, no lyric conditioning
|
| 192 |
+
- **Structure**: No long-form song arrangement
|
| 193 |
+
- **Inside Swift**: After a few rounds of continuous chunks, the Swift app works best if you restart the jam from the combined audio. This way you might end up with a real jam.
|
| 194 |
+
|
| 195 |
+
### 🖥️ Infrastructure Limitations
|
| 196 |
+
- **Concurrency**: Single user jam sessions only
|
| 197 |
+
- **GPU Memory**: 40GB+ VRAM required for stable operation
|
| 198 |
+
- **Latency**: 2+ second minimum for style changes
|
| 199 |
+
- **Uptime**: Research setup, no SLA guarantees
|
| 200 |
+
|
| 201 |
+
## Resource Requirements
|
| 202 |
+
- **Minimum**: 24GB VRAM (basic operation; not fast enough to deliver new chunks in real time)
|
| 203 |
+
- **Recommended**: 48GB VRAM (stable performance)
|
| 204 |
+
- **CPU**: 8+ cores
|
| 205 |
+
- **System RAM**: 32GB+
|
| 206 |
+
- **Storage**: 50GB+ for model weights
|
| 207 |
+
""")
|
| 208 |
+
|
| 209 |
+
gr.Markdown("""
|
| 210 |
+
---
|
| 211 |
+
|
| 212 |
+
**🔬 Research Project** | **📱 iOS Development** | **🎵 Powered by MagentaRT**
|
| 213 |
+
|
| 214 |
+
This API is part of ongoing research into AI-assisted music creation for mobile devices.
|
| 215 |
+
For technical details, see the API documentation tabs above.
|
| 216 |
+
""")
|
| 217 |
+
|
| 218 |
+
return interface
|
| 219 |
+
|
| 220 |
jam_registry: dict[str, JamWorker] = {}
|
| 221 |
jam_lock = threading.Lock()
|
| 222 |
|
|
|
|
| 633 |
|
| 634 |
@app.get("/health")
|
| 635 |
def health():
|
| 636 |
+
return {"ok": True}
|
| 637 |
+
|
| 638 |
+
@app.get("/", response_class=Response)
|
| 639 |
+
def read_root():
|
| 640 |
+
"""Root endpoint that explains what this API does"""
|
| 641 |
+
html_content = """
|
| 642 |
+
<!DOCTYPE html>
|
| 643 |
+
<html>
|
| 644 |
+
<head><title>MagentaRT Research API</title></head>
|
| 645 |
+
<body style="font-family: Arial; max-width: 800px; margin: 50px auto; padding: 20px;">
|
| 646 |
+
<h1>🎵 MagentaRT Research API</h1>
|
| 647 |
+
<p><strong>Purpose:</strong> AI music generation for iOS app research using Google's MagentaRT</p>
|
| 648 |
+
<h2>Available Endpoints:</h2>
|
| 649 |
+
<ul>
|
| 650 |
+
<li><code>POST /generate</code> - Generate 4-8 bars of music</li>
|
| 651 |
+
<li><code>POST /jam/start</code> - Start continuous jamming</li>
|
| 652 |
+
<li><code>GET /jam/next</code> - Get next chunk</li>
|
| 653 |
+
<li><code>GET /jam/consume</code> - Confirm a chunk as consumed</li>
|
| 654 |
+
<li><code>POST /jam/stop</code> - End session</li>
|
| 655 |
+
<li><code>GET /docs</code> - API documentation</li>
|
| 656 |
+
</ul>
|
| 657 |
+
<p><strong>Research Only:</strong> Experimental implementation for iOS app development.</p>
|
| 658 |
+
<p><strong>Licensing:</strong> Uses MagentaRT (Apache 2.0 + CC-BY 4.0). Users responsible for outputs.</p>
|
| 659 |
+
<p>Visit <a href="/docs">/docs</a> for detailed API documentation.</p>
|
| 660 |
+
</body>
|
| 661 |
+
</html>
|
| 662 |
+
"""
|
| 663 |
+
return Response(content=html_content, media_type="text/html")
|