Commit
·
2b574d8
1
Parent(s):
d9e15de
✨ 添加高级参数设置和UI美化 - Apple简洁风格
Browse files
🎛️ 高级参数功能:
1. **可折叠高级设置面板**
- 默认折叠状态,保持界面简洁
- 包含温度、top_k、top_p、重复惩罚等完整参数
- 实时参数调节,支持细粒度控制
2. **预设风格选择器**
- 轻松对话、新闻播报、娱乐节目、教育讲解
- 一键应用最佳参数组合
- 基于生成参数详解文档的推荐配置
3. **生成速度优化**
- 默认 max_new_tokens 从 2048 降至 1024
- 参数范围优化:512(快速) - 1024(平衡) - 2048(完整)
- GPU 持续时间保持 60 秒,提升响应速度
🎨 Apple 简洁设计:
1. **全新CSS样式系统**
- 采用 SF Pro Display 字体族
- 定义 CSS 变量:primary-color, surface-color 等
- Apple 风格的圆角、阴影和过渡效果
2. **UI组件优化**
- 卡片组件hover效果和微妙动画
- 统一的间距和字重设置
- 现代化的颜色系统和视觉层次
3. **交互体验提升**
- 预设风格变更时自动更新参数滑块
- 清晰的参数说明和使用提示
- 优化的按钮样式和状态反馈
🚀 技术实现:
- 参数传递:UI -> generate_dialogue_audio -> model.generation_config
- 动态配置:用户参数覆盖默认设置
- 兼容性:保持原有功能完整可用
现在用户可以通过简洁的Apple风格界面,精确控制语音生成效果!
app.py
CHANGED
|
@@ -257,9 +257,9 @@ def initialize_model():
|
|
| 257 |
|
| 258 |
# 设置稳定的生成参数,避免数值不稳定
|
| 259 |
try:
|
| 260 |
-
#
|
| 261 |
model.generation_config.max_new_tokens = min(
|
| 262 |
-
getattr(model.generation_config, "max_new_tokens",
|
| 263 |
)
|
| 264 |
|
| 265 |
# 使用文档推荐的"轻松对话风格"参数组合,确保数值稳定
|
|
@@ -295,6 +295,12 @@ def generate_dialogue_audio(
|
|
| 295 |
speaker2_audio: Optional[str],
|
| 296 |
speaker2_text: str,
|
| 297 |
use_normalize: bool,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 298 |
) -> Tuple[Optional[str], str]:
|
| 299 |
try:
|
| 300 |
if not dialogue_text or not dialogue_text.strip():
|
|
@@ -306,6 +312,15 @@ def generate_dialogue_audio(
|
|
| 306 |
|
| 307 |
# 初始化模型,显示进度
|
| 308 |
tokenizer, model, spt, device = initialize_model()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 309 |
|
| 310 |
# 根据输入拼装 item(process_batch 兼容单/双说话者)
|
| 311 |
item = {"text": dialogue_text}
|
|
@@ -387,30 +402,45 @@ def generate_dialogue_audio(
|
|
| 387 |
# =========================
|
| 388 |
|
| 389 |
def create_space_ui() -> gr.Blocks:
|
|
|
|
| 390 |
custom_css = """
|
|
|
|
| 391 |
.gradio-container {
|
| 392 |
max-width: 1400px !important;
|
| 393 |
margin: 0 auto !important;
|
| 394 |
-
font-family:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 395 |
}
|
|
|
|
| 396 |
.header {
|
| 397 |
text-align: center;
|
| 398 |
margin-bottom: 2rem;
|
| 399 |
-
background:
|
| 400 |
-
padding:
|
| 401 |
-
border-radius:
|
| 402 |
-
color:
|
| 403 |
-
box-shadow:
|
|
|
|
| 404 |
}
|
| 405 |
.header h1 {
|
| 406 |
-
font-size: 2.
|
| 407 |
-
margin
|
| 408 |
font-weight: 700;
|
|
|
|
|
|
|
| 409 |
}
|
| 410 |
.header p {
|
| 411 |
-
font-size: 1.
|
| 412 |
-
opacity: 0.9;
|
| 413 |
margin: 0;
|
|
|
|
|
|
|
| 414 |
}
|
| 415 |
.section {
|
| 416 |
background: #f8fafc;
|
|
@@ -581,11 +611,78 @@ def create_space_ui() -> gr.Blocks:
|
|
| 581 |
)
|
| 582 |
|
| 583 |
with gr.Group():
|
| 584 |
-
gr.Markdown("### ⚙️
|
| 585 |
with gr.Row():
|
| 586 |
use_normalize = gr.Checkbox(label="✅ 文本标准化(推荐)", value=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 587 |
btn_generate = gr.Button("🎬 开始合成", variant="primary", size="lg")
|
| 588 |
-
gr.Markdown("
|
| 589 |
|
| 590 |
# 右侧:输出与说明
|
| 591 |
with gr.Column(scale=2):
|
|
@@ -692,7 +789,33 @@ def create_space_ui() -> gr.Blocks:
|
|
| 692 |
gr.Error(f"❌ 加载默认音频时出错: {str(e)}")
|
| 693 |
return gr.update(), gr.update(), gr.update(), gr.update(), gr.update()
|
| 694 |
|
| 695 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 696 |
|
| 697 |
btn_load_scenario.click(
|
| 698 |
fn=on_load_scenario,
|
|
@@ -707,7 +830,10 @@ def create_space_ui() -> gr.Blocks:
|
|
| 707 |
|
| 708 |
btn_generate.click(
|
| 709 |
fn=generate_dialogue_audio,
|
| 710 |
-
inputs=[
|
|
|
|
|
|
|
|
|
|
| 711 |
outputs=[output_audio, status_info],
|
| 712 |
show_progress=True,
|
| 713 |
)
|
|
|
|
| 257 |
|
| 258 |
# 设置稳定的生成参数,避免数值不稳定
|
| 259 |
try:
|
| 260 |
+
# 优化生成长度,平衡质量与速度
|
| 261 |
model.generation_config.max_new_tokens = min(
|
| 262 |
+
getattr(model.generation_config, "max_new_tokens", 1024), 1024 # 减少默认长度,提升速度
|
| 263 |
)
|
| 264 |
|
| 265 |
# 使用文档推荐的"轻松对话风格"参数组合,确保数值稳定
|
|
|
|
| 295 |
speaker2_audio: Optional[str],
|
| 296 |
speaker2_text: str,
|
| 297 |
use_normalize: bool,
|
| 298 |
+
temperature: float = 1.0,
|
| 299 |
+
top_k: int = 50,
|
| 300 |
+
top_p: float = 0.9,
|
| 301 |
+
repetition_penalty: float = 1.1,
|
| 302 |
+
max_new_tokens: int = 2048,
|
| 303 |
+
do_sample: bool = True,
|
| 304 |
) -> Tuple[Optional[str], str]:
|
| 305 |
try:
|
| 306 |
if not dialogue_text or not dialogue_text.strip():
|
|
|
|
| 312 |
|
| 313 |
# 初始化模型,显示进度
|
| 314 |
tokenizer, model, spt, device = initialize_model()
|
| 315 |
+
|
| 316 |
+
# 应用用户选择的生成参数,覆盖默认设置
|
| 317 |
+
print(f"🎛️ 应用用户参数: temp={temperature}, top_k={top_k}, top_p={top_p}, penalty={repetition_penalty}")
|
| 318 |
+
model.generation_config.temperature = temperature
|
| 319 |
+
model.generation_config.top_k = top_k
|
| 320 |
+
model.generation_config.top_p = top_p
|
| 321 |
+
model.generation_config.repetition_penalty = repetition_penalty
|
| 322 |
+
model.generation_config.max_new_tokens = min(max_new_tokens, 4096) # 安全限制
|
| 323 |
+
model.generation_config.do_sample = do_sample
|
| 324 |
|
| 325 |
# 根据输入拼装 item(process_batch 兼容单/双说话者)
|
| 326 |
item = {"text": dialogue_text}
|
|
|
|
| 402 |
# =========================
|
| 403 |
|
| 404 |
def create_space_ui() -> gr.Blocks:
|
| 405 |
+
# Apple-inspired 简洁设计样式
|
| 406 |
custom_css = """
|
| 407 |
+
/* 全局样式 - Apple风格 */
|
| 408 |
.gradio-container {
|
| 409 |
max-width: 1400px !important;
|
| 410 |
margin: 0 auto !important;
|
| 411 |
+
font-family: -apple-system, BlinkMacSystemFont, 'SF Pro Display', system-ui, sans-serif !important;
|
| 412 |
+
background: #f5f5f7 !important;
|
| 413 |
+
--primary-color: #007AFF;
|
| 414 |
+
--surface-color: #ffffff;
|
| 415 |
+
--border-color: #d1d1d6;
|
| 416 |
+
--text-primary: #1d1d1f;
|
| 417 |
+
--text-secondary: #86868b;
|
| 418 |
+
--shadow: 0 4px 16px rgba(0,0,0,0.1);
|
| 419 |
+
--radius: 12px;
|
| 420 |
}
|
| 421 |
+
/* 主标题区域 */
|
| 422 |
.header {
|
| 423 |
text-align: center;
|
| 424 |
margin-bottom: 2rem;
|
| 425 |
+
background: var(--surface-color);
|
| 426 |
+
padding: 3rem 2rem;
|
| 427 |
+
border-radius: var(--radius);
|
| 428 |
+
color: var(--text-primary);
|
| 429 |
+
box-shadow: var(--shadow);
|
| 430 |
+
border: 1px solid var(--border-color);
|
| 431 |
}
|
| 432 |
.header h1 {
|
| 433 |
+
font-size: 2.75rem;
|
| 434 |
+
margin: 0 0 0.5rem 0;
|
| 435 |
font-weight: 700;
|
| 436 |
+
letter-spacing: -0.02em;
|
| 437 |
+
color: var(--text-primary);
|
| 438 |
}
|
| 439 |
.header p {
|
| 440 |
+
font-size: 1.1rem;
|
|
|
|
| 441 |
margin: 0;
|
| 442 |
+
color: var(--text-secondary);
|
| 443 |
+
font-weight: 400;
|
| 444 |
}
|
| 445 |
.section {
|
| 446 |
background: #f8fafc;
|
|
|
|
| 611 |
)
|
| 612 |
|
| 613 |
with gr.Group():
|
| 614 |
+
gr.Markdown("### ⚙️ 基础设置")
|
| 615 |
with gr.Row():
|
| 616 |
use_normalize = gr.Checkbox(label="✅ 文本标准化(推荐)", value=True)
|
| 617 |
+
|
| 618 |
+
# 高级参数设置 - 可折叠
|
| 619 |
+
with gr.Accordion("🎛️ 高级参数设置", open=False):
|
| 620 |
+
gr.Markdown("**🎯 生成风格控制** - 根据需要调整参数以获得不同的语音风格")
|
| 621 |
+
|
| 622 |
+
# 预设风格选择
|
| 623 |
+
with gr.Row():
|
| 624 |
+
style_preset = gr.Dropdown(
|
| 625 |
+
label="🎨 预设风格",
|
| 626 |
+
choices=["轻松对话", "新闻播报", "娱乐节目", "教育讲解", "自定义"],
|
| 627 |
+
value="轻松对话",
|
| 628 |
+
interactive=True
|
| 629 |
+
)
|
| 630 |
+
|
| 631 |
+
gr.Markdown("**⚙️ 自定义参数** - 微调生成效果")
|
| 632 |
+
|
| 633 |
+
with gr.Row():
|
| 634 |
+
with gr.Column():
|
| 635 |
+
temperature = gr.Slider(
|
| 636 |
+
minimum=0.5,
|
| 637 |
+
maximum=1.5,
|
| 638 |
+
value=1.0,
|
| 639 |
+
step=0.1,
|
| 640 |
+
label="🌡️ 语气温度",
|
| 641 |
+
info="控制语气自然度 (0.5=稳定, 1.0=自然, 1.5=活泼)"
|
| 642 |
+
)
|
| 643 |
+
top_k = gr.Slider(
|
| 644 |
+
minimum=20,
|
| 645 |
+
maximum=100,
|
| 646 |
+
value=50,
|
| 647 |
+
step=10,
|
| 648 |
+
label="🔝 词汇多样性",
|
| 649 |
+
info="控制词汇选择范围"
|
| 650 |
+
)
|
| 651 |
+
with gr.Column():
|
| 652 |
+
top_p = gr.Slider(
|
| 653 |
+
minimum=0.7,
|
| 654 |
+
maximum=1.0,
|
| 655 |
+
value=0.9,
|
| 656 |
+
step=0.05,
|
| 657 |
+
label="🎯 表达流畅度",
|
| 658 |
+
info="控制表达的连贯性"
|
| 659 |
+
)
|
| 660 |
+
repetition_penalty = gr.Slider(
|
| 661 |
+
minimum=1.0,
|
| 662 |
+
maximum=1.3,
|
| 663 |
+
value=1.1,
|
| 664 |
+
step=0.05,
|
| 665 |
+
label="🔄 重复避免",
|
| 666 |
+
info="避免重复表达的强度"
|
| 667 |
+
)
|
| 668 |
+
|
| 669 |
+
with gr.Row():
|
| 670 |
+
max_new_tokens = gr.Slider(
|
| 671 |
+
minimum=512,
|
| 672 |
+
maximum=2048,
|
| 673 |
+
value=1024,
|
| 674 |
+
step=128,
|
| 675 |
+
label="📏 最大生成长度",
|
| 676 |
+
info="控制生成音频的长度 (512=快速, 1024=平衡, 2048=完整)"
|
| 677 |
+
)
|
| 678 |
+
do_sample = gr.Checkbox(
|
| 679 |
+
label="🎲 启用采样",
|
| 680 |
+
value=True,
|
| 681 |
+
info="关闭后使用确定性生成,更稳定但缺乏变化"
|
| 682 |
+
)
|
| 683 |
+
|
| 684 |
btn_generate = gr.Button("🎬 开始合成", variant="primary", size="lg")
|
| 685 |
+
gr.Markdown("💡 **开箱即用**: 页面已预填充默认内容,可直接合成 | **生成优化**: 预计20-40秒完成")
|
| 686 |
|
| 687 |
# 右侧:输出与说明
|
| 688 |
with gr.Column(scale=2):
|
|
|
|
| 789 |
gr.Error(f"❌ 加载默认音频时出错: {str(e)}")
|
| 790 |
return gr.update(), gr.update(), gr.update(), gr.update(), gr.update()
|
| 791 |
|
| 792 |
+
# Sampling-parameter bundles keyed by the preset names offered in the
# style dropdown. Each bundle supplies the four slider values
# (temperature, top_k, top_p, repetition_penalty).
STYLE_PRESETS = {
    "轻松对话": {
        "temperature": 1.0,
        "top_k": 50,
        "top_p": 0.9,
        "repetition_penalty": 1.1,
    },
    "新闻播报": {
        "temperature": 0.8,
        "top_k": 30,
        "top_p": 0.85,
        "repetition_penalty": 1.05,
    },
    "娱乐节目": {
        "temperature": 1.2,
        "top_k": 80,
        "top_p": 0.95,
        "repetition_penalty": 1.15,
    },
    "教育讲解": {
        "temperature": 0.9,
        "top_k": 40,
        "top_p": 0.88,
        "repetition_penalty": 1.08,
    },
    # "自定义" carries the same values as "轻松对话".
    "自定义": {
        "temperature": 1.0,
        "top_k": 50,
        "top_p": 0.9,
        "repetition_penalty": 1.1,
    },
}
|
| 800 |
+
|
| 801 |
+
def on_style_preset_change(preset_name):
    """Build slider updates for the selected style preset.

    Looks ``preset_name`` up in ``STYLE_PRESETS`` and returns a 4-tuple of
    ``gr.update`` objects carrying the preset's values, in the order
    temperature, top_k, top_p, repetition_penalty. If the name is not a
    key of ``STYLE_PRESETS``, four argument-less ``gr.update()`` objects
    are returned instead.
    """
    slider_keys = ("temperature", "top_k", "top_p", "repetition_penalty")

    preset = STYLE_PRESETS.get(preset_name)
    if preset is None:
        # Unknown preset name: return updates that carry no new value.
        return gr.update(), gr.update(), gr.update(), gr.update()

    return tuple(gr.update(value=preset[key]) for key in slider_keys)
|
| 812 |
+
|
| 813 |
+
# 绑定预设风格变更事件
|
| 814 |
+
style_preset.change(
|
| 815 |
+
fn=on_style_preset_change,
|
| 816 |
+
inputs=[style_preset],
|
| 817 |
+
outputs=[temperature, top_k, top_p, repetition_penalty]
|
| 818 |
+
)
|
| 819 |
|
| 820 |
btn_load_scenario.click(
|
| 821 |
fn=on_load_scenario,
|
|
|
|
| 830 |
|
| 831 |
btn_generate.click(
|
| 832 |
fn=generate_dialogue_audio,
|
| 833 |
+
inputs=[
|
| 834 |
+
dialogue_text, speaker1_audio, speaker1_text, speaker2_audio, speaker2_text,
|
| 835 |
+
use_normalize, temperature, top_k, top_p, repetition_penalty, max_new_tokens, do_sample
|
| 836 |
+
],
|
| 837 |
outputs=[output_audio, status_info],
|
| 838 |
show_progress=True,
|
| 839 |
)
|