Spaces · Running on T4

Commit 9c712b0 · committed by meg-huggingface · Parent(s): 6110073

Adding image and some code clean-up
Files changed:
- app.py (+4 -0)
- assets/voice_consent_gate_50.png (+0 -0)
- src/generate.py (+27 -24)
app.py CHANGED

@@ -7,6 +7,8 @@ import src.process as process
 
 global client
 
+GATE_IMAGE_PATH = "./assets/voice_consent_gate_50.png"
+
 # TODO: Ideally, instead of the Client method we're using for an external voice cloning app, we use the .load() function and pass in arguments to it directly while displaying the developer's desired UI.
 #chatterbox_space = gr.load("spaces/ResembleAI/Chatterbox")
 # ------------------- UI printing functions -------------------

@@ -168,6 +170,8 @@ def clone_voice(audio_input, text_input, exaggeration_input, cfgw_input,
 with gr.Blocks(title="Voice Consent Gate") as demo:
     gr.Markdown("# Voice Consent Gate: Demo")
     with gr.Row():
+        with gr.Column():
+            gr.Image(GATE_IMAGE_PATH, interactive=False, show_download_button=False)
         with gr.Column():
             with gr.Accordion(
                 label="Click for further information on this demo",
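For context, the resulting layout in app.py looks roughly like the minimal sketch below: the new left-hand column renders the static gate illustration alongside the existing accordion column. Only the `+` lines above come from this commit; the accordion body, `open=False`, and the launch guard are illustrative placeholders, since the rest of app.py is not shown here.

import gradio as gr

GATE_IMAGE_PATH = "./assets/voice_consent_gate_50.png"

with gr.Blocks(title="Voice Consent Gate") as demo:
    gr.Markdown("# Voice Consent Gate: Demo")
    with gr.Row():
        with gr.Column():
            # Static illustration of the consent gate; not user-editable or downloadable.
            gr.Image(GATE_IMAGE_PATH, interactive=False, show_download_button=False)
        with gr.Column():
            with gr.Accordion(
                label="Click for further information on this demo",
                open=False,  # placeholder; the real accordion contents live in app.py
            ):
                gr.Markdown("Further information about the demo goes here.")

if __name__ == "__main__":
    demo.launch()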
assets/voice_consent_gate_50.png ADDED
src/generate.py CHANGED

@@ -10,6 +10,7 @@ sentences that users can read aloud to give informed consent for voice cloning.
 
 Functions:
 - _extract_llama_text(): Normalize the API output from the Llama demo.
+- gen_sentence(): Wrapper for gen_sentence_llm(); previously supported other options.
 - gen_sentence_llm(): Generate a consent sentence from the Llama model Space.
 """
 

@@ -41,14 +42,9 @@ def _extract_llama_text(result: Any) -> str:
     meaningful text string it finds.
 
     Parameters
-    ----------
-    result : Any
-        The raw output returned by `client.predict()`.
+    result : The raw output returned by `client.predict()`.
 
-    Returns
-    -------
-    str
-        Cleaned text output (may be empty string if extraction fails).
+    str : Cleaned text output (may be empty string if extraction fails).
     """
     if isinstance(result, str):
         return result.strip()

@@ -74,8 +70,12 @@ def _extract_llama_text(result: Any) -> str:
 
 def gen_sentence(consent_method="Llama 3.2 3B Instruct", voice_clone_model="Chatterbox"):
     """
-    Always
-
+    Always generates a sentence via the LLM.
+    Parameters
+    consent_method: str
+        The language model used to generate a consent sentence
+    voice_clone_model: str
+        The voice cloning model
     """
     try:
         return gen_sentence_llm(consent_method, voice_clone_model)

@@ -94,26 +94,29 @@ def gen_sentence_llm(consent_method="Llama 3.2 3B Instruct", voice_clone_model="Chatterbox"):
 
     The response is normalized into a single English sentence suitable
     for reading aloud.
+    Parameters
+    consent_method : str
+        The name of the language model used to generate the consent utterance.
+        Currently just implemented for Llama 3.2 3B Instruct.
+    audio_model_name : str
+        The name of the voice-cloning model to mention in the sentence.
+        Defaults to "Chatterbox".
 
-
-
-
-        The name of the voice-cloning model to mention in the sentence.
-        Defaults to "Chatterbox".
-
-    Returns
-    -------
-    str
-        A clean, human-readable consent sentence.
-    :param consent_method:
-    :param voice_clone_model:
+    Returns
+    str
+        A clean, human-readable consent sentence.
     """
     # Generate the full natural-language prompt that the LLM will receive
     prompt = get_consent_generation_prompt(voice_clone_model)
+    space_id = LLAMA_SPACE_ID
+    api_name = LLAMA_API_NAME
 
     try:
-        #
-
+        # Currently always true.
+        if consent_method != "Llama 3.2 3B Instruct":
+            print("Not currently implemented for %s; using Llama 3.2 3B Instruct" % consent_method)
+        # Initialize Gradio client for the language model Space
+        client = Client(space_id, hf_token=HF_TOKEN)
 
         # The Llama demo exposes a simple /chat endpoint with standard decoding params
         result = client.predict(

@@ -123,7 +126,7 @@ def gen_sentence_llm(consent_method="Llama 3.2 3B Instruct", voice_clone_model="Chatterbox"):
             top_p=0.9,
             top_k=50,
             repetition_penalty=1.2,
-            api_name=
+            api_name=api_name,
         )
 
         # Normalize and clean up model output
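As a standalone illustration of the pattern gen_sentence_llm() now follows, the sketch below opens a gradio_client Client against a hosted Llama Space, calls its endpoint with the decoding parameters shown in the diff, and normalizes the response the way _extract_llama_text() does. This is a sketch under assumed names: the Space ID, endpoint name, and the message keyword are placeholders, not the repository's actual LLAMA_SPACE_ID, LLAMA_API_NAME, or endpoint signature.

import os
from typing import Any

from gradio_client import Client

HF_TOKEN = os.environ.get("HF_TOKEN")
SPACE_ID = "some-org/some-llama-space"  # placeholder for LLAMA_SPACE_ID
API_NAME = "/chat"                      # placeholder for LLAMA_API_NAME


def extract_text(result: Any) -> str:
    """Return the first meaningful string found in a predict() result."""
    if isinstance(result, str):
        return result.strip()
    if isinstance(result, (list, tuple)):
        for item in result:
            text = extract_text(item)
            if text:
                return text
    if isinstance(result, dict):
        for value in result.values():
            text = extract_text(value)
            if text:
                return text
    return ""


def generate_sentence(prompt: str) -> str:
    """Ask the hosted LLM for one consent sentence and return it as plain text."""
    client = Client(SPACE_ID, hf_token=HF_TOKEN)
    result = client.predict(
        message=prompt,  # keyword names depend on the target Space's API
        top_p=0.9,
        top_k=50,
        repetition_penalty=1.2,
        api_name=API_NAME,
    )
    return extract_text(result)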