Commit 46f6023 (parent 80c639a): Fixing class names

app.py CHANGED
@@ -1,4 +1,5 @@
 from collections.abc import Sequence
+import json
 import random
 from typing import Optional
 
@@ -85,10 +86,9 @@ detector_module = transformers.generation.BayesianDetectorModel.from_pretrained(
 )
 detector_module.to(_TORCH_DEVICE)
 
-detector = transformers.generation.watermarking.
+detector = transformers.generation.watermarking.SynthIDTextWatermarkDetector(
     detector_module=detector_module,
     logits_processor=logits_processor,
-    tokenizer=tokenizer,
 )
 
 
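For context, a minimal sketch of how a detector constructed this way might be used. The callable interface and the reading of its output as a watermark score follow the Transformers SynthID Text detection example; it assumes `model`, `tokenizer`, `detector`, and `_TORCH_DEVICE` are in scope as defined elsewhere in app.py, and that `_WATERMARK_CONFIG_DICT` holds keyword arguments accepted by `SynthIDTextWatermarkingConfig` — these are assumptions, not part of this commit.

```python
from transformers.generation import SynthIDTextWatermarkingConfig

# Sketch only: assumes _WATERMARK_CONFIG_DICT contains keyword arguments
# accepted by SynthIDTextWatermarkingConfig.
watermarking_config = SynthIDTextWatermarkingConfig(**_WATERMARK_CONFIG_DICT)

inputs = tokenizer(["An example prompt"], return_tensors="pt").to(_TORCH_DEVICE)
output_ids = model.generate(
    **inputs,
    watermarking_config=watermarking_config,
    do_sample=True,
)
# Assumed interface: the detector is callable on batches of generated token
# ids and returns a score per sequence; higher scores indicate text that was
# likely generated with the matching watermarking configuration.
scores = detector(output_ids)
```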
@@ -114,7 +114,7 @@ def generate_outputs(
 
 with gr.Blocks() as demo:
     gr.Markdown(
-        '''
+        f'''
 # Using SynthID Text in your Generative AI projects
 
 [SynthID][synthid] is a Google DeepMind technology that watermarks and
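The only functional change in this hunk is the `f` prefix on the Markdown string, which lets the `{json.dumps(_WATERMARK_CONFIG_DICT)}` placeholder added in the next hunk be interpolated rather than rendered literally. A small, self-contained illustration, using a made-up stand-in value for the config dict:

```python
import json

_WATERMARK_CONFIG_DICT = {"ngram_len": 5}  # stand-in value for illustration

plain = '''{json.dumps(_WATERMARK_CONFIG_DICT)}'''         # braces kept literally
interpolated = f'''{json.dumps(_WATERMARK_CONFIG_DICT)}'''  # expression evaluated

assert plain == '{json.dumps(_WATERMARK_CONFIG_DICT)}'
assert interpolated == '{"ngram_len": 5}'
```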
@@ -143,12 +143,63 @@ with gr.Blocks() as demo:
 detectors, and the [SynthID Text documentation][raitk-synthid] for more on
 how this technology works.
 
+## Getting started
+
+Practically speaking, SynthID Text is a logits processor, applied to your
+model's generation pipeline after [Top-K and Top-P][cloud-parameter-values],
+that augments the model's logits using a pseudorandom _g_-function to encode
+watermarking information in a way that balances generation quality with
+watermark detectability. See the [paper][synthid-nature] for a complete
+technical description of the algorithm and analyses of how different
+configuration values affect performance.
+
+Watermarks are [configured][synthid-hf-config] to parameterize the
+_g_-function and how it is applied during generation. We use the following
+configuration for all demos. It should not be used for any production
+purposes.
+
+```json
+{json.dumps(_WATERMARK_CONFIG_DICT)}
+```
+
+Watermarks are applied by initializing a `SynthIDTextWatermarkingConfig`
+and passing that as the `watermarking_config=` parameter in your call to
+`.generate()`, as shown in the snippet below.
+
+```python
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers.generation import SynthIDTextWatermarkingConfig
+
+# Standard model and tokenizer initialization
+tokenizer = AutoTokenizer.from_pretrained('repo/id')
+model = AutoModelForCausalLM.from_pretrained('repo/id')
+
+# SynthID Text configuration
+watermarking_config = SynthIDTextWatermarkingConfig(...)
+
+# Generation with watermarking
+tokenized_prompts = tokenizer(["your prompts here"])
+output_sequences = model.generate(
+    **tokenized_prompts,
+    watermarking_config=watermarking_config,
+    do_sample=True,
+)
+watermarked_text = tokenizer.batch_decode(output_sequences)
+```
+
+Enter up to three prompts, then click the generate button. After you click,
+[Gemma 2B][gemma] will generate watermarked and non-watermarked responses
+for each non-empty prompt.
+
+[cloud-parameter-values]: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/prompts/adjust-parameter-values
+[gemma]: https://huggingface.co/google/gemma-2b
 [raitk-synthid]: /responsible/docs/safeguards/synthid
 [synthid]: https://deepmind.google/technologies/synthid/
 [synthid-hf-config]: https://github.com/huggingface/transformers/blob/v4.46.0/src/transformers/generation/configuration_utils.py
 [synthid-hf-detector]: https://github.com/huggingface/transformers/blob/v4.46.0/src/transformers/generation/watermarking.py
 [synthid-hf-detector-e2e]: https://github.com/huggingface/transformers/blob/v4.46.0/examples/research_projects/synthid_text/detector_bayesian.py
 [synthid-hf-logits-processor]: https://github.com/huggingface/transformers/blob/v4.46.0/src/transformers/generation/logits_process.py
+[synthid-nature]: https://www.nature.com/articles/s41586-024-08025-4
 '''
 )
 prompt_inputs = [
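The snippet in the new Markdown elides the configuration arguments with `(...)`. As a hedged sketch of what such a configuration could look like, the block below constructs one directly; the `keys` and `ngram_len` values are illustrative assumptions, not the demo's actual `_WATERMARK_CONFIG_DICT`.

```python
from transformers.generation import SynthIDTextWatermarkingConfig

# Illustrative values only; a production deployment should use its own
# private watermarking keys rather than these.
watermarking_config = SynthIDTextWatermarkingConfig(
    keys=[654, 400, 836, 123, 340, 443, 597, 160, 57, 29],
    ngram_len=5,  # length of the token n-gram the g-function is keyed on
)

# This object is then passed as `watermarking_config=` to `model.generate()`,
# exactly as in the snippet embedded in the Markdown above.
```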