Commit 46f6023 (parent 80c639a): Fixing class names

app.py CHANGED
@@ -1,4 +1,5 @@
 from collections.abc import Sequence
+import json
 import random
 from typing import Optional
 
@@ -85,10 +86,9 @@ detector_module = transformers.generation.BayesianDetectorModel.from_pretrained(
 )
 detector_module.to(_TORCH_DEVICE)
 
-detector = transformers.generation.watermarking.
+detector = transformers.generation.watermarking.SynthIDTextWatermarkDetector(
     detector_module=detector_module,
     logits_processor=logits_processor,
-    tokenizer=tokenizer,
 )
 
 
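For context, a minimal sketch of how a detector constructed this way might be used. The callable interface and the reading of its output as a watermark score follow the Transformers SynthID Text detection example; it assumes `model`, `tokenizer`, `detector`, and `_TORCH_DEVICE` are in scope as defined elsewhere in app.py, and that `_WATERMARK_CONFIG_DICT` holds keyword arguments accepted by `SynthIDTextWatermarkingConfig` — these are assumptions, not part of this commit.

```python
from transformers.generation import SynthIDTextWatermarkingConfig

# Sketch only: assumes _WATERMARK_CONFIG_DICT contains keyword arguments
# accepted by SynthIDTextWatermarkingConfig.
watermarking_config = SynthIDTextWatermarkingConfig(**_WATERMARK_CONFIG_DICT)

inputs = tokenizer(["An example prompt"], return_tensors="pt").to(_TORCH_DEVICE)
output_ids = model.generate(
    **inputs,
    watermarking_config=watermarking_config,
    do_sample=True,
)
# Assumed interface: the detector is callable on batches of generated token
# ids and returns a score per sequence; higher scores indicate text that was
# likely generated with the matching watermarking configuration.
scores = detector(output_ids)
```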
@@ -114,7 +114,7 @@ def generate_outputs(
 
 with gr.Blocks() as demo:
     gr.Markdown(
-        '''
+        f'''
 # Using SynthID Text in your Generative AI projects
 
 [SynthID][synthid] is a Google DeepMind technology that watermarks and
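The only functional change in this hunk is the `f` prefix on the Markdown string, which lets the `{json.dumps(_WATERMARK_CONFIG_DICT)}` placeholder added in the next hunk be interpolated rather than rendered literally. A small, self-contained illustration, using a made-up stand-in value for the config dict:

```python
import json

_WATERMARK_CONFIG_DICT = {"ngram_len": 5}  # stand-in value for illustration

plain = '''{json.dumps(_WATERMARK_CONFIG_DICT)}'''         # braces kept literally
interpolated = f'''{json.dumps(_WATERMARK_CONFIG_DICT)}'''  # expression evaluated

assert plain == '{json.dumps(_WATERMARK_CONFIG_DICT)}'
assert interpolated == '{"ngram_len": 5}'
```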
@@ -143,12 +143,63 @@ with gr.Blocks() as demo:
 detectors, and the [SynthID Text documentation][raitk-synthid] for more on
 how this technology works.
 
+## Getting started
+
+Practically speaking, SynthID Text is a logits processor, applied to your
+model's generation pipeline after [Top-K and Top-P][cloud-parameter-values],
+that augments the model's logits using a pseudorandom _g_-function to encode
+watermarking information in a way that balances generation quality with
+watermark detectability. See the [paper][synthid-nature] for a complete
+technical description of the algorithm and analyses of how different
+configuration values affect performance.
+
+Watermarks are [configured][synthid-hf-config] to parameterize the
+_g_-function and how it is applied during generation. We use the following
+configuration for all demos. It should not be used for any production
+purposes.
+
+```json
+{json.dumps(_WATERMARK_CONFIG_DICT)}
+```
+
+Watermarks are applied by initializing a `SynthIDTextWatermarkingConfig`
+and passing that as the `watermarking_config=` parameter in your call to
+`.generate()`, as shown in the snippet below.
+
+```python
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers.generation import SynthIDTextWatermarkingConfig
+
+# Standard model and tokenizer initialization
+tokenizer = AutoTokenizer.from_pretrained('repo/id')
+model = AutoModelForCausalLM.from_pretrained('repo/id')
+
+# SynthID Text configuration
+watermarking_config = SynthIDTextWatermarkingConfig(...)
+
+# Generation with watermarking
+tokenized_prompts = tokenizer(["your prompts here"])
+output_sequences = model.generate(
+    **tokenized_prompts,
+    watermarking_config=watermarking_config,
+    do_sample=True,
+)
+watermarked_text = tokenizer.batch_decode(output_sequences)
+```
+
+Enter up to three prompts, then click the generate button. After you click,
+[Gemma 2B][gemma] will generate watermarked and non-watermarked responses
+for each non-empty prompt.
+
+[cloud-parameter-values]: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/prompts/adjust-parameter-values
+[gemma]: https://huggingface.co/google/gemma-2b
 [raitk-synthid]: /responsible/docs/safeguards/synthid
 [synthid]: https://deepmind.google/technologies/synthid/
 [synthid-hf-config]: https://github.com/huggingface/transformers/blob/v4.46.0/src/transformers/generation/configuration_utils.py
 [synthid-hf-detector]: https://github.com/huggingface/transformers/blob/v4.46.0/src/transformers/generation/watermarking.py
 [synthid-hf-detector-e2e]: https://github.com/huggingface/transformers/blob/v4.46.0/examples/research_projects/synthid_text/detector_bayesian.py
 [synthid-hf-logits-processor]: https://github.com/huggingface/transformers/blob/v4.46.0/src/transformers/generation/logits_process.py
+[synthid-nature]: https://www.nature.com/articles/s41586-024-08025-4
 '''
 )
 prompt_inputs = [
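The snippet in the new Markdown elides the configuration arguments with `(...)`. As a hedged sketch of what such a configuration could look like, the block below constructs one directly; the `keys` and `ngram_len` values are illustrative assumptions, not the demo's actual `_WATERMARK_CONFIG_DICT`.

```python
from transformers.generation import SynthIDTextWatermarkingConfig

# Illustrative values only; a production deployment should use its own
# private watermarking keys rather than these.
watermarking_config = SynthIDTextWatermarkingConfig(
    keys=[654, 400, 836, 123, 340, 443, 597, 160, 57, 29],
    ngram_len=5,  # length of the token n-gram the g-function is keyed on
)

# This object is then passed as `watermarking_config=` to `model.generate()`,
# exactly as in the snippet embedded in the Markdown above.
```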