Spaces:

google
/

synthid-text

Running on L40S

App Files Files Community

RyanMullins committed on Oct 23, 2024

Commit

80c639a

1 Parent(s): 2a04008

Committing in broken state for sharing with HF

Browse files

Files changed (2) hide show

app.py +77 -6
requirements.txt +1 -1

app.py CHANGED Viewed

@@ -11,7 +11,8 @@ import transformers
 # the nature of the task (e.g., factual responses are lower entropy) or it could
 # be another
-_MODEL_IDENTIFIER = 'hf-internal-testing/tiny-random-gpt2'
 _PROMPTS: tuple[str] = (
     'prompt 1',
@@ -25,7 +26,7 @@ _TORCH_DEVICE = (
     torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
 )
-_WATERMARK_CONFIG = transformers.generation.SynthIDTextWatermarkingConfig(
     ngram_len=5,
     keys=[
         654,
@@ -64,12 +65,32 @@ _WATERMARK_CONFIG = transformers.generation.SynthIDTextWatermarkingConfig(
     context_history_size=1024,
 )
 tokenizer = transformers.AutoTokenizer.from_pretrained(_MODEL_IDENTIFIER)
 tokenizer.pad_token_id = tokenizer.eos_token_id
 model = transformers.AutoModelForCausalLM.from_pretrained(_MODEL_IDENTIFIER)
 model.to(_TORCH_DEVICE)
 @spaces.GPU
 def generate_outputs(
@@ -86,10 +107,50 @@ def generate_outputs(
       max_length=500,
       top_k=40,
   )
   return tokenizer.batch_decode(output_sequences)
 with gr.Blocks() as demo:
   prompt_inputs = [
       gr.Textbox(value=prompt, lines=4, label='Prompt')
       for prompt in _PROMPTS
@@ -97,6 +158,11 @@ with gr.Blocks() as demo:
   generate_btn = gr.Button('Generate')
   with gr.Column(visible=False) as generations_col:
     generations_grp = gr.CheckboxGroup(
         label='All generations, in random order',
         info='Select the generations you think are watermarked!',
@@ -104,6 +170,11 @@ with gr.Blocks() as demo:
     reveal_btn = gr.Button('Reveal', visible=False)
   with gr.Column(visible=False) as detections_col:
     revealed_grp = gr.CheckboxGroup(
         label='Ground truth for all generations',
         info=(
@@ -160,10 +231,10 @@ with gr.Blocks() as demo:
         value.append(choice)
     return {
-      reveal_btn: gr.Button(visible=False),
-      detections_col: gr.Column(visible=True),
-      revealed_grp: gr.CheckboxGroup(choices=choices, value=value),
-      detect_btn: gr.Button(visible=True),
     }
   reveal_btn.click(

 # the nature of the task (e.g., factual responses are lower entropy) or it could
 # be another
+_MODEL_IDENTIFIER = 'google/gemma-2b'
+_DETECTOR_IDENTIFIER = 'gg-hf/detector_2b_1.0_demo'
 _PROMPTS: tuple[str] = (
     'prompt 1',
     torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
 )
+_WATERMARK_CONFIG_DICT = dict(
     ngram_len=5,
     keys=[
         654,
     context_history_size=1024,
 )
+_WATERMARK_CONFIG = transformers.generation.SynthIDTextWatermarkingConfig(
+    **_WATERMARK_CONFIG_DICT
+)
 tokenizer = transformers.AutoTokenizer.from_pretrained(_MODEL_IDENTIFIER)
 tokenizer.pad_token_id = tokenizer.eos_token_id
 model = transformers.AutoModelForCausalLM.from_pretrained(_MODEL_IDENTIFIER)
 model.to(_TORCH_DEVICE)
+# Logits processor built from the same watermarking settings as
+# _WATERMARK_CONFIG; it is handed to the detector wrapper below.
+logits_processor = transformers.generation.SynthIDTextWatermarkLogitsProcessor(
+    **_WATERMARK_CONFIG_DICT,
+    device=_TORCH_DEVICE,
+)
+# Load the pretrained Bayesian detector and place it on the same device as
+# the generation model.
+detector_module = transformers.generation.BayesianDetectorModel.from_pretrained(
+    _DETECTOR_IDENTIFIER,
+)
+detector_module.to(_TORCH_DEVICE)
+# SynthIDTextWatermarkDetector is the wrapper that accepts the detector
+# module, the logits processor and the tokenizer. BayesianDetectorModel is
+# the underlying model loaded above and does not take these keyword arguments.
+detector = transformers.generation.watermarking.SynthIDTextWatermarkDetector(
+    detector_module=detector_module,
+    logits_processor=logits_processor,
+    tokenizer=tokenizer,
+)
 @spaces.GPU
 def generate_outputs(
       max_length=500,
       top_k=40,
   )
+  detections = detector(output_sequences)
+  print(detections)
   return tokenizer.batch_decode(output_sequences)
 with gr.Blocks() as demo:
+  gr.Markdown(
+    '''
+    # Using SynthID Text in your Generative AI projects
+    [SynthID][synthid] is a Google DeepMind technology that watermarks and
+    identifies AI-generated content by embedding digital watermarks directly
+    into AI-generated images, audio, text or video.
+    SynthID Text is an open source implementation of this technology available
+    in Hugging Face Transformers that has two major components:
+    *   A [logits processor][synthid-hf-logits-processor] that is
+        [configured][synthid-hf-config] on a per-model basis and activated when
+        calling `.generate()`; and
+    *   A [detector][synthid-hf-detector] trained to recognize watermarked text
+        generated by a specific model with a specific configuration.
+    This Space demonstrates:
+    1.  How to use SynthID Text to apply a watermark to text generated by your
+        model; and
+    1.  How to identify that text using a ready-made detector.
+    Note that this detector is trained specifically for this demonstration. You
+    should maintain a specific watermarking configuration for every model you
+    use and protect that configuration as you would any other secret. See the
+    [end-to-end guide][synthid-hf-detector-e2e] for more on training your own
+    detectors, and the [SynthID Text documentation][raitk-synthid] for more on
+    how this technology works.
+    [raitk-synthid]: https://ai.google.dev/responsible/docs/safeguards/synthid
+    [synthid]: https://deepmind.google/technologies/synthid/
+    [synthid-hf-config]: https://github.com/huggingface/transformers/blob/v4.46.0/src/transformers/generation/configuration_utils.py
+    [synthid-hf-detector]: https://github.com/huggingface/transformers/blob/v4.46.0/src/transformers/generation/watermarking.py
+    [synthid-hf-detector-e2e]: https://github.com/huggingface/transformers/blob/v4.46.0/examples/research_projects/synthid_text/detector_bayesian.py
+    [synthid-hf-logits-processor]: https://github.com/huggingface/transformers/blob/v4.46.0/src/transformers/generation/logits_process.py
+    '''
+  )
   prompt_inputs = [
       gr.Textbox(value=prompt, lines=4, label='Prompt')
       for prompt in _PROMPTS
   generate_btn = gr.Button('Generate')
   with gr.Column(visible=False) as generations_col:
+    gr.Markdown(
+      '''
+      # SynthID: Tool
+      '''
+    )
     generations_grp = gr.CheckboxGroup(
         label='All generations, in random order',
         info='Select the generations you think are watermarked!',
     reveal_btn = gr.Button('Reveal', visible=False)
   with gr.Column(visible=False) as detections_col:
+    gr.Markdown(
+      '''
+      # SynthID: Tool
+      '''
+    )
     revealed_grp = gr.CheckboxGroup(
         label='Ground truth for all generations',
         info=(
         value.append(choice)
     return {
+        reveal_btn: gr.Button(visible=False),
+        detections_col: gr.Column(visible=True),
+        revealed_grp: gr.CheckboxGroup(choices=choices, value=value),
+        detect_btn: gr.Button(visible=True),
     }
   reveal_btn.click(

requirements.txt CHANGED Viewed

@@ -1,6 +1,6 @@
 gradio
 spaces
-transformers @ git+https://github.com/sumedhghaisas2/transformers_private@synthid_text
 --extra-index-url https://download.pytorch.org/whl/cu113
 torch

 gradio
 spaces
+transformers>=4.46.0
 --extra-index-url https://download.pytorch.org/whl/cu113
 torch