Spaces:

google
/

synthid-text

Running on L40S

App Files Files Community

RyanMullins committed on Oct 15, 2024

Commit

2a04008

1 Parent(s): ff97c38

Adding generation with GPT-2 as a mock model

Browse files

Files changed (2) hide show

app.py +80 -6
requirements.txt +3 -0

app.py CHANGED Viewed

@@ -1,23 +1,94 @@
 from collections.abc import Sequence
 import random
 import gradio as gr
 # If the watermark is not detected, consider the use case. It could be because
 # of the nature of the task (e.g., factual responses are lower entropy) or it
 # could be for another reason.
-_GEMMA_2B = 'google/gemma-2b'
 _PROMPTS: tuple[str] = (
     'prompt 1',
     'prompt 2',
     'prompt 3',
-    'prompt 4',
 )
 _CORRECT_ANSWERS: dict[str, bool] = {}
 with gr.Blocks() as demo:
   prompt_inputs = [
       gr.Textbox(value=prompt, lines=4, label='Prompt')
@@ -43,14 +114,17 @@ with gr.Blocks() as demo:
     detect_btn = gr.Button('Detect', visible=False)
   def generate(*prompts):
-    standard = [f'{prompt} response' for prompt in prompts]
-    watermarked = [f'{prompt} watermarked response' for prompt in prompts]
     responses = standard + watermarked
     random.shuffle(responses)
     _CORRECT_ANSWERS.update({
-      response: response in watermarked
-      for response in responses
     })
     # Load model

 from collections.abc import Sequence
 import random
+from typing import Optional
 import gradio as gr
+import spaces
+import torch
+import transformers
 # If the watermark is not detected, consider the use case. It could be because
 # of the nature of the task (e.g., factual responses are lower entropy) or it
 # could be for another reason.
+_MODEL_IDENTIFIER = 'hf-internal-testing/tiny-random-gpt2'
 _PROMPTS: tuple[str] = (
     'prompt 1',
     'prompt 2',
     'prompt 3',
 )
 _CORRECT_ANSWERS: dict[str, bool] = {}
+# Device used for the model and the tokenized inputs: the first CUDA device
+# when CUDA is available, otherwise the CPU.
+_TORCH_DEVICE = (
+    torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
+)
+# SynthID Text watermarking configuration. It is passed to `generate_outputs`
+# as `watermarking_config` to produce the watermarked responses.
+# NOTE(review): the meaning of `ngram_len`, `keys`, and the sampling/context
+# parameters is defined by the transformers SynthID Text implementation; the
+# values below are presumably demo placeholders -- confirm before reuse.
+_WATERMARK_CONFIG = transformers.generation.SynthIDTextWatermarkingConfig(
+    ngram_len=5,
+    keys=[
+        654,
+        400,
+        836,
+        123,
+        340,
+        443,
+        597,
+        160,
+        57,
+        29,
+        590,
+        639,
+        13,
+        715,
+        468,
+        990,
+        966,
+        226,
+        324,
+        585,
+        118,
+        504,
+        421,
+        521,
+        129,
+        669,
+        732,
+        225,
+        90,
+        960,
+    ],
+    sampling_table_size=2**16,
+    sampling_table_seed=0,
+    context_history_size=1024,
+)
+# Load the tokenizer and model named by _MODEL_IDENTIFIER once, at import time.
+tokenizer = transformers.AutoTokenizer.from_pretrained(_MODEL_IDENTIFIER)
+# Reuse the end-of-sequence token id as the padding token id.
+tokenizer.pad_token_id = tokenizer.eos_token_id
+model = transformers.AutoModelForCausalLM.from_pretrained(_MODEL_IDENTIFIER)
+# Move the model to the device selected in _TORCH_DEVICE.
+model.to(_TORCH_DEVICE)
+@spaces.GPU
+def generate_outputs(
+  prompts: Sequence[str],
+  watermarking_config: Optional[
+      transformers.generation.SynthIDTextWatermarkingConfig
+  ] = None,
+) -> Sequence[str]:
+  tokenized_prompts = tokenizer(prompts, return_tensors='pt').to(_TORCH_DEVICE)
+  output_sequences = model.generate(
+      **tokenized_prompts,
+      watermarking_config=watermarking_config,
+      do_sample=True,
+      max_length=500,
+      top_k=40,
+  )
+  return tokenizer.batch_decode(output_sequences)
 with gr.Blocks() as demo:
   prompt_inputs = [
       gr.Textbox(value=prompt, lines=4, label='Prompt')
     detect_btn = gr.Button('Detect', visible=False)
   def generate(*prompts):
+    standard = generate_outputs(prompts=prompts)
+    watermarked = generate_outputs(
+        prompts=prompts,
+        watermarking_config=_WATERMARK_CONFIG,
+    )
     responses = standard + watermarked
     random.shuffle(responses)
     _CORRECT_ANSWERS.update({
+        response: response in watermarked
+        for response in responses
     })
     # Load model

requirements.txt CHANGED Viewed

@@ -1,3 +1,6 @@
 gradio
 spaces
 transformers @ git+https://github.com/sumedhghaisas2/transformers_private@synthid_text

 gradio
 spaces
 transformers @ git+https://github.com/sumedhghaisas2/transformers_private@synthid_text
+--extra-index-url https://download.pytorch.org/whl/cu113
+torch