Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import os | |
| from pii_transform.api.e2e import PiiTextProcessor | |
| from pii_extract.defs import FMT_CONFIG_PLUGIN | |
# Load the demo examples: one example per non-blank line of ``examples.txt``.
# The encoding is given explicitly so non-ASCII examples are read the same way
# regardless of the host's locale, and line terminators are stripped so they
# do not leak into the dropdown labels or the input box.
with open("examples.txt", "r", encoding="utf-8") as f:
    examples = [line.rstrip("\r\n") for line in f if line.strip()]

# Labels for the examples dropdown: at most 50 characters of each example,
# with an ellipsis only when something was actually cut off. Built from the
# same list as ``examples`` so that positions in both lists line up.
examples_truncated = [
    example if len(example) <= 50 else example[:50] + "..."
    for example in examples
]
# Display name -> two-letter language code. The keys populate the language
# dropdown in insertion order, and the first key is its initial value.
language_choices = dict(
    English="en",
    Italian="it",
    Spanish="es",
    Portuguese="pt",
    German="de",
    French="fr",
)

# Language code currently in effect; starts out as English.
language_code = language_choices["English"]
# Make sure the cache directory exists before the app starts.
cache_dir = "/home/user/app/cache"
try:
    # exist_ok=True makes this a no-op when the directory is already there;
    # any real failure (permissions, a file in the way, ...) raises OSError.
    os.makedirs(cache_dir, exist_ok=True)
except OSError:
    # Report the problem and keep starting up instead of aborting the app.
    gr.Warning("Cache directory creation error")
else:
    # NOTE(review): this runs at import time, outside any Gradio event, so the
    # message presumably ends up in the server log rather than as a UI toast.
    gr.Info("Cache directory created at " + cache_dir)
# Markdown shown in the "Help Panel" accordion.
# Raw string: the backslashes in front of "<" and ">" are meant to reach the
# Markdown renderer literally, and "\<" is not a valid Python escape sequence.
policy_help_string = r"""
Policies are defined as follows:
1. **Annotate** - replace the PII instance by a \<TYPE:VALUE\> string, i.e. include both the PII type and its value
2. **Redact** - all PII instances are replaced by a \<PII\> generic string
3. **Placeholder** - replace with a prototypical value
4. **Synthetic** - substitute with synthetic data
For more information on the transformation policies, please refer to the guide [here](https://github.com/piisa/pii-transform/blob/main/doc/policies.md#pii-transformation-policies)"""
# Markdown shown at the top of the page: a short description of PIISA, the
# project links, the model used by the demo and a call for feedback.
# The empty first and last entries give the text its leading and trailing
# newline.
header_string = "\n".join(
    (
        "",
        "## [PIISA](https://privacyprotection.substack.com/p/towards-a-common-privacy-api-introducing)",
        "**PIISA** (Personally Identifiable Information Standard Architecture) is a set of tools to detect and remediate",
        "PII within large scale language data. It uses best of breed tools like [🤗 transformers](https://huggingface.co/docs/transformers/index) libraries,",
        "[spaCy](https://spacy.io/), regular expressions, [Faker](https://faker.readthedocs.io/en/master/) and [Presidio](https://microsoft.github.io/presidio/)",
        "to leverage best practices for effectively managing data privacy in accordance with your privacy policies.",
        "Important links:",
        "1. [PIISA API docs](https://github.com/piisa/piisa)",
        "2. [Blog](https://privacyprotection.substack.com/)",
        "3. [LinkedIn](https://www.linkedin.com/company/piisa/)",
        "This demo uses the multi-lingual [wikineural model](https://huggingface.co/Babelscape/wikineural-multilingual-ner) from [Babelscape](https://huggingface.co/Babelscape).",
        "### ▵ We're looking for any feedback and/or suggestions, so please open a new thread in the Discussions tab ▵",
        "",
    )
)
def change_language(language_selection):
    """Switch the active language to the one picked in the dropdown.

    Args:
        language_selection: A display name that is a key of
            ``language_choices`` (for example ``"Italian"``).

    Raises:
        KeyError: If the name is not one of the known languages.
    """
    # NOTE(review): the selection is stored in a module-level variable, so it
    # looks like it is shared by everyone using the running app - confirm.
    global language_code
    selected_code = language_choices[language_selection]
    language_code = selected_code
    gr.Info("{} selected".format(language_selection))
def process(text, policy):
    """Apply a PII transformation policy to ``text`` and return the result.

    Args:
        text: Raw text taken from the input box.
        policy: Policy name in any letter case (for example ``"Annotate"`` or
            ``"redact"``); it is lower-cased before being handed to the
            processor.

    Returns:
        The transformed text, or ``""`` when there is nothing to process.
    """
    # Check the input first: a missing, empty or whitespace-only text has
    # nothing to transform, so skip building the processor altogether.
    if not text or not text.strip():
        print("Empty text field")
        gr.Warning("No text present")
        return ""

    # Create the processor for the currently selected language and the
    # requested policy. The custom config ("config.json") is there to prevent
    # loading of the Presidio plugin.
    # NOTE(review): language_code is module-level state written by
    # change_language(); presumably it is shared across sessions - confirm.
    proc = PiiTextProcessor(
        lang=language_code, default_policy=policy.lower(), config="config.json"
    )

    # Process the text buffer and hand back the transformed buffer.
    return proc(text)
def get_full_example(idx):
    """Return the complete example text stored at position ``idx``.

    The examples dropdown shows shortened labels and reports the position of
    the chosen entry; this maps that position back to the untruncated text.
    """
    full_text = examples[idx]
    return full_text
# Page layout and event wiring for the demo.
with gr.Blocks() as demo:
    # Header row: project description on the left, logo on the right.
    with gr.Row():
        with gr.Column():
            gr.Markdown(value=header_string)
        # Empty fixed-width column, used only as horizontal spacing.
        with gr.Column(scale=0, min_width=100):
            pass
        with gr.Column(scale=0, min_width=100):
            logo = gr.Image(
                "image.jpeg",
                height=100,
                width=100,
                show_label=False,
                show_download_button=False,
                show_share_button=False,
                mask_opacity=1.0,
            )

    # Main row: original text | language and policy controls | transformed text.
    with gr.Row():
        with gr.Column(scale=2, min_width=400):
            text_original = gr.Textbox(
                label="Original Text",
                lines=13,
                placeholder="Enter the text you would like to analyze, or select from one of the examples below",
            )
        with gr.Column(scale=0, min_width=25):
            pass
        with gr.Column(scale=0, min_width=150):
            gr.Markdown(value="""<p style="text-align: center;">Select Language</p>""")
            lang_picker = gr.Dropdown(
                choices=list(language_choices.keys()),
                label="",
                value=list(language_choices.keys())[0],
                type="value",
                container=False,
            )
            lang_picker.select(change_language, inputs=lang_picker, outputs=None)
            gr.Markdown(value="""<p style="text-align: center;">Select Policy</p>""")
            annotate_btn = gr.Button(value="Annotate", variant="primary", size="sm")
            redact_btn = gr.Button(value="Redact", variant="primary", size="sm")
            anonymize_btn = gr.Button(value="Synthetic", variant="primary", size="sm")
            placeholder_btn = gr.Button(
                value="Placeholder", variant="primary", size="sm"
            )
        with gr.Column(scale=0, min_width=25):
            pass
        with gr.Column(
            scale=2,
            min_width=400,
        ):
            text_modified = gr.TextArea(
                label="Transformed Text",
                lines=13,
                show_copy_button=True,
                interactive=False,
            )

    # Every policy button runs process() on the original text. The button is
    # passed as the second input, so process() receives the button's label
    # ("Annotate", "Redact", "Synthetic", "Placeholder") and lower-cases it
    # into the policy name. Keep the labels in sync with the policy names.
    for policy_btn in (annotate_btn, redact_btn, anonymize_btn, placeholder_btn):
        policy_btn.click(
            fn=process, inputs=[text_original, policy_btn], outputs=text_modified
        )

    # Examples row: choosing a shortened label copies the full example into
    # the input box.
    with gr.Row():
        example_selector = gr.Dropdown(
            examples_truncated, type="index", label="Examples"
        )
        example_selector.select(
            get_full_example, inputs=example_selector, outputs=[text_original]
        )

    # Collapsed help section describing the transformation policies.
    with gr.Accordion(label="Help Panel", open=False):
        gr.Markdown(value=policy_help_string)

# Enable the request queue and start the web server.
demo.queue().launch()