Spaces:

gsarti
/

pecore

Running on Zero

App Files Files Community

gsarti committed on Mar 17, 2024

Commit

2574e16

1 Parent(s): dce322c

rug logo and codegen fixes

Browse files

Files changed (3) hide show

contents.py +2 -2
img/rug_logo_white_contour.png +0 -0
presets.py +48 -43

contents.py CHANGED Viewed

@@ -56,7 +56,7 @@ example_explanation = """
 </ol>
 <h2>Using PECoRe from Python with Inseq</h3>
 <p>This demo is useful for testing out various models and methods for PECoRe attribution, but the <a href="https://inseq.org/en/latest/main_classes/cli.html#attribute-context"><code>inseq attribute-context</code></a> CLI command is the way to go if you want to run experiments on several examples, or if you want to exploit the full customizability of the Inseq API.</p>
-<p>The utility we provide in this section allows you to generate Python and Shell code calling the Inseq CLI with the parameters you set in the interface. This is useful to understand how to use the Inseq API and quickly get up to speed with running PECoRe on your own models and data.</p>
 <p>Once you are satisfied with the parameters you set (including context/query strings in the <code>🐑 Demo</code> tab), just press the button and get your code snippets ready for usage! 🤗</p>
 """
@@ -107,7 +107,7 @@ If you use the Inseq implementation of PECoRe (<a href="https://inseq.org/en/lat
 powered_by = """<div class="footer-custom-block"><b>Powered by</b> <a href='https://github.com/inseq-team/inseq' target='_blank'><img src="file/img/inseq_logo_white_contour.png" width=150px /></a></div>"""
-support = """<div class="footer-custom-block"><b>With the support of</b> <a href='https://projects.illc.uva.nl/indeep/' target='_blank'><img src="file/img/indeep_logo_white_contour.png" width=120px /></a><a href='https://www.esciencecenter.nl/' target='_blank'><img src="file/img/escience_logo_white_contour.png" width=160px /></a></div>"""
 examples = [
     [

 </ol>
 <h2>Using PECoRe from Python with Inseq</h3>
 <p>This demo is useful for testing out various models and methods for PECoRe attribution, but the <a href="https://inseq.org/en/latest/main_classes/cli.html#attribute-context"><code>inseq attribute-context</code></a> CLI command is the way to go if you want to run experiments on several examples, or if you want to exploit the full customizability of the Inseq API.</p>
+<p>The utility we provide in this section allows you to generate Python and Shell code calling the Inseq CLI with the parameters you set in the interface. <b>We recommend using the Python version for repeated evaluation, since it allows for model-preloading.</b></p>
 <p>Once you are satisfied with the parameters you set (including context/query strings in the <code>🐑 Demo</code> tab), just press the button and get your code snippets ready for usage! 🤗</p>
 """
 powered_by = """<div class="footer-custom-block"><b>Powered by</b> <a href='https://github.com/inseq-team/inseq' target='_blank'><img src="file/img/inseq_logo_white_contour.png" width=150px /></a></div>"""
+support = """<div class="footer-custom-block"><b>Built by <a href="https://gsarti.com" target="_blank">Gabriele Sarti</a> with the support of</b> <a href='https://www.rug.nl/research/clcg/research/cl/' target='_blank'><img src="file/img/rug_logo_white_contour.png" width=170px /></a><a href='https://projects.illc.uva.nl/indeep/' target='_blank'><img src="file/img/indeep_logo_white_contour.png" width=100px /></a><a href='https://www.esciencecenter.nl/' target='_blank'><img src="file/img/escience_logo_white_contour.png" width=120px /></a></div>"""
 examples = [
     [

img/rug_logo_white_contour.png ADDED Viewed

presets.py CHANGED Viewed

@@ -105,9 +105,31 @@ def update_code_snippets_fn(
     generation_kwargs: str,
     attribution_kwargs: str,
 ) -> tuple[str, str]:
-    def get_kwargs_str(kwargs: str, name: str, pad: str = " " * 4) -> str:
         kwargs_dict = json.loads(kwargs)
         return nl + pad + name + '=' + str(kwargs_dict) + ',' if kwargs_dict else ''
     nl = "\n"
     tq = "\"\"\""
     # Python
@@ -117,59 +139,42 @@ from inseq.commands.attribute_context import attribute_context_with_model
 inseq_model = inseq.load_model(
     "{model_name_or_path}",
-    "{attribution_method}",{get_kwargs_str(model_kwargs, "model_kwargs")}{get_kwargs_str(tokenizer_kwargs, "tokenizer_kwargs")}
 )
 pecore_args = AttributeContextArgs(
-    save_path="pecore_output.json",
-    viz_path="pecore_output.html",
     model_name_or_path="{model_name_or_path}",
     attribution_method="{attribution_method}",
     attributed_fn="{attributed_fn}",
     context_sensitivity_metric="{context_sensitivity_metric}",
-    special_tokens_to_keep={special_tokens_to_keep},
-    context_sensitivity_std_threshold={context_sensitivity_std_threshold},
-    attribution_std_threshold={attribution_std_threshold},
-    input_current_text=\"\"\"{input_current_text}\"\"\",
-    input_template=\"\"\"{input_template}\"\"\",
-    output_template="{output_template}",
     contextless_input_current_text=\"\"\"{contextless_input_template}\"\"\",
     contextless_output_current_text=\"\"\"{contextless_output_template}\"\"\",
-    context_sensitivity_topk={context_sensitivity_topk if context_sensitivity_topk > 0 else None},
-    attribution_topk={attribution_topk if attribution_topk > 0 else None},
-    input_context_text={tq + input_context_text + tq if input_context_text else None},
-    output_context_text={tq + output_context_text + tq if output_context_text else None},
-    output_current_text={tq + output_current_text + tq if output_current_text else None},
-    decoder_input_output_separator={tq + decoder_input_output_separator + tq if decoder_input_output_separator else None},{get_kwargs_str(model_kwargs, "model_kwargs")}{get_kwargs_str(tokenizer_kwargs, "tokenizer_kwargs")}{get_kwargs_str(generation_kwargs, "generation_kwargs")}{get_kwargs_str(attribution_kwargs, "attribution_kwargs")}
 )
 out = attribute_context_with_model(pecore_args, loaded_model)"""
     # Bash
-    bash = f"""pip install inseq
-inseq attribute-context \\
-    --save-path pecore_output.json \\
-    --viz-path pecore_output.html \\
-    --model-name-or-path "{model_name_or_path}" \\
-    --attribution-method "{attribution_method}" \\
-    --attributed-fn "{attributed_fn}" \\
-    --context-sensitivity-metric "{context_sensitivity_metric}" \\
-    --special-tokens-to-keep {" ".join(special_tokens_to_keep)} \\
-    --context-sensitivity-std-threshold {context_sensitivity_std_threshold} \\
-    --attribution-std-threshold {attribution_std_threshold} \\
-    --input-current-text "{input_current_text}" \\
-    --input-template "{input_template}" \\
-    --output-template "{output_template}" \\
-    --contextless-input-current-text "{contextless_input_template}" \\
-    --contextless-output-current-text "{contextless_output_template}" \\
-    --context-sensitivity-topk {context_sensitivity_topk if context_sensitivity_topk > 0 else None} \\
-    --attribution-topk {attribution_topk if attribution_topk > 0 else None} \\
-    --input-context-text "{input_context_text}" \\
-    --output-context-text "{output_context_text}" \\
-    --output-current-text "{output_current_text}" \\
-    --decoder-input-output-separator "{decoder_input_output_separator}" \\
-    --model-kwargs "{str(model_kwargs).replace(nl, "")}" \\
-    --tokenizer-kwargs "{str(tokenizer_kwargs).replace(nl, "")} \\
-    --generation-kwargs "{str(generation_kwargs).replace(nl, "")}" \\
-    --attribution-kwargs "{str(attribution_kwargs).replace(nl, "")}"
-    """
     return python, bash

     generation_kwargs: str,
     attribution_kwargs: str,
 ) -> tuple[str, str]:
+    if not input_current_text:
+        input_current_text = "<MISSING INPUT CURRENT TEXT, REQUIRED>"
+    def py_get_kwargs_str(kwargs: str, name: str, pad: str = " " * 4) -> str:
         kwargs_dict = json.loads(kwargs)
         return nl + pad + name + '=' + str(kwargs_dict) + ',' if kwargs_dict else ''
+    def py_get_if_specified(arg: str | int | float | list | None, name: str, pad: str = " " * 4) -> str:
+        if arg is None or (isinstance(arg, (str, list)) and not arg) or (isinstance(arg, (int, float)) and arg <= 0):
+            return ""
+        elif isinstance(arg, str):
+            return nl + pad + name + "=" + tq + arg + tq + ","
+        elif isinstance(arg, list):
+            return nl + pad + name + "=" + str(arg) + ","
+        else:
+            return nl + pad + name + "=" + str(arg) + ","
+    def sh_get_kwargs_str(kwargs: str, name: str, pad: str = " " * 4) -> str:
+        return nl + pad + f"--{name} " + '"' + str(kwargs).replace("\n", "").replace('"', '\\"') + '"' + " \\\\" if json.loads(kwargs) else ''
+    def sh_get_if_specified(arg: str | int | float | list | None, name: str, pad: str = " " * 4) -> str:
+        if arg is None or (isinstance(arg, (str, list)) and not arg) or (isinstance(arg, (int, float)) and arg <= 0):
+            return ""
+        elif isinstance(arg, str):
+            return nl + pad + f"--{name} " + '"' + arg.replace('"', '\\"') + '"' + " \\\\"
+        elif isinstance(arg, list):
+            return nl + pad + f"--{name} " + " ".join(str(arg)) + " \\\\"
+        else:
+            return nl + pad + f"--{name} " + str(arg) + " \\\\"
     nl = "\n"
     tq = "\"\"\""
     # Python
 inseq_model = inseq.load_model(
     "{model_name_or_path}",
+    "{attribution_method}",{py_get_kwargs_str(model_kwargs, "model_kwargs")}{py_get_kwargs_str(tokenizer_kwargs, "tokenizer_kwargs")}
 )
 pecore_args = AttributeContextArgs(
     model_name_or_path="{model_name_or_path}",
     attribution_method="{attribution_method}",
     attributed_fn="{attributed_fn}",
     context_sensitivity_metric="{context_sensitivity_metric}",
+    context_sensitivity_std_threshold={context_sensitivity_std_threshold},{py_get_if_specified(context_sensitivity_topk, "context_sensitivity_topk")}
+    attribution_std_threshold={attribution_std_threshold},{py_get_if_specified(attribution_topk, "attribution_topk")}
+    input_current_text=\"\"\"{input_current_text}\"\"\",{py_get_if_specified(input_context_text, "input_context_text")}
     contextless_input_current_text=\"\"\"{contextless_input_template}\"\"\",
+    input_template=\"\"\"{input_template}\"\"\",{py_get_if_specified(output_current_text, "output_current_text")}{py_get_if_specified(output_context_text, "output_context_text")}
     contextless_output_current_text=\"\"\"{contextless_output_template}\"\"\",
+    output_template="{output_template}",{py_get_if_specified(special_tokens_to_keep, "special_tokens_to_keep")}{py_get_if_specified(decoder_input_output_separator, "decoder_input_output_separator")}
+    save_path="pecore_output.json",
+    viz_path="pecore_output.html",{py_get_kwargs_str(model_kwargs, "model_kwargs")}{py_get_kwargs_str(tokenizer_kwargs, "tokenizer_kwargs")}{py_get_kwargs_str(generation_kwargs, "generation_kwargs")}{py_get_kwargs_str(attribution_kwargs, "attribution_kwargs")}
 )
 out = attribute_context_with_model(pecore_args, loaded_model)"""
     # Bash
+    bash = f"""# pip install inseq
+inseq attribute-context \\\\
+    --model-name-or-path "{model_name_or_path}" \\\\
+    --attribution-method "{attribution_method}" \\\\
+    --attributed-fn "{attributed_fn}" \\\\
+    --context-sensitivity-metric "{context_sensitivity_metric}" \\\\
+    --context-sensitivity-std-threshold {context_sensitivity_std_threshold} \\\\{sh_get_if_specified(context_sensitivity_topk, "context-sensitivity-topk")}
+    --attribution-std-threshold {attribution_std_threshold} \\\\{sh_get_if_specified(attribution_topk, "attribution-topk")}
+    --input-current-text "{input_current_text}" \\\\{sh_get_if_specified(input_context_text, "input-context-text")}
+    --contextless-input-current-text "{contextless_input_template}" \\\\
+    --input-template "{input_template}" \\\\{sh_get_if_specified(output_current_text, "output-current-text")}{sh_get_if_specified(output_context_text, "output-context-text")}
+    --contextless-output-current-text "{contextless_output_template}" \\\\
+    --output-template "{output_template}" \\\\{sh_get_if_specified(special_tokens_to_keep, "special_tokens_to_keep")}{sh_get_if_specified(decoder_input_output_separator, "decoder-input-output-separator")}
+    --save-path pecore_output.json \\\\
+    --viz-path pecore_output.html \\\\{sh_get_kwargs_str(model_kwargs, "model-kwargs")}{sh_get_kwargs_str(tokenizer_kwargs, "tokenizer-kwargs")}{sh_get_kwargs_str(generation_kwargs, "generation-kwargs")}{sh_get_kwargs_str(attribution_kwargs, "attribution-kwargs")}
+"""
     return python, bash