stylistic changes

- use monospace font for the prompt
- make the code block larger
- hide advanced settings + add a disclaimer
- update the title and model formats

Left to do:
- improve the examples
- remove the EOS token once this [issue](https://github.com/gradio-app/gradio/issues/3839) is fixed
app.py
CHANGED
@@ -12,8 +12,6 @@ from share_btn import community_icon_html, loading_icon_html, share_js, share_btn_css
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
 API_URL = os.environ.get("API_URL")
 
-with open("./HHH_prompt.txt", "r") as f:
-    HHH_PROMPT = f.read() + "\n\n"
 
 FIM_PREFIX = "<fim_prefix>"
 FIM_MIDDLE = "<fim_middle>"
@@ -23,39 +21,40 @@ FIM_INDICATOR = "<FILL_HERE>"
 
 FORMATS = """## Model formats
 
-The model is pretrained on code and
+The model is pretrained on code and is formatted with special tokens in addition to the pure code data,
+such as prefixes specifying the source of the file or tokens separating code from a commit message.
+Use these templates to explore the model's capacities:
 
-###
-
-### Prefixes
-Any combination of the three following prefixes can be found in pure code files:
+### 1. Prefixes 🏷️
+For pure code files, use any combination of the following prefixes:
 
 ```
 <reponame>REPONAME<filename>FILENAME<gh_stars>STARS\ncode<|endoftext|>
 ```
 STARS can be one of: 0, 1-10, 10-100, 100-1000, 1000+
 
-### Commits
+### 2. Commits 💾
 The commits data is formatted as follows:
+
 ```
 <commit_before>code<commit_msg>text<commit_after>code<|endoftext|>
 ```
 
-### Jupyter
-
+### 3. Jupyter Notebooks 📓
+The model is trained on Jupyter notebooks as Python scripts and structured formats like:
+
 ```
 <start_jupyter><jupyter_text>text<jupyter_code>code<jupyter_output>output<jupyter_text>
 ```
 
-### Issues
+### 4. Issues 🐛
 We also trained on GitHub issues using the following formatting:
 ```
 <issue_start><issue_comment>text<issue_comment>...<issue_closed>
 ```
 
-### Fill-in-the-middle
-Fill in the middle requires rearranging the model inputs. The playground
+### 5. Fill-in-the-middle 🧩
+Fill in the middle requires rearranging the model inputs. The playground handles this for you - all you need is to specify where to fill:
 ```
 code before<FILL_HERE>code after
 ```
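Outside the playground, these templates are easy to reproduce. The hypothetical helpers below build prompts for the prefix and commit formats above; the special-token strings come straight from FORMATS, while the helper names and signatures are illustrative and not part of app.py:

```python
# Illustrative helpers, not part of app.py: they build prompts that
# match the prefix and commit templates described in FORMATS.

def prefix_prompt(repo: str, filename: str, stars: str, code: str) -> str:
    # STARS must be one of: "0", "1-10", "10-100", "100-1000", "1000+"
    return f"<reponame>{repo}<filename>{filename}<gh_stars>{stars}\n{code}"


def commit_prompt(before: str, message: str) -> str:
    # The model is expected to complete the code after <commit_after>.
    return f"<commit_before>{before}<commit_msg>{message}<commit_after>"


print(prefix_prompt("bigcode/starcoder", "hello.py", "1000+", "def hello():"))
print(commit_prompt("def add(a, b): return a - b", "fix: add should sum, not subtract"))
```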
@@ -66,21 +65,30 @@ theme = gr.themes.Monochrome(
     secondary_hue="blue",
     neutral_hue="slate",
     radius_size=gr.themes.sizes.radius_sm,
-    font=[gr.themes.GoogleFont("Open Sans"), "ui-sans-serif", "system-ui", "sans-serif"],
+    font=[
+        gr.themes.GoogleFont("Open Sans"),
+        "ui-sans-serif",
+        "system-ui",
+        "sans-serif",
+    ],
 )
 
 client = Client(
-    API_URL,
+    API_URL,
+    headers={"Authorization": f"Bearer {HF_TOKEN}"},
 )
 
-def generate(prompt, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0, chat_mode=False):
+
+def generate(
+    prompt, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0
+):
 
     temperature = float(temperature)
     if temperature < 1e-2:
         temperature = 1e-2
     top_p = float(top_p)
     fim_mode = False
+
     generate_kwargs = dict(
         temperature=temperature,
         max_new_tokens=max_new_tokens,
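The hunk above ends before the line that actually creates `stream`. Assuming `Client` comes from the `text_generation` package, which matches the `response.token.text` accesses later in the diff, the streaming call presumably looks like this minimal sketch:

```python
# Minimal sketch, not the app's exact code: stream tokens from a
# text-generation-inference endpoint with the same kwargs the diff builds.
import os

from text_generation import Client  # assumed source of `Client`

client = Client(
    os.environ["API_URL"],
    headers={"Authorization": f"Bearer {os.environ.get('HF_TOKEN', '')}"},
)

output = ""
for response in client.generate_stream(
    "def print_hello_world():",
    temperature=0.2,
    max_new_tokens=64,
    top_p=0.9,
    repetition_penalty=1.2,
    do_sample=True,
    seed=42,
):
    # Each streamed response carries one token; special tokens such as
    # <|endoftext|> are flagged on response.token.special.
    if not response.token.special:
        output += response.token.text
print(output)
```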
@@ -89,16 +97,7 @@ def generate(prompt, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0, chat_mode=False):
         do_sample=True,
         seed=42,
     )
-    if chat_mode:
-        generate_kwargs.update({"stop_sequences": ["\nHuman", "\n-----"]})
-
-    if chat_mode and FIM_INDICATOR in prompt:
-        raise ValueError("Chat mode and FIM are mutually exclusive. Choose one or the other.")
 
-    if chat_mode:
-        chat_prompt = "Human: " + prompt + "\n\nAssistant:"
-        prompt = HHH_PROMPT + chat_prompt
-
     if FIM_INDICATOR in prompt:
         fim_mode = True
     try:
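The `try:` block is truncated here, but the fill-in-the-middle rearrangement it performs is small. Below is a sketch under two assumptions: the suffix token is `<fim_suffix>` (only `FIM_PREFIX` and `FIM_MIDDLE` are visible in this diff), and the prompt is split on a single `<FILL_HERE>` marker:

```python
# Sketch of the FIM rewrite; FIM_SUFFIX and the exact error handling
# are assumptions, since the corresponding lines are elided above.
FIM_PREFIX = "<fim_prefix>"
FIM_MIDDLE = "<fim_middle>"
FIM_SUFFIX = "<fim_suffix>"  # assumed, by analogy with the other two
FIM_INDICATOR = "<FILL_HERE>"


def rearrange_fim(prompt: str) -> str:
    # Split the user's prompt at the fill marker; exactly one is expected.
    try:
        prefix, suffix = prompt.split(FIM_INDICATOR)
    except ValueError:
        raise ValueError(f"Only one {FIM_INDICATOR} allowed in the prompt")
    # The model then generates the "middle" that belongs between them.
    return f"{FIM_PREFIX}{prefix}{FIM_SUFFIX}{suffix}{FIM_MIDDLE}"


print(rearrange_fim("def add(a, b):\n    <FILL_HERE>\nprint(add(1, 2))"))
```

This also explains the streaming branch in the next hunk: once the model emits `<|endoftext|>`, the stored `suffix` is stitched back onto the generated prefix and middle.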
@@ -111,17 +110,13 @@ def generate(prompt, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0, chat_mode=False):
 
     if fim_mode:
         output = prefix
-    elif chat_mode:
-        output = chat_prompt
     else:
         output = prompt
 
     previous_token = ""
     for response in stream:
-        if fim_mode and response.token.text =="<|endoftext|>":
+        if fim_mode and response.token.text == "<|endoftext|>":
             output += (suffix + "\n" + response.token.text)
-        elif chat_mode and response.token.text in ["Human", "-----"] and previous_token=="\n":
-            return output
         else:
             output += response.token.text
         previous_token = response.token.text
@@ -133,7 +128,7 @@ examples = [
     "def print_hello_world():",
     'def fibonacci(n: int) -> int:\n """ Compute the n-th Fibonacci number. """',
     'from typing import List, Tuple\n\ndef sum_and_product(numbers: List[int]) -> Tuple[int, int]:\n """ Return the sum and the product of the integers in the list as a tuple. Here is the answer of the exercise"""',
-    "class ComplexNumbers:"
+    "class ComplexNumbers:",
 ]
 
 
@@ -142,29 +137,92 @@ def process_example(args):
         pass
     return x
 
-css = ".generating {visibility: hidden}" + share_btn_css
+
+css = ".generating {visibility: hidden}"
 
-…
-with gr.Column():
-    gr.Markdown(
-        """\
-        # BigCode - Playground
-
-_Note:_ this is an internal playground - please do not share. The deployment can
+monospace_css = """
+#q-input textarea {
+    font-family: monospace, 'Consolas', Courier, monospace;
+}
+"""
+custom_output_css = """
+#q-output textarea {
+    min-height: 800px;
+}
 """
-…
 
+css += share_btn_css + monospace_css + custom_output_css + ".gradio-container {color: black}"
+
+
+description = """
+<div style="text-align: center;">
+    <h1 style='color: black;'> 💫 StarCoder<span style='color: #e6b800;'> - </span>Playground</h1>
+    <p style='color: black;'>This is a demo to generate code with <a href="https://huggingface.co/bigcode/starcoder" style='color: #e6b800;'>StarCoder</a>, a 15B parameter model for code generation in 86 programming languages.</p>
+</div>
+"""
+disclaimer = """⚠️ **Intended Use**: this app and its [supporting model](https://huggingface.co/bigcode) are provided for demonstration purposes; not to serve as replacement for human expertise. For more details on the model's limitations in terms of factuality and biases, see the [model card.](hf.co/bigcode)"""
+
+with gr.Blocks(theme=theme, analytics_enabled=False, css=css) as demo:
+    with gr.Column():
+        gr.Markdown(description)
         with gr.Row():
-            with gr.Column(
-                instruction = gr.Textbox(
+            with gr.Column():
+                instruction = gr.Textbox(
+                    placeholder="Enter your prompt here",
+                    label="Prompt",
+                    elem_id="q-input",
+                )
                 submit = gr.Button("Generate", variant="primary")
-                output = gr.Code(elem_id="q-output")
-…
+                output = gr.Code(elem_id="q-output", lines=30)
+
+                with gr.Accordion("Advanced settings", open=False):
+                    with gr.Row():
+                        column_1, column_2 = gr.Column(), gr.Column()
+                        with column_1:
+                            temperature = gr.Slider(
+                                label="Temperature",
+                                value=0.2,
+                                minimum=0.0,
+                                maximum=1.0,
+                                step=0.05,
+                                interactive=True,
+                                info="Higher values produce more diverse outputs",
+                            )
+                            max_new_tokens = gr.Slider(
+                                label="Max new tokens",
+                                value=256,
+                                minimum=0,
+                                maximum=8192,
+                                step=64,
+                                interactive=True,
+                                info="The maximum numbers of new tokens",
+                            )
+                        with column_2:
+                            top_p = gr.Slider(
+                                label="Top-p (nucleus sampling)",
+                                value=0.90,
+                                minimum=0.0,
+                                maximum=1,
+                                step=0.05,
+                                interactive=True,
+                                info="Higher values sample more low-probability tokens",
+                            )
+                            repetition_penalty = gr.Slider(
+                                label="Repetition penalty",
+                                value=1.2,
+                                minimum=1.0,
+                                maximum=2.0,
+                                step=0.05,
+                                interactive=True,
+                                info="Penalize repeated tokens",
+                            )
+                gr.Markdown(disclaimer)
                 with gr.Group(elem_id="share-btn-container"):
                     community_icon = gr.HTML(community_icon_html, visible=True)
                     loading_icon = gr.HTML(loading_icon_html, visible=True)
-                    share_button = gr.Button(
-…
+                    share_button = gr.Button(
+                        "Share to community", elem_id="share-btn", visible=True
+                    )
                 gr.Examples(
                     examples=examples,
                     inputs=[instruction],
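This hunk implements the first two items of the commit message: the prompt box gets `elem_id="q-input"` plus a monospace font rule, and the output block gets a minimum height. A stand-alone sketch of just the `elem_id` to CSS hook, with illustrative component names:

```python
# Stand-alone sketch of the elem_id / CSS hook used above.
import gradio as gr

css = """
#q-input textarea {
    font-family: monospace, 'Consolas', Courier, monospace;
}
"""

with gr.Blocks(css=css) as demo:
    # elem_id="q-input" makes the component addressable as #q-input,
    # so the rule above restyles the <textarea> rendered inside it.
    prompt = gr.Textbox(label="Prompt", elem_id="q-input")

if __name__ == "__main__":
    demo.launch()
```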
@@ -174,50 +232,10 @@ _Note:_ this is an internal playground - please do not share. The deployment can
                 )
         gr.Markdown(FORMATS)
 
-…
-)
-        temperature = gr.Slider(
-            label="Temperature",
-            value=0.2,
-            minimum=0.0,
-            maximum=1.0,
-            step=0.05,
-            interactive=True,
-            info="Higher values produce more diverse outputs",
-        )
-        max_new_tokens = gr.Slider(
-            label="Max new tokens",
-            value=256,
-            minimum=0,
-            maximum=8192,
-            step=64,
-            interactive=True,
-            info="The maximum numbers of new tokens",
-        )
-        top_p = gr.Slider(
-            label="Top-p (nucleus sampling)",
-            value=0.90,
-            minimum=0.0,
-            maximum=1,
-            step=0.05,
-            interactive=True,
-            info="Higher values sample more low-probability tokens",
-        )
-        repetition_penalty = gr.Slider(
-            label="Repetition penalty",
-            value=1.2,
-            minimum=1.0,
-            maximum=2.0,
-            step=0.05,
-            interactive=True,
-            info="Penalize repeated tokens",
-        )
-
-    submit.click(generate, inputs=[instruction, temperature, max_new_tokens, top_p, repetition_penalty, chat_mode], outputs=[output])
-    # instruction.submit(generate, inputs=[instruction, temperature, max_new_tokens, top_p, repetition_penalty, chat_mode], outputs=[output])
+    submit.click(
+        generate,
+        inputs=[instruction, temperature, max_new_tokens, top_p, repetition_penalty],
+        outputs=[output],
+    )
     share_button.click(None, [], [], _js=share_js)
-demo.queue(concurrency_count=16).launch(debug=True)
+demo.queue(concurrency_count=16).launch(debug=True)
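Note that `submit.click` passes the `inputs` components to `generate` positionally, which is why `chat_mode` disappears from the function signature, the `inputs` list, and the generation logic in the same commit. A toy sketch of that wiring (placeholder `generate` body, Gradio 3.x API assumed, matching the `concurrency_count` call above):

```python
# Toy example of positional event wiring in gradio; the slider ranges
# mirror the ones defined above, the generate body is a placeholder.
import gradio as gr


def generate(prompt, temperature, max_new_tokens, top_p, repetition_penalty):
    return f"# temperature={temperature}, top_p={top_p}\n{prompt}"


with gr.Blocks() as demo:
    prompt = gr.Textbox(label="Prompt")
    temperature = gr.Slider(0.0, 1.0, value=0.2, label="Temperature")
    max_new_tokens = gr.Slider(0, 8192, value=256, step=64, label="Max new tokens")
    top_p = gr.Slider(0.0, 1.0, value=0.9, label="Top-p")
    repetition_penalty = gr.Slider(1.0, 2.0, value=1.2, label="Repetition penalty")
    output = gr.Code()
    submit = gr.Button("Generate")
    # Components in `inputs` are matched to generate()'s parameters by position.
    submit.click(
        generate,
        inputs=[prompt, temperature, max_new_tokens, top_p, repetition_penalty],
        outputs=[output],
    )

demo.queue(concurrency_count=16).launch()
```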