# Spaces: Running on T4
import gradio as gr
from transformers import AutoTokenizer

# Tokenizer is loaded once, at import time, from the GPT-2 checkpoint.
# NOTE(review): the variable is named `bert_tokenizer` although the checkpoint
# is GPT-2; the name is left untouched because code outside this view may
# reference it — confirm before renaming.
bert_tokenizer = AutoTokenizer.from_pretrained("openai-community/gpt2")
def display_next_step_tokens(sentence, step):
    """Return a pair of visibility updates for two output components.

    Both updates carry the same flag: whether the module-level name
    ``split_selection`` compares equal to ``LABEL_RECURSIVE``.

    Args:
        sentence: Not read by the current body.
        step: Not read by the current body.

    Returns:
        A 2-tuple of ``gr.update(visible=...)`` payloads. The original code
        built the first with ``gr.Textbox.update`` and the second with
        ``gr.Radio.update``.
    """
    # NOTE(review): neither parameter is used, and `split_selection` is not a
    # parameter here, so it resolves to whatever the module-level name holds
    # at call time. This looks like the body of a split-method callback pasted
    # under a different signature — confirm the intended behaviour.
    show_recursive_controls = split_selection == LABEL_RECURSIVE

    # gr.update() is the component-agnostic way to patch properties. The
    # per-class `.update()` helpers were deprecated in late Gradio 3.x and
    # are no longer available on components in Gradio 4.
    return (
        gr.update(visible=show_recursive_controls),
        gr.update(visible=show_recursive_controls),
    )
# NOTE(review): the nesting below was reconstructed from statement order; the
# text this was recovered from carried no indentation — confirm the layout.
soft_theme = gr.themes.Soft(
    text_size="lg",
    font=["monospace"],
    primary_hue=gr.themes.colors.green,
)

with gr.Blocks(theme=soft_theme) as demo:
    # Free-text input that feeds the `chunk` callback.
    text = gr.Textbox(label="Your prompt to start decoding", value="Ok, I")

    # First row: splitter choice plus the separator controls.
    with gr.Row():
        # NOTE(review): the trailing "π" in this label (and in the slider
        # label below) looks like a mis-encoded emoji — confirm.
        split_selection = gr.Dropdown(
            choices=[LABEL_TEXTSPLITTER, LABEL_RECURSIVE],
            value=LABEL_RECURSIVE,
            label="Method to split chunks π",
        )
        separators_selection = gr.Textbox(
            elem_id="textbox_id",
            value=["\n\n", "\n", " ", ""],
            info="Separators used in RecursiveCharacterTextSplitter",
            # Alternatively, pass an empty label to keep the label's space.
            show_label=False,
            visible=True,
        )
        separator_preset_selection = gr.Radio(
            choices=["Default", "Python", "Markdown"],
            label="Choose a preset",
            info="This will apply a specific set of separators to RecursiveCharacterTextSplitter.",
            visible=True,
        )

    # Second row: how chunk length is measured, and the size/overlap sliders.
    with gr.Row():
        length_unit_selection = gr.Dropdown(
            choices=["Character count", "Token count (BERT tokens)"],
            value="Character count",
            label="Length function",
            info="How should we measure our chunk lengths?",
        )
        slider_count = gr.Slider(
            minimum=50,
            maximum=500,
            value=200,
            step=1,
            label="Chunk length π",
            info="In the chosen unit.",
        )
        chunk_overlap = gr.Slider(
            minimum=0,
            maximum=50,
            value=10,
            step=1,
            label="Overlap between chunks",
            info="In the chosen unit.",
        )

    out = gr.HighlightedText(
        label="Output",
        show_legend=True,
        show_label=False,
        color_map={"Overlap": "#DADADA"},
    )

    # Changing the split method updates the two separator controls.
    split_selection.change(
        fn=change_split_selection,
        inputs=split_selection,
        outputs=[separators_selection, separator_preset_selection],
    )
    # Choosing a preset rewrites the separators textbox.
    separator_preset_selection.change(
        fn=change_preset_separators,
        inputs=separator_preset_selection,
        outputs=separators_selection,
    )

    # Inputs handed to `chunk`, in the positional order it is called with.
    chunk_inputs = [
        text,
        slider_count,
        split_selection,
        separators_selection,
        length_unit_selection,
        chunk_overlap,
    ]
    # Any of these change events re-runs `chunk`.
    chunk_triggers = [
        text.change,
        length_unit_selection.change,
        separators_selection.change,
        split_selection.change,
        slider_count.change,
        chunk_overlap.change,
    ]
    gr.on(
        triggers=chunk_triggers,
        fn=chunk,
        inputs=chunk_inputs,
        outputs=out,
    )
    # Run `chunk` once when the page loads so the output is populated.
    demo.load(chunk, inputs=chunk_inputs, outputs=out)

demo.launch()