Spaces:

nazneen
/

datasets-explorer

Runtime error

App Files Files Community

nazneen committed on May 23, 2022

Commit

bac7e94

1 Parent(s): 79de90d

interaction

Browse files

Files changed (1) hide show

app.py +29 -14

app.py CHANGED Viewed

@@ -19,12 +19,13 @@ def datasets_explorer_viz(df):
     s = ColumnDataSource(df)
     TOOLTIPS= [("dataset_id", "@dataset_id"), ("task", "@task")]
     color = factor_cmap('task', palette=Category20c_20, factors=df['task'].unique())
-    p = figure(plot_width=1000, plot_height=1000, tools="hover,wheel_zoom,pan,box_select", title="Dataset explorer", tooltips=TOOLTIPS, toolbar_location="above")
-    p.scatter('x', 'y', size=500, source=s, alpha=0.8,marker='circle',fill_color = color, line_color=color, legend_field = 'task')
     p.legend.location = "bottom_right"
-    #p.legend.click_policy="mute"
     p.legend.label_text_font_size="8pt"
     table_source = ColumnDataSource(data=dict())
     columns = [
         # TableColumn(field="x", title="X data"),
         # TableColumn(field="y", title="Y data"),
@@ -32,7 +33,7 @@ def datasets_explorer_viz(df):
         TableColumn(field="dataset_id", title="Dataset ID"),
     ]
     data_table = DataTable(source=table_source, columns=columns, width=300)
     s.selected.js_on_change('indices', CustomJS(args=dict(umap_source=s, table_source=table_source), code="""
             const inds = cb_obj.indices;
             const tableData = table_source.data;
@@ -49,30 +50,44 @@ def datasets_explorer_viz(df):
             table_source.change.emit();
     """
     ))
-    p.selection = ColumnDataSource(data=dict())
     #text_input.on_change("value_input",
-    text_input.value.js_on_change('value', CustomJS(args=dict(plot_source=s, text_input=text_input), code="""
-        const indices = [];
         const plot_data = plot_source.data;
-        for (var i = 0; i < plot_data['dataset_id'].length(); i++) {
-            console.log(plot_data['dataset_id'][i]);
-            if (plot_data['dataset_id'][i] == text_input.value || plot_data['task'][i] == text_input.value) {
-                indices.push(i)
             }
         }
-        plot_source.change.emit()
     """))
-    text_input = TextInput(value="", title="Search")
     st.bokeh_chart(row(column(text_input,p), data_table))
 if __name__ == "__main__":
     ### STREAMLIT APP CONFIG ###
     st.set_page_config(layout="wide", page_title="Datasets Explorer")
     #lcol, rcol = st.columns([2, 2])
     # ******* loading the mode and the data
     ### LOAD DATA AND SESSION VARIABLES ###
     datasets_df = pd.read_parquet('./assets/data/datasets_df.parquet')
     datasets_explorer_viz(datasets_df)

     s = ColumnDataSource(df)
     TOOLTIPS= [("dataset_id", "@dataset_id"), ("task", "@task")]
     color = factor_cmap('task', palette=Category20c_20, factors=df['task'].unique())
+    p = figure(plot_width=1000, plot_height=1000, tools="hover,wheel_zoom,pan,box_select", tooltips=TOOLTIPS, toolbar_location="above")
+    p.scatter('x', 'y', size=5, source=s, alpha=0.8,marker='circle',fill_color = color, line_color=color, legend_field = 'task')
     p.legend.location = "bottom_right"
+    p.legend.click_policy="mute"
     p.legend.label_text_font_size="8pt"
     table_source = ColumnDataSource(data=dict())
+    selection_source = ColumnDataSource(data=dict())
     columns = [
         # TableColumn(field="x", title="X data"),
         # TableColumn(field="y", title="Y data"),
         TableColumn(field="dataset_id", title="Dataset ID"),
     ]
     data_table = DataTable(source=table_source, columns=columns, width=300)
+    p.circle('x', 'y',source=selection_source, size=5, color= 'red')
     s.selected.js_on_change('indices', CustomJS(args=dict(umap_source=s, table_source=table_source), code="""
             const inds = cb_obj.indices;
             const tableData = table_source.data;
             table_source.change.emit();
     """
     ))
+    text_input = TextInput(value="", title="Search")
     #text_input.on_change("value_input",
+    text_input.js_on_change('value', CustomJS(args=dict(plot_source=s, selection_source=selection_source), code="""
         const plot_data = plot_source.data;
+        const selectData = selection_source.data
+        const value = cb_obj.value
+        selectData['x'] = []
+        selectData['y'] = []
+        selectData['dataset_id'] = []
+        selectData['task'] = []
+        for (var i = 0; i < plot_data['dataset_id'].length; i++) {
+            if (plot_data['dataset_id'][i].includes(value) || plot_data['task'][i].includes(value)) {
+                selectData['x'].push(plot_data['x'][i])
+                selectData['y'].push(plot_data['y'][i])
+                selectData['dataset_id'].push(plot_data['dataset_id'][i])
+                selectData['task'].push(plot_data['task'][i])
             }
         }
+        selection_source.change.emit()
     """))
     st.bokeh_chart(row(column(text_input,p), data_table))
 if __name__ == "__main__":
     ### STREAMLIT APP CONFIG ###
     st.set_page_config(layout="wide", page_title="Datasets Explorer")
+    st.title('Interactive Datasets Explorer')
     #lcol, rcol = st.columns([2, 2])
     # ******* loading the mode and the data
     ### LOAD DATA AND SESSION VARIABLES ###
+    with st.expander("How to interact with the plot:"):
+        st.markdown("* Each point in the plot represents a HF hub dataset categorized by their `task_id`.")
+        st.markdown("* Every dataset is emebdded using the [SPECTER](https://github.com/allenai/specter#advanced-training-your-own-model) embedding of its corresponding paper abstract.")
+        st.markdown("* You can either search for a dataset or drag and select to peek into the cluster content.")
     datasets_df = pd.read_parquet('./assets/data/datasets_df.parquet')
+    st.warning("Hugging Face 🤗 Datasets Explorer")
     datasets_explorer_viz(datasets_df)