Spaces:
Runtime error
Runtime error
interaction
Browse files
app.py
CHANGED
|
@@ -19,12 +19,13 @@ def datasets_explorer_viz(df):
|
|
| 19 |
s = ColumnDataSource(df)
|
| 20 |
TOOLTIPS= [("dataset_id", "@dataset_id"), ("task", "@task")]
|
| 21 |
color = factor_cmap('task', palette=Category20c_20, factors=df['task'].unique())
|
| 22 |
-
p = figure(plot_width=1000, plot_height=1000, tools="hover,wheel_zoom,pan,box_select",
|
| 23 |
-
p.scatter('x', 'y', size=
|
| 24 |
p.legend.location = "bottom_right"
|
| 25 |
-
|
| 26 |
p.legend.label_text_font_size="8pt"
|
| 27 |
table_source = ColumnDataSource(data=dict())
|
|
|
|
| 28 |
columns = [
|
| 29 |
# TableColumn(field="x", title="X data"),
|
| 30 |
# TableColumn(field="y", title="Y data"),
|
|
@@ -32,7 +33,7 @@ def datasets_explorer_viz(df):
|
|
| 32 |
TableColumn(field="dataset_id", title="Dataset ID"),
|
| 33 |
]
|
| 34 |
data_table = DataTable(source=table_source, columns=columns, width=300)
|
| 35 |
-
|
| 36 |
s.selected.js_on_change('indices', CustomJS(args=dict(umap_source=s, table_source=table_source), code="""
|
| 37 |
const inds = cb_obj.indices;
|
| 38 |
const tableData = table_source.data;
|
|
@@ -49,30 +50,44 @@ def datasets_explorer_viz(df):
|
|
| 49 |
table_source.change.emit();
|
| 50 |
"""
|
| 51 |
))
|
| 52 |
-
|
| 53 |
#text_input.on_change("value_input",
|
| 54 |
-
text_input.
|
| 55 |
-
const indices = [];
|
| 56 |
const plot_data = plot_source.data;
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
}
|
| 62 |
}
|
| 63 |
-
|
| 64 |
"""))
|
| 65 |
-
|
| 66 |
st.bokeh_chart(row(column(text_input,p), data_table))
|
| 67 |
|
| 68 |
|
| 69 |
if __name__ == "__main__":
|
| 70 |
### STREAMLIT APP CONFIG ###
|
| 71 |
st.set_page_config(layout="wide", page_title="Datasets Explorer")
|
| 72 |
-
|
| 73 |
#lcol, rcol = st.columns([2, 2])
|
| 74 |
# ******* loading the mode and the data
|
| 75 |
|
| 76 |
### LOAD DATA AND SESSION VARIABLES ###
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
datasets_df = pd.read_parquet('./assets/data/datasets_df.parquet')
|
|
|
|
| 78 |
datasets_explorer_viz(datasets_df)
|
|
|
|
| 19 |
s = ColumnDataSource(df)
|
| 20 |
TOOLTIPS= [("dataset_id", "@dataset_id"), ("task", "@task")]
|
| 21 |
color = factor_cmap('task', palette=Category20c_20, factors=df['task'].unique())
|
| 22 |
+
p = figure(plot_width=1000, plot_height=1000, tools="hover,wheel_zoom,pan,box_select", tooltips=TOOLTIPS, toolbar_location="above")
|
| 23 |
+
p.scatter('x', 'y', size=5, source=s, alpha=0.8,marker='circle',fill_color = color, line_color=color, legend_field = 'task')
|
| 24 |
p.legend.location = "bottom_right"
|
| 25 |
+
p.legend.click_policy="mute"
|
| 26 |
p.legend.label_text_font_size="8pt"
|
| 27 |
table_source = ColumnDataSource(data=dict())
|
| 28 |
+
selection_source = ColumnDataSource(data=dict())
|
| 29 |
columns = [
|
| 30 |
# TableColumn(field="x", title="X data"),
|
| 31 |
# TableColumn(field="y", title="Y data"),
|
|
|
|
| 33 |
TableColumn(field="dataset_id", title="Dataset ID"),
|
| 34 |
]
|
| 35 |
data_table = DataTable(source=table_source, columns=columns, width=300)
|
| 36 |
+
p.circle('x', 'y',source=selection_source, size=5, color= 'red')
|
| 37 |
s.selected.js_on_change('indices', CustomJS(args=dict(umap_source=s, table_source=table_source), code="""
|
| 38 |
const inds = cb_obj.indices;
|
| 39 |
const tableData = table_source.data;
|
|
|
|
| 50 |
table_source.change.emit();
|
| 51 |
"""
|
| 52 |
))
|
| 53 |
+
text_input = TextInput(value="", title="Search")
|
| 54 |
#text_input.on_change("value_input",
|
| 55 |
+
text_input.js_on_change('value', CustomJS(args=dict(plot_source=s, selection_source=selection_source), code="""
|
|
|
|
| 56 |
const plot_data = plot_source.data;
|
| 57 |
+
const selectData = selection_source.data
|
| 58 |
+
const value = cb_obj.value
|
| 59 |
+
|
| 60 |
+
selectData['x'] = []
|
| 61 |
+
selectData['y'] = []
|
| 62 |
+
selectData['dataset_id'] = []
|
| 63 |
+
selectData['task'] = []
|
| 64 |
+
|
| 65 |
+
for (var i = 0; i < plot_data['dataset_id'].length; i++) {
|
| 66 |
+
if (plot_data['dataset_id'][i].includes(value) || plot_data['task'][i].includes(value)) {
|
| 67 |
+
selectData['x'].push(plot_data['x'][i])
|
| 68 |
+
selectData['y'].push(plot_data['y'][i])
|
| 69 |
+
selectData['dataset_id'].push(plot_data['dataset_id'][i])
|
| 70 |
+
selectData['task'].push(plot_data['task'][i])
|
| 71 |
}
|
| 72 |
}
|
| 73 |
+
selection_source.change.emit()
|
| 74 |
"""))
|
| 75 |
+
|
| 76 |
st.bokeh_chart(row(column(text_input,p), data_table))
|
| 77 |
|
| 78 |
|
| 79 |
if __name__ == "__main__":
|
| 80 |
### STREAMLIT APP CONFIG ###
|
| 81 |
st.set_page_config(layout="wide", page_title="Datasets Explorer")
|
| 82 |
+
st.title('Interactive Datasets Explorer')
|
| 83 |
#lcol, rcol = st.columns([2, 2])
|
| 84 |
# ******* loading the mode and the data
|
| 85 |
|
| 86 |
### LOAD DATA AND SESSION VARIABLES ###
|
| 87 |
+
with st.expander("How to interact with the plot:"):
|
| 88 |
+
st.markdown("* Each point in the plot represents a HF hub dataset categorized by their `task_id`.")
|
| 89 |
+
st.markdown("* Every dataset is emebdded using the [SPECTER](https://github.com/allenai/specter#advanced-training-your-own-model) embedding of its corresponding paper abstract.")
|
| 90 |
+
st.markdown("* You can either search for a dataset or drag and select to peek into the cluster content.")
|
| 91 |
datasets_df = pd.read_parquet('./assets/data/datasets_df.parquet')
|
| 92 |
+
st.warning("Hugging Face 🤗 Datasets Explorer")
|
| 93 |
datasets_explorer_viz(datasets_df)
|