Spaces:
Runtime error
Runtime error
Adding subtitle
Browse files
app.py
CHANGED
|
@@ -46,7 +46,7 @@ assert (
|
|
| 46 |
EXPORTS_REPOSITORY is not None
|
| 47 |
), "You need to set EXPORTS_REPOSITORY in your environment variables"
|
| 48 |
|
| 49 |
-
MAX_ROWS = int(os.getenv("MAX_ROWS", "
|
| 50 |
CHUNK_SIZE = int(os.getenv("CHUNK_SIZE", "2_000"))
|
| 51 |
DATASET_VIEWE_API_URL = "https://datasets-server.huggingface.co/"
|
| 52 |
DATASETS_TOPICS_ORGANIZATION = os.getenv(
|
|
@@ -311,11 +311,18 @@ def generate_topics(dataset, config, split, column, nested_column, plot_type):
|
|
| 311 |
all_topics, _ = base_model.transform(all_docs)
|
| 312 |
all_topics = np.array(all_topics)
|
| 313 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 314 |
topic_plot = (
|
| 315 |
base_model.visualize_document_datamap(
|
| 316 |
docs=all_docs,
|
| 317 |
reduced_embeddings=reduced_embeddings_array,
|
| 318 |
title=dataset,
|
|
|
|
| 319 |
width=800,
|
| 320 |
height=700,
|
| 321 |
arrowprops={
|
|
@@ -324,12 +331,11 @@ def generate_topics(dataset, config, split, column, nested_column, plot_type):
|
|
| 324 |
"linewidth": 0,
|
| 325 |
"fc": "#33333377",
|
| 326 |
},
|
| 327 |
-
dynamic_label_size=
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
# min_font_size=4,
|
| 333 |
)
|
| 334 |
if plot_type == "DataMapPlot"
|
| 335 |
else base_model.visualize_documents(
|
|
@@ -386,6 +392,10 @@ def generate_topics(dataset, config, split, column, nested_column, plot_type):
|
|
| 386 |
topic_names_array,
|
| 387 |
hover_text=all_docs,
|
| 388 |
title=dataset,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 389 |
enable_search=True,
|
| 390 |
# TODO: Export data to .arrow and also serve it
|
| 391 |
inline_data=True,
|
|
|
|
| 46 |
EXPORTS_REPOSITORY is not None
|
| 47 |
), "You need to set EXPORTS_REPOSITORY in your environment variables"
|
| 48 |
|
| 49 |
+
MAX_ROWS = int(os.getenv("MAX_ROWS", "8_000"))
|
| 50 |
CHUNK_SIZE = int(os.getenv("CHUNK_SIZE", "2_000"))
|
| 51 |
DATASET_VIEWE_API_URL = "https://datasets-server.huggingface.co/"
|
| 52 |
DATASETS_TOPICS_ORGANIZATION = os.getenv(
|
|
|
|
| 311 |
all_topics, _ = base_model.transform(all_docs)
|
| 312 |
all_topics = np.array(all_topics)
|
| 313 |
|
| 314 |
+
sub_title = (
|
| 315 |
+
f"Data map for the entire dataset ({limit} rows) using the column '{column}'"
|
| 316 |
+
if full_processing
|
| 317 |
+
else f"Data map for a sample of the dataset (first {limit} rows) using the column '{column}'"
|
| 318 |
+
)
|
| 319 |
+
|
| 320 |
topic_plot = (
|
| 321 |
base_model.visualize_document_datamap(
|
| 322 |
docs=all_docs,
|
| 323 |
reduced_embeddings=reduced_embeddings_array,
|
| 324 |
title=dataset,
|
| 325 |
+
sub_title=sub_title,
|
| 326 |
width=800,
|
| 327 |
height=700,
|
| 328 |
arrowprops={
|
|
|
|
| 331 |
"linewidth": 0,
|
| 332 |
"fc": "#33333377",
|
| 333 |
},
|
| 334 |
+
dynamic_label_size=True,
|
| 335 |
+
label_wrap_width=12,
|
| 336 |
+
label_over_points=True,
|
| 337 |
+
max_font_size=36,
|
| 338 |
+
min_font_size=4,
|
|
|
|
| 339 |
)
|
| 340 |
if plot_type == "DataMapPlot"
|
| 341 |
else base_model.visualize_documents(
|
|
|
|
| 392 |
topic_names_array,
|
| 393 |
hover_text=all_docs,
|
| 394 |
title=dataset,
|
| 395 |
+
sub_title=sub_title.replace(
|
| 396 |
+
"dataset",
|
| 397 |
+
f"<a href='https://huggingface.co/datasets/{dataset}/viewer/{config}/{split}' target='_blank'>dataset</a>",
|
| 398 |
+
),
|
| 399 |
enable_search=True,
|
| 400 |
# TODO: Export data to .arrow and also serve it
|
| 401 |
inline_data=True,
|