Spaces:
Running
Running
Add n_neighbors, min_dist, and metric options for UMAP.
Browse files
lynxkite-graph-analytics/src/lynxkite_graph_analytics/ml_ops.py
CHANGED
|
@@ -1,5 +1,7 @@
|
|
| 1 |
"""Operations for machine learning."""
|
| 2 |
|
|
|
|
|
|
|
| 3 |
import numpy as np
|
| 4 |
from . import core
|
| 5 |
from lynxkite.core import workspace
|
|
@@ -153,6 +155,24 @@ VIRIDIS = [
|
|
| 153 |
]
|
| 154 |
|
| 155 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
@op("View vectors", view="visualization")
|
| 157 |
def view_vectors(
|
| 158 |
bundle: core.Bundle,
|
|
@@ -160,15 +180,24 @@ def view_vectors(
|
|
| 160 |
table_name: str = "nodes",
|
| 161 |
vector_column: str = "",
|
| 162 |
label_column: str = "",
|
|
|
|
|
|
|
|
|
|
| 163 |
):
|
| 164 |
vec = np.stack(bundle.dfs[table_name][vector_column].to_numpy())
|
| 165 |
-
|
| 166 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 167 |
data = [[*p.tolist(), "", c.item()] for p, c in zip(proj, color)]
|
| 168 |
if label_column:
|
| 169 |
for i, row in enumerate(bundle.dfs[table_name][label_column]):
|
| 170 |
data[i][2] = row
|
| 171 |
-
size =
|
| 172 |
v = {
|
| 173 |
"title": {
|
| 174 |
"text": f"UMAP projection of {vector_column}",
|
|
|
|
| 1 |
"""Operations for machine learning."""
|
| 2 |
|
| 3 |
+
import enum
|
| 4 |
+
import functools
|
| 5 |
import numpy as np
|
| 6 |
from . import core
|
| 7 |
from lynxkite.core import workspace
|
|
|
|
| 155 |
]
|
| 156 |
|
| 157 |
|
| 158 |
+
class UMAPMetric(enum.Enum):
    """Distance metrics offered by the "View vectors" operation.

    Every member's value is the same string as its name. ``view_vectors``
    takes one of these members as its ``metric`` parameter (defaulting to
    ``euclidean``) and forwards ``metric.value`` as the ``metric`` keyword
    of the UMAP constructor.
    """

    l1 = "l1"
    cityblock = "cityblock"
    taxicab = "taxicab"
    manhattan = "manhattan"
    euclidean = "euclidean"
    l2 = "l2"
    sqeuclidean = "sqeuclidean"
    canberra = "canberra"
    minkowski = "minkowski"
    chebyshev = "chebyshev"
    linf = "linf"
    cosine = "cosine"
    correlation = "correlation"
    hellinger = "hellinger"
    hamming = "hamming"
|
| 174 |
+
|
| 175 |
+
|
| 176 |
@op("View vectors", view="visualization")
|
| 177 |
def view_vectors(
|
| 178 |
bundle: core.Bundle,
|
|
|
|
| 180 |
table_name: str = "nodes",
|
| 181 |
vector_column: str = "",
|
| 182 |
label_column: str = "",
|
| 183 |
+
n_neighbors: int = 15,
|
| 184 |
+
min_dist: float = 0.1,
|
| 185 |
+
metric: UMAPMetric = UMAPMetric.euclidean,
|
| 186 |
):
|
| 187 |
vec = np.stack(bundle.dfs[table_name][vector_column].to_numpy())
|
| 188 |
+
umap = functools.partial(
|
| 189 |
+
cuml.manifold.umap.UMAP,
|
| 190 |
+
n_neighbors=n_neighbors,
|
| 191 |
+
min_dist=min_dist,
|
| 192 |
+
metric=metric.value,
|
| 193 |
+
)
|
| 194 |
+
proj = umap(n_components=2).fit_transform(vec)
|
| 195 |
+
color = umap(n_components=1).fit_transform(vec)
|
| 196 |
data = [[*p.tolist(), "", c.item()] for p, c in zip(proj, color)]
|
| 197 |
if label_column:
|
| 198 |
for i, row in enumerate(bundle.dfs[table_name][label_column]):
|
| 199 |
data[i][2] = row
|
| 200 |
+
size = 100 / len(data) ** 0.4
|
| 201 |
v = {
|
| 202 |
"title": {
|
| 203 |
"text": f"UMAP projection of {vector_column}",
|