Spaces:
Running
Running
Graph from molecule similarity.
Browse files- README.md +2 -1
- lynxkite-app/src/lynxkite_app/crdt.py +0 -1
- lynxkite-bio/README.md +3 -0
- lynxkite-bio/pyproject.toml +24 -0
- lynxkite-bio/src/lynxkite_bio/__init__.py +67 -0
- lynxkite-graph-analytics/src/lynxkite_graph_analytics/__init__.py +1 -1
- lynxkite-graph-analytics/src/lynxkite_graph_analytics/lynxkite_ops.py +4 -3
README.md
CHANGED
|
@@ -14,6 +14,7 @@ original LynxKite. The primary goals of this rewrite are:
|
|
| 14 |
- `lynxkite-graph-analytics`: Graph analytics plugin. The classical LynxKite experience!
|
| 15 |
- `lynxkite-pillow`: A simple example plugin.
|
| 16 |
- `lynxkite-lynxscribe`: A plugin for building and running LynxScribe applications.
|
|
|
|
| 17 |
- `docs`: User-facing documentation. It's shared between all packages.
|
| 18 |
|
| 19 |
## Development
|
|
@@ -25,7 +26,7 @@ uv venv
|
|
| 25 |
source .venv/bin/activate
|
| 26 |
uvx pre-commit install
|
| 27 |
# The [dev] tag is only needed if you intend on running tests
|
| 28 |
-
uv pip install -e lynxkite-core/[dev] -e lynxkite-app/[dev] -e lynxkite-graph-analytics/[dev] -e lynxkite-lynxscribe/ -e lynxkite-pillow-example/
|
| 29 |
```
|
| 30 |
|
| 31 |
This also builds the frontend, hopefully very quickly. To run it:
|
|
|
|
| 14 |
- `lynxkite-graph-analytics`: Graph analytics plugin. The classical LynxKite experience!
|
| 15 |
- `lynxkite-pillow`: A simple example plugin.
|
| 16 |
- `lynxkite-lynxscribe`: A plugin for building and running LynxScribe applications.
|
| 17 |
+
- `lynxkite-bio`: Bioinformatics additions for LynxKite Graph Analytics.
|
| 18 |
- `docs`: User-facing documentation. It's shared between all packages.
|
| 19 |
|
| 20 |
## Development
|
|
|
|
| 26 |
source .venv/bin/activate
|
| 27 |
uvx pre-commit install
|
| 28 |
# The [dev] tag is only needed if you intend on running tests
|
| 29 |
+
uv pip install -e lynxkite-core/[dev] -e lynxkite-app/[dev] -e lynxkite-graph-analytics/[dev] -e lynxkite-bio -e lynxkite-lynxscribe/ -e lynxkite-pillow-example/
|
| 30 |
```
|
| 31 |
|
| 32 |
This also builds the frontend, hopefully very quickly. To run it:
|
lynxkite-app/src/lynxkite_app/crdt.py
CHANGED
|
@@ -3,7 +3,6 @@
|
|
| 3 |
import asyncio
|
| 4 |
import contextlib
|
| 5 |
import enum
|
| 6 |
-
import pathlib
|
| 7 |
import fastapi
|
| 8 |
import os.path
|
| 9 |
import pycrdt
|
|
|
|
| 3 |
import asyncio
|
| 4 |
import contextlib
|
| 5 |
import enum
|
|
|
|
| 6 |
import fastapi
|
| 7 |
import os.path
|
| 8 |
import pycrdt
|
lynxkite-bio/README.md
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# LynxKite Bio
|
| 2 |
+
|
| 3 |
+
An expansion for `lynxkite-graph-analytics` that provides algorithms for biological applications.
|
lynxkite-bio/pyproject.toml
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[project]
|
| 2 |
+
name = "lynxkite-bio"
|
| 3 |
+
version = "0.1.0"
|
| 4 |
+
description = "Additional boxes for LynxKite Graph Analytics that add algorithms for biology."
|
| 5 |
+
readme = "README.md"
|
| 6 |
+
requires-python = ">=3.11"
|
| 7 |
+
dependencies = [
|
| 8 |
+
"fsspec>=2025.2.0",
|
| 9 |
+
"joblib>=1.4.2",
|
| 10 |
+
"lynxkite-core",
|
| 11 |
+
"lynxkite-graph-analytics",
|
| 12 |
+
"pandas>=2.2.3",
|
| 13 |
+
"rdkit>=2024.9.5",
|
| 14 |
+
"scipy>=1.15.2",
|
| 15 |
+
]
|
| 16 |
+
|
| 17 |
+
[project.optional-dependencies]
|
| 18 |
+
dev = [
|
| 19 |
+
"pytest>=8.3.4",
|
| 20 |
+
]
|
| 21 |
+
|
| 22 |
+
[tool.uv.sources]
|
| 23 |
+
lynxkite-core = { path = "../lynxkite-core" }
|
| 24 |
+
lynxkite-graph-analytics = { path = "../lynxkite-graph-analytics" }
|
lynxkite-bio/src/lynxkite_bio/__init__.py
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Graph analytics operations. To be split into separate files when we have more."""
|
| 2 |
+
|
| 3 |
+
from lynxkite_graph_analytics import Bundle, RelationDefinition
|
| 4 |
+
from lynxkite.core import ops
|
| 5 |
+
import joblib
|
| 6 |
+
import numpy as np
|
| 7 |
+
import pandas as pd
|
| 8 |
+
import rdkit.Chem
|
| 9 |
+
import rdkit.Chem.rdFingerprintGenerator
|
| 10 |
+
import rdkit.Chem.Fingerprints.ClusterMols
|
| 11 |
+
import scipy
|
| 12 |
+
|
| 13 |
+
mem = joblib.Memory("../joblib-cache")
|
| 14 |
+
ENV = "LynxKite Graph Analytics"
|
| 15 |
+
op = ops.op_registration(ENV)
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
@op("Parse SMILES")
def parse_smiles(bundle: Bundle, *, table="df", smiles_column="SMILES", save_as="mols"):
    """Parse SMILES strings into RDKit molecules.

    Rows whose SMILES value is missing, or for which ``MolFromSmiles`` returns
    ``None``, are dropped from the table. Without this, the list of molecules
    would be shorter than the table and could not be assigned as a column.

    Args:
        bundle: The input bundle. It is not modified; a copy is returned.
        table: Name of the table in ``bundle.dfs`` that holds the SMILES strings.
        smiles_column: Name of the column with the SMILES strings.
        save_as: Name of the column that receives the parsed molecules.

    Returns:
        A copy of the bundle in which ``table`` is restricted to the rows that
        could be parsed and has the molecules in the ``save_as`` column.
    """
    df = bundle.dfs[table]
    # Parse every row (missing values become None) so that positions in
    # `parsed` line up one-to-one with the rows of `df`.
    parsed = [
        None if pd.isna(smiles) else rdkit.Chem.MolFromSmiles(smiles)
        for smiles in df[smiles_column]
    ]
    # A positional boolean mask works regardless of what the index looks like.
    keep = np.array([mol is not None for mol in parsed], dtype=bool)
    mols = [mol for mol in parsed if mol is not None]
    bundle = bundle.copy()
    bundle.dfs[table] = df.loc[keep].assign(**{save_as: mols})
    return bundle
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def _get_similarity_matrix(mols):
    """Return the pairwise Tanimoto similarity of the molecules as a square matrix.

    Each molecule is described by a Morgan fingerprint (radius 2, 2048 bits).
    The result is a symmetric ``len(mols) x len(mols)`` float array where entry
    ``[i, j]`` is the similarity of molecule ``i`` and molecule ``j``. The
    diagonal is left at zero, as only the off-diagonal pairs are meaningful.

    The matrix is filled directly rather than via
    ``ClusterMols.GetDistanceMatrix`` + ``squareform``: that helper returns
    ``1 - similarity`` when ``isSimilarity`` is set, and its condensed layout is
    column-major while ``squareform`` expects row-major, so values would end up
    attached to the wrong pairs.
    """
    mfpgen = rdkit.Chem.rdFingerprintGenerator.GetMorganGenerator(radius=2, fpSize=2048)
    fps = [mfpgen.GetFingerprint(mol) for mol in mols]
    count = len(fps)
    similarity_matrix = np.zeros((count, count), dtype=float)
    for i in range(1, count):
        # One bulk call compares molecule i against all earlier molecules.
        sims = rdkit.Chem.DataStructs.BulkTanimotoSimilarity(fps[i], fps[:i])
        similarity_matrix[i, :i] = sims
        similarity_matrix[:i, i] = sims
    return similarity_matrix
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
@op("Graph from molecule similarity")
def graph_from_similarity(
    bundle: Bundle, *, table="df", mols_column="mols", average_degree=10
):
    """Build a graph that connects the most similar pairs of molecules.

    Every row of the input table becomes a node, identified by its index label.
    The ``average_degree * number_of_molecules`` pairs with the highest value in
    the similarity matrix become edges. Each unordered pair is considered once.

    Args:
        bundle: The input bundle.
        table: Name of the table in ``bundle.dfs`` that holds the molecules.
        mols_column: Name of the column with the RDKit molecules.
        average_degree: Number of edges to create per molecule. Values that
            would give a non-positive edge count produce a graph without edges.

    Returns:
        A new bundle with a "nodes" table (a copy of the input table with the
        index named "id"), an "edges" table with "source", "target" and
        "similarity" columns, and a relation tying the edges to the nodes.
    """
    df = bundle.dfs[table]
    mols = df[mols_column]
    nodes = df.copy()
    nodes.index.name = "id"
    node_ids = nodes.index.to_numpy()
    similarity_matrix = _get_similarity_matrix(mols)
    # Only look above the diagonal, so that each pair is listed once.
    i_idx, j_idx = np.triu_indices_from(similarity_matrix, k=1)
    sim_values = similarity_matrix[i_idx, j_idx]
    # Clamp the edge count: a plain `[-N:]` slice would select *every* pair
    # for N == 0 and an arbitrary tail for negative N.
    num_edges = min(max(int(average_degree * len(mols)), 0), len(sim_values))
    order = np.argsort(sim_values)
    top = order[len(order) - num_edges :]
    # The matrix is positional, but nodes are keyed by index label. Translate
    # positions to labels so that edges stay valid for non-default indexes.
    edges = pd.DataFrame(
        {
            "source": node_ids[i_idx[top]],
            "target": node_ids[j_idx[top]],
            "similarity": sim_values[top],
        }
    )
    bundle = Bundle(
        dfs={"edges": edges, "nodes": nodes},
        relations=[
            RelationDefinition(
                df="edges",
                source_column="source",
                target_column="target",
                source_table="nodes",
                target_table="nodes",
                source_key="id",
                target_key="id",
            )
        ],
    )
    return bundle
|
lynxkite-graph-analytics/src/lynxkite_graph_analytics/__init__.py
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
-
from . import
|
| 2 |
from . import networkx_ops # noqa (imported to trigger registration)
|
| 3 |
from . import pytorch_model_ops # noqa (imported to trigger registration)
|
|
|
|
| 1 |
+
from .lynxkite_ops import * # noqa (imported to trigger registration)
|
| 2 |
from . import networkx_ops # noqa (imported to trigger registration)
|
| 3 |
from . import pytorch_model_ops # noqa (imported to trigger registration)
|
lynxkite-graph-analytics/src/lynxkite_graph_analytics/lynxkite_ops.py
CHANGED
|
@@ -80,9 +80,10 @@ class Bundle:
|
|
| 80 |
# TODO: Use relations.
|
| 81 |
graph = nx.DiGraph()
|
| 82 |
if "nodes" in self.dfs:
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
|
|
|
| 86 |
graph.add_edges_from(
|
| 87 |
self.dfs["edges"][["source", "target"]].itertuples(index=False, name=None)
|
| 88 |
)
|
|
|
|
| 80 |
# TODO: Use relations.
|
| 81 |
graph = nx.DiGraph()
|
| 82 |
if "nodes" in self.dfs:
|
| 83 |
+
df = self.dfs["nodes"]
|
| 84 |
+
if df.index.name != "id":
|
| 85 |
+
df = df.set_index("id")
|
| 86 |
+
graph.add_nodes_from(df.to_dict("index").items())
|
| 87 |
graph.add_edges_from(
|
| 88 |
self.dfs["edges"][["source", "target"]].itertuples(index=False, name=None)
|
| 89 |
)
|