Spaces:
Build error
Build error
Create new file
Browse files
app.py
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from huggingface_hub import from_pretrained_keras
|
| 2 |
+
import gradio as gr
|
| 3 |
+
import ast
|
| 4 |
+
import pandas as pd
|
| 5 |
+
import numpy as np
|
| 6 |
+
import tensorflow as tf
|
| 7 |
+
from rdkit import Chem, RDLogger
|
| 8 |
+
from rdkit.Chem import BondType
|
| 9 |
+
from rdkit.Chem.Draw import MolsToGridImage
|
| 10 |
+
|
| 11 |
+
# Turn off RDKit log output for every channel matching "rdApp.*".
RDLogger.DisableLog("rdApp.*")

# Config
# Atom symbols known to the model; a symbol's position in this list is the
# integer index used for it in the lookup tables below.
SMILE_CHARSET = ast.literal_eval(
    '["C", "B", "F", "I", "H", "O", "N", "S", "P", "Cl", "Br"]'
)
MAX_MOLSIZE = 109

# Two-way bond table: bond name -> integer index, and integer index -> RDKit
# BondType (the direction used when bonds are added to a molecule).
bond_mapping = {
    "SINGLE": 0,
    "DOUBLE": 1,
    "TRIPLE": 2,
    "AROMATIC": 3,
    0: BondType.SINGLE,
    1: BondType.DOUBLE,
    2: BondType.TRIPLE,
    3: BondType.AROMATIC,
}

# Two-way atom table: symbol -> index and index -> symbol, merged into one
# dict so either kind of key can be looked up.
SMILE_to_index = {symbol: position for position, symbol in enumerate(SMILE_CHARSET)}
index_to_SMILE = dict(enumerate(SMILE_CHARSET))
atom_mapping = {**SMILE_to_index, **index_to_SMILE}

NUM_ATOMS = 120  # Maximum number of atoms
ATOM_DIM = 11  # Number of atom types
BOND_DIM = 4 + 1  # Number of bond types
LATENT_DIM = 435  # Size of the latent space
|
| 30 |
+
|
| 31 |
+
def graph_to_molecule(graph):
    """Convert an (adjacency, features) graph pair into an RDKit molecule.

    `graph` unpacks into two arrays: `adjacency`, indexed as
    [bond type, atom, atom], where entries equal to 1 mark a bond, and
    `features`, indexed as [atom, atom type], whose per-row argmax selects
    the atom type.

    Returns the sanitized `Chem.RWMol`, or None when RDKit sanitization
    reports any failure.
    """
    adjacency, features = graph

    # Keep an atom only if its type is not the last channel ("no atom") and it
    # has at least one bond in any channel other than the last one.
    atom_types = np.argmax(features, axis=1)
    is_real_atom = atom_types != ATOM_DIM - 1
    is_bonded = np.sum(adjacency[:-1], axis=(0, 1)) != 0
    keep_idx = np.where(is_real_atom & is_bonded)[0]
    features = features[keep_idx]
    adjacency = adjacency[:, keep_idx, :][:, :, keep_idx]

    # RWMol is the editable molecule type; add one atom per surviving row.
    molecule = Chem.RWMol()
    for type_index in atom_types[keep_idx]:
        molecule.AddAtom(Chem.Atom(atom_mapping[type_index]))

    # The adjacency tensor is [symmetric], so only its upper triangles are
    # scanned; self-loops and the last ("no bond") channel are skipped.
    for bond_index, begin, end in zip(*np.where(np.triu(adjacency) == 1)):
        is_self_loop = begin == end
        is_no_bond = bond_index == BOND_DIM - 1
        if is_self_loop or is_no_bond:
            continue
        molecule.AddBond(int(begin), int(end), bond_mapping[bond_index])

    # Be strict: any sanitization problem rejects the molecule. See
    # https://www.rdkit.org/docs/RDKit_Book.html#molecular-sanitization
    status = Chem.SanitizeMol(molecule, catchErrors=True)
    if status != Chem.SanitizeFlags.SANITIZE_NONE:
        return None
    return molecule
|
| 68 |
+
|
| 69 |
+
# Fetch the pretrained model via huggingface_hub once, at import time;
# inference() passes sampled latent vectors to its predict() method.
model = from_pretrained_keras("keras-io/drug-molecule-generation-with-VAE")
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def inference(num_mol, num_samples=1000):
    """Sample molecules from the model and render them as a grid image.

    Random latent vectors are decoded by `model` into adjacency and feature
    tensors, each graph is converted with `graph_to_molecule`, and the first
    `num_mol` molecules that pass sanitization are drawn.

    Args:
        num_mol: Maximum number of molecules to draw. Coerced to int because
            a UI or API client may deliver a float, which cannot be used as
            a count.
        num_samples: Number of latent vectors to decode. Defaults to 1000.

    Returns:
        The path of the saved image, "img.png".
    """
    num_mol = int(num_mol)

    z = tf.random.normal((num_samples, LATENT_DIM))
    reconstruction_adjacency, reconstruction_features = model.predict(z)

    # Obtain the one-hot encoded adjacency tensor (bond type on axis 1).
    adjacency = tf.argmax(reconstruction_adjacency, axis=1)
    adjacency = tf.one_hot(adjacency, depth=BOND_DIM, axis=1)
    # Remove potential self-loops from adjacency.
    adjacency = tf.linalg.set_diag(adjacency, tf.zeros(tf.shape(adjacency)[:-1]))

    # Obtain the one-hot encoded feature tensor (atom type on axis 2).
    features = tf.argmax(reconstruction_features, axis=2)
    features = tf.one_hot(features, depth=ATOM_DIM, axis=2)

    # Convert graphs in order and stop as soon as enough valid molecules are
    # collected; the remaining samples would never be drawn anyway.
    molecules = []
    for i in range(num_samples):
        if len(molecules) >= num_mol:
            break
        molecule = graph_to_molecule([adjacency[i].numpy(), features[i].numpy()])
        if molecule is not None:
            molecules.append(molecule)

    MolsToGridImage(molecules, molsPerRow=5, subImgSize=(260, 160)).save("img.png")
    return "img.png"
|
| 88 |
+
|
| 89 |
+
# Slider that supplies the `num_mol` argument of inference().
molecule_count_slider = gr.inputs.Slider(
    20, 100, label='Number of Molecular Graphs', step=20, default=40
)

# Assemble the web UI around inference() and start serving it.
demo = gr.Interface(
    fn=inference,
    inputs=[molecule_count_slider],
    outputs="image",
    title="Generating Drug Molecule with VAE",
    description="Implementing a Convolutional Variational AutoEncoder (VAE) for Drug Discovery 🔬",
    article="Author: <a href=\"https://huggingface.co/vumichien\">Vu Minh Chien</a>. Based on the keras example from <a href=\"https://keras.io/examples/generative/molecule_generation/\">Victor Basu</a>",
)
demo.launch(enable_queue=True, debug=True)
|