DrugFlow / src /data /normal_modes.py
mority's picture
Upload 53 files
6e7d4ba verified
import warnings
import numpy as np
import prody
prody.confProDy(verbosity='none')
from prody import parsePDB, ANM
def pdb_to_normal_modes(pdb_file, num_modes=5, nmax=5000):
"""
Compute normal modes for a PDB file using an Anisotropic Network Model (ANM)
http://prody.csb.pitt.edu/tutorials/enm_analysis/anm.html (accessed 01/11/2023)
"""
protein = parsePDB(pdb_file, model=1).select('calpha')
if len(protein) > nmax:
warnings.warn("Protein is too big. Returning zeros...")
eig_vecs = np.zeros((len(protein), 3, num_modes))
else:
# build Hessian
anm = ANM('ANM analysis')
anm.buildHessian(protein, cutoff=15.0, gamma=1.0)
# calculate normal modes
anm.calcModes(num_modes, zeros=False)
# only use slowest modes
eig_vecs = anm.getEigvecs() # shape: (num_atoms * 3, num_modes)
eig_vecs = eig_vecs.reshape(len(protein), 3, num_modes)
# eig_vals = anm.getEigvals() # shape: (num_modes,)
nm_dict = {}
for atom, nm_vec in zip(protein, eig_vecs):
chain = atom.getChid()
resi = atom.getResnum()
name = atom.getName()
nm_dict[(chain, resi, name)] = nm_vec.T
return nm_dict
if __name__ == "__main__":
import argparse
from pathlib import Path
import torch
from tqdm import tqdm
parser = argparse.ArgumentParser()
parser.add_argument('basedir', type=Path)
parser.add_argument('--outfile', type=Path, default=None)
args = parser.parse_args()
# Read data split
split_path = Path(args.basedir, 'split_by_name.pt')
data_split = torch.load(split_path)
pockets = [x[0] for split in data_split.values() for x in split]
all_normal_modes = {}
for p in tqdm(pockets):
pdb_file = Path(args.basedir, 'crossdocked_pocket10', p)
try:
nm_dict = pdb_to_normal_modes(str(pdb_file))
all_normal_modes[p] = nm_dict
except AttributeError as e:
warnings.warn(str(e))
np.save(args.outfile, all_normal_modes)