mirror of
https://github.com/gcorso/DiffDock.git
synced 2026-06-04 09:54:21 +08:00
228 lines
4.4 KiB
Python
228 lines
4.4 KiB
Python
import gzip
|
|
import os
|
|
from typing import List, Optional, Tuple
|
|
|
|
import numpy as np
|
|
import rdkit.Chem as Chem
|
|
|
|
from spyrmsd import molecule, utils
|
|
|
|
|
|
def _load_block_gzipped(loader, fname: str):
|
|
"""
|
|
Load gzipped files using MolBlocks.
|
|
|
|
Parameters
|
|
----------
|
|
loader:
|
|
RDKit MolBlock loader (MolFromMol2Block, MolFromPDBBlock, ...)
|
|
fname: str
|
|
File name
|
|
|
|
Returns
|
|
-------
|
|
Molecule
|
|
"""
|
|
with gzip.open(fname, "r") as fgz:
|
|
content = fgz.read()
|
|
rdmol = loader(content, removeHs=False)
|
|
|
|
return rdmol
|
|
|
|
|
|
def load(fname: str):
|
|
"""
|
|
Load molecule from file.
|
|
|
|
Parameters
|
|
----------
|
|
fname: str
|
|
File name
|
|
|
|
Returns
|
|
-------
|
|
Molecule
|
|
"""
|
|
|
|
gzipped = os.path.splitext(fname)[-1] == ".gz"
|
|
fmt = utils.molformat(fname)
|
|
|
|
if fmt == "mol2":
|
|
if not gzipped:
|
|
rdmol = Chem.MolFromMol2File(fname, removeHs=False)
|
|
else:
|
|
rdmol = _load_block_gzipped(Chem.MolFromMol2Block, fname)
|
|
elif fmt == "sdf":
|
|
if not gzipped:
|
|
rdmol = next(Chem.SDMolSupplier(fname, removeHs=False))
|
|
else:
|
|
with gzip.open(fname, "r") as fgz:
|
|
rdmol = next(Chem.ForwardSDMolSupplier(fgz, removeHs=False))
|
|
elif fmt == "pdb":
|
|
if not gzipped:
|
|
rdmol = Chem.MolFromPDBFile(fname, removeHs=False)
|
|
else:
|
|
rdmol = _load_block_gzipped(Chem.MolFromPDBBlock, fname)
|
|
else:
|
|
raise NotImplementedError
|
|
|
|
return rdmol
|
|
|
|
|
|
def loadall(fname: str):
|
|
"""
|
|
Load molecules from file.
|
|
|
|
Parameters
|
|
----------
|
|
fname: str
|
|
File name
|
|
|
|
Returns
|
|
-------
|
|
List of molecules
|
|
"""
|
|
|
|
gzipped = os.path.splitext(fname)[-1] == ".gz"
|
|
fmt = utils.molformat(fname)
|
|
|
|
if fmt == "mol2":
|
|
raise NotImplementedError # See RDKit Issue #415
|
|
elif fmt == "sdf":
|
|
if not gzipped:
|
|
rdmols = Chem.SDMolSupplier(fname, removeHs=False)
|
|
mols = [rdmol for rdmol in rdmols]
|
|
else:
|
|
with gzip.open(fname, "r") as fgz:
|
|
rdmols = Chem.ForwardSDMolSupplier(fgz, removeHs=False)
|
|
|
|
# Load all molecules before closing file
|
|
mols = [rdmol for rdmol in rdmols]
|
|
elif fmt == "pdb":
|
|
# TODO: Implement
|
|
raise NotImplementedError
|
|
else:
|
|
raise NotImplementedError
|
|
|
|
return mols
|
|
|
|
|
|
def adjacency_matrix(mol) -> np.ndarray:
|
|
"""
|
|
Adjacency matrix from OpenBabel molecule.
|
|
|
|
Parameters
|
|
----------
|
|
mol:
|
|
Molecule
|
|
|
|
Returns
|
|
-------
|
|
np.ndarray
|
|
Adjacency matrix of the molecule
|
|
"""
|
|
|
|
return Chem.rdmolops.GetAdjacencyMatrix(mol)
|
|
|
|
|
|
def to_molecule(mol, adjacency: bool = True):
|
|
"""
|
|
Transform molecule to `pyrmsd` molecule.
|
|
|
|
Parameters
|
|
----------
|
|
mol:
|
|
Molecule
|
|
adjacency: boolean, optional
|
|
Flag to decide wether to build the adjacency matrix from molecule
|
|
|
|
Returns
|
|
-------
|
|
spyrmsd.molecule.Molecule
|
|
`spyrmsd` molecule
|
|
"""
|
|
|
|
if mol is None:
|
|
# Propagate RDKit parsing failure
|
|
return None
|
|
|
|
atoms = mol.GetAtoms()
|
|
|
|
n = len(atoms)
|
|
|
|
atomicnums = np.zeros(n, dtype=int)
|
|
coordinates = np.zeros((n, 3))
|
|
|
|
conformer = mol.GetConformer()
|
|
|
|
for i, atom in enumerate(atoms):
|
|
atomicnums[i] = atom.GetAtomicNum()
|
|
|
|
pos = conformer.GetAtomPosition(i)
|
|
|
|
coordinates[i] = np.array([pos.x, pos.y, pos.z])
|
|
|
|
A: Optional[np.ndarray] = adjacency_matrix(mol) if adjacency else None
|
|
|
|
return molecule.Molecule(atomicnums, coordinates, A)
|
|
|
|
|
|
def numatoms(mol) -> int:
|
|
"""
|
|
Number of atoms.
|
|
|
|
Parameters
|
|
----------
|
|
mol:
|
|
Molecule
|
|
|
|
Returns
|
|
-------
|
|
int
|
|
Number of atoms
|
|
"""
|
|
return mol.GetNumAtoms()
|
|
|
|
|
|
def numbonds(mol) -> int:
|
|
"""
|
|
Number of bonds.
|
|
|
|
Parameters
|
|
----------
|
|
mol:
|
|
Molecule
|
|
|
|
Returns
|
|
-------
|
|
int
|
|
Number of bonds
|
|
"""
|
|
return mol.GetNumBonds()
|
|
|
|
|
|
def bonds(mol) -> List[Tuple[int, int]]:
|
|
"""
|
|
List of bonds.
|
|
|
|
Parameters
|
|
----------
|
|
mol:
|
|
Molecule
|
|
|
|
Returns
|
|
-------
|
|
List[Tuple[int, int]]
|
|
List of bonds
|
|
|
|
Notes
|
|
-----
|
|
A bond is defined by a tuple of (0-based) indices of two atoms.
|
|
"""
|
|
b = []
|
|
|
|
for bond in mol.GetBonds():
|
|
b.append((bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()))
|
|
|
|
return b
|