mirror of
https://github.com/Saoge123/PocketFlow.git
synced 2026-06-04 12:44:22 +08:00
763 lines
30 KiB
Python
763 lines
30 KiB
Python
import os
|
||
import copy
|
||
import numpy as np
|
||
from rdkit import Chem, RDConfig
|
||
from rdkit.Chem.rdchem import BondType
|
||
from rdkit.Chem import ChemicalFeatures
|
||
from .residues_base import RESIDUES_TOPO, RESIDUES_TOPO_WITH_H
|
||
try:
|
||
import pymol
|
||
except:
|
||
print('we can not compute the atoms on the surface of protein, '\
|
||
'because pymol can not be imported')
|
||
pass
|
||
#from .BaseFeatures import Mol2Graph, RESIDUE_GRAPH_WITHOUT_H, RESIDUE_GRAPH_WITH_H
|
||
#from .utils import coordinate_adjusting
|
||
#from .Data import LigandData
|
||
|
||
ATOM_TYPE_WITH_HYBIRD = [
|
||
'SP3_C', 'SP2_C', 'SP_C', 'SP3_N', 'SP2_N', 'SP_N', 'SP3_O', 'SP2_O', 'SP3_F', 'SP3_P',
|
||
'SP2_P', 'SP3D_P', 'SP3_S', 'SP2_S', 'SP3D_S', 'SP3D2_S', 'SP3_Cl', 'SP3_Br', 'SP3_I'
|
||
]
|
||
ATOM_MAP = [6, 6, 6, 7, 7, 7, 8, 8, 9, 15, 15, 15, 16, 16, 16, 16, 17, 35, 53]
|
||
PT = Chem.GetPeriodicTable()
|
||
BACKBONE_SYMBOL = {'N', 'CA', 'C', 'O'}
|
||
AMINO_ACID_TYPE = {
|
||
'CYS':0, 'GLY':1, 'ALA':2, 'THR':3, 'LYS':4, 'PRO':5, 'VAL':6, 'SER':7, 'ASN':8, 'LEU':9,
|
||
'GLN':10, 'MET':11, 'ASP':12, 'TRP':13, 'HIS':14, 'GLU':15, 'ARG':16, 'ILE':17, 'PHE':18,
|
||
'TYR':19
|
||
}
|
||
|
||
'''
|
||
def coordinate_adjusting(pos, wts):
|
||
center_of_mass = np.sum(pos*wts, axis=0)/wts.sum()
|
||
pos = pos - center_of_mass
|
||
|
||
I_xx = (wts*(pos[...,1]**2 + pos[...,2]**2)).sum()
|
||
I_yy = (wts*(pos[...,0]**2 + pos[...,2]**2)).sum()
|
||
I_zz = (wts*(pos[...,0]**2 + pos[...,1]**2)).sum()
|
||
I_xy = -(wts*(pos[...,0]*pos[...,1])).sum()
|
||
I_xz = -(wts*(pos[...,0]*pos[...,2])).sum()
|
||
I_yz = -(wts*(pos[...,1]*pos[...,2])).sum()
|
||
I = np.array([[I_xx, I_xy, I_xz],
|
||
[I_xy, I_yy, I_yz],
|
||
[I_xz, I_yz, I_zz]])
|
||
eig_v, eig_m = np.linalg.eigh(I) # np.linalg.eigh guarantees you that the eigenvalues are sorted and uses a
|
||
# faster algorithm that takes advantage of the fact that the matrix is
|
||
# symmetric. If you know that your matrix is symmetric, use this function.
|
||
am = pos@eig_m # 用本真矢矩阵变换原子坐标,以‘摆正’分子
|
||
|
||
rotate_around = []
|
||
x, y, z = am[0]
|
||
if x < 0:
|
||
am[..., 0] = am[..., 0] * -1
|
||
rotate_around.append(0)
|
||
if y < 0:
|
||
am[..., 1] = am[..., 1] * -1
|
||
rotate_around.append(1)
|
||
if z < 0:
|
||
am[..., 2] = am[..., 2] * -1
|
||
rotate_around.append(2)
|
||
return am, eig_m, center_of_mass, rotate_around
|
||
'''
|
||
def coordinate_adjusting(pos, wts):
|
||
center_of_mass = np.sum(pos*wts, axis=0)/wts.sum()
|
||
pos = pos - center_of_mass
|
||
|
||
I_xx = (wts*(pos[...,1]**2 + pos[...,2]**2)).sum()
|
||
I_yy = (wts*(pos[...,0]**2 + pos[...,2]**2)).sum()
|
||
I_zz = (wts*(pos[...,0]**2 + pos[...,1]**2)).sum()
|
||
I_xy = -(wts*(pos[...,0]*pos[...,1])).sum()
|
||
I_xz = -(wts*(pos[...,0]*pos[...,2])).sum()
|
||
I_yz = -(wts*(pos[...,1]*pos[...,2])).sum()
|
||
I = np.array([[I_xx, I_xy, I_xz],
|
||
[I_xy, I_yy, I_yz],
|
||
[I_xz, I_yz, I_zz]])
|
||
eig_v, eig_m = np.linalg.eigh(I) # np.linalg.eigh guarantees you that the eigenvalues are sorted and uses a
|
||
# faster algorithm that takes advantage of the fact that the matrix is
|
||
# symmetric. If you know that your matrix is symmetric, use this function.
|
||
am = pos@eig_m # 用本真矢矩阵变换原子坐标,以‘摆正’分子
|
||
return am, eig_m, center_of_mass
|
||
|
||
class Dict(dict):
|
||
__setattr__ = dict.__setitem__
|
||
__getattr__ = dict.__getitem__
|
||
|
||
|
||
class Atom(object):
|
||
def __init__(self, atom_info):
|
||
|
||
self.idx = int(atom_info[6:11])
|
||
self.name = atom_info[12:16].strip()
|
||
self.res_name = atom_info[17:20].strip() # atom_info[17:20].strip()
|
||
self.chain = atom_info[21:22].strip()
|
||
self.res_idx = int(atom_info[22:26])
|
||
self.coord = np.array([float(atom_info[30:38].strip()),
|
||
float(atom_info[38:46].strip()),
|
||
float(atom_info[46:54].strip())])
|
||
self.occupancy = float(atom_info[54:60])
|
||
self.temperature_factor = float(atom_info[60:66].strip())
|
||
self.seg_id = atom_info[72:76].strip()
|
||
self.element = atom_info[76:78].strip()
|
||
if self.element == 'SE':
|
||
self.element = 'S'
|
||
self.name = 'SD'
|
||
self.res_name = 'MET'
|
||
self.mass = PT.GetAtomicWeight(self.element)
|
||
if self.occupancy < 1.0:
|
||
self.is_disorder = True
|
||
else:
|
||
self.is_disorder = False
|
||
self.is_surf = atom_info.split()[-1] == 'surf'
|
||
|
||
@property
|
||
def to_string(self):
|
||
fmt = '{:6s}{:5d} {:^4s}{:1s}{:3s} {:1s}{:4d}{:1s} {:8.3f}{:8.3f}{:8.3f}{:6.2f}{:6.2f} {:<2s}{:2s}' # https://cupnet.net/pdb-format/
|
||
out = fmt.format('ATOM', self.idx, self.name, '', self.res_name,
|
||
self.chain, self.res_idx,'', self.coord[0],
|
||
self.coord[1], self.coord[2], self.occupancy,
|
||
self.temperature_factor, self.seg_id, self.element)
|
||
return out
|
||
|
||
@property
|
||
def to_dict(self):
|
||
return {
|
||
'element': PT.GetAtomicNumber(self.element),
|
||
'pos': self.coord,
|
||
'is_backbone': self.name in BACKBONE_SYMBOL,
|
||
'atom_name': self.name,
|
||
'atom_to_aa_type': AMINO_ACID_TYPE[self.res_name]
|
||
}
|
||
|
||
def __repr__(self):
|
||
info = 'name={}, index={}, res={}, chain={}, is_disorder={}'.format(self.name,
|
||
self.idx,
|
||
self.res_name+str(self.res_idx),
|
||
self.chain,
|
||
self.is_disorder)
|
||
return '{}({})'.format(self.__class__.__name__, info)
|
||
|
||
|
||
class Residue(object):
|
||
|
||
def __init__(self, res_info):
|
||
self.res_info = res_info
|
||
#atoms_ = [Atom(i) for i in res_info]
|
||
self.atom_dict = {}
|
||
disorder = []
|
||
for i in res_info: # 排除disorder原子
|
||
atom = Atom(i)
|
||
if atom.name in self.atom_dict:
|
||
continue
|
||
else:
|
||
self.atom_dict[atom.name] = atom
|
||
disorder.append(atom.is_disorder)
|
||
if atom.res_name == 'MSE':
|
||
atom.res_name = 'MET' # 把MSE残基改成MET
|
||
|
||
if True in disorder:
|
||
self.is_disorder = True
|
||
else:
|
||
self.is_disorder = False
|
||
|
||
self.idx = self.atom_dict[atom.name].res_idx
|
||
self.chain = self.atom_dict[atom.name].chain
|
||
self.name = self.atom_dict[atom.name].res_name
|
||
self.is_perfect = True if len(self.get_heavy_atoms)==len(RESIDUES_TOPO[self.name]) else False
|
||
|
||
@property
|
||
def to_heavy_string(self):
|
||
return '\n'.join([a.to_string for a in self.get_heavy_atoms])
|
||
|
||
@property
|
||
def to_string(self):
|
||
return '\n'.join([a.to_string for a in self.get_atoms])
|
||
|
||
@property
|
||
def get_coords(self):
|
||
return np.array([a.coord for a in self.get_atoms])
|
||
|
||
@property
|
||
def get_atoms(self):
|
||
return list(self.atom_dict.values())
|
||
|
||
@property
|
||
def get_heavy_atoms(self):
|
||
return [a for a in self.atom_dict.values() if 'H' not in a.element]
|
||
|
||
@property
|
||
def get_heavy_coords(self):
|
||
return np.array([a.coord for a in self.get_heavy_atoms])
|
||
|
||
@property
|
||
def center_of_mass(self):
|
||
atom_mass = np.array([i.mass for i in self.get_atoms]).reshape(-1, 1)
|
||
return np.sum(self.get_coords*atom_mass, axis=0)/atom_mass.sum()
|
||
|
||
@property
|
||
def bond_graph(self):
|
||
i, j, bt = [], [], []
|
||
res_graph = RESIDUES_TOPO[self.name]
|
||
atom_names = [i.name for i in self.get_heavy_atoms]
|
||
for ix, name in enumerate(atom_names):
|
||
for adj in res_graph[name]:
|
||
if adj in atom_names:
|
||
idx_j = atom_names.index(adj)
|
||
i.append(ix)
|
||
j.append(idx_j)
|
||
bt.append(res_graph[name][adj])
|
||
edge_index = np.stack([i,j]).astype(dtype=np.int64)
|
||
bt = np.array(bt, dtype=np.int64)
|
||
return edge_index, bt
|
||
|
||
@property
|
||
def centroid(self):
|
||
return self.get_coords.mean(axis=0)
|
||
|
||
def __repr__(self):
|
||
info = 'name={}, index={}, chain={}, is_disorder={}, is_perfect={}'.format(
|
||
self.name, self.idx, self.chain, self.is_disorder, self.is_perfect
|
||
)
|
||
return '{}({})'.format(self.__class__.__name__, info)
|
||
|
||
|
||
class Chain(object):
|
||
def __init__(self, chain_info, ignore_incomplete_res=True, pdb_file=None):
|
||
self.pdb_file = pdb_file
|
||
self.res_dict = {}
|
||
'''if ignore_incomplete_res:
|
||
self.residues = {}
|
||
for i in chain_info:
|
||
res = Residue(chain_info[i])
|
||
if len(res.get_heavy_atoms) == len(RESIDUES_TOPO[res.name]):
|
||
self.residues[i] = res
|
||
else:
|
||
self.residues = {i:Residue(chain_info[i]) for i in chain_info}'''
|
||
self.residues = {i:Residue(chain_info[i]) for i in chain_info}
|
||
#print(self.residues)
|
||
self.chain = list(self.residues.values())[0].chain
|
||
self.__normalized__ = False
|
||
self.center_of_mass_shift = None
|
||
self.rotate_matrix = None
|
||
self.ignore_incomplete_res =ignore_incomplete_res
|
||
|
||
def normalize_coord(self):
|
||
wts = np.array([i.mass for i in self.get_atoms])
|
||
wts = np.expand_dims(wts,axis=1)
|
||
pos = self.get_coords
|
||
|
||
am, self.rotate_matrix, self.center_of_mass_shift = coordinate_adjusting(pos, wts)
|
||
for ix, a in enumerate(self.get_atoms):
|
||
a.coord = am[ix]
|
||
self.__normalized__ = True
|
||
|
||
@property
|
||
def get_incomplete_residues(self):
|
||
return [i for i in self.residues.values() if i.is_perfect==False]
|
||
|
||
@property
|
||
def to_heavy_string(self):
|
||
return '\n'.join([res.to_heavy_string for res in self.get_residues])
|
||
|
||
@property
|
||
def to_string(self):
|
||
return '\n'.join([res.to_string for res in self.get_residues])
|
||
|
||
@property
|
||
def get_atoms(self):
|
||
atoms = []
|
||
for res in self.get_residues:
|
||
atoms.extend(res.get_atoms)
|
||
return atoms
|
||
|
||
@property
|
||
def get_residues(self):
|
||
if self.ignore_incomplete_res:
|
||
return [i for i in self.residues.values() if i.is_perfect]
|
||
else:
|
||
return list(self.residues.values())
|
||
|
||
@property
|
||
def get_heavy_atoms(self):
|
||
atoms = []
|
||
for res in self.get_residues:
|
||
atoms.extend(res.get_heavy_atoms)
|
||
return atoms
|
||
|
||
@property
|
||
def get_coords(self):
|
||
return np.array([i.coord for i in self.get_atoms])
|
||
|
||
@property
|
||
def get_heavy_coords(self):
|
||
return np.array([i.coord for i in self.get_heavy_atoms])
|
||
|
||
@property
|
||
def center_of_mass(self):
|
||
atom_mass = np.array([i.mass for i in self.get_atoms]).reshape(-1, 1)
|
||
return np.sum(self.get_coords*atom_mass, axis=0)/atom_mass.sum()
|
||
|
||
@property
|
||
def centroid(self):
|
||
return self.get_coords.mean(axis=0)
|
||
|
||
def compute_surface_atoms(self):
|
||
path, filename = os.path.split(self.pdb_file)
|
||
chain_file_name = filename.split('.')[0] + '_' + self.chain + '.pdb'
|
||
chain_file = path+'/'+chain_file_name
|
||
with open(chain_file, 'w') as fw:
|
||
fw.write(self.to_heavy_string)
|
||
|
||
pymol.cmd.load(chain_file)
|
||
if path == '':
|
||
path = './'
|
||
sele = chain_file_name.split('.')[0]
|
||
pymol.cmd.remove("({}) and hydro".format(sele))
|
||
name = pymol.util.find_surface_atoms(sele=sele, _self=pymol.cmd)
|
||
save_name = path + '/' + sele + '-surface.pdb'
|
||
pymol.cmd.save(save_name,((name)))
|
||
surf_protein = Protein(save_name, ignore_incomplete_res=False)
|
||
surf_res_dict = {r.idx:r for r in surf_protein.get_residues}
|
||
for res in self.get_residues:
|
||
res_idx = res.idx
|
||
if res_idx in surf_res_dict:
|
||
for a in surf_res_dict[res_idx].get_heavy_atoms:
|
||
res.atom_dict[a.name].is_surf = True
|
||
self.has_surf_atom = True
|
||
os.remove(save_name)
|
||
os.remove(chain_file)
|
||
|
||
def get_surf_mask(self):
|
||
if self.has_surf_atom is False:
|
||
self.compute_surface_atoms()
|
||
return np.array([a.is_surf for a in self.get_heavy_atoms], dtype=np.bool)
|
||
|
||
def get_res_by_id(self, res_id):
|
||
return self.residues[res_id]
|
||
|
||
def __repr__(self):
|
||
tmp = 'Chain={}, NumResidues={}, NumAtoms={}, NumHeavyAtoms={}'
|
||
info = tmp.format(self.chain, len(self.residues), self.get_coords.shape[0], self.get_heavy_coords.shape[0])
|
||
return '{}({})'.format(self.__class__.__name__, info)
|
||
|
||
|
||
class Protein(object):
|
||
|
||
def __init__(self, pdb_file, ignore_incomplete_res=True):
|
||
self.ignore_incomplete_res = ignore_incomplete_res
|
||
self.name = pdb_file.split('/')[-1].split('.')[0]
|
||
self.pdb_file = pdb_file
|
||
self.has_surf_atom = False
|
||
#self.pdb = [line.strip() for line in open(pdb_file).readlines()]
|
||
#atoms = os.popen('grep ATOM {}'.format(pdb_file)).read().strip().split('\n')
|
||
with open(pdb_file) as fr:
|
||
lines = fr.readlines()
|
||
surf_item = lines[0].strip().split()[-1]
|
||
if surf_item in {'surf', 'inner'}:
|
||
self.has_surf_atom = True
|
||
chain_info = {}
|
||
for line in lines:
|
||
if line.startswith('ATOM'):
|
||
line = line.strip()
|
||
chain = line[21:22].strip()
|
||
res_idx = int(line[22:26].strip())
|
||
if chain not in chain_info:
|
||
chain_info[chain] = {}
|
||
chain_info[chain][res_idx] = [line]
|
||
elif res_idx not in chain_info[chain]:
|
||
chain_info[chain][res_idx] = [line]
|
||
else:
|
||
chain_info[chain][res_idx].append(line)
|
||
|
||
self.chains = {
|
||
c:Chain(chain_info[c], ignore_incomplete_res=ignore_incomplete_res, pdb_file=pdb_file) for c in chain_info
|
||
}
|
||
self.__normalized__ = False
|
||
self.center_of_mass_shift = None
|
||
self.rotate_matrix = None
|
||
|
||
def normalize_coord(self):
|
||
is_chain_normalized = [c for c in self.chains if self.chains[c].__normalized__==True]
|
||
if len(is_chain_normalized) > 0:
|
||
chain_normalized = ' '.join(is_chain_normalized)
|
||
info = 'Warning: The Chains ({}) have been normalized '\
|
||
'independently, this may lead to wrong coordinate '\
|
||
'transformation for whole Protein !!!'.format(chain_normalized)
|
||
print(info)
|
||
|
||
wts = np.array([i.mass for i in self.get_atoms])
|
||
wts = np.expand_dims(wts,axis=1)
|
||
pos = self.get_coords
|
||
|
||
am, self.rotate_matrix, self.center_of_mass_shift = coordinate_adjusting(pos, wts)
|
||
for ix, a in enumerate(self.get_atoms):
|
||
a.coord = am[ix]
|
||
self.__normalized__ = True
|
||
|
||
@property
|
||
def get_incomplete_residues(self):
|
||
res_list = []
|
||
for i in self.chains:
|
||
res_list += self.chains[i].get_incomplete_residues
|
||
return res_list
|
||
|
||
@property
|
||
def to_heavy_string(self):
|
||
return '\n'.join([res.to_heavy_string for res in self.get_residues])
|
||
|
||
@property
|
||
def to_string(self):
|
||
return '\n'.join([res.to_string for res in self.get_residues])
|
||
|
||
@property
|
||
def get_residues(self,):
|
||
res_list = []
|
||
for i in self.chains:
|
||
res_list += self.chains[i].get_residues
|
||
return res_list
|
||
|
||
@property
|
||
def get_atoms(self):
|
||
atoms = []
|
||
for res in self.get_residues:
|
||
atoms.extend(res.get_atoms)
|
||
return atoms
|
||
|
||
@property
|
||
def get_heavy_atoms(self):
|
||
atoms = []
|
||
for res in self.get_residues:
|
||
atoms.extend(res.get_heavy_atoms)
|
||
return atoms
|
||
|
||
@property
|
||
def get_coords(self):
|
||
return np.array([i.coord for i in self.get_atoms])
|
||
|
||
@property
|
||
def get_heavy_coords(self):
|
||
return np.array([i.coord for i in self.get_heavy_atoms])
|
||
|
||
@property
|
||
def center_of_mass(self):
|
||
atom_mass = np.array([i.mass for i in self.get_atoms]).reshape(-1, 1)
|
||
return np.sum(self.get_coords*atom_mass, axis=0)/atom_mass.sum()
|
||
|
||
@property
|
||
def bond_graph(self):
|
||
res_list = self.get_residues
|
||
bond_index = []
|
||
bond_type = []
|
||
N_term_list = []
|
||
C_term_list = []
|
||
cusum = 0
|
||
for ix, res in enumerate(res_list):
|
||
e_idx, e_type = res.bond_graph
|
||
bond_index.append(e_idx + cusum)
|
||
bond_type.append(e_type)
|
||
N_term_ix = [i.name for i in self.get_heavy_atoms].index('N') + cusum
|
||
C_term_ix = [i.name for i in self.get_heavy_atoms].index('C') + cusum
|
||
N_term_list.append(N_term_ix)
|
||
C_term_list.append(C_term_ix)
|
||
cusum += res.get_heavy_coords.shape[0]
|
||
if ix != 0:
|
||
if res.idx-res_list[ix-1].idx == 1 and res.chain == res_list[ix-1].chain:
|
||
bond_idx_between_res = np.array(
|
||
[[N_term_ix, C_term_list[ix-1]],[C_term_list[ix-1],N_term_ix]],
|
||
dtype=np.long
|
||
)
|
||
bond_index.append(bond_idx_between_res)
|
||
bond_type_between_res = np.array([1,1], dtype=np.long)
|
||
bond_type.append(bond_type_between_res)
|
||
bond_index = np.concatenate(bond_index, axis=1)
|
||
bond_type = np.concatenate(bond_type)
|
||
return bond_index, bond_type
|
||
|
||
@property
|
||
def centroid(self):
|
||
return self.get_coords.mean(axis=0)
|
||
|
||
def get_chain(self, chain_id):
|
||
return self.chains[chain_id]
|
||
|
||
def get_res_by_id(self, chain_id, res_id):
|
||
return self.chains[chain_id].get_res_by_id(res_id)
|
||
|
||
def get_atom_dict(self, removeHs=True, get_surf=False):
|
||
atom_dict = {
|
||
'element': [],
|
||
'pos': [],
|
||
'is_backbone': [],
|
||
'atom_name': [],
|
||
'atom_to_aa_type': []
|
||
}
|
||
for a in self.get_atoms:
|
||
if a.element == 'H' and removeHs:
|
||
continue
|
||
atom_dict['element'].append(a.to_dict['element'])
|
||
atom_dict['pos'].append(a.to_dict['pos'])
|
||
atom_dict['is_backbone'].append(a.to_dict['is_backbone'])
|
||
atom_dict['atom_name'].append(a.to_dict['atom_name'])
|
||
atom_dict['atom_to_aa_type'].append(a.to_dict['atom_to_aa_type'])
|
||
atom_dict['element'] = np.array(atom_dict['element'], dtype=np.long)
|
||
atom_dict['pos'] = np.array(atom_dict['pos'], dtype=np.float32)
|
||
atom_dict['is_backbone'] = np.array(atom_dict['is_backbone'], dtype=np.bool)
|
||
#atom_dict['atom_name'] = atom_dict['atom_name']
|
||
if get_surf:
|
||
atom_dict['surface_mask'] = np.array([a.is_surf for a in self.get_heavy_atoms], dtype=np.bool)#self.get_surf_mask()
|
||
|
||
atom_dict['atom_to_aa_type'] = np.array(atom_dict['atom_to_aa_type'], dtype=np.long)
|
||
atom_dict['molecule_name'] = None
|
||
protein_bond_index, protein_bond_type = self.bond_graph
|
||
atom_dict['bond_index'] = protein_bond_index
|
||
atom_dict['bond_type'] = protein_bond_type
|
||
atom_dict['filename'] = self.pdb_file
|
||
return atom_dict
|
||
|
||
def get_backbone_dict(self, removeHs=True):
|
||
atom_dict = self.get_atom_dict(removeHs=removeHs)
|
||
backbone_dict = {}
|
||
backbone_dict['element'] = atom_dict['element'][atom_dict['is_backbone']]
|
||
backbone_dict['pos'] = atom_dict['pos'][atom_dict['is_backbone']]
|
||
backbone_dict['is_backbone'] = np.ones(atom_dict['is_backbone'].sum(), dtype=np.bool)
|
||
backbone_dict['atom_name'] = np.array(atom_dict['atom_name'])[atom_dict['is_backbone']].tolist()
|
||
backbone_dict['atom_to_aa_type'] = atom_dict['atom_to_aa_type'][atom_dict['is_backbone']]
|
||
backbone_dict['molecule_name'] = atom_dict['molecule_name']
|
||
atom_dict['bond_index'] = np.empty([0,2], dtype=np.long)
|
||
atom_dict['bond_type'] = np.empty(0, dtype=np.long)
|
||
atom_dict['filename'] = self.pdb_file
|
||
return backbone_dict
|
||
|
||
@property
|
||
def get_backbone(self):
|
||
atoms = []
|
||
for res in self.get_residues:
|
||
bkb = [a for a in res.get_atoms if a.name in BACKBONE_SYMBOL]
|
||
atoms += bkb
|
||
return atoms
|
||
|
||
def compute_surface_atoms(self):
|
||
"""
|
||
If the pdb_file is a pocket_file, the surface atoms is not correct!
|
||
"""
|
||
pymol.cmd.load(self.pdb_file)
|
||
path, filename = os.path.split(self.pdb_file)
|
||
if path == '':
|
||
path = './'
|
||
sele = filename.split('.')[0]
|
||
pymol.cmd.remove("({}) and hydro".format(sele))
|
||
name = pymol.util.find_surface_atoms(sele=sele, _self=pymol.cmd)
|
||
save_name = path + '/' + sele + '-surface.pdb'
|
||
pymol.cmd.save(save_name,((name)))
|
||
surf_protein = Protein(save_name, ignore_incomplete_res=False)
|
||
surf_res_dict = {r.idx:r for r in surf_protein.get_residues}
|
||
for res in self.get_residues:
|
||
res_idx = res.idx
|
||
if res_idx in surf_res_dict:
|
||
for a in surf_res_dict[res_idx].get_heavy_atoms:
|
||
res.atom_dict[a.name].is_surf = True
|
||
self.has_surf_atom = True
|
||
os.remove(save_name)
|
||
|
||
def get_surf_mask(self):
|
||
if self.has_surf_atom is False:
|
||
self.compute_surface_atoms()
|
||
return np.array([a.is_surf for a in self.get_heavy_atoms], dtype=np.bool)
|
||
|
||
@staticmethod
|
||
def empty_dict():
|
||
empty_pocket_dict = {}
|
||
empty_pocket_dict = Dict(empty_pocket_dict)
|
||
empty_pocket_dict.element = np.empty(0, dtype=np.int64)
|
||
empty_pocket_dict.pos = np.empty([0,3], dtype=np.float32)
|
||
empty_pocket_dict.is_backbone = np.empty(0, dtype=bool)
|
||
empty_pocket_dict.atom_name = []
|
||
empty_pocket_dict.atom_to_aa_type = np.empty(0, dtype=np.int64)
|
||
empty_pocket_dict.molecule_name = None
|
||
empty_pocket_dict.bond_index = np.empty([2,0], dtype=np.int64)
|
||
empty_pocket_dict.bond_type = np.empty(0, dtype=np.int64)
|
||
empty_pocket_dict.filename = None
|
||
return empty_pocket_dict
|
||
|
||
def __repr__(self):
|
||
num_res = 0
|
||
num_atom = 0
|
||
for i in self.chains:
|
||
res_list = list(self.chains[i].residues.values())
|
||
num_res += len(res_list)
|
||
for i in res_list:
|
||
num_atom += len(i.get_heavy_atoms)
|
||
num_incomp = len(self.get_incomplete_residues)
|
||
tmp = 'Name={}, NumChains={}, NumResidues={}, NumHeavyAtoms={}, NumIncompleteRes={}'
|
||
info = tmp.format(
|
||
self.name, len(self.chains), num_res, num_atom, num_incomp
|
||
)
|
||
return '{}({})'.format(self.__class__.__name__, info)
|
||
|
||
####################################################################################################################
|
||
|
||
|
||
ATOM_FAMILIES = ['Acceptor', 'Donor', 'Aromatic', 'Hydrophobe', 'LumpedHydrophobe', 'NegIonizable', 'PosIonizable', 'ZnBinder']
|
||
ATOM_FAMILIES_ID = {s: i for i, s in enumerate(ATOM_FAMILIES)}
|
||
BOND_TYPES = {t: i for i, t in enumerate(BondType.names.values())}
|
||
BOND_NAMES = {i: t for i, t in enumerate(BondType.names.keys())}
|
||
|
||
|
||
def is_in_ring(mol):
|
||
d = {a:np.array([], dtype=np.int64) for a in range(len(mol.GetAtoms()))}
|
||
rings = Chem.GetSymmSSSR(mol)
|
||
for a in d:
|
||
for r_idx, ring in enumerate(rings):
|
||
if a in ring:
|
||
d[a] = np.append(d[a], r_idx+1)
|
||
else:
|
||
d[a] = np.append(d[a], -a)
|
||
return d
|
||
|
||
def parse_sdf_to_dict(mol_file):
|
||
fdefName = os.path.join(RDConfig.RDDataDir,'BaseFeatures.fdef')
|
||
factory = ChemicalFeatures.BuildFeatureFactory(fdefName)
|
||
rdmol = next(iter(Chem.SDMolSupplier(mol_file, removeHs=True)))
|
||
Chem.Kekulize(rdmol)
|
||
ring_info = is_in_ring(rdmol)
|
||
conformer = rdmol.GetConformer()
|
||
feat_mat = np.zeros([rdmol.GetNumAtoms(), len(ATOM_FAMILIES)], dtype=np.int64)
|
||
for feat in factory.GetFeaturesForMol(rdmol):
|
||
feat_mat[feat.GetAtomIds(), ATOM_FAMILIES_ID[feat.GetFamily()]] = 1
|
||
|
||
element, pos, atom_mass = [], [], []
|
||
for a in rdmol.GetAtoms():
|
||
element.append(a.GetAtomicNum())
|
||
pos.append(conformer.GetAtomPosition(a.GetIdx()))
|
||
atom_mass.append(a.GetMass())
|
||
element = np.array(element, dtype=np.int64)
|
||
pos = np.array(pos, dtype=np.float32)
|
||
atom_mass = np.array(atom_mass, np.float32)
|
||
center_of_mass = (pos * atom_mass.reshape(-1,1)).sum(0)/atom_mass.sum()
|
||
|
||
edge_index, edge_type = [], []
|
||
for b in rdmol.GetBonds():
|
||
row = [b.GetBeginAtomIdx(), b.GetEndAtomIdx()]
|
||
col = [b.GetEndAtomIdx(), b.GetBeginAtomIdx()]
|
||
edge_index.extend([row, col])
|
||
edge_type.extend([BOND_TYPES[b.GetBondType()]] * 2)
|
||
edge_index = np.array(edge_index)
|
||
edge_index_perm = edge_index[:,0].argsort()
|
||
edge_index = edge_index[edge_index_perm].T
|
||
edge_type = np.array(edge_type)[edge_index_perm]
|
||
|
||
return {'element': element,
|
||
'pos': pos,
|
||
'bond_index': edge_index,
|
||
'bond_type': edge_type,
|
||
'center_of_mass': center_of_mass,
|
||
'atom_feature': feat_mat,
|
||
'ring_info': ring_info,
|
||
'filename':mol_file
|
||
}
|
||
|
||
|
||
from scipy.spatial.distance import pdist
|
||
from scipy.spatial.distance import squareform
|
||
|
||
|
||
class Ligand(object):
|
||
def __init__(self, mol_file, removeHs=True, sanitize=True):
|
||
if isinstance(mol_file, Chem.rdchem.Mol):
|
||
mol = mol_file
|
||
self.name = None
|
||
self.lig_file = None
|
||
else:
|
||
mol = Chem.MolFromMolFile(mol_file, removeHs=removeHs, sanitize=sanitize)
|
||
if mol is None:
|
||
mol = Chem.MolFromMolFile(mol_file, removeHs=removeHs, sanitize=False)
|
||
mol.UpdatePropertyCache(strict=False)
|
||
Chem.SanitizeMol(
|
||
mol, ## 如果报错可以尝试:Chem.rdmolops.SanitizeFlags.SANITIZE_FINDRADICALS
|
||
Chem.SanitizeFlags.SANITIZE_FINDRADICALS|\
|
||
Chem.SanitizeFlags.SANITIZE_KEKULIZE|\
|
||
Chem.SanitizeFlags.SANITIZE_SETAROMATICITY| \
|
||
Chem.SanitizeFlags.SANITIZE_SETCONJUGATION| \
|
||
Chem.SanitizeFlags.SANITIZE_SETHYBRIDIZATION| \
|
||
Chem.SanitizeFlags.SANITIZE_SYMMRINGS,
|
||
catchErrors=True
|
||
)
|
||
self.name = mol_file.split('/')[-1].split('.')[0]
|
||
self.lig_file = mol_file
|
||
|
||
Chem.Kekulize(mol)
|
||
self.mol = mol
|
||
#self.self_loop = self_loop
|
||
self.num_atoms = len(self.mol.GetAtoms())
|
||
self.normalized_coords = None
|
||
|
||
def normalize_pos(self, shift_vector, rotate_matrix):
|
||
conformer = self.mol.GetConformer()
|
||
coords = np.array([conformer.GetAtomPosition(a.GetIdx()) for a in self.mol.GetAtoms()])
|
||
coords = (coords - shift_vector)@rotate_matrix
|
||
for ix, pos in enumerate(coords):
|
||
conformer.SetAtomPosition(ix, pos)
|
||
self.normalized_coords = coords
|
||
|
||
def mol_block(self):
|
||
return Chem.MolToMolBlock(self.mol)
|
||
|
||
def to_dict(self):
|
||
fdefName = os.path.join(RDConfig.RDDataDir,'BaseFeatures.fdef')
|
||
factory = ChemicalFeatures.BuildFeatureFactory(fdefName)
|
||
#rdmol = next(iter(Chem.SDMolSupplier(mol_file, removeHs=True)))
|
||
ring_info = is_in_ring(self.mol)
|
||
conformer = self.mol.GetConformer()
|
||
feat_mat = np.zeros([self.mol.GetNumAtoms(), len(ATOM_FAMILIES)], dtype=np.int64)
|
||
for feat in factory.GetFeaturesForMol(self.mol):
|
||
feat_mat[feat.GetAtomIds(), ATOM_FAMILIES_ID[feat.GetFamily()]] = 1
|
||
|
||
element, pos, atom_mass = [], [], []
|
||
for a in self.mol.GetAtoms():
|
||
element.append(a.GetAtomicNum())
|
||
pos.append(conformer.GetAtomPosition(a.GetIdx()))
|
||
atom_mass.append(a.GetMass())
|
||
element = np.array(element, dtype=np.int64)
|
||
pos = np.array(pos, dtype=np.float32)
|
||
atom_mass = np.array(atom_mass, np.float32)
|
||
center_of_mass = (pos * atom_mass.reshape(-1,1)).sum(0)/atom_mass.sum()
|
||
|
||
edge_index, edge_type = [], []
|
||
for b in self.mol.GetBonds():
|
||
row = [b.GetBeginAtomIdx(), b.GetEndAtomIdx()]
|
||
col = [b.GetEndAtomIdx(), b.GetBeginAtomIdx()]
|
||
edge_index.extend([row, col])
|
||
edge_type.extend([BOND_TYPES[b.GetBondType()]] * 2)
|
||
edge_index = np.array(edge_index)
|
||
edge_index_perm = edge_index[:,0].argsort()
|
||
edge_index = edge_index[edge_index_perm].T
|
||
edge_type = np.array(edge_type)[edge_index_perm]
|
||
|
||
return {'element': element,
|
||
'pos': pos,
|
||
'bond_index': edge_index,
|
||
'bond_type': edge_type,
|
||
'center_of_mass': center_of_mass,
|
||
'atom_feature': feat_mat,
|
||
'ring_info': ring_info,
|
||
'filename':self.lig_file
|
||
}
|
||
|
||
@staticmethod
|
||
def empty_dict():
|
||
empty_ligand_dict = {}
|
||
empty_ligand_dict = Dict(empty_ligand_dict)
|
||
empty_ligand_dict.element = np.empty(0, dtype=np.int64)
|
||
empty_ligand_dict.pos = np.empty([0,3], dtype=np.float32)
|
||
empty_ligand_dict.bond_index = np.empty([2,0], dtype=np.int64)
|
||
empty_ligand_dict.bond_type = np.empty(0, dtype=np.int64)
|
||
empty_ligand_dict.center_of_mass = np.empty([0,3], dtype=np.float32)
|
||
empty_ligand_dict.atom_feature = np.empty([0,8], dtype=np.float32)
|
||
empty_ligand_dict.ring_info = {}
|
||
empty_ligand_dict.filename = None
|
||
return empty_ligand_dict
|
||
|
||
def __repr__(self):
|
||
tmp = 'Name={}, NumAtoms={}'
|
||
info = tmp.format(self.name, self.num_atoms)
|
||
return '{}({})'.format(self.__class__.__name__, info) |