mirror of
https://github.com/OdinZhang/Delete.git
synced 2026-06-04 14:24:21 +08:00
181 lines
7.2 KiB
Python
181 lines
7.2 KiB
Python
from rdkit import Chem
|
|
import numpy as np
|
|
class PDBProtein(object):
|
|
|
|
AA_NAME_SYM = {
|
|
'ALA': 'A', 'CYS': 'C', 'ASP': 'D', 'GLU': 'E', 'PHE': 'F', 'GLY': 'G', 'HIS': 'H',
|
|
'ILE': 'I', 'LYS': 'K', 'LEU': 'L', 'MET': 'M', 'ASN': 'N', 'PRO': 'P', 'GLN': 'Q',
|
|
'ARG': 'R', 'SER': 'S', 'THR': 'T', 'VAL': 'V', 'TRP': 'W', 'TYR': 'Y',
|
|
}
|
|
|
|
AA_NAME_NUMBER = {
|
|
k: i for i, (k, _) in enumerate(AA_NAME_SYM.items())
|
|
}
|
|
|
|
BACKBONE_NAMES = ["CA", "C", "N", "O"]
|
|
|
|
def __init__(self, data, mode='auto'):
|
|
super().__init__()
|
|
if (data[-4:].lower() == '.pdb' and mode == 'auto') or mode == 'path':
|
|
with open(data, 'r') as f:
|
|
self.block = f.read()
|
|
else:
|
|
self.block = data
|
|
|
|
self.ptable = Chem.GetPeriodicTable()
|
|
|
|
# Molecule properties
|
|
self.title = None
|
|
# Atom properties
|
|
self.atoms = []
|
|
self.element = []
|
|
self.atomic_weight = []
|
|
self.pos = []
|
|
self.atom_name = []
|
|
self.is_backbone = []
|
|
self.atom_to_aa_type = []
|
|
# Residue properties
|
|
self.residues = []
|
|
self.amino_acid = []
|
|
self.center_of_mass = []
|
|
self.pos_CA = []
|
|
self.pos_C = []
|
|
self.pos_N = []
|
|
self.pos_O = []
|
|
|
|
self._parse()
|
|
|
|
def _enum_formatted_atom_lines(self):
|
|
for line in self.block.splitlines():
|
|
if line[0:6].strip() == 'ATOM':
|
|
element_symb = line[76:78].strip().capitalize()
|
|
if len(element_symb) == 0:
|
|
element_symb = line[13:14]
|
|
yield {
|
|
'line': line,
|
|
'type': 'ATOM',
|
|
'atom_id': int(line[6:11]),
|
|
'atom_name': line[12:16].strip(),
|
|
'res_name': line[17:20].strip(),
|
|
'chain': line[21:22].strip(),
|
|
'res_id': int(line[22:26]),
|
|
'res_insert_id': line[26:27].strip(),
|
|
'x': float(line[30:38]),
|
|
'y': float(line[38:46]),
|
|
'z': float(line[46:54]),
|
|
'occupancy': float(line[54:60]),
|
|
'segment': line[72:76].strip(),
|
|
'element_symb': element_symb,
|
|
'charge': line[78:80].strip(),
|
|
}
|
|
elif line[0:6].strip() == 'HEADER':
|
|
yield {
|
|
'type': 'HEADER',
|
|
'value': line[10:].strip()
|
|
}
|
|
elif line[0:6].strip() == 'ENDMDL':
|
|
break # Some PDBs have more than 1 model.
|
|
|
|
def _parse(self):
|
|
# Process atoms
|
|
residues_tmp = {}
|
|
for atom in self._enum_formatted_atom_lines():
|
|
if atom['type'] == 'HEADER':
|
|
self.title = atom['value'].lower()
|
|
continue
|
|
self.atoms.append(atom)
|
|
atomic_number = self.ptable.GetAtomicNumber(atom['element_symb'])
|
|
next_ptr = len(self.element)
|
|
self.element.append(atomic_number)
|
|
self.atomic_weight.append(self.ptable.GetAtomicWeight(atomic_number))
|
|
self.pos.append(np.array([atom['x'], atom['y'], atom['z']], dtype=np.float32))
|
|
self.atom_name.append(atom['atom_name'])
|
|
self.is_backbone.append(atom['atom_name'] in self.BACKBONE_NAMES)
|
|
self.atom_to_aa_type.append(self.AA_NAME_NUMBER[atom['res_name']])
|
|
|
|
chain_res_id = '%s_%s_%d_%s' % (atom['chain'], atom['segment'], atom['res_id'], atom['res_insert_id'])
|
|
if chain_res_id not in residues_tmp:
|
|
residues_tmp[chain_res_id] = {
|
|
'name': atom['res_name'],
|
|
'atoms': [next_ptr],
|
|
'chain': atom['chain'],
|
|
'segment': atom['segment'],
|
|
}
|
|
else:
|
|
assert residues_tmp[chain_res_id]['name'] == atom['res_name']
|
|
assert residues_tmp[chain_res_id]['chain'] == atom['chain']
|
|
residues_tmp[chain_res_id]['atoms'].append(next_ptr)
|
|
|
|
# Process residues
|
|
self.residues = [r for _, r in residues_tmp.items()]
|
|
for residue in self.residues:
|
|
sum_pos = np.zeros([3], dtype=np.float32)
|
|
sum_mass = 0.0
|
|
for atom_idx in residue['atoms']:
|
|
sum_pos += self.pos[atom_idx] * self.atomic_weight[atom_idx]
|
|
sum_mass += self.atomic_weight[atom_idx]
|
|
if self.atom_name[atom_idx] in self.BACKBONE_NAMES:
|
|
residue['pos_%s' % self.atom_name[atom_idx]] = self.pos[atom_idx]
|
|
residue['center_of_mass'] = sum_pos / sum_mass
|
|
|
|
# Process backbone atoms of residues
|
|
for residue in self.residues:
|
|
self.amino_acid.append(self.AA_NAME_NUMBER[residue['name']])
|
|
self.center_of_mass.append(residue['center_of_mass'])
|
|
for name in self.BACKBONE_NAMES:
|
|
pos_key = 'pos_%s' % name # pos_CA, pos_C, pos_N, pos_O
|
|
if pos_key in residue:
|
|
getattr(self, pos_key).append(residue[pos_key])
|
|
else:
|
|
getattr(self, pos_key).append(residue['center_of_mass'])
|
|
|
|
def to_dict_atom(self):
|
|
return {
|
|
'element': np.array(self.element, dtype=np.long),
|
|
'molecule_name': self.title,
|
|
'pos': np.array(self.pos, dtype=np.float32),
|
|
'is_backbone': np.array(self.is_backbone, dtype=np.bool),
|
|
'atom_name': self.atom_name,
|
|
'atom_to_aa_type': np.array(self.atom_to_aa_type, dtype=np.long)
|
|
}
|
|
|
|
def to_dict_residue(self):
|
|
return {
|
|
'amino_acid': np.array(self.amino_acid, dtype=np.long),
|
|
'center_of_mass': np.array(self.center_of_mass, dtype=np.float32),
|
|
'pos_CA': np.array(self.pos_CA, dtype=np.float32),
|
|
'pos_C': np.array(self.pos_C, dtype=np.float32),
|
|
'pos_N': np.array(self.pos_N, dtype=np.float32),
|
|
'pos_O': np.array(self.pos_O, dtype=np.float32),
|
|
}
|
|
|
|
def query_residues_radius(self, center, radius, criterion='center_of_mass'):
|
|
center = np.array(center).reshape(3)
|
|
selected = []
|
|
for residue in self.residues:
|
|
distance = np.linalg.norm(residue[criterion] - center, ord=2)
|
|
#print(residue[criterion], distance)
|
|
if distance < radius:
|
|
selected.append(residue)
|
|
return selected
|
|
|
|
def query_residues_ligand(self, ligand, radius, criterion='center_of_mass'):
|
|
selected = []
|
|
sel_idx = set()
|
|
# The time-complexity is O(mn).
|
|
for center in ligand['pos']:
|
|
for i, residue in enumerate(self.residues):
|
|
distance = np.linalg.norm(residue[criterion] - center, ord=2)
|
|
if distance < radius and i not in sel_idx:
|
|
selected.append(residue)
|
|
sel_idx.add(i)
|
|
return selected
|
|
|
|
def residues_to_pdb_block(self, residues, name='POCKET'):
|
|
block = "HEADER %s\n" % name
|
|
block += "COMPND %s\n" % name
|
|
for residue in residues:
|
|
for atom_idx in residue['atoms']:
|
|
block += self.atoms[atom_idx]['line'] + "\n"
|
|
block += "END\n"
|
|
return block |