From d5dab3c87f53a201fe83adc808c8e6424fa538d7 Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Sat, 13 Jun 2020 15:16:56 -0700 Subject: [PATCH 01/31] Remove unneeded config file. --- pdb2pqr/build_config.py | 57 ----------------------------------------- 1 file changed, 57 deletions(-) delete mode 100644 pdb2pqr/build_config.py diff --git a/pdb2pqr/build_config.py b/pdb2pqr/build_config.py deleted file mode 100644 index 5565fd7ba..000000000 --- a/pdb2pqr/build_config.py +++ /dev/null @@ -1,57 +0,0 @@ -#Build Configuration file for pdb2pqr -#While you can specify these on the command line with var=value -# this is the recommended way to setup a build. - -#Uncomment the values you would like to change and set new values. - - -#Installation PREFIX -#Sets the install location of pdb2pqr. -#This defaults to ~/pdb2pqr - -#PREFIX="~/pdb2pqr" - -#APBS binary -#Change this to specify the location of the APBS binary if installed. -#This is used for the web interface to pdb2pqr. Provide an absolute path. Relative paths and ~ usually will not work correctly. - -#APBS="" - -#MAX_ATOMS -#Sets the maximum number of atoms in a protein for non-Opal job submission. -#Only affects web tools. Default is 10000. - -#MAX_ATOMS=10000 - - -#BUILD_PDB2PKA -#Set to False to skip building ligand and pdb2pka support. Requires numpy. -# Defaults to False - -#BUILD_PDB2PKA=True - -#DEBUG -#Set to True to build compiled extentions with debug headers. -#Defaults to False - -#DEBUG=True - -#CXXFLAGS -#Set to add extra CXX flags to the build. -#Defaults to "" - -#EXTRA_CXXFLAGS="-fPIC" - -#EXTRA_LINKFLAGS -#Set to add extra CXX flags to the build. -#Defaults to "" - -#EXTRA_LINKFLAGS="" - - -#REBUILD_SWIG -#Set to True to rebuild the swig bindings. -# Requires swig on the the user path. 
-# Defaults to False - -#REBUILD_SWIG=True From 21d31540564d2a70c6bf2e82cca16394b3f2c530 Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Sat, 13 Jun 2020 15:56:26 -0700 Subject: [PATCH 02/31] Move ligand-specific code to separate submodule. Move functions from pdb.py to mol2.py. --- pdb2pqr/pdb2pqr/ligand/__init__.py | 4 + pdb2pqr/pdb2pqr/ligand/mol2.py | 152 +++++++++++++++++++++++++++++ pdb2pqr/pdb2pqr/pdb.py | 146 --------------------------- pdb2pqr/pdb2pqr/run.py | 77 +-------------- pdb2pqr/tests/short_basic_test.py | 4 + 5 files changed, 163 insertions(+), 220 deletions(-) create mode 100644 pdb2pqr/pdb2pqr/ligand/__init__.py create mode 100644 pdb2pqr/pdb2pqr/ligand/mol2.py diff --git a/pdb2pqr/pdb2pqr/ligand/__init__.py b/pdb2pqr/pdb2pqr/ligand/__init__.py new file mode 100644 index 000000000..b89c4c496 --- /dev/null +++ b/pdb2pqr/pdb2pqr/ligand/__init__.py @@ -0,0 +1,4 @@ +"""Ligand support functions + +Jens Erik Nielsen, University College Dublin 2004 +""" diff --git a/pdb2pqr/pdb2pqr/ligand/mol2.py b/pdb2pqr/pdb2pqr/ligand/mol2.py new file mode 100644 index 000000000..1aafbb2cf --- /dev/null +++ b/pdb2pqr/pdb2pqr/ligand/mol2.py @@ -0,0 +1,152 @@ +"""Support molecules in Tripos MOL2 format. 
+ + For further information look at (web page exists: 25 August 2005): + http://www.tripos.com/index.php?family=modules,SimplePage,,,&page=sup_mol2&s=0 +""" +import logging +import copy +from ..pdb import HETATM + + +_LOGGER = logging.getLogger(__name__) + + +class Mol2Bond(object): + """Bonding of MOL2 files""" + def __init__(self, frm, to, type_, bond_id=0): + self.bond_to_self = to # bond to this atom + self.bond_from_self = frm # bond from atom + self.type = type_ # 1=single, 2=double, ar=aromatic + self.bond_id = bond_id # bond_id + + +class Mol2Molecule(object): + """Tripos MOL2 molecule""" + def __init__(self): + self.l_atoms = [] # all atoms of class + self.l_bonds = [] # all bonds of class + self.l_pdb_atoms = [] # PDB-like list of all atoms + self.serial = None + self.name = None + self.res_name = None + self.res_seq = None + self.x = None + self.y = None + self.z = None + + def read(self, file_): + """Routines for reading MOL2 file""" + data = file_.read() + data = data.replace("\r\n", "\n") + data = data.replace("\r", "\n") + + # ATOM section + start = data.find("@ATOM") + stop = data.find("@BOND") + + # Do some error checking + if start == -1: + raise ValueError("Unable to find '@ATOM' in MOL2 file!") + elif stop == -1: + raise ValueError("Unable to find '@BOND' in MOL2 file!") + + atoms = data[start+14:stop-2].split("\n") + # BOND section + start = data.find("@BOND") + stop = data.find("@SUBSTRUCTURE") + + # More error checking + if stop == -1: + raise ValueError("Unable to find '@SUBSTRUCTURE' in MOL2 file!") + + bonds = data[start+14:stop-1].split("\n") + self.parse_atoms(atoms) + self.parse_bonds(bonds) + self.create_bonded_atoms() + + def parse_atoms(self, atom_list): + """For parsing @ATOM""" + for atom_line in atom_list: + separated_atom_line = atom_line.split() + + # Special handling for blank lines + if len(separated_atom_line) == 0: + continue + + # Error checking + if len(separated_atom_line) < 8: + raise ValueError("Bad atom entry in MOL2 
file: %s" % atom_line) + + fake_record = "HETATM" + fake_chain = " L" + + try: + mol2pdb = '%s%5i%5s%4s%2s%4i %8.3f%8.3f%8.3f' % \ + (fake_record, int(separated_atom_line[0]), + separated_atom_line[1], separated_atom_line[7][:4], + fake_chain, int(separated_atom_line[6]), + float(separated_atom_line[2]), float(separated_atom_line[3]), + float(separated_atom_line[4])) + + except ValueError: + raise ValueError("Bad atom entry in MOL2 file: %s" % atom_line) + + this_atom = HETATM(mol2pdb, separated_atom_line[5], [], []) + if len(separated_atom_line) > 8: + charge = separated_atom_line[8] + try: + this_atom.mol2charge = float(charge) + except TypeError: + _LOGGER.warning('Warning. Non-float charge (%s) in mol2 file.', charge) + this_atom.mol2charge = None + self.l_pdb_atoms.append(mol2pdb) + self.l_atoms.append(this_atom) + + def parse_bonds(self, bond_list): + """For parsing @BOND""" + for bond_line in bond_list: + separated_bond_line = bond_line.split() + # Special handling for blank lines + if len(separated_bond_line) == 0: + continue + if len(separated_bond_line) < 4: + raise ValueError("Bad bond entry in MOL2 file: %s" % bond_line) + try: + this_bond = Mol2Bond( + int(separated_bond_line[1]), # bond frm + int(separated_bond_line[2]), # bond to + separated_bond_line[3], # bond type + int(separated_bond_line[0]) # bond id + ) + except ValueError: + raise ValueError("Bad bond entry in MOL2 file: %s" % bond_line) + self.l_bonds.append(this_bond) + + def create_bonded_atoms(self): + """Creates for each atom a list of the bonded Atoms + + This becomes one attribute of MOL2ATOM! 
+ """ + for bond in self.l_bonds: + self.l_atoms[bond.bond_from_self-1].l_bonded_atoms\ + .append(self.l_atoms[bond.bond_to_self-1]) + + self.l_atoms[bond.bond_to_self-1].l_bonded_atoms\ + .append(self.l_atoms[bond.bond_from_self-1]) + + atbond = copy.deepcopy(bond) + atbond.other_atom = self.l_atoms[bond.bond_to_self-1] + self.l_atoms[bond.bond_from_self-1].l_bonds.append(atbond) + + atbond = copy.deepcopy(bond) + atbond.other_atom = self.l_atoms[bond.bond_from_self-1] + self.l_atoms[bond.bond_to_self-1].l_bonds.append(atbond) + + def create_pdb_line_from_mol2(self): + """Generate PDB line from MOL2.""" + raise NotImplementedError("TODO - FIX THIS CODE") + # fake_type = "HETATM" + # rstr = "%s%5i%5s%4s%2s%5s %8.3f%8.3f%8.3f\n" % (fake_type, self.serial, + # self.name, self.res_name, ' L', + # self.res_seq, self.x, self.y, self.z) + # return rstr diff --git a/pdb2pqr/pdb2pqr/pdb.py b/pdb2pqr/pdb2pqr/pdb.py index 1d9e49df6..d0bcb5095 100644 --- a/pdb2pqr/pdb2pqr/pdb.py +++ b/pdb2pqr/pdb2pqr/pdb.py @@ -7,7 +7,6 @@ the classes is taken directly from the above PDB Format Description. 
Authors: Todd Dolinsky, Yong Huang """ -import copy import logging @@ -448,151 +447,6 @@ class HETATM(BaseRecord): self.charge = "" -class MOL2BOND(object): - """Bonding of MOL2 files""" - def __init__(self, frm, to, type_, bond_id=0): - self.bond_to_self = to # bond to this atom - self.bond_from_self = frm # bond from atom - self.type = type_ # 1=single, 2=double, ar=aromatic - self.bond_id = bond_id # bond_id - - -class Mol2Molecule(object): - """Tripos MOL2 molecule - - For further information look at (web page exists: 25 August 2005): - http://www.tripos.com/index.php?family=modules,SimplePage,,,&page=sup_mol2&s=0 - """ - def __init__(self): - self.l_atoms = [] # all atoms of class - self.l_bonds = [] # all bonds of class - self.l_pdb_atoms = [] # PDB-like list of all atoms - self.serial = None - self.name = None - self.res_name = None - self.res_seq = None - self.x = None - self.y = None - self.z = None - - def read(self, file_): - """Routines for reading MOL2 file""" - data = file_.read() - data = data.replace("\r\n", "\n") - data = data.replace("\r", "\n") - - # ATOM section - start = data.find("@ATOM") - stop = data.find("@BOND") - - # Do some error checking - if start == -1: - raise ValueError("Unable to find '@ATOM' in MOL2 file!") - elif stop == -1: - raise ValueError("Unable to find '@BOND' in MOL2 file!") - - atoms = data[start+14:stop-2].split("\n") - # BOND section - start = data.find("@BOND") - stop = data.find("@SUBSTRUCTURE") - - # More error checking - if stop == -1: - raise ValueError("Unable to find '@SUBSTRUCTURE' in MOL2 file!") - - bonds = data[start+14:stop-1].split("\n") - self.parse_atoms(atoms) - self.parse_bonds(bonds) - self.createl_bonded_atoms() - #self.create_pdb_line_from_mol2(atoms) - - def parse_atoms(self, atom_list): - """For parsing @ATOM""" - for atom_line in atom_list: - separated_atom_line = atom_line.split() - - # Special handling for blank lines - if len(separated_atom_line) == 0: - continue - - # Error checking - if 
len(separated_atom_line) < 8: - raise ValueError("Bad atom entry in MOL2 file: %s" % atom_line) - - fake_record = "HETATM" - fake_chain = " L" - - try: - mol2pdb = '%s%5i%5s%4s%2s%4i %8.3f%8.3f%8.3f' % \ - (fake_record, int(separated_atom_line[0]), - separated_atom_line[1], separated_atom_line[7][:4], - fake_chain, int(separated_atom_line[6]), - float(separated_atom_line[2]), float(separated_atom_line[3]), - float(separated_atom_line[4])) - - except ValueError: - raise ValueError("Bad atom entry in MOL2 file: %s" % atom_line) - - this_atom = HETATM(mol2pdb, separated_atom_line[5], [], []) - if len(separated_atom_line) > 8: - charge = separated_atom_line[8] - try: - this_atom.mol2charge = float(charge) - except TypeError: - _LOGGER.warning('Warning. Non-float charge (%s) in mol2 file.', charge) - this_atom.mol2charge = None - self.l_pdb_atoms.append(mol2pdb) - self.l_atoms.append(this_atom) - - def parse_bonds(self, bond_list): - """For parsing @BOND""" - for bond_line in bond_list: - separated_bond_line = bond_line.split() - # Special handling for blank lines - if len(separated_bond_line) == 0: - continue - if len(separated_bond_line) < 4: - raise ValueError("Bad bond entry in MOL2 file: %s" % bond_line) - try: - this_bond = MOL2BOND( - int(separated_bond_line[1]), # bond frm - int(separated_bond_line[2]), # bond to - separated_bond_line[3], # bond type - int(separated_bond_line[0]) # bond id - ) - except ValueError: - raise ValueError("Bad bond entry in MOL2 file: %s" % bond_line) - self.l_bonds.append(this_bond) - - def createl_bonded_atoms(self): - """Creates for each atom a list of the bonded Atoms - - This becomes one attribute of MOL2ATOM! 
- """ - for bond in self.l_bonds: - self.l_atoms[bond.bond_from_self-1].l_bonded_atoms\ - .append(self.l_atoms[bond.bond_to_self-1]) - - self.l_atoms[bond.bond_to_self-1].l_bonded_atoms\ - .append(self.l_atoms[bond.bond_from_self-1]) - - atbond = copy.deepcopy(bond) - atbond.other_atom = self.l_atoms[bond.bond_to_self-1] - self.l_atoms[bond.bond_from_self-1].l_bonds.append(atbond) - - atbond = copy.deepcopy(bond) - atbond.other_atom = self.l_atoms[bond.bond_from_self-1] - self.l_atoms[bond.bond_to_self-1].l_bonds.append(atbond) - - def create_pdb_line_from_mol2(self): - """Generate PDB line from MOL2.""" - fake_type = "HETATM" - rstr = "%s%5i%5s%4s%2s%5s %8.3f%8.3f%8.3f\n" % (fake_type, self.serial, - self.name, self.res_name, ' L', - self.res_seq, self.x, self.y, self.z) - return rstr - - @register_line_parser class ATOM(BaseRecord): """ ATOM class diff --git a/pdb2pqr/pdb2pqr/run.py b/pdb2pqr/pdb2pqr/run.py index f7dd0e09b..808b118e9 100644 --- a/pdb2pqr/pdb2pqr/run.py +++ b/pdb2pqr/pdb2pqr/run.py @@ -14,56 +14,6 @@ from . import input_output as io _LOGGER = logging.getLogger(__name__) -def run_propka_31(protein, pka_options): - """Run PROPKA 3.1 on the current protein, setting protonation states to - the correct values. pH is set in pka_options - - Parameters - pka_options: Options for propKa 3.1, including pH - - Returns - pka_molecule: pKa's internal molecule object (including pKa's, etc) - not_found: dict of residues found in pka_molecule but not in PDB2PQR (with pKa) - """ - # See https://github.com/jensengroup/propka-3.1/blob/master/scripts/propka31.py - - ph = pka_options.ph - _LOGGER.info("Running propka 3.1 at pH %.2f... ", ph) - - # Initialize some variables - pkadic = {} - - # Reorder the atoms in each residue to start with N - TONI is this necessary? - for residue in protein.residues: - residue.reorder() - - # TONI Make a string with all non-hydrogen atoms. Previously it was removing the "element" - # column and hydrogens. 
This does not seem to be necessary in propKa 3.1 . - with tempfile.NamedTemporaryFile(mode="w+", suffix=".pdb") as h_free_file: - for atom in protein.atoms: - if not atom.is_hydrogen: - atomtxt = atom.get_pdb_string() - h_free_file.write(atomtxt + '\n') - - # Run PropKa 3.1 ------------- - # Creating protein object. Annoyingly, at this stage propka generates a - # *.propka_input file in PWD and does not delete it (irrespective of the original - # .pdb location) - pka_molecule = propka.molecular_container.Molecular_container(h_free_file.name, - pka_options) - - # calculating pKa values for ionizable residues - - pka_molecule.calculate_pka() - - ## pka_molecule.write_pka() - for grp in pka_molecule.conformations['AVR'].groups: - key = str.strip('%s %s %s' % (grp.residue_type, grp.atom.resNumb, grp.atom.chain_id)) - pkadic[key] = grp.pka_value - - protein.pka_protein = pka_molecule - return pkadic - - def run_pdb2pka(ph, force_field, pdb_list, ligand, pdb2pka_params): """Run PDB2PKA""" # TODO - we are not ready to deal with PDB2PKA yet @@ -104,31 +54,10 @@ def run_pdb2pka(ph, force_field, pdb_list, ligand, pdb2pka_params): # protein.apply_pka_values(ff, ph, residue_ph) # _LOGGER.debug('Finished running PDB2PKA.') - -def run_propka(protein, ph, force_field, options, version=30): - """Run PROPKA on the current protein, setting protonation states to the correct values - - Parameters - ph: The desired pH of the system - force_field: The forcefield name to be used - outname: The name of the PQR outfile - options: Options to propka - version: may be 30 or 31 (uses external propka 3.1) - """ - _LOGGER.info("Running PROPKA v%d and applying at pH %.2f... 
", version, ph) - pkadic = self.run_propka_31(options) - - if len(pkadic) == 0: - raise ValueError("PROPKA returned empty results!") - - # Now apply each pka to the appropriate residue - protein.apply_pka_values(force_field, ph, pkadic) - _LOGGER.debug("Done running PROPKA") - - -# def run_pdb2pqr(pdblist, my_protein, my_definition, options, is_cif): +def run_pdb2pqr(pdblist, my_protein, my_definition, options, is_cif): + """Run the PDB2PQR Suite""" + raise DeprecationWarning("TODO - This function is deprecated") # """Run the PDB2PQR Suite - # Args: # pdblist: The list of objects that was read from the PDB file given as # input (list) diff --git a/pdb2pqr/tests/short_basic_test.py b/pdb2pqr/tests/short_basic_test.py index 8bf5b245b..3b5f3a9c0 100644 --- a/pdb2pqr/tests/short_basic_test.py +++ b/pdb2pqr/tests/short_basic_test.py @@ -32,6 +32,10 @@ def test_propka_apo(input_pdb, tmp_path): tmp_path=tmp_path) +def test_ligand_import(): + """Testing basic aspects of code breaking.""" + from pdb2pqr.ligand import mol2 + # @pytest.mark.parametrize("input_pdb", ["1K1I", "1FAS"], ids=str) # def test_propka_apo(input_pdb, tmp_path): # """PROPKA titration of proteins without ligands.""" From 57bd5ef421f3b4095c0d4a5e8e9ddc6b44e9adbe Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Sat, 13 Jun 2020 17:07:26 -0700 Subject: [PATCH 03/31] Re-add parsing of MOL2 ligand files. Partial progress towards #592. 
--- pdb2pqr/pdb2pqr/ligand/mol2.py | 254 ++++++++++++++++-------------- pdb2pqr/tests/data/adp.mol2 | 106 +++++++++++++ pdb2pqr/tests/short_basic_test.py | 11 +- 3 files changed, 254 insertions(+), 117 deletions(-) create mode 100644 pdb2pqr/tests/data/adp.mol2 diff --git a/pdb2pqr/pdb2pqr/ligand/mol2.py b/pdb2pqr/pdb2pqr/ligand/mol2.py index 1aafbb2cf..6e5bed588 100644 --- a/pdb2pqr/pdb2pqr/ligand/mol2.py +++ b/pdb2pqr/pdb2pqr/ligand/mol2.py @@ -4,28 +4,66 @@ http://www.tripos.com/index.php?family=modules,SimplePage,,,&page=sup_mol2&s=0 """ import logging -import copy -from ..pdb import HETATM _LOGGER = logging.getLogger(__name__) -class Mol2Bond(object): - """Bonding of MOL2 files""" - def __init__(self, frm, to, type_, bond_id=0): - self.bond_to_self = to # bond to this atom - self.bond_from_self = frm # bond from atom - self.type = type_ # 1=single, 2=double, ar=aromatic - self.bond_id = bond_id # bond_id +class Mol2Bond: + """MOL2 molecule bonds.""" + def __init__(self, bond_from, bond_to, bond_type, bond_id=0): + """Initialize bond. 
+ + Args: + bond_from: bond from this atom + bond_to: bond to this atom + bond_type: type of bond: 1 (single), 2 (double), or ar (aromatic) + bond_id: integer ID of bond + """ + self.bond_to_self = bond_to + self.bond_from_self = bond_from + self.type = bond_type + self.bond_id = bond_id + + +class Mol2Atom: + """MOL2 molecule atoms.""" + def __init__(self): + self.serial = None + self.name = None + self.alt_loc = None + self.res_name = None + self.chain_id = None + self.res_seq = None + self.x = None + self.y = None + self.z = None + self.sybyl_type = None + self.radius = None + self.is_c_term = False + self.is_n_term = False + self.mol2charge = None + self.occupancy = 0.00 + self.temp_factor = 0.00 + self.seg_id = "" + self.element = "" + self.charge = "" + self.bonded_atoms = [] + + def __str__(self): + """Generate PDB line from MOL2.""" + pdb_fmt = ( + "HETATM{a.serial:5d}{a.name:>5s}{a.res_name:>4s} L" + "{self.res_seq:>5s} {a.x:8.3f}{a.y:8.3f}{a.z:8.3f}" + ) + return pdb_fmt.format(self) class Mol2Molecule(object): """Tripos MOL2 molecule""" def __init__(self): - self.l_atoms = [] # all atoms of class - self.l_bonds = [] # all bonds of class - self.l_pdb_atoms = [] # PDB-like list of all atoms + self.atoms = [] + self.bonds = [] self.serial = None self.name = None self.res_name = None @@ -34,119 +72,105 @@ class Mol2Molecule(object): self.y = None self.z = None - def read(self, file_): - """Routines for reading MOL2 file""" - data = file_.read() - data = data.replace("\r\n", "\n") - data = data.replace("\r", "\n") + def read(self, mol2_file): + """Routines for reading MOL2 file. 
- # ATOM section - start = data.find("@ATOM") - stop = data.find("@BOND") - - # Do some error checking - if start == -1: - raise ValueError("Unable to find '@ATOM' in MOL2 file!") - elif stop == -1: - raise ValueError("Unable to find '@BOND' in MOL2 file!") - - atoms = data[start+14:stop-2].split("\n") - # BOND section - start = data.find("@BOND") - stop = data.find("@SUBSTRUCTURE") - - # More error checking - if stop == -1: - raise ValueError("Unable to find '@SUBSTRUCTURE' in MOL2 file!") - - bonds = data[start+14:stop-1].split("\n") - self.parse_atoms(atoms) - self.parse_bonds(bonds) + Args: + mol2_file: file-like object with MOL2 data. + """ + mol2_file = self.parse_atoms(mol2_file) + mol2_file = self.parse_bonds(mol2_file) self.create_bonded_atoms() - def parse_atoms(self, atom_list): - """For parsing @ATOM""" - for atom_line in atom_list: - separated_atom_line = atom_line.split() + def parse_atoms(self, mol2_file): + """Parse @ATOM section of file. - # Special handling for blank lines - if len(separated_atom_line) == 0: + Args: + mol2_file: file-like object with MOL2 data. 
+ Returns: + file object advanced to bonds section + Raises: + ValueError for bad MOL2 ATOM lines + TypeError for bad charge entries + """ + # Skip material before atoms section + for line in mol2_file: + if "@ATOM" in line: + break + _LOGGER.debug("Skipping: %s", line.strip()) + + for line in mol2_file: + line = line.strip() + if not line: continue - - # Error checking - if len(separated_atom_line) < 8: - raise ValueError("Bad atom entry in MOL2 file: %s" % atom_line) - - fake_record = "HETATM" - fake_chain = " L" - + if "@BOND" in line: + break + words = line.split() + if len(words) < 8: + err = "Bad entry in MOL2 file: %s" % line + raise ValueError(err) + atom = Mol2Atom() + atom.name = words[1] + atom.sybyl_type = words[5] + atom.chain_id = "L" try: - mol2pdb = '%s%5i%5s%4s%2s%4i %8.3f%8.3f%8.3f' % \ - (fake_record, int(separated_atom_line[0]), - separated_atom_line[1], separated_atom_line[7][:4], - fake_chain, int(separated_atom_line[6]), - float(separated_atom_line[2]), float(separated_atom_line[3]), - float(separated_atom_line[4])) - - except ValueError: - raise ValueError("Bad atom entry in MOL2 file: %s" % atom_line) - - this_atom = HETATM(mol2pdb, separated_atom_line[5], [], []) - if len(separated_atom_line) > 8: - charge = separated_atom_line[8] + atom.serial = int(words[0]) + atom.res_name = words[7][:4] + atom.res_seq = int(words[6]) + atom.x = float(words[2]) + atom.y = float(words[3]) + atom.z = float(words[4]) + except ValueError as exc: + err = "Error (%s) parsing atom line: %s" % (exc, line) + raise ValueError(err) + if len(line) > 8: try: - this_atom.mol2charge = float(charge) + atom.mol2charge = float(words[8]) except TypeError: - _LOGGER.warning('Warning. 
Non-float charge (%s) in mol2 file.', charge) - this_atom.mol2charge = None - self.l_pdb_atoms.append(mol2pdb) - self.l_atoms.append(this_atom) + err = "Unable to parse %s as charge in atom line: %s" % ( + words[8], line) + _LOGGER.warning(err) + self.atoms.append(atom) + return mol2_file - def parse_bonds(self, bond_list): - """For parsing @BOND""" - for bond_line in bond_list: - separated_bond_line = bond_line.split() - # Special handling for blank lines - if len(separated_bond_line) == 0: + def parse_bonds(self, mol2_file): + """Parse @BOND section of file. + + Args: + mol2_file: file-like object with MOL2 data. + Returns: + file object advanced to bonds section + Raises: + ValueError for problems parsing bond information + """ + for line in mol2_file: + line = line.strip() + if not line: continue - if len(separated_bond_line) < 4: - raise ValueError("Bad bond entry in MOL2 file: %s" % bond_line) + if "@SUBSTRUCTURE" in line: + break + words = line.split() + if len(words) < 4: + err = "Bond line too short: %s" % line + raise ValueError(err) + bond_type = words[3] try: - this_bond = Mol2Bond( - int(separated_bond_line[1]), # bond frm - int(separated_bond_line[2]), # bond to - separated_bond_line[3], # bond type - int(separated_bond_line[0]) # bond id - ) - except ValueError: - raise ValueError("Bad bond entry in MOL2 file: %s" % bond_line) - self.l_bonds.append(this_bond) + bond_from = int(words[1]) + bond_to = int(words[2]) + bond_id = int(words[0]) + bond = Mol2Bond( + bond_from=bond_from, bond_to=bond_to, bond_type=bond_type, + bond_id=bond_id) + except ValueError as exc: + err = "Got error (%s) when parsing bond line: %s" % (exc, line) + raise ValueError(err) + self.bonds.append(bond) + return mol2_file def create_bonded_atoms(self): - """Creates for each atom a list of the bonded Atoms - - This becomes one attribute of MOL2ATOM! 
- """ - for bond in self.l_bonds: - self.l_atoms[bond.bond_from_self-1].l_bonded_atoms\ - .append(self.l_atoms[bond.bond_to_self-1]) - - self.l_atoms[bond.bond_to_self-1].l_bonded_atoms\ - .append(self.l_atoms[bond.bond_from_self-1]) - - atbond = copy.deepcopy(bond) - atbond.other_atom = self.l_atoms[bond.bond_to_self-1] - self.l_atoms[bond.bond_from_self-1].l_bonds.append(atbond) - - atbond = copy.deepcopy(bond) - atbond.other_atom = self.l_atoms[bond.bond_from_self-1] - self.l_atoms[bond.bond_to_self-1].l_bonds.append(atbond) - - def create_pdb_line_from_mol2(self): - """Generate PDB line from MOL2.""" - raise NotImplementedError("TODO - FIX THIS CODE") - # fake_type = "HETATM" - # rstr = "%s%5i%5s%4s%2s%5s %8.3f%8.3f%8.3f\n" % (fake_type, self.serial, - # self.name, self.res_name, ' L', - # self.res_seq, self.x, self.y, self.z) - # return rstr + """Create a list of bonded atoms for each atom.""" + for bond in self.bonds: + from_atom = self.atoms[bond.bond_from_self-1] + to_atom = self.atoms[bond.bond_to_self-1] + from_atom.bonded_atoms.append(to_atom) + to_atom.bonded_atoms.append(from_atom) diff --git a/pdb2pqr/tests/data/adp.mol2 b/pdb2pqr/tests/data/adp.mol2 new file mode 100644 index 000000000..57ca9a75a --- /dev/null +++ b/pdb2pqr/tests/data/adp.mol2 @@ -0,0 +1,106 @@ +# +# +# This file was generated by PRODRG version AA180301.0717 +# PRODRG written/copyrighted by Daan van Aalten +# and Alexander Schuettelkopf +# +# Questions/comments to dava@davapc1.bioch.dundee.ac.uk +# +# When using this software in a publication, cite: +# A. W. Schuettelkopf and D. M. F. van Aalten (2004). +# PRODRG - a tool for high-throughput crystallography +# of protein-ligand complexes. +# Acta Crystallogr. D60, 1355--1363. 
+# +# +@MOLECULE +ADP + 39 41 1 +SMALL +USER_CHARGES + +PRODRG MOLECULE +@ATOM + 1 O2B 94.270 83.620 105.750 O.3 1 ADP -0.314 + 2 PB 93.660 85.080 106.010 P.3 1 ADP 0.859 + 3 O3B 92.250 84.840 106.740 O.3 1 ADP -0.314 + 4 O1B 94.550 85.960 106.800 O.2 1 ADP -0.314 + 5 O3A 93.250 85.620 104.590 O.3 1 ADP 0.000 + 6 PA 94.420 86.420 103.910 P.3 1 ADP 1.375 + 7 O2A 95.710 85.470 103.820 O.3 1 ADP -0.377 + 8 O1A 94.690 87.680 104.630 O.2 1 ADP -0.377 + 9 O5' 93.920 86.620 102.390 O.3 1 ADP -0.377 + 10 C5' 93.140 87.760 102.010 C.3 1 ADP 0.000 + 11 H5' 93.033 88.425 102.867 H 1 ADP 0.000 + 12 H5S 92.191 87.398 101.616 H 1 ADP 0.000 + 13 C4' 93.830 88.510 100.870 C.3 1 ADP 0.172 + 14 H4' 94.078 87.794 100.086 H 1 ADP 0.000 + 15 O4' 93.050 89.580 100.320 O.3 1 ADP -0.377 + 16 C3' 95.110 89.220 101.300 C.3 1 ADP 0.143 + 17 H3' 94.971 89.688 102.275 H 1 ADP 0.000 + 18 O3' 96.210 88.280 101.320 O.3 1 ADP -0.574 + 19 H8L 97.040 88.750 101.610 H 1 ADP 0.380 + 20 C2' 95.260 90.290 100.220 C.3 1 ADP 0.143 + 21 H2' 95.270 91.281 100.674 H 1 ADP 0.000 + 22 O2' 96.410 90.090 99.370 O.3 1 ADP -0.574 + 23 H8M 96.410 90.780 98.640 H 1 ADP 0.380 + 24 C1' 94.000 90.110 99.380 C.3 1 ADP 0.172 + 25 H1' 94.220 89.323 98.658 H 1 ADP 0.000 + 26 N9 93.520 91.290 98.600 N.ar 1 ADP 0.000 + 27 C8 94.000 92.530 98.580 C.ar 1 ADP 0.000 + 28 H8 94.840 92.900 99.170 H 1 ADP 0.000 + 29 N7 93.270 93.250 97.740 N.ar 1 ADP 0.000 + 30 C5 92.340 92.460 97.220 C.ar 1 ADP 0.000 + 31 C4 92.490 91.240 97.750 C.ar 1 ADP 0.000 + 32 N3 91.660 90.220 97.410 N.ar 1 ADP 0.000 + 33 C2 90.610 90.410 96.480 C.ar 1 ADP 0.000 + 34 H2 89.940 89.590 96.220 H 1 ADP 0.000 + 35 N1 90.470 91.620 95.950 N.ar 1 ADP -0.293 + 36 H1 89.730 91.800 95.290 H 1 ADP 0.267 + 37 C6 91.350 92.670 96.330 C.ar 1 ADP 0.000 + 38 N6 91.230 93.890 95.800 N.2 1 ADP 0.000 + 39 H6 90.530 94.130 95.130 H 1 ADP 0.000 +@BOND + 1 2 1 1 + 2 2 3 1 + 3 2 4 2 + 4 2 5 1 + 5 6 5 1 + 6 6 7 1 + 7 6 8 2 + 8 6 9 1 + 9 10 9 1 + 10 10 11 1 + 11 10 12 1 + 12 13 10 
1 + 13 13 14 1 + 14 13 15 1 + 15 13 16 1 + 16 24 15 1 + 17 16 17 1 + 18 16 18 1 + 19 16 20 1 + 20 18 19 1 + 21 20 21 1 + 22 20 22 1 + 23 20 24 1 + 24 22 23 1 + 25 24 25 1 + 26 24 26 1 + 27 26 27 ar + 28 26 31 ar + 29 27 28 1 + 30 27 29 ar + 31 30 29 ar + 32 30 31 ar + 33 30 37 ar + 34 31 32 ar + 35 33 32 ar + 36 33 34 1 + 37 33 35 ar + 38 35 36 1 + 39 37 35 ar + 40 37 38 2 + 41 38 39 1 +@SUBSTRUCTURE + 1 ADP 1 diff --git a/pdb2pqr/tests/short_basic_test.py b/pdb2pqr/tests/short_basic_test.py index 3b5f3a9c0..342a94225 100644 --- a/pdb2pqr/tests/short_basic_test.py +++ b/pdb2pqr/tests/short_basic_test.py @@ -2,6 +2,7 @@ import logging from pathlib import Path import pytest +from pdb2pqr.ligand import mol2 import common @@ -32,9 +33,15 @@ def test_propka_apo(input_pdb, tmp_path): tmp_path=tmp_path) -def test_ligand_import(): +@pytest.mark.parametrize("input_mol2", [ + "1HPX-ligand.mol2", "1QBS-ligand.mol2", "1US0-ligand.mol2", "adp.mol2"]) +def test_ligand_read(input_mol2): """Testing basic aspects of code breaking.""" - from pdb2pqr.ligand import mol2 + ligand = mol2.Mol2Molecule() + mol2_path = Path("tests/data") / input_mol2 + with open(mol2_path, "rt") as mol2_file: + ligand.read(mol2_file) + # @pytest.mark.parametrize("input_pdb", ["1K1I", "1FAS"], ids=str) # def test_propka_apo(input_pdb, tmp_path): From c138a2c93d169185909bb9da5a8542445deb44c7 Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Sun, 14 Jun 2020 08:44:26 -0700 Subject: [PATCH 04/31] Assert Python 3.5 or greater. Using math.isclose(). 
--- pdb2pqr/pdb2pqr/ligand/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pdb2pqr/pdb2pqr/ligand/__init__.py b/pdb2pqr/pdb2pqr/ligand/__init__.py index b89c4c496..f11620634 100644 --- a/pdb2pqr/pdb2pqr/ligand/__init__.py +++ b/pdb2pqr/pdb2pqr/ligand/__init__.py @@ -2,3 +2,5 @@ Jens Erik Nielsen, University College Dublin 2004 """ +import sys +assert sys.version_info >= (3, 5) From a74d15501cb088baf9598391eb0f0929d7c7a6dd Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Sun, 14 Jun 2020 08:45:19 -0700 Subject: [PATCH 05/31] Make mol2.py more useful. * Change sybyl_type to atom_type * Fix __str__ error * Add attributes for PEOE charge assignment --- pdb2pqr/pdb2pqr/ligand/mol2.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/pdb2pqr/pdb2pqr/ligand/mol2.py b/pdb2pqr/pdb2pqr/ligand/mol2.py index 6e5bed588..3e042594b 100644 --- a/pdb2pqr/pdb2pqr/ligand/mol2.py +++ b/pdb2pqr/pdb2pqr/ligand/mol2.py @@ -38,28 +38,35 @@ class Mol2Atom: self.x = None self.y = None self.z = None - self.sybyl_type = None + self.atom_type = None self.radius = None self.is_c_term = False self.is_n_term = False self.mol2charge = None self.occupancy = 0.00 self.temp_factor = 0.00 - self.seg_id = "" - self.element = "" - self.charge = "" + self.seg_id = None + self.element = None + self.charge = None + self.formal_charge = None self.bonded_atoms = [] + # Terms for calculating atom electronegativity + self.poly_terms = None + # Atom electronegativity + self.chi = None + # Atom charge change during equilibration + self.delta_charge = None def __str__(self): """Generate PDB line from MOL2.""" pdb_fmt = ( "HETATM{a.serial:5d}{a.name:>5s}{a.res_name:>4s} L" - "{self.res_seq:>5s} {a.x:8.3f}{a.y:8.3f}{a.z:8.3f}" + "{a.res_seq!s:>5s} {a.x:8.3f}{a.y:8.3f}{a.z:8.3f}" ) - return pdb_fmt.format(self) + return pdb_fmt.format(a=self) -class Mol2Molecule(object): +class Mol2Molecule: """Tripos MOL2 molecule""" def __init__(self): self.atoms = [] @@ 
-111,7 +118,7 @@ class Mol2Molecule(object): raise ValueError(err) atom = Mol2Atom() atom.name = words[1] - atom.sybyl_type = words[5] + atom.atom_type = words[5] atom.chain_id = "L" try: atom.serial = int(words[0]) From ed9c675774130a3fde9a46b8983cf64a3d7f9041 Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Sun, 14 Jun 2020 08:46:26 -0700 Subject: [PATCH 06/31] Add PEOE charge support and test. The test is only for functionality; need regression test. See #608. --- pdb2pqr/tests/short_basic_test.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/pdb2pqr/tests/short_basic_test.py b/pdb2pqr/tests/short_basic_test.py index 342a94225..7493dcc01 100644 --- a/pdb2pqr/tests/short_basic_test.py +++ b/pdb2pqr/tests/short_basic_test.py @@ -1,18 +1,20 @@ """Basic tests to see if the code raises exceptions.""" import logging +import random from pathlib import Path import pytest from pdb2pqr.ligand import mol2 +from pdb2pqr.ligand import peoe import common _LOGGER = logging.getLogger(__name__) -_LOGGER.error("Need functional and regression test coverage for --userff") -_LOGGER.error("Need functional and regression test coverage for --usernames") -_LOGGER.error("Need functional and regression test coverage for --ligand") -_LOGGER.error("Need functional and regression test coverage for --apbs-input") +_LOGGER.warning("Need functional and regression test coverage for --userff") +_LOGGER.warning("Need functional and regression test coverage for --usernames") +_LOGGER.warning("Need functional and regression test coverage for --ligand") +_LOGGER.warning("Need functional and regression test coverage for --apbs-input") @pytest.mark.parametrize("input_pdb", ["1K1I", "1AFS", "1FAS", "5DV8", "5D8V"], ids=str) @@ -35,12 +37,19 @@ def test_propka_apo(input_pdb, tmp_path): @pytest.mark.parametrize("input_mol2", [ "1HPX-ligand.mol2", "1QBS-ligand.mol2", "1US0-ligand.mol2", "adp.mol2"]) -def test_ligand_read(input_mol2): +def test_ligand(input_mol2): 
"""Testing basic aspects of code breaking.""" ligand = mol2.Mol2Molecule() mol2_path = Path("tests/data") / input_mol2 with open(mol2_path, "rt") as mol2_file: ligand.read(mol2_file) + for atom in ligand.atoms: + atom.charge = random.uniform(-1, 1) + atom.old_charge = atom.charge + ligand.atoms = peoe.equilibrate(ligand.atoms) + for atom in ligand.atoms: + fmt = "{a!s} -- {a.old_charge:5.2f} -> {a.charge:5.2f}" + _LOGGER.info(fmt.format(a=atom)) # @pytest.mark.parametrize("input_pdb", ["1K1I", "1FAS"], ids=str) From 1dbd8143c4993e861e858fd8cfb8bb38873f1b8f Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Sun, 14 Jun 2020 09:08:57 -0700 Subject: [PATCH 07/31] Forgot to include PEOE code in last commit. --- pdb2pqr/pdb2pqr/ligand/peoe.py | 171 +++++++++++++++++++++++++++++++++ 1 file changed, 171 insertions(+) create mode 100644 pdb2pqr/pdb2pqr/ligand/peoe.py diff --git a/pdb2pqr/pdb2pqr/ligand/peoe.py b/pdb2pqr/pdb2pqr/ligand/peoe.py new file mode 100644 index 000000000..452ae5b30 --- /dev/null +++ b/pdb2pqr/pdb2pqr/ligand/peoe.py @@ -0,0 +1,171 @@ +"""Implements the PEOE method described in: + +Paul Czodrowski Ingo Dramburg Christoph A. Sotriffer Gerhard Klebe. +Development, validation, and application of adapted PEOE charges to estimate +pKa values of functional groups in protein–ligand complexes. +Proteins, 65, 424-437, 2006. +https://doi.org/10.1002/prot.21110 +""" +import logging +from math import isclose + + +_LOGGER = logging.getLogger(__name__) + + +# The terms of the third-order polynomial fit for the electronegativity. +# See https://doi.org/10.1002/prot.21110 for more information. +# NOTE - this data has no meaning outside of this module; do not move. 
+POLY_TERMS = { + 'H': (7.17, 6.24, -0.56, 12.85), + 'C.3': (7.98, 9.18, 1.88, 19.04), + 'C.CAT': (7.98, 9.18, 1.88, 19.04), + 'C.2': (8.79 + 0.5, 9.32, 1.51, 19.62), + 'C.AR': (7.98 + 0.55, 9.18, 1.88, 19.04), + 'C.1': (10.39, 9.45, 0.73, 20.57), + 'N.3': (11.54 + 6.0, 10.28, 1.36, 28.00), + 'N.4': (11.54 + 6.0, 10.28, 1.36, 28.00), + 'N.AR': (12.87 - 1.29, 11.15, 0.85, 24.87), + 'N.2': (12.87, 11.15, 0.85, 24.87), + 'N.PL3': (12.87 + 0.5, 11.15, 0.85, 24.87), + 'N.AM': (12.87 + 3.5, 11.15, 0.85, 24.87), + 'N.1': (15.68, 11.70, -0.27, 27.11), + 'O.OH': (14.18 + 0.8, 12.92, 1.39, 28.49), + 'O.3': (14.18 - 3.1, 12.92, 1.39, 28.49), + 'O.2': (14.18, 12.92, 1.39, 28.49), + 'O.CO2': (15.25, 13.79, 0.47, 31.33), + 'F': (12.36, 13.85, 2.31, 30.82), + 'CL': (9.38 + 1.0, 9.69, 1.35, 22.04), + 'BR': (10.08 + 0.8, 8.47, 1.16, 19.71), + 'I': (9.90 + 1.0, 7.96, 0.96, 18.82), + 'S.3': (10.13 + 0.5, 9.13, 1.38, 20.65), + 'S.2': (10.13 + 0.5, 9.13, 1.38, 20.65), + 'S.O2': (10.13 + 0.5, 9.13, 1.38, 20.65), + 'P.3': (10.13 + 0.5, 9.13, 1.38, 20.65) + } +# Maximum (absolute) value of charge after which contribution to polynomial +# is capped +MAX_CHARGE = 1.1 +DEFAULT_H_ELECTRONEG = 20.02 +DEFAULT_H_CHARGE = 1.0 +# These next values are from the "Adaptation of the PEOE Procedure" section of +# https://doi.org/10.1002/prot.21110. +DAMPING_FACTOR = 0.778 +SCALING_FACTOR = 1.56 +NUM_CYCLES = 6 + + +def electronegativity(charge, poly_terms, atom_type): + """Calculate the electronegativity. + + Calculation is based on a third-order polynomial in the atomic charge as + described in Equation 2 of https://doi.org/10.1002/prot.21110. 
+ + Args: + charge: charge of atom + poly_terms: polynomial terms ordered from 0th- to 3rd-order + atom_type: string with atom type + Returns: + electronegativity value + Raises: + IndexError if incorrect number of poly_terms given + """ + chi = None + if abs(charge) > MAX_CHARGE: + if charge < 0: + charge = -1.0 * MAX_CHARGE + else: + charge = MAX_CHARGE + if (atom_type == "H") and isclose(charge, DEFAULT_H_CHARGE): + chi = DEFAULT_H_ELECTRONEG + else: + if len(poly_terms) == 4: + chi = ( + poly_terms[0] + poly_terms[1]*charge + + poly_terms[2]*charge*charge + + poly_terms[3]*charge*charge*charge) + elif len(poly_terms) == 3: + chi = ( + poly_terms[0] + poly_terms[1]*charge + + poly_terms[2]*charge*charge + ) + else: + err = "Cannot parse length-%d polynomial" % len(poly_terms) + raise IndexError(err) + return chi + + +def assign_terms(atoms, term_dict): + """Assign polynomial terms to each atom. + + Args: + atoms: list of Mol2Atom atoms + term_dict: dictionary of polynomial terms + Returns: + modified list of atoms + """ + for atom in atoms: + atom_type = atom.atom_type.upper() + if atom_type == 'O.3': + atom_type = 'O.OH' + try: + atom.poly_terms = term_dict[atom_type] + except KeyError: + raise KeyError( + "Unable to find polynomial terms for atom type %s" % atom_type) + return atoms + + +def equilibrate( + atoms, damp=DAMPING_FACTOR, scale=SCALING_FACTOR, + num_cycles=NUM_CYCLES, term_dict=POLY_TERMS): + """Equilibrate the atomic charges. 
+ + Args: + atoms: list of Mol2Atom atoms to equilibrate + damp: damping factor for equilibration process + scale: scaling factor for equilibration process + num_cycles: number of PEOE cycles + term_dict: dictionary of polynomial terms + Returns: + revised list of atoms + """ + atoms = assign_terms(atoms, term_dict) + # Reset or accumulate charges + abs_qges = 0.0 + for atom in atoms: + if isclose(atom.charge, 0.0): + atom.formal_charge = 0.0 + else: + atom.formal_charge = atom.charge*(1.0/scale) + abs_qges += abs(atom.charge) + + for icycle in range(num_cycles): + for atom1 in atoms: + atom1.chi = electronegativity( + atom1.charge, atom1.poly_terms, atom1.atom_type) + atom1.delta_charge = 0.0 + for bonded_atom in atom1.bonded_atoms: + for atom2 in atoms: + if atom2.name == bonded_atom.name: + chi2 = electronegativity( + atom2.charge, atom2.poly_terms, + atom2.atom_type) + chi_diff = chi2 - atom1.chi + if chi_diff > 0: + chi_norm = electronegativity( + +1, atom1.poly_terms, atom1.atom_type) + else: + chi_norm = electronegativity( + +1, atom2.poly_terms, atom2.atom_type) + atom1.delta_charge += ( + (chi_diff/chi_norm)*(damp**icycle)) + for atom1 in atoms: + if isclose(abs_qges, 0.0): + atom1.charge += atom1.delta_charge + else: + atom1.charge += ( + atom1.delta_charge + (1.0/6.0) * atom1.formal_charge) + for atom1 in atoms: + atom1.charge = scale * atom1.charge + return atoms From ce004abff39e7b77c09ae127245d18b434a4ef64 Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Sun, 14 Jun 2020 10:40:45 -0700 Subject: [PATCH 08/31] Add ParameterizedMolecule class and tests. 
--- pdb2pqr/pdb2pqr/ligand/__init__.py | 16 ++++++++ pdb2pqr/pdb2pqr/ligand/mol2.py | 1 + pdb2pqr/pdb2pqr/ligand/parameterize.py | 53 ++++++++++++++++++++++++++ pdb2pqr/tests/short_basic_test.py | 7 ++-- 4 files changed, 74 insertions(+), 3 deletions(-) create mode 100644 pdb2pqr/pdb2pqr/ligand/parameterize.py diff --git a/pdb2pqr/pdb2pqr/ligand/__init__.py b/pdb2pqr/pdb2pqr/ligand/__init__.py index f11620634..ca5e4f47d 100644 --- a/pdb2pqr/pdb2pqr/ligand/__init__.py +++ b/pdb2pqr/pdb2pqr/ligand/__init__.py @@ -4,3 +4,19 @@ Jens Erik Nielsen, University College Dublin 2004 """ import sys assert sys.version_info >= (3, 5) + + +# TODO - this belongs in a configuration file somewhere other than here. +# PARSE radii data for C, N, O, S, H, Br, F, P are from Sitkoff et al's paper: +# +# Sitkoff D, Sharp KA, Honig B. Accurate Calculation of Hydration Free +# Energies Using Macroscopic Solvent Models. J Phys Chem 98 (7) 1978-88, +# 1994. J. Phys. Chem. 1994, 98, 7, 1978–1988 +# +# See also the AMBER mailing list: http://amber.ch.ic.ac.uk/archive/. +# +# The van der Waals radius is used for chlorine. + +PARSE_RADII = { + "C": 1.70, "N": 1.50, "O": 1.40, "S": 1.85, "H": 1.00, "BR":2.50, + "F": 1.20, "P": 1.90, "CL": 1.75} diff --git a/pdb2pqr/pdb2pqr/ligand/mol2.py b/pdb2pqr/pdb2pqr/ligand/mol2.py index 3e042594b..8117541d1 100644 --- a/pdb2pqr/pdb2pqr/ligand/mol2.py +++ b/pdb2pqr/pdb2pqr/ligand/mol2.py @@ -49,6 +49,7 @@ class Mol2Atom: self.element = None self.charge = None self.formal_charge = None + self.radius = None self.bonded_atoms = [] # Terms for calculating atom electronegativity self.poly_terms = None diff --git a/pdb2pqr/pdb2pqr/ligand/parameterize.py b/pdb2pqr/pdb2pqr/ligand/parameterize.py new file mode 100644 index 000000000..7fc8e28e6 --- /dev/null +++ b/pdb2pqr/pdb2pqr/ligand/parameterize.py @@ -0,0 +1,53 @@ +"""Calculating and assigning ligand charges and radii.""" +import logging +from .mol2 import Mol2Molecule +from .peoe import equilibrate +from . 
import PARSE_RADII + + +_LOGGER = logging.getLogger(__name__) + + +class ParameterizedMolecule(Mol2Molecule): + """Ligand with charge and radius assignments.""" + + def __init__(self): + super().__init__() + self.ligand_properties = {} + + def update(self, ligand): + """Update self with latest version of ligand (if needed). + + Args: + ligand: latest version of ligand + """ + prev_atom_names = set(self.ligand_properties) + curr_atom_names = {a.name for a in ligand.atoms} + if len(prev_atom_names ^ curr_atom_names) > 0: + for atom in ligand.atoms: + atom.formal_charge = 0.0 + self.reparameterize(ligand) + + + def reparameterize(self, ligand): + """Reassign parameters given new ligand. + + Args: + ligand: latest version of ligand + """ + self.ligand_properties = {} + for atom in ligand.atoms: + atom.charge = atom.formal_charge + ligand.atoms = equilibrate(ligand.atoms) + for atom in ligand.atoms: + elem = atom.atom_type.split(".")[0].upper() + charge = atom.charge + try: + radius = PARSE_RADII[elem] + atom.radius = radius + except KeyError: + raise KeyError( + "Unable to assign radius for element %s in atom %s" % ( + elem, atom)) + self.ligand_properties[atom.name] = { + "charge": charge, "radius": radius} diff --git a/pdb2pqr/tests/short_basic_test.py b/pdb2pqr/tests/short_basic_test.py index 7493dcc01..7278fc041 100644 --- a/pdb2pqr/tests/short_basic_test.py +++ b/pdb2pqr/tests/short_basic_test.py @@ -3,8 +3,8 @@ import logging import random from pathlib import Path import pytest -from pdb2pqr.ligand import mol2 from pdb2pqr.ligand import peoe +from pdb2pqr.ligand import parameterize import common @@ -39,14 +39,15 @@ def test_propka_apo(input_pdb, tmp_path): "1HPX-ligand.mol2", "1QBS-ligand.mol2", "1US0-ligand.mol2", "adp.mol2"]) def test_ligand(input_mol2): """Testing basic aspects of code breaking.""" - ligand = mol2.Mol2Molecule() + _LOGGER.warning("Ideally, this would be a regression test.") + ligand = parameterize.ParameterizedMolecule() mol2_path = 
Path("tests/data") / input_mol2 with open(mol2_path, "rt") as mol2_file: ligand.read(mol2_file) for atom in ligand.atoms: atom.charge = random.uniform(-1, 1) atom.old_charge = atom.charge - ligand.atoms = peoe.equilibrate(ligand.atoms) + ligand.update(ligand) for atom in ligand.atoms: fmt = "{a!s} -- {a.old_charge:5.2f} -> {a.charge:5.2f}" _LOGGER.info(fmt.format(a=atom)) From 0334a6773385eeb7e1d26dfaf94a882692819754 Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Sun, 14 Jun 2020 12:26:12 -0700 Subject: [PATCH 09/31] Check for duplicate atom names. The need to (re)parameterize depends on the atom name. PDB2PKA buried this check in another part of the code but it should be triggered when initially reading the file. --- pdb2pqr/pdb2pqr/ligand/parameterize.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/pdb2pqr/pdb2pqr/ligand/parameterize.py b/pdb2pqr/pdb2pqr/ligand/parameterize.py index 7fc8e28e6..eda215da4 100644 --- a/pdb2pqr/pdb2pqr/ligand/parameterize.py +++ b/pdb2pqr/pdb2pqr/ligand/parameterize.py @@ -3,6 +3,8 @@ import logging from .mol2 import Mol2Molecule from .peoe import equilibrate from . import PARSE_RADII +from ..forcefield import ForcefieldAtom +from ..forcefield import ForcefieldResidue _LOGGER = logging.getLogger(__name__) @@ -28,6 +30,23 @@ class ParameterizedMolecule(Mol2Molecule): atom.formal_charge = 0.0 self.reparameterize(ligand) + def read(self, mol2_file): + """Routines for reading MOL2 file. + + Args: + mol2_file: file-like object with MOL2 data. + """ + super().read(mol2_file) + atom_names = set() + duplicates = set() + for atom in self.atoms: + if atom.name in atom_names: + duplicates.add(atom.name) + else: + atom_names.add(atom.name) + if len(duplicates) > 0: + err = "Found duplicate atom names: %s" % duplicates + raise KeyError(err) def reparameterize(self, ligand): """Reassign parameters given new ligand. 
From f63e4d5fd879b4296383a81048e84842810a3320 Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Sun, 14 Jun 2020 14:26:51 -0700 Subject: [PATCH 10/31] Add tests for --ligand support. These tests fail right now. --- pdb2pqr/pdb2pqr/main.py | 11 +++++++---- pdb2pqr/tests/short_basic_test.py | 13 ++++++++++++- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/pdb2pqr/pdb2pqr/main.py b/pdb2pqr/pdb2pqr/main.py index c6d399d67..aba1fd685 100644 --- a/pdb2pqr/pdb2pqr/main.py +++ b/pdb2pqr/pdb2pqr/main.py @@ -20,6 +20,7 @@ from . import hydrogens from . import forcefield from . import protein as prot from . import input_output as io +from .ligand.parameterize import ParameterizedMolecule from .config import VERSION, TITLE_FORMAT_STRING, CITATIONS, FORCE_FIELDS from .config import REPAIR_LIMIT @@ -261,10 +262,10 @@ def setup_molecule(pdblist, definition, ligand_path): ligand: ligand object (may be None) """ if ligand_path is not None: + ligand = ParameterizedMolecule() with open(ligand_path, "rt", encoding="utf-8") as ligand_file: - raise NotImplementedError("Ligand functionality is temporarily disabled.") - # TODO - check to see if ligff updates copy of definition stored with protein - # protein, definition, ligand = ligff.initialize(definition, ligand_file, pdblist) + ligand.read(ligand_file) + raise NotImplementedError("Where do initial ligand charges come from?") else: protein = prot.Protein(pdblist, definition) ligand = None @@ -463,7 +464,9 @@ def non_trivial(args, protein, definition, is_cif): if args.ligand is not None: _LOGGER.info("Processing ligand.") - raise NotImplementedError("Ligand support not implemented.") + raise NotImplementedError( + "Got argument --ligand=%s but ligand support not implemented" % + args.ligand) if args.ffout is not None: _LOGGER.info("Applying custom naming scheme (%s).", args.ffout) diff --git a/pdb2pqr/tests/short_basic_test.py b/pdb2pqr/tests/short_basic_test.py index 7278fc041..1d0de6e8f 100644 --- 
a/pdb2pqr/tests/short_basic_test.py +++ b/pdb2pqr/tests/short_basic_test.py @@ -37,7 +37,7 @@ def test_propka_apo(input_pdb, tmp_path): @pytest.mark.parametrize("input_mol2", [ "1HPX-ligand.mol2", "1QBS-ligand.mol2", "1US0-ligand.mol2", "adp.mol2"]) -def test_ligand(input_mol2): +def test_ligand_parameterization(input_mol2): """Testing basic aspects of code breaking.""" _LOGGER.warning("Ideally, this would be a regression test.") ligand = parameterize.ParameterizedMolecule() @@ -53,6 +53,17 @@ def test_ligand(input_mol2): _LOGGER.info(fmt.format(a=atom)) +@pytest.mark.parametrize("input_pdb", ["1HPX", "1QBS", "1US0"], ids=str) +def test_ligand(input_pdb, tmp_path): + """PROPKA non-regression tests on proteins without ligands.""" + ligand = Path("tests/data") / ("%s-ligand.mol2" % input_pdb) + args = "--log-level=INFO --ff=AMBER --drop-water --ligand=%s" % ligand + output_pqr = Path(input_pdb).stem + ".pqr" + common.run_pdb2pqr(args=args, input_pdb=input_pdb, output_pqr=output_pqr, + tmp_path=tmp_path) + + + # @pytest.mark.parametrize("input_pdb", ["1K1I", "1FAS"], ids=str) # def test_propka_apo(input_pdb, tmp_path): # """PROPKA titration of proteins without ligands.""" From 57bd4666405b84304cdbdf986daf406a8a2d7492 Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Mon, 15 Jun 2020 08:09:12 -0700 Subject: [PATCH 11/31] Separate functionality testing into separate files. Split PROPKA and ligand testing into separate files from basic tests. 
--- pdb2pqr/tests/data/anthracene.mol2 | 56 +++++++++++++++++++ pdb2pqr/tests/data/cyclohexane.mol2 | 38 +++++++++++++ pdb2pqr/tests/data/ethanol.mol2 | 33 ++++++++++++ pdb2pqr/tests/data/glycerol.mol2 | 43 +++++++++++++++ pdb2pqr/tests/data/naphthalene.mol2 | 47 ++++++++++++++++ pdb2pqr/tests/ligand_test.py | 72 +++++++++++++++++++++++++ pdb2pqr/tests/propka_test.py | 26 +++++++++ pdb2pqr/tests/short_basic_test.py | 83 ----------------------------- 8 files changed, 315 insertions(+), 83 deletions(-) create mode 100644 pdb2pqr/tests/data/anthracene.mol2 create mode 100644 pdb2pqr/tests/data/cyclohexane.mol2 create mode 100644 pdb2pqr/tests/data/ethanol.mol2 create mode 100644 pdb2pqr/tests/data/glycerol.mol2 create mode 100644 pdb2pqr/tests/data/naphthalene.mol2 create mode 100644 pdb2pqr/tests/ligand_test.py create mode 100644 pdb2pqr/tests/propka_test.py diff --git a/pdb2pqr/tests/data/anthracene.mol2 b/pdb2pqr/tests/data/anthracene.mol2 new file mode 100644 index 000000000..adff53b48 --- /dev/null +++ b/pdb2pqr/tests/data/anthracene.mol2 @@ -0,0 +1,56 @@ +# +# +# This file was generated by PRODRG version AA180301.0717 +# PRODRG written/copyrighted by Daan van Aalten +# and Alexander Schuettelkopf +# +# Questions/comments to dava@davapc1.bioch.dundee.ac.uk +# +# When using this software in a publication, cite: +# A. W. Schuettelkopf and D. M. F. van Aalten (2004). +# PRODRG - a tool for high-throughput crystallography +# of protein-ligand complexes. +# Acta Crystallogr. D60, 1355--1363. 
+# +# +@MOLECULE +DRG + 14 16 1 +SMALL +USER_CHARGES + +PRODRG MOLECULE +@ATOM + 1 CAB -1.930 6.450 -2.470 C.ar 1 DRG 0.000 + 2 CAC -2.360 7.390 -1.500 C.ar 1 DRG 0.000 + 3 CAD -2.850 8.640 -1.920 C.ar 1 DRG 0.000 + 4 CAE -3.300 9.550 -0.940 C.ar 1 DRG 0.000 + 5 CAF -3.830 10.790 -1.350 C.ar 1 DRG 0.000 + 6 CAG -4.300 11.700 -0.380 C.ar 1 DRG 0.000 + 7 CAN -4.240 11.370 0.990 C.ar 1 DRG 0.000 + 8 CAM -3.710 10.130 1.410 C.ar 1 DRG 0.000 + 9 CAL -3.240 9.220 0.440 C.ar 1 DRG 0.000 + 10 CAK -2.740 7.970 0.860 C.ar 1 DRG 0.000 + 11 CAJ -2.300 7.060 -0.110 C.ar 1 DRG 0.000 + 12 CAI -1.820 5.790 0.300 C.ar 1 DRG 0.000 + 13 CAH -1.400 4.860 -0.680 C.ar 1 DRG 0.000 + 14 CAA -1.460 5.190 -2.050 C.ar 1 DRG 0.000 +@BOND + 1 2 1 ar + 2 1 14 ar + 3 2 3 ar + 4 2 11 ar + 5 4 3 ar + 6 4 5 ar + 7 4 9 ar + 8 5 6 ar + 9 6 7 ar + 10 7 8 ar + 11 9 8 ar + 12 9 10 ar + 13 11 10 ar + 14 11 12 ar + 15 12 13 ar + 16 13 14 ar +@SUBSTRUCTURE + 1 DRG 1 diff --git a/pdb2pqr/tests/data/cyclohexane.mol2 b/pdb2pqr/tests/data/cyclohexane.mol2 new file mode 100644 index 000000000..a5f65190a --- /dev/null +++ b/pdb2pqr/tests/data/cyclohexane.mol2 @@ -0,0 +1,38 @@ +# +# +# This file was generated by PRODRG version AA180301.0717 +# PRODRG written/copyrighted by Daan van Aalten +# and Alexander Schuettelkopf +# +# Questions/comments to dava@davapc1.bioch.dundee.ac.uk +# +# When using this software in a publication, cite: +# A. W. Schuettelkopf and D. M. F. van Aalten (2004). +# PRODRG - a tool for high-throughput crystallography +# of protein-ligand complexes. +# Acta Crystallogr. D60, 1355--1363. 
+# +# +@MOLECULE +DRG + 6 6 1 +SMALL +USER_CHARGES + +PRODRG MOLECULE +@ATOM + 1 CAA 11.290 18.580 21.950 C.3 1 DRG 0.000 + 2 CAB 11.410 19.880 22.760 C.3 1 DRG 0.000 + 3 CAC 11.080 21.080 21.870 C.3 1 DRG 0.000 + 4 CAF 12.020 21.110 20.660 C.3 1 DRG 0.000 + 5 CAE 11.900 19.810 19.860 C.3 1 DRG 0.000 + 6 CAD 12.230 18.610 20.750 C.3 1 DRG 0.000 +@BOND + 1 1 2 1 + 2 1 6 1 + 3 2 3 1 + 4 3 4 1 + 5 4 5 1 + 6 5 6 1 +@SUBSTRUCTURE + 1 DRG 1 diff --git a/pdb2pqr/tests/data/ethanol.mol2 b/pdb2pqr/tests/data/ethanol.mol2 new file mode 100644 index 000000000..28b0607f6 --- /dev/null +++ b/pdb2pqr/tests/data/ethanol.mol2 @@ -0,0 +1,33 @@ +# +# +# This file was generated by PRODRG version AA180301.0717 +# PRODRG written/copyrighted by Daan van Aalten +# and Alexander Schuettelkopf +# +# Questions/comments to dava@davapc1.bioch.dundee.ac.uk +# +# When using this software in a publication, cite: +# A. W. Schuettelkopf and D. M. F. van Aalten (2004). +# PRODRG - a tool for high-throughput crystallography +# of protein-ligand complexes. +# Acta Crystallogr. D60, 1355--1363. +# +# +@MOLECULE +DRG + 4 3 1 +SMALL +USER_CHARGES + +PRODRG MOLECULE +@ATOM + 1 CAA 21.780 -4.220 -8.790 C.3 1 DRG 0.000 + 2 CAB 20.740 -4.840 -7.860 C.3 1 DRG 0.150 + 3 OAC 20.850 -6.270 -7.890 O.3 1 DRG -0.548 + 4 HAF 20.170 -6.670 -7.280 H 1 DRG 0.398 +@BOND + 1 2 1 1 + 2 2 3 1 + 3 3 4 1 +@SUBSTRUCTURE + 1 DRG 1 diff --git a/pdb2pqr/tests/data/glycerol.mol2 b/pdb2pqr/tests/data/glycerol.mol2 new file mode 100644 index 000000000..e788cf003 --- /dev/null +++ b/pdb2pqr/tests/data/glycerol.mol2 @@ -0,0 +1,43 @@ +# +# +# This file was generated by PRODRG version AA180301.0717 +# PRODRG written/copyrighted by Daan van Aalten +# and Alexander Schuettelkopf +# +# Questions/comments to dava@davapc1.bioch.dundee.ac.uk +# +# When using this software in a publication, cite: +# A. W. Schuettelkopf and D. M. F. van Aalten (2004). +# PRODRG - a tool for high-throughput crystallography +# of protein-ligand complexes. 
+# Acta Crystallogr. D60, 1355--1363. +# +# +@MOLECULE +DRG + 9 8 1 +SMALL +USER_CHARGES + +PRODRG MOLECULE +@ATOM + 1 OAD -3.760 9.120 5.400 O.3 1 DRG -0.548 + 2 HAF -4.620 9.360 4.950 H 1 DRG 0.398 + 3 CAA -2.870 8.530 4.430 C.3 1 DRG 0.150 + 4 CAB -1.830 7.570 5.040 C.3 1 DRG 0.150 + 5 OAE -2.410 6.360 5.540 O.3 1 DRG -0.548 + 6 HAG -1.690 5.750 5.860 H 1 DRG 0.398 + 7 CAC -0.990 8.280 6.120 C.3 1 DRG 0.150 + 8 OAF -0.110 9.260 5.560 O.3 1 DRG -0.548 + 9 HAH 0.040 9.980 6.230 H 1 DRG 0.398 +@BOND + 1 1 2 1 + 2 3 1 1 + 3 4 3 1 + 4 4 5 1 + 5 4 7 1 + 6 5 6 1 + 7 7 8 1 + 8 8 9 1 +@SUBSTRUCTURE + 1 DRG 1 diff --git a/pdb2pqr/tests/data/naphthalene.mol2 b/pdb2pqr/tests/data/naphthalene.mol2 new file mode 100644 index 000000000..16f8e62af --- /dev/null +++ b/pdb2pqr/tests/data/naphthalene.mol2 @@ -0,0 +1,47 @@ +# +# +# This file was generated by PRODRG version AA180301.0717 +# PRODRG written/copyrighted by Daan van Aalten +# and Alexander Schuettelkopf +# +# Questions/comments to dava@davapc1.bioch.dundee.ac.uk +# +# When using this software in a publication, cite: +# A. W. Schuettelkopf and D. M. F. van Aalten (2004). +# PRODRG - a tool for high-throughput crystallography +# of protein-ligand complexes. +# Acta Crystallogr. D60, 1355--1363. 
+# +# +@MOLECULE +DRG + 10 11 1 +SMALL +USER_CHARGES + +PRODRG MOLECULE +@ATOM + 1 CAB -3.790 -3.860 0.280 C.ar 1 DRG 0.000 + 2 CAC -4.440 -4.930 -0.380 C.ar 1 DRG 0.000 + 3 CAD -5.830 -4.850 -0.630 C.ar 1 DRG 0.000 + 4 CAE -6.470 -5.920 -1.280 C.ar 1 DRG 0.000 + 5 CAJ -5.740 -7.060 -1.680 C.ar 1 DRG 0.000 + 6 CAI -4.360 -7.150 -1.440 C.ar 1 DRG 0.000 + 7 CAH -3.700 -6.080 -0.790 C.ar 1 DRG 0.000 + 8 CAG -2.320 -6.160 -0.530 C.ar 1 DRG 0.000 + 9 CAF -1.680 -5.100 0.130 C.ar 1 DRG 0.000 + 10 CAA -2.400 -3.960 0.530 C.ar 1 DRG 0.000 +@BOND + 1 2 1 ar + 2 1 10 ar + 3 2 3 ar + 4 2 7 ar + 5 3 4 ar + 6 4 5 ar + 7 5 6 ar + 8 7 6 ar + 9 7 8 ar + 10 8 9 ar + 11 9 10 ar +@SUBSTRUCTURE + 1 DRG 1 diff --git a/pdb2pqr/tests/ligand_test.py b/pdb2pqr/tests/ligand_test.py new file mode 100644 index 000000000..34e8baee1 --- /dev/null +++ b/pdb2pqr/tests/ligand_test.py @@ -0,0 +1,72 @@ +"""Tests for ligand functionality.""" +import logging +import random +from pathlib import Path +import pytest +from pdb2pqr.ligand import parameterize +import common + + +_LOGGER = logging.getLogger(__name__) +_LOGGER.warning("Need functional and regression test coverage for --ligand") + + +@pytest.mark.parametrize("input_mol2", [ + "1HPX-ligand.mol2", "1QBS-ligand.mol2", "1US0-ligand.mol2", "adp.mol2"]) +def test_ligand_parameterization(input_mol2): + """Testing basic aspects of code breaking.""" + _LOGGER.warning("Ideally, this would be a regression test.") + ligand = parameterize.ParameterizedMolecule() + mol2_path = Path("tests/data") / input_mol2 + with open(mol2_path, "rt") as mol2_file: + ligand.read(mol2_file) + for atom in ligand.atoms.values(): + atom.charge = random.uniform(-1, 1) + atom.old_charge = atom.charge + ligand.update(ligand) + for atom in ligand.atoms.values(): + fmt = "{a!s} -- {a.old_charge:5.2f} -> {a.charge:5.2f}" + _LOGGER.info(fmt.format(a=atom)) + + +@pytest.mark.parametrize("input_pdb", ["1HPX", "1QBS", "1US0"], ids=str) +def test_ligand(input_pdb, tmp_path): + """PROPKA 
non-regression tests on proteins without ligands.""" + ligand = Path("tests/data") / ("%s-ligand.mol2" % input_pdb) + args = "--log-level=INFO --ff=AMBER --drop-water --ligand=%s" % ligand + output_pqr = Path(input_pdb).stem + ".pqr" + common.run_pdb2pqr(args=args, input_pdb=input_pdb, output_pqr=output_pqr, + tmp_path=tmp_path) + + +# @pytest.mark.parametrize( +# "args, input_pdb, input_mol2, output_pqr", +# [ +# pytest.param( +# "--log-level=INFO --ff=AMBER", +# "1HPX", +# common.DATA_DIR / "1HPX-ligand.mol2", +# "output.pqr", +# id="1HPX-ligand AMBER" +# ), +# pytest.param( +# "--log-level=INFO --ff=AMBER", +# common.DATA_DIR / "1QBS.pdb", +# common.DATA_DIR / "1QBS-ligand.mol2", +# "output.pqr", +# id="1QBS-ligand AMBER" +# ), +# pytest.param( +# "--log-level=INFO --ff=AMBER", +# common.DATA_DIR / "1US0.pdb", +# common.DATA_DIR / "1US0-ligand.mol2", +# "output.pqr", +# id="1US0-ligand AMBER" +# ), +# ] +# ) +# def test_ligand(args, input_pdb, input_mol2, output_pqr, tmp_path): +# """Test ligand handling.""" +# args_ = "{args} --ligand={ligand}".format(args=args, ligand=input_mol2) +# run_pdb2pqr(args_, input_pdb, output_pqr, tmp_path) +# _LOGGER.warning("This test needs better checking to avoid silent failure.") \ No newline at end of file diff --git a/pdb2pqr/tests/propka_test.py b/pdb2pqr/tests/propka_test.py new file mode 100644 index 000000000..1c384d76c --- /dev/null +++ b/pdb2pqr/tests/propka_test.py @@ -0,0 +1,26 @@ +"""Tests for PROPKA functionality.""" +import logging +from pathlib import Path +import pytest +import common + + +_LOGGER = logging.getLogger(__name__) + + +@pytest.mark.parametrize("input_pdb", ["1K1I", "1AFS", "1FAS", "5DV8", "5D8V"], ids=str) +def test_propka_apo(input_pdb, tmp_path): + """PROPKA non-regression tests on proteins without ligands.""" + args = "--log-level=INFO --ff=AMBER --drop-water --titration-state-method=propka" + output_pqr = Path(input_pdb).stem + ".pqr" + common.run_pdb2pqr(args=args, input_pdb=input_pdb, 
output_pqr=output_pqr, + tmp_path=tmp_path) + + +# @pytest.mark.parametrize("input_pdb", ["1K1I", "1FAS"], ids=str) +# def test_propka_apo(input_pdb, tmp_path): +# """PROPKA titration of proteins without ligands.""" +# args = "--log-level=INFO --ff=AMBER --drop-water --titration-state-method=propka" +# output_pqr = Path(input_pdb).stem + ".pqr" +# run_pdb2pqr(args, input_pdb, output_pqr, tmp_path) + diff --git a/pdb2pqr/tests/short_basic_test.py b/pdb2pqr/tests/short_basic_test.py index 1d0de6e8f..aa6ef117d 100644 --- a/pdb2pqr/tests/short_basic_test.py +++ b/pdb2pqr/tests/short_basic_test.py @@ -1,10 +1,7 @@ """Basic tests to see if the code raises exceptions.""" import logging -import random from pathlib import Path import pytest -from pdb2pqr.ligand import peoe -from pdb2pqr.ligand import parameterize import common @@ -13,7 +10,6 @@ _LOGGER = logging.getLogger(__name__) _LOGGER.warning("Need functional and regression test coverage for --userff") _LOGGER.warning("Need functional and regression test coverage for --usernames") -_LOGGER.warning("Need functional and regression test coverage for --ligand") _LOGGER.warning("Need functional and regression test coverage for --apbs-input") @@ -24,82 +20,3 @@ def test_basic_apo(input_pdb, tmp_path): output_pqr = Path(input_pdb).stem + ".pqr" common.run_pdb2pqr(args=args, input_pdb=input_pdb, output_pqr=output_pqr, tmp_path=tmp_path) - - -@pytest.mark.parametrize("input_pdb", ["1K1I", "1AFS", "1FAS", "5DV8", "5D8V"], ids=str) -def test_propka_apo(input_pdb, tmp_path): - """PROPKA non-regression tests on proteins without ligands.""" - args = "--log-level=INFO --ff=AMBER --drop-water --titration-state-method=propka" - output_pqr = Path(input_pdb).stem + ".pqr" - common.run_pdb2pqr(args=args, input_pdb=input_pdb, output_pqr=output_pqr, - tmp_path=tmp_path) - - -@pytest.mark.parametrize("input_mol2", [ - "1HPX-ligand.mol2", "1QBS-ligand.mol2", "1US0-ligand.mol2", "adp.mol2"]) -def test_ligand_parameterization(input_mol2): - 
"""Testing basic aspects of code breaking.""" - _LOGGER.warning("Ideally, this would be a regression test.") - ligand = parameterize.ParameterizedMolecule() - mol2_path = Path("tests/data") / input_mol2 - with open(mol2_path, "rt") as mol2_file: - ligand.read(mol2_file) - for atom in ligand.atoms: - atom.charge = random.uniform(-1, 1) - atom.old_charge = atom.charge - ligand.update(ligand) - for atom in ligand.atoms: - fmt = "{a!s} -- {a.old_charge:5.2f} -> {a.charge:5.2f}" - _LOGGER.info(fmt.format(a=atom)) - - -@pytest.mark.parametrize("input_pdb", ["1HPX", "1QBS", "1US0"], ids=str) -def test_ligand(input_pdb, tmp_path): - """PROPKA non-regression tests on proteins without ligands.""" - ligand = Path("tests/data") / ("%s-ligand.mol2" % input_pdb) - args = "--log-level=INFO --ff=AMBER --drop-water --ligand=%s" % ligand - output_pqr = Path(input_pdb).stem + ".pqr" - common.run_pdb2pqr(args=args, input_pdb=input_pdb, output_pqr=output_pqr, - tmp_path=tmp_path) - - - -# @pytest.mark.parametrize("input_pdb", ["1K1I", "1FAS"], ids=str) -# def test_propka_apo(input_pdb, tmp_path): -# """PROPKA titration of proteins without ligands.""" -# args = "--log-level=INFO --ff=AMBER --drop-water --titration-state-method=propka" -# output_pqr = Path(input_pdb).stem + ".pqr" -# run_pdb2pqr(args, input_pdb, output_pqr, tmp_path) - - -# @pytest.mark.parametrize( -# "args, input_pdb, input_mol2, output_pqr", -# [ -# pytest.param( -# "--log-level=INFO --ff=AMBER", -# "1HPX", -# common.DATA_DIR / "1HPX-ligand.mol2", -# "output.pqr", -# id="1HPX-ligand AMBER" -# ), -# pytest.param( -# "--log-level=INFO --ff=AMBER", -# common.DATA_DIR / "1QBS.pdb", -# common.DATA_DIR / "1QBS-ligand.mol2", -# "output.pqr", -# id="1QBS-ligand AMBER" -# ), -# pytest.param( -# "--log-level=INFO --ff=AMBER", -# common.DATA_DIR / "1US0.pdb", -# common.DATA_DIR / "1US0-ligand.mol2", -# "output.pqr", -# id="1US0-ligand AMBER" -# ), -# ] -# ) -# def test_ligand(args, input_pdb, input_mol2, output_pqr, tmp_path): 
-# """Test ligand handling.""" -# args_ = "{args} --ligand={ligand}".format(args=args, ligand=input_mol2) -# run_pdb2pqr(args_, input_pdb, output_pqr, tmp_path) -# _LOGGER.warning("This test needs better checking to avoid silent failure.") From e71a6f47524730531971c64b1ff23fbfaf0b3383 Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Mon, 15 Jun 2020 09:24:33 -0700 Subject: [PATCH 12/31] Move topology routines into Mol2Molecule. Many of the topology routines in the old PDB2PQR 2.1.1 implementation of PDB2PKA also work as member functions of the Mol2Molecule class. --- pdb2pqr/pdb2pqr/ligand/mol2.py | 83 ++++++++++++++++++++++---- pdb2pqr/pdb2pqr/ligand/parameterize.py | 30 ++-------- 2 files changed, 79 insertions(+), 34 deletions(-) diff --git a/pdb2pqr/pdb2pqr/ligand/mol2.py b/pdb2pqr/pdb2pqr/ligand/mol2.py index 8117541d1..af95eebcd 100644 --- a/pdb2pqr/pdb2pqr/ligand/mol2.py +++ b/pdb2pqr/pdb2pqr/ligand/mol2.py @@ -4,6 +4,8 @@ http://www.tripos.com/index.php?family=modules,SimplePage,,,&page=sup_mol2&s=0 """ import logging +from collections import OrderedDict +import numpy _LOGGER = logging.getLogger(__name__) @@ -46,11 +48,11 @@ class Mol2Atom: self.occupancy = 0.00 self.temp_factor = 0.00 self.seg_id = None - self.element = None self.charge = None self.formal_charge = None self.radius = None - self.bonded_atoms = [] + self.bonded_atoms = None + self.torsions = None # Terms for calculating atom electronegativity self.poly_terms = None # Atom electronegativity @@ -66,11 +68,36 @@ class Mol2Atom: ) return pdb_fmt.format(a=self) + @property + def coords(self): + """Return coordinates as numpy vector.""" + return numpy.array([self.x, self.y, self.z]) + + @property + def bonded_atom_names(self): + """Return a list of bonded atom names.""" + return [a.name for a in self.bonded_atoms] + + @property + def num_bonded_heavy(self): + """Return the number of heavy atoms bonded to this atom.""" + return len([a for a in self.bonded_atoms if a.atom_type != "H"]) + + 
@property + def num_bonded_hydrogen(self): + """Return the number of hydrogen atoms bonded to this atom.""" + return len([a for a in self.bonded_atoms if a.atom_type == "H"]) + + @property + def element(self): + """Return a string with the element for this atom (uppercase).""" + return self.atom_type.split(".")[0].upper() + class Mol2Molecule: """Tripos MOL2 molecule""" def __init__(self): - self.atoms = [] + self.atoms = OrderedDict() self.bonds = [] self.serial = None self.name = None @@ -80,6 +107,25 @@ class Mol2Molecule: self.y = None self.z = None + def get_torsions(self, start_atom): + """Get the torsion angles that start with this atom (name). + + Args: + start_atom: starting atom name + Returns: + list of 4-tuples containing atom names comprising torsions + """ + torsions = [] + for bonded1 in self.atoms[start_atom].bonded_atom_names: + for bonded2 in self.atoms[bonded1].bonded_atom_names: + if bonded2 == start_atom: + continue + for end_atom in self.atoms[bonded2].bonded_atom_names: + if end_atom == bonded1: + continue + torsions.append((start_atom, bonded1, bonded2, end_atom)) + return torsions + def read(self, mol2_file): """Routines for reading MOL2 file. 
@@ -106,7 +152,7 @@ class Mol2Molecule: if "@ATOM" in line: break _LOGGER.debug("Skipping: %s", line.strip()) - + duplicates = set() for line in mol2_file: line = line.strip() if not line: @@ -138,7 +184,13 @@ class Mol2Molecule: err = "Unable to parse %s as charge in atom line: %s" % ( words[8], line) _LOGGER.warning(err) - self.atoms.append(atom) + if atom.name in self.atoms: + duplicates.add(atom.name) + else: + self.atoms[atom.name] = atom + if len(duplicates) > 0: + raise KeyError("Found duplicate atoms names in MOL2 file: %s" % + duplicates) return mol2_file def parse_bonds(self, mol2_file): @@ -176,9 +228,20 @@ class Mol2Molecule: return mol2_file def create_bonded_atoms(self): - """Create a list of bonded atoms for each atom.""" + """Create a list of bonded atoms and torsions for each atom.""" + atom_names = list(self.atoms.keys()) for bond in self.bonds: - from_atom = self.atoms[bond.bond_from_self-1] - to_atom = self.atoms[bond.bond_to_self-1] - from_atom.bonded_atoms.append(to_atom) - to_atom.bonded_atoms.append(from_atom) + from_atom_name = atom_names[bond.bond_from_self-1] + from_atom = self.atoms[from_atom_name] + to_atom_name = atom_names[bond.bond_to_self-1] + to_atom = self.atoms[to_atom_name] + if from_atom.bonded_atoms is None: + from_atom.bonded_atoms = [to_atom] + else: + from_atom.bonded_atoms.append(to_atom) + if to_atom.bonded_atoms is None: + to_atom.bonded_atoms = [from_atom] + else: + to_atom.bonded_atoms.append(from_atom) + for atom_name, atom in self.atoms.items(): + atom.torsions = self.get_torsions(atom_name) diff --git a/pdb2pqr/pdb2pqr/ligand/parameterize.py b/pdb2pqr/pdb2pqr/ligand/parameterize.py index eda215da4..28f4abea9 100644 --- a/pdb2pqr/pdb2pqr/ligand/parameterize.py +++ b/pdb2pqr/pdb2pqr/ligand/parameterize.py @@ -24,30 +24,12 @@ class ParameterizedMolecule(Mol2Molecule): ligand: latest version of ligand """ prev_atom_names = set(self.ligand_properties) - curr_atom_names = {a.name for a in ligand.atoms} + curr_atom_names = 
set(self.atoms) if len(prev_atom_names ^ curr_atom_names) > 0: - for atom in ligand.atoms: + for atom in ligand.atoms.values(): atom.formal_charge = 0.0 self.reparameterize(ligand) - def read(self, mol2_file): - """Routines for reading MOL2 file. - - Args: - mol2_file: file-like object with MOL2 data. - """ - super().read(mol2_file) - atom_names = set() - duplicates = set() - for atom in self.atoms: - if atom.name in atom_names: - duplicates.add(atom.name) - else: - atom_names.add(atom.name) - if len(duplicates) > 0: - err = "Found duplicate atom names: %s" % duplicates - raise KeyError(err) - def reparameterize(self, ligand): """Reassign parameters given new ligand. @@ -55,11 +37,11 @@ class ParameterizedMolecule(Mol2Molecule): ligand: latest version of ligand """ self.ligand_properties = {} - for atom in ligand.atoms: + for atom in ligand.atoms.values(): atom.charge = atom.formal_charge - ligand.atoms = equilibrate(ligand.atoms) - for atom in ligand.atoms: - elem = atom.atom_type.split(".")[0].upper() + equilibrate(ligand.atoms.values()) + for atom in ligand.atoms.values(): + elem = atom.element charge = atom.charge try: radius = PARSE_RADII[elem] From 4edd5b728eb6f671127cb9f10654ee2180552c6c Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Mon, 15 Jun 2020 09:34:56 -0700 Subject: [PATCH 13/31] Add regression tests for torsion angle checking. 
--- pdb2pqr/tests/ligand_test.py | 52 +++++++++++++++++++++++++++++++++--- 1 file changed, 49 insertions(+), 3 deletions(-) diff --git a/pdb2pqr/tests/ligand_test.py b/pdb2pqr/tests/ligand_test.py index 34e8baee1..f345f2282 100644 --- a/pdb2pqr/tests/ligand_test.py +++ b/pdb2pqr/tests/ligand_test.py @@ -13,7 +13,7 @@ _LOGGER.warning("Need functional and regression test coverage for --ligand") @pytest.mark.parametrize("input_mol2", [ "1HPX-ligand.mol2", "1QBS-ligand.mol2", "1US0-ligand.mol2", "adp.mol2"]) -def test_ligand_parameterization(input_mol2): +def test_parameterization(input_mol2): """Testing basic aspects of code breaking.""" _LOGGER.warning("Ideally, this would be a regression test.") ligand = parameterize.ParameterizedMolecule() @@ -29,14 +29,60 @@ def test_ligand_parameterization(input_mol2): _LOGGER.info(fmt.format(a=atom)) +TORSION_RESULTS = { + "ethanol.mol2": { + ('CAA', 'CAB', 'OAC', 'HAF'), ('HAF', 'OAC', 'CAB', 'CAA')}, + "glycerol.mol2": { + ('CAA', 'CAB', 'CAC', 'OAF'), ('CAA', 'CAB', 'OAE', 'HAG'), + ('CAB', 'CAA', 'OAD', 'HAF'), ('CAB', 'CAC', 'OAF', 'HAH'), + ('CAC', 'CAB', 'CAA', 'OAD'), ('CAC', 'CAB', 'OAE', 'HAG'), + ('HAF', 'OAD', 'CAA', 'CAB'), ('HAG', 'OAE', 'CAB', 'CAA'), + ('HAG', 'OAE', 'CAB', 'CAC'), ('HAH', 'OAF', 'CAC', 'CAB'), + ('OAD', 'CAA', 'CAB', 'CAC'), ('OAD', 'CAA', 'CAB', 'OAE'), + ('OAE', 'CAB', 'CAA', 'OAD'), ('OAE', 'CAB', 'CAC', 'OAF'), + ('OAF', 'CAC', 'CAB', 'CAA'), ('OAF', 'CAC', 'CAB', 'OAE')}, + "cyclohexane.mol2": { + ('CAA', 'CAB', 'CAC', 'CAF'), ('CAA', 'CAD', 'CAE', 'CAF'), + ('CAB', 'CAA', 'CAD', 'CAE'), ('CAB', 'CAC', 'CAF', 'CAE'), + ('CAC', 'CAB', 'CAA', 'CAD'), ('CAC', 'CAF', 'CAE', 'CAD'), + ('CAD', 'CAA', 'CAB', 'CAC'), ('CAD', 'CAE', 'CAF', 'CAC'), + ('CAE', 'CAD', 'CAA', 'CAB'), ('CAE', 'CAF', 'CAC', 'CAB'), + ('CAF', 'CAC', 'CAB', 'CAA'), ('CAF', 'CAE', 'CAD', 'CAA')} +} + +@pytest.mark.parametrize("input_mol2", [ + "cyclohexane.mol2", "ethanol.mol2", "glycerol.mol2"]) +def 
test_torsions(input_mol2): + """Test assignment of torsion angles.""" + ligand = parameterize.ParameterizedMolecule() + mol2_path = Path("tests/data") / input_mol2 + with open(mol2_path, "rt") as mol2_file: + ligand.read(mol2_file) + torsions = set() + for name, atom in ligand.atoms.items(): + torsions |= set(atom.torsions) + try: + benchmark = TORSION_RESULTS[input_mol2] + diff = torsions ^ benchmark + if len(diff) > 0: + err = "Torsion test failed for %s: %s" % ( + input_mol2, sorted(list(diff))) + raise ValueError(err) + except KeyError: + _LOGGER.warning( + "Skipping torsions for %s: %s", input_mol2, + sorted(list(torsions))) + + @pytest.mark.parametrize("input_pdb", ["1HPX", "1QBS", "1US0"], ids=str) def test_ligand(input_pdb, tmp_path): """PROPKA non-regression tests on proteins without ligands.""" ligand = Path("tests/data") / ("%s-ligand.mol2" % input_pdb) args = "--log-level=INFO --ff=AMBER --drop-water --ligand=%s" % ligand output_pqr = Path(input_pdb).stem + ".pqr" - common.run_pdb2pqr(args=args, input_pdb=input_pdb, output_pqr=output_pqr, - tmp_path=tmp_path) + common.run_pdb2pqr( + args=args, input_pdb=input_pdb, output_pqr=output_pqr, + tmp_path=tmp_path) # @pytest.mark.parametrize( From cb11c77e9ee2584f7040ca2e467814c46b542995 Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Mon, 15 Jun 2020 10:05:08 -0700 Subject: [PATCH 14/31] Tweak torsion handling in ligands. Hold list of torsions at molecule level. 
--- pdb2pqr/pdb2pqr/ligand/mol2.py | 17 ++++++++++------- pdb2pqr/tests/ligand_test.py | 7 ++----- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/pdb2pqr/pdb2pqr/ligand/mol2.py b/pdb2pqr/pdb2pqr/ligand/mol2.py index af95eebcd..1dde9e12b 100644 --- a/pdb2pqr/pdb2pqr/ligand/mol2.py +++ b/pdb2pqr/pdb2pqr/ligand/mol2.py @@ -51,8 +51,8 @@ class Mol2Atom: self.charge = None self.formal_charge = None self.radius = None - self.bonded_atoms = None - self.torsions = None + self.bonded_atoms = [] + self.torsions = [] # Terms for calculating atom electronegativity self.poly_terms = None # Atom electronegativity @@ -99,6 +99,7 @@ class Mol2Molecule: def __init__(self): self.atoms = OrderedDict() self.bonds = [] + self.torsions = set() self.serial = None self.name = None self.res_name = None @@ -107,8 +108,8 @@ class Mol2Molecule: self.y = None self.z = None - def get_torsions(self, start_atom): - """Get the torsion angles that start with this atom (name). + def set_torsions(self, start_atom): + """Set the torsion angles that start with this atom (name). Args: start_atom: starting atom name @@ -134,7 +135,7 @@ class Mol2Molecule: """ mol2_file = self.parse_atoms(mol2_file) mol2_file = self.parse_bonds(mol2_file) - self.create_bonded_atoms() + self.process_bonds() def parse_atoms(self, mol2_file): """Parse @ATOM section of file. 
@@ -227,7 +228,7 @@ class Mol2Molecule: self.bonds.append(bond) return mol2_file - def create_bonded_atoms(self): + def process_bonds(self): """Create a list of bonded atoms and torsions for each atom.""" atom_names = list(self.atoms.keys()) for bond in self.bonds: @@ -244,4 +245,6 @@ class Mol2Molecule: else: to_atom.bonded_atoms.append(from_atom) for atom_name, atom in self.atoms.items(): - atom.torsions = self.get_torsions(atom_name) + atom.torsions = self.set_torsions(atom_name) + for torsion in atom.torsions: + self.torsions.add(torsion) diff --git a/pdb2pqr/tests/ligand_test.py b/pdb2pqr/tests/ligand_test.py index f345f2282..89e3069a7 100644 --- a/pdb2pqr/tests/ligand_test.py +++ b/pdb2pqr/tests/ligand_test.py @@ -58,12 +58,9 @@ def test_torsions(input_mol2): mol2_path = Path("tests/data") / input_mol2 with open(mol2_path, "rt") as mol2_file: ligand.read(mol2_file) - torsions = set() - for name, atom in ligand.atoms.items(): - torsions |= set(atom.torsions) try: benchmark = TORSION_RESULTS[input_mol2] - diff = torsions ^ benchmark + diff = ligand.torsions ^ benchmark if len(diff) > 0: err = "Torsion test failed for %s: %s" % ( input_mol2, sorted(list(diff))) @@ -71,7 +68,7 @@ def test_torsions(input_mol2): except KeyError: _LOGGER.warning( "Skipping torsions for %s: %s", input_mol2, - sorted(list(torsions))) + sorted(list(ligand.torsions))) @pytest.mark.parametrize("input_pdb", ["1HPX", "1QBS", "1US0"], ids=str) From 71b9dcd9e3bc8422c8e8f2176e67a4a5016d90b3 Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Mon, 15 Jun 2020 14:15:36 -0700 Subject: [PATCH 15/31] Clean up torsion calculations. 
--- pdb2pqr/pdb2pqr/ligand/mol2.py | 102 ++++++++++++++++------------- pdb2pqr/pdb2pqr/ligand/topology.py | 20 ++++++ 2 files changed, 75 insertions(+), 47 deletions(-) create mode 100644 pdb2pqr/pdb2pqr/ligand/topology.py diff --git a/pdb2pqr/pdb2pqr/ligand/mol2.py b/pdb2pqr/pdb2pqr/ligand/mol2.py index 1dde9e12b..9b5cbf1a2 100644 --- a/pdb2pqr/pdb2pqr/ligand/mol2.py +++ b/pdb2pqr/pdb2pqr/ligand/mol2.py @@ -13,19 +13,22 @@ _LOGGER = logging.getLogger(__name__) class Mol2Bond: """MOL2 molecule bonds.""" - def __init__(self, bond_from, bond_to, bond_type, bond_id=0): + def __init__(self, atom1, atom2, bond_type, bond_id=0): """Initialize bond. Args: - bond_from: bond from this atom - bond_to: bond to this atom + atom1: name of first atom in bond + atom2: name of second atom in bond bond_type: type of bond: 1 (single), 2 (double), or ar (aromatic) bond_id: integer ID of bond """ - self.bond_to_self = bond_to - self.bond_from_self = bond_from + self.atoms = (atom1, atom2) self.type = bond_type - self.bond_id = bond_id + self.bond_id = int(bond_id) + + def __str__(self): + fmt = "{b.atoms[0]:s} {b.type:s}-bonded to {b.atoms[1]:s}" + return fmt.format(b=self) class Mol2Atom: @@ -52,7 +55,9 @@ class Mol2Atom: self.formal_charge = None self.radius = None self.bonded_atoms = [] + self.bonds = [] self.torsions = [] + self.rings = [] # Terms for calculating atom electronegativity self.poly_terms = None # Atom electronegativity @@ -100,15 +105,13 @@ class Mol2Molecule: self.atoms = OrderedDict() self.bonds = [] self.torsions = set() + self.rings = set() self.serial = None self.name = None self.res_name = None self.res_seq = None - self.x = None - self.y = None - self.z = None - def set_torsions(self, start_atom): + def find_atom_torsions(self, start_atom): """Set the torsion angles that start with this atom (name). 
Args: @@ -127,6 +130,23 @@ class Mol2Molecule: torsions.append((start_atom, bonded1, bonded2, end_atom)) return torsions + def set_torsions(self): + """Set all torsions in molecule.""" + for atom_name, atom in self.atoms.items(): + atom.torsions = self.find_atom_torsions(atom_name) + for torsion in atom.torsions: + self.torsions.add(torsion) + + def set_rings(self): + """Set all rings in molecule. + + Like many things, this was borrowed from StackOverflow: + https://stackoverflow.com/questions/12367801/finding-all-cycles-in-undirected-graphs + """ + for bond in self.bonds: + _LOGGER.error(str(bond)) + raise NotImplementedError() + def read(self, mol2_file): """Routines for reading MOL2 file. @@ -135,7 +155,6 @@ class Mol2Molecule: """ mol2_file = self.parse_atoms(mol2_file) mol2_file = self.parse_bonds(mol2_file) - self.process_bonds() def parse_atoms(self, mol2_file): """Parse @ATOM section of file. @@ -190,20 +209,22 @@ class Mol2Molecule: else: self.atoms[atom.name] = atom if len(duplicates) > 0: - raise KeyError("Found duplicate atoms names in MOL2 file: %s" % - duplicates) + raise KeyError( + "Found duplicate atoms names in MOL2 file: %s" % duplicates) return mol2_file def parse_bonds(self, mol2_file): """Parse @BOND section of file. + Atoms must already have been parsed. + Also sets up torsions and rings. + Args: mol2_file: file-like object with MOL2 data. 
Returns: - file object advanced to bonds section - Raises: - ValueError for problems parsing bond information + file object advanced to SUBSTRUCTURE section """ + atom_names = list(self.atoms.keys()) for line in mol2_file: line = line.strip() if not line: @@ -215,36 +236,23 @@ class Mol2Molecule: err = "Bond line too short: %s" % line raise ValueError(err) bond_type = words[3] - try: - bond_from = int(words[1]) - bond_to = int(words[2]) - bond_id = int(words[0]) - bond = Mol2Bond( - bond_from=bond_from, bond_to=bond_to, bond_type=bond_type, - bond_id=bond_id) - except ValueError as exc: - err = "Got error (%s) when parsing bond line: %s" % (exc, line) - raise ValueError(err) + bond_id = int(words[0]) + atom_id1 = int(words[1]) + atom_id2 = int(words[2]) + atom_name1 = atom_names[atom_id1-1] + atom1 = self.atoms[atom_name1] + atom_name2 = atom_names[atom_id2-1] + atom2 = self.atoms[atom_name2] + bond = Mol2Bond( + atom1=atom_name1, atom2=atom_name2, bond_type=bond_type, + bond_id=bond_id) + atom1.bonds.append(bond) + atom1.bonded_atom_names.append(atom_name2) + atom1.bonded_atoms.append(atom2) + atom2.bonds.append(bond) + atom2.bonded_atom_names.append(atom_name1) + atom2.bonded_atoms.append(atom1) self.bonds.append(bond) + self.set_torsions() + # self.set_rings() return mol2_file - - def process_bonds(self): - """Create a list of bonded atoms and torsions for each atom.""" - atom_names = list(self.atoms.keys()) - for bond in self.bonds: - from_atom_name = atom_names[bond.bond_from_self-1] - from_atom = self.atoms[from_atom_name] - to_atom_name = atom_names[bond.bond_to_self-1] - to_atom = self.atoms[to_atom_name] - if from_atom.bonded_atoms is None: - from_atom.bonded_atoms = [to_atom] - else: - from_atom.bonded_atoms.append(to_atom) - if to_atom.bonded_atoms is None: - to_atom.bonded_atoms = [from_atom] - else: - to_atom.bonded_atoms.append(from_atom) - for atom_name, atom in self.atoms.items(): - atom.torsions = self.set_torsions(atom_name) - for torsion in 
atom.torsions: - self.torsions.add(torsion) diff --git a/pdb2pqr/pdb2pqr/ligand/topology.py b/pdb2pqr/pdb2pqr/ligand/topology.py new file mode 100644 index 000000000..34993b9ce --- /dev/null +++ b/pdb2pqr/pdb2pqr/ligand/topology.py @@ -0,0 +1,20 @@ +"""Ligand topology classes.""" +import logging + + +_LOGGER = logging.getLogger(__name__) + + +class Topology: + """Ligand topology class.""" + + def __init__(self, molecule): + """Initialize with molecule. + + Args: + molecule: Mol2Molecule object + """ + self.atom_dict = {} + for atom in molecule.atoms: + self.atom_dict[atom.name] = atom + raise NotImplementedError() \ No newline at end of file From 2c155d56ecdf4ee100c5994dbd7374a8b8d4ca89 Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Mon, 15 Jun 2020 16:27:47 -0700 Subject: [PATCH 16/31] Identify rings in ligands. --- pdb2pqr/pdb2pqr/ligand/mol2.py | 75 +++++++++++++++++++++++++++++++--- pdb2pqr/tests/ligand_test.py | 58 +++++++++++++++++++++----- 2 files changed, 117 insertions(+), 16 deletions(-) diff --git a/pdb2pqr/pdb2pqr/ligand/mol2.py b/pdb2pqr/pdb2pqr/ligand/mol2.py index 9b5cbf1a2..533e275c1 100644 --- a/pdb2pqr/pdb2pqr/ligand/mol2.py +++ b/pdb2pqr/pdb2pqr/ligand/mol2.py @@ -5,6 +5,7 @@ """ import logging from collections import OrderedDict +from itertools import combinations import numpy @@ -137,15 +138,79 @@ class Mol2Molecule: for torsion in atom.torsions: self.torsions.add(torsion) + @staticmethod + def rotate_to_smallest(path): + """Rotate cycle path so that it begins with the smallest node. + + This was borrowed from StackOverflow: https://j.mp/2AHaukj + + Args: + path: list of atom names + Returns: + rotated path (list) + """ + n = path.index(min(path)) + return path[n:]+path[:n] + + def find_new_rings(self, path, rings, level=0): + """Find new rings in molecule. 
+ + This was borrowed from StackOverflow: https://j.mp/2AHaukj + + Args: + path: list of atom names + rings: current list of rings + level: recursion level + Returns: + new list of rings + """ + start_node = path[0] + next_node = None + sub_path = [] + for bond in self.bonds: + atom1 = bond.atoms[0] + atom2 = bond.atoms[1] + if start_node in (atom1, atom2): + if atom1 == start_node: + next_node = atom2 + else: + next_node = atom1 + if next_node not in path: + sub_path = [next_node] + sub_path.extend(path) + rings = self.find_new_rings(sub_path, rings, level+1) + elif len(path) > 2 and next_node == path[-1]: + path_ = self.rotate_to_smallest(path) + inv_path = tuple(self.rotate_to_smallest(path_[::-1])) + path_ = tuple(path_) + if (path_ not in rings) and (inv_path not in rings): + rings.add(tuple(path_)) + return rings + def set_rings(self): """Set all rings in molecule. - Like many things, this was borrowed from StackOverflow: - https://stackoverflow.com/questions/12367801/finding-all-cycles-in-undirected-graphs + This was borrowed from StackOverflow: https://j.mp/2AHaukj """ + self.rings = set() + rings = set() + # Generate all rings for bond in self.bonds: - _LOGGER.error(str(bond)) - raise NotImplementedError() + for atom_name in bond.atoms: + rings = self.find_new_rings([atom_name], rings) + # Prune rings that are products of other rings + ring_sets = [] + for i in range(2, len(rings)+1): + for combo in combinations(rings, i): + ring_set = set().union(*combo) + ring_sets.append(ring_set) + for ring in rings: + ring_set = set(ring) + if ring_set in ring_sets: + _LOGGER.debug("Fused ring: %s", ring) + else: + _LOGGER.debug("Unfused ring: %s", ring) + self.rings.add(ring) def read(self, mol2_file): """Routines for reading MOL2 file. 
@@ -254,5 +319,5 @@ class Mol2Molecule: atom2.bonded_atoms.append(atom1) self.bonds.append(bond) self.set_torsions() - # self.set_rings() + self.set_rings() return mol2_file diff --git a/pdb2pqr/tests/ligand_test.py b/pdb2pqr/tests/ligand_test.py index 89e3069a7..4e6f3820f 100644 --- a/pdb2pqr/tests/ligand_test.py +++ b/pdb2pqr/tests/ligand_test.py @@ -58,17 +58,53 @@ def test_torsions(input_mol2): mol2_path = Path("tests/data") / input_mol2 with open(mol2_path, "rt") as mol2_file: ligand.read(mol2_file) - try: - benchmark = TORSION_RESULTS[input_mol2] - diff = ligand.torsions ^ benchmark - if len(diff) > 0: - err = "Torsion test failed for %s: %s" % ( - input_mol2, sorted(list(diff))) - raise ValueError(err) - except KeyError: - _LOGGER.warning( - "Skipping torsions for %s: %s", input_mol2, - sorted(list(ligand.torsions))) + try: + benchmark = TORSION_RESULTS[input_mol2] + diff = ligand.torsions ^ benchmark + if len(diff) > 0: + err = "Torsion test failed for %s: %s" % ( + input_mol2, sorted(list(diff))) + raise ValueError(err) + except KeyError: + _LOGGER.warning( + "Skipping torsion test for %s: %s", input_mol2, + sorted(list(ligand.torsions))) + + +RING_RESULTS = { + "ethanol.mol2": set(), + "glycerol.mol2": set(), + "cyclohexane.mol2": {('CAA', 'CAD', 'CAE', 'CAF', 'CAC', 'CAB')}, + "naphthalene.mol2": { + ('CAA', 'CAB', 'CAC', 'CAH', 'CAG', 'CAF'), + ('CAC', 'CAH', 'CAI', 'CAJ', 'CAE', 'CAD')}, + "anthracene.mol2": { + ('CAC', 'CAJ', 'CAK', 'CAL', 'CAE', 'CAD'), + ('CAE', 'CAL', 'CAM', 'CAN', 'CAG', 'CAF'), + ('CAA', 'CAB', 'CAC', 'CAJ', 'CAI', 'CAH') + } +} + +@pytest.mark.parametrize("input_mol2", [ + "cyclohexane.mol2", "ethanol.mol2", "glycerol.mol2", "anthracene.mol2", + "naphthalene.mol2"]) +def test_rings(input_mol2): + """Test assignment of torsion angles.""" + ligand = parameterize.ParameterizedMolecule() + mol2_path = Path("tests/data") / input_mol2 + with open(mol2_path, "rt") as mol2_file: + ligand.read(mol2_file) + try: + benchmark = 
RING_RESULTS[input_mol2] + diff = ligand.rings ^ benchmark + if len(diff) > 0: + err = "Ring test failed for %s: %s" % ( + input_mol2, sorted(list(diff))) + raise ValueError(err) + except KeyError: + _LOGGER.warning( + "Skipping ring test for %s: %s", input_mol2, + sorted(list(ligand.rings))) @pytest.mark.parametrize("input_pdb", ["1HPX", "1QBS", "1US0"], ids=str) From 3dbc8c5217ca7c83992beb8207794189f25678a7 Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Mon, 15 Jun 2020 19:34:29 -0700 Subject: [PATCH 17/31] Add test that breaks current ring calculations. --- pdb2pqr/pdb2pqr/ligand/mol2.py | 5 +++ pdb2pqr/tests/data/phenalene.mol2 | 54 +++++++++++++++++++++++++++++++ pdb2pqr/tests/ligand_test.py | 14 ++++++-- 3 files changed, 70 insertions(+), 3 deletions(-) create mode 100644 pdb2pqr/tests/data/phenalene.mol2 diff --git a/pdb2pqr/pdb2pqr/ligand/mol2.py b/pdb2pqr/pdb2pqr/ligand/mol2.py index 533e275c1..bd89b1b3b 100644 --- a/pdb2pqr/pdb2pqr/ligand/mol2.py +++ b/pdb2pqr/pdb2pqr/ligand/mol2.py @@ -54,6 +54,7 @@ class Mol2Atom: self.seg_id = None self.charge = None self.formal_charge = None + self.num_rings = 0 self.radius = None self.bonded_atoms = [] self.bonds = [] @@ -199,6 +200,7 @@ class Mol2Molecule: for atom_name in bond.atoms: rings = self.find_new_rings([atom_name], rings) # Prune rings that are products of other rings + # TODO - testing on molecules like phenalene shows that this is broken ring_sets = [] for i in range(2, len(rings)+1): for combo in combinations(rings, i): @@ -211,6 +213,9 @@ class Mol2Molecule: else: _LOGGER.debug("Unfused ring: %s", ring) self.rings.add(ring) + for ring in self.rings: + for atom in ring: + self.atoms[atom].num_rings += 1 def read(self, mol2_file): """Routines for reading MOL2 file. 
diff --git a/pdb2pqr/tests/data/phenalene.mol2 b/pdb2pqr/tests/data/phenalene.mol2 new file mode 100644 index 000000000..5636064a3 --- /dev/null +++ b/pdb2pqr/tests/data/phenalene.mol2 @@ -0,0 +1,54 @@ +# +# +# This file was generated by PRODRG version AA180301.0717 +# PRODRG written/copyrighted by Daan van Aalten +# and Alexander Schuettelkopf +# +# Questions/comments to dava@davapc1.bioch.dundee.ac.uk +# +# When using this software in a publication, cite: +# A. W. Schuettelkopf and D. M. F. van Aalten (2004). +# PRODRG - a tool for high-throughput crystallography +# of protein-ligand complexes. +# Acta Crystallogr. D60, 1355--1363. +# +# +@MOLECULE +DRG + 13 15 1 +SMALL +USER_CHARGES + +PRODRG MOLECULE +@ATOM + 1 CAA -2.230 5.070 -11.340 C.3 1 DRG 0.000 + 2 CAB -1.910 4.500 -12.590 C.2 1 DRG 0.000 + 3 CAC -0.750 3.730 -12.790 C.2 1 DRG 0.000 + 4 CAG 0.160 3.560 -11.730 C.ar 1 DRG 0.000 + 5 CAF -0.060 4.250 -10.510 C.ar 1 DRG 0.000 + 6 CAE -1.210 5.060 -10.360 C.ar 1 DRG 0.000 + 7 CAD -1.380 5.810 -9.170 C.ar 1 DRG 0.000 + 8 CAI -0.420 5.740 -8.150 C.ar 1 DRG 0.000 + 9 CAJ 0.720 4.930 -8.290 C.ar 1 DRG 0.000 + 10 CAK 0.910 4.180 -9.470 C.ar 1 DRG 0.000 + 11 CAL 2.080 3.410 -9.660 C.ar 1 DRG 0.000 + 12 CAM 2.280 2.720 -10.870 C.ar 1 DRG 0.000 + 13 CAH 1.330 2.800 -11.910 C.ar 1 DRG 0.000 +@BOND + 1 1 2 1 + 2 6 1 1 + 3 2 3 2 + 4 4 3 1 + 5 4 5 ar + 6 4 13 ar + 7 5 6 ar + 8 5 10 ar + 9 6 7 ar + 10 7 8 ar + 11 8 9 ar + 12 10 9 ar + 13 10 11 ar + 14 11 12 ar + 15 12 13 ar +@SUBSTRUCTURE + 1 DRG 1 diff --git a/pdb2pqr/tests/ligand_test.py b/pdb2pqr/tests/ligand_test.py index 4e6f3820f..768f60b60 100644 --- a/pdb2pqr/tests/ligand_test.py +++ b/pdb2pqr/tests/ligand_test.py @@ -81,13 +81,16 @@ RING_RESULTS = { "anthracene.mol2": { ('CAC', 'CAJ', 'CAK', 'CAL', 'CAE', 'CAD'), ('CAE', 'CAL', 'CAM', 'CAN', 'CAG', 'CAF'), - ('CAA', 'CAB', 'CAC', 'CAJ', 'CAI', 'CAH') - } + ('CAA', 'CAB', 'CAC', 'CAJ', 'CAI', 'CAH')}, + "phenalene.mol2": { + ('CAA', 'CAE', 'CAF', 'CAG', 'CAC', 
'CAB'), + ('CAD', 'CAI', 'CAJ', 'CAK', 'CAF', 'CAE'), + ('CAF', 'CAG', 'CAH', 'CAM', 'CAL', 'CAK')} } @pytest.mark.parametrize("input_mol2", [ "cyclohexane.mol2", "ethanol.mol2", "glycerol.mol2", "anthracene.mol2", - "naphthalene.mol2"]) + "naphthalene.mol2", "phenalene.mol2"]) def test_rings(input_mol2): """Test assignment of torsion angles.""" ligand = parameterize.ParameterizedMolecule() @@ -101,6 +104,11 @@ def test_rings(input_mol2): err = "Ring test failed for %s: %s" % ( input_mol2, sorted(list(diff))) raise ValueError(err) + for atom_name in ligand.atoms: + atom = ligand.atoms[atom_name] + if atom.num_rings > 0: + str_ = "%d rings: %s" % (atom.num_rings, atom) + _LOGGER.debug(str_) except KeyError: _LOGGER.warning( "Skipping ring test for %s: %s", input_mol2, From 0703103cc1093c40e4040de86ebe862fc6c39807 Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Mon, 15 Jun 2020 20:03:52 -0700 Subject: [PATCH 18/31] Add type checking for bond type. --- pdb2pqr/pdb2pqr/ligand/mol2.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/pdb2pqr/pdb2pqr/ligand/mol2.py b/pdb2pqr/pdb2pqr/ligand/mol2.py index bd89b1b3b..6f4d80eb9 100644 --- a/pdb2pqr/pdb2pqr/ligand/mol2.py +++ b/pdb2pqr/pdb2pqr/ligand/mol2.py @@ -12,6 +12,10 @@ import numpy _LOGGER = logging.getLogger(__name__) +# These are the allowed bond types +BOND_TYPES = {"single", "double", "aromatic"} + + class Mol2Bond: """MOL2 molecule bonds.""" def __init__(self, atom1, atom2, bond_type, bond_id=0): @@ -24,8 +28,12 @@ class Mol2Bond: bond_id: integer ID of bond """ self.atoms = (atom1, atom2) - self.type = bond_type self.bond_id = int(bond_id) + if bond_type in BOND_TYPES: + self.type = bond_type + else: + err = "Unknown bond type: %s" % bond_type + raise ValueError(err) def __str__(self): fmt = "{b.atoms[0]:s} {b.type:s}-bonded to {b.atoms[1]:s}" @@ -306,6 +314,15 @@ class Mol2Molecule: err = "Bond line too short: %s" % line raise ValueError(err) bond_type = words[3] + if 
bond_type == "1": + bond_type = "single" + elif bond_type == "2": + bond_type = "double" + elif bond_type == "ar": + bond_type = "aromatic" + else: + err = "Unknown bond type: %s" % bond_type + raise ValueError(err) bond_id = int(words[0]) atom_id1 = int(words[1]) atom_id2 = int(words[2]) From 1c6cbef3c3d6e329ba6115924eb11786e7717556 Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Sun, 21 Jun 2020 16:48:12 -0700 Subject: [PATCH 19/31] Implement old bond-order detection methods. Note that the old bond-order detection approach fails for some bond types; see #614 for more information. Also note that the new ring detection method fails for multiply fused rings; see #613 for more information. --- pdb2pqr/pdb2pqr/ligand/__init__.py | 24 +++++- pdb2pqr/pdb2pqr/ligand/mol2.py | 63 ++++++++++++-- pdb2pqr/tests/data/acetic-acid.mol2 | 33 ++++++++ pdb2pqr/tests/data/acetylcholine.mol2 | 45 ++++++++++ pdb2pqr/tests/data/cyanide.mol2 | 31 +++++++ pdb2pqr/tests/data/fatty-acid.mol2 | 49 +++++++++++ pdb2pqr/tests/data/pyrrole.mol2 | 38 +++++++++ pdb2pqr/tests/data/trimethylamine.mol2 | 35 ++++++++ pdb2pqr/tests/ligand_test.py | 113 +++++++++++++++++++++---- 9 files changed, 404 insertions(+), 27 deletions(-) create mode 100644 pdb2pqr/tests/data/acetic-acid.mol2 create mode 100644 pdb2pqr/tests/data/acetylcholine.mol2 create mode 100644 pdb2pqr/tests/data/cyanide.mol2 create mode 100644 pdb2pqr/tests/data/fatty-acid.mol2 create mode 100644 pdb2pqr/tests/data/pyrrole.mol2 create mode 100644 pdb2pqr/tests/data/trimethylamine.mol2 diff --git a/pdb2pqr/pdb2pqr/ligand/__init__.py b/pdb2pqr/pdb2pqr/ligand/__init__.py index ca5e4f47d..63ae93e7c 100644 --- a/pdb2pqr/pdb2pqr/ligand/__init__.py +++ b/pdb2pqr/pdb2pqr/ligand/__init__.py @@ -3,6 +3,9 @@ Jens Erik Nielsen, University College Dublin 2004 """ import sys +import pandas + + assert sys.version_info >= (3, 5) @@ -16,7 +19,26 @@ assert sys.version_info >= (3, 5) # See also the AMBER mailing list: http://amber.ch.ic.ac.uk/archive/. 
# # The van der Waals radius is used for chlorine. - PARSE_RADII = { "C": 1.70, "N": 1.50, "O": 1.40, "S": 1.85, "H": 1.00, "BR":2.50, "F": 1.20, "P": 1.90, "CL": 1.75} + + +# TODO - this belongs in a configuration file somewhere other than here. +# Bond lengths from +# http://www.chem.swin.edu.au/modules/mod2/bondlen.html +# We should get a better reference +_BOND_LENGTH_DICTS = [ + {"atom1": 'C', "atom2": 'C', "length": 1.54, "type": "single"}, + {"atom1": 'C', "atom2": 'C', "length": 1.34, "type": "double"}, + {"atom1": 'C', "atom2": 'C', "length": 1.20, "type": "triple"}, + {"atom1": 'C', "atom2": 'C', "length": 1.40, "type": "aromatic"}, + {"atom1": 'C', "atom2": 'O', "length": 1.43, "type": "single"}, + {"atom1": 'C', "atom2": 'O', "length": 1.21, "type": "double"}, + {"atom1": 'C', "atom2": 'N', "length": 1.47, "type": "single"}, + {"atom1": 'C', "atom2": 'N', "length": 1.25, "type": "double"}, + {"atom1": 'C', "atom2": 'N', "length": 1.16, "type": "triple"}, + {"atom1": 'C', "atom2": 'N', "length": 1.34, "type": "aromatic"}, + {"atom1": 'N', "atom2": 'N', "length": 1.35, "type": "aromatic"} +] +BOND_LENGTHS = pandas.DataFrame(_BOND_LENGTH_DICTS) diff --git a/pdb2pqr/pdb2pqr/ligand/mol2.py b/pdb2pqr/pdb2pqr/ligand/mol2.py index 6f4d80eb9..8f4752105 100644 --- a/pdb2pqr/pdb2pqr/ligand/mol2.py +++ b/pdb2pqr/pdb2pqr/ligand/mol2.py @@ -6,14 +6,18 @@ import logging from collections import OrderedDict from itertools import combinations -import numpy +from numpy import array +from numpy.linalg import norm +from . 
import BOND_LENGTHS _LOGGER = logging.getLogger(__name__) # These are the allowed bond types -BOND_TYPES = {"single", "double", "aromatic"} +BOND_TYPES = {"single", "double", "triple", "aromatic"} +# This is the maximum deviation from an ideal bond distance +BOND_DIST = 2.0 class Mol2Bond: @@ -35,10 +39,40 @@ class Mol2Bond: err = "Unknown bond type: %s" % bond_type raise ValueError(err) + @property + def atom_names(self): + """Return tuple with names of atoms in bond.""" + return (self.atoms[0].name, self.atoms[1].name) + + @property + def length(self): + """Return bond length.""" + return self.atoms[0].distance(self.atoms[1]) + def __str__(self): - fmt = "{b.atoms[0]:s} {b.type:s}-bonded to {b.atoms[1]:s}" + fmt = "{b.atoms[0].name:s} {b.type:s}-bonded to {b.atoms[1].name:s}" return fmt.format(b=self) + @property + def bond_order(self): + """Attempt to determine the order of this bond. + + Return: + string with order of bond or None + """ + types = sorted( + [self.atoms[0].atom_type[0], self.atoms[1].atom_type[0]]) + bond_lengths = BOND_LENGTHS.loc[ + (BOND_LENGTHS["atom1"] == types[0]) + & (BOND_LENGTHS["atom2"] == types[1])] + best_type = None + best_fit = BOND_DIST + for _, row in bond_lengths.iterrows(): + if abs(self.length - row["length"]) < best_fit: + best_fit = abs(self.length - row["length"]) + best_type = row["type"] + return best_type + class Mol2Atom: """MOL2 molecule atoms.""" @@ -75,6 +109,16 @@ class Mol2Atom: # Atom charge change during equilibration self.delta_charge = None + def distance(self, other): + """Get distance between two atoms. 
+ + Args: + other: other atom object + Returns: + float with distance + """ + return norm(other.coords - self.coords) + def __str__(self): """Generate PDB line from MOL2.""" pdb_fmt = ( @@ -86,7 +130,7 @@ class Mol2Atom: @property def coords(self): """Return coordinates as numpy vector.""" - return numpy.array([self.x, self.y, self.z]) + return array([self.x, self.y, self.z]) @property def bonded_atom_names(self): @@ -177,8 +221,8 @@ class Mol2Molecule: next_node = None sub_path = [] for bond in self.bonds: - atom1 = bond.atoms[0] - atom2 = bond.atoms[1] + atom1 = bond.atoms[0].name + atom2 = bond.atoms[1].name if start_node in (atom1, atom2): if atom1 == start_node: next_node = atom2 @@ -205,7 +249,7 @@ class Mol2Molecule: rings = set() # Generate all rings for bond in self.bonds: - for atom_name in bond.atoms: + for atom_name in bond.atom_names: rings = self.find_new_rings([atom_name], rings) # Prune rings that are products of other rings # TODO - testing on molecules like phenalene shows that this is broken @@ -318,6 +362,8 @@ class Mol2Molecule: bond_type = "single" elif bond_type == "2": bond_type = "double" + elif bond_type == "3": + bond_type = "triple" elif bond_type == "ar": bond_type = "aromatic" else: @@ -331,8 +377,7 @@ class Mol2Molecule: atom_name2 = atom_names[atom_id2-1] atom2 = self.atoms[atom_name2] bond = Mol2Bond( - atom1=atom_name1, atom2=atom_name2, bond_type=bond_type, - bond_id=bond_id) + atom1=atom1, atom2=atom2, bond_type=bond_type, bond_id=bond_id) atom1.bonds.append(bond) atom1.bonded_atom_names.append(atom_name2) atom1.bonded_atoms.append(atom2) diff --git a/pdb2pqr/tests/data/acetic-acid.mol2 b/pdb2pqr/tests/data/acetic-acid.mol2 new file mode 100644 index 000000000..642e59752 --- /dev/null +++ b/pdb2pqr/tests/data/acetic-acid.mol2 @@ -0,0 +1,33 @@ +# +# +# This file was generated by PRODRG version AA180301.0717 +# PRODRG written/copyrighted by Daan van Aalten +# and Alexander Schuettelkopf +# +# Questions/comments to 
dava@davapc1.bioch.dundee.ac.uk +# +# When using this software in a publication, cite: +# A. W. Schuettelkopf and D. M. F. van Aalten (2004). +# PRODRG - a tool for high-throughput crystallography +# of protein-ligand complexes. +# Acta Crystallogr. D60, 1355--1363. +# +# +@MOLECULE +DRG + 4 3 1 +SMALL +USER_CHARGES + +PRODRG MOLECULE +@ATOM + 1 OAC -1.420 -1.790 -17.890 O.co2 1 DRG -0.360 + 2 CAB -1.510 -2.970 -18.290 C.2 1 DRG 0.720 + 3 OAD -1.200 -3.990 -17.640 O.co2 1 DRG -0.360 + 4 CAA -2.050 -3.180 -19.710 C.3 1 DRG 0.000 +@BOND + 1 2 1 2 + 2 2 3 2 + 3 2 4 1 +@SUBSTRUCTURE + 1 DRG 1 diff --git a/pdb2pqr/tests/data/acetylcholine.mol2 b/pdb2pqr/tests/data/acetylcholine.mol2 new file mode 100644 index 000000000..c2ff8b20a --- /dev/null +++ b/pdb2pqr/tests/data/acetylcholine.mol2 @@ -0,0 +1,45 @@ +# +# +# This file was generated by PRODRG version AA180301.0717 +# PRODRG written/copyrighted by Daan van Aalten +# and Alexander Schuettelkopf +# +# Questions/comments to dava@davapc1.bioch.dundee.ac.uk +# +# When using this software in a publication, cite: +# A. W. Schuettelkopf and D. M. F. van Aalten (2004). +# PRODRG - a tool for high-throughput crystallography +# of protein-ligand complexes. +# Acta Crystallogr. D60, 1355--1363. 
+# +# +@MOLECULE +DRG + 10 9 1 +SMALL +USER_CHARGES + +PRODRG MOLECULE +@ATOM + 1 CAI 19.020 -0.860 3.850 C.3 1 DRG 0.000 + 2 CAH 17.580 -0.690 3.360 C.2 1 DRG 0.425 + 3 OAB 16.680 -1.340 3.890 O.2 1 DRG -0.335 + 4 OAG 17.450 0.190 2.330 O.3 1 DRG -0.315 + 5 CAF 16.110 0.400 1.870 C.3 1 DRG 0.225 + 6 CAE 16.130 1.350 0.650 C.3 1 DRG 0.000 + 7 NAD 14.840 1.710 0.010 N.3 1 DRG 0.000 + 8 CAJ 15.160 2.580 -1.130 C.3 1 DRG 0.000 + 9 CAA 13.950 2.480 0.910 C.3 1 DRG 0.000 + 10 CAC 14.110 0.550 -0.530 C.3 1 DRG 0.000 +@BOND + 1 2 1 1 + 2 2 3 2 + 3 2 4 1 + 4 5 4 1 + 5 5 6 1 + 6 7 6 1 + 7 7 8 1 + 8 7 9 1 + 9 7 10 1 +@SUBSTRUCTURE + 1 DRG 1 diff --git a/pdb2pqr/tests/data/cyanide.mol2 b/pdb2pqr/tests/data/cyanide.mol2 new file mode 100644 index 000000000..b84280d0b --- /dev/null +++ b/pdb2pqr/tests/data/cyanide.mol2 @@ -0,0 +1,31 @@ +# +# +# This file was generated by PRODRG version AA180301.0717 +# PRODRG written/copyrighted by Daan van Aalten +# and Alexander Schuettelkopf +# +# Questions/comments to dava@davapc1.bioch.dundee.ac.uk +# +# When using this software in a publication, cite: +# A. W. Schuettelkopf and D. M. F. van Aalten (2004). +# PRODRG - a tool for high-throughput crystallography +# of protein-ligand complexes. +# Acta Crystallogr. D60, 1355--1363. 
+# +# +@MOLECULE +DRG + 3 2 1 +SMALL +USER_CHARGES + +PRODRG MOLECULE +@ATOM + 1 NAC 14.390 -15.160 8.360 N.1 1 DRG 0.000 + 2 CAB 15.480 -15.470 8.460 C.1 1 DRG 0.000 + 3 CAA 16.890 -15.870 8.580 C.3 1 DRG 0.000 +@BOND + 1 2 1 3 + 2 2 3 1 +@SUBSTRUCTURE + 1 DRG 1 diff --git a/pdb2pqr/tests/data/fatty-acid.mol2 b/pdb2pqr/tests/data/fatty-acid.mol2 new file mode 100644 index 000000000..db441c039 --- /dev/null +++ b/pdb2pqr/tests/data/fatty-acid.mol2 @@ -0,0 +1,49 @@ +# +# +# This file was generated by PRODRG version AA180301.0717 +# PRODRG written/copyrighted by Daan van Aalten +# and Alexander Schuettelkopf +# +# Questions/comments to dava@davapc1.bioch.dundee.ac.uk +# +# When using this software in a publication, cite: +# A. W. Schuettelkopf and D. M. F. van Aalten (2004). +# PRODRG - a tool for high-throughput crystallography +# of protein-ligand complexes. +# Acta Crystallogr. D60, 1355--1363. +# +# +@MOLECULE +DRG + 12 11 1 +SMALL +USER_CHARGES + +PRODRG MOLECULE +@ATOM + 1 OAA -3.000 7.130 9.800 O.co2 1 DRG -0.360 + 2 CAC -3.230 6.620 8.680 C.2 1 DRG 0.720 + 3 OAB -2.690 6.950 7.610 O.co2 1 DRG -0.360 + 4 CAD -4.310 5.540 8.610 C.3 1 DRG 0.000 + 5 CAE -3.800 4.140 9.000 C.3 1 DRG 0.000 + 6 CAF -2.790 3.570 8.000 C.3 1 DRG 0.000 + 7 CAG -1.380 4.100 8.300 C.2 1 DRG 0.000 + 8 CAH -0.550 4.480 7.070 C.2 1 DRG 0.000 + 9 CAI 0.880 4.950 7.340 C.3 1 DRG 0.000 + 10 CAJ 0.970 6.470 7.470 C.3 1 DRG 0.000 + 11 CAK 0.460 6.960 8.830 C.3 1 DRG 0.000 + 12 CAL 0.500 8.480 8.880 C.3 1 DRG 0.000 +@BOND + 1 2 1 2 + 2 2 3 2 + 3 2 4 1 + 4 4 5 1 + 5 5 6 1 + 6 6 7 1 + 7 7 8 2 + 8 9 8 1 + 9 9 10 1 + 10 10 11 1 + 11 11 12 1 +@SUBSTRUCTURE + 1 DRG 1 diff --git a/pdb2pqr/tests/data/pyrrole.mol2 b/pdb2pqr/tests/data/pyrrole.mol2 new file mode 100644 index 000000000..1c375f316 --- /dev/null +++ b/pdb2pqr/tests/data/pyrrole.mol2 @@ -0,0 +1,38 @@ +# +# +# This file was generated by PRODRG version AA180301.0717 +# PRODRG written/copyrighted by Daan van Aalten +# and Alexander Schuettelkopf 
+# +# Questions/comments to dava@davapc1.bioch.dundee.ac.uk +# +# When using this software in a publication, cite: +# A. W. Schuettelkopf and D. M. F. van Aalten (2004). +# PRODRG - a tool for high-throughput crystallography +# of protein-ligand complexes. +# Acta Crystallogr. D60, 1355--1363. +# +# +@MOLECULE +DRG + 6 6 1 +SMALL +USER_CHARGES + +PRODRG MOLECULE +@ATOM + 1 CAA -5.490 0.200 -1.460 C.ar 1 DRG 0.000 + 2 CAD -4.900 -0.330 -0.390 C.ar 1 DRG 0.000 + 3 CAE -3.580 -0.250 -0.570 C.ar 1 DRG 0.000 + 4 CAC -3.360 0.320 -1.750 C.ar 1 DRG 0.000 + 5 NAB -4.540 0.600 -2.300 N.ar 1 DRG -0.280 + 6 HAB -4.680 1.030 -3.190 H 1 DRG 0.280 +@BOND + 1 1 2 ar + 2 1 5 ar + 3 2 3 ar + 4 3 4 ar + 5 4 5 ar + 6 5 6 1 +@SUBSTRUCTURE + 1 DRG 1 diff --git a/pdb2pqr/tests/data/trimethylamine.mol2 b/pdb2pqr/tests/data/trimethylamine.mol2 new file mode 100644 index 000000000..f519e550c --- /dev/null +++ b/pdb2pqr/tests/data/trimethylamine.mol2 @@ -0,0 +1,35 @@ +# +# +# This file was generated by PRODRG version AA180301.0717 +# PRODRG written/copyrighted by Daan van Aalten +# and Alexander Schuettelkopf +# +# Questions/comments to dava@davapc1.bioch.dundee.ac.uk +# +# When using this software in a publication, cite: +# A. W. Schuettelkopf and D. M. F. van Aalten (2004). +# PRODRG - a tool for high-throughput crystallography +# of protein-ligand complexes. +# Acta Crystallogr. D60, 1355--1363. 
+# +# +@MOLECULE +DRG + 5 4 1 +SMALL +USER_CHARGES + +PRODRG MOLECULE +@ATOM + 1 CAA 12.430 -6.030 48.960 C.3 1 DRG 0.000 + 2 NAB 13.150 -7.170 48.370 N.3 1 DRG 0.000 + 3 HAJ 13.530 -7.730 49.100 H 1 DRG 0.000 + 4 CAD 14.220 -6.680 47.490 C.3 1 DRG 0.000 + 5 CAC 12.210 -7.970 47.580 C.3 1 DRG 0.000 +@BOND + 1 2 1 1 + 2 2 3 1 + 3 2 4 1 + 4 2 5 1 +@SUBSTRUCTURE + 1 DRG 1 diff --git a/pdb2pqr/tests/ligand_test.py b/pdb2pqr/tests/ligand_test.py index 768f60b60..56dc23583 100644 --- a/pdb2pqr/tests/ligand_test.py +++ b/pdb2pqr/tests/ligand_test.py @@ -88,31 +88,110 @@ RING_RESULTS = { ('CAF', 'CAG', 'CAH', 'CAM', 'CAL', 'CAK')} } + +@pytest.mark.parametrize("input_mol2", ["phenalene.mol2"]) +def test_bad_rings(input_mol2): + """Test assignment of torsion angles.""" + ligand = parameterize.ParameterizedMolecule() + mol2_path = Path("tests/data") / input_mol2 + with open(mol2_path, "rt") as mol2_file: + ligand.read(mol2_file) + benchmark = RING_RESULTS[input_mol2] + with pytest.raises(ValueError) as err: + diff = ligand.rings ^ benchmark + if len(diff) > 0: + err = "Ring test failed for %s: %s" % ( + input_mol2, sorted(list(diff))) + raise ValueError(err) + err = "Known bond detection failure for %s: %s" % (input_mol2, err) + _LOGGER.error(err) + + @pytest.mark.parametrize("input_mol2", [ "cyclohexane.mol2", "ethanol.mol2", "glycerol.mol2", "anthracene.mol2", - "naphthalene.mol2", "phenalene.mol2"]) + "naphthalene.mol2"]) def test_rings(input_mol2): """Test assignment of torsion angles.""" ligand = parameterize.ParameterizedMolecule() mol2_path = Path("tests/data") / input_mol2 with open(mol2_path, "rt") as mol2_file: ligand.read(mol2_file) - try: - benchmark = RING_RESULTS[input_mol2] - diff = ligand.rings ^ benchmark - if len(diff) > 0: - err = "Ring test failed for %s: %s" % ( - input_mol2, sorted(list(diff))) - raise ValueError(err) - for atom_name in ligand.atoms: - atom = ligand.atoms[atom_name] - if atom.num_rings > 0: - str_ = "%d rings: %s" % (atom.num_rings, 
atom) - _LOGGER.debug(str_) - except KeyError: - _LOGGER.warning( - "Skipping ring test for %s: %s", input_mol2, - sorted(list(ligand.rings))) + benchmark = RING_RESULTS[input_mol2] + diff = ligand.rings ^ benchmark + if len(diff) > 0: + err = "Ring test failed for %s: %s" % ( + input_mol2, sorted(list(diff))) + raise ValueError(err) + for atom_name in ligand.atoms: + atom = ligand.atoms[atom_name] + if atom.num_rings > 0: + str_ = "%d rings: %s" % (atom.num_rings, atom) + _LOGGER.debug(str_) + + +BOND_RESULTS = { + "cyclohexane.mol2": 6 * ["single"], + "ethanol.mol2": [ + "single", "single", None, "single", "single"], + "glycerol.mol2": [ + None, "single", "single", "single", "single", None, "single", None], + "acetylcholine.mol2": [ + "single", "double", "single", "single", "single", "single", "single", + "single", "single"], + "cyanide.mol2": [ + "triple", "single"], + "pyrrole.mol2": [ + "aromatic", "aromatic", "aromatic", "aromatic", "aromatic", None], + "fatty-acid.mol2": [ + "double", "double", "single", "single", "single", "single", "double", + "single", "single", "single", "single"], + "trimethylamine.mol2": ["single", None, "single", "single"], + "naphthalene.mol2": 11 * ["aromatic"] +} + + +@pytest.mark.parametrize("input_mol2", ["fatty-acid.mol2", "pyrrole.mol2"]) +def test_bad_bonds(input_mol2): + """Test known failure of detected bond types.""" + ligand = parameterize.ParameterizedMolecule() + mol2_path = Path("tests/data") / input_mol2 + with open(mol2_path, "rt") as mol2_file: + ligand.read(mol2_file) + results = BOND_RESULTS[input_mol2] + for ibond, bond in enumerate(ligand.bonds): + try: + if bond.bond_order != results[ibond]: + err = "Incorrect order for %s. 
Got %s, expected %s" % ( + str(bond), bond.bond_order, results[ibond]) + err = "Known bond detection failure for %s: %s" % ( + input_mol2, err) + _LOGGER.error(err) + except IndexError: + err = "Add test for %s -- %s (%s)" % ( + input_mol2, str(bond), bond.bond_order) + raise IndexError(err) + + +@pytest.mark.parametrize("input_mol2", [ + "cyclohexane.mol2", "ethanol.mol2", "glycerol.mol2", "acetylcholine.mol2", + "cyanide.mol2", "trimethylamine.mol2", "naphthalene.mol2"]) +def test_bonds(input_mol2): + """Test detection of bond types.""" + ligand = parameterize.ParameterizedMolecule() + mol2_path = Path("tests/data") / input_mol2 + with open(mol2_path, "rt") as mol2_file: + ligand.read(mol2_file) + results = BOND_RESULTS[input_mol2] + for ibond, bond in enumerate(ligand.bonds): + try: + if bond.bond_order != results[ibond]: + err = "Incorrect order for %s. Got %s, expected %s" % ( + str(bond), bond.bond_order, results[ibond]) + raise ValueError(err) + except IndexError: + err = "Add test for %s -- %s (%s)" % ( + input_mol2, str(bond), bond.bond_order) + raise IndexError(err) @pytest.mark.parametrize("input_pdb", ["1HPX", "1QBS", "1US0"], ids=str) From 6e8133af73851e4feabdd8ff11bafd9ce4d44d20 Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Sat, 27 Jun 2020 11:29:54 -0700 Subject: [PATCH 20/31] Clean up README.md Most of these changes are to improve rendering on http://www.poissonboltzmann.org/ and include: * Clean up spacing between badges and standardize -- @intendo do we still need all of the testing badges? 
* Replace GitHub-specific symbols with unicode emoji * Remove "Tests pass" column from datasheet table since it is covered by badges * Add explanation to datasheet table --- README.md | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index b3d4601ae..db993174b 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,8 @@ - - -[![Build Status](https://travis-ci.org/Electrostatics/apbs-pdb2pqr.svg?branch=master)](https://travis-ci.org/Electrostatics/apbs-pdb2pqr) - -![Build](https://github.com/Electrostatics/apbs-pdb2pqr/workflows/Build/badge.svg) - -![Build](https://ci.appveyor.com/api/projects/status/github/Electrostatics/apbs-pdb2pqr?branch=master&svg=true) +[![Homepage](https://img.shields.io/badge/homepage-poissonboltzmann-blue.svg)](http://www.poissonboltzmann.org) +[![Docs Build](https://readthedocs.org/projects/apbs-pdb2pqr/badge/)](https://apbs-pdb2pqr.readthedocs.io/) +[![Travis Build](https://travis-ci.org/Electrostatics/apbs-pdb2pqr.svg?branch=master)](https://travis-ci.org/Electrostatics/apbs-pdb2pqr) +![GitHub Build](https://github.com/Electrostatics/apbs-pdb2pqr/workflows/Build/badge.svg) +![Appveyor Build](https://ci.appveyor.com/api/projects/status/github/Electrostatics/apbs-pdb2pqr?branch=master&svg=true) # APBS and PDB2PQR: electrostatic and solvation properties for complex molecules @@ -53,9 +51,10 @@ Additional support and contributors are listed in the [online documentation](htt ### APBS Datasheet +This shows the status of APBS functionality on different platforms. 
-OS | PYTHON VERSION | GEOFLOW | BEM,MSMS | FETK | PBSAM | PBAM | PYTHON | SHARED_LIBS | TESTS PASS -------------- | ------------ | ------------- | ------------- | ------------- | ------------- | ------------- | ------------- | ------------- | ------------- -Ubuntu latest | 3.6, 3.7 | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: -MacOSX latest | 3.6, 3.7 | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: -Windows 10 | 3.7 | :white_check_mark: | :white_check_mark: | :x: | :x: | :white_check_mark: | :white_check_mark: | :x: | :white_check_mark: +OS | PYTHON VERSION | GEOFLOW | BEM, MSMS | FETK | PBSAM | PBAM | PYTHON | SHARED_LIBS | +------------- | -------------- | -------- | --------- | ----- | ----- | ---- | ------ | ----------- | +Ubuntu latest | 3.6, 3.7 | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | +MacOSX latest | 3.6, 3.7 | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | +Windows 10 | 3.7 | ✔️ | ✔️ | ❌ | ❌ | ✔️ | ✔️ | ❌ | From d7c5569ac6bc6c397199790b21e47ce7aa639862 Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Sat, 27 Jun 2020 18:42:18 -0700 Subject: [PATCH 21/31] Add formal charge calculation. Add charge calculation and tests. Breaks other tests. 
--- pdb2pqr/pdb2pqr/ligand/__init__.py | 33 +++- pdb2pqr/pdb2pqr/ligand/mol2.py | 105 ++++++++++-- pdb2pqr/pdb2pqr/ligand/parameterize.py | 2 - .../data/{acetic-acid.mol2 => acetate.mol2} | 16 +- .../data/{cyanide.mol2 => acetonitrile.mol2} | 14 +- pdb2pqr/tests/data/acetylcholine.mol2 | 72 +++++--- pdb2pqr/tests/data/anthracene.mol2 | 82 +++++---- pdb2pqr/tests/data/crown-ether.mol2 | 110 ++++++++++++ pdb2pqr/tests/data/cyclohexane.mol2 | 46 +++-- pdb2pqr/tests/data/ethanol.mol2 | 26 ++- pdb2pqr/tests/data/fatty-acid.mol2 | 72 ++++++-- pdb2pqr/tests/data/glycerol.mol2 | 42 +++-- pdb2pqr/tests/data/naphthalene.mol2 | 60 ++++--- pdb2pqr/tests/data/phenalene.mol2 | 54 ------ pdb2pqr/tests/data/pyrrole.mol2 | 34 ++-- pdb2pqr/tests/data/trimethylamine.mol2 | 44 +++-- pdb2pqr/tests/ligand_results.py | 80 +++++++++ pdb2pqr/tests/ligand_test.py | 162 +++++------------- 18 files changed, 712 insertions(+), 342 deletions(-) rename pdb2pqr/tests/data/{acetic-acid.mol2 => acetate.mol2} (57%) rename pdb2pqr/tests/data/{cyanide.mol2 => acetonitrile.mol2} (59%) create mode 100644 pdb2pqr/tests/data/crown-ether.mol2 delete mode 100644 pdb2pqr/tests/data/phenalene.mol2 create mode 100644 pdb2pqr/tests/ligand_results.py diff --git a/pdb2pqr/pdb2pqr/ligand/__init__.py b/pdb2pqr/pdb2pqr/ligand/__init__.py index 63ae93e7c..2d787f83f 100644 --- a/pdb2pqr/pdb2pqr/ligand/__init__.py +++ b/pdb2pqr/pdb2pqr/ligand/__init__.py @@ -20,8 +20,8 @@ assert sys.version_info >= (3, 5) # # The van der Waals radius is used for chlorine. PARSE_RADII = { - "C": 1.70, "N": 1.50, "O": 1.40, "S": 1.85, "H": 1.00, "BR":2.50, - "F": 1.20, "P": 1.90, "CL": 1.75} + "C": 1.70, "N": 1.50, "O": 1.40, "S": 1.85, "H": 1.00, "Br":2.50, + "F": 1.20, "P": 1.90, "Cl": 1.75} # TODO - this belongs in a configuration file somewhere other than here. 
@@ -42,3 +42,32 @@ _BOND_LENGTH_DICTS = [ {"atom1": 'N', "atom2": 'N', "length": 1.35, "type": "aromatic"} ] BOND_LENGTHS = pandas.DataFrame(_BOND_LENGTH_DICTS) + +# Numbers of valence electrons for the groups of the periodic table +VALENCE_BY_GROUP = {1: 1, 2: 2, 13: 3, 14: 4, 15: 5, 16: 6, 17: 7, 18: 8} +# Groups of the periodic table +ELEMENT_BY_GROUP = { + 1: ["H", "Li", "Na", "K", "Rb", "Cs", "Fr"], + 2: ["Be", "Mg", "Ca", "Sr", "Ba", "Ra"], + 13: ["B", "Al", "Ga", "In", "Tl", "Nh"], + 14: ["C", "Si", "Ge", "Sn", "Pb", "Fl"], + 15: ["N", "P", "As", "Sb", "Bi", "Mc"], + 16: ["O", "S", "Se", "Te", "Po", "Lv"], + 17: ["F", "Cl", "Br", "I", "At", "Ts"], + 18: ["He", "Ne", "Ar", "Kr", "Xe", "Rn", "Og"] +} +# Valence electrons by element +VALENCE_BY_ELEMENT = {} +for group, elem_list in ELEMENT_BY_GROUP.items(): + for elem in elem_list: + VALENCE_BY_ELEMENT[elem] = VALENCE_BY_GROUP[group] + +# Numbers of non-bonded electrons for Sybyl-type atoms. Adapted from +# https://onlinelibrary.wiley.com/doi/abs/10.1002/jcc.540100804 (Table I). +NONBONDED_BY_TYPE = { + "Al": 0, "Br": 6, "C.1": 0, "C.2": 0, "C.3": 0, "C.ar": 0, "Ca": 0, + "Cl": 6, "F": 6, "H": 0, "I": 6, "K": 0, "Li": 0, "N.1": 2, "N.2": 2, + "N.3": 2, "N.4": 0, "N.am": 0, "N.ar": 2, "N.pl3": 0, "Na": 0, + "O.2": 4, "O.3": 4, "P.3": 0, "S.2": 4, "S.3": 4, "S.o": 2, "S.o2": 0, + "Si": 0, "O.co2": 4.5 +} diff --git a/pdb2pqr/pdb2pqr/ligand/mol2.py b/pdb2pqr/pdb2pqr/ligand/mol2.py index 8f4752105..9559f8ad4 100644 --- a/pdb2pqr/pdb2pqr/ligand/mol2.py +++ b/pdb2pqr/pdb2pqr/ligand/mol2.py @@ -8,7 +8,7 @@ from collections import OrderedDict from itertools import combinations from numpy import array from numpy.linalg import norm -from . import BOND_LENGTHS +from . 
import BOND_LENGTHS, VALENCE_BY_ELEMENT, NONBONDED_BY_TYPE _LOGGER = logging.getLogger(__name__) @@ -54,14 +54,16 @@ class Mol2Bond: return fmt.format(b=self) @property - def bond_order(self): + def guess_bond_order(self): """Attempt to determine the order of this bond. Return: string with order of bond or None """ - types = sorted( - [self.atoms[0].atom_type[0], self.atoms[1].atom_type[0]]) + _LOGGER.warning("Ignoring bond type: %s", self.type) + type1 = self.atoms[0].type.split(".")[0] + type2 = self.atoms[1].type.split(".")[0] + types = sorted(type1, type2) bond_lengths = BOND_LENGTHS.loc[ (BOND_LENGTHS["atom1"] == types[0]) & (BOND_LENGTHS["atom2"] == types[1])] @@ -86,7 +88,7 @@ class Mol2Atom: self.x = None self.y = None self.z = None - self.atom_type = None + self.type = None self.radius = None self.is_c_term = False self.is_n_term = False @@ -95,7 +97,6 @@ class Mol2Atom: self.temp_factor = 0.00 self.seg_id = None self.charge = None - self.formal_charge = None self.num_rings = 0 self.radius = None self.bonded_atoms = [] @@ -140,17 +141,93 @@ class Mol2Atom: @property def num_bonded_heavy(self): """Return the number of heavy atoms bonded to this atom.""" - return len([a for a in self.bonded_atoms if a.atom_type != "H"]) + return len([a for a in self.bonded_atoms if a.type != "H"]) @property def num_bonded_hydrogen(self): """Return the number of hydrogen atoms bonded to this atom.""" - return len([a for a in self.bonded_atoms if a.atom_type == "H"]) + return len([a for a in self.bonded_atoms if a.type == "H"]) @property def element(self): """Return a string with the element for this atom (uppercase).""" - return self.atom_type.split(".")[0].upper() + return self.type.split(".")[0].upper() + + @property + def bond_order(self): + """Return the total number of electrons in bonds with other atoms.""" + order = 0 + num_aromatic = 0 + for bond in self.bonds: + if bond.type == "single": + order += 1 + elif bond.type == "double": + order += 2 + elif bond.type == 
"triple": + order += 3 + elif bond.type == "aromatic": + num_aromatic += 1 + else: + err = "Unknown bond type: %s" % bond.type + raise ValueError(err) + if num_aromatic > 0: + order = order + num_aromatic + 1 + return order + + @property + def formal_charge(self): + """Return an integer with the formal charge for this atom.""" + element = self.type.split(".")[0] + valence = VALENCE_BY_ELEMENT[element] + nonbonded = NONBONDED_BY_TYPE[self.type] + bond_order = self.bond_order + formal_charge = valence - nonbonded - bond_order + if (self.type in ["N.pl3", "N.am"]) and (bond_order == 3) and ( + formal_charge != 0): + # Planar nitrogen bond orders are not always correct in MOL2 + _LOGGER.warning("Correcting planar/amide bond order.") + formal_charge = 0 + elif (self.type in ["N.ar"]) and (bond_order == 4) and ( + formal_charge != 0): + # Aromatic nitrogen bond orders are not always correct in MOL2 + _LOGGER.warning("Correcting aromatic nitrogen bond order.") + formal_charge = 0 + elif (self.type in ["C.ar"]) and (bond_order == 5) and ( + formal_charge != 0): + # Aromatic carbon bond orders are not always correct in MOL2 + _LOGGER.warning("Correcting aromatic carbon bond order.") + formal_charge = 0 + elif (self.type in ["O.co2"]) and (bond_order == 1) and ( + formal_charge != -0.5): + # CO2 bond orders are hardly ever set correctly in MOL2 + formal_charge = -0.5 + elif (self.type in ["C.2"]) and (bond_order == 5) and ( + formal_charge == -1): + # CO2 bond orders are hardly ever set correctly in MOL2 + formal_charge = 0 + elif (self.type in ["N.3"]) and (bond_order == 4) and ( + formal_charge == -1): + # Tetravalent nitrogen atom types are sometimes wrong in MOL2 + _LOGGER.warning("Correcting ammonium atom type.") + formal_charge = 1 + elif (self.type in ["O.3"]) and (bond_order == 1) and ( + formal_charge == 1): + # Phosphate groups are sometimes confused in MOL2 + # Assign negative charge to first O.3 with bond order 1 + # attached to phosphorous + elements = 
[a.type[0] for a in self.bonds[0].atoms] + p_atom = self.bonds[0].atoms[elements.index("P")] + _LOGGER.warning("Correcting phosphate bond order.") + o_atoms = [] + for bond in p_atom.bonds: + for atom in bond.atoms: + if atom.type[0] == "O" and atom.bond_order == 1: + o_atoms.append(atom.name) + if o_atoms.index(self.name) == 0: + formal_charge = -1 + else: + formal_charge = 0 + return formal_charge class Mol2Molecule: @@ -307,7 +384,15 @@ class Mol2Molecule: raise ValueError(err) atom = Mol2Atom() atom.name = words[1] - atom.atom_type = words[5] + atom_type = words[5] + type_parts = atom_type.split(".") + type_parts[0] = type_parts[0].capitalize() + if len(type_parts) == 2: + type_parts[1] = type_parts[1].lower() + elif len(type_parts) > 2: + err = "Invalid atom type: %s" % atom_type + raise ValueError(err) + atom.type = ".".join(type_parts) atom.chain_id = "L" try: atom.serial = int(words[0]) diff --git a/pdb2pqr/pdb2pqr/ligand/parameterize.py b/pdb2pqr/pdb2pqr/ligand/parameterize.py index 28f4abea9..a3b9d404a 100644 --- a/pdb2pqr/pdb2pqr/ligand/parameterize.py +++ b/pdb2pqr/pdb2pqr/ligand/parameterize.py @@ -26,8 +26,6 @@ class ParameterizedMolecule(Mol2Molecule): prev_atom_names = set(self.ligand_properties) curr_atom_names = set(self.atoms) if len(prev_atom_names ^ curr_atom_names) > 0: - for atom in ligand.atoms.values(): - atom.formal_charge = 0.0 self.reparameterize(ligand) def reparameterize(self, ligand): diff --git a/pdb2pqr/tests/data/acetic-acid.mol2 b/pdb2pqr/tests/data/acetate.mol2 similarity index 57% rename from pdb2pqr/tests/data/acetic-acid.mol2 rename to pdb2pqr/tests/data/acetate.mol2 index 642e59752..7aecce7c8 100644 --- a/pdb2pqr/tests/data/acetic-acid.mol2 +++ b/pdb2pqr/tests/data/acetate.mol2 @@ -15,19 +15,25 @@ # @MOLECULE DRG - 4 3 1 + 7 6 1 SMALL USER_CHARGES PRODRG MOLECULE @ATOM - 1 OAC -1.420 -1.790 -17.890 O.co2 1 DRG -0.360 - 2 CAB -1.510 -2.970 -18.290 C.2 1 DRG 0.720 - 3 OAD -1.200 -3.990 -17.640 O.co2 1 DRG -0.360 - 4 CAA -2.050 
-3.180 -19.710 C.3 1 DRG 0.000 + 1 OAC -1.960 -3.920 10.330 O.co2 1 DRG -0.360 + 2 CAB -2.270 -3.050 11.170 C.2 1 DRG 0.720 + 3 OAD -2.300 -3.210 12.410 O.co2 1 DRG -0.360 + 4 CAA -2.640 -1.670 10.630 C.3 1 DRG 0.000 + 5 HAB -2.571 -1.675 9.542 H 1 DRG 0.000 + 6 HAC -1.953 -0.926 11.034 H 1 DRG 0.000 + 7 HAA -3.662 -1.496 10.966 H 1 DRG 0.000 @BOND 1 2 1 2 2 2 3 2 3 2 4 1 + 4 4 5 1 + 5 4 6 1 + 6 4 7 1 @SUBSTRUCTURE 1 DRG 1 diff --git a/pdb2pqr/tests/data/cyanide.mol2 b/pdb2pqr/tests/data/acetonitrile.mol2 similarity index 59% rename from pdb2pqr/tests/data/cyanide.mol2 rename to pdb2pqr/tests/data/acetonitrile.mol2 index b84280d0b..e4eef863b 100644 --- a/pdb2pqr/tests/data/cyanide.mol2 +++ b/pdb2pqr/tests/data/acetonitrile.mol2 @@ -15,17 +15,23 @@ # @MOLECULE DRG - 3 2 1 + 6 5 1 SMALL USER_CHARGES PRODRG MOLECULE @ATOM - 1 NAC 14.390 -15.160 8.360 N.1 1 DRG 0.000 - 2 CAB 15.480 -15.470 8.460 C.1 1 DRG 0.000 - 3 CAA 16.890 -15.870 8.580 C.3 1 DRG 0.000 + 1 NAC 9.650 -0.320 4.310 N.1 1 DRG 0.000 + 2 CAB 10.600 0.090 4.770 C.1 1 DRG 0.000 + 3 CAA 11.840 0.630 5.370 C.3 1 DRG 0.000 + 4 HAB 12.444 -0.191 5.757 H 1 DRG 0.000 + 5 HAC 11.584 1.308 6.184 H 1 DRG 0.000 + 6 HAA 12.352 1.154 4.563 H 1 DRG 0.000 @BOND 1 2 1 3 2 2 3 1 + 3 3 4 1 + 4 3 5 1 + 5 3 6 1 @SUBSTRUCTURE 1 DRG 1 diff --git a/pdb2pqr/tests/data/acetylcholine.mol2 b/pdb2pqr/tests/data/acetylcholine.mol2 index c2ff8b20a..1b2f139b8 100644 --- a/pdb2pqr/tests/data/acetylcholine.mol2 +++ b/pdb2pqr/tests/data/acetylcholine.mol2 @@ -15,31 +15,63 @@ # @MOLECULE DRG - 10 9 1 + 26 25 1 SMALL USER_CHARGES PRODRG MOLECULE @ATOM - 1 CAI 19.020 -0.860 3.850 C.3 1 DRG 0.000 - 2 CAH 17.580 -0.690 3.360 C.2 1 DRG 0.425 - 3 OAB 16.680 -1.340 3.890 O.2 1 DRG -0.335 - 4 OAG 17.450 0.190 2.330 O.3 1 DRG -0.315 - 5 CAF 16.110 0.400 1.870 C.3 1 DRG 0.225 - 6 CAE 16.130 1.350 0.650 C.3 1 DRG 0.000 - 7 NAD 14.840 1.710 0.010 N.3 1 DRG 0.000 - 8 CAJ 15.160 2.580 -1.130 C.3 1 DRG 0.000 - 9 CAA 13.950 2.480 0.910 C.3 1 DRG 0.000 - 10 
CAC 14.110 0.550 -0.530 C.3 1 DRG 0.000 + 1 CAI -3.690 -3.410 -6.100 C.3 1 DRG 0.000 + 2 HAK -4.732 -3.177 -5.883 H 1 DRG 0.000 + 3 HAL -3.540 -3.429 -7.179 H 1 DRG 0.000 + 4 HAM -3.012 -2.708 -5.613 H 1 DRG 0.000 + 5 CAH -3.340 -4.780 -5.510 C.2 1 DRG 0.425 + 6 OAB -2.950 -5.690 -6.240 O.2 1 DRG -0.335 + 7 OAG -3.500 -4.850 -4.150 O.3 1 DRG -0.315 + 8 CAF -3.230 -6.130 -3.560 C.3 1 DRG 0.225 + 9 HAI -2.175 -6.368 -3.699 H 1 DRG 0.000 + 10 HAJ -3.903 -6.854 -4.021 H 1 DRG 0.000 + 11 CAE -3.550 -6.140 -2.050 C.3 1 DRG 0.000 + 12 HAG -3.023 -5.290 -1.615 H 1 DRG 0.000 + 13 HAH -3.162 -7.090 -1.681 H 1 DRG 0.000 + 14 NAD -4.950 -6.060 -1.570 N.3 1 DRG 0.000 + 15 CAJ -5.850 -7.060 -2.180 C.3 1 DRG 0.000 + 16 HAO -5.897 -6.898 -3.257 H 1 DRG 0.000 + 17 HAP -6.848 -6.958 -1.753 H 1 DRG 0.000 + 18 HAN -5.414 -8.030 -1.943 H 1 DRG 0.000 + 19 CAA -5.530 -4.720 -1.750 C.3 1 DRG 0.000 + 20 HAB -5.573 -4.482 -2.813 H 1 DRG 0.000 + 21 HAC -4.910 -3.984 -1.238 H 1 DRG 0.000 + 22 HAA -6.528 -4.771 -1.314 H 1 DRG 0.000 + 23 CAC -4.920 -6.320 -0.120 C.3 1 DRG 0.000 + 24 HAE -4.496 -7.307 0.064 H 1 DRG 0.000 + 25 HAF -5.934 -6.281 0.277 H 1 DRG 0.000 + 26 HAD -4.296 -5.534 0.306 H 1 DRG 0.000 @BOND - 1 2 1 1 - 2 2 3 2 - 3 2 4 1 - 4 5 4 1 - 5 5 6 1 - 6 7 6 1 - 7 7 8 1 - 8 7 9 1 - 9 7 10 1 + 1 1 2 1 + 2 1 3 1 + 3 1 4 1 + 4 5 1 1 + 5 5 6 2 + 6 5 7 1 + 7 8 7 1 + 8 8 9 1 + 9 8 10 1 + 10 8 11 1 + 11 11 12 1 + 12 11 13 1 + 13 14 11 1 + 14 14 15 1 + 15 14 19 1 + 16 14 23 1 + 17 15 16 1 + 18 15 17 1 + 19 15 18 1 + 20 19 20 1 + 21 19 21 1 + 22 19 22 1 + 23 23 24 1 + 24 23 25 1 + 25 23 26 1 @SUBSTRUCTURE 1 DRG 1 diff --git a/pdb2pqr/tests/data/anthracene.mol2 b/pdb2pqr/tests/data/anthracene.mol2 index adff53b48..d64e3141c 100644 --- a/pdb2pqr/tests/data/anthracene.mol2 +++ b/pdb2pqr/tests/data/anthracene.mol2 @@ -15,42 +15,62 @@ # @MOLECULE DRG - 14 16 1 + 24 26 1 SMALL USER_CHARGES PRODRG MOLECULE @ATOM - 1 CAB -1.930 6.450 -2.470 C.ar 1 DRG 0.000 - 2 CAC -2.360 7.390 -1.500 C.ar 1 DRG 0.000 
- 3 CAD -2.850 8.640 -1.920 C.ar 1 DRG 0.000 - 4 CAE -3.300 9.550 -0.940 C.ar 1 DRG 0.000 - 5 CAF -3.830 10.790 -1.350 C.ar 1 DRG 0.000 - 6 CAG -4.300 11.700 -0.380 C.ar 1 DRG 0.000 - 7 CAN -4.240 11.370 0.990 C.ar 1 DRG 0.000 - 8 CAM -3.710 10.130 1.410 C.ar 1 DRG 0.000 - 9 CAL -3.240 9.220 0.440 C.ar 1 DRG 0.000 - 10 CAK -2.740 7.970 0.860 C.ar 1 DRG 0.000 - 11 CAJ -2.300 7.060 -0.110 C.ar 1 DRG 0.000 - 12 CAI -1.820 5.790 0.300 C.ar 1 DRG 0.000 - 13 CAH -1.400 4.860 -0.680 C.ar 1 DRG 0.000 - 14 CAA -1.460 5.190 -2.050 C.ar 1 DRG 0.000 + 1 CAB 3.230 -1.790 7.950 C.ar 1 DRG 0.000 + 2 HAB 4.070 -2.130 8.550 H 1 DRG 0.000 + 3 CAC 2.480 -2.700 7.170 C.ar 1 DRG 0.000 + 4 CAD 2.840 -4.060 7.160 C.ar 1 DRG 0.000 + 5 HAD 3.680 -4.420 7.770 H 1 DRG 0.000 + 6 CAE 2.100 -4.960 6.370 C.ar 1 DRG 0.000 + 7 CAF 2.470 -6.320 6.350 C.ar 1 DRG 0.000 + 8 HAF 3.300 -6.690 6.950 H 1 DRG 0.000 + 9 CAG 1.740 -7.220 5.540 C.ar 1 DRG 0.000 + 10 HAG 2.010 -8.270 5.520 H 1 DRG 0.000 + 11 CAN 0.650 -6.760 4.760 C.ar 1 DRG 0.000 + 12 HAN 0.100 -7.460 4.140 H 1 DRG 0.000 + 13 CAM 0.280 -5.400 4.780 C.ar 1 DRG 0.000 + 14 HAM -0.560 -5.070 4.170 H 1 DRG 0.000 + 15 CAL 1.010 -4.500 5.580 C.ar 1 DRG 0.000 + 16 CAK 0.640 -3.140 5.590 C.ar 1 DRG 0.000 + 17 HAK -0.200 -2.780 4.990 H 1 DRG 0.000 + 18 CAJ 1.380 -2.240 6.380 C.ar 1 DRG 0.000 + 19 CAI 1.030 -0.870 6.390 C.ar 1 DRG 0.000 + 20 HAI 0.190 -0.500 5.800 H 1 DRG 0.000 + 21 CAH 1.780 0.030 7.170 C.ar 1 DRG 0.000 + 22 HAH 1.520 1.080 7.180 H 1 DRG 0.000 + 23 CAA 2.870 -0.430 7.950 C.ar 1 DRG 0.000 + 24 HAA 3.440 0.280 8.540 H 1 DRG 0.000 @BOND - 1 2 1 ar - 2 1 14 ar - 3 2 3 ar - 4 2 11 ar - 5 4 3 ar - 6 4 5 ar - 7 4 9 ar - 8 5 6 ar - 9 6 7 ar - 10 7 8 ar - 11 9 8 ar - 12 9 10 ar - 13 11 10 ar - 14 11 12 ar - 15 12 13 ar - 16 13 14 ar + 1 1 2 1 + 2 3 1 ar + 3 1 23 ar + 4 3 4 ar + 5 3 18 ar + 6 4 5 1 + 7 6 4 ar + 8 6 7 ar + 9 6 15 ar + 10 7 8 1 + 11 7 9 ar + 12 9 10 1 + 13 9 11 ar + 14 11 12 1 + 15 11 13 ar + 16 13 14 1 + 17 15 13 ar + 18 15 16 ar 
+ 19 16 17 1 + 20 18 16 ar + 21 18 19 ar + 22 19 20 1 + 23 19 21 ar + 24 21 22 1 + 25 21 23 ar + 26 23 24 1 @SUBSTRUCTURE 1 DRG 1 diff --git a/pdb2pqr/tests/data/crown-ether.mol2 b/pdb2pqr/tests/data/crown-ether.mol2 new file mode 100644 index 000000000..7c49da61f --- /dev/null +++ b/pdb2pqr/tests/data/crown-ether.mol2 @@ -0,0 +1,110 @@ +# +# +# This file was generated by PRODRG version AA180301.0717 +# PRODRG written/copyrighted by Daan van Aalten +# and Alexander Schuettelkopf +# +# Questions/comments to dava@davapc1.bioch.dundee.ac.uk +# +# When using this software in a publication, cite: +# A. W. Schuettelkopf and D. M. F. van Aalten (2004). +# PRODRG - a tool for high-throughput crystallography +# of protein-ligand complexes. +# Acta Crystallogr. D60, 1355--1363. +# +# +@MOLECULE +DRG + 42 42 1 +SMALL +USER_CHARGES + +PRODRG MOLECULE +@ATOM + 1 CAB -16.310 -55.310 26.830 C.3 1 DRG 0.180 + 2 HAB -16.231 -56.038 27.637 H 1 DRG 0.000 + 3 HAC -15.470 -54.617 26.783 H 1 DRG 0.000 + 4 OAA -16.390 -56.040 25.590 O.3 1 DRG -0.360 + 5 CAH -15.130 -56.650 25.250 C.3 1 DRG 0.180 + 6 HAJ -14.458 -55.861 24.914 H 1 DRG 0.000 + 7 HAK -14.756 -57.213 26.105 H 1 DRG 0.000 + 8 CAJ -15.330 -57.590 24.070 C.3 1 DRG 0.180 + 9 HAN -15.844 -58.470 24.457 H 1 DRG 0.000 + 10 HAO -15.886 -57.069 23.291 H 1 DRG 0.000 + 11 OAL -14.060 -58.030 23.530 O.3 1 DRG -0.360 + 12 CAM -14.190 -58.950 22.410 C.3 1 DRG 0.180 + 13 HAR -13.185 -59.282 22.150 H 1 DRG 0.000 + 14 HAS -14.845 -59.767 22.712 H 1 DRG 0.000 + 15 CAN -14.770 -58.310 21.140 C.3 1 DRG 0.180 + 16 HAT -14.400 -57.285 21.112 H 1 DRG 0.000 + 17 HAU -14.478 -58.905 20.275 H 1 DRG 0.000 + 18 OAO -16.220 -58.250 21.210 O.3 1 DRG -0.360 + 19 CAP -16.700 -57.050 20.580 C.3 1 DRG 0.180 + 20 HAV -16.101 -56.219 20.952 H 1 DRG 0.000 + 21 HAW -16.653 -57.171 19.498 H 1 DRG 0.000 + 22 CAQ -18.150 -56.760 20.990 C.3 1 DRG 0.180 + 23 HAX -18.427 -55.819 20.515 H 1 DRG 0.000 + 24 HAY -18.782 -57.600 20.702 H 1 DRG 0.000 + 25 OAR -18.230 
-56.570 22.420 O.3 1 DRG -0.360 + 26 CAK -19.540 -56.130 22.830 C.3 1 DRG 0.180 + 27 HAP -20.294 -56.792 22.404 H 1 DRG 0.000 + 28 HAQ -19.650 -55.088 22.528 H 1 DRG 0.000 + 29 CAI -19.640 -56.160 24.350 C.3 1 DRG 0.180 + 30 HAL -18.772 -55.654 24.773 H 1 DRG 0.000 + 31 HAM -19.713 -57.209 24.636 H 1 DRG 0.000 + 32 OAG -20.840 -55.500 24.810 O.3 1 DRG -0.360 + 33 CAF -21.000 -55.520 26.250 C.3 1 DRG 0.180 + 34 HAH -20.918 -56.552 26.590 H 1 DRG 0.000 + 35 HAI -21.969 -55.073 26.472 H 1 DRG 0.000 + 36 CAE -19.960 -54.680 27.000 C.3 1 DRG 0.180 + 37 HAF -20.356 -54.416 27.981 H 1 DRG 0.000 + 38 HAG -19.746 -53.817 26.370 H 1 DRG 0.000 + 39 OAD -18.730 -55.410 27.170 O.3 1 DRG -0.360 + 40 CAC -17.600 -54.520 27.080 C.3 1 DRG 0.180 + 41 HAE -17.758 -53.823 26.257 H 1 DRG 0.000 + 42 HAD -17.484 -54.034 28.049 H 1 DRG 0.000 +@BOND + 1 1 2 1 + 2 1 3 1 + 3 1 4 1 + 4 1 40 1 + 5 5 4 1 + 6 5 6 1 + 7 5 7 1 + 8 5 8 1 + 9 8 9 1 + 10 8 10 1 + 11 8 11 1 + 12 12 11 1 + 13 12 13 1 + 14 12 14 1 + 15 12 15 1 + 16 15 16 1 + 17 15 17 1 + 18 15 18 1 + 19 19 18 1 + 20 19 20 1 + 21 19 21 1 + 22 19 22 1 + 23 22 23 1 + 24 22 24 1 + 25 22 25 1 + 26 26 25 1 + 27 26 27 1 + 28 26 28 1 + 29 26 29 1 + 30 29 30 1 + 31 29 31 1 + 32 29 32 1 + 33 33 32 1 + 34 33 34 1 + 35 33 35 1 + 36 33 36 1 + 37 36 37 1 + 38 36 38 1 + 39 36 39 1 + 40 40 39 1 + 41 40 41 1 + 42 40 42 1 +@SUBSTRUCTURE + 1 DRG 1 diff --git a/pdb2pqr/tests/data/cyclohexane.mol2 b/pdb2pqr/tests/data/cyclohexane.mol2 index a5f65190a..89b7c3ee0 100644 --- a/pdb2pqr/tests/data/cyclohexane.mol2 +++ b/pdb2pqr/tests/data/cyclohexane.mol2 @@ -15,24 +15,48 @@ # @MOLECULE DRG - 6 6 1 + 18 18 1 SMALL USER_CHARGES PRODRG MOLECULE @ATOM - 1 CAA 11.290 18.580 21.950 C.3 1 DRG 0.000 - 2 CAB 11.410 19.880 22.760 C.3 1 DRG 0.000 - 3 CAC 11.080 21.080 21.870 C.3 1 DRG 0.000 - 4 CAF 12.020 21.110 20.660 C.3 1 DRG 0.000 - 5 CAE 11.900 19.810 19.860 C.3 1 DRG 0.000 - 6 CAD 12.230 18.610 20.750 C.3 1 DRG 0.000 + 1 CAA -13.920 -8.010 11.920 C.3 1 DRG 0.000 + 2 
HAA -13.694 -9.016 11.565 H 1 DRG 0.000 + 3 HAB -13.141 -7.277 11.707 H 1 DRG 0.000 + 4 CAB -14.100 -8.040 13.440 C.3 1 DRG 0.000 + 5 HAC -14.959 -8.678 13.646 H 1 DRG 0.000 + 6 HAD -13.173 -8.374 13.907 H 1 DRG 0.000 + 7 CAC -14.450 -6.640 13.950 C.3 1 DRG 0.000 + 8 HAE -13.631 -5.958 13.723 H 1 DRG 0.000 + 9 HAF -14.671 -6.735 15.013 H 1 DRG 0.000 + 10 CAF -15.730 -6.140 13.270 C.3 1 DRG 0.000 + 11 HAK -16.510 -6.869 13.488 H 1 DRG 0.000 + 12 HAL -15.952 -5.132 13.619 H 1 DRG 0.000 + 13 CAE -15.550 -6.120 11.750 C.3 1 DRG 0.000 + 14 HAI -14.746 -5.430 11.493 H 1 DRG 0.000 + 15 HAJ -16.517 -5.857 11.320 H 1 DRG 0.000 + 16 CAD -15.200 -7.510 11.240 C.3 1 DRG 0.000 + 17 HAH -16.019 -8.194 11.464 H 1 DRG 0.000 + 18 HAG -14.978 -7.414 10.177 H 1 DRG 0.000 @BOND 1 1 2 1 - 2 1 6 1 - 3 2 3 1 - 4 3 4 1 + 2 1 3 1 + 3 1 4 1 + 4 1 16 1 5 4 5 1 - 6 5 6 1 + 6 4 6 1 + 7 4 7 1 + 8 7 8 1 + 9 7 9 1 + 10 7 10 1 + 11 10 11 1 + 12 10 12 1 + 13 10 13 1 + 14 13 14 1 + 15 13 15 1 + 16 13 16 1 + 17 16 17 1 + 18 16 18 1 @SUBSTRUCTURE 1 DRG 1 diff --git a/pdb2pqr/tests/data/ethanol.mol2 b/pdb2pqr/tests/data/ethanol.mol2 index 28b0607f6..1effdfb10 100644 --- a/pdb2pqr/tests/data/ethanol.mol2 +++ b/pdb2pqr/tests/data/ethanol.mol2 @@ -15,19 +15,29 @@ # @MOLECULE DRG - 4 3 1 + 9 8 1 SMALL USER_CHARGES PRODRG MOLECULE @ATOM - 1 CAA 21.780 -4.220 -8.790 C.3 1 DRG 0.000 - 2 CAB 20.740 -4.840 -7.860 C.3 1 DRG 0.150 - 3 OAC 20.850 -6.270 -7.890 O.3 1 DRG -0.548 - 4 HAF 20.170 -6.670 -7.280 H 1 DRG 0.398 + 1 CAA -1.560 -19.770 25.200 C.3 1 DRG 0.000 + 2 HAA -2.402 -19.161 25.529 H 1 DRG 0.000 + 3 HAB -1.355 -19.569 24.148 H 1 DRG 0.000 + 4 HAC -0.679 -19.602 25.819 H 1 DRG 0.000 + 5 CAB -1.900 -21.250 25.380 C.3 1 DRG 0.150 + 6 HAD -1.057 -21.858 25.050 H 1 DRG 0.000 + 7 HAE -2.817 -21.431 24.819 H 1 DRG 0.000 + 8 OAC -2.170 -21.520 26.760 O.3 1 DRG -0.548 + 9 HAF -2.390 -22.490 26.870 H 1 DRG 0.398 @BOND - 1 2 1 1 - 2 2 3 1 - 3 3 4 1 + 1 1 2 1 + 2 1 3 1 + 3 1 4 1 + 4 5 1 1 + 5 5 6 1 + 6 5 7 1 + 7 5 
8 1 + 8 8 9 1 @SUBSTRUCTURE 1 DRG 1 diff --git a/pdb2pqr/tests/data/fatty-acid.mol2 b/pdb2pqr/tests/data/fatty-acid.mol2 index db441c039..daacd478b 100644 --- a/pdb2pqr/tests/data/fatty-acid.mol2 +++ b/pdb2pqr/tests/data/fatty-acid.mol2 @@ -15,35 +15,69 @@ # @MOLECULE DRG - 12 11 1 + 29 28 1 SMALL USER_CHARGES PRODRG MOLECULE @ATOM - 1 OAA -3.000 7.130 9.800 O.co2 1 DRG -0.360 - 2 CAC -3.230 6.620 8.680 C.2 1 DRG 0.720 - 3 OAB -2.690 6.950 7.610 O.co2 1 DRG -0.360 - 4 CAD -4.310 5.540 8.610 C.3 1 DRG 0.000 - 5 CAE -3.800 4.140 9.000 C.3 1 DRG 0.000 - 6 CAF -2.790 3.570 8.000 C.3 1 DRG 0.000 - 7 CAG -1.380 4.100 8.300 C.2 1 DRG 0.000 - 8 CAH -0.550 4.480 7.070 C.2 1 DRG 0.000 - 9 CAI 0.880 4.950 7.340 C.3 1 DRG 0.000 - 10 CAJ 0.970 6.470 7.470 C.3 1 DRG 0.000 - 11 CAK 0.460 6.960 8.830 C.3 1 DRG 0.000 - 12 CAL 0.500 8.480 8.880 C.3 1 DRG 0.000 + 1 OAA 0.650 -4.500 8.530 O.co2 1 DRG -0.360 + 2 CAB 0.740 -5.130 7.450 C.2 1 DRG 0.720 + 3 OAL 1.110 -6.320 7.330 O.co2 1 DRG -0.360 + 4 CAC 0.360 -4.390 6.170 C.3 1 DRG 0.000 + 5 HAC 0.512 -3.327 6.360 H 1 DRG 0.000 + 6 HAD 0.947 -4.783 5.340 H 1 DRG 0.000 + 7 CAD -1.130 -4.580 5.870 C.3 1 DRG 0.000 + 8 HAE -1.283 -5.645 5.697 H 1 DRG 0.000 + 9 HAF -1.712 -4.179 6.700 H 1 DRG 0.000 + 10 CAE -1.540 -3.860 4.580 C.3 1 DRG 0.000 + 11 HAG -2.623 -3.951 4.498 H 1 DRG 0.000 + 12 HAH -1.182 -2.831 4.616 H 1 DRG 0.000 + 13 CAF -0.940 -4.560 3.350 C.2 1 DRG 0.000 + 14 HAI -0.744 -5.632 3.323 H 1 DRG 0.000 + 15 CAG -0.640 -3.640 2.170 C.2 1 DRG 0.000 + 16 HAJ -0.871 -2.575 2.182 H 1 DRG 0.000 + 17 CAH 0.010 -4.330 0.970 C.3 1 DRG 0.000 + 18 HAK 0.603 -3.576 0.452 H 1 DRG 0.000 + 19 HAL 0.584 -5.190 1.315 H 1 DRG 0.000 + 20 CAI -1.070 -4.800 -0.010 C.3 1 DRG 0.000 + 21 HAM -1.680 -5.529 0.523 H 1 DRG 0.000 + 22 HAN -1.628 -3.931 -0.359 H 1 DRG 0.000 + 23 CAJ -0.480 -5.530 -1.230 C.3 1 DRG 0.000 + 24 HAO -1.294 -5.654 -1.944 H 1 DRG 0.000 + 25 HAP 0.361 -4.952 -1.614 H 1 DRG 0.000 + 26 CAK 0.010 -6.940 -0.870 C.3 1 DRG 0.000 + 27 HAR 
-0.823 -7.527 -0.484 H 1 DRG 0.000 + 28 HAS 0.788 -6.871 -0.110 H 1 DRG 0.000 + 29 HAQ 0.395 -7.354 -1.802 H 1 DRG 0.000 @BOND 1 2 1 2 2 2 3 2 3 2 4 1 4 4 5 1 - 5 5 6 1 - 6 6 7 1 - 7 7 8 2 - 8 9 8 1 - 9 9 10 1 + 5 4 6 1 + 6 4 7 1 + 7 7 8 1 + 8 7 9 1 + 9 7 10 1 10 10 11 1 - 11 11 12 1 + 11 10 12 1 + 12 10 13 1 + 13 13 14 1 + 14 13 15 2 + 15 15 16 1 + 16 17 15 1 + 17 17 18 1 + 18 17 19 1 + 19 17 20 1 + 20 20 21 1 + 21 20 22 1 + 22 20 23 1 + 23 23 24 1 + 24 23 25 1 + 25 23 26 1 + 26 26 27 1 + 27 26 28 1 + 28 26 29 1 @SUBSTRUCTURE 1 DRG 1 diff --git a/pdb2pqr/tests/data/glycerol.mol2 b/pdb2pqr/tests/data/glycerol.mol2 index e788cf003..df6aae36d 100644 --- a/pdb2pqr/tests/data/glycerol.mol2 +++ b/pdb2pqr/tests/data/glycerol.mol2 @@ -15,29 +15,39 @@ # @MOLECULE DRG - 9 8 1 + 14 13 1 SMALL USER_CHARGES PRODRG MOLECULE @ATOM - 1 OAD -3.760 9.120 5.400 O.3 1 DRG -0.548 - 2 HAF -4.620 9.360 4.950 H 1 DRG 0.398 - 3 CAA -2.870 8.530 4.430 C.3 1 DRG 0.150 - 4 CAB -1.830 7.570 5.040 C.3 1 DRG 0.150 - 5 OAE -2.410 6.360 5.540 O.3 1 DRG -0.548 - 6 HAG -1.690 5.750 5.860 H 1 DRG 0.398 - 7 CAC -0.990 8.280 6.120 C.3 1 DRG 0.150 - 8 OAF -0.110 9.260 5.560 O.3 1 DRG -0.548 - 9 HAH 0.040 9.980 6.230 H 1 DRG 0.398 + 1 OAB 14.010 4.880 -4.720 O.3 1 DRG -0.548 + 2 HAG 13.390 4.670 -5.480 H 1 DRG 0.398 + 3 CAA 15.220 5.380 -5.300 C.3 1 DRG 0.150 + 4 HAA 15.864 4.543 -5.568 H 1 DRG 0.000 + 5 HAB 15.672 6.070 -4.587 H 1 DRG 0.000 + 6 CAC 14.900 6.200 -6.560 C.3 1 DRG 0.150 + 7 HAC 14.298 7.062 -6.272 H 1 DRG 0.000 + 8 OAD 14.180 5.390 -7.500 O.3 1 DRG -0.548 + 9 HAD 14.870 4.930 -8.070 H 1 DRG 0.398 + 10 CAE 16.200 6.700 -7.200 C.3 1 DRG 0.150 + 11 HAE 15.908 7.395 -7.987 H 1 DRG 0.000 + 12 HAF 16.820 7.156 -6.428 H 1 DRG 0.000 + 13 OAF 16.940 5.630 -7.820 O.3 1 DRG -0.548 + 14 HAH 17.840 5.970 -8.090 H 1 DRG 0.398 @BOND 1 1 2 1 2 3 1 1 - 3 4 3 1 - 4 4 5 1 - 5 4 7 1 - 6 5 6 1 - 7 7 8 1 - 8 8 9 1 + 3 3 4 1 + 4 3 5 1 + 5 6 3 1 + 6 6 7 1 + 7 6 8 1 + 8 6 10 1 + 9 8 9 1 + 10 10 11 1 + 11 10 12 1 
+ 12 10 13 1 + 13 13 14 1 @SUBSTRUCTURE 1 DRG 1 diff --git a/pdb2pqr/tests/data/naphthalene.mol2 b/pdb2pqr/tests/data/naphthalene.mol2 index 16f8e62af..ed4e5ff3a 100644 --- a/pdb2pqr/tests/data/naphthalene.mol2 +++ b/pdb2pqr/tests/data/naphthalene.mol2 @@ -15,33 +15,49 @@ # @MOLECULE DRG - 10 11 1 + 18 19 1 SMALL USER_CHARGES PRODRG MOLECULE @ATOM - 1 CAB -3.790 -3.860 0.280 C.ar 1 DRG 0.000 - 2 CAC -4.440 -4.930 -0.380 C.ar 1 DRG 0.000 - 3 CAD -5.830 -4.850 -0.630 C.ar 1 DRG 0.000 - 4 CAE -6.470 -5.920 -1.280 C.ar 1 DRG 0.000 - 5 CAJ -5.740 -7.060 -1.680 C.ar 1 DRG 0.000 - 6 CAI -4.360 -7.150 -1.440 C.ar 1 DRG 0.000 - 7 CAH -3.700 -6.080 -0.790 C.ar 1 DRG 0.000 - 8 CAG -2.320 -6.160 -0.530 C.ar 1 DRG 0.000 - 9 CAF -1.680 -5.100 0.130 C.ar 1 DRG 0.000 - 10 CAA -2.400 -3.960 0.530 C.ar 1 DRG 0.000 + 1 CAB -6.460 -5.320 -4.190 C.ar 1 DRG 0.000 + 2 HAB -5.680 -5.810 -4.760 H 1 DRG 0.000 + 3 CAC -6.230 -4.080 -3.560 C.ar 1 DRG 0.000 + 4 CAD -4.970 -3.460 -3.660 C.ar 1 DRG 0.000 + 5 HAD -4.160 -3.930 -4.220 H 1 DRG 0.000 + 6 CAE -4.750 -2.220 -3.030 C.ar 1 DRG 0.000 + 7 HAE -3.780 -1.740 -3.100 H 1 DRG 0.000 + 8 CAJ -5.790 -1.590 -2.300 C.ar 1 DRG 0.000 + 9 HAJ -5.610 -0.630 -1.820 H 1 DRG 0.000 + 10 CAI -7.050 -2.200 -2.190 C.ar 1 DRG 0.000 + 11 HAI -7.840 -1.700 -1.620 H 1 DRG 0.000 + 12 CAH -7.280 -3.440 -2.810 C.ar 1 DRG 0.000 + 13 CAG -8.550 -4.060 -2.710 C.ar 1 DRG 0.000 + 14 HAG -9.350 -3.600 -2.140 H 1 DRG 0.000 + 15 CAF -8.760 -5.300 -3.340 C.ar 1 DRG 0.000 + 16 HAF -9.740 -5.780 -3.270 H 1 DRG 0.000 + 17 CAA -7.730 -5.920 -4.080 C.ar 1 DRG 0.000 + 18 HAA -7.920 -6.880 -4.560 H 1 DRG 0.000 @BOND - 1 2 1 ar - 2 1 10 ar - 3 2 3 ar - 4 2 7 ar - 5 3 4 ar - 6 4 5 ar - 7 5 6 ar - 8 7 6 ar - 9 7 8 ar - 10 8 9 ar - 11 9 10 ar + 1 1 2 1 + 2 3 1 ar + 3 1 17 ar + 4 3 4 ar + 5 3 12 ar + 6 4 5 1 + 7 4 6 ar + 8 6 7 1 + 9 6 8 ar + 10 8 9 1 + 11 8 10 ar + 12 10 11 1 + 13 12 10 ar + 14 12 13 ar + 15 13 14 1 + 16 13 15 ar + 17 15 16 1 + 18 15 17 ar + 19 17 18 1 @SUBSTRUCTURE 1 
DRG 1 diff --git a/pdb2pqr/tests/data/phenalene.mol2 b/pdb2pqr/tests/data/phenalene.mol2 deleted file mode 100644 index 5636064a3..000000000 --- a/pdb2pqr/tests/data/phenalene.mol2 +++ /dev/null @@ -1,54 +0,0 @@ -# -# -# This file was generated by PRODRG version AA180301.0717 -# PRODRG written/copyrighted by Daan van Aalten -# and Alexander Schuettelkopf -# -# Questions/comments to dava@davapc1.bioch.dundee.ac.uk -# -# When using this software in a publication, cite: -# A. W. Schuettelkopf and D. M. F. van Aalten (2004). -# PRODRG - a tool for high-throughput crystallography -# of protein-ligand complexes. -# Acta Crystallogr. D60, 1355--1363. -# -# -@MOLECULE -DRG - 13 15 1 -SMALL -USER_CHARGES - -PRODRG MOLECULE -@ATOM - 1 CAA -2.230 5.070 -11.340 C.3 1 DRG 0.000 - 2 CAB -1.910 4.500 -12.590 C.2 1 DRG 0.000 - 3 CAC -0.750 3.730 -12.790 C.2 1 DRG 0.000 - 4 CAG 0.160 3.560 -11.730 C.ar 1 DRG 0.000 - 5 CAF -0.060 4.250 -10.510 C.ar 1 DRG 0.000 - 6 CAE -1.210 5.060 -10.360 C.ar 1 DRG 0.000 - 7 CAD -1.380 5.810 -9.170 C.ar 1 DRG 0.000 - 8 CAI -0.420 5.740 -8.150 C.ar 1 DRG 0.000 - 9 CAJ 0.720 4.930 -8.290 C.ar 1 DRG 0.000 - 10 CAK 0.910 4.180 -9.470 C.ar 1 DRG 0.000 - 11 CAL 2.080 3.410 -9.660 C.ar 1 DRG 0.000 - 12 CAM 2.280 2.720 -10.870 C.ar 1 DRG 0.000 - 13 CAH 1.330 2.800 -11.910 C.ar 1 DRG 0.000 -@BOND - 1 1 2 1 - 2 6 1 1 - 3 2 3 2 - 4 4 3 1 - 5 4 5 ar - 6 4 13 ar - 7 5 6 ar - 8 5 10 ar - 9 6 7 ar - 10 7 8 ar - 11 8 9 ar - 12 10 9 ar - 13 10 11 ar - 14 11 12 ar - 15 12 13 ar -@SUBSTRUCTURE - 1 DRG 1 diff --git a/pdb2pqr/tests/data/pyrrole.mol2 b/pdb2pqr/tests/data/pyrrole.mol2 index 1c375f316..749458804 100644 --- a/pdb2pqr/tests/data/pyrrole.mol2 +++ b/pdb2pqr/tests/data/pyrrole.mol2 @@ -15,24 +15,32 @@ # @MOLECULE DRG - 6 6 1 + 10 10 1 SMALL USER_CHARGES PRODRG MOLECULE @ATOM - 1 CAA -5.490 0.200 -1.460 C.ar 1 DRG 0.000 - 2 CAD -4.900 -0.330 -0.390 C.ar 1 DRG 0.000 - 3 CAE -3.580 -0.250 -0.570 C.ar 1 DRG 0.000 - 4 CAC -3.360 0.320 -1.750 C.ar 1 DRG 0.000 - 5 
NAB -4.540 0.600 -2.300 N.ar 1 DRG -0.280 - 6 HAB -4.680 1.030 -3.190 H 1 DRG 0.280 + 1 CAE -12.850 -3.330 -3.760 C.3 1 DRG 0.000 + 2 HAE -13.499 -2.470 -3.597 H 1 DRG 0.000 + 3 HAF -12.168 -3.254 -4.607 H 1 DRG 0.000 + 4 CAD -13.650 -4.620 -3.910 C.2 1 DRG 0.000 + 5 HAD -14.388 -4.834 -4.683 H 1 DRG 0.000 + 6 CAA -13.280 -5.440 -2.920 C.2 1 DRG 0.000 + 7 HAA -13.685 -6.442 -2.777 H 1 DRG 0.000 + 8 NAB -12.360 -4.890 -2.130 N.2 1 DRG 0.000 + 9 CAC -12.030 -3.660 -2.520 C.2 1 DRG 0.000 + 10 HAC -11.303 -3.006 -2.038 H 1 DRG 0.000 @BOND - 1 1 2 ar - 2 1 5 ar - 3 2 3 ar - 4 3 4 ar - 5 4 5 ar - 6 5 6 1 + 1 1 2 1 + 2 1 3 1 + 3 1 4 1 + 4 1 9 1 + 5 4 5 1 + 6 4 6 2 + 7 6 7 1 + 8 6 8 1 + 9 9 8 2 + 10 9 10 1 @SUBSTRUCTURE 1 DRG 1 diff --git a/pdb2pqr/tests/data/trimethylamine.mol2 b/pdb2pqr/tests/data/trimethylamine.mol2 index f519e550c..92b0a1f69 100644 --- a/pdb2pqr/tests/data/trimethylamine.mol2 +++ b/pdb2pqr/tests/data/trimethylamine.mol2 @@ -15,21 +15,45 @@ # @MOLECULE DRG - 5 4 1 + 17 16 1 SMALL USER_CHARGES PRODRG MOLECULE @ATOM - 1 CAA 12.430 -6.030 48.960 C.3 1 DRG 0.000 - 2 NAB 13.150 -7.170 48.370 N.3 1 DRG 0.000 - 3 HAJ 13.530 -7.730 49.100 H 1 DRG 0.000 - 4 CAD 14.220 -6.680 47.490 C.3 1 DRG 0.000 - 5 CAC 12.210 -7.970 47.580 C.3 1 DRG 0.000 + 1 CAA -12.250 -7.600 -0.980 C.3 1 DRG 0.000 + 2 HAA -12.072 -6.550 -1.213 H 1 DRG 0.000 + 3 HAB -12.578 -7.692 0.055 H 1 DRG 0.000 + 4 HAC -12.974 -8.041 -1.666 H 1 DRG 0.000 + 5 NAC -11.010 -8.360 -1.170 N.3 1 DRG 0.000 + 6 CAB -9.960 -7.830 -0.280 C.3 1 DRG 0.000 + 7 HAE -10.285 -7.915 0.757 H 1 DRG 0.000 + 8 HAF -9.777 -6.782 -0.518 H 1 DRG 0.000 + 9 HAD -9.078 -8.441 -0.471 H 1 DRG 0.000 + 10 CAE -11.250 -9.780 -0.850 C.3 1 DRG 0.000 + 11 HAK -11.582 -9.869 0.184 H 1 DRG 0.000 + 12 HAL -10.327 -10.344 -0.984 H 1 DRG 0.000 + 13 HAJ -12.022 -10.111 -1.545 H 1 DRG 0.000 + 14 CAD -10.560 -8.240 -2.570 C.3 1 DRG 0.000 + 15 HAH -10.383 -7.191 -2.807 H 1 DRG 0.000 + 16 HAI -11.328 -8.637 -3.234 H 1 DRG 0.000 + 17 HAG -9.641 
-8.822 -2.634 H 1 DRG 0.000 @BOND - 1 2 1 1 - 2 2 3 1 - 3 2 4 1 - 4 2 5 1 + 1 1 2 1 + 2 1 3 1 + 3 1 4 1 + 4 5 1 1 + 5 5 6 1 + 6 5 10 1 + 7 5 14 1 + 8 6 7 1 + 9 6 8 1 + 10 6 9 1 + 11 10 11 1 + 12 10 12 1 + 13 10 13 1 + 14 14 15 1 + 15 14 16 1 + 16 14 17 1 @SUBSTRUCTURE 1 DRG 1 diff --git a/pdb2pqr/tests/ligand_results.py b/pdb2pqr/tests/ligand_results.py new file mode 100644 index 000000000..d20f05c56 --- /dev/null +++ b/pdb2pqr/tests/ligand_results.py @@ -0,0 +1,80 @@ +"""Expected results for ligand tests""" +TORSION_RESULTS = { + "ethanol.mol2": { + ('CAA', 'CAB', 'OAC', 'HAF'), ('HAF', 'OAC', 'CAB', 'CAA')}, + "glycerol.mol2": { + ('CAA', 'CAB', 'CAC', 'OAF'), ('CAA', 'CAB', 'OAE', 'HAG'), + ('CAB', 'CAA', 'OAD', 'HAF'), ('CAB', 'CAC', 'OAF', 'HAH'), + ('CAC', 'CAB', 'CAA', 'OAD'), ('CAC', 'CAB', 'OAE', 'HAG'), + ('HAF', 'OAD', 'CAA', 'CAB'), ('HAG', 'OAE', 'CAB', 'CAA'), + ('HAG', 'OAE', 'CAB', 'CAC'), ('HAH', 'OAF', 'CAC', 'CAB'), + ('OAD', 'CAA', 'CAB', 'CAC'), ('OAD', 'CAA', 'CAB', 'OAE'), + ('OAE', 'CAB', 'CAA', 'OAD'), ('OAE', 'CAB', 'CAC', 'OAF'), + ('OAF', 'CAC', 'CAB', 'CAA'), ('OAF', 'CAC', 'CAB', 'OAE')}, + "cyclohexane.mol2": { + ('CAA', 'CAB', 'CAC', 'CAF'), ('CAA', 'CAD', 'CAE', 'CAF'), + ('CAB', 'CAA', 'CAD', 'CAE'), ('CAB', 'CAC', 'CAF', 'CAE'), + ('CAC', 'CAB', 'CAA', 'CAD'), ('CAC', 'CAF', 'CAE', 'CAD'), + ('CAD', 'CAA', 'CAB', 'CAC'), ('CAD', 'CAE', 'CAF', 'CAC'), + ('CAE', 'CAD', 'CAA', 'CAB'), ('CAE', 'CAF', 'CAC', 'CAB'), + ('CAF', 'CAC', 'CAB', 'CAA'), ('CAF', 'CAE', 'CAD', 'CAA')} +} + + +RING_RESULTS = { + "ethanol.mol2": set(), + "glycerol.mol2": set(), + "cyclohexane.mol2": {('CAA', 'CAD', 'CAE', 'CAF', 'CAC', 'CAB')}, + "naphthalene.mol2": { + ('CAA', 'CAB', 'CAC', 'CAH', 'CAG', 'CAF'), + ('CAC', 'CAH', 'CAI', 'CAJ', 'CAE', 'CAD')}, + "anthracene.mol2": { + ('CAC', 'CAJ', 'CAK', 'CAL', 'CAE', 'CAD'), + ('CAE', 'CAL', 'CAM', 'CAN', 'CAG', 'CAF'), + ('CAA', 'CAB', 'CAC', 'CAJ', 'CAI', 'CAH')}, + "crown-ether.mol2": { + ('CAA', 'CAE', 
'CAF', 'CAG', 'CAC', 'CAB'), + ('CAD', 'CAI', 'CAJ', 'CAK', 'CAF', 'CAE'), + ('CAF', 'CAG', 'CAH', 'CAM', 'CAL', 'CAK')} +} + + +BOND_RESULTS = { + "cyclohexane.mol2": 6 * ["single"], + "ethanol.mol2": [ + "single", "single", None, "single", "single"], + "glycerol.mol2": [ + None, "single", "single", "single", "single", None, "single", None], + "acetylcholine.mol2": [ + "single", "double", "single", "single", "single", "single", "single", + "single", "single"], + "acetonitrile.mol2": [ + "triple", "single"], + "pyrrole.mol2": [ + "aromatic", "aromatic", "aromatic", "aromatic", "aromatic", None], + "fatty-acid.mol2": [ + "double", "double", "single", "single", "single", "single", "double", + "single", "single", "single", "single"], + "trimethylamine.mol2": ["single", None, "single", "single"], + "naphthalene.mol2": 11 * ["aromatic"] +} + + +FORMAL_CHARGE_RESULTS = { + "1HPX-ligand.mol2": 87*[0], + "1QBS-ligand.mol2": 80*[0], + "1US0-ligand.mol2": 22*[0] + [-0.5, -0.5] + 11*[0], + "acetate.mol2": [-0.5, 0, -0.5] + 4*[0], + "acetylcholine.mol2": 13*[0] + [1] + 12*[0], + "adp.mol2": [-1] + 5*[0] + [-1] + 32*[0], + "anthracene.mol2": 24*[0], + "acetonitrile.mol2": 6*[0], + "cyclohexane.mol2": 18*[0], + "ethanol.mol2": 9*[0], + "fatty-acid.mol2": [-0.5, 0, -0.5] + 26*[0], + "glycerol.mol2": 14*[0], + "naphthalene.mol2": 18*[0], + "crown-ether.mol2": 42*[0], + "pyrrole.mol2": 10*[0], + "trimethylamine.mol2": 4*[0] + [1] + 12*[0] +} diff --git a/pdb2pqr/tests/ligand_test.py b/pdb2pqr/tests/ligand_test.py index 56dc23583..0df495f11 100644 --- a/pdb2pqr/tests/ligand_test.py +++ b/pdb2pqr/tests/ligand_test.py @@ -1,18 +1,27 @@ """Tests for ligand functionality.""" import logging -import random +from math import isclose from pathlib import Path import pytest from pdb2pqr.ligand import parameterize import common +from ligand_results import TORSION_RESULTS, RING_RESULTS, BOND_RESULTS +from ligand_results import FORMAL_CHARGE_RESULTS _LOGGER = logging.getLogger(__name__) 
_LOGGER.warning("Need functional and regression test coverage for --ligand") +_LOGGER.error("Still haven't figured out radii") -@pytest.mark.parametrize("input_mol2", [ - "1HPX-ligand.mol2", "1QBS-ligand.mol2", "1US0-ligand.mol2", "adp.mol2"]) +ALL_LIGANDS = set(TORSION_RESULTS) | set(BOND_RESULTS) | set(RING_RESULTS) +ALL_LIGANDS |= { + "1HPX-ligand.mol2", "1QBS-ligand.mol2", "1US0-ligand.mol2", "adp.mol2", + "acetate.mol2"} +ALL_LIGANDS = sorted(list(ALL_LIGANDS)) + + +@pytest.mark.parametrize("input_mol2", ALL_LIGANDS) def test_parameterization(input_mol2): """Testing basic aspects of code breaking.""" _LOGGER.warning("Ideally, this would be a regression test.") @@ -21,7 +30,7 @@ def test_parameterization(input_mol2): with open(mol2_path, "rt") as mol2_file: ligand.read(mol2_file) for atom in ligand.atoms.values(): - atom.charge = random.uniform(-1, 1) + atom.charge = atom.formal_charge atom.old_charge = atom.charge ligand.update(ligand) for atom in ligand.atoms.values(): @@ -29,29 +38,35 @@ def test_parameterization(input_mol2): _LOGGER.info(fmt.format(a=atom)) -TORSION_RESULTS = { - "ethanol.mol2": { - ('CAA', 'CAB', 'OAC', 'HAF'), ('HAF', 'OAC', 'CAB', 'CAA')}, - "glycerol.mol2": { - ('CAA', 'CAB', 'CAC', 'OAF'), ('CAA', 'CAB', 'OAE', 'HAG'), - ('CAB', 'CAA', 'OAD', 'HAF'), ('CAB', 'CAC', 'OAF', 'HAH'), - ('CAC', 'CAB', 'CAA', 'OAD'), ('CAC', 'CAB', 'OAE', 'HAG'), - ('HAF', 'OAD', 'CAA', 'CAB'), ('HAG', 'OAE', 'CAB', 'CAA'), - ('HAG', 'OAE', 'CAB', 'CAC'), ('HAH', 'OAF', 'CAC', 'CAB'), - ('OAD', 'CAA', 'CAB', 'CAC'), ('OAD', 'CAA', 'CAB', 'OAE'), - ('OAE', 'CAB', 'CAA', 'OAD'), ('OAE', 'CAB', 'CAC', 'OAF'), - ('OAF', 'CAC', 'CAB', 'CAA'), ('OAF', 'CAC', 'CAB', 'OAE')}, - "cyclohexane.mol2": { - ('CAA', 'CAB', 'CAC', 'CAF'), ('CAA', 'CAD', 'CAE', 'CAF'), - ('CAB', 'CAA', 'CAD', 'CAE'), ('CAB', 'CAC', 'CAF', 'CAE'), - ('CAC', 'CAB', 'CAA', 'CAD'), ('CAC', 'CAF', 'CAE', 'CAD'), - ('CAD', 'CAA', 'CAB', 'CAC'), ('CAD', 'CAE', 'CAF', 'CAC'), - ('CAE', 'CAD', 'CAA', 
'CAB'), ('CAE', 'CAF', 'CAC', 'CAB'), - ('CAF', 'CAC', 'CAB', 'CAA'), ('CAF', 'CAE', 'CAD', 'CAA')} -} +@pytest.mark.parametrize("input_mol2", ALL_LIGANDS) +def test_formal_charge(input_mol2): + """Testing formal charge calculation.""" + ligand = parameterize.ParameterizedMolecule() + mol2_path = Path("tests/data") / input_mol2 + with open(mol2_path, "rt") as mol2_file: + ligand.read(mol2_file) + expected_results = FORMAL_CHARGE_RESULTS[input_mol2] + errors = [] + for iatom, atom in enumerate(ligand.atoms.values()): + try: + expected_charge = expected_results[iatom] + except IndexError: + err = ( + "Missing result for {a.name}, {a.type}, {a.formal_charge}") + err = err.format(a=atom) + raise IndexError(err) + if not isclose(atom.formal_charge, expected_charge): + err = ( + "Atom {0.name} {0.type} with bond order " + "{0.bond_order}: expected {1}, got {2}") + err = err.format(atom, expected_charge, atom.formal_charge) + errors.append(err) + if len(errors) > 0: + err = "Errors in test values:\n%s" % "\n".join(errors) + raise ValueError(err) -@pytest.mark.parametrize("input_mol2", [ - "cyclohexane.mol2", "ethanol.mol2", "glycerol.mol2"]) + +@pytest.mark.parametrize("input_mol2", ALL_LIGANDS) def test_torsions(input_mol2): """Test assignment of torsion angles.""" ligand = parameterize.ParameterizedMolecule() @@ -66,50 +81,12 @@ def test_torsions(input_mol2): input_mol2, sorted(list(diff))) raise ValueError(err) except KeyError: - _LOGGER.warning( - "Skipping torsion test for %s: %s", input_mol2, - sorted(list(ligand.torsions))) + err = "No results for %s: %s", input_mol2, sorted( + list(ligand.torsions)) + raise KeyError(err) -RING_RESULTS = { - "ethanol.mol2": set(), - "glycerol.mol2": set(), - "cyclohexane.mol2": {('CAA', 'CAD', 'CAE', 'CAF', 'CAC', 'CAB')}, - "naphthalene.mol2": { - ('CAA', 'CAB', 'CAC', 'CAH', 'CAG', 'CAF'), - ('CAC', 'CAH', 'CAI', 'CAJ', 'CAE', 'CAD')}, - "anthracene.mol2": { - ('CAC', 'CAJ', 'CAK', 'CAL', 'CAE', 'CAD'), - ('CAE', 'CAL', 'CAM', 
'CAN', 'CAG', 'CAF'), - ('CAA', 'CAB', 'CAC', 'CAJ', 'CAI', 'CAH')}, - "phenalene.mol2": { - ('CAA', 'CAE', 'CAF', 'CAG', 'CAC', 'CAB'), - ('CAD', 'CAI', 'CAJ', 'CAK', 'CAF', 'CAE'), - ('CAF', 'CAG', 'CAH', 'CAM', 'CAL', 'CAK')} -} - - -@pytest.mark.parametrize("input_mol2", ["phenalene.mol2"]) -def test_bad_rings(input_mol2): - """Test assignment of torsion angles.""" - ligand = parameterize.ParameterizedMolecule() - mol2_path = Path("tests/data") / input_mol2 - with open(mol2_path, "rt") as mol2_file: - ligand.read(mol2_file) - benchmark = RING_RESULTS[input_mol2] - with pytest.raises(ValueError) as err: - diff = ligand.rings ^ benchmark - if len(diff) > 0: - err = "Ring test failed for %s: %s" % ( - input_mol2, sorted(list(diff))) - raise ValueError(err) - err = "Known bond detection failure for %s: %s" % (input_mol2, err) - _LOGGER.error(err) - - -@pytest.mark.parametrize("input_mol2", [ - "cyclohexane.mol2", "ethanol.mol2", "glycerol.mol2", "anthracene.mol2", - "naphthalene.mol2"]) +@pytest.mark.parametrize("input_mol2", ALL_LIGANDS) def test_rings(input_mol2): """Test assignment of torsion angles.""" ligand = parameterize.ParameterizedMolecule() @@ -129,52 +106,7 @@ def test_rings(input_mol2): _LOGGER.debug(str_) -BOND_RESULTS = { - "cyclohexane.mol2": 6 * ["single"], - "ethanol.mol2": [ - "single", "single", None, "single", "single"], - "glycerol.mol2": [ - None, "single", "single", "single", "single", None, "single", None], - "acetylcholine.mol2": [ - "single", "double", "single", "single", "single", "single", "single", - "single", "single"], - "cyanide.mol2": [ - "triple", "single"], - "pyrrole.mol2": [ - "aromatic", "aromatic", "aromatic", "aromatic", "aromatic", None], - "fatty-acid.mol2": [ - "double", "double", "single", "single", "single", "single", "double", - "single", "single", "single", "single"], - "trimethylamine.mol2": ["single", None, "single", "single"], - "naphthalene.mol2": 11 * ["aromatic"] -} - - -@pytest.mark.parametrize("input_mol2", 
["fatty-acid.mol2", "pyrrole.mol2"]) -def test_bad_bonds(input_mol2): - """Test known failure of detected bond types.""" - ligand = parameterize.ParameterizedMolecule() - mol2_path = Path("tests/data") / input_mol2 - with open(mol2_path, "rt") as mol2_file: - ligand.read(mol2_file) - results = BOND_RESULTS[input_mol2] - for ibond, bond in enumerate(ligand.bonds): - try: - if bond.bond_order != results[ibond]: - err = "Incorrect order for %s. Got %s, expected %s" % ( - str(bond), bond.bond_order, results[ibond]) - err = "Known bond detection failure for %s: %s" % ( - input_mol2, err) - _LOGGER.error(err) - except IndexError: - err = "Add test for %s -- %s (%s)" % ( - input_mol2, str(bond), bond.bond_order) - raise IndexError(err) - - -@pytest.mark.parametrize("input_mol2", [ - "cyclohexane.mol2", "ethanol.mol2", "glycerol.mol2", "acetylcholine.mol2", - "cyanide.mol2", "trimethylamine.mol2", "naphthalene.mol2"]) +@pytest.mark.parametrize("input_mol2", ALL_LIGANDS) def test_bonds(input_mol2): """Test detection of bond types.""" ligand = parameterize.ParameterizedMolecule() @@ -195,7 +127,7 @@ def test_bonds(input_mol2): @pytest.mark.parametrize("input_pdb", ["1HPX", "1QBS", "1US0"], ids=str) -def test_ligand(input_pdb, tmp_path): +def test_ligand_protein(input_pdb, tmp_path): """PROPKA non-regression tests on proteins without ligands.""" ligand = Path("tests/data") / ("%s-ligand.mol2" % input_pdb) args = "--log-level=INFO --ff=AMBER --drop-water --ligand=%s" % ligand From 0de37548ef4164544838fe134a722aaa8842470a Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Sun, 28 Jun 2020 09:56:36 -0700 Subject: [PATCH 22/31] Working charge assignment with PEOE. Yay!!! 
--- pdb2pqr/pdb2pqr/ligand/parameterize.py | 2 +- pdb2pqr/pdb2pqr/ligand/peoe.py | 58 ++- ...hylamine.mol2 => tetramethylammonium.mol2} | 0 pdb2pqr/tests/ligand_results.py | 485 +++++++++++++++++- pdb2pqr/tests/ligand_test.py | 40 +- 5 files changed, 547 insertions(+), 38 deletions(-) rename pdb2pqr/tests/data/{trimethylamine.mol2 => tetramethylammonium.mol2} (100%) diff --git a/pdb2pqr/pdb2pqr/ligand/parameterize.py b/pdb2pqr/pdb2pqr/ligand/parameterize.py index a3b9d404a..32e4af26f 100644 --- a/pdb2pqr/pdb2pqr/ligand/parameterize.py +++ b/pdb2pqr/pdb2pqr/ligand/parameterize.py @@ -39,7 +39,7 @@ class ParameterizedMolecule(Mol2Molecule): atom.charge = atom.formal_charge equilibrate(ligand.atoms.values()) for atom in ligand.atoms.values(): - elem = atom.element + elem = atom.element.capitalize() charge = atom.charge try: radius = PARSE_RADII[elem] diff --git a/pdb2pqr/pdb2pqr/ligand/peoe.py b/pdb2pqr/pdb2pqr/ligand/peoe.py index 452ae5b30..0a6aa08f4 100644 --- a/pdb2pqr/pdb2pqr/ligand/peoe.py +++ b/pdb2pqr/pdb2pqr/ligand/peoe.py @@ -105,7 +105,7 @@ def assign_terms(atoms, term_dict): modified list of atoms """ for atom in atoms: - atom_type = atom.atom_type.upper() + atom_type = atom.type.upper() if atom_type == 'O.3': atom_type = 'O.OH' try: @@ -135,37 +135,43 @@ def equilibrate( abs_qges = 0.0 for atom in atoms: if isclose(atom.charge, 0.0): - atom.formal_charge = 0.0 + atom.equil_formal_charge = 0.0 else: - atom.formal_charge = atom.charge*(1.0/scale) + # PEOE multiples all atoms by a scaling factor at the end to account + # for increased polarizability. The initial formal charge needs to + # be reduced to account for this scaling. + atom.equil_formal_charge = atom.charge*(1.0/scale) abs_qges += abs(atom.charge) + atom.charge = 0 + # A finite number of cycles is used to prevent complete equilibration of the + # molecule. 
I'm not sure why this is a good idea but people have been doing + # it since the original 1978 Tetrahedron paper with Gasteiger & Marsili for icycle in range(num_cycles): for atom1 in atoms: - atom1.chi = electronegativity( - atom1.charge, atom1.poly_terms, atom1.atom_type) + chi1 = electronegativity( + atom1.charge, atom1.poly_terms, atom1.type) atom1.delta_charge = 0.0 - for bonded_atom in atom1.bonded_atoms: - for atom2 in atoms: - if atom2.name == bonded_atom.name: - chi2 = electronegativity( - atom2.charge, atom2.poly_terms, - atom2.atom_type) - chi_diff = chi2 - atom1.chi - if chi_diff > 0: - chi_norm = electronegativity( - +1, atom1.poly_terms, atom1.atom_type) - else: - chi_norm = electronegativity( - +1, atom2.poly_terms, atom2.atom_type) - atom1.delta_charge += ( - (chi_diff/chi_norm)*(damp**icycle)) - for atom1 in atoms: + for atom2 in atom1.bonded_atoms: + chi2 = electronegativity( + atom2.charge, atom2.poly_terms, atom2.type) + chi_diff = chi2 - chi1 + if chi2 > chi1: + chi_norm = electronegativity( + +1, atom1.poly_terms, atom1.type) + else: + chi_norm = electronegativity( + +1, atom2.poly_terms, atom2.type) + # Damping is used in PEOE to accelerate convergence + atom1.delta_charge += ( + (chi_diff/chi_norm)*(damp**(icycle+1))) + for atom in atoms: if isclose(abs_qges, 0.0): - atom1.charge += atom1.delta_charge + atom.charge += atom.delta_charge else: - atom1.charge += ( - atom1.delta_charge + (1.0/6.0) * atom1.formal_charge) - for atom1 in atoms: - atom1.charge = scale * atom1.charge + atom.charge += ( + atom.delta_charge + + (1.0/num_cycles) * atom.equil_formal_charge) + for atom in atoms: + atom.charge = scale * atom.charge return atoms diff --git a/pdb2pqr/tests/data/trimethylamine.mol2 b/pdb2pqr/tests/data/tetramethylammonium.mol2 similarity index 100% rename from pdb2pqr/tests/data/trimethylamine.mol2 rename to pdb2pqr/tests/data/tetramethylammonium.mol2 diff --git a/pdb2pqr/tests/ligand_results.py b/pdb2pqr/tests/ligand_results.py index 
d20f05c56..ef577e770 100644 --- a/pdb2pqr/tests/ligand_results.py +++ b/pdb2pqr/tests/ligand_results.py @@ -55,7 +55,7 @@ BOND_RESULTS = { "fatty-acid.mol2": [ "double", "double", "single", "single", "single", "single", "double", "single", "single", "single", "single"], - "trimethylamine.mol2": ["single", None, "single", "single"], + "tetramethylammonium.mol2": ["single", None, "single", "single"], "naphthalene.mol2": 11 * ["aromatic"] } @@ -76,5 +76,486 @@ FORMAL_CHARGE_RESULTS = { "naphthalene.mol2": 18*[0], "crown-ether.mol2": 42*[0], "pyrrole.mol2": 10*[0], - "trimethylamine.mol2": 4*[0] + [1] + 12*[0] + "tetramethylammonium.mol2": 4*[0] + [1] + 12*[0] +} + + +PARTIAL_CHARGE_RESULTS = { + "1HPX-ligand.mol2": [ + {'name': 'C1', 'charge': -0.043081773453678196}, + {'name': 'N1', 'charge': -0.35678069534508244}, + {'name': 'O1', 'charge': -0.5711988943947174}, + {'name': 'S1', 'charge': -0.36230725415046}, + {'name': 'C2', 'charge': -0.07546448002333427}, + {'name': 'N2', 'charge': -0.703336840791423}, + {'name': 'O2', 'charge': -0.5909676444057169}, + {'name': 'S2', 'charge': -0.31326568130416904}, + {'name': 'C3', 'charge': 0.012184036151037315}, + {'name': 'N3', 'charge': -0.7139711103762134}, + {'name': 'O3', 'charge': -0.2919660371248733}, + {'name': 'C4', 'charge': -0.024891112570323084}, + {'name': 'N4', 'charge': -0.7120419698971743}, + {'name': 'O4', 'charge': -0.2689251910425875}, + {'name': 'C5', 'charge': -0.02721768449096584}, + {'name': 'N5', 'charge': -0.7179288700589408}, + {'name': 'O5', 'charge': -0.2884010361503686}, + {'name': 'C6', 'charge': 0.0753432557874654}, + {'name': 'O6', 'charge': -0.2912545916584642}, + {'name': 'C7', 'charge': -0.06673754373256466}, + {'name': 'C8', 'charge': -0.11391059914410395}, + {'name': 'C9', 'charge': -0.10324637080138911}, + {'name': 'C10', 'charge': 0.0955779782243598}, + {'name': 'C11', 'charge': 0.13325937240601735}, + {'name': 'C12', 'charge': -0.007454479991213576}, + {'name': 'C13', 'charge': 
0.1300074666489752}, + {'name': 'C14', 'charge': 0.03316079200653884}, + {'name': 'C15', 'charge': 0.1162788489650129}, + {'name': 'C16', 'charge': -0.05012048448800629}, + {'name': 'C17', 'charge': 0.17629988149001455}, + {'name': 'C18', 'charge': 0.1273124481017021}, + {'name': 'C19', 'charge': 0.009718364614545697}, + {'name': 'C20', 'charge': 0.05373224349483613}, + {'name': 'C21', 'charge': 0.14236781888812639}, + {'name': 'C22', 'charge': -0.018858356195459093}, + {'name': 'C23', 'charge': -0.08872387572707603}, + {'name': 'C24', 'charge': -0.08872387572707603}, + {'name': 'C25', 'charge': -0.08872387572707603}, + {'name': 'C26', 'charge': 0.10593153852032608}, + {'name': 'C27', 'charge': -0.05782750620937734}, + {'name': 'C28', 'charge': -0.09940049289330599}, + {'name': 'C29', 'charge': -0.11788247675515531}, + {'name': 'C30', 'charge': -0.12669321002628098}, + {'name': 'C31', 'charge': -0.12840215383108972}, + {'name': 'C32', 'charge': -0.12669321002628098}, + {'name': 'C33', 'charge': -0.11788247675515531}, + {'name': 'HN2', 'charge': 0.47365062709293504}, + {'name': 'HN3', 'charge': 0.46813799183459115}, + {'name': 'HN5', 'charge': 0.4699450251195792}, + {'name': 'HO2', 'charge': 0.4135083335659837}, + {'name': 'H1', 'charge': 0.1930651515377243}, + {'name': 'H2', 'charge': 0.14930498376978826}, + {'name': 'H5', 'charge': 0.2004887586723136}, + {'name': 'H7', 'charge': 0.1568207646303434}, + {'name': 'H8', 'charge': 0.13234810374569977}, + {'name': 'H9', 'charge': 0.13832988708343302}, + {'name': '1H10', 'charge': 0.2090610332096663}, + {'name': '2H10', 'charge': 0.2090610332096663}, + {'name': '1H12', 'charge': 0.12111462041395545}, + {'name': '2H12', 'charge': 0.12111462041395545}, + {'name': 'H14', 'charge': 0.1675599590015787}, + {'name': 'H15', 'charge': 0.22760838606623204}, + {'name': '1H16', 'charge': 0.0852867334715187}, + {'name': '2H16', 'charge': 0.0852867334715187}, + {'name': 'H18', 'charge': 0.24317031751195953}, + {'name': '1H19', 
'charge': 0.12776560931027464}, + {'name': '2H19', 'charge': 0.12776560931027464}, + {'name': '1H20', 'charge': 0.19565330098691133}, + {'name': '2H20', 'charge': 0.19565330098691133}, + {'name': '1H23', 'charge': 0.05671902293773316}, + {'name': '2H23', 'charge': 0.05671902293773316}, + {'name': '3H23', 'charge': 0.05671902293773316}, + {'name': '1H24', 'charge': 0.05671902293773316}, + {'name': '2H24', 'charge': 0.05671902293773316}, + {'name': '3H24', 'charge': 0.05671902293773316}, + {'name': '1H25', 'charge': 0.05671902293773316}, + {'name': '2H25', 'charge': 0.05671902293773316}, + {'name': '3H25', 'charge': 0.05671902293773316}, + {'name': 'H26', 'charge': 0.21276929701184144}, + {'name': '1H27', 'charge': 0.0838296767957033}, + {'name': '2H27', 'charge': 0.0838296767957033}, + {'name': '3H27', 'charge': 0.0838296767957033}, + {'name': 'H29', 'charge': 0.1344214537548114}, + {'name': 'H30', 'charge': 0.12944135663694809}, + {'name': 'H31', 'charge': 0.12895177093126134}, + {'name': 'H32', 'charge': 0.12944135663694809}, + {'name': 'H33', 'charge': 0.1344214537548114}], + "1QBS-ligand.mol2": [ + {'name': 'C1', 'charge': 0.2819290256115998}, + {'name': 'C3', 'charge': 0.044938629062860544}, + {'name': 'C4', 'charge': 0.029905546428555488}, + {'name': 'C5', 'charge': 0.029905546428555488}, + {'name': 'C6', 'charge': 0.044938629062860544}, + {'name': 'C20', 'charge': 0.02388740859601932}, + {'name': 'C21', 'charge': -0.06923087776665914}, + {'name': 'C22', 'charge': -0.10856468032765192}, + {'name': 'C23', 'charge': -0.11036338907520785}, + {'name': 'C24', 'charge': -0.08244776383718133}, + {'name': 'C25', 'charge': -0.11036338907520785}, + {'name': 'C26', 'charge': -0.10856468032765192}, + {'name': 'C27', 'charge': -0.021297911008685094}, + {'name': 'C30', 'charge': -0.04323885537049762}, + {'name': 'C31', 'charge': -0.09772541979676261}, + {'name': 'C32', 'charge': -0.11774626937995367}, + {'name': 'C33', 'charge': -0.12668933876013122}, + {'name': 'C34', 
'charge': -0.12840215383108972}, + {'name': 'C35', 'charge': -0.12668933876013122}, + {'name': 'C36', 'charge': -0.11774626937995367}, + {'name': 'C60', 'charge': -0.04323885537049762}, + {'name': 'C61', 'charge': -0.09772541979676261}, + {'name': 'C62', 'charge': -0.11774626937995367}, + {'name': 'C63', 'charge': -0.12668933876013122}, + {'name': 'C64', 'charge': -0.12840215383108972}, + {'name': 'C65', 'charge': -0.12668933876013122}, + {'name': 'C66', 'charge': -0.11774626937995367}, + {'name': 'C70', 'charge': 0.02388740859601932}, + {'name': 'C71', 'charge': -0.06923087776665914}, + {'name': 'C72', 'charge': -0.10856468032765192}, + {'name': 'C73', 'charge': -0.11036338907520785}, + {'name': 'C74', 'charge': -0.08244776383718133}, + {'name': 'C75', 'charge': -0.11036338907520785}, + {'name': 'C76', 'charge': -0.10856468032765192}, + {'name': 'C77', 'charge': -0.021297911008685094}, + {'name': 'N2', 'charge': -0.7121260761527837}, + {'name': 'N7', 'charge': -0.7121260761527837}, + {'name': 'O1', 'charge': -0.19835509547371977}, + {'name': 'O4', 'charge': -0.6060823059727466}, + {'name': 'O5', 'charge': -0.6060823059727466}, + {'name': 'O27', 'charge': -0.6190964861641765}, + {'name': 'O77', 'charge': -0.6190964861641765}, + {'name': 'H1', 'charge': 0.1939484454048374}, + {'name': 'H2', 'charge': 0.1783837779801701}, + {'name': 'H3', 'charge': 0.1783837779801701}, + {'name': 'H4', 'charge': 0.1939484454048374}, + {'name': 'H5', 'charge': 0.1660607436631621}, + {'name': 'H6', 'charge': 0.1660607436631621}, + {'name': 'H7', 'charge': 0.13696512588510704}, + {'name': 'H8', 'charge': 0.13656070949143695}, + {'name': 'H9', 'charge': 0.13656070949143695}, + {'name': 'H10', 'charge': 0.13696512588510704}, + {'name': 'H11', 'charge': 0.1336796298532153}, + {'name': 'H12', 'charge': 0.1336796298532153}, + {'name': 'H13', 'charge': 0.08892832134853815}, + {'name': 'H14', 'charge': 0.08892832134853815}, + {'name': 'H15', 'charge': 0.13442892363549896}, + {'name': 'H16', 
'charge': 0.12944135663694809}, + {'name': 'H17', 'charge': 0.12895177093126134}, + {'name': 'H18', 'charge': 0.12944135663694809}, + {'name': 'H19', 'charge': 0.13442892363549896}, + {'name': 'H20', 'charge': 0.08892832134853815}, + {'name': 'H21', 'charge': 0.08892832134853815}, + {'name': 'H22', 'charge': 0.13442892363549896}, + {'name': 'H23', 'charge': 0.12944135663694809}, + {'name': 'H24', 'charge': 0.12895177093126134}, + {'name': 'H25', 'charge': 0.12944135663694809}, + {'name': 'H26', 'charge': 0.13442892363549896}, + {'name': 'H27', 'charge': 0.1660607436631621}, + {'name': 'H28', 'charge': 0.1660607436631621}, + {'name': 'H29', 'charge': 0.13696512588510704}, + {'name': 'H30', 'charge': 0.13656070949143695}, + {'name': 'H31', 'charge': 0.13656070949143695}, + {'name': 'H32', 'charge': 0.13696512588510704}, + {'name': 'H33', 'charge': 0.1336796298532153}, + {'name': 'H34', 'charge': 0.1336796298532153}, + {'name': 'H35', 'charge': 0.4089032076940611}, + {'name': 'H36', 'charge': 0.4089032076940611}, + {'name': 'H37', 'charge': 0.40353983279195227}, + {'name': 'H38', 'charge': 0.40353983279195227}], + "1US0-ligand.mol2": [ + {'name': 'C2', 'charge': -0.011692507946738866}, + {'name': 'C3', 'charge': -0.07564532850595106}, + {'name': 'C4', 'charge': 0.0995106215449818}, + {'name': 'C5', 'charge': 0.087912610051304}, + {'name': 'C6', 'charge': -0.06822502752899269}, + {'name': 'C7', 'charge': 0.071206877934909}, + {'name': 'BR8', 'charge': -0.1781415724110881}, + {'name': 'F9', 'charge': -0.24609828999108224}, + {'name': 'C11', 'charge': 0.01969593460035858}, + {'name': 'C13', 'charge': -0.012644223822613768}, + {'name': 'F14', 'charge': -0.24298404863346873}, + {'name': 'O15', 'charge': -0.564853763228142}, + {'name': 'S16', 'charge': -0.08677321834382315}, + {'name': 'N17', 'charge': -0.5406144014197267}, + {'name': 'C20', 'charge': 0.11379374529001238}, + {'name': 'C24', 'charge': -0.025165867794008426}, + {'name': 'C25', 'charge': 0.05247378966494793}, 
+ {'name': 'C26', 'charge': -0.09820934352177407}, + {'name': 'C27', 'charge': 0.09943434454444673}, + {'name': 'C28', 'charge': -0.030058244343191105}, + {'name': 'C29', 'charge': -0.08190135488592247}, + {'name': 'C32', 'charge': 0.13523301177634597}, + {'name': 'O33', 'charge': -0.6823845278785808}, + {'name': 'O34', 'charge': -0.6823845278785808}, + {'name': 'H1', 'charge': 0.1785741592725812}, + {'name': 'H2', 'charge': 0.1460165676666413}, + {'name': 'H3', 'charge': 0.15178137252163487}, + {'name': 'H4', 'charge': 0.12305674443192938}, + {'name': 'H5', 'charge': 0.12305674443192938}, + {'name': 'H6', 'charge': 0.3361191041929824}, + {'name': 'H7', 'charge': 0.21824692354069947}, + {'name': 'H8', 'charge': 0.21824692354069947}, + {'name': 'H9', 'charge': 0.13902103205782942}, + {'name': 'H10', 'charge': 0.16785538090669733}, + {'name': 'H11', 'charge': 0.14654036016275432}], + "acetate.mol2": [ + {'name': 'OAC', 'charge': -0.6920003959699945}, + {'name': 'CAB', 'charge': 0.08786702981704235}, + {'name': 'OAD', 'charge': -0.6920003959699945}, + {'name': 'CAA', 'charge': -0.007013553584021696}, + {'name': 'HAB', 'charge': 0.10104910523565629}, + {'name': 'HAC', 'charge': 0.10104910523565629}, + {'name': 'HAA', 'charge': 0.10104910523565629}], + "acetonitrile.mol2": [ + {'name': 'NAC', 'charge': -0.3658493397941428}, + {'name': 'CAB', 'charge': 0.047166532439326746}, + {'name': 'CAA', 'charge': 0.0031209482290866543}, + {'name': 'HAB', 'charge': 0.10518728637524313}, + {'name': 'HAC', 'charge': 0.10518728637524313}, + {'name': 'HAA', 'charge': 0.10518728637524313}], + "acetylcholine.mol2": [ + {'name': 'CAI', 'charge': -0.008419213752965279}, + {'name': 'HAK', 'charge': 0.09728030818644891}, + {'name': 'HAL', 'charge': 0.09728030818644891}, + {'name': 'HAM', 'charge': 0.09728030818644891}, + {'name': 'CAH', 'charge': 0.1545682666391745}, + {'name': 'OAB', 'charge': -0.27967627950752316}, + {'name': 'OAG', 'charge': -0.5429502778137623}, + {'name': 'CAF', 
'charge': 0.037889841506971396}, + {'name': 'HAI', 'charge': 0.1710371780188628}, + {'name': 'HAJ', 'charge': 0.1710371780188628}, + {'name': 'CAE', 'charge': 0.0572339093088662}, + {'name': 'HAG', 'charge': 0.16582144135089144}, + {'name': 'HAH', 'charge': 0.16582144135089144}, + {'name': 'NAD', 'charge': -0.6101582744216567}, + {'name': 'CAJ', 'charge': 0.016251330821632083}, + {'name': 'HAO', 'charge': 0.13079998580857152}, + {'name': 'HAP', 'charge': 0.13079998580857152}, + {'name': 'HAN', 'charge': 0.13079998580857152}, + {'name': 'CAA', 'charge': 0.016251330821632083}, + {'name': 'HAB', 'charge': 0.13079998580857152}, + {'name': 'HAC', 'charge': 0.13079998580857152}, + {'name': 'HAA', 'charge': 0.13079998580857152}, + {'name': 'CAC', 'charge': 0.016251330821632083}, + {'name': 'HAE', 'charge': 0.13079998580857152}, + {'name': 'HAF', 'charge': 0.13079998580857152}, + {'name': 'HAD', 'charge': 0.13079998580857152}], + "adp.mol2": [ + {'name': 'O2B', 'charge': -0.9292599002864984}, + {'name': 'PB', 'charge': 0.2901422925812394}, + {'name': 'O3B', 'charge': -0.15563495903544203}, + {'name': 'O1B', 'charge': -0.10031235497346548}, + {'name': 'O3A', 'charge': -0.22961271227927002}, + {'name': 'PA', 'charge': 0.20924046046691175}, + {'name': 'O2A', 'charge': -0.9415448332469695}, + {'name': 'O1A', 'charge': -0.11862974402094492}, + {'name': "O5'", 'charge': -0.46621302263040854}, + {'name': "C5'", 'charge': 0.0798878056064067}, + {'name': "H5'", 'charge': 0.18839134309240957}, + {'name': 'H5S', 'charge': 0.18839134309240957}, + {'name': "C4'", 'charge': 0.0695445224862847}, + {'name': "H4'", 'charge': 0.20776211836320221}, + {'name': "O4'", 'charge': -0.5870759018169631}, + {'name': "C3'", 'charge': 0.03852622354001534}, + {'name': "H3'", 'charge': 0.17992130819444715}, + {'name': "O3'", 'charge': -0.6050957763635193}, + {'name': 'H8L', 'charge': 0.408917570005205}, + {'name': "C2'", 'charge': 0.04785444900747468}, + {'name': "H2'", 'charge': 0.18698431697045484}, + 
{'name': "O2'", 'charge': -0.6030269458228865}, + {'name': 'H8M', 'charge': 0.40956200683924104}, + {'name': "C1'", 'charge': 0.1072884172431409}, + {'name': "H1'", 'charge': 0.25190824252954014}, + {'name': 'N9', 'charge': -0.32267909962343944}, + {'name': 'C8', 'charge': 0.03749022549830809}, + {'name': 'H8', 'charge': 0.24861896888732188}, + {'name': 'N7', 'charge': -0.2902489369083555}, + {'name': 'C5', 'charge': 0.15701292932781932}, + {'name': 'C4', 'charge': 0.16341092847650182}, + {'name': 'N3', 'charge': -0.28837259982632546}, + {'name': 'C2', 'charge': 0.00966560470099504}, + {'name': 'H2', 'charge': 0.23340110948012485}, + {'name': 'N1', 'charge': -0.3751603212355695}, + {'name': 'H1', 'charge': 0.2881016272229071}, + {'name': 'C6', 'charge': 0.10850097968415028}, + {'name': 'N6', 'charge': -0.46957750571325696}, + {'name': 'H6', 'charge': 0.3719198204868037}], + "anthracene.mol2": [ + {'name': 'CAB', 'charge': -0.10948195185220932}, + {'name': 'HAB', 'charge': 0.13718781330297145}, + {'name': 'CAC', 'charge': -0.05951797220652635}, + {'name': 'CAD', 'charge': -0.090739817012503}, + {'name': 'HAD', 'charge': 0.14537539214857495}, + {'name': 'CAE', 'charge': -0.05951797220652635}, + {'name': 'CAF', 'charge': -0.10948195185220932}, + {'name': 'HAF', 'charge': 0.13718781330297145}, + {'name': 'CAG', 'charge': -0.1252004685570065}, + {'name': 'HAG', 'charge': 0.12969479174473472}, + {'name': 'CAN', 'charge': -0.1252004685570065}, + {'name': 'HAN', 'charge': 0.12969479174473472}, + {'name': 'CAM', 'charge': -0.10948195185220932}, + {'name': 'HAM', 'charge': 0.13718781330297145}, + {'name': 'CAL', 'charge': -0.05951797220652635}, + {'name': 'CAK', 'charge': -0.090739817012503}, + {'name': 'HAK', 'charge': 0.14537539214857495}, + {'name': 'CAJ', 'charge': -0.05951797220652635}, + {'name': 'CAI', 'charge': -0.10948195185220932}, + {'name': 'HAI', 'charge': 0.13718781330297145}, + {'name': 'CAH', 'charge': -0.1252004685570065}, + {'name': 'HAH', 'charge': 
0.12969479174473472}, + {'name': 'CAA', 'charge': -0.1252004685570065}, + {'name': 'HAA', 'charge': 0.12969479174473472}], + "crown-ether.mol2": [ + {'name': 'CAB', 'charge': -0.0020089927963956975}, + {'name': 'HAB', 'charge': 0.1571150829116664}, + {'name': 'HAC', 'charge': 0.1571150829116664}, + {'name': 'OAA', 'charge': -0.6244423460538743}, + {'name': 'CAH', 'charge': -0.002008992796395719}, + {'name': 'HAJ', 'charge': 0.1571150829116664}, + {'name': 'HAK', 'charge': 0.1571150829116664}, + {'name': 'CAJ', 'charge': -0.0020089927963956975}, + {'name': 'HAN', 'charge': 0.1571150829116664}, + {'name': 'HAO', 'charge': 0.1571150829116664}, + {'name': 'OAL', 'charge': -0.6244423460538743}, + {'name': 'CAM', 'charge': -0.002008992796395719}, + {'name': 'HAR', 'charge': 0.1571150829116664}, + {'name': 'HAS', 'charge': 0.1571150829116664}, + {'name': 'CAN', 'charge': -0.0020089927963956975}, + {'name': 'HAT', 'charge': 0.1571150829116664}, + {'name': 'HAU', 'charge': 0.1571150829116664}, + {'name': 'OAO', 'charge': -0.6244423460538743}, + {'name': 'CAP', 'charge': -0.002008992796395719}, + {'name': 'HAV', 'charge': 0.1571150829116664}, + {'name': 'HAW', 'charge': 0.1571150829116664}, + {'name': 'CAQ', 'charge': -0.0020089927963956975}, + {'name': 'HAX', 'charge': 0.1571150829116664}, + {'name': 'HAY', 'charge': 0.1571150829116664}, + {'name': 'OAR', 'charge': -0.6244423460538743}, + {'name': 'CAK', 'charge': -0.002008992796395719}, + {'name': 'HAP', 'charge': 0.1571150829116664}, + {'name': 'HAQ', 'charge': 0.1571150829116664}, + {'name': 'CAI', 'charge': -0.0020089927963956975}, + {'name': 'HAL', 'charge': 0.1571150829116664}, + {'name': 'HAM', 'charge': 0.1571150829116664}, + {'name': 'OAG', 'charge': -0.6244423460538743}, + {'name': 'CAF', 'charge': -0.002008992796395719}, + {'name': 'HAH', 'charge': 0.1571150829116664}, + {'name': 'HAI', 'charge': 0.1571150829116664}, + {'name': 'CAE', 'charge': -0.0020089927963956975}, + {'name': 'HAF', 'charge': 
0.1571150829116664}, + {'name': 'HAG', 'charge': 0.1571150829116664}, + {'name': 'OAD', 'charge': -0.6244423460538743}, + {'name': 'CAC', 'charge': -0.002008992796395719}, + {'name': 'HAE', 'charge': 0.1571150829116664}, + {'name': 'HAD', 'charge': 0.1571150829116664}], + "cyclohexane.mol2": [ + {'name': 'CAA', 'charge': -0.10295775116564801}, + {'name': 'HAA', 'charge': 0.051478875582824005}, + {'name': 'HAB', 'charge': 0.051478875582824005}, + {'name': 'CAB', 'charge': -0.10295775116564801}, + {'name': 'HAC', 'charge': 0.051478875582824005}, + {'name': 'HAD', 'charge': 0.051478875582824005}, + {'name': 'CAC', 'charge': -0.10295775116564801}, + {'name': 'HAE', 'charge': 0.051478875582824005}, + {'name': 'HAF', 'charge': 0.051478875582824005}, + {'name': 'CAF', 'charge': -0.10295775116564801}, + {'name': 'HAK', 'charge': 0.051478875582824005}, + {'name': 'HAL', 'charge': 0.051478875582824005}, + {'name': 'CAE', 'charge': -0.10295775116564801}, + {'name': 'HAI', 'charge': 0.051478875582824005}, + {'name': 'HAJ', 'charge': 0.051478875582824005}, + {'name': 'CAD', 'charge': -0.10295775116564801}, + {'name': 'HAH', 'charge': 0.051478875582824005}, + {'name': 'HAG', 'charge': 0.051478875582824005}], + "ethanol.mol2": [ + {'name': 'CAA', 'charge': -0.09830853015557071}, + {'name': 'HAA', 'charge': 0.04965533009730527}, + {'name': 'HAB', 'charge': 0.04965533009730527}, + {'name': 'HAC', 'charge': 0.04965533009730527}, + {'name': 'CAB', 'charge': -0.05333201263132795}, + {'name': 'HAD', 'charge': 0.1136006498229285}, + {'name': 'HAE', 'charge': 0.1136006498229285}, + {'name': 'OAC', 'charge': -0.6256712436819194}, + {'name': 'HAF', 'charge': 0.4011444965310452}], + "fatty-acid.mol2": [ + {'name': 'OAA', 'charge': -0.6905415761361866}, + {'name': 'CAB', 'charge': 0.09491530638614049}, + {'name': 'OAL', 'charge': -0.6905415761361866}, + {'name': 'CAC', 'charge': 0.012573264939255872}, + {'name': 'HAC', 'charge': 0.11848768797200306}, + {'name': 'HAD', 'charge': 
0.11848768797200306}, + {'name': 'CAD', 'charge': -0.07742980670292797}, + {'name': 'HAE', 'charge': 0.06140059278092227}, + {'name': 'HAF', 'charge': 0.06140059278092227}, + {'name': 'CAE', 'charge': -0.076959643935905}, + {'name': 'HAG', 'charge': 0.07285165771612633}, + {'name': 'HAH', 'charge': 0.07285165771612633}, + {'name': 'CAF', 'charge': -0.24236491728150775}, + {'name': 'HAI', 'charge': 0.16485599597989423}, + {'name': 'CAG', 'charge': -0.2426026937449157}, + {'name': 'HAJ', 'charge': 0.1648388876026803}, + {'name': 'CAH', 'charge': -0.07965667787077134}, + {'name': 'HAK', 'charge': 0.07232360507910963}, + {'name': 'HAL', 'charge': 0.07232360507910963}, + {'name': 'CAI', 'charge': -0.09862724378017657}, + {'name': 'HAM', 'charge': 0.05403847507524275}, + {'name': 'HAN', 'charge': 0.05403847507524275}, + {'name': 'CAJ', 'charge': -0.10608184708836539}, + {'name': 'HAO', 'charge': 0.04966180988232633}, + {'name': 'HAP', 'charge': 0.04966180988232633}, + {'name': 'CAK', 'charge': -0.11173869603286389}, + {'name': 'HAR', 'charge': 0.04061118893012504}, + {'name': 'HAS', 'charge': 0.04061118893012504}, + {'name': 'HAQ', 'charge': 0.04061118893012504}], + "glycerol.mol2": [ + {'name': 'OAB', 'charge': -0.6193219106971865}, + {'name': 'HAG', 'charge': 0.4033320347133161}, + {'name': 'CAA', 'charge': -0.025227212803925057}, + {'name': 'HAA', 'charge': 0.1316040991547254}, + {'name': 'HAB', 'charge': 0.1316040991547254}, + {'name': 'CAC', 'charge': -0.00300604112002907}, + {'name': 'HAC', 'charge': 0.16288785945019288}, + {'name': 'OAD', 'charge': -0.6115908338990877}, + {'name': 'HAD', 'charge': 0.4077267965256133}, + {'name': 'CAE', 'charge': -0.025227212803925054}, + {'name': 'HAE', 'charge': 0.1316040991547254}, + {'name': 'HAF', 'charge': 0.1316040991547254}, + {'name': 'OAF', 'charge': -0.6193219106971865}, + {'name': 'HAH', 'charge': 0.4033320347133161}], + "naphthalene.mol2": [ + {'name': 'CAB', 'charge': -0.10985826436427909}, + {'name': 'HAB', 'charge': 
0.13714843913419308}, + {'name': 'CAC', 'charge': -0.06352790423000582}, + {'name': 'CAD', 'charge': -0.10985826436427909}, + {'name': 'HAD', 'charge': 0.13714843913419308}, + {'name': 'CAE', 'charge': -0.12522022371734542}, + {'name': 'HAE', 'charge': 0.1296940010624343}, + {'name': 'CAJ', 'charge': -0.12522022371734542}, + {'name': 'HAJ', 'charge': 0.1296940010624343}, + {'name': 'CAI', 'charge': -0.10985826436427909}, + {'name': 'HAI', 'charge': 0.13714843913419308}, + {'name': 'CAH', 'charge': -0.06352790423000582}, + {'name': 'CAG', 'charge': -0.10985826436427909}, + {'name': 'HAG', 'charge': 0.13714843913419308}, + {'name': 'CAF', 'charge': -0.12522022371734542}, + {'name': 'HAF', 'charge': 0.1296940010624343}, + {'name': 'CAA', 'charge': -0.12522022371734542}, + {'name': 'HAA', 'charge': 0.1296940010624343}], + "pyrrole.mol2": [ + {'name': 'CAE', 'charge': -0.02725132001441914}, + {'name': 'HAE', 'charge': 0.104834463409739}, + {'name': 'HAF', 'charge': 0.104834463409739}, + {'name': 'CAD', 'charge': -0.19753848158167037}, + {'name': 'HAD', 'charge': 0.18534981863154645}, + {'name': 'CAA', 'charge': -0.1112561716335608}, + {'name': 'HAA', 'charge': 0.2699321111667989}, + {'name': 'NAB', 'charge': -0.44475473234270757}, + {'name': 'CAC', 'charge': -0.13358085588726296}, + {'name': 'HAC', 'charge': 0.24943070484179752}], + "tetramethylammonium.mol2": [ + {'name': 'CAA', 'charge': 0.013840200623221964}, + {'name': 'HAA', 'charge': 0.12997929051575582}, + {'name': 'HAB', 'charge': 0.12997929051575582}, + {'name': 'HAC', 'charge': 0.12997929051575582}, + {'name': 'NAC', 'charge': -0.6151122886819584}, + {'name': 'CAB', 'charge': 0.013840200623221978}, + {'name': 'HAE', 'charge': 0.12997929051575582}, + {'name': 'HAF', 'charge': 0.12997929051575582}, + {'name': 'HAD', 'charge': 0.12997929051575582}, + {'name': 'CAE', 'charge': 0.013840200623221978}, + {'name': 'HAK', 'charge': 0.12997929051575582}, + {'name': 'HAL', 'charge': 0.12997929051575582}, + {'name': 
'HAJ', 'charge': 0.12997929051575582}, + {'name': 'CAD', 'charge': 0.013840200623221978}, + {'name': 'HAH', 'charge': 0.12997929051575582}, + {'name': 'HAI', 'charge': 0.12997929051575582}, + {'name': 'HAG', 'charge': 0.12997929051575582}] } diff --git a/pdb2pqr/tests/ligand_test.py b/pdb2pqr/tests/ligand_test.py index 0df495f11..a17814664 100644 --- a/pdb2pqr/tests/ligand_test.py +++ b/pdb2pqr/tests/ligand_test.py @@ -3,10 +3,12 @@ import logging from math import isclose from pathlib import Path import pytest +import pandas as pd +from numpy.testing import assert_almost_equal from pdb2pqr.ligand import parameterize import common from ligand_results import TORSION_RESULTS, RING_RESULTS, BOND_RESULTS -from ligand_results import FORMAL_CHARGE_RESULTS +from ligand_results import FORMAL_CHARGE_RESULTS, PARTIAL_CHARGE_RESULTS _LOGGER = logging.getLogger(__name__) @@ -24,18 +26,38 @@ ALL_LIGANDS = sorted(list(ALL_LIGANDS)) @pytest.mark.parametrize("input_mol2", ALL_LIGANDS) def test_parameterization(input_mol2): """Testing basic aspects of code breaking.""" - _LOGGER.warning("Ideally, this would be a regression test.") ligand = parameterize.ParameterizedMolecule() mol2_path = Path("tests/data") / input_mol2 with open(mol2_path, "rt") as mol2_file: ligand.read(mol2_file) - for atom in ligand.atoms.values(): - atom.charge = atom.formal_charge - atom.old_charge = atom.charge - ligand.update(ligand) - for atom in ligand.atoms.values(): - fmt = "{a!s} -- {a.old_charge:5.2f} -> {a.charge:5.2f}" - _LOGGER.info(fmt.format(a=atom)) + old_total_charge = 0 + for atom in ligand.atoms.values(): + atom.charge = atom.formal_charge + old_total_charge += atom.charge + atom.old_charge = atom.charge + ligand.update(ligand) + new_total_charge = 0 + test_results = [] + for atom in ligand.atoms.values(): + test_row = { + "name": atom.name, "charge": atom.charge} + test_results.append(test_row) + new_total_charge += atom.charge + _LOGGER.info("Test results: %s", test_results) + test_results = 
pd.DataFrame(test_results) + test_results = test_results.set_index("name") + _LOGGER.debug("Test results:\n%s", test_results) + _LOGGER.info( + "Total charge: %5.2f -> %5.2f", old_total_charge, new_total_charge) + expected_results = pd.DataFrame(PARTIAL_CHARGE_RESULTS[input_mol2]) + expected_results = expected_results.set_index("name") + diff_results = test_results - expected_results + _LOGGER.debug( + "Difference between test and expected results:\n%s", + diff_results.to_string()) + assert_almost_equal( + test_results["charge"].to_numpy(), + expected_results["charge"].to_numpy()) @pytest.mark.parametrize("input_mol2", ALL_LIGANDS) From bc8a4158f1c87457b629d2a336d7be8dd33a3324 Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Sun, 28 Jun 2020 10:15:38 -0700 Subject: [PATCH 23/31] Fix torsion testing. Expanded test set and removed torsions involving hydrogen. --- pdb2pqr/tests/ligand_results.py | 33 +++++++++++++++++++++++---------- pdb2pqr/tests/ligand_test.py | 26 ++++++++++++++++++++------ 2 files changed, 43 insertions(+), 16 deletions(-) diff --git a/pdb2pqr/tests/ligand_results.py b/pdb2pqr/tests/ligand_results.py index ef577e770..581f4d407 100644 --- a/pdb2pqr/tests/ligand_results.py +++ b/pdb2pqr/tests/ligand_results.py @@ -1,16 +1,26 @@ """Expected results for ligand tests""" TORSION_RESULTS = { - "ethanol.mol2": { - ('CAA', 'CAB', 'OAC', 'HAF'), ('HAF', 'OAC', 'CAB', 'CAA')}, + "acetylcholine.mol2": { + ('CAA', 'NAD', 'CAE', 'CAF'), ('CAC', 'NAD', 'CAE', 'CAF'), + ('CAE', 'CAF', 'OAG', 'CAH'), ('CAF', 'CAE', 'NAD', 'CAA'), + ('CAF', 'CAE', 'NAD', 'CAC'), ('CAF', 'CAE', 'NAD', 'CAJ'), + ('CAF', 'OAG', 'CAH', 'CAI'), ('CAF', 'OAG', 'CAH', 'OAB'), + ('CAH', 'OAG', 'CAF', 'CAE'), ('CAI', 'CAH', 'OAG', 'CAF'), + ('CAJ', 'NAD', 'CAE', 'CAF'), ('NAD', 'CAE', 'CAF', 'OAG'), + ('OAB', 'CAH', 'OAG', 'CAF'), ('OAG', 'CAF', 'CAE', 'NAD')}, + "ethanol.mol2": set(), + "pyrrole.mol2": { + ('CAA', 'CAD', 'CAE', 'CAC'), ('CAA', 'NAB', 'CAC', 'CAE'), + ('CAC', 'CAE', 'CAD', 
'CAA'), ('CAC', 'NAB', 'CAA', 'CAD'), + ('CAD', 'CAA', 'NAB', 'CAC'), ('CAD', 'CAE', 'CAC', 'NAB'), + ('CAE', 'CAC', 'NAB', 'CAA'), ('CAE', 'CAD', 'CAA', 'NAB'), + ('NAB', 'CAA', 'CAD', 'CAE'), ('NAB', 'CAC', 'CAE', 'CAD')}, + "tetramethylammonium.mol2": set(), "glycerol.mol2": { - ('CAA', 'CAB', 'CAC', 'OAF'), ('CAA', 'CAB', 'OAE', 'HAG'), - ('CAB', 'CAA', 'OAD', 'HAF'), ('CAB', 'CAC', 'OAF', 'HAH'), - ('CAC', 'CAB', 'CAA', 'OAD'), ('CAC', 'CAB', 'OAE', 'HAG'), - ('HAF', 'OAD', 'CAA', 'CAB'), ('HAG', 'OAE', 'CAB', 'CAA'), - ('HAG', 'OAE', 'CAB', 'CAC'), ('HAH', 'OAF', 'CAC', 'CAB'), - ('OAD', 'CAA', 'CAB', 'CAC'), ('OAD', 'CAA', 'CAB', 'OAE'), - ('OAE', 'CAB', 'CAA', 'OAD'), ('OAE', 'CAB', 'CAC', 'OAF'), - ('OAF', 'CAC', 'CAB', 'CAA'), ('OAF', 'CAC', 'CAB', 'OAE')}, + ('CAA', 'CAC', 'CAE', 'OAF'), ('CAE', 'CAC', 'CAA', 'OAB'), + ('OAB', 'CAA', 'CAC', 'CAE'), ('OAB', 'CAA', 'CAC', 'OAD'), + ('OAD', 'CAC', 'CAA', 'OAB'), ('OAD', 'CAC', 'CAE', 'OAF'), + ('OAF', 'CAE', 'CAC', 'CAA'), ('OAF', 'CAE', 'CAC', 'OAD')}, "cyclohexane.mol2": { ('CAA', 'CAB', 'CAC', 'CAF'), ('CAA', 'CAD', 'CAE', 'CAF'), ('CAB', 'CAA', 'CAD', 'CAE'), ('CAB', 'CAC', 'CAF', 'CAE'), @@ -24,6 +34,9 @@ TORSION_RESULTS = { RING_RESULTS = { "ethanol.mol2": set(), "glycerol.mol2": set(), + "pyrrole.mol2": set(), + "tetramethylammonium.mol2": set(), + "naphthalene.mol2": set(), "cyclohexane.mol2": {('CAA', 'CAD', 'CAE', 'CAF', 'CAC', 'CAB')}, "naphthalene.mol2": { ('CAA', 'CAB', 'CAC', 'CAH', 'CAG', 'CAF'), diff --git a/pdb2pqr/tests/ligand_test.py b/pdb2pqr/tests/ligand_test.py index a17814664..2a1f0b061 100644 --- a/pdb2pqr/tests/ligand_test.py +++ b/pdb2pqr/tests/ligand_test.py @@ -43,7 +43,7 @@ def test_parameterization(input_mol2): "name": atom.name, "charge": atom.charge} test_results.append(test_row) new_total_charge += atom.charge - _LOGGER.info("Test results: %s", test_results) + _LOGGER.debug("Test results: %s", test_results) test_results = pd.DataFrame(test_results) test_results = 
test_results.set_index("name") _LOGGER.debug("Test results:\n%s", test_results) @@ -88,19 +88,33 @@ def test_formal_charge(input_mol2): raise ValueError(err) -@pytest.mark.parametrize("input_mol2", ALL_LIGANDS) +@pytest.mark.parametrize("input_mol2", TORSION_RESULTS) def test_torsions(input_mol2): """Test assignment of torsion angles.""" ligand = parameterize.ParameterizedMolecule() mol2_path = Path("tests/data") / input_mol2 with open(mol2_path, "rt") as mol2_file: ligand.read(mol2_file) + test = set() + # Only test heavy-atom torsions + for torsion in ligand.torsions: + has_hydrogen = False + for atom in torsion: + if atom.startswith("H"): + has_hydrogen = True + if not has_hydrogen: + test.add(torsion) try: - benchmark = TORSION_RESULTS[input_mol2] - diff = ligand.torsions ^ benchmark + expected = TORSION_RESULTS[input_mol2] + diff = test ^ expected if len(diff) > 0: - err = "Torsion test failed for %s: %s" % ( - input_mol2, sorted(list(diff))) + err = ( + "Torsion test failed for {mol}:\n" + "Got: {test}\n" + "Expected: {expected}\n" + "Difference: {diff}").format( + mol=input_mol2, test=sorted(list(test)), + expected=sorted(list(expected)), diff=sorted(list(diff))) raise ValueError(err) except KeyError: err = "No results for %s: %s", input_mol2, sorted( From 3f8db56c3e40bee4cfe0a0e0e0daeddb4abe75e4 Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Sun, 28 Jun 2020 10:36:09 -0700 Subject: [PATCH 24/31] Expand ring test set. 
--- pdb2pqr/tests/ligand_results.py | 33 +++++++++++++++++++++++++-------- pdb2pqr/tests/ligand_test.py | 20 ++++++++++++++++---- 2 files changed, 41 insertions(+), 12 deletions(-) diff --git a/pdb2pqr/tests/ligand_results.py b/pdb2pqr/tests/ligand_results.py index 581f4d407..528f5c56d 100644 --- a/pdb2pqr/tests/ligand_results.py +++ b/pdb2pqr/tests/ligand_results.py @@ -34,21 +34,38 @@ TORSION_RESULTS = { RING_RESULTS = { "ethanol.mol2": set(), "glycerol.mol2": set(), - "pyrrole.mol2": set(), "tetramethylammonium.mol2": set(), "naphthalene.mol2": set(), + "acetate.mol2": set(), + "acetonitrile.mol2": set(), + "acetylcholine.mol2": set(), + "fatty-acid.mol2": set(), + "pyrrole.mol2": {('CAA', 'CAD', 'CAE', 'CAC', 'NAB')}, + "adp.mol2": { + ('C4', 'C5', 'N7', 'C8', 'N9'), ('C2', 'N1', 'C6', 'C5', 'C4', 'N3'), + ("C1'", "O4'", "C4'", "C3'", "C2'")}, "cyclohexane.mol2": {('CAA', 'CAD', 'CAE', 'CAF', 'CAC', 'CAB')}, "naphthalene.mol2": { - ('CAA', 'CAB', 'CAC', 'CAH', 'CAG', 'CAF'), + ('CAA', 'CAF', 'CAG', 'CAH', 'CAC', 'CAB'), ('CAC', 'CAH', 'CAI', 'CAJ', 'CAE', 'CAD')}, "anthracene.mol2": { + ('CAA', 'CAH', 'CAI', 'CAJ', 'CAC', 'CAB'), ('CAC', 'CAJ', 'CAK', 'CAL', 'CAE', 'CAD'), - ('CAE', 'CAL', 'CAM', 'CAN', 'CAG', 'CAF'), - ('CAA', 'CAB', 'CAC', 'CAJ', 'CAI', 'CAH')}, - "crown-ether.mol2": { - ('CAA', 'CAE', 'CAF', 'CAG', 'CAC', 'CAB'), - ('CAD', 'CAI', 'CAJ', 'CAK', 'CAF', 'CAE'), - ('CAF', 'CAG', 'CAH', 'CAM', 'CAL', 'CAK')} + ('CAE', 'CAL', 'CAM', 'CAN', 'CAG', 'CAF')}, + "1HPX-ligand.mol2": { + ('C1', 'C2', 'C3', 'C4', 'C5', 'N1'), + ('C18', 'N4', 'C20', 'S2', 'C19'), + ('C3', 'C6', 'C7', 'C8', 'C9', 'C4'), + ('C28', 'C33', 'C32', 'C31', 'C30', 'C29')}, + "1QBS-ligand.mol2": { + ('C1', 'N7', 'C6', 'C5', 'C4', 'C3', 'N2'), + ('C31', 'C36', 'C35', 'C34', 'C33', 'C32'), + ('C61', 'C66', 'C65', 'C64', 'C63', 'C62'), + ('C71', 'C76', 'C75', 'C74', 'C73', 'C72'), + ('C21', 'C26', 'C25', 'C24', 'C23', 'C22')}, + "1US0-ligand.mol2": { + ('C24', 'C27', 'C28', 'C25', 
'C29', 'C26'), + ('C2', 'C4', 'C7', 'C3', 'C6', 'C5')} } diff --git a/pdb2pqr/tests/ligand_test.py b/pdb2pqr/tests/ligand_test.py index 2a1f0b061..0cd617a24 100644 --- a/pdb2pqr/tests/ligand_test.py +++ b/pdb2pqr/tests/ligand_test.py @@ -129,11 +129,23 @@ def test_rings(input_mol2): mol2_path = Path("tests/data") / input_mol2 with open(mol2_path, "rt") as mol2_file: ligand.read(mol2_file) - benchmark = RING_RESULTS[input_mol2] - diff = ligand.rings ^ benchmark + test = ligand.rings + try: + benchmark = RING_RESULTS[input_mol2] + except KeyError: + err = "Missing expected results for %s: %s" % ( + input_mol2, test) + raise KeyError(err) + diff = test ^ benchmark if len(diff) > 0: - err = "Ring test failed for %s: %s" % ( - input_mol2, sorted(list(diff))) + err = ( + "Ring test failed for {mol}:\n" + "Got: {test}\n" + "Expected: {expected}\n" + "Difference: {diff}").format( + mol=input_mol2, test=sorted(list(test)), + expected=sorted(list(benchmark)), + diff=sorted(list(diff))) raise ValueError(err) for atom_name in ligand.atoms: atom = ligand.atoms[atom_name] From 4cb18b166f0ac3ea04c476c879d7d44e15db2145 Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Sun, 28 Jun 2020 10:39:50 -0700 Subject: [PATCH 25/31] Remove bond-order guessing. Remove guess_bond_order function and associated tests. Bond orders are already specified in the MOL2 file and the existing code wasn't very good at guessing them. 
--- pdb2pqr/pdb2pqr/ligand/__init__.py | 19 ------------------- pdb2pqr/pdb2pqr/ligand/mol2.py | 25 +------------------------ pdb2pqr/tests/ligand_results.py | 21 --------------------- pdb2pqr/tests/ligand_test.py | 24 ++---------------------- 4 files changed, 3 insertions(+), 86 deletions(-) diff --git a/pdb2pqr/pdb2pqr/ligand/__init__.py b/pdb2pqr/pdb2pqr/ligand/__init__.py index 2d787f83f..3614edc98 100644 --- a/pdb2pqr/pdb2pqr/ligand/__init__.py +++ b/pdb2pqr/pdb2pqr/ligand/__init__.py @@ -24,25 +24,6 @@ PARSE_RADII = { "F": 1.20, "P": 1.90, "Cl": 1.75} -# TODO - this belongs in a configuration file somewhere other than here. -# Bond lengths from -# http://www.chem.swin.edu.au/modules/mod2/bondlen.html -# We should get a better reference -_BOND_LENGTH_DICTS = [ - {"atom1": 'C', "atom2": 'C', "length": 1.54, "type": "single"}, - {"atom1": 'C', "atom2": 'C', "length": 1.34, "type": "double"}, - {"atom1": 'C', "atom2": 'C', "length": 1.20, "type": "triple"}, - {"atom1": 'C', "atom2": 'C', "length": 1.40, "type": "aromatic"}, - {"atom1": 'C', "atom2": 'O', "length": 1.43, "type": "single"}, - {"atom1": 'C', "atom2": 'O', "length": 1.21, "type": "double"}, - {"atom1": 'C', "atom2": 'N', "length": 1.47, "type": "single"}, - {"atom1": 'C', "atom2": 'N', "length": 1.25, "type": "double"}, - {"atom1": 'C', "atom2": 'N', "length": 1.16, "type": "triple"}, - {"atom1": 'C', "atom2": 'N', "length": 1.34, "type": "aromatic"}, - {"atom1": 'N', "atom2": 'N', "length": 1.35, "type": "aromatic"} -] -BOND_LENGTHS = pandas.DataFrame(_BOND_LENGTH_DICTS) - # Numbers of valence electrons for the groups of the periodic table VALENCE_BY_GROUP = {1: 1, 2: 2, 13: 3, 14: 4, 15: 5, 16: 6, 17: 7, 18: 8} # Groups of the periodic table diff --git a/pdb2pqr/pdb2pqr/ligand/mol2.py b/pdb2pqr/pdb2pqr/ligand/mol2.py index 9559f8ad4..d42222106 100644 --- a/pdb2pqr/pdb2pqr/ligand/mol2.py +++ b/pdb2pqr/pdb2pqr/ligand/mol2.py @@ -8,7 +8,7 @@ from collections import OrderedDict from itertools import 
combinations from numpy import array from numpy.linalg import norm -from . import BOND_LENGTHS, VALENCE_BY_ELEMENT, NONBONDED_BY_TYPE +from . import VALENCE_BY_ELEMENT, NONBONDED_BY_TYPE _LOGGER = logging.getLogger(__name__) @@ -53,29 +53,6 @@ class Mol2Bond: fmt = "{b.atoms[0].name:s} {b.type:s}-bonded to {b.atoms[1].name:s}" return fmt.format(b=self) - @property - def guess_bond_order(self): - """Attempt to determine the order of this bond. - - Return: - string with order of bond or None - """ - _LOGGER.warning("Ignoring bond type: %s", self.type) - type1 = self.atoms[0].type.split(".")[0] - type2 = self.atoms[1].type.split(".")[0] - types = sorted(type1, type2) - bond_lengths = BOND_LENGTHS.loc[ - (BOND_LENGTHS["atom1"] == types[0]) - & (BOND_LENGTHS["atom2"] == types[1])] - best_type = None - best_fit = BOND_DIST - for _, row in bond_lengths.iterrows(): - if abs(self.length - row["length"]) < best_fit: - best_fit = abs(self.length - row["length"]) - best_type = row["type"] - return best_type - - class Mol2Atom: """MOL2 molecule atoms.""" def __init__(self): diff --git a/pdb2pqr/tests/ligand_results.py b/pdb2pqr/tests/ligand_results.py index 528f5c56d..45f9004d8 100644 --- a/pdb2pqr/tests/ligand_results.py +++ b/pdb2pqr/tests/ligand_results.py @@ -69,27 +69,6 @@ RING_RESULTS = { } -BOND_RESULTS = { - "cyclohexane.mol2": 6 * ["single"], - "ethanol.mol2": [ - "single", "single", None, "single", "single"], - "glycerol.mol2": [ - None, "single", "single", "single", "single", None, "single", None], - "acetylcholine.mol2": [ - "single", "double", "single", "single", "single", "single", "single", - "single", "single"], - "acetonitrile.mol2": [ - "triple", "single"], - "pyrrole.mol2": [ - "aromatic", "aromatic", "aromatic", "aromatic", "aromatic", None], - "fatty-acid.mol2": [ - "double", "double", "single", "single", "single", "single", "double", - "single", "single", "single", "single"], - "tetramethylammonium.mol2": ["single", None, "single", "single"], - 
"naphthalene.mol2": 11 * ["aromatic"] -} - - FORMAL_CHARGE_RESULTS = { "1HPX-ligand.mol2": 87*[0], "1QBS-ligand.mol2": 80*[0], diff --git a/pdb2pqr/tests/ligand_test.py b/pdb2pqr/tests/ligand_test.py index 0cd617a24..83291a06a 100644 --- a/pdb2pqr/tests/ligand_test.py +++ b/pdb2pqr/tests/ligand_test.py @@ -7,7 +7,7 @@ import pandas as pd from numpy.testing import assert_almost_equal from pdb2pqr.ligand import parameterize import common -from ligand_results import TORSION_RESULTS, RING_RESULTS, BOND_RESULTS +from ligand_results import TORSION_RESULTS, RING_RESULTS from ligand_results import FORMAL_CHARGE_RESULTS, PARTIAL_CHARGE_RESULTS @@ -16,7 +16,7 @@ _LOGGER.warning("Need functional and regression test coverage for --ligand") _LOGGER.error("Still haven't figured out radii") -ALL_LIGANDS = set(TORSION_RESULTS) | set(BOND_RESULTS) | set(RING_RESULTS) +ALL_LIGANDS = set(TORSION_RESULTS) | set(RING_RESULTS) ALL_LIGANDS |= { "1HPX-ligand.mol2", "1QBS-ligand.mol2", "1US0-ligand.mol2", "adp.mol2", "acetate.mol2"} @@ -154,26 +154,6 @@ def test_rings(input_mol2): _LOGGER.debug(str_) -@pytest.mark.parametrize("input_mol2", ALL_LIGANDS) -def test_bonds(input_mol2): - """Test detection of bond types.""" - ligand = parameterize.ParameterizedMolecule() - mol2_path = Path("tests/data") / input_mol2 - with open(mol2_path, "rt") as mol2_file: - ligand.read(mol2_file) - results = BOND_RESULTS[input_mol2] - for ibond, bond in enumerate(ligand.bonds): - try: - if bond.bond_order != results[ibond]: - err = "Incorrect order for %s. 
Got %s, expected %s" % ( - str(bond), bond.bond_order, results[ibond]) - raise ValueError(err) - except IndexError: - err = "Add test for %s -- %s (%s)" % ( - input_mol2, str(bond), bond.bond_order) - raise IndexError(err) - - @pytest.mark.parametrize("input_pdb", ["1HPX", "1QBS", "1US0"], ids=str) def test_ligand_protein(input_pdb, tmp_path): """PROPKA non-regression tests on proteins without ligands.""" From 405a8596a159de7b2e6b4db21963eee7a581b4ae Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Sun, 28 Jun 2020 15:45:03 -0700 Subject: [PATCH 26/31] Combine parameterization functions and tests. --- pdb2pqr/pdb2pqr/ligand/__init__.py | 47 +- pdb2pqr/pdb2pqr/ligand/mol2.py | 63 ++ pdb2pqr/pdb2pqr/ligand/parameterize.py | 52 -- pdb2pqr/pdb2pqr/main.py | 4 +- pdb2pqr/tests/ligand_results.py | 886 ++++++++++++------------- pdb2pqr/tests/ligand_test.py | 36 +- 6 files changed, 538 insertions(+), 550 deletions(-) delete mode 100644 pdb2pqr/pdb2pqr/ligand/parameterize.py diff --git a/pdb2pqr/pdb2pqr/ligand/__init__.py b/pdb2pqr/pdb2pqr/ligand/__init__.py index 3614edc98..50ae58a92 100644 --- a/pdb2pqr/pdb2pqr/ligand/__init__.py +++ b/pdb2pqr/pdb2pqr/ligand/__init__.py @@ -10,19 +10,42 @@ assert sys.version_info >= (3, 5) # TODO - this belongs in a configuration file somewhere other than here. -# PARSE radii data for C, N, O, S, H, Br, F, P are from Sitkoff et al's paper: -# -# Sitkoff D, Sharp KA, Honig B. Accurate Calculation of Hydration Free -# Energies Using Macroscopic Solvent Models. J Phys Chem 98 (7) 1978-88, -# 1994. J. Phys. Chem. 1994, 98, 7, 1978–1988 # -# See also the AMBER mailing list: http://amber.ch.ic.ac.uk/archive/. -# -# The van der Waals radius is used for chlorine. 
-PARSE_RADII = { - "C": 1.70, "N": 1.50, "O": 1.40, "S": 1.85, "H": 1.00, "Br":2.50, - "F": 1.20, "P": 1.90, "Cl": 1.75} - +# When using these tables, the most specific Sybyl atom type should be used +# first and then the generic element should be used +RADII = { + # NOTE - these are not the original PARSE radii but they are the ones + # included in the previous version of PDB2PKA so I'm preserving them for + # posterity. There's a claim they came from + # http://amber.ch.ic.ac.uk/archive/ but that link no longer works. + "not parse - do not use": { + "C": 1.70, "N": 1.50, "O": 1.40, "S": 1.85, "H": 1.00, "Br": 2.50, + "F": 1.20, "P": 1.90, "Cl": 1.75}, + # These are the PARSE radii from Table 4 of + # http://doi.org/10.1021/j100058a043 + "parse": { + "C.1": 2.00, "C.2": 2.00, "C.3": 2.00, "C": 1.70, "H": 1.00, + "O": 1.40, "N": 1.50, "S": 1.85}, + # These are the ZAP radii from Table 2 of + # http://doi.org/10.1021/jm070549%2B. Bondi radii should be used for + # atoms not found in this table. + "zap9": { + "C": 1.87, "H": 1.10, "O.co2": 1.76, "N": 1.40, "S": 2.15, "F": 2.40, + "Cl": 1.82, "I": 2.65}, + # These are the Bondi radii from Table 2 of + # http://doi.org/10.1021/jm070549%2B + "bondi-zap": { + "C": 1.7, "H": 1.20, "O.co2": 1.52, "N": 1.55, "S": 1.80, "F": 1.47, + "Cl": 1.75, "I": 1.98}, + # These are the Bondi radii from Table I of + # http://doi.org/10.1021/j100785a001. NOTE - there are some variations to + # the halogens in Table V that we might want to consider in the future. 
+ "bondi": { + "H": 1.20, "He": 1.40, "C": 1.70, "N": 1.55, "O": 1.52, "F": 1.47, + "Ne": 1.54, "Si": 2.10, "P": 1.80, "S": 1.80, "Cl": 1.75, "Ar": 1.88, + "As": 1.85, "Se": 1.90, "Br": 1.85, "Kr": 2.02, "Te": 2.06, "I": 1.98, + "Xe": 2.16} +} # Numbers of valence electrons for the groups of the periodic table VALENCE_BY_GROUP = {1: 1, 2: 2, 13: 3, 14: 4, 15: 5, 16: 6, 17: 7, 18: 8} diff --git a/pdb2pqr/pdb2pqr/ligand/mol2.py b/pdb2pqr/pdb2pqr/ligand/mol2.py index d42222106..e2c4ad820 100644 --- a/pdb2pqr/pdb2pqr/ligand/mol2.py +++ b/pdb2pqr/pdb2pqr/ligand/mol2.py @@ -8,7 +8,9 @@ from collections import OrderedDict from itertools import combinations from numpy import array from numpy.linalg import norm +from . import peoe from . import VALENCE_BY_ELEMENT, NONBONDED_BY_TYPE +from . import RADII _LOGGER = logging.getLogger(__name__) @@ -105,6 +107,35 @@ class Mol2Atom: ) return pdb_fmt.format(a=self) + def assign_radius(self, primary_dict, secondary_dict): + """Assign radius to atom. + + TODO - it seems inconsistent that this function pulls radii from a + dictionary and the protein routines use force field files. 
+ + Args: + primary_dict: primary dictionary of radii indexed by atom type or + element + secondary_dict: backup dictionary for radii not found in primary + dictionary + """ + radius = None + for rdict in [primary_dict, secondary_dict]: + if radius is not None: + break + for key in [self.type, self.element]: + if key in rdict: + radius = rdict[key] + break + if radius is not None: + self.radius = radius + else: + err = ( + "Unable to find radius parameter for self of type {type} in " + "radius dictionary: {ff}").format( + type=self.type, ff=primary_dict) + raise KeyError(err) + @property def coords(self): """Return coordinates as numpy vector.""" @@ -219,6 +250,38 @@ class Mol2Molecule: self.res_name = None self.res_seq = None + def assign_parameters( + self, primary_dict=RADII["zap9"], secondary_dict=RADII["bondi"]): + """Assign charges and radii to atoms in molecule. + + Args: + primary_dict: primary dictionary of radii indexed by atom type or + element + secondary_dict: backup dictionary for radii not found in primary + dictionary + """ + self.assign_radii(primary_dict, secondary_dict) + self.assign_charges() + + def assign_radii( + self, primary_dict, secondary_dict): + """Assign radii to atoms in molecule. + + Args: + primary_dict: primary dictionary of radii indexed by atom type or + element + secondary_dict: backup dictionary for radii not found in primary + dictionary + """ + for atom in self.atoms.values(): + atom.assign_radius(primary_dict, secondary_dict) + + def assign_charges(self): + """Assign charges to atoms in molecule.""" + for atom in self.atoms.values(): + atom.charge = atom.formal_charge + peoe.equilibrate(self.atoms.values()) + def find_atom_torsions(self, start_atom): """Set the torsion angles that start with this atom (name). 
diff --git a/pdb2pqr/pdb2pqr/ligand/parameterize.py b/pdb2pqr/pdb2pqr/ligand/parameterize.py deleted file mode 100644 index 32e4af26f..000000000 --- a/pdb2pqr/pdb2pqr/ligand/parameterize.py +++ /dev/null @@ -1,52 +0,0 @@ -"""Calculating and assigning ligand charges and radii.""" -import logging -from .mol2 import Mol2Molecule -from .peoe import equilibrate -from . import PARSE_RADII -from ..forcefield import ForcefieldAtom -from ..forcefield import ForcefieldResidue - - -_LOGGER = logging.getLogger(__name__) - - -class ParameterizedMolecule(Mol2Molecule): - """Ligand with charge and radius assignments.""" - - def __init__(self): - super().__init__() - self.ligand_properties = {} - - def update(self, ligand): - """Update self with latest version of ligand (if needed). - - Args: - ligand: latest version of ligand - """ - prev_atom_names = set(self.ligand_properties) - curr_atom_names = set(self.atoms) - if len(prev_atom_names ^ curr_atom_names) > 0: - self.reparameterize(ligand) - - def reparameterize(self, ligand): - """Reassign parameters given new ligand. - - Args: - ligand: latest version of ligand - """ - self.ligand_properties = {} - for atom in ligand.atoms.values(): - atom.charge = atom.formal_charge - equilibrate(ligand.atoms.values()) - for atom in ligand.atoms.values(): - elem = atom.element.capitalize() - charge = atom.charge - try: - radius = PARSE_RADII[elem] - atom.radius = radius - except KeyError: - raise KeyError( - "Unable to assign radius for element %s in atom %s" % ( - elem, atom)) - self.ligand_properties[atom.name] = { - "charge": charge, "radius": radius} diff --git a/pdb2pqr/pdb2pqr/main.py b/pdb2pqr/pdb2pqr/main.py index 3f11df080..89752e3c8 100644 --- a/pdb2pqr/pdb2pqr/main.py +++ b/pdb2pqr/pdb2pqr/main.py @@ -20,7 +20,7 @@ from . import hydrogens from . import forcefield from . import protein as prot from . import input_output as io -from .ligand.parameterize import ParameterizedMolecule +from .ligand.mol2 import Mol2Molecule from . 
import input_output as io from .config import VERSION, TITLE_FORMAT_STRING, CITATIONS, FORCE_FIELDS from .config import REPAIR_LIMIT @@ -263,7 +263,7 @@ def setup_molecule(pdblist, definition, ligand_path): ligand: ligand object (may be None) """ if ligand_path is not None: - ligand = ParameterizedMolecule() + ligand = Mol2Molecule() with open(ligand_path, "rt", encoding="utf-8") as ligand_file: ligand.read(ligand_file) raise NotImplementedError("Where do initial ligand charges come from?") diff --git a/pdb2pqr/tests/ligand_results.py b/pdb2pqr/tests/ligand_results.py index 45f9004d8..dc6f7e316 100644 --- a/pdb2pqr/tests/ligand_results.py +++ b/pdb2pqr/tests/ligand_results.py @@ -88,483 +88,439 @@ FORMAL_CHARGE_RESULTS = { "tetramethylammonium.mol2": 4*[0] + [1] + 12*[0] } - -PARTIAL_CHARGE_RESULTS = { +PARAMETER_RESULTS = { "1HPX-ligand.mol2": [ - {'name': 'C1', 'charge': -0.043081773453678196}, - {'name': 'N1', 'charge': -0.35678069534508244}, - {'name': 'O1', 'charge': -0.5711988943947174}, - {'name': 'S1', 'charge': -0.36230725415046}, - {'name': 'C2', 'charge': -0.07546448002333427}, - {'name': 'N2', 'charge': -0.703336840791423}, - {'name': 'O2', 'charge': -0.5909676444057169}, - {'name': 'S2', 'charge': -0.31326568130416904}, - {'name': 'C3', 'charge': 0.012184036151037315}, - {'name': 'N3', 'charge': -0.7139711103762134}, - {'name': 'O3', 'charge': -0.2919660371248733}, - {'name': 'C4', 'charge': -0.024891112570323084}, - {'name': 'N4', 'charge': -0.7120419698971743}, - {'name': 'O4', 'charge': -0.2689251910425875}, - {'name': 'C5', 'charge': -0.02721768449096584}, - {'name': 'N5', 'charge': -0.7179288700589408}, - {'name': 'O5', 'charge': -0.2884010361503686}, - {'name': 'C6', 'charge': 0.0753432557874654}, - {'name': 'O6', 'charge': -0.2912545916584642}, - {'name': 'C7', 'charge': -0.06673754373256466}, - {'name': 'C8', 'charge': -0.11391059914410395}, - {'name': 'C9', 'charge': -0.10324637080138911}, - {'name': 'C10', 'charge': 0.0955779782243598}, - 
{'name': 'C11', 'charge': 0.13325937240601735}, - {'name': 'C12', 'charge': -0.007454479991213576}, - {'name': 'C13', 'charge': 0.1300074666489752}, - {'name': 'C14', 'charge': 0.03316079200653884}, - {'name': 'C15', 'charge': 0.1162788489650129}, - {'name': 'C16', 'charge': -0.05012048448800629}, - {'name': 'C17', 'charge': 0.17629988149001455}, - {'name': 'C18', 'charge': 0.1273124481017021}, - {'name': 'C19', 'charge': 0.009718364614545697}, - {'name': 'C20', 'charge': 0.05373224349483613}, - {'name': 'C21', 'charge': 0.14236781888812639}, - {'name': 'C22', 'charge': -0.018858356195459093}, - {'name': 'C23', 'charge': -0.08872387572707603}, - {'name': 'C24', 'charge': -0.08872387572707603}, - {'name': 'C25', 'charge': -0.08872387572707603}, - {'name': 'C26', 'charge': 0.10593153852032608}, - {'name': 'C27', 'charge': -0.05782750620937734}, - {'name': 'C28', 'charge': -0.09940049289330599}, - {'name': 'C29', 'charge': -0.11788247675515531}, - {'name': 'C30', 'charge': -0.12669321002628098}, - {'name': 'C31', 'charge': -0.12840215383108972}, - {'name': 'C32', 'charge': -0.12669321002628098}, - {'name': 'C33', 'charge': -0.11788247675515531}, - {'name': 'HN2', 'charge': 0.47365062709293504}, - {'name': 'HN3', 'charge': 0.46813799183459115}, - {'name': 'HN5', 'charge': 0.4699450251195792}, - {'name': 'HO2', 'charge': 0.4135083335659837}, - {'name': 'H1', 'charge': 0.1930651515377243}, - {'name': 'H2', 'charge': 0.14930498376978826}, - {'name': 'H5', 'charge': 0.2004887586723136}, - {'name': 'H7', 'charge': 0.1568207646303434}, - {'name': 'H8', 'charge': 0.13234810374569977}, - {'name': 'H9', 'charge': 0.13832988708343302}, - {'name': '1H10', 'charge': 0.2090610332096663}, - {'name': '2H10', 'charge': 0.2090610332096663}, - {'name': '1H12', 'charge': 0.12111462041395545}, - {'name': '2H12', 'charge': 0.12111462041395545}, - {'name': 'H14', 'charge': 0.1675599590015787}, - {'name': 'H15', 'charge': 0.22760838606623204}, - {'name': '1H16', 'charge': 
0.0852867334715187}, - {'name': '2H16', 'charge': 0.0852867334715187}, - {'name': 'H18', 'charge': 0.24317031751195953}, - {'name': '1H19', 'charge': 0.12776560931027464}, - {'name': '2H19', 'charge': 0.12776560931027464}, - {'name': '1H20', 'charge': 0.19565330098691133}, - {'name': '2H20', 'charge': 0.19565330098691133}, - {'name': '1H23', 'charge': 0.05671902293773316}, - {'name': '2H23', 'charge': 0.05671902293773316}, - {'name': '3H23', 'charge': 0.05671902293773316}, - {'name': '1H24', 'charge': 0.05671902293773316}, - {'name': '2H24', 'charge': 0.05671902293773316}, - {'name': '3H24', 'charge': 0.05671902293773316}, - {'name': '1H25', 'charge': 0.05671902293773316}, - {'name': '2H25', 'charge': 0.05671902293773316}, - {'name': '3H25', 'charge': 0.05671902293773316}, - {'name': 'H26', 'charge': 0.21276929701184144}, - {'name': '1H27', 'charge': 0.0838296767957033}, - {'name': '2H27', 'charge': 0.0838296767957033}, - {'name': '3H27', 'charge': 0.0838296767957033}, - {'name': 'H29', 'charge': 0.1344214537548114}, - {'name': 'H30', 'charge': 0.12944135663694809}, - {'name': 'H31', 'charge': 0.12895177093126134}, - {'name': 'H32', 'charge': 0.12944135663694809}, - {'name': 'H33', 'charge': 0.1344214537548114}], + {'name': 'C1', 'type': 'C.ar', 'charge': -0.043081773453678196, 'radius': 1.87}, + {'name': 'N1', 'type': 'N.ar', 'charge': -0.35678069534508244, 'radius': 1.4}, + {'name': 'O1', 'type': 'O.3', 'charge': -0.5711988943947174, 'radius': 1.52}, + {'name': 'S1', 'type': 'S.3', 'charge': -0.36230725415046, 'radius': 2.15}, + {'name': 'C2', 'type': 'C.ar', 'charge': -0.07546448002333427, 'radius': 1.87}, + {'name': 'N2', 'type': 'N.am', 'charge': -0.703336840791423, 'radius': 1.4}, + {'name': 'O2', 'type': 'O.3', 'charge': -0.5909676444057169, 'radius': 1.52}, + {'name': 'S2', 'type': 'S.3', 'charge': -0.31326568130416904, 'radius': 2.15}, + {'name': 'C3', 'type': 'C.ar', 'charge': 0.012184036151037315, 'radius': 1.87}, + {'name': 'N3', 'type': 'N.am', 
'charge': -0.7139711103762134, 'radius': 1.4}, + {'name': 'O3', 'type': 'O.2', 'charge': -0.2919660371248733, 'radius': 1.52}, + {'name': 'C4', 'type': 'C.ar', 'charge': -0.024891112570323084, 'radius': 1.87}, + {'name': 'N4', 'type': 'N.am', 'charge': -0.7120419698971743, 'radius': 1.4}, + {'name': 'O4', 'type': 'O.2', 'charge': -0.2689251910425875, 'radius': 1.52}, + {'name': 'C5', 'type': 'C.ar', 'charge': -0.02721768449096584, 'radius': 1.87}, + {'name': 'N5', 'type': 'N.am', 'charge': -0.7179288700589408, 'radius': 1.4}, + {'name': 'O5', 'type': 'O.2', 'charge': -0.2884010361503686, 'radius': 1.52}, + {'name': 'C6', 'type': 'C.ar', 'charge': 0.0753432557874654, 'radius': 1.87}, + {'name': 'O6', 'type': 'O.2', 'charge': -0.2912545916584642, 'radius': 1.52}, + {'name': 'C7', 'type': 'C.ar', 'charge': -0.06673754373256466, 'radius': 1.87}, + {'name': 'C8', 'type': 'C.ar', 'charge': -0.11391059914410395, 'radius': 1.87}, + {'name': 'C9', 'type': 'C.ar', 'charge': -0.10324637080138911, 'radius': 1.87}, + {'name': 'C10', 'type': 'C.3', 'charge': 0.0955779782243598, 'radius': 1.87}, + {'name': 'C11', 'type': 'C.2', 'charge': 0.13325937240601735, 'radius': 1.87}, + {'name': 'C12', 'type': 'C.3', 'charge': -0.007454479991213576, 'radius': 1.87}, + {'name': 'C13', 'type': 'C.2', 'charge': 0.1300074666489752, 'radius': 1.87}, + {'name': 'C14', 'type': 'C.3', 'charge': 0.03316079200653884, 'radius': 1.87}, + {'name': 'C15', 'type': 'C.3', 'charge': 0.1162788489650129, 'radius': 1.87}, + {'name': 'C16', 'type': 'C.3', 'charge': -0.05012048448800629, 'radius': 1.87}, + {'name': 'C17', 'type': 'C.2', 'charge': 0.17629988149001455, 'radius': 1.87}, + {'name': 'C18', 'type': 'C.3', 'charge': 0.1273124481017021, 'radius': 1.87}, + {'name': 'C19', 'type': 'C.3', 'charge': 0.009718364614545697, 'radius': 1.87}, + {'name': 'C20', 'type': 'C.3', 'charge': 0.05373224349483613, 'radius': 1.87}, + {'name': 'C21', 'type': 'C.2', 'charge': 0.14236781888812639, 'radius': 1.87}, + 
{'name': 'C22', 'type': 'C.3', 'charge': -0.018858356195459093, 'radius': 1.87}, + {'name': 'C23','type': 'C.3', 'charge': -0.08872387572707603, 'radius': 1.87}, + {'name': 'C24', 'type': 'C.3', 'charge': -0.08872387572707603, 'radius': 1.87}, + {'name': 'C25', 'type': 'C.3', 'charge': -0.08872387572707603, 'radius': 1.87}, + {'name': 'C26', 'type': 'C.3', 'charge': 0.10593153852032608, 'radius': 1.87}, + {'name': 'C27', 'type': 'C.3', 'charge': -0.05782750620937734, 'radius': 1.87}, + {'name': 'C28', 'type': 'C.ar', 'charge': -0.09940049289330599, 'radius': 1.87}, + {'name': 'C29', 'type': 'C.ar', 'charge': -0.11788247675515531, 'radius': 1.87}, + {'name': 'C30', 'type': 'C.ar', 'charge': -0.12669321002628098, 'radius': 1.87}, + {'name': 'C31', 'type': 'C.ar', 'charge': -0.12840215383108972, 'radius': 1.87}, + {'name': 'C32', 'type': 'C.ar', 'charge': -0.12669321002628098, 'radius': 1.87}, + {'name': 'C33', 'type': 'C.ar', 'charge': -0.11788247675515531, 'radius': 1.87}, + {'name': 'HN2', 'type': 'H', 'charge': 0.47365062709293504, 'radius': 1.1}, + {'name': 'HN3', 'type': 'H', 'charge': 0.46813799183459115, 'radius': 1.1}, + {'name': 'HN5', 'type': 'H', 'charge': 0.4699450251195792, 'radius': 1.1}, + {'name': 'HO2', 'type': 'H', 'charge': 0.4135083335659837, 'radius': 1.1}, + {'name':'H1', 'type': 'H', 'charge': 0.1930651515377243, 'radius': 1.1}, + {'name': 'H2', 'type': 'H', 'charge': 0.14930498376978826, 'radius': 1.1}, + {'name': 'H5', 'type': 'H', 'charge': 0.2004887586723136, 'radius': 1.1}, + {'name': 'H7', 'type': 'H', 'charge': 0.1568207646303434, 'radius': 1.1}, + {'name': 'H8', 'type': 'H', 'charge': 0.13234810374569977, 'radius': 1.1}, + {'name': 'H9', 'type': 'H', 'charge': 0.13832988708343302, 'radius': 1.1}, + {'name': '1H10', 'type': 'H', 'charge': 0.2090610332096663, 'radius': 1.1}, + {'name': '2H10', 'type': 'H', 'charge': 0.2090610332096663, 'radius': 1.1}, + {'name': '1H12', 'type': 'H', 'charge': 0.12111462041395545, 'radius': 1.1}, + 
{'name': '2H12', 'type': 'H', 'charge': 0.12111462041395545, 'radius': 1.1}, + {'name': 'H14', 'type': 'H', 'charge': 0.1675599590015787, 'radius': 1.1}, + {'name': 'H15', 'type': 'H', 'charge': 0.22760838606623204, 'radius': 1.1}, + {'name': '1H16', 'type': 'H', 'charge': 0.0852867334715187, 'radius': 1.1}, + {'name': '2H16', 'type': 'H', 'charge': 0.0852867334715187, 'radius': 1.1}, + {'name': 'H18', 'type': 'H', 'charge': 0.24317031751195953, 'radius': 1.1}, + {'name': '1H19', 'type': 'H', 'charge': 0.12776560931027464, 'radius': 1.1}, + {'name': '2H19', 'type': 'H', 'charge': 0.12776560931027464, 'radius': 1.1}, + {'name': '1H20', 'type': 'H', 'charge': 0.19565330098691133, 'radius': 1.1}, + {'name': '2H20', 'type': 'H', 'charge': 0.19565330098691133, 'radius': 1.1}, + {'name': '1H23', 'type': 'H', 'charge': 0.05671902293773316, 'radius': 1.1}, + {'name': '2H23', 'type': 'H', 'charge': 0.05671902293773316, 'radius': 1.1}, + {'name': '3H23', 'type': 'H', 'charge': 0.05671902293773316, 'radius': 1.1}, + {'name': '1H24', 'type': 'H', 'charge': 0.05671902293773316, 'radius': 1.1}, + {'name': '2H24', 'type': 'H', 'charge': 0.05671902293773316, 'radius': 1.1}, + {'name': '3H24', 'type': 'H', 'charge': 0.05671902293773316, 'radius': 1.1}, + {'name': '1H25', 'type': 'H', 'charge': 0.05671902293773316, 'radius': 1.1}, + {'name': '2H25', 'type': 'H', 'charge': 0.05671902293773316, 'radius': 1.1}, + {'name': '3H25', 'type': 'H', 'charge': 0.05671902293773316, 'radius': 1.1}, + {'name': 'H26', 'type': 'H', 'charge': 0.21276929701184144, 'radius': 1.1}, + {'name': '1H27', 'type': 'H', 'charge': 0.0838296767957033, 'radius': 1.1}, + {'name': '2H27', 'type': 'H', 'charge': 0.0838296767957033, 'radius': 1.1}, + {'name': '3H27', 'type': 'H', 'charge': 0.0838296767957033, 'radius': 1.1}, + {'name': 'H29', 'type': 'H', 'charge': 0.1344214537548114, 'radius': 1.1}, + {'name': 'H30', 'type': 'H', 'charge': 0.12944135663694809, 'radius': 1.1}, + {'name': 'H31', 'type': 'H', 
'charge': 0.12895177093126134, 'radius': 1.1}, + {'name': 'H32', 'type': 'H', 'charge': 0.12944135663694809, 'radius': 1.1}, + {'name': 'H33', 'type': 'H', 'charge': 0.1344214537548114, 'radius': 1.1}], "1QBS-ligand.mol2": [ - {'name': 'C1', 'charge': 0.2819290256115998}, - {'name': 'C3', 'charge': 0.044938629062860544}, - {'name': 'C4', 'charge': 0.029905546428555488}, - {'name': 'C5', 'charge': 0.029905546428555488}, - {'name': 'C6', 'charge': 0.044938629062860544}, - {'name': 'C20', 'charge': 0.02388740859601932}, - {'name': 'C21', 'charge': -0.06923087776665914}, - {'name': 'C22', 'charge': -0.10856468032765192}, - {'name': 'C23', 'charge': -0.11036338907520785}, - {'name': 'C24', 'charge': -0.08244776383718133}, - {'name': 'C25', 'charge': -0.11036338907520785}, - {'name': 'C26', 'charge': -0.10856468032765192}, - {'name': 'C27', 'charge': -0.021297911008685094}, - {'name': 'C30', 'charge': -0.04323885537049762}, - {'name': 'C31', 'charge': -0.09772541979676261}, - {'name': 'C32', 'charge': -0.11774626937995367}, - {'name': 'C33', 'charge': -0.12668933876013122}, - {'name': 'C34', 'charge': -0.12840215383108972}, - {'name': 'C35', 'charge': -0.12668933876013122}, - {'name': 'C36', 'charge': -0.11774626937995367}, - {'name': 'C60', 'charge': -0.04323885537049762}, - {'name': 'C61', 'charge': -0.09772541979676261}, - {'name': 'C62', 'charge': -0.11774626937995367}, - {'name': 'C63', 'charge': -0.12668933876013122}, - {'name': 'C64', 'charge': -0.12840215383108972}, - {'name': 'C65', 'charge': -0.12668933876013122}, - {'name': 'C66', 'charge': -0.11774626937995367}, - {'name': 'C70', 'charge': 0.02388740859601932}, - {'name': 'C71', 'charge': -0.06923087776665914}, - {'name': 'C72', 'charge': -0.10856468032765192}, - {'name': 'C73', 'charge': -0.11036338907520785}, - {'name': 'C74', 'charge': -0.08244776383718133}, - {'name': 'C75', 'charge': -0.11036338907520785}, - {'name': 'C76', 'charge': -0.10856468032765192}, - {'name': 'C77', 'charge': 
-0.021297911008685094}, - {'name': 'N2', 'charge': -0.7121260761527837}, - {'name': 'N7', 'charge': -0.7121260761527837}, - {'name': 'O1', 'charge': -0.19835509547371977}, - {'name': 'O4', 'charge': -0.6060823059727466}, - {'name': 'O5', 'charge': -0.6060823059727466}, - {'name': 'O27', 'charge': -0.6190964861641765}, - {'name': 'O77', 'charge': -0.6190964861641765}, - {'name': 'H1', 'charge': 0.1939484454048374}, - {'name': 'H2', 'charge': 0.1783837779801701}, - {'name': 'H3', 'charge': 0.1783837779801701}, - {'name': 'H4', 'charge': 0.1939484454048374}, - {'name': 'H5', 'charge': 0.1660607436631621}, - {'name': 'H6', 'charge': 0.1660607436631621}, - {'name': 'H7', 'charge': 0.13696512588510704}, - {'name': 'H8', 'charge': 0.13656070949143695}, - {'name': 'H9', 'charge': 0.13656070949143695}, - {'name': 'H10', 'charge': 0.13696512588510704}, - {'name': 'H11', 'charge': 0.1336796298532153}, - {'name': 'H12', 'charge': 0.1336796298532153}, - {'name': 'H13', 'charge': 0.08892832134853815}, - {'name': 'H14', 'charge': 0.08892832134853815}, - {'name': 'H15', 'charge': 0.13442892363549896}, - {'name': 'H16', 'charge': 0.12944135663694809}, - {'name': 'H17', 'charge': 0.12895177093126134}, - {'name': 'H18', 'charge': 0.12944135663694809}, - {'name': 'H19', 'charge': 0.13442892363549896}, - {'name': 'H20', 'charge': 0.08892832134853815}, - {'name': 'H21', 'charge': 0.08892832134853815}, - {'name': 'H22', 'charge': 0.13442892363549896}, - {'name': 'H23', 'charge': 0.12944135663694809}, - {'name': 'H24', 'charge': 0.12895177093126134}, - {'name': 'H25', 'charge': 0.12944135663694809}, - {'name': 'H26', 'charge': 0.13442892363549896}, - {'name': 'H27', 'charge': 0.1660607436631621}, - {'name': 'H28', 'charge': 0.1660607436631621}, - {'name': 'H29', 'charge': 0.13696512588510704}, - {'name': 'H30', 'charge': 0.13656070949143695}, - {'name': 'H31', 'charge': 0.13656070949143695}, - {'name': 'H32', 'charge': 0.13696512588510704}, - {'name': 'H33', 'charge': 0.1336796298532153}, 
- {'name': 'H34', 'charge': 0.1336796298532153}, - {'name': 'H35', 'charge': 0.4089032076940611}, - {'name': 'H36', 'charge': 0.4089032076940611}, - {'name': 'H37', 'charge': 0.40353983279195227}, - {'name': 'H38', 'charge': 0.40353983279195227}], + {'name': 'C1', 'type': 'C.2', 'charge': 0.2819290256115998, 'radius': 1.87}, + {'name': 'C3', 'type': 'C.3', 'charge': 0.044938629062860544, 'radius': 1.87}, + {'name': 'C4', 'type': 'C.3', 'charge': 0.029905546428555488, 'radius': 1.87}, + {'name': 'C5', 'type': 'C.3', 'charge': 0.029905546428555488, 'radius': 1.87}, + {'name': 'C6', 'type': 'C.3', 'charge': 0.044938629062860544, 'radius': 1.87}, + {'name': 'C20', 'type': 'C.3', 'charge': 0.02388740859601932, 'radius': 1.87}, + {'name': 'C21', 'type': 'C.ar', 'charge': -0.06923087776665914, 'radius': 1.87}, + {'name': 'C22', 'type': 'C.ar', 'charge': -0.10856468032765192, 'radius': 1.87}, + {'name': 'C23', 'type': 'C.ar', 'charge': -0.11036338907520785, 'radius': 1.87}, + {'name': 'C24', 'type': 'C.ar', 'charge': -0.08244776383718133, 'radius': 1.87}, + {'name': 'C25', 'type': 'C.ar', 'charge': -0.11036338907520785, 'radius': 1.87}, + {'name': 'C26', 'type': 'C.ar', 'charge': -0.10856468032765192, 'radius': 1.87}, + {'name': 'C27', 'type': 'C.3', 'charge': -0.021297911008685094, 'radius': 1.87}, + {'name': 'C30', 'type': 'C.3', 'charge': -0.04323885537049762, 'radius': 1.87}, + {'name': 'C31', 'type': 'C.ar', 'charge': -0.09772541979676261, 'radius': 1.87}, + {'name': 'C32', 'type': 'C.ar', 'charge': -0.11774626937995367, 'radius': 1.87}, + {'name': 'C33', 'type': 'C.ar', 'charge': -0.12668933876013122, 'radius': 1.87}, + {'name': 'C34', 'type': 'C.ar', 'charge': -0.12840215383108972, 'radius': 1.87}, + {'name': 'C35', 'type': 'C.ar', 'charge': -0.12668933876013122, 'radius': 1.87}, + {'name': 'C36', 'type': 'C.ar', 'charge': -0.11774626937995367, 'radius': 1.87}, + {'name': 'C60', 'type': 'C.3', 'charge': -0.04323885537049762, 'radius': 1.87}, + {'name': 'C61', 
'type': 'C.ar', 'charge': -0.09772541979676261, 'radius': 1.87}, + {'name': 'C62', 'type': 'C.ar', 'charge': -0.11774626937995367, 'radius': 1.87}, + {'name': 'C63', 'type': 'C.ar', 'charge': -0.12668933876013122, 'radius': 1.87}, + {'name': 'C64', 'type': 'C.ar', 'charge': -0.12840215383108972, 'radius': 1.87}, + {'name': 'C65', 'type': 'C.ar', 'charge': -0.12668933876013122, 'radius': 1.87}, + {'name': 'C66', 'type': 'C.ar', 'charge': -0.11774626937995367, 'radius': 1.87}, + {'name': 'C70', 'type': 'C.3', 'charge': 0.02388740859601932, 'radius': 1.87}, + {'name': 'C71', 'type': 'C.ar', 'charge': -0.06923087776665914, 'radius': 1.87}, + {'name': 'C72', 'type': 'C.ar', 'charge': -0.10856468032765192, 'radius': 1.87}, + {'name': 'C73', 'type': 'C.ar', 'charge': -0.11036338907520785, 'radius': 1.87}, + {'name': 'C74', 'type': 'C.ar', 'charge': -0.08244776383718133, 'radius': 1.87}, + {'name': 'C75', 'type': 'C.ar', 'charge': -0.11036338907520785, 'radius': 1.87}, + {'name': 'C76', 'type': 'C.ar', 'charge': -0.10856468032765192, 'radius': 1.87}, + {'name': 'C77', 'type': 'C.3', 'charge': -0.021297911008685094, 'radius': 1.87}, + {'name': 'N2', 'type': 'N.am', 'charge': -0.7121260761527837, 'radius': 1.4}, + {'name': 'N7', 'type': 'N.am', 'charge': -0.7121260761527837, 'radius': 1.4}, + {'name': 'O1', 'type': 'O.2', 'charge': -0.19835509547371977, 'radius': 1.52}, + {'name': 'O4', 'type': 'O.3', 'charge': -0.6060823059727466, 'radius': 1.52}, + {'name': 'O5', 'type': 'O.3', 'charge': -0.6060823059727466, 'radius': 1.52}, + {'name': 'O27', 'type': 'O.3', 'charge': -0.6190964861641765, 'radius': 1.52}, + {'name': 'O77', 'type': 'O.3', 'charge': -0.6190964861641765, 'radius': 1.52}, + {'name': 'H1', 'type': 'H', 'charge': 0.1939484454048374, 'radius': 1.1}, + {'name': 'H2', 'type': 'H', 'charge': 0.1783837779801701, 'radius': 1.1}, + {'name': 'H3', 'type': 'H', 'charge': 0.1783837779801701, 'radius': 1.1}, + {'name': 'H4', 'type': 'H', 'charge': 0.1939484454048374, 
'radius': 1.1}, + {'name': 'H5', 'type': 'H', 'charge': 0.1660607436631621, 'radius': 1.1}, + {'name': 'H6', 'type': 'H', 'charge': 0.1660607436631621, 'radius': 1.1}, + {'name': 'H7', 'type': 'H', 'charge': 0.13696512588510704, 'radius': 1.1}, + {'name': 'H8', 'type': 'H', 'charge': 0.13656070949143695, 'radius': 1.1}, + {'name': 'H9', 'type': 'H', 'charge': 0.13656070949143695, 'radius': 1.1}, + {'name': 'H10', 'type': 'H', 'charge': 0.13696512588510704, 'radius': 1.1}, + {'name': 'H11', 'type': 'H', 'charge': 0.1336796298532153, 'radius': 1.1}, + {'name': 'H12', 'type': 'H', 'charge': 0.1336796298532153, 'radius': 1.1}, + {'name': 'H13', 'type': 'H', 'charge': 0.08892832134853815, 'radius': 1.1}, + {'name': 'H14', 'type': 'H', 'charge': 0.08892832134853815, 'radius': 1.1}, + {'name': 'H15', 'type': 'H', 'charge': 0.13442892363549896, 'radius': 1.1}, + {'name': 'H16', 'type': 'H', 'charge': 0.12944135663694809, 'radius': 1.1}, + {'name': 'H17', 'type': 'H', 'charge': 0.12895177093126134, 'radius': 1.1}, + {'name': 'H18', 'type': 'H', 'charge': 0.12944135663694809, 'radius': 1.1}, + {'name': 'H19', 'type': 'H', 'charge': 0.13442892363549896, 'radius': 1.1}, + {'name': 'H20', 'type': 'H', 'charge': 0.08892832134853815, 'radius': 1.1}, + {'name': 'H21', 'type': 'H', 'charge': 0.08892832134853815, 'radius': 1.1}, + {'name': 'H22', 'type': 'H', 'charge': 0.13442892363549896, 'radius': 1.1}, + {'name': 'H23', 'type': 'H', 'charge': 0.12944135663694809, 'radius': 1.1}, + {'name': 'H24', 'type': 'H', 'charge': 0.12895177093126134, 'radius': 1.1}, + {'name': 'H25', 'type': 'H', 'charge': 0.12944135663694809, 'radius': 1.1}, + {'name': 'H26', 'type': 'H', 'charge': 0.13442892363549896, 'radius': 1.1}, + {'name': 'H27', 'type': 'H', 'charge': 0.1660607436631621, 'radius': 1.1}, + {'name': 'H28', 'type': 'H', 'charge': 0.1660607436631621, 'radius': 1.1}, + {'name': 'H29', 'type': 'H', 'charge': 0.13696512588510704, 'radius': 1.1}, + {'name': 'H30', 'type': 'H', 'charge': 
0.13656070949143695, 'radius': 1.1}, + {'name': 'H31', 'type': 'H', 'charge': 0.13656070949143695, 'radius': 1.1}, + {'name': 'H32', 'type': 'H', 'charge': 0.13696512588510704, 'radius': 1.1}, + {'name': 'H33', 'type': 'H', 'charge': 0.1336796298532153, 'radius': 1.1}, + {'name': 'H34', 'type': 'H', 'charge': 0.1336796298532153, 'radius': 1.1}, + {'name': 'H35', 'type': 'H', 'charge': 0.4089032076940611, 'radius': 1.1}, + {'name': 'H36', 'type': 'H', 'charge': 0.4089032076940611, 'radius': 1.1}, + {'name': 'H37', 'type': 'H', 'charge': 0.40353983279195227, 'radius': 1.1}, + {'name': 'H38', 'type': 'H', 'charge': 0.40353983279195227, 'radius': 1.1}], "1US0-ligand.mol2": [ - {'name': 'C2', 'charge': -0.011692507946738866}, - {'name': 'C3', 'charge': -0.07564532850595106}, - {'name': 'C4', 'charge': 0.0995106215449818}, - {'name': 'C5', 'charge': 0.087912610051304}, - {'name': 'C6', 'charge': -0.06822502752899269}, - {'name': 'C7', 'charge': 0.071206877934909}, - {'name': 'BR8', 'charge': -0.1781415724110881}, - {'name': 'F9', 'charge': -0.24609828999108224}, - {'name': 'C11', 'charge': 0.01969593460035858}, - {'name': 'C13', 'charge': -0.012644223822613768}, - {'name': 'F14', 'charge': -0.24298404863346873}, - {'name': 'O15', 'charge': -0.564853763228142}, - {'name': 'S16', 'charge': -0.08677321834382315}, - {'name': 'N17', 'charge': -0.5406144014197267}, - {'name': 'C20', 'charge': 0.11379374529001238}, - {'name': 'C24', 'charge': -0.025165867794008426}, - {'name': 'C25', 'charge': 0.05247378966494793}, - {'name': 'C26', 'charge': -0.09820934352177407}, - {'name': 'C27', 'charge': 0.09943434454444673}, - {'name': 'C28', 'charge': -0.030058244343191105}, - {'name': 'C29', 'charge': -0.08190135488592247}, - {'name': 'C32', 'charge': 0.13523301177634597}, - {'name': 'O33', 'charge': -0.6823845278785808}, - {'name': 'O34', 'charge': -0.6823845278785808}, - {'name': 'H1', 'charge': 0.1785741592725812}, - {'name': 'H2', 'charge': 0.1460165676666413}, - {'name': 'H3', 
'charge': 0.15178137252163487}, - {'name': 'H4', 'charge': 0.12305674443192938}, - {'name': 'H5', 'charge': 0.12305674443192938}, - {'name': 'H6', 'charge': 0.3361191041929824}, - {'name': 'H7', 'charge': 0.21824692354069947}, - {'name': 'H8', 'charge': 0.21824692354069947}, - {'name': 'H9', 'charge': 0.13902103205782942}, - {'name': 'H10', 'charge': 0.16785538090669733}, - {'name': 'H11', 'charge': 0.14654036016275432}], + {'name': 'C2', 'type': 'C.ar', 'charge': -0.011692507946738866, 'radius': 1.87}, + {'name': 'C3', 'type': 'C.ar', 'charge': -0.07564532850595106, 'radius': 1.87}, + {'name': 'C4', 'type': 'C.ar', 'charge': 0.0995106215449818, 'radius': 1.87}, + {'name': 'C5', 'type': 'C.ar', 'charge': 0.087912610051304, 'radius': 1.87}, + {'name': 'C6', 'type': 'C.ar', 'charge': -0.06822502752899269, 'radius': 1.87}, + {'name': 'C7', 'type': 'C.ar', 'charge': 0.071206877934909, 'radius': 1.87}, + {'name': 'BR8', 'type': 'Br', 'charge': -0.1781415724110881, 'radius': 1.85}, + {'name': 'F9', 'type': 'F', 'charge': -0.24609828999108224, 'radius': 2.4}, + {'name': 'C11', 'type': 'C.2', 'charge': 0.01969593460035858, 'radius': 1.87}, + {'name': 'C13', 'type': 'C.3', 'charge': -0.012644223822613768, 'radius': 1.87}, + {'name': 'F14', 'type': 'F', 'charge': -0.24298404863346873, 'radius': 2.4}, + {'name': 'O15', 'type': 'O.3', 'charge': -0.564853763228142, 'radius': 1.52}, + {'name': 'S16', 'type': 'S.3', 'charge': -0.08677321834382315, 'radius': 2.15}, + {'name': 'N17', 'type': 'N.pl3', 'charge': -0.5406144014197267, 'radius': 1.4}, + {'name': 'C20', 'type': 'C.3', 'charge': 0.11379374529001238, 'radius': 1.87}, + {'name': 'C24', 'type': 'C.ar', 'charge': -0.025165867794008426, 'radius': 1.87}, + {'name': 'C25', 'type': 'C.ar', 'charge': 0.05247378966494793, 'radius': 1.87}, + {'name': 'C26', 'type': 'C.ar', 'charge': -0.09820934352177407, 'radius': 1.87}, + {'name': 'C27', 'type': 'C.ar', 'charge': 0.09943434454444673, 'radius': 1.87}, + {'name': 'C28', 'type': 
'C.ar', 'charge': -0.030058244343191105, 'radius': 1.87}, + {'name': 'C29', 'type': 'C.ar', 'charge': -0.08190135488592247, 'radius': 1.87}, + {'name': 'C32', 'type': 'C.2', 'charge': 0.13523301177634597, 'radius': 1.87}, + {'name': 'O33', 'type': 'O.co2', 'charge': -0.6823845278785808, 'radius': 1.76}, + {'name': 'O34', 'type': 'O.co2', 'charge': -0.6823845278785808, 'radius': 1.76}, + {'name': 'H1', 'type': 'H', 'charge': 0.1785741592725812, 'radius': 1.1}, + {'name': 'H2', 'type': 'H', 'charge': 0.1460165676666413, 'radius': 1.1}, + {'name': 'H3', 'type': 'H', 'charge': 0.15178137252163487, 'radius': 1.1}, + {'name': 'H4', 'type': 'H', 'charge': 0.12305674443192938, 'radius': 1.1}, + {'name': 'H5', 'type': 'H', 'charge': 0.12305674443192938, 'radius': 1.1}, + {'name': 'H6', 'type': 'H', 'charge': 0.3361191041929824, 'radius': 1.1}, + {'name': 'H7', 'type': 'H', 'charge': 0.21824692354069947, 'radius': 1.1}, + {'name': 'H8', 'type': 'H', 'charge': 0.21824692354069947, 'radius': 1.1}, + {'name': 'H9', 'type': 'H', 'charge': 0.13902103205782942, 'radius': 1.1}, + {'name': 'H10', 'type': 'H', 'charge': 0.16785538090669733, 'radius': 1.1}, + {'name': 'H11', 'type': 'H', 'charge': 0.14654036016275432, 'radius': 1.1}], "acetate.mol2": [ - {'name': 'OAC', 'charge': -0.6920003959699945}, - {'name': 'CAB', 'charge': 0.08786702981704235}, - {'name': 'OAD', 'charge': -0.6920003959699945}, - {'name': 'CAA', 'charge': -0.007013553584021696}, - {'name': 'HAB', 'charge': 0.10104910523565629}, - {'name': 'HAC', 'charge': 0.10104910523565629}, - {'name': 'HAA', 'charge': 0.10104910523565629}], + {'name': 'OAC', 'type': 'O.co2', 'charge': -0.6920003959699945, 'radius': 1.76}, + {'name': 'CAB', 'type': 'C.2', 'charge': 0.08786702981704235, 'radius': 1.87}, + {'name': 'OAD', 'type': 'O.co2', 'charge': -0.6920003959699945, 'radius': 1.76}, + {'name': 'CAA', 'type': 'C.3', 'charge': -0.007013553584021696, 'radius': 1.87}, + {'name': 'HAB', 'type': 'H', 'charge': 0.10104910523565629, 
'radius': 1.1}, + {'name': 'HAC', 'type': 'H', 'charge': 0.10104910523565629, 'radius': 1.1}, + {'name': 'HAA', 'type': 'H', 'charge': 0.10104910523565629, 'radius': 1.1}], "acetonitrile.mol2": [ - {'name': 'NAC', 'charge': -0.3658493397941428}, - {'name': 'CAB', 'charge': 0.047166532439326746}, - {'name': 'CAA', 'charge': 0.0031209482290866543}, - {'name': 'HAB', 'charge': 0.10518728637524313}, - {'name': 'HAC', 'charge': 0.10518728637524313}, - {'name': 'HAA', 'charge': 0.10518728637524313}], + {'name': 'NAC', 'type': 'N.1', 'charge': -0.3658493397941428, 'radius': 1.4}, + {'name': 'CAB', 'type': 'C.1', 'charge': 0.047166532439326746, 'radius': 1.87}, + {'name': 'CAA', 'type': 'C.3', 'charge': 0.0031209482290866543, 'radius': 1.87}, + {'name': 'HAB', 'type': 'H', 'charge': 0.10518728637524313, 'radius': 1.1}, + {'name': 'HAC', 'type': 'H', 'charge': 0.10518728637524313, 'radius': 1.1}, + {'name': 'HAA', 'type': 'H', 'charge': 0.10518728637524313, 'radius': 1.1}], "acetylcholine.mol2": [ - {'name': 'CAI', 'charge': -0.008419213752965279}, - {'name': 'HAK', 'charge': 0.09728030818644891}, - {'name': 'HAL', 'charge': 0.09728030818644891}, - {'name': 'HAM', 'charge': 0.09728030818644891}, - {'name': 'CAH', 'charge': 0.1545682666391745}, - {'name': 'OAB', 'charge': -0.27967627950752316}, - {'name': 'OAG', 'charge': -0.5429502778137623}, - {'name': 'CAF', 'charge': 0.037889841506971396}, - {'name': 'HAI', 'charge': 0.1710371780188628}, - {'name': 'HAJ', 'charge': 0.1710371780188628}, - {'name': 'CAE', 'charge': 0.0572339093088662}, - {'name': 'HAG', 'charge': 0.16582144135089144}, - {'name': 'HAH', 'charge': 0.16582144135089144}, - {'name': 'NAD', 'charge': -0.6101582744216567}, - {'name': 'CAJ', 'charge': 0.016251330821632083}, - {'name': 'HAO', 'charge': 0.13079998580857152}, - {'name': 'HAP', 'charge': 0.13079998580857152}, - {'name': 'HAN', 'charge': 0.13079998580857152}, - {'name': 'CAA', 'charge': 0.016251330821632083}, - {'name': 'HAB', 'charge': 
0.13079998580857152}, - {'name': 'HAC', 'charge': 0.13079998580857152}, - {'name': 'HAA', 'charge': 0.13079998580857152}, - {'name': 'CAC', 'charge': 0.016251330821632083}, - {'name': 'HAE', 'charge': 0.13079998580857152}, - {'name': 'HAF', 'charge': 0.13079998580857152}, - {'name': 'HAD', 'charge': 0.13079998580857152}], + {'name': 'CAI', 'type': 'C.3', 'charge': -0.008419213752965279, 'radius': 1.87}, + {'name': 'HAK', 'type': 'H', 'charge': 0.09728030818644891, 'radius': 1.1}, + {'name': 'HAL', 'type': 'H', 'charge': 0.09728030818644891, 'radius': 1.1}, + {'name': 'HAM', 'type': 'H', 'charge': 0.09728030818644891, 'radius': 1.1}, + {'name': 'CAH', 'type': 'C.2', 'charge': 0.1545682666391745, 'radius': 1.87}, + {'name': 'OAB', 'type': 'O.2', 'charge': -0.27967627950752316, 'radius': 1.52}, + {'name': 'OAG', 'type': 'O.3', 'charge': -0.5429502778137623, 'radius': 1.52}, + {'name': 'CAF', 'type': 'C.3', 'charge': 0.037889841506971396, 'radius': 1.87}, + {'name': 'HAI', 'type': 'H', 'charge': 0.1710371780188628, 'radius': 1.1}, + {'name': 'HAJ', 'type': 'H', 'charge': 0.1710371780188628, 'radius': 1.1}, + {'name': 'CAE', 'type': 'C.3', 'charge': 0.0572339093088662, 'radius': 1.87}, + {'name': 'HAG', 'type': 'H', 'charge': 0.16582144135089144, 'radius': 1.1}, + {'name': 'HAH', 'type': 'H', 'charge': 0.16582144135089144, 'radius': 1.1}, + {'name': 'NAD', 'type': 'N.3', 'charge': -0.6101582744216567, 'radius': 1.4}, + {'name': 'CAJ', 'type': 'C.3', 'charge': 0.016251330821632083, 'radius': 1.87}, + {'name': 'HAO', 'type': 'H', 'charge': 0.13079998580857152, 'radius': 1.1}, + {'name': 'HAP', 'type': 'H', 'charge': 0.13079998580857152, 'radius': 1.1}, + {'name': 'HAN', 'type': 'H', 'charge': 0.13079998580857152, 'radius': 1.1}, + {'name': 'CAA', 'type': 'C.3', 'charge': 0.016251330821632083, 'radius': 1.87}, + {'name': 'HAB', 'type': 'H', 'charge': 0.13079998580857152, 'radius': 1.1}, + {'name': 'HAC', 'type': 'H', 'charge': 0.13079998580857152, 'radius': 1.1}, + 
{'name': 'HAA', 'type': 'H', 'charge': 0.13079998580857152, 'radius': 1.1}, + {'name': 'CAC', 'type': 'C.3', 'charge': 0.016251330821632083, 'radius': 1.87}, + {'name': 'HAE', 'type': 'H', 'charge': 0.13079998580857152, 'radius': 1.1}, + {'name': 'HAF', 'type': 'H', 'charge': 0.13079998580857152, 'radius': 1.1}, + {'name': 'HAD', 'type': 'H', 'charge': 0.13079998580857152, 'radius': 1.1}], "adp.mol2": [ - {'name': 'O2B', 'charge': -0.9292599002864984}, - {'name': 'PB', 'charge': 0.2901422925812394}, - {'name': 'O3B', 'charge': -0.15563495903544203}, - {'name': 'O1B', 'charge': -0.10031235497346548}, - {'name': 'O3A', 'charge': -0.22961271227927002}, - {'name': 'PA', 'charge': 0.20924046046691175}, - {'name': 'O2A', 'charge': -0.9415448332469695}, - {'name': 'O1A', 'charge': -0.11862974402094492}, - {'name': "O5'", 'charge': -0.46621302263040854}, - {'name': "C5'", 'charge': 0.0798878056064067}, - {'name': "H5'", 'charge': 0.18839134309240957}, - {'name': 'H5S', 'charge': 0.18839134309240957}, - {'name': "C4'", 'charge': 0.0695445224862847}, - {'name': "H4'", 'charge': 0.20776211836320221}, - {'name': "O4'", 'charge': -0.5870759018169631}, - {'name': "C3'", 'charge': 0.03852622354001534}, - {'name': "H3'", 'charge': 0.17992130819444715}, - {'name': "O3'", 'charge': -0.6050957763635193}, - {'name': 'H8L', 'charge': 0.408917570005205}, - {'name': "C2'", 'charge': 0.04785444900747468}, - {'name': "H2'", 'charge': 0.18698431697045484}, - {'name': "O2'", 'charge': -0.6030269458228865}, - {'name': 'H8M', 'charge': 0.40956200683924104}, - {'name': "C1'", 'charge': 0.1072884172431409}, - {'name': "H1'", 'charge': 0.25190824252954014}, - {'name': 'N9', 'charge': -0.32267909962343944}, - {'name': 'C8', 'charge': 0.03749022549830809}, - {'name': 'H8', 'charge': 0.24861896888732188}, - {'name': 'N7', 'charge': -0.2902489369083555}, - {'name': 'C5', 'charge': 0.15701292932781932}, - {'name': 'C4', 'charge': 0.16341092847650182}, - {'name': 'N3', 'charge': -0.28837259982632546}, 
- {'name': 'C2', 'charge': 0.00966560470099504}, - {'name': 'H2', 'charge': 0.23340110948012485}, - {'name': 'N1', 'charge': -0.3751603212355695}, - {'name': 'H1', 'charge': 0.2881016272229071}, - {'name': 'C6', 'charge': 0.10850097968415028}, - {'name': 'N6', 'charge': -0.46957750571325696}, - {'name': 'H6', 'charge': 0.3719198204868037}], + {'name': 'O2B', 'type': 'O.3', 'charge': -0.9292599002864984, 'radius': 1.52}, + {'name': 'PB', 'type': 'P.3', 'charge': 0.2901422925812394, 'radius': 1.8}, + {'name': 'O3B', 'type': 'O.3', 'charge': -0.15563495903544203, 'radius': 1.52}, + {'name': 'O1B', 'type': 'O.2', 'charge': -0.10031235497346548, 'radius': 1.52}, + {'name': 'O3A', 'type': 'O.3', 'charge': -0.22961271227927002, 'radius': 1.52}, + {'name': 'PA', 'type': 'P.3', 'charge': 0.20924046046691175, 'radius': 1.8}, + {'name': 'O2A', 'type': 'O.3', 'charge': -0.9415448332469695, 'radius': 1.52}, + {'name': 'O1A', 'type': 'O.2', 'charge': -0.11862974402094492, 'radius': 1.52}, + {'name': "O5'", 'type': 'O.3', 'charge': -0.46621302263040854, 'radius': 1.52}, + {'name': "C5'", 'type': 'C.3', 'charge': 0.0798878056064067, 'radius': 1.87}, + {'name': "H5'", 'type': 'H', 'charge': 0.18839134309240957, 'radius': 1.1}, + {'name': 'H5S', 'type': 'H', 'charge': 0.18839134309240957, 'radius': 1.1}, + {'name': "C4'", 'type': 'C.3', 'charge': 0.0695445224862847, 'radius': 1.87}, + {'name': "H4'", 'type': 'H', 'charge': 0.20776211836320221, 'radius': 1.1}, + {'name': "O4'", 'type': 'O.3', 'charge': -0.5870759018169631, 'radius': 1.52}, + {'name': "C3'", 'type': 'C.3', 'charge': 0.03852622354001534, 'radius': 1.87}, + {'name': "H3'", 'type': 'H', 'charge': 0.17992130819444715, 'radius': 1.1}, + {'name': "O3'", 'type': 'O.3', 'charge': -0.6050957763635193, 'radius': 1.52}, + {'name': 'H8L', 'type': 'H', 'charge': 0.408917570005205, 'radius': 1.1}, + {'name': "C2'", 'type': 'C.3', 'charge': 0.04785444900747468, 'radius': 1.87}, + {'name': "H2'", 'type': 'H', 'charge': 
0.18698431697045484, 'radius': 1.1}, + {'name': "O2'", 'type': 'O.3', 'charge': -0.6030269458228865, 'radius': 1.52}, + {'name': 'H8M', 'type': 'H', 'charge': 0.40956200683924104, 'radius': 1.1}, + {'name': "C1'", 'type': 'C.3', 'charge': 0.1072884172431409, 'radius': 1.87}, + {'name': "H1'", 'type': 'H', 'charge': 0.25190824252954014, 'radius': 1.1}, + {'name': 'N9', 'type': 'N.ar', 'charge': -0.32267909962343944, 'radius': 1.4}, + {'name': 'C8', 'type': 'C.ar', 'charge': 0.03749022549830809, 'radius': 1.87}, + {'name': 'H8', 'type': 'H', 'charge': 0.24861896888732188, 'radius': 1.1}, + {'name': 'N7', 'type': 'N.ar', 'charge': -0.2902489369083555, 'radius': 1.4}, + {'name': 'C5', 'type': 'C.ar', 'charge': 0.15701292932781932, 'radius': 1.87}, + {'name': 'C4', 'type': 'C.ar', 'charge': 0.16341092847650182, 'radius': 1.87}, + {'name': 'N3', 'type': 'N.ar', 'charge': -0.28837259982632546, 'radius': 1.4}, + {'name': 'C2', 'type': 'C.ar', 'charge': 0.00966560470099504, 'radius': 1.87}, + {'name': 'H2', 'type': 'H', 'charge': 0.23340110948012485, 'radius': 1.1}, + {'name': 'N1', 'type': 'N.ar', 'charge': -0.3751603212355695, 'radius': 1.4}, + {'name': 'H1', 'type': 'H', 'charge': 0.2881016272229071, 'radius': 1.1}, + {'name': 'C6', 'type': 'C.ar', 'charge': 0.10850097968415028, 'radius': 1.87}, + {'name': 'N6', 'type': 'N.2', 'charge': -0.46957750571325696, 'radius': 1.4}, + {'name': 'H6', 'type': 'H', 'charge': 0.3719198204868037, 'radius': 1.1}], "anthracene.mol2": [ - {'name': 'CAB', 'charge': -0.10948195185220932}, - {'name': 'HAB', 'charge': 0.13718781330297145}, - {'name': 'CAC', 'charge': -0.05951797220652635}, - {'name': 'CAD', 'charge': -0.090739817012503}, - {'name': 'HAD', 'charge': 0.14537539214857495}, - {'name': 'CAE', 'charge': -0.05951797220652635}, - {'name': 'CAF', 'charge': -0.10948195185220932}, - {'name': 'HAF', 'charge': 0.13718781330297145}, - {'name': 'CAG', 'charge': -0.1252004685570065}, - {'name': 'HAG', 'charge': 0.12969479174473472}, - 
{'name': 'CAN', 'charge': -0.1252004685570065}, - {'name': 'HAN', 'charge': 0.12969479174473472}, - {'name': 'CAM', 'charge': -0.10948195185220932}, - {'name': 'HAM', 'charge': 0.13718781330297145}, - {'name': 'CAL', 'charge': -0.05951797220652635}, - {'name': 'CAK', 'charge': -0.090739817012503}, - {'name': 'HAK', 'charge': 0.14537539214857495}, - {'name': 'CAJ', 'charge': -0.05951797220652635}, - {'name': 'CAI', 'charge': -0.10948195185220932}, - {'name': 'HAI', 'charge': 0.13718781330297145}, - {'name': 'CAH', 'charge': -0.1252004685570065}, - {'name': 'HAH', 'charge': 0.12969479174473472}, - {'name': 'CAA', 'charge': -0.1252004685570065}, - {'name': 'HAA', 'charge': 0.12969479174473472}], - "crown-ether.mol2": [ - {'name': 'CAB', 'charge': -0.0020089927963956975}, - {'name': 'HAB', 'charge': 0.1571150829116664}, - {'name': 'HAC', 'charge': 0.1571150829116664}, - {'name': 'OAA', 'charge': -0.6244423460538743}, - {'name': 'CAH', 'charge': -0.002008992796395719}, - {'name': 'HAJ', 'charge': 0.1571150829116664}, - {'name': 'HAK', 'charge': 0.1571150829116664}, - {'name': 'CAJ', 'charge': -0.0020089927963956975}, - {'name': 'HAN', 'charge': 0.1571150829116664}, - {'name': 'HAO', 'charge': 0.1571150829116664}, - {'name': 'OAL', 'charge': -0.6244423460538743}, - {'name': 'CAM', 'charge': -0.002008992796395719}, - {'name': 'HAR', 'charge': 0.1571150829116664}, - {'name': 'HAS', 'charge': 0.1571150829116664}, - {'name': 'CAN', 'charge': -0.0020089927963956975}, - {'name': 'HAT', 'charge': 0.1571150829116664}, - {'name': 'HAU', 'charge': 0.1571150829116664}, - {'name': 'OAO', 'charge': -0.6244423460538743}, - {'name': 'CAP', 'charge': -0.002008992796395719}, - {'name': 'HAV', 'charge': 0.1571150829116664}, - {'name': 'HAW', 'charge': 0.1571150829116664}, - {'name': 'CAQ', 'charge': -0.0020089927963956975}, - {'name': 'HAX', 'charge': 0.1571150829116664}, - {'name': 'HAY', 'charge': 0.1571150829116664}, - {'name': 'OAR', 'charge': -0.6244423460538743}, - {'name': 'CAK', 
'charge': -0.002008992796395719}, - {'name': 'HAP', 'charge': 0.1571150829116664}, - {'name': 'HAQ', 'charge': 0.1571150829116664}, - {'name': 'CAI', 'charge': -0.0020089927963956975}, - {'name': 'HAL', 'charge': 0.1571150829116664}, - {'name': 'HAM', 'charge': 0.1571150829116664}, - {'name': 'OAG', 'charge': -0.6244423460538743}, - {'name': 'CAF', 'charge': -0.002008992796395719}, - {'name': 'HAH', 'charge': 0.1571150829116664}, - {'name': 'HAI', 'charge': 0.1571150829116664}, - {'name': 'CAE', 'charge': -0.0020089927963956975}, - {'name': 'HAF', 'charge': 0.1571150829116664}, - {'name': 'HAG', 'charge': 0.1571150829116664}, - {'name': 'OAD', 'charge': -0.6244423460538743}, - {'name': 'CAC', 'charge': -0.002008992796395719}, - {'name': 'HAE', 'charge': 0.1571150829116664}, - {'name': 'HAD', 'charge': 0.1571150829116664}], - "cyclohexane.mol2": [ - {'name': 'CAA', 'charge': -0.10295775116564801}, - {'name': 'HAA', 'charge': 0.051478875582824005}, - {'name': 'HAB', 'charge': 0.051478875582824005}, - {'name': 'CAB', 'charge': -0.10295775116564801}, - {'name': 'HAC', 'charge': 0.051478875582824005}, - {'name': 'HAD', 'charge': 0.051478875582824005}, - {'name': 'CAC', 'charge': -0.10295775116564801}, - {'name': 'HAE', 'charge': 0.051478875582824005}, - {'name': 'HAF', 'charge': 0.051478875582824005}, - {'name': 'CAF', 'charge': -0.10295775116564801}, - {'name': 'HAK', 'charge': 0.051478875582824005}, - {'name': 'HAL', 'charge': 0.051478875582824005}, - {'name': 'CAE', 'charge': -0.10295775116564801}, - {'name': 'HAI', 'charge': 0.051478875582824005}, - {'name': 'HAJ', 'charge': 0.051478875582824005}, - {'name': 'CAD', 'charge': -0.10295775116564801}, - {'name': 'HAH', 'charge': 0.051478875582824005}, - {'name': 'HAG', 'charge': 0.051478875582824005}], + {'name': 'CAB', 'type': 'C.ar', 'charge': -0.10948195185220932, 'radius': 1.87}, + {'name': 'HAB', 'type': 'H', 'charge': 0.13718781330297145, 'radius': 1.1}, + {'name': 'CAC', 'type': 'C.ar', 'charge': 
-0.05951797220652635, 'radius': 1.87}, + {'name': 'CAD', 'type': 'C.ar', 'charge': -0.090739817012503, 'radius': 1.87}, + {'name': 'HAD', 'type': 'H', 'charge': 0.14537539214857495, 'radius': 1.1}, + {'name': 'CAE', 'type': 'C.ar', 'charge': -0.05951797220652635, 'radius': 1.87}, + {'name': 'CAF', 'type': 'C.ar', 'charge': -0.10948195185220932, 'radius': 1.87}, + {'name': 'HAF', 'type': 'H', 'charge': 0.13718781330297145, 'radius': 1.1}, + {'name': 'CAG', 'type': 'C.ar', 'charge': -0.1252004685570065, 'radius': 1.87}, + {'name': 'HAG', 'type': 'H', 'charge': 0.12969479174473472, 'radius': 1.1}, + {'name': 'CAN', 'type': 'C.ar', 'charge': -0.1252004685570065, 'radius': 1.87}, + {'name': 'HAN', 'type': 'H', 'charge': 0.12969479174473472, 'radius': 1.1}, + {'name': 'CAM', 'type': 'C.ar', 'charge': -0.10948195185220932, 'radius': 1.87}, + {'name': 'HAM', 'type': 'H', 'charge': 0.13718781330297145, 'radius': 1.1}, + {'name': 'CAL', 'type': 'C.ar', 'charge': -0.05951797220652635, 'radius': 1.87}, + {'name': 'CAK', 'type': 'C.ar', 'charge': -0.090739817012503, 'radius': 1.87}, + {'name': 'HAK', 'type': 'H', 'charge': 0.14537539214857495, 'radius': 1.1}, + {'name': 'CAJ', 'type': 'C.ar', 'charge': -0.05951797220652635, 'radius': 1.87}, + {'name': 'CAI', 'type': 'C.ar', 'charge': -0.10948195185220932, 'radius': 1.87}, + {'name': 'HAI', 'type': 'H', 'charge': 0.13718781330297145, 'radius': 1.1}, + {'name': 'CAH', 'type': 'C.ar', 'charge': -0.1252004685570065, 'radius': 1.87}, + {'name': 'HAH', 'type': 'H', 'charge': 0.12969479174473472, 'radius': 1.1}, + {'name': 'CAA', 'type': 'C.ar', 'charge': -0.1252004685570065, 'radius': 1.87}, + {'name': 'HAA', 'type': 'H', 'charge': 0.12969479174473472, 'radius': 1.1}], + "cyclohexane.mol2": [ + {'name': 'CAA', 'type': 'C.3', 'charge': -0.10295775116564801, 'radius': 1.87}, + {'name': 'HAA', 'type': 'H', 'charge': 0.051478875582824005, 'radius': 1.1}, + {'name': 'HAB', 'type': 'H', 'charge': 0.051478875582824005, 'radius': 1.1}, + 
{'name': 'CAB', 'type': 'C.3', 'charge': -0.10295775116564801, 'radius': 1.87}, + {'name': 'HAC', 'type': 'H', 'charge': 0.051478875582824005, 'radius': 1.1}, + {'name': 'HAD', 'type': 'H', 'charge': 0.051478875582824005, 'radius': 1.1}, + {'name': 'CAC', 'type': 'C.3', 'charge': -0.10295775116564801, 'radius': 1.87}, + {'name': 'HAE', 'type': 'H', 'charge': 0.051478875582824005, 'radius': 1.1}, + {'name': 'HAF', 'type': 'H', 'charge': 0.051478875582824005, 'radius': 1.1}, + {'name': 'CAF', 'type': 'C.3', 'charge': -0.10295775116564801, 'radius': 1.87}, + {'name': 'HAK', 'type': 'H', 'charge': 0.051478875582824005, 'radius': 1.1}, + {'name': 'HAL', 'type': 'H', 'charge': 0.051478875582824005, 'radius': 1.1}, + {'name': 'CAE', 'type': 'C.3', 'charge': -0.10295775116564801, 'radius': 1.87}, + {'name': 'HAI', 'type': 'H', 'charge': 0.051478875582824005, 'radius': 1.1}, + {'name': 'HAJ', 'type': 'H', 'charge': 0.051478875582824005, 'radius': 1.1}, + {'name': 'CAD', 'type': 'C.3', 'charge': -0.10295775116564801, 'radius': 1.87}, + {'name': 'HAH', 'type': 'H', 'charge': 0.051478875582824005, 'radius': 1.1}, + {'name': 'HAG', 'type': 'H', 'charge': 0.051478875582824005, 'radius': 1.1}], "ethanol.mol2": [ - {'name': 'CAA', 'charge': -0.09830853015557071}, - {'name': 'HAA', 'charge': 0.04965533009730527}, - {'name': 'HAB', 'charge': 0.04965533009730527}, - {'name': 'HAC', 'charge': 0.04965533009730527}, - {'name': 'CAB', 'charge': -0.05333201263132795}, - {'name': 'HAD', 'charge': 0.1136006498229285}, - {'name': 'HAE', 'charge': 0.1136006498229285}, - {'name': 'OAC', 'charge': -0.6256712436819194}, - {'name': 'HAF', 'charge': 0.4011444965310452}], + {'name': 'CAA', 'type': 'C.3', 'charge': -0.09830853015557071, 'radius': 1.87}, + {'name': 'HAA', 'type': 'H', 'charge': 0.04965533009730527, 'radius': 1.1}, + {'name': 'HAB', 'type': 'H', 'charge': 0.04965533009730527, 'radius': 1.1}, + {'name': 'HAC', 'type': 'H', 'charge': 0.04965533009730527, 'radius': 1.1}, + {'name': 
'CAB', 'type': 'C.3', 'charge': -0.05333201263132795, 'radius': 1.87}, + {'name': 'HAD', 'type': 'H', 'charge': 0.1136006498229285, 'radius': 1.1}, + {'name': 'HAE', 'type': 'H', 'charge': 0.1136006498229285, 'radius': 1.1}, + {'name': 'OAC', 'type': 'O.3', 'charge': -0.6256712436819194, 'radius': 1.52}, + {'name': 'HAF', 'type': 'H', 'charge': 0.4011444965310452, 'radius': 1.1}], "fatty-acid.mol2": [ - {'name': 'OAA', 'charge': -0.6905415761361866}, - {'name': 'CAB', 'charge': 0.09491530638614049}, - {'name': 'OAL', 'charge': -0.6905415761361866}, - {'name': 'CAC', 'charge': 0.012573264939255872}, - {'name': 'HAC', 'charge': 0.11848768797200306}, - {'name': 'HAD', 'charge': 0.11848768797200306}, - {'name': 'CAD', 'charge': -0.07742980670292797}, - {'name': 'HAE', 'charge': 0.06140059278092227}, - {'name': 'HAF', 'charge': 0.06140059278092227}, - {'name': 'CAE', 'charge': -0.076959643935905}, - {'name': 'HAG', 'charge': 0.07285165771612633}, - {'name': 'HAH', 'charge': 0.07285165771612633}, - {'name': 'CAF', 'charge': -0.24236491728150775}, - {'name': 'HAI', 'charge': 0.16485599597989423}, - {'name': 'CAG', 'charge': -0.2426026937449157}, - {'name': 'HAJ', 'charge': 0.1648388876026803}, - {'name': 'CAH', 'charge': -0.07965667787077134}, - {'name': 'HAK', 'charge': 0.07232360507910963}, - {'name': 'HAL', 'charge': 0.07232360507910963}, - {'name': 'CAI', 'charge': -0.09862724378017657}, - {'name': 'HAM', 'charge': 0.05403847507524275}, - {'name': 'HAN', 'charge': 0.05403847507524275}, - {'name': 'CAJ', 'charge': -0.10608184708836539}, - {'name': 'HAO', 'charge': 0.04966180988232633}, - {'name': 'HAP', 'charge': 0.04966180988232633}, - {'name': 'CAK', 'charge': -0.11173869603286389}, - {'name': 'HAR', 'charge': 0.04061118893012504}, - {'name': 'HAS', 'charge': 0.04061118893012504}, - {'name': 'HAQ', 'charge': 0.04061118893012504}], + {'name': 'OAA', 'type': 'O.co2', 'charge': -0.6905415761361866, 'radius': 1.76}, + {'name': 'CAB', 'type': 'C.2', 'charge': 
0.09491530638614049, 'radius': 1.87}, + {'name': 'OAL', 'type': 'O.co2', 'charge': -0.6905415761361866, 'radius': 1.76}, + {'name': 'CAC', 'type': 'C.3', 'charge': 0.012573264939255872, 'radius': 1.87}, + {'name': 'HAC', 'type': 'H', 'charge': 0.11848768797200306, 'radius': 1.1}, + {'name': 'HAD', 'type': 'H', 'charge': 0.11848768797200306, 'radius': 1.1}, + {'name': 'CAD', 'type': 'C.3', 'charge': -0.07742980670292797, 'radius': 1.87}, + {'name': 'HAE', 'type': 'H', 'charge': 0.06140059278092227, 'radius': 1.1}, + {'name': 'HAF', 'type': 'H', 'charge': 0.06140059278092227, 'radius': 1.1}, + {'name': 'CAE', 'type': 'C.3', 'charge': -0.076959643935905, 'radius': 1.87}, + {'name': 'HAG', 'type': 'H', 'charge': 0.07285165771612633, 'radius': 1.1}, + {'name': 'HAH', 'type': 'H', 'charge': 0.07285165771612633, 'radius': 1.1}, + {'name': 'CAF', 'type': 'C.2', 'charge': -0.24236491728150775, 'radius': 1.87}, + {'name': 'HAI', 'type': 'H', 'charge': 0.16485599597989423, 'radius': 1.1}, + {'name': 'CAG', 'type': 'C.2', 'charge': -0.2426026937449157, 'radius': 1.87}, + {'name': 'HAJ', 'type': 'H', 'charge': 0.1648388876026803, 'radius': 1.1}, + {'name': 'CAH', 'type': 'C.3', 'charge': -0.07965667787077134, 'radius': 1.87}, + {'name': 'HAK', 'type': 'H', 'charge': 0.07232360507910963, 'radius': 1.1}, + {'name': 'HAL', 'type': 'H', 'charge': 0.07232360507910963, 'radius': 1.1}, + {'name': 'CAI', 'type': 'C.3', 'charge': -0.09862724378017657, 'radius': 1.87}, + {'name': 'HAM', 'type': 'H', 'charge': 0.05403847507524275, 'radius': 1.1}, + {'name': 'HAN', 'type': 'H', 'charge': 0.05403847507524275, 'radius': 1.1}, + {'name': 'CAJ', 'type': 'C.3', 'charge': -0.10608184708836539, 'radius': 1.87}, + {'name': 'HAO', 'type': 'H', 'charge': 0.04966180988232633, 'radius': 1.1}, + {'name': 'HAP', 'type': 'H', 'charge': 0.04966180988232633, 'radius': 1.1}, + {'name': 'CAK', 'type': 'C.3', 'charge': -0.11173869603286389, 'radius': 1.87}, + {'name': 'HAR', 'type': 'H', 'charge': 
0.04061118893012504, 'radius': 1.1}, + {'name': 'HAS', 'type': 'H', 'charge': 0.04061118893012504, 'radius': 1.1}, + {'name': 'HAQ', 'type': 'H', 'charge': 0.04061118893012504, 'radius': 1.1}], "glycerol.mol2": [ - {'name': 'OAB', 'charge': -0.6193219106971865}, - {'name': 'HAG', 'charge': 0.4033320347133161}, - {'name': 'CAA', 'charge': -0.025227212803925057}, - {'name': 'HAA', 'charge': 0.1316040991547254}, - {'name': 'HAB', 'charge': 0.1316040991547254}, - {'name': 'CAC', 'charge': -0.00300604112002907}, - {'name': 'HAC', 'charge': 0.16288785945019288}, - {'name': 'OAD', 'charge': -0.6115908338990877}, - {'name': 'HAD', 'charge': 0.4077267965256133}, - {'name': 'CAE', 'charge': -0.025227212803925054}, - {'name': 'HAE', 'charge': 0.1316040991547254}, - {'name': 'HAF', 'charge': 0.1316040991547254}, - {'name': 'OAF', 'charge': -0.6193219106971865}, - {'name': 'HAH', 'charge': 0.4033320347133161}], + {'name': 'OAB', 'type': 'O.3', 'charge': -0.6193219106971865, 'radius': 1.52}, + {'name': 'HAG', 'type': 'H', 'charge': 0.4033320347133161, 'radius': 1.1}, + {'name': 'CAA', 'type': 'C.3', 'charge': -0.025227212803925057, 'radius': 1.87}, + {'name': 'HAA', 'type': 'H', 'charge': 0.1316040991547254, 'radius': 1.1}, + {'name': 'HAB', 'type': 'H', 'charge': 0.1316040991547254, 'radius': 1.1}, + {'name': 'CAC', 'type': 'C.3', 'charge': -0.00300604112002907, 'radius': 1.87}, + {'name': 'HAC', 'type': 'H', 'charge': 0.16288785945019288, 'radius': 1.1}, + {'name': 'OAD', 'type': 'O.3', 'charge': -0.6115908338990877, 'radius': 1.52}, + {'name': 'HAD', 'type': 'H', 'charge': 0.4077267965256133, 'radius': 1.1}, + {'name': 'CAE', 'type': 'C.3', 'charge': -0.025227212803925054, 'radius': 1.87}, + {'name': 'HAE', 'type': 'H', 'charge': 0.1316040991547254, 'radius': 1.1}, + {'name': 'HAF', 'type': 'H', 'charge': 0.1316040991547254, 'radius': 1.1}, + {'name': 'OAF', 'type': 'O.3', 'charge': -0.6193219106971865, 'radius': 1.52}, + {'name': 'HAH', 'type': 'H', 'charge': 
0.4033320347133161, 'radius': 1.1}], "naphthalene.mol2": [ - {'name': 'CAB', 'charge': -0.10985826436427909}, - {'name': 'HAB', 'charge': 0.13714843913419308}, - {'name': 'CAC', 'charge': -0.06352790423000582}, - {'name': 'CAD', 'charge': -0.10985826436427909}, - {'name': 'HAD', 'charge': 0.13714843913419308}, - {'name': 'CAE', 'charge': -0.12522022371734542}, - {'name': 'HAE', 'charge': 0.1296940010624343}, - {'name': 'CAJ', 'charge': -0.12522022371734542}, - {'name': 'HAJ', 'charge': 0.1296940010624343}, - {'name': 'CAI', 'charge': -0.10985826436427909}, - {'name': 'HAI', 'charge': 0.13714843913419308}, - {'name': 'CAH', 'charge': -0.06352790423000582}, - {'name': 'CAG', 'charge': -0.10985826436427909}, - {'name': 'HAG', 'charge': 0.13714843913419308}, - {'name': 'CAF', 'charge': -0.12522022371734542}, - {'name': 'HAF', 'charge': 0.1296940010624343}, - {'name': 'CAA', 'charge': -0.12522022371734542}, - {'name': 'HAA', 'charge': 0.1296940010624343}], + {'name': 'CAB', 'type': 'C.ar', 'charge': -0.10985826436427909, 'radius': 1.87}, + {'name': 'HAB', 'type': 'H', 'charge': 0.13714843913419308, 'radius': 1.1}, + {'name': 'CAC', 'type': 'C.ar', 'charge': -0.06352790423000582, 'radius': 1.87}, + {'name': 'CAD', 'type': 'C.ar', 'charge': -0.10985826436427909, 'radius': 1.87}, + {'name': 'HAD', 'type': 'H', 'charge': 0.13714843913419308, 'radius': 1.1}, + {'name': 'CAE', 'type': 'C.ar', 'charge': -0.12522022371734542, 'radius': 1.87}, + {'name': 'HAE', 'type': 'H', 'charge': 0.1296940010624343, 'radius': 1.1}, + {'name': 'CAJ', 'type': 'C.ar', 'charge': -0.12522022371734542, 'radius': 1.87}, + {'name': 'HAJ', 'type': 'H', 'charge': 0.1296940010624343, 'radius': 1.1}, + {'name': 'CAI', 'type': 'C.ar', 'charge': -0.10985826436427909, 'radius': 1.87}, + {'name': 'HAI', 'type': 'H', 'charge': 0.13714843913419308, 'radius': 1.1}, + {'name': 'CAH', 'type': 'C.ar', 'charge': -0.06352790423000582, 'radius': 1.87}, + {'name': 'CAG', 'type': 'C.ar', 'charge': 
-0.10985826436427909, 'radius': 1.87}, + {'name': 'HAG', 'type': 'H', 'charge': 0.13714843913419308, 'radius': 1.1}, + {'name': 'CAF', 'type': 'C.ar', 'charge': -0.12522022371734542, 'radius': 1.87}, + {'name': 'HAF', 'type': 'H', 'charge': 0.1296940010624343, 'radius': 1.1}, + {'name': 'CAA', 'type': 'C.ar', 'charge': -0.12522022371734542, 'radius': 1.87}, + {'name': 'HAA', 'type': 'H', 'charge': 0.1296940010624343, 'radius': 1.1}], "pyrrole.mol2": [ - {'name': 'CAE', 'charge': -0.02725132001441914}, - {'name': 'HAE', 'charge': 0.104834463409739}, - {'name': 'HAF', 'charge': 0.104834463409739}, - {'name': 'CAD', 'charge': -0.19753848158167037}, - {'name': 'HAD', 'charge': 0.18534981863154645}, - {'name': 'CAA', 'charge': -0.1112561716335608}, - {'name': 'HAA', 'charge': 0.2699321111667989}, - {'name': 'NAB', 'charge': -0.44475473234270757}, - {'name': 'CAC', 'charge': -0.13358085588726296}, - {'name': 'HAC', 'charge': 0.24943070484179752}], + {'name': 'CAE', 'type': 'C.3', 'charge': -0.02725132001441914, 'radius': 1.87}, + {'name': 'HAE', 'type': 'H', 'charge': 0.104834463409739, 'radius': 1.1}, + {'name': 'HAF', 'type': 'H', 'charge': 0.104834463409739, 'radius': 1.1}, + {'name': 'CAD', 'type': 'C.2', 'charge': -0.19753848158167037, 'radius': 1.87}, + {'name': 'HAD', 'type': 'H', 'charge': 0.18534981863154645, 'radius': 1.1}, + {'name': 'CAA', 'type': 'C.2', 'charge': -0.1112561716335608, 'radius': 1.87}, + {'name': 'HAA', 'type': 'H', 'charge': 0.2699321111667989, 'radius': 1.1}, + {'name': 'NAB', 'type': 'N.2', 'charge': -0.44475473234270757, 'radius': 1.4}, + {'name': 'CAC', 'type': 'C.2', 'charge': -0.13358085588726296, 'radius': 1.87}, + {'name': 'HAC', 'type': 'H', 'charge': 0.24943070484179752, 'radius': 1.1}], "tetramethylammonium.mol2": [ - {'name': 'CAA', 'charge': 0.013840200623221964}, - {'name': 'HAA', 'charge': 0.12997929051575582}, - {'name': 'HAB', 'charge': 0.12997929051575582}, - {'name': 'HAC', 'charge': 0.12997929051575582}, - {'name': 'NAC', 
'charge': -0.6151122886819584}, - {'name': 'CAB', 'charge': 0.013840200623221978}, - {'name': 'HAE', 'charge': 0.12997929051575582}, - {'name': 'HAF', 'charge': 0.12997929051575582}, - {'name': 'HAD', 'charge': 0.12997929051575582}, - {'name': 'CAE', 'charge': 0.013840200623221978}, - {'name': 'HAK', 'charge': 0.12997929051575582}, - {'name': 'HAL', 'charge': 0.12997929051575582}, - {'name': 'HAJ', 'charge': 0.12997929051575582}, - {'name': 'CAD', 'charge': 0.013840200623221978}, - {'name': 'HAH', 'charge': 0.12997929051575582}, - {'name': 'HAI', 'charge': 0.12997929051575582}, - {'name': 'HAG', 'charge': 0.12997929051575582}] + {'name': 'CAA', 'type': 'C.3', 'charge': 0.013840200623221964, 'radius': 1.87}, + {'name': 'HAA', 'type': 'H', 'charge': 0.12997929051575582, 'radius': 1.1}, + {'name': 'HAB', 'type': 'H', 'charge': 0.12997929051575582, 'radius': 1.1}, + {'name': 'HAC', 'type': 'H', 'charge': 0.12997929051575582, 'radius': 1.1}, + {'name': 'NAC', 'type': 'N.3', 'charge': -0.6151122886819584, 'radius': 1.4}, + {'name': 'CAB', 'type': 'C.3', 'charge': 0.013840200623221978, 'radius': 1.87}, + {'name': 'HAE', 'type': 'H', 'charge': 0.12997929051575582, 'radius': 1.1}, + {'name': 'HAF', 'type': 'H', 'charge': 0.12997929051575582, 'radius': 1.1}, + {'name': 'HAD', 'type': 'H', 'charge': 0.12997929051575582, 'radius': 1.1}, + {'name': 'CAE', 'type': 'C.3', 'charge': 0.013840200623221978, 'radius': 1.87}, + {'name': 'HAK', 'type': 'H', 'charge': 0.12997929051575582, 'radius': 1.1}, + {'name': 'HAL', 'type': 'H', 'charge': 0.12997929051575582, 'radius': 1.1}, + {'name': 'HAJ', 'type': 'H', 'charge': 0.12997929051575582, 'radius': 1.1}, + {'name': 'CAD', 'type': 'C.3', 'charge': 0.013840200623221978, 'radius': 1.87}, + {'name': 'HAH', 'type': 'H', 'charge': 0.12997929051575582, 'radius': 1.1}, + {'name': 'HAI', 'type': 'H', 'charge': 0.12997929051575582, 'radius': 1.1}, + {'name': 'HAG', 'type': 'H', 'charge': 0.12997929051575582, 'radius': 1.1}] } diff --git 
a/pdb2pqr/tests/ligand_test.py b/pdb2pqr/tests/ligand_test.py index 83291a06a..46bd6319b 100644 --- a/pdb2pqr/tests/ligand_test.py +++ b/pdb2pqr/tests/ligand_test.py @@ -5,15 +5,15 @@ from pathlib import Path import pytest import pandas as pd from numpy.testing import assert_almost_equal -from pdb2pqr.ligand import parameterize +from pdb2pqr.ligand.mol2 import Mol2Molecule +from pdb2pqr.ligand import RADII import common from ligand_results import TORSION_RESULTS, RING_RESULTS -from ligand_results import FORMAL_CHARGE_RESULTS, PARTIAL_CHARGE_RESULTS +from ligand_results import FORMAL_CHARGE_RESULTS, PARAMETER_RESULTS _LOGGER = logging.getLogger(__name__) _LOGGER.warning("Need functional and regression test coverage for --ligand") -_LOGGER.error("Still haven't figured out radii") ALL_LIGANDS = set(TORSION_RESULTS) | set(RING_RESULTS) @@ -24,46 +24,44 @@ ALL_LIGANDS = sorted(list(ALL_LIGANDS)) @pytest.mark.parametrize("input_mol2", ALL_LIGANDS) -def test_parameterization(input_mol2): +def test_assign_parameters(input_mol2): """Testing basic aspects of code breaking.""" - ligand = parameterize.ParameterizedMolecule() + ligand = Mol2Molecule() mol2_path = Path("tests/data") / input_mol2 with open(mol2_path, "rt") as mol2_file: ligand.read(mol2_file) old_total_charge = 0 for atom in ligand.atoms.values(): - atom.charge = atom.formal_charge - old_total_charge += atom.charge - atom.old_charge = atom.charge - ligand.update(ligand) + old_total_charge += atom.formal_charge + ligand.assign_parameters() new_total_charge = 0 test_results = [] for atom in ligand.atoms.values(): test_row = { - "name": atom.name, "charge": atom.charge} + "name": atom.name, "type": atom.type, "charge": atom.charge, + "radius": atom.radius} test_results.append(test_row) new_total_charge += atom.charge _LOGGER.debug("Test results: %s", test_results) test_results = pd.DataFrame(test_results) test_results = test_results.set_index("name") - _LOGGER.debug("Test results:\n%s", test_results) + # 
_LOGGER.debug("Test results:\n%s", test_results.to_string()) _LOGGER.info( "Total charge: %5.2f -> %5.2f", old_total_charge, new_total_charge) - expected_results = pd.DataFrame(PARTIAL_CHARGE_RESULTS[input_mol2]) + expected_results = pd.DataFrame(PARAMETER_RESULTS[input_mol2]) expected_results = expected_results.set_index("name") - diff_results = test_results - expected_results - _LOGGER.debug( - "Difference between test and expected results:\n%s", - diff_results.to_string()) assert_almost_equal( test_results["charge"].to_numpy(), expected_results["charge"].to_numpy()) + assert_almost_equal( + test_results["radius"].to_numpy(), + expected_results["radius"].to_numpy(), verbose=True) @pytest.mark.parametrize("input_mol2", ALL_LIGANDS) def test_formal_charge(input_mol2): """Testing formal charge calculation.""" - ligand = parameterize.ParameterizedMolecule() + ligand = Mol2Molecule() mol2_path = Path("tests/data") / input_mol2 with open(mol2_path, "rt") as mol2_file: ligand.read(mol2_file) @@ -91,7 +89,7 @@ def test_formal_charge(input_mol2): @pytest.mark.parametrize("input_mol2", TORSION_RESULTS) def test_torsions(input_mol2): """Test assignment of torsion angles.""" - ligand = parameterize.ParameterizedMolecule() + ligand = Mol2Molecule() mol2_path = Path("tests/data") / input_mol2 with open(mol2_path, "rt") as mol2_file: ligand.read(mol2_file) @@ -125,7 +123,7 @@ def test_torsions(input_mol2): @pytest.mark.parametrize("input_mol2", ALL_LIGANDS) def test_rings(input_mol2): """Test assignment of torsion angles.""" - ligand = parameterize.ParameterizedMolecule() + ligand = Mol2Molecule() mol2_path = Path("tests/data") / input_mol2 with open(mol2_path, "rt") as mol2_file: ligand.read(mol2_file) From 50b1180ab420e280565ad1056bb4fab3a5950ce1 Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Sun, 28 Jun 2020 17:56:27 -0700 Subject: [PATCH 27/31] Fix documentation. Missing argument in docstring. 
--- pdb2pqr/pdb2pqr/input_output.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pdb2pqr/pdb2pqr/input_output.py b/pdb2pqr/pdb2pqr/input_output.py index a663ba004..8a5802741 100644 --- a/pdb2pqr/pdb2pqr/input_output.py +++ b/pdb2pqr/pdb2pqr/input_output.py @@ -92,6 +92,7 @@ def print_pqr_header(pdblist, atomlist, reslist, charge, force_field, ph_calc_me """Print the header for the PQR file Args: + pdblist: list of lines from original PDB with header atomlist: A list of atoms that were unable to have charges assigned (list) reslist: A list of residues with non-integral charges (list) charge: The total charge on the protein (float) From d8a4bee4b64df38cb5b856a3fe86e5586fe81cbe Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Sun, 28 Jun 2020 17:56:59 -0700 Subject: [PATCH 28/31] Make ligand atom names consistent with PDB file. --- pdb2pqr/tests/data/1HPX-ligand.mol2 | 44 ++++++++++++++--------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/pdb2pqr/tests/data/1HPX-ligand.mol2 b/pdb2pqr/tests/data/1HPX-ligand.mol2 index 6514b53bd..061f2154c 100644 --- a/pdb2pqr/tests/data/1HPX-ligand.mol2 +++ b/pdb2pqr/tests/data/1HPX-ligand.mol2 @@ -62,32 +62,32 @@ USER_CHARGES 54 H7 6.6440 6.0890 9.2580 H 1 UNK 0.000000 55 H8 8.6070 6.6600 7.9340 H 1 UNK 0.000000 56 H9 9.5090 5.1250 6.2510 H 1 UNK 0.000000 - 57 1H10 4.9080 5.4400 10.2800 H 1 UNK 0.000000 - 58 2H10 6.0650 4.2720 10.9200 H 1 UNK 0.000000 - 59 1H12 2.3020 0.1560 10.2490 H 1 UNK 0.000000 - 60 2H12 1.5120 1.7280 10.1470 H 1 UNK 0.000000 + 57 H101 4.9080 5.4400 10.2800 H 1 UNK 0.000000 + 58 H102 6.0650 4.2720 10.9200 H 1 UNK 0.000000 + 59 H121 2.3020 0.1560 10.2490 H 1 UNK 0.000000 + 60 H122 1.5120 1.7280 10.1470 H 1 UNK 0.000000 61 H14 5.4270 -0.8780 13.7600 H 1 UNK 0.000000 62 H15 3.6200 -0.2630 15.5740 H 1 UNK 0.000000 - 63 1H16 7.4370 -0.0070 14.9100 H 1 UNK 0.000000 - 64 2H16 6.7590 1.5900 14.9110 H 1 UNK 0.000000 + 63 H161 7.4370 -0.0070 14.9100 H 1 UNK 0.000000 + 64 H162 6.7590 1.5900 
14.9110 H 1 UNK 0.000000 65 H18 5.5970 -3.7540 17.5730 H 1 UNK 0.000000 - 66 1H19 3.8610 -5.2070 17.1640 H 1 UNK 0.000000 - 67 2H19 4.9340 -5.9150 15.9680 H 1 UNK 0.000000 - 68 1H20 2.6870 -2.2030 15.7120 H 1 UNK 0.000000 - 69 2H20 3.7050 -2.1860 14.2570 H 1 UNK 0.000000 - 70 1H23 10.6090 -3.2990 15.2650 H 1 UNK 0.000000 - 71 2H23 9.4700 -2.5590 16.3820 H 1 UNK 0.000000 - 72 3H23 9.0370 -2.7940 14.6990 H 1 UNK 0.000000 - 73 1H24 10.1130 -5.6140 14.2890 H 1 UNK 0.000000 - 74 2H24 8.4540 -5.2410 13.8640 H 1 UNK 0.000000 - 75 3H24 8.7950 -6.5820 14.9380 H 1 UNK 0.000000 - 76 1H25 10.8970 -5.3850 16.6790 H 1 UNK 0.000000 - 77 2H25 9.4870 -6.1170 17.4210 H 1 UNK 0.000000 - 78 3H25 9.9560 -4.4740 17.8460 H 1 UNK 0.000000 + 66 H191 3.8610 -5.2070 17.1640 H 1 UNK 0.000000 + 67 H192 4.9340 -5.9150 15.9680 H 1 UNK 0.000000 + 68 H201 2.6870 -2.2030 15.7120 H 1 UNK 0.000000 + 69 H202 3.7050 -2.1860 14.2570 H 1 UNK 0.000000 + 70 H231 10.6090 -3.2990 15.2650 H 1 UNK 0.000000 + 71 H232 9.4700 -2.5590 16.3820 H 1 UNK 0.000000 + 72 H233 9.0370 -2.7940 14.6990 H 1 UNK 0.000000 + 73 H241 10.1130 -5.6140 14.2890 H 1 UNK 0.000000 + 74 H242 8.4540 -5.2410 13.8640 H 1 UNK 0.000000 + 75 H243 8.7950 -6.5820 14.9380 H 1 UNK 0.000000 + 76 H251 10.8970 -5.3850 16.6790 H 1 UNK 0.000000 + 77 H252 9.4870 -6.1170 17.4210 H 1 UNK 0.000000 + 78 H253 9.9560 -4.4740 17.8460 H 1 UNK 0.000000 79 H26 2.5370 2.2260 12.4440 H 1 UNK 0.000000 - 80 1H27 -0.9880 1.9690 12.8550 H 1 UNK 0.000000 - 81 2H27 -0.5410 2.3770 11.1910 H 1 UNK 0.000000 - 82 3H27 0.5380 2.8110 12.5230 H 1 UNK 0.000000 + 80 H271 -0.9880 1.9690 12.8550 H 1 UNK 0.000000 + 81 H272 -0.5410 2.3770 11.1910 H 1 UNK 0.000000 + 82 H273 0.5380 2.8110 12.5230 H 1 UNK 0.000000 83 H29 6.9930 3.0000 12.9840 H 1 UNK 0.000000 84 H30 7.9750 3.3650 10.7580 H 1 UNK 0.000000 85 H31 8.8870 1.4780 9.4630 H 1 UNK 0.000000 From 4a59cf2a7217a2bdb72f1560146e064362ef262b Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Sun, 28 Jun 2020 17:57:36 -0700 Subject: 
[PATCH 29/31] Restore ligand functionality to PDB2PQR. --- pdb2pqr/pdb2pqr/main.py | 68 +++++++++++++++++++++++++++--------- pdb2pqr/tests/ligand_test.py | 7 ++-- 2 files changed, 57 insertions(+), 18 deletions(-) diff --git a/pdb2pqr/pdb2pqr/main.py b/pdb2pqr/pdb2pqr/main.py index 89752e3c8..232841867 100644 --- a/pdb2pqr/pdb2pqr/main.py +++ b/pdb2pqr/pdb2pqr/main.py @@ -9,6 +9,7 @@ import argparse from collections import OrderedDict from tempfile import NamedTemporaryFile from pathlib import Path +from math import isclose import pandas import propka.lib from propka.parameters import Parameters @@ -30,6 +31,10 @@ _LOGGER = logging.getLogger("PDB2PQR%s" % VERSION) _LOGGER.addFilter(io.DuplicateFilter()) +# Round-off error when determining if charge is integral +CHARGE_ERROR = 1e-3 + + def build_parser(): """Build an argument parser. @@ -266,10 +271,9 @@ def setup_molecule(pdblist, definition, ligand_path): ligand = Mol2Molecule() with open(ligand_path, "rt", encoding="utf-8") as ligand_file: ligand.read(ligand_file) - raise NotImplementedError("Where do initial ligand charges come from?") else: - protein = prot.Protein(pdblist, definition) ligand = None + protein = prot.Protein(pdblist, definition) _LOGGER.info("Created protein object with %d residues and %d atoms.", len(protein.residues), len(protein.atoms)) for residue in protein.residues: @@ -397,13 +401,14 @@ def run_propka(args, protein): return df, pka_filename -def non_trivial(args, protein, definition, is_cif): +def non_trivial(args, protein, ligand, definition, is_cif): """Perform a non-trivial PDB2PQR run. Args: args: argparse namespace. protein: Protein object. This is not actually specific to proteins... Nucleic acids are biomolecules, too! + ligand: Mol2Molecule object or None definition: Definition object for topology. is_cif: Boolean indicating whether file is CIF format. 
Returns: @@ -471,9 +476,38 @@ def non_trivial(args, protein, definition, is_cif): if args.ligand is not None: _LOGGER.info("Processing ligand.") - raise NotImplementedError( - "Got argument --ligand=%s but ligand support not implemented" % - args.ligand) + _LOGGER.warning("Using ZAP9 forcefield for ligand radii.") + ligand.assign_parameters() + missing_atoms = [] + lig_atoms = [] + for residue in protein.residues: + tot_charge = 0 + for pdb_atom in residue.atoms: + # Only check residues with HETATM + if pdb_atom.type == "ATOM": + break + try: + mol2_atom = ligand.atoms[pdb_atom.name] + pdb_atom.radius = mol2_atom.radius + pdb_atom.ffcharge = mol2_atom.charge + tot_charge += mol2_atom.charge + lig_atoms.append(pdb_atom) + except KeyError: + err = ( + "Can't find HETATM {r.name} {r.res_seq} {a.name} " + "in MOL2 file").format(r=residue, a=pdb_atom) + _LOGGER.warning(err) + missing_atoms.append(pdb_atom) + + matched_atoms = hitlist + lig_atoms + + for residue in protein.residues: + if not isclose( + residue.charge, int(residue.charge), abs_tol=CHARGE_ERROR): + err = ( + "Residue {r.name} {r.res_seq} charge is " + "non-integer: {r.charge}").format(r=residue) + raise ValueError(err) if args.ffout is not None: _LOGGER.info("Applying custom naming scheme (%s).", args.ffout) @@ -486,18 +520,19 @@ def non_trivial(args, protein, definition, is_cif): _LOGGER.info("Regenerating headers.") reslist, charge = protein.charge if is_cif: - header = io.print_pqr_header_cif(misslist, reslist, charge, args.ff, - args.pka_method, args.ph, args.ffout, - include_old_header=args.include_header) + header = io.print_pqr_header_cif( + missing_atoms, reslist, charge, args.ff, args.pka_method, args.ph, + args.ffout, include_old_header=args.include_header) else: - header = io.print_pqr_header(protein.pdblist, misslist, reslist, charge, - args.ff, args.pka_method, args.ph, args.ffout, - include_old_header=args.include_header) + header = io.print_pqr_header( + protein.pdblist, missing_atoms, 
reslist, charge, args.ff, + args.pka_method, args.ph, args.ffout, + include_old_header=args.include_header) _LOGGER.info("Regenerating PDB lines.") - lines = io.print_protein_atoms(hitlist, args.keep_chain) + lines = io.print_protein_atoms(matched_atoms, args.keep_chain) - return {"lines": lines, "header": header, "missed_residues": misslist} + return {"lines": lines, "header": header, "missed_residues": missing_atoms} def main(args): @@ -538,8 +573,9 @@ def main(args): results = {"header": "", "missed_residues": None, "protein": protein, "lines": io.print_protein_atoms(protein.atoms, args.keep_chain)} else: - results = non_trivial(args=args, protein=protein, definition=definition, - is_cif=is_cif) + results = non_trivial( + args=args, protein=protein, ligand=ligand, definition=definition, + is_cif=is_cif) print_pqr(args=args, pqr_lines=results["lines"], header_lines=results["header"], missing_lines=results["missed_residues"], is_cif=is_cif) diff --git a/pdb2pqr/tests/ligand_test.py b/pdb2pqr/tests/ligand_test.py index 46bd6319b..4a7ab52f9 100644 --- a/pdb2pqr/tests/ligand_test.py +++ b/pdb2pqr/tests/ligand_test.py @@ -152,12 +152,15 @@ def test_rings(input_mol2): _LOGGER.debug(str_) -@pytest.mark.parametrize("input_pdb", ["1HPX", "1QBS", "1US0"], ids=str) +@pytest.mark.parametrize( + "input_pdb", ["1HPX"], ids=str) def test_ligand_protein(input_pdb, tmp_path): """PROPKA non-regression tests on proteins without ligands.""" - ligand = Path("tests/data") / ("%s-ligand.mol2" % input_pdb) + input_pdb = Path(input_pdb) + ligand = Path("tests/data") / ("%s-ligand.mol2" % input_pdb.stem) args = "--log-level=INFO --ff=AMBER --drop-water --ligand=%s" % ligand output_pqr = Path(input_pdb).stem + ".pqr" + _LOGGER.debug("Running test in %s", tmp_path) common.run_pdb2pqr( args=args, input_pdb=input_pdb, output_pqr=output_pqr, tmp_path=tmp_path) From 2913ae8f8b286eb4bd09edc4f69c6aa02991d6c7 Mon Sep 17 00:00:00 2001 From: intendo <> Date: Mon, 29 Jun 2020 09:13:21 -0700 Subject: 
[PATCH 30/31] Fixed some pylint warnings. --- pdb2pqr/pdb2pqr/ligand/__init__.py | 8 ++++---- pdb2pqr/pdb2pqr/ligand/mol2.py | 9 ++++----- pdb2pqr/pdb2pqr/ligand/peoe.py | 7 +++---- pdb2pqr/pdb2pqr/ligand/topology.py | 2 +- pdb2pqr/pdb2pqr/main.py | 4 ++-- pdb2pqr/pdb2pqr/pdb.py | 6 +++--- pdb2pqr/setup.py | 14 +++++++++----- pdb2pqr/tests/ligand_results.py | 3 +-- pdb2pqr/tests/ligand_test.py | 3 +-- 9 files changed, 28 insertions(+), 28 deletions(-) diff --git a/pdb2pqr/pdb2pqr/ligand/__init__.py b/pdb2pqr/pdb2pqr/ligand/__init__.py index 50ae58a92..b87f64d4a 100644 --- a/pdb2pqr/pdb2pqr/ligand/__init__.py +++ b/pdb2pqr/pdb2pqr/ligand/__init__.py @@ -15,7 +15,7 @@ assert sys.version_info >= (3, 5) # first and then the generic element should be used RADII = { # NOTE - these are not the original PARSE radii but they are the ones - # included in the previous version of PDB2PKA so I'm preserving them for + # included in the previous version of PDB2PKA so I'm preserving them for # posterity. There's a claim they came from # http://amber.ch.ic.ac.uk/archive/ but that link no longer works. "not parse - do not use": { @@ -38,10 +38,10 @@ RADII = { "C": 1.7, "H": 1.20, "O.co2": 1.52, "N": 1.55, "S": 1.80, "F": 1.47, "Cl": 1.75, "I": 1.98}, # These are the Bondi radii from Table I of - # http://doi.org/10.1021/j100785a001. NOTE - there are some variations to + # http://doi.org/10.1021/j100785a001. NOTE - there are some variations to # the halogens in Table V that we might want to consider in the future. "bondi": { - "H": 1.20, "He": 1.40, "C": 1.70, "N": 1.55, "O": 1.52, "F": 1.47, + "H": 1.20, "He": 1.40, "C": 1.70, "N": 1.55, "O": 1.52, "F": 1.47, "Ne": 1.54, "Si": 2.10, "P": 1.80, "S": 1.80, "Cl": 1.75, "Ar": 1.88, "As": 1.85, "Se": 1.90, "Br": 1.85, "Kr": 2.02, "Te": 2.06, "I": 1.98, "Xe": 2.16} @@ -69,7 +69,7 @@ for group, elem_list in ELEMENT_BY_GROUP.items(): # Numbers of non-bonded electrons for Sybyl-type atoms. 
Adapted from # https://onlinelibrary.wiley.com/doi/abs/10.1002/jcc.540100804 (Table I). NONBONDED_BY_TYPE = { - "Al": 0, "Br": 6, "C.1": 0, "C.2": 0, "C.3": 0, "C.ar": 0, "Ca": 0, + "Al": 0, "Br": 6, "C.1": 0, "C.2": 0, "C.3": 0, "C.ar": 0, "Ca": 0, "Cl": 6, "F": 6, "H": 0, "I": 6, "K": 0, "Li": 0, "N.1": 2, "N.2": 2, "N.3": 2, "N.4": 0, "N.am": 0, "N.ar": 2, "N.pl3": 0, "Na": 0, "O.2": 4, "O.3": 4, "P.3": 0, "S.2": 4, "S.3": 4, "S.o": 2, "S.o2": 0, diff --git a/pdb2pqr/pdb2pqr/ligand/mol2.py b/pdb2pqr/pdb2pqr/ligand/mol2.py index e2c4ad820..8b93db842 100644 --- a/pdb2pqr/pdb2pqr/ligand/mol2.py +++ b/pdb2pqr/pdb2pqr/ligand/mol2.py @@ -110,7 +110,7 @@ class Mol2Atom: def assign_radius(self, primary_dict, secondary_dict): """Assign radius to atom. - TODO - it seems inconsistent that this function pulls radii from a + TODO - it seems inconsistent that this function pulls radii from a dictionary and the protein routines use force field files. Args: @@ -250,8 +250,8 @@ class Mol2Molecule: self.res_name = None self.res_seq = None - def assign_parameters( - self, primary_dict=RADII["zap9"], secondary_dict=RADII["bondi"]): + def assign_parameters(self, primary_dict=RADII["zap9"], + secondary_dict=RADII["bondi"]): """Assign charges and radii to atoms in molecule. Args: @@ -263,8 +263,7 @@ class Mol2Molecule: self.assign_radii(primary_dict, secondary_dict) self.assign_charges() - def assign_radii( - self, primary_dict, secondary_dict): + def assign_radii(self, primary_dict, secondary_dict): """Assign radii to atoms in molecule. 
Args: diff --git a/pdb2pqr/pdb2pqr/ligand/peoe.py b/pdb2pqr/pdb2pqr/ligand/peoe.py index 0a6aa08f4..1b54c0160 100644 --- a/pdb2pqr/pdb2pqr/ligand/peoe.py +++ b/pdb2pqr/pdb2pqr/ligand/peoe.py @@ -43,7 +43,7 @@ POLY_TERMS = { 'S.O2': (10.13 + 0.5, 9.13, 1.38, 20.65), 'P.3': (10.13 + 0.5, 9.13, 1.38, 20.65) } -# Maximum (absolute) value of charge after which contribution to polynomial +# Maximum (absolute) value of charge after which contribution to polynomial # is capped MAX_CHARGE = 1.1 DEFAULT_H_ELECTRONEG = 20.02 @@ -116,9 +116,8 @@ def assign_terms(atoms, term_dict): return atoms -def equilibrate( - atoms, damp=DAMPING_FACTOR, scale=SCALING_FACTOR, - num_cycles=NUM_CYCLES, term_dict=POLY_TERMS): +def equilibrate(atoms, damp=DAMPING_FACTOR, scale=SCALING_FACTOR, + num_cycles=NUM_CYCLES, term_dict=POLY_TERMS): """Equilibrate the atomic charges. Args: diff --git a/pdb2pqr/pdb2pqr/ligand/topology.py b/pdb2pqr/pdb2pqr/ligand/topology.py index 34993b9ce..746c80492 100644 --- a/pdb2pqr/pdb2pqr/ligand/topology.py +++ b/pdb2pqr/pdb2pqr/ligand/topology.py @@ -17,4 +17,4 @@ class Topology: self.atom_dict = {} for atom in molecule.atoms: self.atom_dict[atom.name] = atom - raise NotImplementedError() \ No newline at end of file + raise NotImplementedError() diff --git a/pdb2pqr/pdb2pqr/main.py b/pdb2pqr/pdb2pqr/main.py index 232841867..10c39633c 100644 --- a/pdb2pqr/pdb2pqr/main.py +++ b/pdb2pqr/pdb2pqr/main.py @@ -20,7 +20,7 @@ from . import debump from . import hydrogens from . import forcefield from . import protein as prot -from . import input_output as io +from . import input_output as io from .ligand.mol2 import Mol2Molecule from . 
import input_output as io from .config import VERSION, TITLE_FORMAT_STRING, CITATIONS, FORCE_FIELDS @@ -377,7 +377,7 @@ def run_propka(args, protein): molecule.calculate_pka() pka_filename = Path(pdb_path).stem + ".pka" - molecule.write_pka(filename = pka_filename) + molecule.write_pka(filename=pka_filename) conformation = molecule.conformations["AVR"] rows = [] diff --git a/pdb2pqr/pdb2pqr/pdb.py b/pdb2pqr/pdb2pqr/pdb.py index d0bcb5095..f286e6e19 100644 --- a/pdb2pqr/pdb2pqr/pdb.py +++ b/pdb2pqr/pdb2pqr/pdb.py @@ -1915,16 +1915,16 @@ def read_pdb(file_): _LOGGER.error("<%s>", line.strip()) _LOGGER.error("Truncating remaining errors for record type:%s", record) except IndexError as details: - if record == "ATOM" or record == "HETATM": + if "ATOM" in record or "HETATM" in record: try: obj = read_atom(line) pdblist.append(obj) except Exception as details: _LOGGER.error("Error parsing line: %s,", details) _LOGGER.error("<%s>", line.strip()) - elif record == "SITE" or record == "TURN": + elif "SITE" in record or "TURN" in record: pass - elif record == "SSBOND" or record == "LINK": + elif "SSBOND" in record or "LINK" in record: _LOGGER.error("Warning -- ignoring record:") _LOGGER.error("<%s>", line.strip()) else: diff --git a/pdb2pqr/setup.py b/pdb2pqr/setup.py index 5fb72076f..31cc6889d 100644 --- a/pdb2pqr/setup.py +++ b/pdb2pqr/setup.py @@ -1,10 +1,14 @@ #!/usr/bin/python3 """ -The use of continuum solvation methods such as APBS requires accurate and complete structural data as well as force field parameters such as atomic charges and radii. -Unfortunately, the limiting step in continuum electrostatics calculations is often the addition of missing atomic coordinates to molecular structures from the Protein -Data Bank and the assignment of parameters to these structures. To adds this problem, we have developed PDB2PQR. 
This software automates many of the common tasks of -preparing structures for continuum solvation calculations as well as many other types of biomolecular structure modeling, analysis, and simulation. These tasks include: +The use of continuum solvation methods such as APBS requires accurate and complete +structural data as well as force field parameters such as atomic charges and radii. +Unfortunately, the limiting step in continuum electrostatics calculations is often +the addition of missing atomic coordinates to molecular structures from the Protein +Data Bank and the assignment of parameters to these structures. To address this problem, +we have developed PDB2PQR. This software automates many of the common tasks of +preparing structures for continuum solvation calculations as well as many other types +of biomolecular structure modeling, analysis, and simulation. These tasks include: * Adding a limited number of missing heavy (non-hydrogen) atoms to biomolecular structures. * Estimating titration states and protonating biomolecules in a manner consistent with favorable hydrogen bonding. @@ -18,7 +22,7 @@ biomolecular solvation and electrostatics analyses to the biomedical community. 
import sys import setuptools -if sys.version_info[:2] < (3,6): +if sys.version_info[:2] < (3, 6): raise RuntimeError("Python version >= 3.6 is required.") with open("README.md", "r") as f: diff --git a/pdb2pqr/tests/ligand_results.py b/pdb2pqr/tests/ligand_results.py index dc6f7e316..b222ea4a8 100644 --- a/pdb2pqr/tests/ligand_results.py +++ b/pdb2pqr/tests/ligand_results.py @@ -35,7 +35,6 @@ RING_RESULTS = { "ethanol.mol2": set(), "glycerol.mol2": set(), "tetramethylammonium.mol2": set(), - "naphthalene.mol2": set(), "acetate.mol2": set(), "acetonitrile.mol2": set(), "acetylcholine.mol2": set(), @@ -125,7 +124,7 @@ PARAMETER_RESULTS = { {'name': 'C20', 'type': 'C.3', 'charge': 0.05373224349483613, 'radius': 1.87}, {'name': 'C21', 'type': 'C.2', 'charge': 0.14236781888812639, 'radius': 1.87}, {'name': 'C22', 'type': 'C.3', 'charge': -0.018858356195459093, 'radius': 1.87}, - {'name': 'C23','type': 'C.3', 'charge': -0.08872387572707603, 'radius': 1.87}, + {'name': 'C23', 'type': 'C.3', 'charge': -0.08872387572707603, 'radius': 1.87}, {'name': 'C24', 'type': 'C.3', 'charge': -0.08872387572707603, 'radius': 1.87}, {'name': 'C25', 'type': 'C.3', 'charge': -0.08872387572707603, 'radius': 1.87}, {'name': 'C26', 'type': 'C.3', 'charge': 0.10593153852032608, 'radius': 1.87}, diff --git a/pdb2pqr/tests/ligand_test.py b/pdb2pqr/tests/ligand_test.py index 4a7ab52f9..772baa54b 100644 --- a/pdb2pqr/tests/ligand_test.py +++ b/pdb2pqr/tests/ligand_test.py @@ -6,7 +6,6 @@ import pytest import pandas as pd from numpy.testing import assert_almost_equal from pdb2pqr.ligand.mol2 import Mol2Molecule -from pdb2pqr.ligand import RADII import common from ligand_results import TORSION_RESULTS, RING_RESULTS from ligand_results import FORMAL_CHARGE_RESULTS, PARAMETER_RESULTS @@ -196,4 +195,4 @@ def test_ligand_protein(input_pdb, tmp_path): # """Test ligand handling.""" # args_ = "{args} --ligand={ligand}".format(args=args, ligand=input_mol2) # run_pdb2pqr(args_, input_pdb, output_pqr, 
tmp_path) -# _LOGGER.warning("This test needs better checking to avoid silent failure.") \ No newline at end of file +# _LOGGER.warning("This test needs better checking to avoid silent failure.") From f472da5d220acb1503fdb4b7bd445281eb7aca8c Mon Sep 17 00:00:00 2001 From: intendo <> Date: Mon, 29 Jun 2020 09:47:03 -0700 Subject: [PATCH 31/31] Corrected comparison of strings --- pdb2pqr/pdb2pqr/pdb.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pdb2pqr/pdb2pqr/pdb.py b/pdb2pqr/pdb2pqr/pdb.py index f286e6e19..d0bcb5095 100644 --- a/pdb2pqr/pdb2pqr/pdb.py +++ b/pdb2pqr/pdb2pqr/pdb.py @@ -1915,16 +1915,16 @@ def read_pdb(file_): _LOGGER.error("<%s>", line.strip()) _LOGGER.error("Truncating remaining errors for record type:%s", record) except IndexError as details: - if "ATOM" in record or "HETATM" in record: + if record == "ATOM" or record == "HETATM": try: obj = read_atom(line) pdblist.append(obj) except Exception as details: _LOGGER.error("Error parsing line: %s,", details) _LOGGER.error("<%s>", line.strip()) - elif "SITE" in record or "TURN" in record: + elif record == "SITE" or record == "TURN": pass - elif "SSBOND" in record or "LINK" in record: + elif record == "SSBOND" or record == "LINK": _LOGGER.error("Warning -- ignoring record:") _LOGGER.error("<%s>", line.strip()) else: