From 9824a9d964285b3a6cdf6667803caec0527d75a8 Mon Sep 17 00:00:00 2001 From: Juuso Lehtivarjo Date: Wed, 18 Sep 2024 06:59:02 +0300 Subject: [PATCH] Expose MMFF aromaticity model for SetAromaticity (#7765) * Expose MMFF aromaticity model for SetAromaticity * Test, backward compatibility, missing AromaticityModel wrapper --- Code/GraphMol/Aromaticity.cpp | 212 ++++++++++++++++++ .../ForceFieldHelpers/MMFF/AtomTyper.cpp | 174 +------------- .../ForceFieldHelpers/MMFF/AtomTyper.h | 1 + Code/GraphMol/MolOps.h | 4 + Code/GraphMol/Wrap/MolOps.cpp | 1 + Code/GraphMol/molopstest.cpp | 34 +++ ReleaseNotes.md | 1 + 7 files changed, 256 insertions(+), 171 deletions(-) diff --git a/Code/GraphMol/Aromaticity.cpp b/Code/GraphMol/Aromaticity.cpp index 437d211d1..8aa827ee9 100644 --- a/Code/GraphMol/Aromaticity.cpp +++ b/Code/GraphMol/Aromaticity.cpp @@ -785,6 +785,41 @@ int mdlAromaticityHelper(RWMol &mol, const VECT_INT_VECT &srings) { return narom; } +int mmff94AromaticityHelper(RWMol &mol, const VECT_INT_VECT &srings) { + + // set aromaticity as done in MMFF94 init + if (!mol.hasProp(common_properties::_MMFFSanitized)) { + bool isAromaticSet = false; + for (const auto atom : mol.atoms()) { + if (atom->getIsAromatic()) { + isAromaticSet = true; + break; + } + } + if (isAromaticSet) { + MolOps::Kekulize(mol, true); + } + mol.setProp(common_properties::_MMFFSanitized, 1, true); + } + setMMFFAromaticity(mol); + + // count aromatic rings for return value + int narom = 1; + for (auto &sring : srings) { + bool isAromRing = true; + for (auto &aid : sring) { + Atom *atom = mol.getAtomWithIdx(aid); + if (!atom->getIsAromatic()) { + isAromRing = false; + break; + } + } + if (isAromRing) narom++; + } + + return narom; +} + // use minRingSize=0 or maxRingSize=0 to ignore these constraints int aromaticityHelper(RWMol &mol, const VECT_INT_VECT &srings, unsigned int minRingSize, unsigned int maxRingSize, @@ -905,6 +940,180 @@ int aromaticityHelper(RWMol &mol, const VECT_INT_VECT &srings, } // end of anonymous namespace +// sets the aromaticity flags according to MMFF +void setMMFFAromaticity(RWMol &mol) { + bool moveToNextRing = false; + bool isNOSinRing = false; + bool aromRingsAllSet = false; + bool exoDoubleBond = false; + bool canBeAromatic = false; + unsigned int i; + unsigned int j; + unsigned int nextInRing; + unsigned int pi_e = 0; + int nAromSet = 0; + int old_nAromSet = -1; + RingInfo *ringInfo = mol.getRingInfo(); + Atom *atom; + Bond *bond; + const VECT_INT_VECT &atomRings = ringInfo->atomRings(); + ROMol::ADJ_ITER nbrIdx; + ROMol::ADJ_ITER endNbrs; + boost::dynamic_bitset<> aromBitVect(mol.getNumAtoms()); + boost::dynamic_bitset<> aromRingBitVect(atomRings.size()); + + while ((!aromRingsAllSet) && atomRings.size() && (nAromSet > old_nAromSet)) { + // loop over all rings + for (i = 0; i < atomRings.size(); ++i) { + // add 2 pi electrons for each double bond in the ring + for (j = 0, pi_e = 0, moveToNextRing = false, isNOSinRing = false, + exoDoubleBond = false; + (!moveToNextRing) && (j < atomRings[i].size()); ++j) { + atom = mol.getAtomWithIdx(atomRings[i][j]); + // remember if this atom is nitrogen, oxygen or divalent sulfur + if ((atom->getAtomicNum() == 7) || (atom->getAtomicNum() == 8) || + ((atom->getAtomicNum() == 16) && (atom->getDegree() == 2))) { + isNOSinRing = true; + } + // check whether this atom is double-bonded to next one in the ring + nextInRing = (j == (atomRings[i].size() - 1)) ? atomRings[i][0] + : atomRings[i][j + 1]; + if (mol.getBondBetweenAtoms(atomRings[i][j], nextInRing) + ->getBondType() == Bond::DOUBLE) { + pi_e += 2; + } + // if this is not a double bond, check whether this is carbon + // or nitrogen with total bond order = 4 + else { + atom = mol.getAtomWithIdx(atomRings[i][j]); + // if not, move on + if ((atom->getAtomicNum() != 6) && + (!((atom->getAtomicNum() == 7) && + ((atom->getExplicitValence() + atom->getNumImplicitHs()) == + 4)))) { + continue; + } + // loop over neighbors + boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(atom); + for (; nbrIdx != endNbrs; ++nbrIdx) { + const Atom *nbrAtom = mol[*nbrIdx]; + // if the neighbor is one of the ring atoms, skip it + // since we are looking for exocyclic neighbors + if (std::find(atomRings[i].begin(), atomRings[i].end(), + nbrAtom->getIdx()) != atomRings[i].end()) { + continue; + } + // it the neighbor is single-bonded, skip it + if (mol.getBondBetweenAtoms(atomRings[i][j], nbrAtom->getIdx()) + ->getBondType() == Bond::SINGLE) { + continue; + } + // if the neighbor is in a ring and its aromaticity + // bit has not yet been set, then move to the next ring + // we'll take care of this later + if (queryIsAtomInRing(nbrAtom) && + (!(aromBitVect[nbrAtom->getIdx()]))) { + moveToNextRing = true; + break; + } + // if the neighbor is in an aromatic ring and is + // double-bonded to the current atom, add 1 pi electron + if (mol.getBondBetweenAtoms(atomRings[i][j], nbrAtom->getIdx()) + ->getBondType() == Bond::DOUBLE) { + if (nbrAtom->getIsAromatic()) { + ++pi_e; + } else { + exoDoubleBond = true; + } + } + } + } + } + // if we quit the loop at an early stage because aromaticity + // had not yet been set, then move to the next ring + if (moveToNextRing) { + continue; + } + // loop again over all ring atoms + for (j = 0, canBeAromatic = true; j < atomRings[i].size(); ++j) { + // set aromaticity as perceived + aromBitVect[atomRings[i][j]] = 1; + atom = mol.getAtomWithIdx(atomRings[i][j]); + // if this is is a non-sp2 carbon or nitrogen + // then this ring can't be aromatic + if (((atom->getAtomicNum() == 6) || (atom->getAtomicNum() == 7)) && + (atom->getHybridization() != Atom::SP2)) { + canBeAromatic = false; + } + } + // if this ring can't be aromatic, move to the next one + if (!canBeAromatic) { + continue; + } + // if there is N, O, S; no exocyclic double bonds; + // the ring has an odd number of terms: add 2 pi electrons + if (isNOSinRing && (!exoDoubleBond) && (atomRings[i].size() % 2)) { + pi_e += 2; + } + // if this ring satisfies the 4n+2 rule, + // then mark its atoms as aromatic + if ((pi_e > 2) && (!((pi_e - 2) % 4))) { + aromRingBitVect[i] = 1; + for (j = 0; j < atomRings[i].size(); ++j) { + atom = mol.getAtomWithIdx(atomRings[i][j]); + atom->setIsAromatic(true); + } + } + } + // termination criterion: if we did not manage to set any more + // aromatic atoms compared to the previous iteration, then + // stop looping + old_nAromSet = nAromSet; + nAromSet = 0; + aromRingsAllSet = true; + for (i = 0; i < atomRings.size(); ++i) { + for (j = 0; j < atomRings[i].size(); ++j) { + if (aromBitVect[atomRings[i][j]]) { + ++nAromSet; + } else { + aromRingsAllSet = false; + } + } + } + } + for (i = 0; i < atomRings.size(); ++i) { + // if the ring is not aromatic, move to the next one + if (!aromRingBitVect[i]) { + continue; + } + for (j = 0; j < atomRings[i].size(); ++j) { + // mark all ring bonds as aromatic + nextInRing = (j == (atomRings[i].size() - 1)) ? atomRings[i][0] + : atomRings[i][j + 1]; + bond = mol.getBondBetweenAtoms(atomRings[i][j], nextInRing); + bond->setBondType(Bond::AROMATIC); + bond->setIsAromatic(true); + } + } + for (i = 0; i < atomRings.size(); ++i) { + // if the ring is not aromatic, move to the next one + if (!aromRingBitVect[i]) { + continue; + } + for (j = 0; j < atomRings[i].size(); ++j) { + atom = mol.getAtomWithIdx(atomRings[i][j]); + if (atom->getAtomicNum() != 6) { + int iv = atom->calcImplicitValence(false); + atom->calcExplicitValence(false); + if (iv) { + atom->setNumExplicitHs(iv); + atom->calcImplicitValence(false); + } + } + } + } +} + int setAromaticity(RWMol &mol, AromaticityModel model, int (*func)(RWMol &)) { // This function used to check if the input molecule came // with aromaticity information, assumed it is correct and @@ -930,6 +1139,9 @@ int setAromaticity(RWMol &mol, AromaticityModel model, int (*func)(RWMol &)) { case AROMATICITY_MDL: res = mdlAromaticityHelper(mol, srings); break; + case AROMATICITY_MMFF94: + res = mmff94AromaticityHelper(mol, srings); + break; case AROMATICITY_CUSTOM: PRECONDITION( func, diff --git a/Code/GraphMol/ForceFieldHelpers/MMFF/AtomTyper.cpp b/Code/GraphMol/ForceFieldHelpers/MMFF/AtomTyper.cpp index 962cd90b8..b3f2476af 100644 --- a/Code/GraphMol/ForceFieldHelpers/MMFF/AtomTyper.cpp +++ b/Code/GraphMol/ForceFieldHelpers/MMFF/AtomTyper.cpp @@ -502,176 +502,8 @@ bool areAtomsInSameAromaticRing(const ROMol &mol, const unsigned int idx1, // sets the aromaticity flags according to MMFF void setMMFFAromaticity(RWMol &mol) { - bool moveToNextRing = false; - bool isNOSinRing = false; - bool aromRingsAllSet = false; - bool exoDoubleBond = false; - bool canBeAromatic = false; - unsigned int i; - unsigned int j; - unsigned int nextInRing; - unsigned int pi_e = 0; - int nAromSet = 0; - int old_nAromSet = -1; - RingInfo *ringInfo = mol.getRingInfo(); - Atom *atom; - Bond *bond; - const VECT_INT_VECT &atomRings = ringInfo->atomRings(); - ROMol::ADJ_ITER nbrIdx; - ROMol::ADJ_ITER endNbrs; - boost::dynamic_bitset<> aromBitVect(mol.getNumAtoms()); - boost::dynamic_bitset<> aromRingBitVect(atomRings.size()); - - while ((!aromRingsAllSet) && atomRings.size() && (nAromSet > old_nAromSet)) { - // loop over all rings - for (i = 0; i < atomRings.size(); ++i) { - // add 2 pi electrons for each double bond in the ring - for (j = 0, pi_e = 0, moveToNextRing = false, isNOSinRing = false, - exoDoubleBond = false; - (!moveToNextRing) && (j < atomRings[i].size()); ++j) { - atom = mol.getAtomWithIdx(atomRings[i][j]); - // remember if this atom is nitrogen, oxygen or divalent sulfur - if ((atom->getAtomicNum() == 7) || (atom->getAtomicNum() == 8) || - ((atom->getAtomicNum() == 16) && (atom->getDegree() == 2))) { - isNOSinRing = true; - } - // check whether this atom is double-bonded to next one in the ring - nextInRing = (j == (atomRings[i].size() - 1)) ? atomRings[i][0] - : atomRings[i][j + 1]; - if (mol.getBondBetweenAtoms(atomRings[i][j], nextInRing) - ->getBondType() == Bond::DOUBLE) { - pi_e += 2; - } - // if this is not a double bond, check whether this is carbon - // or nitrogen with total bond order = 4 - else { - atom = mol.getAtomWithIdx(atomRings[i][j]); - // if not, move on - if ((atom->getAtomicNum() != 6) && - (!((atom->getAtomicNum() == 7) && - ((atom->getExplicitValence() + atom->getNumImplicitHs()) == - 4)))) { - continue; - } - // loop over neighbors - boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(atom); - for (; nbrIdx != endNbrs; ++nbrIdx) { - const Atom *nbrAtom = mol[*nbrIdx]; - // if the neighbor is one of the ring atoms, skip it - // since we are looking for exocyclic neighbors - if (std::find(atomRings[i].begin(), atomRings[i].end(), - nbrAtom->getIdx()) != atomRings[i].end()) { - continue; - } - // it the neighbor is single-bonded, skip it - if (mol.getBondBetweenAtoms(atomRings[i][j], nbrAtom->getIdx()) - ->getBondType() == Bond::SINGLE) { - continue; - } - // if the neighbor is in a ring and its aromaticity - // bit has not yet been set, then move to the next ring - // we'll take care of this later - if (queryIsAtomInRing(nbrAtom) && - (!(aromBitVect[nbrAtom->getIdx()]))) { - moveToNextRing = true; - break; - } - // if the neighbor is in an aromatic ring and is - // double-bonded to the current atom, add 1 pi electron - if (mol.getBondBetweenAtoms(atomRings[i][j], nbrAtom->getIdx()) - ->getBondType() == Bond::DOUBLE) { - if (nbrAtom->getIsAromatic()) { - ++pi_e; - } else { - exoDoubleBond = true; - } - } - } - } - } - // if we quit the loop at an early stage because aromaticity - // had not yet been set, then move to the next ring - if (moveToNextRing) { - continue; - } - // loop again over all ring atoms - for (j = 0, canBeAromatic = true; j < atomRings[i].size(); ++j) { - // set aromaticity as perceived - aromBitVect[atomRings[i][j]] = 1; - atom = mol.getAtomWithIdx(atomRings[i][j]); - // if this is is a non-sp2 carbon or nitrogen - // then this ring can't be aromatic - if (((atom->getAtomicNum() == 6) || (atom->getAtomicNum() == 7)) && - (atom->getHybridization() != Atom::SP2)) { - canBeAromatic = false; - } - } - // if this ring can't be aromatic, move to the next one - if (!canBeAromatic) { - continue; - } - // if there is N, O, S; no exocyclic double bonds; - // the ring has an odd number of terms: add 2 pi electrons - if (isNOSinRing && (!exoDoubleBond) && (atomRings[i].size() % 2)) { - pi_e += 2; - } - // if this ring satisfies the 4n+2 rule, - // then mark its atoms as aromatic - if ((pi_e > 2) && (!((pi_e - 2) % 4))) { - aromRingBitVect[i] = 1; - for (j = 0; j < atomRings[i].size(); ++j) { - atom = mol.getAtomWithIdx(atomRings[i][j]); - atom->setIsAromatic(true); - } - } - } - // termination criterion: if we did not manage to set any more - // aromatic atoms compared to the previous iteration, then - // stop looping - old_nAromSet = nAromSet; - nAromSet = 0; - aromRingsAllSet = true; - for (i = 0; i < atomRings.size(); ++i) { - for (j = 0; j < atomRings[i].size(); ++j) { - if (aromBitVect[atomRings[i][j]]) { - ++nAromSet; - } else { - aromRingsAllSet = false; - } - } - } - } - for (i = 0; i < atomRings.size(); ++i) { - // if the ring is not aromatic, move to the next one - if (!aromRingBitVect[i]) { - continue; - } - for (j = 0; j < atomRings[i].size(); ++j) { - // mark all ring bonds as aromatic - nextInRing = (j == (atomRings[i].size() - 1)) ? atomRings[i][0] - : atomRings[i][j + 1]; - bond = mol.getBondBetweenAtoms(atomRings[i][j], nextInRing); - bond->setBondType(Bond::AROMATIC); - bond->setIsAromatic(true); - } - } - for (i = 0; i < atomRings.size(); ++i) { - // if the ring is not aromatic, move to the next one - if (!aromRingBitVect[i]) { - continue; - } - for (j = 0; j < atomRings[i].size(); ++j) { - atom = mol.getAtomWithIdx(atomRings[i][j]); - if (atom->getAtomicNum() != 6) { - int iv = atom->calcImplicitValence(false); - atom->calcExplicitValence(false); - if (iv) { - atom->setNumExplicitHs(iv); - atom->calcImplicitValence(false); - } - } - } - } + // Deprecated, moved to MolOps + MolOps::setMMFFAromaticity(mol); } // sets the MMFF atomType for a heavy atom @@ -2535,7 +2367,7 @@ MMFFMolProperties::MMFFMolProperties(ROMol &mol, const std::string &mmffVariant, d_MMFFAtomPropertiesPtrVect[i] = MMFFAtomPropertiesPtr(new MMFFAtomProperties()); } - setMMFFAromaticity((RWMol &)mol); + MolOps::setMMFFAromaticity((RWMol &)mol); RingMembershipSize rmSize(mol); for (const auto atom : mol.atoms()) { if (atom->getAtomicNum() != 1) { diff --git a/Code/GraphMol/ForceFieldHelpers/MMFF/AtomTyper.h b/Code/GraphMol/ForceFieldHelpers/MMFF/AtomTyper.h index 1684f6b1d..e1b242a09 100644 --- a/Code/GraphMol/ForceFieldHelpers/MMFF/AtomTyper.h +++ b/Code/GraphMol/ForceFieldHelpers/MMFF/AtomTyper.h @@ -217,6 +217,7 @@ RDKIT_FORCEFIELDHELPERS_EXPORT bool areAtomsInSameRingOfSize( const ROMol &mol, const unsigned int ringSize, const unsigned int numAtoms, ...); RDKIT_FORCEFIELDHELPERS_EXPORT unsigned int sanitizeMMFFMol(RWMol &mol); +//! \deprecated, please use MolOps::setMMFFAromaticity instead RDKIT_FORCEFIELDHELPERS_EXPORT void setMMFFAromaticity(RWMol &mol); RDKIT_FORCEFIELDHELPERS_EXPORT unsigned int getMMFFStretchBendType( const unsigned int angleType, const unsigned int bondType1, diff --git a/Code/GraphMol/MolOps.h b/Code/GraphMol/MolOps.h index c5a9ef38f..55d41b61f 100644 --- a/Code/GraphMol/MolOps.h +++ b/Code/GraphMol/MolOps.h @@ -541,9 +541,13 @@ typedef enum { AROMATICITY_RDKIT = 0x1, AROMATICITY_SIMPLE = 0x2, AROMATICITY_MDL = 0x4, + AROMATICITY_MMFF94 = 0x8, AROMATICITY_CUSTOM = 0xFFFFFFF ///< use a function } AromaticityModel; + +RDKIT_GRAPHMOL_EXPORT void setMMFFAromaticity(RWMol &mol); + //! Sets up the aromaticity for a molecule /*! diff --git a/Code/GraphMol/Wrap/MolOps.cpp b/Code/GraphMol/Wrap/MolOps.cpp index 723df2ffd..75345f7ec 100644 --- a/Code/GraphMol/Wrap/MolOps.cpp +++ b/Code/GraphMol/Wrap/MolOps.cpp @@ -1772,6 +1772,7 @@ to the terminal dummy atoms.\n\ .value("AROMATICITY_RDKIT", MolOps::AROMATICITY_RDKIT) .value("AROMATICITY_SIMPLE", MolOps::AROMATICITY_SIMPLE) .value("AROMATICITY_MDL", MolOps::AROMATICITY_MDL) + .value("AROMATICITY_MMFF94", MolOps::AROMATICITY_MMFF94) .value("AROMATICITY_CUSTOM", MolOps::AROMATICITY_CUSTOM) .export_values(); diff --git a/Code/GraphMol/molopstest.cpp b/Code/GraphMol/molopstest.cpp index 9ea3e2994..1f5b03dad 100644 --- a/Code/GraphMol/molopstest.cpp +++ b/Code/GraphMol/molopstest.cpp @@ -6952,6 +6952,39 @@ void testSimpleAromaticity() { BOOST_LOG(rdInfoLog) << "Finished" << std::endl; } +void testMMFFAromaticity() { + { + BOOST_LOG(rdInfoLog) + << "-----------------------\n Testing MMFF94 aromaticity" << std::endl; + + // test one known difference between RDKit and MMFF94 aromaticity models: + // the latter does not recognize azulene as aromatic + + std::string smiles = "C1=CC=C2C=CC=C2C=C1"; + RWMol *m = SmilesToMol(smiles); + MolOps::Kekulize(*m, true); + + MolOps::setAromaticity(*m, MolOps::AROMATICITY_RDKIT); + int arombondcount = 0; + for (auto b : m->bonds()) { + if (b->getIsAromatic()) arombondcount++; + } + // all bonds, except the fused one, should be aromatic + TEST_ASSERT(arombondcount == 10); + TEST_ASSERT(m->getBondBetweenAtoms(3, 7)->getIsAromatic() == false); + + MolOps::setAromaticity(*m, MolOps::AROMATICITY_MMFF94); + arombondcount = 0; + for (auto b : m->bonds()) { + if (b->getIsAromatic()) arombondcount++; + } + // no aromatics here + TEST_ASSERT(arombondcount == 0); + delete m; + } + BOOST_LOG(rdInfoLog) << "Finished" << std::endl; +} + //! really dumb aromaticity: any conjugated ring bond is aromatic int customAromaticity(RWMol &m) { m.updatePropertyCache(); @@ -8634,6 +8667,7 @@ int main() { testKekulizeErrorReporting(); testGithubIssue868(); testSimpleAromaticity(); + testMMFFAromaticity(); testGithubIssue1730(); testCustomAromaticity(); testGithubIssue908(); diff --git a/ReleaseNotes.md b/ReleaseNotes.md index 201b1cdcf..5eda65c68 100644 --- a/ReleaseNotes.md +++ b/ReleaseNotes.md @@ -30,6 +30,7 @@ GitHub) - AtomPairs.Utils.NumPiElectrons is deprecated in favor of Chem.GetNumPiElectrons. AtomPairs.Utils.NumPiElectrons failed if the atom had outgoing dative bonds (see Issue #7318). - The classes DistViolationContrib, ChiralViolationContrib, and FourthDimContrib have been deprecated. Please use DistViolationContribs, ChiralViolationContribs, and FourthDimContribs instead. +- The function `MMFF::setMMFFAromaticity()` has been moved to the namespace MolOps. Please use `MolOps::setMMFFAromaticity()` instead. # Release_2024.03.1 (Changes relative to Release_2023.09.1)