Expose MMFF aromaticity model for SetAromaticity (#7765)

* Expose MMFF aromaticity model for SetAromaticity

* Test, backward compatibility, missing AromaticityModel wrapper
This commit is contained in:
Juuso Lehtivarjo
2024-09-18 06:59:02 +03:00
committed by GitHub
parent 75771be090
commit 9824a9d964
7 changed files with 256 additions and 171 deletions

View File

@@ -785,6 +785,41 @@ int mdlAromaticityHelper(RWMol &mol, const VECT_INT_VECT &srings) {
return narom;
}
int mmff94AromaticityHelper(RWMol &mol, const VECT_INT_VECT &srings) {
// set aromaticity as done in MMFF94 init
if (!mol.hasProp(common_properties::_MMFFSanitized)) {
bool isAromaticSet = false;
for (const auto atom : mol.atoms()) {
if (atom->getIsAromatic()) {
isAromaticSet = true;
break;
}
}
if (isAromaticSet) {
MolOps::Kekulize(mol, true);
}
mol.setProp(common_properties::_MMFFSanitized, 1, true);
}
setMMFFAromaticity(mol);
// count aromatic rings for return value
int narom = 1;
for (auto &sring : srings) {
bool isAromRing = true;
for (auto &aid : sring) {
Atom *atom = mol.getAtomWithIdx(aid);
if (!atom->getIsAromatic()) {
isAromRing = false;
break;
}
}
if (isAromRing) narom++;
}
return narom;
}
// use minRingSize=0 or maxRingSize=0 to ignore these constraints
int aromaticityHelper(RWMol &mol, const VECT_INT_VECT &srings,
unsigned int minRingSize, unsigned int maxRingSize,
@@ -905,6 +940,180 @@ int aromaticityHelper(RWMol &mol, const VECT_INT_VECT &srings,
} // end of anonymous namespace
// sets the aromaticity flags according to MMFF
void setMMFFAromaticity(RWMol &mol) {
bool moveToNextRing = false;
bool isNOSinRing = false;
bool aromRingsAllSet = false;
bool exoDoubleBond = false;
bool canBeAromatic = false;
unsigned int i;
unsigned int j;
unsigned int nextInRing;
unsigned int pi_e = 0;
int nAromSet = 0;
int old_nAromSet = -1;
RingInfo *ringInfo = mol.getRingInfo();
Atom *atom;
Bond *bond;
const VECT_INT_VECT &atomRings = ringInfo->atomRings();
ROMol::ADJ_ITER nbrIdx;
ROMol::ADJ_ITER endNbrs;
boost::dynamic_bitset<> aromBitVect(mol.getNumAtoms());
boost::dynamic_bitset<> aromRingBitVect(atomRings.size());
while ((!aromRingsAllSet) && atomRings.size() && (nAromSet > old_nAromSet)) {
// loop over all rings
for (i = 0; i < atomRings.size(); ++i) {
// add 2 pi electrons for each double bond in the ring
for (j = 0, pi_e = 0, moveToNextRing = false, isNOSinRing = false,
exoDoubleBond = false;
(!moveToNextRing) && (j < atomRings[i].size()); ++j) {
atom = mol.getAtomWithIdx(atomRings[i][j]);
// remember if this atom is nitrogen, oxygen or divalent sulfur
if ((atom->getAtomicNum() == 7) || (atom->getAtomicNum() == 8) ||
((atom->getAtomicNum() == 16) && (atom->getDegree() == 2))) {
isNOSinRing = true;
}
// check whether this atom is double-bonded to next one in the ring
nextInRing = (j == (atomRings[i].size() - 1)) ? atomRings[i][0]
: atomRings[i][j + 1];
if (mol.getBondBetweenAtoms(atomRings[i][j], nextInRing)
->getBondType() == Bond::DOUBLE) {
pi_e += 2;
}
// if this is not a double bond, check whether this is carbon
// or nitrogen with total bond order = 4
else {
atom = mol.getAtomWithIdx(atomRings[i][j]);
// if not, move on
if ((atom->getAtomicNum() != 6) &&
(!((atom->getAtomicNum() == 7) &&
((atom->getExplicitValence() + atom->getNumImplicitHs()) ==
4)))) {
continue;
}
// loop over neighbors
boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(atom);
for (; nbrIdx != endNbrs; ++nbrIdx) {
const Atom *nbrAtom = mol[*nbrIdx];
// if the neighbor is one of the ring atoms, skip it
// since we are looking for exocyclic neighbors
if (std::find(atomRings[i].begin(), atomRings[i].end(),
nbrAtom->getIdx()) != atomRings[i].end()) {
continue;
}
// it the neighbor is single-bonded, skip it
if (mol.getBondBetweenAtoms(atomRings[i][j], nbrAtom->getIdx())
->getBondType() == Bond::SINGLE) {
continue;
}
// if the neighbor is in a ring and its aromaticity
// bit has not yet been set, then move to the next ring
// we'll take care of this later
if (queryIsAtomInRing(nbrAtom) &&
(!(aromBitVect[nbrAtom->getIdx()]))) {
moveToNextRing = true;
break;
}
// if the neighbor is in an aromatic ring and is
// double-bonded to the current atom, add 1 pi electron
if (mol.getBondBetweenAtoms(atomRings[i][j], nbrAtom->getIdx())
->getBondType() == Bond::DOUBLE) {
if (nbrAtom->getIsAromatic()) {
++pi_e;
} else {
exoDoubleBond = true;
}
}
}
}
}
// if we quit the loop at an early stage because aromaticity
// had not yet been set, then move to the next ring
if (moveToNextRing) {
continue;
}
// loop again over all ring atoms
for (j = 0, canBeAromatic = true; j < atomRings[i].size(); ++j) {
// set aromaticity as perceived
aromBitVect[atomRings[i][j]] = 1;
atom = mol.getAtomWithIdx(atomRings[i][j]);
// if this is is a non-sp2 carbon or nitrogen
// then this ring can't be aromatic
if (((atom->getAtomicNum() == 6) || (atom->getAtomicNum() == 7)) &&
(atom->getHybridization() != Atom::SP2)) {
canBeAromatic = false;
}
}
// if this ring can't be aromatic, move to the next one
if (!canBeAromatic) {
continue;
}
// if there is N, O, S; no exocyclic double bonds;
// the ring has an odd number of terms: add 2 pi electrons
if (isNOSinRing && (!exoDoubleBond) && (atomRings[i].size() % 2)) {
pi_e += 2;
}
// if this ring satisfies the 4n+2 rule,
// then mark its atoms as aromatic
if ((pi_e > 2) && (!((pi_e - 2) % 4))) {
aromRingBitVect[i] = 1;
for (j = 0; j < atomRings[i].size(); ++j) {
atom = mol.getAtomWithIdx(atomRings[i][j]);
atom->setIsAromatic(true);
}
}
}
// termination criterion: if we did not manage to set any more
// aromatic atoms compared to the previous iteration, then
// stop looping
old_nAromSet = nAromSet;
nAromSet = 0;
aromRingsAllSet = true;
for (i = 0; i < atomRings.size(); ++i) {
for (j = 0; j < atomRings[i].size(); ++j) {
if (aromBitVect[atomRings[i][j]]) {
++nAromSet;
} else {
aromRingsAllSet = false;
}
}
}
}
for (i = 0; i < atomRings.size(); ++i) {
// if the ring is not aromatic, move to the next one
if (!aromRingBitVect[i]) {
continue;
}
for (j = 0; j < atomRings[i].size(); ++j) {
// mark all ring bonds as aromatic
nextInRing = (j == (atomRings[i].size() - 1)) ? atomRings[i][0]
: atomRings[i][j + 1];
bond = mol.getBondBetweenAtoms(atomRings[i][j], nextInRing);
bond->setBondType(Bond::AROMATIC);
bond->setIsAromatic(true);
}
}
for (i = 0; i < atomRings.size(); ++i) {
// if the ring is not aromatic, move to the next one
if (!aromRingBitVect[i]) {
continue;
}
for (j = 0; j < atomRings[i].size(); ++j) {
atom = mol.getAtomWithIdx(atomRings[i][j]);
if (atom->getAtomicNum() != 6) {
int iv = atom->calcImplicitValence(false);
atom->calcExplicitValence(false);
if (iv) {
atom->setNumExplicitHs(iv);
atom->calcImplicitValence(false);
}
}
}
}
}
int setAromaticity(RWMol &mol, AromaticityModel model, int (*func)(RWMol &)) {
// This function used to check if the input molecule came
// with aromaticity information, assumed it is correct and
@@ -930,6 +1139,9 @@ int setAromaticity(RWMol &mol, AromaticityModel model, int (*func)(RWMol &)) {
case AROMATICITY_MDL:
res = mdlAromaticityHelper(mol, srings);
break;
case AROMATICITY_MMFF94:
res = mmff94AromaticityHelper(mol, srings);
break;
case AROMATICITY_CUSTOM:
PRECONDITION(
func,

View File

@@ -502,176 +502,8 @@ bool areAtomsInSameAromaticRing(const ROMol &mol, const unsigned int idx1,
// sets the aromaticity flags according to MMFF
void setMMFFAromaticity(RWMol &mol) {
bool moveToNextRing = false;
bool isNOSinRing = false;
bool aromRingsAllSet = false;
bool exoDoubleBond = false;
bool canBeAromatic = false;
unsigned int i;
unsigned int j;
unsigned int nextInRing;
unsigned int pi_e = 0;
int nAromSet = 0;
int old_nAromSet = -1;
RingInfo *ringInfo = mol.getRingInfo();
Atom *atom;
Bond *bond;
const VECT_INT_VECT &atomRings = ringInfo->atomRings();
ROMol::ADJ_ITER nbrIdx;
ROMol::ADJ_ITER endNbrs;
boost::dynamic_bitset<> aromBitVect(mol.getNumAtoms());
boost::dynamic_bitset<> aromRingBitVect(atomRings.size());
while ((!aromRingsAllSet) && atomRings.size() && (nAromSet > old_nAromSet)) {
// loop over all rings
for (i = 0; i < atomRings.size(); ++i) {
// add 2 pi electrons for each double bond in the ring
for (j = 0, pi_e = 0, moveToNextRing = false, isNOSinRing = false,
exoDoubleBond = false;
(!moveToNextRing) && (j < atomRings[i].size()); ++j) {
atom = mol.getAtomWithIdx(atomRings[i][j]);
// remember if this atom is nitrogen, oxygen or divalent sulfur
if ((atom->getAtomicNum() == 7) || (atom->getAtomicNum() == 8) ||
((atom->getAtomicNum() == 16) && (atom->getDegree() == 2))) {
isNOSinRing = true;
}
// check whether this atom is double-bonded to next one in the ring
nextInRing = (j == (atomRings[i].size() - 1)) ? atomRings[i][0]
: atomRings[i][j + 1];
if (mol.getBondBetweenAtoms(atomRings[i][j], nextInRing)
->getBondType() == Bond::DOUBLE) {
pi_e += 2;
}
// if this is not a double bond, check whether this is carbon
// or nitrogen with total bond order = 4
else {
atom = mol.getAtomWithIdx(atomRings[i][j]);
// if not, move on
if ((atom->getAtomicNum() != 6) &&
(!((atom->getAtomicNum() == 7) &&
((atom->getExplicitValence() + atom->getNumImplicitHs()) ==
4)))) {
continue;
}
// loop over neighbors
boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(atom);
for (; nbrIdx != endNbrs; ++nbrIdx) {
const Atom *nbrAtom = mol[*nbrIdx];
// if the neighbor is one of the ring atoms, skip it
// since we are looking for exocyclic neighbors
if (std::find(atomRings[i].begin(), atomRings[i].end(),
nbrAtom->getIdx()) != atomRings[i].end()) {
continue;
}
// it the neighbor is single-bonded, skip it
if (mol.getBondBetweenAtoms(atomRings[i][j], nbrAtom->getIdx())
->getBondType() == Bond::SINGLE) {
continue;
}
// if the neighbor is in a ring and its aromaticity
// bit has not yet been set, then move to the next ring
// we'll take care of this later
if (queryIsAtomInRing(nbrAtom) &&
(!(aromBitVect[nbrAtom->getIdx()]))) {
moveToNextRing = true;
break;
}
// if the neighbor is in an aromatic ring and is
// double-bonded to the current atom, add 1 pi electron
if (mol.getBondBetweenAtoms(atomRings[i][j], nbrAtom->getIdx())
->getBondType() == Bond::DOUBLE) {
if (nbrAtom->getIsAromatic()) {
++pi_e;
} else {
exoDoubleBond = true;
}
}
}
}
}
// if we quit the loop at an early stage because aromaticity
// had not yet been set, then move to the next ring
if (moveToNextRing) {
continue;
}
// loop again over all ring atoms
for (j = 0, canBeAromatic = true; j < atomRings[i].size(); ++j) {
// set aromaticity as perceived
aromBitVect[atomRings[i][j]] = 1;
atom = mol.getAtomWithIdx(atomRings[i][j]);
// if this is is a non-sp2 carbon or nitrogen
// then this ring can't be aromatic
if (((atom->getAtomicNum() == 6) || (atom->getAtomicNum() == 7)) &&
(atom->getHybridization() != Atom::SP2)) {
canBeAromatic = false;
}
}
// if this ring can't be aromatic, move to the next one
if (!canBeAromatic) {
continue;
}
// if there is N, O, S; no exocyclic double bonds;
// the ring has an odd number of terms: add 2 pi electrons
if (isNOSinRing && (!exoDoubleBond) && (atomRings[i].size() % 2)) {
pi_e += 2;
}
// if this ring satisfies the 4n+2 rule,
// then mark its atoms as aromatic
if ((pi_e > 2) && (!((pi_e - 2) % 4))) {
aromRingBitVect[i] = 1;
for (j = 0; j < atomRings[i].size(); ++j) {
atom = mol.getAtomWithIdx(atomRings[i][j]);
atom->setIsAromatic(true);
}
}
}
// termination criterion: if we did not manage to set any more
// aromatic atoms compared to the previous iteration, then
// stop looping
old_nAromSet = nAromSet;
nAromSet = 0;
aromRingsAllSet = true;
for (i = 0; i < atomRings.size(); ++i) {
for (j = 0; j < atomRings[i].size(); ++j) {
if (aromBitVect[atomRings[i][j]]) {
++nAromSet;
} else {
aromRingsAllSet = false;
}
}
}
}
for (i = 0; i < atomRings.size(); ++i) {
// if the ring is not aromatic, move to the next one
if (!aromRingBitVect[i]) {
continue;
}
for (j = 0; j < atomRings[i].size(); ++j) {
// mark all ring bonds as aromatic
nextInRing = (j == (atomRings[i].size() - 1)) ? atomRings[i][0]
: atomRings[i][j + 1];
bond = mol.getBondBetweenAtoms(atomRings[i][j], nextInRing);
bond->setBondType(Bond::AROMATIC);
bond->setIsAromatic(true);
}
}
for (i = 0; i < atomRings.size(); ++i) {
// if the ring is not aromatic, move to the next one
if (!aromRingBitVect[i]) {
continue;
}
for (j = 0; j < atomRings[i].size(); ++j) {
atom = mol.getAtomWithIdx(atomRings[i][j]);
if (atom->getAtomicNum() != 6) {
int iv = atom->calcImplicitValence(false);
atom->calcExplicitValence(false);
if (iv) {
atom->setNumExplicitHs(iv);
atom->calcImplicitValence(false);
}
}
}
}
// Deprecated, moved to MolOps
MolOps::setMMFFAromaticity(mol);
}
// sets the MMFF atomType for a heavy atom
@@ -2535,7 +2367,7 @@ MMFFMolProperties::MMFFMolProperties(ROMol &mol, const std::string &mmffVariant,
d_MMFFAtomPropertiesPtrVect[i] =
MMFFAtomPropertiesPtr(new MMFFAtomProperties());
}
setMMFFAromaticity((RWMol &)mol);
MolOps::setMMFFAromaticity((RWMol &)mol);
RingMembershipSize rmSize(mol);
for (const auto atom : mol.atoms()) {
if (atom->getAtomicNum() != 1) {

View File

@@ -217,6 +217,7 @@ RDKIT_FORCEFIELDHELPERS_EXPORT bool areAtomsInSameRingOfSize(
const ROMol &mol, const unsigned int ringSize, const unsigned int numAtoms,
...);
RDKIT_FORCEFIELDHELPERS_EXPORT unsigned int sanitizeMMFFMol(RWMol &mol);
//! \deprecated, please use MolOps::setMMFFAromaticity instead
RDKIT_FORCEFIELDHELPERS_EXPORT void setMMFFAromaticity(RWMol &mol);
RDKIT_FORCEFIELDHELPERS_EXPORT unsigned int getMMFFStretchBendType(
const unsigned int angleType, const unsigned int bondType1,

View File

@@ -541,9 +541,13 @@ typedef enum {
AROMATICITY_RDKIT = 0x1,
AROMATICITY_SIMPLE = 0x2,
AROMATICITY_MDL = 0x4,
AROMATICITY_MMFF94 = 0x8,
AROMATICITY_CUSTOM = 0xFFFFFFF ///< use a function
} AromaticityModel;
RDKIT_GRAPHMOL_EXPORT void setMMFFAromaticity(RWMol &mol);
//! Sets up the aromaticity for a molecule
/*!

View File

@@ -1772,6 +1772,7 @@ to the terminal dummy atoms.\n\
.value("AROMATICITY_RDKIT", MolOps::AROMATICITY_RDKIT)
.value("AROMATICITY_SIMPLE", MolOps::AROMATICITY_SIMPLE)
.value("AROMATICITY_MDL", MolOps::AROMATICITY_MDL)
.value("AROMATICITY_MMFF94", MolOps::AROMATICITY_MMFF94)
.value("AROMATICITY_CUSTOM", MolOps::AROMATICITY_CUSTOM)
.export_values();

View File

@@ -6952,6 +6952,39 @@ void testSimpleAromaticity() {
BOOST_LOG(rdInfoLog) << "Finished" << std::endl;
}
void testMMFFAromaticity() {
{
BOOST_LOG(rdInfoLog)
<< "-----------------------\n Testing MMFF94 aromaticity" << std::endl;
// test one known difference between RDKit and MMFF94 aromaticity models:
// the latter does not recognize azulene as aromatic
std::string smiles = "C1=CC=C2C=CC=C2C=C1";
RWMol *m = SmilesToMol(smiles);
MolOps::Kekulize(*m, true);
MolOps::setAromaticity(*m, MolOps::AROMATICITY_RDKIT);
int arombondcount = 0;
for (auto b : m->bonds()) {
if (b->getIsAromatic()) arombondcount++;
}
// all bonds, except the fused one, should be aromatic
TEST_ASSERT(arombondcount == 10);
TEST_ASSERT(m->getBondBetweenAtoms(3, 7)->getIsAromatic() == false);
MolOps::setAromaticity(*m, MolOps::AROMATICITY_MMFF94);
arombondcount = 0;
for (auto b : m->bonds()) {
if (b->getIsAromatic()) arombondcount++;
}
// no aromatics here
TEST_ASSERT(arombondcount == 0);
delete m;
}
BOOST_LOG(rdInfoLog) << "Finished" << std::endl;
}
//! really dumb aromaticity: any conjugated ring bond is aromatic
int customAromaticity(RWMol &m) {
m.updatePropertyCache();
@@ -8634,6 +8667,7 @@ int main() {
testKekulizeErrorReporting();
testGithubIssue868();
testSimpleAromaticity();
testMMFFAromaticity();
testGithubIssue1730();
testCustomAromaticity();
testGithubIssue908();

View File

@@ -30,6 +30,7 @@ GitHub)
- AtomPairs.Utils.NumPiElectrons is deprecated in favor of Chem.GetNumPiElectrons.
AtomPairs.Utils.NumPiElectrons failed if the atom had outgoing dative bonds (see Issue #7318).
- The classes DistViolationContrib, ChiralViolationContrib, and FourthDimContrib have been deprecated. Please use DistViolationContribs, ChiralViolationContribs, and FourthDimContribs instead.
- The function `MMFF::setMMFFAromaticity()` has been moved to the namespace MolOps. Please use `MolOps::setMMFFAromaticity()` instead.
# Release_2024.03.1
(Changes relative to Release_2023.09.1)