Fix/github1810 (#1811)

* Fixes #1810
There's still a problem with stereo atoms (see the commented out test),
but this takes care of the basics and that's up next.

* clean up that last problematic bit.
Still needs all tests to run

* this seems to be a necessary workaround for problems with VS2017.
It's a bit ugly, but shouldn't have any performance impact, so I'm not
going to get too worked up.

* This set of changes alters what we get for some of the InChI test cases.
Adjust for that.
This commit is contained in:
Greg Landrum
2018-04-11 04:48:16 +02:00
committed by GitHub
parent 9d72e645fa
commit e8c46ca2a9
7 changed files with 172 additions and 45 deletions

View File

@@ -1,5 +1,5 @@
//
// Copyright (C) 2003-2017 Greg Landrum and Rational Discovery LLC
// Copyright (C) 2003-2018 Greg Landrum and Rational Discovery LLC
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
@@ -411,9 +411,8 @@ void addHs(RWMol &mol, bool explicitOnly, bool addCoords,
// for their coordinates
unsigned int numAddHyds = 0;
for (auto at : mol.atoms()) {
if (!onlyOnAtoms ||
std::find(onlyOnAtoms->begin(), onlyOnAtoms->end(), at->getIdx()) !=
onlyOnAtoms->end()) {
if (!onlyOnAtoms || std::find(onlyOnAtoms->begin(), onlyOnAtoms->end(),
at->getIdx()) != onlyOnAtoms->end()) {
numAddHyds += at->getNumExplicitHs();
if (!explicitOnly) {
numAddHyds += at->getNumImplicitHs();
@@ -431,9 +430,8 @@ void addHs(RWMol &mol, bool explicitOnly, bool addCoords,
unsigned int stopIdx = mol.getNumAtoms();
for (unsigned int aidx = 0; aidx < stopIdx; ++aidx) {
if (onlyOnAtoms &&
std::find(onlyOnAtoms->begin(), onlyOnAtoms->end(), aidx) ==
onlyOnAtoms->end()) {
if (onlyOnAtoms && std::find(onlyOnAtoms->begin(), onlyOnAtoms->end(),
aidx) == onlyOnAtoms->end()) {
continue;
}
@@ -482,6 +480,57 @@ ROMol *addHs(const ROMol &mol, bool explicitOnly, bool addCoords,
return static_cast<ROMol *>(res);
};
namespace {
// returns whether or not an adjustment was made, in case we want that info
bool adjustStereoAtomsIfRequired(RWMol &mol, const Atom *atom,
const Atom *heavyAtom) {
PRECONDITION(atom != nullptr, "bad atom");
PRECONDITION(heavyAtom != nullptr, "bad heavy atom");
// nothing we can do if the degree is only 2 (and we should have covered
// that earlier anyway)
if (heavyAtom->getDegree() == 2) return false;
const auto &cbnd =
mol.getBondBetweenAtoms(atom->getIdx(), heavyAtom->getIdx());
if (!cbnd) return false;
Bond *dblBond = nullptr;
for (const auto &nbri :
boost::make_iterator_range(mol.getAtomBonds(heavyAtom))) {
Bond *bnd = mol[nbri];
if (bnd->getBondType() == Bond::DOUBLE &&
bnd->getStereo() > Bond::STEREOANY) {
auto sAtomIt = std::find(bnd->getStereoAtoms().begin(),
bnd->getStereoAtoms().end(), atom->getIdx());
if (sAtomIt != bnd->getStereoAtoms().end()) {
// sAtomIt points to the position of this atom's index in the list.
// find the index of another atom attached to the heavy atom and
// use it to update sAtomIt
unsigned int dblNbrIdx = bnd->getOtherAtomIdx(heavyAtom->getIdx());
for (const auto &nbri :
boost::make_iterator_range(mol.getAtomNeighbors(heavyAtom))) {
const auto &nbr = mol[nbri];
if (nbr->getIdx() == dblNbrIdx || nbr->getIdx() == atom->getIdx())
continue;
*sAtomIt = nbr->getIdx();
switch (bnd->getStereo()) {
case Bond::STEREOCIS:
bnd->setStereo(Bond::STEREOTRANS);
break;
case Bond::STEREOTRANS:
bnd->setStereo(Bond::STEREOCIS);
break;
default:
// I think we shouldn't need to do anything with E and Z...
break;
}
return true;
}
}
}
}
return false;
}
} // end of anonymous namespace
//
// This routine removes hydrogens (and bonds to them) from the molecular graph.
// Other Atom and bond indices may be affected by the removal.
@@ -494,6 +543,8 @@ ROMol *addHs(const ROMol &mol, bool explicitOnly, bool addCoords,
// will not be removed.
// - two coordinate Hs, like the central H in C[H-]C, will not be removed
// - Hs connected to dummy atoms will not be removed
// - Hs that are part of the definition of double bond Stereochemistry
// will not be removed
//
void removeHs(RWMol &mol, bool implicitOnly, bool updateExplicitCount,
bool sanitize) {
@@ -525,8 +576,25 @@ void removeHs(RWMol &mol, bool implicitOnly, bool updateExplicitCount,
atom->getDegree() == 1) {
ROMol::ADJ_ITER begin, end;
boost::tie(begin, end) = mol.getAtomNeighbors(atom);
if (mol.getAtomWithIdx(*begin)->getAtomicNum() > 1) {
auto nbr = mol.getAtomWithIdx(*begin);
if (nbr->getAtomicNum() > 1) {
removeIt = true;
// we're connected to a non-dummy, non H atom. Check to see
// if the neighbor has a double bond and we're the only neighbor
// at this end. This was part of github #1810
if (nbr->getDegree() == 2) {
for (const auto &nbri :
boost::make_iterator_range(mol.getAtomBonds(nbr))) {
const Bond *bnd = mol[nbri];
if (bnd->getBondType() == Bond::DOUBLE &&
(bnd->getStereo() > Bond::STEREOANY ||
mol.getBondBetweenAtoms(atom->getIdx(), nbr->getIdx())
->getBondDir() > Bond::NONE)) {
removeIt = false;
break;
}
}
}
}
}
@@ -595,14 +663,12 @@ void removeHs(RWMol &mol, bool implicitOnly, bool updateExplicitCount,
if (bond->getBondDir() == Bond::UNKNOWN &&
bond->getBeginAtomIdx() == heavyAtom->getIdx()) {
heavyAtom->setProp(common_properties::_UnknownStereo, 1);
}
// if the direction is set on this bond and the atom it's connected to
// has no other single bonds with directions set, then we need to set
// direction on one of the other neighbors in order to avoid double bond
// stereochemistry possibly being lost. This was github #754
if (bond->getBondDir() == Bond::ENDDOWNRIGHT ||
bond->getBondDir() == Bond::ENDUPRIGHT) {
} else if (bond->getBondDir() == Bond::ENDDOWNRIGHT ||
bond->getBondDir() == Bond::ENDUPRIGHT) {
// if the direction is set on this bond and the atom it's connected to
// has no other single bonds with directions set, then we need to set
// direction on one of the other neighbors in order to avoid double
// bond stereochemistry possibly being lost. This was github #754
bool foundADir = false;
Bond *oBond = nullptr;
boost::tie(beg, end) = mol.getAtomBonds(heavyAtom);
@@ -628,8 +694,12 @@ void removeHs(RWMol &mol, bool implicitOnly, bool updateExplicitCount,
oBond->setBondDir(bond->getBondDir());
}
}
} else {
// if this atom is one of the stereoatoms for a double bond we need
// to switch the stereo atom on this end to be the other neighbor
// This was part of github #1810
adjustStereoAtomsIfRequired(mol, atom, heavyAtom);
}
mol.removeAtom(atom);
} else {
// only increment the atom idx if we don't remove the atom
@@ -738,7 +808,7 @@ bool isQueryH(const Atom *atom) {
}
return hasHQuery;
}
}
} // namespace
//
// This routine removes explicit hydrogens (and bonds to them) from

View File

@@ -189,7 +189,7 @@ void halogenCleanup(RWMol &mol, Atom *atom) {
}
}
}
}
} // namespace
void cleanUp(RWMol &mol) {
ROMol::AtomIterator ai;
@@ -467,7 +467,7 @@ std::vector<ROMOL_SPTR> getMolFrags(const ROMol &mol, bool sanitizeFrags,
ROMol::EDGE_ITER beg, end;
boost::tie(beg, end) = mol.getEdges();
while (beg != end) {
const Bond* bond = (mol)[*beg];
const Bond *bond = (mol)[*beg];
++beg;
if (!copiedBonds[bond->getIdx()]) {
continue;
@@ -583,14 +583,14 @@ unsigned int getMolFrags(const ROMol &mol, VECT_INT_VECT &frags) {
}
template <typename T>
std::map<T, boost::shared_ptr<ROMol> > getMolFragsWithQuery(
std::map<T, boost::shared_ptr<ROMol>> getMolFragsWithQuery(
const ROMol &mol, T (*query)(const ROMol &, const Atom *),
bool sanitizeFrags, const std::vector<T> *whiteList, bool negateList) {
PRECONDITION(query, "no query");
std::vector<T> assignments(mol.getNumAtoms());
std::vector<int> ids(mol.getNumAtoms(), -1);
std::map<T, boost::shared_ptr<ROMol> > res;
std::map<T, boost::shared_ptr<ROMol>> res;
for (unsigned int i = 0; i < mol.getNumAtoms(); ++i) {
T where = query(mol, mol.getAtomWithIdx(i));
if (whiteList) {
@@ -645,13 +645,13 @@ std::map<T, boost::shared_ptr<ROMol> > getMolFragsWithQuery(
}
return res;
}
template std::map<std::string, boost::shared_ptr<ROMol> > getMolFragsWithQuery(
template std::map<std::string, boost::shared_ptr<ROMol>> getMolFragsWithQuery(
const ROMol &mol, std::string (*query)(const ROMol &, const Atom *),
bool sanitizeFrags, const std::vector<std::string> *, bool);
template std::map<int, boost::shared_ptr<ROMol> > getMolFragsWithQuery(
template std::map<int, boost::shared_ptr<ROMol>> getMolFragsWithQuery(
const ROMol &mol, int (*query)(const ROMol &, const Atom *),
bool sanitizeFrags, const std::vector<int> *, bool);
template std::map<unsigned int, boost::shared_ptr<ROMol> > getMolFragsWithQuery(
template std::map<unsigned int, boost::shared_ptr<ROMol>> getMolFragsWithQuery(
const ROMol &mol, unsigned int (*query)(const ROMol &, const Atom *),
bool sanitizeFrags, const std::vector<unsigned int> *, bool);

View File

@@ -78,7 +78,7 @@ unsigned int getMolFrags(const ROMol &mol, std::vector<int> &mapping);
*/
unsigned int getMolFrags(const ROMol &mol,
std::vector<std::vector<int> > &frags);
std::vector<std::vector<int>> &frags);
//! splits a molecule into its component fragments
// (disconnected components of the molecular graph)
@@ -98,9 +98,9 @@ unsigned int getMolFrags(const ROMol &mol,
\return a vector of the fragments as smart pointers to ROMols
*/
std::vector<boost::shared_ptr<ROMol> > getMolFrags(
std::vector<boost::shared_ptr<ROMol>> getMolFrags(
const ROMol &mol, bool sanitizeFrags = true, std::vector<int> *frags = 0,
std::vector<std::vector<int> > *fragsMolAtomMapping = 0,
std::vector<std::vector<int>> *fragsMolAtomMapping = 0,
bool copyConformers = true);
//! splits a molecule into pieces based on labels assigned using a query
@@ -118,7 +118,7 @@ std::vector<boost::shared_ptr<ROMol> > getMolFrags(
*/
template <typename T>
std::map<T, boost::shared_ptr<ROMol> > getMolFragsWithQuery(
std::map<T, boost::shared_ptr<ROMol>> getMolFragsWithQuery(
const ROMol &mol, T (*query)(const ROMol &, const Atom *),
bool sanitizeFrags = true, const std::vector<T> *whiteList = 0,
bool negateList = false);
@@ -207,6 +207,8 @@ void addHs(RWMol &mol, bool explicitOnly = false, bool addCoords = false,
will not be removed.
- two coordinate Hs, like the central H in C[H-]C, will not be removed
- Hs connected to dummy atoms will not be removed
- Hs that are part of the definition of double bond Stereochemistry
will not be removed
- the caller is responsible for <tt>delete</tt>ing the pointer this
returns.
@@ -555,9 +557,9 @@ void setHybridization(ROMol &mol);
- Since SSSR may not be unique, a post-SSSR step to symmetrize may be done.
The extra rings this process adds can be quite useful.
*/
int findSSSR(const ROMol &mol, std::vector<std::vector<int> > &res);
int findSSSR(const ROMol &mol, std::vector<std::vector<int>> &res);
//! \overload
int findSSSR(const ROMol &mol, std::vector<std::vector<int> > *res = 0);
int findSSSR(const ROMol &mol, std::vector<std::vector<int>> *res = 0);
//! use a DFS algorithm to identify ring bonds and atoms in a molecule
/*!
@@ -593,7 +595,7 @@ void fastFindRings(const ROMol &mol);
- if no SSSR rings are found on the molecule - MolOps::findSSSR() is called
first
*/
int symmetrizeSSSR(ROMol &mol, std::vector<std::vector<int> > &res);
int symmetrizeSSSR(ROMol &mol, std::vector<std::vector<int>> &res);
//! \overload
int symmetrizeSSSR(ROMol &mol);

View File

@@ -358,6 +358,10 @@ MolOps::SanitizeFlags sanitizeMol(ROMol &mol, boost::uint64_t sanitizeOps,
if (catchErrors) {
try {
MolOps::sanitizeMol(wmol, operationThatFailed, sanitizeOps);
} catch ( const MolSanitizeException &e){
// this really should not be necessary, but at some point it
// started to be required with VC++17. Doesn't seem like it does
// any harm.
} catch (...) {
}
} else {
@@ -977,6 +981,15 @@ struct molops_wrapper {
NOTES:\n\
\n\
- The original molecule is *not* modified.\n\
- Hydrogens which aren't connected to a heavy atom will not be\n\
removed. This prevents molecules like [H][H] from having\n\
all atoms removed.\n\
- Labelled hydrogen (e.g. atoms with atomic number=1, but isotope > 1),\n\
will not be removed.\n\
- two coordinate Hs, like the central H in C[H-]C, will not be removed\n\
- Hs connected to dummy atoms will not be removed\n\
- Hs that are part of the definition of double bond Stereochemistry\n\
will not be removed\n\
\n";
python::def("RemoveHs",
(ROMol * (*)(const ROMol &, bool, bool, bool))MolOps::removeHs,

View File

@@ -4648,7 +4648,7 @@ void _renumberTest(const ROMol *m) {
delete nm;
}
}
}
} // namespace
void testRenumberAtoms() {
BOOST_LOG(rdInfoLog) << "-----------------------\n Testing renumbering atoms"
@@ -4807,7 +4807,7 @@ void testGithubIssue190() {
namespace {
int getAtNum(const ROMol &, const Atom *at) { return at->getAtomicNum(); }
}
} // namespace
void testMolFragsWithQuery() {
BOOST_LOG(rdInfoLog)
<< "-----------------------\n Testing getMolFragsWithQuery()."
@@ -5161,7 +5161,7 @@ void hypervalent_check(const char *smiles) {
TEST_ASSERT(m->getAtomWithIdx(1)->getFormalCharge() == -1);
delete m;
}
}
} // namespace
void testGithubIssue510() {
BOOST_LOG(rdInfoLog) << "-----------------------\n Testing github issue 510: "
"Hexafluorophosphate cannot be handled"
@@ -6958,8 +6958,9 @@ void testGithub1605() {
RWMol *m = SmilesToMol(smiles, 0, false);
TEST_ASSERT(m);
unsigned int failed;
MolOps::sanitizeMol(*m, failed, MolOps::SANITIZE_SETAROMATICITY |
MolOps::SANITIZE_ADJUSTHS);
MolOps::sanitizeMol(
*m, failed,
MolOps::SANITIZE_SETAROMATICITY | MolOps::SANITIZE_ADJUSTHS);
TEST_ASSERT(!failed);
delete m;
}
@@ -6974,7 +6975,7 @@ void testGithub1622() {
{
// rings that should be aromatic
string aromaticSmis[] = {"C1=CC=CC=C1", // benzene, of course
// heterocyclics
// heterocyclics
"N1=CC=CC=C1", // pyridine
"N1=CC=CC=N1", // pyridazine
"N1=CC=CN=C1", // pyrimidine
@@ -7335,9 +7336,49 @@ void testGithub1614() {
BOOST_LOG(rdInfoLog) << "Finished" << std::endl;
}
void testGithub1810() {
BOOST_LOG(rdInfoLog) << "-----------------------\n Testing Github issue "
"1810: removeHs() should not remove H atoms that are "
"contributing to the definition of a stereo bond"
<< std::endl;
{
std::unique_ptr<RWMol> mol(SmilesToMol("F/C=C/[H]"));
TEST_ASSERT(mol);
TEST_ASSERT(mol->getNumAtoms() == 4);
TEST_ASSERT(mol->getBondBetweenAtoms(1, 2)->getStereo() == Bond::STEREOE);
}
{
std::unique_ptr<RWMol> mol(SmilesToMol("F/C=C(/F)[H]"));
TEST_ASSERT(mol);
TEST_ASSERT(mol->getNumAtoms() == 4);
TEST_ASSERT(mol->getBondBetweenAtoms(1, 2)->getStereo() == Bond::STEREOE);
}
{
std::unique_ptr<RWMol> mol(SmilesToMol("F/C=C(/[H])F"));
TEST_ASSERT(mol);
TEST_ASSERT(mol->getNumAtoms() == 4);
TEST_ASSERT(mol->getBondBetweenAtoms(1, 2)->getStereo() == Bond::STEREOZ);
}
#if 1
{
std::unique_ptr<RWMol> mol(SmilesToMol("FC=C(F)[H]",false, false));
TEST_ASSERT(mol);
MolOps::sanitizeMol(*mol);
TEST_ASSERT(mol->getNumAtoms()==5);
mol->getBondBetweenAtoms(1,2)->setStereoAtoms(0,4);
mol->getBondBetweenAtoms(1,2)->setStereo(Bond::STEREOTRANS);
MolOps::removeHs(*mol);
TEST_ASSERT(mol->getNumAtoms()==4);
TEST_ASSERT(mol->getBondBetweenAtoms(1,2)->getStereoAtoms()[0]==0);
TEST_ASSERT(mol->getBondBetweenAtoms(1,2)->getStereoAtoms()[1]==3);
}
#endif
BOOST_LOG(rdInfoLog) << "Finished" << std::endl;
}
int main() {
RDLog::InitLogs();
// boost::logging::enable_logs("rdApp.debug");
// boost::logging::enable_logs("rdApp.debug");
#if 1
test1();
@@ -7439,8 +7480,10 @@ int main() {
testGithub1439();
testGithub1281();
testGithub1605();
testGithub1614();
testGithub1622();
#endif
testGithub1703();
testGithub1810();
return 0;
}

View File

@@ -165,14 +165,13 @@ void testGithubIssue8() {
ROMol *m2 = InchiToMol(inchi, tmp2);
TEST_ASSERT(m2);
std::string smi = MolToSmiles(*m2, true);
TEST_ASSERT(smi == "N=c1cc2oc3cc(N)ccc3c(-c3ccccc3C(=O)O)c-2cc1[125I]");
TEST_ASSERT(smi == "[H]/N=c1\\cc2oc3cc(N)ccc3c(-c3ccccc3C(=O)O)c-2cc1[125I]");
inchi = MolToInchi(*m2, tmp2);
TEST_ASSERT(inchi ==
TEST_ASSERT(inchi ==
"InChI=1S/C20H13IN2O3/"
"c21-15-8-14-18(9-16(15)23)26-17-7-10(22)5-6-13(17)19(14)11-3-"
"1-2-4-12(11)20(24)25/h1-9,23H,22H2,(H,24,25)/i21-2");
"1-2-4-12(11)20(24)25/h1-9,23H,22H2,(H,24,25)/b23-16+/i21-2");
delete m;
delete m2;
}

View File

@@ -248,9 +248,9 @@ class TestCase(unittest.TestCase):
same += 1
fmt = "\n{0}InChI read Summary: {1} identical, {2} variance, {3} reasonable variance{4}"
print(fmt.format(COLOR_GREEN, same, diff, reasonable, COLOR_RESET))
self.assertEqual(same, 621)
self.assertEqual(same, 624)
self.assertEqual(diff, 0)
self.assertEqual(reasonable, 560)
self.assertEqual(reasonable, 557)
def test2InchiOptions(self):
m = MolFromSmiles("CC=C(N)C")