Preserve bond direction in fragmentOnBonds (#2484)

* Sketch of a solution to preserve bond direction in fragmentOnBonds

* A bit of cleanup work based on Andrew's original commit for #1039.
Also add a couple of tests.

* forgot to save a file before the commit
This commit is contained in:
Greg Landrum
2019-06-10 19:08:50 +02:00
committed by Brian Kelley
parent 6e838e3df3
commit ea0e8f674e
3 changed files with 83 additions and 15 deletions

View File

@@ -18,5 +18,5 @@ rdkit_test(testChemTransforms testChemTransforms.cpp
LINK_LIBRARIES ChemTransforms FileParsers SmilesParse SubstructMatch
GraphMol RDGeometryLib RDGeneral ${RDKit_THREAD_LIBS})
rdkit_catch_test(chemTransformsTestCatch catch_tests.cpp LINK_LIBRARIES ChemTransforms FileParsers SmilesParse
SubstructMatch GraphMol RDGeometryLib RDGeneral ${RDKit_THREAD_LIBS} )

View File

@@ -60,8 +60,8 @@ void constructFragmenterAtomTypes(
boost::split(tokens, tempStr, boost::is_any_of(" \t"),
boost::token_compress_on);
if (tokens.size() < 2) {
BOOST_LOG(rdWarningLog) << "line " << line << " is too short"
<< std::endl;
BOOST_LOG(rdWarningLog)
<< "line " << line << " is too short" << std::endl;
continue;
}
unsigned int idx = boost::lexical_cast<unsigned int>(tokens[0]);
@@ -149,20 +149,20 @@ void constructFragmenterBondTypes(
boost::split(tokens, tempStr, boost::is_any_of(" \t"),
boost::token_compress_on);
if (tokens.size() < 3) {
BOOST_LOG(rdWarningLog) << "line " << line << " is too short"
<< std::endl;
BOOST_LOG(rdWarningLog)
<< "line " << line << " is too short" << std::endl;
continue;
}
unsigned int idx1 = boost::lexical_cast<unsigned int>(tokens[0]);
if (atomTypes.find(idx1) == atomTypes.end()) {
BOOST_LOG(rdWarningLog) << "atom type #" << idx1 << " not recognized."
<< std::endl;
BOOST_LOG(rdWarningLog)
<< "atom type #" << idx1 << " not recognized." << std::endl;
continue;
}
unsigned int idx2 = boost::lexical_cast<unsigned int>(tokens[1]);
if (atomTypes.find(idx2) == atomTypes.end()) {
BOOST_LOG(rdWarningLog) << "atom type #" << idx2 << " not recognized."
<< std::endl;
BOOST_LOG(rdWarningLog)
<< "atom type #" << idx2 << " not recognized." << std::endl;
continue;
}
std::string sma1 = atomTypes.find(idx1)->second;
@@ -299,7 +299,7 @@ boost::uint64_t nextBitCombo(boost::uint64_t v) {
boost::uint64_t t = (v | (v - 1)) + 1;
return t | ((((t & -t) / (v & -v)) >> 1) - 1);
}
}
} // namespace
void fragmentOnSomeBonds(
const ROMol &mol, const std::vector<unsigned int> &bondIndices,
@@ -365,7 +365,7 @@ void checkChiralityPostMove(const ROMol &mol, const Atom *oAt, Atom *nAt,
// we do this with a property.
// this was github #1734
if (nAt->getPropIfPresent(newBondOrder, incomingOrder)) {
for (int bidx: incomingOrder) {
for (int bidx : incomingOrder) {
if (bidx != check_bond_index) {
newOrder.push_back(bidx);
}
@@ -393,7 +393,7 @@ void checkChiralityPostMove(const ROMol &mol, const Atom *oAt, Atom *nAt,
nAt->setChiralTag(oAt->getChiralTag());
if (nSwaps % 2) nAt->invertChirality();
}
}
} // namespace
ROMol *fragmentOnBonds(
const ROMol &mol, const std::vector<unsigned int> &bondIndices,
@@ -423,6 +423,8 @@ ROMol *fragmentOnBonds(
unsigned int bidx = bond->getBeginAtomIdx();
unsigned int eidx = bond->getEndAtomIdx();
Bond::BondType bT = bond->getBondType();
Bond::BondDir bD = bond->getBondDir();
unsigned int bondidx;
res->removeBond(bidx, eidx);
if (nCutsPerAtom) {
(*nCutsPerAtom)[bidx] += 1;
@@ -441,9 +443,18 @@ ROMol *fragmentOnBonds(
}
unsigned int idx1 = res->addAtom(at1, false, true);
if (bondTypes) bT = (*bondTypes)[i];
res->addBond(eidx, at1->getIdx(), bT);
bondidx = res->addBond(at1->getIdx(), eidx, bT) - 1;
// the dummy replaces the original start atom, so the
// direction will be ok as long as it's one of the
// states associated with double bond stereo
if (bD == Bond::ENDDOWNRIGHT || bD == Bond::ENDUPRIGHT)
res->getBondWithIdx(bondidx)->setBondDir(bD);
unsigned int idx2 = res->addAtom(at2, false, true);
res->addBond(bidx, at2->getIdx(), bT);
bondidx = res->addBond(bidx, at2->getIdx(), bT) - 1;
// this bond starts at the same atom, so its direction should always be
// correct:
res->getBondWithIdx(bondidx)->setBondDir(bD);
// figure out if we need to change the stereo tags on the atoms:
if (mol.getAtomWithIdx(bidx)->getChiralTag() ==

View File

@@ -0,0 +1,57 @@
//
// Copyright (c) 2019 Greg Landrum
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
#define CATCH_CONFIG_MAIN // This tells Catch to provide a main() - only do
// this in one cpp file
#include "catch.hpp"
#include <GraphMol/RDKitBase.h>
#include <GraphMol/SmilesParse/SmilesParse.h>
#include <GraphMol/SmilesParse/SmilesWrite.h>
#include <GraphMol/ChemTransforms/ChemTransforms.h>
#include <GraphMol/FileParsers/FileParsers.h>
#include <GraphMol/FileParsers/MolSupplier.h>
#include <GraphMol/Substruct/SubstructMatch.h>
using namespace RDKit;
using std::unique_ptr;
// Regression tests for Github #1039: the bonds to the dummy atoms created by
// fragmentOnBonds() are checked for the bond direction they carry over from
// the bond that was cut.
TEST_CASE("Github #1039", "[]") {
  SECTION("double bond") {
    // cut the single bond sitting between the two double bonds
    auto diene = "C/C=C/C=C/C"_smiles;
    REQUIRE(diene);
    std::vector<unsigned int> cutBonds{2};
    std::unique_ptr<ROMol> frags(
        MolFragmenter::fragmentOnBonds(*diene, cutBonds));
    REQUIRE(frags);
    CHECK(frags->getNumAtoms() == 8);

    // atoms 6 and 7 are the dummies appended by the fragmentation
    const Bond *dummyOn3 = frags->getBondBetweenAtoms(3, 6);
    REQUIRE(dummyOn3);
    const Bond *dummyOn2 = frags->getBondBetweenAtoms(2, 7);
    REQUIRE(dummyOn2);

    CHECK(dummyOn3->getBondType() == Bond::SINGLE);
    CHECK(dummyOn3->getBondDir() == Bond::ENDUPRIGHT);
    CHECK(dummyOn2->getBondType() == Bond::SINGLE);
    CHECK(dummyOn2->getBondDir() == Bond::ENDUPRIGHT);
    // both fragments are expected to keep their double bond stereo markers
    CHECK(MolToSmiles(*frags) == "[2*]/C=C/C.[3*]/C=C/C");
  }
  SECTION("atomic stereo") {
    auto center = "C(C)(F)(Cl)O"_smiles;
    REQUIRE(center);
    // mark the bond that is about to be cut as wedged
    center->getBondWithIdx(0)->setBondDir(Bond::BEGINWEDGE);
    std::vector<unsigned int> cutBonds{0};
    std::unique_ptr<ROMol> frags(
        MolFragmenter::fragmentOnBonds(*center, cutBonds));
    REQUIRE(frags);
    CHECK(frags->getNumAtoms() == 7);

    const Bond *dummyOn0 = frags->getBondBetweenAtoms(0, 6);
    REQUIRE(dummyOn0);
    const Bond *dummyOn1 = frags->getBondBetweenAtoms(1, 5);
    REQUIRE(dummyOn1);

    // only the bond that still begins at atom 0 is expected to keep the wedge
    CHECK(dummyOn0->getBondDir() == Bond::BEGINWEDGE);
    CHECK(dummyOn1->getBondDir() == Bond::NONE);
    // the SMILES carries no stereo here: it has not been assigned, and doing
    // that from wedging would require a conformer
    CHECK(MolToSmiles(*frags) == "*C.[1*]C(O)(F)Cl");
  }
}