diff --git a/Code/GraphMol/ChemTransforms/MolFragmenter.cpp b/Code/GraphMol/ChemTransforms/MolFragmenter.cpp index 340540373..54cf51450 100644 --- a/Code/GraphMol/ChemTransforms/MolFragmenter.cpp +++ b/Code/GraphMol/ChemTransforms/MolFragmenter.cpp @@ -11,6 +11,7 @@ #include "MolFragmenter.h" #include +#include #include #include #include @@ -464,6 +465,12 @@ ROMol *fragmentOnBonds( Bond::BondDir bD = bond->getBondDir(); unsigned int bondidx; auto nbr_bond_stereo = getNbrBondStereo(*res, bond); + // Grab a copy of any query on the outgoing bond if it will be needed later. + auto outBond = res->getBondBetweenAtoms(bidx, eidx); + std::unique_ptr outQuery; + if (!bondTypes && outBond->hasQuery()) { + outQuery.reset(outBond->getQuery()->copy()); + } res->removeBond(bidx, eidx); if (nCutsPerAtom) { (*nCutsPerAtom)[bidx] += 1; @@ -491,12 +498,23 @@ ROMol *fragmentOnBonds( if (bD == Bond::ENDDOWNRIGHT || bD == Bond::ENDUPRIGHT) { res->getBondWithIdx(bondidx)->setBondDir(bD); } + // transfer any query from the outgoing bond to the new one + if (outQuery) { + auto qb = std::make_unique(*res->getBondWithIdx(bondidx)); + qb->setQuery(outQuery->copy()); + res->replaceBond(bondidx, qb.get()); + } unsigned int idx2 = res->addAtom(at2, false, true); bondidx = res->addBond(bidx, at2->getIdx(), bT) - 1; // this bond starts at the same atom, so its direction should always be // correct: res->getBondWithIdx(bondidx)->setBondDir(bD); + if (outQuery) { + auto qb = std::make_unique(*res->getBondWithIdx(bondidx)); + qb->setQuery(outQuery->copy()); + res->replaceBond(bondidx, qb.get()); + } // restore stereo atoms for (auto &stereo_atoms : nbr_bond_stereo) { diff --git a/Code/GraphMol/ChemTransforms/MolFragmenter.h b/Code/GraphMol/ChemTransforms/MolFragmenter.h index cd7c876dc..cdc1ba800 100644 --- a/Code/GraphMol/ChemTransforms/MolFragmenter.h +++ b/Code/GraphMol/ChemTransforms/MolFragmenter.h @@ -46,6 +46,9 @@ struct RDKIT_CHEMTRANSFORMS_EXPORT FragmenterBondType { \return a new ROMol with the modifications The client is responsible for deleting this molecule. + Unless bondTypes is provided, the bonds to the new dummy atoms will be + of the same type as the bond that is broken, and any queries on the + broken bonds transferred to the new bonds. */ RDKIT_CHEMTRANSFORMS_EXPORT ROMol *fragmentOnBonds( diff --git a/Code/GraphMol/ChemTransforms/catch_tests.cpp b/Code/GraphMol/ChemTransforms/catch_tests.cpp index a059a4a3d..cfe3b7841 100644 --- a/Code/GraphMol/ChemTransforms/catch_tests.cpp +++ b/Code/GraphMol/ChemTransforms/catch_tests.cpp @@ -584,4 +584,70 @@ TEST_CASE( CHECK(MolToSmiles(*res) == "[1*]/C=C/N.[2*]CC"); } +} + +TEST_CASE( + "Github #9009: fragmentOnBonds should transfer queries on deleted bonds") { + // Basic test + auto m1 = "c1ccccc1-C1CCCCC1"_smarts; + REQUIRE(m1); + auto bond1 = m1->getBondWithIdx(1); + REQUIRE(bond1); + REQUIRE(bond1->hasQuery()); + auto splitM1( + MolFragmenter::fragmentOnBonds(*m1, std::vector{1, 5})); + REQUIRE(splitM1); + auto nb1 = splitM1->getBondWithIdx(11); + REQUIRE(nb1); + CHECK(nb1->hasQuery()); + CHECK(nb1->getQuery()->getDescription() == "SingleOrAromaticBond"); + + auto nb2 = splitM1->getBondWithIdx(12); + REQUIRE(nb2); + CHECK(nb2->hasQuery()); + CHECK(nb2->getQuery()->getDescription() == "SingleOrAromaticBond"); + + auto nb3 = splitM1->getBondWithIdx(13); + REQUIRE(nb3); + CHECK(nb3->hasQuery()); + CHECK(nb3->getQuery()->getDescription() == "BondOrder"); + auto nb4 = splitM1->getBondWithIdx(14); + REQUIRE(nb4); + CHECK(nb4->hasQuery()); + CHECK(nb4->getQuery()->getDescription() == "BondOrder"); + + // Check it does nothing if there isn't a query + auto m2 = "c1ccccc1"_smiles; + REQUIRE(m2); + auto bond2 = m2->getBondWithIdx(1); + REQUIRE(bond2); + REQUIRE(!bond2->hasQuery()); + auto splitM2( + MolFragmenter::fragmentOnBonds(*m2, std::vector{1})); + REQUIRE(splitM2); + auto nb5 = splitM2->getBondWithIdx(5); + REQUIRE(nb5); + CHECK(!nb5->hasQuery()); + auto nb6 = splitM2->getBondWithIdx(6); + REQUIRE(nb6); + CHECK(!nb6->hasQuery()); + + // Check it does nothing if bond types are specified + auto m3 = "c1ccccc1"_smiles; + REQUIRE(m3); + auto bond3 = m3->getBondWithIdx(1); + REQUIRE(bond3); + REQUIRE(!bond3->hasQuery()); + auto dummyLabels = + std::vector>{{25, 26}}; + auto newTypes = std::vector{Bond::DOUBLE}; + auto splitM3(MolFragmenter::fragmentOnBonds(*m3, std::vector{1}, + true, &dummyLabels, &newTypes)); + REQUIRE(splitM3); + auto nb7 = splitM3->getBondWithIdx(5); + REQUIRE(nb7); + CHECK(!nb7->hasQuery()); + auto nb8 = splitM3->getBondWithIdx(6); + REQUIRE(nb8); + CHECK(!nb8->hasQuery()); } \ No newline at end of file diff --git a/Code/GraphMol/SynthonSpaceSearch/SynthonSpaceSearch_details.cpp b/Code/GraphMol/SynthonSpaceSearch/SynthonSpaceSearch_details.cpp index 5dfe27451..d35ecc01d 100644 --- a/Code/GraphMol/SynthonSpaceSearch/SynthonSpaceSearch_details.cpp +++ b/Code/GraphMol/SynthonSpaceSearch/SynthonSpaceSearch_details.cpp @@ -366,8 +366,6 @@ void doPartInitialFragmentation( bool timedOut = false; while (true) { std::int64_t thisRB = ++mostRecentRingBond; - // std::cout << "ring bond " << thisRB << " of " << lastRingBond << " and " - // << tmpFrags.size() << std::endl; if (thisRB > lastRingBond) { break; } @@ -575,7 +573,7 @@ std::vector>> splitMolecule( return fragments; } - // Keep unique SMILES onlyu + // Keep unique SMILES only std::sort(tmpFrags.begin(), tmpFrags.end(), [](const auto &lhs, const auto &rhs) -> bool { return lhs.first < rhs.first; diff --git a/Code/GraphMol/SynthonSpaceSearch/SynthonSpaceSearcher.cpp b/Code/GraphMol/SynthonSpaceSearch/SynthonSpaceSearcher.cpp index 81e1ffd59..ad7ac11c5 100644 --- a/Code/GraphMol/SynthonSpaceSearch/SynthonSpaceSearcher.cpp +++ b/Code/GraphMol/SynthonSpaceSearch/SynthonSpaceSearcher.cpp @@ -106,8 +106,8 @@ void SynthonSpaceSearcher::search(const SearchResultCallback &cb) { } // Do any remaining. - if ((d_params.maxHits == -1 || hitCount < d_params.maxHits) && - !stop && !toTry.empty()) { + if ((d_params.maxHits == -1 || hitCount < d_params.maxHits) && !stop && + !toTry.empty()) { std::vector> partResults; processToTrySet(toTry, endTime, partResults); cb(partResults); @@ -393,11 +393,8 @@ void sortAndUniquifyToTry( std::vector>> newToTry; newToTry.reserve(tmp.size()); - std::transform( - tmp.begin(), tmp.end(), - back_inserter(newToTry), [&](const auto &p) -> auto{ - return toTry[p.first]; - }); + std::transform(tmp.begin(), tmp.end(), back_inserter(newToTry), + [&](const auto &p) -> auto { return toTry[p.first]; }); toTry = newToTry; } diff --git a/Code/GraphMol/SynthonSpaceSearch/substructure_search_catch_tests.cpp b/Code/GraphMol/SynthonSpaceSearch/substructure_search_catch_tests.cpp index 7dcfcde7f..df289b219 100644 --- a/Code/GraphMol/SynthonSpaceSearch/substructure_search_catch_tests.cpp +++ b/Code/GraphMol/SynthonSpaceSearch/substructure_search_catch_tests.cpp @@ -152,12 +152,13 @@ TEST_CASE("S Amide 1") { CHECK(resSmi == enumSmi); resSmi.clear(); - SearchResultCallback cb = [&resSmi](const std::vector> &r) { - for (auto &elem : r) { - resSmi.insert(MolToSmiles(*elem)); - } - return false; - }; + SearchResultCallback cb = + [&resSmi](const std::vector> &r) { + for (auto &elem : r) { + resSmi.insert(MolToSmiles(*elem)); + } + return false; + }; synthonspace.substructureSearch(*queryMol, cb, matchParams, params); CHECK(resSmi == enumSmi); } @@ -181,13 +182,14 @@ TEST_CASE("Search Callback returns true") { params.toTryChunkSize = 2; std::set cbSmi; bool retval = false; - SearchResultCallback cb = [&cbSmi,&retval](const std::vector> &r) { - for (auto &elem : r) { - CHECK(r.size() == 2); - cbSmi.insert(MolToSmiles(*elem)); - } - return retval; - }; + SearchResultCallback cb = + [&cbSmi, &retval](const std::vector> &r) { + for (auto &elem : r) { + CHECK(r.size() == 2); + cbSmi.insert(MolToSmiles(*elem)); + } + return retval; + }; synthonspace.substructureSearch(*queryMol, cb, matchParams, params); CHECK(cbSmi.size() == 6); @@ -833,3 +835,37 @@ TEST_CASE("Enhanced Stereochemistry - Github 8650") { REQUIRE(synthons[0].size() == 1); CHECK(synthons[0][0].second->getSmiles() == "C[C@H]1CC[C@H](CC1)F |&1:1,4|"); } + +TEST_CASE("Github 9009") { + { + // Single bond to "extra" aromatic C + SynthonSpace space; + std::istringstream iss( + R"(SMILES synton_id synton# reaction_id release +O=c1ccncn1[1*] 192 1 r2 1 +[1*]c1ccccc1 227 2 r2 1 +)"); + bool cancelled = false; + space.readStream(iss, cancelled); + auto q1 = "O=c1n([c])cncc1"_smarts; + auto res1 = space.substructureSearch(*q1); + CHECK(res1.getHitMolecules().size() == 1); + auto q2 = "O=c1n([a])cncc1"_smarts; + auto res2 = space.substructureSearch(*q2); + CHECK(res2.getHitMolecules().size() == 1); + } + { + // Check that aromatic bond works + SynthonSpace space; + std::istringstream iss( + R"(SMILES synton_id synton# reaction_id release +[1*]C=CC=C[2*] 192 1 r2 1 +O=C1NC=NC([2*])=C1[1*] 227 2 r2 1 +)"); + bool cancelled = false; + space.readStream(iss, cancelled); + auto q3 = "O=c1ncncc1c"_smarts; + auto res3 = space.substructureSearch(*q3); + CHECK(res3.getHitMolecules().size() == 1); + } +} \ No newline at end of file