Fixes Github9009 (#9012)

* Mostly working.

* Clear out debugging writes.

* Avoid a mol copy.

* Left some debugging in.

* Move the fix into fragmentOnBonds.

* Add test.

---------

Co-authored-by: David Cosgrove <david@cozchemix.co.uk>
This commit is contained in:
David Cosgrove
2025-12-22 05:26:23 +00:00
committed by Greg Landrum
parent 4c3a8ccfd0
commit b81004602a
6 changed files with 141 additions and 23 deletions

View File

@@ -11,6 +11,7 @@
#include "MolFragmenter.h"
#include <GraphMol/Depictor/RDDepictor.h>
#include <GraphMol/QueryBond.h>
#include <GraphMol/RDKitBase.h>
#include <GraphMol/SmilesParse/SmilesParse.h>
#include <GraphMol/SmilesParse/SmilesWrite.h>
@@ -464,6 +465,12 @@ ROMol *fragmentOnBonds(
Bond::BondDir bD = bond->getBondDir();
unsigned int bondidx;
auto nbr_bond_stereo = getNbrBondStereo(*res, bond);
// Grab a copy of any query on the outgoing bond if it will be needed later.
auto outBond = res->getBondBetweenAtoms(bidx, eidx);
std::unique_ptr<Bond::QUERYBOND_QUERY> outQuery;
if (!bondTypes && outBond->hasQuery()) {
outQuery.reset(outBond->getQuery()->copy());
}
res->removeBond(bidx, eidx);
if (nCutsPerAtom) {
(*nCutsPerAtom)[bidx] += 1;
@@ -491,12 +498,23 @@ ROMol *fragmentOnBonds(
if (bD == Bond::ENDDOWNRIGHT || bD == Bond::ENDUPRIGHT) {
res->getBondWithIdx(bondidx)->setBondDir(bD);
}
// transfer any query from the outgoing bond to the new one
if (outQuery) {
auto qb = std::make_unique<QueryBond>(*res->getBondWithIdx(bondidx));
qb->setQuery(outQuery->copy());
res->replaceBond(bondidx, qb.get());
}
unsigned int idx2 = res->addAtom(at2, false, true);
bondidx = res->addBond(bidx, at2->getIdx(), bT) - 1;
// this bond starts at the same atom, so its direction should always be
// correct:
res->getBondWithIdx(bondidx)->setBondDir(bD);
if (outQuery) {
auto qb = std::make_unique<QueryBond>(*res->getBondWithIdx(bondidx));
qb->setQuery(outQuery->copy());
res->replaceBond(bondidx, qb.get());
}
// restore stereo atoms
for (auto &stereo_atoms : nbr_bond_stereo) {

View File

@@ -46,6 +46,9 @@ struct RDKIT_CHEMTRANSFORMS_EXPORT FragmenterBondType {
\return a new ROMol with the modifications
The client is responsible for deleting this molecule.
Unless bondTypes is provided, the bonds to the new dummy atoms will be
of the same type as the bond that is broken, and any queries on the
broken bonds transferred to the new bonds.
*/
RDKIT_CHEMTRANSFORMS_EXPORT ROMol *fragmentOnBonds(

View File

@@ -584,4 +584,70 @@ TEST_CASE(
CHECK(MolToSmiles(*res) == "[1*]/C=C/N.[2*]CC");
}
}
TEST_CASE(
"Github #9009: fragmentOnBonds should transfer queries on deleted bonds") {
// Basic test
auto m1 = "c1ccccc1-C1CCCCC1"_smarts;
REQUIRE(m1);
auto bond1 = m1->getBondWithIdx(1);
REQUIRE(bond1);
REQUIRE(bond1->hasQuery());
auto splitM1(
MolFragmenter::fragmentOnBonds(*m1, std::vector<unsigned int>{1, 5}));
REQUIRE(splitM1);
auto nb1 = splitM1->getBondWithIdx(11);
REQUIRE(nb1);
CHECK(nb1->hasQuery());
CHECK(nb1->getQuery()->getDescription() == "SingleOrAromaticBond");
auto nb2 = splitM1->getBondWithIdx(12);
REQUIRE(nb2);
CHECK(nb2->hasQuery());
CHECK(nb2->getQuery()->getDescription() == "SingleOrAromaticBond");
auto nb3 = splitM1->getBondWithIdx(13);
REQUIRE(nb3);
CHECK(nb3->hasQuery());
CHECK(nb3->getQuery()->getDescription() == "BondOrder");
auto nb4 = splitM1->getBondWithIdx(14);
REQUIRE(nb4);
CHECK(nb4->hasQuery());
CHECK(nb4->getQuery()->getDescription() == "BondOrder");
// Check it does nothing if there isn't a query
auto m2 = "c1ccccc1"_smiles;
REQUIRE(m2);
auto bond2 = m2->getBondWithIdx(1);
REQUIRE(bond2);
REQUIRE(!bond2->hasQuery());
auto splitM2(
MolFragmenter::fragmentOnBonds(*m2, std::vector<unsigned int>{1}));
REQUIRE(splitM2);
auto nb5 = splitM2->getBondWithIdx(5);
REQUIRE(nb5);
CHECK(!nb5->hasQuery());
auto nb6 = splitM2->getBondWithIdx(6);
REQUIRE(nb6);
CHECK(!nb6->hasQuery());
// Check it does nothing if bond types are specified
auto m3 = "c1ccccc1"_smiles;
REQUIRE(m3);
auto bond3 = m3->getBondWithIdx(1);
REQUIRE(bond3);
REQUIRE(!bond3->hasQuery());
auto dummyLabels =
std::vector<std::pair<unsigned int, unsigned int>>{{25, 26}};
auto newTypes = std::vector<Bond::BondType>{Bond::DOUBLE};
auto splitM3(MolFragmenter::fragmentOnBonds(*m3, std::vector<unsigned int>{1},
true, &dummyLabels, &newTypes));
REQUIRE(splitM3);
auto nb7 = splitM3->getBondWithIdx(5);
REQUIRE(nb7);
CHECK(!nb7->hasQuery());
auto nb8 = splitM3->getBondWithIdx(6);
REQUIRE(nb8);
CHECK(!nb8->hasQuery());
}

View File

@@ -366,8 +366,6 @@ void doPartInitialFragmentation(
bool timedOut = false;
while (true) {
std::int64_t thisRB = ++mostRecentRingBond;
// std::cout << "ring bond " << thisRB << " of " << lastRingBond << " and "
// << tmpFrags.size() << std::endl;
if (thisRB > lastRingBond) {
break;
}
@@ -575,7 +573,7 @@ std::vector<std::vector<std::unique_ptr<ROMol>>> splitMolecule(
return fragments;
}
// Keep unique SMILES onlyu
// Keep unique SMILES only
std::sort(tmpFrags.begin(), tmpFrags.end(),
[](const auto &lhs, const auto &rhs) -> bool {
return lhs.first < rhs.first;

View File

@@ -106,8 +106,8 @@ void SynthonSpaceSearcher::search(const SearchResultCallback &cb) {
}
// Do any remaining.
if ((d_params.maxHits == -1 || hitCount < d_params.maxHits) &&
!stop && !toTry.empty()) {
if ((d_params.maxHits == -1 || hitCount < d_params.maxHits) && !stop &&
!toTry.empty()) {
std::vector<std::unique_ptr<ROMol>> partResults;
processToTrySet(toTry, endTime, partResults);
cb(partResults);
@@ -393,11 +393,8 @@ void sortAndUniquifyToTry(
std::vector<std::pair<const SynthonSpaceHitSet *, std::vector<size_t>>>
newToTry;
newToTry.reserve(tmp.size());
std::transform(
tmp.begin(), tmp.end(),
back_inserter(newToTry), [&](const auto &p) -> auto{
return toTry[p.first];
});
std::transform(tmp.begin(), tmp.end(), back_inserter(newToTry),
[&](const auto &p) -> auto { return toTry[p.first]; });
toTry = newToTry;
}

View File

@@ -152,12 +152,13 @@ TEST_CASE("S Amide 1") {
CHECK(resSmi == enumSmi);
resSmi.clear();
SearchResultCallback cb = [&resSmi](const std::vector<std::unique_ptr<ROMol>> &r) {
for (auto &elem : r) {
resSmi.insert(MolToSmiles(*elem));
}
return false;
};
SearchResultCallback cb =
[&resSmi](const std::vector<std::unique_ptr<ROMol>> &r) {
for (auto &elem : r) {
resSmi.insert(MolToSmiles(*elem));
}
return false;
};
synthonspace.substructureSearch(*queryMol, cb, matchParams, params);
CHECK(resSmi == enumSmi);
}
@@ -181,13 +182,14 @@ TEST_CASE("Search Callback returns true") {
params.toTryChunkSize = 2;
std::set<std::string> cbSmi;
bool retval = false;
SearchResultCallback cb = [&cbSmi,&retval](const std::vector<std::unique_ptr<ROMol>> &r) {
for (auto &elem : r) {
CHECK(r.size() == 2);
cbSmi.insert(MolToSmiles(*elem));
}
return retval;
};
SearchResultCallback cb =
[&cbSmi, &retval](const std::vector<std::unique_ptr<ROMol>> &r) {
for (auto &elem : r) {
CHECK(r.size() == 2);
cbSmi.insert(MolToSmiles(*elem));
}
return retval;
};
synthonspace.substructureSearch(*queryMol, cb, matchParams, params);
CHECK(cbSmi.size() == 6);
@@ -833,3 +835,37 @@ TEST_CASE("Enhanced Stereochemistry - Github 8650") {
REQUIRE(synthons[0].size() == 1);
CHECK(synthons[0][0].second->getSmiles() == "C[C@H]1CC[C@H](CC1)F |&1:1,4|");
}
TEST_CASE("Github 9009") {
{
// Single bond to "extra" aromatic C
SynthonSpace space;
std::istringstream iss(
R"(SMILES synton_id synton# reaction_id release
O=c1ccncn1[1*] 192 1 r2 1
[1*]c1ccccc1 227 2 r2 1
)");
bool cancelled = false;
space.readStream(iss, cancelled);
auto q1 = "O=c1n([c])cncc1"_smarts;
auto res1 = space.substructureSearch(*q1);
CHECK(res1.getHitMolecules().size() == 1);
auto q2 = "O=c1n([a])cncc1"_smarts;
auto res2 = space.substructureSearch(*q2);
CHECK(res2.getHitMolecules().size() == 1);
}
{
// Check that aromatic bond works
SynthonSpace space;
std::istringstream iss(
R"(SMILES synton_id synton# reaction_id release
[1*]C=CC=C[2*] 192 1 r2 1
O=C1NC=NC([2*])=C1[1*] 227 2 r2 1
)");
bool cancelled = false;
space.readStream(iss, cancelled);
auto q3 = "O=c1ncncc1c"_smarts;
auto res3 = space.substructureSearch(*q3);
CHECK(res3.getHitMolecules().size() == 1);
}
}