mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-03 21:44:30 +08:00
Allow Multiple Core Hits in the Same Molecule in RGroupDecomposition (#8813)
* Allow the same core to match more than once in a molecule. * Update annotation. * Changes after review. --------- Co-authored-by: David Cosgrove <david@cozchemix.co.uk>
This commit is contained in:
committed by
greg landrum
parent
e6f37dc498
commit
53203079c1
@@ -346,6 +346,36 @@ int RGroupDecomposition::getMatchingCoreInternal(
|
||||
return core_idx;
|
||||
}
|
||||
|
||||
namespace {
|
||||
// Take the matches, all from the same molecule and split them so that
|
||||
// different atom sets are separated out. So that if a core hits
|
||||
// more than once in the molecule, both sets of R Groups will be
|
||||
// returned.
|
||||
std::vector<std::vector<MatchVectType>> splitNonUniqueMatches(
|
||||
const std::vector<MatchVectType> &tmatches, unsigned int nAtoms) {
|
||||
std::vector<std::vector<MatchVectType>> outMatches;
|
||||
std::vector<boost::dynamic_bitset<>> atomSets;
|
||||
for (const auto &match : tmatches) {
|
||||
boost::dynamic_bitset<> atomSet(nAtoms);
|
||||
for (const auto &mp : match) {
|
||||
atomSet.set(mp.second);
|
||||
}
|
||||
if (std::find(atomSets.begin(), atomSets.end(), atomSet) ==
|
||||
atomSets.end()) {
|
||||
atomSets.push_back(atomSet);
|
||||
outMatches.push_back(std::vector<MatchVectType>(1, match));
|
||||
} else {
|
||||
for (size_t i = 0; i < atomSets.size(); ++i) {
|
||||
if (atomSet == atomSets[i]) {
|
||||
outMatches[i].push_back(match);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return outMatches;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
int RGroupDecomposition::add(const ROMol &inmol) {
|
||||
RWMOL_SPTR mol(new RWMol(inmol));
|
||||
const RCore *rcore;
|
||||
@@ -371,6 +401,7 @@ int RGroupDecomposition::add(const ROMol &inmol) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// mark any wildcards in input molecule:
|
||||
for (auto &atom : mol->atoms()) {
|
||||
if (atom->getAtomicNum() == 0) {
|
||||
@@ -397,180 +428,192 @@ int RGroupDecomposition::add(const ROMol &inmol) {
|
||||
std::vector<RGroupMatch> potentialMatches;
|
||||
constexpr size_t MAX_PERMUTATIONS = 100000;
|
||||
|
||||
std::unique_ptr<ROMol> tMol;
|
||||
for (const auto &tmatche : tmatches) {
|
||||
const bool replaceDummies = false;
|
||||
const bool labelByIndex = true;
|
||||
const bool requireDummyMatch = false;
|
||||
// TODO see if we need replaceCoreAtomsWithMolMatches or can just use
|
||||
// rcore->core
|
||||
auto coreCopy = rcore->replaceCoreAtomsWithMolMatches(*mol, tmatche);
|
||||
tMol.reset(replaceCore(*mol, *coreCopy, tmatche, replaceDummies,
|
||||
labelByIndex, requireDummyMatch));
|
||||
std::vector<std::vector<MatchVectType>> nonUniqueMatches;
|
||||
if (data->params.allowMultipleCoresInSameMol) {
|
||||
nonUniqueMatches = splitNonUniqueMatches(tmatches, mol->getNumAtoms());
|
||||
} else {
|
||||
nonUniqueMatches.push_back(tmatches);
|
||||
}
|
||||
|
||||
for (const auto &splitMatch : nonUniqueMatches) {
|
||||
std::unique_ptr<ROMol> tMol;
|
||||
for (const auto &tmatche : splitMatch) {
|
||||
const bool replaceDummies = false;
|
||||
const bool labelByIndex = true;
|
||||
const bool requireDummyMatch = false;
|
||||
// TODO see if we need replaceCoreAtomsWithMolMatches or can just use
|
||||
// rcore->core
|
||||
auto coreCopy = rcore->replaceCoreAtomsWithMolMatches(*mol, tmatche);
|
||||
tMol.reset(replaceCore(*mol, *coreCopy, tmatche, replaceDummies,
|
||||
labelByIndex, requireDummyMatch));
|
||||
#ifdef VERBOSE
|
||||
std::cerr << "Core Match core_idx " << core_idx << " idx "
|
||||
<< data->matches.size() << ": " << MolToSmarts(*coreCopy)
|
||||
<< std::endl;
|
||||
std::cerr << "Core Match core_idx " << core_idx << " idx "
|
||||
<< data->matches.size() << ": " << MolToSmarts(*coreCopy)
|
||||
<< std::endl;
|
||||
#endif
|
||||
if (tMol) {
|
||||
if (tMol) {
|
||||
#ifdef VERBOSE
|
||||
std::cerr << "All Fragments " << MolToSmiles(*tMol) << std::endl;
|
||||
std::cerr << "All Fragments " << MolToSmiles(*tMol) << std::endl;
|
||||
#endif
|
||||
R_DECOMP match;
|
||||
// rlabel rgroups
|
||||
MOL_SPTR_VECT fragments = MolOps::getMolFrags(*tMol, false);
|
||||
std::set<int> coreAtomAnyMatched;
|
||||
// get the sidechains
|
||||
for (size_t i = 0; i < fragments.size(); ++i) {
|
||||
const auto &newMol = fragments[i];
|
||||
std::vector<int> rlabelsOnSideChain;
|
||||
newMol->setProp<int>("core", core_idx);
|
||||
newMol->setProp<int>("idx", data->matches.size());
|
||||
newMol->setProp<int>("frag_idx", i);
|
||||
R_DECOMP match;
|
||||
// rlabel rgroups
|
||||
MOL_SPTR_VECT fragments = MolOps::getMolFrags(*tMol, false);
|
||||
std::set<int> coreAtomAnyMatched;
|
||||
// get the sidechains
|
||||
for (size_t i = 0; i < fragments.size(); ++i) {
|
||||
const auto &newMol = fragments[i];
|
||||
std::vector<int> rlabelsOnSideChain;
|
||||
newMol->setProp<int>("core", core_idx);
|
||||
newMol->setProp<int>("idx", data->matches.size());
|
||||
newMol->setProp<int>("frag_idx", i);
|
||||
#ifdef VERBOSE
|
||||
std::cerr << "Fragment " << MolToSmiles(*newMol) << std::endl;
|
||||
std::cerr << "Fragment " << MolToSmiles(*newMol) << std::endl;
|
||||
#endif
|
||||
for (auto sideChainAtom : newMol->atoms()) {
|
||||
if (sideChainAtom->getAtomicNum() != 0) {
|
||||
// we are only interested in sidechain R group atoms
|
||||
continue;
|
||||
for (auto sideChainAtom : newMol->atoms()) {
|
||||
if (sideChainAtom->getAtomicNum() != 0) {
|
||||
// we are only interested in sidechain R group atoms
|
||||
continue;
|
||||
}
|
||||
if (!sideChainAtom->hasProp(_rgroupInputDummy)) {
|
||||
// this is the index of the core atom that the R group
|
||||
// atom is attached to
|
||||
unsigned int coreAtomIndex = sideChainAtom->getIsotope();
|
||||
auto coreAtom = rcore->core->getAtomWithIdx(coreAtomIndex);
|
||||
coreAtomAnyMatched.insert(coreAtomIndex);
|
||||
int rlabel;
|
||||
if (coreAtom->getPropIfPresent(RLABEL, rlabel)) {
|
||||
std::vector<int> rlabelsOnSideChainAtom;
|
||||
sideChainAtom->getPropIfPresent(SIDECHAIN_RLABELS,
|
||||
rlabelsOnSideChainAtom);
|
||||
rlabelsOnSideChainAtom.push_back(rlabel);
|
||||
sideChainAtom->setProp(SIDECHAIN_RLABELS,
|
||||
rlabelsOnSideChainAtom);
|
||||
data->labels.insert(rlabel); // keep track of all labels used
|
||||
rlabelsOnSideChain.push_back(rlabel);
|
||||
if (const auto [bondIdx, end] =
|
||||
newMol->getAtomBonds(sideChainAtom);
|
||||
bondIdx != end) {
|
||||
auto connectingBond = (*newMol)[*bondIdx];
|
||||
if (connectingBond->getStereo() >
|
||||
Bond::BondStereo::STEREOANY) {
|
||||
// TODO: how to handle bond stereo on rgroups connected to
|
||||
// core by stereo double bonds
|
||||
connectingBond->setStereo(Bond::BondStereo::STEREOANY);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// restore input wildcard
|
||||
sideChainAtom->clearProp(_rgroupInputDummy);
|
||||
}
|
||||
}
|
||||
if (!sideChainAtom->hasProp(_rgroupInputDummy)) {
|
||||
// this is the index of the core atom that the R group
|
||||
// atom is attached to
|
||||
unsigned int coreAtomIndex = sideChainAtom->getIsotope();
|
||||
auto coreAtom = rcore->core->getAtomWithIdx(coreAtomIndex);
|
||||
coreAtomAnyMatched.insert(coreAtomIndex);
|
||||
int rlabel;
|
||||
if (coreAtom->getPropIfPresent(RLABEL, rlabel)) {
|
||||
std::vector<int> rlabelsOnSideChainAtom;
|
||||
sideChainAtom->getPropIfPresent(SIDECHAIN_RLABELS,
|
||||
rlabelsOnSideChainAtom);
|
||||
rlabelsOnSideChainAtom.push_back(rlabel);
|
||||
sideChainAtom->setProp(SIDECHAIN_RLABELS, rlabelsOnSideChainAtom);
|
||||
data->labels.insert(rlabel); // keep track of all labels used
|
||||
rlabelsOnSideChain.push_back(rlabel);
|
||||
if (const auto [bondIdx, end] =
|
||||
newMol->getAtomBonds(sideChainAtom);
|
||||
bondIdx != end) {
|
||||
auto connectingBond = (*newMol)[*bondIdx];
|
||||
if (connectingBond->getStereo() > Bond::BondStereo::STEREOANY) {
|
||||
// TODO: how to handle bond stereo on rgroups connected to
|
||||
// core by stereo double bonds
|
||||
connectingBond->setStereo(Bond::BondStereo::STEREOANY);
|
||||
if (data->params.includeTargetMolInResults) {
|
||||
setTargetAtomBondIndices(*newMol, true);
|
||||
}
|
||||
if (!rlabelsOnSideChain.empty()) {
|
||||
#ifdef VERBOSE
|
||||
std::string newCoreSmi = MolToSmiles(*newMol, true);
|
||||
#endif
|
||||
|
||||
for (auto rlabel : rlabelsOnSideChain) {
|
||||
ADD_MATCH(match, rlabel);
|
||||
match[rlabel]->add(newMol, rlabelsOnSideChain);
|
||||
#ifdef VERBOSE
|
||||
std::cerr << "Fragment " << i << " R" << rlabel << " "
|
||||
<< MolToSmiles(*newMol) << std::endl;
|
||||
#endif
|
||||
}
|
||||
} else {
|
||||
// special case, only one fragment
|
||||
if (fragments.size() == 1) { // need to make a new core
|
||||
// remove the sidechains
|
||||
|
||||
// GJ I think if we ever get here that it's really an error and I
|
||||
// believe that I've fixed the case where this code was called.
|
||||
// Still, I'm too scared to delete the block.
|
||||
RWMol newCore(*mol);
|
||||
|
||||
for (const auto &mvpair : tmatche) {
|
||||
const Atom *coreAtm = rcore->core->getAtomWithIdx(mvpair.first);
|
||||
Atom *newCoreAtm = newCore.getAtomWithIdx(mvpair.second);
|
||||
int rlabel;
|
||||
if (coreAtm->getPropIfPresent(RLABEL, rlabel)) {
|
||||
newCoreAtm->setProp<int>(RLABEL, rlabel);
|
||||
}
|
||||
newCoreAtm->setProp<bool>("keep", true);
|
||||
}
|
||||
|
||||
newCore.beginBatchEdit();
|
||||
for (const auto atom : newCore.atoms()) {
|
||||
if (!atom->hasProp("keep")) {
|
||||
newCore.removeAtom(atom);
|
||||
}
|
||||
}
|
||||
newCore.commitBatchEdit();
|
||||
if (newCore.getNumAtoms()) {
|
||||
std::string newCoreSmi = MolToSmiles(newCore, true);
|
||||
// add a new core if possible
|
||||
auto newcore = data->newCores.find(newCoreSmi);
|
||||
int core_idx = 0;
|
||||
if (newcore == data->newCores.end()) {
|
||||
core_idx = data->newCores[newCoreSmi] = data->newCoreLabel--;
|
||||
data->cores[core_idx] = RCore(newCore);
|
||||
return add(inmol);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// restore input wildcard
|
||||
sideChainAtom->clearProp(_rgroupInputDummy);
|
||||
}
|
||||
}
|
||||
if (data->params.includeTargetMolInResults) {
|
||||
setTargetAtomBondIndices(*newMol, true);
|
||||
}
|
||||
if (!rlabelsOnSideChain.empty()) {
|
||||
#ifdef VERBOSE
|
||||
std::string newCoreSmi = MolToSmiles(*newMol, true);
|
||||
#endif
|
||||
|
||||
for (auto rlabel : rlabelsOnSideChain) {
|
||||
ADD_MATCH(match, rlabel);
|
||||
match[rlabel]->add(newMol, rlabelsOnSideChain);
|
||||
#ifdef VERBOSE
|
||||
std::cerr << "Fragment " << i << " R" << rlabel << " "
|
||||
<< MolToSmiles(*newMol) << std::endl;
|
||||
#endif
|
||||
if (!match.empty()) {
|
||||
// this is the number of user-defined R labels associated with
|
||||
// non-hydrogen substituents
|
||||
auto numberUserGroupsInMatch = std::accumulate(
|
||||
match.begin(), match.end(), 0,
|
||||
[](int sum,
|
||||
const std::pair<int, boost::shared_ptr<RGroupData>> &p) {
|
||||
return p.first > 0 && !p.second->is_hydrogen ? ++sum : sum;
|
||||
});
|
||||
int numberMissingUserGroups =
|
||||
rcore->numberUserRGroups - numberUserGroupsInMatch;
|
||||
CHECK_INVARIANT(numberMissingUserGroups >= 0,
|
||||
"Data error in missing user rgroup count");
|
||||
const auto extractedCore =
|
||||
rcore->extractCoreFromMolMatch(*mol, tmatche, params());
|
||||
if (data->params.includeTargetMolInResults) {
|
||||
setTargetAtomBondIndices(*extractedCore, false);
|
||||
}
|
||||
} else {
|
||||
// special case, only one fragment
|
||||
if (fragments.size() == 1) { // need to make a new core
|
||||
// remove the sidechains
|
||||
|
||||
// GJ I think if we ever get here that it's really an error and I
|
||||
// believe that I've fixed the case where this code was called.
|
||||
// Still, I'm too scared to delete the block.
|
||||
RWMol newCore(*mol);
|
||||
|
||||
for (const auto &mvpair : tmatche) {
|
||||
const Atom *coreAtm = rcore->core->getAtomWithIdx(mvpair.first);
|
||||
Atom *newCoreAtm = newCore.getAtomWithIdx(mvpair.second);
|
||||
int rlabel;
|
||||
if (coreAtm->getPropIfPresent(RLABEL, rlabel)) {
|
||||
newCoreAtm->setProp<int>(RLABEL, rlabel);
|
||||
}
|
||||
newCoreAtm->setProp<bool>("keep", true);
|
||||
}
|
||||
|
||||
newCore.beginBatchEdit();
|
||||
for (const auto atom : newCore.atoms()) {
|
||||
if (!atom->hasProp("keep")) {
|
||||
newCore.removeAtom(atom);
|
||||
}
|
||||
}
|
||||
newCore.commitBatchEdit();
|
||||
if (newCore.getNumAtoms()) {
|
||||
std::string newCoreSmi = MolToSmiles(newCore, true);
|
||||
// add a new core if possible
|
||||
auto newcore = data->newCores.find(newCoreSmi);
|
||||
int core_idx = 0;
|
||||
if (newcore == data->newCores.end()) {
|
||||
core_idx = data->newCores[newCoreSmi] = data->newCoreLabel--;
|
||||
data->cores[core_idx] = RCore(newCore);
|
||||
return add(inmol);
|
||||
}
|
||||
}
|
||||
potentialMatches.emplace_back(core_idx, numberMissingUserGroups,
|
||||
match, extractedCore);
|
||||
if (data->params.includeTargetMolInResults) {
|
||||
potentialMatches.back().setTargetMoleculeForHighlights(mol);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!match.empty()) {
|
||||
// this is the number of user-defined R labels associated with
|
||||
// non-hydrogen substituents
|
||||
auto numberUserGroupsInMatch = std::accumulate(
|
||||
match.begin(), match.end(), 0,
|
||||
[](int sum,
|
||||
const std::pair<int, boost::shared_ptr<RGroupData>> &p) {
|
||||
return p.first > 0 && !p.second->is_hydrogen ? ++sum : sum;
|
||||
});
|
||||
int numberMissingUserGroups =
|
||||
rcore->numberUserRGroups - numberUserGroupsInMatch;
|
||||
CHECK_INVARIANT(numberMissingUserGroups >= 0,
|
||||
"Data error in missing user rgroup count");
|
||||
const auto extractedCore =
|
||||
rcore->extractCoreFromMolMatch(*mol, tmatche, params());
|
||||
if (data->params.includeTargetMolInResults) {
|
||||
setTargetAtomBondIndices(*extractedCore, false);
|
||||
}
|
||||
potentialMatches.emplace_back(core_idx, numberMissingUserGroups, match,
|
||||
extractedCore);
|
||||
if (data->params.includeTargetMolInResults) {
|
||||
potentialMatches.back().setTargetMoleculeForHighlights(mol);
|
||||
}
|
||||
if (potentialMatches.empty()) {
|
||||
BOOST_LOG(rdDebugLog)
|
||||
<< "No attachment points in side chains" << std::endl;
|
||||
return -2;
|
||||
}
|
||||
|
||||
if (data->params.matchingStrategy != GA) {
|
||||
size_t N = 1;
|
||||
for (auto matche = data->matches.begin() + data->previousMatchSize;
|
||||
matche != data->matches.end(); ++matche) {
|
||||
size_t sz = matche->size();
|
||||
N *= sz;
|
||||
}
|
||||
// Highly symmetric cores can lead to a very large number of
|
||||
// permutations to test. Fall back to Greedy for the current chunk
|
||||
// when the number is too high.
|
||||
if (N * potentialMatches.size() > MAX_PERMUTATIONS) {
|
||||
data->process(data->prunePermutations);
|
||||
}
|
||||
}
|
||||
data->matches.push_back(std::move(potentialMatches));
|
||||
}
|
||||
if (potentialMatches.empty()) {
|
||||
BOOST_LOG(rdDebugLog) << "No attachment points in side chains" << std::endl;
|
||||
return -2;
|
||||
}
|
||||
|
||||
if (data->params.matchingStrategy != GA) {
|
||||
size_t N = 1;
|
||||
for (auto matche = data->matches.begin() + data->previousMatchSize;
|
||||
matche != data->matches.end(); ++matche) {
|
||||
size_t sz = matche->size();
|
||||
N *= sz;
|
||||
}
|
||||
// Highly symmetric cores can lead to a very large number of
|
||||
// permutations to test. Fall back to Greedy for the current chunk
|
||||
// when the number is too high.
|
||||
if (N * potentialMatches.size() > MAX_PERMUTATIONS) {
|
||||
data->process(data->prunePermutations);
|
||||
}
|
||||
}
|
||||
data->matches.push_back(std::move(potentialMatches));
|
||||
|
||||
if (!data->matches.empty()) {
|
||||
if (data->params.matchingStrategy & Greedy ||
|
||||
(data->params.matchingStrategy & GreedyChunks &&
|
||||
|
||||
@@ -18,40 +18,23 @@
|
||||
|
||||
namespace RDKit {
|
||||
|
||||
BETTER_ENUM(RGroupLabels, unsigned int,
|
||||
IsotopeLabels = 0x01,
|
||||
AtomMapLabels = 0x02,
|
||||
AtomIndexLabels = 0x04,
|
||||
RelabelDuplicateLabels = 0x08,
|
||||
MDLRGroupLabels = 0x10,
|
||||
DummyAtomLabels = 0x20, // These are rgroups but will get relabelled
|
||||
AutoDetect = 0xFF
|
||||
);
|
||||
|
||||
BETTER_ENUM(RGroupMatching, unsigned int,
|
||||
Greedy = 0x01,
|
||||
GreedyChunks = 0x02,
|
||||
Exhaustive = 0x04, // not really useful for large sets
|
||||
NoSymmetrization = 0x08,
|
||||
GA = 0x10
|
||||
);
|
||||
|
||||
BETTER_ENUM(
|
||||
RGroupLabelling, unsigned int,
|
||||
AtomMap = 0x01,
|
||||
Isotope = 0x02,
|
||||
MDLRGroup = 0x04
|
||||
);
|
||||
RGroupLabels, unsigned int, IsotopeLabels = 0x01, AtomMapLabels = 0x02,
|
||||
AtomIndexLabels = 0x04, RelabelDuplicateLabels = 0x08,
|
||||
MDLRGroupLabels = 0x10,
|
||||
DummyAtomLabels = 0x20, // These are rgroups but will get relabelled
|
||||
AutoDetect = 0xFF);
|
||||
|
||||
BETTER_ENUM(RGroupCoreAlignment, unsigned int,
|
||||
NoAlignment = 0x0,
|
||||
MCS = 0x01
|
||||
);
|
||||
BETTER_ENUM(RGroupMatching, unsigned int, Greedy = 0x01, GreedyChunks = 0x02,
|
||||
Exhaustive = 0x04, // not really useful for large sets
|
||||
NoSymmetrization = 0x08, GA = 0x10);
|
||||
|
||||
BETTER_ENUM(RGroupScore, unsigned int,
|
||||
Match = 0x1,
|
||||
FingerprintVariance = 0x4
|
||||
);
|
||||
BETTER_ENUM(RGroupLabelling, unsigned int, AtomMap = 0x01, Isotope = 0x02,
|
||||
MDLRGroup = 0x04);
|
||||
|
||||
BETTER_ENUM(RGroupCoreAlignment, unsigned int, NoAlignment = 0x0, MCS = 0x01);
|
||||
|
||||
BETTER_ENUM(RGroupScore, unsigned int, Match = 0x1, FingerprintVariance = 0x4);
|
||||
|
||||
struct RDKIT_RGROUPDECOMPOSITION_EXPORT RGroupDecompositionParameters {
|
||||
unsigned int labels = RGroupLabels::AutoDetect;
|
||||
@@ -75,6 +58,9 @@ struct RDKIT_RGROUPDECOMPOSITION_EXPORT RGroupDecompositionParameters {
|
||||
bool allowNonTerminalRGroups = false;
|
||||
//! unlabelled core atoms can have multiple rgroups
|
||||
bool allowMultipleRGroupsOnUnlabelled = false;
|
||||
//! Permit a core to match more than once in the same molecule if the sets of
|
||||
//! matched atoms are not equal.
|
||||
bool allowMultipleCoresInSameMol = false;
|
||||
// extended query settings for core matching
|
||||
bool doTautomers = false;
|
||||
bool doEnumeration = false;
|
||||
|
||||
@@ -288,7 +288,10 @@ struct rgroupdecomp_wrapper {
|
||||
"input structure\n"
|
||||
" - doEnumeration: expand input cores into enumerated mol bundles\n"
|
||||
" - allowMultipleRGroupsOnUnlabelled: permit more than one rgroup to "
|
||||
"be attached to an unlabelled core atom";
|
||||
"be attached to an unlabelled core atom\n"
|
||||
" - allowMultipleCoresInSameMol: permit a core to match more than"
|
||||
" once in the same molecule if the sets of matched atoms are not equal"
|
||||
" (default=False)";
|
||||
python::class_<RDKit::RGroupDecompositionParameters>(
|
||||
"RGroupDecompositionParameters", docString.c_str(),
|
||||
python::init<>(python::args("self"), "Constructor, takes no arguments"))
|
||||
@@ -338,6 +341,9 @@ struct rgroupdecomp_wrapper {
|
||||
.def_readwrite("allowMultipleRGroupsOnUnlabelled",
|
||||
&RDKit::RGroupDecompositionParameters::
|
||||
allowMultipleRGroupsOnUnlabelled)
|
||||
.def_readwrite(
|
||||
"allowMultipleCoresInSameMol",
|
||||
&RDKit::RGroupDecompositionParameters::allowMultipleCoresInSameMol)
|
||||
.def_readwrite("doTautomers",
|
||||
&RDKit::RGroupDecompositionParameters::doTautomers)
|
||||
.def_readwrite("doEnumeration",
|
||||
|
||||
@@ -1141,3 +1141,85 @@ TEST_CASE("includeTargetMolInResults") {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("Multiple Core Hits") {
|
||||
{
|
||||
std::vector<ROMOL_SPTR> cores{
|
||||
"c1([*:9])c([*:8])c([*:7])c2c(c1([*:10]))c(c([*:5])n2([*:6]))[CH2]C([*:3])([*:4])[N,n]([*:1])([*:2])"_smarts};
|
||||
REQUIRE(cores.front());
|
||||
std::vector<ROMOL_SPTR> mols{
|
||||
"CC1(C)N2[C@@H](Cc3c1[nH]c4ccccc34)C(=O)N(CCc5c[nH]c6ccccc56)CC2=O"_smiles};
|
||||
REQUIRE(mols.front());
|
||||
RGroupRows rows;
|
||||
RGroupDecompositionParameters ps;
|
||||
ps.allowMultipleCoresInSameMol = true;
|
||||
auto n = RGroupDecompose(cores, mols, rows, nullptr, ps);
|
||||
CHECK(n == 1);
|
||||
CHECK(flatten_whitespace(toJSON(rows)) == flatten_whitespace(R"JSON(
|
||||
[
|
||||
{
|
||||
"Core":"c1ccc2c(C[C@H](N([*:1])[*:2])[*:3])c([*:5])[nH]c2c1",
|
||||
"R1":"O=C(CN(CCc1c[nH]c2ccccc12)C(=O)[*:3])[*:1]",
|
||||
"R2":"CC(C)([*:2])[*:5]",
|
||||
"R3":"O=C(CN(CCc1c[nH]c2ccccc12)C(=O)[*:3])[*:1]",
|
||||
"R5":"CC(C)([*:2])[*:5]"
|
||||
},
|
||||
{
|
||||
"Core":"c1ccc2c(CC(N([*:1])[*:2])[*:3])c([*:5])[nH]c2c1",
|
||||
"R1":"CC1(C)c2[nH]c3ccccc3c2C[C@@H](C(=O)[*:2])N1C(=O)C[*:1]",
|
||||
"R2":"CC1(C)c2[nH]c3ccccc3c2C[C@@H](C(=O)[*:2])N1C(=O)C[*:1]",
|
||||
"R3":"[H][*:3]",
|
||||
"R5":"[H][*:5]"
|
||||
}
|
||||
])JSON"));
|
||||
}
|
||||
{
|
||||
std::vector<ROMOL_SPTR> cores{"c1ccccc1"_smarts};
|
||||
std::vector<ROMOL_SPTR> mols{"Fc1ccccc1Nc2ccc(Cl)cc2"_smiles,
|
||||
"c1cc(O)cc(Oc2cccc(Br)c2)c1"_smiles,
|
||||
"Ic1ccccc1"_smiles};
|
||||
RGroupRows rows;
|
||||
RGroupDecompositionParameters ps;
|
||||
ps.allowMultipleCoresInSameMol = true;
|
||||
auto n = RGroupDecompose(cores, mols, rows, nullptr, ps);
|
||||
CHECK(n == 3);
|
||||
CHECK(flatten_whitespace(toJSON(rows)) == flatten_whitespace(R"JSON(
|
||||
[
|
||||
{
|
||||
"Core":"c1cc([*:4])c([*:3])c([*:2])c1[*:1]",
|
||||
"R1":"[H][*:1]",
|
||||
"R2":"[H][*:2]",
|
||||
"R3":"F[*:3]",
|
||||
"R4":"Clc1ccc(N[*:4])cc1"
|
||||
},
|
||||
{
|
||||
"Core":"c1cc([*:4])c([*:3])c([*:2])c1[*:1]",
|
||||
"R1":"Fc1ccccc1N[*:1]",
|
||||
"R2":"[H][*:2]",
|
||||
"R3":"[H][*:3]",
|
||||
"R4":"Cl[*:4]"
|
||||
},
|
||||
{
|
||||
"Core":"c1cc([*:4])c([*:3])c([*:2])c1[*:1]",
|
||||
"R1":"[H][*:1]",
|
||||
"R2":"O[*:2]",
|
||||
"R3":"[H][*:3]",
|
||||
"R4":"Brc1cccc(O[*:4])c1"
|
||||
},
|
||||
{
|
||||
"Core":"c1cc([*:4])c([*:3])c([*:2])c1[*:1]",
|
||||
"R1":"[H][*:1]",
|
||||
"R2":"Oc1cccc(O[*:2])c1",
|
||||
"R3":"[H][*:3]",
|
||||
"R4":"Br[*:4]"
|
||||
},
|
||||
{
|
||||
"Core":"c1cc([*:4])c([*:3])c([*:2])c1[*:1]",
|
||||
"R1":"[H][*:1]",
|
||||
"R2":"[H][*:2]",
|
||||
"R3":"[H][*:3]",
|
||||
"R4":"I[*:4]"
|
||||
}
|
||||
])JSON"));
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user