* Fixes #6017

* a bit of cleanup work

* remove unused variable

* change in response to review
switch to using std::max(maxMatches,maxRecursiveMatches)

* test the case where maxSubstructMatches<maxMatches
This commit is contained in:
Greg Landrum
2023-10-25 04:57:29 +02:00
committed by GitHub
parent 173f548f8f
commit 4a69bc3493
5 changed files with 80 additions and 45 deletions

View File

@@ -490,10 +490,10 @@ std::vector<MatchVectType> SubstructMatch(
if (params.recursionPossible) {
detail::SUBQUERY_MAP subqueryMap;
ROMol::ConstAtomIterator atIt;
for (atIt = query.beginAtoms(); atIt != query.endAtoms(); atIt++) {
if ((*atIt)->getQuery()) {
for (const auto atom : query.atoms()) {
if (atom->hasQuery()) {
// std::cerr<<"recurse from atom "<<(*atIt)->getIdx()<<std::endl;
detail::MatchSubqueries(mol, (*atIt)->getQuery(), params, subqueryMap,
detail::MatchSubqueries(mol, atom->getQuery(), params, subqueryMap,
locker.locked);
}
}
@@ -515,12 +515,10 @@ std::vector<MatchVectType> SubstructMatch(
if (found) {
unsigned int nQueryAtoms = query.getNumAtoms();
matches.reserve(pms.size());
for (std::list<detail::ssPairType>::const_iterator iter1 = pms.begin();
iter1 != pms.end(); ++iter1) {
MatchVectType matchVect;
matchVect.resize(nQueryAtoms);
for (const auto &iter2 : *iter1) {
matchVect[iter2.first] = std::pair<int, int>(iter2.first, iter2.second);
MatchVectType matchVect(nQueryAtoms);
for (const auto &pairs : pms) {
for (const auto &pair : pairs) {
matchVect[pair.first] = pair;
}
matches.push_back(matchVect);
}
@@ -617,17 +615,11 @@ unsigned int RecursiveMatcher(const ROMol &mol, const ROMol &query,
const SubstructMatchParameters &params,
std::vector<RecursiveStructureQuery *> &locked) {
SubstructMatchParameters lparams = params;
// NOTE: maxMatches and recursive queries is problematic. To make this really
// cover all cases we'd need a separate parameter for the number of possible
// recursive matches. We will add that for the 2023.03 release; for now
// we can still fix #888 without introducing any new problems using this
// heuristic:
lparams.maxMatches = std::max(1000u, params.maxMatches);
lparams.maxMatches = std::max(params.maxRecursiveMatches, params.maxMatches);
lparams.uniquify = false;
ROMol::ConstAtomIterator atIt;
for (atIt = query.beginAtoms(); atIt != query.endAtoms(); atIt++) {
if ((*atIt)->getQuery()) {
MatchSubqueries(mol, (*atIt)->getQuery(), lparams, subqueryMap, locked);
for (auto qAtom : query.atoms()) {
if (qAtom->hasQuery()) {
MatchSubqueries(mol, qAtom->getQuery(), lparams, subqueryMap, locked);
}
}
@@ -648,15 +640,14 @@ unsigned int RecursiveMatcher(const ROMol &mol, const ROMol &query,
unsigned int res = 0;
if (found) {
matches.reserve(pms.size());
for (std::list<detail::ssPairType>::const_iterator iter1 = pms.begin();
iter1 != pms.end(); ++iter1) {
for (const auto &pairs : pms) {
if (!query.hasProp(common_properties::_queryRootAtom)) {
matches.push_back(iter1->begin()->second);
matches.push_back(pairs.begin()->second);
} else {
int rootIdx;
query.getProp(common_properties::_queryRootAtom, rootIdx);
bool found = false;
for (const auto &pairIter : *iter1) {
for (const auto &pairIter : pairs) {
if (pairIter.first == static_cast<unsigned int>(rootIdx)) {
matches.push_back(pairIter.second);
found = true;
@@ -668,6 +659,9 @@ unsigned int RecursiveMatcher(const ROMol &mol, const ROMol &query,
<< "no match found for queryRootAtom" << std::endl;
}
}
if (matches.size() == lparams.maxMatches) {
break;
}
}
res = matches.size();
}
@@ -680,7 +674,7 @@ void MatchSubqueries(const ROMol &mol, QueryAtom::QUERYATOM_QUERY *query,
SUBQUERY_MAP &subqueryMap,
std::vector<RecursiveStructureQuery *> &locked) {
PRECONDITION(query, "bad query");
// std::cout << "*-*-* MS: " << (int)query << std::endl;
// std::cout << "*-*-* MS: " << query << std::endl;
// std::cout << "\t\t" << typeid(*query).name() << std::endl;
if (query->getDescription() == "RecursiveStructure") {
auto *rsq = (RecursiveStructureQuery *)query;
@@ -695,7 +689,7 @@ void MatchSubqueries(const ROMol &mol, QueryAtom::QUERYATOM_QUERY *query,
// we've matched an equivalent serial number before, just
// copy in the matches:
matchDone = true;
const RecursiveStructureQuery *orsq =
auto orsq =
(const RecursiveStructureQuery *)subqueryMap[rsq->getSerialNumber()];
for (auto setIter = orsq->beginSet(); setIter != orsq->endSet();
++setIter) {
@@ -720,8 +714,9 @@ void MatchSubqueries(const ROMol &mol, QueryAtom::QUERYATOM_QUERY *query,
}
if (rsq->getSerialNumber()) {
subqueryMap[rsq->getSerialNumber()] = query;
// std::cerr<<" storing results for query serial number:
// "<<rsq->getSerialNumber()<<std::endl;
// std::cerr << " storing results for query serial number: "
// << rsq->getSerialNumber() << " " << rsq->size() <<
// std::endl;
}
}
} else {
@@ -729,11 +724,8 @@ void MatchSubqueries(const ROMol &mol, QueryAtom::QUERYATOM_QUERY *query,
}
// now recurse over our children (these things can be nested)
Queries::Query<int, Atom const *, true>::CHILD_VECT_CI childIt;
// std::cout << query << " " << query->endChildren()-query->beginChildren() <<
// std::endl;
for (childIt = query->beginChildren(); childIt != query->endChildren();
childIt++) {
for (auto childIt = query->beginChildren(); childIt != query->endChildren();
++childIt) {
MatchSubqueries(mol, childIt->get(), params, subqueryMap, locked);
}
// std::cout << "<<- back " << (int)query << std::endl;