mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-03 21:44:30 +08:00
* Fixes #888 * support older versions of boost support for hashing dynamic_bitset was not added until v1.71 * changes in response to review
This commit is contained in:
@@ -197,7 +197,7 @@ MolMatchFinalCheckFunctor::MolMatchFinalCheckFunctor(
|
||||
}
|
||||
|
||||
bool MolMatchFinalCheckFunctor::operator()(const std::uint32_t q_c[],
|
||||
const std::uint32_t m_c[]) const {
|
||||
const std::uint32_t m_c[]) {
|
||||
if (d_params.extraFinalCheck || d_params.useGenericMatchers) {
|
||||
// EFF: we can no-doubt do better than this
|
||||
std::vector<unsigned int> aids(m_c, m_c + d_query.getNumAtoms());
|
||||
@@ -212,7 +212,27 @@ bool MolMatchFinalCheckFunctor::operator()(const std::uint32_t q_c[],
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
HashedStorageType match;
|
||||
if (d_params.uniquify) {
|
||||
match.resize(d_mol.getNumAtoms());
|
||||
#ifdef RDK_INTERNAL_BITSET_HAS_HASH
|
||||
match.reset();
|
||||
#else
|
||||
std::fill(match.begin(), match.end(), 0);
|
||||
#endif
|
||||
for (unsigned int i = 0; i < d_query.getNumAtoms(); ++i) {
|
||||
match[m_c[i]] = 1;
|
||||
}
|
||||
if (matchesSeen.find(match) != matchesSeen.end()) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (!d_params.useChirality) {
|
||||
if (d_params.uniquify) {
|
||||
matchesSeen.insert(match);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -356,7 +376,9 @@ bool MolMatchFinalCheckFunctor::operator()(const std::uint32_t q_c[],
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (d_params.uniquify) {
|
||||
matchesSeen.insert(match);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -502,9 +524,6 @@ std::vector<MatchVectType> SubstructMatch(
|
||||
}
|
||||
matches.push_back(matchVect);
|
||||
}
|
||||
if (params.uniquify) {
|
||||
removeDuplicates(matches, mol.getNumAtoms());
|
||||
}
|
||||
}
|
||||
return matches;
|
||||
}
|
||||
@@ -597,16 +616,24 @@ unsigned int RecursiveMatcher(const ROMol &mol, const ROMol &query,
|
||||
SUBQUERY_MAP &subqueryMap,
|
||||
const SubstructMatchParameters ¶ms,
|
||||
std::vector<RecursiveStructureQuery *> &locked) {
|
||||
SubstructMatchParameters lparams = params;
|
||||
// NOTE: maxMatches and recursive queries is problematic. To make this really
|
||||
// cover all cases we'd need a separate parameter for the number of possible
|
||||
// recursive matches. We will add that for the 2023.03 release; for now
|
||||
// we can still fix #888 without introducing any new problems using this
|
||||
// heuristic:
|
||||
lparams.maxMatches = std::max(1000u, params.maxMatches);
|
||||
lparams.uniquify = false;
|
||||
ROMol::ConstAtomIterator atIt;
|
||||
for (atIt = query.beginAtoms(); atIt != query.endAtoms(); atIt++) {
|
||||
if ((*atIt)->getQuery()) {
|
||||
MatchSubqueries(mol, (*atIt)->getQuery(), params, subqueryMap, locked);
|
||||
MatchSubqueries(mol, (*atIt)->getQuery(), lparams, subqueryMap, locked);
|
||||
}
|
||||
}
|
||||
|
||||
detail::AtomLabelFunctor atomLabeler(query, mol, params);
|
||||
detail::BondLabelFunctor bondLabeler(query, mol, params);
|
||||
MolMatchFinalCheckFunctor matchChecker(query, mol, params);
|
||||
detail::AtomLabelFunctor atomLabeler(query, mol, lparams);
|
||||
detail::BondLabelFunctor bondLabeler(query, mol, lparams);
|
||||
MolMatchFinalCheckFunctor matchChecker(query, mol, lparams);
|
||||
|
||||
matches.clear();
|
||||
matches.resize(0);
|
||||
|
||||
@@ -13,12 +13,20 @@
|
||||
|
||||
// std bits
|
||||
#include <vector>
|
||||
|
||||
#include <unordered_set>
|
||||
#include <functional>
|
||||
#include <unordered_map>
|
||||
#include <cstdint>
|
||||
#include "GraphMol/StereoGroup.h"
|
||||
#include <string>
|
||||
|
||||
#include <boost/dynamic_bitset.hpp>
|
||||
#if BOOST_VERSION >= 107100
|
||||
#define RDK_INTERNAL_BITSET_HAS_HASH
|
||||
#endif
|
||||
|
||||
#include <GraphMol/StereoGroup.h>
|
||||
|
||||
namespace RDKit {
|
||||
class ROMol;
|
||||
class Atom;
|
||||
@@ -218,13 +226,21 @@ class RDKIT_SUBSTRUCTMATCH_EXPORT MolMatchFinalCheckFunctor {
|
||||
MolMatchFinalCheckFunctor(const ROMol &query, const ROMol &mol,
|
||||
const SubstructMatchParameters &ps);
|
||||
|
||||
bool operator()(const std::uint32_t q_c[], const std::uint32_t m_c[]) const;
|
||||
bool operator()(const std::uint32_t q_c[], const std::uint32_t m_c[]);
|
||||
|
||||
private:
|
||||
const ROMol &d_query;
|
||||
const ROMol &d_mol;
|
||||
const SubstructMatchParameters &d_params;
|
||||
std::unordered_map<unsigned int, StereoGroup const *> d_molStereoGroups;
|
||||
#ifdef RDK_INTERNAL_BITSET_HAS_HASH
|
||||
// Boost 1.71 added support for std::hash with dynamic_bitset.
|
||||
using HashedStorageType = boost::dynamic_bitset<>;
|
||||
#else
|
||||
// otherwise we use a less elegant solution
|
||||
using HashedStorageType = std::string;
|
||||
#endif
|
||||
std::unordered_set<HashedStorageType> matchesSeen;
|
||||
};
|
||||
|
||||
} // namespace RDKit
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
//
|
||||
// Copyright (C) 2019 Greg Landrum
|
||||
// Copyright (C) 2019-2023 Greg Landrum
|
||||
//
|
||||
// @@ All Rights Reserved @@
|
||||
// This file is part of the RDKit.
|
||||
@@ -335,4 +335,82 @@ TEST_CASE("Github #4558: GetSubstructMatches() loops at 43690 iterations",
|
||||
|
||||
auto matches = SubstructMatch(*mol, *qry, ps);
|
||||
CHECK(matches.size() == num_mols * 2);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE(
|
||||
"Github #888: GetSubstructMatches uniquify and maxMatches don't work well together ") {
|
||||
SECTION("Basics") {
|
||||
auto m = "CCCCCC"_smiles;
|
||||
auto q = "CC"_smarts;
|
||||
REQUIRE(m);
|
||||
REQUIRE(q);
|
||||
SubstructMatchParameters ps;
|
||||
ps.uniquify = false;
|
||||
ps.maxMatches = 4;
|
||||
auto matches = SubstructMatch(*m, *q, ps);
|
||||
CHECK(matches.size() == 4);
|
||||
|
||||
ps.uniquify = true;
|
||||
matches = SubstructMatch(*m, *q, ps);
|
||||
CHECK(matches.size() == 4);
|
||||
|
||||
ps.useChirality = true;
|
||||
matches = SubstructMatch(*m, *q, ps);
|
||||
CHECK(matches.size() == 4);
|
||||
}
|
||||
SECTION("interaction with chirality") {
|
||||
auto m = "C/C=C/C=C/C=C\\C=C/C=C/C"_smiles;
|
||||
auto q = "C/C=C/C"_smarts;
|
||||
REQUIRE(m);
|
||||
REQUIRE(q);
|
||||
SubstructMatchParameters ps;
|
||||
ps.uniquify = false;
|
||||
ps.maxMatches = 2;
|
||||
auto matches = SubstructMatch(*m, *q, ps);
|
||||
CHECK(matches.size() == 2);
|
||||
|
||||
ps.uniquify = true;
|
||||
matches = SubstructMatch(*m, *q, ps);
|
||||
CHECK(matches.size() == 2);
|
||||
|
||||
ps.useChirality = true;
|
||||
matches = SubstructMatch(*m, *q, ps);
|
||||
CHECK(matches.size() == 2);
|
||||
|
||||
ps.maxMatches = 1000;
|
||||
matches = SubstructMatch(*m, *q, ps);
|
||||
CHECK(matches.size() == 3);
|
||||
}
|
||||
SECTION("interactions with recursive SMARTS") {
|
||||
{
|
||||
auto m = "N#CC#N"_smiles;
|
||||
REQUIRE(m);
|
||||
auto q = "[!$(*#*)]"_smarts;
|
||||
REQUIRE(q);
|
||||
SubstructMatchParameters ps;
|
||||
ps.uniquify = true;
|
||||
ps.maxMatches = 1;
|
||||
auto matches = SubstructMatch(*m, *q, ps);
|
||||
CHECK(matches.empty());
|
||||
}
|
||||
{
|
||||
auto m = "N#CC#N"_smiles;
|
||||
REQUIRE(m);
|
||||
auto q = "[$(*#*)&!D1]"_smarts;
|
||||
REQUIRE(q);
|
||||
SubstructMatchParameters ps;
|
||||
ps.uniquify = true;
|
||||
auto matches = SubstructMatch(*m, *q, ps);
|
||||
CHECK(matches.size() == 2);
|
||||
}
|
||||
{
|
||||
auto m = "N#CCC#N"_smiles;
|
||||
REQUIRE(m);
|
||||
auto q = "[!$(*#*)&!D1]-&!@[!$(*#*)&!D1]"_smarts;
|
||||
REQUIRE(q);
|
||||
SubstructMatchParameters ps;
|
||||
auto matches = SubstructMatch(*m, *q, ps);
|
||||
CHECK(matches.empty());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user