diff --git a/Code/GraphMol/GenericGroups/GenericGroups.cpp b/Code/GraphMol/GenericGroups/GenericGroups.cpp index 098be25b1..7515b423b 100644 --- a/Code/GraphMol/GenericGroups/GenericGroups.cpp +++ b/Code/GraphMol/GenericGroups/GenericGroups.cpp @@ -657,7 +657,7 @@ bool RAtomMatcher(const ROMol &mol, const Atom &atom, } // namespace Matchers bool genericAtomMatcher(const ROMol &mol, const ROMol &query, - const std::vector &match) { + const std::span &match) { boost::dynamic_bitset<> ignore(mol.getNumAtoms()); for (const auto idx : match) { ignore.set(idx); diff --git a/Code/GraphMol/GenericGroups/GenericGroups.h b/Code/GraphMol/GenericGroups/GenericGroups.h index 5f9b18355..853e63f30 100644 --- a/Code/GraphMol/GenericGroups/GenericGroups.h +++ b/Code/GraphMol/GenericGroups/GenericGroups.h @@ -16,6 +16,7 @@ #include #include #include +#include #include namespace RDKit { @@ -603,7 +604,7 @@ RDKIT_GENERICGROUPS_EXPORT ROMol *adjustQueryPropertiesWithGenericGroups( /// the current match RDKIT_GENERICGROUPS_EXPORT bool genericAtomMatcher( const ROMol &mol, const ROMol &query, - const std::vector &match); + const std::span &match); //! sets the apropriate generic query tags based on atom labels and/or SGroups /* diff --git a/Code/GraphMol/GenericGroups/generic_tests.cpp b/Code/GraphMol/GenericGroups/generic_tests.cpp index 65958f087..f8d41e680 100644 --- a/Code/GraphMol/GenericGroups/generic_tests.cpp +++ b/Code/GraphMol/GenericGroups/generic_tests.cpp @@ -172,12 +172,12 @@ M END } namespace { -bool no_match(const ROMol &mol, const std::vector &ids) { +bool no_match(const ROMol &mol, const std::span ids) { RDUNUSED_PARAM(mol); RDUNUSED_PARAM(ids); return false; } -bool always_match(const ROMol &mol, const std::vector &ids) { +bool always_match(const ROMol &mol, const std::span ids) { RDUNUSED_PARAM(mol); RDUNUSED_PARAM(ids); return true; diff --git a/Code/GraphMol/Substruct/SubstructMatch.cpp b/Code/GraphMol/Substruct/SubstructMatch.cpp index d6e0b1dd9..ef13373a1 100644 --- a/Code/GraphMol/Substruct/SubstructMatch.cpp +++ b/Code/GraphMol/Substruct/SubstructMatch.cpp @@ -1,5 +1,5 @@ // -// Copyright (C) 2001-2021 Greg Landrum and other RDKit contributors +// Copyright (C) 2001-2025 Greg Landrum and other RDKit contributors // // @@ All Rights Reserved @@ // This file is part of the RDKit. @@ -21,10 +21,7 @@ #include #include #include - -#if BOOST_VERSION == 106400 -#include -#endif +#include #ifdef RDK_BUILD_THREADSAFE_SSS #include @@ -34,8 +31,6 @@ #include "vf2.hpp" -using boost::make_iterator_range; - namespace RDKit { namespace detail { @@ -57,14 +52,14 @@ bool enhancedStereoIsOK( // If the query has stereo groups: // * OR only matches AND or OR (not absolute) // * AND only matches OR - for (auto &&sg : query.getStereoGroups()) { + for (const auto &sg : query.getStereoGroups()) { if (sg.getGroupType() == StereoGroupType::STEREO_ABSOLUTE) { continue; } // StereoGroup const* matched_mol_group = nullptr; const bool is_and = sg.getGroupType() == StereoGroupType::STEREO_AND; - for (auto &&a : sg.getAtoms()) { - auto mol_group = molStereoGroups.find(q_to_mol[a->getIdx()]); + for (const auto a : sg.getAtoms()) { + const auto mol_group = molStereoGroups.find(q_to_mol[a->getIdx()]); if (mol_group == molStereoGroups.end()) { // group matching absolute. not ok. return false; @@ -81,15 +76,15 @@ bool enhancedStereoIsOK( // If the mol has stereo groups: // * All atoms must either be the same or opposite, you can't mix // * Only one stereogroup must cover all matched atoms in the mol stereo group - for (auto &&sg : mol.getStereoGroups()) { + for (const auto &sg : mol.getStereoGroups()) { if (sg.getGroupType() == StereoGroupType::STEREO_ABSOLUTE) { continue; } - bool doesMatch; + bool doesMatch = false; bool seen = false; StereoGroup const *QGroup = nullptr; - for (auto &&a : sg.getAtoms()) { + for (const auto &a : sg.getAtoms()) { auto thisDoesMatch = matches.find(a->getIdx()); if (thisDoesMatch == matches.end()) { // not matched @@ -199,8 +194,7 @@ MolMatchFinalCheckFunctor::MolMatchFinalCheckFunctor( bool MolMatchFinalCheckFunctor::operator()(const std::uint32_t q_c[], const std::uint32_t m_c[]) { if (d_params.extraFinalCheck || d_params.useGenericMatchers) { - // EFF: we can no-doubt do better than this - std::vector aids(m_c, m_c + d_query.getNumAtoms()); + const std::span aids(m_c, d_query.getNumAtoms()); if (d_params.useGenericMatchers && !GenericGroups::genericAtomMatcher(d_mol, d_query, aids)) { return false; @@ -276,8 +270,8 @@ bool MolMatchFinalCheckFunctor::operator()(const std::uint32_t q_c[], mOrder.insert(mOrder.end(), unmatchedNeighbors, -1); INT_LIST moOrder; - for (const auto &bond : make_iterator_range(d_mol.getAtomBonds(mAt))) { - int dbidx = d_mol[bond]->getIdx(); + for (const auto &bond : d_mol.atomBonds(mAt)) { + const int dbidx = bond->getIdx(); if (std::find(mOrder.begin(), mOrder.end(), dbidx) != mOrder.end()) { moOrder.push_back(dbidx); } else { @@ -285,7 +279,7 @@ bool MolMatchFinalCheckFunctor::operator()(const std::uint32_t q_c[], } } - int mPermCount = + const int mPermCount = static_cast(countSwapsToInterconvert(moOrder, mOrder)); const bool requireMatch = qPermCount % 2 == mPermCount % 2; @@ -293,7 +287,7 @@ bool MolMatchFinalCheckFunctor::operator()(const std::uint32_t q_c[], const bool matchOK = requireMatch == labelsMatch; // if this is not part of a stereogroup and doesn't match, return false - auto msg = d_molStereoGroups.find(m_c[i]); + const auto msg = d_molStereoGroups.find(m_c[i]); if (msg == d_molStereoGroups.end()) { if (!matchOK) { return false; @@ -451,7 +445,7 @@ void ResSubstructMatchHelper_(const ResSubstructMatchHelperArgs_ &args, unsigned int ei) { for (unsigned int i = bi; (matches->size() < args.params.maxMatches) && (i < ei); ++i) { - ROMol *mol = args.resMolSupplier[i]; + std::unique_ptr mol{args.resMolSupplier[i]}; std::vector matchesTmp = SubstructMatch(*mol, args.query, args.params); for (const auto &match : matchesTmp) { @@ -459,7 +453,6 @@ void ResSubstructMatchHelper_(const ResSubstructMatchHelperArgs_ &args, break; } } - delete mol; } }; @@ -516,7 +509,7 @@ std::vector SubstructMatch( boost::vf2_all(query.getTopology(), mol.getTopology(), atomLabeler, bondLabeler, matchChecker, pms, params.maxMatches); if (found) { - unsigned int nQueryAtoms = query.getNumAtoms(); + const unsigned int nQueryAtoms = query.getNumAtoms(); matches.reserve(pms.size()); MatchVectType matchVect(nQueryAtoms); for (const auto &pairs : pms) { @@ -533,7 +526,7 @@ std::vector SubstructMatch( const MolBundle &bundle, const ROMol &query, const SubstructMatchParameters ¶ms) { std::vector res; - for (unsigned int i = 0; i < bundle.size() && !res.size(); ++i) { + for (unsigned int i = 0; i < bundle.size() && res.empty(); ++i) { res = SubstructMatch(*bundle[i], query, params); } return res; @@ -543,7 +536,7 @@ std::vector SubstructMatch( const ROMol &mol, const MolBundle &query, const SubstructMatchParameters ¶ms) { std::vector res; - for (unsigned int i = 0; i < query.size() && !res.size(); ++i) { + for (unsigned int i = 0; i < query.size() && res.empty(); ++i) { res = SubstructMatch(mol, *query[i], params); } return res; @@ -553,8 +546,8 @@ std::vector SubstructMatch( const MolBundle &mol, const MolBundle &query, const SubstructMatchParameters ¶ms) { std::vector res; - for (unsigned int i = 0; i < mol.size() && !res.size(); ++i) { - for (unsigned int j = 0; j < query.size() && !res.size(); ++j) { + for (unsigned int i = 0; i < mol.size() && res.empty(); ++i) { + for (unsigned int j = 0; j < query.size() && res.empty(); ++j) { res = SubstructMatch(*mol[i], *query[j], params); } } @@ -580,19 +573,19 @@ std::vector SubstructMatch( #ifdef RDK_BUILD_THREADSAFE_SSS else { std::vector> tg; - std::vector *> matchesThread(nt); + std::vector>> matchesThread(nt); unsigned int ei = 0; double dpt = static_cast(resMolSupplier.length()) / static_cast(nt); double dc = 0.0; for (unsigned int ti = 0; ti < nt; ++ti) { - matchesThread[ti] = new std::set(); + matchesThread[ti] = std::make_unique>(); unsigned int bi = ei; dc += dpt; ei = static_cast(floor(dc)); tg.emplace_back(std::async(std::launch::async, detail::ResSubstructMatchHelper_, args, - matchesThread[ti], bi, ei)); + matchesThread[ti].get(), bi, ei)); } for (auto &fut : tg) { fut.get(); @@ -604,7 +597,6 @@ std::vector SubstructMatch( break; } } - delete matchesThread[ti]; } } #endif @@ -673,8 +665,6 @@ void MatchSubqueries(const ROMol &mol, QueryAtom::QUERYATOM_QUERY *query, SUBQUERY_MAP &subqueryMap, std::vector &locked) { PRECONDITION(query, "bad query"); - // std::cout << "*-*-* MS: " << query << std::endl; - // std::cout << "\t\t" << typeid(*query).name() << std::endl; if (query->getDescription() == "RecursiveStructure") { auto *rsq = (RecursiveStructureQuery *)query; #ifdef RDK_BUILD_THREADSAFE_SSS @@ -694,8 +684,6 @@ void MatchSubqueries(const ROMol &mol, QueryAtom::QUERYATOM_QUERY *query, ++setIter) { rsq->insert(*setIter); } - // std::cerr<<" copying results for query serial number: - // "<getSerialNumber()<getSerialNumber()) { subqueryMap[rsq->getSerialNumber()] = query; - // std::cerr << " storing results for query serial number: " - // << rsq->getSerialNumber() << " " << rsq->size() << - // std::endl; } } - } else { - // std::cout << "\tmsq1: "; } // now recurse over our children (these things can be nested) diff --git a/Code/GraphMol/Substruct/SubstructMatch.h b/Code/GraphMol/Substruct/SubstructMatch.h index 507d704ec..272a4e518 100644 --- a/Code/GraphMol/Substruct/SubstructMatch.h +++ b/Code/GraphMol/Substruct/SubstructMatch.h @@ -1,5 +1,5 @@ // -// Copyright (C) 2001-2020 Greg Landrum and Rational Discovery LLC +// Copyright (C) 2001-2025 Greg Landrum and other RDKit contributors // // @@ All Rights Reserved @@ // This file is part of the RDKit. @@ -19,6 +19,7 @@ #include #include #include +#include #include #if BOOST_VERSION >= 107100 @@ -62,7 +63,7 @@ struct RDKIT_SUBSTRUCTMATCH_EXPORT SubstructMatchParameters { std::vector bondProperties; //!< bond properties that must be //!< equivalent in order to match std::function &match)> + std::span match)> extraFinalCheck; //!< a function to be called at the end to validate a //!< match unsigned int maxRecursiveMatches = diff --git a/Code/GraphMol/Substruct/SubstructUtils.cpp b/Code/GraphMol/Substruct/SubstructUtils.cpp index 38518961c..e76d7cdb2 100644 --- a/Code/GraphMol/Substruct/SubstructUtils.cpp +++ b/Code/GraphMol/Substruct/SubstructUtils.cpp @@ -1,6 +1,5 @@ // -// Copyright (C) 2003-2021 Greg Landrum and Rational Discovery LLC -// +// Copyright (C) 2003-2025 Greg Landrum and other RDKit contributors // @@ All Rights Reserved @@ // This file is part of the RDKit. // The contents are covered by the terms of the BSD license @@ -146,6 +145,7 @@ bool atomCompat(const Atom *a1, const Atom *a2, } bool chiralAtomCompat(const Atom *&a1, const Atom *&a2) { + /// DEPRECATED PRECONDITION(a1, "bad atom"); PRECONDITION(a2, "bad atom"); bool res = a1->Match(a2); @@ -232,18 +232,18 @@ void removeDuplicates(std::vector &matches, // that the 4 paths are equivalent in the semantics of the query. // Also, OELib returns the same results // - std::set> seen; + std::unordered_set seen; std::vector res; res.reserve(matches.size()); - for (auto &&match : matches) { - boost::dynamic_bitset<> val(nAtoms); + seen.reserve(matches.size()); + for (const auto &match : matches) { + std::string val(nAtoms, '0'); for (const auto &ci : match) { - val.set(ci.second); + val[ci.second] = '1'; } - auto pos = seen.lower_bound(val); - if (pos == seen.end() || *pos != val) { - res.push_back(std::move(match)); - seen.insert(pos, std::move(val)); + const bool inserted = seen.insert(std::move(val)).second; + if (inserted) { + res.push_back(match); } } res.shrink_to_fit(); diff --git a/Code/GraphMol/Substruct/SubstructUtils.h b/Code/GraphMol/Substruct/SubstructUtils.h index ea8312c4d..e1e547d0a 100644 --- a/Code/GraphMol/Substruct/SubstructUtils.h +++ b/Code/GraphMol/Substruct/SubstructUtils.h @@ -1,5 +1,5 @@ // -// Copyright (C) 2003-2019 Greg Landrum and Rational Discovery LLC +// Copyright (C) 2003-2025 Greg Landrum and other RDKit contributors // // @@ All Rights Reserved @@ // This file is part of the RDKit. @@ -27,6 +27,7 @@ RDKIT_SUBSTRUCTMATCH_EXPORT bool propertyCompat( const std::vector &properties); RDKIT_SUBSTRUCTMATCH_EXPORT bool atomCompat(const Atom *a1, const Atom *a2, const SubstructMatchParameters &ps); +[[deprecated("chiralAtomCompat is deprecated and should not be used")]] RDKIT_SUBSTRUCTMATCH_EXPORT bool chiralAtomCompat(const Atom *a1, const Atom *a2); RDKIT_SUBSTRUCTMATCH_EXPORT bool bondCompat(const Bond *b1, const Bond *b2, diff --git a/Code/GraphMol/Substruct/catch_tests.cpp b/Code/GraphMol/Substruct/catch_tests.cpp index 74aae8ecc..64ff63885 100644 --- a/Code/GraphMol/Substruct/catch_tests.cpp +++ b/Code/GraphMol/Substruct/catch_tests.cpp @@ -202,17 +202,17 @@ TEST_CASE("substructure parameters", "[substruct]") { } namespace { -bool no_match(const ROMol &mol, const std::vector &ids) { +bool no_match(const ROMol &mol, const std::span &ids) { RDUNUSED_PARAM(mol); RDUNUSED_PARAM(ids); return false; } -bool always_match(const ROMol &mol, const std::vector &ids) { +bool always_match(const ROMol &mol, const std::span &ids) { RDUNUSED_PARAM(mol); RDUNUSED_PARAM(ids); return true; } -bool bigger(const ROMol &mol, const std::vector &ids) { +bool bigger(const ROMol &mol, const std::span &ids) { RDUNUSED_PARAM(mol); return std::accumulate(ids.begin(), ids.end(), 0) > 5; } diff --git a/Code/GraphMol/TautomerQuery/TautomerQuery.cpp b/Code/GraphMol/TautomerQuery/TautomerQuery.cpp index 1e6d6721c..d4a78efa5 100644 --- a/Code/GraphMol/TautomerQuery/TautomerQuery.cpp +++ b/Code/GraphMol/TautomerQuery/TautomerQuery.cpp @@ -12,6 +12,8 @@ #include #include #include +#include + #include #include #include @@ -101,7 +103,7 @@ class TautomerQueryMatcher { d_params(params), d_matchingTautomers(matchingTautomers) {} - bool match(const ROMol &mol, const std::vector &match) { + bool match(const ROMol &mol, const std::span &match) { #ifdef VERBOSE std::cout << "Checking template match" << std::endl; #endif @@ -192,7 +194,7 @@ TautomerQuery *TautomerQuery::fromMol( bool TautomerQuery::matchTautomer( const ROMol &mol, const ROMol &tautomer, - const std::vector &match, + const std::span &match, const SubstructMatchParameters ¶ms) const { for (auto idx : d_modifiedAtoms) { const auto queryAtom = tautomer.getAtomWithIdx(idx); @@ -255,7 +257,7 @@ std::vector TautomerQuery::substructOf( // use this functor as a final check to see if any tautomer matches the target auto checker = [&tautomerQueryMatcher]( const ROMol &mol, - const std::vector &match) mutable { + const std::span &match) mutable { return tautomerQueryMatcher.match(mol, match); }; templateParams.extraFinalCheck = checker; diff --git a/Code/GraphMol/TautomerQuery/TautomerQuery.h b/Code/GraphMol/TautomerQuery/TautomerQuery.h index b269c8ba4..aed758bd0 100644 --- a/Code/GraphMol/TautomerQuery/TautomerQuery.h +++ b/Code/GraphMol/TautomerQuery/TautomerQuery.h @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -45,7 +46,7 @@ class RDKIT_TAUTOMERQUERY_EXPORT TautomerQuery { // tests if a match to the template matches a specific tautomer bool matchTautomer(const ROMol &mol, const ROMol &tautomer, - const std::vector &match, + const std::span &match, const SubstructMatchParameters ¶ms) const; public: diff --git a/Code/GraphMol/Wrap/Mol.cpp b/Code/GraphMol/Wrap/Mol.cpp index ab5341c43..58e99922c 100644 --- a/Code/GraphMol/Wrap/Mol.cpp +++ b/Code/GraphMol/Wrap/Mol.cpp @@ -1,5 +1,5 @@ -// Copyright (C) 2003-2017 Greg Landrum and Rational Discovery LLC +// Copyright (C) 2003-2025 Greg Landrum and other RDKit contributors // // @@ All Rights Reserved @@ // This file is part of the RDKit. @@ -10,6 +10,7 @@ #define NO_IMPORT_ARRAY #include #include +#include #include "props.hpp" #include "rdchem.h" @@ -156,8 +157,11 @@ class pyobjFunctor { public: pyobjFunctor(python::object obj) : dp_obj(std::move(obj)) {} ~pyobjFunctor() = default; - bool operator()(const ROMol &m, const std::vector &match) { - return python::extract(dp_obj(boost::ref(m), boost::ref(match))); + bool operator()(const ROMol &m, std::span match) { + // boost::python doesn't handle std::span, so we need to convert the span to + // a vector before calling into python: + std::vector matchVec(match.begin(), match.end()); + return python::extract(dp_obj(boost::ref(m), boost::ref(matchVec))); } private: diff --git a/Regress/Scripts/new_timings.py b/Regress/Scripts/new_timings.py index 43391bba0..f9c805577 100644 --- a/Regress/Scripts/new_timings.py +++ b/Regress/Scripts/new_timings.py @@ -17,6 +17,7 @@ def data(fname): logger = logger() +logger.setLevel(1) tests = [1] * 1001 if len(sys.argv) > 1: @@ -80,6 +81,7 @@ if tests[3] or tests[4] or tests[5]: logger.info('patterns from smiles') patts = [] nMols = 0 + nBad = 0 t1 = time.time() for line in pattData: m = Chem.MolFromSmarts(line)