mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-04 21:54:27 +08:00
- Switched from dynamic to static allocation for an instance of `MCSParameters` - Switched to using `auto` where possible - Added a few `CHECK_INVARIANT` where appropriate before dereferencing pointers - Moved some inline comments to the previous line to improve readability - Added an early check for `CompleteRingsOnly` in `checkBondRingMatch()` to improve computational efficiency - Removed `RingMatchTableSet` entirely as 1) it is unnecessary since its functionality is already provided by `RingInfo` 2) it abused the `userData` pointer. This allows cleaning up and simplifying the code, particularly the Python wrappers which had a significant amount of added complexity to support it - Removed all the code that was deprecated several releases ago - Reimplemented ringFusionCheck() from scratch to address several bug reports; also switched from std::set to boost::dynamic_bitset for better efficiency - Replaced boost::tie with boost::make_iterator_range - Modernized `for` loops where possible - Removed entirely the QueryRings structure as its functionality is already available in RingInfo - Removed entirely the _DFS() function since the same algorithm can be implemented in a simpler and more efficient way using RingInfo (from 2m28.441s to 2m9.859s for the same task) - Replaced std::vector<bool> with boost::dynamic_bitset - Replaced C-style casts with C++ casts - Replaced some size_t with unsigned int - Refactored checkIfRingsAreClosed() such that checkNoLoneRingAtoms() is not needed anymore - Added a test for slow runtimes with CompleteRingsOnly - Setting Timeout to 0 means no timeout, as it should be - Removed unused `steps` variable from `MaximumCommonSubgraph::growSeeds` - Storing both Atom and Bond pointers and their indices on Seed and MCS data structures is time-consuming and a potential source of inconsistencies; storing pointers is sufficient - Promoted `MaximumCommonSubgraph::match` from `private` to `public` - `NewBonds` was declared `mutable`, but 
`Seed::fillNewBonds()` was incorrectly declared as `non-const`, which caused the need for an ugly (and unnecessary) `const_cast`. I have now removed the `const_cast` and correctly declared functions that alter `NewBonds` as `const`, since `NewBonds` is explicitly `mutable` - Removed some useless random scoping that was peppering the MCS code - Removed a significant amount of duplicate code from the Python wrappers by inheriting from a base `PyMCSWrapper` class - Fixed #6082 - Fixed #5510 - Fixed #5457 - Fixed #5440 - Fixed #5411 - Fixed #3965 - Fixed #6578 Co-authored-by: ptosco <paolo.tosco@novartis.com>
193 lines
6.7 KiB
C++
193 lines
6.7 KiB
C++
//
|
|
// Copyright (C) 2014 Novartis Institutes for BioMedical Research
|
|
//
|
|
// @@ All Rights Reserved @@
|
|
// This file is part of the RDKit.
|
|
// The contents are covered by the terms of the BSD license
|
|
// which is included in the file license.txt, found at the root
|
|
// of the RDKit source tree.
|
|
//
|
|
#include <map>
|
|
#include <RDGeneral/utils.h>
|
|
#include <RDGeneral/Invariant.h>
|
|
#include <GraphMol/RDKitBase.h>
|
|
#include <GraphMol/RDKitQueries.h>
|
|
#include "SubstructMatchCustom.h"
|
|
|
|
#include "../Substruct/vf2.hpp"
|
|
|
|
namespace RDKit {
|
|
namespace FMCS {
|
|
|
|
class MolMatchFinalCheckFunctor {
|
|
const FMCS::Graph& QueryTopology;
|
|
const FMCS::Graph& TargetTopology;
|
|
const ROMol& d_query;
|
|
const ROMol& d_mol;
|
|
const MCSParameters* Parameters;
|
|
|
|
public:
|
|
MolMatchFinalCheckFunctor(const FMCS::Graph& query, const FMCS::Graph& target,
|
|
const ROMol& querySrc, const ROMol& mol // target
|
|
,
|
|
const MCSParameters* parameters)
|
|
: QueryTopology(query),
|
|
TargetTopology(target),
|
|
d_query(querySrc),
|
|
d_mol(mol),
|
|
Parameters(parameters){};
|
|
|
|
bool operator()(const boost::detail::node_id c1[],
|
|
const boost::detail::node_id c2[]) const {
|
|
if (static_cast<unsigned int>(c1[0]) >=
|
|
boost::num_vertices(QueryTopology)) {
|
|
return false; // invalid index - match failed, see v2f implementation
|
|
}
|
|
auto compare = Parameters ? Parameters->FinalMatchChecker : nullptr;
|
|
return compare ? compare(c1, c2, d_query, QueryTopology, d_mol,
|
|
TargetTopology, Parameters)
|
|
: true;
|
|
}
|
|
};
|
|
//=================================================================================================
|
|
// PRECOMPUTED_TABLES_MATCH is much faster overall, even with very simple
// compare functions
|
|
|
|
class AtomTableCompareFunctor {
|
|
const FMCS::Graph& QueryTopology;
|
|
const FMCS::Graph& TargetTopology;
|
|
const FMCS::MatchTable& MatchTable;
|
|
|
|
public:
|
|
AtomTableCompareFunctor(const FMCS::Graph& query, const FMCS::Graph& target,
|
|
const FMCS::MatchTable& targetMatch)
|
|
: QueryTopology(query), TargetTopology(target), MatchTable(targetMatch){};
|
|
bool operator()(unsigned int i, unsigned int j) const {
|
|
return MatchTable.at(QueryTopology[i], TargetTopology[j]);
|
|
}
|
|
};
|
|
|
|
class BondTableCompareFunctor {
|
|
const FMCS::Graph& QueryTopology;
|
|
const FMCS::Graph& TargetTopology;
|
|
const FMCS::MatchTable& MatchTable;
|
|
|
|
public:
|
|
BondTableCompareFunctor(const FMCS::Graph& query, const FMCS::Graph& target,
|
|
const FMCS::MatchTable& targetMatch)
|
|
: QueryTopology(query), TargetTopology(target), MatchTable(targetMatch){};
|
|
bool operator()(FMCS::Graph::edge_descriptor i,
|
|
FMCS::Graph::edge_descriptor j) const {
|
|
return MatchTable.at(QueryTopology[i], TargetTopology[j]);
|
|
}
|
|
};
|
|
|
|
// Runs boost::vf2 for `query` against `target`, using precomputed atom and
// bond match tables for vertex/edge compatibility and the FinalMatchChecker
// from `p` (if any) as the final check.
//
// Returns false immediately when the query has more vertices or more edges
// than the target. `match` may be null, in which case the mapping is written
// to a local scratch object and discarded.
bool SubstructMatchCustomTable(const FMCS::Graph& target, const ROMol& mol,
                               const FMCS::Graph& query, const ROMol& querySrc,
                               const MatchTable& atomMatchTable,
                               const MatchTable& bondMatchTable,
                               const MCSParameters* p, match_V_t* match) {
  // The query can only be embedded if it is no larger than the target.
  const bool queryFitsInTarget =
      query.m_vertices.size() <= target.m_vertices.size() &&
      query.m_edges.size() <= target.m_edges.size();
  if (!queryFitsInTarget) {
    return false;
  }

  MolMatchFinalCheckFunctor finalChecker(query, target, querySrc, mol, p);
  AtomTableCompareFunctor atomComparer(query, target, atomMatchTable);
  BondTableCompareFunctor bondComparer(query, target, bondMatchTable);

  // Fall back to a throwaway container when the caller does not want the
  // mapping back.
  match_V_t scratchMatch;
  match_V_t& result = match ? *match : scratchMatch;
  return boost::vf2(query, target, atomComparer, bondComparer, finalChecker,
                    result);
}
|
|
|
|
//=========================================================================
|
|
// slow implementation with absolutely the same functionality
|
|
//=========================================================================
|
|
|
|
class AtomLabelFunctor {
|
|
const FMCS::Graph& QueryTopology;
|
|
const FMCS::Graph& TargetTopology;
|
|
const ROMol& d_query;
|
|
const ROMol& d_mol;
|
|
MCSAtomCompareFunction AtomCompare;
|
|
const MCSAtomCompareParameters& Parameters;
|
|
void* UserData;
|
|
|
|
public:
|
|
AtomLabelFunctor(const FMCS::Graph& query, const FMCS::Graph& target,
|
|
const ROMol& querySrc, const ROMol& mol // target
|
|
,
|
|
MCSAtomCompareFunction atomCompare,
|
|
const MCSAtomCompareParameters& p, void* ud)
|
|
: QueryTopology(query),
|
|
TargetTopology(target),
|
|
d_query(querySrc),
|
|
d_mol(mol),
|
|
AtomCompare(atomCompare),
|
|
Parameters(p),
|
|
UserData(ud){};
|
|
bool operator()(unsigned int i, unsigned int j) const {
|
|
return AtomCompare(Parameters, d_query, QueryTopology[i], d_mol,
|
|
TargetTopology[j], UserData);
|
|
}
|
|
};
|
|
|
|
class BondLabelFunctor {
|
|
const FMCS::Graph& QueryTopology;
|
|
const FMCS::Graph& TargetTopology;
|
|
const ROMol& d_query;
|
|
const ROMol& d_mol;
|
|
MCSBondCompareFunction BondCompare;
|
|
const MCSBondCompareParameters& Parameters;
|
|
void* UserData;
|
|
|
|
public:
|
|
BondLabelFunctor(const FMCS::Graph& query, const FMCS::Graph& target,
|
|
const ROMol& querySrc, const ROMol& mol,
|
|
MCSBondCompareFunction bondCompare,
|
|
const MCSBondCompareParameters& p, void* ud)
|
|
: QueryTopology(query),
|
|
TargetTopology(target),
|
|
d_query(querySrc),
|
|
d_mol(mol),
|
|
BondCompare(bondCompare),
|
|
Parameters(p),
|
|
UserData(ud){};
|
|
|
|
bool operator()(FMCS::Graph::edge_descriptor i,
|
|
FMCS::Graph::edge_descriptor j) const {
|
|
unsigned int ii =
|
|
QueryTopology[i]; // take actual Idx value for full source
|
|
// query molecule from index list
|
|
unsigned int jj = TargetTopology[j]; // the same Idx
|
|
return BondCompare(Parameters, d_query, ii, d_mol, jj, UserData);
|
|
}
|
|
};
|
|
|
|
bool SubstructMatchCustom(
|
|
const FMCS::Graph& target, const ROMol& mol, const FMCS::Graph& query,
|
|
const ROMol& querySrc // seed and full source query molecule
|
|
,
|
|
MCSAtomCompareFunction atomCompare, MCSBondCompareFunction bondCompare,
|
|
MCSFinalMatchCheckFunction, const MCSAtomCompareParameters& acp,
|
|
const MCSBondCompareParameters& bcp, void* ud, match_V_t* match) {
|
|
MolMatchFinalCheckFunctor matchChecker(query, target, querySrc, mol, nullptr);
|
|
AtomLabelFunctor atomLabeler(query, target, querySrc, mol, atomCompare, acp,
|
|
ud);
|
|
BondLabelFunctor bondLabeler(query, target, querySrc, mol, bondCompare, bcp,
|
|
ud);
|
|
|
|
match_V_t dummy_match;
|
|
if (!match) {
|
|
match = &dummy_match;
|
|
}
|
|
return boost::vf2(query, target, atomLabeler, bondLabeler, matchChecker,
|
|
*match);
|
|
}
|
|
} // namespace FMCS
|
|
} // namespace RDKit
|