Files
rdkit/Code/GraphMol/FMCS/SubstructMatchCustom.cpp
Paolo Tosco 350370abe3 - Changed all unsigned to unsigned int for clarity (#6646)
- Switched from dynamic to static allocation for an instance of `MCSParameters`
- Switched to using `auto` where possible
- Added a few `CHECK_INVARIANT` where appropriate before dereferencing pointers
- Moved some inline comments to the previous line to improve readability
- Added an early check for `CompleteRingsOnly` in `checkBondRingMatch()` to improve computational efficiency
- Removed `RingMatchTableSet` entirely as 1) it is unnecessary since its functionality is already provided by `RingInfo` 2) it abused the `userData` pointer. This allows cleaning up and simplifying the code, particularly the Python wrappers which had a significant amount of added complexity to support it
- Removed all the code that was deprecated several releases ago
- Reimplemented ringFusionCheck() from scratch to address several bug reports; also switched from std::set to boost::dynamic_bitset for better efficiency
- Replaced boost::tie with boost::make_iterator_range
- Modernized `for` loops where possible
- Removed entirely the QueryRings structure as its functionality is already available in RingInfo
- Removed entirely the _DFS() function since the same algorithm can be implemented in a simpler and more efficient way using RingInfo (from 2m28.441s to 2m9.859s for the same task)
- Replaced std::vector<bool> with boost::dynamic_bitset
- Replaced C-style casts with C++ casts
- Replaced some size_t with unsigned int
- Refactored checkIfRingsAreClosed() such that checkNoLoneRingAtoms() is not needed anymore
- Added a test for slow runtimes with CompleteRingsOnly
- Setting Timeout to 0 means no timeout, as it should be
- Removed unused `steps` variable from `MaximumCommonSubgraph::growSeeds`
- Storing both Atom and Bond pointers and their indices on Seed and MCS data structures is time-consuming and a potential source of inconsistencies; storing pointers is sufficient
- Promoted `MaximumCommonSubgraph::match` from `private` to `public`
- `NewBonds` was declared `mutable`, but `Seed::fillNewBonds()` was incorrectly declared as `non-const`, which caused the need for an ugly (and unnecessary) `const_cast`.
I have now removed the `const_cast` and correctly declared functions that alter `NewBonds` as `const`, since `NewBonds` is explicitly `mutable`
- Removed some useless random scoping that was peppering the MCS code
- Removed a significant amount of duplicate code from the Python wrappers by inheriting from a base `PyMCSWrapper` class
- Fixed #6082
- Fixed #5510
- Fixed #5457
- Fixed #5440
- Fixed #5411
- Fixed #3965
- Fixed #6578

Co-authored-by: ptosco <paolo.tosco@novartis.com>
2023-08-25 06:09:19 +02:00

193 lines
6.7 KiB
C++

//
// Copyright (C) 2014 Novartis Institutes for BioMedical Research
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
#include <map>
#include <RDGeneral/utils.h>
#include <RDGeneral/Invariant.h>
#include <GraphMol/RDKitBase.h>
#include <GraphMol/RDKitQueries.h>
#include "SubstructMatchCustom.h"
#include "../Substruct/vf2.hpp"
namespace RDKit {
namespace FMCS {
// Final-stage predicate handed to boost::vf2(): once a candidate mapping has
// been produced it forwards the mapping to MCSParameters::FinalMatchChecker
// (when one is configured) and reports that callback's verdict.
class MolMatchFinalCheckFunctor {
  const FMCS::Graph& d_queryGraph;
  const FMCS::Graph& d_targetGraph;
  const ROMol& d_queryMol;
  const ROMol& d_targetMol;
  // may be null; no user-defined final check is applied in that case
  const MCSParameters* d_params;

 public:
  MolMatchFinalCheckFunctor(const FMCS::Graph& query, const FMCS::Graph& target,
                            const ROMol& querySrc,
                            const ROMol& mol,  // target
                            const MCSParameters* parameters)
      : d_queryGraph(query),
        d_targetGraph(target),
        d_queryMol(querySrc),
        d_targetMol(mol),
        d_params(parameters) {}

  // c1 holds node ids on the query side of the mapping (its first entry is
  // validated against the query graph); c2 is presumably the matching
  // target-side array - it is only forwarded to the callback.
  // Returns false if c1[0] is out of range or the callback rejects the
  // mapping, and true when no parameters / no callback are available.
  bool operator()(const boost::detail::node_id c1[],
                  const boost::detail::node_id c2[]) const {
    const auto firstQueryNode = static_cast<unsigned int>(c1[0]);
    if (firstQueryNode >= boost::num_vertices(d_queryGraph)) {
      // invalid index - the match failed, see the vf2 implementation
      return false;
    }
    if (!d_params) {
      return true;
    }
    const auto userCheck = d_params->FinalMatchChecker;
    if (!userCheck) {
      return true;
    }
    return userCheck(c1, c2, d_queryMol, d_queryGraph, d_targetMol,
                     d_targetGraph, d_params);
  }
};
//=================================================================================================
// PRECOMPUTED_TABLES_MATCH is much faster overall, even with very simple
// compare functions
class AtomTableCompareFunctor {
const FMCS::Graph& QueryTopology;
const FMCS::Graph& TargetTopology;
const FMCS::MatchTable& MatchTable;
public:
AtomTableCompareFunctor(const FMCS::Graph& query, const FMCS::Graph& target,
const FMCS::MatchTable& targetMatch)
: QueryTopology(query), TargetTopology(target), MatchTable(targetMatch){};
bool operator()(unsigned int i, unsigned int j) const {
return MatchTable.at(QueryTopology[i], TargetTopology[j]);
}
};
class BondTableCompareFunctor {
const FMCS::Graph& QueryTopology;
const FMCS::Graph& TargetTopology;
const FMCS::MatchTable& MatchTable;
public:
BondTableCompareFunctor(const FMCS::Graph& query, const FMCS::Graph& target,
const FMCS::MatchTable& targetMatch)
: QueryTopology(query), TargetTopology(target), MatchTable(targetMatch){};
bool operator()(FMCS::Graph::edge_descriptor i,
FMCS::Graph::edge_descriptor j) const {
return MatchTable.at(QueryTopology[i], TargetTopology[j]);
}
};
// Runs boost::vf2() for `query` against `target`, deciding atom and bond
// compatibility through the precomputed match tables and applying the final
// check configured in `p` (if any).
//
// target / mol      - target graph and the molecule passed to the final check
// query / querySrc  - query graph and the molecule passed to the final check
// atomMatchTable    - lookup table used for vertex compatibility
// bondMatchTable    - lookup table used for edge compatibility
// p                 - MCS parameters; may be null
// match             - optional output; when null a local scratch container
//                     receives the mapping and is discarded
//
// Returns false straight away if the query has more vertices or more edges
// than the target; otherwise returns whatever boost::vf2() returns.
bool SubstructMatchCustomTable(const FMCS::Graph& target, const ROMol& mol,
                               const FMCS::Graph& query, const ROMol& querySrc,
                               const MatchTable& atomMatchTable,
                               const MatchTable& bondMatchTable,
                               const MCSParameters* p, match_V_t* match) {
  // a query that is larger than the target can never be embedded in it
  const bool tooManyVertices =
      query.m_vertices.size() > target.m_vertices.size();
  const bool tooManyEdges = query.m_edges.size() > target.m_edges.size();
  if (tooManyVertices || tooManyEdges) {
    return false;
  }

  // fall back to a throw-away container when the caller does not want the
  // mapping itself
  match_V_t scratch;
  match_V_t& mapping = match ? *match : scratch;

  AtomTableCompareFunctor atomsCompatible(query, target, atomMatchTable);
  BondTableCompareFunctor bondsCompatible(query, target, bondMatchTable);
  MolMatchFinalCheckFunctor finalCheck(query, target, querySrc, mol, p);

  return boost::vf2(query, target, atomsCompatible, bondsCompatible,
                    finalCheck, mapping);
}
//=========================================================================
// slow implementation with absolutely the same functionality
//=========================================================================
class AtomLabelFunctor {
const FMCS::Graph& QueryTopology;
const FMCS::Graph& TargetTopology;
const ROMol& d_query;
const ROMol& d_mol;
MCSAtomCompareFunction AtomCompare;
const MCSAtomCompareParameters& Parameters;
void* UserData;
public:
AtomLabelFunctor(const FMCS::Graph& query, const FMCS::Graph& target,
const ROMol& querySrc, const ROMol& mol // target
,
MCSAtomCompareFunction atomCompare,
const MCSAtomCompareParameters& p, void* ud)
: QueryTopology(query),
TargetTopology(target),
d_query(querySrc),
d_mol(mol),
AtomCompare(atomCompare),
Parameters(p),
UserData(ud){};
bool operator()(unsigned int i, unsigned int j) const {
return AtomCompare(Parameters, d_query, QueryTopology[i], d_mol,
TargetTopology[j], UserData);
}
};
class BondLabelFunctor {
const FMCS::Graph& QueryTopology;
const FMCS::Graph& TargetTopology;
const ROMol& d_query;
const ROMol& d_mol;
MCSBondCompareFunction BondCompare;
const MCSBondCompareParameters& Parameters;
void* UserData;
public:
BondLabelFunctor(const FMCS::Graph& query, const FMCS::Graph& target,
const ROMol& querySrc, const ROMol& mol,
MCSBondCompareFunction bondCompare,
const MCSBondCompareParameters& p, void* ud)
: QueryTopology(query),
TargetTopology(target),
d_query(querySrc),
d_mol(mol),
BondCompare(bondCompare),
Parameters(p),
UserData(ud){};
bool operator()(FMCS::Graph::edge_descriptor i,
FMCS::Graph::edge_descriptor j) const {
unsigned int ii =
QueryTopology[i]; // take actual Idx value for full source
// query molecule from index list
unsigned int jj = TargetTopology[j]; // the same Idx
return BondCompare(Parameters, d_query, ii, d_mol, jj, UserData);
}
};
// Callback-driven counterpart of the table-based matcher: runs boost::vf2()
// for `query` against `target`, calling `atomCompare` / `bondCompare` for
// every candidate atom / bond pair.
//
// target / mol      - target graph and the molecule it refers to
// query / querySrc  - seed graph and the full source query molecule
// atomCompare, acp  - atom compare callback and its parameters
// bondCompare, bcp  - bond compare callback and its parameters
// (unnamed)         - the MCSFinalMatchCheckFunction argument is accepted but
//                     not used: the final-check functor is built with null
//                     parameters, so no user-defined final check runs here
// ud                - opaque user data forwarded to both callbacks
// match             - optional output; when null a local scratch container
//                     receives the mapping and is discarded
//
// Returns whatever boost::vf2() returns.
bool SubstructMatchCustom(
    const FMCS::Graph& target, const ROMol& mol, const FMCS::Graph& query,
    const ROMol& querySrc,  // seed and full source query molecule
    MCSAtomCompareFunction atomCompare, MCSBondCompareFunction bondCompare,
    MCSFinalMatchCheckFunction, const MCSAtomCompareParameters& acp,
    const MCSBondCompareParameters& bcp, void* ud, match_V_t* match) {
  // fall back to a throw-away container when the caller does not want the
  // mapping itself
  match_V_t scratch;
  match_V_t& mapping = match ? *match : scratch;

  AtomLabelFunctor atomsCompatible(query, target, querySrc, mol, atomCompare,
                                   acp, ud);
  BondLabelFunctor bondsCompatible(query, target, querySrc, mol, bondCompare,
                                   bcp, ud);
  MolMatchFinalCheckFunctor finalCheck(query, target, querySrc, mol, nullptr);

  return boost::vf2(query, target, atomsCompatible, bondsCompatible,
                    finalCheck, mapping);
}
} // namespace FMCS
} // namespace RDKit