mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-04 21:54:27 +08:00
- Switched from dynamic to static allocation for an instance of `MCSParameters` - Switched to using `auto` where possible - Added a few `CHECK_INVARIANT` where appropriate before dereferencing pointers - Moved some inline comments to the previous line to improve readability - Added an early check for `CompleteRingsOnly` in `checkBondRingMatch()` to improve computational efficiency - Removed `RingMatchTableSet` entirely as 1) it is unnecessary since its functionality is already provided by `RingInfo` 2) it abused the `userData` pointer. This allows cleaning up and simplifying the code, particularly the Python wrappers which had a significant amount of added complexity to support it - Removed all the code that was deprecated several releases ago - Reimplemented ringFusionCheck() from scratch to address several bug reports; also switched from std::set to boost::dynamic_bitset for better efficiency - Replaced boost::tie with boost::make_iterator_range - Modernized `for` loops where possible - Removed entirely the QueryRings structure as its functionality is already available in RingInfo - Removed entirely the _DFS() function since the same algorithm can be implemented in a simpler and more efficient way using RingInfo (from 2m28.441s to 2m9.859s for the same task) - Replaced std::vector<bool> with boost::dynamic_bitset - Replaced C-style casts with C++ casts - Replaced some size_t with unsigned int - Refactored checkIfRingsAreClosed() such that checkNoLoneRingAtoms() is not needed anymore - Added a test for slow runtimes with CompleteRingsOnly - Setting Timeout to 0 means no timeout, as it should be - Removed unused `steps` variable from `MaximumCommonSubgraph::growSeeds` - Storing both Atom and Bond pointers and their indices on Seed and MCS data structures is time-consuming and a potential source of inconsistencies; storing pointers is sufficient - Promoted `MaximumCommonSubgraph::match` from `private` to `public` - `NewBonds` was declared `mutable`, but 
`Seed::fillNewBonds()` was incorrectly declared as `non-const`, which caused the need for an ugly (and unnecessary) `const_cast`. I have now removed the `const_cast` and correctly declared functions that alter `NewBonds` as `const`, since `NewBonds` is explicitly `mutable` - Removed some useless random scoping that was peppering the MCS code - Removed a significant amount of duplicate code from the Python wrappers by inheriting from a base `PyMCSWrapper` class - Fixed #6082 - Fixed #5510 - Fixed #5457 - Fixed #5440 - Fixed #5411 - Fixed #3965 - Fixed #6578 Co-authored-by: ptosco <paolo.tosco@novartis.com>
107 lines
3.4 KiB
C++
107 lines
3.4 KiB
C++
//
// Copyright (C) 2014 Novartis Institutes for BioMedical Research
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
|
|
#include <RDGeneral/export.h>
|
|
#pragma once
|
|
#include <vector>
|
|
#include <string>
|
|
#include <stdexcept>
|
|
#include "../RDKitBase.h"
|
|
#include "FMCS.h"
|
|
#include "DebugTrace.h" // algorithm filter definitions
|
|
#include "SeedSet.h"
|
|
#include "Target.h"
|
|
#include "SubstructureCache.h"
|
|
#include "DuplicatedSeedCache.h"
|
|
#include "MatchTable.h"
|
|
#include "TargetMatch.h"
|
|
|
|
namespace RDKit {
|
|
|
|
inline bool FinalChiralityCheckFunction(
|
|
const std::uint32_t c1[], const std::uint32_t c2[], const ROMol& mol1,
|
|
const FMCS::Graph& query, const ROMol& mol2, const FMCS::Graph& target,
|
|
const MCSParameters* p);
|
|
|
|
bool FinalMatchCheckFunction(const std::uint32_t c1[], const std::uint32_t c2[],
|
|
const ROMol& mol1, const FMCS::Graph& query,
|
|
const ROMol& mol2, const FMCS::Graph& target,
|
|
const MCSParameters* p);
|
|
|
|
namespace FMCS {
|
|
class RDKIT_FMCS_EXPORT MaximumCommonSubgraph {
|
|
// current result. Reference to a fragment of source molecule
|
|
struct MCS {
|
|
std::vector<const Atom*> Atoms;
|
|
std::vector<const Bond*> Bonds;
|
|
const ROMol* QueryMolecule;
|
|
std::vector<Target> Targets;
|
|
};
|
|
unsigned long long To;
|
|
MCSProgressData Stat;
|
|
detail::MCSParametersInternal Parameters;
|
|
// min number of matches
|
|
unsigned int ThresholdCount;
|
|
std::vector<const ROMol*> Molecules;
|
|
#ifdef FAST_SUBSTRUCT_CACHE
|
|
// for Morgan code. Value based on current functor and parameters
|
|
std::vector<unsigned int> QueryAtomLabels;
|
|
// for Morgan code. Value based on current functor and parameters
|
|
std::vector<unsigned int> QueryBondLabels;
|
|
SubstructureCache HashCache;
|
|
MatchTable QueryAtomMatchTable;
|
|
MatchTable QueryBondMatchTable;
|
|
#endif
|
|
#ifdef DUP_SUBSTRUCT_CACHE
|
|
DuplicatedSeedCache DuplicateCache;
|
|
#endif
|
|
const ROMol* QueryMolecule;
|
|
unsigned int QueryMoleculeMatchedBonds;
|
|
unsigned int QueryMoleculeMatchedAtoms;
|
|
const Atom* QueryMoleculeSingleMatchedAtom;
|
|
std::vector<Target> Targets;
|
|
SeedSet Seeds;
|
|
MCS McsIdx;
|
|
std::map<std::vector<unsigned int>, MCS> DegenerateMcsMap;
|
|
|
|
public:
|
|
#ifdef VERBOSE_STATISTICS_ON
|
|
ExecStatistics VerboseStatistics;
|
|
#endif
|
|
|
|
MaximumCommonSubgraph(const MCSParameters* params);
|
|
~MaximumCommonSubgraph() { clear(); }
|
|
MCSResult find(const std::vector<ROMOL_SPTR>& mols);
|
|
const ROMol& getQueryMolecule() const { return *QueryMolecule; }
|
|
unsigned int getMaxNumberBonds() const { return McsIdx.Bonds.size(); }
|
|
|
|
unsigned int getMaxNumberAtoms() const { return McsIdx.Atoms.size(); }
|
|
bool checkIfMatchAndAppend(Seed& seed);
|
|
bool match(Seed& seed);
|
|
const MCSParameters& parameters() const { return Parameters; }
|
|
MCSParameters& parameters() { return Parameters; }
|
|
|
|
private:
|
|
void clear() {
|
|
Targets.clear();
|
|
Molecules.clear();
|
|
To = nanoClock();
|
|
}
|
|
void init(size_t startIdx);
|
|
void makeInitialSeeds();
|
|
bool createSeedFromMCS(size_t newQueryTarget, Seed& seed);
|
|
bool growSeeds(); // returns false if canceled
|
|
std::pair<std::string, ROMOL_SPTR> generateResultSMARTSAndQueryMol(
|
|
const MCS& mcsIdx) const;
|
|
|
|
bool matchIncrementalFast(Seed& seed, unsigned int itarget);
|
|
};
|
|
} // namespace FMCS
|
|
} // namespace RDKit
|