mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-06 22:39:55 +08:00
- Switched from dynamic to static allocation for an instance of `MCSParameters` - Switched to using `auto` where possible - Added a few `CHECK_INVARIANT` where appropriate before dereferencing pointers - Moved some inline comments to the previous line to improve readability - Added an early check for `CompleteRingsOnly` in `checkBondRingMatch()` to improve computational efficiency - Removed `RingMatchTableSet` entirely as 1) it is unnecessary since its functionality is already provided by `RingInfo` 2) it abused the `userData` pointer. This allows cleaning up and simplifying the code, particularly the Python wrappers which had a significant amount of added complexity to support it - Removed all the code that was deprecated several releases ago - Reimplemented ringFusionCheck() from scratch to address several bug reports; also switched from std::set to boost::dynamic_bitset for better efficiency - Replaced boost::tie with boost::make_iterator_range - Modernized `for` loops where possible - Removed entirely the QueryRings structure as its functionality is already available in RingInfo - Removed entirely the _DFS() function since the same algorithm can be implemented in a simpler and more efficient way using RingInfo (from 2m28.441s to 2m9.859s for the same task) - Replaced std::vector<bool> with boost::dynamic_bitset - Replaced C-style casts with C++ casts - Replaced some size_t with unsigned int - Refactored checkIfRingsAreClosed() such that checkNoLoneRingAtoms() is not needed anymore - Added a test for slow runtimes with CompleteRingsOnly - Setting Timeout to 0 means no timeout, as it should be - Removed unused `steps` variable from `MaximumCommonSubgraph::growSeeds` - Storing both Atom and Bond pointers and their indices on Seed and MCS data structures is time-consuming and a potential source of inconsistencies; storing pointers is sufficient - Promoted `MaximumCommonSubgraph::match` from `private` to `public` - `NewBonds` was declared `mutable`, but 
`Seed::fillNewBonds()` was incorrectly declared as `non-const`, which caused the need for an ugly (and unnecessary) `const_cast`. I have now removed the `const_cast` and correctly declared functions that alter `NewBonds` as `const`, since `NewBonds` is explicitly `mutable` - Removed some useless random scoping that was peppering the MCS code - Removed a significant amount of duplicate code from the Python wrappers by inheriting from a base `PyMCSWrapper` class - Fixed #6082 - Fixed #5510 - Fixed #5457 - Fixed #5440 - Fixed #5411 - Fixed #3965 - Fixed #6578 Co-authored-by: ptosco <paolo.tosco@novartis.com>
158 lines
5.2 KiB
C++
158 lines
5.2 KiB
C++
//
|
|
// Copyright (C) 2014 Novartis Institutes for BioMedical Research
|
|
//
|
|
// @@ All Rights Reserved @@
|
|
// This file is part of the RDKit.
|
|
// The contents are covered by the terms of the BSD license
|
|
// which is included in the file license.txt, found at the root
|
|
// of the RDKit source tree.
|
|
//
|
|
#include <RDGeneral/export.h>
|
|
#pragma once
|
|
#include <map>
|
|
#include <boost/dynamic_bitset.hpp>
|
|
#include "../RDKitBase.h"
|
|
// algorithm optimisation definitions
|
|
#include "DebugTrace.h"
|
|
#include "Graph.h"
|
|
#include "DuplicatedSeedCache.h"
|
|
#include "SubstructMatchCustom.h"
|
|
|
|
namespace RDKit {
|
|
namespace FMCS {
|
|
class MaximumCommonSubgraph;
|
|
struct TargetMatch;
|
|
|
|
// Reference to a fragment of source molecule
|
|
struct RDKIT_FMCS_EXPORT MolFragment {
|
|
std::vector<const Atom*> Atoms;
|
|
std::vector<const Bond*> Bonds;
|
|
// Full Query Molecule to Seed indices backward conversionmap
|
|
std::map<unsigned int, unsigned int> SeedAtomIdxMap;
|
|
};
|
|
|
|
struct RDKIT_FMCS_EXPORT NewBond {
|
|
// index in qmol of new bond scheduled to be added into
|
|
// seed. This is outgoing bond from SourceAtomIdx
|
|
unsigned int BondIdx{0};
|
|
// index in qmol of new atom scheduled to be
|
|
// added into seed. Another end of new bond
|
|
unsigned int NewAtomIdx{0};
|
|
// index in the seed. RING. "New" Atom on the another
|
|
// end of new bond if it already exists in the seed.
|
|
unsigned int EndAtomIdx{0};
|
|
// pointer to qmol's new atom scheduled to be
|
|
// added into seed. Another end of new bond
|
|
const Atom* NewAtom{nullptr};
|
|
|
|
NewBond()
|
|
|
|
{}
|
|
|
|
NewBond(unsigned int bond_idx, unsigned int new_atom, unsigned int to_atom,
|
|
const Atom* a)
|
|
: BondIdx(bond_idx),
|
|
NewAtomIdx(new_atom),
|
|
EndAtomIdx(to_atom),
|
|
NewAtom(a) {}
|
|
};
|
|
|
|
class RDKIT_FMCS_EXPORT Seed {
|
|
private:
|
|
boost::dynamic_bitset<> addNewBondsToSeed(const ROMol& qmol,
|
|
Seed& seed) const;
|
|
bool canAddAllNonFusedRingBondsConnectedToBond(
|
|
const Atom& srcAtom, const Bond& bond, MaximumCommonSubgraph& mcs) const;
|
|
void addNewBondFromAtom(const Atom& srcAtom, const Bond& bond) const;
|
|
// for multistage growing. all directly connected outgoing bonds
|
|
mutable std::vector<NewBond> NewBonds;
|
|
bool StoreAllDegenerateMCS = false;
|
|
|
|
public:
|
|
// this seed has been completely copied into list.
|
|
// postponed non-locked copy for MULTI_THREAD
|
|
bool CopyComplete{false};
|
|
// 0 new seed; -1 finished; n>0 in
|
|
// progress, exact stage of growing for SDF
|
|
mutable unsigned int GrowingStage{0};
|
|
// Reference to a fragment of source molecule
|
|
MolFragment MoleculeFragment;
|
|
// seed topology with references to source molecule
|
|
Graph Topology;
|
|
|
|
boost::dynamic_bitset<> ExcludedBonds;
|
|
// in this subgraph for improving performance of future growing
|
|
unsigned int LastAddedAtomsBeginIdx{0};
|
|
// in this subgraph for DEBUG ONLY
|
|
unsigned int LastAddedBondsBeginIdx{0};
|
|
unsigned int RemainingBonds{0};
|
|
unsigned int RemainingAtoms{0};
|
|
#ifdef DUP_SUBSTRUCT_CACHE
|
|
DuplicatedSeedCache::TKey DupCacheKey;
|
|
#endif
|
|
// for each target
|
|
std::vector<TargetMatch> MatchResult;
|
|
|
|
public:
|
|
Seed()
|
|
|
|
{}
|
|
|
|
void setMoleculeFragment(const Seed& src) {
|
|
MoleculeFragment = src.MoleculeFragment;
|
|
}
|
|
Seed& operator=(const Seed& src) {
|
|
NewBonds = src.NewBonds;
|
|
GrowingStage = src.GrowingStage;
|
|
MoleculeFragment = src.MoleculeFragment;
|
|
Topology = src.Topology;
|
|
ExcludedBonds = src.ExcludedBonds;
|
|
LastAddedAtomsBeginIdx = src.LastAddedAtomsBeginIdx;
|
|
LastAddedBondsBeginIdx = src.LastAddedBondsBeginIdx;
|
|
RemainingBonds = src.RemainingBonds;
|
|
RemainingAtoms = src.RemainingAtoms;
|
|
StoreAllDegenerateMCS = src.StoreAllDegenerateMCS;
|
|
#ifdef DUP_SUBSTRUCT_CACHE
|
|
DupCacheKey = src.DupCacheKey;
|
|
#endif
|
|
MatchResult = src.MatchResult;
|
|
CopyComplete = true; // LAST
|
|
return *this;
|
|
}
|
|
void createFromParent(const Seed* parent) {
|
|
MoleculeFragment = parent->MoleculeFragment;
|
|
Topology = parent->Topology;
|
|
ExcludedBonds = parent->ExcludedBonds;
|
|
RemainingBonds = parent->RemainingBonds;
|
|
RemainingAtoms = parent->RemainingAtoms;
|
|
StoreAllDegenerateMCS = parent->StoreAllDegenerateMCS;
|
|
#ifdef DUP_SUBSTRUCT_CACHE
|
|
DupCacheKey = parent->DupCacheKey;
|
|
#endif
|
|
LastAddedAtomsBeginIdx = getNumAtoms(); // previous size
|
|
LastAddedBondsBeginIdx = getNumBonds(); // previous size
|
|
GrowingStage = 0;
|
|
}
|
|
|
|
unsigned int getNumAtoms() const { return MoleculeFragment.Atoms.size(); }
|
|
unsigned int getNumBonds() const { return MoleculeFragment.Bonds.size(); }
|
|
|
|
void grow(MaximumCommonSubgraph& mcs) const;
|
|
bool canGrowBiggerThan(unsigned int maxBonds, unsigned int maxAtoms) const {
|
|
return RemainingBonds + getNumBonds() > maxBonds ||
|
|
(RemainingBonds + getNumBonds() == maxBonds &&
|
|
(RemainingAtoms + getNumAtoms() > maxAtoms ||
|
|
(StoreAllDegenerateMCS &&
|
|
RemainingAtoms + getNumAtoms() == maxAtoms)));
|
|
}
|
|
void computeRemainingSize(const ROMol& qmol);
|
|
|
|
unsigned int addAtom(const Atom* atom);
|
|
unsigned int addBond(const Bond* bond);
|
|
void fillNewBonds(const ROMol& qmol,
|
|
MaximumCommonSubgraph* mcs = nullptr) const;
|
|
void setStoreAllDegenerateMCS(bool value) { StoreAllDegenerateMCS = value; }
|
|
};
|
|
} // namespace FMCS
|
|
} // namespace RDKit
|