mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-03 21:44:30 +08:00
* Allow the same core to match more than once in a molecule. * Update annotation. * Changes after review. --------- Co-authored-by: David Cosgrove <david@cozchemix.co.uk>
118 lines
4.4 KiB
C++
118 lines
4.4 KiB
C++
//
// Copyright (c) 2017-2023, Novartis Institutes for BioMedical Research Inc.
// and other RDKit contributors
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
|
|
|
|
#ifndef RDKIT_RGROUPDECOMPPARAMS_H
|
|
#define RDKIT_RGROUPDECOMPPARAMS_H
|
|
|
|
#include "../RDKitBase.h"
|
|
#include <GraphMol/Substruct/SubstructMatch.h>
|
|
#include <RDGeneral/BetterEnums.h>
|
|
|
|
namespace RDKit {
|
|
|
|
//! Flag values accepted by RGroupDecompositionParameters::labels.
/*!
  Each named source of labels occupies its own bit; AutoDetect (0xFF) has
  every one of those bits set and is the default for the `labels` field.
*/
BETTER_ENUM(
    RGroupLabels, unsigned int, IsotopeLabels = 0x01, AtomMapLabels = 0x02,
    AtomIndexLabels = 0x04, RelabelDuplicateLabels = 0x08,
    MDLRGroupLabels = 0x10,
    DummyAtomLabels = 0x20,  // These are rgroups but will get relabelled
    AutoDetect = 0xFF);
|
|
|
|
//! Values accepted by RGroupDecompositionParameters::matchingStrategy.
/*!
  The `matchingStrategy` field defaults to GreedyChunks.
*/
BETTER_ENUM(RGroupMatching, unsigned int, Greedy = 0x01, GreedyChunks = 0x02,
            Exhaustive = 0x04,  // not really useful for large sets
            NoSymmetrization = 0x08, GA = 0x10);
|
|
|
|
//! Flag values accepted by RGroupDecompositionParameters::rgroupLabelling.
/*!
  The values are single bits so they can be OR-ed together; the
  `rgroupLabelling` field defaults to AtomMap | MDLRGroup.
*/
BETTER_ENUM(RGroupLabelling, unsigned int, AtomMap = 0x01, Isotope = 0x02,
            MDLRGroup = 0x04);
|
|
|
|
//! Values accepted by RGroupDecompositionParameters::alignment
//! (that field defaults to MCS).
BETTER_ENUM(RGroupCoreAlignment, unsigned int, NoAlignment = 0x0, MCS = 0x01);
|
|
|
|
//! Values accepted by RGroupDecompositionParameters::scoreMethod
//! (that field defaults to Match).
BETTER_ENUM(RGroupScore, unsigned int, Match = 0x1, FingerprintVariance = 0x4);
|
|
|
|
struct RDKIT_RGROUPDECOMPOSITION_EXPORT RGroupDecompositionParameters {
|
|
unsigned int labels = RGroupLabels::AutoDetect;
|
|
unsigned int matchingStrategy = RGroupMatching::GreedyChunks;
|
|
unsigned int scoreMethod = RGroupScore::Match;
|
|
unsigned int rgroupLabelling =
|
|
RGroupLabelling::AtomMap | RGroupLabelling::MDLRGroup;
|
|
unsigned int alignment = RGroupCoreAlignment::MCS;
|
|
|
|
unsigned int chunkSize = 5;
|
|
//! only allow rgroup decomposition at the specified rgroups
|
|
bool onlyMatchAtRGroups = false;
|
|
//! remove all user-defined rgroups that only have hydrogens
|
|
bool removeAllHydrogenRGroups = true;
|
|
//! remove all user-defined rgroups that only have hydrogens,
|
|
//! and also remove the corresponding labels from the core
|
|
bool removeAllHydrogenRGroupsAndLabels = true;
|
|
//! remove all hydrogens from the output molecules
|
|
bool removeHydrogensPostMatch = true;
|
|
//! allow labelled Rgroups of degree 2 or more
|
|
bool allowNonTerminalRGroups = false;
|
|
//! unlabelled core atoms can have multiple rgroups
|
|
bool allowMultipleRGroupsOnUnlabelled = false;
|
|
//! Permit a core to match more than once in the same molecule if the sets of
|
|
// matched atoms are not equal.
|
|
bool allowMultipleCoresInSameMol = false;
|
|
// extended query settings for core matching
|
|
bool doTautomers = false;
|
|
bool doEnumeration = false;
|
|
//! include target molecule (featuring explicit hydrogens where they
|
|
//! coincide with R groups in the core) into RGD results,
|
|
//! and set _rgroupTargetAtoms and _rgroupTargetBonds properties
|
|
//! on R groups and core as vectors of target atom and bond indices
|
|
//! to enable highlighting for SAR analysis (see
|
|
//! https://greglandrum.github.io/rdkit-blog/posts/2021-08-07-rgd-and-highlighting.html)
|
|
bool includeTargetMolInResults = false;
|
|
|
|
double timeout = -1.0; ///< timeout in seconds. <=0 indicates no timeout
|
|
|
|
// Determine how to assign the rgroup labels from the given core
|
|
unsigned int autoGetLabels(const RWMol &);
|
|
|
|
// Prepare the core for substructure searching and rgroup assignment
|
|
bool prepareCore(RWMol &, const RWMol *alignCore);
|
|
|
|
// Add r groups to unlabelled atoms if allowMultipleRGroupsOnUnlabelled is set
|
|
void addDummyAtomsToUnlabelledCoreAtoms(RWMol &core);
|
|
|
|
// Parameters specific to GA
|
|
|
|
// GA population size or -1 to use best guess
|
|
int gaPopulationSize = -1;
|
|
// GA maximum number of operations or -1 to use best guess
|
|
int gaMaximumOperations = -1;
|
|
// GA number of operations permitted without improvement before exiting (-1
|
|
// for best guess)
|
|
int gaNumberOperationsWithoutImprovement = -1;
|
|
// GA random number seed (-1 for default, -2 for random seed)
|
|
int gaRandomSeed = -1;
|
|
// Number of runs
|
|
int gaNumberRuns = 1;
|
|
// Sequential or parallel runs?
|
|
#ifdef RDK_BUILD_THREADSAFE_SSS
|
|
bool gaParallelRuns = true;
|
|
#else
|
|
bool gaParallelRuns = false;
|
|
#endif
|
|
// Controls the way substructure matching with the core is done
|
|
SubstructMatchParameters substructmatchParams;
|
|
|
|
RGroupDecompositionParameters() { substructmatchParams.useChirality = true; }
|
|
|
|
private:
|
|
int indexOffset{-1};
|
|
void checkNonTerminal(const Atom &atom) const;
|
|
};
|
|
|
|
} // namespace RDKit
|
|
|
|
#endif // RDKIT_RGROUPDECOMPPARAMS_H
|