// Mirror of https://github.com/rdkit/rdkit.git
// (synced 2026-06-03 21:44:30 +08:00; 278 lines, 12 KiB, C++)
//
|
|
// Copyright (C) 2018-2023 Susan H. Leung and other RDKit contributors
|
|
//
|
|
// @@ All Rights Reserved @@
|
|
// This file is part of the RDKit.
|
|
// The contents are covered by the terms of the BSD license
|
|
// which is included in the file license.txt, found at the root
|
|
// of the RDKit source tree.
|
|
//
|
|
/*! \file MolStandardize.h
|
|
|
|
\brief Defines the CleanupParameters and some convenience functions.
|
|
|
|
*/
|
|
#include <RDGeneral/export.h>
|
|
#ifndef RD_MOLSTANDARDIZE_H
|
|
#define RD_MOLSTANDARDIZE_H
|
|
|
|
#include <string>
|
|
#include <GraphMol/RDKitBase.h>
|
|
#include <GraphMol/MolStandardize/Metal.h>
|
|
|
|
namespace RDKit {
|
|
class RWMol;
|
|
class ROMol;
|
|
|
|
namespace MolStandardize {
|
|
|
|
//! The CleanupParameters structure defines the default parameters for the
|
|
/// cleanup process and also allows the user to customize the process by
|
|
/// changing the parameters.
|
|
/*!
|
|
|
|
<b>Notes:</b>
|
|
- To customize the parameters, the structure must be initialized first.
|
|
(Another on the TODO list)
|
|
- For this project, not all the parameters have been revealed.
|
|
(TODO)
|
|
|
|
*/
|
|
struct RDKIT_MOLSTANDARDIZE_EXPORT CleanupParameters {
|
|
// TODO reveal all parameters
|
|
private:
|
|
const char *rdbase_cstr = std::getenv("RDBASE");
|
|
|
|
public:
|
|
std::string rdbase = rdbase_cstr != nullptr ? rdbase_cstr : "";
|
|
std::string normalizations;
|
|
std::string acidbaseFile;
|
|
std::string fragmentFile;
|
|
std::string tautomerTransforms;
|
|
int maxRestarts{200}; //!< The maximum number of times to attempt to apply
|
|
//!< the series of normalizations (default 200).
|
|
bool preferOrganic{false}; //!< Whether to prioritize organic fragments when
|
|
//!< choosing fragment parent (default False).
|
|
bool doCanonical{true}; //!< Whether to apply normalizations in a
|
|
//!< canonical order
|
|
int maxTautomers{1000}; //!< The maximum number of tautomers to enumerate
|
|
//!< (default 1000).
|
|
int maxTransforms{1000}; //!< The maximum number of tautomer
|
|
//!< transformations to apply (default 1000).
|
|
bool tautomerRemoveSp3Stereo{
|
|
true}; //!< Whether to remove stereochemistry from sp3 centers involved
|
|
//!< in tautomerism (defaults to true)
|
|
bool tautomerRemoveBondStereo{
|
|
true}; //!< Whether to remove stereochemistry from double bonds involved
|
|
//!< in tautomerism (defaults to true)
|
|
bool tautomerRemoveIsotopicHs{
|
|
true}; //!< Whether to remove isotopic Hs from centers involved in
|
|
//!< tautomerism (defaults to true)
|
|
bool tautomerReassignStereo{
|
|
true}; //!< Whether enumerate() should call assignStereochemistry on all
|
|
//!< generated tautomers (defaults to true)
|
|
bool largestFragmentChooserUseAtomCount{
|
|
true}; //!< Whether LargestFragmentChooser should use atom count as main
|
|
//!< criterion before MW (defaults to true)
|
|
bool largestFragmentChooserCountHeavyAtomsOnly{
|
|
false}; //!< Whether LargestFragmentChooser should only count heavy atoms
|
|
//!< (defaults to false)
|
|
std::vector<std::pair<std::string, std::string>> normalizationData;
|
|
std::vector<std::pair<std::string, std::string>> fragmentData;
|
|
std::vector<std::tuple<std::string, std::string, std::string>> acidbaseData;
|
|
std::vector<std::tuple<std::string, std::string, std::string, std::string>>
|
|
tautomerTransformData;
|
|
CleanupParameters() {}
|
|
};
|
|
|
|
//! Shared, immutable CleanupParameters instance that is used as the default
/// \c params argument of the functions declared in this header. It is only
/// declared here; the definition lives elsewhere.
RDKIT_MOLSTANDARDIZE_EXPORT extern const CleanupParameters
    defaultCleanupParameters;
|
|
|
|
//! Updates \c params from the contents of \c json.
/*!
  \param params  the parameters to modify (taken by non-const reference)
  \param json    the JSON text to read from. NOTE(review): the recognized
                 keys and the error handling are defined by the
                 implementation and are not visible in this header.
*/
RDKIT_MOLSTANDARDIZE_EXPORT void updateCleanupParamsFromJSON(
    CleanupParameters &params, const std::string &json);
|
|
|
|
//! The cleanup function is equivalent to the
|
|
/// molvs.Standardizer().standardize(mol) function. It calls the same steps,
|
|
/// namely: RemoveHs, RDKit SanitizeMol, MetalDisconnector, Normalizer,
|
|
/// Reionizer, RDKit AssignStereochemistry.
|
|
RDKIT_MOLSTANDARDIZE_EXPORT RWMol *cleanup(
|
|
const RWMol *mol,
|
|
const CleanupParameters ¶ms = defaultCleanupParameters);
|
|
//! \overload
|
|
inline RWMol *cleanup(const RWMol &mol, const CleanupParameters ¶ms =
|
|
defaultCleanupParameters) {
|
|
return cleanup(&mol, params);
|
|
};
|
|
//! Works the same as cleanup(mol)
|
|
RDKIT_MOLSTANDARDIZE_EXPORT void cleanupInPlace(
|
|
RWMol &mol, const CleanupParameters ¶ms = defaultCleanupParameters);
|
|
//! Operates on multiple molecules
|
|
RDKIT_MOLSTANDARDIZE_EXPORT void cleanupInPlace(
|
|
std::vector<RWMol *> &mols, int numThreads = 1,
|
|
const CleanupParameters ¶ms = defaultCleanupParameters);
|
|
|
|
//! Works the same as Normalizer().normalize(mol)
|
|
RDKIT_MOLSTANDARDIZE_EXPORT RWMol *normalize(
|
|
const RWMol *mol,
|
|
const CleanupParameters ¶ms = defaultCleanupParameters);
|
|
//! Works the same as Normalizer().normalizeInPlace(mol)
|
|
RDKIT_MOLSTANDARDIZE_EXPORT void normalizeInPlace(
|
|
RWMol &mol, const CleanupParameters ¶ms = defaultCleanupParameters);
|
|
//! Operates on multiple molecules
|
|
RDKIT_MOLSTANDARDIZE_EXPORT void normalizeInPlace(
|
|
std::vector<RWMol *> &mols, int numThreads = 1,
|
|
const CleanupParameters ¶ms = defaultCleanupParameters);
|
|
|
|
//! Works the same as Reionizer().reionize(mol)
|
|
RDKIT_MOLSTANDARDIZE_EXPORT RWMol *reionize(
|
|
const RWMol *mol,
|
|
const CleanupParameters ¶ms = defaultCleanupParameters);
|
|
//! Works the same as Reionizer().reionizeInPlace(mol)
|
|
RDKIT_MOLSTANDARDIZE_EXPORT void reionizeInPlace(
|
|
RWMol &mol, const CleanupParameters ¶ms = defaultCleanupParameters);
|
|
//! Operates on multiple molecules
|
|
RDKIT_MOLSTANDARDIZE_EXPORT void reionizeInPlace(
|
|
std::vector<RWMol *> &mols, int numThreads = 1,
|
|
const CleanupParameters ¶ms = defaultCleanupParameters);
|
|
|
|
//! Works the same as FragmentRemover().remove(mol)
|
|
RDKIT_MOLSTANDARDIZE_EXPORT RWMol *removeFragments(
|
|
const RWMol *mol,
|
|
const CleanupParameters ¶ms = defaultCleanupParameters);
|
|
//! Works the same as FragmentRemover().removeInPlace(mol)
|
|
RDKIT_MOLSTANDARDIZE_EXPORT void removeFragmentsInPlace(
|
|
RWMol &mol, const CleanupParameters ¶ms = defaultCleanupParameters);
|
|
//! Operates on multiple molecules
|
|
RDKIT_MOLSTANDARDIZE_EXPORT void removeFragmentsInPlace(
|
|
std::vector<RWMol *> &mols, int numThreads = 1,
|
|
const CleanupParameters ¶ms = defaultCleanupParameters);
|
|
|
|
//! Works the same as TautomerEnumerator().canonicalize(mol)
|
|
RDKIT_MOLSTANDARDIZE_EXPORT RWMol *canonicalTautomer(
|
|
const RWMol *mol,
|
|
const CleanupParameters ¶ms = defaultCleanupParameters);
|
|
RDKIT_MOLSTANDARDIZE_EXPORT void canonicalTautomerInPlace(
|
|
RWMol &mol, const CleanupParameters ¶ms = defaultCleanupParameters);
|
|
|
|
//! Returns the tautomer parent of a given molecule. The fragment parent is the
|
|
/// standardized canonical tautomer of the molecule
|
|
RDKIT_MOLSTANDARDIZE_EXPORT RWMol *tautomerParent(
|
|
const RWMol &mol,
|
|
const CleanupParameters ¶ms = defaultCleanupParameters,
|
|
bool skipStandardize = false);
|
|
RDKIT_MOLSTANDARDIZE_EXPORT void tautomerParentInPlace(
|
|
RWMol &mol, const CleanupParameters ¶ms = defaultCleanupParameters,
|
|
bool skipStandardize = false);
|
|
RDKIT_MOLSTANDARDIZE_EXPORT void tautomerParentInPlace(
|
|
std::vector<RWMol *> &mols, int numThreads = 1,
|
|
const CleanupParameters ¶ms = defaultCleanupParameters,
|
|
bool skipStandardize = false);
|
|
|
|
//! Returns the fragment parent of a given molecule. The fragment parent is the
|
|
/// largest organic covalent unit in the molecule.
|
|
RDKIT_MOLSTANDARDIZE_EXPORT RWMol *fragmentParent(
|
|
const RWMol &mol,
|
|
const CleanupParameters ¶ms = defaultCleanupParameters,
|
|
bool skip_standardize = false);
|
|
RDKIT_MOLSTANDARDIZE_EXPORT void fragmentParentInPlace(
|
|
RWMol &mol, const CleanupParameters ¶ms = defaultCleanupParameters,
|
|
bool skip_standardize = false);
|
|
RDKIT_MOLSTANDARDIZE_EXPORT void fragmentParentInPlace(
|
|
std::vector<RWMol *> &mols, int numThreads = 1,
|
|
const CleanupParameters ¶ms = defaultCleanupParameters,
|
|
bool skip_standardize = false);
|
|
|
|
//! calls removeStereochemistry() on the given molecule
|
|
RDKIT_MOLSTANDARDIZE_EXPORT RWMol *stereoParent(
|
|
const RWMol &mol,
|
|
const CleanupParameters ¶ms = defaultCleanupParameters,
|
|
bool skip_standardize = false);
|
|
RDKIT_MOLSTANDARDIZE_EXPORT void stereoParentInPlace(
|
|
RWMol &mol, const CleanupParameters ¶ms = defaultCleanupParameters,
|
|
bool skip_standardize = false);
|
|
RDKIT_MOLSTANDARDIZE_EXPORT void stereoParentInPlace(
|
|
std::vector<RWMol *> &mols, int numThreads = 1,
|
|
const CleanupParameters ¶ms = defaultCleanupParameters,
|
|
bool skip_standardize = false);
|
|
|
|
//! removes all isotopes specifications from the given molecule
|
|
RDKIT_MOLSTANDARDIZE_EXPORT RWMol *isotopeParent(
|
|
const RWMol &mol,
|
|
const CleanupParameters ¶ms = defaultCleanupParameters,
|
|
bool skip_standardize = false);
|
|
RDKIT_MOLSTANDARDIZE_EXPORT void isotopeParentInPlace(
|
|
RWMol &mol, const CleanupParameters ¶ms = defaultCleanupParameters,
|
|
bool skip_standardize = false);
|
|
RDKIT_MOLSTANDARDIZE_EXPORT void isotopeParentInPlace(
|
|
std::vector<RWMol *> &mols, int numThreads = 1,
|
|
const CleanupParameters ¶ms = defaultCleanupParameters,
|
|
bool skip_standardize = false);
|
|
|
|
//! Returns the charge parent of a given molecule. The charge parent is the
|
|
//! uncharged version of the fragment parent.
|
|
RDKIT_MOLSTANDARDIZE_EXPORT RWMol *chargeParent(
|
|
const RWMol &mol,
|
|
const CleanupParameters ¶ms = defaultCleanupParameters,
|
|
bool skip_standardize = false);
|
|
RDKIT_MOLSTANDARDIZE_EXPORT void chargeParentInPlace(
|
|
RWMol &mol, const CleanupParameters ¶ms = defaultCleanupParameters,
|
|
bool skip_standardize = false);
|
|
//! operates on multiple molecules
|
|
RDKIT_MOLSTANDARDIZE_EXPORT void chargeParentInPlace(
|
|
std::vector<RWMol *> &mols, int numThreads = 1,
|
|
const CleanupParameters ¶ms = defaultCleanupParameters,
|
|
bool skip_standardize = false);
|
|
|
|
//! Returns the super parent. The super parent is the fragment, charge,
|
|
//! isotope, stereo, and tautomer parent of the molecule.
|
|
RDKIT_MOLSTANDARDIZE_EXPORT RWMol *superParent(
|
|
const RWMol &mol,
|
|
const CleanupParameters ¶ms = defaultCleanupParameters,
|
|
bool skip_standardize = false);
|
|
RDKIT_MOLSTANDARDIZE_EXPORT void superParentInPlace(
|
|
RWMol &mol, const CleanupParameters ¶ms = defaultCleanupParameters,
|
|
bool skip_standardize = false);
|
|
RDKIT_MOLSTANDARDIZE_EXPORT void superParentInPlace(
|
|
std::vector<RWMol *> &mols, int numThreads = 1,
|
|
const CleanupParameters ¶ms = defaultCleanupParameters,
|
|
bool skip_standardize = false);
|
|
|
|
//! Convenience function for quickly standardizing a single SMILES string.
|
|
/// Returns a standardized canonical SMILES string given a SMILES string.
|
|
/// This is the equivalent of calling cleanup() on each of the molecules
|
|
RDKIT_MOLSTANDARDIZE_EXPORT std::string standardizeSmiles(
|
|
const std::string &smiles);
|
|
|
|
//! Do a disconnection of an organometallic complex according to rules
|
|
//! preferred by Syngenta. All bonds to metals are broken, including
|
|
//! covalent bonds to Group I/II metals (so including Grignards, lithium
|
|
//! complexes etc.). The ligands are left in the charge states they came
|
|
//! in with. If there are haptic bonds defined by a dummy atom bonded to
|
|
//! a metal by a bond that has a _MolFileBondEndPts (which will contain the
|
|
//! indices of the atoms involved in the haptic bond) then the dummy atom
|
|
//! is removed also.
|
|
//! Do the disconnection in place.
|
|
//! The options are splitGrignards, splitAromaticC, adjustCharges and
|
|
//! removeHapticDummies. Roll on C++20 and designated initializers!
|
|
RDKIT_MOLSTANDARDIZE_EXPORT void disconnectOrganometallics(
|
|
RWMol &mol, RDKit::MolStandardize::MetalDisconnectorOptions mdo = {
|
|
true, true, false, true});
|
|
//! As above, but returns new disconnected molecule.
|
|
RDKIT_MOLSTANDARDIZE_EXPORT ROMol *disconnectOrganometallics(
|
|
const ROMol &mol, RDKit::MolStandardize::MetalDisconnectorOptions mdo = {
|
|
true, true, false, true});
|
|
//! As above, included for API consistency.
|
|
inline void disconnectOrganometallicsInPlace(
|
|
RWMol &mol, RDKit::MolStandardize::MetalDisconnectorOptions mdo = {
|
|
true, true, false, true}) {
|
|
disconnectOrganometallics(mol, mdo);
|
|
};
|
|
|
|
//! TODO
|
|
RDKIT_MOLSTANDARDIZE_EXPORT std::vector<std::string> enumerateTautomerSmiles(
|
|
const std::string &smiles,
|
|
const CleanupParameters ¶ms = defaultCleanupParameters);
|
|
}; // namespace MolStandardize
|
|
} // namespace RDKit
|
|
#endif
|