Files
rdkit/Code/GraphMol/MolStandardize/MolStandardize.h

278 lines
12 KiB
C++

//
// Copyright (C) 2018-2023 Susan H. Leung and other RDKit contributors
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
/*! \file MolStandardize.h
\brief Defines the CleanupParameters and some convenience functions.
*/
#include <RDGeneral/export.h>
#ifndef RD_MOLSTANDARDIZE_H
#define RD_MOLSTANDARDIZE_H
#include <cstdlib>
#include <string>
#include <tuple>
#include <utility>
#include <vector>
#include <GraphMol/RDKitBase.h>
#include <GraphMol/MolStandardize/Metal.h>
namespace RDKit {
class RWMol;
class ROMol;
namespace MolStandardize {
//! Parameters controlling the cleanup (standardization) process.
/*!
  A default-constructed instance holds the default value of every parameter;
  the process is customized by changing members of such an instance.

  <b>Notes:</b>
    - To customize the parameters, the structure must be initialized first.
      (Another on the TODO list)
    - Not all of the parameters have been exposed yet. (TODO)
*/
struct RDKIT_MOLSTANDARDIZE_EXPORT CleanupParameters {
  // TODO reveal all parameters
 private:
  //! Raw result of looking up the RDBASE environment variable; nullptr when
  //! the variable is not set. Declared before \c rdbase because \c rdbase is
  //! initialized from it.
  const char *rdbase_cstr = std::getenv("RDBASE");

 public:
  //! Value of the RDBASE environment variable, or an empty string when the
  //! variable is not set.
  std::string rdbase = rdbase_cstr ? rdbase_cstr : "";

  // NOTE(review): the four strings below are presumably names/paths of the
  // corresponding definition files - confirm against the implementation.
  std::string normalizations;
  std::string acidbaseFile;
  std::string fragmentFile;
  std::string tautomerTransforms;

  //! The maximum number of times to attempt to apply the series of
  //! normalizations (default 200).
  int maxRestarts = 200;
  //! Whether to prioritize organic fragments when choosing fragment parent
  //! (default False).
  bool preferOrganic = false;
  //! Whether to apply normalizations in a canonical order (default true).
  bool doCanonical = true;
  //! The maximum number of tautomers to enumerate (default 1000).
  int maxTautomers = 1000;
  //! The maximum number of tautomer transformations to apply (default 1000).
  int maxTransforms = 1000;
  //! Whether to remove stereochemistry from sp3 centers involved in
  //! tautomerism (defaults to true).
  bool tautomerRemoveSp3Stereo = true;
  //! Whether to remove stereochemistry from double bonds involved in
  //! tautomerism (defaults to true).
  bool tautomerRemoveBondStereo = true;
  //! Whether to remove isotopic Hs from centers involved in tautomerism
  //! (defaults to true).
  bool tautomerRemoveIsotopicHs = true;
  //! Whether enumerate() should call assignStereochemistry on all generated
  //! tautomers (defaults to true).
  bool tautomerReassignStereo = true;
  //! Whether LargestFragmentChooser should use atom count as main criterion
  //! before MW (defaults to true).
  bool largestFragmentChooserUseAtomCount = true;
  //! Whether LargestFragmentChooser should only count heavy atoms (defaults
  //! to false).
  bool largestFragmentChooserCountHeavyAtomsOnly = false;

  // In-memory definition tables; all of them start out empty.
  // NOTE(review): the meaning of the individual string fields is not
  // documented in this header - confirm against the code that fills them.
  std::vector<std::pair<std::string, std::string>> normalizationData;
  std::vector<std::pair<std::string, std::string>> fragmentData;
  std::vector<std::tuple<std::string, std::string, std::string>> acidbaseData;
  std::vector<std::tuple<std::string, std::string, std::string, std::string>>
      tautomerTransformData;

  //! All members are set up by their default member initializers.
  CleanupParameters() {}
};
//! Shared, read-only CleanupParameters instance. It is the default value of
/// the \c params argument of the functions declared in this header.
/// Only declared here (extern); the definition is not part of this header.
RDKIT_MOLSTANDARDIZE_EXPORT extern const CleanupParameters
defaultCleanupParameters;
//! Updates a CleanupParameters instance from a JSON string.
/// \param params  the parameters to modify (taken by non-const reference)
/// \param json    the JSON text to read the new values from
///
/// NOTE(review): the recognized JSON keys and the handling of malformed input
/// are not visible in this header - check the implementation.
RDKIT_MOLSTANDARDIZE_EXPORT void updateCleanupParamsFromJSON(
CleanupParameters &params, const std::string &json);
//! The cleanup function is equivalent to the
/// molvs.Standardizer().standardize(mol) function. It calls the same steps,
/// namely: RemoveHs, RDKit SanitizeMol, MetalDisconnector, Normalizer,
/// Reionizer, RDKit AssignStereochemistry.
///
/// \param mol     the molecule to clean up (pointer to const: the input is not
///                modified through this pointer)
/// \param params  the parameters to use; defaults to defaultCleanupParameters
/// \return a pointer to the cleaned-up molecule.
///         NOTE(review): presumably a newly allocated molecule that the caller
///         must delete - confirm against the implementation.
RDKIT_MOLSTANDARDIZE_EXPORT RWMol *cleanup(
const RWMol *mol,
const CleanupParameters &params = defaultCleanupParameters);
//! \overload
/// Reference-taking convenience overload: passes the address of \c mol and
/// \c params on to the pointer-based cleanup() and returns its result
/// unchanged.
///
/// \param mol     the molecule to clean up (const reference)
/// \param params  the parameters to use; defaults to defaultCleanupParameters
/// \return whatever the pointer-based cleanup() returns.
///         NOTE(review): presumably a newly allocated molecule that the caller
///         must delete - confirm against the implementation.
inline RWMol *cleanup(
    const RWMol &mol,
    const CleanupParameters &params = defaultCleanupParameters) {
  return cleanup(&mol, params);
}
//! Works the same as cleanup(mol), but takes the molecule by non-const
/// reference and returns nothing.
/// \param mol     the molecule to operate on
/// \param params  the parameters to use; defaults to defaultCleanupParameters
RDKIT_MOLSTANDARDIZE_EXPORT void cleanupInPlace(
RWMol &mol, const CleanupParameters &params = defaultCleanupParameters);
//! Operates on multiple molecules
/// \param mols        the molecules to operate on
/// \param numThreads  the number of threads to use (default 1).
///                    NOTE(review): the meaning of values below 1 is not
///                    visible in this header - confirm.
/// \param params      the parameters to use; defaults to
///                    defaultCleanupParameters
RDKIT_MOLSTANDARDIZE_EXPORT void cleanupInPlace(
std::vector<RWMol *> &mols, int numThreads = 1,
const CleanupParameters &params = defaultCleanupParameters);
//! Works the same as Normalizer().normalize(mol)
/// \param mol     the molecule to normalize (pointer to const)
/// \param params  the parameters to use; defaults to defaultCleanupParameters
/// \return a pointer to the resulting molecule.
///         NOTE(review): presumably newly allocated and owned by the caller -
///         confirm against the implementation.
RDKIT_MOLSTANDARDIZE_EXPORT RWMol *normalize(
const RWMol *mol,
const CleanupParameters &params = defaultCleanupParameters);
//! Works the same as Normalizer().normalizeInPlace(mol)
/// \param mol     the molecule to operate on (taken by non-const reference)
/// \param params  the parameters to use; defaults to defaultCleanupParameters
RDKIT_MOLSTANDARDIZE_EXPORT void normalizeInPlace(
RWMol &mol, const CleanupParameters &params = defaultCleanupParameters);
//! Operates on multiple molecules
/// \param mols        the molecules to operate on
/// \param numThreads  the number of threads to use (default 1).
///                    NOTE(review): the meaning of values below 1 is not
///                    visible in this header - confirm.
/// \param params      the parameters to use; defaults to
///                    defaultCleanupParameters
RDKIT_MOLSTANDARDIZE_EXPORT void normalizeInPlace(
std::vector<RWMol *> &mols, int numThreads = 1,
const CleanupParameters &params = defaultCleanupParameters);
//! Works the same as Reionizer().reionize(mol)
/// \param mol     the molecule to reionize (pointer to const)
/// \param params  the parameters to use; defaults to defaultCleanupParameters
/// \return a pointer to the resulting molecule.
///         NOTE(review): presumably newly allocated and owned by the caller -
///         confirm against the implementation.
RDKIT_MOLSTANDARDIZE_EXPORT RWMol *reionize(
const RWMol *mol,
const CleanupParameters &params = defaultCleanupParameters);
//! Works the same as Reionizer().reionizeInPlace(mol)
/// \param mol     the molecule to operate on (taken by non-const reference)
/// \param params  the parameters to use; defaults to defaultCleanupParameters
RDKIT_MOLSTANDARDIZE_EXPORT void reionizeInPlace(
RWMol &mol, const CleanupParameters &params = defaultCleanupParameters);
//! Operates on multiple molecules
/// \param mols        the molecules to operate on
/// \param numThreads  the number of threads to use (default 1).
///                    NOTE(review): the meaning of values below 1 is not
///                    visible in this header - confirm.
/// \param params      the parameters to use; defaults to
///                    defaultCleanupParameters
RDKIT_MOLSTANDARDIZE_EXPORT void reionizeInPlace(
std::vector<RWMol *> &mols, int numThreads = 1,
const CleanupParameters &params = defaultCleanupParameters);
//! Works the same as FragmentRemover().remove(mol)
/// \param mol     the molecule to process (pointer to const)
/// \param params  the parameters to use; defaults to defaultCleanupParameters
/// \return a pointer to the resulting molecule.
///         NOTE(review): presumably newly allocated and owned by the caller -
///         confirm against the implementation.
RDKIT_MOLSTANDARDIZE_EXPORT RWMol *removeFragments(
const RWMol *mol,
const CleanupParameters &params = defaultCleanupParameters);
//! Works the same as FragmentRemover().removeInPlace(mol)
/// \param mol     the molecule to operate on (taken by non-const reference)
/// \param params  the parameters to use; defaults to defaultCleanupParameters
RDKIT_MOLSTANDARDIZE_EXPORT void removeFragmentsInPlace(
RWMol &mol, const CleanupParameters &params = defaultCleanupParameters);
//! Operates on multiple molecules
/// \param mols        the molecules to operate on
/// \param numThreads  the number of threads to use (default 1).
///                    NOTE(review): the meaning of values below 1 is not
///                    visible in this header - confirm.
/// \param params      the parameters to use; defaults to
///                    defaultCleanupParameters
RDKIT_MOLSTANDARDIZE_EXPORT void removeFragmentsInPlace(
std::vector<RWMol *> &mols, int numThreads = 1,
const CleanupParameters &params = defaultCleanupParameters);
//! Works the same as TautomerEnumerator().canonicalize(mol)
/// \param mol     the molecule to process (pointer to const)
/// \param params  the parameters to use; defaults to defaultCleanupParameters
/// \return a pointer to the resulting molecule.
///         NOTE(review): presumably newly allocated and owned by the caller -
///         confirm against the implementation.
RDKIT_MOLSTANDARDIZE_EXPORT RWMol *canonicalTautomer(
const RWMol *mol,
const CleanupParameters &params = defaultCleanupParameters);
//! Variant of canonicalTautomer() that takes the molecule by non-const
/// reference and returns nothing.
/// \param mol     the molecule to operate on
/// \param params  the parameters to use; defaults to defaultCleanupParameters
RDKIT_MOLSTANDARDIZE_EXPORT void canonicalTautomerInPlace(
RWMol &mol, const CleanupParameters &params = defaultCleanupParameters);
//! Returns the tautomer parent of a given molecule. The tautomer parent is the
/// standardized canonical tautomer of the molecule
///
/// \param mol              the input molecule (const reference)
/// \param params           the parameters to use; defaults to
///                         defaultCleanupParameters
/// \param skipStandardize  defaults to false.
///                         NOTE(review): presumably skips the initial
///                         standardization of \c mol when true - confirm.
/// \return a pointer to the resulting molecule.
///         NOTE(review): presumably newly allocated and owned by the caller -
///         confirm against the implementation.
RDKIT_MOLSTANDARDIZE_EXPORT RWMol *tautomerParent(
const RWMol &mol,
const CleanupParameters &params = defaultCleanupParameters,
bool skipStandardize = false);
//! Variant of tautomerParent() that takes the molecule by non-const reference
/// and returns nothing.
RDKIT_MOLSTANDARDIZE_EXPORT void tautomerParentInPlace(
RWMol &mol, const CleanupParameters &params = defaultCleanupParameters,
bool skipStandardize = false);
//! Operates on multiple molecules
/// \param numThreads  the number of threads to use (default 1).
///                    NOTE(review): the meaning of values below 1 is not
///                    visible in this header - confirm.
RDKIT_MOLSTANDARDIZE_EXPORT void tautomerParentInPlace(
std::vector<RWMol *> &mols, int numThreads = 1,
const CleanupParameters &params = defaultCleanupParameters,
bool skipStandardize = false);
//! Returns the fragment parent of a given molecule. The fragment parent is the
/// largest organic covalent unit in the molecule.
///
/// \param mol               the input molecule (const reference)
/// \param params            the parameters to use; defaults to
///                          defaultCleanupParameters
/// \param skip_standardize  defaults to false.
///                          NOTE(review): presumably skips the initial
///                          standardization of \c mol when true - confirm.
/// \return a pointer to the resulting molecule.
///         NOTE(review): presumably newly allocated and owned by the caller -
///         confirm against the implementation.
RDKIT_MOLSTANDARDIZE_EXPORT RWMol *fragmentParent(
const RWMol &mol,
const CleanupParameters &params = defaultCleanupParameters,
bool skip_standardize = false);
//! Variant of fragmentParent() that takes the molecule by non-const reference
/// and returns nothing.
RDKIT_MOLSTANDARDIZE_EXPORT void fragmentParentInPlace(
RWMol &mol, const CleanupParameters &params = defaultCleanupParameters,
bool skip_standardize = false);
//! Operates on multiple molecules
/// \param numThreads  the number of threads to use (default 1).
///                    NOTE(review): the meaning of values below 1 is not
///                    visible in this header - confirm.
RDKIT_MOLSTANDARDIZE_EXPORT void fragmentParentInPlace(
std::vector<RWMol *> &mols, int numThreads = 1,
const CleanupParameters &params = defaultCleanupParameters,
bool skip_standardize = false);
//! calls removeStereochemistry() on the given molecule
///
/// \param mol               the input molecule (const reference)
/// \param params            the parameters to use; defaults to
///                          defaultCleanupParameters
/// \param skip_standardize  defaults to false.
///                          NOTE(review): presumably skips the initial
///                          standardization of \c mol when true - confirm.
/// \return a pointer to the resulting molecule.
///         NOTE(review): presumably newly allocated and owned by the caller -
///         confirm against the implementation.
RDKIT_MOLSTANDARDIZE_EXPORT RWMol *stereoParent(
const RWMol &mol,
const CleanupParameters &params = defaultCleanupParameters,
bool skip_standardize = false);
//! Variant of stereoParent() that takes the molecule by non-const reference
/// and returns nothing.
RDKIT_MOLSTANDARDIZE_EXPORT void stereoParentInPlace(
RWMol &mol, const CleanupParameters &params = defaultCleanupParameters,
bool skip_standardize = false);
//! Operates on multiple molecules
/// \param numThreads  the number of threads to use (default 1).
///                    NOTE(review): the meaning of values below 1 is not
///                    visible in this header - confirm.
RDKIT_MOLSTANDARDIZE_EXPORT void stereoParentInPlace(
std::vector<RWMol *> &mols, int numThreads = 1,
const CleanupParameters &params = defaultCleanupParameters,
bool skip_standardize = false);
//! removes all isotope specifications from the given molecule
///
/// \param mol               the input molecule (const reference)
/// \param params            the parameters to use; defaults to
///                          defaultCleanupParameters
/// \param skip_standardize  defaults to false.
///                          NOTE(review): presumably skips the initial
///                          standardization of \c mol when true - confirm.
/// \return a pointer to the resulting molecule.
///         NOTE(review): presumably newly allocated and owned by the caller -
///         confirm against the implementation.
RDKIT_MOLSTANDARDIZE_EXPORT RWMol *isotopeParent(
const RWMol &mol,
const CleanupParameters &params = defaultCleanupParameters,
bool skip_standardize = false);
//! Variant of isotopeParent() that takes the molecule by non-const reference
/// and returns nothing.
RDKIT_MOLSTANDARDIZE_EXPORT void isotopeParentInPlace(
RWMol &mol, const CleanupParameters &params = defaultCleanupParameters,
bool skip_standardize = false);
//! Operates on multiple molecules
/// \param numThreads  the number of threads to use (default 1).
///                    NOTE(review): the meaning of values below 1 is not
///                    visible in this header - confirm.
RDKIT_MOLSTANDARDIZE_EXPORT void isotopeParentInPlace(
std::vector<RWMol *> &mols, int numThreads = 1,
const CleanupParameters &params = defaultCleanupParameters,
bool skip_standardize = false);
//! Returns the charge parent of a given molecule. The charge parent is the
//! uncharged version of the fragment parent.
//!
//! \param mol               the input molecule (const reference)
//! \param params            the parameters to use; defaults to
//!                          defaultCleanupParameters
//! \param skip_standardize  defaults to false.
//!                          NOTE(review): presumably skips the initial
//!                          standardization of \c mol when true - confirm.
//! \return a pointer to the resulting molecule.
//!         NOTE(review): presumably newly allocated and owned by the caller -
//!         confirm against the implementation.
RDKIT_MOLSTANDARDIZE_EXPORT RWMol *chargeParent(
const RWMol &mol,
const CleanupParameters &params = defaultCleanupParameters,
bool skip_standardize = false);
//! Variant of chargeParent() that takes the molecule by non-const reference
//! and returns nothing.
RDKIT_MOLSTANDARDIZE_EXPORT void chargeParentInPlace(
RWMol &mol, const CleanupParameters &params = defaultCleanupParameters,
bool skip_standardize = false);
//! operates on multiple molecules
//! \param numThreads  the number of threads to use (default 1).
//!                    NOTE(review): the meaning of values below 1 is not
//!                    visible in this header - confirm.
RDKIT_MOLSTANDARDIZE_EXPORT void chargeParentInPlace(
std::vector<RWMol *> &mols, int numThreads = 1,
const CleanupParameters &params = defaultCleanupParameters,
bool skip_standardize = false);
//! Returns the super parent. The super parent is the fragment, charge,
//! isotope, stereo, and tautomer parent of the molecule.
//!
//! \param mol               the input molecule (const reference)
//! \param params            the parameters to use; defaults to
//!                          defaultCleanupParameters
//! \param skip_standardize  defaults to false.
//!                          NOTE(review): presumably skips the initial
//!                          standardization of \c mol when true - confirm.
//! \return a pointer to the resulting molecule.
//!         NOTE(review): presumably newly allocated and owned by the caller -
//!         confirm against the implementation.
RDKIT_MOLSTANDARDIZE_EXPORT RWMol *superParent(
const RWMol &mol,
const CleanupParameters &params = defaultCleanupParameters,
bool skip_standardize = false);
//! Variant of superParent() that takes the molecule by non-const reference
//! and returns nothing.
RDKIT_MOLSTANDARDIZE_EXPORT void superParentInPlace(
RWMol &mol, const CleanupParameters &params = defaultCleanupParameters,
bool skip_standardize = false);
//! Operates on multiple molecules
//! \param numThreads  the number of threads to use (default 1).
//!                    NOTE(review): the meaning of values below 1 is not
//!                    visible in this header - confirm.
RDKIT_MOLSTANDARDIZE_EXPORT void superParentInPlace(
std::vector<RWMol *> &mols, int numThreads = 1,
const CleanupParameters &params = defaultCleanupParameters,
bool skip_standardize = false);
//! Convenience function for quickly standardizing a single SMILES string.
/// Returns a standardized canonical SMILES string given a SMILES string.
/// This is the equivalent of calling cleanup() on each of the molecules
///
/// \param smiles  the input SMILES string
/// \return the standardized canonical SMILES string.
///
/// NOTE(review): unlike the other functions in this header this one takes no
/// CleanupParameters argument; the behavior for a SMILES that cannot be parsed
/// is not visible here - confirm against the implementation.
RDKIT_MOLSTANDARDIZE_EXPORT std::string standardizeSmiles(
const std::string &smiles);
//! Do a disconnection of an organometallic complex according to rules
//! preferred by Syngenta. All bonds to metals are broken, including
//! covalent bonds to Group I/II metals (so including Grignards, lithium
//! complexes etc.). The ligands are left in the charge states they came
//! in with. If there are haptic bonds defined by a dummy atom bonded to
//! a metal by a bond that has a _MolFileBondEndPts (which will contain the
//! indices of the atoms involved in the haptic bond) then the dummy atom
//! is removed also.
//! Do the disconnection in place.
//! The options are splitGrignards, splitAromaticC, adjustCharges and
//! removeHapticDummies. Roll on C++20 and designated initializers!
//!
//! \param mol  the molecule to modify (taken by non-const reference)
//! \param mdo  the disconnection options, passed by value. The default is the
//!             positional initializer {true, true, false, true}.
//!             NOTE(review): assuming the fields of MetalDisconnectorOptions
//!             are declared in the order listed above, this means
//!             splitGrignards=true, splitAromaticC=true, adjustCharges=false,
//!             removeHapticDummies=true - confirm against Metal.h.
RDKIT_MOLSTANDARDIZE_EXPORT void disconnectOrganometallics(
RWMol &mol, RDKit::MolStandardize::MetalDisconnectorOptions mdo = {
true, true, false, true});
//! As above, but returns new disconnected molecule.
//!
//! \param mol  the input molecule (const reference)
//! \param mdo  the disconnection options; same default initializer as the
//!             in-place overload
//! \return a pointer to the new molecule.
//!         NOTE(review): presumably owned by the caller - confirm.
RDKIT_MOLSTANDARDIZE_EXPORT ROMol *disconnectOrganometallics(
const ROMol &mol, RDKit::MolStandardize::MetalDisconnectorOptions mdo = {
true, true, false, true});
//! In-place organometallic disconnection, included for API consistency.
//! Forwards \c mol and \c mdo unchanged to disconnectOrganometallics(mol, mdo)
//! and returns nothing.
//!
//! \param mol  the molecule to modify (taken by non-const reference)
//! \param mdo  the disconnection options, passed by value; the default is the
//!             positional initializer {true, true, false, true}, the same one
//!             used by the disconnectOrganometallics() overloads
inline void disconnectOrganometallicsInPlace(
    RWMol &mol, RDKit::MolStandardize::MetalDisconnectorOptions mdo = {
                    true, true, false, true}) {
  disconnectOrganometallics(mol, mdo);
}
//! Takes a SMILES string and returns a vector of strings.
/// TODO: complete this documentation.
///
/// \param smiles  the input SMILES string
/// \param params  the parameters to use; defaults to defaultCleanupParameters
/// \return a vector of strings.
///         NOTE(review): presumably the SMILES of the enumerated tautomers of
///         the input; their order and the behavior for an unparsable SMILES
///         are not visible in this header - confirm against the
///         implementation.
RDKIT_MOLSTANDARDIZE_EXPORT std::vector<std::string> enumerateTautomerSmiles(
const std::string &smiles,
const CleanupParameters &params = defaultCleanupParameters);
}; // namespace MolStandardize
} // namespace RDKit
#endif