// // Copyright (C) 2018-2023 Susan H. Leung and other RDKit contributors // // @@ All Rights Reserved @@ // This file is part of the RDKit. // The contents are covered by the terms of the BSD license // which is included in the file license.txt, found at the root // of the RDKit source tree. // /*! \file MolStandardize.h \brief Defines the CleanupParameters and some convenience functions. */ #include #ifndef RD_MOLSTANDARDIZE_H #define RD_MOLSTANDARDIZE_H #include #include #include namespace RDKit { class RWMol; class ROMol; namespace MolStandardize { //! The CleanupParameters structure defines the default parameters for the /// cleanup process and also allows the user to customize the process by /// changing the parameters. /*! Notes: - To customize the parameters, the structure must be initialized first. (Another on the TODO list) - For this project, not all the parameters have been revealed. (TODO) */ struct RDKIT_MOLSTANDARDIZE_EXPORT CleanupParameters { // TODO reveal all parameters private: const char *rdbase_cstr = std::getenv("RDBASE"); public: std::string rdbase = rdbase_cstr != nullptr ? rdbase_cstr : ""; std::string normalizations; std::string acidbaseFile; std::string fragmentFile; std::string tautomerTransforms; int maxRestarts{200}; //!< The maximum number of times to attempt to apply //!< the series of normalizations (default 200). bool preferOrganic{false}; //!< Whether to prioritize organic fragments when //!< choosing fragment parent (default False). bool doCanonical{true}; //!< Whether to apply normalizations in a //!< canonical order int maxTautomers{1000}; //!< The maximum number of tautomers to enumerate //!< (default 1000). int maxTransforms{1000}; //!< The maximum number of tautomer //!< transformations to apply (default 1000). 
bool tautomerRemoveSp3Stereo{ true}; //!< Whether to remove stereochemistry from sp3 centers involved //!< in tautomerism (defaults to true) bool tautomerRemoveBondStereo{ true}; //!< Whether to remove stereochemistry from double bonds involved //!< in tautomerism (defaults to true) bool tautomerRemoveIsotopicHs{ true}; //!< Whether to remove isotopic Hs from centers involved in //!< tautomerism (defaults to true) bool tautomerReassignStereo{ true}; //!< Whether enumerate() should call assignStereochemistry on all //!< generated tautomers (defaults to true) bool largestFragmentChooserUseAtomCount{ true}; //!< Whether LargestFragmentChooser should use atom count as main //!< criterion before MW (defaults to true) bool largestFragmentChooserCountHeavyAtomsOnly{ false}; //!< Whether LargestFragmentChooser should only count heavy atoms //!< (defaults to false) std::vector> normalizationData; std::vector> fragmentData; std::vector> acidbaseData; std::vector> tautomerTransformData; CleanupParameters() {} }; RDKIT_MOLSTANDARDIZE_EXPORT extern const CleanupParameters defaultCleanupParameters; RDKIT_MOLSTANDARDIZE_EXPORT void updateCleanupParamsFromJSON( CleanupParameters ¶ms, const std::string &json); //! The cleanup function is equivalent to the /// molvs.Standardizer().standardize(mol) function. It calls the same steps, /// namely: RemoveHs, RDKit SanitizeMol, MetalDisconnector, Normalizer, /// Reionizer, RDKit AssignStereochemistry. RDKIT_MOLSTANDARDIZE_EXPORT RWMol *cleanup( const RWMol *mol, const CleanupParameters ¶ms = defaultCleanupParameters); //! \overload inline RWMol *cleanup(const RWMol &mol, const CleanupParameters ¶ms = defaultCleanupParameters) { return cleanup(&mol, params); }; //! Works the same as cleanup(mol) RDKIT_MOLSTANDARDIZE_EXPORT void cleanupInPlace( RWMol &mol, const CleanupParameters ¶ms = defaultCleanupParameters); //! 
Operates on multiple molecules RDKIT_MOLSTANDARDIZE_EXPORT void cleanupInPlace( std::vector &mols, int numThreads = 1, const CleanupParameters ¶ms = defaultCleanupParameters); //! Works the same as Normalizer().normalize(mol) RDKIT_MOLSTANDARDIZE_EXPORT RWMol *normalize( const RWMol *mol, const CleanupParameters ¶ms = defaultCleanupParameters); //! Works the same as Normalizer().normalizeInPlace(mol) RDKIT_MOLSTANDARDIZE_EXPORT void normalizeInPlace( RWMol &mol, const CleanupParameters ¶ms = defaultCleanupParameters); //! Operates on multiple molecules RDKIT_MOLSTANDARDIZE_EXPORT void normalizeInPlace( std::vector &mols, int numThreads = 1, const CleanupParameters ¶ms = defaultCleanupParameters); //! Works the same as Reionizer().reionize(mol) RDKIT_MOLSTANDARDIZE_EXPORT RWMol *reionize( const RWMol *mol, const CleanupParameters ¶ms = defaultCleanupParameters); //! Works the same as Reionizer().reionizeInPlace(mol) RDKIT_MOLSTANDARDIZE_EXPORT void reionizeInPlace( RWMol &mol, const CleanupParameters ¶ms = defaultCleanupParameters); //! Operates on multiple molecules RDKIT_MOLSTANDARDIZE_EXPORT void reionizeInPlace( std::vector &mols, int numThreads = 1, const CleanupParameters ¶ms = defaultCleanupParameters); //! Works the same as FragmentRemover().remove(mol) RDKIT_MOLSTANDARDIZE_EXPORT RWMol *removeFragments( const RWMol *mol, const CleanupParameters ¶ms = defaultCleanupParameters); //! Works the same as FragmentRemover().removeInPlace(mol) RDKIT_MOLSTANDARDIZE_EXPORT void removeFragmentsInPlace( RWMol &mol, const CleanupParameters ¶ms = defaultCleanupParameters); //! Operates on multiple molecules RDKIT_MOLSTANDARDIZE_EXPORT void removeFragmentsInPlace( std::vector &mols, int numThreads = 1, const CleanupParameters ¶ms = defaultCleanupParameters); //! 
// NOTE(review): in the multi-molecule overloads below, the element type of
// the `mols` vector was stripped from the recovered text ("std::vector &mols").
// `RWMol *` is reconstructed to match the single-molecule signatures --
// confirm against the implementation.

//! Works the same as TautomerEnumerator().canonicalize(mol)
RDKIT_MOLSTANDARDIZE_EXPORT RWMol *canonicalTautomer(
    const RWMol *mol,
    const CleanupParameters &params = defaultCleanupParameters);

//! In-place counterpart of canonicalTautomer(): modifies \c mol directly.
RDKIT_MOLSTANDARDIZE_EXPORT void canonicalTautomerInPlace(
    RWMol &mol, const CleanupParameters &params = defaultCleanupParameters);

//! Returns the tautomer parent of a given molecule. The tautomer parent is the
/// standardized canonical tautomer of the molecule
// NOTE(review): skipStandardize presumably skips the initial cleanup step for
// molecules that are already standardized -- confirm against the
// implementation.
RDKIT_MOLSTANDARDIZE_EXPORT RWMol *tautomerParent(
    const RWMol &mol,
    const CleanupParameters &params = defaultCleanupParameters,
    bool skipStandardize = false);

//! In-place counterpart of tautomerParent(): modifies \c mol directly.
RDKIT_MOLSTANDARDIZE_EXPORT void tautomerParentInPlace(
    RWMol &mol, const CleanupParameters &params = defaultCleanupParameters,
    bool skipStandardize = false);
//! Operates on multiple molecules
RDKIT_MOLSTANDARDIZE_EXPORT void tautomerParentInPlace(
    std::vector<RWMol *> &mols, int numThreads = 1,
    const CleanupParameters &params = defaultCleanupParameters,
    bool skipStandardize = false);

//! Returns the fragment parent of a given molecule. The fragment parent is the
/// largest organic covalent unit in the molecule.
RDKIT_MOLSTANDARDIZE_EXPORT RWMol *fragmentParent(
    const RWMol &mol,
    const CleanupParameters &params = defaultCleanupParameters,
    bool skip_standardize = false);

//! In-place counterpart of fragmentParent(): modifies \c mol directly.
RDKIT_MOLSTANDARDIZE_EXPORT void fragmentParentInPlace(
    RWMol &mol, const CleanupParameters &params = defaultCleanupParameters,
    bool skip_standardize = false);
//! Operates on multiple molecules
RDKIT_MOLSTANDARDIZE_EXPORT void fragmentParentInPlace(
    std::vector<RWMol *> &mols, int numThreads = 1,
    const CleanupParameters &params = defaultCleanupParameters,
    bool skip_standardize = false);
calls removeStereochemistry() on the given molecule RDKIT_MOLSTANDARDIZE_EXPORT RWMol *stereoParent( const RWMol &mol, const CleanupParameters ¶ms = defaultCleanupParameters, bool skip_standardize = false); RDKIT_MOLSTANDARDIZE_EXPORT void stereoParentInPlace( RWMol &mol, const CleanupParameters ¶ms = defaultCleanupParameters, bool skip_standardize = false); RDKIT_MOLSTANDARDIZE_EXPORT void stereoParentInPlace( std::vector &mols, int numThreads = 1, const CleanupParameters ¶ms = defaultCleanupParameters, bool skip_standardize = false); //! removes all isotopes specifications from the given molecule RDKIT_MOLSTANDARDIZE_EXPORT RWMol *isotopeParent( const RWMol &mol, const CleanupParameters ¶ms = defaultCleanupParameters, bool skip_standardize = false); RDKIT_MOLSTANDARDIZE_EXPORT void isotopeParentInPlace( RWMol &mol, const CleanupParameters ¶ms = defaultCleanupParameters, bool skip_standardize = false); RDKIT_MOLSTANDARDIZE_EXPORT void isotopeParentInPlace( std::vector &mols, int numThreads = 1, const CleanupParameters ¶ms = defaultCleanupParameters, bool skip_standardize = false); //! Returns the charge parent of a given molecule. The charge parent is the //! uncharged version of the fragment parent. RDKIT_MOLSTANDARDIZE_EXPORT RWMol *chargeParent( const RWMol &mol, const CleanupParameters ¶ms = defaultCleanupParameters, bool skip_standardize = false); RDKIT_MOLSTANDARDIZE_EXPORT void chargeParentInPlace( RWMol &mol, const CleanupParameters ¶ms = defaultCleanupParameters, bool skip_standardize = false); //! operates on multiple molecules RDKIT_MOLSTANDARDIZE_EXPORT void chargeParentInPlace( std::vector &mols, int numThreads = 1, const CleanupParameters ¶ms = defaultCleanupParameters, bool skip_standardize = false); //! Returns the super parent. The super parent is the fragment, charge, //! isotope, stereo, and tautomer parent of the molecule. 
RDKIT_MOLSTANDARDIZE_EXPORT RWMol *superParent( const RWMol &mol, const CleanupParameters ¶ms = defaultCleanupParameters, bool skip_standardize = false); RDKIT_MOLSTANDARDIZE_EXPORT void superParentInPlace( RWMol &mol, const CleanupParameters ¶ms = defaultCleanupParameters, bool skip_standardize = false); RDKIT_MOLSTANDARDIZE_EXPORT void superParentInPlace( std::vector &mols, int numThreads = 1, const CleanupParameters ¶ms = defaultCleanupParameters, bool skip_standardize = false); //! Convenience function for quickly standardizing a single SMILES string. /// Returns a standardized canonical SMILES string given a SMILES string. /// This is the equivalent of calling cleanup() on each of the molecules RDKIT_MOLSTANDARDIZE_EXPORT std::string standardizeSmiles( const std::string &smiles); //! Do a disconnection of an organometallic complex according to rules //! preferred by Syngenta. All bonds to metals are broken, including //! covalent bonds to Group I/II metals (so including Grignards, lithium //! complexes etc.). The ligands are left in the charge states they came //! in with. If there are haptic bonds defined by a dummy atom bonded to //! a metal by a bond that has a _MolFileBondEndPts (which will contain the //! indices of the atoms involved in the haptic bond) then the dummy atom //! is removed also. //! Do the disconnection in place. //! The options are splitGrignards, splitAromaticC, adjustCharges and //! removeHapticDummies. Roll on C++20 and designated initializers! RDKIT_MOLSTANDARDIZE_EXPORT void disconnectOrganometallics( RWMol &mol, RDKit::MolStandardize::MetalDisconnectorOptions mdo = { true, true, false, true}); //! As above, but returns new disconnected molecule. RDKIT_MOLSTANDARDIZE_EXPORT ROMol *disconnectOrganometallics( const ROMol &mol, RDKit::MolStandardize::MetalDisconnectorOptions mdo = { true, true, false, true}); //! As above, included for API consistency. 
inline void disconnectOrganometallicsInPlace( RWMol &mol, RDKit::MolStandardize::MetalDisconnectorOptions mdo = { true, true, false, true}) { disconnectOrganometallics(mol, mdo); }; //! TODO RDKIT_MOLSTANDARDIZE_EXPORT std::vector enumerateTautomerSmiles( const std::string &smiles, const CleanupParameters ¶ms = defaultCleanupParameters); }; // namespace MolStandardize } // namespace RDKit #endif