// // Copyright (C) 2018 Susan H. Leung // // @@ All Rights Reserved @@ // This file is part of the RDKit. // The contents are covered by the terms of the BSD license // which is included in the file license.txt, found at the root // of the RDKit source tree. // #include "MolStandardize.h" #include "Metal.h" #include "Normalize.h" #include "Tautomer.h" #include "Fragment.h" #include #include #include #include #include #include "Charge.h" #include #include #include #include #include #include using namespace std; namespace RDKit { namespace MolStandardize { const CleanupParameters defaultCleanupParameters; #define PT_OPT_GET(opt) params.opt = pt.get(#opt, params.opt) void updateCleanupParamsFromJSON(CleanupParameters ¶ms, const std::string &json) { if (json.empty()) { return; } std::istringstream ss; ss.str(json); boost::property_tree::ptree pt; boost::property_tree::read_json(ss, pt); PT_OPT_GET(rdbase); PT_OPT_GET(normalizations); PT_OPT_GET(acidbaseFile); PT_OPT_GET(fragmentFile); PT_OPT_GET(tautomerTransforms); PT_OPT_GET(maxRestarts); PT_OPT_GET(preferOrganic); PT_OPT_GET(doCanonical); PT_OPT_GET(maxTautomers); PT_OPT_GET(maxTransforms); PT_OPT_GET(tautomerRemoveSp3Stereo); PT_OPT_GET(tautomerRemoveBondStereo); PT_OPT_GET(tautomerRemoveIsotopicHs); PT_OPT_GET(tautomerReassignStereo); { const auto norm_tfs = pt.get_child_optional("normalizationData"); if (norm_tfs) { for (const auto &entry : *norm_tfs) { std::string nm = entry.second.get("name", ""); std::string smarts = entry.second.get("smarts", ""); if (nm.empty() || smarts.empty()) { BOOST_LOG(rdWarningLog) << " empty transformation name or SMARTS" << std::endl; continue; } params.normalizationData.push_back(std::make_pair(nm, smarts)); } } } { const auto frag_tfs = pt.get_child_optional("fragmentData"); if (frag_tfs) { for (const auto &entry : *frag_tfs) { std::string nm = entry.second.get("name", ""); std::string smarts = entry.second.get("smarts", ""); if (nm.empty() || smarts.empty()) { BOOST_LOG(rdWarningLog) << " empty transformation name or SMARTS" << std::endl; continue; } params.fragmentData.push_back(std::make_pair(nm, smarts)); } } } { const auto ab_data = pt.get_child_optional("acidbaseData"); if (ab_data) { for (const auto &entry : *ab_data) { std::string nm = entry.second.get("name", ""); std::string acid = entry.second.get("acid", ""); std::string base = entry.second.get("base", ""); if (nm.empty() || acid.empty() || base.empty()) { BOOST_LOG(rdWarningLog) << " empty component in acidbaseData" << std::endl; continue; } params.acidbaseData.push_back(std::make_tuple(nm, acid, base)); } } } { const auto taut_data = pt.get_child_optional("tautomerTransformData"); if (taut_data) { for (const auto &entry : *taut_data) { std::string nm = entry.second.get("name", ""); std::string smarts = entry.second.get("smarts", ""); std::string bonds = entry.second.get("bonds", ""); std::string charges = entry.second.get("charges", ""); if (nm.empty() || smarts.empty()) { BOOST_LOG(rdWarningLog) << " empty component in tautomerTransformData" << std::endl; continue; } params.tautomerTransformData.push_back( std::make_tuple(nm, smarts, bonds, charges)); } } } } RWMol *cleanup(const RWMol *mol, const CleanupParameters ¶ms) { RWMol m(*mol); MolOps::removeHs(m); MolStandardize::MetalDisconnector md; md.disconnect(m); RWMOL_SPTR normalized(MolStandardize::normalize(&m, params)); RWMol *reionized = MolStandardize::reionize(normalized.get(), params); bool cleanIt = true; bool force = true; MolOps::assignStereochemistry(*reionized, cleanIt, force); // update properties of reionized using m. reionized->updateProps(m); return reionized; } RWMol *tautomerParent(const RWMol &mol, const CleanupParameters ¶ms, bool skip_standardize) { const RWMol *cleaned = nullptr; std::unique_ptr cleanedHolder; if (!skip_standardize) { cleanedHolder.reset(cleanup(mol, params)); cleaned = cleanedHolder.get(); } else { cleaned = &mol; } std::unique_ptr ct{canonicalTautomer(cleaned, params)}; return cleanup(ct.get(), params); } // Return the fragment parent of a given molecule. // The fragment parent is the largest organic covalent unit in the molecule. // RWMol *fragmentParent(const RWMol &mol, const CleanupParameters ¶ms, bool skip_standardize) { const RWMol *cleaned = nullptr; std::unique_ptr cleanedHolder; if (!skip_standardize) { cleanedHolder.reset(cleanup(mol, params)); cleaned = cleanedHolder.get(); } else { cleaned = &mol; } LargestFragmentChooser lfragchooser(params.preferOrganic); return static_cast(lfragchooser.choose(*cleaned)); } RWMol *stereoParent(const RWMol &mol, const CleanupParameters ¶ms, bool skip_standardize) { RWMol *res; if (!skip_standardize) { res = cleanup(mol, params); } else { res = new RWMol(mol); } MolOps::removeStereochemistry(*res); return res; } RWMol *isotopeParent(const RWMol &mol, const CleanupParameters ¶ms, bool skip_standardize) { RWMol *res; if (!skip_standardize) { res = cleanup(mol, params); } else { res = new RWMol(mol); } for (auto atom : res->atoms()) { atom->setIsotope(0); } return res; } RWMol *chargeParent(const RWMol &mol, const CleanupParameters ¶ms, bool skip_standardize) { // Return the charge parent of a given molecule. // The charge parent is the uncharged version of the fragment parent. RWMOL_SPTR fragparent(fragmentParent(mol, params, skip_standardize)); // if fragment... ROMol nm(*fragparent); Uncharger uncharger(params.doCanonical); ROMOL_SPTR uncharged(uncharger.uncharge(nm)); RWMol *omol = cleanup(static_cast(uncharged.get()), params); return omol; } RWMol *superParent(const RWMol &mol, const CleanupParameters ¶ms, bool skip_standardize) { std::unique_ptr res; if (!skip_standardize) { res.reset(cleanup(mol, params)); } else { res.reset(new RWMol(mol)); } // we can skip fragmentParent since the chargeParent takes care of that res.reset(chargeParent(*res, params, true)); res.reset(isotopeParent(*res, params, true)); res.reset(stereoParent(*res, params, true)); res.reset(tautomerParent(*res, params, true)); return cleanup(*res, params); } RWMol *normalize(const RWMol *mol, const CleanupParameters ¶ms) { PRECONDITION(mol, "bad molecule"); std::unique_ptr normalizer{normalizerFromParams(params)}; return static_cast(normalizer->normalize(*mol)); } RWMol *reionize(const RWMol *mol, const CleanupParameters ¶ms) { PRECONDITION(mol, "bad molecule"); std::unique_ptr reionizer{reionizerFromParams(params)}; return static_cast(reionizer->reionize(*mol)); } RWMol *removeFragments(const RWMol *mol, const CleanupParameters ¶ms) { PRECONDITION(mol, "bad molecule"); std::unique_ptr remover{fragmentRemoverFromParams(params)}; return static_cast(remover->remove(*mol)); } RWMol *canonicalTautomer(const RWMol *mol, const CleanupParameters ¶ms) { PRECONDITION(mol, "bad molecule"); std::unique_ptr te{tautomerEnumeratorFromParams(params)}; return static_cast(te->canonicalize(*mol)); } std::string standardizeSmiles(const std::string &smiles) { RWMOL_SPTR mol(SmilesToMol(smiles, 0, false)); if (!mol) { std::string message = "SMILES Parse Error: syntax error for input: " + smiles; throw ValueErrorException(message); } CleanupParameters params; RWMOL_SPTR cleaned(cleanup(*mol, params)); return MolToSmiles(*cleaned); } std::vector enumerateTautomerSmiles( const std::string &smiles, const CleanupParameters ¶ms) { std::unique_ptr mol(SmilesToMol(smiles, 0, false)); mol.reset(cleanup(mol.get(), params)); MolOps::sanitizeMol(*mol); TautomerEnumerator te(params); auto res = te.enumerate(*mol); return res.smiles(); } } // end of namespace MolStandardize } // namespace RDKit