// // Copyright (C) 2018-2021 Susan H. Leung and other RDKit contributors // // @@ All Rights Reserved @@ // This file is part of the RDKit. // The contents are covered by the terms of the BSD license // which is included in the file license.txt, found at the root // of the RDKit source tree. // #include "Normalize.h" #include #include #include #include #include #include #include #include #include #include #include #include using namespace std; using namespace RDKit; namespace RDKit { class RWMol; class ROMol; namespace MolStandardize { typedef boost::flyweight< boost::flyweights::key_value, boost::flyweights::no_tracking> param_filename_flyweight; typedef boost::flyweight>, TransformCatalogParams>, boost::flyweights::no_tracking> param_data_flyweight; // unsigned int MAX_RESTARTS = 200; // constructor Normalizer::Normalizer() { BOOST_LOG(rdInfoLog) << "Initializing Normalizer\n"; const TransformCatalogParams *tparams = &( param_filename_flyweight(defaultCleanupParameters.normalizations).get()); this->d_tcat = new TransformCatalog(tparams); this->MAX_RESTARTS = 200; this->d_tcat->getCatalogParams()->initializeTransforms(); } // overloaded constructor Normalizer::Normalizer(const std::string normalizeFile, const unsigned int maxRestarts) { BOOST_LOG(rdInfoLog) << "Initializing Normalizer\n"; const TransformCatalogParams *tparams = &(param_filename_flyweight(normalizeFile).get()); this->d_tcat = new TransformCatalog(tparams); this->MAX_RESTARTS = maxRestarts; this->d_tcat->getCatalogParams()->initializeTransforms(); } // overloaded constructor Normalizer::Normalizer(std::istream &normalizeStream, const unsigned int maxRestarts) { BOOST_LOG(rdInfoLog) << "Initializing Normalizer\n"; TransformCatalogParams tparams(normalizeStream); this->d_tcat = new TransformCatalog(&tparams); this->MAX_RESTARTS = maxRestarts; this->d_tcat->getCatalogParams()->initializeTransforms(); } // overloaded constructor Normalizer::Normalizer( const std::vector> &normalizations, const unsigned int maxRestarts) { BOOST_LOG(rdInfoLog) << "Initializing Normalizer\n"; const TransformCatalogParams *tparams = &(param_data_flyweight(normalizations).get()); this->d_tcat = new TransformCatalog(tparams); this->MAX_RESTARTS = maxRestarts; this->d_tcat->getCatalogParams()->initializeTransforms(); } // destructor Normalizer::~Normalizer() { delete d_tcat; } void Normalizer::normalizeInPlace(RWMol &mol) { BOOST_LOG(rdInfoLog) << "Running Normalizer\n"; PRECONDITION(this->d_tcat, ""); const TransformCatalogParams *tparams = this->d_tcat->getCatalogParams(); PRECONDITION(tparams, "no transform parameters"); if (!mol.getNumAtoms()) { return; } const std::vector> &transforms = tparams->getTransformations(); // make the transforms are compatible with the // restrictions on in-place reactions for (auto &transform : transforms) { if (transform->getNumProductTemplates() != 1 || transform->getNumReactantTemplates() != 1 || transform->getProducts()[0]->getNumAtoms() > transform->getReactants()[0]->getNumAtoms()) { throw ValueErrorException( "normalizeInPlace can only be used with transforms which have a single reactant and single product. The number of atoms in the product cannot be larger than the number of atoms in the reactant."); } } // we might want ring info if (!mol.getRingInfo()->isSymmSssr()) { MolOps::symmetrizeSSSR(mol); } for (unsigned int i = 0; i < MAX_RESTARTS; ++i) { bool loop_break = false; // Iterate through Normalization transforms and apply each in order for (auto &transform : transforms) { constexpr bool removeUnmatchedAtoms = false; if (transform->runReactant(mol, removeUnmatchedAtoms)) { BOOST_LOG(rdInfoLog) << "Rule applied: " << transform->getProp(common_properties::_Name) << "\n"; constexpr unsigned int sanitizeOps = MolOps::SANITIZE_ALL ^ MolOps::SANITIZE_CLEANUP ^ MolOps::SANITIZE_PROPERTIES; unsigned int failed; try { MolOps::sanitizeMol(mol, failed, sanitizeOps); } catch (MolSanitizeException &) { BOOST_LOG(rdInfoLog) << "FAILED sanitizeMol.\n"; } loop_break = true; break; } } // For loop finishes normally, all applicable transforms have been applied if (!loop_break) { return; } } BOOST_LOG(rdInfoLog) << "Gave up normalization after " << MAX_RESTARTS << " restarts.\n"; } ROMol *Normalizer::normalize(const ROMol &mol) { BOOST_LOG(rdInfoLog) << "Running Normalizer\n"; PRECONDITION(this->d_tcat, ""); const TransformCatalogParams *tparams = this->d_tcat->getCatalogParams(); PRECONDITION(tparams, ""); if (!mol.getNumAtoms()) { return new ROMol(mol); } const std::vector> &transforms = tparams->getTransformations(); bool sanitizeFrags = false; MOL_SPTR_VECT frags = MolOps::getMolFrags(mol, sanitizeFrags); MOL_SPTR_VECT nfrags; //( frags.size() ); for (const auto &frag : frags) { frag->updatePropertyCache(false); ROMOL_SPTR nfrag(this->normalizeFragment(*frag, transforms)); nfrags.push_back(nfrag); } auto *outmol = new ROMol(*(nfrags.back())); nfrags.pop_back(); for (const auto &nfrag : nfrags) { ROMol *tmol = combineMols(*outmol, *nfrag); delete outmol; outmol = tmol; // delete nfrag; } return outmol; } ROMOL_SPTR Normalizer::normalizeFragment( const ROMol &mol, const std::vector> &transforms) const { ROMOL_SPTR nfrag(new ROMol(mol)); if (!nfrag->getRingInfo()->isFindFastOrBetter()) { MolOps::fastFindRings(*nfrag); } std::set seenProductSmiles; for (unsigned int i = 0; i < MAX_RESTARTS; ++i) { bool loop_break = false; // Iterate through Normalization transforms and apply each in order for (auto &transform : transforms) { SmilesMolPair product = applyTransform(nfrag, *transform); if (!product.first.empty() && !seenProductSmiles.count(product.first)) { seenProductSmiles.insert(product.first); BOOST_LOG(rdInfoLog) << "Rule applied: " << transform->getProp(common_properties::_Name) << "\n"; nfrag = product.second; loop_break = true; break; } } // For loop finishes normally, all applicable transforms have been applied if (!loop_break) { return nfrag; } } BOOST_LOG(rdInfoLog) << "Gave up normalization after " << MAX_RESTARTS << " restarts.\n"; return nfrag; } SmilesMolPair Normalizer::applyTransform(const ROMOL_SPTR &mol, ChemicalReaction &transform) const { // Repeatedly apply normalization transform to molecule until no changes // occur. // // It is possible for multiple products to be produced when a rule is applied. // The rule is applied repeatedly to each of the products, until no further // changes occur or after 20 attempts. // // If there are multiple unique products after the final application, the // first product (sorted alphabetically by SMILES) is chosen. SmilesMolPair smilesMolPair{std::string(), mol}; // REVIEW: what's the source of the 20 in the next line? for (unsigned int i = 0; i < 20; ++i) { std::map pdts; std::vector products = transform.runReactants({smilesMolPair.second}); for (auto &pdt : products) { // shared_ptr p0( new RWMol(*pdt[0]) ); // std::cout << MolToSmiles(*p0) << // std::endl; unsigned int failed; try { auto *tmol = static_cast(pdt.front().get()); // we'll allow atoms with a valence that's too high to make it // through, but we should fail if we just created something that // can't, for example, be kekulized. unsigned int sanitizeOps = MolOps::SANITIZE_ALL ^ MolOps::SANITIZE_CLEANUP ^ MolOps::SANITIZE_PROPERTIES; MolOps::sanitizeMol(*tmol, failed, sanitizeOps); pdts[MolToSmiles(*tmol)] = pdt.front(); } catch (MolSanitizeException &) { BOOST_LOG(rdInfoLog) << "FAILED sanitizeMol.\n"; } } if (!pdts.empty()) { smilesMolPair = std::move(*pdts.begin()); } else { if (i) { return smilesMolPair; } return std::make_pair(std::string(), nullptr); } } return smilesMolPair; } } // namespace MolStandardize } // namespace RDKit