// Copyright (c) 2017-2021, Novartis Institutes for BioMedical Research Inc. // and other RDKit contributors // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following // disclaimer in the documentation and/or other materials provided // with the distribution. // * Neither the name of Novartis Institutes for BioMedical Research Inc. // nor the names of its contributors may be used to endorse or promote // products derived from this software without specific prior written // permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // #define NO_IMPORT_ARRAY #include #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION #include #include #include #include #include #include #include #include #include #include #include #include namespace python = boost::python; using boost_adaptbx::python::streambuf; namespace RDKit { class RGroupDecompositionHelper { std::unique_ptr decomp; public: RGroupDecompositionHelper(python::object cores, const RGroupDecompositionParameters ¶ms = RGroupDecompositionParameters()) { python::extract isROMol(cores); if (isROMol.check()) { decomp.reset(new RGroupDecomposition(isROMol(), params)); } else { MOL_SPTR_VECT coreMols; python::stl_input_iterator iter(cores), end; while (iter != end) { if (!*iter) { throw_value_error("reaction called with None reactants"); } coreMols.push_back(*iter); ++iter; } decomp.reset(new RGroupDecomposition(coreMols, params)); } } int Add(const ROMol &mol) { NOGIL gil; return decomp->add(mol); } int GetMatchingCoreIdx(const ROMol &mol, python::object &matches) { std::vector matchVect; int coreIdx; { NOGIL gil; coreIdx = decomp->getMatchingCoreIdx(mol, &matchVect); } if (!matches.is_none() && PySequence_Check(matches.ptr())) { auto &matchesList = reinterpret_cast(matches); for (const auto &match : matchVect) { python::list atomMap; for (const auto &pair : match) { atomMap.append(python::make_tuple(pair.first, pair.second)); } matchesList.append(python::tuple(atomMap)); } } return coreIdx; } bool Process() { NOGIL gil; return decomp->process(); } python::tuple ProcessAndScore() { NOGIL gil; auto result = decomp->processAndScore(); return python::make_tuple(result.success, result.score); } python::list GetRGroupLabels() { python::list result; std::vector labels = decomp->getRGroupLabels(); for (auto label : labels) { result.append(label); } return result; } python::list GetRGroupsAsRows(bool asSmiles = false) { const RGroupRows &groups = decomp->getRGroupsAsRows(); python::list result; for (const auto &side_chains : groups) { python::dict dict; for (const auto &[lbl, mol] : side_chains) { if (asSmiles) { dict[lbl] = MolToSmiles(*mol, true); } else { dict[lbl] = mol; } } result.append(dict); } return result; } python::dict GetRGroupsAsColumns(bool asSmiles = false) { python::dict result; RGroupColumns groups = decomp->getRGroupsAsColumns(); for (RGroupColumns::const_iterator it = groups.begin(); it != groups.end(); ++it) { python::list col; for (const auto &cit : it->second) { if (asSmiles) { col.append(MolToSmiles(*cit, true)); } else { col.append(cit); } } result[it->first] = col; } return result; } }; python::object RGroupDecomp(python::object cores, python::object mols, bool asSmiles = false, bool asRows = true, const RGroupDecompositionParameters &options = RGroupDecompositionParameters()) { auto t0 = std::chrono::steady_clock::now(); RGroupDecompositionHelper decomp(cores, options); python::list unmatched; python::stl_input_iterator iter(mols), end; unsigned int idx = 0; while (iter != end) { if (!*iter) { throw_value_error("reaction called with None reactants"); } if (decomp.Add(*(*iter)) == -1) { unmatched.append(idx); } ++iter; ++idx; checkForTimeout(t0, options.timeout); } decomp.Process(); if (asRows) { return make_tuple(decomp.GetRGroupsAsRows(asSmiles), unmatched); } else { return make_tuple(decomp.GetRGroupsAsColumns(asSmiles), unmatched); } } void relabelMappedDummiesHelper(ROMol &mol, unsigned int inputLabels, unsigned int outputLabels) { relabelMappedDummies(mol, static_cast(inputLabels), static_cast(outputLabels)); } struct rgroupdecomp_wrapper { static void wrap() { bool noproxy = true; RegisterVectorConverter("MOL_SPTR_VECT", noproxy); std::string docString = ""; python::enum_("RGroupLabels") .value("IsotopeLabels", RDKit::IsotopeLabels) .value("AtomMapLabels", RDKit::AtomMapLabels) .value("AtomIndexLabels", RDKit::AtomIndexLabels) .value("RelabelDuplicateLabels", RDKit::RelabelDuplicateLabels) .value("MDLRGroupLabels", RDKit::MDLRGroupLabels) .value("DummyAtomLabels", RDKit::DummyAtomLabels) .value("AutoDetect", RDKit::AutoDetect) .export_values(); python::enum_("RGroupMatching") .value("Greedy", RDKit::Greedy) .value("GreedyChunks", RDKit::GreedyChunks) .value("Exhaustive", RDKit::Exhaustive) .value("NoSymmetrization", RDKit::NoSymmetrization) .value("GA", RDKit::GA) .export_values(); python::enum_("RGroupLabelling") .value("AtomMap", RDKit::AtomMap) .value("Isotope", RDKit::Isotope) .value("MDLRGroup", RDKit::MDLRGroup) .export_values(); python::enum_("RGroupCoreAlignment") // DEPRECATED, remove the following line in release 2021.03 .value("None", RDKit::NoAlignment) .value("NoAlignment", RDKit::NoAlignment) .value("MCS", RDKit::MCS) .export_values(); python::enum_("RGroupScore") .value("Match", RDKit::Match) .value("FingerprintVariance", RDKit::FingerprintVariance) .export_values(); docString = "RGroupDecompositionParameters controls how the RGroupDecomposition " "sets labelling and matches structures\n" " OPTIONS:\n" " - RGroupCoreAlignment: can be one of RGroupCoreAlignment.None_ or " "RGroupCoreAlignment.MCS\n" " If set to MCS, cores labels are mapped to " "each other using their\n" " Maximum common substructure overlap.\n" " - RGroupLabels: optionally set where the rgroup labels to use are " "encoded.\n" " RGroupLabels.IsotopeLabels - labels are stored " "on isotopes\n" " RGroupLabels.AtomMapLabels - labels are stored " "on atommaps\n" " RGroupLabels.MDLRGroupLabels - labels are stored " "on MDL R-groups\n" " RGroupLabels.DummyAtomLabels - labels are stored " "on dummy atoms\n" " RGroupLabels.AtomIndexLabels - use the atom " "index " "as the label\n" " RGroupLabels.RelabelDuplicateLabels - fix any " "duplicate labels\n" " RGroupLabels.AutoDetect - auto detect the label " "[default]\n" " Note: in all cases, any rgroups found on unlabelled atoms will " "be automatically\n" " labelled.\n" " - RGroupLabelling: choose where the rlabels are stored on the " "decomposition\n" " RGroupLabelling.AtomMap - store rgroups as atom " "maps (for smiles)\n" " RGroupLabelling.Isotope - store rgroups on the " "isotope\n" " RGroupLabelling.MDLRGroup - store rgroups as mdl " "rgroups (for molblocks)\n" " default: AtomMap | MDLRGroup\n" " - onlyMatchAtRGroups: only allow rgroup decomposition at the " "specified rgroups\n" " - removeAllHydrogenRGroups: remove all user-defined rgroups that " "only have hydrogens\n" " - removeAllHydrogenRGroupsAndLabels: remove all user-defined " "rgroups that only have hydrogens, and also remove the corresponding " "labels from the core\n" " - removeHydrogensPostMatch: remove all hydrogens from the output " "molecules\n" " - allowNonTerminalRGroups: allow labelled Rgroups of degree 2 or " "more\n" " - doTautomers: match all tautomers of a core against each " "input structure\n" " - doEnumeration: expand input cores into enumerated mol bundles\n" " - allowMultipleRGroupsOnUnlabelled: permit more than one rgroup to " "be attached to an unlabelled core atom\n" " - allowMultipleCoresInSameMol: permit a core to match more than" " once in the same molecule if the sets of matched atoms are not equal" " (default=False)"; python::class_( "RGroupDecompositionParameters", docString.c_str(), python::init<>(python::args("self"), "Constructor, takes no arguments")) .def_readwrite("labels", &RDKit::RGroupDecompositionParameters::labels) .def_readwrite("matchingStrategy", &RDKit::RGroupDecompositionParameters::matchingStrategy) .def_readwrite("scoreMethod", &RDKit::RGroupDecompositionParameters::scoreMethod) .def_readwrite("rgroupLabelling", &RDKit::RGroupDecompositionParameters::rgroupLabelling) .def_readwrite("alignment", &RDKit::RGroupDecompositionParameters::alignment) .def_readwrite("chunkSize", &RDKit::RGroupDecompositionParameters::chunkSize) .def_readwrite( "onlyMatchAtRGroups", &RDKit::RGroupDecompositionParameters::onlyMatchAtRGroups) .def_readwrite( "removeAllHydrogenRGroups", &RDKit::RGroupDecompositionParameters::removeAllHydrogenRGroups) .def_readwrite( "removeHydrogensPostMatch", &RDKit::RGroupDecompositionParameters::removeHydrogensPostMatch) .def_readwrite("timeout", &RDKit::RGroupDecompositionParameters::timeout) .def_readwrite("gaPopulationSize", &RDKit::RGroupDecompositionParameters::gaPopulationSize) .def_readwrite( "gaMaximumOperations", &RDKit::RGroupDecompositionParameters::gaMaximumOperations) .def_readwrite("gaNumberOperationsWithoutImprovement", &RDKit::RGroupDecompositionParameters:: gaNumberOperationsWithoutImprovement) .def_readwrite("gaRandomSeed", &RDKit::RGroupDecompositionParameters::gaRandomSeed) .def_readwrite("gaNumberRuns", &RDKit::RGroupDecompositionParameters::gaNumberRuns) .def_readwrite("gaParallelRuns", &RDKit::RGroupDecompositionParameters::gaParallelRuns) .def_readwrite( "allowNonTerminalRGroups", &RDKit::RGroupDecompositionParameters::allowNonTerminalRGroups) .def_readwrite("removeAllHydrogenRGroupsAndLabels", &RDKit::RGroupDecompositionParameters:: removeAllHydrogenRGroupsAndLabels) .def_readwrite("allowMultipleRGroupsOnUnlabelled", &RDKit::RGroupDecompositionParameters:: allowMultipleRGroupsOnUnlabelled) .def_readwrite( "allowMultipleCoresInSameMol", &RDKit::RGroupDecompositionParameters::allowMultipleCoresInSameMol) .def_readwrite("doTautomers", &RDKit::RGroupDecompositionParameters::doTautomers) .def_readwrite("doEnumeration", &RDKit::RGroupDecompositionParameters::doEnumeration) .def_readonly( "substructMatchParams", &RDKit::RGroupDecompositionParameters::substructmatchParams) .def_readwrite( "includeTargetMolInResults", &RDKit::RGroupDecompositionParameters::includeTargetMolInResults) .def("__setattr__", &safeSetattr); python::class_( "RGroupDecomposition", python::init( python::args("self", "cores"), "Construct from a molecule or sequence of molecules")) .def( python::init( python::args("self", "cores", "params"), "Construct from a molecule or sequence of molecules and a " "parameters object")) .def("Add", &RGroupDecompositionHelper::Add, python::args("self", "mol")) .def("GetMatchingCoreIdx", &RGroupDecompositionHelper::GetMatchingCoreIdx, ((python::arg("self"), python::arg("mol")), python::arg("matches") = python::object())) .def("Process", &RGroupDecompositionHelper::Process, python::args("self"), "Process the rgroups (must be done prior to " "GetRGroupsAsRows/Columns and GetRGroupLabels)") .def("ProcessAndScore", &RGroupDecompositionHelper::ProcessAndScore, python::args("self"), "Process the rgroups and returns the score (must be done prior to " "GetRGroupsAsRows/Columns and GetRGroupLabels)") .def("GetRGroupLabels", &RGroupDecompositionHelper::GetRGroupLabels, python::args("self"), "Return the current list of found rgroups.\n" "Note, Process() should be called first") .def("GetRGroupsAsRows", &RGroupDecompositionHelper::GetRGroupsAsRows, (python::arg("self"), python::arg("asSmiles") = false), "Return the rgroups as rows (note: can be fed directly into a " "pandas datatable)\n" " ARGUMENTS:\n" " - asSmiles: if True return smiles strings, otherwise return " "molecules [default: False]\n" " Row structure:\n" " rows[idx] = {rgroup_label: molecule_or_smiles}\n") .def("GetRGroupsAsColumns", &RGroupDecompositionHelper::GetRGroupsAsColumns, (python::arg("self"), python::arg("asSmiles") = false), "Return the rgroups as columns (note: can be fed directly into a " "pandas datatable)\n" " ARGUMENTS:\n" " - asSmiles: if True return smiles strings, otherwise return " "molecules [default: False]\n" " Column structure:\n" " columns[rgroup_label] = [ mols_or_smiles ]\n"); docString = "Decompose a collection of molecules into their Rgroups\n" " ARGUMENTS:\n" " - cores: a set of cores from most to least specific.\n" " See RGroupDecompositionParameters for more details\n" " on how the cores can be labelled\n" " - mols: the molecules to be decomposed\n" " - asSmiles: if True return smiles strings, otherwise return " "molecules [default: False]\n" " - asRows: return the results as rows (default) otherwise return " "columns\n" " - options: RGroupDecompositionParameters object that defines " "the parameters for the decomposition.\n" " See RGroupDecompositionParameters for defaults\n" "\n" " RETURNS: row_or_column_results, unmatched\n" "\n" " Row structure:\n" " rows[idx] = {rgroup_label: molecule_or_smiles}\n" " Column structure:\n" " columns[rgroup_label] = [ mols_or_smiles ]\n" "\n" " unmatched is a vector of indices in the input mols that were not " "matched.\n"; python::def("RGroupDecompose", RGroupDecomp, (python::arg("cores"), python::arg("mols"), python::arg("asSmiles") = false, python::arg("asRows") = true, python::arg("options") = RGroupDecompositionParameters()), docString.c_str()); docString = "Relabel dummy atoms bearing an R-group mapping (as\n" "atom map number, isotope or MDLRGroup label) such that\n" "they will be displayed by the rendering code as R# rather\n" "than #*, *:#, #*:#, etc. By default, only the MDLRGroup label\n" "is retained on output; this may be configured through the\n" "outputLabels parameter.\n" "In case there are multiple potential R-group mappings,\n" "the priority on input is Atom map number > Isotope > MDLRGroup.\n" "The inputLabels parameter allows to configure which mappings\n" "are taken into consideration.\n"; python::def("RelabelMappedDummies", relabelMappedDummiesHelper, (python::arg("mol"), python::arg("inputLabels") = static_cast( AtomMap | Isotope | MDLRGroup), python::arg("outputLabels") = MDLRGroup), docString.c_str()); }; }; } // namespace RDKit BOOST_PYTHON_MODULE(rdRGroupDecomposition) { python::scope().attr("__doc__") = "Module containing RGroupDecomposition classes and functions."; RDKit::rgroupdecomp_wrapper::wrap(); }