mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-06 22:39:55 +08:00
* run clang-tidy with readability-braces-around-statements clang-format the results clean up all the parts that clang-tidy-8 broke * fix problem on windows
330 lines
13 KiB
C++
330 lines
13 KiB
C++
// Copyright (c) 2017, Novartis Institutes for BioMedical Research Inc.
|
|
// All rights reserved.
|
|
//
|
|
// Redistribution and use in source and binary forms, with or without
|
|
// modification, are permitted provided that the following conditions are
|
|
// met:
|
|
//
|
|
// * Redistributions of source code must retain the above copyright
|
|
// notice, this list of conditions and the following disclaimer.
|
|
// * Redistributions in binary form must reproduce the above
|
|
// copyright notice, this list of conditions and the following
|
|
// disclaimer in the documentation and/or other materials provided
|
|
// with the distribution.
|
|
// * Neither the name of Novartis Institutes for BioMedical Research Inc.
|
|
// nor the names of its contributors may be used to endorse or promote
|
|
// products derived from this software without specific prior written
|
|
// permission.
|
|
//
|
|
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
//
|
|
#define NO_IMPORT_ARRAY
|
|
#include <RDBoost/python.h>
|
|
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
|
|
#include <numpy/arrayobject.h>
|
|
#include <boost/python/list.hpp>
|
|
#include <boost/python/suite/indexing/map_indexing_suite.hpp>
|
|
#include <boost/python/suite/indexing/vector_indexing_suite.hpp>
|
|
#include <string>
|
|
#include <math.h>
|
|
|
|
#include <RDGeneral/Exceptions.h>
|
|
#include <GraphMol/SmilesParse/SmilesWrite.h>
|
|
#include <GraphMol/RDKitBase.h>
|
|
#include <GraphMol/RGroupDecomposition/RGroupDecomp.h>
|
|
#include <RDBoost/Wrap.h>
|
|
#include <RDBoost/python_streambuf.h>
|
|
|
|
namespace python = boost::python;
|
|
using boost_adaptbx::python::streambuf;
|
|
|
|
namespace RDKit {
|
|
|
|
class RGroupDecompositionHelper {
|
|
RGroupDecomposition *decomp;
|
|
|
|
public:
|
|
~RGroupDecompositionHelper() { delete decomp; }
|
|
|
|
RGroupDecompositionHelper(python::object cores,
|
|
const RGroupDecompositionParameters ¶ms =
|
|
RGroupDecompositionParameters()) {
|
|
python::extract<ROMol> isROMol(cores);
|
|
if (isROMol.check()) {
|
|
decomp = new RGroupDecomposition(isROMol(), params);
|
|
} else {
|
|
MOL_SPTR_VECT coreMols;
|
|
python::stl_input_iterator<ROMOL_SPTR> iter(cores), end;
|
|
while (iter != end) {
|
|
if (!*iter) {
|
|
throw_value_error("reaction called with None reactants");
|
|
}
|
|
coreMols.push_back(*iter);
|
|
++iter;
|
|
}
|
|
decomp = new RGroupDecomposition(coreMols, params);
|
|
}
|
|
}
|
|
|
|
int Add(const ROMol &mol) { return decomp->add(mol); }
|
|
bool Process() { return decomp->process(); }
|
|
|
|
python::list GetRGroupLabels() {
|
|
python::list result;
|
|
std::vector<std::string> labels = decomp->getRGroupLabels();
|
|
for(auto label : labels) {
|
|
result.append(label);
|
|
}
|
|
return result;
|
|
}
|
|
python::list GetRGroupsAsRows(bool asSmiles = false) {
|
|
const RGroupRows &groups = decomp->getRGroupsAsRows();
|
|
python::list result;
|
|
|
|
for (const auto &side_chains : groups) {
|
|
python::dict dict;
|
|
for (const auto &side_chain : side_chains) {
|
|
if (asSmiles) {
|
|
dict[side_chain.first] = MolToSmiles(*side_chain.second, true);
|
|
} else {
|
|
dict[side_chain.first] = side_chain.second;
|
|
}
|
|
}
|
|
result.append(dict);
|
|
}
|
|
return result;
|
|
}
|
|
|
|
python::dict GetRGroupsAsColumn(bool asSmiles = false) {
|
|
python::dict result;
|
|
|
|
RGroupColumns groups = decomp->getRGroupsAsColumns();
|
|
|
|
for (RGroupColumns::const_iterator it = groups.begin(); it != groups.end();
|
|
++it) {
|
|
python::list col;
|
|
|
|
for (const auto &cit : it->second) {
|
|
if (asSmiles) {
|
|
col.append(MolToSmiles(*cit, true));
|
|
} else {
|
|
col.append(cit);
|
|
}
|
|
}
|
|
result[it->first] = col;
|
|
}
|
|
return result;
|
|
}
|
|
};
|
|
|
|
python::object RGroupDecomp(python::object cores, python::object mols,
|
|
bool asSmiles = false, bool asRows = true,
|
|
const RGroupDecompositionParameters &options =
|
|
RGroupDecompositionParameters()) {
|
|
RGroupDecompositionHelper decomp(cores, options);
|
|
python::list unmatched;
|
|
|
|
python::stl_input_iterator<ROMOL_SPTR> iter(mols), end;
|
|
unsigned int idx = 0;
|
|
while (iter != end) {
|
|
if (!*iter) {
|
|
throw_value_error("reaction called with None reactants");
|
|
}
|
|
if (decomp.Add(*(*iter)) == -1) {
|
|
unmatched.append(idx);
|
|
}
|
|
++iter;
|
|
++idx;
|
|
}
|
|
|
|
decomp.Process();
|
|
if (asRows) {
|
|
return make_tuple(decomp.GetRGroupsAsRows(asSmiles), unmatched);
|
|
} else {
|
|
return make_tuple(decomp.GetRGroupsAsColumn(asSmiles), unmatched);
|
|
}
|
|
}
|
|
|
|
struct rgroupdecomp_wrapper {
|
|
static void wrap() {
|
|
// logic from https://stackoverflow.com/a/13017303
|
|
boost::python::type_info info =
|
|
boost::python::type_id<RDKit::MOL_SPTR_VECT>();
|
|
const boost::python::converter::registration *reg =
|
|
boost::python::converter::registry::query(info);
|
|
if (reg == nullptr || (*reg).m_to_python == nullptr) {
|
|
python::class_<RDKit::MOL_SPTR_VECT>("MOL_SPTR_VECT")
|
|
.def(python::vector_indexing_suite<RDKit::MOL_SPTR_VECT, true>());
|
|
}
|
|
|
|
std::string docString = "";
|
|
python::enum_<RDKit::RGroupLabels>("RGroupLabels")
|
|
.value("IsotopeLabels", RDKit::IsotopeLabels)
|
|
.value("AtomMapLabels", RDKit::AtomMapLabels)
|
|
.value("AtomIndexLabels", RDKit::AtomIndexLabels)
|
|
.value("RelabelDuplicateLabels", RDKit::RelabelDuplicateLabels)
|
|
.value("AutoDetect", RDKit::AutoDetect)
|
|
.export_values();
|
|
|
|
python::enum_<RDKit::RGroupMatching>("RGroupMatching")
|
|
.value("Greedy", RDKit::Greedy)
|
|
.value("GreedyChunks", RDKit::GreedyChunks)
|
|
.value("Exhaustive", RDKit::Exhaustive)
|
|
.export_values();
|
|
|
|
python::enum_<RDKit::RGroupLabelling>("RGroupLabelling")
|
|
.value("AtomMap", RDKit::AtomMap)
|
|
.value("Isotope", RDKit::Isotope)
|
|
.value("MDLRGroup", RDKit::MDLRGroup)
|
|
.export_values();
|
|
|
|
python::enum_<RDKit::RGroupCoreAlignment>("RGroupCoreAlignment")
|
|
.value("None", RDKit::None)
|
|
.value("MCS", RDKit::MCS)
|
|
.export_values();
|
|
|
|
docString =
|
|
"RGroupDecompositionParameters controls how the RGroupDecomposition "
|
|
"sets labelling and matches structures\n"
|
|
" OPTIONS:\n"
|
|
" - RGroupCoreAlignment: can be one of RGroupCoreAlignment.None or "
|
|
"RGroupCoreAlignment.MCS\n"
|
|
" If set to MCS, cores labels are mapped to "
|
|
"each other using their\n"
|
|
" Maximum common substructure overlap.\n"
|
|
" - RGroupLabels: optionally set where the rgroup labels to use are "
|
|
"encoded.\n"
|
|
" RroupLabels.IsotopeLabels - labels are stored on "
|
|
"isotopes\n"
|
|
" RroupLabels.AtomMapLabels - labels are stored on "
|
|
"atommaps\n"
|
|
" RroupLabels.AtomIndexLabels - use the atom index "
|
|
"as the label\n"
|
|
" RGroupLabels.RelabelDuplicateLabels - fix any "
|
|
"duplicate labels\n"
|
|
" RGroupLabels.AutoDetect - auto detect the label "
|
|
"[default]\n"
|
|
" Note: in all cases, any rgroups found on unlabelled atoms will "
|
|
"be automatically\n"
|
|
" labelled.\n"
|
|
" - RGroupLabelling: choose where the rlabels are stored on the "
|
|
"decomposition\n"
|
|
" RGroupLabels.AtomMap - store rgroups as atom "
|
|
"maps (for smiles)\n"
|
|
" RGroupLabels.Isotope - stroe rgroups on the "
|
|
"isotope\n"
|
|
" RGroupLabels.MDLRGroup - store rgroups as mdl "
|
|
"rgroups (for molblocks)\n"
|
|
" default: AtomMap | MDLRGroup\n"
|
|
" - matchOnlyAtRGroups: only allow rgroup decomposition at the "
|
|
"specified rgroups\n"
|
|
" - setRemoveRGroupsThatAreAllHydrogen: remove all rgroups that "
|
|
"only have hydrogens\n"
|
|
" - removeHydrogensPostMatch: remove all hydrogens from the output "
|
|
"molecules\n";
|
|
python::class_<RDKit::RGroupDecompositionParameters>(
|
|
"RGroupDecompositionParameters", docString.c_str(),
|
|
python::init<>("Constructor, takes no arguments"))
|
|
|
|
.def(python::init<RGroupLabels, RGroupMatching, RGroupLabelling,
|
|
RGroupCoreAlignment, unsigned int, bool, bool>())
|
|
.def_readwrite("labels", &RDKit::RGroupDecompositionParameters::labels)
|
|
.def_readwrite("matchingStrategy",
|
|
&RDKit::RGroupDecompositionParameters::matchingStrategy)
|
|
.def_readwrite("rgroupLabelling",
|
|
&RDKit::RGroupDecompositionParameters::rgroupLabelling)
|
|
.def_readwrite("alignment",
|
|
&RDKit::RGroupDecompositionParameters::alignment)
|
|
.def_readwrite("chunkSize",
|
|
&RDKit::RGroupDecompositionParameters::chunkSize)
|
|
.def_readwrite(
|
|
"onlyMatchAtRGroups",
|
|
&RDKit::RGroupDecompositionParameters::onlyMatchAtRGroups)
|
|
.def_readwrite(
|
|
"removeAllHydrogenRGroups",
|
|
&RDKit::RGroupDecompositionParameters::removeAllHydrogenRGroups)
|
|
.def_readwrite(
|
|
"removeHydrogensPostMatch",
|
|
&RDKit::RGroupDecompositionParameters::removeHydrogensPostMatch);
|
|
|
|
python::class_<RDKit::RGroupDecompositionHelper, boost::noncopyable>(
|
|
"RGroupDecomposition", docString.c_str(),
|
|
python::init<python::object>(
|
|
"Construct from a molecule or sequence of molecules"))
|
|
.def(
|
|
python::init<python::object, const RGroupDecompositionParameters &>(
|
|
"Construct from a molecule or sequence of molecules and a "
|
|
"parameters object"))
|
|
.def("Add", &RGroupDecompositionHelper::Add)
|
|
.def("Process", &RGroupDecompositionHelper::Process,
|
|
"Process the rgroups (must be done prior to "
|
|
"GetRGroupsAsRows/Columns and GetRGroupLabels)")
|
|
.def("GetRGroupLabels", &RGroupDecompositionHelper::GetRGroupLabels,
|
|
"Return the current list of found rgroups.\n"
|
|
"Note, Process() should be called first")
|
|
.def("GetRGroupsAsRows", &RGroupDecompositionHelper::GetRGroupsAsRows,
|
|
python::arg("asSmiles") = false,
|
|
"Return the rgroups as rows (note: can be fed directrly into a "
|
|
"pandas datatable)\n"
|
|
" ARGUMENTS:\n"
|
|
" - asSmiles: if True return smiles strings, otherwise return "
|
|
"molecules [default: False]\n"
|
|
" Row structure:\n"
|
|
" rows[idx] = {rgroup_label: molecule_or_smiles}\n")
|
|
.def("GetRGroupsAsColumns",
|
|
&RGroupDecompositionHelper::GetRGroupsAsColumn,
|
|
python::arg("asSmiles") = false,
|
|
"Return the rgroups as columns (note: can be fed directrly into a "
|
|
"pandas datatable)\n"
|
|
" ARGUMENTS:\n"
|
|
" - asSmiles: if True return smiles strings, otherwise return "
|
|
"molecules [default: False]\n"
|
|
" Column structure:\n"
|
|
" columns[rgroup_label] = [ mols_or_smiles ]\n");
|
|
|
|
docString =
|
|
"Decompose a collecion of molecules into their Rgroups\n"
|
|
" ARGUMENTS:\n"
|
|
" - cores: a set of cores from most to least specific.\n"
|
|
" See RGroupDecompositionParameters for more details\n"
|
|
" on how the cores can be labelled\n"
|
|
" - mols: the molecules to be decomposed\n"
|
|
" - asSmiles: if True return smiles strings, otherwise return "
|
|
"molecules [default: False]\n"
|
|
" - asRows: return the results as rows (default) otherwise return "
|
|
"columns\n"
|
|
"\n"
|
|
" RETURNS: row_or_column_results, unmatched\n"
|
|
"\n"
|
|
" Row structure:\n"
|
|
" rows[idx] = {rgroup_label: molecule_or_smiles}\n"
|
|
" Column structure:\n"
|
|
" columns[rgroup_label] = [ mols_or_smiles ]\n"
|
|
"\n"
|
|
" unmatched is a vector of indices in the input mols that were not "
|
|
"matched.\n";
|
|
python::def("RGroupDecompose", RDKit::RGroupDecomp,
|
|
(python::arg("cores"), python::arg("mols"),
|
|
python::arg("asSmiles") = false, python::arg("asRows") = true,
|
|
python::arg("options") = RGroupDecompositionParameters()),
|
|
docString.c_str());
|
|
};
|
|
};
|
|
}
|
|
|
|
BOOST_PYTHON_MODULE(rdRGroupDecomposition) {
|
|
python::scope().attr("__doc__") =
|
|
"Module containing RGroupDecomposition classes and functions.";
|
|
RDKit::rgroupdecomp_wrapper::wrap();
|
|
}
|