Files
rdkit/Code/GraphMol/Abbreviations/Wrap/rdAbbreviations.cpp
Greg Landrum d2d87909de Add support for abbreviations (#3406)
* support read-only access to cstates from python

* expose GetBrackets

* expose getAttachPoints too

remove vestigial SubstanceGroupCState_VECT

* backup

* backup

* basics working

* backup

* add label_mol_abbreviations

* fix a bug in the chirality handling

* add linkers, needs more testing

* add another peptide test

* sanitize results by default

* just need rings

* getting started with the C++ form of abbreviations

* a bit of error handling

* add findApplicableMatches

* actually apply the abbreviations

* make the getDefault functions more efficient

* add labeling (creating s groups)

* docs

* basic python wrappers (maybe this is enough?)

* add _displayLabel and _displayLabelW support to MolDraw2D
update the docs for that

* use displayLabel props

* add more default abbrevs

* change default linker defns
add parseLinkers convenience function

* make sure attachment point atoms aren't aromatic

* change the color of dummies to be darker gray

* remove python implementation

* support abbreviations in the java wrappers

* add abbreviations to the csharp wrappers

* add abbreviations to the js wrappers

* add molParity to the list of atom props not written to CXSMILES

* support condensing SUP substance groups

* add that to the python wrappers

* Update testAbbreviations.py

* clear ring info if we added it

* document that the molecules with abbreviations removed have not been sanitized
2020-09-28 17:09:46 -04:00

101 lines
4.4 KiB
C++

//
// Copyright (C) 2020 Greg Landrum and T5 Informatics GmbH
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
#include <RDBoost/python.h>
#include <boost/python/suite/indexing/vector_indexing_suite.hpp>
#include <GraphMol/GraphMol.h>
#include <RDBoost/Wrap.h>
#include <GraphMol/Abbreviations/Abbreviations.h>
#include <memory>
#include <vector>
namespace python = boost::python;
using namespace RDKit;
namespace {
// Python-facing wrapper around Abbreviations::condenseMolAbbreviations().
//
// Copies \c mol, converts \c pyabbrevs to a vector of AbbreviationDefinition
// and runs the condensing step on the copy with the given \c maxCoverage and
// \c sanitize values. The input molecule is left untouched.
//
// Returns a newly allocated molecule; the caller takes ownership (the Python
// binding registers this function with manage_new_object).
ROMol *condenseMolAbbreviationsHelper(const ROMol *mol,
                                      python::object pyabbrevs,
                                      double maxCoverage, bool sanitize) {
  // Own the copy through a smart pointer so that it is released if converting
  // pyabbrevs or condensing the abbreviations throws.
  std::unique_ptr<RWMol> res(new RWMol(*mol));
  auto abbrevs =
      pythonObjectToVect<Abbreviations::AbbreviationDefinition>(pyabbrevs);
  // NOTE(review): pythonObjectToVect appears to return a null pointer for
  // falsy input (None or an empty sequence) - confirm against RDBoost/Wrap.h.
  // Use an empty definition list in that case rather than dereferencing null.
  const std::vector<Abbreviations::AbbreviationDefinition> noAbbrevs;
  Abbreviations::condenseMolAbbreviations(
      *res, abbrevs ? *abbrevs : noAbbrevs, maxCoverage, sanitize);
  return rdcast<ROMol *>(res.release());
}
// Python-facing wrapper around
// Abbreviations::condenseAbbreviationSubstanceGroups().
//
// Copies \c mol and runs the substance-group condensing step on the copy; the
// input molecule is left untouched.
//
// Returns a newly allocated molecule; the caller takes ownership (the Python
// binding registers this function with manage_new_object).
ROMol *condenseAbbreviationSGroupHelper(const ROMol *mol) {
  // Own the copy through a smart pointer so that it is released if the
  // condensing step throws.
  std::unique_ptr<RWMol> res(new RWMol(*mol));
  Abbreviations::condenseAbbreviationSubstanceGroups(*res);
  return rdcast<ROMol *>(res.release());
}
// Python-facing wrapper around Abbreviations::labelMolAbbreviations().
//
// Copies \c mol, converts \c pyabbrevs to a vector of AbbreviationDefinition
// and runs the labeling step on the copy with the given \c maxCoverage. The
// input molecule is left untouched.
//
// Returns a newly allocated molecule; the caller takes ownership (the Python
// binding registers this function with manage_new_object).
ROMol *labelMolAbbreviationsHelper(const ROMol *mol, python::object pyabbrevs,
                                   double maxCoverage) {
  // Own the copy through a smart pointer so that it is released if converting
  // pyabbrevs or labeling the abbreviations throws.
  std::unique_ptr<RWMol> res(new RWMol(*mol));
  auto abbrevs =
      pythonObjectToVect<Abbreviations::AbbreviationDefinition>(pyabbrevs);
  // NOTE(review): pythonObjectToVect appears to return a null pointer for
  // falsy input (None or an empty sequence) - confirm against RDBoost/Wrap.h.
  // Use an empty definition list in that case rather than dereferencing null.
  const std::vector<Abbreviations::AbbreviationDefinition> noAbbrevs;
  Abbreviations::labelMolAbbreviations(*res, abbrevs ? *abbrevs : noAbbrevs,
                                       maxCoverage);
  return rdcast<ROMol *>(res.release());
}
} // namespace
// Defines the rdAbbreviations Python module: the AbbreviationDefinition class,
// the functions that produce definitions (defaults and parsers), and the
// functions that apply them to molecules.
BOOST_PYTHON_MODULE(rdAbbreviations) {
  using AbbrevDefn = Abbreviations::AbbreviationDefinition;
  namespace AbbrevUtils = Abbreviations::Utils;
  // the molecule-returning helpers hand back freshly allocated objects that
  // Python takes ownership of
  using NewObjectPolicy =
      python::return_value_policy<python::manage_new_object>;

  python::scope().attr("__doc__") =
      "Module containing functions for working with molecular abbreviations";

  // Only vectors of definitions get a converter here; the equivalent
  // registration for Abbreviations::AbbreviationMatch is not enabled.
  RegisterVectorConverter<AbbrevDefn>();

  // --- the definition class and its read/write data members ---
  python::class_<AbbrevDefn> defnClass(
      "AbbreviationDefinition", "Abbreviation Definition", python::init<>());
  defnClass.def_readwrite("label", &AbbrevDefn::label, "the label");
  defnClass.def_readwrite(
      "displayLabel", &AbbrevDefn::displayLabel,
      "the label in a drawing when the bond comes from the right");
  defnClass.def_readwrite(
      "displayLabelW", &AbbrevDefn::displayLabelW,
      "the label in a drawing when the bond comes from the west");
  defnClass.def_readwrite(
      "mol", &AbbrevDefn::mol,
      "the query molecule (should have a dummy as the first atom)");

  // --- sources of definitions ---
  python::def("GetDefaultAbbreviations",
              &AbbrevUtils::getDefaultAbbreviations,
              "returns a list of the default abbreviation definitions");
  python::def("GetDefaultLinkers", &AbbrevUtils::getDefaultLinkers,
              "returns a list of the default linker definitions");
  python::def("ParseAbbreviations", &AbbrevUtils::parseAbbreviations,
              (python::arg("text"), python::arg("removeExtraDummies") = false,
               python::arg("allowConnectionToDummies") = false),
              "returns a set of abbreviation definitions from a string");
  python::def("ParseLinkers", &AbbrevUtils::parseLinkers,
              (python::arg("text")),
              "returns a set of linker definitions from a string");

  // --- operations on molecules; each returns a new molecule ---
  python::def(
      "CondenseMolAbbreviations", &condenseMolAbbreviationsHelper,
      (python::arg("mol"), python::arg("abbrevs"),
       python::arg("maxCoverage") = 0.4, python::arg("sanitize") = true),
      NewObjectPolicy(),
      "Finds and replaces abbreviations in a molecule. The result is not sanitized.");
  python::def("LabelMolAbbreviations", &labelMolAbbreviationsHelper,
              (python::arg("mol"), python::arg("abbrevs"),
               python::arg("maxCoverage") = 0.4),
              NewObjectPolicy(),
              "Finds abbreviations and adds to them to a molecule as \"SUP\" "
              "SubstanceGroups");
  python::def(
      "CondenseAbbreviationSubstanceGroups", &condenseAbbreviationSGroupHelper,
      (python::arg("mol")), NewObjectPolicy(),
      "Finds and replaces abbrevation (i.e. \"SUP\") substance groups in a "
      "molecule. The result is not sanitized.");
}