Files
rdkit/Code/GraphMol/Wrap/MolOps.cpp
Greg Landrum 6615ae5a01 SDMolSupplier: don't throw on errors parsing molecules. This allows the parsing of further mols to continue.
Conformer.cpp: make sure we don't ask for space for zero atoms.

Kekulize.cpp: don't complain about marked bonds if we aren't removing markers.

molopstest.cpp: bad smiles corrected in one test.exe

ROMol.cpp, RWMol.cpp: extra preconditions added (should have been there all along)

Wrap/MolOps.cpp: docs for replacesubstructs.
2006-09-18 05:21:39 +00:00

605 lines
19 KiB
C++
Executable File

// $Id$
//
// Copyright (C) 2003-2006 Rational Discovery LLC
//
// @@ All Rights Reserved @@
//
#define NO_IMPORT_ARRAY
#include "rdmolops.h"
#include <boost/python.hpp>
#include <Numeric/arrayobject.h>
#include <string>
#include <math.h>
#include <DataStructs/ExplicitBitVect.h>
#include <GraphMol/RDKitBase.h>
#include <GraphMol/Substruct/SubstructMatch.h>
#include <GraphMol/Subgraphs/Subgraphs.h>
#include <GraphMol/Fingerprints/Fingerprints.h>
#include <GraphMol/FileParsers/MolFileStereochem.h>
#include <GraphMol/ChemTransforms/ChemTransforms.h>
namespace python = boost::python;
namespace RDKit{
ROMol *addHs(const ROMol &orig,bool explicitOnly=false,bool addCoords=false){
return MolOps::addHs(orig,explicitOnly,addCoords);
}
ROMol *removeHs(const ROMol &orig,bool implicitOnly=false){
return MolOps::removeHs(orig,implicitOnly);
}
int getSSSR(ROMol &mol) {
VECT_INT_VECT rings;
int nr = MolOps::findSSSR(mol, rings);
return nr;
}
PyObject* replaceSubstructures(const ROMol &orig,
const ROMol &query,
const ROMol &replacement,
bool replaceAll=false) {
std::vector<ROMOL_SPTR> v=replaceSubstructs(orig, query,
replacement, replaceAll);
PyObject *res=PyTuple_New(v.size());
for(unsigned int i=0;i<v.size();++i){
PyTuple_SetItem(res,i,
python::converter::shared_ptr_to_python(v[i]));
}
return res;
}
void sanitizeMol(ROMol &mol) {
RWMol &wmol = static_cast<RWMol &>(mol);
MolOps::sanitizeMol(wmol);
}
void kekulizeMol(ROMol &mol,bool clearAromaticFlags=false) {
RWMol &wmol = static_cast<RWMol &>(mol);
MolOps::Kekulize(wmol,clearAromaticFlags);
}
VECT_INT_VECT getSymmSSSR(ROMol &mol) {
VECT_INT_VECT rings;
MolOps::symmetrizeSSSR(mol, rings);
return rings;
}
PyObject *getDistanceMatrix(ROMol &mol, bool useBO=false,
bool useAtomWts=false,bool force=false,
const char *prefix=0) {
int dims[2];
int nats = mol.getNumAtoms();
dims[0] = nats;
dims[1] = nats;
double *distMat;
distMat = MolOps::getDistanceMat(mol, useBO, useAtomWts,force,prefix);
PyArrayObject *res = (PyArrayObject *)PyArray_FromDims(2,dims,PyArray_DOUBLE);
memcpy(static_cast<void *>(res->data),
static_cast<void *>(distMat),nats*nats*sizeof(double));
return (PyObject *) res;
}
PyObject *getAdjacencyMatrix(ROMol &mol, bool useBO=false,
int emptyVal=0,bool force=false,
const char *prefix=0) {
int dims[2];
int nats = mol.getNumAtoms();
dims[0] = nats;
dims[1] = nats;
double *tmpMat = MolOps::getAdjacencyMatrix(mol, useBO, emptyVal,force,prefix);
PyArrayObject *res;
if(useBO){
// if we're using valence, the results matrix is made up of doubles
res = (PyArrayObject *)PyArray_FromDims(2,dims,
PyArray_DOUBLE);
memcpy(static_cast<void *>(res->data),
static_cast<void *>(tmpMat),nats*nats*sizeof(double));
} else {
res = (PyArrayObject *)PyArray_FromDims(2,dims,
PyArray_INT);
int *data = (int *)res->data;
for(int i=0;i<nats;i++){
for(int j=0;j<nats;j++){
data[i*nats+j] = (int)round(tmpMat[i*nats+j]);
}
}
}
return (PyObject *) res;
}
VECT_INT_VECT GetMolFrags(const ROMol &mol){
VECT_INT_VECT frags;
MolOps::getMolFrags(mol,frags);
return frags;
}
struct molops_wrapper {
static void wrap() {
std::string docString;
// ------------------------------------------------------------------------
docString="Kekulize, check valencies, set aromaticity, conjugation and hybridization\n\
\n\
- The molecule is modified in place.\n\
\n\
- If sanitization fails, an exception will be thrown\n\
\n\
ARGUMENTS:\n\
\n\
- mol: the molecule to be modified\n\
\n\
NOTES:\n\
\n";
python::def("SanitizeMol", sanitizeMol,
docString.c_str());
// ------------------------------------------------------------------------
docString="Get the smallest set of simple rings for a molecule.\n\
\n\
ARGUMENTS:\n\
\n\
- mol: the molecule to use.\n\
\n\
RETURNS: the number of rings found\n\
This will be equal to NumBonds-NumAtoms+1 for single-fragment molecules.\n\
\n";
python::def("GetSSSR", getSSSR,
docString.c_str());
// ------------------------------------------------------------------------
docString="Get a symmetrized SSSR for a molecule.\n\
\n\
The symmetrized SSSR is at least as large as the SSSR for a molecule.\n\
In certain highly-symmetric cases (e.g. cubane), the symmetrized SSSR can be\n\
a bit larger (i.e. the number of symmetrized rings is >= NumBonds-NumAtoms+1).\n\
\n\
ARGUMENTS:\n\
\n\
- mol: the molecule to use.\n\
\n\
RETURNS: the number of rings found\n\
\n";
python::def("GetSymmSSSR", getSymmSSSR,
docString.c_str());
// ------------------------------------------------------------------------
docString="Adds hydrogens to the graph of a molecule.\n\
\n\
ARGUMENTS:\n\
\n\
- mol: the molecule to be modified\n\
\n\
- explicitOnly: (optional) if this toggle is set, only explicit Hs will\n\
be added to the molecule. Default value is 0 (add implicit and explicit Hs).\n\
\n\
RETURNS: a new molecule with added Hs\n\
\n\
NOTES:\n\
\n\
- The original molecule is *not* modified.\n\
\n\
- Much of the code assumes that Hs are not included in the molecular\n\
topology, so be *very* careful with the molecule that comes back from\n\
this function.\n\
\n";
python::def("AddHs", addHs,
(python::arg("mol"),python::arg("explicitOnly")=false,
python::arg("addCoords")=false),
docString.c_str(),
python::return_value_policy<python::manage_new_object>());
// ------------------------------------------------------------------------
docString="Removes any hydrogens from the graph of a molecule.\n\
\n\
ARGUMENTS:\n\
\n\
- mol: the molecule to be modified\n\
\n\
- implicitOnly: (optional) if this toggle is set, only implicit Hs will\n\
be removed from the graph. Default value is 0 (remove implicit and explicit Hs).\n\
\n\
RETURNS: a new molecule with the Hs removed\n\
\n\
NOTES:\n\
\n\
- The original molecule is *not* modified.\n\
\n";
python::def("RemoveHs", removeHs,
(python::arg("mol"),python::arg("implicitOnly")=false),
docString.c_str(),
python::return_value_policy<python::manage_new_object>());
// ------------------------------------------------------------------------
docString="Removes atoms matching a substructure query from a molecule\n\
\n\
ARGUMENTS:\n\
\n\
- mol: the molecule to be modified\n\
\n\
- query: the molecule to be used as a substructure query\n\
\n\
- onlyFrags: (optional) if this toggle is set, atoms will only be removed if\n\
the entire fragment in which they are found is matched by the query.\n\
See below for examples.\n\
Default value is 0 (remove the atoms whether or not the entire fragment matches)\n\
\n\
RETURNS: a new molecule with the substructure removed\n\
\n\
NOTES:\n\
\n\
- The original molecule is *not* modified.\n\
\n\
EXAMPLES:\n\
\n\
The following examples substitute SMILES/SMARTS strings for molecules, you'd have\n\
to actually use molecules:\n\
\n\
- DeleteSubstructs('CCOC','OC') -> 'CC'\n\
\n\
- DeleteSubstructs('CCOC','OC',1) -> 'CCOC'\n\
\n\
- DeleteSubstructs('CCOCCl.Cl','Cl',1) -> 'CCOCCl'\n\
\n\
- DeleteSubstructs('CCOCCl.Cl','Cl') -> 'CCOC'\n\
\n";
python::def("DeleteSubstructs", deleteSubstructs,
(python::arg("mol"),python::arg("query"),
python::arg("onlyFrags")=false),
docString.c_str(),
python::return_value_policy<python::manage_new_object>());
// ------------------------------------------------------------------------
docString="Replaces atoms matching a substructure query in a molecule\n\
\n\
ARGUMENTS:\n\
\n\
- mol: the molecule to be modified\n\
\n\
- query: the molecule to be used as a substructure query\n\
\n\
- replacement: the molecule to be used as the replacement\n\
\n\
- replaceAll: (optional) if this toggle is set, all substructures matching\n\
the query will be replaced in a single result, otherwise each result will\n\
contain a separate replacement.\n\
Default value is False (return multiple replacements)\n\
\n\
RETURNS: a tuple of new molecules with the substructures replaced removed\n\
\n\
NOTES:\n\
\n\
- The original molecule is *not* modified.\n\
\n\
EXAMPLES:\n\
\n\
The following examples substitute SMILES/SMARTS strings for molecules, you'd have\n\
to actually use molecules:\n\
\n\
- ReplaceSubstructs('CCOC','OC','NC') -> ('CCNC',)\n\
\n\
- ReplaceSubstructs('COCCOC','OC','NC') -> ('COCCNC','CNCCOC')\n\
\n\
- ReplaceSubstructs('COCCOC','OC','NC',True) -> ('CNCCNC',)\n\
\n";
python::def("ReplaceSubstructs", replaceSubstructures,
(python::arg("mol"),python::arg("query"),
python::arg("replacement"),
python::arg("replaceAll")=false),
docString.c_str());
// ------------------------------------------------------------------------
docString="Returns the molecule's distance matrix.\n\
\n\
ARGUMENTS:\n\
\n\
- mol: the molecule to use\n\
\n\
- useBO: (optional) toggles use of bond orders in calculating the distance matrix.\n\
Default value is 0.\n\
\n\
- useAtomWts: (optional) toggles using atom weights for the diagonal elements of the\n\
matrix (to return a \"Balaban\" distance matrix).\n\
Default value is 0.\n\
\n\
- force: (optional) forces the calculation to proceed, even if there is a cached value.\n\
Default value is 0.\n\
\n\
- prefix: (optional, internal use) sets the prefix used in the property cache\n\
Default value is "".\n\
\n\
RETURNS: a Numeric array of floats with the distance matrix\n\
\n";
python::def("GetDistanceMatrix", getDistanceMatrix,
(python::arg("mol"),python::arg("useBO")=false,
python::arg("useAtomWts")=false,
python::arg("force")=false,
python::arg("prefix")=""),
docString.c_str());
// ------------------------------------------------------------------------
docString="Returns the molecule's adjacency matrix.\n\
\n\
ARGUMENTS:\n\
\n\
- mol: the molecule to use\n\
\n\
- useBO: (optional) toggles use of bond orders in calculating the matrix.\n\
Default value is 0.\n\
\n\
- emptyVal: (optional) sets the elements of the matrix between non-adjacent atoms\n\
Default value is 0.\n\
\n\
- force: (optional) forces the calculation to proceed, even if there is a cached value.\n\
Default value is 0.\n\
\n\
- prefix: (optional, internal use) sets the prefix used in the property cache\n\
Default value is "".\n\
\n\
RETURNS: a Numeric array of floats containing the adjacency matrix\n\
\n";
python::def("GetAdjacencyMatrix", getAdjacencyMatrix,
(python::arg("mol"), python::arg("useBO")=false,
python::arg("emptyVal")=0,
python::arg("force")=false,
python::arg("prefix")=""),
docString.c_str());
// ------------------------------------------------------------------------
docString="Kekulizes the molecule\n\
\n\
ARGUMENTS:\n\
\n\
- mol: the molecule to use\n\
\n\
- clearAromaticFlags: (optional) if this toggle is set, all atoms and bonds in the \n\
molecule will be marked non-aromatic following the kekulization.\n\
Default value is 0.\n\
\n\
NOTES:\n\
\n\
- The molecule is modified in place.\n\
\n";
python::def("Kekulize", kekulizeMol,
(python::arg("mol"),python::arg("clearAromaticFlags")=false),
docString.c_str());
// ------------------------------------------------------------------------
docString="Finds all subgraphs of a particular length in a molecule\n\
\n\
ARGUMENTS:\n\
\n\
- mol: the molecule to use\n\
\n\
- length: an integer with the target number of bonds for the subgraphs.\n\
\n\
- useHs: (optional) toggles whether or not bonds to Hs that are part of the graph\n\
should be included in the results.\n\
Defaults to 0.\n\
\n\
- verbose: (optional, internal use) toggles verbosity in the search algorithm.\n\
Defaults to 0.\n\
\n\
RETURNS: a tuple of 2-tuples with bond IDs\n\
\n\
NOTES: \n\
\n\
- Difference between _subgraphs_ and _paths_ :: \n\
\n\
Subgraphs are potentially branched, whereas paths (in our \n\
terminology at least) cannot be. So, the following graph: \n\
\n\
C--0--C--1--C--3--C\n\
|\n\
2\n\
|\n\
C\n\
has 3 _subgraphs_ of length 3: (0,1,2),(0,1,3),(2,1,3)\n\
but only 2 _paths_ of length 3: (0,1,3),(2,1,3)\n\
\n";
python::def("FindAllSubgraphsOfLengthN", &findAllSubgraphsOfLengthN,
(python::arg("mol"),python::arg("length"),
python::arg("useHs")=false,python::arg("verbose")=false),
docString.c_str());
// ------------------------------------------------------------------------
docString="Finds unique subgraphs of a particular length in a molecule\n\
\n\
ARGUMENTS:\n\
\n\
- mol: the molecule to use\n\
\n\
- length: an integer with the target number of bonds for the subgraphs.\n\
\n\
- useHs: (optional) toggles whether or not bonds to Hs that are part of the graph\n\
should be included in the results.\n\
Defaults to 0.\n\
\n\
- useBO: (optional) Toggles use of bond orders in distinguishing one subgraph from\n\
another.\n\
Defaults to 1.\n\
\n\
RETURNS: a tuple of tuples with bond IDs\n\
\n\
\n";
python::def("FindUniqueSubgraphsOfLengthN", &findUniqueSubgraphsOfLengthN,
(python::arg("mol"),python::arg("length"),
python::arg("useHs")=false,python::arg("useBO")=true),
docString.c_str());
// ------------------------------------------------------------------------
docString="Finds all paths of a particular length in a molecule\n\
\n\
ARGUMENTS:\n\
\n\
- mol: the molecule to use\n\
\n\
- length: an integer with the target length for the paths.\n\
\n\
- useBonds: (optional) toggles the use of bond indices in the paths.\n\
Otherwise atom indices are used. *Note* this behavior is different\n\
from that for subgraphs.\n\
Defaults to 1.\n\
\n\
- useHs: (optional) toggles verbosity in the search algorithm.\n\
Defaults to 0.\n\
\n\
RETURNS: a tuple of tuples with IDs for the bonds.\n\
\n\
NOTES: \n\
\n\
- Difference between _subgraphs_ and _paths_ :: \n\
\n\
Subgraphs are potentially branched, whereas paths (in our \n\
terminology at least) cannot be. So, the following graph: \n\
\n\
C--0--C--1--C--3--C\n\
|\n\
2\n\
|\n\
C\n\
\n\
has 3 _subgraphs_ of length 3: (0,1,2),(0,1,3),(2,1,3)\n\
but only 2 _paths_ of length 3: (0,1,3),(2,1,3)\n\
\n";
python::def("FindAllPathsOfLengthN", &findAllPathsOfLengthN,
(python::arg("mol"),python::arg("length"),
python::arg("useBonds")=true,python::arg("useHs")=false),
docString.c_str());
// ------------------------------------------------------------------------
docString="Finds the disconnected fragments from a molecule.\n\
\n\
For example, for the molecule 'CC(=O)[O-].[NH3+]C' GetMolFrags() returns\n\
((0, 1, 2, 3), (4, 5))\n\
\n\
ARGUMENTS:\n\
\n\
- mol: the molecule to use\n\
\n\
RETURNS: a tuple of tuples with IDs for the atoms in each fragment.\n\
\n";
python::def("GetMolFrags", &GetMolFrags,docString.c_str());
// ------------------------------------------------------------------------
docString="Returns the formal charge for the molecule.\n\
\n\
ARGUMENTS:\n\
\n\
- mol: the molecule to use\n\
\n";
python::def("GetFormalCharge", &MolOps::getFormalCharge,docString.c_str());
// ------------------------------------------------------------------------
docString="Does the CIP chirality assignment (R/S) \n\
for the molecule's atoms.\n\
Chiral atoms will have a property '_CIPCode' indicating\n\
their chiral code.\n\
\n\
ARGUMENTS:\n\
\n\
- mol: the molecule to use\n\
- cleanIt: (optional) if provided, atoms with a chiral specifier that aren't\n\
actually chiral (e.g. atoms with duplicate substituents or only 2 substituents,\n\
etc.) will have their chiral code set to CHI_UNSPECIFIED\n\
- force: (optional) causes the calculation to be repeated, even if it has already\n\
been done\n\
\n";
python::def("AssignAtomChiralCodes", MolOps::assignAtomChiralCodes,
(python::arg("mol"),python::arg("cleanIt")=false,python::arg("force")=false),
docString.c_str());
// ------------------------------------------------------------------------
docString="Does the CIP stereochemistry assignment (Z/E)\n\
for the molecule's bonds .\n\
Qualifying bonds will have a property '_CIPCode' indicating\n\
their stereochemistry.\n\
\n\
ARGUMENTS:\n\
\n\
- mol: the molecule to use\n\
- cleanIt: (optional) ignored\n\
- force: (optional) causes the calculation to be repeated, even if it has already\n\
been done\n\
\n";
python::def("AssignBondStereoCodes", MolOps::assignBondStereoCodes,
(python::arg("mol"),python::arg("cleanIt")=false,python::arg("force")=false),
docString.c_str());
// ------------------------------------------------------------------------
docString="Returns a \"Daylight\"-type fingerprint for a molecule\n\
\n\
Explanation of the algorithm below.\n\
\n\
ARGUMENTS:\n\
\n\
- mol: the molecule to use\n\
\n\
- minPath: (optional) minimum number of bonds to include in the subgraphs\n\
Defaults to 1.\n\
\n\
- maxPath: (optional) maximum number of bonds to include in the subgraphs\n\
Defaults to 7.\n\
\n\
- fpSize: (optional) number of bits in the fingerprint\n\
Defaults to 2048.\n\
\n\
- nBitsPerPath: (optional) number of bits to set per path\n\
Defaults to 4.\n\
\n\
- useHs: (optional) include information about number of Hs on each\n\
atom when calculating path hashes.\n\
Defaults to 1.\n\
\n\
RETURNS: a DataStructs.ExplicitBitVect with _fpSize_ bits\n\
\n\
ALGORITHM:\n\
\n\
This algorithm functions by find all paths between minPath and maxPath in\n \
length. For each path:\n\
\n\
1) The Balaban J value is calculated.\n\
\n\
2) The 32 bit Balaban J value is used to seed a random-number generator\n\
\n\
3) _nBitsPerPath_ random numbers are generated and used to set the corresponding\n\
bits in the fingerprint\n\
\n\
\n";
python::def("DaylightFingerprint", DaylightFingerprintMol,
(python::arg("mol"),python::arg("minPath")=1,
python::arg("maxPath")=7,python::arg("fpSize")=2048,
python::arg("nBitsPerHash")=4,python::arg("useHs")=true),
docString.c_str(),python::return_value_policy<python::manage_new_object>());
docString="Set the wedging on single bonds in a molecule.\n \
The wedging scheme used is that from Mol files.\n \
\n\
ARGUMENTS:\n\
\n\
- molecule: the molecule to update\n \
\n\
\n";
python::def("WedgeMolBonds", WedgeMolBonds,
docString.c_str());
};
};
}
void wrap_molops() {
RDKit::molops_wrapper::wrap();
}