Allow identification of chemistry problems (#2587)

* add AtomValenceException

* refactor a bit and add KekulizeException

* add copy ctor and copy() method

* add detectChemistryProblems

* add getType() method
want to be able to get the type of the exception without requiring doing a bunch of dynamic casts

* first pass at exception inheritance/translation
needs some cleanup and expansion, but this does pass all tests.

* cleanup and finish the python wrappers for the new exceptions

* make sure things are truly polymorphic

* wrap shared_ptrs of the new exception types

* expose DetectChemistryProblems()

* get the java wrappers building again

* transfer those changes to the c# wrapper

* add detectChemistryProblems()
and deal with the fun fun exception inheritance things that ensue

* response to review
This commit is contained in:
Greg Landrum
2019-08-28 14:15:55 -07:00
committed by GitHub
parent 574caeba57
commit 253f172353
15 changed files with 555 additions and 77 deletions

View File

@@ -277,7 +277,7 @@ int Atom::calcExplicitValence(bool strict) {
<< effectiveValence << ", is greater than permitted";
std::string msg = errout.str();
BOOST_LOG(rdErrorLog) << msg << std::endl;
throw MolSanitizeException(msg);
throw AtomValenceException(msg, getIdx());
}
}
d_explicitValence = res;
@@ -391,7 +391,7 @@ int Atom::calcImplicitValence(bool strict) {
<< " not equal to any accepted valence\n";
std::string msg = errout.str();
BOOST_LOG(rdErrorLog) << msg << std::endl;
throw MolSanitizeException(msg);
throw AtomValenceException(msg, getIdx());
}
res = 0;
}
@@ -416,7 +416,7 @@ int Atom::calcImplicitValence(bool strict) {
<< " greater than permitted";
std::string msg = errout.str();
BOOST_LOG(rdErrorLog) << msg << std::endl;
throw MolSanitizeException(msg);
throw AtomValenceException(msg, getIdx());
} else {
res = 0;
}

View File

@@ -103,7 +103,7 @@ void markDbondCands(RWMol &mol, const INT_VECT &allAtms,
errout << "Aromatic bonds on non aromatic atom " << at->getIdx();
std::string msg = errout.str();
BOOST_LOG(rdErrorLog) << msg << std::endl;
throw MolSanitizeException(msg);
throw AtomKekulizeException(msg, at->getIdx());
}
++beg;
}
@@ -466,16 +466,20 @@ void kekulizeFused(RWMol &mol, const VECT_INT_VECT &arings,
// we exhausted all option (or crossed the allowed
// number of backTracks) and we still need to backtrack
// can't kekulize this thing
std::vector<unsigned int> problemAtoms;
std::ostringstream errout;
errout << "Can't kekulize mol.";
errout << " Unkekulized atoms:";
for (unsigned int i = 0; i < nats; ++i) {
if (dBndCands[i]) errout << " " << i;
if (dBndCands[i]) {
errout << " " << i;
problemAtoms.push_back(i);
}
}
errout << std::endl;
std::string msg = errout.str();
BOOST_LOG(rdErrorLog) << msg << std::endl;
throw MolSanitizeException(msg);
throw KekulizeException(msg, problemAtoms);
}
}
} // namespace
@@ -592,7 +596,7 @@ void Kekulize(RWMol &mol, bool markAtomsBonds, unsigned int maxBackTracks) {
errout << "non-ring atom " << (*ai)->getIdx() << " marked aromatic";
std::string msg = errout.str();
BOOST_LOG(rdErrorLog) << msg << std::endl;
throw MolSanitizeException(msg);
throw AtomKekulizeException(msg, (*ai)->getIdx());
}
(*ai)->setIsAromatic(false);
// make sure "explicit" Hs on things like pyrroles don't hang around
@@ -619,7 +623,7 @@ void Kekulize(RWMol &mol, bool markAtomsBonds, unsigned int maxBackTracks) {
<< ": " << val << "!=" << valences[i] << std::endl;
std::string msg = errout.str();
BOOST_LOG(rdErrorLog) << msg << std::endl;
throw MolSanitizeException(msg);
throw AtomKekulizeException(msg, (*ai)->getIdx());
}
i++;
}

View File

@@ -383,6 +383,48 @@ void sanitizeMol(RWMol &mol, unsigned int &operationThatFailed,
operationThatFailed = 0;
}
std::vector<std::unique_ptr<MolSanitizeException>> detectChemistryProblems(
const ROMol &imol, unsigned int sanitizeOps) {
RWMol mol(imol);
std::vector<std::unique_ptr<MolSanitizeException>> res;
// clear out any cached properties
mol.clearComputedProps();
int operation;
operation = SANITIZE_CLEANUP;
if (sanitizeOps & operation) {
// clean up things like nitro groups
cleanUp(mol);
}
// update computed properties on atoms and bonds:
operation = SANITIZE_PROPERTIES;
if (sanitizeOps & operation) {
for (auto &atom : mol.atoms()) {
try {
bool strict = true;
atom->updatePropertyCache(strict);
} catch (const MolSanitizeException &e) {
res.emplace_back(e.copy());
}
}
} else {
mol.updatePropertyCache(false);
}
// kekulizations
operation = SANITIZE_KEKULIZE;
if (sanitizeOps & operation) {
try {
Kekulize(mol);
} catch (const MolSanitizeException &e) {
res.emplace_back(e.copy());
}
}
return res;
}
std::vector<ROMOL_SPTR> getMolFrags(const ROMol &mol, bool sanitizeFrags,
INT_VECT *frags,
VECT_INT_VECT *fragsMolAtomMapping,

View File

@@ -20,6 +20,7 @@
#include <boost/dynamic_bitset.hpp>
#include <RDGeneral/BoostEndInclude.h>
#include <RDGeneral/types.h>
#include "SanitException.h"
RDKIT_GRAPHMOL_EXPORT extern const int ci_LOCAL_INF;
namespace RDKit {
@@ -378,24 +379,21 @@ typedef enum {
\param mol : the RWMol to be cleaned
\param operationThatFailed : the first (if any) sanitization operation that
fails is set here.
fails is set here.
The values are taken from the \c SanitizeFlags
enum.
On success, the value is \c
SanitizeFlags::SANITIZE_NONE
enum. On success, the value is \c
SanitizeFlags::SANITIZE_NONE
\param sanitizeOps : the bits here are used to set which sanitization
operations are carried
out. The elements of the \c SanitizeFlags enum define
the operations.
operations are carried out. The elements of the \c
SanitizeFlags enum define the operations.
<b>Notes:</b>
- If there is a failure in the sanitization, a \c SanitException
- If there is a failure in the sanitization, a \c MolSanitizeException
will be thrown.
- in general the user of this function should cast the molecule following
this
function to a ROMol, so that new atoms and bonds cannot be added to the
molecule and screw up the sanitizing that has been done here
this function to a ROMol, so that new atoms and bonds cannot be added to
the molecule and screw up the sanitizing that has been done here
*/
RDKIT_GRAPHMOL_EXPORT void sanitizeMol(RWMol &mol,
unsigned int &operationThatFailed,
@@ -403,6 +401,32 @@ RDKIT_GRAPHMOL_EXPORT void sanitizeMol(RWMol &mol,
//! \overload
RDKIT_GRAPHMOL_EXPORT void sanitizeMol(RWMol &mol);
//! \brief Identifies chemistry problems (things that don't make chemical sense)
//! in a molecule
/*!
This functions uses the operations in sanitizeMol but does not change
the input structure and returns a list of the problems encountered instead of
stopping at the first failure,
The problems this looks for come from the sanitization operations:
-# mol.updatePropertyCache() : Unreasonable valences
-# MolOps::Kekulize() : Unkekulizable ring systems, aromatic atoms not in
rings, aromatic bonds to non-aromatic atoms.
\param mol : the RWMol to be cleaned
\param sanitizeOps : the bits here are used to set which sanitization
operations are carried out. The elements of the \c
SanitizeFlags enum define the operations.
\return a vector of \c MolSanitizeException values that indicate what
problems were encountered
*/
RDKIT_GRAPHMOL_EXPORT
std::vector<std::unique_ptr<MolSanitizeException>> detectChemistryProblems(
const ROMol &mol, unsigned int sanitizeOps = SANITIZE_ALL);
//! Possible aromaticity models
/*!
- \c AROMATICITY_DEFAULT at the moment always uses \c AROMATICITY_RDKIT

View File

@@ -1,5 +1,5 @@
//
// Copyright (C) 2002-2006 Rational Discovery LLC
// Copyright (C) 2002-2019 Greg Landrum and Rational Discovery LLC
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
@@ -9,8 +9,8 @@
//
#include <RDGeneral/export.h>
#ifndef _RD_SANITEXCEPTION_H
#define _RD_SANITEXCEPTION_H
#ifndef RD_SANITEXCEPTION_H
#define RD_SANITEXCEPTION_H
#include <RDGeneral/types.h>
#include <GraphMol/GraphMol.h>
@@ -26,14 +26,93 @@ namespace RDKit {
//! class for flagging sanitization errors
class RDKIT_GRAPHMOL_EXPORT MolSanitizeException : public std::exception {
public:
MolSanitizeException(const char *msg) : _msg(msg){};
MolSanitizeException(const std::string &msg) : _msg(msg){};
const char *message() const { return _msg.c_str(); };
~MolSanitizeException() throw(){};
MolSanitizeException(const char *msg) : d_msg(msg){};
MolSanitizeException(const std::string &msg) : d_msg(msg){};
MolSanitizeException(const MolSanitizeException &other)
: d_msg(other.d_msg){};
virtual const char *message() const { return d_msg.c_str(); };
virtual ~MolSanitizeException() throw(){};
virtual MolSanitizeException *copy() const {
return new MolSanitizeException(*this);
};
virtual std::string getType() const { return "MolSanitizeException"; };
private:
std::string _msg;
protected:
std::string d_msg;
};
class RDKIT_GRAPHMOL_EXPORT AtomSanitizeException
: public MolSanitizeException {
public:
AtomSanitizeException(const char *msg, unsigned int atomIdx)
: MolSanitizeException(msg), d_atomIdx(atomIdx){};
AtomSanitizeException(const std::string &msg, unsigned int atomIdx)
: MolSanitizeException(msg), d_atomIdx(atomIdx){};
AtomSanitizeException(const AtomSanitizeException &other)
: MolSanitizeException(other), d_atomIdx(other.d_atomIdx){};
unsigned int getAtomIdx() const { return d_atomIdx; };
virtual ~AtomSanitizeException() throw(){};
virtual MolSanitizeException *copy() const {
return new AtomSanitizeException(*this);
};
virtual std::string getType() const { return "AtomSanitizeException"; };
protected:
unsigned int d_atomIdx;
};
class RDKIT_GRAPHMOL_EXPORT AtomValenceException
: public AtomSanitizeException {
public:
AtomValenceException(const char *msg, unsigned int atomIdx)
: AtomSanitizeException(msg, atomIdx){};
AtomValenceException(const std::string &msg, unsigned int atomIdx)
: AtomSanitizeException(msg, atomIdx){};
AtomValenceException(const AtomValenceException &other)
: AtomSanitizeException(other){};
virtual ~AtomValenceException() throw(){};
MolSanitizeException *copy() const {
return new AtomValenceException(*this);
};
std::string getType() const { return "AtomValenceException"; };
};
class RDKIT_GRAPHMOL_EXPORT AtomKekulizeException
: public AtomSanitizeException {
public:
AtomKekulizeException(const char *msg, unsigned int atomIdx)
: AtomSanitizeException(msg, atomIdx){};
AtomKekulizeException(const std::string &msg, unsigned int atomIdx)
: AtomSanitizeException(msg, atomIdx){};
AtomKekulizeException(const AtomKekulizeException &other)
: AtomSanitizeException(other){};
virtual ~AtomKekulizeException() throw(){};
MolSanitizeException *copy() const {
return new AtomKekulizeException(*this);
};
std::string getType() const { return "AtomKekulizeException"; };
};
class RDKIT_GRAPHMOL_EXPORT KekulizeException : public MolSanitizeException {
public:
KekulizeException(const char *msg, const std::vector<unsigned int> &indices)
: MolSanitizeException(msg), d_atomIndices(indices){};
KekulizeException(const std::string &msg,
const std::vector<unsigned int> &indices)
: MolSanitizeException(msg), d_atomIndices(indices){};
KekulizeException(const KekulizeException &other)
: MolSanitizeException(other), d_atomIndices(other.d_atomIndices){};
const std::vector<unsigned int> &getAtomIndices() const {
return d_atomIndices;
};
virtual ~KekulizeException() throw(){};
MolSanitizeException *copy() const { return new KekulizeException(*this); };
std::string getType() const { return "KekulizeException"; };
protected:
std::vector<unsigned int> d_atomIndices;
};
} // namespace RDKit
#endif

View File

@@ -351,6 +351,7 @@ void addRecursiveQuery(ROMol &mol, const ROMol &query, unsigned int atomIdx,
oAt->expandQuery(q, Queries::COMPOSITE_AND);
}
}
MolOps::SanitizeFlags sanitizeMol(ROMol &mol, boost::uint64_t sanitizeOps,
bool catchErrors) {
RWMol &wmol = static_cast<RWMol &>(mol);
@@ -766,6 +767,16 @@ ROMol *adjustQueryPropertiesHelper(const ROMol &mol, python::object pyparams) {
return MolOps::adjustQueryProperties(mol, &params);
}
python::tuple detectChemistryProblemsHelper(const ROMol &mol,
unsigned int sanitizeOps) {
auto probs = MolOps::detectChemistryProblems(mol, sanitizeOps);
python::list res;
for (const auto &exc_ptr : probs) {
res.append(boost::shared_ptr<MolSanitizeException>(exc_ptr->copy()));
}
return python::tuple(res);
}
ROMol *replaceCoreHelper(const ROMol &mol, const ROMol &core,
python::object match, bool replaceDummies,
bool labelByIndex, bool requireDummyMatch = false) {
@@ -2258,6 +2269,11 @@ A note on the flags controlling which atoms/bonds are modified: \n\
(python::arg("mol"), python::arg("params") = python::object()),
docString.c_str(),
python::return_value_policy<python::manage_new_object>());
docString = "checks for chemistry problems";
python::def(
"DetectChemistryProblems", detectChemistryProblemsHelper,
(python::arg("mol"), python::arg("sanitizeOps") = MolOps::SANITIZE_ALL),
docString.c_str());
};
};
} // namespace RDKit

View File

@@ -37,12 +37,6 @@ void rdExceptionTranslator(RDKit::ConformerException const &x) {
PyErr_SetString(PyExc_ValueError, "Bad Conformer Id");
}
void rdSanitExceptionTranslator(RDKit::MolSanitizeException const &x) {
std::ostringstream ss;
ss << "Sanitization error: " << x.message();
PyErr_SetString(PyExc_ValueError, ss.str().c_str());
}
void wrap_table();
void wrap_atom();
void wrap_conformer();
@@ -116,14 +110,111 @@ void WrapLogs() {
if (rdWarningLog != nullptr) rdWarningLog->SetTee(warning);
}
python::tuple getAtomIndicesHelper(const KekulizeException &self) {
python::list res;
for (auto idx : self.getAtomIndices()) {
res.append(idx);
}
return python::tuple(res);
}
PyObject *molSanitizeExceptionType = nullptr;
PyObject *atomSanitizeExceptionType = nullptr;
PyObject *atomValenceExceptionType = nullptr;
PyObject *atomKekulizeExceptionType = nullptr;
PyObject *kekulizeExceptionType = nullptr;
// pattern from here:
// https://stackoverflow.com/questions/11448735/boostpython-export-custom-exception-and-inherit-from-pythons-exception
template <typename EXC_TYPE>
void sanitExceptionTranslator(const EXC_TYPE &x, PyObject *pyExcType) {
PRECONDITION(pyExcType != nullptr, "global type not initialized");
python::object pyExcInstance(python::handle<>(python::borrowed(pyExcType)));
pyExcInstance.attr("cause") = x;
PyErr_SetString(pyExcType, x.message());
}
// pattern from here:
// https://stackoverflow.com/questions/9620268/boost-python-custom-exception-class
PyObject *createExceptionClass(const char *name,
PyObject *baseTypeObj = PyExc_ValueError) {
std::string scopeName =
python::extract<std::string>(python::scope().attr("__name__"));
std::string qualifiedName0 = scopeName + "." + name;
char *qualifiedName1 = const_cast<char *>(qualifiedName0.c_str());
PyObject *typeObj = PyErr_NewException(qualifiedName1, baseTypeObj, 0);
if (!typeObj) python::throw_error_already_set();
python::scope().attr(name) = python::handle<>(python::borrowed(typeObj));
return typeObj;
}
BOOST_PYTHON_MODULE(rdchem) {
python::scope().attr("__doc__") =
"Module containing the core chemistry functionality of the RDKit";
RegisterListConverter<RDKit::Atom *>();
RegisterListConverter<RDKit::Bond *>();
rdkit_import_array();
// this is one of those parts where I think I wish that I knew how to do
// template meta-programming
python::class_<MolSanitizeException>("_cppMolSanitizeException",
"exception arising from sanitization",
python::no_init)
.def("Message", &MolSanitizeException::message)
.def("GetType", &MolSanitizeException::getType);
python::register_ptr_to_python<boost::shared_ptr<MolSanitizeException>>();
molSanitizeExceptionType = createExceptionClass("MolSanitizeException");
python::register_exception_translator<RDKit::MolSanitizeException>(
&rdSanitExceptionTranslator);
[&](const MolSanitizeException &exc) {
sanitExceptionTranslator(exc, molSanitizeExceptionType);
});
python::class_<AtomSanitizeException, python::bases<MolSanitizeException>>(
"_cppAtomSanitizeException", "exception arising from sanitization",
python::no_init)
.def("GetAtomIdx", &AtomSanitizeException::getAtomIdx);
python::register_ptr_to_python<boost::shared_ptr<AtomSanitizeException>>();
atomSanitizeExceptionType =
createExceptionClass("AtomSanitizeException", molSanitizeExceptionType);
python::register_exception_translator<RDKit::AtomSanitizeException>(
[&](const AtomSanitizeException &exc) {
sanitExceptionTranslator(exc, atomSanitizeExceptionType);
});
python::class_<AtomValenceException, python::bases<AtomSanitizeException>>(
"_cppAtomValenceException", "exception arising from sanitization",
python::no_init);
python::register_ptr_to_python<boost::shared_ptr<AtomValenceException>>();
atomValenceExceptionType =
createExceptionClass("AtomValenceException", atomSanitizeExceptionType);
python::register_exception_translator<RDKit::AtomValenceException>(
[&](const AtomValenceException &exc) {
sanitExceptionTranslator(exc, atomValenceExceptionType);
});
python::class_<AtomKekulizeException, python::bases<AtomSanitizeException>>(
"_cppAtomKekulizeException", "exception arising from sanitization",
python::no_init);
python::register_ptr_to_python<boost::shared_ptr<AtomKekulizeException>>();
atomKekulizeExceptionType =
createExceptionClass("AtomKekulizeException", atomSanitizeExceptionType);
python::register_exception_translator<RDKit::AtomKekulizeException>(
[&](const AtomKekulizeException &exc) {
sanitExceptionTranslator(exc, atomKekulizeExceptionType);
});
python::class_<KekulizeException, python::bases<MolSanitizeException>>(
"_cppAtomKekulizeException", "exception arising from sanitization",
python::no_init)
.def("GetAtomIndices", &getAtomIndicesHelper);
python::register_ptr_to_python<boost::shared_ptr<KekulizeException>>();
kekulizeExceptionType =
createExceptionClass("KekulizeException", molSanitizeExceptionType);
python::register_exception_translator<RDKit::KekulizeException>(
[&](const KekulizeException &exc) {
sanitExceptionTranslator(exc, kekulizeExceptionType);
});
python::def("WrapLogs", WrapLogs,
"Wrap the internal RDKit streams so they go to python's "

View File

@@ -453,7 +453,7 @@ class TestCase(unittest.TestCase):
smi1 = Chem.MolToSmiles(m)
smi2 = Chem.MolToSmiles(m2)
self.assertTrue(smi1 == smi2)
def test16Props(self):
m = Chem.MolFromSmiles('C1=CN=CC=C1')
self.assertTrue(not m.HasProp('prop1'))
@@ -2106,7 +2106,7 @@ CAS<~>
self.assertTrue(ri.IsBondInRingOfSize(2, 4))
if hasattr(Chem,'FindRingFamilies'):
if hasattr(Chem,'FindRingFamilies'):
ri = m.GetRingInfo()
self.assertFalse(ri.AreRingFamiliesInitialized())
Chem.FindRingFamilies(m)
@@ -5020,16 +5020,16 @@ width='200px' height='200px' >
def testAssignStereochemistryFrom3D(self):
def _stereoTester(mol,expectedCIP,expectedStereo):
mol.UpdatePropertyCache()
self.assertEqual(mol.GetNumAtoms(),9)
self.assertFalse(mol.GetAtomWithIdx(1).HasProp("_CIPCode"))
self.assertEqual(mol.GetBondWithIdx(3).GetStereo(),Chem.BondStereo.STEREONONE)
for bond in mol.GetBonds():
bond.SetBondDir(Chem.BondDir.NONE)
Chem.AssignStereochemistryFrom3D(mol)
self.assertTrue(mol.GetAtomWithIdx(1).HasProp("_CIPCode"))
self.assertEqual(mol.GetAtomWithIdx(1).GetProp("_CIPCode"),expectedCIP)
self.assertEqual(mol.GetBondWithIdx(3).GetStereo(),expectedStereo)
mol.UpdatePropertyCache()
self.assertEqual(mol.GetNumAtoms(),9)
self.assertFalse(mol.GetAtomWithIdx(1).HasProp("_CIPCode"))
self.assertEqual(mol.GetBondWithIdx(3).GetStereo(),Chem.BondStereo.STEREONONE)
for bond in mol.GetBonds():
bond.SetBondDir(Chem.BondDir.NONE)
Chem.AssignStereochemistryFrom3D(mol)
self.assertTrue(mol.GetAtomWithIdx(1).HasProp("_CIPCode"))
self.assertEqual(mol.GetAtomWithIdx(1).GetProp("_CIPCode"),expectedCIP)
self.assertEqual(mol.GetBondWithIdx(3).GetStereo(),expectedStereo)
fileN = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'test_data',
'stereochem.sdf')
@@ -5041,8 +5041,8 @@ width='200px' height='200px' >
("S",Chem.BondStereo.STEREOE),
)
for i,mol in enumerate(suppl):
cip,stereo = expected[i]
_stereoTester(mol,cip,stereo)
cip,stereo = expected[i]
_stereoTester(mol,cip,stereo)
def testGitHub2082(self):
ctab="""
@@ -5096,9 +5096,9 @@ M END
def testGitHub1985(self):
# simple check, this used to throw an exception
try:
Chem.MolToSmarts(Chem.MolFromSmarts("[C@]"))
Chem.MolToSmarts(Chem.MolFromSmarts("[C@]"))
except:
self.fail("[C@] caused an exception when roundtripping smarts")
self.fail("[C@] caused an exception when roundtripping smarts")
def testGetEnhancedStereo(self):
@@ -5285,7 +5285,7 @@ M END
for atom in m.GetAtoms():
bv.SetBit(atom.GetIdx())
atom.SetExplicitBitVectProp("prop", bv)
for atom in m.GetAtoms():
bv = atom.GetExplicitBitVectProp("prop")
self.assertTrue(bv.GetBit(atom.GetIdx()))
@@ -5302,7 +5302,7 @@ M END
for atom in m.GetAtoms():
if atom.GetIdx() == 0:
atom.SetExplicitBitVectProp("prop", bv)
l = tuple([x.GetIdx() for x in m.GetAtomsMatchingQuery(qa)])
self.assertEqual(l, (0,))
@@ -5311,37 +5311,37 @@ M END
bv = DataStructs.ExplicitBitVect(4)
bv.SetBit(atom.GetIdx())
atom.SetExplicitBitVectProp("prop", bv)
sma = Chem.MolFromSmarts("C")
for atom in sma.GetAtoms():
bv = DataStructs.ExplicitBitVect(4)
bv.SetBit(1)
qa = rdqueries.HasBitVectPropWithValueQueryAtom("prop", bv, tolerance=0.0)
atom.ExpandQuery(qa)
bv = DataStructs.ExplicitBitVect(4)
bv.SetBit(1)
qa = rdqueries.HasBitVectPropWithValueQueryAtom("prop", bv, tolerance=0.0)
atom.ExpandQuery(qa)
res = m.GetSubstructMatches(sma)
self.assertEqual(res, ((1,),))
sma = Chem.MolFromSmarts("C")
for atom in sma.GetAtoms():
bv = DataStructs.ExplicitBitVect(4)
bv.SetBit(0)
qa = rdqueries.HasBitVectPropWithValueQueryAtom("prop", bv, tolerance=0.0)
atom.ExpandQuery(qa)
bv = DataStructs.ExplicitBitVect(4)
bv.SetBit(0)
qa = rdqueries.HasBitVectPropWithValueQueryAtom("prop", bv, tolerance=0.0)
atom.ExpandQuery(qa)
res = m.GetSubstructMatches(sma)
self.assertEqual(res, ((0,),))
sma = Chem.MolFromSmarts("C")
for atom in sma.GetAtoms():
bv = DataStructs.ExplicitBitVect(4)
bv.SetBit(0)
qa = rdqueries.HasBitVectPropWithValueQueryAtom("prop", bv, tolerance=1.0)
atom.ExpandQuery(qa)
bv = DataStructs.ExplicitBitVect(4)
bv.SetBit(0)
qa = rdqueries.HasBitVectPropWithValueQueryAtom("prop", bv, tolerance=1.0)
atom.ExpandQuery(qa)
res = m.GetSubstructMatches(sma)
self.assertEqual(res, ((0,),(1,)))
def testGithub2441(self):
m = Chem.MolFromSmiles("CC")
conf = Chem.Conformer(2)
@@ -5387,7 +5387,7 @@ C1C(Cl)CCCC duff2
l = [x for x in suppl2]
self.assertEqual(len(l),7)
self.assertTrue(l[6] is None)
sdf=b"""
Mrv1810 06051911332D
@@ -5422,7 +5422,7 @@ $$$$
self.assertEqual(len(l),3)
self.assertTrue(l[1] is None)
self.assertTrue(l[2] is None)
from io import BytesIO
sio = BytesIO(sdf)
suppl3 = Chem.ForwardSDMolSupplier(sio)
@@ -5430,7 +5430,7 @@ $$$$
self.assertEqual(len(l),3)
self.assertTrue(l[1] is None)
self.assertTrue(l[2] is None)
sdf=b"""
Mrv1810 06051911332D
@@ -5464,7 +5464,7 @@ M END
self.assertEqual(len(l),2)
self.assertTrue(l[0] is not None)
self.assertTrue(l[1] is not None)
from io import BytesIO
sio = BytesIO(sdf)
suppl3 = Chem.ForwardSDMolSupplier(sio)
@@ -5499,6 +5499,48 @@ H 0.635000 0.635000 0.635000
self.assertEqual(Chem.MolToXYZBlock(mol), xyzblock_expected)
def testSanitizationExceptionBasics(self):
try:
Chem.SanitizeMol(Chem.MolFromSmiles('CFC',sanitize=False))
except Chem.AtomValenceException as exc:
self.assertEqual(exc.cause.GetAtomIdx(),1)
else:
self.assertFalse(True)
try:
Chem.SanitizeMol(Chem.MolFromSmiles('c1cc1',sanitize=False))
except Chem.KekulizeException as exc:
self.assertEqual(exc.cause.GetAtomIndices(),(0,1,2))
else:
self.assertFalse(True)
def testSanitizationExceptionHierarchy(self):
with self.assertRaises(Chem.AtomValenceException):
Chem.SanitizeMol(Chem.MolFromSmiles('CFC',sanitize=False))
with self.assertRaises(Chem.AtomSanitizeException):
Chem.SanitizeMol(Chem.MolFromSmiles('CFC',sanitize=False))
with self.assertRaises(Chem.MolSanitizeException):
Chem.SanitizeMol(Chem.MolFromSmiles('CFC',sanitize=False))
with self.assertRaises(ValueError):
Chem.SanitizeMol(Chem.MolFromSmiles('CFC',sanitize=False))
with self.assertRaises(Chem.KekulizeException):
Chem.SanitizeMol(Chem.MolFromSmiles('c1cc1',sanitize=False))
with self.assertRaises(Chem.MolSanitizeException):
Chem.SanitizeMol(Chem.MolFromSmiles('c1cc1',sanitize=False))
with self.assertRaises(ValueError):
Chem.SanitizeMol(Chem.MolFromSmiles('c1cc1', sanitize=False))
def testDetectChemistryProblems(self):
m = Chem.MolFromSmiles('CFCc1cc1ClC',sanitize=False)
ps = Chem.DetectChemistryProblems(m)
self.assertEqual(len(ps),3)
self.assertEqual([x.GetType() for x in ps],['AtomValenceException','AtomValenceException','KekulizeException'])
self.assertEqual(ps[0].GetAtomIdx(),1)
self.assertEqual(ps[1].GetAtomIdx(),6)
self.assertEqual(ps[2].GetAtomIndices(),(3,4,5))
def testGithub2611(self):
mol = Chem.MolFromSmiles('ONCS.ONCS')
for atom in mol.GetAtoms():
@@ -5526,7 +5568,6 @@ H 0.635000 0.635000 0.635000
self.assertEqual(order1,order3)
self.assertEqual(order2,order4)
if __name__ == '__main__':
if "RDTESTCASE" in os.environ:
suite = unittest.TestSuite()

View File

@@ -340,6 +340,82 @@ M END)CTAB";
}
}
TEST_CASE("Specialized exceptions for sanitization errors", "[molops]") {
SECTION("AtomValenceException") {
std::vector<std::pair<std::string, unsigned int>> smiles = {
{"C=n1ccnc1", 1}, {"CCO(C)C", 2}};
for (auto pr : smiles) {
CHECK_THROWS_AS(SmilesToMol(pr.first), AtomValenceException);
try {
auto m = SmilesToMol(pr.first);
} catch (const AtomValenceException &e) {
CHECK(e.getType() == "AtomValenceException");
CHECK(e.getAtomIdx() == pr.second);
}
}
}
SECTION("AtomKekulizeException") {
std::vector<std::pair<std::string, unsigned int>> smiles = {
{"CCcc", 2}, {"C1:c:CC1", 0}};
for (auto pr : smiles) {
CHECK_THROWS_AS(SmilesToMol(pr.first), AtomKekulizeException);
try {
auto m = SmilesToMol(pr.first);
} catch (const AtomKekulizeException &e) {
CHECK(e.getType() == "AtomKekulizeException");
CHECK(e.getAtomIdx() == pr.second);
}
}
}
SECTION("KekulizeException") {
std::vector<std::pair<std::string, std::vector<unsigned int>>> smiles = {
{"c1cccc1", {0, 1, 2, 3, 4}}, {"Cc1cc1", {1, 2, 3}}};
for (auto pr : smiles) {
CHECK_THROWS_AS(SmilesToMol(pr.first), KekulizeException);
try {
auto m = SmilesToMol(pr.first);
} catch (const KekulizeException &e) {
CHECK(e.getType() == "KekulizeException");
CHECK(e.getAtomIndices() == pr.second);
}
}
}
}
TEST_CASE("detectChemistryProblems", "[molops]") {
SECTION("Basics") {
SmilesParserParams ps;
ps.sanitize = false;
auto m = std::unique_ptr<ROMol>(SmilesToMol("CO(C)CFCc1cc1", ps));
REQUIRE(m);
auto res = MolOps::detectChemistryProblems(*m);
REQUIRE(res.size() == 3);
CHECK(res[0]->getType() == "AtomValenceException");
REQUIRE(dynamic_cast<AtomValenceException *>(res[0].get()));
CHECK(dynamic_cast<AtomSanitizeException *>(res[0].get())->getAtomIdx() ==
1);
CHECK(res[1]->getType() == "AtomValenceException");
REQUIRE(dynamic_cast<AtomSanitizeException *>(res[1].get()));
CHECK(dynamic_cast<AtomSanitizeException *>(res[1].get())->getAtomIdx() ==
4);
CHECK(res[2]->getType() == "KekulizeException");
REQUIRE(dynamic_cast<KekulizeException *>(res[2].get()));
CHECK(dynamic_cast<KekulizeException *>(res[2].get())->getAtomIndices() ==
std::vector<unsigned int>({6, 7, 8}));
}
SECTION("No problems") {
SmilesParserParams ps;
ps.sanitize = false;
auto m = std::unique_ptr<ROMol>(SmilesToMol("c1ccccc1", ps));
REQUIRE(m);
auto res = MolOps::detectChemistryProblems(*m);
REQUIRE(res.size() == 0);
}
}
TEST_CASE(
"github #2606: Bad valence corrections on Pb, Sn"
"[bug, molops]") {
@@ -433,4 +509,4 @@ M END
CHECK(mol->getAtomWithIdx(0)->getFormalCharge() == 2);
CHECK(mol->getAtomWithIdx(0)->getTotalNumHs() == 0);
}
}
}

View File

@@ -1,5 +1,4 @@
/*
* $Id$
*
* Copyright (c) 2010, Novartis Institutes for BioMedical Research Inc.
* All rights reserved.
@@ -35,10 +34,12 @@
#include <GraphMol/MolOps.h>
%}
%template(MolSanitizeException_Vect) std::vector<boost::shared_ptr<RDKit::MolSanitizeException>>;
%newobject RDKit::MolOps::renumberAtoms;
%newobject RDKit::MolOps::removeHs;
%newobject RDKit::MolOps::addHs;
%ignore RDKit::MolOps::detectChemistryProblems;
%include <GraphMol/MolOps.h>
%ignore RDKit::MolOps::sanitizeMol(RWMol &,unsigned int &,unsigned int &);
@@ -53,4 +54,12 @@
}
return static_cast<int>(opThatFailed);
};
std::vector<boost::shared_ptr<RDKit::MolSanitizeException>> detectChemistryProblems(RDKit::ROMol &mol,int sanitizeOps=RDKit::MolOps::SANITIZE_ALL){
std::vector<boost::shared_ptr<RDKit::MolSanitizeException>> res;
auto probs = RDKit::MolOps::detectChemistryProblems(mol,sanitizeOps);
for(const auto &exc_ptr : probs) {
res.push_back(boost::shared_ptr<RDKit::MolSanitizeException>(exc_ptr->copy()));
}
return res;
};
%}

View File

@@ -38,3 +38,68 @@
%include <GraphMol/SanitException.h>
#ifdef SWIGJAVA
// support upcasting from MolSanitizeException to the other exception types so that
// we can do something with the output of detectChemistryProblems()
// approach from: http://www.swig.org/Doc3.0/Java.html#Java_adding_downcasts
%exception RDKit::AtomSanitizeException::dynamic_cast(RDKit::MolSanitizeException) {
$action
if(!result){
jclass excep = jenv->FindClass("java/lang/ClassCastException");
if (excep) {
jenv->ThrowNew(excep, "dynamic_cast exception");
}
}
}
%extend RDKit::AtomSanitizeException {
static RDKit::AtomSanitizeException *dynamic_cast(RDKit::MolSanitizeException *mse){
return dynamic_cast<RDKit::AtomSanitizeException *>(mse);
}
};
%exception RDKit::AtomValenceException::dynamic_cast(RDKit::MolSanitizeException) {
$action
if(!result){
jclass excep = jenv->FindClass("java/lang/ClassCastException");
if (excep) {
jenv->ThrowNew(excep, "dynamic_cast exception");
}
}
}
%extend RDKit::AtomValenceException {
static RDKit::AtomValenceException *dynamic_cast(RDKit::MolSanitizeException *mse){
return dynamic_cast<RDKit::AtomValenceException *>(mse);
}
};
%exception RDKit::AtomKekulizeException::dynamic_cast(RDKit::MolSanitizeException) {
$action
if(!result){
jclass excep = jenv->FindClass("java/lang/ClassCastException");
if (excep) {
jenv->ThrowNew(excep, "dynamic_cast exception");
}
}
}
%extend RDKit::AtomKekulizeException {
static RDKit::AtomKekulizeException *dynamic_cast(RDKit::MolSanitizeException *mse){
return dynamic_cast<RDKit::AtomKekulizeException *>(mse);
}
};
%exception RDKit::KekulizeException::dynamic_cast(RDKit::MolSanitizeException) {
$action
if(!result){
jclass excep = jenv->FindClass("java/lang/ClassCastException");
if (excep) {
jenv->ThrowNew(excep, "dynamic_cast exception");
}
}
}
%extend RDKit::KekulizeException {
static RDKit::KekulizeException *dynamic_cast(RDKit::MolSanitizeException *mse){
return dynamic_cast<RDKit::KekulizeException *>(mse);
}
};
#endif

View File

@@ -178,6 +178,10 @@ typedef unsigned long long int uintmax_t;
%shared_ptr(RDKit::QueryBond)
%shared_ptr(RDKit::QueryOps)
%shared_ptr(RDKit::MolSanitizeException)
%shared_ptr(RDKit::AtomSanitizeException)
%shared_ptr(RDKit::AtomValenceException)
%shared_ptr(RDKit::AtomKekulizeException)
%shared_ptr(RDKit::KekulizeException)
%shared_ptr(RDKit::SmilesParseException)
%shared_ptr(RDKit::RingInfo)
%shared_ptr(RDKit::ChemicalReaction)

View File

@@ -130,6 +130,10 @@ typedef unsigned long long int uintmax_t;
%shared_ptr(RDKit::QueryBond)
%shared_ptr(RDKit::QueryOps)
%shared_ptr(RDKit::MolSanitizeException)
%shared_ptr(RDKit::AtomSanitizeException)
%shared_ptr(RDKit::AtomValenceException)
%shared_ptr(RDKit::AtomKekulizeException)
%shared_ptr(RDKit::KekulizeException)
%shared_ptr(RDKit::SmilesParseException)
%shared_ptr(RDKit::MolPicklerException)
%shared_ptr(RDKit::RingInfo)

View File

@@ -1,5 +1,4 @@
/*
* $Id: BasicMolecule2Tests.java 131 2011-01-20 22:01:29Z ebakke $
*
* Copyright (c) 2010, Novartis Institutes for BioMedical Research Inc.
* All rights reserved.
@@ -175,6 +174,31 @@ public class BasicMolecule2Tests extends GraphMolTest {
assertEquals(3,mvv.get(0).size());
}
@Test public void testDetectChemistryProblems() {
ROMol m2;
m2 = RWMol.MolFromSmiles("CFCc1cc1",0,false);
MolSanitizeException_Vect res;
res = RDKFuncs.detectChemistryProblems(m2);
assertEquals(res.size(),2);
assertEquals(res.get(0).getType(),"AtomValenceException");
assertEquals(res.get(1).getType(),"KekulizeException");
// fun with swig and C++ inheritance:
KekulizeException ke = KekulizeException.dynamic_cast(res.get(0));
assertNull(ke);
ke = KekulizeException.dynamic_cast(res.get(1));
assertNotNull(ke);
assertEquals(ke.getAtomIndices().size(),3);
assertEquals(ke.getAtomIndices().get(0),3);
assertEquals(ke.getAtomIndices().get(1),4);
assertEquals(ke.getAtomIndices().get(2),5);
AtomValenceException ave = AtomValenceException.dynamic_cast(res.get(0));
assertNotNull(ave);
assertEquals(ave.getAtomIdx(),1);
}
public static void main(String args[]) {
org.junit.runner.JUnitCore.main("org.RDKit.BasicMolecule2Tests");
}

View File

@@ -1,5 +1,4 @@
/*
* $Id: BasicMoleculeTests.java 131 2011-01-20 22:01:29Z ebakke $
*
* Copyright (c) 2010, Novartis Institutes for BioMedical Research Inc.
* All rights reserved.