diff --git a/Code/GraphMol/Atom.cpp b/Code/GraphMol/Atom.cpp index be5b8fd4b..1ef4d098a 100644 --- a/Code/GraphMol/Atom.cpp +++ b/Code/GraphMol/Atom.cpp @@ -277,7 +277,7 @@ int Atom::calcExplicitValence(bool strict) { << effectiveValence << ", is greater than permitted"; std::string msg = errout.str(); BOOST_LOG(rdErrorLog) << msg << std::endl; - throw MolSanitizeException(msg); + throw AtomValenceException(msg, getIdx()); } } d_explicitValence = res; @@ -391,7 +391,7 @@ int Atom::calcImplicitValence(bool strict) { << " not equal to any accepted valence\n"; std::string msg = errout.str(); BOOST_LOG(rdErrorLog) << msg << std::endl; - throw MolSanitizeException(msg); + throw AtomValenceException(msg, getIdx()); } res = 0; } @@ -416,7 +416,7 @@ int Atom::calcImplicitValence(bool strict) { << " greater than permitted"; std::string msg = errout.str(); BOOST_LOG(rdErrorLog) << msg << std::endl; - throw MolSanitizeException(msg); + throw AtomValenceException(msg, getIdx()); } else { res = 0; } diff --git a/Code/GraphMol/Kekulize.cpp b/Code/GraphMol/Kekulize.cpp index e0dc004f4..41aae9c8c 100644 --- a/Code/GraphMol/Kekulize.cpp +++ b/Code/GraphMol/Kekulize.cpp @@ -103,7 +103,7 @@ void markDbondCands(RWMol &mol, const INT_VECT &allAtms, errout << "Aromatic bonds on non aromatic atom " << at->getIdx(); std::string msg = errout.str(); BOOST_LOG(rdErrorLog) << msg << std::endl; - throw MolSanitizeException(msg); + throw AtomKekulizeException(msg, at->getIdx()); } ++beg; } @@ -466,16 +466,20 @@ void kekulizeFused(RWMol &mol, const VECT_INT_VECT ås, // we exhausted all option (or crossed the allowed // number of backTracks) and we still need to backtrack // can't kekulize this thing + std::vector problemAtoms; std::ostringstream errout; errout << "Can't kekulize mol."; errout << " Unkekulized atoms:"; for (unsigned int i = 0; i < nats; ++i) { - if (dBndCands[i]) errout << " " << i; + if (dBndCands[i]) { + errout << " " << i; + problemAtoms.push_back(i); + } } errout << std::endl; std::string msg = errout.str(); BOOST_LOG(rdErrorLog) << msg << std::endl; - throw MolSanitizeException(msg); + throw KekulizeException(msg, problemAtoms); } } } // namespace @@ -592,7 +596,7 @@ void Kekulize(RWMol &mol, bool markAtomsBonds, unsigned int maxBackTracks) { errout << "non-ring atom " << (*ai)->getIdx() << " marked aromatic"; std::string msg = errout.str(); BOOST_LOG(rdErrorLog) << msg << std::endl; - throw MolSanitizeException(msg); + throw AtomKekulizeException(msg, (*ai)->getIdx()); } (*ai)->setIsAromatic(false); // make sure "explicit" Hs on things like pyrroles don't hang around @@ -619,7 +623,7 @@ void Kekulize(RWMol &mol, bool markAtomsBonds, unsigned int maxBackTracks) { << ": " << val << "!=" << valences[i] << std::endl; std::string msg = errout.str(); BOOST_LOG(rdErrorLog) << msg << std::endl; - throw MolSanitizeException(msg); + throw AtomKekulizeException(msg, (*ai)->getIdx()); } i++; } diff --git a/Code/GraphMol/MolOps.cpp b/Code/GraphMol/MolOps.cpp index a12360b4d..ea959c20e 100644 --- a/Code/GraphMol/MolOps.cpp +++ b/Code/GraphMol/MolOps.cpp @@ -383,6 +383,48 @@ void sanitizeMol(RWMol &mol, unsigned int &operationThatFailed, operationThatFailed = 0; } +std::vector> detectChemistryProblems( + const ROMol &imol, unsigned int sanitizeOps) { + RWMol mol(imol); + std::vector> res; + + // clear out any cached properties + mol.clearComputedProps(); + + int operation; + operation = SANITIZE_CLEANUP; + if (sanitizeOps & operation) { + // clean up things like nitro groups + cleanUp(mol); + } + + // update computed properties on atoms and bonds: + operation = SANITIZE_PROPERTIES; + if (sanitizeOps & operation) { + for (auto &atom : mol.atoms()) { + try { + bool strict = true; + atom->updatePropertyCache(strict); + } catch (const MolSanitizeException &e) { + res.emplace_back(e.copy()); + } + } + } else { + mol.updatePropertyCache(false); + } + + // kekulizations + operation = SANITIZE_KEKULIZE; + if (sanitizeOps & operation) { + try { + Kekulize(mol); + } catch (const MolSanitizeException &e) { + res.emplace_back(e.copy()); + } + } + return res; +} + std::vector getMolFrags(const ROMol &mol, bool sanitizeFrags, INT_VECT *frags, VECT_INT_VECT *fragsMolAtomMapping, diff --git a/Code/GraphMol/MolOps.h b/Code/GraphMol/MolOps.h index 9dd114ae8..4bfaefd6e 100644 --- a/Code/GraphMol/MolOps.h +++ b/Code/GraphMol/MolOps.h @@ -20,6 +20,7 @@ #include #include #include +#include "SanitException.h" RDKIT_GRAPHMOL_EXPORT extern const int ci_LOCAL_INF; namespace RDKit { @@ -378,24 +379,21 @@ typedef enum { \param mol : the RWMol to be cleaned \param operationThatFailed : the first (if any) sanitization operation that - fails is set here. + fails is set here. The values are taken from the \c SanitizeFlags - enum. - On success, the value is \c - SanitizeFlags::SANITIZE_NONE + enum. On success, the value is \c + SanitizeFlags::SANITIZE_NONE \param sanitizeOps : the bits here are used to set which sanitization - operations are carried - out. The elements of the \c SanitizeFlags enum define - the operations. + operations are carried out. The elements of the \c + SanitizeFlags enum define the operations. Notes: - - If there is a failure in the sanitization, a \c SanitException + - If there is a failure in the sanitization, a \c MolSanitizeException will be thrown. - in general the user of this function should cast the molecule following - this - function to a ROMol, so that new atoms and bonds cannot be added to the - molecule and screw up the sanitizing that has been done here + this function to a ROMol, so that new atoms and bonds cannot be added to + the molecule and screw up the sanitizing that has been done here */ RDKIT_GRAPHMOL_EXPORT void sanitizeMol(RWMol &mol, unsigned int &operationThatFailed, @@ -403,6 +401,32 @@ RDKIT_GRAPHMOL_EXPORT void sanitizeMol(RWMol &mol, //! \overload RDKIT_GRAPHMOL_EXPORT void sanitizeMol(RWMol &mol); +//! \brief Identifies chemistry problems (things that don't make chemical sense) +//! in a molecule +/*! + This functions uses the operations in sanitizeMol but does not change + the input structure and returns a list of the problems encountered instead of + stopping at the first failure, + + The problems this looks for come from the sanitization operations: + -# mol.updatePropertyCache() : Unreasonable valences + -# MolOps::Kekulize() : Unkekulizable ring systems, aromatic atoms not in + rings, aromatic bonds to non-aromatic atoms. + + \param mol : the RWMol to be cleaned + + \param sanitizeOps : the bits here are used to set which sanitization + operations are carried out. The elements of the \c + SanitizeFlags enum define the operations. + + \return a vector of \c MolSanitizeException values that indicate what + problems were encountered + +*/ +RDKIT_GRAPHMOL_EXPORT +std::vector> detectChemistryProblems( + const ROMol &mol, unsigned int sanitizeOps = SANITIZE_ALL); + //! Possible aromaticity models /*! - \c AROMATICITY_DEFAULT at the moment always uses \c AROMATICITY_RDKIT diff --git a/Code/GraphMol/SanitException.h b/Code/GraphMol/SanitException.h index d53c719d4..c686cd49e 100644 --- a/Code/GraphMol/SanitException.h +++ b/Code/GraphMol/SanitException.h @@ -1,5 +1,5 @@ // -// Copyright (C) 2002-2006 Rational Discovery LLC +// Copyright (C) 2002-2019 Greg Landrum and Rational Discovery LLC // // @@ All Rights Reserved @@ // This file is part of the RDKit. @@ -9,8 +9,8 @@ // #include -#ifndef _RD_SANITEXCEPTION_H -#define _RD_SANITEXCEPTION_H +#ifndef RD_SANITEXCEPTION_H +#define RD_SANITEXCEPTION_H #include #include @@ -26,14 +26,93 @@ namespace RDKit { //! class for flagging sanitization errors class RDKIT_GRAPHMOL_EXPORT MolSanitizeException : public std::exception { public: - MolSanitizeException(const char *msg) : _msg(msg){}; - MolSanitizeException(const std::string &msg) : _msg(msg){}; - const char *message() const { return _msg.c_str(); }; - ~MolSanitizeException() throw(){}; + MolSanitizeException(const char *msg) : d_msg(msg){}; + MolSanitizeException(const std::string &msg) : d_msg(msg){}; + MolSanitizeException(const MolSanitizeException &other) + : d_msg(other.d_msg){}; + virtual const char *message() const { return d_msg.c_str(); }; + virtual ~MolSanitizeException() throw(){}; + virtual MolSanitizeException *copy() const { + return new MolSanitizeException(*this); + }; + virtual std::string getType() const { return "MolSanitizeException"; }; - private: - std::string _msg; + protected: + std::string d_msg; }; + +class RDKIT_GRAPHMOL_EXPORT AtomSanitizeException + : public MolSanitizeException { + public: + AtomSanitizeException(const char *msg, unsigned int atomIdx) + : MolSanitizeException(msg), d_atomIdx(atomIdx){}; + AtomSanitizeException(const std::string &msg, unsigned int atomIdx) + : MolSanitizeException(msg), d_atomIdx(atomIdx){}; + AtomSanitizeException(const AtomSanitizeException &other) + : MolSanitizeException(other), d_atomIdx(other.d_atomIdx){}; + unsigned int getAtomIdx() const { return d_atomIdx; }; + virtual ~AtomSanitizeException() throw(){}; + virtual MolSanitizeException *copy() const { + return new AtomSanitizeException(*this); + }; + virtual std::string getType() const { return "AtomSanitizeException"; }; + + protected: + unsigned int d_atomIdx; +}; + +class RDKIT_GRAPHMOL_EXPORT AtomValenceException + : public AtomSanitizeException { + public: + AtomValenceException(const char *msg, unsigned int atomIdx) + : AtomSanitizeException(msg, atomIdx){}; + AtomValenceException(const std::string &msg, unsigned int atomIdx) + : AtomSanitizeException(msg, atomIdx){}; + AtomValenceException(const AtomValenceException &other) + : AtomSanitizeException(other){}; + virtual ~AtomValenceException() throw(){}; + MolSanitizeException *copy() const { + return new AtomValenceException(*this); + }; + std::string getType() const { return "AtomValenceException"; }; +}; + +class RDKIT_GRAPHMOL_EXPORT AtomKekulizeException + : public AtomSanitizeException { + public: + AtomKekulizeException(const char *msg, unsigned int atomIdx) + : AtomSanitizeException(msg, atomIdx){}; + AtomKekulizeException(const std::string &msg, unsigned int atomIdx) + : AtomSanitizeException(msg, atomIdx){}; + AtomKekulizeException(const AtomKekulizeException &other) + : AtomSanitizeException(other){}; + virtual ~AtomKekulizeException() throw(){}; + MolSanitizeException *copy() const { + return new AtomKekulizeException(*this); + }; + std::string getType() const { return "AtomKekulizeException"; }; +}; + +class RDKIT_GRAPHMOL_EXPORT KekulizeException : public MolSanitizeException { + public: + KekulizeException(const char *msg, const std::vector &indices) + : MolSanitizeException(msg), d_atomIndices(indices){}; + KekulizeException(const std::string &msg, + const std::vector &indices) + : MolSanitizeException(msg), d_atomIndices(indices){}; + KekulizeException(const KekulizeException &other) + : MolSanitizeException(other), d_atomIndices(other.d_atomIndices){}; + const std::vector &getAtomIndices() const { + return d_atomIndices; + }; + virtual ~KekulizeException() throw(){}; + MolSanitizeException *copy() const { return new KekulizeException(*this); }; + std::string getType() const { return "KekulizeException"; }; + + protected: + std::vector d_atomIndices; +}; + } // namespace RDKit #endif diff --git a/Code/GraphMol/Wrap/MolOps.cpp b/Code/GraphMol/Wrap/MolOps.cpp index a66935904..f49e697f5 100644 --- a/Code/GraphMol/Wrap/MolOps.cpp +++ b/Code/GraphMol/Wrap/MolOps.cpp @@ -351,6 +351,7 @@ void addRecursiveQuery(ROMol &mol, const ROMol &query, unsigned int atomIdx, oAt->expandQuery(q, Queries::COMPOSITE_AND); } } + MolOps::SanitizeFlags sanitizeMol(ROMol &mol, boost::uint64_t sanitizeOps, bool catchErrors) { RWMol &wmol = static_cast(mol); @@ -766,6 +767,16 @@ ROMol *adjustQueryPropertiesHelper(const ROMol &mol, python::object pyparams) { return MolOps::adjustQueryProperties(mol, ¶ms); } +python::tuple detectChemistryProblemsHelper(const ROMol &mol, + unsigned int sanitizeOps) { + auto probs = MolOps::detectChemistryProblems(mol, sanitizeOps); + python::list res; + for (const auto &exc_ptr : probs) { + res.append(boost::shared_ptr(exc_ptr->copy())); + } + return python::tuple(res); +} + ROMol *replaceCoreHelper(const ROMol &mol, const ROMol &core, python::object match, bool replaceDummies, bool labelByIndex, bool requireDummyMatch = false) { @@ -2258,6 +2269,11 @@ A note on the flags controlling which atoms/bonds are modified: \n\ (python::arg("mol"), python::arg("params") = python::object()), docString.c_str(), python::return_value_policy()); + docString = "checks for chemistry problems"; + python::def( + "DetectChemistryProblems", detectChemistryProblemsHelper, + (python::arg("mol"), python::arg("sanitizeOps") = MolOps::SANITIZE_ALL), + docString.c_str()); }; }; } // namespace RDKit diff --git a/Code/GraphMol/Wrap/rdchem.cpp b/Code/GraphMol/Wrap/rdchem.cpp index 021ab8c51..570a7202f 100644 --- a/Code/GraphMol/Wrap/rdchem.cpp +++ b/Code/GraphMol/Wrap/rdchem.cpp @@ -37,12 +37,6 @@ void rdExceptionTranslator(RDKit::ConformerException const &x) { PyErr_SetString(PyExc_ValueError, "Bad Conformer Id"); } -void rdSanitExceptionTranslator(RDKit::MolSanitizeException const &x) { - std::ostringstream ss; - ss << "Sanitization error: " << x.message(); - PyErr_SetString(PyExc_ValueError, ss.str().c_str()); -} - void wrap_table(); void wrap_atom(); void wrap_conformer(); @@ -116,14 +110,111 @@ void WrapLogs() { if (rdWarningLog != nullptr) rdWarningLog->SetTee(warning); } +python::tuple getAtomIndicesHelper(const KekulizeException &self) { + python::list res; + for (auto idx : self.getAtomIndices()) { + res.append(idx); + } + return python::tuple(res); +} + +PyObject *molSanitizeExceptionType = nullptr; +PyObject *atomSanitizeExceptionType = nullptr; +PyObject *atomValenceExceptionType = nullptr; +PyObject *atomKekulizeExceptionType = nullptr; +PyObject *kekulizeExceptionType = nullptr; + +// pattern from here: +// https://stackoverflow.com/questions/11448735/boostpython-export-custom-exception-and-inherit-from-pythons-exception +template +void sanitExceptionTranslator(const EXC_TYPE &x, PyObject *pyExcType) { + PRECONDITION(pyExcType != nullptr, "global type not initialized"); + python::object pyExcInstance(python::handle<>(python::borrowed(pyExcType))); + pyExcInstance.attr("cause") = x; + PyErr_SetString(pyExcType, x.message()); +} + +// pattern from here: +// https://stackoverflow.com/questions/9620268/boost-python-custom-exception-class +PyObject *createExceptionClass(const char *name, + PyObject *baseTypeObj = PyExc_ValueError) { + std::string scopeName = + python::extract(python::scope().attr("__name__")); + std::string qualifiedName0 = scopeName + "." + name; + char *qualifiedName1 = const_cast(qualifiedName0.c_str()); + + PyObject *typeObj = PyErr_NewException(qualifiedName1, baseTypeObj, 0); + if (!typeObj) python::throw_error_already_set(); + python::scope().attr(name) = python::handle<>(python::borrowed(typeObj)); + return typeObj; +} + BOOST_PYTHON_MODULE(rdchem) { python::scope().attr("__doc__") = "Module containing the core chemistry functionality of the RDKit"; RegisterListConverter(); RegisterListConverter(); rdkit_import_array(); + + // this is one of those parts where I think I wish that I knew how to do + // template meta-programming + python::class_("_cppMolSanitizeException", + "exception arising from sanitization", + python::no_init) + .def("Message", &MolSanitizeException::message) + .def("GetType", &MolSanitizeException::getType); + python::register_ptr_to_python>(); + molSanitizeExceptionType = createExceptionClass("MolSanitizeException"); python::register_exception_translator( - &rdSanitExceptionTranslator); + [&](const MolSanitizeException &exc) { + sanitExceptionTranslator(exc, molSanitizeExceptionType); + }); + + python::class_>( + "_cppAtomSanitizeException", "exception arising from sanitization", + python::no_init) + .def("GetAtomIdx", &AtomSanitizeException::getAtomIdx); + python::register_ptr_to_python>(); + atomSanitizeExceptionType = + createExceptionClass("AtomSanitizeException", molSanitizeExceptionType); + python::register_exception_translator( + [&](const AtomSanitizeException &exc) { + sanitExceptionTranslator(exc, atomSanitizeExceptionType); + }); + + python::class_>( + "_cppAtomValenceException", "exception arising from sanitization", + python::no_init); + python::register_ptr_to_python>(); + atomValenceExceptionType = + createExceptionClass("AtomValenceException", atomSanitizeExceptionType); + python::register_exception_translator( + [&](const AtomValenceException &exc) { + sanitExceptionTranslator(exc, atomValenceExceptionType); + }); + + python::class_>( + "_cppAtomKekulizeException", "exception arising from sanitization", + python::no_init); + python::register_ptr_to_python>(); + atomKekulizeExceptionType = + createExceptionClass("AtomKekulizeException", atomSanitizeExceptionType); + python::register_exception_translator( + [&](const AtomKekulizeException &exc) { + sanitExceptionTranslator(exc, atomKekulizeExceptionType); + }); + + python::class_>( + "_cppAtomKekulizeException", "exception arising from sanitization", + python::no_init) + .def("GetAtomIndices", &getAtomIndicesHelper); + python::register_ptr_to_python>(); + kekulizeExceptionType = + createExceptionClass("KekulizeException", molSanitizeExceptionType); + python::register_exception_translator( + [&](const KekulizeException &exc) { + sanitExceptionTranslator(exc, kekulizeExceptionType); + }); python::def("WrapLogs", WrapLogs, "Wrap the internal RDKit streams so they go to python's " diff --git a/Code/GraphMol/Wrap/rough_test.py b/Code/GraphMol/Wrap/rough_test.py index 9f3580154..a09c08434 100644 --- a/Code/GraphMol/Wrap/rough_test.py +++ b/Code/GraphMol/Wrap/rough_test.py @@ -453,7 +453,7 @@ class TestCase(unittest.TestCase): smi1 = Chem.MolToSmiles(m) smi2 = Chem.MolToSmiles(m2) self.assertTrue(smi1 == smi2) - + def test16Props(self): m = Chem.MolFromSmiles('C1=CN=CC=C1') self.assertTrue(not m.HasProp('prop1')) @@ -2106,7 +2106,7 @@ CAS<~> self.assertTrue(ri.IsBondInRingOfSize(2, 4)) - if hasattr(Chem,'FindRingFamilies'): + if hasattr(Chem,'FindRingFamilies'): ri = m.GetRingInfo() self.assertFalse(ri.AreRingFamiliesInitialized()) Chem.FindRingFamilies(m) @@ -5020,16 +5020,16 @@ width='200px' height='200px' > def testAssignStereochemistryFrom3D(self): def _stereoTester(mol,expectedCIP,expectedStereo): - mol.UpdatePropertyCache() - self.assertEqual(mol.GetNumAtoms(),9) - self.assertFalse(mol.GetAtomWithIdx(1).HasProp("_CIPCode")) - self.assertEqual(mol.GetBondWithIdx(3).GetStereo(),Chem.BondStereo.STEREONONE) - for bond in mol.GetBonds(): - bond.SetBondDir(Chem.BondDir.NONE) - Chem.AssignStereochemistryFrom3D(mol) - self.assertTrue(mol.GetAtomWithIdx(1).HasProp("_CIPCode")) - self.assertEqual(mol.GetAtomWithIdx(1).GetProp("_CIPCode"),expectedCIP) - self.assertEqual(mol.GetBondWithIdx(3).GetStereo(),expectedStereo) + mol.UpdatePropertyCache() + self.assertEqual(mol.GetNumAtoms(),9) + self.assertFalse(mol.GetAtomWithIdx(1).HasProp("_CIPCode")) + self.assertEqual(mol.GetBondWithIdx(3).GetStereo(),Chem.BondStereo.STEREONONE) + for bond in mol.GetBonds(): + bond.SetBondDir(Chem.BondDir.NONE) + Chem.AssignStereochemistryFrom3D(mol) + self.assertTrue(mol.GetAtomWithIdx(1).HasProp("_CIPCode")) + self.assertEqual(mol.GetAtomWithIdx(1).GetProp("_CIPCode"),expectedCIP) + self.assertEqual(mol.GetBondWithIdx(3).GetStereo(),expectedStereo) fileN = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'test_data', 'stereochem.sdf') @@ -5041,8 +5041,8 @@ width='200px' height='200px' > ("S",Chem.BondStereo.STEREOE), ) for i,mol in enumerate(suppl): - cip,stereo = expected[i] - _stereoTester(mol,cip,stereo) + cip,stereo = expected[i] + _stereoTester(mol,cip,stereo) def testGitHub2082(self): ctab=""" @@ -5096,9 +5096,9 @@ M END def testGitHub1985(self): # simple check, this used to throw an exception try: - Chem.MolToSmarts(Chem.MolFromSmarts("[C@]")) + Chem.MolToSmarts(Chem.MolFromSmarts("[C@]")) except: - self.fail("[C@] caused an exception when roundtripping smarts") + self.fail("[C@] caused an exception when roundtripping smarts") def testGetEnhancedStereo(self): @@ -5285,7 +5285,7 @@ M END for atom in m.GetAtoms(): bv.SetBit(atom.GetIdx()) atom.SetExplicitBitVectProp("prop", bv) - + for atom in m.GetAtoms(): bv = atom.GetExplicitBitVectProp("prop") self.assertTrue(bv.GetBit(atom.GetIdx())) @@ -5302,7 +5302,7 @@ M END for atom in m.GetAtoms(): if atom.GetIdx() == 0: atom.SetExplicitBitVectProp("prop", bv) - + l = tuple([x.GetIdx() for x in m.GetAtomsMatchingQuery(qa)]) self.assertEqual(l, (0,)) @@ -5311,37 +5311,37 @@ M END bv = DataStructs.ExplicitBitVect(4) bv.SetBit(atom.GetIdx()) atom.SetExplicitBitVectProp("prop", bv) - + sma = Chem.MolFromSmarts("C") for atom in sma.GetAtoms(): - bv = DataStructs.ExplicitBitVect(4) - bv.SetBit(1) - qa = rdqueries.HasBitVectPropWithValueQueryAtom("prop", bv, tolerance=0.0) - atom.ExpandQuery(qa) + bv = DataStructs.ExplicitBitVect(4) + bv.SetBit(1) + qa = rdqueries.HasBitVectPropWithValueQueryAtom("prop", bv, tolerance=0.0) + atom.ExpandQuery(qa) res = m.GetSubstructMatches(sma) self.assertEqual(res, ((1,),)) sma = Chem.MolFromSmarts("C") for atom in sma.GetAtoms(): - bv = DataStructs.ExplicitBitVect(4) - bv.SetBit(0) - qa = rdqueries.HasBitVectPropWithValueQueryAtom("prop", bv, tolerance=0.0) - atom.ExpandQuery(qa) + bv = DataStructs.ExplicitBitVect(4) + bv.SetBit(0) + qa = rdqueries.HasBitVectPropWithValueQueryAtom("prop", bv, tolerance=0.0) + atom.ExpandQuery(qa) res = m.GetSubstructMatches(sma) self.assertEqual(res, ((0,),)) - + sma = Chem.MolFromSmarts("C") for atom in sma.GetAtoms(): - bv = DataStructs.ExplicitBitVect(4) - bv.SetBit(0) - qa = rdqueries.HasBitVectPropWithValueQueryAtom("prop", bv, tolerance=1.0) - atom.ExpandQuery(qa) + bv = DataStructs.ExplicitBitVect(4) + bv.SetBit(0) + qa = rdqueries.HasBitVectPropWithValueQueryAtom("prop", bv, tolerance=1.0) + atom.ExpandQuery(qa) res = m.GetSubstructMatches(sma) self.assertEqual(res, ((0,),(1,))) - + def testGithub2441(self): m = Chem.MolFromSmiles("CC") conf = Chem.Conformer(2) @@ -5387,7 +5387,7 @@ C1C(Cl)CCCC duff2 l = [x for x in suppl2] self.assertEqual(len(l),7) self.assertTrue(l[6] is None) - + sdf=b""" Mrv1810 06051911332D @@ -5422,7 +5422,7 @@ $$$$ self.assertEqual(len(l),3) self.assertTrue(l[1] is None) self.assertTrue(l[2] is None) - + from io import BytesIO sio = BytesIO(sdf) suppl3 = Chem.ForwardSDMolSupplier(sio) @@ -5430,7 +5430,7 @@ $$$$ self.assertEqual(len(l),3) self.assertTrue(l[1] is None) self.assertTrue(l[2] is None) - + sdf=b""" Mrv1810 06051911332D @@ -5464,7 +5464,7 @@ M END self.assertEqual(len(l),2) self.assertTrue(l[0] is not None) self.assertTrue(l[1] is not None) - + from io import BytesIO sio = BytesIO(sdf) suppl3 = Chem.ForwardSDMolSupplier(sio) @@ -5499,6 +5499,48 @@ H 0.635000 0.635000 0.635000 self.assertEqual(Chem.MolToXYZBlock(mol), xyzblock_expected) + def testSanitizationExceptionBasics(self): + try: + Chem.SanitizeMol(Chem.MolFromSmiles('CFC',sanitize=False)) + except Chem.AtomValenceException as exc: + self.assertEqual(exc.cause.GetAtomIdx(),1) + else: + self.assertFalse(True) + + try: + Chem.SanitizeMol(Chem.MolFromSmiles('c1cc1',sanitize=False)) + except Chem.KekulizeException as exc: + self.assertEqual(exc.cause.GetAtomIndices(),(0,1,2)) + else: + self.assertFalse(True) + + + def testSanitizationExceptionHierarchy(self): + with self.assertRaises(Chem.AtomValenceException): + Chem.SanitizeMol(Chem.MolFromSmiles('CFC',sanitize=False)) + with self.assertRaises(Chem.AtomSanitizeException): + Chem.SanitizeMol(Chem.MolFromSmiles('CFC',sanitize=False)) + with self.assertRaises(Chem.MolSanitizeException): + Chem.SanitizeMol(Chem.MolFromSmiles('CFC',sanitize=False)) + with self.assertRaises(ValueError): + Chem.SanitizeMol(Chem.MolFromSmiles('CFC',sanitize=False)) + + with self.assertRaises(Chem.KekulizeException): + Chem.SanitizeMol(Chem.MolFromSmiles('c1cc1',sanitize=False)) + with self.assertRaises(Chem.MolSanitizeException): + Chem.SanitizeMol(Chem.MolFromSmiles('c1cc1',sanitize=False)) + with self.assertRaises(ValueError): + Chem.SanitizeMol(Chem.MolFromSmiles('c1cc1', sanitize=False)) + + def testDetectChemistryProblems(self): + m = Chem.MolFromSmiles('CFCc1cc1ClC',sanitize=False) + ps = Chem.DetectChemistryProblems(m) + self.assertEqual(len(ps),3) + self.assertEqual([x.GetType() for x in ps],['AtomValenceException','AtomValenceException','KekulizeException']) + self.assertEqual(ps[0].GetAtomIdx(),1) + self.assertEqual(ps[1].GetAtomIdx(),6) + self.assertEqual(ps[2].GetAtomIndices(),(3,4,5)) + def testGithub2611(self): mol = Chem.MolFromSmiles('ONCS.ONCS') for atom in mol.GetAtoms(): @@ -5526,7 +5568,6 @@ H 0.635000 0.635000 0.635000 self.assertEqual(order1,order3) self.assertEqual(order2,order4) - if __name__ == '__main__': if "RDTESTCASE" in os.environ: suite = unittest.TestSuite() diff --git a/Code/GraphMol/catch_tests.cpp b/Code/GraphMol/catch_tests.cpp index b4581a2bc..7f9959ce5 100644 --- a/Code/GraphMol/catch_tests.cpp +++ b/Code/GraphMol/catch_tests.cpp @@ -340,6 +340,82 @@ M END)CTAB"; } } +TEST_CASE("Specialized exceptions for sanitization errors", "[molops]") { + SECTION("AtomValenceException") { + std::vector> smiles = { + {"C=n1ccnc1", 1}, {"CCO(C)C", 2}}; + for (auto pr : smiles) { + CHECK_THROWS_AS(SmilesToMol(pr.first), AtomValenceException); + try { + auto m = SmilesToMol(pr.first); + } catch (const AtomValenceException &e) { + CHECK(e.getType() == "AtomValenceException"); + CHECK(e.getAtomIdx() == pr.second); + } + } + } + SECTION("AtomKekulizeException") { + std::vector> smiles = { + {"CCcc", 2}, {"C1:c:CC1", 0}}; + for (auto pr : smiles) { + CHECK_THROWS_AS(SmilesToMol(pr.first), AtomKekulizeException); + try { + auto m = SmilesToMol(pr.first); + } catch (const AtomKekulizeException &e) { + CHECK(e.getType() == "AtomKekulizeException"); + CHECK(e.getAtomIdx() == pr.second); + } + } + } + SECTION("KekulizeException") { + std::vector>> smiles = { + {"c1cccc1", {0, 1, 2, 3, 4}}, {"Cc1cc1", {1, 2, 3}}}; + for (auto pr : smiles) { + CHECK_THROWS_AS(SmilesToMol(pr.first), KekulizeException); + try { + auto m = SmilesToMol(pr.first); + } catch (const KekulizeException &e) { + CHECK(e.getType() == "KekulizeException"); + CHECK(e.getAtomIndices() == pr.second); + } + } + } +} + +TEST_CASE("detectChemistryProblems", "[molops]") { + SECTION("Basics") { + SmilesParserParams ps; + ps.sanitize = false; + auto m = std::unique_ptr(SmilesToMol("CO(C)CFCc1cc1", ps)); + REQUIRE(m); + auto res = MolOps::detectChemistryProblems(*m); + REQUIRE(res.size() == 3); + + CHECK(res[0]->getType() == "AtomValenceException"); + REQUIRE(dynamic_cast(res[0].get())); + CHECK(dynamic_cast(res[0].get())->getAtomIdx() == + 1); + + CHECK(res[1]->getType() == "AtomValenceException"); + REQUIRE(dynamic_cast(res[1].get())); + CHECK(dynamic_cast(res[1].get())->getAtomIdx() == + 4); + + CHECK(res[2]->getType() == "KekulizeException"); + REQUIRE(dynamic_cast(res[2].get())); + CHECK(dynamic_cast(res[2].get())->getAtomIndices() == + std::vector({6, 7, 8})); + } + SECTION("No problems") { + SmilesParserParams ps; + ps.sanitize = false; + auto m = std::unique_ptr(SmilesToMol("c1ccccc1", ps)); + REQUIRE(m); + auto res = MolOps::detectChemistryProblems(*m); + REQUIRE(res.size() == 0); + } +} + TEST_CASE( "github #2606: Bad valence corrections on Pb, Sn" "[bug, molops]") { @@ -433,4 +509,4 @@ M END CHECK(mol->getAtomWithIdx(0)->getFormalCharge() == 2); CHECK(mol->getAtomWithIdx(0)->getTotalNumHs() == 0); } -} \ No newline at end of file +} diff --git a/Code/JavaWrappers/MolOps.i b/Code/JavaWrappers/MolOps.i index 6fed6a9e2..40fefa4df 100644 --- a/Code/JavaWrappers/MolOps.i +++ b/Code/JavaWrappers/MolOps.i @@ -1,5 +1,4 @@ /* -* $Id$ * * Copyright (c) 2010, Novartis Institutes for BioMedical Research Inc. * All rights reserved. @@ -35,10 +34,12 @@ #include %} +%template(MolSanitizeException_Vect) std::vector>; %newobject RDKit::MolOps::renumberAtoms; %newobject RDKit::MolOps::removeHs; %newobject RDKit::MolOps::addHs; +%ignore RDKit::MolOps::detectChemistryProblems; %include %ignore RDKit::MolOps::sanitizeMol(RWMol &,unsigned int &,unsigned int &); @@ -53,4 +54,12 @@ } return static_cast(opThatFailed); }; + std::vector> detectChemistryProblems(RDKit::ROMol &mol,int sanitizeOps=RDKit::MolOps::SANITIZE_ALL){ + std::vector> res; + auto probs = RDKit::MolOps::detectChemistryProblems(mol,sanitizeOps); + for(const auto &exc_ptr : probs) { + res.push_back(boost::shared_ptr(exc_ptr->copy())); + } + return res; + }; %} diff --git a/Code/JavaWrappers/SanitException.i b/Code/JavaWrappers/SanitException.i index 42f38d2a0..eb8aee976 100644 --- a/Code/JavaWrappers/SanitException.i +++ b/Code/JavaWrappers/SanitException.i @@ -38,3 +38,68 @@ %include +#ifdef SWIGJAVA +// support upcasting from MolSanitizeException to the other exception types so that +// we can do something with the output of detectChemistryProblems() + +// approach from: http://www.swig.org/Doc3.0/Java.html#Java_adding_downcasts +%exception RDKit::AtomSanitizeException::dynamic_cast(RDKit::MolSanitizeException) { + $action + if(!result){ + jclass excep = jenv->FindClass("java/lang/ClassCastException"); + if (excep) { + jenv->ThrowNew(excep, "dynamic_cast exception"); + } + } +} +%extend RDKit::AtomSanitizeException { + static RDKit::AtomSanitizeException *dynamic_cast(RDKit::MolSanitizeException *mse){ + return dynamic_cast(mse); + } +}; + +%exception RDKit::AtomValenceException::dynamic_cast(RDKit::MolSanitizeException) { + $action + if(!result){ + jclass excep = jenv->FindClass("java/lang/ClassCastException"); + if (excep) { + jenv->ThrowNew(excep, "dynamic_cast exception"); + } + } +} +%extend RDKit::AtomValenceException { + static RDKit::AtomValenceException *dynamic_cast(RDKit::MolSanitizeException *mse){ + return dynamic_cast(mse); + } +}; + +%exception RDKit::AtomKekulizeException::dynamic_cast(RDKit::MolSanitizeException) { + $action + if(!result){ + jclass excep = jenv->FindClass("java/lang/ClassCastException"); + if (excep) { + jenv->ThrowNew(excep, "dynamic_cast exception"); + } + } +} +%extend RDKit::AtomKekulizeException { + static RDKit::AtomKekulizeException *dynamic_cast(RDKit::MolSanitizeException *mse){ + return dynamic_cast(mse); + } +}; + +%exception RDKit::KekulizeException::dynamic_cast(RDKit::MolSanitizeException) { + $action + if(!result){ + jclass excep = jenv->FindClass("java/lang/ClassCastException"); + if (excep) { + jenv->ThrowNew(excep, "dynamic_cast exception"); + } + } +} +%extend RDKit::KekulizeException { + static RDKit::KekulizeException *dynamic_cast(RDKit::MolSanitizeException *mse){ + return dynamic_cast(mse); + } +}; +#endif \ No newline at end of file diff --git a/Code/JavaWrappers/csharp_wrapper/GraphMolCSharp.i b/Code/JavaWrappers/csharp_wrapper/GraphMolCSharp.i index b199b3e8a..50b79a315 100644 --- a/Code/JavaWrappers/csharp_wrapper/GraphMolCSharp.i +++ b/Code/JavaWrappers/csharp_wrapper/GraphMolCSharp.i @@ -178,6 +178,10 @@ typedef unsigned long long int uintmax_t; %shared_ptr(RDKit::QueryBond) %shared_ptr(RDKit::QueryOps) %shared_ptr(RDKit::MolSanitizeException) +%shared_ptr(RDKit::AtomSanitizeException) +%shared_ptr(RDKit::AtomValenceException) +%shared_ptr(RDKit::AtomKekulizeException) +%shared_ptr(RDKit::KekulizeException) %shared_ptr(RDKit::SmilesParseException) %shared_ptr(RDKit::RingInfo) %shared_ptr(RDKit::ChemicalReaction) diff --git a/Code/JavaWrappers/gmwrapper/GraphMolJava.i b/Code/JavaWrappers/gmwrapper/GraphMolJava.i index 1d88dd843..19e7feb64 100644 --- a/Code/JavaWrappers/gmwrapper/GraphMolJava.i +++ b/Code/JavaWrappers/gmwrapper/GraphMolJava.i @@ -130,6 +130,10 @@ typedef unsigned long long int uintmax_t; %shared_ptr(RDKit::QueryBond) %shared_ptr(RDKit::QueryOps) %shared_ptr(RDKit::MolSanitizeException) +%shared_ptr(RDKit::AtomSanitizeException) +%shared_ptr(RDKit::AtomValenceException) +%shared_ptr(RDKit::AtomKekulizeException) +%shared_ptr(RDKit::KekulizeException) %shared_ptr(RDKit::SmilesParseException) %shared_ptr(RDKit::MolPicklerException) %shared_ptr(RDKit::RingInfo) diff --git a/Code/JavaWrappers/gmwrapper/src-test/org/RDKit/BasicMolecule2Tests.java b/Code/JavaWrappers/gmwrapper/src-test/org/RDKit/BasicMolecule2Tests.java index 0e0974b6a..bc90cff97 100644 --- a/Code/JavaWrappers/gmwrapper/src-test/org/RDKit/BasicMolecule2Tests.java +++ b/Code/JavaWrappers/gmwrapper/src-test/org/RDKit/BasicMolecule2Tests.java @@ -1,5 +1,4 @@ /* - * $Id: BasicMolecule2Tests.java 131 2011-01-20 22:01:29Z ebakke $ * * Copyright (c) 2010, Novartis Institutes for BioMedical Research Inc. * All rights reserved. @@ -175,6 +174,31 @@ public class BasicMolecule2Tests extends GraphMolTest { assertEquals(3,mvv.get(0).size()); } + @Test public void testDetectChemistryProblems() { + ROMol m2; + m2 = RWMol.MolFromSmiles("CFCc1cc1",0,false); + MolSanitizeException_Vect res; + res = RDKFuncs.detectChemistryProblems(m2); + assertEquals(res.size(),2); + assertEquals(res.get(0).getType(),"AtomValenceException"); + assertEquals(res.get(1).getType(),"KekulizeException"); + // fun with swig and C++ inheritance: + KekulizeException ke = KekulizeException.dynamic_cast(res.get(0)); + assertNull(ke); + ke = KekulizeException.dynamic_cast(res.get(1)); + assertNotNull(ke); + assertEquals(ke.getAtomIndices().size(),3); + assertEquals(ke.getAtomIndices().get(0),3); + assertEquals(ke.getAtomIndices().get(1),4); + assertEquals(ke.getAtomIndices().get(2),5); + + AtomValenceException ave = AtomValenceException.dynamic_cast(res.get(0)); + assertNotNull(ave); + assertEquals(ave.getAtomIdx(),1); + + } + + public static void main(String args[]) { org.junit.runner.JUnitCore.main("org.RDKit.BasicMolecule2Tests"); } diff --git a/Code/JavaWrappers/gmwrapper/src-test/org/RDKit/BasicMoleculeTests.java b/Code/JavaWrappers/gmwrapper/src-test/org/RDKit/BasicMoleculeTests.java index 08d1d687f..e585ad7f9 100644 --- a/Code/JavaWrappers/gmwrapper/src-test/org/RDKit/BasicMoleculeTests.java +++ b/Code/JavaWrappers/gmwrapper/src-test/org/RDKit/BasicMoleculeTests.java @@ -1,5 +1,4 @@ /* - * $Id: BasicMoleculeTests.java 131 2011-01-20 22:01:29Z ebakke $ * * Copyright (c) 2010, Novartis Institutes for BioMedical Research Inc. * All rights reserved.