// // Copyright (C) 2018 Susan H. Leung // // @@ All Rights Reserved @@ // This file is part of the RDKit. // The contents are covered by the terms of the BSD license // which is included in the file license.txt, found at the root // of the RDKit source tree. // #include "Validate.h" #include #include #include #include #include #include #include using namespace RDKit; using namespace std; using namespace MolStandardize; void testRDKitValidation() { BOOST_LOG(rdInfoLog) << "-----------------------\n Testing RDKit validation" << std::endl; string smi1, smi2, smi3, smi4; RDKitValidation vm; // testing RDKitDefault smi1 = "CO(C)C"; unique_ptr m1(SmilesToMol(smi1, 0, false)); vector errout1 = vm.validate(*m1, true); for (auto &query : errout1) { std::string msg = query.message(); TEST_ASSERT(msg == "INFO: [ValenceValidation] Explicit valence for atom # 1 O, 3, " "is greater than permitted"); } // testing for molecule with no atoms smi2 = ""; unique_ptr m2(SmilesToMol(smi2, 0, false)); vector errout2 = vm.validate(*m2, true); for (auto &query : errout2) { std::string msg = query.message(); TEST_ASSERT(msg == "ERROR: [NoAtomValidation] Molecule has no atoms"); } // testing molecule with multiple valency errors smi3 = "CO(C)CCN(=O)=O"; unique_ptr m3(SmilesToMol(smi3, 0, false)); vector errout3 = vm.validate(*m3, true); std::vector msgs1; std::vector ans1 = { "INFO: [ValenceValidation] Explicit valence for atom # 1 O, 3, is " "greater than permitted", "INFO: [ValenceValidation] Explicit valence for atom # 5 N, 5, is " "greater than permitted"}; for (auto &query : errout3) { msgs1.push_back(query.message()); } TEST_ASSERT(msgs1 == ans1); // testing molecule with multiple valency errors and only outputting // first error smi4 = "CO(C)CCN(=O)=O"; unique_ptr m4(SmilesToMol(smi4, 0, false)); vector errout4 = vm.validate(*m4, false); std::vector msgs2; std::vector ans2 = { "INFO: [ValenceValidation] Explicit valence for atom # 1 O, 3, is " "greater than permitted"}; for (auto &query : errout4) { msgs2.push_back(query.message()); } TEST_ASSERT(msgs2 == ans2); BOOST_LOG(rdInfoLog) << "Finished" << std::endl; } void testMolVSValidation() { BOOST_LOG(rdInfoLog) << "-----------------------\n Testing MolVS validation" << std::endl; string smi1, smi2, smi3, smi4, smi5, smi6; MolVSValidation vm; // testing MolVSDefault // testing for molecule with no atoms smi1 = ""; unique_ptr m1(SmilesToMol(smi1, 0, false)); vector errout1 = vm.validate(*m1, true); for (auto &query : errout1) { std::string msg = query.message(); TEST_ASSERT(msg == "ERROR: [NoAtomValidation] Molecule has no atoms"); } smi2 = "O=C([O-])c1ccccc1"; unique_ptr m2(SmilesToMol(smi2, 0, false)); vector errout2 = vm.validate(*m2, true); for (auto &query : errout2) { std::string msg = query.message(); TEST_ASSERT(msg == "INFO: [NeutralValidation] Not an overall neutral system (-1)"); } smi3 = "CN=[NH+]CN=N"; unique_ptr m3(SmilesToMol(smi3, 0, false)); vector errout3 = vm.validate(*m3, true); for (auto &query : errout3) { std::string msg = query.message(); TEST_ASSERT( msg == "INFO: [NeutralValidation] Not an overall neutral system (+1)"); // fix // to // show // + // sign } smi4 = "[13CH4]"; unique_ptr m4(SmilesToMol(smi4, 0, false)); vector errout4 = vm.validate(*m4, true); for (auto &query : errout4) { std::string msg = query.message(); TEST_ASSERT(msg == "INFO: [IsotopeValidation] Molecule contains isotope 13C"); } smi5 = "[2H]C(Cl)(Cl)Cl"; unique_ptr m5(SmilesToMol(smi5, 0, false)); vector errout5 = vm.validate(*m5, true); for (auto &query : errout5) { std::string msg = query.message(); TEST_ASSERT(msg == "INFO: [IsotopeValidation] Molecule contains isotope 2H"); } smi6 = "[2H]OC([2H])([2H])[2H]"; unique_ptr m6(SmilesToMol(smi6, 0, false)); vector errout6 = vm.validate(*m6, true); for (auto &query : errout6) { std::string msg = query.message(); TEST_ASSERT(msg == "INFO: [IsotopeValidation] Molecule contains isotope 2H"); } std::string smi7 = "COc1cccc(C=N[N-]C(N)=O)c1[O-].O.O.O.O=[U+2]=O"; unique_ptr m7(SmilesToMol(smi7, 0, false)); vector errout7 = vm.validate(*m7, true); TEST_ASSERT(errout7.size() != 0); for (auto &query : errout7) { std::string msg = query.message(); TEST_ASSERT(msg == "INFO: [FragmentValidation] water/hydroxide is present"); } std::string smi8 = "CC(=O)O.NCC(=O)NCCCCCCCCCCNC(=O)CN"; unique_ptr m8(SmilesToMol(smi8, 0, false)); vector errout8 = vm.validate(*m8, true); TEST_ASSERT(errout8.size() != 0); for (auto &query : errout8) { std::string msg = query.message(); TEST_ASSERT(msg == "INFO: [FragmentValidation] acetate/acetic acid is present"); } std::string smi9 = "N#CC(Br)(Br)C#N.[Br-].[K+]"; unique_ptr m9(SmilesToMol(smi9, 0, false)); vector errout9 = vm.validate(*m9, true); std::vector ans = { "INFO: [FragmentValidation] bromine is present", "INFO: [FragmentValidation] potassium is present"}; TEST_ASSERT(errout9.size() == ans.size()); for (size_t i = 0; i < errout9.size(); ++i) { TEST_ASSERT(errout9[i].message() == ans[i]); } std::string smi10 = "C1COCCO1.O=C(NO)NO"; unique_ptr m10(SmilesToMol(smi10, 0, false)); vector errout10 = vm.validate(*m10, true); std::vector ans10 = { "INFO: [FragmentValidation] 1,2-dimethoxyethane is present", "INFO: [FragmentValidation] 1,4-dioxane is present"}; TEST_ASSERT(errout10.size() == ans10.size()); for (size_t i = 0; i < errout10.size(); ++i) { TEST_ASSERT(errout10[i].message() == ans10[i]); } BOOST_LOG(rdInfoLog) << "Finished" << std::endl; } void testMolVSOptions() { BOOST_LOG(rdInfoLog) << "-----------------------\n Testing MolVS Options" << std::endl; vector> validations = { boost::make_shared()}; MolVSValidation vm(validations); // testing MolVSDefault // testing for molecule with no atoms string smi1 = ""; unique_ptr m1(SmilesToMol(smi1, 0, false)); vector errout1 = vm.validate(*m1, true); for (auto &query : errout1) { std::string msg = query.message(); // TEST_ASSERT(msg == "ERROR: [NoAtomValidation] Molecule has no atoms"); TEST_ASSERT(msg == ""); } string smi2 = "O=C([O-])c1ccccc1"; unique_ptr m2(SmilesToMol(smi2, 0, false)); vector errout2 = vm.validate(*m2, true); for (auto &query : errout2) { std::string msg = query.message(); // TEST_ASSERT(msg == // "INFO: [NeutralValidation] Not an overall neutral system // (-1)"); TEST_ASSERT(msg == ""); } BOOST_LOG(rdInfoLog) << "Finished" << std::endl; } void testAllowedAtomsValidation() { BOOST_LOG(rdInfoLog) << "-----------------------\n Testing AllowedAtoms validation" << std::endl; // std::vector atoms = {"C", "N", "O"}; std::vector atoms = {6, 7, 8}; std::vector> atomList; for (auto &atom : atoms) { shared_ptr a(new Atom(atom)); atomList.push_back(a); } AllowedAtomsValidation vm(atomList); std::string smi1; smi1 = "CC(=O)CF"; unique_ptr m1(SmilesToMol(smi1)); vector errout1 = vm.validate(*m1, true); for (auto &query : errout1) { std::string msg = query.message(); std::cout << msg << std::endl; TEST_ASSERT( msg == "INFO: [AllowedAtomsValidation] Atom F is not in allowedAtoms list"); } BOOST_LOG(rdInfoLog) << "Finished" << std::endl; } void testDisallowedAtomsValidation() { BOOST_LOG(rdInfoLog) << "-----------------------\n Testing DisallowedAtoms validation" << std::endl; // std::vector atoms = {"F", "Cl", "Br"}; std::vector atoms = {9, 17, 35}; std::vector> atomList; for (auto &atom : atoms) { shared_ptr a(new Atom(atom)); atomList.push_back(a); } DisallowedAtomsValidation vm(atomList); std::string smi1; smi1 = "CC(=O)CF"; unique_ptr m1(SmilesToMol(smi1)); vector errout1 = vm.validate(*m1, true); for (auto &query : errout1) { std::string msg = query.message(); std::cout << msg << std::endl; TEST_ASSERT( msg == "INFO: [DisallowedAtomsValidation] Atom F is in disallowedAtoms list"); } BOOST_LOG(rdInfoLog) << "Finished" << std::endl; } void testFragment() { BOOST_LOG(rdInfoLog) << "-----------------------\n Testing fragment validation" << std::endl; string smi1, smi2, smi3, smi4, smi5, smi6; MolVSValidation vm; // testing MolVSValidation fragmentValidation // FragmentValidation should identify 1,2-dichloroethane. smi1 = "ClCCCl.c1ccccc1O"; unique_ptr m1(SmilesToMol(smi1, 0, false)); vector errout1 = vm.validate(*m1, true); for (auto &query : errout1) { std::string msg = query.message(); std::cout << msg << std::endl; TEST_ASSERT(msg == "INFO: [FragmentValidation] 1,2-dichloroethane is present"); } smi2 = "COCCOC.CCCBr"; unique_ptr m2(SmilesToMol(smi2, 0, false)); vector errout2 = vm.validate(*m2, true); for (auto &query : errout2) { std::string msg = query.message(); std::cout << msg << std::endl; TEST_ASSERT(msg == "INFO: [FragmentValidation] 1,2-dimethoxyethane is present"); } BOOST_LOG(rdInfoLog) << "Finished" << std::endl; } void testValidateSmiles() { BOOST_LOG(rdInfoLog) << "-----------------------\n Testing ValidateSmiles" << std::endl; // an invalid smiles should throw a ValueErrorException error try { vector errout1 = validateSmiles("3478q439g98h"); } catch (const ValueErrorException &e) { std::cout << e.message() << std::endl; TEST_ASSERT(e.message() == "SMILES Parse Error: syntax error for input: 3478q439g98h") }; vector errout2 = validateSmiles(""); for (auto &query : errout2) { std::string msg = query.message(); std::cout << msg << std::endl; TEST_ASSERT(msg == "ERROR: [NoAtomValidation] Molecule has no atoms"); } vector errout3 = validateSmiles("ClCCCl.c1ccccc1O"); for (auto &query : errout3) { std::string msg = query.message(); TEST_ASSERT(msg == "INFO: [FragmentValidation] 1,2-dichloroethane is present"); } BOOST_LOG(rdInfoLog) << "Finished" << std::endl; } int main() { testRDKitValidation(); testMolVSValidation(); testMolVSOptions(); testAllowedAtomsValidation(); testDisallowedAtomsValidation(); testFragment(); testValidateSmiles(); return 0; }