// // Copyright (C) 2021 Greg Landrum // // @@ All Rights Reserved @@ // This file is part of the RDKit. // The contents are covered by the terms of the BSD license // which is included in the file license.txt, found at the root // of the RDKit source tree. // // Tests of handling generics in substructure searching // #include #include #include #include #include #include #include #include #include #include using namespace RDKit; class _IsSubstructOf : public Catch::Matchers::MatcherBase { ROMol const *m_mol; SubstructMatchParameters m_ps; unsigned m_count; public: _IsSubstructOf(const ROMol &m, unsigned count, SubstructMatchParameters ps) : m_mol(&m), m_ps(std::move(ps)), m_count(count) {} bool match(const ROMol &query) const override { return SubstructMatch(*m_mol, query, m_ps).size() == m_count; } std::string describe() const override { std::ostringstream ss; ss << "does not have expected number of matches to " << MolToCXSmiles(*m_mol); return ss.str(); } }; static _IsSubstructOf IsSubstructOf(const ROMol &m, unsigned count, const SubstructMatchParameters &ps) { return _IsSubstructOf(m, count, ps); } TEST_CASE("edge cases", "[substructure][generics]") { SECTION("basics") { auto query = "O=C* |$;Foo_p$|"_smarts; REQUIRE(query); query->getAtomWithIdx(2)->setProp(common_properties::_QueryAtomGenericLabel, "Foo"); std::vector> tests = { {"O=CCC", 1}, {"O=CC=C", 1}, {"O=C", 0}}; SubstructMatchParameters ps; ps.useGenericMatchers = true; for (const auto &pr : tests) { SmilesParserParams smilesParms; smilesParms.removeHs = false; std::unique_ptr mol{SmilesToMol(pr.first, smilesParms)}; REQUIRE(mol); CHECK_THAT(*query, IsSubstructOf(*mol, pr.second, ps)); } } } TEST_CASE("Setting generic queries", "[substructure][generics]") { SECTION("cxsmiles") { auto m = "CO* |$;foo_p;ARY_p$|"_smiles; REQUIRE(m); auto atm = m->getAtomWithIdx(2); CHECK(atm->hasProp(common_properties::atomLabel)); GenericGroups::setGenericQueriesFromProperties(*m); CHECK(atm->hasProp(common_properties::_QueryAtomGenericLabel)); CHECK(atm->getProp( common_properties::_QueryAtomGenericLabel) == "ARY"); CHECK(!atm->hasProp(common_properties::atomLabel)); // make sure we didn't remove the other atom label CHECK(m->getAtomWithIdx(1)->hasProp(common_properties::atomLabel)); { auto smi = MolToCXSmiles(*m, SmilesWriteParams(), SmilesWrite::CXSmilesFields::CX_ATOM_LABELS); CHECK(smi == "*OC |$ARY_p;foo_p;$|"); std::unique_ptr m2{SmilesToMol(smi)}; REQUIRE(m2); GenericGroups::setGenericQueriesFromProperties(*m2); CHECK(m2->getAtomWithIdx(0)->hasProp( common_properties::_QueryAtomGenericLabel)); } { auto ctab = MolToV3KMolBlock(*m); CHECK(ctab.find("ARY") != std::string::npos); } } SECTION("CTAB") { auto m = R"CTAB( Mrv2108 11042108512D 0 0 0 0 0 999 V3000 M V30 BEGIN CTAB M V30 COUNTS 7 7 3 0 0 M V30 BEGIN ATOM M V30 1 C -0.0001 0.77 0 0 M V30 2 N -1.3336 0.0001 0 0 M V30 3 C -1.3336 -1.54 0 0 M V30 4 C -0.0001 -2.31 0 0 M V30 5 C 1.3336 -1.54 0 0 M V30 6 N 1.3336 0.0001 0 0 M V30 7 C -0.0001 2.31 0 0 M V30 END ATOM M V30 BEGIN BOND M V30 1 2 3 4 M V30 2 1 4 5 M V30 3 2 1 2 M V30 4 1 2 3 M V30 5 1 1 6 M V30 6 2 5 6 M V30 7 1 1 7 M V30 END BOND M V30 BEGIN SGROUP M V30 1 SUP 0 - M V30 ATOMS=(1 7) - M V30 LABEL="AOX" M V30 2 DAT 0 ATOMS=(1 6) FIELDNAME=data - M V30 FIELDDISP=" 1.3336 0.0001 DAU ALL 0 0" - M V30 MRV_FIELDDISP=0 FIELDDATA=value M V30 3 SUP 0 - M V30 ATOMS=(1 5) - M V30 LABEL="carbon" M V30 END SGROUP M V30 END CTAB M END )CTAB"_ctab; REQUIRE(m); auto atm = m->getAtomWithIdx(6); CHECK(getSubstanceGroups(*m).size() == 3); GenericGroups::setGenericQueriesFromProperties(*m); CHECK(atm->hasProp(common_properties::_QueryAtomGenericLabel)); CHECK(atm->getProp( common_properties::_QueryAtomGenericLabel) == "AOX"); CHECK(getSubstanceGroups(*m).size() == 2); { auto smi = MolToCXSmiles(*m, SmilesWriteParams(), SmilesWrite::CXSmilesFields::CX_ATOM_LABELS); CHECK(smi == "Cc1ncccn1 |$AOX_p;;;;;;$|"); std::unique_ptr m2{SmilesToMol(smi)}; REQUIRE(m2); GenericGroups::setGenericQueriesFromProperties(*m2); CHECK(m2->getAtomWithIdx(0)->hasProp( common_properties::_QueryAtomGenericLabel)); } { RWMol m2(*m); GenericGroups::convertGenericQueriesToSubstanceGroups(m2); CHECK(getSubstanceGroups(m2).size() == getSubstanceGroups(*m).size() + 1); CHECK(!m2.getAtomWithIdx(0)->hasProp( common_properties::_QueryAtomGenericLabel)); } { auto ctab = MolToV3KMolBlock(*m); CHECK(ctab.find("AOX") != std::string::npos); } } } namespace { bool no_match(const ROMol &mol, const std::vector &ids) { RDUNUSED_PARAM(mol); RDUNUSED_PARAM(ids); return false; } bool always_match(const ROMol &mol, const std::vector &ids) { RDUNUSED_PARAM(mol); RDUNUSED_PARAM(ids); return true; } } // namespace TEST_CASE("generics are compatible with extraFinalChecks", "[substructure][generics]") { SECTION("basics") { auto query = "O=C*"_smarts; REQUIRE(query); std::vector labels = {"Heteroaryl", "HAR"}; for (auto label : labels) { query->getAtomWithIdx(2)->setProp( common_properties::_QueryAtomGenericLabel, label); std::vector> tests = { {"O=CC1OC1", 0}, {"O=CCC1COC1", 0}, {"O=CC1CCC2C1CNC2", 0}, {"O=Cc1cccc2c1ccnc2", 1}, {"O=Cc1ccccc1", 0}, {"O=Cc1cccnc1", 1}, }; SubstructMatchParameters ps; ps.useGenericMatchers = true; ps.extraFinalCheck = always_match; for (const auto &pr : tests) { SmilesParserParams smilesParms; smilesParms.removeHs = false; std::unique_ptr mol{SmilesToMol(pr.first, smilesParms)}; REQUIRE(mol); CHECK_THAT(*query, IsSubstructOf(*mol, pr.second, ps)); } ps.extraFinalCheck = no_match; for (const auto &pr : tests) { SmilesParserParams smilesParms; smilesParms.removeHs = false; std::unique_ptr mol{SmilesToMol(pr.first, smilesParms)}; REQUIRE(mol); CHECK_THAT(*query, IsSubstructOf(*mol, 0, ps)); } } } } std::string getCXSmilesQuery(const std::string groupToTest) { std::ostringstream out; out << "*C=O |$" << groupToTest << ";;$|"; return out.str(); } std::string getMolQuery(const std::string groupToTest, bool supGroupFlag = false) { std::ostringstream out; if (!supGroupFlag) { // pad the groupToTest with spaces to make it 3 characters long std::string groupToTestPadded = groupToTest; while (groupToTestPadded.length() < 3) { groupToTestPadded += " "; } out << R"MOLQQQ( Ketcher 3212315322D 1 1.00000 0.00000 0 3 2 0 0 0 0 0 0 0 0999 V2000 9.2250 -1.6000 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 10.0910 -2.1000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 10.9570 -1.6000 0.0000 )MOLQQQ" << groupToTestPadded << R"MOLQQQ( 0 0 0 0 0 0 0 0 0 0 0 0 1 2 2 0 0 0 2 3 1 0 0 0 M END )MOLQQQ"; } else { out << R"MOLQQQ( Ketcher 3202311 72D 1 1.00000 0.00000 0 3 2 0 0 0 0 0 0 0 0999 V2000 6.3840 61.5000 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 7.2500 61.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 8.1160 61.5000 0.0000 * 0 0 0 0 0 0 0 0 0 0 0 0 1 2 2 0 0 0 2 3 1 0 0 0 M STY 1 1 SUP M SLB 1 1 1 M SAL 1 1 3 M SBL 1 1 2 M SMT 1 )MOLQQQ" << groupToTest << R"MOLQQQ( M END )MOLQQQ"; } return out.str(); } std::string getMRVQuery(const std::string groupToTest) { std::ostringstream out; out << R"MRVQQQ( )MRVQQQ"; return out.str(); } void runTest(RWMol *query, std::vector> &tests) { GenericGroups::setGenericQueriesFromProperties(*query, true, true); SubstructMatchParameters ps; ps.useGenericMatchers = true; ps.extraFinalCheck = always_match; for (const auto &pr : tests) { SmilesParserParams smilesParms; smilesParms.removeHs = false; std::unique_ptr mol{SmilesToMol(pr.first, smilesParms)}; REQUIRE(mol); CHECK_THAT(*query, IsSubstructOf(*mol, pr.second, ps)); } ps.extraFinalCheck = no_match; for (const auto &pr : tests) { SmilesParserParams smilesParms; smilesParms.removeHs = false; std::unique_ptr mol{SmilesToMol(pr.first, smilesParms)}; REQUIRE(mol); CHECK_THAT(*query, IsSubstructOf(*mol, 0, ps)); } } void runCxsmilesTest(const std::string groupToTest, std::vector> &tests, bool manualInsertOfGroupFlag = false) { std::unique_ptr query; if (!manualInsertOfGroupFlag) { std::string queryString = getCXSmilesQuery(groupToTest); query = std::unique_ptr(SmartsToMol(queryString)); } else { query = std::unique_ptr(SmartsToMol("*C=O |$;;$|")); query->getAtomWithIdx(0)->setProp(RDKit::common_properties::atomLabel, groupToTest); } runTest(query.get(), tests); } void runMolTest(const std::string groupToTest, std::vector> &tests, bool supGroupFlag) { std::string queryString = getMolQuery(groupToTest, supGroupFlag); auto query = std::unique_ptr(MolBlockToMol(queryString, false, false, false)); runTest(query.get(), tests); } void runMRVTest(const std::string groupToTest, std::vector> &tests) { std::string queryString = getMRVQuery(groupToTest); auto query = std::unique_ptr(MrvBlockToMol(queryString, false, false)); runTest(query.get(), tests); } void runTest(const std::string groupToTest, std::vector> &tests) { runCxsmilesTest(groupToTest, tests, false); runCxsmilesTest(groupToTest, tests, true); runMolTest(groupToTest, tests, true); if (groupToTest.size() <= 3) { runMolTest(groupToTest, tests, false); } runMolTest(groupToTest, tests, true); runMRVTest(groupToTest, tests); } TEST_CASE("alkyl", "[substructure][generics]") { SECTION("Basics") { std::vector> tests = { {"O=CCC", 1}, {"O=CC=C", 0}, {"O=CCC=C", 0}, {"O=CCO", 0}, {"O=CC", 1}, {"O=CC[H]", 1}, {"O=C[H]", 0}, {"O=CC.CC(C=O)C", 2}, {"O=CCC1CC1", 0}}; runTest("Alkyl", tests); runTest("ALK", tests); } } TEST_CASE("alkyl or h", "[substructure][generics]") { SECTION("Basics") { std::vector> tests = { {"O=CCC", 1}, {"O=CC=C", 0}, {"O=CCC=C", 0}, {"O=CCO", 0}, {"O=CC", 1}, {"O=CC[H]", 1}, {"O=C[H]", 1}, {"O=CC.CC(C=O)C", 2}, {"O=CCC1CC1", 0}}; runTest("AlkylH", tests); runTest("ALH", tests); } } TEST_CASE("carboacyclic", "[substructure][generics]") { SECTION("basics") { std::vector> tests = { {"O=CC1CC1", 0}, {"O=CCCCC1CCC1", 0}, {"O=CCCCC", 1}, {"O=CC([H])([H])([H])", 1}, {"O=CCCCO", 0}, {"O=C[H]", 0}, }; runTest("Carboacyclic", tests); runTest("ABC", tests); } } TEST_CASE("carboacyclic or h", "[substructure][generics]") { SECTION("basics") { std::vector> tests = { {"O=CC1CC1", 0}, {"O=CCCCC1CCC1", 0}, {"O=CCCCC", 1}, {"O=CC([H])([H])([H])", 1}, {"O=CCCCO", 0}, {"O=C[H]", 1}, }; runTest("CarboacyclicH", tests); runTest("ABH", tests); } } TEST_CASE("acyclic", "[substructure][generics]") { SECTION("basics") { std::vector> tests = { {"O=CC1CC1", 0}, {"O=CCCCC1CCC1", 0}, {"O=CCCCC", 1}, {"O=CCCCO", 1}, {"O=C[H]", 0}, }; runTest("Acyclic", tests); runTest("ACY", tests); } } TEST_CASE("acyclic or h", "[substructure][generics]") { SECTION("basics") { std::vector> tests = { {"O=CC1CC1", 0}, {"O=CCCCC1CCC1", 0}, {"O=CCCCC", 1}, {"O=CCCCO", 1}, {"O=C[H]", 1}, }; runTest("AcyclicH", tests); runTest("ACH", tests); } } TEST_CASE("Alkenyl", "[substructure][generics]") { SECTION("basics") { std::vector> tests = { {"O=CCC", 0}, {"O=CC=C", 1}, {"O=CCC=C", 1}, {"O=CCC#C", 0}, {"O=CC=N", 0}, {"O=CC=C[H]", 1}, {"O=CCC1CC=C1", 0}, {"O=C[H]", 0}}; runTest("Alkenyl", tests); runTest("AEL", tests); } } TEST_CASE("Alkenyl or h", "[substructure][generics]") { SECTION("basics") { std::vector> tests = { {"O=CCC", 0}, {"O=CC=C", 1}, {"O=CCC=C", 1}, {"O=CCC#C", 0}, {"O=CC=N", 0}, {"O=CC=C[H]", 1}, {"O=CCC1CC=C1", 0}, {"O=C[H]", 1}}; runTest("AlkenylH", tests); runTest("AEH", tests); } } TEST_CASE("Alkynyl", "[substructure][generics]") { SECTION("basics") { std::vector> tests = { {"O=CCC", 0}, {"O=CC#C", 1}, {"O=CCC#C", 1}, {"O=CCC=C", 0}, {"O=CC#N", 0}, {"O=CC#C[H]", 1}, {"O=CCC1CC#C1", 0}, {"O=C([H])[H]", 0}}; runTest("Alkynyl", tests); runTest("AYL", tests); } } TEST_CASE("Alkynyl or h", "[substructure][generics]") { SECTION("basics") { std::vector> tests = { {"O=CCC", 0}, {"O=CC#C", 1}, {"O=CCC#C", 1}, {"O=CCC=C", 0}, {"O=CC#N", 0}, {"O=CC#C[H]", 1}, {"O=CCC1CC#C1", 0}, {"O=C([H])[H]", 2}}; runTest("AlkynylH", tests); runTest("AYH", tests); } } TEST_CASE("Heteroacyclic", "[substructure][generics]") { SECTION("basics") { std::vector> tests = { {"O=CC1OC1", 0}, {"O=CCCCC1CCC1", 0}, {"O=CCCCC", 0}, {"O=CCCCO", 1}, {"O=C[H]", 0}, }; runTest("Heteroacyclic", tests); runTest("AHC", tests); } } TEST_CASE("Heteroacyclic or h", "[substructure][generics]") { SECTION("basics") { std::vector> tests = { {"O=CC1OC1", 0}, {"O=CCCCC1CCC1", 0}, {"O=CCCCC", 0}, {"O=CCCCO", 1}, {"O=C[H]", 1}, }; runTest("HeteroacyclicH", tests); runTest("AHH", tests); } } TEST_CASE("alkoxy", "[substructure][generics]") { SECTION("basics") { std::vector> tests = { {"O=CCC", 0}, {"O=COC", 1}, {"O=COC=C", 0}, {"O=COC=C", 0}, {"O=COCO", 0}, {"O=CO", 0}, {"O=CO[H]", 0}, {"O=COC1CC1", 0}, {"O=C([H])[H]", 0}}; runTest("Alkoxy", tests); runTest("AOX", tests); } } TEST_CASE("alkoxy or H", "[substructure][generics]") { SECTION("basics") { std::vector> tests = { {"O=CCC", 0}, {"O=COC", 1}, {"O=COC=C", 0}, {"O=COC=C", 0}, {"O=COCO", 0}, {"O=CO", 0}, {"O=CO[H]", 0}, {"O=COC1CC1", 0}, {"O=C([H])[H]", 2}}; runTest("AlkoxyH", tests); runTest("AOH", tests); } } TEST_CASE("carboaryl", "[substructure][generics]") { SECTION("basics") { std::vector> tests = { {"O=CC1CCCCC1", 0}, {"O=Cc1ccccc1", 1}, {"O=CC1=CC=CC2=C1CCCC2", 0}, {"O=Cc1cccnc1", 0}, {"O=CC1=CC=CC2=C1CCNC2", 0}, {"O=C([H])[H]", 0}}; runTest("Carboaryl", tests); runTest("ARY", tests); } } TEST_CASE("carboaryl or H", "[substructure][generics]") { SECTION("basics") { std::vector> tests = { {"O=CC1CCCCC1", 0}, {"O=Cc1ccccc1", 1}, {"O=CC1=CC=CC2=C1CCCC2", 0}, {"O=Cc1cccnc1", 0}, {"O=CC1=CC=CC2=C1CCNC2", 0}, {"O=C([H])[H]", 2}}; runTest("CarboarylH", tests); runTest("ARH", tests); } } TEST_CASE("cycloalkyl", "[substructure][generics]") { SECTION("basics") { std::vector> tests = { {"O=CCC", 0}, {"O=CC1CC1", 1}, {"O=CC1C(O)C1", 1}, {"O=CCC1CC1", 0}, {"O=CC1C=C1", 0}, {"O=CC1OC1", 0}, {"O=CC1CC(=O)C1", 1}, {"O=CC1CC2CCC1CC2", 1}, {"O=CC1CC2CCC1NC2", 0}, {"O=CC1CCCC2CC=CCC12", 0}, {"O=C([H])[H]", 0}}; runTest("Carbocycloalkyl", tests); runTest("CAL", tests); } } TEST_CASE("cycloalkyl or H", "[substructure][generics]") { SECTION("basics") { std::vector> tests = { {"O=CCC", 0}, {"O=CC1CC1", 1}, {"O=CC1C(O)C1", 1}, {"O=CCC1CC1", 0}, {"O=CC1C=C1", 0}, {"O=CC1OC1", 0}, {"O=CC1CC(=O)C1", 1}, {"O=CC1CC2CCC1CC2", 1}, {"O=CC1CC2CCC1NC2", 0}, {"O=CC1CCCC2CC=CCC12", 0}, {"O=C([H])[H]", 2}}; runTest("CarbocycloalkylH", tests); runTest("CAH", tests); } } TEST_CASE("carbocyclic", "[substructure][generics]") { SECTION("basics") { std::vector> tests = { {"O=CC1CCCCC1", 1}, {"O=Cc1ccccc1", 1}, {"O=Cc1ccc(O)cc1", 1}, {"O=CC1=CC=CC2=C1CCCC2", 1}, {"O=Cc1cccnc1", 0}, {"O=CC1=CC=CC2=C1CCNC2", 0}, {"O=C([H])[H]", 0}, }; runTest("Carbocyclic", tests); runTest("CBC", tests); } } TEST_CASE("carbocyclic or H", "[substructure][generics]") { SECTION("basics") { std::vector> tests = { {"O=CC1CCCCC1", 1}, {"O=Cc1ccccc1", 1}, {"O=Cc1ccc(O)cc1", 1}, {"O=CC1=CC=CC2=C1CCCC2", 1}, {"O=Cc1cccnc1", 0}, {"O=CC1=CC=CC2=C1CCNC2", 0}, {"O=C([H])[H]", 2}, }; runTest("CarbocyclicH", tests); runTest("CBH", tests); } } TEST_CASE("cycloalkenyl", "[substructure][generics]") { SECTION("basics") { std::vector> tests = { {"O=CCC", 0}, {"O=Cc1ccccc1", 1}, {"O=CC1CC=CCC1", 1}, {"O=CC1CC2CCC1CC2", 0}, {"O=CC1CC2CCC1C=C2", 0}, // <- one of the SSSR rings doesn't match {"O=CC1CC2C=CC1NC2", 0}, {"O=CC1CCCC2=C1CCCC2", 1}, {"O=CC1CC=CC2C1C=CC1CCC=CC21", 1}, {"O=CC1CC=CC2C1C=CC1CNC=CC21", 0}, {"O=C([H])[H]", 0}, }; runTest("Carbocycloalkenyl", tests); runTest("CEL", tests); } } TEST_CASE("cycloalkenyl or H", "[substructure][generics]") { SECTION("basics") { std::vector> tests = { {"O=CCC", 0}, {"O=Cc1ccccc1", 1}, {"O=CC1CC=CCC1", 1}, {"O=CC1CC2CCC1CC2", 0}, {"O=CC1CC2CCC1C=C2", 0}, // <- one of the SSSR // rings doesn't match {"O=CC1CC2C=CC1NC2", 0}, {"O=CC1CCCC2=C1CCCC2", 1}, {"O=CC1CC=CC2C1C=CC1CCC=CC21", 1}, {"O=CC1CC=CC2C1C=CC1CNC=CC21", 0}, {"O=C([H])[H]", 2}, }; runTest("CarbocycloalkenylH", tests); runTest("CEH", tests); } } TEST_CASE("heterocyclic", "[substructure][generics]") { SECTION("basics") { std::vector> tests = { {"O=CC1OC1", 1}, {"O=CC1CC1", 0}, {"O=CCC1COC1", 0}, {"O=CC1CCC2C1CNC2", 1}, {"O=Cc1ccccc1", 0}, {"O=Cc1cccnc1", 1}, {"O=C([H])[H]", 0}, }; runTest("Heterocyclic", tests); runTest("CHC", tests); } } TEST_CASE("heterocyclic or H", "[substructure][generics]") { SECTION("basics") { std::vector> tests = { {"O=CC1OC1", 1}, {"O=CC1CC1", 0}, {"O=CCC1COC1", 0}, {"O=CC1CCC2C1CNC2", 1}, {"O=Cc1ccccc1", 0}, {"O=Cc1cccnc1", 1}, {"O=C([H])[H]", 2}, }; runTest("HeterocyclicH", tests); runTest("CHH", tests); } } TEST_CASE("no carbon ring", "[substructure][generics]") { SECTION("basics") { std::vector> tests = { {"O=CN1NC1", 0}, {"O=CN1NN1", 1}, {"O=CCN1NNN1", 0}, {"O=CN1NNNN2N1N2", 1}, {"O=CN1NNNN2N1C2", 0}, {"O=CNNN", 0}, {"O=C1NNN1", 0}, {"O=C([H])[H]", 0}, }; runTest("NoCarbonRing", tests); runTest("CXX", tests); } } TEST_CASE("no carbon ring or H", "[substructure][generics]") { SECTION("basics") { std::vector> tests = { {"O=CN1NC1", 0}, {"O=CN1NN1", 1}, {"O=CCN1NNN1", 0}, {"O=CN1NNNN2N1N2", 1}, {"O=CN1NNNN2N1C2", 0}, {"O=CNNN", 0}, {"O=C1NNN1", 0}, {"O=C([H])[H]", 2}, }; runTest("NoCarbonRingH", tests); runTest("CXH", tests); } } TEST_CASE("cyclic", "[substructure][generics]") { SECTION("basics") { std::vector> tests = { {"O=CC1CCC1", 1}, {"O=C1CCCC1", 0}, {"O=CCC1CCC1", 0}, {"O=CC1CNC1", 1}, {"O=Cc1ccccc1", 1}, {"O=CC1CC2C1C2", 1}, {"O=CC1CC2CCC1CC2", 1}, {"O=C([H])[H]", 0}, }; runTest("Cyclic", tests); runTest("CYC", tests); } } TEST_CASE("cyclic or H", "[substructure][generics]") { SECTION("basics") { std::vector> tests = { {"O=CC1CCC1", 1}, {"O=C1CCCC1", 0}, {"O=CCC1CCC1", 0}, {"O=CC1CNC1", 1}, {"O=Cc1ccccc1", 1}, {"O=CC1CC2C1C2", 1}, {"O=CC1CC2CCC1CC2", 1}, {"O=C([H])[H]", 2}, }; runTest("CyclicH", tests); runTest("CYH", tests); } } TEST_CASE("heteroaryl", "[substructure][generics]") { SECTION("basics") { std::string query = "*C=O |$HAH;;$|"; std::vector> tests = { {"O=CC1OC1", 0}, {"O=CCC1COC1", 0}, {"O=CC1CCC2C1CNC2", 0}, {"O=Cc1cccc2c1ccnc2", 1}, {"O=Cc1ccccc1", 0}, {"O=Cc1cccnc1", 1}, {"O=C([H])[H]", 0}, }; runTest("Heteroaryl", tests); runTest("HAR", tests); } } TEST_CASE("heteroaryl or H", "[substructure][generics]") { SECTION("basics") { std::string query = "*C=O |$HAH;;$|"; std::vector> tests = { {"O=CC1OC1", 0}, {"O=CCC1COC1", 0}, {"O=CC1CCC2C1CNC2", 0}, {"O=Cc1cccc2c1ccnc2", 1}, {"O=Cc1ccccc1", 0}, {"O=Cc1cccnc1", 1}, {"O=C([H])[H]", 2}, }; runTest("HeteroarylH", tests); runTest("HAH", tests); } } TEST_CASE("group", "[substructure][generics]") { SECTION("basics") { std::vector> tests = { {"O=CC1CCC1", 1}, {"O=CCCCC", 1}, {"O=CCC1CCC1", 1}, {"O=CC1CNC1", 1}, {"O=Cc1ccccc1", 1}, {"O=CC1CC2C1C2", 1}, {"O=CC1CC2CCC1CC2", 1}, {"O=C([H])[H]", 0}, }; runTest("Group", tests); runTest("G", tests); } } TEST_CASE("group or H", "[substructure][generics]") { SECTION("basics") { std::vector> tests = { {"O=CC1CCC1", 1}, {"O=CCCCC", 1}, {"O=CCC1CCC1", 1}, {"O=CC1CNC1", 1}, {"O=Cc1ccccc1", 1}, {"O=CC1CC2C1C2", 1}, {"O=CC1CC2CCC1CC2", 1}, {"O=C([H])[H]", 2}, }; runTest("GroupH", tests); runTest("GH", tests); } } TEST_CASE("group with ring", "[substructure][generics]") { SECTION("basics") { std::vector> tests = { {"O=CCCCC", 0}, {"O=CCC1CCC1", 1}, {"O=CC1CNC1", 1}, {"O=Cc1ccccc1", 1}, {"O=CC1CC2C1C2", 1}, {"O=CC1CC2CCC1CC2", 1}, {"O=C([H])[H]", 0}, }; runTest("Group*", tests); runTest("G*", tests); } } TEST_CASE("group with ring or H", "[substructure][generics]") { SECTION("basics") { std::vector> tests = { {"O=CC1CCC1", 1}, {"O=CCCCC", 0}, {"O=CCC1CCC1", 1}, {"O=CC1CNC1", 1}, {"O=Cc1ccccc1", 1}, {"O=CC1CC2C1C2", 1}, {"O=CC1CC2CCC1CC2", 1}, {"O=C([H])[H]", 2}, }; runTest("GroupH*", tests); runTest("GH*", tests); } } TEST_CASE("from mol blocks using atom labels") { SECTION("basics") { auto qry = R"CTAB( Mrv2211 08032317212D 0 0 0 0 0 999 V3000 M V30 BEGIN CTAB M V30 COUNTS 2 1 0 0 0 M V30 BEGIN ATOM M V30 1 C -6.5833 1.9167 0 0 M V30 2 ARY -5.2497 2.6867 0 0 M V30 END ATOM M V30 BEGIN BOND M V30 1 1 1 2 M V30 END BOND M V30 END CTAB M END )CTAB"_ctab; REQUIRE(qry); SubstructMatchParameters ps; ps.useGenericMatchers = true; auto m1 = "CC"_smiles; REQUIRE(m1); auto m2 = "Cc1ccccc1"_smiles; REQUIRE(m2); CHECK(SubstructMatch(*m1, *qry, ps).empty()); CHECK(SubstructMatch(*m2, *qry, ps).empty()); GenericGroups::setGenericQueriesFromProperties(*qry, true); CHECK(SubstructMatch(*m1, *qry, ps).empty()); CHECK(SubstructMatch(*m2, *qry, ps).size() == 1); } } void testGenericQueries() { BOOST_LOG(rdInfoLog) << "-----------------------\n Testing generic queries: " "Set GenericQueriesFromProperties" << std::endl; auto query = R"CTAB(alkquery.mol Generated by WebMolKit 0 0 0 0 0 999 V3000 M V30 BEGIN CTAB M V30 COUNTS 2 1 1 0 1 M V30 BEGIN ATOM M V30 1 O 7.4214 -9.6318 0.0000 0 M V30 2 * 8.5430 -10.2809 0.0000 0 M V30 END ATOM M V30 BEGIN BOND M V30 1 1 1 2 M V30 END BOND M V30 BEGIN SGROUP M V30 1 SUP 0 LABEL=ALK ATOMS=(1 2) M V30 END SGROUP M V30 END CTAB M END)CTAB"_ctab; TEST_ASSERT(query); std::string target_smi{"c1c(OF)cccc1"}; std::unique_ptr t{SmilesToMol(target_smi)}; TEST_ASSERT(t); MolOps::AdjustQueryParameters params; auto m = new RWMol(*query); TEST_ASSERT(m); MolOps::adjustQueryProperties(*m, ¶ms); SubstructMatchParameters params_match; params_match.useQueryQueryMatches = true; params_match.useGenericMatchers = true; params_match.maxMatches = 1; auto matchVect = SubstructMatch(*t, *m, params_match); TEST_ASSERT(matchVect.size() == 1); delete m; m = new RWMol(*query); GenericGroups::adjustQueryPropertiesWithGenericGroups(*m, ¶ms); matchVect = SubstructMatch(*t, *m, params_match); TEST_ASSERT(matchVect.size() == 0); delete m; m = new RWMol(*query); GenericGroups::adjustQueryPropertiesWithGenericGroups(*m); matchVect = SubstructMatch(*t, *m, params_match); TEST_ASSERT(matchVect.size() == 0); delete m; }