// // Copyright (C) 2020 Greg Landrum // @@ All Rights Reserved @@ // This file is part of the RDKit. // The contents are covered by the terms of the BSD license // which is included in the file license.txt, found at the root // of the RDKit source tree. // #include #include #include #include #include #include #include #include #include "MolEnumerator.h" using namespace RDKit; TEST_CASE("PositionVariation", "[MolEnumerator]") { auto mol1 = R"CTAB( Mrv2007 06232015292D 0 0 0 0 0 999 V3000 M V30 BEGIN CTAB M V30 COUNTS 9 8 0 0 0 M V30 BEGIN ATOM M V30 1 C -1.7083 2.415 0 0 M V30 2 C -3.042 1.645 0 0 M V30 3 C -3.042 0.105 0 0 M V30 4 N -1.7083 -0.665 0 0 M V30 5 C -0.3747 0.105 0 0 M V30 6 C -0.3747 1.645 0 0 M V30 7 * -0.8192 1.3883 0 0 M V30 8 O -0.8192 3.6983 0 0 M V30 9 C 0.5145 4.4683 0 0 M V30 END ATOM M V30 BEGIN BOND M V30 1 1 1 2 M V30 2 2 2 3 M V30 3 1 3 4 M V30 4 2 4 5 M V30 5 1 5 6 M V30 6 2 1 6 M V30 7 1 7 8 ENDPTS=(3 1 5 6) ATTACH=ANY M V30 8 1 8 9 M V30 END BOND M V30 END CTAB M END)CTAB"_ctab; REQUIRE(mol1); auto mol2 = R"CTAB( Mrv2007 06242006032D 0 0 0 0 0 999 V3000 M V30 BEGIN CTAB M V30 COUNTS 10 8 0 0 0 M V30 BEGIN ATOM M V30 1 C -1.7083 2.415 0 0 M V30 2 C -3.042 1.645 0 0 M V30 3 C -3.042 0.105 0 0 M V30 4 N -1.7083 -0.665 0 0 M V30 5 C -0.3747 0.105 0 0 M V30 6 C -0.3747 1.645 0 0 M V30 7 * -3.042 0.875 0 0 M V30 8 F -5.0434 0.875 0 0 M V30 9 * -1.0415 2.03 0 0 M V30 10 Cl -1.0415 4.34 0 0 M V30 END ATOM M V30 BEGIN BOND M V30 1 1 1 2 M V30 2 2 2 3 M V30 3 1 3 4 M V30 4 2 4 5 M V30 5 1 5 6 M V30 6 2 1 6 M V30 7 1 7 8 ENDPTS=(2 2 3) ATTACH=ANY M V30 8 1 9 10 ENDPTS=(2 1 6) ATTACH=ANY M V30 END BOND M V30 END CTAB M END )CTAB"_ctab; REQUIRE(mol2); SECTION("PositionVariationOp unit tests 1") { MolEnumerator::PositionVariationOp op(*mol1); auto vcnts = op.getVariationCounts(); REQUIRE(vcnts.size() == 1); CHECK(vcnts[0] == 3); { std::vector elems{1}; std::unique_ptr newmol(op(elems)); CHECK(MolToSmiles(*newmol) == "COc1ccccn1"); } { std::vector elems{2}; std::unique_ptr newmol(op(elems)); CHECK(MolToSmiles(*newmol) == "COc1cccnc1"); } } SECTION("PositionVariationOp unit tests 2") { MolEnumerator::PositionVariationOp op(*mol2); auto vcnts = op.getVariationCounts(); REQUIRE(vcnts.size() == 2); CHECK(vcnts[0] == 2); CHECK(vcnts[1] == 2); { std::vector elems{1, 0}; std::unique_ptr newmol(op(elems)); CHECK(MolToSmiles(*newmol) == "Fc1cc(Cl)ccn1"); } { std::vector elems{0, 1}; std::unique_ptr newmol(op(elems)); CHECK(MolToSmiles(*newmol) == "Fc1cncc(Cl)c1"); } } SECTION("PositionVariationOp unit tests 3") { MolEnumerator::PositionVariationOp op; op.initFromMol(*mol1); auto vcnts = op.getVariationCounts(); REQUIRE(vcnts.size() == 1); CHECK(vcnts[0] == 3); } SECTION("enumeration basics 1") { MolEnumerator::MolEnumeratorParams ps; ps.dp_operation = std::shared_ptr( new MolEnumerator::PositionVariationOp()); auto bundle = MolEnumerator::enumerate(*mol1, ps); CHECK(bundle.size() == 3); CHECK(bundle.getMols()[0]->getAtomWithIdx(0)->getDegree() == 3); CHECK(bundle.getMols()[0]->getAtomWithIdx(0)->getValence( Atom::ValenceType::IMPLICIT) == 0); std::vector tsmis = {"COc1ccncc1", "COc1ccccn1", "COc1cccnc1"}; std::vector smis; for (const auto &molp : bundle.getMols()) { smis.push_back(MolToSmiles(*molp)); } CHECK(smis == tsmis); } SECTION("enumeration basics 2") { MolEnumerator::MolEnumeratorParams ps; ps.dp_operation = std::shared_ptr( new MolEnumerator::PositionVariationOp()); auto bundle = MolEnumerator::enumerate(*mol2, ps); CHECK(bundle.size() == 4); std::vector tsmis = {"Fc1cnccc1Cl", "Fc1cncc(Cl)c1", "Fc1cc(Cl)ccn1", "Fc1ccc(Cl)cn1"}; std::vector smis; for (const auto &molp : bundle.getMols()) { smis.push_back(MolToSmiles(*molp)); } CHECK(smis == tsmis); } } TEST_CASE("LINKNODE", "[MolEnumerator]") { auto mol1 = R"CTAB(one linknode Mrv2007 06222005102D 0 0 0 0 0 999 V3000 M V30 BEGIN CTAB M V30 COUNTS 6 6 0 0 0 M V30 BEGIN ATOM M V30 1 C 8.25 12.1847 0 0 M V30 2 C 6.9164 12.9547 0 0 M V30 3 C 6.9164 14.4947 0 0 M V30 4 C 9.5836 14.4947 0 0 M V30 5 C 9.5836 12.9547 0 0 M V30 6 O 8.25 10.6447 0 0 M V30 END ATOM M V30 BEGIN BOND M V30 1 1 1 2 M V30 2 1 2 3 M V30 3 1 4 5 M V30 4 1 1 5 M V30 5 1 3 4 M V30 6 1 1 6 M V30 END BOND M V30 LINKNODE 1 4 2 1 2 1 5 M V30 END CTAB M END)CTAB"_ctab; REQUIRE(mol1); SECTION("LinkNode unit tests 1") { MolEnumerator::LinkNodeOp op; op.initFromMol(*mol1); auto vcnts = op.getVariationCounts(); REQUIRE(vcnts.size() == 1); CHECK(vcnts[0] == 4); { std::vector elems{0}; std::unique_ptr newmol(op(elems)); CHECK(MolToSmiles(*newmol) == "OC1CCCC1"); } { std::vector elems{2}; std::unique_ptr newmol(op(elems)); CHECK(MolToSmiles(*newmol) == "OC1CCCCC(O)C1O"); } } SECTION("LinkNode unit tests 2") { auto mol2 = R"CTAB(two linknodes Mrv2014 07072016412D 0 0 0 0 0 999 V3000 M V30 BEGIN CTAB M V30 COUNTS 7 7 0 0 0 M V30 BEGIN ATOM M V30 1 C 8.25 12.1847 0 0 M V30 2 C 6.9164 12.9547 0 0 M V30 3 C 7.2366 14.4611 0 0 M V30 4 C 8.7681 14.622 0 0 M V30 5 C 9.3945 13.2151 0 0 M V30 6 O 8.25 10.6447 0 0 M V30 7 F 9.5382 15.9557 0 0 M V30 END ATOM M V30 BEGIN BOND M V30 1 1 1 2 M V30 2 1 2 3 M V30 3 1 4 5 M V30 4 1 1 5 M V30 5 1 3 4 M V30 6 1 1 6 M V30 7 1 4 7 M V30 END BOND M V30 LINKNODE 1 3 2 1 2 1 5 M V30 LINKNODE 1 4 2 4 3 4 5 M V30 END CTAB M END)CTAB"_ctab; REQUIRE(mol2); MolEnumerator::LinkNodeOp op; op.initFromMol(*mol2); auto vcnts = op.getVariationCounts(); REQUIRE(vcnts.size() == 2); CHECK(vcnts[0] == 3); CHECK(vcnts[1] == 4); { std::vector elems{0, 0}; std::unique_ptr newmol(op(elems)); CHECK(MolToSmiles(*newmol) == "OC1CCC(F)C1"); } { std::vector elems{2, 0}; std::unique_ptr newmol(op(elems)); CHECK(MolToSmiles(*newmol) == "OC1CCC(F)CC(O)C1O"); } { std::vector elems{1, 2}; std::unique_ptr newmol(op(elems)); CHECK(MolToSmiles(*newmol) == "OC1CCC(F)C(F)C(F)CC1O"); } } SECTION("LinkNode unit tests 3") { auto mol = R"CTAB(neighboring linknodes Mrv2014 07082008052D 0 0 0 0 0 999 V3000 M V30 BEGIN CTAB M V30 COUNTS 9 9 0 0 0 M V30 BEGIN ATOM M V30 1 C 8.25 12.1847 0 0 M V30 2 C 6.9164 12.9547 0 0 M V30 3 C 7.2366 14.4611 0 0 M V30 4 C 8.7681 14.622 0 0 M V30 5 C 9.3945 13.2151 0 0 M V30 6 O 8.25 10.6447 0 0 M V30 7 F 9.5382 15.9557 0 0 M V30 8 C 9.5837 9.8747 0 0 M V30 9 C 10.9008 12.8949 0 0 M V30 END ATOM M V30 BEGIN BOND M V30 1 1 1 2 M V30 2 1 2 3 M V30 3 1 4 5 M V30 4 1 1 5 M V30 5 1 3 4 M V30 6 1 1 6 M V30 7 1 4 7 M V30 8 1 6 8 M V30 9 1 5 9 M V30 END BOND M V30 LINKNODE 1 3 2 1 2 1 5 M V30 LINKNODE 1 2 2 5 1 5 4 M V30 END CTAB M END )CTAB"_ctab; REQUIRE(mol); MolEnumerator::LinkNodeOp op; op.initFromMol(*mol); auto vcnts = op.getVariationCounts(); REQUIRE(vcnts.size() == 2); CHECK(vcnts[0] == 3); CHECK(vcnts[1] == 2); { std::vector elems{0, 0}; std::unique_ptr newmol(op(elems)); CHECK(MolToSmiles(*newmol) == "COC1CCC(F)C1C"); } { std::vector elems{1, 0}; std::unique_ptr newmol(op(elems)); CHECK(MolToSmiles(*newmol) == "COC1CCC(F)C(C)C1OC"); } { std::vector elems{1, 1}; std::unique_ptr newmol(op(elems)); CHECK(MolToSmiles(*newmol) == "COC1CCC(F)C(C)C(C)C1OC"); } } SECTION("LinkNode unit tests: isotopes") { auto mol = R"CTAB(isotopes and linknodes Mrv2014 07082008362D 0 0 0 0 0 999 V3000 M V30 BEGIN CTAB M V30 COUNTS 4 4 0 0 0 M V30 BEGIN ATOM M V30 1 C 0.5 19.6392 0 0 MASS=14 M V30 2 C -0.27 18.3054 0 0 MASS=15 M V30 3 C 1.27 18.3054 0 0 MASS=13 M V30 4 O 2.6037 17.5354 0 0 MASS=12 M V30 END ATOM M V30 BEGIN BOND M V30 1 1 1 2 M V30 2 1 3 1 M V30 3 1 2 3 M V30 4 1 3 4 M V30 END BOND M V30 LINKNODE 1 3 2 3 1 3 2 M V30 END CTAB M END )CTAB"_ctab; REQUIRE(mol); MolEnumerator::LinkNodeOp op; op.initFromMol(*mol); auto vcnts = op.getVariationCounts(); REQUIRE(vcnts.size() == 1); CHECK(vcnts[0] == 3); { std::vector elems{1}; std::unique_ptr newmol(op(elems)); CHECK(MolToSmiles(*newmol) == "[12OH][13CH]1[14CH2][15CH2][13CH]1[12OH]"); } } SECTION("LinkNode unit tests: no linknodes") { auto mol = R"CTAB(no linknodes Mrv2007 06222005102D 0 0 0 0 0 999 V3000 M V30 BEGIN CTAB M V30 COUNTS 6 6 0 0 0 M V30 BEGIN ATOM M V30 1 C 8.25 12.1847 0 0 M V30 2 C 6.9164 12.9547 0 0 M V30 3 C 6.9164 14.4947 0 0 M V30 4 C 9.5836 14.4947 0 0 M V30 5 C 9.5836 12.9547 0 0 M V30 6 O 8.25 10.6447 0 0 M V30 END ATOM M V30 BEGIN BOND M V30 1 1 1 2 M V30 2 1 2 3 M V30 3 1 4 5 M V30 4 1 1 5 M V30 5 1 3 4 M V30 6 1 1 6 M V30 END BOND M V30 END CTAB M END)CTAB"_ctab; REQUIRE(mol); MolEnumerator::LinkNodeOp op; op.initFromMol(*mol); auto vcnts = op.getVariationCounts(); REQUIRE(vcnts.size() == 0); } SECTION("enumeration basics 1") { MolEnumerator::MolEnumeratorParams ps; ps.dp_operation = std::shared_ptr( new MolEnumerator::LinkNodeOp()); auto bundle = MolEnumerator::enumerate(*mol1, ps); std::vector tsmis = {"OC1CCCC1", "OC1CCCCC1O", "OC1CCCCC(O)C1O", "OC1CCCCC(O)C(O)C1O"}; CHECK(bundle.size() == tsmis.size()); std::vector smis; for (const auto &molp : bundle.getMols()) { smis.push_back(MolToSmiles(*molp)); } CHECK(smis == tsmis); } SECTION("enumeration basics 2") { auto mol = R"CTAB(no linknodes Mrv2007 06222005102D 0 0 0 0 0 999 V3000 M V30 BEGIN CTAB M V30 COUNTS 6 6 0 0 0 M V30 BEGIN ATOM M V30 1 C 8.25 12.1847 0 0 M V30 2 C 6.9164 12.9547 0 0 M V30 3 C 6.9164 14.4947 0 0 M V30 4 C 9.5836 14.4947 0 0 M V30 5 C 9.5836 12.9547 0 0 M V30 6 O 8.25 10.6447 0 0 M V30 END ATOM M V30 BEGIN BOND M V30 1 1 1 2 M V30 2 1 2 3 M V30 3 1 4 5 M V30 4 1 1 5 M V30 5 1 3 4 M V30 6 1 1 6 M V30 END BOND M V30 END CTAB M END)CTAB"_ctab; REQUIRE(mol); MolEnumerator::MolEnumeratorParams ps; ps.dp_operation = std::shared_ptr( new MolEnumerator::LinkNodeOp()); auto bundle = MolEnumerator::enumerate(*mol, ps); CHECK(bundle.size() == 0); } } TEST_CASE("multiple enumeration points", "[MolEnumerator]") { auto mol1 = R"CTAB( Mrv2014 12212013392D 0 0 0 0 0 999 V3000 M V30 BEGIN CTAB M V30 COUNTS 12 12 0 0 0 M V30 BEGIN ATOM M V30 1 C -6 4.7484 0 0 M V30 2 C -7.3337 3.9784 0 0 M V30 3 N -7.3337 2.4383 0 0 M V30 4 C -6 1.6683 0 0 M V30 5 C -4.6663 2.4383 0 0 M V30 6 C -4.6663 3.9784 0 0 M V30 7 C -5.8773 0.0617 0 0 M V30 8 C -3.2136 0.2013 0 0 M V30 9 C -4.5052 -0.6374 0 0 M V30 10 O -4.4246 -2.1753 0 0 M V30 11 * -6 4.235 0 0 M V30 12 C -4.845 6.2355 0 0 M V30 END ATOM M V30 BEGIN BOND M V30 1 1 1 2 M V30 2 2 2 3 M V30 3 1 3 4 M V30 4 2 4 5 M V30 5 1 5 6 M V30 6 2 1 6 M V30 7 1 8 9 M V30 8 1 7 9 M V30 9 1 5 8 M V30 10 1 7 4 M V30 11 1 9 10 M V30 12 1 11 12 ENDPTS=(3 1 2 6) ATTACH=ANY M V30 END BOND M V30 LINKNODE 1 3 2 9 7 9 8 M V30 END CTAB M END )CTAB"_ctab; REQUIRE(mol1); std::vector tsmis = { "Cc1cnc2c(c1)CC(O)C2", "Cc1cnc2c(c1)CC(O)C(O)C2", "Cc1cnc2c(c1)CC(O)C(O)C(O)C2", "Cc1ccc2c(n1)CC(O)C2", "Cc1ccc2c(n1)CC(O)C(O)C2", "Cc1ccc2c(n1)CC(O)C(O)C(O)C2", "Cc1ccnc2c1CC(O)C2", "Cc1ccnc2c1CC(O)C(O)C2", "Cc1ccnc2c1CC(O)C(O)C(O)C2"}; SECTION("test1") { std::vector paramsList; MolEnumerator::MolEnumeratorParams posVariationParams; posVariationParams.dp_operation = std::shared_ptr( new MolEnumerator::PositionVariationOp()); paramsList.push_back(posVariationParams); MolEnumerator::MolEnumeratorParams linkParams; linkParams.dp_operation = std::shared_ptr( new MolEnumerator::LinkNodeOp()); paramsList.push_back(linkParams); auto bundle = MolEnumerator::enumerate(*mol1, paramsList); CHECK(bundle.size() == tsmis.size()); for (const auto &molp : bundle.getMols()) { auto smi = MolToSmiles(*molp); // std::cerr << smi << std::endl; CHECK(std::find(tsmis.begin(), tsmis.end(), smi) != tsmis.end()); } } SECTION("test2") { auto bundle = MolEnumerator::enumerate(*mol1); CHECK(bundle.size() == tsmis.size()); for (const auto &molp : bundle.getMols()) { auto smi = MolToSmiles(*molp); CHECK(std::find(tsmis.begin(), tsmis.end(), smi) != tsmis.end()); } } SECTION("edges1") { std::vector paramsList; auto bundle = MolEnumerator::enumerate(*mol1, paramsList); CHECK(bundle.size() == 0); } SECTION("edges2") { std::vector paramsList; MolEnumerator::MolEnumeratorParams posVariationParams; posVariationParams.dp_operation = std::shared_ptr( new MolEnumerator::PositionVariationOp()); paramsList.push_back(posVariationParams); MolEnumerator::MolEnumeratorParams linkParams; linkParams.dp_operation = std::shared_ptr( new MolEnumerator::LinkNodeOp()); paramsList.push_back(linkParams); auto mol = "c1ccccc1"_smiles; auto bundle = MolEnumerator::enumerate(*mol, paramsList); CHECK(bundle.size() == 0); } } TEST_CASE("multiple enumeration points 2", "[MolEnumerator][bug]") { auto mol1 = R"CTAB( Mrv2014 02182116082D 0 0 0 0 0 999 V3000 M V30 BEGIN CTAB M V30 COUNTS 15 14 0 0 0 M V30 BEGIN ATOM M V30 1 C -2.1474 10.0453 0 0 M V30 2 C -3.481 9.2753 0 0 M V30 3 C -3.481 7.7352 0 0 M V30 4 C -2.1474 6.9652 0 0 M V30 5 C -0.8137 7.7352 0 0 M V30 6 C -0.8137 9.2753 0 0 M V30 7 N 0.6334 9.802 0 0 M V30 8 C 0.7204 7.3118 0 0 M V30 9 C 1.5815 8.5885 0 0 M V30 10 * -3.0365 9.0186 0 0 M V30 11 O -3.0365 11.3286 0 0 M V30 12 C 1.0579 11.2824 0 0 M V30 13 O -0.0119 12.3901 0 0 M V30 14 * 1.151 7.9501 0 0 M V30 15 C 1.151 10.2601 0 0 M V30 END ATOM M V30 BEGIN BOND M V30 1 1 1 2 M V30 2 2 2 3 M V30 3 1 3 4 M V30 4 2 4 5 M V30 5 1 5 6 M V30 6 2 1 6 M V30 7 1 7 9 M V30 8 1 7 6 M V30 9 1 5 8 M V30 10 2 8 9 M V30 11 1 10 11 ENDPTS=(3 2 3 1) ATTACH=ANY M V30 12 1 7 12 M V30 13 1 12 13 M V30 14 1 14 15 ENDPTS=(2 8 9) ATTACH=ANY M V30 END BOND M V30 LINKNODE 1 2 2 12 7 12 13 M V30 END CTAB M END )CTAB"_ctab; std::vector tsmis = { "Cc1cn(CO)c2cc(O)ccc12", "Cc1cn(CCO)c2cc(O)ccc12", "Cc1cc2ccc(O)cc2n1CO", "Cc1cc2ccc(O)cc2n1CCO", "Cc1cn(CO)c2ccc(O)cc12", "Cc1cn(CCO)c2ccc(O)cc12", "Cc1cc2cc(O)ccc2n1CO", "Cc1cc2cc(O)ccc2n1CCO", "Cc1cn(CO)c2c(O)cccc12", "Cc1cn(CCO)c2c(O)cccc12", "Cc1cc2cccc(O)c2n1CO", "Cc1cc2cccc(O)c2n1CCO"}; SECTION("test2") { auto bundle = MolEnumerator::enumerate(*mol1); CHECK(bundle.size() == tsmis.size()); for (const auto &molp : bundle.getMols()) { auto smi = MolToSmiles(*molp); // std::cerr << "\"" << smi << "\"," << std::endl; CHECK(std::find(tsmis.begin(), tsmis.end(), smi) != tsmis.end()); } } } TEST_CASE( "github #4382: Enumerate fails on variable attachment points with queries", "[MolEnumerator][bug]") { SECTION("test1") { auto mol1 = R"CTAB( Mrv2108 08032115452D 0 0 0 0 0 999 V3000 M V30 BEGIN CTAB M V30 COUNTS 7 6 0 0 0 M V30 BEGIN ATOM M V30 1 C -1.083 5.5632 0 0 M V30 2 C -1.083 7.1033 0 0 M V30 3 N 0.3973 7.5278 0 0 M V30 4 N 0.3104 5.0376 0 0 M V30 5 C 1.2585 6.251 0 0 M V30 6 * 0.3539 6.2827 0 0 M V30 7 A 1.5089 8.2832 0 0 M V30 END ATOM M V30 BEGIN BOND M V30 1 1 1 2 M V30 2 1 3 2 M V30 3 1 1 4 M V30 4 1 4 5 M V30 5 2 3 5 M V30 6 1 6 7 ENDPTS=(2 4 3) ATTACH=ANY M V30 END BOND M V30 END CTAB M END )CTAB"_ctab; REQUIRE(mol1); auto bundle = MolEnumerator::enumerate(*mol1); CHECK(bundle.size() == 2); std::vector tsmas = {"[#6]1-[#6]-[#7]=[#6]-[#7]-1-[!#1]", "[#6]1-[#6]-[#7](=[#6]-[#7]-1)-[!#1]"}; for (const auto &molp : bundle.getMols()) { auto smarts = MolToSmarts(*molp); CHECK(std::find(tsmas.begin(), tsmas.end(), smarts) != tsmas.end()); } } SECTION("test1 reversed") { // never actually observed one of these, but it should still work auto mol1 = R"CTAB( Mrv2108 08032115452D 0 0 0 0 0 999 V3000 M V30 BEGIN CTAB M V30 COUNTS 7 6 0 0 0 M V30 BEGIN ATOM M V30 1 C -1.083 5.5632 0 0 M V30 2 C -1.083 7.1033 0 0 M V30 3 N 0.3973 7.5278 0 0 M V30 4 N 0.3104 5.0376 0 0 M V30 5 C 1.2585 6.251 0 0 M V30 6 A 1.5089 8.2832 0 0 M V30 7 * 0.3539 6.2827 0 0 M V30 END ATOM M V30 BEGIN BOND M V30 1 1 1 2 M V30 2 1 3 2 M V30 3 1 1 4 M V30 4 1 4 5 M V30 5 2 3 5 M V30 6 1 6 7 ENDPTS=(2 4 3) ATTACH=ANY M V30 END BOND M V30 END CTAB M END )CTAB"_ctab; REQUIRE(mol1); auto bundle = MolEnumerator::enumerate(*mol1); CHECK(bundle.size() == 2); std::vector tsmas = {"[#6]1-[#6]-[#7]=[#6]-[#7]-1-[!#1]", "[#6]1-[#6]-[#7](=[#6]-[#7]-1)-[!#1]"}; for (const auto &molp : bundle.getMols()) { auto smarts = MolToSmarts(*molp); CHECK(std::find(tsmas.begin(), tsmas.end(), smarts) != tsmas.end()); } } SECTION("test3: both are dummy atoms") { auto mol1 = R"CTAB( Mrv2108 08032115452D 0 0 0 0 0 999 V3000 M V30 BEGIN CTAB M V30 COUNTS 7 6 0 0 0 M V30 BEGIN ATOM M V30 1 C -1.083 5.5632 0 0 M V30 2 C -1.083 7.1033 0 0 M V30 3 N 0.3973 7.5278 0 0 M V30 4 N 0.3104 5.0376 0 0 M V30 5 C 1.2585 6.251 0 0 M V30 6 * 0.3539 6.2827 0 0 M V30 7 * 1.5089 8.2832 0 0 M V30 END ATOM M V30 BEGIN BOND M V30 1 1 1 2 M V30 2 1 3 2 M V30 3 1 1 4 M V30 4 1 4 5 M V30 5 2 3 5 M V30 6 1 6 7 ENDPTS=(2 4 3) ATTACH=ANY M V30 END BOND M V30 END CTAB M END )CTAB"_ctab; REQUIRE(mol1); auto bundle = MolEnumerator::enumerate(*mol1); CHECK(bundle.size() == 2); std::vector tsmas = {"[#6]1-[#6]-[#7]=[#6]-[#7]-1-*", "[#6]1-[#6]-[#7](=[#6]-[#7]-1)-*"}; for (const auto &molp : bundle.getMols()) { auto smarts = MolToSmarts(*molp); CHECK(std::find(tsmas.begin(), tsmas.end(), smarts) != tsmas.end()); } } } TEST_CASE("github #4381: need implicit H cleanup after Enumerate", "[MolEnumerator][bug]") { SECTION("test1") { auto mol1 = R"CTAB( Mrv2108 08032115452D 0 0 0 0 0 999 V3000 M V30 BEGIN CTAB M V30 COUNTS 7 6 0 0 0 M V30 BEGIN ATOM M V30 1 C -1.083 5.5632 0 0 M V30 2 C -1.083 7.1033 0 0 M V30 3 N 0.3973 7.5278 0 0 M V30 4 N 0.3104 5.0376 0 0 M V30 5 C 1.2585 6.251 0 0 M V30 6 * 0.3539 6.2827 0 0 M V30 7 C 1.5089 8.2832 0 0 M V30 END ATOM M V30 BEGIN BOND M V30 1 2 1 2 M V30 2 1 3 2 M V30 3 1 1 4 M V30 4 1 4 5 M V30 5 2 3 5 M V30 6 1 6 7 ENDPTS=(2 4 3) ATTACH=ANY M V30 END BOND M V30 END CTAB M END )CTAB"_ctab; REQUIRE(mol1); auto bundle = MolEnumerator::enumerate(*mol1); CHECK(bundle.size() == 2); std::vector tsmas = {"[#6]1:[#6]:[#7]:[#6]:[#7]:1-[#6]", "[#6]1:[#6]:[#7](:[#6]:[#7]:1)-[#6]"}; for (const auto &molp : bundle.getMols()) { auto smarts = MolToSmarts(*molp); CHECK(std::find(tsmas.begin(), tsmas.end(), smarts) != tsmas.end()); } } } TEST_CASE("RepeatUnit", "[MolEnumerator]") { SECTION("basic HT enumeration") { auto mol1 = R"CTAB( ACCLDraw05132106232D 0 0 0 0 0 999 V3000 M V30 BEGIN CTAB M V30 COUNTS 4 3 1 0 0 M V30 BEGIN ATOM M V30 1 C 9.7578 -7.0211 0 0 M V30 2 O 10.7757 -7.6201 0 0 M V30 3 * 11.8037 -7.0378 0 0 M V30 4 * 8.7298 -7.6034 0 0 M V30 END ATOM M V30 BEGIN BOND M V30 1 1 1 2 M V30 2 1 2 3 M V30 3 1 1 4 M V30 END BOND M V30 BEGIN SGROUP M V30 1 SRU 1 ATOMS=(2 2 1) XBONDS=(2 3 2) BRKXYZ=(9 9.24 -7.9 0 9.24 -6.72 - M V30 0 0 0 0) BRKXYZ=(9 11.29 -6.74 0 11.29 -7.92 0 0 0 0) CONNECT=HT - M V30 LABEL=n M V30 END SGROUP M V30 END CTAB M END)CTAB"_ctab; REQUIRE(mol1); MolEnumerator::RepeatUnitOp op; op.initFromMol(*mol1); auto vcnts = op.getVariationCounts(); REQUIRE(vcnts.size() == 1); CHECK(vcnts[0] == MolEnumerator::RepeatUnitOp::DEFAULT_REPEAT_COUNT); { std::vector elems{0}; std::unique_ptr newmol(op(elems)); CHECK(MolToSmiles(*newmol) == "**"); } { std::vector elems{1}; std::unique_ptr newmol(op(elems)); CHECK(MolToSmiles(*newmol) == "*CO*"); } { std::vector elems{3}; std::unique_ptr newmol(op(elems)); CHECK(MolToSmiles(*newmol) == "*COCOCO*"); } } SECTION("basic HH enumeration") { auto mol1 = R"CTAB( ACCLDraw05132106232D 0 0 0 0 0 999 V3000 M V30 BEGIN CTAB M V30 COUNTS 4 3 1 0 0 M V30 BEGIN ATOM M V30 1 C 9.7578 -7.0211 0 0 M V30 2 O 10.7757 -7.6201 0 0 M V30 3 * 11.8037 -7.0378 0 0 M V30 4 * 8.7298 -7.6034 0 0 M V30 END ATOM M V30 BEGIN BOND M V30 1 1 1 2 M V30 2 1 2 3 M V30 3 1 1 4 M V30 END BOND M V30 BEGIN SGROUP M V30 1 SRU 1 ATOMS=(2 2 1) XBONDS=(2 3 2) BRKXYZ=(9 9.24 -7.9 0 9.24 -6.72 - M V30 0 0 0 0) BRKXYZ=(9 11.29 -6.74 0 11.29 -7.92 0 0 0 0) CONNECT=HH - M V30 LABEL=n M V30 END SGROUP M V30 END CTAB M END)CTAB"_ctab; REQUIRE(mol1); MolEnumerator::RepeatUnitOp op; op.initFromMol(*mol1); auto vcnts = op.getVariationCounts(); REQUIRE(vcnts.size() == 1); CHECK(vcnts[0] == MolEnumerator::RepeatUnitOp::DEFAULT_REPEAT_COUNT); { std::vector elems{3}; std::unique_ptr newmol(op(elems)); CHECK(MolToSmiles(*newmol) == "*COOCCO*"); } } SECTION("EU enumeration (which we don't do)") { auto mol1 = R"CTAB( ACCLDraw05132106232D 0 0 0 0 0 999 V3000 M V30 BEGIN CTAB M V30 COUNTS 4 3 1 0 0 M V30 BEGIN ATOM M V30 1 C 9.7578 -7.0211 0 0 M V30 2 O 10.7757 -7.6201 0 0 M V30 3 * 11.8037 -7.0378 0 0 M V30 4 * 8.7298 -7.6034 0 0 M V30 END ATOM M V30 BEGIN BOND M V30 1 1 1 2 M V30 2 1 2 3 M V30 3 1 1 4 M V30 END BOND M V30 BEGIN SGROUP M V30 1 SRU 1 ATOMS=(2 2 1) XBONDS=(2 3 2) BRKXYZ=(9 9.24 -7.9 0 9.24 -6.72 - M V30 0 0 0 0) BRKXYZ=(9 11.29 -6.74 0 11.29 -7.92 0 0 0 0) CONNECT=EU - M V30 LABEL=n M V30 END SGROUP M V30 END CTAB M END)CTAB"_ctab; REQUIRE(mol1); MolEnumerator::RepeatUnitOp op; op.initFromMol(*mol1); auto vcnts = op.getVariationCounts(); REQUIRE(vcnts.empty()); } SECTION("non-overlapping multi-HT enumeration") { auto mol1 = R"CTAB( Mrv2108 09232105582D 0 0 0 0 0 999 V3000 M V30 BEGIN CTAB M V30 COUNTS 7 6 2 0 0 M V30 BEGIN ATOM M V30 1 C 12.719 -9.1518 0 0 M V30 2 O 14.0458 -9.9326 0 0 M V30 3 C 15.3857 -9.1735 0 0 MASS=13 M V30 4 * 11.379 -9.9108 0 0 M V30 5 N 16.713 -9.9545 0 0 M V30 6 C 18.053 -9.1955 0 0 M V30 7 * 19.3803 -9.9764 0 0 M V30 END ATOM M V30 BEGIN BOND M V30 1 1 1 2 M V30 2 1 2 3 M V30 3 1 1 4 M V30 4 1 3 5 M V30 5 1 5 6 M V30 6 1 6 7 M V30 END BOND M V30 BEGIN SGROUP M V30 1 SRU 0 ATOMS=(2 2 1) XBONDS=(2 2 3) BRKXYZ=(9 12.044 -10.2974 0 - M V30 12.044 -8.7593 0 0 0 0) BRKXYZ=(9 14.7161 -8.7854 0 14.7161 -10.3235 0 - M V30 0 0 0) CONNECT=HT LABEL=n M V30 2 SRU 0 ATOMS=(2 5 6) XBONDS=(2 4 6) BRKXYZ=(9 19.0681 -8.7207 0 - M V30 18.131 -10.3134 0 0 0 0) BRKXYZ=(9 15.6979 -10.4293 0 16.6351 -8.8365 - M V30 0 0 0 0) CONNECT=HT LABEL=n M V30 END SGROUP M V30 END CTAB M END )CTAB"_ctab; REQUIRE(mol1); MolEnumerator::RepeatUnitOp op; op.initFromMol(*mol1); auto vcnts = op.getVariationCounts(); REQUIRE(vcnts.size() == 2); CHECK(vcnts[0] == MolEnumerator::RepeatUnitOp::DEFAULT_REPEAT_COUNT); CHECK(vcnts[1] == MolEnumerator::RepeatUnitOp::DEFAULT_REPEAT_COUNT); { std::vector elems{0, 0}; std::unique_ptr newmol(op(elems)); CHECK(MolToSmiles(*newmol) == "*[13CH2]*"); } { std::vector elems{1, 0}; std::unique_ptr newmol(op(elems)); CHECK(MolToSmiles(*newmol) == "*CO[13CH2]*"); } { std::vector elems{0, 1}; std::unique_ptr newmol(op(elems)); CHECK(MolToSmiles(*newmol) == "*CN[13CH2]*"); } { std::vector elems{1, 1}; std::unique_ptr newmol(op(elems)); CHECK(MolToSmiles(*newmol) == "*CN[13CH2]OC*"); } { std::vector elems{3, 1}; std::unique_ptr newmol(op(elems)); CHECK(MolToSmiles(*newmol) == "*CN[13CH2]OCOCOC*"); } { std::vector elems{1, 3}; std::unique_ptr newmol(op(elems)); CHECK(MolToSmiles(*newmol) == "*CNCNCN[13CH2]OC*"); } { std::vector elems{2, 2}; std::unique_ptr newmol(op(elems)); CHECK(MolToSmiles(*newmol) == "*CNCN[13CH2]OCOC*"); } } SECTION("non-overlapping multi-HT enumeration reversed example") { // same as the previous example, but the head/tail definition of the second // SRU is swapped. This shouldn't change the results auto mol1 = R"CTAB( Mrv2108 09232105582D 0 0 0 0 0 999 V3000 M V30 BEGIN CTAB M V30 COUNTS 7 6 2 0 0 M V30 BEGIN ATOM M V30 1 C 12.719 -9.1518 0 0 M V30 2 O 14.0458 -9.9326 0 0 M V30 3 C 15.3857 -9.1735 0 0 MASS=13 M V30 4 * 11.379 -9.9108 0 0 M V30 5 N 16.713 -9.9545 0 0 M V30 6 C 18.053 -9.1955 0 0 M V30 7 * 19.3803 -9.9764 0 0 M V30 END ATOM M V30 BEGIN BOND M V30 1 1 1 2 M V30 2 1 2 3 M V30 3 1 1 4 M V30 4 1 3 5 M V30 5 1 5 6 M V30 6 1 6 7 M V30 END BOND M V30 BEGIN SGROUP M V30 1 SRU 0 ATOMS=(2 2 1) XBONDS=(2 2 3) BRKXYZ=(9 12.044 -10.2974 0 - M V30 12.044 -8.7593 0 0 0 0) BRKXYZ=(9 14.7161 -8.7854 0 14.7161 -10.3235 0 - M V30 0 0 0) CONNECT=HT LABEL=n M V30 2 SRU 0 ATOMS=(2 6 5) XBONDS=(2 6 4) BRKXYZ=(9 19.0681 -8.7207 0 - M V30 18.131 -10.3134 0 0 0 0) BRKXYZ=(9 15.6979 -10.4293 0 16.6351 -8.8365 - M V30 0 0 0 0) CONNECT=HT LABEL=n M V30 END SGROUP M V30 END CTAB M END )CTAB"_ctab; REQUIRE(mol1); MolEnumerator::RepeatUnitOp op; op.initFromMol(*mol1); auto vcnts = op.getVariationCounts(); REQUIRE(vcnts.size() == 2); CHECK(vcnts[0] == MolEnumerator::RepeatUnitOp::DEFAULT_REPEAT_COUNT); CHECK(vcnts[1] == MolEnumerator::RepeatUnitOp::DEFAULT_REPEAT_COUNT); { std::vector elems{0, 0}; std::unique_ptr newmol(op(elems)); CHECK(MolToSmiles(*newmol) == "*[13CH2]*"); } { std::vector elems{1, 0}; std::unique_ptr newmol(op(elems)); CHECK(MolToSmiles(*newmol) == "*CO[13CH2]*"); } { std::vector elems{0, 1}; std::unique_ptr newmol(op(elems)); CHECK(MolToSmiles(*newmol) == "*CN[13CH2]*"); } { std::vector elems{1, 1}; std::unique_ptr newmol(op(elems)); CHECK(MolToSmiles(*newmol) == "*CN[13CH2]OC*"); } { std::vector elems{3, 1}; std::unique_ptr newmol(op(elems)); CHECK(MolToSmiles(*newmol) == "*CN[13CH2]OCOCOC*"); } { std::vector elems{1, 3}; std::unique_ptr newmol(op(elems)); CHECK(MolToSmiles(*newmol) == "*CNCNCN[13CH2]OC*"); } { std::vector elems{2, 2}; std::unique_ptr newmol(op(elems)); CHECK(MolToSmiles(*newmol) == "*CNCN[13CH2]OCOC*"); } } SECTION("nested HT enumeration is not supported") { auto mol1 = R"CTAB( ACCLDraw05132106342D 0 0 0 0 0 999 V3000 M V30 BEGIN CTAB M V30 COUNTS 7 6 2 0 0 M V30 BEGIN ATOM M V30 1 * 9.2115 -7.4169 0 0 M V30 2 C 10.2483 -6.8511 0 0 CFG=3 M V30 3 C 10.2767 -5.67 0 0 M V30 4 O 11.257 -7.4662 0 0 M V30 5 * 12.2941 -6.9003 0 0 M V30 6 N 9.268 -5.0548 0 0 CFG=3 M V30 7 * 9.2964 -3.8737 0 0 M V30 END ATOM M V30 BEGIN BOND M V30 1 1 1 2 M V30 2 1 2 3 M V30 3 1 2 4 M V30 4 1 4 5 M V30 5 1 3 6 M V30 6 1 6 7 M V30 END BOND M V30 BEGIN SGROUP M V30 1 SRU 1 ATOMS=(5 3 2 4 6 7) XBONDS=(2 1 4) BRKXYZ=(9 9.72 -7.72 0 9.74 - M V30 -6.54 0 0 0 0) BRKXYZ=(9 11.79 -6.59 0 11.76 -7.77 0 0 0 0) - M V30 CONNECT=HT LABEL=n M V30 2 SRU 2 ATOMS=(2 3 6) XBONDS=(2 6 2) BRKXYZ=(9 8.78 -4.78 0 9.78 -4.15 - M V30 0 0 0 0) BRKXYZ=(9 10.76 -5.95 0 9.76 -6.57 0 0 0 0) CONNECT=HT - M V30 LABEL=n M V30 END SGROUP M V30 END CTAB M END)CTAB"_ctab; REQUIRE(mol1); MolEnumerator::RepeatUnitOp op; CHECK_THROWS_AS(op.initFromMol(*mol1), ValueErrorException); } SECTION("single atom SRU enumeration") { auto mol1 = R"CTAB( Mrv2108 09232115452D 0 0 0 0 0 999 V3000 M V30 BEGIN CTAB M V30 COUNTS 3 2 1 0 0 M V30 BEGIN ATOM M V30 1 * 9.3333 -3.9167 0 0 M V30 2 C 10.667 -3.1467 0 0 M V30 3 * 12.0007 -3.9167 0 0 M V30 END ATOM M V30 BEGIN BOND M V30 1 1 1 2 M V30 2 1 2 3 M V30 END BOND M V30 BEGIN SGROUP M V30 1 SRU 0 ATOMS=(1 2) XBONDS=(2 1 2) BRKXYZ=(9 11.6782 -2.6635 0 10.7542 - M V30 -4.2639 0 0 0 0) BRKXYZ=(9 10.5799 -4.2639 0 9.6559 -2.6635 0 0 0 0) - M V30 CONNECT=HT LABEL=n M V30 END SGROUP M V30 END CTAB M END )CTAB"_ctab; REQUIRE(mol1); MolEnumerator::RepeatUnitOp op; op.initFromMol(*mol1); auto vcnts = op.getVariationCounts(); REQUIRE(vcnts.size() == 1); CHECK(vcnts[0] == MolEnumerator::RepeatUnitOp::DEFAULT_REPEAT_COUNT); { std::vector elems{0}; std::unique_ptr newmol(op(elems)); CHECK(MolToSmiles(*newmol) == "**"); } { std::vector elems{1}; std::unique_ptr newmol(op(elems)); CHECK(MolToSmiles(*newmol) == "*C*"); } { std::vector elems{3}; std::unique_ptr newmol(op(elems)); CHECK(MolToSmiles(*newmol) == "*CCC*"); } } } TEST_CASE("SRUs directly bound to each other") { SECTION("basics") { auto mol1 = R"CTAB( Mrv2108 09242104182D 0 0 0 0 0 999 V3000 M V30 BEGIN CTAB M V30 COUNTS 6 5 2 0 0 M V30 BEGIN ATOM M V30 1 * -0.75 -3.2917 0 0 M V30 2 C 0.5837 -2.5217 0 0 M V30 3 O 1.9174 -3.2917 0 0 M V30 4 C 3.251 -2.5217 0 0 M V30 5 N 4.5847 -3.2917 0 0 M V30 6 * 5.9184 -2.5217 0 0 M V30 END ATOM M V30 BEGIN BOND M V30 1 1 1 2 M V30 2 1 2 3 M V30 3 1 3 4 M V30 4 1 4 5 M V30 5 1 5 6 M V30 END BOND M V30 BEGIN SGROUP M V30 1 SRU 0 ATOMS=(2 2 3) XBONDS=(2 1 3) BRKXYZ=(9 2.0045 -2.1744 0 2.9285 - M V30 -3.7748 0 0 0 0) BRKXYZ=(9 0.4965 -3.6389 0 -0.4275 -2.0385 0 0 0 0) - M V30 CONNECT=HT LABEL=n M V30 2 SRU 0 ATOMS=(2 4 5) XBONDS=(2 3 5) BRKXYZ=(9 4.6719 -2.1744 0 5.5959 - M V30 -3.7748 0 0 0 0) BRKXYZ=(9 3.1639 -3.6389 0 2.2399 -2.0385 0 0 0 0) - M V30 CONNECT=HT LABEL=n M V30 END SGROUP M V30 END CTAB M END )CTAB"_ctab; REQUIRE(mol1); MolEnumerator::RepeatUnitOp op; op.initFromMol(*mol1); auto vcnts = op.getVariationCounts(); REQUIRE(vcnts.size() == 2); CHECK(vcnts[0] == MolEnumerator::RepeatUnitOp::DEFAULT_REPEAT_COUNT); CHECK(vcnts[1] == MolEnumerator::RepeatUnitOp::DEFAULT_REPEAT_COUNT); { std::vector elems{0, 0}; std::unique_ptr newmol(op(elems)); // FIX: this result is ugly and is fixable by adding // some extra bookkeeping. Somehow doesn't seem // super important at the moment though. CHECK(MolToSmiles(*newmol) == "*.*"); } { std::vector elems{1, 0}; std::unique_ptr newmol(op(elems)); CHECK(MolToSmiles(*newmol) == "*CO*"); } { std::vector elems{0, 1}; std::unique_ptr newmol(op(elems)); CHECK(MolToSmiles(*newmol) == "*CN*"); } { std::vector elems{1, 1}; std::unique_ptr newmol(op(elems)); CHECK(MolToSmiles(*newmol) == "*COCN*"); } { std::vector elems{2, 2}; std::unique_ptr newmol(op(elems)); CHECK(MolToSmiles(*newmol) == "*COCOCNCN*"); } } } TEST_CASE("RepeatUnit with enumerate()", "[MolEnumerator]") { SECTION("basic HT enumeration") { auto mol1 = R"CTAB( ACCLDraw05132106232D 0 0 0 0 0 999 V3000 M V30 BEGIN CTAB M V30 COUNTS 4 3 1 0 0 M V30 BEGIN ATOM M V30 1 C 9.7578 -7.0211 0 0 M V30 2 O 10.7757 -7.6201 0 0 M V30 3 * 11.8037 -7.0378 0 0 M V30 4 * 8.7298 -7.6034 0 0 M V30 END ATOM M V30 BEGIN BOND M V30 1 1 1 2 M V30 2 1 2 3 M V30 3 1 1 4 M V30 END BOND M V30 BEGIN SGROUP M V30 1 SRU 1 ATOMS=(2 2 1) XBONDS=(2 3 2) BRKXYZ=(9 9.24 -7.9 0 9.24 -6.72 - M V30 0 0 0 0) BRKXYZ=(9 11.29 -6.74 0 11.29 -7.92 0 0 0 0) CONNECT=HT - M V30 LABEL=n M V30 END SGROUP M V30 END CTAB M END)CTAB"_ctab; REQUIRE(mol1); MolEnumerator::RepeatUnitOp op; std::vector expected = {"**", "*CO*", "*COCO*", "*COCOCO*"}; auto bundle = MolEnumerator::enumerate(*mol1); CHECK(bundle.size() == MolEnumerator::RepeatUnitOp::DEFAULT_REPEAT_COUNT); for (const auto &molp : bundle.getMols()) { auto smiles = MolToSmiles(*molp); CHECK(std::find(expected.begin(), expected.end(), smiles) != expected.end()); } } } TEST_CASE("RepeatUnit+LINKNODE", "[MolEnumerator]") { SECTION("basics") { auto mol1 = R"CTAB( Mrv2108 09252115192D 0 0 0 0 0 999 V3000 M V30 BEGIN CTAB M V30 COUNTS 7 7 1 0 0 M V30 BEGIN ATOM M V30 1 C 12.719 -9.1518 0 0 M V30 2 O 14.0458 -9.9326 0 0 M V30 3 * 15.3857 -9.1735 0 0 M V30 4 * 11.379 -9.9108 0 0 M V30 5 C 12.7317 -7.6118 0 0 M V30 6 C 12.2558 -6.1472 0 0 M V30 7 C 13.7622 -6.4674 0 0 M V30 END ATOM M V30 BEGIN BOND M V30 1 1 1 2 M V30 2 1 2 3 M V30 3 1 1 4 M V30 4 1 1 5 M V30 5 1 7 6 M V30 6 1 6 5 M V30 7 1 5 7 M V30 END BOND M V30 LINKNODE 1 2 2 7 5 7 6 M V30 BEGIN SGROUP M V30 1 SRU 0 ATOMS=(5 2 1 5 7 6) XBONDS=(2 2 3) BRKXYZ=(9 12.044 -10.2974 0 - M V30 12.044 -8.7593 0 0 0 0) BRKXYZ=(9 14.7161 -8.7854 0 14.7161 -10.3235 0 - M V30 0 0 0) CONNECT=HT LABEL=n M V30 END SGROUP M V30 END CTAB M END )CTAB"_ctab; REQUIRE(mol1); CHECK_THROWS_AS(MolEnumerator::enumerate(*mol1), ValueErrorException); } } TEST_CASE("ladder") { SECTION("basics, no XBCORR") { auto mol1 = R"CTAB( Mrv2108 09262105082D 0 0 0 0 0 999 V3000 M V30 BEGIN CTAB M V30 COUNTS 8 7 1 0 0 M V30 BEGIN ATOM M V30 1 C 11.375 -0.2516 0 0 M V30 2 * 10.0413 -1.0216 0 0 M V30 3 * 10.0413 -2.5617 0 0 M V30 4 C 11.375 -3.3317 0 0 M V30 5 C 12.7087 -2.5617 0 0 M V30 6 N 12.7087 -1.0216 0 0 M V30 7 * 14.0423 -0.2516 0 0 M V30 8 * 14.0423 -3.3317 0 0 M V30 END ATOM M V30 BEGIN BOND M V30 1 1 1 2 M V30 2 1 3 4 M V30 3 1 4 5 M V30 4 1 5 6 M V30 5 1 1 6 M V30 6 1 5 8 M V30 7 1 7 6 M V30 END BOND M V30 BEGIN SGROUP M V30 1 SRU 0 ATOMS=(4 1 4 5 6) XBONDS=(4 1 2 6 7) BRKXYZ=(9 13.2958 0.0956 - M V30 0 13.2958 -3.679 0 0 0 0) - M V30 BRKXYZ=(9 10.8638 -3.8148 0 10.8638 0.2315 0 0 0 0) - M V30 CONNECT=HT LABEL=n M V30 END SGROUP M V30 END CTAB M END )CTAB"_ctab; REQUIRE(mol1); MolEnumerator::RepeatUnitOp op; op.initFromMol(*mol1); auto vcnts = op.getVariationCounts(); REQUIRE(vcnts.size() == 1); CHECK(vcnts[0] == MolEnumerator::RepeatUnitOp::DEFAULT_REPEAT_COUNT); { std::vector elems{0}; std::unique_ptr newmol(op(elems)); REQUIRE(newmol); CHECK(MolToSmiles(*newmol) == "**.**"); } { std::vector elems{1}; std::unique_ptr newmol(op(elems)); REQUIRE(newmol); CHECK(MolToSmiles(*newmol) == "*CC(*)N(*)C*"); } { std::vector elems{2}; std::unique_ptr newmol(op(elems)); REQUIRE(newmol); CHECK(MolToSmiles(*newmol) == "*CC1CN(*)C(*)CN1C*"); } { std::vector elems{3}; std::unique_ptr newmol(op(elems)); REQUIRE(newmol); CHECK(MolToSmiles(*newmol) == "*CC1CN2CC(*)N(*)CC2CN1C*"); } } SECTION("basics, with XBCORR") { auto mol1 = R"CTAB( Mrv2108 09262105082D 0 0 0 0 0 999 V3000 M V30 BEGIN CTAB M V30 COUNTS 8 7 1 0 0 M V30 BEGIN ATOM M V30 1 C 11.375 -0.2516 0 0 M V30 2 * 10.0413 -1.0216 0 0 M V30 3 * 10.0413 -2.5617 0 0 M V30 4 C 11.375 -3.3317 0 0 M V30 5 C 12.7087 -2.5617 0 0 M V30 6 N 12.7087 -1.0216 0 0 M V30 7 * 14.0423 -0.2516 0 0 M V30 8 * 14.0423 -3.3317 0 0 M V30 END ATOM M V30 BEGIN BOND M V30 1 1 1 2 M V30 2 1 3 4 M V30 3 1 4 5 M V30 4 1 5 6 M V30 5 1 1 6 M V30 6 1 5 8 M V30 7 1 7 6 M V30 END BOND M V30 BEGIN SGROUP M V30 1 SRU 0 ATOMS=(4 1 4 5 6) XBONDS=(4 1 2 6 7) XBCORR=(4 1 7 2 6) - M V30 BRKXYZ=(9 13.2958 0.0956 0 13.2958 -3.679 0 0 0 0) - M V30 BRKXYZ=(9 10.8638 -3.8148 0 10.8638 0.2315 0 0 0 0) - M V30 CONNECT=HT LABEL=n M V30 END SGROUP M V30 END CTAB M END )CTAB"_ctab; REQUIRE(mol1); MolEnumerator::RepeatUnitOp op; op.initFromMol(*mol1); auto vcnts = op.getVariationCounts(); REQUIRE(vcnts.size() == 1); CHECK(vcnts[0] == MolEnumerator::RepeatUnitOp::DEFAULT_REPEAT_COUNT); { std::vector elems{0}; std::unique_ptr newmol(op(elems)); REQUIRE(newmol); CHECK(MolToSmiles(*newmol) == "**.**"); } { std::vector elems{1}; std::unique_ptr newmol(op(elems)); REQUIRE(newmol); CHECK(MolToSmiles(*newmol) == "*CC(*)N(*)C*"); } { std::vector elems{2}; std::unique_ptr newmol(op(elems)); REQUIRE(newmol); CHECK(MolToSmiles(*newmol) == "*CC1CC(*)N(*)CN1C*"); } { std::vector elems{3}; std::unique_ptr newmol(op(elems)); REQUIRE(newmol); CHECK(MolToSmiles(*newmol) == "*CC1CC2CC(*)N(*)CN2CN1C*"); } } SECTION("using enumerate with XBCORR") { auto mol1 = R"CTAB( Mrv2108 09262105082D 0 0 0 0 0 999 V3000 M V30 BEGIN CTAB M V30 COUNTS 8 7 1 0 0 M V30 BEGIN ATOM M V30 1 C 11.375 -0.2516 0 0 M V30 2 * 10.0413 -1.0216 0 0 M V30 3 * 10.0413 -2.5617 0 0 M V30 4 C 11.375 -3.3317 0 0 M V30 5 C 12.7087 -2.5617 0 0 M V30 6 N 12.7087 -1.0216 0 0 M V30 7 * 14.0423 -0.2516 0 0 M V30 8 * 14.0423 -3.3317 0 0 M V30 END ATOM M V30 BEGIN BOND M V30 1 1 1 2 M V30 2 1 3 4 M V30 3 1 4 5 M V30 4 1 5 6 M V30 5 1 1 6 M V30 6 1 5 8 M V30 7 1 7 6 M V30 END BOND M V30 BEGIN SGROUP M V30 1 SRU 0 ATOMS=(4 1 4 5 6) XBONDS=(4 1 2 6 7) XBCORR=(4 1 7 2 6) - M V30 0 BRKXYZ=(9 13.2958 0.0956 13.2958 -3.679 0 0 0 0) - M V30 BRKXYZ=(9 10.8638 -3.8148 0 10.8638 0.2315 0 0 0 0) - M V30 CONNECT=HT LABEL=n M V30 END SGROUP M V30 END CTAB M END )CTAB"_ctab; REQUIRE(mol1); MolEnumerator::RepeatUnitOp op; std::vector expected = {"**.**", "*CC(*)N(*)C*", "*CC1CC(*)N(*)CN1C*", "*CC1CC2CC(*)N(*)CN2CN1C*"}; auto bundle = MolEnumerator::enumerate(*mol1); CHECK(bundle.size() == MolEnumerator::RepeatUnitOp::DEFAULT_REPEAT_COUNT); for (const auto &molp : bundle.getMols()) { auto smiles = MolToSmiles(*molp); CHECK(std::find(expected.begin(), expected.end(), smiles) != expected.end()); } } } TEST_CASE("starting from CXSMILES") { SECTION("basic HT enumeration") { auto mol1 = "*-CO-* |$star_e;;;star_e$,Sg:n:2,1::ht|"_smiles; REQUIRE(mol1); MolEnumerator::RepeatUnitOp op; op.initFromMol(*mol1); auto vcnts = op.getVariationCounts(); REQUIRE(vcnts.size() == 1); CHECK(vcnts[0] == MolEnumerator::RepeatUnitOp::DEFAULT_REPEAT_COUNT); { std::vector elems{0}; std::unique_ptr newmol(op(elems)); CHECK(MolToSmiles(*newmol) == "**"); } { std::vector elems{1}; std::unique_ptr newmol(op(elems)); CHECK(MolToSmiles(*newmol) == "*CO*"); } { std::vector elems{3}; std::unique_ptr newmol(op(elems)); CHECK(MolToSmiles(*newmol) == "*COCOCO*"); } } SECTION("basic HH enumeration") { auto mol1 = "*-CO-* |$star_e;;;star_e$,Sg:n:2,1::hh|"_smiles; REQUIRE(mol1); MolEnumerator::RepeatUnitOp op; op.initFromMol(*mol1); auto vcnts = op.getVariationCounts(); REQUIRE(vcnts.size() == 1); CHECK(vcnts[0] == MolEnumerator::RepeatUnitOp::DEFAULT_REPEAT_COUNT); { std::vector elems{3}; std::unique_ptr newmol(op(elems)); CHECK(MolToSmiles(*newmol) == "*COOCCO*"); } } SECTION("non-overlapping multi-HT enumeration reversed example") { auto mol1 = "*-CN[13CH2]OC-* |$star_e;;;;;;star_e$,Sg:n:4,5::ht,Sg:n:1,2::ht|"_smiles; REQUIRE(mol1); MolEnumerator::RepeatUnitOp op; op.initFromMol(*mol1); auto vcnts = op.getVariationCounts(); REQUIRE(vcnts.size() == 2); CHECK(vcnts[0] == MolEnumerator::RepeatUnitOp::DEFAULT_REPEAT_COUNT); CHECK(vcnts[1] == MolEnumerator::RepeatUnitOp::DEFAULT_REPEAT_COUNT); { std::vector elems{0, 0}; std::unique_ptr newmol(op(elems)); CHECK(MolToSmiles(*newmol) == "*[13CH2]*"); } { std::vector elems{1, 0}; std::unique_ptr newmol(op(elems)); CHECK(MolToSmiles(*newmol) == "*CO[13CH2]*"); } { std::vector elems{0, 1}; std::unique_ptr newmol(op(elems)); CHECK(MolToSmiles(*newmol) == "*CN[13CH2]*"); } { std::vector elems{1, 1}; std::unique_ptr newmol(op(elems)); CHECK(MolToSmiles(*newmol) == "*CN[13CH2]OC*"); } { std::vector elems{3, 1}; std::unique_ptr newmol(op(elems)); CHECK(MolToSmiles(*newmol) == "*CN[13CH2]OCOCOC*"); } { std::vector elems{1, 3}; std::unique_ptr newmol(op(elems)); CHECK(MolToSmiles(*newmol) == "*CNCNCN[13CH2]OC*"); } { std::vector elems{2, 2}; std::unique_ptr newmol(op(elems)); CHECK(MolToSmiles(*newmol) == "*CNCN[13CH2]OCOC*"); } } SECTION("ladder basics, no XBCORR") { auto mol1 = "*-CC(-*)N(-*)C-* |$star_e;;;star_e;;star_e;;star_e$,Sg:n:6,1,2,4::ht:4,2:0,6|"_smiles; REQUIRE(mol1); MolEnumerator::RepeatUnitOp op; op.initFromMol(*mol1); auto vcnts = op.getVariationCounts(); REQUIRE(vcnts.size() == 1); CHECK(vcnts[0] == MolEnumerator::RepeatUnitOp::DEFAULT_REPEAT_COUNT); { std::vector elems{0}; std::unique_ptr newmol(op(elems)); REQUIRE(newmol); CHECK(MolToSmiles(*newmol) == "**.**"); } { std::vector elems{1}; std::unique_ptr newmol(op(elems)); REQUIRE(newmol); CHECK(MolToSmiles(*newmol) == "*CC(*)N(*)C*"); } { std::vector elems{2}; std::unique_ptr newmol(op(elems)); REQUIRE(newmol); CHECK(MolToSmiles(*newmol) == "*CC1CN(*)C(*)CN1C*"); } { std::vector elems{3}; std::unique_ptr newmol(op(elems)); REQUIRE(newmol); CHECK(MolToSmiles(*newmol) == "*CC1CN2CC(*)N(*)CC2CN1C*"); } } SECTION("ladder basics, with XBCORR") { auto mol1 = "*-CC(-*)N(-*)C-* |$star_e;;;star_e;;star_e;;star_e$,Sg:n:6,1,2,4::ht:6,0:4,2|"_smiles; REQUIRE(mol1); MolEnumerator::RepeatUnitOp op; op.initFromMol(*mol1); auto vcnts = op.getVariationCounts(); REQUIRE(vcnts.size() == 1); CHECK(vcnts[0] == MolEnumerator::RepeatUnitOp::DEFAULT_REPEAT_COUNT); { std::vector elems{0}; std::unique_ptr newmol(op(elems)); REQUIRE(newmol); CHECK(MolToSmiles(*newmol) == "**.**"); } { std::vector elems{1}; std::unique_ptr newmol(op(elems)); REQUIRE(newmol); CHECK(MolToSmiles(*newmol) == "*CC(*)N(*)C*"); } { std::vector elems{2}; std::unique_ptr newmol(op(elems)); REQUIRE(newmol); CHECK(MolToSmiles(*newmol) == "*CC1CC(*)N(*)CN1C*"); } { std::vector elems{3}; std::unique_ptr newmol(op(elems)); REQUIRE(newmol); CHECK(MolToSmiles(*newmol) == "*CC1CC2CC(*)N(*)CN2CN1C*"); } } } TEST_CASE( "Github #6014: MolEnumerator is not propagating query information to molecules ") { SECTION("basics") { auto m = "O-[CX4]-[CH3] |LN:1:1.5|"_smarts; REQUIRE(m); auto bundle = MolEnumerator::enumerate(*m); CHECK(bundle.size() == 5); auto q0 = bundle[0]; REQUIRE(q0); CHECK(q0->getAtomWithIdx(0)->hasQuery()); CHECK(MolToSmarts(*q0) == "O-[C&X4]-[C&H3]"); CHECK(MolToSmarts(*bundle[1]) == "O-[C&X4]-[C&X4]-[C&H3]"); } } TEST_CASE( "Github #6432: MolEnumerate should clear the reaction properties on its results") { SECTION("basics") { auto m = "COC |LN:1:1.3|"_smarts; REQUIRE(m); auto bundle = MolEnumerator::enumerate(*m); CHECK(bundle.size() == 3); auto q0 = bundle[0]; REQUIRE(q0); for (const auto atom : q0->atoms()) { CHECK(!atom->hasProp(common_properties::reactantAtomIdx)); CHECK(!atom->hasProp(common_properties::reactionMapNum)); CHECK(!atom->hasProp("was_dummy")); } } } TEST_CASE("MolEnumerator should propagate atom properties") { auto mol = "COC1=NNC(*)=C1 |LN:1:1.3|"_smiles; REQUIRE(mol); mol->getAtomWithIdx(6)->setProp("_foo", 6); auto bundle = MolEnumerator::enumerate(*mol); CHECK(bundle.size() == 3); CHECK(bundle[0]->getAtomWithIdx(7)->hasProp("_foo")); CHECK(bundle[1]->getAtomWithIdx(8)->hasProp("_foo")); CHECK(bundle[2]->getAtomWithIdx(9)->hasProp("_foo")); } TEST_CASE("SRU enumeration should adhere to repeat counts #6429") { // these encode the same molecule std::string smiles_template{"FCN(CC)-* |Sg:n:2,5:%s:ht|"}; std::string molblock_template{R"CTAB( RDKit 2D 0 0 0 0 0 0 0 0 0 0999 V3000 M V30 BEGIN CTAB M V30 COUNTS 6 5 1 0 0 M V30 BEGIN ATOM M V30 1 F 2.598076 3.000000 0.000000 0 M V30 2 C 1.299038 2.250000 0.000000 0 M V30 3 N 1.299038 0.750000 0.000000 0 M V30 4 C 2.598076 -0.000000 0.000000 0 M V30 5 C 3.897114 0.750000 0.000000 0 M V30 6 A 0.000000 0.000000 0.000000 0 M V30 END ATOM M V30 BEGIN BOND M V30 1 1 1 2 M V30 2 1 2 3 M V30 3 1 3 4 M V30 4 1 4 5 M V30 5 1 3 6 M V30 END BOND M V30 BEGIN SGROUP M V30 1 SRU 0 ATOMS=(2 3 6) XBONDS=(2 2 3) XBHEAD=(2 2 3) CONNECT=HT - M V30 LABEL=%s M V30 END SGROUP M V30 END CTAB M END)CTAB"}; // test that the right number of repetitions produced std::vector> test_info{ {"", MolEnumerator::RepeatUnitOp::DEFAULT_REPEAT_COUNT}, {"n", MolEnumerator::RepeatUnitOp::DEFAULT_REPEAT_COUNT}, {"hello", MolEnumerator::RepeatUnitOp::DEFAULT_REPEAT_COUNT}, {"23-20", MolEnumerator::RepeatUnitOp::DEFAULT_REPEAT_COUNT}, {"-20", MolEnumerator::RepeatUnitOp::DEFAULT_REPEAT_COUNT}, {"2-4", 3}, {"20", 21}, {"20-20", 1}, }; for (auto &[sru_label, expected_repetitions] : test_info) { std::shared_ptr molFromSmi( SmilesToMol(boost::str(boost::format(smiles_template) % sru_label))); std::shared_ptr molFromMolBlock(MolBlockToMol( boost::str(boost::format(molblock_template) % sru_label))); std::vector test_mols{molFromSmi, molFromMolBlock}; REQUIRE(test_mols[0]); REQUIRE(test_mols[1]); for (auto &mol : test_mols) { auto bundle = MolEnumerator::enumerate(*mol); CHECK(bundle.size() == expected_repetitions); // check that maxPerOperation is still valid size_t maxPerOperation = 1; bundle = MolEnumerator::enumerate(*mol, maxPerOperation); CHECK(bundle.size() == maxPerOperation); } } // check that the right smiles are produced auto mol = "FCN(CC)-* |Sg:n:2,5:2-3:ht|"_smiles; REQUIRE(mol); auto bundle = MolEnumerator::enumerate(*mol); CHECK(MolToSmiles(*bundle[0]) == "*N(CC)N(*)CF"); // repeated 2x CHECK(MolToSmiles(*bundle[1]) == "*N(CC)N(*)N(*)CF"); // repeated 3x }