// Unit tests for the fingerprint generators (Morgan, RDKit, atom pair and
// topological torsion), written with TEST_CASE / SECTION / REQUIRE / CHECK
// macros (presumably Catch2 -- the include targets are not visible here).
//
// Test cases in this file:
//   - "includeRedundantEnvironments": effect of df_includeRedundantEnvironments
//     on the Morgan count fingerprint of CC(=O)O.
//   - "github #5838": additional output (bit paths / bit info map, atom counts,
//     atom-to-bits) must match the generated bits when count simulation is on.
//   - "numBitsPerFeature": effect of d_numBitsPerFeature on the total count and
//     on infoString().
//   - "multithreaded fp generation": batch get*Fingerprints() calls must match
//     one-at-a-time calls and return a null entry for a nullptr molecule.
//   - "countBounds edge cases": accepted and rejected sizes of d_countBounds.
//   - "Github #7986": interaction of chirality and radius in Morgan bits.
//   - "github #6679": pinned atom-pair invariants and fingerprint bits.
//   - "atomsPerBit": atomsPerBit additional output for all four generators.
//
// NOTE(review): as it stands here the #include directives have no targets,
// several template-looking uses have no argument list (e.g. std::unique_ptr>),
// and code shares physical lines with line comments. This looks like an
// extraction artifact -- confirm against the upstream file before building.
// // Copyright (C) 2022-2025 Greg Landrum // // @@ All Rights Reserved @@ // This file is part of the RDKit. // The contents are covered by the terms of the BSD license // which is included in the file license.txt, found at the root // of the RDKit source tree. // #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include using namespace RDKit; /* Morgan count fingerprint of CC(=O)O: the total count is 8 by default and 12 once df_includeRedundantEnvironments is switched on. */ TEST_CASE("includeRedundantEnvironments") { auto mol = "CC(=O)O"_smiles; REQUIRE(mol); SECTION("basics") { std::unique_ptr> fpgen{ MorganFingerprint::getMorganGenerator(2)}; REQUIRE(fpgen); { std::unique_ptr> fp{ fpgen->getCountFingerprint(*mol)}; REQUIRE(fp); CHECK(fp->getTotalVal() == 8); } // turn on inclusion of redundant bits dynamic_cast(fpgen->getOptions()) ->df_includeRedundantEnvironments = true; { std::unique_ptr> fp{ fpgen->getCountFingerprint(*mol)}; REQUIRE(fp); CHECK(fp->getTotalVal() == 12); } } } /* Regression test for github #5838: with count simulation active, the additional output (bit paths or bit info map, atom counts, atom-to-bits) must agree with the on bits returned by getFingerprint and getSparseFingerprint, for both the topological torsion and the Morgan generator. */ TEST_CASE( "github #5838: FP count simulation is not accounted for when additional output is requested") { SECTION("as reported") { auto m = "OCCCCCCN"_smiles; REQUIRE(m); std::unique_ptr> fpgen( TopologicalTorsion::getTopologicalTorsionGenerator()); REQUIRE(fpgen); CHECK(fpgen->getOptions()->df_countSimulation); { AdditionalOutput ao; ao.allocateBitPaths(); ao.allocateAtomToBits(); ao.allocateAtomCounts(); FingerprintFuncArguments args; args.additionalOutput = &ao; std::unique_ptr fp{fpgen->getFingerprint(*m, args)}; REQUIRE(fp); CHECK(fp->getNumOnBits() == ao.bitPaths->size()); std::vector obl; fp->getOnBits(obl); for (const auto bid : obl) { INFO(bid); CHECK(ao.bitPaths->find(bid) != ao.bitPaths->end()); } std::vector atomCounts{1, 2, 3, 4, 4, 3, 2, 1}; CHECK(*ao.atomCounts == atomCounts); std::vector> atomToBits = {{0}, {0, 284, 285}, {0, 284, 285}, {0, 284, 285}, {284, 285, 384}, {284, 285, 384}, {284, 285, 384}, {384}}; CHECK(*ao.atomToBits == atomToBits); } { AdditionalOutput ao; ao.allocateBitPaths(); ao.allocateAtomToBits(); 
ao.allocateAtomCounts(); FingerprintFuncArguments args; args.additionalOutput = &ao; std::unique_ptr fp{fpgen->getSparseFingerprint(*m, args)}; REQUIRE(fp); CHECK(fp->getNumOnBits() == ao.bitPaths->size()); std::vector obl; fp->getOnBits(obl); for (const auto bid : obl) { INFO(bid); CHECK(ao.bitPaths->find(bid) != ao.bitPaths->end()); } std::vector atomCounts{1, 2, 3, 4, 4, 3, 2, 1}; CHECK(*ao.atomCounts == atomCounts); std::vector> atomToBits = { {1046732804}, {1046732804, 1046733088, 1046733089}, {1046732804, 1046733088, 1046733089}, {1046732804, 1046733088, 1046733089}, {1046733088, 1046733089, 1046733188}, {1046733088, 1046733089, 1046733188}, {1046733088, 1046733089, 1046733188}, {1046733188}}; CHECK(*ao.atomToBits == atomToBits); } } SECTION("morgan") { auto m = "OCCCCCCN"_smiles; REQUIRE(m); std::unique_ptr> fpgen( MorganFingerprint::getMorganGenerator(2)); REQUIRE(fpgen); fpgen->getOptions()->df_countSimulation = true; { AdditionalOutput ao; ao.allocateBitInfoMap(); ao.allocateAtomToBits(); ao.allocateAtomCounts(); FingerprintFuncArguments args; args.additionalOutput = &ao; std::unique_ptr fp{fpgen->getFingerprint(*m, args)}; REQUIRE(fp); CHECK(fp->getNumOnBits() == ao.bitInfoMap->size()); std::vector obl; fp->getOnBits(obl); for (const auto bid : obl) { INFO(bid); CHECK(ao.bitInfoMap->find(bid) != ao.bitInfoMap->end()); } std::vector atomCounts{2, 3, 3, 3, 3, 3, 3, 2}; CHECK(*ao.atomCounts == atomCounts); std::vector> atomToBits = { {888, 1180}, {320, 321, 322, 424, 1892}, {116, 320, 321, 322, 1500, 1501, 1502}, {320, 321, 322, 476, 477, 1500, 1501, 1502}, {320, 321, 322, 476, 477, 1500, 1501, 1502}, {232, 320, 321, 322, 1500, 1501, 1502}, {320, 321, 322, 1216, 1972}, {588, 1876}, }; CHECK(*ao.atomToBits == atomToBits); } { AdditionalOutput ao; ao.allocateBitInfoMap(); ao.allocateAtomToBits(); ao.allocateAtomCounts(); FingerprintFuncArguments args; args.additionalOutput = &ao; std::unique_ptr fp{fpgen->getSparseFingerprint(*m, args)}; REQUIRE(fp); 
CHECK(fp->getNumOnBits() == ao.bitInfoMap->size()); std::vector obl; fp->getOnBits(obl); // there's an unfortunate bit of information loss happening here // due to the fact that the SparseBitVect uses ints, so we have to // do this test backwards: for (const auto &pr : *ao.bitInfoMap) { INFO(pr.first); CHECK(std::find(obl.begin(), obl.end(), (int)(pr.first)) != obl.end()); } // for (auto i = 0u; i < m->getNumAtoms(); ++i) { // std::cerr << " {"; // std::copy(ao.atomToBits->at(i).begin(), ao.atomToBits->at(i).end(), // std::ostream_iterator(std::cerr, ", ")); // std::cerr << " }," << std::endl; // } std::vector atomCounts{2, 3, 3, 3, 3, 3, 3, 2}; CHECK(*ao.atomCounts == atomCounts); std::vector> atomToBits = { {1845699452, 3458649244}, {391602504, 391602505, 391602506, 3018396076, 3209717616}, {391602504, 391602505, 391602506, 1746877920, 1746877921, 1746877922, 3245328504}, {391602504, 391602505, 391602506, 647852508, 647852509, 1746877920, 1746877921, 1746877922}, {391602504, 391602505, 391602506, 647852508, 647852509, 1746877920, 1746877921, 1746877922}, {391602504, 391602505, 391602506, 1746877920, 1746877921, 1746877922, 4225765616}, {10672060, 391602504, 391602505, 391602506, 3149364416}, {1781206876, 3391828556}}; CHECK(*ao.atomToBits == atomToBits); } } } /* Morgan count fingerprint of CC(=O)O: setting d_numBitsPerFeature to 2 takes the total count from 8 to 16 and is reported by infoString(). */ TEST_CASE("numBitsPerFeature") { auto mol = "CC(=O)O"_smiles; REQUIRE(mol); SECTION("basics") { std::unique_ptr> fpgen{ MorganFingerprint::getMorganGenerator(2)}; REQUIRE(fpgen); { std::unique_ptr> fp{ fpgen->getCountFingerprint(*mol)}; REQUIRE(fp); CHECK(fp->getTotalVal() == 8); } // turn on multiple bits per feature: fpgen->getOptions()->d_numBitsPerFeature = 2; { std::unique_ptr> fp{ fpgen->getCountFingerprint(*mol)}; REQUIRE(fp); CHECK(fp->getTotalVal() == 16); } CHECK(fpgen->infoString().find("bitsPerFeature=2") != std::string::npos); } } /* Compares one-at-a-time fingerprint calls with the batch get*Fingerprints() calls (second argument 1, and 4 when RDK_BUILD_THREADSAFE_SSS is defined -- presumably a thread count, confirm against the generator API); the molecule list is five parsed SMILES doubled six times, and a trailing nullptr molecule must yield a null result. */ TEST_CASE("multithreaded fp generation") { std::vector smis = {"CC1CCC1", "CCC1CCC1", "CCCC1CCC1", "CCCC1CC(O)C1", "CCCC1CC(CO)C1"}; std::vector> ov; std::vector 
mols; for (const auto &smi : smis) { ov.emplace_back(SmilesToMol(smi)); REQUIRE(ov.back()); mols.push_back(ov.back().get()); } for (auto i = 0u; i < 6; ++i) { auto n = mols.size(); for (auto j = 0u; j < n; ++j) { mols.push_back(mols[j]); } } std::unique_ptr> fpgen{ MorganFingerprint::getMorganGenerator(2)}; REQUIRE(fpgen); SECTION("getFingerprints") { std::vector> ovs; FingerprintFuncArguments args; for (const auto mp : mols) { ovs.emplace_back(fpgen->getFingerprint(*mp, args)); } mols.push_back(nullptr); // make sure we handle this properly auto mtvs1 = fpgen->getFingerprints(mols, 1); CHECK(mtvs1.size() == ovs.size() + 1); for (auto fpi = 0u; fpi < ovs.size(); ++fpi) { CHECK(*ovs[fpi] == *mtvs1[fpi]); } CHECK(mtvs1.back().get() == nullptr); #ifdef RDK_BUILD_THREADSAFE_SSS auto mtvs4 = fpgen->getFingerprints(mols, 4); CHECK(mtvs4.size() == ovs.size() + 1); for (auto fpi = 0u; fpi < ovs.size(); ++fpi) { CHECK(*ovs[fpi] == *mtvs4[fpi]); } CHECK(mtvs4.back().get() == nullptr); #endif } SECTION("getSparseFingerprints") { std::vector> ovs; FingerprintFuncArguments args; for (const auto mp : mols) { ovs.emplace_back(fpgen->getSparseFingerprint(*mp, args)); } mols.push_back(nullptr); // make sure we handle this properly auto mtvs1 = fpgen->getSparseFingerprints(mols, 1); CHECK(mtvs1.size() == ovs.size() + 1); for (auto fpi = 0u; fpi < ovs.size(); ++fpi) { CHECK(*ovs[fpi] == *mtvs1[fpi]); } CHECK(mtvs1.back().get() == nullptr); #ifdef RDK_BUILD_THREADSAFE_SSS auto mtvs4 = fpgen->getSparseFingerprints(mols, 4); CHECK(mtvs4.size() == ovs.size() + 1); for (auto fpi = 0u; fpi < ovs.size(); ++fpi) { CHECK(*ovs[fpi] == *mtvs4[fpi]); } CHECK(mtvs4.back().get() == nullptr); #endif } SECTION("getCountFingerprints") { std::vector>> ovs; FingerprintFuncArguments args; for (const auto mp : mols) { ovs.emplace_back(fpgen->getCountFingerprint(*mp, args)); } mols.push_back(nullptr); // make sure we handle this properly auto mtvs1 = fpgen->getCountFingerprints(mols, 1); 
CHECK(mtvs1.size() == ovs.size() + 1); for (auto fpi = 0u; fpi < ovs.size(); ++fpi) { CHECK(*ovs[fpi] == *mtvs1[fpi]); } CHECK(mtvs1.back().get() == nullptr); #ifdef RDK_BUILD_THREADSAFE_SSS auto mtvs4 = fpgen->getCountFingerprints(mols, 4); CHECK(mtvs4.size() == ovs.size() + 1); for (auto fpi = 0u; fpi < ovs.size(); ++fpi) { CHECK(*ovs[fpi] == *mtvs4[fpi]); } CHECK(mtvs4.back().get() == nullptr); #endif } SECTION("getSparseCountFingerprints") { std::vector>> ovs; FingerprintFuncArguments args; for (const auto mp : mols) { ovs.emplace_back(fpgen->getSparseCountFingerprint(*mp, args)); } mols.push_back(nullptr); // make sure we handle this properly auto mtvs1 = fpgen->getSparseCountFingerprints(mols, 1); CHECK(mtvs1.size() == ovs.size() + 1); for (auto fpi = 0u; fpi < ovs.size(); ++fpi) { CHECK(*ovs[fpi] == *mtvs1[fpi]); } CHECK(mtvs1.back().get() == nullptr); #ifdef RDK_BUILD_THREADSAFE_SSS auto mtvs4 = fpgen->getSparseCountFingerprints(mols, 4); CHECK(mtvs4.size() == ovs.size() + 1); for (auto fpi = 0u; fpi < ovs.size(); ++fpi) { CHECK(*ovs[fpi] == *mtvs4[fpi]); } CHECK(mtvs4.back().get() == nullptr); #endif } } /* Count simulation with unusual d_countBounds on the Morgan generator: four zeros and 2047 entries still give a 2048-bit fingerprint, while empty bounds and (1 << 11) + 1 entries must throw ValueErrorException. */ TEST_CASE("countBounds edge cases") { auto mol = "CC"_smiles; REQUIRE(mol); SECTION("just zeros") { std::unique_ptr> fpGenerator( MorganFingerprint::getMorganGenerator(2)); REQUIRE(fpGenerator); fpGenerator->getOptions()->df_countSimulation = true; fpGenerator->getOptions()->d_countBounds = {0, 0, 0, 0}; std::unique_ptr fp(fpGenerator->getFingerprint(*mol)); REQUIRE(fp); CHECK(fp->getNumBits() == 2048); } SECTION("empty bounds") { std::unique_ptr> fpGenerator( MorganFingerprint::getMorganGenerator(2)); REQUIRE(fpGenerator); fpGenerator->getOptions()->df_countSimulation = true; fpGenerator->getOptions()->d_countBounds.clear(); REQUIRE_THROWS_AS(fpGenerator->getFingerprint(*mol), ValueErrorException); } SECTION("really big") { std::unique_ptr> fpGenerator( MorganFingerprint::getMorganGenerator(2)); REQUIRE(fpGenerator); 
fpGenerator->getOptions()->df_countSimulation = true; fpGenerator->getOptions()->d_countBounds = std::vector((1 << 11) + 1, 0); REQUIRE_THROWS_AS(fpGenerator->getFingerprint(*mol), ValueErrorException); } SECTION("edge case") { std::unique_ptr> fpGenerator( MorganFingerprint::getMorganGenerator(2)); REQUIRE(fpGenerator); fpGenerator->getOptions()->df_countSimulation = true; fpGenerator->getOptions()->d_countBounds = std::vector(2047, 0); std::unique_ptr fp(fpGenerator->getFingerprint(*mol)); REQUIRE(fp); CHECK(fp->getNumBits() == 2048); } } /* Regression test for Github #7986: with df_includeChirality and df_includeRedundantEnvironments set, atoms 2 and 8 of mol1 share bits at radius 1 and differ at radius 2, while the hand-set chiral tags on mol2 must not separate atoms 2 and 8; atoms 4 and 10 always share bits. */ TEST_CASE("Github #7986: Morgan fingerprints, chirality, and radius") { // chiral, not obvious until radius 2: auto mol1 = "FC[C@H](F)CCl.FC[C@@H](F)CCl"_smiles; REQUIRE(mol1); // not chiral, but flagged as being so: auto mol2 = "FC[C@H](F)CF.FC[C@@H](F)CF"_smiles; REQUIRE(mol2); // set bogus chirality tags mol2->getAtomWithIdx(2)->setChiralTag(Atom::CHI_TETRAHEDRAL_CW); mol2->getAtomWithIdx(8)->setChiralTag(Atom::CHI_TETRAHEDRAL_CCW); MorganFingerprint::MorganArguments args; args.df_includeChirality = true; args.df_includeRedundantEnvironments = true; SECTION("radius=1") { args.d_radius = 1; std::unique_ptr> fpGenerator( MorganFingerprint::getMorganGenerator(args)); REQUIRE(fpGenerator); AdditionalOutput ao; ao.allocateBitInfoMap(); ao.allocateAtomToBits(); FingerprintFuncArguments fargs; fargs.additionalOutput = &ao; std::unique_ptr> fp( fpGenerator->getSparseCountFingerprint(*mol1, fargs)); REQUIRE(fp); // the actual chiral centers... 
we don't see these with radius=1: CHECK(ao.atomToBits->at(2) == ao.atomToBits->at(8)); // neighboring atoms, these are always the same CHECK(ao.atomToBits->at(4) == ao.atomToBits->at(10)); } SECTION("radius=2") { args.d_radius = 2; std::unique_ptr> fpGenerator( MorganFingerprint::getMorganGenerator(args)); REQUIRE(fpGenerator); AdditionalOutput ao; ao.allocateBitInfoMap(); ao.allocateAtomToBits(); FingerprintFuncArguments fargs; fargs.additionalOutput = &ao; std::unique_ptr> fp( fpGenerator->getSparseCountFingerprint(*mol1, fargs)); REQUIRE(fp); // the actual chiral centers... now we see them: CHECK(ao.atomToBits->at(2) != ao.atomToBits->at(8)); // neighboring atoms, these are always the same CHECK(ao.atomToBits->at(4) == ao.atomToBits->at(10)); fp = fpGenerator->getSparseCountFingerprint(*mol2, fargs); REQUIRE(fp); // no chirality here, so we should see the same bits: CHECK(ao.atomToBits->at(2) == ao.atomToBits->at(8)); CHECK(ao.atomToBits->at(4) == ao.atomToBits->at(10)); } } /* Regression test for github #6679: pins the atom-pair atom invariants and the single on bit of the sparse atom-pair fingerprint for C[I], C[Te], C[Sb], C[Sn], C[Xe] and C[Li]. */ TEST_CASE("github #6679: suspicious value for atom pair code calculation") { SECTION("invariants") { std::vector> data = { {"C[I]", {33, 449}}, {"C[Te]", {33, 417}}, {"C[Sb]", {33, 385}}, {"C[Sn]", {33, 481}}, {"C[Xe]", {33, 481}}, {"C[Li]", {33, 481}}, }; auto invg = AtomPair::AtomPairAtomInvGenerator(); for (const auto &pr : data) { auto mol = v2::SmilesParse::MolFromSmiles(pr.first); REQUIRE(mol); std::unique_ptr invs{invg.getAtomInvariants(*mol)}; INFO(pr.first); CHECK(*invs == pr.second); } } SECTION("fingerprints") { std::vector> data = { {"C[I]", 7918328}, {"C[Te]", 7918456}, {"C[Sb]", 7918584}, {"C[Sn]", 7918200}, {"C[Xe]", 7918200}, {"C[Li]", 7918200}, }; std::unique_ptr> fpg{ AtomPair::getAtomPairGenerator()}; for (const auto &pr : data) { auto mol = v2::SmilesParse::MolFromSmiles(pr.first); REQUIRE(mol); std::unique_ptr fp{fpg->getSparseFingerprint(*mol)}; INFO(pr.first); CHECK(fp->getNumOnBits() == 1); CHECK((*fp)[pr.second]); } } } /* Checks the atomsPerBit additional output for c1ccccn1 with the Morgan, RDKit, atom-pair and topological-torsion generators: one entry per on bit, plus spot checks of the atom lists stored for specific bits. */ TEST_CASE("atomsPerBit") { auto mol = 
"c1ccccn1"_smiles; REQUIRE(mol); AdditionalOutput ao; ao.allocateAtomsPerBit(); REQUIRE(ao.atomsPerBit); FingerprintFuncArguments args; args.additionalOutput = &ao; SECTION("Morgan") { unsigned radius = 2; std::unique_ptr> fpg( MorganFingerprint::getMorganGenerator(radius)); REQUIRE(fpg); auto fp = fpg->getFingerprint(*mol, args); auto &apb = *ao.atomsPerBit; REQUIRE(apb.size() == fp->getNumOnBits()); REQUIRE(apb[378].size() == 1); CHECK(apb[378][0] == std::vector({5})); REQUIRE(apb[1155].size() == 2); CHECK(apb[1155][0] == std::vector({3, 1, 2, 4, 5})); } SECTION("RDKit") { unsigned minPath = 1; unsigned maxPath = 5; std::unique_ptr> fpg( RDKitFP::getRDKitFPGenerator(minPath, maxPath)); REQUIRE(fpg); auto fp = fpg->getFingerprint(*mol, args); auto &apb = *ao.atomsPerBit; REQUIRE(apb.size() == fp->getNumOnBits()); REQUIRE(apb[104].size() == 2); REQUIRE(apb[104][0] == std::vector({0, 1, 2, 4, 5})); } SECTION("AP") { std::unique_ptr> fpg( AtomPair::getAtomPairGenerator()); REQUIRE(fpg); auto fp = fpg->getFingerprint(*mol, args); auto &apb = *ao.atomsPerBit; REQUIRE(apb.size() == fp->getNumOnBits()); REQUIRE(apb[1244].size() == 4); CHECK(apb[1244][0] == std::vector({0, 2})); } SECTION("TT") { std::unique_ptr> fpg( TopologicalTorsion::getTopologicalTorsionGenerator()); REQUIRE(fpg); auto fp = fpg->getFingerprint(*mol, args); auto &apb = *ao.atomsPerBit; REQUIRE(apb.size() == fp->getNumOnBits()); REQUIRE(apb[140].size() == 2); REQUIRE(apb[140][0] == std::vector({2, 1, 0, 5})); } }