mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-04 21:54:27 +08:00
946 lines
33 KiB
C++
946 lines
33 KiB
C++
//
|
|
// Copyright (C) 2019-2025 Greg Landrum
|
|
//
|
|
// @@ All Rights Reserved @@
|
|
// This file is part of the RDKit.
|
|
// The contents are covered by the terms of the BSD license
|
|
// which is included in the file license.txt, found at the root
|
|
// of the RDKit source tree.
|
|
//
|
|
|
|
#include <catch2/catch_all.hpp>
|
|
|
|
#include <memory>
|
|
#include <RDGeneral/test.h>
|
|
#include <GraphMol/RDKitBase.h>
|
|
#include <GraphMol/MolBundle.h>
|
|
#include <GraphMol/SmilesParse/SmilesParse.h>
|
|
#include <GraphMol/FileParsers/FileParsers.h>
|
|
#include <GraphMol/Fingerprints/Fingerprints.h>
|
|
#include <GraphMol/Fingerprints/MorganFingerprints.h>
|
|
#include <RDGeneral/Exceptions.h>
|
|
#include <GraphMol/Fingerprints/RDKitFPGenerator.h>
|
|
#include <GraphMol/Fingerprints/MorganGenerator.h>
|
|
#include <GraphMol/Fingerprints/TopologicalTorsionGenerator.h>
|
|
#include <GraphMol/Fingerprints/AtomPairGenerator.h>
|
|
#include <GraphMol/Substruct/SubstructMatch.h>
|
|
#include <DataStructs/ExplicitBitVect.h>
|
|
#include <DataStructs/BitOps.h>
|
|
#include <string>
|
|
|
|
using namespace RDKit;
|
|
|
|
// Regression test for GitHub issue 2051: the pattern fingerprint of a
// wildcard-only SMARTS query must pass AllProbeBitsMatch() against the pattern
// fingerprint of a real molecule.
TEST_CASE("Github 2051", "[patternfp][bug]") {
  auto mol = "CCC1CC1"_smiles;
  // Guard the dereference below, as the other tests in this file do.
  REQUIRE(mol);
  std::unique_ptr<ExplicitBitVect> mfp(PatternFingerprintMol(*mol));
  REQUIRE(mfp);

  SECTION("basics1") {
    // query: two bonded wildcard atoms
    auto qmol = "**"_smarts;
    REQUIRE(qmol);
    std::unique_ptr<ExplicitBitVect> qfp(PatternFingerprintMol(*qmol));
    REQUIRE(qfp);

    CHECK(AllProbeBitsMatch(*qfp, *mfp));
  }
  SECTION("basics2") {
    // query: a single wildcard atom
    auto qmol = "*"_smarts;
    REQUIRE(qmol);
    std::unique_ptr<ExplicitBitVect> qfp(PatternFingerprintMol(*qmol));
    REQUIRE(qfp);
    CHECK(AllProbeBitsMatch(*qfp, *mfp));
  }
}
|
|
|
|
// Regression test for GitHub issue 2614: the pattern fingerprint of a
// naphthalene query must pass AllProbeBitsMatch() against the pattern
// fingerprint of a multi-fragment molecule that contains naphthalene as one
// of its fragments.
TEST_CASE("Github 2614", "[patternfp][bug]") {
  SECTION("basics") {
    // nine PF6- fragments followed by naphthalene
    auto target =
        "F[P-](F)(F)(F)(F)F.F[P-](F)(F)(F)(F)F.F[P-](F)(F)(F)(F)F.F[P-](F)(F)(F)(F)F.F[P-](F)(F)(F)(F)F.F[P-](F)(F)(F)(F)F.F[P-](F)(F)(F)(F)F.F[P-](F)(F)(F)(F)F.F[P-](F)(F)(F)(F)F.c1ccc2ccccc2c1"_smiles;
    REQUIRE(target);
    std::unique_ptr<ExplicitBitVect> targetFp{PatternFingerprintMol(*target)};
    REQUIRE(targetFp);

    auto probe = "c1ccc2ccccc2c1"_smiles;
    REQUIRE(probe);
    std::unique_ptr<ExplicitBitVect> probeFp{PatternFingerprintMol(*probe)};
    REQUIRE(probeFp);

    CHECK(AllProbeBitsMatch(*probeFp, *targetFp));
  }
}
|
|
|
|
// Regression test for GitHub issue 1761: requesting a hashed Morgan
// fingerprint with the third argument (nBits) set to zero must throw a
// ValueErrorException whose message is exactly "nBits can not be zero".
TEST_CASE("Github 1761", "[patternfp][bug]") {
  SECTION("throw ValueErrorException") {
    auto mol = "CCC1CC1"_smiles;
    // Guard the dereference below, as the other tests in this file do.
    REQUIRE(mol);
    // The Catch2 matcher checks both the exception type and its what() text,
    // and fails the test if nothing is thrown.
    REQUIRE_THROWS_MATCHES(
        RDKit::MorganFingerprints::getHashedFingerprint(*mol, 0, 0),
        ValueErrorException,
        Catch::Matchers::Message("nBits can not be zero"));
  }
}
|
|
|
|
// Exercises the bits-per-feature option of the RDKit fingerprint generator on
// "CCCO" with paths of length 1-2: the default setting yields 8 on-bits and
// reports "bitsPerFeature=2", while setting d_numBitsPerFeature to 1 yields 4
// on-bits and reports "bitsPerFeature=1".
TEST_CASE("RDKit bits per feature", "[fpgenerator][rdkit]") {
  auto m1 = "CCCO"_smiles;
  REQUIRE(m1);

  SECTION("defaults") {
    const unsigned int shortestPath = 1;
    const unsigned int longestPath = 2;
    std::unique_ptr<FingerprintGenerator<std::uint64_t>> generator{
        RDKitFP::getRDKitFPGenerator<std::uint64_t>(shortestPath,
                                                    longestPath)};
    REQUIRE(generator);

    std::unique_ptr<ExplicitBitVect> bits{generator->getFingerprint(*m1)};
    REQUIRE(bits);
    CHECK(bits->getNumBits() == 2048);
    CHECK(bits->getNumOnBits() == 8);

    const auto description = generator->infoString();
    CHECK(description.find("bitsPerFeature=2") != std::string::npos);
  }

  SECTION("change numBitsPerFeature") {
    const unsigned int shortestPath = 1;
    const unsigned int longestPath = 2;
    std::unique_ptr<FingerprintGenerator<std::uint64_t>> generator{
        RDKitFP::getRDKitFPGenerator<std::uint64_t>(shortestPath,
                                                    longestPath)};
    REQUIRE(generator);

    // override the option before generating the fingerprint
    generator->getOptions()->d_numBitsPerFeature = 1;

    std::unique_ptr<ExplicitBitVect> bits{generator->getFingerprint(*m1)};
    REQUIRE(bits);
    CHECK(bits->getNumBits() == 2048);
    CHECK(bits->getNumOnBits() == 4);

    const auto description = generator->infoString();
    CHECK(description.find("bitsPerFeature=1") != std::string::npos);
  }
}
|
|
|
|
// Checks that the pattern fingerprint of a MolBundle equals the bitwise AND
// of the pattern fingerprints of its member molecules, and that this
// intersection is not empty for the two diols used here.
TEST_CASE("pattern fingerprints for MolBundles", "[patternfp]") {
  SECTION("basics") {
    boost::shared_ptr<ROMol> diolC2(SmilesToMol("OCCO"));
    REQUIRE(diolC2);
    boost::shared_ptr<ROMol> diolC3(SmilesToMol("OCCCO"));
    REQUIRE(diolC3);

    std::unique_ptr<ExplicitBitVect> fpC2(PatternFingerprintMol(*diolC2));
    REQUIRE(fpC2);
    std::unique_ptr<ExplicitBitVect> fpC3(PatternFingerprintMol(*diolC3));
    REQUIRE(fpC3);

    MolBundle bundle;
    bundle.addMol(diolC2);
    bundle.addMol(diolC3);
    std::unique_ptr<ExplicitBitVect> bundleFp(PatternFingerprintMol(bundle));
    REQUIRE(bundleFp);

    // bits common to both members
    auto shared = (*fpC2) & (*fpC3);
    CHECK(shared.getNumOnBits() > 0);
    CHECK(shared.getNumOnBits() == bundleFp->getNumOnBits());
    CHECK(shared == *bundleFp);
  }
}
|
|
|
|
// Verifies the AdditionalOutput (bitInfoMap, atomToBits, atomCounts) filled in
// by a radius-1 Morgan generator for "CCC(CC)CO". Each section checks the
// same expectations against two fingerprint flavours: bit vector and count
// vector (folded), or sparse bit vector and sparse count vector (unfolded).
TEST_CASE("MorganGenerator bit info", "[fpgenerator][morgan]") {
  auto m1 = "CCC(CC)CO"_smiles;
  REQUIRE(m1);
  const unsigned radius = 1;
  std::unique_ptr<FingerprintGenerator<std::uint64_t>> morganGen{
      MorganFingerprint::getMorganGenerator<std::uint64_t>(radius)};
  REQUIRE(morganGen);
  const std::vector<std::uint32_t> *fromAtoms = nullptr;
  const std::vector<std::uint32_t> *ignoreAtoms = nullptr;
  const int confId = -1;

  SECTION("folded bitInfoMap") {
    // only a subset of the set bits is spelled out here
    const AdditionalOutput::bitInfoMapType wanted = {
        {1, {{2, 0}}},
        {80, {{1, 0}, {3, 0}, {5, 0}}},
        {294, {{0, 1}, {4, 1}}}};

    {
      AdditionalOutput extras;
      extras.allocateBitInfoMap();
      std::unique_ptr<ExplicitBitVect> bv{morganGen->getFingerprint(
          *m1, fromAtoms, ignoreAtoms, confId, &extras)};
      CHECK(bv->getNumOnBits() == extras.bitInfoMap->size());

      for (const auto &entry : wanted) {
        const auto &bitId = entry.first;
        const auto &environments = entry.second;
        CHECK((*extras.bitInfoMap)[bitId] == environments);
        CHECK(bv->getBit(bitId));
      }
    }
    {
      AdditionalOutput extras;
      extras.allocateBitInfoMap();
      std::unique_ptr<SparseIntVect<std::uint32_t>> counts{
          morganGen->getCountFingerprint(*m1, fromAtoms, ignoreAtoms, confId,
                                         &extras)};
      CHECK(counts->getNonzeroElements().size() == extras.bitInfoMap->size());

      for (const auto &entry : wanted) {
        const auto &bitId = entry.first;
        const auto &environments = entry.second;
        CHECK((*extras.bitInfoMap)[bitId] == environments);
        CHECK(counts->getVal(bitId));
      }
    }
  }

  SECTION("folded atomToBits atomCounts") {
    // one entry per atom
    const AdditionalOutput::atomToBitsType wantedBits = {
        {1057, 294}, {80, 1544}, {1, 1420}, {80, 1544},
        {1057, 294}, {80, 482},  {807, 222}};
    const AdditionalOutput::atomCountsType wantedCounts = {2, 2, 2, 2,
                                                           2, 2, 2};

    {
      AdditionalOutput extras;
      extras.allocateAtomCounts();
      extras.allocateAtomToBits();
      std::unique_ptr<ExplicitBitVect> bv{morganGen->getFingerprint(
          *m1, fromAtoms, ignoreAtoms, confId, &extras)};
      CHECK(*extras.atomToBits == wantedBits);
      CHECK(*extras.atomCounts == wantedCounts);
    }
    {
      AdditionalOutput extras;
      extras.allocateAtomCounts();
      extras.allocateAtomToBits();
      std::unique_ptr<SparseIntVect<std::uint32_t>> counts{
          morganGen->getCountFingerprint(*m1, fromAtoms, ignoreAtoms, confId,
                                         &extras)};
      CHECK(*extras.atomToBits == wantedBits);
      CHECK(*extras.atomCounts == wantedCounts);
    }
  }

  SECTION("unfolded bitInfoMap") {
    // only a subset of the set bits is spelled out here
    const AdditionalOutput::bitInfoMapType wanted = {
        {2245273601, {{2, 0}}},
        {2245384272, {{1, 0}, {3, 0}, {5, 0}}},
        {3542456614, {{0, 1}, {4, 1}}}};
    {
      AdditionalOutput extras;
      extras.allocateBitInfoMap();
      std::unique_ptr<SparseBitVect> sparseBv{morganGen->getSparseFingerprint(
          *m1, fromAtoms, ignoreAtoms, confId, &extras)};
      CHECK(sparseBv->getNumOnBits() == extras.bitInfoMap->size());

      for (const auto &entry : wanted) {
        const auto &bitId = entry.first;
        const auto &environments = entry.second;
        CHECK((*extras.bitInfoMap)[bitId] == environments);
        CHECK(sparseBv->getBit(bitId));
      }
    }
    {
      AdditionalOutput extras;
      extras.allocateBitInfoMap();
      std::unique_ptr<SparseIntVect<std::uint64_t>> sparseCounts{
          morganGen->getSparseCountFingerprint(*m1, fromAtoms, ignoreAtoms,
                                               confId, &extras)};
      CHECK(sparseCounts->getNonzeroElements().size() ==
            extras.bitInfoMap->size());

      for (const auto &entry : wanted) {
        const auto &bitId = entry.first;
        const auto &environments = entry.second;
        CHECK((*extras.bitInfoMap)[bitId] == environments);
        CHECK(sparseCounts->getVal(bitId));
      }
    }
  }

  SECTION("unfolded atomToBits atomCounts") {
    // one entry per atom
    const AdditionalOutput::atomToBitsType wantedBits = {
        {2246728737, 3542456614}, {2245384272, 1506563592},
        {2245273601, 3098934668}, {2245384272, 1506563592},
        {2246728737, 3542456614}, {2245384272, 4022716898},
        {864662311, 1535166686}};
    const AdditionalOutput::atomCountsType wantedCounts = {2, 2, 2, 2,
                                                           2, 2, 2};

    {
      AdditionalOutput extras;
      extras.allocateAtomCounts();
      extras.allocateAtomToBits();
      std::unique_ptr<SparseBitVect> sparseBv{morganGen->getSparseFingerprint(
          *m1, fromAtoms, ignoreAtoms, confId, &extras)};
      CHECK(*extras.atomToBits == wantedBits);
      CHECK(*extras.atomCounts == wantedCounts);
    }
    {
      AdditionalOutput extras;
      extras.allocateAtomCounts();
      extras.allocateAtomToBits();
      std::unique_ptr<SparseIntVect<std::uint64_t>> sparseCounts{
          morganGen->getSparseCountFingerprint(*m1, fromAtoms, ignoreAtoms,
                                               confId, &extras)};
      CHECK(*extras.atomToBits == wantedBits);
      CHECK(*extras.atomCounts == wantedCounts);
    }
  }
}
|
|
|
|
// Verifies the AdditionalOutput (bitPaths, atomToBits, atomCounts) filled in
// by the RDKit fingerprint generator for "CCCO" with paths of length 1-3.
// Each section checks the same expectations against two fingerprint flavours:
// bit vector and count vector (folded), or sparse bit vector and sparse count
// vector (unfolded).
TEST_CASE("RDKitGenerator bit info", "[fpgenerator][RDKit]") {
  auto m1 = "CCCO"_smiles;
  REQUIRE(m1);
  const unsigned int shortestPath = 1;
  const unsigned int longestPath = 3;
  std::unique_ptr<FingerprintGenerator<std::uint64_t>> rdkGen{
      RDKitFP::getRDKitFPGenerator<std::uint64_t>(shortestPath, longestPath)};
  REQUIRE(rdkGen);
  const std::vector<std::uint32_t> *fromAtoms = nullptr;
  const std::vector<std::uint32_t> *ignoreAtoms = nullptr;
  const int confId = -1;

  SECTION("folded bitInfo") {
    // bit id -> list of paths setting that bit
    // clang-format off
    const AdditionalOutput::bitPathsType wantedPaths = {
      {562,  {{2}}},
      {709,  {{0, 1}}},
      {1118, {{0, 1, 2}}},
      {1183, {{1, 2}}},
      {1233, {{0, 1, 2}}},
      {1308, {{0}, {1}}},
      {1339, {{2}}},
      {1728, {{1, 2}}},
      {1772, {{0}, {1}}},
      {1813, {{0, 1}}},
    };
    // clang-format on

    {
      AdditionalOutput extras;
      extras.allocateBitPaths();
      std::unique_ptr<ExplicitBitVect> bv{rdkGen->getFingerprint(
          *m1, fromAtoms, ignoreAtoms, confId, &extras)};

      CHECK(*extras.bitPaths == wantedPaths);
    }
    {
      AdditionalOutput extras;
      extras.allocateBitPaths();
      std::unique_ptr<SparseIntVect<std::uint32_t>> counts{
          rdkGen->getCountFingerprint(*m1, fromAtoms, ignoreAtoms, confId,
                                      &extras)};
      CHECK(*extras.bitPaths == wantedPaths);
    }
  }

  SECTION("folded atomToBits atomCounts") {
    // one entry per atom
    const AdditionalOutput::atomToBitsType wantedBits = {
        {1308, 1772, 1813, 709, 1233, 1118},
        {1308, 1772, 1813, 709, 1728, 1183, 1233, 1118},
        {1308, 1772, 1339, 562, 1813, 709, 1728, 1183, 1233, 1118},
        {1339, 562, 1728, 1183, 1233, 1118}};
    const AdditionalOutput::atomCountsType wantedCounts = {6, 10, 10, 6};

    {
      AdditionalOutput extras;
      extras.allocateAtomCounts();
      extras.allocateAtomToBits();
      std::unique_ptr<ExplicitBitVect> bv{rdkGen->getFingerprint(
          *m1, fromAtoms, ignoreAtoms, confId, &extras)};
      CHECK(*extras.atomToBits == wantedBits);
      CHECK(*extras.atomCounts == wantedCounts);
    }
    {
      AdditionalOutput extras;
      extras.allocateAtomCounts();
      extras.allocateAtomToBits();
      std::unique_ptr<SparseIntVect<std::uint32_t>> counts{
          rdkGen->getCountFingerprint(*m1, fromAtoms, ignoreAtoms, confId,
                                      &extras)};
      CHECK(*extras.atomToBits == wantedBits);
      CHECK(*extras.atomCounts == wantedCounts);
    }
  }

  SECTION("unfolded bitInfo") {
    // bit id -> list of paths setting that bit
    // clang-format off
    const AdditionalOutput::bitPathsType wantedPaths = {
      {54413874,   {{2}}},
      {257418334,  {{0, 1, 2}}},
      {986785516,  {{0}, {1}}},
      {1135572127, {{1, 2}}},
      {1433230021, {{0, 1}}},
      {1524090560, {{1, 2}}},
      {1940446997, {{0, 1}}},
      {3977409745, {{0, 1, 2}}},
      {4274652475, {{2}}},
      {4275705116, {{0}, {1}}},
    };
    // clang-format on

    {
      AdditionalOutput extras;
      extras.allocateBitPaths();
      std::unique_ptr<SparseBitVect> sparseBv{rdkGen->getSparseFingerprint(
          *m1, fromAtoms, ignoreAtoms, confId, &extras)};
      CHECK(*extras.bitPaths == wantedPaths);
    }
    {
      AdditionalOutput extras;
      extras.allocateBitPaths();
      std::unique_ptr<SparseIntVect<std::uint64_t>> sparseCounts{
          rdkGen->getSparseCountFingerprint(*m1, fromAtoms, ignoreAtoms,
                                            confId, &extras)};
      CHECK(*extras.bitPaths == wantedPaths);
    }
  }

  SECTION("unfolded atomToBits atomCounts") {
    // one entry per atom
    const AdditionalOutput::atomToBitsType wantedBits = {
        {4275705116, 986785516, 1940446997, 1433230021, 3977409745, 257418334},
        {4275705116, 986785516, 1940446997, 1433230021, 1524090560, 1135572127,
         3977409745, 257418334},
        {4275705116, 986785516, 4274652475, 54413874, 1940446997, 1433230021,
         1524090560, 1135572127, 3977409745, 257418334},
        {4274652475, 54413874, 1524090560, 1135572127, 3977409745, 257418334}};
    const AdditionalOutput::atomCountsType wantedCounts = {6, 10, 10, 6};

    {
      AdditionalOutput extras;
      extras.allocateAtomCounts();
      extras.allocateAtomToBits();
      std::unique_ptr<SparseBitVect> sparseBv{rdkGen->getSparseFingerprint(
          *m1, fromAtoms, ignoreAtoms, confId, &extras)};
      CHECK(*extras.atomToBits == wantedBits);
      CHECK(*extras.atomCounts == wantedCounts);
    }
    {
      AdditionalOutput extras;
      extras.allocateAtomCounts();
      extras.allocateAtomToBits();
      std::unique_ptr<SparseIntVect<std::uint64_t>> sparseCounts{
          rdkGen->getSparseCountFingerprint(*m1, fromAtoms, ignoreAtoms,
                                            confId, &extras)};
      CHECK(*extras.atomToBits == wantedBits);
      CHECK(*extras.atomCounts == wantedCounts);
    }
  }
}
|
|
|
|
// Verifies the AdditionalOutput (bitPaths, atomToBits, atomCounts) filled in
// by a default topological torsion generator for "CCCCS". The bit-vector and
// count-vector flavours are expected to report different bit ids here, so each
// flavour carries its own expectation table.
TEST_CASE("TopologicalTorsionGenerator bit info", "[fpgenerator][TT]") {
  auto m1 = "CCCCS"_smiles;
  REQUIRE(m1);
  std::unique_ptr<FingerprintGenerator<std::uint64_t>> ttGen{
      TopologicalTorsion::getTopologicalTorsionGenerator<std::uint64_t>()};
  REQUIRE(ttGen);
  const std::vector<std::uint32_t> *fromAtoms = nullptr;
  const std::vector<std::uint32_t> *ignoreAtoms = nullptr;
  const int confId = -1;

  SECTION("folded bitInfo") {
    {
      // clang-format off
      const AdditionalOutput::bitPathsType wantedPaths = {
        {0,    {{0, 1, 2, 3}}},
        {1536, {{1, 2, 3, 4}}}
      };
      // clang-format on
      AdditionalOutput extras;
      extras.allocateBitPaths();
      std::unique_ptr<ExplicitBitVect> bv{ttGen->getFingerprint(
          *m1, fromAtoms, ignoreAtoms, confId, &extras)};
      CHECK(*extras.bitPaths == wantedPaths);
    }
    {
      // clang-format off
      const AdditionalOutput::bitPathsType wantedPaths = {
        {0,    {{0, 1, 2, 3}}},
        {1920, {{1, 2, 3, 4}}}
      };
      // clang-format on

      AdditionalOutput extras;
      extras.allocateBitPaths();
      std::unique_ptr<SparseIntVect<std::uint32_t>> counts{
          ttGen->getCountFingerprint(*m1, fromAtoms, ignoreAtoms, confId,
                                     &extras)};
      CHECK(*extras.bitPaths == wantedPaths);
    }
  }

  SECTION("folded atomToBits atomCounts") {
    // shared by both fingerprint flavours
    const AdditionalOutput::atomCountsType wantedCounts = {1, 2, 2, 2, 1};

    {
      const AdditionalOutput::atomToBitsType wantedBits = {
          {0}, {0, 1536}, {0, 1536}, {0, 1536}, {1536}};
      AdditionalOutput extras;
      extras.allocateAtomCounts();
      extras.allocateAtomToBits();
      std::unique_ptr<ExplicitBitVect> bv{ttGen->getFingerprint(
          *m1, fromAtoms, ignoreAtoms, confId, &extras)};
      CHECK(*extras.atomToBits == wantedBits);
      CHECK(*extras.atomCounts == wantedCounts);
    }
    {
      const AdditionalOutput::atomToBitsType wantedBits = {
          {0}, {0, 1920}, {0, 1920}, {0, 1920}, {1920}};
      AdditionalOutput extras;
      extras.allocateAtomCounts();
      extras.allocateAtomToBits();
      std::unique_ptr<SparseIntVect<std::uint32_t>> counts{
          ttGen->getCountFingerprint(*m1, fromAtoms, ignoreAtoms, confId,
                                     &extras)};
      CHECK(*extras.atomToBits == wantedBits);
      CHECK(*extras.atomCounts == wantedCounts);
    }
  }

  SECTION("unfolded bitInfo") {
    {
      // clang-format off
      const AdditionalOutput::bitPathsType wantedPaths = {
        {1046740484, {{1, 2, 3, 4}}},
        {1048313860, {{0, 1, 2, 3}}},
      };
      // clang-format on
      AdditionalOutput extras;
      extras.allocateBitPaths();
      std::unique_ptr<SparseBitVect> sparseBv{ttGen->getSparseFingerprint(
          *m1, fromAtoms, ignoreAtoms, confId, &extras)};
      CHECK(*extras.bitPaths == wantedPaths);
    }
    {
      // clang-format off
      const AdditionalOutput::bitPathsType wantedPaths = {
        {4437590048,  {{0, 1, 2, 3}}},
        {30073176097, {{1, 2, 3, 4}}}
      };
      // clang-format on
      AdditionalOutput extras;
      extras.allocateBitPaths();
      std::unique_ptr<SparseIntVect<std::uint64_t>> sparseCounts{
          ttGen->getSparseCountFingerprint(*m1, fromAtoms, ignoreAtoms, confId,
                                           &extras)};

      CHECK(*extras.bitPaths == wantedPaths);
    }
  }

  SECTION("unfolded atomToBits atomCounts") {
    // shared by both fingerprint flavours
    const AdditionalOutput::atomCountsType wantedCounts = {1, 2, 2, 2, 1};

    {
      const AdditionalOutput::atomToBitsType wantedBits = {
          {1048313860},
          {1046740484, 1048313860},
          {1046740484, 1048313860},
          {1046740484, 1048313860},
          {1046740484}};
      AdditionalOutput extras;
      extras.allocateAtomCounts();
      extras.allocateAtomToBits();
      std::unique_ptr<SparseBitVect> sparseBv{ttGen->getSparseFingerprint(
          *m1, fromAtoms, ignoreAtoms, confId, &extras)};
      CHECK(*extras.atomToBits == wantedBits);
      CHECK(*extras.atomCounts == wantedCounts);
    }
    {
      const AdditionalOutput::atomToBitsType wantedBits = {
          {4437590048},
          {4437590048, 30073176097},
          {4437590048, 30073176097},
          {4437590048, 30073176097},
          {30073176097}};
      AdditionalOutput extras;
      extras.allocateAtomCounts();
      extras.allocateAtomToBits();
      std::unique_ptr<SparseIntVect<std::uint64_t>> sparseCounts{
          ttGen->getSparseCountFingerprint(*m1, fromAtoms, ignoreAtoms, confId,
                                           &extras)};
      CHECK(*extras.atomToBits == wantedBits);
      CHECK(*extras.atomCounts == wantedCounts);
    }
  }
}
|
|
|
|
// Verifies the AdditionalOutput (bitInfoMap, atomToBits, atomCounts) filled in
// by a default atom pair generator for "CCO". The bit-vector and count-vector
// flavours are expected to report different bit ids here, so each flavour
// carries its own expectation table.
TEST_CASE("AtomPairGenerator bit info", "[fpgenerator][AP]") {
  auto m1 = "CCO"_smiles;
  REQUIRE(m1);
  std::unique_ptr<FingerprintGenerator<std::uint64_t>> apGen{
      AtomPair::getAtomPairGenerator<std::uint64_t>()};
  REQUIRE(apGen);
  const std::vector<std::uint32_t> *fromAtoms = nullptr;
  const std::vector<std::uint32_t> *ignoreAtoms = nullptr;
  const int confId = -1;

  SECTION("folded bitInfo") {
    {
      // bit id -> atom index pairs
      const AdditionalOutput::bitInfoMapType wantedInfo = {
          {1404, {{0, 1}}},
          {1916, {{0, 2}}},
          {1596, {{1, 2}}},
      };
      AdditionalOutput extras;
      extras.allocateBitInfoMap();
      std::unique_ptr<ExplicitBitVect> bv{apGen->getFingerprint(
          *m1, fromAtoms, ignoreAtoms, confId, &extras)};
      CHECK(*extras.bitInfoMap == wantedInfo);
    }
    {
      const AdditionalOutput::bitInfoMapType wantedInfo = {
          {1375, {{0, 1}}},
          {1503, {{0, 2}}},
          {1423, {{1, 2}}},
      };
      AdditionalOutput extras;
      extras.allocateBitInfoMap();
      std::unique_ptr<SparseIntVect<std::uint32_t>> counts{
          apGen->getCountFingerprint(*m1, fromAtoms, ignoreAtoms, confId,
                                     &extras)};
      CHECK(*extras.bitInfoMap == wantedInfo);
    }
  }

  SECTION("folded atomToBits atomCounts") {
    // shared by both fingerprint flavours
    const AdditionalOutput::atomCountsType wantedCounts = {2, 2, 2};

    {
      const AdditionalOutput::atomToBitsType wantedBits = {
          {1404, 1916},
          {1404, 1596},
          {1596, 1916},
      };
      AdditionalOutput extras;
      extras.allocateAtomCounts();
      extras.allocateAtomToBits();
      std::unique_ptr<ExplicitBitVect> bv{apGen->getFingerprint(
          *m1, fromAtoms, ignoreAtoms, confId, &extras)};
      CHECK(*extras.atomToBits == wantedBits);
      CHECK(*extras.atomCounts == wantedCounts);
    }
    {
      const AdditionalOutput::atomToBitsType wantedBits = {
          {1375, 1503},
          {1375, 1423},
          {1503, 1423},
      };
      AdditionalOutput extras;
      extras.allocateAtomCounts();
      extras.allocateAtomToBits();
      std::unique_ptr<SparseIntVect<std::uint32_t>> counts{
          apGen->getCountFingerprint(*m1, fromAtoms, ignoreAtoms, confId,
                                     &extras)};
      CHECK(*extras.atomToBits == wantedBits);
      CHECK(*extras.atomCounts == wantedCounts);
    }
  }

  SECTION("unfolded bitInfo") {
    {
      const AdditionalOutput::bitInfoMapType wantedInfo = {
          {7918972, {{0, 1}}},
          {7919484, {{0, 2}}},
          {8066620, {{1, 2}}},
      };
      AdditionalOutput extras;
      extras.allocateBitInfoMap();
      std::unique_ptr<SparseBitVect> sparseBv{apGen->getSparseFingerprint(
          *m1, fromAtoms, ignoreAtoms, confId, &extras)};
      CHECK(*extras.bitInfoMap == wantedInfo);
    }
    {
      const AdditionalOutput::bitInfoMapType wantedInfo = {
          {558113, {{0, 1}}},
          {1590306, {{0, 2}}},
          {1590337, {{1, 2}}},
      };
      AdditionalOutput extras;
      extras.allocateBitInfoMap();
      std::unique_ptr<SparseIntVect<std::uint64_t>> sparseCounts{
          apGen->getSparseCountFingerprint(*m1, fromAtoms, ignoreAtoms, confId,
                                           &extras)};
      CHECK(*extras.bitInfoMap == wantedInfo);
    }
  }

  SECTION("unfolded atomToBits atomCounts") {
    // shared by both fingerprint flavours
    const AdditionalOutput::atomCountsType wantedCounts = {2, 2, 2};

    {
      const AdditionalOutput::atomToBitsType wantedBits = {
          {7918972, 7919484}, {7918972, 8066620}, {7919484, 8066620}};
      AdditionalOutput extras;
      extras.allocateAtomCounts();
      extras.allocateAtomToBits();
      std::unique_ptr<SparseBitVect> sparseBv{apGen->getSparseFingerprint(
          *m1, fromAtoms, ignoreAtoms, confId, &extras)};
      CHECK(*extras.atomToBits == wantedBits);
      CHECK(*extras.atomCounts == wantedCounts);
    }
    {
      const AdditionalOutput::atomToBitsType wantedBits = {
          {558113, 1590306}, {558113, 1590337}, {1590306, 1590337}};
      AdditionalOutput extras;
      extras.allocateAtomCounts();
      extras.allocateAtomToBits();
      std::unique_ptr<SparseIntVect<std::uint64_t>> sparseCounts{
          apGen->getSparseCountFingerprint(*m1, fromAtoms, ignoreAtoms, confId,
                                           &extras)};
      CHECK(*extras.atomToBits == wantedBits);
      CHECK(*extras.atomCounts == wantedCounts);
    }
  }
}
|
|
|
|
// Reuses a single AdditionalOutput object across three consecutive
// getFingerprint() calls (3-atom, 4-atom, then the 3-atom molecule again) and
// checks after each call that the sizes of its containers match the molecule
// and fingerprint of that call.
TEST_CASE("Generators, bit info, and multiple calls", "[fpgenerator]") {
  std::unique_ptr<FingerprintGenerator<std::uint64_t>> apGen{
      AtomPair::getAtomPairGenerator<std::uint64_t>()};
  REQUIRE(apGen);
  const std::vector<std::uint32_t> *fromAtoms = nullptr;
  const std::vector<std::uint32_t> *ignoreAtoms = nullptr;
  const int confId = -1;

  // allocated once, shared by every call below
  AdditionalOutput reused;
  reused.allocateBitInfoMap();
  reused.allocateAtomCounts();
  reused.allocateAtomToBits();

  // first call: three atoms
  auto m1 = "CCO"_smiles;
  REQUIRE(m1);
  std::unique_ptr<ExplicitBitVect> bv{
      apGen->getFingerprint(*m1, fromAtoms, ignoreAtoms, confId, &reused)};
  CHECK(reused.bitInfoMap->size() == bv->getNumOnBits());
  CHECK(reused.atomCounts->size() == m1->getNumAtoms());
  CHECK(reused.atomToBits->size() == m1->getNumAtoms());

  // second call: a larger molecule
  auto m2 = "CCON"_smiles;
  REQUIRE(m2);
  bv.reset(
      apGen->getFingerprint(*m2, fromAtoms, ignoreAtoms, confId, &reused));
  CHECK(reused.bitInfoMap->size() == bv->getNumOnBits());
  CHECK(reused.atomCounts->size() == m2->getNumAtoms());
  CHECK(reused.atomToBits->size() == m2->getNumAtoms());

  // third call: back to the smaller molecule
  bv.reset(
      apGen->getFingerprint(*m1, fromAtoms, ignoreAtoms, confId, &reused));
  CHECK(reused.bitInfoMap->size() == bv->getNumOnBits());
  CHECK(reused.atomCounts->size() == m1->getNumAtoms());
  CHECK(reused.atomToBits->size() == m1->getNumAtoms());
}
|
|
|
|
// Regression test for GitHub issue 4212: the vectors returned by
// getUnfoldedRDKFingerprintMol() must report the same length for different
// molecules.
TEST_CASE(
    "github #4212: UnfoldedRDKFingerprintCountBased returns a different "
    "fingerprint length for every molecule") {
  auto benzene = "c1ccccc1"_smiles;
  REQUIRE(benzene);
  auto butane = "CCCC"_smiles;
  REQUIRE(butane);

  std::unique_ptr<SparseIntVect<boost::uint64_t>> benzeneFp(
      getUnfoldedRDKFingerprintMol(*benzene));
  REQUIRE(benzeneFp);
  std::unique_ptr<SparseIntVect<boost::uint64_t>> butaneFp(
      getUnfoldedRDKFingerprintMol(*butane));
  REQUIRE(butaneFp);

  CHECK(benzeneFp->getLength() == butaneFp->getLength());
}
|
|
|
|
// With count simulation switched on, replacing the RDKit generator's
// d_countBounds must change the number of on-bits while the fingerprint size
// stays at 2048.
TEST_CASE("RDKit set countBounds", "[fpgenerator][rdkit]") {
  auto m1 = "COc1ccc(CCNC(=O)c2ccccc2C(=O)NCCc2ccc(OC)cc2)cc1"_smiles;
  REQUIRE(m1);

  SECTION("change countBounds") {
    const unsigned int shortestPath = 1;
    const unsigned int longestPath = 7;
    std::unique_ptr<FingerprintGenerator<std::uint64_t>> generator{
        RDKitFP::getRDKitFPGenerator<std::uint64_t>(shortestPath,
                                                    longestPath)};
    REQUIRE(generator);

    // baseline: count simulation with whatever bounds the generator starts with
    generator->getOptions()->df_countSimulation = true;
    std::unique_ptr<ExplicitBitVect> baseline{generator->getFingerprint(*m1)};
    REQUIRE(baseline);
    CHECK(baseline->getNumBits() == 2048);

    // same molecule, custom bounds
    generator->getOptions()->d_countBounds = {2, 8, 16, 32};
    std::unique_ptr<ExplicitBitVect> rebounded{generator->getFingerprint(*m1)};
    REQUIRE(rebounded);
    CHECK(rebounded->getNumBits() == 2048);
    CHECK(rebounded->getNumOnBits() != baseline->getNumOnBits());
  }
}
|
|
|
|
// Regression test for GitHub issue 5036: for "CC[2H]" the Morgan connectivity
// invariants of atoms 0 and 1 must be equal.
TEST_CASE(
    "github #5036: Neighboring Hs not taken into account in connectivity "
    "invariants",
    "[morgan]") {
  SECTION("basics") {
    auto mol = "CC[2H]"_smiles;
    REQUIRE(mol);

    // one invariant slot per atom, filled in by the call below
    std::vector<std::uint32_t> invariants(mol->getNumAtoms());
    MorganFingerprints::getConnectivityInvariants(*mol, invariants);

    CHECK(invariants[1] == invariants[0]);
  }
}
|
|
|
|
// Checks the df_onlyShortestPaths option of the topological torsion
// generator on "CC1CCC1" with count simulation disabled: 3 on-bits by
// default, 1 on-bit once the option is enabled.
TEST_CASE("topological torsions shorted paths") {
  SECTION("basics") {
    auto mol = "CC1CCC1"_smiles;
    REQUIRE(mol);
    std::unique_ptr<FingerprintGenerator<std::uint64_t>> ttGen{
        TopologicalTorsion::getTopologicalTorsionGenerator<std::uint64_t>()};
    REQUIRE(ttGen);

    // the generator's options viewed as their torsion-specific type
    const auto torsionOptions = [&ttGen]() {
      return static_cast<TopologicalTorsion::TopologicalTorsionArguments *>(
          ttGen->getOptions());
    };

    torsionOptions()->df_countSimulation = false;
    std::unique_ptr<SparseBitVect> bits{ttGen->getSparseFingerprint(*mol)};
    CHECK(bits->getNumOnBits() == 3);

    torsionOptions()->df_onlyShortestPaths = true;
    bits.reset(ttGen->getSparseFingerprint(*mol));
    CHECK(bits->getNumOnBits() == 1);
  }
}
|
|
|
|
// Regression test for GitHub issue 7318: numPiElectrons() must work on atoms
// that take part in dative bonds. For each listed dative bond of the chromium
// complex, the begin atom must be SP2 and have 0 pi electrons when it is an
// oxygen, 1 otherwise.
TEST_CASE(
    "GitHub #7318: Utils.AtomPairs.NumPiElectrons fails on atoms with dative bonds",
    "[bug]") {
  auto mol =
      "O=C1[O-]->[Cr+3]23(<-[O-]C(=O)C4=CC=CC=N->24)(<-[O-]C(=O)C2=CC=CC=N->32)<-N2=CC=CC=C12"_smiles;
  REQUIRE(mol);

  // indices of the bonds expected to be dative
  const int dativeBondIndices[] = {2, 3, 12, 21, 28, 31};
  for (const int bondIdx : dativeBondIndices) {
    INFO("bond = " << bondIdx);
    auto *dative = mol->getBondWithIdx(bondIdx);
    REQUIRE(dative->getBondType() == Bond::DATIVE);

    auto *const donor = dative->getBeginAtom();
    CHECK(donor->getHybridization() == Atom::SP2);

    const bool isOxygen = donor->getAtomicNum() == 8;
    const auto expectedPiElectrons = isOxygen ? 0 : 1;
    CHECK(numPiElectrons(*donor) == expectedPiElectrons);
  }
}
|
|
|
|
// Regression test for GitHub issue 7533: computing a radius-2 Morgan count
// fingerprint for this molecule must succeed and yield a vector whose length
// is the maximum value of `unsigned`.
// NOTE(review): std::numeric_limits lives in <limits>, which this file does
// not include directly -- consider adding the include.
TEST_CASE("github #7533: IndexError while computing fingerprint") {
  auto mol = "CC1C(B(C)C)S1(C)(C)=O"_smiles;
  REQUIRE(mol);

  const unsigned int radius = 2;
  std::unique_ptr<SparseIntVect<std::uint32_t>> counts{
      MorganFingerprints::getFingerprint(*mol, radius)};
  REQUIRE(counts);

  CHECK(counts->getLength() == std::numeric_limits<unsigned>::max());
}
|
|
|
|
// Round-trips each fingerprint generator flavour through generatorToJSON() /
// generatorFromJSON(): the JSON must name the expected argument or invariant
// generator type, the rebuilt generator must serialise to the same JSON, and
// (where checked) must produce the same fingerprint for the test molecule.
TEST_CASE("toJSON") {
  auto m1 = "C[C@H](F)Oc1ccc(CCNC(=O)c2ccccc2C(=O)NCCc2ccc(OC)cc2)cc1"_smiles;
  REQUIRE(m1);

  SECTION("morgan") {
    const unsigned radius = 2;
    std::unique_ptr<FingerprintGenerator<std::uint64_t>> original{
        MorganFingerprint::getMorganGenerator<std::uint64_t>(radius)};
    REQUIRE(original);
    std::unique_ptr<ExplicitBitVect> originalFp(
        original->getFingerprint(*m1));
    REQUIRE(originalFp);

    const auto json = generatorToJSON(*original);
    CHECK(!json.empty());
    CHECK(json.find("\"type\":\"MorganArguments\"") != std::string::npos);

    auto restored = generatorFromJSON(json);
    REQUIRE(restored);
    std::unique_ptr<ExplicitBitVect> restoredFp(
        restored->getFingerprint(*m1));
    REQUIRE(restoredFp);

    const auto jsonAgain = generatorToJSON(*restored);
    CHECK(json == jsonAgain);
    CHECK(*originalFp == *restoredFp);
  }

  SECTION("RDKit") {
    const unsigned int shortestPath = 1;
    const unsigned int longestPath = 3;
    std::unique_ptr<FingerprintGenerator<std::uint64_t>> original{
        RDKitFP::getRDKitFPGenerator<std::uint64_t>(shortestPath,
                                                    longestPath)};
    REQUIRE(original);
    std::unique_ptr<ExplicitBitVect> originalFp(
        original->getFingerprint(*m1));
    REQUIRE(originalFp);

    const auto json = generatorToJSON(*original);
    CHECK(!json.empty());
    CHECK(json.find("\"type\":\"RDKitFPArguments\"") != std::string::npos);

    auto restored = generatorFromJSON(json);
    REQUIRE(restored);
    std::unique_ptr<ExplicitBitVect> restoredFp(
        restored->getFingerprint(*m1));
    REQUIRE(restoredFp);
    CHECK(*originalFp == *restoredFp);

    const auto jsonAgain = generatorToJSON(*restored);
    CHECK(json == jsonAgain);
  }

  SECTION("topological torsion") {
    const bool includeChirality = true;
    const std::uint32_t torsionAtomCount = 5;
    std::unique_ptr<FingerprintGenerator<std::uint64_t>> original{
        TopologicalTorsion::getTopologicalTorsionGenerator<std::uint64_t>(
            includeChirality, torsionAtomCount)};
    REQUIRE(original);
    std::unique_ptr<ExplicitBitVect> originalFp(
        original->getFingerprint(*m1));
    REQUIRE(originalFp);

    const auto json = generatorToJSON(*original);
    CHECK(!json.empty());
    CHECK(json.find("\"type\":\"TopologicalTorsionArguments\"") !=
          std::string::npos);

    auto restored = generatorFromJSON(json);
    REQUIRE(restored);
    std::unique_ptr<ExplicitBitVect> restoredFp(
        restored->getFingerprint(*m1));
    REQUIRE(restoredFp);

    // only the JSON is compared in this section
    const auto jsonAgain = generatorToJSON(*restored);
    CHECK(json == jsonAgain);
  }

  SECTION("atom pair") {
    const unsigned int minDistance = 2;
    const unsigned int maxDistance = 6;
    const bool includeChirality = true;
    std::unique_ptr<FingerprintGenerator<std::uint64_t>> original{
        AtomPair::getAtomPairGenerator<std::uint64_t>(minDistance, maxDistance,
                                                      includeChirality)};
    REQUIRE(original);
    std::unique_ptr<ExplicitBitVect> originalFp(
        original->getFingerprint(*m1));
    REQUIRE(originalFp);

    const auto json = generatorToJSON(*original);
    CHECK(!json.empty());
    CHECK(json.find("\"type\":\"AtomPairArguments\"") != std::string::npos);

    auto restored = generatorFromJSON(json);
    REQUIRE(restored);
    std::unique_ptr<ExplicitBitVect> restoredFp(
        restored->getFingerprint(*m1));
    REQUIRE(restoredFp);

    // only the JSON is compared in this section
    const auto jsonAgain = generatorToJSON(*restored);
    CHECK(json == jsonAgain);
  }

  SECTION("feature morgan") {
    MorganFingerprint::MorganArguments morganArgs;
    morganArgs.d_radius = 2;
    // default-constructed: no custom patterns
    MorganFingerprint::MorganFeatureAtomInvGenerator featureInvariants;
    std::unique_ptr<FingerprintGenerator<std::uint64_t>> original{
        MorganFingerprint::getMorganGenerator<std::uint64_t>(
            morganArgs, &featureInvariants)};
    REQUIRE(original);
    std::unique_ptr<ExplicitBitVect> originalFp(
        original->getFingerprint(*m1));
    REQUIRE(originalFp);

    const auto json = generatorToJSON(*original);
    CHECK(!json.empty());
    CHECK(json.find("\"type\":\"MorganFeatureAtomInvGenerator\"") !=
          std::string::npos);
    // NO patterns there when we use the defaults:
    CHECK(json.find("\"patternSMARTS\"") == std::string::npos);

    auto restored = generatorFromJSON(json);
    REQUIRE(restored);
    std::unique_ptr<ExplicitBitVect> restoredFp(
        restored->getFingerprint(*m1));
    REQUIRE(restoredFp);

    const auto jsonAgain = generatorToJSON(*restored);
    CHECK(json == jsonAgain);
    CHECK(*originalFp == *restoredFp);
  }

  SECTION("custom feature morgan") {
    // dumb feature definitions
    auto oxygenPattern = "OC"_smarts;
    REQUIRE(oxygenPattern);
    auto nitrogenPattern = "NC"_smarts;
    REQUIRE(nitrogenPattern);
    auto fluorinePattern = "FC"_smarts;
    REQUIRE(fluorinePattern);
    std::vector<const ROMol *> patterns = {
        oxygenPattern.get(), nitrogenPattern.get(), fluorinePattern.get()};

    MorganFingerprint::MorganArguments morganArgs;
    morganArgs.d_radius = 2;
    MorganFingerprint::MorganFeatureAtomInvGenerator featureInvariants(
        &patterns);
    std::unique_ptr<FingerprintGenerator<std::uint64_t>> original{
        MorganFingerprint::getMorganGenerator<std::uint64_t>(
            morganArgs, &featureInvariants)};
    REQUIRE(original);
    std::unique_ptr<ExplicitBitVect> originalFp(
        original->getFingerprint(*m1));
    REQUIRE(originalFp);

    const auto json = generatorToJSON(*original);
    CHECK(!json.empty());
    CHECK(json.find("\"type\":\"MorganFeatureAtomInvGenerator\"") !=
          std::string::npos);
    // custom patterns must be serialised
    CHECK(json.find("\"patternSMARTS\"") != std::string::npos);

    auto restored = generatorFromJSON(json);
    REQUIRE(restored);
    std::unique_ptr<ExplicitBitVect> restoredFp(
        restored->getFingerprint(*m1));
    REQUIRE(restoredFp);

    const auto jsonAgain = generatorToJSON(*restored);
    CHECK(json == jsonAgain);
    CHECK(*originalFp == *restoredFp);
  }
}