Files
rdkit/Code/GraphMol/Fingerprints/test1.cpp
Greg Landrum 08844e8f3a Fixes #2115 (#2116)
* Fixes #2115

* update release notes too
2018-10-17 07:11:17 -04:00

3847 lines
124 KiB
C++

//
// Copyright (C) 2003-2018 Greg Landrum and Rational Discovery LLC
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
#include <RDGeneral/test.h>
#include <GraphMol/RDKitBase.h>
#include <GraphMol/SmilesParse/SmilesParse.h>
#include <GraphMol/SmilesParse/SmilesWrite.h>
#include <GraphMol/SmilesParse/SmartsWrite.h>
#include <GraphMol/FileParsers/FileParsers.h>
#include <GraphMol/FileParsers/MolSupplier.h>
#include <GraphMol/Fingerprints/Fingerprints.h>
#include <GraphMol/Fingerprints/MorganFingerprints.h>
#include <GraphMol/Fingerprints/MACCS.h>
#include <GraphMol/Fingerprints/AtomPairs.h>
#include <GraphMol/Substruct/SubstructMatch.h>
#include <DataStructs/ExplicitBitVect.h>
#include <DataStructs/BitOps.h>
#include <RDGeneral/RDLog.h>
#include <string>
#include <boost/version.hpp>
#include <boost/foreach.hpp>
using namespace RDKit;
void test1() {
BOOST_LOG(rdInfoLog) << "testing basics" << std::endl;
{
std::string smi = "C1=CC=CC=C1";
RWMol *m1 = SmilesToMol(smi);
TEST_ASSERT(m1->getNumAtoms() == 6);
ExplicitBitVect *fp1 = RDKFingerprintMol(*m1);
ExplicitBitVect *fp2 = RDKFingerprintMol(*m1);
TEST_ASSERT(TanimotoSimilarity(*fp1, *fp2) == 1.0);
TEST_ASSERT(TanimotoSimilarity(*fp1, *fp2) >= 0.0);
smi = "C1=CC=CC=N1";
RWMol *m2 = SmilesToMol(smi);
delete fp2;
fp2 = RDKFingerprintMol(*m2);
TEST_ASSERT(TanimotoSimilarity(*fp1, *fp2) < 1.0);
TEST_ASSERT(TanimotoSimilarity(*fp1, *fp2) > 0.0);
delete m1;
delete m2;
delete fp1;
delete fp2;
}
{
std::string smi = "Cc1ccc(C(F)(F)F)o1";
RWMol *m1 = SmilesToMol(smi);
std::vector<boost::uint32_t> vs1(m1->getNumAtoms());
for (unsigned int i = 0; i < m1->getNumAtoms(); ++i) {
vs1[i] = m1->getAtomWithIdx(i)->getAtomicNum();
}
std::cerr << "\n\n\n\n\n\n\n\n\n-------------------" << std::endl;
ExplicitBitVect *fp1 =
RDKFingerprintMol(*m1, 1, 5, 1024, 1, 0, -1, 0, 1, 1, &vs1);
smi = "c1ccoc1";
RWMol *m2 = SmilesToMol(smi);
std::vector<boost::uint32_t> vs2(m2->getNumAtoms());
for (unsigned int i = 0; i < m2->getNumAtoms(); ++i) {
vs2[i] = m2->getAtomWithIdx(i)->getAtomicNum();
}
std::cerr << "\n\n\n\n\n\n\n\n\n-------------------" << std::endl;
ExplicitBitVect *fp2 =
RDKFingerprintMol(*m2, 1, 5, 1024, 1, 0, -1, 0, 1, 1, &vs2);
TEST_ASSERT(((*fp1) & (*fp2)) == (*fp2));
delete m1;
delete m2;
delete fp1;
delete fp2;
}
{
std::string smi = "Oc1cc2[nH]cnc2c(O)n1";
RWMol *m1 = SmilesToMol(smi);
std::vector<boost::uint32_t> vs1(m1->getNumAtoms());
for (unsigned int i = 0; i < m1->getNumAtoms(); ++i) {
vs1[i] = m1->getAtomWithIdx(i)->getAtomicNum();
}
m1->debugMol(std::cerr);
std::cerr << "\n\n\n\n\n\n\n\n\n-------------------" << std::endl;
ExplicitBitVect *fp1 =
RDKFingerprintMol(*m1, 4, 4, 1024, 1, 0, -1, 0, 1, 1, &vs1);
for (unsigned int i = 0; i < m1->getNumAtoms(); ++i) {
smi = MolToSmiles(*m1, false, false, i, false);
RWMol *m2 = SmilesToMol(smi);
std::vector<boost::uint32_t> vs2(m2->getNumAtoms());
for (unsigned int i = 0; i < m2->getNumAtoms(); ++i) {
vs2[i] = m2->getAtomWithIdx(i)->getAtomicNum();
}
std::cerr << "SMI: " << smi << std::endl;
m2->debugMol(std::cerr);
std::cerr << "\n\n\n\n\n\n\n\n\n-------------------" << std::endl;
ExplicitBitVect *fp2 =
RDKFingerprintMol(*m2, 4, 4, 1024, 1, 0, -1, 0, 1, 1, &vs2);
IntVect iv;
((*fp2) ^ (*fp1)).getOnBits(iv);
std::copy(iv.begin(), iv.end(),
std::ostream_iterator<int>(std::cerr, ", "));
std::cerr << std::endl;
TEST_ASSERT((*fp1) == (*fp2));
delete m2;
delete fp2;
}
}
{
std::string smi = "Oc1cc2[nH]cnc2c(O)n1";
RWMol *m1 = SmilesToMol(smi);
std::vector<boost::uint32_t> vs1(m1->getNumAtoms());
for (unsigned int i = 0; i < m1->getNumAtoms(); ++i) {
vs1[i] = m1->getAtomWithIdx(i)->getAtomicNum();
}
std::cerr << "\n\n\n\n\n\n\n\n\n-------------------" << std::endl;
ExplicitBitVect *fp1 =
RDKFingerprintMol(*m1, 1, 5, 1024, 1, 0, -1, 0, 1, 1, &vs1);
smi = "c1ccncc1";
RWMol *m2 = SmilesToMol(smi);
std::vector<boost::uint32_t> vs2(m2->getNumAtoms());
for (unsigned int i = 0; i < m2->getNumAtoms(); ++i) {
vs2[i] = m2->getAtomWithIdx(i)->getAtomicNum();
}
std::cerr << "\n\n\n\n\n\n\n\n\n-------------------" << std::endl;
ExplicitBitVect *fp2 =
RDKFingerprintMol(*m2, 1, 5, 1024, 1, 0, -1, 0, 1, 1, &vs2);
IntVect iv;
((*fp2) ^ ((*fp1) & (*fp2))).getOnBits(iv);
std::copy(iv.begin(), iv.end(),
std::ostream_iterator<int>(std::cerr, ", "));
std::cerr << std::endl;
TEST_ASSERT(((*fp1) & (*fp2)) == (*fp2));
delete m1;
delete m2;
delete fp1;
delete fp2;
}
BOOST_LOG(rdInfoLog) << "done" << std::endl;
}
void test2() {
BOOST_LOG(rdInfoLog) << "testing subgraph invariants" << std::endl;
std::string smi = "CC(=O)COC";
RWMol *m1 = SmilesToMol(smi);
TEST_ASSERT(m1->getNumAtoms() == 6);
BOOST_LOG(rdInfoLog) << "-------------------- fp1 " << std::endl;
ExplicitBitVect *fp1 = RDKFingerprintMol(*m1, 1, 4, 2048, 4, false);
BOOST_LOG(rdInfoLog) << "-------------------- fp2 " << std::endl;
ExplicitBitVect *fp2 = RDKFingerprintMol(*m1, 1, 4, 2048, 4, false);
TEST_ASSERT(TanimotoSimilarity(*fp1, *fp2) == 1.0);
RWMol *m2 = SmilesToMol("CC");
delete fp2;
BOOST_LOG(rdInfoLog) << "-------------------- fp2 " << std::endl;
fp2 = RDKFingerprintMol(*m2, 1, 4, 2048, 4, false);
TEST_ASSERT(TanimotoSimilarity(*fp1, *fp2) < 1.0);
TEST_ASSERT(TanimotoSimilarity(*fp1, *fp2) > 0.0);
TEST_ASSERT(OnBitProjSimilarity(*fp2, *fp1)[0] == 1.0);
delete m2;
m2 = SmilesToMol("CC=O");
delete fp2;
fp2 = RDKFingerprintMol(*m2, 1, 4, 2048, 4, false);
TEST_ASSERT(TanimotoSimilarity(*fp1, *fp2) < 1.0);
TEST_ASSERT(TanimotoSimilarity(*fp1, *fp2) > 0.0);
TEST_ASSERT(OnBitProjSimilarity(*fp2, *fp1)[0] == 1.0);
delete m2;
m2 = SmilesToMol("CCCOC");
delete fp2;
fp2 = RDKFingerprintMol(*m2, 1, 4, 2048, 4, false);
TEST_ASSERT(TanimotoSimilarity(*fp1, *fp2) < 1.0);
TEST_ASSERT(TanimotoSimilarity(*fp1, *fp2) > 0.0);
TEST_ASSERT(OnBitProjSimilarity(*fp2, *fp1)[0] == 1.0);
delete m1;
delete m2;
delete fp1;
delete fp2;
BOOST_LOG(rdInfoLog) << "done" << std::endl;
}
void test3() {
BOOST_LOG(rdInfoLog) << "testing auto folding" << std::endl;
RWMol *m = SmilesToMol("CCCOC");
ExplicitBitVect *fp1, *fp2;
fp2 = RDKFingerprintMol(*m, 1, 4, 2048, 4, false, 0.3, 256);
TEST_ASSERT(fp2->getNumBits() == 256);
delete m;
delete fp2;
m = SmilesToMol("CN(C)Cc1n-2c(nn1)CN=C(c1ccccc1)c1cc(Cl)ccc12");
fp1 = RDKFingerprintMol(*m, 1, 4, 2048, 4, false);
TEST_ASSERT(fp1->getNumBits() == 2048);
fp2 = RDKFingerprintMol(*m, 1, 4, 2048, 4, false, 0.3, 256);
TEST_ASSERT(fp2->getNumBits() < fp1->getNumBits());
delete m;
delete fp1;
delete fp2;
BOOST_LOG(rdInfoLog) << "done" << std::endl;
}
void test1alg2() {
BOOST_LOG(rdInfoLog) << "testing basics alg2" << std::endl;
std::string smi = "C1=CC=CC=C1";
RWMol *m1 = SmilesToMol(smi);
TEST_ASSERT(m1->getNumAtoms() == 6);
ExplicitBitVect *fp1 = RDKFingerprintMol(*m1);
ExplicitBitVect *fp2 = RDKFingerprintMol(*m1);
TEST_ASSERT(TanimotoSimilarity(*fp1, *fp2) == 1.0);
smi = "C1=CC=CC=N1";
RWMol *m2 = SmilesToMol(smi);
delete fp2;
fp2 = RDKFingerprintMol(*m2);
TEST_ASSERT(TanimotoSimilarity(*fp1, *fp2) < 1.0);
TEST_ASSERT(TanimotoSimilarity(*fp1, *fp2) > 0.0);
delete m1;
delete m2;
delete fp1;
delete fp2;
BOOST_LOG(rdInfoLog) << "done" << std::endl;
}
void test2alg2() {
BOOST_LOG(rdInfoLog) << "testing subgraph invariants alg2" << std::endl;
std::string smi = "CC(=O)COC";
RWMol *m1 = SmilesToMol(smi);
TEST_ASSERT(m1->getNumAtoms() == 6);
BOOST_LOG(rdInfoLog) << "-------------------- fp1 " << std::endl;
ExplicitBitVect *fp1 = RDKFingerprintMol(*m1, 1, 4, 2048, 4, false);
BOOST_LOG(rdInfoLog) << "-------------------- fp2 " << std::endl;
ExplicitBitVect *fp2 = RDKFingerprintMol(*m1, 1, 4, 2048, 4, false);
TEST_ASSERT(TanimotoSimilarity(*fp1, *fp2) == 1.0);
RWMol *m2 = SmilesToMol("CC");
delete fp2;
BOOST_LOG(rdInfoLog) << "-------------------- fp2 " << std::endl;
fp2 = RDKFingerprintMol(*m2, 1, 4, 2048, 4, false);
TEST_ASSERT(TanimotoSimilarity(*fp1, *fp2) < 1.0);
TEST_ASSERT(TanimotoSimilarity(*fp1, *fp2) > 0.0);
TEST_ASSERT(OnBitProjSimilarity(*fp2, *fp1)[0] == 1.0);
delete m2;
m2 = SmilesToMol("CC=O");
delete fp2;
fp2 = RDKFingerprintMol(*m2, 1, 4, 2048, 4, false);
TEST_ASSERT(TanimotoSimilarity(*fp1, *fp2) < 1.0);
TEST_ASSERT(TanimotoSimilarity(*fp1, *fp2) > 0.0);
TEST_ASSERT(OnBitProjSimilarity(*fp2, *fp1)[0] == 1.0);
delete m2;
m2 = SmilesToMol("CCCOC");
delete fp2;
fp2 = RDKFingerprintMol(*m2, 1, 4, 2048, 4, false);
TEST_ASSERT(TanimotoSimilarity(*fp1, *fp2) < 1.0);
TEST_ASSERT(TanimotoSimilarity(*fp1, *fp2) > 0.0);
TEST_ASSERT(OnBitProjSimilarity(*fp2, *fp1)[0] == 1.0);
delete m1;
delete m2;
delete fp1;
delete fp2;
BOOST_LOG(rdInfoLog) << "done" << std::endl;
}
void test4Trends() {
BOOST_LOG(rdInfoLog) << "testing similarity trends" << std::endl;
double sim1, sim2;
RWMol *m;
ExplicitBitVect *fp1, *fp2;
m = SmilesToMol("CCC");
fp1 = RDKFingerprintMol(*m);
delete m;
m = SmilesToMol("C1CC1");
fp2 = RDKFingerprintMol(*m);
sim1 = TanimotoSimilarity(*fp1, *fp2);
delete m;
m = SmilesToMol("CCCC");
delete fp1;
fp1 = RDKFingerprintMol(*m);
delete m;
m = SmilesToMol("C1CCC1");
delete fp2;
fp2 = RDKFingerprintMol(*m);
sim2 = TanimotoSimilarity(*fp1, *fp2);
TEST_ASSERT(sim2 > sim1);
sim2 = sim1;
delete m;
m = SmilesToMol("CCCCC");
delete fp1;
fp1 = RDKFingerprintMol(*m);
delete m;
m = SmilesToMol("C1CCCC1");
delete fp2;
fp2 = RDKFingerprintMol(*m);
sim2 = TanimotoSimilarity(*fp1, *fp2);
TEST_ASSERT(sim2 > sim1);
sim2 = sim1;
delete m;
m = SmilesToMol("CCCCCC");
delete fp1;
fp1 = RDKFingerprintMol(*m);
delete m;
m = SmilesToMol("C1CCCCC1");
delete fp2;
fp2 = RDKFingerprintMol(*m);
sim2 = TanimotoSimilarity(*fp1, *fp2);
TEST_ASSERT(sim2 > sim1);
sim2 = sim1;
delete m;
m = SmilesToMol("CCCCCCC");
delete fp1;
fp1 = RDKFingerprintMol(*m);
delete m;
m = SmilesToMol("C1CCCCCC1");
delete fp2;
fp2 = RDKFingerprintMol(*m);
sim2 = TanimotoSimilarity(*fp1, *fp2);
TEST_ASSERT(sim2 > sim1);
sim2 = sim1;
delete m;
delete fp1;
delete fp2;
BOOST_LOG(rdInfoLog) << "done" << std::endl;
}
void test5BackwardsCompatibility() {
BOOST_LOG(rdInfoLog) << "testing backwards compatibility of fingerprints"
<< std::endl;
RWMol *m;
ExplicitBitVect *fp1;
m = SmilesToMol("CC");
fp1 = RDKFingerprintMol(*m, 1, 7, 2048, 4);
TEST_ASSERT(fp1->getNumOnBits() == 4);
#if BOOST_VERSION / 100 < 1040
// bug fixes in the uniform_int<> distribution in version 1.40
// necessitate this
TEST_ASSERT((*fp1)[951]);
TEST_ASSERT((*fp1)[961]);
TEST_ASSERT((*fp1)[1436]);
TEST_ASSERT((*fp1)[1590]);
#else
TEST_ASSERT((*fp1)[419]);
TEST_ASSERT((*fp1)[718]);
TEST_ASSERT((*fp1)[1499]);
TEST_ASSERT((*fp1)[1504]);
#endif
delete fp1;
delete m;
BOOST_LOG(rdInfoLog) << "done" << std::endl;
}
void test1Layers() {
BOOST_LOG(rdInfoLog) << "testing basics layered fps" << std::endl;
#if 1
{
RWMol *m1 = SmilesToMol("C1=CC=CC=C1");
RWMol *m2 = SmilesToMol("C1=CC=CC=N1");
RWMol *m3 = SmilesToMol("C1CCCCC1");
RWMol *m4 = SmilesToMol("C1CCC1");
ExplicitBitVect *fp1 = LayeredFingerprintMol(*m1);
ExplicitBitVect *fp2 = LayeredFingerprintMol(*m1);
TEST_ASSERT(TanimotoSimilarity(*fp1, *fp2) == 1.0);
delete fp2;
fp2 = LayeredFingerprintMol(*m2);
TEST_ASSERT(TanimotoSimilarity(*fp1, *fp2) < 1.0);
TEST_ASSERT(TanimotoSimilarity(*fp1, *fp2) > 0.0);
ExplicitBitVect *fp3 = LayeredFingerprintMol(*m3);
TEST_ASSERT(TanimotoSimilarity(*fp1, *fp3) < 1.0);
TEST_ASSERT(TanimotoSimilarity(*fp1, *fp3) > 0.0);
TEST_ASSERT(TanimotoSimilarity(*fp2, *fp3) < 1.0);
TEST_ASSERT(TanimotoSimilarity(*fp2, *fp3) > 0.0);
ExplicitBitVect *fp4 = LayeredFingerprintMol(*m4);
TEST_ASSERT(TanimotoSimilarity(*fp1, *fp4) < 1.0);
TEST_ASSERT(TanimotoSimilarity(*fp1, *fp4) > 0.0);
TEST_ASSERT(TanimotoSimilarity(*fp2, *fp4) < 1.0);
TEST_ASSERT(TanimotoSimilarity(*fp2, *fp4) > 0.0);
TEST_ASSERT(TanimotoSimilarity(*fp3, *fp4) < 1.0);
TEST_ASSERT(TanimotoSimilarity(*fp3, *fp4) > 0.0);
delete fp1;
delete fp2;
delete fp3;
delete fp4;
delete m1;
delete m2;
delete m3;
delete m4;
}
#endif
{
RWMol *m1 = SmilesToMol("C1=CC=CC=C1");
RWMol *m2 = SmilesToMol("C1=CC=CC=N1");
RWMol *m3 = SmilesToMol("C1CCCCC1");
RWMol *m4 = SmilesToMol("CCCCCC");
unsigned int layers = 0x1;
ExplicitBitVect *fp1 = LayeredFingerprintMol(*m1, layers, 1, 5);
ExplicitBitVect *fp2 = LayeredFingerprintMol(*m2, layers, 1, 5);
TEST_ASSERT(TanimotoSimilarity(*fp1, *fp2) == 1.0);
ExplicitBitVect *fp3 = LayeredFingerprintMol(*m3, layers, 1, 5);
TEST_ASSERT(TanimotoSimilarity(*fp1, *fp3) == 1.0);
ExplicitBitVect *fp4 = LayeredFingerprintMol(*m4, layers, 1, 5);
TEST_ASSERT(TanimotoSimilarity(*fp1, *fp4) == 1.0);
delete fp1;
delete fp2;
delete fp3;
delete fp4;
delete m1;
delete m2;
delete m3;
delete m4;
}
{
RWMol *m1 = SmilesToMol("C1=CC=CC=C1");
RWMol *m2 = SmilesToMol("C1=CC=CC=N1");
RWMol *m3 = SmilesToMol("C1CCCCC1");
RWMol *m4 = SmilesToMol("CCCCCC");
unsigned int layers = 0x3;
ExplicitBitVect *fp1 = LayeredFingerprintMol(*m1, layers, 1, 5);
ExplicitBitVect *fp2 = LayeredFingerprintMol(*m2, layers, 1, 5);
TEST_ASSERT(TanimotoSimilarity(*fp1, *fp2) == 1.0);
ExplicitBitVect *fp3 = LayeredFingerprintMol(*m3, layers, 1, 5);
TEST_ASSERT(TanimotoSimilarity(*fp1, *fp3) == 1.0);
ExplicitBitVect *fp4 = LayeredFingerprintMol(*m4, layers, 1, 5);
TEST_ASSERT(TanimotoSimilarity(*fp1, *fp4) <= 1.0);
TEST_ASSERT(TanimotoSimilarity(*fp1, *fp4) > 0.0);
TEST_ASSERT(TanimotoSimilarity(*fp3, *fp4) == 1.0);
delete fp1;
delete fp2;
delete fp3;
delete fp4;
delete m1;
delete m2;
delete m3;
delete m4;
}
{
RWMol *m1 = SmilesToMol("C1=CC=CC=C1");
RWMol *m2 = SmilesToMol("C1=CC=CC=N1");
RWMol *m3 = SmilesToMol("C1CCCCC1");
RWMol *m4 = SmilesToMol("CCCCCC");
unsigned int layers = 0x7;
ExplicitBitVect *fp1 = LayeredFingerprintMol(*m1, layers, 1, 5);
ExplicitBitVect *fp2 = LayeredFingerprintMol(*m2, layers, 1, 5);
TEST_ASSERT(TanimotoSimilarity(*fp1, *fp2) < 1.0);
TEST_ASSERT(TanimotoSimilarity(*fp1, *fp2) > 0.0);
ExplicitBitVect *fp3 = LayeredFingerprintMol(*m3, layers, 1, 5);
TEST_ASSERT(TanimotoSimilarity(*fp1, *fp3) == 1.0);
TEST_ASSERT(TanimotoSimilarity(*fp2, *fp3) < 1.0);
TEST_ASSERT(TanimotoSimilarity(*fp2, *fp3) > 0.0);
ExplicitBitVect *fp4 = LayeredFingerprintMol(*m4, layers, 1, 5);
TEST_ASSERT(TanimotoSimilarity(*fp1, *fp4) == 1.0);
TEST_ASSERT(TanimotoSimilarity(*fp2, *fp4) < 1.0);
TEST_ASSERT(TanimotoSimilarity(*fp2, *fp4) > 0.0);
TEST_ASSERT(TanimotoSimilarity(*fp3, *fp4) == 1.0);
delete fp1;
delete fp2;
delete fp3;
delete fp4;
delete m1;
delete m2;
delete m3;
delete m4;
}
{
RWMol *m1 = SmilesToMol("c1ccccc1");
RWMol *m2 = SmilesToMol("C1CCCCC1");
RWMol *m3 = SmilesToMol("CCCCCC");
RWMol *m4 = SmilesToMol("C1CCCCC1");
unsigned int layers = 0xF; // add the "in ring" bit
ExplicitBitVect *fp1 = LayeredFingerprintMol(*m1, layers, 1, 5);
ExplicitBitVect *fp2 = LayeredFingerprintMol(*m2, layers, 1, 5);
TEST_ASSERT(TanimotoSimilarity(*fp1, *fp2) == 1.0);
ExplicitBitVect *fp3 = LayeredFingerprintMol(*m3, layers, 1, 5);
TEST_ASSERT(TanimotoSimilarity(*fp1, *fp3) < 1.0);
TEST_ASSERT(TanimotoSimilarity(*fp1, *fp3) > 0.0);
TEST_ASSERT(TanimotoSimilarity(*fp2, *fp3) < 1.0);
TEST_ASSERT(TanimotoSimilarity(*fp2, *fp3) > 0.0);
ExplicitBitVect *fp4 = LayeredFingerprintMol(*m4, layers, 1, 5);
TEST_ASSERT(TanimotoSimilarity(*fp1, *fp4) == 1.0);
TEST_ASSERT(TanimotoSimilarity(*fp2, *fp4) == 1.0);
delete fp1;
delete fp2;
delete fp3;
delete fp4;
delete m1;
delete m2;
delete m3;
delete m4;
}
{
RWMol *m1 = SmilesToMol("C1=CC=CC=C1");
RWMol *m2 = SmilesToMol("C1CCCCC1");
RWMol *m3 = SmilesToMol("CCCCCC");
RWMol *m4 = SmilesToMol("C1CCCCCC1");
unsigned int layers = 0x1F; // add the ring size bit
ExplicitBitVect *fp1 = LayeredFingerprintMol(*m1, layers, 1, 5);
ExplicitBitVect *fp2 = LayeredFingerprintMol(*m2, layers, 1, 5);
TEST_ASSERT(TanimotoSimilarity(*fp1, *fp2) == 1.0);
ExplicitBitVect *fp3 = LayeredFingerprintMol(*m3, layers, 1, 5);
TEST_ASSERT(TanimotoSimilarity(*fp1, *fp3) < 1.0);
TEST_ASSERT(TanimotoSimilarity(*fp1, *fp3) > 0.0);
TEST_ASSERT(TanimotoSimilarity(*fp2, *fp3) < 1.0);
TEST_ASSERT(TanimotoSimilarity(*fp2, *fp3) > 0.0);
ExplicitBitVect *fp4 = LayeredFingerprintMol(*m4, layers, 1, 5);
TEST_ASSERT(TanimotoSimilarity(*fp1, *fp4) < 1.0);
TEST_ASSERT(TanimotoSimilarity(*fp1, *fp4) > 0.0);
TEST_ASSERT(TanimotoSimilarity(*fp2, *fp4) < 1.0);
TEST_ASSERT(TanimotoSimilarity(*fp2, *fp4) > 0.0);
TEST_ASSERT(TanimotoSimilarity(*fp3, *fp4) < 1.0);
TEST_ASSERT(TanimotoSimilarity(*fp3, *fp4) > 0.0);
delete fp1;
delete fp2;
delete fp3;
delete fp4;
delete m1;
delete m2;
delete m3;
delete m4;
}
{
RWMol *m1 = SmilesToMol("Cc1ncccc1");
RWMol *m2 = SmilesToMol("Cn1ccc2nn(C)c(=O)c-2c1C");
unsigned int layers = 0x7;
ExplicitBitVect *fp1 = LayeredFingerprintMol(*m1, layers, 1, 5);
ExplicitBitVect *fp2 = LayeredFingerprintMol(*m2, layers, 1, 5);
TEST_ASSERT(TanimotoSimilarity(*fp1, *fp2) < 1.0);
TEST_ASSERT(TanimotoSimilarity(*fp1, *fp2) > 0.0);
TEST_ASSERT(((*fp1) & (*fp2)) == (*fp1));
delete fp1;
delete fp2;
delete m1;
delete m2;
}
BOOST_LOG(rdInfoLog) << "done" << std::endl;
}
void test2Layers() {
BOOST_LOG(rdInfoLog) << "testing advanced layered fps" << std::endl;
{
std::vector<unsigned int> atomCounts;
RWMol *m1 = SmilesToMol("CC(C)C");
ExplicitBitVect *fp1 = LayeredFingerprintMol(*m1);
ExplicitBitVect *fp2 = LayeredFingerprintMol(*m1);
TEST_ASSERT(TanimotoSimilarity(*fp1, *fp2) == 1.0);
atomCounts.clear();
atomCounts.resize(m1->getNumAtoms());
for (unsigned int i = 0; i < m1->getNumAtoms(); ++i) atomCounts[i] = 0;
delete fp2;
fp2 = LayeredFingerprintMol(*m1, 0xFFFFFFFF, 1, 7, 2048, &atomCounts);
TEST_ASSERT(TanimotoSimilarity(*fp1, *fp2) == 1.0);
TEST_ASSERT(atomCounts[0] == 4);
TEST_ASSERT(atomCounts[1] == 7);
TEST_ASSERT(atomCounts[2] == 4);
TEST_ASSERT(atomCounts[3] == 4);
delete fp2;
fp2 = LayeredFingerprintMol(*m1, 0xFFFFFFFF, 1, 7, 2048, &atomCounts);
TEST_ASSERT(TanimotoSimilarity(*fp1, *fp2) == 1.0);
TEST_ASSERT(atomCounts[0] == 8);
TEST_ASSERT(atomCounts[1] == 14);
TEST_ASSERT(atomCounts[2] == 8);
TEST_ASSERT(atomCounts[3] == 8);
delete fp1;
delete fp2;
delete m1;
}
{
std::vector<unsigned int> atomCounts;
RWMol *m1 = SmilesToMol("CC(C)C");
RWMol *m2 = SmilesToMol("CCC");
ExplicitBitVect *fp1 = LayeredFingerprintMol(*m1);
ExplicitBitVect *fp2 = LayeredFingerprintMol(*m2);
TEST_ASSERT(TanimotoSimilarity(*fp1, *fp2) < 1.0);
atomCounts.clear();
atomCounts.resize(m1->getNumAtoms());
for (unsigned int i = 0; i < m1->getNumAtoms(); ++i) atomCounts[i] = 0;
ExplicitBitVect *fp3 =
LayeredFingerprintMol(*m1, 0xFFFFFFFF, 1, 7, 2048, &atomCounts, fp2);
TEST_ASSERT(TanimotoSimilarity(*fp1, *fp3) < 1.0);
TEST_ASSERT(atomCounts[0] == 3);
TEST_ASSERT(atomCounts[1] == 6);
TEST_ASSERT(atomCounts[2] == 3);
TEST_ASSERT(atomCounts[3] == 3);
delete fp1;
delete fp2;
delete fp3;
delete m1;
delete m2;
}
BOOST_LOG(rdInfoLog) << "done" << std::endl;
}
void test3Layers() {
BOOST_LOG(rdInfoLog) << "testing layered fps and queries" << std::endl;
{
RWMol *m1 = SmilesToMol("CC(C)C");
RWMol *m2 = SmartsToMol("CC(C)C");
ExplicitBitVect *fp1 = LayeredFingerprintMol(*m1, 0x7, 1, 5, 256);
ExplicitBitVect *fp2 = LayeredFingerprintMol(*m2, 0x7, 1, 5, 256);
TEST_ASSERT(fp2->getNumOnBits());
ExplicitBitVect fp3 = (*fp1) & (*fp2);
#if 0
std::cerr<<BitVectToText(*fp1)<<std::endl;
std::cerr<<"---"<<std::endl;
std::cerr<<BitVectToText(*fp2)<<std::endl;
std::cerr<<BitVectToText(fp3)<<std::endl;
#endif
TEST_ASSERT(fp3 == (*fp2));
delete fp1;
delete fp2;
delete m1;
delete m2;
}
{
RWMol *m1 = SmilesToMol("CC(C)C");
RWMol *m2 = SmartsToMol("C-C(-C)-C");
ExplicitBitVect *fp1 = LayeredFingerprintMol(*m1, 0x7, 1, 5, 256);
ExplicitBitVect *fp2 = LayeredFingerprintMol(*m2, 0x7, 1, 5, 256);
TEST_ASSERT(fp2->getNumOnBits());
ExplicitBitVect fp3 = (*fp1) & (*fp2);
TEST_ASSERT(fp3 == (*fp2));
delete fp1;
delete fp2;
delete m1;
delete m2;
}
{
RWMol *m1 = SmilesToMol("CC(=C)C");
RWMol *m2 = SmartsToMol("C-C(-C)-C");
ExplicitBitVect *fp1 = LayeredFingerprintMol(*m1, 0x7, 1, 5, 256);
ExplicitBitVect *fp2 = LayeredFingerprintMol(*m2, 0x7, 1, 5, 256);
TEST_ASSERT(fp2->getNumOnBits());
ExplicitBitVect fp3 = (*fp1) & (*fp2);
TEST_ASSERT(fp3 != (*fp2));
TEST_ASSERT(fp3 != (*fp1));
delete fp1;
delete fp2;
delete m1;
delete m2;
}
{
RWMol *m1 = SmilesToMol("CC(C)C");
RWMol *m2 = SmartsToMol("CC([C,N])C");
ExplicitBitVect *fp1 = LayeredFingerprintMol(*m1, 0x7, 1, 5, 256);
ExplicitBitVect *fp2 = LayeredFingerprintMol(*m2, 0x7, 1, 5, 256);
TEST_ASSERT(fp2->getNumOnBits());
ExplicitBitVect fp3 = (*fp1) & (*fp2);
TEST_ASSERT(fp3 == (*fp2));
delete fp1;
delete fp2;
delete m1;
delete m2;
}
{
RWMol *m1 = SmilesToMol("CC(C)C");
RWMol *m2 = SmartsToMol("CC([!C])C");
ExplicitBitVect *fp1 = LayeredFingerprintMol(*m1, 0x7, 1, 5, 256);
ExplicitBitVect *fp2 = LayeredFingerprintMol(*m2, 0x7, 1, 5, 256);
TEST_ASSERT(fp2->getNumOnBits());
// the query says "!C", but we still match in the fp...
ExplicitBitVect fp3 = (*fp1) & (*fp2);
TEST_ASSERT(fp3 == (*fp2));
// of course we better match if the thing really isn't a C:
delete fp1;
delete m1;
m1 = SmilesToMol("CC(O)C");
fp1 = LayeredFingerprintMol(*m1, 0x7, 1, 5, 256);
fp3 = (*fp1) & (*fp2);
TEST_ASSERT(fp3 == (*fp2));
delete fp1;
delete fp2;
delete m1;
delete m2;
}
{
RWMol *m1 = SmilesToMol("CC(O)C");
RWMol *m2 = SmartsToMol("CC([C,N])C");
ExplicitBitVect *fp1 = LayeredFingerprintMol(*m1, 0x7, 1, 5, 256);
ExplicitBitVect *fp2 = LayeredFingerprintMol(*m2, 0x7, 1, 5, 256);
TEST_ASSERT(fp2->getNumOnBits());
ExplicitBitVect fp3 = (*fp1) & (*fp2);
TEST_ASSERT(fp3 == (*fp2));
delete fp1;
delete fp2;
delete m1;
delete m2;
}
{
RWMol *m1 = SmilesToMol("c1ccccc1");
RWMol *m2 = SmartsToMol("cccc");
ExplicitBitVect *fp1 = LayeredFingerprintMol(*m1, 0x7, 1, 5, 256);
ExplicitBitVect *fp2 = LayeredFingerprintMol(*m2, 0x7, 1, 5, 256);
TEST_ASSERT(fp2->getNumOnBits());
ExplicitBitVect fp3 = (*fp1) & (*fp2);
TEST_ASSERT(fp3 == (*fp2));
delete fp1;
delete m1;
// somewhat counter-intuitive result here, but this happens
// because the aromaticity does not factor into the
// calculation:
m1 = SmilesToMol("C1CCCCC1");
fp1 = LayeredFingerprintMol(*m1, 0x7, 1, 5, 256);
fp3 = (*fp1) & (*fp2);
TEST_ASSERT(fp3 == (*fp2));
delete fp1;
delete fp2;
delete m1;
delete m2;
}
{
RWMol *m1 = SmilesToMol("c1ccccc1");
RWMol *m2 = SmartsToMol("c:c:c");
ExplicitBitVect *fp1 = LayeredFingerprintMol(*m1, 0xFFFF, 1, 5, 512);
ExplicitBitVect *fp2 = LayeredFingerprintMol(*m2, 0x5, 1, 5, 512);
TEST_ASSERT(fp2->getNumOnBits());
ExplicitBitVect fp3 = (*fp1) & (*fp2);
TEST_ASSERT(fp3 == (*fp2));
delete fp1;
delete m1;
m1 = SmilesToMol("C1CCCCC1");
fp1 = LayeredFingerprintMol(*m1, 0xFFFF, 1, 5, 512);
fp3 = (*fp1) & (*fp2);
TEST_ASSERT(fp3 == (*fp2));
delete fp2;
fp2 = LayeredFingerprintMol(*m2, 0x7, 1, 5, 512);
fp3 = (*fp1) & (*fp2);
TEST_ASSERT(fp3 == (*fp2));
delete fp2;
fp2 = LayeredFingerprintMol(*m2, 0x25, 1, 5, 512);
fp3 = (*fp1) & (*fp2);
TEST_ASSERT(fp3 != (*fp2));
delete fp2;
fp2 = LayeredFingerprintMol(*m2, 0x7, 1, 5, 512);
delete fp1;
delete m1;
// atom type information is factored in:
m1 = SmilesToMol("c1ncncn1");
fp1 = LayeredFingerprintMol(*m1, 0xFFFF, 1, 5, 512);
fp3 = (*fp1) & (*fp2);
TEST_ASSERT(fp3 != (*fp2));
delete fp2;
delete m2;
// but queries switch us back to generic types, so we match again:
m2 = SmartsToMol("[C,N]:[C,N]:[C,N]");
fp2 = LayeredFingerprintMol(*m2, 0x7, 1, 5, 512);
TEST_ASSERT(fp2->getNumOnBits());
fp3 = (*fp1) & (*fp2);
TEST_ASSERT(fp3 == (*fp2));
delete fp1;
delete fp2;
delete m1;
delete m2;
}
BOOST_LOG(rdInfoLog) << "done" << std::endl;
}
void test1MorganFPs() {
BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
BOOST_LOG(rdErrorLog) << " Test Morgan Fingerprints." << std::endl;
{
ROMol *mol;
SparseIntVect<boost::uint32_t> *fp;
mol = SmilesToMol("CCCCC");
fp = MorganFingerprints::getFingerprint(*mol, 0);
TEST_ASSERT(fp->getNonzeroElements().size() == 2);
delete fp;
fp = MorganFingerprints::getFingerprint(*mol, 0, nullptr, nullptr, false,
true, false);
TEST_ASSERT(fp->getNonzeroElements().size() == 2);
for (auto iter = fp->getNonzeroElements().begin();
iter != fp->getNonzeroElements().end(); ++iter) {
TEST_ASSERT(iter->second == 1); // check that count == 1
}
delete fp;
fp = MorganFingerprints::getHashedFingerprint(*mol, 0);
TEST_ASSERT(fp->getNonzeroElements().size() == 2);
delete fp;
fp = MorganFingerprints::getFingerprint(*mol, 1);
TEST_ASSERT(fp->getNonzeroElements().size() == 5);
delete fp;
fp = MorganFingerprints::getHashedFingerprint(*mol, 1);
TEST_ASSERT(fp->getNonzeroElements().size() == 5);
delete fp;
fp = MorganFingerprints::getFingerprint(*mol, 2);
TEST_ASSERT(fp->getNonzeroElements().size() == 7);
delete fp;
fp = MorganFingerprints::getFingerprint(*mol, 3);
TEST_ASSERT(fp->getNonzeroElements().size() == 7);
delete fp;
delete mol;
}
{
ROMol *mol;
SparseIntVect<boost::uint32_t> *fp;
mol = SmilesToMol("O=C(O)CC1CC1");
fp = MorganFingerprints::getFingerprint(*mol, 0);
TEST_ASSERT(fp->getNonzeroElements().size() == 6);
delete fp;
fp = MorganFingerprints::getFingerprint(*mol, 1);
TEST_ASSERT(fp->getNonzeroElements().size() == 12);
delete fp;
fp = MorganFingerprints::getFingerprint(*mol, 2);
TEST_ASSERT(fp->getNonzeroElements().size() == 16);
delete fp;
fp = MorganFingerprints::getFingerprint(*mol, 3);
TEST_ASSERT(fp->getNonzeroElements().size() == 17);
delete fp;
delete mol;
}
{
// test that the results aren't order dependent, i.e. that we're
// "canonicalizing" the fps correctly
ROMol *mol, *mol2;
SparseIntVect<boost::uint32_t> *fp, *fp2;
mol = SmilesToMol("O=C(O)CC1CC1");
mol2 = SmilesToMol("OC(=O)CC1CC1");
fp = MorganFingerprints::getFingerprint(*mol, 0);
fp2 = MorganFingerprints::getFingerprint(*mol2, 0);
TEST_ASSERT(fp->getNonzeroElements().size() == 6);
TEST_ASSERT(fp2->getNonzeroElements().size() == 6);
TEST_ASSERT(*fp == *fp2);
delete fp;
delete fp2;
fp = MorganFingerprints::getFingerprint(*mol, 1);
fp2 = MorganFingerprints::getFingerprint(*mol2, 1);
TEST_ASSERT(fp->getNonzeroElements().size() == 12);
TEST_ASSERT(fp2->getNonzeroElements().size() == 12);
TEST_ASSERT(*fp == *fp2);
delete fp;
delete fp2;
fp = MorganFingerprints::getFingerprint(*mol, 2);
fp2 = MorganFingerprints::getFingerprint(*mol2, 2);
TEST_ASSERT(fp->getNonzeroElements().size() == 16);
TEST_ASSERT(fp2->getNonzeroElements().size() == 16);
TEST_ASSERT(*fp == *fp2);
delete fp;
delete fp2;
fp = MorganFingerprints::getFingerprint(*mol, 3);
fp2 = MorganFingerprints::getFingerprint(*mol2, 3);
TEST_ASSERT(fp->getNonzeroElements().size() == 17);
TEST_ASSERT(fp2->getNonzeroElements().size() == 17);
TEST_ASSERT(*fp == *fp2);
delete fp;
delete fp2;
delete mol;
delete mol2;
}
{
// symmetry test:
ROMol *mol;
SparseIntVect<boost::uint32_t> *fp;
mol = SmilesToMol("OCCCCO");
fp = MorganFingerprints::getFingerprint(*mol, 2);
TEST_ASSERT(fp->getNonzeroElements().size() == 7);
SparseIntVect<boost::uint32_t>::StorageType::const_iterator iter;
for (iter = fp->getNonzeroElements().begin();
iter != fp->getNonzeroElements().end(); ++iter) {
TEST_ASSERT(iter->second == 2 || iter->second == 4);
}
delete fp;
delete mol;
}
{
// chirality test:
ROMol *mol;
SparseIntVect<boost::uint32_t> *fp;
mol = SmilesToMol("CC(F)(Cl)C(F)(Cl)C");
fp = MorganFingerprints::getFingerprint(*mol, 0);
TEST_ASSERT(fp->getNonzeroElements().size() == 4);
delete fp;
fp = MorganFingerprints::getFingerprint(*mol, 1);
TEST_ASSERT(fp->getNonzeroElements().size() == 8);
delete mol;
delete fp;
mol = SmilesToMol("CC(F)(Cl)[C@](F)(Cl)C");
fp = MorganFingerprints::getFingerprint(*mol, 0);
TEST_ASSERT(fp->getNonzeroElements().size() == 4);
delete fp;
fp = MorganFingerprints::getFingerprint(*mol, 1);
TEST_ASSERT(fp->getNonzeroElements().size() == 8);
delete fp;
fp = MorganFingerprints::getFingerprint(*mol, 0, nullptr, nullptr, true);
TEST_ASSERT(fp->getNonzeroElements().size() == 4);
delete fp;
fp = MorganFingerprints::getFingerprint(*mol, 1, nullptr, nullptr, true);
TEST_ASSERT(fp->getNonzeroElements().size() == 9);
delete fp;
delete mol;
}
BOOST_LOG(rdErrorLog) << " done" << std::endl;
}
void test2MorganFPsFromAtoms() {
BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
BOOST_LOG(rdErrorLog)
<< " Test Morgan Fingerprints using fromAtoms argument." << std::endl;
{
ROMol *mol;
SparseIntVect<boost::uint32_t> *fp;
std::vector<boost::uint32_t> atoms;
atoms.push_back(0);
mol = SmilesToMol("CCCCC");
fp = MorganFingerprints::getFingerprint(*mol, 0);
TEST_ASSERT(fp->getNonzeroElements().size() == 2);
delete fp;
fp = MorganFingerprints::getFingerprint(
*mol, 0, (std::vector<boost::uint32_t> *)nullptr, &atoms);
TEST_ASSERT(fp->getNonzeroElements().size() == 1);
delete fp;
fp = MorganFingerprints::getFingerprint(
*mol, 1, (std::vector<boost::uint32_t> *)nullptr, &atoms);
TEST_ASSERT(fp->getNonzeroElements().size() == 2);
delete fp;
// tests issue 3415636
fp = MorganFingerprints::getFingerprint(
*mol, 2, (std::vector<boost::uint32_t> *)nullptr, &atoms);
TEST_ASSERT(fp->getNonzeroElements().size() == 3);
delete fp;
delete mol;
}
{
ROMol *mol;
SparseIntVect<boost::uint32_t> *fp;
std::vector<boost::uint32_t> atoms;
mol = SmilesToMol("CCCCC");
fp = MorganFingerprints::getFingerprint(
*mol, 0, (std::vector<boost::uint32_t> *)nullptr, &atoms);
TEST_ASSERT(fp->getNonzeroElements().size() == 0);
delete fp;
fp = MorganFingerprints::getFingerprint(
*mol, 1, (std::vector<boost::uint32_t> *)nullptr, &atoms);
TEST_ASSERT(fp->getNonzeroElements().size() == 0);
delete fp;
delete mol;
}
{
ROMol *mol;
SparseIntVect<boost::uint32_t> *fp;
std::vector<boost::uint32_t> atoms;
atoms.push_back(0);
mol = SmilesToMol("C(CC)CO");
fp = MorganFingerprints::getFingerprint(
*mol, 0, (std::vector<boost::uint32_t> *)nullptr, &atoms);
TEST_ASSERT(fp->getNonzeroElements().size() == 1);
delete fp;
fp = MorganFingerprints::getFingerprint(
*mol, 1, (std::vector<boost::uint32_t> *)nullptr, &atoms);
TEST_ASSERT(fp->getNonzeroElements().size() == 2);
delete fp;
fp = MorganFingerprints::getFingerprint(
*mol, 2, (std::vector<boost::uint32_t> *)nullptr, &atoms);
TEST_ASSERT(fp->getNonzeroElements().size() == 3);
delete fp;
// tests issue 3415636
fp = MorganFingerprints::getFingerprint(
*mol, 3, (std::vector<boost::uint32_t> *)nullptr, &atoms);
TEST_ASSERT(fp->getNonzeroElements().size() == 3);
delete fp;
delete mol;
}
BOOST_LOG(rdErrorLog) << " done" << std::endl;
}
void test3MorganFPs() {
BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
BOOST_LOG(rdErrorLog) << " Test Morgan Fingerprints as bit vects."
<< std::endl;
{
ROMol *mol;
ExplicitBitVect *fp;
mol = SmilesToMol("CCCCC");
fp = MorganFingerprints::getFingerprintAsBitVect(*mol, 0, 2048);
TEST_ASSERT(fp->getNumOnBits() == 2);
delete fp;
fp = MorganFingerprints::getFingerprintAsBitVect(*mol, 1, 2048);
TEST_ASSERT(fp->getNumOnBits() == 5);
delete fp;
fp = MorganFingerprints::getFingerprintAsBitVect(*mol, 2, 2048);
TEST_ASSERT(fp->getNumOnBits() == 7);
delete fp;
fp = MorganFingerprints::getFingerprintAsBitVect(*mol, 3, 2048);
TEST_ASSERT(fp->getNumOnBits() == 7);
delete fp;
delete mol;
}
BOOST_LOG(rdErrorLog) << " done" << std::endl;
}
void test4MorganFPs() {
BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
BOOST_LOG(rdErrorLog)
<< " Test Morgan Fingerprints with feature invariants." << std::endl;
{
ROMol *mol;
mol = SmilesToMol("Cc1ccccc1");
TEST_ASSERT(mol);
std::vector<boost::uint32_t> invars(mol->getNumAtoms());
MorganFingerprints::getFeatureInvariants(*mol, invars);
TEST_ASSERT(invars[0] == 0);
TEST_ASSERT(invars[1] != 0);
TEST_ASSERT(invars[1] == invars[2]);
TEST_ASSERT(invars[1] == invars[3]);
TEST_ASSERT(invars[1] == invars[4]);
TEST_ASSERT(invars[1] == invars[5]);
TEST_ASSERT(invars[1] == invars[6]);
delete mol;
}
{
ROMol *mol;
mol = SmilesToMol("FCCCl");
TEST_ASSERT(mol);
std::vector<boost::uint32_t> invars(mol->getNumAtoms());
MorganFingerprints::getFeatureInvariants(*mol, invars);
TEST_ASSERT(invars[1] == invars[2]);
TEST_ASSERT(invars[1] == 0);
TEST_ASSERT(invars[0] == invars[3]);
TEST_ASSERT(invars[0] != 0);
delete mol;
}
{
ROMol *mol;
mol = SmilesToMol("Cc1ncccc1O");
TEST_ASSERT(mol);
std::vector<boost::uint32_t> invars(mol->getNumAtoms());
std::vector<const ROMol *> patterns(2);
RWMol *p;
p = SmartsToMol("[A]");
patterns[0] = static_cast<const ROMol *>(p);
p = SmartsToMol("[a]");
patterns[1] = static_cast<const ROMol *>(p);
MorganFingerprints::getFeatureInvariants(*mol, invars, &patterns);
TEST_ASSERT(invars[0] != 0);
TEST_ASSERT(invars[1] != 0);
TEST_ASSERT(invars[0] != invars[1]);
TEST_ASSERT(invars[1] == invars[2]);
TEST_ASSERT(invars[0] == invars[7]);
delete mol;
}
BOOST_LOG(rdErrorLog) << " done" << std::endl;
}
void test5MorganFPs() {
BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
BOOST_LOG(rdErrorLog) << " Test additional Morgan fingerprints options."
<< std::endl;
{
ROMol *m1, *m2;
ExplicitBitVect *fp1, *fp2;
std::vector<boost::uint32_t> invars(3);
invars[0] = 1;
invars[1] = 1;
invars[2] = 1;
m1 = SmilesToMol("CCC");
TEST_ASSERT(m1);
m2 = SmilesToMol("CC=C");
TEST_ASSERT(m2);
fp1 = MorganFingerprints::getFingerprintAsBitVect(*m1, 2, 2048, &invars,
nullptr, false, true);
invars[0] = 1;
invars[1] = 1;
invars[2] = 1;
fp2 = MorganFingerprints::getFingerprintAsBitVect(*m2, 2, 2048, &invars,
nullptr, false, true);
TEST_ASSERT((*fp1) != (*fp2));
delete fp1;
delete fp2;
invars[0] = 1;
invars[1] = 1;
invars[2] = 1;
fp1 = MorganFingerprints::getFingerprintAsBitVect(*m1, 2, 2048, &invars,
nullptr, false, false);
invars[0] = 1;
invars[1] = 1;
invars[2] = 1;
fp2 = MorganFingerprints::getFingerprintAsBitVect(*m2, 2, 2048, &invars,
nullptr, false, false);
TEST_ASSERT((*fp1) == (*fp2));
delete fp1;
delete fp2;
delete m1;
delete m2;
}
{
ROMol *m1, *m2, *m3;
ExplicitBitVect *fp1, *fp2, *fp3;
m1 = SmilesToMol("C[C@H](F)Cl");
TEST_ASSERT(m1);
m2 = SmilesToMol("C[C@@H](F)Cl");
TEST_ASSERT(m2);
m3 = SmilesToMol("CC(F)Cl");
TEST_ASSERT(m3);
fp1 = MorganFingerprints::getFingerprintAsBitVect(*m1, 2, 2048, nullptr,
nullptr, false, true);
fp2 = MorganFingerprints::getFingerprintAsBitVect(*m2, 2, 2048, nullptr,
nullptr, false, true);
fp3 = MorganFingerprints::getFingerprintAsBitVect(*m3, 2, 2048, nullptr,
nullptr, false, true);
TEST_ASSERT((*fp1) == (*fp2));
TEST_ASSERT((*fp1) == (*fp3));
TEST_ASSERT((*fp2) == (*fp3));
delete fp1;
delete fp2;
delete fp3;
fp1 = MorganFingerprints::getFingerprintAsBitVect(*m1, 2, 2048, nullptr,
nullptr, true, true);
fp2 = MorganFingerprints::getFingerprintAsBitVect(*m2, 2, 2048, nullptr,
nullptr, true, true);
fp3 = MorganFingerprints::getFingerprintAsBitVect(*m3, 2, 2048, nullptr,
nullptr, true, true);
TEST_ASSERT((*fp1) != (*fp2));
TEST_ASSERT((*fp1) != (*fp3));
TEST_ASSERT((*fp2) != (*fp3));
delete fp1;
delete fp2;
delete fp3;
delete m1;
delete m2;
}
BOOST_LOG(rdErrorLog) << " done" << std::endl;
}
// Regression test for issue 2875658: pins the number of on-bits and a few
// specific bit positions produced by RDKFingerprintMol() for benzene and
// pyridine. The expected values depend on the Boost version in use.
void testIssue2875658() {
  BOOST_LOG(rdInfoLog) << "testing issue 2875658" << std::endl;

  // benzene
  RWMol *m = SmilesToMol("c1ccccc1");
  ExplicitBitVect *fp1 = RDKFingerprintMol(*m, 1, 7, 2048, 4);
  delete m;

  // pyridine
  m = SmilesToMol("c1cccnc1");
  ExplicitBitVect *fp2 = RDKFingerprintMol(*m, 1, 7, 2048, 4);
  delete m;

  // the benzene bit count is the same for both Boost variants
  TEST_ASSERT(fp1->getNumOnBits() == 24);
#if BOOST_VERSION / 100 >= 1040
  // bug fixes in the uniform_int<> distribution in version 1.40
  // necessitate this
  TEST_ASSERT((*fp1)[11]);
  TEST_ASSERT((*fp1)[857]);
  TEST_ASSERT((*fp1)[1786]);
  TEST_ASSERT((*fp1)[2020]);

  TEST_ASSERT(fp2->getNumOnBits() == 64);
  TEST_ASSERT((*fp2)[11]);
  TEST_ASSERT((*fp2)[179]);
  TEST_ASSERT((*fp2)[1878]);
  TEST_ASSERT((*fp2)[2020]);
#else
  // expectations for Boost releases older than 1.40
  TEST_ASSERT((*fp1)[23]);
  TEST_ASSERT((*fp1)[434]);
  TEST_ASSERT((*fp1)[1445]);
  TEST_ASSERT((*fp1)[2031]);

  TEST_ASSERT(fp2->getNumOnBits() == 62);
  TEST_ASSERT((*fp2)[23]);
  TEST_ASSERT((*fp2)[173]);
  TEST_ASSERT((*fp2)[1847]);
  TEST_ASSERT((*fp2)[1975]);
#endif

  delete fp1;
  delete fp2;
}
// Verifies AtomPairs::getAtomCode() against hand-assembled target codes.
// Each target is built from three fields: a value in the lowest bits, a value
// shifted by numBranchBits, and a value shifted by numBranchBits + numPiBits
// (presumably branch count, pi-electron count and atom-type index - confirm
// against AtomPairs.h).
void testAtomCodes() {
  BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
  BOOST_LOG(rdErrorLog) << " Test Atom Codes." << std::endl;

  // offset of the topmost field inside an atom code
  const auto typeShift = AtomPairs::numBranchBits + AtomPairs::numPiBits;
  boost::uint32_t tgt;

  ROMol *mol = SmilesToMol("C=C");
  // both ends of ethene are equivalent
  TEST_ASSERT(AtomPairs::getAtomCode(mol->getAtomWithIdx(0)) ==
              AtomPairs::getAtomCode(mol->getAtomWithIdx(1)));
  tgt = 1 | ((1 | (1 << AtomPairs::numPiBits)) << AtomPairs::numBranchBits);
  TEST_ASSERT(AtomPairs::getAtomCode(mol->getAtomWithIdx(0)) == tgt);
  // second argument 1: the lowest field drops to 0
  tgt = (1 << AtomPairs::numBranchBits) | (1 << typeShift);
  TEST_ASSERT(AtomPairs::getAtomCode(mol->getAtomWithIdx(0), 1) == tgt);
  delete mol;

  mol = SmilesToMol("C#CO");
  tgt = 1 | (2 << AtomPairs::numBranchBits) | (1 << typeShift);
  TEST_ASSERT(AtomPairs::getAtomCode(mol->getAtomWithIdx(0)) == tgt);
  tgt = 2 | (2 << AtomPairs::numBranchBits) | (1 << typeShift);
  TEST_ASSERT(AtomPairs::getAtomCode(mol->getAtomWithIdx(1)) == tgt);
  tgt = 1 | (0 << AtomPairs::numBranchBits) | (3 << typeShift);
  TEST_ASSERT(AtomPairs::getAtomCode(mol->getAtomWithIdx(2)) == tgt);
  delete mol;

  mol = SmilesToMol("CC(O)C(O)(O)C");
  tgt = 1 | (0 << AtomPairs::numBranchBits) | (1 << typeShift);
  TEST_ASSERT(AtomPairs::getAtomCode(mol->getAtomWithIdx(1), 2) == tgt);
  tgt = 2 | (0 << AtomPairs::numBranchBits) | (1 << typeShift);
  TEST_ASSERT(AtomPairs::getAtomCode(mol->getAtomWithIdx(3), 2) == tgt);
  delete mol;

  mol = SmilesToMol("C=CC(=O)O");
  tgt = 0 | (0 << AtomPairs::numBranchBits) | (3 << typeShift);
  TEST_ASSERT(AtomPairs::getAtomCode(mol->getAtomWithIdx(4), 1) == tgt);
  tgt = 3 | (1 << AtomPairs::numBranchBits) | (1 << typeShift);
  TEST_ASSERT(AtomPairs::getAtomCode(mol->getAtomWithIdx(2)) == tgt);
  delete mol;

  BOOST_LOG(rdErrorLog) << " done" << std::endl;
}
// Verifies AtomPairs::getAtomPairCode() against hand-assembled codes and
// checks the contents of AtomPairs::getAtomPairFingerprint() for two small
// molecules.
void testAtomPairs() {
  BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
  BOOST_LOG(rdErrorLog) << " Test Atom Pairs." << std::endl;

  boost::uint32_t tgt;
  boost::uint32_t c1, c2, c3;

  // --- pair codes: distance in the low bits, then min code, then max code ---
  ROMol *mol = SmilesToMol("CCCCC");
  TEST_ASSERT(mol);
  c1 = AtomPairs::getAtomCode(mol->getAtomWithIdx(0));
  c2 = AtomPairs::getAtomCode(mol->getAtomWithIdx(1));
  c3 = AtomPairs::getAtomCode(mol->getAtomWithIdx(2));
  tgt = 1 | (std::min(c1, c2) | std::max(c1, c2) << AtomPairs::codeSize)
                << AtomPairs::numPathBits;
  // the pair code must not depend on argument order
  TEST_ASSERT(AtomPairs::getAtomPairCode(c1, c2, 1) == tgt);
  TEST_ASSERT(AtomPairs::getAtomPairCode(c2, c1, 1) == tgt);
  tgt = 2 | (std::min(c1, c3) | std::max(c1, c3) << AtomPairs::codeSize)
                << AtomPairs::numPathBits;
  TEST_ASSERT(AtomPairs::getAtomPairCode(c1, c3, 2) == tgt);
  TEST_ASSERT(AtomPairs::getAtomPairCode(c3, c1, 2) == tgt);
  delete mol;

  // --- propane: two equivalent distance-1 pairs and one distance-2 pair ---
  mol = SmilesToMol("CCC");
  TEST_ASSERT(mol);
  SparseIntVect<boost::int32_t> *fp = AtomPairs::getAtomPairFingerprint(*mol);
  TEST_ASSERT(fp->getTotalVal() == 3);
  TEST_ASSERT(fp->getNonzeroElements().size() == 2);
  c1 = AtomPairs::getAtomCode(mol->getAtomWithIdx(0));
  c2 = AtomPairs::getAtomCode(mol->getAtomWithIdx(1));
  c3 = AtomPairs::getAtomCode(mol->getAtomWithIdx(2));
  TEST_ASSERT(fp->getVal(AtomPairs::getAtomPairCode(c1, c2, 1)) == 2);
  TEST_ASSERT(fp->getVal(AtomPairs::getAtomPairCode(c1, c3, 2)) == 1);
  delete mol;
  delete fp;

  // --- acetaldehyde plus a disconnected Cl: three distinct pairs, each seen
  // once (the test expects no pair to involve the Cl fragment) ---
  mol = SmilesToMol("CC=O.Cl");
  TEST_ASSERT(mol);
  fp = AtomPairs::getAtomPairFingerprint(*mol);
  TEST_ASSERT(fp->getTotalVal() == 3);
  TEST_ASSERT(fp->getNonzeroElements().size() == 3);
  c1 = AtomPairs::getAtomCode(mol->getAtomWithIdx(0));
  c2 = AtomPairs::getAtomCode(mol->getAtomWithIdx(1));
  c3 = AtomPairs::getAtomCode(mol->getAtomWithIdx(2));
  TEST_ASSERT(fp->getVal(AtomPairs::getAtomPairCode(c1, c2, 1)) == 1);
  // The original repeated the (c1, c2, 1) check here, which left the
  // distance-2 C..O pair unverified.
  TEST_ASSERT(fp->getVal(AtomPairs::getAtomPairCode(c1, c3, 2)) == 1);
  TEST_ASSERT(fp->getVal(AtomPairs::getAtomPairCode(c2, c3, 1)) == 1);
  delete mol;
  delete fp;

  BOOST_LOG(rdErrorLog) << " done" << std::endl;
}
// Checks AtomPairs::getAtomPairFingerprint() with explicit second and third
// arguments (used here as a lower/upper distance limit) on propane.
void testAtomPairs2() {
  BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
  BOOST_LOG(rdErrorLog) << " Test Atom Pairs part 2." << std::endl;
  {
    ROMol *mol = SmilesToMol("CCC");
    TEST_ASSERT(mol);

    // limits 1..2: all three pairs are present
    SparseIntVect<boost::int32_t> *fp =
        AtomPairs::getAtomPairFingerprint(*mol, 1, 2);
    TEST_ASSERT(fp->getTotalVal() == 3);
    TEST_ASSERT(fp->getNonzeroElements().size() == 2);
    delete fp;

    // limits 2..2: only a single pair remains
    fp = AtomPairs::getAtomPairFingerprint(*mol, 2, 2);
    TEST_ASSERT(fp->getTotalVal() == 1);
    TEST_ASSERT(fp->getNonzeroElements().size() == 1);

    // both objects were previously leaked
    delete fp;
    delete mol;
  }
  BOOST_LOG(rdErrorLog) << " done" << std::endl;
}
// Sanity checks for AtomPairs::getHashedAtomPairFingerprint(): repeated calls
// on the same molecule agree, and Dice similarity to a related molecule stays
// inside [0, 1].
void testHashedAtomPairs() {
  BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
  BOOST_LOG(rdErrorLog) << " Test Hashed Atom Pairs." << std::endl;
  {
    // default arguments: benzene against itself, then against pyridine
    ROMol *mol = SmilesToMol("c1ccccc1");
    SparseIntVect<boost::int32_t> *fp1 =
        AtomPairs::getHashedAtomPairFingerprint(*mol);
    SparseIntVect<boost::int32_t> *fp2 =
        AtomPairs::getHashedAtomPairFingerprint(*mol);
    TEST_ASSERT(DiceSimilarity(*fp1, *fp2) == 1.0);
    TEST_ASSERT(*fp1 == *fp2);
    delete mol;
    delete fp2;

    mol = SmilesToMol("c1ccccn1");
    fp2 = AtomPairs::getHashedAtomPairFingerprint(*mol);
    RANGE_CHECK(0.0, DiceSimilarity(*fp1, *fp2), 1.0);
    delete mol;
    delete fp1;
    delete fp2;
  }
  {
    // explicit limits: for benzene, (1, 3) yields the same fingerprint as the
    // defaults; (1, 2) is only required to give a similarity within [0, 1]
    ROMol *mol = SmilesToMol("c1ccccc1");
    SparseIntVect<boost::int32_t> *fp1 =
        AtomPairs::getHashedAtomPairFingerprint(*mol, 2048);
    SparseIntVect<boost::int32_t> *fp2 =
        AtomPairs::getHashedAtomPairFingerprint(*mol, 2048, 1, 3);
    TEST_ASSERT(DiceSimilarity(*fp1, *fp2) == 1.0);
    TEST_ASSERT(*fp1 == *fp2);
    delete fp2;

    fp2 = AtomPairs::getHashedAtomPairFingerprint(*mol, 2048, 1, 2);
    RANGE_CHECK(0.0, DiceSimilarity(*fp1, *fp2), 1.0);
    delete mol;
    delete fp1;
    delete fp2;
  }
  BOOST_LOG(rdErrorLog) << " done" << std::endl;
}
void testTorsions() {
BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
BOOST_LOG(rdErrorLog) << " Test Topological Torsions." << std::endl;
ROMol *mol;
SparseIntVect<boost::int64_t> *fp;
boost::uint64_t tgt;
boost::uint64_t c1, c2, c3, c4;
std::vector<boost::uint32_t> codes;
mol = SmilesToMol("CCCC");
c1 = AtomPairs::getAtomCode(mol->getAtomWithIdx(0)) - 1;
c2 = AtomPairs::getAtomCode(mol->getAtomWithIdx(1)) - 2;
c3 = AtomPairs::getAtomCode(mol->getAtomWithIdx(2)) - 2;
c4 = AtomPairs::getAtomCode(mol->getAtomWithIdx(3)) - 1;
tgt = c1 | (c2 | (c3 | c4 << AtomPairs::codeSize) << AtomPairs::codeSize)
<< AtomPairs::codeSize;
codes.clear();
codes.push_back(static_cast<unsigned int>(c1));
codes.push_back(static_cast<unsigned int>(c2));
codes.push_back(static_cast<unsigned int>(c3));
codes.push_back(static_cast<unsigned int>(c4));
TEST_ASSERT(AtomPairs::getTopologicalTorsionCode(codes) == tgt);
fp = AtomPairs::getTopologicalTorsionFingerprint(*mol);
TEST_ASSERT(fp->getTotalVal() == 1);
TEST_ASSERT(fp->getNonzeroElements().size() == 1);
delete mol;
delete fp;
mol = SmilesToMol("CCCCO.Cl");
fp = AtomPairs::getTopologicalTorsionFingerprint(*mol);
TEST_ASSERT(fp->getTotalVal() == 2);
TEST_ASSERT(fp->getNonzeroElements().size() == 2);
delete fp;
fp = AtomPairs::getTopologicalTorsionFingerprint(*mol, 3);
TEST_ASSERT(fp->getTotalVal() == 3);
TEST_ASSERT(fp->getNonzeroElements().size() == 3);
delete mol;
delete fp;
BOOST_LOG(rdErrorLog) << " done" << std::endl;
}
// Sanity checks for AtomPairs::getHashedTopologicalTorsionFingerprint():
// repeated calls on the same molecule agree, and Dice similarity to a related
// molecule stays inside [0, 1].
void testHashedTorsions() {
  BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
  BOOST_LOG(rdErrorLog) << " Test Hashed torsions." << std::endl;
  {
    // default arguments
    ROMol *mol = SmilesToMol("c1ccccc1");
    SparseIntVect<boost::int64_t> *fp1 =
        AtomPairs::getHashedTopologicalTorsionFingerprint(*mol);
    SparseIntVect<boost::int64_t> *fp2 =
        AtomPairs::getHashedTopologicalTorsionFingerprint(*mol);
    TEST_ASSERT(DiceSimilarity(*fp1, *fp2) == 1.0);
    TEST_ASSERT(*fp1 == *fp2);
    delete mol;
    delete fp2;

    mol = SmilesToMol("c1ccccn1");
    fp2 = AtomPairs::getHashedTopologicalTorsionFingerprint(*mol);
    RANGE_CHECK(0.0, DiceSimilarity(*fp1, *fp2), 1.0);
    delete mol;
    delete fp1;
    delete fp2;
  }
  {
    // same checks with explicit arguments (2048, 6)
    ROMol *mol = SmilesToMol("c1ccccc1");
    SparseIntVect<boost::int64_t> *fp1 =
        AtomPairs::getHashedTopologicalTorsionFingerprint(*mol, 2048, 6);
    SparseIntVect<boost::int64_t> *fp2 =
        AtomPairs::getHashedTopologicalTorsionFingerprint(*mol, 2048, 6);
    TEST_ASSERT(DiceSimilarity(*fp1, *fp2) == 1.0);
    TEST_ASSERT(*fp1 == *fp2);
    delete mol;
    delete fp2;

    mol = SmilesToMol("c1ccccn1");
    fp2 = AtomPairs::getHashedTopologicalTorsionFingerprint(*mol, 2048, 6);
    RANGE_CHECK(0.0, DiceSimilarity(*fp1, *fp2), 1.0);
    delete mol;
    delete fp1;
    delete fp2;
  }
  BOOST_LOG(rdErrorLog) << " done" << std::endl;
}
// Runs AtomPairs::getTopologicalTorsionFingerprint() over every molecule of an
// SD file located under $RDBASE and requires each fingerprint to have a total
// value greater than 1.
void testBulkTorsions() {
  BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
  BOOST_LOG(rdErrorLog) << " Test Bulk Topological Torsions." << std::endl;

  // Constructing a std::string from a null pointer is undefined behaviour, so
  // fail with a test assertion when RDBASE is not set.
  const char *rdbase = getenv("RDBASE");
  TEST_ASSERT(rdbase);
  std::string fName = rdbase;
  fName += "/Projects/DbCLI/testData/pubchem.200.sdf";

  SDMolSupplier suppl(fName);
  while (!suppl.atEnd()) {
    ROMol *mol = suppl.next();
    // guard against a record that could not be turned into a molecule
    TEST_ASSERT(mol);
    SparseIntVect<boost::int64_t> *fp =
        AtomPairs::getTopologicalTorsionFingerprint(*mol);
    TEST_ASSERT(fp->getTotalVal() > 1);
    delete mol;
    delete fp;
  }
  BOOST_LOG(rdErrorLog) << " done" << std::endl;
}
// Compares the full atom-pair fingerprint of a molecule with the one obtained
// when a root-atom list ({0}) is supplied: the rooted fingerprint must be
// non-empty, smaller, and never exceed the full fingerprint in any element.
void testRootedAtomPairs() {
  BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
  BOOST_LOG(rdErrorLog) << " Test Rooted Atom Pairs." << std::endl;

  ROMol *mol = SmilesToMol("OCCCCC");
  TEST_ASSERT(mol);

  // unrestricted fingerprint
  SparseIntVect<boost::int32_t> *fp1 = AtomPairs::getAtomPairFingerprint(*mol);
  SparseIntVect<boost::int32_t>::StorageType nz1 = fp1->getNonzeroElements();
  TEST_ASSERT(nz1.size() > 0);

  // fingerprint rooted at atom 0
  std::vector<boost::uint32_t> roots;
  roots.push_back(0);
  SparseIntVect<boost::int32_t> *fp2 =
      AtomPairs::getAtomPairFingerprint(*mol, &roots);
  SparseIntVect<boost::int32_t>::StorageType nz2 = fp2->getNonzeroElements();
  TEST_ASSERT(nz2.size() > 0);
  TEST_ASSERT(nz2.size() < nz1.size());

  // Every rooted count must be bounded by the count in the full fingerprint.
  // (The original compared fp2 with itself, which could never fail.)
  for (const auto &entry : nz2) {
    TEST_ASSERT(entry.second <= fp1->getVal(entry.first));
  }

  delete mol;
  delete fp1;
  delete fp2;
  BOOST_LOG(rdErrorLog) << " done" << std::endl;
}
// Checks the atom list passed as the argument after the root-atom list of the
// atom-pair fingerprint functions; according to the test title these are the
// atoms to ignore.
void testIgnoreAtomPairs() {
  BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
  BOOST_LOG(rdErrorLog) << " Test ignoring atoms in Atom Pairs."
                        << std::endl;
  {
    // sparse fingerprint: ignoring atom 0 removes exactly five elements
    ROMol *mol = SmilesToMol("OCCCCC");
    TEST_ASSERT(mol);
    SparseIntVect<boost::int32_t> *fp1 =
        AtomPairs::getAtomPairFingerprint(*mol, 1, 5);
    SparseIntVect<boost::int32_t>::StorageType nz1 = fp1->getNonzeroElements();
    TEST_ASSERT(nz1.size() > 0);

    std::vector<boost::uint32_t> roots;
    roots.push_back(0);
    SparseIntVect<boost::int32_t> *fp2 =
        AtomPairs::getAtomPairFingerprint(*mol, 1, 5, nullptr, &roots);
    SparseIntVect<boost::int32_t>::StorageType nz2 = fp2->getNonzeroElements();
    TEST_ASSERT(nz2.size() == nz1.size() - 5);

    // Each remaining count is bounded by the unrestricted fingerprint.
    // (The original compared fp2 with itself, which could never fail.)
    for (const auto &entry : nz2) {
      TEST_ASSERT(entry.second <= fp1->getVal(entry.first));
    }
    delete mol;
    delete fp1;
    delete fp2;
  }
  {
    // the same atom used both as root and as ignored atom: nothing is left
    ROMol *mol = SmilesToMol("OCCCCC");
    TEST_ASSERT(mol);
    std::vector<boost::uint32_t> roots;
    roots.push_back(0);
    SparseIntVect<boost::int32_t> *fp2 =
        AtomPairs::getAtomPairFingerprint(*mol, 1, 5, &roots, &roots);
    SparseIntVect<boost::int32_t>::StorageType nz2 = fp2->getNonzeroElements();
    TEST_ASSERT(nz2.size() == 0);
    delete mol;
    delete fp2;
  }
  {
    // hashed fingerprint: ignoring atom 0 shrinks the set of nonzero elements
    ROMol *mol = SmilesToMol("OCCCCC");
    TEST_ASSERT(mol);
    SparseIntVect<boost::int32_t> *fp1 =
        AtomPairs::getHashedAtomPairFingerprint(*mol, 4096, 1, 5);
    SparseIntVect<boost::int32_t>::StorageType nz1 = fp1->getNonzeroElements();
    TEST_ASSERT(nz1.size() > 0);

    std::vector<boost::uint32_t> roots;
    roots.push_back(0);
    SparseIntVect<boost::int32_t> *fp2 = AtomPairs::getHashedAtomPairFingerprint(
        *mol, 4096, 1, 5, nullptr, &roots);
    SparseIntVect<boost::int32_t>::StorageType nz2 = fp2->getNonzeroElements();
    TEST_ASSERT(nz2.size() < nz1.size());

    // bounded by the unrestricted fingerprint (see the first block)
    for (const auto &entry : nz2) {
      TEST_ASSERT(entry.second <= fp1->getVal(entry.first));
    }
    delete mol;
    delete fp1;
    delete fp2;
  }
  BOOST_LOG(rdErrorLog) << " done" << std::endl;
}
// Compares the full topological-torsion fingerprint of a molecule with the one
// obtained when a root-atom list ({0}) is supplied: the rooted fingerprint
// must be non-empty, smaller, and never exceed the full one in any element.
void testRootedTorsions() {
  BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
  BOOST_LOG(rdErrorLog) << " Test Rooted Topological Torsions." << std::endl;

  ROMol *mol = SmilesToMol("OCCCC");
  TEST_ASSERT(mol);
  std::vector<boost::uint32_t> roots;
  roots.push_back(0);

  // unrestricted fingerprint
  SparseIntVect<boost::int64_t> *fp1 =
      AtomPairs::getTopologicalTorsionFingerprint(*mol);
  SparseIntVect<boost::int64_t>::StorageType nz1 = fp1->getNonzeroElements();
  TEST_ASSERT(nz1.size() > 0);

  // fingerprint rooted at atom 0
  SparseIntVect<boost::int64_t> *fp2 =
      AtomPairs::getTopologicalTorsionFingerprint(*mol, 4, &roots);
  SparseIntVect<boost::int64_t>::StorageType nz2 = fp2->getNonzeroElements();
  TEST_ASSERT(nz2.size() > 0);
  TEST_ASSERT(nz2.size() < nz1.size());

  // Every rooted count must be bounded by the count in the full fingerprint.
  // (The original compared fp2 with itself, which could never fail.)
  for (const auto &entry : nz2) {
    TEST_ASSERT(entry.second <= fp1->getVal(entry.first));
  }

  delete mol;
  delete fp1;
  delete fp2;
  BOOST_LOG(rdErrorLog) << " done" << std::endl;
}
void testIgnoreTorsions() {
BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
BOOST_LOG(rdErrorLog) << " Test ignoring atoms in Topological Torsions."
<< std::endl;
{
ROMol *mol;
SparseIntVect<boost::int64_t> *fp1, *fp2;
std::vector<boost::uint32_t> roots;
mol = SmilesToMol("OCCCC");
roots.push_back(0);
fp1 = AtomPairs::getTopologicalTorsionFingerprint(*mol);
SparseIntVect<boost::int64_t>::StorageType nz1 = fp1->getNonzeroElements();
TEST_ASSERT(nz1.size() == 2);
fp2 = AtomPairs::getTopologicalTorsionFingerprint(*mol, 4, nullptr, &roots);
SparseIntVect<boost::int64_t>::StorageType nz2 = fp2->getNonzeroElements();
TEST_ASSERT(nz2.size() == 1);
for (SparseIntVect<boost::int64_t>::StorageType::const_iterator bIt =
nz2.begin();
bIt != nz2.end(); ++bIt) {
TEST_ASSERT(bIt->second <= fp2->getVal(bIt->first));
}
delete mol;
delete fp1;
delete fp2;
}
{
ROMol *mol;
SparseIntVect<boost::int64_t> *fp2;
std::vector<boost::uint32_t> roots;
mol = SmilesToMol("OCCCC");
roots.push_back(1);
fp2 = AtomPairs::getTopologicalTorsionFingerprint(*mol, 4, nullptr, &roots);
SparseIntVect<boost::int64_t>::StorageType nz2 = fp2->getNonzeroElements();
TEST_ASSERT(nz2.size() == 0);
delete mol;
delete fp2;
}
{
ROMol *mol;
SparseIntVect<boost::int64_t> *fp2;
std::vector<boost::uint32_t> roots;
mol = SmilesToMol("OCCCC");
roots.push_back(0);
fp2 = AtomPairs::getTopologicalTorsionFingerprint(*mol, 4, &roots, &roots);
SparseIntVect<boost::int64_t>::StorageType nz2 = fp2->getNonzeroElements();
TEST_ASSERT(nz2.size() == 0);
delete mol;
delete fp2;
}
BOOST_LOG(rdErrorLog) << " done" << std::endl;
}
// Checks the BitInfoMap filled in by the Morgan fingerprint functions: it maps
// each set bit/element to a list of pairs whose second member is checked
// against the radius used (0, or 0/1 for a radius-1 fingerprint).
void testMorganAtomInfo() {
  BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
  BOOST_LOG(rdErrorLog) << " Test atom info from morgan fingerprints."
                        << std::endl;
  {
    // count-based sparse fingerprint
    ROMol *mol = SmilesToMol("CCCCC");
    TEST_ASSERT(mol);
    MorganFingerprints::BitInfoMap bitInfo;

    // radius 0
    SparseIntVect<boost::uint32_t> *fp = MorganFingerprints::getFingerprint(
        *mol, 0, nullptr, nullptr, false, true, true, false, &bitInfo);
    SparseIntVect<boost::uint32_t>::StorageType nze = fp->getNonzeroElements();
    TEST_ASSERT(nze.size() == 2);
    TEST_ASSERT(bitInfo.size() == 2);
    // the count of every element equals the number of bitInfo entries for it
    for (const auto &elem : nze) {
      TEST_ASSERT(elem.second == rdcast<int>(bitInfo[elem.first].size()));
    }
    for (const auto &entry : bitInfo) {
      TEST_ASSERT(entry.second.begin()->second == 0);
    }
    delete fp;
    bitInfo.clear();

    // radius 1
    fp = MorganFingerprints::getFingerprint(*mol, 1, nullptr, nullptr, false,
                                            true, true, false, &bitInfo);
    TEST_ASSERT(fp->getNonzeroElements().size() == 5);
    // Refresh the element list: the original kept iterating over the stale
    // radius-0 elements, so the radius-1 elements were never checked.
    nze = fp->getNonzeroElements();
    for (const auto &elem : nze) {
      TEST_ASSERT(elem.second == rdcast<int>(bitInfo[elem.first].size()));
    }
    for (const auto &entry : bitInfo) {
      TEST_ASSERT(entry.second.begin()->second == 0 ||
                  entry.second.begin()->second == 1);
    }
    delete fp;
    delete mol;
  }
  {
    // bit-vector fingerprint
    ROMol *mol = SmilesToMol("CCCCC");
    TEST_ASSERT(mol);
    MorganFingerprints::BitInfoMap bitInfo;

    // radius 0
    ExplicitBitVect *fp = MorganFingerprints::getFingerprintAsBitVect(
        *mol, 0, 2048, nullptr, nullptr, false, true, false, &bitInfo);
    TEST_ASSERT(fp->getNumOnBits() == 2);
    TEST_ASSERT(bitInfo.size() == 2);
    // every key is a valid, set bit of the 2048-bit vector
    for (const auto &entry : bitInfo) {
      TEST_ASSERT(entry.first < 2048);
      TEST_ASSERT(fp->getBit(entry.first));
    }
    for (const auto &entry : bitInfo) {
      TEST_ASSERT(entry.second.begin()->second == 0);
    }
    delete fp;
    bitInfo.clear();

    // radius 1
    fp = MorganFingerprints::getFingerprintAsBitVect(
        *mol, 1, 2048, nullptr, nullptr, false, true, false, &bitInfo);
    TEST_ASSERT(fp->getNumOnBits() == 5);
    TEST_ASSERT(bitInfo.size() == 5);
    for (const auto &entry : bitInfo) {
      TEST_ASSERT(entry.first < 2048);
      TEST_ASSERT(fp->getBit(entry.first));
    }
    for (const auto &entry : bitInfo) {
      TEST_ASSERT(entry.second.begin()->second == 0 ||
                  entry.second.begin()->second == 1);
    }
    delete fp;
    delete mol;
  }
  {  // this was github issue #295
    // the bit-vector and the hashed count fingerprint must report the same
    // set of bit ids, all below the fingerprint size
    ROMol *mol = SmilesToMol("CCCCC");
    TEST_ASSERT(mol);
    MorganFingerprints::BitInfoMap bitInfo1, bitInfo2;

    ExplicitBitVect *fp = MorganFingerprints::getFingerprintAsBitVect(
        *mol, 2, 2048, nullptr, nullptr, false, true, false, &bitInfo1);
    delete fp;

    SparseIntVect<boost::uint32_t> *iv = MorganFingerprints::getHashedFingerprint(
        *mol, 2, 2048, nullptr, nullptr, false, true, false, &bitInfo2);
    delete iv;

    TEST_ASSERT(bitInfo1.size() == bitInfo2.size());
    for (const auto &entry : bitInfo1) {
      TEST_ASSERT(entry.first < 2048);
      TEST_ASSERT(bitInfo2.find(entry.first) != bitInfo2.end());
    }
    delete mol;
  }
  BOOST_LOG(rdErrorLog) << " done" << std::endl;
}
// Exercises optional arguments of RDKFingerprintMol(): with default settings
// two different rings give different fingerprints, while identical
// caller-supplied atom invariants make them indistinguishable.
void testRDKitFPOptions() {
  BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
  BOOST_LOG(rdInfoLog) << "testing RDKit fingerprint options" << std::endl;
  {
    // defaults: the two rings are told apart
    RWMol *m1 = SmilesToMol(std::string("C1=CC=CC=C1"));
    TEST_ASSERT(m1);
    RWMol *m2 = SmilesToMol(std::string("C1=CC=CC=N1"));
    TEST_ASSERT(m2);

    ExplicitBitVect *fp1 = RDKFingerprintMol(*m1);
    ExplicitBitVect *fp2 = RDKFingerprintMol(*m2);
    TEST_ASSERT(*fp1 != *fp2);

    delete m1;
    delete m2;
    delete fp1;
    delete fp2;
  }
  {
    // every atom gets invariant 1: the heteroatom no longer matters
    UINT_VECT invars(6, 1);
    RWMol *m1 = SmilesToMol(std::string("C1=CC=CC=C1"));
    TEST_ASSERT(m1);
    RWMol *m2 = SmilesToMol(std::string("C1=CC=CC=N1"));
    TEST_ASSERT(m2);

    ExplicitBitVect *fp1 = RDKFingerprintMol(*m1, 1, 7, 2048, 2, true, 0, 128,
                                             true, true, &invars);
    ExplicitBitVect *fp2 = RDKFingerprintMol(*m2, 1, 7, 2048, 2, true, 0, 128,
                                             true, true, &invars);
    TEST_ASSERT(*fp1 == *fp2);

    delete m1;
    delete m2;
    delete fp1;
    delete fp2;
  }
  {
    // invariants all 1 and the tenth argument switched to false: even the
    // unsaturated ring and the saturated N-containing ring coincide
    UINT_VECT invars(6, 1);
    RWMol *m1 = SmilesToMol(std::string("C1=CC=CC=C1"));
    TEST_ASSERT(m1);
    RWMol *m2 = SmilesToMol(std::string("C1CCCCN1"));
    TEST_ASSERT(m2);

    ExplicitBitVect *fp1 = RDKFingerprintMol(*m1, 1, 7, 2048, 2, true, 0, 128,
                                             true, false, &invars);
    ExplicitBitVect *fp2 = RDKFingerprintMol(*m2, 1, 7, 2048, 2, true, 0, 128,
                                             true, false, &invars);
    TEST_ASSERT(*fp1 == *fp2);

    delete m1;
    delete m2;
    delete fp1;
    delete fp2;
  }
  BOOST_LOG(rdInfoLog) << "done" << std::endl;
}
void testPairsAndTorsionsOptions() {
BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
BOOST_LOG(rdInfoLog) << "testing atom pair and torsions options" << std::endl;
{
std::string smi = "C1=CC=CC=C1";
RWMol *m1 = SmilesToMol(smi);
TEST_ASSERT(m1);
smi = "C1=CC=CC=N1";
RWMol *m2 = SmilesToMol(smi);
TEST_ASSERT(m2);
SparseIntVect<boost::int32_t> *fp1 = AtomPairs::getAtomPairFingerprint(*m1);
SparseIntVect<boost::int32_t> *fp2 = AtomPairs::getAtomPairFingerprint(*m2);
TEST_ASSERT(*fp1 != *fp2);
delete fp1;
delete fp2;
UINT_VECT invars(6, 1);
fp1 = AtomPairs::getAtomPairFingerprint(
*m1, (const std::vector<boost::uint32_t> *)nullptr,
(const std::vector<boost::uint32_t> *)nullptr, &invars);
fp2 = AtomPairs::getAtomPairFingerprint(
*m1, (const std::vector<boost::uint32_t> *)nullptr,
(const std::vector<boost::uint32_t> *)nullptr, &invars);
TEST_ASSERT(*fp1 == *fp2);
delete m1;
delete m2;
delete fp1;
delete fp2;
}
{
std::string smi = "C1=CC=CC=C1";
RWMol *m1 = SmilesToMol(smi);
TEST_ASSERT(m1);
smi = "C1=CC=CC=N1";
RWMol *m2 = SmilesToMol(smi);
TEST_ASSERT(m2);
SparseIntVect<boost::int32_t> *fp1 =
AtomPairs::getHashedAtomPairFingerprint(*m1, 1024, 1, 5);
SparseIntVect<boost::int32_t> *fp2 =
AtomPairs::getHashedAtomPairFingerprint(*m2, 1024, 1, 5);
TEST_ASSERT(*fp1 != *fp2);
delete fp1;
delete fp2;
UINT_VECT invars(6, 1);
fp1 = AtomPairs::getHashedAtomPairFingerprint(
*m1, 1024, 1, 5, (const std::vector<boost::uint32_t> *)nullptr,
(const std::vector<boost::uint32_t> *)nullptr, &invars);
fp2 = AtomPairs::getHashedAtomPairFingerprint(
*m2, 1024, 1, 5, (const std::vector<boost::uint32_t> *)nullptr,
(const std::vector<boost::uint32_t> *)nullptr, &invars);
TEST_ASSERT(*fp1 == *fp2);
delete m1;
delete m2;
delete fp1;
delete fp2;
}
{
std::string smi = "C1=CC=CC=C1";
RWMol *m1 = SmilesToMol(smi);
TEST_ASSERT(m1);
smi = "C1=CC=CC=N1";
RWMol *m2 = SmilesToMol(smi);
TEST_ASSERT(m2);
ExplicitBitVect *fp1 =
AtomPairs::getHashedAtomPairFingerprintAsBitVect(*m1, 1024, 1, 5);
ExplicitBitVect *fp2 =
AtomPairs::getHashedAtomPairFingerprintAsBitVect(*m2, 1024, 1, 5);
TEST_ASSERT(*fp1 != *fp2);
delete fp1;
delete fp2;
UINT_VECT invars(6, 1);
fp1 = AtomPairs::getHashedAtomPairFingerprintAsBitVect(
*m1, 1024, 1, 5, (const std::vector<boost::uint32_t> *)nullptr,
(const std::vector<boost::uint32_t> *)nullptr, &invars);
fp2 = AtomPairs::getHashedAtomPairFingerprintAsBitVect(
*m2, 1024, 1, 5, (const std::vector<boost::uint32_t> *)nullptr,
(const std::vector<boost::uint32_t> *)nullptr, &invars);
TEST_ASSERT(*fp1 == *fp2);
delete m1;
delete m2;
delete fp1;
delete fp2;
}
{
std::string smi = "C1=CC=CC=C1";
RWMol *m1 = SmilesToMol(smi);
TEST_ASSERT(m1);
smi = "C1=CC=CC=N1";
RWMol *m2 = SmilesToMol(smi);
TEST_ASSERT(m2);
SparseIntVect<boost::int64_t> *fp1 =
AtomPairs::getTopologicalTorsionFingerprint(*m1);
SparseIntVect<boost::int64_t> *fp2 =
AtomPairs::getTopologicalTorsionFingerprint(*m2);
TEST_ASSERT(*fp1 != *fp2);
delete fp1;
delete fp2;
UINT_VECT invars(6, 1);
fp1 = AtomPairs::getTopologicalTorsionFingerprint(
*m1, 4, (const std::vector<boost::uint32_t> *)nullptr,
(const std::vector<boost::uint32_t> *)nullptr, &invars);
fp2 = AtomPairs::getTopologicalTorsionFingerprint(
*m2, 4, (const std::vector<boost::uint32_t> *)nullptr,
(const std::vector<boost::uint32_t> *)nullptr, &invars);
TEST_ASSERT(*fp1 == *fp2);
delete m1;
delete m2;
delete fp1;
delete fp2;
}
{
std::string smi = "C1=CC=CC=C1";
RWMol *m1 = SmilesToMol(smi);
TEST_ASSERT(m1);
smi = "C1=CC=CC=N1";
RWMol *m2 = SmilesToMol(smi);
TEST_ASSERT(m2);
SparseIntVect<boost::int64_t> *fp1 =
AtomPairs::getHashedTopologicalTorsionFingerprint(*m1);
SparseIntVect<boost::int64_t> *fp2 =
AtomPairs::getHashedTopologicalTorsionFingerprint(*m2);
TEST_ASSERT(*fp1 != *fp2);
delete fp1;
delete fp2;
UINT_VECT invars(6, 1);
fp1 = AtomPairs::getHashedTopologicalTorsionFingerprint(
*m1, 1024, 4, (const std::vector<boost::uint32_t> *)nullptr,
(const std::vector<boost::uint32_t> *)nullptr, &invars);
fp2 = AtomPairs::getHashedTopologicalTorsionFingerprint(
*m2, 1024, 4, (const std::vector<boost::uint32_t> *)nullptr,
(const std::vector<boost::uint32_t> *)nullptr, &invars);
TEST_ASSERT(*fp1 == *fp2);
delete m1;
delete m2;
delete fp1;
delete fp2;
}
{
std::string smi = "C1=CC=CC=C1";
RWMol *m1 = SmilesToMol(smi);
TEST_ASSERT(m1);
smi = "C1=CC=CC=N1";
RWMol *m2 = SmilesToMol(smi);
TEST_ASSERT(m2);
ExplicitBitVect *fp1 =
AtomPairs::getHashedTopologicalTorsionFingerprintAsBitVect(*m1);
ExplicitBitVect *fp2 =
AtomPairs::getHashedTopologicalTorsionFingerprintAsBitVect(*m2);
TEST_ASSERT(*fp1 != *fp2);
delete fp1;
delete fp2;
UINT_VECT invars(6, 1);
fp1 = AtomPairs::getHashedTopologicalTorsionFingerprintAsBitVect(
*m1, 1024, 4, (const std::vector<boost::uint32_t> *)nullptr,
(const std::vector<boost::uint32_t> *)nullptr, &invars);
fp2 = AtomPairs::getHashedTopologicalTorsionFingerprintAsBitVect(
*m2, 1024, 4, (const std::vector<boost::uint32_t> *)nullptr,
(const std::vector<boost::uint32_t> *)nullptr, &invars);
TEST_ASSERT(*fp1 == *fp2);
delete m1;
delete m2;
delete fp1;
delete fp2;
}
BOOST_LOG(rdInfoLog) << "done" << std::endl;
}
void testRDKitFromAtoms() {
BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
BOOST_LOG(rdInfoLog)
<< "testing RDKit fingerprints rooted at particular atoms" << std::endl;
{
std::string smi = "CCCCCC";
RWMol *m1 = SmilesToMol(smi);
TEST_ASSERT(m1);
std::vector<boost::uint32_t> fromAtoms;
fromAtoms.push_back(0);
ExplicitBitVect *fp1 = RDKFingerprintMol(*m1, 1, 4, 2048, 1, true, 0, 128,
true, true, nullptr, &fromAtoms);
TEST_ASSERT(fp1->getNumOnBits() == 4);
delete m1;
delete fp1;
}
{
std::string smi = "CCCCCO";
RWMol *m1 = SmilesToMol(smi);
TEST_ASSERT(m1);
std::vector<boost::uint32_t> fromAtoms;
fromAtoms.push_back(0);
fromAtoms.push_back(5);
ExplicitBitVect *fp1 = RDKFingerprintMol(*m1, 1, 4, 2048, 1, true, 0, 128,
true, true, nullptr, &fromAtoms);
TEST_ASSERT(fp1->getNumOnBits() == 8);
delete m1;
delete fp1;
}
{
std::string smi = "CCCCCO";
RWMol *m1 = SmilesToMol(smi);
TEST_ASSERT(m1);
std::vector<boost::uint32_t> fromAtoms;
fromAtoms.push_back(0);
fromAtoms.push_back(5);
ExplicitBitVect *fp1 = RDKFingerprintMol(*m1, 1, 4, 2048, 1, true, 0, 128,
false, true, nullptr, &fromAtoms);
TEST_ASSERT(fp1->getNumOnBits() == 8);
delete m1;
delete fp1;
}
{
std::string smi = "CCCCCO";
RWMol *m1 = SmilesToMol(smi);
TEST_ASSERT(m1);
std::vector<boost::uint32_t> fromAtoms;
fromAtoms.push_back(0);
ExplicitBitVect *fp1 = LayeredFingerprintMol(
*m1, 0xFFFFFFFF, 1, 4, 2048, nullptr, nullptr, true, &fromAtoms);
TEST_ASSERT(fp1->getNumOnBits() == 20);
delete m1;
delete fp1;
}
{
std::string smi = "CCCCCO";
RWMol *m1 = SmilesToMol(smi);
TEST_ASSERT(m1);
std::vector<boost::uint32_t> fromAtoms;
fromAtoms.push_back(0);
fromAtoms.push_back(5);
ExplicitBitVect *fp1 = LayeredFingerprintMol(
*m1, 0xFFFFFFFF, 1, 4, 2048, nullptr, nullptr, true, &fromAtoms);
TEST_ASSERT(fp1->getNumOnBits() == 24);
delete m1;
delete fp1;
}
{
std::string smi = "CCCCCO";
RWMol *m1 = SmilesToMol(smi);
TEST_ASSERT(m1);
std::vector<boost::uint32_t> fromAtoms;
fromAtoms.push_back(0);
fromAtoms.push_back(5);
ExplicitBitVect *fp1 = LayeredFingerprintMol(
*m1, 0xFFFFFFFF, 1, 4, 2048, nullptr, nullptr, false, &fromAtoms);
TEST_ASSERT(fp1->getNumOnBits() == 24);
delete m1;
delete fp1;
}
BOOST_LOG(rdInfoLog) << "done" << std::endl;
}
// Pins the MACCS keys produced by MACCSFingerprints::getFingerprintAsBitVect()
// for a handful of small molecules: total number of on-bits plus the exact
// positions expected to be set.
void testMACCS() {
  BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
  BOOST_LOG(rdInfoLog) << "testing MACCS key calculation" << std::endl;
  {
    RWMol *m1 = SmilesToMol(std::string("CNO"));
    TEST_ASSERT(m1);
    ExplicitBitVect *fp1 = MACCSFingerprints::getFingerprintAsBitVect(*m1);
    TEST_ASSERT(fp1->getNumOnBits() == 15);
    const std::vector<unsigned int> onBits = {24,  68,  69,  71,  93,
                                              94,  102, 124, 131, 139,
                                              151, 158, 160, 161, 164};
    for (unsigned int ob : onBits) {
      TEST_ASSERT((*fp1)[ob]);
    }
    delete m1;
    delete fp1;
  }
  {
    RWMol *m1 = SmilesToMol(std::string("CCC"));
    TEST_ASSERT(m1);
    ExplicitBitVect *fp1 = MACCSFingerprints::getFingerprintAsBitVect(*m1);
    TEST_ASSERT(fp1->getNumOnBits() == 5);
    const std::vector<unsigned int> onBits = {74, 114, 149, 155, 160};
    for (unsigned int ob : onBits) {
      TEST_ASSERT((*fp1)[ob]);
    }
    delete m1;
    delete fp1;
  }
  {
    // two disconnected fragments
    RWMol *m1 = SmilesToMol(std::string("CC.CO"));
    TEST_ASSERT(m1);
    ExplicitBitVect *fp1 = MACCSFingerprints::getFingerprintAsBitVect(*m1);
    TEST_ASSERT(fp1->getNumOnBits() == 8);
    const std::vector<unsigned int> onBits = {93,  139, 141, 149,
                                              157, 160, 164, 166};
    for (unsigned int ob : onBits) {
      TEST_ASSERT((*fp1)[ob]);
    }
    delete m1;
    delete fp1;
  }
  {
    // check that bit 44 "OTHER" gets properly set:
    RWMol *m1 = SmilesToMol(std::string("CC[SeH]"));
    TEST_ASSERT(m1);
    ExplicitBitVect *fp1 = MACCSFingerprints::getFingerprintAsBitVect(*m1);
    TEST_ASSERT((*fp1)[44]);
    delete m1;
    delete fp1;
  }
  BOOST_LOG(rdInfoLog) << "done" << std::endl;
}
void testRDKitAtomBits() {
BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
BOOST_LOG(rdInfoLog) << "testing RDKit fingerprints reporting atomBits"
<< std::endl;
{
std::string smi = "CCCCCC";
RWMol *m1 = SmilesToMol(smi);
TEST_ASSERT(m1);
std::vector<std::vector<boost::uint32_t>> atomBits(m1->getNumAtoms());
ExplicitBitVect *fp1 =
RDKFingerprintMol(*m1, 1, 4, 2048, 1, true, 0, 128, true, true, nullptr,
nullptr, &atomBits);
TEST_ASSERT(fp1->getNumOnBits() == 4);
for (unsigned int i = 0; i < m1->getNumAtoms(); ++i) {
TEST_ASSERT(atomBits[i].size() == 4);
}
delete m1;
delete fp1;
}
{
std::string smi = "CCCO";
RWMol *m1 = SmilesToMol(smi);
TEST_ASSERT(m1);
std::vector<std::vector<boost::uint32_t>> atomBits(m1->getNumAtoms());
ExplicitBitVect *fp1 =
RDKFingerprintMol(*m1, 1, 2, 2048, 1, true, 0, 128, true, true, nullptr,
nullptr, &atomBits);
TEST_ASSERT(fp1->getNumOnBits() == 4);
TEST_ASSERT(atomBits[0].size() == 2);
TEST_ASSERT(atomBits[1].size() == 3);
TEST_ASSERT(atomBits[2].size() == 4);
TEST_ASSERT(atomBits[3].size() == 2);
delete m1;
delete fp1;
}
BOOST_LOG(rdInfoLog) << "done" << std::endl;
}
// Atom-pair fingerprints (sparse and hashed) for one skeleton written without
// a stereo label and with each of the two labels. With the default arguments
// the three fingerprints must be identical; with the trailing flag set to
// true they must all differ from one another.
void testChiralPairs() {
  BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
  BOOST_LOG(rdErrorLog) << " Test Atom Pairs including info about chirality."
                        << std::endl;
  // each parse result is checked individually (the original re-checked m1
  // three times and never looked at m2 or m3)
  ROMol *m1 = SmilesToMol("CC[CH](F)Cl");
  TEST_ASSERT(m1);
  ROMol *m2 = SmilesToMol("CC[C@H](F)Cl");
  TEST_ASSERT(m2);
  ROMol *m3 = SmilesToMol("CC[C@@H](F)Cl");
  TEST_ASSERT(m3);
  {
    SparseIntVect<int> *fp1, *fp2, *fp3;
    // default arguments: stereo labels make no difference
    fp1 = AtomPairs::getAtomPairFingerprint(*m1, 1, 5);
    TEST_ASSERT(fp1->getTotalVal() == 10);
    TEST_ASSERT(fp1->getNonzeroElements().size() == 10);
    fp2 = AtomPairs::getAtomPairFingerprint(*m2, 1, 5);
    TEST_ASSERT(fp2->getTotalVal() == 10);
    TEST_ASSERT(fp2->getNonzeroElements().size() == 10);
    fp3 = AtomPairs::getAtomPairFingerprint(*m3, 1, 5);
    TEST_ASSERT(fp3->getTotalVal() == 10);
    TEST_ASSERT(fp3->getNonzeroElements().size() == 10);
    TEST_ASSERT((*fp1) == (*fp2));
    TEST_ASSERT((*fp1) == (*fp3));
    TEST_ASSERT((*fp2) == (*fp3));
    delete fp1;
    delete fp2;
    delete fp3;

    // trailing flag set: same totals, but pairwise different fingerprints
    fp1 = AtomPairs::getAtomPairFingerprint(*m1, 1, 5, nullptr, nullptr,
                                            nullptr, true);
    TEST_ASSERT(fp1->getTotalVal() == 10);
    TEST_ASSERT(fp1->getNonzeroElements().size() == 10);
    fp2 = AtomPairs::getAtomPairFingerprint(*m2, 1, 5, nullptr, nullptr,
                                            nullptr, true);
    TEST_ASSERT(fp2->getTotalVal() == 10);
    TEST_ASSERT(fp2->getNonzeroElements().size() == 10);
    fp3 = AtomPairs::getAtomPairFingerprint(*m3, 1, 5, nullptr, nullptr,
                                            nullptr, true);
    TEST_ASSERT(fp3->getTotalVal() == 10);
    TEST_ASSERT(fp3->getNonzeroElements().size() == 10);
    TEST_ASSERT((*fp1) != (*fp2));
    TEST_ASSERT((*fp1) != (*fp3));
    TEST_ASSERT((*fp2) != (*fp3));
    delete fp1;
    delete fp2;
    delete fp3;
  }
  {
    // the same two scenarios with the hashed variant (4096 passed as size)
    SparseIntVect<int> *fp1, *fp2, *fp3;
    fp1 = AtomPairs::getHashedAtomPairFingerprint(*m1, 4096, 1, 5);
    TEST_ASSERT(fp1->getTotalVal() == 10);
    TEST_ASSERT(fp1->getNonzeroElements().size() == 10);
    fp2 = AtomPairs::getHashedAtomPairFingerprint(*m2, 4096, 1, 5);
    TEST_ASSERT(fp2->getTotalVal() == 10);
    TEST_ASSERT(fp2->getNonzeroElements().size() == 10);
    fp3 = AtomPairs::getHashedAtomPairFingerprint(*m3, 4096, 1, 5);
    TEST_ASSERT(fp3->getTotalVal() == 10);
    TEST_ASSERT(fp3->getNonzeroElements().size() == 10);
    TEST_ASSERT((*fp1) == (*fp2));
    TEST_ASSERT((*fp1) == (*fp3));
    TEST_ASSERT((*fp2) == (*fp3));
    delete fp1;
    delete fp2;
    delete fp3;

    fp1 = AtomPairs::getHashedAtomPairFingerprint(*m1, 4096, 1, 5, nullptr,
                                                  nullptr, nullptr, true);
    TEST_ASSERT(fp1->getTotalVal() == 10);
    TEST_ASSERT(fp1->getNonzeroElements().size() == 10);
    fp2 = AtomPairs::getHashedAtomPairFingerprint(*m2, 4096, 1, 5, nullptr,
                                                  nullptr, nullptr, true);
    TEST_ASSERT(fp2->getTotalVal() == 10);
    TEST_ASSERT(fp2->getNonzeroElements().size() == 10);
    fp3 = AtomPairs::getHashedAtomPairFingerprint(*m3, 4096, 1, 5, nullptr,
                                                  nullptr, nullptr, true);
    TEST_ASSERT(fp3->getTotalVal() == 10);
    TEST_ASSERT(fp3->getNonzeroElements().size() == 10);
    TEST_ASSERT((*fp1) != (*fp2));
    TEST_ASSERT((*fp1) != (*fp3));
    TEST_ASSERT((*fp2) != (*fp3));
    delete fp1;
    delete fp2;
    delete fp3;
  }
  delete m1;
  delete m2;
  delete m3;
  BOOST_LOG(rdErrorLog) << " done" << std::endl;
}
// Topological-torsion fingerprints (sparse and hashed) for one skeleton
// written without a stereo label and with each of the two labels. With the
// default arguments the three fingerprints must be identical; with the
// trailing flag set to true they must all differ from one another.
void testChiralTorsions() {
  BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
  BOOST_LOG(rdErrorLog)
      << " Test Topological Torsions including info about chirality."
      << std::endl;
  // each parse result is checked individually (the original re-checked m1
  // three times and never looked at m2 or m3)
  ROMol *m1 = SmilesToMol("CC[CH](F)Cl");
  TEST_ASSERT(m1);
  ROMol *m2 = SmilesToMol("CC[C@H](F)Cl");
  TEST_ASSERT(m2);
  ROMol *m3 = SmilesToMol("CC[C@@H](F)Cl");
  TEST_ASSERT(m3);
  {
    SparseIntVect<boost::int64_t> *fp1, *fp2, *fp3;
    // default arguments: stereo labels make no difference
    fp1 = AtomPairs::getTopologicalTorsionFingerprint(*m1);
    TEST_ASSERT(fp1->getTotalVal() == 2);
    TEST_ASSERT(fp1->getNonzeroElements().size() == 2);
    fp2 = AtomPairs::getTopologicalTorsionFingerprint(*m2);
    TEST_ASSERT(fp2->getTotalVal() == 2);
    TEST_ASSERT(fp2->getNonzeroElements().size() == 2);
    fp3 = AtomPairs::getTopologicalTorsionFingerprint(*m3);
    TEST_ASSERT(fp3->getTotalVal() == 2);
    TEST_ASSERT(fp3->getNonzeroElements().size() == 2);
    TEST_ASSERT((*fp1) == (*fp2));
    TEST_ASSERT((*fp1) == (*fp3));
    TEST_ASSERT((*fp2) == (*fp3));
    delete fp1;
    delete fp2;
    delete fp3;

    // trailing flag set: same totals, but pairwise different fingerprints
    fp1 = AtomPairs::getTopologicalTorsionFingerprint(*m1, 4, nullptr, nullptr,
                                                      nullptr, true);
    TEST_ASSERT(fp1->getTotalVal() == 2);
    TEST_ASSERT(fp1->getNonzeroElements().size() == 2);
    fp2 = AtomPairs::getTopologicalTorsionFingerprint(*m2, 4, nullptr, nullptr,
                                                      nullptr, true);
    TEST_ASSERT(fp2->getTotalVal() == 2);
    TEST_ASSERT(fp2->getNonzeroElements().size() == 2);
    fp3 = AtomPairs::getTopologicalTorsionFingerprint(*m3, 4, nullptr, nullptr,
                                                      nullptr, true);
    TEST_ASSERT(fp3->getTotalVal() == 2);
    TEST_ASSERT(fp3->getNonzeroElements().size() == 2);
    TEST_ASSERT((*fp1) != (*fp2));
    TEST_ASSERT((*fp1) != (*fp3));
    TEST_ASSERT((*fp2) != (*fp3));
    delete fp1;
    delete fp2;
    delete fp3;
  }
  {
    // the same two scenarios with the hashed variant (4096 passed as size)
    SparseIntVect<boost::int64_t> *fp1, *fp2, *fp3;
    fp1 = AtomPairs::getHashedTopologicalTorsionFingerprint(*m1, 4096);
    TEST_ASSERT(fp1->getTotalVal() == 2);
    TEST_ASSERT(fp1->getNonzeroElements().size() == 2);
    fp2 = AtomPairs::getHashedTopologicalTorsionFingerprint(*m2, 4096);
    TEST_ASSERT(fp2->getTotalVal() == 2);
    TEST_ASSERT(fp2->getNonzeroElements().size() == 2);
    fp3 = AtomPairs::getHashedTopologicalTorsionFingerprint(*m3, 4096);
    TEST_ASSERT(fp3->getTotalVal() == 2);
    TEST_ASSERT(fp3->getNonzeroElements().size() == 2);
    TEST_ASSERT((*fp1) == (*fp2));
    TEST_ASSERT((*fp1) == (*fp3));
    TEST_ASSERT((*fp2) == (*fp3));
    delete fp1;
    delete fp2;
    delete fp3;

    fp1 = AtomPairs::getHashedTopologicalTorsionFingerprint(
        *m1, 4096, 4, nullptr, nullptr, nullptr, true);
    TEST_ASSERT(fp1->getTotalVal() == 2);
    TEST_ASSERT(fp1->getNonzeroElements().size() == 2);
    fp2 = AtomPairs::getHashedTopologicalTorsionFingerprint(
        *m2, 4096, 4, nullptr, nullptr, nullptr, true);
    TEST_ASSERT(fp2->getTotalVal() == 2);
    TEST_ASSERT(fp2->getNonzeroElements().size() == 2);
    fp3 = AtomPairs::getHashedTopologicalTorsionFingerprint(
        *m3, 4096, 4, nullptr, nullptr, nullptr, true);
    TEST_ASSERT(fp3->getTotalVal() == 2);
    TEST_ASSERT(fp3->getNonzeroElements().size() == 2);
    TEST_ASSERT((*fp1) != (*fp2));
    TEST_ASSERT((*fp1) != (*fp3));
    TEST_ASSERT((*fp2) != (*fp3));
    delete fp1;
    delete fp2;
    delete fp3;
  }
  delete m1;
  delete m2;
  delete m3;
  BOOST_LOG(rdErrorLog) << " done" << std::endl;
}
void testGitHubIssue25() {
BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
BOOST_LOG(rdErrorLog)
<< " Test GitHub Issue 25: fingerprint backwards compatibility."
<< std::endl;
{
ROMol *m1 = SmilesToMol("CCCCO");
TEST_ASSERT(m1);
SparseIntVect<boost::int64_t> *fp1;
fp1 = AtomPairs::getTopologicalTorsionFingerprint(*m1);
TEST_ASSERT(fp1);
TEST_ASSERT(fp1->getTotalVal() == 2);
TEST_ASSERT(fp1->getNonzeroElements().size() == 2);
TEST_ASSERT((*fp1)[4437590048LL] == 1);
TEST_ASSERT((*fp1)[12893306913LL] == 1);
delete fp1;
delete m1;
}
{
ROMol *m1 = SmilesToMol("CCCCO");
TEST_ASSERT(m1);
SparseIntVect<boost::int64_t> *fp1;
fp1 = AtomPairs::getHashedTopologicalTorsionFingerprint(*m1, 1000);
TEST_ASSERT(fp1);
TEST_ASSERT(fp1->getTotalVal() == 2);
TEST_ASSERT(fp1->getNonzeroElements().size() == 2);
TEST_ASSERT((*fp1)[24] == 1);
TEST_ASSERT((*fp1)[288] == 1);
delete fp1;
delete m1;
}
{
ROMol *m1 = SmilesToMol("CCO");
TEST_ASSERT(m1);
SparseIntVect<boost::int32_t> *fp1;
fp1 = AtomPairs::getAtomPairFingerprint(*m1);
TEST_ASSERT(fp1);
TEST_ASSERT(fp1->getTotalVal() == 3);
TEST_ASSERT(fp1->getNonzeroElements().size() == 3);
TEST_ASSERT((*fp1)[558113] == 1);
TEST_ASSERT((*fp1)[1590306] == 1);
TEST_ASSERT((*fp1)[1590337] == 1);
delete fp1;
delete m1;
}
{
ROMol *m1 = SmilesToMol("CCO");
TEST_ASSERT(m1);
SparseIntVect<boost::int32_t> *fp1;
fp1 = AtomPairs::getHashedAtomPairFingerprint(*m1);
TEST_ASSERT(fp1);
TEST_ASSERT(fp1->getTotalVal() == 3);
TEST_ASSERT(fp1->getNonzeroElements().size() == 3);
TEST_ASSERT((*fp1)[1375] == 1);
TEST_ASSERT((*fp1)[1423] == 1);
TEST_ASSERT((*fp1)[1503] == 1);
delete fp1;
delete m1;
}
BOOST_LOG(rdErrorLog) << " done" << std::endl;
}
// GitHub issue 151: for query/molecule pairs where the query is a
// substructure of the molecule, every bit set in the query's pattern
// fingerprint must also be set in the molecule's pattern fingerprint.
void testGitHubIssue151() {
  BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
  BOOST_LOG(rdErrorLog)
      << " Test GitHub Issue 151: PatternFingerprint problems" << std::endl;

  // Parses both SMILES, checks that the substructure match succeeds and that
  // the fingerprint screen agrees, then frees the molecules AND both bit
  // vectors (the bit vectors were previously leaked).
  auto checkScreen = [](const std::string &querySmiles,
                        const std::string &molSmiles) {
    ROMol *qm = SmilesToMol(querySmiles);
    TEST_ASSERT(qm);
    ROMol *m = SmilesToMol(molSmiles);
    TEST_ASSERT(m);
    ExplicitBitVect *qbv = PatternFingerprintMol(*qm, 2048);
    ExplicitBitVect *mbv = PatternFingerprintMol(*m, 2048);
    MatchVectType mv;
    TEST_ASSERT(SubstructMatch(*m, *qm, mv));
    TEST_ASSERT(AllProbeBitsMatch(*qbv, *mbv));
    delete qm;
    delete m;
    delete qbv;
    delete mbv;
  };

  checkScreen("CCCn1c2ccccc2c2ccccc21",
              "O=C1NCc2c1c1c3cc(Br)ccc3n3c1c1c2c2ccccc2n1CC(CO)C3");
  checkScreen("c1ccc2c(c1)[nH]c1ccccc12",
              "O=C1NCc2c1c1c3cc(Br)ccc3n3c1c1c2c2ccccc2n1CC(CO)C3");
  checkScreen("CC(C)c1nnc(N)[nH]1", "CC(C)c1nc2NCCCn2n1");
  checkScreen(
      "CC(C)c1nnc(N)[nH]1",
      "CN1c2nc(C3CCN(S(=O)(=O)c4ccc(Cl)cc4Cl)CC3)nn2S(=O)(=O)c2ccccc21");
  BOOST_LOG(rdErrorLog) << " done" << std::endl;
}
void test3DAtomPairs() {
BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
BOOST_LOG(rdErrorLog) << " Test 3D atom pairs." << std::endl;
std::string fName = getenv("RDBASE");
fName += "/Code/GraphMol/Fingerprints/testData/triangle.sdf";
SDMolSupplier suppl(fName);
{
ROMol *mol = suppl.next();
SparseIntVect<boost::int32_t> *fp;
// do the 3D version
fp = AtomPairs::getHashedAtomPairFingerprint(
*mol, 2048, 1, AtomPairs::maxPathLen - 1, nullptr, nullptr, nullptr,
false, false);
TEST_ASSERT(fp->getTotalVal() == 3);
TEST_ASSERT(fp->getNonzeroElements().size() == 1);
delete fp;
// now do the 2D version
fp = AtomPairs::getHashedAtomPairFingerprint(
*mol, 2048, 1, AtomPairs::maxPathLen - 1, nullptr, nullptr, nullptr,
false, true);
TEST_ASSERT(fp->getTotalVal() == 3);
TEST_ASSERT(fp->getNonzeroElements().size() == 1);
delete fp;
// we should get a conformer exception if there are no conformers:
mol->clearConformers();
bool ok = false;
try {
fp = AtomPairs::getHashedAtomPairFingerprint(
*mol, 2048, 1, AtomPairs::maxPathLen - 1, nullptr, nullptr, nullptr,
false, false);
} catch (ConformerException &e) {
ok = true;
}
TEST_ASSERT(ok);
delete mol;
}
{
ROMol *mol = suppl.next();
SparseIntVect<boost::int32_t> *fp;
// do the 3D version
fp = AtomPairs::getHashedAtomPairFingerprint(
*mol, 2048, 1, AtomPairs::maxPathLen - 1, nullptr, nullptr, nullptr,
false, false);
TEST_ASSERT(fp->getTotalVal() == 3);
TEST_ASSERT(fp->getNonzeroElements().size() == 2);
delete fp;
// now do the 2D version
fp = AtomPairs::getHashedAtomPairFingerprint(
*mol, 2048, 1, AtomPairs::maxPathLen - 1, nullptr, nullptr, nullptr,
false, true);
TEST_ASSERT(fp->getTotalVal() == 3);
TEST_ASSERT(fp->getNonzeroElements().size() == 1);
delete fp;
delete mol;
}
BOOST_LOG(rdErrorLog) << " done" << std::endl;
}
// GitHub issue 195: generating MACCS keys for an empty molecule must not
// raise and must yield a bit vector with no bits set.
void testGitHubIssue195() {
  BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
  BOOST_LOG(rdErrorLog) << " Test GitHub Issue 195: GenMACCSKeys() raises "
                           "an exception with an empty molecule"
                        << std::endl;
  {
    auto *m1 = new ROMol();
    ExplicitBitVect *fp1 = MACCSFingerprints::getFingerprintAsBitVect(*m1);
    TEST_ASSERT(fp1);
    TEST_ASSERT(fp1->getNumOnBits() == 0);
    // the fingerprint was previously leaked
    delete fp1;
    delete m1;
  }
  BOOST_LOG(rdErrorLog) << " done" << std::endl;
}
void testGitHubIssue258() {
BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
BOOST_LOG(rdErrorLog)
<< " Test GitHub Issue 258: Bad pattern fingerprint for query molecule"
<< std::endl;
{
ROMol *qm = SmartsToMol("n2c(-[#6])ccc2-[#6]", 0, true);
TEST_ASSERT(qm);
ROMol *m = SmilesToMol("Clc1c(cccc1)-n2c(c(cc2C)C=NNC(=O)CSc3ncccn3)C");
TEST_ASSERT(m);
ExplicitBitVect *qbv = PatternFingerprintMol(*qm, 2048);
ExplicitBitVect *mbv = PatternFingerprintMol(*m, 2048);
MatchVectType mv;
TEST_ASSERT(SubstructMatch(*m, *qm, mv));
TEST_ASSERT(AllProbeBitsMatch(*qbv, *mbv));
delete qm;
delete m;
delete qbv;
delete mbv;
}
{
ROMol *qm = SmartsToMol("n2c(cc(c2-[#6]))-[#6]", 0, true);
TEST_ASSERT(qm);
ROMol *m = SmilesToMol("Clc1c(cccc1)-n2c(c(cc2C)C=NNC(=O)CSc3ncccn3)C");
TEST_ASSERT(m);
ExplicitBitVect *qbv = PatternFingerprintMol(*qm, 2048);
ExplicitBitVect *mbv = PatternFingerprintMol(*m, 2048);
MatchVectType mv;
TEST_ASSERT(SubstructMatch(*m, *qm, mv));
TEST_ASSERT(AllProbeBitsMatch(*qbv, *mbv));
delete qm;
delete m;
delete qbv;
delete mbv;
}
{
ROMol *qm = SmartsToMol(
"n2(-[#6]:1:[!#1]:[#6]:[#6]:[#6]:[#6]:1)c(cc(c2-[#6;X4]))-[#6;X4]", 0,
true);
TEST_ASSERT(qm);
ROMol *m = SmilesToMol("n2(-c:1:c:c:c:c:c:1)c(cc(c2-C))-C", 0, true);
TEST_ASSERT(m);
ExplicitBitVect *qbv = PatternFingerprintMol(*qm, 2048);
ExplicitBitVect *mbv = PatternFingerprintMol(*m, 2048);
#if 0
ExplicitBitVect dv=(*qbv)^((*qbv)&(*mbv));
IntVect iv;
dv.getOnBits(iv);
std::cerr<<"\n\n";
std::copy(iv.begin(),iv.end(),std::ostream_iterator<int>(std::cerr,", "));
std::cerr<<std::endl;
#endif
MatchVectType mv;
TEST_ASSERT(SubstructMatch(*m, *qm, mv));
TEST_ASSERT(AllProbeBitsMatch(*qbv, *mbv));
delete qm;
delete m;
delete qbv;
delete mbv;
}
{
ROMol *qm = SmartsToMol(
"n2(-[#6]:1:[!#1]:[#6]:[#6]:[#6]:[#6]:1)c(cc(c2-[#6;X4]))-[#6;X4]", 0,
true);
TEST_ASSERT(qm);
ROMol *m = SmilesToMol("Clc1c(cccc1)-n2c(c(cc2C)C=NNC(=O)CSc3ncccn3)C");
TEST_ASSERT(m);
ExplicitBitVect *qbv = PatternFingerprintMol(*qm, 2048);
ExplicitBitVect *mbv = PatternFingerprintMol(*m, 2048);
MatchVectType mv;
TEST_ASSERT(SubstructMatch(*m, *qm, mv));
TEST_ASSERT(AllProbeBitsMatch(*qbv, *mbv));
delete qm;
delete m;
delete qbv;
delete mbv;
}
{
ROMol *qm = SmartsToMol(
"n2(-[#6]:1:[!#1]:[#6]:[#6]:[#6]:[#6]:1)c(cc(c2-[#6;X4])-[#1])-[#6;X4]",
0, true);
TEST_ASSERT(qm);
ROMol *m = SmilesToMol("Clc1c(cccc1)-n2c(c(cc2C)C=NNC(=O)CSc3ncccn3)C");
TEST_ASSERT(m);
ExplicitBitVect *qbv = PatternFingerprintMol(*qm, 2048);
ExplicitBitVect *mbv = PatternFingerprintMol(*m, 2048);
MatchVectType mv;
TEST_ASSERT(SubstructMatch(*m, *qm, mv));
TEST_ASSERT(AllProbeBitsMatch(*qbv, *mbv));
delete qm;
delete m;
delete qbv;
delete mbv;
}
BOOST_LOG(rdErrorLog) << " done" << std::endl;
}
#ifdef RDK_TEST_MULTITHREADED
namespace {
// Worker body: repeats `nReps` times a pass over `mols`, handling only the
// entries whose index i satisfies i % count == idx. For each handled
// molecule a pattern fingerprint is generated and, when `referenceData` is
// non-empty and holds a non-null entry at that index, compared against it.
void runblock(const std::vector<ROMol *> &mols, unsigned int count,
              unsigned int idx,
              const std::vector<ExplicitBitVect *> &referenceData,
              unsigned int nReps) {
  for (unsigned int j = 0; j < nReps; j++) {
    for (unsigned int i = 0; i < mols.size(); ++i) {
      if (i % count != idx) continue;
      ROMol *mol = mols[i];
      ExplicitBitVect *lbv = PatternFingerprintMol(*mol, 2048);
      // evaluate the comparison first so that the local fingerprint is
      // released even when the assertion below fires
      const bool hasReference = referenceData.size() && referenceData[i];
      const bool matches = !hasReference || (*lbv) == (*referenceData[i]);
      delete lbv;
      TEST_ASSERT(matches);
    }
  }
}
}  // namespace
#include <thread>
#include <future>
// Generates pattern fingerprints for up to 100 molecules from several
// asynchronous tasks at once: a first pass without reference data, then a
// second pass in which every result is compared with a fingerprint computed
// on the calling thread.
void testMultithreadedPatternFP() {
  BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
  BOOST_LOG(rdErrorLog) << " Test multithreading with the pattern FP"
                        << std::endl;
  // constructing a std::string from a null pointer is undefined behavior, so
  // fail cleanly when RDBASE is not set
  const char *rdbase = getenv("RDBASE");
  TEST_ASSERT(rdbase);
  std::string fName = rdbase;
  fName += "/Data/NCI/first_200.props.sdf";
  SDMolSupplier suppl(fName);
  std::cerr << "reading molecules" << std::endl;
  std::vector<ROMol *> mols;
  std::vector<ExplicitBitVect *> referenceData;
  while (!suppl.atEnd() && mols.size() < 100) {
    ROMol *mol = nullptr;
    try {
      mol = suppl.next();
    } catch (...) {
      // records that fail to load are skipped on purpose
      continue;
    }
    if (!mol) continue;
    mols.push_back(mol);
  }
  std::vector<std::future<void>> tg;

  // pass 1: referenceData is still empty, so the workers only generate
  std::cerr << "pass 1" << std::endl;
  unsigned int count = 4;
  for (unsigned int i = 0; i < count; ++i) {
    std::cerr << " launch :" << i << std::endl;
    std::cerr.flush();
    tg.emplace_back(std::async(std::launch::async, runblock, mols, count, i,
                               referenceData, 10));
  }
  for (auto &fut : tg) {
    fut.get();
  }
  tg.clear();

  // reference fingerprints, one per molecule, computed on this thread
  for (const ROMol *mol : mols) {
    ExplicitBitVect *bv = PatternFingerprintMol(*mol, 2048);
    referenceData.push_back(bv);
  }

  // pass 2: the workers now compare against the reference data
  std::cerr << "pass 2" << std::endl;
  for (unsigned int i = 0; i < count; ++i) {
    std::cerr << " launch :" << i << std::endl;
    std::cerr.flush();
    tg.emplace_back(std::async(std::launch::async, runblock, mols, count, i,
                               referenceData, 300));
  }
  for (auto &fut : tg) {
    fut.get();
  }
  for (unsigned int i = 0; i < mols.size(); ++i) {
    delete mols[i];
    delete referenceData[i];
  }
  BOOST_LOG(rdErrorLog) << " done" << std::endl;
}
#else
// multithreaded tests are disabled in this build: nothing to do
void testMultithreadedPatternFP() {}
#endif
// GitHub issue 334: writing a hydrogen explicitly in the SMILES ("N#[CH]"
// vs "N#C") must not change the atom-pair or topological-torsion
// fingerprint.
void testGitHubIssue334() {
  BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
  BOOST_LOG(rdErrorLog) << " Test GitHub Issue 334: explicit Hs in SMILES "
                           "modifies atom pair (and topological torsion) FP."
                        << std::endl;
  {
    ROMol *m1 = SmilesToMol("N#C");
    TEST_ASSERT(m1);
    SparseIntVect<boost::int32_t> *fp1;
    fp1 = AtomPairs::getAtomPairFingerprint(*m1);
    TEST_ASSERT(fp1);
    delete m1;
    m1 = SmilesToMol("N#[CH]");
    // the second parse is dereferenced below, so check it as well
    TEST_ASSERT(m1);
    SparseIntVect<boost::int32_t> *fp2;
    fp2 = AtomPairs::getAtomPairFingerprint(*m1);
    TEST_ASSERT(fp2);
    delete m1;
    TEST_ASSERT(fp1->getTotalVal() == fp2->getTotalVal());
    TEST_ASSERT(fp1->getNonzeroElements().size() ==
                fp2->getNonzeroElements().size());
    TEST_ASSERT(*fp1 == *fp2);
    delete fp1;
    delete fp2;
  }
  {
    ROMol *m1 = SmilesToMol("N#C");
    TEST_ASSERT(m1);
    SparseIntVect<boost::int64_t> *fp1;
    fp1 = AtomPairs::getTopologicalTorsionFingerprint(*m1);
    TEST_ASSERT(fp1);
    delete m1;
    m1 = SmilesToMol("N#[CH]");
    // the second parse is dereferenced below, so check it as well
    TEST_ASSERT(m1);
    SparseIntVect<boost::int64_t> *fp2;
    fp2 = AtomPairs::getTopologicalTorsionFingerprint(*m1);
    TEST_ASSERT(fp2);
    delete m1;
    TEST_ASSERT(fp1->getTotalVal() == fp2->getTotalVal());
    TEST_ASSERT(fp1->getNonzeroElements().size() ==
                fp2->getNonzeroElements().size());
    TEST_ASSERT(*fp1 == *fp2);
    delete fp1;
    delete fp2;
  }
  BOOST_LOG(rdErrorLog) << " done" << std::endl;
}
void testGitHubIssue695() {
BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
BOOST_LOG(rdErrorLog) << " Test GitHub Issue 695: Include cis/trans "
"stereochemistry when useChirality=true with the "
"morgan fingerprints"
<< std::endl;
{
ROMol *m1 = SmilesToMol("CC=CC");
TEST_ASSERT(m1);
SparseIntVect<boost::uint32_t> *fp;
SparseIntVect<boost::uint32_t>::StorageType::const_iterator iter;
fp = MorganFingerprints::getFingerprint(*m1, 1, nullptr, nullptr, false);
TEST_ASSERT(fp);
TEST_ASSERT(fp->getNonzeroElements().size() == 4);
iter = fp->getNonzeroElements().find(736731344);
TEST_ASSERT(iter != fp->getNonzeroElements().end() && iter->second == 2);
iter = fp->getNonzeroElements().find(2246703798);
TEST_ASSERT(iter != fp->getNonzeroElements().end() && iter->second == 2);
iter = fp->getNonzeroElements().find(2246728737);
TEST_ASSERT(iter != fp->getNonzeroElements().end() && iter->second == 2);
iter = fp->getNonzeroElements().find(3545353036);
TEST_ASSERT(iter != fp->getNonzeroElements().end() && iter->second == 2);
delete fp;
fp = MorganFingerprints::getFingerprint(*m1, 1, nullptr, nullptr, true);
TEST_ASSERT(fp);
TEST_ASSERT(fp->getNonzeroElements().size() == 4);
iter = fp->getNonzeroElements().find(736731344);
TEST_ASSERT(iter != fp->getNonzeroElements().end() && iter->second == 2);
iter = fp->getNonzeroElements().find(2246703798);
TEST_ASSERT(iter != fp->getNonzeroElements().end() && iter->second == 2);
iter = fp->getNonzeroElements().find(2246728737);
TEST_ASSERT(iter != fp->getNonzeroElements().end() && iter->second == 2);
iter = fp->getNonzeroElements().find(3545353036);
TEST_ASSERT(iter != fp->getNonzeroElements().end() && iter->second == 2);
delete fp;
delete m1;
}
{
ROMol *m1 = SmilesToMol("C/C=C/C");
TEST_ASSERT(m1);
SparseIntVect<boost::uint32_t> *fp;
SparseIntVect<boost::uint32_t>::StorageType::const_iterator iter;
fp = MorganFingerprints::getFingerprint(*m1, 1, nullptr, nullptr, false);
TEST_ASSERT(fp);
TEST_ASSERT(fp->getNonzeroElements().size() == 4);
iter = fp->getNonzeroElements().find(736731344);
TEST_ASSERT(iter != fp->getNonzeroElements().end() && iter->second == 2);
iter = fp->getNonzeroElements().find(2246703798);
TEST_ASSERT(iter != fp->getNonzeroElements().end() && iter->second == 2);
iter = fp->getNonzeroElements().find(2246728737);
TEST_ASSERT(iter != fp->getNonzeroElements().end() && iter->second == 2);
iter = fp->getNonzeroElements().find(3545353036);
TEST_ASSERT(iter != fp->getNonzeroElements().end() && iter->second == 2);
delete fp;
fp = MorganFingerprints::getFingerprint(*m1, 1, nullptr, nullptr, true);
TEST_ASSERT(fp);
TEST_ASSERT(fp->getNonzeroElements().size() == 4);
#if 0
for (iter = fp->getNonzeroElements().begin();
iter != fp->getNonzeroElements().end(); ++iter) {
std::cerr << " " << iter->first << ": " << iter->second << std::endl;
}
std::cerr << " ------------ " << std::endl;
#endif
iter = fp->getNonzeroElements().find(736735794);
TEST_ASSERT(iter != fp->getNonzeroElements().end() && iter->second == 2);
iter = fp->getNonzeroElements().find(2246703798);
TEST_ASSERT(iter != fp->getNonzeroElements().end() && iter->second == 2);
iter = fp->getNonzeroElements().find(2246728737);
TEST_ASSERT(iter != fp->getNonzeroElements().end() && iter->second == 2);
iter = fp->getNonzeroElements().find(3545353036);
TEST_ASSERT(iter != fp->getNonzeroElements().end() && iter->second == 2);
delete fp;
delete m1;
}
{
ROMol *m1 = SmilesToMol("C/C=C\\C");
TEST_ASSERT(m1);
SparseIntVect<boost::uint32_t> *fp;
SparseIntVect<boost::uint32_t>::StorageType::const_iterator iter;
fp = MorganFingerprints::getFingerprint(*m1, 1, nullptr, nullptr, false);
TEST_ASSERT(fp);
TEST_ASSERT(fp->getNonzeroElements().size() == 4);
iter = fp->getNonzeroElements().find(736731344);
TEST_ASSERT(iter != fp->getNonzeroElements().end() && iter->second == 2);
iter = fp->getNonzeroElements().find(2246703798);
TEST_ASSERT(iter != fp->getNonzeroElements().end() && iter->second == 2);
iter = fp->getNonzeroElements().find(2246728737);
TEST_ASSERT(iter != fp->getNonzeroElements().end() && iter->second == 2);
iter = fp->getNonzeroElements().find(3545353036);
TEST_ASSERT(iter != fp->getNonzeroElements().end() && iter->second == 2);
delete fp;
fp = MorganFingerprints::getFingerprint(*m1, 1, nullptr, nullptr, true);
#if 0
for (iter = fp->getNonzeroElements().begin();
iter != fp->getNonzeroElements().end(); ++iter) {
std::cerr << " " << iter->first << ": " << iter->second << std::endl;
}
std::cerr << " ------------ " << std::endl;
#endif
TEST_ASSERT(fp);
TEST_ASSERT(fp->getNonzeroElements().size() == 4);
iter = fp->getNonzeroElements().find(736735858);
TEST_ASSERT(iter != fp->getNonzeroElements().end() && iter->second == 2);
iter = fp->getNonzeroElements().find(2246703798);
TEST_ASSERT(iter != fp->getNonzeroElements().end() && iter->second == 2);
iter = fp->getNonzeroElements().find(2246728737);
TEST_ASSERT(iter != fp->getNonzeroElements().end() && iter->second == 2);
iter = fp->getNonzeroElements().find(3545353036);
TEST_ASSERT(iter != fp->getNonzeroElements().end() && iter->second == 2);
delete fp;
delete m1;
}
BOOST_LOG(rdErrorLog) << " done" << std::endl;
}
void testGitHubIssue811() {
BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
BOOST_LOG(rdErrorLog) << " Test GitHub Issue 811: rooted atom fingerprint "
"non identical for the same molecules #811"
<< std::endl;
{
std::string dirName = getenv("RDBASE");
dirName += "/Code/GraphMol/Fingerprints/testData/";
ROMol *m1 = MolFileToMol(dirName + "github811a.mol");
TEST_ASSERT(m1);
ROMol *m2 = MolFileToMol(dirName + "github811b.mol");
TEST_ASSERT(m2);
SparseIntVect<boost::int64_t> *fp1, *fp2;
std::vector<boost::uint32_t> roots;
roots.push_back(1);
fp1 = AtomPairs::getTopologicalTorsionFingerprint(*m1, 7, &roots);
SparseIntVect<boost::int64_t>::StorageType nz1 = fp1->getNonzeroElements();
TEST_ASSERT(nz1.size() == 4);
fp2 = AtomPairs::getTopologicalTorsionFingerprint(*m2, 7, &roots);
SparseIntVect<boost::int64_t>::StorageType nz2 = fp2->getNonzeroElements();
TEST_ASSERT(nz2.size() == 4);
TEST_ASSERT(*fp1 == *fp2);
delete fp1;
delete fp2;
delete m1;
delete m2;
}
{
ROMol *m1 = SmilesToMol("C1CC1");
TEST_ASSERT(m1);
SparseIntVect<boost::int64_t> *fp1;
fp1 = AtomPairs::getTopologicalTorsionFingerprint(*m1);
SparseIntVect<boost::int64_t>::StorageType nz1 = fp1->getNonzeroElements();
TEST_ASSERT(nz1.size() == 1);
delete fp1;
delete m1;
}
BOOST_LOG(rdErrorLog) << " done" << std::endl;
}
void testRDKFPUnfolded() {
BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
BOOST_LOG(rdErrorLog) << " Test unfolded version of RDKFP " << std::endl;
{
ROMol *m1 = SmilesToMol("c1ccccc1N");
TEST_ASSERT(m1);
SparseIntVect<boost::uint64_t> *fp1;
SparseIntVect<boost::uint64_t>::StorageType::const_iterator iter;
fp1 = getUnfoldedRDKFingerprintMol(*m1);
TEST_ASSERT(fp1);
#if 0
for (iter = fp1->getNonzeroElements().begin();
iter != fp1->getNonzeroElements().end(); ++iter) {
std::cerr << " " << iter->first << ": " << iter->second << std::endl;
}
std::cerr << " ------------ " << std::endl;
#endif
TEST_ASSERT(fp1->getNonzeroElements().size() == 19);
iter = fp1->getNonzeroElements().find(374073638);
TEST_ASSERT(iter != fp1->getNonzeroElements().end() && iter->second == 6);
iter = fp1->getNonzeroElements().find(464351883);
TEST_ASSERT(iter != fp1->getNonzeroElements().end() && iter->second == 2);
iter = fp1->getNonzeroElements().find(1949583554);
TEST_ASSERT(iter != fp1->getNonzeroElements().end() && iter->second == 6);
iter = fp1->getNonzeroElements().find(4105342207);
TEST_ASSERT(iter != fp1->getNonzeroElements().end() && iter->second == 1);
iter = fp1->getNonzeroElements().find(794080973);
TEST_ASSERT(iter != fp1->getNonzeroElements().end() && iter->second == 1);
iter = fp1->getNonzeroElements().find(3826517238);
TEST_ASSERT(iter != fp1->getNonzeroElements().end() && iter->second == 2);
delete m1;
}
{
ROMol *m1 = SmilesToMol("Cl");
TEST_ASSERT(m1);
SparseIntVect<boost::uint64_t> *fp1;
SparseIntVect<boost::uint64_t>::StorageType::const_iterator iter;
fp1 = getUnfoldedRDKFingerprintMol(*m1);
TEST_ASSERT(fp1);
TEST_ASSERT(fp1->getNonzeroElements().size() == 0);
delete m1;
}
{
ROMol *m1 = SmilesToMol("CCCO");
TEST_ASSERT(m1);
SparseIntVect<boost::uint64_t> *fp1;
SparseIntVect<boost::uint64_t>::StorageType::const_iterator iter;
std::map<boost::uint64_t, std::vector<std::vector<int>>> bitInfo;
std::map<boost::uint64_t, std::vector<std::vector<int>>>::const_iterator
iter2;
fp1 = getUnfoldedRDKFingerprintMol(*m1, 1, 7, true, true, true, nullptr,
nullptr, nullptr, &bitInfo);
TEST_ASSERT(fp1);
#if 0
for (iter = fp1->getNonzeroElements().begin();
iter != fp1->getNonzeroElements().end(); ++iter) {
std::cerr << " " << iter->first << ": " << iter->second << std::endl;
}
std::cerr << " ------------ " << std::endl;
#endif
#if 0
for (iter2 = bitInfo.begin();
iter2 != bitInfo.end(); ++iter2) {
std::cerr << " " << iter2->first << ": ";
for (unsigned i=0; i<iter2->second.size(); i++){
std::cerr << " [ ";
for (unsigned j=0; j<iter2->second.at(i).size(); j++){
std::cerr << iter2->second.at(i).at(j) << " ";
}
std::cerr << "], ";
}
std::cerr << std::endl;
}
std::cerr << " ------------ " << std::endl;
#endif
TEST_ASSERT(fp1->getNonzeroElements().size() == 5);
iter = fp1->getNonzeroElements().find(1524090560);
TEST_ASSERT(iter != fp1->getNonzeroElements().end() && iter->second == 1);
iter = fp1->getNonzeroElements().find(1940446997);
TEST_ASSERT(iter != fp1->getNonzeroElements().end() && iter->second == 1);
iter = fp1->getNonzeroElements().find(3977409745);
TEST_ASSERT(iter != fp1->getNonzeroElements().end() && iter->second == 1);
iter = fp1->getNonzeroElements().find(4274652475);
TEST_ASSERT(iter != fp1->getNonzeroElements().end() && iter->second == 1);
iter = fp1->getNonzeroElements().find(4275705116);
TEST_ASSERT(iter != fp1->getNonzeroElements().end() && iter->second == 2);
iter2 = bitInfo.find(4275705116);
TEST_ASSERT(iter2 != bitInfo.end() && iter2->second[0][0] == 0);
TEST_ASSERT(iter2 != bitInfo.end() && iter2->second.size() == 2 &&
iter2->second[1][0] == 1);
iter2 = bitInfo.find(4274652475);
TEST_ASSERT(iter2 != bitInfo.end() && iter2->second[0][0] == 2);
iter2 = bitInfo.find(3977409745);
TEST_ASSERT(iter2 != bitInfo.end() && iter2->second[0].size() == 3 &&
iter2->second[0][0] == 0);
TEST_ASSERT(iter2 != bitInfo.end() && iter2->second[0].size() == 3 &&
iter2->second[0][1] == 1);
TEST_ASSERT(iter2 != bitInfo.end() && iter2->second[0].size() == 3 &&
iter2->second[0][2] == 2);
iter2 = bitInfo.find(1524090560);
TEST_ASSERT(iter2 != bitInfo.end() && iter2->second[0].size() == 2 &&
iter2->second[0][0] == 1);
TEST_ASSERT(iter2 != bitInfo.end() && iter2->second[0].size() == 2 &&
iter2->second[0][1] == 2);
delete m1;
}
}
// Checks the bitInfo output argument of the folded RDKit fingerprint for
// propanol: for four bit ids, the sizes and entries of the recorded integer
// lists are pinned.
void testRDKFPBitInfo() {
  BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
  BOOST_LOG(rdErrorLog) << " Test bitinfo of RDKFP " << std::endl;
  {
    ROMol *m1 = SmilesToMol("CCCO");
    TEST_ASSERT(m1);
    ExplicitBitVect *fp1;
    std::map<boost::uint32_t, std::vector<std::vector<int>>> bitInfo;
    std::map<boost::uint32_t, std::vector<std::vector<int>>>::const_iterator
        iter2;
    fp1 = RDKFingerprintMol(*m1, 1, 7, 2048, 2, true, 0.0, 128, true, true,
                            nullptr, nullptr, nullptr, &bitInfo);
    TEST_ASSERT(fp1);
#if 0
    for (iter2 = bitInfo.begin();
         iter2 != bitInfo.end(); ++iter2) {
      std::cerr << " " << iter2->first << ": ";
      for (unsigned i=0; i<iter2->second.size(); i++){
        std::cerr << " [ ";
        for (unsigned j=0; j<iter2->second.at(i).size(); j++){
          std::cerr << iter2->second.at(i).at(j) << " ";
        }
        std::cerr << "], ";
      }
      std::cerr << std::endl;
    }
    std::cerr << " ------------ " << std::endl;
#endif
    iter2 = bitInfo.find(1772);
    TEST_ASSERT(iter2 != bitInfo.end() && iter2->second[0][0] == 0);
    TEST_ASSERT(iter2 != bitInfo.end() && iter2->second.size() == 2 &&
                iter2->second[1][0] == 1);
    iter2 = bitInfo.find(562);
    TEST_ASSERT(iter2 != bitInfo.end() && iter2->second[0][0] == 2);
    iter2 = bitInfo.find(1118);
    TEST_ASSERT(iter2 != bitInfo.end() && iter2->second[0].size() == 3 &&
                iter2->second[0][0] == 0);
    TEST_ASSERT(iter2 != bitInfo.end() && iter2->second[0].size() == 3 &&
                iter2->second[0][1] == 1);
    TEST_ASSERT(iter2 != bitInfo.end() && iter2->second[0].size() == 3 &&
                iter2->second[0][2] == 2);
    iter2 = bitInfo.find(1183);
    TEST_ASSERT(iter2 != bitInfo.end() && iter2->second[0].size() == 2 &&
                iter2->second[0][0] == 1);
    TEST_ASSERT(iter2 != bitInfo.end() && iter2->second[0].size() == 2 &&
                iter2->second[0][1] == 2);
    // the fingerprint was previously leaked
    delete fp1;
    delete m1;
  }
}
void testGitHubIssue874() {
BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
BOOST_LOG(rdErrorLog)
<< " Single atoms setting radius 1 bits in Morgan fingerprints "
<< std::endl;
{
std::string smiles = "Cl";
ROMol *m1 = SmilesToMol(smiles);
TEST_ASSERT(m1);
TEST_ASSERT(m1->getNumAtoms() == 1);
SparseIntVect<boost::uint32_t> *fp;
fp = MorganFingerprints::getFingerprint(*m1, 0);
TEST_ASSERT(fp->getNonzeroElements().size() == 1);
delete fp;
fp = MorganFingerprints::getFingerprint(*m1, 1);
TEST_ASSERT(fp->getNonzeroElements().size() == 1);
delete fp;
delete m1;
}
BOOST_LOG(rdErrorLog) << " done" << std::endl;
}
void testGitHubIssue879() {
BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
BOOST_LOG(rdErrorLog) << "Github #879: Pattern fingerprint should set bits "
"for single-atom fragments."
<< std::endl;
{
std::string smiles = "Cl";
ROMol *m1 = SmilesToMol(smiles);
TEST_ASSERT(m1);
TEST_ASSERT(m1->getNumAtoms() == 1);
ExplicitBitVect *bv = PatternFingerprintMol(*m1, 2048);
TEST_ASSERT(bv);
TEST_ASSERT(bv->getNumOnBits() == 2);
delete bv;
delete m1;
}
{
std::string smiles = "Cl.[Na]";
ROMol *m1 = SmilesToMol(smiles);
TEST_ASSERT(m1);
TEST_ASSERT(m1->getNumAtoms() == 2);
ExplicitBitVect *bv = PatternFingerprintMol(*m1, 2048);
TEST_ASSERT(bv);
TEST_ASSERT(bv->getNumOnBits() == 4);
delete bv;
delete m1;
}
BOOST_LOG(rdErrorLog) << " done" << std::endl;
}
void testGitHubIssue1496() {
BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
BOOST_LOG(rdErrorLog) << "Github #1496: Pattern fingerprint setting bad bits "
"for degree zero atoms"
<< std::endl;
{
std::string smiles = "C";
ROMol *m1 = SmilesToMol(smiles);
TEST_ASSERT(m1);
TEST_ASSERT(m1->getNumAtoms() == 1);
ExplicitBitVect *bv1 = PatternFingerprintMol(*m1, 2048);
TEST_ASSERT(bv1);
std::string smarts = "[#6]";
ROMol *m2 = SmartsToMol(smarts);
TEST_ASSERT(m2);
TEST_ASSERT(m2->getNumAtoms() == 1);
ExplicitBitVect *bv2 = PatternFingerprintMol(*m2, 2048);
TEST_ASSERT(bv2);
TEST_ASSERT(bv1->getNumOnBits() >= bv2->getNumOnBits())
TEST_ASSERT(AllProbeBitsMatch(*bv2, *bv1));
delete m1;
delete bv1;
delete m2;
delete bv2;
}
{
std::string smiles = "CC";
ROMol *m1 = SmilesToMol(smiles);
TEST_ASSERT(m1);
TEST_ASSERT(m1->getNumAtoms() == 2);
ExplicitBitVect *bv1 = PatternFingerprintMol(*m1, 2048);
TEST_ASSERT(bv1);
std::string smarts = "[#6]";
ROMol *m2 = SmartsToMol(smarts);
TEST_ASSERT(m2);
TEST_ASSERT(m2->getNumAtoms() == 1);
ExplicitBitVect *bv2 = PatternFingerprintMol(*m2, 2048);
TEST_ASSERT(bv2);
TEST_ASSERT(bv1->getNumOnBits() > bv2->getNumOnBits())
TEST_ASSERT(AllProbeBitsMatch(*bv2, *bv1));
delete m1;
delete bv1;
delete m2;
delete bv2;
}
BOOST_LOG(rdErrorLog) << " done" << std::endl;
}
void testGitHubIssue1793() {
BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
BOOST_LOG(rdErrorLog) << "Github #1793: Fingerprint segfaults with "
"branchedPaths=False and useHs=False"
<< std::endl;
{
std::string smiles = "CC";
ROMol *m1 = SmilesToMol(smiles);
TEST_ASSERT(m1);
TEST_ASSERT(m1->getNumAtoms() == 2);
unsigned int minPath = 1;
unsigned int maxPath = 7;
unsigned int fpSize = 2048;
unsigned int nBitsPerHash = 2;
double tgtDensity = 0.0;
unsigned int minSize = 128;
bool useHs = false;
bool branchedPaths = false;
ExplicitBitVect *bv1 =
RDKFingerprintMol(*m1, minPath, maxPath, fpSize, nBitsPerHash, useHs,
tgtDensity, minSize, branchedPaths);
TEST_ASSERT(bv1);
delete m1;
delete bv1;
}
BOOST_LOG(rdErrorLog) << " done" << std::endl;
}
void testGitHubIssue1993() {
BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
BOOST_LOG(rdErrorLog)
<< "Github #1993: fingerprinters that use R/S labels should force a call "
"to assignStereochemistry() if it hasn't already been called"
<< std::endl;
{
std::string smiles = "CC(F)(Cl)CC";
std::unique_ptr<ROMol> m1(SmilesToMol(smiles));
TEST_ASSERT(m1);
TEST_ASSERT(m1->getNumAtoms() == 6);
m1->clearProp(common_properties::_StereochemDone);
std::unique_ptr<ROMol> m2(new ROMol(*m1));
m2->getAtomWithIdx(1)->setChiralTag(Atom::CHI_TETRAHEDRAL_CW);
std::unique_ptr<ROMol> m3(new ROMol(*m1));
m3->getAtomWithIdx(1)->setChiralTag(Atom::CHI_TETRAHEDRAL_CCW);
{ // mfp, no chirality
bool useChirality = false;
std::unique_ptr<SparseIntVect<boost::uint32_t>> fp1(
MorganFingerprints::getFingerprint(*m1, 1, nullptr, nullptr,
useChirality));
TEST_ASSERT(fp1);
std::unique_ptr<SparseIntVect<boost::uint32_t>> fp2(
MorganFingerprints::getFingerprint(*m2, 1, nullptr, nullptr,
useChirality));
TEST_ASSERT(fp2);
std::unique_ptr<SparseIntVect<boost::uint32_t>> fp3(
MorganFingerprints::getFingerprint(*m3, 1, nullptr, nullptr,
useChirality));
TEST_ASSERT(fp3);
TEST_ASSERT((*fp1) == (*fp2));
TEST_ASSERT((*fp1) == (*fp3));
TEST_ASSERT((*fp2) == (*fp3));
}
{ // mfp, with chirality
bool useChirality = true;
std::unique_ptr<SparseIntVect<boost::uint32_t>> fp1(
MorganFingerprints::getFingerprint(*m1, 1, nullptr, nullptr,
useChirality));
TEST_ASSERT(fp1);
std::unique_ptr<SparseIntVect<boost::uint32_t>> fp2(
MorganFingerprints::getFingerprint(*m2, 1, nullptr, nullptr,
useChirality));
TEST_ASSERT(fp2);
std::unique_ptr<SparseIntVect<boost::uint32_t>> fp3(
MorganFingerprints::getFingerprint(*m3, 1, nullptr, nullptr,
useChirality));
TEST_ASSERT(fp3);
TEST_ASSERT((*fp1) != (*fp2));
TEST_ASSERT((*fp1) != (*fp3));
TEST_ASSERT((*fp2) != (*fp3));
}
{ // ap, no chirality
bool useChirality = false;
std::unique_ptr<SparseIntVect<boost::int32_t>> fp1(
AtomPairs::getAtomPairFingerprint(*m1, nullptr, nullptr, nullptr,
useChirality));
TEST_ASSERT(fp1);
std::unique_ptr<SparseIntVect<boost::int32_t>> fp2(
AtomPairs::getAtomPairFingerprint(*m2, nullptr, nullptr, nullptr,
useChirality));
TEST_ASSERT(fp2);
std::unique_ptr<SparseIntVect<boost::int32_t>> fp3(
AtomPairs::getAtomPairFingerprint(*m3, nullptr, nullptr, nullptr,
useChirality));
TEST_ASSERT(fp3);
TEST_ASSERT((*fp1) == (*fp2));
TEST_ASSERT((*fp1) == (*fp3));
TEST_ASSERT((*fp2) == (*fp3));
}
{ // ap, with chirality
bool useChirality = true;
std::unique_ptr<SparseIntVect<boost::int32_t>> fp1(
AtomPairs::getAtomPairFingerprint(*m1, nullptr, nullptr, nullptr,
useChirality));
TEST_ASSERT(fp1);
std::unique_ptr<SparseIntVect<boost::int32_t>> fp2(
AtomPairs::getAtomPairFingerprint(*m2, nullptr, nullptr, nullptr,
useChirality));
TEST_ASSERT(fp2);
std::unique_ptr<SparseIntVect<boost::int32_t>> fp3(
AtomPairs::getAtomPairFingerprint(*m3, nullptr, nullptr, nullptr,
useChirality));
TEST_ASSERT(fp3);
TEST_ASSERT((*fp1) != (*fp2));
TEST_ASSERT((*fp1) != (*fp3));
TEST_ASSERT((*fp2) != (*fp3));
}
{ // ap, no chirality
bool useChirality = false;
unsigned int minLength = 1, maxLength = 10;
std::unique_ptr<SparseIntVect<boost::int32_t>> fp1(
AtomPairs::getHashedAtomPairFingerprint(*m1, 2048, minLength,
maxLength, nullptr, nullptr,
nullptr, useChirality));
TEST_ASSERT(fp1);
std::unique_ptr<SparseIntVect<boost::int32_t>> fp2(
AtomPairs::getHashedAtomPairFingerprint(*m2, 2048, minLength,
maxLength, nullptr, nullptr,
nullptr, useChirality));
TEST_ASSERT(fp2);
std::unique_ptr<SparseIntVect<boost::int32_t>> fp3(
AtomPairs::getHashedAtomPairFingerprint(*m3, 2048, minLength,
maxLength, nullptr, nullptr,
nullptr, useChirality));
TEST_ASSERT(fp3);
TEST_ASSERT((*fp1) == (*fp2));
TEST_ASSERT((*fp1) == (*fp3));
TEST_ASSERT((*fp2) == (*fp3));
}
{ // ap, with chirality
bool useChirality = true;
unsigned int minLength = 1, maxLength = 10;
std::unique_ptr<SparseIntVect<boost::int32_t>> fp1(
AtomPairs::getHashedAtomPairFingerprint(*m1, 2048, minLength,
maxLength, nullptr, nullptr,
nullptr, useChirality));
TEST_ASSERT(fp1);
std::unique_ptr<SparseIntVect<boost::int32_t>> fp2(
AtomPairs::getHashedAtomPairFingerprint(*m2, 2048, minLength,
maxLength, nullptr, nullptr,
nullptr, useChirality));
TEST_ASSERT(fp2);
std::unique_ptr<SparseIntVect<boost::int32_t>> fp3(
AtomPairs::getHashedAtomPairFingerprint(*m3, 2048, minLength,
maxLength, nullptr, nullptr,
nullptr, useChirality));
TEST_ASSERT(fp3);
TEST_ASSERT((*fp1) != (*fp2));
TEST_ASSERT((*fp1) != (*fp3));
TEST_ASSERT((*fp2) != (*fp3));
}
{ // tt, no chirality
bool useChirality = false;
std::unique_ptr<SparseIntVect<boost::int64_t>> fp1(
AtomPairs::getTopologicalTorsionFingerprint(*m1, 4, nullptr, nullptr,
nullptr, useChirality));
TEST_ASSERT(fp1);
std::unique_ptr<SparseIntVect<boost::int64_t>> fp2(
AtomPairs::getTopologicalTorsionFingerprint(*m2, 4, nullptr, nullptr,
nullptr, useChirality));
TEST_ASSERT(fp2);
std::unique_ptr<SparseIntVect<boost::int64_t>> fp3(
AtomPairs::getTopologicalTorsionFingerprint(*m3, 4, nullptr, nullptr,
nullptr, useChirality));
TEST_ASSERT(fp3);
TEST_ASSERT((*fp1) == (*fp2));
TEST_ASSERT((*fp1) == (*fp3));
TEST_ASSERT((*fp2) == (*fp3));
}
{ // tt, with chirality
bool useChirality = true;
std::unique_ptr<SparseIntVect<boost::int64_t>> fp1(
AtomPairs::getTopologicalTorsionFingerprint(*m1, 4, nullptr, nullptr,
nullptr, useChirality));
TEST_ASSERT(fp1);
std::unique_ptr<SparseIntVect<boost::int64_t>> fp2(
AtomPairs::getTopologicalTorsionFingerprint(*m2, 4, nullptr, nullptr,
nullptr, useChirality));
TEST_ASSERT(fp2);
std::unique_ptr<SparseIntVect<boost::int64_t>> fp3(
AtomPairs::getTopologicalTorsionFingerprint(*m3, 4, nullptr, nullptr,
nullptr, useChirality));
TEST_ASSERT(fp3);
TEST_ASSERT((*fp1) != (*fp2));
TEST_ASSERT((*fp1) != (*fp3));
TEST_ASSERT((*fp2) != (*fp3));
}
{ // tt, no chirality
bool useChirality = false;
std::unique_ptr<SparseIntVect<boost::int64_t>> fp1(
AtomPairs::getHashedTopologicalTorsionFingerprint(
*m1, 2048, 4, nullptr, nullptr, nullptr, useChirality));
TEST_ASSERT(fp1);
std::unique_ptr<SparseIntVect<boost::int64_t>> fp2(
AtomPairs::getHashedTopologicalTorsionFingerprint(
*m2, 2048, 4, nullptr, nullptr, nullptr, useChirality));
TEST_ASSERT(fp2);
std::unique_ptr<SparseIntVect<boost::int64_t>> fp3(
AtomPairs::getHashedTopologicalTorsionFingerprint(
*m3, 2048, 4, nullptr, nullptr, nullptr, useChirality));
TEST_ASSERT(fp3);
TEST_ASSERT((*fp1) == (*fp2));
TEST_ASSERT((*fp1) == (*fp3));
TEST_ASSERT((*fp2) == (*fp3));
}
{ // tt, with chirality
bool useChirality = true;
std::unique_ptr<SparseIntVect<boost::int64_t>> fp1(
AtomPairs::getHashedTopologicalTorsionFingerprint(
*m1, 2048, 4, nullptr, nullptr, nullptr, useChirality));
TEST_ASSERT(fp1);
std::unique_ptr<SparseIntVect<boost::int64_t>> fp2(
AtomPairs::getHashedTopologicalTorsionFingerprint(
*m2, 2048, 4, nullptr, nullptr, nullptr, useChirality));
TEST_ASSERT(fp2);
std::unique_ptr<SparseIntVect<boost::int64_t>> fp3(
AtomPairs::getHashedTopologicalTorsionFingerprint(
*m3, 2048, 4, nullptr, nullptr, nullptr, useChirality));
TEST_ASSERT(fp3);
TEST_ASSERT((*fp1) != (*fp2));
TEST_ASSERT((*fp1) != (*fp3));
TEST_ASSERT((*fp2) != (*fp3));
}
}
BOOST_LOG(rdErrorLog) << " done" << std::endl;
}
void testGitHubIssue2115() {
BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
BOOST_LOG(rdErrorLog)
<< "Github #2115: BitInfo not complete for RDKFingerprint" << std::endl;
{
auto mol = "c1ccccc1O"_smiles;
TEST_ASSERT(mol);
unsigned int minPath = 1;
unsigned int maxPath = 7;
unsigned int fpSize = 2048;
unsigned int nBitsPerHash = 2;
bool useHs = true;
double tgtDensity = 0.0;
unsigned int minSize = 128;
bool branchedPaths = true;
bool useBondOrder = true;
std::vector<boost::uint32_t> *atomInvariants = nullptr;
const std::vector<boost::uint32_t> *fromAtoms = nullptr;
std::vector<std::vector<boost::uint32_t>> *atomBits = nullptr;
std::map<boost::uint32_t, std::vector<std::vector<int>>> bitInfo;
std::unique_ptr<ExplicitBitVect> fp1(
RDKFingerprintMol(*mol, minPath, maxPath, fpSize, nBitsPerHash, useHs,
tgtDensity, minSize, branchedPaths, useBondOrder,
atomInvariants, fromAtoms, atomBits, &bitInfo));
TEST_ASSERT(fp1->getNumOnBits() == bitInfo.size());
}
{ // test that for atomBits too
auto m1("CCCO"_smiles);
TEST_ASSERT(m1);
std::vector<std::vector<boost::uint32_t>> atomBits(m1->getNumAtoms());
unsigned int minPath = 1;
unsigned int maxPath = 2;
unsigned int fpSize = 2048;
unsigned int nBitsPerHash = 2;
bool useHs = true;
double tgtDensity = 0.0;
unsigned int minSize = 128;
bool branchedPaths = true;
bool useBondOrder = true;
std::vector<boost::uint32_t> *atomInvariants = nullptr;
const std::vector<boost::uint32_t> *fromAtoms = nullptr;
std::unique_ptr<ExplicitBitVect> fp1(RDKFingerprintMol(
*m1, minPath, maxPath, fpSize, nBitsPerHash, useHs, tgtDensity, minSize,
branchedPaths, useBondOrder, atomInvariants, fromAtoms, &atomBits));
TEST_ASSERT(fp1->getNumOnBits() == 8);
TEST_ASSERT(atomBits[0].size() == 4);
TEST_ASSERT(atomBits[1].size() == 6);
TEST_ASSERT(atomBits[2].size() == 8);
TEST_ASSERT(atomBits[3].size() == 4);
}
BOOST_LOG(rdErrorLog) << " done" << std::endl;
}
// Test driver: initializes logging and then runs the test functions in order.
// The command-line arguments are not used, so the parameters are left unnamed.
int main(int, char *[]) {
  RDLog::InitLogs();
  // Change to "#if 0" to skip the older tests and run only the ones listed
  // after the #endif.
#if 1
  // RDKit and layered fingerprints
  test1();
  test2();
  test3();
  test1alg2();
  test2alg2();
  test4Trends();
  test1Layers();
  test2Layers();
  test3Layers();

  // Morgan fingerprints
  test1MorganFPs();
  test2MorganFPsFromAtoms();
  test3MorganFPs();
  test4MorganFPs();
  test5MorganFPs();
  // test5BackwardsCompatibility();
  // testIssue2875658();

  // atom pairs and topological torsions
  testAtomCodes();
  testAtomPairs();
  testAtomPairs2();
  testTorsions();
  testBulkTorsions();
  testHashedAtomPairs();
  testHashedTorsions();
  testRootedAtomPairs();
  testIgnoreAtomPairs();
  testRootedTorsions();
  testIgnoreTorsions();

  // options, MACCS, chirality and miscellaneous
  testMorganAtomInfo();
  testRDKitFPOptions();
  testPairsAndTorsionsOptions();
  testMACCS();
  testRDKitFromAtoms();
  testRDKitAtomBits();
  testChiralPairs();
  testChiralTorsions();

  // regression tests and later additions
  testGitHubIssue25();
  testGitHubIssue151();
  test3DAtomPairs();
  testGitHubIssue195();
  testMultithreadedPatternFP();
  testGitHubIssue258();
  testGitHubIssue334();
  testGitHubIssue695();
  testGitHubIssue811();
  testRDKFPUnfolded();
  testRDKFPBitInfo();
  testGitHubIssue874();
  testGitHubIssue879();
  testGitHubIssue1496();
  testGitHubIssue1793();
#endif
  // These always run, regardless of the switch above.
  testGitHubIssue1993();
  testGitHubIssue2115();
  return 0;
}