Files
rdkit/Code/GraphMol/RascalMCES/mces_cluster_catch.cpp
Ric 8176f5c962 Fail CI builds on compiler warnings + some fixes (#6675)
* enable Werror on Mac and Linux

* do not fail on boost multiprecision pessimizing move

* fix eigen array_bounds warning

* Fix unused arg in Rascal MCS

* fix range-loop-construct warning in Rascal MCES

* fix sign mismatched comparison

* drop unused lambda capture

* allow FMCS timeout test more time under Debug (not a warning!)

* fix fwd declaration of struct RascalClusterOptions

* fix deallocator mismatch

* fix two minor leaks

* fix a real leak

* more minor leaks

* fix another real leak, plus some potential ones

* fix std::move preventing copy ellision

* allow longer run time for debug builds

* make maxBondMatchPairs and getLargestFragSize unsigned int

* make snake case camel case

* update to current master, fix new warnings

* update again and more fixes

* add #include <optional>

* fix char array deallocation

* update and fixes in Marvin writer

* unsigned int

* more copy ellision fixes

* more copy ellision fixes, and typos

* and some more
2023-09-02 04:38:45 +02:00

153 lines
4.9 KiB
C++

//
// Copyright (C) 2023 David Cosgrove
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
#include <chrono>
#include <random>
#include <vector>
#include <GraphMol/FileParsers/MolSupplier.h>
#include <GraphMol/SmilesParse/SmilesParse.h>
#include <GraphMol/SmilesParse/SmilesWrite.h>
#include <GraphMol/Substruct/SubstructMatch.h>
#include "catch.hpp"
#include <GraphMol/RascalMCES/RascalMCES.h>
#include <GraphMol/RascalMCES/RascalClusterOptions.h>
#include <GraphMol/RascalMCES/RascalResult.h>
TEST_CASE("Small test", "[basics]") {
std::string fName = getenv("RDBASE");
fName += "/Contrib/Fastcluster/cdk2.smi";
RDKit::SmilesMolSupplier suppl(fName, "\t", 1, 0, false);
std::vector<std::shared_ptr<RDKit::ROMol>> mols;
while (!suppl.atEnd()) {
std::shared_ptr<RDKit::ROMol> mol(suppl.next());
if (!mol) {
continue;
}
mols.push_back(mol);
}
RDKit::RascalMCES::RascalClusterOptions clusOpts;
auto clusters = RDKit::RascalMCES::rascalCluster(mols, clusOpts);
REQUIRE(clusters.size() == 8);
std::vector<size_t> expSizes{7, 7, 6, 2, 2, 2, 2, 20};
for (size_t i = 0; i < 8; ++i) {
REQUIRE(clusters[i].size() == expSizes[i]);
}
}
TEST_CASE("BLSets subset", "[basics]") {
std::string fName = getenv("RDBASE");
fName += "/Code/GraphMol/RascalMCES/data/test_cluster1.smi";
RDKit::SmilesMolSupplier suppl(fName, "\t", 1, 0, false);
std::vector<std::shared_ptr<RDKit::ROMol>> mols;
while (!suppl.atEnd()) {
std::shared_ptr<RDKit::ROMol> mol(suppl.next());
if (!mol) {
continue;
}
mols.push_back(mol);
}
auto clusters = RDKit::RascalMCES::rascalCluster(mols);
REQUIRE(clusters.size() == 12);
std::vector<size_t> expSizes{8, 4, 4, 3, 3, 3, 2, 2, 2, 2, 2, 21};
for (size_t i = 0; i < 12; ++i) {
REQUIRE(clusters[i].size() == expSizes[i]);
}
}
TEST_CASE("ChEMBL 1907596") {
std::string fName = getenv("RDBASE");
fName += "/Code/GraphMol/RascalMCES/data/chembl_1907596.smi";
std::cout << fName << std::endl;
RDKit::SmilesMolSupplier suppl(fName, "\t", 1, 0, false);
std::vector<std::shared_ptr<RDKit::ROMol>> mols;
while (!suppl.atEnd()) {
std::shared_ptr<RDKit::ROMol> mol(suppl.next());
if (!mol) {
continue;
}
mols.push_back(mol);
}
RDKit::RascalMCES::RascalClusterOptions clusOpts;
clusOpts.similarityCutoff = 0.7;
auto clusters = RDKit::RascalMCES::rascalCluster(mols, clusOpts);
REQUIRE(clusters.size() == 21);
std::vector<size_t> expSizes{342, 71, 64, 33, 23, 11, 10, 6, 6, 5, 5,
4, 3, 3, 3, 3, 3, 2, 2, 2, 14};
for (size_t i = 0; i < 21; ++i) {
REQUIRE(clusters[i].size() == expSizes[i]);
}
}
TEST_CASE("Small Butina test", "[basics]") {
std::string fName = getenv("RDBASE");
fName += "/Contrib/Fastcluster/cdk2.smi";
RDKit::SmilesMolSupplier suppl(fName, "\t", 1, 0, false);
std::vector<std::shared_ptr<RDKit::ROMol>> mols;
while (!suppl.atEnd()) {
std::shared_ptr<RDKit::ROMol> mol(suppl.next());
if (!mol) {
continue;
}
mols.push_back(mol);
}
RDKit::RascalMCES::RascalClusterOptions clusOpts;
auto clusters = RDKit::RascalMCES::rascalButinaCluster(mols, clusOpts);
unsigned int numMols = 0;
for (const auto &cl : clusters) {
numMols += cl.size();
}
REQUIRE(numMols == mols.size());
REQUIRE(clusters.size() == 29);
std::vector<size_t> expSizes{6, 6, 6, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
for (size_t i = 0; i < 29; ++i) {
REQUIRE(clusters[i].size() == expSizes[i]);
}
}
TEST_CASE("Small test, smaller number of threads", "[basics]") {
// I'm not sure how to test whether this has had the desired effect,
// but at least we'll know that it runs ok.
std::string fName = getenv("RDBASE");
fName += "/Contrib/Fastcluster/cdk2.smi";
RDKit::SmilesMolSupplier suppl(fName, "\t", 1, 0, false);
std::vector<std::shared_ptr<RDKit::ROMol>> mols;
while (!suppl.atEnd()) {
std::shared_ptr<RDKit::ROMol> mol(suppl.next());
if (!mol) {
continue;
}
mols.push_back(mol);
}
{
RDKit::RascalMCES::RascalClusterOptions clusOpts;
clusOpts.numThreads = 2;
auto clusters = RDKit::RascalMCES::rascalCluster(mols, clusOpts);
REQUIRE(clusters.size() == 8);
std::vector<size_t> expSizes{7, 7, 6, 2, 2, 2, 2, 20};
for (size_t i = 0; i < 8; ++i) {
REQUIRE(clusters[i].size() == expSizes[i]);
}
}
{
RDKit::RascalMCES::RascalClusterOptions clusOpts;
clusOpts.numThreads = -2;
auto clusters = RDKit::RascalMCES::rascalCluster(mols, clusOpts);
REQUIRE(clusters.size() == 8);
std::vector<size_t> expSizes{7, 7, 6, 2, 2, 2, 2, 20};
for (size_t i = 0; i < 8; ++i) {
REQUIRE(clusters[i].size() == expSizes[i]);
}
}
}