Files
rdkit/Code/GraphMol/SynthonSpaceSearch/rascal_search_catch_tests.cpp
Justin Gullingsrud bda9ffbeec Incremental synthon search (#8855)
* Iterated interface to substructure search

* Add a test

* Add python unit test

* Expose the toTryChunkSize parameter to python

* Respect the maxHits parameter; sort the hitset

* Treat maxHits=-1 as infinite

* Add callback versions of fp and rascal search; conform to C++ style

* Add fp and rascal C++ tests

* maxHits=-1 tripped me up again

* Add fp and rascal python wrappers.

Changed the name of the callback-based method to have "Incremental"
in the name because the overloaded versions with default arguments
can't be reliably selected by the boost python runtime.  Probably
better to have a different method name anyway since the return type
is None instead of a results object.

* Delete stray printf.

* Run clang-format

* Use std::int64_t instead of ssize_t for portability

* Make docstrings on callback-based methods more descriptive

* Stop incremental search if the callback returns true.

* Add an example of incremental synthon search to the getting started docs

* trivial commit to force CI rerun

* Reformat single line if statements.

* Make SearchResultsCallback take const ref input

* Fix another one-liner

* Oops - another one-liner
2025-11-08 04:27:16 +01:00

129 lines
4.1 KiB
C++

//
// Copyright (C) David Cosgrove 2024.
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
#include <algorithm>
#include <fstream>
#include <utility>
#include <GraphMol/SubstructLibrary/SubstructLibrary.h>
#include <GraphMol/FileParsers/MolSupplier.h>
#include <GraphMol/Fingerprints/MorganGenerator.h>
#include <GraphMol/RascalMCES/RascalMCES.h>
#include <GraphMol/SynthonSpaceSearch/SynthonSpace.h>
#include <GraphMol/SynthonSpaceSearch/SearchResults.h>
#include <GraphMol/SynthonSpaceSearch/SynthonSpaceSearch_details.h>
#include <GraphMol/SmilesParse/SmilesParse.h>
#include <GraphMol/SmilesParse/SmilesWrite.h>
#include <catch2/catch_all.hpp>
using namespace RDKit;
using namespace RDKit::SynthonSpaceSearch;
using namespace RDKit::RascalMCES;
// Value of the RDBASE environment variable, used by the tests below as the
// root directory under which the test data files are located. getenv()
// returns a null pointer when the variable is unset, so each test checks
// this before building any paths from it.
const char *rdbase = getenv("RDBASE");
void getMols(const std::string &molFilename,
std::map<std::string, std::unique_ptr<RWMol>> &mols) {
v2::FileParsers::SmilesMolSupplierParams fileparams;
fileparams.titleLine = false;
v2::FileParsers::SmilesMolSupplier suppl(molFilename, fileparams);
std::map<std::string, std::unique_ptr<ExplicitBitVect>> fps;
while (!suppl.atEnd()) {
auto mol = suppl.next();
auto molName = mol->getProp<std::string>(common_properties::_Name);
mols.insert(std::make_pair(molName, mol.release()));
}
}
// Runs a RASCAL MCES comparison of `queryMol` against every molecule in
// `mols` and collects the names of those that count as hits.
//
// A molecule is a hit when rascalMCES returns at least one result and the
// similarity of the first result is strictly greater than
// rascalOptions.similarityThreshold.  Each hit molecule additionally gets
// that similarity stored on it as the double property "Similarity".
//
// queryMol:      the query molecule.
// mols:          name -> molecule map to search; the map itself is not
//                modified, but hit molecules receive the property above.
// rascalOptions: options passed through to rascalMCES; also supplies the
//                similarity threshold.
//
// Returns the set of names (map keys) of the hit molecules.
std::set<std::string> bruteForceSearch(
    const ROMol &queryMol,
    const std::map<std::string, std::unique_ptr<RWMol>> &mols,
    const RascalOptions &rascalOptions) {
  std::set<std::string> names;
  for (const auto &[name, mol] : mols) {
    auto res = rascalMCES(queryMol, *mol, rascalOptions);
    if (res.empty()) {
      continue;
    }
    // Evaluate the similarity once and reuse it for the test and the property.
    const auto similarity = res.front().getSimilarity();
    if (similarity > rascalOptions.similarityThreshold) {
      mol->setProp<double>("Similarity", similarity);
      names.insert(name);
    }
  }
  return names;
}
// For each of three small synthon spaces (amide, triazole, urea):
//   1. run a RASCAL search and check the number of hits;
//   2. run the callback-based search and check it yields the same SMILES;
//   3. brute-force search the pre-enumerated library and check that every
//      synthon-search hit is also a brute-force hit.
TEST_CASE("RASCAL Small tests") {
  REQUIRE(rdbase);
  const std::string fullRoot =
      std::string(rdbase) + "/Code/GraphMol/SynthonSpaceSearch/data/";
  std::vector<std::string> libNames{
      fullRoot + "amide_space.txt",
      fullRoot + "triazole_space.txt",
      fullRoot + "urea_space.txt",
  };
  std::vector<std::string> enumLibNames{
      fullRoot + "amide_space_enum.smi",
      fullRoot + "triazole_space_enum.smi",
      fullRoot + "urea_space_enum.smi",
  };
  std::vector<std::string> querySmis{
      "c1ccccc1C(=O)N1CCCC1",
      "CC1CCN(c2nnc(CO)n2C2CCCC2)C1",
      "C[C@@H]1CC(NC(=O)NC2COC2)CN(C(=O)c2nccnc2F)C1",
  };
  std::vector<size_t> expNumHits{6, 4, 1};
  // The four vectors above are indexed in parallel; make sure nobody edits
  // one of them without the others.
  REQUIRE(enumLibNames.size() == libNames.size());
  REQUIRE(querySmis.size() == libNames.size());
  REQUIRE(expNumHits.size() == libNames.size());

  RascalOptions rascalOptions;
  for (size_t i = 0; i < libNames.size(); i++) {
    SynthonSpace synthonspace;
    bool cancelled = false;
    synthonspace.readTextFile(libNames[i], cancelled);
    SynthonSpaceSearchParams params;
    auto queryMol = v2::SmilesParse::MolFromSmiles(querySmis[i]);
    // Stop here rather than dereference a null query below.
    REQUIRE(queryMol);

    auto results = synthonspace.rascalSearch(*queryMol, rascalOptions, params);
    CHECK(results.getHitMolecules().size() == expNumHits[i]);
    std::set<std::string> resSmis;
    for (const auto &hit : results.getHitMolecules()) {
      resSmis.insert(MolToSmiles(*hit));
    }

    // Same search through the callback interface.  The callback always
    // returns false so that it never asks for the search to be stopped.
    std::set<std::string> cbSmis;
    auto cb = [&cbSmis](const std::vector<std::unique_ptr<ROMol>> &hits) {
      for (const auto &hit : hits) {
        cbSmis.insert(MolToSmiles(*hit));
      }
      return false;
    };
    synthonspace.rascalSearch(*queryMol, rascalOptions, cb, params);
    CHECK(resSmis == cbSmis);

    // Cross-check against a brute-force search of the enumerated library.
    std::map<std::string, std::unique_ptr<RWMol>> mols;
    getMols(enumLibNames[i], mols);
    auto names = bruteForceSearch(*queryMol, mols, rascalOptions);
    std::set<std::string> fullSmis;
    for (const auto &name : names) {
      // at() rather than operator[]: a missing name must not silently insert
      // a null molecule that is then dereferenced.
      fullSmis.insert(MolToSmiles(*mols.at(name)));
    }
    // As with fingerprints, we don't get all the hits with synthon search
    // that we would with a full search, so only check that the synthon hits
    // are a subset of the brute-force hits.
    for (const auto &rs : resSmis) {
      CHECK(fullSmis.find(rs) != fullSmis.end());
    }
  }
}