Add tautomer query to the substructlibrary (#3808)

* Fixes #3797

* [WIP] Add tautomer queries to the substruct library

* Add TautomerQuery to CMake

* Add missing TautomerQuery functions, python wrapper and tests

* Add python wrappers for Substruct Library Tautomer Queries

* Explictly label non-const pattern function now that we have both

* Use boost::shared_ptr not std::shared_ptr

* Fix java builds

* One more try to fix java builds

* Fix Java Tests

* Run clang format

* Reenable tests

* Fix annoyingly stupid bug and annoying commit of debug code

* Fix documentation

* reenable ifdef threadsafe check

* Throw warning and perform tautomer search instead of bailing with incorrect fingerprints

* Simplfy api with templates

* Fix SubstructLibrary java issues

* minor API cleanup

* simplify the SWIG wrappers

Co-authored-by: Brian Kelley <bkelley@relaytx.com>
Co-authored-by: greg landrum <greg.landrum@gmail.com>
This commit is contained in:
Brian Kelley
2021-03-04 22:56:20 -05:00
committed by GitHub
parent e21e2c2af6
commit c8aa10c80f
13 changed files with 643 additions and 273 deletions

View File

@@ -37,7 +37,6 @@
#include <GraphMol/Substruct/SubstructMatch.h>
namespace RDKit {
bool SubstructLibraryCanSerialize() {
@@ -48,7 +47,6 @@ bool SubstructLibraryCanSerialize() {
#endif
}
struct Bits {
const ExplicitBitVect *queryBits;
const FPHolderBase *fps;
@@ -69,6 +67,29 @@ struct Bits {
}
}
Bits(const FPHolderBase *fingerprints, const TautomerQuery &m, bool recursionPossible,
bool useChirality, bool useQueryQueryMatches)
: fps(nullptr),
recursionPossible(recursionPossible),
useChirality(useChirality),
useQueryQueryMatches(useQueryQueryMatches) {
if (fingerprints) {
const TautomerPatternHolder *tp =
dynamic_cast<const TautomerPatternHolder *>(fingerprints);
if(!tp) {
BOOST_LOG(rdWarningLog) << "Pattern fingerprints for tautomersearch aren't tautomer fingerprints, ignoring..." << std::endl;
queryBits = nullptr;
fps = nullptr;
}
else {
fps = fingerprints;
queryBits = m.patternFingerprintTemplate(tp->getNumBits());
}
} else {
queryBits = nullptr;
}
}
bool check(unsigned int idx) const {
if (fps) {
return fps->passesFilter(idx, *queryBits);
@@ -90,15 +111,15 @@ unsigned int SubstructLibrary::addMol(const ROMol &m) {
namespace {
// Return true if the pattern contains a ring query
bool query_needs_rings(const ROMol &in_query) {
for (auto &atom: in_query.atoms()) {
if(atom->hasQuery()) {
for (const auto &atom : in_query.atoms()) {
if (atom->hasQuery()) {
if (describeQuery(atom).find("Ring") != std::string::npos) {
return true;
}
}
}
for (auto &bond: in_query.bonds()) {
if(bond->hasQuery()) {
for (const auto &bond : in_query.bonds()) {
if (bond->hasQuery()) {
if (describeQuery(bond).find("Ring") != std::string::npos) {
return true;
}
@@ -107,12 +128,17 @@ bool query_needs_rings(const ROMol &in_query) {
return false;
}
void SubSearcher(const ROMol &in_query, const Bits &bits,
bool query_needs_rings(const TautomerQuery &in_query) {
return query_needs_rings(in_query.getTemplateMolecule());
}
template <class Query>
void SubSearcher(const Query &in_query, const Bits &bits,
const MolHolderBase &mols, unsigned int start,
unsigned int &end, unsigned int numThreads,
const bool needs_rings, int &counter, const int maxResults,
std::vector<unsigned int> *idxs) {
ROMol query(in_query);
Query query(in_query);
MatchVectType matchVect;
for (unsigned int idx = start; idx < end; idx += numThreads) {
if (!bits.check(idx)) {
@@ -122,13 +148,14 @@ void SubSearcher(const ROMol &in_query, const Bits &bits,
// returned molecule!
const boost::shared_ptr<ROMol> &m = mols.getMol(idx);
ROMol *mol = m.get();
if(!mol){
if (!mol) {
continue;
}
if (needs_rings && (!mol->getRingInfo() || !mol->getRingInfo()->isInitialized())) {
if (needs_rings &&
(!mol->getRingInfo() || !mol->getRingInfo()->isInitialized())) {
MolOps::symmetrizeSSSR(*mol);
}
if (SubstructMatch(*mol, query, matchVect, bits.recursionPossible,
bits.useChirality, bits.useQueryQueryMatches)) {
++counter;
@@ -145,12 +172,13 @@ void SubSearcher(const ROMol &in_query, const Bits &bits,
}
}
int internalGetMatches(
const ROMol &query, MolHolderBase &mols, const FPHolderBase *fps,
unsigned int startIdx, unsigned int endIdx, bool recursionPossible,
bool useChirality, bool useQueryQueryMatches,
int numThreads = -1, int maxResults = 1000,
std::vector<unsigned int> *idxs = nullptr) {
template <class Query>
int internalGetMatches(const Query &query, MolHolderBase &mols,
const FPHolderBase *fps, unsigned int startIdx,
unsigned int endIdx, bool recursionPossible,
bool useChirality, bool useQueryQueryMatches,
int numThreads = -1, int maxResults = 1000,
std::vector<unsigned int> *idxs = nullptr) {
PRECONDITION(startIdx < mols.size(), "startIdx out of bounds");
PRECONDITION(endIdx > startIdx, "endIdx > startIdx");
@@ -193,8 +221,8 @@ int internalGetMatches(
++thread_group_idx) {
// need to use boost::ref otherwise things are passed by value
thread_group.emplace_back(
std::async(std::launch::async, SubSearcher, std::ref(query), bits,
std::ref(mols), startIdx + thread_group_idx,
std::async(std::launch::async, SubSearcher<Query>, std::ref(query),
bits, std::ref(mols), startIdx + thread_group_idx,
std::ref(endIdxVect[thread_group_idx]), numThreads,
needs_rings, std::ref(counterVect[thread_group_idx]),
maxResultsVect[thread_group_idx],
@@ -227,7 +255,7 @@ int internalGetMatches(
}
// need to use boost::ref otherwise things are passed by value
thread_group.emplace_back(std::async(
std::launch::async, SubSearcher, std::ref(query), bits,
std::launch::async, SubSearcher<Query>, std::ref(query), bits,
std::ref(mols), endIdxVect[thread_group_idx] + numThreads,
std::ref(maxEndIdx), numThreads, needs_rings,
std::ref(counterVect[thread_group_idx]), -1,
@@ -267,7 +295,8 @@ int internalGetMatches(
}
}
#else
SubSearcher(query, bits, mols, startIdx, endIdx, 1, needs_rings, counter, maxResults, idxs);
SubSearcher(query, bits, mols, startIdx, endIdx, 1, needs_rings, counter,
maxResults, idxs);
#endif
delete bits.queryBits;
@@ -275,33 +304,28 @@ int internalGetMatches(
return counter;
}
}
std::vector<unsigned int> SubstructLibrary::getMatches(
const ROMol &query, bool recursionPossible, bool useChirality,
bool useQueryQueryMatches, int numThreads, int maxResults) const {
return getMatches(query, 0, mols->size(), recursionPossible, useChirality,
useQueryQueryMatches, numThreads, maxResults);
}
} // namespace
std::vector<unsigned int> SubstructLibrary::getMatches(
const ROMol &query, unsigned int startIdx, unsigned int endIdx,
bool recursionPossible, bool useChirality, bool useQueryQueryMatches,
int numThreads, int maxResults) const {
std::vector<unsigned int> idxs;
internalGetMatches(query, *mols, fps, startIdx, endIdx,
recursionPossible, useChirality,
useQueryQueryMatches, numThreads, maxResults, &idxs);
internalGetMatches(query, *mols, fps, startIdx, endIdx, recursionPossible,
useChirality, useQueryQueryMatches, numThreads, maxResults,
&idxs);
return idxs;
}
unsigned int SubstructLibrary::countMatches(const ROMol &query,
bool recursionPossible,
bool useChirality,
bool useQueryQueryMatches,
int numThreads) const {
return countMatches(query, 0, mols->size(), recursionPossible, useChirality,
useQueryQueryMatches, numThreads);
std::vector<unsigned int> SubstructLibrary::getMatches(
const TautomerQuery &query, unsigned int startIdx, unsigned int endIdx,
bool recursionPossible, bool useChirality, bool useQueryQueryMatches,
int numThreads, int maxResults) const {
std::vector<unsigned int> idxs;
internalGetMatches(query, *mols, fps, startIdx, endIdx, recursionPossible,
useChirality, useQueryQueryMatches, numThreads, maxResults,
&idxs);
return idxs;
}
unsigned int SubstructLibrary::countMatches(
@@ -313,13 +337,13 @@ unsigned int SubstructLibrary::countMatches(
useQueryQueryMatches, numThreads, -1);
}
bool SubstructLibrary::hasMatch(const ROMol &query, bool recursionPossible,
bool useChirality, bool useQueryQueryMatches,
int numThreads) const {
const int maxResults = 1;
return getMatches(query, recursionPossible, useChirality,
useQueryQueryMatches, numThreads, maxResults)
.size() > 0;
unsigned int SubstructLibrary::countMatches(
const TautomerQuery &query, unsigned int startIdx, unsigned int endIdx,
bool recursionPossible, bool useChirality, bool useQueryQueryMatches,
int numThreads) const {
return internalGetMatches(query, *mols, fps, startIdx, endIdx,
recursionPossible, useChirality,
useQueryQueryMatches, numThreads, -1);
}
bool SubstructLibrary::hasMatch(const ROMol &query, unsigned int startIdx,
@@ -332,13 +356,24 @@ bool SubstructLibrary::hasMatch(const ROMol &query, unsigned int startIdx,
.size() > 0;
}
bool SubstructLibrary::hasMatch(const TautomerQuery &query,
unsigned int startIdx, unsigned int endIdx,
bool recursionPossible, bool useChirality,
bool useQueryQueryMatches,
int numThreads) const {
const int maxResults = 1;
return getMatches(query, startIdx, endIdx, recursionPossible, useChirality,
useQueryQueryMatches, numThreads, maxResults)
.size() > 0;
}
void SubstructLibrary::toStream(std::ostream &ss) const {
#ifndef RDK_USE_BOOST_SERIALIZATION
PRECONDITION(0, "Boost SERIALIZATION is not enabled")
#else
boost::archive::text_oarchive ar(ss);
ar << *this;
#endif
#endif
}
std::string SubstructLibrary::Serialize() const {
@@ -353,7 +388,7 @@ void SubstructLibrary::initFromStream(std::istream &ss) {
#else
boost::archive::text_iarchive ar(ss);
ar >> *this;
#endif
#endif
}
void SubstructLibrary::initFromString(const std::string &text) {
@@ -361,4 +396,4 @@ void SubstructLibrary::initFromString(const std::string &text) {
initFromStream(ss);
}
}
} // namespace RDKit