mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-04 21:54:27 +08:00
Add tautomer query to the substructlibrary (#3808)
* Fixes #3797 * [WIP] Add tautomer queries to the substruct library * Add TautomerQuery to CMake * Add missing TautomerQuery functions, python wrapper and tests * Add python wrappers for Substruct Library Tautomer Queries * Explictly label non-const pattern function now that we have both * Use boost::shared_ptr not std::shared_ptr * Fix java builds * One more try to fix java builds * Fix Java Tests * Run clang format * Reenable tests * Fix annoyingly stupid bug and annoying commit of debug code * Fix documentation * reenable ifdef threadsafe check * Throw warning and perform tautomer search instead of bailing with incorrect fingerprints * Simplfy api with templates * Fix SubstructLibrary java issues * minor API cleanup * simplify the SWIG wrappers Co-authored-by: Brian Kelley <bkelley@relaytx.com> Co-authored-by: greg landrum <greg.landrum@gmail.com>
This commit is contained in:
@@ -37,7 +37,6 @@
|
||||
|
||||
#include <GraphMol/Substruct/SubstructMatch.h>
|
||||
|
||||
|
||||
namespace RDKit {
|
||||
|
||||
bool SubstructLibraryCanSerialize() {
|
||||
@@ -48,7 +47,6 @@ bool SubstructLibraryCanSerialize() {
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
struct Bits {
|
||||
const ExplicitBitVect *queryBits;
|
||||
const FPHolderBase *fps;
|
||||
@@ -69,6 +67,29 @@ struct Bits {
|
||||
}
|
||||
}
|
||||
|
||||
Bits(const FPHolderBase *fingerprints, const TautomerQuery &m, bool recursionPossible,
|
||||
bool useChirality, bool useQueryQueryMatches)
|
||||
: fps(nullptr),
|
||||
recursionPossible(recursionPossible),
|
||||
useChirality(useChirality),
|
||||
useQueryQueryMatches(useQueryQueryMatches) {
|
||||
if (fingerprints) {
|
||||
const TautomerPatternHolder *tp =
|
||||
dynamic_cast<const TautomerPatternHolder *>(fingerprints);
|
||||
if(!tp) {
|
||||
BOOST_LOG(rdWarningLog) << "Pattern fingerprints for tautomersearch aren't tautomer fingerprints, ignoring..." << std::endl;
|
||||
queryBits = nullptr;
|
||||
fps = nullptr;
|
||||
}
|
||||
else {
|
||||
fps = fingerprints;
|
||||
queryBits = m.patternFingerprintTemplate(tp->getNumBits());
|
||||
}
|
||||
} else {
|
||||
queryBits = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
bool check(unsigned int idx) const {
|
||||
if (fps) {
|
||||
return fps->passesFilter(idx, *queryBits);
|
||||
@@ -90,15 +111,15 @@ unsigned int SubstructLibrary::addMol(const ROMol &m) {
|
||||
namespace {
|
||||
// Return true if the pattern contains a ring query
|
||||
bool query_needs_rings(const ROMol &in_query) {
|
||||
for (auto &atom: in_query.atoms()) {
|
||||
if(atom->hasQuery()) {
|
||||
for (const auto &atom : in_query.atoms()) {
|
||||
if (atom->hasQuery()) {
|
||||
if (describeQuery(atom).find("Ring") != std::string::npos) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
for (auto &bond: in_query.bonds()) {
|
||||
if(bond->hasQuery()) {
|
||||
for (const auto &bond : in_query.bonds()) {
|
||||
if (bond->hasQuery()) {
|
||||
if (describeQuery(bond).find("Ring") != std::string::npos) {
|
||||
return true;
|
||||
}
|
||||
@@ -107,12 +128,17 @@ bool query_needs_rings(const ROMol &in_query) {
|
||||
return false;
|
||||
}
|
||||
|
||||
void SubSearcher(const ROMol &in_query, const Bits &bits,
|
||||
bool query_needs_rings(const TautomerQuery &in_query) {
|
||||
return query_needs_rings(in_query.getTemplateMolecule());
|
||||
}
|
||||
|
||||
template <class Query>
|
||||
void SubSearcher(const Query &in_query, const Bits &bits,
|
||||
const MolHolderBase &mols, unsigned int start,
|
||||
unsigned int &end, unsigned int numThreads,
|
||||
const bool needs_rings, int &counter, const int maxResults,
|
||||
std::vector<unsigned int> *idxs) {
|
||||
ROMol query(in_query);
|
||||
Query query(in_query);
|
||||
MatchVectType matchVect;
|
||||
for (unsigned int idx = start; idx < end; idx += numThreads) {
|
||||
if (!bits.check(idx)) {
|
||||
@@ -122,13 +148,14 @@ void SubSearcher(const ROMol &in_query, const Bits &bits,
|
||||
// returned molecule!
|
||||
const boost::shared_ptr<ROMol> &m = mols.getMol(idx);
|
||||
ROMol *mol = m.get();
|
||||
if(!mol){
|
||||
if (!mol) {
|
||||
continue;
|
||||
}
|
||||
if (needs_rings && (!mol->getRingInfo() || !mol->getRingInfo()->isInitialized())) {
|
||||
if (needs_rings &&
|
||||
(!mol->getRingInfo() || !mol->getRingInfo()->isInitialized())) {
|
||||
MolOps::symmetrizeSSSR(*mol);
|
||||
}
|
||||
|
||||
|
||||
if (SubstructMatch(*mol, query, matchVect, bits.recursionPossible,
|
||||
bits.useChirality, bits.useQueryQueryMatches)) {
|
||||
++counter;
|
||||
@@ -145,12 +172,13 @@ void SubSearcher(const ROMol &in_query, const Bits &bits,
|
||||
}
|
||||
}
|
||||
|
||||
int internalGetMatches(
|
||||
const ROMol &query, MolHolderBase &mols, const FPHolderBase *fps,
|
||||
unsigned int startIdx, unsigned int endIdx, bool recursionPossible,
|
||||
bool useChirality, bool useQueryQueryMatches,
|
||||
int numThreads = -1, int maxResults = 1000,
|
||||
std::vector<unsigned int> *idxs = nullptr) {
|
||||
template <class Query>
|
||||
int internalGetMatches(const Query &query, MolHolderBase &mols,
|
||||
const FPHolderBase *fps, unsigned int startIdx,
|
||||
unsigned int endIdx, bool recursionPossible,
|
||||
bool useChirality, bool useQueryQueryMatches,
|
||||
int numThreads = -1, int maxResults = 1000,
|
||||
std::vector<unsigned int> *idxs = nullptr) {
|
||||
PRECONDITION(startIdx < mols.size(), "startIdx out of bounds");
|
||||
PRECONDITION(endIdx > startIdx, "endIdx > startIdx");
|
||||
|
||||
@@ -193,8 +221,8 @@ int internalGetMatches(
|
||||
++thread_group_idx) {
|
||||
// need to use boost::ref otherwise things are passed by value
|
||||
thread_group.emplace_back(
|
||||
std::async(std::launch::async, SubSearcher, std::ref(query), bits,
|
||||
std::ref(mols), startIdx + thread_group_idx,
|
||||
std::async(std::launch::async, SubSearcher<Query>, std::ref(query),
|
||||
bits, std::ref(mols), startIdx + thread_group_idx,
|
||||
std::ref(endIdxVect[thread_group_idx]), numThreads,
|
||||
needs_rings, std::ref(counterVect[thread_group_idx]),
|
||||
maxResultsVect[thread_group_idx],
|
||||
@@ -227,7 +255,7 @@ int internalGetMatches(
|
||||
}
|
||||
// need to use boost::ref otherwise things are passed by value
|
||||
thread_group.emplace_back(std::async(
|
||||
std::launch::async, SubSearcher, std::ref(query), bits,
|
||||
std::launch::async, SubSearcher<Query>, std::ref(query), bits,
|
||||
std::ref(mols), endIdxVect[thread_group_idx] + numThreads,
|
||||
std::ref(maxEndIdx), numThreads, needs_rings,
|
||||
std::ref(counterVect[thread_group_idx]), -1,
|
||||
@@ -267,7 +295,8 @@ int internalGetMatches(
|
||||
}
|
||||
}
|
||||
#else
|
||||
SubSearcher(query, bits, mols, startIdx, endIdx, 1, needs_rings, counter, maxResults, idxs);
|
||||
SubSearcher(query, bits, mols, startIdx, endIdx, 1, needs_rings, counter,
|
||||
maxResults, idxs);
|
||||
#endif
|
||||
|
||||
delete bits.queryBits;
|
||||
@@ -275,33 +304,28 @@ int internalGetMatches(
|
||||
return counter;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
std::vector<unsigned int> SubstructLibrary::getMatches(
|
||||
const ROMol &query, bool recursionPossible, bool useChirality,
|
||||
bool useQueryQueryMatches, int numThreads, int maxResults) const {
|
||||
return getMatches(query, 0, mols->size(), recursionPossible, useChirality,
|
||||
useQueryQueryMatches, numThreads, maxResults);
|
||||
}
|
||||
} // namespace
|
||||
|
||||
std::vector<unsigned int> SubstructLibrary::getMatches(
|
||||
const ROMol &query, unsigned int startIdx, unsigned int endIdx,
|
||||
bool recursionPossible, bool useChirality, bool useQueryQueryMatches,
|
||||
int numThreads, int maxResults) const {
|
||||
std::vector<unsigned int> idxs;
|
||||
internalGetMatches(query, *mols, fps, startIdx, endIdx,
|
||||
recursionPossible, useChirality,
|
||||
useQueryQueryMatches, numThreads, maxResults, &idxs);
|
||||
internalGetMatches(query, *mols, fps, startIdx, endIdx, recursionPossible,
|
||||
useChirality, useQueryQueryMatches, numThreads, maxResults,
|
||||
&idxs);
|
||||
return idxs;
|
||||
}
|
||||
|
||||
unsigned int SubstructLibrary::countMatches(const ROMol &query,
|
||||
bool recursionPossible,
|
||||
bool useChirality,
|
||||
bool useQueryQueryMatches,
|
||||
int numThreads) const {
|
||||
return countMatches(query, 0, mols->size(), recursionPossible, useChirality,
|
||||
useQueryQueryMatches, numThreads);
|
||||
std::vector<unsigned int> SubstructLibrary::getMatches(
|
||||
const TautomerQuery &query, unsigned int startIdx, unsigned int endIdx,
|
||||
bool recursionPossible, bool useChirality, bool useQueryQueryMatches,
|
||||
int numThreads, int maxResults) const {
|
||||
std::vector<unsigned int> idxs;
|
||||
internalGetMatches(query, *mols, fps, startIdx, endIdx, recursionPossible,
|
||||
useChirality, useQueryQueryMatches, numThreads, maxResults,
|
||||
&idxs);
|
||||
return idxs;
|
||||
}
|
||||
|
||||
unsigned int SubstructLibrary::countMatches(
|
||||
@@ -313,13 +337,13 @@ unsigned int SubstructLibrary::countMatches(
|
||||
useQueryQueryMatches, numThreads, -1);
|
||||
}
|
||||
|
||||
bool SubstructLibrary::hasMatch(const ROMol &query, bool recursionPossible,
|
||||
bool useChirality, bool useQueryQueryMatches,
|
||||
int numThreads) const {
|
||||
const int maxResults = 1;
|
||||
return getMatches(query, recursionPossible, useChirality,
|
||||
useQueryQueryMatches, numThreads, maxResults)
|
||||
.size() > 0;
|
||||
unsigned int SubstructLibrary::countMatches(
|
||||
const TautomerQuery &query, unsigned int startIdx, unsigned int endIdx,
|
||||
bool recursionPossible, bool useChirality, bool useQueryQueryMatches,
|
||||
int numThreads) const {
|
||||
return internalGetMatches(query, *mols, fps, startIdx, endIdx,
|
||||
recursionPossible, useChirality,
|
||||
useQueryQueryMatches, numThreads, -1);
|
||||
}
|
||||
|
||||
bool SubstructLibrary::hasMatch(const ROMol &query, unsigned int startIdx,
|
||||
@@ -332,13 +356,24 @@ bool SubstructLibrary::hasMatch(const ROMol &query, unsigned int startIdx,
|
||||
.size() > 0;
|
||||
}
|
||||
|
||||
bool SubstructLibrary::hasMatch(const TautomerQuery &query,
|
||||
unsigned int startIdx, unsigned int endIdx,
|
||||
bool recursionPossible, bool useChirality,
|
||||
bool useQueryQueryMatches,
|
||||
int numThreads) const {
|
||||
const int maxResults = 1;
|
||||
return getMatches(query, startIdx, endIdx, recursionPossible, useChirality,
|
||||
useQueryQueryMatches, numThreads, maxResults)
|
||||
.size() > 0;
|
||||
}
|
||||
|
||||
void SubstructLibrary::toStream(std::ostream &ss) const {
|
||||
#ifndef RDK_USE_BOOST_SERIALIZATION
|
||||
PRECONDITION(0, "Boost SERIALIZATION is not enabled")
|
||||
#else
|
||||
boost::archive::text_oarchive ar(ss);
|
||||
ar << *this;
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
std::string SubstructLibrary::Serialize() const {
|
||||
@@ -353,7 +388,7 @@ void SubstructLibrary::initFromStream(std::istream &ss) {
|
||||
#else
|
||||
boost::archive::text_iarchive ar(ss);
|
||||
ar >> *this;
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
void SubstructLibrary::initFromString(const std::string &text) {
|
||||
@@ -361,4 +396,4 @@ void SubstructLibrary::initFromString(const std::string &text) {
|
||||
initFromStream(ss);
|
||||
}
|
||||
|
||||
}
|
||||
} // namespace RDKit
|
||||
|
||||
Reference in New Issue
Block a user