mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-03 21:44:30 +08:00
* Adds C++ Enumeration Engine to the RDKit * Adds Sanitization helpers, wrappers and tests * Clang format * Remove unused enumerationStateOnly flag * Fixes docStrings to current API * Adds doc strings * Removes RGroupPosition, adds getPosition to EnumerationBase * Fixes readability. * Adds EnumerateLibraryBase::reset and getReaction * Added getReagents method to EnumerateLibrary * Make the tests have the same naming * Need to save the initial state for resetting. * Stupid case-insensitive file systems * Moves ResetState to EnumerateLibraryBase * Adds removeNonmatchingReagents helper * Renames currentPosition to getPosition * Adds Enumeration Toolkit tutorial * Fixes Python3 serialization and enumerators * Verified to run on python2 and 3 * Fixes integer issues on windows * The number of enumeration should be unsigned. * Adds deserialization constructor * Moves boost_serialization to the end * Deprecates Clone in favor of copy * Update tests to use copy.copy not Clone * Move RGROUPS and BBS into an EnumerationTypes namespace * Make sure old pickles work * Adds pickle for backwards compatibility * Moves to uint64_t from size_t for public api * Whups, accidentally used the binary archiver. * Commits boost 1.55 serialization * Makes serialization turnoffable Like Filter Catalog * Fixes tests when serialization not available. Adds more enumeration strategy tests * Fixes a syntax error on some versions of python * Fixes sanitizeRxn to actually make proper RGroup atoms * Updates SanitizeRXN python API * Updates Enumeration API to a parameter class - fixes reagent removal * Adds a mess of tests * Change stats to return a string. * Exposes EvenPairSamplingStrategy Stats to python * Fixes a crash bug in SanitizeRxn * Adds better testing of the even pair sampling * Fixes namespace * One more try to fix gcc * Enum classes are c++11 and a microsoft extension. 
* Fix typo * Fixes np.median for python3 * Fixes atom iterators * Adds virtual tags to derived virtual functions (for clarity) * Fixes size comparison issues * Adds doc string * Small cleanup (has no effect since flags aren’t used) * fixes crash bug on windows * get the tests working on windows * Updates tutorial * Adds Glare implementation to Contrib
194 lines
6.2 KiB
C++
194 lines
6.2 KiB
C++
//
|
|
// Copyright (c) 2016, Novartis Institutes for BioMedical Research Inc.
|
|
// All rights reserved.
|
|
//
|
|
// Redistribution and use in source and binary forms, with or without
|
|
// modification, are permitted provided that the following conditions are
|
|
// met:
|
|
//
|
|
// * Redistributions of source code must retain the above copyright
|
|
// notice, this list of conditions and the following disclaimer.
|
|
// * Redistributions in binary form must reproduce the above
|
|
// copyright notice, this list of conditions and the following
|
|
// disclaimer in the documentation and/or other materials provided
|
|
// with the distribution.
|
|
// * Neither the name of Novartis Institutes for BioMedical Research Inc.
|
|
// nor the names of its contributors may be used to endorse or promote
|
|
// products derived from this software without specific prior written
|
|
// permission.
|
|
//
|
|
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
//
|
|
|
|
#ifndef RGROUP_EVEN_SAMPLE_H
|
|
#define RGROUP_EVEN_SAMPLE_H
|
|
|
|
#include "EnumerationStrategyBase.h"
|
|
#ifdef RDK_USE_BOOST_SERIALIZATION
|
|
#include <boost/serialization/set.hpp>
|
|
#endif
|
|
#include <stdint.h>
|
|
|
|
namespace RDKit {
|
|
//! EvenSamplePairsStrategy
|
|
/*! Randomly sample Pairs evenly from a collection of building blocks
|
|
This is a good strategy for choosing a relatively small selection
|
|
of building blocks from a larger set. As the amount of work needed
|
|
to retrieve the next evenly sampled building block grows with the
|
|
number of samples, this method performs progressively worse as the
|
|
number of samples gets larger.
|
|
|
|
See EnumerationStrategyBase for more details.
|
|
*/
|
|
|
|
class EvenSamplePairsStrategy : public EnumerationStrategyBase {
|
|
boost::uint64_t m_numPermutationsProcessed;
|
|
|
|
std::vector<int64_t> used_count;
|
|
std::vector<std::vector<size_t> > var_used;
|
|
std::vector<std::vector<size_t> > pair_used;
|
|
std::vector<std::vector<size_t> > pair_counts;
|
|
std::set<size_t> selected;
|
|
|
|
size_t seed; // last seed for permutation (starts at 0)
|
|
size_t M, a, b; // random number stuff
|
|
size_t nslack, min_nslack;
|
|
size_t rejected_period, rejected_unique;
|
|
size_t rejected_slack_condition, rejected_bb_sampling_condition;
|
|
|
|
public:
|
|
EvenSamplePairsStrategy()
|
|
: EnumerationStrategyBase(),
|
|
m_numPermutationsProcessed(),
|
|
used_count(),
|
|
var_used(),
|
|
pair_used(),
|
|
pair_counts(),
|
|
selected(),
|
|
seed(),
|
|
M(),
|
|
a(),
|
|
b(),
|
|
nslack(),
|
|
min_nslack(),
|
|
rejected_period(),
|
|
rejected_unique(),
|
|
rejected_slack_condition(),
|
|
rejected_bb_sampling_condition() {}
|
|
|
|
EvenSamplePairsStrategy(const EvenSamplePairsStrategy &rhs)
|
|
: EnumerationStrategyBase(rhs),
|
|
m_numPermutationsProcessed(rhs.m_numPermutationsProcessed),
|
|
used_count(rhs.used_count),
|
|
var_used(rhs.var_used),
|
|
pair_used(rhs.pair_used),
|
|
pair_counts(rhs.pair_counts),
|
|
selected(rhs.selected),
|
|
seed(rhs.seed),
|
|
M(rhs.M),
|
|
a(rhs.a),
|
|
b(rhs.b),
|
|
nslack(rhs.nslack),
|
|
min_nslack(rhs.min_nslack),
|
|
rejected_period(rhs.rejected_period),
|
|
rejected_unique(rhs.rejected_unique),
|
|
rejected_slack_condition(rhs.rejected_slack_condition),
|
|
rejected_bb_sampling_condition(rhs.rejected_bb_sampling_condition) {}
|
|
|
|
virtual const char *type() const { return "EvenSamplePairsStrategy"; }
|
|
|
|
//! This is a class for enumerating RGroups using Cartesian Products of
|
|
//! reagents.
|
|
/*!
|
|
basic usage:
|
|
|
|
\verbatim
|
|
std::vector<MOL_SPTR_VECT> bbs;
|
|
bbs.push_back( bbs_for_reactants_1 );
|
|
bbs.push_back( bbs_for_reactants_2 );
|
|
|
|
EvenSamplePairsStrategy rgroups;
|
|
rgroups.initialize(rxn, bbs);
|
|
for(size_t i=0; i<num_samples && rgroups; ++i) {
|
|
MOL_SPTR_VECT rvect = getReactantsFromRGroups(bbs, rgroups.next());
|
|
std::vector<MOL_SPTR_VECT> lprops = rxn.RunReactants(rvect);
|
|
...
|
|
}
|
|
\endverbatim
|
|
*/
|
|
using EnumerationStrategyBase::initialize;
|
|
|
|
virtual void initializeStrategy(const ChemicalReaction &, const EnumerationTypes::BBS &);
|
|
|
|
//! The current permutation {r1, r2, ...}
|
|
virtual const EnumerationTypes::RGROUPS &next();
|
|
|
|
virtual boost::uint64_t getPermutationIdx() const {
|
|
return m_numPermutationsProcessed; }
|
|
|
|
virtual operator bool() const { return true; }
|
|
|
|
EnumerationStrategyBase *copy() const {
|
|
return new EvenSamplePairsStrategy(*this);
|
|
}
|
|
|
|
std::string stats() const;
|
|
|
|
private:
|
|
friend class boost::serialization::access;
|
|
|
|
// decode a packed integer into an RGroup selection
|
|
const EnumerationTypes::RGROUPS &decode(size_t seed) {
|
|
for (int64_t j = m_permutationSizes.size() - 1; j >= 0; j--) {
|
|
m_permutation[j] = seed % m_permutationSizes[j];
|
|
seed /= m_permutationSizes[j];
|
|
}
|
|
return m_permutation;
|
|
}
|
|
|
|
bool try_add(size_t seed);
|
|
|
|
public:
|
|
#ifdef RDK_USE_BOOST_SERIALIZATION
|
|
template <class Archive>
|
|
void serialize(Archive &ar, const unsigned int /*version*/) {
|
|
// invoke serialization of the base class
|
|
ar &boost::serialization::base_object<EnumerationStrategyBase>(*this);
|
|
ar &m_numPermutationsProcessed;
|
|
ar &used_count;
|
|
ar &var_used;
|
|
ar &pair_used;
|
|
ar &pair_counts;
|
|
ar &selected;
|
|
|
|
ar &seed;
|
|
|
|
ar &M;
|
|
ar &a;
|
|
ar &b;
|
|
|
|
ar &nslack;
|
|
ar &min_nslack;
|
|
ar &rejected_period;
|
|
ar &rejected_unique;
|
|
ar &rejected_slack_condition;
|
|
ar &rejected_bb_sampling_condition;
|
|
}
|
|
#endif
|
|
};
|
|
}
|
|
|
|
// Tag the serialized form of EvenSamplePairsStrategy as class version 1.
// BOOST_CLASS_VERSION is provided by Boost.Serialization, so it is only
// expanded when serialization support is enabled; otherwise the header must
// still compile without the macro being defined.
#ifdef RDK_USE_BOOST_SERIALIZATION
BOOST_CLASS_VERSION(RDKit::EvenSamplePairsStrategy, 1)
#endif
|
|
|
|
#endif
|