mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-03 21:44:30 +08:00
Dev/reaction enumeration (#1111)
* Adds C++ Enumeration Engine to the RDKit * Adds Sanitization helpers, wrappers and tests * Clang format * Remove unused enumerationStateOnly flag * Fixes docStrings to current API * Adds doc strings * Removes RGroupPosition, adds getPosition to EnumerationBase * Fixes readability. * Adds EnumerateLibraryBase::reset and getReaction * Added getReagents method to EnumerateLibrary * Make the tests have the same naming * Need to save the initial state for resetting. * Stupid case-insensitive file systems * Moves ResetState to EnumerateLibraryBase * Adds removeNonmatchingReagents helper * Renames currentPosition to getPosition * Adds Enumeration Toolkit tutorial * Fixes Python3 serialization and enumerators * Verified to run on python2 and 3 * Fixes integer issues on windows * The number of enumeration should be unsigned. * Adds deserialization constructor * Moves boost_serialization to the end * Deprecates Clone in favor of copy * Update tests to use copy.copy not Clone * Move RGROUPS and BBS into an EnumerationTypes namespace * Make sure old pickles work * Adds pickle for backwards compatibility * Moves to uint64_t from size_t for public api * Whups, accidentally used the binary archiver. * Commits boost 1.55 serialization * Makes serialization turnoffable Like Filter Catalog * Fixes tests when serialization not available. Adds more enumeration strategy tests * Fixes a syntax error on some versions of python * Fixes sanitizeRxn to actually make proper RGroup atoms * Updates SanitizeRXN python API * Updates Enumeration API to a parameter class - fixes reagent removal * Adds a mess of tests * Change stats to return a string. * Exposes EvenPairSamplingStrategy Stats to python * Fixes a crash bug in SanitizeRxn * Adds better testing of the even pair sampling * Fixes namespace * One more try to fix gcc * Enum classes are c++11 and a microsoft extension. 
* Fix typo * Fixes np.median for python3 * Fixes atom iterators * Adds virtual tags to derived virtual functions (for clarity) * Fixes size comparison issues * Adds doc string * Small cleanup (has no effect since flags aren’t used) * fixes crash bug on windows * get the tests working on windows * Updates tutorial * Adds Glare implementation to Contrib
This commit is contained in:
committed by
Greg Landrum
parent
1b946794f0
commit
fa89438358
@@ -1,21 +1,47 @@
|
||||
if(RDK_USE_BOOST_SERIALIZATION AND Boost_SERIALIZATION_LIBRARY)
|
||||
ADD_DEFINITIONS("-DRDK_USE_BOOST_SERIALIZATION")
|
||||
else()
|
||||
message("== Making EnumerateLibrary without boost Serialization support")
|
||||
endif()
|
||||
|
||||
rdkit_library(ChemReactions
|
||||
Reaction.cpp MDLParser.cpp DaylightParser.cpp ReactionPickler.cpp
|
||||
ReactionWriter.cpp ReactionDepict.cpp ReactionFingerprints.cpp ReactionUtils.cpp MoleculeParser.cpp ReactionRunner.cpp PreprocessRxn.cpp
|
||||
LINK_LIBRARIES FilterCatalog Descriptors Fingerprints DataStructs Depictor FileParsers SubstructMatch ChemTransforms)
|
||||
ReactionWriter.cpp ReactionDepict.cpp ReactionFingerprints.cpp ReactionUtils.cpp MoleculeParser.cpp ReactionRunner.cpp PreprocessRxn.cpp SanitizeRxn.cpp
|
||||
Enumerate/Enumerate.cpp
|
||||
Enumerate/EnumerationPickler.cpp
|
||||
Enumerate/EvenSamplePairs.cpp
|
||||
|
||||
LINK_LIBRARIES
|
||||
FilterCatalog Descriptors Fingerprints DataStructs Depictor
|
||||
FileParsers SubstructMatch ChemTransforms ${Boost_SERIALIZATION_LIBRARY})
|
||||
|
||||
rdkit_headers(Reaction.h
|
||||
ReactionParser.h
|
||||
ReactionPickler.h
|
||||
ReactionFingerprints.h
|
||||
ReactionUtils.h
|
||||
ReactionRunner.h PreprocessRxn.h DEST GraphMol/ChemReactions)
|
||||
ReactionRunner.h
|
||||
PreprocessRxn.h
|
||||
SanitizeRxn.h
|
||||
Enumerate/Enumerate.h
|
||||
Enumerate/EnumerateBase.h
|
||||
Enumerate/EnumerationPickler.h
|
||||
Enumerate/EnumerationStrategyBase.h
|
||||
Enumerate/CartesianProduct.h
|
||||
Enumerate/RandomSample.h
|
||||
Enumerate/RandomSampleAllBBs.h
|
||||
DEST GraphMol/ChemReactions)
|
||||
|
||||
rdkit_test(testReaction testReaction.cpp LINK_LIBRARIES
|
||||
ChemReactions FilterCatalog ChemTransforms Descriptors Fingerprints Subgraphs DataStructs Depictor FileParsers SmilesParse SubstructMatch
|
||||
GraphMol RDGeneral RDGeometryLib )
|
||||
ChemReactions ChemTransforms Descriptors Fingerprints Subgraphs DataStructs Depictor FileParsers SmilesParse SubstructMatch
|
||||
GraphMol RDGeneral RDGeometryLib ${Boost_SERIALIZATION_LIBRARY} )
|
||||
|
||||
rdkit_test(testReactionFingerprints testReactionFingerprints.cpp LINK_LIBRARIES
|
||||
ChemReactions FilterCatalog Descriptors Fingerprints Subgraphs DataStructs ChemTransforms Depictor FileParsers SmilesParse SubstructMatch
|
||||
GraphMol RDGeneral RDGeometryLib )
|
||||
ChemReactions Descriptors Fingerprints Subgraphs DataStructs ChemTransforms Depictor FileParsers SmilesParse SubstructMatch
|
||||
GraphMol RDGeneral RDGeometryLib ${Boost_SERIALIZATION_LIBRARY} )
|
||||
|
||||
rdkit_test(testEnumeration Enumerate/testEnumerate.cpp LINK_LIBRARIES
|
||||
ChemReactions ChemTransforms Descriptors Fingerprints Subgraphs DataStructs Depictor FileParsers SmilesParse SubstructMatch
|
||||
GraphMol RDGeneral RDGeometryLib ${Boost_SERIALIZATION_LIBRARY} )
|
||||
|
||||
add_subdirectory(Wrap)
|
||||
|
||||
145
Code/GraphMol/ChemReactions/Enumerate/CartesianProduct.h
Normal file
145
Code/GraphMol/ChemReactions/Enumerate/CartesianProduct.h
Normal file
@@ -0,0 +1,145 @@
|
||||
//
|
||||
// Copyright (c) 2015, Novartis Institutes for BioMedical Research Inc.
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following
|
||||
// disclaimer in the documentation and/or other materials provided
|
||||
// with the distribution.
|
||||
// * Neither the name of Novartis Institutes for BioMedical Research Inc.
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
|
||||
#ifndef CARTESIANPRODUCT_H
|
||||
#define CARTESIANPRODUCT_H
|
||||
|
||||
#include "EnumerationStrategyBase.h"
|
||||
|
||||
namespace RDKit {
|
||||
//! This is a class for enumerating reagents using Cartesian Products of
|
||||
// reagents.
|
||||
/*!
|
||||
CartesianProductStrategy produces a standard walk through all possible
|
||||
reagent combinations:
|
||||
|
||||
(0,0,0), (1,0,0), (2,0,0) ...
|
||||
|
||||
basic usage:
|
||||
|
||||
\verbatim
|
||||
std::vector<MOL_SPTR_VECT> bbs;
|
||||
bbs.push_back( bbs_for_reactants_1 );
|
||||
bbs.push_back( bbs_for_reactants_2 );
|
||||
|
||||
RGROUPS num_bbs;
|
||||
num_bbs.push_back(bbs[0].size());
|
||||
num_bbs.push_back(bbs[1].size());
|
||||
|
||||
CartesianProductStrategy rgroups(num_bbs);
|
||||
for(size_t i=0; i<num_samples && rgroups; ++i) {
|
||||
MOL_SPTR_VECT rvect = getReactantsFromRGroups(bbs, rgroups.next());
|
||||
std::vector<MOL_SPTR_VECT> lprops = rxn.runReactants(rvect);
|
||||
...
|
||||
}
|
||||
\endverbatim
|
||||
|
||||
See EnumerationStrategyBase for more details and usage.
|
||||
*/
|
||||
|
||||
class CartesianProductStrategy : public EnumerationStrategyBase {
|
||||
size_t m_numPermutationsProcessed;
|
||||
|
||||
public:
|
||||
CartesianProductStrategy()
|
||||
: EnumerationStrategyBase(), m_numPermutationsProcessed() {}
|
||||
|
||||
using EnumerationStrategyBase::initialize;
|
||||
|
||||
virtual void initializeStrategy(const ChemicalReaction &, const EnumerationTypes::BBS &) {
|
||||
m_numPermutationsProcessed = 0;
|
||||
}
|
||||
|
||||
virtual const char *type() const { return "CartesianProductStrategy"; }
|
||||
|
||||
//! The current permutation {r1, r2, ...}
|
||||
virtual const EnumerationTypes::RGROUPS &next() {
|
||||
if (m_numPermutationsProcessed) {
|
||||
increment();
|
||||
} else
|
||||
++m_numPermutationsProcessed;
|
||||
|
||||
return m_permutation;
|
||||
}
|
||||
|
||||
virtual boost::uint64_t getPermutationIdx() const {
|
||||
return m_numPermutationsProcessed; }
|
||||
|
||||
virtual operator bool() const { return hasNext(); }
|
||||
|
||||
EnumerationStrategyBase *copy() const {
|
||||
return new CartesianProductStrategy(*this);
|
||||
}
|
||||
|
||||
private:
|
||||
void increment() {
|
||||
next(0);
|
||||
++m_numPermutationsProcessed;
|
||||
}
|
||||
|
||||
bool hasNext() const {
|
||||
// Fix me -> use multiprecision int here???
|
||||
if (m_numPermutations == EnumerationStrategyBase::EnumerationOverflow ||
|
||||
m_numPermutationsProcessed < rdcast<size_t>(m_numPermutations)) {
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
void next(size_t rowToIncrement) {
|
||||
if (!hasNext()) return;
|
||||
m_permutation[rowToIncrement] += 1;
|
||||
size_t max_index_of_row = m_permutationSizes[rowToIncrement] - 1;
|
||||
if (m_permutation[rowToIncrement] > max_index_of_row) {
|
||||
m_permutation[rowToIncrement] = 0;
|
||||
next(rowToIncrement + 1);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
#ifdef RDK_USE_BOOST_SERIALIZATION
|
||||
friend class boost::serialization::access;
|
||||
template <class Archive>
|
||||
void serialize(Archive &ar, const unsigned int /*version*/) {
|
||||
ar &boost::serialization::base_object<EnumerationStrategyBase>(*this);
|
||||
ar &m_numPermutationsProcessed;
|
||||
}
|
||||
#endif
|
||||
};
|
||||
}
|
||||
|
||||
#ifdef RDK_USE_BOOST_SERIALIZATION
|
||||
BOOST_CLASS_VERSION(RDKit::CartesianProductStrategy, 1)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
259
Code/GraphMol/ChemReactions/Enumerate/Enumerate.cpp
Normal file
259
Code/GraphMol/ChemReactions/Enumerate/Enumerate.cpp
Normal file
@@ -0,0 +1,259 @@
|
||||
//
|
||||
// Copyright (c) 2015, Novartis Institutes for BioMedical Research Inc.
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following
|
||||
// disclaimer in the documentation and/or other materials provided
|
||||
// with the distribution.
|
||||
// * Neither the name of Novartis Institutes for BioMedical Research Inc.
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
|
||||
#include "Enumerate.h"
|
||||
#include "CartesianProduct.h"
|
||||
#include "RandomSample.h"
|
||||
#include "RandomSampleAllBBs.h"
|
||||
#include "EvenSamplePairs.h"
|
||||
#include "../ReactionPickler.h"
|
||||
#include <GraphMol/MolPickler.h>
|
||||
#include <GraphMol/SmilesParse/SmilesWrite.h>
|
||||
|
||||
// Since we are exporting the classes for serialization,
|
||||
// we should declare the archives types used here
|
||||
#ifdef RDK_USE_BOOST_SERIALIZATION
|
||||
#include <RDGeneral/BoostStartInclude.h>
|
||||
#include <boost/archive/text_oarchive.hpp>
|
||||
#include <boost/archive/text_iarchive.hpp>
|
||||
#include <boost/serialization/shared_ptr.hpp>
|
||||
#include <boost/serialization/export.hpp>
|
||||
#include <RDGeneral/BoostEndInclude.h>
|
||||
|
||||
BOOST_CLASS_EXPORT(RDKit::EnumerationStrategyBase);
|
||||
BOOST_CLASS_EXPORT(RDKit::CartesianProductStrategy);
|
||||
BOOST_CLASS_EXPORT(RDKit::RandomSampleStrategy);
|
||||
BOOST_CLASS_EXPORT(RDKit::RandomSampleAllBBsStrategy);
|
||||
BOOST_CLASS_EXPORT(RDKit::EvenSamplePairsStrategy);
|
||||
BOOST_CLASS_EXPORT(RDKit::EnumerateLibrary);
|
||||
#endif
|
||||
|
||||
namespace RDKit {
|
||||
using namespace EnumerationTypes;
|
||||
|
||||
//! Returns the current position of the enumeration strategy in the reagent
//  vectors. Fails the precondition when no strategy is set (a
//  default-constructed library) instead of dereferencing a null pointer.
const RGROUPS &EnumerateLibraryBase::getPosition() const {
  PRECONDITION(m_enumerator.get(), "Null Enumerator");
  return m_enumerator->getPosition();
}
|
||||
|
||||
//! Pickles the current enumeration strategy into a string and returns it.
//  Requires a non-null strategy.
std::string EnumerateLibraryBase::getState() const {
  PRECONDITION(m_enumerator.get(), "Null Enumerator");

  std::string pickled;
  EnumerationStrategyPickler::pickle(m_enumerator, pickled);
  return pickled;
}
|
||||
|
||||
//! Replaces the current enumeration strategy with the one that
//  EnumerationStrategyPickler::fromPickle() rebuilds from \c state.
//  \param state  pickled strategy; presumably a string obtained from
//                getState() (which writes via EnumerationStrategyPickler).
void EnumerateLibraryBase::setState(const std::string &state) {
|
||||
m_enumerator = EnumerationStrategyPickler::fromPickle(state);
|
||||
}
|
||||
|
||||
void EnumerateLibraryBase::resetState() {
|
||||
PRECONDITION(m_initialEnumerator.get(),
|
||||
"Unset initial enumerator");
|
||||
m_enumerator.reset(m_initialEnumerator->copy());
|
||||
}
|
||||
|
||||
std::vector<std::vector<std::string> > EnumerateLibraryBase::nextSmiles() {
|
||||
std::vector<std::vector<std::string> > result;
|
||||
std::vector<MOL_SPTR_VECT> mols = next();
|
||||
const bool doisomeric = true;
|
||||
result.resize(mols.size());
|
||||
for (size_t i = 0; i < mols.size(); ++i) {
|
||||
result[i].resize(mols[i].size());
|
||||
for (size_t j = 0; j < mols[i].size(); ++j) {
|
||||
if (mols[i][j].get()) result[i][j] = MolToSmiles(*mols[i][j], doisomeric);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
namespace {
|
||||
size_t countMatches( const ROMol& bb, const ROMol& query, int maxMatches) {
|
||||
std::vector<MatchVectType> matches;
|
||||
const bool uniquify = true;
|
||||
const bool useChirality = true;
|
||||
const bool useQueryQueryMatches = false;
|
||||
|
||||
SubstructMatch(bb, query, matches,
|
||||
uniquify, true, useChirality, useQueryQueryMatches,
|
||||
maxMatches+1);
|
||||
return matches.size();
|
||||
}
|
||||
}
|
||||
//! Returns a copy of \c bbs without the reagents that cannot be used with
//  \c rxn.
/*!
  A reagent is dropped from the list for reactant template i when
    - its pointer is null,
    - it does not match template i at all,
    - it matches template i more than params.reagentMaxMatchCount times, or
    - params.sanePartialProducts is set and, for some product template,
      none of the partial products from rxn.runReactant() can be sanitized.

  \param rxn     the reaction whose reactant templates are matched
  \param bbs     one reagent list per reactant template (taken by value)
  \param params  filtering options
  \return the filtered lists, index-aligned with \c bbs
*/
BBS removeNonmatchingReagents(const ChemicalReaction &rxn, BBS bbs,
                              const EnumerationParams &params) {
  PRECONDITION(bbs.size() <= rxn.getNumReactantTemplates(),
               "Number of Reagents not compatible with reaction templates");
  BBS result;
  result.resize(bbs.size());

  for (size_t reactant_idx = 0; reactant_idx < bbs.size(); ++reactant_idx) {
    size_t removedCount = 0;
    // INT_MAX means "no upper limit": one hit is then enough to keep the
    // reagent, so the match search may stop early.
    const unsigned int maxMatches =
        (params.reagentMaxMatchCount == INT_MAX)
            ? 0
            : rdcast<unsigned int>(params.reagentMaxMatchCount);

    ROMOL_SPTR reactantTemplate = rxn.getReactants()[reactant_idx];
    for (size_t reagent_idx = 0; reagent_idx < bbs[reactant_idx].size();
         ++reagent_idx) {
      ROMOL_SPTR mol = bbs[reactant_idx][reagent_idx];

      // A null reagent can never react; drop it instead of dereferencing it.
      bool removeReagent = !mol.get();

      if (!removeReagent) {
        size_t matches = countMatches(*mol, *reactantTemplate, maxMatches);
        if (!matches ||
            matches > rdcast<size_t>(params.reagentMaxMatchCount)) {
          removeReagent = true;
        }
      }

      if (!removeReagent && params.sanePartialProducts) {
        // see if we have any sane products in the results
        std::vector<MOL_SPTR_VECT> partialProducts =
            rxn.runReactant(mol, reactant_idx);
        for (size_t productTemplate_idx = 0;
             productTemplate_idx < partialProducts.size();
             ++productTemplate_idx) {
          int saneProducts = 0;
          for (size_t product_idx = 0;
               product_idx < partialProducts[productTemplate_idx].size();
               ++product_idx) {
            try {
              ROMol *product =
                  partialProducts[productTemplate_idx][product_idx].get();
              if (!product) continue;  // nothing to sanitize

              RWMol *m = dynamic_cast<RWMol *>(product);
              if (m) {
                MolOps::sanitizeMol(*m);
              } else {
                // Not an editable molecule: sanitize an editable copy
                // rather than dereferencing the failed cast.
                RWMol editable(*product);
                MolOps::sanitizeMol(editable);
              }
              saneProducts++;
            } catch (...) {
              // A sanitization failure only means this product is not
              // sane; it is simply not counted.
            }
          }

          if (!saneProducts) {
            // if any product template has no sane products, we bail
            removeReagent = true;
            break;
          }
        }
      }

      if (removeReagent) {
        removedCount++;
      } else {
        result[reactant_idx].push_back(mol);
      }
    }

    if (removedCount) {
      BOOST_LOG(rdInfoLog) << "Removed " << removedCount
                           << " non matching reagents at template "
                           << reactant_idx << std::endl;
    }
  }
  return result;
}
|
||||
|
||||
//! Builds a library that enumerates \c bbs with the default
//  CartesianProductStrategy.
//  Reagents that do not fit the reaction are filtered out first (see
//  removeNonmatchingReagents); the strategy is then initialized on the
//  filtered lists and a copy of it is kept as the reset point.
//  Note: m_rxn (the base-class copy of \c rxn) is used for the filtering.
EnumerateLibrary::EnumerateLibrary(const ChemicalReaction &rxn, const BBS &bbs,
                                   const EnumerationParams &params)
    : EnumerateLibraryBase(rxn, new CartesianProductStrategy),
      m_bbs(removeNonmatchingReagents(m_rxn, bbs, params)) {
  m_enumerator->initialize(m_rxn, m_bbs);
  m_initialEnumerator.reset(m_enumerator->copy());
}
|
||||
|
||||
//! Builds a library that enumerates \c bbs with a copy of \c enumerator.
//  Reagents that do not fit the reaction are filtered out first (see
//  removeNonmatchingReagents); the copied strategy is then initialized on
//  the filtered lists and a copy of it is kept as the reset point.
//  The caller's \c enumerator object is not modified.
EnumerateLibrary::EnumerateLibrary(const ChemicalReaction &rxn, const BBS &bbs,
                                   const EnumerationStrategyBase &enumerator,
                                   const EnumerationParams &params)
    : EnumerateLibraryBase(rxn),
      m_bbs(removeNonmatchingReagents(m_rxn, bbs, params)) {
  m_enumerator.reset(enumerator.copy());
  m_enumerator->initialize(m_rxn, m_bbs);
  m_initialEnumerator.reset(m_enumerator->copy());
}
|
||||
|
||||
//! Copy constructor: the base-class part is copied through
//  EnumerateLibraryBase's copy constructor and the reagent table m_bbs is
//  copy-constructed from rhs.m_bbs.
EnumerateLibrary::EnumerateLibrary(const EnumerateLibrary &rhs)
|
||||
: EnumerateLibraryBase(rhs), m_bbs(rhs.m_bbs) {}
|
||||
|
||||
std::vector<MOL_SPTR_VECT> EnumerateLibrary::next() {
|
||||
PRECONDITION(static_cast<bool>(*this), "No more enumerations");
|
||||
const RGROUPS &reactantIndices = m_enumerator->next();
|
||||
MOL_SPTR_VECT reactants(m_bbs.size());
|
||||
|
||||
for (size_t i = 0; i < m_bbs.size(); ++i) {
|
||||
reactants[i] = m_bbs[i][reactantIndices[i]];
|
||||
}
|
||||
|
||||
return m_rxn.runReactants(reactants);
|
||||
}
|
||||
|
||||
//! Writes this library to \c ss as a boost text archive.
//  When RDK_USE_BOOST_SERIALIZATION is not defined, the function instead
//  fails with the precondition "BOOST SERIALIZATION NOT INSTALLED".
void EnumerateLibrary::toStream(std::ostream &ss) const {
|
||||
#ifdef RDK_USE_BOOST_SERIALIZATION
|
||||
boost::archive::text_oarchive ar(ss);
|
||||
ar << *this;
|
||||
#else
|
||||
PRECONDITION(0, "BOOST SERIALIZATION NOT INSTALLED");
|
||||
#endif
|
||||
}
|
||||
|
||||
//! Reads this library's state from a boost text archive on \c ss
//  (the counterpart of toStream).
//  When RDK_USE_BOOST_SERIALIZATION is not defined, the function instead
//  fails with the precondition "BOOST SERIALIZATION NOT INSTALLED".
void EnumerateLibrary::initFromStream(std::istream &ss) {
|
||||
#ifdef RDK_USE_BOOST_SERIALIZATION
|
||||
boost::archive::text_iarchive ar(ss);
|
||||
ar >> *this;
|
||||
#else
|
||||
PRECONDITION(0, "BOOST SERIALIZATION NOT INSTALLED");
|
||||
#endif
|
||||
}
|
||||
|
||||
//! Multiplies all entries of \c sizes with arbitrary precision.
//  Returns the product when it is strictly below the largest
//  boost::uint64_t value, and EnumerationStrategyBase::EnumerationOverflow
//  otherwise.
boost::uint64_t computeNumProducts(const RGROUPS &sizes) {
  boost::multiprecision::cpp_int total = 1;
  const size_t numRows = sizes.size();
  for (size_t row = 0; row != numRows; ++row) {
    total *= sizes[row];
  }

  const bool fits = total < std::numeric_limits<boost::uint64_t>::max();
  if (!fits) {
    return EnumerationStrategyBase::EnumerationOverflow;
  }
  return total.convert_to<boost::uint64_t>();
}
|
||||
|
||||
//! Picks one reagent from every building-block list.
//  rgroups[i] is used as the index into bbs[i]; both containers must have
//  the same length. Indices are not range-checked.
MOL_SPTR_VECT getReactantsFromRGroups(const std::vector<MOL_SPTR_VECT> &bbs,
                                      const RGROUPS &rgroups) {
  PRECONDITION(bbs.size() == rgroups.size(),
               "BBS and RGROUPS must have the same # reactants");

  const size_t numLists = bbs.size();
  MOL_SPTR_VECT picked;
  picked.reserve(numLists);
  for (size_t slot = 0; slot != numLists; ++slot) {
    const MOL_SPTR_VECT &pool = bbs[slot];
    picked.push_back(pool[rgroups[slot]]);
  }
  return picked;
}
|
||||
|
||||
//! Compile-time capability check: returns true when this file was built
//  with RDK_USE_BOOST_SERIALIZATION defined, false otherwise.
bool EnumerateLibraryCanSerialize() {
|
||||
#ifdef RDK_USE_BOOST_SERIALIZATION
|
||||
return true;
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
}
|
||||
183
Code/GraphMol/ChemReactions/Enumerate/Enumerate.h
Normal file
183
Code/GraphMol/ChemReactions/Enumerate/Enumerate.h
Normal file
@@ -0,0 +1,183 @@
|
||||
//
|
||||
// Copyright (c) 2015, Novartis Institutes for BioMedical Research Inc.
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following
|
||||
// disclaimer in the documentation and/or other materials provided
|
||||
// with the distribution.
|
||||
// * Neither the name of Novartis Institutes for BioMedical Research Inc.
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
#ifndef RDKIT_ENUMERATE_H
|
||||
#define RDKIT_ENUMERATE_H
|
||||
#include "EnumerateBase.h"
|
||||
|
||||
namespace RDKit {
|
||||
|
||||
//! This is a class for providing enumeration options that control
|
||||
// how enumerations are performed.
|
||||
/*!
|
||||
Option
|
||||
reagentMaxMatchCount [default INT_MAX]
|
||||
This specifies how many times the reactant template can match a reagent.
|
||||
|
||||
sanePartialProducts [default false]
|
||||
If true, requires that all products of the reagent plus the product templates
|
||||
pass chemical sanitization. Note that if the product template itself
|
||||
does not pass sanitization, then none of the products will.
|
||||
*/
|
||||
struct EnumerationParams {
  // Upper limit on how often a reactant template may match a reagent;
  // INT_MAX is the default.
  int reagentMaxMatchCount;
  // When true, partial products are checked for sanitization; off by
  // default.
  bool sanePartialProducts;

  //! Default options: INT_MAX match limit, no sanity check on partial
  //  products.
  EnumerationParams()
      : reagentMaxMatchCount(INT_MAX),
        sanePartialProducts(false) {}

  //! Member-wise copy.
  EnumerationParams(const EnumerationParams &rhs)
      : reagentMaxMatchCount(rhs.reagentMaxMatchCount),
        sanePartialProducts(rhs.sanePartialProducts) {}
};
|
||||
|
||||
|
||||
//! Helper function, remove reagents that are incompatible
|
||||
// with the reaction.
|
||||
// rxn must be sanitized, initialized and preprocessed.
|
||||
// this happens automatically in EnumerateLibrary
|
||||
// \param rxn     reaction providing the reactant templates
// \param bbs     one reagent list per reactant template (taken by value)
// \param params  filtering options; defaults to EnumerationParams()
// \return the reagent lists with the incompatible reagents removed
EnumerationTypes::BBS removeNonmatchingReagents(
    const ChemicalReaction &rxn,
    EnumerationTypes::BBS bbs,
    const EnumerationParams &params = EnumerationParams());
|
||||
|
||||
//! This is a class for running reactions on sets of reagents.
|
||||
/*!
|
||||
This class is a fully self contained reaction engine that can be
|
||||
serialized and restarted. For example, a million products can
|
||||
be generated, the engine can be saved for later and reloaded
|
||||
to retrieve the next million products.
|
||||
|
||||
basic usage will be something like:
|
||||
\verbatim
|
||||
ChemicalReaction rxn = ...
|
||||
BBS bbs(num_rgroups);
|
||||
... somehow LoadRGroups(bbs[0]);
|
||||
... somehow LoadRGroups(bbs[1]..);
|
||||
...
|
||||
EnumerateLibrary en(rxn, bbs);
|
||||
for(; (bool)en; ++i) {
|
||||
// This is the same as rxn.runReactants( reagents );
|
||||
std::vector<MOL_SPTR_VECT> products = en.next();
|
||||
...
|
||||
}
|
||||
\endverbatim
|
||||
|
||||
In general, reactions will enumerate to more products than desired,
|
||||
a standard use is:
|
||||
|
||||
\verbatim
|
||||
for(int i=0;i<num_samples && (bool)en; ++i) {
|
||||
std::vector<MOL_SPTR_VECT> products = en.next();
|
||||
...
|
||||
}
|
||||
\endverbatim
|
||||
*/
|
||||
|
||||
|
||||
class EnumerateLibrary : public EnumerateLibraryBase {
|
||||
EnumerationTypes::BBS m_bbs;
|
||||
|
||||
public:
|
||||
EnumerateLibrary() : EnumerateLibraryBase(), m_bbs() {}
|
||||
EnumerateLibrary(const std::string &s) : EnumerateLibraryBase(), m_bbs() {
|
||||
initFromString(s);
|
||||
}
|
||||
|
||||
EnumerateLibrary(const ChemicalReaction &rxn,
|
||||
const EnumerationTypes::BBS &reagents,
|
||||
const EnumerationParams & params = EnumerationParams());
|
||||
EnumerateLibrary(const ChemicalReaction &rxn,
|
||||
const EnumerationTypes::BBS &reagents,
|
||||
const EnumerationStrategyBase &enumerator,
|
||||
const EnumerationParams & params = EnumerationParams());
|
||||
EnumerateLibrary(const EnumerateLibrary &rhs);
|
||||
|
||||
//! Return the reagents used in the library
|
||||
const EnumerationTypes::BBS &getReagents() const { return m_bbs; }
|
||||
|
||||
//! Get the next product set
|
||||
std::vector<MOL_SPTR_VECT> next();
|
||||
|
||||
void toStream(std::ostream &ss) const;
|
||||
void initFromStream(std::istream &ss);
|
||||
|
||||
private:
|
||||
#ifdef RDK_USE_BOOST_SERIALIZATION
|
||||
friend class boost::serialization::access;
|
||||
template <class Archive>
|
||||
void save(Archive &ar, const unsigned int /*version*/) const {
|
||||
ar &boost::serialization::base_object<EnumerateLibraryBase>(*this);
|
||||
size_t sz = m_bbs.size();
|
||||
ar &sz;
|
||||
|
||||
std::string pickle;
|
||||
for (size_t i = 0; i < m_bbs.size(); ++i) {
|
||||
sz = m_bbs[i].size();
|
||||
ar &sz;
|
||||
for (size_t j = 0; j < m_bbs[i].size(); ++j) {
|
||||
MolPickler::pickleMol(*m_bbs[i][j], pickle);
|
||||
ar &pickle;
|
||||
}
|
||||
}
|
||||
}
|
||||
template <class Archive>
|
||||
void load(Archive &ar, const unsigned int /*version*/) {
|
||||
ar &boost::serialization::base_object<EnumerateLibraryBase>(*this);
|
||||
|
||||
size_t sz;
|
||||
ar &sz;
|
||||
|
||||
m_bbs.resize(sz);
|
||||
|
||||
for (size_t i = 0; i < m_bbs.size(); ++i) {
|
||||
ar &sz;
|
||||
m_bbs[i].resize(sz);
|
||||
std::string pickle;
|
||||
for (size_t j = 0; j < m_bbs[i].size(); ++j) {
|
||||
ar &pickle;
|
||||
RWMol *mol = new RWMol();
|
||||
MolPickler::molFromPickle(pickle, *mol);
|
||||
m_bbs[i][j].reset(mol);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
BOOST_SERIALIZATION_SPLIT_MEMBER();
|
||||
#endif
|
||||
};
|
||||
|
||||
bool EnumerateLibraryCanSerialize();
|
||||
|
||||
}
|
||||
#endif
|
||||
200
Code/GraphMol/ChemReactions/Enumerate/EnumerateBase.h
Normal file
200
Code/GraphMol/ChemReactions/Enumerate/EnumerateBase.h
Normal file
@@ -0,0 +1,200 @@
|
||||
//
|
||||
// Copyright (c) 2015, Novartis Institutes for BioMedical Research Inc.
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following
|
||||
// disclaimer in the documentation and/or other materials provided
|
||||
// with the distribution.
|
||||
// * Neither the name of Novartis Institutes for BioMedical Research Inc.
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
#ifndef RDKIT_ENUMERATEBASE_H
|
||||
#define RDKIT_ENUMERATEBASE_H
|
||||
|
||||
#include <vector>
|
||||
#include "EnumerateTypes.h"
|
||||
#include "../Reaction.h"
|
||||
#include "EnumerationPickler.h"
|
||||
|
||||
#include "EnumerationStrategyBase.h"
|
||||
#include "CartesianProduct.h"
|
||||
#include "../ReactionPickler.h"
|
||||
#include <GraphMol/MolPickler.h>
|
||||
|
||||
namespace RDKit {
|
||||
//! Base class for enumerating chemical reactions from collections of
|
||||
// building blocks and reagents.
|
||||
/*!
|
||||
basic usage:
|
||||
|
||||
\verbatim
|
||||
EnumerateLibraryBase &enumerator;
|
||||
while (enumerator) {
|
||||
std::vector<MOL_SPTR_VECT> res = enumerator.next();
|
||||
// do something with enumeration products here
|
||||
}
|
||||
\endverbatim
|
||||
|
||||
See Reaction.h for more details on how ChemicalReactions are
|
||||
used.
|
||||
*/
|
||||
class EnumerateLibraryBase {
|
||||
protected:
|
||||
ChemicalReaction m_rxn;
|
||||
boost::shared_ptr<EnumerationStrategyBase> m_enumerator;
|
||||
boost::shared_ptr<EnumerationStrategyBase> m_initialEnumerator;
|
||||
public:
|
||||
//! default constructor
|
||||
EnumerateLibraryBase() : m_rxn(),
|
||||
m_enumerator(),
|
||||
m_initialEnumerator() {}
|
||||
|
||||
//! construct with a chemical reaction and an enumeration strategy
|
||||
EnumerateLibraryBase(const ChemicalReaction &rxn,
|
||||
EnumerationStrategyBase *enumerator = 0)
|
||||
: m_rxn(rxn),
|
||||
m_enumerator(enumerator ? enumerator : new CartesianProductStrategy),
|
||||
m_initialEnumerator( m_enumerator->copy() )
|
||||
{
|
||||
m_rxn.initReactantMatchers();
|
||||
}
|
||||
|
||||
//! Copy constructor
|
||||
EnumerateLibraryBase(const EnumerateLibraryBase &rhs)
|
||||
: m_rxn(rhs.m_rxn),
|
||||
m_enumerator(rhs.m_enumerator ? rhs.m_enumerator->copy() : 0),
|
||||
m_initialEnumerator( m_enumerator->copy() ) {}
|
||||
|
||||
virtual ~EnumerateLibraryBase() {}
|
||||
|
||||
//! Are there any enumerations left?
|
||||
virtual operator bool() const {
|
||||
PRECONDITION(m_enumerator.get(), "Null enumeration strategy");
|
||||
return static_cast<bool>(*m_enumerator);
|
||||
}
|
||||
|
||||
//! reset the enumeration to the beginning.
|
||||
void reset() {
|
||||
if(m_initialEnumerator.get()) {
|
||||
m_enumerator.reset(m_initialEnumerator->copy());
|
||||
}
|
||||
}
|
||||
|
||||
//! returns the underlying chemical reaction
|
||||
const ChemicalReaction &getReaction() const { return m_rxn; }
|
||||
|
||||
//! return the current enumeration strategy
|
||||
const EnumerationStrategyBase &getEnumerator() {
|
||||
PRECONDITION(m_enumerator.get(), "Null Enumerator");
|
||||
return *m_enumerator;
|
||||
}
|
||||
|
||||
//! get the next set of products (See run_Reactants) for details
|
||||
// This returns a vector of a vector of molecules.
|
||||
// Each result vector corresponds for a product template.
|
||||
// i.e.
|
||||
// res = library.next();
|
||||
// res[0] are the results for library.getReaction().getProdcts()[0]
|
||||
virtual std::vector<MOL_SPTR_VECT> next() = 0;
|
||||
|
||||
//! get the next set of products as smiles
|
||||
// This returns a vector of a vector strings.
|
||||
// Each result vector corresponds for a product template.
|
||||
virtual std::vector<std::vector<std::string> > nextSmiles();
|
||||
|
||||
//! Get the current position into the reagent vectors
|
||||
// Use getState/setState to save/restart the enumeration
|
||||
// from this position.
|
||||
const EnumerationTypes::RGROUPS &getPosition() const;
|
||||
|
||||
//! Get the current state of the enumerator
|
||||
// This is the position of the enumerator and the enumerators
|
||||
// state that can be used to restart enumerating
|
||||
// from this position.
|
||||
std::string getState() const;
|
||||
|
||||
//! Set the current state of the enumerator
|
||||
// Restart the enumerator from this position.
|
||||
void setState(const std::string &);
|
||||
|
||||
//! Reset the enumerator to the beginning
|
||||
void resetState();
|
||||
|
||||
|
||||
//! serializes (pickles) to a stream
|
||||
virtual void toStream(std::ostream &ss) const = 0;
|
||||
|
||||
//! returns a string with a serialized (pickled) representation
|
||||
virtual std::string Serialize() const {
|
||||
std::stringstream ss;
|
||||
toStream(ss);
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
//! initializes from a stream pickle
|
||||
virtual void initFromStream(std::istream &ss) = 0;
|
||||
|
||||
//! initializes from a string pickle
|
||||
virtual void initFromString(const std::string &text) {
|
||||
std::stringstream ss(text);
|
||||
initFromStream(ss);
|
||||
}
|
||||
|
||||
private:
|
||||
#ifdef RDK_USE_BOOST_SERIALIZATION
|
||||
friend class boost::serialization::access;
|
||||
template <class Archive>
|
||||
void save(Archive &ar, const unsigned int) const {
|
||||
std::string pickle;
|
||||
ReactionPickler::pickleReaction(m_rxn, pickle);
|
||||
ar &pickle;
|
||||
ar &m_enumerator;
|
||||
// we handle the m_initialEnumerator from a string
|
||||
// for backwards compatibility with a unreleased
|
||||
// version
|
||||
EnumerationStrategyPickler::pickle(m_initialEnumerator,
|
||||
pickle);
|
||||
ar &pickle;
|
||||
}
|
||||
template <class Archive>
|
||||
void load(Archive &ar, const unsigned int /*version*/) {
|
||||
std::string pickle;
|
||||
ar &pickle;
|
||||
ReactionPickler::reactionFromPickle(pickle, m_rxn);
|
||||
ar &m_enumerator;
|
||||
ar &pickle;
|
||||
m_initialEnumerator = \
|
||||
EnumerationStrategyPickler::fromPickle(pickle);
|
||||
|
||||
}
|
||||
|
||||
BOOST_SERIALIZATION_SPLIT_MEMBER();
|
||||
#endif
|
||||
};
|
||||
|
||||
#ifdef RDK_USE_BOOST_SERIALIZATION
|
||||
BOOST_SERIALIZATION_ASSUME_ABSTRACT(EnumerateLibraryBase)
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
58
Code/GraphMol/ChemReactions/Enumerate/EnumerateTypes.h
Normal file
58
Code/GraphMol/ChemReactions/Enumerate/EnumerateTypes.h
Normal file
@@ -0,0 +1,58 @@
|
||||
//
|
||||
// Copyright (c) 2015, Novartis Institutes for BioMedical Research Inc.
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following
|
||||
// disclaimer in the documentation and/or other materials provided
|
||||
// with the distribution.
|
||||
// * Neither the name of Novartis Institutes for BioMedical Research Inc.
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
#ifndef ENUMERATETYPES_H
|
||||
#define ENUMERATETYPES_H
|
||||
|
||||
#include <GraphMol/RDKitBase.h>
|
||||
|
||||
namespace RDKit {
|
||||
namespace EnumerationTypes {
|
||||
//! BBS - Helper typedef for holding building blocks for reactions
|
||||
//! holds vectors of reagents for each reactant in a Reaction
|
||||
typedef std::vector<MOL_SPTR_VECT> BBS;
|
||||
|
||||
//! RGROUPS Helper typedef for indexing into the BBS vector
|
||||
//! - The indices into the BBS molecule list to create a product
|
||||
//! Example
|
||||
//! RGROUPS groups;
|
||||
//! groups.push_back(10);
|
||||
//! groups.push_back(5);
|
||||
//!
|
||||
//! Will create a product from the following building blocks:
|
||||
//! MOL_SPTR_VECT building_blocks;
|
||||
//! building_blocks.push_back( BBS[0][groups[0]] );
|
||||
//! building_blocks.push_back( BBS[1][groups[1]] );
|
||||
//! rxn.runReactants( building_blocks );
|
||||
typedef std::vector<boost::uint64_t> RGROUPS;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
110
Code/GraphMol/ChemReactions/Enumerate/EnumerationPickler.cpp
Normal file
110
Code/GraphMol/ChemReactions/Enumerate/EnumerationPickler.cpp
Normal file
@@ -0,0 +1,110 @@
|
||||
//
|
||||
// Copyright (c) 2015, Novartis Institutes for BioMedical Research Inc.
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following
|
||||
// disclaimer in the documentation and/or other materials provided
|
||||
// with the distribution.
|
||||
// * Neither the name of Novartis Institutes for BioMedical Research Inc.
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
|
||||
#include "EnumerationPickler.h"
|
||||
#include "CartesianProduct.h"
|
||||
#include "RandomSample.h"
|
||||
#include "RandomSampleAllBBs.h"
|
||||
|
||||
|
||||
#ifdef RDK_USE_BOOST_SERIALIZATION
|
||||
#include <RDGeneral/BoostStartInclude.h>
|
||||
#include <boost/archive/text_oarchive.hpp>
|
||||
#include <boost/archive/text_iarchive.hpp>
|
||||
#include <boost/serialization/shared_ptr.hpp>
|
||||
#include <RDGeneral/BoostEndInclude.h>
|
||||
#endif
|
||||
|
||||
namespace RDKit {
|
||||
|
||||
std::string GetClass(const EnumerationStrategyBase *en) {
|
||||
if (dynamic_cast<const CartesianProductStrategy *>(en)) return "-->cartesian";
|
||||
if (dynamic_cast<const RandomSampleStrategy *>(en)) return "-->random";
|
||||
if (dynamic_cast<const RandomSampleAllBBsStrategy *>(en))
|
||||
return "-->randombbs";
|
||||
return "Unknown!";
|
||||
}
|
||||
|
||||
namespace EnumerationStrategyPickler {
|
||||
|
||||
void pickle(const boost::shared_ptr<EnumerationStrategyBase> &enumerator,
|
||||
std::ostream &ss) {
|
||||
#ifdef RDK_USE_BOOST_SERIALIZATION
|
||||
boost::archive::text_oarchive ar(ss);
|
||||
ar &enumerator;
|
||||
#else
|
||||
RDUNUSED_PARAM(enumerator);
|
||||
RDUNUSED_PARAM(ss);
|
||||
PRECONDITION(0, "BOOST SERIALIZATION NOT INSTALLED");
|
||||
#endif
|
||||
}
|
||||
|
||||
void pickle(const boost::shared_ptr<EnumerationStrategyBase> &enumerator,
|
||||
std::string &s) {
|
||||
#ifdef RDK_USE_BOOST_SERIALIZATION
|
||||
std::stringstream ss;
|
||||
pickle(enumerator, ss);
|
||||
s = ss.str();
|
||||
#else
|
||||
RDUNUSED_PARAM(enumerator);
|
||||
RDUNUSED_PARAM(s);
|
||||
PRECONDITION(0, "BOOST SERIALIZATION NOT INSTALLED");
|
||||
#endif
|
||||
}
|
||||
|
||||
boost::shared_ptr<EnumerationStrategyBase> fromPickle(std::istream &pickle) {
|
||||
boost::shared_ptr<EnumerationStrategyBase> enumerator;
|
||||
#ifdef RDK_USE_BOOST_SERIALIZATION
|
||||
boost::archive::text_iarchive ar(pickle);
|
||||
ar &enumerator;
|
||||
return enumerator;
|
||||
#else
|
||||
RDUNUSED_PARAM(pickle);
|
||||
PRECONDITION(0, "BOOST SERIALIZATION NOT INSTALLED");
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
boost::shared_ptr<EnumerationStrategyBase> fromPickle(
|
||||
const std::string &pickle) {
|
||||
#ifdef RDK_USE_BOOST_SERIALIZATION
|
||||
std::stringstream ss(pickle);
|
||||
return fromPickle(ss);
|
||||
#else
|
||||
RDUNUSED_PARAM(pickle);
|
||||
PRECONDITION(0, "BOOST SERIALIZATION NOT INSTALLED");
|
||||
return boost::shared_ptr<EnumerationStrategyBase>();
|
||||
#endif
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
56
Code/GraphMol/ChemReactions/Enumerate/EnumerationPickler.h
Normal file
56
Code/GraphMol/ChemReactions/Enumerate/EnumerationPickler.h
Normal file
@@ -0,0 +1,56 @@
|
||||
//
|
||||
// Copyright (c) 2015, Novartis Institutes for BioMedical Research Inc.
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following
|
||||
// disclaimer in the documentation and/or other materials provided
|
||||
// with the distribution.
|
||||
// * Neither the name of Novartis Institutes for BioMedical Research Inc.
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
#ifndef ENUMERATIONPICKLER_H
|
||||
#define ENUMERATIONPICKLER_H
|
||||
|
||||
#include "EnumerationStrategyBase.h"
|
||||
|
||||
namespace RDKit {
|
||||
namespace EnumerationStrategyPickler {
|
||||
//! pickles an EnumerationStrategy and adds the results to a stream \c ss
|
||||
void pickle(const boost::shared_ptr<EnumerationStrategyBase> &enumerator,
|
||||
std::ostream &ss);
|
||||
void pickle(const boost::shared_ptr<EnumerationStrategyBase> &enumerator,
|
||||
std::string &s);
|
||||
|
||||
//! constructs an EnumerationStrategy from a pickle read from a stream
|
||||
//! Since an EnumerationStrategyBase is polymorphic, this must return
|
||||
//! a shared pointer to the EnumerationStrategyBase
|
||||
boost::shared_ptr<EnumerationStrategyBase> fromPickle(std::istream &pickle);
|
||||
|
||||
//! constructs an EnumerationStrategy from a pickle stored in a string and returns a shared pointer to the EnumerationStrategyBase
|
||||
boost::shared_ptr<EnumerationStrategyBase> fromPickle(
|
||||
const std::string &pickle);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
199
Code/GraphMol/ChemReactions/Enumerate/EnumerationStrategyBase.h
Normal file
199
Code/GraphMol/ChemReactions/Enumerate/EnumerationStrategyBase.h
Normal file
@@ -0,0 +1,199 @@
|
||||
//
|
||||
// Copyright (c) 2015, Novartis Institutes for BioMedical Research Inc.
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following
|
||||
// disclaimer in the documentation and/or other materials provided
|
||||
// with the distribution.
|
||||
// * Neither the name of Novartis Institutes for BioMedical Research Inc.
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
#ifndef ENUMERATION_STRATEGY_H
|
||||
#define ENUMERATION_STRATEGY_H
|
||||
|
||||
#include "EnumerateTypes.h"
|
||||
#include "../Reaction.h"
|
||||
#include <vector>
|
||||
#include <RDGeneral/BoostStartInclude.h>
|
||||
#include <boost/cstdint.hpp>
|
||||
#include <boost/multiprecision/cpp_int.hpp>
|
||||
#include <boost/serialization/assume_abstract.hpp>
|
||||
#include <boost/serialization/vector.hpp>
|
||||
#include <boost/serialization/shared_ptr.hpp>
|
||||
#include <RDGeneral/BoostEndInclude.h>
|
||||
|
||||
#include <GraphMol/RDKitBase.h>
|
||||
|
||||
namespace RDKit {
|
||||
|
||||
//! class for flagging enumeration strategy errors
|
||||
class EnumerationStrategyException : public std::exception {
 public:
  //! construct from a C string; a null pointer is treated as an empty message
  EnumerationStrategyException(const char *msg) : _msg(msg ? msg : "") {}
  //! construct from a std::string
  EnumerationStrategyException(const std::string &msg) : _msg(msg) {}
  //! returns the message passed at construction
  const char *message() const { return _msg.c_str(); }
  //! std::exception interface: returns the same text as message() so that
  //! code catching std::exception can report the actual error
  const char *what() const throw() { return _msg.c_str(); }
  ~EnumerationStrategyException() throw() {}

 private:
  std::string _msg;
};
|
||||
|
||||
//! Return the number of elements per input vector
|
||||
/*! \param bbs vector<vector<T> >
|
||||
|
||||
\result vector<unint64_t> number of elements in each vector
|
||||
*/
|
||||
template <class T>
|
||||
EnumerationTypes::RGROUPS getSizesFromBBs(const std::vector<std::vector<T> > &bbs) {
|
||||
EnumerationTypes::RGROUPS sizes;
|
||||
for (size_t i = 0; i < bbs.size(); ++i) sizes.push_back(bbs[i].size());
|
||||
return sizes;
|
||||
}
|
||||
|
||||
//! getSizesFromReactants
|
||||
//! Helper function for enumeration, bbs are stored in a
|
||||
//! std::vector< std::vector<boost::shared_ptr<ROMol> > >
|
||||
//
|
||||
EnumerationTypes::RGROUPS getSizesFromReactants(const std::vector<MOL_SPTR_VECT> &bbs);
|
||||
|
||||
//! getReactantsFromRGroups
|
||||
//! Helper function for enumeration, bbs are stored in a
|
||||
//! std::vector< std::vector<boost::shared_ptr<ROMol> > >
|
||||
//
|
||||
MOL_SPTR_VECT getReactantsFromRGroups(const std::vector<MOL_SPTR_VECT> &bbs,
|
||||
const EnumerationTypes::RGROUPS &rgroups);
|
||||
|
||||
//! computeNumProducts
|
||||
//! Returns the number of possible product combination from
|
||||
//! The given numbers of building blocks for each rgroup
|
||||
//! or EnumerationStrategyBase::EnumerationOverflow if the
|
||||
//! number will not fit into the machines integer type.
|
||||
//! n.b. An overflow simply means there are a lot of products
|
||||
//! not that they cannot be enumerated
|
||||
boost::uint64_t computeNumProducts(const EnumerationTypes::RGROUPS &sizes);
|
||||
|
||||
//! Base Class for enumeration strategies
|
||||
//! Usage:
|
||||
//! EnumerationStrategyBase must be initialized with both a reaction
|
||||
//! and the building block (molecule) vector to be sampled.
|
||||
//!
|
||||
//! \verbatim
|
||||
//! EnumerationStrategyBase &eb = ...
|
||||
//! if(eb) { // can we get another entry
|
||||
//! const EnumerationTypes::RGROUPS &v = eb.next();
|
||||
//! v[0] // RGroup 0 position
|
||||
//! v[1] // RGroup 1 position...
|
||||
//! }
|
||||
//! \endverbatim
|
||||
|
||||
class EnumerationStrategyBase {
|
||||
protected:
|
||||
EnumerationTypes::RGROUPS m_permutation; // where are we currently?
|
||||
EnumerationTypes::RGROUPS m_permutationSizes; // m_permutationSizes num bbs per group
|
||||
boost::uint64_t m_numPermutations; // total number of permutations for this group
|
||||
// -1 if > ssize_t::max
|
||||
public:
|
||||
static const boost::uint64_t EnumerationOverflow = static_cast<boost::uint64_t>(-1);
|
||||
EnumerationStrategyBase()
|
||||
: m_permutation(), m_permutationSizes(), m_numPermutations() {}
|
||||
|
||||
virtual ~EnumerationStrategyBase() {}
|
||||
|
||||
virtual const char *type() const { return "EnumerationStrategyBase"; }
|
||||
|
||||
//! Initialize the enumerator based on the reaction and the
|
||||
//! supplied building blocks
|
||||
//! This is the standard API point.
|
||||
void initialize(const ChemicalReaction &reaction,
|
||||
const EnumerationTypes::BBS &building_blocks) {
|
||||
// default initialization, may be overridden (sets the # reactants
|
||||
// and computes the default # of permutations)
|
||||
m_permutationSizes = getSizesFromBBs(building_blocks);
|
||||
m_permutation.resize(m_permutationSizes.size());
|
||||
|
||||
m_numPermutations = computeNumProducts(m_permutationSizes);
|
||||
std::fill(m_permutation.begin(), m_permutation.end(), 0);
|
||||
|
||||
initializeStrategy(reaction, building_blocks);
|
||||
}
|
||||
|
||||
// ! Initialize derived class
|
||||
// ! must exist, EnumerationStrategyBase structures are already initialized
|
||||
virtual void initializeStrategy(const ChemicalReaction &reaction,
|
||||
const EnumerationTypes::BBS &building_blocks) = 0;
|
||||
|
||||
//! returns true if there are more permutations left
|
||||
//! random enumerators may always return true...
|
||||
virtual operator bool() const = 0;
|
||||
|
||||
//! The current permutation {r1, r2, ...}
|
||||
virtual const EnumerationTypes::RGROUPS &next() = 0;
|
||||
|
||||
//! copy the enumeration strategy complete with current state
|
||||
virtual EnumerationStrategyBase *copy() const = 0;
|
||||
|
||||
//! The current position in the enumeration
|
||||
const EnumerationTypes::RGROUPS &getPosition() const { return m_permutation; }
|
||||
|
||||
//! a result of EnumerationOverflow indicates that the number of
|
||||
//! permutations is not computable with the current
|
||||
//! rdlonglong size.
|
||||
boost::uint64_t getNumPermutations() const { return m_numPermutations; }
|
||||
|
||||
//! Returns how many permutations have been processed by this strategy
|
||||
virtual boost::uint64_t getPermutationIdx() const = 0;
|
||||
|
||||
//! Skip the specified number of permutations (useful for
|
||||
//! resetting state to a known position)
|
||||
bool skip(boost::uint64_t skipCount) {
|
||||
for (boost::uint64_t i = 0; i < skipCount; ++i) next();
|
||||
return true;
|
||||
}
|
||||
|
||||
protected:
|
||||
//! Initialize the internal data structures
|
||||
//! i.e. RGROUPS = {10,40,50};
|
||||
void internalInitialize(const EnumerationTypes::RGROUPS &rgroups) {
|
||||
m_permutation.resize(rgroups.size());
|
||||
m_permutationSizes = rgroups;
|
||||
m_numPermutations = computeNumProducts(m_permutationSizes);
|
||||
std::fill(m_permutation.begin(), m_permutation.end(), 0);
|
||||
}
|
||||
|
||||
private:
|
||||
friend class boost::serialization::access;
|
||||
template <class Archive>
|
||||
void serialize(Archive &ar, const unsigned int /*version*/) {
|
||||
ar &m_permutation;
|
||||
ar &m_permutationSizes;
|
||||
ar &m_numPermutations;
|
||||
}
|
||||
};
|
||||
|
||||
BOOST_SERIALIZATION_ASSUME_ABSTRACT(EnumerationStrategyBase)
|
||||
}
|
||||
|
||||
BOOST_CLASS_VERSION(RDKit::EnumerationStrategyBase, 1)
|
||||
|
||||
#endif
|
||||
281
Code/GraphMol/ChemReactions/Enumerate/EvenSamplePairs.cpp
Normal file
281
Code/GraphMol/ChemReactions/Enumerate/EvenSamplePairs.cpp
Normal file
@@ -0,0 +1,281 @@
|
||||
//
|
||||
// Copyright (c) 2016, Novartis Institutes for BioMedical Research Inc.
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following
|
||||
// disclaimer in the documentation and/or other materials provided
|
||||
// with the distribution.
|
||||
// * Neither the name of Novartis Institutes for BioMedical Research Inc.
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
#include "EvenSamplePairs.h"
|
||||
#include <boost/format.hpp>
|
||||
#include <stdint.h>
|
||||
|
||||
|
||||
namespace RDKit {
|
||||
|
||||
using namespace EnumerationTypes;
|
||||
// Based on an implementation from a correspondence with Bernd Rohde.
|
||||
void EvenSamplePairsStrategy::initializeStrategy(const ChemicalReaction &,
|
||||
const BBS &bbs) {
|
||||
size_t npos = bbs.size();
|
||||
used_count.resize(npos);
|
||||
std::fill(used_count.begin(), used_count.end(), 0);
|
||||
|
||||
var_used.resize(npos);
|
||||
for (size_t i = 0; i < npos; ++i) {
|
||||
var_used[i].resize(m_permutationSizes[i]);
|
||||
std::fill(var_used[i].begin(), var_used[i].end(), 0);
|
||||
}
|
||||
|
||||
boost::uint64_t nmonomers = 0;
|
||||
for (size_t i = 0; i < bbs.size(); ++i) nmonomers += m_permutationSizes[i];
|
||||
|
||||
pair_used.resize(nmonomers);
|
||||
for (size_t i = 0; i < nmonomers; ++i) {
|
||||
pair_used[i].resize(nmonomers);
|
||||
std::fill(pair_used[i].begin(), pair_used[i].end(), 0);
|
||||
}
|
||||
|
||||
pair_counts.resize(npos);
|
||||
for (size_t i = 0; i < npos; i++) {
|
||||
pair_counts[i].resize(npos);
|
||||
std::fill(pair_counts[i].begin(), pair_counts[i].end(), 0);
|
||||
}
|
||||
|
||||
/* Initialize random number generator */
|
||||
/* Find modulus */
|
||||
PRECONDITION(m_numPermutations >= 0,
|
||||
"Number of permutations too large to Evenly sample");
|
||||
for (M = 1; M < rdcast<size_t>(m_numPermutations); M = 2 * M)
|
||||
;
|
||||
/* Set factor */
|
||||
a = 5;
|
||||
b = 7;
|
||||
|
||||
// control of random number and heuristics
|
||||
seed = 0;
|
||||
m_numPermutationsProcessed = 0;
|
||||
nslack = 0; // increase this to break evenness criteria
|
||||
rejected_period = 0;
|
||||
rejected_unique = 0;
|
||||
rejected_slack_condition = 0;
|
||||
rejected_bb_sampling_condition = 0;
|
||||
|
||||
selected.clear(); // clear the selected (unique) set
|
||||
}
|
||||
|
||||
// Try to add the given encoded seed position into
|
||||
// the current set of return groups. This checks to
|
||||
// see if the BBS are evenly sampled as pairs. If
|
||||
// they currently are not, reject the selection.
|
||||
// This is fairly suboptimal for large collections
|
||||
// of building blocks and may take a while to
|
||||
// terminate...
|
||||
bool EvenSamplePairsStrategy::try_add(size_t seed) {
|
||||
const RGROUPS &digits = decode(seed);
|
||||
const RGROUPS &rgroups = m_permutationSizes;
|
||||
size_t islack = 0;
|
||||
size_t num_rgroups = m_permutationSizes.size();
|
||||
|
||||
for (size_t i = 0; i < num_rgroups; ++i) {
|
||||
if (var_used[i][digits[i]]) islack += var_used[i][digits[i]];
|
||||
if (islack > nslack) {
|
||||
// add better heuristic here??
|
||||
rejected_slack_condition += 1;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
islack = 0;
|
||||
size_t ioffset = 0;
|
||||
// check that building block pairs get evenly sampled
|
||||
for (size_t i = 0; i < num_rgroups; ++i) {
|
||||
size_t joffset = 0;
|
||||
for (size_t j = 0; j < num_rgroups; ++j) {
|
||||
if (j == i) continue;
|
||||
size_t ii = digits[i] + ioffset;
|
||||
size_t jj = digits[j] + joffset;
|
||||
if (pair_used[ii][jj] > 0) {
|
||||
double numer = (double)pair_used[ii][jj];
|
||||
double denom = sqrt((double)(rgroups[i]) * (double)(rgroups[j]));
|
||||
islack = (int)(numer / denom);
|
||||
}
|
||||
joffset += rgroups[j];
|
||||
}
|
||||
ioffset += rgroups[i];
|
||||
}
|
||||
|
||||
if (islack > nslack) {
|
||||
rejected_bb_sampling_condition += 1;
|
||||
return false;
|
||||
}
|
||||
|
||||
// keep track of bb usage
|
||||
for (size_t i = 0; i < num_rgroups; ++i) {
|
||||
if (var_used[i][digits[i]] == 0) {
|
||||
used_count[i]++;
|
||||
}
|
||||
var_used[i][digits[i]] += 1;
|
||||
if (used_count[i] == rdcast<int64_t>(rgroups[i])) {
|
||||
// complete variable scan => initialize
|
||||
if (nslack > min_nslack && rgroups[i] > 1) // cleared slack on i
|
||||
nslack = min_nslack;
|
||||
|
||||
used_count[i] = 0;
|
||||
for (size_t j = 0; j < rgroups[i]; ++j) {
|
||||
var_used[i][j]--;
|
||||
assert(var_used[i][j] >= 0);
|
||||
if (var_used[i][j] > 0) used_count[i]++;
|
||||
}
|
||||
} // end scan
|
||||
}
|
||||
|
||||
// keep track of BB Pair usage
|
||||
ioffset = 0;
|
||||
for (size_t i = 0; i < num_rgroups; ioffset += rgroups[i], ++i) {
|
||||
size_t joffset = 0;
|
||||
for (size_t j = 0; j < num_rgroups; joffset += rgroups[j], ++j) {
|
||||
if (j == i) {
|
||||
continue;
|
||||
}
|
||||
size_t ii = digits[i] + ioffset;
|
||||
size_t jj = digits[j] + joffset;
|
||||
if (pair_used[ii][jj] == 0) {
|
||||
pair_counts[i][j]++;
|
||||
}
|
||||
pair_used[ii][jj]++;
|
||||
if (pair_counts[i][j] >= rgroups[i] * rgroups[j]) { // all pairs visited
|
||||
if (nslack > min_nslack && (rgroups[i] > 1 || rgroups[j] > 1)) {
|
||||
nslack = min_nslack;
|
||||
}
|
||||
pair_counts[i][j] = 0;
|
||||
for (size_t ii = 0; ii < rgroups[i]; ++ii) {
|
||||
for (size_t jj = 0; jj < rgroups[j]; ++jj) {
|
||||
pair_used[ioffset + ii][joffset + jj]--;
|
||||
if (pair_used[ioffset + ii][joffset + jj] > 0) {
|
||||
pair_counts[i][j]++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
selected.insert(seed);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Returns the next accepted permutation.
// Candidate seeds are produced by the recurrence seed = (seed * a + b) % M.
// A candidate is skipped when it is not a valid permutation index, when it
// was already selected, or when try_add() rejects it.  If a full sweep of M
// candidates yields nothing, the slack is loosened and the sweep is repeated.
// Throws EnumerationStrategyException once every permutation has been
// handed out.
const RGROUPS &EvenSamplePairsStrategy::next() {
  nslack = 0;
  const size_t numPermutations = rdcast<size_t>(m_numPermutations);
  while (m_numPermutationsProcessed < numPermutations) {
    for (size_t l = 0; l < M; ++l) {
      seed = ((seed * a + b) % M);
      // valid permutation indices are 0 .. numPermutations - 1, so a seed
      // equal to numPermutations must be rejected as well (was '>')
      if (seed >= numPermutations) {
        rejected_period += 1;
        continue;
      }
      if (selected.find(seed) != selected.end()) {
        rejected_unique += 1;
        continue;
      }
      if (try_add(seed)) {
        m_numPermutationsProcessed++;
        return decode(seed);
      }
    }

    // reaching this point means the whole sweep was rejected:
    // loosen heuristic
    nslack += 1;
    min_nslack += 1;
  }

  throw EnumerationStrategyException("Ran out of molecules");
}
|
||||
|
||||
std::string EvenSamplePairsStrategy::stats() const {
|
||||
std::ostringstream ss;
|
||||
|
||||
size_t npos = m_permutationSizes.size();
|
||||
const RGROUPS &nvars = m_permutationSizes;
|
||||
size_t i, l, j, ii, jj, ioffset, joffset;
|
||||
ss << "#BEGIN# BBSTAT\n";
|
||||
for (i = 0; i < npos; i++) {
|
||||
size_t maxcount = 0;
|
||||
if (nvars[i] == 1) continue;
|
||||
for (j = 0; j < nvars[i]; j++)
|
||||
if (maxcount < var_used[i][j]) maxcount = var_used[i][j];
|
||||
|
||||
ss << boost::format("%lu\t%lu\t%6.2f") % (i + 1) % nvars[i] %
|
||||
((double)m_numPermutationsProcessed / nvars[i]);
|
||||
|
||||
for (l = 0; l <= maxcount; l++) {
|
||||
size_t n = 0;
|
||||
for (j = 0; j < nvars[i]; j++)
|
||||
if (var_used[i][j] == l) n++;
|
||||
if (n > 0) ss << boost::format("\t%lu|%lu") % l % n;
|
||||
}
|
||||
ss << std::endl;
|
||||
}
|
||||
ss << "#END# BBSTAT\n";
|
||||
|
||||
ss << "#BEGIN# PAIRSTAT\n";
|
||||
for (i = 0, ioffset = 0; i < npos; ioffset += nvars[i], i++) {
|
||||
if (nvars[i] == 1) continue;
|
||||
for (j = 0, joffset = 0; j < npos; joffset += nvars[j], j++) {
|
||||
size_t maxcount = 0;
|
||||
if (nvars[j] == 1) continue;
|
||||
if (j <= i) continue;
|
||||
for (ii = 0; ii < nvars[i]; ii++)
|
||||
for (jj = 0; jj < nvars[j]; jj++)
|
||||
if (maxcount < pair_used[ii + ioffset][jj + joffset])
|
||||
maxcount = pair_used[ii + ioffset][jj + joffset];
|
||||
ss << boost::format("%lu\t%lu\t%lu\t%lu\t%6.2f") % (i + 1) %
|
||||
(j + 1) % nvars[i] % nvars[j] %
|
||||
((double)m_numPermutationsProcessed /
|
||||
(nvars[i] * nvars[j]));
|
||||
for (l = 0; l <= maxcount; l++) {
|
||||
int n = 0;
|
||||
for (ii = 0; ii < nvars[i]; ii++)
|
||||
for (jj = 0; jj < nvars[j]; jj++)
|
||||
if (l == pair_used[ii + ioffset][jj + joffset]) n++;
|
||||
if (n > 0) ss << boost::format("\t%ld|%d") % l % n;
|
||||
}
|
||||
ss << boost::format("\n");
|
||||
}
|
||||
}
|
||||
ss << "#END# PAIRSTAT\n";
|
||||
|
||||
ss << "Rejected Period: " << rejected_period << std::endl;
|
||||
ss << "Rejected (dupes): " << rejected_unique << std::endl;
|
||||
ss << "Rejected Slack Conditions: " << rejected_slack_condition
|
||||
<< std::endl;
|
||||
ss << "Rejected Pair Sampling: " << rejected_bb_sampling_condition
|
||||
<< std::endl;
|
||||
return ss.str();
|
||||
}
|
||||
}
|
||||
193
Code/GraphMol/ChemReactions/Enumerate/EvenSamplePairs.h
Normal file
193
Code/GraphMol/ChemReactions/Enumerate/EvenSamplePairs.h
Normal file
@@ -0,0 +1,193 @@
|
||||
//
|
||||
// Copyright (c) 2016, Novartis Institutes for BioMedical Research Inc.
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following
|
||||
// disclaimer in the documentation and/or other materials provided
|
||||
// with the distribution.
|
||||
// * Neither the name of Novartis Institutes for BioMedical Research Inc.
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
|
||||
#ifndef RGROUP_EVEN_SAMPLE_H
|
||||
#define RGROUP_EVEN_SAMPLE_H
|
||||
|
||||
#include "EnumerationStrategyBase.h"
|
||||
#ifdef RDK_USE_BOOST_SERIALIZATION
|
||||
#include <boost/serialization/set.hpp>
|
||||
#endif
|
||||
#include <stdint.h>
|
||||
|
||||
namespace RDKit {
|
||||
//! EvenSamplePairsStrategy
|
||||
/*! Randomly sample Pairs evenly from a collection of building blocks
|
||||
This is a good strategy for choosing a relatively small selection
|
||||
of building blocks from a larger set. As the amount of work needed
|
||||
to retrieve the next evenly sample building block grows with the
|
||||
number of samples, this method performs progressively worse as the
|
||||
number of samples gets larger.
|
||||
|
||||
See EnumerationStrategyBase for more details.
|
||||
*/
|
||||
|
||||
class EvenSamplePairsStrategy : public EnumerationStrategyBase {
|
||||
boost::uint64_t m_numPermutationsProcessed;
|
||||
|
||||
std::vector<int64_t> used_count;
|
||||
std::vector<std::vector<size_t> > var_used;
|
||||
std::vector<std::vector<size_t> > pair_used;
|
||||
std::vector<std::vector<size_t> > pair_counts;
|
||||
std::set<size_t> selected;
|
||||
|
||||
size_t seed; // last seed for permutation (starts at 0)
|
||||
size_t M, a, b; // random number stuff
|
||||
size_t nslack, min_nslack;
|
||||
size_t rejected_period, rejected_unique;
|
||||
size_t rejected_slack_condition, rejected_bb_sampling_condition;
|
||||
|
||||
public:
|
||||
EvenSamplePairsStrategy()
|
||||
: EnumerationStrategyBase(),
|
||||
m_numPermutationsProcessed(),
|
||||
used_count(),
|
||||
var_used(),
|
||||
pair_used(),
|
||||
pair_counts(),
|
||||
selected(),
|
||||
seed(),
|
||||
M(),
|
||||
a(),
|
||||
b(),
|
||||
nslack(),
|
||||
min_nslack(),
|
||||
rejected_period(),
|
||||
rejected_unique(),
|
||||
rejected_slack_condition(),
|
||||
rejected_bb_sampling_condition() {}
|
||||
|
||||
EvenSamplePairsStrategy(const EvenSamplePairsStrategy &rhs)
|
||||
: EnumerationStrategyBase(rhs),
|
||||
m_numPermutationsProcessed(rhs.m_numPermutationsProcessed),
|
||||
used_count(rhs.used_count),
|
||||
var_used(rhs.var_used),
|
||||
pair_used(rhs.pair_used),
|
||||
pair_counts(rhs.pair_counts),
|
||||
selected(rhs.selected),
|
||||
seed(rhs.seed),
|
||||
M(rhs.M),
|
||||
a(rhs.a),
|
||||
b(rhs.b),
|
||||
nslack(rhs.nslack),
|
||||
min_nslack(rhs.min_nslack),
|
||||
rejected_period(rhs.rejected_period),
|
||||
rejected_unique(rhs.rejected_unique),
|
||||
rejected_slack_condition(rhs.rejected_slack_condition),
|
||||
rejected_bb_sampling_condition(rhs.rejected_bb_sampling_condition) {}
|
||||
|
||||
virtual const char *type() const { return "EvenSamplePairsStrategy"; }
|
||||
|
||||
//! This is a class for enumerating RGroups using Cartesian Products of
|
||||
//! reagents.
|
||||
/*!
|
||||
basic usage:
|
||||
|
||||
\verbatim
|
||||
std::vector<MOL_SPTR_VECT> bbs;
|
||||
bbs.push_back( bbs_for_reactants_1 );
|
||||
bbs.push_back( bbs_for_reactants_2 );
|
||||
|
||||
EvenSamplePairsStrategy rgroups;
|
||||
rgroups.initialize(rxn, bbs);
|
||||
for(size_t i=0; i<num_samples && rgroups; ++i) {
|
||||
MOL_SPTR_VECT rvect = getReactantsFromRGroups(bbs, rgroups.next());
|
||||
std::vector<MOL_SPTR_VECT> lprops = rxn.RunReactants(rvect);
|
||||
...
|
||||
}
|
||||
\endverbatim
|
||||
*/
|
||||
using EnumerationStrategyBase::initialize;
|
||||
|
||||
virtual void initializeStrategy(const ChemicalReaction &, const EnumerationTypes::BBS &);
|
||||
|
||||
//! The current permutation {r1, r2, ...}
|
||||
virtual const EnumerationTypes::RGROUPS &next();
|
||||
|
||||
virtual boost::uint64_t getPermutationIdx() const {
|
||||
return m_numPermutationsProcessed; }
|
||||
|
||||
virtual operator bool() const { return true; }
|
||||
|
||||
EnumerationStrategyBase *copy() const {
|
||||
return new EvenSamplePairsStrategy(*this);
|
||||
}
|
||||
|
||||
std::string stats() const;
|
||||
|
||||
private:
|
||||
friend class boost::serialization::access;
|
||||
|
||||
// decode a packed integer into an RGroup selection
|
||||
const EnumerationTypes::RGROUPS &decode(size_t seed) {
|
||||
for (int64_t j = m_permutationSizes.size() - 1; j >= 0; j--) {
|
||||
m_permutation[j] = seed % m_permutationSizes[j];
|
||||
seed /= m_permutationSizes[j];
|
||||
}
|
||||
return m_permutation;
|
||||
}
|
||||
|
||||
bool try_add(size_t seed);
|
||||
|
||||
public:
|
||||
#ifdef RDK_USE_BOOST_SERIALIZATION
|
||||
template <class Archive>
|
||||
void serialize(Archive &ar, const unsigned int /*version*/) {
|
||||
// invoke serialization of the base class
|
||||
ar &boost::serialization::base_object<EnumerationStrategyBase>(*this);
|
||||
ar &m_numPermutationsProcessed;
|
||||
ar &used_count;
|
||||
ar &var_used;
|
||||
ar &pair_used;
|
||||
ar &pair_counts;
|
||||
ar &selected;
|
||||
|
||||
ar &seed;
|
||||
|
||||
ar &M;
|
||||
ar &a;
|
||||
ar &b;
|
||||
|
||||
ar &nslack;
|
||||
ar &min_nslack;
|
||||
ar &rejected_period;
|
||||
ar &rejected_unique;
|
||||
ar &rejected_slack_condition;
|
||||
ar &rejected_bb_sampling_condition;
|
||||
}
|
||||
#endif
|
||||
};
|
||||
}
|
||||
|
||||
// The version macro comes from Boost.Serialization, so only use it when
// serialization support is compiled in (same guard as the other strategies).
#ifdef RDK_USE_BOOST_SERIALIZATION
BOOST_CLASS_VERSION(RDKit::EvenSamplePairsStrategy, 1)
#endif
|
||||
|
||||
#endif
|
||||
162
Code/GraphMol/ChemReactions/Enumerate/RandomSample.h
Normal file
162
Code/GraphMol/ChemReactions/Enumerate/RandomSample.h
Normal file
@@ -0,0 +1,162 @@
|
||||
//
|
||||
// Copyright (c) 2015, Novartis Institutes for BioMedical Research Inc.
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following
|
||||
// disclaimer in the documentation and/or other materials provided
|
||||
// with the distribution.
|
||||
// * Neither the name of Novartis Institutes for BioMedical Research Inc.
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
|
||||
#ifndef RGROUP_RANDOM_SAMPLE_H
|
||||
#define RGROUP_RANDOM_SAMPLE_H
|
||||
|
||||
#include "EnumerationStrategyBase.h"
|
||||
#include <boost/random.hpp>
|
||||
#include <boost/random/uniform_int_distribution.hpp>
|
||||
#include <sstream>
|
||||
|
||||
namespace RDKit {
|
||||
|
||||
//! This is a class for fully randomly sampling reagents.
|
||||
// Note that this enumerator never halts.
|
||||
/*!
|
||||
basic usage:
|
||||
|
||||
\verbatim
|
||||
std::vector<MOL_SPTR_VECT> bbs;
|
||||
bbs.push_back( bbs_for_reactants_1 );
|
||||
bbs.push_back( bbs_for_reactants_2 );
|
||||
|
||||
RandomSampleStrategy rgroups;
|
||||
rgroups.initialize(rxn, bbs);
|
||||
for(size_t i=0; i<num_samples && rgroups; ++i) {
|
||||
MOL_SPTR_VECT rvect = getReactantsFromRGroups(bbs, rgroups.next());
|
||||
std::vector<MOL_SPTR_VECT> lprops = rxn.RunReactants(rvect);
|
||||
...
|
||||
}
|
||||
\endverbatim
|
||||
|
||||
See EnumerationStrategyBase for more details and usage.
|
||||
*/
|
||||
class RandomSampleStrategy : public EnumerationStrategyBase {
|
||||
boost::uint64_t m_numPermutationsProcessed;
|
||||
boost::minstd_rand m_rng;
|
||||
std::vector<boost::random::uniform_int_distribution<> > m_distributions;
|
||||
|
||||
public:
|
||||
RandomSampleStrategy()
|
||||
: EnumerationStrategyBase(),
|
||||
m_numPermutationsProcessed(),
|
||||
m_rng(),
|
||||
m_distributions() {
|
||||
for (size_t i = 0; i < m_permutation.size(); ++i) {
|
||||
m_distributions.push_back(
|
||||
boost::random::uniform_int_distribution<>(0, m_permutation[i] - 1));
|
||||
}
|
||||
}
|
||||
|
||||
using EnumerationStrategyBase::initialize;
|
||||
|
||||
virtual void initializeStrategy(const ChemicalReaction &, const EnumerationTypes::BBS &) {
|
||||
m_distributions.clear();
|
||||
for (size_t i = 0; i < m_permutationSizes.size(); ++i) {
|
||||
m_distributions.push_back(boost::random::uniform_int_distribution<>(
|
||||
0, m_permutationSizes[i] - 1));
|
||||
}
|
||||
|
||||
m_numPermutationsProcessed = 0;
|
||||
}
|
||||
|
||||
virtual const char *type() const { return "RandomSampleStrategy"; }
|
||||
|
||||
//! The current permutation {r1, r2, ...}
|
||||
virtual const EnumerationTypes::RGROUPS &next() {
|
||||
for (size_t i = 0; i < m_permutation.size(); ++i) {
|
||||
m_permutation[i] = m_distributions[i](m_rng);
|
||||
}
|
||||
|
||||
++m_numPermutationsProcessed;
|
||||
|
||||
return m_permutation;
|
||||
}
|
||||
|
||||
virtual boost::uint64_t getPermutationIdx() const {
|
||||
return m_numPermutationsProcessed; }
|
||||
|
||||
virtual operator bool() const { return true; }
|
||||
|
||||
EnumerationStrategyBase *copy() const {
|
||||
return new RandomSampleStrategy(*this);
|
||||
}
|
||||
|
||||
private:
|
||||
#ifdef RDK_USE_BOOST_SERIALIZATION
|
||||
friend class boost::serialization::access;
|
||||
|
||||
template <class Archive>
|
||||
void save(Archive &ar, const unsigned int /*version*/) const {
|
||||
// invoke serialization of the base class
|
||||
ar << boost::serialization::base_object<const EnumerationStrategyBase>(
|
||||
*this);
|
||||
ar << m_numPermutationsProcessed;
|
||||
|
||||
std::stringstream random;
|
||||
random << m_rng;
|
||||
std::string s = random.str();
|
||||
ar << s;
|
||||
}
|
||||
|
||||
template <class Archive>
|
||||
void load(Archive &ar, const unsigned int /*version*/) {
|
||||
// invoke serialization of the base class
|
||||
ar >> boost::serialization::base_object<EnumerationStrategyBase>(*this);
|
||||
ar >> m_numPermutationsProcessed;
|
||||
std::string s;
|
||||
ar >> s;
|
||||
std::stringstream random(s);
|
||||
random >> m_rng;
|
||||
|
||||
// reset the uniform distributions
|
||||
m_distributions.clear();
|
||||
for (size_t i = 0; i < m_permutationSizes.size(); ++i) {
|
||||
m_distributions.push_back(boost::random::uniform_int_distribution<>(
|
||||
0, m_permutationSizes[i] - 1));
|
||||
}
|
||||
}
|
||||
|
||||
template <class Archive>
|
||||
void serialize(Archive &ar, const unsigned int file_version) {
|
||||
boost::serialization::split_member(ar, *this, file_version);
|
||||
}
|
||||
#endif
|
||||
};
|
||||
}
|
||||
|
||||
#ifdef RDK_USE_BOOST_SERIALIZATION
|
||||
BOOST_CLASS_VERSION(RDKit::RandomSampleStrategy, 1)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
186
Code/GraphMol/ChemReactions/Enumerate/RandomSampleAllBBs.h
Normal file
186
Code/GraphMol/ChemReactions/Enumerate/RandomSampleAllBBs.h
Normal file
@@ -0,0 +1,186 @@
|
||||
//
|
||||
// Copyright (c) 2015, Novartis Institutes for BioMedical Research Inc.
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following
|
||||
// disclaimer in the documentation and/or other materials provided
|
||||
// with the distribution.
|
||||
// * Neither the name of Novartis Institutes for BioMedical Research Inc.
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
|
||||
#ifndef RGROUP_RANDOM_SAMPLE_ALLBBS_H
|
||||
#define RGROUP_RANDOM_SAMPLE_ALLBBS_H
|
||||
|
||||
#include "EnumerationStrategyBase.h"
|
||||
#include <boost/random.hpp>
|
||||
#include <boost/random/uniform_int_distribution.hpp>
|
||||
#include <sstream>
|
||||
|
||||
namespace RDKit {
|
||||
//! RandomSampleAllBBsStrategy
|
||||
//! Randomly sample rgroup indices
|
||||
|
||||
//! This is a class for randomly enumerating reagents that ensures all reagents
|
||||
// are sampled.
|
||||
/*!
|
||||
basic usage:
|
||||
|
||||
\verbatim
|
||||
std::vector<MOL_SPTR_VECT> bbs;
|
||||
bbs.push_back( bbs_for_reactants_1 );
|
||||
bbs.push_back( bbs_for_reactants_2 );
|
||||
|
||||
RandomSampleAllBBsStrategy rgroups;
|
||||
rgroups.initialize(rxn, bbs);
|
||||
for(size_t i=0; i<num_samples && rgroups; ++i) {
|
||||
MOL_SPTR_VECT rvect = getReactantsFromRGroups(bbs, rgroups.next());
|
||||
std::vector<MOL_SPTR_VECT> lprops = rxn.RunReactants(rvect);
|
||||
...
|
||||
}
|
||||
\endverbatim
|
||||
|
||||
See EnumerationStrategyBase for more details and usage.
|
||||
*/
|
||||
|
||||
class RandomSampleAllBBsStrategy : public EnumerationStrategyBase {
|
||||
boost::uint64_t m_numPermutationsProcessed;
|
||||
size_t m_offset;
|
||||
size_t m_maxoffset;
|
||||
|
||||
boost::minstd_rand m_rng;
|
||||
std::vector<boost::random::uniform_int_distribution<> > m_distributions;
|
||||
|
||||
public:
|
||||
RandomSampleAllBBsStrategy()
|
||||
: EnumerationStrategyBase(),
|
||||
m_numPermutationsProcessed(0),
|
||||
m_offset(0),
|
||||
m_maxoffset(0),
|
||||
m_rng(),
|
||||
m_distributions() {
|
||||
for (size_t i = 0; i < m_permutation.size(); ++i) {
|
||||
m_distributions.push_back(
|
||||
boost::random::uniform_int_distribution<>(0, m_permutation[i] - 1));
|
||||
}
|
||||
}
|
||||
using EnumerationStrategyBase::initialize;
|
||||
|
||||
void initializeStrategy(const ChemicalReaction &, const EnumerationTypes::BBS &) {
|
||||
m_distributions.clear();
|
||||
m_permutation.resize(m_permutationSizes.size());
|
||||
m_permutationSizes = m_permutationSizes;
|
||||
m_offset = 0;
|
||||
m_maxoffset =
|
||||
*std::max_element(m_permutationSizes.begin(), m_permutationSizes.end());
|
||||
for (size_t i = 0; i < m_permutationSizes.size(); ++i) {
|
||||
m_distributions.push_back(boost::random::uniform_int_distribution<>(
|
||||
0, m_permutationSizes[i] - 1));
|
||||
}
|
||||
|
||||
m_numPermutationsProcessed = 0;
|
||||
}
|
||||
|
||||
virtual const char *type() const { return "RandomSampleAllBBsStrategy"; }
|
||||
|
||||
//! The current permutation {r1, r2, ...}
|
||||
virtual const EnumerationTypes::RGROUPS &next() {
|
||||
if (m_offset >= m_maxoffset) {
|
||||
for (size_t i = 0; i < m_permutation.size(); ++i) {
|
||||
m_permutation[i] = m_distributions[i](m_rng);
|
||||
}
|
||||
m_offset = 0;
|
||||
} else {
|
||||
for (size_t i = 0; i < m_permutation.size(); ++i) {
|
||||
m_permutation[i] = (m_permutation[i] + 1) % m_permutationSizes[i];
|
||||
}
|
||||
++m_offset;
|
||||
}
|
||||
++m_numPermutationsProcessed;
|
||||
|
||||
return m_permutation;
|
||||
}
|
||||
|
||||
virtual boost::uint64_t getPermutationIdx() const {
|
||||
return m_numPermutationsProcessed; }
|
||||
|
||||
virtual operator bool() const { return true; }
|
||||
|
||||
EnumerationStrategyBase *copy() const {
|
||||
return new RandomSampleAllBBsStrategy(*this);
|
||||
}
|
||||
|
||||
private:
|
||||
#ifdef RDK_USE_BOOST_SERIALIZATION
|
||||
friend class boost::serialization::access;
|
||||
|
||||
template <class Archive>
|
||||
void save(Archive &ar, const unsigned int /*version*/) const {
|
||||
// invoke serialization of the base class
|
||||
ar << boost::serialization::base_object<const EnumerationStrategyBase>(
|
||||
*this);
|
||||
ar << m_numPermutationsProcessed;
|
||||
|
||||
std::stringstream random;
|
||||
random << m_rng;
|
||||
std::string s = random.str();
|
||||
ar << s;
|
||||
|
||||
ar << m_offset;
|
||||
ar << m_maxoffset;
|
||||
}
|
||||
|
||||
template <class Archive>
|
||||
void load(Archive &ar, const unsigned int /*version*/) {
|
||||
// invoke serialization of the base class
|
||||
ar >> boost::serialization::base_object<EnumerationStrategyBase>(*this);
|
||||
ar >> m_numPermutationsProcessed;
|
||||
std::string s;
|
||||
ar >> s;
|
||||
std::stringstream random(s);
|
||||
random >> m_rng;
|
||||
ar >> m_offset;
|
||||
ar >> m_maxoffset;
|
||||
|
||||
// reset the uniform distributions
|
||||
m_distributions.clear();
|
||||
for (size_t i = 0; i < m_permutationSizes.size(); ++i) {
|
||||
m_distributions.push_back(boost::random::uniform_int_distribution<>(
|
||||
0, m_permutationSizes[i] - 1));
|
||||
}
|
||||
}
|
||||
|
||||
template <class Archive>
|
||||
void serialize(Archive &ar, const unsigned int file_version) {
|
||||
boost::serialization::split_member(ar, *this, file_version);
|
||||
}
|
||||
#endif
|
||||
};
|
||||
}
|
||||
|
||||
#ifdef RDK_USE_BOOST_SERIALIZATION
|
||||
BOOST_CLASS_VERSION(RDKit::RandomSampleAllBBsStrategy, 1)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
299
Code/GraphMol/ChemReactions/Enumerate/testEnumerate.cpp
Normal file
299
Code/GraphMol/ChemReactions/Enumerate/testEnumerate.cpp
Normal file
@@ -0,0 +1,299 @@
|
||||
//
|
||||
// Copyright (c) 2015, Novartis Institutes for BioMedical Research Inc.
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following
|
||||
// disclaimer in the documentation and/or other materials provided
|
||||
// with the distribution.
|
||||
// * Neither the name of Novartis Institutes for BioMedical Research Inc.
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
|
||||
#include <RDGeneral/utils.h>
|
||||
#include <GraphMol/RDKitBase.h>
|
||||
#include <GraphMol/RDKitQueries.h>
|
||||
#include <GraphMol/SmilesParse/SmilesParse.h>
|
||||
#include <GraphMol/FileParsers/MolSupplier.h>
|
||||
|
||||
#include <GraphMol/ChemReactions/Enumerate/CartesianProduct.h>
|
||||
#include <GraphMol/ChemReactions/Enumerate/EvenSamplePairs.h>
|
||||
#include <GraphMol/ChemReactions/Enumerate/RandomSample.h>
|
||||
#include <GraphMol/ChemReactions/Enumerate/RandomSampleAllBBs.h>
|
||||
#include <GraphMol/ChemReactions/Enumerate/Enumerate.h>
|
||||
|
||||
#include <GraphMol/ChemReactions/ReactionParser.h>
|
||||
#include <GraphMol/ChemReactions/ReactionUtils.h>
|
||||
#include <GraphMol/ChemReactions/SanitizeRxn.h>
|
||||
|
||||
#ifdef RDK_USE_BOOST_SERIALIZATION
|
||||
#include <RDGeneral/BoostStartInclude.h>
|
||||
#include <boost/archive/text_oarchive.hpp>
|
||||
#include <boost/archive/text_iarchive.hpp>
|
||||
#include <RDGeneral/BoostEndInclude.h>
|
||||
#endif
|
||||
|
||||
using namespace RDKit;
|
||||
|
||||
#ifdef RDK_USE_BOOST_SERIALIZATION
|
||||
// for each starting point check to see that the archive
|
||||
// starts at the same point
|
||||
// Round-trip a strategy through a text archive `len` times.
//
// A working copy (`base`) of `en` is serialized and deserialized before every
// step; the restored object must report the same type and produce the same
// next() value as `base`, and `base` must stay in lock-step with `en`.
void pickleTest(EnumerationStrategyBase &en, size_t len) {
  boost::shared_ptr<EnumerationStrategyBase> base(en.copy());
  TEST_ASSERT(std::string(base->type()) == std::string(en.type()));

  for (size_t i = 0; i < len; ++i) {
    std::stringstream ss;
    {
      boost::archive::text_oarchive ar(ss);
      // operator<< is the output-archive spelling of operator&
      ar << base;
    }
    boost::shared_ptr<EnumerationStrategyBase> copy;
    {
      boost::archive::text_iarchive ar(ss);
      // operator>> is the input-archive spelling of operator&
      ar >> copy;
    }
    TEST_ASSERT(std::string(base->type()) == std::string(copy->type()));
    TEST_ASSERT(base->next() == copy->next());
    TEST_ASSERT(base->getPosition() == en.next());
  }
}
|
||||
#endif
|
||||
|
||||
void testSamplers() {
|
||||
EnumerationTypes::BBS bbs;
|
||||
bbs.resize(3);
|
||||
for (int i = 0; i < 10; ++i)
|
||||
bbs[0].push_back(boost::shared_ptr<ROMol>(SmilesToMol("C=CCN=C=S")));
|
||||
|
||||
for (int i = 0; i < 5; ++i)
|
||||
bbs[1].push_back(boost::shared_ptr<ROMol>(SmilesToMol("NCc1ncc(Cl)cc1Br")));
|
||||
|
||||
for (int i = 0; i < 6; ++i)
|
||||
bbs[2].push_back(
|
||||
boost::shared_ptr<ROMol>(SmilesToMol("NCCCc1ncc(Cl)cc1Br")));
|
||||
|
||||
ChemicalReaction rxn;
|
||||
CartesianProductStrategy cart;
|
||||
cart.initialize(rxn, bbs);
|
||||
RandomSampleStrategy rand;
|
||||
rand.initialize(rxn, bbs);
|
||||
RandomSampleAllBBsStrategy randBBs;
|
||||
randBBs.initialize(rxn, bbs);
|
||||
EvenSamplePairsStrategy even;
|
||||
even.initialize(rxn, bbs);
|
||||
std::vector<boost::shared_ptr<EnumerationStrategyBase> > enumerators;
|
||||
enumerators.push_back(
|
||||
boost::shared_ptr<EnumerationStrategyBase>(cart.copy()));
|
||||
enumerators.push_back(
|
||||
boost::shared_ptr<EnumerationStrategyBase>(rand.copy()));
|
||||
enumerators.push_back(
|
||||
boost::shared_ptr<EnumerationStrategyBase>(randBBs.copy()));
|
||||
enumerators.push_back(
|
||||
boost::shared_ptr<EnumerationStrategyBase>(even.copy()));
|
||||
#ifdef RDK_USE_BOOST_SERIALIZATION
|
||||
for (size_t i = 0; i < enumerators.size(); ++i) {
|
||||
TEST_ASSERT(enumerators[i]->getNumPermutations() == 10 * 5 * 6);
|
||||
pickleTest(*enumerators[i], 10 * 5 * 6);
|
||||
}
|
||||
#endif
|
||||
// for(auto&& i: enumerators) {
|
||||
// TEST_ASSERT(i->getNumPermutations() == 10*5*6);
|
||||
//}
|
||||
}
|
||||
|
||||
void testEvenSamplers() {
|
||||
EnumerationTypes::BBS bbs;
|
||||
bbs.resize(3);
|
||||
unsigned long R1 = 6000;
|
||||
unsigned long R2 = 500;
|
||||
unsigned long R3 = 10000;
|
||||
for (unsigned long i = 0; i < R1; ++i)
|
||||
bbs[0].push_back(boost::shared_ptr<ROMol>(SmilesToMol("C=CCN=C=S")));
|
||||
|
||||
for (unsigned long i = 0; i < R2; ++i)
|
||||
bbs[1].push_back(boost::shared_ptr<ROMol>(SmilesToMol("NCc1ncc(Cl)cc1Br")));
|
||||
|
||||
for (unsigned long i = 0; i < R3; ++i)
|
||||
bbs[2].push_back(
|
||||
boost::shared_ptr<ROMol>(SmilesToMol("NCCCc1ncc(Cl)cc1Br")));
|
||||
|
||||
ChemicalReaction rxn;
|
||||
EvenSamplePairsStrategy even;
|
||||
even.initialize(rxn, bbs);
|
||||
std::cout << even.getNumPermutations() << " " << R1 * R2 * R3 << std::endl;
|
||||
TEST_ASSERT(even.getNumPermutations() == R1 * R2 * R3);
|
||||
|
||||
for (size_t i = 0; i < 5000; ++i) {
|
||||
even.next();
|
||||
}
|
||||
even.stats();
|
||||
}
|
||||
|
||||
// Expected product SMILES for testEnumerations(), in the order in which the
// enumeration is expected to produce them (2 x 3 building blocks = 6 entries).
const char *smiresults[] = {
    "C=CCNC(=S)NCc1ncc(Cl)cc1Br", "CC=CCNC(=S)NCc1ncc(Cl)cc1Br",
    "C=CCNC(=S)NCCc1ncc(Cl)cc1Br", "CC=CCNC(=S)NCCc1ncc(Cl)cc1Br",
    "C=CCNC(=S)NCCCc1ncc(Cl)cc1Br", "CC=CCNC(=S)NCCCc1ncc(Cl)cc1Br"};
|
||||
|
||||
void testEnumerations() {
|
||||
EnumerationTypes::BBS bbs;
|
||||
bbs.resize(2);
|
||||
|
||||
bbs[0].push_back(boost::shared_ptr<ROMol>(SmilesToMol("C=CCN=C=S")));
|
||||
bbs[0].push_back(boost::shared_ptr<ROMol>(SmilesToMol("CC=CCN=C=S")));
|
||||
|
||||
bbs[1].push_back(boost::shared_ptr<ROMol>(SmilesToMol("NCc1ncc(Cl)cc1Br")));
|
||||
bbs[1].push_back(boost::shared_ptr<ROMol>(SmilesToMol("NCCc1ncc(Cl)cc1Br")));
|
||||
bbs[1].push_back(boost::shared_ptr<ROMol>(SmilesToMol("NCCCc1ncc(Cl)cc1Br")));
|
||||
|
||||
ChemicalReaction *rxn = RxnSmartsToChemicalReaction(
|
||||
"[N;$(N-[#6]):3]=[C;$(C=S):1].[N;$(N[#6]);!$(N=*);!$([N-]);!$(N#*);"
|
||||
"!$([ND3]);!$([ND4]);!$(N[O,N]);!$(N[C,S]=[S,O,N]):2]>>[N:3]-[C:1]-[N+0:"
|
||||
"2]");
|
||||
|
||||
{
|
||||
EnumerateLibrary en(*rxn, bbs);
|
||||
size_t i = 0;
|
||||
for (; (bool)en; ++i) {
|
||||
std::vector<std::vector<std::string> > res = en.nextSmiles();
|
||||
TEST_ASSERT(res.size() == 1);
|
||||
TEST_ASSERT(res[0].size() == 1);
|
||||
TEST_ASSERT(res[0][0] == smiresults[i]);
|
||||
TEST_ASSERT(i<=6);
|
||||
}
|
||||
TEST_ASSERT(i == 6);
|
||||
// tests reset
|
||||
en.resetState();
|
||||
i = 0;
|
||||
for (; (bool)en; ++i) {
|
||||
std::vector<std::vector<std::string> > res = en.nextSmiles();
|
||||
TEST_ASSERT(res.size() == 1);
|
||||
TEST_ASSERT(res[0].size() == 1);
|
||||
TEST_ASSERT(res[0][0] == smiresults[i]);
|
||||
TEST_ASSERT(i<=6);
|
||||
}
|
||||
TEST_ASSERT(i == 6);
|
||||
|
||||
}
|
||||
|
||||
#ifdef RDK_USE_BOOST_SERIALIZATION
|
||||
{
|
||||
|
||||
boost::shared_ptr<EnumerateLibrary> en(
|
||||
new EnumerateLibrary(*rxn, bbs, RandomSampleStrategy()));
|
||||
|
||||
std::vector<std::vector<std::vector<std::string> > >smir;
|
||||
for (size_t j = 0; j < 10; ++j) {
|
||||
std::vector<std::vector<std::string> > smiles = en->nextSmiles();
|
||||
smir.push_back(smiles);
|
||||
}
|
||||
|
||||
en->resetState();
|
||||
|
||||
for (size_t i = 0; i < 1000; ++i) {
|
||||
// pickle and unpickle
|
||||
std::stringstream ss;
|
||||
{
|
||||
boost::archive::text_oarchive ar(ss);
|
||||
ar &en;
|
||||
}
|
||||
boost::shared_ptr<EnumerateLibrary> copy;
|
||||
{
|
||||
boost::archive::text_iarchive ar(ss);
|
||||
ar ©
|
||||
}
|
||||
|
||||
for (size_t j = 0; j < 10; ++j) {
|
||||
TEST_ASSERT(en->nextSmiles() == copy->nextSmiles());
|
||||
}
|
||||
|
||||
copy->resetState();
|
||||
for (size_t j = 0; j < 10; ++j) {
|
||||
TEST_ASSERT(smir[j] == copy->nextSmiles());
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
delete rxn;
|
||||
}
|
||||
|
||||
const char *rxndata = "$RXN\nUntitled Document-1\n ChemDraw10291618492D\n\n 3 1\n$MOL\n\n\n\n 2 1 0 0 0 0 0 0 0 0999 V2000\n 0.4125 0.0000 0.0000 N 0 0 0 0 0 0 0 0 0 3 0 0\n -0.4125 0.0000 0.0000 R2 0 0 0 0 0 0 0 0 0 2 0 0\n 1 2 1 0 0\nM END\n$MOL\n\n\n\n 2 1 0 0 0 0 0 0 0 0999 V2000\n -0.4125 0.0000 0.0000 R1 0 0 0 0 0 0 0 0 0 1 0 0\n 0.4125 0.0000 0.0000 Cl 0 0 0 0 0 0 0 0 0 0 0 0\n 1 2 1 0 0\nM END\n$MOL\n\n\n\n 2 1 0 0 0 0 0 0 0 0999 V2000\n 0.4125 0.0000 0.0000 N 0 0 0 0 0 0 0 0 0 5 0 0\n -0.4125 0.0000 0.0000 R4 0 0 0 0 0 0 0 0 0 4 0 0\n 1 2 1 0 0\nM END\n$MOL\n\n\n\n 14 15 0 0 0 0 0 0 0 0999 V2000\n 0.5072 -0.5166 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 0.5072 0.3084 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 1.2949 -0.7616 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n 1.7817 -0.0880 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 1.2967 0.5794 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 1.5558 -1.5443 0.0000 R1 0 0 0 0 0 0 0 0 0 1 0 0\n -0.2073 0.7208 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -0.9218 0.3083 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -0.9217 -0.5167 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -0.2073 -0.9292 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -1.6362 0.7208 0.0000 N 0 0 0 0 0 0 0 0 0 3 0 0\n 1.5452 1.3661 0.0000 N 0 0 0 0 0 0 0 0 0 5 0 0\n 2.3507 1.5443 0.0000 R4 0 0 0 0 0 0 0 0 0 4 0 0\n -2.3507 0.3083 0.0000 R2 0 0 0 0 0 0 0 0 0 2 0 0\n 1 2 2 0 0\n 1 3 1 0 0\n 3 4 1 0 0\n 4 5 1 0 0\n 5 2 1 0 0\n 3 6 1 0 0\n 2 7 1 0 0\n 7 8 2 0 0\n 8 9 1 0 0\n 9 10 2 0 0\n 10 1 1 0 0\n 8 11 1 0 0\n 12 13 1 0 0\n 11 14 1 0 0\n 12 5 1 0 0\nM END\n";
|
||||
|
||||
void testInsaneEnumerations() {
|
||||
EnumerationTypes::BBS bbs;
|
||||
bbs.resize(3);
|
||||
|
||||
ChemicalReaction *rxn2 = RxnBlockToChemicalReaction(rxndata);
|
||||
//RxnOps::sanitizeRxn(*rxn2, MolOps::AdjustQueryParameters());
|
||||
MatchVectType tvect;
|
||||
|
||||
bbs[0].push_back(boost::shared_ptr<ROMol>(SmilesToMol("CCNCC")));
|
||||
bbs[0].push_back(boost::shared_ptr<ROMol>(SmilesToMol("NCC")));
|
||||
std::cerr << "0,0 " << (int)SubstructMatch(*bbs[0][0].get(), *rxn2->getReactants()[0].get(), tvect) << std::endl;
|
||||
std::cerr << "0,1 " << (int)SubstructMatch(*bbs[0][1].get(), *rxn2->getReactants()[0].get(), tvect) << std::endl;
|
||||
|
||||
bbs[1].push_back(boost::shared_ptr<ROMol>(SmilesToMol("ClC1CCC1")));
|
||||
bbs[1].push_back(boost::shared_ptr<ROMol>(SmilesToMol("ClC1CCC1Cl")));
|
||||
std::cerr << "1,0 " << (int)SubstructMatch(*bbs[1][0].get(), *rxn2->getReactants()[1].get(), tvect) << std::endl;
|
||||
std::cerr << "1,1 " << (int)SubstructMatch(*bbs[1][1].get(), *rxn2->getReactants()[1].get(), tvect) << std::endl;
|
||||
|
||||
bbs[2].push_back(boost::shared_ptr<ROMol>(SmilesToMol("CCNCC")));
|
||||
bbs[2].push_back(boost::shared_ptr<ROMol>(SmilesToMol("NCC")));
|
||||
std::cerr << "2,0 " << (int)SubstructMatch(*bbs[2][0].get(), *rxn2->getReactants()[2].get(), tvect) << std::endl;
|
||||
std::cerr << "2,1 " << (int)SubstructMatch(*bbs[2][1].get(), *rxn2->getReactants()[2].get(), tvect) << std::endl;
|
||||
|
||||
|
||||
{
|
||||
ChemicalReaction *rxn = RxnBlockToChemicalReaction(rxndata);
|
||||
RxnOps::sanitizeRxn(*rxn, MolOps::AdjustQueryParameters());
|
||||
std::cerr << ChemicalReactionToRxnBlock(*rxn) << std::endl;
|
||||
EnumerationParams ThereCanBeOnlyOne;
|
||||
ThereCanBeOnlyOne.reagentMaxMatchCount = 1;
|
||||
EnumerationTypes::BBS bbs2 = removeNonmatchingReagents(
|
||||
*rxn, bbs,
|
||||
ThereCanBeOnlyOne);
|
||||
TEST_ASSERT(bbs2[0].size() == 1);
|
||||
TEST_ASSERT(bbs2[1].size() == 1);
|
||||
TEST_ASSERT(bbs2[2].size() == 1);
|
||||
|
||||
delete rxn;
|
||||
}
|
||||
delete rxn2;
|
||||
}
|
||||
|
||||
// Test driver: initialises RDKit logging and runs the enumeration tests.
int main(int argc, char *argv[]) {
  RDLog::InitLogs();

  // A first argument starting with "-l" sets the long-test flag.
  // NOTE(review): doLong is computed but nothing below consults it.
  const bool doLong = (argc > 1) && !strncmp(argv[1], "-l", 2);
  (void)doLong;

  // NOTE(review): the suites below are commented out, so only
  // testInsaneEnumerations() runs - confirm this is intentional before
  // merging.
  /*
  testSamplers();
  testEvenSamplers();
  testEnumerations();
  */
  testInsaneEnumerations();
}
|
||||
@@ -89,6 +89,7 @@ bool preprocessReaction(ChemicalReaction &rxn,
|
||||
const std::map<std::string, ROMOL_SPTR> &queries,
|
||||
const std::string &propName) {
|
||||
rxn.setImplicitPropertiesFlag(true);
|
||||
rxn.initReactantMatchers();
|
||||
|
||||
if (rxn.validate(numWarnings, numErrors)) {
|
||||
addRecursiveQueriesToReaction(rxn,
|
||||
|
||||
391
Code/GraphMol/ChemReactions/SanitizeRxn.cpp
Normal file
391
Code/GraphMol/ChemReactions/SanitizeRxn.cpp
Normal file
@@ -0,0 +1,391 @@
|
||||
//
|
||||
// Copyright (c) 2015, Novartis Institutes for BioMedical Research Inc.
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following
|
||||
// disclaimer in the documentation and/or other materials provided
|
||||
// with the distribution.
|
||||
// * Neither the name of Novartis Institutes for BioMedical Research Inc.
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
#include "SanitizeRxn.h"
|
||||
#include <GraphMol/RDKitBase.h>
|
||||
#include <GraphMol/QueryAtom.h>
|
||||
|
||||
namespace RDKit {
|
||||
namespace RxnOps {
|
||||
|
||||
// molAtomMapNumber ==> int
|
||||
// molFileRLabel ==> unsigned int
|
||||
namespace {
|
||||
template<class T>
|
||||
T getMaxProp(ChemicalReaction &rxn, const std::string &prop) {
|
||||
T max_atom = (T)0;
|
||||
for(MOL_SPTR_VECT::iterator it = rxn.beginReactantTemplates();
|
||||
it != rxn.endReactantTemplates();
|
||||
++it) {
|
||||
for (ROMol::AtomIterator atIt = (*it)->beginAtoms();
|
||||
atIt != (*it)->endAtoms();
|
||||
++atIt) {
|
||||
Atom *atom = (*atIt);
|
||||
T map;
|
||||
if (atom->getPropIfPresent<T>(prop, map)) {
|
||||
if (map > max_atom)
|
||||
max_atom = map;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for(MOL_SPTR_VECT::iterator it = rxn.beginAgentTemplates();
|
||||
it != rxn.endAgentTemplates();
|
||||
++it) {
|
||||
for (ROMol::AtomIterator atIt = (*it)->beginAtoms();
|
||||
atIt != (*it)->endAtoms();
|
||||
++atIt) {
|
||||
Atom *atom = (*atIt);
|
||||
T map;
|
||||
if (atom->getPropIfPresent<T>(prop, map)) {
|
||||
if (map > max_atom)
|
||||
max_atom = map;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for(MOL_SPTR_VECT::iterator it = rxn.beginProductTemplates();
|
||||
it != rxn.endProductTemplates();
|
||||
++it) {
|
||||
for (ROMol::AtomIterator atIt = (*it)->beginAtoms();
|
||||
atIt != (*it)->endAtoms();
|
||||
++atIt) {
|
||||
Atom *atom = (*atIt);
|
||||
T map;
|
||||
if (atom->getPropIfPresent<T>(prop, map)) {
|
||||
if (map > max_atom)
|
||||
max_atom = map;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return max_atom;
|
||||
}
|
||||
|
||||
struct AtomInfo {
|
||||
Atom * atom;
|
||||
unsigned int templateIdx;
|
||||
unsigned int rlabel;
|
||||
int atomMap;
|
||||
int isotope;
|
||||
std::string dummyLabel;
|
||||
AtomInfo(Atom *at, unsigned int templateIdx) :
|
||||
atom(at), templateIdx(templateIdx), rlabel(0), atomMap(0),
|
||||
isotope(at->getIsotope()), dummyLabel() {
|
||||
atom->getPropIfPresent(common_properties::_MolFileRLabel, rlabel);
|
||||
atom->getPropIfPresent(common_properties::molAtomMapNumber, atomMap);
|
||||
atom->getPropIfPresent(common_properties::dummyLabel, dummyLabel);
|
||||
//std::cerr << atom->getIdx() << " : " << atom->getAtomicNum() << " " <<
|
||||
// " rgroup: " << rlabel << " atomMap " << atomMap << " isotope " << isotope <<
|
||||
// " label " << dummyLabel <<
|
||||
// std::endl;
|
||||
}
|
||||
|
||||
bool NeedsRLabel() {
|
||||
return atom->getAtomicNum() == 0 && rlabel == 0;
|
||||
}
|
||||
|
||||
unsigned int bestGuessRLabel() {
|
||||
if (rlabel) return rlabel;
|
||||
if (isotope) return isotope;
|
||||
if (atomMap) return atomMap;
|
||||
if (dummyLabel.size()) {
|
||||
try {
|
||||
return boost::lexical_cast<unsigned int>(
|
||||
dummyLabel.substr(1,dummyLabel.size()-1));
|
||||
} catch (...) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void setRLabel(unsigned int rlabel) {
|
||||
PRECONDITION(atom, "Internal error in SanitizeRxn - null atom");
|
||||
RWMol &mol = dynamic_cast<RWMol&>(atom->getOwningMol());
|
||||
|
||||
QueryAtom qatom(*atom);
|
||||
qatom.setProp(common_properties::_MolFileRLabel, rlabel);
|
||||
std::string dLabel = "R" + boost::lexical_cast<std::string>(rlabel);
|
||||
qatom.setProp(common_properties::dummyLabel, dLabel);
|
||||
if (rlabel > 0 && rlabel < 999) {
|
||||
qatom.setIsotope(rlabel);
|
||||
}
|
||||
qatom.setQuery(makeAtomNullQuery());
|
||||
unsigned int idx = atom->getIdx();
|
||||
mol.replaceAtom(idx, &qatom);
|
||||
atom = mol.getAtomWithIdx(idx);
|
||||
}
|
||||
|
||||
void setAtomMap(int map) {
|
||||
atom->setProp(common_properties::molAtomMapNumber, map);
|
||||
}
|
||||
};
|
||||
|
||||
std::string makeReactantErrorMessage(const std::string &error,
|
||||
const AtomInfo &at) {
|
||||
std::ostringstream str;
|
||||
str << error << " for reactant idx: " << at.templateIdx <<
|
||||
" atom: " << at.atom->getIdx();
|
||||
return str.str();
|
||||
}
|
||||
|
||||
std::string makeProductErrorMessage(const std::string &error,
|
||||
const AtomInfo &at) {
|
||||
std::ostringstream str;
|
||||
str << error << " for product idx: " << at.templateIdx <<
|
||||
" atom: " << at.atom->getIdx();
|
||||
return str.str();
|
||||
}
|
||||
}
|
||||
|
||||
// if we have query atoms without rlabels, make proper rlabels if possible
|
||||
// ensure that every rlabel in the reactant has one in the product
|
||||
void fixRGroups(ChemicalReaction &rxn) {
|
||||
std::map<unsigned int, unsigned int> remapped;
|
||||
std::vector<AtomInfo> reactantAtomsToFix;
|
||||
std::vector<AtomInfo> productAtomsToFix;
|
||||
|
||||
unsigned int templateIdx = 0;
|
||||
for(MOL_SPTR_VECT::iterator it = rxn.beginReactantTemplates();
|
||||
it != rxn.endReactantTemplates();
|
||||
++it, ++templateIdx) {
|
||||
for (ROMol::AtomIterator atIt = (*it)->beginAtoms();
|
||||
atIt != (*it)->endAtoms();
|
||||
++atIt) {
|
||||
Atom *atom = (*atIt);
|
||||
AtomInfo at(atom, templateIdx);
|
||||
if (at.NeedsRLabel())
|
||||
reactantAtomsToFix.push_back(at);
|
||||
}
|
||||
}
|
||||
|
||||
templateIdx = 0;
|
||||
for(MOL_SPTR_VECT::iterator it = rxn.beginProductTemplates();
|
||||
it != rxn.endProductTemplates();
|
||||
++it, ++templateIdx) {
|
||||
for (ROMol::AtomIterator atIt = (*it)->beginAtoms();
|
||||
atIt != (*it)->endAtoms();
|
||||
++atIt) {
|
||||
Atom *atom = (*atIt);
|
||||
AtomInfo at(atom, templateIdx);
|
||||
if (at.NeedsRLabel())
|
||||
productAtomsToFix.push_back(at);
|
||||
}
|
||||
}
|
||||
|
||||
if (!reactantAtomsToFix.size() && !productAtomsToFix.size())
|
||||
return;
|
||||
|
||||
if( reactantAtomsToFix.size() != productAtomsToFix.size() ) {
|
||||
std::ostringstream str;
|
||||
str << "Mismatched rlabels: " <<
|
||||
reactantAtomsToFix.size() << " unmapped reactant rlabels," <<
|
||||
productAtomsToFix.size() << " unmappped product rlabels" ;
|
||||
throw RxnSanitizeException(str.str());
|
||||
}
|
||||
|
||||
|
||||
unsigned int max_rlabel = getMaxProp<unsigned int>(rxn,
|
||||
common_properties::_MolFileRLabel);
|
||||
int max_atom_map = getMaxProp<int>(rxn,
|
||||
common_properties::molAtomMapNumber);
|
||||
|
||||
BOOST_FOREACH(AtomInfo &rat, reactantAtomsToFix) {
|
||||
bool found = false;
|
||||
unsigned int bestGuess = rat.bestGuessRLabel();
|
||||
if (!bestGuess) {
|
||||
throw RxnSanitizeException(makeReactantErrorMessage(
|
||||
"Could not deduce RLabel", rat));
|
||||
}
|
||||
|
||||
BOOST_FOREACH(AtomInfo &pat, productAtomsToFix) {
|
||||
if (!pat.atom) continue;
|
||||
|
||||
if(rat.bestGuessRLabel() == pat.bestGuessRLabel()) {
|
||||
// if the atomMaps don't match, this is bad, no atomMap is ok(==0)
|
||||
if (rat.atomMap == pat.atomMap) {
|
||||
found = true;
|
||||
rat.setRLabel( max_rlabel + rat.bestGuessRLabel() );
|
||||
pat.setRLabel( max_rlabel + pat.bestGuessRLabel() );
|
||||
if (!rat.atomMap) { // set atom mapping as well
|
||||
rat.setAtomMap(max_atom_map + rat.bestGuessRLabel());
|
||||
pat.setAtomMap(max_atom_map + rat.bestGuessRLabel());
|
||||
}
|
||||
pat.atom = NULL; // don't match again
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(!found) {
|
||||
throw RxnSanitizeException(makeReactantErrorMessage(
|
||||
"Could not find RLabel mapping", rat));
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// if we have query atoms without rlabels, make proper rlabels if possible
|
||||
// ensure that every rlabel in the reactant has one in the product
|
||||
|
||||
void fixAtomMaps(ChemicalReaction &rxn) {
|
||||
int max_atom_map = getMaxProp<int>(
|
||||
rxn,
|
||||
common_properties::molAtomMapNumber);
|
||||
std::map<unsigned int, int> potential_mappings;
|
||||
|
||||
unsigned int templateIdx = 0;
|
||||
|
||||
for(MOL_SPTR_VECT::iterator it = rxn.beginReactantTemplates();
|
||||
it != rxn.endReactantTemplates();
|
||||
++it, ++templateIdx) {
|
||||
for (ROMol::AtomIterator atIt = (*it)->beginAtoms();
|
||||
atIt != (*it)->endAtoms();
|
||||
++atIt) {
|
||||
Atom *atom = (*atIt);
|
||||
AtomInfo at(atom, templateIdx);
|
||||
if(at.rlabel && !at.atomMap) {
|
||||
if(potential_mappings.find(at.rlabel) != potential_mappings.end()) {
|
||||
throw RxnSanitizeException(std::string("Duplicated RLabels"));
|
||||
}
|
||||
int map = potential_mappings[at.rlabel] = rdcast<int>(at.rlabel)+max_atom_map;
|
||||
at.setAtomMap(map);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (!potential_mappings.size())
|
||||
return; // everything is ok!
|
||||
|
||||
templateIdx = 0;
|
||||
for(MOL_SPTR_VECT::iterator it = rxn.beginProductTemplates();
|
||||
it != rxn.endProductTemplates();
|
||||
++it, ++templateIdx) {
|
||||
for (ROMol::AtomIterator atIt = (*it)->beginAtoms();
|
||||
atIt != (*it)->endAtoms();
|
||||
++atIt) {
|
||||
Atom *atom = (*atIt);
|
||||
AtomInfo at(atom, templateIdx);
|
||||
if(at.rlabel) {
|
||||
if(!at.atomMap) {
|
||||
at.setAtomMap(potential_mappings[at.rlabel]);
|
||||
} else {
|
||||
if (at.atomMap != potential_mappings[at.rlabel]) {
|
||||
throw RxnSanitizeException(makeProductErrorMessage(
|
||||
"RLabel is mapped in product but not in reactant", at));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// might throw mol sanitization exception??? wrap in RxnSanitize?
|
||||
// Runs the SANITIZE_SETAROMATICITY step of sanitizeMol() on every reactant
// template. Each template must really be an RWMol; otherwise the
// PRECONDITION fails.
void fixReactantTemplateAromaticity(ChemicalReaction &rxn) {
  unsigned int ops;  // receives sanitizeMol's "operation that failed" output
  MOL_SPTR_VECT::iterator tmpl = rxn.beginReactantTemplates();
  while (tmpl != rxn.endReactantTemplates()) {
    RWMol *rw = dynamic_cast<RWMol *>(tmpl->get());
    PRECONDITION(rw, "Oops, not really a RWMol?");
    sanitizeMol(*rw, ops, MolOps::SANITIZE_SETAROMATICITY);
    ++tmpl;
  }
}
|
||||
|
||||
// Calls MolOps::mergeQueryHs() on every reactant template with
// mergeUnmappedOnly set to true. Each template must really be an RWMol;
// otherwise the PRECONDITION fails.
void fixHs(ChemicalReaction &rxn) {
  const bool mergeUnmappedOnly = true;
  MOL_SPTR_VECT::iterator tmpl = rxn.beginReactantTemplates();
  while (tmpl != rxn.endReactantTemplates()) {
    RWMol *rw = dynamic_cast<RWMol *>(tmpl->get());
    PRECONDITION(rw, "Oops, not really a RWMol?");
    MolOps::mergeQueryHs(*rw, mergeUnmappedOnly);
    ++tmpl;
  }
}
|
||||
|
||||
// Applies adjustQueryProperties() with `params` to every reactant template.
// Does nothing when neither params.adjustDegree nor params.adjustRingCount
// is set. Each template must really be an RWMol; otherwise the PRECONDITION
// fails.
void adjustTemplates(ChemicalReaction &rxn,
                     const MolOps::AdjustQueryParameters &params) {
  if (!params.adjustDegree && !params.adjustRingCount) {
    return;
  }

  for (MOL_SPTR_VECT::iterator it = rxn.beginReactantTemplates();
       it != rxn.endReactantTemplates(); ++it) {
    RWMol *rw = dynamic_cast<RWMol *>(it->get());
    PRECONDITION(rw, "Oops, not really a RWMol?");
    adjustQueryProperties(*rw, &params);
  }
}
|
||||
// Runs the sanitization steps selected by the bits of `ops`, in the order
// fixRGroups, fixAtomMaps, adjustTemplates, fixHs.
//
// `operationsThatFailed` is set to the flag of the step about to run, so if
// that step throws the caller can see which one it was; it is reset to
// SANITIZE_NONE once every requested step has completed.
// `params` is forwarded to adjustTemplates().
void sanitizeRxn(ChemicalReaction &rxn,
                 unsigned int &operationsThatFailed,
                 unsigned int ops,
                 const MolOps::AdjustQueryParameters &params) {
  operationsThatFailed = SANITIZE_NONE;

  if (ops & SANITIZE_RGROUP_NAMES) {
    operationsThatFailed = SANITIZE_RGROUP_NAMES;
    fixRGroups(rxn);
  }

  if (ops & SANITIZE_ATOM_MAPS) {
    operationsThatFailed = SANITIZE_ATOM_MAPS;
    fixAtomMaps(rxn);
  }

  if (ops & SANITIZE_ADJUST_REACTANTS) {
    operationsThatFailed = SANITIZE_ADJUST_REACTANTS;
    adjustTemplates(rxn, params);
  }

  if (ops & SANITIZE_MERGEHS) {
    operationsThatFailed = SANITIZE_MERGEHS;
    fixHs(rxn);
  }

  operationsThatFailed = SANITIZE_NONE;
}
|
||||
|
||||
// Convenience overload: runs every sanitization step (SANITIZE_ALL) and
// discards the "operation that failed" output.
void sanitizeRxn(ChemicalReaction &rxn,
                 const MolOps::AdjustQueryParameters &params) {
  unsigned int ops = 0;
  return sanitizeRxn(rxn, ops, SANITIZE_ALL, params);
}
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
150
Code/GraphMol/ChemReactions/SanitizeRxn.h
Normal file
150
Code/GraphMol/ChemReactions/SanitizeRxn.h
Normal file
@@ -0,0 +1,150 @@
|
||||
//
|
||||
// Copyright (c) 2015, Novartis Institutes for BioMedical Research Inc.
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following
|
||||
// disclaimer in the documentation and/or other materials provided
|
||||
// with the distribution.
|
||||
// * Neither the name of Novartis Institutes for BioMedical Research Inc.
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
#ifndef RDKIT_SANITIZERXN_H
|
||||
#define RDKIT_SANITIZERXN_H
|
||||
|
||||
#include "Reaction.h"
|
||||
#include <GraphMol/MolOps.h>
|
||||
#include <string>
|
||||
#include <exception>
|
||||
|
||||
namespace RDKit {
|
||||
|
||||
//! class for flagging sanitization errors
/*!
  The message is available through message() and, so that generic
  `catch (const std::exception &)` handlers can report it too, through the
  standard what() override.
*/
class RxnSanitizeException : public std::exception {
 public:
  //! a null \c msg is stored as an empty message
  RxnSanitizeException(const char *msg) : _msg(msg ? msg : "") {}
  RxnSanitizeException(const std::string &msg) : _msg(msg) {}
  //! returns the message; the pointer is valid while this object is alive
  const char *message() const { return _msg.c_str(); }
  //! std::exception interface, returns the same text as message()
  virtual const char *what() const throw() { return _msg.c_str(); }
  virtual ~RxnSanitizeException() throw() {}

 private:
  std::string _msg;
};
|
||||
|
||||
|
||||
namespace RxnOps {
|
||||
//! Any dummy atom with a map but no RGroup label, should be an RGroup
|
||||
//! in RDKit's view of a reaction.
|
||||
//! See if these atoms can be salvaged into RGroups.
|
||||
void fixRGroups(ChemicalReaction &rxn);
|
||||
|
||||
//! If atom maps are not defined on rgroups, attempt to deduce them from the RGroup
|
||||
//! labels, or add new ones if possible.
|
||||
void fixAtomMaps(ChemicalReaction &rxn);
|
||||
|
||||
|
||||
//! Adjusts the reactant templates to properly match reagents
|
||||
void adjustTemplates(ChemicalReaction &rxn, const MolOps::AdjustQueryParameters &params);
|
||||
|
||||
//! merge query Hs if appropriate
|
||||
void fixHs(ChemicalReaction &rxn);
|
||||
|
||||
// Default adjustment parameters for matching reagents
|
||||
// Builds the default query-adjustment settings used by sanitizeRxn():
// degree and ring-count adjustment both off (their flag sets at
// ADJUST_IGNOREALL), dummies not converted to queries, aromatization on.
inline const MolOps::AdjustQueryParameters DefaultRxnAdjustParams() {
  MolOps::AdjustQueryParameters defaults;

  // degree adjustment: disabled
  defaults.adjustDegree = false;
  defaults.adjustDegreeFlags = MolOps::ADJUST_IGNOREALL;

  // ring-count adjustment: disabled
  defaults.adjustRingCount = false;
  defaults.adjustRingCountFlags = MolOps::ADJUST_IGNOREALL;

  defaults.makeDummiesQueries = false;
  defaults.aromatizeIfPossible = true;
  return defaults;
}
|
||||
|
||||
// Default adjustment parameters for ChemDraw style matching of reagents
|
||||
// Builds the query-adjustment settings for ChemDraw-style matching: degree
// adjustment on with ADJUST_IGNOREDUMMIES, ring-count adjustment off (flags
// at ADJUST_IGNORENONE), dummies not converted to queries, aromatization on.
inline const MolOps::AdjustQueryParameters ChemDrawRxnAdjustParams() {
  MolOps::AdjustQueryParameters chemDraw;

  // degree adjustment: enabled, ignoring dummies
  chemDraw.adjustDegree = true;
  chemDraw.adjustDegreeFlags = MolOps::ADJUST_IGNOREDUMMIES;

  // ring-count adjustment: disabled
  chemDraw.adjustRingCount = false;
  chemDraw.adjustRingCountFlags = MolOps::ADJUST_IGNORENONE;

  chemDraw.makeDummiesQueries = false;
  chemDraw.aromatizeIfPossible = true;
  return chemDraw;
}
|
||||
|
||||
// Bit flags selecting the steps performed by sanitizeRxn(); combine with |.
typedef enum {
  SANITIZE_NONE = 0,                   // no step
  SANITIZE_RGROUP_NAMES = 1 << 0,      // fixRGroups()
  SANITIZE_ATOM_MAPS = 1 << 1,         // fixAtomMaps()
  SANITIZE_ADJUST_REACTANTS = 1 << 2,  // adjustTemplates()
  SANITIZE_MERGEHS = 1 << 3,           // fixHs()
  SANITIZE_ALL = 0xFFFFFFFF            // every step
} SanitizeRxnFlags;
|
||||
|
||||
//! \brief carries out a collection of tasks for cleaning up a reaction and
|
||||
// ensuring
|
||||
//! that it makes "chemical sense" in the context of RDKit reactions
|
||||
/*!
|
||||
This functions calls the following in sequence
|
||||
-# RxnOps::fixRGroups()
|
||||
-# RxnOps::fixAtomMaps()
|
||||
-# RxnOps::adjustTemplates()
|
||||
-# RxnOps::fixHs()
|
||||
|
||||
\param rxn : the ChemicalReaction to be cleaned
|
||||
|
||||
\param operationsThatFailed : the first (if any) sanitization operation that
|
||||
fails is set here.
|
||||
The values are taken from the \c SanitizeRxnFlags
|
||||
enum.
|
||||
On success, the value is \c
|
||||
SanitizeFlags::SANITIZE_NONE
|
||||
|
||||
\param sanitizeOps : the bits here are used to set which sanitization
|
||||
operations are carried
|
||||
out. The elements of the \c SanitizeFlags enum define
|
||||
the operations.
|
||||
|
||||
<b>Notes:</b>
|
||||
- This attempts to fix known issues with certain reaction drawers.
|
||||
HOWEVER, if any flag is returned in operationsThatFailed,
|
||||
the reaction may still be suspect to its validity.
|
||||
- Aromaticity can be tricky when starting with Kekule structures that
|
||||
have query features, aromaticity works well for non-query rings, however
|
||||
certain structures (substitutions on Kekule rings that should really be
|
||||
aromatic) may not have enough information.
|
||||
*/
|
||||
|
||||
void sanitizeRxn(ChemicalReaction &rxn,
                 unsigned int &operationsThatFailed,
                 unsigned int sanitizeOps = SANITIZE_ALL,
                 const MolOps::AdjustQueryParameters &params = DefaultRxnAdjustParams());
|
||||
//! \overload
|
||||
void sanitizeRxn(ChemicalReaction &rxn,
                 const MolOps::AdjustQueryParameters &params = DefaultRxnAdjustParams());
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -1,9 +1,15 @@
|
||||
rdkit_python_extension(rdChemReactions
|
||||
Enumerate.cpp
|
||||
rdChemReactions.cpp
|
||||
DEST Chem
|
||||
LINK_LIBRARIES
|
||||
ChemReactions ChemTransforms Descriptors Fingerprints Subgraphs DataStructs Depictor FileParsers SmilesParse SubstructMatch GraphMol Catalogs FilterCatalog RDGeneral RDGeometryLib RDBoost)
|
||||
ChemReactions FilterCatalog ChemTransforms Descriptors Fingerprints Subgraphs DataStructs Depictor FileParsers SmilesParse SubstructMatch GraphMol Catalogs FilterCatalog RDGeneral RDGeometryLib RDBoost ${Boost_SERIALIZATION_LIBRARY})
|
||||
|
||||
add_pytest(pyChemReactions
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/testReactionWrapper.py)
|
||||
|
||||
add_pytest(pyChemReactionEnumerations
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/testEnumerations.py)
|
||||
|
||||
add_pytest(pyChemReactionSanitize
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/testSanitize.py)
|
||||
|
||||
435
Code/GraphMol/ChemReactions/Wrap/Enumerate.cpp
Normal file
435
Code/GraphMol/ChemReactions/Wrap/Enumerate.cpp
Normal file
@@ -0,0 +1,435 @@
|
||||
//
|
||||
// Copyright (c) 2015, Novartis Institutes for BioMedical Research Inc.
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following
|
||||
// disclaimer in the documentation and/or other materials provided
|
||||
// with the distribution.
|
||||
// * Neither the name of Novartis Institutes for BioMedical Research Inc.
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
#include <boost/python.hpp>
|
||||
#include <RDBoost/Wrap.h>
|
||||
#include <GraphMol/ChemReactions/Enumerate/RandomSample.h>
|
||||
#include <GraphMol/ChemReactions/Enumerate/RandomSampleAllBBs.h>
|
||||
#include <GraphMol/ChemReactions/Enumerate/EvenSamplePairs.h>
|
||||
#include <GraphMol/ChemReactions/Enumerate/Enumerate.h>
|
||||
#include <boost/python/stl_iterator.hpp>
|
||||
|
||||
namespace python = boost::python;
|
||||
|
||||
|
||||
namespace RDKit {
|
||||
|
||||
template<class T>
|
||||
std::vector<RDKit::MOL_SPTR_VECT> ConvertToVect(T bbs) {
|
||||
std::vector<RDKit::MOL_SPTR_VECT> vect;
|
||||
size_t num_bbs = python::extract<unsigned int>(bbs.attr("__len__")());
|
||||
vect.resize(num_bbs);
|
||||
for(size_t i=0; i<num_bbs; ++i) {
|
||||
unsigned int len1 = python::extract<unsigned int>(bbs[i].attr("__len__")());
|
||||
RDKit::MOL_SPTR_VECT &reacts = vect[i];
|
||||
reacts.reserve(len1);
|
||||
for(unsigned int j=0;j<len1;++j){
|
||||
RDKit::ROMOL_SPTR mol = python::extract<RDKit::ROMOL_SPTR>(bbs[i][j]);
|
||||
if(mol)
|
||||
reacts.push_back(mol);
|
||||
else {
|
||||
throw_value_error("reaction called with non molecule reactant");
|
||||
}
|
||||
}
|
||||
}
|
||||
return vect;
|
||||
}
|
||||
|
||||
|
||||
// Python truth value of an EnumerateLibraryBase: the result of converting
// the library object to bool.
bool EnumerateLibraryBase__nonzero__(RDKit::EnumerateLibraryBase *base) {
  const bool truth = static_cast<bool>(*base);
  return truth;
}
|
||||
// Python truth value of an EnumerationStrategyBase: the result of converting
// the strategy object to bool.
bool EnumerationStrategyBase__nonzero__(RDKit::EnumerationStrategyBase *base) {
  const bool truth = static_cast<bool>(*base);
  return truth;
}
|
||||
|
||||
// Identity helper: returns its argument unchanged. It is bound to __iter__
// in the class wrapper below so the wrapped object acts as its own iterator.
inline python::object pass_through(python::object const& o) { return o; }
|
||||
|
||||
// Iterator step for the Python wrapper: raises StopIteration when the
// library converts to false, otherwise returns the result of base->next()
// as a tuple of tuples of molecules. The call to next() is made inside a
// NOGIL scope.
PyObject *EnumerateLibraryBase__next__(RDKit::EnumerateLibraryBase *base) {
  if (!static_cast<bool>(*base)) {
    PyErr_SetString(PyExc_StopIteration, "Enumerations exhausted");
    boost::python::throw_error_already_set();
  }

  std::vector<RDKit::MOL_SPTR_VECT> products;
  {
    NOGIL gil;
    products = base->next();
  }

  PyObject *outer = PyTuple_New(products.size());
  for (unsigned int i = 0; i < products.size(); ++i) {
    const RDKit::MOL_SPTR_VECT &row = products[i];
    PyObject *inner = PyTuple_New(row.size());
    for (unsigned int j = 0; j < row.size(); ++j) {
      PyObject *molObj = python::converter::shared_ptr_to_python(row[j]);
      PyTuple_SetItem(inner, j, molObj);
    }
    PyTuple_SetItem(outer, i, inner);
  }
  return outer;
}
|
||||
|
||||
// Returns en.Serialize() wrapped as a Python bytes object.
python::object EnumerateLibraryBase_Serialize(const EnumerateLibraryBase &en) {
  const std::string pickle = en.Serialize();
  PyObject *raw = PyBytes_FromStringAndSize(pickle.c_str(), pickle.length());
  return python::object(python::handle<>(raw));
}
|
||||
|
||||
class EnumerateLibraryWrap : public RDKit::EnumerateLibrary {
|
||||
public:
|
||||
EnumerateLibraryWrap() : RDKit::EnumerateLibrary() {}
|
||||
EnumerateLibraryWrap(const RDKit::ChemicalReaction &rxn, python::list ob,
|
||||
const EnumerationParams & params = EnumerationParams()
|
||||
) :
|
||||
RDKit::EnumerateLibrary(rxn, ConvertToVect(ob), params) {
|
||||
}
|
||||
|
||||
EnumerateLibraryWrap(const RDKit::ChemicalReaction &rxn, python::tuple ob,
|
||||
const EnumerationParams & params = EnumerationParams()
|
||||
) :
|
||||
RDKit::EnumerateLibrary(rxn, ConvertToVect(ob), params) {
|
||||
}
|
||||
|
||||
EnumerateLibraryWrap(const RDKit::ChemicalReaction &rxn, python::list ob,
|
||||
const EnumerationStrategyBase &enumerator,
|
||||
const EnumerationParams & params = EnumerationParams()
|
||||
) :
|
||||
RDKit::EnumerateLibrary(rxn, ConvertToVect(ob), enumerator, params) {
|
||||
}
|
||||
|
||||
EnumerateLibraryWrap(const RDKit::ChemicalReaction &rxn, python::tuple ob,
|
||||
const EnumerationStrategyBase &enumerator,
|
||||
const EnumerationParams & params = EnumerationParams()) :
|
||||
RDKit::EnumerateLibrary(rxn, ConvertToVect(ob), enumerator, params) {
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
namespace {
|
||||
template< typename T >
|
||||
inline
|
||||
std::vector< T > to_std_vector( const python::object& iterable )
|
||||
{
|
||||
return std::vector< T >( python::stl_input_iterator< T >( iterable ),
|
||||
python::stl_input_iterator< T >( ) );
|
||||
}
|
||||
}
|
||||
|
||||
// Initializes the strategy `rgroup` for `rxn` with the building blocks
// converted from the Python list `ob`.
void ToBBS(EnumerationStrategyBase &rgroup, ChemicalReaction &rxn,
           python::list ob) {
  const std::vector<RDKit::MOL_SPTR_VECT> blocks = ConvertToVect(ob);
  rgroup.initialize(rxn, blocks);
}
|
||||
|
||||
typedef std::vector<uint64_t> VectSizeT;
|
||||
typedef std::vector<std::vector<std::string> > VectStringVect;
|
||||
typedef std::vector<MOL_SPTR_VECT > VectMolVect;
|
||||
|
||||
struct enumeration_wrapper {
|
||||
static void wrap() {
|
||||
std::string docString;
|
||||
python::class_<VectStringVect>("VectorOfStringVectors")
|
||||
.def(python::vector_indexing_suite<VectStringVect, false>() );
|
||||
|
||||
python::class_<VectSizeT>("VectSizeT")
|
||||
.def(python::vector_indexing_suite<VectSizeT, false>() );
|
||||
|
||||
python::class_<VectMolVect>("VectMolVect")
|
||||
.def(python::vector_indexing_suite<VectMolVect, false>() );
|
||||
|
||||
python::class_<RDKit::EnumerateLibraryBase, RDKit::EnumerateLibraryBase *,
|
||||
RDKit::EnumerateLibraryBase &, boost::noncopyable>(
|
||||
"EnumerateLibraryBase", python::no_init)
|
||||
.def("__nonzero__", &EnumerateLibraryBase__nonzero__)
|
||||
.def("__bool__", &EnumerateLibraryBase__nonzero__)
|
||||
.def("__iter__", &pass_through)
|
||||
.def("next", &EnumerateLibraryBase__next__,
|
||||
"Return the next molecule from the enumeration.")
|
||||
.def("__next__", &EnumerateLibraryBase__next__,
|
||||
"Return the next molecule from the enumeration.")
|
||||
.def("nextSmiles", &RDKit::EnumerateLibraryBase::nextSmiles,
|
||||
"Return the next smiles string from the enumeration.")
|
||||
.def("Serialize", &EnumerateLibraryBase_Serialize,
|
||||
"Serialize the library to a binary string.\n"
|
||||
"Note that the position in the library is serialized as well. Care should\n"
|
||||
"be taken when serializing. See GetState/SetState for position manipulation.")
|
||||
.def("InitFromString", &RDKit::EnumerateLibraryBase::initFromString,
|
||||
python::arg("data"),
|
||||
"Inititialize the library from a binary string")
|
||||
.def("GetPosition", &RDKit::EnumerateLibraryBase::getPosition,
|
||||
"Returns the current enumeration position into the reagent vectors",
|
||||
python::return_internal_reference<
|
||||
1, python::with_custodian_and_ward_postcall<0, 1> >())
|
||||
.def("GetState", &RDKit::EnumerateLibraryBase::getState,
|
||||
"Returns the current enumeration state (position) of the library.\n"
|
||||
"This position can be used to restart the library from a known position")
|
||||
.def("SetState", &RDKit::EnumerateLibraryBase::setState,
|
||||
python::arg("state"),
|
||||
"Sets the enumeration state (position) of the library.")
|
||||
.def("ResetState", &RDKit::EnumerateLibraryBase::resetState,
|
||||
"Returns the current enumeration state (position) of the library to the start.")
|
||||
.def("GetReaction", &RDKit::EnumerateLibraryBase::getReaction,
|
||||
"Returns the chemical reaction for this library",
|
||||
python::return_internal_reference<
|
||||
1, python::with_custodian_and_ward_postcall<0, 1> >())
|
||||
.def("GetEnumerator", &RDKit::EnumerateLibraryBase::getEnumerator,
|
||||
"Returns the enumation strategy for the current library",
|
||||
python::return_internal_reference<
|
||||
1, python::with_custodian_and_ward_postcall<0, 1> >());
|
||||
|
||||
docString = \
|
||||
"EnumerationParams\n\
|
||||
Controls some aspects of how the enumeration is performed.\n\
|
||||
Options:\n\
|
||||
reagentMaxMatchCount [ default Infinite ]\n\
|
||||
This specifies how many times the reactant template can match a reagent.\n\
|
||||
\n\
|
||||
sanePartialProducts [default false]\n\
|
||||
If true, forces all products of the reagent plus the product templates\n\
|
||||
pass chemical sanitization. Note that if the product template itself\n\
|
||||
does not pass sanitization, then none of the products will.\n\
|
||||
";
|
||||
|
||||
python::class_<RDKit::EnumerationParams,
|
||||
RDKit::EnumerationParams*,
|
||||
RDKit::EnumerationParams&>("EnumerationParams",
|
||||
docString.c_str(),
|
||||
python::init<>())
|
||||
.def_readwrite("reagentMaxMatchCount",
|
||||
&RDKit::EnumerationParams::reagentMaxMatchCount)
|
||||
.def_readwrite("sanePartialProducts",
|
||||
&RDKit::EnumerationParams::sanePartialProducts);
|
||||
|
||||
docString = \
|
||||
"EnumerateLibrary\n\
|
||||
This class allows easy enumeration of reactions. Simply provide a reaction\n\
|
||||
and a set of reagents and you are off the the races.\n\
|
||||
\n\
|
||||
EnumerateLibrary follows the python enumerator protocol, for example:\n\
|
||||
\n\
|
||||
library = EnumerateLibrary(rxn, bbs)\n\
|
||||
for products in library:\n\
|
||||
... do something with the product\n\
|
||||
\n\
|
||||
It is useful to sanitize reactions before hand:\n\
|
||||
\n\
|
||||
SanitizeRxn(rxn)\n\
|
||||
library = EnumerateLibrary(rxn, bbs)\n\
|
||||
\n\
|
||||
If ChemDraw style reaction semantics are prefereed, you can apply\n\
|
||||
the ChemDraw parameters:\n\
|
||||
\n\
|
||||
SanitizeRxn(rxn, params=GetChemDrawRxnAdjustParams())\n\
|
||||
\n\
|
||||
For one, this enforces only matching RGroups and assumes all atoms\n\
|
||||
have fully satisfied valences.\n\
|
||||
\n\
|
||||
Each product has the same output as applying a set of reagents to\n\
|
||||
the libraries reaction.\n\
|
||||
\n\
|
||||
This can be a bit confusing as each product can have multiple molecules\n\
|
||||
generated. The returned data structure is as follows:\n\
|
||||
\n\
|
||||
[ [products1], [products2],... ]\n\
|
||||
Where products1 are the molecule products for the reactions first product\n\
|
||||
template and products2 are the molecule products for the second product\n\
|
||||
template. Since each reactant can match more than once, there may be\n\
|
||||
multiple product molecules for each template.\n\
|
||||
\n\
|
||||
for result in library:\n\
|
||||
for results_for_product_template in products:\n\
|
||||
for mol in results_for_product_template:\n\
|
||||
Chem.MolToSmiles(mol) # finally have a molecule!\n\
|
||||
\n\
|
||||
For sufficiently large libraries, using this iteration strategy is not\n\
|
||||
recommended as the library may contain more products than atoms in the\n\
|
||||
universe. To help with this, you can supply an enumeration strategy.\n\
|
||||
The default strategy is a CartesianProductStrategy which enumerates\n\
|
||||
everything. RandomSampleStrategy randomly samples the products but\n\
|
||||
this strategy never terminates, however, python supplies itertools:\n\
|
||||
\n\
|
||||
import itertools\n\
|
||||
library = EnumerateLibrary(rxn, bbs, rdChemReactions.RandomSampleStrategy())\n\
|
||||
for result in itertools.islice(libary, 1000):\n\
|
||||
# do something with the first 1000 samples\n\
|
||||
\n\
|
||||
for result in itertools.islice(libary, 1000):\n\
|
||||
# do something with the next 1000 samples\n\
|
||||
\n\
|
||||
Libraries are also serializable, including their current state:\n\
|
||||
\n\
|
||||
s = library.Serialize()\n\
|
||||
library2 = EnumerateLibrary()\n\
|
||||
library2.InitFromString(s)\n\
|
||||
for result in itertools.islice(libary2, 1000):\n\
|
||||
# do something with the next 1000 samples\n\
|
||||
";
|
||||
python::class_<EnumerateLibraryWrap,
|
||||
EnumerateLibraryWrap*,EnumerateLibraryWrap&,
|
||||
python::bases<RDKit::EnumerateLibraryBase> >(
|
||||
"EnumerateLibrary", docString.c_str(),
|
||||
python::init<>())
|
||||
.def(python::init<
|
||||
const RDKit::ChemicalReaction &,
|
||||
python::list,
|
||||
python::optional<const RDKit::EnumerationParams&>
|
||||
>(python::args("rxn", "reagents", "params")))
|
||||
.def(python::init<
|
||||
const RDKit::ChemicalReaction &,
|
||||
python::tuple,
|
||||
python::optional<const RDKit::EnumerationParams&>
|
||||
>(python::args("rxn", "reagents", "params")))
|
||||
|
||||
.def(python::init<const RDKit::ChemicalReaction &,
|
||||
python::list,
|
||||
const RDKit::EnumerationStrategyBase &,
|
||||
python::optional<const RDKit::EnumerationParams&>
|
||||
>(python::args(
|
||||
"rxn", "reagents", "enumerator", "params")))
|
||||
.def(python::init<const RDKit::ChemicalReaction &,
|
||||
python::tuple,
|
||||
const RDKit::EnumerationStrategyBase &,
|
||||
python::optional<const RDKit::EnumerationParams&>
|
||||
>(python::args(
|
||||
"rxn", "reagents", "enumerator", "params")))
|
||||
|
||||
.def("GetReagents", &RDKit::EnumerateLibrary::getReagents,
|
||||
"Return the reagents used in this library.",
|
||||
python::return_internal_reference<
|
||||
1, python::with_custodian_and_ward_postcall<0, 1> >())
|
||||
;
|
||||
|
||||
//iterator_wrappers<EnumerateLibrary>().wrap("EnumerateLibraryIterator");
|
||||
|
||||
python::class_<RDKit::EnumerationStrategyBase,
|
||||
RDKit::EnumerationStrategyBase *,
|
||||
RDKit::EnumerationStrategyBase &, boost::noncopyable>(
|
||||
"EnumerationStrategyBase", python::no_init)
|
||||
.def("__nonzero__", &EnumerationStrategyBase__nonzero__)
|
||||
.def("__bool__", &EnumerationStrategyBase__nonzero__)
|
||||
.def("Type", &EnumerationStrategyBase::type,
|
||||
"Returns the enumeration strategy type as a string.")
|
||||
.def("Skip", &EnumerationStrategyBase::skip,
|
||||
python::args("skipCount"),
|
||||
"Skip the next Nth results. note: this may be an expensive "
|
||||
"operation\n"
|
||||
"depending on the enumeration strategy used. It is recommended to "
|
||||
"use\n"
|
||||
"the enumerator state to advance to a known position")
|
||||
.def("__copy__", python::pure_virtual(&EnumerationStrategyBase::copy),
|
||||
python::return_value_policy<python::manage_new_object>())
|
||||
.def("GetNumPermutations", &EnumerationStrategyBase::getNumPermutations,
|
||||
"Returns the total number of results for this enumeration strategy.\n"
|
||||
"Note that some strategies are effectively infinite.")
|
||||
.def("GetPosition", &EnumerationStrategyBase::getPosition,
|
||||
"Return the current indices into the arrays of reagents",
|
||||
python::return_internal_reference<
|
||||
1, python::with_custodian_and_ward_postcall<0, 1> >())
|
||||
.def("next", python::pure_virtual(&EnumerationStrategyBase::next),
|
||||
"Return the next indices into the arrays of reagents",
|
||||
python::return_internal_reference<
|
||||
1, python::with_custodian_and_ward_postcall<0, 1> >())
|
||||
.def("__next__", python::pure_virtual(&EnumerationStrategyBase::next),
|
||||
"Return the next indices into the arrays of reagents",
|
||||
python::return_internal_reference<
|
||||
1, python::with_custodian_and_ward_postcall<0, 1> >())
|
||||
.def("Initialize", ToBBS);
|
||||
|
||||
docString = "CartesianProductStrategy produces a standard walk through all possible\n"
|
||||
"reagent combinations:\n"
|
||||
"\n"
|
||||
"(0,0,0), (1,0,0), (2,0,0) ...\n";
|
||||
|
||||
python::class_<RDKit::CartesianProductStrategy,
|
||||
RDKit::CartesianProductStrategy*,
|
||||
RDKit::CartesianProductStrategy&,
|
||||
python::bases<EnumerationStrategyBase> >("CartesianProductStrategy",
|
||||
docString.c_str(),
|
||||
python::init<>())
|
||||
.def("__copy__", &RDKit::CartesianProductStrategy::copy,
|
||||
python::return_value_policy<python::manage_new_object>())
|
||||
;
|
||||
|
||||
docString = "RandomSampleStrategy simply randomly samples from the reagent sets.\n"
|
||||
"Note that this strategy never halts and can produce duplicates.";
|
||||
python::class_<RDKit::RandomSampleStrategy,
|
||||
RDKit::RandomSampleStrategy*,
|
||||
RDKit::RandomSampleStrategy&,
|
||||
python::bases<EnumerationStrategyBase> >("RandomSampleStrategy",
|
||||
docString.c_str(),
|
||||
python::init<>())
|
||||
.def("__copy__", &RDKit::RandomSampleStrategy::copy,
|
||||
python::return_value_policy<python::manage_new_object>())
|
||||
;
|
||||
|
||||
docString = "RandomSampleAllBBsStrategy randomly samples from the reagent sets\n"
|
||||
"with the constraint that all building blocks are samples as early as possible.\n"
|
||||
"Note that this strategy never halts and can produce duplicates.";
|
||||
python::class_<RDKit::RandomSampleAllBBsStrategy,
|
||||
RDKit::RandomSampleAllBBsStrategy*,
|
||||
RDKit::RandomSampleAllBBsStrategy&,
|
||||
python::bases<EnumerationStrategyBase> >("RandomSampleAllBBsStrategy",
|
||||
docString.c_str(),
|
||||
python::init<>())
|
||||
.def("__copy__", &RDKit::RandomSampleAllBBsStrategy::copy,
|
||||
python::return_value_policy<python::manage_new_object>())
|
||||
;
|
||||
|
||||
docString = "Randomly sample Pairs evenly from a collection of building blocks\n"
|
||||
"This is a good strategy for choosing a relatively small selection\n"
|
||||
"of building blocks from a larger set. As the amount of work needed\n"
|
||||
"to retrieve the next evenly sample building block grows with the\n"
|
||||
"number of samples, this method performs progressively worse as the\n"
|
||||
"number of samples gets larger.\n"
|
||||
"See EnumerationStrategyBase for more details.\n";
|
||||
|
||||
python::class_<RDKit::EvenSamplePairsStrategy,
|
||||
RDKit::EvenSamplePairsStrategy*,
|
||||
RDKit::EvenSamplePairsStrategy&,
|
||||
python::bases<EnumerationStrategyBase> >("EvenSamplePairsStrategy",
|
||||
docString.c_str(),
|
||||
python::init<>())
|
||||
.def("__copy__", &RDKit::EvenSamplePairsStrategy::copy,
|
||||
python::return_value_policy<python::manage_new_object>())
|
||||
.def("Stats", &RDKit::EvenSamplePairsStrategy::stats,
|
||||
"Return the a statisics log of the pairs used in the current enumeration.")
|
||||
;
|
||||
|
||||
python::def("EnumerateLibraryCanSerialize", EnumerateLibraryCanSerialize,
|
||||
"Returns True if the EnumerateLibrary is serializable "
|
||||
"(requires boost serialization");
|
||||
|
||||
}
|
||||
};
|
||||
|
||||
}// end of namespace
|
||||
|
||||
// Free-function entry point that registers the enumeration bindings by
// delegating to RDKit::enumeration_wrapper::wrap().
void wrap_enumeration() { RDKit::enumeration_wrapper::wrap(); }
|
||||
|
||||
@@ -36,6 +36,7 @@
|
||||
#include <GraphMol/ChemReactions/ReactionParser.h>
|
||||
#include <GraphMol/ChemReactions/ReactionRunner.h>
|
||||
#include <GraphMol/ChemReactions/PreprocessRxn.h>
|
||||
#include <GraphMol/ChemReactions/SanitizeRxn.h>
|
||||
#include <GraphMol/Depictor/DepictUtils.h>
|
||||
#include <GraphMol/FilterCatalog/FunctionalGroupHierarchy.h>
|
||||
|
||||
@@ -328,7 +329,7 @@ python::object AddRecursiveQueriesToReaction(ChemicalReaction &self,
|
||||
python::object PreprocessReaction(ChemicalReaction &reaction,
|
||||
python::dict queryDict,
|
||||
std::string propName) {
|
||||
|
||||
|
||||
// transform dictionary into map
|
||||
std::map<std::string, ROMOL_SPTR> queries;
|
||||
unsigned int size = python::extract<unsigned int>(queryDict.keys().attr("__len__")());
|
||||
@@ -353,11 +354,11 @@ python::object PreprocessReaction(ChemicalReaction &reaction,
|
||||
reaction.validate(nWarn, nError);
|
||||
std::vector<
|
||||
std::vector<std::pair<unsigned int,std::string> > > labels;
|
||||
|
||||
|
||||
if (!nError) {
|
||||
preprocessReaction(reaction, nWarn, nError, labels, queries, propName);
|
||||
}
|
||||
|
||||
|
||||
// transform labels into python::tuple(python::tuple(python::tuple))
|
||||
python::list reactantLabels;
|
||||
for (unsigned int i = 0; i < labels.size(); ++i) {
|
||||
@@ -374,6 +375,30 @@ python::object PreprocessReaction(ChemicalReaction &reaction,
|
||||
python::tuple(reactantLabels));
|
||||
|
||||
}
|
||||
// Integer type used for the sanitizeOps argument of sanitizeReaction():
// signed int when RDK_32BIT_BUILD is defined, unsigned int otherwise.
#ifdef RDK_32BIT_BUILD
typedef int sanitize_ops;
#else
typedef unsigned int sanitize_ops;
#endif
|
||||
|
||||
// Python-facing wrapper around RxnOps::sanitizeRxn().
//
//   rxn          reaction handed to RxnOps::sanitizeRxn (non-const reference)
//   sanitizeOps  operations to run, forwarded unchanged
//   params       query-adjustment parameters, forwarded unchanged
//   catchErrors  when true, any exception thrown by sanitizeRxn is swallowed;
//                when false it is rethrown to the caller
//
// Returns the value of `operationsThatFailed` as filled in by sanitizeRxn
// (it starts at 0), cast to RxnOps::SanitizeRxnFlags.
RxnOps::SanitizeRxnFlags sanitizeReaction(
    ChemicalReaction &rxn, sanitize_ops sanitizeOps,
    const MolOps::AdjustQueryParameters &params, bool catchErrors) {
  unsigned int operationsThatFailed = 0;
  try {
    RxnOps::sanitizeRxn(rxn, operationsThatFailed, sanitizeOps, params);
  } catch (...) {
    // Deliberate best-effort mode: only propagate when the caller asked to
    // see errors; otherwise report through the returned flags.
    if (!catchErrors) {
      throw;
    }
  }
  return static_cast<RxnOps::SanitizeRxnFlags>(operationsThatFailed);
}
|
||||
}
|
||||
|
||||
|
||||
void wrap_enumeration();
|
||||
|
||||
BOOST_PYTHON_MODULE(rdChemReactions) {
|
||||
python::scope().attr("__doc__") =
|
||||
@@ -661,7 +686,7 @@ of the replacements argument.",
|
||||
"Caution: This is an expert-user function which will change a property (molInversionFlag) of your products.\
|
||||
This function is called by default using the RXN or SMARTS parser for reactions and should really only be called if reactions have been constructed some other way.\
|
||||
The function updates the stereochemistry of the product by considering 4 different cases: inversion, retention, removal, and introduction");
|
||||
|
||||
|
||||
python::def(
|
||||
"ReduceProductToSideChains", RDKit::reduceProductToSideChains,
|
||||
(python::arg("product"), python::arg("addDummyAtoms") = true),
|
||||
@@ -669,7 +694,7 @@ of the replacements argument.",
|
||||
The output is a molecule with attached wildcards indicating where the product was attached.\
|
||||
The dummy atom has the same reaction-map number as the product atom (if available).",
|
||||
python::return_value_policy<python::manage_new_object>());
|
||||
|
||||
|
||||
python::def("RemoveMappingNumbersFromReactions",
|
||||
RDKit::removeMappingNumbersFromReactions,
|
||||
(python::arg("reaction")),
|
||||
@@ -774,10 +799,36 @@ Sample Usage:\n\
|
||||
True\n\
|
||||
";
|
||||
|
||||
python::def("PreprocessReaction", PreprocessReaction,
|
||||
python::def("PreprocessReaction", RDKit::PreprocessReaction,
|
||||
(python::arg("reaction"),
|
||||
python::arg("queries")=python::dict(),
|
||||
python::arg("propName")=common_properties::molFileValue),
|
||||
python::arg("propName")=RDKit::common_properties::molFileValue),
|
||||
docString.c_str());
|
||||
}
|
||||
|
||||
python::enum_<RDKit::RxnOps::SanitizeRxnFlags>("SanitizeFlags")
|
||||
.value("SANITIZE_NONE", RDKit::RxnOps::SANITIZE_NONE)
|
||||
.value("SANITIZE_ATOM_MAPS", RDKit::RxnOps::SANITIZE_ATOM_MAPS)
|
||||
.value("SANITIZE_RGROUP_NAMES", RDKit::RxnOps::SANITIZE_RGROUP_NAMES)
|
||||
.value("SANITIZE_ADJUST_REACTANTS", RDKit::RxnOps::SANITIZE_ADJUST_REACTANTS)
|
||||
.value("SANITIZE_MERGEHS", RDKit::RxnOps::SANITIZE_MERGEHS)
|
||||
.value("SANITIZE_ALL", RDKit::RxnOps::SANITIZE_ALL)
|
||||
.export_values();
|
||||
;
|
||||
|
||||
python::def("GetDefaultAdjustParams", RDKit::RxnOps::DefaultRxnAdjustParams,
|
||||
"Returns the default adjustment parameters for reactant templates");
|
||||
|
||||
python::def("GetChemDrawRxnAdjustParams", RDKit::RxnOps::ChemDrawRxnAdjustParams,
|
||||
"Returns the chemdraw style adjustment parameters for reactant templates");
|
||||
|
||||
std::string docstring = "feed me";
|
||||
python::def(
|
||||
"SanitizeRxn", RDKit::sanitizeReaction,
|
||||
(python::arg("rxn"), python::arg("sanitizeOps") = rdcast<unsigned int>(RDKit::RxnOps::SANITIZE_ALL),
|
||||
python::arg("params") = RDKit::RxnOps::DefaultRxnAdjustParams(),
|
||||
python::arg("catchErrors") = false),
|
||||
docString.c_str());
|
||||
|
||||
wrap_enumeration();
|
||||
|
||||
}
|
||||
|
||||
654
Code/GraphMol/ChemReactions/Wrap/testEnumerations.py
Normal file
654
Code/GraphMol/ChemReactions/Wrap/testEnumerations.py
Normal file
@@ -0,0 +1,654 @@
|
||||
# Copyright (c) 2015, Novartis Institutes for BioMedical Research Inc.
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above
|
||||
# copyright notice, this list of conditions and the following
|
||||
# disclaimer in the documentation and/or other materials provided
|
||||
# with the distribution.
|
||||
# * Neither the name of Novartis Institutes for BioMedical Research Inc.
|
||||
# nor the names of its contributors may be used to endorse or promote
|
||||
# products derived from this software without specific prior written
|
||||
# permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
from __future__ import print_function
|
||||
|
||||
import unittest
|
||||
import os,sys, copy
|
||||
|
||||
from rdkit.six.moves import cPickle
|
||||
|
||||
from rdkit import rdBase
|
||||
from rdkit import Chem
|
||||
from rdkit.Chem import AllChem,rdChemReactions
|
||||
from rdkit import Geometry
|
||||
from rdkit import RDConfig
|
||||
import itertools, time
|
||||
import numpy as np
|
||||
|
||||
def log(s):
    """Send `s`, prefixed with "== ", to Chem.LogErrorMsg."""
    message = "== " + s
    Chem.LogErrorMsg(message)
|
||||
|
||||
class TestCase(unittest.TestCase) :
|
||||
def setUp(self):
    # Directory holding the ChemReactions test data, resolved relative to
    # the RDKit base directory.
    path_parts = ('Code', 'GraphMol', 'ChemReactions', 'testData')
    self.dataDir = os.path.join(RDConfig.RDBaseDir, *path_parts)
|
||||
|
||||
def testCartesianProduct(self):
    # CartesianProductStrategy must report 10*5*6 permutations and yield
    # exactly the index tuples produced by itertools.product.
    log("testCartesianProduct")
    rxn = rdChemReactions.ChemicalReaction()
    rgroups = [[Chem.MolFromSmiles("C")] * 10,
               [Chem.MolFromSmiles("N")] * 5,
               [Chem.MolFromSmiles("O")] * 6]
    expected = 10 * 5 * 6

    cartProd = rdChemReactions.CartesianProductStrategy()
    cartProd.Initialize(rxn, rgroups)
    self.assertEqual(cartProd.GetNumPermutations(), expected)

    groups = []
    print(cartProd.__bool__())
    while cartProd:
        groups.append(tuple(cartProd.next()))
        # Fail instead of looping forever if the strategy never reports
        # exhaustion.
        self.assertLessEqual(len(groups), expected)
    self.assertEqual(len(groups), expected)

    # see if we are equal to the Python implementation
    g = list(itertools.product(list(range(10)), list(range(5)), list(range(6))))
    self.assertEqual(set(g), set(groups))

    # copying the strategy must not raise
    copy.copy(cartProd)
|
||||
|
||||
def testRandomSample(self):
    # RandomSampleStrategy: checks the reported permutation count, draws
    # samples (coverage is only printed, not asserted, because sampling is
    # random) and checks that the strategy can be copied.
    log("testRandomSample")
    rgroups = [[Chem.MolFromSmiles("C")] * 10,
               [Chem.MolFromSmiles("N")] * 5,
               [Chem.MolFromSmiles("O")] * 6]
    rxn = rdChemReactions.ChemicalReaction()
    expected = 10 * 5 * 6

    randProd = rdChemReactions.RandomSampleStrategy()
    randProd.Initialize(rxn, rgroups)
    self.assertEqual(randProd.GetNumPermutations(), expected)
    groups = []
    for i in range(expected):
        groups.append(tuple(randProd.next()))
    print(len(set(groups)), "out of", expected)

    # Fresh strategy, only ten draws: report per-position coverage.
    randProd = rdChemReactions.RandomSampleStrategy()
    randProd.Initialize(rxn, rgroups)
    self.assertEqual(randProd.GetNumPermutations(), expected)
    groups = []
    for i in range(10):
        groups.append(tuple(randProd.next()))

    for i in range(3):
        print(i, len(set([g[i] for g in groups])), "out of", [10, 5, 6][i])

    # copying the strategy must not raise
    copy.copy(randProd)
|
||||
|
||||
def testRandomSampleAllBBs(self):
    # RandomSampleAllBBsStrategy: after ten draws every building block of
    # every reagent position (10, 5 and 6 of them) must have been used.
    log("testRandomSampleAllBBs")
    rxn = rdChemReactions.ChemicalReaction()
    rgroups = [[Chem.MolFromSmiles("C")] * 10,
               [Chem.MolFromSmiles("N")] * 5,
               [Chem.MolFromSmiles("O")] * 6]
    expected = 10 * 5 * 6

    randProd = rdChemReactions.RandomSampleAllBBsStrategy()
    randProd.Initialize(rxn, rgroups)
    self.assertEqual(randProd.GetNumPermutations(), expected)
    groups = []
    for i in range(expected):
        groups.append(tuple(randProd.next()))

    print(len(set(groups)), "out of", expected)

    # Fresh strategy, only ten draws.
    randProd = rdChemReactions.RandomSampleAllBBsStrategy()
    randProd.Initialize(rxn, rgroups)
    self.assertEqual(randProd.GetNumPermutations(), expected)
    groups = []
    for i in range(10):
        groups.append(tuple(randProd.next()))

    for i in range(3):
        used = len(set([g[i] for g in groups]))
        print(i, used, "out of", [10, 5, 6][i])
        self.assertEqual(used, [10, 5, 6][i])

    # copying the strategy must not raise
    copy.copy(randProd)
|
||||
|
||||
def testTimings(self):
    # Times Skip(10000000) on three strategies initialized with large
    # reagent lists; the timings are printed, nothing is asserted.
    log("testTimings")
    rxn = rdChemReactions.ChemicalReaction()

    rgroups = [[Chem.MolFromSmiles("C")] * 17000,
               [Chem.MolFromSmiles("N")] * 50000,
               [Chem.MolFromSmiles("O")] * 4000]
    cartProd = rdChemReactions.CartesianProductStrategy()
    randProd = rdChemReactions.RandomSampleStrategy()
    randAllBBs = rdChemReactions.RandomSampleAllBBsStrategy()
    strategies = [cartProd, randProd, randAllBBs]
    for r in strategies:
        r.Initialize(rxn, rgroups)
        num = 10000000
        started = time.time()
        r.Skip(num)
        finished = time.time()
        print("%s Skipped %s in %s seconds"%(r, num, finished - started))
|
||||
|
||||
def testEvenPairsSampling(self):
    # EvenSamplePairsStrategy over three reagent positions of ten building
    # blocks each: every pair of positions has 100 possible index pairs, so
    # N samples should use each pair about N/100 times.
    rxn = rdChemReactions.ChemicalReaction()

    rgroups = [[Chem.MolFromSmiles("C")] * 10,
               [Chem.MolFromSmiles("N")] * 10,
               [Chem.MolFromSmiles("O")] * 10]

    def sample_pair_counts(num_samples):
        # Draw num_samples index vectors from a freshly initialized strategy
        # and tally how often each (position, position) index pair occurs.
        # Returns the strategy and the three tallies (01, 02, 12).
        strategy = rdChemReactions.EvenSamplePairsStrategy()
        strategy.Initialize(rxn, rgroups)
        pairs01 = {}
        pairs12 = {}
        pairs02 = {}
        for _ in range(num_samples):
            v = strategy.next()
            p01 = (v[0], v[1])
            p12 = (v[1], v[2])
            p02 = (v[0], v[2])
            # Each tally is keyed by its own pair. The earlier code stored
            # the 12 and 02 counts under the 01 key while reading them back
            # under their own key, which corrupted both tallies.
            pairs01[p01] = pairs01.get(p01, 0) + 1
            pairs12[p12] = pairs12.get(p12, 0) + 1
            pairs02[p02] = pairs02.get(p02, 0) + 1
        return strategy, (pairs01, pairs02, pairs12)

    # try 100 samples: each pair should be used roughly once
    strategy, tallies = sample_pair_counts(100)
    for tally in tallies:
        self.assertEqual(np.median(list(tally.values())), 1.0)

    # now try 1000: each pair should be used roughly 10 times
    strategy, tallies = sample_pair_counts(1000)
    for tally in tallies:
        self.assertTrue(9 <= np.median(list(tally.values())) <= 11)

    # now try 500: each pair should be used roughly 5 times
    strategy, tallies = sample_pair_counts(500)
    for tally in tallies:
        self.assertTrue(4 <= np.median(list(tally.values())) <= 6)

    self.assertTrue("PAIRSTAT" in strategy.Stats())
|
||||
|
||||
def testEnumerateLibrary(self):
    # End-to-end check of EnumerateLibrary with the default strategy:
    # reaction and reagent round trip, product order, enumeration positions,
    # (de)serialization (when available) and the nextSmiles interface.
    log("testEnumerateLibrary")
    smirks_thiourea = "[N;$(N-[#6]):3]=[C;$(C=S):1].[N;$(N[#6]);!$(N=*);!$([N-]);!$(N#*);!$([ND3]);!$([ND4]);!$(N[O,N]);!$(N[C,S]=[S,O,N]):2]>>[N:3]-[C:1]-[N+0:2]"
    rxn = rdChemReactions.ReactionFromSmarts(smirks_thiourea)
    reagents = [
        [Chem.MolFromSmiles('C=CCN=C=S'), Chem.MolFromSmiles('CC=CCN=C=S')],
        [Chem.MolFromSmiles('NCc1ncc(Cl)cc1Br'),
         Chem.MolFromSmiles('NCCc1ncc(Cl)cc1Br'),
         Chem.MolFromSmiles('NCCCc1ncc(Cl)cc1Br'),
         ]
    ]
    can_serialize = rdChemReactions.EnumerateLibraryCanSerialize()

    enumerator = rdChemReactions.EnumerateLibrary(rxn, reagents)
    self.assertTrue(enumerator)

    # need to initialize the reaction before getting the binary serialization
    rxn.Initialize()
    self.assertEqual(rxn.ToBinary(), enumerator.GetReaction().ToBinary())

    bbs = enumerator.GetReagents()
    for i in range(len(bbs)):
        for j in range(len(bbs[i])):
            self.assertEqual(Chem.MolToSmiles(reagents[i][j]),
                             Chem.MolToSmiles(bbs[i][j]))

    smiresults = ['C=CCNC(=S)NCc1ncc(Cl)cc1Br',
                  'CC=CCNC(=S)NCc1ncc(Cl)cc1Br',
                  'C=CCNC(=S)NCCc1ncc(Cl)cc1Br',
                  'CC=CCNC(=S)NCCc1ncc(Cl)cc1Br',
                  'C=CCNC(=S)NCCCc1ncc(Cl)cc1Br',
                  'CC=CCNC(=S)NCCCc1ncc(Cl)cc1Br']
    results = [Chem.MolToSmiles(Chem.MolFromSmiles(smi)) for smi in smiresults]

    enumerators = [enumerator]

    # add serialized enumerators as well for testing if possible
    if can_serialize:
        serialized = enumerator.Serialize()
        enumerator2 = rdChemReactions.EnumerateLibrary()
        enumerator2.InitFromString(serialized)

        # make sure old pickles work; close the file deterministically
        enumerator3 = rdChemReactions.EnumerateLibrary()
        with open(os.path.join(self.dataDir, "enumeration.pickle"), 'rb') as inf:
            enumerator3.InitFromString(inf.read())

        print("==", enumerator.GetEnumerator().Type(), enumerator2.GetEnumerator().Type())
        self.assertEqual(enumerator.GetEnumerator().Type(),
                         enumerator2.GetEnumerator().Type())
        enumerators.append(enumerator2)
        enumerators.append(enumerator3)

    # check for fully sampled and deterministic ordering in final index values
    expected_positions = [[0, 0], [1, 0], [0, 1], [1, 1], [0, 2], [1, 2]]

    out = []
    pickle_at_2 = None
    for en in enumerators:
        # i stays 0 if the enumerator yields nothing, so the check below
        # fails cleanly instead of raising NameError.
        i = 0
        positions = []
        for i, prods in enumerate(en):
            positions.append(list(en.GetPosition()))
            for mols in prods:
                self.assertEqual(len(mols), 1)
                smi = Chem.MolToSmiles(mols[0])
                if en is enumerator:
                    out.append(smi)
                self.assertEqual(smi, results[i])

            if en is enumerator and i == 1 and can_serialize:
                # save the state not at the start
                pickle_at_2 = enumerator.Serialize()
        self.assertEqual(i, 5)
        self.assertEqual(positions, expected_positions)

    if can_serialize:
        # see if we can restore the enumeration from the middle
        self.assertTrue(pickle_at_2 is not None)
        out3 = []
        enumerator3 = rdChemReactions.EnumerateLibrary()
        enumerator3.InitFromString(pickle_at_2)
        for prods in enumerator3:
            for mols in prods:
                self.assertEqual(len(mols), 1)
                smi = Chem.MolToSmiles(mols[0])
                out3.append(smi)

        self.assertEqual(out[2:], out3)

    # test smiles interface
    enumerator = rdChemReactions.EnumerateLibrary(rxn, reagents)
    i = 0
    while enumerator:
        for mols in enumerator.nextSmiles():
            self.assertEqual(len(mols), 1)
            self.assertEqual(mols[0], results[i])
        i += 1
    self.assertEqual(i, 6)
|
||||
|
||||
def testRandomEnumerateLibrary(self):
  """Exercise EnumerateLibrary with the RandomSampleStrategy.

  Two things are checked:
    1. random sampling eventually produces every expected product, and
    2. (only when serialization is available) an enumerator restored from a
       serialized string yields the same products as the enumerator it was
       serialized from, both from the start and from a mid-stream state.
  """
  log("testRandomEnumerateLibrary")
  smirks_thiourea = "[N;$(N-[#6]):3]=[C;$(C=S):1].[N;$(N[#6]);!$(N=*);!$([N-]);!$(N#*);!$([ND3]);!$([ND4]);!$(N[O,N]);!$(N[C,S]=[S,O,N]):2]>>[N:3]-[C:1]-[N+0:2]"
  rxn = rdChemReactions.ReactionFromSmarts(smirks_thiourea)
  reagents = [
    [Chem.MolFromSmiles('C=CCN=C=S'), Chem.MolFromSmiles('CC=CCN=C=S')],
    [
      Chem.MolFromSmiles('NCc1ncc(Cl)cc1Br'),
      Chem.MolFromSmiles('NCCc1ncc(Cl)cc1Br'),
      Chem.MolFromSmiles('NCCCc1ncc(Cl)cc1Br'),
    ],
  ]

  enumerator = rdChemReactions.EnumerateLibrary(rxn, reagents,
                                                rdChemReactions.RandomSampleStrategy())
  self.assertTrue(enumerator)
  smiresults = ['C=CCNC(=S)NCc1ncc(Cl)cc1Br',
                'CC=CCNC(=S)NCc1ncc(Cl)cc1Br',
                'C=CCNC(=S)NCCc1ncc(Cl)cc1Br',
                'CC=CCNC(=S)NCCc1ncc(Cl)cc1Br',
                'C=CCNC(=S)NCCCc1ncc(Cl)cc1Br',
                'CC=CCNC(=S)NCCCc1ncc(Cl)cc1Br']
  # Round-trip through RDKit so the expected strings are in canonical form.
  results = [Chem.MolToSmiles(Chem.MolFromSmiles(smi)) for smi in smiresults]
  expected = set(results)

  enumerator = rdChemReactions.EnumerateLibrary(rxn, reagents,
                                                rdChemReactions.RandomSampleStrategy())
  iteren = iter(enumerator)
  res = set()
  count = 0
  # Keep sampling until every expected product has been seen; give up (and
  # fail via the assertion) after 100,000 draws.
  while res != expected:
    count += 1
    if count > 100000:
      print("Unable to find enumerate set with 100,000 random samples!", file=sys.stderr)
      self.assertEqual(res, expected)

    prod = iteren.next()
    for mols in prod:
      res.add(Chem.MolToSmiles(mols[0]))

  if rdChemReactions.EnumerateLibraryCanSerialize():
    enumerator = rdChemReactions.EnumerateLibrary(rxn, reagents,
                                                  rdChemReactions.RandomSampleStrategy())
    # Named `serialized` (not `pickle`) to avoid shadowing the stdlib module name.
    serialized = enumerator.Serialize()
    enumerator2 = rdChemReactions.EnumerateLibrary()
    enumerator2.InitFromString(serialized)

    self.assertEqual(enumerator.GetEnumerator().Type(), enumerator2.GetEnumerator().Type())

    iteren = iter(enumerator)
    iteren2 = iter(enumerator2)

    outsmiles = []
    for i in range(10):
      prods1 = iteren.next()
      prods2 = iteren2.next()
      self.assertEqual(len(prods1), len(prods2))
      for mols1, mols2 in zip(prods1, prods2):
        self.assertEqual(len(mols1), 1)
        smi1 = Chem.MolToSmiles(mols1[0])
        self.assertEqual(smi1, Chem.MolToSmiles(mols2[0]))
        outsmiles.append(smi1)

      if i == 1:
        # save the state after two products have been generated
        pickle_at_2 = enumerator.Serialize()

    # make sure we can pickle the state as well
    enumerator3 = rdChemReactions.EnumerateLibrary()
    enumerator3.InitFromString(pickle_at_2)
    iteren3 = iter(enumerator3)
    outsmiles2 = []
    for i in range(8):
      prods3 = iteren3.next()
      for mols3 in prods3:
        self.assertEqual(len(mols3), 1)
        outsmiles2.append(Chem.MolToSmiles(mols3[0]))

    # The restored enumerator must replay products 3..10 of the original run.
    self.assertEqual(outsmiles2, outsmiles[2:])
|
||||
|
||||
def testRandomEnumerateAllBBsLibrary(self):
  """Exercise EnumerateLibrary with the RandomSampleAllBBsStrategy.

  Checks that the first few positions cover every building block of each
  reagent list and, when serialization is available, that a restored
  enumerator reproduces the products of the original one.
  """
  log("testRandomEnumerateAllBBsLibrary")
  smirks_thiourea = "[N;$(N-[#6]):3]=[C;$(C=S):1].[N;$(N[#6]);!$(N=*);!$([N-]);!$(N#*);!$([ND3]);!$([ND4]);!$(N[O,N]);!$(N[C,S]=[S,O,N]):2]>>[N:3]-[C:1]-[N+0:2]"
  rxn = rdChemReactions.ReactionFromSmarts(smirks_thiourea)
  reagents = [
    [Chem.MolFromSmiles('C=CCN=C=S'), Chem.MolFromSmiles('CC=CCN=C=S')],
    [
      Chem.MolFromSmiles('NCc1ncc(Cl)cc1Br'),
      Chem.MolFromSmiles('NCCc1ncc(Cl)cc1Br'),
      Chem.MolFromSmiles('NCCCc1ncc(Cl)cc1Br'),
    ],
  ]
  enumerator = rdChemReactions.EnumerateLibrary(rxn, reagents,
                                                rdChemReactions.RandomSampleAllBBsStrategy())
  self.assertTrue(enumerator)

  # test the BB sampling here
  strategy = iter(enumerator)
  r1 = set()
  r2 = set()
  for step in range(3):
    strategy.next()
    groups = strategy.GetPosition()
    print("**", list(groups), file=sys.stderr)
    r1.add(groups[0])
    r2.add(groups[1])
    if step == 1:
      # two bbs at reagent one: all sampled after two iterations
      self.assertEqual(r1, set([0, 1]))
  # three bbs at reagent two: all sampled after three iterations
  self.assertEqual(r2, set([0, 1, 2]))

  if rdChemReactions.EnumerateLibraryCanSerialize():
    enumerator = rdChemReactions.EnumerateLibrary(rxn, reagents,
                                                  rdChemReactions.RandomSampleAllBBsStrategy())
    self.assertTrue(enumerator)

    # Named `serialized` (not `pickle`) to avoid shadowing the stdlib module name.
    serialized = enumerator.Serialize()
    enumerator2 = rdChemReactions.EnumerateLibrary()
    enumerator2.InitFromString(serialized)

    self.assertEqual(enumerator.GetEnumerator().Type(), enumerator2.GetEnumerator().Type())
    iteren = iter(enumerator)
    iteren2 = iter(enumerator2)

    outsmiles = []
    for i in range(10):
      prods1 = iteren.next()
      prods2 = iteren2.next()
      self.assertEqual(len(prods1), len(prods2))
      for mols1, mols2 in zip(prods1, prods2):
        self.assertEqual(len(mols1), 1)
        smi1 = Chem.MolToSmiles(mols1[0])
        self.assertEqual(smi1, Chem.MolToSmiles(mols2[0]))
        outsmiles.append(smi1)

      if i == 1:
        # save the state after two products have been generated
        pickle_at_2 = enumerator.Serialize()

    # make sure we can pickle the state as well
    enumerator3 = rdChemReactions.EnumerateLibrary()
    enumerator3.InitFromString(pickle_at_2)
    self.assertEqual(enumerator.GetEnumerator().Type(), enumerator3.GetEnumerator().Type())

    iteren3 = iter(enumerator3)
    outsmiles2 = []
    for i in range(8):
      prods3 = iteren3.next()
      for mols3 in prods3:
        self.assertEqual(len(mols3), 1)
        outsmiles2.append(Chem.MolToSmiles(mols3[0]))

    # The restored enumerator must replay products 3..10 of the original run.
    self.assertEqual(outsmiles2, outsmiles[2:])
|
||||
|
||||
|
||||
def testRGroupState(self):
  """Check GetState/SetState/ResetState on EnumerateLibrary.

  For each enumeration strategy, the state is captured before any product is
  generated, two products are generated, the state is restored, and the same
  two products must be generated again. Finally ResetState() must undo a
  Skip() on the underlying enumerator.

  The test is skipped (with a message on stderr) when EnumerateLibrary
  serialization is not enabled.
  """
  if not rdChemReactions.EnumerateLibraryCanSerialize():
    print("-- Skipping testRGroupState, serialization of EnumerateLibrary not enabled", file=sys.stderr)
    return

  log("testRGroupState")
  smirks_thiourea = "[N;$(N-[#6]):3]=[C;$(C=S):1].[N;$(N[#6]);!$(N=*);!$([N-]);!$(N#*);!$([ND3]);!$([ND4]);!$(N[O,N]);!$(N[C,S]=[S,O,N]):2]>>[N:3]-[C:1]-[N+0:2]"
  rxn = rdChemReactions.ReactionFromSmarts(smirks_thiourea)
  reagents = [
    [Chem.MolFromSmiles('C=CCN=C=S'), Chem.MolFromSmiles('CC=CCN=C=S')],
    [
      Chem.MolFromSmiles('NCc1ncc(Cl)cc1Br'),
      Chem.MolFromSmiles('NCCc1ncc(Cl)cc1Br'),
      Chem.MolFromSmiles('NCCCc1ncc(Cl)cc1Br'),
    ],
  ]

  def tostr(l):
    # Convert the nested result of nextSmiles() into plain lists of str so
    # two results can be compared with assertEqual.
    return [[str(x) for x in v] for v in l]

  # None means "use the enumerator's default strategy".
  strategy_factories = (None,
                        rdChemReactions.RandomSampleStrategy,
                        rdChemReactions.RandomSampleAllBBsStrategy)
  for make_strategy in strategy_factories:
    if make_strategy is None:
      enumerator = rdChemReactions.EnumerateLibrary(rxn, reagents)
    else:
      enumerator = rdChemReactions.EnumerateLibrary(rxn, reagents, make_strategy())

    state = enumerator.GetState()
    p = enumerator.nextSmiles()
    p2 = enumerator.nextSmiles()
    # Rewind to the captured state: the same two products must come out again.
    enumerator.SetState(state)
    self.assertEqual(tostr(enumerator.nextSmiles()), tostr(p))
    self.assertEqual(tostr(enumerator.nextSmiles()), tostr(p2))

  enumerator = rdChemReactions.EnumerateLibrary(rxn, reagents)
  smiresults = ['C=CCNC(=S)NCc1ncc(Cl)cc1Br',
                'CC=CCNC(=S)NCc1ncc(Cl)cc1Br',
                'C=CCNC(=S)NCCc1ncc(Cl)cc1Br',
                'CC=CCNC(=S)NCCc1ncc(Cl)cc1Br',
                'C=CCNC(=S)NCCCc1ncc(Cl)cc1Br',
                'CC=CCNC(=S)NCCCc1ncc(Cl)cc1Br']
  smiresults = [Chem.MolToSmiles(Chem.MolFromSmiles(smi)) for smi in smiresults]
  # Move the enumerator forward, then reset: enumeration must start over.
  enumerator.GetEnumerator().Skip(10)
  enumerator.ResetState()

  results = []
  for result in enumerator:
    for prodSet in result:
      for mol in prodSet:
        results.append(Chem.MolToSmiles(mol))

  self.assertEqual(results, smiresults)
|
||||
|
||||
def testRemovingBadMatches(self):
  """An enumeration whose reagent lists are swapped must yield no products."""
  log("testRemoveBadMatches")
  smirks_thiourea = "[N;$(N-[#6]):3]=[C;$(C=S):1].[N;$(N[#6]);!$(N=*);!$([N-]);!$(N#*);!$([ND3]);!$([ND4]);!$(N[O,N]);!$(N[C,S]=[S,O,N]):2]>>[N:3]-[C:1]-[N+0:2]"

  rxn = rdChemReactions.ReactionFromSmarts(smirks_thiourea)
  # invert matches so nothing matches: the amines are supplied for the first
  # reactant template and the isothiocyanates (plus two alkanes) for the second
  reagents = [
    [
      Chem.MolFromSmiles('NCc1ncc(Cl)cc1Br'),
      Chem.MolFromSmiles('NCCc1ncc(Cl)cc1Br'),
      Chem.MolFromSmiles('NCCCc1ncc(Cl)cc1Br'),
    ],
    [
      Chem.MolFromSmiles('C=CCN=C=S'),
      Chem.MolFromSmiles('CC=CCN=C=S'),
      Chem.MolFromSmiles('CCC'),
      Chem.MolFromSmiles('CCCCC'),
    ],
  ]

  enumerator = rdChemReactions.EnumerateLibrary(rxn, reagents)
  self.assertEqual([], list(enumerator))
|
||||
|
||||
def testRemoveInsaneReagents(self):
  """Check reagent filtering in EnumerateLibrary after reaction sanitization.

  The same ChemDraw-style RXN block is loaded several times and sanitized
  with either the default or the ChemDraw adjustment parameters; the number
  of building blocks EnumerateLibrary keeps per reactant is then checked,
  with and without a reagentMaxMatchCount limit.
  """
  rxndata = "$RXN\nUntitled Document-1\n ChemDraw10291618492D\n\n 3 1\n$MOL\n\n\n\n 2 1 0 0 0 0 0 0 0 0999 V2000\n 0.4125 0.0000 0.0000 N 0 0 0 0 0 0 0 0 0 3 0 0\n -0.4125 0.0000 0.0000 R2 0 0 0 0 0 0 0 0 0 2 0 0\n 1 2 1 0 0\nM END\n$MOL\n\n\n\n 2 1 0 0 0 0 0 0 0 0999 V2000\n -0.4125 0.0000 0.0000 R1 0 0 0 0 0 0 0 0 0 1 0 0\n 0.4125 0.0000 0.0000 Cl 0 0 0 0 0 0 0 0 0 0 0 0\n 1 2 1 0 0\nM END\n$MOL\n\n\n\n 2 1 0 0 0 0 0 0 0 0999 V2000\n 0.4125 0.0000 0.0000 N 0 0 0 0 0 0 0 0 0 5 0 0\n -0.4125 0.0000 0.0000 R4 0 0 0 0 0 0 0 0 0 4 0 0\n 1 2 1 0 0\nM END\n$MOL\n\n\n\n 14 15 0 0 0 0 0 0 0 0999 V2000\n 0.5072 -0.5166 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 0.5072 0.3084 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 1.2949 -0.7616 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n 1.7817 -0.0880 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 1.2967 0.5794 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 1.5558 -1.5443 0.0000 R1 0 0 0 0 0 0 0 0 0 1 0 0\n -0.2073 0.7208 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -0.9218 0.3083 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -0.9217 -0.5167 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -0.2073 -0.9292 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -1.6362 0.7208 0.0000 N 0 0 0 0 0 0 0 0 0 3 0 0\n 1.5452 1.3661 0.0000 N 0 0 0 0 0 0 0 0 0 5 0 0\n 2.3507 1.5443 0.0000 R4 0 0 0 0 0 0 0 0 0 4 0 0\n -2.3507 0.3083 0.0000 R2 0 0 0 0 0 0 0 0 0 2 0 0\n 1 2 2 0 0\n 1 3 1 0 0\n 3 4 1 0 0\n 4 5 1 0 0\n 5 2 1 0 0\n 3 6 1 0 0\n 2 7 1 0 0\n 7 8 2 0 0\n 8 9 1 0 0\n 9 10 2 0 0\n 10 1 1 0 0\n 8 11 1 0 0\n 12 13 1 0 0\n 11 14 1 0 0\n 12 5 1 0 0\nM END\n"

  rxn = AllChem.ReactionFromRxnBlock(rxndata)
  r1 = [
    Chem.MolFromSmiles("CCNCC"),
    Chem.MolFromSmiles("NCC"),
  ]
  r2 = [
    Chem.MolFromSmiles("ClC1CCCC1"),
    Chem.MolFromSmiles("ClC1CCCC1Cl"),
  ]
  r3 = [
    Chem.MolFromSmiles("CCNCC"),
    Chem.MolFromSmiles("NCC"),
  ]
  bbs = [r1, r2, r3]

  def reagent_counts(library):
    # Number of building blocks the library kept for each of the 3 reactants.
    return [len(library.GetReagents()[idx]) for idx in range(3)]

  # nothing matches!
  for i, reagent in enumerate(rxn.GetReactants()):
    for bb in bbs[i]:
      self.assertFalse(bb.HasSubstructMatch(reagent))

  # everything matches - yay sanitization!
  rdChemReactions.SanitizeRxn(rxn)
  for i, reagent in enumerate(rxn.GetReactants()):
    for bb in bbs[i]:
      self.assertTrue(bb.HasSubstructMatch(reagent))

  en = rdChemReactions.EnumerateLibrary(rxn, bbs)
  self.assertEqual(reagent_counts(en), [2, 2, 2])

  #####################################################################################
  # Match only at rgroups (ChemDraw style)
  rxn = AllChem.ReactionFromRxnBlock(rxndata)
  expected_matches = [[False, True], [True, True], [False, True]]
  rdChemReactions.SanitizeRxn(rxn, params=rdChemReactions.GetChemDrawRxnAdjustParams())
  for i, (reagent, expected) in enumerate(zip(rxn.GetReactants(), expected_matches)):
    # Test every building block of reactant i against that reactant's template.
    match = [bb.HasSubstructMatch(reagent) for bb in bbs[i]]
    self.assertEqual(match, expected)

  # Now try EnumerateLibrary
  en = rdChemReactions.EnumerateLibrary(rxn, bbs)
  self.assertEqual(reagent_counts(en), [1, 2, 1])

  #####################################################################################
  # now set the removal options to only make one product per reagent set
  rxn = AllChem.ReactionFromRxnBlock(rxndata)
  rdChemReactions.SanitizeRxn(rxn)

  opts = rdChemReactions.EnumerationParams()
  opts.reagentMaxMatchCount = 1
  en = rdChemReactions.EnumerateLibrary(rxn, bbs, params=opts)
  self.assertEqual(reagent_counts(en), [1, 1, 1])

  #####################################################################################
  # now set the removal options to only make one product per reagent set,
  # but with the ChemDraw-style adjustment parameters
  rxn = AllChem.ReactionFromRxnBlock(rxndata)
  rdChemReactions.SanitizeRxn(rxn,
                              params=rdChemReactions.GetChemDrawRxnAdjustParams())

  opts = rdChemReactions.EnumerationParams()
  opts.reagentMaxMatchCount = 1
  en = rdChemReactions.EnumerateLibrary(rxn, bbs, params=opts)
  self.assertEqual(reagent_counts(en), [1, 1, 1])
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
239
Code/GraphMol/ChemReactions/Wrap/testSanitize.py
Normal file
239
Code/GraphMol/ChemReactions/Wrap/testSanitize.py
Normal file
@@ -0,0 +1,239 @@
|
||||
# Copyright (c) 2015, Novartis Institutes for BioMedical Research Inc.
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above
|
||||
# copyright notice, this list of conditions and the following
|
||||
# disclaimer in the documentation and/or other materials provided
|
||||
# with the distribution.
|
||||
# * Neither the name of Novartis Institutes for BioMedical Research Inc.
|
||||
# nor the names of its contributors may be used to endorse or promote
|
||||
# products derived from this software without specific prior written
|
||||
# permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
from __future__ import print_function
|
||||
|
||||
import unittest
|
||||
import os,sys
|
||||
|
||||
from rdkit.six.moves import cPickle
|
||||
|
||||
from rdkit import rdBase
|
||||
from rdkit import Chem
|
||||
from rdkit.Chem import rdChemReactions, AllChem
|
||||
from rdkit import Geometry
|
||||
from rdkit import RDConfig
|
||||
import itertools, time
|
||||
|
||||
test_data = [("good", '''$RXN
|
||||
|
||||
ISIS 052820091627
|
||||
|
||||
2 1
|
||||
$MOL
|
||||
|
||||
-ISIS- 05280916272D
|
||||
|
||||
2 1 0 0 0 0 0 0 0 0999 V2000
|
||||
-3.2730 -7.0542 0.0000 Br 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-3.9875 -7.4667 0.0000 R# 0 0 0 0 0 0 0 0 0 1 0 0
|
||||
1 2 1 0 0 0 0
|
||||
V 1 halogen.bromine.aromatic
|
||||
M RGP 1 2 1
|
||||
M END
|
||||
$MOL
|
||||
|
||||
-ISIS- 05280916272D
|
||||
|
||||
4 3 0 0 0 0 0 0 0 0999 V2000
|
||||
3.4375 -7.7917 0.0000 R# 0 0 0 0 0 0 0 0 0 2 0 0
|
||||
4.1520 -7.3792 0.0000 B 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
4.1520 -6.5542 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
4.8664 -7.7917 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
2 3 1 0 0 0 0
|
||||
1 2 1 0 0 0 0
|
||||
2 4 1 0 0 0 0
|
||||
V 2 boronicacid
|
||||
M RGP 1 1 2
|
||||
M END
|
||||
$MOL
|
||||
|
||||
-ISIS- 05280916272D
|
||||
|
||||
2 1 0 0 0 0 0 0 0 0999 V2000
|
||||
11.2667 -7.3417 0.0000 R# 0 0 0 0 0 0 0 0 0 1 0 0
|
||||
11.9811 -6.9292 0.0000 R# 0 0 0 0 0 0 0 0 0 2 0 0
|
||||
1 2 1 0 0 0 0
|
||||
M RGP 2 1 1 2 2
|
||||
M END'''),
|
||||
|
||||
("bad", '''$RXN
|
||||
|
||||
ISIS 052820091627
|
||||
|
||||
2 1
|
||||
$MOL
|
||||
|
||||
-ISIS- 05280916272D
|
||||
|
||||
2 1 0 0 0 0 0 0 0 0999 V2000
|
||||
-3.2730 -7.0542 0.0000 Br 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-3.9875 -7.4667 0.0000 R# 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
1 2 1 0 0 0 0
|
||||
V 1 halogen.bromine.aromatic
|
||||
M RGP 1 2 1
|
||||
M END
|
||||
$MOL
|
||||
|
||||
-ISIS- 05280916272D
|
||||
|
||||
4 3 0 0 0 0 0 0 0 0999 V2000
|
||||
3.4375 -7.7917 0.0000 R# 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
4.1520 -7.3792 0.0000 B 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
4.1520 -6.5542 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
4.8664 -7.7917 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
2 3 1 0 0 0 0
|
||||
1 2 1 0 0 0 0
|
||||
2 4 1 0 0 0 0
|
||||
V 2 boronicacid
|
||||
M RGP 1 1 2
|
||||
M END
|
||||
$MOL
|
||||
|
||||
-ISIS- 05280916272D
|
||||
|
||||
2 1 0 0 0 0 0 0 0 0999 V2000
|
||||
11.2667 -7.3417 0.0000 R# 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
11.9811 -6.9292 0.0000 R# 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
1 2 1 0 0 0 0
|
||||
M RGP 2 1 1 2 2
|
||||
M END'''),
|
||||
# chemdraw style
|
||||
("bad", '''$RXN
|
||||
|
||||
ISIS 052820091627
|
||||
|
||||
2 1
|
||||
$MOL
|
||||
|
||||
-ISIS- 05280916272D
|
||||
|
||||
2 1 0 0 0 0 0 0 0 0999 V2000
|
||||
-3.2730 -7.0542 0.0000 Br 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-3.9875 -7.4667 0.0000 R1 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
1 2 1 0 0 0 0
|
||||
V 1 halogen.bromine.aromatic
|
||||
M END
|
||||
$MOL
|
||||
|
||||
-ISIS- 05280916272D
|
||||
|
||||
4 3 0 0 0 0 0 0 0 0999 V2000
|
||||
3.4375 -7.7917 0.0000 R2 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
4.1520 -7.3792 0.0000 B 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
4.1520 -6.5542 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
4.8664 -7.7917 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
2 3 1 0 0 0 0
|
||||
1 2 1 0 0 0 0
|
||||
2 4 1 0 0 0 0
|
||||
V 2 boronicacid
|
||||
M END
|
||||
$MOL
|
||||
|
||||
-ISIS- 05280916272D
|
||||
|
||||
2 1 0 0 0 0 0 0 0 0999 V2000
|
||||
11.2667 -7.3417 0.0000 R1 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
11.9811 -6.9292 0.0000 R2 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
1 2 1 0 0 0 0
|
||||
M END'''),
|
||||
("fail", '''$RXN
|
||||
|
||||
ISIS 052820091627
|
||||
|
||||
2 1
|
||||
$MOL
|
||||
|
||||
-ISIS- 05280916272D
|
||||
|
||||
2 1 0 0 0 0 0 0 0 0999 V2000
|
||||
-3.2730 -7.0542 0.0000 Br 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-3.9875 -7.4667 0.0000 R1 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
1 2 1 0 0 0 0
|
||||
V 1 halogen.bromine.aromatic
|
||||
M END
|
||||
$MOL
|
||||
|
||||
-ISIS- 05280916272D
|
||||
|
||||
4 3 0 0 0 0 0 0 0 0999 V2000
|
||||
3.4375 -7.7917 0.0000 R3 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
4.1520 -7.3792 0.0000 B 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
4.1520 -6.5542 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
4.8664 -7.7917 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
2 3 1 0 0 0 0
|
||||
1 2 1 0 0 0 0
|
||||
2 4 1 0 0 0 0
|
||||
V 2 boronicacid
|
||||
M END
|
||||
$MOL
|
||||
|
||||
-ISIS- 05280916272D
|
||||
|
||||
2 1 0 0 0 0 0 0 0 0999 V2000
|
||||
11.2667 -7.3417 0.0000 R1 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
11.9811 -6.9292 0.0000 R2 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
1 2 1 0 0 0 0
|
||||
M END'''),
|
||||
]
|
||||
|
||||
good_res = (0,0,2,1,(((0, 'halogen.bromine.aromatic'),), ((1, 'boronicacid'),)))
|
||||
bad_res = (3,0,2,1,(((0, 'halogen.bromine.aromatic'),), ((1, 'boronicacid'),)))
|
||||
|
||||
class TestCase(unittest.TestCase):
  """Tests for rdChemReactions.SanitizeRxn on the RXN blocks in test_data."""

  def test_sanitize(self):
    """Check PreprocessReaction results before and after SanitizeRxn.

    Each test_data entry is (status, rxn block):
      * "good" - preprocessing the unsanitized reaction gives good_res
      * "bad"  - preprocessing the unsanitized reaction gives bad_res
      * "fail" - sanitizing/preprocessing is expected not to reach good_res
    After SanitizeRxn, "good" and "bad" blocks must both preprocess to
    good_res.
    """
    for status, block in test_data:
      print("*" * 44)
      rxna = AllChem.ReactionFromRxnBlock(block)
      rxnb = AllChem.ReactionFromRxnBlock(block)
      rxna.Initialize()
      res = rdChemReactions.PreprocessReaction(rxna)
      print(AllChem.ReactionToRxnBlock(rxna))
      if status == "good":
        self.assertEqual(res, good_res)
      elif status == "bad":
        self.assertEqual(res, bad_res)
      print(">" * 44)
      rxnb.Initialize()
      try:
        rdChemReactions.SanitizeRxn(rxnb)
        res = rdChemReactions.PreprocessReaction(rxnb)
        print(AllChem.ReactionToRxnBlock(rxnb))
        self.assertEqual(res, good_res)
        # NOTE(review): for a "fail" block this assertion is raised inside the
        # try and handled below as the expected failure, so a "fail" block
        # that sanitizes cleanly is not reported - confirm whether intended.
        self.assertNotEqual(status, "fail")
      except Exception:
        print("$RXN Failed")
        if status != "fail":
          # Only "fail" blocks are allowed to fail; anything else is a real
          # test failure and must not be swallowed.
          raise
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
@@ -1,11 +1,15 @@
|
||||
import sys
|
||||
|
||||
tests = [("python", "testReactionWrapper.py", {}), ]
|
||||
tests=[
|
||||
("python", "testReactionWrapper.py",{}),
|
||||
("python", "testEnumerations.py",{}),
|
||||
]
|
||||
|
||||
longTests = []
|
||||
longTests=[
|
||||
]
|
||||
|
||||
if __name__ == '__main__':
|
||||
if __name__=='__main__':
|
||||
import sys
|
||||
from rdkit import TestRunner
|
||||
failed, tests = TestRunner.RunScript('test_list.py', 0, 1)
|
||||
failed,tests = TestRunner.RunScript('test_list.py',0,1)
|
||||
sys.exit(len(failed))
|
||||
|
||||
BIN
Code/GraphMol/ChemReactions/testData/enumeration.pickle
Normal file
BIN
Code/GraphMol/ChemReactions/testData/enumeration.pickle
Normal file
Binary file not shown.
@@ -5943,6 +5943,8 @@ void testCopyConstructor() {
|
||||
removeMappingNumbersFromReactions(*rxn_new);
|
||||
std::string smi2 = ChemicalReactionToRxnSmiles(*rxn);
|
||||
std::string new_smi = ChemicalReactionToRxnSmiles(*rxn_new);
|
||||
std::cerr << "smi1 " << smi1 << std::endl;
|
||||
std::cerr << "smi2 " << smi2 << std::endl;
|
||||
TEST_ASSERT(smi1 == smi2);
|
||||
TEST_ASSERT(smi2 != new_smi);
|
||||
TEST_ASSERT(new_smi == "CCC(N)(O)Cl>>CC(C)(N)O.Cl");
|
||||
|
||||
1095
Code/GraphMol/ChemReactions/tutorial/EnumerationToolkit.ipynb
Normal file
1095
Code/GraphMol/ChemReactions/tutorial/EnumerationToolkit.ipynb
Normal file
File diff suppressed because it is too large
Load Diff
@@ -116,10 +116,19 @@ class FilterMatcherBase
|
||||
virtual bool hasMatch(const ROMol &mol) const = 0;
|
||||
|
||||
//------------------------------------
|
||||
//! Clone
|
||||
//! Clone - deprecated
|
||||
// Clones the current FilterMatcherBase into one that
|
||||
// can be passed around safely.
|
||||
virtual boost::shared_ptr<FilterMatcherBase> Clone() const = 0;
|
||||
virtual boost::shared_ptr<FilterMatcherBase> Clone() const {
|
||||
BOOST_LOG(rdWarningLog) << "FilterMatcherBase::Clone is deprecated, use copy instead" << std::endl;
|
||||
return copy();
|
||||
}
|
||||
|
||||
//------------------------------------
|
||||
//! copy
|
||||
// copies the current FilterMatcherBase into one that
|
||||
// can be passed around safely.
|
||||
virtual boost::shared_ptr<FilterMatcherBase> copy() const = 0;
|
||||
|
||||
private:
|
||||
#ifdef RDK_USE_BOOST_SERIALIZATION
|
||||
|
||||
@@ -91,7 +91,7 @@ bool SmartsMatcher::getMatches(const ROMol &mol,
|
||||
if (d_min_count == 1 && d_max_count == UINT_MAX) {
|
||||
RDKit::MatchVectType match;
|
||||
onPatExists = RDKit::SubstructMatch(mol, *d_pattern.get(), match);
|
||||
if (onPatExists) matchVect.push_back(FilterMatch(Clone(), match));
|
||||
if (onPatExists) matchVect.push_back(FilterMatch(copy(), match));
|
||||
} else { // need to count
|
||||
const bool uniquify = true;
|
||||
unsigned int count =
|
||||
@@ -99,7 +99,7 @@ bool SmartsMatcher::getMatches(const ROMol &mol,
|
||||
onPatExists = (count >= d_min_count &&
|
||||
(d_max_count == UINT_MAX || count <= d_max_count));
|
||||
if (onPatExists) {
|
||||
boost::shared_ptr<FilterMatcherBase> clone = Clone();
|
||||
boost::shared_ptr<FilterMatcherBase> clone = copy();
|
||||
for (size_t i = 0; i < matches.size(); ++i) {
|
||||
matchVect.push_back(FilterMatch(clone, matches[i]));
|
||||
}
|
||||
|
||||
@@ -58,7 +58,7 @@ class And : public FilterMatcherBase {
|
||||
//! True if arg1 and arg2 FilterMatchers are true
|
||||
|
||||
And(const FilterMatcherBase &arg1, const FilterMatcherBase &arg2)
|
||||
: FilterMatcherBase("And"), arg1(arg1.Clone()), arg2(arg2.Clone()) {}
|
||||
: FilterMatcherBase("And"), arg1(arg1.copy()), arg2(arg2.copy()) {}
|
||||
|
||||
And(const boost::shared_ptr<FilterMatcherBase> &arg1,
|
||||
const boost::shared_ptr<FilterMatcherBase> &arg2)
|
||||
@@ -93,7 +93,7 @@ class And : public FilterMatcherBase {
|
||||
return false;
|
||||
}
|
||||
|
||||
boost::shared_ptr<FilterMatcherBase> Clone() const {
|
||||
boost::shared_ptr<FilterMatcherBase> copy() const {
|
||||
return boost::shared_ptr<FilterMatcherBase>(new And(*this));
|
||||
}
|
||||
|
||||
@@ -122,7 +122,7 @@ class Or : public FilterMatcherBase {
|
||||
//! Constructs or Ander
|
||||
//! true if arg1 or arg2 are true
|
||||
Or(const FilterMatcherBase &arg1, const FilterMatcherBase &arg2)
|
||||
: FilterMatcherBase("Or"), arg1(arg1.Clone()), arg2(arg2.Clone()) {}
|
||||
: FilterMatcherBase("Or"), arg1(arg1.copy()), arg2(arg2.copy()) {}
|
||||
|
||||
Or(const boost::shared_ptr<FilterMatcherBase> &arg1,
|
||||
const boost::shared_ptr<FilterMatcherBase> &arg2)
|
||||
@@ -154,7 +154,7 @@ class Or : public FilterMatcherBase {
|
||||
return res1 || res2;
|
||||
}
|
||||
|
||||
boost::shared_ptr<FilterMatcherBase> Clone() const {
|
||||
boost::shared_ptr<FilterMatcherBase> copy() const {
|
||||
return boost::shared_ptr<FilterMatcherBase>(new Or(*this));
|
||||
}
|
||||
|
||||
@@ -182,7 +182,7 @@ class Not : public FilterMatcherBase {
|
||||
// from getMatches since a false internal match matches
|
||||
// nothing!
|
||||
Not(const FilterMatcherBase &arg1)
|
||||
: FilterMatcherBase("Not"), arg1(arg1.Clone()) {}
|
||||
: FilterMatcherBase("Not"), arg1(arg1.copy()) {}
|
||||
|
||||
Not(const boost::shared_ptr<FilterMatcherBase> &arg1)
|
||||
: FilterMatcherBase("Not"), arg1(arg1) {}
|
||||
@@ -208,7 +208,7 @@ class Not : public FilterMatcherBase {
|
||||
return !arg1->getMatches(mol, matchVect);
|
||||
}
|
||||
|
||||
boost::shared_ptr<FilterMatcherBase> Clone() const {
|
||||
boost::shared_ptr<FilterMatcherBase> copy() const {
|
||||
return boost::shared_ptr<FilterMatcherBase>(new Not(*this));
|
||||
}
|
||||
|
||||
@@ -321,7 +321,7 @@ class SmartsMatcher : public FilterMatcherBase {
|
||||
virtual bool getMatches(const ROMol &mol,
|
||||
std::vector<FilterMatch> &matchVect) const;
|
||||
virtual bool hasMatch(const ROMol &mol) const;
|
||||
virtual boost::shared_ptr<FilterMatcherBase> Clone() const {
|
||||
virtual boost::shared_ptr<FilterMatcherBase> copy() const {
|
||||
return boost::shared_ptr<FilterMatcherBase>(new SmartsMatcher(*this));
|
||||
}
|
||||
|
||||
@@ -403,7 +403,7 @@ class ExclusionList : public FilterMatcherBase {
|
||||
|
||||
void addPattern(const FilterMatcherBase &base) {
|
||||
PRECONDITION(base.isValid(), "Invalid FilterMatcherBase");
|
||||
d_offPatterns.push_back(base.Clone());
|
||||
d_offPatterns.push_back(base.copy());
|
||||
}
|
||||
|
||||
void setExclusionPatterns(
|
||||
@@ -433,7 +433,7 @@ class ExclusionList : public FilterMatcherBase {
|
||||
return result;
|
||||
}
|
||||
|
||||
virtual boost::shared_ptr<FilterMatcherBase> Clone() const {
|
||||
virtual boost::shared_ptr<FilterMatcherBase> copy() const {
|
||||
return boost::shared_ptr<FilterMatcherBase>(new ExclusionList(*this));
|
||||
}
|
||||
|
||||
@@ -469,7 +469,7 @@ public:
|
||||
*/
|
||||
FilterHierarchyMatcher(const FilterMatcherBase &matcher) :
|
||||
FilterMatcherBase(),
|
||||
d_matcher(matcher.Clone()) {
|
||||
d_matcher(matcher.copy()) {
|
||||
}
|
||||
|
||||
//! Return the name for this node (from the underlying FilterMatcherBase)
|
||||
@@ -491,7 +491,7 @@ public:
|
||||
*/
|
||||
void setPattern(const FilterMatcherBase & matcher) {
|
||||
PRECONDITION(matcher.isValid(), "Adding invalid patterns is not allowed.");
|
||||
d_matcher = matcher.Clone();
|
||||
d_matcher = matcher.copy();
|
||||
PRECONDITION(getName() == d_matcher->getName(), "Opps");
|
||||
}
|
||||
|
||||
@@ -527,8 +527,8 @@ public:
|
||||
return getMatches(mol, temp);
|
||||
}
|
||||
|
||||
//! Clones the FilterHierarchyMatcher into a FilterMatcherBase
|
||||
virtual boost::shared_ptr<FilterMatcherBase> Clone() const {
|
||||
//! Copies the FilterHierarchyMatcher into a FilterMatcherBase
|
||||
virtual boost::shared_ptr<FilterMatcherBase> copy() const {
|
||||
return boost::shared_ptr<FilterMatcherBase>(new FilterHierarchyMatcher(*this));
|
||||
}
|
||||
private:
|
||||
|
||||
@@ -71,7 +71,7 @@ void SetOffPatterns(ExclusionList &fc, boost::python::object list) {
|
||||
std::vector<boost::shared_ptr<FilterMatcherBase> > temp;
|
||||
|
||||
for (; begin != end; ++begin) {
|
||||
temp.push_back((*begin)->Clone());
|
||||
temp.push_back((*begin)->copy());
|
||||
}
|
||||
fc.setExclusionPatterns(temp);
|
||||
}
|
||||
@@ -147,7 +147,7 @@ class PythonFilterMatch : public FilterMatcherBase {
|
||||
functor(self),
|
||||
incref(false){};
|
||||
|
||||
// ONLY CALLED FROM C++ from the Clone operation
|
||||
// ONLY CALLED FROM C++ from the copy operation
|
||||
PythonFilterMatch(const PythonFilterMatch &rhs)
|
||||
: FilterMatcherBase(rhs), functor(rhs.functor), incref(true) {
|
||||
python::incref(functor);
|
||||
@@ -174,7 +174,7 @@ class PythonFilterMatch : public FilterMatcherBase {
|
||||
return python::call_method<bool>(functor, "HasMatch", boost::ref(mol));
|
||||
}
|
||||
|
||||
virtual boost::shared_ptr<FilterMatcherBase> Clone() const {
|
||||
virtual boost::shared_ptr<FilterMatcherBase> copy() const {
|
||||
return boost::shared_ptr<FilterMatcherBase>(new PythonFilterMatch(*this));
|
||||
}
|
||||
};
|
||||
|
||||
49
Contrib/Glare/README.txt
Normal file
49
Contrib/Glare/README.txt
Normal file
@@ -0,0 +1,49 @@
|
||||
Glare Algorithm.
|
||||
|
||||
Implementation of
|
||||
|
||||
GLARE: A New Approach for Filtering Large Reagent Lists in
|
||||
Combinatorial Library Design Using Product Properties
|
||||
Jean-Francois Truchon* and Christopher I. Bayly
|
||||
|
||||
http://pubs.acs.org/doi/pdf/10.1021/ci0504871
|
||||
|
||||
Usage:
|
||||
# somehow make sidechains1/2 with props [mw, alogp, tpsa]
|
||||
r1 = RGroups(sidechains1)
|
||||
r2 = RGroups(sidechains2)
|
||||
lib = Library([r1, r2])
|
||||
props = [
|
||||
Property("mw", 0, 0, 500),
|
||||
Property("alogp", 1, -2.4, 5),
|
||||
Property("tpsa", 2, 0, 90)
|
||||
]
|
||||
|
||||
glare = Glare()
|
||||
glare.optimize(lib, props)
|
||||
|
||||
Notes:
|
||||
Some nomenclature:
|
||||
|
||||
A Library is made of RGroups
|
||||
RGroups are a collection of sidechains (the paper uses Fragments)
|
||||
that can populate the rgroup position.
|
||||
|
||||
We desire to optimize the Library so that we have a good chance
|
||||
of making the desired products.
|
||||
|
||||
From the testing code, using Fake data:
|
||||
|
||||
r1 = RGroups(makeFakeSidechains("aldehydes", num=1000))
|
||||
r2 = RGroups(makeFakeSidechains("boronic_acids", num=1500))
|
||||
|
||||
libs = Library([r1,r2])
|
||||
props = [
|
||||
Property("mw", propIdx=0, minValue=0, maxValue=500),
|
||||
Property("alogp", propIdx=1, minValue=-2.4, maxValue=5),
|
||||
Property("tpsa", propIdx=2, minValue=0, maxValue=90)
|
||||
]
|
||||
|
||||
glare = Glare()
|
||||
# optimize the library...
|
||||
glare.optimize(libs, props)
|
||||
444
Contrib/Glare/glare.py
Executable file
444
Contrib/Glare/glare.py
Executable file
@@ -0,0 +1,444 @@
|
||||
from __future__ import print_function
|
||||
import random, operator, itertools, math
|
||||
|
||||
"""
|
||||
Glare Algorithm
|
||||
|
||||
Some nomenclature:
|
||||
|
||||
A Library is made of RGroups
|
||||
RGroups are a collection of sidechains (the paper uses Fragments)
|
||||
that can populate the rgroup position.
|
||||
|
||||
We desire to optimize the Library so that we have a good chance
|
||||
of making the desired products.
|
||||
|
||||
Example From the testing code, using Fake data:
|
||||
|
||||
r1 = RGroups(makeFakeSidechains("aldehydes", num=1000))
|
||||
r2 = RGroups(makeFakeSidechains("boronic_acids", num=1500))
|
||||
|
||||
lib = Library([r1,r2])
|
||||
props = [
|
||||
Property("mw", propIdx=0, minValue=0, maxValue=500),
|
||||
Property("alogp", propIdx=1, minValue=-2.4, maxValue=5),
|
||||
Property("tpsa", propIdx=2, minValue=0, maxValue=90)
|
||||
]
|
||||
|
||||
glare = Glare()
|
||||
# optimize the library...
|
||||
glare.optimize(lib, props)
|
||||
"""
|
||||
|
||||
class Property:
    """Acceptance window for one additive product property.

    The value of a product is taken to be the scaffold offset plus the
    sum of the corresponding entry in each sidechain's property vector.
    """

    def __init__(self, name, propIdx, minValue, maxValue, scaffoldoffset=0.0):
        """Create a Property.

        name: the name of the property.
        propIdx: the index of the property in each sidechain's property vector
        minValue: the minimum acceptable value for the product
        maxValue: the maximum acceptable value for the product
        scaffoldoffset: contribution of the reaction scaffold (defaults to 0)
        """
        self.name = name
        self.propIdx = propIdx
        self.minValue = minValue
        self.maxValue = maxValue
        self.offset = scaffoldoffset

    def evaluate(self, sidechains):
        """Return True if the product built from `sidechains` is in range.

        Each sidechain must expose a `props` sequence; `props[self.propIdx]`
        is the value that is accumulated on top of the scaffold offset.
        Both bounds are inclusive.
        """
        idx = self.propIdx
        total = self.offset
        for sidechain in sidechains:
            total += sidechain.props[idx]
        return self.minValue <= total <= self.maxValue
|
||||
|
||||
class Sidechain:
    """A single sidechain/fragment: an identifier plus its numerical
    property vector."""

    def __init__(self, name, props, goodCount=0):
        """Create a Sidechain.

        name: the unique name for the sidechain
        props: the property vector (indexed by Property.propIdx)
        goodCount: how many times this reagent has taken part in a "good"
          product, i.e. one that falls inside the desired property space.
        """
        self.name = name
        self.props = props
        # Both attributes below are updated by the library evaluation code.
        self.good_count = goodCount
        self.dropped = False

    def __str__(self):
        fields = (self.name, self.props, self.good_count)
        return "Sidechain %s(%s, goodCount=%s)" % fields

    def __repr__(self):
        fields = (self.name, self.props, self.good_count)
        return "Sidechain(%r, %r, %s)" % fields
|
||||
|
||||
class RGroups:
    """Holds the collection of candidate sidechains for one RGroup position."""

    def __init__(self, sidechains):
        """sidechains: the list of Sidechains that may populate this
        rgroup position.  The list is kept by reference and is reordered
        and truncated in place by randomize() and prune()."""
        self.sidechains = sidechains
        self.rejected = []  # sidechains removed so far by prune()
        self.initial_size = len(sidechains)

    def count(self):
        """Returns the number of sidechains currently kept."""
        return len(self.sidechains)

    def randomize(self):
        """Randomly shuffles the sidechains and resets their goodness counts."""
        random.shuffle(self.sidechains)
        for sidechain in self.sidechains:
            sidechain.good_count = 0

    def effectiveness(self):
        """Returns the fraction of the initial sidechains that are still kept
        (number currently kept divided by the initial number)."""
        return len(self.sidechains) / float(self.initial_size)

    def chunk_size(self, num_chunks):
        """Returns the number of sidechains per chunk when the kept
        sidechains are split into num_chunks chunks (rounded up)."""
        return int(math.ceil(float(len(self.sidechains)) / num_chunks))

    def chunk(self, chunk_idx, num_chunks):
        """Returns a new RGroups holding the chunk_idx'th of num_chunks
        consecutive slices of the kept sidechains.  Trailing chunks may be
        shorter than the others, or empty."""
        assert chunk_idx >= 0 and chunk_idx < num_chunks, "%s %s" % (
            chunk_idx, num_chunks)

        n = self.chunk_size(num_chunks)
        return RGroups(self.sidechains[chunk_idx * n:(chunk_idx + 1) * n])

    def prune(self, fractionToKeep):
        """Keeps the best fractionToKeep (0 < fraction <= 1) of the
        sidechains, ranked by how often they occurred in good products,
        and appends the remainder to self.rejected."""
        assert 0 < fractionToKeep <= 1.0, "fractionToKeep: %s" % fractionToKeep

        # Highest good_count first.  A key function works on both Python 2
        # and 3 (the cmp-style sort does not exist on Python 3); the sort is
        # stable, so ties keep their current relative order.
        self.sidechains.sort(key=lambda s: s.good_count, reverse=True)
        keep = int(len(self.sidechains) * fractionToKeep + 0.5)

        # update rejected set
        self.rejected += self.sidechains[keep:]
        self.sidechains = self.sidechains[:keep]
|
||||
|
||||
class Library:
    """A library is a collection of RGroups that are combinatorially
    combined to form products."""

    def __init__(self, rgroups):
        """rgroups: the list of RGroups whose sidechains are combined
        combinatorially to make the library."""
        self.rgroups = rgroups

    def isValid(self):
        """Returns False if any rgroup has no sidechains left."""
        for rg in self.rgroups:
            if len(rg.sidechains) == 0:
                return False
        return True

    def randomize(self):
        """Randomizes the order of the sidechains in every rgroup."""
        for rg in self.rgroups:
            rg.randomize()

    def getSidechainsPerPartition(self, total_num_partitions_per_rgroup):
        """Returns one integer partition size per rgroup, in the original
        rgroup order.

        The sizes are apportioned following the appendix of the GLARE
        paper, eq (8) and (9): every size is rounded down to a multiple of
        the next smaller one, and the largest rgroup reuses the size of the
        second largest.
        """
        # Floor division keeps the sizes integral on Python 3 as well;
        # they are used later with '%' and as chunk counts.
        sizes = [(libIdx, max(rg.count() // total_num_partitions_per_rgroup, 1))
                 for libIdx, rg in enumerate(self.rgroups)]

        # sort by size (stable, so equally sized rgroups keep their order)
        sizes.sort(key=lambda entry: entry[1])
        last_size = 1
        opt_sizes = []
        for libIdx, current_size in sizes[:-1]:
            opt_sizes.append((libIdx,
                              current_size - (current_size % last_size)))
            last_size = current_size

        # From the Glare paper:
        #  the last library size is set equal to the second to last.
        #  From Table 3, it is easy to understand that, if the fourth
        #  dimension was split in 24 instead of 12, a factor of 2 would be
        #  gained from the reduced size of the sublibraries.  However, twice
        #  as many sublibraries would be needed, and the net speedup would
        #  be null, hence the decision to set p4=p3 (p4 here is the last
        #  library).
        opt_sizes.append((sizes[-1][0], last_size))

        # back to the original library order
        opt_sizes.sort()
        return [size for libIdx, size in opt_sizes]

    def chunk(self, num_partitions):
        """num_partitions -> [Library(..), Library(...)]

        Returns new libraries that are chunks of this one.  These are the
        sub-libraries that get sampled to see whether sidechains take part
        in good products.  Sub-libraries with an empty rgroup are skipped.
        """
        partitions = self.getSidechainsPerPartition(num_partitions)
        max_subsets = max(partitions)

        library_sets = []
        for i in range(max_subsets):
            # Each rgroup cycles through its own chunks as i increases.
            partitioned_rgroups = [
                rg.chunk(chunk_idx=i % size, num_chunks=size)
                for rg, size in zip(self.rgroups, partitions)]
            lib = Library(partitioned_rgroups)
            if lib.isValid():
                library_sets.append(lib)

        return library_sets

    def effectiveness(self):
        """Returns the average effectiveness over all rgroups."""
        total = 0.0
        for rg in self.rgroups:
            total += rg.effectiveness()
        return total / len(self.rgroups)

    def evaluate(self, props):
        """props -> num_good_enumerations, total_enumerations

        props: a list of Property evaluators for the fragments.

        Enumerates every combination of one sidechain per rgroup.  A
        combination is good when every property accepts it; each sidechain
        of a good combination gets its good_count incremented.  Returns
        (0, 0) when there is nothing to enumerate.
        """
        frags = [rg.sidechains for rg in self.rgroups]
        good = 0
        total = 0
        for frag in itertools.product(*frags):
            total += 1
            # all() stops at the first failing property
            if all(p.evaluate(frag) for p in props):
                good += 1
                for sidechain in frag:
                    sidechain.good_count += 1
        return good, total
|
||||
|
||||
|
||||
class Glare:
    """Glare Algorithm.  Implementation of

    GLARE: A New Approach for Filtering Large Reagent Lists in
    Combinatorial Library Design Using Product Properties
    Jean-Francois Truchon* and Christopher I. Bayly

    http://pubs.acs.org/doi/pdf/10.1021/ci0504871

    Usage:
      # somehow make sidechains1/2 with props [mw, alogp, tpsa]
      r1 = RGroups(sidechains1)
      r2 = RGroups(sidechains2)
      lib = Library([r1, r2])
      props = [
        Property("mw", 0, 0, 500),
        Property("alogp", 1, -2.4, 5),
        Property("tpsa", 2, 0, 90)
      ]

      glare = Glare()
      glare.optimize(lib, props)
    """

    def __init__(self,
                 desiredFinalGoodness=0.95,
                 maxIterations=100,
                 rgroupScale=6.0,  # None if no scaling
                 initialFraction=None,  # None=auto, otherwise a percentage
                 numPartitions=16):
        """desiredFinalGoodness: target fraction of good products (0..1)
        maxIterations: maximum number of prune iterations
        rgroupScale: steepness of the size-dependent scaling, eq (6) in the
          paper; None disables the scaling
        initialFraction: percentage (0..100) of sidechains to keep in the
          first iteration; None chooses it automatically
        numPartitions: passed to Library.chunk on every iteration
        """
        self.fractionGood = self.desiredFinalGoodness = desiredFinalGoodness
        self.maxIterations = maxIterations
        self.rgroupScale = rgroupScale

        # stored internally as a fraction, not a percentage
        if initialFraction is not None:
            self.initialFraction = initialFraction / 100.
        else:
            self.initialFraction = initialFraction
        self.numPartitions = numPartitions

    @staticmethod
    def _describeSize(library):
        """Returns ("n1 x n2 x ...", n1*n2*...) for the library's rgroups."""
        # reduce is not a builtin on Python 3; functools has it on both.
        from functools import reduce
        counts = [len(rg.sidechains) for rg in library.rgroups]
        return (" x ".join([str(c) for c in counts]),
                reduce(operator.mul, counts, 1))

    def optimize(self, library, props):
        """library, props

        Given a Library and the list of Property evaluators, optimize the
        library.  The library is modified in place by removing building
        blocks (sidechains) that are not likely to pass the property
        criteria.  A progress report is printed for every iteration.

        Returns None.  Stops when the goodness reaches the desired value,
        when no good product is found at all, when there is nothing left to
        evaluate, or after maxIterations iterations.
        """
        # attempt to generate report like glare application
        print ("------- PARAMETERS: --------------")
        print ("GOOODNESS THRESHOLD : %s%%"%(self.desiredFinalGoodness * 100))
        print ("MIN PARTITION SIZE : %s"%self.numPartitions)
        if self.initialFraction is None or self.initialFraction > 0.999:
            print ("INITIAL FRACTION TO KEEP : AUTOMATIC")
        else:
            print ("INITAL FRACTION TO KEEP : %s%%"%(self.initialFraction*100))
        print ("Actual SIZE : %s = %s"%self._describeSize(library))

        running_total = 0.0
        Gt = self.desiredFinalGoodness

        for iteration in range(1, self.maxIterations + 1):
            # Chunk the total library into smaller, more manageable sets
            # and run the combinatorial analysis on the sub-libraries.
            # Each of these records the number of times a sidechain is used
            # in a successful enumeration, which is then used to prune the
            # library at the end of the iteration.
            for rg in library.rgroups:
                rg.randomize()

            good = total = 0.0
            for chunk in library.chunk(self.numPartitions):
                g, t = chunk.evaluate(props)
                good += g
                total += t

            if total == 0:
                # No valid sub-library could be enumerated, so there is no
                # goodness to compute (and good/total would divide by zero).
                return

            running_total += total
            Gi = good / total  # current goodness

            if Gi < 1e-12:
                # nothing is good: keep nothing beyond what scaling spares
                fraction = 0.0
            elif iteration == 1:
                G0 = Gi  # goodness at first iteration

                # the first time, use the initialFraction or a "good enough"
                # value
                if self.initialFraction is not None:
                    fraction = K0 = self.initialFraction
                else:
                    # auto choose the fraction based on the current good
                    # percentage and the desired one
                    fraction = K0 = min(-1.1 * (Gt - G0) + 1.2, 0.9)
            else:
                # afterwards, gradually eliminate reagents, slowing down as
                # the goodness approaches the target;
                # see equation (5) in reference
                if abs(Gt - G0) < 1e-4:
                    Ki = 1.0
                else:
                    Ki = (1.0 - K0) * (Gi - G0) / (Gt - G0) + K0
                fraction = min(1.0, Ki)

            # prune the library to keep the highest occurring sidechains
            max_size = float(max([len(rg.sidechains) for rg in library.rgroups]))
            for rg in library.rgroups:
                scale = 1.0
                if self.rgroupScale is not None:
                    # scale differently sized rgroups via equation (6) in
                    # the paper: smaller rgroups are pruned less
                    numSidechains = len(rg.sidechains)
                    scale = 1.0 / (1.0 + math.exp(
                        -self.rgroupScale * ((numSidechains / max_size) - 0.5)))
                fraction_to_reject = (1.0 - fraction) * scale
                # keep the best fraction...
                rg.prune(1.0 - fraction_to_reject)

            print ("-------------- ITERATION : %s ----------------------"%iteration)
            print ("GOODNESS : %s%%"%(Gi * 100))
            print ("NUMBER EVAL : %s"%(total))
            print ("CUMUL EVAL : %s"%(running_total))
            print ("KEPT IN STEP : %s%%"%(fraction*100.))
            print ("Actual SIZE : %s = %s"%self._describeSize(library))
            print ("EFFECTIVENESS : %s%%"%(library.effectiveness()*100.))

            # stopping criteria
            if Gi < 1e-12:
                return
            elif abs(Gi - self.desiredFinalGoodness) < 0.001 or \
                 Gi > self.desiredFinalGoodness:
                return
|
||||
|
||||
######################################################################
|
||||
# testing codes
|
||||
def makeFakeProps():
    """Returns a random [mw, alogp, tpsa] property vector for testing."""
    # The list literal is evaluated left to right, so the random draws
    # happen in the order mw, alogp, tpsa.
    return [random.randint(10, 500),
            random.randint(-10, 10),
            random.randint(0, 180)]
|
||||
|
||||
def makeFakeSidechains(lib, num):
    """Returns `num` Sidechains named "<lib>_<index>" with random
    property vectors (see makeFakeProps)."""
    return [Sidechain(lib + "_" + str(idx), makeFakeProps())
            for idx in range(num)]
|
||||
|
||||
def testGlare():
    """Runs the Glare optimization on a two-rgroup library of fake data."""
    aldehydes = RGroups(makeFakeSidechains("aldehydes", 1000))
    boronic_acids = RGroups(makeFakeSidechains("boronic_acids", 1500))
    library = Library([aldehydes, boronic_acids])

    # (name, index into the property vector, min, max, scaffold offset)
    criteria = [
        Property("mw", 0, 0, 500, 230.1419),
        Property("alogp", 1, -2.4, 5, 2.212749),
        Property("tpsa", 2, 0, 90, 24.5),
    ]

    optimizer = Glare()
    optimizer.optimize(library, criteria)
|
||||
|
||||
# Script entry point: run the fake-data demonstration when executed directly.
if __name__ == "__main__":
    testGlare()
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user