mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-03 21:44:30 +08:00
Add multi-threaded versions of some MolStandardize operations (#6909)
* initial addition of MT support to MolStandardize * do the other inplace functions * add mt ops to python wrappers including tests * release the GIL * remove exploratory code added during dev * make normalizer thread safe * refactor some repeated code
This commit is contained in:
@@ -20,6 +20,11 @@
|
||||
#include "Charge.h"
|
||||
#include <GraphMol/SmilesParse/SmilesWrite.h>
|
||||
#include <GraphMol/SmilesParse/SmilesParse.h>
|
||||
#include <RDGeneral/RDThreads.h>
|
||||
|
||||
#ifdef RDK_BUILD_THREADSAFE_SSS
|
||||
#include <thread>
|
||||
#endif
|
||||
|
||||
#include <RDGeneral/BoostStartInclude.h>
|
||||
#include <boost/property_tree/ptree.hpp>
|
||||
@@ -121,6 +126,39 @@ void updateCleanupParamsFromJSON(CleanupParameters &params,
|
||||
}
|
||||
}
|
||||
|
||||
namespace {
|
||||
template <typename FuncType>
|
||||
void standardizeMultipleMolsInPlace(FuncType sfunc, std::vector<RWMol *> &mols,
|
||||
int numThreads,
|
||||
const CleanupParameters ¶ms) {
|
||||
unsigned int numThreadsToUse = std::min(
|
||||
static_cast<unsigned int>(mols.size()), getNumThreadsToUse(numThreads));
|
||||
if (numThreadsToUse == 1) {
|
||||
for (auto molp : mols) {
|
||||
sfunc(*molp, params);
|
||||
}
|
||||
}
|
||||
#ifdef RDK_BUILD_THREADSAFE_SSS
|
||||
else {
|
||||
auto func = [&](unsigned int tidx) {
|
||||
for (auto mi = tidx; mi < mols.size(); mi += numThreads) {
|
||||
sfunc(*mols[mi], params);
|
||||
}
|
||||
};
|
||||
std::vector<std::thread> threads;
|
||||
for (auto tidx = 0u; tidx < numThreadsToUse; ++tidx) {
|
||||
threads.emplace_back(func, tidx);
|
||||
}
|
||||
for (auto &t : threads) {
|
||||
if (t.joinable()) {
|
||||
t.join();
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
} // namespace
|
||||
|
||||
RWMol *cleanup(const RWMol *mol, const CleanupParameters ¶ms) {
|
||||
auto nmol = new RWMol(*mol);
|
||||
cleanupInPlace(*nmol, params);
|
||||
@@ -137,6 +175,13 @@ void cleanupInPlace(RWMol &mol, const CleanupParameters &params) {
|
||||
MolOps::assignStereochemistry(mol, cleanIt, force);
|
||||
}
|
||||
|
||||
//! Runs the single-molecule cleanupInPlace() on every molecule in \c mols.
/*!
  \param mols        molecules to be modified in place
  \param numThreads  requested number of threads
  \param params      cleanup parameters passed to each call
*/
void cleanupInPlace(std::vector<RWMol *> &mols, int numThreads,
                    const CleanupParameters &params) {
  // cleanupInPlace is overloaded, so the cast is needed to select the
  // single-molecule version as the per-molecule operation
  standardizeMultipleMolsInPlace(
      static_cast<void (*)(RWMol &, const CleanupParameters &)>(cleanupInPlace),
      mols, numThreads, params);
}
|
||||
|
||||
RWMol *tautomerParent(const RWMol &mol, const CleanupParameters ¶ms,
|
||||
bool skip_standardize) {
|
||||
std::unique_ptr<RWMol> res{new RWMol(mol)};
|
||||
@@ -233,10 +278,27 @@ void normalizeInPlace(RWMol &mol, const CleanupParameters &params) {
|
||||
normalizer->normalizeInPlace(mol);
|
||||
}
|
||||
|
||||
//! Normalizes every molecule in \c mols in place.
/*!
  \param mols        molecules to be modified in place
  \param numThreads  requested number of threads
  \param params      used once, to construct the Normalizer

  A single Normalizer is built from \c params and shared by every call (and
  therefore by every worker thread when more than one thread is used).
  NOTE(review): this relies on Normalizer::normalizeInPlace() being safe to
  call concurrently on one instance - confirm.
*/
void normalizeInPlace(std::vector<RWMol *> &mols, int numThreads,
                      const CleanupParameters &params) {
  std::unique_ptr<Normalizer> normalizer{normalizerFromParams(params)};
  // the parameters are already baked into the normalizer, so the second
  // argument is ignored
  auto sfunc = [&](RWMol &m, const CleanupParameters &) {
    normalizer->normalizeInPlace(m);
  };
  standardizeMultipleMolsInPlace(sfunc, mols, numThreads, params);
}
|
||||
|
||||
//! Reionizes \c mol in place using a Reionizer constructed from \c params.
void reionizeInPlace(RWMol &mol, const CleanupParameters &params) {
  std::unique_ptr<Reionizer> reionizer{reionizerFromParams(params)};
  reionizer->reionizeInPlace(mol);
}
|
||||
//! Reionizes every molecule in \c mols in place.
/*!
  \param mols        molecules to be modified in place
  \param numThreads  requested number of threads
  \param params      used once, to construct the Reionizer

  A single Reionizer is built from \c params and shared by every call (and
  therefore by every worker thread when more than one thread is used).
  NOTE(review): this relies on Reionizer::reionizeInPlace() being safe to
  call concurrently on one instance - confirm.
*/
void reionizeInPlace(std::vector<RWMol *> &mols, int numThreads,
                     const CleanupParameters &params) {
  std::unique_ptr<Reionizer> reionizer{reionizerFromParams(params)};
  // the parameters are already baked into the reionizer, so the second
  // argument is ignored
  auto sfunc = [&](RWMol &m, const CleanupParameters &) {
    reionizer->reionizeInPlace(m);
  };
  standardizeMultipleMolsInPlace(sfunc, mols, numThreads, params);
}
|
||||
|
||||
RWMol *removeFragments(const RWMol *mol, const CleanupParameters ¶ms) {
|
||||
PRECONDITION(mol, "bad molecule");
|
||||
@@ -249,6 +311,15 @@ void removeFragmentsInPlace(RWMol &mol, const CleanupParameters &params) {
|
||||
remover->removeInPlace(mol);
|
||||
}
|
||||
|
||||
//! Removes fragments from every molecule in \c mols in place.
/*!
  \param mols        molecules to be modified in place
  \param numThreads  requested number of threads
  \param params      used once, to construct the FragmentRemover

  A single FragmentRemover is built from \c params and shared by every call
  (and therefore by every worker thread when more than one thread is used).
  NOTE(review): this relies on FragmentRemover::removeInPlace() being safe to
  call concurrently on one instance - confirm.
*/
void removeFragmentsInPlace(std::vector<RWMol *> &mols, int numThreads,
                            const CleanupParameters &params) {
  std::unique_ptr<FragmentRemover> remover{fragmentRemoverFromParams(params)};
  // the parameters are already baked into the remover, so the second
  // argument is ignored
  auto sfunc = [&](RWMol &m, const CleanupParameters &) {
    remover->removeInPlace(m);
  };
  standardizeMultipleMolsInPlace(sfunc, mols, numThreads, params);
}
|
||||
|
||||
RWMol *canonicalTautomer(const RWMol *mol, const CleanupParameters ¶ms) {
|
||||
PRECONDITION(mol, "bad molecule");
|
||||
std::unique_ptr<TautomerEnumerator> te{tautomerEnumeratorFromParams(params)};
|
||||
@@ -292,5 +363,5 @@ ROMol *disconnectOrganometallics(
|
||||
return md.disconnect(mol);
|
||||
}
|
||||
|
||||
} // end of namespace MolStandardize
|
||||
} // namespace MolStandardize
|
||||
} // namespace RDKit
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
//
|
||||
// Copyright (C) 2018-2021 Susan H. Leung and other RDKit contributors
|
||||
// Copyright (C) 2018-2023 Susan H. Leung and other RDKit contributors
|
||||
//
|
||||
// @@ All Rights Reserved @@
|
||||
// This file is part of the RDKit.
|
||||
@@ -106,6 +106,10 @@ inline RWMol *cleanup(const RWMol &mol, const CleanupParameters &params =
|
||||
//! Works the same as cleanup(mol)
|
||||
RDKIT_MOLSTANDARDIZE_EXPORT void cleanupInPlace(
|
||||
RWMol &mol, const CleanupParameters ¶ms = defaultCleanupParameters);
|
||||
//! Operates on multiple molecules
|
||||
RDKIT_MOLSTANDARDIZE_EXPORT void cleanupInPlace(
|
||||
std::vector<RWMol *> &mols, int numThreads = 1,
|
||||
const CleanupParameters ¶ms = defaultCleanupParameters);
|
||||
|
||||
//! Works the same as Normalizer().normalize(mol)
|
||||
RDKIT_MOLSTANDARDIZE_EXPORT RWMol *normalize(
|
||||
@@ -114,6 +118,10 @@ RDKIT_MOLSTANDARDIZE_EXPORT RWMol *normalize(
|
||||
//! Works the same as Normalizer().normalizeInPlace(mol)
|
||||
RDKIT_MOLSTANDARDIZE_EXPORT void normalizeInPlace(
|
||||
RWMol &mol, const CleanupParameters ¶ms = defaultCleanupParameters);
|
||||
//! Operates on multiple molecules
|
||||
RDKIT_MOLSTANDARDIZE_EXPORT void normalizeInPlace(
|
||||
std::vector<RWMol *> &mols, int numThreads = 1,
|
||||
const CleanupParameters ¶ms = defaultCleanupParameters);
|
||||
|
||||
//! Works the same as Reionizer().reionize(mol)
|
||||
RDKIT_MOLSTANDARDIZE_EXPORT RWMol *reionize(
|
||||
@@ -122,6 +130,10 @@ RDKIT_MOLSTANDARDIZE_EXPORT RWMol *reionize(
|
||||
//! Works the same as Reionizer().reionizeInPlace(mol)
|
||||
RDKIT_MOLSTANDARDIZE_EXPORT void reionizeInPlace(
|
||||
RWMol &mol, const CleanupParameters ¶ms = defaultCleanupParameters);
|
||||
//! Operates on multiple molecules
|
||||
RDKIT_MOLSTANDARDIZE_EXPORT void reionizeInPlace(
|
||||
std::vector<RWMol *> &mols, int numThreads = 1,
|
||||
const CleanupParameters ¶ms = defaultCleanupParameters);
|
||||
|
||||
//! Works the same as FragmentRemover().remove(mol)
|
||||
RDKIT_MOLSTANDARDIZE_EXPORT RWMol *removeFragments(
|
||||
@@ -130,6 +142,10 @@ RDKIT_MOLSTANDARDIZE_EXPORT RWMol *removeFragments(
|
||||
//! Works the same as FragmentRemover().removeInPlace(mol)
|
||||
RDKIT_MOLSTANDARDIZE_EXPORT void removeFragmentsInPlace(
|
||||
RWMol &mol, const CleanupParameters ¶ms = defaultCleanupParameters);
|
||||
//! Operates on multiple molecules
|
||||
RDKIT_MOLSTANDARDIZE_EXPORT void removeFragmentsInPlace(
|
||||
std::vector<RWMol *> &mols, int numThreads = 1,
|
||||
const CleanupParameters ¶ms = defaultCleanupParameters);
|
||||
|
||||
//! Works the same as TautomerEnumerator().canonicalize(mol)
|
||||
RDKIT_MOLSTANDARDIZE_EXPORT RWMol *canonicalTautomer(
|
||||
|
||||
@@ -50,6 +50,8 @@ Normalizer::Normalizer() {
|
||||
param_filename_flyweight(defaultCleanupParameters.normalizations).get());
|
||||
this->d_tcat = new TransformCatalog(tparams);
|
||||
this->MAX_RESTARTS = 200;
|
||||
|
||||
this->d_tcat->getCatalogParams()->initializeTransforms();
|
||||
}
|
||||
|
||||
// overloaded constructor
|
||||
@@ -60,6 +62,8 @@ Normalizer::Normalizer(const std::string normalizeFile,
|
||||
&(param_filename_flyweight(normalizeFile).get());
|
||||
this->d_tcat = new TransformCatalog(tparams);
|
||||
this->MAX_RESTARTS = maxRestarts;
|
||||
|
||||
this->d_tcat->getCatalogParams()->initializeTransforms();
|
||||
}
|
||||
|
||||
// overloaded constructor
|
||||
@@ -69,6 +73,8 @@ Normalizer::Normalizer(std::istream &normalizeStream,
|
||||
TransformCatalogParams tparams(normalizeStream);
|
||||
this->d_tcat = new TransformCatalog(&tparams);
|
||||
this->MAX_RESTARTS = maxRestarts;
|
||||
|
||||
this->d_tcat->getCatalogParams()->initializeTransforms();
|
||||
}
|
||||
|
||||
// overloaded constructor
|
||||
@@ -80,6 +86,8 @@ Normalizer::Normalizer(
|
||||
&(param_data_flyweight(normalizations).get());
|
||||
this->d_tcat = new TransformCatalog(tparams);
|
||||
this->MAX_RESTARTS = maxRestarts;
|
||||
|
||||
this->d_tcat->getCatalogParams()->initializeTransforms();
|
||||
}
|
||||
|
||||
// destructor
|
||||
@@ -98,12 +106,9 @@ void Normalizer::normalizeInPlace(RWMol &mol) {
|
||||
const std::vector<std::shared_ptr<ChemicalReaction>> &transforms =
|
||||
tparams->getTransformations();
|
||||
|
||||
// initialize the transforms and make sure that they are compatible with the
|
||||
// make sure that the transforms are compatible with the
|
||||
// restrictions on in-place reactions
|
||||
for (auto &transform : transforms) {
|
||||
if (!transform->isInitialized()) {
|
||||
transform->initReactantMatchers();
|
||||
}
|
||||
if (transform->getNumProductTemplates() != 1 ||
|
||||
transform->getNumReactantTemplates() != 1 ||
|
||||
transform->getProducts()[0]->getNumAtoms() >
|
||||
@@ -226,9 +231,6 @@ SmilesMolPair Normalizer::applyTransform(const ROMOL_SPTR &mol,
|
||||
|
||||
SmilesMolPair smilesMolPair{std::string(), mol};
|
||||
|
||||
if (!transform.isInitialized()) {
|
||||
transform.initReactantMatchers();
|
||||
}
|
||||
// REVIEW: what's the source of the 20 in the next line?
|
||||
for (unsigned int i = 0; i < 20; ++i) {
|
||||
std::map<std::string, ROMOL_SPTR> pdts;
|
||||
|
||||
@@ -53,8 +53,8 @@ TransformCatalogParams::TransformCatalogParams(
|
||||
|
||||
TransformCatalogParams::~TransformCatalogParams() {}
|
||||
|
||||
const std::vector<std::shared_ptr<ChemicalReaction>>
|
||||
&TransformCatalogParams::getTransformations() const {
|
||||
const std::vector<std::shared_ptr<ChemicalReaction>> &
|
||||
TransformCatalogParams::getTransformations() const {
|
||||
return d_transformations;
|
||||
}
|
||||
|
||||
@@ -65,6 +65,15 @@ const ChemicalReaction *TransformCatalogParams::getTransformation(
|
||||
return d_transformations[fid].get();
|
||||
}
|
||||
|
||||
void TransformCatalogParams::initializeTransforms() const {
|
||||
for (auto &transform : d_transformations) {
|
||||
if (!transform || transform->isInitialized()) {
|
||||
continue;
|
||||
}
|
||||
transform->initReactantMatchers();
|
||||
}
|
||||
}
|
||||
|
||||
//! Writes the number of stored transformations, followed by a newline, to
//! \c ss.
void TransformCatalogParams::toStream(std::ostream &ss) const {
  const auto numTransforms = d_transformations.size();
  ss << numTransforms << "\n";
}
|
||||
|
||||
@@ -55,6 +55,10 @@ class RDKIT_MOLSTANDARDIZE_EXPORT TransformCatalogParams
|
||||
void initFromStream(std::istream &ss) override;
|
||||
void initFromString(const std::string &text) override;
|
||||
|
||||
// it's a bit dirty to make this "const", but it's weakly defensible
|
||||
// since the set of transformations themselves will not be changed.
|
||||
void initializeTransforms() const;
|
||||
|
||||
private:
|
||||
std::vector<std::shared_ptr<ChemicalReaction>> d_transformations;
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
//
|
||||
// Copyright (C) 2018 Susan H. Leung
|
||||
// Copyright (C) 2018-2023 Susan H. Leung and other RDKit contributors
|
||||
//
|
||||
// @@ All Rights Reserved @@
|
||||
// This file is part of the RDKit.
|
||||
@@ -73,19 +73,91 @@ void inPlaceHelper(RDKit::ROMol *mol, python::object params, FUNCTYPE func) {
|
||||
}
|
||||
|
||||
// Python wrapper for the single-molecule cleanupInPlace().
// cleanupInPlace is overloaded (single molecule / vector of molecules), so
// the explicit cast is required to pick the single-molecule version; passing
// the bare name would leave FUNCTYPE undeducible.
void cleanupInPlaceHelper(RDKit::ROMol *mol, python::object params) {
  inPlaceHelper(
      mol, params,
      static_cast<void (*)(RDKit::RWMol &,
                           const RDKit::MolStandardize::CleanupParameters &)>(
          RDKit::MolStandardize::cleanupInPlace));
}
|
||||
|
||||
// Python wrapper for the single-molecule normalizeInPlace().
// normalizeInPlace is overloaded (single molecule / vector of molecules), so
// the explicit cast is required to pick the single-molecule version; passing
// the bare name would leave FUNCTYPE undeducible.
void normalizeInPlaceHelper(RDKit::ROMol *mol, python::object params) {
  inPlaceHelper(
      mol, params,
      static_cast<void (*)(RDKit::RWMol &,
                           const RDKit::MolStandardize::CleanupParameters &)>(
          RDKit::MolStandardize::normalizeInPlace));
}
|
||||
|
||||
// Python wrapper for the single-molecule reionizeInPlace().
// reionizeInPlace is overloaded (single molecule / vector of molecules), so
// the explicit cast is required to pick the single-molecule version; passing
// the bare name would leave FUNCTYPE undeducible.
void reionizeInPlaceHelper(RDKit::ROMol *mol, python::object params) {
  inPlaceHelper(
      mol, params,
      static_cast<void (*)(RDKit::RWMol &,
                           const RDKit::MolStandardize::CleanupParameters &)>(
          RDKit::MolStandardize::reionizeInPlace));
}
|
||||
|
||||
// Python wrapper for the single-molecule removeFragmentsInPlace().
// removeFragmentsInPlace is overloaded (single molecule / vector of
// molecules), so the explicit cast is required to pick the single-molecule
// version; passing the bare name would leave FUNCTYPE undeducible.
void removeFragmentsInPlaceHelper(RDKit::ROMol *mol, python::object params) {
  inPlaceHelper(
      mol, params,
      static_cast<void (*)(RDKit::RWMol &,
                           const RDKit::MolStandardize::CleanupParameters &)>(
          RDKit::MolStandardize::removeFragmentsInPlace));
}
|
||||
|
||||
template <typename FUNCTYPE>
|
||||
void mtinPlaceHelper(python::object pymols, int numThreads,
|
||||
python::object params, FUNCTYPE func) {
|
||||
const RDKit::MolStandardize::CleanupParameters *ps =
|
||||
&RDKit::MolStandardize::defaultCleanupParameters;
|
||||
if (params) {
|
||||
ps = python::extract<RDKit::MolStandardize::CleanupParameters *>(params);
|
||||
}
|
||||
unsigned int nmols = python::extract<unsigned int>(pymols.attr("__len__")());
|
||||
std::vector<RDKit::RWMol *> mols(nmols);
|
||||
for (auto i = 0u; i < nmols; ++i) {
|
||||
RDKit::RWMol *mol = static_cast<RDKit::RWMol *>(
|
||||
python::extract<RDKit::ROMol *>(pymols[i])());
|
||||
mols[i] = mol;
|
||||
}
|
||||
{
|
||||
NOGIL gil;
|
||||
func(mols, numThreads, *ps);
|
||||
}
|
||||
}
|
||||
// Python wrapper for the multi-molecule cleanupInPlace() overload.
void mtcleanupInPlaceHelper(python::object mols, int numThreads,
                            python::object params) {
  // signature of the multi-molecule overload; initializing a pointer of this
  // type selects that overload from the overload set
  using MultiMolOp = void (*)(std::vector<RDKit::RWMol *> &, int,
                              const RDKit::MolStandardize::CleanupParameters &);
  MultiMolOp op = RDKit::MolStandardize::cleanupInPlace;
  mtinPlaceHelper(mols, numThreads, params, op);
}
|
||||
|
||||
// Python wrapper for the multi-molecule normalizeInPlace() overload.
void mtnormalizeInPlaceHelper(python::object mols, int numThreads,
                              python::object params) {
  // signature of the multi-molecule overload; initializing a pointer of this
  // type selects that overload from the overload set
  using MultiMolOp = void (*)(std::vector<RDKit::RWMol *> &, int,
                              const RDKit::MolStandardize::CleanupParameters &);
  MultiMolOp op = RDKit::MolStandardize::normalizeInPlace;
  mtinPlaceHelper(mols, numThreads, params, op);
}
|
||||
|
||||
// Python wrapper for the multi-molecule reionizeInPlace() overload.
void mtreionizeInPlaceHelper(python::object mols, int numThreads,
                             python::object params) {
  // signature of the multi-molecule overload; initializing a pointer of this
  // type selects that overload from the overload set
  using MultiMolOp = void (*)(std::vector<RDKit::RWMol *> &, int,
                              const RDKit::MolStandardize::CleanupParameters &);
  MultiMolOp op = RDKit::MolStandardize::reionizeInPlace;
  mtinPlaceHelper(mols, numThreads, params, op);
}
|
||||
|
||||
// Python wrapper for the multi-molecule removeFragmentsInPlace() overload.
void mtremoveFragmentsInPlaceHelper(python::object mols, int numThreads,
                                    python::object params) {
  // signature of the multi-molecule overload; initializing a pointer of this
  // type selects that overload from the overload set
  using MultiMolOp = void (*)(std::vector<RDKit::RWMol *> &, int,
                              const RDKit::MolStandardize::CleanupParameters &);
  MultiMolOp op = RDKit::MolStandardize::removeFragmentsInPlace;
  mtinPlaceHelper(mols, numThreads, params, op);
}
|
||||
|
||||
template <typename FUNCTYPE>
|
||||
@@ -257,6 +329,11 @@ BOOST_PYTHON_MODULE(rdMolStandardize) {
|
||||
python::def("CleanupInPlace", cleanupInPlaceHelper,
|
||||
(python::arg("mol"), python::arg("params") = python::object()),
|
||||
docString.c_str());
|
||||
docString = "Standardizes multiple molecules in place";
|
||||
python::def("CleanupInPlace", mtcleanupInPlaceHelper,
|
||||
(python::arg("mols"), python::arg("numThreads"),
|
||||
python::arg("params") = python::object()),
|
||||
docString.c_str());
|
||||
docString = "Convenience function for standardizing a SMILES";
|
||||
python::def("StandardizeSmiles", RDKit::MolStandardize::standardizeSmiles,
|
||||
(python::arg("smiles")), docString.c_str());
|
||||
@@ -313,6 +390,11 @@ BOOST_PYTHON_MODULE(rdMolStandardize) {
|
||||
python::def("NormalizeInPlace", normalizeInPlaceHelper,
|
||||
(python::arg("mol"), python::arg("params") = python::object()),
|
||||
docString.c_str());
|
||||
docString = "Normalizes multiple molecules in place";
|
||||
python::def("NormalizeInPlace", mtnormalizeInPlaceHelper,
|
||||
(python::arg("mols"), python::arg("numThreads"),
|
||||
python::arg("params") = python::object()),
|
||||
docString.c_str());
|
||||
docString = "Ensures the strongest acid groups are charged first";
|
||||
python::def("Reionize", reionizeHelper,
|
||||
(python::arg("mol"), python::arg("params") = python::object()),
|
||||
@@ -323,6 +405,11 @@ BOOST_PYTHON_MODULE(rdMolStandardize) {
|
||||
python::def("ReionizeInPlace", reionizeInPlaceHelper,
|
||||
(python::arg("mol"), python::arg("params") = python::object()),
|
||||
docString.c_str());
|
||||
docString = "Reionizes multiple molecules in place";
|
||||
python::def("ReionizeInPlace", mtreionizeInPlaceHelper,
|
||||
(python::arg("mols"), python::arg("numThreads"),
|
||||
python::arg("params") = python::object()),
|
||||
docString.c_str());
|
||||
docString = "Removes fragments from the molecule";
|
||||
python::def("RemoveFragments", removeFragsHelper,
|
||||
(python::arg("mol"), python::arg("params") = python::object()),
|
||||
@@ -333,6 +420,11 @@ BOOST_PYTHON_MODULE(rdMolStandardize) {
|
||||
python::def("RemoveFragmentsInPlace", removeFragmentsInPlaceHelper,
|
||||
(python::arg("mol"), python::arg("params") = python::object()),
|
||||
docString.c_str());
|
||||
docString = "Removes fragments from multiple molecules in place";
|
||||
python::def("RemoveFragmentsInPlace", mtremoveFragmentsInPlaceHelper,
|
||||
(python::arg("mols"), python::arg("numThreads"),
|
||||
python::arg("params") = python::object()),
|
||||
docString.c_str());
|
||||
docString = "Returns the canonical tautomer for the molecule";
|
||||
python::def("CanonicalTautomer", canonicalTautomerHelper,
|
||||
(python::arg("mol"), python::arg("params") = python::object()),
|
||||
|
||||
@@ -1009,6 +1009,53 @@ chlorine [Cl]
|
||||
rdMolStandardize.NormalizeInPlace(m)
|
||||
self.assertEqual(Chem.MolToSmiles(m), "CN(C)C=CC=O")
|
||||
|
||||
def test23CleanupInPlaceMT(self):
  """CleanupInPlace on a list of molecules with 4 threads gives the expected SMILES."""
  # three (input, expected) pairs, repeated 16 times to give the threads
  # something to do
  cases = (
    ("O=N(=O)-C(O[Fe])C(C(=O)O)C-N(=O)=O",
     "O=C([O-])C(C[N+](=O)[O-])C(O)[N+](=O)[O-].[Fe+]"),
    ("O=N(=O)-CC(O[Fe])C(C(=O)O)C-N(=O)=O",
     "O=C([O-])C(C[N+](=O)[O-])C(O)C[N+](=O)[O-].[Fe+]"),
    ("O=N(=O)-CCC(O[Fe])C(C(=O)O)C-N(=O)=O",
     "O=C([O-])C(C[N+](=O)[O-])C(O)CC[N+](=O)[O-].[Fe+]"),
  ) * 16
  mols = [Chem.MolFromSmiles(smi) for smi, _ in cases]
  rdMolStandardize.CleanupInPlace(mols, 4)
  expected = [smi for _, smi in cases]
  self.assertEqual([Chem.MolToSmiles(m) for m in mols], expected)
|
||||
|
||||
def test24NormalizeInPlaceMT(self):
  """NormalizeInPlace on a list of molecules with 4 threads gives the expected SMILES."""
  # three (input, expected) pairs, repeated 16 times to give the threads
  # something to do
  cases = (
    ("O=N(=O)-CC-N(=O)=O", "O=[N+]([O-])CC[N+](=O)[O-]"),
    ("O=N(=O)-CCC-N(=O)=O", "O=[N+]([O-])CCC[N+](=O)[O-]"),
    ("O=N(=O)-CCCC-N(=O)=O", "O=[N+]([O-])CCCC[N+](=O)[O-]"),
  ) * 16
  mols = [Chem.MolFromSmiles(smi) for smi, _ in cases]
  rdMolStandardize.NormalizeInPlace(mols, 4)
  expected = [smi for _, smi in cases]
  self.assertEqual([Chem.MolToSmiles(m) for m in mols], expected)
|
||||
|
||||
def test25ReionizeInPlaceMT(self):
  """ReionizeInPlace on a list of molecules with 4 threads gives the expected SMILES."""
  # three (input, expected) pairs, repeated 16 times to give the threads
  # something to do
  cases = (
    ("c1cc([O-])cc(C(=O)O)c1", "O=C([O-])c1cccc(O)c1"),
    ("c1cc(C[O-])cc(C(=O)O)c1", "O=C([O-])c1cccc(CO)c1"),
    ("c1cc(CC[O-])cc(C(=O)O)c1", "O=C([O-])c1cccc(CCO)c1"),
  ) * 16
  mols = [Chem.MolFromSmiles(smi) for smi, _ in cases]
  rdMolStandardize.ReionizeInPlace(mols, 4)
  expected = [smi for _, smi in cases]
  self.assertEqual([Chem.MolToSmiles(m) for m in mols], expected)
|
||||
|
||||
def test26RemoveFragmentsInPlaceMT(self):
  """RemoveFragmentsInPlace on a list of molecules with 4 threads gives the expected SMILES."""
  # three (input, expected) pairs, repeated 16 times to give the threads
  # something to do
  cases = (
    ("CCCC.Cl.[Na]", "CCCC"),
    ("CCCCO.Cl.[Na]", "CCCCO"),
    ("CCOC.Cl.[Na]", "CCOC"),
  ) * 16
  mols = [Chem.MolFromSmiles(smi) for smi, _ in cases]
  rdMolStandardize.RemoveFragmentsInPlace(mols, 4)
  expected = [smi for _, smi in cases]
  self.assertEqual([Chem.MolToSmiles(m) for m in mols], expected)
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -1114,3 +1114,172 @@ TEST_CASE("in place operations") {
|
||||
TEST_ASSERT(MolToSmiles(*m) == "[CH2-]c1ccccc1.[K+]");
|
||||
}
|
||||
}
|
||||
|
||||
// Exercises the multi-molecule cleanupInPlace() overload, both with the
// default thread count and (in threadsafe builds) with four threads.
TEST_CASE("cleanup with multiple mols") {
  SmilesParserParams parserParams;
  parserParams.sanitize = false;
  // deliberately ugly inputs chosen so that disconnection, normalization, and
  // reionization all have something to do
  std::vector<std::pair<std::string, std::string>> data = {
      {"O=N(=O)-C(O[Fe])C(C(=O)O)C-N(=O)=O",
       "O=C([O-])C(C[N+](=O)[O-])C(O)[N+](=O)[O-].[Fe+]"},
      {"O=N(=O)-CC(O[Fe])C(C(=O)O)C-N(=O)=O",
       "O=C([O-])C(C[N+](=O)[O-])C(O)C[N+](=O)[O-].[Fe+]"},
      {"O=N(=O)-CCC(O[Fe])C(C(=O)O)C-N(=O)=O",
       "O=C([O-])C(C[N+](=O)[O-])C(O)CC[N+](=O)[O-].[Fe+]"},
  };
  // enlarge the data set: each of the 8 passes appends a copy of the list
  for (auto pass = 0u; pass < 8; ++pass) {
    const auto snapshot = data;
    data.insert(data.end(), snapshot.begin(), snapshot.end());
  }
  std::vector<std::unique_ptr<RWMol>> mols;
  std::vector<RWMol *> molPtrs;
  for (const auto &entry : data) {
    mols.emplace_back(SmilesToMol(entry.first, parserParams));
    REQUIRE(mols.back());
    molPtrs.push_back(mols.back().get());
  }
  // compares every processed molecule against its expected SMILES
  const auto checkResults = [&]() {
    for (auto idx = 0u; idx < mols.size(); ++idx) {
      REQUIRE(mols[idx]);
      CHECK(MolToSmiles(*mols[idx]) == data[idx].second);
    }
  };
  SECTION("basics") {
    MolStandardize::cleanupInPlace(molPtrs);
    checkResults();
  }
#ifdef RDK_BUILD_THREADSAFE_SSS
  SECTION("multithreaded") {
    int numThreads = 4;
    MolStandardize::cleanupInPlace(molPtrs, numThreads);
    checkResults();
  }
#endif
}
|
||||
|
||||
// Exercises the multi-molecule normalizeInPlace() overload, both with the
// default thread count and (in threadsafe builds) with four threads.
TEST_CASE("normalize with multiple mols") {
  SmilesParserParams parserParams;
  parserParams.sanitize = false;
  std::vector<std::pair<std::string, std::string>> data = {
      {"O=N(=O)-CC-N(=O)=O", "O=[N+]([O-])CC[N+](=O)[O-]"},
      {"O=N(=O)-CCC-N(=O)=O", "O=[N+]([O-])CCC[N+](=O)[O-]"},
      {"O=N(=O)-CCCC-N(=O)=O", "O=[N+]([O-])CCCC[N+](=O)[O-]"},
  };
  // enlarge the data set: each of the 8 passes appends a copy of the list
  for (auto pass = 0u; pass < 8; ++pass) {
    const auto snapshot = data;
    data.insert(data.end(), snapshot.begin(), snapshot.end());
  }
  std::vector<std::unique_ptr<RWMol>> mols;
  std::vector<RWMol *> molPtrs;
  for (const auto &entry : data) {
    mols.emplace_back(SmilesToMol(entry.first, parserParams));
    REQUIRE(mols.back());
    molPtrs.push_back(mols.back().get());
  }
  // compares every processed molecule against its expected SMILES
  const auto checkResults = [&]() {
    for (auto idx = 0u; idx < mols.size(); ++idx) {
      REQUIRE(mols[idx]);
      CHECK(MolToSmiles(*mols[idx]) == data[idx].second);
    }
  };
  SECTION("basics") {
    MolStandardize::normalizeInPlace(molPtrs);
    checkResults();
  }
#ifdef RDK_BUILD_THREADSAFE_SSS
  SECTION("multithreaded") {
    int numThreads = 4;
    MolStandardize::normalizeInPlace(molPtrs, numThreads);
    checkResults();
  }
#endif
}
|
||||
|
||||
// Exercises the multi-molecule reionizeInPlace() overload, both with the
// default thread count and (in threadsafe builds) with four threads.
TEST_CASE("Reionize with multiple mols") {
  SmilesParserParams parserParams;
  parserParams.sanitize = false;
  std::vector<std::pair<std::string, std::string>> data = {
      {"c1cc([O-])cc(C(=O)O)c1", "O=C([O-])c1cccc(O)c1"},
      {"c1cc(C[O-])cc(C(=O)O)c1", "O=C([O-])c1cccc(CO)c1"},
      {"c1cc(CC[O-])cc(C(=O)O)c1", "O=C([O-])c1cccc(CCO)c1"},
  };
  // enlarge the data set: each of the 8 passes appends a copy of the list
  for (auto pass = 0u; pass < 8; ++pass) {
    const auto snapshot = data;
    data.insert(data.end(), snapshot.begin(), snapshot.end());
  }
  std::vector<std::unique_ptr<RWMol>> mols;
  std::vector<RWMol *> molPtrs;
  for (const auto &entry : data) {
    mols.emplace_back(SmilesToMol(entry.first, parserParams));
    REQUIRE(mols.back());
    molPtrs.push_back(mols.back().get());
  }
  // compares every processed molecule against its expected SMILES
  const auto checkResults = [&]() {
    for (auto idx = 0u; idx < mols.size(); ++idx) {
      REQUIRE(mols[idx]);
      CHECK(MolToSmiles(*mols[idx]) == data[idx].second);
    }
  };
  SECTION("basics") {
    MolStandardize::reionizeInPlace(molPtrs);
    checkResults();
  }
#ifdef RDK_BUILD_THREADSAFE_SSS
  SECTION("multithreaded") {
    int numThreads = 4;
    MolStandardize::reionizeInPlace(molPtrs, numThreads);
    checkResults();
  }
#endif
}
|
||||
|
||||
// Exercises the multi-molecule removeFragmentsInPlace() overload, both with
// the default thread count and (in threadsafe builds) with four threads.
TEST_CASE("RemoveFragments with multiple mols") {
  SmilesParserParams parserParams;
  parserParams.sanitize = false;
  std::vector<std::pair<std::string, std::string>> data = {
      {"CCCC.Cl.[Na]", "CCCC"},
      {"CCCCO.Cl.[Na]", "CCCCO"},
      {"CCOC.Cl.[Na]", "CCOC"},
  };
  // enlarge the data set: each of the 8 passes appends a copy of the list
  for (auto pass = 0u; pass < 8; ++pass) {
    const auto snapshot = data;
    data.insert(data.end(), snapshot.begin(), snapshot.end());
  }
  std::vector<std::unique_ptr<RWMol>> mols;
  std::vector<RWMol *> molPtrs;
  for (const auto &entry : data) {
    mols.emplace_back(SmilesToMol(entry.first, parserParams));
    REQUIRE(mols.back());
    molPtrs.push_back(mols.back().get());
  }
  // compares every processed molecule against its expected SMILES
  const auto checkResults = [&]() {
    for (auto idx = 0u; idx < mols.size(); ++idx) {
      REQUIRE(mols[idx]);
      CHECK(MolToSmiles(*mols[idx]) == data[idx].second);
    }
  };
  SECTION("basics") {
    MolStandardize::removeFragmentsInPlace(molPtrs);
    checkResults();
  }
#ifdef RDK_BUILD_THREADSAFE_SSS
  SECTION("multithreaded") {
    int numThreads = 4;
    MolStandardize::removeFragmentsInPlace(molPtrs, numThreads);
    checkResults();
  }
#endif
}
|
||||
@@ -69,4 +69,4 @@ TEST_CASE("RDProps move semantics") {
|
||||
CHECK(!d1.hasProp("foo"s));
|
||||
CHECK(!d1.hasProp("bar"s));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user