Add an in place version of most of the MolStandardize functionality (#6491)

* reionizer, uncharger, and normalizer can now operate in place

* add removeUnmatchedAtoms argument to in-place version of runReactant

When set to false, atoms which are not explicitly removed by the reaction are preserved

* Fix a case where transforms were incorrectly updating atomic numbers

* add more inplace operations to MolStandardize

* support those in the Python layer

* support in-place operation for the rest of the Python wrappers

* move a few more functions over to the inplace code
This commit is contained in:
Greg Landrum
2023-07-21 08:44:41 +02:00
committed by GitHub
parent 12e23a62d2
commit ac54eb3209
23 changed files with 506 additions and 122 deletions

View File

@@ -57,12 +57,13 @@ std::vector<MOL_SPTR_VECT> ChemicalReaction::runReactant(
return run_Reactant(*this, reactant, reactionTemplateIdx);
}
bool ChemicalReaction::runReactant(RWMol &reactant) const {
bool ChemicalReaction::runReactant(RWMol &reactant,
bool removeUnmatchedAtoms) const {
if (getReactants().size() != 1 || getProducts().size() != 1) {
throw ChemicalReactionException(
"Only single reactant - single product reactions can be run in place.");
}
return run_Reactant(*this, reactant);
return run_Reactant(*this, reactant, removeUnmatchedAtoms);
}
ChemicalReaction::ChemicalReaction(const std::string &pickle) {

View File

@@ -246,10 +246,12 @@ class RDKIT_CHEMREACTIONS_EXPORT ChemicalReaction : public RDProps {
and where no atoms are added in the product.
\param reactant The single reactant to use
\param removeUnmatchedAtoms toggles whether or not atoms from the reactant
which do not match template atoms are removed.
\return whether or not the reactant was actually modified
*/
bool runReactant(RWMol &reactant) const;
bool runReactant(RWMol &reactant, bool removeUnmatchedAtoms = true) const;
const MOL_SPTR_VECT &getReactants() const {
return this->m_reactantTemplates;

View File

@@ -1733,7 +1733,8 @@ bool updateBondsModifiedByReaction(
} // namespace
// Modifies a single reactant IN PLACE
bool run_Reactant(const ChemicalReaction &rxn, RWMol &reactant) {
bool run_Reactant(const ChemicalReaction &rxn, RWMol &reactant,
bool removeUnmatchedAtoms) {
PRECONDITION(rxn.getNumReactantTemplates() == 1,
"only one reactant supported");
PRECONDITION(rxn.getNumProductTemplates() == 1, "only one product supported");
@@ -1782,14 +1783,18 @@ bool run_Reactant(const ChemicalReaction &rxn, RWMol &reactant) {
const auto &match = reactantMatch[0];
// we now have a match for the reactant, so we can work on it
// start by marking atoms which are in the reactants, but not in the product
// start by marking atoms which are in the reactant template, but not in the
// product template for removal
boost::dynamic_bitset<> atomsToRemove(reactant.getNumAtoms());
// finds atoms in the reactantTemplate which aren't in the productTemplate
ReactionRunnerUtils::identifyAtomsInReactantTemplateNotProductTemplate(
*reactantTemplate, atomsToRemove, reactantProductMap, match);
// identify atoms which should be removed from the molecule
ReactionRunnerUtils::traverseToFindAtomsToRemove(reactant, *reactantTemplate,
atomsToRemove, match);
if (removeUnmatchedAtoms) {
// identify atoms which did not match something in the reactant template but
// which should be removed from the molecule
ReactionRunnerUtils::traverseToFindAtomsToRemove(
reactant, *reactantTemplate, atomsToRemove, match);
}
bool molModified = false;
reactant.beginBatchEdit();

View File

@@ -81,7 +81,8 @@ RDKIT_CHEMREACTIONS_EXPORT std::vector<MOL_SPTR_VECT> run_Reactant(
unsigned int reactantIdx);
RDKIT_CHEMREACTIONS_EXPORT bool run_Reactant(const ChemicalReaction& rxn,
RWMol& reactant);
RWMol& reactant,
bool removeUnmatchedAtoms = true);
//! Reduce the product generated by run_Reactants or run_Reactant to
/// the sidechains that come from the reagents

View File

@@ -150,7 +150,8 @@ PyObject *RunReactant(ChemicalReaction *self, T reactant,
return res;
}
bool RunReactantInPlace(ChemicalReaction *self, ROMol *reactant) {
bool RunReactantInPlace(ChemicalReaction *self, ROMol *reactant,
bool removeUnmatchedAtoms) {
auto react = static_cast<RWMol *>(reactant);
bool res = false;
{
@@ -158,7 +159,7 @@ bool RunReactantInPlace(ChemicalReaction *self, ROMol *reactant) {
if (!self->isInitialized()) {
self->initReactantMatchers();
}
res = self->runReactant(*react);
res = self->runReactant(*react, removeUnmatchedAtoms);
}
return res;
}
@@ -566,7 +567,8 @@ Sample Usage:
RDKit::RunReactant,
"apply the reaction to a single reactant")
.def("RunReactantInPlace", RDKit::RunReactantInPlace,
(python::arg("self"), python::arg("reactant")),
(python::arg("self"), python::arg("reactant"),
python::arg("removeUnmatchedAtoms") = true),
"apply the reaction to a single reactant in place. The reactant "
"itself is modified. This can only be used for single reactant - "
"single product reactions.")

View File

@@ -1031,6 +1031,20 @@ M END
self.assertEqual(Chem.MolToSmiles(reactant), 'CCOC(C)=O')
self.assertFalse(rxn.RunReactantInPlace(reactant))
self.assertEqual(Chem.MolToSmiles(reactant), 'CCOC(C)=O')
rxn = rdChemReactions.ReactionFromSmarts('CC[N:1]>>[N:1]')
self.assertIsNotNone(rxn)
reactant = Chem.MolFromSmiles('CCCN.Cl')
self.assertTrue(rxn.RunReactantInPlace(reactant))
Chem.SanitizeMol(reactant)
self.assertEqual(Chem.MolToSmiles(reactant), 'N')
reactant = Chem.MolFromSmiles('CCCN.Cl')
self.assertTrue(rxn.RunReactantInPlace(reactant,removeUnmatchedAtoms=False))
Chem.SanitizeMol(reactant)
self.assertEqual(Chem.MolToSmiles(reactant), 'C.Cl.N')
def testGithub4651(self):
mol_sulfonylchloride = Chem.MolFromSmiles("Nc1c(CCCSNCC)cc(cc1)S(=O)(=O)Cl")

View File

@@ -979,8 +979,35 @@ TEST_CASE("one-component reactions") {
CHECK_THROWS_AS(rxn->runReactant(*mol), ChemicalReactionException);
}
}
SECTION("toggling removing unmapped atoms") {
auto rxn = "CC[N:1]>>[N:1]"_rxnsmarts;
REQUIRE(rxn);
rxn->initReactantMatchers();
{
auto mol = "CCN.Cl"_smiles;
REQUIRE(mol);
CHECK(rxn->runReactant(*mol));
CHECK(mol->getNumAtoms() == 1);
CHECK(MolToSmiles(*mol) == "N");
}
{
auto mol = "CCN.Cl"_smiles;
REQUIRE(mol);
bool removeUnmatchedAtoms = false;
CHECK(rxn->runReactant(*mol, removeUnmatchedAtoms));
CHECK(mol->getNumAtoms() == 2);
CHECK(MolToSmiles(*mol) == "Cl.N");
}
{ // extra atoms connected to the matching part should not be removed
auto mol = "CCCN.Cl"_smiles;
REQUIRE(mol);
bool removeUnmatchedAtoms = false;
CHECK(rxn->runReactant(*mol, removeUnmatchedAtoms));
CHECK(mol->getNumAtoms() == 3);
CHECK(MolToSmiles(*mol) == "C.Cl.N");
}
}
}
TEST_CASE("Github #4759 Reaction parser fails when CX extensions are present") {
std::string sma = "[C:1]Br.[C:2]O>>[C:2][C:1] |$Aryl;;;;;Aryl$|";
SECTION("SMARTS") {

View File

@@ -96,6 +96,12 @@ Reionizer::~Reionizer() { delete d_abcat; }
// d_css(css) {};
// Enforce charges on certain atoms, then perform competitive reionization.
// Works on a copy of mol: the input is left untouched and the caller owns the
// returned molecule.
ROMol *Reionizer::reionize(const ROMol &mol) {
  // keep the copy in a unique_ptr so that it is freed if reionizeInPlace()
  // throws (it has a PRECONDITION on the acid-base catalog)
  std::unique_ptr<RWMol> omol{new RWMol(mol)};
  this->reionizeInPlace(*omol);
  return omol.release();
}
void Reionizer::reionizeInPlace(RWMol &mol) {
PRECONDITION(this->d_abcat, "");
const AcidBaseCatalogParams *abparams = this->d_abcat->getCatalogParams();
@@ -103,21 +109,20 @@ ROMol *Reionizer::reionize(const ROMol &mol) {
const std::vector<std::pair<ROMOL_SPTR, ROMOL_SPTR>> abpairs =
abparams->getPairs();
auto *omol = new ROMol(mol);
if (omol->needsUpdatePropertyCache()) {
omol->updatePropertyCache(false);
if (mol.needsUpdatePropertyCache()) {
mol.updatePropertyCache(false);
}
int start_charge = MolOps::getFormalCharge(*omol);
int start_charge = MolOps::getFormalCharge(mol);
for (const auto &cc : this->d_ccs) {
std::vector<MatchVectType> res;
ROMOL_SPTR ccmol(SmartsToMol(cc.Smarts));
unsigned int matches = SubstructMatch(*omol, *ccmol, res);
unsigned int matches = SubstructMatch(mol, *ccmol, res);
if (matches) {
for (const auto &match : res) {
for (const auto &pair : match) {
auto idx = pair.second;
Atom *atom = omol->getAtomWithIdx(idx);
Atom *atom = mol.getAtomWithIdx(idx);
BOOST_LOG(rdInfoLog)
<< "Applying charge correction " << cc.Name << " "
<< atom->getSymbol() << " " << cc.Charge << "\n";
@@ -126,7 +131,7 @@ ROMol *Reionizer::reionize(const ROMol &mol) {
}
}
}
int current_charge = MolOps::getFormalCharge(*omol);
int current_charge = MolOps::getFormalCharge(mol);
int charge_diff = current_charge - start_charge;
// std::cout << "Current charge: " << current_charge << std::endl;
// std::cout << "Charge diff: " << charge_diff << std::endl;
@@ -140,7 +145,7 @@ ROMol *Reionizer::reionize(const ROMol &mol) {
// returns the acid strength ranking (ppos)
// and the substruct match (poccur) in a pair
std::shared_ptr<std::pair<unsigned int, std::vector<unsigned int>>> res(
this->strongestProtonated(*omol, abpairs));
this->strongestProtonated(mol, abpairs));
if (res == nullptr) {
break;
} else {
@@ -151,7 +156,7 @@ ROMol *Reionizer::reionize(const ROMol &mol) {
(abpair.first)->getProp(common_properties::_Name, abname);
BOOST_LOG(rdInfoLog) << "Ionizing " << abname
<< " to balance previous charge corrections\n";
Atom *patom = omol->getAtomWithIdx(poccur.back());
Atom *patom = mol.getAtomWithIdx(poccur.back());
patom->setFormalCharge(patom->getFormalCharge() - 1);
if (patom->getNumExplicitHs() > 0) {
@@ -164,15 +169,15 @@ ROMol *Reionizer::reionize(const ROMol &mol) {
}
}
// std::cout << MolToSmiles(*omol) << std::endl;
// std::cout << MolToSmiles(mol) << std::endl;
// std::cout << "Charge diff: " << charge_diff << std::endl;
std::set<std::vector<unsigned int>> already_moved;
while (true) {
std::shared_ptr<std::pair<unsigned int, std::vector<unsigned int>>> sp_res(
this->strongestProtonated(*omol, abpairs));
this->strongestProtonated(mol, abpairs));
std::shared_ptr<std::pair<unsigned int, std::vector<unsigned int>>> wi_res(
this->weakestIonized(*omol, abpairs));
this->weakestIonized(mol, abpairs));
if (sp_res != nullptr && wi_res != nullptr) {
unsigned int ppos = sp_res->first;
unsigned int ipos = wi_res->first;
@@ -206,9 +211,10 @@ ROMol *Reionizer::reionize(const ROMol &mol) {
BOOST_LOG(rdInfoLog) << "Moved proton from " << prot_name << " to "
<< ionized_name << "\n";
// Remove hydrogen from strongest protonated
Atom *patom = omol->getAtomWithIdx(poccur.back());
Atom *patom = mol.getAtomWithIdx(poccur.back());
patom->setFormalCharge(patom->getFormalCharge() - 1);
// If no implicit Hs to autoremove, and at least 1 explicit H to remove,
// If no implicit Hs to autoremove, and at least 1 explicit H to
// remove,
// reduce explicit count by 1
if (patom->getNumImplicitHs() == 0 && patom->getNumExplicitHs() > 0) {
patom->setNumExplicitHs(patom->getNumExplicitHs() - 1);
@@ -217,7 +223,7 @@ ROMol *Reionizer::reionize(const ROMol &mol) {
patom->updatePropertyCache();
// Add hydrogen to weakest ionized
Atom *iatom = omol->getAtomWithIdx(ioccur.back());
Atom *iatom = mol.getAtomWithIdx(ioccur.back());
iatom->setFormalCharge(iatom->getFormalCharge() + 1);
// Increase explicit H count if no implicit, or aromatic N or P,
// or non default valence state
@@ -240,10 +246,6 @@ ROMol *Reionizer::reionize(const ROMol &mol) {
break;
}
} // while loop
// MolOps::sanitizeMol(*static_cast<RWMol *>(omol));
return omol;
}
std::pair<unsigned int, std::vector<unsigned int>>
@@ -340,10 +342,14 @@ bool neutralizeNegIfPossible(Atom *atom) {
}
// Runs the uncharger on a copy of mol: the input is left untouched and the
// caller owns the returned molecule.
ROMol *Uncharger::uncharge(const ROMol &mol) {
  // keep the copy in a unique_ptr so that it is freed should
  // unchargeInPlace() throw
  std::unique_ptr<RWMol> omol{new RWMol(mol)};
  this->unchargeInPlace(*omol);
  return omol.release();
}
void Uncharger::unchargeInPlace(RWMol &mol) {
BOOST_LOG(rdInfoLog) << "Running Uncharger\n";
auto *omol = new ROMol(mol);
if (omol->needsUpdatePropertyCache()) {
omol->updatePropertyCache(false);
if (mol.needsUpdatePropertyCache()) {
mol.updatePropertyCache(false);
}
std::vector<MatchVectType> p_matches;
std::vector<MatchVectType> q_matches;
@@ -351,25 +357,25 @@ ROMol *Uncharger::uncharge(const ROMol &mol) {
std::vector<MatchVectType> a_matches;
// Get atom ids for matches
SubstructMatch(*omol, *(this->pos_h), p_matches);
SubstructMatch(*omol, *(this->pos_noh), q_matches);
SubstructMatch(mol, *(this->pos_h), p_matches);
SubstructMatch(mol, *(this->pos_noh), q_matches);
unsigned int q_matched = 0;
for (const auto &match : q_matches) {
q_matched += omol->getAtomWithIdx(match[0].second)->getFormalCharge();
q_matched += mol.getAtomWithIdx(match[0].second)->getFormalCharge();
}
unsigned int n_matched = SubstructMatch(*omol, *(this->neg), n_matches);
unsigned int a_matched = SubstructMatch(*omol, *(this->neg_acid), a_matches);
unsigned int n_matched = SubstructMatch(mol, *(this->neg), n_matches);
unsigned int a_matched = SubstructMatch(mol, *(this->neg_acid), a_matches);
// count the total number of negative atoms
unsigned int n_neg = std::count_if(
omol->atoms().begin(), omol->atoms().end(),
mol.atoms().begin(), mol.atoms().end(),
[](const auto atom) { return (atom->getFormalCharge() < 0); });
bool needsNeutralization =
(q_matched > 0 && (n_matched > 0 || a_matched > 0));
std::vector<unsigned int> atomRanks(omol->getNumAtoms());
std::vector<unsigned int> atomRanks(mol.getNumAtoms());
if (df_canonicalOrdering && needsNeutralization) {
Canon::rankMolAtoms(*omol, atomRanks);
Canon::rankMolAtoms(mol, atomRanks);
} else {
std::iota(atomRanks.begin(), atomRanks.end(), 0);
}
@@ -395,18 +401,19 @@ ROMol *Uncharger::uncharge(const ROMol &mol) {
// Surplus negative charges more than non-neutralizable positive charges
int neg_surplus = n_neg - q_matched;
if (neg_surplus > 0 && n_matched) {
boost::dynamic_bitset<> nonAcids(omol->getNumAtoms());
boost::dynamic_bitset<> nonAcids(mol.getNumAtoms());
nonAcids.set();
for (const auto &pair : a_atoms) {
nonAcids.reset(pair.second);
}
// zwitterion with more negative charges than quaternary positive centres
// zwitterion with more negative charges than quaternary positive
// centres
for (const auto &pair : n_atoms) {
unsigned int idx = pair.second;
if (!nonAcids[idx]) {
continue;
}
Atom *atom = omol->getAtomWithIdx(idx);
Atom *atom = mol.getAtomWithIdx(idx);
if (neutralizeNegIfPossible(atom) && !--neg_surplus) {
break;
}
@@ -416,13 +423,13 @@ ROMol *Uncharger::uncharge(const ROMol &mol) {
// now do the other negative groups if we still have charges left:
neg_surplus = a_matched - q_matched;
if (neg_surplus > 0) {
boost::dynamic_bitset<> skipChargeSep(omol->getNumAtoms());
boost::dynamic_bitset<> skipChargeSep(mol.getNumAtoms());
for (const auto &pair : n_atoms) {
unsigned int idx = pair.second;
Atom *atom = omol->getAtomWithIdx(idx);
Atom *atom = mol.getAtomWithIdx(idx);
for (const auto &nbri :
boost::make_iterator_range(omol->getAtomNeighbors(atom))) {
const auto &nbr = (*omol)[nbri];
boost::make_iterator_range(mol.getAtomNeighbors(atom))) {
const auto &nbr = (mol)[nbri];
auto nbrIdx = nbr->getIdx();
// if the neighbor has a positive charge,
// neutralize only once (e.g., NO3-)
@@ -436,12 +443,14 @@ ROMol *Uncharger::uncharge(const ROMol &mol) {
}
}
}
// zwitterion with more negative charges than quaternary positive centres
// zwitterion with more negative charges than quaternary positive
// centres
for (const auto &pair : a_atoms) {
// Add hydrogen to first negative acidic atom, increase formal charge
// Until quaternary positive == negative total or no more negative atoms
// Until quaternary positive == negative total or no more negative
// atoms
unsigned int idx = pair.second;
Atom *atom = omol->getAtomWithIdx(idx);
Atom *atom = mol.getAtomWithIdx(idx);
// skip ahead if we already neutralized this or if it is part of a
// zwitterion
if (atom->getFormalCharge() >= 0 || skipChargeSep.test(idx)) {
@@ -457,14 +466,14 @@ ROMol *Uncharger::uncharge(const ROMol &mol) {
} else {
for (const auto &pair : n_atoms) {
auto idx = pair.second;
Atom *atom = omol->getAtomWithIdx(idx);
Atom *atom = mol.getAtomWithIdx(idx);
neutralizeNegIfPossible(atom);
}
}
// Neutralize cations until there is no longer a net charge remaining:
int netCharge = 0;
for (const auto &at : omol->atoms()) {
for (const auto &at : mol.atoms()) {
netCharge += at->getFormalCharge();
}
@@ -477,7 +486,7 @@ ROMol *Uncharger::uncharge(const ROMol &mol) {
}
}
for (const auto &idx : p_idx_matches) {
Atom *atom = omol->getAtomWithIdx(idx);
Atom *atom = mol.getAtomWithIdx(idx);
// atoms from places like Mol blocks are normally missing explicit Hs:
atom->setNumExplicitHs(atom->getTotalNumHs());
atom->setNoImplicit(true);
@@ -507,7 +516,6 @@ ROMol *Uncharger::uncharge(const ROMol &mol) {
}
}
}
return omol;
} // namespace MolStandardize
} // namespace MolStandardize

View File

@@ -85,6 +85,9 @@ class RDKIT_MOLSTANDARDIZE_EXPORT Reionizer {
//! Enforce charges on certain atoms, then perform competitive reionization.
ROMol *reionize(const ROMol &mol);
//! Enforce charges on certain atoms, then perform competitive reionization,
//! modifies molecule in place
void reionizeInPlace(RWMol &mol);
private:
AcidBaseCatalog *d_abcat;
@@ -131,6 +134,7 @@ class RDKIT_MOLSTANDARDIZE_EXPORT Uncharger {
~Uncharger();
ROMol *uncharge(const ROMol &mol);
void unchargeInPlace(RWMol &mol);
private:
bool df_canonicalOrdering = true;

View File

@@ -76,6 +76,12 @@ FragmentRemover::FragmentRemover(std::istream &fragmentStream, bool leave_last,
FragmentRemover::~FragmentRemover() { delete d_fcat; };
// Runs the fragment remover on a copy of mol: the input is left untouched and
// the caller owns the returned molecule.
ROMol *FragmentRemover::remove(const ROMol &mol) {
  auto molcp = new RWMol(mol);
  try {
    removeInPlace(*molcp);
  } catch (...) {
    // removeInPlace() has a PRECONDITION on the fragment catalog; make sure
    // the copy is not leaked if it (or anything it calls) throws
    delete molcp;
    throw;
  }
  return static_cast<ROMol *>(molcp);
}
void FragmentRemover::removeInPlace(RWMol &mol) {
BOOST_LOG(rdInfoLog) << "Running FragmentRemover\n";
PRECONDITION(this->d_fcat, "");
const FragmentCatalogParams *fparams = this->d_fcat->getCatalogParams();
@@ -126,9 +132,14 @@ ROMol *FragmentRemover::remove(const ROMol &mol) {
if (this->SKIP_IF_ALL_MATCH) {
BOOST_LOG(rdInfoLog)
<< "All fragments matched; original molecule returned." << std::endl;
return new ROMol(mol);
} else {
mol.beginBatchEdit();
for (auto i = 0u; i < mol.getNumAtoms(); ++i) {
mol.removeAtom(i);
}
mol.commitBatchEdit();
}
return new ROMol();
return;
}
boost::dynamic_bitset<> atomsToRemove(mol.getNumAtoms());
@@ -141,15 +152,13 @@ ROMol *FragmentRemover::remove(const ROMol &mol) {
}
}
// remove the atoms that need to go
auto *removed = new RWMol(mol);
removed->beginBatchEdit();
mol.beginBatchEdit();
for (unsigned int i = 0; i < mol.getNumAtoms(); ++i) {
if (atomsToRemove[i]) {
removed->removeAtom(i);
mol.removeAtom(i);
}
}
removed->commitBatchEdit();
return static_cast<ROMol *>(removed);
mol.commitBatchEdit();
}
bool isOrganic(const ROMol &frag) {

View File

@@ -44,6 +44,7 @@ class RDKIT_MOLSTANDARDIZE_EXPORT FragmentRemover {
FragmentRemover &operator=(FragmentRemover const &) = delete;
ROMol *remove(const ROMol &mol);
void removeInPlace(RWMol &mol);
private:
// Setting leave_last to True will ensure at least one fragment

View File

@@ -68,6 +68,7 @@ accordingly.
//! overload
/// modifies the molecule in place
void disconnect(RWMol &mol);
//! forwards to disconnect(RWMol &), which modifies the molecule in place
void disconnectInPlace(RWMol &mol) { disconnect(mol); }
private:
struct NonMetal {

View File

@@ -122,38 +122,31 @@ void updateCleanupParamsFromJSON(CleanupParameters &params,
}
RWMol *cleanup(const RWMol *mol, const CleanupParameters &params) {
RWMol m(*mol);
MolOps::removeHs(m);
auto nmol = new RWMol(*mol);
cleanupInPlace(*nmol, params);
return nmol;
}
void cleanupInPlace(RWMol &mol, const CleanupParameters &params) {
MolOps::removeHs(mol);
MolStandardize::MetalDisconnector md;
md.disconnect(m);
RWMOL_SPTR normalized(MolStandardize::normalize(&m, params));
RWMol *reionized = MolStandardize::reionize(normalized.get(), params);
md.disconnectInPlace(mol);
MolStandardize::normalizeInPlace(mol, params);
MolStandardize::reionizeInPlace(mol, params);
bool cleanIt = true;
bool force = true;
MolOps::assignStereochemistry(*reionized, cleanIt, force);
// update properties of reionized using m.
reionized->updateProps(m);
return reionized;
MolOps::assignStereochemistry(mol, cleanIt, force);
}
RWMol *tautomerParent(const RWMol &mol, const CleanupParameters &params,
bool skip_standardize) {
const RWMol *cleaned = nullptr;
std::unique_ptr<RWMol> cleanedHolder;
std::unique_ptr<RWMol> res{new RWMol(mol)};
if (!skip_standardize) {
cleanedHolder.reset(cleanup(mol, params));
cleaned = cleanedHolder.get();
} else {
cleaned = &mol;
cleanupInPlace(*res, params);
}
std::unique_ptr<RWMol> ct{canonicalTautomer(cleaned, params)};
return cleanup(ct.get(), params);
std::unique_ptr<RWMol> ct{canonicalTautomer(res.get(), params)};
cleanupInPlace(*ct, params);
return ct.release();
}
// Return the fragment parent of a given molecule.
@@ -161,27 +154,19 @@ RWMol *tautomerParent(const RWMol &mol, const CleanupParameters &params,
//
RWMol *fragmentParent(const RWMol &mol, const CleanupParameters &params,
bool skip_standardize) {
const RWMol *cleaned = nullptr;
std::unique_ptr<RWMol> cleanedHolder;
std::unique_ptr<RWMol> res{new RWMol(mol)};
if (!skip_standardize) {
cleanedHolder.reset(cleanup(mol, params));
cleaned = cleanedHolder.get();
} else {
cleaned = &mol;
cleanupInPlace(*res, params);
}
LargestFragmentChooser lfragchooser(params.preferOrganic);
return static_cast<RWMol *>(lfragchooser.choose(*cleaned));
return static_cast<RWMol *>(lfragchooser.choose(*res));
}
RWMol *stereoParent(const RWMol &mol, const CleanupParameters &params,
bool skip_standardize) {
RWMol *res;
RWMol *res = new RWMol(mol);
if (!skip_standardize) {
res = cleanup(mol, params);
} else {
res = new RWMol(mol);
cleanupInPlace(*res, params);
}
MolOps::removeStereochemistry(*res);
@@ -190,11 +175,9 @@ RWMol *stereoParent(const RWMol &mol, const CleanupParameters &params,
RWMol *isotopeParent(const RWMol &mol, const CleanupParameters &params,
bool skip_standardize) {
RWMol *res;
RWMol *res = new RWMol(mol);
if (!skip_standardize) {
res = cleanup(mol, params);
} else {
res = new RWMol(mol);
cleanupInPlace(*res, params);
}
for (auto atom : res->atoms()) {
@@ -208,15 +191,13 @@ RWMol *chargeParent(const RWMol &mol, const CleanupParameters &params,
// Return the charge parent of a given molecule.
// The charge parent is the uncharged version of the fragment parent.
RWMOL_SPTR fragparent(fragmentParent(mol, params, skip_standardize));
// if fragment...
ROMol nm(*fragparent);
std::unique_ptr<RWMol> fragparent{
fragmentParent(mol, params, skip_standardize)};
Uncharger uncharger(params.doCanonical);
ROMOL_SPTR uncharged(uncharger.uncharge(nm));
RWMol *omol = cleanup(static_cast<RWMol *>(uncharged.get()), params);
return omol;
uncharger.unchargeInPlace(*fragparent);
cleanupInPlace(*fragparent, params);
return fragparent.release();
}
RWMol *superParent(const RWMol &mol, const CleanupParameters &params,
@@ -247,12 +228,27 @@ RWMol *reionize(const RWMol *mol, const CleanupParameters &params) {
return static_cast<RWMol *>(reionizer->reionize(*mol));
}
// Builds a Normalizer from params and applies it directly to mol.
void normalizeInPlace(RWMol &mol, const CleanupParameters &params) {
  const std::unique_ptr<Normalizer> worker(normalizerFromParams(params));
  worker->normalizeInPlace(mol);
}
// Builds a Reionizer from params and applies it directly to mol.
void reionizeInPlace(RWMol &mol, const CleanupParameters &params) {
  const std::unique_ptr<Reionizer> worker(reionizerFromParams(params));
  worker->reionizeInPlace(mol);
}
RWMol *removeFragments(const RWMol *mol, const CleanupParameters &params) {
PRECONDITION(mol, "bad molecule");
std::unique_ptr<FragmentRemover> remover{fragmentRemoverFromParams(params)};
return static_cast<RWMol *>(remover->remove(*mol));
}
// Builds a FragmentRemover from params and applies it directly to mol.
void removeFragmentsInPlace(RWMol &mol, const CleanupParameters &params) {
  const std::unique_ptr<FragmentRemover> worker(
      fragmentRemoverFromParams(params));
  worker->removeInPlace(mol);
}
RWMol *canonicalTautomer(const RWMol *mol, const CleanupParameters &params) {
PRECONDITION(mol, "bad molecule");
std::unique_ptr<TautomerEnumerator> te{tautomerEnumeratorFromParams(params)};
@@ -260,22 +256,21 @@ RWMol *canonicalTautomer(const RWMol *mol, const CleanupParameters &params) {
}
std::string standardizeSmiles(const std::string &smiles) {
RWMOL_SPTR mol(SmilesToMol(smiles, 0, false));
std::unique_ptr<RWMol> mol{SmilesToMol(smiles, 0, false)};
if (!mol) {
std::string message =
"SMILES Parse Error: syntax error for input: " + smiles;
throw ValueErrorException(message);
}
CleanupParameters params;
RWMOL_SPTR cleaned(cleanup(*mol, params));
return MolToSmiles(*cleaned);
cleanupInPlace(*mol);
return MolToSmiles(*mol);
}
std::vector<std::string> enumerateTautomerSmiles(
const std::string &smiles, const CleanupParameters &params) {
std::unique_ptr<RWMol> mol(SmilesToMol(smiles, 0, false));
mol.reset(cleanup(mol.get(), params));
cleanupInPlace(*mol, params);
MolOps::sanitizeMol(*mol);
TautomerEnumerator te(params);

View File

@@ -103,21 +103,33 @@ inline RWMol *cleanup(const RWMol &mol, const CleanupParameters &params =
defaultCleanupParameters) {
return cleanup(&mol, params);
};
//! Works the same as cleanup(mol), but modifies the molecule in place
RDKIT_MOLSTANDARDIZE_EXPORT void cleanupInPlace(
RWMol &mol, const CleanupParameters &params = defaultCleanupParameters);
//! Works the same as Normalizer().normalize(mol)
RDKIT_MOLSTANDARDIZE_EXPORT RWMol *normalize(
const RWMol *mol,
const CleanupParameters &params = defaultCleanupParameters);
//! Works the same as Normalizer().normalizeInPlace(mol)
RDKIT_MOLSTANDARDIZE_EXPORT void normalizeInPlace(
RWMol &mol, const CleanupParameters &params = defaultCleanupParameters);
//! Works the same as Reionizer().reionize(mol)
RDKIT_MOLSTANDARDIZE_EXPORT RWMol *reionize(
const RWMol *mol,
const CleanupParameters &params = defaultCleanupParameters);
//! Works the same as Reionizer().reionizeInPlace(mol)
RDKIT_MOLSTANDARDIZE_EXPORT void reionizeInPlace(
RWMol &mol, const CleanupParameters &params = defaultCleanupParameters);
//! Works the same as FragmentRemover().remove(mol)
RDKIT_MOLSTANDARDIZE_EXPORT RWMol *removeFragments(
const RWMol *mol,
const CleanupParameters &params = defaultCleanupParameters);
//! Works the same as FragmentRemover().removeInPlace(mol)
RDKIT_MOLSTANDARDIZE_EXPORT void removeFragmentsInPlace(
RWMol &mol, const CleanupParameters &params = defaultCleanupParameters);
//! Works the same as TautomerEnumerator().canonicalize(mol)
RDKIT_MOLSTANDARDIZE_EXPORT RWMol *canonicalTautomer(
@@ -188,6 +200,12 @@ RDKIT_MOLSTANDARDIZE_EXPORT void disconnectOrganometallics(
RDKIT_MOLSTANDARDIZE_EXPORT ROMol *disconnectOrganometallics(
const ROMol &mol, RDKit::MolStandardize::MetalDisconnectorOptions mdo = {
true, true, false, true});
//! Forwards to disconnectOrganometallics(mol, mdo); included for API
//! consistency with the other *InPlace functions.
inline void disconnectOrganometallicsInPlace(
    RWMol &mol, RDKit::MolStandardize::MetalDisconnectorOptions mdo = {
                    true, true, false, true}) {
  disconnectOrganometallics(mol, mdo);
}
//! TODO
RDKIT_MOLSTANDARDIZE_EXPORT std::vector<std::string> enumerateTautomerSmiles(

View File

@@ -85,6 +85,69 @@ Normalizer::Normalizer(
// destructor
Normalizer::~Normalizer() { delete d_tcat; }
// Applies the catalog's normalization transforms directly to mol.
//
// Every transform must be usable as an in-place reaction (one reactant
// template, one product template, product no larger than the reactant);
// otherwise a ValueErrorException is thrown before mol is touched by any
// transform. After a transform fires the molecule is partially sanitized and
// the scan restarts from the first transform, at most MAX_RESTARTS times.
void Normalizer::normalizeInPlace(RWMol &mol) {
  BOOST_LOG(rdInfoLog) << "Running Normalizer\n";
  PRECONDITION(this->d_tcat, "");
  const auto *catalogParams = this->d_tcat->getCatalogParams();
  PRECONDITION(catalogParams, "no transform parameters");

  // nothing to do for an empty molecule
  if (mol.getNumAtoms() == 0) {
    return;
  }

  const auto &transforms = catalogParams->getTransformations();

  // initialize the transforms and check that each one satisfies the
  // restrictions on in-place reactions
  for (const auto &rxn : transforms) {
    if (!rxn->isInitialized()) {
      rxn->initReactantMatchers();
    }
    const bool singleTemplates = rxn->getNumProductTemplates() == 1 &&
                                 rxn->getNumReactantTemplates() == 1;
    // the atom counts are only compared once we know both templates exist
    if (!singleTemplates || rxn->getProducts()[0]->getNumAtoms() >
                                rxn->getReactants()[0]->getNumAtoms()) {
      throw ValueErrorException(
          "normalizeInPlace can only be used with transforms which have a single reactant and single product. The number of atoms in the product cannot be larger than the number of atoms in the reactant.");
    }
  }

  // the transforms might want ring info
  if (!mol.getRingInfo()->isInitialized()) {
    MolOps::symmetrizeSSSR(mol);
  }

  constexpr bool removeUnmatchedAtoms = false;
  constexpr unsigned int sanitizeOps = MolOps::SANITIZE_ALL ^
                                       MolOps::SANITIZE_CLEANUP ^
                                       MolOps::SANITIZE_PROPERTIES;

  for (unsigned int restart = 0; restart < MAX_RESTARTS; ++restart) {
    // find the first transform, in catalog order, which modifies the molecule
    ChemicalReaction *applied = nullptr;
    for (const auto &rxn : transforms) {
      if (rxn->runReactant(mol, removeUnmatchedAtoms)) {
        applied = rxn.get();
        break;
      }
    }

    // a full pass without any modification means we are done
    if (!applied) {
      return;
    }

    BOOST_LOG(rdInfoLog) << "Rule applied: "
                         << applied->getProp<std::string>(
                                common_properties::_Name)
                         << "\n";
    unsigned int failedOp;
    try {
      MolOps::sanitizeMol(mol, failedOp, sanitizeOps);
    } catch (MolSanitizeException &) {
      // a sanitization failure is logged but does not stop normalization
      BOOST_LOG(rdInfoLog) << "FAILED sanitizeMol.\n";
    }
  }

  BOOST_LOG(rdInfoLog) << "Gave up normalization after " << MAX_RESTARTS
                       << " restarts.\n";
}
ROMol *Normalizer::normalize(const ROMol &mol) {
BOOST_LOG(rdInfoLog) << "Running Normalizer\n";
PRECONDITION(this->d_tcat, "");

View File

@@ -77,6 +77,8 @@ class RDKIT_MOLSTANDARDIZE_EXPORT Normalizer {
*/
ROMol *normalize(const ROMol &mol);
void normalizeInPlace(RWMol &mol);
private:
const TransformCatalog *d_tcat;
unsigned int MAX_RESTARTS;

View File

@@ -25,6 +25,9 @@ ROMol *reionizeHelper(MolStandardize::Reionizer &self, const ROMol &mol) {
return self.reionize(mol);
}
// Python-facing shim which hands the molecule to Reionizer::reionizeInPlace().
// NOTE(review): the cast assumes the wrapped object really is an RWMol - confirm
void reionizeInPlaceHelper(MolStandardize::Reionizer &self, ROMol &mol) {
  auto &editable = static_cast<RWMol &>(mol);
  self.reionizeInPlace(editable);
}
MolStandardize::Reionizer *reionizerFromData(const std::string &data,
python::object chargeCorrections) {
std::istringstream sstr(data);
@@ -40,6 +43,10 @@ MolStandardize::Reionizer *reionizerFromData(const std::string &data,
return res;
}
// Python-facing shim which hands the molecule to Uncharger::unchargeInPlace().
// NOTE(review): the cast assumes the wrapped object really is an RWMol - confirm
void unchargeInPlaceHelper(MolStandardize::Uncharger &self, ROMol &mol) {
  auto &editable = static_cast<RWMol &>(mol);
  self.unchargeInPlace(editable);
}
} // namespace
struct charge_wrapper {
@@ -65,7 +72,10 @@ struct charge_wrapper {
std::vector<MolStandardize::ChargeCorrection>>())
.def("reionize", &reionizeHelper,
(python::arg("self"), python::arg("mol")), "",
python::return_value_policy<python::manage_new_object>());
python::return_value_policy<python::manage_new_object>())
.def("reionizeInPlace", reionizeInPlaceHelper,
(python::arg("self"), python::arg("mol")),
"modifies the input molecule");
python::def("ReionizerFromData", &reionizerFromData,
(python::arg("paramData"),
@@ -78,7 +88,10 @@ struct charge_wrapper {
python::arg("canonicalOrder") = true)))
.def("uncharge", &MolStandardize::Uncharger::uncharge,
(python::arg("self"), python::arg("mol")), "",
python::return_value_policy<python::manage_new_object>());
python::return_value_policy<python::manage_new_object>())
.def("unchargeInPlace", unchargeInPlaceHelper,
(python::arg("self"), python::arg("mol")),
"modifies the input molecule");
}
};

View File

@@ -21,6 +21,10 @@ ROMol *removeHelper(MolStandardize::FragmentRemover &self, const ROMol &mol) {
return self.remove(mol);
}
// Python-facing shim which hands the molecule to
// FragmentRemover::removeInPlace().
// NOTE(review): the cast assumes the wrapped object really is an RWMol - confirm
void removeInPlaceHelper(MolStandardize::FragmentRemover &self, ROMol &mol) {
  auto &editable = static_cast<RWMol &>(mol);
  self.removeInPlace(editable);
}
ROMol *chooseHelper(MolStandardize::LargestFragmentChooser &self,
const ROMol &mol) {
return self.choose(mol);
@@ -50,7 +54,10 @@ struct fragment_wrapper {
python::arg("leave_last") = true,
python::arg("skip_if_all_match") = false)))
.def("remove", &removeHelper, (python::arg("self"), python::arg("mol")),
"", python::return_value_policy<python::manage_new_object>());
"", python::return_value_policy<python::manage_new_object>())
.def("removeInPlace", &removeInPlaceHelper,
(python::arg("self"), python::arg("mol")),
"modifies the molecule in place");
python::def(
"FragmentRemoverFromData", &removerFromParams,

View File

@@ -44,6 +44,9 @@ class MetalDisconnectorWrap {
RDKit::ROMol *disconnect(const RDKit::ROMol &mol) {
return md_->disconnect(mol);
}
// Modifies mol in place by forwarding to the wrapped MetalDisconnector.
// NOTE(review): the cast assumes the wrapped object really is an RWMol - confirm
void disconnectInPlace(RDKit::ROMol &mol) {
  auto &editable = static_cast<RDKit::RWMol &>(mol);
  md_->disconnectInPlace(editable);
}
private:
std::unique_ptr<RDKit::MolStandardize::MetalDisconnector> md_;
@@ -118,8 +121,12 @@ struct metal_wrapper {
"Set the query molecule defining the metals to disconnect if attached"
" to Nitrogen, Oxygen or Fluorine.")
.def("Disconnect", &MetalDisconnectorWrap::disconnect,
(python::arg("self"), python::arg("mol")), docString.c_str(),
python::return_value_policy<python::manage_new_object>());
(python::arg("self"), python::arg("mol")),
"performs the disconnection",
python::return_value_policy<python::manage_new_object>())
.def("DisconnectInPlace", &MetalDisconnectorWrap::disconnectInPlace,
(python::arg("self"), python::arg("mol")),
"performs the disconnection, modifies the input molecule");
}
};

View File

@@ -22,6 +22,10 @@ ROMol *normalizeHelper(MolStandardize::Normalizer &self, const ROMol &mol) {
return self.normalize(mol);
}
// Python-facing wrapper for Normalizer::normalizeInPlace(): the molecule
// passed in is modified directly and nothing is returned.
// NOTE(review): the static_cast assumes the object behind `mol` can be used
// as an RWMol - confirm.
void normalizeInPlaceHelper(MolStandardize::Normalizer &self, ROMol &mol) {
  auto &editable = static_cast<RWMol &>(mol);
  self.normalizeInPlace(editable);
}
MolStandardize::Normalizer *normalizerFromDataAndParams(
const std::string &data, const MolStandardize::CleanupParameters &params) {
std::istringstream sstr(data);
@@ -44,7 +48,10 @@ struct normalize_wrapper {
python::args("normalizeFilename", "maxRestarts")))
.def("normalize", &normalizeHelper,
(python::arg("self"), python::arg("mol")), "",
python::return_value_policy<python::manage_new_object>());
python::return_value_policy<python::manage_new_object>())
.def("normalizeInPlace", &normalizeInPlaceHelper,
(python::arg("self"), python::arg("mol")),
"modifies the input molecule");
python::def(
"NormalizerFromData", &normalizerFromDataAndParams,
(python::arg("paramData"), python::arg("params")),

View File

@@ -59,6 +59,35 @@ RDKit::ROMol *canonicalTautomerHelper(const RDKit::ROMol *mol,
return msHelper(mol, params, RDKit::MolStandardize::canonicalTautomer);
}
// Shared driver for the module-level *InPlace wrappers.
//   mol    - molecule to modify; a null pointer raises "Molecule is None"
//   params - optional CleanupParameters object; when it evaluates as false
//            defaultCleanupParameters is used instead
//   func   - callable invoked as func(RWMol &, const CleanupParameters &)
// NOTE(review): the static_cast assumes the object behind `mol` can be used
// as an RWMol - confirm.
template <typename FUNCTYPE>
void inPlaceHelper(RDKit::ROMol *mol, python::object params, FUNCTYPE func) {
  using RDKit::MolStandardize::CleanupParameters;
  if (mol == nullptr) {
    throw_value_error("Molecule is None");
  }
  // start from the library defaults and only replace them when the caller
  // actually supplied a parameters object
  const CleanupParameters *cleanupParams =
      &RDKit::MolStandardize::defaultCleanupParameters;
  if (params) {
    cleanupParams = python::extract<CleanupParameters *>(params);
  }
  auto &editable = *static_cast<RDKit::RWMol *>(mol);
  func(editable, *cleanupParams);
}
// Runs MolStandardize::cleanupInPlace on `mol` through inPlaceHelper(), which
// handles the null-molecule check and the optional `params` argument.
void cleanupInPlaceHelper(RDKit::ROMol *mol, python::object params) {
  const auto operation = &RDKit::MolStandardize::cleanupInPlace;
  inPlaceHelper(mol, params, operation);
}
// Runs MolStandardize::normalizeInPlace on `mol` through inPlaceHelper(),
// which handles the null-molecule check and the optional `params` argument.
void normalizeInPlaceHelper(RDKit::ROMol *mol, python::object params) {
  const auto operation = &RDKit::MolStandardize::normalizeInPlace;
  inPlaceHelper(mol, params, operation);
}
// Runs MolStandardize::reionizeInPlace on `mol` through inPlaceHelper(), which
// handles the null-molecule check and the optional `params` argument.
void reionizeInPlaceHelper(RDKit::ROMol *mol, python::object params) {
  const auto operation = &RDKit::MolStandardize::reionizeInPlace;
  inPlaceHelper(mol, params, operation);
}
// Runs MolStandardize::removeFragmentsInPlace on `mol` through
// inPlaceHelper(), which handles the null-molecule check and the optional
// `params` argument.
void removeFragmentsInPlaceHelper(RDKit::ROMol *mol, python::object params) {
  const auto operation = &RDKit::MolStandardize::removeFragmentsInPlace;
  inPlaceHelper(mol, params, operation);
}
template <typename FUNCTYPE>
RDKit::ROMol *parentHelper(const RDKit::ROMol *mol, python::object params,
bool skip_standardize, FUNCTYPE func) {
@@ -118,6 +147,19 @@ RDKit::ROMol *disconnectOrganometallicsHelper(RDKit::ROMol &mol,
return RDKit::MolStandardize::disconnectOrganometallics(mol);
}
}
// Python-facing wrapper for MolStandardize::disconnectOrganometallicsInPlace():
// modifies `mol` directly instead of returning a new molecule.
//   mol    - molecule to modify; a null pointer raises "Molecule is None"
//   params - optional MetalDisconnectorOptions object; when it evaluates as
//            false the overload that takes no options is called
// NOTE(review): the static_cast assumes the object behind `mol` can be used
// as an RWMol - confirm.
void disconnectOrganometallicsInPlaceHelper(RDKit::ROMol *mol,
                                            python::object params) {
  // `mol` arrives as a pointer, so it has to be checked before it is
  // dereferenced; report the problem with the same message inPlaceHelper()
  // uses
  if (!mol) {
    throw_value_error("Molecule is None");
  }
  auto &editable = *static_cast<RDKit::RWMol *>(mol);
  if (params) {
    RDKit::MolStandardize::MetalDisconnectorOptions *mdo =
        python::extract<RDKit::MolStandardize::MetalDisconnectorOptions *>(
            params);
    RDKit::MolStandardize::disconnectOrganometallicsInPlace(editable, *mdo);
  } else {
    RDKit::MolStandardize::disconnectOrganometallicsInPlace(editable);
  }
}
} // namespace
@@ -208,6 +250,10 @@ BOOST_PYTHON_MODULE(rdMolStandardize) {
(python::arg("mol"), python::arg("params") = python::object()),
docString.c_str(),
python::return_value_policy<python::manage_new_object>());
docString = "Standardizes a molecule in place";
python::def("CleanupInPlace", cleanupInPlaceHelper,
(python::arg("mol"), python::arg("params") = python::object()),
docString.c_str());
docString = "Convenience function for standardizing a SMILES";
python::def("StandardizeSmiles", RDKit::MolStandardize::standardizeSmiles,
(python::arg("smiles")), docString.c_str());
@@ -258,16 +304,32 @@ BOOST_PYTHON_MODULE(rdMolStandardize) {
(python::arg("mol"), python::arg("params") = python::object()),
docString.c_str(),
python::return_value_policy<python::manage_new_object>());
docString =
"Applies a series of standard transformations to correct functional "
"groups and recombine charges, modifies the input molecule";
python::def("NormalizeInPlace", normalizeInPlaceHelper,
(python::arg("mol"), python::arg("params") = python::object()),
docString.c_str());
docString = "Ensures the strongest acid groups are charged first";
python::def("Reionize", reionizeHelper,
(python::arg("mol"), python::arg("params") = python::object()),
docString.c_str(),
python::return_value_policy<python::manage_new_object>());
docString =
"Ensures the strongest acid groups are charged first, modifies the input molecule";
python::def("ReionizeInPlace", reionizeInPlaceHelper,
(python::arg("mol"), python::arg("params") = python::object()),
docString.c_str());
docString = "Removes fragments from the molecule";
python::def("RemoveFragments", removeFragsHelper,
(python::arg("mol"), python::arg("params") = python::object()),
docString.c_str(),
python::return_value_policy<python::manage_new_object>());
docString =
"Removes fragments from the molecule, modifies the input molecule";
python::def("RemoveFragmentsInPlace", removeFragmentsInPlaceHelper,
(python::arg("mol"), python::arg("params") = python::object()),
docString.c_str());
docString = "Returns the canonical tautomer for the molecule";
python::def("CanonicalTautomer", canonicalTautomerHelper,
(python::arg("mol"), python::arg("params") = python::object()),
@@ -280,6 +342,13 @@ BOOST_PYTHON_MODULE(rdMolStandardize) {
(python::arg("mol"), python::arg("params") = python::object()),
docString.c_str(),
python::return_value_policy<python::manage_new_object>());
docString =
"Disconnects the molecule using the organometallics"
" rules, modifies the input molecule";
python::def("DisconnectOrganometallicsInPlace",
disconnectOrganometallicsInPlaceHelper,
(python::arg("mol"), python::arg("params") = python::object()),
docString.c_str());
wrap_validate();
wrap_charge();

View File

@@ -72,8 +72,11 @@ class TestCase(unittest.TestCase):
mol = Chem.MolFromSmiles("C1(CCCCC1)[Zn]Br")
md = rdMolStandardize.MetalDisconnector()
nm = md.Disconnect(mol)
# Metal.MetalDisconnector.Disconnect(mol)
self.assertEqual(Chem.MolToSmiles(nm), "[Br-].[CH-]1CCCCC1.[Zn+2]")
nm = Chem.Mol(mol)
md.DisconnectInPlace(nm)
self.assertEqual(Chem.MolToSmiles(nm), "[Br-].[CH-]1CCCCC1.[Zn+2]")
# test user defined metal_nof
md.SetMetalNof(
@@ -123,6 +126,11 @@ class TestCase(unittest.TestCase):
nm = reionizer.reionize(mol)
self.assertEqual(Chem.MolToSmiles(nm), "O=S(O)c1ccc(S(=O)(=O)[O-])cc1")
nm = Chem.Mol(mol)
reionizer.reionizeInPlace(nm)
self.assertEqual(Chem.MolToSmiles(nm), "O=S(O)c1ccc(S(=O)(=O)[O-])cc1")
# try reionize with another acid base pair library without the right
# pairs
abfile = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'MolStandardize', 'test_data',
@@ -136,15 +144,26 @@ class TestCase(unittest.TestCase):
mol3 = Chem.MolFromSmiles("O=C([O-])c1ccccc1")
nm3 = uncharger.uncharge(mol3)
self.assertEqual(Chem.MolToSmiles(nm3), "O=C(O)c1ccccc1")
nm3 = Chem.Mol(mol3)
uncharger.unchargeInPlace(nm3)
self.assertEqual(Chem.MolToSmiles(nm3), "O=C(O)c1ccccc1")
# test canonical Uncharger
uncharger = rdMolStandardize.Uncharger(canonicalOrder=False)
mol3 = Chem.MolFromSmiles("C[N+](C)(C)CC(C(=O)[O-])CC(=O)[O-]")
nm3 = uncharger.uncharge(mol3)
self.assertEqual(Chem.MolToSmiles(nm3), "C[N+](C)(C)CC(CC(=O)[O-])C(=O)O")
nm3 = Chem.Mol(mol3)
uncharger.unchargeInPlace(nm3)
self.assertEqual(Chem.MolToSmiles(nm3), "C[N+](C)(C)CC(CC(=O)[O-])C(=O)O")
uncharger = rdMolStandardize.Uncharger(canonicalOrder=True)
nm3 = uncharger.uncharge(mol3)
self.assertEqual(Chem.MolToSmiles(nm3), "C[N+](C)(C)CC(CC(=O)O)C(=O)[O-]")
nm3 = Chem.Mol(mol3)
uncharger.unchargeInPlace(nm3)
self.assertEqual(Chem.MolToSmiles(nm3), "C[N+](C)(C)CC(CC(=O)O)C(=O)[O-]")
def test7Fragment(self):
fragremover = rdMolStandardize.FragmentRemover()
@@ -152,6 +171,10 @@ class TestCase(unittest.TestCase):
nm = fragremover.remove(mol)
self.assertEqual(Chem.MolToSmiles(nm), "CN(C)C")
nm = Chem.Mol(mol)
fragremover.removeInPlace(nm)
self.assertEqual(Chem.MolToSmiles(nm), "CN(C)C")
lfragchooser = rdMolStandardize.LargestFragmentChooser()
mol2 = Chem.MolFromSmiles("[N+](=O)([O-])[O-].[CH3+]")
nm2 = lfragchooser.choose(mol2)
@@ -165,6 +188,9 @@ class TestCase(unittest.TestCase):
mol = Chem.MolFromSmiles("[Na+].Cl.Cl.Br")
nm = fragremover.remove(mol)
self.assertEqual(nm.GetNumAtoms(), mol.GetNumAtoms())
nm = Chem.Mol(mol)
fragremover.removeInPlace(mol)
self.assertEqual(nm.GetNumAtoms(), mol.GetNumAtoms())
smi3 = "CNC[C@@H]([C@H]([C@@H]([C@@H](CO)O)O)O)O.c1cc2c(cc1C(=O)O)oc(n2)c3cc(cc(c3)Cl)Cl"
@@ -231,6 +257,10 @@ class TestCase(unittest.TestCase):
mol = Chem.MolFromSmiles("C[n+]1ccccc1[O-]")
nm = normalizer.normalize(mol)
self.assertEqual(Chem.MolToSmiles(nm), "Cn1ccccc1=O")
nm = Chem.Mol(mol)
normalizer.normalizeInPlace(nm)
self.assertEqual(Chem.MolToSmiles(nm), "Cn1ccccc1=O")
def test9Validate(self):
vm = rdMolStandardize.RDKitValidation()
@@ -958,6 +988,28 @@ chlorine [Cl]
nm = rdMolStandardize.RemoveFragments(m)
self.assertEqual(Chem.MolToSmiles(nm), "CC")
def test22StandardizeInPlace(self):
  # Each entry is (input SMILES, in-place function, SMILES expected once the
  # function has modified the molecule). The functions return nothing, so the
  # check is always made on the molecule that was passed in.
  cases = (
    ("O=N(=O)-C(O[Fe])C(C(=O)O)C-N(=O)=O", rdMolStandardize.CleanupInPlace,
     "O=C([O-])C(C[N+](=O)[O-])C(O)[N+](=O)[O-].[Fe+]"),
    ('[F-].[Cl-].[Br-].CC', rdMolStandardize.RemoveFragmentsInPlace, "CC"),
    ('C1=C(C=CC(=C1)[S]([O-])=O)[S](O)(=O)=O', rdMolStandardize.ReionizeInPlace,
     "O=S(O)c1ccc(S(=O)(=O)[O-])cc1"),
    ('CCO[Fe]', rdMolStandardize.DisconnectOrganometallicsInPlace, "CCO.[Fe]"),
    (r"C[N+](C)=C\C=C\[O-]", rdMolStandardize.NormalizeInPlace, "CN(C)C=CC=O"),
  )
  for smiles, inPlaceFunc, expected in cases:
    m = Chem.MolFromSmiles(smiles)
    inPlaceFunc(m)
    self.assertEqual(Chem.MolToSmiles(m), expected)
if __name__ == "__main__":
unittest.main()

View File

@@ -1038,3 +1038,79 @@ TEST_CASE("Github 5784: kekulization error when enumerating tautomers") {
REQUIRE(res);
}
}
// Exercises the in-place variants of the MolStandardize operations: every
// section builds a molecule, lets the operation modify it directly, and then
// compares the SMILES of that same molecule with the expected result.
TEST_CASE("in place operations") {
  SECTION("reionizer") {
    // class-based API
    MolStandardize::Reionizer reion;
    auto m = "c1cc([O-])cc(C(=O)O)c1"_smiles;
    REQUIRE(m);
    reion.reionizeInPlace(*m);
    CHECK(MolToSmiles(*m) == "O=C([O-])c1cccc(O)c1");
  }
  SECTION("reionize") {
    // free-function API, same input and expectation as above
    auto m = "c1cc([O-])cc(C(=O)O)c1"_smiles;
    REQUIRE(m);
    MolStandardize::reionizeInPlace(*m);
    CHECK(MolToSmiles(*m) == "O=C([O-])c1cccc(O)c1");
  }
  SECTION("uncharge") {
    MolStandardize::Uncharger unchg;
    auto m = "c1cc([O-])cc(C(=O)O)c1"_smiles;
    REQUIRE(m);
    unchg.unchargeInPlace(*m);
    CHECK(MolToSmiles(*m) == "O=C(O)c1cccc(O)c1");
  }
  SECTION("normalizer") {
    // class-based API; the molecules are parsed with sanitization turned off
    MolStandardize::Normalizer nrml;
    SmilesParserParams ps;
    ps.sanitize = false;
    std::unique_ptr<RWMol> m{SmilesToMol("O=N(=O)-CC-N(=O)=O", ps)};
    REQUIRE(m);
    nrml.normalizeInPlace(*m);
    CHECK(MolToSmiles(*m) == "O=[N+]([O-])CC[N+](=O)[O-]");
    m.reset(SmilesToMol("OCCN", ps));
    REQUIRE(m);
    nrml.normalizeInPlace(*m);
    CHECK(MolToSmiles(*m) == "NCCO");
  }
  SECTION("normalize") {
    // free-function API, same inputs and expectations as above
    SmilesParserParams ps;
    ps.sanitize = false;
    std::unique_ptr<RWMol> m{SmilesToMol("O=N(=O)-CC-N(=O)=O", ps)};
    REQUIRE(m);
    MolStandardize::normalizeInPlace(*m);
    CHECK(MolToSmiles(*m) == "O=[N+]([O-])CC[N+](=O)[O-]");
    m.reset(SmilesToMol("OCCN", ps));
    REQUIRE(m);
    MolStandardize::normalizeInPlace(*m);
    CHECK(MolToSmiles(*m) == "NCCO");
  }
  SECTION("FragmentRemover") {
    auto m = "CCCC.Cl.[Na]"_smiles;
    REQUIRE(m);
    MolStandardize::FragmentRemover fragremover;
    // each API gets its own copy so both start from the same input
    RWMol cp1(*m);
    fragremover.removeInPlace(cp1);
    CHECK(MolToSmiles(cp1) == "CCCC");
    RWMol cp2(*m);
    MolStandardize::removeFragmentsInPlace(cp2);
    CHECK(MolToSmiles(cp2) == "CCCC");
  }
  SECTION("cleanup") {
    SmilesParserParams ps;
    ps.sanitize = false;
    // silly ugly example which ensures disconnection, normalization, and
    // reionization
    std::unique_ptr<RWMol> m{
        SmilesToMol("O=N(=O)-C(O[Fe])C(C(=O)O)C-N(=O)=O", ps)};
    REQUIRE(m);
    MolStandardize::cleanupInPlace(*m);
    CHECK(MolToSmiles(*m) ==
          "O=C([O-])C(C[N+](=O)[O-])C(O)[N+](=O)[O-].[Fe+]");
  }
  SECTION("disconnect organometallics") {
    auto m("[CH2-](->[K+])c1ccccc1"_smiles);
    // use the Catch2 macros like every other section so that a failure is
    // reported through the test framework
    REQUIRE(m);
    MolStandardize::disconnectOrganometallicsInPlace(*m);
    CHECK(MolToSmiles(*m) == "[CH2-]c1ccccc1.[K+]");
  }
}