mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-03 21:44:30 +08:00
Add an in place version of most of the MolStandardize functionality (#6491)
* reionizer and uncharger and normalizer can now operate in place
* add removeUnmatchedAtoms argument to in-place version of runReactant. When set to false, atoms which are not explicitly removed by the reaction are preserved
* Fix a case where transforms were incorrectly updating atomic numbers
* add more inplace operations to MolStandardize
* support those in the Python layer
* support inplace for the rest of the python wrappers
* move a few more functions over to the inplace code
This commit is contained in:
@@ -57,12 +57,13 @@ std::vector<MOL_SPTR_VECT> ChemicalReaction::runReactant(
|
||||
return run_Reactant(*this, reactant, reactionTemplateIdx);
|
||||
}
|
||||
|
||||
bool ChemicalReaction::runReactant(RWMol &reactant) const {
|
||||
bool ChemicalReaction::runReactant(RWMol &reactant,
|
||||
bool removeUnmatchedAtoms) const {
|
||||
if (getReactants().size() != 1 || getProducts().size() != 1) {
|
||||
throw ChemicalReactionException(
|
||||
"Only single reactant - single product reactions can be run in place.");
|
||||
}
|
||||
return run_Reactant(*this, reactant);
|
||||
return run_Reactant(*this, reactant, removeUnmatchedAtoms);
|
||||
}
|
||||
|
||||
ChemicalReaction::ChemicalReaction(const std::string &pickle) {
|
||||
|
||||
@@ -246,10 +246,12 @@ class RDKIT_CHEMREACTIONS_EXPORT ChemicalReaction : public RDProps {
|
||||
and where no atoms are added in the product.
|
||||
|
||||
\param reactant The single reactant to use
|
||||
\param removeUnmatchedAtoms toggles whether or not atoms from the reactant
|
||||
which do not match template atoms are removed.
|
||||
|
||||
\return whether or not the reactant was actually modified
|
||||
*/
|
||||
bool runReactant(RWMol &reactant) const;
|
||||
bool runReactant(RWMol &reactant, bool removeUnmatchedAtoms = true) const;
|
||||
|
||||
const MOL_SPTR_VECT &getReactants() const {
|
||||
return this->m_reactantTemplates;
|
||||
|
||||
@@ -1733,7 +1733,8 @@ bool updateBondsModifiedByReaction(
|
||||
} // namespace
|
||||
|
||||
// Modifies a single reactant IN PLACE
|
||||
bool run_Reactant(const ChemicalReaction &rxn, RWMol &reactant) {
|
||||
bool run_Reactant(const ChemicalReaction &rxn, RWMol &reactant,
|
||||
bool removeUnmatchedAtoms) {
|
||||
PRECONDITION(rxn.getNumReactantTemplates() == 1,
|
||||
"only one reactant supported");
|
||||
PRECONDITION(rxn.getNumProductTemplates() == 1, "only one product supported");
|
||||
@@ -1782,14 +1783,18 @@ bool run_Reactant(const ChemicalReaction &rxn, RWMol &reactant) {
|
||||
const auto &match = reactantMatch[0];
|
||||
|
||||
// we now have a match for the reactant, so we can work on it
|
||||
// start by marking atoms which are in the reactants, but not in the product
|
||||
// start by marking atoms which are in the reactant template, but not in the
|
||||
// product template for removal
|
||||
boost::dynamic_bitset<> atomsToRemove(reactant.getNumAtoms());
|
||||
// finds atoms in the reactantTemplate which aren't in the productTemplate
|
||||
ReactionRunnerUtils::identifyAtomsInReactantTemplateNotProductTemplate(
|
||||
*reactantTemplate, atomsToRemove, reactantProductMap, match);
|
||||
// identify atoms which should be removed from the molecule
|
||||
ReactionRunnerUtils::traverseToFindAtomsToRemove(reactant, *reactantTemplate,
|
||||
atomsToRemove, match);
|
||||
if (removeUnmatchedAtoms) {
|
||||
// identify atoms which did not match something in the reactant template but
|
||||
// which should be removed from the molecule
|
||||
ReactionRunnerUtils::traverseToFindAtomsToRemove(
|
||||
reactant, *reactantTemplate, atomsToRemove, match);
|
||||
}
|
||||
bool molModified = false;
|
||||
reactant.beginBatchEdit();
|
||||
|
||||
|
||||
@@ -81,7 +81,8 @@ RDKIT_CHEMREACTIONS_EXPORT std::vector<MOL_SPTR_VECT> run_Reactant(
|
||||
unsigned int reactantIdx);
|
||||
|
||||
RDKIT_CHEMREACTIONS_EXPORT bool run_Reactant(const ChemicalReaction& rxn,
|
||||
RWMol& reactant);
|
||||
RWMol& reactant,
|
||||
bool removeUnmatchedAtoms = true);
|
||||
|
||||
//! Reduce the product generated by run_Reactants or run_Reactant to
|
||||
/// the sidechains that come from the reagents
|
||||
|
||||
@@ -150,7 +150,8 @@ PyObject *RunReactant(ChemicalReaction *self, T reactant,
|
||||
return res;
|
||||
}
|
||||
|
||||
bool RunReactantInPlace(ChemicalReaction *self, ROMol *reactant) {
|
||||
bool RunReactantInPlace(ChemicalReaction *self, ROMol *reactant,
|
||||
bool removeUnmatchedAtoms) {
|
||||
auto react = static_cast<RWMol *>(reactant);
|
||||
bool res = false;
|
||||
{
|
||||
@@ -158,7 +159,7 @@ bool RunReactantInPlace(ChemicalReaction *self, ROMol *reactant) {
|
||||
if (!self->isInitialized()) {
|
||||
self->initReactantMatchers();
|
||||
}
|
||||
res = self->runReactant(*react);
|
||||
res = self->runReactant(*react, removeUnmatchedAtoms);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
@@ -566,7 +567,8 @@ Sample Usage:
|
||||
RDKit::RunReactant,
|
||||
"apply the reaction to a single reactant")
|
||||
.def("RunReactantInPlace", RDKit::RunReactantInPlace,
|
||||
(python::arg("self"), python::arg("reactant")),
|
||||
(python::arg("self"), python::arg("reactant"),
|
||||
python::arg("removeUnmatchedAtoms") = true),
|
||||
"apply the reaction to a single reactant in place. The reactant "
|
||||
"itself is modified. This can only be used for single reactant - "
|
||||
"single product reactions.")
|
||||
|
||||
@@ -1031,6 +1031,20 @@ M END
|
||||
self.assertEqual(Chem.MolToSmiles(reactant), 'CCOC(C)=O')
|
||||
self.assertFalse(rxn.RunReactantInPlace(reactant))
|
||||
self.assertEqual(Chem.MolToSmiles(reactant), 'CCOC(C)=O')
|
||||
|
||||
rxn = rdChemReactions.ReactionFromSmarts('CC[N:1]>>[N:1]')
|
||||
self.assertIsNotNone(rxn)
|
||||
reactant = Chem.MolFromSmiles('CCCN.Cl')
|
||||
self.assertTrue(rxn.RunReactantInPlace(reactant))
|
||||
Chem.SanitizeMol(reactant)
|
||||
self.assertEqual(Chem.MolToSmiles(reactant), 'N')
|
||||
|
||||
reactant = Chem.MolFromSmiles('CCCN.Cl')
|
||||
self.assertTrue(rxn.RunReactantInPlace(reactant,removeUnmatchedAtoms=False))
|
||||
Chem.SanitizeMol(reactant)
|
||||
self.assertEqual(Chem.MolToSmiles(reactant), 'C.Cl.N')
|
||||
|
||||
|
||||
|
||||
def testGithub4651(self):
|
||||
mol_sulfonylchloride = Chem.MolFromSmiles("Nc1c(CCCSNCC)cc(cc1)S(=O)(=O)Cl")
|
||||
|
||||
@@ -979,8 +979,35 @@ TEST_CASE("one-component reactions") {
|
||||
CHECK_THROWS_AS(rxn->runReactant(*mol), ChemicalReactionException);
|
||||
}
|
||||
}
|
||||
SECTION("toggling removing unmapped atoms") {
|
||||
auto rxn = "CC[N:1]>>[N:1]"_rxnsmarts;
|
||||
REQUIRE(rxn);
|
||||
rxn->initReactantMatchers();
|
||||
{
|
||||
auto mol = "CCN.Cl"_smiles;
|
||||
REQUIRE(mol);
|
||||
CHECK(rxn->runReactant(*mol));
|
||||
CHECK(mol->getNumAtoms() == 1);
|
||||
CHECK(MolToSmiles(*mol) == "N");
|
||||
}
|
||||
{
|
||||
auto mol = "CCN.Cl"_smiles;
|
||||
REQUIRE(mol);
|
||||
bool removeUnmatchedAtoms = false;
|
||||
CHECK(rxn->runReactant(*mol, removeUnmatchedAtoms));
|
||||
CHECK(mol->getNumAtoms() == 2);
|
||||
CHECK(MolToSmiles(*mol) == "Cl.N");
|
||||
}
|
||||
{ // extra atoms connected to the matching part should not be removed
|
||||
auto mol = "CCCN.Cl"_smiles;
|
||||
REQUIRE(mol);
|
||||
bool removeUnmatchedAtoms = false;
|
||||
CHECK(rxn->runReactant(*mol, removeUnmatchedAtoms));
|
||||
CHECK(mol->getNumAtoms() == 3);
|
||||
CHECK(MolToSmiles(*mol) == "C.Cl.N");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("Github #4759 Reaction parser fails when CX extensions are present") {
|
||||
std::string sma = "[C:1]Br.[C:2]O>>[C:2][C:1] |$Aryl;;;;;Aryl$|";
|
||||
SECTION("SMARTS") {
|
||||
|
||||
@@ -96,6 +96,12 @@ Reionizer::~Reionizer() { delete d_abcat; }
|
||||
// d_css(css) {};
|
||||
|
||||
ROMol *Reionizer::reionize(const ROMol &mol) {
|
||||
auto omol = new RWMol(mol);
|
||||
this->reionizeInPlace(*omol);
|
||||
return static_cast<ROMol *>(omol);
|
||||
}
|
||||
|
||||
void Reionizer::reionizeInPlace(RWMol &mol) {
|
||||
PRECONDITION(this->d_abcat, "");
|
||||
const AcidBaseCatalogParams *abparams = this->d_abcat->getCatalogParams();
|
||||
|
||||
@@ -103,21 +109,20 @@ ROMol *Reionizer::reionize(const ROMol &mol) {
|
||||
const std::vector<std::pair<ROMOL_SPTR, ROMOL_SPTR>> abpairs =
|
||||
abparams->getPairs();
|
||||
|
||||
auto *omol = new ROMol(mol);
|
||||
if (omol->needsUpdatePropertyCache()) {
|
||||
omol->updatePropertyCache(false);
|
||||
if (mol.needsUpdatePropertyCache()) {
|
||||
mol.updatePropertyCache(false);
|
||||
}
|
||||
int start_charge = MolOps::getFormalCharge(*omol);
|
||||
int start_charge = MolOps::getFormalCharge(mol);
|
||||
|
||||
for (const auto &cc : this->d_ccs) {
|
||||
std::vector<MatchVectType> res;
|
||||
ROMOL_SPTR ccmol(SmartsToMol(cc.Smarts));
|
||||
unsigned int matches = SubstructMatch(*omol, *ccmol, res);
|
||||
unsigned int matches = SubstructMatch(mol, *ccmol, res);
|
||||
if (matches) {
|
||||
for (const auto &match : res) {
|
||||
for (const auto &pair : match) {
|
||||
auto idx = pair.second;
|
||||
Atom *atom = omol->getAtomWithIdx(idx);
|
||||
Atom *atom = mol.getAtomWithIdx(idx);
|
||||
BOOST_LOG(rdInfoLog)
|
||||
<< "Applying charge correction " << cc.Name << " "
|
||||
<< atom->getSymbol() << " " << cc.Charge << "\n";
|
||||
@@ -126,7 +131,7 @@ ROMol *Reionizer::reionize(const ROMol &mol) {
|
||||
}
|
||||
}
|
||||
}
|
||||
int current_charge = MolOps::getFormalCharge(*omol);
|
||||
int current_charge = MolOps::getFormalCharge(mol);
|
||||
int charge_diff = current_charge - start_charge;
|
||||
// std::cout << "Current charge: " << current_charge << std::endl;
|
||||
// std::cout << "Charge diff: " << charge_diff << std::endl;
|
||||
@@ -140,7 +145,7 @@ ROMol *Reionizer::reionize(const ROMol &mol) {
|
||||
// returns the acid strength ranking (ppos)
|
||||
// and the substruct match (poccur) in a pair
|
||||
std::shared_ptr<std::pair<unsigned int, std::vector<unsigned int>>> res(
|
||||
this->strongestProtonated(*omol, abpairs));
|
||||
this->strongestProtonated(mol, abpairs));
|
||||
if (res == nullptr) {
|
||||
break;
|
||||
} else {
|
||||
@@ -151,7 +156,7 @@ ROMol *Reionizer::reionize(const ROMol &mol) {
|
||||
(abpair.first)->getProp(common_properties::_Name, abname);
|
||||
BOOST_LOG(rdInfoLog) << "Ionizing " << abname
|
||||
<< " to balance previous charge corrections\n";
|
||||
Atom *patom = omol->getAtomWithIdx(poccur.back());
|
||||
Atom *patom = mol.getAtomWithIdx(poccur.back());
|
||||
patom->setFormalCharge(patom->getFormalCharge() - 1);
|
||||
|
||||
if (patom->getNumExplicitHs() > 0) {
|
||||
@@ -164,15 +169,15 @@ ROMol *Reionizer::reionize(const ROMol &mol) {
|
||||
}
|
||||
}
|
||||
|
||||
// std::cout << MolToSmiles(*omol) << std::endl;
|
||||
// std::cout << MolToSmiles(mol) << std::endl;
|
||||
// std::cout << "Charge diff: " << charge_diff << std::endl;
|
||||
|
||||
std::set<std::vector<unsigned int>> already_moved;
|
||||
while (true) {
|
||||
std::shared_ptr<std::pair<unsigned int, std::vector<unsigned int>>> sp_res(
|
||||
this->strongestProtonated(*omol, abpairs));
|
||||
this->strongestProtonated(mol, abpairs));
|
||||
std::shared_ptr<std::pair<unsigned int, std::vector<unsigned int>>> wi_res(
|
||||
this->weakestIonized(*omol, abpairs));
|
||||
this->weakestIonized(mol, abpairs));
|
||||
if (sp_res != nullptr && wi_res != nullptr) {
|
||||
unsigned int ppos = sp_res->first;
|
||||
unsigned int ipos = wi_res->first;
|
||||
@@ -206,9 +211,10 @@ ROMol *Reionizer::reionize(const ROMol &mol) {
|
||||
BOOST_LOG(rdInfoLog) << "Moved proton from " << prot_name << " to "
|
||||
<< ionized_name << "\n";
|
||||
// Remove hydrogen from strongest protonated
|
||||
Atom *patom = omol->getAtomWithIdx(poccur.back());
|
||||
Atom *patom = mol.getAtomWithIdx(poccur.back());
|
||||
patom->setFormalCharge(patom->getFormalCharge() - 1);
|
||||
// If no implicit Hs to autoremove, and at least 1 explicit H to remove,
|
||||
// If no implicit Hs to autoremove, and at least 1 explicit H to
|
||||
// remove,
|
||||
// reduce explicit count by 1
|
||||
if (patom->getNumImplicitHs() == 0 && patom->getNumExplicitHs() > 0) {
|
||||
patom->setNumExplicitHs(patom->getNumExplicitHs() - 1);
|
||||
@@ -217,7 +223,7 @@ ROMol *Reionizer::reionize(const ROMol &mol) {
|
||||
patom->updatePropertyCache();
|
||||
|
||||
// Add hydrogen to weakest ionized
|
||||
Atom *iatom = omol->getAtomWithIdx(ioccur.back());
|
||||
Atom *iatom = mol.getAtomWithIdx(ioccur.back());
|
||||
iatom->setFormalCharge(iatom->getFormalCharge() + 1);
|
||||
// Increase explicit H count if no implicit, or aromatic N or P,
|
||||
// or non default valence state
|
||||
@@ -240,10 +246,6 @@ ROMol *Reionizer::reionize(const ROMol &mol) {
|
||||
break;
|
||||
}
|
||||
} // while loop
|
||||
|
||||
// MolOps::sanitizeMol(*static_cast<RWMol *>(omol));
|
||||
|
||||
return omol;
|
||||
}
|
||||
|
||||
std::pair<unsigned int, std::vector<unsigned int>>
|
||||
@@ -340,10 +342,14 @@ bool neutralizeNegIfPossible(Atom *atom) {
|
||||
}
|
||||
|
||||
ROMol *Uncharger::uncharge(const ROMol &mol) {
|
||||
auto omol = new RWMol(mol);
|
||||
this->unchargeInPlace(*omol);
|
||||
return static_cast<ROMol *>(omol);
|
||||
}
|
||||
void Uncharger::unchargeInPlace(RWMol &mol) {
|
||||
BOOST_LOG(rdInfoLog) << "Running Uncharger\n";
|
||||
auto *omol = new ROMol(mol);
|
||||
if (omol->needsUpdatePropertyCache()) {
|
||||
omol->updatePropertyCache(false);
|
||||
if (mol.needsUpdatePropertyCache()) {
|
||||
mol.updatePropertyCache(false);
|
||||
}
|
||||
std::vector<MatchVectType> p_matches;
|
||||
std::vector<MatchVectType> q_matches;
|
||||
@@ -351,25 +357,25 @@ ROMol *Uncharger::uncharge(const ROMol &mol) {
|
||||
std::vector<MatchVectType> a_matches;
|
||||
|
||||
// Get atom ids for matches
|
||||
SubstructMatch(*omol, *(this->pos_h), p_matches);
|
||||
SubstructMatch(*omol, *(this->pos_noh), q_matches);
|
||||
SubstructMatch(mol, *(this->pos_h), p_matches);
|
||||
SubstructMatch(mol, *(this->pos_noh), q_matches);
|
||||
unsigned int q_matched = 0;
|
||||
for (const auto &match : q_matches) {
|
||||
q_matched += omol->getAtomWithIdx(match[0].second)->getFormalCharge();
|
||||
q_matched += mol.getAtomWithIdx(match[0].second)->getFormalCharge();
|
||||
}
|
||||
unsigned int n_matched = SubstructMatch(*omol, *(this->neg), n_matches);
|
||||
unsigned int a_matched = SubstructMatch(*omol, *(this->neg_acid), a_matches);
|
||||
unsigned int n_matched = SubstructMatch(mol, *(this->neg), n_matches);
|
||||
unsigned int a_matched = SubstructMatch(mol, *(this->neg_acid), a_matches);
|
||||
|
||||
// count the total number of negative atoms
|
||||
unsigned int n_neg = std::count_if(
|
||||
omol->atoms().begin(), omol->atoms().end(),
|
||||
mol.atoms().begin(), mol.atoms().end(),
|
||||
[](const auto atom) { return (atom->getFormalCharge() < 0); });
|
||||
|
||||
bool needsNeutralization =
|
||||
(q_matched > 0 && (n_matched > 0 || a_matched > 0));
|
||||
std::vector<unsigned int> atomRanks(omol->getNumAtoms());
|
||||
std::vector<unsigned int> atomRanks(mol.getNumAtoms());
|
||||
if (df_canonicalOrdering && needsNeutralization) {
|
||||
Canon::rankMolAtoms(*omol, atomRanks);
|
||||
Canon::rankMolAtoms(mol, atomRanks);
|
||||
} else {
|
||||
std::iota(atomRanks.begin(), atomRanks.end(), 0);
|
||||
}
|
||||
@@ -395,18 +401,19 @@ ROMol *Uncharger::uncharge(const ROMol &mol) {
|
||||
// Surplus negative charges more than non-neutralizable positive charges
|
||||
int neg_surplus = n_neg - q_matched;
|
||||
if (neg_surplus > 0 && n_matched) {
|
||||
boost::dynamic_bitset<> nonAcids(omol->getNumAtoms());
|
||||
boost::dynamic_bitset<> nonAcids(mol.getNumAtoms());
|
||||
nonAcids.set();
|
||||
for (const auto &pair : a_atoms) {
|
||||
nonAcids.reset(pair.second);
|
||||
}
|
||||
// zwitterion with more negative charges than quaternary positive centres
|
||||
// zwitterion with more negative charges than quaternary positive
|
||||
// centres
|
||||
for (const auto &pair : n_atoms) {
|
||||
unsigned int idx = pair.second;
|
||||
if (!nonAcids[idx]) {
|
||||
continue;
|
||||
}
|
||||
Atom *atom = omol->getAtomWithIdx(idx);
|
||||
Atom *atom = mol.getAtomWithIdx(idx);
|
||||
if (neutralizeNegIfPossible(atom) && !--neg_surplus) {
|
||||
break;
|
||||
}
|
||||
@@ -416,13 +423,13 @@ ROMol *Uncharger::uncharge(const ROMol &mol) {
|
||||
// now do the other negative groups if we still have charges left:
|
||||
neg_surplus = a_matched - q_matched;
|
||||
if (neg_surplus > 0) {
|
||||
boost::dynamic_bitset<> skipChargeSep(omol->getNumAtoms());
|
||||
boost::dynamic_bitset<> skipChargeSep(mol.getNumAtoms());
|
||||
for (const auto &pair : n_atoms) {
|
||||
unsigned int idx = pair.second;
|
||||
Atom *atom = omol->getAtomWithIdx(idx);
|
||||
Atom *atom = mol.getAtomWithIdx(idx);
|
||||
for (const auto &nbri :
|
||||
boost::make_iterator_range(omol->getAtomNeighbors(atom))) {
|
||||
const auto &nbr = (*omol)[nbri];
|
||||
boost::make_iterator_range(mol.getAtomNeighbors(atom))) {
|
||||
const auto &nbr = (mol)[nbri];
|
||||
auto nbrIdx = nbr->getIdx();
|
||||
// if the neighbor has a positive charge,
|
||||
// neutralize only once (e.g., NO3-)
|
||||
@@ -436,12 +443,14 @@ ROMol *Uncharger::uncharge(const ROMol &mol) {
|
||||
}
|
||||
}
|
||||
}
|
||||
// zwitterion with more negative charges than quaternary positive centres
|
||||
// zwitterion with more negative charges than quaternary positive
|
||||
// centres
|
||||
for (const auto &pair : a_atoms) {
|
||||
// Add hydrogen to first negative acidic atom, increase formal charge
|
||||
// Until quaternary positive == negative total or no more negative atoms
|
||||
// Until quaternary positive == negative total or no more negative
|
||||
// atoms
|
||||
unsigned int idx = pair.second;
|
||||
Atom *atom = omol->getAtomWithIdx(idx);
|
||||
Atom *atom = mol.getAtomWithIdx(idx);
|
||||
// skip ahead if we already neutralized this or if it is part of a
|
||||
// zwitterion
|
||||
if (atom->getFormalCharge() >= 0 || skipChargeSep.test(idx)) {
|
||||
@@ -457,14 +466,14 @@ ROMol *Uncharger::uncharge(const ROMol &mol) {
|
||||
} else {
|
||||
for (const auto &pair : n_atoms) {
|
||||
auto idx = pair.second;
|
||||
Atom *atom = omol->getAtomWithIdx(idx);
|
||||
Atom *atom = mol.getAtomWithIdx(idx);
|
||||
neutralizeNegIfPossible(atom);
|
||||
}
|
||||
}
|
||||
|
||||
// Neutralize cations until there is no longer a net charge remaining:
|
||||
int netCharge = 0;
|
||||
for (const auto &at : omol->atoms()) {
|
||||
for (const auto &at : mol.atoms()) {
|
||||
netCharge += at->getFormalCharge();
|
||||
}
|
||||
|
||||
@@ -477,7 +486,7 @@ ROMol *Uncharger::uncharge(const ROMol &mol) {
|
||||
}
|
||||
}
|
||||
for (const auto &idx : p_idx_matches) {
|
||||
Atom *atom = omol->getAtomWithIdx(idx);
|
||||
Atom *atom = mol.getAtomWithIdx(idx);
|
||||
// atoms from places like Mol blocks are normally missing explicit Hs:
|
||||
atom->setNumExplicitHs(atom->getTotalNumHs());
|
||||
atom->setNoImplicit(true);
|
||||
@@ -507,7 +516,6 @@ ROMol *Uncharger::uncharge(const ROMol &mol) {
|
||||
}
|
||||
}
|
||||
}
|
||||
return omol;
|
||||
} // namespace MolStandardize
|
||||
|
||||
} // namespace MolStandardize
|
||||
|
||||
@@ -85,6 +85,9 @@ class RDKIT_MOLSTANDARDIZE_EXPORT Reionizer {
|
||||
|
||||
//! Enforce charges on certain atoms, then perform competitive reionization.
|
||||
ROMol *reionize(const ROMol &mol);
|
||||
//! Enforce charges on certain atoms, then perform competitive reionization,
|
||||
//! modifies molecule in place
|
||||
void reionizeInPlace(RWMol &mol);
|
||||
|
||||
private:
|
||||
AcidBaseCatalog *d_abcat;
|
||||
@@ -131,6 +134,7 @@ class RDKIT_MOLSTANDARDIZE_EXPORT Uncharger {
|
||||
~Uncharger();
|
||||
|
||||
ROMol *uncharge(const ROMol &mol);
|
||||
void unchargeInPlace(RWMol &mol);
|
||||
|
||||
private:
|
||||
bool df_canonicalOrdering = true;
|
||||
|
||||
@@ -76,6 +76,12 @@ FragmentRemover::FragmentRemover(std::istream &fragmentStream, bool leave_last,
|
||||
FragmentRemover::~FragmentRemover() { delete d_fcat; };
|
||||
|
||||
ROMol *FragmentRemover::remove(const ROMol &mol) {
|
||||
auto molcp = new RWMol(mol);
|
||||
removeInPlace(*molcp);
|
||||
return static_cast<ROMol *>(molcp);
|
||||
}
|
||||
|
||||
void FragmentRemover::removeInPlace(RWMol &mol) {
|
||||
BOOST_LOG(rdInfoLog) << "Running FragmentRemover\n";
|
||||
PRECONDITION(this->d_fcat, "");
|
||||
const FragmentCatalogParams *fparams = this->d_fcat->getCatalogParams();
|
||||
@@ -126,9 +132,14 @@ ROMol *FragmentRemover::remove(const ROMol &mol) {
|
||||
if (this->SKIP_IF_ALL_MATCH) {
|
||||
BOOST_LOG(rdInfoLog)
|
||||
<< "All fragments matched; original molecule returned." << std::endl;
|
||||
return new ROMol(mol);
|
||||
} else {
|
||||
mol.beginBatchEdit();
|
||||
for (auto i = 0u; i < mol.getNumAtoms(); ++i) {
|
||||
mol.removeAtom(i);
|
||||
}
|
||||
mol.commitBatchEdit();
|
||||
}
|
||||
return new ROMol();
|
||||
return;
|
||||
}
|
||||
|
||||
boost::dynamic_bitset<> atomsToRemove(mol.getNumAtoms());
|
||||
@@ -141,15 +152,13 @@ ROMol *FragmentRemover::remove(const ROMol &mol) {
|
||||
}
|
||||
}
|
||||
// remove the atoms that need to go
|
||||
auto *removed = new RWMol(mol);
|
||||
removed->beginBatchEdit();
|
||||
mol.beginBatchEdit();
|
||||
for (unsigned int i = 0; i < mol.getNumAtoms(); ++i) {
|
||||
if (atomsToRemove[i]) {
|
||||
removed->removeAtom(i);
|
||||
mol.removeAtom(i);
|
||||
}
|
||||
}
|
||||
removed->commitBatchEdit();
|
||||
return static_cast<ROMol *>(removed);
|
||||
mol.commitBatchEdit();
|
||||
}
|
||||
|
||||
bool isOrganic(const ROMol &frag) {
|
||||
|
||||
@@ -44,6 +44,7 @@ class RDKIT_MOLSTANDARDIZE_EXPORT FragmentRemover {
|
||||
FragmentRemover &operator=(FragmentRemover const &) = delete;
|
||||
|
||||
ROMol *remove(const ROMol &mol);
|
||||
void removeInPlace(RWMol &mol);
|
||||
|
||||
private:
|
||||
// Setting leave_last to True will ensure at least one fragment
|
||||
|
||||
@@ -68,6 +68,7 @@ accordingly.
|
||||
//! overload
|
||||
/// modifies the molecule in place
|
||||
void disconnect(RWMol &mol);
|
||||
void disconnectInPlace(RWMol &mol) { disconnect(mol); };
|
||||
|
||||
private:
|
||||
struct NonMetal {
|
||||
|
||||
@@ -122,38 +122,31 @@ void updateCleanupParamsFromJSON(CleanupParameters &params,
|
||||
}
|
||||
|
||||
RWMol *cleanup(const RWMol *mol, const CleanupParameters &params) {
|
||||
RWMol m(*mol);
|
||||
MolOps::removeHs(m);
|
||||
|
||||
auto nmol = new RWMol(*mol);
|
||||
cleanupInPlace(*nmol, params);
|
||||
return nmol;
|
||||
}
|
||||
void cleanupInPlace(RWMol &mol, const CleanupParameters &params) {
|
||||
MolOps::removeHs(mol);
|
||||
MolStandardize::MetalDisconnector md;
|
||||
md.disconnect(m);
|
||||
RWMOL_SPTR normalized(MolStandardize::normalize(&m, params));
|
||||
RWMol *reionized = MolStandardize::reionize(normalized.get(), params);
|
||||
md.disconnectInPlace(mol);
|
||||
MolStandardize::normalizeInPlace(mol, params);
|
||||
MolStandardize::reionizeInPlace(mol, params);
|
||||
bool cleanIt = true;
|
||||
bool force = true;
|
||||
MolOps::assignStereochemistry(*reionized, cleanIt, force);
|
||||
|
||||
// update properties of reionized using m.
|
||||
reionized->updateProps(m);
|
||||
|
||||
return reionized;
|
||||
MolOps::assignStereochemistry(mol, cleanIt, force);
|
||||
}
|
||||
|
||||
RWMol *tautomerParent(const RWMol &mol, const CleanupParameters &params,
|
||||
bool skip_standardize) {
|
||||
const RWMol *cleaned = nullptr;
|
||||
std::unique_ptr<RWMol> cleanedHolder;
|
||||
|
||||
std::unique_ptr<RWMol> res{new RWMol(mol)};
|
||||
if (!skip_standardize) {
|
||||
cleanedHolder.reset(cleanup(mol, params));
|
||||
cleaned = cleanedHolder.get();
|
||||
} else {
|
||||
cleaned = &mol;
|
||||
cleanupInPlace(*res, params);
|
||||
}
|
||||
|
||||
std::unique_ptr<RWMol> ct{canonicalTautomer(cleaned, params)};
|
||||
|
||||
return cleanup(ct.get(), params);
|
||||
std::unique_ptr<RWMol> ct{canonicalTautomer(res.get(), params)};
|
||||
cleanupInPlace(*ct, params);
|
||||
return ct.release();
|
||||
}
|
||||
|
||||
// Return the fragment parent of a given molecule.
|
||||
@@ -161,27 +154,19 @@ RWMol *tautomerParent(const RWMol &mol, const CleanupParameters &params,
|
||||
//
|
||||
RWMol *fragmentParent(const RWMol &mol, const CleanupParameters &params,
|
||||
bool skip_standardize) {
|
||||
const RWMol *cleaned = nullptr;
|
||||
std::unique_ptr<RWMol> cleanedHolder;
|
||||
|
||||
std::unique_ptr<RWMol> res{new RWMol(mol)};
|
||||
if (!skip_standardize) {
|
||||
cleanedHolder.reset(cleanup(mol, params));
|
||||
cleaned = cleanedHolder.get();
|
||||
} else {
|
||||
cleaned = &mol;
|
||||
cleanupInPlace(*res, params);
|
||||
}
|
||||
|
||||
LargestFragmentChooser lfragchooser(params.preferOrganic);
|
||||
return static_cast<RWMol *>(lfragchooser.choose(*cleaned));
|
||||
return static_cast<RWMol *>(lfragchooser.choose(*res));
|
||||
}
|
||||
|
||||
RWMol *stereoParent(const RWMol &mol, const CleanupParameters &params,
|
||||
bool skip_standardize) {
|
||||
RWMol *res;
|
||||
RWMol *res = new RWMol(mol);
|
||||
if (!skip_standardize) {
|
||||
res = cleanup(mol, params);
|
||||
} else {
|
||||
res = new RWMol(mol);
|
||||
cleanupInPlace(*res, params);
|
||||
}
|
||||
|
||||
MolOps::removeStereochemistry(*res);
|
||||
@@ -190,11 +175,9 @@ RWMol *stereoParent(const RWMol &mol, const CleanupParameters &params,
|
||||
|
||||
RWMol *isotopeParent(const RWMol &mol, const CleanupParameters &params,
|
||||
bool skip_standardize) {
|
||||
RWMol *res;
|
||||
RWMol *res = new RWMol(mol);
|
||||
if (!skip_standardize) {
|
||||
res = cleanup(mol, params);
|
||||
} else {
|
||||
res = new RWMol(mol);
|
||||
cleanupInPlace(*res, params);
|
||||
}
|
||||
|
||||
for (auto atom : res->atoms()) {
|
||||
@@ -208,15 +191,13 @@ RWMol *chargeParent(const RWMol &mol, const CleanupParameters &params,
|
||||
// Return the charge parent of a given molecule.
|
||||
// The charge parent is the uncharged version of the fragment parent.
|
||||
|
||||
RWMOL_SPTR fragparent(fragmentParent(mol, params, skip_standardize));
|
||||
|
||||
// if fragment...
|
||||
ROMol nm(*fragparent);
|
||||
std::unique_ptr<RWMol> fragparent{
|
||||
fragmentParent(mol, params, skip_standardize)};
|
||||
|
||||
Uncharger uncharger(params.doCanonical);
|
||||
ROMOL_SPTR uncharged(uncharger.uncharge(nm));
|
||||
RWMol *omol = cleanup(static_cast<RWMol *>(uncharged.get()), params);
|
||||
return omol;
|
||||
uncharger.unchargeInPlace(*fragparent);
|
||||
cleanupInPlace(*fragparent, params);
|
||||
return fragparent.release();
|
||||
}
|
||||
|
||||
RWMol *superParent(const RWMol &mol, const CleanupParameters &params,
|
||||
@@ -247,12 +228,27 @@ RWMol *reionize(const RWMol *mol, const CleanupParameters &params) {
|
||||
return static_cast<RWMol *>(reionizer->reionize(*mol));
|
||||
}
|
||||
|
||||
void normalizeInPlace(RWMol &mol, const CleanupParameters &params) {
|
||||
std::unique_ptr<Normalizer> normalizer{normalizerFromParams(params)};
|
||||
normalizer->normalizeInPlace(mol);
|
||||
}
|
||||
|
||||
void reionizeInPlace(RWMol &mol, const CleanupParameters &params) {
|
||||
std::unique_ptr<Reionizer> reionizer{reionizerFromParams(params)};
|
||||
reionizer->reionizeInPlace(mol);
|
||||
}
|
||||
|
||||
RWMol *removeFragments(const RWMol *mol, const CleanupParameters &params) {
|
||||
PRECONDITION(mol, "bad molecule");
|
||||
std::unique_ptr<FragmentRemover> remover{fragmentRemoverFromParams(params)};
|
||||
return static_cast<RWMol *>(remover->remove(*mol));
|
||||
}
|
||||
|
||||
void removeFragmentsInPlace(RWMol &mol, const CleanupParameters &params) {
|
||||
std::unique_ptr<FragmentRemover> remover{fragmentRemoverFromParams(params)};
|
||||
remover->removeInPlace(mol);
|
||||
}
|
||||
|
||||
RWMol *canonicalTautomer(const RWMol *mol, const CleanupParameters &params) {
|
||||
PRECONDITION(mol, "bad molecule");
|
||||
std::unique_ptr<TautomerEnumerator> te{tautomerEnumeratorFromParams(params)};
|
||||
@@ -260,22 +256,21 @@ RWMol *canonicalTautomer(const RWMol *mol, const CleanupParameters &params) {
|
||||
}
|
||||
|
||||
std::string standardizeSmiles(const std::string &smiles) {
|
||||
RWMOL_SPTR mol(SmilesToMol(smiles, 0, false));
|
||||
std::unique_ptr<RWMol> mol{SmilesToMol(smiles, 0, false)};
|
||||
if (!mol) {
|
||||
std::string message =
|
||||
"SMILES Parse Error: syntax error for input: " + smiles;
|
||||
throw ValueErrorException(message);
|
||||
}
|
||||
|
||||
CleanupParameters params;
|
||||
RWMOL_SPTR cleaned(cleanup(*mol, params));
|
||||
return MolToSmiles(*cleaned);
|
||||
cleanupInPlace(*mol);
|
||||
return MolToSmiles(*mol);
|
||||
}
|
||||
|
||||
std::vector<std::string> enumerateTautomerSmiles(
|
||||
const std::string &smiles, const CleanupParameters &params) {
|
||||
std::unique_ptr<RWMol> mol(SmilesToMol(smiles, 0, false));
|
||||
mol.reset(cleanup(mol.get(), params));
|
||||
cleanupInPlace(*mol, params);
|
||||
MolOps::sanitizeMol(*mol);
|
||||
|
||||
TautomerEnumerator te(params);
|
||||
|
||||
@@ -103,21 +103,33 @@ inline RWMol *cleanup(const RWMol &mol, const CleanupParameters &params =
|
||||
defaultCleanupParameters) {
|
||||
return cleanup(&mol, params);
|
||||
};
|
||||
//! Works the same as cleanup(mol)
|
||||
RDKIT_MOLSTANDARDIZE_EXPORT void cleanupInPlace(
|
||||
RWMol &mol, const CleanupParameters &params = defaultCleanupParameters);
|
||||
|
||||
//! Works the same as Normalizer().normalize(mol)
|
||||
RDKIT_MOLSTANDARDIZE_EXPORT RWMol *normalize(
|
||||
const RWMol *mol,
|
||||
const CleanupParameters &params = defaultCleanupParameters);
|
||||
//! Works the same as Normalizer().normalizeInPlace(mol)
|
||||
RDKIT_MOLSTANDARDIZE_EXPORT void normalizeInPlace(
|
||||
RWMol &mol, const CleanupParameters &params = defaultCleanupParameters);
|
||||
|
||||
//! Works the same as Reionizer().reionize(mol)
|
||||
RDKIT_MOLSTANDARDIZE_EXPORT RWMol *reionize(
|
||||
const RWMol *mol,
|
||||
const CleanupParameters &params = defaultCleanupParameters);
|
||||
//! Works the same as Reionizer().reionizeInPlace(mol)
|
||||
RDKIT_MOLSTANDARDIZE_EXPORT void reionizeInPlace(
|
||||
RWMol &mol, const CleanupParameters &params = defaultCleanupParameters);
|
||||
|
||||
//! Works the same as FragmentRemover().remove(mol)
|
||||
RDKIT_MOLSTANDARDIZE_EXPORT RWMol *removeFragments(
|
||||
const RWMol *mol,
|
||||
const CleanupParameters &params = defaultCleanupParameters);
|
||||
//! Works the same as FragmentRemover().removeInPlace(mol)
|
||||
RDKIT_MOLSTANDARDIZE_EXPORT void removeFragmentsInPlace(
|
||||
RWMol &mol, const CleanupParameters &params = defaultCleanupParameters);
|
||||
|
||||
//! Works the same as TautomerEnumerator().canonicalize(mol)
|
||||
RDKIT_MOLSTANDARDIZE_EXPORT RWMol *canonicalTautomer(
|
||||
@@ -188,6 +200,12 @@ RDKIT_MOLSTANDARDIZE_EXPORT void disconnectOrganometallics(
|
||||
RDKIT_MOLSTANDARDIZE_EXPORT ROMol *disconnectOrganometallics(
|
||||
const ROMol &mol, RDKit::MolStandardize::MetalDisconnectorOptions mdo = {
|
||||
true, true, false, true});
|
||||
//! As above, included for API consistency.
|
||||
inline void disconnectOrganometallicsInPlace(
|
||||
RWMol &mol, RDKit::MolStandardize::MetalDisconnectorOptions mdo = {
|
||||
true, true, false, true}) {
|
||||
disconnectOrganometallics(mol, mdo);
|
||||
};
|
||||
|
||||
//! TODO
|
||||
RDKIT_MOLSTANDARDIZE_EXPORT std::vector<std::string> enumerateTautomerSmiles(
|
||||
|
||||
@@ -85,6 +85,69 @@ Normalizer::Normalizer(
|
||||
// destructor
|
||||
Normalizer::~Normalizer() { delete d_tcat; }
|
||||
|
||||
// Normalizes mol in place by repeatedly applying the catalog's transforms.
//
// The transforms are tried in catalog order. As soon as one of them modifies
// the molecule, the molecule is re-sanitized and the scan restarts from the
// first transform. The function returns once a complete pass changes nothing,
// or gives up (with a log message) after MAX_RESTARTS passes.
//
// Throws ValueErrorException if a transform does not have exactly one
// reactant template and one product template, or if its product template has
// more atoms than its reactant template.
void Normalizer::normalizeInPlace(RWMol &mol) {
  BOOST_LOG(rdInfoLog) << "Running Normalizer\n";
  PRECONDITION(this->d_tcat, "");
  const TransformCatalogParams *tparams = this->d_tcat->getCatalogParams();
  PRECONDITION(tparams, "no transform parameters");

  // nothing to do for an empty molecule
  if (!mol.getNumAtoms()) {
    return;
  }

  const std::vector<std::shared_ptr<ChemicalReaction>> &transforms =
      tparams->getTransformations();

  // initialize the transforms and make sure that they are compatible with the
  // restrictions on in-place reactions
  for (const auto &transform : transforms) {
    if (!transform->isInitialized()) {
      transform->initReactantMatchers();
    }
    if (transform->getNumProductTemplates() != 1 ||
        transform->getNumReactantTemplates() != 1 ||
        transform->getProducts()[0]->getNumAtoms() >
            transform->getReactants()[0]->getNumAtoms()) {
      throw ValueErrorException(
          "normalizeInPlace can only be used with transforms which have a "
          "single reactant and single product. The number of atoms in the "
          "product cannot be larger than the number of atoms in the "
          "reactant.");
    }
  }

  // we might want ring info
  if (!mol.getRingInfo()->isInitialized()) {
    MolOps::symmetrizeSSSR(mol);
  }

  // keep the atoms of the molecule which do not match the transform template
  constexpr bool removeUnmatchedAtoms = false;
  // all sanitization steps except SANITIZE_CLEANUP and SANITIZE_PROPERTIES
  constexpr unsigned int sanitizeOps = MolOps::SANITIZE_ALL ^
                                       MolOps::SANITIZE_CLEANUP ^
                                       MolOps::SANITIZE_PROPERTIES;

  for (unsigned int i = 0; i < MAX_RESTARTS; ++i) {
    bool transformApplied = false;
    // Iterate through Normalization transforms and apply each in order
    for (const auto &transform : transforms) {
      if (!transform->runReactant(mol, removeUnmatchedAtoms)) {
        continue;
      }
      BOOST_LOG(rdInfoLog)
          << "Rule applied: "
          << transform->getProp<std::string>(common_properties::_Name) << "\n";
      unsigned int failed = 0;
      try {
        MolOps::sanitizeMol(mol, failed, sanitizeOps);
      } catch (const MolSanitizeException &) {
        // a sanitization failure is only logged; normalization carries on
        BOOST_LOG(rdInfoLog) << "FAILED sanitizeMol.\n";
      }
      // the molecule changed: restart the scan from the first transform
      transformApplied = true;
      break;
    }
    // a full pass without any change means that we are done
    if (!transformApplied) {
      return;
    }
  }
  BOOST_LOG(rdInfoLog) << "Gave up normalization after " << MAX_RESTARTS
                       << " restarts.\n";
}
|
||||
|
||||
ROMol *Normalizer::normalize(const ROMol &mol) {
|
||||
BOOST_LOG(rdInfoLog) << "Running Normalizer\n";
|
||||
PRECONDITION(this->d_tcat, "");
|
||||
|
||||
@@ -77,6 +77,8 @@ class RDKIT_MOLSTANDARDIZE_EXPORT Normalizer {
|
||||
*/
|
||||
ROMol *normalize(const ROMol &mol);
|
||||
|
||||
//! Applies the normalization transforms to \c mol, modifying it in place
void normalizeInPlace(RWMol &mol);
|
||||
|
||||
private:
|
||||
const TransformCatalog *d_tcat;
|
||||
unsigned int MAX_RESTARTS;
|
||||
|
||||
@@ -25,6 +25,9 @@ ROMol *reionizeHelper(MolStandardize::Reionizer &self, const ROMol &mol) {
|
||||
return self.reionize(mol);
|
||||
}
|
||||
|
||||
// Wrapper exposed to Python as Reionizer.reionizeInPlace: casts the molecule
// to RWMol and forwards it to Reionizer::reionizeInPlace().
void reionizeInPlaceHelper(MolStandardize::Reionizer &self, ROMol &mol) {
  auto &rwmol = static_cast<RWMol &>(mol);
  self.reionizeInPlace(rwmol);
}
|
||||
MolStandardize::Reionizer *reionizerFromData(const std::string &data,
|
||||
python::object chargeCorrections) {
|
||||
std::istringstream sstr(data);
|
||||
@@ -40,6 +43,10 @@ MolStandardize::Reionizer *reionizerFromData(const std::string &data,
|
||||
return res;
|
||||
}
|
||||
|
||||
// Wrapper exposed to Python as Uncharger.unchargeInPlace: casts the molecule
// to RWMol and forwards it to Uncharger::unchargeInPlace().
void unchargeInPlaceHelper(MolStandardize::Uncharger &self, ROMol &mol) {
  auto &rwmol = static_cast<RWMol &>(mol);
  self.unchargeInPlace(rwmol);
}
|
||||
|
||||
} // namespace
|
||||
|
||||
struct charge_wrapper {
|
||||
@@ -65,7 +72,10 @@ struct charge_wrapper {
|
||||
std::vector<MolStandardize::ChargeCorrection>>())
|
||||
.def("reionize", &reionizeHelper,
|
||||
(python::arg("self"), python::arg("mol")), "",
|
||||
python::return_value_policy<python::manage_new_object>());
|
||||
python::return_value_policy<python::manage_new_object>())
|
||||
.def("reionizeInPlace", reionizeInPlaceHelper,
|
||||
(python::arg("self"), python::arg("mol")),
|
||||
"modifies the input molecule");
|
||||
|
||||
python::def("ReionizerFromData", &reionizerFromData,
|
||||
(python::arg("paramData"),
|
||||
@@ -78,7 +88,10 @@ struct charge_wrapper {
|
||||
python::arg("canonicalOrder") = true)))
|
||||
.def("uncharge", &MolStandardize::Uncharger::uncharge,
|
||||
(python::arg("self"), python::arg("mol")), "",
|
||||
python::return_value_policy<python::manage_new_object>());
|
||||
python::return_value_policy<python::manage_new_object>())
|
||||
.def("unchargeInPlace", unchargeInPlaceHelper,
|
||||
(python::arg("self"), python::arg("mol")),
|
||||
"modifies the input molecule");
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -21,6 +21,10 @@ ROMol *removeHelper(MolStandardize::FragmentRemover &self, const ROMol &mol) {
|
||||
return self.remove(mol);
|
||||
}
|
||||
|
||||
// Wrapper exposed to Python as FragmentRemover.removeInPlace: casts the
// molecule to RWMol and forwards it to FragmentRemover::removeInPlace().
void removeInPlaceHelper(MolStandardize::FragmentRemover &self, ROMol &mol) {
  auto &rwmol = static_cast<RWMol &>(mol);
  self.removeInPlace(rwmol);
}
|
||||
|
||||
ROMol *chooseHelper(MolStandardize::LargestFragmentChooser &self,
|
||||
const ROMol &mol) {
|
||||
return self.choose(mol);
|
||||
@@ -50,7 +54,10 @@ struct fragment_wrapper {
|
||||
python::arg("leave_last") = true,
|
||||
python::arg("skip_if_all_match") = false)))
|
||||
.def("remove", &removeHelper, (python::arg("self"), python::arg("mol")),
|
||||
"", python::return_value_policy<python::manage_new_object>());
|
||||
"", python::return_value_policy<python::manage_new_object>())
|
||||
.def("removeInPlace", &removeInPlaceHelper,
|
||||
(python::arg("self"), python::arg("mol")),
|
||||
"modifies the molecule in place");
|
||||
|
||||
python::def(
|
||||
"FragmentRemoverFromData", &removerFromParams,
|
||||
|
||||
@@ -44,6 +44,9 @@ class MetalDisconnectorWrap {
|
||||
// Forwards to the wrapped MetalDisconnector and returns the molecule it
// produces.
RDKit::ROMol *disconnect(const RDKit::ROMol &mol) {
  RDKit::ROMol *disconnected = md_->disconnect(mol);
  return disconnected;
}
|
||||
// Casts the molecule to RWMol and lets the wrapped MetalDisconnector modify
// it in place.
void disconnectInPlace(RDKit::ROMol &mol) {
  auto &rwmol = static_cast<RDKit::RWMol &>(mol);
  md_->disconnectInPlace(rwmol);
}
|
||||
|
||||
private:
|
||||
std::unique_ptr<RDKit::MolStandardize::MetalDisconnector> md_;
|
||||
@@ -118,8 +121,12 @@ struct metal_wrapper {
|
||||
"Set the query molecule defining the metals to disconnect if attached"
|
||||
" to Nitrogen, Oxygen or Fluorine.")
|
||||
.def("Disconnect", &MetalDisconnectorWrap::disconnect,
|
||||
(python::arg("self"), python::arg("mol")), docString.c_str(),
|
||||
python::return_value_policy<python::manage_new_object>());
|
||||
(python::arg("self"), python::arg("mol")),
|
||||
"performs the disconnection",
|
||||
python::return_value_policy<python::manage_new_object>())
|
||||
.def("DisconnectInPlace", &MetalDisconnectorWrap::disconnectInPlace,
|
||||
(python::arg("self"), python::arg("mol")),
|
||||
"performs the disconnection, modifies the input molecule");
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -22,6 +22,10 @@ ROMol *normalizeHelper(MolStandardize::Normalizer &self, const ROMol &mol) {
|
||||
return self.normalize(mol);
|
||||
}
|
||||
|
||||
// Wrapper exposed to Python as Normalizer.normalizeInPlace: casts the
// molecule to RWMol and forwards it to Normalizer::normalizeInPlace().
void normalizeInPlaceHelper(MolStandardize::Normalizer &self, ROMol &mol) {
  auto &rwmol = static_cast<RWMol &>(mol);
  self.normalizeInPlace(rwmol);
}
|
||||
|
||||
MolStandardize::Normalizer *normalizerFromDataAndParams(
|
||||
const std::string &data, const MolStandardize::CleanupParameters ¶ms) {
|
||||
std::istringstream sstr(data);
|
||||
@@ -44,7 +48,10 @@ struct normalize_wrapper {
|
||||
python::args("normalizeFilename", "maxRestarts")))
|
||||
.def("normalize", &normalizeHelper,
|
||||
(python::arg("self"), python::arg("mol")), "",
|
||||
python::return_value_policy<python::manage_new_object>());
|
||||
python::return_value_policy<python::manage_new_object>())
|
||||
.def("normalizeInPlace", &normalizeInPlaceHelper,
|
||||
(python::arg("self"), python::arg("mol")),
|
||||
"modifies the input molecule");
|
||||
python::def(
|
||||
"NormalizerFromData", &normalizerFromDataAndParams,
|
||||
(python::arg("paramData"), python::arg("params")),
|
||||
|
||||
@@ -59,6 +59,35 @@ RDKit::ROMol *canonicalTautomerHelper(const RDKit::ROMol *mol,
|
||||
return msHelper(mol, params, RDKit::MolStandardize::canonicalTautomer);
|
||||
}
|
||||
|
||||
template <typename FUNCTYPE>
|
||||
void inPlaceHelper(RDKit::ROMol *mol, python::object params, FUNCTYPE func) {
|
||||
if (!mol) {
|
||||
throw_value_error("Molecule is None");
|
||||
}
|
||||
const RDKit::MolStandardize::CleanupParameters *ps =
|
||||
&RDKit::MolStandardize::defaultCleanupParameters;
|
||||
if (params) {
|
||||
ps = python::extract<RDKit::MolStandardize::CleanupParameters *>(params);
|
||||
}
|
||||
func(*static_cast<RDKit::RWMol *>(mol), *ps);
|
||||
}
|
||||
|
||||
void cleanupInPlaceHelper(RDKit::ROMol *mol, python::object params) {
|
||||
inPlaceHelper(mol, params, RDKit::MolStandardize::cleanupInPlace);
|
||||
}
|
||||
|
||||
void normalizeInPlaceHelper(RDKit::ROMol *mol, python::object params) {
|
||||
inPlaceHelper(mol, params, RDKit::MolStandardize::normalizeInPlace);
|
||||
}
|
||||
|
||||
void reionizeInPlaceHelper(RDKit::ROMol *mol, python::object params) {
|
||||
inPlaceHelper(mol, params, RDKit::MolStandardize::reionizeInPlace);
|
||||
}
|
||||
|
||||
void removeFragmentsInPlaceHelper(RDKit::ROMol *mol, python::object params) {
|
||||
inPlaceHelper(mol, params, RDKit::MolStandardize::removeFragmentsInPlace);
|
||||
}
|
||||
|
||||
template <typename FUNCTYPE>
|
||||
RDKit::ROMol *parentHelper(const RDKit::ROMol *mol, python::object params,
|
||||
bool skip_standardize, FUNCTYPE func) {
|
||||
@@ -118,6 +147,19 @@ RDKit::ROMol *disconnectOrganometallicsHelper(RDKit::ROMol &mol,
|
||||
return RDKit::MolStandardize::disconnectOrganometallics(mol);
|
||||
}
|
||||
}
|
||||
// Python-facing wrapper around MolStandardize::disconnectOrganometallicsInPlace.
//
// mol is modified in place. When params evaluates true a
// MetalDisconnectorOptions pointer is extracted from it and passed along;
// otherwise the C++ default options are used.
// A null molecule is rejected with a ValueError, matching inPlaceHelper().
void disconnectOrganometallicsInPlaceHelper(RDKit::ROMol *mol,
                                            python::object params) {
  // guard against dereferencing a null molecule pointer below
  if (!mol) {
    throw_value_error("Molecule is None");
  }
  auto &rwmol = *static_cast<RDKit::RWMol *>(mol);
  if (params) {
    RDKit::MolStandardize::MetalDisconnectorOptions *mdo =
        python::extract<RDKit::MolStandardize::MetalDisconnectorOptions *>(
            params);
    RDKit::MolStandardize::disconnectOrganometallicsInPlace(rwmol, *mdo);
  } else {
    RDKit::MolStandardize::disconnectOrganometallicsInPlace(rwmol);
  }
}
|
||||
|
||||
} // namespace
|
||||
|
||||
@@ -208,6 +250,10 @@ BOOST_PYTHON_MODULE(rdMolStandardize) {
|
||||
(python::arg("mol"), python::arg("params") = python::object()),
|
||||
docString.c_str(),
|
||||
python::return_value_policy<python::manage_new_object>());
|
||||
docString = "Standardizes a molecule in place";
|
||||
python::def("CleanupInPlace", cleanupInPlaceHelper,
|
||||
(python::arg("mol"), python::arg("params") = python::object()),
|
||||
docString.c_str());
|
||||
docString = "Convenience function for standardizing a SMILES";
|
||||
python::def("StandardizeSmiles", RDKit::MolStandardize::standardizeSmiles,
|
||||
(python::arg("smiles")), docString.c_str());
|
||||
@@ -258,16 +304,32 @@ BOOST_PYTHON_MODULE(rdMolStandardize) {
|
||||
(python::arg("mol"), python::arg("params") = python::object()),
|
||||
docString.c_str(),
|
||||
python::return_value_policy<python::manage_new_object>());
|
||||
docString =
|
||||
"Applies a series of standard transformations to correct functional "
|
||||
"groups and recombine charges, modifies the input molecule";
|
||||
python::def("NormalizeInPlace", normalizeInPlaceHelper,
|
||||
(python::arg("mol"), python::arg("params") = python::object()),
|
||||
docString.c_str());
|
||||
docString = "Ensures the strongest acid groups are charged first";
|
||||
python::def("Reionize", reionizeHelper,
|
||||
(python::arg("mol"), python::arg("params") = python::object()),
|
||||
docString.c_str(),
|
||||
python::return_value_policy<python::manage_new_object>());
|
||||
docString =
|
||||
"Ensures the strongest acid groups are charged first, modifies the input molecule";
|
||||
python::def("ReionizeInPlace", reionizeInPlaceHelper,
|
||||
(python::arg("mol"), python::arg("params") = python::object()),
|
||||
docString.c_str());
|
||||
docString = "Removes fragments from the molecule";
|
||||
python::def("RemoveFragments", removeFragsHelper,
|
||||
(python::arg("mol"), python::arg("params") = python::object()),
|
||||
docString.c_str(),
|
||||
python::return_value_policy<python::manage_new_object>());
|
||||
docString =
|
||||
"Removes fragments from the molecule, modifies the input molecule";
|
||||
python::def("RemoveFragmentsInPlace", removeFragmentsInPlaceHelper,
|
||||
(python::arg("mol"), python::arg("params") = python::object()),
|
||||
docString.c_str());
|
||||
docString = "Returns the canonical tautomer for the molecule";
|
||||
python::def("CanonicalTautomer", canonicalTautomerHelper,
|
||||
(python::arg("mol"), python::arg("params") = python::object()),
|
||||
@@ -280,6 +342,13 @@ BOOST_PYTHON_MODULE(rdMolStandardize) {
|
||||
(python::arg("mol"), python::arg("params") = python::object()),
|
||||
docString.c_str(),
|
||||
python::return_value_policy<python::manage_new_object>());
|
||||
docString =
|
||||
"Disconnects the molecule using the organometallics"
|
||||
" rules, modifies the input molecule";
|
||||
python::def("DisconnectOrganometallicsInPlace",
|
||||
disconnectOrganometallicsInPlaceHelper,
|
||||
(python::arg("mol"), python::arg("params") = python::object()),
|
||||
docString.c_str());
|
||||
|
||||
wrap_validate();
|
||||
wrap_charge();
|
||||
|
||||
@@ -72,8 +72,11 @@ class TestCase(unittest.TestCase):
|
||||
mol = Chem.MolFromSmiles("C1(CCCCC1)[Zn]Br")
|
||||
md = rdMolStandardize.MetalDisconnector()
|
||||
nm = md.Disconnect(mol)
|
||||
# Metal.MetalDisconnector.Disconnect(mol)
|
||||
self.assertEqual(Chem.MolToSmiles(nm), "[Br-].[CH-]1CCCCC1.[Zn+2]")
|
||||
nm = Chem.Mol(mol)
|
||||
md.DisconnectInPlace(nm)
|
||||
self.assertEqual(Chem.MolToSmiles(nm), "[Br-].[CH-]1CCCCC1.[Zn+2]")
|
||||
|
||||
|
||||
# test user defined metal_nof
|
||||
md.SetMetalNof(
|
||||
@@ -123,6 +126,11 @@ class TestCase(unittest.TestCase):
|
||||
nm = reionizer.reionize(mol)
|
||||
self.assertEqual(Chem.MolToSmiles(nm), "O=S(O)c1ccc(S(=O)(=O)[O-])cc1")
|
||||
|
||||
nm = Chem.Mol(mol)
|
||||
reionizer.reionizeInPlace(nm)
|
||||
self.assertEqual(Chem.MolToSmiles(nm), "O=S(O)c1ccc(S(=O)(=O)[O-])cc1")
|
||||
|
||||
|
||||
# try reionize with another acid base pair library without the right
|
||||
# pairs
|
||||
abfile = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'MolStandardize', 'test_data',
|
||||
@@ -136,15 +144,26 @@ class TestCase(unittest.TestCase):
|
||||
mol3 = Chem.MolFromSmiles("O=C([O-])c1ccccc1")
|
||||
nm3 = uncharger.uncharge(mol3)
|
||||
self.assertEqual(Chem.MolToSmiles(nm3), "O=C(O)c1ccccc1")
|
||||
nm3 = Chem.Mol(mol3)
|
||||
uncharger.unchargeInPlace(nm3)
|
||||
self.assertEqual(Chem.MolToSmiles(nm3), "O=C(O)c1ccccc1")
|
||||
|
||||
# test canonical Uncharger
|
||||
uncharger = rdMolStandardize.Uncharger(canonicalOrder=False)
|
||||
mol3 = Chem.MolFromSmiles("C[N+](C)(C)CC(C(=O)[O-])CC(=O)[O-]")
|
||||
nm3 = uncharger.uncharge(mol3)
|
||||
self.assertEqual(Chem.MolToSmiles(nm3), "C[N+](C)(C)CC(CC(=O)[O-])C(=O)O")
|
||||
nm3 = Chem.Mol(mol3)
|
||||
uncharger.unchargeInPlace(nm3)
|
||||
self.assertEqual(Chem.MolToSmiles(nm3), "C[N+](C)(C)CC(CC(=O)[O-])C(=O)O")
|
||||
|
||||
|
||||
uncharger = rdMolStandardize.Uncharger(canonicalOrder=True)
|
||||
nm3 = uncharger.uncharge(mol3)
|
||||
self.assertEqual(Chem.MolToSmiles(nm3), "C[N+](C)(C)CC(CC(=O)O)C(=O)[O-]")
|
||||
nm3 = Chem.Mol(mol3)
|
||||
uncharger.unchargeInPlace(nm3)
|
||||
self.assertEqual(Chem.MolToSmiles(nm3), "C[N+](C)(C)CC(CC(=O)O)C(=O)[O-]")
|
||||
|
||||
def test7Fragment(self):
|
||||
fragremover = rdMolStandardize.FragmentRemover()
|
||||
@@ -152,6 +171,10 @@ class TestCase(unittest.TestCase):
|
||||
nm = fragremover.remove(mol)
|
||||
self.assertEqual(Chem.MolToSmiles(nm), "CN(C)C")
|
||||
|
||||
nm = Chem.Mol(mol)
|
||||
fragremover.removeInPlace(nm)
|
||||
self.assertEqual(Chem.MolToSmiles(nm), "CN(C)C")
|
||||
|
||||
lfragchooser = rdMolStandardize.LargestFragmentChooser()
|
||||
mol2 = Chem.MolFromSmiles("[N+](=O)([O-])[O-].[CH3+]")
|
||||
nm2 = lfragchooser.choose(mol2)
|
||||
@@ -165,6 +188,9 @@ class TestCase(unittest.TestCase):
|
||||
mol = Chem.MolFromSmiles("[Na+].Cl.Cl.Br")
|
||||
nm = fragremover.remove(mol)
|
||||
self.assertEqual(nm.GetNumAtoms(), mol.GetNumAtoms())
|
||||
nm = Chem.Mol(mol)
|
||||
fragremover.removeInPlace(mol)
|
||||
self.assertEqual(nm.GetNumAtoms(), mol.GetNumAtoms())
|
||||
|
||||
smi3 = "CNC[C@@H]([C@H]([C@@H]([C@@H](CO)O)O)O)O.c1cc2c(cc1C(=O)O)oc(n2)c3cc(cc(c3)Cl)Cl"
|
||||
|
||||
@@ -231,6 +257,10 @@ class TestCase(unittest.TestCase):
|
||||
mol = Chem.MolFromSmiles("C[n+]1ccccc1[O-]")
|
||||
nm = normalizer.normalize(mol)
|
||||
self.assertEqual(Chem.MolToSmiles(nm), "Cn1ccccc1=O")
|
||||
nm = Chem.Mol(mol)
|
||||
normalizer.normalizeInPlace(nm)
|
||||
self.assertEqual(Chem.MolToSmiles(nm), "Cn1ccccc1=O")
|
||||
|
||||
|
||||
def test9Validate(self):
|
||||
vm = rdMolStandardize.RDKitValidation()
|
||||
@@ -958,6 +988,28 @@ chlorine [Cl]
|
||||
nm = rdMolStandardize.RemoveFragments(m)
|
||||
self.assertEqual(Chem.MolToSmiles(nm), "CC")
|
||||
|
||||
def test22StandardizeInPlace(self):
  # Each entry is (input SMILES, in-place function, expected output SMILES);
  # the function must modify the molecule it is handed.
  cases = (
    ("O=N(=O)-C(O[Fe])C(C(=O)O)C-N(=O)=O", rdMolStandardize.CleanupInPlace,
     "O=C([O-])C(C[N+](=O)[O-])C(O)[N+](=O)[O-].[Fe+]"),
    ('[F-].[Cl-].[Br-].CC', rdMolStandardize.RemoveFragmentsInPlace, "CC"),
    ('C1=C(C=CC(=C1)[S]([O-])=O)[S](O)(=O)=O', rdMolStandardize.ReionizeInPlace,
     "O=S(O)c1ccc(S(=O)(=O)[O-])cc1"),
    ('CCO[Fe]', rdMolStandardize.DisconnectOrganometallicsInPlace, "CCO.[Fe]"),
    (r"C[N+](C)=C\C=C\[O-]", rdMolStandardize.NormalizeInPlace, "CN(C)C=CC=O"),
  )
  for smiles, inPlaceFunc, expected in cases:
    m = Chem.MolFromSmiles(smiles)
    inPlaceFunc(m)
    self.assertEqual(Chem.MolToSmiles(m), expected)
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
||||
@@ -1038,3 +1038,79 @@ TEST_CASE("Github 5784: kekulization error when enumerating tautomers") {
|
||||
REQUIRE(res);
|
||||
}
|
||||
}
|
||||
|
||||
// Exercises the in-place variants of the MolStandardize operations, both via
// the worker classes and via the free convenience functions.
TEST_CASE("in place operations") {
  SECTION("reionizer") {
    MolStandardize::Reionizer reion;
    auto m = "c1cc([O-])cc(C(=O)O)c1"_smiles;
    REQUIRE(m);
    reion.reionizeInPlace(*m);
    CHECK(MolToSmiles(*m) == "O=C([O-])c1cccc(O)c1");
  }
  SECTION("reionize") {
    auto m = "c1cc([O-])cc(C(=O)O)c1"_smiles;
    REQUIRE(m);
    MolStandardize::reionizeInPlace(*m);
    CHECK(MolToSmiles(*m) == "O=C([O-])c1cccc(O)c1");
  }
  SECTION("uncharge") {
    MolStandardize::Uncharger unchg;
    auto m = "c1cc([O-])cc(C(=O)O)c1"_smiles;
    REQUIRE(m);
    unchg.unchargeInPlace(*m);
    CHECK(MolToSmiles(*m) == "O=C(O)c1cccc(O)c1");
  }
  SECTION("normalizer") {
    MolStandardize::Normalizer nrml;
    // the inputs are parsed without sanitization
    SmilesParserParams ps;
    ps.sanitize = false;
    std::unique_ptr<RWMol> m{SmilesToMol("O=N(=O)-CC-N(=O)=O", ps)};
    REQUIRE(m);
    nrml.normalizeInPlace(*m);
    CHECK(MolToSmiles(*m) == "O=[N+]([O-])CC[N+](=O)[O-]");
    m.reset(SmilesToMol("OCCN", ps));
    REQUIRE(m);
    nrml.normalizeInPlace(*m);
    CHECK(MolToSmiles(*m) == "NCCO");
  }
  SECTION("normalize") {
    SmilesParserParams ps;
    ps.sanitize = false;
    std::unique_ptr<RWMol> m{SmilesToMol("O=N(=O)-CC-N(=O)=O", ps)};
    REQUIRE(m);
    MolStandardize::normalizeInPlace(*m);
    CHECK(MolToSmiles(*m) == "O=[N+]([O-])CC[N+](=O)[O-]");
    m.reset(SmilesToMol("OCCN", ps));
    REQUIRE(m);
    MolStandardize::normalizeInPlace(*m);
    CHECK(MolToSmiles(*m) == "NCCO");
  }
  SECTION("FragmentRemover") {
    auto m = "CCCC.Cl.[Na]"_smiles;
    REQUIRE(m);
    MolStandardize::FragmentRemover fragremover;
    // class-based and free-function forms each work on their own copy
    RWMol cp1(*m);
    fragremover.removeInPlace(cp1);
    CHECK(MolToSmiles(cp1) == "CCCC");
    RWMol cp2(*m);
    MolStandardize::removeFragmentsInPlace(cp2);
    CHECK(MolToSmiles(cp2) == "CCCC");
  }
  SECTION("cleanup") {
    SmilesParserParams ps;
    ps.sanitize = false;
    // silly ugly example which ensures disconnection, normalization, and
    // reionization
    std::unique_ptr<RWMol> m{
        SmilesToMol("O=N(=O)-C(O[Fe])C(C(=O)O)C-N(=O)=O", ps)};
    REQUIRE(m);
    MolStandardize::cleanupInPlace(*m);
    CHECK(MolToSmiles(*m) ==
          "O=C([O-])C(C[N+](=O)[O-])C(O)[N+](=O)[O-].[Fe+]");
  }
  SECTION("disconnect organometallics") {
    auto m = "[CH2-](->[K+])c1ccccc1"_smiles;
    REQUIRE(m);
    MolStandardize::disconnectOrganometallicsInPlace(*m);
    CHECK(MolToSmiles(*m) == "[CH2-]c1ccccc1.[K+]");
  }
}
|
||||
|
||||
Reference in New Issue
Block a user