Fix/sanitizerxn merge hs (#1367)

* Fixes exceptions to use correct functionName

* Deprecates the ChemDrawRxnAdjustParams name: the SciQuest integration/ELN searching
 doesn’t map to this behavior. Renamed to MatchOnlyAtRgroupsAdjustParams

* Python wrapper for deprecated API

* fixHs now is more intelligent about merging hs.

The only case where Hs can’t be merged is when they are mapped
to a non-hydrogen atom in the product.

* Updates deprecated ChemDrawAdjustParams
This commit is contained in:
Brian Kelley
2017-03-20 10:30:36 -04:00
committed by Greg Landrum
parent 1d04eb3351
commit a02f2b0bb1
6 changed files with 493 additions and 10 deletions

View File

@@ -317,7 +317,7 @@ bool ChemicalReaction::validate(unsigned int &numWarnings,
bool isMoleculeReactantOfReaction(const ChemicalReaction &rxn, const ROMol &mol,
unsigned int &which) {
if (!rxn.isInitialized()) {
throw ChemicalReactionException("initMatchers() must be called first");
throw ChemicalReactionException("initReactantMatchers() must be called first");
}
which = 0;
for (MOL_SPTR_VECT::const_iterator iter = rxn.beginReactantTemplates();
@@ -338,7 +338,7 @@ bool isMoleculeReactantOfReaction(const ChemicalReaction &rxn,
bool isMoleculeProductOfReaction(const ChemicalReaction &rxn, const ROMol &mol,
unsigned int &which) {
if (!rxn.isInitialized()) {
throw ChemicalReactionException("initMatchers() must be called first");
throw ChemicalReactionException("initReactantMatchers() must be called first");
}
which = 0;
for (MOL_SPTR_VECT::const_iterator iter = rxn.beginProductTemplates();
@@ -359,7 +359,7 @@ bool isMoleculeProductOfReaction(const ChemicalReaction &rxn,
bool isMoleculeAgentOfReaction(const ChemicalReaction &rxn, const ROMol &mol,
unsigned int &which) {
if (!rxn.isInitialized()) {
throw ChemicalReactionException("initMatchers() must be called first");
throw ChemicalReactionException("initReactantMatchers() must be called first");
}
which = 0;
for (MOL_SPTR_VECT::const_iterator iter = rxn.beginAgentTemplates();
@@ -398,7 +398,7 @@ void addRecursiveQueriesToReaction(
std::vector<std::vector<std::pair<unsigned int, std::string> > >
*reactantLabels) {
if (!rxn.isInitialized()) {
throw ChemicalReactionException("initMatchers() must be called first");
throw ChemicalReactionException("initReactantMatchers() must be called first");
}
if (reactantLabels != NULL) {
@@ -576,7 +576,7 @@ bool getMappedAtoms(T &rIt, std::map<int, const Atom *> &mappedAtoms) {
VECT_INT_VECT getReactingAtoms(const ChemicalReaction &rxn,
bool mappedAtomsOnly) {
if (!rxn.isInitialized()) {
throw ChemicalReactionException("initMatchers() must be called first");
throw ChemicalReactionException("initReactantMatchers() must be called first");
}
VECT_INT_VECT res;
res.resize(rxn.getNumReactantTemplates());

View File

@@ -255,7 +255,6 @@ void fixRGroups(ChemicalReaction &rxn) {
// if we have query atoms without rlabels, make proper rlabels if possible
// ensure that every rlabel in the reactant has one in the product
void fixAtomMaps(ChemicalReaction &rxn) {
int max_atom_map = getMaxProp<int>(
rxn,
@@ -324,15 +323,67 @@ void fixReactantTemplateAromaticity(ChemicalReaction &rxn) {
}
// Prepares the reactant templates of `rxn` so that explicit query hydrogens
// can be merged into their neighbors.
//
// Step 1: atom-map numbers on reactant hydrogens are dropped unless the same
//         map number is carried by a heavy (non-hydrogen) atom in one of the
//         product templates. In that one case the mapping is kept and a
//         warning is logged, because such a reaction can only match reactants
//         that carry explicit hydrogens.
// Step 2: MolOps::mergeQueryHs() is run on every reactant template with
//         mergeUnmappedOnly = true (presumably so that hydrogens which kept
//         their mapping in step 1 are left alone -- confirm against
//         mergeQueryHs).
//
// A reactant template that is not an RWMol trips the PRECONDITION.
void fixHs(ChemicalReaction &rxn) {
  {
    // If mapped hydrogens in the reactants are mapped to heavy atoms in the
    // products, keep the mappings; in all other cases remove them.
    // This allows us to merge the query hydrogen atoms.

    // Atom-map numbers that appear on heavy (non-hydrogen) product atoms.
    std::map<int, bool> mappedToHeavyProductAtom;
    for (MOL_SPTR_VECT::iterator it = rxn.beginProductTemplates();
         it != rxn.endProductTemplates(); ++it) {
      for (ROMol::AtomIterator atIt = (*it)->beginAtoms();
           atIt != (*it)->endAtoms(); ++atIt) {
        Atom *atom = (*atIt);
        if (atom->getAtomicNum() == 1) {
          continue;  // only heavy atoms are of interest here
        }
        int atomMap = 0;
        if (atom->getPropIfPresent(common_properties::molAtomMapNumber,
                                   atomMap) &&
            atomMap) {
          mappedToHeavyProductAtom[atomMap] = true;
        }
      }
    }

    for (MOL_SPTR_VECT::iterator it = rxn.beginReactantTemplates();
         it != rxn.endReactantTemplates(); ++it) {
      for (ROMol::AtomIterator atIt = (*it)->beginAtoms();
           atIt != (*it)->endAtoms(); ++atIt) {
        Atom *atom = (*atIt);
        if (atom->getAtomicNum() != 1) {
          continue;  // only hydrogens are candidates for un-mapping
        }
        int atomMap = 0;
        if (!atom->getPropIfPresent(common_properties::molAtomMapNumber,
                                    atomMap) ||
            !atomMap) {
          continue;  // unmapped hydrogen: nothing to decide
        }
        if (mappedToHeavyProductAtom.find(atomMap) ==
            mappedToHeavyProductAtom.end()) {
          // The hydrogen does not turn into a heavy atom: the mapping carries
          // no information we need, so drop it to make the H mergeable.
          atom->clearProp(common_properties::molAtomMapNumber);
        } else {
          BOOST_LOG(rdWarningLog) <<
            "Reaction has explicit hydrogens, reactants will need explicit hydrogens (addHs)"
                                  << std::endl;
        }
      }
    }
  }

  const bool mergeUnmappedOnly = true;
  for (MOL_SPTR_VECT::iterator it = rxn.beginReactantTemplates();
       it != rxn.endReactantTemplates(); ++it) {
    RWMol *rw = dynamic_cast<RWMol *>(it->get());
    PRECONDITION(rw, "Oops, not really an RWMol?");
    MolOps::mergeQueryHs(*rw, mergeUnmappedOnly);
  }
}

View File

@@ -82,7 +82,22 @@ inline const MolOps::AdjustQueryParameters DefaultRxnAdjustParams() {
}
// Adjustment parameters formerly advertised as "ChemDraw style" matching of
// reagents.
// -- deprecated: the same settings are available under the name
//    MatchOnlyAtRgroupsAdjustParams; the old name was misleading because
//    these settings do not match sciquest style searching.
// Every call writes a deprecation notice to rdWarningLog before the
// parameter object is built and returned.
inline const MolOps::AdjustQueryParameters ChemDrawRxnAdjustParams() {
  BOOST_LOG(rdWarningLog)
      << " deprecated -- please use MatchOnlyAtRgroupsAdjustParams instead"
      << std::endl;

  MolOps::AdjustQueryParameters adjust;

  // degree: adjusted, with ADJUST_IGNOREDUMMIES as the flag set
  adjust.adjustDegreeFlags = MolOps::ADJUST_IGNOREDUMMIES;
  adjust.adjustDegree = true;

  // ring count: left untouched
  adjust.adjustRingCountFlags = MolOps::ADJUST_IGNORENONE;
  adjust.adjustRingCount = false;

  // remaining switches
  adjust.aromatizeIfPossible = true;
  adjust.makeDummiesQueries = false;

  return adjust;
}
inline const MolOps::AdjustQueryParameters MatchOnlyAtRgroupsAdjustParams() {
MolOps::AdjustQueryParameters params;
params.adjustDegree = true;
params.adjustDegreeFlags = MolOps::ADJUST_IGNOREDUMMIES;

View File

@@ -816,7 +816,10 @@ Sample Usage:\n\
"Returns the default adjustment parameters for reactant templates");
python::def("GetChemDrawRxnAdjustParams", RDKit::RxnOps::ChemDrawRxnAdjustParams,
"Returns the chemdraw style adjustment parameters for reactant templates");
"(deprecated, see MatchOnlyAtRgroupsAdjustParams)\n\tReturns the chemdraw style adjustment parameters for reactant templates");
python::def("MatchOnlyAtRgroupsAdjustParams", RDKit::RxnOps::MatchOnlyAtRgroupsAdjustParams,
"Only match at the specified rgroup locations in the reactant templates");
std::string docstring = "feed me";
python::def(

View File

@@ -43,6 +43,7 @@
#include <GraphMol/ChemReactions/ReactionPickler.h>
#include <GraphMol/ChemReactions/ReactionRunner.h>
#include <GraphMol/ChemReactions/ReactionUtils.h>
#include <GraphMol/ChemReactions/SanitizeRxn.h>
#include <GraphMol/FileParsers/FileParsers.h>
#include <GraphMol/Atom.h>
#include <fstream>
@@ -5985,6 +5986,410 @@ void test64Github1266() {
BOOST_LOG(rdInfoLog) << "Done" << std::endl;
}
// Regression test: RxnOps::sanitizeRxn() on a reaction whose reactant
// template carries explicit hydrogens that have no atom-map number.
//
// The RXN block below has a single reactant template (six mapped ring
// carbons plus ten explicit, unmapped hydrogens) and a single product
// template (the six mapped carbons only). The test runs the reaction on
// cyclohexane and chlorocyclohexane, with and without MolOps::addHs(),
// first on the raw reaction and then again after sanitizeRxn(), and pins
// the number of product sets returned in each case.
void test65SanitizeUnmappedHs() {
BOOST_LOG(rdInfoLog) << "-------------------------------------" << std::endl;
BOOST_LOG(rdInfoLog) << "Tests sanitize reaction (unmapped Hs) "
<< std::endl;
// Reaction definition (Marvin-generated RXN file, embedded verbatim).
const std::string unmappedHs = \
"$RXN\n" \
"\n" \
" Marvin 031701170941\n" \
"\n" \
" 1 1\n" \
"$MOL\n" \
"\n" \
" Mrv1583 03171709412D \n" \
"\n" \
" 16 16 0 0 0 0 999 V2000\n" \
" -2.5620 0.5265 0.0000 C 0 0 0 0 0 0 0 0 0 1 0 0\n" \
" -2.4235 -0.2868 0.0000 C 0 0 0 0 0 0 0 0 0 2 0 0\n" \
" -3.0587 -0.8133 0.0000 C 0 0 0 0 0 0 0 0 0 3 0 0\n" \
" -3.8322 -0.5265 0.0000 C 0 0 0 0 0 0 0 0 0 4 0 0\n" \
" -3.3355 0.8133 0.0000 C 0 0 0 0 0 0 0 0 0 6 0 0\n" \
" -1.7581 0.7120 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n" \
" -3.0942 1.6022 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n" \
" -4.5124 0.9090 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n" \
" -4.3663 -1.1553 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n" \
" -3.3000 -1.6022 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n" \
" -3.8242 1.4780 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n" \
" -2.2307 1.2821 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n" \
" -3.9706 0.2868 0.0000 C 0 0 0 0 0 0 0 0 0 5 0 0\n" \
" -2.5700 -1.4780 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n" \
" -1.6500 -0.5736 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n" \
" -4.6278 -0.3082 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n" \
" 1 2 1 0 0 0 0\n" \
" 5 1 1 0 0 0 0\n" \
" 1 6 1 0 0 0 0\n" \
" 1 12 1 0 0 0 0\n" \
" 2 3 1 0 0 0 0\n" \
" 2 15 1 0 0 0 0\n" \
" 3 4 1 0 0 0 0\n" \
" 3 10 1 0 0 0 0\n" \
" 3 14 1 0 0 0 0\n" \
" 4 9 1 0 0 0 0\n" \
" 4 13 1 0 0 0 0\n" \
" 4 16 1 0 0 0 0\n" \
" 5 7 1 0 0 0 0\n" \
" 5 11 1 0 0 0 0\n" \
" 13 5 1 0 0 0 0\n" \
" 13 8 1 0 0 0 0\n" \
"M END\n" \
"$MOL\n" \
"\n" \
" Mrv1583 03171709412D \n" \
"\n" \
" 6 6 0 0 0 0 999 V2000\n" \
" 3.8966 0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 1 0 0\n" \
" 4.6111 0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 2 0 0\n" \
" 4.6111 -0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 3 0 0\n" \
" 3.8966 -0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 4 0 0\n" \
" 3.1821 -0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 5 0 0\n" \
" 3.1821 0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 6 0 0\n" \
" 1 2 1 0 0 0 0\n" \
" 6 1 1 0 0 0 0\n" \
" 2 3 1 0 0 0 0\n" \
" 3 4 1 0 0 0 0\n" \
" 4 5 1 0 0 0 0\n" \
" 5 6 1 0 0 0 0\n" \
"M END";
ChemicalReaction *rxn = RxnBlockToChemicalReaction(unmappedHs);
TEST_ASSERT(rxn);
rxn->initReactantMatchers();
// reactsN: molecule parsed from SMILES; hreactsN: the same molecule after
// MolOps::addHs().
MOL_SPTR_VECT reacts1, hreacts1, reacts2, hreacts2;
std::vector<MOL_SPTR_VECT> prods;
reacts1.push_back(ROMOL_SPTR(SmilesToMol("C1CCCCC1")));
hreacts1.push_back(ROMOL_SPTR(MolOps::addHs(*reacts1[0].get())));
reacts2.push_back(ROMOL_SPTR(SmilesToMol("C1CCCCC1Cl")));
hreacts2.push_back(ROMOL_SPTR(MolOps::addHs(*reacts2[0].get())));
// test with and without AddHs
// Before sanitization the template only produces results for the
// addHs() versions of the reactants.
prods = rxn->runReactants(reacts1);
TEST_ASSERT(prods.size() == 0);
prods = rxn->runReactants(hreacts1);
TEST_ASSERT(prods.size() == 768);
prods = rxn->runReactants(reacts2);
TEST_ASSERT(prods.size() == 0);
prods = rxn->runReactants(hreacts2);
TEST_ASSERT(prods.size() == 128);
// Test after sanitization (way fewer matches than with AddHs..)
// The reactants parsed straight from SMILES now produce results.
RxnOps::sanitizeRxn(*rxn);
prods = rxn->runReactants(reacts1);
TEST_ASSERT(prods.size() == 12);
prods = rxn->runReactants(reacts2);
TEST_ASSERT(prods.size() == 4);
delete rxn;
BOOST_LOG(rdInfoLog) << "Done" << std::endl;
}
// Regression test: RxnOps::sanitizeRxn() on a reaction whose reactant
// template carries explicit hydrogens that DO have atom-map numbers (7-16)
// but whose product template contains only the six mapped carbons, i.e. the
// hydrogen map numbers do not occur in the product at all.
//
// The expected product-set counts are the same as in the unmapped-hydrogen
// test: results only for the addHs() reactants before sanitization, and
// results for the plain SMILES reactants afterwards.
void test66SanitizeMappedHs() {
BOOST_LOG(rdInfoLog) << "-------------------------------------" << std::endl;
BOOST_LOG(rdInfoLog) << "Tests sanitize reaction (mapped hs in react but not prod) "
<< std::endl;
// H's are mapped in reactant but do not exist in product,
// they can be merged
// NOTE(review): despite its name, `unmappedHs` holds a block in which the
// reactant hydrogens are mapped.
const std::string unmappedHs = \
"$RXN\n" \
"\n" \
" Marvin 031701170941\n" \
"\n" \
" 1 1\n" \
"$MOL\n" \
"\n" \
" Mrv1583 03171709412D \n" \
"\n" \
" 16 16 0 0 0 0 999 V2000\n" \
" -2.5620 0.5265 0.0000 C 0 0 0 0 0 0 0 0 0 1 0 0\n" \
" -2.4235 -0.2868 0.0000 C 0 0 0 0 0 0 0 0 0 2 0 0\n" \
" -3.0587 -0.8133 0.0000 C 0 0 0 0 0 0 0 0 0 3 0 0\n" \
" -3.8322 -0.5265 0.0000 C 0 0 0 0 0 0 0 0 0 4 0 0\n" \
" -3.3355 0.8133 0.0000 C 0 0 0 0 0 0 0 0 0 6 0 0\n" \
" -1.7581 0.7120 0.0000 H 0 0 0 0 0 0 0 0 0 7 0 0\n" \
" -3.0942 1.6022 0.0000 H 0 0 0 0 0 0 0 0 0 8 0 0\n" \
" -4.5124 0.9090 0.0000 H 0 0 0 0 0 0 0 0 0 9 0 0\n" \
" -4.3663 -1.1553 0.0000 H 0 0 0 0 0 0 0 0 0 10 0 0\n" \
" -3.3000 -1.6022 0.0000 H 0 0 0 0 0 0 0 0 0 11 0 0\n" \
" -3.8242 1.4780 0.0000 H 0 0 0 0 0 0 0 0 0 12 0 0\n" \
" -2.2307 1.2821 0.0000 H 0 0 0 0 0 0 0 0 0 13 0 0\n" \
" -3.9706 0.2868 0.0000 C 0 0 0 0 0 0 0 0 0 5 0 0\n" \
" -2.5700 -1.4780 0.0000 H 0 0 0 0 0 0 0 0 0 14 0 0\n" \
" -1.6500 -0.5736 0.0000 H 0 0 0 0 0 0 0 0 0 15 0 0\n" \
" -4.6278 -0.3082 0.0000 H 0 0 0 0 0 0 0 0 0 16 0 0\n" \
" 1 2 1 0 0 0 0\n" \
" 5 1 1 0 0 0 0\n" \
" 1 6 1 0 0 0 0\n" \
" 1 12 1 0 0 0 0\n" \
" 2 3 1 0 0 0 0\n" \
" 2 15 1 0 0 0 0\n" \
" 3 4 1 0 0 0 0\n" \
" 3 10 1 0 0 0 0\n" \
" 3 14 1 0 0 0 0\n" \
" 4 9 1 0 0 0 0\n" \
" 4 13 1 0 0 0 0\n" \
" 4 16 1 0 0 0 0\n" \
" 5 7 1 0 0 0 0\n" \
" 5 11 1 0 0 0 0\n" \
" 13 5 1 0 0 0 0\n" \
" 13 8 1 0 0 0 0\n" \
"M END\n" \
"$MOL\n" \
"\n" \
" Mrv1583 03171709412D \n" \
"\n" \
" 6 6 0 0 0 0 999 V2000\n" \
" 3.8966 0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 1 0 0\n" \
" 4.6111 0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 2 0 0\n" \
" 4.6111 -0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 3 0 0\n" \
" 3.8966 -0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 4 0 0\n" \
" 3.1821 -0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 5 0 0\n" \
" 3.1821 0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 6 0 0\n" \
" 1 2 1 0 0 0 0\n" \
" 6 1 1 0 0 0 0\n" \
" 2 3 1 0 0 0 0\n" \
" 3 4 1 0 0 0 0\n" \
" 4 5 1 0 0 0 0\n" \
" 5 6 1 0 0 0 0\n" \
"M END";
ChemicalReaction *rxn = RxnBlockToChemicalReaction(unmappedHs);
TEST_ASSERT(rxn);
rxn->initReactantMatchers();
// reactsN: molecule parsed from SMILES; hreactsN: the same molecule after
// MolOps::addHs().
MOL_SPTR_VECT reacts1, hreacts1, reacts2, hreacts2;
std::vector<MOL_SPTR_VECT> prods;
reacts1.push_back(ROMOL_SPTR(SmilesToMol("C1CCCCC1")));
hreacts1.push_back(ROMOL_SPTR(MolOps::addHs(*reacts1[0].get())));
reacts2.push_back(ROMOL_SPTR(SmilesToMol("C1CCCCC1Cl")));
hreacts2.push_back(ROMOL_SPTR(MolOps::addHs(*reacts2[0].get())));
// test with and without AddHs
prods = rxn->runReactants(reacts1);
TEST_ASSERT(prods.size() == 0);
prods = rxn->runReactants(hreacts1);
TEST_ASSERT(prods.size() == 768);
prods = rxn->runReactants(reacts2);
TEST_ASSERT(prods.size() == 0);
prods = rxn->runReactants(hreacts2);
TEST_ASSERT(prods.size() == 128);
// Test after sanitization (way fewer matches than with AddHs..)
RxnOps::sanitizeRxn(*rxn);
prods = rxn->runReactants(reacts1);
TEST_ASSERT(prods.size() == 12);
prods = rxn->runReactants(reacts2);
TEST_ASSERT(prods.size() == 4);
delete rxn;
BOOST_LOG(rdInfoLog) << "Done" << std::endl;
}
// Regression test: RxnOps::sanitizeRxn() on a reaction in which explicit
// hydrogens carry atom-map numbers in BOTH the reactant template and the
// product template. In the product block every mapped hydrogen number sits
// on an H atom; the heavy atoms only carry map numbers 1-6.
//
// The expected product-set counts are the same as in the two preceding
// sanitize tests: results only for the addHs() reactants before
// sanitization, and results for the plain SMILES reactants afterwards.
void test67SanitizeMappedHsInReactantAndProd() {
BOOST_LOG(rdInfoLog) << "-------------------------------------" << std::endl;
BOOST_LOG(rdInfoLog) << "Tests sanitize reaction (mapped hs in react and prod) "
<< std::endl;
// H's are mapped in reactant and in prod
// they can be merged
// NOTE(review): despite its name, `unmappedHs` holds a block in which the
// hydrogens are mapped.
const std::string unmappedHs = \
"$RXN\n" \
"\n" \
" Marvin 031701171002\n" \
"\n" \
" 1 1\n" \
"$MOL\n" \
"\n" \
" Mrv1583 03171710022D \n" \
"\n" \
" 16 16 0 0 0 0 999 V2000\n" \
" -3.1881 0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 1 0 0\n" \
" -2.4736 0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 2 0 0\n" \
" -2.4736 -0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 3 0 0\n" \
" -3.1881 -0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 4 0 0\n" \
" -3.9025 0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 6 0 0\n" \
" -2.8178 1.5622 0.0000 H 0 0 0 0 0 0 0 0 0 7 0 0\n" \
" -4.3559 1.1018 0.0000 H 0 0 0 0 0 0 0 0 0 8 0 0\n" \
" -4.6170 -0.8250 0.0000 H 0 0 0 0 0 0 0 0 0 9 0 0\n" \
" -3.5583 -1.5622 0.0000 H 0 0 0 0 0 0 0 0 0 10 0 0\n" \
" -2.0203 -1.1018 0.0000 H 0 0 0 0 0 0 0 0 0 11 0 0\n" \
" -4.7262 0.4605 0.0000 H 0 0 0 0 0 0 0 0 0 12 0 0\n" \
" -3.5583 1.5622 0.0000 H 0 0 0 0 0 0 0 0 0 13 0 0\n" \
" -3.9025 -0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 5 0 0\n" \
" -1.6500 -0.4605 0.0000 H 0 0 0 0 0 0 0 0 0 14 0 0\n" \
" -1.7591 0.8250 0.0000 H 0 0 0 0 0 0 0 0 0 15 0 0\n" \
" -2.8178 -1.5622 0.0000 H 0 0 0 0 0 0 0 0 0 16 0 0\n" \
" 1 2 1 0 0 0 0\n" \
" 5 1 1 0 0 0 0\n" \
" 1 6 1 0 0 0 0\n" \
" 1 12 1 0 0 0 0\n" \
" 2 3 1 0 0 0 0\n" \
" 2 15 1 0 0 0 0\n" \
" 3 4 1 0 0 0 0\n" \
" 3 10 1 0 0 0 0\n" \
" 3 14 1 0 0 0 0\n" \
" 4 9 1 0 0 0 0\n" \
" 4 13 1 0 0 0 0\n" \
" 4 16 1 0 0 0 0\n" \
" 5 7 1 0 0 0 0\n" \
" 5 11 1 0 0 0 0\n" \
" 13 5 1 0 0 0 0\n" \
" 13 8 1 0 0 0 0\n" \
"M END\n" \
"$MOL\n" \
"\n" \
" Mrv1583 03171710022D \n" \
"\n" \
" 17 17 0 0 0 0 999 V2000\n" \
" 4.1309 0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 1 0 0\n" \
" 4.8454 0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 2 0 0\n" \
" 4.8454 -0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 3 0 0\n" \
" 4.1309 -0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 4 0 0\n" \
" 3.4165 -0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 5 0 0\n" \
" 3.4165 0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 6 0 0\n" \
" 4.5012 1.5622 0.0000 H 0 0 0 0 0 0 0 0 0 13 0 0\n" \
" 3.7607 1.5622 0.0000 H 0 0 0 0 0 0 0 0 0 7 0 0\n" \
" 5.6690 0.4605 0.0000 H 0 0 0 0 0 0 0 0 0 15 0 0\n" \
" 5.2987 1.1018 0.0000 H 0 0 0 0 0 0 0 0 0 17 0 0\n" \
" 5.2987 -1.1018 0.0000 H 0 0 0 0 0 0 0 0 0 14 0 0\n" \
" 3.7607 -1.5622 0.0000 H 0 0 0 0 0 0 0 0 0 16 0 0\n" \
" 2.9631 -1.1018 0.0000 H 0 0 0 0 0 0 0 0 0 9 0 0\n" \
" 2.5929 -0.4605 0.0000 H 0 0 0 0 0 0 0 0 0 18 0 0\n" \
" 2.7020 0.8250 0.0000 H 0 0 0 0 0 0 0 0 0 8 0 0\n" \
" 4.5012 -1.5622 0.0000 H 0 0 0 0 0 0 0 0 0 10 0 0\n" \
" 5.6690 -0.4605 0.0000 H 0 0 0 0 0 0 0 0 0 11 0 0\n" \
" 1 2 1 0 0 0 0\n" \
" 6 1 1 0 0 0 0\n" \
" 1 7 1 0 0 0 0\n" \
" 1 8 1 0 0 0 0\n" \
" 2 9 1 0 0 0 0\n" \
" 2 10 1 0 0 0 0\n" \
" 2 3 1 0 0 0 0\n" \
" 3 4 1 0 0 0 0\n" \
" 3 11 1 0 0 0 0\n" \
" 3 17 1 0 0 0 0\n" \
" 4 5 1 0 0 0 0\n" \
" 4 12 1 0 0 0 0\n" \
" 4 16 1 0 0 0 0\n" \
" 5 6 1 0 0 0 0\n" \
" 5 14 1 0 0 0 0\n" \
" 5 13 1 0 0 0 0\n" \
" 6 15 1 0 0 0 0\n" \
"M END\n";
ChemicalReaction *rxn = RxnBlockToChemicalReaction(unmappedHs);
TEST_ASSERT(rxn);
rxn->initReactantMatchers();
// reactsN: molecule parsed from SMILES; hreactsN: the same molecule after
// MolOps::addHs().
MOL_SPTR_VECT reacts1, hreacts1, reacts2, hreacts2;
std::vector<MOL_SPTR_VECT> prods;
reacts1.push_back(ROMOL_SPTR(SmilesToMol("C1CCCCC1")));
hreacts1.push_back(ROMOL_SPTR(MolOps::addHs(*reacts1[0].get())));
reacts2.push_back(ROMOL_SPTR(SmilesToMol("C1CCCCC1Cl")));
hreacts2.push_back(ROMOL_SPTR(MolOps::addHs(*reacts2[0].get())));
// test with and without AddHs
prods = rxn->runReactants(reacts1);
TEST_ASSERT(prods.size() == 0);
prods = rxn->runReactants(hreacts1);
TEST_ASSERT(prods.size() == 768);
prods = rxn->runReactants(reacts2);
TEST_ASSERT(prods.size() == 0);
prods = rxn->runReactants(hreacts2);
TEST_ASSERT(prods.size() == 128);
// Test after sanitization (way fewer matches than with AddHs..)
RxnOps::sanitizeRxn(*rxn);
prods = rxn->runReactants(reacts1);
TEST_ASSERT(prods.size() == 12);
prods = rxn->runReactants(reacts2);
TEST_ASSERT(prods.size() == 4);
delete rxn;
BOOST_LOG(rdInfoLog) << "Done" << std::endl;
}
void test68MappedHToHeavy() {
const std::string rxnblock = \
"$RXN\n" \
"\n" \
" Marvin 031701171005\n" \
"\n" \
" 1 1\n" \
"$MOL\n" \
"\n" \
" Mrv1583 03171710052D \n" \
"\n" \
" 3 2 0 0 0 0 999 V2000\n" \
" -1.2721 -0.0116 0.0000 C 0 0 0 0 0 0 0 0 0 1 0 0\n" \
" -1.9866 -0.4241 0.0000 C 0 0 0 0 0 0 0 0 0 2 0 0\n" \
" -1.2721 0.8134 0.0000 H 0 0 0 0 0 0 0 0 0 3 0 0\n" \
" 2 1 1 0 0 0 0\n" \
" 1 3 1 0 0 0 0\n" \
"M END\n" \
"$MOL\n" \
"\n" \
" Mrv1583 03171710052D \n" \
"\n" \
" 3 2 0 0 0 0 999 V2000\n" \
" 2.3886 -0.0563 0.0000 C 0 0 0 0 0 0 0 0 0 1 0 0\n" \
" 1.6741 -0.4688 0.0000 C 0 0 0 0 0 0 0 0 0 2 0 0\n" \
" 2.3886 0.7688 0.0000 C 0 0 0 0 0 0 0 0 0 3 0 0\n" \
" 2 1 1 0 0 0 0\n" \
" 1 3 1 0 0 0 0\n" \
"M END\n";
ChemicalReaction *rxn = RxnBlockToChemicalReaction(rxnblock);
TEST_ASSERT(rxn);
rxn->initReactantMatchers();
MOL_SPTR_VECT reacts1, hreacts1, reacts2, hreacts2;
std::vector<MOL_SPTR_VECT> prods;
reacts1.push_back(ROMOL_SPTR(SmilesToMol("CC")));
hreacts1.push_back(ROMOL_SPTR(MolOps::addHs(*reacts1[0].get())));
// test with and without AddHs
prods = rxn->runReactants(reacts1);
TEST_ASSERT(prods.size() == 0);
prods = rxn->runReactants(hreacts1);
TEST_ASSERT(prods.size() == 6);
std::stringstream sstrm;
rdWarningLog->SetTee(sstrm);
RxnOps::sanitizeRxn(*rxn);
std::string s = sstrm.str();
std::cerr << s << std::endl;
TEST_ASSERT(s.find("Reaction has explicit hydrogens, reactants will need explicit hydrogens (addHs)") != std::string::npos);
rdWarningLog->ClearTee();
delete rxn;
}
int main() {
RDLog::InitLogs();
@@ -6062,6 +6467,11 @@ int main() {
test43Github243();
test64Github1266();
test65SanitizeUnmappedHs();
test66SanitizeMappedHs();
test67SanitizeMappedHsInReactantAndProd();
test68MappedHToHeavy();
BOOST_LOG(rdInfoLog)
<< "*******************************************************\n";
return (0);

View File

@@ -345,7 +345,11 @@ Vianello, Maciek Wojcikowski
## Deprecated code (to be removed in a future release):
- rdkit.VLib python module
- The SanitizeRxn parameter set "ChemDrawRxnAdjustParams" has been renamed to
"MatchOnlyAtRgroupsAdjustParams". The old settings did not reflect
how integrations with SciQuest or the Perkin Elmer ELN behave and
were confusing to users (especially since they were not explicit).
## Removed code:
## Contrib updates: