mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-03 21:44:30 +08:00
Fix/sanitizerxn merge hs (#1367)
* Fixes exceptions to use the correct function name * Deprecates the ChemDrawRxnAdjustParams name, because SciQuest integration/ELN searching doesn’t map to this behavior; it is renamed to MatchOnlyAtRgroupsAdjustParams * Adds a Python wrapper for the deprecated API * fixHs is now more intelligent about merging Hs: the only case where Hs can’t be merged is when they are mapped to a non-hydrogen atom in the product. * Updates uses of the deprecated ChemDrawRxnAdjustParams
This commit is contained in:
committed by
Greg Landrum
parent
1d04eb3351
commit
a02f2b0bb1
@@ -317,7 +317,7 @@ bool ChemicalReaction::validate(unsigned int &numWarnings,
|
||||
bool isMoleculeReactantOfReaction(const ChemicalReaction &rxn, const ROMol &mol,
|
||||
unsigned int &which) {
|
||||
if (!rxn.isInitialized()) {
|
||||
throw ChemicalReactionException("initMatchers() must be called first");
|
||||
throw ChemicalReactionException("initReactantMatchers() must be called first");
|
||||
}
|
||||
which = 0;
|
||||
for (MOL_SPTR_VECT::const_iterator iter = rxn.beginReactantTemplates();
|
||||
@@ -338,7 +338,7 @@ bool isMoleculeReactantOfReaction(const ChemicalReaction &rxn,
|
||||
bool isMoleculeProductOfReaction(const ChemicalReaction &rxn, const ROMol &mol,
|
||||
unsigned int &which) {
|
||||
if (!rxn.isInitialized()) {
|
||||
throw ChemicalReactionException("initMatchers() must be called first");
|
||||
throw ChemicalReactionException("initReactantMatchers() must be called first");
|
||||
}
|
||||
which = 0;
|
||||
for (MOL_SPTR_VECT::const_iterator iter = rxn.beginProductTemplates();
|
||||
@@ -359,7 +359,7 @@ bool isMoleculeProductOfReaction(const ChemicalReaction &rxn,
|
||||
bool isMoleculeAgentOfReaction(const ChemicalReaction &rxn, const ROMol &mol,
|
||||
unsigned int &which) {
|
||||
if (!rxn.isInitialized()) {
|
||||
throw ChemicalReactionException("initMatchers() must be called first");
|
||||
throw ChemicalReactionException("initReactantMatchers() must be called first");
|
||||
}
|
||||
which = 0;
|
||||
for (MOL_SPTR_VECT::const_iterator iter = rxn.beginAgentTemplates();
|
||||
@@ -398,7 +398,7 @@ void addRecursiveQueriesToReaction(
|
||||
std::vector<std::vector<std::pair<unsigned int, std::string> > >
|
||||
*reactantLabels) {
|
||||
if (!rxn.isInitialized()) {
|
||||
throw ChemicalReactionException("initMatchers() must be called first");
|
||||
throw ChemicalReactionException("initReactantMatchers() must be called first");
|
||||
}
|
||||
|
||||
if (reactantLabels != NULL) {
|
||||
@@ -576,7 +576,7 @@ bool getMappedAtoms(T &rIt, std::map<int, const Atom *> &mappedAtoms) {
|
||||
VECT_INT_VECT getReactingAtoms(const ChemicalReaction &rxn,
|
||||
bool mappedAtomsOnly) {
|
||||
if (!rxn.isInitialized()) {
|
||||
throw ChemicalReactionException("initMatchers() must be called first");
|
||||
throw ChemicalReactionException("initReactantMatchers() must be called first");
|
||||
}
|
||||
VECT_INT_VECT res;
|
||||
res.resize(rxn.getNumReactantTemplates());
|
||||
|
||||
@@ -255,7 +255,6 @@ void fixRGroups(ChemicalReaction &rxn) {
|
||||
|
||||
// if we have query atoms without rlabels, make proper rlabels if possible
|
||||
// ensure that every rlabel in the reactant has one in the product
|
||||
|
||||
void fixAtomMaps(ChemicalReaction &rxn) {
|
||||
int max_atom_map = getMaxProp<int>(
|
||||
rxn,
|
||||
@@ -324,15 +323,67 @@ void fixReactantTemplateAromaticity(ChemicalReaction &rxn) {
|
||||
}
|
||||
|
||||
// Prepares the explicit hydrogens of the reactant templates of `rxn` for
// merging, then merges them.
//
// Step 1: collect the atom-map numbers that appear on non-hydrogen atoms of
//         the product templates.
// Step 2: for every mapped hydrogen in a reactant template:
//           - if its map number is NOT among those collected in step 1, the
//             map number is cleared from the hydrogen;
//           - otherwise the mapping is kept and a warning is written to
//             rdWarningLog (once per such hydrogen).
// Step 3: call MolOps::mergeQueryHs(mol, mergeUnmappedOnly = true) on every
//         reactant template.
//
// Fails the PRECONDITION if a reactant template is not an RWMol.
void fixHs(ChemicalReaction &rxn) {
  // Atom-map numbers found on heavy (non-hydrogen) product atoms. Only the
  // keys are consulted; the map is used as a set.
  std::map<int, bool> heavyProductAtomMaps;

  for (MOL_SPTR_VECT::iterator it = rxn.beginProductTemplates();
       it != rxn.endProductTemplates(); ++it) {
    for (ROMol::AtomIterator atIt = (*it)->beginAtoms();
         atIt != (*it)->endAtoms(); ++atIt) {
      Atom *atom = (*atIt);
      int atomMap = 0;
      // atomic number 1 is hydrogen; only heavy atoms are recorded here
      if (atom->getAtomicNum() != 1 &&
          atom->getPropIfPresent(common_properties::molAtomMapNumber,
                                 atomMap) &&
          atomMap) {
        heavyProductAtomMaps[atomMap] = true;
      }
    }
  }

  for (MOL_SPTR_VECT::iterator it = rxn.beginReactantTemplates();
       it != rxn.endReactantTemplates(); ++it) {
    for (ROMol::AtomIterator atIt = (*it)->beginAtoms();
         atIt != (*it)->endAtoms(); ++atIt) {
      Atom *atom = (*atIt);
      int atomMap = 0;
      const bool isMappedHydrogen =
          atom->getAtomicNum() == 1 &&
          atom->getPropIfPresent(common_properties::molAtomMapNumber,
                                 atomMap) &&
          atomMap;
      if (!isMappedHydrogen) {
        continue;
      }

      if (heavyProductAtomMaps.find(atomMap) == heavyProductAtomMaps.end()) {
        // the hydrogen is not mapped onto a heavy product atom: drop the
        // mapping so that the hydrogen counts as unmapped in the merge below
        atom->clearProp(common_properties::molAtomMapNumber);
      } else {
        // the mapping is kept, so this hydrogen stays mapped in the template
        BOOST_LOG(rdWarningLog) <<
            "Reaction has explicit hydrogens, reactants will need explicit hydrogens (addHs)"
                                << std::endl;
      }
    }
  }

  const bool mergeUnmappedOnly = true;
  for (MOL_SPTR_VECT::iterator it = rxn.beginReactantTemplates();
       it != rxn.endReactantTemplates(); ++it) {
    RWMol *rw = dynamic_cast<RWMol *>(it->get());
    PRECONDITION(rw, "Oops, not really an RWMol?");
    MolOps::mergeQueryHs(*rw, mergeUnmappedOnly);
  }
}
|
||||
|
||||
|
||||
@@ -82,7 +82,22 @@ inline const MolOps::AdjustQueryParameters DefaultRxnAdjustParams() {
|
||||
}
|
||||
|
||||
// Default adjustment parameters for ChemDraw style matching of reagents
|
||||
// -- deprecated - renamed MatchOnlyAtRgroupsAdjustParams
|
||||
// -- this doesn't match sciquest style searching
|
||||
inline const MolOps::AdjustQueryParameters ChemDrawRxnAdjustParams() {
|
||||
BOOST_LOG(rdWarningLog) <<
|
||||
" deprecated -- please use MatchOnlyAtRgroupsAdjustParams instead" << std::endl;
|
||||
MolOps::AdjustQueryParameters params;
|
||||
params.adjustDegree = true;
|
||||
params.adjustDegreeFlags = MolOps::ADJUST_IGNOREDUMMIES;
|
||||
params.adjustRingCount = false;
|
||||
params.adjustRingCountFlags = MolOps::ADJUST_IGNORENONE;
|
||||
params.makeDummiesQueries = false;
|
||||
params.aromatizeIfPossible = true;
|
||||
return params;
|
||||
}
|
||||
|
||||
inline const MolOps::AdjustQueryParameters MatchOnlyAtRgroupsAdjustParams() {
|
||||
MolOps::AdjustQueryParameters params;
|
||||
params.adjustDegree = true;
|
||||
params.adjustDegreeFlags = MolOps::ADJUST_IGNOREDUMMIES;
|
||||
|
||||
@@ -816,7 +816,10 @@ Sample Usage:\n\
|
||||
"Returns the default adjustment parameters for reactant templates");
|
||||
|
||||
python::def("GetChemDrawRxnAdjustParams", RDKit::RxnOps::ChemDrawRxnAdjustParams,
|
||||
"Returns the chemdraw style adjustment parameters for reactant templates");
|
||||
"(deprecated, see MatchOnlyAtRgroupsAdjustParams)\n\tReturns the chemdraw style adjustment parameters for reactant templates");
|
||||
|
||||
python::def("MatchOnlyAtRgroupsAdjustParams", RDKit::RxnOps::MatchOnlyAtRgroupsAdjustParams,
|
||||
"Only match at the specified rgroup locations in the reactant templates");
|
||||
|
||||
std::string docstring = "feed me";
|
||||
python::def(
|
||||
|
||||
@@ -43,6 +43,7 @@
|
||||
#include <GraphMol/ChemReactions/ReactionPickler.h>
|
||||
#include <GraphMol/ChemReactions/ReactionRunner.h>
|
||||
#include <GraphMol/ChemReactions/ReactionUtils.h>
|
||||
#include <GraphMol/ChemReactions/SanitizeRxn.h>
|
||||
#include <GraphMol/FileParsers/FileParsers.h>
|
||||
#include <GraphMol/Atom.h>
|
||||
#include <fstream>
|
||||
@@ -5985,6 +5986,410 @@ void test64Github1266() {
|
||||
BOOST_LOG(rdInfoLog) << "Done" << std::endl;
|
||||
}
|
||||
|
||||
void test65SanitizeUnmappedHs() {
|
||||
BOOST_LOG(rdInfoLog) << "-------------------------------------" << std::endl;
|
||||
BOOST_LOG(rdInfoLog) << "Tests sanitize reaction (unmapped Hs) "
|
||||
<< std::endl;
|
||||
|
||||
const std::string unmappedHs = \
|
||||
"$RXN\n" \
|
||||
"\n" \
|
||||
" Marvin 031701170941\n" \
|
||||
"\n" \
|
||||
" 1 1\n" \
|
||||
"$MOL\n" \
|
||||
"\n" \
|
||||
" Mrv1583 03171709412D \n" \
|
||||
"\n" \
|
||||
" 16 16 0 0 0 0 999 V2000\n" \
|
||||
" -2.5620 0.5265 0.0000 C 0 0 0 0 0 0 0 0 0 1 0 0\n" \
|
||||
" -2.4235 -0.2868 0.0000 C 0 0 0 0 0 0 0 0 0 2 0 0\n" \
|
||||
" -3.0587 -0.8133 0.0000 C 0 0 0 0 0 0 0 0 0 3 0 0\n" \
|
||||
" -3.8322 -0.5265 0.0000 C 0 0 0 0 0 0 0 0 0 4 0 0\n" \
|
||||
" -3.3355 0.8133 0.0000 C 0 0 0 0 0 0 0 0 0 6 0 0\n" \
|
||||
" -1.7581 0.7120 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n" \
|
||||
" -3.0942 1.6022 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n" \
|
||||
" -4.5124 0.9090 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n" \
|
||||
" -4.3663 -1.1553 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n" \
|
||||
" -3.3000 -1.6022 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n" \
|
||||
" -3.8242 1.4780 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n" \
|
||||
" -2.2307 1.2821 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n" \
|
||||
" -3.9706 0.2868 0.0000 C 0 0 0 0 0 0 0 0 0 5 0 0\n" \
|
||||
" -2.5700 -1.4780 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n" \
|
||||
" -1.6500 -0.5736 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n" \
|
||||
" -4.6278 -0.3082 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n" \
|
||||
" 1 2 1 0 0 0 0\n" \
|
||||
" 5 1 1 0 0 0 0\n" \
|
||||
" 1 6 1 0 0 0 0\n" \
|
||||
" 1 12 1 0 0 0 0\n" \
|
||||
" 2 3 1 0 0 0 0\n" \
|
||||
" 2 15 1 0 0 0 0\n" \
|
||||
" 3 4 1 0 0 0 0\n" \
|
||||
" 3 10 1 0 0 0 0\n" \
|
||||
" 3 14 1 0 0 0 0\n" \
|
||||
" 4 9 1 0 0 0 0\n" \
|
||||
" 4 13 1 0 0 0 0\n" \
|
||||
" 4 16 1 0 0 0 0\n" \
|
||||
" 5 7 1 0 0 0 0\n" \
|
||||
" 5 11 1 0 0 0 0\n" \
|
||||
" 13 5 1 0 0 0 0\n" \
|
||||
" 13 8 1 0 0 0 0\n" \
|
||||
"M END\n" \
|
||||
"$MOL\n" \
|
||||
"\n" \
|
||||
" Mrv1583 03171709412D \n" \
|
||||
"\n" \
|
||||
" 6 6 0 0 0 0 999 V2000\n" \
|
||||
" 3.8966 0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 1 0 0\n" \
|
||||
" 4.6111 0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 2 0 0\n" \
|
||||
" 4.6111 -0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 3 0 0\n" \
|
||||
" 3.8966 -0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 4 0 0\n" \
|
||||
" 3.1821 -0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 5 0 0\n" \
|
||||
" 3.1821 0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 6 0 0\n" \
|
||||
" 1 2 1 0 0 0 0\n" \
|
||||
" 6 1 1 0 0 0 0\n" \
|
||||
" 2 3 1 0 0 0 0\n" \
|
||||
" 3 4 1 0 0 0 0\n" \
|
||||
" 4 5 1 0 0 0 0\n" \
|
||||
" 5 6 1 0 0 0 0\n" \
|
||||
"M END";
|
||||
|
||||
ChemicalReaction *rxn = RxnBlockToChemicalReaction(unmappedHs);
|
||||
TEST_ASSERT(rxn);
|
||||
rxn->initReactantMatchers();
|
||||
MOL_SPTR_VECT reacts1, hreacts1, reacts2, hreacts2;
|
||||
std::vector<MOL_SPTR_VECT> prods;
|
||||
|
||||
reacts1.push_back(ROMOL_SPTR(SmilesToMol("C1CCCCC1")));
|
||||
hreacts1.push_back(ROMOL_SPTR(MolOps::addHs(*reacts1[0].get())));
|
||||
|
||||
reacts2.push_back(ROMOL_SPTR(SmilesToMol("C1CCCCC1Cl")));
|
||||
hreacts2.push_back(ROMOL_SPTR(MolOps::addHs(*reacts2[0].get())));
|
||||
|
||||
// test with and without AddHs
|
||||
prods = rxn->runReactants(reacts1);
|
||||
TEST_ASSERT(prods.size() == 0);
|
||||
prods = rxn->runReactants(hreacts1);
|
||||
TEST_ASSERT(prods.size() == 768);
|
||||
|
||||
prods = rxn->runReactants(reacts2);
|
||||
TEST_ASSERT(prods.size() == 0);
|
||||
|
||||
prods = rxn->runReactants(hreacts2);
|
||||
TEST_ASSERT(prods.size() == 128);
|
||||
|
||||
// Test after sanitization (way fewer matches than with AddHs..)
|
||||
RxnOps::sanitizeRxn(*rxn);
|
||||
prods = rxn->runReactants(reacts1);
|
||||
TEST_ASSERT(prods.size() == 12);
|
||||
|
||||
prods = rxn->runReactants(reacts2);
|
||||
TEST_ASSERT(prods.size() == 4);
|
||||
|
||||
|
||||
delete rxn;
|
||||
|
||||
BOOST_LOG(rdInfoLog) << "Done" << std::endl;
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
void test66SanitizeMappedHs() {
|
||||
BOOST_LOG(rdInfoLog) << "-------------------------------------" << std::endl;
|
||||
BOOST_LOG(rdInfoLog) << "Tests sanitize reaction (mapped hs in react but not prod) "
|
||||
<< std::endl;
|
||||
|
||||
// H's are mapped in reactant but do not exist in product,
|
||||
// they can be merged
|
||||
const std::string unmappedHs = \
|
||||
"$RXN\n" \
|
||||
"\n" \
|
||||
" Marvin 031701170941\n" \
|
||||
"\n" \
|
||||
" 1 1\n" \
|
||||
"$MOL\n" \
|
||||
"\n" \
|
||||
" Mrv1583 03171709412D \n" \
|
||||
"\n" \
|
||||
" 16 16 0 0 0 0 999 V2000\n" \
|
||||
" -2.5620 0.5265 0.0000 C 0 0 0 0 0 0 0 0 0 1 0 0\n" \
|
||||
" -2.4235 -0.2868 0.0000 C 0 0 0 0 0 0 0 0 0 2 0 0\n" \
|
||||
" -3.0587 -0.8133 0.0000 C 0 0 0 0 0 0 0 0 0 3 0 0\n" \
|
||||
" -3.8322 -0.5265 0.0000 C 0 0 0 0 0 0 0 0 0 4 0 0\n" \
|
||||
" -3.3355 0.8133 0.0000 C 0 0 0 0 0 0 0 0 0 6 0 0\n" \
|
||||
" -1.7581 0.7120 0.0000 H 0 0 0 0 0 0 0 0 0 7 0 0\n" \
|
||||
" -3.0942 1.6022 0.0000 H 0 0 0 0 0 0 0 0 0 8 0 0\n" \
|
||||
" -4.5124 0.9090 0.0000 H 0 0 0 0 0 0 0 0 0 9 0 0\n" \
|
||||
" -4.3663 -1.1553 0.0000 H 0 0 0 0 0 0 0 0 0 10 0 0\n" \
|
||||
" -3.3000 -1.6022 0.0000 H 0 0 0 0 0 0 0 0 0 11 0 0\n" \
|
||||
" -3.8242 1.4780 0.0000 H 0 0 0 0 0 0 0 0 0 12 0 0\n" \
|
||||
" -2.2307 1.2821 0.0000 H 0 0 0 0 0 0 0 0 0 13 0 0\n" \
|
||||
" -3.9706 0.2868 0.0000 C 0 0 0 0 0 0 0 0 0 5 0 0\n" \
|
||||
" -2.5700 -1.4780 0.0000 H 0 0 0 0 0 0 0 0 0 14 0 0\n" \
|
||||
" -1.6500 -0.5736 0.0000 H 0 0 0 0 0 0 0 0 0 15 0 0\n" \
|
||||
" -4.6278 -0.3082 0.0000 H 0 0 0 0 0 0 0 0 0 16 0 0\n" \
|
||||
" 1 2 1 0 0 0 0\n" \
|
||||
" 5 1 1 0 0 0 0\n" \
|
||||
" 1 6 1 0 0 0 0\n" \
|
||||
" 1 12 1 0 0 0 0\n" \
|
||||
" 2 3 1 0 0 0 0\n" \
|
||||
" 2 15 1 0 0 0 0\n" \
|
||||
" 3 4 1 0 0 0 0\n" \
|
||||
" 3 10 1 0 0 0 0\n" \
|
||||
" 3 14 1 0 0 0 0\n" \
|
||||
" 4 9 1 0 0 0 0\n" \
|
||||
" 4 13 1 0 0 0 0\n" \
|
||||
" 4 16 1 0 0 0 0\n" \
|
||||
" 5 7 1 0 0 0 0\n" \
|
||||
" 5 11 1 0 0 0 0\n" \
|
||||
" 13 5 1 0 0 0 0\n" \
|
||||
" 13 8 1 0 0 0 0\n" \
|
||||
"M END\n" \
|
||||
"$MOL\n" \
|
||||
"\n" \
|
||||
" Mrv1583 03171709412D \n" \
|
||||
"\n" \
|
||||
" 6 6 0 0 0 0 999 V2000\n" \
|
||||
" 3.8966 0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 1 0 0\n" \
|
||||
" 4.6111 0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 2 0 0\n" \
|
||||
" 4.6111 -0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 3 0 0\n" \
|
||||
" 3.8966 -0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 4 0 0\n" \
|
||||
" 3.1821 -0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 5 0 0\n" \
|
||||
" 3.1821 0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 6 0 0\n" \
|
||||
" 1 2 1 0 0 0 0\n" \
|
||||
" 6 1 1 0 0 0 0\n" \
|
||||
" 2 3 1 0 0 0 0\n" \
|
||||
" 3 4 1 0 0 0 0\n" \
|
||||
" 4 5 1 0 0 0 0\n" \
|
||||
" 5 6 1 0 0 0 0\n" \
|
||||
"M END";
|
||||
|
||||
ChemicalReaction *rxn = RxnBlockToChemicalReaction(unmappedHs);
|
||||
TEST_ASSERT(rxn);
|
||||
rxn->initReactantMatchers();
|
||||
MOL_SPTR_VECT reacts1, hreacts1, reacts2, hreacts2;
|
||||
std::vector<MOL_SPTR_VECT> prods;
|
||||
|
||||
reacts1.push_back(ROMOL_SPTR(SmilesToMol("C1CCCCC1")));
|
||||
hreacts1.push_back(ROMOL_SPTR(MolOps::addHs(*reacts1[0].get())));
|
||||
|
||||
reacts2.push_back(ROMOL_SPTR(SmilesToMol("C1CCCCC1Cl")));
|
||||
hreacts2.push_back(ROMOL_SPTR(MolOps::addHs(*reacts2[0].get())));
|
||||
|
||||
// test with and without AddHs
|
||||
prods = rxn->runReactants(reacts1);
|
||||
TEST_ASSERT(prods.size() == 0);
|
||||
prods = rxn->runReactants(hreacts1);
|
||||
TEST_ASSERT(prods.size() == 768);
|
||||
|
||||
prods = rxn->runReactants(reacts2);
|
||||
TEST_ASSERT(prods.size() == 0);
|
||||
|
||||
prods = rxn->runReactants(hreacts2);
|
||||
TEST_ASSERT(prods.size() == 128);
|
||||
|
||||
// Test after sanitization (way fewer matches than with AddHs..)
|
||||
RxnOps::sanitizeRxn(*rxn);
|
||||
prods = rxn->runReactants(reacts1);
|
||||
TEST_ASSERT(prods.size() == 12);
|
||||
|
||||
prods = rxn->runReactants(reacts2);
|
||||
TEST_ASSERT(prods.size() == 4);
|
||||
|
||||
delete rxn;
|
||||
|
||||
BOOST_LOG(rdInfoLog) << "Done" << std::endl;
|
||||
|
||||
}
|
||||
|
||||
void test67SanitizeMappedHsInReactantAndProd() {
|
||||
BOOST_LOG(rdInfoLog) << "-------------------------------------" << std::endl;
|
||||
BOOST_LOG(rdInfoLog) << "Tests sanitize reaction (mapped hs in react and prod) "
|
||||
<< std::endl;
|
||||
|
||||
// H's are mapped in reactant and in prod
|
||||
// they can be merged
|
||||
const std::string unmappedHs = \
|
||||
"$RXN\n" \
|
||||
"\n" \
|
||||
" Marvin 031701171002\n" \
|
||||
"\n" \
|
||||
" 1 1\n" \
|
||||
"$MOL\n" \
|
||||
"\n" \
|
||||
" Mrv1583 03171710022D \n" \
|
||||
"\n" \
|
||||
" 16 16 0 0 0 0 999 V2000\n" \
|
||||
" -3.1881 0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 1 0 0\n" \
|
||||
" -2.4736 0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 2 0 0\n" \
|
||||
" -2.4736 -0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 3 0 0\n" \
|
||||
" -3.1881 -0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 4 0 0\n" \
|
||||
" -3.9025 0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 6 0 0\n" \
|
||||
" -2.8178 1.5622 0.0000 H 0 0 0 0 0 0 0 0 0 7 0 0\n" \
|
||||
" -4.3559 1.1018 0.0000 H 0 0 0 0 0 0 0 0 0 8 0 0\n" \
|
||||
" -4.6170 -0.8250 0.0000 H 0 0 0 0 0 0 0 0 0 9 0 0\n" \
|
||||
" -3.5583 -1.5622 0.0000 H 0 0 0 0 0 0 0 0 0 10 0 0\n" \
|
||||
" -2.0203 -1.1018 0.0000 H 0 0 0 0 0 0 0 0 0 11 0 0\n" \
|
||||
" -4.7262 0.4605 0.0000 H 0 0 0 0 0 0 0 0 0 12 0 0\n" \
|
||||
" -3.5583 1.5622 0.0000 H 0 0 0 0 0 0 0 0 0 13 0 0\n" \
|
||||
" -3.9025 -0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 5 0 0\n" \
|
||||
" -1.6500 -0.4605 0.0000 H 0 0 0 0 0 0 0 0 0 14 0 0\n" \
|
||||
" -1.7591 0.8250 0.0000 H 0 0 0 0 0 0 0 0 0 15 0 0\n" \
|
||||
" -2.8178 -1.5622 0.0000 H 0 0 0 0 0 0 0 0 0 16 0 0\n" \
|
||||
" 1 2 1 0 0 0 0\n" \
|
||||
" 5 1 1 0 0 0 0\n" \
|
||||
" 1 6 1 0 0 0 0\n" \
|
||||
" 1 12 1 0 0 0 0\n" \
|
||||
" 2 3 1 0 0 0 0\n" \
|
||||
" 2 15 1 0 0 0 0\n" \
|
||||
" 3 4 1 0 0 0 0\n" \
|
||||
" 3 10 1 0 0 0 0\n" \
|
||||
" 3 14 1 0 0 0 0\n" \
|
||||
" 4 9 1 0 0 0 0\n" \
|
||||
" 4 13 1 0 0 0 0\n" \
|
||||
" 4 16 1 0 0 0 0\n" \
|
||||
" 5 7 1 0 0 0 0\n" \
|
||||
" 5 11 1 0 0 0 0\n" \
|
||||
" 13 5 1 0 0 0 0\n" \
|
||||
" 13 8 1 0 0 0 0\n" \
|
||||
"M END\n" \
|
||||
"$MOL\n" \
|
||||
"\n" \
|
||||
" Mrv1583 03171710022D \n" \
|
||||
"\n" \
|
||||
" 17 17 0 0 0 0 999 V2000\n" \
|
||||
" 4.1309 0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 1 0 0\n" \
|
||||
" 4.8454 0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 2 0 0\n" \
|
||||
" 4.8454 -0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 3 0 0\n" \
|
||||
" 4.1309 -0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 4 0 0\n" \
|
||||
" 3.4165 -0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 5 0 0\n" \
|
||||
" 3.4165 0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 6 0 0\n" \
|
||||
" 4.5012 1.5622 0.0000 H 0 0 0 0 0 0 0 0 0 13 0 0\n" \
|
||||
" 3.7607 1.5622 0.0000 H 0 0 0 0 0 0 0 0 0 7 0 0\n" \
|
||||
" 5.6690 0.4605 0.0000 H 0 0 0 0 0 0 0 0 0 15 0 0\n" \
|
||||
" 5.2987 1.1018 0.0000 H 0 0 0 0 0 0 0 0 0 17 0 0\n" \
|
||||
" 5.2987 -1.1018 0.0000 H 0 0 0 0 0 0 0 0 0 14 0 0\n" \
|
||||
" 3.7607 -1.5622 0.0000 H 0 0 0 0 0 0 0 0 0 16 0 0\n" \
|
||||
" 2.9631 -1.1018 0.0000 H 0 0 0 0 0 0 0 0 0 9 0 0\n" \
|
||||
" 2.5929 -0.4605 0.0000 H 0 0 0 0 0 0 0 0 0 18 0 0\n" \
|
||||
" 2.7020 0.8250 0.0000 H 0 0 0 0 0 0 0 0 0 8 0 0\n" \
|
||||
" 4.5012 -1.5622 0.0000 H 0 0 0 0 0 0 0 0 0 10 0 0\n" \
|
||||
" 5.6690 -0.4605 0.0000 H 0 0 0 0 0 0 0 0 0 11 0 0\n" \
|
||||
" 1 2 1 0 0 0 0\n" \
|
||||
" 6 1 1 0 0 0 0\n" \
|
||||
" 1 7 1 0 0 0 0\n" \
|
||||
" 1 8 1 0 0 0 0\n" \
|
||||
" 2 9 1 0 0 0 0\n" \
|
||||
" 2 10 1 0 0 0 0\n" \
|
||||
" 2 3 1 0 0 0 0\n" \
|
||||
" 3 4 1 0 0 0 0\n" \
|
||||
" 3 11 1 0 0 0 0\n" \
|
||||
" 3 17 1 0 0 0 0\n" \
|
||||
" 4 5 1 0 0 0 0\n" \
|
||||
" 4 12 1 0 0 0 0\n" \
|
||||
" 4 16 1 0 0 0 0\n" \
|
||||
" 5 6 1 0 0 0 0\n" \
|
||||
" 5 14 1 0 0 0 0\n" \
|
||||
" 5 13 1 0 0 0 0\n" \
|
||||
" 6 15 1 0 0 0 0\n" \
|
||||
"M END\n";
|
||||
|
||||
ChemicalReaction *rxn = RxnBlockToChemicalReaction(unmappedHs);
|
||||
TEST_ASSERT(rxn);
|
||||
rxn->initReactantMatchers();
|
||||
MOL_SPTR_VECT reacts1, hreacts1, reacts2, hreacts2;
|
||||
std::vector<MOL_SPTR_VECT> prods;
|
||||
|
||||
reacts1.push_back(ROMOL_SPTR(SmilesToMol("C1CCCCC1")));
|
||||
hreacts1.push_back(ROMOL_SPTR(MolOps::addHs(*reacts1[0].get())));
|
||||
|
||||
reacts2.push_back(ROMOL_SPTR(SmilesToMol("C1CCCCC1Cl")));
|
||||
hreacts2.push_back(ROMOL_SPTR(MolOps::addHs(*reacts2[0].get())));
|
||||
|
||||
// test with and without AddHs
|
||||
prods = rxn->runReactants(reacts1);
|
||||
TEST_ASSERT(prods.size() == 0);
|
||||
prods = rxn->runReactants(hreacts1);
|
||||
TEST_ASSERT(prods.size() == 768);
|
||||
|
||||
prods = rxn->runReactants(reacts2);
|
||||
TEST_ASSERT(prods.size() == 0);
|
||||
|
||||
prods = rxn->runReactants(hreacts2);
|
||||
TEST_ASSERT(prods.size() == 128);
|
||||
|
||||
// Test after sanitization (way fewer matches than with AddHs..)
|
||||
RxnOps::sanitizeRxn(*rxn);
|
||||
prods = rxn->runReactants(reacts1);
|
||||
TEST_ASSERT(prods.size() == 12);
|
||||
|
||||
prods = rxn->runReactants(reacts2);
|
||||
TEST_ASSERT(prods.size() == 4);
|
||||
|
||||
|
||||
delete rxn;
|
||||
|
||||
BOOST_LOG(rdInfoLog) << "Done" << std::endl;
|
||||
|
||||
}
|
||||
void test68MappedHToHeavy() {
|
||||
const std::string rxnblock = \
|
||||
"$RXN\n" \
|
||||
"\n" \
|
||||
" Marvin 031701171005\n" \
|
||||
"\n" \
|
||||
" 1 1\n" \
|
||||
"$MOL\n" \
|
||||
"\n" \
|
||||
" Mrv1583 03171710052D \n" \
|
||||
"\n" \
|
||||
" 3 2 0 0 0 0 999 V2000\n" \
|
||||
" -1.2721 -0.0116 0.0000 C 0 0 0 0 0 0 0 0 0 1 0 0\n" \
|
||||
" -1.9866 -0.4241 0.0000 C 0 0 0 0 0 0 0 0 0 2 0 0\n" \
|
||||
" -1.2721 0.8134 0.0000 H 0 0 0 0 0 0 0 0 0 3 0 0\n" \
|
||||
" 2 1 1 0 0 0 0\n" \
|
||||
" 1 3 1 0 0 0 0\n" \
|
||||
"M END\n" \
|
||||
"$MOL\n" \
|
||||
"\n" \
|
||||
" Mrv1583 03171710052D \n" \
|
||||
"\n" \
|
||||
" 3 2 0 0 0 0 999 V2000\n" \
|
||||
" 2.3886 -0.0563 0.0000 C 0 0 0 0 0 0 0 0 0 1 0 0\n" \
|
||||
" 1.6741 -0.4688 0.0000 C 0 0 0 0 0 0 0 0 0 2 0 0\n" \
|
||||
" 2.3886 0.7688 0.0000 C 0 0 0 0 0 0 0 0 0 3 0 0\n" \
|
||||
" 2 1 1 0 0 0 0\n" \
|
||||
" 1 3 1 0 0 0 0\n" \
|
||||
"M END\n";
|
||||
|
||||
ChemicalReaction *rxn = RxnBlockToChemicalReaction(rxnblock);
|
||||
TEST_ASSERT(rxn);
|
||||
rxn->initReactantMatchers();
|
||||
MOL_SPTR_VECT reacts1, hreacts1, reacts2, hreacts2;
|
||||
std::vector<MOL_SPTR_VECT> prods;
|
||||
|
||||
reacts1.push_back(ROMOL_SPTR(SmilesToMol("CC")));
|
||||
hreacts1.push_back(ROMOL_SPTR(MolOps::addHs(*reacts1[0].get())));
|
||||
|
||||
// test with and without AddHs
|
||||
prods = rxn->runReactants(reacts1);
|
||||
TEST_ASSERT(prods.size() == 0);
|
||||
|
||||
prods = rxn->runReactants(hreacts1);
|
||||
TEST_ASSERT(prods.size() == 6);
|
||||
|
||||
std::stringstream sstrm;
|
||||
rdWarningLog->SetTee(sstrm);
|
||||
RxnOps::sanitizeRxn(*rxn);
|
||||
std::string s = sstrm.str();
|
||||
std::cerr << s << std::endl;
|
||||
TEST_ASSERT(s.find("Reaction has explicit hydrogens, reactants will need explicit hydrogens (addHs)") != std::string::npos);
|
||||
rdWarningLog->ClearTee();
|
||||
|
||||
delete rxn;
|
||||
}
|
||||
|
||||
int main() {
|
||||
RDLog::InitLogs();
|
||||
|
||||
@@ -6062,6 +6467,11 @@ int main() {
|
||||
|
||||
test43Github243();
|
||||
test64Github1266();
|
||||
test65SanitizeUnmappedHs();
|
||||
test66SanitizeMappedHs();
|
||||
test67SanitizeMappedHsInReactantAndProd();
|
||||
test68MappedHToHeavy();
|
||||
|
||||
BOOST_LOG(rdInfoLog)
|
||||
<< "*******************************************************\n";
|
||||
return (0);
|
||||
|
||||
@@ -345,7 +345,11 @@ Vianello, Maciek Wojcikowski
|
||||
|
||||
## Deprecated code (to be removed in a future release):
|
||||
- rdkit.VLib python module
|
||||
|
||||
- The SanitizeRxn parameter set "ChemDrawRxnAdjustParams" has been renamed to
|
||||
"MatchOnlyAtRgroupsAdjustParams". These settings did not reflect
|
||||
how integrations with SciQuest or the Perkin Elmer ELN behaved and
|
||||
were confusing to users (especially since they were not explicit)
|
||||
|
||||
## Removed code:
|
||||
|
||||
## Contrib updates:
|
||||
|
||||
Reference in New Issue
Block a user