Files
rdkit/Code/GraphMol/ChemReactions/MDLParser.cpp
Ric a6b26253ff Fix (most of) mem problems (#2123)
* do not use new on loggers

* del pointers in testDistGeom

* Update Dict hasNonPOD status on bulk update

* delete new Dicts in memtest1.cpp

* fixes in MolSuppliers and testFMCS

* PeriodicTable singleton as unique_ptr

* fix EEM_arrays leak

* fix leaks in testPBF

* fix ParamCollection leak in test UFF

* fix leaks in MMFF

* clear prop dict before read in in pickler

* fix leaks in testFreeSASA

* fix leaks in test3D

* modernize Dict.h & SmilesParse.cpp

* fix leaks in testQuery

* fix leaks in testCrystalFF

* fix leaks in cxsmilesTest

* fix leaks in Catalog & mol cat test

* fix leaks in ShapeUtils & tests

* fix leaks in testSubgraphs1

* fix leaks testFingerprintGenerators

* fix leaks in Catalog/FilterCatalog

* fix leaks in graphmolqueryTest

* these changes reduce bison parse leaks

* fixed leaks in testChirality.cpp

* fix leaks + 2 tests in testMolWriter

* fix 4m leaks in substructLibraryTest

* small improvements to molTautomerTest; still leaks

* fix leaks in testRGroupDecomp

* fix leaks in test; parser still leaks

* fix leaks in itertest

* fix 4m leaks in testDepictor

* fixes in smatest; still leaking due to parser

* fixes in testSLNParse; still leaking due to parser

* flex/bison: always add atoms with ownership; smarts error cleanup

* fix leaks in testReaction

* fix leaks in testSubstructMatch

* fix leaks in resMolSupplierTest

* fix leaks in testChemTransforms + bug in ChemTransforms

* fix leaks in testPickler

* fix leaks in testMolTransform

* fix leaks in testFragCatalog

* fix leak in testSLNParse. Still leaks due to Smiles

* fixed most leaks in testMolSupplier

* pre bison fix

* fix some atom & bond parse problems; others still fail

* bison smiles & smarts, atoms & bonds more or less fixed

* fix leaks in molopstest.cpp

* fix leaks in testFingerprints, MACCS.cpp & AtomPairs.cpp

* fix leaks in moldraw2Dtest1

* fix leaks in testDescriptors

* fix leaks in testInchi

* fix leaks in testUFFForceFieldHelpers

* fix leaks in hanoiTest & new_canon.h

* fix leaks in testMMFFForceField

* fix leaks in graphmolTest1

* fix leaks in testMMFFForceFieldHelpers

* fix leaks in testDistGeomHelpers

* fix leaks in testMolAlign

* initialize occupancy & temp factor with default values

* fix leak in TautomerTransform

* updated suppressions

* fix testStructChecker

* fix logging & py tests

* fix TautomerTransform class/struct issue

* remove misplaced delete in testSLNParse

* deinit in testAvalonLib1

* fix Avalon-triggered(?) bug in StructChecker/Pattern.cpp

* fix random testMolWriter/Supplier fails

- diversify output file names to avoid clashing.
- unify Writers close/destruct behavior.
- flushing/closing in tests.

* use reset in FFs Params.cpp

* comments on testMMFFForceField

* unrequired 'if's added to mol suppliers

* correct cast in FilterCatalog.h

* use unique_ptr in MACCS Patterns

* remove unrequired if in new_canon

* update & move suppressions
2018-10-29 14:33:26 +00:00

379 lines
13 KiB
C++

// $Id$
//
// Copyright (c) 2007-2014, Novartis Institutes for BioMedical Research Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Novartis Institutes for BioMedical Research Inc.
// nor the names of its contributors may be used to endorse or promote
// products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
#include <GraphMol/ChemReactions/Reaction.h>
#include <GraphMol/ChemReactions/ReactionParser.h>
#include <GraphMol/FileParsers/FileParsers.h>
#include <GraphMol/FileParsers/FileParserUtils.h>
#include <sstream>
#include <memory>
#include <RDGeneral/StreamOps.h>
#include <RDGeneral/FileParseException.h>
#include <fstream>
#include <RDGeneral/BoostStartInclude.h>
#include <boost/lexical_cast.hpp>
#include <boost/algorithm/string/trim.hpp>
#include <boost/tokenizer.hpp>
#include <RDGeneral/BoostEndInclude.h>
#include "ReactionUtils.h"
namespace RDKit {
namespace {
void ParseV2000RxnBlock(std::istream &inStream, unsigned int &line,
ChemicalReaction *&rxn) {
std::string tempStr;
// FIX: parse name and comment fields
line++;
tempStr = getLine(inStream);
line++;
tempStr = getLine(inStream);
line++;
tempStr = getLine(inStream);
line++;
tempStr = getLine(inStream);
if (inStream.eof()) {
throw ChemicalReactionParserException("premature EOF hit.");
}
unsigned int nReacts = 0, nProds = 0, nAgents = 0;
unsigned int spos = 0;
if (tempStr.size() < 6) {
throw ChemicalReactionParserException("rxn counts line is too short");
}
rxn = new ChemicalReaction();
try {
nReacts =
FileParserUtils::stripSpacesAndCast<unsigned int>(tempStr.substr(0, 3));
spos = 3;
nProds = FileParserUtils::stripSpacesAndCast<unsigned int>(
tempStr.substr(spos, 3));
spos = 6;
;
if (tempStr.size() > 6) {
std::string trimmed = boost::trim_copy(tempStr.substr(spos, 3));
if (trimmed.size() > 0) {
nAgents = FileParserUtils::stripSpacesAndCast<unsigned int>(
tempStr.substr(spos, 3));
spos = 9;
}
}
} catch (boost::bad_lexical_cast &) {
std::ostringstream errout;
errout << "Cannot convert " << tempStr.substr(spos, 3) << " to int";
throw ChemicalReactionParserException(errout.str());
}
for (unsigned int i = 0; i < nReacts; ++i) {
line++;
tempStr = getLine(inStream);
if (inStream.eof()) {
throw ChemicalReactionParserException("premature EOF hit.");
}
if (tempStr.substr(0, 4) != "$MOL") {
throw ChemicalReactionParserException("$MOL header not found");
}
ROMol *react;
try {
react = MolDataStreamToMol(inStream, line, false);
} catch (FileParseException &e) {
std::ostringstream errout;
errout << "Cannot parse reactant " << i << ". The error was:\n\t"
<< e.message();
throw ChemicalReactionParserException(errout.str());
}
if (!react) {
throw ChemicalReactionParserException("Null reactant in reaction file.");
}
rxn->addReactantTemplate(ROMOL_SPTR(react));
}
for (unsigned int i = 0; i < nProds; ++i) {
line++;
tempStr = getLine(inStream);
if (inStream.eof()) {
throw ChemicalReactionParserException("premature EOF hit.");
}
if (tempStr.substr(0, 4) != "$MOL") {
throw ChemicalReactionParserException("$MOL header not found");
}
ROMol *prod;
try {
prod = MolDataStreamToMol(inStream, line, false);
} catch (FileParseException &e) {
std::ostringstream errout;
errout << "Cannot parse product " << i << ". The error was:\n\t"
<< e.message();
throw ChemicalReactionParserException(errout.str());
}
if (!prod) {
throw ChemicalReactionParserException("Null product in reaction file.");
}
rxn->addProductTemplate(ROMOL_SPTR(prod));
}
for (unsigned int i = 0; i < nAgents; ++i) {
line++;
tempStr = getLine(inStream);
if (inStream.eof()) {
throw ChemicalReactionParserException("premature EOF hit.");
}
if (tempStr.substr(0, 4) != "$MOL") {
throw ChemicalReactionParserException("$MOL header not found");
}
ROMol *agent;
try {
agent = MolDataStreamToMol(inStream, line, false);
} catch (FileParseException &e) {
std::ostringstream errout;
errout << "Cannot parse agent " << i << ". The error was:\n\t"
<< e.message();
throw ChemicalReactionParserException(errout.str());
}
rxn->addAgentTemplate(ROMOL_SPTR(agent));
}
}
void ParseV3000RxnBlock(std::istream &inStream, unsigned int &line,
ChemicalReaction *&rxn) {
std::string tempStr;
// skip the header block:
line++;
tempStr = getLine(inStream);
line++;
tempStr = getLine(inStream);
line++;
tempStr = getLine(inStream);
line++;
tempStr = FileParserUtils::getV3000Line(&inStream, line);
boost::to_upper(tempStr);
tempStr = boost::trim_copy(tempStr);
std::vector<std::string> tokens;
boost::split(tokens, tempStr, boost::is_any_of(" \t"),
boost::token_compress_on);
if (tokens.size() < 3 || tokens[0] != "COUNTS") {
throw ChemicalReactionParserException("bad counts line");
}
unsigned int nReacts =
FileParserUtils::stripSpacesAndCast<unsigned int>(tokens[1]);
unsigned int nProds =
FileParserUtils::stripSpacesAndCast<unsigned int>(tokens[2]);
unsigned int nAgents = 0;
if (tokens.size() > 3) {
nAgents = FileParserUtils::stripSpacesAndCast<unsigned int>(tokens[3]);
}
tempStr = FileParserUtils::getV3000Line(&inStream, line);
boost::to_upper(tempStr);
if (tempStr.length() < 14 || tempStr.substr(0, 14) != "BEGIN REACTANT") {
throw FileParseException("BEGIN REACTANT line not found");
}
rxn = new ChemicalReaction();
for (unsigned int i = 0; i < nReacts; ++i) {
RWMol *react;
unsigned int natoms, nbonds;
bool chiralityPossible;
Conformer *conf = nullptr;
react = new RWMol();
try {
FileParserUtils::ParseV3000CTAB(&inStream, line, react, conf,
chiralityPossible, natoms, nbonds, true,
false);
} catch (FileParseException &e) {
std::ostringstream errout;
errout << "Cannot parse reactant " << i << ". The error was:\n\t"
<< e.message();
delete react;
throw ChemicalReactionParserException(errout.str());
}
if (!react) {
throw ChemicalReactionParserException("Null reactant in reaction file.");
}
rxn->addReactantTemplate(ROMOL_SPTR(dynamic_cast<ROMol *>(react)));
}
tempStr = FileParserUtils::getV3000Line(&inStream, line);
boost::to_upper(tempStr);
if (tempStr.length() < 12 || tempStr.substr(0, 12) != "END REACTANT") {
delete rxn;
throw FileParseException("END REACTANT line not found");
}
tempStr = FileParserUtils::getV3000Line(&inStream, line);
boost::to_upper(tempStr);
if (tempStr.length() < 13 || tempStr.substr(0, 13) != "BEGIN PRODUCT") {
delete rxn;
throw FileParseException("BEGIN PRODUCT line not found");
}
for (unsigned int i = 0; i < nProds; ++i) {
RWMol *prod;
unsigned int natoms, nbonds;
bool chiralityPossible;
Conformer *conf = nullptr;
prod = new RWMol();
try {
FileParserUtils::ParseV3000CTAB(&inStream, line, prod, conf,
chiralityPossible, natoms, nbonds, true,
false);
} catch (FileParseException &e) {
std::ostringstream errout;
errout << "Cannot parse product " << i << ". The error was:\n\t"
<< e.message();
delete prod;
throw ChemicalReactionParserException(errout.str());
}
if (!prod) {
throw ChemicalReactionParserException("Null product in reaction file.");
}
rxn->addProductTemplate(ROMOL_SPTR(dynamic_cast<ROMol *>(prod)));
}
tempStr = FileParserUtils::getV3000Line(&inStream, line);
boost::to_upper(tempStr);
if (tempStr.length() < 11 || tempStr.substr(0, 11) != "END PRODUCT") {
delete rxn;
throw FileParseException("END PRODUCT line not found");
}
if (nAgents) {
tempStr = FileParserUtils::getV3000Line(&inStream, line);
boost::to_upper(tempStr);
if (tempStr.length() < 14 || tempStr.substr(0, 14) != "BEGIN AGENT") {
delete rxn;
throw FileParseException("BEGIN AGENT line not found");
}
}
for (unsigned int i = 0; i < nAgents; ++i) {
RWMol *agent;
unsigned int natoms, nbonds;
bool chiralityPossible;
Conformer *conf = nullptr;
agent = new RWMol();
try {
FileParserUtils::ParseV3000CTAB(&inStream, line, agent, conf,
chiralityPossible, natoms, nbonds, true,
false);
} catch (FileParseException &e) {
std::ostringstream errout;
errout << "Cannot parse agent " << i << ". The error was:\n\t"
<< e.message();
delete agent;
throw ChemicalReactionParserException(errout.str());
}
rxn->addAgentTemplate(ROMOL_SPTR(dynamic_cast<ROMol *>(agent)));
}
if (nAgents) {
tempStr = FileParserUtils::getV3000Line(&inStream, line);
boost::to_upper(tempStr);
if (tempStr.length() < 12 || tempStr.substr(0, 12) != "END AGENT") {
delete rxn;
throw FileParseException("END AGENT line not found");
}
}
}
} // end of local namespace
//! Parse a text stream in MDL rxn format into a ChemicalReaction
/*!
  \param inStream  stream positioned at the "$RXN" header line
  \param line      running line counter, advanced as lines are consumed

  \return a newly allocated reaction; the caller takes ownership

  \throws ChemicalReactionParserException on premature EOF, a missing "$RXN"
          header, or any such exception raised while parsing the block.
          Exceptions of other types raised by the block parsers are not caught
          here and propagate unchanged.
*/
ChemicalReaction *RxnDataStreamToChemicalReaction(std::istream &inStream,
                                                  unsigned int &line) {
  // header line
  ++line;
  const std::string header = getLine(inStream);
  if (inStream.eof()) {
    throw ChemicalReactionParserException("premature EOF hit.");
  }
  if (header.substr(0, 4) != "$RXN") {
    throw ChemicalReactionParserException("$RXN header not found");
  }
  // "$RXN V3000" selects the V3000 parser; anything else is read as V2000
  const bool isV3000 = header.size() >= 10 && header.substr(5, 5) == "V3000";

  ChemicalReaction *res = nullptr;
  try {
    if (isV3000) {
      ParseV3000RxnBlock(inStream, line, res);
    } else {
      ParseV2000RxnBlock(inStream, line, res);
    }
  } catch (ChemicalReactionParserException &) {
    // catch our exceptions and throw them back after cleanup; a bare `throw`
    // re-raises the original object instead of a (possibly sliced) copy
    delete res;
    res = nullptr;
    throw;
  }

  // convert atoms to queries (reactant and product templates only):
  auto atomsToQueries = [](MOL_SPTR_VECT::const_iterator first,
                           MOL_SPTR_VECT::const_iterator last) {
    for (; first != last; ++first) {
      for (ROMol::AtomIterator atomIt = (*first)->beginAtoms();
           atomIt != (*first)->endAtoms(); ++atomIt) {
        FileParserUtils::replaceAtomWithQueryAtom(
            static_cast<RWMol *>(first->get()), (*atomIt));
      }
    }
  };
  atomsToQueries(res->beginReactantTemplates(), res->endReactantTemplates());
  atomsToQueries(res->beginProductTemplates(), res->endProductTemplates());

  updateProductsStereochem(res);

  // RXN-based reactions do not have implicit properties
  res->setImplicitPropertiesFlag(false);

  return res;
}
//! Parse a string holding an MDL rxn block into a ChemicalReaction
/*!
  \param rxnBlock  the complete text of the rxn block

  \return whatever RxnDataStreamToChemicalReaction() returns for that text
*/
ChemicalReaction *RxnBlockToChemicalReaction(const std::string &rxnBlock) {
  // wrap the text in a stream and start counting lines from zero
  std::istringstream blockStream(rxnBlock);
  unsigned int lineNo = 0;
  ChemicalReaction *parsed =
      RxnDataStreamToChemicalReaction(blockStream, lineNo);
  return parsed;
}
//! Parse a file in MDL rxn format into a ChemicalReaction
/*!
  \param fName  path of the file to read

  \return nullptr if the file stream could not be opened or already reports
          EOF; otherwise whatever RxnDataStreamToChemicalReaction() returns
*/
ChemicalReaction *RxnFileToChemicalReaction(const std::string &fName) {
  std::ifstream rxnFile(fName.c_str());
  // nothing to parse: unusable stream, or stream already flagged as at EOF
  if (!rxnFile || rxnFile.eof()) {
    return nullptr;
  }
  unsigned int lineNo = 0;
  return RxnDataStreamToChemicalReaction(rxnFile, lineNo);
}
}