Files
rdkit/Code/GraphMol/FileParsers/SDMolSupplier.cpp
Ric a6b26253ff Fix (most of) mem problems (#2123)
* do not use new on loggers

* del pointers in testDistGeom

* Update Dict hasNonPOD status on bulk update

* delete new Dicts in memtest1.cpp

* fixes in MolSuppliers and testFMCS

* PeriodicTable singleton as unique_ptr

* fix EEM_arrays leak

* fix leaks in testPBF

* fix ParamCollection leak in test UFF

* fix leaks in MMFF

* clear prop dict before read in in pickler

* fix leaks in testFreeSASA

* fix leaks in test3D

* modernize Dict.h & SmilesParse.cpp

* fix leaks in testQuery

* fix leaks in testCrystalFF

* fix leaks in cxsmilesTest

* fix leaks in Catalog & mol cat test

* fix leaks in ShapeUtils & tests

* fix leaks in testSubgraphs1

* fix leaks testFingerprintGenerators

* fix leaks in Catalog/FilterCatalog

* fix leaks in graphmolqueryTest

* these changes reduce bison parse leaks

* fixed leaks in testChirality.cpp

* fix leaks + 2 tests in testMolWriter

* fix 4m leaks in substructLibraryTest

* small improvements to molTautomerTest; still leaks

* fix leaks in testRGroupDecomp

* fix leaks in test; parser still leaks

* fix leaks in itertest

* fix 4m leaks in testDepictor

* fixes in smatest; still leaking due to parser

* fixes in testSLNParse; still leaking due to parser

* flex/bison: always add atoms with ownership; smarts error cleanup

* fix leaks in testReaction

* fix leaks in testSubstructMatch

* fix leaks in resMolSupplierTest

* fix leaks in testChemTransforms + bug in ChemTransforms

* fix leaks in testPickler

* fix leaks in testMolTransform

* fix leaks in testFragCatalog

* fix leak in testSLNParse. Still leaks due to Smiles

* fixed most leaks in testMolSupplier

* pre bison fix

* fix some atom & bond parse problems; others still fail

* bison smiles & smarts, atoms & bonds more or less fixed

* fix leaks in molopstest.cpp

* fix leaks in testFingerprints, MACCS.cpp & AtomPairs.cpp

* fix leaks in moldraw2Dtest1

* fix leaks in testDescriptors

* fix leaks in testInchi

* fix leaks in testUFFForceFieldHelpers

* fix leaks in hanoiTest & new_canon.h

* fix leaks in testMMFFForceField

* fix leaks in graphmolTest1

* fix leaks in testMMFFForceFieldHelpers

* fix leaks in testDistGeomHelpers

* fix leaks in testMolAlign

* initialize occupancy & temp factor with default values

* fix leak in TautomerTransform

* updated suppressions

* fix testStructChecker

* fix logging & py tests

* fix TautomerTransform class/struct issue

* remove misplaced delete in testSLNParse

* deinit in testAvalonLib1

* fix Avalon-triggered(?) bug in StructChecker/Pattern.cpp

* fix random testMolWriter/Supplier fails

- diversify output file names to avoid clashing.
- unify Writers close/destruct behavior.
- flushing/closing in tests.

* use reset in FFs Params.cpp

* comments on testMMFFForceField

* unrequired 'if's added to mol suppliers

* correct cast in FilterCatalog.h

* use unique_ptr in MACCS Patterns

* remove unrequired if in new_canon

* update & move suppressions
2018-10-29 14:33:26 +00:00

298 lines
8.4 KiB
C++

// $Id$
//
// Copyright (C) 2002-2012 Greg Landrum and Rational Discovery LLC
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
#include <RDGeneral/FileParseException.h>
#include <RDGeneral/BadFileException.h>
#include <RDGeneral/StreamOps.h>
#include <RDGeneral/RDLog.h>
#include <GraphMol/SanitException.h>
#include <boost/algorithm/string.hpp>
#include "MolSupplier.h"
#include "FileParsers.h"
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
namespace RDKit {
//! Constructs a supplier that reads SD records from the file \c fileName.
/*!
  The file is opened here in binary mode and the resulting stream is owned by
  the supplier (df_owner is set to true).

  \param fileName       path of the SD file to open
  \param sanitize       stored in df_sanitize
  \param removeHs       stored in df_removeHs
  \param strictParsing  stored in df_strictParsing

  \throws BadFileException if the file stream cannot be opened
*/
SDMolSupplier::SDMolSupplier(const std::string &fileName, bool sanitize,
                             bool removeHs, bool strictParsing) {
  init();

  // FIX: this binary mode of opening file is here because of a bug in VC++ 6.0
  // the function "tellg" does not work correctly if we do not open it this way
  //   Jan 2009: Confirmed that this is still the case in visual studio 2008
  std::istream *fileStream =
      new std::ifstream(fileName.c_str(), std::ios_base::binary);
  const bool unusable =
      !fileStream || (!(*fileStream)) || (fileStream->bad());
  if (unusable) {
    std::ostringstream errout;
    errout << "Bad input file " << fileName;
    if (fileStream) {
      delete fileStream;
    }
    throw BadFileException(errout.str());
  }

  // the supplier owns the stream it just opened
  dp_inStream = fileStream;
  df_owner = true;

  df_sanitize = sanitize;
  df_removeHs = removeHs;
  df_strictParsing = strictParsing;

  // the first record starts wherever the stream currently is
  d_molpos.push_back(dp_inStream->tellg());

  this->checkForEnd();
  if (df_end) {
    // checkForEnd() sets d_len if we're at EOF. undo that (was GitHub issue
    // 19):
    d_len = 0;
  }
  POSTCONDITION(dp_inStream, "bad instream");
}
//! Constructs a supplier that reads SD records from an existing stream.
/*!
  \param inStream       stream to read from; must not be null
  \param takeOwnership  stored in df_owner (whether the supplier owns the
                        stream)
  \param sanitize       stored in df_sanitize
  \param removeHs       stored in df_removeHs
  \param strictParsing  stored in df_strictParsing
*/
SDMolSupplier::SDMolSupplier(std::istream *inStream, bool takeOwnership,
                             bool sanitize, bool removeHs,
                             bool strictParsing) {
  PRECONDITION(inStream, "bad stream");
  init();

  dp_inStream = inStream;
  df_owner = takeOwnership;

  df_sanitize = sanitize;
  df_removeHs = removeHs;
  df_strictParsing = strictParsing;

  // the first record starts wherever the stream currently is
  d_molpos.push_back(dp_inStream->tellg());

  this->checkForEnd();
  if (df_end) {
    // checkForEnd() sets d_len if we're at EOF. undo that (was GitHub issue
    // 19):
    d_len = 0;
  }
  POSTCONDITION(dp_inStream, "bad instream");
}
//! Resets the supplier's bookkeeping to its initial values.
/*!
  Runs the base-class initialization first, then marks the record count as
  unknown (d_len == -1) and puts the read cursor on the first record
  (d_last == 0).
*/
void SDMolSupplier::init() {
  ForwardSDMolSupplier::init();
  d_last = 0;
  d_len = -1;
}
//! Replaces the supplier's input with the SD text passed in.
/*!
  Shared implementation behind the setData() overloads. Any stream the
  supplier currently owns is deleted, the bookkeeping is reinitialized, and a
  new string stream over \c text becomes the (owned) input.

  \param text      SD-format text to read records from
  \param sanitize  stored in df_sanitize
  \param removeHs  stored in df_removeHs

  Note: df_strictParsing is not touched here; the setData() overloads set it
  before calling this function.
*/
void SDMolSupplier::setDataCommon(const std::string &text, bool sanitize,
                                  bool removeHs) {
  if (dp_inStream && df_owner) {
    delete dp_inStream;
  }
  // never keep a pointer to the stream we are replacing: if anything below
  // throws, the supplier must not be left referring to a deleted stream
  dp_inStream = nullptr;
  init();

  // record offsets cached for the previous input are meaningless for the new
  // text; without this a second setData() call would append to stale offsets
  // and next()/moveTo() would seek to positions from the old data
  d_molpos.clear();

  dp_inStream = new std::istringstream(text, std::ios_base::binary);
  df_owner = true;
  d_molpos.push_back(dp_inStream->tellg());
  df_sanitize = sanitize;
  df_removeHs = removeHs;
  this->checkForEnd();
  if (df_end) {
    // checkForEnd() sets d_len if we're at EOF. undo that (was GitHub issue
    // 19):
    d_len = 0;
  }
  POSTCONDITION(dp_inStream, "bad instream");
}
void SDMolSupplier::setData(const std::string &text, bool sanitize,
bool removeHs) {
df_strictParsing = true;
setDataCommon(text, sanitize, removeHs);
}
void SDMolSupplier::setData(const std::string &text, bool sanitize,
bool removeHs, bool strictParsing) {
df_strictParsing = strictParsing;
setDataCommon(text, sanitize, removeHs);
}
void SDMolSupplier::checkForEnd() {
PRECONDITION(dp_inStream, "no stream");
// we will call it end of file if we have more than 4 contiguous empty lines
// or we reach end of file in the meantime
if (dp_inStream->eof()) {
df_end = true;
d_len = rdcast<int>(d_molpos.size());
return;
}
// we are not at the end of file, check for blank lines
unsigned int nempty = 0;
std::string tempStr, stmp;
for (unsigned int i = 0; i < 4; i++) {
tempStr = getLine(dp_inStream);
if (dp_inStream->eof()) {
df_end = true;
d_len = rdcast<int>(d_molpos.size());
return;
}
if (tempStr.find_first_not_of(" \t\r\n") == std::string::npos) {
++nempty;
}
}
if (nempty == 4) {
df_end = true;
d_len = rdcast<int>(d_molpos.size());
}
}
//! Rewinds the supplier to the start of the stream.
/*!
  Clears the stream's error flags, seeks to the beginning, and resets the
  end flag, the record cursor and the line counter. The cached record
  offsets and the cached length are left untouched.
*/
void SDMolSupplier::reset() {
  PRECONDITION(dp_inStream, "no stream");

  // stream state first ...
  dp_inStream->clear();
  dp_inStream->seekg(0, std::ios::beg);

  // ... then our own cursor state
  d_line = 0;
  d_last = 0;
  df_end = false;
}
//! Reads the record at the current cursor and advances the cursor.
/*!
  \return the molecule produced by _next(), or a null pointer if the stream
          reports EOF right after seeking to the record

  \throws FileParseException ("EOF hit.") when the end flag is set and the
          cursor is at or past the known number of records
*/
ROMol *SDMolSupplier::next() {
  PRECONDITION(dp_inStream, "no stream");
  if (df_end && d_last >= d_len) {
    throw FileParseException("EOF hit.");
  }

  // position the stream on the record the cursor points at
  dp_inStream->seekg(d_molpos[d_last]);

  if (dp_inStream->eof()) {
    // FIX: we should probably be throwing an exception here
    df_end = true;
    d_len = rdcast<int>(d_molpos.size());
    return nullptr;
  }

  ROMol *mol = _next();
  ++d_last;

  // remember where the following record would start before checkForEnd()
  // reads ahead
  const std::streampos nextStart = dp_inStream->tellg();
  this->checkForEnd();

  const bool offsetIsNew = d_last >= static_cast<int>(d_molpos.size());
  if (!this->df_end && offsetIsNew) {
    d_molpos.push_back(nextStart);
  }
  return mol;
}
//! Returns the raw text of the record with index \c idx.
/*!
  The record spans from its own start offset up to the start offset of the
  following record; if there is no following record (moveTo(idx + 1) throws
  FileParseException) it spans to the end of the stream.

  The record cursor (d_last) is restored to its value on entry once both
  offsets are known; the stream's read position is not restored.

  \param idx  zero-based index of the record

  \return the bytes actually read for that record; empty if the computed
          span is empty or negative

  \throws FileParseException (from moveTo) if \c idx itself is out of range
*/
std::string SDMolSupplier::getItemText(unsigned int idx) {
  PRECONDITION(dp_inStream, "no stream");
  unsigned int holder = d_last;
  moveTo(idx);
  std::streampos begP = d_molpos[idx];
  std::streampos endP;
  try {
    moveTo(idx + 1);
    endP = d_molpos[idx + 1];
  } catch (FileParseException &) {
    // idx is the last record: read through to the end of the stream
    dp_inStream->clear();
    dp_inStream->seekg(0, std::ios_base::end);
    endP = dp_inStream->tellg();
  }
  d_last = holder;

  // a failed tellg() can produce a non-positive span; never hand that to an
  // allocation
  const std::streamoff nBytes = endP - begP;
  if (nBytes <= 0) {
    return std::string();
  }

  // read straight into the result instead of a manually managed buffer
  std::string res(static_cast<std::string::size_type>(nBytes), '\0');
  dp_inStream->seekg(begP);
  dp_inStream->read(&res[0], static_cast<std::streamsize>(nBytes));
  // on a short read keep only what was actually extracted, so the caller
  // never sees bytes that did not come from the stream
  res.resize(static_cast<std::string::size_type>(dp_inStream->gcount()));
  return res;
}
//! Moves the record cursor (and the stream) to record \c idx.
/*!
  If the offset of record \c idx is already cached, this is a plain seek.
  Otherwise the stream is scanned forward from the last cached offset, line
  by line, registering a new offset after every "$$$$" delimiter that is not
  followed by the end of the data, until record \c idx is reached.

  \param idx  zero-based index of the record to move to

  \throws FileParseException if EOF is reached before record \c idx is found
*/
void SDMolSupplier::moveTo(unsigned int idx) {
  PRECONDITION(dp_inStream, "no stream");

  // dp_inStream->seekg() is called for all idx values
  // and earlier calls to next() may have put the stream into a bad state
  dp_inStream->clear();

  // fast path: the offset is already known
  if (idx < d_molpos.size()) {
    dp_inStream->seekg(d_molpos[idx]);
    d_last = idx;
    return;
  }

  // slow path: scan forward from the last known record start
  dp_inStream->seekg(d_molpos.back());
  d_last = rdcast<int>(d_molpos.size()) - 1;
  const int target = static_cast<int>(idx);
  while ((d_last < target) && (!dp_inStream->eof())) {
    ++d_line;
    const std::string line = getLine(dp_inStream);
    if (line.compare(0, 4, "$$$$") != 0) {
      continue;
    }
    // a record delimiter: the next record (if any) starts right here
    const std::streampos nextStart = dp_inStream->tellg();
    this->checkForEnd();
    if (!this->df_end) {
      d_molpos.push_back(nextStart);
      ++d_last;
    }
  }

  // if we reached end of file without reaching "idx" we have an index error
  if (dp_inStream->eof()) {
    d_len = rdcast<int>(d_molpos.size());
    std::ostringstream errout;
    errout << "ERROR: Index error (idx = " << idx << ") : "
           << " we do no have enough mol blocks";
    throw FileParseException(errout.str());
  }
}
//! Random access: returns the molecule parsed from record \c idx.
/*!
  Moves the cursor to record \c idx and then reads it with next(), so the
  cursor ends up past the requested record.

  \param idx  zero-based index of the record

  \return whatever next() returns for that record
*/
ROMol *SDMolSupplier::operator[](unsigned int idx) {
  PRECONDITION(dp_inStream, "no stream");
  this->moveTo(idx);
  ROMol *mol = this->next();
  return mol;
}
//! Returns the number of mol blocks in the SD data.
/*!
  If the count is already known (d_len is positive, or the end flag is set
  with d_len == 0) it is returned directly. Otherwise the rest of the stream
  is scanned from the last cached record offset, registering a new offset
  after every "$$$$" delimiter that is not followed by the end of the data.
  The stream is then repositioned on the record the cursor points at.
*/
unsigned int SDMolSupplier::length() {
  PRECONDITION(dp_inStream, "no stream");

  const bool alreadyKnown = (d_len > 0) || (df_end && d_len == 0);
  if (alreadyKnown) {
    return d_len;
  }

  // start counting from the records we already know about
  d_len = rdcast<int>(d_molpos.size());
  dp_inStream->seekg(d_molpos.back());

  std::string line;
  while (!dp_inStream->eof()) {
    std::getline(*dp_inStream, line);
    if (line.compare(0, 4, "$$$$") != 0) {
      continue;
    }
    const std::streampos nextStart = dp_inStream->tellg();
    // don't worry about the last molecule:
    this->checkForEnd();
    if (!this->df_end) {
      d_molpos.push_back(nextStart);
      ++d_len;
    }
  }

  // now remember to set the stream to the last position we want to read
  dp_inStream->clear();
  dp_inStream->seekg(d_molpos[d_last]);
  return d_len;
}
bool SDMolSupplier::atEnd() {
PRECONDITION(dp_inStream, "no stream");
return df_end;
}
//! Installs a caller-supplied table of record start offsets.
/*!
  The cached offsets are replaced by a copy of \c locs, the supplier is
  rewound with reset(), and the record count is set to the number of offsets
  provided.

  \param locs  stream positions at which the records start
*/
void SDMolSupplier::setStreamIndices(const std::vector<std::streampos> &locs) {
  // replace the whole offset table in one step
  d_molpos.assign(locs.begin(), locs.end());
  this->reset();
  d_len = rdcast<int>(d_molpos.size());
}
}