mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-03 21:44:30 +08:00
* do not use new on loggers * del pointers in testDistGeom * Update Dict hasNonPOD status on bulk update * delete new Dicts in memtest1.cpp * fixes in MolSuppliers and testFMCS * PeriodicTable singleton as unique_ptr * fix EEM_arrays leak * fix leaks in testPBF * fix ParamCollection leak in test UFF * fix leaks in MMFF * clear prop dict before read in in pickler * fix leaks in testFreeSASA * fix leaks in test3D * modernize Dict.h & SmilesParse.cpp * fix leaks in testQuery * fix leaks in testCrystalFF * fix leaks in cxsmilesTest * fix leaks in Catalog & mol cat test * fix leaks in ShapeUtils & tests * fix leaks in testSubgraphs1 * fix leaks testFingerprintGenerators * fix leaks in Catalog/FilterCatalog * fix leaks in graphmolqueryTest * these changes reduce bison parse leaks * fixed leaks in testChirality.cpp * fix leaks + 2 tests in testMolWriter * fix 4m leaks in substructLibraryTest * small improvements to molTautomerTest; still leaks * fix leaks in testRGroupDecomp * fix leaks in test; parser still leaks * fix leaks in itertest * fix 4m leaks in testDepictor * fixes in smatest; still leaking due to parser * fixes in testSLNParse; still leaking due to parser * flex/bison: always add atoms with ownership; smarts error cleanup * fix leaks in testReaction * fix leaks in testSubstructMatch * fix leaks in resMolSupplierTest * fix leaks in testChemTransforms + bug in ChemTransforms * fix leaks in testPickler * fix leaks in testMolTransform * fix leaks in testFragCatalog * fix leak in testSLNParse. 
Still leaks due to Smiles * fixed most leaks in testMolSupplier * pre bison fix * fix some atom & bond parse problems; others still fail * bison smiles & smarts, atoms & bonds more or less fixed * fix leaks in molopstest.cpp * fix leaks in testFingerprints, MACCS.cpp & AtomPairs.cpp * fix leaks in moldraw2Dtest1 * fix leaks in testDescriptors * fix leaks in testInchi * fix leaks in testUFFForceFieldHelpers * fix leaks in hanoiTest & new_canon.h * fix leaks in testMMFFForceField * fix leaks in graphmolTest1 * fix leaks in testMMFFForceFieldHelpers * fix leaks in testDistGeomHelpers * fix leaks in testMolAlign * initialize occupancy & temp facto with default values * fix leak in TautomerTransform * updated suppressions * fix testStructChecker * fix logging & py tests * fix TautomerTransform class/struct issue * remove misplaced delete in testSLNParse * deinit in testAvalonLib1 * fix Avalon-triggered(?) bug in StructChecker/Pattern.cpp * fix random testMolWriter/Supplier fails - diversify output file names to avoid clashing. - unify Writers close/destruct behavior. - flushing/closing in tests. * use reset in FFs Params.cpp * comments on testMMFFForceField * unrequired 'if's added to mol suppliers * correct cast in FilterCatalog.h * use unique_ptr in MACCS Patterns * remove unrequred if in new_canon * update & move suppressions
298 lines
8.4 KiB
C++
298 lines
8.4 KiB
C++
// $Id$
|
|
//
|
|
// Copyright (C) 2002-2012 Greg Landrum and Rational Discovery LLC
|
|
//
|
|
// @@ All Rights Reserved @@
|
|
// This file is part of the RDKit.
|
|
// The contents are covered by the terms of the BSD license
|
|
// which is included in the file license.txt, found at the root
|
|
// of the RDKit source tree.
|
|
//
|
|
#include <RDGeneral/FileParseException.h>
|
|
#include <RDGeneral/BadFileException.h>
|
|
#include <RDGeneral/StreamOps.h>
|
|
#include <RDGeneral/RDLog.h>
|
|
#include <GraphMol/SanitException.h>
|
|
|
|
#include <boost/algorithm/string.hpp>
|
|
#include "MolSupplier.h"
|
|
#include "FileParsers.h"
|
|
|
|
#include <fstream>
|
|
#include <iostream>
|
|
#include <sstream>
|
|
#include <string>
|
|
|
|
namespace RDKit {
|
|
|
|
// Builds a supplier that reads SD records from the file named fileName.
//
//   fileName       path of the SD file to open
//   sanitize       stored in df_sanitize
//   removeHs       stored in df_removeHs
//   strictParsing  stored in df_strictParsing
//
// Throws BadFileException if the file stream cannot be opened. The supplier
// owns the stream it creates (df_owner is set to true).
SDMolSupplier::SDMolSupplier(const std::string &fileName, bool sanitize,
                             bool removeHs, bool strictParsing) {
  init();

  // The file is deliberately opened in binary mode: tellg() was found to
  // misbehave otherwise with VC++ 6.0, and this was confirmed to still be the
  // case with Visual Studio 2008 (Jan 2009).
  auto *fileStream =
      new std::ifstream(fileName.c_str(), std::ios_base::binary);
  if (!(*fileStream) || fileStream->bad()) {
    delete fileStream;
    std::ostringstream errout;
    errout << "Bad input file " << fileName;
    throw BadFileException(errout.str());
  }

  dp_inStream = fileStream;
  df_owner = true;

  // the first record starts at the stream's initial position
  d_molpos.push_back(dp_inStream->tellg());

  df_sanitize = sanitize;
  df_removeHs = removeHs;
  df_strictParsing = strictParsing;

  this->checkForEnd();
  if (df_end) {
    // checkForEnd() records a length when it finds the end of the input;
    // for a freshly opened stream that means "no records" (GitHub issue 19)
    d_len = 0;
  }
  POSTCONDITION(dp_inStream, "bad instream");
}
|
|
|
|
// Builds a supplier that reads SD records from an already-open stream.
//
//   inStream       the stream to read from; must not be null
//   takeOwnership  stored in df_owner
//   sanitize       stored in df_sanitize
//   removeHs       stored in df_removeHs
//   strictParsing  stored in df_strictParsing
SDMolSupplier::SDMolSupplier(std::istream *inStream, bool takeOwnership,
                             bool sanitize, bool removeHs,
                             bool strictParsing) {
  PRECONDITION(inStream, "bad stream");
  init();

  dp_inStream = inStream;
  df_owner = takeOwnership;

  // the first record starts wherever the caller left the stream
  d_molpos.push_back(dp_inStream->tellg());

  df_sanitize = sanitize;
  df_removeHs = removeHs;
  df_strictParsing = strictParsing;

  this->checkForEnd();
  if (df_end) {
    // checkForEnd() records a length when it finds the end of the input;
    // for a fresh stream that means "no records" (GitHub issue 19)
    d_len = 0;
  }
  POSTCONDITION(dp_inStream, "bad instream");
}
|
|
|
|
// Puts the supplier's bookkeeping into its starting state: runs the base
// class initialisation, then sets the current record index (d_last) to 0 and
// the cached record count (d_len) to -1, the value length() treats as
// "not yet counted".
void SDMolSupplier::init() {
  ForwardSDMolSupplier::init();
  d_last = 0;
  d_len = -1;
}
|
|
|
|
void SDMolSupplier::setDataCommon(const std::string &text, bool sanitize,
|
|
bool removeHs) {
|
|
if (dp_inStream && df_owner) delete dp_inStream;
|
|
init();
|
|
std::istream *tmpStream = nullptr;
|
|
tmpStream = static_cast<std::istream *>(
|
|
new std::istringstream(text, std::ios_base::binary));
|
|
dp_inStream = tmpStream;
|
|
df_owner = true;
|
|
d_molpos.push_back(dp_inStream->tellg());
|
|
df_sanitize = sanitize;
|
|
df_removeHs = removeHs;
|
|
this->checkForEnd();
|
|
if (df_end) {
|
|
// checkForEnd() sets d_len if we're at EOF. undo that (was GitHub issue
|
|
// 19):
|
|
d_len = 0;
|
|
}
|
|
POSTCONDITION(dp_inStream, "bad instream");
|
|
}
|
|
|
|
void SDMolSupplier::setData(const std::string &text, bool sanitize,
|
|
bool removeHs) {
|
|
df_strictParsing = true;
|
|
setDataCommon(text, sanitize, removeHs);
|
|
}
|
|
|
|
void SDMolSupplier::setData(const std::string &text, bool sanitize,
|
|
bool removeHs, bool strictParsing) {
|
|
df_strictParsing = strictParsing;
|
|
setDataCommon(text, sanitize, removeHs);
|
|
}
|
|
|
|
void SDMolSupplier::checkForEnd() {
|
|
PRECONDITION(dp_inStream, "no stream");
|
|
// we will call it end of file if we have more than 4 contiguous empty lines
|
|
// or we reach end of file in the meantime
|
|
if (dp_inStream->eof()) {
|
|
df_end = true;
|
|
d_len = rdcast<int>(d_molpos.size());
|
|
return;
|
|
}
|
|
// we are not at the end of file, check for blank lines
|
|
unsigned int nempty = 0;
|
|
std::string tempStr, stmp;
|
|
for (unsigned int i = 0; i < 4; i++) {
|
|
tempStr = getLine(dp_inStream);
|
|
if (dp_inStream->eof()) {
|
|
df_end = true;
|
|
d_len = rdcast<int>(d_molpos.size());
|
|
return;
|
|
}
|
|
if (tempStr.find_first_not_of(" \t\r\n") == std::string::npos) {
|
|
++nempty;
|
|
}
|
|
}
|
|
if (nempty == 4) {
|
|
df_end = true;
|
|
d_len = rdcast<int>(d_molpos.size());
|
|
}
|
|
}
|
|
|
|
// Rewinds the supplier to the start of its stream: clears the stream's state
// flags, seeks to the beginning, and zeroes the end flag, the current record
// index and the line counter. The cached record count (d_len) and the
// collected offsets (d_molpos) are left untouched.
void SDMolSupplier::reset() {
  PRECONDITION(dp_inStream, "no stream");

  // clear eof/fail flags first so that the seek is not ignored
  dp_inStream->clear();
  dp_inStream->seekg(0, std::ios::beg);

  d_line = 0;
  d_last = 0;
  df_end = false;
}
|
|
|
|
// Reads the record at the current index (d_last) and advances the index.
//
// Returns the result of _next(), or a null pointer if the stream reports EOF
// right after seeking to the record (in which case the end flag and record
// count are set).
// Throws FileParseException("EOF hit.") if the end was already reached and
// the current index is past the last record.
ROMol *SDMolSupplier::next() {
  PRECONDITION(dp_inStream, "no stream");

  const bool exhausted = df_end && (d_last >= d_len);
  if (exhausted) {
    throw FileParseException("EOF hit.");
  }

  // position the stream at the start of the current record
  dp_inStream->seekg(d_molpos[d_last]);

  if (dp_inStream->eof()) {
    // FIX: we should probably be throwing an exception here
    df_end = true;
    d_len = rdcast<int>(d_molpos.size());
    return nullptr;
  }

  ROMol *mol = _next();
  ++d_last;

  // remember where the following record would start *before* checkForEnd()
  // consumes its look-ahead lines
  const std::streampos afterRecord = dp_inStream->tellg();
  this->checkForEnd();

  const bool offsetUnknown = d_last >= static_cast<int>(d_molpos.size());
  if (!this->df_end && offsetUnknown) {
    d_molpos.push_back(afterRecord);
  }

  return mol;
}
|
|
|
|
std::string SDMolSupplier::getItemText(unsigned int idx) {
|
|
PRECONDITION(dp_inStream, "no stream");
|
|
unsigned int holder = d_last;
|
|
moveTo(idx);
|
|
std::streampos begP = d_molpos[idx];
|
|
std::streampos endP;
|
|
try {
|
|
moveTo(idx + 1);
|
|
endP = d_molpos[idx + 1];
|
|
} catch (FileParseException &) {
|
|
dp_inStream->clear();
|
|
dp_inStream->seekg(0, std::ios_base::end);
|
|
endP = dp_inStream->tellg();
|
|
}
|
|
d_last = holder;
|
|
auto *buff = new char[endP - begP];
|
|
dp_inStream->seekg(begP);
|
|
dp_inStream->read(buff, endP - begP);
|
|
std::string res(buff, endP - begP);
|
|
delete[] buff;
|
|
return res;
|
|
}
|
|
|
|
void SDMolSupplier::moveTo(unsigned int idx) {
|
|
PRECONDITION(dp_inStream, "no stream");
|
|
|
|
// dp_inStream->seekg() is called for all idx values
|
|
// and earlier calls to next() may have put the stream into a bad state
|
|
dp_inStream->clear();
|
|
|
|
// move until we hit the desired idx
|
|
if (idx < d_molpos.size()) {
|
|
dp_inStream->seekg(d_molpos[idx]);
|
|
d_last = idx;
|
|
} else {
|
|
std::string tempStr;
|
|
dp_inStream->seekg(d_molpos.back());
|
|
d_last = rdcast<int>(d_molpos.size()) - 1;
|
|
while ((d_last < static_cast<int>(idx)) && (!dp_inStream->eof())) {
|
|
d_line++;
|
|
tempStr = getLine(dp_inStream);
|
|
|
|
if (tempStr[0] == '$' && tempStr.substr(0, 4) == "$$$$") {
|
|
std::streampos posHold = dp_inStream->tellg();
|
|
this->checkForEnd();
|
|
if (!this->df_end) {
|
|
d_molpos.push_back(posHold);
|
|
d_last++;
|
|
}
|
|
}
|
|
}
|
|
// if we reached end of file without reaching "idx" we have an index error
|
|
if (dp_inStream->eof()) {
|
|
d_len = rdcast<int>(d_molpos.size());
|
|
std::ostringstream errout;
|
|
errout << "ERROR: Index error (idx = " << idx << ") : "
|
|
<< " we do no have enough mol blocks";
|
|
throw FileParseException(errout.str());
|
|
}
|
|
}
|
|
}
|
|
|
|
// Random access: positions the supplier on record idx with moveTo() and
// returns the result of next(). As a side effect the current record index
// ends up just past idx. Exceptions from moveTo() and next() propagate.
ROMol *SDMolSupplier::operator[](unsigned int idx) {
  PRECONDITION(dp_inStream, "no stream");
  this->moveTo(idx);
  ROMol *mol = this->next();
  return mol;
}
|
|
|
|
unsigned int SDMolSupplier::length() {
|
|
PRECONDITION(dp_inStream, "no stream");
|
|
// return the number of mol blocks in the sdfile
|
|
if (d_len > 0 || (df_end && d_len == 0)) {
|
|
return d_len;
|
|
} else {
|
|
std::string tempStr;
|
|
d_len = rdcast<int>(d_molpos.size());
|
|
dp_inStream->seekg(d_molpos.back());
|
|
while (!dp_inStream->eof()) {
|
|
std::getline(*dp_inStream, tempStr);
|
|
if (tempStr.length() >= 4 && tempStr[0] == '$' && tempStr[1] == '$' &&
|
|
tempStr[2] == '$' && tempStr[3] == '$') {
|
|
std::streampos posHold = dp_inStream->tellg();
|
|
// don't worry about the last molecule:
|
|
this->checkForEnd();
|
|
if (!this->df_end) {
|
|
d_molpos.push_back(posHold);
|
|
++d_len;
|
|
}
|
|
}
|
|
}
|
|
// now remember to set the stream to the last postion we want to read
|
|
dp_inStream->clear();
|
|
dp_inStream->seekg(d_molpos[d_last]);
|
|
return d_len;
|
|
}
|
|
}
|
|
|
|
bool SDMolSupplier::atEnd() {
|
|
PRECONDITION(dp_inStream, "no stream");
|
|
return df_end;
|
|
}
|
|
|
|
// Installs a caller-supplied table of record start offsets.
//
//   locs  stream positions, one per record, copied into d_molpos
//
// The supplier is rewound with reset() and the record count (d_len) is set
// to the number of offsets supplied.
void SDMolSupplier::setStreamIndices(const std::vector<std::streampos> &locs) {
  // replace the existing offsets wholesale
  d_molpos.assign(locs.begin(), locs.end());
  this->reset();
  d_len = rdcast<int>(d_molpos.size());
}
|
|
}
|