Fix a hang when trying to read mols from a directory not a file on linux (#2983)

* Fix a hang when trying to read mols from a directory not a file on linux

* thrown an exception at construction time

* clarify the readme

* update release notes

* Refactor the stream opening and checking code to a common method

Co-authored-by: Brian Kelley <bkelley@relaytx.com>
Co-authored-by: Greg Landrum <greg.landrum@gmail.com>
This commit is contained in:
Brian Kelley
2020-03-09 10:27:58 -04:00
committed by GitHub
parent 34e96e2048
commit 4c1ea25fda
10 changed files with 342 additions and 390 deletions

View File

@@ -390,14 +390,7 @@ MaeMolSupplier::MaeMolSupplier(std::istream *inStream, bool takeOwnership,
MaeMolSupplier::MaeMolSupplier(const std::string &fileName, bool sanitize,
bool removeHs) {
df_owner = true;
auto *ifs = new std::ifstream(fileName.c_str(), std::ios_base::binary);
if (!(*ifs) || ifs->bad()) {
delete ifs;
std::ostringstream errout;
errout << "Bad input file " << fileName;
throw BadFileException(errout.str());
}
dp_inStream = static_cast<std::istream *>(ifs);
dp_inStream = openAndCheckStream(fileName);
dp_sInStream.reset(dp_inStream);
df_sanitize = sanitize;
df_removeHs = removeHs;

View File

@@ -18,7 +18,9 @@
#include <memory>
#include <vector>
#include <iostream>
#include <fstream>
#include <GraphMol/ROMol.h>
#include <RDGeneral/BadFileException.h>
#ifdef RDK_BUILD_COORDGEN_SUPPORT
namespace schrodinger {
@@ -76,6 +78,33 @@ class RDKIT_FILEPARSERS_EXPORT MolSupplier {
std::istream *dp_inStream = nullptr;
// do we own dp_inStream?
bool df_owner = false;
// opens a stream for reading and verifies that it can be read from.
// if not it throws an exception
// the caller owns the resulting stream
std::istream *openAndCheckStream(const std::string &filename) {
// FIX: this binary mode of opening file is here because of a bug in
// VC++ 6.0
// the function "tellg" does not work correctly if we do not open it this
// way
// Jan 2009: Confirmed that this is still the case in visual studio 2008
std::ifstream *strm =
new std::ifstream(filename.c_str(), std::ios_base::binary);
if ((!(*strm)) || strm->bad()) {
std::ostringstream errout;
errout << "Bad input file " << filename;
delete strm;
throw BadFileException(errout.str());
}
strm->peek();
if (strm->bad() || strm->eof()) {
std::ostringstream errout;
errout << "Invalid input file " << filename;
delete strm;
throw BadFileException(errout.str());
}
return static_cast<std::istream *>(strm);
}
};
// \brief a supplier from an SD file that only reads forward:

View File

@@ -29,14 +29,7 @@ PDBMolSupplier::PDBMolSupplier(std::istream *inStream, bool takeOwnership,
PDBMolSupplier::PDBMolSupplier(const std::string &fileName, bool sanitize,
bool removeHs, unsigned int flavor,
bool proximityBonding) {
auto *ifs = new std::ifstream(fileName.c_str(), std::ios_base::binary);
if (!(*ifs) || ifs->bad()) {
delete ifs;
std::ostringstream errout;
errout << "Bad input file " << fileName;
throw BadFileException(errout.str());
}
dp_inStream = (std::istream *)ifs;
dp_inStream = openAndCheckStream(fileName);
df_owner = true;
df_sanitize = sanitize;
df_removeHs = removeHs;

View File

@@ -28,21 +28,7 @@ namespace RDKit {
SDMolSupplier::SDMolSupplier(const std::string &fileName, bool sanitize,
bool removeHs, bool strictParsing) {
init();
// FIX: this binary mode of opening file is here because of a bug in VC++ 6.0
// the function "tellg" does not work correctly if we do not open it this way
// Jan 2009: Confirmed that this is still the case in visual studio 2008
std::istream *tmpStream = nullptr;
tmpStream = static_cast<std::istream *>(
new std::ifstream(fileName.c_str(), std::ios_base::binary));
if ((!(*tmpStream)) || (tmpStream->bad())) {
std::ostringstream errout;
errout << "Bad input file " << fileName;
delete tmpStream;
throw BadFileException(errout.str());
}
// dp_inStream = static_cast<std::istream *>(tmpStream);
dp_inStream = tmpStream;
dp_inStream = openAndCheckStream(fileName);
df_owner = true;
d_molpos.push_back(dp_inStream->tellg());
df_sanitize = sanitize;

View File

@@ -32,19 +32,7 @@ SmilesMolSupplier::SmilesMolSupplier(const std::string &fileName,
int smilesColumn, int nameColumn,
bool titleLine, bool sanitize) {
init();
// FIX: this binary mode of opening file is here because of a bug in VC++ 6.0
// the function "tellg" does not work correctly if we do not open it this way
// Need to check if this has been fixed in VC++ 7.0
auto *tmpStream = new std::ifstream(fileName.c_str(), std::ios_base::binary);
if (!(*tmpStream) || tmpStream->bad()) {
std::ostringstream errout;
errout << "Bad input file " << fileName;
delete tmpStream;
throw BadFileException(errout.str());
}
dp_inStream = static_cast<std::istream *>(tmpStream);
dp_inStream = openAndCheckStream(fileName);
CHECK_INVARIANT(dp_inStream, "bad instream");
CHECK_INVARIANT(!(dp_inStream->eof()), "early EOF");
@@ -268,7 +256,7 @@ std::string SmilesMolSupplier::nextLine() {
if (tempStr == "") {
// got an empty string, check to see if we hit EOF:
if (dp_inStream->eof()) {
if (dp_inStream->eof() || dp_inStream->bad()) {
// yes, set our flag:
df_end = true;
}

View File

@@ -1,6 +1,5 @@
// $Id$
//
// Copyright (C) 2005-2008 Greg Landrum and Rational Discovery LLC
// Copyright (C) 2005-2020 Greg Landrum and Rational Discovery LLC
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
@@ -79,21 +78,9 @@ TDTMolSupplier::TDTMolSupplier(const std::string &fileName,
d_confId2D = confId2D;
d_confId3D = confId3D;
d_nameProp = nameRecord;
// FIX: this binary moe of opening file is here because of a bug in VC++ 6.0
// the function "tellg" does not work correctly if we do not open it this way
// Need to check if this has been fixed in VC++ 7.0
std::istream *tmpStream = nullptr;
tmpStream = static_cast<std::istream *>(
new std::ifstream(fileName.c_str(), std::ios_base::binary));
if (!(*tmpStream) || tmpStream->bad()) {
std::ostringstream errout;
errout << "Bad input file " << fileName;
delete tmpStream;
throw BadFileException(errout.str());
}
dp_inStream = tmpStream;
dp_inStream = openAndCheckStream(fileName);
df_owner = true;
this->advanceToNextRecord();
d_molpos.push_back(dp_inStream->tellg());
df_sanitize = sanitize;
@@ -158,7 +145,7 @@ bool TDTMolSupplier::advanceToNextRecord() {
std::streampos pos;
bool res = false;
while (1) {
if (dp_inStream->eof()) {
if (dp_inStream->eof() || dp_inStream->bad()) {
return false;
}
pos = dp_inStream->tellg();
@@ -176,7 +163,7 @@ bool TDTMolSupplier::advanceToNextRecord() {
void TDTMolSupplier::checkForEnd() {
PRECONDITION(dp_inStream, "no stream");
if (dp_inStream->eof()) {
if (dp_inStream->eof() || dp_inStream->bad()) {
df_end = true;
// the -1 here is because by the time we get here we've already pushed on
// the

View File

@@ -923,7 +923,7 @@ void testSDSupplierEnding() {
void testSuppliersEmptyFile() {
std::string rdbase = getenv("RDBASE");
{
{ // contains no records
std::string infile =
rdbase + "/Code/GraphMol/FileParsers/test_data/empty.sdf";
SDMolSupplier reader(infile);
@@ -936,38 +936,16 @@ void testSuppliersEmptyFile() {
TEST_ASSERT(smiSup.atEnd());
}
// tests for GitHub issue 19:
{
{ // actually an empty file, throws an exception:
std::string infile =
rdbase + "/Code/GraphMol/FileParsers/test_data/empty2.sdf";
SDMolSupplier reader(infile);
TEST_ASSERT(reader.length() == 0);
}
{
std::string infile =
rdbase + "/Code/GraphMol/FileParsers/test_data/empty2.sdf";
SDMolSupplier reader(infile);
TEST_ASSERT(reader.atEnd());
bool failed = false;
try {
reader[0];
} catch (FileParseException &) {
SDMolSupplier reader(infile);
} catch (BadFileException &) {
failed = true;
}
TEST_ASSERT(failed);
TEST_ASSERT(reader.length() == 0);
}
{
std::string infile =
rdbase + "/Code/GraphMol/FileParsers/test_data/empty2.sdf";
SDMolSupplier reader(infile);
bool failed = false;
try {
reader[0];
} catch (FileParseException &) {
failed = true;
}
TEST_ASSERT(failed);
TEST_ASSERT(reader.length() == 0);
}
{
SDMolSupplier reader;

View File

@@ -74,30 +74,9 @@ class LocalMaeMolSupplier : public RDKit::MaeMolSupplier {
}
LocalMaeMolSupplier(const std::string &fname, bool sanitize = true,
bool removeHs = true) {
df_owner = true;
auto *ifs = new std::ifstream(fname.c_str(), std::ios_base::binary);
if (!(*ifs) || ifs->bad()) {
delete ifs;
std::ostringstream errout;
errout << "Bad input file " << fname;
throw RDKit::BadFileException(errout.str());
}
dp_inStream = (std::istream *)ifs;
dp_sInStream.reset(dp_inStream);
df_sanitize = sanitize;
df_removeHs = removeHs;
d_reader.reset(new mae::Reader(dp_sInStream));
CHECK_INVARIANT(streamIsGoodOrExhausted(dp_inStream), "bad instream");
try {
d_next_struct = d_reader->next(mae::CT_BLOCK);
} catch (const mae::read_exception &e) {
throw RDKit::FileParseException(e.what());
}
};
};
bool removeHs = true)
: RDKit::MaeMolSupplier(fname, sanitize, removeHs) {}
}; // namespace
LocalMaeMolSupplier *FwdMolSupplIter(LocalMaeMolSupplier *self) { return self; }
} // namespace

File diff suppressed because one or more lines are too long

View File

@@ -12,6 +12,9 @@
now use the `do_chiral_sss` option. So if `do_chiral_sss` is false (the
default), the molecules `CC(F)Cl` and `C[C@H](F)Cl` will be considered to be equal.
Previously these molecules were always considered to be different.
- Attempting to create a MolSupplier from a filename pointing to an empty file,
a file that does not exist or sometihing that is not a standard file (i.e.
something like a directory) now generates an exception.
# Release_2019.09.1