mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-04 21:54:27 +08:00
Fix a hang when trying to read mols from a directory not a file on linux (#2983)
* Fix a hang when trying to read mols from a directory not a file on linux * thrown an exception at construction time * clarify the readme * update release notes * Refactor the stream opening and checking code to a common method Co-authored-by: Brian Kelley <bkelley@relaytx.com> Co-authored-by: Greg Landrum <greg.landrum@gmail.com>
This commit is contained in:
@@ -390,14 +390,7 @@ MaeMolSupplier::MaeMolSupplier(std::istream *inStream, bool takeOwnership,
|
||||
MaeMolSupplier::MaeMolSupplier(const std::string &fileName, bool sanitize,
|
||||
bool removeHs) {
|
||||
df_owner = true;
|
||||
auto *ifs = new std::ifstream(fileName.c_str(), std::ios_base::binary);
|
||||
if (!(*ifs) || ifs->bad()) {
|
||||
delete ifs;
|
||||
std::ostringstream errout;
|
||||
errout << "Bad input file " << fileName;
|
||||
throw BadFileException(errout.str());
|
||||
}
|
||||
dp_inStream = static_cast<std::istream *>(ifs);
|
||||
dp_inStream = openAndCheckStream(fileName);
|
||||
dp_sInStream.reset(dp_inStream);
|
||||
df_sanitize = sanitize;
|
||||
df_removeHs = removeHs;
|
||||
|
||||
@@ -18,7 +18,9 @@
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <GraphMol/ROMol.h>
|
||||
#include <RDGeneral/BadFileException.h>
|
||||
|
||||
#ifdef RDK_BUILD_COORDGEN_SUPPORT
|
||||
namespace schrodinger {
|
||||
@@ -76,6 +78,33 @@ class RDKIT_FILEPARSERS_EXPORT MolSupplier {
|
||||
std::istream *dp_inStream = nullptr;
|
||||
// do we own dp_inStream?
|
||||
bool df_owner = false;
|
||||
// opens a stream for reading and verifies that it can be read from.
|
||||
// if not it throws an exception
|
||||
// the caller owns the resulting stream
|
||||
std::istream *openAndCheckStream(const std::string &filename) {
|
||||
// FIX: this binary mode of opening file is here because of a bug in
|
||||
// VC++ 6.0
|
||||
// the function "tellg" does not work correctly if we do not open it this
|
||||
// way
|
||||
// Jan 2009: Confirmed that this is still the case in visual studio 2008
|
||||
std::ifstream *strm =
|
||||
new std::ifstream(filename.c_str(), std::ios_base::binary);
|
||||
if ((!(*strm)) || strm->bad()) {
|
||||
std::ostringstream errout;
|
||||
errout << "Bad input file " << filename;
|
||||
delete strm;
|
||||
throw BadFileException(errout.str());
|
||||
}
|
||||
|
||||
strm->peek();
|
||||
if (strm->bad() || strm->eof()) {
|
||||
std::ostringstream errout;
|
||||
errout << "Invalid input file " << filename;
|
||||
delete strm;
|
||||
throw BadFileException(errout.str());
|
||||
}
|
||||
return static_cast<std::istream *>(strm);
|
||||
}
|
||||
};
|
||||
|
||||
// \brief a supplier from an SD file that only reads forward:
|
||||
|
||||
@@ -29,14 +29,7 @@ PDBMolSupplier::PDBMolSupplier(std::istream *inStream, bool takeOwnership,
|
||||
PDBMolSupplier::PDBMolSupplier(const std::string &fileName, bool sanitize,
|
||||
bool removeHs, unsigned int flavor,
|
||||
bool proximityBonding) {
|
||||
auto *ifs = new std::ifstream(fileName.c_str(), std::ios_base::binary);
|
||||
if (!(*ifs) || ifs->bad()) {
|
||||
delete ifs;
|
||||
std::ostringstream errout;
|
||||
errout << "Bad input file " << fileName;
|
||||
throw BadFileException(errout.str());
|
||||
}
|
||||
dp_inStream = (std::istream *)ifs;
|
||||
dp_inStream = openAndCheckStream(fileName);
|
||||
df_owner = true;
|
||||
df_sanitize = sanitize;
|
||||
df_removeHs = removeHs;
|
||||
|
||||
@@ -28,21 +28,7 @@ namespace RDKit {
|
||||
SDMolSupplier::SDMolSupplier(const std::string &fileName, bool sanitize,
|
||||
bool removeHs, bool strictParsing) {
|
||||
init();
|
||||
// FIX: this binary mode of opening file is here because of a bug in VC++ 6.0
|
||||
// the function "tellg" does not work correctly if we do not open it this way
|
||||
// Jan 2009: Confirmed that this is still the case in visual studio 2008
|
||||
std::istream *tmpStream = nullptr;
|
||||
tmpStream = static_cast<std::istream *>(
|
||||
new std::ifstream(fileName.c_str(), std::ios_base::binary));
|
||||
if ((!(*tmpStream)) || (tmpStream->bad())) {
|
||||
std::ostringstream errout;
|
||||
errout << "Bad input file " << fileName;
|
||||
delete tmpStream;
|
||||
throw BadFileException(errout.str());
|
||||
}
|
||||
|
||||
// dp_inStream = static_cast<std::istream *>(tmpStream);
|
||||
dp_inStream = tmpStream;
|
||||
dp_inStream = openAndCheckStream(fileName);
|
||||
df_owner = true;
|
||||
d_molpos.push_back(dp_inStream->tellg());
|
||||
df_sanitize = sanitize;
|
||||
|
||||
@@ -32,19 +32,7 @@ SmilesMolSupplier::SmilesMolSupplier(const std::string &fileName,
|
||||
int smilesColumn, int nameColumn,
|
||||
bool titleLine, bool sanitize) {
|
||||
init();
|
||||
|
||||
// FIX: this binary mode of opening file is here because of a bug in VC++ 6.0
|
||||
// the function "tellg" does not work correctly if we do not open it this way
|
||||
// Need to check if this has been fixed in VC++ 7.0
|
||||
auto *tmpStream = new std::ifstream(fileName.c_str(), std::ios_base::binary);
|
||||
|
||||
if (!(*tmpStream) || tmpStream->bad()) {
|
||||
std::ostringstream errout;
|
||||
errout << "Bad input file " << fileName;
|
||||
delete tmpStream;
|
||||
throw BadFileException(errout.str());
|
||||
}
|
||||
dp_inStream = static_cast<std::istream *>(tmpStream);
|
||||
dp_inStream = openAndCheckStream(fileName);
|
||||
CHECK_INVARIANT(dp_inStream, "bad instream");
|
||||
CHECK_INVARIANT(!(dp_inStream->eof()), "early EOF");
|
||||
|
||||
@@ -268,7 +256,7 @@ std::string SmilesMolSupplier::nextLine() {
|
||||
|
||||
if (tempStr == "") {
|
||||
// got an empty string, check to see if we hit EOF:
|
||||
if (dp_inStream->eof()) {
|
||||
if (dp_inStream->eof() || dp_inStream->bad()) {
|
||||
// yes, set our flag:
|
||||
df_end = true;
|
||||
}
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
// $Id$
|
||||
//
|
||||
// Copyright (C) 2005-2008 Greg Landrum and Rational Discovery LLC
|
||||
// Copyright (C) 2005-2020 Greg Landrum and Rational Discovery LLC
|
||||
//
|
||||
// @@ All Rights Reserved @@
|
||||
// This file is part of the RDKit.
|
||||
@@ -79,21 +78,9 @@ TDTMolSupplier::TDTMolSupplier(const std::string &fileName,
|
||||
d_confId2D = confId2D;
|
||||
d_confId3D = confId3D;
|
||||
d_nameProp = nameRecord;
|
||||
// FIX: this binary moe of opening file is here because of a bug in VC++ 6.0
|
||||
// the function "tellg" does not work correctly if we do not open it this way
|
||||
// Need to check if this has been fixed in VC++ 7.0
|
||||
std::istream *tmpStream = nullptr;
|
||||
tmpStream = static_cast<std::istream *>(
|
||||
new std::ifstream(fileName.c_str(), std::ios_base::binary));
|
||||
if (!(*tmpStream) || tmpStream->bad()) {
|
||||
std::ostringstream errout;
|
||||
errout << "Bad input file " << fileName;
|
||||
delete tmpStream;
|
||||
throw BadFileException(errout.str());
|
||||
}
|
||||
|
||||
dp_inStream = tmpStream;
|
||||
dp_inStream = openAndCheckStream(fileName);
|
||||
df_owner = true;
|
||||
|
||||
this->advanceToNextRecord();
|
||||
d_molpos.push_back(dp_inStream->tellg());
|
||||
df_sanitize = sanitize;
|
||||
@@ -158,7 +145,7 @@ bool TDTMolSupplier::advanceToNextRecord() {
|
||||
std::streampos pos;
|
||||
bool res = false;
|
||||
while (1) {
|
||||
if (dp_inStream->eof()) {
|
||||
if (dp_inStream->eof() || dp_inStream->bad()) {
|
||||
return false;
|
||||
}
|
||||
pos = dp_inStream->tellg();
|
||||
@@ -176,7 +163,7 @@ bool TDTMolSupplier::advanceToNextRecord() {
|
||||
|
||||
void TDTMolSupplier::checkForEnd() {
|
||||
PRECONDITION(dp_inStream, "no stream");
|
||||
if (dp_inStream->eof()) {
|
||||
if (dp_inStream->eof() || dp_inStream->bad()) {
|
||||
df_end = true;
|
||||
// the -1 here is because by the time we get here we've already pushed on
|
||||
// the
|
||||
|
||||
@@ -923,7 +923,7 @@ void testSDSupplierEnding() {
|
||||
|
||||
void testSuppliersEmptyFile() {
|
||||
std::string rdbase = getenv("RDBASE");
|
||||
{
|
||||
{ // contains no records
|
||||
std::string infile =
|
||||
rdbase + "/Code/GraphMol/FileParsers/test_data/empty.sdf";
|
||||
SDMolSupplier reader(infile);
|
||||
@@ -936,38 +936,16 @@ void testSuppliersEmptyFile() {
|
||||
TEST_ASSERT(smiSup.atEnd());
|
||||
}
|
||||
// tests for GitHub issue 19:
|
||||
{
|
||||
{ // actually an empty file, throws an exception:
|
||||
std::string infile =
|
||||
rdbase + "/Code/GraphMol/FileParsers/test_data/empty2.sdf";
|
||||
SDMolSupplier reader(infile);
|
||||
TEST_ASSERT(reader.length() == 0);
|
||||
}
|
||||
{
|
||||
std::string infile =
|
||||
rdbase + "/Code/GraphMol/FileParsers/test_data/empty2.sdf";
|
||||
SDMolSupplier reader(infile);
|
||||
TEST_ASSERT(reader.atEnd());
|
||||
bool failed = false;
|
||||
try {
|
||||
reader[0];
|
||||
} catch (FileParseException &) {
|
||||
SDMolSupplier reader(infile);
|
||||
} catch (BadFileException &) {
|
||||
failed = true;
|
||||
}
|
||||
TEST_ASSERT(failed);
|
||||
TEST_ASSERT(reader.length() == 0);
|
||||
}
|
||||
{
|
||||
std::string infile =
|
||||
rdbase + "/Code/GraphMol/FileParsers/test_data/empty2.sdf";
|
||||
SDMolSupplier reader(infile);
|
||||
bool failed = false;
|
||||
try {
|
||||
reader[0];
|
||||
} catch (FileParseException &) {
|
||||
failed = true;
|
||||
}
|
||||
TEST_ASSERT(failed);
|
||||
TEST_ASSERT(reader.length() == 0);
|
||||
}
|
||||
{
|
||||
SDMolSupplier reader;
|
||||
|
||||
@@ -74,30 +74,9 @@ class LocalMaeMolSupplier : public RDKit::MaeMolSupplier {
|
||||
}
|
||||
|
||||
LocalMaeMolSupplier(const std::string &fname, bool sanitize = true,
|
||||
bool removeHs = true) {
|
||||
df_owner = true;
|
||||
auto *ifs = new std::ifstream(fname.c_str(), std::ios_base::binary);
|
||||
if (!(*ifs) || ifs->bad()) {
|
||||
delete ifs;
|
||||
std::ostringstream errout;
|
||||
errout << "Bad input file " << fname;
|
||||
throw RDKit::BadFileException(errout.str());
|
||||
}
|
||||
dp_inStream = (std::istream *)ifs;
|
||||
dp_sInStream.reset(dp_inStream);
|
||||
df_sanitize = sanitize;
|
||||
df_removeHs = removeHs;
|
||||
|
||||
d_reader.reset(new mae::Reader(dp_sInStream));
|
||||
CHECK_INVARIANT(streamIsGoodOrExhausted(dp_inStream), "bad instream");
|
||||
|
||||
try {
|
||||
d_next_struct = d_reader->next(mae::CT_BLOCK);
|
||||
} catch (const mae::read_exception &e) {
|
||||
throw RDKit::FileParseException(e.what());
|
||||
}
|
||||
};
|
||||
};
|
||||
bool removeHs = true)
|
||||
: RDKit::MaeMolSupplier(fname, sanitize, removeHs) {}
|
||||
}; // namespace
|
||||
|
||||
LocalMaeMolSupplier *FwdMolSupplIter(LocalMaeMolSupplier *self) { return self; }
|
||||
} // namespace
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -12,6 +12,9 @@
|
||||
now use the `do_chiral_sss` option. So if `do_chiral_sss` is false (the
|
||||
default), the molecules `CC(F)Cl` and `C[C@H](F)Cl` will be considered to be equal.
|
||||
Previously these molecules were always considered to be different.
|
||||
- Attempting to create a MolSupplier from a filename pointing to an empty file,
|
||||
a file that does not exist or sometihing that is not a standard file (i.e.
|
||||
something like a directory) now generates an exception.
|
||||
|
||||
|
||||
# Release_2019.09.1
|
||||
|
||||
Reference in New Issue
Block a user