Adds gzstream stream, exposes to swig (#2314)

* Move RDBoostStreams to RDStreams

* RDBoostStreams->RDStreams

* RDBoostStreams->RDStreams

* Wrap SWIG (with Java test)

* Fix missing declaration

* Use the file that already exists

* Revert to original version

* Revert to CXSMiles version

* Update boost version

* Remove redundant code

* Add zlib

* check for win32

* FileParsers now builds static on windows
This commit is contained in:
Brian Kelley
2019-03-18 00:32:42 -04:00
committed by Greg Landrum
parent 48c9532c6f
commit d2f716a2e4
13 changed files with 202 additions and 50 deletions

View File

@@ -3,6 +3,7 @@ add_subdirectory(RDGeneral)
if(RDK_BUILD_PYTHON_WRAPPERS)
add_subdirectory(RDBoost)
endif(RDK_BUILD_PYTHON_WRAPPERS)
add_subdirectory(RDStreams)
add_subdirectory(DataStructs)
add_subdirectory(Geometry)

View File

@@ -2,6 +2,20 @@
remove_definitions(-DRDKIT_GRAPHMOL_BUILD)
add_definitions(-DRDKIT_FILEPARSERS_BUILD)
# Boost.Iostreams is required by FileParsers; Windows builds additionally
# request the Boost zlib component.
set(rdk_iostreams_components system iostreams)
if(WIN32)
  list(APPEND rdk_iostreams_components zlib)
endif()
find_package(Boost 1.56.0 COMPONENTS ${rdk_iostreams_components} REQUIRED)
set(link_iostreams ${Boost_LIBRARIES})
# When Boost is not linked statically, request dynamic linking of Boost.Iostreams.
if(NOT Boost_USE_STATIC_LIBS)
  add_definitions("-DBOOST_IOSTREAMS_DYN_LINK")
endif()
if(RDK_BUILD_COORDGEN_SUPPORT)
include_directories(${maeparser_INCLUDE_DIRS})
set (maesupplier MaeMolSupplier.cpp)
@@ -26,7 +40,7 @@ rdkit_library(FileParsers
ProximityBonds.cpp
SequenceParsers.cpp SequenceWriters.cpp
SVGParser.cpp
LINK_LIBRARIES Depictor SmilesParse GraphMol ${RDK_COORDGEN_LIBS} ${regex_lib})
LINK_LIBRARIES Depictor SmilesParse GraphMol ${RDK_COORDGEN_LIBS} ${regex_lib} ${link_iostreams})
rdkit_headers(FileParsers.h
FileParserUtils.h
@@ -40,20 +54,11 @@ rdkit_test(fileParsersTest1 test1.cpp
LINK_LIBRARIES FileParsers SmilesParse
SubstructMatch GraphMol RDGeneral RDGeometryLib ${maeparser_var})
if(RDK_BUILD_TEST_GZIP)
find_package(Boost 1.56.0 COMPONENTS system iostreams REQUIRED)
set (link_iostreams ${Boost_LIBRARIES})
add_definitions("-DTEST_GZIP_SD")
if (NOT Boost_USE_STATIC_LIBS)
add_definitions("-DBOOST_IOSTREAMS_DYN_LINK")
endif()
else()
set (link_iostreams "")
endif()
rdkit_test(testMolSupplier testMolSupplier.cpp
LINK_LIBRARIES FileParsers SmilesParse SubstructMatch
GraphMol RDGeneral RDGeometryLib ${link_iostreams})
GraphMol RDGeneral RDGeometryLib RDStreams)
rdkit_test(testMolWriter testMolWriter.cpp LINK_LIBRARIES FileParsers SmilesParse GraphMol RDGeneral RDGeometryLib )

View File

@@ -19,19 +19,18 @@
#include "MolSupplier.h"
#include "MolWriters.h"
#include "FileParsers.h"
#include "FileParserUtils.h"
#include <RDGeneral/FileParseException.h>
#include <RDGeneral/BadFileException.h>
#include <RDGeneral/RDLog.h>
#include <RDStreams/streams.h>
#include <GraphMol/SmilesParse/SmilesWrite.h>
#include <GraphMol/SmilesParse/SmilesParse.h>
#include <GraphMol/Depictor/RDDepictor.h>
#ifdef TEST_GZIP_SD
#include <boost/iostreams/device/file.hpp>
#include <boost/iostreams/filtering_stream.hpp>
#include <boost/iostreams/filter/gzip.hpp>
namespace io = boost::iostreams;
#endif
using namespace RDKit;
@@ -2075,7 +2074,7 @@ int testForwardSDSupplier() {
}
TEST_ASSERT(i == 16);
}
#ifdef TEST_GZIP_SD
// make sure the boost::iostreams are working
{
io::filtering_istream strm;
@@ -2093,13 +2092,7 @@ int testForwardSDSupplier() {
TEST_ASSERT(i == 998);
}
{
io::filtering_istream strm;
// the stream must be opened in binary mode otherwise it won't work on
// Windows
std::ifstream is(fname2.c_str(), std::ios_base::binary);
strm.push(io::gzip_decompressor());
strm.push(is);
gzstream strm(fname2);
unsigned int i = 0;
while (!strm.eof()) {
std::string line;
@@ -2113,12 +2106,7 @@ int testForwardSDSupplier() {
}
// looks good, now do a supplier:
{
io::filtering_istream strm;
// the stream must be opened in binary mode otherwise it won't work on
// Windows
std::ifstream is(fname2.c_str(), std::ios_base::binary);
strm.push(io::gzip_decompressor());
strm.push(is);
gzstream strm(fname2);
ForwardSDMolSupplier sdsup(&strm, false);
unsigned int i = 0;
@@ -2156,12 +2144,7 @@ int testForwardSDSupplier() {
TEST_ASSERT(i == 1663);
}
{
io::filtering_istream strm;
// the stream must be opened in binary mode otherwise it won't work on
// Windows
std::ifstream is(maefname2.c_str(), std::ios_base::binary);
strm.push(io::gzip_decompressor());
strm.push(is);
gzstream strm(maefname2);
unsigned int i = 0;
while (!strm.eof()) {
@@ -2176,12 +2159,8 @@ int testForwardSDSupplier() {
}
// looks good, now do a supplier:
{
auto strm = std::make_shared<io::filtering_istream>();
// the stream must be opened in binary mode otherwise it won't work on
// Windows
std::ifstream is(maefname2.c_str(), std::ios_base::binary);
strm->push(io::gzip_decompressor());
strm->push(is);
gzstream *strm = new gzstream(maefname2);
MaeMolSupplier maesup(strm);
unsigned int i = 0;
std::shared_ptr<ROMol> nmol;
@@ -2195,7 +2174,7 @@ int testForwardSDSupplier() {
}
#endif // RDK_BUILD_COORDGEN_SUPPORT
#endif
return 1;
}

View File

@@ -26,7 +26,7 @@ set(swigRDKitLibList "${swigRDKitLibList}"
"Subgraphs;GraphMol;DataStructs;Trajectory;Descriptors;"
"PartialCharges;MolTransforms;DistGeomHelpers;DistGeometry;"
"ForceFieldHelpers;ForceField;EigenSolvers;Optimizer;MolAlign;"
"Alignment;SimDivPickers;RDGeometryLib;RDGeneral;"
"Alignment;SimDivPickers;RDGeometryLib;RDStreams;RDGeneral;"
)
#if(RDK_BUILD_COORDGEN_SUPPORT)
# set(swigRDKitLibList "${swigRDKitLibList};${RDK_COORDGEN_LIBS}")
@@ -37,15 +37,21 @@ foreach(swigRDKitLib ${swigRDKitLibList})
endforeach()
set(swigRDKitLibs "${swigRDKitLibs}${Boost_SERIALIZATION_LIBRARY};")
if(RDK_BUILD_COORDGEN_SUPPORT)
if(WIN32)
find_package(Boost 1.56.0 COMPONENTS system iostreams zlib REQUIRED)
set(swigRDKitLibs "${swigRDKitLibs}${Boost_IOSTREAMS_LIBRARY};${Boost_ZLIB_LIBRARY};${Boost_SYSTEM_LIBRARY};")
else()
find_package(Boost 1.56.0 COMPONENTS system iostreams REQUIRED)
set(swigRDKitLibs "${swigRDKitLibs}${Boost_IOSTREAMS_LIBRARY};${Boost_SYSTEM_LIBRARY};")
endif(RDK_BUILD_COORDGEN_SUPPORT)
endif()
set(RDKit_Wrapper_Libs ${swigRDKitLibs})
message("SUFFIX: ${swigRDKitLibSuffix}")
message("LIBS: ${RDKit_Wrapper_Libs}")
message("JAVA_LIBS: ${RDKit_Wrapper_Libs}")
if(RDK_BUILD_SWIG_JAVA_WRAPPER)

View File

@@ -42,6 +42,7 @@
#include <GraphMol/Substruct/SubstructMatch.h>
%}
%include "Streams.i"
%newobject RDKit::ForwardSDMolSupplier::next;
%newobject RDKit::ResonanceMolSupplier::next;
@@ -50,7 +51,22 @@
%newobject RDKit::TDTMolSupplier::next;
%newobject RDKit::PDBMolSupplier::next;
%include <GraphMol/FileParsers/MolSupplier.h>
%extend RDKit::ForwardSDMolSupplier {
  // Build a supplier that reads SD records from a gzstream.
  // The supplier is created with takeOwnership == false, so it does not own
  // strm; the caller keeps the gzstream and must keep it alive while the
  // supplier is in use.
  ForwardSDMolSupplier(RDKit::gzstream *strm, bool sanitize=true, bool removeHs = true,
                       bool strictParsing = true) {
    const bool takeOwnership = false;
    RDKit::ForwardSDMolSupplier *supplier = new RDKit::ForwardSDMolSupplier(
        static_cast<std::istream *>(strm), takeOwnership, sanitize, removeHs,
        strictParsing);
    try {
      // A supplier that is already at its end has nothing to read.
      PRECONDITION(!supplier->atEnd(),
                   "ForwardSDMolSupplier: gzstream has no data to read");
    } catch (...) {
      // Do not leak the supplier if the precondition check throws.
      delete supplier;
      throw;
    }
    return supplier;
  }
};
%include <GraphMol/Resonance.h>
%extend RDKit::ResonanceMolSupplier {
@@ -60,3 +76,4 @@
return mv;
}
}

View File

@@ -0,0 +1,74 @@
/*
* Copyright (c) 2019, Novartis Institutes for BioMedical Research Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
* * Neither the name of Novartis Institutes for BioMedical Research Inc.
* nor the names of its contributors may be used to endorse or promote
* products derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
%include "std_string.i"
%{
#include <../RDStreams/streams.h>
#include <sstream>
%}
// We just need to pass the pointers around here
// Tell SWIG not to generate default constructors or destructors for the
// stream types named below.
%nodefaultctor istream;
%nodefaultdtor istream;
%nodefaultctor filtering_istream;
%nodefaultdtor filtering_istream;
// Forward declaration only: no members of std::istream are declared to SWIG.
namespace std {
class istream;
}
%extend RDKit::gzstream {
  // Return this gzstream viewed as a plain std::istream pointer.
  std::istream* _GetStream() { return static_cast<std::istream*>($self); }

  // Read everything that is left in the stream and return it as one string.
  std::string Dump() {
    std::ostringstream collected;
    std::istreambuf_iterator<char> cursor(*$self);
    const std::istreambuf_iterator<char> finish;
    std::copy(cursor, finish, std::ostreambuf_iterator<char>(collected));
    return collected.str();
  }
}
%typemap(javacode) RDKit::gzstream %{
  // std::istream view of this gzstream; fetched on first use and then reused.
  private SWIGTYPE_p_std__istream streamRef;

  /** Returns the std::istream pointer for this stream, calling _GetStream() only once. */
  public SWIGTYPE_p_std__istream GetStream() {
    if (streamRef != null) {
      return streamRef;
    }
    streamRef = _GetStream();
    return streamRef;
  }
%}
%include <../RDStreams/streams.h>

View File

@@ -247,6 +247,8 @@ typedef unsigned long long int uintmax_t;
%include "../FilterCatalog.i"
%include "../Trajectory.i"
%include "../SubstructLibrary.i"
%include "../Streams.i"
// Create a class to throw various sorts of errors for testing. Required for unit tests in ErrorHandlingTests.java
#ifdef INCLUDE_ERROR_GENERATOR

View File

@@ -226,6 +226,7 @@ typedef unsigned long long int uintmax_t;
%include "../MolStandardize.i"
%include "../SubstructLibrary.i"
%include "../RGroupDecomposition.i"
%include "../Streams.i"
// Create a class to throw various sorts of errors for testing. Required for unit tests in ErrorHandlingTests.java
#ifdef INCLUDE_ERROR_GENERATOR

View File

@@ -167,6 +167,28 @@ public class SuppliersTests extends GraphMolTest {
if((i%1000)==0) System.err.printf("Done: %s\n",i);
}
}
@Test
public void test11GZstream() {
    // Read the gzip-compressed SD file NCI_aids_few.sdf.gz through a gzstream
    // and check that the forward supplier yields the expected 16 molecules.
    File fileN = new File(baseTestPath, "NCI_aids_few.sdf.gz");
    assertTrue(fileN.exists());

    gzstream stream = new gzstream(fileN.getPath());
    ForwardSDMolSupplier suppl = new ForwardSDMolSupplier(stream);
    assertFalse(suppl.atEnd());

    ArrayList<ROMol> ms = new ArrayList<ROMol>();
    do {
        ROMol m = suppl.next();
        // next() may return null; only keep real molecules
        if (m != null) {
            ms.add(m);
        }
    } while (!suppl.atEnd());
    assertEquals(16, ms.size());
}
public static void main(String args[]) {
org.junit.runner.JUnitCore.main("org.RDKit.SuppliersTests");

View File

@@ -0,0 +1,17 @@
# Build configuration for the RDStreams library (streams.cpp).
# Locate the Boost system and iostreams components.
find_package(Boost 1.56.0 COMPONENTS system iostreams REQUIRED)
# Save the library list now: the WIN32 find_package() call further down may
# overwrite Boost_LIBRARIES.
set (link_iostreams ${Boost_LIBRARIES})
# When Boost is not linked statically, request dynamic linking of Boost.Iostreams.
if (NOT Boost_USE_STATIC_LIBS)
add_definitions("-DBOOST_IOSTREAMS_DYN_LINK")
endif()
# On Windows the Boost zlib component is located as well and linked through
# the Boost::zlib target.
# NOTE(review): this call asks for Boost 1.58.0 while the one above asks for
# 1.56.0 - confirm which minimum version is intended.
if (WIN32)
find_package(Boost 1.58.0 COMPONENTS zlib REQUIRED)
set(zlib_lib Boost::zlib)
endif()
add_definitions(-DRDKIT_RDSTREAMS_BUILD)
# NOTE(review): both ${Boost_LIBRARIES} and ${link_iostreams} are listed; when
# the WIN32 branch is not taken they hold the same value.
rdkit_library(RDStreams streams.cpp
LINK_LIBRARIES ${Boost_LIBRARIES} ${link_iostreams} ${zlib_lib}
DEST RDStreams)

View File

@@ -0,0 +1,11 @@
#include "streams.h"
namespace RDKit {
// Opens fname in binary mode and assembles the filter chain: a gzip
// decompressor first, then the file stream that supplies the compressed bytes.
gzstream::gzstream(const std::string &fname)
    : is(fname.c_str(), std::ios_base::binary) {
  namespace bio = boost::iostreams;
  this->push(bio::gzip_decompressor());
  this->push(is);
}
}  // namespace RDKit

16
Code/RDStreams/streams.h Normal file
View File

@@ -0,0 +1,16 @@
//
#include <RDGeneral/export.h>
#include <boost/iostreams/device/file.hpp>
#include <boost/iostreams/filtering_stream.hpp>
#include <boost/iostreams/filter/gzip.hpp>
namespace RDKit {
//! Input stream that reads a gzip-compressed file through a
//! boost::iostreams filter chain.
/*!
  The object is itself a boost::iostreams::filtering_istream, so it can be
  passed wherever a std::istream is expected.
*/
class RDKIT_RDSTREAMS_EXPORT gzstream
    : public boost::iostreams::filtering_istream {
  // The file stream pushed onto the filter chain by the constructor.
  std::ifstream is;

 public:
  //! \param fname  path of the gzip-compressed file to read
  gzstream(const std::string &fname);

  //! Empties the filter chain before the file stream goes away.
  /*!
    Members are destroyed before base classes, so without this the
    filtering_istream base would be torn down after `is`, while its chain
    still refers to it. Errors raised while closing the chain are swallowed
    because a destructor must not throw.
  */
  ~gzstream() {
    try {
      reset();
    } catch (...) {
    }
  }
};
}  // namespace RDKit

View File

@@ -4,8 +4,8 @@ environment:
P: "c:/projects/libs"
RDBASE: "c:/projects/RDKit"
PATH: "%RDBASE%/lib;%PATH%"
BOOST_ROOT: c:/Libraries/boost_1_67_0
BOOST_LIBRARYDIR: c:/Libraries/boost_1_67_0/lib64-msvc-14.0
BOOST_ROOT: c:/Libraries/boost_1_69_0
BOOST_LIBRARYDIR: c:/Libraries/boost_1_69_0/lib64-msvc-14.1
image: Visual Studio 2017
@@ -22,6 +22,7 @@ branches:
# scripts that are called at very beginning, before repo cloning
init:
- ps: iex ((new-object net.webclient).DownloadString('https://raw.githubusercontent.com/appveyor/ci/master/scripts/enable-rdp.ps1'))
- git config --global core.autocrlf input
- cmake -version
@@ -46,7 +47,7 @@ before_build:
- mkdir c:\projects\RDKit\build
- cd c:\projects\RDKit\build
- call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvars64.bat" amd64
- cmake -G "Visual Studio 15 2017 Win64" -DCMAKE_INSTALL_PREFIX=%P% -DRDK_USE_BOOST_SERIALIZATION=ON -DRDK_BUILD_PYTHON_WRAPPERS=OFF -DRDK_BUILD_SWIG_WRAPPERS=OFF -DBOOST_ROOT="%BOOST_ROOT%" -DBOOST_LIBRARYDIR="%BOOST_LIBRARYDIR%" ..
- cmake -G "Visual Studio 15 2017 Win64" -DCMAKE_INSTALL_PREFIX=%P% -DRDK_USE_BOOST_SERIALIZATION=ON -DRDK_BUILD_PYTHON_WRAPPERS=OFF -DRDK_BUILD_SWIG_WRAPPERS=OFF -DBOOST_ROOT="%BOOST_ROOT%" -DBOOST_LIBRARYDIR="%BOOST_LIBRARYDIR%" -DBoost_USE_STATIC_LIBS=ON -DBoost_USE_MULTITHREADED=ON ..
test_script:
- cd c:\projects\RDKit\build
@@ -64,4 +65,4 @@ test_script:
#deploy_script:
# - cd c:\projects\sqlite
# - curl -T sqlite.zip --user %ACCOUNT% https://webdav.yandex.ru/libs/sqlite.zip
# - curl -T sqlite.zip --user %ACCOUNT% https://webdav.yandex.ru/libs/sqlite.zip