mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-04 21:54:27 +08:00
* do not use new on loggers * del pointers in testDistGeom * Update Dict hasNonPOD status on bulk update * delete new Dicts in memtest1.cpp * fixes in MolSuppliers and testFMCS * PeriodicTable singleton as unique_ptr * fix EEM_arrays leak * fix leaks in testPBF * fix ParamCollection leak in test UFF * fix leaks in MMFF * clear prop dict before read in in pickler * fix leaks in testFreeSASA * fix leaks in test3D * modernize Dict.h & SmilesParse.cpp * fix leaks in testQuery * fix leaks in testCrystalFF * fix leaks in cxsmilesTest * fix leaks in Catalog & mol cat test * fix leaks in ShapeUtils & tests * fix leaks in testSubgraphs1 * fix leaks testFingerprintGenerators * fix leaks in Catalog/FilterCatalog * fix leaks in graphmolqueryTest * these changes reduce bison parse leaks * fixed leaks in testChirality.cpp * fix leaks + 2 tests in testMolWriter * fix 4m leaks in substructLibraryTest * small improvements to molTautomerTest; still leaks * fix leaks in testRGroupDecomp * fix leaks in test; parser still leaks * fix leaks in itertest * fix 4m leaks in testDepictor * fixes in smatest; still leaking due to parser * fixes in testSLNParse; still leaking due to parser * flex/bison: always add atoms with ownership; smarts error cleanup * fix leaks in testReaction * fix leaks in testSubstructMatch * fix leaks in resMolSupplierTest * fix leaks in testChemTransforms + bug in ChemTransforms * fix leaks in testPickler * fix leaks in testMolTransform * fix leaks in testFragCatalog * fix leak in testSLNParse. 
Still leaks due to Smiles * fixed most leaks in testMolSupplier * pre bison fix * fix some atom & bond parse problems; others still fail * bison smiles & smarts, atoms & bonds more or less fixed * fix leaks in molopstest.cpp * fix leaks in testFingerprints, MACCS.cpp & AtomPairs.cpp * fix leaks in moldraw2Dtest1 * fix leaks in testDescriptors * fix leaks in testInchi * fix leaks in testUFFForceFieldHelpers * fix leaks in hanoiTest & new_canon.h * fix leaks in testMMFFForceField * fix leaks in graphmolTest1 * fix leaks in testMMFFForceFieldHelpers * fix leaks in testDistGeomHelpers * fix leaks in testMolAlign * initialize occupancy & temp facto with default values * fix leak in TautomerTransform * updated suppressions * fix testStructChecker * fix logging & py tests * fix TautomerTransform class/struct issue * remove misplaced delete in testSLNParse * deinit in testAvalonLib1 * fix Avalon-triggered(?) bug in StructChecker/Pattern.cpp * fix random testMolWriter/Supplier fails - diversify output file names to avoid clashing. - unify Writers close/destruct behavior. - flushing/closing in tests. * use reset in FFs Params.cpp * comments on testMMFFForceField * unrequired 'if's added to mol suppliers * correct cast in FilterCatalog.h * use unique_ptr in MACCS Patterns * remove unrequred if in new_canon * update & move suppressions
522 lines
14 KiB
C++
522 lines
14 KiB
C++
//
|
|
// Copyright (C) 2002-2008 Greg Landrum and Rational Discovery LLC
|
|
//
|
|
// @@ All Rights Reserved @@
|
|
// This file is part of the RDKit.
|
|
// The contents are covered by the terms of the BSD license
|
|
// which is included in the file license.txt, found at the root
|
|
// of the RDKit source tree.
|
|
//
|
|
//
|
|
#include <RDGeneral/export.h>
|
|
#ifndef _RD_STREAMOPS_H
|
|
#define _RD_STREAMOPS_H
|
|
|
|
#include "types.h"
|
|
#include "Invariant.h"
|
|
#include "RDProps.h"
|
|
#include <string>
|
|
#include <sstream>
|
|
#include <iostream>
|
|
#include <boost/cstdint.hpp>
|
|
#include <boost/detail/endian.hpp>
|
|
|
|
namespace RDKit {
|
|
// The endian-handling helpers below are adapted from:
// http://stackoverflow.com/questions/105252/how-do-i-convert-between-big-endian-and-little-endian-values-in-c

//! Byte orders understood by the byte-swapping helpers.
/*!
  HOST_ENDIAN_ORDER is an alias for whichever of the two orders the Boost
  endian macros (BOOST_LITTLE_ENDIAN / BOOST_BIG_ENDIAN) report; compilation
  fails if neither macro is defined.
*/
enum EEndian {
  LITTLE_ENDIAN_ORDER = 0,
  BIG_ENDIAN_ORDER = 1,
#if defined(BOOST_LITTLE_ENDIAN)
  HOST_ENDIAN_ORDER = LITTLE_ENDIAN_ORDER
#elif defined(BOOST_BIG_ENDIAN)
  HOST_ENDIAN_ORDER = BIG_ENDIAN_ORDER
#else
#error "Failed to determine the system endian value"
#endif
};
|
|
|
|
//! Reverses the byte order of a value.
/*!
  \tparam T     the value type; its bytes are copied directly, so it must be
                trivially copyable (in practice an arithmetic type)
  \tparam size  the number of leading bytes to reverse; callers in this file
                pass sizeof(T)
  \param value  the value whose bytes are to be swapped
  \return a copy of \c value with its first \c size bytes reversed, or
          \c value unchanged when \c size is less than 2

  The bytes are accessed through unsigned char pointers instead of writing one
  member of a union and reading another, which is undefined behavior in C++.
*/
template <class T, unsigned int size>
inline T SwapBytes(T value) {
  static_assert(size <= sizeof(T),
                "SwapBytes: size must not exceed sizeof(T)");
  if (size < 2) {
    return value;
  }

  // Start from a copy so any bytes beyond `size` keep a well-defined value.
  T swapped = value;
  const unsigned char *src = reinterpret_cast<const unsigned char *>(&value);
  unsigned char *dst = reinterpret_cast<unsigned char *>(&swapped);
  for (unsigned int i = 0; i < size; ++i) {
    dst[i] = src[size - 1 - i];
  }
  return swapped;
}
|
|
|
|
// Here is the function you will use. Again there is two compile-time assertion
|
|
// that use the boost librarie. You could probably comment them out, but if you
|
|
// do be cautious not to use this function for anything else than integers
|
|
// types. This function need to be calles like this :
|
|
//
|
|
// int x = someValue;
|
|
// int i = EndianSwapBytes<HOST_ENDIAN_ORDER, BIG_ENDIAN_ORDER>(x);
|
|
//
|
|
template <EEndian from, EEndian to, class T>
|
|
inline T EndianSwapBytes(T value) {
|
|
// A : La donnée à swapper à une taille de 2, 4 ou 8 octets
|
|
BOOST_STATIC_ASSERT(sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 ||
|
|
sizeof(T) == 8);
|
|
if (sizeof(T) == 1) return value;
|
|
|
|
// A : La donnée à swapper est d'un type arithmetic
|
|
// BOOST_STATIC_ASSERT(boost::is_arithmetic<T>::value);
|
|
|
|
// Si from et to sont du même type on ne swap pas.
|
|
if (from == to) return value;
|
|
|
|
return SwapBytes<T, sizeof(T)>(value);
|
|
}
|
|
//! Overload for \c char: a single byte has no byte order, so the value is
//! returned unchanged regardless of \c from and \c to.
template <EEndian from, EEndian to>
inline char EndianSwapBytes(char value) {
  return value;
}
|
|
//! Overload for \c unsigned \c char: a single byte has no byte order, so the
//! value is returned unchanged regardless of \c from and \c to.
template <EEndian from, EEndian to>
inline unsigned char EndianSwapBytes(unsigned char value) {
  return value;
}
|
|
//! Overload for \c signed \c char: a single byte has no byte order, so the
//! value is returned unchanged regardless of \c from and \c to.
template <EEndian from, EEndian to>
inline signed char EndianSwapBytes(signed char value) {
  return value;
}
|
|
// --------------------------------------
|
|
|
|
//! Packs an integer and outputs it to a stream
|
|
inline void appendPackedIntToStream(std::stringstream &ss,
|
|
boost::uint32_t num) {
|
|
int nbytes, bix;
|
|
unsigned int val, res;
|
|
char tc;
|
|
|
|
res = num;
|
|
while (1) {
|
|
if (res < (1 << 7)) {
|
|
val = (res << 1);
|
|
nbytes = 1;
|
|
break;
|
|
}
|
|
res -= (1 << 7);
|
|
if (res < (1 << 14)) {
|
|
val = ((res << 2) | 1);
|
|
nbytes = 2;
|
|
break;
|
|
}
|
|
res -= (1 << 14);
|
|
if (res < (1 << 21)) {
|
|
val = ((res << 3) | 3);
|
|
nbytes = 3;
|
|
break;
|
|
}
|
|
res -= (1 << 21);
|
|
if (res < (1 << 29)) {
|
|
val = ((res << 3) | 7);
|
|
nbytes = 4;
|
|
break;
|
|
} else {
|
|
CHECK_INVARIANT(0, "ERROR: Integer too big to pack\n");
|
|
}
|
|
}
|
|
// val = EndianSwapBytes<HOST_ENDIAN_ORDER,LITTLE_ENDIAN_ORDER>(val);
|
|
|
|
for (bix = 0; bix < nbytes; bix++) {
|
|
tc = (char)(val & 255);
|
|
ss.write(&tc, 1);
|
|
val >>= 8;
|
|
}
|
|
}
|
|
|
|
//! Reads an integer from a stream in packed format and returns the result.
|
|
inline boost::uint32_t readPackedIntFromStream(std::stringstream &ss) {
|
|
boost::uint32_t val, num;
|
|
int shift, offset;
|
|
char tmp;
|
|
ss.read(&tmp, sizeof(tmp));
|
|
val = UCHAR(tmp);
|
|
offset = 0;
|
|
if ((val & 1) == 0) {
|
|
shift = 1;
|
|
} else if ((val & 3) == 1) {
|
|
ss.read((char *)&tmp, sizeof(tmp));
|
|
val |= (UCHAR(tmp) << 8);
|
|
shift = 2;
|
|
offset = (1 << 7);
|
|
} else if ((val & 7) == 3) {
|
|
ss.read((char *)&tmp, sizeof(tmp));
|
|
val |= (UCHAR(tmp) << 8);
|
|
ss.read((char *)&tmp, sizeof(tmp));
|
|
val |= (UCHAR(tmp) << 16);
|
|
shift = 3;
|
|
offset = (1 << 7) + (1 << 14);
|
|
} else {
|
|
ss.read((char *)&tmp, sizeof(tmp));
|
|
val |= (UCHAR(tmp) << 8);
|
|
ss.read((char *)&tmp, sizeof(tmp));
|
|
val |= (UCHAR(tmp) << 16);
|
|
ss.read((char *)&tmp, sizeof(tmp));
|
|
val |= (UCHAR(tmp) << 24);
|
|
shift = 3;
|
|
offset = (1 << 7) + (1 << 14) + (1 << 21);
|
|
}
|
|
num = (val >> shift) + offset;
|
|
// num = EndianSwapBytes<LITTLE_ENDIAN_ORDER,HOST_ENDIAN_ORDER>(num);
|
|
return num;
|
|
}
|
|
|
|
//! Reads an integer from a char * in packed format and returns the result.
|
|
//! The argument is advanced
|
|
inline boost::uint32_t pullPackedIntFromString(const char *&text) {
|
|
boost::uint32_t val, num;
|
|
int shift, offset;
|
|
char tmp;
|
|
tmp = *text;
|
|
text++;
|
|
val = UCHAR(tmp);
|
|
offset = 0;
|
|
if ((val & 1) == 0) {
|
|
shift = 1;
|
|
} else if ((val & 3) == 1) {
|
|
tmp = *text;
|
|
text++;
|
|
val |= (UCHAR(tmp) << 8);
|
|
shift = 2;
|
|
offset = (1 << 7);
|
|
} else if ((val & 7) == 3) {
|
|
tmp = *text;
|
|
text++;
|
|
val |= (UCHAR(tmp) << 8);
|
|
tmp = *text;
|
|
text++;
|
|
val |= (UCHAR(tmp) << 16);
|
|
shift = 3;
|
|
offset = (1 << 7) + (1 << 14);
|
|
} else {
|
|
tmp = *text;
|
|
text++;
|
|
val |= (UCHAR(tmp) << 8);
|
|
tmp = *text;
|
|
text++;
|
|
val |= (UCHAR(tmp) << 16);
|
|
tmp = *text;
|
|
text++;
|
|
val |= (UCHAR(tmp) << 24);
|
|
shift = 3;
|
|
offset = (1 << 7) + (1 << 14) + (1 << 21);
|
|
}
|
|
num = (val >> shift) + offset;
|
|
// num = EndianSwapBytes<LITTLE_ENDIAN_ORDER,HOST_ENDIAN_ORDER>(num);
|
|
return num;
|
|
}
|
|
|
|
//! does a binary write of an object to a stream
|
|
template <typename T>
|
|
void streamWrite(std::ostream &ss, const T &val) {
|
|
T tval = EndianSwapBytes<HOST_ENDIAN_ORDER, LITTLE_ENDIAN_ORDER>(val);
|
|
ss.write((const char *)&tval, sizeof(T));
|
|
}
|
|
|
|
//! special case for string
|
|
inline void streamWrite(std::ostream &ss, const std::string &what) {
|
|
unsigned int l = rdcast<unsigned int>(what.length());
|
|
ss.write((const char *)&l, sizeof(l));
|
|
ss.write(what.c_str(), sizeof(char) * l);
|
|
};
|
|
|
|
template <typename T>
|
|
void streamWriteVec(std::ostream &ss, const T &val) {
|
|
streamWrite(ss, static_cast<boost::uint64_t>(val.size()));
|
|
for (size_t i = 0; i < val.size(); ++i) streamWrite(ss, val[i]);
|
|
}
|
|
|
|
//! does a binary read of an object from a stream
|
|
template <typename T>
|
|
void streamRead(std::istream &ss, T &loc) {
|
|
T tloc;
|
|
ss.read((char *)&tloc, sizeof(T));
|
|
loc = EndianSwapBytes<LITTLE_ENDIAN_ORDER, HOST_ENDIAN_ORDER>(tloc);
|
|
}
|
|
|
|
//! special case for string
|
|
template <class T>
|
|
void streamRead(std::istream &ss, T &obj, int version) {
|
|
RDUNUSED_PARAM(version);
|
|
streamRead(ss, obj);
|
|
}
|
|
|
|
//! Overload for std::string: reads a length, then that many characters
/*!
  Reads back the layout written by the std::string overload of streamWrite():
  the raw bytes of an unsigned int length (no byte-order conversion is
  applied) followed by that many characters.

  \param ss       the stream to read from
  \param what     receives exactly the characters read, including any
                  embedded null characters
  \param version  unused
*/
inline void streamRead(std::istream &ss, std::string &what, int version) {
  (void)version;  // accepted for signature compatibility; not consulted

  // Start from zero so that a failed read cannot leave a garbage length.
  unsigned int l = 0;
  ss.read(reinterpret_cast<char *>(&l), sizeof(l));

  // Read straight into a string of the right size: the storage is released
  // automatically if the stream throws, and assigning the whole buffer
  // (rather than a C string) keeps data that follows an embedded null.
  std::string buff(l, '\0');
  ss.read(&buff[0], sizeof(char) * l);
  what.swap(buff);
}
|
|
|
|
template <class T>
|
|
void streamReadVec(std::istream &ss, T &val) {
|
|
boost::uint64_t size;
|
|
streamRead(ss, size);
|
|
val.resize(size);
|
|
|
|
for (size_t i = 0; i < size; ++i) streamRead(ss, val[i]);
|
|
}
|
|
|
|
inline void streamReadStringVec(std::istream &ss, std::vector<std::string> &val,
|
|
int version) {
|
|
boost::uint64_t size;
|
|
streamRead(ss, size);
|
|
val.resize(size);
|
|
|
|
for (size_t i = 0; i < size; ++i) streamRead(ss, val[i], version);
|
|
}
|
|
|
|
//! Reads the next line from an input stream and returns it
/*!
  The line is read with std::getline(); a single trailing carriage return
  (as left behind by "\r\n" line endings) is removed.

  \param inStream  pointer to the stream to read from
  \return the line without its line terminator
*/
inline std::string getLine(std::istream *inStream) {
  std::string line;
  std::getline(*inStream, line);
  if (!line.empty() && line.back() == '\r') {
    line.pop_back();
  }
  return line;
}
|
|
//! grabs the next line from an instream and returns it.
|
|
inline std::string getLine(std::istream &inStream) {
|
|
return getLine(&inStream);
|
|
}
|
|
|
|
// n.b. RDTypeTag values cannot be written out directly because they are
// implementation specific; these one-byte tags are used in the stream
// instead.
namespace DTags {
// single values
constexpr unsigned char StringTag = 0;
constexpr unsigned char IntTag = 1;
constexpr unsigned char UnsignedIntTag = 2;
constexpr unsigned char BoolTag = 3;
constexpr unsigned char FloatTag = 4;
constexpr unsigned char DoubleTag = 5;

// vectors of values
constexpr unsigned char VecStringTag = 6;
constexpr unsigned char VecIntTag = 7;
constexpr unsigned char VecUIntTag = 8;
constexpr unsigned char VecBoolTag = 9;
constexpr unsigned char VecFloatTag = 10;
constexpr unsigned char VecDoubleTag = 11;

constexpr unsigned char EndTag = 0xFF;
}  // namespace DTags
|
|
|
|
//! Reports whether a property's value has a type that can be written out
/*!
  \param pair  the key/value pair to inspect
  \return true when the value's tag is one of the string, int, unsigned int,
          bool, float or double tags, or one of the string, int, unsigned
          int, float or double vector tags; false otherwise
*/
inline bool isSerializable(const Dict::Pair &pair) {
  const auto tag = pair.val.getTag();

  const bool isSingleValue =
      tag == RDTypeTag::StringTag || tag == RDTypeTag::IntTag ||
      tag == RDTypeTag::UnsignedIntTag || tag == RDTypeTag::BoolTag ||
      tag == RDTypeTag::FloatTag || tag == RDTypeTag::DoubleTag;

  const bool isVectorValue =
      tag == RDTypeTag::VecStringTag || tag == RDTypeTag::VecIntTag ||
      tag == RDTypeTag::VecUnsignedIntTag || tag == RDTypeTag::VecFloatTag ||
      tag == RDTypeTag::VecDoubleTag;

  return isSingleValue || isVectorValue;
}
|
|
|
|
//! Writes a single property (key, type tag, value) to a stream
/*!
  Nothing is written when isSerializable() rejects the pair. Otherwise the
  key is written first, then the DTags byte matching the value's type, then
  the value itself.

  \param ss    the stream to write to
  \param pair  the property to write
  \return true if the property was written; false if it is not serializable
          or its tag is not handled here
*/
inline bool streamWriteProp(std::ostream &ss, const Dict::Pair &pair) {
  if (!isSerializable(pair)) {
    return false;
  }

  const auto &value = pair.val;
  streamWrite(ss, pair.key);

  switch (value.getTag()) {
    // --- single values ---
    case RDTypeTag::StringTag:
      streamWrite(ss, DTags::StringTag);
      streamWrite(ss, rdvalue_cast<std::string>(value));
      break;
    case RDTypeTag::IntTag:
      streamWrite(ss, DTags::IntTag);
      streamWrite(ss, rdvalue_cast<int>(value));
      break;
    case RDTypeTag::UnsignedIntTag:
      streamWrite(ss, DTags::UnsignedIntTag);
      streamWrite(ss, rdvalue_cast<unsigned int>(value));
      break;
    case RDTypeTag::BoolTag:
      streamWrite(ss, DTags::BoolTag);
      streamWrite(ss, rdvalue_cast<bool>(value));
      break;
    case RDTypeTag::FloatTag:
      streamWrite(ss, DTags::FloatTag);
      streamWrite(ss, rdvalue_cast<float>(value));
      break;
    case RDTypeTag::DoubleTag:
      streamWrite(ss, DTags::DoubleTag);
      streamWrite(ss, rdvalue_cast<double>(value));
      break;

    // --- vectors ---
    case RDTypeTag::VecStringTag:
      streamWrite(ss, DTags::VecStringTag);
      streamWriteVec(ss, rdvalue_cast<std::vector<std::string>>(value));
      break;
    case RDTypeTag::VecIntTag:
      streamWrite(ss, DTags::VecIntTag);
      streamWriteVec(ss, rdvalue_cast<std::vector<int>>(value));
      break;
    case RDTypeTag::VecUnsignedIntTag:
      streamWrite(ss, DTags::VecUIntTag);
      streamWriteVec(ss, rdvalue_cast<std::vector<unsigned int>>(value));
      break;
    case RDTypeTag::VecFloatTag:
      streamWrite(ss, DTags::VecFloatTag);
      streamWriteVec(ss, rdvalue_cast<std::vector<float>>(value));
      break;
    case RDTypeTag::VecDoubleTag:
      streamWrite(ss, DTags::VecDoubleTag);
      streamWriteVec(ss, rdvalue_cast<std::vector<double>>(value));
      break;

    default:
      // The key has already been written at this point.
      std::cerr << "Failed to write " << pair.key << std::endl;
      return false;
  }
  return true;
}
|
|
|
|
inline bool streamWriteProps(std::ostream &ss, const RDProps &props,
|
|
bool savePrivate=false, bool saveComputed=false) {
|
|
STR_VECT propsToSave = props.getPropList(savePrivate, saveComputed);
|
|
std::set<std::string> propnames(propsToSave.begin(), propsToSave.end());
|
|
|
|
const Dict &dict = props.getDict();
|
|
unsigned int count = 0;
|
|
for(Dict::DataType::const_iterator it = dict.getData().begin();
|
|
it != dict.getData().end();
|
|
++it) {
|
|
if(isSerializable(*it) && propnames.find(it->key) != propnames.end()) {
|
|
count ++;
|
|
}
|
|
}
|
|
|
|
streamWrite(ss, count); // packed int?
|
|
|
|
unsigned int writtenCount = 0;
|
|
for(Dict::DataType::const_iterator it = dict.getData().begin();
|
|
it != dict.getData().end();
|
|
++it) {
|
|
if (propnames.find(it->key) != propnames.end()) {
|
|
if (isSerializable(*it)) {
|
|
// note - not all properties are serializable, this may be
|
|
// a null op
|
|
if (streamWriteProp(ss, *it)) {
|
|
writtenCount++;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
POSTCONDITION(count==writtenCount, "Estimated property count not equal to written");
|
|
return true;
|
|
}
|
|
|
|
template<class T>
|
|
void readRDValue(std::istream &ss, RDValue &value) {
|
|
T v;
|
|
streamRead(ss, v);
|
|
value = v;
|
|
}
|
|
|
|
template <class T>
|
|
void readRDVecValue(std::istream &ss, RDValue &value) {
|
|
std::vector<T> v;
|
|
streamReadVec(ss, v);
|
|
value = v;
|
|
}
|
|
|
|
inline void readRDValueString(std::istream &ss, RDValue &value) {
|
|
std::string v;
|
|
int version=0;
|
|
streamRead(ss, v, version);
|
|
value = v;
|
|
}
|
|
|
|
inline void readRDStringVecValue(std::istream &ss, RDValue &value) {
|
|
std::vector<std::string> v;
|
|
int version = 0;
|
|
streamReadStringVec(ss, v, version);
|
|
value = v;
|
|
}
|
|
|
|
inline bool streamReadProp(std::istream &ss, Dict::Pair &pair, bool& dictHasNonPOD) {
|
|
int version=0;
|
|
streamRead(ss, pair.key, version);
|
|
|
|
unsigned char type;
|
|
streamRead(ss, type);
|
|
switch(type) {
|
|
case DTags::IntTag: readRDValue<int>(ss, pair.val); break;
|
|
case DTags::UnsignedIntTag: readRDValue<unsigned int>(ss, pair.val); break;
|
|
case DTags::BoolTag: readRDValue<bool>(ss, pair.val); break;
|
|
case DTags::FloatTag: readRDValue<float>(ss, pair.val); break;
|
|
case DTags::DoubleTag: readRDValue<double>(ss, pair.val); break;
|
|
|
|
case DTags::StringTag:
|
|
readRDValueString(ss, pair.val);
|
|
dictHasNonPOD = true;
|
|
break;
|
|
case DTags::VecStringTag:
|
|
readRDStringVecValue(ss, pair.val);
|
|
dictHasNonPOD = true;
|
|
break;
|
|
case DTags::VecIntTag:
|
|
readRDVecValue<int>(ss, pair.val);
|
|
dictHasNonPOD = true;
|
|
break;
|
|
case DTags::VecUIntTag:
|
|
readRDVecValue<unsigned int>(ss, pair.val);
|
|
dictHasNonPOD = true;
|
|
break;
|
|
case DTags::VecFloatTag:
|
|
readRDVecValue<float>(ss, pair.val);
|
|
dictHasNonPOD = true;
|
|
break;
|
|
case DTags::VecDoubleTag:
|
|
readRDVecValue<double>(ss, pair.val);
|
|
dictHasNonPOD = true;
|
|
break;
|
|
|
|
default:
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
inline unsigned int streamReadProps(std::istream &ss, RDProps &props) {
|
|
unsigned int count;
|
|
streamRead(ss, count);
|
|
|
|
Dict &dict = props.getDict();
|
|
dict.reset(); // Clear data before repopulating
|
|
dict.getData().resize(count);
|
|
for(unsigned index = 0; index<count; ++index) {
|
|
CHECK_INVARIANT(streamReadProp(ss, dict.getData()[index], dict.getNonPODStatus()),
|
|
"Corrupted property serialization detected");
|
|
}
|
|
|
|
return count;
|
|
}
|
|
|
|
}
|
|
|
|
#endif
|