mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-03 21:44:30 +08:00
Expose CDX support to FileParsers and ChemDraw to SWIG (#8681)
* First pass at CDX support * Enable CDX support for reading (also) in the CDXMLParser API * Add cdxml test files * Update swig wrappers for CDXMLFormat and Parameters * Add constructor to ChemDrawParserParams * Add Java SWIG support for ChemDraw * Add chemdraw define to rdconfig * Add missing chemdraw deps * Remove direct expat link * Fix Java linkages for ChemDraw * Remove bad merge code * Remove bad merge code * Fix csharp builds * Add sniffer for the ChemDraw DataStream * Include filesystem * Fix test on windows * Add more CDX tests * Ensure streams are open in binary mode to support CDX on windows * Fix text to show that a Block is the text input, not a file * Fix CSharp test * Disable CDX tests when not building chemdraw * Turn back on chemdraw * Response to review * Turn off chemdraw support for the limited external test --------- Co-authored-by: Brian Kelley <bkelley@glysade.com>
This commit is contained in:
6
External/ChemDraw/Wrap/rdChemDraw.cpp
vendored
6
External/ChemDraw/Wrap/rdChemDraw.cpp
vendored
@@ -70,7 +70,8 @@ python::object MolsFromChemDrawBlockHelper(const std::string &filename, bool san
|
||||
bool removeHs) {
|
||||
std::vector<std::unique_ptr<RWMol>> mols;
|
||||
try {
|
||||
mols = RDKit::v2::MolsFromChemDrawBlock(filename, {sanitize, removeHs});
|
||||
mols = RDKit::v2::MolsFromChemDrawBlock(filename,
|
||||
{sanitize, removeHs, RDKit::v2::CDXFormat::CDXML});
|
||||
} catch (RDKit::BadFileException &e) {
|
||||
PyErr_SetString(PyExc_IOError, e.what());
|
||||
throw python::error_already_set();
|
||||
@@ -89,7 +90,8 @@ python::object MolsFromChemDrawBlockHelper(const std::string &filename, bool san
|
||||
|
||||
python::tuple MolsFromChemDrawFileHelper(python::object cdxml, bool sanitize,
|
||||
bool removeHs) {
|
||||
auto mols = RDKit::v2::MolsFromChemDrawFile(pyObjectToString(cdxml), {sanitize, removeHs});
|
||||
auto mols = RDKit::v2::MolsFromChemDrawFile(pyObjectToString(cdxml),
|
||||
{sanitize, removeHs, RDKit::v2::CDXFormat::CDXML});
|
||||
python::list res;
|
||||
for (auto &mol : mols) {
|
||||
// take ownership of the data from the unique_ptr
|
||||
|
||||
47
External/ChemDraw/chemdraw.cpp
vendored
47
External/ChemDraw/chemdraw.cpp
vendored
@@ -237,8 +237,41 @@ void visit_children(
|
||||
}
|
||||
}
|
||||
|
||||
/// Peeks at the next bytes of \c is to decide whether it holds binary CDX or
/// CDXML data, then puts the read position back where it was.
///
/// \param is  input stream positioned at the start of the ChemDraw data
/// \return CDXFormat::CDX when the next 8 bytes are exactly the CDX signature
///         "VjCD0100"; CDXFormat::CDXML when they are not (including input
///         shorter than 8 bytes); CDXFormat::AUTO when the stream cannot
///         report or restore its read position, i.e. sniffing failed.
CDXFormat sniff_format(std::istream &is) {
  // Remember the current read position
  const std::streampos start_pos = is.tellg();
  if (start_pos == std::streampos(-1)) {
    // Some streams (like std::cin) may not support tellg
    return CDXFormat::AUTO;  // here it simply means we failed
  }

  // CDX header consists of:
  //  8 bytes with the value "VjCD0100" (hex: 56 6A 43 44 30 31 30 30).
  // A fixed array avoids heap allocations for an 8-byte constant.
  static const char cdx_magic[] = {'V', 'j', 'C', 'D', '0', '1', '0', '0'};
  constexpr std::streamsize magic_len = sizeof(cdx_magic);

  char buf[magic_len] = {};
  is.read(buf, magic_len);

  // Only trust the buffer when the full signature length was actually read;
  // a short read can never be a CDX header.
  bool is_cdx = (is.gcount() == magic_len);
  for (std::streamsize i = 0; is_cdx && i < magic_len; ++i) {
    is_cdx = (buf[i] == cdx_magic[i]);
  }

  // Reset the stream position
  is.clear();  // clear EOF/fail flags if the input was shorter than 8 bytes
  is.seekg(start_pos);
  if (!is) {
    // The stream could not be rewound, so a parser would start mid-header;
    // report failure rather than a format the caller cannot actually read.
    return CDXFormat::AUTO;
  }

  return is_cdx ? CDXFormat::CDX : CDXFormat::CDXML;
}
|
||||
|
||||
std::unique_ptr<CDXDocument> streamToCDXDocument(std::istream &inStream,
|
||||
CDXFormat format) {
|
||||
if(format == CDXFormat::AUTO) {
|
||||
format = sniff_format(inStream);
|
||||
if(format == CDXFormat::AUTO) {
|
||||
const std::string msg = " Failed deducing whether the input stream is CDXML or CDX";
|
||||
BOOST_LOG(rdErrorLog) << msg << std::endl;
|
||||
throw FileParseException(msg);
|
||||
}
|
||||
}
|
||||
|
||||
if (format == CDXFormat::CDXML) {
|
||||
CDXMLParser parser;
|
||||
// populate tree structure pt
|
||||
@@ -249,17 +282,21 @@ std::unique_ptr<CDXDocument> streamToCDXDocument(std::istream &inStream,
|
||||
static_cast<int>(data.size()),
|
||||
HaveAllXml)) {
|
||||
auto error = XML_GetErrorCode(parser);
|
||||
BOOST_LOG(rdErrorLog) << "Failed parsing XML with error code " << error;
|
||||
throw FileParseException("Bad Input File");
|
||||
std::stringstream msg;
|
||||
msg << "Failed parsing XML with error code " << error;
|
||||
BOOST_LOG(rdErrorLog) << msg.str() << std::endl;
|
||||
throw FileParseException(msg.str());
|
||||
}
|
||||
|
||||
return parser.ReleaseDocument();
|
||||
} else {
|
||||
throw FileParseException("Can't handle cdx yet");
|
||||
return std::unique_ptr<CDXDocument>();
|
||||
CDXistream input(inStream);
|
||||
const bool doThrow = true;
|
||||
std::unique_ptr<CDXDocument> doc(CDXReadDocFromStorage(input, doThrow));
|
||||
return doc;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// may raise FileParseException
|
||||
std::vector<std::unique_ptr<RWMol>> molsFromCDXMLDataStream(
|
||||
std::istream &inStream, const ChemDrawParserParams ¶ms) {
|
||||
|
||||
13
External/ChemDraw/chemdraw.h
vendored
13
External/ChemDraw/chemdraw.h
vendored
@@ -41,13 +41,18 @@ namespace RDKit {
|
||||
namespace v2 {
|
||||
enum class CDXFormat {
|
||||
CDX = 1,
|
||||
CDXML = 2
|
||||
CDXML = 2,
|
||||
AUTO = 3
|
||||
};
|
||||
|
||||
struct RDKIT_RDCHEMDRAWLIB_EXPORT ChemDrawParserParams {
|
||||
bool sanitize = true;
|
||||
bool removeHs = true;
|
||||
CDXFormat format = CDXFormat::CDXML;
|
||||
bool sanitize;
|
||||
bool removeHs;
|
||||
CDXFormat format;
|
||||
ChemDrawParserParams() : sanitize(true), removeHs(true), format(CDXFormat::AUTO) {}
|
||||
ChemDrawParserParams(bool sanitize, bool removeHs, CDXFormat format) :
|
||||
sanitize(sanitize), removeHs(removeHs), format(format) {}
|
||||
|
||||
};
|
||||
|
||||
std::vector<std::unique_ptr<RWMol>> RDKIT_RDCHEMDRAWLIB_EXPORT
|
||||
|
||||
Reference in New Issue
Block a user