Adds MolToCDXMLBlock to FileParsers (#9291)

* Adds MolToCDXMLBlock to FileParsers

* Simplified code, removed warning

* Fix C# wrapper for MolToCDX

* Add C# test, fix cscode in swig

* Fix typo in tests

* Set default format to CDXML for MolToCDXML

Co-authored-by: Greg Landrum <greg.landrum@gmail.com>

* Add CDXML writer smoke tests

---------

Co-authored-by: Greg Landrum <greg.landrum@gmail.com>
This commit is contained in:
Brian Kelley
2026-05-28 23:36:35 -04:00
committed by GitHub
parent 76a32ef1ee
commit b417465e93
9 changed files with 224 additions and 3 deletions

View File

@@ -834,12 +834,21 @@ std::vector<std::unique_ptr<RWMol>> MolsFromCDXMLFile(
}
std::vector<std::unique_ptr<RWMol>> MolsFromCDXML(
const std::string &cdxml, const CDXMLParserParams &params) {
const std::string &cdxml, const CDXMLParserParams &params) {
std::stringstream iss(cdxml);
return MolsFromCDXMLDataStream(iss, params);
}
//! Stub used when ChemDraw writing support is not compiled in (per the error
//! message below): the arguments are ignored and the call always fails.
/*!
 *  \throws FileParseException unconditionally
 */
RDKIT_FILEPARSERS_EXPORT std::string MolToCDXMLBlock(
    const RWMol &,
    CDXMLFormat ) {
  // No return statement is needed: control never leaves this function normally.
  throw FileParseException(
      std::string("RDKit built without ChemDraw writing support."));
}
} // namespace CDXMLParser
} // namespace v2
} // namespace RDKit
@@ -904,6 +913,19 @@ std::vector<std::unique_ptr<RWMol>> MolsFromCDXML(
std::stringstream iss(cdxml);
return MolsFromCDXMLDataStream(iss, params);
}
std::string MolToCDXMLBlock(
const RWMol &mol,
CDXMLFormat format) {
CDXFormat cdx_format = CDXFormat::CDXML;
if (format == CDXMLFormat::CDX) {
cdx_format = CDXFormat::CDX;
}
return MolToChemDrawBlock(mol, cdx_format);
}
}
}
}

View File

@@ -86,6 +86,26 @@ RDKIT_FILEPARSERS_EXPORT std::vector<std::unique_ptr<RWMol>> MolsFromCDXML(
const std::string &cdxml,
const CDXMLParserParams &params =
CDXMLParserParams(true, true, v2::CDXMLParser::CDXMLFormat::Auto));
//! \brief Write a CDX or CDXML block from a molecule
//! The RDKit is optionally built with the Revvity ChemDraw parser.
//! If this is available, CDX and CDXML can be written.
//! Note that the CDXML format is large and complex; the RDKit doesn't
//! support the full functionality, just the base features required for
//! molecule and reaction parsing.
//! Note: If the ChemDraw extensions are unavailable, an exception will be
//! thrown. Please use the support function hasChemDrawCDXSupport() to check
//! whether ChemDraw writing support is enabled.
//! Note: For CDXML the contents of the std::string are UTF-8.
//! For CDX they are the binary bytes.
/*!
* \param mol - Molecule to write
* \param format - CDXMLFormat to use, CDX or CDXML (default)
* \return the CDXML text or the CDX bytes, held in a std::string
*/
RDKIT_FILEPARSERS_EXPORT std::string MolToCDXMLBlock(
const RWMol &mol,
CDXMLFormat format = CDXMLFormat::CDXML);
} // namespace CDXMLParser
} // namespace v2

View File

@@ -1434,6 +1434,27 @@ TEST_CASE("CDX and Formats") {
}
}
}
SECTION("Check MolToCDXMLBlock") {
  // Writing is only exercised when hasChemDrawCDXSupport() reports support.
  if (hasChemDrawCDXSupport()) {
    auto mol = "c1ccccc1"_smiles;
    REQUIRE(mol);

    // The default output is CDXML; an explicit CDX request must not be XML.
    auto block = MolToCDXMLBlock(*mol);
    CHECK(block.find("<CDXML") != std::string::npos);
    auto cdx = MolToCDXMLBlock(*mol, CDXMLFormat::CDX);
    CHECK(cdx.find("<CDXML") == std::string::npos);

    // Round trip both outputs. REQUIRE guards the element access below so a
    // wrong molecule count fails the test instead of indexing out of bounds.
    auto mols = MolsFromCDXML(block, {true, true, CDXMLFormat::CDXML});
    REQUIRE(mols.size() == 1);
    REQUIRE(mols[0]);
    CHECK(MolToSmiles(*mols[0]) == "c1ccccc1");

    mols = MolsFromCDXML(cdx, {true, true, CDXMLFormat::CDX});
    REQUIRE(mols.size() == 1);
    REQUIRE(mols[0]);
    CHECK(MolToSmiles(*mols[0]) == "c1ccccc1");
  }
}
}
#endif

View File

@@ -797,6 +797,21 @@ python::tuple MolsFromCDXML(python::object cdxml, bool sanitize,
}
return python::tuple(res);
}
// Python-facing wrapper around MolToCDXMLBlock().
// Returns a Python bytes object for CDX and a Python str for every other
// format value.
python::object MolToCDXMLBlockHelper(const RDKit::ROMol &mol,
                                     RDKit::v2::CDXMLParser::CDXMLFormat format) {
  const auto block = RDKit::v2::CDXMLParser::MolToCDXMLBlock(mol, format);
  if (format == RDKit::v2::CDXMLParser::CDXMLFormat::CDX) {
    // CDX is binary: hand over the raw bytes with an explicit length.
    return python::object(python::handle<>(
        PyBytes_FromStringAndSize(block.data(), block.size())));
  }
  // Use the size-aware constructor so the text is not cut at an embedded NUL,
  // matching how the bytes branch is built.
  return python::object(python::handle<>(
      PyUnicode_FromStringAndSize(block.data(), block.size())));
}
namespace {
python::object translateMetadataToList(
const std::vector<std::pair<std::string, std::string>> &metadata) {
@@ -2746,6 +2761,34 @@ BOOST_PYTHON_MODULE(rdmolfiles) {
python::def("MolsFromCDXML", MolsFromCDXMLHelper,
(python::arg("cdxml"), python::arg("params")), docString.c_str());
// Docstring and registration for the Python MolToCDXMLBlock() writer.
docString =
    R"DOC(Write a CDX or CDXML block from a molecule.
The RDKit is optionally built with the Revvity ChemDraw parser.
If this is available, CDX and CDXML can be written.
Note that the CDXML format is large and complex, the RDKit doesn't
support full functionality, just the base ones required for molecule and
reaction parsing.
Note: If the ChemDraw extensions are unavailable, an exception will be thrown.
  Please use the support function HasChemDrawCDXSupport() to check
  whether ChemDraw writing support is enabled.
Note: For CDXML this returns a UTF-8 string <str>
  For CDX this returns a byte string <bytes>

  ARGUMENTS:

    - mol: the molecule to write
    - format: CDXMLFormat [default CDXML]

  RETURNS:
    the CDXML text as a str, or the CDX data as bytes.)DOC";
python::def("MolToCDXMLBlock", MolToCDXMLBlockHelper,
            (python::arg("mol"),
             python::arg("format") = RDKit::v2::CDXMLParser::CDXMLFormat::CDXML),
            docString.c_str());
docString = "Returns true if the RDKit is built with ChemDraw CDX support";
python::def("HasChemDrawCDXSupport",
RDKit::v2::CDXMLParser::hasChemDrawCDXSupport, docString.c_str());

View File

@@ -332,6 +332,21 @@ class TestCase(unittest.TestCase):
mols3 = Chem.MolsFromCDXML(data, params)
smi3 = [Chem.MolToSmiles(m) for m in mols3]
self.assertEqual(smi1, smi3)
if Chem.HasChemDrawCDXSupport():
# ensure we can round trip through CDXML, CDX
for smi, mol in zip(smi3, mols3):
cdxml = Chem.MolToCDXMLBlock(mol)
cdxml2 = Chem.MolToCDXMLBlock(mol, Chem.CDXMLFormat.CDXML)
# check default is cdxml
self.assertEqual(cdxml, cdxml2)
cdx = Chem.MolToCDXMLBlock(mol, Chem.CDXMLFormat.CDX)
self.assertEqual(type(cdx), bytes)
self.assertEqual(Chem.MolToSmiles(Chem.MolsFromCDXML(cdxml)[0]),
smi)
self.assertEqual(Chem.MolToSmiles(Chem.MolsFromCDXML(cdx, params)[0]),
smi)
def test_formats(self):

View File

@@ -126,6 +126,17 @@
$result = JCALL1(NewByteArray, jenv, $1.size());
JCALL4(SetByteArrayRegion, jenv, $result, 0, $1.size(), (const jbyte*)$1.c_str());
}
// Java typemaps for ROMol::MolToCDX: the std::string result is exposed as a
// Java byte[] (JNI jbyteArray) that is filled with the string's bytes.
%typemap(jni) std::string RDKit::ROMol::MolToCDX "jbyteArray"
%typemap(jtype) std::string RDKit::ROMol::MolToCDX "byte[]"
%typemap(jstype) std::string RDKit::ROMol::MolToCDX "byte[]"
%typemap(javaout) std::string RDKit::ROMol::MolToCDX {
return $jnicall;
}
%typemap(out) std::string RDKit::ROMol::MolToCDX {
$result = JCALL1(NewByteArray, jenv, $1.size());
JCALL4(SetByteArrayRegion, jenv, $result, 0, $1.size(), (const jbyte*)$1.c_str());
}
#endif
/*
@@ -162,6 +173,7 @@
}
}
%}
%typemap(cscode) RDKit::ROMol %{
public static ROMol FromByteArray(byte[] pkl) {
UChar_Vect vec = null;
@@ -191,8 +203,22 @@
vec.Dispose();
}
}
}
}
// Returns the result of MolToCDXHelper() copied into a managed byte array.
// The intermediate UChar_Vect is disposed when the using block exits.
public byte[] MolToCDX() {
  using (UChar_Vect cdxBytes = MolToCDXHelper()) {
    byte[] result = new byte[cdxBytes.Count];
    cdxBytes.CopyTo(result);
    return result;
  }
}
%}
%include <GraphMol/ROMol.h>
%ignore SubstructMatch;
@@ -270,6 +296,7 @@ void setAllowNontetrahedralChirality(bool);
#ifdef SWIGCSHARP
%csmethodmodifiers RDKit::ROMol::fromUCharVect "private";
%csmethodmodifiers RDKit::ROMol::toUCharVect "private";
%csmethodmodifiers RDKit::ROMol::MolToCDXHelper "private";
#endif
%{
@@ -349,6 +376,16 @@ unsigned int getDefaultPickleProperties();
RDKit::MolToXYZFile(*($self), fName, confId);
}
// Forwards to RDKit::v2::CDXMLParser::hasChemDrawCDXSupport() so wrapper
// code can check for ChemDraw CDX support before calling the writers.
static bool hasChemDrawCDXSupport() {
return RDKit::v2::CDXMLParser::hasChemDrawCDXSupport();
}
// Writes this molecule with MolToCDXMLBlock() using the CDXML format and
// returns the resulting string.
std::string MolToCDXML() {
return RDKit::v2::CDXMLParser::MolToCDXMLBlock(
*($self),
RDKit::v2::CDXMLParser::CDXMLFormat::CDXML);
}
bool hasSubstructMatch(RDKit::ROMol &query,bool useChirality=false){
RDKit::MatchVectType mv;
return SubstructMatch(*($self),query,mv,true,useChirality);
@@ -664,6 +701,11 @@ unsigned int getDefaultPickleProperties();
}
return sres;
}
// Writes this molecule with MolToCDXMLBlock() using the CDX format; the
// returned std::string carries the CDX data.
const std::string MolToCDX() {
return RDKit::v2::CDXMLParser::MolToCDXMLBlock(
*($self),
RDKit::v2::CDXMLParser::CDXMLFormat::CDX);
}
#endif
#ifdef SWIGCSHARP
const std::vector<unsigned char> toUCharVect(int propertyFlags=-1) {
@@ -676,6 +718,13 @@ unsigned int getDefaultPickleProperties();
const std::vector<unsigned char> vec(sres.begin(), sres.end());
return vec;
}
// Writes this molecule as CDX and returns the data as a vector of unsigned
// char, one element per character of the MolToCDXMLBlock() result.
const std::vector<unsigned char> MolToCDXHelper() {
  const std::string cdxData = RDKit::v2::CDXMLParser::MolToCDXMLBlock(
      *($self), RDKit::v2::CDXMLParser::CDXMLFormat::CDX);
  return std::vector<unsigned char>(cdxData.begin(), cdxData.end());
}
#endif
static RDKit::ROMOL_SPTR fromUCharVect(const std::vector<unsigned char> &pkl) {
std::string sres(pkl.begin(), pkl.end());

View File

@@ -75,6 +75,30 @@
%include <GraphMol/FileParsers/CDXMLParser.h>
%include <GraphMol/SmilesParse/SmilesParse.h>
%typemap(javacode) RDKit::RWMol %{
  // Copies the Java byte array into a UChar_Vect and passes it to
  // RWMol.MolsFromCDXML; the native vector is always released afterwards.
  public static RWMol_Vect MolsFromCDXMLByteArray(
      byte[] pkl, boolean sanitize, boolean removeHs) {
    UChar_Vect vec = new UChar_Vect();
    try {
      vec.reserve(pkl.length);
      for (byte b : pkl) {
        vec.add(b);
      }
      return RWMol.MolsFromCDXML(vec, sanitize, removeHs);
    } finally {
      vec.delete();
    }
  }

  // Convenience overload: sanitize and removeHs are both true.
  public static RWMol_Vect MolsFromCDXMLByteArray(byte [] pkl) {
    return MolsFromCDXMLByteArray(pkl, true, true);
  }
%}
%typemap(cscode) RDKit::RWMol %{
public static RWMol_Vect MolsFromCDXMLByteArray(

View File

@@ -70,5 +70,17 @@ namespace RdkitTests
Assert.True(mols.Count >= 1);
}
[Fact]
public void MolToCDX() {
  string smi = "c1ccccc1[C@](F)(Cl)Br";
  ROMol mol = RWMol.MolFromSmiles(smi);

  // Round trip through the binary CDX writer.
  byte[] pkl = mol.MolToCDX();
  var mols = RWMol.MolsFromCDXMLByteArray(pkl);
  Assert.Equal(mol.MolToSmiles(), mols[0].MolToSmiles());

  // Round trip through the CDXML writer. The previous version re-parsed the
  // CDX bytes here, so the CDXML output was never checked.
  // NOTE(review): assumes MolsFromCDXMLByteArray accepts UTF-8 CDXML text as
  // well as CDX bytes; confirm against the wrapper.
  string cdxml = mol.MolToCDXML();
  Assert.Contains("CDXML", cdxml);
  mols = RWMol.MolsFromCDXMLByteArray(System.Text.Encoding.UTF8.GetBytes(cdxml));
  Assert.Equal(mol.MolToSmiles(), mols[0].MolToSmiles());
}
}
}

View File

@@ -357,6 +357,21 @@ public class WrapperTests extends GraphMolTest {
assertEquals(prods.get(idx).MolToSmiles(true), "CC1(C)[C@H](C=C(Cl)Cl)[C@H]1C(=O)O[C@@H](C#N)c1cccc(Oc2ccccc2)c1");
}
}
if(ROMol.hasChemDrawCDXSupport()) {
String cdxml = prods.get(0).MolToCDXML();
byte[] cdx = prods.get(0).MolToCDX();
assertTrue(cdxml.indexOf("CDXML") != -1);
prods = RWMol.MolsFromCDXML(cdxml, params);
assertEquals(prods.get(0).MolToSmiles(true),
"CC1(C)[C@H](C=C(Cl)Cl)[C@H]1C(=O)O[C@@H](C#N)c1cccc(Oc2ccccc2)c1");
params.setFormat(CDXMLFormat.CDX);
prods = RWMol.MolsFromCDXMLByteArray(cdx);
assertEquals(prods.get(0).MolToSmiles(true),
"CC1(C)[C@H](C=C(Cl)Cl)[C@H]1C(=O)O[C@@H](C#N)c1cccc(Oc2ccccc2)c1");
}
}
public static void main(String args[]) {