mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-03 21:44:30 +08:00
Adds MolToCDXMLBlock to FileParsers (#9291)
* Adds MolToCDXMLBlock to FileParsers * Simplified code, removed warning * Fix C# wrapper for MolToCDX * Add C# test, fix cscode in swig * Fix typo in tests * Set default format to CDXML for MolToCDXML Co-authored-by: Greg Landrum <greg.landrum@gmail.com> * Add CDXML writer smoke tests --------- Co-authored-by: Greg Landrum <greg.landrum@gmail.com>
This commit is contained in:
@@ -834,12 +834,21 @@ std::vector<std::unique_ptr<RWMol>> MolsFromCDXMLFile(
|
||||
}
|
||||
|
||||
std::vector<std::unique_ptr<RWMol>> MolsFromCDXML(
|
||||
|
||||
const std::string &cdxml, const CDXMLParserParams ¶ms) {
|
||||
const std::string &cdxml, const CDXMLParserParams ¶ms) {
|
||||
|
||||
std::stringstream iss(cdxml);
|
||||
return MolsFromCDXMLDataStream(iss, params);
|
||||
}
|
||||
|
||||
//! \brief Stand-in for MolToCDXMLBlock() that only reports missing support.
/*!
 * Both arguments are ignored; the function never returns normally.
 *
 * \throws FileParseException always, stating that ChemDraw writing support
 *         is not part of this build
 */
RDKIT_FILEPARSERS_EXPORT std::string MolToCDXMLBlock(const RWMol &,
                                                     CDXMLFormat) {
  const std::string msg = "RDKit built without ChemDraw writing support.";
  throw FileParseException(msg);
}
|
||||
|
||||
} // namespace CDXMLParser
|
||||
} // namespace v2
|
||||
} // namespace RDKit
|
||||
@@ -904,6 +913,19 @@ std::vector<std::unique_ptr<RWMol>> MolsFromCDXML(
|
||||
std::stringstream iss(cdxml);
|
||||
return MolsFromCDXMLDataStream(iss, params);
|
||||
}
|
||||
|
||||
std::string MolToCDXMLBlock(
|
||||
const RWMol &mol,
|
||||
CDXMLFormat format) {
|
||||
|
||||
CDXFormat cdx_format = CDXFormat::CDXML;
|
||||
|
||||
if (format == CDXMLFormat::CDX) {
|
||||
cdx_format = CDXFormat::CDX;
|
||||
}
|
||||
|
||||
return MolToChemDrawBlock(mol, cdx_format);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -86,6 +86,26 @@ RDKIT_FILEPARSERS_EXPORT std::vector<std::unique_ptr<RWMol>> MolsFromCDXML(
|
||||
const std::string &cdxml,
|
||||
const CDXMLParserParams ¶ms =
|
||||
CDXMLParserParams(true, true, v2::CDXMLParser::CDXMLFormat::Auto));
|
||||
|
||||
//! \brief write a CDX or CDXML block from a molecule
|
||||
//! The RDKit is optionally built with the Revvity ChemDraw parser
|
||||
//! If this is available, CDX and CDXML can be written
|
||||
//! Note that the CDXML format is large and complex, the RDKit doesn't
|
||||
//! support full functionality, just the base ones required for molecule and
|
||||
//! reaction parsing.
|
||||
//! Note: If the ChemDraw extensions are unavailable, an exception will be thrown;
|
||||
//! please use the support function hasChemDrawCDXSupport() to check
|
||||
//! whether ChemDraw writing support is enabled.
|
||||
//! Note: For CDXML the contents of the std::string are UTF-8
|
||||
//! For CDX they are the binary bytes.
|
||||
/*!
|
||||
* \param mol - Molecule to write
|
||||
* \param format - CDXMLFormat to use, CDX or CDXML (default)
|
||||
*/
|
||||
RDKIT_FILEPARSERS_EXPORT std::string MolToCDXMLBlock(
|
||||
const RWMol &mol,
|
||||
CDXMLFormat format = CDXMLFormat::CDXML);
|
||||
|
||||
} // namespace CDXMLParser
|
||||
} // namespace v2
|
||||
|
||||
|
||||
@@ -1434,6 +1434,27 @@ TEST_CASE("CDX and Formats") {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("Check MolToCDXMLBlock") {
  // Writing is only exercised when ChemDraw support is compiled in.
  if (hasChemDrawCDXSupport()) {
    auto mol = "c1ccccc1"_smiles;
    REQUIRE(mol);

    // Default format: the output must carry the CDXML root tag.
    auto block = MolToCDXMLBlock(*mol);
    CHECK(block.find("<CDXML") != std::string::npos);

    // Explicit CDX request: the output must not be CDXML text.
    auto cdx = MolToCDXMLBlock(*mol, CDXMLFormat::CDX);
    CHECK(cdx.find("<CDXML") == std::string::npos);

    // Round trip through CDXML.
    auto mols = MolsFromCDXML(block, {true, true, CDXMLFormat::CDXML});
    // REQUIRE rather than CHECK: a failed CHECK keeps going and the next
    // line would index into an empty vector.
    REQUIRE(mols.size() == 1);
    CHECK(MolToSmiles(*mols[0]) == "c1ccccc1");

    // Round trip through CDX.
    mols = MolsFromCDXML(cdx, {true, true, CDXMLFormat::CDX});
    REQUIRE(mols.size() == 1);
    CHECK(MolToSmiles(*mols[0]) == "c1ccccc1");
  }
}
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
@@ -797,6 +797,21 @@ python::tuple MolsFromCDXML(python::object cdxml, bool sanitize,
|
||||
}
|
||||
return python::tuple(res);
|
||||
}
|
||||
|
||||
// Python-facing wrapper around RDKit::v2::CDXMLParser::MolToCDXMLBlock().
//
// Returns a Python bytes object when format is CDX and a Python str for
// every other format value.
//
// NOTE(review): MolToCDXMLBlock() is declared taking `const RWMol &`, so
// passing an ROMol here presumably goes through a converting copy - confirm.
python::object MolToCDXMLBlockHelper(
    const RDKit::ROMol &mol, RDKit::v2::CDXMLParser::CDXMLFormat format) {
  auto block = RDKit::v2::CDXMLParser::MolToCDXMLBlock(mol, format);
  const auto nBytes = static_cast<Py_ssize_t>(block.size());

  if (format == RDKit::v2::CDXMLParser::CDXMLFormat::CDX) {
    // CDX: hand the raw buffer over with its explicit length.
    PyObject *py_bytes = PyBytes_FromStringAndSize(block.data(), nBytes);
    return python::object(python::handle<>(py_bytes));
  }

  // CDXML: decode the whole buffer as UTF-8. The sized call is used so the
  // conversion does not stop at the first NUL character.
  PyObject *py_str = PyUnicode_FromStringAndSize(block.data(), nBytes);
  return python::object(python::handle<>(py_str));
}
|
||||
|
||||
namespace {
|
||||
python::object translateMetadataToList(
|
||||
const std::vector<std::pair<std::string, std::string>> &metadata) {
|
||||
@@ -2746,6 +2761,34 @@ BOOST_PYTHON_MODULE(rdmolfiles) {
|
||||
python::def("MolsFromCDXML", MolsFromCDXMLHelper,
|
||||
(python::arg("cdxml"), python::arg("params")), docString.c_str());
|
||||
|
||||
docString =
|
||||
R"DOC(brief write a CDX or CDXML block from a molecule
|
||||
|
||||
The RDKit is optionally built with the Revvity ChemDraw parser
|
||||
If this is available, CDX and CDXML can be written
|
||||
Note that the CDXML format is large and complex, the RDKit doesn't
|
||||
support full functionality, just the base ones required for molecule and
|
||||
reaction parsing.
|
||||
|
||||
Note: If the ChemDraw extensions are unavailable, an exception will be thrown
|
||||
please use the support function HasChemDrawCDXSupport() to check
|
||||
whether ChemDraw writing support is enabled.
|
||||
|
||||
Note: For CDXML this returns a UTF-8 string <str>
|
||||
For CDX this returns a byte sting <bytes>
|
||||
|
||||
ARGUMENTS:
|
||||
|
||||
- mol: the molecule to write
|
||||
|
||||
- format: CDXMLFormat [default CDXML]
|
||||
|
||||
RETURNS:
|
||||
a tuple of parsed Mol objects.)DOC";
|
||||
|
||||
python::def("MolToCDXMLBlock", MolToCDXMLBlockHelper,
|
||||
(python::arg("mol"), python::arg("format")=RDKit::v2::CDXMLParser::CDXMLFormat::CDXML), docString.c_str());
|
||||
|
||||
docString = "Returns true if the RDKit is built with ChemDraw CDX support";
|
||||
python::def("HasChemDrawCDXSupport",
|
||||
RDKit::v2::CDXMLParser::hasChemDrawCDXSupport, docString.c_str());
|
||||
|
||||
@@ -332,6 +332,21 @@ class TestCase(unittest.TestCase):
|
||||
mols3 = Chem.MolsFromCDXML(data, params)
|
||||
smi3 = [Chem.MolToSmiles(m) for m in mols3]
|
||||
self.assertEqual(smi1, smi3)
|
||||
if Chem.HasChemDrawCDXSupport():
|
||||
# ensure we can round trip through CDXML, CDX
|
||||
for smi, mol in zip(smi3, mols3):
|
||||
cdxml = Chem.MolToCDXMLBlock(mol)
|
||||
cdxml2 = Chem.MolToCDXMLBlock(mol, Chem.CDXMLFormat.CDXML)
|
||||
# check default is cdxml
|
||||
self.assertEqual(cdxml, cdxml2)
|
||||
cdx = Chem.MolToCDXMLBlock(mol, Chem.CDXMLFormat.CDX)
|
||||
self.assertEqual(type(cdx), bytes)
|
||||
|
||||
self.assertEqual(Chem.MolToSmiles(Chem.MolsFromCDXML(cdxml)[0]),
|
||||
smi)
|
||||
self.assertEqual(Chem.MolToSmiles(Chem.MolsFromCDXML(cdx, params)[0]),
|
||||
smi)
|
||||
|
||||
|
||||
|
||||
def test_formats(self):
|
||||
|
||||
@@ -126,6 +126,17 @@
|
||||
$result = JCALL1(NewByteArray, jenv, $1.size());
|
||||
JCALL4(SetByteArrayRegion, jenv, $result, 0, $1.size(), (const jbyte*)$1.c_str());
|
||||
}
|
||||
%typemap(jni) std::string RDKit::ROMol::MolToCDX "jbyteArray"
|
||||
%typemap(jtype) std::string RDKit::ROMol::MolToCDX "byte[]"
|
||||
%typemap(jstype) std::string RDKit::ROMol::MolToCDX "byte[]"
|
||||
%typemap(javaout) std::string RDKit::ROMol::MolToCDX {
|
||||
return $jnicall;
|
||||
}
|
||||
%typemap(out) std::string RDKit::ROMol::MolToCDX {
|
||||
$result = JCALL1(NewByteArray, jenv, $1.size());
|
||||
JCALL4(SetByteArrayRegion, jenv, $result, 0, $1.size(), (const jbyte*)$1.c_str());
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
@@ -162,6 +173,7 @@
|
||||
}
|
||||
}
|
||||
%}
|
||||
|
||||
%typemap(cscode) RDKit::ROMol %{
|
||||
public static ROMol FromByteArray(byte[] pkl) {
|
||||
UChar_Vect vec = null;
|
||||
@@ -191,8 +203,22 @@
|
||||
vec.Dispose();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Returns the result of MolToCDXHelper() as a managed byte array.
public byte[] MolToCDX() {
  // The using statement disposes the native vector on every exit path.
  using (UChar_Vect nativeBytes = MolToCDXHelper()) {
    byte[] cdx = new byte[nativeBytes.Count];
    nativeBytes.CopyTo(cdx);
    return cdx;
  }
}
|
||||
%}
|
||||
|
||||
%include <GraphMol/ROMol.h>
|
||||
|
||||
%ignore SubstructMatch;
|
||||
@@ -270,6 +296,7 @@ void setAllowNontetrahedralChirality(bool);
|
||||
#ifdef SWIGCSHARP
|
||||
%csmethodmodifiers RDKit::ROMol::fromUCharVect "private";
|
||||
%csmethodmodifiers RDKit::ROMol::toUCharVect "private";
|
||||
%csmethodmodifiers RDKit::ROMol::MolToCDXHelper "private";
|
||||
#endif
|
||||
|
||||
%{
|
||||
@@ -349,6 +376,16 @@ unsigned int getDefaultPickleProperties();
|
||||
RDKit::MolToXYZFile(*($self), fName, confId);
|
||||
}
|
||||
|
||||
static bool hasChemDrawCDXSupport() {
|
||||
return RDKit::v2::CDXMLParser::hasChemDrawCDXSupport();
|
||||
}
|
||||
|
||||
std::string MolToCDXML() {
|
||||
return RDKit::v2::CDXMLParser::MolToCDXMLBlock(
|
||||
*($self),
|
||||
RDKit::v2::CDXMLParser::CDXMLFormat::CDXML);
|
||||
}
|
||||
|
||||
bool hasSubstructMatch(RDKit::ROMol &query,bool useChirality=false){
|
||||
RDKit::MatchVectType mv;
|
||||
return SubstructMatch(*($self),query,mv,true,useChirality);
|
||||
@@ -664,6 +701,11 @@ unsigned int getDefaultPickleProperties();
|
||||
}
|
||||
return sres;
|
||||
}
|
||||
const std::string MolToCDX() {
|
||||
return RDKit::v2::CDXMLParser::MolToCDXMLBlock(
|
||||
*($self),
|
||||
RDKit::v2::CDXMLParser::CDXMLFormat::CDX);
|
||||
}
|
||||
#endif
|
||||
#ifdef SWIGCSHARP
|
||||
const std::vector<unsigned char> toUCharVect(int propertyFlags=-1) {
|
||||
@@ -676,6 +718,13 @@ unsigned int getDefaultPickleProperties();
|
||||
const std::vector<unsigned char> vec(sres.begin(), sres.end());
|
||||
return vec;
|
||||
}
|
||||
const std::vector<unsigned char> MolToCDXHelper() {
|
||||
std::string cdx = RDKit::v2::CDXMLParser::MolToCDXMLBlock(
|
||||
*($self),
|
||||
RDKit::v2::CDXMLParser::CDXMLFormat::CDX);
|
||||
const std::vector<unsigned char> vec(cdx.begin(), cdx.end());
|
||||
return vec;
|
||||
}
|
||||
#endif
|
||||
static RDKit::ROMOL_SPTR fromUCharVect(const std::vector<unsigned char> &pkl) {
|
||||
std::string sres(pkl.begin(), pkl.end());
|
||||
|
||||
@@ -75,6 +75,30 @@
|
||||
%include <GraphMol/FileParsers/CDXMLParser.h>
|
||||
%include <GraphMol/SmilesParse/SmilesParse.h>
|
||||
|
||||
%typemap(javacode) RDKit::RWMol %{
  // Copies the Java byte array into a UChar_Vect and passes it to
  // RWMol.MolsFromCDXML(); the temporary vector is always deleted.
  public static RWMol_Vect MolsFromCDXMLByteArray(
      byte[] pkl, boolean sanitize, boolean removeHs) {
    final UChar_Vect buffer = new UChar_Vect();
    try {
      buffer.reserve(pkl.length);
      for (byte b : pkl) {
        buffer.add(b);
      }
      return RWMol.MolsFromCDXML(buffer, sanitize, removeHs);
    } finally {
      buffer.delete();
    }
  }

  // Convenience overload: sanitize and removeHs are both true.
  public static RWMol_Vect MolsFromCDXMLByteArray(byte[] pkl) {
    return MolsFromCDXMLByteArray(pkl, true, true);
  }
%}
|
||||
|
||||
|
||||
%typemap(cscode) RDKit::RWMol %{
|
||||
public static RWMol_Vect MolsFromCDXMLByteArray(
|
||||
|
||||
@@ -70,5 +70,17 @@ namespace RdkitTests
|
||||
Assert.True(mols.Count >= 1);
|
||||
|
||||
}
|
||||
[Fact]
public void MolToCDX() {
  // Writing throws when ChemDraw support is not available, so the round
  // trips are only run when it is.
  if (!ROMol.hasChemDrawCDXSupport()) {
    return;
  }

  string smi = "c1ccccc1[C@](F)(Cl)Br";
  ROMol mol = RWMol.MolFromSmiles(smi);
  string expected = mol.MolToSmiles();

  // Round trip through CDX.
  byte[] cdx = mol.MolToCDX();
  var mols = RWMol.MolsFromCDXMLByteArray(cdx);
  Assert.True(mols.Count >= 1);
  Assert.Equal(expected, mols[0].MolToSmiles());

  // Round trip through CDXML. The CDXML text itself must be parsed here;
  // re-parsing the CDX bytes would leave MolToCDXML() untested.
  // NOTE(review): assumes MolsFromCDXMLByteArray() accepts UTF-8 CDXML bytes
  // as well as CDX - confirm against the wrapper.
  string cdxml = mol.MolToCDXML();
  mols = RWMol.MolsFromCDXMLByteArray(
      System.Text.Encoding.UTF8.GetBytes(cdxml));
  Assert.True(mols.Count >= 1);
  Assert.Equal(expected, mols[0].MolToSmiles());
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -357,6 +357,21 @@ public class WrapperTests extends GraphMolTest {
|
||||
assertEquals(prods.get(idx).MolToSmiles(true), "CC1(C)[C@H](C=C(Cl)Cl)[C@H]1C(=O)O[C@@H](C#N)c1cccc(Oc2ccccc2)c1");
|
||||
}
|
||||
}
|
||||
|
||||
if(ROMol.hasChemDrawCDXSupport()) {
|
||||
String cdxml = prods.get(0).MolToCDXML();
|
||||
byte[] cdx = prods.get(0).MolToCDX();
|
||||
assertTrue(cdxml.indexOf("CDXML") != -1);
|
||||
|
||||
prods = RWMol.MolsFromCDXML(cdxml, params);
|
||||
assertEquals(prods.get(0).MolToSmiles(true),
|
||||
"CC1(C)[C@H](C=C(Cl)Cl)[C@H]1C(=O)O[C@@H](C#N)c1cccc(Oc2ccccc2)c1");
|
||||
|
||||
params.setFormat(CDXMLFormat.CDX);
|
||||
prods = RWMol.MolsFromCDXMLByteArray(cdx);
|
||||
assertEquals(prods.get(0).MolToSmiles(true),
|
||||
"CC1(C)[C@H](C=C(Cl)Cl)[C@H]1C(=O)O[C@@H](C#N)c1cccc(Oc2ccccc2)c1");
|
||||
}
|
||||
|
||||
}
|
||||
public static void main(String args[]) {
|
||||
|
||||
Reference in New Issue
Block a user