Files
rdkit/Code/GraphMol/Wrap/test_cdxml.py
Brian Kelley b417465e93 Adds MolToCDXMLBlock to FileParsers (#9291)
* Adds MolToCDXMLBlock to FileParsers

* Simplified code, removed warning

* Fix C# wrapper for MolToCDX

* Add C# test, fix cscode in swig

* Fix typo in tests

* Set default format to CDXML for MolToCDXML

Co-authored-by: Greg Landrum <greg.landrum@gmail.com>

* Add CDXML writer smoke tests

---------

Co-authored-by: Greg Landrum <greg.landrum@gmail.com>
2026-05-29 05:36:35 +02:00

406 lines
11 KiB
Python

#
# Copyright (C) 2003-2021 Greg Landrum and other RDKit contributors
# All Rights Reserved
#
""" This is a rough coverage test of the Python wrappers for CDX/CDXML
parsing and writing. It is intended to be shallow, but broad.
"""
import doctest
import gc
import gzip
import logging
import os
import sys
import tempfile
import unittest
from io import StringIO
from rdkit import Chem
class TestCase(unittest.TestCase):
  """Shallow smoke tests for the CDX/CDXML reader and writer wrappers.

  NOTE: the file-based test used to be named ``test_cdxml`` as well, which
  silently replaced the in-memory ``test_cdxml`` defined before it (the later
  ``def`` wins inside a class body).  It is now called ``test_cdx`` so that
  both tests are collected and run.
  """

  def _assert_single_mol(self, mols, expected_smiles):
    """Check that ``mols`` holds exactly one molecule with the given SMILES."""
    self.assertEqual(len(mols), 1)
    self.assertEqual(Chem.MolToSmiles(mols[0]), expected_smiles)

  def test_cdxml(self):
    """Parse an in-memory CDXML document through each MolsFromCDXML call form.

    The document is a seven-membered ring carrying a "Boc" nickname node;
    every call form is expected to yield the same single molecule.
    """
    cdxml = """<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE CDXML SYSTEM "http://www.cambridgesoft.com/xml/cdxml.dtd" >
<CDXML
CreationProgram="ChemDraw JS 2.0.0.9"
Name="ACS Document 1996"
BoundingBox="94.75 178.16 154.89 211.17"
WindowPosition="0 0"
WindowSize="0 0"
FractionalWidths="yes"
InterpretChemically="yes"
ShowAtomQuery="yes"
ShowAtomStereo="no"
ShowAtomEnhancedStereo="yes"
ShowAtomNumber="no"
ShowResidueID="no"
ShowBondQuery="yes"
ShowBondRxn="yes"
ShowBondStereo="no"
ShowTerminalCarbonLabels="no"
ShowNonTerminalCarbonLabels="no"
HideImplicitHydrogens="no"
Magnification="666"
LabelFont="24"
LabelSize="10"
LabelFace="96"
CaptionFont="24"
CaptionSize="10"
HashSpacing="2.50"
MarginWidth="1.60"
LineWidth="0.60"
BoldWidth="2"
BondLength="14.40"
BondSpacing="18"
ChainAngle="120"
LabelJustification="Auto"
CaptionJustification="Left"
AminoAcidTermini="HOH"
ShowSequenceTermini="yes"
ShowSequenceBonds="yes"
ShowSequenceUnlinkedBranches="no"
ResidueWrapCount="40"
ResidueBlockCount="10"
ResidueZigZag="yes"
NumberResidueBlocks="no"
PrintMargins="36 36 36 36"
MacPrintInfo="0003000001200120000000000B6608A0FF84FF880BE309180367052703FC0002000001200120000000000B6608A0000100000064000000010001010100000001270F000100010000000000000000000000000002001901900000000000400000000000000000000100000000000000000000000000000000"
ChemPropName=""
ChemPropFormula="Chemical Formula: "
ChemPropExactMass="Exact Mass: "
ChemPropMolWt="Molecular Weight: "
ChemPropMOverZ="m/z: "
ChemPropAnalysis="Elemental Analysis: "
ChemPropBoilingPt="Boiling Point: "
ChemPropMeltingPt="Melting Point: "
ChemPropCritTemp="Critical Temp: "
ChemPropCritPres="Critical Pres: "
ChemPropCritVol="Critical Vol: "
ChemPropGibbs="Gibbs Energy: "
ChemPropLogP="Log P: "
ChemPropMR="MR: "
ChemPropHenry="Henry&apos;s Law: "
ChemPropEForm="Heat of Form: "
ChemProptPSA="tPSA: "
ChemPropID=""
ChemPropFragmentLabel=""
color="0"
bgcolor="1"
RxnAutonumberStart="1"
RxnAutonumberConditions="no"
RxnAutonumberStyle="Roman"
><colortable>
<color r="1" g="1" b="1"/>
<color r="0" g="0" b="0"/>
<color r="1" g="0" b="0"/>
<color r="1" g="1" b="0"/>
<color r="0" g="1" b="0"/>
<color r="0" g="1" b="1"/>
<color r="0" g="0" b="1"/>
<color r="1" g="0" b="1"/>
</colortable><fonttable>
<font id="24" charset="utf-8" name="Arial"/>
</fonttable><page
id="32"
BoundingBox="0 0 542 354"
Width="542"
Height="354"
HeaderPosition="36"
FooterPosition="36"
PageOverlap="0"
PrintTrimMarks="yes"
HeightPages="1"
WidthPages="1"
DrawingSpace="poster"
><fragment
id="10"
BoundingBox="94.75 178.16 154.89 211.17"
Z="4"
><n
id="7"
p="95.05 187.47"
Z="1"
AS="N"
/><n
id="9"
p="95.05 201.87"
Z="3"
AS="N"
/><n
id="11"
p="106.31 210.84"
Z="5"
AS="N"
/><n
id="13"
p="120.35 207.64"
Z="7"
AS="N"
/><n
id="15"
p="126.59 194.67"
Z="9"
AS="N"
/><n
id="17"
p="120.35 181.69"
Z="11"
AS="N"
/><n
id="19"
p="106.31 178.49"
Z="13"
AS="N"
/><n
id="28"
p="140.99 194.67"
Z="22"
NodeType="Nickname"
NeedsClean="yes"
AS="N"
><fragment
id="33"
><n
id="34"
p="148.17 207.09"
Element="8"
NumHydrogens="0"
/><n
id="35"
p="162.52 207.09"
/><n
id="36"
p="176.87 207.09"
/><n
id="37"
p="169.69 194.67"
/><n
id="38"
p="169.69 219.52"
/><n
id="39"
p="140.99 194.67"
/><n
id="40"
p="148.17 182.24"
Element="8"
NumHydrogens="0"
/><n
id="41"
p="126.64 194.67"
NodeType="ExternalConnectionPoint"
/><b
id="42"
B="39"
E="40"
Order="2"
/><b
id="43"
B="35"
E="38"
/><b
id="44"
B="35"
E="36"
/><b
id="45"
B="35"
E="37"
/><b
id="46"
B="34"
E="35"
/><b
id="47"
B="34"
E="39"
/><b
id="48"
B="41"
E="39"
/></fragment><t
p="137.66 198.28"
BoundingBox="137.66 189.64 154.89 198.28"
LabelJustification="Left"
LabelAlignment="Left"
><s font="24" size="9.95" color="0" face="96">Boc</s></t></n><b
id="21"
Z="15"
B="7"
E="9"
BS="N"
/><b
id="22"
Z="16"
B="9"
E="11"
BS="N"
/><b
id="23"
Z="17"
B="11"
E="13"
BS="N"
/><b
id="24"
Z="18"
B="13"
E="15"
BS="N"
/><b
id="25"
Z="19"
B="15"
E="17"
BS="N"
/><b
id="26"
Z="20"
B="17"
E="19"
BS="N"
/><b
id="27"
Z="21"
B="19"
E="7"
BS="N"
/><b
id="29"
Z="23"
B="15"
E="28"
BS="N"
/></fragment></page></CDXML>"""
    expected = "CC(C)(C)OC(=O)C1CCCCCC1"

    # default arguments
    self._assert_single_mol(Chem.MolsFromCDXML(cdxml), expected)

    # positional boolean flags (presumably sanitize, removeHs, matching the
    # params attributes set below -- confirm against the wrapper signature)
    self._assert_single_mol(Chem.MolsFromCDXML(cdxml, True, False), expected)
    self._assert_single_mol(Chem.MolsFromCDXML(cdxml, False, False), expected)

    # parameter-object form, first with its defaults ...
    params = Chem.CDXMLParserParams()
    self._assert_single_mol(Chem.MolsFromCDXML(cdxml, params), expected)

    # ... then with the same flag combinations as the positional calls above
    for sanitize in (True, False):
      params.sanitize = sanitize
      params.removeHs = False
      self._assert_single_mol(Chem.MolsFromCDXML(cdxml, params), expected)

  def test_cdx(self):
    """Read the same structure from .cdx and .cdxml files and round-trip it.

    Skipped when the optional ``rdChemDraw`` module cannot be imported.
    """
    try:
      from rdkit.Chem import rdChemDraw  # noqa: F401  (availability probe only)
    except ImportError:
      self.skipTest("rdkit.Chem.rdChemDraw is not available")

    rdbase = os.environ['RDBASE']
    cdxfilename = os.path.join(rdbase, 'Code/GraphMol/test_data/CDXML/ring-stereo1.cdx')
    filename = os.path.join(rdbase, 'Code/GraphMol/test_data/CDXML/ring-stereo1.cdxml')

    # the binary and the XML file must give the same molecules
    mols = Chem.MolsFromCDXMLFile(cdxfilename)
    mols2 = Chem.MolsFromCDXMLFile(filename)
    smi1 = [Chem.MolToSmiles(m) for m in mols]
    smi2 = [Chem.MolToSmiles(m) for m in mols2]
    self.assertEqual(smi1, smi2)
    self.assertEqual(smi1, ['C1CC[C@H]2CCCC[C@H]2C1'])

    # parsing the raw CDX bytes with an explicit CDX format must agree too
    with open(cdxfilename, 'rb') as f:
      data = f.read()
    params = Chem.CDXMLParserParams(True, True, Chem.CDXMLFormat.CDX)
    mols3 = Chem.MolsFromCDXML(data, params)
    smi3 = [Chem.MolToSmiles(m) for m in mols3]
    self.assertEqual(smi1, smi3)

    if Chem.HasChemDrawCDXSupport():
      # ensure we can round trip through CDXML, CDX
      for smi, mol in zip(smi3, mols3):
        cdxml = Chem.MolToCDXMLBlock(mol)
        cdxml2 = Chem.MolToCDXMLBlock(mol, Chem.CDXMLFormat.CDXML)
        # check default is cdxml
        self.assertEqual(cdxml, cdxml2)
        cdx = Chem.MolToCDXMLBlock(mol, Chem.CDXMLFormat.CDX)
        self.assertIsInstance(cdx, bytes)
        self.assertEqual(Chem.MolToSmiles(Chem.MolsFromCDXML(cdxml)[0]), smi)
        self.assertEqual(Chem.MolToSmiles(Chem.MolsFromCDXML(cdx, params)[0]), smi)

  def test_formats(self):
    """Check which explicit/auto formats parse for files and for raw bytes."""
    # Probe for the optional module separately from the assertions: with the
    # assertion inside a bare try/except, a failing check was swallowed and
    # the test passed anyway.
    try:
      from rdkit.Chem import rdChemDraw  # noqa: F401  (availability probe only)
    except ImportError:
      self.assertFalse(Chem.HasChemDrawCDXSupport())
      return
    self.assertTrue(Chem.HasChemDrawCDXSupport())

    rdbase = os.environ['RDBASE']
    cdxfilename = os.path.join(rdbase, 'Code/GraphMol/test_data/CDXML/ring-stereo1.cdx')
    cdxmlfilename = os.path.join(rdbase, 'Code/GraphMol/test_data/CDXML/ring-stereo1.cdxml')

    # smoke check: the CDX file is readable with default parameters
    mols = Chem.MolsFromCDXMLFile(cdxfilename)

    tests = [
      # we can deduce extensions from filenames, but not from streams (yet!)
      # filename, Stream, IsCDX, CDX res, CDXML res, Auto Res
      (cdxfilename, True, True, True, False, False),
      (cdxfilename, False, True, True, False, True),
      (cdxmlfilename, True, False, False, True, True),
      (cdxmlfilename, False, False, False, True, True),
    ]
    formats = (Chem.CDXMLFormat.CDX, Chem.CDXMLFormat.CDXML, Chem.CDXMLFormat.Auto)

    for filename, stream, _iscdx, cdxres, cdxmlres, autores in tests:
      data = None
      if stream:
        # the bytes do not depend on the format, so read them once per row
        with open(filename, 'rb') as f:
          data = f.read()

      for fmt, res in zip(formats, (cdxres, cdxmlres, autores)):
        params = Chem.CDXMLParserParams(True, True, fmt)
        msg = "%s (stream=%s, format=%s)" % (filename, stream, fmt)
        if stream:
          try:
            mols = Chem.MolsFromCDXML(data, params)
          except RuntimeError:
            # a parse failure is only acceptable where the table expects one
            self.assertFalse(res, msg)
          else:
            if res:
              self.assertTrue(mols, msg)
        else:
          mols = Chem.MolsFromCDXMLFile(filename, params)
          if res:
            self.assertTrue(mols, msg)
if __name__ == '__main__':
  # RDTESTCASE may hold a colon-separated list of test method names; when it
  # is set only those tests are run, otherwise the whole module is run.
  if "RDTESTCASE" in os.environ:
    suite = unittest.TestSuite()
    testcases = os.environ["RDTESTCASE"]
    for name in testcases.split(':'):
      # skip empty entries produced by a leading/trailing/doubled ':'
      if name:
        suite.addTest(TestCase(name))
    runner = unittest.TextTestRunner()
    result = runner.run(suite)
    # propagate failures through the exit status, as unittest.main() does
    sys.exit(not result.wasSuccessful())
  else:
    unittest.main()