Files
rdkit/Code/GraphMol/Wrap/testSCSR.py
tadhurst-cdd ca41fa5bfd Add SCSR parsing to RDKit (#8147)
* Parsing SCSR

* add scsrol to mol

* removed bad include file

* loosen distGeom test slightly

* add wrap test for SCSRMol

* Add test for scsr in python

* tests added for scsr and strict parsing removed

* remove extra stuff

* More fully specified use of SCSRMol for PR CI build

* Added flags for SCSR expansion to not include any leaving groups

* Added MolFromScsrParams to Wrap for python

* added SCSRMol destructor

* Added two tests for RNA macromols, and fixed a bug they revealed

* Added new tests and expected files

* changes as per PR review

* SCSR changes for leaving groups

* fixed testScsr.py

* hydrogen bond treatment

* in SCSR expand, allow Hbond to be automatically detected

* changes as per code review

* Adding new test file

* changes for SCSR constructors, destructors for CI build

* fixed Python for SCSR hydrogen bond modes, and added tests

* Added new test files

* fixed edge case for SCSR

* fix checksum for inchi

* consistent capitalization of SCSR throughout

* switch to enum class

* make things shorter

* simplify

* get rid of the ATTCHORD class

* New section for SCSR in RDKit_book

* added section to RDKit_Book

* SCSRMol is no longer exposed in Python

* fix leak in MolFromSCSRFile()
light refactoring

* expose MolFromSCSRFile() to python
make the MolFromSCSR functions work with default args
a bit more testing

* removed C++ access to SCSRMol

* CXSmiles now outputs hbonds, fix to template matching, and a few other things

* Addl fix for bad aromaticity in Hbond rings

* Test files needed

* Test files needed

* try to fix CI build errors

* CI error fix

* Added missing test file

* CMake version - for CI build

* remove full file comparison from macromol test file

* accidental change to debug restored to release

* Code review changes

* As per PR review

---------

Co-authored-by: Greg Landrum <greg.landrum@gmail.com>
2025-05-14 13:37:59 +02:00

122 lines
4.0 KiB
Python

#
# Copyright (C) 2024 Tad Hurst
# All Rights Reserved
#
# This file is part of the RDKit.
# The contents are covered by the terms of the BSD license
# which is included in the file license.txt, found at the root
# of the RDKit source tree.
#
#
import os
import sys
import unittest
from rdkit import Chem
from rdkit.Chem import RDConfig
class TestCase(unittest.TestCase):
    """Tests for the Python wrappers around SCSR macromolecule parsing.

    Each test reads a mol file from the ``macromols`` test-data directory
    below ``RDConfig.RDBaseDir``, expands it with ``Chem.MolFromSCSRBlock``
    and/or ``Chem.MolFromSCSRFile``, and checks the resulting atom, bond and
    substance-group counts (and, where relevant, substance-group labels).
    """

    def _macromolPath(self, fileName):
        """Return the full path of *fileName* in the macromols test-data directory."""
        return os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'FileParsers',
                            'test_data', 'macromols', fileName)

    def _readBlock(self, path):
        """Return the full text content of the file at *path*."""
        with open(path) as inf:
            return inf.read()

    def _makeParams(self, templateNames, hbondOptions=None):
        """Build a ``Chem.MolFromSCSRParams`` with leaving groups included.

        *hbondOptions* is only assigned when given, so that callers can leave
        ``scsrBaseHbondOptions`` at whatever default the wrapper provides.
        """
        params = Chem.MolFromSCSRParams()
        params.includeLeavingGroups = True
        params.scsrTemplateNames = templateNames
        if hbondOptions is not None:
            params.scsrBaseHbondOptions = hbondOptions
        return params

    def _assertCounts(self, mol, numAtoms, numSGroups, numBonds=None):
        """Assert the atom / substance-group (and optionally bond) counts of *mol*.

        Returns the substance groups so that callers can inspect them further.
        """
        # Fail with a readable message instead of an AttributeError if the
        # parser handed back None.
        self.assertIsNotNone(mol)
        self.assertEqual(mol.GetNumAtoms(), numAtoms)
        if numBonds is not None:
            self.assertEqual(mol.GetNumBonds(), numBonds)
        sgs = Chem.GetMolSubstanceGroups(mol)
        self.assertEqual(len(sgs), numSGroups)
        return sgs

    def testSCSR(self):
        """Expand Triplet.mol from a block and from a file, with explicit and default params."""
        path = self._macromolPath('Triplet.mol')
        scsrBlock = self._readBlock(path)
        params = self._makeParams(Chem.SCSRTemplateNames.AsEntered,
                                  Chem.SCSRBaseHbondOptions.Ignore)

        # NOTE(review): the two positional False flags are presumably
        # sanitize/removeHs - confirm against the wrapper signature.
        explicit = (
            ('block', Chem.MolFromSCSRBlock(scsrBlock, False, False, params)),
            ('file', Chem.MolFromSCSRFile(path, False, False, params)),
        )
        for source, mol in explicit:
            with self.subTest(params='explicit', source=source):
                self._assertCounts(mol, numAtoms=30, numSGroups=6)

        # check defaults:
        defaults = (
            ('block', Chem.MolFromSCSRBlock(scsrBlock)),
            ('file', Chem.MolFromSCSRFile(path)),
        )
        for source, mol in defaults:
            with self.subTest(params='default', source=source):
                self._assertCounts(mol, numAtoms=30, numSGroups=6)

    def testSCSRRna(self):
        """Expand the double-stranded DnaTest.mol with and without base H-bonds."""
        scsrBlock = self._readBlock(self._macromolPath('DnaTest.mol'))
        params = self._makeParams(Chem.SCSRTemplateNames.AsEntered,
                                  Chem.SCSRBaseHbondOptions.Auto)

        # With Auto the expanded molecule carries 18 more bonds than with
        # Ignore (300 vs. 282); atom and substance-group counts are identical.
        mol = Chem.MolFromSCSRBlock(scsrBlock, False, False, params)
        self._assertCounts(mol, numAtoms=254, numSGroups=38, numBonds=300)

        params.scsrBaseHbondOptions = Chem.SCSRBaseHbondOptions.Ignore
        mol = Chem.MolFromSCSRBlock(scsrBlock, False, False, params)
        self._assertCounts(mol, numAtoms=254, numSGroups=38, numBonds=282)

    def testThreeLetterCodes(self):
        """Check substance-group labels of PepTla.mol for each template-name mode."""
        scsrBlock = self._readBlock(self._macromolPath('PepTla.mol'))
        params = self._makeParams(Chem.SCSRTemplateNames.AsEntered)

        # (template-name mode, expected bond count or None, expected first label).
        # The bond count is only pinned for the AsEntered case.
        cases = (
            (Chem.SCSRTemplateNames.AsEntered, 25, 'Ala_4'),
            (Chem.SCSRTemplateNames.UseFirstName, None, 'Ala_4'),
            (Chem.SCSRTemplateNames.UseSecondName, None, 'A_4'),
        )
        for templateNames, numBonds, label in cases:
            with self.subTest(templateNames=str(templateNames)):
                params.scsrTemplateNames = templateNames
                mol = Chem.MolFromSCSRBlock(scsrBlock, False, False, params)
                sgs = self._assertCounts(mol, numAtoms=26, numSGroups=7, numBonds=numBonds)
                self.assertEqual(sgs[0].GetProp('LABEL'), label)
# Run the test suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()