Files
rdkit/Code/GraphMol/CIPLabeler/Wrap/pyCIPLabelsValidation.py
Ric 880a8e5725 Reformat Python code for 2023.03 release (#6294)
* run yapf

* run isort

---------

Co-authored-by: Greg Landrum <greg.landrum@gmail.com>
2023-04-28 06:53:56 +02:00

107 lines
2.5 KiB
Python

# coding: utf-8
import os
import sys
from rdkit import Chem
EXPECTED_LABELS_OVERRIDES = {
# p/m labels are filtered out on input, so they are not
# included here
# allene-likes
'VS063': ['26R', '2S'],
'VS118': [],
'VS135': [],
'VS154': ['6E', '7E'],
'VS164': ['21R'],
'VS231': [],
# Chiralities not flagged by RDKit
'VS132': [],
}
def split_label_string(labels):
"""
Splits the string of expected labels and filters out helical
labels, which are not supported by rdkit
"""
return {label for label in labels.split() if label[-1] not in 'mpMP'}
def supplier(fname):
"""
Read the smiles, name and expected labels from the input file.
If we know of an override for the expected labels, we do the
override here.
"""
with open(fname) as f:
for line in f:
smiles, name, expected, _ = line.split('\t', 3)
try:
expected = set(EXPECTED_LABELS_OVERRIDES[name])
except KeyError:
expected = split_label_string(expected)
mol = Chem.MolFromSmiles(smiles, sanitize=False)
Chem.SanitizeMol(mol)
Chem.SetBondStereoFromDirections(mol)
yield mol, name, expected
def getLabels(mol):
"""
Calculates and extracts the CIP labels for the mol
"""
Chem.rdCIPLabeler.AssignCIPLabels(mol)
labels = set()
for atom in mol.GetAtoms():
try:
label = atom.GetProp('_CIPCode')
except KeyError:
continue
else:
atom_idx = atom.GetIdx() + 1
labels.add(f'{atom_idx}{label}')
for bond in mol.GetBonds():
try:
label = bond.GetProp('_CIPCode')
except KeyError:
continue
else:
begin_idx = bond.GetBeginAtomIdx() + 1
end_idx = bond.GetEndAtomIdx() + 1
labels.add(f'{begin_idx}{label}')
labels.add(f'{end_idx}{label}')
return labels
if __name__ == '__main__':
# The structures used for the validation come from
# https://github.com/CIPValidationSuite/ValidationSuite
# at commit 28d0fe05073905e74a1ba5e06b3bd6298686f6af
fname = 'compounds.smi'
fpath = os.path.join(os.environ['RDBASE'], 'Code', 'GraphMol', 'test_data', fname)
failed = []
for mol, name, expected in supplier(fpath):
actual = getLabels(mol)
if actual == expected:
print(f'{name}: PASSED')
else:
print(f'{name}: FAILED')
print(f' Expected: {sorted(expected)}')
print(f' Actual: {sorted(actual)}')
failed.append(name)
print(f'Check finished: {len(failed)} molecules failed.')
print(f'Failed: {", ".join(failed)}')
sys.exit(len(failed) > 0)