mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-04 21:54:27 +08:00
107 lines
2.5 KiB
Python
107 lines
2.5 KiB
Python
# coding: utf-8
|
|
import os
|
|
import sys
|
|
|
|
from rdkit import Chem
|
|
|
|
EXPECTED_LABELS_OVERRIDES = {
|
|
# p/m labels are filtered out on input, so they are not
|
|
# included here
|
|
|
|
# allene-likes
|
|
'VS063': ['26R', '2S'],
|
|
'VS118': [],
|
|
'VS135': [],
|
|
'VS154': ['6E', '7E'],
|
|
'VS164': ['21R'],
|
|
'VS231': [],
|
|
|
|
# Chiralities not flagged by RDKit
|
|
'VS132': [],
|
|
}
|
|
|
|
|
|
def split_label_string(labels):
|
|
"""
|
|
Splits the string of expected labels and filters out helical
|
|
labels, which are not supported by rdkit
|
|
"""
|
|
return {label for label in labels.split() if label[-1] not in 'mpMP'}
|
|
|
|
|
|
def supplier(fname):
|
|
"""
|
|
Read the smiles, name and expected labels from the input file.
|
|
If we know of an override for the expected labels, we do the
|
|
override here.
|
|
"""
|
|
with open(fname) as f:
|
|
for line in f:
|
|
smiles, name, expected, _ = line.split('\t', 3)
|
|
|
|
try:
|
|
expected = set(EXPECTED_LABELS_OVERRIDES[name])
|
|
except KeyError:
|
|
expected = split_label_string(expected)
|
|
|
|
mol = Chem.MolFromSmiles(smiles, sanitize=False)
|
|
Chem.SanitizeMol(mol)
|
|
Chem.SetBondStereoFromDirections(mol)
|
|
|
|
yield mol, name, expected
|
|
|
|
|
|
def getLabels(mol):
|
|
"""
|
|
Calculates and extracts the CIP labels for the mol
|
|
"""
|
|
|
|
Chem.rdCIPLabeler.AssignCIPLabels(mol)
|
|
|
|
labels = set()
|
|
for atom in mol.GetAtoms():
|
|
try:
|
|
label = atom.GetProp('_CIPCode')
|
|
except KeyError:
|
|
continue
|
|
else:
|
|
atom_idx = atom.GetIdx() + 1
|
|
labels.add(f'{atom_idx}{label}')
|
|
|
|
for bond in mol.GetBonds():
|
|
try:
|
|
label = bond.GetProp('_CIPCode')
|
|
except KeyError:
|
|
continue
|
|
else:
|
|
begin_idx = bond.GetBeginAtomIdx() + 1
|
|
end_idx = bond.GetEndAtomIdx() + 1
|
|
labels.add(f'{begin_idx}{label}')
|
|
labels.add(f'{end_idx}{label}')
|
|
|
|
return labels
|
|
|
|
|
|
if __name__ == '__main__':
|
|
# The structures used for the validation come from
|
|
# https://github.com/CIPValidationSuite/ValidationSuite
|
|
# at commit 28d0fe05073905e74a1ba5e06b3bd6298686f6af
|
|
fname = 'compounds.smi'
|
|
fpath = os.path.join(os.environ['RDBASE'], 'Code', 'GraphMol', 'test_data', fname)
|
|
|
|
failed = []
|
|
for mol, name, expected in supplier(fpath):
|
|
actual = getLabels(mol)
|
|
|
|
if actual == expected:
|
|
print(f'{name}: PASSED')
|
|
else:
|
|
print(f'{name}: FAILED')
|
|
print(f' Expected: {sorted(expected)}')
|
|
print(f' Actual: {sorted(actual)}')
|
|
failed.append(name)
|
|
print(f'Check finished: {len(failed)} molecules failed.')
|
|
print(f'Failed: {", ".join(failed)}')
|
|
|
|
sys.exit(len(failed) > 0)
|