mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-04 21:54:27 +08:00
Fixes #2420 Tests updated to reflect changes. The existing CompleteRingsOnly code wasn't actually guaranteeing CompleteRingsOnly at all, it was just causing #2420. The updated code actually makes sure that bonds that were ring bonds in one of the queries are ring bonds in the final MCS. This is done via a relatively simple DFS.
259 lines
9.8 KiB
Python
259 lines
9.8 KiB
Python
from rdkit import RDConfig
|
|
import os
|
|
import sys
|
|
import unittest
|
|
from rdkit import Chem
|
|
from rdkit.Chem import rdFMCS
|
|
|
|
|
|
class TestCase(unittest.TestCase):
    """Tests for rdFMCS.FindMCS (maximum common substructure search).

    Each test builds its own molecules from SMILES, runs FindMCS with a
    particular set of options and checks the size (and, where it is stable,
    the SMARTS) of the reported common substructure.
    """

    def setUp(self):
        """No shared fixtures: every test constructs its own inputs."""

    def _checkHitsAll(self, mcs, mols):
        """Assert that the MCS SMARTS parses and matches every molecule.

        Args:
            mcs: result object returned by rdFMCS.FindMCS.
            mols: iterable of molecules that must all contain the MCS.
        """
        patt = Chem.MolFromSmarts(mcs.smartsString)
        self.assertIsNotNone(patt)
        for mol in mols:
            self.assertTrue(mol.HasSubstructMatch(patt))

    def test1(self):
        """MCS of a ChEMBL series, with and without a 0.8 threshold."""
        smis = (
            "Cc1nc(CN(C(C)c2ncccc2)CCCCN)ccc1 CHEMBL1682991",  # -- QUERY
            "Cc1ccc(CN(C(C)c2ccccn2)CCCCN)nc1 CHEMBL1682990",
            "Cc1cccnc1CN(C(C)c1ccccn1)CCCCN CHEMBL1682998",
            "CC(N(CCCCN)Cc1c(N)cccn1)c1ccccn1 CHEMBL1682987",
            "Cc1cc(C)c(CN(C(C)c2ccccn2)CCCCN)nc1 CHEMBL1682992",
            "Cc1cc(C(C)N(CCCCN)Cc2c(C)cccn2)ncc1 CHEMBL1682993",
            "Cc1nc(C(C)N(CCCCN)Cc2nc3c([nH]2)cccc3)ccc1 CHEMBL1682878",
            "CC(c1ncccc1)N(CCCCN)Cc1nc2c([nH]1)cccc2 CHEMBL1682867",
            "CC(N(CCCCN)Cc1c(C(C)(C)C)cccn1)c1ccccn1 CHEMBL1682989",
            "CC(N(CCCCN)Cc1c(C(F)(F)F)cccn1)c1ccccn1 CHEMBL1682988",
        )
        # The SMILES is the first whitespace-separated field.  The first
        # entry (marked QUERY) is parsed but left out of the MCS input.
        ms = [Chem.MolFromSmiles(x.split()[0]) for x in smis][1:]
        expected = '[#6](:[#6]:[#6]):[#6]:[#7]:[#6]-[#6]-[#7](-[#6](-[#6])-[#6]1:[#6]:[#6]:[#6]:[#6]:[#7]:1)-[#6]-[#6]-[#6]-[#6]-[#7]'

        mcs = rdFMCS.FindMCS(ms)
        self.assertEqual(mcs.numBonds, 21)
        self.assertEqual(mcs.numAtoms, 21)
        self.assertEqual(mcs.smartsString, expected)
        self._checkHitsAll(mcs, ms)

        # The same result is expected when only 80% of the inputs must match.
        mcs = rdFMCS.FindMCS(ms, threshold=0.8)
        self.assertEqual(mcs.numBonds, 21)
        self.assertEqual(mcs.numAtoms, 21)
        self.assertEqual(mcs.smartsString, expected)
        self._checkHitsAll(mcs, ms)

    def test2(self):
        """A threshold of 0.8 yields a larger MCS than requiring all inputs."""
        smis = (
            "CHEMBL122452 CN(CCCN(C)CCc1ccccc1)CCOC(c1ccccc1)c1ccccc1",
            "CHEMBL123252 CN(CCCc1ccccc1)CCCN(C)CCOC(c1ccccc1)c1ccccc1",
            "CHEMBL121611 Fc1ccc(C(OCCNCCCNCCc2ccccc2)c2ccc(F)cc2)cc1",
            "CHEMBL121050 O=C(Cc1ccccc1)NCCCCNCCOC(c1ccc(F)cc1)c1ccc(F)cc1",
            "CHEMBL333667 O=C(Cc1ccccc1)NCCNCCOC(c1ccc(F)cc1)c1ccc(F)cc1",
            "CHEMBL121486 O=C(Cc1ccc(Br)cc1)NC=CNCCOC(c1ccc(F)cc1)c1ccc(F)cc1",
            "CHEMBL123830 O=C(Cc1ccc(F)cc1)NCCNCCOC(c1ccc(F)cc1)c1ccc(F)cc1",
            "CHEMBL420900 O=C(Cc1ccccc1)NCCCNCCOC(c1ccc(F)cc1)c1ccc(F)cc1",
            "CHEMBL121460 CN(CCOC(c1ccc(F)cc1)c1ccc(F)cc1)CCN(C)CCOC(c1ccc(F)cc1)c1ccc(F)cc1",
            "CHEMBL120901 COC(=O)C1C2CCC(CC1C(=O)Oc1ccccc1)N2C",
            "CHEMBL122859 O=C1CN(CCc2ccccc2)CCN1CCOC(c1ccc(F)cc1)c1ccc(F)cc1",
            "CHEMBL121027 CN(CCOC(c1ccccc1)c1ccccc1)CCN(C)CCc1ccc(F)cc1",
        )
        # Here the identifier comes first and the SMILES second.  The first
        # molecule is parsed but left out of the MCS input.
        ms = [Chem.MolFromSmiles(x.split()[1]) for x in smis][1:]

        mcs = rdFMCS.FindMCS(ms)
        self.assertEqual(mcs.numBonds, 9)
        self.assertEqual(mcs.numAtoms, 10)
        self._checkHitsAll(mcs, ms)
        # smarts too hard to canonicalize this
        # self.assertEqual(mcs.smartsString,'[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6](-[#6]-[#8]-[#6]:,-[#6])-,:[#6]')

        mcs = rdFMCS.FindMCS(ms, threshold=0.8)
        self.assertEqual(mcs.numBonds, 20)
        self.assertEqual(mcs.numAtoms, 19)
        qm = Chem.MolFromSmarts(mcs.smartsString)
        self.assertIsNotNone(qm)
        nHits = sum(1 for m in ms if m.HasSubstructMatch(qm))
        # NOTE(review): the bound is computed from len(smis) (12 entries)
        # although only len(ms) (11 molecules) are searched -- confirm intent.
        self.assertGreaterEqual(nHits, int(0.8 * len(smis)))
        # smarts too hard to canonicalize this
        # self.assertEqual(mcs.smartsString,'[#6]1:[#6]:[#6]:[#6](:[#6]:[#6]:1)-[#6](-[#8]-[#6]-[#6]-[#7]-[#6]-[#6])-[#6]2:[#6]:[#6]:[#6]:[#6]:[#6]:2')

    def test3IsotopeMatch(self):
        """AtomCompare.CompareIsotopes changes which atoms are matched."""
        smis = (
            "CC[14NH2]",
            "CC[14CH3]",
        )
        ms = [Chem.MolFromSmiles(x) for x in smis]

        # Default atom comparison: only two atoms are common.
        mcs = rdFMCS.FindMCS(ms)
        self.assertEqual(mcs.numBonds, 1)
        self.assertEqual(mcs.numAtoms, 2)

        # Comparing isotopes: all three atoms are common.
        mcs = rdFMCS.FindMCS(ms, atomCompare=rdFMCS.AtomCompare.CompareIsotopes)
        self.assertEqual(mcs.numBonds, 2)
        self.assertEqual(mcs.numAtoms, 3)
        qm = Chem.MolFromSmarts(mcs.smartsString)

        self.assertTrue(Chem.MolFromSmiles('CC[14CH3]').HasSubstructMatch(qm))
        self.assertFalse(Chem.MolFromSmiles('CC[13CH3]').HasSubstructMatch(qm))
        self.assertTrue(Chem.MolFromSmiles('OO[14CH3]').HasSubstructMatch(qm))
        self.assertFalse(Chem.MolFromSmiles('O[13CH2][14CH3]').HasSubstructMatch(qm))

    def test4RingMatches(self):
        """Effect of completeRingsOnly and ringMatchesRingOnly on the MCS."""
        # Chain versus a ring with a chain substituent.
        smis = ['CCCCC', 'CCC1CCCCC1']
        ms = [Chem.MolFromSmiles(x) for x in smis]

        mcs = rdFMCS.FindMCS(ms)
        self.assertEqual(mcs.numBonds, 4)
        self.assertEqual(mcs.numAtoms, 5)
        self.assertEqual(mcs.smartsString, '[#6]-[#6]-[#6]-[#6]-[#6]')

        mcs = rdFMCS.FindMCS(ms, completeRingsOnly=True)
        self.assertEqual(mcs.numBonds, 2)
        self.assertEqual(mcs.numAtoms, 3)
        self.assertEqual(mcs.smartsString, '[#6]-[#6]-[#6]')

        mcs = rdFMCS.FindMCS(ms, ringMatchesRingOnly=True)
        self.assertEqual(mcs.numBonds, 1)
        self.assertEqual(mcs.numAtoms, 2)
        self.assertEqual(mcs.smartsString, '[#6]-[#6]')

        # Two substituted rings of different size.
        smis = ['CC1CCC1', 'CCC1CCCCC1']
        ms = [Chem.MolFromSmiles(x) for x in smis]

        mcs = rdFMCS.FindMCS(ms)
        self.assertEqual(mcs.numBonds, 4)
        self.assertEqual(mcs.numAtoms, 5)
        self.assertEqual(mcs.smartsString, '[#6]-[#6](-[#6]-[#6])-[#6]')

        # With completeRingsOnly the expected result is empty.
        mcs = rdFMCS.FindMCS(ms, completeRingsOnly=True)
        self.assertEqual(mcs.numBonds, 0)
        self.assertEqual(mcs.numAtoms, 0)
        self.assertEqual(mcs.smartsString, '')

        mcs = rdFMCS.FindMCS(ms, ringMatchesRingOnly=True)
        self.assertEqual(mcs.numBonds, 4)
        self.assertEqual(mcs.numAtoms, 5)
        self.assertEqual(mcs.smartsString, '[#6]-[#6](-[#6]-[#6])-[#6]')

    def test5AnyMatch(self):
        """AtomCompare.CompareAny gives a 7-atom MCS for both input sets."""
        for smis in (
            ('c1ccccc1C', 'c1ccccc1O', 'c1ccccc1Cl'),
            ('c1cccnc1C', 'c1cnncc1O', 'c1cccnc1Cl'),
        ):
            ms = [Chem.MolFromSmiles(x) for x in smis]
            mcs = rdFMCS.FindMCS(ms, atomCompare=rdFMCS.AtomCompare.CompareAny)
            self.assertEqual(mcs.numBonds, 7)
            self.assertEqual(mcs.numAtoms, 7)
            self._checkHitsAll(mcs, ms)

    def test6MatchValences(self):
        """matchValences=True shrinks the MCS from 4 atoms to 3."""
        ms = (Chem.MolFromSmiles('NC1OC1'), Chem.MolFromSmiles('C1OC1[N+](=O)[O-]'))

        mcs = rdFMCS.FindMCS(ms)
        self.assertEqual(mcs.numBonds, 4)
        self.assertEqual(mcs.numAtoms, 4)

        mcs = rdFMCS.FindMCS(ms, matchValences=True)
        self.assertEqual(mcs.numBonds, 3)
        self.assertEqual(mcs.numAtoms, 3)

    def test7Seed(self):
        """seedSmarts changes the reported MCS; an unmatched seed gives ''."""
        smis = ['C1CCC1CC1CC1', 'C1CCC1OC1CC1', 'C1CCC1NC1CC1', 'C1CCC1SC1CC1']
        ms = [Chem.MolFromSmiles(x) for x in smis]

        r = rdFMCS.FindMCS(ms)
        self.assertEqual(r.smartsString, "[#6]1-[#6]-[#6]-[#6]-1")

        r = rdFMCS.FindMCS(ms, seedSmarts='C1CC1')
        self.assertEqual(r.smartsString, "[#6]1-[#6]-[#6]-1")

        r = rdFMCS.FindMCS(ms, seedSmarts='C1OC1')
        self.assertEqual(r.smartsString, "")

    def test8MatchParams(self):
        """FindMCS accepts an MCSParameters object in place of keywords."""
        smis = ("CCC1NC1", "CCC1N(C)C1", "CCC1OC1")
        ms = [Chem.MolFromSmiles(x) for x in smis]

        mcs = rdFMCS.FindMCS(ms)
        self.assertEqual(mcs.numAtoms, 4)

        ps = rdFMCS.MCSParameters()
        ps.BondCompareParameters.CompleteRingsOnly = True
        mcs = rdFMCS.FindMCS(ms, ps)
        self.assertEqual(mcs.numAtoms, 3)

        # A fresh parameter object, so CompleteRingsOnly is not carried over.
        ps = rdFMCS.MCSParameters()
        ps.SetAtomTyper(rdFMCS.AtomCompare.CompareAny)
        mcs = rdFMCS.FindMCS(ms, ps)
        self.assertEqual(mcs.numAtoms, 5)

    def test9MatchCharge(self):
        """MatchFormalCharge=True reduces the MCS from 4 atoms to 2."""
        smis = ("CCNC", "CCN(C)C", "CC[N+](C)C")
        ms = [Chem.MolFromSmiles(x) for x in smis]

        mcs = rdFMCS.FindMCS(ms)
        self.assertEqual(mcs.numAtoms, 4)

        ps = rdFMCS.MCSParameters()
        ps.AtomCompareParameters.MatchFormalCharge = True
        mcs = rdFMCS.FindMCS(ms, ps)
        self.assertEqual(mcs.numAtoms, 2)

    def test10MatchChargeAndParams(self):
        """CompareAny typing combined with MatchFormalCharge."""
        smis = ("CCNC", "CCN(C)C", "CC[N+](C)C", "CC[C+](C)C")
        ms = [Chem.MolFromSmiles(x) for x in smis]

        mcs = rdFMCS.FindMCS(ms)
        self.assertEqual(mcs.numAtoms, 2)

        ps = rdFMCS.MCSParameters()
        ps.SetAtomTyper(rdFMCS.AtomCompare.CompareAny)
        mcs = rdFMCS.FindMCS(ms, ps)
        self.assertEqual(mcs.numAtoms, 4)

        # The same parameter object is reused, so the CompareAny typer set
        # above is still in effect when formal-charge matching is enabled.
        ps.AtomCompareParameters.MatchFormalCharge = True
        mcs = rdFMCS.FindMCS(ms, ps)
        self.assertEqual(mcs.numAtoms, 2)

    def test11Github2034(self):
        """ringMatchesRingOnly gives the same result by keyword or by params."""
        smis = ("C1CC1N2CC2", "C1CC1N")
        ms = [Chem.MolFromSmiles(x) for x in smis]

        mcs = rdFMCS.FindMCS(ms)
        self.assertEqual(mcs.numAtoms, 4)
        self.assertEqual(mcs.numBonds, 4)

        mcs = rdFMCS.FindMCS(ms, ringMatchesRingOnly=True)
        self.assertEqual(mcs.numAtoms, 3)
        self.assertEqual(mcs.numBonds, 3)

        ps = rdFMCS.MCSParameters()
        ps.AtomCompareParameters.RingMatchesRingOnly = True
        mcs = rdFMCS.FindMCS(ms, ps)
        self.assertEqual(mcs.numAtoms, 3)
        self.assertEqual(mcs.numBonds, 3)
|
|
|
|
|
|
# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|