mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-04 21:54:27 +08:00
* make sure that we can build without boost iostreams or seralization adds some "private" variables on the python side to check for these compilation flags * get out minimal cmake version correct * get minimallib js building installs an up-to-date cmake also updates the version of boost being used for the minimallib adds extra argument to allow the repo to be specified
740 lines
30 KiB
Python
740 lines
30 KiB
Python
# Copyright (C) 2017-2021 Novartis Institute of BioMedical Research
|
|
# and other RDKit contributors
|
|
#
|
|
# All Rights Reserved
|
|
#
|
|
# Redistribution and use in source and binary forms, with or without
|
|
# modification, are permitted provided that the following conditions are
|
|
# met:
|
|
#
|
|
# * Redistributions of source code must retain the above copyright
|
|
# notice, this list of conditions and the following disclaimer.
|
|
# * Redistributions in binary form must reproduce the above
|
|
# copyright notice, this list of conditions and the following
|
|
# disclaimer in the documentation and/or other materials provided
|
|
# with the distribution.
|
|
# * Neither the name of Novartis Institutes for BioMedical Research Inc.
|
|
# nor the names of its contributors may be used to endorse or promote
|
|
# products derived from this software without specific prior written permission.
|
|
#
|
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
#
|
|
""" This is a rough coverage test of the python wrapper for the SubstructLibrary
|
|
|
|
it is intended to be shallow but broad.
|
|
"""
|
|
|
|
import doctest
|
|
import logging
|
|
import os
|
|
import sys
|
|
import unittest
|
|
|
|
from rdkit import RDConfig, RDLogger, rdBase
|
|
from rdkit.RDLogger import logger
|
|
|
|
logger = logger()
|
|
import pickle
|
|
import tempfile
|
|
import time
|
|
|
|
from rdkit import Chem
|
|
from rdkit.Chem import rdSubstructLibrary, rdGeneralizedSubstruct, rdTautomerQuery
|
|
|
|
|
|
def load_tests(loader, tests, ignore):
    """Implement unittest's ``load_tests`` protocol for this module.

    The doctests collected from ``rdSubstructLibrary`` are appended to
    ``tests``, which is then returned.  ``loader`` and ``ignore`` are part of
    the protocol signature and are not used here.
    """
    doctest_suite = doctest.DocTestSuite(rdSubstructLibrary)
    tests.addTests(doctest_suite)
    return tests
|
|
|
|
|
|
def makeStereoExamples():
    """Build the small set of chiral example molecules used by the tests.

    For every ordered pair of distinct elements taken from "NO", both the
    ``@@`` and the ``@`` form of the ring template are parsed; molecules that
    parse successfully are collected and returned as a list.
    """
    # NOTE(review): the original indentation was lost in this copy of the
    # file; both templates are assumed to sit inside the "elements differ"
    # guard - confirm against the upstream source.
    templates = ("C1CCO[C@@](%s)(%s)1", "C1CCO[C@](%s)(%s)1")
    elements = "NO"
    examples = []
    for first in elements:
        for second in elements:
            if first == second:
                continue
            for template in templates:
                mol = Chem.MolFromSmiles(template % (first, second))
                if mol:
                    examples.append(mol)
    return examples
|
|
|
|
|
|
class TestCase(unittest.TestCase):
|
|
|
|
def setUp(self):
    """Per-test fixture hook; this test case needs no setup."""
    return None
|
|
|
|
def test0SubstructLibrary(self):
    """Search a library holding 100 copies of benzene.

    Every combination of key holder (none / ``KeyFromPropHolder``),
    fingerprint holder (none / ``PatternHolder``) and molecule holder is
    exercised.  When ``SubstructLibraryCanSerialize()`` is true, a pickled
    copy and a copy rebuilt from ``Serialize()`` are checked as well.
    """
    for keyholderCls in [None, rdSubstructLibrary.KeyFromPropHolder]:
        for fpholderCls in [None, rdSubstructLibrary.PatternHolder]:
            for holder in [
                    rdSubstructLibrary.MolHolder(),
                    rdSubstructLibrary.CachedMolHolder(),
                    rdSubstructLibrary.CachedSmilesMolHolder(),
            ]:
                fpholder = fpholderCls() if fpholderCls else None
                if keyholderCls:
                    keyholder = keyholderCls()
                    self.assertEqual(keyholder.GetPropName(), "_Name")
                else:
                    keyholder = None

                slib_ = rdSubstructLibrary.SubstructLibrary(holder, fpholder, keyholder)
                for i in range(100):
                    m = Chem.MolFromSmiles("c1ccccc1")
                    # the name doubles as the key when a key holder is used
                    m.SetProp("_Name", str(i))
                    self.assertEqual(slib_.AddMol(m), i)

                libs = [slib_]
                if rdSubstructLibrary.SubstructLibraryCanSerialize():
                    serialized1 = pickle.loads(pickle.dumps(slib_))
                    serialized2 = rdSubstructLibrary.SubstructLibrary(slib_.Serialize())
                    libs.append(serialized1)
                    libs.append(serialized2)

                # Use an explicit query instead of relying on the variable
                # leaked from the population loop above.
                query = Chem.MolFromSmiles("c1ccccc1")

                for slib in libs:
                    res = slib.GetMatches(query)

                    if keyholderCls:
                        # keys were set to str(index) when the library was built
                        for idx in res:
                            self.assertEqual(str(idx), slib.GetKeyHolder().GetKey(idx))
                        self.assertEqual([str(idx) for idx in res],
                                         list(slib.GetKeyHolder().GetKeys(res)))

                    self.assertEqual(len(res), 100)

                    res = slib.GetMatches(query)
                    self.assertEqual(len(res), 100)
                    self.assertEqual(set(res), set(range(100)))

                    res = slib.GetMatches(query, maxResults=100)
                    self.assertEqual(len(res), 100)
                    self.assertEqual(len(slib.GetMatches(query, startIdx=0, endIdx=100)), 100)

                    self.assertTrue(slib.HasMatch(query))
                    self.assertEqual(slib.CountMatches(query), 100)
|
|
|
|
def test1SubstructLibrary(self):
    """Search a library that alternates benzene and butane entries.

    Benzene is stored at the even indices 0..198 and butane at the odd
    indices 1..199.  Every combination of key holder, fingerprint holder and
    molecule holder is exercised, and, when ``SubstructLibraryCanSerialize()``
    is true, so are a pickled copy and a copy rebuilt from ``Serialize()``.
    """
    for keyholderCls in [None, rdSubstructLibrary.KeyFromPropHolder]:
        for fpholderCls in [None, rdSubstructLibrary.PatternHolder]:
            for holder in [
                    rdSubstructLibrary.MolHolder(),
                    rdSubstructLibrary.CachedMolHolder(),
                    rdSubstructLibrary.CachedSmilesMolHolder(),
            ]:
                fpholder = fpholderCls() if fpholderCls else None
                if keyholderCls:
                    keyholder = keyholderCls()
                    self.assertEqual(keyholder.GetPropName(), "_Name")
                else:
                    keyholder = None

                slib_ = rdSubstructLibrary.SubstructLibrary(holder, fpholder, keyholder)
                for i in range(100):
                    m = Chem.MolFromSmiles("c1ccccc1")
                    m.SetProp("_Name", str(i * 2))
                    self.assertEqual(slib_.AddMol(m), i * 2)
                    m2 = Chem.MolFromSmiles("CCCC")
                    m2.SetProp("_Name", str(i * 2 + 1))
                    self.assertEqual(slib_.AddMol(m2), i * 2 + 1)

                libs = [slib_]
                if rdSubstructLibrary.SubstructLibraryCanSerialize():
                    serialized1 = pickle.loads(pickle.dumps(slib_))
                    serialized2 = rdSubstructLibrary.SubstructLibrary(slib_.Serialize())
                    libs.append(serialized1)
                    libs.append(serialized2)

                # Explicit queries rather than the variables leaked from the
                # population loop above.
                benzene = Chem.MolFromSmiles("c1ccccc1")
                butane = Chem.MolFromSmiles("CCCC")
                even_indices = list(range(0, 200, 2))
                odd_indices = list(range(1, 200, 2))

                for slib in libs:
                    res = slib.GetMatches(benzene)
                    self.assertEqual(len(res), 100)
                    self.assertEqual(set(res), set(even_indices))
                    if keyholderCls:
                        self.assertEqual([str(idx) for idx in res],
                                         [str(idx) for idx in even_indices])

                    res = slib.GetMatches(butane)
                    self.assertEqual(len(res), 100)
                    self.assertEqual(set(res), set(odd_indices))
                    if keyholderCls:
                        self.assertEqual([str(idx) for idx in res],
                                         [str(idx) for idx in odd_indices])

                    res = slib.GetMatches(benzene)
                    self.assertEqual(len(res), 100)

                    res = slib.GetMatches(benzene, maxResults=100)
                    self.assertEqual(len(res), 100)

                    # restricting the index window to half of the library
                    # must halve the number of hits
                    self.assertEqual(
                        len(slib.GetMatches(benzene, startIdx=0, endIdx=50 * 2)), 50)
                    self.assertEqual(
                        len(slib.GetMatches(butane, startIdx=1, endIdx=50 * 2 + 1)), 50)

                    self.assertTrue(slib.HasMatch(benzene))
                    self.assertTrue(slib.HasMatch(butane))
                    self.assertEqual(slib.CountMatches(benzene), 100)
                    self.assertEqual(slib.CountMatches(butane), 100)
|
|
|
|
def testOptions(self):
    """Check the ``useChirality`` search option for each molecule holder type.

    For every holder class a library is filled with the stereo examples and
    the number of library hits is compared with the number of molecules for
    which ``HasSubstructMatch`` succeeds using the same chirality setting
    (a search without the option is compared against ``useChirality=True``).
    """
    mols = makeStereoExamples() * 10
    holder_classes = [
        rdSubstructLibrary.MolHolder,
        rdSubstructLibrary.CachedMolHolder,
        rdSubstructLibrary.CachedSmilesMolHolder,
        rdSubstructLibrary.CachedTrustedSmilesMolHolder,
    ]

    for holderCls in holder_classes:
        slib_ = rdSubstructLibrary.SubstructLibrary(holderCls(), None)
        for mol in mols:
            slib_.AddMol(mol)

        libs = [slib_]
        if rdSubstructLibrary.SubstructLibraryCanSerialize():
            from_pickle = pickle.loads(pickle.dumps(slib_))
            from_bytes = rdSubstructLibrary.SubstructLibrary(slib_.Serialize())
            libs.extend([from_pickle, from_bytes])

        for slib in libs:
            # wildcard substituents, default search options
            core = Chem.MolFromSmarts("C-1-C-C-O-C(-*)(-*)1")
            hits = slib.GetMatches(core)
            wanted = [x for x in mols if x.HasSubstructMatch(core, useChirality=True)]
            self.assertEqual(len(hits), len(wanted))

            # achiral query, chirality ignored
            core = Chem.MolFromSmarts("C-1-C-C-O-C(-[O])(-[N])1")
            core.SetProp("core", "core")
            hits = slib.GetMatches(core, useChirality=False)
            wanted = [x for x in mols if x.HasSubstructMatch(core, useChirality=False)]
            self.assertEqual(len(hits), len(wanted))

            # chiral query, chirality ignored
            core = Chem.MolFromSmarts("C-1-C-C-O-[C@@](-[O])(-[N])1")
            hits = slib.GetMatches(core, useChirality=False)
            wanted = [x for x in mols if x.HasSubstructMatch(core, useChirality=False)]
            self.assertEqual(len(hits), len(wanted))

            # chiral query, default search options
            core = Chem.MolFromSmarts("C-1-C-C-O-[C@@](-[O])(-[N])1")
            hits = slib.GetMatches(core)
            wanted = [x for x in mols if x.HasSubstructMatch(core, useChirality=True)]
            self.assertEqual(len(hits), len(wanted))
|
|
|
|
def testSmilesCache(self):
    """Search a library whose ``CachedSmilesMolHolder`` is filled via ``AddSmiles``.

    The SMILES are added directly to the holder after the library has been
    created around it.  Hit counts are compared with ``HasSubstructMatch`` on
    the source molecules using the matching chirality setting.
    """
    mols = makeStereoExamples() * 10
    holder = rdSubstructLibrary.CachedSmilesMolHolder()
    slib_ = rdSubstructLibrary.SubstructLibrary(holder, None)

    for mol in mols:
        smiles = Chem.MolToSmiles(mol, isomericSmiles=True)
        holder.AddSmiles(smiles)

    libs = [slib_]
    if rdSubstructLibrary.SubstructLibraryCanSerialize():
        from_pickle = pickle.loads(pickle.dumps(slib_))
        from_bytes = rdSubstructLibrary.SubstructLibrary(slib_.Serialize())
        libs.extend([from_pickle, from_bytes])

    for slib in libs:
        # wildcard substituents, default search options
        core = Chem.MolFromSmarts("C-1-C-C-O-C(-*)(-*)1")
        hits = slib.GetMatches(core)
        wanted = [x for x in mols if x.HasSubstructMatch(core, useChirality=True)]
        self.assertEqual(len(hits), len(wanted))

        # achiral query, chirality ignored
        core = Chem.MolFromSmarts("C-1-C-C-O-C(-[O])(-[N])1")
        core.SetProp("core", "core")
        hits = slib.GetMatches(core, useChirality=False)
        wanted = [x for x in mols if x.HasSubstructMatch(core, useChirality=False)]
        self.assertEqual(len(hits), len(wanted))

        # chiral query, chirality ignored
        core = Chem.MolFromSmarts("C-1-C-C-O-[C@@](-[O])(-[N])1")
        hits = slib.GetMatches(core, useChirality=False)
        wanted = [x for x in mols if x.HasSubstructMatch(core, useChirality=False)]
        self.assertEqual(len(hits), len(wanted))

        # chiral query, default search options
        core = Chem.MolFromSmarts("C-1-C-C-O-[C@@](-[O])(-[N])1")
        hits = slib.GetMatches(core)
        wanted = [x for x in mols if x.HasSubstructMatch(core, useChirality=True)]
        self.assertEqual(len(hits), len(wanted))
|
|
|
|
def testTrustedSmilesCache(self):
    """Search a library backed by a ``CachedTrustedSmilesMolHolder``.

    The holder is filled through ``AddSmiles`` with isomeric SMILES generated
    from the stereo examples.  Hit counts are compared with
    ``HasSubstructMatch`` on the source molecules using the matching
    chirality setting.
    """
    mols = makeStereoExamples() * 10
    holder = rdSubstructLibrary.CachedTrustedSmilesMolHolder()
    slib_ = rdSubstructLibrary.SubstructLibrary(holder, None)

    for mol in mols:
        smiles = Chem.MolToSmiles(mol, isomericSmiles=True)
        holder.AddSmiles(smiles)

    libs = [slib_]
    if rdSubstructLibrary.SubstructLibraryCanSerialize():
        from_pickle = pickle.loads(pickle.dumps(slib_))
        from_bytes = rdSubstructLibrary.SubstructLibrary(slib_.Serialize())
        libs.extend([from_pickle, from_bytes])

    for slib in libs:
        # wildcard substituents, default search options
        core = Chem.MolFromSmarts("C-1-C-C-O-C(-*)(-*)1")
        hits = slib.GetMatches(core)
        wanted = [x for x in mols if x.HasSubstructMatch(core, useChirality=True)]
        self.assertEqual(len(hits), len(wanted))

        # achiral query, chirality ignored
        core = Chem.MolFromSmarts("C-1-C-C-O-C(-[O])(-[N])1")
        core.SetProp("core", "core")
        hits = slib.GetMatches(core, useChirality=False)
        wanted = [x for x in mols if x.HasSubstructMatch(core, useChirality=False)]
        self.assertEqual(len(hits), len(wanted))

        # chiral query, chirality ignored
        core = Chem.MolFromSmarts("C-1-C-C-O-[C@@](-[O])(-[N])1")
        hits = slib.GetMatches(core, useChirality=False)
        wanted = [x for x in mols if x.HasSubstructMatch(core, useChirality=False)]
        self.assertEqual(len(hits), len(wanted))

        # chiral query, default search options
        core = Chem.MolFromSmarts("C-1-C-C-O-[C@@](-[O])(-[N])1")
        hits = slib.GetMatches(core)
        wanted = [x for x in mols if x.HasSubstructMatch(core, useChirality=True)]
        self.assertEqual(len(hits), len(wanted))
|
|
|
|
def testBinaryCache(self):
    """Search a library whose ``CachedMolHolder`` is filled via ``AddBinary``.

    Each stereo example is added as the result of ``ToBinary()``.  Hit counts
    are compared with ``HasSubstructMatch`` on the source molecules using the
    matching chirality setting.
    """
    mols = makeStereoExamples() * 10
    holder = rdSubstructLibrary.CachedMolHolder()
    slib_ = rdSubstructLibrary.SubstructLibrary(holder, None)

    for mol in mols:
        pickled_mol = mol.ToBinary()
        holder.AddBinary(pickled_mol)

    libs = [slib_]
    if rdSubstructLibrary.SubstructLibraryCanSerialize():
        from_pickle = pickle.loads(pickle.dumps(slib_))
        from_bytes = rdSubstructLibrary.SubstructLibrary(slib_.Serialize())
        libs.extend([from_pickle, from_bytes])

    for slib in libs:
        # wildcard substituents, default search options
        core = Chem.MolFromSmarts("C-1-C-C-O-C(-*)(-*)1")
        hits = slib.GetMatches(core)
        wanted = [x for x in mols if x.HasSubstructMatch(core, useChirality=True)]
        self.assertEqual(len(hits), len(wanted))

        # achiral query, chirality ignored
        core = Chem.MolFromSmarts("C-1-C-C-O-C(-[O])(-[N])1")
        core.SetProp("core", "core")
        hits = slib.GetMatches(core, useChirality=False)
        wanted = [x for x in mols if x.HasSubstructMatch(core, useChirality=False)]
        self.assertEqual(len(hits), len(wanted))

        # chiral query, chirality ignored
        core = Chem.MolFromSmarts("C-1-C-C-O-[C@@](-[O])(-[N])1")
        hits = slib.GetMatches(core, useChirality=False)
        wanted = [x for x in mols if x.HasSubstructMatch(core, useChirality=False)]
        self.assertEqual(len(hits), len(wanted))

        # chiral query, default search options
        core = Chem.MolFromSmarts("C-1-C-C-O-[C@@](-[O])(-[N])1")
        hits = slib.GetMatches(core)
        wanted = [x for x in mols if x.HasSubstructMatch(core, useChirality=True)]
        self.assertEqual(len(hits), len(wanted))
|
|
|
|
def testRingSmartsWithTrustedSmiles(self):
    """Ring queries must work through the library with trusted SMILES.

    Matching a ring-atom or ring-bond SMARTS directly against the molecule
    returned by the holder is expected to raise ``RuntimeError``; running the
    same queries through the library must succeed and find the single entry.
    """
    ring_atom_query = Chem.MolFromSmarts("[C&R1]")
    ring_bond_query = Chem.MolFromSmarts("C@C")  # ring bond
    holder = rdSubstructLibrary.CachedTrustedSmilesMolHolder()
    lib = rdSubstructLibrary.SubstructLibrary(holder)
    lib.AddMol(Chem.MolFromSmiles("C1CC1"))

    # make sure we can get an unsanitized molecule that fails (no ring info)
    direct_checks = (
        ("Testing atom rings", ring_atom_query),
        ("testing bond rings", ring_bond_query),
    )
    for banner, query in direct_checks:
        print(banner)
        with self.assertRaises(RuntimeError):
            holder.GetMol(0).HasSubstructMatch(query)

    # shouldn't throw
    library_checks = (
        ("searching atom rings", ring_atom_query),
        ("searching bond rings", ring_bond_query),
    )
    for banner, query in library_checks:
        print(banner)
        self.assertEqual(len(lib.GetMatches(query)), 1)
        self.assertEqual(lib.CountMatches(query), 1)
    print("done")
|
|
|
|
@unittest.skipIf(not rdBase._serializationEnabled, "not built with serialization support")
def test_init_from_and_to_stream(self):
    """Round-trip a library through ``ToStream`` / ``InitFromStream``.

    The library is written once to a temporary file and once to an in-memory
    ``StringIO``.  Each copy is read back through a binary stream and must
    contain as many entries as the original library.
    """
    from io import BytesIO, StringIO

    mols = makeStereoExamples() * 10
    holder = rdSubstructLibrary.CachedSmilesMolHolder()

    # one day I'll fix this, but we need to write text but read binary
    # grrr.... something about the python_streambuf handler.
    slib = rdSubstructLibrary.SubstructLibrary(holder, None)

    for mol in mols:
        holder.AddSmiles(Chem.MolToSmiles(mol, isomericSmiles=True))

    if not rdSubstructLibrary.SubstructLibraryCanSerialize():
        return

    fd, path = tempfile.mkstemp()
    # mkstemp returns an already-open descriptor; the file is reopened by
    # name below, so release the descriptor instead of leaking it.
    os.close(fd)
    try:
        with open(path, 'w') as outfile:
            slib.ToStream(outfile)

        with open(path, 'rb') as infile:
            slib2 = rdSubstructLibrary.SubstructLibrary()
            slib2.InitFromStream(infile)
            self.assertEqual(len(slib), len(slib2))
    finally:
        # do not leave the temporary file behind, even on failure
        os.remove(path)

    s = StringIO()
    slib.ToStream(s)

    sb = BytesIO(s.getvalue().encode("ascii"))
    self.assertGreater(len(sb.getvalue()), 0)
    slib3 = rdSubstructLibrary.SubstructLibrary()
    slib3.InitFromStream(sb)
    # check the library that was just loaded from memory (the previous
    # version compared against slib2 again, leaving slib3 unverified)
    self.assertEqual(len(slib), len(slib3))
|
|
|
|
def test_addpatterns(self):
    """``AddPatterns`` must give the same counts as a library built with patterns.

    For each fingerprint holder type a reference library is populated through
    ``AddMol``.  A second library sharing the same molecule holder then gets
    its fingerprints from ``AddPatterns`` (called with 1, 2 and 0 as the
    thread argument) and must report identical, non-zero match counts for
    every molecule.
    """
    ligand_smiles = [
        "CCS(=O)(=O)c1ccc(OC)c(Nc2ncc(-c3cccc(-c4ccccn4)c3)o2)c1",
        "COc1ccc(S(=O)(=O)NCC2CC2)cc1Nc1ncc(-c2cccc(-c3cccnc3)c2)o1",
        "COc1ccc(-c2oc3ncnc(N)c3c2-c2ccc(NC(=O)Nc3cc(C(F)(F)F)ccc3F)cc2)cc1",
        "COC(=O)Nc1nc2ccc(Oc3ccc(NC(=O)Nc4cc(C(F)(F)F)ccc4F)cc3)cc2[nH]1",
        "COc1cc(Nc2ncnc(-c3cccnc3Nc3ccccc3)n2)cc(OC)c1OC",
        "O=C(Nc1ccc(Oc2ccccc2)cc1)c1cccnc1NCc1ccncc1",
        "O=C(Nc1ccc(Oc2ccccc2)cc1)c1cccnc1NCc1ccncc1",
        "CNC(=O)c1cc(Oc2ccc3[nH]c(Nc4ccc(Cl)c(C(F)(F)F)c4)nc3c2)ccn1",
        "CNC(=O)c1cc(Oc2ccc3oc(Nc4ccc(Cl)c(OCC5CCC[NH+]5C)c4)nc3c2)ccn1",
        "CNC(=O)c1cc(Oc2ccc3oc(Nc4ccc(Cl)c(OCC5CCC[NH+]5C)c4)nc3c2)ccn1",
        "COc1cc2nccc(Oc3ccc4c(c3)OCCN4C(=O)Nc3ccc(Cl)cc3)c2cc1OC",
        "CNC(=O)c1c(C)oc2cc(Oc3cc[nH+]c4cc(OCCN5CCOCC5)ccc34)ccc12",
        "COc1cc2[nH+]ccc(Oc3ccc4c(C(=O)Nc5ccc(Cl)cc5)cccc4c3)c2cc1OC",
        "COc1cc2[nH+]ccc(Oc3ccc4c(C(=O)Nc5ccc(Cl)cc5)cccc4c3)c2cc1OC",
        "COc1cc2[nH+]ccc(Oc3ccc4c(C(=O)NC5CC5)cccc4c3)c2cc1OC",
        "COc1cc2[nH+]ccc(Oc3ccc4c(C(=O)NC5CC5)cccc4c3)c2cc1OC",
        "Cc1ccc(C(=O)Nc2cc(CCC[NH+](C)C)cc(C(F)(F)F)c2)cc1Nc1ncccc1-c1ccncn1",
        "COc1cc(Nc2nccc(Nc3ccc4c(C)n[nH]c4c3)n2)cc(OC)c1OC",
        "COc1cc(Nc2nccc(N(C)c3ccc4c(C)n[nH]c4c3)n2)cc(OC)c1OC",
        "Cc1ccn(-c2ccc3c(c2)NCC3(C)C)c(=O)c1-c1ccc2nc(N)ncc2c1",
        "Cc1ccn(-c2ccc3c(c2)NCC3(C)C)c(=O)c1-c1ccc2nc(N)ncc2c1",
        "Cc1ccc(C(=O)NCCC2CCCC2)cc1C(=O)Nc1ccc(N)nc1",
        "Cc1ccc(C(=O)NCCC2CCCC2)cc1C(=O)Nc1ccc(N)nc1",
        "Cc1ccn(-c2cccc(C(F)(F)F)c2)c(=O)c1-c1ccc2nc(N)ncc2c1",
        "Cc1ccn(-c2cccc(C(F)(F)F)c2)c(=O)c1-c1ccc2nc(N)ncc2c1",
        "O=C(Nc1cncnc1)c1c(Cl)ccc2c(Nc3cccc(C(F)(F)F)c3)noc12",
        "O=C(Nc1cncnc1)c1c(Cl)ccc2c(Nc3cccc(C(F)(F)F)c3)noc12",
        "CC1(C)CNc2cc(NC(=O)c3cccnc3NCc3ccncc3)ccc21",
        "CC1(C)CNc2cc(NC(=O)c3cccnc3NCc3ccncc3)ccc21",
    ]

    fp_holders = [
        rdSubstructLibrary.PatternHolder(),
        rdSubstructLibrary.TautomerPatternHolder(),
    ]
    for fp_holder in fp_holders:
        parsed = [Chem.MolFromSmiles(text) for text in ligand_smiles]
        mol_holder = rdSubstructLibrary.CachedMolHolder()
        reference_lib = rdSubstructLibrary.SubstructLibrary(mol_holder, fp_holder)
        for entry in parsed:
            reference_lib.AddMol(entry)

        for thread_count in (1, 2, 0):
            rebuilt_lib = rdSubstructLibrary.SubstructLibrary(mol_holder, None)
            rdSubstructLibrary.AddPatterns(rebuilt_lib, thread_count)
            # check for seg fault:
            # were the fingerprints really created
            rebuilt_lib.GetFpHolder().GetFingerprint(0)
            for entry in parsed:
                expected = reference_lib.CountMatches(entry)
                actual = rebuilt_lib.CountMatches(entry)
                self.assertTrue(expected)
                self.assertEqual(expected, actual)
|
|
|
|
def test_basic_addpatterns(self):
    """``AddPatterns`` on SMILES-backed libraries creates one fingerprint per entry.

    For both cached-SMILES holder types the holder is filled with the ligand
    SMILES, a library is created around it, ``AddPatterns`` is run with
    ``numThreads=-1`` and every ligand must then be found in the library.
    """
    # add mols
    ligand_smiles = [
        "CCS(=O)(=O)c1ccc(OC)c(Nc2ncc(-c3cccc(-c4ccccn4)c3)o2)c1",
        "COc1ccc(S(=O)(=O)NCC2CC2)cc1Nc1ncc(-c2cccc(-c3cccnc3)c2)o1",
        "COc1ccc(-c2oc3ncnc(N)c3c2-c2ccc(NC(=O)Nc3cc(C(F)(F)F)ccc3F)cc2)cc1",
        "COC(=O)Nc1nc2ccc(Oc3ccc(NC(=O)Nc4cc(C(F)(F)F)ccc4F)cc3)cc2[nH]1",
        "COc1cc(Nc2ncnc(-c3cccnc3Nc3ccccc3)n2)cc(OC)c1OC",
        "O=C(Nc1ccc(Oc2ccccc2)cc1)c1cccnc1NCc1ccncc1",
        "O=C(Nc1ccc(Oc2ccccc2)cc1)c1cccnc1NCc1ccncc1",
        "CNC(=O)c1cc(Oc2ccc3[nH]c(Nc4ccc(Cl)c(C(F)(F)F)c4)nc3c2)ccn1",
        "CNC(=O)c1cc(Oc2ccc3oc(Nc4ccc(Cl)c(OCC5CCC[NH+]5C)c4)nc3c2)ccn1",
        "CNC(=O)c1cc(Oc2ccc3oc(Nc4ccc(Cl)c(OCC5CCC[NH+]5C)c4)nc3c2)ccn1",
        "COc1cc2nccc(Oc3ccc4c(c3)OCCN4C(=O)Nc3ccc(Cl)cc3)c2cc1OC",
        "CNC(=O)c1c(C)oc2cc(Oc3cc[nH+]c4cc(OCCN5CCOCC5)ccc34)ccc12",
        "COc1cc2[nH+]ccc(Oc3ccc4c(C(=O)Nc5ccc(Cl)cc5)cccc4c3)c2cc1OC",
        "COc1cc2[nH+]ccc(Oc3ccc4c(C(=O)Nc5ccc(Cl)cc5)cccc4c3)c2cc1OC",
        "COc1cc2[nH+]ccc(Oc3ccc4c(C(=O)NC5CC5)cccc4c3)c2cc1OC",
        "COc1cc2[nH+]ccc(Oc3ccc4c(C(=O)NC5CC5)cccc4c3)c2cc1OC",
        "Cc1ccc(C(=O)Nc2cc(CCC[NH+](C)C)cc(C(F)(F)F)c2)cc1Nc1ncccc1-c1ccncn1",
        "COc1cc(Nc2nccc(Nc3ccc4c(C)n[nH]c4c3)n2)cc(OC)c1OC",
        "COc1cc(Nc2nccc(N(C)c3ccc4c(C)n[nH]c4c3)n2)cc(OC)c1OC",
        "Cc1ccn(-c2ccc3c(c2)NCC3(C)C)c(=O)c1-c1ccc2nc(N)ncc2c1",
        "Cc1ccn(-c2ccc3c(c2)NCC3(C)C)c(=O)c1-c1ccc2nc(N)ncc2c1",
        "Cc1ccc(C(=O)NCCC2CCCC2)cc1C(=O)Nc1ccc(N)nc1",
        "Cc1ccc(C(=O)NCCC2CCCC2)cc1C(=O)Nc1ccc(N)nc1",
        "Cc1ccn(-c2cccc(C(F)(F)F)c2)c(=O)c1-c1ccc2nc(N)ncc2c1",
        "Cc1ccn(-c2cccc(C(F)(F)F)c2)c(=O)c1-c1ccc2nc(N)ncc2c1",
        "O=C(Nc1cncnc1)c1c(Cl)ccc2c(Nc3cccc(C(F)(F)F)c3)noc12",
        "O=C(Nc1cncnc1)c1c(Cl)ccc2c(Nc3cccc(C(F)(F)F)c3)noc12",
        "CC1(C)CNc2cc(NC(=O)c3cccnc3NCc3ccncc3)ccc21",
        "CC1(C)CNc2cc(NC(=O)c3cccnc3NCc3ccncc3)ccc21",
    ]

    mol_holders = [
        rdSubstructLibrary.CachedSmilesMolHolder(),
        rdSubstructLibrary.CachedTrustedSmilesMolHolder(),
    ]
    for holder in mol_holders:
        for text in ligand_smiles:
            holder.AddSmiles(text)

        # NOTE(review): the loop value is never used, so the three iterations
        # below are identical; presumably it was meant to be handed to the
        # library or to AddPatterns - confirm the intent before changing it.
        pattern_candidates = [
            None,
            rdSubstructLibrary.PatternHolder(),
            rdSubstructLibrary.TautomerPatternHolder(),
        ]
        for _unused_pattern in pattern_candidates:
            lib = rdSubstructLibrary.SubstructLibrary(holder)
            rdSubstructLibrary.AddPatterns(lib, numThreads=-1)
            mol_count = len(lib.GetMolHolder())
            fp_count = len(lib.GetFpHolder())
            self.assertEqual(mol_count, fp_count)
            for text in ligand_smiles:
                self.assertTrue(lib.CountMatches(Chem.MolFromSmiles(text)))
|
|
|
|
def test_PatternHolder(self):
    """Manually added fingerprints must behave like those made by ``AddMol``.

    For each pattern holder class, every tenth of the first 1000 records of
    ``$RDBASE/Data/NCI/first_5K.smi`` is loaded.  One library receives SMILES
    and fingerprints by hand (holder constructed with 2048), the other is
    filled through ``AddMol`` (holder constructed with defaults); both must
    return the same matches for a nitrogen query.

    Raises:
        KeyError: if the ``RDBASE`` environment variable is not set.
    """
    fname = os.path.join(os.environ["RDBASE"], "Data", "NCI", "first_5K.smi")

    for holder in [rdSubstructLibrary.PatternHolder, rdSubstructLibrary.TautomerPatternHolder]:
        suppl = Chem.SmilesMolSupplier(fname, delimiter="\t", titleLine=False)
        mols1 = rdSubstructLibrary.CachedTrustedSmilesMolHolder()
        fps1 = holder(2048)
        ssslib1 = rdSubstructLibrary.SubstructLibrary(mols1, fps1)
        mols2 = rdSubstructLibrary.CachedTrustedSmilesMolHolder()
        fps2 = holder()
        ssslib2 = rdSubstructLibrary.SubstructLibrary(mols2, fps2)

        RDLogger.DisableLog('rdApp.error')
        try:
            for i in range(0, 1000, 10):
                # records that cannot be read or parsed are skipped on purpose
                try:
                    mol = suppl[i]
                except Exception:
                    continue
                if not mol:
                    continue
                mols1.AddSmiles(Chem.MolToSmiles(mol))
                fps1.AddFingerprint(fps1.MakeFingerprint(mol))
                ssslib2.AddMol(mol)
        finally:
            # always switch error logging back on, even if loading fails
            RDLogger.EnableLog('rdApp.error')

        query = Chem.MolFromSmarts("N")
        self.assertIsNotNone(query)
        matches1 = sorted(ssslib1.GetMatches(query))
        matches2 = sorted(ssslib2.GetMatches(query))
        self.assertEqual(len(matches1), len(matches2))
        # comparing the lists directly reports the differing indices on failure
        self.assertEqual(matches1, matches2)
|
|
|
|
def testMolBundles(self):
    """A ``MolBundle`` query matches entries hit by any of its members."""
    library = rdSubstructLibrary.SubstructLibrary()
    library_smiles = ('CCOC', 'CCNC', 'COOCOO', 'CCNC', 'CCCC')
    for text in library_smiles:
        library.AddMol(Chem.MolFromSmiles(text))

    bundle = Chem.MolBundle()
    for text in ('COC', 'CCC'):
        bundle.AddMol(Chem.MolFromSmiles(text))
    first_hits = list(library.GetMatches(bundle))
    self.assertEqual(first_hits, [0, 4])

    # adding a third member widens the set of hits
    bundle.AddMol(Chem.MolFromSmiles('CN'))
    second_hits = sorted(library.GetMatches(bundle))
    self.assertEqual(second_hits, [0, 1, 3, 4])
|
|
|
|
def testSubstructParameters(self):
    """``SubstructMatchParameters`` passed to ``GetMatches`` are honoured.

    With default parameters the chiral query matches all three library
    entries; after setting ``useChirality`` only the first one matches.
    """
    library = rdSubstructLibrary.SubstructLibrary()
    for text in ('C[C@H](F)Cl', 'C[C@@H](F)Cl', 'CC(F)Cl'):
        library.AddMol(Chem.MolFromSmiles(text))

    bundle = Chem.MolBundle()
    bundle.AddMol(Chem.MolFromSmiles('C[C@H](F)Cl'))

    params = Chem.SubstructMatchParameters()
    default_hits = sorted(library.GetMatches(bundle, params))
    self.assertEqual(default_hits, [0, 1, 2])

    params.useChirality = True
    chiral_hits = sorted(library.GetMatches(bundle, params))
    self.assertEqual(chiral_hits, [0])
|
|
|
|
def testSearchOrder(self):
    """Setting, using and clearing an explicit search order.

    Run once with ``None`` and once with a ``KeyFromPropHolder`` as the
    second constructor argument.  With the custom order in place a search
    limited to two results must return entries 3 and 2, in that order; the
    order can be cleared with either ``None`` or an empty list.
    """
    custom_order = (3, 2, 0, 1, 4)
    library_smiles = ("CCCOC", "CCCCOCC", "CCOC", "COC", "CCCCCOC")

    for keyholder in [None, rdSubstructLibrary.KeyFromPropHolder()]:
        ssl = rdSubstructLibrary.SubstructLibrary(rdSubstructLibrary.MolHolder(), keyholder)
        for position, text in enumerate(library_smiles):
            mol = Chem.MolFromSmiles(text)
            mol.SetProp("_Name", str(position))
            ssl.AddMol(mol)

        ssl.SetSearchOrder(custom_order)
        self.assertEqual(ssl.GetSearchOrder(), custom_order)

        qm = Chem.MolFromSmiles('COC')
        # the same search is issued twice and must give the same answer
        for _attempt in range(2):
            self.assertEqual(list(ssl.GetMatches(qm, maxResults=2)), [3, 2])

        if keyholder:
            self.assertEqual(keyholder.GetPropName(), "_Name")
            top_two = ssl.GetMatches(qm, maxResults=2)
            self.assertEqual(list(ssl.GetKeyHolder().GetKeys(top_two)), ['3', '2'])

        # make sure we can clear the search order:
        ssl.SetSearchOrder(None)
        self.assertEqual(ssl.GetSearchOrder(), ())

        ssl.SetSearchOrder(custom_order)
        self.assertEqual(ssl.GetSearchOrder(), custom_order)

        ssl.SetSearchOrder([])
        self.assertEqual(ssl.GetSearchOrder(), ())
|
|
|
|
def testSearchOrder2(self):
    """A search order sorted by atom count returns the smallest hits first."""
    ssl = rdSubstructLibrary.SubstructLibrary()
    for text in ("CCCOC", "CCCCOCC", "CCOC", "COC", "CCCCCOC"):
        ssl.AddMol(Chem.MolFromSmiles(text))

    # order the library indices by the size of the stored molecule; the sort
    # is stable, so equally sized entries keep their insertion order
    entry_count = len(ssl)
    mol_holder = ssl.GetMolHolder()

    def atom_count(index):
        return mol_holder.GetMol(index).GetNumAtoms()

    smallest_first = sorted(range(entry_count), key=atom_count)
    ssl.SetSearchOrder(smallest_first)

    qm = Chem.MolFromSmiles('COC')
    self.assertEqual(list(ssl.GetMatches(qm)), [3, 2, 0, 1, 4])
|
|
|
|
def testPropHolder(self):
    """``KeyFromPropHolder`` reads keys from ``_Name`` or a custom property.

    A holder created without arguments must report ``_Name`` as its property
    name; one created with a name must report that name.  The key returned
    for a matching molecule is the value stored under that property.
    """
    for propname in [None, 'foo']:
        uses_default = propname is None
        if uses_default:
            keyholder = rdSubstructLibrary.KeyFromPropHolder()
        else:
            keyholder = rdSubstructLibrary.KeyFromPropHolder(propname)

        library = rdSubstructLibrary.SubstructLibrary(rdSubstructLibrary.MolHolder(), keyholder)
        mol = Chem.MolFromSmiles('CCC')

        expected_prop = "_Name" if uses_default else propname
        self.assertEqual(keyholder.GetPropName(), expected_prop)

        mol.SetProp(expected_prop, 'Z11234')
        library.AddMol(mol)

        indices = library.GetMatches(mol)
        keys = list(library.GetKeyHolder().GetKeys(indices))
        self.assertEqual(['Z11234'], keys)
|
|
|
|
def test_bad_smiles(self):
    """Libraries holding unparsable SMILES must not match and must not crash.

    Every stored SMILES is prefixed with ``&``.  A valid query must therefore
    find nothing, and ``AddPatterns`` must still run over the library.  RDKit
    logging is redirected to the Python logger and silenced for the duration
    of the test; the redirection and level are reset in a ``finally`` block
    so that a failing assertion does not leave logging muted for later tests.
    """
    # add mols
    pdb_ligands = [
        "&CCS(=O)(=O)c1ccc(OC)c(Nc2ncc(-c3cccc(-c4ccccn4)c3)o2)c1",
        "&COc1ccc(S(=O)(=O)NCC2CC2)cc1Nc1ncc(-c2cccc(-c3cccnc3)c2)o1",
        "&COc1ccc(-c2oc3ncnc(N)c3c2-c2ccc(NC(=O)Nc3cc(C(F)(F)F)ccc3F)cc2)cc1",
        "&COC(=O)Nc1nc2ccc(Oc3ccc(NC(=O)Nc4cc(C(F)(F)F)ccc4F)cc3)cc2[nH]1",
        "&COc1cc(Nc2ncnc(-c3cccnc3Nc3ccccc3)n2)cc(OC)c1OC",
        "&O=C(Nc1ccc(Oc2ccccc2)cc1)c1cccnc1NCc1ccncc1",
        "&O=C(Nc1ccc(Oc2ccccc2)cc1)c1cccnc1NCc1ccncc1",
        "&CNC(=O)c1cc(Oc2ccc3[nH]c(Nc4ccc(Cl)c(C(F)(F)F)c4)nc3c2)ccn1",
        "&CNC(=O)c1cc(Oc2ccc3oc(Nc4ccc(Cl)c(OCC5CCC[NH+]5C)c4)nc3c2)ccn1",
        "&CNC(=O)c1cc(Oc2ccc3oc(Nc4ccc(Cl)c(OCC5CCC[NH+]5C)c4)nc3c2)ccn1",
        "&COc1cc2nccc(Oc3ccc4c(c3)OCCN4C(=O)Nc3ccc(Cl)cc3)c2cc1OC",
        "&CNC(=O)c1c(C)oc2cc(Oc3cc[nH+]c4cc(OCCN5CCOCC5)ccc34)ccc12",
        "&COc1cc2[nH+]ccc(Oc3ccc4c(C(=O)Nc5ccc(Cl)cc5)cccc4c3)c2cc1OC",
        "&COc1cc2[nH+]ccc(Oc3ccc4c(C(=O)Nc5ccc(Cl)cc5)cccc4c3)c2cc1OC",
        "&COc1cc2[nH+]ccc(Oc3ccc4c(C(=O)NC5CC5)cccc4c3)c2cc1OC",
        "&COc1cc2[nH+]ccc(Oc3ccc4c(C(=O)NC5CC5)cccc4c3)c2cc1OC",
        "&Cc1ccc(C(=O)Nc2cc(CCC[NH+](C)C)cc(C(F)(F)F)c2)cc1Nc1ncccc1-c1ccncn1",
        "&COc1cc(Nc2nccc(Nc3ccc4c(C)n[nH]c4c3)n2)cc(OC)c1OC",
        "&COc1cc(Nc2nccc(N(C)c3ccc4c(C)n[nH]c4c3)n2)cc(OC)c1OC",
        "&Cc1ccn(-c2ccc3c(c2)NCC3(C)C)c(=O)c1-c1ccc2nc(N)ncc2c1",
        "&Cc1ccn(-c2ccc3c(c2)NCC3(C)C)c(=O)c1-c1ccc2nc(N)ncc2c1",
        "&Cc1ccc(C(=O)NCCC2CCCC2)cc1C(=O)Nc1ccc(N)nc1",
        "&Cc1ccc(C(=O)NCCC2CCCC2)cc1C(=O)Nc1ccc(N)nc1",
        "&Cc1ccn(-c2cccc(C(F)(F)F)c2)c(=O)c1-c1ccc2nc(N)ncc2c1",
        "&Cc1ccn(-c2cccc(C(F)(F)F)c2)c(=O)c1-c1ccc2nc(N)ncc2c1",
        "&O=C(Nc1cncnc1)c1c(Cl)ccc2c(Nc3cccc(C(F)(F)F)c3)noc12",
        "&O=C(Nc1cncnc1)c1c(Cl)ccc2c(Nc3cccc(C(F)(F)F)c3)noc12",
        "&CC1(C)CNc2cc(NC(=O)c3cccnc3NCc3ccncc3)ccc21",
        "&CC1(C)CNc2cc(NC(=O)c3cccnc3NCc3ccncc3)ccc21",
    ]
    query_smi = "CCS(=O)(=O)c1ccc(OC)c(Nc2ncc(-c3cccc(-c4ccccn4)c3)o2)c1"

    # this test is really verbose, so disable the actual output without
    # disabling that logging happens.
    rdBase.LogToPythonLogger()
    pylog = logging.getLogger("rdkit")
    pylog.setLevel(logging.CRITICAL)
    try:
        for holder in [
                rdSubstructLibrary.CachedSmilesMolHolder(),
                rdSubstructLibrary.CachedTrustedSmilesMolHolder(),
        ]:
            for smi in pdb_ligands:
                holder.AddSmiles(smi)
            lib = rdSubstructLibrary.SubstructLibrary(holder)

            # this should exercise the logger
            self.assertEqual(0, lib.CountMatches(Chem.MolFromSmiles(query_smi)))

            # test add patterns
            rdSubstructLibrary.AddPatterns(lib, -1)
    finally:
        # same end state as before (WARN level, C++ stream logging), but now
        # reached even when an assertion above fails
        pylog.setLevel(logging.WARN)
        rdBase.LogToCppStreams()
|
|
|
|
@unittest.skipIf(not rdBase._serializationEnabled, "not built with serialization support")
def test_using_xqms(self):
    """Extended query molecules can be used with all three search entry points.

    For each cached-SMILES holder type a four-entry library is searched with
    extended queries built from plain and link-node SMILES; ``GetMatches``,
    ``HasMatch`` and ``CountMatches`` must agree with the expected indices.
    """
    smis = ["COCC=O", "COOCC=O", "COOOCC=O", "COOOOCC=O"]

    # (query SMILES, expected library indices), checked in this order
    cases = (
        ("COCC", [0]),
        ("COC=CO", [0]),
        ("COCC |LN:1:1.3|", [0, 1, 2]),
        ("COC=CO |LN:1:1.3|", [0, 1, 2]),
        ("CNC=CO |LN:1:1.3|", []),
    )

    holders = [
        rdSubstructLibrary.CachedSmilesMolHolder(),
        rdSubstructLibrary.CachedTrustedSmilesMolHolder(),
    ]
    for holder in holders:
        for text in smis:
            holder.AddSmiles(text)
        # NOTE(review): this fingerprint holder is created but never given to
        # the library; presumably it was meant to be passed in - confirm.
        fph = rdSubstructLibrary.TautomerPatternHolder()
        lib = rdSubstructLibrary.SubstructLibrary(holder)

        for query_smiles, expected in cases:
            mol = Chem.MolFromSmiles(query_smiles)
            xqm = rdGeneralizedSubstruct.CreateExtendedQueryMol(mol)
            res = lib.GetMatches(xqm)
            self.assertEqual(list(res), expected)
            check_presence = self.assertTrue if expected else self.assertFalse
            check_presence(lib.HasMatch(xqm))
            self.assertEqual(lib.CountMatches(xqm), len(expected))
|
|
|
|
|
|
# Run the whole suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|