mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-07 22:44:25 +08:00
241 lines
8.5 KiB
Python
Executable File
241 lines
8.5 KiB
Python
Executable File
# $Id$
|
|
#
|
|
# Copyright (C) 2003-2006 Rational Discovery LLC
|
|
#
|
|
# @@ All Rights Reserved @@
|
|
#
|
|
import RDConfig
|
|
import unittest
|
|
import Chem
|
|
from Chem import FragmentCatalog, BuildFragmentCatalog
|
|
import cPickle
|
|
import os
|
|
|
|
def feq(n1, n2, tol=1e-4):
    """Return True when n1 and n2 differ by strictly less than tol.

    Arguments:
      - n1, n2: the two numbers to compare
      - tol: the absolute tolerance; a difference exactly equal to tol
        counts as "not equal"
    """
    delta = abs(n1 - n2)
    return delta < tol
|
|
|
|
class TestCase(unittest.TestCase):
    """Tests for fragment catalogs: building, pickling, bit contents, scoring.

    Most tests build a catalog from the small molecules in self.smiList2
    and check that it ends up with 21 entries and a fingerprint length of 21.
    """

    def setUp(self):
        """Set up the SMILES fixtures and an empty fragment catalog."""
        # larger molecules, only used by the disabled _test5MoreComplex
        self.smiList = [
            "S(SC1=NC2=CC=CC=C2S1)C3=NC4=C(S3)C=CC=C4",
            "CC1=CC(=O)C=CC1=O",
            "OC1=C(Cl)C=C(C=C1[N+]([O-])=O)[N+]([O-])=O",
            "[O-][N+](=O)C1=CNC(=N)S1",
            "NC1=CC2=C(C=C1)C(=O)C3=C(C=CC=C3)C2=O",
            "OC(=O)C1=C(C=CC=C1)C2=C3C=CC(=O)C(=C3OC4=C2C=CC(=C4Br)O)Br",
            "CN(C)C1=C(Cl)C(=O)C2=C(C=CC=C2)C1=O",
            "CC1=C(C2=C(C=C1)C(=O)C3=CC=CC=C3C2=O)[N+]([O-])=O",
            "CC(=NO)C(C)=NO",
        ]
        # small molecules used to build the 21-entry catalog
        self.smiList2 = [
            'OCCC', 'CCC', 'C=CC', 'OC=CC', 'CC(O)C',
            'C=C(O)C', 'OCCCC', 'CC(O)CC', 'C=CCC', 'CC=CC',
            'OC=CCC', 'CC=C(O)C', 'OCC=CC', 'C=C(O)CC',
            'C=CC(O)C', 'C=CCCO',
        ]
        # one activity value per entry of smiList2
        self.list2Acts = [1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1]
        # expected on-bit lists for the first 12 entries of smiList2
        self.list2Obls = [
            (0, 1, 2), (1, 3), (1, 4, 5), (1, 6, 7), (0, 8), (0, 6, 9),
            (0, 1, 2, 3, 10), (0, 1, 2, 8, 11), (1, 3, 4, 5, 12),
            (1, 4, 5, 13), (1, 3, 6, 7, 14), (0, 1, 6, 7, 9, 15),
        ]

        ffile = os.path.join(RDConfig.RDDataDir, 'FunctionalGroups.txt')
        # NOTE(review): 1 and 6 are presumably the min/max path lengths for
        # the catalog - confirm against FragCatParams
        self.catParams = FragmentCatalog.FragCatParams(1, 6, ffile)
        self.fragCat = FragmentCatalog.FragCatalog(self.catParams)
        self.fgen = FragmentCatalog.FragCatGenerator()

    def _smilesSupplier(self, smiList):
        """Return a SMILES supplier over smiList (one SMILES per line).

        The trailing positional arguments are passed through exactly as the
        tests have always passed them.
        """
        # NOTE(review): ',',0,-1,0 look like delimiter / smiles column /
        # name column / title-line flag - confirm against the Chem API
        return Chem.SmilesMolSupplierFromText('\n'.join(smiList),
                                              ',', 0, -1, 0)

    def _checkCatSize(self, cat, nExpected=21):
        """Assert that cat has nExpected entries and FP length nExpected."""
        assert cat.GetNumEntries() == nExpected
        assert cat.GetFPLength() == nExpected

    def _fillCat(self, smilList):
        """Add the fragments of every SMILES in smilList to self.fragCat."""
        # the argument used to be ignored in favor of self.smiList2;
        # every caller passes self.smiList2, so honoring it is compatible
        for smi in smilList:
            mol = Chem.MolFromSmiles(smi)
            self.fgen.AddFragsFromMol(mol, self.fragCat)

    def _testBits(self, fragCat):
        """Check the fingerprints of the smiList2 molecules against fragCat.

        For each molecule that has an expected value, verifies the number of
        on bits and the tuple of on-bit indices (self.list2Obls).
        """
        fpgen = FragmentCatalog.FragFPGenerator()
        obits = [3, 2, 3, 3, 2, 3, 5, 5, 5, 4, 5, 6]
        obls = self.list2Obls
        suppl = self._smilesSupplier(self.smiList2)
        for i, mol in enumerate(suppl):
            fp = fpgen.GetFPForMol(mol, fragCat)
            # computed up front so that both failure messages can use it
            # even if obits and obls ever end up with different lengths
            smi = Chem.MolToSmiles(mol)
            if i < len(obits):
                assert fp.GetNumOnBits() == obits[i], \
                    '%s: %s' % (smi, str(fp.GetOnBits()))
            obl = fp.GetOnBits()
            if i < len(obls):
                assert tuple(obl) == obls[i], '%s: %s' % (smi, obl)

    def test1CatGen(self):
        """A catalog filled molecule-by-molecule has the expected contents."""
        self._fillCat(self.smiList2)
        self._checkCatSize(self.fragCat)
        self._testBits(self.fragCat)

    def test2CatStringPickle(self):
        """The catalog survives a round trip through a pickle string."""
        self._fillCat(self.smiList2)

        # test non-binary pickle:
        cat2 = cPickle.loads(cPickle.dumps(self.fragCat))
        self._checkCatSize(cat2)
        self._testBits(cat2)

        # test binary pickle:
        cat2 = cPickle.loads(cPickle.dumps(self.fragCat, 1))
        self._checkCatSize(cat2)
        self._testBits(cat2)

    def test3CatFilePickle(self):
        """A catalog stored in simple_catalog.pkl loads with the right contents."""
        pklName = os.path.join(RDConfig.RDCodeDir, 'Chem',
                               'tests', 'simple_catalog.pkl')
        pklFile = open(pklName, 'rb')
        # make sure the handle is released even if unpickling fails
        try:
            cat = cPickle.load(pklFile)
        finally:
            pklFile.close()
        self._checkCatSize(cat)
        self._testBits(cat)

    def test4CatGuts(self):
        """Check description, order and functional-group ids of catalog bits."""
        self._fillCat(self.smiList2)
        self._checkCatSize(self.fragCat)
        #
        # FIX: (Issue 162)
        # bits like 11 and 15 are questionable here because the underlying
        # fragments are symmetrical, so they can generate one of two
        # text representations (i.e. there is nothing to distinguish
        # between 'CC<-O>CC' and 'CCC<-O>C').
        # This ought to eventually be cleaned up.
        #
        # each entry: (bit id, description, bit order, functional group ids)
        descrs = [
            (0, 'CC<-O>', 1, (34,)),
            (1, 'CC', 1, ()),
            (2, 'CCC<-O>', 2, (34,)),
            (3, 'CCC', 2, ()),
            (4, 'C=C', 1, ()),
            (5, 'CC=C', 2, ()),
            (6, 'C=C<-O>', 1, (34,)),
            (7, 'CC=C<-O>', 2, (34,)),
            (8, 'CC<-O>C', 2, (34,)),
            (9, 'CC<-O>=C', 2, (34,)),
            (10, 'CCCC<-O>', 3, (34,)),
            (11, 'CCC<-O>C', 3, (34,)),
            (12, 'CCC=C', 3, ()),
            (13, 'CC=CC', 3, ()),
            (14, 'CCC=C<-O>', 3, (34,)),
            (15, 'CC<-O>=CC', 3, (34,)),
            (16, 'C<-O>C=C', 2, (34,)),
        ]
        for bitId, expDescr, expOrder, expIds in descrs:
            descr = self.fragCat.GetBitDescription(bitId)
            assert descr == expDescr, '%d: %s != %s' % (bitId, descr, expDescr)
            assert self.fragCat.GetBitOrder(bitId) == expOrder
            fgIds = self.fragCat.GetBitFuncGroupIds(bitId)
            assert tuple(fgIds) == expIds, \
                '%d: %s != %s' % (bitId, str(fgIds), str(expIds))

    def _test5MoreComplex(self):
        """Check that every bit a molecule adds is set in its fingerprint.

        The leading underscore keeps the default unittest loader from
        running this method.
        """
        lastIdx = 0
        ranges = {}
        suppl = self._smilesSupplier(self.smiList)
        for i, mol in enumerate(suppl):
            nEnt = self.fgen.AddFragsFromMol(mol, self.fragCat)
            ranges[i] = range(lastIdx, lastIdx + nEnt)
            lastIdx += nEnt
        # now make sure that those bits are contained in the signatures:
        fpgen = FragmentCatalog.FragFPGenerator()
        for i, mol in enumerate(suppl):
            fp = fpgen.GetFPForMol(mol, self.fragCat)
            for bit in ranges[i]:
                assert fp[bit], '%s: %s' % (Chem.MolToSmiles(mol), str(bit))

    def test6Builder(self):
        """BuildCatalog produces the same catalog as filling it by hand."""
        suppl = self._smilesSupplier(self.smiList2)
        cat = BuildFragmentCatalog.BuildCatalog(suppl, minPath=1,
                                                reportFreq=20)
        self._checkCatSize(cat)
        self._testBits(cat)

    def test7ScoreMolecules(self):
        """ScoreMolecules and ScoreFromLists agree with each other."""
        suppl = self._smilesSupplier(self.smiList2)
        cat = BuildFragmentCatalog.BuildCatalog(suppl, minPath=1,
                                                reportFreq=20)
        self._checkCatSize(cat)

        scores, obls = BuildFragmentCatalog.ScoreMolecules(
            suppl, cat, acts=self.list2Acts, reportFreq=20)
        for i, expected in enumerate(self.list2Obls):
            assert tuple(obls[i]) == expected, \
                '%d: %s != %s' % (i, str(obls[i]), str(expected))

        scores2 = BuildFragmentCatalog.ScoreFromLists(
            obls, suppl, cat, acts=self.list2Acts, reportFreq=20)
        for i, score in enumerate(scores):
            assert score == scores2[i], \
                '%d: %s != %s' % (i, str(score), str(scores2[i]))

    def test8MolRanks(self):
        """CalcGains ranks the bits as expected, with and without a bias list."""
        suppl = self._smilesSupplier(self.smiList2)
        cat = BuildFragmentCatalog.BuildCatalog(suppl, minPath=1,
                                                reportFreq=20)
        self._checkCatSize(cat)

        # new InfoGain ranking:
        bitInfo, fps = BuildFragmentCatalog.CalcGains(
            suppl, cat, topN=10, acts=self.list2Acts,
            reportFreq=20, biasList=(1,))
        entry = bitInfo[0]
        assert int(entry[0]) == 0
        assert cat.GetBitDescription(int(entry[0])) == 'CC<-O>'
        assert feq(entry[1], 0.4669)

        # two bits are accepted for the second place
        entry = bitInfo[1]
        assert int(entry[0]) in (2, 6)
        txt = cat.GetBitDescription(int(entry[0]))
        self.failUnless(txt in ('CCC<-O>', 'C=C<-O>'), txt)
        assert feq(entry[1], 0.1611)

        entry = bitInfo[6]
        assert int(entry[0]) == 16
        assert cat.GetBitDescription(int(entry[0])) == 'C<-O>C=C'
        assert feq(entry[1], 0.0560)

        # standard InfoGain ranking:
        bitInfo, fps = BuildFragmentCatalog.CalcGains(
            suppl, cat, topN=10, acts=self.list2Acts, reportFreq=20)
        entry = bitInfo[0]
        assert int(entry[0]) == 0
        assert cat.GetBitDescription(int(entry[0])) == 'CC<-O>'
        assert feq(entry[1], 0.4669)

        entry = bitInfo[1]
        assert int(entry[0]) == 5
        assert cat.GetBitDescription(int(entry[0])) == 'CC=C'
        assert feq(entry[1], 0.2057)

    def test9Issue116(self):
        """Regression test for Issue 116, using aromatic fragments."""
        smiList = ['Cc1ccccc1']
        suppl = self._smilesSupplier(smiList)
        cat = BuildFragmentCatalog.BuildCatalog(suppl, minPath=2, maxPath=2)
        assert cat.GetFPLength() == 2
        assert cat.GetBitDescription(0) == 'Ccc'
        fpgen = FragmentCatalog.FragFPGenerator()

        # toluene sets both bits
        mol = Chem.MolFromSmiles('Cc1ccccc1')
        fp = fpgen.GetFPForMol(mol, cat)
        assert fp[0]
        assert fp[1]

        # biphenyl must not set the 'Ccc' bit
        mol = Chem.MolFromSmiles('c1ccccc1-c1ccccc1')
        fp = fpgen.GetFPForMol(mol, cat)
        assert not fp[0]
        assert fp[1]
|
|
|
|
|
|
|
|
|
|
# run the test suite when this file is executed as a script
if __name__ == '__main__':
    unittest.main()
|
|
|
|
|