mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-04 21:54:27 +08:00
* remove all of the "from __future__" imports * remove the first batch of rdkit.six imports/uses * next step of rdkit.six removal * removing xrange, range, and some maps * next round of removals * next round of cleanups * fix inchi test * last bits of "from rdkit.six" are gone * and the last of the six stuff is gone * strange importlib problem
53 lines
1.2 KiB
Python
53 lines
1.2 KiB
Python
|
|
from rdkit import Chem
|
|
from rdkit import RDConfig
|
|
import time, sys, gzip
|
|
import pickle
|
|
from rdkit.RDLogger import logger
|
|
# The imported `logger` callable is invoked once and its result rebinds the
# name; every later `logger.info(...)` call goes through that object.
logger = logger()

# Parse the SMARTS library: `qs` collects the parsed query objects and `smas`
# the corresponding SMARTS strings, so the two lists stay index-aligned.
logger.info('reading smarts')
qs = []
smas = []
# `file()` is a Python 2 builtin that no longer exists in Python 3; `open()`
# inside a `with` block also guarantees the handle is closed after reading.
with open(RDConfig.RDDataDir + '/SmartsLib/RLewis_smarts.txt', 'r') as smartsFile:
    for line in smartsFile:
        # Lines whose first character is '#' are skipped.
        if line[0] == '#':
            continue
        # Only the first space-separated field is used as the pattern.
        sma = line.split(' ')[0]
        p = Chem.MolFromSmarts(sma)
        if not p:
            # Patterns that fail to parse are echoed to stderr and left out
            # of both lists.
            print(sma, file=sys.stderr)
            continue
        smas.append(sma)
        qs.append(p)
|
|
|
|
# Load the reference match counts; later code indexes them as
# refFps[molecule index][query index].
logger.info('reading target counts')
# NOTE(review): unpickling can execute arbitrary code. These files are
# presumably trusted local fixtures -- confirm before pointing this at data
# from anywhere else.
# The `with` blocks close the gzip handles, which the bare
# `gzip.open(...).read()` form left open.
with gzip.open('fps.1000.counts.pkl.gz', 'rb') as inF:
    refFps = pickle.load(inF)

# `fps` receives one list of per-query counts for each molecule.
fps = []
logger.info('reading mols:')
with gzip.open('mols.1000.pkl.gz', 'rb') as inF:
    ms = pickle.load(inF)
|
|
# Count the substructure matches of every query against every molecule,
# comparing each count with the reference value as soon as it is produced.
# The whole pass is timed and the elapsed seconds are printed at the end.
t1 = time.time()
nFail = 0
for molIdx, mol in enumerate(ms):
    counts = []
    for queryIdx, query in enumerate(qs):
        matches = mol.GetSubstructMatches(query)
        nMatches = len(matches)
        expected = refFps[molIdx][queryIdx]
        if nMatches != expected:
            # Report the disagreement with enough context to reproduce it.
            print(' >', molIdx, queryIdx, matches, expected, Chem.MolToSmiles(mol), smas[queryIdx])
            nFail += 1
            # Abort the run at the tenth mismatch.
            if nFail == 10:
                raise ValueError
        counts.append(nMatches)
    fps.append(counts)
    # Progress message on every 50th molecule (index 0 included).
    if molIdx % 50 == 0:
        logger.info('Done %d' % molIdx)
t2 = time.time()
print('%.2f' % (t2 - t1))
|
|
|
|
# Kept for reference: dumps the freshly computed counts (presumably used to
# regenerate the reference data -- note it still uses the Python 2 `file()`).
#pickle.dump(fps,file('fps.1000.counts.pkl','wb+'))

# Second pass: count the molecules whose full count list differs from the
# reference list at the same index, then report the total.
nFail = sum(1 for i, fp in enumerate(fps) if fp != refFps[i])
print('%d mismatches' % nFail)
|