Files
rdkit/Regress/Scripts/timings.py
Ric 880a8e5725 Reformat Python code for 2023.03 release (#6294)
* run yapf

* run isort

---------

Co-authored-by: Greg Landrum <greg.landrum@gmail.com>
2023-04-28 06:53:56 +02:00

248 lines
6.1 KiB
Python

import gzip
import os
import random
import sys
import time
import rdkit
from rdkit import Chem
from rdkit.Chem import AllChem, Recap
from rdkit.RDLogger import logger
# Rebind the imported factory name to the logger instance it returns; every
# later section logs through this object.
logger = logger()

# tests[i] is truthy when benchmark i is selected. Without command-line
# arguments every flag is set; otherwise only flag 1 and the benchmark
# numbers given on the command line are set.
if len(sys.argv) <= 1:
    tests = [1] * 1001
else:
    tests = [0] * 1001
    tests[1] = 1
    for arg in sys.argv[1:]:
        tests[int(arg)] = 1

# Elapsed seconds of each timed section, in the order the sections ran.
ts = []
# Benchmark 1 (runs unconditionally): build molecules from the gzipped SD
# file. The `mols` list filled here is the input of the later benchmarks.
# The archive is read inside a `with` block so its handle is closed as soon
# as the data is in memory (the original left it open).
with gzip.open('../Data/mols.1000.sdf.gz') as sdFile:
    sdData = sdFile.read()
logger.info('mols from sdf')
suppl = Chem.SDMolSupplier()
suppl.SetData(sdData)
mols = []
nMols = 0
nBad = 0
# Only the iteration over the supplier is timed, not the file read above.
t1 = time.time()
for m in suppl:
    # Falsy entries are counted as failures and not kept.
    if m:
        nMols += 1
        mols.append(m)
    else:
        nBad += 1
t2 = time.time()
logger.info('Results1: %.2f seconds, %d passed, %d failed' % (t2 - t1, nMols, nBad))
ts.append(t2 - t1)
if tests[2]:
    # Benchmark 2: build molecules from SMILES. The gzipped text file is
    # read inside a `with` block so the handle is closed before timing
    # starts (the original never closed it).
    with gzip.open('../Data/mols.1000.txt.gz') as smiFile:
        lines = smiFile.readlines()
    logger.info('mols from smiles')
    nMols = 0
    nBad = 0
    t1 = time.time()
    for line in lines:
        # Lines are bytes; the SMILES is the second space-separated field.
        # Decoding and splitting stay inside the timed region, as before.
        line = line.decode().strip().split(' ')
        m = Chem.MolFromSmiles(line[1])
        if m:
            nMols += 1
        else:
            nBad += 1
    t2 = time.time()
    logger.info('Results2: %.2f seconds, %d passed, %d failed' % (t2 - t1, nMols, nBad))
    ts.append(t2 - t1)
# Benchmarks 4 and 5 match against the `patts` built here, so this section
# also runs (and records its timing in `ts`) when only one of them is
# selected.
if tests[3] or tests[4] or tests[5]:
    # Read inside a `with` block so the gzip handle is closed promptly.
    with gzip.open('../Data/queries.txt.gz') as queryFile:
        pattData = queryFile.readlines()
    # Lines are bytes: decode them, then drop every '[H]' and '()' substring.
    pattData = [x.decode().strip().replace('[H]', '').replace('()', '') for x in pattData]
    logger.info('patterns from smiles')
    patts = []
    nMols = 0
    # Reset the failure counter: the original reused whatever value the
    # previous benchmark left behind, so Results3 could report failures that
    # did not happen in this section.
    nBad = 0
    t1 = time.time()
    for line in pattData:
        m = Chem.MolFromSmarts(line)
        if m:
            nMols += 1
            patts.append(m)
        else:
            nBad += 1
    t2 = time.time()
    logger.info('Results3: %.2f seconds, %d passed, %d failed' % (t2 - t1, nMols, nBad))
    ts.append(t2 - t1)
    # Fixed seed so the same (at most 100) patterns are kept on every run.
    random.seed(23)
    random.shuffle(patts)
    patts = patts[:100]
if tests[4]:
    # Benchmark 4: call HasSubstructMatch for every (molecule, pattern)
    # pair; the results are discarded, only the elapsed time is kept.
    logger.info('Matching1: HasSubstructMatch')
    start = time.time()
    for target in mols:
        for query in patts:
            target.HasSubstructMatch(query)
    elapsed = time.time() - start
    logger.info('Results4: %.2f seconds' % (elapsed, ))
    ts.append(elapsed)
if tests[5]:
    # Benchmark 5: call GetSubstructMatches for every (molecule, pattern)
    # pair; the results are discarded, only the elapsed time is kept.
    logger.info('Matching2: GetSubstructMatches')
    start = time.time()
    for target in mols:
        for query in patts:
            target.GetSubstructMatches(query)
    elapsed = time.time() - start
    logger.info('Results5: %.2f seconds' % (elapsed, ))
    ts.append(elapsed)
# Benchmarks 7 and 8 match against the `patts` built here, so this section
# also runs (and records its timing in `ts`) when only one of them is
# selected. It replaces the pattern list left by benchmark 3.
if tests[6] or tests[7] or tests[8]:
    logger.info('reading SMARTS')
    patts = []
    t1 = time.time()
    # Opening the file stays inside the timed region, as in the original;
    # the `with` block closes the handle, which the original never did.
    with open('../Data/RLewis_smarts.txt') as smartsFile:
        for line in smartsFile:
            line = line.strip()
            # Skip blank lines and '#' comment lines.
            if not line or line.startswith('#'):
                continue
            # The SMARTS is the first space-separated field of the line.
            sma = line.split(' ')[0]
            m = Chem.MolFromSmarts(sma)
            # Patterns that yield a falsy result are silently skipped.
            if m:
                patts.append(m)
    t2 = time.time()
    logger.info('Results6: %.2f seconds for %d patterns' % (t2 - t1, len(patts)))
    ts.append(t2 - t1)
if tests[7]:
    # Benchmark 7: call HasSubstructMatch for every (molecule, pattern)
    # pair using the current `patts`; only the elapsed time is kept.
    logger.info('Matching3: HasSubstructMatch')
    start = time.time()
    for target in mols:
        for query in patts:
            target.HasSubstructMatch(query)
    elapsed = time.time() - start
    logger.info('Results7: %.2f seconds' % (elapsed, ))
    ts.append(elapsed)
if tests[8]:
    # Benchmark 8: call GetSubstructMatches for every (molecule, pattern)
    # pair using the current `patts`; only the elapsed time is kept.
    logger.info('Matching4: GetSubstructMatches')
    start = time.time()
    for target in mols:
        for query in patts:
            target.GetSubstructMatches(query)
    elapsed = time.time() - start
    logger.info('Results8: %.2f seconds' % (elapsed, ))
    ts.append(elapsed)
if tests[9]:
    # Benchmark 9: call MolToSmiles on every molecule in `mols`. Only the
    # most recent string stays bound to `smi`; the timing is what matters.
    logger.info('Writing: Canonical SMILES')
    start = time.time()
    for target in mols:
        smi = Chem.MolToSmiles(target)
    elapsed = time.time() - start
    logger.info('Results9: %.2f seconds' % (elapsed, ))
    ts.append(elapsed)
if tests[10]:
    # Benchmark 10: call Compute2DCoords on every molecule in `mols`.
    logger.info('Generate 2D coords')
    start = time.time()
    for target in mols:
        AllChem.Compute2DCoords(target)
    elapsed = time.time() - start
    logger.info('Results10: %.2f seconds' % (elapsed, ))
    ts.append(elapsed)
if tests[11]:
    # Benchmark 11: call MolToMolBlock on every molecule in `mols`. Only the
    # most recent block stays bound to `mb`; the timing is what matters.
    logger.info('Writing: Mol blocks')
    start = time.time()
    for target in mols:
        mb = Chem.MolToMolBlock(target)
    elapsed = time.time() - start
    logger.info('Results11: %.2f seconds' % (elapsed, ))
    ts.append(elapsed)
if tests[12]:
    # Benchmark 12: call Recap.RecapDecompose on every molecule in `mols`.
    # Only the most recent result stays bound to `d`.
    logger.info('RECAP decomposition')
    start = time.time()
    for target in mols:
        d = Recap.RecapDecompose(target)
    elapsed = time.time() - start
    logger.info('Results12: %.2f seconds' % (elapsed, ))
    ts.append(elapsed)
# Benchmarks 14 and 15 read the `mols3d` list that is only created here, so
# this section must also run when just one of them is selected; otherwise
# `mols3d` would be undefined. This mirrors the guards used for the
# benchmark 3/4/5 and 6/7/8 groups; as there, the embedding time is recorded
# in `ts` whenever the section runs.
if tests[13] or tests[14] or tests[15]:
    logger.info('Generate 3D coords for 50 molecules with ETKDG')
    # Fixed slice of the parsed molecules (at most 50 entries).
    mols3d = mols[200:250]
    t1 = time.time()
    nBad = 0
    for mol in mols3d:
        # Fixed random seed so repeated runs embed with the same settings.
        cid = AllChem.EmbedMolecule(mol, randomSeed=0xF00D, useExpTorsionAnglePrefs=True,
                                    useBasicKnowledge=True)
        # A negative return value is counted as a failed embedding.
        if cid < 0:
            nBad += 1
    t2 = time.time()
    logger.info('Results13: %.2f seconds %d failures' % (t2 - t1, nBad))
    ts.append(t2 - t1)
if tests[14]:
    # Benchmark 14: UFF-optimize a copy of every molecule in `mols3d` that
    # has at least one conformer.
    logger.info('UFF optimizing those:')
    start = time.time()
    for embedded in mols3d:
        if embedded.GetNumConformers():
            # Work on a copy so the molecule stored in `mols3d` is untouched.
            workCopy = Chem.Mol(embedded)
            needMore = 1
            # Repeat in 200-iteration rounds until a falsy value comes back.
            # NOTE(review): any truthy return keeps the loop going - confirm
            # that UFFOptimizeMolecule cannot return a persistent non-zero
            # error code here.
            while needMore:
                needMore = AllChem.UFFOptimizeMolecule(workCopy, maxIters=200)
    elapsed = time.time() - start
    logger.info('Results14: %.2f seconds' % (elapsed, ))
    ts.append(elapsed)
if tests[15]:
    # Benchmark 15: MMFF-optimize a copy of every molecule in `mols3d` that
    # has a conformer and passes MMFFHasAllMoleculeParams.
    logger.info('MMFF optimizing the molecules:')
    start = time.time()
    for idx, embedded in enumerate(mols3d):
        # Work on a copy so the molecule stored in `mols3d` is untouched.
        workCopy = Chem.Mol(embedded)
        # Short-circuit: the parameter check only runs when a conformer exists.
        if not (workCopy.GetNumConformers() and AllChem.MMFFHasAllMoleculeParams(workCopy)):
            continue
        needMore = 1
        # Repeat in 200-iteration rounds until a falsy value comes back; a
        # ValueError abandons this molecule after logging a warning.
        while needMore:
            try:
                needMore = AllChem.MMFFOptimizeMolecule(workCopy, maxIters=200)
            except ValueError:
                logger.warning('Problems with MMFF and mol %d' % idx)
                break
    elapsed = time.time() - start
    logger.info('Results15: %.2f seconds' % (elapsed, ))
    ts.append(elapsed)
if tests[16]:
    # Benchmark 16: call FindUniqueSubgraphsOfLengthN with length 6 on every
    # molecule in `mols`; results are discarded.
    logger.info('Find unique subgraphs')
    start = time.time()
    for target in mols:
        Chem.FindUniqueSubgraphsOfLengthN(target, 6)
    elapsed = time.time() - start
    logger.info('Results16: %.2f seconds' % (elapsed, ))
    ts.append(elapsed)
if tests[17]:
    # Benchmark 17: call RDKFingerprint on every molecule in `mols`;
    # results are discarded.
    logger.info('Generate topological fingerprints')
    start = time.time()
    for target in mols:
        Chem.RDKFingerprint(target)
    elapsed = time.time() - start
    logger.info('Results17: %.2f seconds' % (elapsed, ))
    ts.append(elapsed)
if tests[18]:
    # Benchmark 18: call GetMorganFingerprint with radius 2 on every
    # molecule in `mols`; results are discarded.
    logger.info('Generate morgan fingerprints')
    start = time.time()
    for target in mols:
        AllChem.GetMorganFingerprint(target, radius=2)
    elapsed = time.time() - start
    logger.info('Results18: %.2f seconds' % (elapsed, ))
    ts.append(elapsed)
# Emit one pipe-delimited table row: the RDKit version followed by every
# recorded timing, each formatted with one decimal place.
timingCells = ' | '.join('%.1f' % seconds for seconds in ts)
print(f"| {rdkit.__version__} | {timingCells} |")