mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-04 21:54:27 +08:00
248 lines
6.1 KiB
Python
248 lines
6.1 KiB
Python
import gzip
|
|
import os
|
|
import random
|
|
import sys
|
|
import time
|
|
|
|
import rdkit
|
|
from rdkit import Chem
|
|
from rdkit.Chem import AllChem, Recap
|
|
from rdkit.RDLogger import logger
|
|
|
|
# Replace the imported logger factory with the logger instance it returns.
logger = logger()

# Decide which benchmarks run.  Without command-line arguments every slot is
# enabled; otherwise only slot 1 plus the test numbers given as arguments.
if sys.argv[1:]:
    tests = [0] * 1001
    tests[1] = 1
    for arg in sys.argv[1:]:
        tests[int(arg)] = 1
else:
    tests = [1] * 1001

# Elapsed seconds of each executed benchmark, in execution order.
ts = []
# Test 1 (always runs): build molecules from the gzipped SD file.  The parsed
# molecules are kept in `mols` because the later benchmarks operate on them.
# The archive is read through a context manager so the gzip handle is closed
# as soon as the data is in memory instead of being left to the garbage
# collector.
with gzip.open('../Data/mols.1000.sdf.gz') as sdFile:
    sdData = sdFile.read()
logger.info('mols from sdf')
suppl = Chem.SDMolSupplier()
suppl.SetData(sdData)
mols = []
nMols = 0
nBad = 0
t1 = time.time()
for m in suppl:
    if m:
        nMols += 1
        mols.append(m)
    else:
        # The supplier yielded a falsy entry for a record it could not parse.
        nBad += 1
t2 = time.time()
logger.info('Results1: %.2f seconds, %d passed, %d failed' % (t2 - t1, nMols, nBad))
ts.append(t2 - t1)
# Test 2: build molecules from SMILES.  Each line of the gzipped text file is
# split on single spaces and the second field is parsed as the SMILES string.
if tests[2]:
    # Context manager closes the gzip handle once the lines are loaded.
    with gzip.open('../Data/mols.1000.txt.gz') as smiFile:
        lines = smiFile.readlines()
    logger.info('mols from smiles')
    nMols = 0
    nBad = 0
    t1 = time.time()
    for line in lines:
        # Decoding and splitting stay inside the timed region, as before.
        line = line.decode().strip().split(' ')
        m = Chem.MolFromSmiles(line[1])
        if m:
            nMols += 1
        else:
            nBad += 1
    t2 = time.time()
    logger.info('Results2: %.2f seconds, %d passed, %d failed' % (t2 - t1, nMols, nBad))
    ts.append(t2 - t1)
# Test 3: build query molecules from the gzipped query file.  The block also
# runs when only test 4 or 5 is requested, because those tests match against
# the `patts` list produced here.
if tests[3] or tests[4] or tests[5]:
    # Context manager closes the gzip handle once the lines are loaded.
    with gzip.open('../Data/queries.txt.gz') as queryFile:
        pattData = queryFile.readlines()
    # Drop explicit '[H]' tokens and any empty '()' before parsing as SMARTS.
    pattData = [x.decode().strip().replace('[H]', '').replace('()', '') for x in pattData]
    logger.info('patterns from smiles')
    patts = []
    nMols = 0
    # Reset the failure counter: it was previously left at whatever value an
    # earlier test had accumulated, so Results3 could report stale failures.
    nBad = 0
    t1 = time.time()
    for line in pattData:
        m = Chem.MolFromSmarts(line)
        if m:
            nMols += 1
            patts.append(m)
        else:
            nBad += 1
    t2 = time.time()
    logger.info('Results3: %.2f seconds, %d passed, %d failed' % (t2 - t1, nMols, nBad))
    ts.append(t2 - t1)
    # Fixed seed keeps the 100-pattern sample identical from run to run.
    random.seed(23)
    random.shuffle(patts)
    patts = patts[:100]
# Test 4: boolean substructure search of every loaded molecule against every
# sampled query pattern.
if tests[4]:
    logger.info('Matching1: HasSubstructMatch')
    started = time.time()
    for mol in mols:
        for patt in patts:
            mol.HasSubstructMatch(patt)
    elapsed = time.time() - started
    logger.info('Results4: %.2f seconds' % elapsed)
    ts.append(elapsed)
# Test 5: enumerate substructure matches of every sampled query pattern in
# every loaded molecule.
if tests[5]:
    logger.info('Matching2: GetSubstructMatches')
    started = time.time()
    for mol in mols:
        for patt in patts:
            mol.GetSubstructMatches(patt)
    elapsed = time.time() - started
    logger.info('Results5: %.2f seconds' % elapsed)
    ts.append(elapsed)
# Test 6: read SMARTS patterns from a plain-text file.  The block also runs
# when only test 7 or 8 is requested, because those tests match against the
# `patts` list rebuilt here.
if tests[6] or tests[7] or tests[8]:
    logger.info('reading SMARTS')
    patts = []
    t1 = time.time()
    # Opening the file stays inside the timed region, as before, but the
    # handle is now closed deterministically instead of being leaked.
    with open('../Data/RLewis_smarts.txt') as smartsFile:
        for line in smartsFile:
            line = line.strip()
            # Skip blank lines and '#' comment lines.
            if not line or line.startswith('#'):
                continue
            # The SMARTS is the first space-separated field on the line.
            sma = line.split(' ')[0]
            m = Chem.MolFromSmarts(sma)
            if m:
                patts.append(m)
    t2 = time.time()
    logger.info('Results6: %.2f seconds for %d patterns' % (t2 - t1, len(patts)))
    ts.append(t2 - t1)
# Test 7: boolean substructure search of every loaded molecule against the
# patterns currently held in `patts`.
if tests[7]:
    logger.info('Matching3: HasSubstructMatch')
    started = time.time()
    for mol in mols:
        for patt in patts:
            mol.HasSubstructMatch(patt)
    elapsed = time.time() - started
    logger.info('Results7: %.2f seconds' % elapsed)
    ts.append(elapsed)
# Test 8: enumerate substructure matches of the patterns currently held in
# `patts` in every loaded molecule.
if tests[8]:
    logger.info('Matching4: GetSubstructMatches')
    started = time.time()
    for mol in mols:
        for patt in patts:
            mol.GetSubstructMatches(patt)
    elapsed = time.time() - started
    logger.info('Results8: %.2f seconds' % elapsed)
    ts.append(elapsed)
# Test 9: write a SMILES string for every loaded molecule.
if tests[9]:
    logger.info('Writing: Canonical SMILES')
    started = time.time()
    for mol in mols:
        # The result is only produced for timing purposes; it is not used.
        smi = Chem.MolToSmiles(mol)
    elapsed = time.time() - started
    logger.info('Results9: %.2f seconds' % elapsed)
    ts.append(elapsed)
# Test 10: compute 2D coordinates for every loaded molecule.
if tests[10]:
    logger.info('Generate 2D coords')
    started = time.time()
    for mol in mols:
        AllChem.Compute2DCoords(mol)
    elapsed = time.time() - started
    logger.info('Results10: %.2f seconds' % elapsed)
    ts.append(elapsed)
# Test 11: write a mol block for every loaded molecule.
if tests[11]:
    logger.info('Writing: Mol blocks')
    started = time.time()
    for mol in mols:
        # The result is only produced for timing purposes; it is not used.
        mb = Chem.MolToMolBlock(mol)
    elapsed = time.time() - started
    logger.info('Results11: %.2f seconds' % elapsed)
    ts.append(elapsed)
# Test 12: run the RECAP decomposition on every loaded molecule.
if tests[12]:
    logger.info('RECAP decomposition')
    started = time.time()
    for mol in mols:
        # The result is only produced for timing purposes; it is not used.
        d = Recap.RecapDecompose(mol)
    elapsed = time.time() - started
    logger.info('Results12: %.2f seconds' % elapsed)
    ts.append(elapsed)
# Tests 13-15 all operate on the same 50-molecule slice.  It used to be
# created only inside test 13, so selecting test 14 or 15 on its own failed
# with a NameError; build it whenever any of the three is requested.
if tests[13] or tests[14] or tests[15]:
    mols3d = mols[200:250]

# Test 13: embed 3D coordinates for the slice, counting embedding failures.
if tests[13]:
    logger.info('Generate 3D coords for 50 molecules with ETKDG')
    t1 = time.time()
    nBad = 0
    for mol in mols3d:
        # Fixed random seed keeps the embedding reproducible between runs.
        cid = AllChem.EmbedMolecule(mol, randomSeed=0xF00D, useExpTorsionAnglePrefs=True,
                                    useBasicKnowledge=True)
        # A negative conformer id is counted as a failed embedding.
        if cid < 0:
            nBad += 1
    t2 = time.time()
    logger.info('Results13: %.2f seconds %d failures' % (t2 - t1, nBad))
    ts.append(t2 - t1)

# Test 14: UFF-optimize a copy of each molecule in the slice that has a
# conformer.
if tests[14]:
    logger.info('UFF optimizing those:')
    t1 = time.time()
    for mol in mols3d:
        if not mol.GetNumConformers():
            continue
        # Work on a copy so the shared molecule keeps its original coordinates.
        mol = Chem.Mol(mol)
        needMore = 1
        # Only a positive return asks for more iterations; stopping on any
        # other value means a negative failure code cannot loop forever.
        while needMore > 0:
            needMore = AllChem.UFFOptimizeMolecule(mol, maxIters=200)
    t2 = time.time()
    logger.info('Results14: %.2f seconds' % (t2 - t1))
    ts.append(t2 - t1)

# Test 15: MMFF-optimize a copy of each molecule in the slice that has a
# conformer and complete MMFF parameters.
if tests[15]:
    logger.info('MMFF optimizing the molecules:')
    t1 = time.time()
    for i, mol in enumerate(mols3d):
        # Work on a copy so the shared molecule keeps its original coordinates.
        mol = Chem.Mol(mol)
        if not mol.GetNumConformers():
            continue
        if not AllChem.MMFFHasAllMoleculeParams(mol):
            continue
        needMore = 1
        # Only a positive return asks for more iterations; stopping on any
        # other value means a negative failure code cannot loop forever.
        while needMore > 0:
            try:
                needMore = AllChem.MMFFOptimizeMolecule(mol, maxIters=200)
            except ValueError:
                logger.warning('Problems with MMFF and mol %d' % i)
                break
    t2 = time.time()
    logger.info('Results15: %.2f seconds' % (t2 - t1))
    ts.append(t2 - t1)
# Test 16: find the unique subgraphs of length 6 in every loaded molecule.
if tests[16]:
    logger.info('Find unique subgraphs')
    started = time.time()
    for mol in mols:
        Chem.FindUniqueSubgraphsOfLengthN(mol, 6)
    elapsed = time.time() - started
    logger.info('Results16: %.2f seconds' % elapsed)
    ts.append(elapsed)
# Test 17: generate an RDKFingerprint for every loaded molecule.
if tests[17]:
    logger.info('Generate topological fingerprints')
    started = time.time()
    for mol in mols:
        Chem.RDKFingerprint(mol)
    elapsed = time.time() - started
    logger.info('Results17: %.2f seconds' % elapsed)
    ts.append(elapsed)
# Test 18: generate a radius-2 Morgan fingerprint for every loaded molecule.
if tests[18]:
    logger.info('Generate morgan fingerprints')
    started = time.time()
    for mol in mols:
        AllChem.GetMorganFingerprint(mol, radius=2)
    elapsed = time.time() - started
    logger.info('Results18: %.2f seconds' % elapsed)
    ts.append(elapsed)
# Print one pipe-delimited row: the RDKit version followed by every recorded
# timing, each formatted to one decimal place.
timing_cells = ' | '.join('%.1f' % seconds for seconds in ts)
print(f"| {rdkit.__version__} | {timing_cells} |")