# Source file: rdkit/Python/Chem/tests/BuildCrippenTestSet.py
# Revision: 75a79b6327 "initial import" by Greg Landrum,
#           2006-05-06 22:20:08 +00:00
# Listing info: 34 lines, 1014 B, Python, executable file

import RDConfig
import gzip
import os.path,cPickle
import Chem
from Chem import Crippen
# Module-level initialisation, run once at import time.
# NOTE(review): presumably loads the Crippen parameter data that
# Crippen._GetAtomContribs (used in runIt) depends on -- confirm in Chem.Crippen.
Crippen._Init()
def runIt(inFileName,outFileName,smiCol=0,maxMols=-1,delim=','):
    """ Pickles Crippen atom contributions for SMILES read from a gzipped file.

    Every line of the input that does not start with '#' is split on delim
    and the SMILES is taken from column smiCol.  The SMILES is echoed to
    stdout, converted with Chem.MolFromSmiles and, when that yields a true
    value, the tuple (smiles, contribs) is appended to the output file with
    cPickle.dump (one pickle per molecule, written back to back).

    Arguments:
      - inFileName: path of the gzip-compressed, delimited text file to read
      - outFileName: path of the pickle file to write; it is opened with
        mode 'wb+', so any existing content is replaced
      - smiCol: zero-based index of the column that holds the SMILES
      - maxMols: if > 0, stop after this many records; values <= 0 mean
        "no limit"
      - delim: column separator used in the input file

    An IndexError propagates if a record has fewer than smiCol+1 columns.
    Both files are closed even when an error interrupts the run.
    """
    inF = gzip.open(inFileName,'r')
    try:
        outF = open(outFileName,'wb+')
        try:
            nDone = 0
            # iterate the archive lazily so an early stop at maxMols does not
            # require decompressing the whole file
            for line in inF:
                if line.startswith('#'):
                    continue
                splitL = line.strip().split(delim)
                smi = splitL[smiCol].strip()
                print(smi)
                mol = Chem.MolFromSmiles(smi)
                if mol:
                    contribs = Crippen._GetAtomContribs(mol)
                    cPickle.dump((smi,contribs),outF)
                # NOTE(review): nDone counts every non-comment record, whether
                # or not it produced a molecule -- confirm that this is the
                # intended meaning of maxMols.
                nDone += 1
                if maxMols>0 and nDone>=maxMols:
                    break
        finally:
            outF.close()
    finally:
        inF.close()
if __name__ == '__main__':
    # Script entry point: build the regression pickle from the first 500
    # records of the tab-delimited building-blocks file (SMILES in column 1).
    # Both the input archive and the output pickle sit in
    # <RDCodeDir>/Chem/tests.
    testDir = os.path.join(RDConfig.RDCodeDir,'Chem','tests')
    inFileName = os.path.join(testDir,'buildingblocks.smi.gz')
    outFileName = os.path.join(testDir,'Crippen_contribs_regress.2.pkl')
    runIt(inFileName,outFileName,
          smiCol=1,
          maxMols=500,
          delim='\t')