mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-07 22:44:25 +08:00
34 lines
1014 B
Python
Executable File
34 lines
1014 B
Python
Executable File
import RDConfig
|
|
import gzip
|
|
import os.path,cPickle
|
|
import Chem
|
|
from Chem import Crippen
|
|
# Run the Crippen module's private initialisation hook once at import time,
# before runIt() calls Crippen._GetAtomContribs().
# NOTE(review): presumably this loads the Crippen parameter data -- confirm
# against Chem.Crippen.
Crippen._Init()
|
|
|
|
def runIt(inFileName,outFileName,smiCol=0,maxMols=-1,delim=','):
    """Pickle per-atom Crippen contributions for molecules in a SMILES table.

    Reads the gzip-compressed, delimited text file ``inFileName`` line by
    line.  Lines starting with ``#`` are skipped.  For every other line the
    field at index ``smiCol`` is taken as a SMILES string, echoed to stdout
    and parsed with ``Chem.MolFromSmiles``.  When parsing yields a truthy
    molecule, the tuple ``(smi, contribs)`` -- where ``contribs`` is the
    result of ``Crippen._GetAtomContribs(mol)`` -- is appended to
    ``outFileName`` as one pickle record.

    Arguments:
      - inFileName: path of the gzip-compressed input file
      - outFileName: path of the pickle file to create (opened with 'wb+')
      - smiCol: index of the column holding the SMILES string
      - maxMols: if greater than zero, stop after this many data lines
        have been handled; otherwise the whole input is processed
      - delim: field delimiter used to split each line

    Returns None.  Both files are closed even if parsing or pickling raises.
    """
    # Open the input first so a missing input file does not truncate an
    # existing output file.
    inF = gzip.open(inFileName,'r')
    try:
        outF = open(outFileName,'wb+')
        try:
            nDone = 0
            # Iterate the file object directly rather than materialising
            # every line up front with readlines().
            for line in inF:
                if line.startswith('#'):
                    continue
                splitL = line.strip().split(delim)
                smi = splitL[smiCol].strip()
                # Parenthesised form prints the same text under the Python 2
                # print statement and the Python 3 print function.
                print(smi)
                mol = Chem.MolFromSmiles(smi)
                if mol:
                    contribs = Crippen._GetAtomContribs(mol)
                    cPickle.dump((smi,contribs),outF)
                # NOTE(review): the reviewed copy of this file had lost its
                # indentation; the counter is assumed to advance once per
                # data line (parsed or not) -- confirm against the original.
                nDone += 1
                if maxMols>0 and nDone>=maxMols:
                    break
        finally:
            outF.close()
    finally:
        inF.close()
|
|
|
|
if __name__ == '__main__':
    # Script entry point: the input SMILES table and the output pickle both
    # live in the Chem/tests directory below RDConfig.RDCodeDir.
    testDir = os.path.join(RDConfig.RDCodeDir,'Chem','tests')
    inFileName = os.path.join(testDir,'buildingblocks.smi.gz')
    outFileName = os.path.join(testDir,'Crippen_contribs_regress.2.pkl')
    # The input is tab-delimited with the SMILES in the second column; only
    # the first 500 data lines are processed.
    runIt(inFileName,
          outFileName,
          smiCol=1,
          maxMols=500,
          delim='\t')
|
|
|