mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-07 22:44:25 +08:00
612 lines
24 KiB
Python
612 lines
24 KiB
Python
# $Id$
|
|
#
|
|
# Copyright (c) 2007-2013, Novartis Institutes for BioMedical Research Inc.
|
|
# All rights reserved.
|
|
#
|
|
# Redistribution and use in source and binary forms, with or without
|
|
# modification, are permitted provided that the following conditions are
|
|
# met:
|
|
#
|
|
# * Redistributions of source code must retain the above copyright
|
|
# notice, this list of conditions and the following disclaimer.
|
|
# * Redistributions in binary form must reproduce the above
|
|
# copyright notice, this list of conditions and the following
|
|
# disclaimer in the documentation and/or other materials provided
|
|
# with the distribution.
|
|
# * Neither the name of Novartis Institutes for BioMedical Research Inc.
|
|
# nor the names of its contributors may be used to endorse or promote
|
|
# products derived from this software without specific prior written permission.
|
|
#
|
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
#
|
|
# Created by Greg Landrum, July 2007
|
|
#
|
|
from __future__ import print_function
|
|
_version = "0.14.0"
|
|
_usage="""
|
|
SearchDb [optional arguments] <sdfilename>
|
|
|
|
The sd filename argument can be either an SD file or an MDL mol
|
|
file.
|
|
|
|
|
|
NOTES:
|
|
|
|
- The property names may have been altered on loading the
|
|
database. Any non-alphanumeric character in a property name
|
|
will be replaced with '_'. e.g."Gold.Goldscore.Constraint.Score" becomes
|
|
"Gold_Goldscore_Constraint_Score".
|
|
|
|
- Property names are not case sensitive in the database.
|
|
|
|
"""
|
|
from rdkit import RDConfig
|
|
from rdkit.Dbase.DbConnection import DbConnect
|
|
|
|
from rdkit.RDLogger import logger
|
|
logger=logger()
|
|
import zlib
|
|
from rdkit import Chem
|
|
|
|
from rdkit.Chem.MolDb.FingerprintUtils import supportedSimilarityMethods,BuildSigFactory,DepickleFP,LayeredOptions
|
|
from rdkit.Chem.MolDb import FingerprintUtils
|
|
|
|
from rdkit import DataStructs
|
|
|
|
def _molFromPkl(pkl):
|
|
if isinstance(pkl,(bytes,str)):
|
|
mol = Chem.Mol(pkl)
|
|
else:
|
|
mol = Chem.Mol(str(pkl))
|
|
return mol
|
|
|
|
def GetNeighborLists(probes,topN,pool,
|
|
simMetric=DataStructs.DiceSimilarity,
|
|
simThresh=-1.,
|
|
silent=False,
|
|
**kwargs):
|
|
probeFps = [x[1] for x in probes]
|
|
validProbes = [x for x in range(len(probeFps)) if probeFps[x] is not None]
|
|
validFps=[probeFps[x] for x in validProbes]
|
|
from rdkit.DataStructs.TopNContainer import TopNContainer
|
|
if simThresh<=0:
|
|
nbrLists = [TopNContainer(topN) for x in range(len(probeFps))]
|
|
else:
|
|
nbrLists=[TopNContainer(-1) for x in range(len(probeFps))]
|
|
|
|
nDone=0
|
|
for nm,fp in pool:
|
|
nDone+=1
|
|
if not silent and not nDone%1000: logger.info(' searched %d rows'%nDone)
|
|
if(simMetric==DataStructs.DiceSimilarity):
|
|
scores = DataStructs.BulkDiceSimilarity(fp,validFps)
|
|
for i,score in enumerate(scores):
|
|
if score>simThresh:
|
|
nbrLists[validProbes[i]].Insert(score,nm)
|
|
elif(simMetric==DataStructs.TanimotoSimilarity):
|
|
scores = DataStructs.BulkTanimotoSimilarity(fp,validFps)
|
|
for i,score in enumerate(scores):
|
|
if score>simThresh:
|
|
nbrLists[validProbes[i]].Insert(score,nm)
|
|
elif(simMetric==DataStructs.TverskySimilarity):
|
|
av = float(kwargs.get('tverskyA',0.5))
|
|
bv = float(kwargs.get('tverskyB',0.5))
|
|
scores = DataStructs.BulkTverskySimilarity(fp,validFps,av,bv)
|
|
for i,score in enumerate(scores):
|
|
if score>simThresh:
|
|
nbrLists[validProbes[i]].Insert(score,nm)
|
|
else:
|
|
for i in range(len(probeFps)):
|
|
pfp = probeFps[i]
|
|
if pfp is not None:
|
|
score = simMetric(probeFps[i],fp)
|
|
if score>simThresh:
|
|
nbrLists[validProbes[i]].Insert(score,nm)
|
|
return nbrLists
|
|
|
|
def GetMolsFromSmilesFile(dataFilename,errFile,nameProp):
|
|
dataFile=open(dataFilename,'r')
|
|
for idx,line in enumerate(dataFile):
|
|
try:
|
|
smi,nm = line.strip().split(' ')
|
|
except:
|
|
continue
|
|
try:
|
|
m = Chem.MolFromSmiles(smi)
|
|
except:
|
|
m=None
|
|
if not m:
|
|
if errfile:
|
|
print(idx,nm,smi,file=errfile)
|
|
continue
|
|
yield (nm,smi,m)
|
|
|
|
def GetMolsFromSDFile(dataFilename,errFile,nameProp):
|
|
suppl = Chem.SDMolSupplier(dataFilename)
|
|
|
|
for idx,m in enumerate(suppl):
|
|
if not m:
|
|
if errFile:
|
|
if hasattr(suppl,'GetItemText'):
|
|
d = suppl.GetItemText(idx)
|
|
errFile.write(d)
|
|
else:
|
|
logger.warning('full error file support not complete')
|
|
continue
|
|
smi = Chem.MolToSmiles(m,True)
|
|
if m.HasProp(nameProp):
|
|
nm = m.GetProp(nameProp)
|
|
if not nm:
|
|
logger.warning('molecule found with empty name property')
|
|
else:
|
|
nm = 'Mol_%d'%(idx+1)
|
|
yield nm,smi,m
|
|
|
|
|
|
def RunSearch(options,queryFilename):
|
|
global sigFactory
|
|
if options.similarityType=='AtomPairs':
|
|
fpBuilder=FingerprintUtils.BuildAtomPairFP
|
|
simMetric=DataStructs.DiceSimilarity
|
|
dbName = os.path.join(options.dbDir,options.pairDbName)
|
|
fpTableName = options.pairTableName
|
|
fpColName = options.pairColName
|
|
elif options.similarityType=='TopologicalTorsions':
|
|
fpBuilder=FingerprintUtils.BuildTorsionsFP
|
|
simMetric=DataStructs.DiceSimilarity
|
|
dbName = os.path.join(options.dbDir,options.torsionsDbName)
|
|
fpTableName = options.torsionsTableName
|
|
fpColName = options.torsionsColName
|
|
elif options.similarityType=='RDK':
|
|
fpBuilder=FingerprintUtils.BuildRDKitFP
|
|
simMetric=DataStructs.FingerprintSimilarity
|
|
dbName = os.path.join(options.dbDir,options.fpDbName)
|
|
fpTableName = options.fpTableName
|
|
if not options.fpColName:
|
|
options.fpColName='rdkfp'
|
|
fpColName = options.fpColName
|
|
elif options.similarityType=='Pharm2D':
|
|
fpBuilder=FingerprintUtils.BuildPharm2DFP
|
|
simMetric=DataStructs.DiceSimilarity
|
|
dbName = os.path.join(options.dbDir,options.fpDbName)
|
|
fpTableName = options.pharm2DTableName
|
|
if not options.fpColName:
|
|
options.fpColName='pharm2dfp'
|
|
fpColName = options.fpColName
|
|
FingerprintUtils.sigFactory = BuildSigFactory(options)
|
|
elif options.similarityType=='Gobbi2D':
|
|
from rdkit.Chem.Pharm2D import Gobbi_Pharm2D
|
|
fpBuilder=FingerprintUtils.BuildPharm2DFP
|
|
simMetric=DataStructs.TanimotoSimilarity
|
|
dbName = os.path.join(options.dbDir,options.fpDbName)
|
|
fpTableName = options.gobbi2DTableName
|
|
if not options.fpColName:
|
|
options.fpColName='gobbi2dfp'
|
|
fpColName = options.fpColName
|
|
FingerprintUtils.sigFactory = Gobbi_Pharm2D.factory
|
|
elif options.similarityType=='Morgan':
|
|
fpBuilder=FingerprintUtils.BuildMorganFP
|
|
simMetric=DataStructs.DiceSimilarity
|
|
dbName = os.path.join(options.dbDir,options.morganFpDbName)
|
|
fpTableName = options.morganFpTableName
|
|
fpColName = options.morganFpColName
|
|
|
|
|
|
extraArgs={}
|
|
if options.similarityMetric=='tanimoto':
|
|
simMetric = DataStructs.TanimotoSimilarity
|
|
elif options.similarityMetric=='dice':
|
|
simMetric = DataStructs.DiceSimilarity
|
|
elif options.similarityMetric=='tversky':
|
|
simMetric = DataStructs.TverskySimilarity
|
|
extraArgs['tverskyA']=options.tverskyA
|
|
extraArgs['tverskyB']=options.tverskyB
|
|
|
|
if options.smilesQuery:
|
|
mol=Chem.MolFromSmiles(options.smilesQuery)
|
|
if not mol:
|
|
logger.error('could not build query molecule from smiles "%s"'%options.smilesQuery)
|
|
sys.exit(-1)
|
|
options.queryMol = mol
|
|
elif options.smartsQuery:
|
|
mol=Chem.MolFromSmarts(options.smartsQuery)
|
|
if not mol:
|
|
logger.error('could not build query molecule from smarts "%s"'%options.smartsQuery)
|
|
sys.exit(-1)
|
|
options.queryMol = mol
|
|
|
|
if options.outF=='-':
|
|
outF=sys.stdout
|
|
elif options.outF=='':
|
|
outF=None
|
|
else:
|
|
outF = open(options.outF,'w+')
|
|
|
|
molsOut=False
|
|
if options.sdfOut:
|
|
molsOut=True
|
|
if options.sdfOut=='-':
|
|
sdfOut=sys.stdout
|
|
else:
|
|
sdfOut = open(options.sdfOut,'w+')
|
|
else:
|
|
sdfOut=None
|
|
if options.smilesOut:
|
|
molsOut=True
|
|
if options.smilesOut=='-':
|
|
smilesOut=sys.stdout
|
|
else:
|
|
smilesOut = open(options.smilesOut,'w+')
|
|
else:
|
|
smilesOut=None
|
|
|
|
if queryFilename:
|
|
try:
|
|
tmpF = open(queryFilename,'r')
|
|
except IOError:
|
|
logger.error('could not open query file %s'%queryFilename)
|
|
sys.exit(1)
|
|
|
|
if options.molFormat=='smiles':
|
|
func=GetMolsFromSmilesFile
|
|
elif options.molFormat=='sdf':
|
|
func=GetMolsFromSDFile
|
|
|
|
if not options.silent:
|
|
msg='Reading query molecules'
|
|
if fpBuilder: msg+=' and generating fingerprints'
|
|
logger.info(msg)
|
|
probes=[]
|
|
i=0
|
|
nms=[]
|
|
for nm,smi,mol in func(queryFilename,None,options.nameProp):
|
|
i+=1
|
|
nms.append(nm)
|
|
if not mol:
|
|
logger.error('query molecule %d could not be built'%(i))
|
|
probes.append((None,None))
|
|
continue
|
|
if fpBuilder:
|
|
probes.append((mol,fpBuilder(mol)))
|
|
else:
|
|
probes.append((mol,None))
|
|
if not options.silent and not i%1000:
|
|
logger.info(" done %d"%i)
|
|
else:
|
|
probes=None
|
|
|
|
conn=None
|
|
idName = options.molIdName
|
|
ids=None
|
|
names=None
|
|
molDbName = os.path.join(options.dbDir,options.molDbName)
|
|
molIdName = options.molIdName
|
|
mConn = DbConnect(molDbName)
|
|
cns = [(x.lower(),y) for x,y in mConn.GetColumnNamesAndTypes('molecules')]
|
|
idCol,idTyp=cns[0]
|
|
if options.propQuery or options.queryMol:
|
|
conn = DbConnect(molDbName)
|
|
curs = conn.GetCursor()
|
|
if options.queryMol:
|
|
if not options.silent: logger.info('Doing substructure query')
|
|
if options.propQuery:
|
|
where='where %s'%options.propQuery
|
|
else:
|
|
where=''
|
|
if not options.silent:
|
|
curs.execute('select count(*) from molecules %(where)s'%locals())
|
|
nToDo = curs.fetchone()[0]
|
|
|
|
join=''
|
|
doSubstructFPs=False
|
|
fpDbName = os.path.join(options.dbDir,options.fpDbName)
|
|
if os.path.exists(fpDbName) and not options.negateQuery :
|
|
curs.execute("attach database '%s' as fpdb"%(fpDbName))
|
|
try:
|
|
curs.execute('select * from fpdb.%s limit 1'%options.layeredTableName)
|
|
except:
|
|
pass
|
|
else:
|
|
doSubstructFPs=True
|
|
join = 'join fpdb.%s using (%s)'%(options.layeredTableName,idCol)
|
|
query = LayeredOptions.GetQueryText(options.queryMol)
|
|
if query:
|
|
if not where:
|
|
where='where'
|
|
else:
|
|
where += ' and'
|
|
where += ' '+query
|
|
|
|
cmd = 'select %(idCol)s,molpkl from molecules %(join)s %(where)s'%locals()
|
|
curs.execute(cmd)
|
|
row=curs.fetchone()
|
|
nDone=0
|
|
ids=[]
|
|
while row:
|
|
id,molpkl = row
|
|
if not options.zipMols:
|
|
m = _molFromPkl(molpkl)
|
|
else:
|
|
m = Chem.Mol(zlib.decompress(molpkl))
|
|
matched=m.HasSubstructMatch(options.queryMol)
|
|
if options.negateQuery:
|
|
matched = not matched
|
|
if matched:
|
|
ids.append(id)
|
|
nDone+=1
|
|
if not options.silent and not nDone%500:
|
|
if not doSubstructFPs:
|
|
logger.info(' searched %d (of %d) molecules; %d hits so far'%(nDone,nToDo,len(ids)))
|
|
else:
|
|
logger.info(' searched through %d molecules; %d hits so far'%(nDone,len(ids)))
|
|
row=curs.fetchone()
|
|
if not options.silent and doSubstructFPs and nToDo:
|
|
nFiltered = nToDo-nDone
|
|
logger.info(' Fingerprint screenout rate: %d of %d (%%%.2f)'%(nFiltered,nToDo,100.*nFiltered/nToDo))
|
|
|
|
elif options.propQuery:
|
|
if not options.silent: logger.info('Doing property query')
|
|
propQuery=options.propQuery.split(';')[0]
|
|
curs.execute('select %(idCol)s from molecules where %(propQuery)s'%locals())
|
|
ids = [x[0] for x in curs.fetchall()]
|
|
if not options.silent:
|
|
logger.info('Found %d molecules matching the query'%(len(ids)))
|
|
|
|
t1=time.time()
|
|
if probes:
|
|
if not options.silent: logger.info('Finding Neighbors')
|
|
conn = DbConnect(dbName)
|
|
cns = conn.GetColumnNames(fpTableName)
|
|
curs = conn.GetCursor()
|
|
|
|
if ids:
|
|
ids = [(x,) for x in ids]
|
|
curs.execute('create temporary table _tmpTbl (%(idCol)s %(idTyp)s)'%locals())
|
|
curs.executemany('insert into _tmpTbl values (?)',ids)
|
|
join='join _tmpTbl using (%(idCol)s)'%locals()
|
|
else:
|
|
join=''
|
|
|
|
if cns[0].lower() != idCol.lower():
|
|
# backwards compatibility to the days when mol tables had a guid and
|
|
# the fps tables did not:
|
|
curs.execute("attach database '%(molDbName)s' as mols"%locals())
|
|
curs.execute("""
|
|
select %(idCol)s,%(fpColName)s from %(fpTableName)s join
|
|
(select %(idCol)s,%(molIdName)s from mols.molecules %(join)s)
|
|
using (%(molIdName)s)
|
|
"""%(locals()))
|
|
else:
|
|
curs.execute('select %(idCol)s,%(fpColName)s from %(fpTableName)s %(join)s'%locals())
|
|
def poolFromCurs(curs,similarityMethod):
|
|
row = curs.fetchone()
|
|
while row:
|
|
id,pkl = row
|
|
fp = DepickleFP(pkl,similarityMethod)
|
|
yield (id,fp)
|
|
row = curs.fetchone()
|
|
topNLists = GetNeighborLists(probes,options.topN,poolFromCurs(curs,options.similarityType),
|
|
simMetric=simMetric,simThresh=options.simThresh,**extraArgs)
|
|
uniqIds=set()
|
|
nbrLists = {}
|
|
for i,nm in enumerate(nms):
|
|
topNLists[i].reverse()
|
|
scores=topNLists[i].GetPts()
|
|
nbrNames = topNLists[i].GetExtras()
|
|
nbrs = []
|
|
for j,nbrGuid in enumerate(nbrNames):
|
|
if nbrGuid is None:
|
|
break
|
|
else:
|
|
uniqIds.add(nbrGuid)
|
|
nbrs.append((nbrGuid,scores[j]))
|
|
nbrLists[(i,nm)] = nbrs
|
|
t2=time.time()
|
|
if not options.silent: logger.info('The search took %.1f seconds'%(t2-t1))
|
|
|
|
if not options.silent: logger.info('Creating output')
|
|
|
|
|
|
curs = mConn.GetCursor()
|
|
ids = list(uniqIds)
|
|
|
|
ids = [(x,) for x in ids]
|
|
curs.execute('create temporary table _tmpTbl (%(idCol)s %(idTyp)s)'%locals())
|
|
curs.executemany('insert into _tmpTbl values (?)',ids)
|
|
curs.execute('select %(idCol)s,%(molIdName)s from molecules join _tmpTbl using (%(idCol)s)'%locals())
|
|
nmDict={}
|
|
for guid,id in curs.fetchall():
|
|
nmDict[guid]=str(id)
|
|
|
|
ks = list(nbrLists.keys())
|
|
ks.sort()
|
|
if not options.transpose:
|
|
for i,nm in ks:
|
|
nbrs= nbrLists[(i,nm)]
|
|
nbrTxt=options.outputDelim.join([nm]+['%s%s%.3f'%(nmDict[id],options.outputDelim,score) for id,score in nbrs])
|
|
if outF: print(nbrTxt,file=outF)
|
|
else:
|
|
labels = ['%s%sSimilarity'%(x[1],options.outputDelim) for x in ks]
|
|
if outF: print(options.outputDelim.join(labels),file=outF)
|
|
for i in range(options.topN):
|
|
outL = []
|
|
for idx,nm in ks:
|
|
nbr = nbrLists[(idx,nm)][i]
|
|
outL.append(nmDict[nbr[0]])
|
|
outL.append('%.3f'%nbr[1])
|
|
if outF: print(options.outputDelim.join(outL),file=outF)
|
|
else:
|
|
if not options.silent: logger.info('Creating output')
|
|
curs = mConn.GetCursor()
|
|
ids = [(x,) for x in set(ids)]
|
|
curs.execute('create temporary table _tmpTbl (%(idCol)s %(idTyp)s)'%locals())
|
|
curs.executemany('insert into _tmpTbl values (?)',ids)
|
|
molIdName = options.molIdName
|
|
curs.execute('select %(idCol)s,%(molIdName)s from molecules join _tmpTbl using (%(idCol)s)'%locals())
|
|
nmDict={}
|
|
for guid,id in curs.fetchall():
|
|
nmDict[guid]=str(id)
|
|
if outF: print('\n'.join(nmDict.values()),file=outF)
|
|
if molsOut and ids:
|
|
molDbName = os.path.join(options.dbDir,options.molDbName)
|
|
cns = [x.lower() for x in mConn.GetColumnNames('molecules')]
|
|
if cns[-1]!='molpkl':
|
|
cns.remove('molpkl')
|
|
cns.append('molpkl')
|
|
|
|
curs = mConn.GetCursor()
|
|
#curs.execute('create temporary table _tmpTbl (guid integer)'%locals())
|
|
#curs.executemany('insert into _tmpTbl values (?)',ids)
|
|
cnText=','.join(cns)
|
|
curs.execute('select %(cnText)s from molecules join _tmpTbl using (%(idCol)s)'%locals())
|
|
|
|
row=curs.fetchone()
|
|
molD = {}
|
|
while row:
|
|
row = list(row)
|
|
m = _molFromPkl(row[-1])
|
|
guid = row[0]
|
|
nm = nmDict[guid]
|
|
if sdfOut:
|
|
m.SetProp('_Name',nm)
|
|
print(Chem.MolToMolBlock(m),file=sdfOut)
|
|
for i in range(1,len(cns)-1):
|
|
pn = cns[i]
|
|
pv = str(row[i])
|
|
print >>sdfOut,'> <%s>\n%s\n'%(pn,pv)
|
|
print('$$$$',file=sdfOut)
|
|
if smilesOut:
|
|
smi=Chem.MolToSmiles(m,options.chiralSmiles)
|
|
if smilesOut:
|
|
print('%s %s'%(smi,str(row[1])),file=smilesOut)
|
|
row=curs.fetchone()
|
|
if not options.silent: logger.info('Done!')
|
|
|
|
# ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
|
|
import os
|
|
from optparse import OptionParser
|
|
parser=OptionParser(_usage,version='%prog '+_version)
|
|
parser.add_option('--dbDir',default='/db/camm/CURRENT/rdk_db',
|
|
help='name of the directory containing the database information. The default is %default')
|
|
parser.add_option('--molDbName',default='Compounds.sqlt',
|
|
help='name of the molecule database')
|
|
parser.add_option('--molIdName',default='compound_id',
|
|
help='name of the database key column')
|
|
parser.add_option('--regName',default='molecules',
|
|
help='name of the molecular registry table')
|
|
parser.add_option('--pairDbName',default='AtomPairs.sqlt',
|
|
help='name of the atom pairs database')
|
|
parser.add_option('--pairTableName',default='atompairs',
|
|
help='name of the atom pairs table')
|
|
parser.add_option('--pairColName',default='atompairfp',
|
|
help='name of the atom pair column')
|
|
parser.add_option('--torsionsDbName',default='AtomPairs.sqlt',
|
|
help='name of the topological torsions database (usually the same as the atom pairs database)')
|
|
parser.add_option('--torsionsTableName',default='atompairs',
|
|
help='name of the topological torsions table (usually the same as the atom pairs table)')
|
|
parser.add_option('--torsionsColName',default='torsionfp',
|
|
help='name of the atom pair column')
|
|
parser.add_option('--fpDbName',default='Fingerprints.sqlt',
|
|
help='name of the 2D fingerprints database')
|
|
parser.add_option('--fpTableName',default='rdkitfps',
|
|
help='name of the 2D fingerprints table')
|
|
parser.add_option('--layeredTableName',default='layeredfps',
|
|
help='name of the layered fingerprints table')
|
|
parser.add_option('--fpColName',default='',
|
|
help='name of the 2D fingerprint column, a sensible default is used')
|
|
parser.add_option('--descrDbName',default='Descriptors.sqlt',
|
|
help='name of the descriptor database')
|
|
parser.add_option('--descrTableName',default='descriptors_v1',
|
|
help='name of the descriptor table')
|
|
parser.add_option('--descriptorCalcFilename',default=os.path.join(RDConfig.RDBaseDir,'Projects',
|
|
'DbCLI','moe_like.dsc'),
|
|
help='name of the file containing the descriptor calculator')
|
|
parser.add_option('--outputDelim',default=',',
|
|
help='the delimiter for the output file. The default is %default')
|
|
parser.add_option('--topN',default=20,type='int',
|
|
help='the number of neighbors to keep for each query compound. The default is %default')
|
|
|
|
parser.add_option('--outF','--outFile',default='-',
|
|
help='The name of the output file. The default is the console (stdout).')
|
|
|
|
parser.add_option('--transpose',default=False,action="store_true",
|
|
help='print the results out in a transposed form: e.g. neighbors in rows and probe compounds in columns')
|
|
|
|
parser.add_option('--molFormat',default='sdf',choices=('smiles','sdf'),
|
|
help='specify the format of the input file')
|
|
parser.add_option('--nameProp',default='_Name',
|
|
help='specify the SD property to be used for the molecule names. Default is to use the mol block name')
|
|
|
|
parser.add_option('--smartsQuery','--smarts','--sma',default='',
|
|
help='provide a SMARTS to be used as a substructure query')
|
|
parser.add_option('--smilesQuery','--smiles','--smi',default='',
|
|
help='provide a SMILES to be used as a substructure query')
|
|
parser.add_option('--negateQuery','--negate',default=False,action='store_true',
|
|
help='negate the results of the smarts query.')
|
|
parser.add_option('--propQuery','--query','-q',default='',
|
|
help='provide a property query (see the NOTE about property names)')
|
|
|
|
parser.add_option('--sdfOut','--sdOut',default='',
|
|
help='export an SD file with the matching molecules')
|
|
parser.add_option('--smilesOut','--smiOut',default='',
|
|
help='export a smiles file with the matching molecules')
|
|
parser.add_option('--nonchiralSmiles',dest='chiralSmiles',default=True,action='store_false',
|
|
help='do not use chiral SMILES in the output')
|
|
parser.add_option('--silent',default=False,action='store_true',
|
|
help='Do not generate status messages.')
|
|
|
|
parser.add_option('--zipMols','--zip',default=False,action='store_true',
|
|
help='read compressed mols from the database')
|
|
|
|
parser.add_option('--pharm2DTableName',default='pharm2dfps',
|
|
help='name of the Pharm2D fingerprints table')
|
|
parser.add_option('--fdefFile','--fdef',
|
|
default=os.path.join(RDConfig.RDDataDir,'Novartis1.fdef'),
|
|
help='provide the name of the fdef file to use for 2d pharmacophores')
|
|
parser.add_option('--gobbi2DTableName',default='gobbi2dfps',
|
|
help='name of the Gobbi2D fingerprints table')
|
|
|
|
parser.add_option('--similarityType','--simType','--sim',
|
|
default='RDK',choices=supportedSimilarityMethods,
|
|
help='Choose the type of similarity to use, possible values: RDK, AtomPairs, TopologicalTorsions, Pharm2D, Gobbi2D, Avalon, Morgan. The default is %default')
|
|
parser.add_option('--morganFpDbName',default='Fingerprints.sqlt',
|
|
help='name of the morgan fingerprints database')
|
|
parser.add_option('--morganFpTableName',default='morganfps',
|
|
help='name of the morgan fingerprints table')
|
|
parser.add_option('--morganFpColName',default='morganfp',
|
|
help='name of the morgan fingerprint column')
|
|
|
|
parser.add_option('--similarityMetric','--simMetric','--metric',
|
|
default='',choices=('tanimoto','dice','tversky',''),
|
|
help='Choose the type of similarity to use, possible values: tanimoto, dice, tversky. The default is determined by the fingerprint type')
|
|
parser.add_option('--tverskyA',default=0.5,type='float',
|
|
help='Tversky A value')
|
|
parser.add_option('--tverskyB',default=0.5,type='float',
|
|
help='Tversky B value')
|
|
parser.add_option('--simThresh',default=-1,type='float',
|
|
help='threshold to use for similarity searching. If provided, this supersedes the topN argument')
|
|
|
|
if __name__=='__main__':
|
|
import sys,getopt,time
|
|
|
|
options,args = parser.parse_args()
|
|
if len(args)!=1 and not (options.smilesQuery or options.smartsQuery or options.propQuery):
|
|
parser.error('please either provide a query filename argument or do a data or smarts query')
|
|
|
|
if len(args):
|
|
queryFilename=args[0]
|
|
else:
|
|
queryFilename=None
|
|
options.queryMol=None
|
|
RunSearch(options,queryFilename)
|