mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-04 21:54:27 +08:00
* remove all of the "from __future__" imports * remove the first batch of rdkit.six imports/uses * next step of rdkit.six removal * removing xrange, range, and some maps * next round of removals * next round of cleanups * fix inchi test * last bits of "from rdkit.six" are gone * and the last of the six stuff is gone * strange importlib problem
632 lines
24 KiB
Python
632 lines
24 KiB
Python
# $Id$
|
|
#
|
|
# Copyright (c) 2007-2013, Novartis Institutes for BioMedical Research Inc.
|
|
# All rights reserved.
|
|
#
|
|
# Redistribution and use in source and binary forms, with or without
|
|
# modification, are permitted provided that the following conditions are
|
|
# met:
|
|
#
|
|
# * Redistributions of source code must retain the above copyright
|
|
# notice, this list of conditions and the following disclaimer.
|
|
# * Redistributions in binary form must reproduce the above
|
|
# copyright notice, this list of conditions and the following
|
|
# disclaimer in the documentation and/or other materials provided
|
|
# with the distribution.
|
|
# * Neither the name of Novartis Institutes for BioMedical Research Inc.
|
|
# nor the names of its contributors may be used to endorse or promote
|
|
# products derived from this software without specific prior written permission.
|
|
#
|
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
#
|
|
# Created by Greg Landrum, July 2007
|
|
#
|
|
|
|
_version = "0.14.0"
|
|
_description = """
|
|
The sd filename argument can be either an SD file or an MDL mol
|
|
file.
|
|
|
|
|
|
NOTES:
|
|
|
|
- The property names may have been altered on loading the
|
|
database. Any non-alphanumeric character in a property name
|
|
will be replaced with '_'. e.g."Gold.Goldscore.Constraint.Score" becomes
|
|
"Gold_Goldscore_Constraint_Score".
|
|
|
|
- Property names are not case sensitive in the database.
|
|
|
|
"""
|
|
import os
|
|
import argparse
|
|
import sys, time
|
|
|
|
from rdkit import RDConfig
|
|
from rdkit.Dbase.DbConnection import DbConnect
|
|
|
|
from rdkit.RDLogger import logger
|
|
logger = logger()
|
|
import zlib
|
|
from rdkit import Chem
|
|
|
|
from rdkit.Chem.MolDb.FingerprintUtils import supportedSimilarityMethods, BuildSigFactory, DepickleFP, LayeredOptions
|
|
from rdkit.Chem.MolDb import FingerprintUtils
|
|
|
|
from rdkit import DataStructs
|
|
|
|
|
|
def _molFromPkl(pkl):
|
|
if isinstance(pkl, (bytes, str)):
|
|
mol = Chem.Mol(pkl)
|
|
else:
|
|
mol = Chem.Mol(str(pkl))
|
|
return mol
|
|
|
|
|
|
def GetNeighborLists(probes, topN, pool, simMetric=DataStructs.DiceSimilarity, simThresh=-1.,
|
|
silent=False, **kwargs):
|
|
probeFps = [x[1] for x in probes]
|
|
validProbes = [x for x in range(len(probeFps)) if probeFps[x] is not None]
|
|
validFps = [probeFps[x] for x in validProbes]
|
|
from rdkit.DataStructs.TopNContainer import TopNContainer
|
|
if simThresh <= 0:
|
|
nbrLists = [TopNContainer(topN) for x in range(len(probeFps))]
|
|
else:
|
|
nbrLists = [TopNContainer(-1) for x in range(len(probeFps))]
|
|
|
|
nDone = 0
|
|
for nm, fp in pool:
|
|
nDone += 1
|
|
if not silent and not nDone % 1000:
|
|
logger.info(' searched %d rows' % nDone)
|
|
if (simMetric == DataStructs.DiceSimilarity):
|
|
scores = DataStructs.BulkDiceSimilarity(fp, validFps)
|
|
for i, score in enumerate(scores):
|
|
if score > simThresh:
|
|
nbrLists[validProbes[i]].Insert(score, nm)
|
|
elif (simMetric == DataStructs.TanimotoSimilarity):
|
|
scores = DataStructs.BulkTanimotoSimilarity(fp, validFps)
|
|
for i, score in enumerate(scores):
|
|
if score > simThresh:
|
|
nbrLists[validProbes[i]].Insert(score, nm)
|
|
elif (simMetric == DataStructs.TverskySimilarity):
|
|
av = float(kwargs.get('tverskyA', 0.5))
|
|
bv = float(kwargs.get('tverskyB', 0.5))
|
|
scores = DataStructs.BulkTverskySimilarity(fp, validFps, av, bv)
|
|
for i, score in enumerate(scores):
|
|
if score > simThresh:
|
|
nbrLists[validProbes[i]].Insert(score, nm)
|
|
else:
|
|
for i in range(len(probeFps)):
|
|
pfp = probeFps[i]
|
|
if pfp is not None:
|
|
score = simMetric(probeFps[i], fp)
|
|
if score > simThresh:
|
|
nbrLists[validProbes[i]].Insert(score, nm)
|
|
return nbrLists
|
|
|
|
|
|
def GetMolsFromSmilesFile(dataFilename, errFile, nameProp):
|
|
dataFile = open(dataFilename, 'r')
|
|
for idx, line in enumerate(dataFile):
|
|
try:
|
|
smi, nm = line.strip().split(' ')
|
|
except ValueError:
|
|
continue
|
|
m = Chem.MolFromSmiles(smi)
|
|
if not m:
|
|
if errFile:
|
|
print(idx, nm, smi, file=errFile)
|
|
continue
|
|
yield (nm, smi, m)
|
|
|
|
|
|
def GetMolsFromSDFile(dataFilename, errFile, nameProp):
|
|
suppl = Chem.SDMolSupplier(dataFilename)
|
|
|
|
for idx, m in enumerate(suppl):
|
|
if not m:
|
|
if errFile:
|
|
if hasattr(suppl, 'GetItemText'):
|
|
d = suppl.GetItemText(idx)
|
|
errFile.write(d)
|
|
else:
|
|
logger.warning('full error file support not complete')
|
|
continue
|
|
smi = Chem.MolToSmiles(m, True)
|
|
if m.HasProp(nameProp):
|
|
nm = m.GetProp(nameProp)
|
|
if not nm:
|
|
logger.warning('molecule found with empty name property')
|
|
else:
|
|
nm = 'Mol_%d' % (idx + 1)
|
|
yield nm, smi, m
|
|
|
|
|
|
def RunSearch(options, queryFilename):
|
|
global sigFactory
|
|
if options.similarityType == 'AtomPairs':
|
|
fpBuilder = FingerprintUtils.BuildAtomPairFP
|
|
simMetric = DataStructs.DiceSimilarity
|
|
dbName = os.path.join(options.dbDir, options.pairDbName)
|
|
fpTableName = options.pairTableName
|
|
fpColName = options.pairColName
|
|
elif options.similarityType == 'TopologicalTorsions':
|
|
fpBuilder = FingerprintUtils.BuildTorsionsFP
|
|
simMetric = DataStructs.DiceSimilarity
|
|
dbName = os.path.join(options.dbDir, options.torsionsDbName)
|
|
fpTableName = options.torsionsTableName
|
|
fpColName = options.torsionsColName
|
|
elif options.similarityType == 'RDK':
|
|
fpBuilder = FingerprintUtils.BuildRDKitFP
|
|
simMetric = DataStructs.FingerprintSimilarity
|
|
dbName = os.path.join(options.dbDir, options.fpDbName)
|
|
fpTableName = options.fpTableName
|
|
if not options.fpColName:
|
|
options.fpColName = 'rdkfp'
|
|
fpColName = options.fpColName
|
|
elif options.similarityType == 'Pharm2D':
|
|
fpBuilder = FingerprintUtils.BuildPharm2DFP
|
|
simMetric = DataStructs.DiceSimilarity
|
|
dbName = os.path.join(options.dbDir, options.fpDbName)
|
|
fpTableName = options.pharm2DTableName
|
|
if not options.fpColName:
|
|
options.fpColName = 'pharm2dfp'
|
|
fpColName = options.fpColName
|
|
FingerprintUtils.sigFactory = BuildSigFactory(options)
|
|
elif options.similarityType == 'Gobbi2D':
|
|
from rdkit.Chem.Pharm2D import Gobbi_Pharm2D
|
|
fpBuilder = FingerprintUtils.BuildPharm2DFP
|
|
simMetric = DataStructs.TanimotoSimilarity
|
|
dbName = os.path.join(options.dbDir, options.fpDbName)
|
|
fpTableName = options.gobbi2DTableName
|
|
if not options.fpColName:
|
|
options.fpColName = 'gobbi2dfp'
|
|
fpColName = options.fpColName
|
|
FingerprintUtils.sigFactory = Gobbi_Pharm2D.factory
|
|
elif options.similarityType == 'Morgan':
|
|
fpBuilder = FingerprintUtils.BuildMorganFP
|
|
simMetric = DataStructs.DiceSimilarity
|
|
dbName = os.path.join(options.dbDir, options.morganFpDbName)
|
|
fpTableName = options.morganFpTableName
|
|
fpColName = options.morganFpColName
|
|
|
|
extraArgs = {}
|
|
if options.similarityMetric == 'tanimoto':
|
|
simMetric = DataStructs.TanimotoSimilarity
|
|
elif options.similarityMetric == 'dice':
|
|
simMetric = DataStructs.DiceSimilarity
|
|
elif options.similarityMetric == 'tversky':
|
|
simMetric = DataStructs.TverskySimilarity
|
|
extraArgs['tverskyA'] = options.tverskyA
|
|
extraArgs['tverskyB'] = options.tverskyB
|
|
|
|
if options.smilesQuery:
|
|
mol = Chem.MolFromSmiles(options.smilesQuery)
|
|
if not mol:
|
|
logger.error('could not build query molecule from smiles "%s"' % options.smilesQuery)
|
|
sys.exit(-1)
|
|
options.queryMol = mol
|
|
elif options.smartsQuery:
|
|
mol = Chem.MolFromSmarts(options.smartsQuery)
|
|
if not mol:
|
|
logger.error('could not build query molecule from smarts "%s"' % options.smartsQuery)
|
|
sys.exit(-1)
|
|
options.queryMol = mol
|
|
|
|
if options.outF == '-':
|
|
outF = sys.stdout
|
|
elif options.outF == '':
|
|
outF = None
|
|
else:
|
|
outF = open(options.outF, 'w+')
|
|
|
|
molsOut = False
|
|
if options.sdfOut:
|
|
molsOut = True
|
|
if options.sdfOut == '-':
|
|
sdfOut = sys.stdout
|
|
else:
|
|
sdfOut = open(options.sdfOut, 'w+')
|
|
else:
|
|
sdfOut = None
|
|
if options.smilesOut:
|
|
molsOut = True
|
|
if options.smilesOut == '-':
|
|
smilesOut = sys.stdout
|
|
else:
|
|
smilesOut = open(options.smilesOut, 'w+')
|
|
else:
|
|
smilesOut = None
|
|
|
|
if queryFilename:
|
|
try:
|
|
tmpF = open(queryFilename, 'r')
|
|
except IOError:
|
|
logger.error('could not open query file %s' % queryFilename)
|
|
sys.exit(1)
|
|
|
|
if options.molFormat == 'smiles':
|
|
func = GetMolsFromSmilesFile
|
|
elif options.molFormat == 'sdf':
|
|
func = GetMolsFromSDFile
|
|
|
|
if not options.silent:
|
|
msg = 'Reading query molecules'
|
|
if fpBuilder:
|
|
msg += ' and generating fingerprints'
|
|
logger.info(msg)
|
|
probes = []
|
|
i = 0
|
|
nms = []
|
|
for nm, smi, mol in func(queryFilename, None, options.nameProp):
|
|
i += 1
|
|
nms.append(nm)
|
|
if not mol:
|
|
logger.error('query molecule %d could not be built' % (i))
|
|
probes.append((None, None))
|
|
continue
|
|
if fpBuilder:
|
|
probes.append((mol, fpBuilder(mol)))
|
|
else:
|
|
probes.append((mol, None))
|
|
if not options.silent and not i % 1000:
|
|
logger.info(" done %d" % i)
|
|
else:
|
|
probes = None
|
|
|
|
conn = None
|
|
idName = options.molIdName
|
|
ids = None
|
|
names = None
|
|
molDbName = os.path.join(options.dbDir, options.molDbName)
|
|
molIdName = options.molIdName
|
|
mConn = DbConnect(molDbName)
|
|
cns = [(x.lower(), y) for x, y in mConn.GetColumnNamesAndTypes('molecules')]
|
|
idCol, idTyp = cns[0]
|
|
if options.propQuery or options.queryMol:
|
|
conn = DbConnect(molDbName)
|
|
curs = conn.GetCursor()
|
|
if options.queryMol:
|
|
if not options.silent:
|
|
logger.info('Doing substructure query')
|
|
if options.propQuery:
|
|
where = 'where %s' % options.propQuery
|
|
else:
|
|
where = ''
|
|
if not options.silent:
|
|
curs.execute('select count(*) from molecules %(where)s' % locals())
|
|
nToDo = curs.fetchone()[0]
|
|
|
|
join = ''
|
|
doSubstructFPs = False
|
|
fpDbName = os.path.join(options.dbDir, options.fpDbName)
|
|
if os.path.exists(fpDbName) and not options.negateQuery:
|
|
curs.execute("attach database '%s' as fpdb" % (fpDbName))
|
|
try:
|
|
curs.execute('select * from fpdb.%s limit 1' % options.layeredTableName)
|
|
except Exception:
|
|
pass
|
|
else:
|
|
doSubstructFPs = True
|
|
join = 'join fpdb.%s using (%s)' % (options.layeredTableName, idCol)
|
|
query = LayeredOptions.GetQueryText(options.queryMol)
|
|
if query:
|
|
if not where:
|
|
where = 'where'
|
|
else:
|
|
where += ' and'
|
|
where += ' ' + query
|
|
|
|
cmd = 'select %(idCol)s,molpkl from molecules %(join)s %(where)s' % locals()
|
|
curs.execute(cmd)
|
|
row = curs.fetchone()
|
|
nDone = 0
|
|
ids = []
|
|
while row:
|
|
id, molpkl = row
|
|
if not options.zipMols:
|
|
m = _molFromPkl(molpkl)
|
|
else:
|
|
m = Chem.Mol(zlib.decompress(molpkl))
|
|
matched = m.HasSubstructMatch(options.queryMol)
|
|
if options.negateQuery:
|
|
matched = not matched
|
|
if matched:
|
|
ids.append(id)
|
|
nDone += 1
|
|
if not options.silent and not nDone % 500:
|
|
if not doSubstructFPs:
|
|
logger.info(' searched %d (of %d) molecules; %d hits so far' %
|
|
(nDone, nToDo, len(ids)))
|
|
else:
|
|
logger.info(' searched through %d molecules; %d hits so far' % (nDone, len(ids)))
|
|
row = curs.fetchone()
|
|
if not options.silent and doSubstructFPs and nToDo:
|
|
nFiltered = nToDo - nDone
|
|
logger.info(' Fingerprint screenout rate: %d of %d (%%%.2f)' %
|
|
(nFiltered, nToDo, 100. * nFiltered / nToDo))
|
|
|
|
elif options.propQuery:
|
|
if not options.silent:
|
|
logger.info('Doing property query')
|
|
propQuery = options.propQuery.split(';')[0]
|
|
curs.execute('select %(idCol)s from molecules where %(propQuery)s' % locals())
|
|
ids = [x[0] for x in curs.fetchall()]
|
|
if not options.silent:
|
|
logger.info('Found %d molecules matching the query' % (len(ids)))
|
|
|
|
t1 = time.time()
|
|
if probes:
|
|
if not options.silent:
|
|
logger.info('Finding Neighbors')
|
|
conn = DbConnect(dbName)
|
|
cns = conn.GetColumnNames(fpTableName)
|
|
curs = conn.GetCursor()
|
|
|
|
if ids:
|
|
ids = [(x, ) for x in ids]
|
|
curs.execute('create temporary table _tmpTbl (%(idCol)s %(idTyp)s)' % locals())
|
|
curs.executemany('insert into _tmpTbl values (?)', ids)
|
|
join = 'join _tmpTbl using (%(idCol)s)' % locals()
|
|
else:
|
|
join = ''
|
|
|
|
if cns[0].lower() != idCol.lower():
|
|
# backwards compatibility to the days when mol tables had a guid and
|
|
# the fps tables did not:
|
|
curs.execute("attach database '%(molDbName)s' as mols" % locals())
|
|
curs.execute("""
|
|
select %(idCol)s,%(fpColName)s from %(fpTableName)s join
|
|
(select %(idCol)s,%(molIdName)s from mols.molecules %(join)s)
|
|
using (%(molIdName)s)
|
|
""" % (locals()))
|
|
else:
|
|
curs.execute('select %(idCol)s,%(fpColName)s from %(fpTableName)s %(join)s' % locals())
|
|
|
|
def poolFromCurs(curs, similarityMethod):
|
|
row = curs.fetchone()
|
|
while row:
|
|
id, pkl = row
|
|
fp = DepickleFP(pkl, similarityMethod)
|
|
yield (id, fp)
|
|
row = curs.fetchone()
|
|
|
|
topNLists = GetNeighborLists(probes, options.topN, poolFromCurs(curs, options.similarityType),
|
|
simMetric=simMetric, simThresh=options.simThresh, **extraArgs)
|
|
uniqIds = set()
|
|
nbrLists = {}
|
|
for i, nm in enumerate(nms):
|
|
topNLists[i].reverse()
|
|
scores = topNLists[i].GetPts()
|
|
nbrNames = topNLists[i].GetExtras()
|
|
nbrs = []
|
|
for j, nbrGuid in enumerate(nbrNames):
|
|
if nbrGuid is None:
|
|
break
|
|
else:
|
|
uniqIds.add(nbrGuid)
|
|
nbrs.append((nbrGuid, scores[j]))
|
|
nbrLists[(i, nm)] = nbrs
|
|
t2 = time.time()
|
|
if not options.silent:
|
|
logger.info('The search took %.1f seconds' % (t2 - t1))
|
|
|
|
if not options.silent:
|
|
logger.info('Creating output')
|
|
|
|
curs = mConn.GetCursor()
|
|
ids = list(uniqIds)
|
|
|
|
ids = [(x, ) for x in ids]
|
|
curs.execute('create temporary table _tmpTbl (%(idCol)s %(idTyp)s)' % locals())
|
|
curs.executemany('insert into _tmpTbl values (?)', ids)
|
|
curs.execute('select %(idCol)s,%(molIdName)s from molecules join _tmpTbl using (%(idCol)s)' %
|
|
locals())
|
|
nmDict = {}
|
|
for guid, id in curs.fetchall():
|
|
nmDict[guid] = str(id)
|
|
|
|
ks = list(nbrLists.keys())
|
|
ks.sort()
|
|
if not options.transpose:
|
|
for i, nm in ks:
|
|
nbrs = nbrLists[(i, nm)]
|
|
nbrTxt = options.outputDelim.join([nm] + ['%s%s%.3f' % (nmDict[id], options.outputDelim,
|
|
score) for id, score in nbrs])
|
|
if outF:
|
|
print(nbrTxt, file=outF)
|
|
else:
|
|
labels = ['%s%sSimilarity' % (x[1], options.outputDelim) for x in ks]
|
|
if outF:
|
|
print(options.outputDelim.join(labels), file=outF)
|
|
for i in range(options.topN):
|
|
outL = []
|
|
for idx, nm in ks:
|
|
nbr = nbrLists[(idx, nm)][i]
|
|
outL.append(nmDict[nbr[0]])
|
|
outL.append('%.3f' % nbr[1])
|
|
if outF:
|
|
print(options.outputDelim.join(outL), file=outF)
|
|
else:
|
|
if not options.silent:
|
|
logger.info('Creating output')
|
|
curs = mConn.GetCursor()
|
|
ids = [(x, ) for x in set(ids)]
|
|
curs.execute('create temporary table _tmpTbl (%(idCol)s %(idTyp)s)' % locals())
|
|
curs.executemany('insert into _tmpTbl values (?)', ids)
|
|
molIdName = options.molIdName
|
|
curs.execute('select %(idCol)s,%(molIdName)s from molecules join _tmpTbl using (%(idCol)s)' %
|
|
locals())
|
|
nmDict = {}
|
|
for guid, id in curs.fetchall():
|
|
nmDict[guid] = str(id)
|
|
if outF:
|
|
print('\n'.join(nmDict.values()), file=outF)
|
|
if molsOut and ids:
|
|
molDbName = os.path.join(options.dbDir, options.molDbName)
|
|
cns = [x.lower() for x in mConn.GetColumnNames('molecules')]
|
|
if cns[-1] != 'molpkl':
|
|
cns.remove('molpkl')
|
|
cns.append('molpkl')
|
|
|
|
curs = mConn.GetCursor()
|
|
#curs.execute('create temporary table _tmpTbl (guid integer)'%locals())
|
|
#curs.executemany('insert into _tmpTbl values (?)',ids)
|
|
cnText = ','.join(cns)
|
|
curs.execute('select %(cnText)s from molecules join _tmpTbl using (%(idCol)s)' % locals())
|
|
|
|
row = curs.fetchone()
|
|
molD = {}
|
|
while row:
|
|
row = list(row)
|
|
m = _molFromPkl(row[-1])
|
|
guid = row[0]
|
|
nm = nmDict[guid]
|
|
if sdfOut:
|
|
m.SetProp('_Name', nm)
|
|
print(Chem.MolToMolBlock(m), file=sdfOut)
|
|
for i in range(1, len(cns) - 1):
|
|
pn = cns[i]
|
|
pv = str(row[i])
|
|
print >> sdfOut, '> <%s>\n%s\n' % (pn, pv)
|
|
print('$$$$', file=sdfOut)
|
|
if smilesOut:
|
|
smi = Chem.MolToSmiles(m, options.chiralSmiles)
|
|
if smilesOut:
|
|
print('%s %s' % (smi, str(row[1])), file=smilesOut)
|
|
row = curs.fetchone()
|
|
if not options.silent:
|
|
logger.info('Done!')
|
|
|
|
# ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
|
|
|
|
def initParser():
|
|
""" Initialize the command line parser """
|
|
parser = argparse.ArgumentParser(usage='SearchDB [optional arguments] <sdffilename>',
|
|
description=_description,
|
|
formatter_class=argparse.RawDescriptionHelpFormatter)
|
|
|
|
parser.add_argument('filename', nargs='?', help='File containg molecules for searching')
|
|
parser.add_argument('--version', action='version', version='%(prog)s ' + _version)
|
|
|
|
parser.add_argument('--dbDir', default='',
|
|
help='name of the directory containing the database information. The default is the current directory')
|
|
parser.add_argument('--molDbName', default='Compounds.sqlt', help='name of the molecule database')
|
|
parser.add_argument('--molIdName', default='compound_id', help='name of the database key column')
|
|
parser.add_argument('--regName', default='molecules', help='name of the molecular registry table')
|
|
parser.add_argument('--pairDbName', default='AtomPairs.sqlt', help='name of the atom pairs database')
|
|
parser.add_argument('--pairTableName', default='atompairs', help='name of the atom pairs table')
|
|
parser.add_argument('--pairColName', default='atompairfp', help='name of the atom pair column')
|
|
parser.add_argument(
|
|
'--torsionsDbName', default='AtomPairs.sqlt',
|
|
help='name of the topological torsions database (usually the same as the atom pairs database)')
|
|
parser.add_argument(
|
|
'--torsionsTableName', default='atompairs',
|
|
help='name of the topological torsions table (usually the same as the atom pairs table)')
|
|
parser.add_argument('--torsionsColName', default='torsionfp', help='name of the atom pair column')
|
|
parser.add_argument('--fpDbName', default='Fingerprints.sqlt',
|
|
help='name of the 2D fingerprints database')
|
|
parser.add_argument('--fpTableName', default='rdkitfps', help='name of the 2D fingerprints table')
|
|
parser.add_argument('--layeredTableName', default='layeredfps',
|
|
help='name of the layered fingerprints table')
|
|
parser.add_argument('--fpColName', default='',
|
|
help='name of the 2D fingerprint column, a sensible default is used')
|
|
parser.add_argument('--descrDbName', default='Descriptors.sqlt',
|
|
help='name of the descriptor database')
|
|
parser.add_argument('--descrTableName', default='descriptors_v1', help='name of the descriptor table')
|
|
parser.add_argument('--descriptorCalcFilename', default=os.path.join(RDConfig.RDBaseDir, 'Projects',
|
|
'DbCLI', 'moe_like.dsc'),
|
|
help='name of the file containing the descriptor calculator')
|
|
parser.add_argument('--outputDelim', default=',',
|
|
help='the delimiter for the output file. The default is %(default)s')
|
|
parser.add_argument(
|
|
'--topN', default=20, type=int,
|
|
help='the number of neighbors to keep for each query compound. The default is %(default)s')
|
|
|
|
parser.add_argument('--outF', '--outFile', default='-',
|
|
help='The name of the output file. The default is the console (stdout).')
|
|
|
|
parser.add_argument(
|
|
'--transpose', default=False, action="store_true",
|
|
help='print the results out in a transposed form: e.g. neighbors in rows and probe compounds in columns')
|
|
|
|
parser.add_argument('--molFormat', default='sdf', choices=('smiles', 'sdf'),
|
|
help='specify the format of the input file')
|
|
parser.add_argument(
|
|
'--nameProp', default='_Name',
|
|
help='specify the SD property to be used for the molecule names. Default is to use the mol block name')
|
|
|
|
parser.add_argument('--smartsQuery', '--smarts', '--sma', default='',
|
|
help='provide a SMARTS to be used as a substructure query')
|
|
parser.add_argument('--smilesQuery', '--smiles', '--smi', default='',
|
|
help='provide a SMILES to be used as a substructure query')
|
|
parser.add_argument('--negateQuery', '--negate', default=False, action='store_true',
|
|
help='negate the results of the smarts query.')
|
|
parser.add_argument('--propQuery', '--query', '-q', default='',
|
|
help='provide a property query (see the NOTE about property names)')
|
|
|
|
parser.add_argument('--sdfOut', '--sdOut', default='',
|
|
help='export an SD file with the matching molecules')
|
|
parser.add_argument('--smilesOut', '--smiOut', default='',
|
|
help='export a smiles file with the matching molecules')
|
|
parser.add_argument('--nonchiralSmiles', dest='chiralSmiles', default=True, action='store_false',
|
|
help='do not use chiral SMILES in the output')
|
|
parser.add_argument('--silent', default=False, action='store_true',
|
|
help='Do not generate status messages.')
|
|
|
|
parser.add_argument('--zipMols', '--zip', default=False, action='store_true',
|
|
help='read compressed mols from the database')
|
|
|
|
parser.add_argument('--pharm2DTableName', default='pharm2dfps',
|
|
help='name of the Pharm2D fingerprints table')
|
|
parser.add_argument('--fdefFile', '--fdef',
|
|
default=os.path.join(RDConfig.RDDataDir, 'Novartis1.fdef'),
|
|
help='provide the name of the fdef file to use for 2d pharmacophores')
|
|
parser.add_argument('--gobbi2DTableName', default='gobbi2dfps',
|
|
help='name of the Gobbi2D fingerprints table')
|
|
|
|
parser.add_argument(
|
|
'--similarityType', '--simType', '--sim', default='RDK', choices=supportedSimilarityMethods,
|
|
help='Choose the type of similarity to use, possible values: RDK, AtomPairs, TopologicalTorsions, Pharm2D, Gobbi2D, Avalon, Morgan. The default is %(default)s')
|
|
parser.add_argument('--morganFpDbName', default='Fingerprints.sqlt',
|
|
help='name of the morgan fingerprints database')
|
|
parser.add_argument('--morganFpTableName', default='morganfps',
|
|
help='name of the morgan fingerprints table')
|
|
parser.add_argument('--morganFpColName', default='morganfp',
|
|
help='name of the morgan fingerprint column')
|
|
|
|
parser.add_argument(
|
|
'--similarityMetric', '--simMetric', '--metric', default='',
|
|
choices=('tanimoto', 'dice', 'tversky', ''),
|
|
help='Choose the type of similarity to use, possible values: tanimoto, dice, tversky. The default is determined by the fingerprint type')
|
|
parser.add_argument('--tverskyA', default=0.5, type=float, help='Tversky A value')
|
|
parser.add_argument('--tverskyB', default=0.5, type=float, help='Tversky B value')
|
|
parser.add_argument(
|
|
'--simThresh', default=-1, type=float,
|
|
help='threshold to use for similarity searching. If provided, this supersedes the topN argument')
|
|
return parser
|
|
|
|
if __name__ == '__main__':
|
|
|
|
parser = initParser()
|
|
options = parser.parse_args()
|
|
|
|
if options.filename is None and not (options.smilesQuery or options.smartsQuery or options.propQuery):
|
|
parser.error('please either provide a query filename argument or do a data or smarts query')
|
|
|
|
queryFilename = options.filename
|
|
options.queryMol = None
|
|
RunSearch(options, queryFilename)
|