Refactor FeatFinderCLI and add unittests (#1299)

* 1194: Review assignments of range in Python code 

Task-Url: https://github.com/rdkit/rdkit/issues/1194
Either wrapped the range expression into a list or made sure that the
code is working with a range object.

* Refactored FeatFinderCLI script

* Add unit test file for CLI apps in rdkit.Chem

Add tests for FeatFinderCLI

* Renamed the unit test file

* Slight improvement to test coverage

* Address comments from review

* Add script to Scripts folder #1305
This commit is contained in:
gedeck
2017-02-10 02:03:59 -05:00
committed by Greg Landrum
parent 6152165ad3
commit 8a5403f176
5 changed files with 158 additions and 76 deletions

9
Scripts/FeatFinderCLI.py Normal file
View File

@@ -0,0 +1,9 @@
#!python
'''
FeatFinderCLI reads molecules as SMILES from the first column of a tab-, comma- or
space-separated file and annotates the atoms of the molecules with their pharmacophore
property.
Use 'FeatFinderCLI.py --help' for further information
'''
from rdkit.Chem import FeatFinderCLI

# Only start the application when this file is executed as a script, so that
# importing it (e.g. by documentation or packaging tools) has no side effects.
if __name__ == '__main__':
    FeatFinderCLI.main()

7
Scripts/README.md Normal file
View File

@@ -0,0 +1,7 @@
# RDKit scripts
This folder contains a number of scripts that make use of RDKit functionality.
## Pharmacophores
### FeatFinderCLI
`FeatFinderCLI` reads molecules as SMILES from the first column of a tab-, comma- or
space-separated file and annotates the atoms of the molecules with their pharmacophore property.

View File

@@ -1,4 +1,3 @@
# $Id$
#
# Copyright (C) 2005-2006 Rational Discovery LLC
#
@@ -10,25 +9,15 @@
#
from __future__ import print_function
_version = "$Rev$"
_splashMessage = """
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
FeatFinderCLI version %s
Copyright (C) 2005 Rational Discovery LLC
This software is copyrighted. The software may not be copied,
reproduced, translated or reduced to any electronic medium or
machine-readable form without the prior written consent of
Rational Discovery LLC.
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
""" % _version
from rdkit import Chem
from rdkit.Chem import ChemicalFeatures
from rdkit import RDLogger
logger = RDLogger.logger()
import sys, os
import argparse
import re
import os
from rdkit import Chem
from rdkit import RDLogger
from rdkit.Chem import ChemicalFeatures
logger = RDLogger.logger()
splitExpr = re.compile(r'[ \t,]')
@@ -37,68 +26,67 @@ def GetAtomFeatInfo(factory, mol):
feats = factory.GetFeaturesForMol(mol)
for feat in feats:
ids = feat.GetAtomIds()
for id in ids:
if res[id] is None:
res[id] = []
res[id].append("%s-%s" % (feat.GetFamily(), feat.GetType()))
feature = "%s-%s" % (feat.GetFamily(), feat.GetType())
for id_ in ids:
if res[id_] is None:
res[id_] = []
res[id_].append(feature)
return res
if __name__ == '__main__':
def initParser():
    """ Build the command line parser of the application

    The parser accepts the flag -r (stored as reverseIt), the option -n (stored as
    maxLines, an integer defaulting to -1 and hidden from the help output) and two
    positional arguments, fdefFilename and smilesFilename, which are both validated
    with existingFile.

    Returns the configured argparse.ArgumentParser.
    """
    cliParser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=_splashMessage,
        description='Determine pharmacophore features of molecules')
    addArgument = cliParser.add_argument

    # Optional arguments
    addArgument('-r', action='store_true', default=False, dest='reverseIt',
                help='Set to get atoms lists for each feature.')
    addArgument('-n', type=int, default=-1, dest='maxLines', help=argparse.SUPPRESS)

    # Positional arguments; their order defines the order on the command line
    addArgument('fdefFilename', type=existingFile,
                help='Pharmacophore feature definition file')
    addArgument('smilesFilename', type=existingFile,
                help='The smiles file should have SMILES in the first column')
    return cliParser
def Usage():
message = """
Usage: FeatFinderCLI [-r] <fdefFilename> <smilesFilename>
NOTE:
- the smiles file should have SMILES in the first column
"""
print(message, file=sys.stderr)
_splashMessage = """
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
FeatFinderCLI
Part of the RDKit (http://www.rdkit.org)
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
"""
import getopt
args, extras = getopt.getopt(sys.argv[1:], 'r')
reverseIt = False
for arg, val in args:
if arg == '-r':
reverseIt = True
if len(extras) < 2:
Usage()
sys.exit(-1)
print(_splashMessage, file=sys.stderr)
fdefFilename = extras[0]
if not os.path.exists(fdefFilename):
logger.error("Fdef file %s does not exist." % fdefFilename)
sys.exit(-1)
def existingFile(filename):
    """ 'type' for argparse - check that filename exists and is not a directory

    Arguments:
      filename: path as given on the command line

    Returns the unchanged filename if the check succeeds.

    Raises argparse.ArgumentTypeError if the path does not exist or refers to a
    directory.
    """
    if not os.path.exists(filename):
        raise argparse.ArgumentTypeError("{0} does not exist".format(filename))
    # A directory passes the existence check but cannot be opened for reading.
    # Reject it here so the problem is reported as an argument error. Only
    # directories are excluded, so special files such as /dev/stdin still pass.
    if os.path.isdir(filename):
        raise argparse.ArgumentTypeError("{0} is a directory".format(filename))
    return filename
def processArgs(args, parser):
try:
factory = ChemicalFeatures.BuildFeatureFactory(fdefFilename)
factory = ChemicalFeatures.BuildFeatureFactory(args.fdefFilename)
except Exception:
logger.error("Could not parse Fdef file %s." % fdefFilename, exc_info=True)
sys.exit(-1)
parser.error("Could not parse Fdef file {0.fdefFilename}.".format(args))
smilesFilename = extras[1]
if not os.path.exists(smilesFilename):
logger.error("Smiles file %s does not exist." % smilesFilename)
sys.exit(-1)
with open(args.smilesFilename) as inF:
for lineNo, line in enumerate(inF, 1):
if lineNo == args.maxLines + 1:
break
smi = splitExpr.split(line.strip())[0].strip()
mol = Chem.MolFromSmiles(smi)
if mol is None:
logger.warning("Could not process smiles '%s' on line %d." % (smi, lineNo))
continue
try:
inF = file(smilesFilename, 'r')
except Exception:
logger.error("Could not open smiles file %s." % smilesFilename, exc_info=True)
sys.exit(-1)
lineNo = 0
for line in inF.readlines():
lineNo += 1
line = line.strip()
smi = splitExpr.split(line)[0].strip()
mol = Chem.MolFromSmiles(smi)
if mol is not None:
print('Mol-%d\t%s' % (lineNo, smi))
if not reverseIt:
if args.reverseIt:
feats = factory.GetFeaturesForMol(mol)
for feat in feats:
print('\t%s-%s: ' % (feat.GetFamily(), feat.GetType()), end='')
print(', '.join([str(x) for x in feat.GetAtomIds()]))
else:
featInfo = GetAtomFeatInfo(factory, mol)
for i, v in enumerate(featInfo):
print('\t% 2s(%d)' % (mol.GetAtomWithIdx(i).GetSymbol(), i + 1), end='')
@@ -106,10 +94,14 @@ if __name__ == '__main__':
print('\t', ', '.join(v))
else:
print()
else:
feats = factory.GetFeaturesForMol(mol)
for feat in feats:
print('\t%s-%s: ' % (feat.GetFamily(), feat.GetType()), end='')
print(', '.join([str(x) for x in feat.GetAtomIds()]))
else:
logger.warning("Could not process smiles '%s' on line %d." % (smi, lineNo))
def main():
    """ Entry point of the application

    Creates the argument parser, parses the command line and hands the parsed
    arguments together with the parser to processArgs.
    """
    cliParser = initParser()
    processArgs(cliParser.parse_args(), cliParser)


if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,73 @@
from contextlib import contextmanager
import os
import sys
import unittest
from rdkit import RDConfig
from rdkit.Chem import FeatFinderCLI
from rdkit.six.moves import cStringIO as StringIO
class TestCase(unittest.TestCase):
    """ Tests for the argument parser and the processing function of FeatFinderCLI """

    def test_FeatFinderCLI(self):
        """ Check the output of both output modes for the first lines of a SMILES file """
        smilesFile = os.path.join(RDConfig.RDDataDir, 'NCI', 'first_5K.smi')
        featureFile = os.path.join(RDConfig.RDCodeDir, 'Chem', 'Pharm2D', 'test_data',
                                   'BaseFeatures.fdef')
        parser = FeatFinderCLI.initParser()

        # The argument vectors are given as lists instead of splitting a formatted
        # string, so that data paths containing whitespace reach the parser intact.
        argv = ['-n', '10', featureFile, smilesFile]
        with outputRedirect() as (out, err):
            args = parser.parse_args(argv)
            FeatFinderCLI.processArgs(args, parser)
        out = out.getvalue()
        err = err.getvalue()
        self.assertIn('Mol-1', out)
        self.assertIn('Acceptor-SingleAtomAcceptor', out)
        self.assertIn('C(1)', out)
        # -n 10 limits processing to the first ten lines
        self.assertNotIn('Mol-11', out)
        self.assertEqual(err, '')

        argv = ['-n', '2', '-r', featureFile, smilesFile]
        with outputRedirect() as (out, err):
            args = parser.parse_args(argv)
            FeatFinderCLI.processArgs(args, parser)
        out = out.getvalue()
        err = err.getvalue()
        self.assertIn('Mol-1', out)
        self.assertIn('Acceptor-SingleAtomAcceptor:', out)
        self.assertIn('2, 3, 4', out)
        self.assertNotIn('Mol-3', out)
        self.assertEqual(err, '')

    def test_FeatFinderCLIexceptions(self):
        """ Check that invalid input files terminate with an error message """
        smilesFile = os.path.join(RDConfig.RDDataDir, 'NCI', 'first_5K.smi')
        featureFile = os.path.join(RDConfig.RDCodeDir, 'Chem', 'Pharm2D', 'test_data',
                                   'BaseFeatures.fdef')
        parser = FeatFinderCLI.initParser()

        # The SMILES file is passed in place of the feature definition file
        argv = ['-n', '10', smilesFile, smilesFile]
        with self.assertRaises(SystemExit), outputRedirect() as (_, err):
            args = parser.parse_args(argv)
            FeatFinderCLI.processArgs(args, parser)
        self.assertIn('error', err.getvalue())

        # The SMILES file name does not refer to an existing file
        argv = ['-n', '10', featureFile, 'incorrectFilename']
        with self.assertRaises(SystemExit), outputRedirect() as (_, err):
            args = parser.parse_args(argv)
            FeatFinderCLI.processArgs(args, parser)
        self.assertIn('error', err.getvalue())
@contextmanager
def outputRedirect():
    """ Redirect standard output and error to String IO and return

    Yields a tuple (stdout buffer, stderr buffer). Everything written to sys.stdout
    and sys.stderr inside the with block is collected in these buffers. The previous
    streams are put back when the block is left, also if an exception was raised.
    """
    # Remember the current streams and create the buffers before entering the try
    # block, so that the finally clause can only run once there is something valid
    # to restore.
    _stdout, _stderr = sys.stdout, sys.stderr
    sStdout, sStderr = StringIO(), StringIO()
    sys.stdout, sys.stderr = sStdout, sStderr
    try:
        yield (sStdout, sStderr)
    finally:
        sys.stdout, sys.stderr = _stdout, _stderr
if __name__ == '__main__': # pragma: nocover
unittest.main()

View File

@@ -22,6 +22,7 @@ tests = [
("python", "UnitTestPandasTools.py", {}),
("python", "UnitTestPeriodicTable.py", {}),
("python", "UnitTestDocTestsChem.py", {}),
("python", "UnitTestFeatFinderCLI.py", {}),
("python", "test_list.py", {'dir': 'AtomPairs'}),
("python", "test_list.py", {'dir': 'ChemUtils'}),
("python", "test_list.py", {'dir': 'EState'}),