From 8a5403f176efb252dd493fb4d55ca6223a42bf97 Mon Sep 17 00:00:00 2001 From: gedeck Date: Fri, 10 Feb 2017 02:03:59 -0500 Subject: [PATCH] Refactor FeatFinderCLI and add unittests (#1299) * 1194: Review assignments of range in Python code Task-Url: https://github.com/rdkit/rdkit/issues/1194 Either wrapped the range expression into a list or made sure that the code is working with a range object. * Refactored FeatFinderCLI script * Add unit test file for CLI apps in rdkit.Chem Add tests for FeatFinderCLI * Renamed the unit test file * Slight improvement to test coverage * Address comments from review * Add script to Scripts folder #1305 --- Scripts/FeatFinderCLI.py | 9 ++ Scripts/README.md | 7 ++ rdkit/Chem/FeatFinderCLI.py | 144 +++++++++++++--------------- rdkit/Chem/UnitTestFeatFinderCLI.py | 73 ++++++++++++++ rdkit/Chem/test_list.py | 1 + 5 files changed, 158 insertions(+), 76 deletions(-) create mode 100644 Scripts/FeatFinderCLI.py create mode 100644 Scripts/README.md create mode 100644 rdkit/Chem/UnitTestFeatFinderCLI.py diff --git a/Scripts/FeatFinderCLI.py b/Scripts/FeatFinderCLI.py new file mode 100644 index 000000000..1e51996dc --- /dev/null +++ b/Scripts/FeatFinderCLI.py @@ -0,0 +1,9 @@ +#!python +''' +FeatFinderCLI reads molecules as SMILES from the first column of a tab, comma or space +separated file and annotates the atoms of the molecules with their pharmacophore property. + +Use 'FeatFinderCLI.py --help' for further information +''' +from rdkit.Chem import FeatFinderCLI +FeatFinderCLI.main() diff --git a/Scripts/README.md b/Scripts/README.md new file mode 100644 index 000000000..d501fde7b --- /dev/null +++ b/Scripts/README.md @@ -0,0 +1,7 @@ +# RDKit scripts +This folder contains a number of scripts that make use of RDKit functionality. + +## Pharmacophores +### FeatFinderCLI +`FeatFinderCLI` reads molecules as SMILES from the first column of a tab, comma or space +separated file and annotates the atoms of the molecules with their pharmacophore property. \ No newline at end of file diff --git a/rdkit/Chem/FeatFinderCLI.py b/rdkit/Chem/FeatFinderCLI.py index ed545f053..94d052f7d 100644 --- a/rdkit/Chem/FeatFinderCLI.py +++ b/rdkit/Chem/FeatFinderCLI.py @@ -1,4 +1,3 @@ -# $Id$ # # Copyright (C) 2005-2006 Rational Discovery LLC # @@ -10,25 +9,15 @@ # from __future__ import print_function -_version = "$Rev$" -_splashMessage = """ --*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-* - FeatFinderCLI version %s - - Copyright (C) 2005 Rational Discovery LLC - - This software is copyrighted. The software may not be copied, - reproduced, translated or reduced to any electronic medium or - machine-readable form without the prior written consent of - Rational Discovery LLC. --*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-* -""" % _version -from rdkit import Chem -from rdkit.Chem import ChemicalFeatures -from rdkit import RDLogger -logger = RDLogger.logger() -import sys, os +import argparse import re +import os + +from rdkit import Chem +from rdkit import RDLogger +from rdkit.Chem import ChemicalFeatures + +logger = RDLogger.logger() splitExpr = re.compile(r'[ \t,]') @@ -37,68 +26,67 @@ def GetAtomFeatInfo(factory, mol): feats = factory.GetFeaturesForMol(mol) for feat in feats: ids = feat.GetAtomIds() - for id in ids: - if res[id] is None: - res[id] = [] - res[id].append("%s-%s" % (feat.GetFamily(), feat.GetType())) + feature = "%s-%s" % (feat.GetFamily(), feat.GetType()) + for id_ in ids: + if res[id_] is None: + res[id_] = [] + res[id_].append(feature) return res -if __name__ == '__main__': +def initParser(): + """ Initialize the parser """ + parser = argparse.ArgumentParser(description='Determine pharmacophore features of molecules', + epilog=_splashMessage, + formatter_class=argparse.RawDescriptionHelpFormatter) + parser.add_argument('-r', dest='reverseIt', default=False, action='store_true', + help='Set to get atoms lists for each feature.') + parser.add_argument('-n', dest='maxLines', default=-1, help=argparse.SUPPRESS, type=int) + parser.add_argument('fdefFilename', type=existingFile, + help='Pharmacophore feature definition file') + parser.add_argument('smilesFilename', type=existingFile, + help='The smiles file should have SMILES in the first column') + return parser - def Usage(): - message = """ - Usage: FeatFinderCLI [-r] - - NOTE: - - the smiles file should have SMILES in the first column - """ - print(message, file=sys.stderr) +_splashMessage = """ +-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-* + FeatFinderCLI + Part of the RDKit (http://www.rdkit.org) +-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-* +""" - import getopt - args, extras = getopt.getopt(sys.argv[1:], 'r') - reverseIt = False - for arg, val in args: - if arg == '-r': - reverseIt = True - if len(extras) < 2: - Usage() - sys.exit(-1) - print(_splashMessage, file=sys.stderr) - fdefFilename = extras[0] - if not os.path.exists(fdefFilename): - logger.error("Fdef file %s does not exist." % fdefFilename) - sys.exit(-1) +def existingFile(filename): + """ 'type' for argparse - check that filename exists """ + if not os.path.exists(filename): + raise argparse.ArgumentTypeError("{0} does not exist".format(filename)) + return filename + + +def processArgs(args, parser): try: - factory = ChemicalFeatures.BuildFeatureFactory(fdefFilename) + factory = ChemicalFeatures.BuildFeatureFactory(args.fdefFilename) except Exception: - logger.error("Could not parse Fdef file %s." % fdefFilename, exc_info=True) - sys.exit(-1) + parser.error("Could not parse Fdef file {0.fdefFilename}.".format(args)) - smilesFilename = extras[1] - if not os.path.exists(smilesFilename): - logger.error("Smiles file %s does not exist." % smilesFilename) - sys.exit(-1) + with open(args.smilesFilename) as inF: + for lineNo, line in enumerate(inF, 1): + if lineNo == args.maxLines + 1: + break + smi = splitExpr.split(line.strip())[0].strip() + mol = Chem.MolFromSmiles(smi) + if mol is None: + logger.warning("Could not process smiles '%s' on line %d." % (smi, lineNo)) + continue - try: - inF = file(smilesFilename, 'r') - except Exception: - logger.error("Could not open smiles file %s." % smilesFilename, exc_info=True) - sys.exit(-1) - - lineNo = 0 - for line in inF.readlines(): - lineNo += 1 - line = line.strip() - smi = splitExpr.split(line)[0].strip() - mol = Chem.MolFromSmiles(smi) - - if mol is not None: print('Mol-%d\t%s' % (lineNo, smi)) - - if not reverseIt: + if args.reverseIt: + feats = factory.GetFeaturesForMol(mol) + for feat in feats: + print('\t%s-%s: ' % (feat.GetFamily(), feat.GetType()), end='') + print(', '.join([str(x) for x in feat.GetAtomIds()])) + else: featInfo = GetAtomFeatInfo(factory, mol) for i, v in enumerate(featInfo): print('\t% 2s(%d)' % (mol.GetAtomWithIdx(i).GetSymbol(), i + 1), end='') @@ -106,10 +94,14 @@ if __name__ == '__main__': print('\t', ', '.join(v)) else: print() - else: - feats = factory.GetFeaturesForMol(mol) - for feat in feats: - print('\t%s-%s: ' % (feat.GetFamily(), feat.GetType()), end='') - print(', '.join([str(x) for x in feat.GetAtomIds()])) - else: - logger.warning("Could not process smiles '%s' on line %d." % (smi, lineNo)) + + +def main(): + """ Main application """ + parser = initParser() + args = parser.parse_args() + processArgs(args, parser) + + +if __name__ == '__main__': + main() diff --git a/rdkit/Chem/UnitTestFeatFinderCLI.py b/rdkit/Chem/UnitTestFeatFinderCLI.py new file mode 100644 index 000000000..cb1d61b1a --- /dev/null +++ b/rdkit/Chem/UnitTestFeatFinderCLI.py @@ -0,0 +1,73 @@ +from contextlib import contextmanager +import os +import sys +import unittest + +from rdkit import RDConfig +from rdkit.Chem import FeatFinderCLI +from rdkit.six.moves import cStringIO as StringIO + + +class TestCase(unittest.TestCase): + + def test_FeatFinderCLI(self): + smilesFile = os.path.join(RDConfig.RDDataDir, 'NCI', 'first_5K.smi') + featureFile = os.path.join(RDConfig.RDCodeDir, 'Chem', 'Pharm2D', 'test_data', + 'BaseFeatures.fdef') + parser = FeatFinderCLI.initParser() + cmd = '-n 10 {0} {1}'.format(featureFile, smilesFile) + with outputRedirect() as (out, err): + args = parser.parse_args(cmd.split()) + FeatFinderCLI.processArgs(args, parser) + out = out.getvalue() + err = err.getvalue() + self.assertIn('Mol-1', out) + self.assertIn('Acceptor-SingleAtomAcceptor', out) + self.assertIn('C(1)', out) + self.assertNotIn('Mol-11', out) + self.assertEqual(err, '') + + cmd = '-n 2 -r {0} {1}'.format(featureFile, smilesFile) + with outputRedirect() as (out, err): + args = parser.parse_args(cmd.split()) + FeatFinderCLI.processArgs(args, parser) + out = out.getvalue() + err = err.getvalue() + self.assertIn('Mol-1', out) + self.assertIn('Acceptor-SingleAtomAcceptor:', out) + self.assertIn('2, 3, 4', out) + self.assertNotIn('Mol-3', out) + self.assertEqual(err, '') + + def test_FeatFinderCLIexceptions(self): + smilesFile = os.path.join(RDConfig.RDDataDir, 'NCI', 'first_5K.smi') + featureFile = os.path.join(RDConfig.RDCodeDir, 'Chem', 'Pharm2D', 'test_data', + 'BaseFeatures.fdef') + parser = FeatFinderCLI.initParser() + cmd = '-n 10 {0} {1}'.format(smilesFile, smilesFile) + with self.assertRaises(SystemExit), outputRedirect() as (_, err): + args = parser.parse_args(cmd.split()) + FeatFinderCLI.processArgs(args, parser) + self.assertIn('error', err.getvalue()) + + cmd = '-n 10 {0} {1}'.format(featureFile, 'incorrectFilename') + with self.assertRaises(SystemExit), outputRedirect() as (_, err): + args = parser.parse_args(cmd.split()) + FeatFinderCLI.processArgs(args, parser) + self.assertIn('error', err.getvalue()) + + +@contextmanager +def outputRedirect(): + """ Redirect standard output and error to String IO and return """ + try: + _stdout, _stderr = sys.stdout, sys.stderr + sys.stdout = sStdout = StringIO() + sys.stderr = sStderr = StringIO() + yield (sStdout, sStderr) + finally: + sys.stdout, sys.stderr = _stdout, _stderr + + +if __name__ == '__main__': # pragma: nocover + unittest.main() diff --git a/rdkit/Chem/test_list.py b/rdkit/Chem/test_list.py index b5fc55dc0..fb96b31ce 100755 --- a/rdkit/Chem/test_list.py +++ b/rdkit/Chem/test_list.py @@ -22,6 +22,7 @@ tests = [ ("python", "UnitTestPandasTools.py", {}), ("python", "UnitTestPeriodicTable.py", {}), ("python", "UnitTestDocTestsChem.py", {}), + ("python", "UnitTestFeatFinderCLI.py", {}), ("python", "test_list.py", {'dir': 'AtomPairs'}), ("python", "test_list.py", {'dir': 'ChemUtils'}), ("python", "test_list.py", {'dir': 'EState'}),