mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-03 21:44:30 +08:00
Refactor FeatFinderCLI and add unittests (#1299)
* 1194: Review assignments of range in Python code
  Task-Url: https://github.com/rdkit/rdkit/issues/1194
  Either wrapped the range expression into a list or made sure that the code works with a range object.
* Refactored FeatFinderCLI script
* Add unit test file for CLI apps in rdkit.Chem; add tests for FeatFinderCLI
* Renamed the unit test file
* Slight improvement to test coverage
* Address comments from review
* Add script to Scripts folder (#1305)
This commit is contained in:
9
Scripts/FeatFinderCLI.py
Normal file
9
Scripts/FeatFinderCLI.py
Normal file
@@ -0,0 +1,9 @@
|
||||
#!python
'''
FeatFinderCLI reads molecules as SMILES from the first column of a tab-, comma- or
space-separated file and annotates the atoms of the molecules with their pharmacophore
features.

Use 'FeatFinderCLI.py --help' for further information
'''
from rdkit.Chem import FeatFinderCLI

# Only start the command line application when this file is executed as a
# script; importing it must not parse sys.argv or exit the interpreter.
if __name__ == '__main__':
    FeatFinderCLI.main()
|
||||
7
Scripts/README.md
Normal file
7
Scripts/README.md
Normal file
@@ -0,0 +1,7 @@
|
||||
# RDKit scripts
|
||||
This folder contains a number of scripts that make use of RDKit functionality.
|
||||
|
||||
## Pharmacophores
|
||||
### FeatFinderCLI
|
||||
`FeatFinderCLI` reads molecules as SMILES from the first column of a tab-, comma- or
space-separated file and annotates the atoms of each molecule with their pharmacophore
features. Run `FeatFinderCLI.py --help` for further information.
|
||||
@@ -1,4 +1,3 @@
|
||||
# $Id$
|
||||
#
|
||||
# Copyright (C) 2005-2006 Rational Discovery LLC
|
||||
#
|
||||
@@ -10,25 +9,15 @@
|
||||
#
|
||||
from __future__ import print_function
|
||||
|
||||
_version = "$Rev$"
|
||||
_splashMessage = """
|
||||
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
|
||||
FeatFinderCLI version %s
|
||||
|
||||
Copyright (C) 2005 Rational Discovery LLC
|
||||
|
||||
This software is copyrighted. The software may not be copied,
|
||||
reproduced, translated or reduced to any electronic medium or
|
||||
machine-readable form without the prior written consent of
|
||||
Rational Discovery LLC.
|
||||
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
|
||||
""" % _version
|
||||
from rdkit import Chem
|
||||
from rdkit.Chem import ChemicalFeatures
|
||||
from rdkit import RDLogger
|
||||
logger = RDLogger.logger()
|
||||
import sys, os
|
||||
import argparse
|
||||
import re
|
||||
import os
|
||||
|
||||
from rdkit import Chem
|
||||
from rdkit import RDLogger
|
||||
from rdkit.Chem import ChemicalFeatures
|
||||
|
||||
logger = RDLogger.logger()
|
||||
splitExpr = re.compile(r'[ \t,]')
|
||||
|
||||
|
||||
@@ -37,68 +26,67 @@ def GetAtomFeatInfo(factory, mol):
|
||||
feats = factory.GetFeaturesForMol(mol)
|
||||
for feat in feats:
|
||||
ids = feat.GetAtomIds()
|
||||
for id in ids:
|
||||
if res[id] is None:
|
||||
res[id] = []
|
||||
res[id].append("%s-%s" % (feat.GetFamily(), feat.GetType()))
|
||||
feature = "%s-%s" % (feat.GetFamily(), feat.GetType())
|
||||
for id_ in ids:
|
||||
if res[id_] is None:
|
||||
res[id_] = []
|
||||
res[id_].append(feature)
|
||||
return res
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
def initParser():
    """ Build and return the argparse parser of the FeatFinderCLI application.

    The parser accepts the optional flag ``-r`` (stored as ``reverseIt``), the
    optional integer ``-n`` (stored as ``maxLines``, default -1, hidden from the
    help output) and two positional file names, ``fdefFilename`` and
    ``smilesFilename``, which are both validated with ``existingFile``.
    """
    cliParser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description='Determine pharmacophore features of molecules',
        epilog=_splashMessage,
    )

    # Optional switches
    cliParser.add_argument('-r', action='store_true', default=False, dest='reverseIt',
                           help='Set to get atoms lists for each feature.')
    cliParser.add_argument('-n', type=int, default=-1, dest='maxLines',
                           help=argparse.SUPPRESS)

    # Positional input files, in the order they must appear on the command line
    positionalFiles = (
        ('fdefFilename', 'Pharmacophore feature definition file'),
        ('smilesFilename', 'The smiles file should have SMILES in the first column'),
    )
    for argName, helpText in positionalFiles:
        cliParser.add_argument(argName, type=existingFile, help=helpText)

    return cliParser
|
||||
|
||||
def Usage():
|
||||
message = """
|
||||
Usage: FeatFinderCLI [-r] <fdefFilename> <smilesFilename>
|
||||
|
||||
NOTE:
|
||||
- the smiles file should have SMILES in the first column
|
||||
|
||||
"""
|
||||
print(message, file=sys.stderr)
|
||||
_splashMessage = """
|
||||
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
|
||||
FeatFinderCLI
|
||||
Part of the RDKit (http://www.rdkit.org)
|
||||
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
|
||||
"""
|
||||
|
||||
import getopt
|
||||
args, extras = getopt.getopt(sys.argv[1:], 'r')
|
||||
reverseIt = False
|
||||
for arg, val in args:
|
||||
if arg == '-r':
|
||||
reverseIt = True
|
||||
|
||||
if len(extras) < 2:
|
||||
Usage()
|
||||
sys.exit(-1)
|
||||
print(_splashMessage, file=sys.stderr)
|
||||
fdefFilename = extras[0]
|
||||
if not os.path.exists(fdefFilename):
|
||||
logger.error("Fdef file %s does not exist." % fdefFilename)
|
||||
sys.exit(-1)
|
||||
def existingFile(filename):
    """ 'type' for argparse - check that filename exists and is not a directory

    Arguments:
      - filename: path given on the command line

    Returns:
      the unchanged filename, so argparse stores the path itself.

    Raises:
      argparse.ArgumentTypeError if the path does not exist or names a
      directory; argparse reports this as a regular usage error.
    """
    if not os.path.exists(filename):
        raise argparse.ArgumentTypeError("{0} does not exist".format(filename))
    # A directory exists but cannot be opened as a file; reject it here so the
    # user gets a usage error rather than a failure when the path is opened.
    if os.path.isdir(filename):
        raise argparse.ArgumentTypeError("{0} is a directory, not a file".format(filename))
    return filename
|
||||
|
||||
|
||||
def processArgs(args, parser):
|
||||
try:
|
||||
factory = ChemicalFeatures.BuildFeatureFactory(fdefFilename)
|
||||
factory = ChemicalFeatures.BuildFeatureFactory(args.fdefFilename)
|
||||
except Exception:
|
||||
logger.error("Could not parse Fdef file %s." % fdefFilename, exc_info=True)
|
||||
sys.exit(-1)
|
||||
parser.error("Could not parse Fdef file {0.fdefFilename}.".format(args))
|
||||
|
||||
smilesFilename = extras[1]
|
||||
if not os.path.exists(smilesFilename):
|
||||
logger.error("Smiles file %s does not exist." % smilesFilename)
|
||||
sys.exit(-1)
|
||||
with open(args.smilesFilename) as inF:
|
||||
for lineNo, line in enumerate(inF, 1):
|
||||
if lineNo == args.maxLines + 1:
|
||||
break
|
||||
smi = splitExpr.split(line.strip())[0].strip()
|
||||
mol = Chem.MolFromSmiles(smi)
|
||||
if mol is None:
|
||||
logger.warning("Could not process smiles '%s' on line %d." % (smi, lineNo))
|
||||
continue
|
||||
|
||||
try:
|
||||
inF = file(smilesFilename, 'r')
|
||||
except Exception:
|
||||
logger.error("Could not open smiles file %s." % smilesFilename, exc_info=True)
|
||||
sys.exit(-1)
|
||||
|
||||
lineNo = 0
|
||||
for line in inF.readlines():
|
||||
lineNo += 1
|
||||
line = line.strip()
|
||||
smi = splitExpr.split(line)[0].strip()
|
||||
mol = Chem.MolFromSmiles(smi)
|
||||
|
||||
if mol is not None:
|
||||
print('Mol-%d\t%s' % (lineNo, smi))
|
||||
|
||||
if not reverseIt:
|
||||
if args.reverseIt:
|
||||
feats = factory.GetFeaturesForMol(mol)
|
||||
for feat in feats:
|
||||
print('\t%s-%s: ' % (feat.GetFamily(), feat.GetType()), end='')
|
||||
print(', '.join([str(x) for x in feat.GetAtomIds()]))
|
||||
else:
|
||||
featInfo = GetAtomFeatInfo(factory, mol)
|
||||
for i, v in enumerate(featInfo):
|
||||
print('\t% 2s(%d)' % (mol.GetAtomWithIdx(i).GetSymbol(), i + 1), end='')
|
||||
@@ -106,10 +94,14 @@ if __name__ == '__main__':
|
||||
print('\t', ', '.join(v))
|
||||
else:
|
||||
print()
|
||||
else:
|
||||
feats = factory.GetFeaturesForMol(mol)
|
||||
for feat in feats:
|
||||
print('\t%s-%s: ' % (feat.GetFamily(), feat.GetType()), end='')
|
||||
print(', '.join([str(x) for x in feat.GetAtomIds()]))
|
||||
else:
|
||||
logger.warning("Could not process smiles '%s' on line %d." % (smi, lineNo))
|
||||
|
||||
|
||||
def main(argv=None):
    """ Main application

    Arguments:
      - argv: optional list of command line arguments (without the program
        name). When None (the default) argparse reads sys.argv[1:], so
        existing callers of ``main()`` are unaffected.
    """
    parser = initParser()
    args = parser.parse_args(argv)
    processArgs(args, parser)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
||||
73
rdkit/Chem/UnitTestFeatFinderCLI.py
Normal file
73
rdkit/Chem/UnitTestFeatFinderCLI.py
Normal file
@@ -0,0 +1,73 @@
|
||||
from contextlib import contextmanager
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
|
||||
from rdkit import RDConfig
|
||||
from rdkit.Chem import FeatFinderCLI
|
||||
from rdkit.six.moves import cStringIO as StringIO
|
||||
|
||||
|
||||
class TestCase(unittest.TestCase):
    """ Tests for the FeatFinderCLI command line application """

    def setUp(self):
        # Input files shipped with the RDKit distribution
        self.smilesFile = os.path.join(RDConfig.RDDataDir, 'NCI', 'first_5K.smi')
        self.featureFile = os.path.join(RDConfig.RDCodeDir, 'Chem', 'Pharm2D', 'test_data',
                                        'BaseFeatures.fdef')
        self.parser = FeatFinderCLI.initParser()

    def _run(self, argv):
        """ Run the application with the argument list argv.

        Arguments are passed as a list instead of splitting a formatted command
        string, so paths containing whitespace are handled correctly.
        Returns the captured (stdout, stderr) text.
        """
        with outputRedirect() as (out, err):
            args = self.parser.parse_args(argv)
            FeatFinderCLI.processArgs(args, self.parser)
        return out.getvalue(), err.getvalue()

    def _assertExitsWithError(self, argv):
        """ Check that argv makes the application exit and report an error on stderr """
        with self.assertRaises(SystemExit), outputRedirect() as (_, err):
            args = self.parser.parse_args(argv)
            FeatFinderCLI.processArgs(args, self.parser)
        self.assertIn('error', err.getvalue())

    def test_FeatFinderCLI(self):
        # Default mode: features are listed per atom; -n 10 stops after line 10
        out, err = self._run(['-n', '10', self.featureFile, self.smilesFile])
        self.assertIn('Mol-1', out)
        self.assertIn('Acceptor-SingleAtomAcceptor', out)
        self.assertIn('C(1)', out)
        self.assertNotIn('Mol-11', out)
        self.assertEqual(err, '')

        # Reverse mode (-r): atom ids are listed per feature
        out, err = self._run(['-n', '2', '-r', self.featureFile, self.smilesFile])
        self.assertIn('Mol-1', out)
        self.assertIn('Acceptor-SingleAtomAcceptor:', out)
        self.assertIn('2, 3, 4', out)
        self.assertNotIn('Mol-3', out)
        self.assertEqual(err, '')

    def test_FeatFinderCLIexceptions(self):
        # A SMILES file is not a valid feature definition file
        self._assertExitsWithError(['-n', '10', self.smilesFile, self.smilesFile])

        # The SMILES file does not exist
        self._assertExitsWithError(['-n', '10', self.featureFile, 'incorrectFilename'])
|
||||
|
||||
|
||||
@contextmanager
def outputRedirect():
    """ Temporarily replace sys.stdout and sys.stderr with StringIO buffers.

    Yields the tuple (stdout buffer, stderr buffer). The streams that were
    active on entry are put back when the block is left, also when the block
    raises.
    """
    savedStreams = (sys.stdout, sys.stderr)
    try:
        capturedOut = StringIO()
        capturedErr = StringIO()
        sys.stdout = capturedOut
        sys.stderr = capturedErr
        yield (capturedOut, capturedErr)
    finally:
        sys.stdout, sys.stderr = savedStreams
|
||||
|
||||
|
||||
if __name__ == '__main__': # pragma: nocover
|
||||
unittest.main()
|
||||
@@ -22,6 +22,7 @@ tests = [
|
||||
("python", "UnitTestPandasTools.py", {}),
|
||||
("python", "UnitTestPeriodicTable.py", {}),
|
||||
("python", "UnitTestDocTestsChem.py", {}),
|
||||
("python", "UnitTestFeatFinderCLI.py", {}),
|
||||
("python", "test_list.py", {'dir': 'AtomPairs'}),
|
||||
("python", "test_list.py", {'dir': 'ChemUtils'}),
|
||||
("python", "test_list.py", {'dir': 'EState'}),
|
||||
|
||||
Reference in New Issue
Block a user