diff --git a/Contrib/NP_Score/npscorer.py b/Contrib/NP_Score/npscorer.py index fccab044f..5e0033350 100644 --- a/Contrib/NP_Score/npscorer.py +++ b/Contrib/NP_Score/npscorer.py @@ -18,37 +18,59 @@ from rdkit import Chem from rdkit.Chem import rdMolDescriptors import sys, math, gzip, pickle import os.path +from collections import namedtuple def readNPModel(filename=os.path.join(os.path.dirname(__file__), 'publicnp.model.gz')): - sys.stderr.write("reading NP model ...\n") + """Reads and returns the scoring model, + which has to be passed to the scoring functions.""" + print("reading NP model ...", file=sys.stderr) fscore = pickle.load(gzip.open(filename)) - sys.stderr.write("model in\n") + print("model in", file=sys.stderr) return fscore -def scoreMol(mol, fscore): +def scoreMolWConfidence(mol, fscore): + """Next to the NP Likeness Score, this function outputs a confidence value + between 0..1 that descibes how many fragments of the tested molecule + were found in the model data set (1: all fragments were found). + + Returns namedtuple NPLikeness(nplikeness, confidence)""" + if mol is None: raise ValueError('invalid molecule') fp = rdMolDescriptors.GetMorganFingerprint(mol, 2) bits = fp.GetNonzeroElements() # calculating the score - score = 0. + score = 0.0 + bits_found = 0 for bit in bits: - score += fscore.get(bit, 0) + if bit in fscore: + bits_found += 1 + score += fscore[bit] + score /= float(mol.GetNumAtoms()) + confidence = float(bits_found / len(bits)) # preventing score explosion for exotic molecules if score > 4: score = 4. + math.log10(score - 4. + 1.) - if score < -4: + elif score < -4: score = -4. - math.log10(-4. - score + 1.) - return score + NPLikeness = namedtuple("NPLikeness", "nplikeness,confidence") + return NPLikeness(score, confidence) + + +def scoreMol(mol, fscore): + """Calculates the Natural Product Likeness of a molecule. + + Returns the score as float in the range -5..5.""" + return scoreMolWConfidence(mol, fscore).nplikeness def processMols(fscore, suppl): - sys.stderr.write("calculating ...\n") + print("calculating ...", file=sys.stderr) count = {} n = 0 for i, m in enumerate(suppl): @@ -62,7 +84,7 @@ def processMols(fscore, suppl): name = m.GetProp('_Name') print(smiles + "\t" + name + "\t" + score) - sys.stderr.write("finished, " + str(n) + " molecules processed\n") + print("finished, " + str(n) + " molecules processed", file=sys.stderr) if __name__ == '__main__':