Add reaction role assignment functionality to the RDKit Contrib

2026-06-03 21:44:30 +08:00 · 2016-09-19 13:09:37 +02:00
parent 01dda4a2bc
commit dd947a65c0
4 changed files with 530 additions and 0 deletions
--- a/Contrib/RxnRoleAssignment/README.txt
+++ b/Contrib/RxnRoleAssignment/README.txt
@@ -0,0 +1,10 @@
+This directory contains the scripts used to assign reaction roles to a given reaction. 
+
+The algorithm used in the scripts is described in the following publication:
+
+Schneider, N.; Stiefl, N.; Landrum G.A. What's what: The (Nearly) Definitive Guide 
+to Reaction Role Assignment. Journal of Chemical Information and Modeling, 2016, submitted.
+
+The approach requires RDKit (www.rdkit.org) to be installed and properly configured.
+
+ 
--- a/Contrib/RxnRoleAssignment/init.py
+++ b/Contrib/RxnRoleAssignment/init.py
--- a/Contrib/RxnRoleAssignment/identifyReactants.py
+++ b/Contrib/RxnRoleAssignment/identifyReactants.py
@@ -0,0 +1,433 @@
+#
+#  Copyright (c) 2016, Novartis Institutes for BioMedical Research Inc.
+#  All rights reserved.
+# 
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: 
+#
+#     * Redistributions of source code must retain the above copyright 
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above
+#       copyright notice, this list of conditions and the following 
+#       disclaimer in the documentation and/or other materials provided 
+#       with the distribution.
+#     * Neither the name of Novartis Institutes for BioMedical Research Inc. 
+#       nor the names of its contributors may be used to endorse or promote 
+#       products derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# Created by Nadine Schneider, July 2016
+
+
+
+from __future__ import print_function
+from rdkit import Chem
+from rdkit.Chem import AllChem
+from rdkit.Chem import rdqueries
+
+from collections import defaultdict, Counter
+import itertools
+import numpy as np
+
+from . import utils
+
+
+class MoleculeDetails(object):
+    
+    __slots__ = ['detailFP','scaffoldFP','bitInfoDetailFP','bitInfoScaffoldFP','reactivity','bitReactivity','molecule']
+    
+    def _atomDetailInvariant(self, mol):
+        mol.UpdatePropertyCache(False)
+        num_atoms = mol.GetNumAtoms()
+        Chem.GetSSSR(mol)
+        rinfo = mol.GetRingInfo()
+        invariants = [0]*num_atoms
+        for i,a in enumerate(mol.GetAtoms()):
+            descriptors=[]
+            descriptors.append(a.GetAtomicNum())
+            descriptors.append(a.GetTotalDegree())
+            descriptors.append(a.GetTotalNumHs())
+            descriptors.append(rinfo.IsAtomInRingOfSize(a.GetIdx(),6))
+            descriptors.append(rinfo.IsAtomInRingOfSize(a.GetIdx(),5))
+            descriptors.append(a.IsInRing())
+            descriptors.append(a.GetIsAromatic())
+            invariants[i]=hash(tuple(descriptors))& 0xffffffff
+        return invariants
+
+    def _atomScaffoldInvariant(self, mol):
+        num_atoms = mol.GetNumAtoms()
+        invariants = [0]*num_atoms
+        for i,a in enumerate(mol.GetAtoms()):
+            descriptors=[]
+            descriptors.append(a.GetAtomicNum())
+            invariants[i]=hash(tuple(descriptors))& 0xffffffff
+        return invariants    
+        
+    def _createFP(self, mol, invariant, bitinfo, useBondTypes=True, radius=1):
+        return AllChem.GetMorganFingerprint(mol=mol, radius=radius, invariants=invariant, useBondTypes=useBondTypes, bitInfo=bitinfo) 
+    
+    def _isHeteroAtom(self, a):
+        return a.GetAtomicNum() not in (6, 1)
+
+    def _isSp3OrAromaticCarbon(self, a):
+        if a.GetAtomicNum() != 6:
+            return False
+        if a.GetIsAromatic():
+            return True
+        for b in a.GetBonds():
+            if b.GetBondTypeAsDouble() > 1.5:
+                return False
+        return True
+
+    def _calcReactivityAtom(self, a):
+        # exclude sp3 carbons or uncharged single heavy atoms such as water molecules
+        if self._isSp3OrAromaticCarbon(a) or (len(a.GetNeighbors())==0 and a.GetFormalCharge()==0):
+            return 0
+        # all other atoms have at least a reactivity of one
+        reactivity=1
+        b = a.GetBonds()
+        # if it is a heteroatom or has an H (we already know it's not SP3 or aromatic) increase the reactivity 
+        if self._isHeteroAtom(a) or a.GetTotalNumHs() > 0:
+            reactivity += 1
+        # slightly increase reactivity for atoms in aromatic rings compared to aliphatic rings
+        if a.IsInRing():
+            if a.GetIsAromatic():
+                reactivity += 0.5
+        # but prefer non-ring atoms
+        else:
+            reactivity += 1
+        # increase reactivity of charged atoms
+        if a.GetFormalCharge():
+            reactivity += 2
+        for bo in b:
+            # look at the direct neighbors of the atom
+            ni = bo.GetOtherAtom(a)
+            # for non-single bonds increase the reactivity
+            if bo.GetBondTypeAsDouble() > 1.5:
+                reactivity += 1
+                # if there are hydrogens attached, increase the reactivity
+                if ni.GetTotalNumHs() > 0:
+                    reactivity+=1
+            # if it is a bond to a hetero atom further increase the reactivity
+            if self._isHeteroAtom(ni):
+                reactivity += 1
+                # bonds between nitrogens and oxygen or between oxygen and oxygen or between nitrogen and nitrogen are more reactive
+                if a.GetAtomicNum() in (7,8) and ni.GetAtomicNum() in (7,8):
+                    reactivity += 2
+                # if the neighbor is a Mg, Si, P, Pd, or Sn atom increase the reactivity 
+                elif ni.GetAtomicNum() in (12,14,15,46,50):
+                    reactivity += 1
+        return reactivity 
+
+    def _calcReactivityMolecule(self, mol):
+        reactivityAtoms = [self._calcReactivityAtom(a) for a in mol.GetAtoms()]
+        return reactivityAtoms
+
+    def __init__(self, molecule, verbose=0):
+        self.molecule= molecule
+        self.bitInfoDetailFP={}
+        self.detailFP = self._createFP(molecule, self._atomDetailInvariant(molecule), self.bitInfoDetailFP)
+        self.bitInfoScaffoldFP={}
+        self.scaffoldFP = self._createFP(molecule, self._atomScaffoldInvariant(molecule), self.bitInfoScaffoldFP, useBondTypes=False)
+        reactivityAtoms = self._calcReactivityMolecule(molecule)
+        reactivity = sum(reactivityAtoms)
+        if Chem.MolToSmiles(molecule) in frequentReagents:
+            reactivity*=0.8
+        self.reactivity = reactivity
+        
+        
+def _calcScore(reactantFP,productFP,bitInfoProd=None,output=False):
+    if output:
+        print("--- _calcScore ---")
+    score=0
+    dFP = productFP-reactantFP
+    numRBits = float(utils.getNumPositiveCounts(reactantFP))
+    if output > 2:
+        print("num RBits: ",numRBits)
+    numPBits = float(utils.getNumPositiveCounts(productFP))
+    if output > 2:
+        print("num PBits: ",numPBits)
+    numUnmappedPBits = float(utils.getNumPositiveCounts(dFP))
+    if output > 2:
+        print("num UnmappedPBits: ",numUnmappedPBits)
+    numUnmappedRBits = float(utils.getNumNegativeCounts(dFP))
+    if output > 2:
+        print("num UnmappedRBits: ",numUnmappedRBits)
+
+    numUnmappedPAtoms=-1
+    bitsUnmappedPAtoms=-1
+    if bitInfoProd is not None:
+        numUnmappedPAtoms,bitsUnmappedPAtoms = utils.getNumPositiveBitCountsOfRadius0(dFP,bitInfoProd)
+        if output > 2:
+            print("num UnmappedPAtoms: ", numUnmappedPAtoms)
+    ratioMappedPBits = 1-(numUnmappedPBits/numPBits)
+    ratioUnmappedRBits = numUnmappedRBits/numRBits
+    score = max(ratioMappedPBits - ratioUnmappedRBits*ratioUnmappedRBits,0)
+
+    if output > 1:
+        print("score: ",score, "(",ratioMappedPBits,",",ratioUnmappedRBits*ratioUnmappedRBits,",",ratioUnmappedRBits,")")
+        
+    return [score,numUnmappedPBits,numUnmappedPAtoms,bitsUnmappedPAtoms]
+
+# Set of frequent reagents derived from all patent reactions
+frequentReagents = set(['CCN(CC)CC', '[Li+]', '[Na+]', 'O=C(O)CC(O)(CC(=O)O)C(=O)O', 'O=S(=O)(O)O', 'CN1CCCC1=O', 'CCN(C(C)C)C(C)C',\
+ 'c1ccncc1', '[K]', 'CC(C)(C)O', 'CCO', 'Cc1ccc(S(=O)(=O)O)cc1', 'ClC(Cl)(Cl)Cl', '[Na]', 'CC(C)(C)[O-]', 'O=C([O-])O', 'COCCOC', '[NH4+]',\
+ 'CC(C)OC(C)C', 'O=C([O-])[O-]', 'CC(=O)OC(C)=O', 'O=C=O', '[Cl-]', 'c1ccc(P(c2ccccc2)c2ccccc2)cc1', '[H-]', 'N#N', 'CN1CCOCC1',\
+ 'C1COCCO1', 'c1ccccc1', '[Cs+]', '[K+]', '[OH-]', 'CCCCCC', 'CCCCC', 'CN(C)C=O', 'C[O-]', 'Cc1ccccc1', 'C1CCC2=NCCCN2CC1', 'CO',\
+ 'CCCCO', 'O=C(O)C(F)(F)F', 'O=P([O-])([O-])[O-]', 'CCOC(C)=O', '[Mg+2]', 'C1CCCCC1', 'O', 'N', 'II', 'O=CO', 'CC(=O)N(C)C', 'CC(=O)O',\
+ 'CCOCC', 'CC(C)O', 'C[Si](C)(C)Cl', 'Cc1ccccc1C', 'CC(C)=O', 'CS(=O)(=O)O', 'CN(C)c1ccncc1', 'Cl', 'ClCCCl', 'O=S(Cl)Cl', 'ClC(Cl)Cl',\
+ '[Li]CCCC', '[Pd]', '[H][H]', '[Br-]', 'CS(C)=O', 'COC(C)(C)C', 'O=S(=O)([O-])[O-]', 'CC(Cl)Cl', 'CC(=O)[O-]', 'CCCC[N+](CCCC)(CCCC)CCCC',\
+ 'ClCCl', 'CC#N', 'C1CCOC1', 'CCCCCCC'])
+
+
+def _getBestCombination(rfps,pfps,output=False):
+
+    if output:
+        print("--- _getBestCombination ---")
+
+    tests=[]
+    numReactants=len(rfps)
+    # generate first all reactant combinations
+    for i in range(1,numReactants+1):
+        for x in itertools.combinations(range(numReactants),i):
+            temp=[]
+            for j in x:
+                # don't include frequent reagents
+                if not rfps[j][1]:
+                    numAtms = rfps[j][0].molecule.GetNumAtoms()
+                    # not test single ions
+                    if numAtms > 1:
+                        # store the number of reactant atoms for later
+                        temp.append((rfps[j][0].molecule.GetNumAtoms(),j))
+                else:
+                    if output > 3:
+                        print("Frequent reagent found: ", j)
+            if temp not in tests:
+                tests.append(temp)
+    # initalisation of the results
+    maxScore=0
+    maxDetailScore=0
+    finalReacts=[[]]
+    # get the product fingerprints
+    productsDetailFP = utils.getSumFps([i.detailFP for i in pfps])
+    productsScaffoldFP = utils.getSumFps([i.scaffoldFP for i in pfps])
+    # get the number of atoms for the product
+    numProductAtoms = 0
+    for i in pfps:
+        numProductAtoms += i.molecule.GetNumAtoms() 
+    # get the bitinfo for the product FP
+    productsDetailFPBitInfo={}
+    productsScaffoldFPBitInfo={}
+    for i in pfps:
+        productsDetailFPBitInfo.update(i.bitInfoDetailFP)
+        productsScaffoldFPBitInfo.update(i.bitInfoScaffoldFP)
+    # set some initial values    
+    numUnmappedPAtoms,bitsUnmappedPAtoms = utils.getNumPositiveBitCountsOfRadius0(productsScaffoldFP,productsScaffoldFPBitInfo)
+    finalNumUnmappedProdAtoms=[[len(productsDetailFP.GetNonzeroElements()),\
+                                len(productsScaffoldFP.GetNonzeroElements()),numUnmappedPAtoms,bitsUnmappedPAtoms]]
+
+    for test in tests:
+        if len(test) < 1:
+            continue
+        # get the number of involved reactant atoms
+        numReactantAtoms = np.array(test)[:,0].sum()
+        # ignore combinations including too many or too few atoms
+        if numReactantAtoms > 5*numProductAtoms or numReactantAtoms < numProductAtoms*0.8:
+            continue
+            
+        if output > 0:
+            print("Combination: ",test)
+            
+        #build the combined reactant FPs
+        reactantsDetailFP = utils.getSumFps([rfps[i[1]][0].detailFP for i in test])
+        reactantsScaffoldFP = utils.getSumFps([rfps[i[1]][0].scaffoldFP for i in test])
+
+        # get the scores for both FPs
+        detailFPScore = _calcScore(reactantsDetailFP,productsDetailFP,bitInfoProd=productsDetailFPBitInfo,output=output)
+        scaffoldFPScore = _calcScore(reactantsScaffoldFP,productsScaffoldFP,bitInfoProd=productsScaffoldFPBitInfo,output=output)
+        # final score
+        score = detailFPScore[0] + scaffoldFPScore[0]
+            
+        if output > 0:
+            print(">>>> score: ", score)
+            print(">>>> scores (detail, scaffold): ", detailFPScore[0], scaffoldFPScore[0])
+            print(">>>> num unmapped productFP bits: ", detailFPScore[1], scaffoldFPScore[1], detailFPScore[2], scaffoldFPScore[2])
+
+        if score > maxScore:
+            maxScore=score
+            maxDetailScore=detailFPScore[0]
+            del finalReacts[:]
+            del finalNumUnmappedProdAtoms[:]
+            # set the final reactants
+            finalReacts.append([i[1] for i in test])
+            # for tracking the mapping of the product atoms include the number of unmapped detailedFP bits, the number of unmapped 
+            # atoms based on the scaffold FP,  the number of unmapped scaffoldFP bits, and the unmapped scaffoldFP bits
+            finalNumUnmappedProdAtoms.append([detailFPScore[1], scaffoldFPScore[2], scaffoldFPScore[1],scaffoldFPScore[-1]])
+            if output > 0:
+                print(" >> maxScore: ", maxScore)
+                print(" >> Final reactants: ", finalReacts)
+            # test for almost perfect matchings (e.g. oxidations, reduction etc.)
+            if scaffoldFPScore[0] > 0.9999 and detailFPScore[0] > 0.8:
+                return finalReacts, finalNumUnmappedProdAtoms
+            # test for number of mapped product atoms e.g. to capture deprotections ealier 
+            if len(finalNumUnmappedProdAtoms) > 0 and len(test) == 1:
+                if finalNumUnmappedProdAtoms[0][1] == 0 and finalNumUnmappedProdAtoms[0][0] <= 3:
+                    return finalReacts, finalNumUnmappedProdAtoms
+        # include alternative solutions
+        elif abs(score - maxScore) < 0.0000001 and score > 0.0:
+            finalReacts.append([i[1] for i in test])
+            finalNumUnmappedProdAtoms.append([detailFPScore[1], scaffoldFPScore[2], scaffoldFPScore[1],scaffoldFPScore[-1]])
+            if output > 0:
+                print(" >> Added alternative result")
+                print(" >> Final reactants: ", finalReacts)
+             
+    return finalReacts, finalNumUnmappedProdAtoms
+
+def _findMissingReactiveReactants(rfps, pfps, currentReactants, unmappedPAtoms, output=False):
+    if output:
+        print("--- _findMissingReactiveReactants ---")
+    if not len(unmappedPAtoms):
+        return currentReactants
+    # if there are unmapped product bits find possible reactants for those
+    else:
+        finalReactants = []
+        numReactants=len(rfps)
+        # investigate all possible solutions of the scoring before
+        for reacts,umPA in zip(currentReactants,unmappedPAtoms):
+            # if there are unmapped product atoms find possible reactants for those
+            finalReactants.append(reacts)
+            if umPA[1] > 0:
+                remainingReactants=set(range(numReactants)).difference(set(reacts))
+                # sort the possible reactants by the reactivity
+                remainingReactants = sorted(remainingReactants, key=lambda x: rfps[x].reactivity/float(rfps[x].molecule.GetNumAtoms()),\
+                                            reverse=True)
+                missingPAtoms = []
+                # get the missing atoms and counts
+                for bit,c in umPA[-1]:
+                    for pbi in range(len(pfps)):
+                        if bit in pfps[pbi].bitInfoScaffoldFP:
+                            a = pfps[pbi].bitInfoScaffoldFP[bit][0]
+                            missingPAtoms.extend([pfps[pbi].molecule.GetAtomWithIdx(a[0]).GetAtomicNum()]*c)
+                missingPAtoms = Counter(missingPAtoms)
+                if output >  0:
+                    print(missingPAtoms)
+                # build queries for the missing atoms
+                queries=[(rdqueries.AtomNumEqualsQueryAtom(a),a) for a in missingPAtoms]
+                maxFullfilledQueries=0
+                maxReactivity=-1
+                addReactants=[]
+                # search for the most reactive reactants capturing all/most of the unmapped product atoms
+                for r in remainingReactants:
+                    if output > 0:
+                        print(" >> Reactant", r, rfps[r].reactivity/float(rfps[r].molecule.GetNumAtoms()))
+                    countFullfilledQueries=0
+                    for q,a in queries:
+                        if len(rfps[r].molecule.GetAtomsMatchingQuery(q)) >= missingPAtoms[a]:
+                            countFullfilledQueries+=1
+                    if output > 0:
+                        print(" Max reactivity", maxReactivity)
+                        print(" Max fullfilled queries", maxFullfilledQueries)
+                    if countFullfilledQueries > maxFullfilledQueries:
+                        maxFullfilledQueries = countFullfilledQueries
+                        maxReactivity = rfps[r].reactivity/float(rfps[r].molecule.GetNumAtoms())
+                        addReactants = [r]
+                    elif maxFullfilledQueries and countFullfilledQueries == maxFullfilledQueries and \
+                         rfps[r].reactivity/float(rfps[r].molecule.GetNumAtoms()) >= maxReactivity:
+                        maxFullfilledQueries = countFullfilledQueries
+                        addReactants.append(r)
+                    if output > 0:
+                        print(" Added reactants", addReactants)
+                finalReactants[-1].extend(addReactants)
+    if output > 0:
+        print(" >> Final reactants", finalReactants)
+    return finalReactants
+
+def _detectObviousReagents(reactants, products):
+    unchangedReacts=set()
+    unchangedProds=set()
+    for i,r in enumerate(reactants):
+        for j,p in enumerate(products):
+            if r==p:
+                unchangedReacts.add(i)
+                unchangedProds.add(j)
+    return unchangedReacts,unchangedProds
+
+def identifyReactants(reaction,output=False):
+    rxn = AllChem.ChemicalReaction(reaction)
+    AllChem.RemoveMappingNumbersFromReactions(rxn)
+    if output:
+        print("--- identifyReactants ---")
+    reactants = rxn.GetReactants()
+    products = rxn.GetProducts()
+    ### Preprocessing
+    uniqueReactants,reactantSmiles = utils.uniqueMolecules(reactants)
+    uniqueProducts,productSmiles = utils.uniqueMolecules(products)
+    # find molecules which do not change in the rxn
+    unmodifiedReactants,unmodifiedProducts = _detectObviousReagents(reactantSmiles, productSmiles)
+    if output:
+        print("  >>> Found reagents in reactants:", unmodifiedReactants)
+        print("  >>> Found reagents in products:", unmodifiedProducts)
+    if len(products) == len(unmodifiedProducts):
+        unmodifiedProducts=set()
+    uniquePotentialReactants = [r for r in sorted(set(uniqueReactants.values()))]
+    uniquePotentialProducts = [p for p in sorted(set(uniqueProducts.values())) if p not in unmodifiedProducts]
+    
+    ### Find the most probable reactants
+    # only generate moleculeDetail objects for unique, potential reactants and products
+    rfps = [MoleculeDetails(reactants[r]) for r in uniquePotentialReactants]
+    pfps = [MoleculeDetails(products[p]) for p in uniquePotentialProducts]
+    
+    rfpsPrep = [(MoleculeDetails(reactants[r]),reactantSmiles[r] in frequentReagents) for r in uniquePotentialReactants]
+
+    reacts, unmappedProdAtoms = _getBestCombination(rfpsPrep,pfps,output=output)
+    # no reactants where found try again including the frequent reagents
+    if np.array(reacts).shape == (1,0):
+        rfpsPrep = [(MoleculeDetails(reactants[r]),0) for r in uniquePotentialReactants]
+        reacts, unmappedProdAtoms = _getBestCombination(rfpsPrep,pfps,output=output)
+    
+    ### Postprocessing
+    # identify missing reactants
+    reacts = _findMissingReactiveReactants(rfps, pfps, reacts, unmappedProdAtoms, output=output)
+    finalreacts = []
+    for i in reacts:
+        temp=[uniquePotentialReactants[j] for j in i]
+        finalreacts.append(set(temp))
+            
+    return finalreacts, unmodifiedReactants, unmodifiedProducts
+
+# reassign the reaction roles of a reaction
+def reassignRXNRoles(rxn):
+    utils.transferAgentsToReactants(rxn)
+    reacts, rAgents, pAgents = identifyReactants(rxn)
+    if len(reacts) < 1:
+        return None
+    new_rxn = AllChem.ChemicalReaction()
+    for i in range(rxn.GetNumProductTemplates()):
+        new_rxn.AddProductTemplate(rxn.GetProductTemplate(i))
+    for i in range(rxn.GetNumReactantTemplates()):
+        if i in reacts[0]:
+            new_rxn.AddReactantTemplate(rxn.GetReactantTemplate(i))
+        else:
+            new_rxn.AddAgentTemplate(rxn.GetReactantTemplate(i))
+    return new_rxn
+
+# clean-up the reaction smiles
+def reassignReactionRoles(smi):
+    rxn = AllChem.ReactionFromSmarts(smi,useSmiles=True)
+    new_rxn = reassignRXNRoles(rxn)
+    if new_rxn is None:
+        return ''
+    smi_new =  AllChem.ReactionToSmiles(new_rxn)
+    return smi_new
--- a/Contrib/RxnRoleAssignment/utils.py
+++ b/Contrib/RxnRoleAssignment/utils.py
@@ -0,0 +1,87 @@
+#
+#  Copyright (c) 2016, Novartis Institutes for BioMedical Research Inc.
+#  All rights reserved.
+# 
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: 
+#
+#     * Redistributions of source code must retain the above copyright 
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above
+#       copyright notice, this list of conditions and the following 
+#       disclaimer in the documentation and/or other materials provided 
+#       with the distribution.
+#     * Neither the name of Novartis Institutes for BioMedical Research Inc. 
+#       nor the names of its contributors may be used to endorse or promote 
+#       products derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# Created by Nadine Schneider, July 2016
+
+
+from rdkit import Chem
+from rdkit.Chem import AllChem
+from collections import defaultdict
+import copy
+
+
+def transferAgentsToReactants(rxn):
+    for a in range(rxn.GetNumAgentTemplates()):
+        agent = rxn.GetAgentTemplate(a)
+        rxn.AddReactantTemplate(agent)
+
+def removeAgentsAndTransferToReactants(rxn):
+    tmp=[]
+    rxn.RemoveAgentTemplates(tmp)
+    for a in tmp:
+        rxn.AddReactantTemplate(a)
+
+def getNumPositiveCounts(fp):
+    count=0
+    for k,v in fp.GetNonzeroElements().items():
+        if v > 0:
+            count+=v
+    return count
+
+def getNumNegativeCounts(fp):
+    count=0
+    for k,v in fp.GetNonzeroElements().items():
+        if v < 0:
+            count+=abs(v)
+    return count
+
+def getNumPositiveBitCountsOfRadius0(fp,bitinfo):
+    count=0
+    bitsUnmappedAtoms=[]
+    for k in bitinfo:
+        if bitinfo[k][0][1] == 0:
+            v = fp[k]
+            if v>0:
+                count += 1
+                bitsUnmappedAtoms.append((k,v))
+    return count,bitsUnmappedAtoms
+
+def getSumFps(fps):
+    summedFP = copy.deepcopy(fps[0])
+    for fp in fps[1:]:
+        summedFP += fp
+    return summedFP
+
+def uniqueMolecules(mols):
+    smiles = [Chem.MolToSmiles(mol) for mol in mols]
+    uniqueMolecules=defaultdict(int)
+    for n,smi in enumerate(smiles):
+        uniqueMolecules[n]=smiles.index(smi)
+    return uniqueMolecules,smiles