mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-03 21:44:30 +08:00
Remove a bunch of Python2-related warts (#2315)
* remove all of the "from __future__" imports * remove the first batch of rdkit.six imports/uses * next step of rdkit.six removal * removing xrange, range, and some maps * next round of removals * next round of cleanups * fix inchi test * last bits of "from rdkit.six" are gone * and the last of the six stuff is gone * strange importlib problem
This commit is contained in:
committed by
Brian Kelley
parent
84c1ea5e7a
commit
24f1737839
@@ -1,4 +1,4 @@
|
||||
from __future__ import print_function
|
||||
|
||||
import numpy
|
||||
import time
|
||||
import unittest
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from __future__ import print_function
|
||||
|
||||
import random, operator, itertools, math
|
||||
|
||||
"""
|
||||
|
||||
@@ -29,10 +29,10 @@
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# Created by Greg Landrum and Anna Vulpetti, March 2009
|
||||
from __future__ import print_function
|
||||
|
||||
from rdkit import Chem
|
||||
from rdkit.Chem import BRICS
|
||||
import sys, cPickle, re
|
||||
import sys, pickle, re
|
||||
|
||||
inF = file(sys.argv[1], 'r')
|
||||
inLs = inF.readlines()
|
||||
|
||||
@@ -29,10 +29,10 @@
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# Created by Greg Landrum and Anna Vulpetti, March 2009
|
||||
from __future__ import print_function
|
||||
|
||||
from rdkit.ML.Cluster import Butina
|
||||
from rdkit import DataStructs
|
||||
import sys, cPickle
|
||||
import sys, pickle
|
||||
|
||||
# sims is the list of similarity thresholds used to generate clusters
|
||||
sims = [.9, .8, .7, .6]
|
||||
@@ -42,8 +42,8 @@ uFps = []
|
||||
|
||||
for fileN in sys.argv[1:]:
|
||||
inF = file(sys.argv[1], 'r')
|
||||
cols = cPickle.load(inF)
|
||||
fps = cPickle.load(inF)
|
||||
cols = pickle.load(inF)
|
||||
fps = pickle.load(inF)
|
||||
|
||||
for row in fps:
|
||||
nm, smi, fp = row[:3]
|
||||
|
||||
@@ -29,12 +29,12 @@
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# Created by Greg Landrum and Anna Vulpetti, March 2009
|
||||
from __future__ import print_function
|
||||
|
||||
|
||||
from rdkit import Chem
|
||||
from rdkit.Chem import AllChem
|
||||
from rdkit.Chem.AtomPairs import Pairs, Torsions
|
||||
import sys, cPickle
|
||||
import sys, pickle
|
||||
|
||||
# maxPathLength is the maximum path length in atoms
|
||||
# maxPathLength=6 corresponds to F-FP-5
|
||||
@@ -92,8 +92,8 @@ if __name__ == '__main__':
|
||||
nm = mol.GetProp(nameField)
|
||||
fps.append([nm, smi, fp] + queryMatches)
|
||||
colNames = ['name', 'smiles', 'fp'] + [x for x, y in extraQueries]
|
||||
cPickle.dump(colNames, outF)
|
||||
cPickle.dump(fps, outF)
|
||||
pickle.dump(colNames, outF)
|
||||
pickle.dump(fps, outF)
|
||||
|
||||
print('name1 smiles1 name2 smiles2 name12 smiles12 environment_id ' + ' '.join(
|
||||
[x for x, y in extraQueries]))
|
||||
|
||||
@@ -17,7 +17,7 @@
|
||||
# - sd files containing all molecules belonging to one frame (1.sdf, 2.sdf etc)
|
||||
# - frames.smi containing the (caninical) smiles and count of occurrence
|
||||
#
|
||||
from __future__ import print_function
|
||||
|
||||
|
||||
import os, sys
|
||||
from Chem import AllChem as Chem
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
# peter ertl, august 2015
|
||||
#
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
from rdkit import Chem
|
||||
from rdkit.Chem import rdMolDescriptors
|
||||
import sys, math, gzip, pickle
|
||||
|
||||
@@ -32,7 +32,7 @@
|
||||
|
||||
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
from rdkit import Chem
|
||||
from rdkit.Chem import AllChem
|
||||
from rdkit.Chem import rdqueries
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from __future__ import print_function
|
||||
|
||||
from rdkit import RDConfig
|
||||
from rdkit import Chem
|
||||
import unittest, os.path
|
||||
|
||||
@@ -15,12 +15,11 @@
|
||||
#
|
||||
# peter ertl & greg landrum, september 2013
|
||||
#
|
||||
from __future__ import print_function
|
||||
|
||||
|
||||
from rdkit import Chem
|
||||
from rdkit.Chem import rdMolDescriptors
|
||||
from rdkit.six.moves import cPickle
|
||||
from rdkit.six import iteritems
|
||||
import pickle
|
||||
|
||||
import math
|
||||
from collections import defaultdict
|
||||
@@ -31,132 +30,133 @@ _fscores = None
|
||||
|
||||
|
||||
def readFragmentScores(name='fpscores'):
|
||||
import gzip
|
||||
global _fscores
|
||||
# generate the full path filename:
|
||||
if name == "fpscores":
|
||||
name = op.join(op.dirname(__file__), name)
|
||||
_fscores = cPickle.load(gzip.open('%s.pkl.gz' % name))
|
||||
outDict = {}
|
||||
for i in _fscores:
|
||||
for j in range(1, len(i)):
|
||||
outDict[i[j]] = float(i[0])
|
||||
_fscores = outDict
|
||||
import gzip
|
||||
global _fscores
|
||||
# generate the full path filename:
|
||||
if name == "fpscores":
|
||||
name = op.join(op.dirname(__file__), name)
|
||||
_fscores = pickle.load(gzip.open('%s.pkl.gz' % name))
|
||||
outDict = {}
|
||||
for i in _fscores:
|
||||
for j in range(1, len(i)):
|
||||
outDict[i[j]] = float(i[0])
|
||||
_fscores = outDict
|
||||
|
||||
|
||||
def numBridgeheadsAndSpiro(mol, ri=None):
|
||||
nSpiro = rdMolDescriptors.CalcNumSpiroAtoms(mol)
|
||||
nBridgehead = rdMolDescriptors.CalcNumBridgeheadAtoms(mol)
|
||||
return nBridgehead, nSpiro
|
||||
nSpiro = rdMolDescriptors.CalcNumSpiroAtoms(mol)
|
||||
nBridgehead = rdMolDescriptors.CalcNumBridgeheadAtoms(mol)
|
||||
return nBridgehead, nSpiro
|
||||
|
||||
|
||||
def calculateScore(m):
|
||||
if _fscores is None:
|
||||
readFragmentScores()
|
||||
if _fscores is None:
|
||||
readFragmentScores()
|
||||
|
||||
# fragment score
|
||||
fp = rdMolDescriptors.GetMorganFingerprint(m,
|
||||
2) #<- 2 is the *radius* of the circular fingerprint
|
||||
fps = fp.GetNonzeroElements()
|
||||
score1 = 0.
|
||||
nf = 0
|
||||
for bitId, v in iteritems(fps):
|
||||
nf += v
|
||||
sfp = bitId
|
||||
score1 += _fscores.get(sfp, -4) * v
|
||||
score1 /= nf
|
||||
# fragment score
|
||||
fp = rdMolDescriptors.GetMorganFingerprint(m,
|
||||
2) # <- 2 is the *radius* of the circular fingerprint
|
||||
fps = fp.GetNonzeroElements()
|
||||
score1 = 0.
|
||||
nf = 0
|
||||
for bitId, v in fps.items():
|
||||
nf += v
|
||||
sfp = bitId
|
||||
score1 += _fscores.get(sfp, -4) * v
|
||||
score1 /= nf
|
||||
|
||||
# features score
|
||||
nAtoms = m.GetNumAtoms()
|
||||
nChiralCenters = len(Chem.FindMolChiralCenters(m, includeUnassigned=True))
|
||||
ri = m.GetRingInfo()
|
||||
nBridgeheads, nSpiro = numBridgeheadsAndSpiro(m, ri)
|
||||
nMacrocycles = 0
|
||||
for x in ri.AtomRings():
|
||||
if len(x) > 8:
|
||||
nMacrocycles += 1
|
||||
# features score
|
||||
nAtoms = m.GetNumAtoms()
|
||||
nChiralCenters = len(Chem.FindMolChiralCenters(m, includeUnassigned=True))
|
||||
ri = m.GetRingInfo()
|
||||
nBridgeheads, nSpiro = numBridgeheadsAndSpiro(m, ri)
|
||||
nMacrocycles = 0
|
||||
for x in ri.AtomRings():
|
||||
if len(x) > 8:
|
||||
nMacrocycles += 1
|
||||
|
||||
sizePenalty = nAtoms**1.005 - nAtoms
|
||||
stereoPenalty = math.log10(nChiralCenters + 1)
|
||||
spiroPenalty = math.log10(nSpiro + 1)
|
||||
bridgePenalty = math.log10(nBridgeheads + 1)
|
||||
macrocyclePenalty = 0.
|
||||
# ---------------------------------------
|
||||
# This differs from the paper, which defines:
|
||||
# macrocyclePenalty = math.log10(nMacrocycles+1)
|
||||
# This form generates better results when 2 or more macrocycles are present
|
||||
if nMacrocycles > 0:
|
||||
macrocyclePenalty = math.log10(2)
|
||||
sizePenalty = nAtoms**1.005 - nAtoms
|
||||
stereoPenalty = math.log10(nChiralCenters + 1)
|
||||
spiroPenalty = math.log10(nSpiro + 1)
|
||||
bridgePenalty = math.log10(nBridgeheads + 1)
|
||||
macrocyclePenalty = 0.
|
||||
# ---------------------------------------
|
||||
# This differs from the paper, which defines:
|
||||
# macrocyclePenalty = math.log10(nMacrocycles+1)
|
||||
# This form generates better results when 2 or more macrocycles are present
|
||||
if nMacrocycles > 0:
|
||||
macrocyclePenalty = math.log10(2)
|
||||
|
||||
score2 = 0. - sizePenalty - stereoPenalty - spiroPenalty - bridgePenalty - macrocyclePenalty
|
||||
score2 = 0. - sizePenalty - stereoPenalty - spiroPenalty - bridgePenalty - macrocyclePenalty
|
||||
|
||||
# correction for the fingerprint density
|
||||
# not in the original publication, added in version 1.1
|
||||
# to make highly symmetrical molecules easier to synthetise
|
||||
score3 = 0.
|
||||
if nAtoms > len(fps):
|
||||
score3 = math.log(float(nAtoms) / len(fps)) * .5
|
||||
# correction for the fingerprint density
|
||||
# not in the original publication, added in version 1.1
|
||||
# to make highly symmetrical molecules easier to synthetise
|
||||
score3 = 0.
|
||||
if nAtoms > len(fps):
|
||||
score3 = math.log(float(nAtoms) / len(fps)) * .5
|
||||
|
||||
sascore = score1 + score2 + score3
|
||||
sascore = score1 + score2 + score3
|
||||
|
||||
# need to transform "raw" value into scale between 1 and 10
|
||||
min = -4.0
|
||||
max = 2.5
|
||||
sascore = 11. - (sascore - min + 1) / (max - min) * 9.
|
||||
# smooth the 10-end
|
||||
if sascore > 8.:
|
||||
sascore = 8. + math.log(sascore + 1. - 9.)
|
||||
if sascore > 10.:
|
||||
sascore = 10.0
|
||||
elif sascore < 1.:
|
||||
sascore = 1.0
|
||||
# need to transform "raw" value into scale between 1 and 10
|
||||
min = -4.0
|
||||
max = 2.5
|
||||
sascore = 11. - (sascore - min + 1) / (max - min) * 9.
|
||||
# smooth the 10-end
|
||||
if sascore > 8.:
|
||||
sascore = 8. + math.log(sascore + 1. - 9.)
|
||||
if sascore > 10.:
|
||||
sascore = 10.0
|
||||
elif sascore < 1.:
|
||||
sascore = 1.0
|
||||
|
||||
return sascore
|
||||
return sascore
|
||||
|
||||
|
||||
def processMols(mols):
|
||||
print('smiles\tName\tsa_score')
|
||||
for i, m in enumerate(mols):
|
||||
if m is None:
|
||||
continue
|
||||
print('smiles\tName\tsa_score')
|
||||
for i, m in enumerate(mols):
|
||||
if m is None:
|
||||
continue
|
||||
|
||||
s = calculateScore(m)
|
||||
s = calculateScore(m)
|
||||
|
||||
smiles = Chem.MolToSmiles(m)
|
||||
print(smiles + "\t" + m.GetProp('_Name') + "\t%3f" % s)
|
||||
smiles = Chem.MolToSmiles(m)
|
||||
print(smiles + "\t" + m.GetProp('_Name') + "\t%3f" % s)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
import sys, time
|
||||
import sys
|
||||
import time
|
||||
|
||||
t1 = time.time()
|
||||
readFragmentScores("fpscores")
|
||||
t2 = time.time()
|
||||
t1 = time.time()
|
||||
readFragmentScores("fpscores")
|
||||
t2 = time.time()
|
||||
|
||||
suppl = Chem.SmilesMolSupplier(sys.argv[1])
|
||||
t3 = time.time()
|
||||
processMols(suppl)
|
||||
t4 = time.time()
|
||||
suppl = Chem.SmilesMolSupplier(sys.argv[1])
|
||||
t3 = time.time()
|
||||
processMols(suppl)
|
||||
t4 = time.time()
|
||||
|
||||
print('Reading took %.2f seconds. Calculating took %.2f seconds' % ((t2 - t1), (t4 - t3)),
|
||||
file=sys.stderr)
|
||||
print('Reading took %.2f seconds. Calculating took %.2f seconds' % ((t2 - t1), (t4 - t3)),
|
||||
file=sys.stderr)
|
||||
|
||||
#
|
||||
# Copyright (c) 2013, Novartis Institutes for BioMedical Research Inc.
|
||||
# All rights reserved.
|
||||
#
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met:
|
||||
# met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above
|
||||
# copyright notice, this list of conditions and the following
|
||||
# disclaimer in the documentation and/or other materials provided
|
||||
# copyright notice, this list of conditions and the following
|
||||
# disclaimer in the documentation and/or other materials provided
|
||||
# with the distribution.
|
||||
# * Neither the name of Novartis Institutes for BioMedical Research Inc.
|
||||
# nor the names of its contributors may be used to endorse or promote
|
||||
# * Neither the name of Novartis Institutes for BioMedical Research Inc.
|
||||
# nor the names of its contributors may be used to endorse or promote
|
||||
# products derived from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
|
||||
@@ -29,7 +29,7 @@
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# Created by Jameed Hussain, May 2013
|
||||
from __future__ import print_function
|
||||
|
||||
import sys
|
||||
from optparse import OptionParser
|
||||
from rdkit import Chem
|
||||
|
||||
@@ -31,7 +31,7 @@
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# Created by Jameed Hussain, July 2013
|
||||
from __future__ import print_function
|
||||
|
||||
|
||||
import sys
|
||||
import re
|
||||
|
||||
@@ -29,7 +29,7 @@
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# Created by Jameed Hussain, May 2013
|
||||
from __future__ import print_function
|
||||
|
||||
from rdkit import Chem
|
||||
from rdkit.Chem.Fraggle import FraggleSim
|
||||
|
||||
|
||||
@@ -29,7 +29,7 @@
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# Created by Jameed Hussain, October 2013
|
||||
from __future__ import print_function
|
||||
|
||||
import sys
|
||||
import re
|
||||
from optparse import OptionParser
|
||||
|
||||
@@ -29,7 +29,7 @@
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# Created by Jameed Hussain, September 2012
|
||||
from __future__ import print_function
|
||||
|
||||
import sys
|
||||
import re
|
||||
from rdkit import Chem
|
||||
|
||||
@@ -29,7 +29,7 @@
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# Created by Jameed Hussain, September 2012
|
||||
from __future__ import print_function
|
||||
|
||||
import sys
|
||||
import re
|
||||
from rdkit import Chem
|
||||
|
||||
@@ -29,7 +29,7 @@
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# Created by Jameed Hussain, July 2013
|
||||
from __future__ import print_function
|
||||
|
||||
import sys
|
||||
import re
|
||||
from optparse import OptionParser
|
||||
|
||||
@@ -32,7 +32,7 @@
|
||||
#
|
||||
# Modifications and optimizations by Greg Landrum, July 2015
|
||||
#
|
||||
from __future__ import print_function
|
||||
|
||||
import sys
|
||||
import re
|
||||
from rdkit import Chem
|
||||
|
||||
@@ -29,7 +29,7 @@
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# Created by Jameed Hussain, July 2013
|
||||
from __future__ import print_function
|
||||
|
||||
import sys
|
||||
import os
|
||||
import re
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# coding=utf-8
|
||||
# Copyright (c) 2014 Merck KGaA
|
||||
from __future__ import print_function
|
||||
|
||||
import os, re, gzip, json, requests, sys, optparse, csv
|
||||
from rdkit import Chem
|
||||
from rdkit.Chem import AllChem
|
||||
@@ -16,7 +16,7 @@ from sklearn.cross_validation import train_test_split
|
||||
from sklearn.metrics import roc_curve, auc
|
||||
from sklearn.metrics import precision_score, recall_score
|
||||
from sklearn import preprocessing
|
||||
import cPickle
|
||||
import pickle
|
||||
from pickle import Unpickler
|
||||
import numpy as np
|
||||
import math
|
||||
@@ -1207,8 +1207,8 @@ table th[class*="col-"] {
|
||||
return
|
||||
|
||||
def save_model(self, outfile, model_number=0):
|
||||
"""save Model to file using cPickle.dump"""
|
||||
cPickle.dump(self.model[model_number], file(outfile, "wb+"))
|
||||
"""save Model to file using pickle.dump"""
|
||||
pickle.dump(self.model[model_number], file(outfile, "wb+"))
|
||||
return
|
||||
|
||||
def load_models(self, model_files):
|
||||
|
||||
Reference in New Issue
Block a user