Remove a bunch of Python2-related warts (#2315)

* remove all of the "from __future__" imports

* remove the first batch of rdkit.six imports/uses

* next step of rdkit.six removal

* removing xrange, range, and some maps

* next round of removals

* next round of cleanups

* fix inchi test

* last bits of "from rdkit.six" are gone

* and the last of the six stuff is gone

* strange importlib problem
This commit is contained in:
Greg Landrum
2019-03-07 02:43:49 +01:00
committed by Brian Kelley
parent 84c1ea5e7a
commit 24f1737839
296 changed files with 15501 additions and 15517 deletions

View File

@@ -1,4 +1,4 @@
from __future__ import print_function
import numpy
import time
import unittest

View File

@@ -1,4 +1,4 @@
from __future__ import print_function
import random, operator, itertools, math
"""

View File

@@ -29,10 +29,10 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Created by Greg Landrum and Anna Vulpetti, March 2009
from __future__ import print_function
from rdkit import Chem
from rdkit.Chem import BRICS
import sys, cPickle, re
import sys, pickle, re
inF = file(sys.argv[1], 'r')
inLs = inF.readlines()

View File

@@ -29,10 +29,10 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Created by Greg Landrum and Anna Vulpetti, March 2009
from __future__ import print_function
from rdkit.ML.Cluster import Butina
from rdkit import DataStructs
import sys, cPickle
import sys, pickle
# sims is the list of similarity thresholds used to generate clusters
sims = [.9, .8, .7, .6]
@@ -42,8 +42,8 @@ uFps = []
for fileN in sys.argv[1:]:
inF = file(sys.argv[1], 'r')
cols = cPickle.load(inF)
fps = cPickle.load(inF)
cols = pickle.load(inF)
fps = pickle.load(inF)
for row in fps:
nm, smi, fp = row[:3]

View File

@@ -29,12 +29,12 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Created by Greg Landrum and Anna Vulpetti, March 2009
from __future__ import print_function
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem.AtomPairs import Pairs, Torsions
import sys, cPickle
import sys, pickle
# maxPathLength is the maximum path length in atoms
# maxPathLength=6 corresponds to F-FP-5
@@ -92,8 +92,8 @@ if __name__ == '__main__':
nm = mol.GetProp(nameField)
fps.append([nm, smi, fp] + queryMatches)
colNames = ['name', 'smiles', 'fp'] + [x for x, y in extraQueries]
cPickle.dump(colNames, outF)
cPickle.dump(fps, outF)
pickle.dump(colNames, outF)
pickle.dump(fps, outF)
print('name1 smiles1 name2 smiles2 name12 smiles12 environment_id ' + ' '.join(
[x for x, y in extraQueries]))

View File

@@ -17,7 +17,7 @@
# - sd files containing all molecules belonging to one frame (1.sdf, 2.sdf etc)
# - frames.smi containing the (caninical) smiles and count of occurrence
#
from __future__ import print_function
import os, sys
from Chem import AllChem as Chem

View File

@@ -13,7 +13,7 @@
# peter ertl, august 2015
#
from __future__ import print_function
from rdkit import Chem
from rdkit.Chem import rdMolDescriptors
import sys, math, gzip, pickle

View File

@@ -32,7 +32,7 @@
from __future__ import print_function
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import rdqueries

View File

@@ -1,4 +1,4 @@
from __future__ import print_function
from rdkit import RDConfig
from rdkit import Chem
import unittest, os.path

View File

@@ -15,12 +15,11 @@
#
# peter ertl & greg landrum, september 2013
#
from __future__ import print_function
from rdkit import Chem
from rdkit.Chem import rdMolDescriptors
from rdkit.six.moves import cPickle
from rdkit.six import iteritems
import pickle
import math
from collections import defaultdict
@@ -31,132 +30,133 @@ _fscores = None
def readFragmentScores(name='fpscores'):
import gzip
global _fscores
# generate the full path filename:
if name == "fpscores":
name = op.join(op.dirname(__file__), name)
_fscores = cPickle.load(gzip.open('%s.pkl.gz' % name))
outDict = {}
for i in _fscores:
for j in range(1, len(i)):
outDict[i[j]] = float(i[0])
_fscores = outDict
import gzip
global _fscores
# generate the full path filename:
if name == "fpscores":
name = op.join(op.dirname(__file__), name)
_fscores = pickle.load(gzip.open('%s.pkl.gz' % name))
outDict = {}
for i in _fscores:
for j in range(1, len(i)):
outDict[i[j]] = float(i[0])
_fscores = outDict
def numBridgeheadsAndSpiro(mol, ri=None):
nSpiro = rdMolDescriptors.CalcNumSpiroAtoms(mol)
nBridgehead = rdMolDescriptors.CalcNumBridgeheadAtoms(mol)
return nBridgehead, nSpiro
nSpiro = rdMolDescriptors.CalcNumSpiroAtoms(mol)
nBridgehead = rdMolDescriptors.CalcNumBridgeheadAtoms(mol)
return nBridgehead, nSpiro
def calculateScore(m):
if _fscores is None:
readFragmentScores()
if _fscores is None:
readFragmentScores()
# fragment score
fp = rdMolDescriptors.GetMorganFingerprint(m,
2) #<- 2 is the *radius* of the circular fingerprint
fps = fp.GetNonzeroElements()
score1 = 0.
nf = 0
for bitId, v in iteritems(fps):
nf += v
sfp = bitId
score1 += _fscores.get(sfp, -4) * v
score1 /= nf
# fragment score
fp = rdMolDescriptors.GetMorganFingerprint(m,
2) # <- 2 is the *radius* of the circular fingerprint
fps = fp.GetNonzeroElements()
score1 = 0.
nf = 0
for bitId, v in fps.items():
nf += v
sfp = bitId
score1 += _fscores.get(sfp, -4) * v
score1 /= nf
# features score
nAtoms = m.GetNumAtoms()
nChiralCenters = len(Chem.FindMolChiralCenters(m, includeUnassigned=True))
ri = m.GetRingInfo()
nBridgeheads, nSpiro = numBridgeheadsAndSpiro(m, ri)
nMacrocycles = 0
for x in ri.AtomRings():
if len(x) > 8:
nMacrocycles += 1
# features score
nAtoms = m.GetNumAtoms()
nChiralCenters = len(Chem.FindMolChiralCenters(m, includeUnassigned=True))
ri = m.GetRingInfo()
nBridgeheads, nSpiro = numBridgeheadsAndSpiro(m, ri)
nMacrocycles = 0
for x in ri.AtomRings():
if len(x) > 8:
nMacrocycles += 1
sizePenalty = nAtoms**1.005 - nAtoms
stereoPenalty = math.log10(nChiralCenters + 1)
spiroPenalty = math.log10(nSpiro + 1)
bridgePenalty = math.log10(nBridgeheads + 1)
macrocyclePenalty = 0.
# ---------------------------------------
# This differs from the paper, which defines:
# macrocyclePenalty = math.log10(nMacrocycles+1)
# This form generates better results when 2 or more macrocycles are present
if nMacrocycles > 0:
macrocyclePenalty = math.log10(2)
sizePenalty = nAtoms**1.005 - nAtoms
stereoPenalty = math.log10(nChiralCenters + 1)
spiroPenalty = math.log10(nSpiro + 1)
bridgePenalty = math.log10(nBridgeheads + 1)
macrocyclePenalty = 0.
# ---------------------------------------
# This differs from the paper, which defines:
# macrocyclePenalty = math.log10(nMacrocycles+1)
# This form generates better results when 2 or more macrocycles are present
if nMacrocycles > 0:
macrocyclePenalty = math.log10(2)
score2 = 0. - sizePenalty - stereoPenalty - spiroPenalty - bridgePenalty - macrocyclePenalty
score2 = 0. - sizePenalty - stereoPenalty - spiroPenalty - bridgePenalty - macrocyclePenalty
# correction for the fingerprint density
# not in the original publication, added in version 1.1
# to make highly symmetrical molecules easier to synthetise
score3 = 0.
if nAtoms > len(fps):
score3 = math.log(float(nAtoms) / len(fps)) * .5
# correction for the fingerprint density
# not in the original publication, added in version 1.1
# to make highly symmetrical molecules easier to synthetise
score3 = 0.
if nAtoms > len(fps):
score3 = math.log(float(nAtoms) / len(fps)) * .5
sascore = score1 + score2 + score3
sascore = score1 + score2 + score3
# need to transform "raw" value into scale between 1 and 10
min = -4.0
max = 2.5
sascore = 11. - (sascore - min + 1) / (max - min) * 9.
# smooth the 10-end
if sascore > 8.:
sascore = 8. + math.log(sascore + 1. - 9.)
if sascore > 10.:
sascore = 10.0
elif sascore < 1.:
sascore = 1.0
# need to transform "raw" value into scale between 1 and 10
min = -4.0
max = 2.5
sascore = 11. - (sascore - min + 1) / (max - min) * 9.
# smooth the 10-end
if sascore > 8.:
sascore = 8. + math.log(sascore + 1. - 9.)
if sascore > 10.:
sascore = 10.0
elif sascore < 1.:
sascore = 1.0
return sascore
return sascore
def processMols(mols):
print('smiles\tName\tsa_score')
for i, m in enumerate(mols):
if m is None:
continue
print('smiles\tName\tsa_score')
for i, m in enumerate(mols):
if m is None:
continue
s = calculateScore(m)
s = calculateScore(m)
smiles = Chem.MolToSmiles(m)
print(smiles + "\t" + m.GetProp('_Name') + "\t%3f" % s)
smiles = Chem.MolToSmiles(m)
print(smiles + "\t" + m.GetProp('_Name') + "\t%3f" % s)
if __name__ == '__main__':
import sys, time
import sys
import time
t1 = time.time()
readFragmentScores("fpscores")
t2 = time.time()
t1 = time.time()
readFragmentScores("fpscores")
t2 = time.time()
suppl = Chem.SmilesMolSupplier(sys.argv[1])
t3 = time.time()
processMols(suppl)
t4 = time.time()
suppl = Chem.SmilesMolSupplier(sys.argv[1])
t3 = time.time()
processMols(suppl)
t4 = time.time()
print('Reading took %.2f seconds. Calculating took %.2f seconds' % ((t2 - t1), (t4 - t3)),
file=sys.stderr)
print('Reading took %.2f seconds. Calculating took %.2f seconds' % ((t2 - t1), (t4 - t3)),
file=sys.stderr)
#
# Copyright (c) 2013, Novartis Institutes for BioMedical Research Inc.
# All rights reserved.
#
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
# met:
#
# * Redistributions of source code must retain the above copyright
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following
# disclaimer in the documentation and/or other materials provided
# copyright notice, this list of conditions and the following
# disclaimer in the documentation and/or other materials provided
# with the distribution.
# * Neither the name of Novartis Institutes for BioMedical Research Inc.
# nor the names of its contributors may be used to endorse or promote
# * Neither the name of Novartis Institutes for BioMedical Research Inc.
# nor the names of its contributors may be used to endorse or promote
# products derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

View File

@@ -29,7 +29,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Created by Jameed Hussain, May 2013
from __future__ import print_function
import sys
from optparse import OptionParser
from rdkit import Chem

View File

@@ -31,7 +31,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Created by Jameed Hussain, July 2013
from __future__ import print_function
import sys
import re

View File

@@ -29,7 +29,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Created by Jameed Hussain, May 2013
from __future__ import print_function
from rdkit import Chem
from rdkit.Chem.Fraggle import FraggleSim

View File

@@ -29,7 +29,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Created by Jameed Hussain, October 2013
from __future__ import print_function
import sys
import re
from optparse import OptionParser

View File

@@ -29,7 +29,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Created by Jameed Hussain, September 2012
from __future__ import print_function
import sys
import re
from rdkit import Chem

View File

@@ -29,7 +29,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Created by Jameed Hussain, September 2012
from __future__ import print_function
import sys
import re
from rdkit import Chem

View File

@@ -29,7 +29,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Created by Jameed Hussain, July 2013
from __future__ import print_function
import sys
import re
from optparse import OptionParser

View File

@@ -32,7 +32,7 @@
#
# Modifications and optimizations by Greg Landrum, July 2015
#
from __future__ import print_function
import sys
import re
from rdkit import Chem

View File

@@ -29,7 +29,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Created by Jameed Hussain, July 2013
from __future__ import print_function
import sys
import os
import re

View File

@@ -1,6 +1,6 @@
# coding=utf-8
# Copyright (c) 2014 Merck KGaA
from __future__ import print_function
import os, re, gzip, json, requests, sys, optparse, csv
from rdkit import Chem
from rdkit.Chem import AllChem
@@ -16,7 +16,7 @@ from sklearn.cross_validation import train_test_split
from sklearn.metrics import roc_curve, auc
from sklearn.metrics import precision_score, recall_score
from sklearn import preprocessing
import cPickle
import pickle
from pickle import Unpickler
import numpy as np
import math
@@ -1207,8 +1207,8 @@ table th[class*="col-"] {
return
def save_model(self, outfile, model_number=0):
"""save Model to file using cPickle.dump"""
cPickle.dump(self.model[model_number], file(outfile, "wb+"))
"""save Model to file using pickle.dump"""
pickle.dump(self.model[model_number], file(outfile, "wb+"))
return
def load_models(self, model_files):