mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-03 21:44:30 +08:00
Get things working with numpy 2.4 and pandas 3.0 (#9072)
* get BertzCT working with numpy 2.4 * test pass with Pandas 3.0 (on windows at least) * update testRanker too * update nb test * run win32 CI tests with different pandas versions also updates boost version * works with pandas 2.0? * update linux_build_py311 -> linux_build_py312 test both old and new pandas can't go higher with the python version yet because the older pandas and numpy are not available. * doctest fix? --------- Co-authored-by: = <=>
This commit is contained in:
committed by
greg landrum
parent
56eb0dfa81
commit
3444408692
@@ -7,9 +7,11 @@ steps:
|
||||
conda update -q conda
|
||||
conda info -a
|
||||
conda create --name rdkit_build -c conda-forge --override-channels $(python) cmake \
|
||||
boost-cpp=$(boost_version) \
|
||||
boost=$(boost_version) \
|
||||
numpy=1.24.3 pillow eigen pandas=2.1 matplotlib-base=3.8 \
|
||||
libboost-python-devel=$(boost_version) \
|
||||
libboost-python=$(boost_version) \
|
||||
libboost-devel=$(boost_version) \
|
||||
libboost=$(boost_version) \
|
||||
numpy=2.4 pillow eigen pandas=3 matplotlib-base=3.8 \
|
||||
cairo
|
||||
conda activate rdkit_build
|
||||
conda config --env --add channels conda-forge
|
||||
@@ -66,6 +68,18 @@ steps:
|
||||
cd build
|
||||
ctest -j $( $(number_of_cores) ) --output-on-failure -T Test
|
||||
displayName: Run tests
|
||||
- bash: |
|
||||
source ${CONDA}/etc/profile.d/conda.sh
|
||||
conda activate rdkit_build
|
||||
conda install -c conda-forge --override-channels numpy=1.24 pandas=2.2
|
||||
export RDBASE=`pwd`
|
||||
export PYTHONPATH=${RDBASE}:${PYTHONPATH}
|
||||
export LD_LIBRARY_PATH=${RDBASE}/lib:${CONDA_PREFIX}/lib:${LD_LIBRARY_PATH}
|
||||
echo "LD_LIBRARY_PATH: " $LD_LIBRARY_PATH
|
||||
export QT_QPA_PLATFORM='offscreen'
|
||||
cd build
|
||||
ctest -j $( $(number_of_cores) ) --output-on-failure -T Test
|
||||
displayName: Run tests with older numpy and pandas
|
||||
- bash: |
|
||||
source ${CONDA}/etc/profile.d/conda.sh
|
||||
conda activate rdkit_build
|
||||
@@ -7,11 +7,12 @@ steps:
|
||||
conda install -n base conda-libmamba-solver
|
||||
conda config --set solver libmamba
|
||||
conda create --name rdkit_build -c conda-forge --override-channels $(python) ^
|
||||
boost=$(boost_version) boost-cpp=$(boost_version) ^
|
||||
libboost-python=(boost_version) ^
|
||||
libboost-python-devel=(boost_version) ^
|
||||
libboost=$(boost_version) ^
|
||||
libboost-devel=$(boost_version) ^
|
||||
numpy matplotlib cairo pillow eigen pandas=2.1 ^
|
||||
numpy matplotlib cairo pillow eigen ^
|
||||
numpy=2.4 pandas=3 ^
|
||||
sphinx myst-parser ipython jupyter pytest nbval cmake
|
||||
call activate rdkit_build
|
||||
conda config --env --add channels conda-forge
|
||||
@@ -59,6 +60,15 @@ steps:
|
||||
cd build
|
||||
ctest -C Release -j $(number_of_cores) --output-on-failure -T Test
|
||||
displayName: Run tests
|
||||
- script: |
|
||||
call activate rdkit_build
|
||||
conda install -c conda-forge --override-channels numpy=1.26 pandas=2.2
|
||||
set RDBASE=%cd%
|
||||
set PYTHONPATH=%RDBASE%;%PYTHONPATH%
|
||||
set PATH=%RDBASE%\lib;%PATH%
|
||||
cd build
|
||||
ctest -C Release -j $(number_of_cores) --output-on-failure -T Test
|
||||
displayName: Run with older numpy and pandas
|
||||
- script: |
|
||||
call activate rdkit_build
|
||||
conda install -c conda-forge --override-channels sphinx myst-parser
|
||||
|
||||
@@ -19,33 +19,33 @@ class TestCase(unittest.TestCase):
|
||||
pass
|
||||
|
||||
def test0GainFuns(self):
|
||||
arr = numpy.array([9, 5])
|
||||
arr = numpy.array([9, 5],float)
|
||||
self.assertTrue(feq(rdit.InfoEntropy(arr), 0.9403))
|
||||
arr = numpy.array([9, 9])
|
||||
arr = numpy.array([9, 9],float)
|
||||
self.assertTrue(feq(rdit.InfoEntropy(arr), 1.0000))
|
||||
arr = numpy.array([5, 5])
|
||||
arr = numpy.array([5, 5],float)
|
||||
self.assertTrue(feq(rdit.InfoEntropy(arr), 1.0000))
|
||||
arr = numpy.array([5, 0])
|
||||
arr = numpy.array([5, 0],float)
|
||||
self.assertTrue(feq(rdit.InfoEntropy(arr), 0.0000))
|
||||
arr = numpy.array([5, 5, 5])
|
||||
arr = numpy.array([5, 5, 5],float)
|
||||
self.assertTrue(feq(rdit.InfoEntropy(arr), 1.5850))
|
||||
arr = numpy.array([2, 5, 5])
|
||||
arr = numpy.array([2, 5, 5],float)
|
||||
self.assertTrue(feq(rdit.InfoEntropy(arr), 1.4834))
|
||||
|
||||
mat2 = numpy.array([[6, 2], [3, 3]])
|
||||
mat2 = numpy.array([[6, 2], [3, 3]],float)
|
||||
self.assertTrue(feq(rdit.InfoGain(mat2), 0.0481))
|
||||
self.assertTrue(feq(rdit.ChiSquare(mat2), 0.9333))
|
||||
|
||||
mat3 = numpy.array([[1, 1], [2, 1]])
|
||||
mat3 = numpy.array([[1, 1], [2, 1]],float)
|
||||
self.assertTrue(feq(rdit.InfoGain(mat3), 0.0200))
|
||||
|
||||
mat4 = numpy.array([[2, 0], [1, 2]])
|
||||
mat4 = numpy.array([[2, 0], [1, 2]],float)
|
||||
self.assertTrue(feq(rdit.InfoGain(mat4), 0.4200))
|
||||
|
||||
mat5 = numpy.array([[0, 0], [0, 0]])
|
||||
mat5 = numpy.array([[0, 0], [0, 0]],float)
|
||||
self.assertTrue(feq(rdit.InfoGain(mat5), 0.0000))
|
||||
|
||||
mat6 = numpy.array([[1, 0], [1, 0]])
|
||||
mat6 = numpy.array([[1, 0], [1, 0]],float)
|
||||
self.assertTrue(feq(rdit.InfoGain(mat6), 0.0000))
|
||||
|
||||
def test1ranker(self):
|
||||
|
||||
@@ -3500,7 +3500,7 @@ These are accessible using Python's help command:
|
||||
>>> m.GetNumAtoms()
|
||||
7
|
||||
>>> help(m.GetNumAtoms)
|
||||
Help on method GetNumAtoms:
|
||||
Help on method GetNumAtoms...
|
||||
<BLANKLINE>
|
||||
GetNumAtoms(...) method of rdkit.Chem.rdchem.Mol instance
|
||||
GetNumAtoms( (Mol)self [, (int)onlyHeavy=-1 [, (bool)onlyExplicit=True]]) -> int :
|
||||
|
||||
@@ -32,20 +32,20 @@ jobs:
|
||||
cxx: g++-11
|
||||
steps:
|
||||
- template: .azure-pipelines/linux_build.yml
|
||||
- job: Ubuntu_x64_py311
|
||||
- job: Ubuntu_x64_py312
|
||||
timeoutInMinutes: 120
|
||||
pool:
|
||||
vmImage: ubuntu-latest
|
||||
variables:
|
||||
python: python=3.11
|
||||
boost_version: 1.82.0
|
||||
python: python=3.12
|
||||
boost_version: 1.89.0
|
||||
compiler: gxx_linux-64
|
||||
cc: gcc-13
|
||||
cxx: g++-13
|
||||
number_of_cores: nproc
|
||||
python_name: python311
|
||||
python_name: python312
|
||||
steps:
|
||||
- template: .azure-pipelines/linux_build_py311.yml
|
||||
- template: .azure-pipelines/linux_build_py312.yml
|
||||
- job: macOS_x64
|
||||
timeoutInMinutes: 120
|
||||
pool:
|
||||
|
||||
@@ -229,7 +229,7 @@ def Chi0(mol):
|
||||
deltas = [x.GetDegree() for x in mol.GetAtoms()]
|
||||
while 0 in deltas:
|
||||
deltas.remove(0)
|
||||
deltas = numpy.array(deltas, 'd')
|
||||
deltas = numpy.array(deltas, float)
|
||||
res = sum(numpy.sqrt(1. / deltas))
|
||||
return res
|
||||
|
||||
@@ -244,7 +244,7 @@ def Chi1(mol):
|
||||
c1s = [x.GetBeginAtom().GetDegree() * x.GetEndAtom().GetDegree() for x in mol.GetBonds()]
|
||||
while 0 in c1s:
|
||||
c1s.remove(0)
|
||||
c1s = numpy.array(c1s, 'd')
|
||||
c1s = numpy.array(c1s, float)
|
||||
res = sum(numpy.sqrt(1. / c1s))
|
||||
return res
|
||||
|
||||
@@ -320,7 +320,7 @@ def _pyChiNv_(mol, order=2):
|
||||
for hkd in _hkDeltas(mol, skipHs=0)])
|
||||
accum = 0.0
|
||||
for path in Chem.FindAllPathsOfLengthN(mol, order + 1, useBonds=0):
|
||||
accum += numpy.prod(deltas[numpy.array(path)])
|
||||
accum += numpy.prod(deltas[numpy.array(path)],float)
|
||||
return accum
|
||||
|
||||
|
||||
@@ -358,7 +358,7 @@ def _pyChi0n(mol):
|
||||
deltas = [_nVal(x) for x in mol.GetAtoms()]
|
||||
while deltas.count(0):
|
||||
deltas.remove(0)
|
||||
deltas = numpy.array(deltas, 'd')
|
||||
deltas = numpy.array(deltas, float)
|
||||
res = sum(numpy.sqrt(1. / deltas))
|
||||
return res
|
||||
|
||||
@@ -367,7 +367,7 @@ def _pyChi1n(mol):
|
||||
""" Similar to Hall Kier Chi1v, but uses nVal instead of valence
|
||||
|
||||
"""
|
||||
delts = numpy.array([_nVal(x) for x in mol.GetAtoms()], 'd')
|
||||
delts = numpy.array([_nVal(x) for x in mol.GetAtoms()], float)
|
||||
res = 0.0
|
||||
for bond in mol.GetBonds():
|
||||
v = delts[bond.GetBeginAtomIdx()] * delts[bond.GetEndAtomIdx()]
|
||||
@@ -391,7 +391,7 @@ def _pyChiNn_(mol, order=2):
|
||||
deltas = numpy.array([(1. / numpy.sqrt(x) if x else 0.0) for x in nval])
|
||||
accum = 0.0
|
||||
for path in Chem.FindAllPathsOfLengthN(mol, order + 1, useBonds=0):
|
||||
accum += numpy.prod(deltas[numpy.array(path)])
|
||||
accum += numpy.prod(deltas[numpy.array(path)],float)
|
||||
return accum
|
||||
|
||||
|
||||
@@ -578,10 +578,10 @@ def _CalculateEntropies(connectionDict, atomTypeDict, numAtoms):
|
||||
"""
|
||||
connectionList = list(connectionDict.values())
|
||||
totConnections = sum(connectionList)
|
||||
connectionIE = totConnections * (entropy.InfoEntropy(numpy.array(connectionList)) +
|
||||
connectionIE = totConnections * (entropy.InfoEntropy(numpy.array(connectionList, float)) +
|
||||
math.log(totConnections) / _log2val)
|
||||
atomTypeList = list(atomTypeDict.values())
|
||||
atomTypeIE = numAtoms * entropy.InfoEntropy(numpy.array(atomTypeList))
|
||||
atomTypeIE = numAtoms * entropy.InfoEntropy(numpy.array(atomTypeList, float))
|
||||
return atomTypeIE + connectionIE
|
||||
|
||||
|
||||
|
||||
@@ -88,12 +88,13 @@ except ImportError:
|
||||
log.warning("Failed to import pandas")
|
||||
raise
|
||||
|
||||
dataframe_applymap = pd.DataFrame.applymap
|
||||
try:
|
||||
if tuple(map(int, (pd.__version__.split(".")))) >= (2, 1, 0):
|
||||
if tuple(map(int, (pd.__version__.split(".")))) < (2, 1, 0):
|
||||
dataframe_applymap = pd.DataFrame.applymap
|
||||
else:
|
||||
dataframe_applymap = pd.DataFrame.map
|
||||
except:
|
||||
pass
|
||||
log.warning("Failed to find a suitable map function for data frames")
|
||||
|
||||
orig_to_html = getattr(to_html_class, "to_html")
|
||||
pprint_thing = pandas_formats.printing.pprint_thing
|
||||
@@ -138,7 +139,7 @@ class MolFormatter:
|
||||
@classmethod
|
||||
def get_formatters(cls, df, orig_formatters):
|
||||
"""Return an instance of MolFormatter for each column that contains Chem.Mol objects"""
|
||||
df_subset = df.select_dtypes("object")
|
||||
df_subset = df.select_dtypes(["object", "string"])
|
||||
return {
|
||||
col: cls(orig_formatters.get(col, None))
|
||||
for col in df_subset.columns[dataframe_applymap(df_subset, MolFormatter.is_mol).any()]
|
||||
|
||||
@@ -431,7 +431,7 @@ def WriteSDF(df, out, molColName='ROMol', idName=None, properties=None, allNumer
|
||||
if allNumeric:
|
||||
properties.extend([
|
||||
dt for dt in df.dtypes.keys()
|
||||
if (np.issubdtype(df.dtypes[dt], np.floating) or np.issubdtype(df.dtypes[dt], np.integer))
|
||||
if not pd.api.types.is_string_dtype(df.dtypes[dt]) and (np.issubdtype(df.dtypes[dt], np.floating) or np.issubdtype(df.dtypes[dt], np.integer))
|
||||
])
|
||||
|
||||
if molColName in properties:
|
||||
|
||||
@@ -212,7 +212,7 @@ class TestPandasTools(unittest.TestCase):
|
||||
|
||||
@unittest.skipIf(not hasattr(rdMolDraw2D, 'MolDraw2DCairo'), 'Cairo not available')
|
||||
def testPandasShouldShowMoleculesWhenTruncating(self):
|
||||
csv_data = '''"Molecule ChEMBL ID";"Molecule Name";"Molecule Max Phase";"Molecular Weight";"#RO5 Violations";"AlogP";"Compound Key";"Smiles";"Standard Type";"Standard Relation";"Standard Value";"Standard Units";"pChEMBL Value";"Data Validity Comment";"Comment";"Uo Units";"Ligand Efficiency BEI";"Ligand Efficiency LE";"Ligand Efficiency LLE";"Ligand Efficiency SEI";"Potential Duplicate";"Assay ChEMBL ID";"Assay Description";"Assay Type";"BAO Format ID";"BAO Label";"Assay Organism";"Assay Tissue ChEMBL ID";"Assay Tissue Name";"Assay Cell Type";"Assay Subcellular Fraction";"Target ChEMBL ID";"Target Name";"Target Organism";"Target Type";"Document ChEMBL ID";"Source ID";"Source Description";"Document Journal";"Document Year";"Cell ChEMBL ID"
|
||||
csv_data = r'''"Molecule ChEMBL ID";"Molecule Name";"Molecule Max Phase";"Molecular Weight";"#RO5 Violations";"AlogP";"Compound Key";"Smiles";"Standard Type";"Standard Relation";"Standard Value";"Standard Units";"pChEMBL Value";"Data Validity Comment";"Comment";"Uo Units";"Ligand Efficiency BEI";"Ligand Efficiency LE";"Ligand Efficiency LLE";"Ligand Efficiency SEI";"Potential Duplicate";"Assay ChEMBL ID";"Assay Description";"Assay Type";"BAO Format ID";"BAO Label";"Assay Organism";"Assay Tissue ChEMBL ID";"Assay Tissue Name";"Assay Cell Type";"Assay Subcellular Fraction";"Target ChEMBL ID";"Target Name";"Target Organism";"Target Type";"Document ChEMBL ID";"Source ID";"Source Description";"Document Journal";"Document Year";"Cell ChEMBL ID"
|
||||
"CHEMBL543779";"";"0";"341.86";"0";"2.60";"1w";"CCN(CC)CCS/C(=N\O)C(=O)c1ccc(C#N)cc1.Cl";"IC50";"'='";"180000.0";"nM";"";"Outside typical range";"";"UO_0000065";"";"";"";"";"False";"CHEMBL644102";"Reversible inhibition of Human AchE";"B";"BAO_0000357";"single protein format";"None";"None";"None";"None";"None";"CHEMBL220";"Acetylcholinesterase";"Homo sapiens";"SINGLE PROTEIN";"CHEMBL1123431";"1";"Scientific Literature";"J. Med. Chem.";"1986";"None"
|
||||
'''
|
||||
try:
|
||||
|
||||
@@ -130,7 +130,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 5,
|
||||
"id": "permanent-liechtenstein",
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
@@ -139,36 +139,51 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Index(['AMW', 'CLOGP', 'CP', 'CR', 'DAYLIGHT.FPG', 'DAYLIGHT_CLOGP', 'FP',\n",
|
||||
" 'ISM', 'LIPINSKI_VIOLATIONS', 'NUM_HACCEPTORS', 'NUM_HDONORS',\n",
|
||||
" 'NUM_HETEROATOMS', 'NUM_LIPINSKIHACCEPTORS', 'NUM_LIPINSKIHDONORS',\n",
|
||||
" 'NUM_RINGS', 'NUM_ROTATABLEBONDS', 'NUM_ROTATABLEBONDS_O', 'P1',\n",
|
||||
" 'SMILES', 'ID', 'ROMol'],\n",
|
||||
" dtype='object')"
|
||||
"['AMW',\n",
|
||||
" 'CLOGP',\n",
|
||||
" 'CP',\n",
|
||||
" 'CR',\n",
|
||||
" 'DAYLIGHT.FPG',\n",
|
||||
" 'DAYLIGHT_CLOGP',\n",
|
||||
" 'FP',\n",
|
||||
" 'ISM',\n",
|
||||
" 'LIPINSKI_VIOLATIONS',\n",
|
||||
" 'NUM_HACCEPTORS',\n",
|
||||
" 'NUM_HDONORS',\n",
|
||||
" 'NUM_HETEROATOMS',\n",
|
||||
" 'NUM_LIPINSKIHACCEPTORS',\n",
|
||||
" 'NUM_LIPINSKIHDONORS',\n",
|
||||
" 'NUM_RINGS',\n",
|
||||
" 'NUM_ROTATABLEBONDS',\n",
|
||||
" 'NUM_ROTATABLEBONDS_O',\n",
|
||||
" 'P1',\n",
|
||||
" 'SMILES',\n",
|
||||
" 'ID',\n",
|
||||
" 'ROMol']"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df.columns"
|
||||
"list(df.columns)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 6,
|
||||
"id": "careful-netherlands",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"<module 'rdkit.Chem.PandasTools' from '/scratch/RDKit_git/rdkit/Chem/PandasTools.py'>"
|
||||
"<module 'rdkit.Chem.PandasTools' from '/localhome/glandrum/RDKit_git/rdkit/Chem/PandasTools.py'>"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -181,7 +196,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 7,
|
||||
"id": "identical-finder",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -261,7 +276,7 @@
|
||||
"4 223.231 2.43 1.869;-0P;4.71 6.390;-0R;4.71"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -281,7 +296,7 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"display_name": "py312_build",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@@ -295,7 +310,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.8"
|
||||
"version": "3.12.3"
|
||||
},
|
||||
"toc": {
|
||||
"base_numbering": 1,
|
||||
|
||||
Reference in New Issue
Block a user