Get things working with numpy 2.4 and pandas 3.0 (#9072)

* get BertzCT working with numpy 2.4

* tests pass with Pandas 3.0
(on windows at least)

* update testRanker too

* update nb test

* run win32 CI tests with different pandas versions
also updates boost version

* works with pandas 2.0?

* update linux_build_py311 -> linux_build_py312
test both old and new pandas

Can't go higher with the Python version yet because the older pandas and numpy releases are not available for it.

* doctest fix?

---------

Co-authored-by: = <=>
This commit is contained in:
Greg Landrum
2026-02-04 12:06:21 +01:00
committed by greg landrum
parent 56eb0dfa81
commit 3444408692
10 changed files with 92 additions and 52 deletions

View File

@@ -7,9 +7,11 @@ steps:
conda update -q conda
conda info -a
conda create --name rdkit_build -c conda-forge --override-channels $(python) cmake \
boost-cpp=$(boost_version) \
boost=$(boost_version) \
numpy=1.24.3 pillow eigen pandas=2.1 matplotlib-base=3.8 \
libboost-python-devel=$(boost_version) \
libboost-python=$(boost_version) \
libboost-devel=$(boost_version) \
libboost=$(boost_version) \
numpy=2.4 pillow eigen pandas=3 matplotlib-base=3.8 \
cairo
conda activate rdkit_build
conda config --env --add channels conda-forge
@@ -66,6 +68,18 @@ steps:
cd build
ctest -j $( $(number_of_cores) ) --output-on-failure -T Test
displayName: Run tests
- bash: |
source ${CONDA}/etc/profile.d/conda.sh
conda activate rdkit_build
conda install -c conda-forge --override-channels numpy=1.24 pandas=2.2
export RDBASE=`pwd`
export PYTHONPATH=${RDBASE}:${PYTHONPATH}
export LD_LIBRARY_PATH=${RDBASE}/lib:${CONDA_PREFIX}/lib:${LD_LIBRARY_PATH}
echo "LD_LIBRARY_PATH: " $LD_LIBRARY_PATH
export QT_QPA_PLATFORM='offscreen'
cd build
ctest -j $( $(number_of_cores) ) --output-on-failure -T Test
displayName: Run tests with older numpy and pandas
- bash: |
source ${CONDA}/etc/profile.d/conda.sh
conda activate rdkit_build

View File

@@ -7,11 +7,12 @@ steps:
conda install -n base conda-libmamba-solver
conda config --set solver libmamba
conda create --name rdkit_build -c conda-forge --override-channels $(python) ^
boost=$(boost_version) boost-cpp=$(boost_version) ^
libboost-python=$(boost_version) ^
libboost-python-devel=$(boost_version) ^
libboost=$(boost_version) ^
libboost-devel=$(boost_version) ^
numpy matplotlib cairo pillow eigen pandas=2.1 ^
numpy matplotlib cairo pillow eigen ^
numpy=2.4 pandas=3 ^
sphinx myst-parser ipython jupyter pytest nbval cmake
call activate rdkit_build
conda config --env --add channels conda-forge
@@ -59,6 +60,15 @@ steps:
cd build
ctest -C Release -j $(number_of_cores) --output-on-failure -T Test
displayName: Run tests
- script: |
call activate rdkit_build
conda install -c conda-forge --override-channels numpy=1.26 pandas=2.2
set RDBASE=%cd%
set PYTHONPATH=%RDBASE%;%PYTHONPATH%
set PATH=%RDBASE%\lib;%PATH%
cd build
ctest -C Release -j $(number_of_cores) --output-on-failure -T Test
displayName: Run with older numpy and pandas
- script: |
call activate rdkit_build
conda install -c conda-forge --override-channels sphinx myst-parser

View File

@@ -19,33 +19,33 @@ class TestCase(unittest.TestCase):
pass
def test0GainFuns(self):
arr = numpy.array([9, 5])
arr = numpy.array([9, 5],float)
self.assertTrue(feq(rdit.InfoEntropy(arr), 0.9403))
arr = numpy.array([9, 9])
arr = numpy.array([9, 9],float)
self.assertTrue(feq(rdit.InfoEntropy(arr), 1.0000))
arr = numpy.array([5, 5])
arr = numpy.array([5, 5],float)
self.assertTrue(feq(rdit.InfoEntropy(arr), 1.0000))
arr = numpy.array([5, 0])
arr = numpy.array([5, 0],float)
self.assertTrue(feq(rdit.InfoEntropy(arr), 0.0000))
arr = numpy.array([5, 5, 5])
arr = numpy.array([5, 5, 5],float)
self.assertTrue(feq(rdit.InfoEntropy(arr), 1.5850))
arr = numpy.array([2, 5, 5])
arr = numpy.array([2, 5, 5],float)
self.assertTrue(feq(rdit.InfoEntropy(arr), 1.4834))
mat2 = numpy.array([[6, 2], [3, 3]])
mat2 = numpy.array([[6, 2], [3, 3]],float)
self.assertTrue(feq(rdit.InfoGain(mat2), 0.0481))
self.assertTrue(feq(rdit.ChiSquare(mat2), 0.9333))
mat3 = numpy.array([[1, 1], [2, 1]])
mat3 = numpy.array([[1, 1], [2, 1]],float)
self.assertTrue(feq(rdit.InfoGain(mat3), 0.0200))
mat4 = numpy.array([[2, 0], [1, 2]])
mat4 = numpy.array([[2, 0], [1, 2]],float)
self.assertTrue(feq(rdit.InfoGain(mat4), 0.4200))
mat5 = numpy.array([[0, 0], [0, 0]])
mat5 = numpy.array([[0, 0], [0, 0]],float)
self.assertTrue(feq(rdit.InfoGain(mat5), 0.0000))
mat6 = numpy.array([[1, 0], [1, 0]])
mat6 = numpy.array([[1, 0], [1, 0]],float)
self.assertTrue(feq(rdit.InfoGain(mat6), 0.0000))
def test1ranker(self):

View File

@@ -3500,7 +3500,7 @@ These are accessible using Python's help command:
>>> m.GetNumAtoms()
7
>>> help(m.GetNumAtoms)
Help on method GetNumAtoms:
Help on method GetNumAtoms...
<BLANKLINE>
GetNumAtoms(...) method of rdkit.Chem.rdchem.Mol instance
GetNumAtoms( (Mol)self [, (int)onlyHeavy=-1 [, (bool)onlyExplicit=True]]) -> int :

View File

@@ -32,20 +32,20 @@ jobs:
cxx: g++-11
steps:
- template: .azure-pipelines/linux_build.yml
- job: Ubuntu_x64_py311
- job: Ubuntu_x64_py312
timeoutInMinutes: 120
pool:
vmImage: ubuntu-latest
variables:
python: python=3.11
boost_version: 1.82.0
python: python=3.12
boost_version: 1.89.0
compiler: gxx_linux-64
cc: gcc-13
cxx: g++-13
number_of_cores: nproc
python_name: python311
python_name: python312
steps:
- template: .azure-pipelines/linux_build_py311.yml
- template: .azure-pipelines/linux_build_py312.yml
- job: macOS_x64
timeoutInMinutes: 120
pool:

View File

@@ -229,7 +229,7 @@ def Chi0(mol):
deltas = [x.GetDegree() for x in mol.GetAtoms()]
while 0 in deltas:
deltas.remove(0)
deltas = numpy.array(deltas, 'd')
deltas = numpy.array(deltas, float)
res = sum(numpy.sqrt(1. / deltas))
return res
@@ -244,7 +244,7 @@ def Chi1(mol):
c1s = [x.GetBeginAtom().GetDegree() * x.GetEndAtom().GetDegree() for x in mol.GetBonds()]
while 0 in c1s:
c1s.remove(0)
c1s = numpy.array(c1s, 'd')
c1s = numpy.array(c1s, float)
res = sum(numpy.sqrt(1. / c1s))
return res
@@ -320,7 +320,7 @@ def _pyChiNv_(mol, order=2):
for hkd in _hkDeltas(mol, skipHs=0)])
accum = 0.0
for path in Chem.FindAllPathsOfLengthN(mol, order + 1, useBonds=0):
accum += numpy.prod(deltas[numpy.array(path)])
# dtype has to be given by keyword: the second positional parameter of
# numpy.prod is `axis`, so passing `float` positionally raises a TypeError.
accum += numpy.prod(deltas[numpy.array(path)], dtype=float)
return accum
@@ -358,7 +358,7 @@ def _pyChi0n(mol):
deltas = [_nVal(x) for x in mol.GetAtoms()]
while deltas.count(0):
deltas.remove(0)
deltas = numpy.array(deltas, 'd')
deltas = numpy.array(deltas, float)
res = sum(numpy.sqrt(1. / deltas))
return res
@@ -367,7 +367,7 @@ def _pyChi1n(mol):
""" Similar to Hall Kier Chi1v, but uses nVal instead of valence
"""
delts = numpy.array([_nVal(x) for x in mol.GetAtoms()], 'd')
delts = numpy.array([_nVal(x) for x in mol.GetAtoms()], float)
res = 0.0
for bond in mol.GetBonds():
v = delts[bond.GetBeginAtomIdx()] * delts[bond.GetEndAtomIdx()]
@@ -391,7 +391,7 @@ def _pyChiNn_(mol, order=2):
deltas = numpy.array([(1. / numpy.sqrt(x) if x else 0.0) for x in nval])
accum = 0.0
for path in Chem.FindAllPathsOfLengthN(mol, order + 1, useBonds=0):
accum += numpy.prod(deltas[numpy.array(path)])
# dtype has to be given by keyword: the second positional parameter of
# numpy.prod is `axis`, so passing `float` positionally raises a TypeError.
accum += numpy.prod(deltas[numpy.array(path)], dtype=float)
return accum
@@ -578,10 +578,10 @@ def _CalculateEntropies(connectionDict, atomTypeDict, numAtoms):
"""
connectionList = list(connectionDict.values())
totConnections = sum(connectionList)
connectionIE = totConnections * (entropy.InfoEntropy(numpy.array(connectionList)) +
connectionIE = totConnections * (entropy.InfoEntropy(numpy.array(connectionList, float)) +
math.log(totConnections) / _log2val)
atomTypeList = list(atomTypeDict.values())
atomTypeIE = numAtoms * entropy.InfoEntropy(numpy.array(atomTypeList))
atomTypeIE = numAtoms * entropy.InfoEntropy(numpy.array(atomTypeList, float))
return atomTypeIE + connectionIE

View File

@@ -88,12 +88,13 @@ except ImportError:
log.warning("Failed to import pandas")
raise
dataframe_applymap = pd.DataFrame.applymap
try:
if tuple(map(int, (pd.__version__.split(".")))) >= (2, 1, 0):
if tuple(map(int, (pd.__version__.split(".")))) < (2, 1, 0):
dataframe_applymap = pd.DataFrame.applymap
else:
dataframe_applymap = pd.DataFrame.map
except:
pass
log.warning("Failed to find a suitable map function for data frames")
orig_to_html = getattr(to_html_class, "to_html")
pprint_thing = pandas_formats.printing.pprint_thing
@@ -138,7 +139,7 @@ class MolFormatter:
@classmethod
def get_formatters(cls, df, orig_formatters):
"""Return an instance of MolFormatter for each column that contains Chem.Mol objects"""
df_subset = df.select_dtypes("object")
df_subset = df.select_dtypes(["object", "string"])
return {
col: cls(orig_formatters.get(col, None))
for col in df_subset.columns[dataframe_applymap(df_subset, MolFormatter.is_mol).any()]

View File

@@ -431,7 +431,7 @@ def WriteSDF(df, out, molColName='ROMol', idName=None, properties=None, allNumer
if allNumeric:
properties.extend([
dt for dt in df.dtypes.keys()
if (np.issubdtype(df.dtypes[dt], np.floating) or np.issubdtype(df.dtypes[dt], np.integer))
if not pd.api.types.is_string_dtype(df.dtypes[dt]) and (np.issubdtype(df.dtypes[dt], np.floating) or np.issubdtype(df.dtypes[dt], np.integer))
])
if molColName in properties:

View File

@@ -212,7 +212,7 @@ class TestPandasTools(unittest.TestCase):
@unittest.skipIf(not hasattr(rdMolDraw2D, 'MolDraw2DCairo'), 'Cairo not available')
def testPandasShouldShowMoleculesWhenTruncating(self):
csv_data = '''"Molecule ChEMBL ID";"Molecule Name";"Molecule Max Phase";"Molecular Weight";"#RO5 Violations";"AlogP";"Compound Key";"Smiles";"Standard Type";"Standard Relation";"Standard Value";"Standard Units";"pChEMBL Value";"Data Validity Comment";"Comment";"Uo Units";"Ligand Efficiency BEI";"Ligand Efficiency LE";"Ligand Efficiency LLE";"Ligand Efficiency SEI";"Potential Duplicate";"Assay ChEMBL ID";"Assay Description";"Assay Type";"BAO Format ID";"BAO Label";"Assay Organism";"Assay Tissue ChEMBL ID";"Assay Tissue Name";"Assay Cell Type";"Assay Subcellular Fraction";"Target ChEMBL ID";"Target Name";"Target Organism";"Target Type";"Document ChEMBL ID";"Source ID";"Source Description";"Document Journal";"Document Year";"Cell ChEMBL ID"
csv_data = r'''"Molecule ChEMBL ID";"Molecule Name";"Molecule Max Phase";"Molecular Weight";"#RO5 Violations";"AlogP";"Compound Key";"Smiles";"Standard Type";"Standard Relation";"Standard Value";"Standard Units";"pChEMBL Value";"Data Validity Comment";"Comment";"Uo Units";"Ligand Efficiency BEI";"Ligand Efficiency LE";"Ligand Efficiency LLE";"Ligand Efficiency SEI";"Potential Duplicate";"Assay ChEMBL ID";"Assay Description";"Assay Type";"BAO Format ID";"BAO Label";"Assay Organism";"Assay Tissue ChEMBL ID";"Assay Tissue Name";"Assay Cell Type";"Assay Subcellular Fraction";"Target ChEMBL ID";"Target Name";"Target Organism";"Target Type";"Document ChEMBL ID";"Source ID";"Source Description";"Document Journal";"Document Year";"Cell ChEMBL ID"
"CHEMBL543779";"";"0";"341.86";"0";"2.60";"1w";"CCN(CC)CCS/C(=N\O)C(=O)c1ccc(C#N)cc1.Cl";"IC50";"'='";"180000.0";"nM";"";"Outside typical range";"";"UO_0000065";"";"";"";"";"False";"CHEMBL644102";"Reversible inhibition of Human AchE";"B";"BAO_0000357";"single protein format";"None";"None";"None";"None";"None";"CHEMBL220";"Acetylcholinesterase";"Homo sapiens";"SINGLE PROTEIN";"CHEMBL1123431";"1";"Scientific Literature";"J. Med. Chem.";"1986";"None"
'''
try:

View File

@@ -130,7 +130,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 5,
"id": "permanent-liechtenstein",
"metadata": {
"scrolled": true
@@ -139,36 +139,51 @@
{
"data": {
"text/plain": [
"Index(['AMW', 'CLOGP', 'CP', 'CR', 'DAYLIGHT.FPG', 'DAYLIGHT_CLOGP', 'FP',\n",
" 'ISM', 'LIPINSKI_VIOLATIONS', 'NUM_HACCEPTORS', 'NUM_HDONORS',\n",
" 'NUM_HETEROATOMS', 'NUM_LIPINSKIHACCEPTORS', 'NUM_LIPINSKIHDONORS',\n",
" 'NUM_RINGS', 'NUM_ROTATABLEBONDS', 'NUM_ROTATABLEBONDS_O', 'P1',\n",
" 'SMILES', 'ID', 'ROMol'],\n",
" dtype='object')"
"['AMW',\n",
" 'CLOGP',\n",
" 'CP',\n",
" 'CR',\n",
" 'DAYLIGHT.FPG',\n",
" 'DAYLIGHT_CLOGP',\n",
" 'FP',\n",
" 'ISM',\n",
" 'LIPINSKI_VIOLATIONS',\n",
" 'NUM_HACCEPTORS',\n",
" 'NUM_HDONORS',\n",
" 'NUM_HETEROATOMS',\n",
" 'NUM_LIPINSKIHACCEPTORS',\n",
" 'NUM_LIPINSKIHDONORS',\n",
" 'NUM_RINGS',\n",
" 'NUM_ROTATABLEBONDS',\n",
" 'NUM_ROTATABLEBONDS_O',\n",
" 'P1',\n",
" 'SMILES',\n",
" 'ID',\n",
" 'ROMol']"
]
},
"execution_count": 4,
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.columns"
"list(df.columns)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 6,
"id": "careful-netherlands",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<module 'rdkit.Chem.PandasTools' from '/scratch/RDKit_git/rdkit/Chem/PandasTools.py'>"
"<module 'rdkit.Chem.PandasTools' from '/localhome/glandrum/RDKit_git/rdkit/Chem/PandasTools.py'>"
]
},
"execution_count": 5,
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
@@ -181,7 +196,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 7,
"id": "identical-finder",
"metadata": {},
"outputs": [
@@ -261,7 +276,7 @@
"4 223.231 2.43 1.869;-0P;4.71 6.390;-0R;4.71"
]
},
"execution_count": 6,
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
@@ -281,7 +296,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "py312_build",
"language": "python",
"name": "python3"
},
@@ -295,7 +310,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.8"
"version": "3.12.3"
},
"toc": {
"base_numbering": 1,