diff --git a/Code/DataStructs/Wrap/DataStructs.cpp b/Code/DataStructs/Wrap/DataStructs.cpp index 31956fc39..a2db382af 100644 --- a/Code/DataStructs/Wrap/DataStructs.cpp +++ b/Code/DataStructs/Wrap/DataStructs.cpp @@ -10,6 +10,10 @@ // #define PY_ARRAY_UNIQUE_SYMBOL rddatastructs_array_API +#ifdef RDK_BUILD_THREADSAFE_SSS +#include +#endif + #include #include #include @@ -32,9 +36,26 @@ void wrap_realValVect(); void wrap_sparseIntVect(); void wrap_FPB(); +#ifdef RDK_BUILD_THREADSAFE_SSS +static std::once_flag s_ds_numpy_init_flag; +#endif + +static void ds_ensure_numpy() { +#ifdef RDK_BUILD_THREADSAFE_SSS + std::call_once(s_ds_numpy_init_flag, rdkit_import_array); +#else + static bool initialized = false; + if (!initialized) { + initialized = true; + rdkit_import_array(); + } +#endif +} + namespace { template void converter(const T &v, python::object destArray, U func) { + ds_ensure_numpy(); if (!PyArray_Check(destArray.ptr())) { throw_value_error("Expecting a Numeric array object"); } @@ -64,7 +85,6 @@ void convertToDoubleNumpyArray(const T &v, python::object destArray) { } BOOST_PYTHON_MODULE(cDataStructs) { - rdkit_import_array(); python::scope().attr("__doc__") = "Module containing an assortment of functionality for basic data " "structures.\n" diff --git a/Code/GraphMol/Wrap/CMakeLists.txt b/Code/GraphMol/Wrap/CMakeLists.txt index 028a95a4b..5a33b80c2 100644 --- a/Code/GraphMol/Wrap/CMakeLists.txt +++ b/Code/GraphMol/Wrap/CMakeLists.txt @@ -93,3 +93,6 @@ add_pytest(pyCDXMLTest add_pytest(pySubsetTest ${CMAKE_CURRENT_SOURCE_DIR}/test_subset.py) + +add_pytest(pyTestLazyNumpy + ${CMAKE_CURRENT_SOURCE_DIR}/test_lazy_numpy.py) diff --git a/Code/GraphMol/Wrap/Conformer.cpp b/Code/GraphMol/Wrap/Conformer.cpp index 83a5c7422..063e23ed0 100644 --- a/Code/GraphMol/Wrap/Conformer.cpp +++ b/Code/GraphMol/Wrap/Conformer.cpp @@ -36,6 +36,7 @@ RDGeom::Point3D GetAtomPos(const Conformer *conf, unsigned int aid) { } PyObject *GetPos(const Conformer *conf) { + rdkit_rdchem_ensure_numpy(); const RDGeom::POINT3D_VECT &pos = conf->getPositions(); // define a 2D array with the following size @@ -58,7 +59,9 @@ PyObject *GetPos(const Conformer *conf) { return PyArray_Return(res); } -void SetPos(Conformer *conf, np::ndarray const &array) { +void SetPos(Conformer *conf, python::object const &arrayObj) { + rdkit_rdchem_ensure_numpy(); + np::ndarray array = python::extract(arrayObj); if (array.get_dtype() != np::dtype::get_builtin()) { PyErr_SetString(PyExc_TypeError, "Incorrect array data type"); python::throw_error_already_set(); diff --git a/Code/GraphMol/Wrap/MolOps.cpp b/Code/GraphMol/Wrap/MolOps.cpp index 1624e2d5c..2f6353367 100644 --- a/Code/GraphMol/Wrap/MolOps.cpp +++ b/Code/GraphMol/Wrap/MolOps.cpp @@ -503,6 +503,7 @@ VECT_INT_VECT getSymmSSSR(ROMol &mol, bool includeDativeBonds, PyObject *getDistanceMatrix(ROMol &mol, bool useBO = false, bool useAtomWts = false, bool force = false, const char *prefix = nullptr) { + rdkit_rdmolops_ensure_numpy(); int nats = mol.getNumAtoms(); npy_intp dims[2]; dims[0] = nats; @@ -521,6 +522,7 @@ PyObject *getDistanceMatrix(ROMol &mol, bool useBO = false, PyObject *get3DDistanceMatrix(ROMol &mol, int confId = -1, bool useAtomWts = false, bool force = false, const char *prefix = nullptr) { + rdkit_rdmolops_ensure_numpy(); int nats = mol.getNumAtoms(); npy_intp dims[2]; dims[0] = nats; @@ -542,6 +544,7 @@ PyObject *get3DDistanceMatrix(ROMol &mol, int confId = -1, PyObject *getAdjacencyMatrix(ROMol &mol, bool useBO = false, int emptyVal = 0, bool force = false, const char *prefix = nullptr) { + rdkit_rdmolops_ensure_numpy(); int nats = mol.getNumAtoms(); npy_intp dims[2]; dims[0] = nats; diff --git a/Code/GraphMol/Wrap/rdchem.cpp b/Code/GraphMol/Wrap/rdchem.cpp index 8e1d25a36..85e05c262 100644 --- a/Code/GraphMol/Wrap/rdchem.cpp +++ b/Code/GraphMol/Wrap/rdchem.cpp @@ -21,11 +21,36 @@ #include #include +#ifdef RDK_BUILD_THREADSAFE_SSS +#include +#endif #include "seqs.hpp" namespace python = boost::python; using namespace RDKit; +#ifdef RDK_BUILD_THREADSAFE_SSS +static std::once_flag s_rdchem_numpy_init_flag; +#endif + +void rdkit_rdchem_ensure_numpy() { +#ifdef RDK_BUILD_THREADSAFE_SSS + std::call_once(s_rdchem_numpy_init_flag, []() { + const bool register_scalar_converters = false; + boost::python::numpy::initialize(register_scalar_converters); + rdkit_import_array(); + }); +#else + static bool initialized = false; + if (!initialized) { + initialized = true; + const bool register_scalar_converters = false; + boost::python::numpy::initialize(register_scalar_converters); + rdkit_import_array(); + } +#endif +} + namespace RDKit { void tossit() { throw IndexErrorException(1); } } // namespace RDKit @@ -102,12 +127,9 @@ T *next_ptr(O &self) { BOOST_PYTHON_MODULE(rdchem) { python::scope().attr("__doc__") = "Module containing the core chemistry functionality of the RDKit"; - const bool register_scalar_converters = false; - boost::python::numpy::initialize(register_scalar_converters); RegisterListConverter(); RegisterListConverter(); RegisterListConverter(); - rdkit_import_array(); // this is one of those parts where I think I wish that I knew how to do // template meta-programming diff --git a/Code/GraphMol/Wrap/rdchem.h b/Code/GraphMol/Wrap/rdchem.h index 3191bd5ce..3f611177d 100644 --- a/Code/GraphMol/Wrap/rdchem.h +++ b/Code/GraphMol/Wrap/rdchem.h @@ -18,4 +18,6 @@ class ConformerException; } void rdExceptionTranslator(RDKit::ConformerException const &x); +void rdkit_rdchem_ensure_numpy(); + #endif diff --git a/Code/GraphMol/Wrap/rdmolops.cpp b/Code/GraphMol/Wrap/rdmolops.cpp index 8f4c3950f..e44c9182b 100644 --- a/Code/GraphMol/Wrap/rdmolops.cpp +++ b/Code/GraphMol/Wrap/rdmolops.cpp @@ -17,17 +17,35 @@ #include #include #include +#ifdef RDK_BUILD_THREADSAFE_SSS +#include +#endif namespace python = boost::python; using namespace RDKit; +#ifdef RDK_BUILD_THREADSAFE_SSS +static std::once_flag s_rdmolops_numpy_init_flag; +#endif + +void rdkit_rdmolops_ensure_numpy() { +#ifdef RDK_BUILD_THREADSAFE_SSS + std::call_once(s_rdmolops_numpy_init_flag, rdkit_import_array); +#else + static bool initialized = false; + if (!initialized) { + initialized = true; + rdkit_import_array(); + } +#endif +} + void wrap_molops(); void wrap_chiralityops(); BOOST_PYTHON_MODULE(rdmolops) { python::scope().attr("__doc__") = "Module containing RDKit functionality for manipulating molecules."; - rdkit_import_array(); // ****************************** // Functions from MolOps diff --git a/Code/GraphMol/Wrap/rdmolops.h b/Code/GraphMol/Wrap/rdmolops.h index 40a96e26e..bd90b6ba8 100644 --- a/Code/GraphMol/Wrap/rdmolops.h +++ b/Code/GraphMol/Wrap/rdmolops.h @@ -13,4 +13,6 @@ #define PY_ARRAY_UNIQUE_SYMBOL rdmolops_array_API +void rdkit_rdmolops_ensure_numpy(); + #endif diff --git a/Code/GraphMol/Wrap/test_lazy_numpy.py b/Code/GraphMol/Wrap/test_lazy_numpy.py new file mode 100644 index 000000000..25624729c --- /dev/null +++ b/Code/GraphMol/Wrap/test_lazy_numpy.py @@ -0,0 +1,164 @@ +# +# Copyright (C) 2025 RDKit contributors +# All Rights Reserved +# +"""Tests that 'from rdkit import Chem' does not eagerly load numpy, +and that core Chem functionality works before numpy is loaded. + +Because numpy's import state is process-global and cannot be unloaded, +import-ordering tests must run in a **fresh subprocess**. +""" +import subprocess +import sys +import textwrap +import unittest + + +def _run_snippet(code: str) -> subprocess.CompletedProcess: + """Run *code* in a clean Python subprocess and return the result.""" + return subprocess.run( + [sys.executable, "-c", textwrap.dedent(code)], + capture_output=True, + text=True, + timeout=60, + ) + + +class TestLazyNumpy(unittest.TestCase): + + def test_chem_import_does_not_load_numpy(self): + """Importing rdkit.Chem must not pull numpy into sys.modules.""" + result = _run_snippet("""\ + import sys + from rdkit import Chem + # numpy must not have been imported as a side-effect + if "numpy" in sys.modules: + sys.exit("FAIL: numpy was loaded by 'from rdkit import Chem'") + """) + self.assertEqual(result.returncode, 0, result.stderr or result.stdout) + + def test_basic_smiles_roundtrip_without_numpy(self): + """MolFromSmiles / MolToSmiles must work before numpy is loaded.""" + result = _run_snippet("""\ + import sys + from rdkit import Chem + + mol = Chem.MolFromSmiles("c1ccccc1") + assert mol is not None, "MolFromSmiles returned None" + + smi = Chem.MolToSmiles(mol) + assert smi == "c1ccccc1", f"unexpected SMILES: {smi}" + + assert "numpy" not in sys.modules, "numpy crept in during SMILES ops" + """) + self.assertEqual(result.returncode, 0, result.stderr or result.stdout) + + def test_mol_operations_without_numpy(self): + """Core mol operations (AddHs, sanitize, substruct) work pre-numpy.""" + result = _run_snippet("""\ + import sys + from rdkit import Chem + from rdkit.Chem import inchi + + mol = Chem.MolFromSmiles("CCO") + assert mol is not None + + # AddHs / RemoveHs + molH = Chem.AddHs(mol) + assert molH.GetNumAtoms() == 9 # 3 heavy + 6 H + mol2 = Chem.RemoveHs(molH) + assert mol2.GetNumAtoms() == 3 + + # Substructure match + query = Chem.MolFromSmarts("[OH]") + assert mol.HasSubstructMatch(query) + + # MolToMolBlock (V2000) + mb = Chem.MolToMolBlock(mol) + assert "V2000" in mb + + if inchi.INCHI_AVAILABLE: + # InChI round-trip + inchi_str = inchi.MolToInchi(mol) + assert inchi_str.startswith("InChI=") + + assert "numpy" not in sys.modules, "numpy crept in during mol ops" + """) + self.assertEqual(result.returncode, 0, result.stderr or result.stdout) + + def test_numpy_loads_on_demand(self): + """Calling a numpy-returning API (GetDistanceMatrix) loads numpy lazily.""" + result = _run_snippet("""\ + import sys + from rdkit import Chem + + assert "numpy" not in sys.modules, "numpy loaded too early" + + mol = Chem.MolFromSmiles("CCO") + dm = Chem.GetDistanceMatrix(mol) + + assert "numpy" in sys.modules, "numpy should be loaded after GetDistanceMatrix" + assert dm.shape == (3, 3), f"unexpected shape: {dm.shape}" + assert dm[0, 2] == 2.0, f"unexpected distance: {dm[0, 2]}" + """) + self.assertEqual(result.returncode, 0, result.stderr or result.stdout) + + def test_conformer_positions_loads_numpy(self): + """GetPositions() / SetPositions() load numpy on first call.""" + result = _run_snippet("""\ + import sys + from rdkit import Chem + from rdkit.Chem import rdchem + + assert "numpy" not in sys.modules, "numpy loaded too early" + + mol = Chem.MolFromSmiles("C") + conf = rdchem.Conformer(mol.GetNumAtoms()) + conf.SetAtomPosition(0, (1.0, 2.0, 3.0)) + mol.AddConformer(conf, assignId=True) + + pos = mol.GetConformer().GetPositions() + assert "numpy" in sys.modules, "numpy should load after GetPositions" + assert pos.shape == (1, 3), f"unexpected shape: {pos.shape}" + assert abs(pos[0, 0] - 1.0) < 1e-6 + """) + self.assertEqual(result.returncode, 0, result.stderr or result.stdout) + + def test_datastructs_convert_loads_numpy(self): + """DataStructs.ConvertToNumpyArray loads numpy on demand.""" + result = _run_snippet("""\ + import sys + from rdkit import Chem, DataStructs + + assert "numpy" not in sys.modules, "numpy loaded too early" + + fp = Chem.RDKFingerprint(Chem.MolFromSmiles("c1ccccc1")) + + import numpy as np + arr = np.zeros(len(fp), dtype=int) + DataStructs.ConvertToNumpyArray(fp, arr) + assert arr.sum() > 0, "fingerprint should have bits set" + """) + self.assertEqual(result.returncode, 0, result.stderr or result.stdout) + + def test_adjacency_matrix_loads_numpy(self): + """GetAdjacencyMatrix loads numpy on demand and returns correct result.""" + result = _run_snippet("""\ + import sys + from rdkit import Chem + + assert "numpy" not in sys.modules, "numpy loaded too early" + + mol = Chem.MolFromSmiles("CC") + adj = Chem.GetAdjacencyMatrix(mol) + + assert "numpy" in sys.modules, "numpy should load after GetAdjacencyMatrix" + assert adj[0, 1] == 1 + assert adj[1, 0] == 1 + assert adj[0, 0] == 0 + """) + self.assertEqual(result.returncode, 0, result.stderr or result.stdout) + + +if __name__ == '__main__': + unittest.main()