mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-03 21:44:30 +08:00
* Defer numpy initialization to first use in rdchem, rdmolops, cDataStructs
`from rdkit import Chem` unconditionally bootstrapped numpy (~120ms) via
import_array()/boost::python::numpy::initialize() in module init functions,
even when no numpy-dependent APIs were called. This is costly in cold-start
environments like AWS Lambda.
Move numpy initialization behind lazy guards (static bool + first-call init)
in rdchem.so, rdmolops.so, and cDataStructs.so. Numpy now loads only when
an API that actually needs it is invoked (GetDistanceMatrix, GetPositions,
SetPositions, GetAdjacencyMatrix, ConvertToNumpyArray, etc.).
Also change Conformer::SetPos to accept python::object instead of
np::ndarray to prevent Boost.Python from requiring numpy type conversion
before the lazy guard runs.
Adds test_lazy_numpy.py with subprocess-based tests verifying:
- `from rdkit import Chem` does not load numpy
- MolFromSmiles/MolToSmiles work without numpy
- numpy loads on demand when array APIs are called
* skip inchi tests if not available
* switch to threadsafe once_flag, like elsewhere
* finish ifdef style
* switch to magic static style
* Revert "switch to magic static style"
This reverts commit 7300188db7.
165 lines
5.3 KiB
Python
165 lines
5.3 KiB
Python
#
# Copyright (C) 2025 RDKit contributors
# All Rights Reserved
#
"""Tests that 'from rdkit import Chem' does not eagerly load numpy,
and that core Chem functionality works before numpy is loaded.

Because numpy's import state is process-global and cannot be unloaded,
import-ordering tests must run in a **fresh subprocess**.
"""
|
|
import subprocess
|
|
import sys
|
|
import textwrap
|
|
import unittest
|
|
|
|
|
|
def _run_snippet(code: str, *, timeout: float = 60) -> subprocess.CompletedProcess:
    """Run *code* in a separate Python interpreter and return the result.

    The snippet is executed with ``sys.executable -c`` so that it starts with
    its own, untouched ``sys.modules``.

    Args:
        code: Python source to execute. It is passed through
            ``textwrap.dedent`` first, so an indented triple-quoted literal
            can be supplied as-is.
        timeout: Seconds to wait for the child process before giving up.
            Defaults to 60, the value that was previously hard-coded.

    Returns:
        The ``subprocess.CompletedProcess`` for the child, with ``stdout``
        and ``stderr`` captured as text. A non-zero exit status is reported
        through ``returncode`` rather than raised.

    Raises:
        subprocess.TimeoutExpired: If the child does not finish within
            *timeout* seconds.
    """
    command = [sys.executable, "-c", textwrap.dedent(code)]
    return subprocess.run(
        command,
        capture_output=True,
        text=True,
        timeout=timeout,
    )
|
|
|
|
|
|
class TestLazyNumpy(unittest.TestCase):
    """Checks on when numpy appears in ``sys.modules`` while using RDKit.

    Every test hands a snippet to ``_run_snippet`` so that it executes in its
    own interpreter; the snippet signals failure through a non-zero exit
    status (a failed ``assert`` or an explicit ``sys.exit``).
    """

    def _assert_snippet_succeeds(self, code):
        """Run *code* in a subprocess and fail unless it exits with status 0.

        On failure the child's stderr (or stdout, when stderr is empty) is
        used as the assertion message so the cause shows up in the report.
        """
        result = _run_snippet(code)
        self.assertEqual(result.returncode, 0, result.stderr or result.stdout)

    def test_chem_import_does_not_load_numpy(self):
        """Importing rdkit.Chem must not pull numpy into sys.modules."""
        self._assert_snippet_succeeds("""\
            import sys
            from rdkit import Chem
            # numpy must not have been imported as a side-effect
            if "numpy" in sys.modules:
                sys.exit("FAIL: numpy was loaded by 'from rdkit import Chem'")
        """)

    def test_basic_smiles_roundtrip_without_numpy(self):
        """MolFromSmiles / MolToSmiles must work before numpy is loaded."""
        self._assert_snippet_succeeds("""\
            import sys
            from rdkit import Chem

            mol = Chem.MolFromSmiles("c1ccccc1")
            assert mol is not None, "MolFromSmiles returned None"

            smi = Chem.MolToSmiles(mol)
            assert smi == "c1ccccc1", f"unexpected SMILES: {smi}"

            assert "numpy" not in sys.modules, "numpy crept in during SMILES ops"
        """)

    def test_mol_operations_without_numpy(self):
        """Core mol operations work without numpy being loaded.

        Covers AddHs / RemoveHs, a SMARTS substructure match, MolToMolBlock
        and, only when ``inchi.INCHI_AVAILABLE`` is true, MolToInchi.
        """
        self._assert_snippet_succeeds("""\
            import sys
            from rdkit import Chem
            from rdkit.Chem import inchi

            mol = Chem.MolFromSmiles("CCO")
            assert mol is not None

            # AddHs / RemoveHs
            molH = Chem.AddHs(mol)
            assert molH.GetNumAtoms() == 9  # 3 heavy + 6 H
            mol2 = Chem.RemoveHs(molH)
            assert mol2.GetNumAtoms() == 3

            # Substructure match
            query = Chem.MolFromSmarts("[OH]")
            assert mol.HasSubstructMatch(query)

            # MolToMolBlock (V2000)
            mb = Chem.MolToMolBlock(mol)
            assert "V2000" in mb

            if inchi.INCHI_AVAILABLE:
                # InChI round-trip
                inchi_str = inchi.MolToInchi(mol)
                assert inchi_str.startswith("InChI=")

            assert "numpy" not in sys.modules, "numpy crept in during mol ops"
        """)

    def test_numpy_loads_on_demand(self):
        """Calling a numpy-returning API (GetDistanceMatrix) loads numpy lazily."""
        self._assert_snippet_succeeds("""\
            import sys
            from rdkit import Chem

            assert "numpy" not in sys.modules, "numpy loaded too early"

            mol = Chem.MolFromSmiles("CCO")
            dm = Chem.GetDistanceMatrix(mol)

            assert "numpy" in sys.modules, "numpy should be loaded after GetDistanceMatrix"
            assert dm.shape == (3, 3), f"unexpected shape: {dm.shape}"
            assert dm[0, 2] == 2.0, f"unexpected distance: {dm[0, 2]}"
        """)

    def test_conformer_positions_loads_numpy(self):
        """Conformer.GetPositions() loads numpy on first call.

        Only GetPositions() is exercised here; SetPositions() is not called
        by this snippet.
        """
        self._assert_snippet_succeeds("""\
            import sys
            from rdkit import Chem
            from rdkit.Chem import rdchem

            assert "numpy" not in sys.modules, "numpy loaded too early"

            mol = Chem.MolFromSmiles("C")
            conf = rdchem.Conformer(mol.GetNumAtoms())
            conf.SetAtomPosition(0, (1.0, 2.0, 3.0))
            mol.AddConformer(conf, assignId=True)

            pos = mol.GetConformer().GetPositions()
            assert "numpy" in sys.modules, "numpy should load after GetPositions"
            assert pos.shape == (1, 3), f"unexpected shape: {pos.shape}"
            assert abs(pos[0, 0] - 1.0) < 1e-6
        """)

    def test_datastructs_convert_loads_numpy(self):
        """Importing DataStructs leaves numpy unloaded; ConvertToNumpyArray works.

        The snippet imports numpy itself to allocate the destination array,
        so it checks that the conversion succeeds after a numpy-free import,
        not that the call is what triggers the numpy import.
        """
        self._assert_snippet_succeeds("""\
            import sys
            from rdkit import Chem, DataStructs

            assert "numpy" not in sys.modules, "numpy loaded too early"

            fp = Chem.RDKFingerprint(Chem.MolFromSmiles("c1ccccc1"))

            import numpy as np
            arr = np.zeros(len(fp), dtype=int)
            DataStructs.ConvertToNumpyArray(fp, arr)
            assert arr.sum() > 0, "fingerprint should have bits set"
        """)

    def test_adjacency_matrix_loads_numpy(self):
        """GetAdjacencyMatrix loads numpy on demand and returns correct result."""
        self._assert_snippet_succeeds("""\
            import sys
            from rdkit import Chem

            assert "numpy" not in sys.modules, "numpy loaded too early"

            mol = Chem.MolFromSmiles("CC")
            adj = Chem.GetAdjacencyMatrix(mol)

            assert "numpy" in sys.modules, "numpy should load after GetAdjacencyMatrix"
            assert adj[0, 1] == 1
            assert adj[1, 0] == 1
            assert adj[0, 0] == 0
        """)
|
|
|
|
|
|
if __name__ == "__main__":
    # Executed directly as a script: hand control to unittest's runner.
    unittest.main()
|