CIPLabeler performance: Store vector of bonds (#9250)

* CIPLabeler performance: Store vector of bonds

CIPLabelling refers to bonds by index over and over again. This
causes a measurable hit in performance in findConfigs() because
we iterate over a bitset of "allowed" bonds. For very large
molecules with many bonds, this can be a rate-limiting step!

This affects many PDB-sized structures.

2J3N goes from 0.7s to 0.25s with this change.

I had another example for which the getBondWithIdx() call was
taking 500ms of a 700ms call (after the performance update
in #9171 was implemented).

* yikes, XXL reserve

thanks, greg

Co-authored-by: Greg Landrum <greg.landrum@gmail.com>

---------

Co-authored-by: Greg Landrum <greg.landrum@gmail.com>
This commit is contained in:
Dan Nealschneider
2026-05-06 02:57:28 -07:00
committed by GitHub
parent 1663989053
commit 67fc0708e5
2 changed files with 7 additions and 3 deletions

View File

@@ -16,7 +16,10 @@
namespace RDKit {
namespace CIPLabeler {
// Constructs a CIPMol that binds a reference to `mol`; this variant sets up
// no other state in the constructor body.
CIPMol::CIPMol(ROMol &mol) : d_mol{mol} {}
// Constructs a CIPMol around `mol` and snapshots its bond pointers into
// d_bonds so that getBond() can index the vector directly.
//
// The pointers are stored in the order mol.bonds() yields them.
// NOTE(review): getBond(idx) presumably relies on that order matching the
// bond indices, and on the molecule's bonds not changing afterwards - confirm.
CIPMol::CIPMol(ROMol &mol) : d_mol{mol} {
  // Single up-front allocation sized to the bond count.
  d_bonds.reserve(mol.getNumBonds());
  for (Bond *bond : mol.bonds()) {
    d_bonds.push_back(bond);
  }
}
boost::rational<int> CIPMol::getFractionalAtomicNum(Atom *atom) const {
PRECONDITION(atom, "bad atom")
@@ -36,7 +39,7 @@ CXXAtomIterator<MolGraph, Atom *> CIPMol::atoms() const {
return d_mol.atoms();
}
// Returns the bond at index `idx` by delegating the lookup to the wrapped
// molecule's getBondWithIdx().
Bond *CIPMol::getBond(int idx) const { return d_mol.getBondWithIdx(idx); };
// Returns the cached bond pointer stored at position `idx` of d_bonds.
//
// `idx` is a signed int, so it is validated before being used as a vector
// subscript: a negative or too-large value would otherwise be undefined
// behaviour. The check is a single comparison, so the lookup stays O(1).
Bond *CIPMol::getBond(int idx) const {
  PRECONDITION(idx >= 0 && static_cast<std::size_t>(idx) < d_bonds.size(),
               "bad bond index")
  return d_bonds[static_cast<std::size_t>(idx)];
}
CIPMolSpan<Bond *, ROMol::OEDGE_ITER> CIPMol::getBonds(Atom *atom) const {
PRECONDITION(atom, "bad atom")
@@ -108,4 +111,4 @@ int CIPMol::getBondOrder(Bond *bond) const {
};
} // namespace CIPLabeler
} // namespace RDKit
} // namespace RDKit

View File

@@ -97,6 +97,7 @@ class CIPMol {
ROMol &d_mol;
std::vector<RDKit::Bond::BondType> d_kekulized_bonds;
std::vector<boost::rational<int>> d_atomnums;
std::vector<RDKit::Bond* > d_bonds;
};
} // namespace CIPLabeler