add checked atom and bond iterators (#9290)

* add checked iterators

* support checked atom and bond iterators

the idea here is to allow optional checking that the graph is not being
modified while an iterator is active

* ignore new member functions

---------

Co-authored-by: Ric R <ricrogz@gmail.com>
This commit is contained in:
Greg Landrum
2026-05-26 15:25:34 +02:00
committed by GitHub
parent 4ad9f33bf6
commit 85f33083cd
3 changed files with 188 additions and 7 deletions

View File

@@ -113,7 +113,8 @@ RDKIT_GRAPHMOL_EXPORT extern const int ci_ATOM_HOLDER;
//! \name C++11 Iterators
template <class Graph, class Vertex,
class Iterator = typename Graph::vertex_iterator>
class Iterator = typename Graph::vertex_iterator,
bool CheckedAtoms = false, bool CheckedBonds = false>
struct CXXAtomIterator {
Graph *graph;
Iterator vstart, vend;
@@ -128,17 +129,38 @@ struct CXXAtomIterator {
Graph *graph = nullptr;
Iterator pos;
size_t osizeAtoms{0};
size_t osizeBonds{0};
inline void checkIterator() const {
if ((CheckedAtoms && boost::num_vertices(*graph) != osizeAtoms) ||
(CheckedBonds && boost::num_edges(*graph) != osizeBonds)) {
throw std::runtime_error("molecule modified during iteration");
}
}
CXXAtomIter() {};
CXXAtomIter(Graph *graph, Iterator pos) : graph(graph), pos(pos) {}
CXXAtomIter(Graph *graph, Iterator pos) : graph(graph), pos(pos) {
if (CheckedAtoms) {
osizeAtoms = boost::num_vertices(*graph);
}
if (CheckedBonds) {
osizeBonds = boost::num_edges(*graph);
}
}
// we only return const references since we don't want clients modifying the
// graph itself through these iterators
const_reference operator*() const { return (*graph)[*pos]; }
const_reference operator*() const {
checkIterator();
return (*graph)[*pos];
}
// we only return const references since we don't want clients modifying the
// graph itself through these iterators
const_reference operator[](difference_type n) const {
checkIterator();
return (*graph)[*(pos + n)];
}
@@ -211,7 +233,7 @@ static_assert(
// clang-format on
template <class Graph, class Edge,
class Iterator = typename Graph::edge_iterator>
class Iterator = typename Graph::edge_iterator, bool Checked = false>
struct CXXBondIterator {
Graph *graph;
Iterator vstart, vend;
@@ -226,13 +248,25 @@ struct CXXBondIterator {
Graph *graph = nullptr;
Iterator pos;
size_t osize{0};
CXXBondIter() {};
CXXBondIter(Graph *graph, Iterator pos) : graph(graph), pos(pos) {}
CXXBondIter(Graph *graph, Iterator pos) : graph(graph), pos(pos) {
if (Checked) {
osize = boost::num_edges(*graph);
}
}
// we only return const references since we don't want clients modifying the
// graph itself through these iterators
const_reference operator*() const { return (*graph)[*pos]; }
const_reference operator*() const {
if (Checked) {
if (boost::num_edges(*graph) != osize) {
throw std::runtime_error("molecule modified during iteration");
}
}
return (*graph)[*pos];
}
CXXBondIter &operator++() {
++pos;
return *this;
@@ -365,6 +399,17 @@ class RDKIT_GRAPHMOL_EXPORT ROMol : public RDProps {
CXXAtomIterator<const MolGraph, Atom *const> atoms() const {
return {&d_graph};
}
// returns an iterator that will throw if the number of atoms changes during
// iteration
CXXAtomIterator<MolGraph, Atom *, MolGraph::vertex_iterator, true>
checkedAtoms() {
return {&d_graph};
}
// \overload
CXXAtomIterator<const MolGraph, Atom *const, MolGraph::vertex_iterator, true>
checkedAtoms() const {
return {&d_graph};
}
CXXAtomIterator<const MolGraph, Atom *const, MolGraph::adjacency_iterator>
atomNeighbors(Atom const *at) const {
@@ -390,6 +435,36 @@ class RDKIT_GRAPHMOL_EXPORT ROMol : public RDProps {
return {&d_graph, pr.first, pr.second};
}
// returns an iterator that will throw if the number of atoms or bonds changes
// during iteration
CXXAtomIterator<const MolGraph, Atom *const, MolGraph::adjacency_iterator,
true, true>
checkedAtomNeighbors(Atom const *at) const {
auto pr = getAtomNeighbors(at);
return {&d_graph, pr.first, pr.second};
}
// \overload
CXXAtomIterator<MolGraph, Atom *, MolGraph::adjacency_iterator, true, true>
checkedAtomNeighbors(Atom const *at) {
auto pr = getAtomNeighbors(at);
return {&d_graph, pr.first, pr.second};
}
// returns an iterator that will throw if the number of bonds changes during
// iteration
CXXBondIterator<const MolGraph, Bond *const, MolGraph::out_edge_iterator,
true>
checkedAtomBonds(Atom const *at) const {
auto pr = getAtomBonds(at);
return {&d_graph, pr.first, pr.second};
}
// \overload
CXXBondIterator<MolGraph, Bond *, MolGraph::out_edge_iterator, true>
checkedAtomBonds(Atom const *at) {
auto pr = getAtomBonds(at);
return {&d_graph, pr.first, pr.second};
}
/*!
<b>Usage</b>
\code
@@ -404,6 +479,17 @@ class RDKIT_GRAPHMOL_EXPORT ROMol : public RDProps {
CXXBondIterator<const MolGraph, Bond *const> bonds() const {
return {&d_graph};
}
// returns an iterator that will throw if the number of bonds changes during
// iteration
CXXBondIterator<MolGraph, Bond *, MolGraph::edge_iterator, true>
checkedBonds() {
return {&d_graph};
}
// \overload
CXXBondIterator<const MolGraph, Bond *const, MolGraph::edge_iterator, true>
checkedBonds() const {
return {&d_graph};
}
ROMol() : RDProps() { initMol(); }
@@ -967,6 +1053,5 @@ typedef std::vector<ROMOL_SPTR> MOL_SPTR_VECT;
typedef MOL_PTR_VECT::const_iterator MOL_PTR_VECT_CI;
typedef MOL_PTR_VECT::iterator MOL_PTR_VECT_I;
}; // namespace RDKit
#endif

View File

@@ -49,6 +49,34 @@ TEST_CASE("mol.atoms()") {
CHECK(ccount == 4);
}
TEST_CASE("mol.checkedAtoms()") {
const auto m = "CC(C)CO"_smiles;
REQUIRE(m);
unsigned int ccount = 0;
for (const auto atom : m->checkedAtoms()) {
if (atom->getAtomicNum() == 6) {
++ccount;
}
}
CHECK(ccount == 4);
{
RWMol m2(*m);
auto atoms = m2.checkedAtoms();
auto beg = atoms.begin();
m2.removeAtom(m2.getAtomWithIdx(2));
CHECK_THROWS_AS(*beg, std::runtime_error);
}
{
RWMol m2(*m);
auto atoms = m2.checkedAtoms();
auto beg = atoms.begin();
// with checked atom iterators we can safely delete bonds
m2.removeBond(1, 2);
auto at = *beg;
CHECK(at->getAtomicNum() == 6);
}
}
TEST_CASE("mol.bonds()") {
const auto m = "OC(=O)C(=O)O"_smiles;
REQUIRE(m);
@@ -78,6 +106,22 @@ TEST_CASE("mol.bonds()") {
CHECK(doubleBondCount == 2);
}
TEST_CASE("mol.checkedBonds()") {
const auto m = "OC(=O)C(=O)O"_smiles;
REQUIRE(m);
unsigned int doubleBondCount = 0;
for (const auto bond : m->checkedBonds()) {
if (bond->getBondType() == Bond::DOUBLE) {
++doubleBondCount;
}
}
CHECK(doubleBondCount == 2);
auto bonds = m->checkedBonds();
auto beg = bonds.begin();
m->removeBond(1, 2);
CHECK_THROWS_AS(*beg, std::runtime_error);
}
TEST_CASE("mol.atomNeighbors()") {
const auto m = "CC(C)CO"_smiles;
REQUIRE(m);
@@ -93,6 +137,31 @@ TEST_CASE("mol.atomNeighbors()") {
CHECK(MolToSmiles(*m) == "NC(N)NO");
}
TEST_CASE("mol.checkedAtomNeighbors()") {
const auto m = "CC(C)CO"_smiles;
REQUIRE(m);
unsigned int count = 0;
for (const auto atom : m->checkedAtomNeighbors(m->getAtomWithIdx(1))) {
count += atom->getDegree();
}
CHECK(count == 4);
{
RWMol m2(*m);
auto nbrs = m2.checkedAtomNeighbors(m2.getAtomWithIdx(1));
auto beg = nbrs.begin();
m2.removeAtom(m2.getAtomWithIdx(2));
CHECK_THROWS_AS(*beg, std::runtime_error);
}
{
RWMol m2(*m);
auto nbrs = m2.checkedAtomNeighbors(m2.getAtomWithIdx(1));
auto beg = nbrs.begin();
m2.removeBond(1, 2);
CHECK_THROWS_AS(*beg, std::runtime_error);
}
}
TEST_CASE("mol.atomBonds()") {
const auto m = "CC(=C)CO"_smiles;
REQUIRE(m);
@@ -108,6 +177,23 @@ TEST_CASE("mol.atomBonds()") {
CHECK(MolToSmiles(*m) == "CC(C)CO");
}
TEST_CASE("mol.checkedAtomBonds()") {
const auto m = "CC(=C)CO"_smiles;
REQUIRE(m);
double count = 0;
for (const auto bond : m->checkedAtomBonds(m->getAtomWithIdx(1))) {
count += bond->getBondTypeAsDouble();
}
CHECK(count == 4);
{
RWMol m2(*m);
auto bonds = m2.checkedAtomBonds(m2.getAtomWithIdx(1));
auto beg = bonds.begin();
m2.removeBond(1, 2);
CHECK_THROWS_AS(*beg, std::runtime_error);
}
}
TEST_CASE("ranges") {
const auto m = "CC(C)CO"_smiles;
REQUIRE(m);

View File

@@ -98,6 +98,16 @@
%ignore RDKit::ROMol::bonds();
%ignore RDKit::ROMol::bonds() const;
%ignore RDKit::ROMol::checkedAtomNeighbors(Atom const *at) const;
%ignore RDKit::ROMol::checkedAtomNeighbors(Atom const *at);
%ignore RDKit::ROMol::checkedAtoms() const;
%ignore RDKit::ROMol::checkedAtoms();
%ignore RDKit::ROMol::checkedAtomBonds(Atom const *at) const;
%ignore RDKit::ROMol::checkedAtomBonds(Atom const *at);
%ignore RDKit::ROMol::checkedBonds();
%ignore RDKit::ROMol::checkedBonds() const;
%ignore RDKit::ROMol::getVertices() ;
%ignore RDKit::ROMol::getVertices() const ;
%ignore RDKit::ROMol::getEdges() ;