Refactor atomVisitOrders and bondVisitOrders in SMILES Writer canonicalization (#8972)

* refactor calculation of atomVisitOrders/bondVisitOrders

* refactor checkBondsInSameBranch

* fix
This commit is contained in:
Ricardo Rodriguez
2025-12-12 08:07:26 -05:00
committed by Greg Landrum
parent 08d9b42574
commit deeb9727fc

View File

@@ -86,39 +86,6 @@ auto _possibleCompare = [](const PossibleType &arg1, const PossibleType &arg2) {
return (std::get<0>(arg1) < std::get<0>(arg2));
};
bool checkBondsInSameBranch(MolStack &molStack, Bond *dblBnd, Bond *dirBnd) {
bool seenDblBond = false;
int branchCounter = 0;
for (const auto &item : molStack) {
switch (item.type) {
case MOL_STACK_BOND:
if (item.obj.bond == dirBnd || item.obj.bond == dblBnd) {
if (seenDblBond) {
return branchCounter == 0;
} else {
seenDblBond = true;
}
}
break;
case MOL_STACK_BRANCH_OPEN:
if (seenDblBond) {
++branchCounter;
}
break;
case MOL_STACK_BRANCH_CLOSE:
if (seenDblBond) {
--branchCounter;
}
break;
default:
break;
}
}
// We should not ever hit this. But if we do, returning false
// causes the same behavior as before this patch.
return false;
}
void switchBondDir(Bond *bond) {
PRECONDITION(bond, "bad bond");
PRECONDITION(bond->getBondType() == Bond::SINGLE || bond->getIsAromatic() ||
@@ -152,10 +119,10 @@ bool isClosingRingBond(Bond *bond) {
// move it there?
//
//
void canonicalizeDoubleBond(Bond *dblBond, UINT_VECT &bondVisitOrders,
UINT_VECT &atomVisitOrders,
void canonicalizeDoubleBond(Bond *dblBond, const UINT_VECT &bondVisitOrders,
const UINT_VECT &atomVisitOrders,
UINT_VECT &bondDirCounts, UINT_VECT &atomDirCounts,
MolStack &molStack) {
const MolStack &molStack) {
PRECONDITION(dblBond, "bad bond");
PRECONDITION(dblBond->getBondType() == Bond::DOUBLE, "bad bond order");
PRECONDITION(dblBond->getStereo() > Bond::STEREOANY, "bad bond stereo");
@@ -430,7 +397,25 @@ void canonicalizeDoubleBond(Bond *dblBond, UINT_VECT &bondVisitOrders,
// UNLESS the bond is not in a branch (in the smiles) (e.g. firstFromAtom1
// branches off a cycle, and secondFromAtom1 shows up at the end of the
// cycle). This was Github Issue #2023, see it for an example.
if (checkBondsInSameBranch(molStack, dblBond, secondFromAtom1)) {
auto checkBondsInSameBranch = [&molStack, &bondVisitOrders](
Bond *dblBond, Bond *dirBond) {
auto start = bondVisitOrders[dblBond->getIdx()];
auto end = bondVisitOrders[dirBond->getIdx()];
if (start > end) {
std::swap(start, end);
}
unsigned int branchLevel = 0;
for (auto i = start + 1; i != end; ++i) {
const auto &item = molStack[i];
if (item.type == MOL_STACK_BRANCH_OPEN) {
++branchLevel;
} else if (item.type == MOL_STACK_BRANCH_CLOSE) {
--branchLevel;
}
}
return branchLevel == 0;
};
if (checkBondsInSameBranch(dblBond, secondFromAtom1)) {
auto otherDir = flipBondDir(firstFromAtom1->getBondDir());
secondFromAtom1->setBondDir(otherDir);
} else {
@@ -540,11 +525,10 @@ void canonicalizeDoubleBond(Bond *dblBond, UINT_VECT &bondVisitOrders,
// finds cycles
void dfsFindCycles(ROMol &mol, int atomIdx, int inBondIdx,
std::vector<AtomColors> &colors, const UINT_VECT &ranks,
UINT_VECT &atomOrders, VECT_INT_VECT &atomRingClosures,
VECT_INT_VECT &atomRingClosures,
const boost::dynamic_bitset<> *bondsInPlay,
const std::vector<std::string> *bondSymbols, bool doRandom) {
Atom *atom = mol.getAtomWithIdx(atomIdx);
atomOrders.push_back(atomIdx);
colors[atomIdx] = GREY_NODE;
@@ -647,8 +631,7 @@ void dfsFindCycles(ROMol &mol, int atomIdx, int inBondIdx,
// we haven't seen this node at all before, traverse
// -----
dfsFindCycles(mol, possibleIdx, bond->getIdx(), colors, ranks,
atomOrders, atomRingClosures, bondsInPlay, bondSymbols,
doRandom);
atomRingClosures, bondsInPlay, bondSymbols, doRandom);
break;
case GREY_NODE:
// -----
@@ -670,8 +653,7 @@ void dfsFindCycles(ROMol &mol, int atomIdx, int inBondIdx,
void dfsBuildStack(ROMol &mol, int atomIdx, int inBondIdx,
std::vector<AtomColors> &colors, VECT_INT_VECT &cycles,
const UINT_VECT &ranks, UINT_VECT &cyclesAvailable,
MolStack &molStack, UINT_VECT &atomOrders,
UINT_VECT &bondVisitOrders, VECT_INT_VECT &atomRingClosures,
MolStack &molStack, VECT_INT_VECT &atomRingClosures,
std::vector<INT_LIST> &atomTraversalBondOrder,
const boost::dynamic_bitset<> *bondsInPlay,
const std::vector<std::string> *bondSymbols, bool doRandom) {
@@ -681,7 +663,6 @@ void dfsBuildStack(ROMol &mol, int atomIdx, int inBondIdx,
seenFromHere.set(atomIdx);
molStack.push_back(MolStackElem(atom));
atomOrders[atom->getIdx()] = rdcast<int>(molStack.size());
colors[atomIdx] = GREY_NODE;
INT_LIST travList;
@@ -706,7 +687,6 @@ void dfsBuildStack(ROMol &mol, int atomIdx, int inBondIdx,
// this is end of the ring closure
// we can just pull the ring index from the bond itself:
molStack.push_back(MolStackElem(bond, atomIdx));
bondVisitOrders[bIdx] = molStack.size();
molStack.push_back(MolStackElem(ringIdx));
// don't make the ring digit immediately available again: we don't want
// to have the same
@@ -825,11 +805,9 @@ void dfsBuildStack(ROMol &mol, int atomIdx, int inBondIdx,
MolStackElem("(", rdcast<int>(possiblesIt - possibles.begin())));
}
molStack.push_back(MolStackElem(bond, atomIdx));
bondVisitOrders[bond->getIdx()] = molStack.size();
dfsBuildStack(mol, possibleIdx, bond->getIdx(), colors, cycles, ranks,
cyclesAvailable, molStack, atomOrders, bondVisitOrders,
atomRingClosures, atomTraversalBondOrder, bondsInPlay,
bondSymbols, doRandom);
cyclesAvailable, molStack, atomRingClosures,
atomTraversalBondOrder, bondsInPlay, bondSymbols, doRandom);
if (possiblesIt + 1 != possibles.end()) {
molStack.push_back(
MolStackElem(")", rdcast<int>(possiblesIt - possibles.begin())));
@@ -844,7 +822,6 @@ void canonicalDFSTraversal(ROMol &mol, int atomIdx, int inBondIdx,
std::vector<AtomColors> &colors,
VECT_INT_VECT &cycles, const UINT_VECT &ranks,
UINT_VECT &cyclesAvailable, MolStack &molStack,
UINT_VECT &atomOrders, UINT_VECT &bondVisitOrders,
VECT_INT_VECT &atomRingClosures,
std::vector<INT_LIST> &atomTraversalBondOrder,
const boost::dynamic_bitset<> *bondsInPlay,
@@ -852,8 +829,6 @@ void canonicalDFSTraversal(ROMol &mol, int atomIdx, int inBondIdx,
bool doRandom) {
PRECONDITION(colors.size() >= mol.getNumAtoms(), "vector too small");
PRECONDITION(ranks.size() >= mol.getNumAtoms(), "vector too small");
PRECONDITION(atomOrders.size() >= mol.getNumAtoms(), "vector too small");
PRECONDITION(bondVisitOrders.size() >= mol.getNumBonds(), "vector too small");
PRECONDITION(atomRingClosures.size() >= mol.getNumAtoms(),
"vector too small");
PRECONDITION(atomTraversalBondOrder.size() >= mol.getNumAtoms(),
@@ -866,11 +841,11 @@ void canonicalDFSTraversal(ROMol &mol, int atomIdx, int inBondIdx,
std::vector<AtomColors> tcolors;
tcolors.resize(colors.size());
std::copy(colors.begin(), colors.end(), tcolors.begin());
dfsFindCycles(mol, atomIdx, inBondIdx, tcolors, ranks, atomOrders,
atomRingClosures, bondsInPlay, bondSymbols, doRandom);
dfsFindCycles(mol, atomIdx, inBondIdx, tcolors, ranks, atomRingClosures,
bondsInPlay, bondSymbols, doRandom);
dfsBuildStack(mol, atomIdx, inBondIdx, colors, cycles, ranks, cyclesAvailable,
molStack, atomOrders, bondVisitOrders, atomRingClosures,
atomTraversalBondOrder, bondsInPlay, bondSymbols, doRandom);
molStack, atomRingClosures, atomTraversalBondOrder, bondsInPlay,
bondSymbols, doRandom);
}
void clearBondDirs(ROMol &mol, Bond *refBond, const Atom *fromAtom,
@@ -1004,8 +979,6 @@ void canonicalizeFragment(ROMol &mol, int atomIdx,
"bondSymbols too small");
unsigned int nAtoms = mol.getNumAtoms();
UINT_VECT atomVisitOrders(nAtoms, 0);
UINT_VECT bondVisitOrders(mol.getNumBonds(), 0);
UINT_VECT bondDirCounts(mol.getNumBonds(), 0);
UINT_VECT atomDirCounts(nAtoms, 0);
UINT_VECT cyclesAvailable(MAX_CYCLES, 1);
@@ -1030,10 +1003,10 @@ void canonicalizeFragment(ROMol &mol, int atomIdx,
VECT_INT_VECT atomRingClosures(nAtoms);
std::vector<INT_LIST> atomTraversalBondOrder(nAtoms);
Canon::canonicalDFSTraversal(
mol, atomIdx, -1, colors, cycles, ranks, cyclesAvailable, molStack,
atomVisitOrders, bondVisitOrders, atomRingClosures,
atomTraversalBondOrder, bondsInPlay, bondSymbols, doRandom);
Canon::canonicalDFSTraversal(mol, atomIdx, -1, colors, cycles, ranks,
cyclesAvailable, molStack, atomRingClosures,
atomTraversalBondOrder, bondsInPlay, bondSymbols,
doRandom);
CHECK_INVARIANT(!molStack.empty(), "Empty stack.");
CHECK_INVARIANT(molStack.begin()->type == MOL_STACK_ATOM,
@@ -1122,12 +1095,21 @@ void canonicalizeFragment(ROMol &mol, int atomIdx,
}
}
// remove the current directions on single bonds around double bonds:
for (auto bond : mol.bonds()) {
Bond::BondDir dir = bond->getBondDir();
if (dir == Bond::ENDDOWNRIGHT || dir == Bond::ENDUPRIGHT) {
bond->setBondDir(Bond::NONE);
std::vector<unsigned int> atomVisitOrders(mol.getNumAtoms());
std::vector<unsigned int> bondVisitOrders(mol.getNumBonds());
unsigned int pos = 0;
for (const auto &msI : molStack) {
if (msI.type == MOL_STACK_ATOM) {
atomVisitOrders[msI.obj.atom->getIdx()] = pos;
} else if (msI.type == MOL_STACK_BOND) {
bondVisitOrders[msI.obj.bond->getIdx()] = pos;
auto dir = msI.obj.bond->getBondDir();
if (dir == Bond::ENDDOWNRIGHT || dir == Bond::ENDUPRIGHT) {
msI.obj.bond->setBondDir(Bond::NONE);
}
}
++pos;
}
// traverse the stack and canonicalize double bonds and atoms with (ring)