diff --git a/Code/GraphMol/Canon.cpp b/Code/GraphMol/Canon.cpp index 296168018..684e9dfdc 100644 --- a/Code/GraphMol/Canon.cpp +++ b/Code/GraphMol/Canon.cpp @@ -86,39 +86,6 @@ auto _possibleCompare = [](const PossibleType &arg1, const PossibleType &arg2) { return (std::get<0>(arg1) < std::get<0>(arg2)); }; -bool checkBondsInSameBranch(MolStack &molStack, Bond *dblBnd, Bond *dirBnd) { - bool seenDblBond = false; - int branchCounter = 0; - for (const auto &item : molStack) { - switch (item.type) { - case MOL_STACK_BOND: - if (item.obj.bond == dirBnd || item.obj.bond == dblBnd) { - if (seenDblBond) { - return branchCounter == 0; - } else { - seenDblBond = true; - } - } - break; - case MOL_STACK_BRANCH_OPEN: - if (seenDblBond) { - ++branchCounter; - } - break; - case MOL_STACK_BRANCH_CLOSE: - if (seenDblBond) { - --branchCounter; - } - break; - default: - break; - } - } - // We should not ever hit this. But if we do, returning false - // causes the same behavior as before this patch. - return false; -} - void switchBondDir(Bond *bond) { PRECONDITION(bond, "bad bond"); PRECONDITION(bond->getBondType() == Bond::SINGLE || bond->getIsAromatic() || @@ -152,10 +119,10 @@ bool isClosingRingBond(Bond *bond) { // move it there? // // -void canonicalizeDoubleBond(Bond *dblBond, UINT_VECT &bondVisitOrders, - UINT_VECT &atomVisitOrders, +void canonicalizeDoubleBond(Bond *dblBond, const UINT_VECT &bondVisitOrders, + const UINT_VECT &atomVisitOrders, UINT_VECT &bondDirCounts, UINT_VECT &atomDirCounts, - MolStack &molStack) { + const MolStack &molStack) { PRECONDITION(dblBond, "bad bond"); PRECONDITION(dblBond->getBondType() == Bond::DOUBLE, "bad bond order"); PRECONDITION(dblBond->getStereo() > Bond::STEREOANY, "bad bond stereo"); @@ -430,7 +397,25 @@ void canonicalizeDoubleBond(Bond *dblBond, UINT_VECT &bondVisitOrders, // UNLESS the bond is not in a branch (in the smiles) (e.g. firstFromAtom1 // branches off a cycle, and secondFromAtom1 shows up at the end of the // cycle). This was Github Issue #2023, see it for an example. - if (checkBondsInSameBranch(molStack, dblBond, secondFromAtom1)) { + auto checkBondsInSameBranch = [&molStack, &bondVisitOrders]( + Bond *dblBond, Bond *dirBond) { + auto start = bondVisitOrders[dblBond->getIdx()]; + auto end = bondVisitOrders[dirBond->getIdx()]; + if (start > end) { + std::swap(start, end); + } + unsigned int branchLevel = 0; + for (auto i = start + 1; i != end; ++i) { + const auto &item = molStack[i]; + if (item.type == MOL_STACK_BRANCH_OPEN) { + ++branchLevel; + } else if (item.type == MOL_STACK_BRANCH_CLOSE) { + --branchLevel; + } + } + return branchLevel == 0; + }; + if (checkBondsInSameBranch(dblBond, secondFromAtom1)) { auto otherDir = flipBondDir(firstFromAtom1->getBondDir()); secondFromAtom1->setBondDir(otherDir); } else { @@ -540,11 +525,10 @@ void canonicalizeDoubleBond(Bond *dblBond, UINT_VECT &bondVisitOrders, // finds cycles void dfsFindCycles(ROMol &mol, int atomIdx, int inBondIdx, std::vector &colors, const UINT_VECT &ranks, - UINT_VECT &atomOrders, VECT_INT_VECT &atomRingClosures, + VECT_INT_VECT &atomRingClosures, const boost::dynamic_bitset<> *bondsInPlay, const std::vector *bondSymbols, bool doRandom) { Atom *atom = mol.getAtomWithIdx(atomIdx); - atomOrders.push_back(atomIdx); colors[atomIdx] = GREY_NODE; @@ -647,8 +631,7 @@ void dfsFindCycles(ROMol &mol, int atomIdx, int inBondIdx, // we haven't seen this node at all before, traverse // ----- dfsFindCycles(mol, possibleIdx, bond->getIdx(), colors, ranks, - atomOrders, atomRingClosures, bondsInPlay, bondSymbols, - doRandom); + atomRingClosures, bondsInPlay, bondSymbols, doRandom); break; case GREY_NODE: // ----- @@ -670,8 +653,7 @@ void dfsFindCycles(ROMol &mol, int atomIdx, int inBondIdx, void dfsBuildStack(ROMol &mol, int atomIdx, int inBondIdx, std::vector &colors, VECT_INT_VECT &cycles, const UINT_VECT &ranks, UINT_VECT &cyclesAvailable, - MolStack &molStack, UINT_VECT &atomOrders, - UINT_VECT &bondVisitOrders, VECT_INT_VECT &atomRingClosures, + MolStack &molStack, VECT_INT_VECT &atomRingClosures, std::vector &atomTraversalBondOrder, const boost::dynamic_bitset<> *bondsInPlay, const std::vector *bondSymbols, bool doRandom) { @@ -681,7 +663,6 @@ void dfsBuildStack(ROMol &mol, int atomIdx, int inBondIdx, seenFromHere.set(atomIdx); molStack.push_back(MolStackElem(atom)); - atomOrders[atom->getIdx()] = rdcast(molStack.size()); colors[atomIdx] = GREY_NODE; INT_LIST travList; @@ -706,7 +687,6 @@ void dfsBuildStack(ROMol &mol, int atomIdx, int inBondIdx, // this is end of the ring closure // we can just pull the ring index from the bond itself: molStack.push_back(MolStackElem(bond, atomIdx)); - bondVisitOrders[bIdx] = molStack.size(); molStack.push_back(MolStackElem(ringIdx)); // don't make the ring digit immediately available again: we don't want // to have the same @@ -825,11 +805,9 @@ void dfsBuildStack(ROMol &mol, int atomIdx, int inBondIdx, MolStackElem("(", rdcast(possiblesIt - possibles.begin()))); } molStack.push_back(MolStackElem(bond, atomIdx)); - bondVisitOrders[bond->getIdx()] = molStack.size(); dfsBuildStack(mol, possibleIdx, bond->getIdx(), colors, cycles, ranks, - cyclesAvailable, molStack, atomOrders, bondVisitOrders, - atomRingClosures, atomTraversalBondOrder, bondsInPlay, - bondSymbols, doRandom); + cyclesAvailable, molStack, atomRingClosures, + atomTraversalBondOrder, bondsInPlay, bondSymbols, doRandom); if (possiblesIt + 1 != possibles.end()) { molStack.push_back( MolStackElem(")", rdcast(possiblesIt - possibles.begin()))); @@ -844,7 +822,6 @@ void canonicalDFSTraversal(ROMol &mol, int atomIdx, int inBondIdx, std::vector &colors, VECT_INT_VECT &cycles, const UINT_VECT &ranks, UINT_VECT &cyclesAvailable, MolStack &molStack, - UINT_VECT &atomOrders, UINT_VECT &bondVisitOrders, VECT_INT_VECT &atomRingClosures, std::vector &atomTraversalBondOrder, const boost::dynamic_bitset<> *bondsInPlay, @@ -852,8 +829,6 @@ void canonicalDFSTraversal(ROMol &mol, int atomIdx, int inBondIdx, bool doRandom) { PRECONDITION(colors.size() >= mol.getNumAtoms(), "vector too small"); PRECONDITION(ranks.size() >= mol.getNumAtoms(), "vector too small"); - PRECONDITION(atomOrders.size() >= mol.getNumAtoms(), "vector too small"); - PRECONDITION(bondVisitOrders.size() >= mol.getNumBonds(), "vector too small"); PRECONDITION(atomRingClosures.size() >= mol.getNumAtoms(), "vector too small"); PRECONDITION(atomTraversalBondOrder.size() >= mol.getNumAtoms(), @@ -866,11 +841,11 @@ void canonicalDFSTraversal(ROMol &mol, int atomIdx, int inBondIdx, std::vector tcolors; tcolors.resize(colors.size()); std::copy(colors.begin(), colors.end(), tcolors.begin()); - dfsFindCycles(mol, atomIdx, inBondIdx, tcolors, ranks, atomOrders, - atomRingClosures, bondsInPlay, bondSymbols, doRandom); + dfsFindCycles(mol, atomIdx, inBondIdx, tcolors, ranks, atomRingClosures, + bondsInPlay, bondSymbols, doRandom); dfsBuildStack(mol, atomIdx, inBondIdx, colors, cycles, ranks, cyclesAvailable, - molStack, atomOrders, bondVisitOrders, atomRingClosures, - atomTraversalBondOrder, bondsInPlay, bondSymbols, doRandom); + molStack, atomRingClosures, atomTraversalBondOrder, bondsInPlay, + bondSymbols, doRandom); } void clearBondDirs(ROMol &mol, Bond *refBond, const Atom *fromAtom, @@ -1004,8 +979,6 @@ void canonicalizeFragment(ROMol &mol, int atomIdx, "bondSymbols too small"); unsigned int nAtoms = mol.getNumAtoms(); - UINT_VECT atomVisitOrders(nAtoms, 0); - UINT_VECT bondVisitOrders(mol.getNumBonds(), 0); UINT_VECT bondDirCounts(mol.getNumBonds(), 0); UINT_VECT atomDirCounts(nAtoms, 0); UINT_VECT cyclesAvailable(MAX_CYCLES, 1); @@ -1030,10 +1003,10 @@ void canonicalizeFragment(ROMol &mol, int atomIdx, VECT_INT_VECT atomRingClosures(nAtoms); std::vector atomTraversalBondOrder(nAtoms); - Canon::canonicalDFSTraversal( - mol, atomIdx, -1, colors, cycles, ranks, cyclesAvailable, molStack, - atomVisitOrders, bondVisitOrders, atomRingClosures, - atomTraversalBondOrder, bondsInPlay, bondSymbols, doRandom); + Canon::canonicalDFSTraversal(mol, atomIdx, -1, colors, cycles, ranks, + cyclesAvailable, molStack, atomRingClosures, + atomTraversalBondOrder, bondsInPlay, bondSymbols, + doRandom); CHECK_INVARIANT(!molStack.empty(), "Empty stack."); CHECK_INVARIANT(molStack.begin()->type == MOL_STACK_ATOM, @@ -1122,12 +1095,21 @@ void canonicalizeFragment(ROMol &mol, int atomIdx, } } - // remove the current directions on single bonds around double bonds: - for (auto bond : mol.bonds()) { - Bond::BondDir dir = bond->getBondDir(); - if (dir == Bond::ENDDOWNRIGHT || dir == Bond::ENDUPRIGHT) { - bond->setBondDir(Bond::NONE); + std::vector atomVisitOrders(mol.getNumAtoms()); + std::vector bondVisitOrders(mol.getNumBonds()); + + unsigned int pos = 0; + for (const auto &msI : molStack) { + if (msI.type == MOL_STACK_ATOM) { + atomVisitOrders[msI.obj.atom->getIdx()] = pos; + } else if (msI.type == MOL_STACK_BOND) { + bondVisitOrders[msI.obj.bond->getIdx()] = pos; + auto dir = msI.obj.bond->getBondDir(); + if (dir == Bond::ENDDOWNRIGHT || dir == Bond::ENDUPRIGHT) { + msI.obj.bond->setBondDir(Bond::NONE); + } } + ++pos; } // traverse the stack and canonicalize double bonds and atoms with (ring)