mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-04 21:54:27 +08:00
Refactor atomVisitOrders and bondVisitOrders in SMILES Writer canonicalization (#8972)
* refactor calculation of atomVisitOrders/bondVisitOrders * refactor checkBondsInSameBranch * fix
This commit is contained in:
committed by
Greg Landrum
parent
08d9b42574
commit
deeb9727fc
@@ -86,39 +86,6 @@ auto _possibleCompare = [](const PossibleType &arg1, const PossibleType &arg2) {
|
||||
return (std::get<0>(arg1) < std::get<0>(arg2));
|
||||
};
|
||||
|
||||
bool checkBondsInSameBranch(MolStack &molStack, Bond *dblBnd, Bond *dirBnd) {
|
||||
bool seenDblBond = false;
|
||||
int branchCounter = 0;
|
||||
for (const auto &item : molStack) {
|
||||
switch (item.type) {
|
||||
case MOL_STACK_BOND:
|
||||
if (item.obj.bond == dirBnd || item.obj.bond == dblBnd) {
|
||||
if (seenDblBond) {
|
||||
return branchCounter == 0;
|
||||
} else {
|
||||
seenDblBond = true;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case MOL_STACK_BRANCH_OPEN:
|
||||
if (seenDblBond) {
|
||||
++branchCounter;
|
||||
}
|
||||
break;
|
||||
case MOL_STACK_BRANCH_CLOSE:
|
||||
if (seenDblBond) {
|
||||
--branchCounter;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
// We should not ever hit this. But if we do, returning false
|
||||
// causes the same behavior as before this patch.
|
||||
return false;
|
||||
}
|
||||
|
||||
void switchBondDir(Bond *bond) {
|
||||
PRECONDITION(bond, "bad bond");
|
||||
PRECONDITION(bond->getBondType() == Bond::SINGLE || bond->getIsAromatic() ||
|
||||
@@ -152,10 +119,10 @@ bool isClosingRingBond(Bond *bond) {
|
||||
// move it there?
|
||||
//
|
||||
//
|
||||
void canonicalizeDoubleBond(Bond *dblBond, UINT_VECT &bondVisitOrders,
|
||||
UINT_VECT &atomVisitOrders,
|
||||
void canonicalizeDoubleBond(Bond *dblBond, const UINT_VECT &bondVisitOrders,
|
||||
const UINT_VECT &atomVisitOrders,
|
||||
UINT_VECT &bondDirCounts, UINT_VECT &atomDirCounts,
|
||||
MolStack &molStack) {
|
||||
const MolStack &molStack) {
|
||||
PRECONDITION(dblBond, "bad bond");
|
||||
PRECONDITION(dblBond->getBondType() == Bond::DOUBLE, "bad bond order");
|
||||
PRECONDITION(dblBond->getStereo() > Bond::STEREOANY, "bad bond stereo");
|
||||
@@ -430,7 +397,25 @@ void canonicalizeDoubleBond(Bond *dblBond, UINT_VECT &bondVisitOrders,
|
||||
// UNLESS the bond is not in a branch (in the smiles) (e.g. firstFromAtom1
|
||||
// branches off a cycle, and secondFromAtom1 shows up at the end of the
|
||||
// cycle). This was Github Issue #2023, see it for an example.
|
||||
if (checkBondsInSameBranch(molStack, dblBond, secondFromAtom1)) {
|
||||
auto checkBondsInSameBranch = [&molStack, &bondVisitOrders](
|
||||
Bond *dblBond, Bond *dirBond) {
|
||||
auto start = bondVisitOrders[dblBond->getIdx()];
|
||||
auto end = bondVisitOrders[dirBond->getIdx()];
|
||||
if (start > end) {
|
||||
std::swap(start, end);
|
||||
}
|
||||
unsigned int branchLevel = 0;
|
||||
for (auto i = start + 1; i != end; ++i) {
|
||||
const auto &item = molStack[i];
|
||||
if (item.type == MOL_STACK_BRANCH_OPEN) {
|
||||
++branchLevel;
|
||||
} else if (item.type == MOL_STACK_BRANCH_CLOSE) {
|
||||
--branchLevel;
|
||||
}
|
||||
}
|
||||
return branchLevel == 0;
|
||||
};
|
||||
if (checkBondsInSameBranch(dblBond, secondFromAtom1)) {
|
||||
auto otherDir = flipBondDir(firstFromAtom1->getBondDir());
|
||||
secondFromAtom1->setBondDir(otherDir);
|
||||
} else {
|
||||
@@ -540,11 +525,10 @@ void canonicalizeDoubleBond(Bond *dblBond, UINT_VECT &bondVisitOrders,
|
||||
// finds cycles
|
||||
void dfsFindCycles(ROMol &mol, int atomIdx, int inBondIdx,
|
||||
std::vector<AtomColors> &colors, const UINT_VECT &ranks,
|
||||
UINT_VECT &atomOrders, VECT_INT_VECT &atomRingClosures,
|
||||
VECT_INT_VECT &atomRingClosures,
|
||||
const boost::dynamic_bitset<> *bondsInPlay,
|
||||
const std::vector<std::string> *bondSymbols, bool doRandom) {
|
||||
Atom *atom = mol.getAtomWithIdx(atomIdx);
|
||||
atomOrders.push_back(atomIdx);
|
||||
|
||||
colors[atomIdx] = GREY_NODE;
|
||||
|
||||
@@ -647,8 +631,7 @@ void dfsFindCycles(ROMol &mol, int atomIdx, int inBondIdx,
|
||||
// we haven't seen this node at all before, traverse
|
||||
// -----
|
||||
dfsFindCycles(mol, possibleIdx, bond->getIdx(), colors, ranks,
|
||||
atomOrders, atomRingClosures, bondsInPlay, bondSymbols,
|
||||
doRandom);
|
||||
atomRingClosures, bondsInPlay, bondSymbols, doRandom);
|
||||
break;
|
||||
case GREY_NODE:
|
||||
// -----
|
||||
@@ -670,8 +653,7 @@ void dfsFindCycles(ROMol &mol, int atomIdx, int inBondIdx,
|
||||
void dfsBuildStack(ROMol &mol, int atomIdx, int inBondIdx,
|
||||
std::vector<AtomColors> &colors, VECT_INT_VECT &cycles,
|
||||
const UINT_VECT &ranks, UINT_VECT &cyclesAvailable,
|
||||
MolStack &molStack, UINT_VECT &atomOrders,
|
||||
UINT_VECT &bondVisitOrders, VECT_INT_VECT &atomRingClosures,
|
||||
MolStack &molStack, VECT_INT_VECT &atomRingClosures,
|
||||
std::vector<INT_LIST> &atomTraversalBondOrder,
|
||||
const boost::dynamic_bitset<> *bondsInPlay,
|
||||
const std::vector<std::string> *bondSymbols, bool doRandom) {
|
||||
@@ -681,7 +663,6 @@ void dfsBuildStack(ROMol &mol, int atomIdx, int inBondIdx,
|
||||
|
||||
seenFromHere.set(atomIdx);
|
||||
molStack.push_back(MolStackElem(atom));
|
||||
atomOrders[atom->getIdx()] = rdcast<int>(molStack.size());
|
||||
colors[atomIdx] = GREY_NODE;
|
||||
|
||||
INT_LIST travList;
|
||||
@@ -706,7 +687,6 @@ void dfsBuildStack(ROMol &mol, int atomIdx, int inBondIdx,
|
||||
// this is end of the ring closure
|
||||
// we can just pull the ring index from the bond itself:
|
||||
molStack.push_back(MolStackElem(bond, atomIdx));
|
||||
bondVisitOrders[bIdx] = molStack.size();
|
||||
molStack.push_back(MolStackElem(ringIdx));
|
||||
// don't make the ring digit immediately available again: we don't want
|
||||
// to have the same
|
||||
@@ -825,11 +805,9 @@ void dfsBuildStack(ROMol &mol, int atomIdx, int inBondIdx,
|
||||
MolStackElem("(", rdcast<int>(possiblesIt - possibles.begin())));
|
||||
}
|
||||
molStack.push_back(MolStackElem(bond, atomIdx));
|
||||
bondVisitOrders[bond->getIdx()] = molStack.size();
|
||||
dfsBuildStack(mol, possibleIdx, bond->getIdx(), colors, cycles, ranks,
|
||||
cyclesAvailable, molStack, atomOrders, bondVisitOrders,
|
||||
atomRingClosures, atomTraversalBondOrder, bondsInPlay,
|
||||
bondSymbols, doRandom);
|
||||
cyclesAvailable, molStack, atomRingClosures,
|
||||
atomTraversalBondOrder, bondsInPlay, bondSymbols, doRandom);
|
||||
if (possiblesIt + 1 != possibles.end()) {
|
||||
molStack.push_back(
|
||||
MolStackElem(")", rdcast<int>(possiblesIt - possibles.begin())));
|
||||
@@ -844,7 +822,6 @@ void canonicalDFSTraversal(ROMol &mol, int atomIdx, int inBondIdx,
|
||||
std::vector<AtomColors> &colors,
|
||||
VECT_INT_VECT &cycles, const UINT_VECT &ranks,
|
||||
UINT_VECT &cyclesAvailable, MolStack &molStack,
|
||||
UINT_VECT &atomOrders, UINT_VECT &bondVisitOrders,
|
||||
VECT_INT_VECT &atomRingClosures,
|
||||
std::vector<INT_LIST> &atomTraversalBondOrder,
|
||||
const boost::dynamic_bitset<> *bondsInPlay,
|
||||
@@ -852,8 +829,6 @@ void canonicalDFSTraversal(ROMol &mol, int atomIdx, int inBondIdx,
|
||||
bool doRandom) {
|
||||
PRECONDITION(colors.size() >= mol.getNumAtoms(), "vector too small");
|
||||
PRECONDITION(ranks.size() >= mol.getNumAtoms(), "vector too small");
|
||||
PRECONDITION(atomOrders.size() >= mol.getNumAtoms(), "vector too small");
|
||||
PRECONDITION(bondVisitOrders.size() >= mol.getNumBonds(), "vector too small");
|
||||
PRECONDITION(atomRingClosures.size() >= mol.getNumAtoms(),
|
||||
"vector too small");
|
||||
PRECONDITION(atomTraversalBondOrder.size() >= mol.getNumAtoms(),
|
||||
@@ -866,11 +841,11 @@ void canonicalDFSTraversal(ROMol &mol, int atomIdx, int inBondIdx,
|
||||
std::vector<AtomColors> tcolors;
|
||||
tcolors.resize(colors.size());
|
||||
std::copy(colors.begin(), colors.end(), tcolors.begin());
|
||||
dfsFindCycles(mol, atomIdx, inBondIdx, tcolors, ranks, atomOrders,
|
||||
atomRingClosures, bondsInPlay, bondSymbols, doRandom);
|
||||
dfsFindCycles(mol, atomIdx, inBondIdx, tcolors, ranks, atomRingClosures,
|
||||
bondsInPlay, bondSymbols, doRandom);
|
||||
dfsBuildStack(mol, atomIdx, inBondIdx, colors, cycles, ranks, cyclesAvailable,
|
||||
molStack, atomOrders, bondVisitOrders, atomRingClosures,
|
||||
atomTraversalBondOrder, bondsInPlay, bondSymbols, doRandom);
|
||||
molStack, atomRingClosures, atomTraversalBondOrder, bondsInPlay,
|
||||
bondSymbols, doRandom);
|
||||
}
|
||||
|
||||
void clearBondDirs(ROMol &mol, Bond *refBond, const Atom *fromAtom,
|
||||
@@ -1004,8 +979,6 @@ void canonicalizeFragment(ROMol &mol, int atomIdx,
|
||||
"bondSymbols too small");
|
||||
unsigned int nAtoms = mol.getNumAtoms();
|
||||
|
||||
UINT_VECT atomVisitOrders(nAtoms, 0);
|
||||
UINT_VECT bondVisitOrders(mol.getNumBonds(), 0);
|
||||
UINT_VECT bondDirCounts(mol.getNumBonds(), 0);
|
||||
UINT_VECT atomDirCounts(nAtoms, 0);
|
||||
UINT_VECT cyclesAvailable(MAX_CYCLES, 1);
|
||||
@@ -1030,10 +1003,10 @@ void canonicalizeFragment(ROMol &mol, int atomIdx,
|
||||
|
||||
VECT_INT_VECT atomRingClosures(nAtoms);
|
||||
std::vector<INT_LIST> atomTraversalBondOrder(nAtoms);
|
||||
Canon::canonicalDFSTraversal(
|
||||
mol, atomIdx, -1, colors, cycles, ranks, cyclesAvailable, molStack,
|
||||
atomVisitOrders, bondVisitOrders, atomRingClosures,
|
||||
atomTraversalBondOrder, bondsInPlay, bondSymbols, doRandom);
|
||||
Canon::canonicalDFSTraversal(mol, atomIdx, -1, colors, cycles, ranks,
|
||||
cyclesAvailable, molStack, atomRingClosures,
|
||||
atomTraversalBondOrder, bondsInPlay, bondSymbols,
|
||||
doRandom);
|
||||
|
||||
CHECK_INVARIANT(!molStack.empty(), "Empty stack.");
|
||||
CHECK_INVARIANT(molStack.begin()->type == MOL_STACK_ATOM,
|
||||
@@ -1122,12 +1095,21 @@ void canonicalizeFragment(ROMol &mol, int atomIdx,
|
||||
}
|
||||
}
|
||||
|
||||
// remove the current directions on single bonds around double bonds:
|
||||
for (auto bond : mol.bonds()) {
|
||||
Bond::BondDir dir = bond->getBondDir();
|
||||
if (dir == Bond::ENDDOWNRIGHT || dir == Bond::ENDUPRIGHT) {
|
||||
bond->setBondDir(Bond::NONE);
|
||||
std::vector<unsigned int> atomVisitOrders(mol.getNumAtoms());
|
||||
std::vector<unsigned int> bondVisitOrders(mol.getNumBonds());
|
||||
|
||||
unsigned int pos = 0;
|
||||
for (const auto &msI : molStack) {
|
||||
if (msI.type == MOL_STACK_ATOM) {
|
||||
atomVisitOrders[msI.obj.atom->getIdx()] = pos;
|
||||
} else if (msI.type == MOL_STACK_BOND) {
|
||||
bondVisitOrders[msI.obj.bond->getIdx()] = pos;
|
||||
auto dir = msI.obj.bond->getBondDir();
|
||||
if (dir == Bond::ENDDOWNRIGHT || dir == Bond::ENDUPRIGHT) {
|
||||
msI.obj.bond->setBondDir(Bond::NONE);
|
||||
}
|
||||
}
|
||||
++pos;
|
||||
}
|
||||
|
||||
// traverse the stack and canonicalize double bonds and atoms with (ring)
|
||||
|
||||
Reference in New Issue
Block a user