// $Id$ // // Copyright (C) 2001-2010 Greg Landrum and Rational Discovery LLC // // @@ All Rights Reserved @@ // This file is part of the RDKit. // The contents are covered by the terms of the BSD license // which is included in the file license.txt, found at the root // of the RDKit source tree. // #include #include #include #include #include namespace RDKit { namespace Canon { struct _possibleCompare : public std::binary_function { bool operator()(const PossibleType &arg1, const PossibleType &arg2) const { return (arg1.get<0>() < arg2.get<0>()); } }; void switchBondDir(Bond *bond) { PRECONDITION(bond, "bad bond"); PRECONDITION(bond->getBondType() == Bond::SINGLE || bond->getIsAromatic(), "bad bond type"); switch (bond->getBondDir()) { case Bond::ENDUPRIGHT: bond->setBondDir(Bond::ENDDOWNRIGHT); break; case Bond::ENDDOWNRIGHT: bond->setBondDir(Bond::ENDUPRIGHT); break; default: break; } } // FIX: this may only be of interest from the SmilesWriter, should we // move it there? // // void canonicalizeDoubleBond(Bond *dblBond, INT_VECT &bondVisitOrders, INT_VECT &atomVisitOrders, INT_VECT &bondDirCounts, INT_VECT &atomDirCounts) { PRECONDITION(dblBond, "bad bond"); PRECONDITION(dblBond->getBondType() == Bond::DOUBLE, "bad bond order"); PRECONDITION(dblBond->getStereo() > Bond::STEREOANY, "bad bond stereo"); PRECONDITION(dblBond->getStereoAtoms().size() >= 2, "bad bond stereo atoms"); PRECONDITION(atomVisitOrders[dblBond->getBeginAtomIdx()] > 0 || atomVisitOrders[dblBond->getEndAtomIdx()] > 0, "neither end atom traversed"); // atom1 is the lower numbered atom of the double bond (the one traversed // first) Atom *atom1, *atom2; if (atomVisitOrders[dblBond->getBeginAtomIdx()] < atomVisitOrders[dblBond->getEndAtomIdx()]) { atom1 = dblBond->getBeginAtom(); atom2 = dblBond->getEndAtom(); } else { atom1 = dblBond->getEndAtom(); atom2 = dblBond->getBeginAtom(); } // we only worry about double bonds that begin and end at atoms // of degree 2 or 3: if ((atom1->getDegree() != 2 && atom1->getDegree() != 3) || (atom2->getDegree() != 2 && atom2->getDegree() != 3)) { return; } Bond *firstFromAtom1 = NULL, *secondFromAtom1 = NULL; Bond *firstFromAtom2 = NULL, *secondFromAtom2 = NULL; int firstVisitOrder = 100000; ROMol &mol = dblBond->getOwningMol(); ROMol::OBOND_ITER_PAIR atomBonds; // ------------------------------------------------------- // find the lowest visit order bonds from each end and determine // if anything is already constraining our choice of directions: bool dir1Set = false, dir2Set = false; atomBonds = mol.getAtomBonds(atom1); while (atomBonds.first != atomBonds.second) { if (mol[*atomBonds.first].get() != dblBond) { int bondIdx = mol[*atomBonds.first]->getIdx(); if (bondDirCounts[bondIdx] > 0) { dir1Set = true; } if (!firstFromAtom1 || bondVisitOrders[bondIdx] < firstVisitOrder) { if (firstFromAtom1) secondFromAtom1 = firstFromAtom1; firstFromAtom1 = mol[*atomBonds.first].get(); firstVisitOrder = bondVisitOrders[bondIdx]; } else { secondFromAtom1 = mol[*atomBonds.first].get(); } } atomBonds.first++; } atomBonds = mol.getAtomBonds(atom2); firstVisitOrder = 10000; while (atomBonds.first != atomBonds.second) { if (mol[*atomBonds.first].get() != dblBond) { int bondIdx = mol[*atomBonds.first]->getIdx(); if (bondDirCounts[bondIdx] > 0) { dir2Set = true; } if (!firstFromAtom2 || bondVisitOrders[bondIdx] < firstVisitOrder) { if (firstFromAtom2) secondFromAtom2 = firstFromAtom2; firstFromAtom2 = mol[*atomBonds.first].get(); firstVisitOrder = bondVisitOrders[bondIdx]; } else { secondFromAtom2 = mol[*atomBonds.first].get(); } } atomBonds.first++; } // make sure we found everything we need to find: CHECK_INVARIANT(firstFromAtom1, "could not find atom1"); CHECK_INVARIANT(firstFromAtom2, "could not find atom2"); CHECK_INVARIANT(atom1->getDegree() == 2 || secondFromAtom1, "inconsistency at atom1"); CHECK_INVARIANT(atom2->getDegree() == 2 || secondFromAtom2, "inconsistency at atom2"); bool setFromBond1 = true; Bond::BondDir atom1Dir = Bond::NONE; Bond::BondDir atom2Dir = Bond::NONE; Bond *atom1ControllingBond = firstFromAtom1; Bond *atom2ControllingBond = firstFromAtom2; if (!dir1Set && !dir2Set) { // ---------------------------------- // nothing has touched our bonds so far, so set the // directions to "arbitrary" values: // the bond we came in on becomes ENDUPRIGHT: atom1Dir = Bond::ENDUPRIGHT; firstFromAtom1->setBondDir(atom1Dir); bondDirCounts[firstFromAtom1->getIdx()] += 1; atomDirCounts[atom1->getIdx()] += 1; } else if (!dir2Set) { // at least one of the bonds on atom1 has its directionality set already: if (bondDirCounts[firstFromAtom1->getIdx()] > 0) { // The first bond's direction has been set at some earlier point: atom1Dir = firstFromAtom1->getBondDir(); bondDirCounts[firstFromAtom1->getIdx()] += 1; atomDirCounts[atom1->getIdx()] += 1; if (secondFromAtom1) { // both bonds have their directionalities set, make sure // they are compatible: if (firstFromAtom1->getBondDir() == secondFromAtom1->getBondDir() && bondDirCounts[firstFromAtom2->getIdx()]) { CHECK_INVARIANT( ((firstFromAtom1->getBeginAtomIdx() == atom1->getIdx()) ^ (secondFromAtom1->getBeginAtomIdx() == atom1->getIdx())), "inconsistent state"); } } } else { // the second bond must be present and setting the direction: CHECK_INVARIANT(secondFromAtom1, "inconsistent state"); CHECK_INVARIANT(bondDirCounts[secondFromAtom1->getIdx()] > 0, "inconsistent state"); // It must be the second bond setting the direction. // This happens when the bond dir is set in a branch: // v- this double bond // CC(/C=P/N)=N/O // ^- the second bond sets the direction // or when the first bond is a ring closure from an // earlier traversed atom: // v- this double bond // NC1=NOC/C1=N\O // ^- this closure ends up being the first bond, // and it does not set the direction. // // This addresses parts of Issue 185 and sf.net Issue 1842174 // atom1Dir = secondFromAtom1->getBondDir(); firstFromAtom1->setBondDir(atom1Dir); bondDirCounts[firstFromAtom1->getIdx()] += 1; atomDirCounts[atom1->getIdx()] += 2; atom1ControllingBond = secondFromAtom1; } } else { // dir2 has been set, and dir1 hasn't: we're dealing with a stereochem // specification on a ring double bond: setFromBond1 = false; // at least one of the bonds on atom2 has its directionality set already: if (bondDirCounts[firstFromAtom2->getIdx()] > 0) { // The second bond's direction has been set at some earlier point: atom2Dir = firstFromAtom2->getBondDir(); bondDirCounts[firstFromAtom2->getIdx()] += 1; atomDirCounts[atom2->getIdx()] += 1; if (secondFromAtom2) { // both bonds have their directionalities set, make sure // they are compatible: if (firstFromAtom2->getBondDir() == secondFromAtom2->getBondDir() && bondDirCounts[firstFromAtom1->getIdx()]) { CHECK_INVARIANT( ((firstFromAtom2->getBeginAtomIdx() == atom2->getIdx()) ^ (secondFromAtom2->getBeginAtomIdx() == atom2->getIdx())), "inconsistent state"); } } } else { // the second bond must be present and setting the direction: CHECK_INVARIANT(secondFromAtom2, "inconsistent state"); CHECK_INVARIANT(bondDirCounts[secondFromAtom2->getIdx()] > 0, "inconsistent state"); // It must be the second bond setting the direction. // This happens when the bond dir is set in a branch: // v- this double bond // CC(/C=P/N)=N/O // ^- the second bond sets the direction // or when the first bond is a ring closure from an // earlier traversed atom: // v- this double bond // NC1=NOC/C1=N\O // ^- this closure ends up being the first bond, // and it does not set the direction. // // This addresses parts of Issue 185 and sf.net Issue 1842174 // atom2Dir = secondFromAtom2->getBondDir(); firstFromAtom2->setBondDir(atom2Dir); bondDirCounts[firstFromAtom2->getIdx()] += 1; atomDirCounts[atom2->getIdx()] += 2; atom2ControllingBond = secondFromAtom2; } // CHECK_INVARIANT(0,"ring stereochemistry not handled"); } // end of the ring stereochemistry if // now set the directionality on the other side: if (setFromBond1) { if (dblBond->getStereo() == Bond::STEREOE) { atom2Dir = atom1Dir; } else if (dblBond->getStereo() == Bond::STEREOZ) { atom2Dir = (atom1Dir == Bond::ENDUPRIGHT) ? Bond::ENDDOWNRIGHT : Bond::ENDUPRIGHT; } CHECK_INVARIANT(atom2Dir != Bond::NONE, "stereo not set"); // If we're not looking at the bonds used to determine the // stereochemistry, we need to flip the setting on the other bond: const INT_VECT &stereoAtoms = dblBond->getStereoAtoms(); if (atom1->getDegree() == 3 && std::find(stereoAtoms.begin(), stereoAtoms.end(), static_cast(atom1ControllingBond->getOtherAtomIdx( atom1->getIdx()))) == stereoAtoms.end()) { atom2Dir = (atom2Dir == Bond::ENDUPRIGHT) ? Bond::ENDDOWNRIGHT : Bond::ENDUPRIGHT; } // std::cerr<<" 0 set bond 2: "<getIdx()<<" // "<getDegree() == 3 && std::find(stereoAtoms.begin(), stereoAtoms.end(), static_cast(firstFromAtom2->getOtherAtomIdx( atom2->getIdx()))) == stereoAtoms.end()) { atom2Dir = (atom2Dir == Bond::ENDUPRIGHT) ? Bond::ENDDOWNRIGHT : Bond::ENDUPRIGHT; } // std::cerr<<" 1 set bond 2: "<getIdx()<<" // "<setBondDir(atom2Dir); // this block of code is no longer needed // if(firstFromAtom2->hasProp(common_properties::_TraversalRingClosureBond)){ // // another nice one: we're traversing and come to a ring // // closure bond that has directionality set. This is going to // // have its direction swapped on writing so we need to // // pre-emptively swap it here. // // example situation for this is a non-canonical traversal of // // C1CCCCN/C=C/1 // // starting at atom 0, we hit it on encountering the final bond. // switchBondDir(firstFromAtom2); // } bondDirCounts[firstFromAtom2->getIdx()] += 1; atomDirCounts[atom2->getIdx()] += 1; } else { // we come before a ring closure: if (dblBond->getStereo() == Bond::STEREOZ) { atom1Dir = atom2Dir; } else if (dblBond->getStereo() == Bond::STEREOE) { atom1Dir = (atom2Dir == Bond::ENDUPRIGHT) ? Bond::ENDDOWNRIGHT : Bond::ENDUPRIGHT; } CHECK_INVARIANT(atom1Dir != Bond::NONE, "stereo not set"); // If we're not looking at the bonds used to determine the // stereochemistry, we need to flip the setting on the other bond: const INT_VECT &stereoAtoms = dblBond->getStereoAtoms(); if (atom2->getDegree() == 3 && std::find(stereoAtoms.begin(), stereoAtoms.end(), static_cast(atom2ControllingBond->getOtherAtomIdx( atom2->getIdx()))) == stereoAtoms.end()) { // std::cerr<<"flip 1"<getDegree() == 3 && std::find(stereoAtoms.begin(), stereoAtoms.end(), static_cast(firstFromAtom1->getOtherAtomIdx( atom1->getIdx()))) == stereoAtoms.end()) { // std::cerr<<"flip 2"<setBondDir(atom1Dir); switchBondDir(firstFromAtom1); bondDirCounts[firstFromAtom1->getIdx()] += 1; atomDirCounts[atom1->getIdx()] += 1; } // ----------------------------------- // // Check if there are other bonds from atoms 1 and 2 that need // to have their directionalities set: /// if (atom1->getDegree() == 3) { if (!bondDirCounts[secondFromAtom1->getIdx()]) { // This bond (the second bond from the starting atom of the double bond) // is a special case. It's going to appear in a branch in the smiles: // X\C(\Y)=C/Z // ^ // |- here // so it actually needs to go down with the *same* direction as the // bond that's already been set (because "pulling the bond out of the // branch" reverses its direction). // A quick example. This SMILES: // F/C(\Cl)=C/F // is *wrong*. This is the correct form: // F/C(/Cl)=C/F // So, since we want this bond to have the opposite direction to the // other one, we put it in with the same direction. // This was Issue 183 secondFromAtom1->setBondDir(firstFromAtom1->getBondDir()); } bondDirCounts[secondFromAtom1->getIdx()] += 1; atomDirCounts[atom1->getIdx()] += 1; } if (atom2->getDegree() == 3) { if (!bondDirCounts[secondFromAtom2->getIdx()]) { // Here we set the bond direction to be opposite the other one (since // both come after the atom connected to the double bond). Bond::BondDir otherDir; if (!secondFromAtom2->hasProp( common_properties::_TraversalRingClosureBond)) { otherDir = (firstFromAtom2->getBondDir() == Bond::ENDUPRIGHT) ? Bond::ENDDOWNRIGHT : Bond::ENDUPRIGHT; } else { // another one those irritating little reversal things due to // ring closures otherDir = firstFromAtom2->getBondDir(); } secondFromAtom2->setBondDir(otherDir); } bondDirCounts[secondFromAtom2->getIdx()] += 1; atomDirCounts[atom2->getIdx()] += 1; // std::cerr<<" other: "<getIdx()<<" // "<getIdx()] > atomVisitOrders[atom1->getIdx()]) { if (bondDirCounts[atom1ControllingBond->getIdx()] == 1) { if (!atom1ControllingBond->hasProp( common_properties::_TraversalRingClosureBond)) { // std::cerr<<" switcheroo 1"<getIdx()] == 1) { // the controlling bond at atom1 is being set by someone else, flip the // direction // on the atom2 bond instead: // std::cerr<<" switcheroo 2"<getIdx()] >= 1) { switchBondDir(secondFromAtom2); } } } } // something to watch out for here. For this molecule and traversal order: // 0 1 2 3 4 5 6 7 8 <- atom numbers // C/C=C/C(/N=C/C)=C/C // ^ ^ // |--|-- these two bonds must match in direction or the SMILES // is inconsistent (according to Daylight, Marvin does ok with // it) // That means that the direction of the bond from atom 3->4 needs to be set // when the bond from 2->3 is set. // // I believe we only need to worry about this for the bonds from atom2. const Atom *atom3 = firstFromAtom2->getOtherAtom(atom2); if (atom3->getDegree() == 3) { Bond *otherAtom3Bond = NULL; bool dblBondPresent = false; atomBonds = mol.getAtomBonds(atom3); while (atomBonds.first != atomBonds.second) { Bond *tbond = mol[*atomBonds.first].get(); if (tbond->getBondType() == Bond::DOUBLE && tbond->getStereo() > Bond::STEREOANY) { dblBondPresent = true; } else if ((tbond->getBondType() == Bond::SINGLE) && (tbond != firstFromAtom2)) { otherAtom3Bond = tbond; } atomBonds.first++; } if (dblBondPresent && otherAtom3Bond) { // std::cerr<<"set!"<setBondDir(firstFromAtom2->getBondDir()); bondDirCounts[otherAtom3Bond->getIdx()] += 1; atomDirCounts[atom3->getIdx()] += 1; } } } // finds cycles void dfsFindCycles(ROMol &mol, int atomIdx, int inBondIdx, std::vector &colors, const UINT_VECT &ranks, INT_VECT &atomOrders, VECT_INT_VECT &atomRingClosures, const boost::dynamic_bitset<> *bondsInPlay, const std::vector *bondSymbols) { Atom *atom = mol.getAtomWithIdx(atomIdx); atomOrders.push_back(atomIdx); colors[atomIdx] = GREY_NODE; // --------------------- // // Build the list of possible destinations from here // // --------------------- std::vector possibles; possibles.resize(0); ROMol::OBOND_ITER_PAIR bondsPair = mol.getAtomBonds(atom); possibles.reserve(bondsPair.second - bondsPair.first); while (bondsPair.first != bondsPair.second) { BOND_SPTR theBond = mol[*(bondsPair.first)]; bondsPair.first++; if (bondsInPlay && !(*bondsInPlay)[theBond->getIdx()]) continue; if (inBondIdx < 0 || theBond->getIdx() != static_cast(inBondIdx)) { int otherIdx = theBond->getOtherAtomIdx(atomIdx); long rank = ranks[otherIdx]; // --------------------- // // things are a bit more complicated if we are sitting on a // ring atom. we would like to traverse first to the // ring-closure atoms, then to atoms outside the ring first, // then to atoms in the ring that haven't already been visited // (non-ring-closure atoms). // // Here's how the black magic works: // - non-ring atom neighbors have their original ranks // - ring atom neighbors have this added to their ranks: // (MAX_BONDTYPE - bondOrder)*MAX_NATOMS*MAX_NATOMS // - ring-closure neighbors lose a factor of: // (MAX_BONDTYPE+1)*MAX_NATOMS*MAX_NATOMS // // This tactic biases us to traverse to non-ring neighbors first, // original ordering if bond orders are all equal... crafty, neh? // // --------------------- if (colors[otherIdx] == GREY_NODE) { rank -= static_cast(MAX_BONDTYPE + 1) * MAX_NATOMS * MAX_NATOMS; if (!bondSymbols) { rank += static_cast(MAX_BONDTYPE - theBond->getBondType()) * MAX_NATOMS; } else { const std::string &symb = (*bondSymbols)[theBond->getIdx()]; boost::uint32_t hsh = gboost::hash_range(symb.begin(), symb.end()); rank += (hsh % MAX_NATOMS) * MAX_NATOMS; } } else if (theBond->getOwningMol().getRingInfo()->numBondRings( theBond->getIdx())) { if (!bondSymbols) { rank += static_cast(MAX_BONDTYPE - theBond->getBondType()) * MAX_NATOMS * MAX_NATOMS; } else { const std::string &symb = (*bondSymbols)[theBond->getIdx()]; boost::uint32_t hsh = gboost::hash_range(symb.begin(), symb.end()); rank += (hsh % MAX_NATOMS) * MAX_NATOMS * MAX_NATOMS; } } // std::cerr<<"aIdx: "<< atomIdx <<" p: "<getBondType()<<" "<::iterator possiblesIt = possibles.begin(); possiblesIt != possibles.end(); possiblesIt++) { int possibleIdx = possiblesIt->get<1>(); Bond *bond = possiblesIt->get<2>(); switch (colors[possibleIdx]) { case WHITE_NODE: // ----- // we haven't seen this node at all before, traverse // ----- dfsFindCycles(mol, possibleIdx, bond->getIdx(), colors, ranks, atomOrders, atomRingClosures, bondsInPlay, bondSymbols); break; case GREY_NODE: // ----- // we've seen this, but haven't finished it (we're finishing a ring) // ----- atomRingClosures[possibleIdx].push_back(bond->getIdx()); atomRingClosures[atomIdx].push_back(bond->getIdx()); break; default: // ----- // this node has been finished. don't do anything. // ----- break; } } colors[atomIdx] = BLACK_NODE; } void dfsBuildStack(ROMol &mol, int atomIdx, int inBondIdx, std::vector &colors, VECT_INT_VECT &cycles, const UINT_VECT &ranks, INT_VECT &cyclesAvailable, MolStack &molStack, INT_VECT &atomOrders, INT_VECT &bondVisitOrders, VECT_INT_VECT &atomRingClosures, std::vector &atomTraversalBondOrder, const boost::dynamic_bitset<> *bondsInPlay, const std::vector *bondSymbols) { #if 0 std::cerr<<"traverse from atom: "< seenFromHere(mol.getNumAtoms()); seenFromHere.set(atomIdx); molStack.push_back(MolStackElem(atom)); atomOrders[atom->getIdx()] = rdcast(molStack.size()); colors[atomIdx] = GREY_NODE; INT_LIST travList; if (inBondIdx >= 0) travList.push_back(inBondIdx); // --------------------- // // Add any ring closures // // --------------------- if (atomRingClosures[atomIdx].size()) { std::vector ringsClosed; BOOST_FOREACH (int bIdx, atomRingClosures[atomIdx]) { travList.push_back(bIdx); Bond *bond = mol.getBondWithIdx(bIdx); seenFromHere.set(bond->getOtherAtomIdx(atomIdx)); unsigned int ringIdx; if (bond->getPropIfPresent(common_properties::_TraversalRingClosureBond, ringIdx)) { // this is end of the ring closure // we can just pull the ring index from the bond itself: molStack.push_back(MolStackElem(bond, atomIdx)); bondVisitOrders[bIdx] = rdcast(molStack.size()); molStack.push_back(MolStackElem(ringIdx)); // don't make the ring digit immediately available again: we don't want // to have the same // ring digit opening and closing rings on an atom. ringsClosed.push_back(ringIdx - 1); } else { // this is the beginning of the ring closure, we need to come up with a // ring index: INT_VECT::const_iterator cAIt = std::find(cyclesAvailable.begin(), cyclesAvailable.end(), 1); if (cAIt == cyclesAvailable.end()) { throw ValueErrorException( "Too many rings open at once. SMILES cannot be generated."); } unsigned int lowestRingIdx = rdcast(cAIt - cyclesAvailable.begin()); cyclesAvailable[lowestRingIdx] = 0; ++lowestRingIdx; bond->setProp(common_properties::_TraversalRingClosureBond, lowestRingIdx); molStack.push_back(MolStackElem(lowestRingIdx)); } } BOOST_FOREACH (unsigned int ringIdx, ringsClosed) { cyclesAvailable[ringIdx] = 1; } } // --------------------- // // Build the list of possible destinations from here // // --------------------- std::vector possibles; possibles.resize(0); ROMol::OBOND_ITER_PAIR bondsPair = mol.getAtomBonds(atom); possibles.reserve(bondsPair.second - bondsPair.first); while (bondsPair.first != bondsPair.second) { BOND_SPTR theBond = mol[*(bondsPair.first)]; bondsPair.first++; if (bondsInPlay && !(*bondsInPlay)[theBond->getIdx()]) continue; if (inBondIdx < 0 || theBond->getIdx() != static_cast(inBondIdx)) { int otherIdx = theBond->getOtherAtomIdx(atomIdx); // --------------------- // // This time we skip the ring-closure atoms (we did them // above); we want to traverse first to atoms outside the ring // then to atoms in the ring that haven't already been visited // (non-ring-closure atoms). // // otherwise it's the same ranking logic as above // // --------------------- if (colors[otherIdx] != WHITE_NODE || seenFromHere[otherIdx]) { // ring closure or finished atom... skip it. continue; } unsigned long rank = ranks[otherIdx]; if (theBond->getOwningMol().getRingInfo()->numBondRings( theBond->getIdx())) { if (!bondSymbols) { rank += static_cast(MAX_BONDTYPE - theBond->getBondType()) * MAX_NATOMS * MAX_NATOMS; } else { const std::string &symb = (*bondSymbols)[theBond->getIdx()]; boost::uint32_t hsh = gboost::hash_range(symb.begin(), symb.end()); rank += (hsh % MAX_NATOMS) * MAX_NATOMS * MAX_NATOMS; } } // std::cerr<<" p: "<getBondType()<<" "<::iterator possiblesIt = possibles.begin(); possiblesIt != possibles.end(); possiblesIt++) { int possibleIdx = possiblesIt->get<1>(); if (colors[possibleIdx] != WHITE_NODE) { // we're either done or it's a ring-closure, which we already processed... // this test isn't strictly required, because we only added WHITE notes to // the possibles list, but it seems logical to document it continue; } Bond *bond = possiblesIt->get<2>(); Atom *otherAtom = mol.getAtomWithIdx(possibleIdx); // unsigned int lowestRingIdx; INT_VECT::const_iterator cAIt; // ww might have some residual data from earlier calls, clean that up: if (otherAtom->hasProp(common_properties::_TraversalBondIndexOrder)) { otherAtom->clearProp(common_properties::_TraversalBondIndexOrder); } travList.push_back(bond->getIdx()); if (possiblesIt + 1 != possibles.end()) { // we're branching molStack.push_back(MolStackElem("(", rdcast(possiblesIt - possibles.begin()))); } molStack.push_back(MolStackElem(bond, atomIdx)); bondVisitOrders[bond->getIdx()] = rdcast(molStack.size()); dfsBuildStack(mol, possibleIdx, bond->getIdx(), colors, cycles, ranks, cyclesAvailable, molStack, atomOrders, bondVisitOrders, atomRingClosures, atomTraversalBondOrder, bondsInPlay, bondSymbols); if (possiblesIt + 1 != possibles.end()) { molStack.push_back(MolStackElem(")", rdcast(possiblesIt - possibles.begin()))); } } atomTraversalBondOrder[atom->getIdx()] = travList; colors[atomIdx] = BLACK_NODE; } void canonicalDFSTraversal(ROMol &mol, int atomIdx, int inBondIdx, std::vector &colors, VECT_INT_VECT &cycles, const UINT_VECT &ranks, INT_VECT &cyclesAvailable, MolStack &molStack, INT_VECT &atomOrders, INT_VECT &bondVisitOrders, VECT_INT_VECT &atomRingClosures, std::vector &atomTraversalBondOrder, const boost::dynamic_bitset<> *bondsInPlay, const std::vector *bondSymbols) { PRECONDITION(colors.size() >= mol.getNumAtoms(), "vector too small"); PRECONDITION(ranks.size() >= mol.getNumAtoms(), "vector too small"); PRECONDITION(atomOrders.size() >= mol.getNumAtoms(), "vector too small"); PRECONDITION(bondVisitOrders.size() >= mol.getNumBonds(), "vector too small"); PRECONDITION(atomRingClosures.size() >= mol.getNumAtoms(), "vector too small"); PRECONDITION(atomTraversalBondOrder.size() >= mol.getNumAtoms(), "vector too small"); PRECONDITION(!bondsInPlay || bondsInPlay->size() >= mol.getNumBonds(), "bondsInPlay too small"); PRECONDITION(!bondSymbols || bondSymbols->size() >= mol.getNumBonds(), "bondSymbols too small"); std::vector tcolors; tcolors.resize(colors.size()); std::copy(colors.begin(), colors.end(), tcolors.begin()); dfsFindCycles(mol, atomIdx, inBondIdx, tcolors, ranks, atomOrders, atomRingClosures, bondsInPlay, bondSymbols); dfsBuildStack(mol, atomIdx, inBondIdx, colors, cycles, ranks, cyclesAvailable, molStack, atomOrders, bondVisitOrders, atomRingClosures, atomTraversalBondOrder, bondsInPlay, bondSymbols); } bool canHaveDirection(const Bond *bond) { PRECONDITION(bond, "bad bond"); Bond::BondType bondType = bond->getBondType(); return (bondType == Bond::SINGLE || bondType == Bond::AROMATIC); } void clearBondDirs(ROMol &mol, Bond *refBond, const Atom *fromAtom, INT_VECT &bondDirCounts, INT_VECT &atomDirCounts, const INT_VECT &bondVisitOrders) { RDUNUSED_PARAM(bondVisitOrders); PRECONDITION(bondDirCounts.size() >= mol.getNumBonds(), "bad dirCount size"); PRECONDITION(refBond, "bad bond"); PRECONDITION(&refBond->getOwningMol() == &mol, "bad bond"); PRECONDITION(fromAtom, "bad atom"); PRECONDITION(&fromAtom->getOwningMol() == &mol, "bad bond"); #if 0 std::copy(bondDirCounts.begin(),bondDirCounts.end(),std::ostream_iterator(std::cerr,", ")); std::cerr<<"\n"; std::copy(atomDirCounts.begin(),atomDirCounts.end(),std::ostream_iterator(std::cerr,", ")); std::cerr<<"\n"; std::cerr<<"cBD: bond: "<getIdx()<<" atom: "<getIdx()<<": "; #endif ROMol::OEDGE_ITER beg, end; boost::tie(beg, end) = mol.getAtomBonds(fromAtom); bool nbrPossible = false, adjusted = false; while (beg != end) { Bond *oBond = mol[*beg].get(); // std::cerr<<" >>"<getIdx()<<" "<getIdx()]<<"-"<getIdx()]<<" // "<getBeginAtomIdx()]<<"-"<getEndAtomIdx()]<getIdx()] >= bondDirCounts[refBond->getIdx()]) && atomDirCounts[oBond->getBeginAtomIdx()] != 1 && atomDirCounts[oBond->getEndAtomIdx()] != 1) { adjusted = true; bondDirCounts[oBond->getIdx()] -= 1; if (!bondDirCounts[oBond->getIdx()]) { // no one is setting the direction here: oBond->setBondDir(Bond::NONE); atomDirCounts[oBond->getBeginAtomIdx()] -= 1; atomDirCounts[oBond->getEndAtomIdx()] -= 1; // std::cerr<<"ob:"<getIdx()<<" "; } } } beg++; } if (nbrPossible && !adjusted && atomDirCounts[refBond->getBeginAtomIdx()] != 1 && atomDirCounts[refBond->getEndAtomIdx()] != 1) { // we found a neighbor that could have directionality set, // but it had a lower bondDirCount than us, so we must // need to be adjusted: bondDirCounts[refBond->getIdx()] -= 1; if (!bondDirCounts[refBond->getIdx()]) { refBond->setBondDir(Bond::NONE); atomDirCounts[refBond->getBeginAtomIdx()] -= 1; atomDirCounts[refBond->getEndAtomIdx()] -= 1; // std::cerr<<"rb:"<getIdx()<<" "; } } // std::cerr<= mol.getNumBonds(), "bad dirCount size"); #if 0 std::cerr<<"rRBDS: "; mol.debugMol(std::cerr); std::copy(bondDirCounts.begin(),bondDirCounts.end(),std::ostream_iterator(std::cerr,", ")); std::cerr<<"\n"; #endif // find bonds that have directions indicated that are redundant: for (MolStack::iterator msI = molStack.begin(); msI != molStack.end(); msI++) { if (msI->type == MOL_STACK_BOND) { Bond *tBond = msI->obj.bond; const Atom *canonBeginAtom = mol.getAtomWithIdx(msI->number); const Atom *canonEndAtom = mol.getAtomWithIdx(tBond->getOtherAtomIdx(msI->number)); if (canHaveDirection(tBond) && bondDirCounts[tBond->getIdx()] >= 1) { // start by finding the double bond that sets tBond's direction: const Atom *dblBondAtom = NULL; ROMol::OEDGE_ITER beg, end; boost::tie(beg, end) = mol.getAtomBonds(canonBeginAtom); while (beg != end) { if (mol[*beg].get() != tBond && mol[*beg]->getBondType() == Bond::DOUBLE && mol[*beg]->getStereo() > Bond::STEREOANY) { dblBondAtom = canonBeginAtom; // tBond->getOtherAtom(canonBeginAtom); break; } beg++; } if (dblBondAtom != NULL) { clearBondDirs(mol, tBond, dblBondAtom, bondDirCounts, atomDirCounts, bondVisitOrders); } dblBondAtom = NULL; boost::tie(beg, end) = mol.getAtomBonds(canonEndAtom); while (beg != end) { if (mol[*beg].get() != tBond && mol[*beg]->getBondType() == Bond::DOUBLE && mol[*beg]->getStereo() > Bond::STEREOANY) { dblBondAtom = canonEndAtom; // tBond->getOtherAtom(canonEndAtom); break; } beg++; } if (dblBondAtom != NULL) { clearBondDirs(mol, tBond, dblBondAtom, bondDirCounts, atomDirCounts, bondVisitOrders); } } else if (tBond->getBondDir() != Bond::NONE) { // we aren't supposed to have a direction set, but we do: tBond->setBondDir(Bond::NONE); } } } } void canonicalizeFragment(ROMol &mol, int atomIdx, std::vector &colors, const UINT_VECT &ranks, MolStack &molStack, const boost::dynamic_bitset<> *bondsInPlay, const std::vector *bondSymbols, bool doIsomericSmiles) { PRECONDITION(colors.size() >= mol.getNumAtoms(), "vector too small"); PRECONDITION(ranks.size() >= mol.getNumAtoms(), "vector too small"); PRECONDITION(!bondsInPlay || bondsInPlay->size() >= mol.getNumBonds(), "bondsInPlay too small"); PRECONDITION(!bondSymbols || bondSymbols->size() >= mol.getNumBonds(), "bondSymbols too small"); int nAtoms = mol.getNumAtoms(); INT_VECT atomVisitOrders(nAtoms, 0); INT_VECT bondVisitOrders(mol.getNumBonds(), 0); INT_VECT bondDirCounts(mol.getNumBonds(), 0); INT_VECT atomDirCounts(nAtoms, 0); INT_VECT cyclesAvailable(MAX_CYCLES, 1); VECT_INT_VECT cycles(nAtoms); for (VECT_INT_VECT_I vviIt = cycles.begin(); vviIt != cycles.end(); ++vviIt) vviIt->resize(0); boost::dynamic_bitset<> ringStereoChemAdjusted(nAtoms); // make sure that we've done the stereo perception: if (!mol.hasProp(common_properties::_StereochemDone)) { MolOps::assignStereochemistry(mol, false); } // we need ring information; make sure findSSSR has been called before // if not call now if (!mol.getRingInfo()->isInitialized()) { MolOps::findSSSR(mol); } mol.getAtomWithIdx(atomIdx)->setProp( common_properties::_TraversalStartPoint, true); VECT_INT_VECT atomRingClosures(nAtoms); std::vector atomTraversalBondOrder(nAtoms); Canon::canonicalDFSTraversal( mol, atomIdx, -1, colors, cycles, ranks, cyclesAvailable, molStack, atomVisitOrders, bondVisitOrders, atomRingClosures, atomTraversalBondOrder, bondsInPlay, bondSymbols); PRECONDITION(!molStack.empty(), "Empty stack."); PRECONDITION(molStack.begin()->type == MOL_STACK_ATOM, "Corrupted stack. First element should be an atom."); // collect some information about traversal order on chiral atoms bool *numSwapsChiralAtoms = (bool *)malloc(nAtoms * sizeof(bool)); memset(numSwapsChiralAtoms, 0, nAtoms * sizeof(bool)); if (doIsomericSmiles) { for (ROMol::AtomIterator atomIt = mol.beginAtoms(); atomIt != mol.endAtoms(); ++atomIt) { if ((*atomIt)->getChiralTag() != Atom::CHI_UNSPECIFIED) { ROMol::OEDGE_ITER beg, end; boost::tie(beg, end) = mol.getAtomBonds(*atomIt); while (beg != end) { if (bondsInPlay && !(*bondsInPlay)[mol[*beg]->getIdx()]) { (*atomIt)->setProp(common_properties::_brokenChirality, true); break; } ++beg; } if ((*atomIt)->hasProp(common_properties::_brokenChirality)) { continue; } INT_LIST trueOrder = atomTraversalBondOrder[(*atomIt)->getIdx()]; // Test if the atom is in current fragment if (trueOrder.size() > 0) { int nSwaps = (*atomIt)->getPerturbationOrder(trueOrder); if ((*atomIt)->getDegree() == 3 && molStack.begin()->obj.atom->getIdx() == (*atomIt)->getIdx()) { // This is a special case. Here's an example: // Our internal representation of a chiral center is equivalent // to: // [C@](F)(O)(C)[H] // we'll be dumping it without the H, which entails a reordering: // [C@@H](F)(O)C ++nSwaps; } if (nSwaps % 2) { numSwapsChiralAtoms[(*atomIt)->getIdx()] = 1; } } } } } // remove the current directions on single bonds around double bonds: for (ROMol::BondIterator bondIt = mol.beginBonds(); bondIt != mol.endBonds(); ++bondIt) { Bond::BondDir dir = (*bondIt)->getBondDir(); if (dir == Bond::ENDDOWNRIGHT || dir == Bond::ENDUPRIGHT) { (*bondIt)->setBondDir(Bond::NONE); } } #if 0 std::cerr<<"<11111111"<"<\ntraversal stack:"<type == MOL_STACK_ATOM) std::cerr<<" atom: "<obj.atom->getIdx()<type == MOL_STACK_BOND) std::cerr<<" bond: "<obj.bond->getIdx()<<" "<number<<" "<obj.bond->getBeginAtomIdx()<<"-"<obj.bond->getEndAtomIdx()<<" order: "<obj.bond->getBondType()<type == MOL_STACK_RING) std::cerr<<" ring: "<number<type == MOL_STACK_BRANCH_OPEN) std::cerr<<" branch open"<type == MOL_STACK_BRANCH_CLOSE) std::cerr<<" branch close"<type == MOL_STACK_BOND && msI->obj.bond->getBondType() == Bond::DOUBLE && msI->obj.bond->getStereo() > Bond::STEREOANY) { if (msI->obj.bond->getStereoAtoms().size() >= 2) { Canon::canonicalizeDoubleBond(msI->obj.bond, bondVisitOrders, atomVisitOrders, bondDirCounts, atomDirCounts); } else { // bad stereo spec: msI->obj.bond->setStereo(Bond::STEREONONE); } } if (doIsomericSmiles) { if (msI->type == MOL_STACK_ATOM && msI->obj.atom->getChiralTag() != Atom::CHI_UNSPECIFIED && !msI->obj.atom->hasProp(common_properties::_brokenChirality)) { if (msI->obj.atom->hasProp(common_properties::_ringStereoAtoms)) { if (!ringStereoChemAdjusted[msI->obj.atom->getIdx()]) { msI->obj.atom->setChiralTag(Atom::CHI_TETRAHEDRAL_CCW); ringStereoChemAdjusted.set(msI->obj.atom->getIdx()); } const INT_VECT &ringStereoAtoms = msI->obj.atom->getProp( common_properties::_ringStereoAtoms); BOOST_FOREACH (int nbrV, ringStereoAtoms) { int nbrIdx = abs(nbrV) - 1; // Adjust the chiraliy flag of the ring stereo atoms according to // the first one if (!ringStereoChemAdjusted[nbrIdx] && atomVisitOrders[nbrIdx] > atomVisitOrders[msI->obj.atom->getIdx()]) { mol.getAtomWithIdx(nbrIdx) ->setChiralTag(msI->obj.atom->getChiralTag()); if (nbrV < 0) { mol.getAtomWithIdx(nbrIdx)->invertChirality(); } // Odd number of swaps for first chiral ring atom --> needs to be // swapped but we want to retain chirality if (numSwapsChiralAtoms[msI->obj.atom->getIdx()]) { // Odd number of swaps for chiral ring neighbor --> needs to be // swapped but we want to retain chirality if (!numSwapsChiralAtoms[nbrIdx]) { mol.getAtomWithIdx(nbrIdx)->invertChirality(); } } // Even number of swaps for first chiral ring atom --> don't need // to be swapped else { // Odd number of swaps for chiral ring neighbor --> needs to be // swapped if (numSwapsChiralAtoms[nbrIdx]) { mol.getAtomWithIdx(nbrIdx)->invertChirality(); } } ringStereoChemAdjusted.set(nbrIdx); } } } else { if (msI->obj.atom->getChiralTag() == Atom::CHI_TETRAHEDRAL_CW) { if ((numSwapsChiralAtoms[msI->obj.atom->getIdx()])) { msI->obj.atom->invertChirality(); } } else if (msI->obj.atom->getChiralTag() == Atom::CHI_TETRAHEDRAL_CCW) { if ((numSwapsChiralAtoms[msI->obj.atom->getIdx()])) { msI->obj.atom->invertChirality(); } } } } } } #if 0 std::cerr<<"<-----"<"<"<"<