// $Id$ // // Copyright (C) 2001-2009 Greg Landrum and Rational Discovery LLC // // @@ All Rights Reserved @@ // This file is part of the RDKit. // The contents are covered by the terms of the BSD license // which is included in the file license.txt, found at the root // of the RDKit source tree. // #include #include #include #include #include #include // end of namespace Kekulize namespace RDKit { // Local utility namespace namespace { void backTrack(RWMol &mol, INT_INT_DEQ_MAP &options, int lastOpt, INT_VECT &done, INT_DEQUE &aqueue, boost::dynamic_bitset<> &dBndCands, boost::dynamic_bitset<> &dBndAdds) { // so we made a wrong turn at the lastOpt //remove on done list that comes after the lastOpt including itself INT_VECT_I ei = std::find(done.begin(), done.end(), lastOpt); INT_VECT tdone; tdone.insert(tdone.end(),done.begin(),ei); INT_VECT_CRI eri = std::find(done.rbegin(), done.rend(), lastOpt); ++eri; // and push them back onto the stack for (INT_VECT_CRI ri = done.rbegin(); ri != eri; ++ri) { aqueue.push_front(*ri); } // remove any double bonds that were add since we passed through lastOpt Bond *bnd; unsigned int nbnds = mol.getNumBonds(); for (unsigned int bi = 0; bi < nbnds; bi++) { if (dBndAdds[bi]) { bnd = mol.getBondWithIdx(bi); int aid1 = bnd->getBeginAtomIdx(); int aid2 = bnd->getEndAtomIdx(); // if one of these atoms has been dealt with before lastOpt // we don't have to chnage the double bond addition if ( (std::find(tdone.begin(), tdone.end(), aid1) == tdone.end()) && (std::find(tdone.begin(), tdone.end(), aid2) == tdone.end()) ) { // otherwise strip the double bond and set it back to single // and add the atoms to candidate for double bonds dBndAdds[bi]=0; bnd->setBondType(Bond::SINGLE); dBndCands[aid1]=1; dBndCands[aid2]=1; } } } done = tdone; } void markDbondCands(RWMol &mol, const INT_VECT &allAtms, boost::dynamic_bitset<> &dBndCands, INT_VECT &questions, INT_VECT &done){ // ok this function does more than mark atoms that are candidates for // double bonds during kekulization // - check that an non aromatic atom does not have any aromatic bonds // - marks all aromatic bonds to single bonds // - marks atoms that can take a double bond bool hasAromaticOrDummyAtom=false; for (INT_VECT_CI adx = allAtms.begin(); adx != allAtms.end(); ++adx) { if(mol.getAtomWithIdx(*adx)->getIsAromatic() || !mol.getAtomWithIdx(*adx)->getAtomicNum()){ hasAromaticOrDummyAtom=true; break; } } // if there's not at least one atom in the ring that's // marked as being aromatic or a dummy, // there's no point in continuing: if(!hasAromaticOrDummyAtom) return; std::vector makeSingle; for (INT_VECT_CI adx = allAtms.begin(); adx != allAtms.end(); ++adx) { Atom *at = mol.getAtomWithIdx(*adx); if (!at->getIsAromatic() && at->getAtomicNum() ) { done.push_back(*adx); // make sure all the bonds on this atom are also non aromatic // i.e. can't have aromatic bond onto a non-aromatic atom RWMol::OEDGE_ITER beg,end; boost::tie(beg,end) = mol.getAtomBonds(at); while (beg != end) { // ok we can't have an aromatic atom if (mol[*beg]->getIsAromatic()) { std::ostringstream errout; errout << "Aromatic bonds on non aromatic atom " << at->getIdx(); std::string msg = errout.str(); BOOST_LOG(rdErrorLog) << msg << std::endl; throw MolSanitizeException(msg); } ++beg; } continue; } // count the number of neighbors connected with single, // double, or aromatic bonds. Along the way, mark // bonds that we will later mark as being single: int sbo = 0; RWMol::OEDGE_ITER beg,end; boost::tie(beg,end) = mol.getAtomBonds(at); while (beg != end) { Bond *bond=mol[*beg].get(); if (bond->getIsAromatic() && (bond->getBondType()==Bond::SINGLE || bond->getBondType()==Bond::DOUBLE || bond->getBondType()==Bond::AROMATIC) ) { ++sbo; // mark this bond to be marked single later // we don't want to do right now because it can screw-up the // valence calculation to determine the number of hydrogens below makeSingle.push_back(bond); } else { sbo += (int)bond->getValenceContrib(at); } ++beg; } if(!at->getAtomicNum()){ // dummies always start as candidates to have a double bond: dBndCands[*adx]=1; // but they don't have to have one, so mark them as questionable: questions.push_back(*adx); } else { // for non dummies, it's a bit more work to figure out if they // can take a double bond: sbo += at->getTotalNumHs(); int dv = PeriodicTable::getTable()->getDefaultValence(at->getAtomicNum()); int chrg = at->getFormalCharge(); dv += chrg; int tbo = at->getExplicitValence() + at->getImplicitValence(); int nRadicals=at->getNumRadicalElectrons(); int totalDegree=at->getDegree()+at->getImplicitValence(); const INT_VECT &valList = PeriodicTable::getTable()->getValenceList(at->getAtomicNum()); unsigned int vi = 1; while ( tbo>dv && vi0 ) { dv = valList[vi] + chrg; ++vi; } if(totalDegree+nRadicals>=dv){ // if our degree + nRadicals exceeds the default valence, // there's no way we can take a double bond, just continue. continue; } if( dv==(sbo+1) ){ dBndCands[*adx]=1; } else if( !nRadicals && at->getNoImplicit() && dv==(sbo+2) ){ // special case: there is currently no radical on the atom, but if // if we allow one then this is a candidate: dBndCands[*adx]=1; } } }// loop over all atoms in the fused system // now turn all the aromatic bond in this fused system to single for (std::vector::iterator bi=makeSingle.begin(); bi!=makeSingle.end();++bi){ (*bi)->setBondType(Bond::SINGLE); } } bool kekulizeWorker(RWMol &mol,const INT_VECT &allAtms, boost::dynamic_bitset<> dBndCands, boost::dynamic_bitset<> dBndAdds, INT_VECT done, unsigned int maxBackTracks){ INT_DEQUE astack; INT_INT_DEQ_MAP options; int lastOpt=-1; // ok the algorithm goes something like this // - start with an atom that has been marked aromatic before // - check if it can have a double bond // - add its neighbors to the stack // - check if one of its neighbors can also have a double bond // - if yes add a double bond. // - if multiple neighbors can have double bonds - add them to a // options stack we may have to retrace out path if we chose the // wrong neighbor to add the double bond // - if double bond added update the candidates for double bond // - move to the next atom on the stack and repeat the process // - if an atom that can have multiple a double bond has no // neighbors that can take double bond - we made a mistake // earlier by picking a wrong candidate for double bond // - in this case back track to where we made the mistake int curr; INT_DEQUE btmoves; unsigned int numBT = 0; // number of back tracks so far while ( (done.size() < allAtms.size()) || (astack.size() > 0) ) { // pick a curr atom to work with if (astack.size() > 0) { curr = astack.front(); astack.pop_front(); } else { for (INT_VECT_CI ai = allAtms.begin(); ai != allAtms.end(); ++ai){ if (std::find(done.begin(), done.end(), (*ai)) == done.end()) { curr = (*ai); break; } } } done.push_back(curr); // loop over the neighbors if we can add double bonds or // simply push them onto the stack INT_DEQUE opts; bool cCand = false; if (dBndCands[curr]) { cCand = true; } int ncnd; // if we are here because of backtracking if (options.find(curr) != options.end()) { opts = options[curr]; CHECK_INVARIANT(opts.size() > 0, ""); } else { RWMol::ADJ_ITER nbrIdx,endNbrs; boost::tie(nbrIdx,endNbrs) = mol.getAtomNeighbors(mol.getAtomWithIdx(curr)); while (nbrIdx != endNbrs) { // ignore if the neighbor has already been dealt with before if (std::find(done.begin(), done.end(), static_cast(*nbrIdx)) != done.end()) { ++nbrIdx; continue; } // ignore if the neighbor is not part of the fused system if (std::find(allAtms.begin(),allAtms.end(), static_cast(*nbrIdx)) == allAtms.end()) { ++nbrIdx; continue; } // if the neighbor is not on the stack add it if (std::find(astack.begin(), astack.end(), static_cast(*nbrIdx)) == astack.end()) { astack.push_back(*nbrIdx); } // check if the neighbor is also a candidate for a double bond if (cCand && dBndCands[*nbrIdx] ){ opts.push_back(*nbrIdx); } // end of curr atoms can have a double bond ++nbrIdx; } // end of looping over neighbors } // now add a double bond from current to one of the neighbors if we can if (cCand) { if (opts.size() > 0) { ncnd = opts.front(); opts.pop_front(); Bond *bnd = mol.getBondBetweenAtoms(curr, ncnd); bnd->setBondType(Bond::DOUBLE); // remove current and the neighbor from the dBndCands list dBndCands[curr]=0; dBndCands[ncnd]=0; // add them to the list of bonds to which have been made double dBndAdds[bnd->getIdx()]=1; // if this is an atom we previously visted and picked we // simply tried a different option now, overwrite the options // stored for this atoms if (options.find(curr) != options.end() ) { if(opts.size() == 0){ options.erase(curr); btmoves.pop_back(); if (btmoves.size() > 0) { lastOpt = btmoves.back(); } else { lastOpt = -1; } } else { options[curr] = opts; } } else { // this is new atoms we are trying and have other // neighbors as options to add double bond store this to // the options stack, we may have made a mistake in // which one we chose and have to return here if (opts.size() > 0) { lastOpt = curr; btmoves.push_back(lastOpt); options[curr] = opts; } } } // end of adding a double bond else { // we have an atom that should be getting a double bond // but none of the neighbors can take one. Most likely // because of a wrong choice earlier so back track if ((lastOpt >= 0) && (numBT < maxBackTracks)) { //std::cerr << "PRE BACKTRACK" << std::endl; //mol.debugMol(std::cerr); backTrack(mol, options, lastOpt, done, astack, dBndCands, dBndAdds); //std::cerr << "POST BACKTRACK" << std::endl; //mol.debugMol(std::cerr); numBT++; } else { return false; } } // end of else try to backtrack } // end of curr atom atom being a cand for double bond } // end of while we are not done with all atoms return true; } class QuestionEnumerator{ public: QuestionEnumerator(const INT_VECT &questions) : d_questions(questions) , d_pos(1) {}; INT_VECT next() { INT_VECT res; if(d_pos >= (0x1u< dBndCands, INT_VECT &questions, unsigned int maxBackTracks){ boost::dynamic_bitset<> atomsInPlay(mol.getNumAtoms()); for(INT_VECT_CI ai=allAtms.begin();ai!=allAtms.end();++ai){ atomsInPlay[*ai]=1; } bool kekulized=false; QuestionEnumerator qEnum(questions); while(!kekulized && questions.size()){ boost::dynamic_bitset<> dBndAdds(mol.getNumBonds()); INT_VECT done; #if 1 // reset the state: all aromatic bonds are remarked to single: for(RWMol::BondIterator bi=mol.beginBonds();bi!=mol.endBonds();++bi){ if((*bi)->getIsAromatic() && (*bi)->getBondType()!=Bond::SINGLE && atomsInPlay[(*bi)->getBeginAtomIdx()] && atomsInPlay[(*bi)->getEndAtomIdx()] ){ (*bi)->setBondType(Bond::SINGLE); } } #endif // pick a new permutation of the questionable atoms: const INT_VECT &switchOff=qEnum.next(); if(!switchOff.size()) break; boost::dynamic_bitset<> tCands=dBndCands; for(INT_VECT_CI it=switchOff.begin();it!=switchOff.end();++it){ tCands[*it]=0; } #if 0 std::cerr<<"permute: "; for (boost::dynamic_bitset<>::size_type i = 0; i < tCands.size(); ++i){ std::cerr << tCands[i]; } std::cerr< dBndCands(nats); boost::dynamic_bitset<> dBndAdds(nbnds); markDbondCands(mol, allAtms, dBndCands, questions, done); #if 0 std::cerr << "candidates: "; for(int i=0;igetImplicitValence()); } // A bit on the state of the molecule at this point // - aromatic and non aromatic atoms and bonds may be mixed up // - for all aromatic bonds it is assumed that that both the following // are true: // - getIsAromatic returns true // - getBondType return aromatic // - all aromatic atoms return true for "getIsAromatic" // first find the all the simple rings in the molecule VECT_INT_VECT arings; MolOps::symmetrizeSSSR(mol, arings); // convert the rings to bonds ids VECT_INT_VECT brings; RingUtils::convertToBonds(arings, brings, mol); // make a the neighbor map for the rings i.e. a ring is a // neighbor to another candidate ring if it shares at least // one bond // useful to figure out fused systems INT_INT_VECT_MAP neighMap; RingUtils::makeRingNeighborMap(brings, neighMap); int curr = 0; int cnrs = arings.size(); boost::dynamic_bitset<> fusDone(cnrs); while (curr < cnrs) { INT_VECT fused; RingUtils::pickFusedRings(curr, neighMap, fused, fusDone); VECT_INT_VECT frings; for (INT_VECT_CI ci = fused.begin(); ci != fused.end();++ci) { frings.push_back(arings[*ci]); } kekulizeFused(mol, frings, maxBackTracks); int rix; for (rix = 0; rix < cnrs; rix++) { if (!fusDone[rix]) { curr = rix; break; } } if (rix == cnrs) { break; } } if (markAtomsBonds) { // if we want the atoms and bonds to be marked non-aromatic do // that here. for (ROMol::AtomIterator ai = mol.beginAtoms(); ai != mol.endAtoms(); ++ai) { if((*ai)->getIsAromatic()){ if(!mol.getRingInfo()->numAtomRings((*ai)->getIdx())){ std::ostringstream errout; errout << "non-ring atom " << (*ai)->getIdx()<<" marked aromatic"; std::string msg = errout.str(); BOOST_LOG(rdErrorLog) << msg<< std::endl; throw MolSanitizeException(msg); } (*ai)->setIsAromatic(false); } } for (ROMol::BondIterator bi = mol.beginBonds(); bi != mol.endBonds(); ++bi) { (*bi)->setIsAromatic(false); } } for (ROMol::BondIterator bi=mol.beginBonds(); bi != mol.endBonds(); ++bi) { // by now the bondtype should have already changed from aromatic if (markAtomsBonds && (*bi)->getBondType() == Bond::AROMATIC) { std::ostringstream errout; errout << "Kekulization somehow did not convert bond " << (*bi)->getIdx(); std::string msg = errout.str(); BOOST_LOG(rdErrorLog) << msg<< std::endl; throw MolSanitizeException(msg); } } // ok some error checking here force a implicit valence // calculation that should do some error checking by itself. In // addition compare them to what they were before kekulizing int i = 0; for (ROMol::AtomIterator ai = mol.beginAtoms(); ai != mol.endAtoms(); ++ai) { int val = (*ai)->getImplicitValence(); if (val != valences[i]) { std::ostringstream errout; errout << "Kekulization somehow screwed up valence on " << (*ai)->getIdx() <<": "<