// $Id$ // // Copyright (C) 2002-2008 Greg Landrum and Rational Discovery LLC // // @@ All Rights Reserved @@ // This file is part of the RDKit. // The contents are covered by the terms of the BSD license // which is included in the file license.txt, found at the root // of the RDKit source tree. // #include #include #include #include #include #include using namespace boost::lambda; namespace RankAtoms{ using namespace RDKit; // -------------------------------------------------- // // grabs the corresponding primes for the rank vector ranks // // -------------------------------------------------- void getPrimes(const INT_VECT &ranks,INT_VECT &res){ PRECONDITION(res.size()==0,""); res.reserve(ranks.size()); for(INT_VECT_CI ivCIt=ranks.begin();ivCIt!=ranks.end();++ivCIt){ res.push_back(firstThousandPrimes[*ivCIt]); } } // -------------------------------------------------- // // blows out any indices in indicesInPlay which correspond to unique ranks // // -------------------------------------------------- void updateInPlayIndices(const INT_VECT &ranks,INT_LIST &indicesInPlay){ INT_LIST::iterator ivIt=indicesInPlay.begin(); while(ivIt!=indicesInPlay.end()){ // find the first instance of this rank: INT_VECT::const_iterator pos=std::find(ranks.begin(),ranks.end(),ranks[*ivIt]); ++pos; // now check to see if there is at least one more: if( std::find(pos,ranks.end(),ranks[*ivIt])==ranks.end()){ INT_LIST::iterator tmpIt = ivIt; ++ivIt; indicesInPlay.erase(tmpIt); } else { ++ivIt; } } } // -------------------------------------------------- // // for each index in indicesInPlay, generate the products of the adjacent // elements // // The products are weighted by the order of the bond connecting the atoms. // // -------------------------------------------------- void calcAdjacentProducts(unsigned int nAtoms, const INT_VECT &valVect, double const *adjMat, const INT_LIST &indicesInPlay, DOUBLE_VECT &res, bool useSelf=true, double tol=1e-6){ PRECONDITION(valVect.size() >= nAtoms,""); PRECONDITION(res.size() == 0,""); PRECONDITION(adjMat,""); for(INT_LIST::const_iterator idxIt=indicesInPlay.begin(); idxIt != indicesInPlay.end(); ++idxIt){ double accum; if(useSelf) accum=valVect[*idxIt]; else accum=1.0; const unsigned int iTab = (*idxIt)*nAtoms; for(unsigned int j=0;jtol){ if(elem<2.-tol){ accum *= valVect[j]; } else { accum *= pow(static_cast(valVect[j]), static_cast(elem)); } } } res.push_back(accum); } } template void debugVect(const std::vector arg){ typename std::vector::const_iterator viIt; for(viIt=arg.begin();viIt!=arg.end();++viIt){ BOOST_LOG(rdDebugLog)<< *viIt << " "; } BOOST_LOG(rdDebugLog)<< std::endl; } // -------------------------------------------------- // // This is one round of the process from Step III in the Daylight // paper // // -------------------------------------------------- unsigned int iterateRanks(unsigned int nAtoms,INT_VECT &primeVect, DOUBLE_VECT &atomicVect, INT_LIST &indicesInPlay, double *adjMat, INT_VECT &ranks, VECT_INT_VECT *rankHistory=0,unsigned int stagnantTol=1){ PRECONDITION(!rankHistory||rankHistory->size()>=nAtoms,"bad rankHistory size"); bool done = false; unsigned int numClasses = countClasses(ranks); unsigned int lastNumClasses = 0; unsigned int nCycles = 0; unsigned int nStagnant=0; if(stagnantTol<0){ stagnantTol = nAtoms; } // // loop until either we finish or no improvement is seen // #ifdef VERBOSE_CANON for(unsigned int i=0;i:" << i << " " << ranks[i] << std::endl; } BOOST_LOG(rdDebugLog)<< "\t\t-*-*-*-*-" << std::endl; #endif while(!done && nCycles < nAtoms){ // determine which atomic indices are in play (which have duplicate ranks) if(rankHistory){ for(INT_LIST_CI idx=indicesInPlay.begin();idx!=indicesInPlay.end();++idx){ (*rankHistory)[*idx].push_back(ranks[*idx]); } } updateInPlayIndices(ranks,indicesInPlay); if(indicesInPlay.empty()) break; #ifdef VERYVERBOSE_CANON BOOST_LOG(rdDebugLog)<< "IN PLAY:" << std::endl; BOOST_LOG(rdDebugLog)<< "\t\t->"; for(INT_LIST::const_iterator tmpI=indicesInPlay.begin();tmpI != indicesInPlay.end();tmpI++){ BOOST_LOG(rdDebugLog)<< " " << *tmpI; } BOOST_LOG(rdDebugLog)<< std::endl; BOOST_LOG(rdDebugLog)<< "\t\t---------" << std::endl; #endif //------------------------- // Step (2): // Get the products of adjacent primes //------------------------- primeVect.resize(0); getPrimes(ranks,primeVect); atomicVect.resize(0); calcAdjacentProducts(nAtoms,primeVect,adjMat,indicesInPlay,atomicVect); #ifdef VERYVERBOSE_CANON BOOST_LOG(rdDebugLog)<< "primes: "; debugVect(primeVect); BOOST_LOG(rdDebugLog)<< "products: "; debugVect(atomicVect); #endif //------------------------- // Steps (3) and (4) // sort the products and count classes //------------------------- sortAndRankVect(nAtoms,atomicVect,indicesInPlay,ranks); lastNumClasses = numClasses; numClasses = countClasses(ranks); if(numClasses == lastNumClasses) nStagnant++; #ifdef VERYVERBOSE_CANON int tmpOff=0; for(unsigned int i=0;i stagnantTol) done = 1; nCycles++; } #ifdef VERBOSE_CANON BOOST_LOG(rdDebugLog)<< ">>>>>> done inner iteration. static: "<< nStagnant << " "; BOOST_LOG(rdDebugLog)<< nCycles << " " << nAtoms << " " << numClasses << std::endl; #ifdef VERYVERBOSE_CANON for(unsigned int i=0;i=mol.getNumAtoms(),"res vect too small"); unsigned int atsSoFar=0; for(ROMol::ConstAtomIterator atIt=mol.beginAtoms();atIt!=mol.endAtoms();atIt++){ Atom const *atom = *atIt; // FIX: this is just for debugging purposes: unsigned long invariant = atom->getIdx(); int nHs = atom->getTotalNumHs() % 8; int chg = abs(atom->getFormalCharge()) % 8; int chgSign = atom->getFormalCharge() > 0; int num = atom->getAtomicNum() % 128; int nConns = atom->getDegree() % 8; int deltaMass = static_cast(atom->getMass() - PeriodicTable::getTable()->getAtomicWeight(atom->getAtomicNum())); deltaMass += 8; if(deltaMass < 0) deltaMass = 0; else deltaMass = deltaMass % 16; // figure out the minimum-sized ring we're involved in int inRing = 0; if(atom->getOwningMol().getRingInfo()->numAtomRings(atom->getIdx())){ RingInfo *ringInfo=atom->getOwningMol().getRingInfo(); inRing=3; while(inRing<256){ if(ringInfo->isAtomInRingOfSize(atom->getIdx(),inRing)){ break; } else { inRing++; } } } inRing = inRing % 16; invariant = 0; invariant = (invariant << 3) | nConns; // we used to include the number of explicitHs, but that // didn't make much sense. TotalValence is another possible // discriminator here, but the information is essentially // redundant with nCons, num, and nHs. // invariant = (invariant << 4) | totalVal; invariant = (invariant << 7) | num; invariant = (invariant << 4) | deltaMass; invariant = (invariant << 3) | nHs; invariant = (invariant << 4) | inRing; invariant = (invariant << 3) | chg; invariant = (invariant << 1) | chgSign; if(includeChirality ){ int isR=0; if( atom->hasProp("_CIPCode")){ std::string cipCode; atom->getProp("_CIPCode",cipCode); if(cipCode=="R"){ isR=1; } else { isR=2; } } invariant = (invariant << 2) | isR; } // now deal with cis/trans - this is meant to address issue 174 // loop over the bonds on this atom and check if we have a double bond with // a chiral code marking if (includeChirality) { ROMol::OBOND_ITER_PAIR atomBonds = atom->getOwningMol().getAtomBonds(atom); int isT=0; while (atomBonds.first != atomBonds.second){ BOND_SPTR tBond = atom->getOwningMol()[*(atomBonds.first)]; if( (tBond->getBondType() == Bond::DOUBLE) && (tBond->getStereo()>Bond::STEREOANY )) { if (tBond->getStereo()==Bond::STEREOE) { isT = 1; } else if(tBond->getStereo()==Bond::STEREOZ) { isT=2; } break; } atomBonds.first++; } invariant = (invariant << 2) | isT; } res[atsSoFar++] = invariant; } } }// end of RankAtoms namespace namespace RDKit{ namespace MolOps { // -------------------------------------------------- // // Daylight canonicalization, based up on algorithm described in // JCICS 29, 97-101, (1989) // When appropriate, specific references are made to the algorithm // description in that paper. Steps refer to Table III of the paper // // -------------------------------------------------- void rankAtoms(const ROMol &mol,INT_VECT &ranks, bool breakTies, VECT_INT_VECT *rankHistory){ unsigned int i; unsigned int nAtoms = mol.getNumAtoms(); PRECONDITION(ranks.size()>=nAtoms,""); PRECONDITION(!rankHistory||rankHistory->size()>=nAtoms,"bad rankHistory size"); unsigned int stagnantTol=1; if(!mol.getRingInfo()->isInitialized()){ MolOps::findSSSR(mol); } if(nAtoms > 1){ double *adjMat = MolOps::getAdjacencyMatrix(mol, true); // ---------------------- // generate atomic invariants, Step (1) // ---------------------- INVAR_VECT invariants; invariants.resize(nAtoms); RankAtoms::buildAtomInvariants(mol,invariants,true); #ifdef VERBOSE_CANON BOOST_LOG(rdDebugLog)<< "invariants:" << std::endl; for(i=0;i