// $Id$ // // Copyright (C) 2002-2012 Greg Landrum and Rational Discovery LLC // // @@ All Rights Reserved @@ // This file is part of the RDKit. // The contents are covered by the terms of the BSD license // which is included in the file license.txt, found at the root // of the RDKit source tree. // #include #include #include #include #include #include #include #include #include #include using namespace boost::lambda; //#define VERBOSE_CANON 1 //#define VERYVERBOSE_CANON 1 namespace RankAtoms{ using namespace RDKit; // -------------------------------------------------- // // grabs the corresponding primes for the rank vector ranks // // -------------------------------------------------- void getPrimes(const INT_VECT &ranks,INT_VECT &res){ PRECONDITION(res.size()==0,""); res.reserve(ranks.size()); for(INT_VECT_CI ivCIt=ranks.begin();ivCIt!=ranks.end();++ivCIt){ res.push_back(firstThousandPrimes[(*ivCIt)%NUM_PRIMES_AVAIL]); } } // -------------------------------------------------- // // blows out any indices in indicesInPlay which correspond to unique ranks // // -------------------------------------------------- void updateInPlayIndices(const INT_VECT &ranks,INT_LIST &indicesInPlay){ INT_LIST::iterator ivIt=indicesInPlay.begin(); while(ivIt!=indicesInPlay.end()){ // find the first instance of this rank: INT_VECT::const_iterator pos=std::find(ranks.begin(),ranks.end(),ranks[*ivIt]); ++pos; // now check to see if there is at least one more: if( std::find(pos,ranks.end(),ranks[*ivIt])==ranks.end()){ INT_LIST::iterator tmpIt = ivIt; ++ivIt; indicesInPlay.erase(tmpIt); } else { ++ivIt; } } } // -------------------------------------------------- // // for each index in indicesInPlay, generate the products of the adjacent // elements // // The products are weighted by the order of the bond connecting the atoms. // // -------------------------------------------------- void calcAdjacentProducts(unsigned int nAtoms, const INT_VECT &valVect, double const *adjMat, const INT_LIST &indicesInPlay, DOUBLE_VECT &res, bool useSelf=true, double tol=1e-6){ PRECONDITION(valVect.size() >= nAtoms,""); PRECONDITION(res.size() == 0,""); PRECONDITION(adjMat,""); for(INT_LIST::const_iterator idxIt=indicesInPlay.begin(); idxIt != indicesInPlay.end(); ++idxIt){ double accum; if(useSelf) accum=valVect[*idxIt]; else accum=1.0; const unsigned int iTab = (*idxIt)*nAtoms; for(unsigned int j=0;jtol){ if(elem<2.-tol){ accum *= valVect[j]; } else { accum *= pow(static_cast(valVect[j]), static_cast(elem)); } } } res.push_back(accum); } } template void debugVect(const std::vector arg){ typename std::vector::const_iterator viIt; for(viIt=arg.begin();viIt!=arg.end();++viIt){ BOOST_LOG(rdDebugLog)<< *viIt << " "; } BOOST_LOG(rdDebugLog)<< std::endl; } // -------------------------------------------------- // // This is one round of the process from Step III in the Daylight // paper // // -------------------------------------------------- unsigned int iterateRanks(unsigned int nAtoms,INT_VECT &primeVect, DOUBLE_VECT &atomicVect, INT_LIST &indicesInPlay, double *adjMat, INT_VECT &ranks, VECT_INT_VECT *rankHistory,unsigned int stagnantTol){ PRECONDITION(!rankHistory||rankHistory->size()>=nAtoms,"bad rankHistory size"); bool done = false; unsigned int numClasses = countClasses(ranks); unsigned int lastNumClasses = 0; unsigned int nCycles = 0; unsigned int nStagnant=0; // // loop until either we finish or no improvement is seen // #ifdef VERBOSE_CANON for(unsigned int i=0;i:" << i << " " << ranks[i] << std::endl; } BOOST_LOG(rdDebugLog)<< "\t\t-*-*-*-*-" << std::endl; #endif while(!done && nCycles < nAtoms){ // determine which atomic indices are in play (which have duplicate ranks) if(rankHistory){ for(INT_LIST_CI idx=indicesInPlay.begin();idx!=indicesInPlay.end();++idx){ (*rankHistory)[*idx].push_back(ranks[*idx]); } } updateInPlayIndices(ranks,indicesInPlay); if(indicesInPlay.empty()) break; #ifdef VERYVERBOSE_CANON BOOST_LOG(rdDebugLog)<< "IN PLAY:" << std::endl; BOOST_LOG(rdDebugLog)<< "\t\t->"; for(INT_LIST::const_iterator tmpI=indicesInPlay.begin();tmpI != indicesInPlay.end();tmpI++){ BOOST_LOG(rdDebugLog)<< " " << *tmpI; } BOOST_LOG(rdDebugLog)<< std::endl; BOOST_LOG(rdDebugLog)<< "\t\t---------" << std::endl; #endif //------------------------- // Step (2): // Get the products of adjacent primes //------------------------- primeVect.resize(0); getPrimes(ranks,primeVect); atomicVect.resize(0); calcAdjacentProducts(nAtoms,primeVect,adjMat,indicesInPlay,atomicVect,false); #ifdef VERYVERBOSE_CANON BOOST_LOG(rdDebugLog)<< "primes: "; debugVect(primeVect); BOOST_LOG(rdDebugLog)<< "products: "; debugVect(atomicVect); #endif //------------------------- // Steps (3) and (4) // sort the products and count classes //------------------------- sortAndRankVect(nAtoms,atomicVect,indicesInPlay,ranks); lastNumClasses = numClasses; numClasses = countClasses(ranks); if(numClasses == lastNumClasses) nStagnant++; #ifdef VERYVERBOSE_CANON int tmpOff=0; for(unsigned int i=0;i stagnantTol) done = 1; nCycles++; } #ifdef VERBOSE_CANON BOOST_LOG(rdDebugLog)<< ">>>>>> done inner iteration. static: "<< nStagnant << " "; BOOST_LOG(rdDebugLog)<< nCycles << " " << nAtoms << " " << numClasses << std::endl; #ifdef VERYVERBOSE_CANON for(unsigned int i=0;isize()>=nAtoms,"bad rankHistory size"); bool done = false; unsigned int numClasses = countClasses(ranks); unsigned int lastNumClasses = 0; unsigned int nCycles = 0; unsigned int nStagnant=0; // // loop until either we finish or no improvement is seen // #ifdef VERBOSE_CANON for(unsigned int i=0;i:" << i << " " << ranks[i] << std::endl; } BOOST_LOG(rdDebugLog)<< "\t\t-*-*-*-*-" << std::endl; #endif while(!done && nCycles < nAtoms){ // determine which atomic indices are in play (which have duplicate ranks) if(rankHistory){ BOOST_FOREACH(int idx,indicesInPlay){ (*rankHistory)[idx].push_back(ranks[idx]); } } updateInPlayIndices(ranks,indicesInPlay); if(indicesInPlay.empty()) break; #ifdef VERYVERBOSE_CANON BOOST_LOG(rdDebugLog)<< "IN PLAY:" << std::endl; BOOST_LOG(rdDebugLog)<< "\t\t->"; for(INT_LIST::const_iterator tmpI=indicesInPlay.begin();tmpI != indicesInPlay.end();tmpI++){ BOOST_LOG(rdDebugLog)<< " " << *tmpI; } BOOST_LOG(rdDebugLog)<< std::endl; BOOST_LOG(rdDebugLog)<< "\t\t---------" << std::endl; #endif //------------------------- // Step (2): // Get the products of adjacent primes //------------------------- primeVect.resize(0); getPrimes(ranks,primeVect); atomicVect.resize(0); calcAdjacentProducts(nAtoms,primeVect,adjMat,indicesInPlay,atomicVect,false); #ifdef VERYVERBOSE_CANON BOOST_LOG(rdDebugLog)<< "primes: "; debugVect(primeVect); BOOST_LOG(rdDebugLog)<< "products: "; debugVect(atomicVect); #endif unsigned int p=0; BOOST_FOREACH(int idx,indicesInPlay){ nRanks[idx].push_back(atomicVect[p++]); } #ifdef VERYVERBOSE_CANON for(int idx=0;idx(std::cerr," ")); std::cerr<<"\n"; } #endif //------------------------- // Steps (3) and (4) // sort the products and count classes //------------------------- rankVect(nRanks,ranks); //sortAndRankVect2(nRanks,indicesInPlay,ranks); lastNumClasses = numClasses; numClasses = countClasses(ranks); if(numClasses == lastNumClasses) nStagnant++; #ifdef VERYVERBOSE_CANON int tmpOff=0; for(unsigned int i=0;i stagnantTol) done = 1; nCycles++; } #ifdef VERBOSE_CANON BOOST_LOG(rdDebugLog)<< ">>>>>> done inner iteration. static: "<< nStagnant << " "; BOOST_LOG(rdDebugLog)<< nCycles << " " << nAtoms << " " << numClasses << std::endl; #ifdef VERYVERBOSE_CANON for(unsigned int i=0;i &res, bool includeChirality, bool includeIsotopes){ PRECONDITION(res.size()>=mol.getNumAtoms(),"res vect too small"); unsigned int atsSoFar=0; for(ROMol::ConstAtomIterator atIt=mol.beginAtoms();atIt!=mol.endAtoms();atIt++){ Atom const *atom = *atIt; int nHs = atom->getTotalNumHs() % 8; int chg = abs(atom->getFormalCharge()) % 8; int chgSign = atom->getFormalCharge() > 0; int num = atom->getAtomicNum() % 128; int nConns = atom->getDegree() % 8; int deltaMass=0; if(includeIsotopes && atom->getIsotope()){ deltaMass = static_cast(atom->getIsotope() - PeriodicTable::getTable()->getMostCommonIsotope(atom->getAtomicNum())); deltaMass += 128; if(deltaMass < 0) deltaMass = 0; else deltaMass = deltaMass % 256; } // figure out the minimum-sized ring we're involved in int inRing = 0; if(atom->getOwningMol().getRingInfo()->numAtomRings(atom->getIdx())){ RingInfo *ringInfo=atom->getOwningMol().getRingInfo(); inRing=3; while(inRing<256){ if(ringInfo->isAtomInRingOfSize(atom->getIdx(),inRing)){ break; } else { inRing++; } } } inRing = inRing % 16; boost::uint64_t invariant = 0; invariant = (invariant << 3) | nConns; // we used to include the number of explicitHs, but that // didn't make much sense. TotalValence is another possible // discriminator here, but the information is essentially // redundant with nCons, num, and nHs. // invariant = (invariant << 4) | totalVal; invariant = (invariant << 7) | num; invariant = (invariant << 8) | deltaMass; invariant = (invariant << 3) | nHs; invariant = (invariant << 4) | inRing; invariant = (invariant << 3) | chg; invariant = (invariant << 1) | chgSign; if(includeChirality ){ int isR=0; if( atom->hasProp("_CIPCode")){ std::string cipCode; atom->getProp("_CIPCode",cipCode); if(cipCode=="R"){ isR=1; } else { isR=2; } } invariant = (invariant << 2) | isR; } // now deal with cis/trans - this is meant to address issue 174 // loop over the bonds on this atom and check if we have a double bond with // a chiral code marking if (includeChirality) { ROMol::OBOND_ITER_PAIR atomBonds = atom->getOwningMol().getAtomBonds(atom); int isT=0; while (atomBonds.first != atomBonds.second){ BOND_SPTR tBond = atom->getOwningMol()[*(atomBonds.first)]; if( (tBond->getBondType() == Bond::DOUBLE) && (tBond->getStereo()>Bond::STEREOANY )) { if (tBond->getStereo()==Bond::STEREOE) { isT = 1; } else if(tBond->getStereo()==Bond::STEREOZ) { isT=2; } break; } atomBonds.first++; } invariant = (invariant << 2) | isT; } res[atsSoFar++] = invariant; } if(includeChirality){ // ring stereochemistry boost::dynamic_bitset<> adjusted(mol.getNumAtoms()); for(ROMol::ConstAtomIterator atIt=mol.beginAtoms();atIt!=mol.endAtoms();atIt++){ Atom const *atom = *atIt; res[atom->getIdx()] = res[atom->getIdx()]<<2; } for(ROMol::ConstAtomIterator atIt=mol.beginAtoms();atIt!=mol.endAtoms();atIt++){ Atom const *atom = *atIt; if((atom->getChiralTag()==Atom::CHI_TETRAHEDRAL_CW || atom->getChiralTag()==Atom::CHI_TETRAHEDRAL_CCW) && atom->hasProp("_ringStereoAtoms")){ //atom->hasProp("_CIPRank") && //!atom->hasProp("_CIPCode")){ ROMol::ADJ_ITER beg,end; boost::tie(beg,end) = mol.getAtomNeighbors(atom); unsigned int nCount=0; while(beg!=end){ unsigned int nbrIdx=mol[*beg]->getIdx(); if(!adjusted[nbrIdx]){ res[nbrIdx] |= nCount%4; adjusted.set(nbrIdx); } ++nCount; ++beg; } } } } } // -------------------------------------------------- // // Calculates invariants for the atoms of a molecule // // NOTE: if the atom has not had chirality info pre-calculated, it doesn't // much matter what value includeChirality has! // -------------------------------------------------- void buildAtomInvariants(const ROMol &mol,INVAR_VECT &res, bool includeChirality, bool includeIsotopes){ PRECONDITION(res.size()>=mol.getNumAtoms(),"res vect too small"); std::vector tres(mol.getNumAtoms()); buildAtomInvariants(mol,tres,includeChirality,includeIsotopes); for(unsigned int i=0;i &atomsToUse, const boost::dynamic_bitset<> &bondsToUse, const std::vector *atomSymbols ){ PRECONDITION(res.size()>=mol.getNumAtoms(),"res vect too small"); std::vector degrees(mol.getNumAtoms(),0); for(unsigned int i=0;igetBeginAtomIdx()]++; degrees[bnd->getEndAtomIdx()]++; } } for(ROMol::ConstAtomIterator atIt=mol.beginAtoms();atIt!=mol.endAtoms();++atIt){ Atom const *atom = *atIt; int aIdx=atom->getIdx(); if(!atomsToUse[aIdx]){ res[aIdx] = 0; continue; } boost::uint64_t invariant = 0; int nConns = degrees[aIdx]% 8; invariant = (invariant << 3) | nConns; if(!atomSymbols){ int chg = abs(atom->getFormalCharge()) % 8; int chgSign = atom->getFormalCharge() > 0; int num = atom->getAtomicNum() % 128; int deltaMass=0; if(atom->getIsotope()){ deltaMass = static_cast(atom->getIsotope() - PeriodicTable::getTable()->getMostCommonIsotope(atom->getAtomicNum())); deltaMass += 128; if(deltaMass < 0) deltaMass = 0; else deltaMass = deltaMass % 256; } invariant = (invariant << 7) | num; invariant = (invariant << 8) | deltaMass; invariant = (invariant << 3) | chg; invariant = (invariant << 1) | chgSign; invariant = (invariant << 1) | atom->getIsAromatic(); } else { const std::string &symb=(*atomSymbols)[aIdx]; boost::uint32_t hsh=gboost::hash_range(symb.begin(),symb.end()); invariant = (invariant << 20) | (hsh%(1<<20)); } // figure out the minimum-sized ring we're involved in int inRing = mol.getRingInfo()->minAtomRingSize(aIdx); inRing = inRing % 16; invariant = (invariant << 4) | inRing; if(includeChirality ){ int isR=0; if( atom->hasProp("_CIPCode")){ std::string cipCode; atom->getProp("_CIPCode",cipCode); if(cipCode=="R"){ isR=1; } else { isR=2; } } invariant = (invariant << 2) | isR; } // now deal with cis/trans - this is meant to address issue 174 // loop over the bonds on this atom and check if we have a double bond with // a chiral code marking if (includeChirality) { ROMol::OBOND_ITER_PAIR atomBonds = mol.getAtomBonds(atom); int isT=0; while (atomBonds.first != atomBonds.second){ BOND_SPTR tBond = mol[*(atomBonds.first)]; atomBonds.first++; if(!bondsToUse[tBond->getIdx()]) continue; if( (tBond->getBondType() == Bond::DOUBLE) && (tBond->getStereo()>Bond::STEREOANY )) { if (tBond->getStereo()==Bond::STEREOE) { isT = 1; } else if(tBond->getStereo()==Bond::STEREOZ) { isT=2; } break; } } invariant = (invariant << 2) | isT; } res[aIdx] = invariant; } } }// end of RankAtoms namespace namespace RDKit{ namespace MolOps { // -------------------------------------------------- // // Daylight canonicalization, loosely based up on algorithm described in // JCICS 29, 97-101, (1989) // When appropriate, specific references are made to the algorithm // description in that paper. Steps refer to Table III of the paper // // -------------------------------------------------- void rankAtoms(const ROMol &mol,INT_VECT &ranks, bool breakTies, bool includeChirality, bool includeIsotopes, VECT_INT_VECT *rankHistory){ unsigned int i; unsigned int nAtoms = mol.getNumAtoms(); PRECONDITION(ranks.size()>=nAtoms,""); PRECONDITION(!rankHistory||rankHistory->size()>=nAtoms,"bad rankHistory size"); unsigned int stagnantTol=1; if(!mol.getRingInfo()->isInitialized()){ MolOps::findSSSR(mol); } if(nAtoms > 1){ double *adjMat = MolOps::getAdjacencyMatrix(mol, true); // ---------------------- // generate atomic invariants, Step (1) // ---------------------- INVAR_VECT invariants; invariants.resize(nAtoms); RankAtoms::buildAtomInvariants(mol,invariants,includeChirality,includeIsotopes); #ifdef VERBOSE_CANON BOOST_LOG(rdDebugLog)<< "invariants:" << std::endl; for(i=0;i &atomsToUse, const boost::dynamic_bitset<> &bondsToUse, const std::vector *atomSymbols, const std::vector *bondSymbols, bool breakTies, VECT_INT_VECT *rankHistory){ unsigned int nAtoms = mol.getNumAtoms(); unsigned int nActiveAtoms = atomsToUse.count(); PRECONDITION(ranks.size()>=nAtoms,""); PRECONDITION(!atomSymbols||atomSymbols->size()>=nAtoms,"bad atomSymbols"); PRECONDITION(!rankHistory||rankHistory->size()>=nAtoms,"bad rankHistory size"); PRECONDITION(mol.getRingInfo()->isInitialized(),"no ring information present"); PRECONDITION(!rankHistory,"rankHistory not currently supported."); unsigned int stagnantTol=1; if(nActiveAtoms > 1){ // ---------------------- // generate atomic invariants, Step (1) // ---------------------- INVAR_VECT invariants; invariants.resize(nAtoms); RankAtoms::buildFragmentAtomInvariants(mol,invariants,true, atomsToUse,bondsToUse, atomSymbols); INVAR_VECT tinvariants; tinvariants.resize(nActiveAtoms); unsigned int activeIdx=0; for(unsigned int aidx=0;aidx(tadjMat),0,nActiveAtoms*nActiveAtoms*sizeof(double)); if(!bondSymbols){ double *adjMat = MolOps::getAdjacencyMatrix(mol,true,0,true,0,&bondsToUse); activeIdx=0; for(unsigned int aidx=0;aidx tbranks(bondsToUse.size(), 0); for(unsigned int bidx=0;bidx(std::cerr," ")); std::cerr<(std::cerr," ")); std::cerr<getBeginAtomIdx(); unsigned int aidx2=bond->getEndAtomIdx(); unsigned int tidx1=0; for(unsigned int iidx=0;iidx"<