Files
rdkit/Code/GraphMol/new_canon.cpp
2015-02-12 18:08:03 +01:00

455 lines
16 KiB
C++

//
// Copyright (C) 2014 Greg Landrum
// Adapted from pseudo-code from Roger Sayle
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
#include "new_canon.h"
#include <GraphMol/RDKitBase.h>
#include <boost/cstdint.hpp>
#include <boost/foreach.hpp>
#include <cstring>
#include <iostream>
#include <cassert>
namespace RDKit {
namespace Canon{
void CreateSinglePartition(unsigned int nAtoms,
int *order,
int *count,
canon_atom *atoms) {
for( unsigned int i=0; i<nAtoms; i++ ) {
atoms[i].index = 0;
order[i] = i;
count[i] = 0;
}
count[0] = nAtoms;
}
void ActivatePartitions(unsigned int nAtoms,
int *order,
int *count,
int &activeset,int *next,
int *changed) {
unsigned int i,j;
activeset = -1;
for( i=0; i<nAtoms; i++ )
next[i] = -2;
i = 0;
do {
j = order[i];
if( count[j] > 1 ) {
next[j] = activeset;
activeset = j;
i += count[j];
} else i++;
} while( i < nAtoms );
for( i=0; i<nAtoms; i++ ){
j = order[i];
int flag=1;
//#define SKIP_NODE_CHANGED_OPTIMIZATION 0
//#ifndef SKIP_NODE_CHANGED_OPTIMIZATION
// if(count[j]){
// std::cout << "j " << j << std::endl;
// flag=(next[j]!=-2);
// }
//#endif
changed[j]=flag;
}
}
void compareRingAtomsConcerningNumNeighbors(Canon::canon_atom *atoms, unsigned int nAtoms) {
for(unsigned idx = 0; idx < nAtoms; ++idx){
std::deque<int> neighbors;
neighbors.push_back(idx);
char *visited=(char *)malloc(nAtoms*sizeof(char));
memset(visited,0,nAtoms*sizeof(char));
unsigned count = 1;
std::vector<int> nextLevelNbrs;
std::set<int> lastLevelNbrs;
std::set<int> currentLevelNbrs;
while(!neighbors.empty()){
unsigned int revisitedNeighbors=0;
unsigned int numLevelNbrs=0;
nextLevelNbrs.resize(0);
while(!neighbors.empty()){
int nidx = neighbors.front();
const Canon::canon_atom &atom = atoms[nidx];
lastLevelNbrs.insert(nidx);
visited[nidx]=1;
neighbors.pop_front();
for(unsigned int j=0; j<atom.degree; j++){
int iidx = atom.nbrIds[j];
if(!visited[iidx]){
currentLevelNbrs.insert(iidx);
numLevelNbrs++;
visited[iidx]=1;
nextLevelNbrs.push_back(iidx);
}
}
}
for(std::set<int>::iterator iter=currentLevelNbrs.begin(); iter != currentLevelNbrs.end();iter++){
const Canon::canon_atom &natom = atoms[*iter];
for(unsigned int k=0; k<natom.degree; k++){
int jidx = natom.nbrIds[k];
if(currentLevelNbrs.count(jidx) || lastLevelNbrs.count(jidx)){
revisitedNeighbors+=1;
}
}
}
lastLevelNbrs.clear();
lastLevelNbrs.insert(currentLevelNbrs.begin(),currentLevelNbrs.end());
currentLevelNbrs.clear();
if(revisitedNeighbors < 10){
atoms[idx].revistedNeighbors = (atoms[idx].revistedNeighbors*10) + revisitedNeighbors;
}
else{
atoms[idx].revistedNeighbors = (atoms[idx].revistedNeighbors*100) + revisitedNeighbors;
}
if(numLevelNbrs < 0){
atoms[idx].neighborNum = (atoms[idx].neighborNum*10) +numLevelNbrs;
}
else{
atoms[idx].neighborNum = (atoms[idx].neighborNum*100) +numLevelNbrs;
}
neighbors.insert(neighbors.end(),nextLevelNbrs.begin(),nextLevelNbrs.end());
count++;
}
free(visited);
}
}
template <typename T>
void rankWithFunctor(T &ftor,
bool breakTies,
int *order, bool useSpecial=false){
const ROMol &mol=*ftor.dp_mol;
canon_atom *atoms=ftor.dp_atoms;
unsigned int nAts=mol.getNumAtoms();
int *count=(int *)malloc(nAts*sizeof(int));
int activeset;
int *next=(int *)malloc(nAts*sizeof(int));
int *changed=(int *)malloc(nAts*sizeof(int));
char *touched=(char *)malloc(nAts*sizeof(char));
memset(touched,0,nAts*sizeof(char));
memset(changed,1,nAts*sizeof(int));
CreateSinglePartition(nAts,order,count,atoms);
ActivatePartitions(nAts,order,count,activeset,next,changed);
RefinePartitions(mol,atoms,ftor,false,order,count,activeset,next,changed,touched);
#ifdef VERBOSE_CANON
std::cerr<<"1--------"<<std::endl;
for(unsigned int i=0;i<mol.getNumAtoms();++i){
std::cerr<<order[i]+1<<" "<<" index: "<<atoms[order[i]].index<<" count: "<<count[order[i]]<<std::endl;
}
#endif
ftor.df_useNbrs=true;
ActivatePartitions(nAts,order,count,activeset,next,changed);
#ifdef VERBOSE_CANON
std::cerr<<"1a--------"<<std::endl;
for(unsigned int i=0;i<mol.getNumAtoms();++i){
std::cerr<<order[i]+1<<" "<<" index: "<<atoms[order[i]].index<<" count: "<<count[order[i]]<<std::endl;
}
#endif
RefinePartitions(mol,atoms,ftor,true,order,count,activeset,next,changed,touched);
#ifdef VERBOSE_CANON
std::cerr<<"2--------"<<std::endl;
for(unsigned int i=0;i<mol.getNumAtoms();++i){
std::cerr<<order[i]+1<<" "<<" index: "<<atoms[order[i]].index<<" count: "<<count[order[i]]<<std::endl;
}
#endif
bool ties=false;
unsigned countCls=0;
for(unsigned i=0; i<nAts; ++i){
if(count[i]){
countCls++;
}
else{
ties=true;
}
}
if(useSpecial && ties){
SpecialAtomCompareFunctor sftor(atoms,mol);
compareRingAtomsConcerningNumNeighbors(atoms, nAts);
ActivatePartitions(nAts,order,count,activeset,next,changed);
RefinePartitions(mol,atoms,sftor,true,order,count,activeset,next,changed,touched);
#ifdef VERBOSE_CANON
std::cerr<<"2a--------"<<std::endl;
for(unsigned int i=0;i<mol.getNumAtoms();++i){
std::cerr<<order[i]+1<<" "<<" index: "<<atoms[order[i]].index<<" count: "<<count[order[i]]<<std::endl;
}
#endif
}
if(breakTies){
BreakTies(mol,atoms,ftor,true,order,count,activeset,next,changed,touched);
#ifdef VERBOSE_CANON
std::cerr<<"3--------"<<std::endl;
for(unsigned int i=0;i<mol.getNumAtoms();++i){
std::cerr<<order[i]+1<<" "<<" index: "<<atoms[order[i]].index<<" count: "<<count[order[i]]<<std::endl;
}
#endif
}
free(count); free(next); free(touched); free(changed);
}
namespace {
bool hasRingNbr(const ROMol &mol,const Atom *at) {
ROMol::ADJ_ITER beg,end;
boost::tie(beg,end) = mol.getAtomNeighbors(at);
while(beg!=end){
const ATOM_SPTR nbr=mol[*beg];
++beg;
if((nbr->getChiralTag()==Atom::CHI_TETRAHEDRAL_CW ||
nbr->getChiralTag()==Atom::CHI_TETRAHEDRAL_CCW) &&
nbr->hasProp("_ringStereoAtoms")){
return true;
}
}
return false;
}
unsigned int getNumRingMember(const ROMol &mol,unsigned int idx){
if(mol.getRingInfo()->isInitialized()){
return mol.getRingInfo()->numAtomRings(idx);
}
return 0;
}
void getNbrs(const ROMol &mol,const Atom *at, int *ids){
ROMol::ADJ_ITER beg,end;
boost::tie(beg,end) = mol.getAtomNeighbors(at);
unsigned int idx=0;
while(beg!=end){
const ATOM_SPTR nbr=(mol)[*beg];
++beg;
ids[idx] = nbr->getIdx();
++idx;
}
}
void getBonds(const ROMol &mol,const Atom *at,std::vector<bondholder> &nbrs,bool includeChirality){
ROMol::OEDGE_ITER beg,end;
boost::tie(beg,end) = mol.getAtomBonds(at);
while(beg!=end){
const BOND_SPTR bond=(mol)[*beg];
++beg;
Bond::BondStereo stereo=Bond::STEREONONE;
if(includeChirality){
stereo=bond->getStereo();
}
unsigned int idx = bond->getOtherAtomIdx(at->getIdx());
Bond::BondType bt = bond->getIsAromatic() ? Bond::AROMATIC : bond->getBondType();
bondholder bh(bondholder(bt,stereo,idx,idx));
nbrs.insert(std::lower_bound(nbrs.begin(),nbrs.end(),bh,bondholder::greater),1,bh);
}
}
void getChiralBonds(const ROMol &mol,const Atom *at,std::vector<bondholder> &nbrs){
ROMol::OEDGE_ITER beg,end;
boost::tie(beg,end) = mol.getAtomBonds(at);
while(beg!=end){
const BOND_SPTR bond=(mol)[*beg];
++beg;
unsigned int nbrIdx = bond->getOtherAtomIdx(at->getIdx());
const Atom* nbr = mol.getAtomWithIdx(nbrIdx);
unsigned int degreeNbr = nbr->getDegree();
unsigned int nReps=1;
unsigned int stereo=0;
switch(bond->getStereo()){
case Bond::STEREOZ:
stereo=1;
break;
case Bond::STEREOE:
stereo=2;
break;
default:
stereo=0;
}
if(bond->getBondType() == Bond::DOUBLE &&
nbr->getAtomicNum()==15 &&
(degreeNbr==4 || degreeNbr==3) ) {
// a special case for chiral phophorous compounds
// (this was leading to incorrect assignment of
// R/S labels ):
nReps=1;
// general justification of this is:
// Paragraph 2.2. in the 1966 article is "Valence-Bond Conventions:
// Multiple-Bond Unsaturation and Aromaticity". It contains several
// conventions of which convention (b) is the one applying here:
// "(b) Contibutions by d orbitals to bonds of quadriligant atoms are
// neglected."
// FIX: this applies to more than just P
} else {
nReps = static_cast<unsigned int>(floor(2.*bond->getBondTypeAsDouble()));
}
unsigned int symclass = nbr->getAtomicNum()*ATNUM_CLASS_OFFSET+nbrIdx+1;
bondholder bh(bondholder(Bond::SINGLE,stereo,nbrIdx,symclass));
std::vector<bondholder>::iterator iPos=std::lower_bound(nbrs.begin(),nbrs.end(),bh);
nbrs.insert(iPos,nReps,bh);
}
std::reverse(nbrs.begin(),nbrs.end());
if(!at->needsUpdatePropertyCache()){
for(unsigned int ii=0;ii<at->getTotalNumHs();++ii){
nbrs.push_back(bondholder(Bond::SINGLE,Bond::STEREONONE,ATNUM_CLASS_OFFSET,ATNUM_CLASS_OFFSET));
nbrs.push_back(bondholder(Bond::SINGLE,Bond::STEREONONE,ATNUM_CLASS_OFFSET,ATNUM_CLASS_OFFSET));
}
}
}
void basicInitCanonAtom(const ROMol &mol,Canon::canon_atom &atom,const int &idx){
atom.atom=mol.getAtomWithIdx(idx);
atom.index=idx;
atom.p_symbol=NULL;
atom.degree=atom.atom->getDegree();
atom.nbrIds=(int *)malloc(atom.degree*sizeof(int));
getNbrs(mol, atom.atom,atom.nbrIds);
}
void advancedInitCanonAtom(const ROMol &mol,Canon::canon_atom &atom,const int &idx){
atom.totalNumHs=atom.atom->getTotalNumHs();
atom.isRingStereoAtom=(atom.atom->getChiralTag()==Atom::CHI_TETRAHEDRAL_CW ||
atom.atom->getChiralTag()==Atom::CHI_TETRAHEDRAL_CCW) &&
atom.atom->hasProp("_ringStereoAtoms");
atom.hasRingNbr=hasRingNbr(mol,atom.atom);
}
}//end anonymous namespace
void initCanonAtoms(const ROMol &mol,std::vector<Canon::canon_atom> &atoms,
bool includeChirality){
for(unsigned int i=0;i<mol.getNumAtoms();++i){
basicInitCanonAtom(mol,atoms[i],i);
advancedInitCanonAtom(mol,atoms[i],i);
atoms[i].bonds.reserve(atoms[i].degree);
getBonds(mol,atoms[i].atom,atoms[i].bonds,includeChirality);
}
}
void initFragmentCanonAtoms(const ROMol &mol,std::vector<Canon::canon_atom> &atoms,
bool includeChirality, const std::vector<std::string> *atomSymbols){
for(unsigned int i=0;i<mol.getNumAtoms();++i){
basicInitCanonAtom(mol,atoms[i],i);
atoms[i].degree=0;
if(atomSymbols){
atoms[i].p_symbol=&(*atomSymbols)[i];
}
else{
atoms[i].p_symbol=0;
}
advancedInitCanonAtom(mol,atoms[i],i);
atoms[i].bonds.reserve(atoms[i].degree);
getBonds(mol,atoms[i].atom,atoms[i].bonds,includeChirality);
}
}
void initChiralCanonAtoms(const ROMol &mol,std::vector<Canon::canon_atom> &atoms){
for(unsigned int i=0;i<mol.getNumAtoms();++i){
basicInitCanonAtom(mol,atoms[i],i);
getChiralBonds(mol,atoms[i].atom,atoms[i].bonds);
}
}
void freeCanonAtoms(std::vector<Canon::canon_atom> &atoms){
for(unsigned int i=0;i<atoms.size();++i){
if(atoms[i].nbrIds){
free(atoms[i].nbrIds);
}
}
}
void updateAtomNeighborIndex(canon_atom* atoms, std::vector<bondholder> &nbrs) {
for(unsigned j=0; j < nbrs.size(); ++j){
unsigned newSymClass = atoms[nbrs.at(j).nbrIdx].index;
nbrs.at(j).nbrSymClass = newSymClass;
}
std::sort(nbrs.begin(),nbrs.end(),bondholder::greater);
}
void rankMolAtoms(const ROMol &mol,std::vector<unsigned int> &res,
bool breakTies,
bool includeChirality,bool includeIsotopes) {
std::vector<Canon::canon_atom> atoms(mol.getNumAtoms());
initCanonAtoms(mol,atoms,includeChirality);
AtomCompareFunctor ftor(&atoms.front(),mol);
ftor.df_useIsotopes=includeIsotopes;
ftor.df_useChirality=includeChirality;
ftor.df_useChiralityRings=includeChirality;
int *order=(int *)malloc(mol.getNumAtoms()*sizeof(int));
rankWithFunctor(ftor,breakTies,order,true);
res.resize(mol.getNumAtoms());
for(unsigned int i=0;i<mol.getNumAtoms();++i){
res[order[i]]=atoms[order[i]].index;
}
free(order);
freeCanonAtoms(atoms);
} // end of rankMolAtoms()
void rankFragmentAtoms(const ROMol &mol,std::vector<unsigned int> &res,
const boost::dynamic_bitset<> &atomsInPlay,
const boost::dynamic_bitset<> &bondsInPlay,
const std::vector<std::string> *atomSymbols,
bool breakTies,
bool includeChirality,bool includeIsotopes) {
PRECONDITION(atomsInPlay.size()==mol.getNumAtoms(),"bad atomsInPlay size");
PRECONDITION(bondsInPlay.size()==mol.getNumBonds(),"bad bondsInPlay size");
PRECONDITION(!atomSymbols || atomSymbols->size()==mol.getNumAtoms(),"bad atomSymbols size");
std::vector<Canon::canon_atom> atoms(mol.getNumAtoms());
initFragmentCanonAtoms(mol,atoms,includeChirality, atomSymbols);
for(ROMol::ConstBondIterator bI=mol.beginBonds();
bI!=mol.endBonds();++bI){
if(!bondsInPlay[(*bI)->getIdx()])
continue;
atoms[(*bI)->getBeginAtomIdx()].degree++;
atoms[(*bI)->getEndAtomIdx()].degree++;
}
AtomCompareFunctor ftor(&atoms.front(),mol,
&atomsInPlay,&bondsInPlay);
ftor.df_useIsotopes=includeIsotopes;
ftor.df_useChirality=includeChirality;
int *order=(int *)malloc(mol.getNumAtoms()*sizeof(int));
rankWithFunctor(ftor,breakTies,order,true);
res.resize(mol.getNumAtoms());
for(unsigned int i=0;i<mol.getNumAtoms();++i){
res[order[i]]=atoms[order[i]].index;
}
free(order);
freeCanonAtoms(atoms);
} // end of rankFragmentAtoms()
void chiralRankMolAtoms(const ROMol &mol,std::vector<unsigned int> &res){
std::vector<Canon::canon_atom> atoms(mol.getNumAtoms());
initChiralCanonAtoms(mol,atoms);
ChiralAtomCompareFunctor ftor(&atoms.front(),mol);
int *order=(int *)malloc(mol.getNumAtoms()*sizeof(int));
rankWithFunctor(ftor,false,order);
res.resize(mol.getNumAtoms());
for(unsigned int i=0;i<mol.getNumAtoms();++i){
res[order[i]]=atoms[order[i]].index;
}
free(order);
freeCanonAtoms(atoms);
} // end of rankMolAtoms()
} // end of Canon namespace
} // end of RDKit namespace