From 624dd72ad6d74286b810b8fe0bbc352886e25099 Mon Sep 17 00:00:00 2001 From: Nadine Schneider Date: Tue, 4 Aug 2015 18:57:04 +0200 Subject: [PATCH] new canon: fix in special symmetry invariant --- Code/GraphMol/hanoitest.cpp | 21 +++++++++++-- Code/GraphMol/new_canon.cpp | 62 ++++++++++++++++++++++++------------- Code/GraphMol/new_canon.h | 6 ++-- 3 files changed, 62 insertions(+), 27 deletions(-) diff --git a/Code/GraphMol/hanoitest.cpp b/Code/GraphMol/hanoitest.cpp index 742b64fe3..b73262587 100644 --- a/Code/GraphMol/hanoitest.cpp +++ b/Code/GraphMol/hanoitest.cpp @@ -981,6 +981,22 @@ std::string smis[]={ "CCCCCCCCCCCCCCCCCCNC(=O)OC[C@H]1C[C@H]([C@@H2]OC(=O)N(Cc2cccc[n+]2CC)C(C)=O)C1.[I-]", //CHEMBL1172371 "CC.CCCCCCCCCC(C(=O)NCCc1ccc(OP(=S)(Oc2ccc(CCNC(=O)C(CCCCCCCCC)P(=O)([O-])O)cc2)N(C)/N=C/c2ccc(OP3(Oc4ccc(/C=N/N(C)P(=S)(Oc5ccc(CCNC(=O)C(CCCCCCCCC)P(=O)([O-])O)cc5)Oc5ccc(CCNC(=O)C(CCCCCCCCC)P(=O)([O-])O)cc5)cc4)=NP(Oc4ccc(/C=N/N(C)P(=S)(Oc5ccc(CCNC(=O)C(CCCCCCCCC)P(=O)([O-])O)cc5)Oc5ccc(CCNC(=O)C(CCCCCCCCC)P(=O)([O-])O)cc5)cc4)(Oc4ccc(/C=N/N(C)P(=S)(Oc5ccc(CCNC(=O)C(CCCCCCCCC)P(=O)([O-])O)cc5)Oc5ccc(CCNC(=O)C(CCCCCCCCC)P(=O)([O-])O)cc5)cc4)=NP(Oc4ccc(/C=N/N(C)P(=S)(Oc5ccc(CCNC(=O)C(CCCCCCCCC)P(=O)([O-])O)cc5)Oc5ccc(CCNC(=O)C(CCCCCCCCC)P(=O)([O-])O)cc5)cc4)(Oc4ccc(/C=N/N(C)P(=S)(Oc5ccc(CCNC(=O)C(CCCCCCCCC)P(=O)([O-])O)cc5)Oc5ccc(CCNC(=O)C(CCCCCCCCC)P(=O)([O-])O)cc5)cc4)=N3)cc2)cc1)P(=O)([O-])O.CCCCCCCCCCCCCCCC[NH2+]OC(CO)C(O)C(OC1OC(CO)C(O)C(O)C1O)C(O)CO.CCCCCCCCCCCCCCCC[NH2+]OC(CO)C(O)C(OC1OC(CO)C(O)C(O)C1O)C(O)CO.CCCCCCCCCCCCCCCC[NH2+]OC(CO)C(O)C(OC1OC(CO)C(O)C(O)C1O)C(O)CO.CCCCCCCCCCCCCCCC[NH2+]OC(CO)C(O)C(OC1OC(CO)C(O)C(O)C1O)C(O)CO.CCCCCCCCCCCCCCCC[NH2+]OC(CO)C(O)C(OC1OC(CO)C(O)C(O)C1O)C(O)CO.CCCCCCCCCCCCCCCC[NH2+]OC(CO)C(O)C(OC1OC(CO)C(O)C(O)C1O)C(O)CO.CCCCCCCCCCCCCCCC[NH2+]OC(CO)C(O)C(OC1OC(CO)C(O)C(O)C1O)C(O)CO.CCCCCCCCCCCCCCCC[NH2+]OC(CO)C(O)C(OC1OC(CO)C(O)C(O)C1O)C(O)CO.CCCCCCCCCCCCCCCC[NH2+]OC(CO)C(O)C(OC1OC(CO)C(O)C(O)C1O)C(O)CO.CCCCCCCCCCCCCCCC[NH2+]OC(CO)C(O)C(OC1OC(CO)C(O)C(O)C1O)C(O)CO.CCCCCCCCCCCCCCCC[NH2+]OC(CO)C(O)C(OC1OC(CO)C(O)C(O)C1O)C(O)CO.CCCCCCCCCCCCCCCC[NH2+]OC(CO)C(O)C(OC1OC(CO)C(O)C(O)C1O)C(O)CO", + //Examples first reviewer + "C12C3C4C1C3C1C3C4C1C23", // does not initially work + "C12C3C1C1C4C5C(C23)C5C14", + "C12C3C4C5C1C1C4C5C3C21", + "C12C3C4C1C2C1C2C4C2C31", + "C12C3C4C5C2C2C6C1C(C5C36)C42", + "C12C3C4C5C1C1C3C3C5C1C2C43", + "C12C3C4C5C6C1C1C7C3C3C5C1C1C6C3C2C7C41", // does not initially work + "C12C3C4C5C6C1C1C7C3C3C6C6C4C7C2C3C5C16", + "C12C3C4C5C6C7C8C9C1C6C1C(C37)C9C5C2C8C41", + "C12C3C4C5C6C7C1C1C8C4C4C9C2C2C5C5C1C1C3C(C7C45)C2C8C6C91", // does not initially work + "C12C3C4C5C6C7C1C1C8C4C4C7C7C3C3C8C8C2C2C5C3C4C(C1C72)C68", + "C12C3C4C5C6C7C8C1C1C9C5C5C%10C2C2C%11C%12C%13C3C3C7C%10C7C4C%11C1C3C(C5C8%12)C(C62)C7C9%13", // does not initially work + //drawn examples first reviewer + "C12C3C4C1CC5C46C7C5C1C57C6C53C1C2", + "C1C2C3C4CC5C6C1C17C8C61C5C48C3C27", "EOS" }; @@ -1358,10 +1374,11 @@ int main(){ test9(); test10(); test11(); - test8(); - test7(); test12(); + test7(); + test8(); #endif + return 0; } diff --git a/Code/GraphMol/new_canon.cpp b/Code/GraphMol/new_canon.cpp index a0969f262..88e1f8f07 100644 --- a/Code/GraphMol/new_canon.cpp +++ b/Code/GraphMol/new_canon.cpp @@ -77,6 +77,9 @@ namespace RDKit { } std::deque neighbors; neighbors.push_back(idx); + unsigned currentRNIdx=0; + atoms[idx].neighborNum.reserve(1000); + atoms[idx].revistedNeighbors.assign(1000,0); char *visited=(char *)malloc(nAtoms*sizeof(char)); memset(visited,0,nAtoms*sizeof(char)); unsigned count = 1; @@ -85,8 +88,9 @@ namespace RDKit { memset(lastLevelNbrs,0,nAtoms*sizeof(char)); char *currentLevelNbrs=(char *)malloc(nAtoms*sizeof(char)); memset(currentLevelNbrs,0,nAtoms*sizeof(char)); + int *revisitedNeighbors=(int *)malloc(nAtoms*sizeof(int)); + memset(revisitedNeighbors,0,nAtoms*sizeof(int)); while(!neighbors.empty()){ - unsigned int revisitedNeighbors=0; unsigned int numLevelNbrs=0; nextLevelNbrs.resize(0); while(!neighbors.empty()){ @@ -114,7 +118,7 @@ namespace RDKit { for(unsigned int k=0; ktmp; + tmp.reserve(30); + for(unsigned i=0; i0){ + tmp.push_back(revisitedNeighbors[i]); + } } - else{ - atoms[idx].revistedNeighbors = (atoms[idx].revistedNeighbors*100) + revisitedNeighbors; + std::sort(tmp.begin(),tmp.end()); + tmp.push_back(-1); + for(unsigned i=0; i= atoms[idx].revistedNeighbors.size()){ + atoms[idx].revistedNeighbors.resize(atoms[idx].revistedNeighbors.size()+1000); + } + atoms[idx].revistedNeighbors[currentRNIdx] = tmp[i]; + currentRNIdx++; } + memset(revisitedNeighbors,0,nAtoms*sizeof(int)); + + atoms[idx].neighborNum.push_back(numLevelNbrs); + atoms[idx].neighborNum.push_back(-1); - if(numLevelNbrs < 10){ - atoms[idx].neighborNum = (atoms[idx].neighborNum*10) +numLevelNbrs; - } - else{ - atoms[idx].neighborNum = (atoms[idx].neighborNum*100) +numLevelNbrs; - } neighbors.insert(neighbors.end(),nextLevelNbrs.begin(),nextLevelNbrs.end()); count++; } + atoms[idx].revistedNeighbors.resize(currentRNIdx); + free(visited); free(currentLevelNbrs); free(lastLevelNbrs); + free(revisitedNeighbors); } } @@ -209,26 +224,29 @@ namespace RDKit { } ties=false; unsigned symRingAtoms=0; - unsigned countCls=0; + unsigned ringAtoms=0; + bool branchingRingAtom=false; RingInfo *ringInfo=mol.getRingInfo(); if(!ringInfo->isInitialized()){ ringInfo->initialize(); } for(unsigned i=0; inumAtomRings(ftor.dp_atoms[i].atom->getIdx()) > 1){ - if(count[i] != 1){ - symRingAtoms += 1; + if(ringInfo->numAtomRings(order[i])){ + if(count[order[i]] > 2){ + symRingAtoms+=count[order[i]]; + } + ringAtoms++; + if(ringInfo->numAtomRings(order[i]) > 1 && count[order[i]] > 1){ + branchingRingAtom = true; } } - if(count[i]){ - countCls++; - } - else{ + if(!count[i]){ ties=true; } + } - unsigned int nAts2 = atomsInPlay ? atomsInPlay->count() : nAts; - if(useSpecial && ties && static_cast(countCls)/nAts2 < 0.5 && symRingAtoms>0){ +// std::cout << " " << ringAtoms << " " << symRingAtoms << std::endl; + if(useSpecial && ties && ringAtoms > 0 && static_cast(symRingAtoms)/ringAtoms > 0.5 && branchingRingAtom){ SpecialSymmetryAtomCompareFunctor sftor(atoms,mol,atomsInPlay,bondsInPlay); compareRingAtomsConcerningNumNeighbors(atoms, nAts, mol); ActivatePartitions(nAts,order,count,activeset,next,changed); diff --git a/Code/GraphMol/new_canon.h b/Code/GraphMol/new_canon.h index 113bc045f..e8efaecc8 100644 --- a/Code/GraphMol/new_canon.h +++ b/Code/GraphMol/new_canon.h @@ -81,13 +81,13 @@ namespace RDKit { bool isRingStereoAtom; int* nbrIds; const std::string *p_symbol; // if provided, this is used to order atoms - unsigned int neighborNum; - unsigned int revistedNeighbors; + std::vector neighborNum; + std::vector revistedNeighbors; std::vector bonds; canon_atom() : atom(NULL),index(-1),degree(0),totalNumHs(0), hasRingNbr(false), isRingStereoAtom(false), nbrIds(NULL), - p_symbol(NULL), neighborNum(0), revistedNeighbors(0) {}; + p_symbol(NULL) {}; }; void updateAtomNeighborIndex(canon_atom* atoms, std::vector &nbrs);