// // Copyright (C) 2014 Greg Landrum // // @@ All Rights Reserved @@ // This file is part of the RDKit. // The contents are covered by the terms of the BSD license // which is included in the file license.txt, found at the root // of the RDKit source tree. // #include #include #include #include #include #include #include #include #include #include #include #include using namespace RDKit; int pcmp(const void *a,const void *b){ if((*(int *)a)<(*(int *)b)){ return -1; } else if((*(int *)a)>(*(int *)b)){ return 1; } return 0; } int icmp(int a,int b){ if(ab){ return 1; } return 0; } class int_compare_ftor { const int *dp_ints; public: int_compare_ftor() : dp_ints(NULL) {}; int_compare_ftor(const int *ints) : dp_ints(ints) {}; int operator()(int i,int j) const { PRECONDITION(dp_ints,"no ints"); unsigned int ivi= dp_ints[i]; unsigned int ivj= dp_ints[j]; if(iviivj) return 1; else return 0; } }; void qs1( const std::vector< std::vector > &vects){ BOOST_LOG(rdInfoLog)<<"sorting (qsort) vectors"< tv=vects[i]; int *data=&tv.front(); qsort(data,tv.size(),sizeof(int),pcmp); for(unsigned int j=1;j=tv[j-1]); } } BOOST_LOG(rdInfoLog)<< "done: " << vects.size()< > &vects){ BOOST_LOG(rdInfoLog)<<"sorting (hanoi sort) vectors"<=data[indices[j-1]]); } free(count); free(indices); } BOOST_LOG(rdInfoLog)<< "done: " << vects.size()< rng_type; typedef boost::uniform_int<> distrib_type; typedef boost::variate_generator source_type; rng_type generator(42u); const unsigned int nVects=500000; const unsigned int vectSize=50; const unsigned int nClasses=15; distrib_type dist(0,nClasses); source_type randomSource(generator,dist); BOOST_LOG(rdInfoLog)<<"populating vectors"< > vects(nVects); for(unsigned int i=0;i(vectSize); for(unsigned int j=0;jivj) return 1; ivi= d_atoms[i].atom->getAtomicNum(); ivj= d_atoms[j].atom->getAtomicNum(); if(iviivj) return 1; return 0; } }; class atomcomparefunctor2 { Canon::canon_atom *d_atoms; public: atomcomparefunctor2() : d_atoms(NULL) {}; atomcomparefunctor2(Canon::canon_atom *atoms) : d_atoms(atoms) {}; int operator()(int i,int j) const { PRECONDITION(d_atoms,"no atoms"); unsigned int ivi,ivj; // always start with the current class: ivi= d_atoms[i].index; ivj= d_atoms[j].index; if(iviivj) return 1; // start by comparing degree ivi= d_atoms[i].atom->getDegree(); ivj= d_atoms[j].atom->getDegree(); if(iviivj) return 1; // move onto atomic number ivi= d_atoms[i].atom->getAtomicNum(); ivj= d_atoms[j].atom->getAtomicNum(); if(iviivj) return 1; return 0; } }; void test2(){ BOOST_LOG(rdInfoLog) << "Testing hanoi with a functor." << std::endl; // make sure that hanoi works with a functor and "molecule data" { std::string smi="FC1C(Cl)C1C"; RWMol *m =SmilesToMol(smi); TEST_ASSERT(m); std::vector atoms(m->getNumAtoms()); std::vector indices(m->getNumAtoms()); for(unsigned int i=0;igetNumAtoms();++i){ atoms[i].atom = m->getAtomWithIdx(i); atoms[i].index=0; indices[i]=i; } atomcomparefunctor ftor(&atoms.front()); int *data=&indices.front(); int *count=(int *)malloc(atoms.size()*sizeof(int)); int *changed=(int *)malloc(atoms.size()*sizeof(int)); memset(changed, 1, atoms.size()*sizeof(int)); RDKit::hanoisort(data,atoms.size(),count,changed,ftor); for(unsigned int i=0;igetNumAtoms();++i){ //std::cerr<0){ TEST_ASSERT(atoms[indices[i]].atom->getAtomicNum() >= atoms[indices[i-1]].atom->getAtomicNum()); if(atoms[indices[i]].atom->getAtomicNum() != atoms[indices[i-1]].atom->getAtomicNum()){ TEST_ASSERT(count[indices[i]]!=0); } else { TEST_ASSERT(count[indices[i]]==0); } } else { TEST_ASSERT(count[indices[i]]!=0); } } } BOOST_LOG(rdInfoLog) << "Done" << std::endl; }; void test3(){ BOOST_LOG(rdInfoLog) << "Testing basic partition refinement." << std::endl; // basic partition refinement { std::string smi="FC1C(Cl)CCC1C"; RWMol *m =SmilesToMol(smi); TEST_ASSERT(m); std::vector atoms(m->getNumAtoms()); initCanonAtoms(*m,atoms,true); atomcomparefunctor ftor(&atoms.front()); RDKit::Canon::canon_atom *data=&atoms.front(); int *count=(int *)malloc(atoms.size()*sizeof(int)); int *order=(int *)malloc(atoms.size()*sizeof(int)); int activeset; int *next=(int *)malloc(atoms.size()*sizeof(int)); int *changed=(int *)malloc(atoms.size()*sizeof(int)); memset(changed, 1, atoms.size()*sizeof(int)); char *touched=(char *)malloc(atoms.size()*sizeof(char)); RDKit::Canon::CreateSinglePartition(atoms.size(),order,count,data); RDKit::Canon::ActivatePartitions(atoms.size(),order,count,activeset,next,changed); // std::cerr<<"----------------------------------"<getNumAtoms();++i){ // std::cerr<getNumAtoms();++i){ // std::cerr<getNumAtoms();++i){ // std::cerr< atoms(m->getNumAtoms()); initCanonAtoms(*m,atoms,true); atomcomparefunctor2 ftor(&atoms.front()); RDKit::Canon::canon_atom *data=&atoms.front(); int *count=(int *)malloc(atoms.size()*sizeof(int)); int *order=(int *)malloc(atoms.size()*sizeof(int)); int activeset; int *next=(int *)malloc(atoms.size()*sizeof(int)); int *changed=(int *)malloc(atoms.size()*sizeof(int)); memset(changed, 1, atoms.size()*sizeof(int)); char *touched=(char *)malloc(atoms.size()*sizeof(char)); RDKit::Canon::CreateSinglePartition(atoms.size(),order,count,data); RDKit::Canon::ActivatePartitions(atoms.size(),order,count,activeset,next,changed); RDKit::Canon::RefinePartitions(*m,data,ftor,false,order,count,activeset,next,changed,touched); // std::cerr<<"----------------------------------"<getNumAtoms();++i){ // std::cerr<getAtomWithIdx(i); std::vector nbrs(at->getDegree()); unsigned int nbridx=0; ROMol::OEDGE_ITER beg,end; boost::tie(beg,end) = dp_mol->getAtomBonds(at); while(beg!=end){ const BOND_SPTR bond=(*dp_mol)[*beg]; nbrs[nbridx]=static_cast(100*bond->getBondTypeAsDouble())+dp_atoms[bond->getOtherAtomIdx(i)].index; ++beg; ++nbridx; } std::sort(nbrs.begin(),nbrs.end()); for(nbridx=0;nbridxgetDegree();++nbridx){ res+=(nbridx+1)*1000+nbrs[nbridx]; } return res; } int basecomp(int i,int j) const { PRECONDITION(dp_atoms,"no atoms"); unsigned int ivi,ivj; // always start with the current class: ivi= dp_atoms[i].index; ivj= dp_atoms[j].index; if(iviivj) return 1; // start by comparing degree ivi= dp_atoms[i].atom->getDegree(); ivj= dp_atoms[j].atom->getDegree(); if(iviivj) return 1; // move onto atomic number ivi= dp_atoms[i].atom->getAtomicNum(); ivj= dp_atoms[j].atom->getAtomicNum(); if(iviivj) return 1; return 0; } public: bool df_useNbrs; atomcomparefunctor3() : dp_atoms(NULL), dp_mol(NULL), df_useNbrs(false) {}; atomcomparefunctor3(Canon::canon_atom *atoms, const ROMol &m) : dp_atoms(atoms), dp_mol(&m), df_useNbrs(false) {}; int operator()(int i,int j) const { PRECONDITION(dp_atoms,"no atoms"); PRECONDITION(dp_mol,"no molecule"); int v=basecomp(i,j); if(v) return v; unsigned int ivi,ivj; if(df_useNbrs){ ivi=dp_atoms[i].index+1+getAtomNeighborhood(i); ivj=dp_atoms[j].index+1+getAtomNeighborhood(j); //std::cerr<<" "<ivj) return 1; } return 0; } }; void test4(){ BOOST_LOG(rdInfoLog) << "Testing partition refinement with neighbors." << std::endl; // partition refinement with neighbors { std::string smi="FC1C(Cl)CCC1C"; RWMol *m =SmilesToMol(smi); TEST_ASSERT(m); std::vector atoms(m->getNumAtoms()); initCanonAtoms(*m,atoms,true); atomcomparefunctor3 ftor(&atoms.front(),*m); RDKit::Canon::canon_atom *data=&atoms.front(); int *count=(int *)malloc(atoms.size()*sizeof(int)); int *order=(int *)malloc(atoms.size()*sizeof(int)); int activeset; int *next=(int *)malloc(atoms.size()*sizeof(int)); int *changed=(int *)malloc(atoms.size()*sizeof(int)); memset(changed, 1, atoms.size()*sizeof(int)); char *touched=(char *)malloc(atoms.size()*sizeof(char)); RDKit::Canon::CreateSinglePartition(atoms.size(),order,count,data); RDKit::Canon::ActivatePartitions(atoms.size(),order,count,activeset,next,changed); // std::cerr<<"1----------------------------------"<getNumAtoms();++i){ // std::cerr<getNumAtoms();++i){ // std::cerr<getNumAtoms();++i){ // std::cerr<getNumAtoms();++i){ //std::cerr<0){ TEST_ASSERT(ftor(order[i],order[i-1])>=0); } } delete m; } { std::string smi="FC1C(CO)CCC1CC"; RWMol *m =SmilesToMol(smi); TEST_ASSERT(m); std::vector atoms(m->getNumAtoms()); initCanonAtoms(*m,atoms,true); atomcomparefunctor3 ftor(&atoms.front(),*m); RDKit::Canon::canon_atom *data=&atoms.front(); int *count=(int *)malloc(atoms.size()*sizeof(int)); int *order=(int *)malloc(atoms.size()*sizeof(int)); int activeset; int *next=(int *)malloc(atoms.size()*sizeof(int)); int *changed=(int *)malloc(atoms.size()*sizeof(int)); memset(changed, 1, atoms.size()*sizeof(int)); char *touched=(char *)malloc(atoms.size()*sizeof(char)); RDKit::Canon::CreateSinglePartition(atoms.size(),order,count,data); RDKit::Canon::ActivatePartitions(atoms.size(),order,count,activeset,next,changed); RDKit::Canon::RefinePartitions(*m,data,ftor,false,order,count,activeset,next,changed,touched); // std::cerr<<"----------------------------------"<getNumAtoms();++i){ // std::cerr<getNumAtoms();++i){ //std::cerr<0){ //std::cerr<<" ftor: "<=0); } } delete m; } { std::string smi="FC1C(CC)CCC1CC"; RWMol *m =SmilesToMol(smi); TEST_ASSERT(m); std::vector atoms(m->getNumAtoms()); initCanonAtoms(*m,atoms,true); atomcomparefunctor3 ftor(&atoms.front(),*m); RDKit::Canon::canon_atom *data=&atoms.front(); int *count=(int *)malloc(atoms.size()*sizeof(int)); int *order=(int *)malloc(atoms.size()*sizeof(int)); int activeset; int *next=(int *)malloc(atoms.size()*sizeof(int)); int *changed=(int *)malloc(atoms.size()*sizeof(int)); memset(changed, 1, atoms.size()*sizeof(int)); char *touched=(char *)malloc(atoms.size()*sizeof(char)); RDKit::Canon::CreateSinglePartition(atoms.size(),order,count,data); RDKit::Canon::ActivatePartitions(atoms.size(),order,count,activeset,next,changed); // std::cerr<<"----------------------------------"<getNumAtoms();++i){ // std::cerr<getNumAtoms();++i){ //std::cerr<0){ //std::cerr<<" ftor: "<=0); } } // here we can't manage to get everything unique TEST_ASSERT(order[0]==4 && count[4]==2); TEST_ASSERT(order[1]==9 && count[9]==0); TEST_ASSERT(order[2]==0 && count[0]==1); TEST_ASSERT(order[3]==3 && count[3]==2); TEST_ASSERT(order[4]==8 && count[8]==0); TEST_ASSERT(order[5]==5 && count[5]==2); TEST_ASSERT(order[6]==6 && count[6]==0); TEST_ASSERT(order[7]==2 && count[2]==2); TEST_ASSERT(order[8]==7 && count[7]==0); TEST_ASSERT(order[9]==1 && count[1]==1); delete m; } BOOST_LOG(rdInfoLog) << "Done" << std::endl; }; void test5(){ BOOST_LOG(rdInfoLog) << "testing canonicalization via tie breaking." << std::endl; // canonicalization via tie breaking { std::string smi="FC1C(CC)CCC1CC"; RWMol *m =SmilesToMol(smi); TEST_ASSERT(m); std::vector atoms(m->getNumAtoms()); initCanonAtoms(*m,atoms,true); atomcomparefunctor3 ftor(&atoms.front(),*m); RDKit::Canon::canon_atom *data=&atoms.front(); int *count=(int *)malloc(atoms.size()*sizeof(int)); int *order=(int *)malloc(atoms.size()*sizeof(int)); int activeset; int *next=(int *)malloc(atoms.size()*sizeof(int)); int *changed=(int *)malloc(atoms.size()*sizeof(int)); memset(changed, 1, atoms.size()*sizeof(int)); char *touched=(char *)malloc(atoms.size()*sizeof(char)); RDKit::Canon::CreateSinglePartition(atoms.size(),order,count,data); RDKit::Canon::ActivatePartitions(atoms.size(),order,count,activeset,next,changed); // std::cerr<<"----------------------------------"<getNumAtoms();++i){ // std::cerr<getNumAtoms();++i){ // std::cerr<getNumAtoms();++i){ //std::cerr< atomRanks; RDKit::Canon::rankMolAtoms(*m,atomRanks); boost::dynamic_bitset<> seen(m->getNumAtoms()); for(unsigned int i=0;igetNumAtoms();++i){ TEST_ASSERT(!seen[atomRanks[i]]); seen.set(atomRanks[i],1); } // std::copy(atomRanks.begin(),atomRanks.end(),std::ostream_iterator(std::cerr," ")); // std::cerr< atomRanks; RDKit::Canon::rankMolAtoms(*m,atomRanks); boost::dynamic_bitset<> seen(m->getNumAtoms()); for(unsigned int i=0;igetNumAtoms();++i){ // std::cerr< atomRanks; RDKit::Canon::rankMolAtoms(*m,atomRanks); boost::dynamic_bitset<> seen(m->getNumAtoms()); for(unsigned int i=0;igetNumAtoms();++i){ // std::cerr< atomRanks; RDKit::Canon::rankMolAtoms(*m,atomRanks); boost::dynamic_bitset<> seen(m->getNumAtoms()); for(unsigned int i=0;igetNumAtoms();++i){ //std::cerr< atomRanks; RDKit::Canon::rankMolAtoms(*m,atomRanks,false); boost::dynamic_bitset<> seen(m->getNumAtoms()); for(unsigned int i=0;igetNumAtoms();++i){ // std::cerr<<" "< atomRanks; RDKit::Canon::rankMolAtoms(*m,atomRanks,false); boost::dynamic_bitset<> seen(m->getNumAtoms()); for(unsigned int i=0;igetNumAtoms();++i){ TEST_ASSERT(!seen[atomRanks[i]]); seen.set(atomRanks[i],1); } delete m; } BOOST_LOG(rdInfoLog) << "Done" << std::endl; }; namespace{ ROMol* _renumber(const ROMol *m,std::vector& nVect,std::string inSmiles){ ROMol *nm=MolOps::renumberAtoms(*m,nVect); TEST_ASSERT(nm); TEST_ASSERT(nm->getNumAtoms()==m->getNumAtoms()); TEST_ASSERT(nm->getNumBonds()==m->getNumBonds()); MolOps::assignStereochemistry(*nm,true,true); for(unsigned int ii=0;iigetNumAtoms();++ii){ if(nm->getAtomWithIdx(ii)->hasProp("_CIPCode")){ TEST_ASSERT(m->getAtomWithIdx(nVect[ii])->hasProp("_CIPCode")); std::string ocip=m->getAtomWithIdx(nVect[ii])->getProp("_CIPCode"); std::string ncip=nm->getAtomWithIdx(ii)->getProp("_CIPCode"); if(ocip!=ncip){ std::cerr<<" cip mismatch: "< "<>>>>>>>>>>>>>>>>>>>>>>>>>>"< idxV(m->getNumAtoms()); for(unsigned int i=0;igetNumAtoms();++i) idxV[i]=i; std::srand(0xF00D); for(unsigned int i=0;i nVect(idxV); std::random_shuffle(nVect.begin(),nVect.end()); // for(unsigned int j=0;jgetNumAtoms();++j){ // std::cerr<<"Renumber: "<"<setProp("_Name","orig"); std::cerr<setProp("_Name","renumber"); std::cerr<getNumAtoms();++j){ std::cerr<<"Renumber: "<"<getNumAtoms(); std::vector idxV(m->getNumAtoms()); for(unsigned int i=0;igetNumAtoms();++i) idxV[i]=i; std::srand(0xF00D); for(unsigned int i=0;i nVect(idxV); std::random_shuffle(nVect.begin(),nVect.end()); ROMol *nm= _renumber(m,nVect,inSmiles); UINT_VECT ranks(nAtoms); Canon::rankMolAtoms(*nm,ranks,true); char *ranksSet=(char *)malloc(nAtoms*sizeof(char)); memset(ranksSet,0,nAtoms*sizeof(char)); for(unsigned int i=0;i atomRanks; // std::cerr <<"\n\n\n\n\n\n\n\n\n\n\n\n>--------------" << std::endl; RDKit::Canon::rankMolAtoms(*m,atomRanks,false); // std::cerr <<"---------------" << std::endl; // for(unsigned int i=0;igetNumAtoms();++i){ // std::cerr<<" "< atomRanks; // std::cerr <<">--------------" << std::endl; RDKit::Canon::rankMolAtoms(*m,atomRanks,false); // std::cerr <<"---------------" << std::endl; // for(unsigned int i=0;igetNumAtoms();++i){ // std::cerr<<" "< atomRanks; //std::cerr<(std::cerr," ")); //std::cerr< atomRanks; //std::cerr<(std::cerr," ")); //std::cerr<atomRanks[5]); TEST_ASSERT(atomRanks[4]>atomRanks[5]); } { // make sure we aren't breaking ties std::string smi="C[C@](C)(Cl)I"; RWMol *m =SmilesToMol(smi,0,0); TEST_ASSERT(m); MolOps::sanitizeMol(*m); std::vector atomRanks; //std::cerr<(std::cerr," ")); //std::cerr< atomRanks; //std::cerr<(std::cerr," ")); //std::cerr<atomRanks[1]); TEST_ASSERT(atomRanks[0]atomRanks[3]); TEST_ASSERT(atomRanks[2]>atomRanks[11]); TEST_ASSERT(atomRanks[3] atomRanks; //std::cerr<(std::cerr," ")); //std::cerr<atomRanks[8]); TEST_ASSERT(atomRanks[5]>atomRanks[2]); } { // are double bonds being handled correctly? std::string smi="OC[C@H](F)C=O"; RWMol *m =SmilesToMol(smi,0,0); TEST_ASSERT(m); MolOps::sanitizeMol(*m); std::vector atomRanks; //std::cerr<(std::cerr," ")); //std::cerr< atomRanks; //std::cerr<(std::cerr," ")); //std::cerr<atomRanks[5]); TEST_ASSERT(atomRanks[1]>atomRanks[4]); } { // are double bonds being handled correctly? std::string smi="CC[C@](C)(CF)C=O"; RWMol *m =SmilesToMol(smi,0,0); TEST_ASSERT(m); MolOps::sanitizeMol(*m); std::vector atomRanks; //std::cerr<(std::cerr," ")); //std::cerr<atomRanks[6]); TEST_ASSERT(atomRanks[1]>>Molecule: " << smiles << std::endl; ROMol *m = SmilesToMol(smiles); TEST_ASSERT(m); MolOps::assignStereochemistry(*m,true); _renumberTest2(m,smiles,1); delete m; } BOOST_LOG(rdInfoLog) << "Finished" << std::endl; } void test11(){ BOOST_LOG(rdInfoLog) << "testing mol fragments." << std::endl; { std::string smi="C[C@H]([C@H](c1ccccc1)O)N2CCCCC2.C[C@@H]([C@H](c1ccccc1)O)N2CCCCC2"; ROMol *m = SmilesToMol(smi); TEST_ASSERT(m); std::vector vfragsmi; std::vector > frags; unsigned int numFrag = MolOps::getMolFrags(*m,frags); for(unsigned i=0;i