mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-04 21:54:27 +08:00
1368 lines
50 KiB
C++
1368 lines
50 KiB
C++
//
|
|
// Copyright (C) 2014 Greg Landrum
|
|
//
|
|
// @@ All Rights Reserved @@
|
|
// This file is part of the RDKit.
|
|
// The contents are covered by the terms of the BSD license
|
|
// which is included in the file license.txt, found at the root
|
|
// of the RDKit source tree.
|
|
//
|
|
#include <GraphMol/new_canon.h>
|
|
#include <RDGeneral/RDLog.h>
|
|
#include <RDGeneral/Invariant.h>
|
|
#include <RDGeneral/hanoiSort.h>
|
|
|
|
#include <GraphMol/RDKitBase.h>
|
|
#include <GraphMol/SmilesParse/SmilesParse.h>
|
|
#include <GraphMol/SmilesParse/SmilesWrite.h>
|
|
#include <GraphMol/FileParsers/FileParsers.h>
|
|
|
|
#include <iostream>
|
|
#include <vector>
|
|
#include <boost/random.hpp>
|
|
#include <cstdlib>
|
|
|
|
using namespace RDKit;
|
|
|
|
|
|
// qsort()-compatible comparator for two ints passed by address.
//
// a, b: pointers to the int elements being compared.
// Returns -1 if *a < *b, 1 if *a > *b and 0 when they are equal.
int pcmp(const void *a,const void *b){
  // the elements arrive as pointers-to-const; keep the const in the cast
  const int lhs=*static_cast<const int *>(a);
  const int rhs=*static_cast<const int *>(b);
  if(lhs<rhs){
    return -1;
  } else if(lhs>rhs){
    return 1;
  }
  return 0;
}
|
|
// Three-way comparison of two ints by value:
// -1 when a is smaller, 1 when a is larger, 0 when both are equal.
int icmp(int a,int b){
  if(a==b){
    return 0;
  }
  return (a<b) ? -1 : 1;
}
|
|
|
|
class int_compare_ftor {
|
|
const int *dp_ints;
|
|
public:
|
|
int_compare_ftor() : dp_ints(NULL) {};
|
|
int_compare_ftor(const int *ints) : dp_ints(ints) {};
|
|
int operator()(int i,int j) const {
|
|
PRECONDITION(dp_ints,"no ints");
|
|
unsigned int ivi= dp_ints[i];
|
|
unsigned int ivj= dp_ints[j];
|
|
if(ivi<ivj)
|
|
return -1;
|
|
else if(ivi>ivj)
|
|
return 1;
|
|
else
|
|
return 0;
|
|
}
|
|
};
|
|
|
|
void qs1( const std::vector< std::vector<int> > &vects){
|
|
BOOST_LOG(rdInfoLog)<<"sorting (qsort) vectors"<<std::endl;
|
|
for(unsigned int i=0;i<vects.size();++i){
|
|
std::vector<int> tv=vects[i];
|
|
int *data=&tv.front();
|
|
qsort(data,tv.size(),sizeof(int),pcmp);
|
|
for(unsigned int j=1;j<tv.size();++j){
|
|
TEST_ASSERT(tv[j]>=tv[j-1]);
|
|
}
|
|
}
|
|
BOOST_LOG(rdInfoLog)<< "done: " << vects.size()<<std::endl;
|
|
}
|
|
|
|
void hs1( const std::vector< std::vector<int> > &vects){
|
|
BOOST_LOG(rdInfoLog)<<"sorting (hanoi sort) vectors"<<std::endl;
|
|
for(unsigned int i=0;i<vects.size();++i){
|
|
const int *data=&vects[i].front();
|
|
int_compare_ftor icmp(data);
|
|
int *indices=(int *)malloc(vects[i].size()*sizeof(int));
|
|
for(unsigned int j=0;j<vects[i].size();++j) indices[j]=j;
|
|
int *count=(int *)malloc(vects[i].size()*sizeof(int));
|
|
int *changed=(int *)malloc(vects[i].size()*sizeof(int));
|
|
memset(changed, 1, vects[i].size()*sizeof(int));
|
|
RDKit::hanoisort(indices,vects[i].size(),count,changed,icmp);
|
|
for(unsigned int j=1;j<vects[i].size();++j){
|
|
TEST_ASSERT(data[indices[j]]>=data[indices[j-1]]);
|
|
}
|
|
free(count);
|
|
free(indices);
|
|
}
|
|
BOOST_LOG(rdInfoLog)<< "done: " << vects.size()<<std::endl;
|
|
}
|
|
|
|
void test1(){
|
|
BOOST_LOG(rdInfoLog) << "Testing the hanoi sort" << std::endl;
|
|
|
|
typedef boost::random::mersenne_twister<boost::uint32_t,32,4,2,31,0x9908b0df,11,7,0x9d2c5680,15,0xefc60000,18, 3346425566U> rng_type;
|
|
typedef boost::uniform_int<> distrib_type;
|
|
typedef boost::variate_generator<rng_type &,distrib_type> source_type;
|
|
rng_type generator(42u);
|
|
|
|
const unsigned int nVects=500000;
|
|
const unsigned int vectSize=50;
|
|
const unsigned int nClasses=15;
|
|
|
|
distrib_type dist(0,nClasses);
|
|
source_type randomSource(generator,dist);
|
|
|
|
BOOST_LOG(rdInfoLog)<<"populating vectors"<<std::endl;
|
|
std::vector< std::vector<int> > vects(nVects);
|
|
for(unsigned int i=0;i<nVects;++i){
|
|
vects[i] = std::vector<int>(vectSize);
|
|
for(unsigned int j=0;j<vectSize;++j){
|
|
vects[i][j] = randomSource();
|
|
}
|
|
}
|
|
|
|
//qs1(vects);
|
|
hs1(vects);
|
|
BOOST_LOG(rdInfoLog) << "Done" << std::endl;
|
|
};
|
|
|
|
|
|
class atomcomparefunctor {
|
|
Canon::canon_atom *d_atoms;
|
|
public:
|
|
atomcomparefunctor() : d_atoms(NULL) {};
|
|
atomcomparefunctor(Canon::canon_atom *atoms) : d_atoms(atoms) {};
|
|
int operator()(int i,int j) const {
|
|
PRECONDITION(d_atoms,"no atoms");
|
|
unsigned int ivi,ivj;
|
|
|
|
// always start with the current class:
|
|
ivi= d_atoms[i].index;
|
|
ivj= d_atoms[j].index;
|
|
if(ivi<ivj)
|
|
return -1;
|
|
else if(ivi>ivj)
|
|
return 1;
|
|
|
|
ivi= d_atoms[i].atom->getAtomicNum();
|
|
ivj= d_atoms[j].atom->getAtomicNum();
|
|
if(ivi<ivj)
|
|
return -1;
|
|
else if(ivi>ivj)
|
|
return 1;
|
|
|
|
return 0;
|
|
}
|
|
};
|
|
class atomcomparefunctor2 {
|
|
Canon::canon_atom *d_atoms;
|
|
public:
|
|
atomcomparefunctor2() : d_atoms(NULL) {};
|
|
atomcomparefunctor2(Canon::canon_atom *atoms) : d_atoms(atoms) {};
|
|
int operator()(int i,int j) const {
|
|
PRECONDITION(d_atoms,"no atoms");
|
|
unsigned int ivi,ivj;
|
|
|
|
// always start with the current class:
|
|
ivi= d_atoms[i].index;
|
|
ivj= d_atoms[j].index;
|
|
if(ivi<ivj)
|
|
return -1;
|
|
else if(ivi>ivj)
|
|
return 1;
|
|
|
|
// start by comparing degree
|
|
ivi= d_atoms[i].atom->getDegree();
|
|
ivj= d_atoms[j].atom->getDegree();
|
|
if(ivi<ivj)
|
|
return -1;
|
|
else if(ivi>ivj)
|
|
return 1;
|
|
|
|
// move onto atomic number
|
|
ivi= d_atoms[i].atom->getAtomicNum();
|
|
ivj= d_atoms[j].atom->getAtomicNum();
|
|
if(ivi<ivj)
|
|
return -1;
|
|
else if(ivi>ivj)
|
|
return 1;
|
|
|
|
return 0;
|
|
}
|
|
};
|
|
|
|
void test2(){
|
|
BOOST_LOG(rdInfoLog) << "Testing hanoi with a functor." << std::endl;
|
|
// make sure that hanoi works with a functor and "molecule data"
|
|
{
|
|
std::string smi="FC1C(Cl)C1C";
|
|
RWMol *m =SmilesToMol(smi);
|
|
TEST_ASSERT(m);
|
|
std::vector<Canon::canon_atom> atoms(m->getNumAtoms());
|
|
std::vector<int> indices(m->getNumAtoms());
|
|
for(unsigned int i=0;i<m->getNumAtoms();++i){
|
|
atoms[i].atom = m->getAtomWithIdx(i);
|
|
atoms[i].index=0;
|
|
indices[i]=i;
|
|
}
|
|
atomcomparefunctor ftor(&atoms.front());
|
|
|
|
int *data=&indices.front();
|
|
int *count=(int *)malloc(atoms.size()*sizeof(int));
|
|
int *changed=(int *)malloc(atoms.size()*sizeof(int));
|
|
memset(changed, 1, atoms.size()*sizeof(int));
|
|
RDKit::hanoisort(data,atoms.size(),count,changed,ftor);
|
|
|
|
for(unsigned int i=0;i<m->getNumAtoms();++i){
|
|
//std::cerr<<indices[i]<<" "<<" index: "<<atoms[indices[i]].index<<" count: "<<count[indices[i]]<<std::endl;
|
|
if(i>0){
|
|
TEST_ASSERT(atoms[indices[i]].atom->getAtomicNum() >= atoms[indices[i-1]].atom->getAtomicNum());
|
|
if(atoms[indices[i]].atom->getAtomicNum() != atoms[indices[i-1]].atom->getAtomicNum()){
|
|
TEST_ASSERT(count[indices[i]]!=0);
|
|
} else {
|
|
TEST_ASSERT(count[indices[i]]==0);
|
|
}
|
|
} else {
|
|
TEST_ASSERT(count[indices[i]]!=0);
|
|
}
|
|
}
|
|
}
|
|
BOOST_LOG(rdInfoLog) << "Done" << std::endl;
|
|
};
|
|
|
|
void test3(){
|
|
BOOST_LOG(rdInfoLog) << "Testing basic partition refinement." << std::endl;
|
|
// basic partition refinement
|
|
{
|
|
std::string smi="FC1C(Cl)CCC1C";
|
|
RWMol *m =SmilesToMol(smi);
|
|
TEST_ASSERT(m);
|
|
std::vector<Canon::canon_atom> atoms(m->getNumAtoms());
|
|
initCanonAtoms(*m,atoms,true);
|
|
atomcomparefunctor ftor(&atoms.front());
|
|
|
|
RDKit::Canon::canon_atom *data=&atoms.front();
|
|
int *count=(int *)malloc(atoms.size()*sizeof(int));
|
|
int *order=(int *)malloc(atoms.size()*sizeof(int));
|
|
int activeset;
|
|
int *next=(int *)malloc(atoms.size()*sizeof(int));
|
|
int *changed=(int *)malloc(atoms.size()*sizeof(int));
|
|
memset(changed, 1, atoms.size()*sizeof(int));
|
|
char *touched=(char *)malloc(atoms.size()*sizeof(char));
|
|
|
|
RDKit::Canon::CreateSinglePartition(atoms.size(),order,count,data);
|
|
RDKit::Canon::ActivatePartitions(atoms.size(),order,count,activeset,next,changed);
|
|
|
|
|
|
// std::cerr<<"----------------------------------"<<std::endl;
|
|
// for(unsigned int i=0;i<m->getNumAtoms();++i){
|
|
// std::cerr<<i<<" "<<atoms[i].index<<" "<<count[i]<<" "<<next[i]<<" "<<order[i]<<std::endl;
|
|
// }
|
|
|
|
RDKit::Canon::RefinePartitions(*m,data,ftor,false,order,count,activeset,next,changed,touched);
|
|
|
|
// std::cerr<<"----------------------------------"<<std::endl;
|
|
// for(unsigned int i=0;i<m->getNumAtoms();++i){
|
|
// std::cerr<<i<<" "<<atoms[i].index<<" "<<count[i]<<" "<<next[i]<<" "<<order[i]<<std::endl;
|
|
// }
|
|
|
|
// std::cerr<<"----------------------------------"<<std::endl;
|
|
// for(unsigned int i=0;i<m->getNumAtoms();++i){
|
|
// std::cerr<<order[i]<<" "<<atoms[order[i]].invar<<" index: "<<atoms[order[i]].index<<" count: "<<count[order[i]]<<std::endl;
|
|
// }
|
|
|
|
TEST_ASSERT(order[0]==1);
|
|
TEST_ASSERT(order[6]==0);
|
|
TEST_ASSERT(order[7]==3);
|
|
TEST_ASSERT(count[order[0]]==6);
|
|
TEST_ASSERT(count[order[1]]==0);
|
|
TEST_ASSERT(count[order[6]]==1);
|
|
TEST_ASSERT(count[order[7]]==1);
|
|
}
|
|
{
|
|
// this time with smarter invariants
|
|
std::string smi="FC1C(Cl)CCC1C";
|
|
RWMol *m =SmilesToMol(smi);
|
|
TEST_ASSERT(m);
|
|
std::vector<Canon::canon_atom> atoms(m->getNumAtoms());
|
|
initCanonAtoms(*m,atoms,true);
|
|
atomcomparefunctor2 ftor(&atoms.front());
|
|
|
|
RDKit::Canon::canon_atom *data=&atoms.front();
|
|
int *count=(int *)malloc(atoms.size()*sizeof(int));
|
|
int *order=(int *)malloc(atoms.size()*sizeof(int));
|
|
int activeset;
|
|
int *next=(int *)malloc(atoms.size()*sizeof(int));
|
|
int *changed=(int *)malloc(atoms.size()*sizeof(int));
|
|
memset(changed, 1, atoms.size()*sizeof(int));
|
|
char *touched=(char *)malloc(atoms.size()*sizeof(char));
|
|
|
|
RDKit::Canon::CreateSinglePartition(atoms.size(),order,count,data);
|
|
RDKit::Canon::ActivatePartitions(atoms.size(),order,count,activeset,next,changed);
|
|
|
|
RDKit::Canon::RefinePartitions(*m,data,ftor,false,order,count,activeset,next,changed,touched);
|
|
|
|
// std::cerr<<"----------------------------------"<<std::endl;
|
|
// for(unsigned int i=0;i<m->getNumAtoms();++i){
|
|
// std::cerr<<order[i]<<" "<<" index: "<<atoms[order[i]].index<<" count: "<<count[order[i]]<<std::endl;
|
|
// }
|
|
|
|
TEST_ASSERT(order[0]==7);
|
|
TEST_ASSERT(order[1]==0);
|
|
TEST_ASSERT(order[2]==3);
|
|
TEST_ASSERT(order[3]==4);
|
|
TEST_ASSERT(order[5]==1);
|
|
TEST_ASSERT(count[order[0]]==1);
|
|
TEST_ASSERT(count[order[1]]==1);
|
|
TEST_ASSERT(count[order[2]]==1);
|
|
TEST_ASSERT(count[order[3]]==2);
|
|
TEST_ASSERT(count[order[4]]==0);
|
|
TEST_ASSERT(count[order[5]]==3);
|
|
TEST_ASSERT(count[order[6]]==0);
|
|
|
|
}
|
|
BOOST_LOG(rdInfoLog) << "Done" << std::endl;
|
|
};
|
|
|
|
|
|
class atomcomparefunctor3 {
|
|
Canon::canon_atom *dp_atoms;
|
|
const ROMol *dp_mol;
|
|
unsigned int getAtomNeighborhood(unsigned int i) const{
|
|
unsigned int res=0;
|
|
const Atom *at=dp_mol->getAtomWithIdx(i);
|
|
std::vector<unsigned int> nbrs(at->getDegree());
|
|
unsigned int nbridx=0;
|
|
ROMol::OEDGE_ITER beg,end;
|
|
boost::tie(beg,end) = dp_mol->getAtomBonds(at);
|
|
while(beg!=end){
|
|
const BOND_SPTR bond=(*dp_mol)[*beg];
|
|
nbrs[nbridx]=static_cast<unsigned int>(100*bond->getBondTypeAsDouble())+dp_atoms[bond->getOtherAtomIdx(i)].index;
|
|
++beg;
|
|
++nbridx;
|
|
}
|
|
std::sort(nbrs.begin(),nbrs.end());
|
|
for(nbridx=0;nbridx<at->getDegree();++nbridx){
|
|
res+=(nbridx+1)*1000+nbrs[nbridx];
|
|
}
|
|
return res;
|
|
}
|
|
int basecomp(int i,int j) const {
|
|
PRECONDITION(dp_atoms,"no atoms");
|
|
unsigned int ivi,ivj;
|
|
|
|
// always start with the current class:
|
|
ivi= dp_atoms[i].index;
|
|
ivj= dp_atoms[j].index;
|
|
if(ivi<ivj)
|
|
return -1;
|
|
else if(ivi>ivj)
|
|
return 1;
|
|
|
|
// start by comparing degree
|
|
ivi= dp_atoms[i].atom->getDegree();
|
|
ivj= dp_atoms[j].atom->getDegree();
|
|
if(ivi<ivj)
|
|
return -1;
|
|
else if(ivi>ivj)
|
|
return 1;
|
|
|
|
// move onto atomic number
|
|
ivi= dp_atoms[i].atom->getAtomicNum();
|
|
ivj= dp_atoms[j].atom->getAtomicNum();
|
|
if(ivi<ivj)
|
|
return -1;
|
|
else if(ivi>ivj)
|
|
return 1;
|
|
|
|
return 0;
|
|
}
|
|
public:
|
|
bool df_useNbrs;
|
|
atomcomparefunctor3() : dp_atoms(NULL), dp_mol(NULL), df_useNbrs(false) {};
|
|
atomcomparefunctor3(Canon::canon_atom *atoms, const ROMol &m) : dp_atoms(atoms), dp_mol(&m),
|
|
df_useNbrs(false) {};
|
|
int operator()(int i,int j) const {
|
|
PRECONDITION(dp_atoms,"no atoms");
|
|
PRECONDITION(dp_mol,"no molecule");
|
|
int v=basecomp(i,j);
|
|
if(v) return v;
|
|
unsigned int ivi,ivj;
|
|
if(df_useNbrs){
|
|
ivi=dp_atoms[i].index+1+getAtomNeighborhood(i);
|
|
ivj=dp_atoms[j].index+1+getAtomNeighborhood(j);
|
|
//std::cerr<<" "<<i<<"-"<<j<<": "<<ivi<<" "<<ivj<<std::endl;
|
|
if(ivi<ivj)
|
|
return -1;
|
|
else if(ivi>ivj)
|
|
return 1;
|
|
}
|
|
return 0;
|
|
}
|
|
};
|
|
|
|
|
|
void test4(){
|
|
BOOST_LOG(rdInfoLog) << "Testing partition refinement with neighbors." << std::endl;
|
|
// partition refinement with neighbors
|
|
{
|
|
std::string smi="FC1C(Cl)CCC1C";
|
|
RWMol *m =SmilesToMol(smi);
|
|
TEST_ASSERT(m);
|
|
std::vector<Canon::canon_atom> atoms(m->getNumAtoms());
|
|
initCanonAtoms(*m,atoms,true);
|
|
atomcomparefunctor3 ftor(&atoms.front(),*m);
|
|
RDKit::Canon::canon_atom *data=&atoms.front();
|
|
int *count=(int *)malloc(atoms.size()*sizeof(int));
|
|
int *order=(int *)malloc(atoms.size()*sizeof(int));
|
|
int activeset;
|
|
int *next=(int *)malloc(atoms.size()*sizeof(int));
|
|
int *changed=(int *)malloc(atoms.size()*sizeof(int));
|
|
memset(changed, 1, atoms.size()*sizeof(int));
|
|
char *touched=(char *)malloc(atoms.size()*sizeof(char));
|
|
|
|
RDKit::Canon::CreateSinglePartition(atoms.size(),order,count,data);
|
|
RDKit::Canon::ActivatePartitions(atoms.size(),order,count,activeset,next,changed);
|
|
// std::cerr<<"1----------------------------------"<<std::endl;
|
|
// for(unsigned int i=0;i<m->getNumAtoms();++i){
|
|
// std::cerr<<order[i]<<" "<<" index: "<<atoms[order[i]].index<<" count: "<<count[order[i]]<<" next: "<<next[order[i]]<<" changed: "<<changed[order[i]]<<std::endl;
|
|
// }
|
|
RDKit::Canon::RefinePartitions(*m,data,ftor,false,order,count,activeset,next,changed,touched);
|
|
|
|
// std::cerr<<"2----------------------------------"<<std::endl;
|
|
// for(unsigned int i=0;i<m->getNumAtoms();++i){
|
|
// std::cerr<<order[i]<<" "<<" index: "<<atoms[order[i]].index<<" count: "<<count[order[i]]<<" next: "<<next[order[i]]<<" changed: "<<changed[order[i]]<<std::endl;
|
|
// }
|
|
|
|
//std::cerr<<"!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"<<std::endl;
|
|
ftor.df_useNbrs=true;
|
|
RDKit::Canon::ActivatePartitions(atoms.size(),order,count,activeset,next,changed);
|
|
// std::cerr<<"3----------------------------------"<<std::endl;
|
|
// for(unsigned int i=0;i<m->getNumAtoms();++i){
|
|
// std::cerr<<order[i]<<" "<<" index: "<<atoms[order[i]].index<<" count: "<<count[order[i]]<<" next: "<<next[order[i]]<<" changed: "<<changed[order[i]]<<std::endl;
|
|
// }
|
|
RDKit::Canon::RefinePartitions(*m,data,ftor,true,order,count,activeset,next,changed,touched);
|
|
|
|
//std::cerr<<"----------------------------------"<<std::endl;
|
|
for(unsigned int i=0;i<m->getNumAtoms();++i){
|
|
//std::cerr<<order[i]<<" "<<" index: "<<atoms[order[i]].index<<" count: "<<count[order[i]]<<std::endl;
|
|
TEST_ASSERT(count[order[i]]==1);
|
|
if(i>0){
|
|
TEST_ASSERT(ftor(order[i],order[i-1])>=0);
|
|
}
|
|
}
|
|
delete m;
|
|
}
|
|
|
|
{
|
|
std::string smi="FC1C(CO)CCC1CC";
|
|
RWMol *m =SmilesToMol(smi);
|
|
TEST_ASSERT(m);
|
|
std::vector<Canon::canon_atom> atoms(m->getNumAtoms());
|
|
initCanonAtoms(*m,atoms,true);
|
|
atomcomparefunctor3 ftor(&atoms.front(),*m);
|
|
|
|
RDKit::Canon::canon_atom *data=&atoms.front();
|
|
int *count=(int *)malloc(atoms.size()*sizeof(int));
|
|
int *order=(int *)malloc(atoms.size()*sizeof(int));
|
|
int activeset;
|
|
int *next=(int *)malloc(atoms.size()*sizeof(int));
|
|
int *changed=(int *)malloc(atoms.size()*sizeof(int));
|
|
memset(changed, 1, atoms.size()*sizeof(int));
|
|
char *touched=(char *)malloc(atoms.size()*sizeof(char));
|
|
|
|
RDKit::Canon::CreateSinglePartition(atoms.size(),order,count,data);
|
|
RDKit::Canon::ActivatePartitions(atoms.size(),order,count,activeset,next,changed);
|
|
|
|
RDKit::Canon::RefinePartitions(*m,data,ftor,false,order,count,activeset,next,changed,touched);
|
|
// std::cerr<<"----------------------------------"<<std::endl;
|
|
// for(unsigned int i=0;i<m->getNumAtoms();++i){
|
|
// std::cerr<<order[i]<<" "<<" index: "<<atoms[order[i]].index<<" count: "<<count[order[i]]<<std::endl;
|
|
// }
|
|
|
|
ftor.df_useNbrs=true;
|
|
RDKit::Canon::ActivatePartitions(atoms.size(),order,count,activeset,next,changed);
|
|
RDKit::Canon::RefinePartitions(*m,data,ftor,true,order,count,activeset,next,changed,touched);
|
|
|
|
//std::cerr<<"----------------------------------"<<std::endl;
|
|
for(unsigned int i=0;i<m->getNumAtoms();++i){
|
|
//std::cerr<<order[i]<<" "<<" index: "<<atoms[order[i]].index<<" count: "<<count[order[i]]<<std::endl;
|
|
TEST_ASSERT(count[order[i]]==1);
|
|
if(i>0){
|
|
//std::cerr<<" ftor: "<<ftor(order[i],order[i-1])<<std::endl;
|
|
TEST_ASSERT(ftor(order[i],order[i-1])>=0);
|
|
}
|
|
}
|
|
delete m;
|
|
}
|
|
|
|
{
|
|
std::string smi="FC1C(CC)CCC1CC";
|
|
RWMol *m =SmilesToMol(smi);
|
|
TEST_ASSERT(m);
|
|
std::vector<Canon::canon_atom> atoms(m->getNumAtoms());
|
|
initCanonAtoms(*m,atoms,true);
|
|
atomcomparefunctor3 ftor(&atoms.front(),*m);
|
|
|
|
RDKit::Canon::canon_atom *data=&atoms.front();
|
|
int *count=(int *)malloc(atoms.size()*sizeof(int));
|
|
int *order=(int *)malloc(atoms.size()*sizeof(int));
|
|
int activeset;
|
|
int *next=(int *)malloc(atoms.size()*sizeof(int));
|
|
int *changed=(int *)malloc(atoms.size()*sizeof(int));
|
|
memset(changed, 1, atoms.size()*sizeof(int));
|
|
char *touched=(char *)malloc(atoms.size()*sizeof(char));
|
|
|
|
RDKit::Canon::CreateSinglePartition(atoms.size(),order,count,data);
|
|
RDKit::Canon::ActivatePartitions(atoms.size(),order,count,activeset,next,changed);
|
|
|
|
// std::cerr<<"----------------------------------"<<std::endl;
|
|
// for(unsigned int i=0;i<m->getNumAtoms();++i){
|
|
// std::cerr<<order[i]<<" "<<atoms[order[i]].invar<<" index: "<<atoms[order[i]].index<<std::endl;
|
|
// }
|
|
|
|
RDKit::Canon::RefinePartitions(*m,data,ftor,false,order,count,activeset,next,changed,touched);
|
|
|
|
//std::cerr<<"!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"<<std::endl;
|
|
ftor.df_useNbrs=true;
|
|
|
|
RDKit::Canon::ActivatePartitions(atoms.size(),order,count,activeset,next,changed);
|
|
RDKit::Canon::RefinePartitions(*m,data,ftor,true,order,count,activeset,next,changed,touched);
|
|
//std::cerr<<"----------------------------------"<<std::endl;
|
|
|
|
|
|
for(unsigned int i=0;i<m->getNumAtoms();++i){
|
|
//std::cerr<<order[i]<<" "<<" index: "<<atoms[order[i]].index<<" count: "<<count[order[i]]<<std::endl;
|
|
if(i>0){
|
|
//std::cerr<<" ftor: "<<ftor(order[i],order[i-1])<<std::endl;
|
|
TEST_ASSERT(ftor(order[i],order[i-1])>=0);
|
|
}
|
|
}
|
|
|
|
// here we can't manage to get everything unique
|
|
TEST_ASSERT(order[0]==4 && count[4]==2);
|
|
TEST_ASSERT(order[1]==9 && count[9]==0);
|
|
TEST_ASSERT(order[2]==0 && count[0]==1);
|
|
TEST_ASSERT(order[3]==3 && count[3]==2);
|
|
TEST_ASSERT(order[4]==8 && count[8]==0);
|
|
TEST_ASSERT(order[5]==5 && count[5]==2);
|
|
TEST_ASSERT(order[6]==6 && count[6]==0);
|
|
TEST_ASSERT(order[7]==2 && count[2]==2);
|
|
TEST_ASSERT(order[8]==7 && count[7]==0);
|
|
TEST_ASSERT(order[9]==1 && count[1]==1);
|
|
|
|
delete m;
|
|
}
|
|
|
|
BOOST_LOG(rdInfoLog) << "Done" << std::endl;
|
|
};
|
|
|
|
|
|
void test5(){
|
|
BOOST_LOG(rdInfoLog) << "testing canonicalization via tie breaking." << std::endl;
|
|
// canonicalization via tie breaking
|
|
{
|
|
std::string smi="FC1C(CC)CCC1CC";
|
|
RWMol *m =SmilesToMol(smi);
|
|
TEST_ASSERT(m);
|
|
std::vector<Canon::canon_atom> atoms(m->getNumAtoms());
|
|
initCanonAtoms(*m,atoms,true);
|
|
atomcomparefunctor3 ftor(&atoms.front(),*m);
|
|
|
|
RDKit::Canon::canon_atom *data=&atoms.front();
|
|
int *count=(int *)malloc(atoms.size()*sizeof(int));
|
|
int *order=(int *)malloc(atoms.size()*sizeof(int));
|
|
int activeset;
|
|
int *next=(int *)malloc(atoms.size()*sizeof(int));
|
|
int *changed=(int *)malloc(atoms.size()*sizeof(int));
|
|
memset(changed, 1, atoms.size()*sizeof(int));
|
|
char *touched=(char *)malloc(atoms.size()*sizeof(char));
|
|
|
|
RDKit::Canon::CreateSinglePartition(atoms.size(),order,count,data);
|
|
RDKit::Canon::ActivatePartitions(atoms.size(),order,count,activeset,next,changed);
|
|
|
|
// std::cerr<<"----------------------------------"<<std::endl;
|
|
// for(unsigned int i=0;i<m->getNumAtoms();++i){
|
|
// std::cerr<<order[i]<<" "<<atoms[order[i]].invar<<" index: "<<atoms[order[i]].index<<std::endl;
|
|
// }
|
|
|
|
RDKit::Canon::RefinePartitions(*m,data,ftor,false,order,count,activeset,next,changed,touched);
|
|
|
|
//std::cerr<<"!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"<<std::endl;
|
|
ftor.df_useNbrs=true;
|
|
RDKit::Canon::ActivatePartitions(atoms.size(),order,count,activeset,next,changed);
|
|
RDKit::Canon::RefinePartitions(*m,data,ftor,true,order,count,activeset,next,changed,touched);
|
|
|
|
// std::cerr<<"----------------------------------"<<std::endl;
|
|
// for(unsigned int i=0;i<m->getNumAtoms();++i){
|
|
// std::cerr<<order[i]<<" "<<" index: "<<atoms[order[i]].index<<" count: "<<count[order[i]]<<std::endl;
|
|
// }
|
|
|
|
// here we can't manage to get everything unique
|
|
TEST_ASSERT(order[0]==4 && count[4]==2);
|
|
TEST_ASSERT(order[1]==9 && count[9]==0);
|
|
TEST_ASSERT(order[2]==0 && count[0]==1);
|
|
TEST_ASSERT(order[3]==3 && count[3]==2);
|
|
TEST_ASSERT(order[4]==8 && count[8]==0);
|
|
TEST_ASSERT(order[5]==5 && count[5]==2);
|
|
TEST_ASSERT(order[6]==6 && count[6]==0);
|
|
TEST_ASSERT(order[7]==2 && count[2]==2);
|
|
TEST_ASSERT(order[8]==7 && count[7]==0);
|
|
TEST_ASSERT(order[9]==1 && count[1]==1);
|
|
|
|
RDKit::Canon::BreakTies(*m,data,ftor,true,order,count,activeset,next,changed,touched);
|
|
for(unsigned int i=0;i<m->getNumAtoms();++i){
|
|
//std::cerr<<order[i]<<" "<<" index: "<<atoms[order[i]].index<<" count: "<<count[order[i]]<<std::endl;
|
|
TEST_ASSERT(count[order[i]]==1);
|
|
}
|
|
delete m;
|
|
}
|
|
BOOST_LOG(rdInfoLog) << "Done" << std::endl;
|
|
};
|
|
|
|
|
|
void test6(){
|
|
BOOST_LOG(rdInfoLog) << "testing canonicalization using the wrapper." << std::endl;
|
|
// canonicalization using the wrapper
|
|
#if 1
|
|
{
|
|
std::string smi="FC1C(CC)CCC1CC";
|
|
RWMol *m =SmilesToMol(smi);
|
|
TEST_ASSERT(m);
|
|
|
|
std::vector<unsigned int> atomRanks;
|
|
RDKit::Canon::rankMolAtoms(*m,atomRanks);
|
|
boost::dynamic_bitset<> seen(m->getNumAtoms());
|
|
for(unsigned int i=0;i<m->getNumAtoms();++i){
|
|
TEST_ASSERT(!seen[atomRanks[i]]);
|
|
seen.set(atomRanks[i],1);
|
|
}
|
|
// std::copy(atomRanks.begin(),atomRanks.end(),std::ostream_iterator<unsigned int>(std::cerr," "));
|
|
// std::cerr<<std::endl;
|
|
TEST_ASSERT(atomRanks[0]==2);
|
|
TEST_ASSERT(atomRanks[1]==9);
|
|
TEST_ASSERT(atomRanks[2]==7);
|
|
TEST_ASSERT(atomRanks[3]==3);
|
|
TEST_ASSERT(atomRanks[4]==0);
|
|
TEST_ASSERT(atomRanks[5]==5);
|
|
TEST_ASSERT(atomRanks[6]==6);
|
|
TEST_ASSERT(atomRanks[7]==8);
|
|
TEST_ASSERT(atomRanks[8]==4);
|
|
TEST_ASSERT(atomRanks[9]==1);
|
|
delete m;
|
|
}
|
|
|
|
{
|
|
std::string smi="CC[C@@H]1CCC[C@@H](O1)C(=O)O";
|
|
RWMol *m =SmilesToMol(smi);
|
|
TEST_ASSERT(m);
|
|
|
|
std::vector<unsigned int> atomRanks;
|
|
RDKit::Canon::rankMolAtoms(*m,atomRanks);
|
|
boost::dynamic_bitset<> seen(m->getNumAtoms());
|
|
for(unsigned int i=0;i<m->getNumAtoms();++i){
|
|
// std::cerr<<i<<" "<<atomRanks[i]<<std::endl;
|
|
TEST_ASSERT(!seen[atomRanks[i]]);
|
|
seen.set(atomRanks[i],1);
|
|
}
|
|
|
|
// for(unsigned int ii=0;ii<atomRanks.size();++ii){
|
|
// std::cerr<<ii<<":"<<atomRanks[ii]<<std::endl;
|
|
// }
|
|
TEST_ASSERT(atomRanks[0]==0);
|
|
TEST_ASSERT(atomRanks[1]==4);
|
|
TEST_ASSERT(atomRanks[2]==9);
|
|
TEST_ASSERT(atomRanks[3]==5);
|
|
TEST_ASSERT(atomRanks[4]==3);
|
|
TEST_ASSERT(atomRanks[5]==6);
|
|
TEST_ASSERT(atomRanks[6]==10);
|
|
TEST_ASSERT(atomRanks[7]==7);
|
|
TEST_ASSERT(atomRanks[8]==8);
|
|
TEST_ASSERT(atomRanks[9]==1);
|
|
TEST_ASSERT(atomRanks[10]==2);
|
|
|
|
delete m;
|
|
}
|
|
|
|
{
|
|
std::string smi="N[C@@H](Cc1c[nH]c2ccccc12)C(=O)N[C@@H](CCCN=C(N)N)C(=O)N[C@@H](Cc3c[nH]c4ccccc34)C(=O)OCc5ccccc5";
|
|
RWMol *m =SmilesToMol(smi);
|
|
TEST_ASSERT(m);
|
|
|
|
std::vector<unsigned int> atomRanks;
|
|
RDKit::Canon::rankMolAtoms(*m,atomRanks);
|
|
boost::dynamic_bitset<> seen(m->getNumAtoms());
|
|
for(unsigned int i=0;i<m->getNumAtoms();++i){
|
|
// std::cerr<<i<<" "<<atomRanks[i]<<std::endl;
|
|
TEST_ASSERT(!seen[atomRanks[i]]);
|
|
seen.set(atomRanks[i],1);
|
|
}
|
|
|
|
// for(unsigned int ii=0;ii<atomRanks.size();++ii){
|
|
// std::cerr<<ii<<":"<<atomRanks[ii]<<std::endl;
|
|
// }
|
|
delete m;
|
|
}
|
|
#endif
|
|
{
|
|
std::string smi="BrC=C1CCC(C(=O)O1)c2cccc3ccccc23";
|
|
RWMol *m =SmilesToMol(smi);
|
|
TEST_ASSERT(m);
|
|
|
|
std::vector<unsigned int> atomRanks;
|
|
RDKit::Canon::rankMolAtoms(*m,atomRanks);
|
|
boost::dynamic_bitset<> seen(m->getNumAtoms());
|
|
for(unsigned int i=0;i<m->getNumAtoms();++i){
|
|
//std::cerr<<i<<" "<<atomRanks[i]<<std::endl;
|
|
TEST_ASSERT(!seen[atomRanks[i]]);
|
|
seen.set(atomRanks[i],1);
|
|
}
|
|
|
|
// for(unsigned int ii=0;ii<atomRanks.size();++ii){
|
|
// std::cerr<<ii<<":"<<atomRanks[ii]<<std::endl;
|
|
// }
|
|
delete m;
|
|
}
|
|
|
|
{
|
|
std::string smi="CC12CCCC1CCCC2";
|
|
RWMol *m =SmilesToMol(smi);
|
|
TEST_ASSERT(m);
|
|
|
|
// start w/o tie breaking here; we shouldn't need it.
|
|
std::vector<unsigned int> atomRanks;
|
|
RDKit::Canon::rankMolAtoms(*m,atomRanks,false);
|
|
boost::dynamic_bitset<> seen(m->getNumAtoms());
|
|
for(unsigned int i=0;i<m->getNumAtoms();++i){
|
|
// std::cerr<<" "<<i<<" "<<atomRanks[i]<<std::endl;
|
|
TEST_ASSERT(!seen[atomRanks[i]]);
|
|
seen.set(atomRanks[i],1);
|
|
}
|
|
delete m;
|
|
}
|
|
|
|
{
|
|
std::string smi="CC12CCCC1C1CCC3CC(O)CCC3(C)C1CC2";
|
|
RWMol *m =SmilesToMol(smi);
|
|
TEST_ASSERT(m);
|
|
|
|
// start w/o tie breaking here; we shouldn't need it.
|
|
std::vector<unsigned int> atomRanks;
|
|
RDKit::Canon::rankMolAtoms(*m,atomRanks,false);
|
|
boost::dynamic_bitset<> seen(m->getNumAtoms());
|
|
for(unsigned int i=0;i<m->getNumAtoms();++i){
|
|
TEST_ASSERT(!seen[atomRanks[i]]);
|
|
seen.set(atomRanks[i],1);
|
|
}
|
|
delete m;
|
|
}
|
|
|
|
|
|
BOOST_LOG(rdInfoLog) << "Done" << std::endl;
|
|
};
|
|
|
|
|
|
namespace{
|
|
|
|
// Returns a renumbered copy of m (atom ii of the copy is atom nVect[ii] of
// m) after re-assigning stereochemistry on it. Asserts that atom and bond
// counts are unchanged and that every "_CIPCode" found on the copy matches
// the one on the corresponding original atom. inSmiles is only used in the
// mismatch diagnostics. The caller owns the returned molecule.
ROMol* _renumber(const ROMol *m,std::vector<unsigned int>& nVect,std::string inSmiles){
  ROMol *nm=MolOps::renumberAtoms(*m,nVect);
  TEST_ASSERT(nm);
  TEST_ASSERT(nm->getNumAtoms()==m->getNumAtoms());
  TEST_ASSERT(nm->getNumBonds()==m->getNumBonds());
  MolOps::assignStereochemistry(*nm,true,true);

  for(unsigned int ii=0;ii<nm->getNumAtoms();++ii){
    // only atoms carrying a CIP code on the renumbered side are compared
    if(!nm->getAtomWithIdx(ii)->hasProp("_CIPCode")){
      continue;
    }
    TEST_ASSERT(m->getAtomWithIdx(nVect[ii])->hasProp("_CIPCode"));
    std::string ocip=m->getAtomWithIdx(nVect[ii])->getProp<std::string>("_CIPCode");
    std::string ncip=nm->getAtomWithIdx(ii)->getProp<std::string>("_CIPCode");
    if(ocip!=ncip){
      // dump the details before the assertion below fires
      std::cerr<<"  cip mismatch: "<<inSmiles<<std::endl;
      std::cerr<<"      "<<nVect[ii]<<": "<<ocip<<" -> "<<ii<<": "<<ncip<<std::endl;
      std::cerr<<"      "<<MolToSmiles(*nm,true)<<std::endl;
    }
    TEST_ASSERT(ocip==ncip);
  }
  return nm;
}
|
|
|
|
void _renumberTest(const ROMol *m,std::string inSmiles,unsigned int numRenumbers){
|
|
PRECONDITION(m,"no molecule");
|
|
// std::cerr<<">>>>>>>>>>>>>>>>>>>>>>>>>>>"<<std::endl;
|
|
std::string osmi=MolToSmiles(*m,true);
|
|
std::vector<unsigned int> idxV(m->getNumAtoms());
|
|
for(unsigned int i=0;i<m->getNumAtoms();++i) idxV[i]=i;
|
|
|
|
std::srand(0xF00D);
|
|
for(unsigned int i=0;i<numRenumbers;++i){
|
|
// std::cerr<<"---------------------------------------------------"<<std::endl;
|
|
std::vector<unsigned int> nVect(idxV);
|
|
std::random_shuffle(nVect.begin(),nVect.end());
|
|
// for(unsigned int j=0;j<m->getNumAtoms();++j){
|
|
// std::cerr<<"Renumber: "<<nVect[j]<<"->"<<j<<std::endl;
|
|
// }
|
|
ROMol *nm= _renumber(m,nVect,inSmiles);
|
|
|
|
std::string smi=MolToSmiles(*nm,true);
|
|
if(smi!=osmi){
|
|
std::cerr<<" input: "<<inSmiles<<", Renumbering round: " << i <<std::endl;
|
|
std::cerr<<osmi<<std::endl;
|
|
std::cerr<<smi<<std::endl;
|
|
m->setProp("_Name","orig");
|
|
std::cerr<<MolToMolBlock(*m)<<std::endl;
|
|
nm->setProp("_Name","renumber");
|
|
std::cerr<<MolToMolBlock(*nm)<<std::endl;
|
|
for(unsigned int j=0;j<m->getNumAtoms();++j){
|
|
std::cerr<<"Renumber: "<<nVect[j]<<"->"<<j<<std::endl;
|
|
}
|
|
}
|
|
TEST_ASSERT(smi==osmi);
|
|
delete nm;
|
|
}
|
|
}
|
|
|
|
void _renumberTest2(const ROMol *m,std::string inSmiles,unsigned int numRenumbers){
|
|
PRECONDITION(m,"no molecule");
|
|
|
|
unsigned int nAtoms=m->getNumAtoms();
|
|
std::vector<unsigned int> idxV(m->getNumAtoms());
|
|
for(unsigned int i=0;i<m->getNumAtoms();++i) idxV[i]=i;
|
|
|
|
std::srand(0xF00D);
|
|
for(unsigned int i=0;i<numRenumbers;++i){
|
|
std::vector<unsigned int> nVect(idxV);
|
|
std::random_shuffle(nVect.begin(),nVect.end());
|
|
|
|
ROMol *nm= _renumber(m,nVect,inSmiles);
|
|
|
|
UINT_VECT ranks(nAtoms);
|
|
Canon::rankMolAtoms(*nm,ranks,true);
|
|
char *ranksSet=(char *)malloc(nAtoms*sizeof(char));
|
|
memset(ranksSet,0,nAtoms*sizeof(char));
|
|
for(unsigned int i=0;i<ranks.size();i++){
|
|
ranksSet[ranks[i]]=1;
|
|
}
|
|
for(unsigned int i=0;i<nAtoms;i++){
|
|
if(ranksSet[i]!=1){
|
|
std::cerr << "Molecule has non unique ranks: " << MolToSmiles(*nm,true) << ", Renumbering round: " << i <<std::endl;
|
|
for(unsigned int i=0;i<nAtoms;i++){
|
|
std::cerr << "AtomIdx: " << i << " Rank: " << ranks[i] << std::endl;
|
|
}
|
|
}
|
|
TEST_ASSERT(ranksSet[i]==1);
|
|
}
|
|
delete nm;
|
|
}
|
|
}
|
|
}
|
|
|
|
void test7a() {
|
|
BOOST_LOG(rdInfoLog) << "testing some specific ordering problems" << std::endl;
|
|
std::string rdbase = getenv("RDBASE");
|
|
std::string smi1,smi2;
|
|
{
|
|
std::string fName = rdbase+"/Code/GraphMol/test_data/canon_reorder1.mol";
|
|
RWMol *m = MolFileToMol(fName,false,false);
|
|
TEST_ASSERT(m);
|
|
MolOps::sanitizeMol(*m);
|
|
std::vector<unsigned int> atomRanks;
|
|
// std::cerr <<"\n\n\n\n\n\n\n\n\n\n\n\n>--------------" << std::endl;
|
|
RDKit::Canon::rankMolAtoms(*m,atomRanks,false);
|
|
// std::cerr <<"---------------" << std::endl;
|
|
// for(unsigned int i=0;i<m->getNumAtoms();++i){
|
|
// std::cerr<<" "<<i+1<<" "<<atomRanks[i]<<std::endl;
|
|
// }
|
|
// std::cerr <<"---------------" << std::endl;
|
|
smi1=MolToSmiles(*m,true);
|
|
delete m;
|
|
}
|
|
{
|
|
std::string fName = rdbase+"/Code/GraphMol/test_data/canon_reorder2.mol";
|
|
RWMol *m = MolFileToMol(fName,false,false);
|
|
TEST_ASSERT(m);
|
|
MolOps::sanitizeMol(*m);
|
|
std::vector<unsigned int> atomRanks;
|
|
// std::cerr <<">--------------" << std::endl;
|
|
RDKit::Canon::rankMolAtoms(*m,atomRanks,false);
|
|
// std::cerr <<"---------------" << std::endl;
|
|
// for(unsigned int i=0;i<m->getNumAtoms();++i){
|
|
// std::cerr<<" "<<i+1<<" "<<atomRanks[i]<<std::endl;
|
|
// }
|
|
// std::cerr <<"---------------" << std::endl;
|
|
smi2=MolToSmiles(*m,true);
|
|
delete m;
|
|
}
|
|
if(smi1!=smi2){
|
|
std::cerr<<smi1<<"\n"<<smi2<<std::endl;
|
|
}
|
|
TEST_ASSERT(smi1==smi2);
|
|
BOOST_LOG(rdInfoLog) << "Finished" << std::endl;
|
|
}
|
|
|
|
// Table of SMILES strings used as input by test7(), test8() and test10().
// Each of those tests walks the array from index 0 until it reaches the
// "EOS" sentinel, so "EOS" must stay the last element and new entries
// must be inserted before it.
// Entries that are commented out below are not part of the array.
std::string smis[]={
|
|
"C[C@@H]1CCC[C@H](C)[C@H]1C",
|
|
"N[C@@]1(C[C@H]([18F])C1)C(=O)O",
|
|
"CC12CCCC1C1CCC3CC(O)CCC3(C)C1CC2",
|
|
"CC(C)CCCC[C@@H]1C[C@H](/C=C/[C@]2(C)CC[C@H](O)CC2)[C@@H](O)[C@H]1O",
|
|
"C[C@@]12CCC[C@H]1[C@@H]1CC[C@H]3C[C@@H](O)CC[C@]3(C)[C@H]1CC2",
|
|
"CCCN[C@H]1CC[C@H](NC)CC1",
|
|
"O=S(=O)(NC[C@H]1CC[C@H](CNCc2ccc3ccccc3c2)CC1)c1ccc2ccccc2c1",
|
|
"CC(C)[C@H]1CC[C@H](C(=O)N[C@H](Cc2ccccc2)C(=O)O)CC1",
|
|
"O=[N+]([O-])c1ccccc1S(=O)(=O)NC[C@H]1CC[C@H](CNCC2Cc3ccccc3CC2)CC1",
|
|
"Oc1ccc2c(Cc3ccc(OCCN4CCCCC4)cc3)c([C@H]3CC[C@H](O)CC3)sc2c1",
|
|
"O=C(c1ccc(OCCN2CCCCC2)cc1)c1c2ccc(O)cc2sc1[C@H]1CC[C@H](O)CC1",
|
|
"N#Cc1ccc2c(c1)CCN(CC[C@@H]1CC[C@@H](NC(=O)c3ccnc4ccccc34)CC1)C2",
|
|
"COCCOC[C@H](CC1(C(=O)N[C@H]2CC[C@@H](C(=O)O)CC2)CCCC1)C(=O)O",
|
|
"c1ccc(CN[C@H]2CC[C@H](Nc3ccc4[nH]ncc4c3)CC2)cc1",
|
|
"CCC1=C(C)CN(C(=O)NCCc2ccc(S(=O)(=O)NC(=O)N[C@H]3CC[C@H](C)CC3)cc2)C1=O",
|
|
"C[C@H]1C[C@H](C1)N1CCC1",
|
|
"C[C@H]1C[C@H](C1)N1CCN(C)CC1",
|
|
"CN1CCN(CC1)[C@H]1C[C@H](C1)c1ncc2c(N)nccn12",
|
|
"CN1CCN(CC1)[C@H]1C[C@H](C1)c1nc(-c2ccc3ccc(nc3c2)-c2ccccc2)c2c(N)nccn12",
|
|
"C12C3C1C3C4C5C4C52",
|
|
"N[C@H]1C2CC3CC1C[C@](O)(C3)C2",
|
|
"O=C(CN1CCN(c2ccc(C(F)(F)F)cn2)CC1)N[C@H]1C2CC3CC1C[C@](O)(C3)C2",
|
|
"COc1cc([C@H]2[C@H](C)[C@H](C)[C@H]2c2ccc(O)c(OC)c2)ccc1O",
|
|
"N[C@@H]1[C@H]2CNC[C@@H]12",
|
|
"N[C@@H]1[C@H]2CN(c3nc4c(cc3F)c(=O)c(C(=O)O)cn4C3CC3)C[C@@H]12",
|
|
// some examples that came up while doing a torture test in ZINC
|
|
"CN1CCCNCCN(C)CCC[NH2+]CC1",
|
|
"CN1CCC[NH2+]CCN(C)CCC[NH+](C)CC1",
|
|
"O=P([O-])([O-])C[NH+]1CCCN(CP(=O)([O-])O)CC[NH+](CP(=O)([O-])[O-])CCC[NH+](CP(=O)([O-])[O-])CC1",
|
|
"O=C(CCNC(=O)Cn1cnc2ccccc2c1=O)NCCc1c[nH]c2ccccc12",
|
|
"C1CNCC[NH2+]CC(C2CNCC[NH2+]CCC[NH2+]CCNC2)CNCC[NH2+]C1",
|
|
// C60
|
|
"C12=C3C4=C5C6=C1C7=C8C9=C1C%10=C%11C(=C29)C3=C2C3=C4C4=C5C5=C9C6=C7C6=C7C8=C1C1=C8C%10=C%10C%11=C2C2=C3C3=C4C4=C5C5=C%11C%12=C(C6=C95)C7=C1C1=C%12C5=C%11C4=C3C3=C5C(=C81)C%10=C23",
|
|
// C70
|
|
"C12=C3C4=C5C6=C7C8=C9C%10=C%11C%12=C%13C%10=C%10C8=C5C1=C%10C1=C%13C5=C8C1=C2C1=C3C2=C3C%10=C%13C%14=C3C1=C8C1=C3C5=C%12C5=C8C%11=C%11C9=C7C7=C9C6=C4C2=C2C%10=C4C(=C29)C2=C6C(=C8C8=C9C6=C4C%13=C9C(=C%141)C3=C85)C%11=C27",
|
|
//Bernd's example1
|
|
"C12C3C4C3C3C1C2C43",
|
|
//Bernd's example2
|
|
"C12C3C1C1C4C5C4C4C6C(C6C3C3C1C43)C25",
|
|
//doubled house
|
|
"C12C3C45C67C8C9C66C14C21C35C78C961",
|
|
"C12C3C45C6C7C11C27C41C356",
|
|
//Problematic round-tripping
|
|
"COC(=O)/C=C/C(C)=C/C=C/C(C)=C/C=C/C=C(/C)/C=C/C=C(\\C)/C=C/C(=O)[O-]",
|
|
"COC(=O)/C=C/C(C)=C/C=C/C(C)=C/C=C/C=C(\\C)/C=C/C=C(\\C)/C=C/C(=O)[O-]",
|
|
"c1cc2ccc(ccc3ccc1cc3)cc2",
|
|
"C13C6C1C2C4C2C3C5C4C56",
|
|
"C45C1C6C3C6C5C4C2C3C12",
|
|
"C45C2C6C3C6C5C4C1C3C12",
|
|
"Cl[C@H]1[C@@H](Cl)[C@H](Cl)[C@@H](Cl)[C@H](Cl)[C@@H]1Cl",
|
|
"N[C@]1(C(=O)O)C[C@H](n2oc(=O)[nH]c2=O)C1",
|
|
"CC1CC(C)CC(C)C1",
|
|
"C[C@H]1C[C@@H](C)C[C@H](C)C1",
|
|
"C[C@H]1C[C@@H](C)C[C@@H](C)C1",
|
|
//Stereochemistry in large rings
|
|
"C1[C@@H](C(C)=O)[C@H]2[C@H](C(C)=C1)[C@@H]1O[C@H]2[C@@H](O)C/C=C\\CC1",
|
|
//Chembl 20 examples
|
|
"COC(=O)CC[C@]12C[C@]13CC[C@]1(C)[C@@H]4[C@@H](C[C@@]1(C)[C@@H]3CC[C@H]2C(C)(C)O)O[C@@]1(C[C@@H](C)C(=O)O1)C[C@H]4C",
|
|
"C/C=C/[C@H](O)C(C)(C)[C@@H]1CC=CC=CC=C[C@H](OC)Cc2nc(co2)C(=O)O[C@H](C(C)(C)[C@@H](O)/C=C/C)CC=C[C@H]2O[C@H]2C=CC=Cc2nc(co2)C(=O)O1",
|
|
"N[C@]1(C(=O)O)C[C@@H](n2oc(=O)[nH]c2=O)C1",
|
|
"COc1ccc([C@H]2C3(CO)C4[N@](C)C5C2(CO)C2[N@](C)C3C4(CO)[C@H](c3ccc(OC)cc3)C52CO)cc1",
|
|
"NCCNC(=O)[C@]1(O)C[C@@H](O)[C@H](O)[C@H](O)C1",
|
|
//CIP renumber problems
|
|
// "O=P(O)(O)O[C@@H]1[C@H](OP(=O)(O)O)[C@@H](OP(=O)(O)O)[C@H](OP(=O)(O)O)[C@@H](OP(=O)(O)O[32P](=O)(O)O)[C@@H]1OP(=O)(O)O",
|
|
// "O=C(NCCO[C@H]1[C@@H](O)[C@H](OP(=O)(O)O)[C@@H](OP(=O)(O)O)[C@H](O)[C@H]1OP(=O)(O)O)NCCO[C@H]1[C@@H](O)[C@H](OP(=O)(O)O)[C@@H](O[PH](O)(O)O)[C@H](O)[C@H]1OP(=O)(O)O",
|
|
// "O=C(NCCO[C@@H]1[C@H](OP(=O)(O)O)[C@@H](OP(=O)(O)O)[C@H](O[PH](O)(O)O)[C@@H](OP(=O)(O)O)[C@@H]1OP(=O)(O)O)NCCO[C@H]1[C@@H](OP(=O)(O)O)[C@H](O)[C@@H](OP(=O)(O)O)[C@H](OP(=O)(O)O)[C@H]1O",
|
|
// "O=C(O)[C@H]1[C@@H](C(=O)O)[C@@H](C(=O)O)[C@@H]1C(=O)O",
|
|
"C[C@H]1[C@H](C)[C@@H](C)[C@H]1C",
|
|
"COc1cc([C@H]2[C@@](NC(=O)c3ccc(NC(=O)C4CCCC4)cc3)(C(=O)O)[C@@H](c3ccc(OC(=O)c4cccs4)c(OC)c3)[C@]2(NC(=O)c2ccc(NC(=O)C3CCCC3)cc2)C(=O)O)ccc1OC(=O)c1cccs1",
|
|
"COc1cc([C@H]2[C@@](NC(=O)c3ccc(NC(=O)OC(C)(C)C)cc3)(C(=O)O)[C@@H](c3ccc(OC(=O)c4cccs4)c(OC)c3)[C@]2(NC(=O)c2ccc(NC(=O)OC(C)(C)C)cc2)C(=O)O)ccc1OC(=O)c1cccs1",
|
|
"COc1cc([C@H]2[C@](NC(=O)c3ccc(NC(=O)OC(C)(C)C)cc3)(C(=O)O)[C@H](c3ccc(OC(=O)c4cccs4)c(OC)c3)[C@]2(NC(=O)c2ccc(NC(=O)OC(C)(C)C)cc2)C(=O)O)ccc1OC(=O)c1cccs1",
|
|
"CCC[C@H]1CC[C@H]([C@H]2CC[C@H](OC(=O)[C@H]3[C@@H](c4ccc(O)cc4)[C@H](C(=O)O[C@H]4CC[C@H]([C@H]5CC[C@H](CCC)CC5)CC4)[C@@H]3c3ccc(O)cc3)CC2)CC1",
|
|
//test molecules with atom-mapping numbers
|
|
"[O:1]=[C:2]([CH2:3][C:4]1=[CH:5][CH:6]=[CH:7][CH:8]=[CH:9]1)[NH2:10]",
|
|
//chembl molecules with multiple fragments
|
|
//CHEMBL439119
|
|
"CCCCCCC1C23C4=c5c6c7c8c9c%10c%11c%12c%13c%14c%15c%16c%17c%18c%19c%20c%21c%22c%23c(c5c5c6c6c8c%11c8c%11c%12c%15c%12c(c%20%16)c%21c%15c%23c5c(c68)c%15c%12%11)C2(C[N+]1(C)C)C%22C%19c1c-%18c2c5c(c13)C4C7C9=C5C1(C2C%17%14)C(CCCCCC)[N+](C)(C)CC%10%131.[I-].[I-]",
|
|
//CHEMBL1203199
|
|
"C[C@H](NC(=O)[C@H]1Cc2c(sc3ccccc23)CN1)c1ccccc1.Cl",
|
|
//CHEMBL501667
|
|
"CCn1c2ccc3cc2c2cc(ccc21)C(=O)c1ccc(cc1)Cn1cc[n+](c1)Cc1ccc(cc1)-c1cccc(-c2ccc(cc2)C[n+]2ccn(c2)Cc2ccc(cc2)C3=O)c1C(=O)O.[Br-].[Br-]",
|
|
//CHEMBL12438
|
|
"CCCCCCCCCCCCCCCCCCNC(=O)OC[C@H]1C[C@H]([C@@H2]OC(=O)N(Cc2cccc[n+]2CC)C(C)=O)C1.[I-]",
|
|
//CHEMBL1172371
|
|
"CC.CCCCCCCCCC(C(=O)NCCc1ccc(OP(=S)(Oc2ccc(CCNC(=O)C(CCCCCCCCC)P(=O)([O-])O)cc2)N(C)/N=C/c2ccc(OP3(Oc4ccc(/C=N/N(C)P(=S)(Oc5ccc(CCNC(=O)C(CCCCCCCCC)P(=O)([O-])O)cc5)Oc5ccc(CCNC(=O)C(CCCCCCCCC)P(=O)([O-])O)cc5)cc4)=NP(Oc4ccc(/C=N/N(C)P(=S)(Oc5ccc(CCNC(=O)C(CCCCCCCCC)P(=O)([O-])O)cc5)Oc5ccc(CCNC(=O)C(CCCCCCCCC)P(=O)([O-])O)cc5)cc4)(Oc4ccc(/C=N/N(C)P(=S)(Oc5ccc(CCNC(=O)C(CCCCCCCCC)P(=O)([O-])O)cc5)Oc5ccc(CCNC(=O)C(CCCCCCCCC)P(=O)([O-])O)cc5)cc4)=NP(Oc4ccc(/C=N/N(C)P(=S)(Oc5ccc(CCNC(=O)C(CCCCCCCCC)P(=O)([O-])O)cc5)Oc5ccc(CCNC(=O)C(CCCCCCCCC)P(=O)([O-])O)cc5)cc4)(Oc4ccc(/C=N/N(C)P(=S)(Oc5ccc(CCNC(=O)C(CCCCCCCCC)P(=O)([O-])O)cc5)Oc5ccc(CCNC(=O)C(CCCCCCCCC)P(=O)([O-])O)cc5)cc4)=N3)cc2)cc1)P(=O)([O-])O.CCCCCCCCCCCCCCCC[NH2+]OC(CO)C(O)C(OC1OC(CO)C(O)C(O)C1O)C(O)CO.CCCCCCCCCCCCCCCC[NH2+]OC(CO)C(O)C(OC1OC(CO)C(O)C(O)C1O)C(O)CO.CCCCCCCCCCCCCCCC[NH2+]OC(CO)C(O)C(OC1OC(CO)C(O)C(O)C1O)C(O)CO.CCCCCCCCCCCCCCCC[NH2+]OC(CO)C(O)C(OC1OC(CO)C(O)C(O)C1O)C(O)CO.CCCCCCCCCCCCCCCC[NH2+]OC(CO)C(O)C(OC1OC(CO)C(O)C(O)C1O)C(O)CO.CCCCCCCCCCCCCCCC[NH2+]OC(CO)C(O)C(OC1OC(CO)C(O)C(O)C1O)C(O)CO.CCCCCCCCCCCCCCCC[NH2+]OC(CO)C(O)C(OC1OC(CO)C(O)C(O)C1O)C(O)CO.CCCCCCCCCCCCCCCC[NH2+]OC(CO)C(O)C(OC1OC(CO)C(O)C(O)C1O)C(O)CO.CCCCCCCCCCCCCCCC[NH2+]OC(CO)C(O)C(OC1OC(CO)C(O)C(O)C1O)C(O)CO.CCCCCCCCCCCCCCCC[NH2+]OC(CO)C(O)C(OC1OC(CO)C(O)C(O)C1O)C(O)CO.CCCCCCCCCCCCCCCC[NH2+]OC(CO)C(O)C(OC1OC(CO)C(O)C(O)C1O)C(O)CO.CCCCCCCCCCCCCCCC[NH2+]OC(CO)C(O)C(OC1OC(CO)C(O)C(O)C1O)C(O)CO",
|
|
// sentinel: marks the end of the table for the loops that read it
"EOS"
|
|
};
|
|
|
|
void test7(){
|
|
BOOST_LOG(rdInfoLog) << "testing stability w.r.t. renumbering." << std::endl;
|
|
unsigned int i=0;
|
|
while(smis[i]!="EOS"){
|
|
std::string smiles=smis[i++];
|
|
ROMol *m = SmilesToMol(smiles);
|
|
TEST_ASSERT(m);
|
|
MolOps::assignStereochemistry(*m,true);
|
|
_renumberTest(m,smiles,500);
|
|
delete m;
|
|
}
|
|
BOOST_LOG(rdInfoLog) << "Finished" << std::endl;
|
|
}
|
|
|
|
// Mol block (V2000 header, title CHEMBL1950780) that test8() parses with
// MolBlockToMol() for its SMILES round-trip check.
// Contents: 12 atoms / 12 bonds. Atoms 1-6 are carbons joined in a
// six-membered ring (bonds 1-2, 2-3, 3-4, 4-5, 5-6, 1-6); atoms 7-12 are
// oxygens, one attached to each ring carbon through a bond whose fourth
// field is 1 or 6.
// NOTE(review): fields in this listing are separated by single spaces; the
// V2000 layout is normally fixed-width, so confirm the column spacing
// against the upstream file before relying on these bytes.
std::string molbl1 =
|
|
"CHEMBL1950780 \n"
|
|
" RDKit 2D \n"
|
|
" \n"
|
|
" 12 12 0 0 0 0 0 0 0 0999 V2000 \n"
|
|
" 16.2083 -6.1750 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n"
|
|
" 16.2083 -7.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n"
|
|
" 16.9204 -7.4083 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n"
|
|
" 17.6324 -7.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n"
|
|
" 17.6324 -6.1750 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n"
|
|
" 16.9204 -5.7583 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n"
|
|
" 16.9204 -4.9333 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n"
|
|
" 18.3480 -5.7646 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n"
|
|
" 18.3462 -7.4135 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n"
|
|
" 16.9204 -8.2333 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n"
|
|
" 15.4945 -7.4135 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n"
|
|
" 15.4927 -5.7646 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n"
|
|
" 5 6 1 0 \n"
|
|
" 6 7 1 6 \n"
|
|
" 1 2 1 0 \n"
|
|
" 5 8 1 6 \n"
|
|
" 1 6 1 0 \n"
|
|
" 4 9 1 6 \n"
|
|
" 2 3 1 0 \n"
|
|
" 3 10 1 1 \n"
|
|
" 3 4 1 0 \n"
|
|
" 2 11 1 6 \n"
|
|
" 4 5 1 0 \n"
|
|
" 1 12 1 6 \n"
|
|
"M END";
|
|
|
|
// Mol block (V2000 header, title CHEMBL1874247) that test8() parses with
// MolBlockToMol() for its SMILES round-trip check.
// Contents: 12 atoms / 12 bonds. Atoms 1-6 are chlorines and atoms 7-12 are
// carbons joined in a six-membered ring (bonds 7-8, 7-9, 8-10, 9-11, 10-12,
// 11-12); each carbon carries one chlorine through a bond whose fourth
// field is 1 or 6.
// NOTE(review): fields in this listing are separated by single spaces; the
// V2000 layout is normally fixed-width, so confirm the column spacing
// against the upstream file before relying on these bytes.
std::string molbl2 =
|
|
"CHEMBL1874247 \n"
|
|
" RDKit 2D \n"
|
|
" \n"
|
|
" 12 12 0 0 0 0 0 0 0 0999 V2000 \n"
|
|
" 0.0000 1.6500 0.0000 Cl 0 0 0 0 0 0 0 0 0 0 0 0\n"
|
|
" 1.4289 0.8250 0.0000 Cl 0 0 0 0 0 0 0 0 0 0 0 0\n"
|
|
" -1.4289 0.8250 0.0000 Cl 0 0 0 0 0 0 0 0 0 0 0 0\n"
|
|
" 1.4289 -0.8250 0.0000 Cl 0 0 0 0 0 0 0 0 0 0 0 0\n"
|
|
" -1.4289 -0.8250 0.0000 Cl 0 0 0 0 0 0 0 0 0 0 0 0\n"
|
|
" 0.0000 -1.6500 0.0000 Cl 0 0 0 0 0 0 0 0 0 0 0 0\n"
|
|
" 0.0000 0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n"
|
|
" 0.7145 0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n"
|
|
" -0.7145 0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n"
|
|
" 0.7145 -0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n"
|
|
" -0.7145 -0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n"
|
|
" 0.0000 -0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n"
|
|
" 7 1 1 1 \n"
|
|
" 8 2 1 1 \n"
|
|
" 9 3 1 1 \n"
|
|
" 10 4 1 6 \n"
|
|
" 11 5 1 6 \n"
|
|
" 12 6 1 1 \n"
|
|
" 7 8 1 0 \n"
|
|
" 7 9 1 0 \n"
|
|
" 8 10 1 0 \n"
|
|
" 9 11 1 0 \n"
|
|
" 10 12 1 0 \n"
|
|
" 11 12 1 0 \n"
|
|
"M END";
|
|
|
|
|
|
void test8(){
|
|
BOOST_LOG(rdInfoLog) << "testing smiles round-tripping." << std::endl;
|
|
std::string rdbase = getenv("RDBASE");
|
|
{
|
|
std::string fName = rdbase+"/Code/GraphMol/test_data/iChi1b.mol";
|
|
RWMol *m = MolFileToMol(fName);
|
|
TEST_ASSERT(m);
|
|
std::string smi1=MolToSmiles(*m,true);
|
|
delete m;
|
|
m = SmilesToMol(smi1);
|
|
TEST_ASSERT(m);
|
|
std::string smi2=MolToSmiles(*m,true);
|
|
if(smi1!=smi2) std::cerr<<smi1<<"\n"<<smi2<<std::endl;
|
|
TEST_ASSERT(smi1==smi2);
|
|
delete m;
|
|
}
|
|
|
|
{
|
|
unsigned int i=0;
|
|
while(smis[i]!="EOS"){
|
|
std::string smiles=smis[i++];
|
|
ROMol *m = SmilesToMol(smiles);
|
|
TEST_ASSERT(m);
|
|
// std::cerr<<"MolToSMILES 1"<<std::endl;
|
|
std::string smi1=MolToSmiles(*m, true);
|
|
delete m;
|
|
|
|
m = SmilesToMol(smi1);
|
|
TEST_ASSERT(m);
|
|
// std::cerr<<"MolToSMILES 2"<<std::endl;
|
|
std::string smi2=MolToSmiles(*m, true);
|
|
delete m;
|
|
if(smi1!=smi2){
|
|
std::cerr<<"Input smiles: "<<smiles<<"\n1. Iter: "<<smi1<<"\n2. Iter: "<<smi2<<std::endl;
|
|
}
|
|
TEST_ASSERT(smi1==smi2);
|
|
}
|
|
{
|
|
ROMol *m = MolBlockToMol(molbl1);
|
|
TEST_ASSERT(m);
|
|
std::string smiles=MolToSmiles(*m, true);
|
|
delete m;
|
|
|
|
m = SmilesToMol(smiles);
|
|
TEST_ASSERT(m);
|
|
// std::cerr<<"MolToSMILES 1"<<std::endl;
|
|
std::string smi1=MolToSmiles(*m, true);
|
|
delete m;
|
|
|
|
if(smiles!=smi1){
|
|
std::cerr<<smiles<<"\n"<<smi1<<std::endl;
|
|
}
|
|
TEST_ASSERT(smiles==smi1);
|
|
}
|
|
{
|
|
ROMol *m = MolBlockToMol(molbl2);
|
|
TEST_ASSERT(m);
|
|
std::string smiles=MolToSmiles(*m, true);
|
|
delete m;
|
|
|
|
m = SmilesToMol(smiles);
|
|
TEST_ASSERT(m);
|
|
// std::cerr<<"MolToSMILES 1"<<std::endl;
|
|
std::string smi1=MolToSmiles(*m, true);
|
|
delete m;
|
|
|
|
if(smiles!=smi1){
|
|
std::cerr<<smiles<<"\n"<<smi1<<std::endl;
|
|
}
|
|
TEST_ASSERT(smiles==smi1);
|
|
}
|
|
}
|
|
|
|
BOOST_LOG(rdInfoLog) << "Finished" << std::endl;
|
|
}
|
|
|
|
void test9(){
|
|
BOOST_LOG(rdInfoLog) << "testing chiral invariants." << std::endl;
|
|
std::string rdbase = getenv("RDBASE");
|
|
|
|
{
|
|
std::string smi="C[C@](F)(Cl)I";
|
|
RWMol *m =SmilesToMol(smi,0,0);
|
|
TEST_ASSERT(m);
|
|
MolOps::sanitizeMol(*m);
|
|
std::vector<unsigned int> atomRanks;
|
|
//std::cerr<<smi<<std::endl;
|
|
RDKit::Canon::chiralRankMolAtoms(*m,atomRanks);
|
|
//std::copy(atomRanks.begin(),atomRanks.end(),std::ostream_iterator<unsigned int>(std::cerr," "));
|
|
//std::cerr<<std::endl;
|
|
TEST_ASSERT(atomRanks[0]<atomRanks[2]);
|
|
TEST_ASSERT(atomRanks[0]<atomRanks[3]);
|
|
TEST_ASSERT(atomRanks[0]<atomRanks[4]);
|
|
TEST_ASSERT(atomRanks[2]<atomRanks[3]);
|
|
TEST_ASSERT(atomRanks[2]<atomRanks[4]);
|
|
TEST_ASSERT(atomRanks[3]<atomRanks[4]);
|
|
}
|
|
|
|
{
|
|
std::string smi="CC[C@](F)(Cl)C=C";
|
|
RWMol *m =SmilesToMol(smi,0,0);
|
|
TEST_ASSERT(m);
|
|
MolOps::sanitizeMol(*m);
|
|
std::vector<unsigned int> atomRanks;
|
|
//std::cerr<<smi<<std::endl;
|
|
RDKit::Canon::chiralRankMolAtoms(*m,atomRanks);
|
|
//std::copy(atomRanks.begin(),atomRanks.end(),std::ostream_iterator<unsigned int>(std::cerr," "));
|
|
//std::cerr<<std::endl;
|
|
TEST_ASSERT(atomRanks[1]<atomRanks[3]);
|
|
TEST_ASSERT(atomRanks[1]<atomRanks[4]);
|
|
TEST_ASSERT(atomRanks[1]<atomRanks[5]);
|
|
TEST_ASSERT(atomRanks[3]<atomRanks[4]);
|
|
TEST_ASSERT(atomRanks[4]>atomRanks[5]);
|
|
TEST_ASSERT(atomRanks[4]>atomRanks[5]);
|
|
}
|
|
|
|
{
|
|
// make sure we aren't breaking ties
|
|
std::string smi="C[C@](C)(Cl)I";
|
|
RWMol *m =SmilesToMol(smi,0,0);
|
|
TEST_ASSERT(m);
|
|
MolOps::sanitizeMol(*m);
|
|
std::vector<unsigned int> atomRanks;
|
|
//std::cerr<<smi<<std::endl;
|
|
RDKit::Canon::chiralRankMolAtoms(*m,atomRanks);
|
|
//std::copy(atomRanks.begin(),atomRanks.end(),std::ostream_iterator<unsigned int>(std::cerr," "));
|
|
//std::cerr<<std::endl;
|
|
TEST_ASSERT(atomRanks[0]==atomRanks[2]);
|
|
TEST_ASSERT(atomRanks[0]<atomRanks[3]);
|
|
TEST_ASSERT(atomRanks[0]<atomRanks[4]);
|
|
TEST_ASSERT(atomRanks[2]<atomRanks[3]);
|
|
TEST_ASSERT(atomRanks[2]<atomRanks[4]);
|
|
TEST_ASSERT(atomRanks[3]<atomRanks[4]);
|
|
}
|
|
|
|
{
|
|
std::string smi="N[C@H]1C2CC3CC1C[C@](O)(C3)C2";
|
|
RWMol *m =SmilesToMol(smi,0,0);
|
|
TEST_ASSERT(m);
|
|
MolOps::sanitizeMol(*m);
|
|
std::vector<unsigned int> atomRanks;
|
|
//std::cerr<<smi<<std::endl;
|
|
RDKit::Canon::chiralRankMolAtoms(*m,atomRanks);
|
|
//std::copy(atomRanks.begin(),atomRanks.end(),std::ostream_iterator<unsigned int>(std::cerr," "));
|
|
//std::cerr<<std::endl;
|
|
TEST_ASSERT(atomRanks[0]>atomRanks[1]);
|
|
TEST_ASSERT(atomRanks[0]<atomRanks[9]);
|
|
TEST_ASSERT(atomRanks[2]==atomRanks[6]);
|
|
TEST_ASSERT(atomRanks[7]==atomRanks[11]);
|
|
TEST_ASSERT(atomRanks[3]==atomRanks[5]);
|
|
TEST_ASSERT(atomRanks[2]>atomRanks[3]);
|
|
TEST_ASSERT(atomRanks[2]>atomRanks[11]);
|
|
TEST_ASSERT(atomRanks[3]<atomRanks[11]);
|
|
}
|
|
|
|
{
|
|
// this one was a chiral ranking problem
|
|
std::string smi="COC(C)CC(C)(C)O";
|
|
RWMol *m =SmilesToMol(smi,0,0);
|
|
TEST_ASSERT(m);
|
|
MolOps::sanitizeMol(*m);
|
|
std::vector<unsigned int> atomRanks;
|
|
//std::cerr<<smi<<std::endl;
|
|
RDKit::Canon::chiralRankMolAtoms(*m,atomRanks);
|
|
//std::copy(atomRanks.begin(),atomRanks.end(),std::ostream_iterator<unsigned int>(std::cerr," "));
|
|
//std::cerr<<std::endl;
|
|
TEST_ASSERT(atomRanks[1]>atomRanks[8]);
|
|
TEST_ASSERT(atomRanks[5]>atomRanks[2]);
|
|
}
|
|
|
|
{
|
|
// are double bonds being handled correctly?
|
|
std::string smi="OC[C@H](F)C=O";
|
|
RWMol *m =SmilesToMol(smi,0,0);
|
|
TEST_ASSERT(m);
|
|
MolOps::sanitizeMol(*m);
|
|
std::vector<unsigned int> atomRanks;
|
|
//std::cerr<<smi<<std::endl;
|
|
RDKit::Canon::chiralRankMolAtoms(*m,atomRanks);
|
|
//std::copy(atomRanks.begin(),atomRanks.end(),std::ostream_iterator<unsigned int>(std::cerr," "));
|
|
//std::cerr<<std::endl;
|
|
TEST_ASSERT(atomRanks[0]<atomRanks[5]);
|
|
TEST_ASSERT(atomRanks[1]<atomRanks[4]);
|
|
}
|
|
|
|
{
|
|
// are double bonds being handled correctly?
|
|
std::string smi="O=C[C@H](F)CO";
|
|
RWMol *m =SmilesToMol(smi,0,0);
|
|
TEST_ASSERT(m);
|
|
MolOps::sanitizeMol(*m);
|
|
std::vector<unsigned int> atomRanks;
|
|
//std::cerr<<smi<<std::endl;
|
|
RDKit::Canon::chiralRankMolAtoms(*m,atomRanks);
|
|
//std::copy(atomRanks.begin(),atomRanks.end(),std::ostream_iterator<unsigned int>(std::cerr," "));
|
|
//std::cerr<<std::endl;
|
|
TEST_ASSERT(atomRanks[0]>atomRanks[5]);
|
|
TEST_ASSERT(atomRanks[1]>atomRanks[4]);
|
|
}
|
|
|
|
{
|
|
// are double bonds being handled correctly?
|
|
std::string smi="CC[C@](C)(CF)C=O";
|
|
RWMol *m =SmilesToMol(smi,0,0);
|
|
TEST_ASSERT(m);
|
|
MolOps::sanitizeMol(*m);
|
|
std::vector<unsigned int> atomRanks;
|
|
//std::cerr<<smi<<std::endl;
|
|
RDKit::Canon::chiralRankMolAtoms(*m,atomRanks);
|
|
//std::copy(atomRanks.begin(),atomRanks.end(),std::ostream_iterator<unsigned int>(std::cerr," "));
|
|
//std::cerr<<std::endl;
|
|
TEST_ASSERT(atomRanks[4]>atomRanks[6]);
|
|
TEST_ASSERT(atomRanks[1]<atomRanks[4]);
|
|
}
|
|
|
|
BOOST_LOG(rdInfoLog) << "Finished" << std::endl;
|
|
}
|
|
|
|
|
|
void test10(){
|
|
BOOST_LOG(rdInfoLog) << "testing unique ranks in w.r.t. renumbering." << std::endl;
|
|
unsigned int i=0;
|
|
while(smis[i]!="EOS"){
|
|
std::string smiles=smis[i++];
|
|
// std::cerr<< ">>>Molecule: " << smiles << std::endl;
|
|
ROMol *m = SmilesToMol(smiles);
|
|
TEST_ASSERT(m);
|
|
MolOps::assignStereochemistry(*m,true);
|
|
_renumberTest2(m,smiles,1);
|
|
delete m;
|
|
}
|
|
BOOST_LOG(rdInfoLog) << "Finished" << std::endl;
|
|
}
|
|
|
|
|
|
void test11(){
|
|
BOOST_LOG(rdInfoLog) << "testing mol fragments." << std::endl;
|
|
{
|
|
std::string smi="C[C@H]([C@H](c1ccccc1)O)N2CCCCC2.C[C@@H]([C@H](c1ccccc1)O)N2CCCCC2";
|
|
ROMol *m = SmilesToMol(smi);
|
|
TEST_ASSERT(m);
|
|
std::vector<std::string> vfragsmi;
|
|
std::vector<std::vector<int> > frags;
|
|
unsigned int numFrag = MolOps::getMolFrags(*m,frags);
|
|
for(unsigned i=0;i<numFrag;++i){
|
|
std::string smii = MolFragmentToSmiles(*m,frags[i],0,0,0,true);
|
|
//std::cout << "Test "<< smii << std::endl;
|
|
vfragsmi.push_back(smii);
|
|
}
|
|
std::string smi1 = MolToSmiles(*m,true);
|
|
delete m;
|
|
|
|
smi="C[C@@H]([C@H](c1ccccc1)O)N2CCCCC2.C[C@H]([C@H](c1ccccc1)O)N2CCCCC2";
|
|
m = SmilesToMol(smi);
|
|
TEST_ASSERT(m);
|
|
std::string smi2 = MolToSmiles(*m,true);
|
|
delete m;
|
|
//std::cout << smi1 << "\n" << smi2 << std::endl;
|
|
TEST_ASSERT(smi1==smi2);
|
|
|
|
}
|
|
BOOST_LOG(rdInfoLog) << "Finished" << std::endl;
|
|
}
|
|
|
|
void test12(){
|
|
BOOST_LOG(rdInfoLog) << "testing protein round-tripping." << std::endl;
|
|
std::string rdbase = getenv("RDBASE");
|
|
{
|
|
std::string fName = rdbase+"/Code/GraphMol/FileParsers/test_data/2FVD.pdb";
|
|
ROMol *m = PDBFileToMol(fName);
|
|
TEST_ASSERT(m);
|
|
std::string smi1 = MolToSmiles(*m,true);
|
|
delete m;
|
|
|
|
m = SmilesToMol(smi1);
|
|
TEST_ASSERT(m);
|
|
std::string smi2 = MolToSmiles(*m,true);
|
|
delete m;
|
|
//std::cout << smi1 << "\n" << smi2 << std::endl;
|
|
TEST_ASSERT(smi1==smi2);
|
|
}
|
|
BOOST_LOG(rdInfoLog) << "Finished" << std::endl;
|
|
}
|
|
|
|
// Test driver: initialises RDKit logging, then calls each test in turn.
// The calls are not in numeric order: test8() and test7() run after
// test11(), and test12() runs last.
// Returns 0 when every call returns normally.
int main(){
|
|
RDLog::InitLogs();
|
|
// changing this to "#if 0" compiles out the whole test run
#if 1
|
|
test1();
|
|
test2();
|
|
test3();
|
|
test4();
|
|
test5();
|
|
test6();
|
|
test7a();
|
|
test9();
|
|
test10();
|
|
test11();
|
|
test8();
|
|
test7();
|
|
test12();
|
|
#endif
|
|
return 0;
|
|
}
|
|
|