mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-03 21:44:30 +08:00
add a test for initstruchk, fix an input problem
This commit is contained in:
334
External/AvalonTools/test1.cpp
vendored
334
External/AvalonTools/test1.cpp
vendored
@@ -4,169 +4,172 @@
|
||||
//
|
||||
|
||||
//
|
||||
// Expected test results here correspond to v1.0 of the open-source avalontoolkit
|
||||
// Expected test results here correspond to v1.0 of the open-source
|
||||
// avalontoolkit
|
||||
//
|
||||
|
||||
|
||||
#include <RDGeneral/RDLog.h>
|
||||
#include <GraphMol/RDKitBase.h>
|
||||
#include <GraphMol/RDKitBase.h>
|
||||
#include <GraphMol/SmilesParse/SmilesParse.h>
|
||||
#include <GraphMol/FileParsers/FileParsers.h>
|
||||
#include <RDGeneral/Invariant.h>
|
||||
#include <RDGeneral/Invariant.h>
|
||||
#include <DataStructs/ExplicitBitVect.h>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <cstdio>
|
||||
#include "AvalonTools.h"
|
||||
|
||||
#include <string>
|
||||
|
||||
using namespace RDKit;
|
||||
|
||||
void test1(){
|
||||
void test1() {
|
||||
BOOST_LOG(rdInfoLog) << "testing canonical smiles generation" << std::endl;
|
||||
|
||||
{
|
||||
ROMol *m = static_cast<ROMol *>(SmilesToMol("c1ccccc1"));
|
||||
TEST_ASSERT(m);
|
||||
std::string smi=AvalonTools::getCanonSmiles(*m);
|
||||
TEST_ASSERT(smi=="c1ccccc1");
|
||||
std::string smi = AvalonTools::getCanonSmiles(*m);
|
||||
TEST_ASSERT(smi == "c1ccccc1");
|
||||
delete m;
|
||||
}
|
||||
{
|
||||
ROMol *m = static_cast<ROMol *>(SmilesToMol("c1cccnc1"));
|
||||
TEST_ASSERT(m);
|
||||
std::string smi=AvalonTools::getCanonSmiles(*m);
|
||||
TEST_ASSERT(smi=="c1ccncc1");
|
||||
std::string smi = AvalonTools::getCanonSmiles(*m);
|
||||
TEST_ASSERT(smi == "c1ccncc1");
|
||||
delete m;
|
||||
}
|
||||
{
|
||||
ROMol *m = static_cast<ROMol *>(SmilesToMol("n1ccccc1"));
|
||||
TEST_ASSERT(m);
|
||||
std::string smi=AvalonTools::getCanonSmiles(*m);
|
||||
TEST_ASSERT(smi=="c1ccncc1");
|
||||
std::string smi = AvalonTools::getCanonSmiles(*m);
|
||||
TEST_ASSERT(smi == "c1ccncc1");
|
||||
delete m;
|
||||
}
|
||||
{
|
||||
std::string smi=AvalonTools::getCanonSmiles("n1ccccc1",true);
|
||||
TEST_ASSERT(smi=="c1ccncc1");
|
||||
std::string smi = AvalonTools::getCanonSmiles("n1ccccc1", true);
|
||||
TEST_ASSERT(smi == "c1ccncc1");
|
||||
}
|
||||
{
|
||||
std::string smi=AvalonTools::getCanonSmiles("c1cccnc1",true);
|
||||
TEST_ASSERT(smi=="c1ccncc1");
|
||||
std::string smi = AvalonTools::getCanonSmiles("c1cccnc1", true);
|
||||
TEST_ASSERT(smi == "c1ccncc1");
|
||||
}
|
||||
|
||||
BOOST_LOG(rdInfoLog) << "done" << std::endl;
|
||||
}
|
||||
void test2(){
|
||||
void test2() {
|
||||
BOOST_LOG(rdInfoLog) << "testing coordinate generation" << std::endl;
|
||||
|
||||
#if 1
|
||||
{
|
||||
RWMol *m = SmilesToMol("c1cccnc1");
|
||||
TEST_ASSERT(m);
|
||||
unsigned int confId=AvalonTools::set2DCoords(*m);
|
||||
TEST_ASSERT(m->getNumConformers()==1);
|
||||
TEST_ASSERT(confId==0);
|
||||
unsigned int confId = AvalonTools::set2DCoords(*m);
|
||||
TEST_ASSERT(m->getNumConformers() == 1);
|
||||
TEST_ASSERT(confId == 0);
|
||||
delete m;
|
||||
}
|
||||
#endif
|
||||
{
|
||||
std::string molb = AvalonTools::set2DCoords("c1cccnc1",true);
|
||||
TEST_ASSERT(molb!="");
|
||||
std::string molb = AvalonTools::set2DCoords("c1cccnc1", true);
|
||||
TEST_ASSERT(molb != "");
|
||||
}
|
||||
|
||||
BOOST_LOG(rdInfoLog) << "done" << std::endl;
|
||||
}
|
||||
|
||||
void test3(){
|
||||
void test3() {
|
||||
BOOST_LOG(rdInfoLog) << "testing fingerprint generation" << std::endl;
|
||||
|
||||
{
|
||||
ROMol *m = static_cast<ROMol *>(SmilesToMol("c1ccccn1"));
|
||||
TEST_ASSERT(m);
|
||||
ExplicitBitVect bv(512);
|
||||
AvalonTools::getAvalonFP(*m,bv,512,false,true,0x00006FFF);
|
||||
AvalonTools::getAvalonFP(*m, bv, 512, false, true, 0x00006FFF);
|
||||
BOOST_LOG(rdInfoLog) << "c1ccccn1 " << bv.getNumOnBits() << std::endl;
|
||||
TEST_ASSERT(bv.getNumOnBits()==18);
|
||||
TEST_ASSERT(bv.getNumOnBits() == 18);
|
||||
delete m;
|
||||
}
|
||||
{
|
||||
ROMol *m = static_cast<ROMol *>(SmilesToMol("c1ccccc1"));
|
||||
TEST_ASSERT(m);
|
||||
ExplicitBitVect bv(512);
|
||||
AvalonTools::getAvalonFP(*m,bv,512,false,true,0x006FFF);
|
||||
AvalonTools::getAvalonFP(*m, bv, 512, false, true, 0x006FFF);
|
||||
BOOST_LOG(rdInfoLog) << "c1ccccn1 " << bv.getNumOnBits() << std::endl;
|
||||
TEST_ASSERT(bv.getNumOnBits()==6);
|
||||
TEST_ASSERT(bv.getNumOnBits() == 6);
|
||||
delete m;
|
||||
}
|
||||
{
|
||||
ROMol *m = static_cast<ROMol *>(SmilesToMol("c1nnccc1"));
|
||||
TEST_ASSERT(m);
|
||||
ExplicitBitVect bv(512);
|
||||
AvalonTools::getAvalonFP(*m,bv,512,false,true,0x006FFF);
|
||||
AvalonTools::getAvalonFP(*m, bv, 512, false, true, 0x006FFF);
|
||||
BOOST_LOG(rdInfoLog) << "c1nnccc1 " << bv.getNumOnBits() << std::endl;
|
||||
TEST_ASSERT(bv.getNumOnBits()==28);
|
||||
TEST_ASSERT(bv.getNumOnBits() == 28);
|
||||
delete m;
|
||||
}
|
||||
{
|
||||
ROMol *m = static_cast<ROMol *>(SmilesToMol("c1ncncc1"));
|
||||
TEST_ASSERT(m);
|
||||
ExplicitBitVect bv(512);
|
||||
AvalonTools::getAvalonFP(*m,bv,512,false,true,0x006FFF);
|
||||
AvalonTools::getAvalonFP(*m, bv, 512, false, true, 0x006FFF);
|
||||
BOOST_LOG(rdInfoLog) << "c1ncncc1 " << bv.getNumOnBits() << std::endl;
|
||||
TEST_ASSERT(bv.getNumOnBits()==25);
|
||||
TEST_ASSERT(bv.getNumOnBits() == 25);
|
||||
delete m;
|
||||
}
|
||||
{
|
||||
ExplicitBitVect bv(512);
|
||||
AvalonTools::getAvalonFP("c1cccnc1",true,bv,512,false,true,0x006FFF);
|
||||
AvalonTools::getAvalonFP("c1cccnc1", true, bv, 512, false, true, 0x006FFF);
|
||||
BOOST_LOG(rdInfoLog) << "c1cccnc1 " << bv.getNumOnBits() << std::endl;
|
||||
TEST_ASSERT(bv.getNumOnBits()==18);
|
||||
TEST_ASSERT(bv.getNumOnBits() == 18);
|
||||
}
|
||||
{
|
||||
ExplicitBitVect bv(512);
|
||||
AvalonTools::getAvalonFP("c1ccccc1",true,bv,512,false,true,0x006FFF);
|
||||
AvalonTools::getAvalonFP("c1ccccc1", true, bv, 512, false, true, 0x006FFF);
|
||||
BOOST_LOG(rdInfoLog) << "c1ccccc1 " << bv.getNumOnBits() << std::endl;
|
||||
TEST_ASSERT(bv.getNumOnBits()==6);
|
||||
TEST_ASSERT(bv.getNumOnBits() == 6);
|
||||
}
|
||||
|
||||
{
|
||||
ROMol *m = static_cast<ROMol *>(SmilesToMol("c1cccnc1"));
|
||||
TEST_ASSERT(m);
|
||||
ExplicitBitVect bv(1024);
|
||||
AvalonTools::getAvalonFP(*m,bv,1024,false,true,0x006FFF);
|
||||
AvalonTools::getAvalonFP(*m, bv, 1024, false, true, 0x006FFF);
|
||||
BOOST_LOG(rdInfoLog) << "c1cccnc1 " << bv.getNumOnBits() << std::endl;
|
||||
TEST_ASSERT(bv.getNumOnBits()==19);
|
||||
TEST_ASSERT(bv.getNumOnBits() == 19);
|
||||
delete m;
|
||||
}
|
||||
{
|
||||
ExplicitBitVect bv(2048);
|
||||
AvalonTools::getAvalonFP("c1cocc1",true,bv,2048,false,true,0x006FFF);
|
||||
AvalonTools::getAvalonFP("c1cocc1", true, bv, 2048, false, true, 0x006FFF);
|
||||
BOOST_LOG(rdInfoLog) << "c1cocc1 " << bv.getNumOnBits() << std::endl;
|
||||
TEST_ASSERT(bv.getNumOnBits()==53);
|
||||
TEST_ASSERT(bv.getNumOnBits() == 53);
|
||||
}
|
||||
{
|
||||
ExplicitBitVect bv(2048);
|
||||
AvalonTools::getAvalonFP("C1=COC=C1",true,bv,2048,false,true,0x006FFF);
|
||||
AvalonTools::getAvalonFP("C1=COC=C1", true, bv, 2048, false, true,
|
||||
0x006FFF);
|
||||
BOOST_LOG(rdInfoLog) << "C1=COC=C1 " << bv.getNumOnBits() << std::endl;
|
||||
TEST_ASSERT(bv.getNumOnBits()==53);
|
||||
TEST_ASSERT(bv.getNumOnBits() == 53);
|
||||
}
|
||||
|
||||
BOOST_LOG(rdInfoLog) << "done" << std::endl;
|
||||
}
|
||||
|
||||
|
||||
void testRDK151(){
|
||||
BOOST_LOG(rdInfoLog) << "testing Jira issue RDK-151: pyAvalonTools not generating chiral smiles from molecules" << std::endl;
|
||||
void testRDK151() {
|
||||
BOOST_LOG(rdInfoLog) << "testing Jira issue RDK-151: pyAvalonTools not "
|
||||
"generating chiral smiles from molecules"
|
||||
<< std::endl;
|
||||
|
||||
{
|
||||
std::string tSmi="C[C@H](F)Cl";
|
||||
std::string tSmi = "C[C@H](F)Cl";
|
||||
ROMol *m = static_cast<ROMol *>(SmilesToMol(tSmi));
|
||||
TEST_ASSERT(m);
|
||||
std::string smi=AvalonTools::getCanonSmiles(tSmi,true);
|
||||
CHECK_INVARIANT(smi==tSmi,smi+"!="+tSmi);
|
||||
smi=AvalonTools::getCanonSmiles(*m);
|
||||
CHECK_INVARIANT(smi==tSmi,smi+"!="+tSmi);
|
||||
std::string smi = AvalonTools::getCanonSmiles(tSmi, true);
|
||||
CHECK_INVARIANT(smi == tSmi, smi + "!=" + tSmi);
|
||||
smi = AvalonTools::getCanonSmiles(*m);
|
||||
CHECK_INVARIANT(smi == tSmi, smi + "!=" + tSmi);
|
||||
|
||||
delete m;
|
||||
}
|
||||
@@ -174,57 +177,60 @@ void testRDK151(){
|
||||
BOOST_LOG(rdInfoLog) << "done" << std::endl;
|
||||
}
|
||||
|
||||
void testSmilesFailures(){
|
||||
void testSmilesFailures() {
|
||||
BOOST_LOG(rdInfoLog) << "testing handling of bad smiles strings" << std::endl;
|
||||
|
||||
{
|
||||
std::string tSmi="C1C";
|
||||
std::string smi=AvalonTools::getCanonSmiles(tSmi,true);
|
||||
CHECK_INVARIANT(smi=="",smi);
|
||||
std::string tSmi = "C1C";
|
||||
std::string smi = AvalonTools::getCanonSmiles(tSmi, true);
|
||||
CHECK_INVARIANT(smi == "", smi);
|
||||
}
|
||||
|
||||
BOOST_LOG(rdInfoLog) << "done" << std::endl;
|
||||
}
|
||||
|
||||
void testSubstructFps(){
|
||||
void testSubstructFps() {
|
||||
BOOST_LOG(rdInfoLog) << "testing substructure fingerprints " << std::endl;
|
||||
{
|
||||
ExplicitBitVect bv1(512),bv2(512);
|
||||
AvalonTools::getAvalonFP("c1ccccc1",true,bv1,512,true,true,AvalonTools::avalonSSSBits);
|
||||
AvalonTools::getAvalonFP("c1ccccc1C(F)(F)F",true,bv2,512);
|
||||
TEST_ASSERT((bv1&bv2)==bv1);
|
||||
AvalonTools::getAvalonFP("c1ccccc1C(F)(F)F",true,bv1,512);
|
||||
TEST_ASSERT((bv1&bv2)==bv1);
|
||||
AvalonTools::getAvalonFP("c1cccc(C)c1C(F)(F)F",true,bv2,512);
|
||||
TEST_ASSERT((bv1&bv2)==bv1);
|
||||
ExplicitBitVect bv1(512), bv2(512);
|
||||
AvalonTools::getAvalonFP("c1ccccc1", true, bv1, 512, true, true,
|
||||
AvalonTools::avalonSSSBits);
|
||||
AvalonTools::getAvalonFP("c1ccccc1C(F)(F)F", true, bv2, 512);
|
||||
TEST_ASSERT((bv1 & bv2) == bv1);
|
||||
AvalonTools::getAvalonFP("c1ccccc1C(F)(F)F", true, bv1, 512);
|
||||
TEST_ASSERT((bv1 & bv2) == bv1);
|
||||
AvalonTools::getAvalonFP("c1cccc(C)c1C(F)(F)F", true, bv2, 512);
|
||||
TEST_ASSERT((bv1 & bv2) == bv1);
|
||||
}
|
||||
{
|
||||
ExplicitBitVect bv1(512),bv2(512);
|
||||
AvalonTools::getAvalonFP("c1ccccc1O",true,bv1,512,true,true,AvalonTools::avalonSSSBits);
|
||||
AvalonTools::getAvalonFP("c1ccccc1OC",true,bv2,512);
|
||||
TEST_ASSERT((bv1&bv2)==bv1);
|
||||
ExplicitBitVect bv1(512), bv2(512);
|
||||
AvalonTools::getAvalonFP("c1ccccc1O", true, bv1, 512, true, true,
|
||||
AvalonTools::avalonSSSBits);
|
||||
AvalonTools::getAvalonFP("c1ccccc1OC", true, bv2, 512);
|
||||
TEST_ASSERT((bv1 & bv2) == bv1);
|
||||
}
|
||||
BOOST_LOG(rdInfoLog) << "done" << std::endl;
|
||||
}
|
||||
|
||||
void testStruChk(){
|
||||
void testStruChk() {
|
||||
BOOST_LOG(rdInfoLog) << "testing structure checking " << std::endl;
|
||||
{
|
||||
int errs = 0;
|
||||
RDKit::ROMOL_SPTR m = AvalonTools::checkMol(errs, "c1ccccc1",true);
|
||||
TEST_ASSERT(errs==0);
|
||||
m = AvalonTools::checkMol(errs, "c1c(R)cccc1C1(CC-C(C)C1)C",true);
|
||||
TEST_ASSERT(errs!=0);
|
||||
RDKit::ROMOL_SPTR m = AvalonTools::checkMol(errs, "c1ccccc1", true);
|
||||
TEST_ASSERT(errs == 0);
|
||||
m = AvalonTools::checkMol(errs, "c1c(R)cccc1C1(CC-C(C)C1)C", true);
|
||||
TEST_ASSERT(errs != 0);
|
||||
}
|
||||
{
|
||||
int errs = 0;
|
||||
std::string res;
|
||||
boost::tie(res,errs)=AvalonTools::checkMolString("c1ccccc1",true);
|
||||
TEST_ASSERT(errs==0);
|
||||
TEST_ASSERT(res!="");
|
||||
boost::tie(res,errs)=AvalonTools::checkMolString("c1c(R)cccc1C1(CC-C(C)C1)C",true);
|
||||
TEST_ASSERT(errs==1);
|
||||
TEST_ASSERT(res=="");
|
||||
boost::tie(res, errs) = AvalonTools::checkMolString("c1ccccc1", true);
|
||||
TEST_ASSERT(errs == 0);
|
||||
TEST_ASSERT(res != "");
|
||||
boost::tie(res, errs) =
|
||||
AvalonTools::checkMolString("c1c(R)cccc1C1(CC-C(C)C1)C", true);
|
||||
TEST_ASSERT(errs == 1);
|
||||
TEST_ASSERT(res == "");
|
||||
}
|
||||
BOOST_LOG(rdInfoLog) << "done" << std::endl;
|
||||
}
|
||||
@@ -233,123 +239,124 @@ void testBadMolfile() {
|
||||
BOOST_LOG(rdInfoLog) << "testing handling bad molecules " << std::endl;
|
||||
// some tests around dealing with bad mol blocks
|
||||
{
|
||||
std::string molb="SNAP007157A\n\
|
||||
std::string molb =
|
||||
"SNAP007157A\n\
|
||||
MACCS-II3194121345\n\
|
||||
\n\
|
||||
0 0 0 0 0";
|
||||
std::string smi=AvalonTools::getCanonSmiles(molb,false);
|
||||
CHECK_INVARIANT(smi=="",smi);
|
||||
std::string smi = AvalonTools::getCanonSmiles(molb, false);
|
||||
CHECK_INVARIANT(smi == "", smi);
|
||||
|
||||
ExplicitBitVect bv(1024);
|
||||
AvalonTools::getAvalonFP(molb,false,bv,1024);
|
||||
TEST_ASSERT(bv.getNumOnBits()==0);
|
||||
|
||||
AvalonTools::getAvalonFP(molb, false, bv, 1024);
|
||||
TEST_ASSERT(bv.getNumOnBits() == 0);
|
||||
|
||||
std::string oMolb;
|
||||
AvalonTools::set2DCoords(molb,false);
|
||||
CHECK_INVARIANT(oMolb=="",oMolb);
|
||||
|
||||
AvalonTools::set2DCoords(molb, false);
|
||||
CHECK_INVARIANT(oMolb == "", oMolb);
|
||||
}
|
||||
}
|
||||
|
||||
void testSmilesSegFault() {
|
||||
BOOST_LOG(rdInfoLog) << "testing a canonical smiles case that led to seg faults " << std::endl;
|
||||
BOOST_LOG(rdInfoLog)
|
||||
<< "testing a canonical smiles case that led to seg faults " << std::endl;
|
||||
// some tests around dealing with bad mol blocks
|
||||
{
|
||||
std::string inSmi(1024,'C');
|
||||
std::string smi=AvalonTools::getCanonSmiles(inSmi,true);
|
||||
TEST_ASSERT(smi==inSmi);
|
||||
std::string inSmi(1024, 'C');
|
||||
std::string smi = AvalonTools::getCanonSmiles(inSmi, true);
|
||||
TEST_ASSERT(smi == inSmi);
|
||||
}
|
||||
{
|
||||
std::string inSmi(1534,'C');
|
||||
std::string smi=AvalonTools::getCanonSmiles(inSmi,true);
|
||||
TEST_ASSERT(smi==inSmi);
|
||||
std::string inSmi(1534, 'C');
|
||||
std::string smi = AvalonTools::getCanonSmiles(inSmi, true);
|
||||
TEST_ASSERT(smi == inSmi);
|
||||
}
|
||||
BOOST_LOG(rdInfoLog) << "done" << std::endl;
|
||||
}
|
||||
|
||||
void testGithub336() {
|
||||
BOOST_LOG(rdInfoLog) << "testing github issue 336: bad canonical smiles for conjugated double bonds" << std::endl;
|
||||
BOOST_LOG(rdInfoLog) << "testing github issue 336: bad canonical smiles for "
|
||||
"conjugated double bonds" << std::endl;
|
||||
// some tests around dealing with bad mol blocks
|
||||
{
|
||||
std::string pathName=getenv("RDBASE");
|
||||
std::string pathName = getenv("RDBASE");
|
||||
pathName += "/External/AvalonTools/test_data/";
|
||||
std::ifstream ins((pathName+"EZ_test.2.sdf").c_str());
|
||||
std::string mb((std::istreambuf_iterator<char>(ins)),
|
||||
std::ifstream ins((pathName + "EZ_test.2.sdf").c_str());
|
||||
std::string mb((std::istreambuf_iterator<char>(ins)),
|
||||
std::istreambuf_iterator<char>());
|
||||
ROMol *m = MolBlockToMol(mb);
|
||||
TEST_ASSERT(m);
|
||||
TEST_ASSERT(m->getNumAtoms()==17);
|
||||
TEST_ASSERT(m->getNumAtoms() == 17);
|
||||
|
||||
std::string smi1=AvalonTools::getCanonSmiles(mb,false);
|
||||
std::string smi2=AvalonTools::getCanonSmiles(*m);
|
||||
std::cerr<<"smi1: "<<smi1<<std::endl;
|
||||
std::cerr<<"smi2: "<<smi2<<std::endl;
|
||||
TEST_ASSERT(smi1==smi2);
|
||||
std::string smi1 = AvalonTools::getCanonSmiles(mb, false);
|
||||
std::string smi2 = AvalonTools::getCanonSmiles(*m);
|
||||
std::cerr << "smi1: " << smi1 << std::endl;
|
||||
std::cerr << "smi2: " << smi2 << std::endl;
|
||||
TEST_ASSERT(smi1 == smi2);
|
||||
delete m;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
{
|
||||
std::string pathName=getenv("RDBASE");
|
||||
std::string pathName = getenv("RDBASE");
|
||||
pathName += "/External/AvalonTools/test_data/";
|
||||
std::ifstream ins((pathName+"heterocycle.mol").c_str());
|
||||
std::string mb((std::istreambuf_iterator<char>(ins)),
|
||||
std::ifstream ins((pathName + "heterocycle.mol").c_str());
|
||||
std::string mb((std::istreambuf_iterator<char>(ins)),
|
||||
std::istreambuf_iterator<char>());
|
||||
RWMol *m = MolBlockToMol(mb,false);
|
||||
RWMol *m = MolBlockToMol(mb, false);
|
||||
TEST_ASSERT(m);
|
||||
TEST_ASSERT(m->getNumAtoms()==6);
|
||||
TEST_ASSERT(m->getNumAtoms() == 6);
|
||||
m->updatePropertyCache();
|
||||
MolOps::cleanUp(*m);
|
||||
MolOps::setAromaticity(*m);
|
||||
|
||||
std::string smi1=AvalonTools::getCanonSmiles(mb,false);
|
||||
std::string smi2=AvalonTools::getCanonSmiles(*m);
|
||||
std::cerr<<"smi1: "<<smi1<<std::endl;
|
||||
std::cerr<<"smi2: "<<smi2<<std::endl;
|
||||
TEST_ASSERT(smi1==smi2);
|
||||
TEST_ASSERT(smi1=="CC1C=NNC=1");
|
||||
std::string smi1 = AvalonTools::getCanonSmiles(mb, false);
|
||||
std::string smi2 = AvalonTools::getCanonSmiles(*m);
|
||||
std::cerr << "smi1: " << smi1 << std::endl;
|
||||
std::cerr << "smi2: " << smi2 << std::endl;
|
||||
TEST_ASSERT(smi1 == smi2);
|
||||
TEST_ASSERT(smi1 == "CC1C=NNC=1");
|
||||
delete m;
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
std::string pathName=getenv("RDBASE");
|
||||
std::string pathName = getenv("RDBASE");
|
||||
pathName += "/External/AvalonTools/test_data/";
|
||||
std::ifstream ins((pathName+"heterocycle2.mol").c_str());
|
||||
std::string mb((std::istreambuf_iterator<char>(ins)),
|
||||
std::ifstream ins((pathName + "heterocycle2.mol").c_str());
|
||||
std::string mb((std::istreambuf_iterator<char>(ins)),
|
||||
std::istreambuf_iterator<char>());
|
||||
RWMol *m = MolBlockToMol(mb,false);
|
||||
RWMol *m = MolBlockToMol(mb, false);
|
||||
TEST_ASSERT(m);
|
||||
TEST_ASSERT(m->getNumAtoms()==11);
|
||||
TEST_ASSERT(m->getNumAtoms() == 11);
|
||||
m->updatePropertyCache();
|
||||
MolOps::cleanUp(*m);
|
||||
MolOps::setAromaticity(*m);
|
||||
|
||||
std::string smi1=AvalonTools::getCanonSmiles(mb,false);
|
||||
std::string smi2=AvalonTools::getCanonSmiles(*m);
|
||||
std::cerr<<"smi1: "<<smi1<<std::endl;
|
||||
std::cerr<<"smi2: "<<smi2<<std::endl;
|
||||
TEST_ASSERT(smi1==smi2);
|
||||
TEST_ASSERT(smi1=="CN2C=CC1=CC(=O)NC=C12");
|
||||
std::string smi1 = AvalonTools::getCanonSmiles(mb, false);
|
||||
std::string smi2 = AvalonTools::getCanonSmiles(*m);
|
||||
std::cerr << "smi1: " << smi1 << std::endl;
|
||||
std::cerr << "smi2: " << smi2 << std::endl;
|
||||
TEST_ASSERT(smi1 == smi2);
|
||||
TEST_ASSERT(smi1 == "CN2C=CC1=CC(=O)NC=C12");
|
||||
delete m;
|
||||
}
|
||||
}
|
||||
|
||||
BOOST_LOG(rdInfoLog) << "done" << std::endl;
|
||||
}
|
||||
|
||||
|
||||
void testCountFps(){
|
||||
void testCountFps() {
|
||||
BOOST_LOG(rdInfoLog) << "testing substructure fingerprints " << std::endl;
|
||||
{
|
||||
SparseIntVect<boost::uint32_t> cv1(5000),cv2(5000);
|
||||
AvalonTools::getAvalonCountFP("c1ccccc1",true,cv1,5000);
|
||||
AvalonTools::getAvalonCountFP("c1ccccc1.c1ccccc1",true,cv2,5000);
|
||||
for(unsigned int i=0;i<cv1.size();++i){
|
||||
if(cv1[i] && (cv2[i]!=2*cv1[i])){
|
||||
std::cerr<<" mismatch: "<<i<<" "<<cv1[i]<<" "<<cv2[i]<<std::endl;
|
||||
SparseIntVect<boost::uint32_t> cv1(5000), cv2(5000);
|
||||
AvalonTools::getAvalonCountFP("c1ccccc1", true, cv1, 5000);
|
||||
AvalonTools::getAvalonCountFP("c1ccccc1.c1ccccc1", true, cv2, 5000);
|
||||
for (unsigned int i = 0; i < cv1.size(); ++i) {
|
||||
if (cv1[i] && (cv2[i] != 2 * cv1[i])) {
|
||||
std::cerr << " mismatch: " << i << " " << cv1[i] << " " << cv2[i]
|
||||
<< std::endl;
|
||||
}
|
||||
}
|
||||
for(unsigned int i=0;i<cv1.size();++i){
|
||||
TEST_ASSERT(!cv1[i] || (cv2[i]==2*cv1[i]) );
|
||||
for (unsigned int i = 0; i < cv1.size(); ++i) {
|
||||
TEST_ASSERT(!cv1[i] || (cv2[i] == 2 * cv1[i]));
|
||||
}
|
||||
}
|
||||
{
|
||||
@@ -358,16 +365,17 @@ void testCountFps(){
|
||||
ROMol *m2 = static_cast<ROMol *>(SmilesToMol("c1ccccc1.c1ccccc1"));
|
||||
TEST_ASSERT(m2);
|
||||
|
||||
SparseIntVect<boost::uint32_t> cv1(5000),cv2(5000);
|
||||
AvalonTools::getAvalonCountFP(*m1,cv1,5000);
|
||||
AvalonTools::getAvalonCountFP(*m2,cv2,5000);
|
||||
for(unsigned int i=0;i<cv1.size();++i){
|
||||
if(cv1[i] && (cv2[i]!=2*cv1[i])){
|
||||
std::cerr<<" mismatch: "<<i<<" "<<cv1[i]<<" "<<cv2[i]<<std::endl;
|
||||
SparseIntVect<boost::uint32_t> cv1(5000), cv2(5000);
|
||||
AvalonTools::getAvalonCountFP(*m1, cv1, 5000);
|
||||
AvalonTools::getAvalonCountFP(*m2, cv2, 5000);
|
||||
for (unsigned int i = 0; i < cv1.size(); ++i) {
|
||||
if (cv1[i] && (cv2[i] != 2 * cv1[i])) {
|
||||
std::cerr << " mismatch: " << i << " " << cv1[i] << " " << cv2[i]
|
||||
<< std::endl;
|
||||
}
|
||||
}
|
||||
for(unsigned int i=0;i<cv1.size();++i){
|
||||
TEST_ASSERT(!cv1[i] || (cv2[i]==2*cv1[i]) );
|
||||
for (unsigned int i = 0; i < cv1.size(); ++i) {
|
||||
TEST_ASSERT(!cv1[i] || (cv2[i] == 2 * cv1[i]));
|
||||
}
|
||||
delete m1;
|
||||
delete m2;
|
||||
@@ -375,10 +383,34 @@ void testCountFps(){
|
||||
BOOST_LOG(rdInfoLog) << "done" << std::endl;
|
||||
}
|
||||
|
||||
void testInitStruChk() {
|
||||
BOOST_LOG(rdInfoLog) << "testing init struchk " << std::endl;
|
||||
{
|
||||
std::string pathName = getenv("RDBASE");
|
||||
pathName += "/Data/struchk/";
|
||||
std::string struchk_init =
|
||||
"-tm\n"
|
||||
"-ta " +
|
||||
pathName + std::string("checkfgs.trn\n") +
|
||||
"-tm\n"
|
||||
"-or\n"
|
||||
"-ca " +
|
||||
pathName + std::string("checkfgs.chk\n") +
|
||||
"-cc\n"
|
||||
"-cl 3\n"
|
||||
"-cs\n"
|
||||
"-cn 999\n"
|
||||
"-l " +
|
||||
std::string(std::tmpnam(NULL)) + std::string("\n");
|
||||
int errs = AvalonTools::initCheckMol(struchk_init);
|
||||
TEST_ASSERT(!errs);
|
||||
RDKit::ROMOL_SPTR m = AvalonTools::checkMol(errs, "c1ccccc1", true);
|
||||
TEST_ASSERT(errs == 0);
|
||||
}
|
||||
BOOST_LOG(rdInfoLog) << "done" << std::endl;
|
||||
}
|
||||
|
||||
|
||||
|
||||
int main(int argc,char *argv[]){
|
||||
int main(int argc, char *argv[]) {
|
||||
RDLog::InitLogs();
|
||||
#if 1
|
||||
test1();
|
||||
@@ -391,9 +423,9 @@ int main(int argc,char *argv[]){
|
||||
testBadMolfile();
|
||||
testSmilesSegFault();
|
||||
testGithub336();
|
||||
#endif
|
||||
testCountFps();
|
||||
#endif
|
||||
testInitStruChk();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,19 +1,19 @@
|
||||
#
|
||||
# Copyright (c) 2015, Novartis Institutes for BioMedical Research Inc.
|
||||
# All rights reserved.
|
||||
#
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met:
|
||||
# met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above
|
||||
# copyright notice, this list of conditions and the following
|
||||
# disclaimer in the documentation and/or other materials provided
|
||||
# copyright notice, this list of conditions and the following
|
||||
# disclaimer in the documentation and/or other materials provided
|
||||
# with the distribution.
|
||||
# * Neither the name of Novartis Institutes for BioMedical Research Inc.
|
||||
# nor the names of its contributors may be used to endorse or promote
|
||||
# * Neither the name of Novartis Institutes for BioMedical Research Inc.
|
||||
# nor the names of its contributors may be used to endorse or promote
|
||||
# products derived from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
@@ -77,7 +77,7 @@ INCHI_READWRITE_ERROR = ERROR_DICT['INCHI_READWRITE_ERROR']
|
||||
NULL_MOL = ERROR_DICT['NULL_MOL']
|
||||
|
||||
BAD_SET = pyAvalonTools.StruChkResult.bad_set | INCHI_COMPUTATION_ERROR | RDKIT_CONVERSION_ERROR | INCHI_READWRITE_ERROR | NULL_MOL
|
||||
|
||||
|
||||
GET_STEREO_RE = re.compile(r'^InChI=1S(.*?)/(t.*?)/m\d/s1(.*$)')
|
||||
NULL_SMILES_RE = re.compile(r'^\s*$|^\s*NO_STRUCTURE\s*$', re.IGNORECASE)
|
||||
PATTERN_NULL_MOL = r'^([\s0]+[1-9]+[\s]+V[\w]*)'
|
||||
@@ -113,22 +113,22 @@ def _fix_all(pat, sbt, my_string) :
|
||||
return new_string
|
||||
except :
|
||||
return None
|
||||
|
||||
|
||||
def _fix_line_ends(my_string) :
|
||||
pat = '\r\n{0,1}'
|
||||
sbt = '\n'
|
||||
return _fix_all(pat, sbt, my_string)
|
||||
|
||||
|
||||
def _fix_chemdraw_header(my_string) :
|
||||
pat = '0V2000'
|
||||
sbt = 'V2000'
|
||||
return _fix_all(pat, sbt, my_string)
|
||||
|
||||
def _ctab_has_atoms(ctab_lines):
|
||||
''' look at atom count position (line 4, characters 0:3)
|
||||
Return True if the count is >0, False if 0.
|
||||
Throw BadMoleculeException if there are no characters
|
||||
at the required position or if they cannot be converted
|
||||
''' look at atom count position (line 4, characters 0:3)
|
||||
Return True if the count is >0, False if 0.
|
||||
Throw BadMoleculeException if there are no characters
|
||||
at the required position or if they cannot be converted
|
||||
to a positive integer
|
||||
'''
|
||||
try:
|
||||
@@ -143,14 +143,14 @@ def _ctab_has_atoms(ctab_lines):
|
||||
except IndexError:
|
||||
raise BadMoleculeException('Invalid molfile format')
|
||||
except ValueError:
|
||||
raise BadMoleculeException('Expected integer')
|
||||
raise BadMoleculeException('Expected integer')
|
||||
|
||||
return rval
|
||||
|
||||
|
||||
def _ctab_remove_chiral_flag(ctab_lines):
|
||||
''' read the chiral flag (line 4, characters 12:15)
|
||||
''' read the chiral flag (line 4, characters 12:15)
|
||||
and set it to 0. Return True if it was 1, False if 0.
|
||||
Throw BadMoleculeException if there are no characters
|
||||
Throw BadMoleculeException if there are no characters
|
||||
at the required position or if they where not 0 or 1
|
||||
'''
|
||||
try:
|
||||
@@ -161,13 +161,13 @@ def _ctab_remove_chiral_flag(ctab_lines):
|
||||
elif a_count == 1:
|
||||
rval = True
|
||||
orig_line = ctab_lines[3]
|
||||
ctab_lines[3] = orig_line[:CHIRAL_POS] + ' 0' + orig_line[CHIRAL_POS + 3:]
|
||||
ctab_lines[3] = orig_line[:CHIRAL_POS] + ' 0' + orig_line[CHIRAL_POS + 3:]
|
||||
else:
|
||||
raise BadMoleculeException('Expected chiral flag 0 or 1')
|
||||
except IndexError:
|
||||
raise BadMoleculeException('Invalid molfile format')
|
||||
except ValueError:
|
||||
raise BadMoleculeException('Expected integer, got {0}'.format(str_a_count))
|
||||
raise BadMoleculeException('Expected integer, got {0}'.format(str_a_count))
|
||||
|
||||
return rval
|
||||
|
||||
@@ -183,7 +183,7 @@ def initStruchk(configDir=None,logFile=None):
|
||||
fd.close()
|
||||
logFile= fd.name
|
||||
struchk_init = '''-tm
|
||||
-ta {0}checkfgs.trn
|
||||
-ta {0}checkfgs.trn
|
||||
-tm
|
||||
-or
|
||||
-ca {0}checkfgs.chk
|
||||
@@ -191,7 +191,7 @@ def initStruchk(configDir=None,logFile=None):
|
||||
-cl 3
|
||||
-cs
|
||||
-cn 999
|
||||
-l {1}'''.format(configDir, logFile)
|
||||
-l {1}\n'''.format(configDir, logFile)
|
||||
initRes=pyAvalonTools.InitializeCheckMol(struchk_init)
|
||||
if initRes:
|
||||
raise ValueError('bad result from InitializeCheckMol: '+str(initRes))
|
||||
@@ -203,7 +203,7 @@ def CheckCTAB(ctab, isSmiles=True):
|
||||
mol_str = ctab
|
||||
if not mol_str:
|
||||
raise BadMoleculeException('Unexpected blank or NULL molecule')
|
||||
else:
|
||||
else:
|
||||
mol_str = _fix_line_ends(mol_str)
|
||||
mol_str = _fix_chemdraw_header(mol_str)
|
||||
|
||||
@@ -211,22 +211,22 @@ def CheckCTAB(ctab, isSmiles=True):
|
||||
if mol_str and NULL_SMILES_RE.match(mol_str):
|
||||
rval = T_NULL_MOL
|
||||
else:
|
||||
rval = pyAvalonTools.CheckMoleculeString(mol_str, isSmiles)
|
||||
rval = pyAvalonTools.CheckMoleculeString(mol_str, isSmiles)
|
||||
else:
|
||||
# decompose the ctab into lines
|
||||
# the line terminator may be \n or \r\n, or even r'\n'
|
||||
# the line terminator may be \n or \r\n, or even r'\n'
|
||||
ctab_lines = mol_str.split('\n')
|
||||
if len(ctab_lines) <= 3:
|
||||
raise BadMoleculeException('Not enough lines in CTAB')
|
||||
_ctab_remove_chiral_flag(ctab_lines)
|
||||
if not _ctab_has_atoms(ctab_lines):
|
||||
rval = T_NULL_MOL
|
||||
else: # reassemble the ctab lines into one string.
|
||||
else: # reassemble the ctab lines into one string.
|
||||
mol_str = '\n'.join(ctab_lines)
|
||||
rval = pyAvalonTools.CheckMoleculeString(mol_str, isSmiles)
|
||||
return rval
|
||||
|
||||
InchiResult = namedtuple('InchiResult',['error','inchi','fixed_ctab'])
|
||||
InchiResult = namedtuple('InchiResult',['error','inchi','fixed_ctab'])
|
||||
def GetInchiForCTAB(ctab):
|
||||
"""
|
||||
>>> from rdkit.Chem.MolKey import MolKey
|
||||
@@ -237,7 +237,7 @@ def GetInchiForCTAB(ctab):
|
||||
>>> res = MolKey.GetInchiForCTAB(pyAvalonTools.Generate2DCoords('c1c[nH]nc1C(Cl)Br',True))
|
||||
>>> res.inchi
|
||||
'InChI=1/C4H4BrClN2/c5-4(6)3-1-2-7-8-3/h1-2,4H,(H,7,8)/t4?/f/h7H'
|
||||
>>>
|
||||
>>>
|
||||
"""
|
||||
inchi = None
|
||||
ctab_str = ctab
|
||||
@@ -260,13 +260,13 @@ def GetInchiForCTAB(ctab):
|
||||
return InchiResult(strucheck_err | conversion_err, inchi, fixed_mol)
|
||||
|
||||
def _make_racemate_inchi(inchi):
|
||||
""" Normalize the stereo information (t-layer) to one selected isomer. """
|
||||
""" Normalize the stereo information (t-layer) to one selected isomer. """
|
||||
# set stereo type = 3 (racemate) for consistency
|
||||
# reset inverted flag to m0 - not inverted
|
||||
new_stereo = '/m0/s3/'
|
||||
stereo_match = GET_STEREO_RE.match(inchi)
|
||||
if stereo_match:
|
||||
inchi = stereo_match.group(1) + new_stereo + stereo_match.group(2)
|
||||
inchi = stereo_match.group(1) + new_stereo + stereo_match.group(2)
|
||||
return inchi
|
||||
|
||||
def _get_identification_string(err, ctab, inchi, stereo_category=None, extra_stereo=None):
|
||||
@@ -284,13 +284,13 @@ def _get_identification_string(err, ctab, inchi, stereo_category=None, extra_ste
|
||||
else:
|
||||
pieces.append('ST=' + stereo_category)
|
||||
if extra_stereo:
|
||||
pieces.append('XTR=' + extra_stereo)
|
||||
pieces.append('XTR=' + extra_stereo)
|
||||
key_string = '/'.join(pieces)
|
||||
return key_string
|
||||
|
||||
def _get_null_mol_identification_string(extra_stereo) :
|
||||
key_string = str(uuid.uuid1 ())
|
||||
return key_string
|
||||
return key_string
|
||||
|
||||
def _get_bad_mol_identification_string(ctab, stereo_category, extra_stereo):
|
||||
pieces = []
|
||||
@@ -298,7 +298,7 @@ def _get_bad_mol_identification_string(ctab, stereo_category, extra_stereo):
|
||||
if ctab_str: # make the ctab part of the key if available
|
||||
ctab_str = _fix_line_ends(ctab_str)
|
||||
ctab_str = _fix_chemdraw_header(ctab_str)
|
||||
ctab_str = '\n'.join(ctab_str.split('\n')[3:])
|
||||
ctab_str = '\n'.join(ctab_str.split('\n')[3:])
|
||||
pieces.append(ctab_str.replace('\n', r'\n')) # make a handy one-line string
|
||||
else:
|
||||
pass
|
||||
@@ -312,12 +312,12 @@ def _get_bad_mol_identification_string(ctab, stereo_category, extra_stereo):
|
||||
return key_string
|
||||
|
||||
def _identify(err, ctab, inchi, stereo_category, extra_structure_desc=None):
|
||||
""" Compute the molecule key based on the inchi string,
|
||||
stereo category as well as extra structure
|
||||
""" Compute the molecule key based on the inchi string,
|
||||
stereo category as well as extra structure
|
||||
information """
|
||||
key_string = _get_identification_string(err, ctab, inchi, stereo_category, extra_structure_desc)
|
||||
if key_string:
|
||||
return "{0}|{1}".format(MOL_KEY_VERSION,
|
||||
return "{0}|{1}".format(MOL_KEY_VERSION,
|
||||
base64.b64encode(hashlib.md5(key_string.encode('UTF-8')).digest()).decode()) #pylint: disable=E1101
|
||||
else:
|
||||
return None
|
||||
@@ -382,7 +382,7 @@ def GetKeyForCTAB(ctab,stereo_info=None,stereo_comment=None,logger=None):
|
||||
>>> res=MolKey.GetKeyForCTAB(pyAvalonTools.Generate2DCoords('c1cccc(C(Br)Cl)c1C(F)Cl',True))
|
||||
>>> res.mol_key
|
||||
'1|5H9R3LvclagMXHp3Clrc/g=='
|
||||
>>> res.stereo_code
|
||||
>>> res.stereo_code
|
||||
'S_UNKN'
|
||||
>>> res=MolKey.GetKeyForCTAB(pyAvalonTools.Generate2DCoords('c1cccc(C(Br)Cl)c1C(F)Cl',True),stereo_info='S_REL')
|
||||
>>> res.mol_key
|
||||
@@ -415,7 +415,7 @@ def GetKeyForCTAB(ctab,stereo_info=None,stereo_comment=None,logger=None):
|
||||
extra_structure_desc = info_flds[1].strip()
|
||||
else:
|
||||
logger.warn('stereo code {0} not recognized. Using default value for ctab.'.format(code_fld))
|
||||
|
||||
|
||||
if not (err & BAD_SET):
|
||||
(n_stereo, n_undef_stereo, is_meso, dummy) = InchiInfo.InchiInfo(inchi).get_sp3_stereo()['main']['non-isotopic']
|
||||
if stereo_category == None or stereo_category == 'DEFAULT' : # compute if not set
|
||||
@@ -426,7 +426,7 @@ def GetKeyForCTAB(ctab,stereo_info=None,stereo_comment=None,logger=None):
|
||||
key = _identify(err, fixed_mol, inchi, stereo_category, extra_structure_desc)
|
||||
return MolKeyResult(key, err, inchi, fixed_mol, stereo_category, extra_structure_desc)
|
||||
|
||||
|
||||
|
||||
|
||||
#------------------------------------
|
||||
#
|
||||
|
||||
Reference in New Issue
Block a user