add a test for initstruchk, fix an input problem

This commit is contained in:
Greg Landrum
2015-11-29 05:09:39 +01:00
parent 5fed769d9e
commit 489dbfbc9a
2 changed files with 222 additions and 190 deletions

View File

@@ -4,169 +4,172 @@
//
//
// Expected test results here correspond to v1.0 of the open-source avalontoolkit
// Expected test results here correspond to v1.0 of the open-source
// avalontoolkit
//
#include <RDGeneral/RDLog.h>
#include <GraphMol/RDKitBase.h>
#include <GraphMol/RDKitBase.h>
#include <GraphMol/SmilesParse/SmilesParse.h>
#include <GraphMol/FileParsers/FileParsers.h>
#include <RDGeneral/Invariant.h>
#include <RDGeneral/Invariant.h>
#include <DataStructs/ExplicitBitVect.h>
#include <iostream>
#include <fstream>
#include <cstdio>
#include "AvalonTools.h"
#include <string>
using namespace RDKit;
void test1(){
void test1() {
BOOST_LOG(rdInfoLog) << "testing canonical smiles generation" << std::endl;
{
ROMol *m = static_cast<ROMol *>(SmilesToMol("c1ccccc1"));
TEST_ASSERT(m);
std::string smi=AvalonTools::getCanonSmiles(*m);
TEST_ASSERT(smi=="c1ccccc1");
std::string smi = AvalonTools::getCanonSmiles(*m);
TEST_ASSERT(smi == "c1ccccc1");
delete m;
}
{
ROMol *m = static_cast<ROMol *>(SmilesToMol("c1cccnc1"));
TEST_ASSERT(m);
std::string smi=AvalonTools::getCanonSmiles(*m);
TEST_ASSERT(smi=="c1ccncc1");
std::string smi = AvalonTools::getCanonSmiles(*m);
TEST_ASSERT(smi == "c1ccncc1");
delete m;
}
{
ROMol *m = static_cast<ROMol *>(SmilesToMol("n1ccccc1"));
TEST_ASSERT(m);
std::string smi=AvalonTools::getCanonSmiles(*m);
TEST_ASSERT(smi=="c1ccncc1");
std::string smi = AvalonTools::getCanonSmiles(*m);
TEST_ASSERT(smi == "c1ccncc1");
delete m;
}
{
std::string smi=AvalonTools::getCanonSmiles("n1ccccc1",true);
TEST_ASSERT(smi=="c1ccncc1");
std::string smi = AvalonTools::getCanonSmiles("n1ccccc1", true);
TEST_ASSERT(smi == "c1ccncc1");
}
{
std::string smi=AvalonTools::getCanonSmiles("c1cccnc1",true);
TEST_ASSERT(smi=="c1ccncc1");
std::string smi = AvalonTools::getCanonSmiles("c1cccnc1", true);
TEST_ASSERT(smi == "c1ccncc1");
}
BOOST_LOG(rdInfoLog) << "done" << std::endl;
}
void test2(){
void test2() {
BOOST_LOG(rdInfoLog) << "testing coordinate generation" << std::endl;
#if 1
{
RWMol *m = SmilesToMol("c1cccnc1");
TEST_ASSERT(m);
unsigned int confId=AvalonTools::set2DCoords(*m);
TEST_ASSERT(m->getNumConformers()==1);
TEST_ASSERT(confId==0);
unsigned int confId = AvalonTools::set2DCoords(*m);
TEST_ASSERT(m->getNumConformers() == 1);
TEST_ASSERT(confId == 0);
delete m;
}
#endif
{
std::string molb = AvalonTools::set2DCoords("c1cccnc1",true);
TEST_ASSERT(molb!="");
std::string molb = AvalonTools::set2DCoords("c1cccnc1", true);
TEST_ASSERT(molb != "");
}
BOOST_LOG(rdInfoLog) << "done" << std::endl;
}
void test3(){
void test3() {
BOOST_LOG(rdInfoLog) << "testing fingerprint generation" << std::endl;
{
ROMol *m = static_cast<ROMol *>(SmilesToMol("c1ccccn1"));
TEST_ASSERT(m);
ExplicitBitVect bv(512);
AvalonTools::getAvalonFP(*m,bv,512,false,true,0x00006FFF);
AvalonTools::getAvalonFP(*m, bv, 512, false, true, 0x00006FFF);
BOOST_LOG(rdInfoLog) << "c1ccccn1 " << bv.getNumOnBits() << std::endl;
TEST_ASSERT(bv.getNumOnBits()==18);
TEST_ASSERT(bv.getNumOnBits() == 18);
delete m;
}
{
ROMol *m = static_cast<ROMol *>(SmilesToMol("c1ccccc1"));
TEST_ASSERT(m);
ExplicitBitVect bv(512);
AvalonTools::getAvalonFP(*m,bv,512,false,true,0x006FFF);
AvalonTools::getAvalonFP(*m, bv, 512, false, true, 0x006FFF);
BOOST_LOG(rdInfoLog) << "c1ccccn1 " << bv.getNumOnBits() << std::endl;
TEST_ASSERT(bv.getNumOnBits()==6);
TEST_ASSERT(bv.getNumOnBits() == 6);
delete m;
}
{
ROMol *m = static_cast<ROMol *>(SmilesToMol("c1nnccc1"));
TEST_ASSERT(m);
ExplicitBitVect bv(512);
AvalonTools::getAvalonFP(*m,bv,512,false,true,0x006FFF);
AvalonTools::getAvalonFP(*m, bv, 512, false, true, 0x006FFF);
BOOST_LOG(rdInfoLog) << "c1nnccc1 " << bv.getNumOnBits() << std::endl;
TEST_ASSERT(bv.getNumOnBits()==28);
TEST_ASSERT(bv.getNumOnBits() == 28);
delete m;
}
{
ROMol *m = static_cast<ROMol *>(SmilesToMol("c1ncncc1"));
TEST_ASSERT(m);
ExplicitBitVect bv(512);
AvalonTools::getAvalonFP(*m,bv,512,false,true,0x006FFF);
AvalonTools::getAvalonFP(*m, bv, 512, false, true, 0x006FFF);
BOOST_LOG(rdInfoLog) << "c1ncncc1 " << bv.getNumOnBits() << std::endl;
TEST_ASSERT(bv.getNumOnBits()==25);
TEST_ASSERT(bv.getNumOnBits() == 25);
delete m;
}
{
ExplicitBitVect bv(512);
AvalonTools::getAvalonFP("c1cccnc1",true,bv,512,false,true,0x006FFF);
AvalonTools::getAvalonFP("c1cccnc1", true, bv, 512, false, true, 0x006FFF);
BOOST_LOG(rdInfoLog) << "c1cccnc1 " << bv.getNumOnBits() << std::endl;
TEST_ASSERT(bv.getNumOnBits()==18);
TEST_ASSERT(bv.getNumOnBits() == 18);
}
{
ExplicitBitVect bv(512);
AvalonTools::getAvalonFP("c1ccccc1",true,bv,512,false,true,0x006FFF);
AvalonTools::getAvalonFP("c1ccccc1", true, bv, 512, false, true, 0x006FFF);
BOOST_LOG(rdInfoLog) << "c1ccccc1 " << bv.getNumOnBits() << std::endl;
TEST_ASSERT(bv.getNumOnBits()==6);
TEST_ASSERT(bv.getNumOnBits() == 6);
}
{
ROMol *m = static_cast<ROMol *>(SmilesToMol("c1cccnc1"));
TEST_ASSERT(m);
ExplicitBitVect bv(1024);
AvalonTools::getAvalonFP(*m,bv,1024,false,true,0x006FFF);
AvalonTools::getAvalonFP(*m, bv, 1024, false, true, 0x006FFF);
BOOST_LOG(rdInfoLog) << "c1cccnc1 " << bv.getNumOnBits() << std::endl;
TEST_ASSERT(bv.getNumOnBits()==19);
TEST_ASSERT(bv.getNumOnBits() == 19);
delete m;
}
{
ExplicitBitVect bv(2048);
AvalonTools::getAvalonFP("c1cocc1",true,bv,2048,false,true,0x006FFF);
AvalonTools::getAvalonFP("c1cocc1", true, bv, 2048, false, true, 0x006FFF);
BOOST_LOG(rdInfoLog) << "c1cocc1 " << bv.getNumOnBits() << std::endl;
TEST_ASSERT(bv.getNumOnBits()==53);
TEST_ASSERT(bv.getNumOnBits() == 53);
}
{
ExplicitBitVect bv(2048);
AvalonTools::getAvalonFP("C1=COC=C1",true,bv,2048,false,true,0x006FFF);
AvalonTools::getAvalonFP("C1=COC=C1", true, bv, 2048, false, true,
0x006FFF);
BOOST_LOG(rdInfoLog) << "C1=COC=C1 " << bv.getNumOnBits() << std::endl;
TEST_ASSERT(bv.getNumOnBits()==53);
TEST_ASSERT(bv.getNumOnBits() == 53);
}
BOOST_LOG(rdInfoLog) << "done" << std::endl;
}
void testRDK151(){
BOOST_LOG(rdInfoLog) << "testing Jira issue RDK-151: pyAvalonTools not generating chiral smiles from molecules" << std::endl;
void testRDK151() {
BOOST_LOG(rdInfoLog) << "testing Jira issue RDK-151: pyAvalonTools not "
"generating chiral smiles from molecules"
<< std::endl;
{
std::string tSmi="C[C@H](F)Cl";
std::string tSmi = "C[C@H](F)Cl";
ROMol *m = static_cast<ROMol *>(SmilesToMol(tSmi));
TEST_ASSERT(m);
std::string smi=AvalonTools::getCanonSmiles(tSmi,true);
CHECK_INVARIANT(smi==tSmi,smi+"!="+tSmi);
smi=AvalonTools::getCanonSmiles(*m);
CHECK_INVARIANT(smi==tSmi,smi+"!="+tSmi);
std::string smi = AvalonTools::getCanonSmiles(tSmi, true);
CHECK_INVARIANT(smi == tSmi, smi + "!=" + tSmi);
smi = AvalonTools::getCanonSmiles(*m);
CHECK_INVARIANT(smi == tSmi, smi + "!=" + tSmi);
delete m;
}
@@ -174,57 +177,60 @@ void testRDK151(){
BOOST_LOG(rdInfoLog) << "done" << std::endl;
}
void testSmilesFailures(){
void testSmilesFailures() {
BOOST_LOG(rdInfoLog) << "testing handling of bad smiles strings" << std::endl;
{
std::string tSmi="C1C";
std::string smi=AvalonTools::getCanonSmiles(tSmi,true);
CHECK_INVARIANT(smi=="",smi);
std::string tSmi = "C1C";
std::string smi = AvalonTools::getCanonSmiles(tSmi, true);
CHECK_INVARIANT(smi == "", smi);
}
BOOST_LOG(rdInfoLog) << "done" << std::endl;
}
void testSubstructFps(){
void testSubstructFps() {
BOOST_LOG(rdInfoLog) << "testing substructure fingerprints " << std::endl;
{
ExplicitBitVect bv1(512),bv2(512);
AvalonTools::getAvalonFP("c1ccccc1",true,bv1,512,true,true,AvalonTools::avalonSSSBits);
AvalonTools::getAvalonFP("c1ccccc1C(F)(F)F",true,bv2,512);
TEST_ASSERT((bv1&bv2)==bv1);
AvalonTools::getAvalonFP("c1ccccc1C(F)(F)F",true,bv1,512);
TEST_ASSERT((bv1&bv2)==bv1);
AvalonTools::getAvalonFP("c1cccc(C)c1C(F)(F)F",true,bv2,512);
TEST_ASSERT((bv1&bv2)==bv1);
ExplicitBitVect bv1(512), bv2(512);
AvalonTools::getAvalonFP("c1ccccc1", true, bv1, 512, true, true,
AvalonTools::avalonSSSBits);
AvalonTools::getAvalonFP("c1ccccc1C(F)(F)F", true, bv2, 512);
TEST_ASSERT((bv1 & bv2) == bv1);
AvalonTools::getAvalonFP("c1ccccc1C(F)(F)F", true, bv1, 512);
TEST_ASSERT((bv1 & bv2) == bv1);
AvalonTools::getAvalonFP("c1cccc(C)c1C(F)(F)F", true, bv2, 512);
TEST_ASSERT((bv1 & bv2) == bv1);
}
{
ExplicitBitVect bv1(512),bv2(512);
AvalonTools::getAvalonFP("c1ccccc1O",true,bv1,512,true,true,AvalonTools::avalonSSSBits);
AvalonTools::getAvalonFP("c1ccccc1OC",true,bv2,512);
TEST_ASSERT((bv1&bv2)==bv1);
ExplicitBitVect bv1(512), bv2(512);
AvalonTools::getAvalonFP("c1ccccc1O", true, bv1, 512, true, true,
AvalonTools::avalonSSSBits);
AvalonTools::getAvalonFP("c1ccccc1OC", true, bv2, 512);
TEST_ASSERT((bv1 & bv2) == bv1);
}
BOOST_LOG(rdInfoLog) << "done" << std::endl;
}
void testStruChk(){
void testStruChk() {
BOOST_LOG(rdInfoLog) << "testing structure checking " << std::endl;
{
int errs = 0;
RDKit::ROMOL_SPTR m = AvalonTools::checkMol(errs, "c1ccccc1",true);
TEST_ASSERT(errs==0);
m = AvalonTools::checkMol(errs, "c1c(R)cccc1C1(CC-C(C)C1)C",true);
TEST_ASSERT(errs!=0);
RDKit::ROMOL_SPTR m = AvalonTools::checkMol(errs, "c1ccccc1", true);
TEST_ASSERT(errs == 0);
m = AvalonTools::checkMol(errs, "c1c(R)cccc1C1(CC-C(C)C1)C", true);
TEST_ASSERT(errs != 0);
}
{
int errs = 0;
std::string res;
boost::tie(res,errs)=AvalonTools::checkMolString("c1ccccc1",true);
TEST_ASSERT(errs==0);
TEST_ASSERT(res!="");
boost::tie(res,errs)=AvalonTools::checkMolString("c1c(R)cccc1C1(CC-C(C)C1)C",true);
TEST_ASSERT(errs==1);
TEST_ASSERT(res=="");
boost::tie(res, errs) = AvalonTools::checkMolString("c1ccccc1", true);
TEST_ASSERT(errs == 0);
TEST_ASSERT(res != "");
boost::tie(res, errs) =
AvalonTools::checkMolString("c1c(R)cccc1C1(CC-C(C)C1)C", true);
TEST_ASSERT(errs == 1);
TEST_ASSERT(res == "");
}
BOOST_LOG(rdInfoLog) << "done" << std::endl;
}
@@ -233,123 +239,124 @@ void testBadMolfile() {
BOOST_LOG(rdInfoLog) << "testing handling bad molecules " << std::endl;
// some tests around dealing with bad mol blocks
{
std::string molb="SNAP007157A\n\
std::string molb =
"SNAP007157A\n\
MACCS-II3194121345\n\
\n\
0 0 0 0 0";
std::string smi=AvalonTools::getCanonSmiles(molb,false);
CHECK_INVARIANT(smi=="",smi);
std::string smi = AvalonTools::getCanonSmiles(molb, false);
CHECK_INVARIANT(smi == "", smi);
ExplicitBitVect bv(1024);
AvalonTools::getAvalonFP(molb,false,bv,1024);
TEST_ASSERT(bv.getNumOnBits()==0);
AvalonTools::getAvalonFP(molb, false, bv, 1024);
TEST_ASSERT(bv.getNumOnBits() == 0);
std::string oMolb;
AvalonTools::set2DCoords(molb,false);
CHECK_INVARIANT(oMolb=="",oMolb);
AvalonTools::set2DCoords(molb, false);
CHECK_INVARIANT(oMolb == "", oMolb);
}
}
void testSmilesSegFault() {
BOOST_LOG(rdInfoLog) << "testing a canonical smiles case that led to seg faults " << std::endl;
BOOST_LOG(rdInfoLog)
<< "testing a canonical smiles case that led to seg faults " << std::endl;
// some tests around dealing with bad mol blocks
{
std::string inSmi(1024,'C');
std::string smi=AvalonTools::getCanonSmiles(inSmi,true);
TEST_ASSERT(smi==inSmi);
std::string inSmi(1024, 'C');
std::string smi = AvalonTools::getCanonSmiles(inSmi, true);
TEST_ASSERT(smi == inSmi);
}
{
std::string inSmi(1534,'C');
std::string smi=AvalonTools::getCanonSmiles(inSmi,true);
TEST_ASSERT(smi==inSmi);
std::string inSmi(1534, 'C');
std::string smi = AvalonTools::getCanonSmiles(inSmi, true);
TEST_ASSERT(smi == inSmi);
}
BOOST_LOG(rdInfoLog) << "done" << std::endl;
}
void testGithub336() {
BOOST_LOG(rdInfoLog) << "testing github issue 336: bad canonical smiles for conjugated double bonds" << std::endl;
BOOST_LOG(rdInfoLog) << "testing github issue 336: bad canonical smiles for "
"conjugated double bonds" << std::endl;
// some tests around dealing with bad mol blocks
{
std::string pathName=getenv("RDBASE");
std::string pathName = getenv("RDBASE");
pathName += "/External/AvalonTools/test_data/";
std::ifstream ins((pathName+"EZ_test.2.sdf").c_str());
std::string mb((std::istreambuf_iterator<char>(ins)),
std::ifstream ins((pathName + "EZ_test.2.sdf").c_str());
std::string mb((std::istreambuf_iterator<char>(ins)),
std::istreambuf_iterator<char>());
ROMol *m = MolBlockToMol(mb);
TEST_ASSERT(m);
TEST_ASSERT(m->getNumAtoms()==17);
TEST_ASSERT(m->getNumAtoms() == 17);
std::string smi1=AvalonTools::getCanonSmiles(mb,false);
std::string smi2=AvalonTools::getCanonSmiles(*m);
std::cerr<<"smi1: "<<smi1<<std::endl;
std::cerr<<"smi2: "<<smi2<<std::endl;
TEST_ASSERT(smi1==smi2);
std::string smi1 = AvalonTools::getCanonSmiles(mb, false);
std::string smi2 = AvalonTools::getCanonSmiles(*m);
std::cerr << "smi1: " << smi1 << std::endl;
std::cerr << "smi2: " << smi2 << std::endl;
TEST_ASSERT(smi1 == smi2);
delete m;
}
}
{
std::string pathName=getenv("RDBASE");
std::string pathName = getenv("RDBASE");
pathName += "/External/AvalonTools/test_data/";
std::ifstream ins((pathName+"heterocycle.mol").c_str());
std::string mb((std::istreambuf_iterator<char>(ins)),
std::ifstream ins((pathName + "heterocycle.mol").c_str());
std::string mb((std::istreambuf_iterator<char>(ins)),
std::istreambuf_iterator<char>());
RWMol *m = MolBlockToMol(mb,false);
RWMol *m = MolBlockToMol(mb, false);
TEST_ASSERT(m);
TEST_ASSERT(m->getNumAtoms()==6);
TEST_ASSERT(m->getNumAtoms() == 6);
m->updatePropertyCache();
MolOps::cleanUp(*m);
MolOps::setAromaticity(*m);
std::string smi1=AvalonTools::getCanonSmiles(mb,false);
std::string smi2=AvalonTools::getCanonSmiles(*m);
std::cerr<<"smi1: "<<smi1<<std::endl;
std::cerr<<"smi2: "<<smi2<<std::endl;
TEST_ASSERT(smi1==smi2);
TEST_ASSERT(smi1=="CC1C=NNC=1");
std::string smi1 = AvalonTools::getCanonSmiles(mb, false);
std::string smi2 = AvalonTools::getCanonSmiles(*m);
std::cerr << "smi1: " << smi1 << std::endl;
std::cerr << "smi2: " << smi2 << std::endl;
TEST_ASSERT(smi1 == smi2);
TEST_ASSERT(smi1 == "CC1C=NNC=1");
delete m;
}
}
{
std::string pathName=getenv("RDBASE");
std::string pathName = getenv("RDBASE");
pathName += "/External/AvalonTools/test_data/";
std::ifstream ins((pathName+"heterocycle2.mol").c_str());
std::string mb((std::istreambuf_iterator<char>(ins)),
std::ifstream ins((pathName + "heterocycle2.mol").c_str());
std::string mb((std::istreambuf_iterator<char>(ins)),
std::istreambuf_iterator<char>());
RWMol *m = MolBlockToMol(mb,false);
RWMol *m = MolBlockToMol(mb, false);
TEST_ASSERT(m);
TEST_ASSERT(m->getNumAtoms()==11);
TEST_ASSERT(m->getNumAtoms() == 11);
m->updatePropertyCache();
MolOps::cleanUp(*m);
MolOps::setAromaticity(*m);
std::string smi1=AvalonTools::getCanonSmiles(mb,false);
std::string smi2=AvalonTools::getCanonSmiles(*m);
std::cerr<<"smi1: "<<smi1<<std::endl;
std::cerr<<"smi2: "<<smi2<<std::endl;
TEST_ASSERT(smi1==smi2);
TEST_ASSERT(smi1=="CN2C=CC1=CC(=O)NC=C12");
std::string smi1 = AvalonTools::getCanonSmiles(mb, false);
std::string smi2 = AvalonTools::getCanonSmiles(*m);
std::cerr << "smi1: " << smi1 << std::endl;
std::cerr << "smi2: " << smi2 << std::endl;
TEST_ASSERT(smi1 == smi2);
TEST_ASSERT(smi1 == "CN2C=CC1=CC(=O)NC=C12");
delete m;
}
}
BOOST_LOG(rdInfoLog) << "done" << std::endl;
}
void testCountFps(){
void testCountFps() {
BOOST_LOG(rdInfoLog) << "testing substructure fingerprints " << std::endl;
{
SparseIntVect<boost::uint32_t> cv1(5000),cv2(5000);
AvalonTools::getAvalonCountFP("c1ccccc1",true,cv1,5000);
AvalonTools::getAvalonCountFP("c1ccccc1.c1ccccc1",true,cv2,5000);
for(unsigned int i=0;i<cv1.size();++i){
if(cv1[i] && (cv2[i]!=2*cv1[i])){
std::cerr<<" mismatch: "<<i<<" "<<cv1[i]<<" "<<cv2[i]<<std::endl;
SparseIntVect<boost::uint32_t> cv1(5000), cv2(5000);
AvalonTools::getAvalonCountFP("c1ccccc1", true, cv1, 5000);
AvalonTools::getAvalonCountFP("c1ccccc1.c1ccccc1", true, cv2, 5000);
for (unsigned int i = 0; i < cv1.size(); ++i) {
if (cv1[i] && (cv2[i] != 2 * cv1[i])) {
std::cerr << " mismatch: " << i << " " << cv1[i] << " " << cv2[i]
<< std::endl;
}
}
for(unsigned int i=0;i<cv1.size();++i){
TEST_ASSERT(!cv1[i] || (cv2[i]==2*cv1[i]) );
for (unsigned int i = 0; i < cv1.size(); ++i) {
TEST_ASSERT(!cv1[i] || (cv2[i] == 2 * cv1[i]));
}
}
{
@@ -358,16 +365,17 @@ void testCountFps(){
ROMol *m2 = static_cast<ROMol *>(SmilesToMol("c1ccccc1.c1ccccc1"));
TEST_ASSERT(m2);
SparseIntVect<boost::uint32_t> cv1(5000),cv2(5000);
AvalonTools::getAvalonCountFP(*m1,cv1,5000);
AvalonTools::getAvalonCountFP(*m2,cv2,5000);
for(unsigned int i=0;i<cv1.size();++i){
if(cv1[i] && (cv2[i]!=2*cv1[i])){
std::cerr<<" mismatch: "<<i<<" "<<cv1[i]<<" "<<cv2[i]<<std::endl;
SparseIntVect<boost::uint32_t> cv1(5000), cv2(5000);
AvalonTools::getAvalonCountFP(*m1, cv1, 5000);
AvalonTools::getAvalonCountFP(*m2, cv2, 5000);
for (unsigned int i = 0; i < cv1.size(); ++i) {
if (cv1[i] && (cv2[i] != 2 * cv1[i])) {
std::cerr << " mismatch: " << i << " " << cv1[i] << " " << cv2[i]
<< std::endl;
}
}
for(unsigned int i=0;i<cv1.size();++i){
TEST_ASSERT(!cv1[i] || (cv2[i]==2*cv1[i]) );
for (unsigned int i = 0; i < cv1.size(); ++i) {
TEST_ASSERT(!cv1[i] || (cv2[i] == 2 * cv1[i]));
}
delete m1;
delete m2;
@@ -375,10 +383,34 @@ void testCountFps(){
BOOST_LOG(rdInfoLog) << "done" << std::endl;
}
// Initializes struchk from an explicit option string that points at the
// checkfgs.trn / checkfgs.chk files under $RDBASE/Data/struchk/ and writes its
// log to a temporary file name, then checks that initialization reports no
// error and that checkMol() on "c1ccccc1" reports no errors.
void testInitStruChk() {
  BOOST_LOG(rdInfoLog) << "testing init struchk " << std::endl;
  {
    // getenv() returns NULL when the variable is unset, and constructing a
    // std::string from NULL is undefined behaviour. Check explicitly first
    // (assumes TEST_ASSERT aborts the test on failure, as it is relied on
    // elsewhere in this file).
    const char *rdbase = getenv("RDBASE");
    TEST_ASSERT(rdbase);
    std::string pathName = rdbase;
    pathName += "/Data/struchk/";

    // tmpnam() may return NULL if no unique name can be generated; guard it
    // for the same reason as above.
    // NOTE(review): tmpnam() is inherently racy; acceptable for a test, but a
    // safer temp-file helper would be preferable if the project has one.
    const char *logName = std::tmpnam(NULL);
    TEST_ASSERT(logName);

    // One option per line; the final newline after the "-l" option is part of
    // the string passed to initCheckMol().
    std::string struchk_init =
        "-tm\n"
        "-ta " +
        pathName + std::string("checkfgs.trn\n") +
        "-tm\n"
        "-or\n"
        "-ca " +
        pathName + std::string("checkfgs.chk\n") +
        "-cc\n"
        "-cl 3\n"
        "-cs\n"
        "-cn 999\n"
        "-l " +
        std::string(logName) + std::string("\n");

    int errs = AvalonTools::initCheckMol(struchk_init);
    TEST_ASSERT(!errs);

    // After a successful init, a trivially valid molecule must check cleanly.
    RDKit::ROMOL_SPTR m = AvalonTools::checkMol(errs, "c1ccccc1", true);
    TEST_ASSERT(errs == 0);
  }
  BOOST_LOG(rdInfoLog) << "done" << std::endl;
}
int main(int argc,char *argv[]){
int main(int argc, char *argv[]) {
RDLog::InitLogs();
#if 1
test1();
@@ -391,9 +423,9 @@ int main(int argc,char *argv[]){
testBadMolfile();
testSmilesSegFault();
testGithub336();
#endif
testCountFps();
#endif
testInitStruChk();
return 0;
}

View File

@@ -1,19 +1,19 @@
#
# Copyright (c) 2015, Novartis Institutes for BioMedical Research Inc.
# All rights reserved.
#
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
# met:
#
# * Redistributions of source code must retain the above copyright
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following
# disclaimer in the documentation and/or other materials provided
# copyright notice, this list of conditions and the following
# disclaimer in the documentation and/or other materials provided
# with the distribution.
# * Neither the name of Novartis Institutes for BioMedical Research Inc.
# nor the names of its contributors may be used to endorse or promote
# * Neither the name of Novartis Institutes for BioMedical Research Inc.
# nor the names of its contributors may be used to endorse or promote
# products derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
@@ -77,7 +77,7 @@ INCHI_READWRITE_ERROR = ERROR_DICT['INCHI_READWRITE_ERROR']
NULL_MOL = ERROR_DICT['NULL_MOL']
BAD_SET = pyAvalonTools.StruChkResult.bad_set | INCHI_COMPUTATION_ERROR | RDKIT_CONVERSION_ERROR | INCHI_READWRITE_ERROR | NULL_MOL
GET_STEREO_RE = re.compile(r'^InChI=1S(.*?)/(t.*?)/m\d/s1(.*$)')
NULL_SMILES_RE = re.compile(r'^\s*$|^\s*NO_STRUCTURE\s*$', re.IGNORECASE)
PATTERN_NULL_MOL = r'^([\s0]+[1-9]+[\s]+V[\w]*)'
@@ -113,22 +113,22 @@ def _fix_all(pat, sbt, my_string) :
return new_string
except :
return None
def _fix_line_ends(my_string) :
    """Hand my_string to _fix_all with a pattern for a carriage return
    optionally followed by a line feed, and a single line feed as the
    substitute.

    Returns whatever _fix_all returns (presumably the rewritten string;
    its visible tail returns None when it hits an exception -- verify
    against _fix_all).
    """
    return _fix_all('\r\n{0,1}', '\n', my_string)
def _fix_chemdraw_header(my_string) :
    """Hand my_string to _fix_all with the pattern '0V2000' and the
    substitute 'V2000'.

    Returns whatever _fix_all returns (presumably the rewritten string;
    its visible tail returns None when it hits an exception -- verify
    against _fix_all).
    """
    return _fix_all('0V2000', 'V2000', my_string)
def _ctab_has_atoms(ctab_lines):
''' look at atom count position (line 4, characters 0:3)
Return True if the count is >0, False if 0.
Throw BadMoleculeException if there are no characters
at the required position or if they cannot be converted
''' look at atom count position (line 4, characters 0:3)
Return True if the count is >0, False if 0.
Throw BadMoleculeException if there are no characters
at the required position or if they cannot be converted
to a positive integer
'''
try:
@@ -143,14 +143,14 @@ def _ctab_has_atoms(ctab_lines):
except IndexError:
raise BadMoleculeException('Invalid molfile format')
except ValueError:
raise BadMoleculeException('Expected integer')
raise BadMoleculeException('Expected integer')
return rval
def _ctab_remove_chiral_flag(ctab_lines):
''' read the chiral flag (line 4, characters 12:15)
''' read the chiral flag (line 4, characters 12:15)
and set it to 0. Return True if it was 1, False if 0.
Throw BadMoleculeException if there are no characters
Throw BadMoleculeException if there are no characters
at the required position or if they where not 0 or 1
'''
try:
@@ -161,13 +161,13 @@ def _ctab_remove_chiral_flag(ctab_lines):
elif a_count == 1:
rval = True
orig_line = ctab_lines[3]
ctab_lines[3] = orig_line[:CHIRAL_POS] + ' 0' + orig_line[CHIRAL_POS + 3:]
ctab_lines[3] = orig_line[:CHIRAL_POS] + ' 0' + orig_line[CHIRAL_POS + 3:]
else:
raise BadMoleculeException('Expected chiral flag 0 or 1')
except IndexError:
raise BadMoleculeException('Invalid molfile format')
except ValueError:
raise BadMoleculeException('Expected integer, got {0}'.format(str_a_count))
raise BadMoleculeException('Expected integer, got {0}'.format(str_a_count))
return rval
@@ -183,7 +183,7 @@ def initStruchk(configDir=None,logFile=None):
fd.close()
logFile= fd.name
struchk_init = '''-tm
-ta {0}checkfgs.trn
-ta {0}checkfgs.trn
-tm
-or
-ca {0}checkfgs.chk
@@ -191,7 +191,7 @@ def initStruchk(configDir=None,logFile=None):
-cl 3
-cs
-cn 999
-l {1}'''.format(configDir, logFile)
-l {1}\n'''.format(configDir, logFile)
initRes=pyAvalonTools.InitializeCheckMol(struchk_init)
if initRes:
raise ValueError('bad result from InitializeCheckMol: '+str(initRes))
@@ -203,7 +203,7 @@ def CheckCTAB(ctab, isSmiles=True):
mol_str = ctab
if not mol_str:
raise BadMoleculeException('Unexpected blank or NULL molecule')
else:
else:
mol_str = _fix_line_ends(mol_str)
mol_str = _fix_chemdraw_header(mol_str)
@@ -211,22 +211,22 @@ def CheckCTAB(ctab, isSmiles=True):
if mol_str and NULL_SMILES_RE.match(mol_str):
rval = T_NULL_MOL
else:
rval = pyAvalonTools.CheckMoleculeString(mol_str, isSmiles)
rval = pyAvalonTools.CheckMoleculeString(mol_str, isSmiles)
else:
# decompose the ctab into lines
# the line terminator may be \n or \r\n, or even r'\n'
# the line terminator may be \n or \r\n, or even r'\n'
ctab_lines = mol_str.split('\n')
if len(ctab_lines) <= 3:
raise BadMoleculeException('Not enough lines in CTAB')
_ctab_remove_chiral_flag(ctab_lines)
if not _ctab_has_atoms(ctab_lines):
rval = T_NULL_MOL
else: # reassemble the ctab lines into one string.
else: # reassemble the ctab lines into one string.
mol_str = '\n'.join(ctab_lines)
rval = pyAvalonTools.CheckMoleculeString(mol_str, isSmiles)
return rval
InchiResult = namedtuple('InchiResult',['error','inchi','fixed_ctab'])
InchiResult = namedtuple('InchiResult',['error','inchi','fixed_ctab'])
def GetInchiForCTAB(ctab):
"""
>>> from rdkit.Chem.MolKey import MolKey
@@ -237,7 +237,7 @@ def GetInchiForCTAB(ctab):
>>> res = MolKey.GetInchiForCTAB(pyAvalonTools.Generate2DCoords('c1c[nH]nc1C(Cl)Br',True))
>>> res.inchi
'InChI=1/C4H4BrClN2/c5-4(6)3-1-2-7-8-3/h1-2,4H,(H,7,8)/t4?/f/h7H'
>>>
>>>
"""
inchi = None
ctab_str = ctab
@@ -260,13 +260,13 @@ def GetInchiForCTAB(ctab):
return InchiResult(strucheck_err | conversion_err, inchi, fixed_mol)
def _make_racemate_inchi(inchi):
""" Normalize the stereo information (t-layer) to one selected isomer. """
""" Normalize the stereo information (t-layer) to one selected isomer. """
# set stereo type = 3 (racemate) for consistency
# reset inverted flag to m0 - not inverted
new_stereo = '/m0/s3/'
stereo_match = GET_STEREO_RE.match(inchi)
if stereo_match:
inchi = stereo_match.group(1) + new_stereo + stereo_match.group(2)
inchi = stereo_match.group(1) + new_stereo + stereo_match.group(2)
return inchi
def _get_identification_string(err, ctab, inchi, stereo_category=None, extra_stereo=None):
@@ -284,13 +284,13 @@ def _get_identification_string(err, ctab, inchi, stereo_category=None, extra_ste
else:
pieces.append('ST=' + stereo_category)
if extra_stereo:
pieces.append('XTR=' + extra_stereo)
pieces.append('XTR=' + extra_stereo)
key_string = '/'.join(pieces)
return key_string
def _get_null_mol_identification_string(extra_stereo) :
key_string = str(uuid.uuid1 ())
return key_string
return key_string
def _get_bad_mol_identification_string(ctab, stereo_category, extra_stereo):
pieces = []
@@ -298,7 +298,7 @@ def _get_bad_mol_identification_string(ctab, stereo_category, extra_stereo):
if ctab_str: # make the ctab part of the key if available
ctab_str = _fix_line_ends(ctab_str)
ctab_str = _fix_chemdraw_header(ctab_str)
ctab_str = '\n'.join(ctab_str.split('\n')[3:])
ctab_str = '\n'.join(ctab_str.split('\n')[3:])
pieces.append(ctab_str.replace('\n', r'\n')) # make a handy one-line string
else:
pass
@@ -312,12 +312,12 @@ def _get_bad_mol_identification_string(ctab, stereo_category, extra_stereo):
return key_string
def _identify(err, ctab, inchi, stereo_category, extra_structure_desc=None):
""" Compute the molecule key based on the inchi string,
stereo category as well as extra structure
""" Compute the molecule key based on the inchi string,
stereo category as well as extra structure
information """
key_string = _get_identification_string(err, ctab, inchi, stereo_category, extra_structure_desc)
if key_string:
return "{0}|{1}".format(MOL_KEY_VERSION,
return "{0}|{1}".format(MOL_KEY_VERSION,
base64.b64encode(hashlib.md5(key_string.encode('UTF-8')).digest()).decode()) #pylint: disable=E1101
else:
return None
@@ -382,7 +382,7 @@ def GetKeyForCTAB(ctab,stereo_info=None,stereo_comment=None,logger=None):
>>> res=MolKey.GetKeyForCTAB(pyAvalonTools.Generate2DCoords('c1cccc(C(Br)Cl)c1C(F)Cl',True))
>>> res.mol_key
'1|5H9R3LvclagMXHp3Clrc/g=='
>>> res.stereo_code
>>> res.stereo_code
'S_UNKN'
>>> res=MolKey.GetKeyForCTAB(pyAvalonTools.Generate2DCoords('c1cccc(C(Br)Cl)c1C(F)Cl',True),stereo_info='S_REL')
>>> res.mol_key
@@ -415,7 +415,7 @@ def GetKeyForCTAB(ctab,stereo_info=None,stereo_comment=None,logger=None):
extra_structure_desc = info_flds[1].strip()
else:
logger.warn('stereo code {0} not recognized. Using default value for ctab.'.format(code_fld))
if not (err & BAD_SET):
(n_stereo, n_undef_stereo, is_meso, dummy) = InchiInfo.InchiInfo(inchi).get_sp3_stereo()['main']['non-isotopic']
if stereo_category == None or stereo_category == 'DEFAULT' : # compute if not set
@@ -426,7 +426,7 @@ def GetKeyForCTAB(ctab,stereo_info=None,stereo_comment=None,logger=None):
key = _identify(err, fixed_mol, inchi, stereo_category, extra_structure_desc)
return MolKeyResult(key, err, inchi, fixed_mol, stereo_category, extra_structure_desc)
#------------------------------------
#