// $Id: testFMCS.cpp $ // // Copyright (c) 2014, Novartis Institutes for BioMedical Research Inc. // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following // disclaimer in the documentation and/or other materials provided // with the distribution. // * Neither the name of Novartis Institutes for BioMedical Research Inc. // nor the names of its contributors may be used to endorse or promote // products derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // #ifdef WIN32 #include #else #include #include #include #include #endif #include #include #include #include #include #include "../../RDKitBase.h" #include "../../FileParsers/FileParsers.h" //MOL single molecule ! 
#include "../../FileParsers/MolSupplier.h" //SDF
#include "../../SmilesParse/SmilesParse.h"
#include "../../SmilesParse/SmilesWrite.h"
#include "../../SmilesParse/SmartsWrite.h"
#include "../FMCS.h"
#include "../DebugTrace.h" //#ifdef VERBOSE_STATISTICS_ON

using namespace RDKit;

unsigned long long T0;
unsigned long long t0;

// Print the time elapsed since the global mark t0, then reset t0 to "now".
// The nanoClock() difference is divided by 1e6 and reported as seconds.
void printTime() {
    const unsigned long long t1 = nanoClock();
    const double sec = double(t1 - t0) / 1000000.;
    printf("Time elapsed %.3lf seconds\n", sec);
    t0 = nanoClock();
}

// Split a "SMILES label" record at its first space.
//   smiles - NUL-terminated input record.
//   id     - optional; receives everything that follows the first space
//            (empty when the record contains no space).
// Returns the leading part of the record INCLUDING the separating space,
// or the whole record when it contains no space.
std::string getSmilesOnly(const char* smiles, std::string* id=0) { // remove label, because RDKit parse FAILED
    const char* sp = strchr(smiles, ' ');
    // size_t instead of unsigned: no narrowing of the pointer difference.
    const size_t n = sp ? size_t(sp - smiles) + 1 : strlen(smiles);
    if(id)
        *id = std::string(smiles + n);
    return std::string(smiles, n);
}

// Split a "label SMILES" record at its first space.
//   smiles - NUL-terminated input record.
//   id     - optional; receives the text before the first space.
// Returns the text after the first space with trailing characters that compare
// below ' ' (e.g. CR / LF left by fgets) removed. A record without any space is
// returned unchanged and id is left untouched.
std::string getSmilesOnlyTxt(const char* smiles, std::string* id=0) { // remove label from string like "CHEMBL90218 NS(=O)(=O)c1ccc(NC(=O)c2cccc(C(=O)O)n2)c(Cl)c1"
    const char* sp = strchr(smiles, ' ');
    if(!sp)
        return smiles;
    if(id)
        *id = std::string(smiles, sp - smiles);
    ++sp;
    size_t i = strlen(sp);
    while(i > 0 && sp[i-1] < ' ')
        --i;
    return std::string(sp, i);
}

// Extract the third TAB-separated column of a record.
//   smiles - NUL-terminated input record.
//   id     - optional; receives the first column together with its terminating
//            TAB (set as soon as the first TAB is found).
// Returns everything after the second TAB (a trailing line end, if any, is
// kept). Returns an empty string when the record has fewer than two TABs;
// previously that case constructed std::string from a null pointer, which is
// undefined behaviour.
std::string getSmilesOnlyChEMBL(const char* smiles, std::string* id=0) { // remove label, because RDKit parse FAILED
    const char* firstTab = strchr(smiles, '\t');
    if(!firstTab)
        return std::string();
    if(id)
        *id = std::string(smiles, size_t(firstTab - smiles) + 1);
    const char* secondTab = strchr(firstTab + 1, '\t');
    if(!secondTab)
        return std::string();
    return std::string(secondTab + 1);
}

//====================================================================================================

MCSParameters p;

void testFileMCSB(const char* test, unsigned timeout=30, std::vector test_N=std::vector()) { // optional list of some tests for investigation
    p.Verbose = false;
    std::vector mols; // IT CAN OCCUPY A LOT OF MEMORY. store SMILES only to reduce memory usage.
char str [4096]; std::string molFile, id; std::map molIdMap; std::vector smilesList; std::list< std::vector > testCase; std::string referenceOutFile(test); referenceOutFile += ".REF.out"; std::string outFile(test); if(!test_N.empty()) { if(1==test_N.size()) sprintf(str,".%u.out", test_N[0]); else sprintf(str,".%u-%u.out", test_N[0], test_N.back()); outFile += str; } else { outFile += ".Cpp.out"; } std::string outSmilesFile(test); outSmilesFile += ".smiles"; unsigned n=0, passed=0, failed=0, failed_less=0, timedout=0; double secTotal = 0.; std::vector referenceResults; std::vector referenceResultsTime; FILE* f = fopen(referenceOutFile.c_str(), "rt"); if(!f) perror("Could not open reference test result file"); else { std::cout<<"Loading reference test results ... \n"; while(fgets(str, sizeof(str), f)) if('#' != str[0]) { char c; int frag; float t; char mcs [1024]; MCSResult res; sscanf(str, "%u %c %d %d %d %f %s", &n, &c, &frag, &res.NumAtoms, &res.NumBonds, &t, mcs); res.Canceled = ('.' != c); res.SmartsString = mcs; referenceResults.push_back(res); referenceResultsTime.push_back(t); } } fclose(f); f = fopen(test, "rt"); if(!f) { perror("Could not open test case list MCSB file"); exit(1); } { std::cout<<"Loading MCSB test list ... 
\n"; if(fgets(str, sizeof(str), f)) if(fgets(str, sizeof(str), f)) { char* c = strrchr(str, '\n'); // remove LineFeed if(c) *c = '\0'; c = strrchr(str, '\r'); if(c) *c = '\0'; molFile = str + 6; // #File filename } std::cout<<"Molecules file:"<()); sscanf(str, "%u%n", &nn, &len); while('\0'!=*(str+len) && 1==sscanf(str+len, "%s%n", name, &nn)) { len += nn; testCase.back().push_back(std::string(name)); } } std::cout< >::const_iterator tc = testCase.begin(); tc != testCase.end(); tc++, n++) { if(!test_N.empty() && test_N.end() == std::find(test_N.begin(), test_N.end(), n+1)) continue; std::cout<<"\rTest: "<< n+1 <<" "; if(!test_N.empty()) // test case is listed std::cout<<"\n"; std::vector tcmols; fprintf(f, "# %u Using ", n+1); if(fs) fprintf(fs, "\n//TEST %u\n", n+1); for(std::vector::const_iterator mid = tc->begin(); mid != tc->end(); mid++) { std::map::const_iterator id = molIdMap.find(*mid); if(molIdMap.end() == id) continue; size_t i = id->second; tcmols.push_back(mols[i]); fprintf(f, "%s ", mid->c_str()); if(fs) fprintf(fs, "\"%s%s\",\n", smilesList[i].c_str(), mid->c_str()); } fprintf(f, "\n"); // ExecStatistics curStat = stat; //to compute the difference for this test only unsigned long long tc0 = nanoClock(); MCSResult res = findMCS(tcmols, &p); // *** T E S T *** unsigned long long tc1 = nanoClock(); double sec = double(tc1-tc0) / 1000000.; // without time of SMILES to ROMol conversion secTotal += sec; if(!test_N.empty()) std::cout<<"\n" << "MCS: "< res.NumAtoms ||*/ referenceResults[n].NumBonds > res.NumBonds)) std::cout<<" - failed. LESS: "< res.NumAtoms ||*/ referenceResults[n].NumBonds > res.NumBonds ? 
"MISSING":"GREATER" , referenceResults[n].NumAtoms, referenceResults[n].NumBonds, referenceResults[n].SmartsString.c_str()); if(referenceResults[n].Canceled ||(referenceResults[n].NumAtoms == res.NumAtoms && referenceResults[n].NumBonds == res.NumBonds)) passed++; else if(res.Canceled) timedout++; else { if(referenceResults[n].NumBonds > res.NumBonds) failed_less++; failed++; } } else fprintf(f, "# %u REFCMP: res ABSENT - timeout\n", n+1); } // 1 . 1 25 28 1.69 F-c1:c:c: fprintf(f, "%u %c %d %u %u %.2f %s\n", n+1, (res.Canceled ? 'F':'.'), 1 //number of fragments in the MCS , res.NumAtoms, res.NumBonds, sec, res.SmartsString.c_str()); if(fs) fprintf(fs, "//# %u %c %u %u %.2f sec MCS: %s\n", n+1, (res.Canceled ? 'F':'.'), res.NumAtoms, res.NumBonds, sec, res.SmartsString.c_str()); #ifdef xxVERBOSE_STATISTICS_ON if(ft) // statistic details fprintf(ft, "%u; %s; %d; %d; %.2f; %.2f; %u; %u; %u; %u\n",n+1 , !res.Canceled ? "ok" : referenceResults[n].Canceled ? "bad" : "TIMEOUT" , referenceResults[n].Canceled ? 0 : res.NumAtoms - referenceResults[n].NumAtoms , referenceResults[n].Canceled ? 
0 : res.NumBonds - referenceResults[n].NumBonds , sec, referenceResultsTime[n] , stat.Seed - curStat.Seed , stat.MatchCall - curStat.MatchCall , stat.AtomCompareCalls - curStat.AtomCompareCalls , stat.BondCompareCalls - curStat.BondCompareCalls ); stat.AtomCompareCalls = 0; // 32 bit counter with very big value -> possible overflow stat.BondCompareCalls = 0; #endif } fprintf(f, "#\n#\n# %u passed, %u failed, %u failed_less, %u timed out.\n# Total %.2f seconds, Average %.2f seconds, Average exclude timeouts about %.2f seconds.\n" , passed, failed, failed_less, timedout, secTotal, secTotal/n, (secTotal-30.6*timedout)/n); #ifdef xxVERBOSE_STATISTICS_ON fprintf(f, "#\n# --- STATISTICS:---\n# Total value | Average\n" "# Seeds Num %15u | %8u (amount of generated seeds)\n" "# BestSizeR %15u | %8u = %d%% (rejected by RemainingSize against BestSize seed)\n" "# MatchCall %15u | %8u (SubstructMatch function calls)\n" "# MatchTRUE %15u | %8u = %d%%\n" #ifdef FAST_SUBSTRUCT_CACHE // "#HashCache %15u | %8u keys\n" // "#HashCache %15u | %8u entries\n" "# HashCacheFind %15u | %8u \n" "# HashKeysFound %15u | %8u = %d%% hash keys found \n" "# ExactMatchCall %15u | %8u (SubstructMatch function calls)\n" "# ExactMatchTRUE %15u | %8u \n" #endif , stat.Seed, stat.Seed/n , stat.RemainingSizeRejected, stat.RemainingSizeRejected/n , 0==stat.Seed ? 0 : int((double)stat.RemainingSizeRejected / (double)stat.Seed *100.) , stat.MatchCall , stat.MatchCall/n , stat.MatchCallTrue , stat.MatchCallTrue/n , int((double)stat.MatchCallTrue / (double)stat.MatchCall *100.) #ifdef FAST_SUBSTRUCT_CACHE // , stat.HashCacheKeysSize, stat.HashCacheKeysSize/n // , stat.HashCacheEntries , stat.HashCacheEntries/n , stat.FindHashInCache, stat.FindHashInCache/n , stat.HashKeyFoundInCache,stat.HashKeyFoundInCache/n , 0==stat.FindHashInCache ? 0 : int((double)stat.HashKeyFoundInCache / (double)stat.FindHashInCache *100.) 
, stat.ExactMatchCall , stat.ExactMatchCall/n , stat.ExactMatchCallTrue, stat.ExactMatchCallTrue/n #endif ); #endif if(f) fclose(f); if(fs) fclose(fs); #ifdef xxVERBOSE_STATISTICS_ON if(ft) fclose(ft); #endif printTime(); } //========================================================================= void test504() { std::cout << "\ntest504()\n"; std::vector mols; const char* smi[] = { //TEST 504 "C(CCNC(C1CC1[c:1]1[c:2]c(Cl)c(Cl)c[c:3]1)=O)CCN1CCC(NC(Nc2ccc(Cl)cc2)=O)C1 CHEMBL545864", // - QUERY //"C(CCNC(C1CC1[c:1]1[c:2]c(Cl)c(Cl)c[c:3]1)=O)CCN1CCC(NC(Nc2ccccc2)=O)C1 CHEMBL545864", // - QUERY - Cl:30 "FC(F)(F)c1cc(NC(N2CCCN(CCCCCNC(C3CC3c3ccc(Cl)c(Cl)c3)=O)CC2)=O)ccc1Cl CHEMBL528228", "FC(F)(F)c1cc(NC(NC2CCN(CCCCCNC(C3CC3c3ccc(Cl)c(Cl)c3)=O)C2)=O)ccc1Cl CHEMBL525875", "Fc1ccc(NC(N2CCCN(CCCCCNC(C3CC3c3ccc(Cl)c(Cl)c3)=O)CC2)=O)cc1C(F)(F)F CHEMBL527277", "FC(F)(F)c1cc(NC(NC2CCN(CCCCCNC(C3CC3c3ccc(Cl)c(Cl)c3)=O)CC2)=O)ccc1Cl CHEMBL537333", "Fc1ccc(NC(NC2CCN(CCCCCNC(C3CC3c3ccc(Cl)c(Cl)c3)=O)C2)=O)cc1C(F)(F)F CHEMBL588077", "FC(F)(F)c1ccc(NC(NC2CCN(CCCCCNC(C3CC3c3cc(Cl)c(Cl)cc3)=O)C2)=O)cc1 CHEMBL525307", "Fc1ccc(NC(NC2CCN(CCCCCNC(C3CC3c3ccc(Cl)c(Cl)c3)=O)CC2)=O)cc1C(F)(F)F CHEMBL581847", "FC(F)(F)c1ccc(NC(NC2CCN(CCCCCNC(C3CC3c3cc(Cl)c(Cl)cc3)=O)CC2)=O)cc1 CHEMBL579547", "N#Cc1cccc(NC(NC2CCN(CCCCCNC(C3CC3c3ccc(Cl)c(Cl)c3)=O)CC2)=O)c1 CHEMBL529994", }; RWMol* qm = SmilesToMol( getSmilesOnly(smi[0]) ); unsigned nq = qm->getNumAtoms(); for(size_t ai = 0; ai < nq; ai++) { Atom* atom = qm->getAtomWithIdx(ai); atom->setProp(common_properties::molAtomMapNumber, (int)ai); } std::cout<<"Query +MAP "<< MolToSmiles(*qm) <<"\n"; mols.push_back(ROMOL_SPTR(qm)); // with RING INFO for(int i=1; i mols; char smiles[4096]; unsigned n=0; FILE* f = fopen(test, "rt"); if(!f) { perror("fopen testChEMBL_Txt()"); return ""; } char testsmi[512]; strcpy(testsmi, test); strcpy(testsmi + strlen(test)-3, "smi"); FILE* fsmi = fopen(testsmi, "wt"); FILE* fres = fopen(csv, "at"); while(fgets(smiles, 
sizeof(smiles), f)) { if('#' != smiles[0] && ' ' != smiles[0] && '/' != smiles[0]) { // commented to skip std::string id; std::string smi = getSmilesOnlyTxt(smiles, &id); fprintf(fsmi, "%s\n", (smi+" "+ id).c_str()); mols.push_back(ROMOL_SPTR(SmilesToMol(smi))); } } fclose(f); fclose(fsmi); p.Threshold = th; t0 = nanoClock(); MCSResult res = findMCS(mols, &p); unsigned long long tc1 = nanoClock(); double sec = double(tc1-t0) / 1000000.; std::cout << "MCS : "<%s\n", "chembl_II_sets.P.res.csv"); p.Timeout = 60; p.Verbose = false; for(int i=0; i>%s\n", p.Timeout, testsmi.c_str(), "chembl_II_sets.P.res.csv"); // command for the same Python test } } void testChEMBL_TxtSLOW_chembl_II_sets(double th=1.0) { FILE* fres = fopen("chembl_II_sets.SLOW.C++.res.csv", "wt"); fprintf(fres,"test;threshold;t,sec;atoms;bonds;mcs\n"); fclose(fres); const char* test[] = { "Target_no_10980_51302.txt", "Target_no_114_31443.txt", "Target_no_10260_54285.txt", "Target_no_11489_37339.txt", "Target_no_10980_52937.txt", "Target_no_114_48395.txt", "Target_no_10280_45765.txt", "Target_no_10188_50681.txt", "Target_no_114_46087.txt", "Target_no_10188_58358.txt", "Target_no_11140_54121.txt", "Target_no_114_37208.txt", "Target_no_10193_53483.txt", "Target_no_10980_31623.txt", "Target_no_10280_60672.txt", "Target_no_10188_45279.txt", "Target_no_11140_49860.txt", "Target_no_10280_53081.txt", "Target_no_11489_46089.txt", "Target_no_10188_54039.txt", "Target_no_114_34747.txt", "Target_no_11365_30331.txt", "Target_no_10188_48029.txt", "Target_no_114_16827.txt", "Target_no_100_30745.txt", "Target_no_108_49591.txt", "Target_no_107_49591.txt", "Target_no_10980_35957.txt", "Target_no_11489_37318.txt", "Target_no_10188_58265.txt", "Target_no_107_58879.txt", "Target_no_11140_37272.txt", "Target_no_10188_39262.txt", "Target_no_10280_47928.txt", "Target_no_107_49345.txt", "Target_no_10188_20244.txt", "Target_no_10980_20853.txt", "Target_no_11365_58566.txt", "Target_no_114_44562.txt", "Target_no_100_16613.txt", 
"Target_no_11489_39561.txt", "Target_no_10434_35926.txt", "Target_no_10980_31508.txt", "Target_no_11489_34818.txt", "Target_no_10188_58866.txt", "Target_no_11489_58958.txt", "Target_no_11140_39472.txt", "Target_no_10980_5739.txt", "Target_no_11489_54754.txt", "Target_no_10260_38526.txt", "Target_no_11489_58262.txt", "Target_no_107_34921.txt", "Target_no_107_30866.txt", "Target_no_108_30866.txt", "Target_no_10980_36641.txt", "Target_no_10980_30840.txt", "Target_no_107_31249.txt", "Target_no_11140_53395.txt", "Target_no_10193_46700.txt", "Target_no_10980_30994.txt", "Target_no_11140_37038.txt", }; for(int i=0; i mols; char smiles[4096]; unsigned n=0; FILE* f = fopen(test, "rt"); if(!f) { perror("fopen testChEMBLdat()"); return; } FILE* fs = fopen((std::string(test)+".smi").c_str(), "wt"); std::cout<<"Loading SMILES ... \n"; t0 = nanoClock(); while(fgets(smiles, sizeof(smiles), f)) { std::cout<<"\rLine: "<< ++n <<" "; if('#' != smiles[0] && ' ' != smiles[0] && '/' != smiles[0]) { // commented to skip mols.push_back(ROMOL_SPTR(SmilesToMol(getSmilesOnlyChEMBL(smiles)))); fputs(getSmilesOnlyChEMBL(smiles).c_str(), fs); } } fclose(f); fclose(fs); printTime(); std::cout<<"FIND MCS in "< mols; const char* smi[] = { //Target_no_10188_30149.txt // VERY SLOWER than Python "CN(C)CCNC(=O)c1ccc(-c2n[nH]c3cc(Nc4ccccc4Cl)ccc32)cc1 CHEMBL399167", "O=C(O)c1cccc(-c2[nH]nc3cc(Nc4ccccc4Cl)ccc32)c1 CHEMBL197613", "c1ccc(Nc2ccc3c(c2)[nH]nc3-c2ccccc2)cc1 CHEMBL383177", /// == QUERY "NC(=O)c1cccc(-c2[nH]nc3cc(Nc4ccccc4Cl)ccc32)c1 CHEMBL199136", "Clc1ccccc1Nc1ccc2c(c1)n[nH]c2-c1ccccc1 CHEMBL440566", "O=C(NCCCN1CCOCC1)c1cccc(-c2[nH]nc3cc(Nc4ccccc4Cl)ccc32)c1 CHEMBL198687", "O=C(O)c1ccc(-c2[nH]nc3cc(Nc4ccccc4Cl)ccc32)cc1 CHEMBL197698", "O=C(NC1CCNCC1)c1cccc(-c2n[nH]c3cc(Nc4ccccc4Cl)ccc32)c1 CHEMBL194806", "COc1ccccc1Nc1ccc2c(c1)[nH]nc2-c1ccccc1 CHEMBL254443", "CN(C)CCNC(=O)c1cccc(-c2[nH]nc3cc(Nc4ccccc4Cl)ccc32)c1 CHEMBL198821", }; for(int i=0; i mols; const char* smi[] = { 
"Cn1c(=O)c(-c2c(Cl)cccc2Cl)cc2cnc(Nc3cccc(O)c3)nc21", "Cn1c(=O)c(-c2c(Cl)cccc2Cl)cc2cnc(Nc3ccc(F)cc3)nc21", "Cn1c(=O)c(-c2c(Cl)cccc2Cl)cc2cnc(Nc3ccc(OC4OC(CO)C(O)C(O)C4O)cc3)nc21", "Cn1c(=O)c(-c2c(Cl)cccc2Cl)cc2cnc(Nc3ccc(NC(=O)CCl)cc3)nc21", "Cn1c2nc(Nc3ccc(NC(=O)CCN)cc3)ncc2cc(-c2c(Cl)cccc2Cl)c1=O", "Cn1c(=O)c(-c2c(Cl)cccc2Cl)cc2cnc(Nc3cccc(COCC(O)CO)c3)nc21", "Cn1c(=O)c(-c2c(Cl)cccc2Cl)cc2cnc(Nc3ccc(O)cc3)nc21", "CC(=O)Nc1ccc(Nc2ncc3cc(-c4c(Cl)cccc4Cl)c(=O)n(C)c3n2)cc1", "Cn1c2nc(Nc3ccc(N)cc3)ncc2cc(-c2c(Cl)cccc2Cl)c1=O", "Cn1c(=O)c(-c2c(Cl)cccc2Cl)cc2cnc(Nc3ccc(NC(=O)CCNC(=O)OC(C)(C)C)cc3)nc21", "Cc1ccc(Nc2ncc3cc(-c4c(Cl)cccc4Cl)c(=O)n(C)c3n2)cc1", "Cn1c(=O)c(-c2c(Cl)cccc2Cl)cc2cnc(Nc3cccc(CO)c3)nc21", "Cn1c(=O)c(-c2c(Cl)cccc2Cl)cc2cnc(Nc3ccc(NCC(O)CO)cc3)nc21", "CCc1cccc(Nc2ncc3cc(-c4c(Cl)cccc4Cl)c(=O)n(C)c3n2)c1", "Cn1c2nc(Nc3cccc(N)c3)ncc2cc(-c2c(Cl)cccc2Cl)c1=O", "CC(=O)Nc1cccc(Nc2ncc3cc(-c4c(Cl)cccc4Cl)c(=O)n(C)c3n2)c1", "Cn1c(=O)c(-c2c(Cl)cccc2Cl)cc2cnc(Nc3ccc(CCO)cc3)nc21", "Cn1c(=O)c(-c2c(Cl)cccc2Cl)cc2cnc(Nc3ccc(I)cc3)nc21", "CN1CCN(C(=O)c2ccc(Nc3ncc4cc(-c5c(Cl)cccc5Cl)c(=O)n(C)c4n3)cc2)CC1", }; for(int i=0; i mols; for(int i=1; i mols; std::string fn(test); RDKit::SDMolSupplier suppl(fn); while(!suppl.atEnd()) { ROMol *m=suppl.next(); if(m) mols.push_back(ROMOL_SPTR(m)); } t0 = nanoClock(); MCSResult res = findMCS(mols, &p); double t = (nanoClock() - t0) / 1000000.; std::cout << "MCS: "< mols; FILE* fcsv = fopen((std::string(path)+"_"+test+".SMI.C.csv").c_str(), "wt"); fprintf(fcsv, "test;Nmols;status;t,sec;nAtoms;nBonds;C++ MCS; E status;E t,sec;E nAtoms;E nBonds;E C++ MCS\n"); bool fileExist = true; unsigned n = 1; for( ; fileExist ; n++) { char smiName[256]; sprintf(smiName,"%s/smiles/%s.%u.smi", path, test, n); FILE* fsmi = fopen(smiName, "rt"); if(!fsmi) { fileExist = false; break; } char smiles[4096]; while(fgets(smiles, sizeof(smiles), fsmi)) { mols.push_back(ROMOL_SPTR(SmilesToMol(getSmilesOnly(smiles)))); } fclose(fsmi); if(mols.size()>1) { 
t0 = nanoClock(); MCSResult res = findMCS(mols, &p); double t = (nanoClock() - t0) / 1000000.; printTime(); std::cout << n <<" MCS: "<1) { p.BondTyper = MCSBondCompareOrderExact; t0 = nanoClock(); MCSResult res = findMCS(mols, &p); double t = (nanoClock() - t0) / 1000000.; printTime(); std::cout << n <<" MCS: "< mols; std::vector all_mols; FILE* fall = fopen((std::string(path)+"_ALL.P.bat").c_str(), "at"); fprintf(fall, "CALL %s\n", (std::string(path)+"_"+test+".P.bat").c_str()); fclose(fall); FILE* fcmd = fopen((std::string(path)+"_"+test+".P.bat").c_str(), "wt"); FILE* fcsv = fopen((std::string(path)+"_"+test+".C.csv").c_str(), "wt"); fprintf(fcsv, "test;Nmols;status;t,sec;nAtoms;nBonds;C++ MCS; E status;E t,sec;E nAtoms;E nBonds;E C++ MCS\n"); std::string fn(std::string(path)+"/"+test); RDKit::MolSupplier* suppl = 0; try { if('f' == test[strlen(test)-1]) // sdf file suppl = new RDKit::SDMolSupplier(fn); else if('i' == test[strlen(test)-1]) // smi file suppl = new RDKit::SmilesMolSupplier(fn); } catch(...) 
{ std::cout << "ERROR: RDKit could not load input file" << "\n"; return; } if(!suppl) { std::cout << "ERROR: unsupported input file format" << "\n"; return; } // commands for prepare Python test: fprintf(fcmd, "DEL %s\n", (std::string(path)+"_"+test+".P.csv").c_str()); //clear before append results fprintf(fcmd, "SET PATH=%%PATH%%;C:/LIB\n"); fprintf(fcmd, "SET PYTHONPATH=C:/Projects/RDKit/RDKit_2013_09_1\n"); fprintf(fcmd, "ECHO P test;P Nmols;P status;P time,sec;P nAtoms;P nBonds;P MCS >%s\n", (std::string(path)+"_"+test+".P.csv").c_str()); unsigned n = 1; std::cout << "\n****** Load all molecules from SFD and PAIR SET test *********\n\n"; for( ; !suppl->atEnd() ; n++) { char smiName[256]; sprintf(smiName,"%s/smiles/%s.%u.smi", path, test, n); FILE* fsmi = fopen(smiName, "wt"); if(!fsmi) { std::cout << "ERROR: could not create SMI file " << smiName <<"\n"; return; } ROMol *m=0; for(int i=0; i<2 && !suppl->atEnd(); i++) { // load sequential pair m = suppl->next(); if(m) { mols.push_back(ROMOL_SPTR(m)); all_mols.push_back(mols.back()); fprintf(fsmi,"%s Mol%u\n", MolToSmiles(*m).c_str(), n+i); } } fclose(fsmi); if(mols.size()<2) { unlink(smiName); n--; } else { fprintf(fcmd, "fmcs_bench.py --id %u --timeout %u --threshold %.2f %s >>%s\n", n, p.Timeout, p.Threshold, smiName, (std::string(path)+"_"+test+".P.csv").c_str()); // command for the same Python test if(mols.size()>1) { t0 = nanoClock(); MCSResult res = findMCS(mols, &p); double t = (nanoClock() - t0) / 1000000.; if(t < 0.00001) t = 0.00001; // avoid division by zero printTime(); std::cout << n <<" MCS: "<1) { p.BondTyper = MCSBondCompareOrderExact; t0 = nanoClock(); MCSResult res = findMCS(mols, &p); double t = (nanoClock() - t0) / 1000000.; if(t < 0.00001) t = 0.00001; // avoid division by zero printTime(); std::cout << n <<" MCS: "<>%s\n", n, p.Timeout, p.Threshold, smiName, (std::string(path)+"_"+test+".P.csv").c_str()); // command for the same Python test ROMol *m=0; unsigned iN = 3+rand()%32; for(int 
i=0; i < iN; i++) { // load random set mols.push_back(all_mols[rand()%(all_mols.size()-1)]); fprintf(fsmi,"%s Mol%u\n", MolToSmiles(*mols.back()).c_str(), n+i); } fclose(fsmi); if(mols.size()>1) { t0 = nanoClock(); MCSResult res = findMCS(mols, &p); double t = (nanoClock() - t0) / 1000000.; if(t < 0.00001) t = 0.00001; // avoid division by zero printTime(); std::cout << n <<" MCS: "<1) { p.BondTyper = MCSBondCompareOrderExact; t0 = nanoClock(); MCSResult res = findMCS(mols, &p); double t = (nanoClock() - t0) / 1000000.; if(t < 0.00001) t = 0.00001; // avoid division by zero printTime(); std::cout << n <<" MCS: "<getNumBonds()) maxMol = all_mols[i]->getNumBonds(); } const unsigned N_BigRandomTests = all_mols.size() > 2000 ? all_mols.size()/2 : all_mols.size()*2; const unsigned N_BigRandomTestsAttempts = all_mols.size()*130; const unsigned SizeOfBigMCS_ForBigRandomTests = maxMol < 24 ? maxMol*2/3 : 21; fall = fopen((std::string(path)+"_ALL_BIG.P.bat").c_str(), "at"); fprintf(fall, "CALL %s\n", (std::string(path)+"_"+test+".BIG_MCS.P.bat").c_str()); fclose(fall); fcmd = fopen((std::string(path)+"_"+test+".BIG_MCS.P.bat").c_str(), "wt"); fcsv = fopen((std::string(path)+"_"+test+".BIG_MCS.C.csv").c_str(), "wt"); fprintf(fcsv, "test;Nmols;status;t,sec;nAtoms;nBonds;C++ MCS; E status;E t,sec;E nAtoms;E nBonds;E C++ MCS\n"); const unsigned n1 = n; fprintf(fcmd, "DEL %s\n", (std::string(path)+"_"+test+".BIG_MCS.P.csv").c_str()); //clear before append results fprintf(fcmd, "SET PATH=%%PATH%%;C:/LIB\n"); fprintf(fcmd, "SET PYTHONPATH=C:/Projects/RDKit/RDKit_2013_09_1\n"); fprintf(fcmd, "ECHO P test;P Nmols;P status;P time,sec;P nAtoms;P nBonds;P MCS >%s\n", (std::string(path)+"_"+test+".BIG_MCS.P.csv").c_str()); for(size_t jn=0; jn < N_BigRandomTestsAttempts && n-n1 <= N_BigRandomTests; jn++) { char smiName[512]; sprintf(smiName,"%s/smilesBIG/%s.%u.smi", path, test, n); ROMol *m=0; unsigned iN = 2+rand()%24; mols.clear(); for(size_t i=0; i < iN; i++) // load random set 
for(size_t ij=0; ij < all_mols.size()/2; ij++) { unsigned mi = rand()%(all_mols.size()-1); if(all_mols[mi]->getNumBonds() < SizeOfBigMCS_ForBigRandomTests) continue; mols.push_back(all_mols[mi]); break; } MCSResult res; if(mols.size()>1) { t0 = nanoClock(); res = findMCS(mols, &p); double t = (nanoClock() - t0) / 1000000.; if(t < 0.00001) t = 0.00001; // avoid division by zero printTime(); std::cout << n <<" MCS: "<= SizeOfBigMCS_ForBigRandomTests && res.isCompleted()) { fprintf(fcsv, "%u;%lu;%s;%.5f;%u;%u;%s;", n, mols.size(), res.isCompleted()?" ":"TIMEOUT", t, res.NumAtoms, res.NumBonds, res.SmartsString.c_str()); fprintf(fcmd, "fmcs_bench.py --id %u --timeout %u --threshold %.2f %s >>%s\n", n, p.Timeout, p.Threshold, smiName, (std::string(path)+"_"+test+".BIG_MCS.P.csv").c_str()); // command for the same Python test n++; } } if(res.NumBonds >= SizeOfBigMCS_ForBigRandomTests && res.isCompleted()) { FILE* fsmi = fopen(smiName, "wt"); for(int i=0; i < mols.size(); i++) { // load random set fprintf(fsmi,"%s Mol%u\n", MolToSmiles(*mols[i]).c_str(), n+i); } fclose(fsmi); } if(res.NumBonds >= SizeOfBigMCS_ForBigRandomTests && res.isCompleted()) { p.BondTyper = MCSBondCompareOrderExact; t0 = nanoClock(); res = findMCS(mols, &p); double t = (nanoClock() - t0) / 1000000.; if(t < 0.00001) t = 0.00001; // avoid division by zero printTime(); std::cout << n <<" MCS: "< mols; char smiles[4096]; unsigned n=0; FILE* f = fopen(test, "rt"); std::cout<<"Loading SMILES ... \n"; while(fgets(smiles, sizeof(smiles), f)) { std::cout<<"\rLine: "<< ++n <<" "; if('#' != smiles[0] && ' ' != smiles[0] && '/' != smiles[0]) // commented to skip // if(strlen(smiles) > 92) // minimal query size !!! mols.push_back(ROMOL_SPTR(SmilesToMol(getSmilesOnly(smiles)))); } fclose(f); printTime(); std::cout<<"FIND MCS in "< tc; tc.push_back(90); tc.push_back(326); tc.push_back(330); //992 PYTHON 20 21 N1(-C-C=C(-c2:c(:c:c:c):c:n:c:2)-C-C-1)-C-C-C-C-C-C //992 . 
1 27 28 1.10 CNcc(CCCCCN1CCC(=CC1)c1cncc1ccc)cccc //now 25 26 /* tc.push_back(33); tc.push_back(59); tc.push_back(124); tc.push_back(345); tc.push_back(605); tc.push_back(619); */ testFileMCSB(argv[1], 300, tc); return 0; } #endif if(3 == argc && '-' == argv[1][0]) switch(argv[1][1]) { // ./test -s|m|b case 's': { // smiles files list char test[256]; FILE* f = fopen(argv[2], "rt"); while(fgets(test, sizeof(test), f)) testFileSMILES(test); fclose(f); } break; case 'm': { // SDF mol files list char test[256]; FILE* f = fopen(argv[2], "rt"); while(fgets(test, sizeof(test), f)) testFileSDF(test); fclose(f); } break; case 'b': { std::vector tc; // empty -> all testFileMCSB(argv[2], 30, tc); // .mcsb } break; default: break; } else if(2 == argc) { // .sdf /.smi file if(0==strcmp(argv[1]+strlen(argv[1])-4, ".smi")) testFileSMILES(argv[1]);// .smi else if(0==strcmp(argv[1]+strlen(argv[1])-4, ".sdf")) testFileSDF(argv[1]); // .sdf else if(0==strcmp(argv[1]+strlen(argv[1])-4, "mcsb")) testFileMCSB(argv[1], 30); // .mcsb else if(0==strcmp(argv[1]+strlen(argv[1])-4, ".dat")) testChEMBLdat(argv[1]); // .sdf else printf("UNKNOWN File Extention.\n"); } else if(argc > 1+2) testCmndLineSMILES(argc, argv); else { testGregSDFFileSetFiltered(); } // BOOST_LOG(rdInfoLog) << "*******************************************************\n"; unsigned long long t1 = nanoClock(); double sec = double(t1-T0) / 1000000.; printf("TOTAL Time elapsed %.2lf seconds\n", sec); return 0; }