/* MaxMinPicker.cpp */ #include #include #include #include #include #include #include #include #include #include #include #if 0 #include #include #else #include "MaxMinPicker.h" #include "LeaderPicker.h" #endif std::vector mols; std::vector fps; // ExplicitBitVect **fps; const char *poolname; const char *pickname; unsigned int origsize; unsigned int picksize; unsigned int poolsize; unsigned int newpicks; int seed; unsigned long ticks = 0; static unsigned int LoadDatabase(FILE *fp) { char buffer[32768]; unsigned int result = 0; while (fgets(buffer, 327666, fp)) { if (buffer[0] == '#' || buffer[0] == ' ' || buffer[0] == '\t') continue; char *ptr = buffer; while (*ptr && *ptr != ' ' && *ptr != '\t') ptr++; if (*ptr) { *ptr++ = '\0'; char *end = ptr; while (*end && *end != '\n' && *end != '\r') end++; *end = '\0'; } RDKit::RWMol *mol; try { mol = RDKit::SmilesToMol(buffer); } catch (...) { mol = (RDKit::RWMol *)0; } if (mol) { #if 0 if (*ptr) { static std::string key("_Name"); mol->setProp(key,std::string(ptr)); } mols.push_back(mol); #endif ExplicitBitVect *fp; fp = RDKit::MorganFingerprints::getFingerprintAsBitVect(*mol, 2, 2048); fps.push_back(fp); delete mol; result++; } } fprintf(stderr, "%u molecules\n", result); return result; } static unsigned int LoadDatabase(const char *fname) { if (fname && strcmp(fname, "-")) { FILE *fp = fopen(fname, "rb"); if (!fp) return 0; unsigned int result = LoadDatabase(fp); fclose(fp); return result; } else return LoadDatabase(stdin); } #ifdef UNUSED static void GenerateFingerprints() { unsigned int count = (unsigned int)mols.size(); unsigned int size = (unsigned int)(count * sizeof(void *)); fps = (ExplicitBitVect **)malloc(size); for (unsigned int i = 0; i < count; i++) fps[i] = RDKit::MorganFingerprints::getFingerprintAsBitVect(*mols[i], 2, 2048); fprintf(stderr, "%u Fingerprints\n", count); } static void DestroyFingerprints() { unsigned int count = (unsigned int)mols.size(); for (unsigned int i = 0; i < count; i++) delete fps[i]; free(fps); } #endif static void DisplayUsage() { fputs("usage: MaxMinPicker [options] []\n", stderr); exit(1); } static void ProcessCommandLine(int argc, char *argv[]) { poolname = (const char *)0; pickname = (const char *)0; newpicks = 1000; for (int i = 1; i < argc; i++) { const char *ptr = argv[i]; if (ptr[0] == '-' && ptr[1]) { if (ptr[1] >= '0' && ptr[1] <= '9') { newpicks = atoi(ptr + 1); } else DisplayUsage(); } else if (!poolname) { poolname = ptr; } else if (!pickname) { pickname = ptr; } else DisplayUsage(); } if (!poolname) DisplayUsage(); } double MyDist(int i, int j) { ticks++; return 1.0 - TanimotoSimilarity(*fps[i], *fps[j]); } int main(int argc, char *argv[]) { struct timeval beg, end; double elapsed; ProcessCommandLine(argc, argv); gettimeofday(&beg, (struct timezone *)0); origsize = LoadDatabase(poolname); gettimeofday(&end, (struct timezone *)0); elapsed = (end.tv_sec + 0.000001 * end.tv_usec) - (beg.tv_sec + 0.000001 * beg.tv_usec); fprintf(stderr, "Elapsed time: %g secs\n", elapsed); if (pickname) { gettimeofday(&beg, (struct timezone *)0); picksize = LoadDatabase(pickname); gettimeofday(&end, (struct timezone *)0); elapsed = (end.tv_sec + 0.000001 * end.tv_usec) - (beg.tv_sec + 0.000001 * beg.tv_usec); fprintf(stderr, "Elapsed time: %g secs\n", elapsed); } else picksize = 0; poolsize = origsize + picksize; if (newpicks == 0) newpicks = origsize; #ifdef UNUSED gettimeofday(&beg, (struct timezone *)0); GenerateFingerprints(); gettimeofday(&end, (struct timezone *)0); elapsed = (end.tv_sec + 0.000001 * end.tv_usec) - (beg.tv_sec + 0.000001 * beg.tv_usec); fprintf(stderr, "Elapsed time: %g secs\n", elapsed); #endif RDKit::INT_VECT firstPicks; if (picksize) { firstPicks.reserve(picksize); for (unsigned int i = 0; i < picksize; i++) firstPicks.push_back((int)(i + origsize)); } // RDPickers::MaxMinPicker mmpicker; double threshold = 0.9; // 0.675; RDPickers::LeaderPicker ldpicker(threshold, 4); gettimeofday(&beg, (struct timezone *)0); #if 0 RDKit::INT_VECT iv = mmpicker.lazyPick(MyDist,poolsize,picksize+newpicks, firstPicks,seed,threshold); #else RDKit::INT_VECT iv = ldpicker.lazyPick(MyDist, poolsize, picksize + newpicks, firstPicks, threshold, 16); #endif gettimeofday(&end, (struct timezone *)0); elapsed = (end.tv_sec + 0.000001 * end.tv_usec) - (beg.tv_sec + 0.000001 * beg.tv_usec); fprintf(stderr, "Elapsed time: %g secs\n", elapsed); unsigned int count = (unsigned int)iv.size(); #if 0 for (unsigned int j=0; j