// // Copyright (C) 2003-2006 Rational Discovery LLC // // @@ All Rights Reserved @@ // This file is part of the RDKit. // The contents are covered by the terms of the BSD license // which is included in the file license.txt, found at the root // of the RDKit source tree. // #include #include "FragFPGenerator.h" #include "FragCatalogEntry.h" #include "FragCatParams.h" #include "FragCatalogUtils.h" #include #include #include #include #include namespace RDKit { ExplicitBitVect *FragFPGenerator::getFPForMol(const ROMol &mol, const FragCatalog &fcat) { INT_PATH_LIST_MAP allPaths; auto *fp = new ExplicitBitVect(fcat.getFPLength()); const FragCatParams *fparams = fcat.getCatalogParams(); // prepare the molecule for fingerprinting // i.e. find functional groups, remove them from the mol etc. MatchVectType newAidToFid; ROMol *coreMol = prepareMol(mol, fparams, newAidToFid); computeFP(*coreMol, fcat, newAidToFid, fp); // this was Issue294; delete coreMol; return fp; } void FragFPGenerator::computeFP(const ROMol &mol, const FragCatalog &fcat, const MatchVectType &aidToFid, ExplicitBitVect *fp) { PRECONDITION(fp, "Bad ExplicitBitVect - FingerPrint"); const FragCatParams *fparams = fcat.getCatalogParams(); int uLen = fparams->getUpperFragLength(); double tol = fparams->getTolerance(); DOUBLE_INT_MAP mapkm1, mapk; // get all the paths in the molecule mapped by their order INT_PATH_LIST_MAP allPathsMap = findAllSubgraphsOfLengthsMtoN(mol, 1, uLen); // first deal with order 1 stuff PATH_LIST_CI pi; const INT_VECT &o1entries = fcat.getEntriesOfOrder(1); const FragCatalogEntry *entry; int bitId; double invar; for (pi = allPathsMap[1].begin(); pi != allPathsMap[1].end(); pi++) { // std::cout << "-*-*-* Fragment *-*-*-*-" << std::endl; auto *nent = new FragCatalogEntry(&mol, (*pi), aidToFid); nent->setDescription(fparams); invar = computeIntVectPrimesProduct(*pi); // ok here is the plan - initialize the entry for this path in mapkm1 to -1 // which will be overwritten to the correct entry id in the catalog if we // find // a match. This -1 initialization will be useful when we move onto higher // order stuff mapkm1[invar] = -1; for (auto eti : o1entries) { entry = fcat.getEntryWithIdx(eti); if (nent->match(entry, tol)) { bitId = entry->getBitId(); if (bitId >= 0) { fp->setBit(bitId); } mapkm1[invar] = eti; break; } } delete nent; } // now deal with the higher order stuff. // - for each higher order path obtain the invariants for order k-1 subpaths // - if entries for this invariant values exist in mapkm1, it means that we // have // seen this subpath before // - if the value for this entry is -1 it mean that we did not find a match // for this subpath // in the catalog i.e. we wont find a match for this order k path either // - if the entries for all order k-1 subpaths have non-negative values - // search the intersection // of the down entries for these catalog entries to find a match for the // order k path INT_PATH_LIST_MAP_CI ordi; double sinvar; int entId; for (ordi = allPathsMap.begin(); ordi != allPathsMap.end(); ordi++) { if (ordi->first < 2) { continue; // ignore order 1 paths - we are done with them } mapk.clear(); for (pi = ordi->second.begin(); pi != ordi->second.end(); pi++) { invar = computeIntVectPrimesProduct(*pi); mapk[invar] = -1; auto *nent = new FragCatalogEntry(&mol, (*pi), aidToFid); nent->setDescription(fparams); // std::cout << "Testing 2nd order fragment: " << nent->getDescription() // << std::endl; int scnt = 0; INT_VECT intersect, tmpVect; PATH_TYPE::const_iterator pii; // loop over the subpaths (order (k-1) ) (by ignoring one bond // at a time from consideration) and find out which entries // int eh catalog they correspond to and make an intersection // of the down entries (i.e. order k entries that contain // these order k-1 entries. - we can basically limit our // search for an isomorphic entry in the catalog of the order // k path from the molecule to this intersection list for (pii = (*pi).begin(); pii != (*pi).end(); pii++) { sinvar = invar / firstThousandPrimes[*pii]; // here is a check for "did we see this path before ?" // this should take care of disconnected subpaths (since the // catalog should have only connected subgraphs) if (mapkm1.find(sinvar) == mapkm1.end()) { continue; } if (mapkm1[sinvar] == -1) { // we have seen this order k-1 pat before but we couldn't find a match // for it in the catalog // which mean that we wont find a match for the order k path either in // the catalog intersect .clear(); // the intersection list we built so far is useless break; } entId = mapkm1[sinvar]; if (scnt == 0) { intersect = fcat.getDownEntryList(entId); scnt++; } else { tmpVect = intersect; Intersect(fcat.getDownEntryList(entId), tmpVect, intersect); scnt++; } } // now search through the intersection list to check if we already have a // isomorphic entry in the catalog for (auto iti : intersect) { entry = fcat.getEntryWithIdx(iti); if (nent->match(entry, tol)) { mapk[invar] = iti; bitId = entry->getBitId(); if (bitId >= 0) { fp->setBit(bitId); } break; } } delete nent; } // overwrite mapkm1 with mapk before we move on to order k+1 mapkm1 = mapk; } // end of loop over path order } } // namespace RDKit