Files
rdkit/Code/GraphMol/FragCatalog/FragFPGenerator.cpp
Ricardo Rodriguez 92d5d2c657 Refactor to stop using iterator definitions in types.h (#9275)
* clean up iterator defs in types.h

* do not use auto for inline constexpr

* restore undef max,min

* restore types.h declarations
2026-05-21 19:19:38 +02:00

180 lines
6.1 KiB
C++

//
// Copyright (C) 2003-2006 Rational Discovery LLC
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
#include <Catalogs/Catalog.h>
#include "FragFPGenerator.h"
#include "FragCatalogEntry.h"
#include "FragCatParams.h"
#include "FragCatalogUtils.h"
#include <RDGeneral/types.h>
#include <DataStructs/BitVects.h>
#include <GraphMol/RDKitBase.h>
#include <GraphMol/Subgraphs/SubgraphUtils.h>
#include <GraphMol/Subgraphs/Subgraphs.h>
namespace RDKit {
ExplicitBitVect *FragFPGenerator::getFPForMol(const ROMol &mol,
const FragCatalog &fcat) {
INT_PATH_LIST_MAP allPaths;
auto *fp = new ExplicitBitVect(fcat.getFPLength());
const FragCatParams *fparams = fcat.getCatalogParams();
// prepare the molecule for fingerprinting
// i.e. find functional groups, remove them from the mol etc.
MatchVectType newAidToFid;
ROMol *coreMol = prepareMol(mol, fparams, newAidToFid);
computeFP(*coreMol, fcat, newAidToFid, fp);
// this was Issue294;
delete coreMol;
return fp;
}
void FragFPGenerator::computeFP(const ROMol &mol, const FragCatalog &fcat,
const MatchVectType &aidToFid,
ExplicitBitVect *fp) {
PRECONDITION(fp, "Bad ExplicitBitVect - FingerPrint");
const FragCatParams *fparams = fcat.getCatalogParams();
int uLen = fparams->getUpperFragLength();
double tol = fparams->getTolerance();
DOUBLE_INT_MAP mapkm1, mapk;
// get all the paths in the molecule mapped by their order
INT_PATH_LIST_MAP allPathsMap = findAllSubgraphsOfLengthsMtoN(mol, 1, uLen);
// first deal with order 1 stuff
PATH_LIST_CI pi;
const INT_VECT &o1entries = fcat.getEntriesOfOrder(1);
const FragCatalogEntry *entry;
int bitId;
double invar;
for (pi = allPathsMap[1].begin(); pi != allPathsMap[1].end(); pi++) {
// std::cout << "-*-*-* Fragment *-*-*-*-" << std::endl;
auto *nent = new FragCatalogEntry(&mol, (*pi), aidToFid);
nent->setDescription(fparams);
invar = computeIntVectPrimesProduct(*pi);
// ok here is the plan - initialize the entry for this path in mapkm1 to -1
// which will be overwritten to the correct entry id in the catalog if we
// find
// a match. This -1 initialization will be useful when we move onto higher
// order stuff
mapkm1[invar] = -1;
for (auto eti : o1entries) {
entry = fcat.getEntryWithIdx(eti);
if (nent->match(entry, tol)) {
bitId = entry->getBitId();
if (bitId >= 0) {
fp->setBit(bitId);
}
mapkm1[invar] = eti;
break;
}
}
delete nent;
}
// now deal with the higher order stuff.
// - for each higher order path obtain the invariants for order k-1 subpaths
// - if entries for this invariant values exist in mapkm1, it means that we
// have
// seen this subpath before
// - if the value for this entry is -1 it mean that we did not find a match
// for this subpath
// in the catalog i.e. we wont find a match for this order k path either
// - if the entries for all order k-1 subpaths have non-negative values -
// search the intersection
// of the down entries for these catalog entries to find a match for the
// order k path
INT_PATH_LIST_MAP_CI ordi;
double sinvar;
int entId;
for (ordi = allPathsMap.begin(); ordi != allPathsMap.end(); ordi++) {
if (ordi->first < 2) {
continue; // ignore order 1 paths - we are done with them
}
mapk.clear();
for (pi = ordi->second.begin(); pi != ordi->second.end(); pi++) {
invar = computeIntVectPrimesProduct(*pi);
mapk[invar] = -1;
auto *nent = new FragCatalogEntry(&mol, (*pi), aidToFid);
nent->setDescription(fparams);
// std::cout << "Testing 2nd order fragment: " << nent->getDescription()
// << std::endl;
int scnt = 0;
INT_VECT intersect, tmpVect;
PATH_TYPE::const_iterator pii;
// loop over the subpaths (order (k-1) ) (by ignoring one bond
// at a time from consideration) and find out which entries
// int eh catalog they correspond to and make an intersection
// of the down entries (i.e. order k entries that contain
// these order k-1 entries. - we can basically limit our
// search for an isomorphic entry in the catalog of the order
// k path from the molecule to this intersection list
for (pii = (*pi).begin(); pii != (*pi).end(); pii++) {
sinvar = invar / firstThousandPrimes[*pii];
// here is a check for "did we see this path before ?"
// this should take care of disconnected subpaths (since the
// catalog should have only connected subgraphs)
if (mapkm1.find(sinvar) == mapkm1.end()) {
continue;
}
if (mapkm1[sinvar] == -1) {
// we have seen this order k-1 pat before but we couldn't find a match
// for it in the catalog
// which mean that we wont find a match for the order k path either in
// the catalog
intersect
.clear(); // the intersection list we built so far is useless
break;
}
entId = mapkm1[sinvar];
if (scnt == 0) {
intersect = fcat.getDownEntryList(entId);
scnt++;
} else {
tmpVect = intersect;
Intersect(fcat.getDownEntryList(entId), tmpVect, intersect);
scnt++;
}
}
// now search through the intersection list to check if we already have a
// isomorphic entry in the catalog
for (auto iti : intersect) {
entry = fcat.getEntryWithIdx(iti);
if (nent->match(entry, tol)) {
mapk[invar] = iti;
bitId = entry->getBitId();
if (bitId >= 0) {
fp->setBit(bitId);
}
break;
}
}
delete nent;
}
// overwrite mapkm1 with mapk before we move on to order k+1
mapkm1 = mapk;
} // end of loop over path order
}
} // namespace RDKit