mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-04 21:54:27 +08:00
184 lines
6.2 KiB
C++
184 lines
6.2 KiB
C++
//
|
|
// Copyright (C) 2003-2006 Rational Discovery LLC
|
|
//
|
|
// @@ All Rights Reserved @@
|
|
// This file is part of the RDKit.
|
|
// The contents are covered by the terms of the BSD license
|
|
// which is included in the file license.txt, found at the root
|
|
// of the RDKit source tree.
|
|
//
|
|
#include <Catalogs/Catalog.h>
|
|
#include "FragFPGenerator.h"
|
|
#include "FragCatalogEntry.h"
|
|
#include "FragCatParams.h"
|
|
#include "FragCatalogUtils.h"
|
|
|
|
#include <RDGeneral/types.h>
|
|
#include <DataStructs/BitVects.h>
|
|
#include <GraphMol/RDKitBase.h>
|
|
#include <GraphMol/Subgraphs/SubgraphUtils.h>
|
|
#include <GraphMol/Subgraphs/Subgraphs.h>
|
|
|
|
namespace RDKit {
|
|
|
|
ExplicitBitVect *FragFPGenerator::getFPForMol(const ROMol &mol,
|
|
const FragCatalog &fcat) {
|
|
INT_PATH_LIST_MAP allPaths;
|
|
|
|
auto *fp = new ExplicitBitVect(fcat.getFPLength());
|
|
const FragCatParams *fparams = fcat.getCatalogParams();
|
|
|
|
// prepare the molecule for fingerprinting
|
|
// i.e. find functional groups, remove them from the mol etc.
|
|
MatchVectType newAidToFid;
|
|
ROMol *coreMol = prepareMol(mol, fparams, newAidToFid);
|
|
|
|
computeFP(*coreMol, fcat, newAidToFid, fp);
|
|
|
|
// this was Issue294;
|
|
delete coreMol;
|
|
return fp;
|
|
}
|
|
|
|
void FragFPGenerator::computeFP(const ROMol &mol, const FragCatalog &fcat,
|
|
const MatchVectType &aidToFid,
|
|
ExplicitBitVect *fp) {
|
|
PRECONDITION(fp, "Bad ExplicitBitVect - FingerPrint");
|
|
|
|
const FragCatParams *fparams = fcat.getCatalogParams();
|
|
|
|
int uLen = fparams->getUpperFragLength();
|
|
double tol = fparams->getTolerance();
|
|
|
|
DOUBLE_INT_MAP mapkm1, mapk;
|
|
|
|
// get all the paths in the molecule mapped by their order
|
|
INT_PATH_LIST_MAP allPathsMap = findAllSubgraphsOfLengthsMtoN(mol, 1, uLen);
|
|
|
|
// first deal with order 1 stuff
|
|
PATH_LIST_CI pi;
|
|
INT_VECT_CI eti;
|
|
const INT_VECT &o1entries = fcat.getEntriesOfOrder(1);
|
|
const FragCatalogEntry *entry;
|
|
int bitId;
|
|
double invar;
|
|
for (pi = allPathsMap[1].begin(); pi != allPathsMap[1].end(); pi++) {
|
|
// std::cout << "-*-*-* Fragment *-*-*-*-" << std::endl;
|
|
auto *nent = new FragCatalogEntry(&mol, (*pi), aidToFid);
|
|
nent->setDescription(fparams);
|
|
invar = computeIntVectPrimesProduct(*pi);
|
|
// ok here is the plan - initialize the entry for this path in mapkm1 to -1
|
|
// which will be overwritten to the correct entry id in the catalog if we
|
|
// find
|
|
// a match. This -1 initialization will be useful when we move onto higher
|
|
// order stuff
|
|
mapkm1[invar] = -1;
|
|
for (eti = o1entries.begin(); eti != o1entries.end(); eti++) {
|
|
entry = fcat.getEntryWithIdx(*eti);
|
|
if (nent->match(entry, tol)) {
|
|
bitId = entry->getBitId();
|
|
if (bitId >= 0) {
|
|
fp->setBit(bitId);
|
|
}
|
|
mapkm1[invar] = (*eti);
|
|
break;
|
|
}
|
|
}
|
|
delete nent;
|
|
}
|
|
|
|
// now deal with the higher order stuff.
|
|
// - for each higher order path obtain the invariants for order k-1 subpaths
|
|
// - if entries for this invariant values exist in mapkm1, it means that we
|
|
// have
|
|
// seen this subpath before
|
|
// - if the value for this entry is -1 it mean that we did not find a match
|
|
// for this subpath
|
|
// in the catalog i.e. we wont find a match for this order k path either
|
|
// - if the entries for all order k-1 subpaths have non-negative values -
|
|
// search the intersection
|
|
// of the down entries for these catalog entries to find a match for the
|
|
// order k path
|
|
INT_PATH_LIST_MAP_CI ordi;
|
|
double sinvar;
|
|
int entId;
|
|
|
|
for (ordi = allPathsMap.begin(); ordi != allPathsMap.end(); ordi++) {
|
|
if (ordi->first < 2) {
|
|
continue; // ignore order 1 paths - we are done with them
|
|
}
|
|
mapk.clear();
|
|
|
|
for (pi = ordi->second.begin(); pi != ordi->second.end(); pi++) {
|
|
invar = computeIntVectPrimesProduct(*pi);
|
|
mapk[invar] = -1;
|
|
auto *nent = new FragCatalogEntry(&mol, (*pi), aidToFid);
|
|
nent->setDescription(fparams);
|
|
// std::cout << "Testing 2nd order fragment: " << nent->getDescription()
|
|
// << std::endl;
|
|
int scnt = 0;
|
|
INT_VECT intersect, tmpVect;
|
|
INT_VECT_CI iti;
|
|
PATH_TYPE::const_iterator pii;
|
|
|
|
// loop over the subpaths (order (k-1) ) (by ignoring one bond
|
|
// at a time from consideration) and find out which entries
|
|
// int eh catalog they correspond to and make an intersection
|
|
// of the down entries (i.e. order k entries that contain
|
|
// these order k-1 entries. - we can basically limit our
|
|
// search for an isomorphic entry in the catalog of the order
|
|
// k path from the molecule to this intersection list
|
|
for (pii = (*pi).begin(); pii != (*pi).end(); pii++) {
|
|
sinvar = invar / firstThousandPrimes[*pii];
|
|
|
|
// here is a check for "did we see this path before ?"
|
|
// this should take care of disconnected subpaths (since the
|
|
// catalog should have only connected subgraphs)
|
|
if (mapkm1.find(sinvar) == mapkm1.end()) {
|
|
continue;
|
|
}
|
|
|
|
if (mapkm1[sinvar] == -1) {
|
|
// we have seen this order k-1 pat before but we couldn't find a match
|
|
// for it in the catalog
|
|
// which mean that we wont find a match for the order k path either in
|
|
// the catalog
|
|
intersect
|
|
.clear(); // the intersection list we built so far is useless
|
|
break;
|
|
}
|
|
|
|
entId = mapkm1[sinvar];
|
|
if (scnt == 0) {
|
|
intersect = fcat.getDownEntryList(entId);
|
|
scnt++;
|
|
} else {
|
|
tmpVect = intersect;
|
|
Intersect(fcat.getDownEntryList(entId), tmpVect, intersect);
|
|
scnt++;
|
|
}
|
|
}
|
|
// now search through the intersection list to check if we already have a
|
|
// isomorphic
|
|
// entry in the catalog
|
|
for (iti = intersect.begin(); iti != intersect.end(); iti++) {
|
|
entry = fcat.getEntryWithIdx(*iti);
|
|
if (nent->match(entry, tol)) {
|
|
mapk[invar] = (*iti);
|
|
bitId = entry->getBitId();
|
|
if (bitId >= 0) {
|
|
fp->setBit(bitId);
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
delete nent;
|
|
}
|
|
|
|
// overwrite mapkm1 with mapk before we move on to order k+1
|
|
mapkm1 = mapk;
|
|
} // end of loop over path order
|
|
|
|
}
|
|
} // namespace RDKit
|