Files
rdkit/Code/GraphMol/FragCatalog/FragCatalogEntry.cpp
Ricardo Rodriguez 92d5d2c657 Refactor to stop using iterator definitions in types.h (#9275)
* clean up iterator defs in types.h

* do not use auto for inline constexpr

* restore undef max,min

* restore types.h declarations
2026-05-21 19:19:38 +02:00

247 lines
7.1 KiB
C++

//
// Copyright (C) 2003-2022 Greg Landrum and other RDKit contributors
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
#include "FragCatalogEntry.h"
#include <RDGeneral/types.h>
#include <RDGeneral/utils.h>
#include <RDGeneral/StreamOps.h>
#include <GraphMol/RDKitBase.h>
#include <GraphMol/MolPickler.h>
#include <GraphMol/Subgraphs/SubgraphUtils.h>
#include <GraphMol/Subgraphs/Subgraphs.h>
#include <RDGeneral/hash/hash.hpp>
#include <algorithm>
#include <cstdint>
#include <cstdlib>
#include <fstream>
#include <numeric>
#include <sstream>
#include <stdexcept>
#include <string>
#include <tuple>
namespace RDKit {
// Builds a catalog entry for the fragment of `omol` selected by `path`.
//
//  - omol:     the parent molecule; must not be null
//  - path:     the path in `omol` that is turned into the fragment molecule
//  - aidToFid: (atom index in `omol`, functional group id) pairs; only pairs
//              whose atom ends up in the fragment are recorded
FragCatalogEntry::FragCatalogEntry(const ROMol *omol, const PATH_TYPE &path,
                                   const MatchVectType &aidToFid) {
  PRECONDITION(omol, "bad mol");

  // start with the assumption that this entry is not participating in
  // any kind of fingerprinting
  d_aToFmap.clear();
  setBitId(-1);

  // filled in by pathToSubmol: atom index in omol -> atom index in dp_mol
  INT_MAP_INT parentToFragIdx;
  dp_mol = Subgraphs::pathToSubmol(*omol, path, false, parentToFragIdx);
  d_order = path.size();

  // translate the functional group locations into dp_mol's atom numbering
  for (const auto &[parentAid, funcGroupId] : aidToFid) {
    const auto mapped = parentToFragIdx.find(parentAid);
    if (mapped == parentToFragIdx.end()) {
      // this atom is not part of the fragment
      continue;
    }
    // operator[] creates the (empty) list the first time an atom is seen
    d_aToFmap[mapped->second].push_back(funcGroupId);
  }

  dp_props = new Dict();
  d_descrip = "";
}
// Rebuilds an entry from its binary string form (see initFromString()).
FragCatalogEntry::FragCatalogEntry(const std::string &pickle) {
  // fresh property dictionary and an empty functional group map; the rest
  // of the state is read from the pickle
  dp_props = new Dict();
  d_aToFmap.clear();
  initFromString(pickle);
}
void FragCatalogEntry::setDescription(const FragCatParams *params) {
PRECONDITION(params, "");
for (auto &[atIdx, fGroups] : d_aToFmap) {
std::string label;
std::string temp;
const ROMol *fGroup = nullptr;
auto fGroupIdx = fGroups.cbegin();
for (unsigned int i = 0; i < fGroups.size() - 1; i++) {
fGroup = params->getFuncGroup(*fGroupIdx);
fGroup->getProp(common_properties::_Name, temp);
label += "(<" + temp + ">)";
++fGroupIdx;
}
fGroup = params->getFuncGroup(*fGroupIdx);
fGroup->getProp(common_properties::_Name, temp);
label += "<" + temp + ">";
dp_mol->getAtomWithIdx(atIdx)->setProp(
common_properties::_supplementalSmilesLabel, label);
}
d_descrip = MolToSmiles(*dp_mol);
};
bool FragCatalogEntry::match(const FragCatalogEntry *other, double tol) const {
PRECONDITION(other, "bad fragment to compare");
if (d_order != other->getOrder()) {
return false;
}
// now check if both the entries have the same number of functional groups
const INT_INT_VECT_MAP &oFgpMap = other->getFuncGroupMap();
if (oFgpMap.size() != d_aToFmap.size()) {
return false;
}
// now check if the IDs are the same
for (const auto &tfi : d_aToFmap) {
bool found = false;
for (const auto &ofi : oFgpMap) {
if (tfi.second == ofi.second) {
found = true;
break;
}
}
if (!found) {
return false;
}
}
// FIX: if might be better if we just do the balaban first and then
// move onto eigen values
Subgraphs::DiscrimTuple tdiscs, odiscs;
odiscs = other->getDiscrims();
tdiscs = this->getDiscrims();
// REVIEW: need an overload of feq that handles tuples in MolOps, or wherever
// DiscrimTuple is defined
// FIX: this may not be enough
// we may have to do the actual isomorphism mapping
return feq(std::get<0>(tdiscs), std::get<0>(odiscs), tol) &&
feq(std::get<1>(tdiscs), std::get<1>(odiscs), tol) &&
feq(std::get<2>(tdiscs), std::get<2>(odiscs), tol);
}
// Returns the path discriminators of the fragment.
//
// If the Discrims property is already set on this entry its value is
// returned. Otherwise the discriminators are calculated over all bonds of
// dp_mol, using per-atom invariants derived from the functional groups
// attached to each atom, and the result is stored in the Discrims property
// before being returned.
Subgraphs::DiscrimTuple FragCatalogEntry::getDiscrims() const {
  Subgraphs::DiscrimTuple discrims;
  if (this->hasProp(common_properties::Discrims)) {
    this->getProp(common_properties::Discrims, discrims);
    return discrims;
  }

  // the path is simply every bond of the fragment: 0, 1, ..., numBonds-1
  PATH_TYPE allBonds(dp_mol->getNumBonds(), 0);
  std::iota(allBonds.begin(), allBonds.end(), 0);

  // create invariant additions to reflect the functional groups attached to
  // the atoms; atoms without functional groups keep an invariant of 0
  const auto nAtoms = dp_mol->getNumAtoms();
  std::vector<std::uint32_t> funcGpInvars(nAtoms, 0);
  gboost::hash<INT_VECT> vectHasher;
  for (unsigned int aid = 0; aid < nAtoms; ++aid) {
    const auto entry = d_aToFmap.find(aid);
    if (entry == d_aToFmap.end()) {
      continue;
    }
    // hash a sorted copy so that the order the groups were added in does
    // not affect the invariant
    INT_VECT sortedGroups(entry->second);
    std::sort(sortedGroups.begin(), sortedGroups.end());
    funcGpInvars[aid] = vectHasher(sortedGroups);
  }

  discrims =
      Subgraphs::calcPathDiscriminators(*dp_mol, allBonds, true, &funcGpInvars);
  this->setProp(common_properties::Discrims, discrims);
  return discrims;
}
// Writes the binary representation of this entry to `ss`.
//
// Layout (every integer is written as a std::int32_t, which is what
// initFromStream() reads back):
//   - the pickled molecule
//   - the bit id
//   - the description: its length followed by its characters
//   - the order
//   - the number of entries in the functional group map, then for each entry:
//     the atom index, the number of functional groups and the group ids
void FragCatalogEntry::toStream(std::ostream &ss) const {
  MolPickler::pickleMol(*dp_mol, ss);

  std::int32_t tmpInt;
  tmpInt = getBitId();
  streamWrite(ss, tmpInt);

  tmpInt = d_descrip.size();
  streamWrite(ss, tmpInt);
  ss.write(d_descrip.c_str(), tmpInt * sizeof(char));

  tmpInt = d_order;
  streamWrite(ss, tmpInt);

  tmpInt = d_aToFmap.size();
  streamWrite(ss, tmpInt);
  // iterate by reference: there is no need to copy each group list
  for (const auto &[atomIdx, fGroups] : d_aToFmap) {
    tmpInt = atomIdx;
    streamWrite(ss, tmpInt);
    tmpInt = fGroups.size();
    streamWrite(ss, tmpInt);
    for (const auto fGroup : fGroups) {
      // go through tmpInt so that the id is written with the same fixed
      // width as everything else, independent of the platform's `int`
      tmpInt = fGroup;
      streamWrite(ss, tmpInt);
    }
  }
}
std::string FragCatalogEntry::Serialize() const {
std::stringstream ss(std::ios_base::binary | std::ios_base::out |
std::ios_base::in);
toStream(ss);
return ss.str();
}
void FragCatalogEntry::initFromStream(std::istream &ss) {
// the molecule:
dp_mol = new ROMol();
MolPickler::molFromPickle(ss, *dp_mol);
std::int32_t tmpInt;
// the bitId:
streamRead(ss, tmpInt);
setBitId(tmpInt);
// the description:
streamRead(ss, tmpInt);
auto *tmpText = new char[tmpInt + 1];
ss.read(tmpText, tmpInt * sizeof(char));
tmpText[tmpInt] = 0;
d_descrip = tmpText;
delete[] tmpText;
streamRead(ss, tmpInt);
d_order = tmpInt;
// now the map:
streamRead(ss, tmpInt);
for (int i = 0; i < tmpInt; i++) {
std::int32_t key, value, size;
streamRead(ss, key);
streamRead(ss, size);
INT_VECT tmpVect;
tmpVect.clear();
for (int j = 0; j < size; j++) {
streamRead(ss, value);
tmpVect.push_back(value);
}
d_aToFmap[key] = tmpVect;
}
}
void FragCatalogEntry::initFromString(const std::string &text) {
std::stringstream ss(std::ios_base::binary | std::ios_base::out |
std::ios_base::in);
// initialize the stream:
ss.write(text.c_str(), text.length());
// now start reading out values:
initFromStream(ss);
}
} // namespace RDKit