mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-04 21:54:27 +08:00
* un/signed comparisons in molops * reference in loop in EmbeddedFrag.cpp * un/signed comparison in filtercatalogtest.cpp * un/signed comparison in Descriptors/catch_tests.cpp * un/signed comparisons in DetermineBonds.cpp * drop unused 'params' arg from static functions in MolInterchange/Writer.cpp * un/signed comparisons in MolDraw2D/catch_tests.cpp * unused var in SubgraphUtils.cpp * 'move' preventing copy ellision in CDXMLParser.cpp * fix infinite recursive call in SubstructLibraryWrap.cpp
243 lines
8.3 KiB
C++
243 lines
8.3 KiB
C++
// $Id$
|
|
//
|
|
// Copyright (C) 2003-2013 Greg Landrum and Rational Discovery LLC
|
|
//
|
|
// @@ All Rights Reserved @@
|
|
// This file is part of the RDKit.
|
|
// The contents are covered by the terms of the BSD license
|
|
// which is included in the file license.txt, found at the root
|
|
// of the RDKit source tree.
|
|
//
|
|
#include "SubgraphUtils.h"
|
|
#include "Subgraphs.h"
|
|
#include <RDGeneral/utils.h>
|
|
#include <GraphMol/RDKitBase.h>
|
|
#include <GraphMol/QueryAtom.h>
|
|
#include <GraphMol/QueryBond.h>
|
|
#include <iostream>
|
|
#include <algorithm>
|
|
#include <map>
|
|
#include <RDGeneral/hash/hash.hpp>
|
|
|
|
namespace RDKit {
|
|
namespace Subgraphs {
|
|
ROMol *pathToSubmol(const ROMol &mol, const PATH_TYPE &path, bool useQuery) {
|
|
INT_MAP_INT aIdxMap;
|
|
return pathToSubmol(mol, path, useQuery, aIdxMap);
|
|
}
|
|
|
|
ROMol *pathToSubmol(const ROMol &mol, const PATH_TYPE &path, bool useQuery,
|
|
INT_MAP_INT &atomIdxMap) {
|
|
auto *subMol = new RWMol();
|
|
// path needs to be in sorted order to preserve chirality
|
|
PATH_TYPE sorted_path(path);
|
|
std::sort(sorted_path.begin(), sorted_path.end());
|
|
|
|
atomIdxMap.clear();
|
|
|
|
if (useQuery) {
|
|
// have to do this in two different blocks because of issues with variable
|
|
// scopes.
|
|
for (auto bondidx : sorted_path) {
|
|
auto *bond = new QueryBond(*(mol.getBondWithIdx(bondidx)));
|
|
|
|
int begIdx = bond->getBeginAtomIdx();
|
|
int endIdx = bond->getEndAtomIdx();
|
|
|
|
if (atomIdxMap.find(begIdx) == atomIdxMap.end()) {
|
|
auto *atom = new QueryAtom(*(mol.getAtomWithIdx(begIdx)));
|
|
int newAtomIdx = subMol->addAtom(atom, false, true);
|
|
atomIdxMap[begIdx] = newAtomIdx;
|
|
}
|
|
begIdx = atomIdxMap.find(begIdx)->second;
|
|
if (atomIdxMap.find(endIdx) == atomIdxMap.end()) {
|
|
auto *atom = new QueryAtom(*(mol.getAtomWithIdx(endIdx)));
|
|
int newAtomIdx = subMol->addAtom(atom, false, true);
|
|
atomIdxMap[endIdx] = newAtomIdx;
|
|
}
|
|
endIdx = atomIdxMap.find(endIdx)->second;
|
|
|
|
bond->setOwningMol(subMol);
|
|
bond->setBeginAtomIdx(begIdx);
|
|
bond->setEndAtomIdx(endIdx);
|
|
subMol->addBond(bond, true);
|
|
}
|
|
} else {
|
|
for (auto bondidx : sorted_path) {
|
|
Bond *bond = mol.getBondWithIdx(bondidx)->copy();
|
|
|
|
int begIdx = bond->getBeginAtomIdx();
|
|
int endIdx = bond->getEndAtomIdx();
|
|
|
|
if (atomIdxMap.find(begIdx) == atomIdxMap.end()) {
|
|
Atom *atom = mol.getAtomWithIdx(begIdx)->copy();
|
|
int newAtomIdx = subMol->addAtom(atom, false, true);
|
|
atomIdxMap[begIdx] = newAtomIdx;
|
|
}
|
|
begIdx = atomIdxMap.find(begIdx)->second;
|
|
if (atomIdxMap.find(endIdx) == atomIdxMap.end()) {
|
|
Atom *atom = mol.getAtomWithIdx(endIdx)->copy();
|
|
int newAtomIdx = subMol->addAtom(atom, false, true);
|
|
atomIdxMap[endIdx] = newAtomIdx;
|
|
}
|
|
endIdx = atomIdxMap.find(endIdx)->second;
|
|
|
|
bond->setOwningMol(subMol);
|
|
bond->setBeginAtomIdx(begIdx);
|
|
bond->setEndAtomIdx(endIdx);
|
|
subMol->addBond(bond, true);
|
|
}
|
|
}
|
|
if (mol.getNumConformers()) {
|
|
// copy coordinates over:
|
|
for (auto confIt = mol.beginConformers(); confIt != mol.endConformers();
|
|
++confIt) {
|
|
auto *conf = new Conformer(subMol->getNumAtoms());
|
|
conf->set3D((*confIt)->is3D());
|
|
for (INT_MAP_INT::const_iterator mapIt = atomIdxMap.begin();
|
|
mapIt != atomIdxMap.end(); ++mapIt) {
|
|
conf->setAtomPos(mapIt->second, (*confIt)->getAtomPos(mapIt->first));
|
|
}
|
|
conf->setId((*confIt)->getId());
|
|
subMol->addConformer(conf, false);
|
|
}
|
|
}
|
|
// clear computed properties
|
|
subMol->clearComputedProps(true);
|
|
|
|
return subMol;
|
|
}
|
|
|
|
PATH_TYPE bondListFromAtomList(const ROMol &mol, const PATH_TYPE &atomIds) {
|
|
PATH_TYPE bids;
|
|
unsigned int natms = atomIds.size();
|
|
if (natms <= 1) {
|
|
return bids; // FIX: should probably throw an exception
|
|
}
|
|
for (unsigned int i = 0; i < natms; i++) {
|
|
for (unsigned int j = i + 1; j < natms; j++) {
|
|
const Bond *bnd = mol.getBondBetweenAtoms(atomIds[i], atomIds[j]);
|
|
if (bnd) {
|
|
int bid = bnd->getIdx();
|
|
bids.push_back(bid);
|
|
}
|
|
}
|
|
}
|
|
return bids;
|
|
}
|
|
|
|
using std::int32_t;
|
|
using std::uint32_t;
|
|
DiscrimTuple calcPathDiscriminators(const ROMol &mol, const PATH_TYPE &path,
|
|
bool useBO,
|
|
std::vector<std::uint32_t> *extraInvars) {
|
|
if (extraInvars) {
|
|
CHECK_INVARIANT(extraInvars->size() == mol.getNumAtoms(),
|
|
"bad extra invars");
|
|
}
|
|
|
|
// Start by collecting the atoms in the path and their degrees
|
|
std::vector<int32_t> atomsUsed(mol.getNumAtoms(),
|
|
-1); // map from atom index->path index
|
|
std::vector<const Atom *> atoms; // to contain the atoms in the path
|
|
std::vector<uint32_t> pathDegrees; // degrees of each atom *in the path*
|
|
for (int pathIter : path) {
|
|
const Bond *bond = mol.getBondWithIdx(pathIter);
|
|
if (atomsUsed[bond->getBeginAtomIdx()] < 0) {
|
|
atomsUsed[bond->getBeginAtomIdx()] = static_cast<int>(atoms.size());
|
|
atoms.push_back(bond->getBeginAtom());
|
|
pathDegrees.push_back(1);
|
|
} else {
|
|
pathDegrees[atomsUsed[bond->getBeginAtomIdx()]] += 1;
|
|
}
|
|
if (atomsUsed[bond->getEndAtomIdx()] < 0) {
|
|
atomsUsed[bond->getEndAtomIdx()] = static_cast<int>(atoms.size());
|
|
atoms.push_back(bond->getEndAtom());
|
|
pathDegrees.push_back(1);
|
|
} else {
|
|
pathDegrees[atomsUsed[bond->getEndAtomIdx()]] += 1;
|
|
}
|
|
}
|
|
|
|
// Calculate the atomic invariants
|
|
unsigned int nAtoms = atoms.size();
|
|
std::vector<uint32_t> invars(nAtoms);
|
|
for (unsigned int i = 0; i < nAtoms; ++i) {
|
|
const Atom *atom = atoms[i];
|
|
uint32_t invar = atom->getAtomicNum();
|
|
gboost::hash_combine(invar, pathDegrees[i]);
|
|
gboost::hash_combine(invar, atom->getFormalCharge());
|
|
int deltaMass = static_cast<int>(
|
|
atom->getMass() -
|
|
PeriodicTable::getTable()->getAtomicWeight(atom->getAtomicNum()));
|
|
gboost::hash_combine(invar, deltaMass);
|
|
if (atom->getIsAromatic()) {
|
|
gboost::hash_combine(invar, 1);
|
|
}
|
|
if (extraInvars) {
|
|
gboost::hash_combine(invar, (*extraInvars)[atom->getIdx()]);
|
|
}
|
|
invars[i] = invar;
|
|
}
|
|
|
|
// now do the Morgan iterations:
|
|
// the most number of cycles we need for the atoms on the edges
|
|
// to feel each other is pathSize/2
|
|
// EFF: it may be worth revisiting this at some point to see
|
|
// if the iteration count can be even smaller (and if it
|
|
// makes a difference in runtime)
|
|
unsigned int nCycles = path.size() / 2 + 1;
|
|
gboost::hash<std::vector<uint32_t>> vectHasher;
|
|
for (unsigned int cycle = 0; cycle < nCycles; ++cycle) {
|
|
// let each atom feel it's neighbors:
|
|
std::vector<std::vector<uint32_t>> locInvars(nAtoms);
|
|
for (int pathIter : path) {
|
|
const Bond *bond = mol.getBondWithIdx(pathIter);
|
|
uint32_t v1 = invars[atomsUsed[bond->getBeginAtomIdx()]];
|
|
uint32_t v2 = invars[atomsUsed[bond->getEndAtomIdx()]];
|
|
if (useBO) {
|
|
gboost::hash_combine(v1, static_cast<uint32_t>(bond->getBondType()));
|
|
gboost::hash_combine(v2, static_cast<uint32_t>(bond->getBondType()));
|
|
}
|
|
locInvars[atomsUsed[bond->getBeginAtomIdx()]].push_back(v2);
|
|
locInvars[atomsUsed[bond->getEndAtomIdx()]].push_back(v1);
|
|
}
|
|
// we need to sort by the neighbor invariants to be order
|
|
// independent:
|
|
for (unsigned int i = 0; i < nAtoms; ++i) {
|
|
std::sort(locInvars[i].begin(), locInvars[i].end());
|
|
invars[i] = vectHasher(locInvars[i]);
|
|
}
|
|
}
|
|
|
|
// again, a sort for order independence:
|
|
std::sort(invars.begin(), invars.end());
|
|
uint32_t pathInvar = vectHasher(invars);
|
|
|
|
// also include the path size (bond count) and number of atoms
|
|
// in the discriminator
|
|
return std::make_tuple(pathInvar, path.size(), nAtoms);
|
|
}
|
|
|
|
//
|
|
// This is intended for use on either subgraphs or paths.
|
|
// The entries in PATH_LIST should refer to bonds though (not
|
|
// atoms)
|
|
//
|
|
PATH_LIST uniquifyPaths(const ROMol &mol, const PATH_LIST &allPaths,
|
|
bool useBO) {
|
|
PATH_LIST res;
|
|
std::vector<DiscrimTuple> discrimsSeen;
|
|
for (const auto &allPath : allPaths) {
|
|
DiscrimTuple discrims = calcPathDiscriminators(mol, allPath, useBO);
|
|
if (std::find(discrimsSeen.begin(), discrimsSeen.end(), discrims) ==
|
|
discrimsSeen.end()) {
|
|
discrimsSeen.push_back(discrims);
|
|
res.push_back(allPath);
|
|
}
|
|
}
|
|
return res;
|
|
}
|
|
} // end of namespace Subgraphs
|
|
} // end of namespace RDKit
|