Files
rdkit/Code/GraphMol/new_canon.cpp
tadhurst-cdd 0de215a1f8 Fix canonicalization of stereogroups (#7041)
* atropisomer handling added

* fixed non-used variables,  linking directives

* BOOST LIB start/stop fixes, linking fix

* Fixes for RDKIT CI errors

* minimalLib fix

* changed vector<enum> for java builds

* check for extra chars in CIP labeling

* removed wrong deprecated message

* fix ostrstream output error?

* restored _ChiralAtomRank to lowercase first letter

* changes for merged master

* Fixed catch label for new Catch package

* update expected psql results

* get swig wrappers building

* restore MolFileStereochem to FileParsers

* fix java wrapper for reapplyMolBlockWedging

* test changes

* some suggestions

* move a couple functions out of Bond

* Merge branch 'master' into pr/atropisomers2

* merged master

* Renamed setStereoanyFromSquiggleBond

* atropisomers in cdxml, rationalize atrop wedging, stereoGroups in drawMol

* Merge branch 'master' into pr/specialQueries

* changes from previous PR

* Iclude false chiral

* rigorous enhnced stereo canoncalization

* Added more tests and clenup

* removed commented out code

* corrected init of SmilesWriteParams

* added MolFileStereoChem.h to the header files

* Renamed Rxn parser to MrvBlockToChemicalReaction

* To make catch2 work, and match the checksum

* Fixed Structchecker errors

* fix CI for DetermineBonds catch test

* error in catch_test for CI

* Allow custom  smileWriteParams  in GetMolLayers

* misnamed entry point

* ReactionFromMrvString change name

* remove adding writeParams to GetMolLayers

* make rigorous enhanced stereo the default, and fix tests

* only one abs group no longer needs Rigorous Enhanced treatment

* changed string_view to string in catch test

* Canonicalize Enhnaced Stereo only resturne unique smiles

* Now allows or and and groups together

* internal routines inside detail scope

* fix test error

* changed string back to string_view and fixed a CHECK

* Fixes for PR review tests

* Fix RDKit_Book.rst failure on build test

* fix xqm sql test

* updated expected files for cxsmiles_test

* Fixed removal of atom attrs

* Fixed tests after merge of master

* More efficient version of Stereo Groups Canonicalization

* Fixes for ctests

* removed debug code

* readded cipLabel test

* fix generalizedSubstruct/catch_tests.cpp error

* hueristics to improve speed

* Rationaized control of abs groups

* removed unused routine

* added rigorous stereo group treatment to test

* some suggested changes

* Changes per PR review and removed some changes to smiles

* Fixed CI errors

* changes per PR review

* more PR review vhanges and cleanup

* Fixed PSql PKL change

* changes as per PR review

* Restored error type for bad mols for canonicalizeStereoGroups and added a test

* Merge master and fix test in MolDraw2D

* Fix for randomize test error and other PR review comments

* Removed unsued variable to fix mac CI

* do not force aromatization in canonicalizeStereoGroups

* changes as per PR review

---------

Co-authored-by: greg landrum <greg.landrum@gmail.com>
2024-10-11 17:09:18 +02:00

876 lines
30 KiB
C++

//
// Copyright (C) 2014 Greg Landrum
// Adapted from pseudo-code from Roger Sayle
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
#include "new_canon.h"
#include <GraphMol/RDKitBase.h>
#include <GraphMol/QueryOps.h>
#include <GraphMol/Atropisomers.h>
#include <cstdint>
#include <cstring>
#include <iostream>
#include <cassert>
// #define VERBOSE_CANON 1
namespace RDKit {
namespace Canon {
namespace {
void flipIfNeeded(Bond::BondStereo &st1,
const canon_atom *const *controllingAtoms) {
CHECK_INVARIANT(controllingAtoms[0], "missing controlling atom");
CHECK_INVARIANT(controllingAtoms[2], "missing controlling atom");
bool flip = false;
if (controllingAtoms[1] &&
controllingAtoms[1]->index > controllingAtoms[0]->index) {
flip = !flip;
}
if (controllingAtoms[3] &&
controllingAtoms[3]->index > controllingAtoms[2]->index) {
flip = !flip;
}
if (flip) {
if (st1 == Bond::BondStereo::STEREOCIS) {
st1 = Bond::BondStereo::STEREOTRANS;
} else if (st1 == Bond::BondStereo::STEREOTRANS) {
st1 = Bond::BondStereo::STEREOCIS;
}
}
}
} // namespace
int bondholder::compareStereo(const bondholder &o) const {
auto st1 = stype;
auto st2 = o.stype;
if (st1 == Bond::BondStereo::STEREONONE) {
if (st2 == Bond::BondStereo::STEREONONE) {
return 0;
} else {
return -1;
}
}
if (st2 == Bond::BondStereo::STEREONONE) {
return 1;
}
if (st1 == Bond::BondStereo::STEREOANY) {
if (st2 == Bond::BondStereo::STEREOANY) {
return 0;
} else {
return -1;
}
}
if (st2 == Bond::BondStereo::STEREOANY) {
return 1;
}
// we have some kind of specified stereo on both bonds, work is required
// if both have absolute stereo labels we can compare them directly
if ((st1 == Bond::BondStereo::STEREOE || st1 == Bond::BondStereo::STEREOZ) &&
(st2 == Bond::BondStereo::STEREOE || st2 == Bond::BondStereo::STEREOZ)) {
if (st1 < st2) {
return -1;
} else if (st1 > st2) {
return 1;
}
return 0;
}
// check to see if we need to flip the controlling atoms due to atom ranks
flipIfNeeded(st1, controllingAtoms);
flipIfNeeded(st2, o.controllingAtoms);
if (st1 < st2) {
return -1;
} else if (st1 > st2) {
return 1;
}
return 0;
}
void CreateSinglePartition(unsigned int nAtoms, int *order, int *count,
canon_atom *atoms) {
PRECONDITION(order, "bad pointer");
PRECONDITION(count, "bad pointer");
PRECONDITION(atoms, "bad pointer");
for (unsigned int i = 0; i < nAtoms; i++) {
atoms[i].index = 0;
order[i] = i;
count[i] = 0;
}
count[0] = nAtoms;
}
void ActivatePartitions(unsigned int nAtoms, int *order, int *count,
int &activeset, int *next, int *changed) {
PRECONDITION(order, "bad pointer");
PRECONDITION(count, "bad pointer");
PRECONDITION(next, "bad pointer");
PRECONDITION(changed, "bad pointer");
unsigned int i, j;
activeset = -1;
for (i = 0; i < nAtoms; i++) {
next[i] = -2;
}
i = 0;
do {
j = order[i];
if (count[j] > 1) {
next[j] = activeset;
activeset = j;
i += count[j];
} else {
i++;
}
} while (i < nAtoms);
for (i = 0; i < nAtoms; i++) {
j = order[i];
int flag = 1;
// #define SKIP_NODE_CHANGED_OPTIMIZATION 0
// #ifndef SKIP_NODE_CHANGED_OPTIMIZATION
// if(count[j]){
// std::cout << "j " << j << std::endl;
// flag=(next[j]!=-2);
// }
// #endif
changed[j] = flag;
}
}
void compareRingAtomsConcerningNumNeighbors(Canon::canon_atom *atoms,
unsigned int nAtoms,
const ROMol &mol) {
PRECONDITION(atoms, "bad pointer");
RingInfo *ringInfo = mol.getRingInfo();
auto visited = std::make_unique<char[]>(nAtoms);
auto lastLevelNbrs = std::make_unique<char[]>(nAtoms);
auto currentLevelNbrs = std::make_unique<char[]>(nAtoms);
auto revisitedNeighbors = std::make_unique<int[]>(nAtoms);
for (unsigned idx = 0; idx < nAtoms; ++idx) {
const Canon::canon_atom &a = atoms[idx];
if (!ringInfo->isInitialized() ||
ringInfo->numAtomRings(a.atom->getIdx()) < 1) {
continue;
}
std::deque<int> neighbors;
neighbors.push_back(idx);
unsigned currentRNIdx = 0;
atoms[idx].neighborNum.reserve(1000);
atoms[idx].revistedNeighbors.assign(1000, 0);
memset(visited.get(), 0, nAtoms * sizeof(char));
memset(lastLevelNbrs.get(), 0, nAtoms * sizeof(char));
memset(currentLevelNbrs.get(), 0, nAtoms * sizeof(char));
memset(revisitedNeighbors.get(), 0, nAtoms * sizeof(int));
std::vector<int> nextLevelNbrs;
while (!neighbors.empty()) {
unsigned int numLevelNbrs = 0;
nextLevelNbrs.resize(0);
while (!neighbors.empty()) {
int nidx = neighbors.front();
neighbors.pop_front();
const Canon::canon_atom &atom = atoms[nidx];
if (!ringInfo->isInitialized() ||
ringInfo->numAtomRings(atom.atom->getIdx()) < 1) {
continue;
}
lastLevelNbrs[nidx] = 1;
visited[nidx] = 1;
for (unsigned int j = 0; j < atom.degree; j++) {
int iidx = atom.nbrIds[j];
if (!visited[iidx]) {
currentLevelNbrs[iidx] = 1;
numLevelNbrs++;
visited[iidx] = 1;
nextLevelNbrs.push_back(iidx);
}
}
}
for (unsigned i = 0; i < nAtoms; ++i) {
if (currentLevelNbrs[i]) {
const Canon::canon_atom &natom = atoms[i];
for (unsigned int k = 0; k < natom.degree; k++) {
int jidx = natom.nbrIds[k];
if (currentLevelNbrs[jidx] || lastLevelNbrs[jidx]) {
revisitedNeighbors[jidx] += 1;
}
}
}
}
memset(lastLevelNbrs.get(), 0, nAtoms * sizeof(char));
for (unsigned i = 0; i < nAtoms; ++i) {
if (currentLevelNbrs[i]) {
lastLevelNbrs[i] = 1;
}
}
memset(currentLevelNbrs.get(), 0, nAtoms * sizeof(char));
std::vector<int> tmp;
tmp.reserve(30);
for (unsigned i = 0; i < nAtoms; ++i) {
if (revisitedNeighbors[i] > 0) {
tmp.push_back(revisitedNeighbors[i]);
}
}
std::sort(tmp.begin(), tmp.end());
tmp.push_back(-1);
for (int i : tmp) {
if (currentRNIdx >= atoms[idx].revistedNeighbors.size()) {
atoms[idx].revistedNeighbors.resize(
atoms[idx].revistedNeighbors.size() + 1000);
}
atoms[idx].revistedNeighbors[currentRNIdx] = i;
currentRNIdx++;
}
memset(revisitedNeighbors.get(), 0, nAtoms * sizeof(int));
atoms[idx].neighborNum.push_back(numLevelNbrs);
atoms[idx].neighborNum.push_back(-1);
neighbors.insert(neighbors.end(), nextLevelNbrs.begin(),
nextLevelNbrs.end());
}
atoms[idx].revistedNeighbors.resize(currentRNIdx);
}
}
namespace detail {
template <typename T>
void rankWithFunctor(T &ftor, bool breakTies, int *order, bool useSpecial,
bool useChirality,
const boost::dynamic_bitset<> *atomsInPlay,
const boost::dynamic_bitset<> *bondsInPlay) {
PRECONDITION(order, "bad pointer");
const ROMol &mol = *ftor.dp_mol;
canon_atom *atoms = ftor.dp_atoms;
unsigned int nAts = mol.getNumAtoms();
// auto order = std::make_unique<int[]>(mol.getNumAtoms());
auto count = std::make_unique<int[]>(nAts);
auto next = std::make_unique<int[]>(nAts);
auto changed = std::make_unique<int[]>(nAts);
memset(changed.get(), 1, nAts * sizeof(int));
auto touched = std::make_unique<char[]>(nAts);
memset(touched.get(), 0, nAts * sizeof(char));
int activeset;
CreateSinglePartition(nAts, order, count.get(), atoms);
// ActivatePartitions(nAts,order,count,activeset,next,changed);
// RefinePartitions(mol,atoms,ftor,false,order,count,activeset,next,changed,touched);
#ifdef VERBOSE_CANON
std::cerr << "1--------" << std::endl;
for (unsigned int i = 0; i < mol.getNumAtoms(); ++i) {
std::cerr << order[i] + 1 << " " << " index: " << atoms[order[i]].index
<< " count: " << count[order[i]] << std::endl;
}
#endif
ftor.df_useNbrs = true;
ActivatePartitions(nAts, order, count.get(), activeset, next.get(),
changed.get());
#ifdef VERBOSE_CANON
std::cerr << "1a--------" << std::endl;
for (unsigned int i = 0; i < mol.getNumAtoms(); ++i) {
std::cerr << order[i] + 1 << " " << " index: " << atoms[order[i]].index
<< " count: " << count[order[i]] << std::endl;
}
#endif
RefinePartitions(mol, atoms, ftor, true, order, count.get(), activeset,
next.get(), changed.get(), touched.get());
#ifdef VERBOSE_CANON
std::cerr << "2--------" << std::endl;
for (unsigned int i = 0; i < mol.getNumAtoms(); ++i) {
std::cerr << order[i] + 1 << " " << " index: " << atoms[order[i]].index
<< " count: " << count[order[i]] << std::endl;
}
#endif
bool ties = false;
for (unsigned i = 0; i < nAts; ++i) {
if (!count[i]) {
ties = true;
}
}
if (useChirality && ties) {
SpecialChiralityAtomCompareFunctor scftor(atoms, mol, atomsInPlay,
bondsInPlay);
ActivatePartitions(nAts, order, count.get(), activeset, next.get(),
changed.get());
RefinePartitions(mol, atoms, scftor, true, order, count.get(), activeset,
next.get(), changed.get(), touched.get());
#ifdef VERBOSE_CANON
std::cerr << "2a--------" << std::endl;
for (unsigned int i = 0; i < mol.getNumAtoms(); ++i) {
std::cerr << order[i] + 1 << " " << " index: " << atoms[order[i]].index
<< " count: " << count[order[i]] << std::endl;
}
#endif
}
ties = false;
unsigned symRingAtoms = 0;
unsigned ringAtoms = 0;
bool branchingRingAtom = false;
RingInfo *ringInfo = mol.getRingInfo();
for (unsigned i = 0; i < nAts; ++i) {
if (ringInfo->isInitialized() && ringInfo->numAtomRings(order[i])) {
if (count[order[i]] > 2) {
symRingAtoms += count[order[i]];
}
ringAtoms++;
if (ringInfo->isInitialized() && ringInfo->numAtomRings(order[i]) > 1 &&
count[order[i]] > 1) {
branchingRingAtom = true;
}
}
if (!count[i]) {
ties = true;
}
}
// std::cout << " " << ringAtoms << " " << symRingAtoms << std::endl;
if (useSpecial && ties && ringAtoms > 0 &&
static_cast<float>(symRingAtoms) / ringAtoms > 0.5 && branchingRingAtom) {
SpecialSymmetryAtomCompareFunctor sftor(atoms, mol, atomsInPlay,
bondsInPlay);
compareRingAtomsConcerningNumNeighbors(atoms, nAts, mol);
ActivatePartitions(nAts, order, count.get(), activeset, next.get(),
changed.get());
RefinePartitions(mol, atoms, sftor, true, order, count.get(), activeset,
next.get(), changed.get(), touched.get());
#ifdef VERBOSE_CANON
std::cerr << "2b--------" << std::endl;
for (unsigned int i = 0; i < mol.getNumAtoms(); ++i) {
std::cerr << order[i] + 1 << " " << " index: " << atoms[order[i]].index
<< " count: " << count[order[i]] << std::endl;
}
#endif
}
if (breakTies) {
BreakTies(mol, atoms, ftor, true, order, count.get(), activeset, next.get(),
changed.get(), touched.get());
#ifdef VERBOSE_CANON
std::cerr << "3--------" << std::endl;
for (unsigned int i = 0; i < mol.getNumAtoms(); ++i) {
std::cerr << order[i] + 1 << " " << " index: " << atoms[order[i]].index
<< " count: " << count[order[i]] << std::endl;
}
#endif
}
}
} // namespace detail
namespace {
bool hasRingNbr(const ROMol &mol, const Atom *at) {
PRECONDITION(at, "bad pointer");
for (const auto nbr : mol.atomNeighbors(at)) {
if ((nbr->getChiralTag() == Atom::CHI_TETRAHEDRAL_CW ||
nbr->getChiralTag() == Atom::CHI_TETRAHEDRAL_CCW) &&
nbr->hasProp(common_properties::_ringStereoAtoms)) {
return true;
}
}
return false;
}
void getNbrs(const ROMol &mol, const Atom *at, int *ids) {
PRECONDITION(at, "bad pointer");
PRECONDITION(ids, "bad pointer");
ROMol::ADJ_ITER beg, end;
boost::tie(beg, end) = mol.getAtomNeighbors(at);
unsigned int idx = 0;
while (beg != end) {
ids[idx++] = static_cast<int>(*beg++);
}
}
bondholder makeBondHolder(const Bond *bond, unsigned int otherIdx,
bool includeChirality,
const std::vector<Canon::canon_atom> &atoms) {
PRECONDITION(bond, "bad pointer");
Bond::BondStereo stereo = Bond::STEREONONE;
if (includeChirality) {
stereo = bond->getStereo();
}
Bond::BondType bt =
bond->getIsAromatic() ? Bond::AROMATIC : bond->getBondType();
bondholder res(bt, stereo, otherIdx, 0, bond->getIdx());
if (includeChirality) {
res.stype = bond->getStereo();
if (res.stype == Bond::BondStereo::STEREOCIS ||
res.stype == Bond::BondStereo::STEREOTRANS) {
res.controllingAtoms[0] = &atoms[bond->getStereoAtoms()[0]];
res.controllingAtoms[2] = &atoms[bond->getStereoAtoms()[1]];
if (bond->getBeginAtom()->getDegree() > 2) {
for (const auto nbr :
bond->getOwningMol().atomNeighbors(bond->getBeginAtom())) {
if (nbr->getIdx() != bond->getEndAtomIdx() &&
nbr->getIdx() !=
static_cast<unsigned int>(bond->getStereoAtoms()[0])) {
res.controllingAtoms[1] = &atoms[nbr->getIdx()];
}
}
}
if (bond->getEndAtom()->getDegree() > 2) {
for (const auto nbr :
bond->getOwningMol().atomNeighbors(bond->getEndAtom())) {
if (nbr->getIdx() != bond->getBeginAtomIdx() &&
nbr->getIdx() !=
static_cast<unsigned int>(bond->getStereoAtoms()[1])) {
res.controllingAtoms[3] = &atoms[nbr->getIdx()];
}
}
}
}
if (res.stype == Bond::BondStereo::STEREOATROPCCW ||
res.stype == Bond::BondStereo::STEREOATROPCW) {
Atropisomers::AtropAtomAndBondVec atropAtomAndBondVecs[2];
CHECK_INVARIANT(Atropisomers::getAtropisomerAtomsAndBonds(
bond, atropAtomAndBondVecs, bond->getOwningMol()),
"Could not find atropisomer controlling atoms")
res.controllingAtoms[0] =
&atoms[atropAtomAndBondVecs[0]
.second[0]
->getOtherAtom(atropAtomAndBondVecs[0].first)
->getIdx()];
res.controllingAtoms[2] =
&atoms[atropAtomAndBondVecs[1]
.second[0]
->getOtherAtom(atropAtomAndBondVecs[1].first)
->getIdx()];
if (atropAtomAndBondVecs[0].second.size() > 1) {
res.controllingAtoms[1] =
&atoms[atropAtomAndBondVecs[0]
.second[1]
->getOtherAtom(atropAtomAndBondVecs[0].first)
->getIdx()];
}
if (atropAtomAndBondVecs[1].second.size() > 1) {
res.controllingAtoms[3] =
&atoms[atropAtomAndBondVecs[1]
.second[1]
->getOtherAtom(atropAtomAndBondVecs[1].first)
->getIdx()];
}
}
}
return res;
}
void getBonds(const ROMol &mol, const Atom *at, std::vector<bondholder> &nbrs,
bool includeChirality,
const std::vector<Canon::canon_atom> &atoms) {
PRECONDITION(at, "bad pointer");
ROMol::OEDGE_ITER beg, end;
boost::tie(beg, end) = mol.getAtomBonds(at);
while (beg != end) {
const Bond *bond = (mol)[*beg];
++beg;
nbrs.push_back(makeBondHolder(bond, bond->getOtherAtomIdx(at->getIdx()),
includeChirality, atoms));
}
std::sort(nbrs.begin(), nbrs.end(), bondholder::greater);
}
void getChiralBonds(const ROMol &mol, const Atom *at,
std::vector<bondholder> &nbrs) {
PRECONDITION(at, "bad pointer");
ROMol::OEDGE_ITER beg, end;
boost::tie(beg, end) = mol.getAtomBonds(at);
while (beg != end) {
const Bond *bond = (mol)[*beg];
++beg;
unsigned int nbrIdx = bond->getOtherAtomIdx(at->getIdx());
const Atom *nbr = mol.getAtomWithIdx(nbrIdx);
unsigned int degreeNbr = nbr->getDegree();
unsigned int nReps = 1;
unsigned int stereo = 0;
// FIX: Since the user can set the stereoatoms, the use of STEREOCIS and
// STEREOTRANS here isn't actually canonical. In order for that to work we
// would need to be able to canonicalize the CIS/TRANS assignment, which
// is not currently being done
switch (bond->getStereo()) {
case Bond::STEREOZ:
case Bond::STEREOCIS:
stereo = 1;
break;
case Bond::STEREOE:
case Bond::STEREOTRANS:
stereo = 2;
break;
default:
stereo = 0;
}
if (bond->getBondType() == Bond::DOUBLE && nbr->getAtomicNum() == 15 &&
(degreeNbr == 4 || degreeNbr == 3)) {
// a special case for chiral phosphorous compounds
// (this was leading to incorrect assignment of
// R/S labels ):
nReps = 1;
// general justification of this is:
// Paragraph 2.2. in the 1966 article is "Valence-Bond Conventions:
// Multiple-Bond Unsaturation and Aromaticity". It contains several
// conventions of which convention (b) is the one applying here:
// "(b) Contributions by d orbitals to bonds of quadriligant atoms are
// neglected."
// FIX: this applies to more than just P
} else {
nReps =
static_cast<unsigned int>(floor(2. * bond->getBondTypeAsDouble()));
}
unsigned int symclass =
nbr->getAtomicNum() * ATNUM_CLASS_OFFSET + nbrIdx + 1;
bondholder bh(
bondholder(Bond::SINGLE, stereo, nbrIdx, symclass, bond->getIdx()));
auto iPos = std::lower_bound(nbrs.begin(), nbrs.end(), bh);
nbrs.insert(iPos, nReps, bh);
}
std::reverse(nbrs.begin(), nbrs.end());
if (!at->needsUpdatePropertyCache()) {
for (unsigned int ii = 0; ii < at->getTotalNumHs(); ++ii) {
nbrs.emplace_back(Bond::SINGLE, Bond::STEREONONE, ATNUM_CLASS_OFFSET,
ATNUM_CLASS_OFFSET, 0);
nbrs.emplace_back(Bond::SINGLE, Bond::STEREONONE, ATNUM_CLASS_OFFSET,
ATNUM_CLASS_OFFSET, 0);
}
}
}
void basicInitCanonAtom(const ROMol &mol, Canon::canon_atom &atom,
const int &idx) {
atom.atom = mol.getAtomWithIdx(idx);
atom.index = idx;
atom.p_symbol = nullptr;
atom.degree = atom.atom->getDegree();
atom.nbrIds = std::make_unique<int[]>(atom.degree);
getNbrs(mol, atom.atom, atom.nbrIds.get());
}
void advancedInitCanonAtom(const ROMol &mol, Canon::canon_atom &atom,
const int &) {
atom.totalNumHs = atom.atom->getTotalNumHs();
atom.isRingStereoAtom =
(atom.atom->getChiralTag() == Atom::CHI_TETRAHEDRAL_CW ||
atom.atom->getChiralTag() == Atom::CHI_TETRAHEDRAL_CCW) &&
atom.atom->hasProp(common_properties::_ringStereoAtoms);
atom.hasRingNbr = hasRingNbr(mol, atom.atom);
}
} // end anonymous namespace
void initCanonAtoms(const ROMol &mol, std::vector<Canon::canon_atom> &atoms,
bool includeChirality, bool includeStereoGroups) {
for (unsigned int i = 0; i < mol.getNumAtoms(); ++i) {
basicInitCanonAtom(mol, atoms[i], i);
advancedInitCanonAtom(mol, atoms[i], i);
atoms[i].bonds.reserve(atoms[i].degree);
getBonds(mol, atoms[i].atom, atoms[i].bonds, includeChirality, atoms);
}
if (includeChirality && includeStereoGroups) {
unsigned int sgidx = 1;
for (auto &sg : mol.getStereoGroups()) {
for (auto atom : sg.getAtoms()) {
atoms[atom->getIdx()].whichStereoGroup = sgidx;
atoms[atom->getIdx()].typeOfStereoGroup = sg.getGroupType();
}
++sgidx;
}
}
}
namespace detail {
void initFragmentCanonAtoms(const ROMol &mol,
std::vector<Canon::canon_atom> &atoms,
bool includeChirality,
const std::vector<std::string> *atomSymbols,
const std::vector<std::string> *bondSymbols,
const boost::dynamic_bitset<> &atomsInPlay,
const boost::dynamic_bitset<> &bondsInPlay,
bool needsInit) {
needsInit = true;
PRECONDITION(!atomSymbols || atomSymbols->size() == mol.getNumAtoms(),
"bad atom symbols");
PRECONDITION(!bondSymbols || bondSymbols->size() == mol.getNumBonds(),
"bad bond symbols");
// start by initializing the atoms
for (const auto atom : mol.atoms()) {
auto i = atom->getIdx();
auto &atomsi = atoms[i];
atomsi.atom = atom;
atomsi.index = i;
// we don't care about overall degree, so we start that at zero, and
// then count the degree in the fragment itself below.
atomsi.degree = 0;
if (atomsInPlay[i]) {
if (atomSymbols) {
atomsi.p_symbol = &(*atomSymbols)[i];
} else {
atomsi.p_symbol = nullptr;
}
if (needsInit) {
atomsi.nbrIds = std::make_unique<int[]>(atom->getDegree());
advancedInitCanonAtom(mol, atomsi, i);
atomsi.bonds.reserve(4);
}
}
}
// now deal with the bonds in the fragment.
// these set the atomic degrees and update the neighbor lists
if (needsInit) {
for (const auto bond : mol.bonds()) {
if (!bondsInPlay[bond->getIdx()] ||
!atomsInPlay[bond->getBeginAtomIdx()] ||
!atomsInPlay[bond->getEndAtomIdx()]) {
continue;
}
Canon::canon_atom &begAt = atoms[bond->getBeginAtomIdx()];
Canon::canon_atom &endAt = atoms[bond->getEndAtomIdx()];
begAt.nbrIds[begAt.degree++] = bond->getEndAtomIdx();
endAt.nbrIds[endAt.degree++] = bond->getBeginAtomIdx();
begAt.bonds.push_back(
makeBondHolder(bond, bond->getEndAtomIdx(), includeChirality, atoms));
endAt.bonds.push_back(makeBondHolder(bond, bond->getBeginAtomIdx(),
includeChirality, atoms));
if (bondSymbols) {
begAt.bonds.back().p_symbol = &(*bondSymbols)[bond->getIdx()];
endAt.bonds.back().p_symbol = &(*bondSymbols)[bond->getIdx()];
}
}
} else {
if (bondSymbols) {
for (auto &atom : atoms) {
for (auto &bond : atom.bonds) {
bond.p_symbol = &(*bondSymbols)[bond.bondIdx];
}
}
}
}
// and now we can do the last bit for each atom
for (size_t i = 0; i < mol.getNumAtoms(); ++i) {
if (!atomsInPlay[i]) {
continue;
}
auto &atomsi = atoms[i];
if (needsInit) {
// this is the fix for github #1567: we let the atom's degree
// in the original molecule influence its degree in the fragment
atomsi.totalNumHs += (mol.getAtomWithIdx(i)->getDegree() - atomsi.degree);
}
// and sort our list of neighboring bonds
std::sort(atomsi.bonds.begin(), atomsi.bonds.end(), bondholder::greater);
}
}
void initChiralCanonAtoms(const ROMol &mol,
std::vector<Canon::canon_atom> &atoms) {
for (unsigned int i = 0; i < mol.getNumAtoms(); ++i) {
basicInitCanonAtom(mol, atoms[i], i);
getChiralBonds(mol, atoms[i].atom, atoms[i].bonds);
}
}
} // namespace detail
void updateAtomNeighborIndex(canon_atom *atoms, std::vector<bondholder> &nbrs) {
PRECONDITION(atoms, "bad pointer");
for (auto &nbr : nbrs) {
unsigned nbrIdx = nbr.nbrIdx;
unsigned newSymClass = atoms[nbrIdx].index;
nbr.nbrSymClass = newSymClass;
}
std::sort(nbrs.begin(), nbrs.end(), bondholder::greater);
}
// This routine calculates the number of swaps that would be required to
// determine what the smiles chirality value would be for a given chiral atom
// given that the atom is visited first from the atom of interest.
// This is used to determine which of two atoms has priority based on the
// neighbor's chirality
//
// If the chiral neighbor has two equivlent (at least so far) neighbors that are
// not the atom of interest, it cannot be used to determine the priority of the
// atom of interest. For this reason, we keep track of the number of neighbors
// that have the same priority so far. If any two are the same, we do NOT use
// that neighbor to determine the priority of the atom of interest.
void updateAtomNeighborNumSwaps(
canon_atom *atoms, std::vector<bondholder> &nbrs, unsigned int atomIdx,
std::vector<std::pair<unsigned int, unsigned int>> &result) {
bool isRingAtom = queryIsAtomInRing(atoms[atomIdx].atom);
for (auto &nbr : nbrs) {
unsigned nbrIdx = nbr.nbrIdx;
std::list<unsigned int> neighborsSeen;
bool tooManySimilarNbrs = false;
if (isRingAtom && atoms[nbrIdx].atom->getChiralTag() != 0) {
std::vector<int> ref, probe;
for (unsigned i = 0; i < atoms[nbrIdx].degree; ++i) {
auto nbrNbrId =
atoms[nbrIdx].nbrIds[i]; // id of the neighbor's neighbor
ref.push_back(nbrNbrId);
if ((int)atomIdx != nbrNbrId) {
if ((std::find(neighborsSeen.begin(), neighborsSeen.end(),
atoms[nbrNbrId].index) != neighborsSeen.end())) {
tooManySimilarNbrs = true;
} else {
neighborsSeen.push_back(atoms[nbrNbrId].index);
}
}
}
probe.push_back(atomIdx);
for (auto &bond : atoms[nbrIdx].bonds) {
if (bond.nbrIdx != atomIdx) {
probe.push_back(bond.nbrIdx);
}
}
if (tooManySimilarNbrs) {
result.emplace_back(nbr.nbrSymClass, 0);
} else {
int nSwaps = static_cast<int>(countSwapsToInterconvert(ref, probe));
if (atoms[nbrIdx].atom->getChiralTag() == Atom::CHI_TETRAHEDRAL_CW) {
if (nSwaps % 2) {
result.emplace_back(nbr.nbrSymClass, 2);
} else {
result.emplace_back(nbr.nbrSymClass, 1);
}
} else if (atoms[nbrIdx].atom->getChiralTag() ==
Atom::CHI_TETRAHEDRAL_CCW) {
if (nSwaps % 2) {
result.emplace_back(nbr.nbrSymClass, 1);
} else {
result.emplace_back(nbr.nbrSymClass, 2);
}
}
}
} else {
result.emplace_back(nbr.nbrSymClass, 0);
}
}
sort(result.begin(), result.end());
}
void rankMolAtoms(const ROMol &mol, std::vector<unsigned int> &res,
bool breakTies, bool includeChirality, bool includeIsotopes,
bool includeAtomMaps, bool includeChiralPresence,
bool includeStereoGroups, bool useNonStereoRanks) {
if (!mol.getNumAtoms()) {
return;
}
bool clearRings = false;
if (!mol.getRingInfo()->isFindFastOrBetter()) {
MolOps::fastFindRings(mol);
clearRings = true;
}
res.resize(mol.getNumAtoms());
std::vector<Canon::canon_atom> atoms(mol.getNumAtoms());
initCanonAtoms(mol, atoms, includeChirality, includeStereoGroups);
AtomCompareFunctor ftor(&atoms.front(), mol);
ftor.df_useIsotopes = includeIsotopes;
ftor.df_useChirality = includeChirality;
ftor.df_useChiralityRings = includeChirality;
ftor.df_useAtomMaps = includeAtomMaps;
ftor.df_useNonStereoRanks = useNonStereoRanks;
ftor.df_useChiralPresence = includeChiralPresence;
auto order = std::make_unique<int[]>(mol.getNumAtoms());
detail::rankWithFunctor(ftor, breakTies, order.get(), true, includeChirality);
for (unsigned int i = 0; i < mol.getNumAtoms(); ++i) {
res[order[i]] = atoms[order[i]].index;
}
if (clearRings) {
mol.getRingInfo()->reset();
}
} // end of rankMolAtoms()
void rankFragmentAtoms(const ROMol &mol, std::vector<unsigned int> &res,
const boost::dynamic_bitset<> &atomsInPlay,
const boost::dynamic_bitset<> &bondsInPlay,
const std::vector<std::string> *atomSymbols,
const std::vector<std::string> *bondSymbols,
bool breakTies, bool includeChirality,
bool includeIsotopes, bool includeAtomMaps,
bool includeChiralPresence) {
PRECONDITION(atomsInPlay.size() == mol.getNumAtoms(), "bad atomsInPlay size");
PRECONDITION(bondsInPlay.size() == mol.getNumBonds(), "bad bondsInPlay size");
PRECONDITION(!atomSymbols || atomSymbols->size() == mol.getNumAtoms(),
"bad atomSymbols size");
PRECONDITION(!bondSymbols || bondSymbols->size() == mol.getNumBonds(),
"bad bondSymbols size");
if (!mol.getNumAtoms()) {
return;
}
bool clearRings = false;
if (!mol.getRingInfo()->isFindFastOrBetter()) {
MolOps::fastFindRings(mol);
clearRings = true;
}
res.resize(mol.getNumAtoms());
std::vector<Canon::canon_atom> atoms(mol.getNumAtoms());
detail::initFragmentCanonAtoms(mol, atoms, includeChirality, atomSymbols,
bondSymbols, atomsInPlay, bondsInPlay, true);
AtomCompareFunctor ftor(&atoms.front(), mol, &atomsInPlay, &bondsInPlay);
ftor.df_useIsotopes = includeIsotopes;
ftor.df_useChirality = includeChirality;
ftor.df_useAtomMaps = includeAtomMaps;
ftor.df_useChiralityRings = includeChirality;
ftor.df_useChiralPresence = includeChiralPresence;
auto order = std::make_unique<int[]>(mol.getNumAtoms());
detail::rankWithFunctor(ftor, breakTies, order.get(), true, includeChirality,
&atomsInPlay, &bondsInPlay);
for (unsigned int i = 0; i < mol.getNumAtoms(); ++i) {
res[order[i]] = atoms[order[i]].index;
}
if (clearRings) {
mol.getRingInfo()->reset();
}
} // end of rankFragmentAtoms()
void chiralRankMolAtoms(const ROMol &mol, std::vector<unsigned int> &res) {
if (!mol.getNumAtoms()) {
return;
}
bool clearRings = false;
if (!mol.getRingInfo()->isFindFastOrBetter()) {
MolOps::fastFindRings(mol);
clearRings = true;
}
res.resize(mol.getNumAtoms());
std::vector<Canon::canon_atom> atoms(mol.getNumAtoms());
detail::initChiralCanonAtoms(mol, atoms);
ChiralAtomCompareFunctor ftor(&atoms.front(), mol);
auto order = std::make_unique<int[]>(mol.getNumAtoms());
detail::rankWithFunctor(ftor, false, order.get());
for (unsigned int i = 0; i < mol.getNumAtoms(); ++i) {
res[order[i]] = atoms[order[i]].index;
}
if (clearRings) {
mol.getRingInfo()->reset();
}
}
} // namespace Canon
} // namespace RDKit