mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-03 21:44:30 +08:00
* fix leak in testConformerParser * fix leaks in testMultithreadedMolSupplier * fix leak in catch_graphmol * pass build type to YAEHMOP * cleanup fragments in CoordGen minimizeOnly * fix leaking ConjElectrons stack in res mol supplier * avoid double delete * do not delete 'this'; clean ce not added to map * delete mol if Multithreaded SD readMolProps throws * fix typo * fix typo in comment
1844 lines
64 KiB
C++
1844 lines
64 KiB
C++
//
|
|
//
|
|
// Copyright (C) 2015 Paolo Tosco
|
|
//
|
|
// @@ All Rights Reserved @@
|
|
// This file is part of the RDKit.
|
|
// The contents are covered by the terms of the BSD license
|
|
// which is included in the file license.txt, found at the root
|
|
// of the RDKit source tree.
|
|
//
|
|
#include <GraphMol/RDKitBase.h>
|
|
#include <GraphMol/MolOps.h>
|
|
#include <GraphMol/Resonance.h>
|
|
#include <RDGeneral/hash/hash.hpp>
|
|
#include <RDGeneral/RDThreads.h>
|
|
#ifdef RDK_THREADSAFE_SSS
|
|
#include <thread>
|
|
#include <future>
|
|
#endif
|
|
#include <algorithm>
|
|
|
|
namespace RDKit {
|
|
// class definitions that do not need being exposed in Resonance.h
|
|
|
|
typedef std::set<std::size_t> CESet;
|
|
typedef std::unordered_map<std::size_t, unsigned int> CEDegCount;
|
|
|
|
class CEVect2 {
|
|
public:
|
|
CEVect2(const CEMap &ceMap);
|
|
ConjElectrons *getCE(unsigned int depth, unsigned int width);
|
|
size_t ceCount() { return d_ceVect.size(); }
|
|
size_t depth() { return d_degVect.size(); }
|
|
void resize(size_t size);
|
|
void idxToDepthWidth(size_t idx, unsigned int &d, unsigned int &w);
|
|
unsigned int ceCountAtDepth(unsigned int depth);
|
|
unsigned int ceCountUntilDepth(unsigned int depth);
|
|
|
|
private:
|
|
static bool resonanceStructureCompare(const ConjElectrons *a,
|
|
const ConjElectrons *b);
|
|
CEVect d_ceVect;
|
|
std::vector<unsigned int> d_degVect;
|
|
};
|
|
|
|
class CEMetrics {
|
|
friend class ConjElectrons;
|
|
|
|
public:
|
|
CEMetrics(){};
|
|
bool operator==(const CEMetrics &other) const {
|
|
return (d_hash == other.d_hash);
|
|
}
|
|
bool operator!=(const CEMetrics &other) const {
|
|
return (d_hash != other.d_hash);
|
|
}
|
|
|
|
private:
|
|
int d_absFormalCharges{0};
|
|
int d_fcSameSignDist{0};
|
|
int d_fcOppSignDist{0};
|
|
int d_nbMissing{0};
|
|
int d_wtdFormalCharges{0};
|
|
int d_sumFormalChargeIdxs{0};
|
|
int d_sumMultipleBondIdxs{0};
|
|
std::size_t d_hash{0};
|
|
};
|
|
|
|
struct CEStats {
|
|
CEStats()
|
|
: d_init(false),
|
|
d_haveNoCationsRightOfN(false),
|
|
d_haveNoChargeSeparation(false),
|
|
d_minNbMissing(0) {}
|
|
bool d_init;
|
|
bool d_haveNoCationsRightOfN;
|
|
bool d_haveNoChargeSeparation;
|
|
unsigned int d_minNbMissing;
|
|
};
|
|
|
|
class ConjElectrons {
|
|
public:
|
|
typedef enum {
|
|
HAVE_CATION_RIGHT_OF_N = (1 << 0),
|
|
HAVE_CATION = (1 << 1),
|
|
HAVE_ANION = (1 << 2)
|
|
} ConjElectronsFlags;
|
|
typedef enum { FP_BONDS = (1 << 0), FP_ATOMS = (1 << 1) } FPFlags;
|
|
ConjElectrons(ResonanceMolSupplier *parent, unsigned int groupIdx);
|
|
ConjElectrons(const ConjElectrons &ce);
|
|
~ConjElectrons();
|
|
unsigned int groupIdx() const { return d_groupIdx; };
|
|
unsigned int currElectrons() const { return d_currElectrons; };
|
|
unsigned int totalElectrons() const { return d_totalElectrons; };
|
|
void decrCurrElectrons(unsigned int d);
|
|
AtomElectrons *getAtomElectronsWithIdx(unsigned int ai);
|
|
BondElectrons *getBondElectronsWithIdx(unsigned int bi);
|
|
void pushToBeginStack(unsigned int ai);
|
|
bool popFromBeginStack(unsigned int &ai);
|
|
bool isBeginStackEmpty() { return d_beginAIStack.empty(); };
|
|
int allowedChgLeftOfN() const { return d_allowedChgLeftOfN; };
|
|
void decrAllowedChgLeftOfN(int d) { d_allowedChgLeftOfN -= d; };
|
|
int totalFormalCharge() const { return d_totalFormalCharge; };
|
|
bool hasCationRightOfN() const {
|
|
return static_cast<bool>(d_flags & HAVE_CATION_RIGHT_OF_N);
|
|
};
|
|
bool hasChargeSeparation() const {
|
|
return static_cast<bool>((d_flags & HAVE_CATION) && (d_flags & HAVE_ANION));
|
|
};
|
|
unsigned int absFormalCharges() const {
|
|
return d_ceMetrics.d_absFormalCharges;
|
|
};
|
|
unsigned int fcSameSignDist() const { return d_ceMetrics.d_fcSameSignDist; };
|
|
unsigned int fcOppSignDist() const { return d_ceMetrics.d_fcOppSignDist; };
|
|
unsigned int nbMissing() const { return d_ceMetrics.d_nbMissing; }
|
|
unsigned int sumFormalChargeIdxs() const {
|
|
return d_ceMetrics.d_sumFormalChargeIdxs;
|
|
}
|
|
unsigned int sumMultipleBondIdxs() const {
|
|
return d_ceMetrics.d_sumMultipleBondIdxs;
|
|
}
|
|
std::size_t hash() { return d_ceMetrics.d_hash; }
|
|
int wtdFormalCharges() const { return d_ceMetrics.d_wtdFormalCharges; };
|
|
void enumerateNonBonded(CEMap &ceMap, CEDegCount &ceDegCount,
|
|
CEStats &ceStats);
|
|
void initCeFromMol();
|
|
void assignNonBonded();
|
|
void assignFormalCharge();
|
|
bool assignFormalChargesAndStore(CEMap &ceMap, CEDegCount &ceDegCount,
|
|
CEStats &ceStats, unsigned int fpFlags);
|
|
void assignBondsFormalChargesToMol(ROMol &mol);
|
|
bool checkChargesAndBondOrders();
|
|
void computeMetrics();
|
|
bool checkMetrics(CEStats &ceStats, bool &ok) const;
|
|
bool purgeMaps(CEMap &ceMap, CEDegCount &ceDegCount, CEStats &ceStats) const;
|
|
void updateDegCount(CEDegCount &ceDegCount);
|
|
std::size_t computeFP(unsigned int flags);
|
|
inline bool haveFP(CESet &ceSet, unsigned int flags);
|
|
inline bool storeFP(CEMap &ceMap, unsigned int flags);
|
|
ResonanceMolSupplier *parent() const { return d_parent; };
|
|
|
|
private:
|
|
unsigned int d_groupIdx;
|
|
unsigned int d_totalElectrons;
|
|
unsigned int d_currElectrons;
|
|
unsigned int d_numFormalCharges;
|
|
int d_totalFormalCharge;
|
|
int d_allowedChgLeftOfN;
|
|
std::uint8_t d_flags;
|
|
CEMetrics d_ceMetrics;
|
|
ConjBondMap d_conjBondMap;
|
|
ConjAtomMap d_conjAtomMap;
|
|
std::stack<unsigned int> d_beginAIStack;
|
|
ResonanceMolSupplier *d_parent;
|
|
ConjElectrons &operator=(const ConjElectrons &);
|
|
unsigned int countTotalElectrons();
|
|
void computeDistFormalCharges();
|
|
void computeSumFormalChargeIdxs();
|
|
void computeSumMultipleBondIdxs();
|
|
void checkOctets();
|
|
};
|
|
|
|
class CEVect2Store {
|
|
public:
|
|
CEVect2Store(CEVect &ceVect);
|
|
unsigned int ceCount() { return d_ceCount; }
|
|
ConjElectrons *getCE(unsigned int i, unsigned int j);
|
|
|
|
private:
|
|
unsigned int d_ceCount;
|
|
CEVect2 d_ceVect2;
|
|
};
|
|
|
|
class AtomElectrons {
|
|
public:
|
|
typedef enum {
|
|
LAST_BOND = (1 << 0),
|
|
DEFINITIVE = (1 << 1),
|
|
STACKED = (1 << 2),
|
|
HAS_MULTIPLE_BOND = (1 << 3),
|
|
} AtomElectronsFlags;
|
|
typedef enum { NEED_CHARGE_BIT = 1 } AllowedBondFlag;
|
|
AtomElectrons(ConjElectrons *parent, const Atom *a);
|
|
AtomElectrons(ConjElectrons *parent, const AtomElectrons &ae);
|
|
~AtomElectrons(){};
|
|
std::uint8_t findAllowedBonds(unsigned int bi);
|
|
bool hasOctet() const { return ((d_nb + d_tv * 2) == 8); };
|
|
bool isLastBond() const { return (d_flags & LAST_BOND); };
|
|
void setLastBond() { d_flags |= LAST_BOND; };
|
|
bool isDefinitive() const { return (d_flags & DEFINITIVE); };
|
|
void setDefinitive() { d_flags |= DEFINITIVE; };
|
|
bool hasMultipleBond() const { return (d_flags & HAS_MULTIPLE_BOND); };
|
|
void setHasMultipleBond() { d_flags |= HAS_MULTIPLE_BOND; };
|
|
bool isStacked() const { return (d_flags & STACKED); };
|
|
void setStacked() { d_flags |= STACKED; };
|
|
void clearStacked() { d_flags &= ~STACKED; };
|
|
int conjGrpIdx() const {
|
|
return d_parent->parent()->getAtomConjGrpIdx(d_atom->getIdx());
|
|
};
|
|
void finalizeAtom();
|
|
unsigned int nb() const { return d_nb; };
|
|
unsigned int tv() const { return d_tv; };
|
|
unsigned int oe() const {
|
|
return PeriodicTable::getTable()->getNouterElecs(d_atom->getAtomicNum());
|
|
};
|
|
int fc() const { return d_fc; };
|
|
void tvIncr(unsigned int i) { d_tv += i; };
|
|
unsigned int neededNbForOctet() const { return (8 - (2 * d_tv + d_nb)); }
|
|
const Atom *atom() { return d_atom; }
|
|
void initTvNbFcFromAtom();
|
|
void assignNonBonded(unsigned int nb) { d_nb = nb; }
|
|
void assignFormalCharge() { d_fc = oe() - (d_nb + d_tv); }
|
|
bool isNbrCharged(unsigned int bo, unsigned int oeConstraint = 0);
|
|
AtomElectrons &operator=(const AtomElectrons &) = delete;
|
|
AtomElectrons(const AtomElectrons &) = delete;
|
|
|
|
private:
|
|
std::uint8_t d_nb;
|
|
std::uint8_t d_tv;
|
|
std::int8_t d_fc;
|
|
std::uint8_t d_flags;
|
|
const Atom *d_atom;
|
|
ConjElectrons *d_parent;
|
|
std::uint8_t canAddBondWithOrder(unsigned int bo);
|
|
void allConjBondsDefinitiveBut(unsigned int bi);
|
|
};
|
|
|
|
class BondElectrons {
|
|
public:
|
|
typedef enum { DEFINITIVE = (1 << 0) } BondElectronsFlags;
|
|
BondElectrons(ConjElectrons *parent, const Bond *b);
|
|
BondElectrons(ConjElectrons *parent, const BondElectrons &be);
|
|
~BondElectrons(){};
|
|
bool isDefinitive() const { return (d_flags & DEFINITIVE); };
|
|
void setDefinitive() { d_flags |= DEFINITIVE; };
|
|
int conjGrpIdx() const {
|
|
return d_parent->parent()->getBondConjGrpIdx(d_bond->getIdx());
|
|
};
|
|
void setOrder(unsigned int bo);
|
|
unsigned int order() const { return d_bo; };
|
|
unsigned int orderFromBond();
|
|
void initOrderFromBond() { d_bo = orderFromBond(); };
|
|
const Bond *bond() { return d_bond; };
|
|
BondElectrons &operator=(const BondElectrons &) = delete;
|
|
BondElectrons(const BondElectrons &) = delete;
|
|
|
|
private:
|
|
std::uint8_t d_bo;
|
|
std::uint8_t d_flags;
|
|
const Bond *d_bond;
|
|
ConjElectrons *d_parent;
|
|
};
|
|
|
|
namespace ResonanceUtils {
|
|
// depending on the number of atoms which won't have a complete
|
|
// octet (nCandSlots) and the number of those which need non-bonded
|
|
// electrons (nTotalSlots), the number of permutations (numComb) and
|
|
// a binary code (v) which indicates which of the atom indices in
|
|
// aiVec will be octet-unsatisfied for each permutation are computed
|
|
void getNumCombStartV(unsigned int nCandSlots, unsigned int nTotalSlots,
|
|
unsigned int &numComb, unsigned int &v) {
|
|
numComb = 1;
|
|
v = 0;
|
|
for (unsigned int i = 0; i < nCandSlots; ++i) {
|
|
numComb *= (nTotalSlots - i);
|
|
numComb /= (i + 1);
|
|
v |= (1 << i);
|
|
}
|
|
}
|
|
|
|
// get the next permutation
|
|
void updateV(unsigned int &v) {
|
|
unsigned int t = (v | (v - 1)) + 1;
|
|
v = t | ((((t & (~t + 1)) / (v & (~v + 1))) >> 1) - 1);
|
|
}
|
|
|
|
// sanitize the resonance structure which has been assembled
|
|
void sanitizeMol(RWMol &mol) {
|
|
unsigned int opFailed;
|
|
MolOps::sanitizeMol(
|
|
mol, opFailed, MolOps::SANITIZE_FINDRADICALS | MolOps::SANITIZE_ADJUSTHS);
|
|
}
|
|
|
|
// fix the number of explicit and implicit Hs in the
|
|
// resonance structure which has been assembled
|
|
void fixExplicitImplicitHs(ROMol &mol) {
|
|
mol.clearComputedProps(false);
|
|
for (ROMol::AtomIterator ai = mol.beginAtoms(); ai != mol.endAtoms(); ++ai) {
|
|
(*ai)->clearComputedProps();
|
|
(*ai)->setNumExplicitHs((*ai)->getNumImplicitHs() +
|
|
(*ai)->getNumExplicitHs());
|
|
(*ai)->updatePropertyCache();
|
|
}
|
|
}
|
|
} // end of namespace ResonanceUtils
|
|
|
|
// object constructor
|
|
AtomElectrons::AtomElectrons(ConjElectrons *parent, const Atom *a)
|
|
: d_nb(0), d_fc(0), d_flags(0), d_atom(a), d_parent(parent) {
|
|
PRECONDITION(d_atom, "d_atom cannot be NULL");
|
|
d_tv = static_cast<std::uint8_t>(a->getTotalDegree());
|
|
const ROMol &mol = d_atom->getOwningMol();
|
|
for (const auto &bNbri :
|
|
boost::make_iterator_range(mol.getAtomBonds(d_atom))) {
|
|
if (d_tv && mol[bNbri]->getBondType() >= Bond::DATIVEONE) {
|
|
--d_tv;
|
|
}
|
|
}
|
|
}
|
|
|
|
// copy constructor
|
|
AtomElectrons::AtomElectrons(ConjElectrons *parent, const AtomElectrons &ae)
|
|
: d_nb(ae.d_nb),
|
|
d_tv(ae.d_tv),
|
|
d_fc(ae.d_fc),
|
|
d_flags(ae.d_flags),
|
|
d_atom(ae.d_atom),
|
|
d_parent(parent) {}
|
|
|
|
// assign total valence, formal charge and non-bonded electrons
|
|
// from the original atom
|
|
void AtomElectrons::initTvNbFcFromAtom() {
|
|
d_tv = d_atom->getTotalValence();
|
|
d_fc = d_atom->getFormalCharge();
|
|
d_nb = oe() - d_tv - d_fc;
|
|
}
|
|
|
|
std::uint8_t AtomElectrons::findAllowedBonds(unsigned int bi) {
|
|
// AtomElectrons::findAllowedBonds returns a 6-bit result
|
|
// encoded as follows:
|
|
// +-------------------------------------+
|
|
// | BIT | MEANING |
|
|
// | 1 | can accept single bond |
|
|
// | 2 | needs charge if single bonded |
|
|
// | 3 | can accept double bond |
|
|
// | 4 | needs charge if double bonded |
|
|
// | 5 | can accept triple bond |
|
|
// | 6 | needs charge if triple bonded |
|
|
// +-------------------------------------+
|
|
|
|
allConjBondsDefinitiveBut(bi);
|
|
std::uint8_t res = 0;
|
|
for (unsigned int i = 0; i < 3; ++i) {
|
|
res |= (canAddBondWithOrder(i + 1) << (i * 2));
|
|
}
|
|
return res;
|
|
}
|
|
|
|
// returns true if any conjugated neighbor is charged (and
|
|
// has oe() == oeConstraint, if the latter is non-zero)
|
|
bool AtomElectrons::isNbrCharged(unsigned int bo, unsigned int oeConstraint) {
|
|
bool res = false;
|
|
const ROMol &mol = d_parent->parent()->mol();
|
|
ROMol::OEDGE_ITER nbrIdx, endNbrs;
|
|
boost::tie(nbrIdx, endNbrs) = mol.getAtomBonds(d_atom);
|
|
for (; !res && (nbrIdx != endNbrs); ++nbrIdx) {
|
|
const Bond *bondNbr = mol[*nbrIdx];
|
|
unsigned int biNbr = bondNbr->getIdx();
|
|
if (d_parent->parent()->getBondConjGrpIdx(biNbr) != conjGrpIdx()) {
|
|
continue;
|
|
}
|
|
BondElectrons *beNbr = d_parent->getBondElectronsWithIdx(biNbr);
|
|
if (!beNbr) {
|
|
continue;
|
|
}
|
|
const Atom *atomNbr = bondNbr->getOtherAtom(d_atom);
|
|
unsigned int aiNbr = atomNbr->getIdx();
|
|
AtomElectrons *aeNbr = d_parent->getAtomElectronsWithIdx(aiNbr);
|
|
if (!aeNbr) {
|
|
continue;
|
|
}
|
|
res = (((beNbr->isDefinitive() && !aeNbr->hasOctet()) ||
|
|
(!beNbr->isDefinitive() && aeNbr->isDefinitive() &&
|
|
(aeNbr->oe() < (5 - bo)))) &&
|
|
(!oeConstraint || (aeNbr->oe() == oeConstraint)));
|
|
}
|
|
return res;
|
|
}
|
|
|
|
// returns a 2-bit value, where the least significant bit is true
|
|
// if the atom can add a bond with order bo, and the most significant
|
|
// bit is true if the atom needs be charged
|
|
std::uint8_t AtomElectrons::canAddBondWithOrder(unsigned int bo) {
|
|
std::uint8_t canAdd = !isDefinitive();
|
|
if (canAdd) {
|
|
canAdd = (d_tv <= (5 - bo));
|
|
}
|
|
// if canAdd is true, loop over neighboring conjugated bonds
|
|
// and check their definitive flag; if all neighbors are
|
|
// definitive, we need an additional check on total valence
|
|
if (canAdd && isLastBond()) {
|
|
// we allow a formal charge up to 2 on atoms right of
|
|
// carbon, not more than 1 on atoms left of N
|
|
unsigned int rightOfC = ((oe() > 4) ? 1 : 0);
|
|
unsigned int fcInc = 0;
|
|
if (rightOfC) {
|
|
fcInc = (!isNbrCharged(bo, 4) ? 1 : 0);
|
|
} else {
|
|
// atoms left of N can be charged only if:
|
|
// - the neighbor is uncharged and either the conjugate
|
|
// group does not bear a non-zero total formal charge
|
|
// or it does and there is no other element left of N
|
|
// which has already been assigned a formal charge
|
|
// - the neighbor is charged, and triple-bonded to
|
|
// this atom, which is left of N (e.g., isonitrile)
|
|
bool isnc = isNbrCharged(bo);
|
|
fcInc = (((!isnc && !(!d_parent->allowedChgLeftOfN() &&
|
|
d_parent->totalFormalCharge())) ||
|
|
(isnc && (bo == 3) && (oe() < 5)))
|
|
? 1
|
|
: 0);
|
|
}
|
|
unsigned int e = oe() + d_tv - 1 + bo;
|
|
canAdd = ((e + fcInc + rightOfC) >= 8);
|
|
if (canAdd && (e < 8)) {
|
|
if (d_parent->totalFormalCharge() != 0 ||
|
|
(d_parent->parent()->flags() &
|
|
(ResonanceMolSupplier::UNCONSTRAINED_CATIONS |
|
|
ResonanceMolSupplier::UNCONSTRAINED_ANIONS)) ||
|
|
rightOfC) {
|
|
canAdd |= (1 << NEED_CHARGE_BIT);
|
|
} else {
|
|
canAdd = 0;
|
|
}
|
|
}
|
|
}
|
|
return canAdd;
|
|
}
|
|
|
|
// sets the LAST_BOND flag on this atom if there is only one
|
|
// non-definitive bond left
|
|
void AtomElectrons::allConjBondsDefinitiveBut(unsigned int bi) {
|
|
bool allDefinitive = true;
|
|
ROMol &mol = d_atom->getOwningMol();
|
|
ROMol::OEDGE_ITER nbrIdx, endNbrs;
|
|
boost::tie(nbrIdx, endNbrs) = mol.getAtomBonds(d_atom);
|
|
for (; allDefinitive && (nbrIdx != endNbrs); ++nbrIdx) {
|
|
unsigned int nbi = mol[*nbrIdx]->getIdx();
|
|
if ((nbi != bi) &&
|
|
(d_parent->parent()->getBondConjGrpIdx(nbi) == conjGrpIdx())) {
|
|
allDefinitive = d_parent->getBondElectronsWithIdx(nbi)->isDefinitive();
|
|
}
|
|
}
|
|
if (allDefinitive) {
|
|
setLastBond();
|
|
}
|
|
}
|
|
|
|
// called after all bonds to the atom have been marked as definitive
|
|
void AtomElectrons::finalizeAtom() {
|
|
// if the atom is left of N and needs non-bonded electrons
|
|
// to achieve the octet, it is the total formal charge
|
|
// of the conjugated group which determines if it is
|
|
// going to be a cation or an anion; once the formal
|
|
// charge is assigned to an atom left of N, the counter of allowed
|
|
// charges on atoms left of N (signed) is decremented by one
|
|
if (oe() < 5) {
|
|
unsigned int nb = neededNbForOctet();
|
|
if (nb) {
|
|
int fc = nb / 2;
|
|
if (d_parent->allowedChgLeftOfN()) {
|
|
if (d_parent->allowedChgLeftOfN() < 0) {
|
|
fc = -fc;
|
|
}
|
|
d_parent->decrAllowedChgLeftOfN(fc);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// object constructor
|
|
BondElectrons::BondElectrons(ConjElectrons *parent, const Bond *b)
|
|
: d_bo(1), d_flags(0), d_bond(b), d_parent(parent) {
|
|
PRECONDITION(d_bond, "d_bond cannot be NULL");
|
|
}
|
|
|
|
// copy constructor
|
|
BondElectrons::BondElectrons(ConjElectrons *parent, const BondElectrons &be)
|
|
: d_bo(be.d_bo), d_flags(be.d_flags), d_bond(be.d_bond), d_parent(parent) {}
|
|
|
|
// returns the bond order given the bond type
|
|
unsigned int BondElectrons::orderFromBond() {
|
|
unsigned int bo = 0;
|
|
switch (d_bond->getBondType()) {
|
|
case Bond::SINGLE:
|
|
bo = 1;
|
|
break;
|
|
case Bond::DOUBLE:
|
|
bo = 2;
|
|
break;
|
|
case Bond::TRIPLE:
|
|
bo = 3;
|
|
break;
|
|
default:
|
|
std::stringstream ss;
|
|
ss << "Bond idx " << d_bond->getIdx() << " between atoms "
|
|
<< d_bond->getBeginAtomIdx() << " and " << d_bond->getEndAtomIdx()
|
|
<< " has an invalid bond type";
|
|
throw std::runtime_error(ss.str());
|
|
}
|
|
return bo;
|
|
}
|
|
|
|
// set bond order, update total valence on the atoms involved
|
|
// in the bond and update count of current available electrons
|
|
void BondElectrons::setOrder(unsigned int bo) {
|
|
d_parent->getAtomElectronsWithIdx(d_bond->getBeginAtomIdx())->tvIncr(bo - 1);
|
|
d_parent->getAtomElectronsWithIdx(d_bond->getEndAtomIdx())->tvIncr(bo - 1);
|
|
setDefinitive();
|
|
d_parent->decrCurrElectrons(bo * 2);
|
|
d_bo = bo;
|
|
}
|
|
|
|
// object constructor
|
|
ConjElectrons::ConjElectrons(ResonanceMolSupplier *parent,
|
|
unsigned int groupIdx)
|
|
: d_groupIdx(groupIdx),
|
|
d_totalElectrons(0),
|
|
d_numFormalCharges(0),
|
|
d_totalFormalCharge(0),
|
|
d_flags(0),
|
|
d_parent(parent) {
|
|
const ROMol &mol = d_parent->mol();
|
|
unsigned int nb = mol.getNumBonds();
|
|
unsigned int na = mol.getNumAtoms();
|
|
for (unsigned int ai = 0; ai < na; ++ai) {
|
|
if (d_parent->getAtomConjGrpIdx(ai) != -1) {
|
|
d_totalFormalCharge += mol.getAtomWithIdx(ai)->getFormalCharge();
|
|
}
|
|
}
|
|
d_allowedChgLeftOfN = d_totalFormalCharge;
|
|
for (unsigned int bi = 0; bi < nb; ++bi) {
|
|
if (d_parent->getBondConjGrpIdx(bi) != static_cast<int>(groupIdx)) {
|
|
continue;
|
|
}
|
|
const Bond *bond = mol.getBondWithIdx(bi);
|
|
// store the pointers to BondElectrons objects in a map
|
|
d_conjBondMap[bi] = new BondElectrons(this, bond);
|
|
// store the pointers to AtomElectrons objects in a map
|
|
const Atom *atom[2] = {bond->getBeginAtom(), bond->getEndAtom()};
|
|
for (auto &i : atom) {
|
|
unsigned int ai = i->getIdx();
|
|
if (d_conjAtomMap.find(ai) == d_conjAtomMap.end()) {
|
|
d_conjAtomMap[ai] = new AtomElectrons(this, i);
|
|
}
|
|
}
|
|
}
|
|
// count total number of valence electrons in conjugated group
|
|
d_currElectrons = countTotalElectrons();
|
|
}
|
|
|
|
// copy constructor
|
|
ConjElectrons::ConjElectrons(const ConjElectrons &ce)
|
|
: d_groupIdx(ce.d_groupIdx),
|
|
d_totalElectrons(ce.d_totalElectrons),
|
|
d_currElectrons(ce.d_currElectrons),
|
|
d_numFormalCharges(ce.d_numFormalCharges),
|
|
d_totalFormalCharge(ce.d_totalFormalCharge),
|
|
d_allowedChgLeftOfN(ce.d_allowedChgLeftOfN),
|
|
d_flags(ce.d_flags),
|
|
d_ceMetrics(ce.d_ceMetrics),
|
|
d_beginAIStack(ce.d_beginAIStack),
|
|
d_parent(ce.d_parent) {
|
|
for (const auto &it : ce.d_conjAtomMap) {
|
|
d_conjAtomMap[it.first] = new AtomElectrons(this, *(it.second));
|
|
}
|
|
for (const auto &it : ce.d_conjBondMap) {
|
|
d_conjBondMap[it.first] = new BondElectrons(this, *(it.second));
|
|
}
|
|
}
|
|
|
|
// object destructor
|
|
ConjElectrons::~ConjElectrons() {
|
|
for (ConjAtomMap::const_iterator it = d_conjAtomMap.begin();
|
|
it != d_conjAtomMap.end(); ++it) {
|
|
delete it->second;
|
|
}
|
|
for (ConjBondMap::const_iterator it = d_conjBondMap.begin();
|
|
it != d_conjBondMap.end(); ++it) {
|
|
delete it->second;
|
|
}
|
|
}
|
|
|
|
std::size_t ConjElectrons::computeFP(unsigned int flags) {
|
|
std::uint8_t byte;
|
|
ConjFP fp;
|
|
size_t fpSize = 0;
|
|
if (flags & FP_ATOMS) {
|
|
fpSize += d_conjAtomMap.size();
|
|
}
|
|
if (flags & FP_BONDS) {
|
|
fpSize += (d_conjBondMap.size() - 1) / 4 + 1;
|
|
}
|
|
fp.reserve(fpSize);
|
|
if (flags & FP_ATOMS) {
|
|
// for each atom, we push a byte to the FP vector whose
|
|
// 4 least significant bits are total valence and the
|
|
// 4 most significant bits are non-bonded electrons
|
|
for (ConjAtomMap::const_iterator it = d_conjAtomMap.begin();
|
|
it != d_conjAtomMap.end(); ++it) {
|
|
byte = it->second->tv() | (it->second->nb() << 4);
|
|
fp.push_back(byte);
|
|
}
|
|
}
|
|
if (flags & FP_BONDS) {
|
|
unsigned int i = 0;
|
|
byte = 0;
|
|
for (ConjBondMap::const_iterator it = d_conjBondMap.begin();
|
|
it != d_conjBondMap.end(); ++it) {
|
|
// for each bond, we push 2 bits to the FP vector which
|
|
// represent the bond order; the FP vector is byte-aligned
|
|
// anyway
|
|
if (i && !(i % 4)) {
|
|
fp.push_back(byte);
|
|
byte = 0;
|
|
i = 0;
|
|
}
|
|
byte |= (static_cast<std::uint8_t>(it->second->order()) << (i * 2));
|
|
++i;
|
|
}
|
|
if (i) {
|
|
fp.push_back(byte);
|
|
}
|
|
}
|
|
// convert the FP vector to a hash
|
|
return boost::hash_range(fp.begin(), fp.end());
|
|
}
|
|
|
|
// store fingerprints for this ConjElectrons object in ceSet
|
|
// return true if the FP did not already exist in the set, false
|
|
// if they did (so the ConjElectrons object can be deleted)
|
|
bool ConjElectrons::haveFP(CESet &ceSet, unsigned int flags) {
|
|
// return true if the FP did not already exist in ceMap,
|
|
// false if it did
|
|
return ceSet.insert(computeFP(flags)).second;
|
|
}
|
|
|
|
// store fingerprints for this ConjElectrons object in ceMap
|
|
// return true if the FP did not already exist in the map, false
|
|
// if they did (so the ConjElectrons object can be deleted)
|
|
bool ConjElectrons::storeFP(CEMap &ceMap, unsigned int flags) {
|
|
// return true if the FP did not already exist in ceMap,
|
|
// false if it did
|
|
return ceMap.insert(std::make_pair(computeFP(flags), this)).second;
|
|
}
|
|
|
|
// assign bond orders and formal charges as encoded in the
|
|
// ConjElectrons object to the ROMol passed as reference
|
|
void ConjElectrons::assignBondsFormalChargesToMol(ROMol &mol) {
|
|
const Bond::BondType bondType[3] = {Bond::SINGLE, Bond::DOUBLE, Bond::TRIPLE};
|
|
for (ConjAtomMap::const_iterator it = d_conjAtomMap.begin();
|
|
it != d_conjAtomMap.end(); ++it) {
|
|
unsigned int ai = it->first;
|
|
AtomElectrons *ae = it->second;
|
|
mol.getAtomWithIdx(ai)->setFormalCharge(ae->fc());
|
|
}
|
|
for (ConjBondMap::const_iterator it = d_conjBondMap.begin();
|
|
it != d_conjBondMap.end(); ++it) {
|
|
unsigned int bi = it->first;
|
|
BondElectrons *be = it->second;
|
|
if ((be->order() < 1) || (be->order() > 3)) {
|
|
std::stringstream ss;
|
|
ss << "bond order for bond with index " << bi << " is " << be->order()
|
|
<< "; it should be between 1 and 3";
|
|
throw std::runtime_error(ss.str());
|
|
}
|
|
mol.getBondWithIdx(bi)->setBondType(bondType[be->order() - 1]);
|
|
}
|
|
}
|
|
|
|
// init atom total valences and bond orders from the
|
|
// respective atoms and bonds
|
|
void ConjElectrons::initCeFromMol() {
|
|
for (ConjAtomMap::const_iterator it = d_conjAtomMap.begin();
|
|
it != d_conjAtomMap.end(); ++it) {
|
|
it->second->initTvNbFcFromAtom();
|
|
}
|
|
for (ConjBondMap::const_iterator it = d_conjBondMap.begin();
|
|
it != d_conjBondMap.end(); ++it) {
|
|
it->second->initOrderFromBond();
|
|
}
|
|
d_currElectrons = 0;
|
|
}
|
|
|
|
// assign non-bonded electrons to atoms
|
|
void ConjElectrons::assignNonBonded() {
|
|
for (ConjAtomMap::const_iterator it = d_conjAtomMap.begin();
|
|
it != d_conjAtomMap.end(); ++it) {
|
|
AtomElectrons *ae = it->second;
|
|
unsigned int nb = std::min(ae->neededNbForOctet(), d_currElectrons);
|
|
decrCurrElectrons(nb);
|
|
ae->assignNonBonded(nb);
|
|
}
|
|
}
|
|
|
|
// assign formal charges to atoms
|
|
void ConjElectrons::assignFormalCharge() {
|
|
for (ConjAtomMap::const_iterator it = d_conjAtomMap.begin();
|
|
it != d_conjAtomMap.end(); ++it) {
|
|
it->second->assignFormalCharge();
|
|
}
|
|
}
|
|
|
|
// return true if bond orders and formal charges for this ConjElectrons
|
|
// object are OK, false if they aren't
|
|
bool ConjElectrons::checkChargesAndBondOrders() {
|
|
bool areAcceptable = true;
|
|
bool haveIncompleteOctetRightOfC = false;
|
|
bool haveNitrogenGroup = false;
|
|
bool havePosLeftOfN = false;
|
|
bool haveNegLeftOfN = false;
|
|
for (ConjAtomMap::const_iterator it = d_conjAtomMap.begin();
|
|
areAcceptable && (it != d_conjAtomMap.end()); ++it) {
|
|
AtomElectrons *ae = it->second;
|
|
// formal charges should be between -2 and +1
|
|
areAcceptable = ((ae->fc() < 2) && (ae->fc() > -3));
|
|
if (areAcceptable) {
|
|
if (ae->fc() > 0) {
|
|
d_flags |= HAVE_CATION;
|
|
} else if (ae->fc() < 0) {
|
|
d_flags |= HAVE_ANION;
|
|
}
|
|
if (ae->oe() > 4) {
|
|
if (ae->oe() == 5) {
|
|
haveNitrogenGroup = true;
|
|
}
|
|
if (!ae->hasOctet()) {
|
|
haveIncompleteOctetRightOfC = true;
|
|
}
|
|
if ((ae->fc() > 0) && (ae->oe() > 5)) {
|
|
d_flags |= HAVE_CATION_RIGHT_OF_N;
|
|
}
|
|
} else {
|
|
if (ae->fc() < 0) {
|
|
haveNegLeftOfN = true;
|
|
} else if (ae->fc() > 0) {
|
|
havePosLeftOfN = true;
|
|
}
|
|
}
|
|
// no carbanions should coexist with incomplete octets
|
|
areAcceptable = !(haveIncompleteOctetRightOfC && haveNegLeftOfN);
|
|
}
|
|
}
|
|
if (areAcceptable && havePosLeftOfN &&
|
|
!(d_parent->flags() & ResonanceMolSupplier::UNCONSTRAINED_CATIONS)) {
|
|
// if the UNCONSTRAINED_CATIONS flag is not set, positively charged
|
|
// atoms left of N with an incomplete octet are acceptable
|
|
// only if the conjugated group has a positive total formal charge,
|
|
// no atoms bearing a negative charge and no nitrogens
|
|
areAcceptable = (d_totalFormalCharge > 0 && !(d_flags & HAVE_ANION) &&
|
|
!haveNitrogenGroup);
|
|
}
|
|
if (areAcceptable && haveNegLeftOfN &&
|
|
!(d_parent->flags() & ResonanceMolSupplier::UNCONSTRAINED_ANIONS)) {
|
|
// if the UNCONSTRAINED_ANIONS flag is not set, negatively charged
|
|
// atoms left of N are acceptable only if the conjugated group has
|
|
// a negative total formal charge and no atoms bearing a positive
|
|
// charge
|
|
areAcceptable = (d_totalFormalCharge < 0 && !(d_flags & HAVE_CATION));
|
|
}
|
|
// if the ALLOW_INCOMPLETE_OCTETS flag is not set, incomplete octets
|
|
// right of C are only allowed in the absence of anions
|
|
if (areAcceptable &&
|
|
!(d_parent->flags() & ResonanceMolSupplier::ALLOW_INCOMPLETE_OCTETS)) {
|
|
areAcceptable = !(haveIncompleteOctetRightOfC && (d_flags & HAVE_ANION));
|
|
}
|
|
for (ConjBondMap::const_iterator it = d_conjBondMap.begin();
|
|
areAcceptable && (it != d_conjBondMap.end()); ++it) {
|
|
BondElectrons *be = it->second;
|
|
AtomElectrons *ae[2] = {d_conjAtomMap[be->bond()->getBeginAtomIdx()],
|
|
d_conjAtomMap[be->bond()->getEndAtomIdx()]};
|
|
for (unsigned int i = 0; areAcceptable && (i < 2); ++i) {
|
|
// cumulated bonds are disallowed in aromatic rings
|
|
if (be->order() > 1 && ae[i]->atom()->getIsAromatic()) {
|
|
if (ae[i]->hasMultipleBond()) {
|
|
areAcceptable = false;
|
|
} else {
|
|
ae[i]->setHasMultipleBond();
|
|
}
|
|
}
|
|
if (areAcceptable && ae[i]->oe() < 5) {
|
|
// no carbocations allowed on carbons bearing double
|
|
// or triple bonds, carbanions allowed on all carbons
|
|
// no double charged allowed left of N
|
|
areAcceptable =
|
|
((be->order() == 1) || ((be->order() > 1) && (ae[i]->fc() < 1)));
|
|
}
|
|
}
|
|
if (areAcceptable) {
|
|
// charged neighboring atoms left of N are not acceptable
|
|
areAcceptable = !((ae[0]->oe() < 5) && ae[0]->fc() && (ae[1]->oe() < 5) &&
|
|
ae[1]->fc());
|
|
}
|
|
}
|
|
return areAcceptable;
|
|
}
|
|
|
|
bool ConjElectrons::checkMetrics(CEStats &ceStats, bool &changed) const {
|
|
bool ok = true;
|
|
changed = false;
|
|
if (!ceStats.d_init || (nbMissing() < ceStats.d_minNbMissing)) {
|
|
changed = ceStats.d_init;
|
|
ceStats.d_init = true;
|
|
ceStats.d_minNbMissing = nbMissing();
|
|
}
|
|
if ((d_parent->flags() & ResonanceMolSupplier::ALLOW_INCOMPLETE_OCTETS) ||
|
|
(nbMissing() <= ceStats.d_minNbMissing)) {
|
|
if (!hasCationRightOfN() && !ceStats.d_haveNoCationsRightOfN) {
|
|
ceStats.d_haveNoCationsRightOfN = true;
|
|
changed = true;
|
|
}
|
|
if (!hasChargeSeparation() && !ceStats.d_haveNoChargeSeparation) {
|
|
ceStats.d_haveNoChargeSeparation = true;
|
|
changed = true;
|
|
}
|
|
}
|
|
// if the flag ALLOW_INCOMPLETE_OCTETS is not set, ConjElectrons
|
|
// objects having less electrons than the most electron-complete
|
|
// structure (which most often will have all complete octets) are
|
|
// discarded
|
|
if ((!(d_parent->flags() & ResonanceMolSupplier::ALLOW_INCOMPLETE_OCTETS) &&
|
|
(nbMissing() > ceStats.d_minNbMissing)) ||
|
|
(!(d_parent->flags() & ResonanceMolSupplier::UNCONSTRAINED_CATIONS) &&
|
|
hasCationRightOfN() && ceStats.d_haveNoCationsRightOfN) ||
|
|
(!(d_parent->flags() & ResonanceMolSupplier::ALLOW_CHARGE_SEPARATION) &&
|
|
hasChargeSeparation() && ceStats.d_haveNoChargeSeparation)) {
|
|
ok = false;
|
|
}
|
|
return ok;
|
|
}
|
|
|
|
bool ConjElectrons::purgeMaps(CEMap &ceMap, CEDegCount &ceDegCount,
|
|
CEStats &ceStats) const {
|
|
bool ok = true;
|
|
bool changed = true;
|
|
while (changed) {
|
|
for (auto it = ceMap.begin(); it != ceMap.end();) {
|
|
if (!it->second->checkMetrics(ceStats, changed)) {
|
|
auto it2 = ceDegCount.find(it->second->hash());
|
|
if (it2 != ceDegCount.end()) {
|
|
--it2->second;
|
|
if (!it2->second) {
|
|
ceDegCount.erase(it2);
|
|
}
|
|
}
|
|
if (it->second == this) {
|
|
// postpone self deletion
|
|
ok = false;
|
|
} else {
|
|
delete it->second;
|
|
}
|
|
it = ceMap.erase(it);
|
|
changed = true;
|
|
} else {
|
|
++it;
|
|
}
|
|
if (changed) {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
return ok;
|
|
}
|
|
|
|
// assign formal charges and, if they are acceptable, store
|
|
// return true if FPs did not already exist in ceMap, false if they did
|
|
bool ConjElectrons::assignFormalChargesAndStore(CEMap &ceMap,
|
|
CEDegCount &ceDegCount,
|
|
CEStats &ceStats,
|
|
unsigned int fpFlags) {
|
|
assignFormalCharge();
|
|
bool ok = checkChargesAndBondOrders();
|
|
bool changed = false;
|
|
if (ok) {
|
|
computeMetrics();
|
|
ok = checkMetrics(ceStats, changed);
|
|
}
|
|
if (ok) {
|
|
ok = storeFP(ceMap, fpFlags);
|
|
}
|
|
if (changed) {
|
|
ok = purgeMaps(ceMap, ceDegCount, ceStats) && ok;
|
|
}
|
|
if (ok) {
|
|
updateDegCount(ceDegCount);
|
|
}
|
|
return ok;
|
|
}
|
|
|
|
// enumerate all possible permutations of non-bonded electrons
|
|
void ConjElectrons::enumerateNonBonded(CEMap &ceMap, CEDegCount &ceDegCount,
|
|
CEStats &ceStats) {
|
|
// the way we compute FPs for a resonance structure depends
|
|
// on whether we want to enumerate all Kekule structures
|
|
// or not; in the first case, we also include bond orders
|
|
// in the FP computation in addition to atom valences
|
|
const unsigned int fpFlags =
|
|
FP_ATOMS |
|
|
((d_parent->flags() & ResonanceMolSupplier::KEKULE_ALL) ? FP_BONDS : 0);
|
|
// count how many atoms need non-bonded electrons to complete
|
|
// their octet ant store their indices in aiVec
|
|
std::vector<unsigned int> aiVec;
|
|
unsigned int nbTotal = 0;
|
|
for (ConjAtomMap::const_iterator it = d_conjAtomMap.begin();
|
|
it != d_conjAtomMap.end(); ++it) {
|
|
unsigned int nb = it->second->neededNbForOctet();
|
|
if (nb) {
|
|
nbTotal += nb;
|
|
aiVec.push_back(it->first);
|
|
}
|
|
}
|
|
if (nbTotal > currElectrons()) {
|
|
// if the electrons required to satisfy all octets
|
|
// are more than those currently available, some atoms will
|
|
// be satisfied and some won't: we enumerate all permutations
|
|
// of the unsatisfied atoms
|
|
unsigned int missingElectrons = nbTotal - currElectrons();
|
|
// number of atoms which won't have a complete octet
|
|
unsigned int numCand = (missingElectrons - 1) / 2 + 1;
|
|
unsigned int numComb;
|
|
unsigned int v;
|
|
// depending on the number of atoms which won't have a complete
|
|
// octet and the number of those which need non-bonded electrons
|
|
// we compute the number of permutations (numComb) and a
|
|
// binary code (v) which indicates which of the atom indices in
|
|
// aiVec will be octet-unsatisfied for each permutation
|
|
ResonanceUtils::getNumCombStartV(numCand, aiVec.size(), numComb, v);
|
|
// if there are multiple permutations, make a copy of the original
|
|
// ConjElectrons object, since the latter will be modified
|
|
ConjElectrons *ceCopy = new ConjElectrons(*this);
|
|
// enumerate all permutations
|
|
for (unsigned int c = 0; c < numComb; ++c) {
|
|
ConjElectrons *ce = new ConjElectrons(*ceCopy);
|
|
unsigned int vc = v;
|
|
for (unsigned int i : aiVec) {
|
|
AtomElectrons *ae = ce->getAtomElectronsWithIdx(i);
|
|
unsigned int e = ae->neededNbForOctet();
|
|
// if this atom was chosen to be octet-unsatisfied in
|
|
// this permutation, give it one electron pair less than
|
|
// its need (which most likely means no electrons at all)
|
|
if (vc & 1) {
|
|
e -= 2;
|
|
}
|
|
ce->decrCurrElectrons(e);
|
|
ae->assignNonBonded(e);
|
|
vc >>= 1;
|
|
}
|
|
// delete this candidate if it fails the formal charge check
|
|
if (!ce->assignFormalChargesAndStore(ceMap, ceDegCount, ceStats,
|
|
fpFlags)) {
|
|
delete ce;
|
|
}
|
|
|
|
// get the next binary code
|
|
ResonanceUtils::updateV(v);
|
|
}
|
|
delete ceCopy;
|
|
} else if (nbTotal == currElectrons()) {
|
|
ConjElectrons *ce = new ConjElectrons(*this);
|
|
// if the electrons required to satisfy all octets
|
|
// are as many as those currently available, assignment
|
|
// is univocal
|
|
ce->assignNonBonded();
|
|
// delete this candidate if it fails the formal charge check
|
|
if (!ce->assignFormalChargesAndStore(ceMap, ceDegCount, ceStats, fpFlags)) {
|
|
delete ce;
|
|
}
|
|
}
|
|
// if the electrons required to satisfy all octets are less
|
|
// than those currently available, we must have failed the bond
|
|
// assignment
|
|
}
|
|
|
|
void ConjElectrons::computeMetrics() {
|
|
// 1000 * Electronegativity according to the Allen scale
|
|
// (Allen, L.C. J. Am. Chem. Soc. 1989, 111, 9003-9014)
|
|
static const std::vector<unsigned int> en{
|
|
1000, 2300, 4160, 912, 1576, 2051, 2544, 3066, 3610, 4193, 4789, 869,
|
|
1293, 1613, 1916, 2253, 2589, 2869, 3242, 734, 1034, 1190, 1380, 1530,
|
|
1650, 1750, 1800, 1840, 1880, 1850, 1590, 1756, 1994, 2211, 2434, 2685,
|
|
2966, 706, 963, 1120, 1320, 1410, 1470, 1510, 1540, 1560, 1590, 1870,
|
|
1520, 1656, 1824, 1984, 2158, 2359, 2582, 659, 881, 1000, 1000, 1000,
|
|
1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1090,
|
|
1160, 1340, 1470, 1600, 1650, 1680, 1720, 1920, 1760, 1789, 1854, 2010,
|
|
2190, 2390, 2600, 670, 890};
|
|
for (ConjAtomMap::const_iterator it = d_conjAtomMap.begin();
|
|
it != d_conjAtomMap.end(); ++it) {
|
|
d_ceMetrics.d_absFormalCharges += abs(it->second->fc());
|
|
size_t anIdx = it->second->atom()->getAtomicNum();
|
|
d_ceMetrics.d_wtdFormalCharges +=
|
|
(it->second->fc() * ((anIdx >= en.size()) ? 1000 : en[anIdx]));
|
|
d_ceMetrics.d_nbMissing += it->second->neededNbForOctet();
|
|
}
|
|
computeDistFormalCharges();
|
|
computeSumFormalChargeIdxs();
|
|
computeSumMultipleBondIdxs();
|
|
}
|
|
|
|
void ConjElectrons::updateDegCount(CEDegCount &ceDegCount) {
|
|
static const size_t ceMetricsArrayLen = 5;
|
|
const auto metricsIt = reinterpret_cast<const int *>(&d_ceMetrics);
|
|
d_ceMetrics.d_hash =
|
|
boost::hash_range(metricsIt, metricsIt + ceMetricsArrayLen);
|
|
++ceDegCount[d_ceMetrics.d_hash];
|
|
}
|
|
|
|
// compute sum of shortest path distances between all pairs of
|
|
// formal charges of the same sign and of opposite signs
|
|
void ConjElectrons::computeDistFormalCharges() {
|
|
unsigned int n = d_parent->mol().getNumAtoms();
|
|
for (ConjAtomMap::const_iterator it1 = d_conjAtomMap.begin();
|
|
it1 != d_conjAtomMap.end(); ++it1) {
|
|
if (!it1->second->fc()) {
|
|
continue;
|
|
}
|
|
size_t y = it1->first * n;
|
|
for (auto it2 = it1; it2 != d_conjAtomMap.end(); ++it2) {
|
|
if ((it1 == it2) || !it2->second->fc()) {
|
|
continue;
|
|
}
|
|
unsigned int dist = static_cast<unsigned int>(
|
|
MolOps::getDistanceMat(d_parent->mol())[y + it2->first] + 0.1);
|
|
if ((it1->second->fc() * it2->second->fc()) > 0) {
|
|
d_ceMetrics.d_fcSameSignDist += dist;
|
|
} else {
|
|
d_ceMetrics.d_fcOppSignDist += dist;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// compute the sum of indices of atoms bearing a formal charge
|
|
void ConjElectrons::computeSumFormalChargeIdxs() {
|
|
for (ConjAtomMap::const_iterator it = d_conjAtomMap.begin();
|
|
it != d_conjAtomMap.end(); ++it) {
|
|
if (it->second->fc()) {
|
|
d_ceMetrics.d_sumFormalChargeIdxs += it->first;
|
|
}
|
|
}
|
|
}
|
|
|
|
// compute the sum of indices of multiple bonds
|
|
void ConjElectrons::computeSumMultipleBondIdxs() {
|
|
for (ConjBondMap::const_iterator it = d_conjBondMap.begin();
|
|
it != d_conjBondMap.end(); ++it) {
|
|
if (it->second->order() > 1) {
|
|
d_ceMetrics.d_sumMultipleBondIdxs += it->first;
|
|
}
|
|
}
|
|
}
|
|
|
|
// decrement the count of current electrons by d
|
|
void ConjElectrons::decrCurrElectrons(unsigned int d) {
|
|
if (d_currElectrons < d) {
|
|
std::stringstream ss;
|
|
ss << "d_currElectrons = " << d_currElectrons << ", d = " << d;
|
|
throw std::runtime_error(ss.str());
|
|
}
|
|
d_currElectrons -= d;
|
|
}
|
|
|
|
// push atom index ai to the begin stack if it is not already there
|
|
void ConjElectrons::pushToBeginStack(unsigned int ai) {
|
|
if (!d_conjAtomMap[ai]->isStacked()) {
|
|
d_conjAtomMap[ai]->setStacked();
|
|
d_beginAIStack.push(ai);
|
|
}
|
|
}
|
|
|
|
// pop an atom from the begin stack and put its index in ai
|
|
bool ConjElectrons::popFromBeginStack(unsigned int &ai) {
|
|
bool ok = false;
|
|
while (!d_beginAIStack.empty() && !ok) {
|
|
ai = d_beginAIStack.top();
|
|
d_beginAIStack.pop();
|
|
AtomElectrons *ae = d_conjAtomMap[ai];
|
|
ae->clearStacked();
|
|
ok = (!ae->isDefinitive());
|
|
}
|
|
return ok;
|
|
}
|
|
|
|
// function used to sort ConjElectrons objects based on their
|
|
// importance to describe the structure; criteria in order of decreasing
|
|
// priority follow:
|
|
// 1) Number of unsatisfied octets
|
|
// 2) Number of formal charges
|
|
// 3) Number of formal charges weighted by atom electronegativity
|
|
// 4) Distance between formal charges with the same sign
|
|
// 5) Distance between formal charges with opposite signs
|
|
// 6) Sum of the indices of atoms bearing a formal charge
|
|
// 7) Sum of the indices of multiple bonds
|
|
bool CEVect2::resonanceStructureCompare(const ConjElectrons *a,
|
|
const ConjElectrons *b) {
|
|
return (
|
|
(a->nbMissing() != b->nbMissing())
|
|
? (a->nbMissing() < b->nbMissing())
|
|
: (a->absFormalCharges() != b->absFormalCharges())
|
|
? (a->absFormalCharges() < b->absFormalCharges())
|
|
: (a->wtdFormalCharges() != b->wtdFormalCharges())
|
|
? (a->wtdFormalCharges() < b->wtdFormalCharges())
|
|
: (a->fcSameSignDist() != b->fcSameSignDist())
|
|
? (a->fcSameSignDist() > b->fcSameSignDist())
|
|
: (a->fcOppSignDist() != b->fcOppSignDist())
|
|
? (a->fcOppSignDist() > b->fcOppSignDist())
|
|
: (a->sumFormalChargeIdxs() !=
|
|
b->sumFormalChargeIdxs())
|
|
? (a->sumFormalChargeIdxs() <
|
|
b->sumFormalChargeIdxs())
|
|
: (a->sumMultipleBondIdxs() <
|
|
b->sumMultipleBondIdxs()));
|
|
}
|
|
|
|
CEVect2::CEVect2(const CEMap &ceMap) {
|
|
d_ceVect.reserve(ceMap.size());
|
|
for (CEMap::const_iterator it = ceMap.begin(); it != ceMap.end(); ++it) {
|
|
d_ceVect.push_back(it->second);
|
|
}
|
|
std::sort(d_ceVect.begin(), d_ceVect.end(), resonanceStructureCompare);
|
|
bool first = true;
|
|
std::size_t hashPrev;
|
|
for (CEVect::const_iterator it = d_ceVect.begin(); it != d_ceVect.end();
|
|
++it) {
|
|
if (first || ((*it)->hash() != hashPrev)) {
|
|
first = false;
|
|
hashPrev = (*it)->hash();
|
|
d_degVect.push_back(1);
|
|
} else {
|
|
++d_degVect.back();
|
|
}
|
|
}
|
|
}
|
|
|
|
ConjElectrons *CEVect2::getCE(unsigned int depth, unsigned int width) {
|
|
if (depth >= d_degVect.size()) {
|
|
std::stringstream ss;
|
|
ss << "depth = " << depth << ", d_degVect.size() = " << d_degVect.size();
|
|
throw std::runtime_error(ss.str());
|
|
}
|
|
if (width >= d_degVect[depth]) {
|
|
std::stringstream ss;
|
|
ss << "width = " << width << ", d_degVect[" << depth
|
|
<< "] = " << d_degVect[depth];
|
|
throw std::runtime_error(ss.str());
|
|
}
|
|
size_t i =
|
|
std::accumulate(d_degVect.begin(), d_degVect.begin() + depth, 0U) + width;
|
|
return d_ceVect[i];
|
|
}
|
|
|
|
void CEVect2::resize(size_t size) {
|
|
d_ceVect.resize(size ? ceCountUntilDepth(size - 1) : 0);
|
|
d_degVect.resize(size);
|
|
}
|
|
|
|
unsigned int CEVect2::ceCountAtDepth(unsigned int depth) {
|
|
if (depth >= d_degVect.size()) {
|
|
std::stringstream ss;
|
|
ss << "depth = " << depth << ", d_degVect.size() = " << d_degVect.size();
|
|
throw std::runtime_error(ss.str());
|
|
}
|
|
return d_degVect[depth];
|
|
}
|
|
|
|
unsigned int CEVect2::ceCountUntilDepth(unsigned int depth) {
|
|
if (depth >= d_degVect.size()) {
|
|
std::stringstream ss;
|
|
ss << "depth = " << depth << ", d_degVect.size() = " << d_degVect.size();
|
|
throw std::runtime_error(ss.str());
|
|
}
|
|
return std::accumulate(d_degVect.begin(), d_degVect.begin() + depth + 1, 0U);
|
|
}
|
|
|
|
void CEVect2::idxToDepthWidth(size_t idx, unsigned int &d, unsigned int &w) {
|
|
if (idx >= d_ceVect.size()) {
|
|
std::stringstream ss;
|
|
ss << "idx = " << idx << ", d_ceVect.size() = " << d_ceVect.size();
|
|
throw std::runtime_error(ss.str());
|
|
}
|
|
d = 0;
|
|
while (idx >= d_degVect[d]) {
|
|
idx -= d_degVect[d];
|
|
++d;
|
|
}
|
|
w = idx;
|
|
}
|
|
|
|
// get the pointer to the BondElectrons object for bond having index bi
|
|
BondElectrons *ConjElectrons::getBondElectronsWithIdx(unsigned int bi) {
|
|
auto it = d_conjBondMap.find(bi);
|
|
return (it != d_conjBondMap.end() ? it->second : nullptr);
|
|
}
|
|
|
|
// get the pointer to the AtomElectrons object for atom having index ai
|
|
AtomElectrons *ConjElectrons::getAtomElectronsWithIdx(unsigned int ai) {
|
|
auto it = d_conjAtomMap.find(ai);
|
|
return (it != d_conjAtomMap.end() ? it->second : nullptr);
|
|
}
|
|
|
|
// count number of total electrons
|
|
unsigned int ConjElectrons::countTotalElectrons() {
|
|
// count total number of valence electrons in conjugated group
|
|
for (ConjBondMap::const_iterator it = d_conjBondMap.begin();
|
|
it != d_conjBondMap.end(); ++it) {
|
|
d_totalElectrons += (2 * it->second->orderFromBond());
|
|
}
|
|
for (ConjAtomMap::const_iterator it = d_conjAtomMap.begin();
|
|
it != d_conjAtomMap.end(); ++it) {
|
|
const Atom *a = it->second->atom();
|
|
d_totalElectrons +=
|
|
it->second->oe() - a->getTotalValence() - a->getFormalCharge();
|
|
}
|
|
return d_totalElectrons;
|
|
}
|
|
|
|
size_t ResonanceMolSupplierCallback::getNumStructures(
|
|
unsigned int conjGrpIdx) const {
|
|
PRECONDITION(conjGrpIdx < d_progress.size(), "conjGrpIdx out of bounds");
|
|
return d_progress.at(conjGrpIdx).d_totalStructs;
|
|
}
|
|
|
|
size_t ResonanceMolSupplierCallback::getNumDiverseStructures(
|
|
unsigned int conjGrpIdx) const {
|
|
PRECONDITION(conjGrpIdx < d_progress.size(), "conjGrpIdx out of bounds");
|
|
return d_progress.at(conjGrpIdx).d_diverseStructs;
|
|
}
|
|
|
|
void ResonanceMolSupplier::setProgressCallback(
|
|
ResonanceMolSupplierCallback *callback) {
|
|
d_callback.reset(callback);
|
|
}
|
|
|
|
// if the total number of resonance structures exceeds d_maxStructs,
|
|
// trim the number of ConjElectrons objects for each conjugated group
|
|
// to exclude the less likely structures and save memory
|
|
// we are going to generate the complete resonance structures
|
|
// combining the most likely ConjElectrons objects in a breadth-first
|
|
// fashion
|
|
void ResonanceMolSupplier::trimCeVect2() {
|
|
if (d_length == d_maxStructs) {
|
|
std::vector<unsigned int> s(d_nConjGrp, 0);
|
|
std::vector<unsigned int> t(d_nConjGrp, 0);
|
|
unsigned int currSize = 0;
|
|
while (currSize < d_length) {
|
|
currSize = 1;
|
|
for (unsigned int conjGrpIdx = 0;
|
|
(currSize < d_length) && (conjGrpIdx < d_nConjGrp); ++conjGrpIdx) {
|
|
if (s[conjGrpIdx] < d_ceVect3[conjGrpIdx]->depth()) {
|
|
t[conjGrpIdx] += d_ceVect3[conjGrpIdx]->ceCountAtDepth(s[conjGrpIdx]);
|
|
++s[conjGrpIdx];
|
|
}
|
|
currSize *= t[conjGrpIdx];
|
|
}
|
|
}
|
|
for (unsigned int conjGrpIdx = 0; conjGrpIdx < d_nConjGrp; ++conjGrpIdx) {
|
|
for (unsigned int d = s[conjGrpIdx]; d < d_ceVect3[conjGrpIdx]->depth();
|
|
++d) {
|
|
for (unsigned int w = 0; w < d_ceVect3[conjGrpIdx]->ceCountAtDepth(d);
|
|
++w) {
|
|
delete d_ceVect3[conjGrpIdx]->getCE(d, w);
|
|
}
|
|
}
|
|
d_ceVect3[conjGrpIdx]->resize(s[conjGrpIdx]);
|
|
}
|
|
}
|
|
}
|
|
|
|
// get the ConjElectrons indices to be combined given
|
|
// the complete resonance structure index
|
|
void ResonanceMolSupplier::idxToCEPerm(size_t idx,
|
|
std::vector<unsigned int> &c) const {
|
|
// the c vector holds a pair of values for each ConjGrp,
|
|
// namely depth and width where the ConjElectrons
|
|
// object lies in the CEVect2 vector
|
|
c.resize(d_nConjGrp * 2);
|
|
unsigned int g = d_nConjGrp;
|
|
while (g) {
|
|
--g;
|
|
unsigned int gt2 = g * 2;
|
|
unsigned int d = 1;
|
|
for (unsigned int j = 0; j < g; ++j) {
|
|
d *= d_ceVect3[j]->ceCount();
|
|
}
|
|
if (d_ceVect3[g]->ceCount()) {
|
|
d_ceVect3[g]->idxToDepthWidth(idx / d, c[gt2], c[gt2 + 1]);
|
|
idx %= d;
|
|
}
|
|
}
|
|
}
|
|
|
|
// sort the vectors of ConjElectrons indices in such a way that we
|
|
// generate resonance structures out of the most likely ConjElectrons
|
|
// objects in a breadth-first fashion
|
|
bool ResonanceMolSupplier::cePermCompare(const CEPerm *a, const CEPerm *b) {
|
|
unsigned int aSum = 0;
|
|
unsigned int bSum = 0;
|
|
for (size_t i = 0; i < a->v.size(); i += 2) {
|
|
aSum += a->v[i];
|
|
bSum += b->v[i];
|
|
}
|
|
if (aSum != bSum) {
|
|
return (aSum < bSum);
|
|
}
|
|
unsigned int aMax = 0;
|
|
unsigned int bMax = 0;
|
|
for (size_t i = 0; i < a->v.size(); i += 2) {
|
|
if (!i || (a->v[i] > aMax)) {
|
|
aMax = a->v[i];
|
|
}
|
|
if (!i || (b->v[i] > bMax)) {
|
|
bMax = b->v[i];
|
|
}
|
|
}
|
|
if (aMax != bMax) {
|
|
return (aMax < bMax);
|
|
}
|
|
for (size_t i = 0; i < a->v.size(); i += 2) {
|
|
if (a->v[i] != b->v[i]) {
|
|
return (a->v[i] < b->v[i]);
|
|
}
|
|
}
|
|
// if the other criteria didn't discriminate,
|
|
// sort based on degenerate resonance structures
|
|
for (size_t i = 1; i < a->v.size(); i += 2) {
|
|
if (a->v[i] != b->v[i]) {
|
|
return (a->v[i] < b->v[i]);
|
|
}
|
|
}
|
|
// we'll never get here, this it is just to silence a warning
|
|
return false;
|
|
}
|
|
|
|
// enumerate upfront all the d_length indices for complete resonance
|
|
// structures and sort them to privilege the most likely
|
|
// ConjElectrons index combinations in a breadth-first fashion
|
|
void ResonanceMolSupplier::prepEnumIdxVect() {
|
|
d_enumIdx.resize(d_length);
|
|
std::vector<CEPerm *> cePermVect(d_length);
|
|
for (size_t i = 0; i < d_length; ++i) {
|
|
cePermVect[i] = new CEPerm;
|
|
cePermVect[i]->idx = i;
|
|
idxToCEPerm(i, cePermVect[i]->v);
|
|
}
|
|
std::sort(cePermVect.begin(), cePermVect.end(), cePermCompare);
|
|
for (unsigned int i = 0; i < d_length; ++i) {
|
|
d_enumIdx[i] = cePermVect[i]->idx;
|
|
delete cePermVect[i];
|
|
}
|
|
}
|
|
|
|
// object constructor
|
|
ResonanceMolSupplier::ResonanceMolSupplier(ROMol &mol, unsigned int flags,
|
|
unsigned int maxStructs)
|
|
: d_nConjGrp(0),
|
|
d_flags(flags),
|
|
d_idx(0),
|
|
d_numThreads(1),
|
|
d_isEnumerated(false),
|
|
d_wasCanceled(false) {
|
|
const unsigned int MAX_STRUCTS = 1000000;
|
|
if (d_flags & UNCONSTRAINED_CATIONS) {
|
|
d_flags |= (ALLOW_CHARGE_SEPARATION | ALLOW_INCOMPLETE_OCTETS);
|
|
}
|
|
if (d_flags & UNCONSTRAINED_ANIONS) {
|
|
d_flags |= ALLOW_CHARGE_SEPARATION;
|
|
}
|
|
d_maxStructs = std::min(maxStructs, MAX_STRUCTS);
|
|
d_length = std::min(1U, d_maxStructs);
|
|
d_mol = new ROMol(mol);
|
|
// call here to avoid a race condition in threads
|
|
MolOps::getDistanceMat(*d_mol);
|
|
MolOps::Kekulize((RWMol &)*d_mol, false);
|
|
// identify conjugate substructures
|
|
assignConjGrpIdx();
|
|
}
|
|
|
|
// object destructor
|
|
ResonanceMolSupplier::~ResonanceMolSupplier() {
|
|
for (CEVect3::const_iterator ceVect3It = d_ceVect3.begin();
|
|
ceVect3It != d_ceVect3.end(); ++ceVect3It) {
|
|
if (!(*ceVect3It)) {
|
|
continue;
|
|
}
|
|
for (unsigned int d = 0; d < (*ceVect3It)->depth(); ++d) {
|
|
for (unsigned int w = 0; w < (*ceVect3It)->ceCountAtDepth(d); ++w) {
|
|
delete (*ceVect3It)->getCE(d, w);
|
|
}
|
|
}
|
|
delete *ceVect3It;
|
|
}
|
|
if (d_mol) {
|
|
delete d_mol;
|
|
}
|
|
}
|
|
|
|
void ResonanceMolSupplier::setNumThreads(int numThreads) {
|
|
d_numThreads = std::min(d_nConjGrp, getNumThreadsToUse(numThreads));
|
|
}
|
|
|
|
void ResonanceMolSupplier::enumerate() {
|
|
if (d_isEnumerated) {
|
|
return;
|
|
}
|
|
if (d_callback.get()) {
|
|
d_callback->d_progress.resize(d_nConjGrp);
|
|
}
|
|
resizeCeVect();
|
|
if (d_numThreads == 1) {
|
|
mainLoop(0, 1);
|
|
}
|
|
#ifdef RDK_THREADSAFE_SSS
|
|
else {
|
|
std::vector<std::future<void>> tg;
|
|
auto functor = [this](unsigned int ti, unsigned int d_numThreads) -> void {
|
|
mainLoop(ti, d_numThreads);
|
|
};
|
|
for (unsigned int ti = 0; ti < d_numThreads; ++ti) {
|
|
tg.emplace_back(
|
|
std::async(std::launch::async, functor, ti, d_numThreads));
|
|
}
|
|
for (auto &fut : tg) {
|
|
fut.get();
|
|
}
|
|
}
|
|
#endif
|
|
setResonanceMolSupplierLength();
|
|
trimCeVect2();
|
|
prepEnumIdxVect();
|
|
d_isEnumerated = true;
|
|
}
|
|
|
|
void ResonanceMolSupplier::mainLoop(unsigned int ti, unsigned int nt) {
|
|
for (unsigned int conjGrpIdx = 0; conjGrpIdx < d_nConjGrp; ++conjGrpIdx) {
|
|
if ((conjGrpIdx % nt) != ti) {
|
|
continue;
|
|
}
|
|
CEMap ceMap;
|
|
buildCEMap(ceMap, conjGrpIdx);
|
|
storeCEMap(ceMap, conjGrpIdx);
|
|
}
|
|
}
|
|
|
|
// each bond an atom is assigned an index representing the conjugated
|
|
// group it belongs to; such indices are stored in two vectors
|
|
// (d_bondConjGrpIdx and d_atomConjGrpIdx, respectively)
|
|
// atoms and bonds which do not belong to a conjugated group are given
|
|
// index -1
|
|
void ResonanceMolSupplier::assignConjGrpIdx() {
|
|
unsigned int nb = d_mol->getNumBonds();
|
|
if (!nb) {
|
|
return;
|
|
}
|
|
std::stack<unsigned int> bondIndexStack;
|
|
d_bondConjGrpIdx.resize(nb, -1);
|
|
unsigned int na = d_mol->getNumAtoms();
|
|
d_atomConjGrpIdx.resize(na, -1);
|
|
for (d_nConjGrp = 0; true; ++d_nConjGrp) {
|
|
for (const auto b : d_mol->bonds()) {
|
|
unsigned int i = b->getIdx();
|
|
if (b->getIsConjugated() && (d_bondConjGrpIdx[i] == -1)) {
|
|
bondIndexStack.push(i);
|
|
break;
|
|
}
|
|
}
|
|
if (bondIndexStack.empty()) {
|
|
break;
|
|
}
|
|
while (!bondIndexStack.empty()) {
|
|
unsigned int i = bondIndexStack.top();
|
|
bondIndexStack.pop();
|
|
const Bond *bi = d_mol->getBondWithIdx(i);
|
|
for (const Atom *bondAtom : {bi->getBeginAtom(), bi->getEndAtom()}) {
|
|
// loop over bonds sprouting from bondAtom
|
|
for (const auto &bNbri :
|
|
boost::make_iterator_range(d_mol->getAtomBonds(bondAtom))) {
|
|
const auto &bNbr = (*d_mol)[bNbri];
|
|
unsigned int biNbr = bNbr->getIdx();
|
|
if (bNbr->getIsConjugated() && (d_bondConjGrpIdx[biNbr] == -1)) {
|
|
d_bondConjGrpIdx[biNbr] = d_nConjGrp;
|
|
bondIndexStack.push(biNbr);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
for (unsigned int i = 0; i < nb; ++i) {
|
|
if (d_bondConjGrpIdx[i] != -1) {
|
|
const Bond *bi = d_mol->getBondWithIdx(i);
|
|
unsigned int biBeginIdx = bi->getBeginAtomIdx();
|
|
unsigned int biEndIdx = bi->getEndAtomIdx();
|
|
d_atomConjGrpIdx[biBeginIdx] = d_bondConjGrpIdx[i];
|
|
d_atomConjGrpIdx[biEndIdx] = d_bondConjGrpIdx[i];
|
|
}
|
|
}
|
|
}
|
|
|
|
// function which enumerates all possible multiple bond arrangements
|
|
// for each conjugated group, stores each arrangement in a ConjElectrons
|
|
// object and stores the latter in a map (ceMapTmp), keyed with its
|
|
// fingerprints. Depending on whether the KEKULE_ALL flag is set, the FP
|
|
// computation will be based either on the bond arrangement or on atom
|
|
// valences; this provides a convenient mechanism to remove degenerate
|
|
// Kekule structures upfront. In a subsequent step, ConjElectrons objects
|
|
// stored in ceMapTmp are further enumerated for their non-bonded
|
|
// electron arrangements, and the final ConjElectrons objects are stored
|
|
// in ceMap. Depending on whether the KEKULE_ALL flag is set, the FP
|
|
// computation will involve or not bond arrangement in addition to atom
|
|
// valences
|
|
void ResonanceMolSupplier::buildCEMap(CEMap &ceMap, unsigned int conjGrpIdx) {
|
|
const unsigned int BEGIN_POS = 0;
|
|
const unsigned int END_POS = 1;
|
|
const unsigned int fpFlags =
|
|
ConjElectrons::FP_ATOMS |
|
|
((d_flags & KEKULE_ALL) ? ConjElectrons::FP_BONDS : 0);
|
|
const unsigned int fpFlagsTmp =
|
|
((d_flags & KEKULE_ALL) ? ConjElectrons::FP_BONDS
|
|
: ConjElectrons::FP_ATOMS);
|
|
unsigned int nb = d_mol->getNumBonds();
|
|
unsigned int na = d_mol->getNumAtoms();
|
|
CESet ceSetTmp;
|
|
CEDegCount ceDegCount;
|
|
CEStats ceStats;
|
|
// There are two stacks in this algorithm:
|
|
// 1) ConjElectrons stack (ceStack, unique). It stores partial bond
|
|
// arrangements whenever multiple paths arise. Partial bond
|
|
// arrangements are then popped from the stack until the latter is
|
|
// empty
|
|
// 2) atom index stack (d_beginAIStack, associated to each
|
|
// ConjElectrons object). It stores a stack of atom indices to
|
|
// start from to complete the bond arrangement for each
|
|
// ConjElectrons object
|
|
std::stack<ConjElectrons *> ceStack;
|
|
auto *ce = new ConjElectrons(this, conjGrpIdx);
|
|
auto *ceCopy = new ConjElectrons(*ce);
|
|
// the first ConjElectrons object has the user-supplied bond
|
|
// and formal charge arrangement and is stored as such
|
|
ce->initCeFromMol();
|
|
// we ignore the result of the call to checkChargesAndBondOrders()
|
|
// but we need to call it so that the HAVE_CATION_RIGHT_OF_N flag
|
|
// may eventually be set
|
|
ce->checkChargesAndBondOrders();
|
|
ce->computeMetrics();
|
|
ce->updateDegCount(ceDegCount);
|
|
ce->storeFP(ceMap, fpFlags);
|
|
ce = ceCopy;
|
|
// initialize ceStack
|
|
ceStack.push(ce);
|
|
// loop until ceStack is empty
|
|
while (!ceStack.empty()) {
|
|
ce = ceStack.top();
|
|
ceStack.pop();
|
|
unsigned int aiBegin = 0;
|
|
// if the atom index stack is empty, initialize it with a primer;
|
|
// any atom index belonging to this conjugated group will do
|
|
if (ce->isBeginStackEmpty()) {
|
|
bool aiFound = false;
|
|
while (aiBegin < na) {
|
|
aiFound = (d_atomConjGrpIdx[aiBegin] == static_cast<int>(conjGrpIdx));
|
|
if (aiFound) {
|
|
break;
|
|
}
|
|
++aiBegin;
|
|
}
|
|
if (!aiFound) {
|
|
continue;
|
|
}
|
|
ce->pushToBeginStack(aiBegin);
|
|
}
|
|
// loop until the atom index stack is empty
|
|
while (ce && ce->popFromBeginStack(aiBegin)) {
|
|
// aiBegin holds the atom index just popped from stack
|
|
unsigned int ai[2] = {aiBegin, na};
|
|
AtomElectrons *ae[2] = {ce->getAtomElectronsWithIdx(ai[BEGIN_POS]),
|
|
nullptr};
|
|
unsigned int bi = nb;
|
|
BondElectrons *be = nullptr;
|
|
// loop over neighbors of the atom popped from the
|
|
// atom index stack
|
|
ROMol::ADJ_ITER nbrIdx, endNbrs;
|
|
boost::tie(nbrIdx, endNbrs) =
|
|
d_mol->getAtomNeighbors(ae[BEGIN_POS]->atom());
|
|
for (; nbrIdx != endNbrs; ++nbrIdx) {
|
|
unsigned int aiNbr = (*d_mol)[*nbrIdx]->getIdx();
|
|
// if this neighbor is not part of the conjugated group,
|
|
// ignore it
|
|
if (ce->parent()->getAtomConjGrpIdx(aiNbr) !=
|
|
static_cast<int>(conjGrpIdx)) {
|
|
continue;
|
|
}
|
|
AtomElectrons *aeNbr = ce->getAtomElectronsWithIdx(aiNbr);
|
|
// if we've already dealt with this neighbor before, ignore it
|
|
if (!aeNbr || aeNbr->isDefinitive()) {
|
|
continue;
|
|
}
|
|
unsigned int biNbr =
|
|
d_mol->getBondBetweenAtoms(ai[BEGIN_POS], aiNbr)->getIdx();
|
|
BondElectrons *beNbr = ce->getBondElectronsWithIdx(biNbr);
|
|
// if we have already assigned the bond order to this bond,
|
|
// ignore it
|
|
if (!beNbr || beNbr->isDefinitive()) {
|
|
continue;
|
|
}
|
|
// if this is the first neighbor we find, process it
|
|
if (!ae[END_POS]) {
|
|
bi = biNbr;
|
|
be = beNbr;
|
|
ai[END_POS] = aiNbr;
|
|
ae[END_POS] = aeNbr;
|
|
}
|
|
// otherwise, if there are multiple neighbors, process the first
|
|
// and store the rest to the atom index stack
|
|
else {
|
|
ce->pushToBeginStack(aiNbr);
|
|
}
|
|
}
|
|
// if no neighbors were found, move on to the next atom index
|
|
// in the stack
|
|
if (!be) {
|
|
continue;
|
|
}
|
|
std::uint8_t allowedBondsPerAtom[2];
|
|
// loop over the two atoms that need be bonded
|
|
for (unsigned int i = 0; i < 2; ++i) {
|
|
// for each atom, find which bond orders are allowed
|
|
allowedBondsPerAtom[i] = ae[i]->findAllowedBonds(bi);
|
|
// if for either atom this is the last bond, mark the
|
|
// atom as definitive, otherwise push the atom index
|
|
// to the stack
|
|
if (ae[i]->isLastBond()) {
|
|
ae[i]->setDefinitive();
|
|
} else {
|
|
ce->pushToBeginStack(ai[i]);
|
|
}
|
|
}
|
|
// logical AND between allowed bond masks for the two atoms
|
|
std::uint8_t allowedBonds =
|
|
allowedBondsPerAtom[BEGIN_POS] & allowedBondsPerAtom[END_POS];
|
|
bool isAnyBondAllowed = false;
|
|
bool needToFork = false;
|
|
// make a copy of the current ConjElectrons object
|
|
ceCopy = new ConjElectrons(*ce);
|
|
// consider single, double and triple bond alternatives in turn
|
|
for (unsigned int i = 0; i < 3; ++i) {
|
|
unsigned int t = i * 2;
|
|
ConjElectrons *ceToSet = nullptr;
|
|
std::uint8_t orderMask = (1 << t);
|
|
std::uint8_t chgMask = (1 << (t + 1));
|
|
unsigned int bo = i + 1;
|
|
// if the currently available electrons are enough for this
|
|
// bond type and both atoms can accept this bond type
|
|
// and we are not in a situation where both atoms are left
|
|
// of N and both need a charge to accept this bond type,
|
|
// then this bond type is feasible
|
|
if ((ce->currElectrons() >= (bo * 2)) && (allowedBonds & orderMask) &&
|
|
!((allowedBonds & chgMask) &&
|
|
((ae[BEGIN_POS]->oe() < 5) || (ae[END_POS]->oe() < 5)))) {
|
|
isAnyBondAllowed = true;
|
|
// if another bond type will be possible, then we'll
|
|
// need to save that to ceStack and keep on with the current
|
|
// ConjElectrons object
|
|
if (!needToFork) {
|
|
needToFork = true;
|
|
ceToSet = ce;
|
|
} else {
|
|
auto *ceFork = new ConjElectrons(*ceCopy);
|
|
ceStack.push(ceFork);
|
|
ceToSet = ceFork;
|
|
}
|
|
}
|
|
if (ceToSet) {
|
|
// set bond orders and finalize atoms as needed
|
|
ceToSet->getBondElectronsWithIdx(bi)->setOrder(bo);
|
|
for (unsigned int j = 0; j < 2; ++j) {
|
|
if (ae[j]->isLastBond()) {
|
|
ceToSet->getAtomElectronsWithIdx(ai[j])->finalizeAtom();
|
|
}
|
|
}
|
|
}
|
|
}
|
|
delete ceCopy;
|
|
// if a dead end was hit, discard this ConjElectrons object
|
|
if (!isAnyBondAllowed) {
|
|
delete ce;
|
|
ce = nullptr;
|
|
}
|
|
}
|
|
if (ce) {
|
|
// if this bond arrangement was already stored previously,
|
|
// discard it
|
|
if (!ce->haveFP(ceSetTmp, fpFlagsTmp)) {
|
|
delete ce;
|
|
} else {
|
|
// enumerate possible non-bonded electron
|
|
// arrangements, and store them in ceMap
|
|
ce->enumerateNonBonded(ceMap, ceDegCount, ceStats);
|
|
delete ce;
|
|
// quit the loop early if the number of non-degenerate
|
|
// structures already exceeds d_maxStructs
|
|
if (d_callback.get()) {
|
|
d_callback->d_progress[conjGrpIdx].d_totalStructs = ceMap.size();
|
|
d_callback->d_progress[conjGrpIdx].d_diverseStructs =
|
|
ceDegCount.size();
|
|
if (!(*d_callback)()) {
|
|
d_wasCanceled = true;
|
|
break;
|
|
}
|
|
}
|
|
if (ceDegCount.size() >= d_maxStructs) {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
while (!ceStack.empty()) {
|
|
delete ceStack.top();
|
|
ceStack.pop();
|
|
}
|
|
}
|
|
|
|
// getter function which returns the bondConjGrpIdx for a given
|
|
// bond index, or -1 if the bond is not conjugated
|
|
int ResonanceMolSupplier::getBondConjGrpIdx(unsigned int bi) const {
|
|
if (bi >= d_bondConjGrpIdx.size()) {
|
|
std::stringstream ss;
|
|
ss << "d_bondConjGrpIdx.size() = " << d_bondConjGrpIdx.size()
|
|
<< ", bi = " << bi;
|
|
throw std::runtime_error(ss.str());
|
|
}
|
|
return d_bondConjGrpIdx[bi];
|
|
}
|
|
|
|
// getter function which returns the atomConjGrpIdx for a given
|
|
// atom index, or -1 if the atom is not conjugated
|
|
int ResonanceMolSupplier::getAtomConjGrpIdx(unsigned int ai) const {
|
|
if (ai >= d_atomConjGrpIdx.size()) {
|
|
std::stringstream ss;
|
|
ss << "d_atomConjGrpIdx.size() = " << d_atomConjGrpIdx.size()
|
|
<< ", ai = " << ai;
|
|
throw std::runtime_error(ss.str());
|
|
}
|
|
return d_atomConjGrpIdx[ai];
|
|
}
|
|
|
|
// resizes d_ceVect3 vector
|
|
inline void ResonanceMolSupplier::resizeCeVect() {
|
|
d_ceVect3.resize(d_nConjGrp, nullptr);
|
|
}
|
|
|
|
// stores the ConjElectrons pointers currently stored in ceMap
|
|
// in the d_ceVect3 vector
|
|
inline void ResonanceMolSupplier::storeCEMap(const CEMap &ceMap,
|
|
unsigned int conjGrpIdx) {
|
|
d_ceVect3[conjGrpIdx] = new CEVect2(ceMap);
|
|
}
|
|
|
|
void ResonanceMolSupplier::setResonanceMolSupplierLength() {
|
|
for (unsigned int i = 0; (d_length < d_maxStructs) && (i < d_ceVect3.size());
|
|
++i) {
|
|
boost::uint64_t p = d_length * d_ceVect3[i]->ceCount();
|
|
d_length =
|
|
((p < d_maxStructs) ? static_cast<unsigned int>(p) : d_maxStructs);
|
|
}
|
|
}
|
|
|
|
// Returns the number of resonance structures in the
|
|
// ResonanceMolSupplier
|
|
unsigned int ResonanceMolSupplier::length() {
|
|
enumerate();
|
|
return d_length;
|
|
}
|
|
|
|
// Resets the ResonanceMolSupplier index
|
|
void ResonanceMolSupplier::reset() {
|
|
enumerate();
|
|
d_idx = 0;
|
|
}
|
|
|
|
// Returns true if there are no more resonance structures left
|
|
bool ResonanceMolSupplier::atEnd() {
|
|
enumerate();
|
|
return (d_idx == d_length);
|
|
}
|
|
|
|
// Returns a pointer to the next resonance structure as a ROMol,
|
|
// or NULL if there are no more resonance structures left.
|
|
// The caller is responsible for freeing memory associated to
|
|
// the pointer
|
|
ROMol *ResonanceMolSupplier::next() {
|
|
enumerate();
|
|
return (atEnd() ? nullptr : (*this)[d_idx++]);
|
|
}
|
|
|
|
// sets the ResonanceMolSupplier index to idx
|
|
void ResonanceMolSupplier::moveTo(unsigned int idx) {
|
|
enumerate();
|
|
if (idx >= d_length) {
|
|
std::stringstream ss;
|
|
ss << "d_length = " << d_length << ", idx = " << idx;
|
|
throw std::runtime_error(ss.str());
|
|
}
|
|
d_idx = idx;
|
|
}
|
|
|
|
// returns the resonance structure with index idx as a ROMol *
|
|
// the index returns resonance structures combining ConjElectrons
|
|
// objects in a breadth-first fashion, in order to return the most
|
|
// likely complete resonance structures first
|
|
ROMol *ResonanceMolSupplier::operator[](unsigned int idx) {
|
|
enumerate();
|
|
if (idx >= d_length) {
|
|
std::stringstream ss;
|
|
ss << "d_length = " << d_length << ", idx = " << idx;
|
|
throw std::runtime_error(ss.str());
|
|
}
|
|
std::vector<unsigned int> c;
|
|
idxToCEPerm(d_enumIdx[idx], c);
|
|
return assignBondsFormalCharges(c);
|
|
}
|
|
|
|
// helper function to assign bond orders and formal charges to the
|
|
// mol passed as reference out of the ConjElectrons objects whose
|
|
// indices are passed with the c vector
|
|
void ResonanceMolSupplier::assignBondsFormalChargesHelper(
|
|
ROMol &mol, std::vector<unsigned int> &c) const {
|
|
for (unsigned int conjGrpIdx = 0; conjGrpIdx < d_nConjGrp; ++conjGrpIdx) {
|
|
unsigned int i = conjGrpIdx * 2;
|
|
ConjElectrons *ce = d_ceVect3[conjGrpIdx]->getCE(c[i], c[i + 1]);
|
|
ce->assignBondsFormalChargesToMol(mol);
|
|
}
|
|
}
|
|
|
|
// returns a pointer to a ROMol with bond orders and formal charges
|
|
// assigned out of the ConjElectrons objects whose indices are passed
|
|
// with the c vector
|
|
ROMol *ResonanceMolSupplier::assignBondsFormalCharges(
|
|
std::vector<unsigned int> &c) const {
|
|
auto *mol = new ROMol(this->mol());
|
|
assignBondsFormalChargesHelper(*mol, c);
|
|
ResonanceUtils::fixExplicitImplicitHs(*mol);
|
|
ResonanceUtils::sanitizeMol((RWMol &)*mol);
|
|
return mol;
|
|
}
|
|
} // namespace RDKit
|