mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-03 21:44:30 +08:00
* run clang-tidy with readability-braces-around-statements clang-format the results clean up all the parts that clang-tidy-8 broke * fix problem on windows
860 lines
22 KiB
C++
860 lines
22 KiB
C++
/*==============================================*/
|
|
/* Copyright (C) 2011-2019 NextMove Software */
|
|
/* All rights reserved. */
|
|
/* */
|
|
/* This file is part of molhash. */
|
|
/* */
|
|
/* The contents are covered by the terms of the */
|
|
/* BSD license, which is included in the file */
|
|
/* license.txt. */
|
|
/*==============================================*/
|
|
#define _CRT_SECURE_NO_WARNINGS
|
|
|
|
#include <string.h>
|
|
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
|
|
#include <string>
|
|
#include <GraphMol/RDKitBase.h>
|
|
#include <GraphMol/RDKitQueries.h>
|
|
#include <GraphMol/SmilesParse/SmilesWrite.h>
|
|
|
|
#include "nmmolhash.h"
|
|
#include "mf.h"
|
|
|
|
namespace {
|
|
unsigned int NMRDKitBondGetOrder(const RDKit::Bond *bnd) {
|
|
PRECONDITION(bnd, "bad bond");
|
|
switch (bnd->getBondType()) {
|
|
case RDKit::Bond::AROMATIC:
|
|
case RDKit::Bond::SINGLE:
|
|
return 1;
|
|
case RDKit::Bond::DOUBLE:
|
|
return 2;
|
|
case RDKit::Bond::TRIPLE:
|
|
return 3;
|
|
case RDKit::Bond::QUADRUPLE:
|
|
return 4;
|
|
case RDKit::Bond::QUINTUPLE:
|
|
return 5;
|
|
case RDKit::Bond::HEXTUPLE:
|
|
return 6;
|
|
default:
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
RDKit::Bond *NMRDKitMolNewBond(RDKit::RWMol *mol, RDKit::Atom *src,
|
|
RDKit::Atom *dst, unsigned int order,
|
|
bool arom) {
|
|
PRECONDITION(mol, "bad molecule");
|
|
PRECONDITION(src, "bad src atom");
|
|
PRECONDITION(dst, "bad dest atom");
|
|
RDKit::Bond *result;
|
|
result = mol->getBondBetweenAtoms(src->getIdx(), dst->getIdx());
|
|
if (result) {
|
|
if (order == 1) {
|
|
switch (result->getBondType()) {
|
|
case RDKit::Bond::SINGLE:
|
|
result->setBondType(RDKit::Bond::DOUBLE);
|
|
break;
|
|
case RDKit::Bond::DOUBLE:
|
|
result->setBondType(RDKit::Bond::TRIPLE);
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
return result;
|
|
}
|
|
RDKit::Bond::BondType type = RDKit::Bond::UNSPECIFIED;
|
|
if (!arom) {
|
|
switch (order) {
|
|
case 1:
|
|
type = RDKit::Bond::SINGLE;
|
|
break;
|
|
case 2:
|
|
type = RDKit::Bond::DOUBLE;
|
|
break;
|
|
case 3:
|
|
type = RDKit::Bond::TRIPLE;
|
|
break;
|
|
case 4:
|
|
type = RDKit::Bond::QUADRUPLE;
|
|
break;
|
|
}
|
|
} else {
|
|
type = RDKit::Bond::AROMATIC;
|
|
}
|
|
|
|
result = new RDKit::Bond(type);
|
|
result->setOwningMol(mol);
|
|
result->setBeginAtom(src);
|
|
result->setEndAtom(dst);
|
|
mol->addBond(result, true);
|
|
if (arom) {
|
|
result->setIsAromatic(true);
|
|
}
|
|
return result;
|
|
}
|
|
|
|
void NMRDKitSanitizeHydrogens(RDKit::RWMol *mol) {
|
|
PRECONDITION(mol, "bad molecule");
|
|
// Move all of the implicit Hs into one box
|
|
for (auto aptr : mol->atoms()) {
|
|
unsigned int hcount = aptr->getTotalNumHs();
|
|
aptr->setNoImplicit(true);
|
|
aptr->setNumExplicitHs(hcount);
|
|
aptr->updatePropertyCache(); // or else the valence is reported incorrectly
|
|
}
|
|
}
|
|
|
|
} // namespace
|
|
|
|
namespace RDKit {
|
|
namespace MolHash {
|
|
static unsigned int NMDetermineComponents(RWMol *mol, unsigned int *parts,
|
|
unsigned int acount) {
|
|
PRECONDITION(mol, "bad molecule");
|
|
PRECONDITION(parts, "bad parts pointer");
|
|
memset(parts, 0, acount * sizeof(unsigned int));
|
|
std::vector<Atom *> todo;
|
|
|
|
unsigned int result = 0;
|
|
for (auto aptr : mol->atoms()) {
|
|
unsigned int idx = aptr->getIdx();
|
|
if (parts[idx] == 0) {
|
|
parts[idx] = ++result;
|
|
todo.push_back(aptr);
|
|
|
|
while (!todo.empty()) {
|
|
aptr = todo.back();
|
|
todo.pop_back();
|
|
for (auto nbri :
|
|
boost::make_iterator_range(mol->getAtomNeighbors(aptr))) {
|
|
auto nptr = (*mol)[nbri];
|
|
idx = nptr->getIdx();
|
|
if (parts[idx] == 0) {
|
|
parts[idx] = result;
|
|
todo.push_back(nptr);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return result;
|
|
}
|
|
|
|
static std::string NMMolecularFormula(RWMol *mol, const unsigned int *parts,
|
|
unsigned int part) {
|
|
PRECONDITION(mol, "bad molecule");
|
|
PRECONDITION((!part || parts), "bad parts pointer");
|
|
unsigned int hist[256];
|
|
int charge = 0;
|
|
|
|
memset(hist, 0, sizeof(hist));
|
|
|
|
for (auto aptr : mol->atoms()) {
|
|
unsigned int idx = aptr->getIdx();
|
|
if (part == 0 || parts[idx] == part) {
|
|
unsigned int elem = aptr->getAtomicNum();
|
|
if (elem < 256) {
|
|
hist[elem]++;
|
|
} else {
|
|
hist[0]++;
|
|
}
|
|
hist[1] += aptr->getTotalNumHs(false);
|
|
charge += aptr->getFormalCharge();
|
|
}
|
|
}
|
|
|
|
char buffer[16];
|
|
std::string result;
|
|
const unsigned char *perm = hist[6] ? OrganicHillOrder : InorganicHillOrder;
|
|
for (unsigned int i = 0; i < 119; i++) {
|
|
unsigned int elem = perm[i];
|
|
if (hist[elem] > 0) {
|
|
result += symbol[elem];
|
|
if (hist[elem] > 1) {
|
|
sprintf(buffer, "%u", hist[elem]);
|
|
result += buffer;
|
|
}
|
|
}
|
|
}
|
|
if (charge != 0) {
|
|
if (charge > 0) {
|
|
result += '+';
|
|
if (charge > 1) {
|
|
sprintf(buffer, "%d", charge);
|
|
result += buffer;
|
|
}
|
|
} else { // charge < 0
|
|
result += '-';
|
|
if (charge < -1) {
|
|
sprintf(buffer, "%d", -charge);
|
|
result += buffer;
|
|
}
|
|
}
|
|
}
|
|
return result;
|
|
}
|
|
|
|
static std::string NMMolecularFormula(RWMol *mol, bool sep = false) {
|
|
PRECONDITION(mol, "bad molecule");
|
|
if (!sep) {
|
|
return NMMolecularFormula(mol, nullptr, 0);
|
|
}
|
|
|
|
unsigned int acount = mol->getNumAtoms();
|
|
if (acount == 0) {
|
|
return "";
|
|
}
|
|
|
|
auto size = (unsigned int)(acount * sizeof(int));
|
|
auto *parts = (unsigned int *)malloc(size);
|
|
unsigned int pcount = NMDetermineComponents(mol, parts, acount);
|
|
|
|
std::string result;
|
|
if (pcount > 1) {
|
|
std::vector<std::string> vmf;
|
|
for (unsigned int i = 1; i <= pcount; i++) {
|
|
vmf.push_back(NMMolecularFormula(mol, parts, i));
|
|
}
|
|
|
|
// sort
|
|
result = vmf[0];
|
|
for (unsigned int i = 1; i < pcount; i++) {
|
|
result += ".";
|
|
result += vmf[i];
|
|
}
|
|
} else { // pcount == 1
|
|
result = NMMolecularFormula(mol, parts, 1);
|
|
}
|
|
free(parts);
|
|
return result;
|
|
}
|
|
|
|
static void NormalizeHCount(Atom *aptr) {
|
|
PRECONDITION(aptr, "bad atom pointer");
|
|
unsigned int hcount;
|
|
|
|
switch (aptr->getAtomicNum()) {
|
|
case 9: // Fluorine
|
|
case 17: // Chlorine
|
|
case 35: // Bromine
|
|
case 53: // Iodine
|
|
hcount = aptr->getDegree();
|
|
hcount = hcount < 1 ? 1 - hcount : 0;
|
|
break;
|
|
case 8: // Oxygen
|
|
case 16: // Sulfur
|
|
hcount = aptr->getDegree();
|
|
hcount = hcount < 2 ? 2 - hcount : 0;
|
|
break;
|
|
case 5: // Boron
|
|
hcount = aptr->getDegree();
|
|
hcount = hcount < 3 ? 3 - hcount : 0;
|
|
break;
|
|
case 7: // Nitogen
|
|
case 15: // Phosphorus
|
|
hcount = aptr->getDegree();
|
|
if (hcount < 3) {
|
|
hcount = 3 - hcount;
|
|
} else if (hcount == 4) {
|
|
hcount = 1;
|
|
} else {
|
|
hcount = 0;
|
|
}
|
|
break;
|
|
case 6: // Carbon
|
|
hcount = aptr->getDegree();
|
|
hcount = hcount < 4 ? 4 - hcount : 0;
|
|
break;
|
|
default:
|
|
hcount = 0;
|
|
}
|
|
aptr->setNoImplicit(true);
|
|
aptr->setNumExplicitHs(hcount);
|
|
}
|
|
|
|
static std::string AnonymousGraph(RWMol *mol, bool elem) {
|
|
PRECONDITION(mol, "bad molecule");
|
|
std::string result;
|
|
int charge = 0;
|
|
|
|
for (auto aptr : mol->atoms()) {
|
|
charge += aptr->getFormalCharge();
|
|
aptr->setIsAromatic(false);
|
|
aptr->setFormalCharge(0);
|
|
if (!elem) {
|
|
aptr->setNumExplicitHs(0);
|
|
aptr->setNoImplicit(true);
|
|
aptr->setAtomicNum(0);
|
|
} else {
|
|
NormalizeHCount(aptr);
|
|
}
|
|
}
|
|
|
|
for (auto bptr : mol->bonds()) {
|
|
bptr->setBondType(Bond::SINGLE);
|
|
}
|
|
MolOps::assignRadicals(*mol);
|
|
result = MolToSmiles(*mol);
|
|
return result;
|
|
}
|
|
|
|
static std::string MesomerHash(RWMol *mol, bool netq) {
|
|
PRECONDITION(mol, "bad molecule");
|
|
std::string result;
|
|
char buffer[32];
|
|
int charge = 0;
|
|
|
|
for (auto aptr : mol->atoms()) {
|
|
charge += aptr->getFormalCharge();
|
|
aptr->setIsAromatic(false);
|
|
aptr->setFormalCharge(0);
|
|
}
|
|
|
|
for (auto bptr : mol->bonds()) {
|
|
bptr->setBondType(Bond::SINGLE);
|
|
}
|
|
|
|
MolOps::assignRadicals(*mol);
|
|
result = MolToSmiles(*mol);
|
|
if (netq) {
|
|
sprintf(buffer, "_%d", charge);
|
|
result += buffer;
|
|
}
|
|
return result;
|
|
}
|
|
|
|
static std::string TautomerHash(RWMol *mol, bool proto) {
|
|
PRECONDITION(mol, "bad molecule");
|
|
std::string result;
|
|
char buffer[32];
|
|
int hcount = 0;
|
|
int charge = 0;
|
|
|
|
for (auto aptr : mol->atoms()) {
|
|
charge += aptr->getFormalCharge();
|
|
aptr->setIsAromatic(false);
|
|
aptr->setFormalCharge(0);
|
|
if (aptr->getAtomicNum() != 6) {
|
|
hcount += aptr->getTotalNumHs(false);
|
|
aptr->setNoImplicit(true);
|
|
aptr->setNumExplicitHs(0);
|
|
}
|
|
}
|
|
|
|
for (auto bptr : mol->bonds()) {
|
|
if (bptr->getBondType() != Bond::SINGLE &&
|
|
(bptr->getIsConjugated() || bptr->getBeginAtom()->getAtomicNum() != 6 ||
|
|
bptr->getEndAtom()->getAtomicNum() != 6)) {
|
|
bptr->setIsAromatic(false);
|
|
bptr->setBondType(Bond::SINGLE);
|
|
bptr->setStereo(Bond::BondStereo::STEREONONE);
|
|
}
|
|
}
|
|
|
|
MolOps::assignRadicals(*mol);
|
|
// we may have just destroyed some stereocenters/bonds
|
|
// clean that up:
|
|
bool cleanIt = true;
|
|
bool force = true;
|
|
MolOps::assignStereochemistry(*mol, cleanIt, force);
|
|
result = MolToSmiles(*mol);
|
|
if (!proto) {
|
|
sprintf(buffer, "_%d_%d", hcount, charge);
|
|
} else {
|
|
sprintf(buffer, "_%d", hcount - charge);
|
|
}
|
|
result += buffer;
|
|
return result;
|
|
}
|
|
|
|
static bool TraverseForRing(Atom *atom, unsigned char *visit) {
|
|
PRECONDITION(atom, "bad atom pointer");
|
|
PRECONDITION(visit, "bad pointer");
|
|
visit[atom->getIdx()] = 1;
|
|
for (auto nbri : boost::make_iterator_range(
|
|
atom->getOwningMol().getAtomNeighbors(atom))) {
|
|
auto nptr = atom->getOwningMol()[nbri];
|
|
if (visit[nptr->getIdx()] == 0) {
|
|
if (RDKit::queryIsAtomInRing(nptr)) {
|
|
return true;
|
|
}
|
|
|
|
if (TraverseForRing(nptr, visit)) {
|
|
return true;
|
|
}
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
static bool DepthFirstSearchForRing(Atom *root, Atom *nbor,
|
|
unsigned int maxatomidx) {
|
|
PRECONDITION(root, "bad atom pointer");
|
|
PRECONDITION(nbor, "bad atom pointer");
|
|
|
|
unsigned int natoms = maxatomidx;
|
|
auto *visit = (unsigned char *)alloca(natoms);
|
|
memset(visit, 0, natoms);
|
|
|
|
visit[root->getIdx()] = true;
|
|
return TraverseForRing(nbor, visit);
|
|
}
|
|
|
|
bool IsInScaffold(Atom *atom, unsigned int maxatomidx) {
|
|
PRECONDITION(atom, "bad atom pointer");
|
|
if (RDKit::queryIsAtomInRing(atom)) {
|
|
return true;
|
|
}
|
|
|
|
unsigned int count = 0;
|
|
for (auto nbri : boost::make_iterator_range(
|
|
atom->getOwningMol().getAtomNeighbors(atom))) {
|
|
auto nptr = atom->getOwningMol()[nbri];
|
|
if (DepthFirstSearchForRing(atom, nptr, maxatomidx)) {
|
|
++count;
|
|
}
|
|
}
|
|
return count > 1;
|
|
}
|
|
|
|
static bool HasNbrInScaffold(Atom *aptr, unsigned char *is_in_scaffold) {
|
|
PRECONDITION(aptr, "bad atom pointer");
|
|
PRECONDITION(is_in_scaffold, "bad pointer");
|
|
for (auto nbri : boost::make_iterator_range(
|
|
aptr->getOwningMol().getAtomNeighbors(aptr))) {
|
|
auto nptr = aptr->getOwningMol()[nbri];
|
|
if (is_in_scaffold[nptr->getIdx()]) {
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
static std::string ExtendedMurckoScaffold(RWMol *mol) {
|
|
PRECONDITION(mol, "bad molecule");
|
|
RDKit::MolOps::fastFindRings(*mol);
|
|
|
|
unsigned int maxatomidx = mol->getNumAtoms();
|
|
auto *is_in_scaffold = (unsigned char *)alloca(maxatomidx);
|
|
for (auto aptr : mol->atoms()) {
|
|
is_in_scaffold[aptr->getIdx()] = IsInScaffold(aptr, maxatomidx);
|
|
}
|
|
|
|
std::vector<Atom *> for_deletion;
|
|
for (auto aptr : mol->atoms()) {
|
|
unsigned int aidx = aptr->getIdx();
|
|
if (is_in_scaffold[aidx]) {
|
|
continue;
|
|
}
|
|
if (HasNbrInScaffold(aptr, is_in_scaffold)) {
|
|
aptr->setAtomicNum(0);
|
|
aptr->setFormalCharge(0);
|
|
aptr->setNoImplicit(true);
|
|
aptr->setNumExplicitHs(0);
|
|
} else {
|
|
for_deletion.push_back(aptr);
|
|
}
|
|
}
|
|
|
|
for (auto &i : for_deletion) {
|
|
mol->removeAtom(i);
|
|
}
|
|
|
|
MolOps::assignRadicals(*mol);
|
|
std::string result;
|
|
result = MolToSmiles(*mol);
|
|
return result;
|
|
}
|
|
|
|
static std::string MurckoScaffoldHash(RWMol *mol) {
|
|
PRECONDITION(mol, "bad molecule");
|
|
std::vector<Atom *> for_deletion;
|
|
do {
|
|
for_deletion.clear();
|
|
for (auto aptr : mol->atoms()) {
|
|
unsigned int deg = aptr->getDegree();
|
|
if (deg < 2) {
|
|
if (deg == 1) { // i.e. not 0 and the last atom in the molecule
|
|
for (const auto &nbri : boost::make_iterator_range(
|
|
aptr->getOwningMol().getAtomBonds(aptr))) {
|
|
auto bptr = (aptr->getOwningMol())[nbri];
|
|
Atom *nbr = bptr->getOtherAtom(aptr);
|
|
unsigned int hcount = nbr->getTotalNumHs(false);
|
|
nbr->setNumExplicitHs(hcount + NMRDKitBondGetOrder(bptr));
|
|
nbr->setNoImplicit(true);
|
|
}
|
|
}
|
|
for_deletion.push_back(aptr);
|
|
}
|
|
}
|
|
for (auto &i : for_deletion) {
|
|
mol->removeAtom(i);
|
|
}
|
|
} while (!for_deletion.empty());
|
|
MolOps::assignRadicals(*mol);
|
|
std::string result;
|
|
result = MolToSmiles(*mol);
|
|
return result;
|
|
}
|
|
|
|
static std::string NetChargeHash(RWMol *mol) {
|
|
PRECONDITION(mol, "bad molecule");
|
|
int totalq = 0;
|
|
|
|
for (auto aptr : mol->atoms()) {
|
|
totalq += aptr->getFormalCharge();
|
|
}
|
|
|
|
char buffer[16];
|
|
sprintf(buffer, "%d", totalq);
|
|
return buffer;
|
|
}
|
|
|
|
static std::string SmallWorldHash(RWMol *mol, bool brl) {
|
|
PRECONDITION(mol, "bad molecule");
|
|
char buffer[64];
|
|
|
|
unsigned int acount = mol->getNumAtoms();
|
|
unsigned int bcount = mol->getNumBonds();
|
|
unsigned int rcount = (bcount + 1) - acount;
|
|
|
|
if (brl) {
|
|
unsigned int lcount = 0;
|
|
for (auto aptr : mol->atoms()) {
|
|
if (aptr->getDegree() == 2) {
|
|
lcount++;
|
|
}
|
|
}
|
|
sprintf(buffer, "B%uR%uL%u", bcount, rcount, lcount);
|
|
} else {
|
|
sprintf(buffer, "B%uR%u", bcount, rcount);
|
|
}
|
|
return buffer;
|
|
}
|
|
|
|
static void DegreeVector(RWMol *mol, unsigned int *v) {
|
|
memset(v, 0, 4 * sizeof(unsigned int));
|
|
for (auto aptr : mol->atoms()) {
|
|
switch (aptr->getDegree()) {
|
|
case 4:
|
|
v[0]++;
|
|
break;
|
|
case 3:
|
|
v[1]++;
|
|
break;
|
|
case 2:
|
|
v[2]++;
|
|
break;
|
|
case 1:
|
|
v[3]++;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
static bool HasDoubleBond(Atom *atom) {
|
|
PRECONDITION(atom, "bad atom");
|
|
for (const auto &nbri :
|
|
boost::make_iterator_range(atom->getOwningMol().getAtomBonds(atom))) {
|
|
auto bptr = (atom->getOwningMol())[nbri];
|
|
if (NMRDKitBondGetOrder(bptr) == 2) {
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
// Determine whether/how to fragment bond
|
|
// -1 means don't fragment bond
|
|
// 0 means break, with hydrogens on both beg and end
|
|
// 1 means break, with asterisk on beg and hydrogen on end
|
|
// 2 means break, with hydrogen on beg and asterisk on end
|
|
// 3 means break, with asterisks on both beg and end
|
|
|
|
static int RegioisomerBond(Bond *bnd) {
|
|
PRECONDITION(bnd, "bad bond");
|
|
if (NMRDKitBondGetOrder(bnd) != 1) {
|
|
return -1;
|
|
}
|
|
if (RDKit::queryIsBondInRing(bnd)) {
|
|
return -1;
|
|
}
|
|
|
|
Atom *beg = bnd->getBeginAtom();
|
|
Atom *end = bnd->getEndAtom();
|
|
unsigned int beg_elem = beg->getAtomicNum();
|
|
unsigned int end_elem = end->getAtomicNum();
|
|
|
|
if (beg_elem == 0 || end_elem == 0) {
|
|
return -1;
|
|
}
|
|
|
|
if (RDKit::queryIsAtomInRing(beg)) {
|
|
if (RDKit::queryIsAtomInRing(end)) {
|
|
return 0;
|
|
}
|
|
return 2;
|
|
}
|
|
if (RDKit::queryIsAtomInRing(end)) {
|
|
return 1;
|
|
}
|
|
|
|
if (beg_elem != 6 && end_elem == 6 && !HasDoubleBond(end)) {
|
|
return 1;
|
|
}
|
|
if (beg_elem == 6 && end_elem != 6 && !HasDoubleBond(beg)) {
|
|
return 2;
|
|
}
|
|
|
|
return -1;
|
|
}
|
|
|
|
static void ClearEZStereo(Atom *atm) {
|
|
PRECONDITION(atm, "bad atom");
|
|
for (const auto &nbri :
|
|
boost::make_iterator_range(atm->getOwningMol().getAtomBonds(atm))) {
|
|
auto bptr = (atm->getOwningMol())[nbri];
|
|
if (bptr->getStereo() > RDKit::Bond::STEREOANY) {
|
|
bptr->setStereo(RDKit::Bond::STEREOANY);
|
|
}
|
|
}
|
|
}
|
|
|
|
static std::string RegioisomerHash(RWMol *mol) {
|
|
PRECONDITION(mol, "bad molecule");
|
|
|
|
// we need a copy of the molecule so that we can loop over the bonds of
|
|
// something while modifying something else
|
|
RDKit::MolOps::fastFindRings(*mol);
|
|
RDKit::ROMol molcpy(*mol);
|
|
for (int i = molcpy.getNumBonds() - 1; i >= 0; --i) {
|
|
auto bptr = molcpy.getBondWithIdx(i);
|
|
int split = RegioisomerBond(bptr);
|
|
if (split >= 0) {
|
|
bptr = mol->getBondWithIdx(i);
|
|
Atom *beg = bptr->getBeginAtom();
|
|
Atom *end = bptr->getEndAtom();
|
|
mol->removeBond(bptr->getBeginAtomIdx(), bptr->getEndAtomIdx());
|
|
ClearEZStereo(beg);
|
|
ClearEZStereo(end);
|
|
|
|
if (split & 1) {
|
|
Atom *star = new RDKit::Atom(0);
|
|
mol->addAtom(star, true, true);
|
|
star->setNoImplicit(true);
|
|
NMRDKitMolNewBond(mol, beg, star, 1, false);
|
|
} else {
|
|
unsigned int hcount = beg->getTotalNumHs(false);
|
|
beg->setNumExplicitHs(hcount + 1);
|
|
beg->setNoImplicit(true);
|
|
}
|
|
if (split & 2) {
|
|
Atom *star = new RDKit::Atom(0);
|
|
mol->addAtom(star, true, true);
|
|
star->setNoImplicit(true);
|
|
NMRDKitMolNewBond(mol, end, star, 1, false);
|
|
} else {
|
|
unsigned int hcount = end->getTotalNumHs(false);
|
|
end->setNumExplicitHs(hcount + 1);
|
|
end->setNoImplicit(true);
|
|
}
|
|
}
|
|
}
|
|
|
|
std::string result;
|
|
result = MolToSmiles(*mol);
|
|
return result;
|
|
}
|
|
|
|
static std::string ArthorSubOrderHash(RWMol *mol) {
|
|
PRECONDITION(mol, "bad molecule");
|
|
char buffer[256];
|
|
|
|
unsigned int acount = mol->getNumAtoms();
|
|
unsigned int bcount = mol->getNumBonds();
|
|
|
|
unsigned int pcount = 1;
|
|
unsigned int size = 4 * mol->getNumAtoms() + 4;
|
|
auto *parts = (unsigned int *)malloc(size);
|
|
if (parts) {
|
|
memset(parts, 0, size);
|
|
pcount = NMDetermineComponents(mol, parts, acount);
|
|
free(parts);
|
|
}
|
|
|
|
unsigned int ccount = 0;
|
|
unsigned int ocount = 0;
|
|
unsigned int zcount = 0;
|
|
unsigned int icount = 0;
|
|
unsigned int qcount = 0;
|
|
unsigned int rcount = 0;
|
|
|
|
for (auto aptr : mol->atoms()) {
|
|
unsigned int elem = aptr->getAtomicNum();
|
|
int charge = aptr->getFormalCharge();
|
|
switch (elem) {
|
|
case 6: // Carbon
|
|
ccount++;
|
|
if (charge == 0 && aptr->getTotalValence() != 4) {
|
|
rcount++;
|
|
}
|
|
break;
|
|
case 7: // Nitrogen
|
|
case 15: // Phosphorus
|
|
ocount++;
|
|
if (charge == 0) {
|
|
unsigned int valence = aptr->getTotalValence();
|
|
if (valence != 3 && valence != 5) {
|
|
rcount++;
|
|
}
|
|
}
|
|
break;
|
|
case 8: // Oxygen
|
|
ocount++;
|
|
if (charge && aptr->getTotalValence() != 2) {
|
|
rcount++;
|
|
}
|
|
break;
|
|
case 9: // Fluorine
|
|
ocount++;
|
|
if (charge && aptr->getTotalValence() != 1) {
|
|
rcount++;
|
|
}
|
|
break;
|
|
case 17: // Chlorine
|
|
case 35: // Bromine
|
|
case 53: // Iodine
|
|
ocount++;
|
|
if (charge == 0) {
|
|
unsigned int valence = aptr->getTotalValence();
|
|
if (valence != 1 && valence != 3 && valence != 5 && valence != 7) {
|
|
rcount++;
|
|
}
|
|
}
|
|
break;
|
|
case 16: // Sulfur
|
|
ocount++;
|
|
if (charge == 0) {
|
|
unsigned int valence = aptr->getTotalValence();
|
|
if (valence != 2 && valence != 4 && valence != 6) {
|
|
rcount++;
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
zcount += elem;
|
|
if (aptr->getIsotope() != 0) {
|
|
icount++;
|
|
}
|
|
if (charge != 0) {
|
|
qcount++;
|
|
}
|
|
}
|
|
|
|
if (acount > 0xffff) {
|
|
acount = 0xffff;
|
|
}
|
|
if (bcount > 0xffff) {
|
|
bcount = 0xffff;
|
|
}
|
|
if (pcount > 0xff) {
|
|
pcount = 0xff;
|
|
}
|
|
if (ccount > 0xffff) {
|
|
ccount = 0xffff;
|
|
}
|
|
if (ocount > 0xffff) {
|
|
ocount = 0xffff;
|
|
}
|
|
if (zcount > 0xffffff) {
|
|
zcount = 0xffffff;
|
|
}
|
|
if (rcount > 0xff) {
|
|
rcount = 0xff;
|
|
}
|
|
if (qcount > 0xff) {
|
|
qcount = 0xff;
|
|
}
|
|
if (icount > 0xff) {
|
|
icount = 0xff;
|
|
}
|
|
|
|
sprintf(buffer, "%04x%04x%02x%04x%04x%06x%02x%02x%02x", acount, bcount,
|
|
pcount, ccount, ocount, zcount, rcount, qcount, icount);
|
|
return buffer;
|
|
}
|
|
|
|
std::string MolHash(RWMol *mol, HashFunction func) {
|
|
PRECONDITION(mol, "bad molecule");
|
|
std::string result;
|
|
char buffer[32];
|
|
NMRDKitSanitizeHydrogens(mol);
|
|
|
|
switch (func) {
|
|
default:
|
|
case HashFunction::AnonymousGraph:
|
|
result = AnonymousGraph(mol, false);
|
|
break;
|
|
case HashFunction::ElementGraph:
|
|
result = AnonymousGraph(mol, true);
|
|
break;
|
|
case HashFunction::CanonicalSmiles:
|
|
result = MolToSmiles(*mol);
|
|
break;
|
|
case HashFunction::MurckoScaffold:
|
|
result = MurckoScaffoldHash(mol);
|
|
break;
|
|
case HashFunction::ExtendedMurcko:
|
|
result = ExtendedMurckoScaffold(mol);
|
|
break;
|
|
case HashFunction::Mesomer:
|
|
result = MesomerHash(mol, true);
|
|
break;
|
|
case HashFunction::RedoxPair:
|
|
result = MesomerHash(mol, false);
|
|
break;
|
|
case HashFunction::HetAtomTautomer:
|
|
result = TautomerHash(mol, false);
|
|
break;
|
|
case HashFunction::HetAtomProtomer:
|
|
result = TautomerHash(mol, true);
|
|
break;
|
|
case HashFunction::MolFormula:
|
|
result = NMMolecularFormula(mol);
|
|
break;
|
|
case HashFunction::AtomBondCounts:
|
|
sprintf(buffer, "%u,%u", mol->getNumAtoms(), mol->getNumBonds());
|
|
result = buffer;
|
|
break;
|
|
case HashFunction::NetCharge:
|
|
result = NetChargeHash(mol);
|
|
break;
|
|
case HashFunction::SmallWorldIndexBR:
|
|
result = SmallWorldHash(mol, false);
|
|
break;
|
|
case HashFunction::SmallWorldIndexBRL:
|
|
result = SmallWorldHash(mol, true);
|
|
break;
|
|
case HashFunction::DegreeVector: {
|
|
unsigned int dv[4];
|
|
DegreeVector(mol, dv);
|
|
sprintf(buffer, "%u,%u,%u,%u", dv[0], dv[1], dv[2], dv[3]);
|
|
result = buffer;
|
|
} break;
|
|
case HashFunction::ArthorSubstructureOrder:
|
|
result = ArthorSubOrderHash(mol);
|
|
break;
|
|
case HashFunction::Regioisomer:
|
|
result = RegioisomerHash(mol);
|
|
break;
|
|
}
|
|
return result;
|
|
}
|
|
} // namespace MolHash
|
|
} // namespace RDKit
|