diff --git a/Code/GraphMol/FileParsers/ProximityBonds.cpp b/Code/GraphMol/FileParsers/ProximityBonds.cpp index 6f3b1b2c9..e4e70f339 100644 --- a/Code/GraphMol/FileParsers/ProximityBonds.cpp +++ b/Code/GraphMol/FileParsers/ProximityBonds.cpp @@ -1,393 +1,393 @@ -// -// Copyright (C) 2013-2017 Greg Landrum and NextMove Software -// -// @@ All Rights Reserved @@ -// This file is part of the RDKit. -// The contents are covered by the terms of the BSD license -// which is included in the file license.txt, found at the root -// of the RDKit source tree. -// -#include "ProximityBonds.h" -#include -#include -#include -#include - -namespace RDKit { - -static const double EXTDIST = 0.45; -// static const double MAXRAD = 2.50; -// static const double MINDIST = 0.40; -static const double MAXDIST = 5.45; // 2*MAXRAD + EXTDIST -static const double MINDIST2 = 0.16; // MINDIST*MINDIST -static const double MAXDIST2 = 29.7025; // MAXDIST*MAXDIST - -struct ProximityEntry { - float x, y, z, r; - int atm, hash, next, elem; - - bool operator<(const ProximityEntry &p) const { return x < p.x; } -}; - -static bool IsBonded(ProximityEntry *p, ProximityEntry *q, unsigned int flags) { - if (flags & ctdIGNORE_H_H_CONTACTS && p->elem == 1 && q->elem == 1) - return false; - double dx = (double)p->x - (double)q->x; - double dist2 = dx * dx; - if (dist2 > MAXDIST2) return false; - double dy = (double)p->y - (double)q->y; - dist2 += dy * dy; - if (dist2 > MAXDIST2) return false; - double dz = (double)p->z - (double)q->z; - dist2 += dz * dz; - - if (dist2 > MAXDIST2 || dist2 < MINDIST2) return false; - - double radius = (double)p->r + (double)q->r + EXTDIST; - return dist2 <= radius * radius; -} - -bool SamePDBResidue(AtomPDBResidueInfo *p, AtomPDBResidueInfo *q) { - return p->getResidueNumber() == q->getResidueNumber() && - p->getResidueName() == q->getResidueName() && - p->getChainId() == q->getChainId() && - p->getInsertionCode() == q->getInsertionCode(); -} - -static bool IsBlacklistedAtom(Atom *atom) { - // blacklist metals, noble gasses and halogens - int elem = atom->getAtomicNum(); - // make an inverse query (non-metals and metaloids) - if ((5 <= elem && elem <= 8) || (14 <= elem && elem <= 16) || - (32 <= elem && elem <= 34) || (51 <= elem && elem <= 52)) - return false; - else - return true; -} - -bool IsBlacklistedPair(Atom *beg_atom, Atom *end_atom) { - PRECONDITION(beg_atom, "empty atom"); - PRECONDITION(end_atom, "empty atom"); - - AtomPDBResidueInfo *beg_info = - (AtomPDBResidueInfo *)beg_atom->getMonomerInfo(); - AtomPDBResidueInfo *end_info = - (AtomPDBResidueInfo *)end_atom->getMonomerInfo(); - if (!beg_info || beg_info->getMonomerType() != AtomMonomerInfo::PDBRESIDUE) - return false; - if (!end_info || end_info->getMonomerType() != AtomMonomerInfo::PDBRESIDUE) - return false; - - if (!SamePDBResidue(beg_info, end_info)) { - if (IsBlacklistedAtom(beg_atom) || IsBlacklistedAtom(end_atom)) return true; - // Dont make bonds to waters - if (beg_info->getResidueName() == "HOH" || - end_info->getResidueName() == "HOH") - return true; - } - return false; -} - -/* -static void ConnectTheDots_Small(RWMol *mol) -{ -unsigned int count = mol->getNumAtoms(); -ProximityEntry *tmp = (ProximityEntry*)malloc(count*sizeof(ProximityEntry)); -PeriodicTable *table = PeriodicTable::getTable(); -Conformer *conf = &mol->getConformer(); -for (unsigned int i=0; igetAtomWithIdx(i); - unsigned int elem = atom->getAtomicNum(); - RDGeom::Point3D p = conf->getAtomPos(i); - ProximityEntry *tmpi = tmp+i; - tmpi->x = (float)p.x; - tmpi->y = (float)p.y; - tmpi->z = (float)p.z; - tmpi->r = (float)table->getRcovalent(elem); - for (unsigned int j=0; jgetBondBetweenAtoms(i,j)) - mol->addBond(i,j,Bond::SINGLE); - } -} -free(tmp); -} - - -static void ConnectTheDots_Medium(RWMol *mol) -{ -int count = mol->getNumAtoms(); -std::vector tmp(count); -PeriodicTable *table = PeriodicTable::getTable(); -Conformer *conf = &mol->getConformer(); -for (int i=0; igetAtomWithIdx(i); - unsigned int elem = atom->getAtomicNum(); - RDGeom::Point3D p = conf->getAtomPos(i); - ProximityEntry *tmpi = &tmp[i]; - tmpi->x = (float)p.x; - tmpi->y = (float)p.y; - tmpi->z = (float)p.z; - tmpi->r = (float)table->getRcovalent(elem); - tmpi->atm = i; -} - -std::stable_sort(tmp.begin(),tmp.end()); - -for (int j=0; jx - MAXDIST; - for (int k=j-1; k>=0; k--) { - ProximityEntry *tmpk = &tmp[k]; - if (tmpk->x < limit) - break; - if (IsBonded(tmpj,tmpk) && - !mol->getBondBetweenAtoms(tmpj->atm,tmpk->atm)) - mol->addBond(tmpj->atm,tmpk->atm,Bond::SINGLE); - } -} -} -*/ - -#define HASHSIZE 1024 -#define HASHMASK 1023 -#define HASHX 571 -#define HASHY 127 -#define HASHZ 3 - -static void ConnectTheDots_Large(RWMol *mol, unsigned int flags) { - int HashTable[HASHSIZE]; - memset(HashTable, -1, sizeof(HashTable)); - - unsigned int count = mol->getNumAtoms(); - ProximityEntry *tmp = - (ProximityEntry *)malloc(count * sizeof(ProximityEntry)); - CHECK_INVARIANT(tmp,"bad allocation"); - PeriodicTable *table = PeriodicTable::getTable(); - Conformer *conf = &mol->getConformer(); - - for (unsigned int i = 0; i < count; i++) { - Atom *atom = mol->getAtomWithIdx(i); - unsigned int elem = atom->getAtomicNum(); - RDGeom::Point3D p = conf->getAtomPos(i); - ProximityEntry *tmpi = tmp + i; - tmpi->x = (float)p.x; - tmpi->y = (float)p.y; - tmpi->z = (float)p.z; - tmpi->r = (float)table->getRcovalent(elem); - tmpi->atm = i; - tmpi->elem = elem; - - int hash = HASHX * (int)(p.x / MAXDIST) + HASHY * (int)(p.y / MAXDIST) + - HASHZ * (int)(p.z / MAXDIST); - - for (int dx = -HASHX; dx <= HASHX; dx += HASHX) - for (int dy = -HASHY; dy <= HASHY; dy += HASHY) - for (int dz = -HASHZ; dz <= HASHZ; dz += HASHZ) { - int probe = hash + dx + dy + dz; - int list = HashTable[probe & HASHMASK]; - while (list != -1) { - ProximityEntry *tmpj = &tmp[list]; - if (tmpj->hash == probe && IsBonded(tmpi, tmpj, flags) && - !mol->getBondBetweenAtoms(tmpi->atm, tmpj->atm) && - !IsBlacklistedPair(atom, mol->getAtomWithIdx(tmpj->atm))) - mol->addBond(tmpi->atm, tmpj->atm, Bond::SINGLE); - list = tmpj->next; - } - } - int list = hash & HASHMASK; - tmpi->next = HashTable[list]; - HashTable[list] = i; - tmpi->hash = hash; - } - // Cleanup pass - for (unsigned int i = 0; i < count; i++) { - Atom *atom = mol->getAtomWithIdx(i); - unsigned int elem = atom->getAtomicNum(); - // detect multivalent Hs, which could happen with ConnectTheDots - if (elem == 1 && atom->getDegree() > 1) { - AtomPDBResidueInfo *atom_info = - (AtomPDBResidueInfo *)(atom->getMonomerInfo()); - // cut all but shortest Bond - RDGeom::Point3D p = conf->getAtomPos(i); - RDKit::RWMol::ADJ_ITER nbr, end_nbr; - boost::tie(nbr, end_nbr) = mol->getAtomNeighbors(atom); - float best = 10000; - unsigned int best_idx = mol->getNumAtoms() + 1; - while (nbr != end_nbr) { - RDGeom::Point3D pn = conf->getAtomPos(*nbr); - float d = (p - pn).length(); - AtomPDBResidueInfo *n_info = - (AtomPDBResidueInfo *)(mol->getAtomWithIdx(*nbr)->getMonomerInfo()); - if (d < best && - atom_info->getResidueNumber() == n_info->getResidueNumber()) { - best = d; - best_idx = *nbr; - } - ++nbr; - } - // iterate again and remove all but closest - boost::tie(nbr, end_nbr) = mol->getAtomNeighbors(atom); - while (nbr != end_nbr) { - if (*nbr == best_idx) { - Bond *bond = mol->getBondBetweenAtoms(i, *nbr); - bond->setBondType(Bond::SINGLE); // make sure this one is single - } else { - mol->removeBond(i, *nbr); - } - ++nbr; - } - } - } - free(tmp); -} - -void ConnectTheDots(RWMol *mol, unsigned int flags) { - if (!mol || !mol->getNumConformers()) return; - // Determine optimal algorithm to use by getNumAtoms()? - ConnectTheDots_Large(mol, flags); -} - -// These are macros to allow their use in C++ constants -#define BCNAM(A, B, C) (((A) << 16) | ((B) << 8) | (C)) -#define BCATM(A, B, C, D) (((A) << 24) | ((B) << 16) | ((C) << 8) | (D)) - -static bool StandardPDBDoubleBond(unsigned int rescode, unsigned int atm1, - unsigned int atm2) { - if (atm1 > atm2) { - unsigned int tmp = atm1; - atm1 = atm2; - atm2 = tmp; - } - - switch (rescode) { - case BCNAM('A', 'L', 'A'): - case BCNAM('C', 'Y', 'S'): - case BCNAM('G', 'L', 'Y'): - case BCNAM('I', 'L', 'E'): - case BCNAM('L', 'E', 'U'): - case BCNAM('L', 'Y', 'S'): - case BCNAM('M', 'E', 'T'): - case BCNAM('P', 'R', 'O'): - case BCNAM('S', 'E', 'R'): - case BCNAM('T', 'H', 'R'): - case BCNAM('V', 'A', 'L'): - if (atm1 == BCATM(' ', 'C', ' ', ' ') && - atm2 == BCATM(' ', 'O', ' ', ' ')) - return true; - break; - case BCNAM('A', 'R', 'G'): - if (atm1 == BCATM(' ', 'C', ' ', ' ') && - atm2 == BCATM(' ', 'O', ' ', ' ')) - return true; - if (atm1 == BCATM(' ', 'C', 'Z', ' ') && - atm2 == BCATM(' ', 'N', 'H', '2')) - return true; - break; - case BCNAM('A', 'S', 'N'): - case BCNAM('A', 'S', 'P'): - if (atm1 == BCATM(' ', 'C', ' ', ' ') && - atm2 == BCATM(' ', 'O', ' ', ' ')) - return true; - if (atm1 == BCATM(' ', 'C', 'G', ' ') && - atm2 == BCATM(' ', 'O', 'D', '1')) - return true; - break; - case BCNAM('G', 'L', 'N'): - case BCNAM('G', 'L', 'U'): - if (atm1 == BCATM(' ', 'C', ' ', ' ') && - atm2 == BCATM(' ', 'O', ' ', ' ')) - return true; - if (atm1 == BCATM(' ', 'C', 'D', ' ') && - atm2 == BCATM(' ', 'O', 'E', '1')) - return true; - break; - case BCNAM('H', 'I', 'S'): - if (atm1 == BCATM(' ', 'C', ' ', ' ') && - atm2 == BCATM(' ', 'O', ' ', ' ')) - return true; - if (atm1 == BCATM(' ', 'C', 'D', '2') && - atm2 == BCATM(' ', 'C', 'G', ' ')) - return true; - if (atm1 == BCATM(' ', 'C', 'E', '1') && - atm2 == BCATM(' ', 'N', 'D', '1')) - return true; - break; - case BCNAM('P', 'H', 'E'): - case BCNAM('T', 'Y', 'R'): - if (atm1 == BCATM(' ', 'C', ' ', ' ') && - atm2 == BCATM(' ', 'O', ' ', ' ')) - return true; - if (atm1 == BCATM(' ', 'C', 'D', '1') && - atm2 == BCATM(' ', 'C', 'G', ' ')) - return true; - if (atm1 == BCATM(' ', 'C', 'D', '2') && - atm2 == BCATM(' ', 'C', 'E', '2')) - return true; - if (atm1 == BCATM(' ', 'C', 'E', '1') && - atm2 == BCATM(' ', 'C', 'Z', ' ')) - return true; - break; - case BCNAM('T', 'R', 'P'): - if (atm1 == BCATM(' ', 'C', ' ', ' ') && - atm2 == BCATM(' ', 'O', ' ', ' ')) - return true; - if (atm1 == BCATM(' ', 'C', 'D', '1') && - atm2 == BCATM(' ', 'C', 'G', ' ')) - return true; - if (atm1 == BCATM(' ', 'C', 'D', '2') && - atm2 == BCATM(' ', 'C', 'E', '2')) - return true; - if (atm1 == BCATM(' ', 'C', 'E', '3') && - atm2 == BCATM(' ', 'C', 'Z', '3')) - return true; - if (atm1 == BCATM(' ', 'C', 'H', '2') && - atm2 == BCATM(' ', 'C', 'Z', '2')) - return true; - break; - } - return false; -} - -static bool StandardPDBDoubleBond(RWMol *mol, Atom *beg, Atom *end) { - AtomPDBResidueInfo *bInfo = (AtomPDBResidueInfo *)beg->getMonomerInfo(); - if (!bInfo || bInfo->getMonomerType() != AtomMonomerInfo::PDBRESIDUE) - return false; - AtomPDBResidueInfo *eInfo = (AtomPDBResidueInfo *)end->getMonomerInfo(); - if (!eInfo || eInfo->getMonomerType() != AtomMonomerInfo::PDBRESIDUE) - return false; - if (!SamePDBResidue(bInfo, eInfo)) return false; - if (bInfo->getIsHeteroAtom() || eInfo->getIsHeteroAtom()) return false; - - const char *ptr = bInfo->getResidueName().c_str(); - unsigned int rescode = BCNAM(ptr[0], ptr[1], ptr[2]); - ptr = bInfo->getName().c_str(); - unsigned int atm1 = BCATM(ptr[0], ptr[1], ptr[2], ptr[3]); - ptr = eInfo->getName().c_str(); - unsigned int atm2 = BCATM(ptr[0], ptr[1], ptr[2], ptr[3]); - - if (!StandardPDBDoubleBond(rescode, atm1, atm2)) return false; - - // Check that neither end already has a double bond - ROMol::OBOND_ITER_PAIR bp; - for (bp = mol->getAtomBonds(beg); bp.first != bp.second; ++bp.first) - if ((*mol)[*bp.first]->getBondType() == Bond::DOUBLE) return false; - for (bp = mol->getAtomBonds(end); bp.first != bp.second; ++bp.first) - if ((*mol)[*bp.first]->getBondType() == Bond::DOUBLE) return false; - - return true; -} - -void StandardPDBResidueBondOrders(RWMol *mol) { - RWMol::BondIterator bondIt; - for (bondIt = mol->beginBonds(); bondIt != mol->endBonds(); ++bondIt) { - Bond *bond = *bondIt; - if (bond->getBondType() == Bond::SINGLE) { - Atom *beg = bond->getBeginAtom(); - Atom *end = bond->getEndAtom(); - if (StandardPDBDoubleBond(mol, beg, end)) bond->setBondType(Bond::DOUBLE); - } - } -} - -} // namespace RDKit +// +// Copyright (C) 2013-2017 Greg Landrum and NextMove Software +// +// @@ All Rights Reserved @@ +// This file is part of the RDKit. +// The contents are covered by the terms of the BSD license +// which is included in the file license.txt, found at the root +// of the RDKit source tree. +// +#include "ProximityBonds.h" +#include +#include +#include +#include + +namespace RDKit { + +static const double EXTDIST = 0.45; +// static const double MAXRAD = 2.50; +// static const double MINDIST = 0.40; +static const double MAXDIST = 5.45; // 2*MAXRAD + EXTDIST +static const double MINDIST2 = 0.16; // MINDIST*MINDIST +static const double MAXDIST2 = 29.7025; // MAXDIST*MAXDIST + +struct ProximityEntry { + float x, y, z, r; + int atm, hash, next, elem; + + bool operator<(const ProximityEntry &p) const { return x < p.x; } +}; + +static bool IsBonded(ProximityEntry *p, ProximityEntry *q, unsigned int flags) { + if (flags & ctdIGNORE_H_H_CONTACTS && p->elem == 1 && q->elem == 1) + return false; + double dx = (double)p->x - (double)q->x; + double dist2 = dx * dx; + if (dist2 > MAXDIST2) return false; + double dy = (double)p->y - (double)q->y; + dist2 += dy * dy; + if (dist2 > MAXDIST2) return false; + double dz = (double)p->z - (double)q->z; + dist2 += dz * dz; + + if (dist2 > MAXDIST2 || dist2 < MINDIST2) return false; + + double radius = (double)p->r + (double)q->r + EXTDIST; + return dist2 <= radius * radius; +} + +bool SamePDBResidue(AtomPDBResidueInfo *p, AtomPDBResidueInfo *q) { + return p->getResidueNumber() == q->getResidueNumber() && + p->getResidueName() == q->getResidueName() && + p->getChainId() == q->getChainId() && + p->getInsertionCode() == q->getInsertionCode(); +} + +static bool IsBlacklistedAtom(Atom *atom) { + // blacklist metals, noble gasses and halogens + int elem = atom->getAtomicNum(); + // make an inverse query (non-metals and metaloids) + if ((5 <= elem && elem <= 8) || (14 <= elem && elem <= 16) || + (32 <= elem && elem <= 34) || (51 <= elem && elem <= 52)) + return false; + else + return true; +} + +bool IsBlacklistedPair(Atom *beg_atom, Atom *end_atom) { + PRECONDITION(beg_atom, "empty atom"); + PRECONDITION(end_atom, "empty atom"); + + AtomPDBResidueInfo *beg_info = + (AtomPDBResidueInfo *)beg_atom->getMonomerInfo(); + AtomPDBResidueInfo *end_info = + (AtomPDBResidueInfo *)end_atom->getMonomerInfo(); + if (!beg_info || beg_info->getMonomerType() != AtomMonomerInfo::PDBRESIDUE) + return false; + if (!end_info || end_info->getMonomerType() != AtomMonomerInfo::PDBRESIDUE) + return false; + + if (!SamePDBResidue(beg_info, end_info)) { + if (IsBlacklistedAtom(beg_atom) || IsBlacklistedAtom(end_atom)) return true; + // Dont make bonds to waters + if (beg_info->getResidueName() == "HOH" || + end_info->getResidueName() == "HOH") + return true; + } + return false; +} + +/* +static void ConnectTheDots_Small(RWMol *mol) +{ +unsigned int count = mol->getNumAtoms(); +ProximityEntry *tmp = (ProximityEntry*)malloc(count*sizeof(ProximityEntry)); +PeriodicTable *table = PeriodicTable::getTable(); +Conformer *conf = &mol->getConformer(); +for (unsigned int i=0; igetAtomWithIdx(i); + unsigned int elem = atom->getAtomicNum(); + RDGeom::Point3D p = conf->getAtomPos(i); + ProximityEntry *tmpi = tmp+i; + tmpi->x = (float)p.x; + tmpi->y = (float)p.y; + tmpi->z = (float)p.z; + tmpi->r = (float)table->getRcovalent(elem); + for (unsigned int j=0; jgetBondBetweenAtoms(i,j)) + mol->addBond(i,j,Bond::SINGLE); + } +} +free(tmp); +} + + +static void ConnectTheDots_Medium(RWMol *mol) +{ +int count = mol->getNumAtoms(); +std::vector tmp(count); +PeriodicTable *table = PeriodicTable::getTable(); +Conformer *conf = &mol->getConformer(); +for (int i=0; igetAtomWithIdx(i); + unsigned int elem = atom->getAtomicNum(); + RDGeom::Point3D p = conf->getAtomPos(i); + ProximityEntry *tmpi = &tmp[i]; + tmpi->x = (float)p.x; + tmpi->y = (float)p.y; + tmpi->z = (float)p.z; + tmpi->r = (float)table->getRcovalent(elem); + tmpi->atm = i; +} + +std::stable_sort(tmp.begin(),tmp.end()); + +for (int j=0; jx - MAXDIST; + for (int k=j-1; k>=0; k--) { + ProximityEntry *tmpk = &tmp[k]; + if (tmpk->x < limit) + break; + if (IsBonded(tmpj,tmpk) && + !mol->getBondBetweenAtoms(tmpj->atm,tmpk->atm)) + mol->addBond(tmpj->atm,tmpk->atm,Bond::SINGLE); + } +} +} +*/ + +#define HASHSIZE 1024 +#define HASHMASK 1023 +#define HASHX 571 +#define HASHY 127 +#define HASHZ 3 + +static void ConnectTheDots_Large(RWMol *mol, unsigned int flags) { + int HashTable[HASHSIZE]; + memset(HashTable, -1, sizeof(HashTable)); + + unsigned int count = mol->getNumAtoms(); + ProximityEntry *tmp = + (ProximityEntry *)malloc(count * sizeof(ProximityEntry)); + CHECK_INVARIANT(tmp,"bad allocation"); + PeriodicTable *table = PeriodicTable::getTable(); + Conformer *conf = &mol->getConformer(); + + for (unsigned int i = 0; i < count; i++) { + Atom *atom = mol->getAtomWithIdx(i); + unsigned int elem = atom->getAtomicNum(); + RDGeom::Point3D p = conf->getAtomPos(i); + ProximityEntry *tmpi = tmp + i; + tmpi->x = (float)p.x; + tmpi->y = (float)p.y; + tmpi->z = (float)p.z; + tmpi->r = (float)table->getRcovalent(elem); + tmpi->atm = i; + tmpi->elem = elem; + + int hash = HASHX * (int)(p.x / MAXDIST) + HASHY * (int)(p.y / MAXDIST) + + HASHZ * (int)(p.z / MAXDIST); + + for (int dx = -HASHX; dx <= HASHX; dx += HASHX) + for (int dy = -HASHY; dy <= HASHY; dy += HASHY) + for (int dz = -HASHZ; dz <= HASHZ; dz += HASHZ) { + int probe = hash + dx + dy + dz; + int list = HashTable[probe & HASHMASK]; + while (list != -1) { + ProximityEntry *tmpj = &tmp[list]; + if (tmpj->hash == probe && IsBonded(tmpi, tmpj, flags) && + !mol->getBondBetweenAtoms(tmpi->atm, tmpj->atm) && + !IsBlacklistedPair(atom, mol->getAtomWithIdx(tmpj->atm))) + mol->addBond(tmpi->atm, tmpj->atm, Bond::SINGLE); + list = tmpj->next; + } + } + int list = hash & HASHMASK; + tmpi->next = HashTable[list]; + HashTable[list] = i; + tmpi->hash = hash; + } + // Cleanup pass + for (unsigned int i = 0; i < count; i++) { + Atom *atom = mol->getAtomWithIdx(i); + unsigned int elem = atom->getAtomicNum(); + // detect multivalent Hs, which could happen with ConnectTheDots + if (elem == 1 && atom->getDegree() > 1) { + AtomPDBResidueInfo *atom_info = + (AtomPDBResidueInfo *)(atom->getMonomerInfo()); + // cut all but shortest Bond + RDGeom::Point3D p = conf->getAtomPos(i); + RDKit::RWMol::ADJ_ITER nbr, end_nbr; + boost::tie(nbr, end_nbr) = mol->getAtomNeighbors(atom); + float best = 10000; + unsigned int best_idx = mol->getNumAtoms() + 1; + while (nbr != end_nbr) { + RDGeom::Point3D pn = conf->getAtomPos(*nbr); + float d = (p - pn).length(); + AtomPDBResidueInfo *n_info = + (AtomPDBResidueInfo *)(mol->getAtomWithIdx(*nbr)->getMonomerInfo()); + if (d < best && + atom_info->getResidueNumber() == n_info->getResidueNumber()) { + best = d; + best_idx = *nbr; + } + ++nbr; + } + // iterate again and remove all but closest + boost::tie(nbr, end_nbr) = mol->getAtomNeighbors(atom); + while (nbr != end_nbr) { + if (*nbr == best_idx) { + Bond *bond = mol->getBondBetweenAtoms(i, *nbr); + bond->setBondType(Bond::SINGLE); // make sure this one is single + } else { + mol->removeBond(i, *nbr); + } + ++nbr; + } + } + } + free(tmp); +} + +void ConnectTheDots(RWMol *mol, unsigned int flags) { + if (!mol || !mol->getNumConformers()) return; + // Determine optimal algorithm to use by getNumAtoms()? + ConnectTheDots_Large(mol, flags); +} + +// These are macros to allow their use in C++ constants +#define BCNAM(A, B, C) (((A) << 16) | ((B) << 8) | (C)) +#define BCATM(A, B, C, D) (((A) << 24) | ((B) << 16) | ((C) << 8) | (D)) + +static bool StandardPDBDoubleBond(unsigned int rescode, unsigned int atm1, + unsigned int atm2) { + if (atm1 > atm2) { + unsigned int tmp = atm1; + atm1 = atm2; + atm2 = tmp; + } + + switch (rescode) { + case BCNAM('A', 'L', 'A'): + case BCNAM('C', 'Y', 'S'): + case BCNAM('G', 'L', 'Y'): + case BCNAM('I', 'L', 'E'): + case BCNAM('L', 'E', 'U'): + case BCNAM('L', 'Y', 'S'): + case BCNAM('M', 'E', 'T'): + case BCNAM('P', 'R', 'O'): + case BCNAM('S', 'E', 'R'): + case BCNAM('T', 'H', 'R'): + case BCNAM('V', 'A', 'L'): + if (atm1 == BCATM(' ', 'C', ' ', ' ') && + atm2 == BCATM(' ', 'O', ' ', ' ')) + return true; + break; + case BCNAM('A', 'R', 'G'): + if (atm1 == BCATM(' ', 'C', ' ', ' ') && + atm2 == BCATM(' ', 'O', ' ', ' ')) + return true; + if (atm1 == BCATM(' ', 'C', 'Z', ' ') && + atm2 == BCATM(' ', 'N', 'H', '2')) + return true; + break; + case BCNAM('A', 'S', 'N'): + case BCNAM('A', 'S', 'P'): + if (atm1 == BCATM(' ', 'C', ' ', ' ') && + atm2 == BCATM(' ', 'O', ' ', ' ')) + return true; + if (atm1 == BCATM(' ', 'C', 'G', ' ') && + atm2 == BCATM(' ', 'O', 'D', '1')) + return true; + break; + case BCNAM('G', 'L', 'N'): + case BCNAM('G', 'L', 'U'): + if (atm1 == BCATM(' ', 'C', ' ', ' ') && + atm2 == BCATM(' ', 'O', ' ', ' ')) + return true; + if (atm1 == BCATM(' ', 'C', 'D', ' ') && + atm2 == BCATM(' ', 'O', 'E', '1')) + return true; + break; + case BCNAM('H', 'I', 'S'): + if (atm1 == BCATM(' ', 'C', ' ', ' ') && + atm2 == BCATM(' ', 'O', ' ', ' ')) + return true; + if (atm1 == BCATM(' ', 'C', 'D', '2') && + atm2 == BCATM(' ', 'C', 'G', ' ')) + return true; + if (atm1 == BCATM(' ', 'C', 'E', '1') && + atm2 == BCATM(' ', 'N', 'D', '1')) + return true; + break; + case BCNAM('P', 'H', 'E'): + case BCNAM('T', 'Y', 'R'): + if (atm1 == BCATM(' ', 'C', ' ', ' ') && + atm2 == BCATM(' ', 'O', ' ', ' ')) + return true; + if (atm1 == BCATM(' ', 'C', 'D', '1') && + atm2 == BCATM(' ', 'C', 'G', ' ')) + return true; + if (atm1 == BCATM(' ', 'C', 'D', '2') && + atm2 == BCATM(' ', 'C', 'E', '2')) + return true; + if (atm1 == BCATM(' ', 'C', 'E', '1') && + atm2 == BCATM(' ', 'C', 'Z', ' ')) + return true; + break; + case BCNAM('T', 'R', 'P'): + if (atm1 == BCATM(' ', 'C', ' ', ' ') && + atm2 == BCATM(' ', 'O', ' ', ' ')) + return true; + if (atm1 == BCATM(' ', 'C', 'D', '1') && + atm2 == BCATM(' ', 'C', 'G', ' ')) + return true; + if (atm1 == BCATM(' ', 'C', 'D', '2') && + atm2 == BCATM(' ', 'C', 'E', '2')) + return true; + if (atm1 == BCATM(' ', 'C', 'E', '3') && + atm2 == BCATM(' ', 'C', 'Z', '3')) + return true; + if (atm1 == BCATM(' ', 'C', 'H', '2') && + atm2 == BCATM(' ', 'C', 'Z', '2')) + return true; + break; + } + return false; +} + +static bool StandardPDBDoubleBond(RWMol *mol, Atom *beg, Atom *end) { + AtomPDBResidueInfo *bInfo = (AtomPDBResidueInfo *)beg->getMonomerInfo(); + if (!bInfo || bInfo->getMonomerType() != AtomMonomerInfo::PDBRESIDUE) + return false; + AtomPDBResidueInfo *eInfo = (AtomPDBResidueInfo *)end->getMonomerInfo(); + if (!eInfo || eInfo->getMonomerType() != AtomMonomerInfo::PDBRESIDUE) + return false; + if (!SamePDBResidue(bInfo, eInfo)) return false; + if (bInfo->getIsHeteroAtom() || eInfo->getIsHeteroAtom()) return false; + + const char *ptr = bInfo->getResidueName().c_str(); + unsigned int rescode = BCNAM(ptr[0], ptr[1], ptr[2]); + ptr = bInfo->getName().c_str(); + unsigned int atm1 = BCATM(ptr[0], ptr[1], ptr[2], ptr[3]); + ptr = eInfo->getName().c_str(); + unsigned int atm2 = BCATM(ptr[0], ptr[1], ptr[2], ptr[3]); + + if (!StandardPDBDoubleBond(rescode, atm1, atm2)) return false; + + // Check that neither end already has a double bond + ROMol::OBOND_ITER_PAIR bp; + for (bp = mol->getAtomBonds(beg); bp.first != bp.second; ++bp.first) + if ((*mol)[*bp.first]->getBondType() == Bond::DOUBLE) return false; + for (bp = mol->getAtomBonds(end); bp.first != bp.second; ++bp.first) + if ((*mol)[*bp.first]->getBondType() == Bond::DOUBLE) return false; + + return true; +} + +void StandardPDBResidueBondOrders(RWMol *mol) { + RWMol::BondIterator bondIt; + for (bondIt = mol->beginBonds(); bondIt != mol->endBonds(); ++bondIt) { + Bond *bond = *bondIt; + if (bond->getBondType() == Bond::SINGLE) { + Atom *beg = bond->getBeginAtom(); + Atom *end = bond->getEndAtom(); + if (StandardPDBDoubleBond(mol, beg, end)) bond->setBondType(Bond::DOUBLE); + } + } +} + +} // namespace RDKit diff --git a/Code/GraphMol/ForceFieldHelpers/FFConvenience.h b/Code/GraphMol/ForceFieldHelpers/FFConvenience.h index db04b84ba..b06b4f199 100644 --- a/Code/GraphMol/ForceFieldHelpers/FFConvenience.h +++ b/Code/GraphMol/ForceFieldHelpers/FFConvenience.h @@ -1,122 +1,122 @@ -// -// Copyright (C) 2019 Paolo Tosco -// -// @@ All Rights Reserved @@ -// This file is part of the RDKit. -// The contents are covered by the terms of the BSD license -// which is included in the file license.txt, found at the root -// of the RDKit source tree. -// -#include -#ifndef RD_FFCONVENIENCE_H -#define RD_FFCONVENIENCE_H -#include -#include - -namespace RDKit { -class ROMol; -namespace ForceFieldsHelper { -namespace detail { -#ifdef RDK_THREADSAFE_SSS -void OptimizeMoleculeConfsHelper_(ForceFields::ForceField ff, ROMol *mol, - std::vector> *res, - unsigned int threadIdx, - unsigned int numThreads, int maxIters) { - PRECONDITION(mol, "mol must not be nullptr"); - PRECONDITION(res, "res must not be nullptr"); - PRECONDITION(res->size() >= mol->getNumConformers(), "res->size() must be >= mol->getNumConformers()"); - unsigned int i = 0; - ff.positions().resize(mol->getNumAtoms()); - for (ROMol::ConformerIterator cit = mol->beginConformers(); - cit != mol->endConformers(); ++cit, ++i) { - if (i % numThreads != threadIdx) continue; - for (unsigned int aidx = 0; aidx < mol->getNumAtoms(); ++aidx) { - ff.positions()[aidx] = &(*cit)->getAtomPos(aidx); - } - ff.initialize(); - int needsMore = ff.minimize(maxIters); - double e = ff.calcEnergy(); - (*res)[i] = std::make_pair(needsMore, e); - } -} - -void OptimizeMoleculeConfsMT(ROMol &mol, const ForceFields::ForceField &ff, - std::vector> &res, - int numThreads, int maxIters) { - std::vector tg; - for (int ti = 0; ti < numThreads; ++ti) { - tg.emplace_back(std::thread(detail::OptimizeMoleculeConfsHelper_, - ff, &mol, &res, ti, numThreads, maxIters)); - } - for (auto &thread : tg) { - if (thread.joinable()) thread.join(); - } -} -#endif - -void OptimizeMoleculeConfsST(ROMol &mol, ForceFields::ForceField &ff, - std::vector> &res, - int maxIters) { - PRECONDITION(res.size() >= mol.getNumConformers(), "res.size() must be >= mol.getNumConformers()"); - unsigned int i = 0; - for (ROMol::ConformerIterator cit = mol.beginConformers(); - cit != mol.endConformers(); ++cit, ++i) { - for (unsigned int aidx = 0; aidx < mol.getNumAtoms(); ++aidx) { - ff.positions()[aidx] = &(*cit)->getAtomPos(aidx); - } - ff.initialize(); - int needsMore = ff.minimize(maxIters); - double e = ff.calcEnergy(); - res[i] = std::make_pair(needsMore, e); - } -} -} // end of detail namespace - -//! Convenience function for optimizing a molecule using a pre-generated force-field -/* - \param ff the force-field - \param res vector of (needsMore,energy) pairs - \param maxIters the maximum number of force-field iterations - - \return a pair with: - first: -1 if parameters were missing, 0 if the optimization converged, 1 if - more iterations are required. - second: the energy -*/ -std::pair OptimizeMolecule(ForceFields::ForceField &ff, int maxIters = 1000) { - ff.initialize(); - int res = ff.minimize(maxIters); - double e = ff.calcEnergy(); - return std::make_pair(res, e); -} - -//! Convenience function for optimizing all of a molecule's conformations using -// a pre-generated force-field -/* - \param mol the molecule to use - \param ff the force-field - \param res vector of (needsMore,energy) pairs - \param numThreads the number of simultaneous threads to use (only has an - effect if the RDKit is compiled with thread support). - If set to zero, the max supported by the system will be - used. - \param maxIters the maximum number of force-field iterations - -*/ -void OptimizeMoleculeConfs(ROMol &mol, ForceFields::ForceField &ff, - std::vector> &res, - int numThreads = 1, int maxIters = 1000) { - res.resize(mol.getNumConformers()); - numThreads = getNumThreadsToUse(numThreads); - if (numThreads == 1) { - detail::OptimizeMoleculeConfsST(mol, ff, res, maxIters); - } -#ifdef RDK_THREADSAFE_SSS - else { - detail::OptimizeMoleculeConfsMT(mol, ff, res, numThreads, maxIters); - } -#endif -} -} // end of namespace ForceFieldsHelper -} // end of namespace RDKit -#endif +// +// Copyright (C) 2019 Paolo Tosco +// +// @@ All Rights Reserved @@ +// This file is part of the RDKit. +// The contents are covered by the terms of the BSD license +// which is included in the file license.txt, found at the root +// of the RDKit source tree. +// +#include +#ifndef RD_FFCONVENIENCE_H +#define RD_FFCONVENIENCE_H +#include +#include + +namespace RDKit { +class ROMol; +namespace ForceFieldsHelper { +namespace detail { +#ifdef RDK_THREADSAFE_SSS +void OptimizeMoleculeConfsHelper_(ForceFields::ForceField ff, ROMol *mol, + std::vector> *res, + unsigned int threadIdx, + unsigned int numThreads, int maxIters) { + PRECONDITION(mol, "mol must not be nullptr"); + PRECONDITION(res, "res must not be nullptr"); + PRECONDITION(res->size() >= mol->getNumConformers(), "res->size() must be >= mol->getNumConformers()"); + unsigned int i = 0; + ff.positions().resize(mol->getNumAtoms()); + for (ROMol::ConformerIterator cit = mol->beginConformers(); + cit != mol->endConformers(); ++cit, ++i) { + if (i % numThreads != threadIdx) continue; + for (unsigned int aidx = 0; aidx < mol->getNumAtoms(); ++aidx) { + ff.positions()[aidx] = &(*cit)->getAtomPos(aidx); + } + ff.initialize(); + int needsMore = ff.minimize(maxIters); + double e = ff.calcEnergy(); + (*res)[i] = std::make_pair(needsMore, e); + } +} + +void OptimizeMoleculeConfsMT(ROMol &mol, const ForceFields::ForceField &ff, + std::vector> &res, + int numThreads, int maxIters) { + std::vector tg; + for (int ti = 0; ti < numThreads; ++ti) { + tg.emplace_back(std::thread(detail::OptimizeMoleculeConfsHelper_, + ff, &mol, &res, ti, numThreads, maxIters)); + } + for (auto &thread : tg) { + if (thread.joinable()) thread.join(); + } +} +#endif + +void OptimizeMoleculeConfsST(ROMol &mol, ForceFields::ForceField &ff, + std::vector> &res, + int maxIters) { + PRECONDITION(res.size() >= mol.getNumConformers(), "res.size() must be >= mol.getNumConformers()"); + unsigned int i = 0; + for (ROMol::ConformerIterator cit = mol.beginConformers(); + cit != mol.endConformers(); ++cit, ++i) { + for (unsigned int aidx = 0; aidx < mol.getNumAtoms(); ++aidx) { + ff.positions()[aidx] = &(*cit)->getAtomPos(aidx); + } + ff.initialize(); + int needsMore = ff.minimize(maxIters); + double e = ff.calcEnergy(); + res[i] = std::make_pair(needsMore, e); + } +} +} // end of detail namespace + +//! Convenience function for optimizing a molecule using a pre-generated force-field +/* + \param ff the force-field + \param res vector of (needsMore,energy) pairs + \param maxIters the maximum number of force-field iterations + + \return a pair with: + first: -1 if parameters were missing, 0 if the optimization converged, 1 if + more iterations are required. + second: the energy +*/ +std::pair OptimizeMolecule(ForceFields::ForceField &ff, int maxIters = 1000) { + ff.initialize(); + int res = ff.minimize(maxIters); + double e = ff.calcEnergy(); + return std::make_pair(res, e); +} + +//! Convenience function for optimizing all of a molecule's conformations using +// a pre-generated force-field +/* + \param mol the molecule to use + \param ff the force-field + \param res vector of (needsMore,energy) pairs + \param numThreads the number of simultaneous threads to use (only has an + effect if the RDKit is compiled with thread support). + If set to zero, the max supported by the system will be + used. + \param maxIters the maximum number of force-field iterations + +*/ +void OptimizeMoleculeConfs(ROMol &mol, ForceFields::ForceField &ff, + std::vector> &res, + int numThreads = 1, int maxIters = 1000) { + res.resize(mol.getNumConformers()); + numThreads = getNumThreadsToUse(numThreads); + if (numThreads == 1) { + detail::OptimizeMoleculeConfsST(mol, ff, res, maxIters); + } +#ifdef RDK_THREADSAFE_SSS + else { + detail::OptimizeMoleculeConfsMT(mol, ff, res, numThreads, maxIters); + } +#endif +} +} // end of namespace ForceFieldsHelper +} // end of namespace RDKit +#endif diff --git a/Code/GraphMol/Wrap/SubstanceGroup.cpp b/Code/GraphMol/Wrap/SubstanceGroup.cpp index b7ffafc55..a72f18bfb 100644 --- a/Code/GraphMol/Wrap/SubstanceGroup.cpp +++ b/Code/GraphMol/Wrap/SubstanceGroup.cpp @@ -1,117 +1,117 @@ -// -// Copyright (C) 2019 Greg Landrum -// -// @@ All Rights Reserved @@ -// This file is part of the RDKit. -// The contents are covered by the terms of the BSD license -// which is included in the file license.txt, found at the root -// of the RDKit source tree. -// - -#define NO_IMPORT_ARRAY -#include -#include -#include - -// ours -#include -#include -#include "props.hpp" - -namespace python = boost::python; - -namespace RDKit { - -namespace { -std::vector getMolSubstanceGroups(ROMol &mol) { - return getSubstanceGroups(mol); -} -void clearMolSubstanceGroups(ROMol &mol) { - std::vector &sgs = getSubstanceGroups(mol); - sgs.clear(); -} -} // namespace - -std::string sGroupClassDoc = - "A collection of atoms and bonds with associated properties\n"; - -struct sgroup_wrap { - static void wrap() { - // register the vector_indexing_suite for SubstanceGroups - // if it hasn't already been done. - // logic from https://stackoverflow.com/a/13017303 - boost::python::type_info info = - boost::python::type_id>(); - const boost::python::converter::registration *reg = - boost::python::converter::registry::query(info); - if (reg == NULL || (*reg).m_to_python == NULL) { - python::class_>("SubstanceGroup_VECT") - .def(python::vector_indexing_suite< - std::vector>()); - } - - python::class_>( - "SubstanceGroup", sGroupClassDoc.c_str(), python::no_init) - .def("GetOwningMol", &SubstanceGroup::getOwningMol, - "returns the molecule owning this SubstanceGroup", - python::return_internal_reference<>()) - .def("GetIndexInMol", &SubstanceGroup::getIndexInMol, - "returns the index of this SubstanceGroup in the owning " - "molecule's list.") - .def( - "GetAtoms", &SubstanceGroup::getAtoms, - "returns a list of the indices of the atoms in this SubstanceGroup", - python::return_value_policy()) - .def("GetParentAtoms", &SubstanceGroup::getParentAtoms, - "returns a list of the indices of the parent atoms in this " - "SubstanceGroup", - python::return_value_policy()) - .def( - "GetBonds", &SubstanceGroup::getBonds, - "returns a list of the indices of the bonds in this SubstanceGroup", - python::return_value_policy()) - .def("HasProp", - (bool (RDProps::*)(const std::string &) const) & - SubstanceGroup::hasProp, - "returns whether or not a particular property exists") - .def("GetProp", - (std::string(RDProps::*)(const std::string &) const) & - SubstanceGroup::getProp, - "returns the value of a particular property") - .def("GetIntProp", - (int (RDProps::*)(const std::string &) const) & - SubstanceGroup::getProp, - "returns the value of a particular property") - .def("GetUnsignedProp", - (unsigned int (RDProps::*)(const std::string &) const) & - SubstanceGroup::getProp, - "returns the value of a particular property") - .def("GetDoubleProp", - (double (RDProps::*)(const std::string &) const) & - SubstanceGroup::getProp, - "returns the value of a particular property") - .def("GetBoolProp", - (bool (RDProps::*)(const std::string &) const) & - SubstanceGroup::getProp, - "returns the value of a particular property") - .def("GetPropNames", &SubstanceGroup::getPropList, - (python::arg("self"), python::arg("includePrivate") = false, - python::arg("includeComputed") = false), - "Returns a list of the properties set on the SubstanceGroup.\n\n") - .def("GetPropsAsDict", GetPropsAsDict, - (python::arg("self"), python::arg("includePrivate") = true, - python::arg("includeComputed") = true), - "Returns a dictionary of the properties set on the " - "SubstanceGroup.\n" - " n.b. some properties cannot be converted to python types.\n"); - python::def("GetMolSubstanceGroups", &getMolSubstanceGroups, - "returns the SubstanceGroups for a molecule (if any)", - python::with_custodian_and_ward_postcall<0, 1>()); - python::def("ClearMolSubstanceGroups", &clearMolSubstanceGroups, - "removes all SubstanceGroups from a molecule (if any)"); - // FIX: needs something tying the lifetime to the mol - } -}; -} // namespace RDKit - -void wrap_sgroup() { RDKit::sgroup_wrap::wrap(); } +// +// Copyright (C) 2019 Greg Landrum +// +// @@ All Rights Reserved @@ +// This file is part of the RDKit. +// The contents are covered by the terms of the BSD license +// which is included in the file license.txt, found at the root +// of the RDKit source tree. +// + +#define NO_IMPORT_ARRAY +#include +#include +#include + +// ours +#include +#include +#include "props.hpp" + +namespace python = boost::python; + +namespace RDKit { + +namespace { +std::vector getMolSubstanceGroups(ROMol &mol) { + return getSubstanceGroups(mol); +} +void clearMolSubstanceGroups(ROMol &mol) { + std::vector &sgs = getSubstanceGroups(mol); + sgs.clear(); +} +} // namespace + +std::string sGroupClassDoc = + "A collection of atoms and bonds with associated properties\n"; + +struct sgroup_wrap { + static void wrap() { + // register the vector_indexing_suite for SubstanceGroups + // if it hasn't already been done. + // logic from https://stackoverflow.com/a/13017303 + boost::python::type_info info = + boost::python::type_id>(); + const boost::python::converter::registration *reg = + boost::python::converter::registry::query(info); + if (reg == NULL || (*reg).m_to_python == NULL) { + python::class_>("SubstanceGroup_VECT") + .def(python::vector_indexing_suite< + std::vector>()); + } + + python::class_>( + "SubstanceGroup", sGroupClassDoc.c_str(), python::no_init) + .def("GetOwningMol", &SubstanceGroup::getOwningMol, + "returns the molecule owning this SubstanceGroup", + python::return_internal_reference<>()) + .def("GetIndexInMol", &SubstanceGroup::getIndexInMol, + "returns the index of this SubstanceGroup in the owning " + "molecule's list.") + .def( + "GetAtoms", &SubstanceGroup::getAtoms, + "returns a list of the indices of the atoms in this SubstanceGroup", + python::return_value_policy()) + .def("GetParentAtoms", &SubstanceGroup::getParentAtoms, + "returns a list of the indices of the parent atoms in this " + "SubstanceGroup", + python::return_value_policy()) + .def( + "GetBonds", &SubstanceGroup::getBonds, + "returns a list of the indices of the bonds in this SubstanceGroup", + python::return_value_policy()) + .def("HasProp", + (bool (RDProps::*)(const std::string &) const) & + SubstanceGroup::hasProp, + "returns whether or not a particular property exists") + .def("GetProp", + (std::string(RDProps::*)(const std::string &) const) & + SubstanceGroup::getProp, + "returns the value of a particular property") + .def("GetIntProp", + (int (RDProps::*)(const std::string &) const) & + SubstanceGroup::getProp, + "returns the value of a particular property") + .def("GetUnsignedProp", + (unsigned int (RDProps::*)(const std::string &) const) & + SubstanceGroup::getProp, + "returns the value of a particular property") + .def("GetDoubleProp", + (double (RDProps::*)(const std::string &) const) & + SubstanceGroup::getProp, + "returns the value of a particular property") + .def("GetBoolProp", + (bool (RDProps::*)(const std::string &) const) & + SubstanceGroup::getProp, + "returns the value of a particular property") + .def("GetPropNames", &SubstanceGroup::getPropList, + (python::arg("self"), python::arg("includePrivate") = false, + python::arg("includeComputed") = false), + "Returns a list of the properties set on the SubstanceGroup.\n\n") + .def("GetPropsAsDict", GetPropsAsDict, + (python::arg("self"), python::arg("includePrivate") = true, + python::arg("includeComputed") = true), + "Returns a dictionary of the properties set on the " + "SubstanceGroup.\n" + " n.b. some properties cannot be converted to python types.\n"); + python::def("GetMolSubstanceGroups", &getMolSubstanceGroups, + "returns the SubstanceGroups for a molecule (if any)", + python::with_custodian_and_ward_postcall<0, 1>()); + python::def("ClearMolSubstanceGroups", &clearMolSubstanceGroups, + "removes all SubstanceGroups from a molecule (if any)"); + // FIX: needs something tying the lifetime to the mol + } +}; +} // namespace RDKit + +void wrap_sgroup() { RDKit::sgroup_wrap::wrap(); } diff --git a/Contrib/PBF/PBFRDKit.cpp b/Contrib/PBF/PBFRDKit.cpp index b9a7fab7a..7e2965740 100644 --- a/Contrib/PBF/PBFRDKit.cpp +++ b/Contrib/PBF/PBFRDKit.cpp @@ -1,156 +1,156 @@ -// -// Copyright (c) 2012, Institue of Cancer Research. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -//modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// * Neither the name of Institue of Cancer Research. -// nor the names of its contributors may be used to endorse or promote -// products derived from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// For more information on the Plane of Best Fit please see http://pubs.acs.org/doi/abs/10.1021/ci300293f -// -// If this code has been useful to you, please include the reference -// in any work which has made use of it: - -// Plane of Best Fit: A Novel Method to Characterize the Three-Dimensionality of Molecules, Nicholas C. Firth, Nathan Brown, and Julian Blagg, Journal of Chemical Information and Modeling 2012 52 (10), 2516-2525 - -// -// -// Created by Nicholas Firth, November 2011 -// Modified by Greg Landrum for inclusion in the RDKit distribution November 2012 -// - -#include "PBFRDKit.h" -#include -#include -#include -#include - -#include -using namespace RDKit; - -void getSmallestEigenVector(double fSumXX,double fSumXY,double fSumXZ, - double fSumYY,double fSumYZ,double fSumZZ, - double &x,double &y, double &z); - -double distanceFromAPlane(const RDGeom::Point3D &pt,const std::vector &plane, double denom){ - double numer=0.0; - numer = std::fabs(pt.x*plane[0]+pt.y*plane[1]+pt.z*plane[2]+plane[3]); - - return numer/denom; -} - -bool getBestFitPlane(const std::vector &points, - std::vector &plane, - const std::vector *weights) { - PRECONDITION((!weights || weights->size()>=points.size()),"bad weights vector"); - RDGeom::Point3D origin(0,0,0); - double wSum=0.0; - - for(unsigned int i=0;i eigensolver(mat); - if(eigensolver.info()!=Eigen::Success){ - BOOST_LOG(rdErrorLog)<<"eigenvalue calculation did not converge"<=1,"molecule has no conformers") - int numAtoms = mol.getNumAtoms(); - if(numAtoms<4) return 0; - - const Conformer &conf = mol.getConformer(confId); - if(!conf.is3D()) return 0 ; - - std::vector points; - points.reserve(numAtoms); - for(unsigned int i=0; i plane(4); - getBestFitPlane(points,plane,0); - - double denom=0.0; - for(unsigned int i=0; i<3; ++i){ - denom += plane[i]*plane[i]; - } - denom = pow(denom,0.5); - - double res=0.0; - for(unsigned int i=0; i +#include +#include +#include + +#include +using namespace RDKit; + +void getSmallestEigenVector(double fSumXX,double fSumXY,double fSumXZ, + double fSumYY,double fSumYZ,double fSumZZ, + double &x,double &y, double &z); + +double distanceFromAPlane(const RDGeom::Point3D &pt,const std::vector &plane, double denom){ + double numer=0.0; + numer = std::fabs(pt.x*plane[0]+pt.y*plane[1]+pt.z*plane[2]+plane[3]); + + return numer/denom; +} + +bool getBestFitPlane(const std::vector &points, + std::vector &plane, + const std::vector *weights) { + PRECONDITION((!weights || weights->size()>=points.size()),"bad weights vector"); + RDGeom::Point3D origin(0,0,0); + double wSum=0.0; + + for(unsigned int i=0;i eigensolver(mat); + if(eigensolver.info()!=Eigen::Success){ + BOOST_LOG(rdErrorLog)<<"eigenvalue calculation did not converge"<=1,"molecule has no conformers") + int numAtoms = mol.getNumAtoms(); + if(numAtoms<4) return 0; + + const Conformer &conf = mol.getConformer(confId); + if(!conf.is3D()) return 0 ; + + std::vector points; + points.reserve(numAtoms); + for(unsigned int i=0; i plane(4); + getBestFitPlane(points,plane,0); + + double denom=0.0; + for(unsigned int i=0; i<3; ++i){ + denom += plane[i]*plane[i]; + } + denom = pow(denom,0.5); + + double res=0.0; + for(unsigned int i=0; i - -#include -#include -#include -#include - -using namespace RDKit; - -double PBFRD(ROMol&,int confId=-1); - -#endif +// +// Copyright (c) 2012, Institue of Cancer Research. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +//modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Institue of Cancer Research. +// nor the names of its contributors may be used to endorse or promote +// products derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// For more information on the Plane of Best Fit please see http://pubs.acs.org/doi/abs/10.1021/ci300293f +// +// If this code has been useful to you, please include the reference +// in any work which has made use of it: + +// Plane of Best Fit: A Novel Method to Characterize the Three-Dimensionality of Molecules, Nicholas C. Firth, Nathan Brown, and Julian Blagg, Journal of Chemical Information and Modeling 2012 52 (10), 2516-2525 + +// +// +// Created by Nicholas Firth, November 2011 + + + +#ifndef _PBFRDKit_h +#define _PBFRDKit_h + + +#include + +#include +#include +#include +#include + +using namespace RDKit; + +double PBFRD(ROMol&,int confId=-1); + +#endif diff --git a/build_support/pkg_version.py b/build_support/pkg_version.py index b3906030c..121ee7eaf 100644 --- a/build_support/pkg_version.py +++ b/build_support/pkg_version.py @@ -1,72 +1,72 @@ -from __future__ import print_function -import os -import re -from datetime import datetime -from setuptools import setup - -pkg_version = '' -src_dir = os.path.realpath(__file__) -have_src_dir = os.path.isfile(src_dir) -i = 0 -while (i < 2 and have_src_dir): - i += 1 - src_dir = os.path.dirname(src_dir) - have_src_dir = os.path.isdir(src_dir) -if (not have_src_dir): - raise OSError('Could not find SRC_DIR, got: ' + str(src_dir)) -# parse root CMakeLists.txt and Code/cmake/Modules/RDKitUtils.cmake -root_cmakelists_path = os.path.join(src_dir, 'CMakeLists.txt') -rdkitutils_path = os.path.join(src_dir, 'Code', 'cmake', - 'Modules', 'RDKitUtils.cmake') - -var_dict = {} -for file in (root_cmakelists_path, rdkitutils_path): - with open(file, 'rt') as hnd: - # vars we want to read - var_set = set(['RDKit_Year', 'RDKit_Month', 'RDKit_Revision', - 'RDKit_ABI', 'RDKit_RELEASENAME']) - line = hnd.readline() - while (line): - # is this an uncommented set command? - m = re.match('^\s*set\s*\((\w+)\s*\"(.*)\"\s*\)', - line, re.IGNORECASE) - # if it is - if (m is not None): - # extract the var name - var_name = m.group(1) - if (var_name in var_set): - # if the var name is in the vars we want to read - var_value = m.group(2) - keepLooping = True - while (keepLooping): - # recursively replace variables we already found - m = re.match('^.*\${(\w+)}', var_value) - keepLooping = (m is not None) - if (keepLooping): - v = var_dict.get(m.group(1)) - # if the variable is not defined, remove - # the preceding dot - eat_dot = 0 - if (v is None): - v = '' - eat_dot = 1 - # replace variable name with its value - s = m.start(1) - (2 + eat_dot) - e = m.end(1) + 1 - var_value = var_value[:s] + v + var_value[e:] - # assign variable value and keep parsing - var_dict[var_name] = var_value - line = hnd.readline() -d = datetime.today().strftime('%Y%m%d') -rdkitVersion = var_dict.get('RDKit_RELEASENAME') -if (rdkitVersion is not None): - if rdkitVersion.endswith('.dev1'): - pkg_version = rdkitVersion[:-1] + d - else: - pkg_version = rdkitVersion -else: - # if extracting rdkitVersion somehow failed, use the date - pkg_version = d - -print('rdkitVersion:', pkg_version) -setup(rdkitVersion = pkg_version) +from __future__ import print_function +import os +import re +from datetime import datetime +from setuptools import setup + +pkg_version = '' +src_dir = os.path.realpath(__file__) +have_src_dir = os.path.isfile(src_dir) +i = 0 +while (i < 2 and have_src_dir): + i += 1 + src_dir = os.path.dirname(src_dir) + have_src_dir = os.path.isdir(src_dir) +if (not have_src_dir): + raise OSError('Could not find SRC_DIR, got: ' + str(src_dir)) +# parse root CMakeLists.txt and Code/cmake/Modules/RDKitUtils.cmake +root_cmakelists_path = os.path.join(src_dir, 'CMakeLists.txt') +rdkitutils_path = os.path.join(src_dir, 'Code', 'cmake', + 'Modules', 'RDKitUtils.cmake') + +var_dict = {} +for file in (root_cmakelists_path, rdkitutils_path): + with open(file, 'rt') as hnd: + # vars we want to read + var_set = set(['RDKit_Year', 'RDKit_Month', 'RDKit_Revision', + 'RDKit_ABI', 'RDKit_RELEASENAME']) + line = hnd.readline() + while (line): + # is this an uncommented set command? + m = re.match('^\s*set\s*\((\w+)\s*\"(.*)\"\s*\)', + line, re.IGNORECASE) + # if it is + if (m is not None): + # extract the var name + var_name = m.group(1) + if (var_name in var_set): + # if the var name is in the vars we want to read + var_value = m.group(2) + keepLooping = True + while (keepLooping): + # recursively replace variables we already found + m = re.match('^.*\${(\w+)}', var_value) + keepLooping = (m is not None) + if (keepLooping): + v = var_dict.get(m.group(1)) + # if the variable is not defined, remove + # the preceding dot + eat_dot = 0 + if (v is None): + v = '' + eat_dot = 1 + # replace variable name with its value + s = m.start(1) - (2 + eat_dot) + e = m.end(1) + 1 + var_value = var_value[:s] + v + var_value[e:] + # assign variable value and keep parsing + var_dict[var_name] = var_value + line = hnd.readline() +d = datetime.today().strftime('%Y%m%d') +rdkitVersion = var_dict.get('RDKit_RELEASENAME') +if (rdkitVersion is not None): + if rdkitVersion.endswith('.dev1'): + pkg_version = rdkitVersion[:-1] + d + else: + pkg_version = rdkitVersion +else: + # if extracting rdkitVersion somehow failed, use the date + pkg_version = d + +print('rdkitVersion:', pkg_version) +setup(rdkitVersion = pkg_version)