Files
rdkit/Code/GraphMol/StructChecker/Utilites.cpp
Brian Kelley 8609cd4883 Add StructChecker functionality
* StructChecker changes. Initial commit. First implementation. Added some tests.

* StructChecker: add  GoodAtoms and AcidicAtoms. new updates

* StructChecker: add new tests

* StructChecker: added TransformAugmentedAtoms()

* StructCheck: add structCheck to GraphMol. Fix compilation errors.

* StructChecker: add stereo verification and some utilities.

* StructChecker: function FixDubious3DMolecule was added

* StructChecker: checkStereo added. done with stereo.

* StructChecker: add StripSmallFragments()

* StructChecker: add AtomClash() function. Some cosmetic + tests

* StructChecker: checkAtoms() was started

* StructChecker: checkAtoms is ready

* StructChecker: use RingInfo from RDKit. Start recharge

* StructChecker: ReCharge molecule method prototype

* StructChecker: updates for ReCharge. Almost finished

* StructChecker: all ReCharge is done except external data tables loading

* StructChecker: add path tables into API. ReCharge completed

* Adds augmented atom data

Signed-off-by: Brian Kelley <brian.kelley@novartis.com>

* Removes extra files

Signed-off-by: Brian Kelley <brian.kelley@novartis.com>

* Adds path to test data via RDBASE environment

Signed-off-by: Brian Kelley <brian.kelley@novartis.com>

* Revert "Struct checker apr15"

* StructChecker: add missing tautomer tests

* Updates test to use RDBASE

* Adds initialization of data from data section

* Adds Python API and tests

* Fixes namespace for enum

* StructChecker: update/improve strip small fragments

* StructChecker: fix acidic atoms (but logic does not work)

* StructChecker: fix match issue for CheckAtoms

* Adds macro guards

* Adds loading API and proper constructor

* Fixes tests, adds stereo test

* Fixes crash bug, matches[0] was being accessed from an empty match vector

* Reverts crash fix - conflicts with previous

* Adds the rest of the structure checker options

* StructChecker: fix atom matching for aromatic rings

* StructChecker: add tautomers checks. Update some tests

* StructChecker: stereo fixes. Add some tests

* StructChecker: fix check atoms. Start ligand symbol list

* StructChecker: fix some check atoms validation. Add Transform to query lists. Start loading augmented atoms correctly

* update

* another set of fixes

* StructChecker: fix loadDefaultAugmentedAtoms. Some changes in CheckAtom + tests + debug conditional breakpoints (TEMP operators)

* StructChecker: rewrote RecMatch() to be sequential. Changed bond matching algorithm. Small bug fixes

* Adds better logging of mismatched atoms

* Removes duplicated negative charge

* Fixes charges

* Adds nitro group test

* StructChecker: add better logging

* remove double logging

* Reformats code using RDKit's clang-format style

* StructChecker: Fix charge reformat using RDKit format.

* StructChecker: compilation restore after merge

* restore bond matching

* Removes the same fragments that strucheck does in case of ties

* Don't resanitize - this adds aromaticity which mucks things up

* Adds empty molecule checks

* Fixes atom clashes.

* Removes debug printing

* Removes debug logging info

* First pass at stereo fixes

* Fixes off by one error for dubious stereo fix

* Fixes more off by one errors

* Fixes more off by one errors

* More off by one fixes.

* Another off by one

* Fixes chiral flag set in molfile check

* Copies chiral flag over to largest fragment if necessary

* Poor man’s parity check.

* Find unspecified chiral centers ala Avalon.

* StructChecker: fix recursive match. Fix transformations

* StructChecker: fix transformation for atom list (using query atoms)

* Fixes checks && to &

* StructChecker: fix carboxylic acids transform issue. Atom list is changed only if different

* StructChecker: documentation was updated

* Fixes snprintf and silences some warnings

* Adds Get/Set StructCheckerOptions

* Adds default AugmentedAtomTransforms
2016-10-24 08:00:07 +02:00

140 lines
4.1 KiB
C++

//
// Copyright (C) 2016 Novartis Institutes for BioMedical Research
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
#include "../../RDGeneral/types.h"
#include "../../Geometry/point.h"
#include "Utilites.h"
#include <algorithm>
namespace RDKit {
namespace StructureCheck {
void SetupNeighbourhood(const ROMol &mol,
std::vector<Neighbourhood> &neighbours) {
neighbours.clear();
neighbours.resize(mol.getNumAtoms());
for (unsigned i = 0; i < mol.getNumBonds(); i++) {
const Bond *bond = mol.getBondWithIdx(i);
unsigned a1 = bond->getBeginAtomIdx();
unsigned a2 = bond->getEndAtomIdx();
neighbours[a1].Atoms.push_back(a2);
neighbours[a1].Bonds.push_back(i);
neighbours[a2].Atoms.push_back(a1);
neighbours[a2].Bonds.push_back(i);
}
}
// Copies atom coordinates out of the first suitable conformer of `mol`.
//
// \param mol        molecule to read coordinates from
// \param atomPoint  output; resized to the number of atoms. Its elements are
//                   overwritten only when a suitable conformer is found,
//                   otherwise they are left as resize() produced them.
// \param twod       when true the first conformer is accepted whether or not
//                   it is flagged 3D; when false only the first conformer
//                   reporting is3D() is used.
//
// \return true if at least one copied position has |z| >= 1e-7, false
//         otherwise (including when no conformer was used).
bool getMolAtomPoints(const ROMol &mol,
                      std::vector<RDGeom::Point3D> &atomPoint,
                      bool twod) {
  const unsigned numAtoms = mol.getNumAtoms();
  atomPoint.resize(numAtoms);

  // TODO: coordinates are not computed when the molecule carries none; the
  // output is simply left as sized above.
  if (mol.getNumConformers() == 0) {
    return false;
  }

  bool sawNonZeroZ = false;
  for (ROMol::ConstConformerIterator confIt = mol.beginConformers();
       confIt != mol.endConformers(); ++confIt) {
    const Conformer &conf = **confIt;
    // Skip conformers that are not acceptable for the requested mode.
    if (!twod && !conf.is3D()) {
      continue;
    }
    for (unsigned a = 0; a < numAtoms; ++a) {
      atomPoint[a] = conf.getAtomPos(a);
      if (fabs(atomPoint[a].z) >= 1.e-7) {
        sawNonZeroZ = true;
      }
    }
    // Only the first acceptable conformer is used.
    break;
  }
  return sawNonZeroZ;
}
// Description of one neighbour as filled in by LogNeighbourhood():
// (element symbol, bond type, formal charge, number of radical electrons).
typedef boost::tuple<std::string, int, int, int> NbrData;

// Ordering predicate for NbrData: lexicographic comparison over the four
// fields, in order. Returns true when `left` sorts strictly before `right`.
bool lessTuple(const NbrData &left, const NbrData &right) {
  // The first field that differs decides the order.
  if (left.get<0>() != right.get<0>()) {
    return left.get<0>() < right.get<0>();
  }
  if (left.get<1>() != right.get<1>()) {
    return left.get<1>() < right.get<1>();
  }
  if (left.get<2>() != right.get<2>()) {
    return left.get<2>() < right.get<2>();
  }
  return left.get<3>() < right.get<3>();
}
std::string LogNeighbourhood(
const ROMol &mol, unsigned int idx,
const std::vector<Neighbourhood> &neighbour_array) {
std::stringstream oss;
// FIX ME turn into utility func?
std::string name("");
mol.getPropIfPresent(common_properties::_Name, name);
oss << "atom '" << name << "' idx=" << idx << " AA: ";
const Atom &atm = *mol.getAtomWithIdx(idx);
oss << atm.getSymbol();
if (atm.getFormalCharge())
oss << (atm.getFormalCharge() > 0 ? "+" : "") << atm.getFormalCharge();
if (atm.getNumRadicalElectrons()) oss << atm.getNumRadicalElectrons();
// these neighbors should be sorted properly?
size_t numNbrs = neighbour_array[idx].Atoms.size();
// CHECK_INVARIANT(numNBrs == neighbour_array[idx].Bonds.size());
std::vector<NbrData> nbrs;
for (size_t i = 0; i < numNbrs; ++i) {
const Bond *bond = mol.getBondWithIdx(neighbour_array[idx].Bonds[i]);
const Atom &nbr = *mol.getAtomWithIdx(neighbour_array[idx].Atoms[i]);
nbrs.push_back(NbrData(nbr.getSymbol(), bond->getBondType(),
nbr.getFormalCharge(),
nbr.getNumRadicalElectrons()));
}
std::sort(nbrs.begin(), nbrs.end(), lessTuple);
for (size_t i = 0; i < nbrs.size(); ++i) {
NbrData &nbr = nbrs[i];
std::string bs = "";
switch (nbr.get<1>()) {
case Bond::SINGLE:
bs = "-";
break;
case Bond::DOUBLE:
bs = "=";
break;
case Bond::TRIPLE:
bs = "#";
break;
case Bond::AROMATIC:
bs = "~";
break;
}
if (bs.size())
oss << "(" << bs << nbr.get<0>();
else
oss << "("
<< "?" << (int)nbr.get<1>() << "?" << nbr.get<0>();
if (nbr.get<2>()) oss << (nbr.get<2>() > 0 ? "+" : "") << nbr.get<2>();
if (nbr.get<3>()) oss << nbr.get<3>();
oss << ")";
}
return oss.str();
}
} // namespace StructureCheck
} // namespace RDKit