mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-04 21:54:27 +08:00
* StructChecker changes. Initial commit. First implementation. Added some tests. * StructChecker: add GoodAtoms and AcidicAtoms. new updates * StructChecker: add new tests * StructChecker: added TransformAugmentedAtoms() * StructCheck: add structCheck to GraphMol. Fix compilation errors. * StructChecker: add stereo verification and some utilities. * StructChecker: function FixDubious3DMolecule was added * StructChecker: checkStereo added. done with stereo. * StructChecker: add StripSmallFragments() * StructChecker: add AtomClash() function. Some cosmetic + tests * StructChecker: checkAtoms() was started * StructChecker: checkAtoms is ready * StructChecker: user RingInfo from RDkit. Start regarge * StructChecker: ReCharge molecule method prototype * StructChecker: updates for ReCharge. Almost finished * StructChecker: all ReCharge is done except external data tables loading * StructChecker: add path tables into API. ReCharge completed * Adds augmented atom data Signed-off-by: Brian Kelley <brian.kelley@novartis.com> * Removes extra files Signed-off-by: Brian Kelley <brian.kelley@novartis.com> * Adds path to test data via RDBASE environment Signed-off-by: Brian Kelley <brian.kelley@novartis.com> * Revert "Struct checker apr15" * StructChecker: add missing tautomer tests * Updates test to use RDBASE * Adds initialization of data from data section * Adds Python API and tests * Fixes namespace for enum * StructChecker: update/imporve strip small fragments * StructChecker: fix acidic atoms (but logic does not work) * StructChecker: fix match issue for CheckAtoms * Adds macro guards * Adds loading API and proper constructor * Fixes tests, adds stereo test * Fixes crash bug, matches[0] was being accessed from an empty match vector * Reverts crash fix - conflicts with previous * Adds the rest of the structure checker options * StructChecker: fix atom matching for aromatic rings * StructChecker: add tautomers checks. Update some tests * StructChecker: stereo fixes. 
Add some tests * StructChecker: fix check atoms. Start ligand symbol list * StructChecker: fix some check atoms validation. Add Tranform to query lists. Start correct loading augmented atoms * update * another set of fixes * StructChecker: fix loadDefaultAugmentedAtoms. Some changes in CheckAtom + tests + debug conditional breakpoints (TEMP operators) * StructChecker: rewrited RecMatch() to sequential. Changed bond matching algorithm. small bug fixes * Adds better logging of mismatched atoms * Removes duplicated negative charge * Fixes charges * Adds nitro group test * StructChecker: add better logging * remove double logging * Reformats code using RDKit's clang-format style * StructChecker: Fix charge reformat using RDKit format. * StructChecker: compilation restore after merge * restore bond matching * Removes the same fragments that strucheck does in case of ties * Don't resanitize - this adds aromaticity which mucks things up * Adds empty molecule checks * Fixes atom clashes. * Removes debug printing * Removes debug logging info * First pass at stereo fixes * Fixes off by one error for dubious stereo fix * Fixes more off by one errors * Fixes more off by one errors * More off by one fixes. * Another off by one * Fixes chiral flag set in molfile check * Copies chiral flag over to largest fragment if necessary * Poor man’s parity check. * Find unspecified chiral centers ala Avalon. * StructChecker: fix recursive match. Fix transformations * StructChecker: fix transformation for atom list (using query atoms) * Fixes checks && to & * StructChecker: fix carboxylic acids tranform issue. Atom list is changed only if different * StructChecker: documentation was updated * Fixes snprintf and silences some warnings * Adds Get/Set StructCheckerOptions * Adds default AugmentedAtomTransforms
140 lines
4.1 KiB
C++
140 lines
4.1 KiB
C++
//
|
|
// Copyright (C) 2016 Novartis Institutes for BioMedical Research
|
|
//
|
|
// @@ All Rights Reserved @@
|
|
// This file is part of the RDKit.
|
|
// The contents are covered by the terms of the BSD license
|
|
// which is included in the file license.txt, found at the root
|
|
// of the RDKit source tree.
|
|
//
|
|
#include "../../RDGeneral/types.h"
|
|
#include "../../Geometry/point.h"
|
|
#include "Utilites.h"
|
|
#include <algorithm>
|
|
namespace RDKit {
|
|
namespace StructureCheck {
|
|
|
|
void SetupNeighbourhood(const ROMol &mol,
|
|
std::vector<Neighbourhood> &neighbours) {
|
|
neighbours.clear();
|
|
neighbours.resize(mol.getNumAtoms());
|
|
|
|
for (unsigned i = 0; i < mol.getNumBonds(); i++) {
|
|
const Bond *bond = mol.getBondWithIdx(i);
|
|
unsigned a1 = bond->getBeginAtomIdx();
|
|
unsigned a2 = bond->getEndAtomIdx();
|
|
|
|
neighbours[a1].Atoms.push_back(a2);
|
|
neighbours[a1].Bonds.push_back(i);
|
|
|
|
neighbours[a2].Atoms.push_back(a1);
|
|
neighbours[a2].Bonds.push_back(i);
|
|
}
|
|
}
|
|
|
|
bool getMolAtomPoints(const ROMol &mol,
|
|
std::vector<RDGeom::Point3D> &atomPoint,
|
|
bool twod) {
|
|
bool non_zero_z = false;
|
|
atomPoint.resize(mol.getNumAtoms());
|
|
// take X,Y,Z coordinates of each atom
|
|
if (0 != mol.getNumConformers())
|
|
for (RDKit::ROMol::ConstConformerIterator cnfi = mol.beginConformers();
|
|
cnfi != mol.endConformers(); cnfi++) {
|
|
const Conformer &conf = **cnfi; // mol.getConformer(confId);
|
|
if (twod || conf.is3D()) {
|
|
for (unsigned i = 0; i < mol.getNumAtoms(); i++) {
|
|
atomPoint[i] = conf.getAtomPos(i);
|
|
if (fabs(atomPoint[i].z) >= 1.e-7) non_zero_z = true;
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
if (atomPoint.empty()) { // compute XYZ
|
|
// TODO:
|
|
// ???? ..........
|
|
}
|
|
return non_zero_z;
|
|
}
|
|
|
|
// Description of one neighbour of an atom, as filled in by LogNeighbourhood:
//   <0> element symbol of the neighbouring atom
//   <1> type of the connecting bond (Bond::getBondType() stored as int)
//   <2> formal charge of the neighbouring atom
//   <3> number of radical electrons of the neighbouring atom
typedef boost::tuple<std::string, int, int, int> NbrData;
|
|
|
|
bool lessTuple(const NbrData &left, const NbrData &right) {
|
|
if (left.get<0>() < right.get<0>()) return true;
|
|
if (left.get<0>() > right.get<0>()) return false;
|
|
|
|
if (left.get<1>() < right.get<1>()) return true;
|
|
if (left.get<1>() > right.get<1>()) return false;
|
|
|
|
if (left.get<2>() < right.get<2>()) return true;
|
|
if (left.get<2>() > right.get<2>()) return false;
|
|
|
|
if (left.get<3>() < right.get<3>()) return true;
|
|
|
|
return false;
|
|
}
|
|
|
|
std::string LogNeighbourhood(
|
|
const ROMol &mol, unsigned int idx,
|
|
const std::vector<Neighbourhood> &neighbour_array) {
|
|
std::stringstream oss;
|
|
// FIX ME turn into utility func?
|
|
std::string name("");
|
|
mol.getPropIfPresent(common_properties::_Name, name);
|
|
oss << "atom '" << name << "' idx=" << idx << " AA: ";
|
|
const Atom &atm = *mol.getAtomWithIdx(idx);
|
|
oss << atm.getSymbol();
|
|
|
|
if (atm.getFormalCharge())
|
|
oss << (atm.getFormalCharge() > 0 ? "+" : "") << atm.getFormalCharge();
|
|
|
|
if (atm.getNumRadicalElectrons()) oss << atm.getNumRadicalElectrons();
|
|
|
|
// these neighbors should be sorted properly?
|
|
size_t numNbrs = neighbour_array[idx].Atoms.size();
|
|
// CHECK_INVARIANT(numNBrs == neighbour_array[idx].Bonds.size());
|
|
|
|
std::vector<NbrData> nbrs;
|
|
|
|
for (size_t i = 0; i < numNbrs; ++i) {
|
|
const Bond *bond = mol.getBondWithIdx(neighbour_array[idx].Bonds[i]);
|
|
const Atom &nbr = *mol.getAtomWithIdx(neighbour_array[idx].Atoms[i]);
|
|
nbrs.push_back(NbrData(nbr.getSymbol(), bond->getBondType(),
|
|
nbr.getFormalCharge(),
|
|
nbr.getNumRadicalElectrons()));
|
|
}
|
|
|
|
std::sort(nbrs.begin(), nbrs.end(), lessTuple);
|
|
for (size_t i = 0; i < nbrs.size(); ++i) {
|
|
NbrData &nbr = nbrs[i];
|
|
std::string bs = "";
|
|
switch (nbr.get<1>()) {
|
|
case Bond::SINGLE:
|
|
bs = "-";
|
|
break;
|
|
case Bond::DOUBLE:
|
|
bs = "=";
|
|
break;
|
|
case Bond::TRIPLE:
|
|
bs = "#";
|
|
break;
|
|
case Bond::AROMATIC:
|
|
bs = "~";
|
|
break;
|
|
}
|
|
if (bs.size())
|
|
oss << "(" << bs << nbr.get<0>();
|
|
else
|
|
oss << "("
|
|
<< "?" << (int)nbr.get<1>() << "?" << nbr.get<0>();
|
|
if (nbr.get<2>()) oss << (nbr.get<2>() > 0 ? "+" : "") << nbr.get<2>();
|
|
|
|
if (nbr.get<3>()) oss << nbr.get<3>();
|
|
oss << ")";
|
|
}
|
|
return oss.str();
|
|
}
|
|
|
|
} // namespace StructureCheck
|
|
} // namespace RDKit
|