mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-04 21:54:27 +08:00
* StructChecker changes. Initial commit. First implementation. Added some tests. * StructChecker: add GoodAtoms and AcidicAtoms. new updates * StructChecker: add new tests * StructChecker: added TransformAugmentedAtoms() * StructCheck: add structCheck to GraphMol. Fix compilation errors. * StructChecker: add stereo verification and some utilities. * StructChecker: function FixDubious3DMolecule was added * StructChecker: checkStereo added. done with stereo. * StructChecker: add StripSmallFragments() * StructChecker: add AtomClash() function. Some cosmetic + tests * StructChecker: checkAtoms() was started * StructChecker: checkAtoms is ready * StructChecker: user RingInfo from RDkit. Start regarge * StructChecker: ReCharge molecule method prototype * StructChecker: updates for ReCharge. Almost finished * StructChecker: all ReCharge is done except external data tables loading * StructChecker: add path tables into API. ReCharge completed * Adds augmented atom data Signed-off-by: Brian Kelley <brian.kelley@novartis.com> * Removes extra files Signed-off-by: Brian Kelley <brian.kelley@novartis.com> * Adds path to test data via RDBASE environment Signed-off-by: Brian Kelley <brian.kelley@novartis.com> * Revert "Struct checker apr15" * StructChecker: add missing tautomer tests * Updates test to use RDBASE * Adds initialization of data from data section * Adds Python API and tests * Fixes namespace for enum * StructChecker: update/imporve strip small fragments * StructChecker: fix acidic atoms (but logic does not work) * StructChecker: fix match issue for CheckAtoms * Adds macro guards * Adds loading API and proper constructor * Fixes tests, adds stereo test * Fixes crash bug, matches[0] was being accessed from an empty match vector * Reverts crash fix - conflicts with previous * Adds the rest of the structure checker options * StructChecker: fix atom matching for aromatic rings * StructChecker: add tautomers checks. Update some tests * StructChecker: stereo fixes. 
Add some tests * StructChecker: fix check atoms. Start ligand symbol list * StructChecker: fix some check atoms validation. Add Tranform to query lists. Start correct loading augmented atoms * update * another set of fixes * StructChecker: fix loadDefaultAugmentedAtoms. Some changes in CheckAtom + tests + debug conditional breakpoints (TEMP operators) * StructChecker: rewrited RecMatch() to sequential. Changed bond matching algorithm. small bug fixes * Adds better logging of mismatched atoms * Removes duplicated negative charge * Fixes charges * Adds nitro group test * StructChecker: add better logging * remove double logging * Reformats code using RDKit's clang-format style * StructChecker: Fix charge reformat using RDKit format. * StructChecker: compilation restore after merge * restore bond matching * Removes the same fragments that strucheck does in case of ties * Don't resanitize - this adds aromaticity which mucks things up * Adds empty molecule checks * Fixes atom clashes. * Removes debug printing * Removes debug logging info * First pass at stereo fixes * Fixes off by one error for dubious stereo fix * Fixes more off by one errors * Fixes more off by one errors * More off by one fixes. * Another off by one * Fixes chiral flag set in molfile check * Copies chiral flag over to largest fragment if necessary * Poor man’s parity check. * Find unspecified chiral centers ala Avalon. * StructChecker: fix recursive match. Fix transformations * StructChecker: fix transformation for atom list (using query atoms) * Fixes checks && to & * StructChecker: fix carboxylic acids tranform issue. Atom list is changed only if different * StructChecker: documentation was updated * Fixes snprintf and silences some warnings * Adds Get/Set StructCheckerOptions * Adds default AugmentedAtomTransforms
145 lines
4.2 KiB
C++
145 lines
4.2 KiB
C++
//
|
|
// Copyright (C) 2016 Novartis Institutes for BioMedical Research
|
|
//
|
|
// @@ All Rights Reserved @@
|
|
// This file is part of the RDKit.
|
|
// The contents are covered by the terms of the BSD license
|
|
// which is included in the file license.txt, found at the root
|
|
// of the RDKit source tree.
|
|
//
|
|
#include <map>
|
|
|
|
#include "../MolOps.h"
|
|
#include "../Descriptors/MolDescriptors.h"
|
|
#include "StripSmallFragments.h"
|
|
#include "../SmilesParse/SmilesWrite.h"
|
|
#include "../FileParsers/MolFileStereochem.h"
|
|
|
|
// Compatibility shim: when building with a Microsoft compiler whose
// _MSC_VER is below 1900, route snprintf to the vendor-specific _snprintf.
// On every other toolchain nothing is defined here.
#if defined(_MSC_VER) && _MSC_VER != 0 && _MSC_VER < 1900
#define snprintf _snprintf
#endif
|
|
|
|
namespace RDKit {
|
|
namespace StructureCheck {
|
|
|
|
static inline std::string getMolecularFormula(const ROMol &mol) {
|
|
return RDKit::Descriptors::calcMolFormula(mol);
|
|
}
|
|
|
|
// Stores the formula and the mass of mol as properties on mol itself.
//
// mol: molecule that is measured and that receives the properties.
// pre: selects the property names. true writes "MF_PRE" / "MW_PRE",
//      false writes "MF_POST" / "MW_POST".
//
// The mass comes from RDKit::Descriptors::calcExactMW and is stored as a
// double. The formula property is only written when the formula string is
// not empty.
// NOTE(review): despite the "MW" property name the value is whatever
// calcExactMW returns (presumably the exact mass rather than the average
// molecular weight) - confirm this is what consumers expect.
void AddMWMF(RWMol &mol, bool pre) {
  // The mass is computed before the formula, so that a failure in either
  // descriptor leaves mol without any of the two properties.
  const double mass = RDKit::Descriptors::calcExactMW(mol);

  std::string formula = getMolecularFormula(mol);
  if (!formula.empty()) {
    mol.setProp((pre ? "MF_PRE" : "MF_POST"), formula);
  }

  // The numeric value is stored directly; no textual formatting is needed.
  mol.setProp((pre ? "MW_PRE" : "MW_POST"), mass);
}
|
|
|
|
// Replaces mol by its largest fragment (largest = most atoms).
//
// mol:     molecule that is reduced in place.
// verbose: when true, one warning per discarded fragment is written to
//          rdWarningLog.
//
// Returns false and leaves mol untouched when getMolFrags yields at most one
// fragment; returns true once mol has been replaced by the kept fragment.
//
// When several fragments share the largest atom count, the one with the
// highest index is kept.
//
// If mol carries the _MolFileChiralFlag property, its value is copied to the
// kept fragment. When that value is non-zero the fragment is then examined,
// and the flag is reset to 0 if no sign of chirality is found in it.
bool StripSmallFragments(RWMol &mol, bool verbose) {
  typedef std::vector<boost::shared_ptr<ROMol> > FragmentList;

  const bool sanitize = false;
  FragmentList pieces = MolOps::getMolFrags(mol, sanitize);
  if (pieces.size() < 2) {
    return false;
  }

  // Pick the fragment to keep. A fragment replaces the current choice unless
  // it is strictly smaller, so on ties the later fragment wins.
  size_t keepIdx = 0;
  size_t keepSize = 0;
  for (size_t idx = 0; idx != pieces.size(); ++idx) {
    const unsigned int atomCount = pieces[idx]->getNumAtoms();
    if (atomCount < keepSize) {
      continue;
    }
    keepSize = atomCount;
    keepIdx = idx;
  }

  if (verbose) {
    std::string name = "<no name>";
    mol.getPropIfPresent(common_properties::_Name, name);
    for (size_t idx = 0; idx != pieces.size(); ++idx) {
      if (idx == keepIdx) {
        continue;
      }
      BOOST_LOG(rdWarningLog) << name << " removed fragment i=" << idx
                              << " with " << pieces[idx]->getNumAtoms()
                              << " atoms" << std::endl;
    }
  }

  // Carry the mol file chiral flag over to the surviving fragment before the
  // assignment below replaces mol, and remember whether it was switched on.
  ROMol &kept = *pieces[keepIdx];
  bool flagIsOn = false;
  if (mol.hasProp(common_properties::_MolFileChiralFlag)) {
    const unsigned int flagValue =
        mol.getProp<unsigned int>(common_properties::_MolFileChiralFlag);
    kept.setProp<unsigned int>(common_properties::_MolFileChiralFlag,
                               flagValue);
    flagIsOn = (flagValue != 0);
  }

  mol = kept;

  if (!flagIsOn) {
    return true;
  }

  // The flag was on for the whole input: find out whether the chirality can
  // have come from the fragment that was kept.
  bool ischiral = false;

  // First attempt, on a scratch copy so that mol itself is not sanitized:
  // perceive stereochemistry and look for an atom marked _ChiralityPossible.
  RWMol probe(mol);
  try {
    MolOps::sanitizeMol(probe);
    ClearSingleBondDirFlags(probe);
    const Conformer &conf = probe.getConformer();
    DetectBondStereoChemistry(probe, &conf);
    MolOps::assignStereochemistry(probe, true, true, true);
    for (ROMol::AtomIterator atomIt = probe.beginAtoms();
         !ischiral && atomIt != probe.endAtoms(); ++atomIt) {
      ischiral = (*atomIt)->hasProp(common_properties::_ChiralityPossible);
    }
  } catch (...) {
    // Best effort only: any failure above falls through to the checks on the
    // raw chiral tags and bond directions below.
  }

  // Second attempt, only if nothing was found so far: are chiral tags or
  // wedge/dash bond directions set on the fragment itself?
  if (!ischiral) {
    for (ROMol::AtomIterator atomIt = mol.beginAtoms();
         !ischiral && atomIt != mol.endAtoms(); ++atomIt) {
      switch ((*atomIt)->getChiralTag()) {
        case Atom::CHI_TETRAHEDRAL_CW:
        case Atom::CHI_TETRAHEDRAL_CCW:
          ischiral = true;
          break;
        default:
          break;
      }
    }

    for (ROMol::BondIterator bondIt = mol.beginBonds();
         !ischiral && bondIt != mol.endBonds(); ++bondIt) {
      switch ((*bondIt)->getBondDir()) {
        case Bond::BEGINDASH:
        case Bond::BEGINWEDGE:
          ischiral = true;
          break;
        default:
          break;
      }
    }
  }

  if (!ischiral) {
    mol.setProp<unsigned int>(common_properties::_MolFileChiralFlag, 0);
  }
  return true;
}
|
|
|
|
} // namespace StructureCheck
|
|
} // namespace RDKit
|