Files
rdkit/Code/GraphMol/StructChecker/StripSmallFragments.cpp
Brian Kelley 8609cd4883 Add StructChecker functionality
* StructChecker changes. Initial commit. First implementation. Added some tests.

* StructChecker: add  GoodAtoms and AcidicAtoms. new updates

* StructChecker: add new tests

* StructChecker: added TransformAugmentedAtoms()

* StructCheck: add structCheck to GraphMol. Fix compilation errors.

* StructChecker: add stereo verification and some utilities.

* StructChecker: function FixDubious3DMolecule was added

* StructChecker: checkStereo added. done with stereo.

* StructChecker: add StripSmallFragments()

* StructChecker: add AtomClash() function. Some cosmetic + tests

* StructChecker: checkAtoms() was started

* StructChecker: checkAtoms is ready

* StructChecker: use RingInfo from RDKit. Start recharge

* StructChecker: ReCharge molecule method prototype

* StructChecker: updates for ReCharge. Almost finished

* StructChecker: all ReCharge is done except external data tables loading

* StructChecker: add path tables into API. ReCharge completed

* Adds augmented atom data

Signed-off-by: Brian Kelley <brian.kelley@novartis.com>

* Removes extra files

Signed-off-by: Brian Kelley <brian.kelley@novartis.com>

* Adds path to test data via RDBASE environment

Signed-off-by: Brian Kelley <brian.kelley@novartis.com>

* Revert "Struct checker apr15"

* StructChecker: add missing tautomer tests

* Updates test to use RDBASE

* Adds initialization of data from data section

* Adds Python API and tests

* Fixes namespace for enum

* StructChecker: update/improve strip small fragments

* StructChecker: fix acidic atoms (but logic does not work)

* StructChecker: fix match issue for CheckAtoms

* Adds macro guards

* Adds loading API and proper constructor

* Fixes tests, adds stereo test

* Fixes crash bug, matches[0] was being accessed from an empty match vector

* Reverts crash fix - conflicts with previous

* Adds the rest of the structure checker options

* StructChecker: fix atom matching for aromatic rings

* StructChecker: add tautomers checks. Update some tests

* StructChecker: stereo fixes. Add some tests

* StructChecker: fix check atoms. Start ligand symbol list

* StructChecker: fix some check atoms validation. Add Transform to query lists. Start correct loading of augmented atoms

* update

* another set of fixes

* StructChecker: fix loadDefaultAugmentedAtoms. Some changes in CheckAtom + tests + debug conditional breakpoints (TEMP operators)

* StructChecker: rewrote RecMatch() to be sequential. Changed bond matching algorithm. Small bug fixes

* Adds better logging of mismatched atoms

* Removes duplicated negative charge

* Fixes charges

* Adds nitro group test

* StructChecker: add better logging

* remove double logging

* Reformats code using RDKit's clang-format style

* StructChecker: Fix charge reformat using RDKit format.

* StructChecker: compilation restore after merge

* restore bond matching

* Removes the same fragments that strucheck does in case of ties

* Don't resanitize - this adds aromaticity which mucks things up

* Adds empty molecule checks

* Fixes atom clashes.

* Removes debug printing

* Removes debug logging info

* First pass at stereo fixes

* Fixes off by one error for dubious stereo fix

* Fixes more off by one errors

* Fixes more off by one errors

* More off by one fixes.

* Another off by one

* Fixes chiral flag set in molfile check

* Copies chiral flag over to largest fragment if necessary

* Poor man’s parity check.

* Find unspecified chiral centers ala Avalon.

* StructChecker: fix recursive match. Fix transformations

* StructChecker: fix transformation for atom list (using query atoms)

* Fixes checks && to &

* StructChecker: fix carboxylic acids transform issue. Atom list is changed only if different

* StructChecker: documentation was updated

* Fixes snprintf and silences some warnings

* Adds Get/Set StructCheckerOptions

* Adds default AugmentedAtomTransforms
2016-10-24 08:00:07 +02:00

145 lines
4.2 KiB
C++

//
// Copyright (C) 2016 Novartis Institutes for BioMedical Research
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
#include <map>
#include "../MolOps.h"
#include "../Descriptors/MolDescriptors.h"
#include "StripSmallFragments.h"
#include "../SmilesParse/SmilesWrite.h"
#include "../FileParsers/MolFileStereochem.h"
// define snprintf for msvc
#if _MSC_VER
#if _MSC_VER < 1900
#define snprintf _snprintf
#endif
#endif
namespace RDKit {
namespace StructureCheck {
// File-local convenience wrapper: returns the molecular formula string that
// Descriptors::calcMolFormula computes for mol (default options).
static inline std::string getMolecularFormula(const ROMol &mol) {
  std::string formula = RDKit::Descriptors::calcMolFormula(mol);
  return formula;
}
// Annotates mol with its molecular formula and molecular weight.
//
// mol : molecule to annotate; the properties are set on it in place.
// pre : selects the property names. When true the formula goes to "MF_PRE"
//       and the weight to "MW_PRE"; when false they go to "MF_POST" and
//       "MW_POST".
//
// The weight is the value returned by Descriptors::calcExactMW and is stored
// as a double. The formula property is only written when the computed
// formula is non-empty.
void AddMWMF(RWMol &mol, bool pre) {
  // The weight is computed first so that, should the descriptor code throw,
  // no property has been written yet.
  const double mass = RDKit::Descriptors::calcExactMW(mol);

  const std::string formula = getMolecularFormula(mol);
  if (!formula.empty()) {
    mol.setProp((pre ? "MF_PRE" : "MF_POST"), formula);
  }

  // The weight is stored as a number, so no string formatting is needed.
  mol.setProp((pre ? "MW_PRE" : "MW_POST"), mass);
}
// Reduces mol to its largest connected fragment.
//
// Returns false, without replacing mol, when mol consists of at most one
// fragment. Otherwise mol is overwritten with the fragment that has the most
// atoms (the last such fragment when several tie) and true is returned.
// When verbose is set, a warning is logged for every discarded fragment.
//
// If mol carries the _MolFileChiralFlag property, the flag is copied onto the
// surviving fragment. A non-zero flag is then reset to 0 unless the fragment
// shows some sign of chirality: an atom carrying _ChiralityPossible after
// stereo assignment on a sanitized copy or, failing that, a tetrahedral
// chiral tag or a wedged/dashed bond on the fragment itself.
bool StripSmallFragments(RWMol &mol, bool verbose) {
  // Fragments are deliberately extracted without sanitization.
  const bool sanitizeFrags = false;
  std::vector<boost::shared_ptr<ROMol> > fragments =
      MolOps::getMolFrags(mol, sanitizeFrags);
  if (fragments.size() < 2) {
    return false;
  }

  // Locate the fragment to keep; a later fragment of equal size replaces an
  // earlier one.
  size_t keepIdx = 0;
  size_t keepSize = 0;
  for (size_t idx = 0; idx < fragments.size(); ++idx) {
    const unsigned int nAtoms = fragments[idx]->getNumAtoms();
    if (nAtoms < keepSize) {
      continue;
    }
    keepSize = nAtoms;
    keepIdx = idx;
  }

  if (verbose) {
    std::string molName = "<no name>";
    mol.getPropIfPresent(common_properties::_Name, molName);
    for (size_t idx = 0; idx < fragments.size(); ++idx) {
      if (idx == keepIdx) {
        continue;
      }
      BOOST_LOG(rdWarningLog) << molName << " removed fragment i=" << idx
                              << " with " << fragments[idx]->getNumAtoms()
                              << " atoms" << std::endl;
    }
  }

  // Carry the mol file chiral flag over to the surviving fragment and
  // remember whether it has to be validated afterwards.
  ROMol &kept = *fragments[keepIdx];
  bool needChiralCheck = false;
  if (mol.hasProp(RDKit::common_properties::_MolFileChiralFlag)) {
    const unsigned int flag = mol.getProp<unsigned int>(
        RDKit::common_properties::_MolFileChiralFlag);
    kept.setProp<unsigned int>(RDKit::common_properties::_MolFileChiralFlag,
                               flag);
    needChiralCheck = (flag != 0);
  }
  mol = kept;

  if (!needChiralCheck) {
    return true;
  }

  // The flag was set on the input: find out whether it could have come from
  // the fragment that was kept.
  bool chiralEvidence = false;

  // First attempt: perceive stereo on a sanitized scratch copy. Any failure
  // along the way is ignored and the fallback checks below are used instead.
  RWMol scratch(mol);
  try {
    MolOps::sanitizeMol(scratch);
    ClearSingleBondDirFlags(scratch);
    const Conformer &scratchConf = scratch.getConformer();
    DetectBondStereoChemistry(scratch, &scratchConf);
    MolOps::assignStereochemistry(scratch, true, true, true);
    ROMol::AtomIterator atomIt = scratch.beginAtoms();
    while (atomIt != scratch.endAtoms()) {
      if ((*atomIt)->hasProp(common_properties::_ChiralityPossible)) {
        chiralEvidence = true;
        needChiralCheck = false;
        break;
      }
      ++atomIt;
    }
  } catch (...) {
  }

  // Fallback: look for explicit chiral tags or wedge/dash bonds on the
  // fragment itself.
  if (needChiralCheck) {
    ROMol::AtomIterator atomIt = mol.beginAtoms();
    while (atomIt != mol.endAtoms()) {
      const Atom::ChiralType tag = (*atomIt)->getChiralTag();
      if (tag == Atom::CHI_TETRAHEDRAL_CW ||
          tag == Atom::CHI_TETRAHEDRAL_CCW) {
        chiralEvidence = true;
        break;
      }
      ++atomIt;
    }
    ROMol::BondIterator bondIt = mol.beginBonds();
    while (bondIt != mol.endBonds()) {
      const Bond::BondDir dir = (*bondIt)->getBondDir();
      if (dir == Bond::BEGINDASH || dir == Bond::BEGINWEDGE) {
        chiralEvidence = true;
        break;
      }
      ++bondIt;
    }
  }

  if (!chiralEvidence) {
    mol.setProp<unsigned int>(RDKit::common_properties::_MolFileChiralFlag, 0);
  }
  return true;
}
} // namespace StructureCheck
} // namespace RDKit