Files
rdkit/Code/GraphMol/StructChecker/StructChecker.cpp
Brian Kelley 8609cd4883 Add StructChecker functionality
* StructChecker changes. Initial commit. First implementation. Added some tests.

* StructChecker: add  GoodAtoms and AcidicAtoms. new updates

* StructChecker: add new tests

* StructChecker: added TransformAugmentedAtoms()

* StructCheck: add structCheck to GraphMol. Fix compilation errors.

* StructChecker: add stereo verification and some utilities.

* StructChecker: function FixDubious3DMolecule was added

* StructChecker: checkStereo added. done with stereo.

* StructChecker: add StripSmallFragments()

* StructChecker: add AtomClash() function. Some cosmetic + tests

* StructChecker: checkAtoms() was started

* StructChecker: checkAtoms is ready

* StructChecker: use RingInfo from RDKit. Start recharge

* StructChecker: ReCharge molecule method prototype

* StructChecker: updates for ReCharge. Almost finished

* StructChecker: all ReCharge is done except external data tables loading

* StructChecker: add path tables into API. ReCharge completed

* Adds augmented atom data

Signed-off-by: Brian Kelley <brian.kelley@novartis.com>

* Removes extra files

Signed-off-by: Brian Kelley <brian.kelley@novartis.com>

* Adds path to test data via RDBASE environment

Signed-off-by: Brian Kelley <brian.kelley@novartis.com>

* Revert "Struct checker apr15"

* StructChecker: add missing tautomer tests

* Updates test to use RDBASE

* Adds initialization of data from data section

* Adds Python API and tests

* Fixes namespace for enum

* StructChecker: update/improve strip small fragments

* StructChecker: fix acidic atoms (but logic does not work)

* StructChecker: fix match issue for CheckAtoms

* Adds macro guards

* Adds loading API and proper constructor

* Fixes tests, adds stereo test

* Fixes crash bug, matches[0] was being accessed from an empty match vector

* Reverts crash fix - conflicts with previous

* Adds the rest of the structure checker options

* StructChecker: fix atom matching for aromatic rings

* StructChecker: add tautomers checks. Update some tests

* StructChecker: stereo fixes. Add some tests

* StructChecker: fix check atoms. Start ligand symbol list

* StructChecker: fix some check atoms validation. Add Transform to query lists. Start correct loading augmented atoms

* update

* another set of fixes

* StructChecker: fix loadDefaultAugmentedAtoms. Some changes in CheckAtom + tests + debug conditional breakpoints (TEMP operators)

* StructChecker: rewrote RecMatch() to be sequential. Changed bond matching algorithm. small bug fixes

* Adds better logging of mismatched atoms

* Removes duplicated negative charge

* Fixes charges

* Adds nitro group test

* StructChecker: add better logging

* remove double logging

* Reformats code using RDKit's clang-format style

* StructChecker: Fix charge reformat using RDKit format.

* StructChecker: compilation restore after merge

* restore bond matching

* Removes the same fragments that strucheck does in case of ties

* Don't resanitize - this adds aromaticity which mucks things up

* Adds empty molecule checks

* Fixes atom clashes.

* Removes debug printing

* Removes debug logging info

* First pass at stereo fixes

* Fixes off by one error for dubious stereo fix

* Fixes more off by one errors

* Fixes more off by one errors

* More off by one fixes.

* Another off by one

* Fixes chiral flag set in molfile check

* Copies chiral flag over to largest fragment if necessary

* Poor man’s parity check.

* Find unspecified chiral centers ala Avalon.

* StructChecker: fix recursive match. Fix transformations

* StructChecker: fix transformation for atom list (using query atoms)

* Fixes checks && to &

* StructChecker: fix carboxylic acids transform issue. Atom list is changed only if different

* StructChecker: documentation was updated

* Fixes snprintf and silences some warnings

* Adds Get/Set StructCheckerOptions

* Adds default AugmentedAtomTransforms
2016-10-24 08:00:07 +02:00

194 lines
6.1 KiB
C++

//
// Copyright (C) 2016 Novartis Institutes for BioMedical Research
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
#include "../SmilesParse/SmilesWrite.h"
#include "../SmilesParse/SmilesWrite.h"
#include "StructChecker.h"
#include "Pattern.h"
#include "Stereo.h"
#include "ReCharge.h"
#include "Tautomer.h"
#include "StripSmallFragments.h"
namespace RDKit {
namespace StructureCheck {

// Runs the configured structure checks and clean-up steps on `mol`, in place.
//
// Steps are executed in a fixed order, each one controlled by the matching
// field of `Options`:
//   1. size check (MaxMolSize, and rejection of empty molecules)
//   2. augmented-atom transformation (AugmentedAtomPairs)
//   3. dubious-stereo detection and dubious-3D fix
//   4. minor fragment stripping (RemoveMinorFragments)
//   5. tautomer standardization (FromTautomer rules)
//   6. resolution of the stereo result into flags (CheckStereo)
//   7. recharge when the total charge differs from DesiredCharge
//   8. atom clash check (CheckCollisions, CollisionLimitPercent)
//   9. atom check against GoodAtoms
//  10. stereo check (CheckStereo)
//
// \param mol  molecule to check; may be modified by the steps above.
// \return     bitwise OR of the result flags raised by the steps, or
//             NO_CHANGE (0) if none was raised. SIZE_CHECK_FAILED is returned
//             immediately, before any other step runs, when the molecule is
//             empty or exceeds Options.MaxMolSize.
unsigned StructChecker::checkMolStructure(RWMol &mol) const {
  unsigned flags = NO_CHANGE;  // == 0. return value

  // MaxMolSize == 0 disables the upper bound; otherwise it limits both the
  // atom count and the bond count.
  if (0 != Options.MaxMolSize && (mol.getNumAtoms() > Options.MaxMolSize ||
                                  mol.getNumBonds() > Options.MaxMolSize)) {
    return SIZE_CHECK_FAILED;
  }
  // An empty molecule is reported as a size failure as well.
  if (mol.getNumAtoms() == 0) {
    return SIZE_CHECK_FAILED;
  }

  // Make sure the ring info object is in the initialized state before the
  // steps below run.
  if (!mol.getRingInfo()->isInitialized()) mol.getRingInfo()->initialize();

  /* it uses SDL text
  if (Options.ConvertAtomTexts)
  {
  if(!convertAtomAliases(mol))
  flags |= ALIAS_CONVERSION_FAILED;
  else
  flags |= TRANSFORMED;
  }
  if (Options.ConvertSText)
  ;//new_data_list = ConvertSTEXTToData(mol, new_data_list);
  */

  // Augmented-atom transformation.
  if (!Options.AugmentedAtomPairs.empty()) {
    if (TransformAugmentedAtoms(mol, Options.AugmentedAtomPairs,
                                Options.Verbose)) {
      flags |= TRANSFORMED;
      if (Options.Verbose) {
        BOOST_LOG(rdInfoLog) << MolToSmiles(mol) << "\n";
      }
    }
  }

  // Record the stereo status now; it is turned into flags only after
  // fragment stripping and tautomer standardization (see below).
  unsigned stereo_result = DubiousStereochemistry(mol);
  if (0 != (FixDubious3DMolecule(mol) & CONVERTED_TO_2D)) {
    // The molecule was converted to 2D: overwrite the detection result with 1.
    // NOTE(review): 1 is presumably EITHER_BOND_FOUND - confirm.
    stereo_result = 1;
    flags |= DUBIOUS_STEREO_REMOVED;
    if (Options.Verbose) {
      BOOST_LOG(rdInfoLog) << MolToSmiles(mol) << "\n";
    }
  }

  // Minor fragment stripping, bracketed by the two AddMWMF calls.
  if (Options.RemoveMinorFragments) {
    AddMWMF(mol, true);  // Add mol mass data field "MW_PRE"
    if (StripSmallFragments(mol, Options.Verbose)) {
      flags |= FRAGMENTS_FOUND;
      if (Options.Verbose) {
        // The trailing ':' keeps the message from running into the SMILES.
        BOOST_LOG(rdInfoLog) << "Stripped SmallFragments:" << MolToSmiles(mol)
                             << "\n";
      }
    }
    AddMWMF(mol, false);  // Add mol mass data field "MW_POST"
  }

  // do tautomer standardization
  for (unsigned i = 0; i < Options.FromTautomer.size(); i++) {
    if (Options.Verbose) {
      BOOST_LOG(rdInfoLog) << "tautomerizing with rule " << i << "\n";
    }
    // fprintf(stderr, "tautomerizing with rule %d\n", i);
    for (unsigned j = 0; j < 3; j++)  // limit to 3 run per rule
    {
      StructCheckTautomer sct(mol, Options);
      if (!sct.applyTautomer(i)) break;  // rule no longer applies
      flags |= TAUTOMER_TRANSFORMED;
      if (Options.Verbose) {
        BOOST_LOG(rdInfoLog) << "molecule: has been tautomerized with rule "
                             << i << ":\n"
                             << MolToSmiles(mol) << "\n";
      }
      // sprintf(msg_buffer,"%10s: has been tautomerized with
      // rule '%s'", mp->name, from_tautomer[i]->name);
      // AddMsgToList(msg_buffer);
    }
  }

  /* if (!IsNULL(data_list) && !IsNULL(new_data_list))
  { // append new data list if any
  for (dph = data_list; !IsNULL(dph->next); dph = dph->next)
  ;
  dph->next = new_data_list;
  }
  */

  // Turn the stereo result recorded earlier into flags.
  if (stereo_result == EITHER_BOND_FOUND) {  // looks for EITHER bonds
    flags |= EITHER_WARNING;
    RemoveDubiousStereochemistry(mol);
    flags |= DUBIOUS_STEREO_REMOVED;
  } else if (stereo_result > EITHER_BOND_FOUND) {  // more severe errors
    flags |= STEREO_ERROR;
    if (Options.CheckStereo) {
      // With stereo checking enabled the molecule is flagged as bad and its
      // stereochemistry is left as it is.
      flags |= BAD_MOLECULE;
    } else {
      RemoveDubiousStereochemistry(mol);
      flags |= DUBIOUS_STEREO_REMOVED;
    }
  }

  // line 1612
  // NOTE(review): "line NNNN" markers presumably refer to the reference
  // implementation this function was ported from - confirm.
  if (TotalCharge(mol) != Options.DesiredCharge) {
    // Zero-initialized so that neither counter ever holds an indeterminate
    // value, whatever rechargeMolecule() does with them.
    unsigned ndeprot = 0;
    unsigned nrefine = 0;
    ChargeFix ch(Options, mol);
    if (ch.rechargeMolecule(ndeprot, nrefine)) {
      flags |= RECHARGED;
      if (Options.Verbose) {
        BOOST_LOG(rdInfoLog)
            << "Recharged Molecule:" << MolToSmiles(mol) << "\n";
      }
    }
  }

  // Atom clash check; CollisionLimitPercent is scaled from a percentage to a
  // fraction before being passed to AtomClash().
  const double clashLimit = Options.CollisionLimitPercent / 100.0;
  if (Options.CheckCollisions && AtomClash(mol, clashLimit)) {
    flags |= ATOM_CLASH;
    if (Options.Verbose) {
      BOOST_LOG(rdInfoLog) << "AtomClash done:" << MolToSmiles(mol) << "\n";
    }
  }

  // Atom check; skipped entirely when no GoodAtoms are configured.
  if (!Options.GoodAtoms.empty()) {
    if (!CheckAtoms(mol, Options.GoodAtoms, Options.Verbose)) {
      flags |= ATOM_CHECK_FAILED;
    }
  }

  if (Options.CheckStereo && !CheckStereo(mol)) flags |= STEREO_ERROR;

  // if (Options.GroupsToSGroups)
  // ConvertGroupsToSGroups(mol);
  /*
  //line 1630
  stereo_bad = FALSE;
  for (i = 0; i<nstereopat; i++)
  {
  ssp = stereo_patterns[i];
  tmp = ForceStereoTemplate(mp, ssp);
  if (tmp == (-1))
  {
  flags |= STEREO_FORCED_BAD; // problem enforcing
  stereochemistry of 'ssp->name'
  }
  else if (tmp == 15) // "STEREO_FORCED"
  {
  flags |= STEREO_TRANSFORMED; // stereochemistry of
  'ssp->name' enforced",
  }
  }
  //line 1655
  for (i = 0; i<npat; i++) // do template cleaning
  {
  ssp = patterns[i];
  if (TemplateClean(mol, ssp))
  {
  result |= TEMPLATE_TRANSFORMED; // has been cleaned with
  template 'ssp->name'
  }
  }
  //line 1669
  for (i = 0; i<nrpat; i++) // do template rotation
  {
  ssp = rotate_patterns[i];
  if (TemplateRotate(mol, ssp))
  {
  result |= TEMPLATE_TRANSFORMED; // has been rotated by
  template 'ssp->name'
  }
  }
  }
  */

  // the end:
  if (0 != (flags & TRANSFORMED)) {  // sanitize molecule
    // + ???? .............. ????
    // Only the TRANSFORMED flag triggers this: the ring info is reset (if it
    // was initialized) and then initialized again.
    if (mol.getRingInfo()->isInitialized()) mol.getRingInfo()->reset();
    mol.getRingInfo()->initialize();
  }
  return flags;
}

}  // namespace StructureCheck
}  // namespace RDKit