mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-04 21:54:27 +08:00
* StructChecker changes. Initial commit. First implementation. Added some tests. * StructChecker: add GoodAtoms and AcidicAtoms. new updates * StructChecker: add new tests * StructChecker: added TransformAugmentedAtoms() * StructCheck: add structCheck to GraphMol. Fix compilation errors. * StructChecker: add stereo verification and some utilities. * StructChecker: function FixDubious3DMolecule was added * StructChecker: checkStereo added. done with stereo. * StructChecker: add StripSmallFragments() * StructChecker: add AtomClash() function. Some cosmetic + tests * StructChecker: checkAtoms() was started * StructChecker: checkAtoms is ready * StructChecker: user RingInfo from RDkit. Start regarge * StructChecker: ReCharge molecule method prototype * StructChecker: updates for ReCharge. Almost finished * StructChecker: all ReCharge is done except external data tables loading * StructChecker: add path tables into API. ReCharge completed * Adds augmented atom data Signed-off-by: Brian Kelley <brian.kelley@novartis.com> * Removes extra files Signed-off-by: Brian Kelley <brian.kelley@novartis.com> * Adds path to test data via RDBASE environment Signed-off-by: Brian Kelley <brian.kelley@novartis.com> * Revert "Struct checker apr15" * StructChecker: add missing tautomer tests * Updates test to use RDBASE * Adds initialization of data from data section * Adds Python API and tests * Fixes namespace for enum * StructChecker: update/imporve strip small fragments * StructChecker: fix acidic atoms (but logic does not work) * StructChecker: fix match issue for CheckAtoms * Adds macro guards * Adds loading API and proper constructor * Fixes tests, adds stereo test * Fixes crash bug, matches[0] was being accessed from an empty match vector * Reverts crash fix - conflicts with previous * Adds the rest of the structure checker options * StructChecker: fix atom matching for aromatic rings * StructChecker: add tautomers checks. Update some tests * StructChecker: stereo fixes. Add some tests * StructChecker: fix check atoms. Start ligand symbol list * StructChecker: fix some check atoms validation. Add Tranform to query lists. Start correct loading augmented atoms * update * another set of fixes * StructChecker: fix loadDefaultAugmentedAtoms. Some changes in CheckAtom + tests + debug conditional breakpoints (TEMP operators) * StructChecker: rewrited RecMatch() to sequential. Changed bond matching algorithm. small bug fixes * Adds better logging of mismatched atoms * Removes duplicated negative charge * Fixes charges * Adds nitro group test * StructChecker: add better logging * remove double logging * Reformats code using RDKit's clang-format style * StructChecker: Fix charge reformat using RDKit format. * StructChecker: compilation restore after merge * restore bond matching * Removes the same fragments that strucheck does in case of ties * Don't resanitize - this adds aromaticity which mucks things up * Adds empty molecule checks * Fixes atom clashes. * Removes debug printing * Removes debug logging info * First pass at stereo fixes * Fixes off by one error for dubious stereo fix * Fixes more off by one errors * Fixes more off by one errors * More off by one fixes. * Another off by one * Fixes chiral flag set in molfile check * Copies chiral flag over to largest fragment if necessary * Poor man’s parity check. * Find unspecified chiral centers ala Avalon. * StructChecker: fix recursive match. Fix transformations * StructChecker: fix transformation for atom list (using query atoms) * Fixes checks && to & * StructChecker: fix carboxylic acids tranform issue. Atom list is changed only if different * StructChecker: documentation was updated * Fixes snprintf and silences some warnings * Adds Get/Set StructCheckerOptions * Adds default AugmentedAtomTransforms
194 lines
6.1 KiB
C++
194 lines
6.1 KiB
C++
//
|
|
// Copyright (C) 2016 Novartis Institutes for BioMedical Research
|
|
//
|
|
// @@ All Rights Reserved @@
|
|
// This file is part of the RDKit.
|
|
// The contents are covered by the terms of the BSD license
|
|
// which is included in the file license.txt, found at the root
|
|
// of the RDKit source tree.
|
|
//
|
|
#include "../SmilesParse/SmilesWrite.h"
|
|
#include "../SmilesParse/SmilesWrite.h"
|
|
#include "StructChecker.h"
|
|
#include "Pattern.h"
|
|
#include "Stereo.h"
|
|
#include "ReCharge.h"
|
|
#include "Tautomer.h"
|
|
#include "StripSmallFragments.h"
|
|
|
|
namespace RDKit {
|
|
namespace StructureCheck {
|
|
|
|
unsigned StructChecker::checkMolStructure(RWMol &mol) const {
|
|
unsigned flags = NO_CHANGE; // == 0. return value
|
|
|
|
if (0 != Options.MaxMolSize && (mol.getNumAtoms() > Options.MaxMolSize ||
|
|
mol.getNumBonds() > Options.MaxMolSize)) {
|
|
return SIZE_CHECK_FAILED;
|
|
}
|
|
|
|
if (mol.getNumAtoms() == 0) {
|
|
return SIZE_CHECK_FAILED;
|
|
}
|
|
|
|
if (!mol.getRingInfo()->isInitialized()) mol.getRingInfo()->initialize();
|
|
|
|
/* it uses SDL text
|
|
if (Options.ConvertAtomTexts)
|
|
{
|
|
if(!convertAtomAliases(mol))
|
|
flags |= ALIAS_CONVERSION_FAILED;
|
|
else
|
|
flags |= TRANSFORMED;
|
|
}
|
|
|
|
if (Options.ConvertSText)
|
|
;//new_data_list = ConvertSTEXTToData(mol, new_data_list);
|
|
*/
|
|
if (!Options.AugmentedAtomPairs.empty()) {
|
|
if (TransformAugmentedAtoms(mol, Options.AugmentedAtomPairs, Options.Verbose)) {
|
|
flags |= TRANSFORMED;
|
|
if (Options.Verbose)
|
|
BOOST_LOG(rdInfoLog) << MolToSmiles(mol) << "\n";
|
|
}
|
|
}
|
|
|
|
unsigned stereo_result = DubiousStereochemistry(mol);
|
|
if (0 != (FixDubious3DMolecule(mol) & CONVERTED_TO_2D)) {
|
|
stereo_result = 1;
|
|
flags |= DUBIOUS_STEREO_REMOVED;
|
|
if (Options.Verbose)
|
|
BOOST_LOG(rdInfoLog) << MolToSmiles(mol) << "\n";
|
|
}
|
|
|
|
if (Options.RemoveMinorFragments) {
|
|
AddMWMF(mol, true); // Add mol mass data field "MW_PRE"
|
|
if (StripSmallFragments(mol, Options.Verbose)) {
|
|
flags |= FRAGMENTS_FOUND;
|
|
if (Options.Verbose)
|
|
BOOST_LOG(rdInfoLog) << "Striped SmallFragments"
|
|
<< MolToSmiles(mol) << "\n";
|
|
}
|
|
AddMWMF(mol, false); // Add mol mass data field "MW_POST"
|
|
}
|
|
|
|
// do tautomer standardization
|
|
for (unsigned i = 0; i < Options.FromTautomer.size(); i++) {
|
|
if (Options.Verbose)
|
|
BOOST_LOG(rdInfoLog) << "tautomerizing with rule " << i << "\n";
|
|
// fprintf(stderr, "tautomerizing with rule %d\n", i);
|
|
for (unsigned j = 0; j < 3; j++) // limit to 3 run per rule
|
|
{
|
|
StructCheckTautomer sct(mol, Options);
|
|
if (!sct.applyTautomer(i)) break;
|
|
flags |= TAUTOMER_TRANSFORMED;
|
|
if (Options.Verbose)
|
|
BOOST_LOG(rdInfoLog) << "molecule: has been tautomerized with rule "
|
|
<< i << ":\n" << MolToSmiles(mol) << "\n";
|
|
// sprintf(msg_buffer,"%10s: has been tautomerized with
|
|
// rule '%s'", mp->name, from_tautomer[i]->name);
|
|
// AddMsgToList(msg_buffer);
|
|
}
|
|
}
|
|
|
|
/* if (!IsNULL(data_list) && !IsNULL(new_data_list))
|
|
{ // append new data list if any
|
|
for (dph = data_list; !IsNULL(dph->next); dph = dph->next)
|
|
;
|
|
dph->next = new_data_list;
|
|
}
|
|
*/
|
|
if (stereo_result == EITHER_BOND_FOUND) { // looks for EITHER bonds
|
|
flags |= EITHER_WARNING;
|
|
RemoveDubiousStereochemistry(mol);
|
|
flags |= DUBIOUS_STEREO_REMOVED;
|
|
} else if (stereo_result > EITHER_BOND_FOUND) { // more severe errors
|
|
flags |= STEREO_ERROR;
|
|
if (Options.CheckStereo) {
|
|
flags |= BAD_MOLECULE;
|
|
} else {
|
|
RemoveDubiousStereochemistry(mol);
|
|
flags |= DUBIOUS_STEREO_REMOVED;
|
|
}
|
|
}
|
|
// line 1612
|
|
|
|
if (TotalCharge(mol) != Options.DesiredCharge) {
|
|
unsigned ndeprot;
|
|
unsigned nrefine;
|
|
ChargeFix ch(Options, mol);
|
|
if (ch.rechargeMolecule(ndeprot, nrefine)) {
|
|
flags |= RECHARGED;
|
|
if (Options.Verbose)
|
|
BOOST_LOG(rdInfoLog) << "Recharged Molecule:" << MolToSmiles(mol) << "\n";
|
|
}
|
|
}
|
|
//
|
|
const double clashLimit = Options.CollisionLimitPercent/100.0;
|
|
if (Options.CheckCollisions && AtomClash(mol, clashLimit)) {
|
|
flags |= ATOM_CLASH;
|
|
if (Options.Verbose)
|
|
BOOST_LOG(rdInfoLog) << "AtomClash done:" << MolToSmiles(mol) << "\n";
|
|
}
|
|
|
|
if (!Options.GoodAtoms.empty())
|
|
if (!CheckAtoms(mol, Options.GoodAtoms, Options.Verbose))
|
|
flags |= ATOM_CHECK_FAILED;
|
|
|
|
if (Options.CheckStereo && !CheckStereo(mol)) flags |= STEREO_ERROR;
|
|
|
|
// if (Options.GroupsToSGroups)
|
|
// ConvertGroupsToSGroups(mol);
|
|
/*
|
|
//line 1630
|
|
stereo_bad = FALSE;
|
|
for (i = 0; i<nstereopat; i++)
|
|
{
|
|
ssp = stereo_patterns[i];
|
|
tmp = ForceStereoTemplate(mp, ssp);
|
|
if (tmp == (-1))
|
|
{
|
|
flags |= STEREO_FORCED_BAD; // problem enforcing
|
|
stereochemistry of 'ssp->name'
|
|
}
|
|
else if (tmp == 15) // "STEREO_FORCED"
|
|
{
|
|
flags |= STEREO_TRANSFORMED; // stereochemistry of
|
|
'ssp->name' enforced",
|
|
}
|
|
}
|
|
//line 1655
|
|
for (i = 0; i<npat; i++) // do template cleaning
|
|
{
|
|
ssp = patterns[i];
|
|
if (TemplateClean(mol, ssp))
|
|
{
|
|
result |= TEMPLATE_TRANSFORMED; // has been cleaned with
|
|
template 'ssp->name'
|
|
}
|
|
}
|
|
//line 1669
|
|
for (i = 0; i<nrpat; i++) // do template rotation
|
|
{
|
|
ssp = rotate_patterns[i];
|
|
if (TemplateRotate(mol, ssp))
|
|
{
|
|
result |= TEMPLATE_TRANSFORMED; // has been rotated by
|
|
template 'ssp->name'
|
|
}
|
|
}
|
|
}
|
|
*/
|
|
|
|
// the end:
|
|
if (0 != (flags & TRANSFORMED)) { // sanitaze molecule
|
|
// + ???? .............. ????
|
|
if (mol.getRingInfo()->isInitialized()) mol.getRingInfo()->reset();
|
|
mol.getRingInfo()->initialize();
|
|
}
|
|
return flags;
|
|
}
|
|
|
|
} // namespace StructureCheck
|
|
} // namespace RDKit
|