Files
rdkit/Code/GraphMol/StructChecker/StripSmallFragments.cpp
tadhurst-cdd 0de215a1f8 Fix canonicalization of stereogroups (#7041)
* atropisomer handling added

* fixed non-used variables,  linking directives

* BOOST LIB start/stop fixes, linking fix

* Fixes for RDKIT CI errors

* minimalLib fix

* changed vector<enum> for java builds

* check for extra chars in CIP labeling

* removed wrong deprecated message

* fix ostrstream output error?

* restored _ChiralAtomRank to lowercase first letter

* changes for merged master

* Fixed catch label for new Catch package

* update expected psql results

* get swig wrappers building

* restore MolFileStereochem to FileParsers

* fix java wrapper for reapplyMolBlockWedging

* test changes

* some suggestions

* move a couple functions out of Bond

* Merge branch 'master' into pr/atropisomers2

* merged master

* Renamed setStereoanyFromSquiggleBond

* atropisomers in cdxml, rationalize atrop wedging, stereoGroups in drawMol

* Merge branch 'master' into pr/specialQueries

* changes from previous PR

* Include false chiral

* rigorous enhanced stereo canonicalization

* Added more tests and cleanup

* removed commented out code

* corrected init of SmilesWriteParams

* added MolFileStereoChem.h to the header files

* Renamed Rxn parser to MrvBlockToChemicalReaction

* To make catch2 work, and match the checksum

* Fixed Structchecker errors

* fix CI for DetermineBonds catch test

* error in catch_test for CI

* Allow custom  smileWriteParams  in GetMolLayers

* misnamed entry point

* ReactionFromMrvString change name

* remove adding writeParams to GetMolLayers

* make rigorous enhanced stereo the default, and fix tests

* only one abs group no longer needs Rigorous Enhanced treatment

* changed string_view to string in catch test

* Canonicalize Enhanced Stereo only returns unique smiles

* Now allows or and and groups together

* internal routines inside detail scope

* fix test error

* changed string back to string_view and fixed a CHECK

* Fixes for PR review tests

* Fix RDKit_Book.rst failure on build test

* fix xqm sql test

* updated expected files for cxsmiles_test

* Fixed removal of atom attrs

* Fixed tests after merge of master

* More efficient version of Stereo Groups Canonicalization

* Fixes for ctests

* removed debug code

* readded cipLabel test

* fix generalizedSubstruct/catch_tests.cpp error

* heuristics to improve speed

* Rationalized control of abs groups

* removed unused routine

* added rigorous stereo group treatment to test

* some suggested changes

* Changes per PR review and removed some changes to smiles

* Fixed CI errors

* changes per PR review

* more PR review changes and cleanup

* Fixed PSql PKL change

* changes as per PR review

* Restored error type for bad mols for canonicalizeStereoGroups and added a test

* Merge master and fix test in MolDraw2D

* Fix for randomize test error and other PR review comments

* Removed unused variable to fix mac CI

* do not force aromatization in canonicalizeStereoGroups

* changes as per PR review

---------

Co-authored-by: greg landrum <greg.landrum@gmail.com>
2024-10-11 17:09:18 +02:00

133 lines
3.8 KiB
C++

//
// Copyright (C) 2016 Novartis Institutes for BioMedical Research
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
#include <map>
#include "../MolOps.h"
#include "StripSmallFragments.h"
#include "../SmilesParse/SmilesWrite.h"
#include "../FileParsers/MolFileStereochem.h"
// define snprintf for msvc
#if _MSC_VER
#if _MSC_VER < 1900
#define snprintf _snprintf
#endif
#endif
namespace RDKit {
namespace StructureCheck {
// Records the molecular formula and the molecular weight of `mol` as
// properties on the molecule itself.
//
// mol: molecule to annotate; the properties are written onto it.
// pre: selects the property names - "MF_PRE"/"MW_PRE" when true,
//      "MF_POST"/"MW_POST" when false.
void AddMWMF(RWMol &mol, bool pre) {
  // weight as reported by MolOps::getExactMolWt(); stored below as a double
  const double mass = RDKit::MolOps::getExactMolWt(mol);
  const std::string formula = RDKit::MolOps::getMolFormula(mol);

  // the formula property is only written when a non-empty formula came back
  if (!formula.empty()) {
    mol.setProp((pre ? "MF_PRE" : "MF_POST"), formula);
  }
  mol.setProp((pre ? "MW_PRE" : "MW_POST"), mass);
}
// Replaces `mol` with its largest fragment (by atom count).
//
// mol:     molecule to process; overwritten with the kept fragment when more
//          than one fragment is present.
// verbose: when true, a warning is logged for every fragment that is dropped.
//
// Returns false (leaving `mol` untouched) when getMolFrags() yields at most
// one fragment, true otherwise.
//
// If `mol` carries the _MolFileChiralFlag property, its value is copied onto
// the kept fragment. When that value is non-zero, the flag is reset to 0 on
// the result unless the kept fragment shows some sign of chirality (see the
// checks below).
bool StripSmallFragments(RWMol &mol, bool verbose) {
  const bool sanitizeFrags = false;
  std::vector<boost::shared_ptr<ROMol>> fragments =
      MolOps::getMolFrags(mol, sanitizeFrags);
  if (fragments.size() < 2) {
    return false;
  }

  // Locate the fragment with the most atoms; on a tie the later fragment wins.
  size_t keepSize = 0;
  size_t keepIdx = 0;
  for (size_t idx = 0; idx < fragments.size(); ++idx) {
    const unsigned int nAtoms = fragments[idx]->getNumAtoms();
    if (nAtoms < keepSize) {
      continue;
    }
    keepSize = nAtoms;
    keepIdx = idx;
  }

  if (verbose) {
    std::string molName = "<no name>";
    mol.getPropIfPresent(common_properties::_Name, molName);
    for (size_t idx = 0; idx < fragments.size(); ++idx) {
      if (idx == keepIdx) {
        continue;
      }
      BOOST_LOG(rdWarningLog)
          << molName << " removed fragment i=" << idx << " with "
          << fragments[idx]->getNumAtoms() << " atoms" << std::endl;
    }
  }

  // Carry the mol file chiral flag over to the fragment that is kept, and
  // remember whether it has to be validated afterwards.
  bool flagNeedsCheck = false;
  if (mol.hasProp(RDKit::common_properties::_MolFileChiralFlag)) {
    const unsigned int flagValue = mol.getProp<unsigned int>(
        RDKit::common_properties::_MolFileChiralFlag);
    fragments[keepIdx]->setProp<unsigned int>(
        RDKit::common_properties::_MolFileChiralFlag, flagValue);
    flagNeedsCheck = (flagValue != 0);
  }

  mol = *fragments[keepIdx];

  if (!flagNeedsCheck) {
    return true;
  }

  // The flag was set on the input: find out whether the chirality could have
  // come from the fragment that was kept.
  bool hasChirality = false;

  // First attempt: perceive stereochemistry on a sanitized scratch copy and
  // look for atoms marked with _ChiralityPossible.
  RWMol scratch(mol);
  try {
    MolOps::sanitizeMol(scratch);
    MolOps::clearSingleBondDirFlags(scratch);
    MolOps::detectBondStereochemistry(scratch);
    MolOps::assignStereochemistry(scratch, true, true, true);
    for (const auto atom : scratch.atoms()) {
      if (atom->hasProp(common_properties::_ChiralityPossible)) {
        hasChirality = true;
        break;
      }
    }
  } catch (...) {
    // Anything thrown by the perception steps is ignored on purpose; the
    // fallback scan below then decides.
  }

  // Fallback: look at the chiral tags and wedge/dash bond directions that are
  // already present on the kept fragment.
  if (!hasChirality) {
    for (const auto atom : mol.atoms()) {
      const auto tag = atom->getChiralTag();
      if (tag == Atom::CHI_TETRAHEDRAL_CW || tag == Atom::CHI_TETRAHEDRAL_CCW) {
        hasChirality = true;
        break;
      }
    }
    if (!hasChirality) {
      for (const auto bond : mol.bonds()) {
        const auto dir = bond->getBondDir();
        if (dir == Bond::BEGINDASH || dir == Bond::BEGINWEDGE) {
          hasChirality = true;
          break;
        }
      }
    }
  }

  if (!hasChirality) {
    mol.setProp<unsigned int>(RDKit::common_properties::_MolFileChiralFlag, 0);
  }
  return true;
}
} // namespace StructureCheck
} // namespace RDKit