mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-03 21:44:30 +08:00
* Code/GraphMol/Depictor/RDDepictor.h - fixed typo in docstring Code/GraphMol/RGroupDecomposition/RGroupCore.cpp - added a missing const; formatting changes Code/GraphMol/RGroupDecomposition/RGroupData.cpp, Code/GraphMol/RGroupDecomposition/RGroupData.h - moved the code which merges disconnected R groups sharing the same attachment point into a single combined molecule to a private method, RGroupData::mergeIntoCombinedMol(). The method also includes logic to merge atom and bond highlights, if present. - modernized a for loop - isMolHydrogen is now a static function since it does not actually require any instance data - implemented three static function to return the R group, Core and Mol labels, respectively Code/GraphMol/RGroupDecomposition/RGroupDecomp.cpp, Code/GraphMol/RGroupDecomposition/RGroupDecomp.h - implemented two private methods, RGroupDecomposition::labelAtomBondIndices() and RGroupDecomposition::setTargetAtomBondIndices(). The first method tags all atoms and bonds in the target molecule such that they can be tracked following core removal by RDKit::replaceCore(). The second method sets common_properties::_rgroupTargetAtoms and common_properties::_rgroupTargetBonds properties on core and R groups. 
These are vectors of atom and bond indices in the target molecule corresponding to core and R group atom/bonds, respectively, and can be used for color-coding the target molecule according to the R group decomposition it was subjected to, similarly to https://greglandrum.github.io/rdkit-blog/posts/2021-08-07-rgd-and-highlighting.html Code/GraphMol/RGroupDecomposition/RGroupDecompData.cpp - formatting changes and for loop modernization Code/GraphMol/RGroupDecomposition/RGroupDecompParams.cpp, Code/GraphMol/RGroupDecomposition/RGroupDecompParams.h - implemented updateRGroupDecompositionParametersFromJSON() - added includeTargetMolInResults boolean parameter Code/GraphMol/RGroupDecomposition/RGroupMatch.h - implemented RGroupMatch::setTargetMoleculeForHighlights() and RGroupMatch::getTargetMoleculeForHighlights() methods to, respectively set and get the target molecule where R group decomposition can be color-coded with highlights. This molecule includes the explicit H atoms corresponding to extracted R groups, if any. 
Code/GraphMol/RGroupDecomposition/Wrap/rdRGroupComposition.cpp - use a std::unique_ptr to store the pointer to the C++ RGroupDecomposition instance - fixed typos in docstrings Code/GraphMol/RGroupDecomposition/Wrap/test_rgroups.py - added test for the new includeTargetMolInResults parameter Code/GraphMol/RGroupDecomposition/catch_rgd.cpp - added test for the new includeTargetMolInResults parameter Code/GraphMol/RGroupDecomposition/testRGroupDecomp.cpp - formatting changes Code/GraphMol/RGroupDecomposition/testRGroupInternals.cpp - do not use deprecated constant Code/MinimalLib/CMakeLists.txt - added RDK_BUILD_MINIMAL_LIB_RGROUPDECOMP CMake flag to optionally expose R group decomposition functionality into MinimalLib Code/MinimalLib/common.h - added makeDummiesQueries flag to mol_from_input() (defaults to false) - implemented parse_highlight_multi_colors() function to parse multi-color atom and bond highlights - enable multi-color atom and bond highlighting Code/MinimalLib/demo/rgd_demo.html - added HTML page showcasing the multi-color highlights similarly to https://greglandrum.github.io/rdkit-blog/posts/2021-08-07-rgd-and-highlighting.html Code/MinimalLib/jswrapper.cpp - removed checks for non-nullness of d_mol as d_mol cannot be directly accessed anymore - replaced all instances of d_mol with get() - implemented support for multi-color atom and bond highlights - implemented optional support for R group decomposition - added JSMol::copy() convenience method with same functionality as get_mol_copy() to duplicate a molecule Code/MinimalLib/minilib.cpp, Code/MinimalLib/minilib.h - replaced all occurrences of d_mol with get(), as d_mol is now private - removed all occurrences of assert(d_mol) as non-nullness is checked at construction time and whenever get() is called - JSMol is now split into two subbclasses, JSMolUnique and JSMolShared, which both inherit from the JSMol base class. 
JSMolUnique can be constructed from a RWMol* (as the old JSMol), while JSMolShared can be constructed from a ROMOL_SPTR. This avoids unnecessary copies when wrapping a ROMOL_SPTR (e.g., from subtructure library, JSMolList or R group decomposition) into a JSMol to pass it to JS. This also avoids that modifications done in the JS layer on a molecule stored in a MolList (e.g., adding a property) are not persisted because they are carried out on a volatile copy of the molecule rather than on the actual molecule. Code/MinimalLib/tests/tests.js - added a test for pesistence of modifications made to JSSharedMol - added tests for RGD - added test for JSMol::copy() Code/RDGeneral/RDValue.h - removed trailing comma from vector properties such that they can be deserialized as syntactically correct JSON Code/RDGeneral/types.cpp, Code/RDGeneral/types.h - added _rgroupTargetAtoms and _rgroupTargetBonds common_properties * Code/GraphMol/Depictor/RDDepictor.h - fixed typo in docstring Code/GraphMol/RGroupDecomposition/RGroupCore.cpp - added a missing const; formatting changes Code/GraphMol/RGroupDecomposition/RGroupData.cpp, Code/GraphMol/RGroupDecomposition/RGroupData.h - moved the code which merges disconnected R groups sharing the same attachment point into a single combined molecule to a private method, RGroupData::mergeIntoCombinedMol(). The method also includes logic to merge atom and bond highlights, if present. - modernized a for loop - isMolHydrogen is now a static function since it does not actually require any instance data - implemented three static function to return the R group, Core and Mol labels, respectively Code/GraphMol/RGroupDecomposition/RGroupDecomp.cpp, Code/GraphMol/RGroupDecomposition/RGroupDecomp.h - implemented two private methods, RGroupDecomposition::labelAtomBondIndices() and RGroupDecomposition::setTargetAtomBondIndices(). 
The first method tags all atoms and bonds in the target molecule such that they can be tracked following core removal by RDKit::replaceCore(). The second method sets common_properties::_rgroupTargetAtoms and common_properties::_rgroupTargetBonds properties on core and R groups. These are vectors of atom and bond indices in the target molecule corresponding to core and R group atom/bonds, respectively, and can be used for color-coding the target molecule according to the R group decomposition it was subjected to, similarly to https://greglandrum.github.io/rdkit-blog/posts/2021-08-07-rgd-and-highlighting.html Code/GraphMol/RGroupDecomposition/RGroupDecompData.cpp - formatting changes and for loop modernization Code/GraphMol/RGroupDecomposition/RGroupDecompParams.cpp, Code/GraphMol/RGroupDecomposition/RGroupDecompParams.h - implemented updateRGroupDecompositionParametersFromJSON() - added includeTargetMolInResults boolean parameter Code/GraphMol/RGroupDecomposition/RGroupMatch.h - implemented RGroupMatch::setTargetMoleculeForHighlights() and RGroupMatch::getTargetMoleculeForHighlights() methods to, respectively set and get the target molecule where R group decomposition can be color-coded with highlights. This molecule includes the explicit H atoms corresponding to extracted R groups, if any. 
Code/GraphMol/RGroupDecomposition/Wrap/rdRGroupComposition.cpp - use a std::unique_ptr to store the pointer to the C++ RGroupDecomposition instance - fixed typos in docstrings Code/GraphMol/RGroupDecomposition/Wrap/test_rgroups.py - added test for the new includeTargetMolInResults parameter Code/GraphMol/RGroupDecomposition/catch_rgd.cpp - added test for the new includeTargetMolInResults parameter Code/GraphMol/RGroupDecomposition/testRGroupDecomp.cpp - formatting changes Code/GraphMol/RGroupDecomposition/testRGroupInternals.cpp - do not use deprecated constant Code/MinimalLib/CMakeLists.txt - added RDK_BUILD_MINIMAL_LIB_RGROUPDECOMP CMake flag to optionally expose R group decomposition functionality into MinimalLib Code/MinimalLib/common.h - added makeDummiesQueries flag to mol_from_input() (defaults to false) - implemented parse_highlight_multi_colors() function to parse multi-color atom and bond highlights - enable multi-color atom and bond highlighting Code/MinimalLib/demo/rgd_demo.html - added HTML page showcasing the multi-color highlights similarly to https://greglandrum.github.io/rdkit-blog/posts/2021-08-07-rgd-and-highlighting.html Code/MinimalLib/jswrapper.cpp - removed checks for non-nullness of d_mol as d_mol cannot be directly accessed anymore - replaced all instances of d_mol with get() - implemented support for multi-color atom and bond highlights - implemented optional support for R group decomposition - JSMol is now split into two subbclasses, JSMol and JSMolShared, which both inherit from the JSMolBase class. While JSMol can be constructed from a RWMol* as usual, JSMolShared can be constructed from a ROMOL_SPTR. This avoids unnecessary copies when wrapping a ROMOL_SPTR (e.g., from subtructure library, JSMolList or R group decomposition) into a JSMol to pass it to JS. 
This also avoids that modifications done in the JS layer on a molecule stored in a MolList (e.g., adding a property) are not persisted because they are carried out on a volatile copy of the molecule rather than on the actual molecule. - added JSMolBase::copy() convenience method with same functionality as get_mol_copy() to duplicate a molecule Code/MinimalLib/minilib.cpp, Code/MinimalLib/minilib.h - replaced all occurrences of d_mol with get(), as d_mol is now private - removed all occurrences of assert(d_mol) as non-nullness is checked at construction time and whenever get() is called Code/MinimalLib/tests/tests.js - added a test for pesistence of modifications made to JSMolShared - added tests for RGD - added test for JSMolBase::copy() Code/RDGeneral/RDValue.h - removed trailing comma from vector properties such that they can be deserialized as syntactically correct JSON Code/RDGeneral/types.cpp, Code/RDGeneral/types.h - added _rgroupTargetAtoms and _rgroupTargetBonds common_properties * added assignChiralTypesFromMolParity flag * added test for makeDummiesQueries * added CFFI tests * reordered tests * re-added piece of code that had gone accidentally lost while merging conflicts * Removed CHECK_INVARIANT following code review --------- Co-authored-by: ptosco <paolo.tosco@novartis.com>
162 lines
5.1 KiB
C++
162 lines
5.1 KiB
C++
//
|
|
// Copyright (c) 2017-2023, Novartis Institutes for BioMedical Research Inc.
|
|
// and other RDKit contributors
|
|
//
|
|
// @@ All Rights Reserved @@
|
|
// This file is part of the RDKit.
|
|
// The contents are covered by the terms of the BSD license
|
|
// which is included in the file license.txt, found at the root
|
|
// of the RDKit source tree.
|
|
//
|
|
|
|
#include "RGroupData.h"
|
|
#include <GraphMol/SmilesParse/SmilesWrite.h>
|
|
#include <GraphMol/Substruct/SubstructMatch.h>
|
|
#include <GraphMol/ChemTransforms/ChemTransforms.h>
|
|
#include <regex>
|
|
|
|
namespace RDKit {
|
|
|
|
void RGroupData::mergeIntoCombinedMol(const ROMOL_SPTR &mol) {
|
|
CHECK_INVARIANT(mol, "mol must not be null");
|
|
if (!combinedMol) {
|
|
combinedMol = RWMOL_SPTR(new RWMol(*mol));
|
|
} else {
|
|
combinedMol.reset(static_cast<RWMol *>(combineMols(*combinedMol, *mol)));
|
|
single_fragment = false;
|
|
}
|
|
smiles = getSmiles();
|
|
combinedMol->setProp(common_properties::internalRgroupSmiles, smiles);
|
|
std::vector<int> incomingAtomIndices;
|
|
std::vector<int> incomingBondIndices;
|
|
mol->getPropIfPresent(common_properties::_rgroupTargetAtoms,
|
|
incomingAtomIndices);
|
|
mol->getPropIfPresent(common_properties::_rgroupTargetBonds,
|
|
incomingBondIndices);
|
|
std::vector<int> existingAtomIndices;
|
|
std::vector<int> existingBondIndices;
|
|
combinedMol->getPropIfPresent(common_properties::_rgroupTargetAtoms,
|
|
existingAtomIndices);
|
|
combinedMol->getPropIfPresent(common_properties::_rgroupTargetBonds,
|
|
existingBondIndices);
|
|
if (!incomingAtomIndices.empty()) {
|
|
existingAtomIndices.insert(
|
|
existingAtomIndices.end(),
|
|
std::make_move_iterator(incomingAtomIndices.begin()),
|
|
std::make_move_iterator(incomingAtomIndices.end()));
|
|
}
|
|
if (!incomingBondIndices.empty()) {
|
|
existingBondIndices.insert(
|
|
existingBondIndices.end(),
|
|
std::make_move_iterator(existingBondIndices.begin()),
|
|
std::make_move_iterator(existingBondIndices.end()));
|
|
}
|
|
}
|
|
|
|
//! Build the label for an R group: the letter "R" followed by the decimal
//! representation of \c rlabel.
std::string RGroupData::getRGroupLabel(int rlabel) {
  std::string label("R");
  label += std::to_string(rlabel);
  return label;
}
|
|
|
|
//! Return a reference to the constant label "Core".
const std::string &RGroupData::getCoreLabel() {
  // function-local static: the returned reference stays valid after the call
  static const std::string coreLabel("Core");
  return coreLabel;
}
|
|
|
|
//! Return a reference to the constant label "Mol".
const std::string &RGroupData::getMolLabel() {
  // function-local static: the returned reference stays valid after the call
  static const std::string molLabel("Mol");
  return molLabel;
}
|
|
|
|
void RGroupData::add(const ROMOL_SPTR &newMol,
|
|
const std::vector<int> &rlabel_attachments) {
|
|
// some fragments can be added multiple times if they are cyclic
|
|
if (std::any_of(mols.begin(), mols.end(),
|
|
[&newMol](const auto &mol) { return newMol == mol; })) {
|
|
return;
|
|
}
|
|
|
|
if (!mols.empty()) {
|
|
// don't add extraneous hydrogens
|
|
if (isMolHydrogen(*newMol)) {
|
|
return;
|
|
}
|
|
if (is_hydrogen) {
|
|
// if we are adding a heavy attachment to hydrogens, discard the
|
|
// hydrogen and start over
|
|
combinedMol = nullptr;
|
|
smilesVect.clear();
|
|
attachments.clear();
|
|
mols.clear();
|
|
}
|
|
}
|
|
|
|
labelled = false;
|
|
std::copy(rlabel_attachments.begin(), rlabel_attachments.end(),
|
|
std::inserter(attachments, attachments.end()));
|
|
|
|
mols.push_back(newMol);
|
|
static const std::regex remove_isotopes_regex("\\[\\d*\\*\\]");
|
|
// remove the isotope labels from the SMILES string to avoid
|
|
// that identical R-group are perceived as different when
|
|
// MCS alignment is not used (NoAlign flag)
|
|
smilesVect.push_back(std::regex_replace(MolToSmiles(*newMol, true),
|
|
remove_isotopes_regex, "*"));
|
|
mergeIntoCombinedMol(newMol);
|
|
computeIsHydrogen();
|
|
is_linker = single_fragment && attachments.size() > 1;
|
|
}
|
|
|
|
std::map<int, int> RGroupData::getNumBondsToRlabels() const {
|
|
std::map<int, int> rlabelsUsedCount;
|
|
|
|
for (const auto atom : combinedMol->atoms()) {
|
|
int rlabel;
|
|
if (atom->getPropIfPresent<int>(RLABEL, rlabel)) {
|
|
++rlabelsUsedCount[rlabel];
|
|
}
|
|
}
|
|
return rlabelsUsedCount;
|
|
}
|
|
|
|
std::string RGroupData::toString() const {
|
|
auto attachmentString = std::accumulate(
|
|
attachments.cbegin(), attachments.cend(), std::string(),
|
|
[](std::string s, int a) {
|
|
return s.empty() ? std::to_string(a)
|
|
: std::move(s) + ',' + std::to_string(a);
|
|
});
|
|
std::stringstream ss;
|
|
ss << "RG " << attachmentString << " " << getSmiles();
|
|
return ss.str();
|
|
}
|
|
|
|
//! Refresh is_hydrogen: true when every stored fragment satisfies
//! isMolHydrogen() (trivially true when no fragment is stored).
void RGroupData::computeIsHydrogen() {
  bool everyFragmentIsHydrogen = true;
  for (const auto &fragment : mols) {
    if (!RGroupData::isMolHydrogen(*fragment)) {
      everyFragmentIsHydrogen = false;
      break;
    }
  }
  is_hydrogen = everyFragmentIsHydrogen;
}
|
|
|
|
bool RGroupData::isMolHydrogen(const ROMol &mol) {
|
|
auto atoms = mol.atoms();
|
|
return std::all_of(atoms.begin(), atoms.end(), [](const auto &atom) {
|
|
return (atom->getAtomicNum() == 1 ||
|
|
(atom->getAtomicNum() == 0 && atom->hasProp(SIDECHAIN_RLABELS)));
|
|
});
|
|
}
|
|
|
|
//! compute the canonical smiles for the attachments (bug: removes dupes since
|
|
//! we are using a set...)
|
|
std::string RGroupData::getSmiles() const {
|
|
std::string s;
|
|
for (const auto &it : smilesVect) {
|
|
if (s.length()) {
|
|
s += ".";
|
|
}
|
|
s += it;
|
|
}
|
|
return s;
|
|
}
|
|
|
|
} // namespace RDKit
|