diff --git a/Code/GraphMol/RGroupDecomposition/RGroupUtils.cpp b/Code/GraphMol/RGroupDecomposition/RGroupUtils.cpp index 63ffe8359..90a820938 100644 --- a/Code/GraphMol/RGroupDecomposition/RGroupUtils.cpp +++ b/Code/GraphMol/RGroupDecomposition/RGroupUtils.cpp @@ -208,4 +208,44 @@ std::string toJSON(const RGroupColumns &cols, const std::string &prefix) { return res; } +void relabelMappedDummies(ROMol &mol, unsigned int inputLabels, + unsigned int outputLabels) { + for (auto &atom : mol.atoms()) { + if (atom->getAtomicNum()) { + continue; + } + unsigned int atomMapNum = 0; + if (inputLabels & AtomMap) { + atomMapNum = static_cast(abs(atom->getAtomMapNum())); + } + if (!atomMapNum && (inputLabels & Isotope)) { + atomMapNum = atom->getIsotope(); + } + if (!atomMapNum && (inputLabels & MDLRGroup)) { + atom->getPropIfPresent(common_properties::_MolFileRLabel, atomMapNum); + } + if (!atomMapNum) { + continue; + } + auto rLabel = "R" + std::to_string(atomMapNum); + if (outputLabels & AtomMap) { + atom->setAtomMapNum(atomMapNum); + } else { + atom->setAtomMapNum(0); + } + if (outputLabels & Isotope) { + atom->setIsotope(atomMapNum); + } else { + atom->setIsotope(0); + } + if (outputLabels & MDLRGroup) { + atom->setProp(common_properties::_MolFileRLabel, atomMapNum); + atom->setProp(common_properties::dummyLabel, rLabel); + } else { + atom->clearProp(common_properties::_MolFileRLabel); + atom->clearProp(common_properties::dummyLabel); + } + } +} + } // namespace RDKit diff --git a/Code/GraphMol/RGroupDecomposition/RGroupUtils.h b/Code/GraphMol/RGroupDecomposition/RGroupUtils.h index e683e4f1c..99a42f4e9 100644 --- a/Code/GraphMol/RGroupDecomposition/RGroupUtils.h +++ b/Code/GraphMol/RGroupDecomposition/RGroupUtils.h @@ -93,6 +93,20 @@ RDKIT_RGROUPDECOMPOSITION_EXPORT std::string toJSON( RDKIT_RGROUPDECOMPOSITION_EXPORT std::string toJSON( const RGroupColumns &rgr, const std::string &prefix = ""); +//! Relabel dummy atoms bearing an R-group mapping (as +/// atom map number, isotope or MDLRGroup label) such that +/// they will be displayed by the rendering code as R# rather +/// than #*, *:#, #*:#, etc. By default, only the MDLRGroup label +/// is retained on output; this may be configured through the +/// outputLabels parameter. +/// In case there are multiple potential R-group mappings, +/// the priority on input is Atom map number > Isotope > MDLRGroup. +/// The inputLabels parameter allows to configure which mappings +/// are taken into consideration. +RDKIT_RGROUPDECOMPOSITION_EXPORT void relabelMappedDummies( + ROMol &mol, unsigned int inputLabels = AtomMap | Isotope | MDLRGroup, + unsigned int outputLabels = MDLRGroup); + } // namespace RDKit #endif diff --git a/Code/GraphMol/RGroupDecomposition/Wrap/rdRGroupComposition.cpp b/Code/GraphMol/RGroupDecomposition/Wrap/rdRGroupComposition.cpp index 826ab48ca..8459b184c 100644 --- a/Code/GraphMol/RGroupDecomposition/Wrap/rdRGroupComposition.cpp +++ b/Code/GraphMol/RGroupDecomposition/Wrap/rdRGroupComposition.cpp @@ -43,6 +43,7 @@ #include #include #include +#include #include #include @@ -186,7 +187,13 @@ python::object RGroupDecomp(python::object cores, python::object mols, } else { return make_tuple(decomp.GetRGroupsAsColumn(asSmiles), unmatched); } -} // namespace RDKit +} + +void relabelMappedDummiesHelper(ROMol &mol, unsigned int inputLabels, + unsigned int outputLabels) { + relabelMappedDummies(mol, static_cast(inputLabels), + static_cast(outputLabels)); +} struct rgroupdecomp_wrapper { static void wrap() { @@ -405,11 +412,29 @@ struct rgroupdecomp_wrapper { "\n" " unmatched is a vector of indices in the input mols that were not " "matched.\n"; - python::def("RGroupDecompose", RDKit::RGroupDecomp, + python::def("RGroupDecompose", RGroupDecomp, (python::arg("cores"), python::arg("mols"), python::arg("asSmiles") = false, python::arg("asRows") = true, python::arg("options") = RGroupDecompositionParameters()), docString.c_str()); + + docString = + "Relabel dummy atoms bearing an R-group mapping (as\n" + "atom map number, isotope or MDLRGroup label) such that\n" + "they will be displayed by the rendering code as R# rather\n" + "than #*, *:#, #*:#, etc. By default, only the MDLRGroup label\n" + "is retained on output; this may be configured through the\n" + "outputLabels parameter.\n" + "In case there are multiple potential R-group mappings,\n" + "the priority on input is Atom map number > Isotope > MDLRGroup.\n" + "The inputLabels parameter allows to configure which mappings\n" + "are taken into consideration.\n"; + python::def("RelabelMappedDummies", relabelMappedDummiesHelper, + (python::arg("mol"), + python::arg("inputLabels") = static_cast( + AtomMap | Isotope | MDLRGroup), + python::arg("outputLabels") = MDLRGroup), + docString.c_str()); }; }; } // namespace RDKit diff --git a/Code/GraphMol/RGroupDecomposition/Wrap/test_rgroups.py b/Code/GraphMol/RGroupDecomposition/Wrap/test_rgroups.py index f0a636a98..baa24dd1e 100644 --- a/Code/GraphMol/RGroupDecomposition/Wrap/test_rgroups.py +++ b/Code/GraphMol/RGroupDecomposition/Wrap/test_rgroups.py @@ -30,10 +30,6 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # -import copy -import os -import pickle -import sys import unittest from collections import OrderedDict @@ -43,7 +39,9 @@ from rdkit.Chem.rdRGroupDecomposition import (RGroupCoreAlignment, RGroupDecompose, RGroupDecomposition, RGroupDecompositionParameters, - RGroupLabels) + RGroupLabels, + RGroupLabelling, + RelabelMappedDummies) RDLogger.DisableLog("rdApp.warning") @@ -725,6 +723,123 @@ M END self.assertEqual(len(cmol_h.GetSubstructMatch(core)), core.GetNumAtoms()) self.assertFalse(nmol_h.HasSubstructMatch(core)) + def testRelabelMappedDummies(self): + p = Chem.SmilesWriteParams() + p.canonical = False + allDifferentCore = Chem.MolFromMolBlock(""" + RDKit 2D + + 8 8 0 0 0 0 0 0 0 0999 V2000 + 1.0808 -0.8772 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.0827 0.1228 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.2177 0.6246 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.2198 1.6246 0.0000 R# 0 0 0 0 0 15 0 0 0 4 0 0 + -0.6493 0.1262 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.5142 0.6280 0.0000 R# 0 0 0 0 0 15 0 0 0 3 0 0 + -0.6513 -0.8736 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.2137 -1.3754 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + 1 2 2 0 + 2 3 1 0 + 3 4 1 0 + 3 5 2 0 + 5 6 1 0 + 5 7 1 0 + 7 8 2 0 + 8 1 1 0 +M RGP 2 4 2 6 1 +M END +)CTAB +""") + allDifferentCore.RemoveConformer(0) + allDifferentCore.GetAtomWithIdx(3).SetIsotope(6) + allDifferentCore.GetAtomWithIdx(5).SetIsotope(5) + self.assertEqual(Chem.MolToCXSmiles(allDifferentCore, p), "c1cc([6*:4])c([5*:3])cn1 |atomProp:3.dummyLabel.R2:3.molAtomMapNumber.4:5.dummyLabel.R1:5.molAtomMapNumber.3|") + # AtomMap in, MDLRGroup out + core = Chem.MolFromSmiles("c1cc([*:2])c([*:1])cn1") + self.assertEqual(Chem.MolToCXSmiles(core, p), "c1cc([*:2])c([*:1])cn1 |atomProp:3.dummyLabel.*:3.molAtomMapNumber.2:5.dummyLabel.*:5.molAtomMapNumber.1|") + RelabelMappedDummies(core) + self.assertEqual(Chem.MolToCXSmiles(core, p), "c1cc(*)c(*)cn1 |atomProp:3.dummyLabel.R2:5.dummyLabel.R1|") + # Isotope in, MDLRGroup out + core = Chem.MolFromSmiles("c1cc([2*])c([1*])cn1") + self.assertEqual(Chem.MolToCXSmiles(core, p), "c1cc([2*])c([1*])cn1 |atomProp:3.dummyLabel.*:5.dummyLabel.*|") + RelabelMappedDummies(core) + self.assertEqual(Chem.MolToCXSmiles(core, p), "c1cc(*)c(*)cn1 |atomProp:3.dummyLabel.R2:5.dummyLabel.R1|") + # MDLRGroup in, MDLRGroup out + core = Chem.MolFromMolBlock(""" + RDKit 2D + + 8 8 0 0 0 0 0 0 0 0999 V2000 + 1.0808 -0.8772 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.0827 0.1228 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.2177 0.6246 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.2198 1.6246 0.0000 R# 0 0 0 0 0 1 0 0 0 0 0 0 + -0.6493 0.1262 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.5142 0.6280 0.0000 R# 0 0 0 0 0 1 0 0 0 0 0 0 + -0.6513 -0.8736 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.2137 -1.3754 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + 1 2 2 0 + 2 3 1 0 + 3 4 1 0 + 3 5 2 0 + 5 6 1 0 + 5 7 1 0 + 7 8 2 0 + 8 1 1 0 +M RGP 2 4 2 6 1 +M END +)CTAB +""") + core.RemoveConformer(0) + self.assertEqual(Chem.MolToCXSmiles(core, p), "c1cc([2*])c([1*])cn1 |atomProp:3.dummyLabel.R2:5.dummyLabel.R1|") + RelabelMappedDummies(core) + self.assertEqual(Chem.MolToCXSmiles(core, p), "c1cc(*)c(*)cn1 |atomProp:3.dummyLabel.R2:5.dummyLabel.R1|") + # AtomMap and Isotope in, MDLRGroup out - AtomMap has priority + core = Chem.MolFromSmiles("c1cc([4*:2])c([3*:1])cn1") + self.assertEqual(Chem.MolToCXSmiles(core, p), "c1cc([4*:2])c([3*:1])cn1 |atomProp:3.dummyLabel.*:3.molAtomMapNumber.2:5.dummyLabel.*:5.molAtomMapNumber.1|") + RelabelMappedDummies(core) + self.assertEqual(Chem.MolToCXSmiles(core, p), "c1cc(*)c(*)cn1 |atomProp:3.dummyLabel.R2:5.dummyLabel.R1|") + # AtomMap and Isotope in, MDLRGroup out - force Isotope priority + core = Chem.MolFromSmiles("c1cc([4*:2])c([3*:1])cn1") + self.assertEqual(Chem.MolToCXSmiles(core, p), "c1cc([4*:2])c([3*:1])cn1 |atomProp:3.dummyLabel.*:3.molAtomMapNumber.2:5.dummyLabel.*:5.molAtomMapNumber.1|") + RelabelMappedDummies(core, RGroupLabelling.Isotope) + self.assertEqual(Chem.MolToCXSmiles(core, p), "c1cc(*)c(*)cn1 |atomProp:3.dummyLabel.R4:5.dummyLabel.R3|") + # AtomMap, Isotope and MDLRGroup in, MDLRGroup out - AtomMap has priority + core = Chem.Mol(allDifferentCore) + RelabelMappedDummies(core) + self.assertEqual(Chem.MolToCXSmiles(core, p), "c1cc(*)c(*)cn1 |atomProp:3.dummyLabel.R4:5.dummyLabel.R3|") + # AtomMap, Isotope and MDLRGroup in, MDLRGroup out - force Isotope priority + core = Chem.Mol(allDifferentCore) + RelabelMappedDummies(core, RGroupLabelling.Isotope) + self.assertEqual(Chem.MolToCXSmiles(core, p), "c1cc(*)c(*)cn1 |atomProp:3.dummyLabel.R6:5.dummyLabel.R5|") + # AtomMap, Isotope and MDLRGroup in, MDLRGroup out - force MDLRGroup priority + core = Chem.Mol(allDifferentCore) + RelabelMappedDummies(core, RGroupLabelling.MDLRGroup) + self.assertEqual(Chem.MolToCXSmiles(core, p), "c1cc(*)c(*)cn1 |atomProp:3.dummyLabel.R2:5.dummyLabel.R1|") + # AtomMap, Isotope and MDLRGroup in, AtomMap out - AtomMap has priority + core = Chem.Mol(allDifferentCore) + RelabelMappedDummies(core, outputLabels=RGroupLabelling.AtomMap) + self.assertEqual(Chem.MolToCXSmiles(core, p), "c1cc([*:4])c([*:3])cn1 |atomProp:3.molAtomMapNumber.4:5.molAtomMapNumber.3|") + # AtomMap, Isotope and MDLRGroup in, Isotope out - AtomMap has priority + core = Chem.Mol(allDifferentCore) + RelabelMappedDummies(core, outputLabels=RGroupLabelling.Isotope) + self.assertEqual(Chem.MolToCXSmiles(core, p), "c1cc([4*])c([3*])cn1") + # AtomMap, Isotope and MDLRGroup in, AtomMap out - Isotope has priority + core = Chem.Mol(allDifferentCore) + RelabelMappedDummies(core, inputLabels=(RGroupLabelling.Isotope | RGroupLabelling.MDLRGroup), outputLabels=RGroupLabelling.AtomMap) + self.assertEqual(Chem.MolToCXSmiles(core, p), "c1cc([*:6])c([*:5])cn1 |atomProp:3.molAtomMapNumber.6:5.molAtomMapNumber.5|") + # AtomMap, Isotope and MDLRGroup in, Isotope out - Isotope has priority + core = Chem.Mol(allDifferentCore) + RelabelMappedDummies(core, inputLabels=(RGroupLabelling.Isotope | RGroupLabelling.MDLRGroup), outputLabels=RGroupLabelling.Isotope) + self.assertEqual(Chem.MolToCXSmiles(core, p), "c1cc([6*])c([5*])cn1") + # AtomMap, Isotope and MDLRGroup in, AtomMap out - MDLRGroup has priority + core = Chem.Mol(allDifferentCore) + RelabelMappedDummies(core, inputLabels=RGroupLabelling.MDLRGroup, outputLabels=RGroupLabelling.AtomMap) + self.assertEqual(Chem.MolToCXSmiles(core, p), "c1cc([*:2])c([*:1])cn1 |atomProp:3.molAtomMapNumber.2:5.molAtomMapNumber.1|") + # AtomMap, Isotope and MDLRGroup in, Isotope out - MDLRGroup has priority + core = Chem.Mol(allDifferentCore) + RelabelMappedDummies(core, inputLabels=RGroupLabelling.MDLRGroup, outputLabels=RGroupLabelling.Isotope) + self.assertEqual(Chem.MolToCXSmiles(core, p), "c1cc([2*])c([1*])cn1") + if __name__ == '__main__': rdBase.DisableLog("rdApp.debug") diff --git a/Code/GraphMol/RGroupDecomposition/catch_rgd.cpp b/Code/GraphMol/RGroupDecomposition/catch_rgd.cpp index bc1e470f7..e60f6d5bf 100644 --- a/Code/GraphMol/RGroupDecomposition/catch_rgd.cpp +++ b/Code/GraphMol/RGroupDecomposition/catch_rgd.cpp @@ -808,3 +808,166 @@ TEST_CASE("Mol matches core") { match.clear(); CHECK(!SubstructMatch(*nmol, *core, match)); } + +TEST_CASE("relabelMappedDummies") { + SmilesWriteParams p; + p.canonical = false; + auto allDifferentCore = R"CTAB( + RDKit 2D + + 8 8 0 0 0 0 0 0 0 0999 V2000 + 1.0808 -0.8772 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.0827 0.1228 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.2177 0.6246 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.2198 1.6246 0.0000 R# 0 0 0 0 0 15 0 0 0 4 0 0 + -0.6493 0.1262 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.5142 0.6280 0.0000 R# 0 0 0 0 0 15 0 0 0 3 0 0 + -0.6513 -0.8736 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.2137 -1.3754 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + 1 2 2 0 + 2 3 1 0 + 3 4 1 0 + 3 5 2 0 + 5 6 1 0 + 5 7 1 0 + 7 8 2 0 + 8 1 1 0 +M RGP 2 4 2 6 1 +M END +)CTAB"_ctab; + allDifferentCore->removeConformer(0); + allDifferentCore->getAtomWithIdx(3)->setIsotope(6); + allDifferentCore->getAtomWithIdx(5)->setIsotope(5); + CHECK( + MolToCXSmiles(*allDifferentCore, p) == + "c1cc([6*:4])c([5*:3])cn1 |atomProp:3.dummyLabel.R2:3.molAtomMapNumber.4:5.dummyLabel.R1:5.molAtomMapNumber.3|"); + SECTION("AtomMap in, MDLRGroup out") { + auto core = "c1cc([*:2])c([*:1])cn1"_smiles; + CHECK( + MolToCXSmiles(*core, p) == + "c1cc([*:2])c([*:1])cn1 |atomProp:3.dummyLabel.*:3.molAtomMapNumber.2:5.dummyLabel.*:5.molAtomMapNumber.1|"); + relabelMappedDummies(*core); + CHECK(MolToCXSmiles(*core, p) == + "c1cc(*)c(*)cn1 |atomProp:3.dummyLabel.R2:5.dummyLabel.R1|"); + } + SECTION("Isotope in, MDLRGroup out") { + auto core = "c1cc([2*])c([1*])cn1"_smiles; + CHECK(MolToCXSmiles(*core, p) == + "c1cc([2*])c([1*])cn1 |atomProp:3.dummyLabel.*:5.dummyLabel.*|"); + relabelMappedDummies(*core); + CHECK(MolToCXSmiles(*core, p) == + "c1cc(*)c(*)cn1 |atomProp:3.dummyLabel.R2:5.dummyLabel.R1|"); + } + SECTION("MDLRGroup in, MDLRGroup out") { + auto core = R"CTAB( + RDKit 2D + + 8 8 0 0 0 0 0 0 0 0999 V2000 + 1.0808 -0.8772 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.0827 0.1228 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.2177 0.6246 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.2198 1.6246 0.0000 R# 0 0 0 0 0 1 0 0 0 0 0 0 + -0.6493 0.1262 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.5142 0.6280 0.0000 R# 0 0 0 0 0 1 0 0 0 0 0 0 + -0.6513 -0.8736 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.2137 -1.3754 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + 1 2 2 0 + 2 3 1 0 + 3 4 1 0 + 3 5 2 0 + 5 6 1 0 + 5 7 1 0 + 7 8 2 0 + 8 1 1 0 +M RGP 2 4 2 6 1 +M END +)CTAB"_ctab; + core->removeConformer(0); + CHECK(MolToCXSmiles(*core, p) == + "c1cc([2*])c([1*])cn1 |atomProp:3.dummyLabel.R2:5.dummyLabel.R1|"); + relabelMappedDummies(*core); + CHECK(MolToCXSmiles(*core, p) == + "c1cc(*)c(*)cn1 |atomProp:3.dummyLabel.R2:5.dummyLabel.R1|"); + } + SECTION("AtomMap and Isotope in, MDLRGroup out - AtomMap has priority") { + auto core = "c1cc([4*:2])c([3*:1])cn1"_smiles; + CHECK( + MolToCXSmiles(*core, p) == + "c1cc([4*:2])c([3*:1])cn1 |atomProp:3.dummyLabel.*:3.molAtomMapNumber.2:5.dummyLabel.*:5.molAtomMapNumber.1|"); + relabelMappedDummies(*core); + CHECK(MolToCXSmiles(*core, p) == + "c1cc(*)c(*)cn1 |atomProp:3.dummyLabel.R2:5.dummyLabel.R1|"); + } + SECTION("AtomMap and Isotope in, MDLRGroup out - force Isotope priority") { + auto core = "c1cc([4*:2])c([3*:1])cn1"_smiles; + CHECK( + MolToCXSmiles(*core, p) == + "c1cc([4*:2])c([3*:1])cn1 |atomProp:3.dummyLabel.*:3.molAtomMapNumber.2:5.dummyLabel.*:5.molAtomMapNumber.1|"); + relabelMappedDummies(*core, Isotope); + CHECK(MolToCXSmiles(*core, p) == + "c1cc(*)c(*)cn1 |atomProp:3.dummyLabel.R4:5.dummyLabel.R3|"); + } + SECTION( + "AtomMap, Isotope and MDLRGroup in, MDLRGroup out - AtomMap has priority") { + ROMol core(*allDifferentCore); + relabelMappedDummies(core); + CHECK(MolToCXSmiles(core, p) == + "c1cc(*)c(*)cn1 |atomProp:3.dummyLabel.R4:5.dummyLabel.R3|"); + } + SECTION( + "AtomMap, Isotope and MDLRGroup in, MDLRGroup out - force Isotope priority") { + ROMol core(*allDifferentCore); + relabelMappedDummies(core, Isotope); + CHECK(MolToCXSmiles(core, p) == + "c1cc(*)c(*)cn1 |atomProp:3.dummyLabel.R6:5.dummyLabel.R5|"); + } + SECTION( + "AtomMap, Isotope and MDLRGroup in, MDLRGroup out - force MDLRGroup priority") { + ROMol core(*allDifferentCore); + relabelMappedDummies(core, MDLRGroup); + CHECK(MolToCXSmiles(core, p) == + "c1cc(*)c(*)cn1 |atomProp:3.dummyLabel.R2:5.dummyLabel.R1|"); + } + SECTION( + "AtomMap, Isotope and MDLRGroup in, AtomMap out - AtomMap has priority") { + ROMol core(*allDifferentCore); + relabelMappedDummies(core, AtomMap | Isotope | MDLRGroup, AtomMap); + CHECK( + MolToCXSmiles(core, p) == + "c1cc([*:4])c([*:3])cn1 |atomProp:3.molAtomMapNumber.4:5.molAtomMapNumber.3|"); + } + SECTION( + "AtomMap, Isotope and MDLRGroup in, Isotope out - AtomMap has priority") { + ROMol core(*allDifferentCore); + relabelMappedDummies(core, AtomMap | Isotope | MDLRGroup, Isotope); + CHECK(MolToCXSmiles(core, p) == "c1cc([4*])c([3*])cn1"); + } + SECTION( + "AtomMap, Isotope and MDLRGroup in, AtomMap out - Isotope has priority") { + ROMol core(*allDifferentCore); + relabelMappedDummies(core, Isotope | MDLRGroup, AtomMap); + CHECK( + MolToCXSmiles(core, p) == + "c1cc([*:6])c([*:5])cn1 |atomProp:3.molAtomMapNumber.6:5.molAtomMapNumber.5|"); + } + SECTION( + "AtomMap, Isotope and MDLRGroup in, Isotope out - Isotope has priority") { + ROMol core(*allDifferentCore); + relabelMappedDummies(core, Isotope | MDLRGroup, Isotope); + CHECK(MolToCXSmiles(core, p) == "c1cc([6*])c([5*])cn1"); + } + SECTION( + "AtomMap, Isotope and MDLRGroup in, AtomMap out - MDLRGroup has priority") { + ROMol core(*allDifferentCore); + relabelMappedDummies(core, MDLRGroup, AtomMap); + CHECK( + MolToCXSmiles(core, p) == + "c1cc([*:2])c([*:1])cn1 |atomProp:3.molAtomMapNumber.2:5.molAtomMapNumber.1|"); + } + SECTION( + "AtomMap, Isotope and MDLRGroup in, Isotope out - MDLRGroup has priority") { + ROMol core(*allDifferentCore); + relabelMappedDummies(core, MDLRGroup, Isotope); + CHECK(MolToCXSmiles(core, p) == "c1cc([2*])c([1*])cn1"); + } +} diff --git a/Code/MinimalLib/CMakeLists.txt b/Code/MinimalLib/CMakeLists.txt index 2c957f24e..5b26d38df 100644 --- a/Code/MinimalLib/CMakeLists.txt +++ b/Code/MinimalLib/CMakeLists.txt @@ -5,7 +5,8 @@ if(RDK_BUILD_MINIMAL_LIB) set(MINIMAL_LIB_LIBRARIES "MolInterchange_static;Abbreviations_static;" "CIPLabeler_static;MolDraw2D_static;Depictor_static;RDInchiLib_static;" "SubstructMatch_static;FileParsers_static;" - "SmilesParse_static;GraphMol_static;RDGeometryLib_static;RDGeneral_static") + "SmilesParse_static;GraphMol_static;RDGeometryLib_static;" + "RDGeneral_static;RGroupDecomposition_static") if(RDK_BUILD_MINIMAL_LIB_RXN) add_definitions(-DRDK_BUILD_MINIMAL_LIB_RXN) set(MINIMAL_LIB_LIBRARIES "${MINIMAL_LIB_LIBRARIES};ChemReactions_static") @@ -40,7 +41,7 @@ if(RDK_BUILD_CFFI_LIB) MolInterchange Abbreviations CIPLabeler MolDraw2D Depictor RDInchiLib SubstructMatch FileParsers - SmilesParse GraphMol RDGeometryLib RDGeneral ) + SmilesParse GraphMol RDGeometryLib RDGeneral RGroupDecomposition) if(RDK_URF_LIBS) list(APPEND LIBS_TO_USE RingDecomposerLib) endif() diff --git a/Code/MinimalLib/cffi_test.c b/Code/MinimalLib/cffi_test.c index 49970f17d..dfc1cf416 100644 --- a/Code/MinimalLib/cffi_test.c +++ b/Code/MinimalLib/cffi_test.c @@ -2213,6 +2213,28 @@ void test_capture_logs() { } } +void test_relabel_mapped_dummies() { + printf("--------------------------\n"); + printf(" test_relabel_mapped_dummies\n"); + char *mpkl; + size_t mpkl_size; + char *smiles; + mpkl = get_mol("c1cc([4*:2])c([3*:1])cn1", &mpkl_size, ""); + smiles = get_cxsmiles(mpkl, mpkl_size, NULL); + assert(!strcmp( + smiles, + "c1cc([4*:2])c([3*:1])cn1 |atomProp:3.molAtomMapNumber.2:3.dummyLabel.*:5.molAtomMapNumber.1:5.dummyLabel.*|")); + free(smiles); + free(mpkl); + mpkl = get_mol("c1cc([4*:2])c([3*:1])cn1", &mpkl_size, + "{\"mappedDummiesAreRGroups\":true}"); + smiles = get_cxsmiles(mpkl, mpkl_size, NULL); + assert( + !strcmp(smiles, "*c1ccncc1* |atomProp:0.dummyLabel.R2:7.dummyLabel.R1|")); + free(smiles); + free(mpkl); +} + int main() { enable_logging(); char *vers = version(); @@ -2240,5 +2262,6 @@ int main() { test_alignment_r_groups_aromatic_ring(); test_partial_sanitization(); test_capture_logs(); + test_relabel_mapped_dummies(); return 0; } diff --git a/Code/MinimalLib/common.h b/Code/MinimalLib/common.h index 1b9fbbf30..f4940756d 100644 --- a/Code/MinimalLib/common.h +++ b/Code/MinimalLib/common.h @@ -47,6 +47,7 @@ #include #include #include +#include #include #include @@ -104,6 +105,7 @@ RWMol *mol_from_input(const std::string &input, bool setAromaticity = true; bool fastFindRings = true; bool assignStereo = true; + bool mappedDummiesAreRGroups = false; RWMol *res = nullptr; boost::property_tree::ptree pt; if (!details_json.empty()) { @@ -117,6 +119,7 @@ RWMol *mol_from_input(const std::string &input, LPT_OPT_GET(setAromaticity); LPT_OPT_GET(fastFindRings); LPT_OPT_GET(assignStereo); + LPT_OPT_GET(mappedDummiesAreRGroups); } try { if (input.find("M END") != std::string::npos) { @@ -170,6 +173,9 @@ RWMol *mol_from_input(const std::string &input, if (mergeQueryHs) { MolOps::mergeQueryHs(*res); } + if (mappedDummiesAreRGroups) { + relabelMappedDummies(*res); + } } catch (...) { delete res; res = nullptr; diff --git a/Code/MinimalLib/tests/tests.js b/Code/MinimalLib/tests/tests.js index f97e9b365..715212545 100644 --- a/Code/MinimalLib/tests/tests.js +++ b/Code/MinimalLib/tests/tests.js @@ -2690,6 +2690,15 @@ M END fentanylScaffold.delete(); } +function test_relabel_mapped_dummies() { + var core = RDKitModule.get_mol("c1cc([4*:2])c([3*:1])cn1"); + assert.equal(core.get_cxsmiles(), "c1cc([4*:2])c([3*:1])cn1 |atomProp:3.dummyLabel.*:3.molAtomMapNumber.2:5.dummyLabel.*:5.molAtomMapNumber.1|"); + core.delete(); + core = RDKitModule.get_mol("c1cc([4*:2])c([3*:1])cn1", JSON.stringify({mappedDummiesAreRGroups: true})); + assert.equal(core.get_cxsmiles(), "*c1ccncc1* |atomProp:0.dummyLabel.R2:7.dummyLabel.R1|"); + core.delete(); +} + initRDKitModule().then(function(instance) { var done = {}; const waitAllTestsFinished = () => { @@ -2759,6 +2768,7 @@ initRDKitModule().then(function(instance) { test_capture_logs(); test_rgroup_match_heavy_hydro_none_charged(); test_get_sss_json(); + test_relabel_mapped_dummies(); waitAllTestsFinished().then(() => console.log("Tests finished successfully") );