diff --git a/Code/GraphMol/SmilesParse/SmilesWrite.cpp b/Code/GraphMol/SmilesParse/SmilesWrite.cpp index 164059aa4..7ed8b4880 100644 --- a/Code/GraphMol/SmilesParse/SmilesWrite.cpp +++ b/Code/GraphMol/SmilesParse/SmilesWrite.cpp @@ -220,8 +220,10 @@ std::string GetAtomSmiles(const Atom *atom, const SmilesWriteParams &params) { } // this was originally only done for the organic subset, // applying it to other atom-types is a fix for Issue 3152751: - // Only accept for atom->getAtomicNum() in [5, 6, 7, 8, 14, 15, 16, 33, 34, 52] - if (!params.doKekule && atom->getIsAromatic() && symb[0] >= 'A' && symb[0] <= 'Z') { + // Only accept for atom->getAtomicNum() in [5, 6, 7, 8, 14, 15, 16, 33, 34, + // 52] + if (!params.doKekule && atom->getIsAromatic() && symb[0] >= 'A' && + symb[0] <= 'Z') { switch (atom->getAtomicNum()) { case 5: case 6: @@ -572,7 +574,12 @@ std::string MolToSmiles(const ROMol &mol, const SmilesWriteParams &params, ROMol *tmol = mols[fragIdx].get(); // update property cache + std::vector<int> atomMapNums(tmol->getNumAtoms(), 0); for (auto atom : tmol->atoms()) { + if (params.ignoreAtomMapNumbers) { + atomMapNums[atom->getIdx()] = atom->getAtomMapNum(); + atom->setAtomMapNum(0); + } atom->updatePropertyCache(false); } @@ -650,6 +657,11 @@ std::string MolToSmiles(const ROMol &mol, const SmilesWriteParams &params, Canon::rankMolAtoms(*tmol, ranks, breakTies, params.doIsomericSmiles, params.doIsomericSmiles); } + if (params.ignoreAtomMapNumbers) { + for (auto atom : tmol->atoms()) { + atom->setAtomMapNum(atomMapNums[atom->getIdx()]); + } + } } else { std::iota(ranks.begin(), ranks.end(), 0); } diff --git a/Code/GraphMol/SmilesParse/SmilesWrite.h b/Code/GraphMol/SmilesParse/SmilesWrite.h index e399dc05e..1b8bcb85e 100644 --- a/Code/GraphMol/SmilesParse/SmilesWrite.h +++ b/Code/GraphMol/SmilesParse/SmilesWrite.h @@ -39,6 +39,8 @@ struct RDKIT_SMILESPARSE_EXPORT SmilesWriteParams { bool includeDativeBonds = true; /**< include the RDKit extension for dative bonds.
Otherwise dative bonds will be written as single bonds*/ + bool ignoreAtomMapNumbers = false; /**< If true, ignores any atom map numbers + when canonicalizing the molecule */ }; namespace SmilesWrite { @@ -165,13 +167,16 @@ RDKIT_SMILESPARSE_EXPORT std::string MolToSmiles( atom. \param doRandom : if true, the first atom in the SMILES string will be selected at random and the SMILES string will not be canonical + \param ignoreAtomMapNumbers : if true, ignores any atom map numbers when + canonicalizing the molecule */ inline std::string MolToSmiles(const ROMol &mol, bool doIsomericSmiles = true, bool doKekule = false, int rootedAtAtom = -1, bool canonical = true, bool allBondsExplicit = false, bool allHsExplicit = false, - bool doRandom = false) { + bool doRandom = false, + bool ignoreAtomMapNumbers = false) { SmilesWriteParams ps; ps.doIsomericSmiles = doIsomericSmiles; ps.doKekule = doKekule; @@ -180,6 +185,7 @@ inline std::string MolToSmiles(const ROMol &mol, bool doIsomericSmiles = true, ps.allBondsExplicit = allBondsExplicit; ps.allHsExplicit = allHsExplicit; ps.doRandom = doRandom; + ps.ignoreAtomMapNumbers = ignoreAtomMapNumbers; return MolToSmiles(mol, ps); }; diff --git a/Code/GraphMol/SmilesParse/catch_tests.cpp b/Code/GraphMol/SmilesParse/catch_tests.cpp index dfbb21283..2babb9e8b 100644 --- a/Code/GraphMol/SmilesParse/catch_tests.cpp +++ b/Code/GraphMol/SmilesParse/catch_tests.cpp @@ -2879,4 +2879,17 @@ TEST_CASE("Canonicalization of meso structures") { } } } +} + +TEST_CASE("Ignore atom map numbers") { + SmilesWriteParams params; + auto m1 = "[NH2:1]c1ccccc1"_smiles; + CHECK(MolToSmiles(*m1, params) == "c1ccc([NH2:1])cc1"); + params.ignoreAtomMapNumbers = true; + CHECK(MolToSmiles(*m1, params) == "[NH2:1]c1ccccc1"); + auto m2 = "Nc1ccccc1"_smiles; + m1->getAtomWithIdx(0)->setAtomMapNum(0); + CHECK(MolToSmiles(*m1, params) == MolToSmiles(*m2, params)); + CHECK(MolToSmiles(*m1, true, false, -1, true, false, false, false, true) == + MolToSmiles(*m2, true, 
false, -1, true, false, false, false, true)); } \ No newline at end of file diff --git a/Code/GraphMol/Wrap/rdmolfiles.cpp b/Code/GraphMol/Wrap/rdmolfiles.cpp index 3fc143b35..376ab9441 100644 --- a/Code/GraphMol/Wrap/rdmolfiles.cpp +++ b/Code/GraphMol/Wrap/rdmolfiles.cpp @@ -1582,7 +1582,11 @@ BOOST_PYTHON_MODULE(rdmolfiles) { "resulting SMILES is not canonical") .def_readwrite( "includeDativeBonds", &RDKit::SmilesWriteParams::includeDativeBonds, - "include the RDKit extension for dative bonds. Otherwise dative bonds will be written as single bonds"); + "include the RDKit extension for dative bonds. Otherwise dative bonds will be written as single bonds") + .def_readwrite( + "ignoreAtomMapNumbers", + &RDKit::SmilesWriteParams::ignoreAtomMapNumbers, + "ignore atom map numbers when canonicalizing the molecule"); python::def("MolToSmiles", (std::string(*)(const ROMol &, @@ -1609,6 +1613,8 @@ BOOST_PYTHON_MODULE(rdmolfiles) { in the output SMILES. Defaults to false.\n\ - doRandom: (optional) if true, randomize the traversal of the molecule graph,\n\ so we can generate random smiles. 
Defaults to false.\n\ + - ignoreAtomMapNumbers: (optional) if true, ignores any atom map numbers when\n\ + canonicalizing the molecule \n\ \n\ RETURNS:\n\ \n\ @@ -1616,12 +1622,13 @@ BOOST_PYTHON_MODULE(rdmolfiles) { \n"; python::def( "MolToSmiles", - (std::string(*)(const ROMol &, bool, bool, int, bool, bool, bool, + (std::string(*)(const ROMol &, bool, bool, int, bool, bool, bool, bool, bool))RDKit::MolToSmiles, (python::arg("mol"), python::arg("isomericSmiles") = true, python::arg("kekuleSmiles") = false, python::arg("rootedAtAtom") = -1, python::arg("canonical") = true, python::arg("allBondsExplicit") = false, - python::arg("allHsExplicit") = false, python::arg("doRandom") = false), + python::arg("allHsExplicit") = false, python::arg("doRandom") = false, + python::arg("ignoreAtomMapNumbers") = false), docString.c_str()); docString = diff --git a/Code/GraphMol/Wrap/rough_test.py b/Code/GraphMol/Wrap/rough_test.py index b478313ad..f85b891d4 100644 --- a/Code/GraphMol/Wrap/rough_test.py +++ b/Code/GraphMol/Wrap/rough_test.py @@ -8213,7 +8213,16 @@ M END centers = Chem.FindMesoCenters(mol, includeIsotopes=False) self.assertEqual(centers, ()) - + def testIgnoreAtomMapNumbers(self): + mol = Chem.MolFromSmiles("[NH2:1]c1ccccc1") + ps = Chem.SmilesWriteParams() + ps.ignoreAtomMapNumbers = True + self.assertEqual(Chem.MolToSmiles(mol, ps), "[NH2:1]c1ccccc1") + self.assertEqual(Chem.MolToSmiles(mol, ignoreAtomMapNumbers=True), + "[NH2:1]c1ccccc1") + self.assertEqual(Chem.MolToSmiles(mol, ignoreAtomMapNumbers=False), + "c1ccc([NH2:1])cc1") + if __name__ == '__main__': if "RDTESTCASE" in os.environ: suite = unittest.TestSuite()