Allow atom map numbers to be ignored when generating canonical SMILES (#7732)

* Add option to ignore atom map numbers when generating canonical SMILES.

* Remove blank line.

* Improve docs.

---------

Co-authored-by: David Cosgrove <david@cozchemix.co.uk>
This commit is contained in:
David Cosgrove
2024-08-23 16:15:31 +01:00
committed by GitHub
parent 4c8b27717a
commit df3403f767
5 changed files with 54 additions and 7 deletions

View File

@@ -220,8 +220,10 @@ std::string GetAtomSmiles(const Atom *atom, const SmilesWriteParams &params) {
}
// this was originally only done for the organic subset,
// applying it to other atom-types is a fix for Issue 3152751:
// Only accept for atom->getAtomicNum() in [5, 6, 7, 8, 14, 15, 16, 33, 34, 52]
if (!params.doKekule && atom->getIsAromatic() && symb[0] >= 'A' && symb[0] <= 'Z') {
// Only accept for atom->getAtomicNum() in [5, 6, 7, 8, 14, 15, 16, 33, 34,
// 52]
if (!params.doKekule && atom->getIsAromatic() && symb[0] >= 'A' &&
symb[0] <= 'Z') {
switch (atom->getAtomicNum()) {
case 5:
case 6:
@@ -572,7 +574,12 @@ std::string MolToSmiles(const ROMol &mol, const SmilesWriteParams &params,
ROMol *tmol = mols[fragIdx].get();
// update property cache
std::vector<int> atomMapNums(tmol->getNumAtoms(), 0);
for (auto atom : tmol->atoms()) {
if (params.ignoreAtomMapNumbers) {
atomMapNums[atom->getIdx()] = atom->getAtomMapNum();
atom->setAtomMapNum(0);
}
atom->updatePropertyCache(false);
}
@@ -650,6 +657,11 @@ std::string MolToSmiles(const ROMol &mol, const SmilesWriteParams &params,
Canon::rankMolAtoms(*tmol, ranks, breakTies, params.doIsomericSmiles,
params.doIsomericSmiles);
}
if (params.ignoreAtomMapNumbers) {
for (auto atom : tmol->atoms()) {
atom->setAtomMapNum(atomMapNums[atom->getIdx()]);
}
}
} else {
std::iota(ranks.begin(), ranks.end(), 0);
}

View File

@@ -39,6 +39,8 @@ struct RDKIT_SMILESPARSE_EXPORT SmilesWriteParams {
bool includeDativeBonds =
true; /**< include the RDKit extension for dative bonds. Otherwise dative
bonds will be written as single bonds*/
bool ignoreAtomMapNumbers = false; /**< If true, ignores any atom map numbers
when canonicalizing the molecule */
};
namespace SmilesWrite {
@@ -165,13 +167,16 @@ RDKIT_SMILESPARSE_EXPORT std::string MolToSmiles(
atom.
\param doRandom : if true, the first atom in the SMILES string will be
selected at random and the SMILES string will not be canonical
\param ignoreAtomMapNumbers : if true, ignores any atom map numbers when
canonicalizing the molecule
*/
inline std::string MolToSmiles(const ROMol &mol, bool doIsomericSmiles = true,
bool doKekule = false, int rootedAtAtom = -1,
bool canonical = true,
bool allBondsExplicit = false,
bool allHsExplicit = false,
bool doRandom = false) {
bool doRandom = false,
bool ignoreAtomMapNumbers = false) {
SmilesWriteParams ps;
ps.doIsomericSmiles = doIsomericSmiles;
ps.doKekule = doKekule;
@@ -180,6 +185,7 @@ inline std::string MolToSmiles(const ROMol &mol, bool doIsomericSmiles = true,
ps.allBondsExplicit = allBondsExplicit;
ps.allHsExplicit = allHsExplicit;
ps.doRandom = doRandom;
ps.ignoreAtomMapNumbers = ignoreAtomMapNumbers;
return MolToSmiles(mol, ps);
};

View File

@@ -2879,4 +2879,17 @@ TEST_CASE("Canonicalization of meso structures") {
}
}
}
}
TEST_CASE("Ignore atom map numbers") {
  SmilesWriteParams writeOpts;
  // Aniline with atom map number 1 on the nitrogen.
  auto mappedMol = "[NH2:1]c1ccccc1"_smiles;

  // With default parameters the map number takes part in canonicalization.
  CHECK(MolToSmiles(*mappedMol, writeOpts) == "c1ccc([NH2:1])cc1");

  // With the option switched on, the expected SMILES starts at the nitrogen
  // and still carries the map number.
  writeOpts.ignoreAtomMapNumbers = true;
  CHECK(MolToSmiles(*mappedMol, writeOpts) == "[NH2:1]c1ccccc1");

  // Once the map number is cleared, the molecule must give the same SMILES
  // as aniline written without any map number.
  auto plainMol = "Nc1ccccc1"_smiles;
  mappedMol->getAtomWithIdx(0)->setAtomMapNum(0);
  CHECK(MolToSmiles(*mappedMol, writeOpts) ==
        MolToSmiles(*plainMol, writeOpts));

  // Same comparison through the positional-argument overload, whose last
  // argument is ignoreAtomMapNumbers.
  const auto viaPositionalArgs = [](const auto &mol) {
    return MolToSmiles(mol, true, false, -1, true, false, false, false, true);
  };
  CHECK(viaPositionalArgs(*mappedMol) == viaPositionalArgs(*plainMol));
}

View File

@@ -1582,7 +1582,11 @@ BOOST_PYTHON_MODULE(rdmolfiles) {
"resulting SMILES is not canonical")
.def_readwrite(
"includeDativeBonds", &RDKit::SmilesWriteParams::includeDativeBonds,
"include the RDKit extension for dative bonds. Otherwise dative bonds will be written as single bonds");
"include the RDKit extension for dative bonds. Otherwise dative bonds will be written as single bonds")
.def_readwrite(
"ignoreAtomMapNumbers",
&RDKit::SmilesWriteParams::ignoreAtomMapNumbers,
"ignore atom map numbers when canonicalizing the molecule");
python::def("MolToSmiles",
(std::string(*)(const ROMol &,
@@ -1609,6 +1613,8 @@ BOOST_PYTHON_MODULE(rdmolfiles) {
in the output SMILES. Defaults to false.\n\
- doRandom: (optional) if true, randomize the traversal of the molecule graph,\n\
so we can generate random smiles. Defaults to false.\n\
- ignoreAtomMapNumbers (optional) if true, ignores any atom map numbers when\n\
canonicalizing the molecule \n\
\n\
RETURNS:\n\
\n\
@@ -1616,12 +1622,13 @@ BOOST_PYTHON_MODULE(rdmolfiles) {
\n";
python::def(
"MolToSmiles",
(std::string(*)(const ROMol &, bool, bool, int, bool, bool, bool,
(std::string(*)(const ROMol &, bool, bool, int, bool, bool, bool, bool,
bool))RDKit::MolToSmiles,
(python::arg("mol"), python::arg("isomericSmiles") = true,
python::arg("kekuleSmiles") = false, python::arg("rootedAtAtom") = -1,
python::arg("canonical") = true, python::arg("allBondsExplicit") = false,
python::arg("allHsExplicit") = false, python::arg("doRandom") = false),
python::arg("allHsExplicit") = false, python::arg("doRandom") = false,
python::arg("ignoreAtomMapNumbers") = false),
docString.c_str());
docString =

View File

@@ -8213,7 +8213,16 @@ M END
centers = Chem.FindMesoCenters(mol, includeIsotopes=False)
self.assertEqual(centers, ())
def testIgnoreAtomMapNumbers(self):
  """Check the ignoreAtomMapNumbers option of MolToSmiles.

  The option is exercised both through SmilesWriteParams and through the
  keyword argument; the keyword argument is also checked when set to False.
  """
  mapped = Chem.MolFromSmiles("[NH2:1]c1ccccc1")
  smilesIgnoringMaps = "[NH2:1]c1ccccc1"
  smilesUsingMaps = "c1ccc([NH2:1])cc1"

  # Option set on the parameters object.
  writeParams = Chem.SmilesWriteParams()
  writeParams.ignoreAtomMapNumbers = True
  self.assertEqual(Chem.MolToSmiles(mapped, writeParams), smilesIgnoringMaps)

  # Option passed as a keyword argument, enabled and then disabled.
  self.assertEqual(Chem.MolToSmiles(mapped, ignoreAtomMapNumbers=True), smilesIgnoringMaps)
  self.assertEqual(Chem.MolToSmiles(mapped, ignoreAtomMapNumbers=False), smilesUsingMaps)
if __name__ == '__main__':
if "RDTESTCASE" in os.environ:
suite = unittest.TestSuite()