mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-03 21:44:30 +08:00
Allow atom map numbers to be ignored when generating canonical SMILES (#7732)
* Add option to ignore atom map numbers when generating canonical SMILES.
* Remove blank line.
* Improve docs.
---------
Co-authored-by: David Cosgrove <david@cozchemix.co.uk>
This commit is contained in:
@@ -220,8 +220,10 @@ std::string GetAtomSmiles(const Atom *atom, const SmilesWriteParams &params) {
|
||||
}
|
||||
// this was originally only done for the organic subset,
|
||||
// applying it to other atom-types is a fix for Issue 3152751:
|
||||
// Only accept for atom->getAtomicNum() in [5, 6, 7, 8, 14, 15, 16, 33, 34, 52]
|
||||
if (!params.doKekule && atom->getIsAromatic() && symb[0] >= 'A' && symb[0] <= 'Z') {
|
||||
// Only accept for atom->getAtomicNum() in [5, 6, 7, 8, 14, 15, 16, 33, 34,
|
||||
// 52]
|
||||
if (!params.doKekule && atom->getIsAromatic() && symb[0] >= 'A' &&
|
||||
symb[0] <= 'Z') {
|
||||
switch (atom->getAtomicNum()) {
|
||||
case 5:
|
||||
case 6:
|
||||
@@ -572,7 +574,12 @@ std::string MolToSmiles(const ROMol &mol, const SmilesWriteParams &params,
|
||||
ROMol *tmol = mols[fragIdx].get();
|
||||
|
||||
// update property cache
|
||||
std::vector<int> atomMapNums(tmol->getNumAtoms(), 0);
|
||||
for (auto atom : tmol->atoms()) {
|
||||
if (params.ignoreAtomMapNumbers) {
|
||||
atomMapNums[atom->getIdx()] = atom->getAtomMapNum();
|
||||
atom->setAtomMapNum(0);
|
||||
}
|
||||
atom->updatePropertyCache(false);
|
||||
}
|
||||
|
||||
@@ -650,6 +657,11 @@ std::string MolToSmiles(const ROMol &mol, const SmilesWriteParams &params,
|
||||
Canon::rankMolAtoms(*tmol, ranks, breakTies, params.doIsomericSmiles,
|
||||
params.doIsomericSmiles);
|
||||
}
|
||||
if (params.ignoreAtomMapNumbers) {
|
||||
for (auto atom : tmol->atoms()) {
|
||||
atom->setAtomMapNum(atomMapNums[atom->getIdx()]);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
std::iota(ranks.begin(), ranks.end(), 0);
|
||||
}
|
||||
|
||||
@@ -39,6 +39,8 @@ struct RDKIT_SMILESPARSE_EXPORT SmilesWriteParams {
|
||||
bool includeDativeBonds =
|
||||
true; /**< include the RDKit extension for dative bonds. Otherwise dative
|
||||
bonds will be written as single bonds*/
|
||||
bool ignoreAtomMapNumbers = false; /**< If true, ignores any atom map numbers
|
||||
when canonicalizing the molecule */
|
||||
};
|
||||
|
||||
namespace SmilesWrite {
|
||||
@@ -165,13 +167,16 @@ RDKIT_SMILESPARSE_EXPORT std::string MolToSmiles(
|
||||
atom.
|
||||
\param doRandom : if true, the first atom in the SMILES string will be
|
||||
selected at random and the SMILES string will not be canonical
|
||||
\param ignoreAtomMapNumbers : if true, ignores any atom map numbers when
|
||||
canonicalizing the molecule
|
||||
*/
|
||||
inline std::string MolToSmiles(const ROMol &mol, bool doIsomericSmiles = true,
|
||||
bool doKekule = false, int rootedAtAtom = -1,
|
||||
bool canonical = true,
|
||||
bool allBondsExplicit = false,
|
||||
bool allHsExplicit = false,
|
||||
bool doRandom = false) {
|
||||
bool doRandom = false,
|
||||
bool ignoreAtomMapNumbers = false) {
|
||||
SmilesWriteParams ps;
|
||||
ps.doIsomericSmiles = doIsomericSmiles;
|
||||
ps.doKekule = doKekule;
|
||||
@@ -180,6 +185,7 @@ inline std::string MolToSmiles(const ROMol &mol, bool doIsomericSmiles = true,
|
||||
ps.allBondsExplicit = allBondsExplicit;
|
||||
ps.allHsExplicit = allHsExplicit;
|
||||
ps.doRandom = doRandom;
|
||||
ps.ignoreAtomMapNumbers = ignoreAtomMapNumbers;
|
||||
return MolToSmiles(mol, ps);
|
||||
};
|
||||
|
||||
|
||||
@@ -2879,4 +2879,17 @@ TEST_CASE("Canonicalization of meso structures") {
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("Ignore atom map numbers") {
|
||||
SmilesWriteParams params;
|
||||
auto m1 = "[NH2:1]c1ccccc1"_smiles;
|
||||
CHECK(MolToSmiles(*m1, params) == "c1ccc([NH2:1])cc1");
|
||||
params.ignoreAtomMapNumbers = true;
|
||||
CHECK(MolToSmiles(*m1, params) == "[NH2:1]c1ccccc1");
|
||||
auto m2 = "Nc1ccccc1"_smiles;
|
||||
m1->getAtomWithIdx(0)->setAtomMapNum(0);
|
||||
CHECK(MolToSmiles(*m1, params) == MolToSmiles(*m2, params));
|
||||
CHECK(MolToSmiles(*m1, true, false, -1, true, false, false, false, true) ==
|
||||
MolToSmiles(*m2, true, false, -1, true, false, false, false, true));
|
||||
}
|
||||
@@ -1582,7 +1582,11 @@ BOOST_PYTHON_MODULE(rdmolfiles) {
|
||||
"resulting SMILES is not canonical")
|
||||
.def_readwrite(
|
||||
"includeDativeBonds", &RDKit::SmilesWriteParams::includeDativeBonds,
|
||||
"include the RDKit extension for dative bonds. Otherwise dative bonds will be written as single bonds");
|
||||
"include the RDKit extension for dative bonds. Otherwise dative bonds will be written as single bonds")
|
||||
.def_readwrite(
|
||||
"ignoreAtomMapNumbers",
|
||||
&RDKit::SmilesWriteParams::ignoreAtomMapNumbers,
|
||||
"ignore atom map numbers when canonicalizing the molecule");
|
||||
|
||||
python::def("MolToSmiles",
|
||||
(std::string(*)(const ROMol &,
|
||||
@@ -1609,6 +1613,8 @@ BOOST_PYTHON_MODULE(rdmolfiles) {
|
||||
in the output SMILES. Defaults to false.\n\
|
||||
- doRandom: (optional) if true, randomize the traversal of the molecule graph,\n\
|
||||
so we can generate random smiles. Defaults to false.\n\
|
||||
- ignoreAtomMapNumbers (optional) if true, ignores any atom map numbers when\n\
|
||||
canonicalizing the molecule \n\
|
||||
\n\
|
||||
RETURNS:\n\
|
||||
\n\
|
||||
@@ -1616,12 +1622,13 @@ BOOST_PYTHON_MODULE(rdmolfiles) {
|
||||
\n";
|
||||
python::def(
|
||||
"MolToSmiles",
|
||||
(std::string(*)(const ROMol &, bool, bool, int, bool, bool, bool,
|
||||
(std::string(*)(const ROMol &, bool, bool, int, bool, bool, bool, bool,
|
||||
bool))RDKit::MolToSmiles,
|
||||
(python::arg("mol"), python::arg("isomericSmiles") = true,
|
||||
python::arg("kekuleSmiles") = false, python::arg("rootedAtAtom") = -1,
|
||||
python::arg("canonical") = true, python::arg("allBondsExplicit") = false,
|
||||
python::arg("allHsExplicit") = false, python::arg("doRandom") = false),
|
||||
python::arg("allHsExplicit") = false, python::arg("doRandom") = false,
|
||||
python::arg("ignoreAtomMapNumbers") = false),
|
||||
docString.c_str());
|
||||
|
||||
docString =
|
||||
|
||||
@@ -8213,7 +8213,16 @@ M END
|
||||
centers = Chem.FindMesoCenters(mol, includeIsotopes=False)
|
||||
self.assertEqual(centers, ())
|
||||
|
||||
|
||||
def testIgnoreAtomMapNumbers(self):
|
||||
mol = Chem.MolFromSmiles("[NH2:1]c1ccccc1")
|
||||
ps = Chem.SmilesWriteParams()
|
||||
ps.ignoreAtomMapNumbers = True
|
||||
self.assertEqual(Chem.MolToSmiles(mol, ps), "[NH2:1]c1ccccc1")
|
||||
self.assertEqual(Chem.MolToSmiles(mol, ignoreAtomMapNumbers=True),
|
||||
"[NH2:1]c1ccccc1")
|
||||
self.assertEqual(Chem.MolToSmiles(mol, ignoreAtomMapNumbers=False),
|
||||
"c1ccc([NH2:1])cc1")
|
||||
|
||||
if __name__ == '__main__':
|
||||
if "RDTESTCASE" in os.environ:
|
||||
suite = unittest.TestSuite()
|
||||
|
||||
Reference in New Issue
Block a user