Shrink the tautomeric zone for the v2 hash of things like imines (#7502)

This commit is contained in:
Greg Landrum
2024-06-07 05:09:57 +02:00
committed by GitHub
parent 8c27805fa3
commit 93e8a746bf
3 changed files with 32 additions and 5 deletions

View File

@@ -414,7 +414,7 @@ TEST_CASE("tautomer v2") {
{{"CC(=O)CC(=O)C", "C=C(O)CC(=O)C", "CC(=O)C=C(O)C",
"C=C(O)C=C(O)C", "C=C(O)CC(O)=C"},
{}},
{{"CN=CF", "C=NCF"}, {"CNCF"}},
{{"CN=CCF", "CNC=CF"}, {"C=NCCF", "CNCCF"}},
{{"CN=C(C)F", "CNC(=C)F"}, {"C=NC(C)F"}},
{{"Cc1n[nH]cc1", "Cc1[nH][n]cc1", "CC1=NN=CC1", "CC1N=NCC=1"}, {}},
{{"O=C1C=CC(=O)C=C1"}, {"Oc1ccc(O)cc1", "O=C1C=CC(O)C=C1"}},
@@ -501,7 +501,7 @@ TEST_CASE("tautomer v2") {
"[CH3]-[C](-[CH3])(-[CH3])-[CH]=[O]_0_0"},
{"CC(C)=CO", "C[C](C)[CH][O]_1_0", "[CH3]-[C](-[CH3]):[C]:[O]_2_0"},
{"COC=O", "CO[CH][O]_0_0", "[CH3]-[O]:[C]:[O]_1_0"},
{"CNC=O", "C[N][CH][O]_1_0", "[C]:[N]:[C]:[O]_5_0"},
{"CNC=O", "C[N][CH][O]_1_0", "[CH3]-[N]:[C]:[O]_2_0"},
{"CN(C)C=O", "CN(C)[CH][O]_0_0", "[CH3]-[N](-[CH3]):[C]:[O]_1_0"},
{"CC(C)(C)NC=O", "CC(C)(C)[N][CH][O]_1_0",
"[CH3]-[C](-[CH3])(-[CH3])-[N]:[C]:[O]_2_0"},
@@ -511,7 +511,7 @@ TEST_CASE("tautomer v2") {
{"CC=CC(=O)C", "C[CH][CH][C](C)[O]_0_0",
"[C]:[C](:[O]):[C]:[C]-[CH3]_5_0"},
{"N1C=CCC(F)C1", "FC1C[CH][CH][N]C1_1_0",
"[F]-[CH]1-[C]:[N]:[C]:[C]-[CH2]-1_5_0"},
"[F]-[CH]1-[CH2]-[C]:[C]:[N]-[CH2]-1_3_0"},
{"CCC=C(O)C", "CC[CH][C](C)[O]_1_0",
"[C]:[C](:[O]):[C]-[CH2]-[CH3]_5_0"},
{"CCCC(=O)C", "CCC[C](C)[O]_0_0", "[C]:[C](:[O]):[C]-[CH2]-[CH3]_5_0"},
@@ -860,3 +860,18 @@ TEST_CASE("HetAtomProtomerv2") {
}
}
}
// Regression test for "overreach" of the v2 tautomer hash: the enamine and
// imine forms below must both produce the same HetAtomTautomerv2 hash, and
// that hash keeps the [C@H] stereo label on the nitrogen substituent.
TEST_CASE("overreach with v2 tautomer hashes and imines") {
  SECTION("basics") {
    // single expected value shared by every input form
    const std::string expectedHash =
        "[CH3]-[C@H](-[F])-[N]:[C]1:[C]-[CH2]-[CH2]-[CH2]-[C]:1_4_0";
    // first entry is the enamine form, second the imine form
    const std::vector<std::string> tautomerForms = {"C[C@H](F)NC1=CCCCC1",
                                                    "C[C@H](F)N=C1CCCCC1"};
    for (const auto &inputSmiles : tautomerForms) {
      auto mol = v2::SmilesParse::MolFromSmiles(inputSmiles);
      // a failed parse aborts the test case before hashing
      REQUIRE(mol);
      const auto computedHash = MolHash::MolHash(
          mol.get(), MolHash::HashFunction::HetAtomTautomerv2);
      CHECK(computedHash == expectedHash);
    }
  }
}

View File

@@ -1,5 +1,5 @@
//
// Copyright (C) 2011-2022 NextMove Software and other RDKit contributors
// Copyright (C) 2011-2024 NextMove Software and other RDKit contributors
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
@@ -509,12 +509,23 @@ std::string TautomerHashv2(RWMol *mol, bool proto, bool useCXSmiles,
<< atm->getIdx() << "-" << oatom->getIdx() << std::endl;
std::cerr << " " << bondsConsidered[nbrBond->getIdx()] << " icao "
<< isCandidateAtom(oatom) << " hsbo "
<< hasStartBond(oatom, startBonds) << " unsat "
<< hasStartBond(oatom, startBonds) << " atomunsato "
<< queryAtomUnsaturated(oatom) << " atomunsat "
<< queryAtomUnsaturated(atm) << " bondunsat "
<< isUnsaturatedBond(nbrBond) << " icaa "
<< isCandidateAtom(atm) << " hsba "
<< hasStartBond(atm, startBonds) << std::endl;
#endif
// Special case to prevent "overreach" with things like enamines.
// The logic here prevents the first bond in CNC=C from being included
// in the tautomeric system, so we get [CH3]-[N]:[C] instead of
// [C]:[N]:[C].
if (startBonds[bptr->getIdx()] && isHeteroAtom(atm) &&
!isUnsaturatedBond(nbrBond)) {
continue;
}
// if both bonds are not eligible, then we can skip this neighbor
if (skipNeighborBond(atm, oatom, nbrBond, startBonds) &&
skipNeighborBond(oatom, atm, nbrBond, startBonds)) {
continue;

View File

@@ -12,6 +12,7 @@ GitHub)
## Backwards incompatible changes
- The SMARTS for the unbranched alkanes in the fragment descriptors has been corrected. This descriptor will now frequently return different results.
- The SimilarityMap functions GetSimilarityMapFromWeights(), GetSimilarityMapForFingerprint(), and GetSimilarityMapForModel() all now require an rdMolDraw2D drawing object to be passed in.
- A bug fix in v2 of the tautomer and protomer hashes can lead to different results for these hashes. One fewer bond is now included in the tautomeric zone for systems like enamines/imines, so the v2 tautomer hash of the molecules CN=CC and CNC=C is now [C]:[C]:[N]-[CH3]_4_0 instead of [C]:[C]:[N]:[C]_7_0.
## New Features and Enhancements: