Fix a problem with aromatic heteroatom tautomer enumeration (#2952)

* update transforms to enforce neutral nitrogens when doing the aromatic heteroatom transforms

* formatting, add a test

* remove unused #include
This commit is contained in:
Greg Landrum
2020-02-13 06:35:01 +01:00
committed by GitHub
parent 40b9828fd9
commit 915471a079
3 changed files with 52 additions and 13 deletions

View File

@@ -243,9 +243,9 @@ std::vector<ROMOL_SPTR> TautomerEnumerator::enumerate(const ROMol &mol) const {
} else {
// std::cout << "kmol: " << kmol->first << std::endl;
// std::cout << MolToSmiles(*(kmol->second)) << std::endl;
// std::cout << "transform mol: " << MolToSmiles(*(transform.Mol))
// std::cout << "transform mol: " << MolToSmarts(*(transform.Mol))
// << std::endl;
//
// std::cout << "Matched: " << name << std::endl;
}
for (const auto &match : matches) {
@@ -310,6 +310,9 @@ std::vector<ROMOL_SPTR> TautomerEnumerator::enumerate(const ROMol &mol) const {
}
boost::shared_ptr<RWMol> wproduct(new RWMol(*product));
// wproduct->updatePropertyCache(false);
// std::cout << "pre-sanitization: "
// << MolToSmiles(*wproduct, true, true) << std::endl;
MolOps::sanitizeMol(*wproduct);
// MolOps::sanitizeMol(*static_cast<RWMol*>(product.get()));
tsmiles = MolToSmiles(*wproduct, true);

View File

@@ -946,13 +946,49 @@ void testCustomScoreFunc() {
BOOST_LOG(rdInfoLog) << "Finished" << std::endl;
}
// Regression tests for tautomer enumeration problems.
//
// Loads the transform catalog from
// $RDBASE/Data/MolStandardize/tautomerTransforms.in, checks that it defines
// 34 transforms, and then checks the number of tautomers enumerated for two
// input molecules.
void testEnumerationProblems() {
  BOOST_LOG(rdInfoLog)
      << "-----------------------\n Testing tautomer enumeration problems"
      << std::endl;
  // getenv() returns a null pointer when the variable is unset; constructing
  // a std::string from a null pointer is undefined behaviour, so fail the
  // test explicitly instead.
  const char *rdbaseEnv = getenv("RDBASE");
  TEST_ASSERT(rdbaseEnv);
  std::string rdbase = rdbaseEnv;
  std::string tautomerFile =
      rdbase + "/Data/MolStandardize/tautomerTransforms.in";
  // NOTE(review): both objects below are created with raw `new`; presumably
  // the catalog/enumerator take over ownership - confirm against their
  // constructors.
  auto *tautparams = new TautomerCatalogParams(tautomerFile);
  unsigned int ntautomers = tautparams->getNumTautomers();
  TEST_ASSERT(ntautomers == 34);
  TautomerEnumerator te(new TautomerCatalog(tautparams));
#if 1
  {  // from the discussion of #2908
    auto mol = "O=C(C1=C[NH+]=CC=C1)[O-]"_smiles;
    // make sure parsing succeeded before dereferencing, as the block below
    // does
    TEST_ASSERT(mol);
    auto tauts = te.enumerate(*mol);
    TEST_ASSERT(tauts.size() == 1);
  }
#endif
  {  // one of the examples from the tautobase paper
    auto m =
        "[S:1]=[c:2]1[nH+:3][c:5]([NH2:9])[nH:8][c:7]2[c:4]1[n:6][nH:10][n:11]2"_smiles;
    TEST_ASSERT(m);
    auto tauts = te.enumerate(*m);
    // for (auto taut : tauts) {
    //   std::cerr << MolToSmiles(*taut) << std::endl;
    // }
    TEST_ASSERT(tauts.size() == 12);
  }
  BOOST_LOG(rdInfoLog) << "Finished" << std::endl;
}
// Test driver: calls RDLog::InitLogs() and then runs each test function in
// order, returning 0 once they have all been called.
int main() {
  RDLog::InitLogs();
  // A single balanced #if/#endif pair: the previous text had one `#if 1` but
  // two `#endif` lines, which does not preprocess. Flip to `#if 0` to skip
  // the older tests while working on testEnumerationProblems().
#if 1
  testEnumerator();
  testCanonicalize();
  testPickCanonical();
  testCustomScoreFunc();
#endif
  testEnumerationProblems();
  return 0;
}

View File

@@ -7,16 +7,16 @@ aliphatic imine f [CX4!H0]-[C]=[NX2]
aliphatic imine r [NX3!H0]-[C]=[CX3]
special imine f [N!H0]-[C]=[CX3R0]
special imine r [CX4!H0]-[c]=[n]
1,3 aromatic heteroatom H shift f [#7!H0]-[#6R1]=[O,#7X2]
1,3 aromatic heteroatom H shift [O,#7;!H0]-[#6R1]=[#7X2]
1,3 heteroatom H shift [#7,S,O,Se,Te;!H0]-[#7X2,#6,#15]=[#7,#16,#8,Se,Te]
1,5 aromatic heteroatom H shift [#7,#16,#8;!H0]-[#6,#7]=[#6]-[#6,#7]=[#7,#16,#8;H0]
1,5 aromatic heteroatom H shift [#7,#16,#8,Se,Te;!H0]-[#6,nX2]=[#6,nX2]-[#6,#7X2]=[#7X2,S,O,Se,Te]
1,5 aromatic heteroatom H shift r [#7,S,O,Se,Te;!H0]-[#6,#7X2]=[#6,nX2]-[#6,nX2]=[#7,#16,#8,Se,Te]
1,7 aromatic heteroatom H shift f [#7,#8,#16,Se,Te;!H0]-[#6,#7X2]=[#6,#7X2]-[#6,#7X2]=[#6]-[#6,#7X2]=[#7X2,S,O,Se,Te,CX3]
1,7 aromatic heteroatom H shift r [#7,S,O,Se,Te,CX4;!H0]-[#6,#7X2]=[#6]-[#6,#7X2]=[#6,#7X2]-[#6,#7X2]=[NX2,S,O,Se,Te]
1,9 aromatic heteroatom H shift f [#7,O;!H0]-[#6,#7X2]=[#6,#7X2]-[#6,#7X2]=[#6,#7X2]-[#6,#7X2]=[#6,#7X2]-[#6,#7X2]=[#7,O]
1,11 aromatic heteroatom H shift f [#7,O;!H0]-[#6,nX2]=[#6,nX2]-[#6,nX2]=[#6,nX2]-[#6,nX2]=[#6,nX2]-[#6,nX2]=[#6,nX2]-[#6,nX2]=[#7X2,O]
1,3 aromatic heteroatom H shift f [#7+0!H0]-[#6R1]=[O,#7X2+0]
1,3 aromatic heteroatom H shift [O,#7+0;!H0]-[#6R1]=[#7+0X2]
1,3 heteroatom H shift [#7+0,S,O,Se,Te;!H0]-[#7X2,#6,#15]=[#7+0,#16,#8,Se,Te]
1,5 aromatic heteroatom H shift [#7+0,#16,#8;!H0]-[#6,#7]=[#6]-[#6,#7]=[#7+0,#16,#8;H0]
1,5 aromatic heteroatom H shift [#7+0,#16,#8,Se,Te;!H0]-[#6,nX2]=[#6,nX2]-[#6,#7X2]=[#7X2+0,S,O,Se,Te]
1,5 aromatic heteroatom H shift r [#7+0,S,O,Se,Te;!H0]-[#6,#7X2]=[#6,nX2]-[#6,nX2]=[#7+0,#16,#8,Se,Te]
1,7 aromatic heteroatom H shift f [#7+0,#8,#16,Se,Te;!H0]-[#6,#7X2]=[#6,#7X2]-[#6,#7X2]=[#6]-[#6,#7X2]=[#7X2+0,S,O,Se,Te,CX3]
1,7 aromatic heteroatom H shift r [#7+0,S,O,Se,Te,CX4;!H0]-[#6,#7X2]=[#6]-[#6,#7X2]=[#6,#7X2]-[#6,#7X2]=[NX2,S,O,Se,Te]
1,9 aromatic heteroatom H shift f [#7+0,O;!H0]-[#6,#7X2]=[#6,#7X2]-[#6,#7X2]=[#6,#7X2]-[#6,#7X2]=[#6,#7X2]-[#6,#7X2]=[#7+0,O]
1,11 aromatic heteroatom H shift f [#7+0,O;!H0]-[#6,nX2]=[#6,nX2]-[#6,nX2]=[#6,nX2]-[#6,nX2]=[#6,nX2]-[#6,nX2]=[#6,nX2]-[#6,nX2]=[#7X2+0,O]
furanone f [O,S,N;!H0]-[#6r5]=[#6X3r5;$([#6]([#6r5])=[#6r5])]
furanone r [#6r5!H0;$([#6]([#6r5])[#6r5])]-[#6r5]=[O,S,N]
keten/ynol f [C!H0]=[C]=[O,S,Se,Te;X1] #-