mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-05 22:04:27 +08:00
* framework for extended query. serialization works to/from text doesn't work * first pass at getting substructure search working basic tests improved error handling (try not to take down the server thread!) * add serialization to MolBundle * we really need to pickle mol properties * basic support for molbundle including substructure search * tautomer and molbundle queries to JSON * remove debug msg * cleanup debug initial index steps (not tested) * remove indexing stuff since it wasn't working will try to come back to that * add xqm to update script * add c++ testing for molbundle serialization * add serialization of molbundles to python interface * support expanding molbundles to arrays of tautomer queries * edge cases Signed-off-by: greg landrum <greg.landrum@gmail.com> * change in response to review * a bunch of updates * make sure the mol props needed for XQMs are being serialized * update update script * fix binary string output from ExtendedQueryMols in python * tautomer queries should serialize properties * more testing never hurts * combo of generic groups and generalized queries works * Update Code/PgSQL/rdkit/adapter.cpp Co-authored-by: Paolo Tosco <paolo.tosco.mail@gmail.com> * Update Code/PgSQL/rdkit/adapter.cpp Co-authored-by: Paolo Tosco <paolo.tosco.mail@gmail.com> * Update Code/PgSQL/rdkit/adapter.cpp Co-authored-by: Paolo Tosco <paolo.tosco.mail@gmail.com> * Fix weird quotes? --------- Signed-off-by: greg landrum <greg.landrum@gmail.com> Co-authored-by: Paolo Tosco <paolo.tosco.mail@gmail.com>
151 lines
7.0 KiB
Python
151 lines
7.0 KiB
Python
#
|
|
# Copyright (C) 2023 Greg Landrum and other RDKit contributors
|
|
# @@ All Rights Reserved @@
|
|
#
|
|
# This file is part of the RDKit.
|
|
# The contents are covered by the terms of the BSD license
|
|
# which is included in the file license.txt, found at the root
|
|
# of the RDKit source tree.
|
|
|
|
import unittest
|
|
|
|
#
|
|
from rdkit import Chem
|
|
from rdkit.Chem import rdGeneralizedSubstruct
|
|
|
|
|
|
class TestCase(unittest.TestCase):
|
|
|
|
def test1CreationAndSubstruct(self):
|
|
m = Chem.MolFromSmiles('COCc1n[nH]c(F)c1 |LN:1:1.3|')
|
|
xqm = rdGeneralizedSubstruct.CreateExtendedQueryMol(m)
|
|
|
|
mol = Chem.MolFromSmiles('COOCc1n[nH]c(F)c1')
|
|
matches = rdGeneralizedSubstruct.MolGetSubstructMatches(mol, xqm)
|
|
self.assertEqual(len(matches), 1)
|
|
self.assertEqual(tuple(matches[0]), (0, 1, 2, 3, 4, 5, 9, 6, 7, 8))
|
|
|
|
match = rdGeneralizedSubstruct.MolGetSubstructMatch(mol, xqm)
|
|
self.assertEqual(tuple(match), (0, 1, 2, 3, 4, 5, 9, 6, 7, 8))
|
|
|
|
self.assertTrue(rdGeneralizedSubstruct.MolHasSubstructMatch(mol, xqm))
|
|
|
|
def test2Params(self):
|
|
m = Chem.MolFromSmiles('COCc1n[nH]c(F)c1[C@H](F)Cl |LN:1:1.3|')
|
|
xqm = rdGeneralizedSubstruct.CreateExtendedQueryMol(m)
|
|
|
|
mol1 = Chem.MolFromSmiles('COOCc1n[nH]c(F)c1[C@H](F)Cl')
|
|
mol2 = Chem.MolFromSmiles('COOCc1n[nH]c(F)c1[C@@H](F)Cl')
|
|
self.assertEqual(len(rdGeneralizedSubstruct.MolGetSubstructMatches(mol1, xqm)), 1)
|
|
self.assertEqual(len(rdGeneralizedSubstruct.MolGetSubstructMatches(mol2, xqm)), 1)
|
|
|
|
params = Chem.SubstructMatchParameters()
|
|
params.useChirality = True
|
|
self.assertEqual(len(rdGeneralizedSubstruct.MolGetSubstructMatches(mol1, xqm, params)), 1)
|
|
self.assertEqual(len(rdGeneralizedSubstruct.MolGetSubstructMatches(mol2, xqm, params)), 0)
|
|
|
|
def test3MultipleResults(self):
|
|
m = Chem.MolFromSmiles('COC |LN:1:1.3|')
|
|
xqm = rdGeneralizedSubstruct.CreateExtendedQueryMol(m)
|
|
|
|
mol1 = Chem.MolFromSmiles('COOCC')
|
|
mol2 = Chem.MolFromSmiles('COCC')
|
|
|
|
params = Chem.SubstructMatchParameters()
|
|
params.uniquify = False
|
|
self.assertEqual(len(rdGeneralizedSubstruct.MolGetSubstructMatches(mol1, xqm, params)), 2)
|
|
self.assertEqual(len(rdGeneralizedSubstruct.MolGetSubstructMatches(mol2, xqm, params)), 2)
|
|
|
|
def test4AdjustQueryProperties(self):
|
|
m = Chem.MolFromSmiles('COCc1n[nH]cc1 |LN:1:1.3|')
|
|
xqm1 = rdGeneralizedSubstruct.CreateExtendedQueryMol(m)
|
|
xqm2 = rdGeneralizedSubstruct.CreateExtendedQueryMol(m, adjustQueryProperties=True)
|
|
nops = Chem.AdjustQueryParameters.NoAdjustments()
|
|
xqm3 = rdGeneralizedSubstruct.CreateExtendedQueryMol(m, adjustQueryProperties=True,
|
|
adjustQueryParameters=nops)
|
|
|
|
mol1 = Chem.MolFromSmiles('COCc1n[nH]cc1')
|
|
mol2 = Chem.MolFromSmiles('COCc1n[nH]c(F)c1')
|
|
self.assertEqual(len(rdGeneralizedSubstruct.MolGetSubstructMatches(mol1, xqm1)), 1)
|
|
self.assertEqual(len(rdGeneralizedSubstruct.MolGetSubstructMatches(mol1, xqm2)), 1)
|
|
self.assertEqual(len(rdGeneralizedSubstruct.MolGetSubstructMatches(mol1, xqm3)), 1)
|
|
self.assertEqual(len(rdGeneralizedSubstruct.MolGetSubstructMatches(mol2, xqm1)), 1)
|
|
self.assertEqual(len(rdGeneralizedSubstruct.MolGetSubstructMatches(mol2, xqm2)), 0)
|
|
self.assertEqual(len(rdGeneralizedSubstruct.MolGetSubstructMatches(mol2, xqm3)), 1)
|
|
|
|
def test5ControlSteps(self):
|
|
m = Chem.MolFromSmiles('COCC1OC(N)=N1 |LN:1:1.3|')
|
|
xqm1 = rdGeneralizedSubstruct.CreateExtendedQueryMol(m)
|
|
xqm2 = rdGeneralizedSubstruct.CreateExtendedQueryMol(m, doEnumeration=False)
|
|
xqm3 = rdGeneralizedSubstruct.CreateExtendedQueryMol(m, doTautomers=False)
|
|
xqm4 = rdGeneralizedSubstruct.CreateExtendedQueryMol(m, doEnumeration=False, doTautomers=False)
|
|
|
|
mol1 = Chem.MolFromSmiles('COCC1OC(N)=N1')
|
|
self.assertEqual(len(rdGeneralizedSubstruct.MolGetSubstructMatches(mol1, xqm1)), 1)
|
|
self.assertEqual(len(rdGeneralizedSubstruct.MolGetSubstructMatches(mol1, xqm2)), 1)
|
|
self.assertEqual(len(rdGeneralizedSubstruct.MolGetSubstructMatches(mol1, xqm3)), 1)
|
|
self.assertEqual(len(rdGeneralizedSubstruct.MolGetSubstructMatches(mol1, xqm4)), 1)
|
|
|
|
mol2 = Chem.MolFromSmiles('COCC1OC(=N)N1')
|
|
self.assertEqual(len(mol2.GetSubstructMatches(m)), 0)
|
|
self.assertEqual(len(rdGeneralizedSubstruct.MolGetSubstructMatches(mol2, xqm1)), 1)
|
|
self.assertEqual(len(rdGeneralizedSubstruct.MolGetSubstructMatches(mol2, xqm2)), 1)
|
|
self.assertEqual(len(rdGeneralizedSubstruct.MolGetSubstructMatches(mol2, xqm3)), 0)
|
|
self.assertEqual(len(rdGeneralizedSubstruct.MolGetSubstructMatches(mol2, xqm4)), 0)
|
|
|
|
mol3 = Chem.MolFromSmiles('COOCC1OC(N)=N1')
|
|
self.assertEqual(len(rdGeneralizedSubstruct.MolGetSubstructMatches(mol3, xqm1)), 1)
|
|
self.assertEqual(len(rdGeneralizedSubstruct.MolGetSubstructMatches(mol3, xqm2)), 0)
|
|
self.assertEqual(len(rdGeneralizedSubstruct.MolGetSubstructMatches(mol3, xqm3)), 1)
|
|
self.assertEqual(len(rdGeneralizedSubstruct.MolGetSubstructMatches(mol3, xqm4)), 0)
|
|
|
|
def test6Serialization(self):
|
|
m = Chem.MolFromSmiles('COCc1n[nH]c(F)c1 |LN:1:1.3|')
|
|
xqm = rdGeneralizedSubstruct.CreateExtendedQueryMol(m)
|
|
|
|
mol = Chem.MolFromSmiles('COOCc1n[nH]c(F)c1')
|
|
matches = rdGeneralizedSubstruct.MolGetSubstructMatches(mol, xqm)
|
|
self.assertEqual(len(matches), 1)
|
|
self.assertEqual(tuple(matches[0]), (0, 1, 2, 3, 4, 5, 9, 6, 7, 8))
|
|
|
|
xqm2 = rdGeneralizedSubstruct.ExtendedQueryMol(xqm.ToBinary())
|
|
matches = rdGeneralizedSubstruct.MolGetSubstructMatches(mol, xqm2)
|
|
self.assertEqual(len(matches), 1)
|
|
|
|
def test7GenericMatchers(self):
|
|
m = Chem.MolFromSmiles('COC1=NNC(*)=C1 |$;;;;;;AEL_p;$,LN:1:1.3|')
|
|
qps = Chem.AdjustQueryParameters.NoAdjustments()
|
|
qps.makeDummiesQueries = True
|
|
m = Chem.AdjustQueryProperties(m,qps)
|
|
Chem.SetGenericQueriesFromProperties(m)
|
|
ps = Chem.SubstructMatchParameters()
|
|
ps.useGenericMatchers = True;
|
|
|
|
mol1 = Chem.MolFromSmiles('COC1=NNC(C=C)=C1')
|
|
mol2 = Chem.MolFromSmiles('COC1=NNC(CC)=C1')
|
|
mol3 = Chem.MolFromSmiles('COOC1=NNC(C=C)=C1')
|
|
mol4 = Chem.MolFromSmiles('COOC1=NNC(CC)=C1')
|
|
|
|
self.assertTrue(mol1.HasSubstructMatch(m,ps))
|
|
self.assertFalse(mol2.HasSubstructMatch(m,ps))
|
|
self.assertFalse(mol3.HasSubstructMatch(m,ps))
|
|
self.assertFalse(mol4.HasSubstructMatch(m,ps))
|
|
|
|
xqm = rdGeneralizedSubstruct.CreateExtendedQueryMol(m)
|
|
self.assertTrue(rdGeneralizedSubstruct.MolHasSubstructMatch(mol1, xqm, ps))
|
|
self.assertFalse(rdGeneralizedSubstruct.MolHasSubstructMatch(mol2, xqm, ps))
|
|
self.assertTrue(rdGeneralizedSubstruct.MolHasSubstructMatch(mol3, xqm, ps))
|
|
self.assertFalse(rdGeneralizedSubstruct.MolHasSubstructMatch(mol4, xqm, ps))
|
|
|
|
xqm2 = rdGeneralizedSubstruct.ExtendedQueryMol(xqm.ToBinary())
|
|
self.assertTrue(rdGeneralizedSubstruct.MolHasSubstructMatch(mol1, xqm2, ps))
|
|
self.assertFalse(rdGeneralizedSubstruct.MolHasSubstructMatch(mol2, xqm2, ps))
|
|
self.assertTrue(rdGeneralizedSubstruct.MolHasSubstructMatch(mol3, xqm2, ps))
|
|
self.assertFalse(rdGeneralizedSubstruct.MolHasSubstructMatch(mol4, xqm2, ps))
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__': # pragma: nocover
|
|
unittest.main()
|