diff --git a/Code/GraphMol/GeneralizedSubstruct/CMakeLists.txt b/Code/GraphMol/GeneralizedSubstruct/CMakeLists.txt index 409cb45ea..3f8cc9a0d 100644 --- a/Code/GraphMol/GeneralizedSubstruct/CMakeLists.txt +++ b/Code/GraphMol/GeneralizedSubstruct/CMakeLists.txt @@ -1,7 +1,7 @@ rdkit_library(GeneralizedSubstruct XQMol.cpp TextIO.cpp - LINK_LIBRARIES MolEnumerator TautomerQuery SubstructMatch SmilesParse GraphMol) + LINK_LIBRARIES MolEnumerator TautomerQuery SubstructMatch SmilesParse GraphMol Fingerprints) target_compile_definitions(GeneralizedSubstruct PRIVATE RDKIT_GENERALIZEDSUBSTRUCT_BUILD) rdkit_headers(XQMol.h DEST GraphMol/GeneralizedSubstruct) diff --git a/Code/GraphMol/GeneralizedSubstruct/Wrap/rdGeneralizedSubstruct.cpp b/Code/GraphMol/GeneralizedSubstruct/Wrap/rdGeneralizedSubstruct.cpp index 50ffe9ea5..e918ac323 100644 --- a/Code/GraphMol/GeneralizedSubstruct/Wrap/rdGeneralizedSubstruct.cpp +++ b/Code/GraphMol/GeneralizedSubstruct/Wrap/rdGeneralizedSubstruct.cpp @@ -111,7 +111,10 @@ BOOST_PYTHON_MODULE(rdGeneralizedSubstruct) { .def("InitFromJSON", &ExtendedQueryMol::initFromJSON, python::args("self", "text")) .def("ToBinary", XQMolToBinary, python::args("self")) - .def("ToJSON", &ExtendedQueryMol::toJSON, python::args("self")); + .def("ToJSON", &ExtendedQueryMol::toJSON, python::args("self")) + .def("PatternFingerprintQuery", + &ExtendedQueryMol::patternFingerprintQuery, + (python::arg("self"), python::arg("fingerprintSize") = 2048)); python::def( "MolHasSubstructMatch", &hasSubstructHelper, @@ -129,6 +132,11 @@ BOOST_PYTHON_MODULE(rdGeneralizedSubstruct) { python::arg("params") = python::object()), "returns all matches (if any) of a molecule to a generalized substructure query"); + python::def( + "PatternFingerprintTarget", &patternFingerprintTargetMol, + (python::arg("target"), python::arg("fingerprintSize") = 2048), + "Creates a pattern fingerprint for a target molecule that is compatible with an extended query"); + 
python::def("CreateExtendedQueryMol", createExtendedQueryMolHelper, (python::arg("mol"), python::arg("doEnumeration") = true, python::arg("doTautomers") = true, diff --git a/Code/GraphMol/GeneralizedSubstruct/XQMol.cpp b/Code/GraphMol/GeneralizedSubstruct/XQMol.cpp index 810d9948b..694b84f94 100644 --- a/Code/GraphMol/GeneralizedSubstruct/XQMol.cpp +++ b/Code/GraphMol/GeneralizedSubstruct/XQMol.cpp @@ -19,6 +19,8 @@ #include "XQMol.h" +#include "GraphMol/Fingerprints/Fingerprints.h" + namespace RDKit { namespace GeneralizedSubstruct { @@ -53,6 +55,42 @@ void ExtendedQueryMol::initFromOther(const ExtendedQueryMol &other) { } } +// Query-side pattern fingerprint for whichever alternative xqmol holds. For a +// tautomer bundle the member fingerprints are AND-ed (null if the bundle is +// empty). Throws std::invalid_argument if the held type is not recognised. +std::unique_ptr<ExplicitBitVect> ExtendedQueryMol::patternFingerprintQuery( + unsigned fpSize) const { + if (std::holds_alternative<RWMol_T>(xqmol)) { + return std::unique_ptr<ExplicitBitVect>(PatternFingerprintMol( + *std::get<RWMol_T>(xqmol), fpSize, nullptr, nullptr, true)); + } + if (std::holds_alternative<MolBundle_T>(xqmol)) { + return std::unique_ptr<ExplicitBitVect>(PatternFingerprintMol( + *std::get<MolBundle_T>(xqmol), fpSize, nullptr, true)); + } + if (std::holds_alternative<TautomerQuery_T>(xqmol)) { + return std::unique_ptr<ExplicitBitVect>( + std::get<TautomerQuery_T>(xqmol)->patternFingerprintTemplate(fpSize)); + } + if (std::holds_alternative<TautomerBundle_T>(xqmol)) { + // unique_ptr ownership: no leak if a later member's fingerprinting throws + std::unique_ptr<ExplicitBitVect> res; + for (const auto &tautomer : *std::get<TautomerBundle_T>(xqmol)) { + std::unique_ptr<ExplicitBitVect> molfp( + tautomer->patternFingerprintTemplate(fpSize)); + if (res) { + *res &= *molfp; + } else { + res.swap(molfp); // first member seeds the accumulator + } + } + return res; + } + + throw std::invalid_argument("Unknown extended query molecule type"); +} + + std::vector SubstructMatch( const ROMol &mol, const ExtendedQueryMol &query, const SubstructMatchParameters ¶ms) { @@ -152,5 +190,12 @@ ExtendedQueryMol createExtendedQueryMol(const RWMol &mol, bool doEnumeration, } } } + +// Pattern fingerprint for a target molecule, compatible with patternFingerprintQuery(). +std::unique_ptr<ExplicitBitVect> patternFingerprintTargetMol(const ROMol &mol, + unsigned fpSize) { + return std::unique_ptr<ExplicitBitVect>( 
+ PatternFingerprintMol(mol, fpSize, nullptr, nullptr, true)); +} } // namespace GeneralizedSubstruct } // namespace RDKit diff --git a/Code/GraphMol/GeneralizedSubstruct/XQMol.h b/Code/GraphMol/GeneralizedSubstruct/XQMol.h index 79978f856..e727ea882 100644 --- a/Code/GraphMol/GeneralizedSubstruct/XQMol.h +++ b/Code/GraphMol/GeneralizedSubstruct/XQMol.h @@ -68,6 +68,10 @@ struct RDKIT_GENERALIZEDSUBSTRUCT_EXPORT ExtendedQueryMol { ContainedType xqmol; std::string toBinary() const; std::string toJSON() const; + + // Query fingerprint + std::unique_ptr<ExplicitBitVect> patternFingerprintQuery( + unsigned int fpSize = 2048U) const; }; //! Creates an ExtendedQueryMol from the input molecule @@ -102,6 +106,10 @@ RDKIT_GENERALIZEDSUBSTRUCT_EXPORT std::vector SubstructMatch( const ROMol &mol, const ExtendedQueryMol &query, const SubstructMatchParameters ¶ms = SubstructMatchParameters()); +//! Fingerprints a target molecule +RDKIT_GENERALIZEDSUBSTRUCT_EXPORT std::unique_ptr<ExplicitBitVect> +patternFingerprintTargetMol(const ROMol &mol, unsigned int fpSize = 2048U); + //! 
checks if a molecule has a match to an ExtendedQueryMol inline bool hasSubstructMatch( const ROMol &mol, const ExtendedQueryMol &query, diff --git a/Code/GraphMol/GeneralizedSubstruct/catch_tests.cpp b/Code/GraphMol/GeneralizedSubstruct/catch_tests.cpp index 6a91ea88e..f62c50b35 100644 --- a/Code/GraphMol/GeneralizedSubstruct/catch_tests.cpp +++ b/Code/GraphMol/GeneralizedSubstruct/catch_tests.cpp @@ -27,6 +27,15 @@ using namespace RDKit; using namespace RDKit::GeneralizedSubstruct; +bool fingerprintsMatch(const ROMol& target, const ExtendedQueryMol& xqm) { /* Test helper: fingerprints the query and the target with the default size and returns AllProbeBitsMatch(query, target). */ + const auto queryFingerprint = xqm.patternFingerprintQuery(); + const auto targetFingerprint = patternFingerprintTargetMol(target); + CHECK(queryFingerprint->getNumOnBits() > 0); /* both fingerprints are expected to have at least one bit set */ + CHECK(targetFingerprint->getNumOnBits() > 0); + const auto match = AllProbeBitsMatch(*queryFingerprint, *targetFingerprint); + return match; +} + TEST_CASE("molecule basics") { auto mol = "Cc1n[nH]c(F)c1"_smarts; REQUIRE(mol); @@ -39,6 +48,7 @@ TEST_CASE("molecule basics") { CHECK(SubstructMatch(*"CCc1[nH]nc(F)c1"_smiles, *xq).empty()); CHECK(hasSubstructMatch(*"CCc1n[nH]c(F)c1"_smiles, *xq)); CHECK(!hasSubstructMatch(*"CCc1[nH]nc(F)c1"_smiles, *xq)); + CHECK(fingerprintsMatch(*"CCc1n[nH]c(F)c1"_smiles, *xq)); } } } @@ -57,6 +67,9 @@ TEST_CASE("enumeration basics") { CHECK(SubstructMatch(*"COCC"_smiles, *xq).size() == 1); CHECK(SubstructMatch(*"COOCC"_smiles, *xq).size() == 1); CHECK(SubstructMatch(*"COOOCC"_smiles, *xq).size() == 1); CHECK(SubstructMatch(*"COOOOCC"_smiles, *xq).empty()); + CHECK(fingerprintsMatch(*"COCC"_smiles, *xq)); + CHECK(fingerprintsMatch(*"COOCC"_smiles, *xq)); + CHECK(fingerprintsMatch(*"COOOCC"_smiles, *xq)); } } } @@ -76,6 +89,9 @@ TEST_CASE("result counts") { CHECK(SubstructMatch(*"COOCC"_smiles, *xq, ps).size() == 2); CHECK(SubstructMatch(*"COOOCC"_smiles, *xq, ps).size() == 2); CHECK(SubstructMatch(*"COOOOCC"_smiles, *xq, ps).empty()); + CHECK(fingerprintsMatch(*"COCC"_smiles, *xq)); + CHECK(fingerprintsMatch(*"COOCC"_smiles, *xq)); + 
CHECK(fingerprintsMatch(*"COOOCC"_smiles, *xq)); } } } @@ -93,6 +109,9 @@ TEST_CASE("tautomer basics") { CHECK(SubstructMatch(*"CCc1n[nH]c(F)c1"_smiles, *xq).size() == 1); CHECK(SubstructMatch(*"CCc1[nH]nc(F)c1"_smiles, *xq).size() == 1); CHECK(SubstructMatch(*"CCc1[nH]ncc1"_smiles, *xq).empty()); + CHECK(fingerprintsMatch(*"CCc1n[nH]c(F)c1"_smiles, *xq)); + CHECK(fingerprintsMatch(*"CCc1[nH]nc(F)c1"_smiles, *xq)); + CHECK(!fingerprintsMatch(*"CCc1[nH]ncc1"_smiles, *xq)); } } } @@ -119,6 +138,11 @@ TEST_CASE("tautomer bundle basics") { CHECK(SubstructMatch(*"CCc1[nH]ncc1F"_smiles, *xq).size() == 1); CHECK(SubstructMatch(*"CCc1n[nH]cc1F"_smiles, *xq).size() == 1); CHECK(SubstructMatch(*"CCc1[nH]ncc1"_smiles, *xq).empty()); + CHECK(fingerprintsMatch(*"CCc1n[nH]c(F)c1"_smiles, *xq)); + CHECK(fingerprintsMatch(*"CCc1[nH]nc(F)c1"_smiles, *xq)); + CHECK(fingerprintsMatch(*"CCc1[nH]ncc1F"_smiles, *xq)); + CHECK(fingerprintsMatch(*"CCc1n[nH]cc1F"_smiles, *xq)); + CHECK(!fingerprintsMatch(*"CCc1[nH]ncc1"_smiles, *xq)); } } } @@ -136,6 +160,8 @@ TEST_CASE("createExtendedQueryMol and copy ctors") { CHECK(std::holds_alternative(xqm.xqmol)); CHECK(SubstructMatch(*"COCC"_smiles, xqm).size() == 1); CHECK(SubstructMatch(*"COOCC"_smiles, xqm).empty()); + CHECK(fingerprintsMatch(*"COCC"_smiles, xqm)); + CHECK(!fingerprintsMatch(*"COOCC"_smiles, xqm)); } } SECTION("MolBundle") { @@ -152,6 +178,9 @@ TEST_CASE("createExtendedQueryMol and copy ctors") { CHECK(SubstructMatch(*"COOCC"_smiles, xqm).size() == 1); CHECK(SubstructMatch(*"COOOCC"_smiles, xqm).size() == 1); CHECK(SubstructMatch(*"COOOOCC"_smiles, xqm).empty()); + CHECK(fingerprintsMatch(*"COCC"_smiles, xqm)); + CHECK(fingerprintsMatch(*"COOCC"_smiles, xqm)); + CHECK(fingerprintsMatch(*"COOOCC"_smiles, xqm)); } } SECTION("TautomerQuery") { @@ -169,6 +198,9 @@ TEST_CASE("createExtendedQueryMol and copy ctors") { CHECK(SubstructMatch(*"CCC1OC(=N)N1"_smiles, *mol1).empty()); CHECK(SubstructMatch(*"CCC1OC(=N)N1"_smiles, xqm).size() 
== 1); CHECK(SubstructMatch(*"c1[nH]ncc1"_smiles, xqm).empty()); + CHECK(fingerprintsMatch(*"CCC1OC(N)=N1"_smiles, xqm)); + CHECK(fingerprintsMatch(*"CCC1OC(=N)N1"_smiles, xqm)); + CHECK(!fingerprintsMatch(*"c1[nH]ncc1"_smiles, xqm)); } } SECTION("TautomerBundle") { @@ -186,6 +218,9 @@ TEST_CASE("createExtendedQueryMol and copy ctors") { CHECK(SubstructMatch(*"COOCC1(F)OC(=N)N1"_smiles, xqm).size() == 1); CHECK(SubstructMatch(*"COCC1OC(N)=N1"_smiles, xqm).size() == 1); CHECK(SubstructMatch(*"COOOOCC1OC(=N)N1"_smiles, xqm).empty()); + CHECK(fingerprintsMatch(*"COCC1(F)OC(N)=N1"_smiles, xqm)); + CHECK(fingerprintsMatch(*"COOCC1(F)OC(=N)N1"_smiles, xqm)); + CHECK(fingerprintsMatch(*"COCC1OC(N)=N1"_smiles, xqm)); } } } @@ -200,6 +235,8 @@ TEST_CASE("test SRUs") { // we won't test limits here. CHECK(SubstructMatch(*"FCN(C)CC"_smiles, xqm).size() == 1); CHECK(SubstructMatch(*"FCN(O)N(C)CC"_smiles, xqm).size() == 1); + CHECK(fingerprintsMatch(*"FCN(C)CC"_smiles, xqm)); + CHECK(fingerprintsMatch(*"FCN(O)N(C)CC"_smiles, xqm)); } } @@ -232,6 +269,14 @@ TEST_CASE("adjustQueryProperties") { CHECK(SubstructMatch(*"COC1OC1"_smiles, xqm1).empty()); CHECK(SubstructMatch(*"COC1OC1"_smiles, xqm2).empty()); CHECK(SubstructMatch(*"COC1OC1"_smiles, xqm3).size() == 1); + CHECK(fingerprintsMatch(*"COC1CC1"_smiles, xqm1)); + CHECK(fingerprintsMatch(*"COC1CC1"_smiles, xqm2)); + CHECK(fingerprintsMatch(*"COC1CC1"_smiles, xqm3)); + CHECK(fingerprintsMatch(*"COC1C(C)C1"_smiles, xqm1)); + CHECK(fingerprintsMatch(*"COC1C(C)C1"_smiles, xqm3)); + CHECK(!fingerprintsMatch(*"COC1OC1"_smiles, xqm1)); + CHECK(!fingerprintsMatch(*"COC1OC1"_smiles, xqm2)); + CHECK(fingerprintsMatch(*"COC1OC1"_smiles, xqm3)); } SECTION("MolBundle") { auto mol = "COCC |LN:1:1.3|"_smiles; @@ -248,6 +293,10 @@ TEST_CASE("adjustQueryProperties") { CHECK(SubstructMatch(*"COOC=C"_smiles, xqm1).empty()); CHECK(SubstructMatch(*"COC=C"_smiles, xqm2).size() == 1); CHECK(SubstructMatch(*"COOC=C"_smiles, xqm2).size() == 1); + 
CHECK(!fingerprintsMatch(*"COC=C"_smiles, xqm1)); + CHECK(!fingerprintsMatch(*"COOC=C"_smiles, xqm1)); + CHECK(fingerprintsMatch(*"COC=C"_smiles, xqm2)); + CHECK(fingerprintsMatch(*"COOC=C"_smiles, xqm2)); } SECTION("TautomerQuery") { auto mol1 = "CC1OC(N)=N1"_smiles; @@ -266,6 +315,10 @@ TEST_CASE("adjustQueryProperties") { CHECK(SubstructMatch(*"CC1(F)OC(=N)N1"_smiles, xqm1).size() == 1); CHECK(SubstructMatch(*"CC1(F)OC(N)=N1"_smiles, xqm2).empty()); CHECK(SubstructMatch(*"CC1(F)OC(=N)N1"_smiles, xqm2).empty()); + CHECK(fingerprintsMatch(*"CC1OC(N)=N1"_smiles, xqm1)); + CHECK(fingerprintsMatch(*"CC1OC(N)=N1"_smiles, xqm2)); + CHECK(fingerprintsMatch(*"CC1(F)OC(N)=N1"_smiles, xqm1)); + CHECK(fingerprintsMatch(*"CC1(F)OC(=N)N1"_smiles, xqm1)); } SECTION("TautomerBundle") { auto mol1 = "COCC1OC(N)=N1 |LN:1:1.3|"_smiles; @@ -283,6 +336,12 @@ TEST_CASE("adjustQueryProperties") { CHECK(SubstructMatch(*"COOCC1(F)OC(=N)N1"_smiles, xqm1).size() == 1); CHECK(SubstructMatch(*"COCC1(F)OC(N)=N1"_smiles, xqm2).empty()); CHECK(SubstructMatch(*"COOCC1(F)OC(=N)N1"_smiles, xqm2).empty()); + CHECK(fingerprintsMatch(*"COCC1OC(N)=N1"_smiles, xqm1)); + CHECK(fingerprintsMatch(*"COOCC1OC(=N)N1"_smiles, xqm1)); + CHECK(fingerprintsMatch(*"COCC1OC(N)=N1"_smiles, xqm2)); + CHECK(fingerprintsMatch(*"COOCC1OC(=N)N1"_smiles, xqm2)); + CHECK(fingerprintsMatch(*"COCC1(F)OC(N)=N1"_smiles, xqm1)); + CHECK(fingerprintsMatch(*"COOCC1(F)OC(=N)N1"_smiles, xqm1)); } } diff --git a/Code/JavaWrappers/GeneralizedSubstruct.i b/Code/JavaWrappers/GeneralizedSubstruct.i index 89d5b13e3..97c746880 100644 --- a/Code/JavaWrappers/GeneralizedSubstruct.i +++ b/Code/JavaWrappers/GeneralizedSubstruct.i @@ -1,10 +1,21 @@ + +#if SWIG_VERSION >= 0x040101 +%include +%unique_ptr(ExplicitBitVect) +#endif + %{ #include %} // %include "std_unique_ptr.i" // %unique_ptr(ExtendedQueryMol) +#if SWIG_VERSION < 0x040101 +%ignore patternFingerprintTargetMol(const ROMol &mol, unsigned int fpSize = 2048U); +%ignore 
patternFingerprintQuery(unsigned int fpSize = 2048U) const; +#endif + %ignore ExtendedQueryMol(std::unique_ptr mol); %ignore ExtendedQueryMol(std::unique_ptr mol); %ignore ExtendedQueryMol(std::unique_ptr mol); @@ -14,3 +25,9 @@ %include "GraphMol/GeneralizedSubstruct/XQMol.h"; +%extend RDKit::GeneralizedSubstruct::ExtendedQueryMol { /* Adds a wrapper-side method that forwards to the free function SubstructMatch(target, *$self, ps) and returns its result. */ + std::vector< std::vector > > getSubstructMatches(RDKit::ROMol &target,RDKit::SubstructMatchParameters ps = RDKit::SubstructMatchParameters()){ + std::vector mvs = SubstructMatch(target, *($self),ps); + return mvs; + }; +} diff --git a/Code/JavaWrappers/csharp_wrapper/GraphMolCSharp.i b/Code/JavaWrappers/csharp_wrapper/GraphMolCSharp.i index 3b2f80c28..ee6dd8f64 100644 --- a/Code/JavaWrappers/csharp_wrapper/GraphMolCSharp.i +++ b/Code/JavaWrappers/csharp_wrapper/GraphMolCSharp.i @@ -245,6 +245,7 @@ typedef unsigned long long int uintmax_t; %include "../MolHash.i" %include "../Abbreviations.i" %include "../Streams.i" +%include "../GeneralizedSubstruct.i" // Create a class to throw various sorts of errors for testing. 
Required for unit tests in ErrorHandlingTests.java diff --git a/Code/JavaWrappers/csharp_wrapper/RDKitCSharpTest/GeneralizedSubstructTest.cs b/Code/JavaWrappers/csharp_wrapper/RDKitCSharpTest/GeneralizedSubstructTest.cs new file mode 100644 index 000000000..8e1520f8c --- /dev/null +++ b/Code/JavaWrappers/csharp_wrapper/RDKitCSharpTest/GeneralizedSubstructTest.cs @@ -0,0 +1,56 @@ +using GraphMolWrap; +using Xunit; + +namespace RdkitTests +{ + public class GeneralizedSubstructTest /* xUnit tests for the wrapped ExtendedQueryMol: substructure match counts and pattern-fingerprint screening. */ + { + private bool FingerprintsMatch(ExtendedQueryMol queryMol, RWMol target) /* Fingerprints query and target, asserts both have at least one bit set, and returns RDKFuncs.AllProbeBitsMatch(query, target). */ + { + var queryFingerprint = queryMol.patternFingerprintQuery(); + var targetFingerprint = RDKFuncs.patternFingerprintTargetMol(target); + Assert.True(queryFingerprint.getNumOnBits() > 0); + Assert.True(targetFingerprint.getNumOnBits() > 0); + var match = RDKFuncs.AllProbeBitsMatch(queryFingerprint, targetFingerprint); + return match; + } + + [Fact] + public void TestControlSteps() /* Builds four extended queries from one SMILES with the second and third createExtendedQueryMol arguments toggled, then checks match counts and fingerprint screening against three targets. */ + { + var queryMol = RWMol.MolFromSmiles("COCC1OC(N)=N1 |LN:1:1.3|"); + var xqm1 = RDKFuncs.createExtendedQueryMol(queryMol); + var xqm2 = RDKFuncs.createExtendedQueryMol(queryMol, false); + var xqm3 = RDKFuncs.createExtendedQueryMol(queryMol, true, false); + var xqm4 = RDKFuncs.createExtendedQueryMol(queryMol, false, false); + + var mol1 = RWMol.MolFromSmiles("COCC1OC(N)=N1"); + Assert.Equal(1, xqm1.getSubstructMatches(mol1).Count); + Assert.Equal(1, xqm2.getSubstructMatches(mol1).Count); + Assert.Equal(1, xqm3.getSubstructMatches(mol1).Count); + Assert.Equal(1, xqm4.getSubstructMatches(mol1).Count); + Assert.True(FingerprintsMatch(xqm1, mol1)); + Assert.True(FingerprintsMatch(xqm2, mol1)); + Assert.True(FingerprintsMatch(xqm3, mol1)); + Assert.True(FingerprintsMatch(xqm4, mol1)); + + var mol2 = RWMol.MolFromSmiles("COCC1OC(=N)N1"); + Assert.Equal(1, xqm1.getSubstructMatches(mol2).Count); + Assert.Equal(1, xqm2.getSubstructMatches(mol2).Count); + Assert.Equal(0, xqm3.getSubstructMatches(mol2).Count); + Assert.Equal(0, 
xqm4.getSubstructMatches(mol2).Count); + Assert.True(FingerprintsMatch(xqm1, mol2)); + Assert.True(FingerprintsMatch(xqm2, mol2)); + Assert.False(FingerprintsMatch(xqm3, mol2)); + Assert.False(FingerprintsMatch(xqm4, mol2)); + + var mol3 = RWMol.MolFromSmiles("COOCC1OC(N)=N1"); /* for this target only xqm1 and xqm3 are fingerprint-checked; xqm2 and xqm4 have no substructure match and are not screened here */ + Assert.Equal(1, xqm1.getSubstructMatches(mol3).Count); + Assert.Equal(0, xqm2.getSubstructMatches(mol3).Count); + Assert.Equal(1, xqm3.getSubstructMatches(mol3).Count); + Assert.Equal(0, xqm4.getSubstructMatches(mol3).Count); + Assert.True(FingerprintsMatch(xqm1, mol3)); + Assert.True(FingerprintsMatch(xqm3, mol3)); + } + } +} \ No newline at end of file