From 7ec4661f849d0a7dee7ce25df7af8e027b05aadc Mon Sep 17 00:00:00 2001 From: Gareth Jones Date: Sat, 4 May 2024 21:52:24 -0600 Subject: [PATCH] C sharp rascal mcs wrapper (#7390) * Added Wrapper Files * Addd interface file * Edited interface file * Rascal SWIG wrapper finished * Remove from Java wrapper * add java wrappers too * Update GraphMolCSharp.i * Update Code/JavaWrappers/RascalMCES.i Co-authored-by: Paolo Tosco * Update Code/JavaWrappers/RascalMCES.i Co-authored-by: Paolo Tosco --------- Co-authored-by: Greg Landrum Co-authored-by: Paolo Tosco --- Code/JavaWrappers/CMakeLists.txt | 2 +- Code/JavaWrappers/RascalMCES.i | 69 +++++++++ .../csharp_wrapper/GraphMolCSharp.i | 1 + .../RdkitTests/RascalMCESTest.cs | 143 ++++++++++++++++++ Code/JavaWrappers/gmwrapper/CMakeLists.txt | 4 + Code/JavaWrappers/gmwrapper/GraphMolJava.i | 1 + .../src-test/org/RDKit/RascalMCESTest.java | 83 ++++++++++ 7 files changed, 302 insertions(+), 1 deletion(-) create mode 100644 Code/JavaWrappers/RascalMCES.i create mode 100644 Code/JavaWrappers/csharp_wrapper/RdkitTests/RascalMCESTest.cs create mode 100644 Code/JavaWrappers/gmwrapper/src-test/org/RDKit/RascalMCESTest.java diff --git a/Code/JavaWrappers/CMakeLists.txt b/Code/JavaWrappers/CMakeLists.txt index 0f31ce974..a88756a6a 100644 --- a/Code/JavaWrappers/CMakeLists.txt +++ b/Code/JavaWrappers/CMakeLists.txt @@ -20,7 +20,7 @@ if(RDK_BUILD_INCHI_SUPPORT) set(swigRDKitLibList "${swigRDKitLibList}RDInchiLib;${INCHI_LIBRARIES};") endif(RDK_BUILD_INCHI_SUPPORT) set(swigRDKitLibList "${swigRDKitLibList}" - "GeneralizedSubstruct;Abbreviations;ScaffoldNetwork;MolHash;RGroupDecomposition;SubstructLibrary;TautomerQuery;" + "RascalMCES;GeneralizedSubstruct;Abbreviations;ScaffoldNetwork;MolHash;RGroupDecomposition;SubstructLibrary;TautomerQuery;" "MolEnumerator;" "MolStandardize;FilterCatalog;Catalogs;FMCS;MolDraw2D;FileParsers;SmilesParse;MarvinParser;" "Depictor;SubstructMatch;ChemReactions;Fingerprints;ChemTransforms;" diff --git 
a/Code/JavaWrappers/RascalMCES.i b/Code/JavaWrappers/RascalMCES.i
new file mode 100644
index 000000000..f723fdac9
--- /dev/null
+++ b/Code/JavaWrappers/RascalMCES.i
@@ -0,0 +1,69 @@
+//
+// Copyright (C) 2020 Gareth Jones, Glysade LLC
+//
+// @@ All Rights Reserved @@
+// This file is part of the RDKit.
+// The contents are covered by the terms of the BSD license
+// which is included in the file license.txt, found at the root
+// of the RDKit source tree.
+//
+
+%include "std_vector.i"
+// NOTE(review): the #include / %include lines below name no header and several std::vector types lack template arguments in this copy - confirm against the original file.
+%{
+#include
+#include
+#include
+#include
+%}
+
+// Declarations handed to SWIG. getMcesMol, rascalCluster and rascalButinaCluster are %ignore'd
+// here and re-exposed in wrapped form by the %extend blocks further down.
+%include
+%include
+%ignore RDKit::RascalMCES::RascalResult::getMcesMol;
+%include
+%ignore RDKit::RascalMCES::rascalCluster;
+%ignore RDKit::RascalMCES::rascalButinaCluster;
+%include
+// The extra functions in extend_std_vector.i do not play well with RascalResult
+%ignore std::vector::equals;
+%ignore std::vector::vector(size_type);
+%template(RascalResult_Vect) std::vector;
+%template(Unsigned_Vect_Vect) std::vector>;
+
+// The Rascal code uses std::shared_ptr rather than boost::shared_ptr
+// getMCESMol() stands in for the ignored getMcesMol() and returns .get() of the pointer object that call yields. NOTE(review): confirm the molecule outlives the local copy.
+%extend RDKit::RascalMCES::RascalResult {
+  RDKit::ROMol *getMCESMol() {
+    auto shared_ptr = ($self)->getMcesMol();
+    return shared_ptr.get();
+  }
+}
+// RascalApp is an empty class; the %extend block below attaches the clustering functions to it as static methods.
+%inline %{
+  namespace RDKit {
+    namespace RascalMCES {
+      class RascalApp {
+      };
+    }
+  }
+%}
+// RascalCluster / RascalButinaCluster copy every input molecule into a newly allocated ROMol stored in
+// rascalMolecules, then forward to the ignored rascalCluster / rascalButinaCluster together with clusterOptions.
+%extend RDKit::RascalMCES::RascalApp {
+  static std::vector > RascalCluster(const std::vector >& mols, const RDKit::RascalMCES::RascalClusterOptions& clusterOptions=RascalClusterOptions()) {
+    std::vector > rascalMolecules;
+    for (auto molIn: mols) {
+      rascalMolecules.emplace_back(new RDKit::ROMol(*molIn));
+    }
+    return RDKit::RascalMCES::rascalCluster(rascalMolecules, clusterOptions);
+  }
+  static std::vector > RascalButinaCluster(const std::vector >& mols, const RDKit::RascalMCES::RascalClusterOptions& clusterOptions=RascalClusterOptions()) {
+    std::vector > rascalMolecules;
+    for (auto molIn: mols) {
+      rascalMolecules.emplace_back(new RDKit::ROMol(*molIn));
+    }
+    return RDKit::RascalMCES::rascalButinaCluster(rascalMolecules, clusterOptions);
+  }
+}
diff --git a/Code/JavaWrappers/csharp_wrapper/GraphMolCSharp.i b/Code/JavaWrappers/csharp_wrapper/GraphMolCSharp.i
index 3be40c42a..14ad6a92b 100644
--- a/Code/JavaWrappers/csharp_wrapper/GraphMolCSharp.i
+++ b/Code/JavaWrappers/csharp_wrapper/GraphMolCSharp.i
@@ -256,6 +256,7 @@ typedef unsigned long long int uintmax_t;
 %include "../Abbreviations.i"
 %include "../Streams.i"
 %include "../GeneralizedSubstruct.i"
+%include "../RascalMCES.i"
 
 
 // Create a class to throw various sorts of errors for testing. Required for unit tests in ErrorHandlingTests.java
diff --git a/Code/JavaWrappers/csharp_wrapper/RdkitTests/RascalMCESTest.cs b/Code/JavaWrappers/csharp_wrapper/RdkitTests/RascalMCESTest.cs
new file mode 100644
index 000000000..35c848bad
--- /dev/null
+++ b/Code/JavaWrappers/csharp_wrapper/RdkitTests/RascalMCESTest.cs
@@ -0,0 +1,143 @@
+//
+// Copyright (C) 2020 Gareth Jones, Glysade LLC
+//
+// @@ All Rights Reserved @@
+// This file is part of the RDKit.
+// The contents are covered by the terms of the BSD license
+// which is included in the file license.txt, found at the root
+// of the RDKit source tree.
+//
+
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using GraphMolWrap;
+using Xunit;
+
+namespace RdkitTests;
+
+/// <summary>
+/// Tests for the RascalMCES wrappers: <c>RDKFuncs.rascalMCES</c>,
+/// <c>RascalApp.RascalCluster</c> and <c>RascalApp.RascalButinaCluster</c>.
+/// </summary>
+public class RascalMCESTest
+{
+    /// <summary>
+    /// Reads every molecule from a SMILES file located under the directory named by the
+    /// RDBASE environment variable.
+    /// </summary>
+    /// <param name="relativePath">Path segments of the file, relative to RDBASE.</param>
+    /// <returns>The molecules returned by the supplier, in the order it yields them.</returns>
+    /// <exception cref="InvalidOperationException">RDBASE is unset or empty.</exception>
+    private static ROMol_Vect LoadMolecules(params string[] relativePath)
+    {
+        var rdBase = Environment.GetEnvironmentVariable("RDBASE");
+        if (string.IsNullOrEmpty(rdBase))
+        {
+            // Path.Combine would otherwise throw an ArgumentNullException that hides the cause.
+            throw new InvalidOperationException("The RDBASE environment variable must be set.");
+        }
+
+        var fileName = Path.Combine(rdBase, Path.Combine(relativePath));
+        var supplier = new SmilesMolSupplier(fileName, "\t", 1, 0, false);
+        var molecules = new ROMol_Vect();
+        while (!supplier.atEnd())
+        {
+            molecules.Add(supplier.next());
+        }
+
+        return molecules;
+    }
+
+    /// <summary>
+    /// Asserts that there is one cluster per expected size and that the i-th cluster has
+    /// the i-th expected size.
+    /// </summary>
+    /// <param name="expectedSizes">Expected cluster sizes, in cluster order.</param>
+    /// <param name="clusters">Clusters returned by the wrapper.</param>
+    private static void AssertClusterSizes(int[] expectedSizes, Unsigned_Vect_Vect clusters)
+    {
+        Assert.Equal(expectedSizes.Length, clusters.Count);
+        for (var i = 0; i < expectedSizes.Length; i++)
+        {
+            Assert.Equal(expectedSizes[i], clusters[i].Count);
+        }
+    }
+
+    /// <summary>Expects a single MCES result with known bond matches and similarity.</summary>
+    [Fact]
+    public void TestTestosteroneVsEstradiol()
+    {
+        var m1 = RWMol.MolFromSmiles("CC12CCC3C(C1CCC2O)CCC4=CC(=O)CCC34C");
+        Assert.NotNull(m1);
+        var m2 = RWMol.MolFromSmiles("CC12CCC3C(C1CCC2O)CCC4=C3C=CC(=C4)O");
+        Assert.NotNull(m2);
+
+        var options = new RascalOptions();
+        options.similarityThreshold = 0.6;
+        var results = RDKFuncs.rascalMCES(m1, m2, options);
+        var result = Assert.Single(results);
+
+        // Expected (first, second) pairs, compared in the order getBondMatches() returns them.
+        (int First, int Second)[] expectedBondMatches =
+        {
+            (0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6), (7, 7),
+            (8, 8), (9, 9), (10, 10), (11, 11), (12, 12), (20, 19), (21, 20), (22, 21),
+        };
+        var bondMatches = result.getBondMatches().ToList();
+        Assert.Equal(expectedBondMatches.Length, bondMatches.Count);
+        for (var i = 0; i < expectedBondMatches.Length; i++)
+        {
+            Assert.Equal(expectedBondMatches[i].First, bondMatches[i].first);
+            Assert.Equal(expectedBondMatches[i].Second, bondMatches[i].second);
+        }
+
+        Assert.Equal(0.4966, result.getSimilarity(), 4);
+        var queryMol = RWMol.MolFromSmarts(result.getSmarts());
+        Assert.True(m1.hasSubstructMatch(queryMol));
+        Assert.True(m2.hasSubstructMatch(queryMol));
+    }
+
+    /// <summary>Clusters chembl_1907596.smi with a 0.7 similarity cutoff (skipped: slow).</summary>
+    [Fact(Skip = "Works but takes a long time")]
+    public void TestRascalCluster()
+    {
+        var molecules = LoadMolecules("Code", "GraphMol", "RascalMCES", "data", "chembl_1907596.smi");
+        var clusterOptions = new RascalClusterOptions();
+        clusterOptions.similarityCutoff = 0.7;
+
+        var clusters = RascalApp.RascalCluster(molecules, clusterOptions);
+
+        int[] expectedClusterSizes =
+        {
+            342, 71, 64, 33, 23, 11, 10, 6, 6, 5, 5,
+            4, 3, 3, 3, 3, 3, 2, 2, 2, 14
+        };
+        AssertClusterSizes(expectedClusterSizes, clusters);
+    }
+
+    /// <summary>Butina clustering of cdk2.smi with default options.</summary>
+    [Fact]
+    public void TestSmallButina()
+    {
+        var molecules = LoadMolecules("Contrib", "Fastcluster", "cdk2.smi");
+        var clusters = RascalApp.RascalButinaCluster(molecules);
+
+        int[] expectedClusterSizes =
+        {
+            6, 6, 6, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
+        };
+        AssertClusterSizes(expectedClusterSizes, clusters);
+    }
+
+    /// <summary>Rascal clustering of cdk2.smi with default options.</summary>
+    [Fact]
+    public void TestSmall()
+    {
+        var molecules = LoadMolecules("Contrib", "Fastcluster", "cdk2.smi");
+        var clusters = RascalApp.RascalCluster(molecules);
+        AssertClusterSizes(new[] { 7, 7, 6, 2, 2, 2, 2, 20 }, clusters);
+    }
+}
\ No newline at end of file
diff --git a/Code/JavaWrappers/gmwrapper/CMakeLists.txt b/Code/JavaWrappers/gmwrapper/CMakeLists.txt
index 4324dfadd..ef50db449 100644
--- a/Code/JavaWrappers/gmwrapper/CMakeLists.txt
+++ b/Code/JavaWrappers/gmwrapper/CMakeLists.txt
@@ -303,6 +303,10 @@ ADD_TEST(JavaWrapperTests
   java -Djava.library.path=${CMAKE_CURRENT_SOURCE_DIR}
   -cp
"${JUNIT_JAR}${PATH_SEP}${CMAKE_JAVA_TEST_OUTDIR}${PATH_SEP}${CMAKE_CURRENT_SOURCE_DIR}/org.RDKit.jar" org.RDKit.WrapperTests) +ADD_TEST(JavaRascalMCES + java -Djava.library.path=${CMAKE_CURRENT_SOURCE_DIR} + -cp "${JUNIT_JAR}${PATH_SEP}${CMAKE_JAVA_TEST_OUTDIR}${PATH_SEP}${CMAKE_CURRENT_SOURCE_DIR}/org.RDKit.jar" + org.RDKit.RascalMCESTest) if(RDK_BUILD_AVALON_SUPPORT) ADD_TEST(JavaAvalonTests diff --git a/Code/JavaWrappers/gmwrapper/GraphMolJava.i b/Code/JavaWrappers/gmwrapper/GraphMolJava.i index 1a5576a74..0a930086f 100644 --- a/Code/JavaWrappers/gmwrapper/GraphMolJava.i +++ b/Code/JavaWrappers/gmwrapper/GraphMolJava.i @@ -225,6 +225,7 @@ typedef unsigned long long int uintmax_t; %include "../Abbreviations.i" %include "../Streams.i" %include "../GeneralizedSubstruct.i" +%include "../RascalMCES.i" // Create a class to throw various sorts of errors for testing. Required for unit tests in ErrorHandlingTests.java #ifdef INCLUDE_ERROR_GENERATOR diff --git a/Code/JavaWrappers/gmwrapper/src-test/org/RDKit/RascalMCESTest.java b/Code/JavaWrappers/gmwrapper/src-test/org/RDKit/RascalMCESTest.java new file mode 100644 index 000000000..eb0c3ecd7 --- /dev/null +++ b/Code/JavaWrappers/gmwrapper/src-test/org/RDKit/RascalMCESTest.java @@ -0,0 +1,83 @@ +/* + * + * + * Copyright (c) 2024, Greg Landrum + * All rights reserved. + * + * @@ All Rights Reserved @@ + * This file is part of the RDKit. + * The contents are covered by the terms of the BSD license + * which is included in the file license.txt, found at the root + * of the RDKit source tree. 
+ */
+package org.RDKit;
+
+import static org.junit.Assert.*;
+
+import java.io.*;
+import java.util.ArrayList;
+
+import org.junit.*;
+
+/** Tests for the RascalMCES wrappers: rascalMCES, RascalCluster and RascalButinaCluster. */
+public class RascalMCESTest extends GraphMolTest {
+
+  /** The cdk2.smi file under Contrib/Fastcluster of the directory returned by getRdBase(). */
+  private File baseTestPath;
+
+  @Before
+  public void setUp() {
+    File base = getRdBase();
+    baseTestPath = new File(base, "Contrib" + File.separator + "Fastcluster" + File.separator + "cdk2.smi");
+  }
+
+  @After
+  public void tearDown() {
+  }
+
+  /** Reads every molecule from baseTestPath, checking atEnd() before each call to next(). */
+  private ROMol_Vect readTestMolecules() {
+    SmilesMolSupplier suppl = new SmilesMolSupplier(baseTestPath.getPath(), "\t", 1, 0, false);
+    ROMol_Vect ms = new ROMol_Vect();
+    while (!suppl.atEnd()) {
+      ms.add(suppl.next());
+    }
+    return ms;
+  }
+
+  @Test
+  public void test1Rascal() {
+    ROMol m1 = RWMol.MolFromSmiles("CC12CCC3C(C1CCC2O)CCC4=CC(=O)CCC34C");
+    ROMol m2 = RWMol.MolFromSmiles("CC12CCC3C(C1CCC2O)CCC4=C3C=CC(=C4)O");
+    RascalOptions options = new RascalOptions();
+    options.setSimilarityThreshold(0.6);
+    RascalResult_Vect res = RDKFuncs.rascalMCES(m1, m2, options);
+    // assertEquals takes (expected, actual); that order keeps failure messages correct.
+    assertEquals(1, res.size());
+    assertEquals("CC12CCC(-C(-C1CCC2O)-CC-[#6])-[#6]", res.get(0).getSmarts());
+  }
+
+  @Test
+  public void test2RascalButina() {
+    ROMol_Vect ms = readTestMolecules();
+
+    Unsigned_Vect_Vect res = RascalApp.RascalButinaCluster(ms);
+    assertEquals(29, res.size());
+    assertEquals(6, res.get(0).size());
+    assertEquals(6, res.get(1).size());
+  }
+
+  @Test
+  public void test3RascalCluster() {
+    ROMol_Vect ms = readTestMolecules();
+
+    Unsigned_Vect_Vect res = RascalApp.RascalCluster(ms);
+    assertEquals(8, res.size());
+    assertEquals(7, res.get(0).size());
+    assertEquals(7, res.get(1).size());
+  }
+
+  public static void main(String args[]) {
+    org.junit.runner.JUnitCore.main("org.RDKit.RascalMCESTest");
+  }
+}