C sharp rascal mcs wrapper (#7390)

* Added Wrapper Files

* Added interface file

* Edited interface file

* Rascal SWIG wrapper finished

* Remove from Java wrapper

* add java wrappers too

* Update GraphMolCSharp.i

* Update Code/JavaWrappers/RascalMCES.i

Co-authored-by: Paolo Tosco <paolo.tosco.mail@gmail.com>

* Update Code/JavaWrappers/RascalMCES.i

Co-authored-by: Paolo Tosco <paolo.tosco.mail@gmail.com>

---------

Co-authored-by: Greg Landrum <greg.landrum@gmail.com>
Co-authored-by: Paolo Tosco <paolo.tosco.mail@gmail.com>
This commit is contained in:
Gareth Jones
2024-05-04 21:52:24 -06:00
committed by GitHub
parent 31f11952e0
commit 7ec4661f84
7 changed files with 302 additions and 1 deletions

View File

@@ -20,7 +20,7 @@ if(RDK_BUILD_INCHI_SUPPORT)
set(swigRDKitLibList "${swigRDKitLibList}RDInchiLib;${INCHI_LIBRARIES};")
endif(RDK_BUILD_INCHI_SUPPORT)
set(swigRDKitLibList "${swigRDKitLibList}"
"GeneralizedSubstruct;Abbreviations;ScaffoldNetwork;MolHash;RGroupDecomposition;SubstructLibrary;TautomerQuery;"
"RascalMCES;GeneralizedSubstruct;Abbreviations;ScaffoldNetwork;MolHash;RGroupDecomposition;SubstructLibrary;TautomerQuery;"
"MolEnumerator;"
"MolStandardize;FilterCatalog;Catalogs;FMCS;MolDraw2D;FileParsers;SmilesParse;MarvinParser;"
"Depictor;SubstructMatch;ChemReactions;Fingerprints;ChemTransforms;"

View File

@@ -0,0 +1,69 @@
//
// Copyright (C) 2020 Gareth Jones, Glysade LLC
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
// SWIG's std::vector support, needed for the %template instantiations below.
%include "std_vector.i"
// Headers copied verbatim into the generated wrapper source so that it compiles.
%{
#include <GraphMol/RascalMCES/RascalClusterOptions.h>
#include <GraphMol/RascalMCES/RascalOptions.h>
#include <GraphMol/RascalMCES/RascalResult.h>
#include <GraphMol/RascalMCES/RascalMCES.h>
%}
// Headers parsed by SWIG to generate the wrapper classes. Each %ignore has to
// appear before the %include of the header declaring the ignored symbol.
%include <GraphMol/RascalMCES/RascalClusterOptions.h>
%include <GraphMol/RascalMCES/RascalOptions.h>
// getMcesMol is hidden here; the %extend of RascalResult further down adds
// getMCESMol, which returns an RDKit::ROMol * instead.
%ignore RDKit::RascalMCES::RascalResult::getMcesMol;
%include <GraphMol/RascalMCES/RascalResult.h>
// The clustering functions are hidden here; RascalApp::RascalCluster and
// RascalApp::RascalButinaCluster further down call them instead.
%ignore RDKit::RascalMCES::rascalCluster;
%ignore RDKit::RascalMCES::rascalButinaCluster;
%include <GraphMol/RascalMCES/RascalMCES.h>
// The extra functions in extend_std_vector.i do not play well with RascalResult
%ignore std::vector<RDKit::RascalMCES::RascalResult>::equals;
%ignore std::vector<RDKit::RascalMCES::RascalResult>::vector(size_type);
// Target-language names for the vector types used by the Rascal wrappers:
// a vector of RascalResult and a vector of vectors of unsigned int.
%template(RascalResult_Vect) std::vector<RDKit::RascalMCES::RascalResult>;
%template(Unsigned_Vect_Vect) std::vector<std::vector<unsigned int>>;
// RascalResult::getMcesMol() returns a std::shared_ptr, while the rest of this
// wrapper works with boost::shared_ptr, so the molecule is handed out as a
// plain pointer instead.
%extend RDKit::RascalMCES::RascalResult {
  // Returns the raw pointer held by the shared_ptr that getMcesMol() yields.
  // NOTE(review): the pointer is non-owning; presumably it is only valid while
  // the RascalResult keeps its own reference to the molecule - confirm.
  RDKit::ROMol *getMCESMol() {
    return ($self)->getMcesMol().get();
  }
}
%inline %{
namespace RDKit {
namespace RascalMCES {
// Deliberately empty: it only provides a class for the %extend block that
// follows to attach the static clustering functions to.
class RascalApp {};
}  // namespace RascalMCES
}  // namespace RDKit
%}
// Static clustering entry points. rascalCluster and rascalButinaCluster take
// std::shared_ptr molecules, whereas the wrappers pass boost::shared_ptr ones,
// so each input molecule is copied into a std::shared_ptr-owned ROMol first.
%extend RDKit::RascalMCES::RascalApp {
  // Runs RDKit::RascalMCES::rascalCluster on copies of mols.
  //   mols           - molecules to cluster; every entry is dereferenced, so
  //                    none of them may be null.
  //   clusterOptions - options forwarded unchanged to rascalCluster.
  // Returns whatever rascalCluster returns for the copied molecules.
  static std::vector<std::vector<unsigned int> > RascalCluster(const std::vector<boost::shared_ptr<RDKit::ROMol> >& mols, const RDKit::RascalMCES::RascalClusterOptions& clusterOptions=RascalClusterOptions()) {
    std::vector<std::shared_ptr<RDKit::ROMol> > rascalMolecules;
    // Reserving up front avoids reallocations while the copies are appended.
    rascalMolecules.reserve(mols.size());
    // Iterate by reference: copying the boost::shared_ptr is not needed.
    for (const auto &molIn : mols) {
      // make_shared ties the new ROMol to its owner immediately, so nothing
      // leaks if appending to the vector throws.
      rascalMolecules.push_back(std::make_shared<RDKit::ROMol>(*molIn));
    }
    return RDKit::RascalMCES::rascalCluster(rascalMolecules, clusterOptions);
  }
  // Runs RDKit::RascalMCES::rascalButinaCluster on copies of mols.
  //   mols           - molecules to cluster; every entry is dereferenced, so
  //                    none of them may be null.
  //   clusterOptions - options forwarded unchanged to rascalButinaCluster.
  // Returns whatever rascalButinaCluster returns for the copied molecules.
  static std::vector<std::vector<unsigned int> > RascalButinaCluster(const std::vector<boost::shared_ptr<RDKit::ROMol> >& mols, const RDKit::RascalMCES::RascalClusterOptions& clusterOptions=RascalClusterOptions()) {
    std::vector<std::shared_ptr<RDKit::ROMol> > rascalMolecules;
    rascalMolecules.reserve(mols.size());
    for (const auto &molIn : mols) {
      rascalMolecules.push_back(std::make_shared<RDKit::ROMol>(*molIn));
    }
    return RDKit::RascalMCES::rascalButinaCluster(rascalMolecules, clusterOptions);
  }
}

View File

@@ -256,6 +256,7 @@ typedef unsigned long long int uintmax_t;
%include "../Abbreviations.i"
%include "../Streams.i"
%include "../GeneralizedSubstruct.i"
%include "../RascalMCES.i"
// Create a class to throw various sorts of errors for testing. Required for unit tests in ErrorHandlingTests.java

View File

@@ -0,0 +1,143 @@
//
// Copyright (C) 2020 Gareth Jones, Glysade LLC
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using GraphMolWrap;
using Xunit;
namespace RdkitTests;
/// <summary>
/// Tests for the RascalMCES wrappers: <c>RDKFuncs.rascalMCES</c>,
/// <c>RascalApp.RascalCluster</c> and <c>RascalApp.RascalButinaCluster</c>.
/// </summary>
public class RascalMCESTest
{
    /// <summary>
    /// Reads every molecule from a SMILES file located under the directory
    /// named by the RDBASE environment variable.
    /// </summary>
    /// <param name="relativePathParts">Path components of the file, relative to RDBASE.</param>
    /// <returns>The molecules in the order the supplier returned them.</returns>
    /// <exception cref="InvalidOperationException">RDBASE is not set.</exception>
    private static ROMol_Vect LoadMolecules(params string[] relativePathParts)
    {
        // Fail with an explicit message rather than the ArgumentNullException
        // Path.Combine would raise for a null first component.
        var rdBase = Environment.GetEnvironmentVariable("RDBASE")
            ?? throw new InvalidOperationException(
                "The RDBASE environment variable must be set to run the RascalMCES tests.");
        var fileName = Path.Combine(rdBase, Path.Combine(relativePathParts));

        // Dispose the supplier as soon as the molecules have been read.
        using var supplier = new SmilesMolSupplier(fileName, "\t", 1, 0, false);
        var molecules = new ROMol_Vect();
        while (!supplier.atEnd())
        {
            molecules.Add(supplier.next());
        }

        return molecules;
    }

    /// <summary>
    /// Asserts that there are as many clusters as expected sizes and that the
    /// i-th cluster has the i-th expected size.
    /// </summary>
    private static void AssertClusterSizes(int[] expectedSizes, Unsigned_Vect_Vect clusters)
    {
        Assert.Equal(expectedSizes.Length, clusters.Count);
        for (int i = 0; i < expectedSizes.Length; i++)
        {
            Assert.Equal(expectedSizes[i], clusters[i].Count);
        }
    }

    [Fact]
    public void TestTestosteroneVsEstradiol()
    {
        var m1 = RWMol.MolFromSmiles("CC12CCC3C(C1CCC2O)CCC4=CC(=O)CCC34C");
        Assert.NotNull(m1);
        var m2 = RWMol.MolFromSmiles("CC12CCC3C(C1CCC2O)CCC4=C3C=CC(=C4)O");
        Assert.NotNull(m2);
        var options = new RascalOptions();
        options.similarityThreshold = 0.6;

        var results = RDKFuncs.rascalMCES(m1, m2, options);

        // Exactly one result is expected; Assert.Single also hands it back.
        var result = Assert.Single(results);

        // Expected (first, second) values of each bond match, in order.
        var expectedBondMatches = new (int First, int Second)[]
        {
            (0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6), (7, 7),
            (8, 8), (9, 9), (10, 10), (11, 11), (12, 12),
            (20, 19), (21, 20), (22, 21)
        };
        var bondMatches = result.getBondMatches().ToList();
        Assert.Equal(expectedBondMatches.Length, bondMatches.Count);
        for (int i = 0; i < expectedBondMatches.Length; i++)
        {
            Assert.Equal(expectedBondMatches[i].First, bondMatches[i].first);
            Assert.Equal(expectedBondMatches[i].Second, bondMatches[i].second);
        }

        // Compared to 4 decimal places.
        Assert.Equal(0.4966, result.getSimilarity(), 4);

        // The SMARTS of the result must match both input molecules.
        var queryMol = RWMol.MolFromSmarts(result.getSmarts());
        Assert.True(m1.hasSubstructMatch(queryMol));
        Assert.True(m2.hasSubstructMatch(queryMol));
    }

    [Fact(Skip = "Works but takes a long time")]
    public void TestRascalCluster()
    {
        var molecules = LoadMolecules("Code", "GraphMol", "RascalMCES", "data", "chembl_1907596.smi");
        var clusterOptions = new RascalClusterOptions();
        clusterOptions.similarityCutoff = 0.7;

        var clusters = RascalApp.RascalCluster(molecules, clusterOptions);

        var expectedClusterSizes = new[]
        {
            342, 71, 64, 33, 23, 11, 10, 6, 6, 5, 5,
            4, 3, 3, 3, 3, 3, 2, 2, 2, 14
        };
        AssertClusterSizes(expectedClusterSizes, clusters);
    }

    [Fact]
    public void TestSmallButina()
    {
        var molecules = LoadMolecules("Contrib", "Fastcluster", "cdk2.smi");

        var clusters = RascalApp.RascalButinaCluster(molecules);

        var expectedClusterSizes = new[]
        {
            6, 6, 6, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1,
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
        };
        AssertClusterSizes(expectedClusterSizes, clusters);
    }

    [Fact]
    public void TestSmall()
    {
        var molecules = LoadMolecules("Contrib", "Fastcluster", "cdk2.smi");

        var clusters = RascalApp.RascalCluster(molecules);

        var expectedClusterSizes = new[] { 7, 7, 6, 2, 2, 2, 2, 20 };
        AssertClusterSizes(expectedClusterSizes, clusters);
    }
}

View File

@@ -303,6 +303,10 @@ ADD_TEST(JavaWrapperTests
java -Djava.library.path=${CMAKE_CURRENT_SOURCE_DIR}
-cp "${JUNIT_JAR}${PATH_SEP}${CMAKE_JAVA_TEST_OUTDIR}${PATH_SEP}${CMAKE_CURRENT_SOURCE_DIR}/org.RDKit.jar"
org.RDKit.WrapperTests)
ADD_TEST(JavaRascalMCES
java -Djava.library.path=${CMAKE_CURRENT_SOURCE_DIR}
-cp "${JUNIT_JAR}${PATH_SEP}${CMAKE_JAVA_TEST_OUTDIR}${PATH_SEP}${CMAKE_CURRENT_SOURCE_DIR}/org.RDKit.jar"
org.RDKit.RascalMCESTest)
if(RDK_BUILD_AVALON_SUPPORT)
ADD_TEST(JavaAvalonTests

View File

@@ -225,6 +225,7 @@ typedef unsigned long long int uintmax_t;
%include "../Abbreviations.i"
%include "../Streams.i"
%include "../GeneralizedSubstruct.i"
%include "../RascalMCES.i"
// Create a class to throw various sorts of errors for testing. Required for unit tests in ErrorHandlingTests.java
#ifdef INCLUDE_ERROR_GENERATOR

View File

@@ -0,0 +1,83 @@
/*
*
*
* Copyright (c) 2024, Greg Landrum
* All rights reserved.
*
* @@ All Rights Reserved @@
* This file is part of the RDKit.
* The contents are covered by the terms of the BSD license
* which is included in the file license.txt, found at the root
* of the RDKit source tree.
*/
package org.RDKit;
import static org.junit.Assert.*;
import java.io.*;
import java.util.ArrayList;
import org.junit.*;
/**
 * Tests for the RascalMCES wrappers: {@code RDKFuncs.rascalMCES},
 * {@code RascalApp.RascalButinaCluster} and {@code RascalApp.RascalCluster}.
 */
public class RascalMCESTest extends GraphMolTest {
  /** The cdk2.smi file under Contrib/Fastcluster of the RDKit base directory. */
  private File baseTestPath;

  @Before
  public void setUp() {
    File base = getRdBase();
    baseTestPath = new File(base, "Contrib" + File.separator + "Fastcluster" + File.separator + "cdk2.smi");
  }

  @After
  public void tearDown() {
  }

  /**
   * Reads every molecule from {@link #baseTestPath}.
   *
   * @return the molecules in the order the supplier returned them
   */
  private ROMol_Vect readTestMolecules() {
    SmilesMolSupplier suppl = new SmilesMolSupplier(baseTestPath.getPath(), "\t", 1, 0, false);
    ROMol_Vect ms = new ROMol_Vect();
    // Check atEnd() before each read so next() is never called on an
    // exhausted supplier.
    while (!suppl.atEnd()) {
      ms.add(suppl.next());
    }
    return ms;
  }

  @Test
  public void test1Rascal() {
    ROMol m1 = RWMol.MolFromSmiles("CC12CCC3C(C1CCC2O)CCC4=CC(=O)CCC34C");
    ROMol m2 = RWMol.MolFromSmiles("CC12CCC3C(C1CCC2O)CCC4=C3C=CC(=C4)O");
    RascalOptions options = new RascalOptions();
    options.setSimilarityThreshold(0.6);
    RascalResult_Vect res = RDKFuncs.rascalMCES(m1, m2, options);
    // JUnit expects (expected, actual); this order keeps failure messages right.
    assertEquals(1, res.size());
    assertEquals("CC12CCC(-C(-C1CCC2O)-CC-[#6])-[#6]", res.get(0).getSmarts());
  }

  @Test
  public void test2RascalButina() {
    ROMol_Vect ms = readTestMolecules();
    Unsigned_Vect_Vect res = RascalApp.RascalButinaCluster(ms);
    assertEquals(29, res.size());
    assertEquals(6, res.get(0).size());
    assertEquals(6, res.get(1).size());
  }

  @Test
  public void test3RascalCluster() {
    ROMol_Vect ms = readTestMolecules();
    Unsigned_Vect_Vect res = RascalApp.RascalCluster(ms);
    assertEquals(8, res.size());
    assertEquals(7, res.get(0).size());
    assertEquals(7, res.get(1).size());
  }

  /** Runs this test class through the JUnit command-line runner. */
  public static void main(String args[]) {
    org.junit.runner.JUnitCore.main("org.RDKit.RascalMCESTest");
  }
}