From 8e9adcd467b82a65249bc298c795e416deb0cc64 Mon Sep 17 00:00:00 2001 From: Gareth Jones Date: Thu, 14 May 2026 01:07:56 -0600 Subject: [PATCH] Adds some features to the C# SWIG wrappers (#9274) --- Code/GraphMol/MolStandardize/Tautomer.h | 2 + Code/JavaWrappers/ROMol.i | 3 + Code/JavaWrappers/Tautomer.i | 18 +++ .../csharp_wrapper/GraphMolCSharp.i | 3 + .../RdkitTests/TestMolStandardize.cs | 98 ++++++++++++- .../csharp_wrapper/RdkitTests/TestTautomer.cs | 134 ++++++++++++++++++ 6 files changed, 257 insertions(+), 1 deletion(-) create mode 100644 Code/JavaWrappers/Tautomer.i create mode 100644 Code/JavaWrappers/csharp_wrapper/RdkitTests/TestTautomer.cs diff --git a/Code/GraphMol/MolStandardize/Tautomer.h b/Code/GraphMol/MolStandardize/Tautomer.h index 3fd60deca..d2d7cef2c 100644 --- a/Code/GraphMol/MolStandardize/Tautomer.h +++ b/Code/GraphMol/MolStandardize/Tautomer.h @@ -449,6 +449,7 @@ class RDKIT_MOLSTANDARDIZE_EXPORT TautomerEnumerator { /// When Iterable is TautomerEnumeratorResult we use the other non-templated /// overload for efficiency (TautomerEnumeratorResult already has SMILES so no /// need to recompute them) + /// caller owns the pointer template ::value, @@ -504,6 +505,7 @@ class RDKIT_MOLSTANDARDIZE_EXPORT TautomerEnumerator { */ /// When \p scoreFunc is empty (default), an optimized scorer is created /// that pre-filters substructure patterns once for the input molecule. + /// caller owns the pointer ROMol *canonicalize( const ROMol &mol, boost::function scoreFunc = {}) const; diff --git a/Code/JavaWrappers/ROMol.i b/Code/JavaWrappers/ROMol.i index 242017926..7211fc94b 100644 --- a/Code/JavaWrappers/ROMol.i +++ b/Code/JavaWrappers/ROMol.i @@ -77,6 +77,7 @@ %template(ROMol_Vect) std::vector< boost::shared_ptr >; %template(ROMol_Vect_Vect) std::vector< std::vector< boost::shared_ptr > >; %template(Atom_Vect) std::vector; +%template(Const_Bond_Vect) std::vector; %template(StereoGroup_Vect) std::vector; %template(UChar_Vect) std::vector; @@ -210,6 +211,8 @@ %newobject getBonds; %newobject getAtomNeighbors; %newobject getAtomBonds; +%newobject getDistanceMat; +%newobject getAdjacencyMatrix; %{ #ifdef RDK_BUILD_COORDGEN_SUPPORT diff --git a/Code/JavaWrappers/Tautomer.i b/Code/JavaWrappers/Tautomer.i new file mode 100644 index 000000000..3924a8329 --- /dev/null +++ b/Code/JavaWrappers/Tautomer.i @@ -0,0 +1,18 @@ +%include "std_vector.i" + +%{ +#include +#include +%} + +%shared_ptr(RDKit::ROMol) +%template(Sizet_Vect) std::vector; +%ignore RDKit::MolStandardize::TautomerScoringFunctions::makeOptimizedScorer; +%shared_ptr(RDKit::MolStandardize::TautomerEnumerator) +%newobject RDKit::MolStandardize::tautomerEnumeratorFromParams; +%newobject RDKit::MolStandardize::getV1TautomerEnumerator; +%newobject RDKit::MolStandardize::TautomerEnumerator::pickCanonical; +%newobject RDKit::MolStandardize::TautomerEnumerator::canonicalize; + +%include + diff --git a/Code/JavaWrappers/csharp_wrapper/GraphMolCSharp.i b/Code/JavaWrappers/csharp_wrapper/GraphMolCSharp.i index c90cf0f0e..4c5770960 100644 --- a/Code/JavaWrappers/csharp_wrapper/GraphMolCSharp.i +++ b/Code/JavaWrappers/csharp_wrapper/GraphMolCSharp.i @@ -250,6 +250,7 @@ typedef unsigned long long int uintmax_t; %include "../SubstructLibrary.i" %include "../ScaffoldNetwork.i" %include "../TautomerQuery.i" +%include "../Tautomer.i" %include "../MolEnumerator.i" %include "../SubstanceGroup.i" %include "../MolHash.i" @@ -288,6 +289,7 @@ typedef unsigned long long int uintmax_t; %template(Double_Pair) std::pair; %template(UInt_Pair) std::pair; %template(Long_Pair) std::pair; +%template(String_Pair) std::pair; /* map */ %template(String_String_Map) std::map; @@ -300,6 +302,7 @@ typedef unsigned long long int uintmax_t; %template(UInt_Pair_Vect) std::vector >; %template(Match_Vect) std::vector >; %template(Long_Pair_Vect) std::vector >; +%template(String_String_Pair_Vect) std::vector >; /* vector vector */ %template(Int_Vect_Vect) std::vector >; diff --git a/Code/JavaWrappers/csharp_wrapper/RdkitTests/TestMolStandardize.cs b/Code/JavaWrappers/csharp_wrapper/RdkitTests/TestMolStandardize.cs index 675aa55f1..fc3c4d8c3 100644 --- a/Code/JavaWrappers/csharp_wrapper/RdkitTests/TestMolStandardize.cs +++ b/Code/JavaWrappers/csharp_wrapper/RdkitTests/TestMolStandardize.cs @@ -1,4 +1,5 @@ -using GraphMolWrap; +using System.Collections.Generic; +using GraphMolWrap; using Xunit; @@ -43,5 +44,100 @@ namespace RdkitTests RDKFuncs.canonicalTautomerInPlace(mol); Assert.Equal("C[PH](C)=O", mol.MolToSmiles()); } + + // Test that standardization rules for a guanidinium group work as expected when matching an Arginine query to a set of peptides + [Fact] + public void TestRMatchNormalize() + { + var cleanupParameters = new CleanupParameters(); + cleanupParameters.doCanonical = true; + var replacements = cleanupParameters.normalizationData; + var replacement = new String_Pair("Standardize ARG", + "[#6:1][#6:2][#6:3][#7H1:4]-[#6X3:5](=[#7:6])-[#7:7]>>[#6:1][#6:2][#6:3][#7H0:4]=[#6X3:5](-[#7:6])-[#7:7]"); + replacements.Add(replacement); + cleanupParameters.normalizationData = replacements; + // Record for R + var molFile = @" + ChemDraw06032117432D + + 0 0 0 0 0 0 V3000 +M V30 BEGIN CTAB +M V30 COUNTS 14 13 0 0 1 +M V30 BEGIN ATOM +M V30 1 C 1.111765 -1.636387 0.000000 0 +M V30 2 C 0.389632 -1.237272 0.000000 0 +M V30 3 C 0.374442 -0.412275 0.000000 0 +M V30 4 C -0.347639 -0.013245 0.000000 0 +M V30 5 C -0.362863 0.811612 0.000000 0 +M V30 6 N -0.273074 -1.636387 0.000000 0 +M V30 7 O 1.111765 -2.461396 0.000000 0 +M V30 8 N -1.084995 1.210729 0.000000 0 +M V30 9 C -1.100270 2.035672 0.000000 0 +M V30 10 N -0.393589 2.461396 0.000000 0 +M V30 11 N -1.822349 2.434701 0.000000 0 +M V30 12 R1 -0.995042 -1.237157 0.000000 0 +M V30 13 R2 1.822349 -1.217228 0.000000 0 +M V30 14 R3 0.320882 2.048896 0.000000 0 +M V30 END ATOM +M V30 BEGIN BOND +M V30 1 1 6 12 +M V30 2 1 1 13 +M V30 3 2 7 1 +M V30 4 1 1 2 +M V30 5 1 2 6 +M V30 6 1 2 3 CFG=1 +M V30 7 1 3 4 +M V30 8 1 4 5 +M V30 9 1 5 8 +M V30 10 2 8 9 +M V30 11 1 9 11 +M V30 12 1 9 10 +M V30 13 1 10 14 +M V30 END BOND +M V30 BEGIN COLLECTION +M V30 MDLV30/STEABS ATOMS=(1 2) +M V30 END COLLECTION +M V30 END CTAB +M END +"; + var q = RWMol.MolFromMolBlock(molFile); + q = RDKFuncs.normalize(q, cleanupParameters); + RDKFuncs.addHs(q); + foreach (var atom in q.getAtoms()) + { + if (atom.getAtomicNum() != 0) continue; + atom.setAtomMapNum(0); + atom.setIsotope(0); + atom.clearProp("_MolFileRLabel"); + } + + var queryParameters = AdjustQueryParameters.noAdjustments(); + queryParameters.makeDummiesQueries = true; + RDKFuncs.adjustQueryProperties(q, queryParameters); + var matchParameters = new SubstructMatchParameters + { + useChirality = true, specifiedStereoQueryMatchesUnspecified = true, useEnhancedStereo = true + }; + List smiles = new(); + smiles.Add( + "O=C(N)CCC1NC(=O)C2N(C(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCCNC(=N)N)CO)C(O)C)CC(C)C)CCSC)CCC2"); + smiles.Add( + "C[C@H](N[H])C(=O)N[C@H](C(=O)N[C@@H]1C(=O)N([C@@H](C)C(=O)N[C@H](C)C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C=O)CC(O)=O)CSSC1)[C@@H](C)O)CCCCN[H])[C@@H](C)C(=O)O)CCCNC(N)=N"); + smiles.Add( + "CSCC[C@@H]1NC(=O)[C@H](CC(C)C)NC(=O)[C@H]([C@@H](C)O)NC(=O)[C@H](CO)NC(=O)[C@H](CCCN=C(N)N)NC(=O)[C@H](CCC(N)=O)NC(=O)[C@@H]2CCCN2C1=O"); + smiles.Add( + "C[C@H](N)C(=O)N[C@@H](CCCN=C(N)N)C(=O)N[C@H]1CSSC[C@@H](C(=O)N[C@@H](CC(=O)O)C(=O)N[C@@H](C)C(=O)O)NC(=O)[C@H]([C@@H](C)O)NC(=O)[C@H](CCCCN)NC(=O)[C@H](C)NC(=O)[C@H](C)NC1=O"); + foreach (var smi in smiles) + { + var mol = RWMol.MolFromSmiles(smi); + + mol = RDKFuncs.normalize(mol, cleanupParameters); + RDKFuncs.addHs(mol); + + var normalizedHits = mol.getSubstructMatches(q, matchParameters); + Assert.Equal(1, normalizedHits.Count); + } + } } + } \ No newline at end of file diff --git a/Code/JavaWrappers/csharp_wrapper/RdkitTests/TestTautomer.cs b/Code/JavaWrappers/csharp_wrapper/RdkitTests/TestTautomer.cs new file mode 100644 index 000000000..963e3837f --- /dev/null +++ b/Code/JavaWrappers/csharp_wrapper/RdkitTests/TestTautomer.cs @@ -0,0 +1,134 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using GraphMolWrap; +using Xunit; + +namespace RdkitTests +{ + public class TestTautomer + { + [Fact] + public void TestTautomerEnumeration() + { + var smiles1 = "CCCNC(=N)N"; + var mol1 = RDKFuncs.SmilesToMol(smiles1); + var smiles2 = "CCCN=C(N)N"; + var mol2 = RDKFuncs.SmilesToMol(smiles2); + Assert.Equal(0, mol1.getSubstructMatch(mol2).Count); + + var enumerator = new TautomerEnumerator(); + var tautomers1 = enumerator.enumerate(mol1); + var tautomers2 = enumerator.enumerate(mol2); + Assert.Equal(2, Convert.ToInt32(tautomers1.size())); + Assert.Equal(2, Convert.ToInt32(tautomers2.size())); + + var mol1Matches = 0; + var mol2Matches = 0; + for (uint i = 0; i < 2; i++) + { + var mol1Tautomer = tautomers1.at(i); + if (mol2.getSubstructMatch(mol1Tautomer).Count > 0) mol2Matches++; + var mol2Tautomer = tautomers2.at(i); + if (mol1.getSubstructMatch(mol2Tautomer).Count > 0) mol1Matches++; + } + + Assert.Equal(1, mol1Matches); + Assert.Equal(1, mol2Matches); + } + + [Fact] + public void TestTautomerCanonicalization() + { + var smi = + "O=C(N)CCC1NC(=O)C2N(C(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCCNC(=N)N)CO)C(O)C)CC(C)C)CCSC)CCC2"; + var mol = RWMol.MolFromSmiles(smi); + // Record for R + var molFile = @" + ChemDraw06032117432D + + 0 0 0 0 0 0 V3000 +M V30 BEGIN CTAB +M V30 COUNTS 14 13 0 0 1 +M V30 BEGIN ATOM +M V30 1 C 1.111765 -1.636387 0.000000 0 +M V30 2 C 0.389632 -1.237272 0.000000 0 +M V30 3 C 0.374442 -0.412275 0.000000 0 +M V30 4 C -0.347639 -0.013245 0.000000 0 +M V30 5 C -0.362863 0.811612 0.000000 0 +M V30 6 N -0.273074 -1.636387 0.000000 0 +M V30 7 O 1.111765 -2.461396 0.000000 0 +M V30 8 N -1.084995 1.210729 0.000000 0 +M V30 9 C -1.100270 2.035672 0.000000 0 +M V30 10 N -0.393589 2.461396 0.000000 0 +M V30 11 N -1.822349 2.434701 0.000000 0 +M V30 12 R1 -0.995042 -1.237157 0.000000 0 +M V30 13 R2 1.822349 -1.217228 0.000000 0 +M V30 14 R3 0.320882 2.048896 0.000000 0 +M V30 END ATOM +M V30 BEGIN BOND +M V30 1 1 6 12 +M V30 2 1 1 13 +M V30 3 2 7 1 +M V30 4 1 1 2 +M V30 5 1 2 6 +M V30 6 1 2 3 CFG=1 +M V30 7 1 3 4 +M V30 8 1 4 5 +M V30 9 1 5 8 +M V30 10 2 8 9 +M V30 11 1 9 11 +M V30 12 1 9 10 +M V30 13 1 10 14 +M V30 END BOND +M V30 BEGIN COLLECTION +M V30 MDLV30/STEABS ATOMS=(1 2) +M V30 END COLLECTION +M V30 END CTAB +M END +"; + + var query = RWMol.MolFromMolBlock(molFile); + foreach (var atom in query.getAtoms()) + { + if (atom.getAtomicNum() != 0) + { + continue; + } + + atom.setIsotope(0U); + atom.setAtomMapNum(0); + } + + var queryParameters = AdjustQueryParameters.noAdjustments(); + queryParameters.makeDummiesQueries = true; + var matchParameters = new SubstructMatchParameters + { + useChirality = true, specifiedStereoQueryMatchesUnspecified = true, useEnhancedStereo = true + }; + + var cleanupParameters = new CleanupParameters(); + cleanupParameters.tautomerRemoveBondStereo = false; + cleanupParameters.tautomerRemoveIsotopicHs = false; + cleanupParameters.tautomerReassignStereo = false; + cleanupParameters.tautomerRemoveSp3Stereo = false; + var canonMol = RDKFuncs.canonicalTautomer(mol, cleanupParameters); + RDKFuncs.addHs(canonMol); + var canonQuery = RDKFuncs.canonicalTautomer(query, cleanupParameters); + RDKFuncs.addHs(canonQuery); + RDKFuncs.adjustQueryProperties(canonQuery, queryParameters); + var canonMatches = canonMol.getSubstructMatches(canonQuery, matchParameters); + var numberCanonHits = canonMatches.Count; + Assert.Equal(1, numberCanonHits); + + RDKFuncs.addHs(mol); + RDKFuncs.addHs(query); + RDKFuncs.adjustQueryProperties(query, queryParameters); + var matches = mol.getSubstructMatches(query, matchParameters); + var numberHits = matches.Count; + Assert.Equal(0, numberHits); + } + } +} \ No newline at end of file