Add extract mol fragment api (#8811)

* Create a function to extract some specified atoms from a ROMol as a new ROMol by creating new graph (#8742)

This adds a new api, `RDKit::MolOps::ExtractMolFragment`, to allow efficient
extractions of mol fragments from large mols. Compared to the approach where
we delete "unwanted" atoms/bonds from the input mol, this api is faster for
small mols (about 2x faster) and at least 3x faster for big mols
(was 10x faster for "CCC"*1000).

* clang-format

* review comments

* cleanup

* Consolidate copying subsets of molecules

* Readd missing tests

* Update comment to restart build

* Remove missing test

* Remove debugging comment, fix warnings

* Fix warnings on gcc11

* Add docs

* Make vector<bool> dynamic_bitset<>

* Update copyright

* Add swig wrappers

* Use new designated constructor API

* Fix windows builds

* Change enum values from unsigned int to integer

* Fix unsigned int variable

* Update Code/GraphMol/Wrap/test_subset.py

Co-authored-by: Greg Landrum <greg.landrum@gmail.com>

* Update Code/GraphMol/Subset.cpp

Co-authored-by: Greg Landrum <greg.landrum@gmail.com>

* Update Code/JavaWrappers/gmwrapper/src-test/org/RDKit/ChemTransformsTests.java

Co-authored-by: Greg Landrum <greg.landrum@gmail.com>

* Response to review

* Fix documentation

* Remove comments

* Remove unnecessary comments

* Fix one liners

* Change assertion to be clearer (and not one-liners)

* Run clang-format

---------

Co-authored-by: Your Name <you@example.com>
Co-authored-by: Hussein Faara <hussein.faara@schrodinger.com>
Co-authored-by: Brian Kelley <bkelley@glysade.com>
Co-authored-by: Greg Landrum <greg.landrum@gmail.com>
This commit is contained in:
Brian Kelley
2025-12-09 09:06:29 -05:00
committed by Greg Landrum
parent c86a691a4d
commit 48e1fc2b8d
13 changed files with 1006 additions and 106 deletions

View File

@@ -0,0 +1,73 @@
//
// Copyright (C) 2025 and other RDKit contributors
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
%{
#include <GraphMol/Subset.h>
%}
%ignore RDKit::copyMolSubset;
%include <GraphMol/Subset.h>
%{
// Wrapper-facing overload taking separate atom and bond lists.
// Forwards to copyMolSubset() and returns the raw pointer obtained by calling
// release() on its result, so the result is no longer managed by that object.
RDKit::ROMol *copyMolSubsetHelper(
    const RDKit::ROMol &mol, const std::vector<unsigned int> &atoms,
    const std::vector<unsigned int> &bonds,
    const RDKit::SubsetOptions &options = RDKit::SubsetOptions()) {
  auto subset = copyMolSubset(mol, atoms, bonds, options);
  return subset.release();
}
// Wrapper-facing overload taking separate atom and bond lists plus a
// SubsetInfo that is passed through (by non-const reference) to
// copyMolSubset(). Returns the raw pointer obtained by calling release() on
// the result.
RDKit::ROMol *copyMolSubsetHelper(
    const RDKit::ROMol &mol, const std::vector<unsigned int> &atoms,
    const std::vector<unsigned int> &bonds, RDKit::SubsetInfo &subsetInfo,
    const RDKit::SubsetOptions &options = RDKit::SubsetOptions()) {
  auto subset = copyMolSubset(mol, atoms, bonds, subsetInfo, options);
  return subset.release();
}
// Wrapper-facing overload taking a single `path` list.
// Forwards to copyMolSubset() and returns the raw pointer obtained by calling
// release() on its result.
RDKit::ROMol *copyMolSubsetHelper(
    const RDKit::ROMol &mol, const std::vector<unsigned int> &path,
    const RDKit::SubsetOptions &options = RDKit::SubsetOptions()) {
  auto subset = copyMolSubset(mol, path, options);
  return subset.release();
}
// Wrapper-facing overload taking a single `path` list plus a SubsetInfo that
// is passed through (by non-const reference) to copyMolSubset(). Returns the
// raw pointer obtained by calling release() on the result.
RDKit::ROMol *copyMolSubsetHelper(
    const RDKit::ROMol &mol, const std::vector<unsigned int> &path,
    RDKit::SubsetInfo &subsetInfo,
    const RDKit::SubsetOptions &options = RDKit::SubsetOptions()) {
  auto subset = copyMolSubset(mol, path, subsetInfo, options);
  return subset.release();
}
%}
// Every copyMolSubsetHelper overload returns a pointer obtained via release(),
// so the caller owns the returned molecule. %newobject tells SWIG that the
// target-language proxy must take ownership, otherwise the molecule is never
// freed.
%newobject copyMolSubsetHelper;
// Expose the helpers to the target language under the name copyMolSubset.
%rename("copyMolSubset") copyMolSubsetHelper;
// SWIG declaration of the helper overload taking separate atom and bond lists.
RDKit::ROMol *copyMolSubsetHelper(
    const RDKit::ROMol &mol, const std::vector<unsigned int> &atoms,
    const std::vector<unsigned int> &bonds,
    const RDKit::SubsetOptions &options = RDKit::SubsetOptions());
// SWIG declaration of the helper overload taking separate atom and bond lists
// plus a SubsetInfo argument.
// The default for `options` must be spelled exactly like the C++ definition
// (RDKit::SubsetOptions()); a misspelled expression is not valid C++ and would
// break any wrapper code that pastes the default-argument expression.
RDKit::ROMol *copyMolSubsetHelper(
    const RDKit::ROMol &mol, const std::vector<unsigned int> &atoms,
    const std::vector<unsigned int> &bonds, RDKit::SubsetInfo &subsetInfo,
    const RDKit::SubsetOptions &options = RDKit::SubsetOptions());
// SWIG declaration of the helper overload taking a single `path` list.
// The default for `options` is namespace-qualified because this declaration
// sits at global scope, where an unqualified SubsetOptions does not name
// RDKit::SubsetOptions.
RDKit::ROMol *copyMolSubsetHelper(
    const RDKit::ROMol &mol, const std::vector<unsigned int> &path,
    const RDKit::SubsetOptions &options = RDKit::SubsetOptions());
// SWIG declaration of the helper overload taking a single `path` list plus a
// SubsetInfo argument.
RDKit::ROMol *copyMolSubsetHelper(
    const RDKit::ROMol &mol, const std::vector<unsigned int> &path,
    RDKit::SubsetInfo &subsetInfo,
    const RDKit::SubsetOptions &options = RDKit::SubsetOptions());

View File

@@ -227,6 +227,7 @@ typedef unsigned long long int uintmax_t;
%include "../GeneralizedSubstruct.i"
%include "../RascalMCES.i"
%include "../Queries.i"
%include "../Subset.i"
// Create a class to throw various sorts of errors for testing. Required for unit tests in ErrorHandlingTests.java
#ifdef INCLUDE_ERROR_GENERATOR

View File

@@ -70,6 +70,19 @@ public class ChemTransformsTests extends GraphMolTest {
}
@Test
public void testSubset() {
  // Copies the first six atoms of the molecule and checks the SMILES of the
  // resulting subset.
  ROMol mol = RWMol.MolFromSmiles("c1ccccc1CCN");
  // Start from an empty vector: a size argument to the constructor would
  // presumably pre-fill it with that many default (0) entries rather than
  // reserve capacity, leaving extra zero entries ahead of the intended 0..5.
  UInt_Vect vect = new UInt_Vect();
  for (int i = 0; i < 6; ++i) {
    vect.add(i);
  }
  // atom copy
  ROMol sub = RDKFuncs.copyMolSubset(mol, vect);
  assertEquals("c1ccccc1", sub.MolToSmiles());
}
// Command-line entry point: hands this test class's name to the JUnit runner.
public static void main(String[] args) {
  final String testClassName = "org.RDKit.ChemTransformsTests";
  org.junit.runner.JUnitCore.main(testClassName);
}