Files
rdkit/Code/GraphMol/Wrap/test_subset.py
Brian Kelley 70540c2eed Add extract mol fragment api (#8811)
* Create a function to extract some specified atoms from a ROMol as a new ROMol by creating new graph (#8742)

This adds a new api, `RDKit::MolOps::ExtractMolFragment`, to allow efficient
extractions of mol fragments from large mols. Compared to the approach where
we delete "unwanted" atoms/bonds from the input mol, this api is faster for
small mols (about 2x faster) and at least 3x faster for big mols
(was 10x faster for "CCC"*1000).

* clang-format

* review comments

* cleanup

* Consolidate copying subsets of molecules

* Readd missing tests

* Update comment to restart build

* Remove missing test

* Remove debugging comment, fix warnings

* Fix warnings on gcc11

* Add docs

* Make vector<bool> dynamic_bitset<>

* Update copyright

* Add swig wrappers

* Use new designated constructor API

* Fix windows builds

* Change enum values from unsigned int to integer

* Fix unsigned int variable

* Update Code/GraphMol/Wrap/test_subset.py

Co-authored-by: Greg Landrum <greg.landrum@gmail.com>

* Update Code/GraphMol/Subset.cpp

Co-authored-by: Greg Landrum <greg.landrum@gmail.com>

* Update Code/JavaWrappers/gmwrapper/src-test/org/RDKit/ChemTransformsTests.java

Co-authored-by: Greg Landrum <greg.landrum@gmail.com>

* Reponse to review

* Fix documentation

* Remove comments

* Remove unnecessary comments

* Fix one liners

* Change assertion to be clearer (and not one-liners)

* Run clang-format

---------

Co-authored-by: Your Name <you@example.com>
Co-authored-by: Hussein Faara <hussein.faara@schrodinger.com>
Co-authored-by: Brian Kelley <bkelley@glysade.com>
Co-authored-by: Greg Landrum <greg.landrum@gmail.com>
2025-12-09 15:06:29 +01:00

76 lines
2.4 KiB
Python

#
# Copyright (C) 2025 Hussein Faara, Brian Kelley and other RDKit contributors
# All Rights Reserved
#
import doctest
import gc
import gzip
import logging
import os
import sys
import tempfile
import unittest
from io import StringIO
from rdkit import Chem
def check(info, selection):
for i,v in enumerate(selection):
assert (v and i in info) or (i not in info)
class TestCase(unittest.TestCase):
def test_subset(self):
smi = "c1ccccc1CCN"
m = Chem.MolFromSmiles(smi)
N = list(range(6))
sub = Chem.CopyMolSubset(m, N)
self.assertEqual(Chem.MolToSmiles(sub), "c1ccccc1")
info = Chem.SubsetInfo()
sub = Chem.CopyMolSubset(m, N, info)
check(info.atomMapping, [True]*6 + [False]*3)
check(info.bondMapping,
[True, True, True, True, True, False, False, False, True])
atoms = [(0,0),(1,1),(2,2),(3,3),(4,4),(5,5)]
bonds = [(0,0),(1,1),(2,2),(3,3),(4,4),(8,5)]
for src,dst in atoms:
self.assertEqual(info.atomMapping[src], dst)
for src,dst in bonds:
self.assertEqual(info.bondMapping[src], dst)
opts = Chem.SubsetOptions()
opts.method = Chem.SubsetMethod.BONDS;
sub = Chem.CopyMolSubset(m, N, opts)
self.assertEqual(Chem.MolToSmiles(sub), "ccccccC")
sub = Chem.CopyMolSubset(m, N, info, opts)
check(info.atomMapping, [True]*7 + [False]*2)
check(info.bondMapping, [True]*6 + [False]*3)
for i in N:
self.assertEqual(info.atomMapping[i], i)
self.assertEqual(info.bondMapping[i], i)
N = list(range(6,11))
sub = Chem.CopyMolSubset(m, N)
self.assertEqual(Chem.MolToSmiles(sub), "CCN")
info = Chem.SubsetInfo()
sub = Chem.CopyMolSubset(m, N, info)
self.assertEqual(Chem.MolToSmiles(sub), "CCN")
check(info.atomMapping, [False]*6 + [True]*3)
check(info.bondMapping, [False]*6+ [True]*2 + [False])
sub = Chem.CopyMolSubset(m, N, info, opts)
self.assertEqual(Chem.MolToSmiles(sub), "CCN.cc")
check(info.atomMapping, [True, False, False, False, False, True, True, True, True])
check(info.bondMapping, [False, False, False, False, False, False, True, True, True])
m.SetProp("foo", "bar", computed=True)
sub = Chem.CopyMolSubset(m, N)
self.assertTrue(sub.HasProp("foo"))
opts.clearComputedProps = True
sub = Chem.CopyMolSubset(m, N, opts)
self.assertFalse(sub.HasProp("foo"))