Allow Multiple Core Hits in the Same Molecule in RGroupDecomposition (#8813)

* Allow the same core to match more than once in a molecule. * Update annotation. * Changes after review. --------- Co-authored-by: David Cosgrove <david@cozchemix.co.uk>
2026-06-03 21:44:30 +08:00 · 2025-10-29 20:50:19 +00:00
parent e6f37dc498
commit 53203079c1
4 changed files with 302 additions and 185 deletions
--- a/Code/GraphMol/RGroupDecomposition/RGroupDecomp.cpp
+++ b/Code/GraphMol/RGroupDecomposition/RGroupDecomp.cpp
@@ -346,6 +346,36 @@ int RGroupDecomposition::getMatchingCoreInternal(
  return core_idx;
 }

+namespace {
+// Take the matches, all from the same molecule and split them so that
+// different atom sets are separated out.  So that if a core hits
+// more than once in the molecule, both sets of R Groups will be
+// returned.
+std::vector<std::vector<MatchVectType>> splitNonUniqueMatches(
+    const std::vector<MatchVectType> &tmatches, unsigned int nAtoms) {
+  std::vector<std::vector<MatchVectType>> outMatches;
+  std::vector<boost::dynamic_bitset<>> atomSets;
+  for (const auto &match : tmatches) {
+    boost::dynamic_bitset<> atomSet(nAtoms);
+    for (const auto &mp : match) {
+      atomSet.set(mp.second);
+    }
+    if (std::find(atomSets.begin(), atomSets.end(), atomSet) ==
+        atomSets.end()) {
+      atomSets.push_back(atomSet);
+      outMatches.push_back(std::vector<MatchVectType>(1, match));
+    } else {
+      for (size_t i = 0; i < atomSets.size(); ++i) {
+        if (atomSet == atomSets[i]) {
+          outMatches[i].push_back(match);
+        }
+      }
+    }
+  }
+  return outMatches;
+}
+}  // namespace
+
 int RGroupDecomposition::add(const ROMol &inmol) {
  RWMOL_SPTR mol(new RWMol(inmol));
  const RCore *rcore;
@@ -371,6 +401,7 @@ int RGroupDecomposition::add(const ROMol &inmol) {
      }
    }
  }
+
  // mark any wildcards in input molecule:
  for (auto &atom : mol->atoms()) {
    if (atom->getAtomicNum() == 0) {
@@ -397,180 +428,192 @@ int RGroupDecomposition::add(const ROMol &inmol) {
  std::vector<RGroupMatch> potentialMatches;
  constexpr size_t MAX_PERMUTATIONS = 100000;

-  std::unique_ptr<ROMol> tMol;
-  for (const auto &tmatche : tmatches) {
-    const bool replaceDummies = false;
-    const bool labelByIndex = true;
-    const bool requireDummyMatch = false;
-    // TODO see if we need replaceCoreAtomsWithMolMatches or can just use
-    // rcore->core
-    auto coreCopy = rcore->replaceCoreAtomsWithMolMatches(*mol, tmatche);
-    tMol.reset(replaceCore(*mol, *coreCopy, tmatche, replaceDummies,
-                           labelByIndex, requireDummyMatch));
+  std::vector<std::vector<MatchVectType>> nonUniqueMatches;
+  if (data->params.allowMultipleCoresInSameMol) {
+    nonUniqueMatches = splitNonUniqueMatches(tmatches, mol->getNumAtoms());
+  } else {
+    nonUniqueMatches.push_back(tmatches);
+  }
+
+  for (const auto &splitMatch : nonUniqueMatches) {
+    std::unique_ptr<ROMol> tMol;
+    for (const auto &tmatche : splitMatch) {
+      const bool replaceDummies = false;
+      const bool labelByIndex = true;
+      const bool requireDummyMatch = false;
+      // TODO see if we need replaceCoreAtomsWithMolMatches or can just use
+      // rcore->core
+      auto coreCopy = rcore->replaceCoreAtomsWithMolMatches(*mol, tmatche);
+      tMol.reset(replaceCore(*mol, *coreCopy, tmatche, replaceDummies,
+                             labelByIndex, requireDummyMatch));
 #ifdef VERBOSE
-    std::cerr << "Core Match core_idx " << core_idx << " idx "
-              << data->matches.size() << ": " << MolToSmarts(*coreCopy)
-              << std::endl;
+      std::cerr << "Core Match core_idx " << core_idx << " idx "
+                << data->matches.size() << ": " << MolToSmarts(*coreCopy)
+                << std::endl;
 #endif
-    if (tMol) {
+      if (tMol) {
 #ifdef VERBOSE
-      std::cerr << "All Fragments " << MolToSmiles(*tMol) << std::endl;
+        std::cerr << "All Fragments " << MolToSmiles(*tMol) << std::endl;
 #endif
-      R_DECOMP match;
-      // rlabel rgroups
-      MOL_SPTR_VECT fragments = MolOps::getMolFrags(*tMol, false);
-      std::set<int> coreAtomAnyMatched;
-      // get the sidechains
-      for (size_t i = 0; i < fragments.size(); ++i) {
-        const auto &newMol = fragments[i];
-        std::vector<int> rlabelsOnSideChain;
-        newMol->setProp<int>("core", core_idx);
-        newMol->setProp<int>("idx", data->matches.size());
-        newMol->setProp<int>("frag_idx", i);
+        R_DECOMP match;
+        // rlabel rgroups
+        MOL_SPTR_VECT fragments = MolOps::getMolFrags(*tMol, false);
+        std::set<int> coreAtomAnyMatched;
+        // get the sidechains
+        for (size_t i = 0; i < fragments.size(); ++i) {
+          const auto &newMol = fragments[i];
+          std::vector<int> rlabelsOnSideChain;
+          newMol->setProp<int>("core", core_idx);
+          newMol->setProp<int>("idx", data->matches.size());
+          newMol->setProp<int>("frag_idx", i);
 #ifdef VERBOSE
-        std::cerr << "Fragment " << MolToSmiles(*newMol) << std::endl;
+          std::cerr << "Fragment " << MolToSmiles(*newMol) << std::endl;
 #endif
-        for (auto sideChainAtom : newMol->atoms()) {
-          if (sideChainAtom->getAtomicNum() != 0) {
-            // we are only interested in sidechain R group atoms
-            continue;
+          for (auto sideChainAtom : newMol->atoms()) {
+            if (sideChainAtom->getAtomicNum() != 0) {
+              // we are only interested in sidechain R group atoms
+              continue;
+            }
+            if (!sideChainAtom->hasProp(_rgroupInputDummy)) {
+              // this is the index of the core atom that the R group
+              // atom is attached to
+              unsigned int coreAtomIndex = sideChainAtom->getIsotope();
+              auto coreAtom = rcore->core->getAtomWithIdx(coreAtomIndex);
+              coreAtomAnyMatched.insert(coreAtomIndex);
+              int rlabel;
+              if (coreAtom->getPropIfPresent(RLABEL, rlabel)) {
+                std::vector<int> rlabelsOnSideChainAtom;
+                sideChainAtom->getPropIfPresent(SIDECHAIN_RLABELS,
+                                                rlabelsOnSideChainAtom);
+                rlabelsOnSideChainAtom.push_back(rlabel);
+                sideChainAtom->setProp(SIDECHAIN_RLABELS,
+                                       rlabelsOnSideChainAtom);
+                data->labels.insert(rlabel);  // keep track of all labels used
+                rlabelsOnSideChain.push_back(rlabel);
+                if (const auto [bondIdx, end] =
+                        newMol->getAtomBonds(sideChainAtom);
+                    bondIdx != end) {
+                  auto connectingBond = (*newMol)[*bondIdx];
+                  if (connectingBond->getStereo() >
+                      Bond::BondStereo::STEREOANY) {
+                    // TODO: how to handle bond stereo on rgroups connected to
+                    // core by stereo double bonds
+                    connectingBond->setStereo(Bond::BondStereo::STEREOANY);
+                  }
+                }
+              }
+            } else {
+              // restore input wildcard
+              sideChainAtom->clearProp(_rgroupInputDummy);
+            }
          }
-          if (!sideChainAtom->hasProp(_rgroupInputDummy)) {
-            // this is the index of the core atom that the R group
-            // atom is attached to
-            unsigned int coreAtomIndex = sideChainAtom->getIsotope();
-            auto coreAtom = rcore->core->getAtomWithIdx(coreAtomIndex);
-            coreAtomAnyMatched.insert(coreAtomIndex);
-            int rlabel;
-            if (coreAtom->getPropIfPresent(RLABEL, rlabel)) {
-              std::vector<int> rlabelsOnSideChainAtom;
-              sideChainAtom->getPropIfPresent(SIDECHAIN_RLABELS,
-                                              rlabelsOnSideChainAtom);
-              rlabelsOnSideChainAtom.push_back(rlabel);
-              sideChainAtom->setProp(SIDECHAIN_RLABELS, rlabelsOnSideChainAtom);
-              data->labels.insert(rlabel);  // keep track of all labels used
-              rlabelsOnSideChain.push_back(rlabel);
-              if (const auto [bondIdx, end] =
-                      newMol->getAtomBonds(sideChainAtom);
-                  bondIdx != end) {
-                auto connectingBond = (*newMol)[*bondIdx];
-                if (connectingBond->getStereo() > Bond::BondStereo::STEREOANY) {
-                  // TODO: how to handle bond stereo on rgroups connected to
-                  // core by stereo double bonds
-                  connectingBond->setStereo(Bond::BondStereo::STEREOANY);
+          if (data->params.includeTargetMolInResults) {
+            setTargetAtomBondIndices(*newMol, true);
+          }
+          if (!rlabelsOnSideChain.empty()) {
+#ifdef VERBOSE
+            std::string newCoreSmi = MolToSmiles(*newMol, true);
+#endif
+
+            for (auto rlabel : rlabelsOnSideChain) {
+              ADD_MATCH(match, rlabel);
+              match[rlabel]->add(newMol, rlabelsOnSideChain);
+#ifdef VERBOSE
+              std::cerr << "Fragment " << i << " R" << rlabel << " "
+                        << MolToSmiles(*newMol) << std::endl;
+#endif
+            }
+          } else {
+            // special case, only one fragment
+            if (fragments.size() == 1) {  // need to make a new core
+              // remove the sidechains
+
+              // GJ I think if we ever get here that it's really an error and I
+              // believe that I've fixed the case where this code was called.
+              // Still, I'm too scared to delete the block.
+              RWMol newCore(*mol);
+
+              for (const auto &mvpair : tmatche) {
+                const Atom *coreAtm = rcore->core->getAtomWithIdx(mvpair.first);
+                Atom *newCoreAtm = newCore.getAtomWithIdx(mvpair.second);
+                int rlabel;
+                if (coreAtm->getPropIfPresent(RLABEL, rlabel)) {
+                  newCoreAtm->setProp<int>(RLABEL, rlabel);
+                }
+                newCoreAtm->setProp<bool>("keep", true);
+              }
+
+              newCore.beginBatchEdit();
+              for (const auto atom : newCore.atoms()) {
+                if (!atom->hasProp("keep")) {
+                  newCore.removeAtom(atom);
+                }
+              }
+              newCore.commitBatchEdit();
+              if (newCore.getNumAtoms()) {
+                std::string newCoreSmi = MolToSmiles(newCore, true);
+                // add a new core if possible
+                auto newcore = data->newCores.find(newCoreSmi);
+                int core_idx = 0;
+                if (newcore == data->newCores.end()) {
+                  core_idx = data->newCores[newCoreSmi] = data->newCoreLabel--;
+                  data->cores[core_idx] = RCore(newCore);
+                  return add(inmol);
                }
              }
            }
-          } else {
-            // restore input wildcard
-            sideChainAtom->clearProp(_rgroupInputDummy);
          }
        }
-        if (data->params.includeTargetMolInResults) {
-          setTargetAtomBondIndices(*newMol, true);
-        }
-        if (!rlabelsOnSideChain.empty()) {
-#ifdef VERBOSE
-          std::string newCoreSmi = MolToSmiles(*newMol, true);
-#endif

-          for (auto rlabel : rlabelsOnSideChain) {
-            ADD_MATCH(match, rlabel);
-            match[rlabel]->add(newMol, rlabelsOnSideChain);
-#ifdef VERBOSE
-            std::cerr << "Fragment " << i << " R" << rlabel << " "
-                      << MolToSmiles(*newMol) << std::endl;
-#endif
+        if (!match.empty()) {
+          // this is the number of user-defined R labels associated with
+          // non-hydrogen substituents
+          auto numberUserGroupsInMatch = std::accumulate(
+              match.begin(), match.end(), 0,
+              [](int sum,
+                 const std::pair<int, boost::shared_ptr<RGroupData>> &p) {
+                return p.first > 0 && !p.second->is_hydrogen ? ++sum : sum;
+              });
+          int numberMissingUserGroups =
+              rcore->numberUserRGroups - numberUserGroupsInMatch;
+          CHECK_INVARIANT(numberMissingUserGroups >= 0,
+                          "Data error in missing user rgroup count");
+          const auto extractedCore =
+              rcore->extractCoreFromMolMatch(*mol, tmatche, params());
+          if (data->params.includeTargetMolInResults) {
+            setTargetAtomBondIndices(*extractedCore, false);
          }
-        } else {
-          // special case, only one fragment
-          if (fragments.size() == 1) {  // need to make a new core
-            // remove the sidechains
-
-            // GJ I think if we ever get here that it's really an error and I
-            // believe that I've fixed the case where this code was called.
-            // Still, I'm too scared to delete the block.
-            RWMol newCore(*mol);
-
-            for (const auto &mvpair : tmatche) {
-              const Atom *coreAtm = rcore->core->getAtomWithIdx(mvpair.first);
-              Atom *newCoreAtm = newCore.getAtomWithIdx(mvpair.second);
-              int rlabel;
-              if (coreAtm->getPropIfPresent(RLABEL, rlabel)) {
-                newCoreAtm->setProp<int>(RLABEL, rlabel);
-              }
-              newCoreAtm->setProp<bool>("keep", true);
-            }
-
-            newCore.beginBatchEdit();
-            for (const auto atom : newCore.atoms()) {
-              if (!atom->hasProp("keep")) {
-                newCore.removeAtom(atom);
-              }
-            }
-            newCore.commitBatchEdit();
-            if (newCore.getNumAtoms()) {
-              std::string newCoreSmi = MolToSmiles(newCore, true);
-              // add a new core if possible
-              auto newcore = data->newCores.find(newCoreSmi);
-              int core_idx = 0;
-              if (newcore == data->newCores.end()) {
-                core_idx = data->newCores[newCoreSmi] = data->newCoreLabel--;
-                data->cores[core_idx] = RCore(newCore);
-                return add(inmol);
-              }
-            }
+          potentialMatches.emplace_back(core_idx, numberMissingUserGroups,
+                                        match, extractedCore);
+          if (data->params.includeTargetMolInResults) {
+            potentialMatches.back().setTargetMoleculeForHighlights(mol);
          }
        }
      }
+    }

-      if (!match.empty()) {
-        // this is the number of user-defined R labels associated with
-        // non-hydrogen substituents
-        auto numberUserGroupsInMatch = std::accumulate(
-            match.begin(), match.end(), 0,
-            [](int sum,
-               const std::pair<int, boost::shared_ptr<RGroupData>> &p) {
-              return p.first > 0 && !p.second->is_hydrogen ? ++sum : sum;
-            });
-        int numberMissingUserGroups =
-            rcore->numberUserRGroups - numberUserGroupsInMatch;
-        CHECK_INVARIANT(numberMissingUserGroups >= 0,
-                        "Data error in missing user rgroup count");
-        const auto extractedCore =
-            rcore->extractCoreFromMolMatch(*mol, tmatche, params());
-        if (data->params.includeTargetMolInResults) {
-          setTargetAtomBondIndices(*extractedCore, false);
-        }
-        potentialMatches.emplace_back(core_idx, numberMissingUserGroups, match,
-                                      extractedCore);
-        if (data->params.includeTargetMolInResults) {
-          potentialMatches.back().setTargetMoleculeForHighlights(mol);
-        }
+    if (potentialMatches.empty()) {
+      BOOST_LOG(rdDebugLog)
+          << "No attachment points in side chains" << std::endl;
+      return -2;
+    }
+
+    if (data->params.matchingStrategy != GA) {
+      size_t N = 1;
+      for (auto matche = data->matches.begin() + data->previousMatchSize;
+           matche != data->matches.end(); ++matche) {
+        size_t sz = matche->size();
+        N *= sz;
+      }
+      // Highly symmetric cores can lead to a very large number of
+      // permutations to test. Fall back to Greedy for the current chunk
+      // when the number is too high.
+      if (N * potentialMatches.size() > MAX_PERMUTATIONS) {
+        data->process(data->prunePermutations);
      }
    }
+    data->matches.push_back(std::move(potentialMatches));
  }
-  if (potentialMatches.empty()) {
-    BOOST_LOG(rdDebugLog) << "No attachment points in side chains" << std::endl;
-    return -2;
-  }
-
-  if (data->params.matchingStrategy != GA) {
-    size_t N = 1;
-    for (auto matche = data->matches.begin() + data->previousMatchSize;
-         matche != data->matches.end(); ++matche) {
-      size_t sz = matche->size();
-      N *= sz;
-    }
-    // Highly symmetric cores can lead to a very large number of
-    // permutations to test. Fall back to Greedy for the current chunk
-    // when the number is too high.
-    if (N * potentialMatches.size() > MAX_PERMUTATIONS) {
-      data->process(data->prunePermutations);
-    }
-  }
-  data->matches.push_back(std::move(potentialMatches));
-
  if (!data->matches.empty()) {
    if (data->params.matchingStrategy & Greedy ||
        (data->params.matchingStrategy & GreedyChunks &&
--- a/Code/GraphMol/RGroupDecomposition/RGroupDecompParams.h
+++ b/Code/GraphMol/RGroupDecomposition/RGroupDecompParams.h
@@ -18,40 +18,23 @@

 namespace RDKit {

-BETTER_ENUM(RGroupLabels, unsigned int,
-  IsotopeLabels = 0x01,
-  AtomMapLabels = 0x02,
-  AtomIndexLabels = 0x04,
-  RelabelDuplicateLabels = 0x08,
-  MDLRGroupLabels = 0x10,
-  DummyAtomLabels = 0x20,  // These are rgroups but will get relabelled
-  AutoDetect = 0xFF
-);
-
-BETTER_ENUM(RGroupMatching, unsigned int,
-  Greedy = 0x01,
-  GreedyChunks = 0x02,
-  Exhaustive = 0x04,  // not really useful for large sets
-  NoSymmetrization = 0x08,
-  GA = 0x10
-);
-
 BETTER_ENUM(
-  RGroupLabelling, unsigned int,
-  AtomMap = 0x01,
-  Isotope = 0x02,
-  MDLRGroup = 0x04
-);
+    RGroupLabels, unsigned int, IsotopeLabels = 0x01, AtomMapLabels = 0x02,
+    AtomIndexLabels = 0x04, RelabelDuplicateLabels = 0x08,
+    MDLRGroupLabels = 0x10,
+    DummyAtomLabels = 0x20,  // These are rgroups but will get relabelled
+    AutoDetect = 0xFF);

-BETTER_ENUM(RGroupCoreAlignment, unsigned int,
-  NoAlignment = 0x0,
-  MCS = 0x01
-);
+BETTER_ENUM(RGroupMatching, unsigned int, Greedy = 0x01, GreedyChunks = 0x02,
+            Exhaustive = 0x04,  // not really useful for large sets
+            NoSymmetrization = 0x08, GA = 0x10);

-BETTER_ENUM(RGroupScore, unsigned int,
-  Match = 0x1,
-  FingerprintVariance = 0x4
-);
+BETTER_ENUM(RGroupLabelling, unsigned int, AtomMap = 0x01, Isotope = 0x02,
+            MDLRGroup = 0x04);
+
+BETTER_ENUM(RGroupCoreAlignment, unsigned int, NoAlignment = 0x0, MCS = 0x01);
+
+BETTER_ENUM(RGroupScore, unsigned int, Match = 0x1, FingerprintVariance = 0x4);

 struct RDKIT_RGROUPDECOMPOSITION_EXPORT RGroupDecompositionParameters {
  unsigned int labels = RGroupLabels::AutoDetect;
@@ -75,6 +58,9 @@ struct RDKIT_RGROUPDECOMPOSITION_EXPORT RGroupDecompositionParameters {
  bool allowNonTerminalRGroups = false;
  //! unlabelled core atoms can have multiple rgroups
  bool allowMultipleRGroupsOnUnlabelled = false;
+  //! Permit a core to match more than once in the same molecule if the sets of
+  // matched atoms are not equal.
+  bool allowMultipleCoresInSameMol = false;
  // extended query settings for core matching
  bool doTautomers = false;
  bool doEnumeration = false;
--- a/Code/GraphMol/RGroupDecomposition/Wrap/rdRGroupComposition.cpp
+++ b/Code/GraphMol/RGroupDecomposition/Wrap/rdRGroupComposition.cpp
@@ -288,7 +288,10 @@ struct rgroupdecomp_wrapper {
        "input structure\n"
        "    - doEnumeration: expand input cores into enumerated mol bundles\n"
        "    - allowMultipleRGroupsOnUnlabelled: permit more than one rgroup to "
-        "be attached to an unlabelled core atom";
+        "be attached to an unlabelled core atom\n"
+        "    - allowMultipleCoresInSameMol: permit a core to match more than"
+        " once in the same molecule if the sets of matched atoms are not equal"
+        " (default=False)";
    python::class_<RDKit::RGroupDecompositionParameters>(
        "RGroupDecompositionParameters", docString.c_str(),
        python::init<>(python::args("self"), "Constructor, takes no arguments"))
@@ -338,6 +341,9 @@ struct rgroupdecomp_wrapper {
        .def_readwrite("allowMultipleRGroupsOnUnlabelled",
                       &RDKit::RGroupDecompositionParameters::
                           allowMultipleRGroupsOnUnlabelled)
+        .def_readwrite(
+            "allowMultipleCoresInSameMol",
+            &RDKit::RGroupDecompositionParameters::allowMultipleCoresInSameMol)
        .def_readwrite("doTautomers",
                       &RDKit::RGroupDecompositionParameters::doTautomers)
        .def_readwrite("doEnumeration",
--- a/Code/GraphMol/RGroupDecomposition/catch_rgd.cpp
+++ b/Code/GraphMol/RGroupDecomposition/catch_rgd.cpp
@@ -1141,3 +1141,85 @@ TEST_CASE("includeTargetMolInResults") {
    }
  }
 }
+
+TEST_CASE("Multiple Core Hits") {
+  {
+    std::vector<ROMOL_SPTR> cores{
+        "c1([*:9])c([*:8])c([*:7])c2c(c1([*:10]))c(c([*:5])n2([*:6]))[CH2]C([*:3])([*:4])[N,n]([*:1])([*:2])"_smarts};
+    REQUIRE(cores.front());
+    std::vector<ROMOL_SPTR> mols{
+        "CC1(C)N2[C@@H](Cc3c1[nH]c4ccccc34)C(=O)N(CCc5c[nH]c6ccccc56)CC2=O"_smiles};
+    REQUIRE(mols.front());
+    RGroupRows rows;
+    RGroupDecompositionParameters ps;
+    ps.allowMultipleCoresInSameMol = true;
+    auto n = RGroupDecompose(cores, mols, rows, nullptr, ps);
+    CHECK(n == 1);
+    CHECK(flatten_whitespace(toJSON(rows)) == flatten_whitespace(R"JSON(
+[
+  {
+    "Core":"c1ccc2c(C[C@H](N([*:1])[*:2])[*:3])c([*:5])[nH]c2c1",
+    "R1":"O=C(CN(CCc1c[nH]c2ccccc12)C(=O)[*:3])[*:1]",
+    "R2":"CC(C)([*:2])[*:5]",
+    "R3":"O=C(CN(CCc1c[nH]c2ccccc12)C(=O)[*:3])[*:1]",
+    "R5":"CC(C)([*:2])[*:5]"
+  },
+  {
+    "Core":"c1ccc2c(CC(N([*:1])[*:2])[*:3])c([*:5])[nH]c2c1",
+    "R1":"CC1(C)c2[nH]c3ccccc3c2C[C@@H](C(=O)[*:2])N1C(=O)C[*:1]",
+    "R2":"CC1(C)c2[nH]c3ccccc3c2C[C@@H](C(=O)[*:2])N1C(=O)C[*:1]",
+    "R3":"[H][*:3]",
+    "R5":"[H][*:5]"
+  }
+])JSON"));
+  }
+  {
+    std::vector<ROMOL_SPTR> cores{"c1ccccc1"_smarts};
+    std::vector<ROMOL_SPTR> mols{"Fc1ccccc1Nc2ccc(Cl)cc2"_smiles,
+                                 "c1cc(O)cc(Oc2cccc(Br)c2)c1"_smiles,
+                                 "Ic1ccccc1"_smiles};
+    RGroupRows rows;
+    RGroupDecompositionParameters ps;
+    ps.allowMultipleCoresInSameMol = true;
+    auto n = RGroupDecompose(cores, mols, rows, nullptr, ps);
+    CHECK(n == 3);
+    CHECK(flatten_whitespace(toJSON(rows)) == flatten_whitespace(R"JSON(
+[
+  {
+    "Core":"c1cc([*:4])c([*:3])c([*:2])c1[*:1]",
+    "R1":"[H][*:1]",
+    "R2":"[H][*:2]",
+    "R3":"F[*:3]",
+    "R4":"Clc1ccc(N[*:4])cc1"
+  },
+  {
+    "Core":"c1cc([*:4])c([*:3])c([*:2])c1[*:1]",
+    "R1":"Fc1ccccc1N[*:1]",
+    "R2":"[H][*:2]",
+    "R3":"[H][*:3]",
+    "R4":"Cl[*:4]"
+  },
+  {
+    "Core":"c1cc([*:4])c([*:3])c([*:2])c1[*:1]",
+    "R1":"[H][*:1]",
+    "R2":"O[*:2]",
+    "R3":"[H][*:3]",
+    "R4":"Brc1cccc(O[*:4])c1"
+  },
+  {
+    "Core":"c1cc([*:4])c([*:3])c([*:2])c1[*:1]",
+    "R1":"[H][*:1]",
+    "R2":"Oc1cccc(O[*:2])c1",
+    "R3":"[H][*:3]",
+    "R4":"Br[*:4]"
+  },
+  {
+    "Core":"c1cc([*:4])c([*:3])c([*:2])c1[*:1]",
+    "R1":"[H][*:1]",
+    "R2":"[H][*:2]",
+    "R3":"[H][*:3]",
+    "R4":"I[*:4]"
+  }
+])JSON"));
+  }
+}