RASCAL MCES (#6568)

2026-06-03 21:44:30 +08:00 · 2023-08-27 12:51:49 +01:00
parent 9184a143d8
commit 2dd9c5f3cd
28 changed files with 5815 additions and 3 deletions
--- a/Code/GraphMol/CMakeLists.txt
+++ b/Code/GraphMol/CMakeLists.txt
@@ -87,6 +87,7 @@ add_subdirectory(MolDraw2D)
 add_subdirectory(FMCS)
 add_subdirectory(MolHash)
 add_subdirectory(MMPA)
+add_subdirectory(RascalMCES)

 add_subdirectory(CIPLabeler)
 add_subdirectory(Deprotect)
@@ -193,6 +194,6 @@ rdkit_catch_test(queryTestsCatch catch_queries.cpp

 rdkit_catch_test(molbundleTestsCatch catch_molbundle.cpp
        LINK_LIBRARIES SmilesParse GraphMol)
-        
+
 rdkit_catch_test(pickleTestsCatch catch_pickles.cpp
        LINK_LIBRARIES FileParsers SmilesParse GraphMol)
--- a/Code/GraphMol/RascalMCES/CMakeLists.txt
+++ b/Code/GraphMol/RascalMCES/CMakeLists.txt
@@ -0,0 +1,16 @@
+# Library, installed headers, tests and wrappers for the RASCAL MCES code.
+rdkit_library(RascalMCES
+        RascalMCES.cpp RascalCluster.cpp RascalButinaCluster.cpp
+        lap_a_la_scipy.cpp PartitionSet.cpp RascalResult.cpp
+        LINK_LIBRARIES SmilesParse FileParsers ChemTransforms SubstructMatch GraphMol)
+target_compile_definitions(RascalMCES PRIVATE RDKIT_RASCALMCES_BUILD)
+
+rdkit_headers(RascalMCES.h RascalOptions.h RascalClusterOptions.h RascalResult.h
+        DEST GraphMol/RascalMCES)
+
+rdkit_catch_test(testRascalMCES mces_catch.cpp LINK_LIBRARIES RascalMCES)
+rdkit_catch_test(testRascalCluster mces_cluster_catch.cpp LINK_LIBRARIES RascalMCES)
+
+if (RDK_BUILD_PYTHON_WRAPPERS)
+    add_subdirectory(Wrap)
+endif ()
--- a/Code/GraphMol/RascalMCES/PartitionSet.cpp
+++ b/Code/GraphMol/RascalMCES/PartitionSet.cpp
@@ -0,0 +1,220 @@
+//
+// Copyright (C) David Cosgrove 2023
+//
+//   @@ All Rights Reserved @@
+//  This file is part of the RDKit.
+//  The contents are covered by the terms of the BSD license
+//  which is included in the file license.txt, found at the root
+//  of the RDKit source tree.
+//
+
+#include <algorithm>
+#include <iostream>
+#include <limits>
+#include <map>
+#include <memory>
+
+#include "PartitionSet.h"
+
+namespace RDKit {
+
+namespace RascalMCES {
+PartitionSet::PartitionSet(const std::vector<boost::dynamic_bitset<>> &modProd,
+                           const std::vector<std::pair<int, int>> &vtxPairs,
+                           const std::vector<unsigned int> &vtx1Labels,
+                           const std::vector<unsigned int> &vtx2Labels,
+                           unsigned int lowerBound)
+    : d_ModProd(new std::vector<boost::dynamic_bitset<>>(modProd)),
+      d_VtxPairs(new std::vector<std::pair<int, int>>(vtxPairs)),
+      d_vtx1Labels(new std::vector<unsigned int>(vtx1Labels)),
+      d_vtx2Labels(new std::vector<unsigned int>(vtx2Labels)) {
+  // Builds the partitions from the vertex pairs of the modular product:
+  // modProd has one row/column per entry in vtxPairs, and vtx1Labels /
+  // vtx2Labels hold the labels of the vertices referred to by the first
+  // and second members of each pair.  Partitions with an index greater
+  // than lowerBound are redistributed into the earlier ones if possible.
+  d_vtx1Counts.assign(d_vtx1Labels->size(), 0);
+  d_vtx2Counts.assign(d_vtx2Labels->size(), 0);
+
+  // Clearly, a vertex in one of the line graphs can only match one vertex
+  // in the other.  So the initial partitions are set up such that all the
+  // vertices in a partition have the same vertex in the first line graph;
+  // a new partition is started whenever the first member of a pair changes.
+  int currFirstVtx = -1;
+  for (size_t pairNum = 0; pairNum < vtxPairs.size(); ++pairNum) {
+    const auto &vtxPair = vtxPairs[pairNum];
+    if (vtxPair.first != currFirstVtx) {
+      d_parts.emplace_back();
+      currFirstVtx = vtxPair.first;
+    }
+    d_parts.back().push_back(pairNum);
+    ++d_vtx1Counts[vtxPair.first];
+    ++d_vtx2Counts[vtxPair.second];
+  }
+  if (d_parts.empty()) {
+    return;
+  }
+  // Sort the partitions by size so that the vertices at the top of the
+  // partition set, above the lowerBound (Pex in Raymond's paper), are the
+  // ones that match the fewest vertices in the other line graph.  This has
+  // a dramatic effect on the speed compared with other things tried, and is
+  // probably what Raymond means by "Perform an initial partitioning of the
+  // vertices... using the labeled edge projection procedure."
+  sortPartitions();
+
+  // Now reassign vertices from above Pex to below it if possible, which
+  // also improves the speed of finding a large clique early.  A vertex is
+  // moved to the first partition at or below Pex in which it is not
+  // connected, in the modular product graph, to any current member.
+  const auto movedVtx = std::numeric_limits<unsigned int>::max();
+  for (size_t i = d_parts.size() - 1; i > lowerBound; --i) {
+    auto &donor = d_parts[i];
+    for (auto &vtx : donor) {
+      for (size_t k = 0; k <= lowerBound; ++k) {
+        auto &recipient = d_parts[k];
+        const bool connected = std::any_of(
+            recipient.begin(), recipient.end(),
+            [&](unsigned int member) { return modProd[vtx][member]; });
+        if (!connected) {
+          recipient.push_back(vtx);
+          // Flag the slot so it can be stripped from the donor afterwards.
+          vtx = movedVtx;
+          break;
+        }
+      }
+    }
+    // A no-op when nothing was moved out of this partition.
+    donor.erase(std::remove(donor.begin(), donor.end(), movedVtx),
+                donor.end());
+  }
+  // Drop any partitions that were emptied by the reassignment.
+  d_parts.erase(std::remove_if(d_parts.begin(), d_parts.end(),
+                               [](const std::vector<unsigned int> &part) {
+                                 return part.empty();
+                               }),
+                d_parts.end());
+  // Sort again, to make sure the large partitions are dealt with as late as
+  // possible.
+  sortPartitions();
+  // Get the info together for the upper bound calculation.
+  calcVtxTypeCounts();
+}
+
+int PartitionSet::upperBound() {
+  int bound = 0;  // sum over the label types of the smaller per-type count
+  for (size_t type = 0; type < d_vtx1TypeCounts.size(); ++type) {
+    bound += std::min(d_vtx1TypeCounts[type], d_vtx2TypeCounts[type]);
+  }
+  return bound;
+}
+
+unsigned int PartitionSet::popLastVertex() {
+  if (d_parts.empty()) {
+    throw std::runtime_error("PartitionSet set is empty.");
+  }
+  // Take the final vertex of the final partition, discard that partition
+  // if it is now empty, and bring the vertex counts up to date.
+  const auto vtx = d_parts.back().back();
+  d_parts.back().pop_back();
+  if (d_parts.back().empty()) d_parts.pop_back();
+  decrementVertexCounts(vtx);
+  return vtx;
+}
+
+void PartitionSet::pruneVertices(unsigned int vtx_num) {
+  for (auto &part : d_parts) {
+    // Swap-and-pop removal: a vertex not connected to vtx_num is overwritten
+    // by the last member, so the same slot is tested again before moving on.
+    for (size_t pos = 0; pos < part.size();) {
+      const auto member = part[pos];
+      if ((*d_ModProd)[member][vtx_num]) {
+        ++pos;
+        continue;
+      }
+      decrementVertexCounts(member);
+      part[pos] = part.back();
+      part.pop_back();
+    }
+  }
+  d_parts.erase(std::remove_if(d_parts.begin(), d_parts.end(),
+                               [](const auto &p) { return p.empty(); }),
+                d_parts.end());
+  sortPartitions();
+}
+
+void PartitionSet::sortPartitions() {
+  // Partitions are put in descending order of size.  When sorting lists
+  // with duplicate values, the order of the duplicates isn't defined, and
+  // different compilers do it differently.  That can affect the results
+  // when more than 1 MCES is possible, because the partition order, and
+  // hence the search tree traversal, will be different (the results
+  // should be equivalent, though).  To make things consistent, ties on
+  // size are broken on the first value in each vector.  It doesn't slow
+  // things down very much on average, and it makes things tidier.
+  const auto bigFirst = [](const std::vector<unsigned int> &lhs,
+                           const std::vector<unsigned int> &rhs) {
+    if (lhs.size() != rhs.size()) {
+      return lhs.size() > rhs.size();
+    }
+    // Same size, so use the tie-breaker.  Empty partitions have no first
+    // value and are treated as equivalent to each other.
+    return !lhs.empty() && lhs.front() < rhs.front();
+  };
+  std::sort(d_parts.begin(), d_parts.end(), bigFirst);
+}
+
+void PartitionSet::calcVtxTypeCounts() {
+  // Counts, per label, the vertices that are in at least one partition.
+  auto tally = [](unsigned int maxLabel, const std::vector<int> &vtxCounts,
+                  const std::vector<unsigned int> &vtxLabels,
+                  std::vector<int> &vtxTypeCounts) -> void {
+    vtxTypeCounts.assign(maxLabel + 1, 0);
+    for (size_t v = 0; v < vtxCounts.size(); ++v) {
+      if (vtxCounts[v] != 0) {
+        ++vtxTypeCounts[vtxLabels[v]];
+      }
+    }
+  };
+  // Size both sets of counts by the largest label used in either graph.
+  const unsigned int largestLabel =
+      std::max(*std::max_element(d_vtx1Labels->begin(), d_vtx1Labels->end()),
+               *std::max_element(d_vtx2Labels->begin(), d_vtx2Labels->end()));
+  tally(largestLabel, d_vtx1Counts, *d_vtx1Labels, d_vtx1TypeCounts);
+  tally(largestLabel, d_vtx2Counts, *d_vtx2Labels, d_vtx2TypeCounts);
+}
+
+void PartitionSet::decrementVertexCounts(int vtxNum) {
+  // A type count only drops when the last use of a graph vertex goes.
+  const auto &vtxPair = (*d_VtxPairs)[vtxNum];
+  if (--d_vtx1Counts[vtxPair.first] == 0) {
+    --d_vtx1TypeCounts[(*d_vtx1Labels)[vtxPair.first]];
+  }
+  if (--d_vtx2Counts[vtxPair.second] == 0) {
+    --d_vtx2TypeCounts[(*d_vtx2Labels)[vtxPair.second]];
+  }
+}
+
+std::ostream &operator<<(std::ostream &os, const PartitionSet &pt) {
+  // Partitions first, one per line, then the vertex counts of each graph.
+  for (size_t i = 0; i < pt.d_parts.size(); ++i) {
+    os << i << " :: " << pt.d_parts[i].size() << " ::";
+    for (const auto mem : pt.d_parts[i]) {
+      const auto &vtxPair = (*pt.d_VtxPairs)[mem];
+      os << " " << mem << " (" << vtxPair.first << "," << vtxPair.second << ")";
+    }
+    os << std::endl;
+  }
+  const auto writeCounts = [&os](const char *title, const auto &counts) {
+    os << title;
+    for (const auto count : counts) {
+      os << " " << count;
+    }
+    os << std::endl;
+  };
+  writeCounts("vtx1_counts :", pt.d_vtx1Counts);
+  writeCounts("vtx2_counts :", pt.d_vtx2Counts);
+  return os;
+}
+
+}  // namespace RascalMCES
+}  // namespace RDKit
--- a/Code/GraphMol/RascalMCES/PartitionSet.h
+++ b/Code/GraphMol/RascalMCES/PartitionSet.h
@@ -0,0 +1,73 @@
+//
+// Copyright (C) David Cosgrove 2023
+//
+//   @@ All Rights Reserved @@
+//  This file is part of the RDKit.
+//  The contents are covered by the terms of the BSD license
+//  which is included in the file license.txt, found at the root
+//  of the RDKit source tree.
+//
+
+#ifndef RASCALMCES_PARTITION_SET_H
+#define RASCALMCES_PARTITION_SET_H
+
+#include <map>
+#include <vector>
+
+#include <boost/dynamic_bitset.hpp>
+
+namespace RDKit {
+
+namespace RascalMCES {
+
+class PartitionSet {
+ public:
+  // Make a partition set from the modular product and the labels
+  // of the vertices from the two graphs.  Each element in vtxPairs
+  // has a row/column in modProd.  The partitions are sorted
+  // into descending order of sizes.
+  PartitionSet(const std::vector<boost::dynamic_bitset<>> &modProd,
+               const std::vector<std::pair<int, int>> &vtxPairs,
+               const std::vector<unsigned int> &vtx1Labels,
+               const std::vector<unsigned int> &vtx2Labels,
+               unsigned int lowerBound);
+  // true if there are no partitions left.
+  bool isEmpty() const { return d_parts.empty(); }
+  // the number of partitions currently in the set.
+  size_t numParts() const { return d_parts.size(); }
+
+  // Compute the upper bound on the clique that can be extracted from
+  // the current partition.
+  int upperBound();
+  // writes the partitions and the vertex counts to the stream.
+  friend std::ostream &operator<<(std::ostream &os, const PartitionSet &pt);
+
+  // removes the last element of the last partition and returns
+  // its value. Throws a runtime_error if empty.
+  unsigned int popLastVertex();
+
+  // remove from the partitions any vertex not connected to the given
+  // vertex, then drop any empty partitions and re-sort the rest.
+  void pruneVertices(unsigned int vtx_num);
+
+ private:
+  std::shared_ptr<const std::vector<boost::dynamic_bitset<>>> d_ModProd;
+  std::shared_ptr<const std::vector<std::pair<int, int>>> d_VtxPairs;
+  std::shared_ptr<const std::vector<unsigned int>> d_vtx1Labels;
+  std::shared_ptr<const std::vector<unsigned int>> d_vtx2Labels;
+  std::vector<std::vector<unsigned int>> d_parts;
+  // counts of the number of times each vertex appears in the partitions
+  std::vector<int> d_vtx1Counts, d_vtx2Counts;
+  // counts of the number of times the d_vtx[12]Labels appear in the partitions
+  std::vector<int> d_vtx1TypeCounts, d_vtx2TypeCounts;
+  // sorts into descending order of size, ties broken on the first member.
+  void sortPartitions();
+  // fills d_vtx[12]TypeCounts from d_vtx[12]Counts and the labels.
+  void calcVtxTypeCounts();
+  // updates all the counts for the removal of vtxNum from the partitions.
+  void decrementVertexCounts(int vtxNum);
+};
+}  // namespace RascalMCES
+}  // namespace RDKit
+
+#endif  // RASCALMCES_PARTITION_SET_H
--- a/Code/GraphMol/RascalMCES/RascalButinaCluster.cpp
+++ b/Code/GraphMol/RascalMCES/RascalButinaCluster.cpp
@@ -0,0 +1,118 @@
+//
+// Copyright (C) David Cosgrove 2023
+//
+//   @@ All Rights Reserved @@
+//  This file is part of the RDKit.
+//  The contents are covered by the terms of the BSD license
+//  which is included in the file license.txt, found at the root
+//  of the RDKit source tree.
+//
+// This file contains an implementation of Butina clustering
+// (Butina JCICS 39 747-750 (1999)) using the RascalMCES
+// Johnson similarity metric.  It is largely a transliteration
+// of $RDBASE/rdkit/ML/Cluster/Butina.py.
+
+#include <algorithm>
+#include <iterator>
+#include <vector>
+#include <set>
+
+#include <GraphMol/ROMol.h>
+#include <GraphMol/RascalMCES/RascalMCES.h>
+#include <GraphMol/RascalMCES/RascalClusterOptions.h>
+#include <GraphMol/RascalMCES/RascalDetails.h>
+
+namespace RDKit {
+
+namespace RascalMCES {
+namespace details {
+std::vector<std::vector<unsigned int>> buildNborLists(
+    const std::vector<std::vector<ClusNode>> &proxGraph) {
+  using MolSim = std::pair<unsigned int, double>;
+  std::vector<std::vector<unsigned int>> nborLists;
+  for (size_t molNum = 0; molNum < proxGraph.size(); ++molNum) {
+    // Collect the molecules that have a result against molNum.
+    std::vector<MolSim> nbors;
+    for (const auto &node : proxGraph[molNum]) {
+      if (!node.d_res) {
+        continue;
+      }
+      const auto other =
+          molNum == node.d_mol1Num ? node.d_mol2Num : node.d_mol1Num;
+      nbors.push_back({other, node.d_sim});
+    }
+    // Most similar neighbours first.
+    std::sort(nbors.begin(), nbors.end(),
+              [](const MolSim &a, const MolSim &b) -> bool {
+                return a.second > b.second;
+              });
+    // The list is headed by the molecule itself.
+    std::vector<unsigned int> nborList(1, molNum);
+    for (const auto &nbor : nbors) {
+      nborList.push_back(nbor.first);
+    }
+    nborLists.push_back(nborList);
+  }
+  // Biggest neighbour lists first; lists of the same size are put in
+  // descending lexicographic order.
+  std::sort(nborLists.begin(), nborLists.end(),
+            [](const std::vector<unsigned int> &nl1,
+               const std::vector<unsigned int> &nl2) -> bool {
+              if (nl1.size() != nl2.size()) {
+                return nl1.size() > nl2.size();
+              }
+              return nl1 > nl2;
+            });
+  return nborLists;
+}
+
+// Forms clusters from the sorted neighbour lists.  Destroys nborLists.
+std::vector<std::vector<unsigned int>> formClusters(
+    std::vector<std::vector<unsigned int>> &nborLists) {
+  const auto bySizeThenContent = [](const std::vector<unsigned int> &nl1,
+                                    const std::vector<unsigned int> &nl2) {
+    return nl1.size() == nl2.size() ? nl1 > nl2 : nl1.size() > nl2.size();
+  };
+  const auto taken = std::numeric_limits<unsigned int>::max();
+  std::vector<std::vector<unsigned int>> clusters;
+
+  while (!nborLists.empty()) {
+    // The list at the front becomes the next cluster.
+    clusters.push_back(nborLists.front());
+    const std::set<unsigned int> clustered(clusters.back().begin(),
+                                           clusters.back().end());
+    nborLists.front().clear();
+    // Strike the newly clustered molecules out of all the other lists.
+    for (auto &nborList : nborLists) {
+      for (auto &n : nborList) {
+        if (clustered.count(n) != 0) {
+          n = taken;
+        }
+      }
+      nborList.erase(std::remove(nborList.begin(), nborList.end(), taken),
+                     nborList.end());
+    }
+    // Discard the lists that are now empty and restore the sort order for
+    // the next pass.
+    nborLists.erase(
+        std::remove_if(nborLists.begin(), nborLists.end(),
+                       [](const std::vector<unsigned int> &nl) -> bool {
+                         return nl.empty();
+                       }),
+        nborLists.end());
+    std::sort(nborLists.begin(), nborLists.end(), bySizeThenContent);
+  }
+  return clusters;
+}
+
+}  // namespace details
+std::vector<std::vector<unsigned int>> rascalButinaCluster(
+    const std::vector<std::shared_ptr<ROMol>> &mols,
+    const RascalClusterOptions &clusOpts) {
+  // Pairwise similarities -> sorted neighbour lists -> clusters.
+  const auto proxGraph = details::buildProximityGraph(mols, clusOpts);
+  auto nborLists = details::buildNborLists(proxGraph);
+  return details::formClusters(nborLists);
+}
+}  // namespace RascalMCES
+}  // namespace RDKit
--- a/Code/GraphMol/RascalMCES/RascalCluster.cpp
+++ b/Code/GraphMol/RascalMCES/RascalCluster.cpp
@@ -0,0 +1,382 @@
+//
+// Copyright (C) David Cosgrove 2023
+//
+//   @@ All Rights Reserved @@
+//  This file is part of the RDKit.
+//  The contents are covered by the terms of the BSD license
+//  which is included in the file license.txt, found at the root
+//  of the RDKit source tree.
+//
+// This file contains an implementation of the clustering algorithm
+// described in
+// 'A Line Graph Algorithm for Clustering Chemical Structures Based
+// on Common Substructural Cores', JW Raymond, PW Willett.
+// https://match.pmf.kg.ac.rs/electronic_versions/Match48/match48_197-207.pdf
+// https://eprints.whiterose.ac.uk/77598/
+// It uses the RASCAL MCES algorithm to perform a fuzzy clustering
+// of a set of molecules.
+
+#include <algorithm>
+#include <iterator>
+#include <list>
+#include <thread>
+#include <vector>
+
+#include <RDGeneral/RDThreads.h>
+#include <GraphMol/ROMol.h>
+#include <GraphMol/MolOps.h>
+#include <GraphMol/RascalMCES/RascalClusterOptions.h>
+#include <GraphMol/RascalMCES/RascalDetails.h>
+#include <GraphMol/RascalMCES/RascalMCES.h>
+#include <GraphMol/RascalMCES/RascalResult.h>
+
+namespace RDKit {
+namespace RascalMCES {
+namespace details {
+ClusNode calcMolMolSimilarity(
+    const std::tuple<
+        size_t, size_t, const std::vector<std::shared_ptr<ROMol>> *,
+        const RascalOptions *, const RascalClusterOptions *> &toDo) {
+  // toDo holds the numbers of the 2 molecules to compare, the full set of
+  // molecules, and the options for the MCES search and the clustering.
+  const auto &[mol1Num, mol2Num, mols, opts, clusOpts] = toDo;
+  ClusNode cn;
+  cn.d_mol1Num = mol1Num;
+  cn.d_mol2Num = mol2Num;
+  // The similarity is left at 0 if no MCES was found, which can happen
+  // even though tier1Sim and tier2Sim were above the threshold, or if
+  // the first result has no matching bonds.
+  cn.d_sim = 0.0;
+
+  auto results = rascalMCES(*(*mols)[mol1Num], *(*mols)[mol2Num], *opts);
+  if (results.empty() || results.front().getBondMatches().empty()) {
+    return cn;
+  }
+  // Tidy the first result up before taking its similarity.
+  auto &best = results.front();
+  best.trimSmallFrags();
+  best.largestFragsOnly(clusOpts->maxNumFrags);
+  cn.d_sim = best.getSimilarity();
+  // The result itself is only kept if the final similarity is still at
+  // least the threshold.
+  if (cn.d_sim >= opts->similarityThreshold) {
+    cn.d_res = std::make_shared<RascalResult>(best);
+  }
+  return cn;
+}
+
+std::vector<std::vector<ClusNode>> buildProximityGraph(
+    const std::vector<std::shared_ptr<ROMol>> &mols,
+    const RascalClusterOptions &clusOpts) {
+  using SimJob =
+      std::tuple<size_t, size_t, const std::vector<std::shared_ptr<ROMol>> *,
+                 const RascalOptions *, const RascalClusterOptions *>;
+  // With fewer than 2 molecules there are no pairs to compare.
+  if (mols.size() < 2) {
+    return std::vector<std::vector<ClusNode>>();
+  }
+  std::vector<std::vector<ClusNode>> proxGraph(
+      mols.size(), std::vector<ClusNode>(mols.size(), ClusNode()));
+  // One job for each distinct pair of molecules.  The jobs hold pointers
+  // to opts and clusOpts, which must stay alive until all have been run.
+  RascalOptions opts;
+  opts.similarityThreshold = clusOpts.similarityCutoff;
+  std::vector<SimJob> toDo;
+  for (size_t i = 0; i < mols.size() - 1; ++i) {
+    for (size_t j = i + 1; j < mols.size(); ++j) {
+      toDo.push_back({i, j, &mols, &opts, &clusOpts});
+    }
+  }
+  // Runs the jobs in [start, finish), clipped to the number of jobs, and
+  // puts each result in the slot of molSims matching its job.
+  auto buildProxGraphPart = [](const std::vector<SimJob> &jobs,
+                               std::vector<ClusNode> &molSims, size_t start,
+                               size_t finish) -> void {
+    if (start > jobs.size()) {
+      return;
+    }
+    finish = std::min(finish, jobs.size());
+    std::transform(jobs.begin() + start, jobs.begin() + finish,
+                   molSims.begin() + start, calcMolMolSimilarity);
+  };
+  std::vector<ClusNode> molSims(toDo.size());
+#if RDK_BUILD_THREADSAFE_SSS
+  auto numThreads = getNumThreadsToUse(clusOpts.numThreads);
+  if (numThreads > 1) {
+    // Each thread gets its own contiguous block of jobs, and so writes to
+    // its own block of molSims.
+    const size_t eachThread = 1 + (toDo.size() / numThreads);
+    std::vector<std::thread> threads;
+    for (unsigned int i = 0U; i < numThreads; ++i) {
+      const size_t start = i * eachThread;
+      threads.push_back(std::thread(buildProxGraphPart, std::ref(toDo),
+                                    std::ref(molSims), start,
+                                    start + eachThread));
+    }
+    for (auto &t : threads) {
+      t.join();
+    }
+  } else {
+    std::transform(toDo.begin(), toDo.end(), molSims.begin(),
+                   calcMolMolSimilarity);
+  }
+#else
+  std::transform(toDo.begin(), toDo.end(), molSims.begin(),
+                 calcMolMolSimilarity);
+#endif
+  // The graph is symmetric, so each result goes in both triangles.
+  for (const auto &cn : molSims) {
+    proxGraph[cn.d_mol2Num][cn.d_mol1Num] = cn;
+    proxGraph[cn.d_mol1Num][cn.d_mol2Num] = cn;
+  }
+  return proxGraph;
+}
+
+// Split the proximity graph into its disconnected components,
+// returning vectors of the molecule numbers of the disconnected
+// graphs.  Each vector is sorted, and a molecule with no neighbours
+// gives a vector of 1.
+std::vector<std::vector<unsigned int>> disconnectProximityGraphs(
+    std::vector<std::vector<ClusNode>> &proxGraph) {
+  std::vector<std::vector<unsigned int>> subGraphs;
+  std::vector<bool> seen(proxGraph.size(), false);
+  for (size_t seed = 0; seed < proxGraph.size(); ++seed) {
+    if (seen[seed]) {
+      continue;
+    }
+    // Breadth-first walk out from the first molecule not yet in a
+    // component.  members doubles as the queue: everything before
+    // cursor has already had its neighbours added.
+    std::vector<unsigned int> members(1, seed);
+    seen[seed] = true;
+    for (size_t cursor = 0; cursor < members.size(); ++cursor) {
+      const auto current = members[cursor];
+      for (size_t i = 0; i < proxGraph.size(); ++i) {
+        if (!seen[i] && proxGraph[current][i].d_res) {
+          members.push_back(i);
+          seen[i] = true;
+        }
+      }
+    }
+    // Members are reported in ascending order.
+    std::sort(members.begin(), members.end());
+    subGraphs.push_back(members);
+  }
+  return subGraphs;
+}
+
+// Calculate G_{ij} for the molecule, i.e. its atom count plus its bond
+// count weighted by b and penalised by a for each extra fragment.  Only
+// fragments with more than p bonds count as fragments, unless there are
+// none, in which case they all do.
+double g_ij(const std::shared_ptr<ROMol> &mol, double a, double b,
+            unsigned int p) {
+  const auto frags = MolOps::getMolFrags(*mol, false);
+  int numFrags = static_cast<int>(std::count_if(
+      frags.begin(), frags.end(),
+      [p](const auto &frag) { return frag->getNumBonds() > p; }));
+  if (numFrags == 0) {
+    numFrags = frags.size();
+  }
+  const double bondWeight = b * (1.0 - a * (numFrags - 1));
+  return mol->getNumAtoms() + bondWeight * mol->getNumBonds();
+}
+
+std::vector<std::vector<unsigned int>> makeSubClusters(
+    const std::vector<ClusNode> &nbors, const RascalClusterOptions &clusOpts) {
+  const auto calcG = [&clusOpts](const std::shared_ptr<ROMol> &mol) {
+    return g_ij(mol, clusOpts.a, clusOpts.b, clusOpts.minFragSize);
+  };
+  std::vector<const ClusNode *> pending;
+  for (const auto &nbor : nbors) {
+    pending.push_back(&nbor);
+  }
+  std::vector<std::vector<unsigned int>> subClusters;
+  while (!pending.empty()) {
+    // The first remaining pair of molecules seeds a new sub-cluster...
+    const auto *seed = pending.front();
+    std::vector<unsigned int> members{seed->d_mol1Num, seed->d_mol2Num};
+    const auto seedMces = seed->d_res->getMcesMol();
+    const auto seedG = calcG(seedMces);
+    // ...and is joined by each other pair with a similar enough MCES.
+    for (size_t i = 1; i < pending.size(); ++i) {
+      const auto otherMces = pending[i]->d_res->getMcesMol();
+      const auto otherG = calcG(otherMces);
+      auto results = RDKit::RascalMCES::rascalMCES(*seedMces, *otherMces);
+      if (results.empty() || results.front().getBondMatches().empty()) {
+        continue;
+      }
+      auto common = results.front();
+      const double sim = calcG(common.getMcesMol()) / std::min(seedG, otherG);
+      if (sim > clusOpts.minIntraClusterSim) {
+        members.push_back(pending[i]->d_mol2Num);
+        members.push_back(pending[i]->d_mol1Num);
+        pending[i] = nullptr;
+      }
+    }
+    // Remove the seed and the pairs that joined it from the pending list.
+    pending.front() = nullptr;
+    pending.erase(std::remove(pending.begin(), pending.end(), nullptr),
+                  pending.end());
+    std::sort(members.begin(), members.end());
+    members.erase(std::unique(members.begin(), members.end()), members.end());
+    subClusters.push_back(members);
+  }
+  return subClusters;
+}
+
+std::vector<std::vector<unsigned int>> formInitialClusters(
+    const std::vector<unsigned int> &subGraph,
+    const std::vector<std::vector<ClusNode>> &proxGraph,
+    const RascalClusterOptions &clusOpts) {
+  // Makes the de-duplicated sub-clusters for the molecules in subGraph.
+  std::vector<std::vector<unsigned int>> clusters;
+  if (subGraph.size() < 2) {
+    return clusters;
+  }
+  for (auto i : subGraph) {
+    std::vector<ClusNode> nbors;
+    for (auto j : subGraph) {
+      if (proxGraph[i][j].d_res) {
+        nbors.push_back(proxGraph[i][j]);
+      }
+    }
+    std::sort(nbors.begin(), nbors.end(),
+              [](const ClusNode &c1, const ClusNode &c2) -> bool {
+                return c1.d_sim > c2.d_sim;
+              });
+    if (!nbors.empty()) {
+      auto subClusters = makeSubClusters(nbors, clusOpts);
+      clusters.insert(clusters.end(), subClusters.begin(), subClusters.end());
+    }
+  }
+  // Ties on size are broken on the full contents, not just the first
+  // member, so that identical clusters are guaranteed to end up adjacent,
+  // which std::unique needs in order to remove all the duplicates.
+  std::sort(clusters.begin(), clusters.end(),
+            [](const std::vector<unsigned int> &c1,
+               const std::vector<unsigned int> &c2) -> bool {
+              return c1.size() == c2.size() ? c1 < c2 : c1.size() > c2.size();
+            });
+  clusters.erase(std::unique(clusters.begin(), clusters.end()), clusters.end());
+  return clusters;
+}
+
+std::vector<std::vector<unsigned int>> mergeClusters(
+    const std::vector<std::vector<unsigned int>> &clusters,
+    const RascalClusterOptions &clusOpts) {
+  std::vector<std::vector<unsigned int>> merged(clusters);
+  // With fewer than 2 clusters there is nothing to merge (and an empty
+  // set would make the unsigned loop bound below wrap round).
+  if (merged.size() < 2) {
+    return merged;
+  }
+
+  for (size_t i = 0; i < merged.size() - 1; ++i) {
+    auto &keeper = merged[i];
+    for (size_t j = i + 1; j < merged.size(); ++j) {
+      auto &other = merged[j];
+      // The overlap is the number of shared molecules as a fraction of
+      // the size of the smaller cluster.  Both clusters must be sorted.
+      std::vector<int> shared;
+      std::set_intersection(keeper.begin(), keeper.end(), other.begin(),
+                            other.end(), std::back_inserter(shared));
+      const double overlap =
+          double(shared.size()) /
+          std::min(double(keeper.size()), double(other.size()));
+      if (overlap > clusOpts.clusterMergeSim) {
+        // Fold other into keeper and leave it empty for removal below.
+        keeper.insert(keeper.end(), other.begin(), other.end());
+        other.clear();
+        std::sort(keeper.begin(), keeper.end());
+        keeper.erase(std::unique(keeper.begin(), keeper.end()), keeper.end());
+      }
+    }
+    // Drop the clusters that were merged into this one.
+    merged.erase(std::remove_if(merged.begin(), merged.end(),
+                                [](const auto &c) { return c.empty(); }),
+                 merged.end());
+  }
+
+  return merged;
+}
+
+void sortClusterMembersByMeanSim(
+    const std::vector<std::vector<ClusNode>> &proxGraph,
+    std::vector<std::vector<unsigned int>> &clusters) {
+  // Puts the members of each cluster, in place, in descending order of
+  // their mean similarity to the other members of that cluster.
+  using MolSim = std::pair<unsigned int, double>;
+  for (auto &clus : clusters) {
+    // A lone member has no other members to average over (the mean would
+    // be 0/0), and there is nothing to sort, so leave it as it is.
+    if (clus.size() < 2) continue;
+    std::vector<MolSim> clusSims;
+    for (size_t i = 0; i < clus.size(); ++i) {
+      double totSim = 0.0;
+      for (size_t j = 0; j < clus.size(); ++j) {
+        if (i != j) totSim += proxGraph[clus[i]][clus[j]].d_sim;
+      }
+      clusSims.push_back({clus[i], totSim / (clus.size() - 1)});
+    }
+    std::sort(clusSims.begin(), clusSims.end(),
+              [](const MolSim &p1, const MolSim &p2) -> bool {
+                return p1.second > p2.second;
+              });
+    std::transform(clusSims.begin(), clusSims.end(), clus.begin(),
+                   [](const MolSim &p) -> unsigned int { return p.first; });
+  }
+}
+
+std::vector<std::vector<unsigned int>> makeClusters(
+    const std::vector<std::vector<unsigned int>> &subGraphs,
+    const std::vector<std::vector<ClusNode>> &proxGraph,
+    const RascalClusterOptions &clusOpts) {
+  // Cluster each sub-graph on its own, pool the results, largest first.
+  std::vector<std::vector<unsigned int>> allClusters;
+  for (const auto &subGraph : subGraphs) {
+    const auto merged = mergeClusters(
+        formInitialClusters(subGraph, proxGraph, clusOpts), clusOpts);
+    allClusters.insert(allClusters.end(), merged.begin(), merged.end());
+  }
+  const auto bigger = [](const std::vector<unsigned int> &c1,
+                         const std::vector<unsigned int> &c2) -> bool {
+    return c1.size() > c2.size();
+  };
+  std::sort(allClusters.begin(), allClusters.end(), bigger);
+  return allClusters;
+}
+
+std::vector<unsigned int> collectSingletons(
+    const std::vector<std::vector<ClusNode>> &proxGraph) {
+  // A singleton is a molecule whose row of the proximity graph has no
+  // results in it, i.e. one that has no neighbours.  They are returned
+  // in ascending order of molecule number.
+  std::vector<unsigned int> singletons;
+  for (size_t molNum = 0; molNum < proxGraph.size(); ++molNum) {
+    const auto &row = proxGraph[molNum];
+    const bool hasNbor =
+        std::any_of(row.begin(), row.end(),
+                    [](const ClusNode &cn) { return bool(cn.d_res); });
+    if (!hasNbor) {
+      singletons.push_back(molNum);
+    }
+  }
+  return singletons;
+}
+}  // namespace details
+
+std::vector<std::vector<unsigned int>> rascalCluster(
+    const std::vector<std::shared_ptr<ROMol>> &mols,
+    const RascalClusterOptions &clusOpts) {
+  auto proxGraph = details::buildProximityGraph(mols, clusOpts);
+  const auto subGraphs = details::disconnectProximityGraphs(proxGraph);
+  auto clusters = details::makeClusters(subGraphs, proxGraph, clusOpts);
+  // The singletons always form the final cluster, which may be empty.
+  clusters.push_back(details::collectSingletons(proxGraph));
+  details::sortClusterMembersByMeanSim(proxGraph, clusters);
+  return clusters;
+}
+
+}  // namespace RascalMCES
+}  // namespace RDKit
--- a/Code/GraphMol/RascalMCES/RascalClusterOptions.h
+++ b/Code/GraphMol/RascalMCES/RascalClusterOptions.h
@@ -0,0 +1,53 @@
+//
+// Copyright (C) David Cosgrove 2023
+//
+//   @@ All Rights Reserved @@
+//  This file is part of the RDKit.
+//  The contents are covered by the terms of the BSD license
+//  which is included in the file license.txt, found at the root
+//  of the RDKit source tree.
+//
+// Options for Rascal Clustering.  In general, the option names and defaults
+// are taken from the paper:
+// 'A Line Graph Algorithm for Clustering Chemical Structures Based
+// on Common Substructural Cores', JW Raymond, PW Willett.
+// https://match.pmf.kg.ac.rs/electronic_versions/Match48/match48_197-207.pdf
+// https://eprints.whiterose.ac.uk/77598/
+
+#include <RDGeneral/export.h>
+
+#ifndef RASCALCLUSTEROPTIONS_H
+#define RASCALCLUSTEROPTIONS_H
+
+namespace RDKit {
+namespace RascalMCES {
+
+struct RDKIT_RASCALMCES_EXPORT RascalClusterOptions {
+  double similarityCutoff = 0.7; /* Similarity cutoff for clustering.  Initial
+                                    clusters will have molecule pairs of at
+                                    least this similarity. */
+  double a = 0.05; /* penalty score for each unconnected component in MCES */
+  double b = 2.0;  /* weight of matched bonds over matched atoms */
+  unsigned int minFragSize =
+      3; /* p in the paper.  NOTE(review): described as a minimum number of
+            atoms, but g_ij() compares it with fragment bond counts. */
+  double minIntraClusterSim = 0.9; /* two pairs of molecules are included in the
+                                      same cluster if the similarity between
+                                      their MCESs is greater than this. S_a
+                                      in the paper */
+  double clusterMergeSim = 0.6;    /* two clusters are merged if the fraction of
+                                      the smaller one's molecules that they
+                                      share is greater than this. S_b in paper */
+  unsigned int maxNumFrags = 2; /* The maximum number of fragments in any MCES.
+                                   Otherwise the MCES can be a lot of small
+                                   fragments scattered across the molecule - it
+                                   tries too hard to find a match, sometimes */
+  int numThreads = -1; /* The number of threads to use.  If > 0, will use that
+                          number.  If <= 0, will use the number of hardware
+                          threads plus this number.  So if the number of
+                          hardware threads is 8, and numThreads is -1, it will
+                          use 7 threads. */
+};
+}  // namespace RascalMCES
+}  // namespace RDKit
+#endif  // RASCALCLUSTEROPTIONS_H
--- a/Code/GraphMol/RascalMCES/RascalDetails.h
+++ b/Code/GraphMol/RascalMCES/RascalDetails.h
@@ -0,0 +1,94 @@
+//
+// Copyright (C) David Cosgrove 2023
+//
+//   @@ All Rights Reserved @@
+//  This file is part of the RDKit.
+//  The contents are covered by the terms of the BSD license
+//  which is included in the file license.txt, found at the root
+//  of the RDKit source tree.
+//
+
+#include <RDGeneral/export.h>
+#ifndef RDKIT_RASCAL_DETAILS_H
+#define RDKIT_RASCAL_DETAILS_H
+
+#include <map>
+
+#include <GraphMol/RascalMCES/RascalOptions.h>
+#include <GraphMol/RascalMCES/RascalResult.h>
+namespace RDKit {
+class ROMol;
+
+namespace RascalMCES {
+
+// Forward declaration.  RascalClusterOptions is defined as a struct, so it
+// is declared with the same class-key here; a class/struct mismatch
+// triggers warnings on some compilers (e.g. MSVC C4099).
+struct RascalClusterOptions;
+
+namespace details {
+
+// Element type of the proximity graph returned by buildProximityGraph().
+// NOTE(review): presumably d_res is the MCES result for a pair of molecules,
+// d_sim its similarity and d_mol1Num/d_mol2Num the indices of the two
+// molecules in the input vector - confirm against the implementation.
+struct ClusNode {
+  std::shared_ptr<RascalResult> d_res;
+  double d_sim;
+  unsigned int d_mol1Num;
+  unsigned int d_mol2Num;
+};
+
+// Tier 1 similarity between the two molecules.
+// NOTE(review): degSeqs1 and degSeqs2 are passed by non-const reference, so
+// they are presumably filled in by this call for later use (e.g. by
+// tier2Sim, which takes them by const reference) - confirm against the
+// implementation.
+RDKIT_RASCALMCES_EXPORT double tier1Sim(
+    const RDKit::ROMol &mol1, const RDKit::ROMol &mol2,
+    std::map<int, std::vector<std::pair<int, int>>> &degSeqs1,
+    std::map<int, std::vector<std::pair<int, int>>> &degSeqs2);
+
+// Tier 2 similarity between the two molecules, computed from the degree
+// sequences and bond labels supplied by the caller.
+// NOTE(review): presumably degSeqs1/degSeqs2 come from tier1Sim() and
+// bondLabels1/bondLabels2 from getBondLabels() - confirm against callers.
+RDKIT_RASCALMCES_EXPORT double tier2Sim(
+    const ROMol &mol1, const ROMol &mol2,
+    const std::map<int, std::vector<std::pair<int, int>>> &degSeqs1,
+    const std::map<int, std::vector<std::pair<int, int>>> &degSeqs2,
+    const std::vector<unsigned int> &bondLabels1,
+    const std::vector<unsigned int> &bondLabels2);
+
+// Compute bond labels for the two molecules under the given options.  The
+// labels are returned through the output parameters bondLabels1 (for mol1)
+// and bondLabels2 (for mol2).
+// NOTE(review): presumably one label per bond, indexed by bond index -
+// confirm against the implementation.
+RDKIT_RASCALMCES_EXPORT void getBondLabels(
+    const RDKit::ROMol &mol1, const RDKit::ROMol &mol2,
+    const RascalOptions &opts, std::vector<unsigned int> &bondLabels1,
+    std::vector<unsigned int> &bondLabels2);
+
+// Build the proximity graph for the given molecules using the clustering
+// options, returned as a vector of vectors of ClusNode.
+// NOTE(review): unlike the other functions in this header this one is not
+// marked RDKIT_RASCALMCES_EXPORT - confirm that is intentional.
+std::vector<std::vector<ClusNode>> buildProximityGraph(
+    const std::vector<std::shared_ptr<ROMol>> &mols,
+    const RascalClusterOptions &clusOpts);
+
+// Ordering predicate for RascalResult objects; returns true if res1 should
+// sort ahead of res2.  The criteria are applied in turn: more bond matches,
+// then fewer fragments, then a larger largest fragment, then a lower
+// ring/non-ring bond score, then a lower atom match score, followed by
+// further tie-breakers (see the implementation in RascalResult.cpp).
+RDKIT_RASCALMCES_EXPORT bool resultCompare(const RascalResult &res1,
+                                           const RascalResult &res2);
+
+// Convert a clique into pairs of matching bonds, returned through the
+// output parameter bondMatches.
+// NOTE(review): presumably each clique entry is an index into vtxPairs, and
+// swapped indicates the pairs should be reversed so that the first member
+// always refers to the result's first molecule - confirm against the
+// implementation.
+RDKIT_RASCALMCES_EXPORT void extractClique(
+    const std::vector<unsigned int> &clique,
+    const std::vector<std::pair<int, int>> &vtxPairs, bool swapped,
+    std::vector<std::pair<int, int>> &bondMatches);
+
+// Do some simple cleaning of the SMARTS, to make it more user-friendly.
+// The string is passed by non-const reference and modified in place.
+RDKIT_RASCALMCES_EXPORT void cleanSmarts(std::string &smarts);
+
+// Primarily for debugging: write the corresponding (matched) bonds of the
+// result to os in Python list format, for ease of cut/paste into a
+// highlighted image creation.
+RDKIT_RASCALMCES_EXPORT void printBondMatches(const RascalResult &res,
+                                              std::ostream &os);
+
+// Primarily for debugging: write the corresponding (matched) atoms of the
+// result to os in Python list format, for ease of cut/paste into a
+// highlighted image creation.
+RDKIT_RASCALMCES_EXPORT void printAtomMatches(const RascalResult &res,
+                                              std::ostream &os);
+
+// Write the scores of the result to os, in the order in which they are used
+// by resultCompare().
+RDKIT_RASCALMCES_EXPORT void printScores(const RascalResult &res,
+                                         std::ostream &os);
+
+// Calculate the Johnson similarity between the two molecules using the given
+// bondMatches and atomMatches.  It's the fraction of the 2 molecules that
+// are in common, somewhat akin to the Tanimoto: the square of the number of
+// atoms plus the number of bonds in the MCES, divided by the product of the
+// sums of the number of atoms and bonds in the 2 molecules.
+RDKIT_RASCALMCES_EXPORT double johnsonSimilarity(
+    const std::vector<std::pair<int, int>> &bondMatches,
+    const std::vector<std::pair<int, int>> &atomMatches,
+    const RDKit::ROMol &mol1, const RDKit::ROMol &mol2);
+
+}  // namespace details
+
+}  // namespace RascalMCES
+}  // namespace RDKit
+#endif  // RDKIT_RASCAL_DETAILS_H
--- a/Code/GraphMol/RascalMCES/RascalMCES.cpp
+++ b/Code/GraphMol/RascalMCES/RascalMCES.cpp
--- a/Code/GraphMol/RascalMCES/RascalMCES.h
+++ b/Code/GraphMol/RascalMCES/RascalMCES.h
@@ -0,0 +1,73 @@
+//
+// Copyright (C) David Cosgrove 2023
+//
+//   @@ All Rights Reserved @@
+//  This file is part of the RDKit.
+//  The contents are covered by the terms of the BSD license
+//  which is included in the file license.txt, found at the root
+//  of the RDKit source tree.
+//
+
+#include <RDGeneral/export.h>
+#ifndef RDKIT_RASCAL_MCES_H
+#define RDKIT_RASCAL_MCES_H
+
+#include <vector>
+
+#include <GraphMol/RascalMCES/RascalClusterOptions.h>
+#include <GraphMol/RascalMCES/RascalOptions.h>
+#include <GraphMol/RascalMCES/RascalResult.h>
+namespace RDKit {
+class ROMol;
+
+namespace RascalMCES {
+
+/*!
+ * Find one or more MCESs between the two molecules.  The MCES is the
+ * Maximum Common Edge Substructure, and is the largest set of bonds
+ * common to the 2 molecules.
+ *
+ * @param mol1 : first molecule
+ * @param mol2 : second molecule for MCES determination.
+ * @param opts : (optional) set of options controlling the MCES
+ *               determination; defaults to a default-constructed
+ *               RascalOptions.
+ * @return : vector of RascalResult objects.
+ */
+RDKIT_RASCALMCES_EXPORT std::vector<RascalResult> rascalMCES(
+    const ROMol &mol1, const ROMol &mol2,
+    const RascalOptions &opts = RascalOptions());
+
+/*!
+ * Cluster the molecules using the Johnson similarity from rascalMCES
+ * and the algorithm of
+ * 'A Line Graph Algorithm for Clustering Chemical Structures Based
+ * on Common Substructural Cores', JW Raymond, PW Willett.
+ * https://match.pmf.kg.ac.rs/electronic_versions/Match48/match48_197-207.pdf
+ * https://eprints.whiterose.ac.uk/77598/
+ * This is a fuzzy clustering algorithm, so a molecule may appear in more
+ * than one cluster.  The final cluster is all the molecules that didn't fit
+ * into another cluster (the singletons).
+ *
+ * @param mols : molecules to cluster
+ * @param clusOpts : (optional) cluster options; defaults to a
+ *                   default-constructed RascalClusterOptions.
+ * @return clusters as vector of vectors of unsigned ints - indices into the
+ *         input mols vector
+ */
+RDKIT_RASCALMCES_EXPORT std::vector<std::vector<unsigned int>> rascalCluster(
+    const std::vector<std::shared_ptr<ROMol>> &mols,
+    const RascalClusterOptions &clusOpts = RascalClusterOptions());
+
+/*!
+ * Cluster the molecules using the Johnson similarity from rascalMCES and
+ * the Butina algorithm.  Butina JCICS 39 747-750 (1999).
+ *
+ * @param mols : molecules to cluster
+ * @param clusOpts : (optional) cluster options; defaults to a
+ *                   default-constructed RascalClusterOptions.
+ * @return clusters as vector of vectors of unsigned ints - indices into the
+ *         input mols vector
+ */
+RDKIT_RASCALMCES_EXPORT std::vector<std::vector<unsigned int>>
+rascalButinaCluster(
+    const std::vector<std::shared_ptr<ROMol>> &mols,
+    const RascalClusterOptions &clusOpts = RascalClusterOptions());
+}  // namespace RascalMCES
+}  // namespace RDKit
+#endif  // RDKIT_RASCAL_MCES_H
--- a/Code/GraphMol/RascalMCES/RascalOptions.h
+++ b/Code/GraphMol/RascalMCES/RascalOptions.h
@@ -0,0 +1,50 @@
+//
+// Copyright (C) David Cosgrove 2023
+//
+//   @@ All Rights Reserved @@
+//  This file is part of the RDKit.
+//  The contents are covered by the terms of the BSD license
+//  which is included in the file license.txt, found at the root
+//  of the RDKit source tree.
+//
+#include <RDGeneral/export.h>
+
+#ifndef RASCALOPTIONS_H
+#define RASCALOPTIONS_H
+
+namespace RDKit {
+
+namespace RascalMCES {
+
+// Options controlling the MCES determination in rascalMCES().
+struct RDKIT_RASCALMCES_EXPORT RascalOptions {
+  // If the calculated similarity is below this, no MCES will be evaluated.
+  double similarityThreshold = 0.7;
+  // If true, partial aromatic rings won't be returned.
+  bool completeAromaticRings = true;
+  // If true, ring bonds won't match non-ring bonds.
+  bool ringMatchesRingOnly = false;
+  // If true, only return a single fragment for the MCES.  Default is to
+  // produce multiple matching fragments if necessary.
+  bool singleLargestFrag = false;
+  // Minimum number of atoms in any fragment.  -1 means no minimum.
+  int minFragSize = -1;
+  // Biggest through-bond distance that bonds can match.  -1 means no limit.
+  int maxFragSeparation = -1;
+  // If true, all MCESs are returned, in order of diminishing score.  This
+  // is likely to result in higher run times.
+  bool allBestMCESs = false;
+  // Max run time, in seconds.  -1 means no max.
+  int timeout = 60;
+  // This might make the code run a bit faster in some circumstances, but on
+  // average it is very marginal.
+  bool doEquivBondPruning = false;
+  // If true, if the similarity thresholds aren't matched still return a
+  // RascalResult with the tier1 and tier2 sims filled in.
+  bool returnEmptyMCES = false;
+  // Too many matching bond (vertex) pairs can cause it to run out of
+  // memory.  This is a reasonable default for my Mac.
+  int maxBondMatchPairs = 1000;
+};
+}  // namespace RascalMCES
+}  // namespace RDKit
+
+#endif  // RASCALOPTIONS_H
--- a/Code/GraphMol/RascalMCES/RascalResult.cpp
+++ b/Code/GraphMol/RascalMCES/RascalResult.cpp
@@ -0,0 +1,815 @@
+//
+// Copyright (C) David Cosgrove 2023
+//
+//   @@ All Rights Reserved @@
+//  This file is part of the RDKit.
+//  The contents are covered by the terms of the BSD license
+//  which is included in the file license.txt, found at the root
+//  of the RDKit source tree.
+//
+
+#include <regex>
+#include <set>
+
+#include <boost/dynamic_bitset.hpp>
+
+#include <GraphMol/MolOps.h>
+#include <GraphMol/QueryAtom.h>
+#include <GraphMol/QueryBond.h>
+#include <GraphMol/QueryOps.h>
+#include <GraphMol/SmilesParse/SmartsWrite.h>
+#include <GraphMol/SmilesParse/SmilesWrite.h>
+
+#include <GraphMol/RascalMCES/RascalDetails.h>
+#include <GraphMol/RascalMCES/RascalResult.h>
+
+namespace RDKit {
+
+namespace RascalMCES {
+
+// Build a result from a clique of matched vertex pairs.  Copies of the two
+// molecules are stored; if swapped is true they are exchanged, so that
+// d_mol1 is a copy of mol2 and the adjacency matrix used for atom matching
+// is adjMatrix2.  After the bond and atom matches are derived, the optional
+// maximum fragment separation and single-largest-fragment filters are
+// applied.
+RascalResult::RascalResult(const RDKit::ROMol &mol1, const RDKit::ROMol &mol2,
+                           const std::vector<std::vector<int>> &adjMatrix1,
+                           const std::vector<std::vector<int>> &adjMatrix2,
+                           const std::vector<unsigned int> &clique,
+                           const std::vector<std::pair<int, int>> &vtx_pairs,
+                           bool timedOut, bool swapped, double tier1Sim,
+                           double tier2Sim, bool ringMatchesRingOnly,
+                           bool singleLargestFrag, int maxFragSep)
+    : d_timedOut(timedOut),
+      d_tier1Sim(tier1Sim),
+      d_tier2Sim(tier2Sim),
+      d_ringMatchesRingOnly(ringMatchesRingOnly),
+      d_maxFragSep(maxFragSep) {
+  // Pick which input plays the role of "molecule 1" in this result.
+  const RDKit::ROMol &firstMol = swapped ? mol2 : mol1;
+  const RDKit::ROMol &secondMol = swapped ? mol1 : mol2;
+  const std::vector<std::vector<int>> &firstAdjMatrix =
+      swapped ? adjMatrix2 : adjMatrix1;
+  d_mol1.reset(new RDKit::ROMol(firstMol));
+  d_mol2.reset(new RDKit::ROMol(secondMol));
+
+  details::extractClique(clique, vtx_pairs, swapped, d_bondMatches);
+  matchCliqueAtoms(firstAdjMatrix);
+  // -1 means no limit on the fragment separation.
+  if (d_maxFragSep != -1) {
+    applyMaxFragSep();
+  }
+  if (singleLargestFrag) {
+    largestFragOnly();
+  }
+}
+
+// Build an empty result (no molecules, no matches) that only records the
+// tier 1 and tier 2 similarities.
+RascalResult::RascalResult(double tier1Sim, double tier2Sim)
+    : d_tier1Sim{tier1Sim}, d_tier2Sim{tier2Sim} {}
+
+// Copy constructor.  Makes deep copies of the molecules held by other, so
+// the two results do not share any ROMol objects.  All settings carried by
+// the result (including d_ringMatchesRingOnly and d_maxFragSep, which affect
+// the SMARTS that is generated lazily) are copied.
+RascalResult::RascalResult(const RascalResult &other)
+    : d_bondMatches(other.d_bondMatches),
+      d_atomMatches(other.d_atomMatches),
+      d_smarts(other.d_smarts),
+      d_timedOut(other.d_timedOut),
+      d_tier1Sim(other.d_tier1Sim),
+      d_tier2Sim(other.d_tier2Sim),
+      d_ringMatchesRingOnly(other.d_ringMatchesRingOnly),
+      d_maxFragSep(other.d_maxFragSep),
+      d_numFrags(other.d_numFrags),
+      d_ringNonRingBondScore(other.d_ringNonRingBondScore),
+      d_atomMatchScore(other.d_atomMatchScore),
+      d_maxDeltaAtomAtomDist(other.d_maxDeltaAtomAtomDist),
+      d_largestFragSize(other.d_largestFragSize) {
+  if (other.d_mol1) {
+    d_mol1.reset(new ROMol(*other.d_mol1));
+  }
+  if (other.d_mol2) {
+    d_mol2.reset(new ROMol(*other.d_mol2));
+  }
+  if (other.d_mcesMol) {
+    d_mcesMol.reset(new ROMol(*other.d_mcesMol));
+  }
+}
+
+// Copy assignment.  Copies every member of other, making deep copies of the
+// molecules.  If other holds no molecule in a given slot, the corresponding
+// molecule of this object is released rather than left stale.
+RascalResult &RascalResult::operator=(const RascalResult &other) {
+  if (this == &other) {
+    return *this;
+  }
+  d_bondMatches = other.d_bondMatches;
+  d_atomMatches = other.d_atomMatches;
+  d_smarts = other.d_smarts;
+  d_timedOut = other.d_timedOut;
+  d_tier1Sim = other.d_tier1Sim;
+  d_tier2Sim = other.d_tier2Sim;
+  d_ringMatchesRingOnly = other.d_ringMatchesRingOnly;
+  d_maxFragSep = other.d_maxFragSep;
+  d_numFrags = other.d_numFrags;
+  d_ringNonRingBondScore = other.d_ringNonRingBondScore;
+  d_atomMatchScore = other.d_atomMatchScore;
+  d_maxDeltaAtomAtomDist = other.d_maxDeltaAtomAtomDist;
+  d_largestFragSize = other.d_largestFragSize;
+  if (other.d_mol1) {
+    d_mol1.reset(new ROMol(*other.d_mol1));
+  } else {
+    d_mol1.reset();
+  }
+  if (other.d_mol2) {
+    d_mol2.reset(new ROMol(*other.d_mol2));
+  } else {
+    d_mol2.reset();
+  }
+  if (other.d_mcesMol) {
+    d_mcesMol.reset(new ROMol(*other.d_mcesMol));
+  } else {
+    d_mcesMol.reset();
+  }
+  return *this;
+}
+
+// Cut the result down to its single largest fragment.
+void RascalResult::largestFragOnly() {
+  const unsigned int fragsToKeep = 1;
+  largestFragsOnly(fragsToKeep);
+}
+
+// Cut the result down to the numFrags largest fragments (by atom count) of
+// the MCES in molecule 1.  Does nothing if numFrags is 0, if there are fewer
+// than numFrags fragments, or if the result holds no molecules.
+void RascalResult::largestFragsOnly(unsigned int numFrags) {
+  // An empty result (e.g. one built from just the tier similarities) has no
+  // molecules, and makeMolFrags() would return nullptr.
+  if (!d_mol1 || !d_mol2) {
+    return;
+  }
+  std::unique_ptr<RDKit::ROMol> mol1_frags(makeMolFrags(1));
+  // getMolFrags() returns boost::shared_ptr.  Ho-hum.
+  auto frags = RDKit::MolOps::getMolFrags(*mol1_frags, false);
+  if (numFrags < 1 || frags.size() < numFrags) {
+    return;
+  }
+  // Largest fragment first, so the ones to keep are at the front.
+  std::sort(frags.begin(), frags.end(),
+            [](const boost::shared_ptr<ROMol> &f1,
+               const boost::shared_ptr<ROMol> &f2) -> bool {
+              return f1->getNumAtoms() > f2->getNumAtoms();
+            });
+  frags.erase(frags.begin() + numFrags, frags.end());
+  rebuildFromFrags(frags);
+}
+
+// Remove from the result all fragments of the MCES in molecule 1 that have
+// fewer than minFragSize atoms.  Does nothing if the result holds no
+// molecules.
+void RascalResult::trimSmallFrags(unsigned int minFragSize) {
+  // An empty result (e.g. one built from just the tier similarities) has no
+  // molecules, and makeMolFrags() would return nullptr.
+  if (!d_mol1 || !d_mol2) {
+    return;
+  }
+  std::unique_ptr<RDKit::ROMol> mol1_frags(makeMolFrags(1));
+  // getMolFrags() returns boost::shared_ptr.  Ho-hum.
+  auto frags = RDKit::MolOps::getMolFrags(*mol1_frags, false);
+  frags.erase(std::remove_if(frags.begin(), frags.end(),
+                             [&](const boost::shared_ptr<ROMol> &f) -> bool {
+                               return f->getNumAtoms() < minFragSize;
+                             }),
+              frags.end());
+  rebuildFromFrags(frags);
+}
+
+// Johnson similarity of the two molecules based on the current bond and
+// atom matches.  Returns 0.0 if either molecule is missing.
+double RascalResult::getSimilarity() const {
+  const bool haveBothMols = d_mol1 && d_mol2;
+  return haveBothMols ? details::johnsonSimilarity(d_bondMatches,
+                                                   d_atomMatches, *d_mol1,
+                                                   *d_mol2)
+                      : 0.0;
+}
+
+// Rebuild the atom and bond matches so that they only contain atoms and
+// bonds that appear in the given fragments.  The fragments must carry the
+// ORIG_INDEX property on their atoms and bonds, as set by makeMolFrags().
+// All lazily-computed values that depend on the matches are invalidated;
+// d_numFrags and d_largestFragSize are set directly from the fragments.
+// Does nothing if the result holds no molecules.
+void RascalResult::rebuildFromFrags(
+    const std::vector<boost::shared_ptr<ROMol>> &frags) {
+  if (!d_mol1 || !d_mol2) {
+    return;
+  }
+  // Force the re-creation of the SMARTS and other properties next time
+  // they're needed.  Everything derived from the atom/bond matches is
+  // stale once the matches change, including the atom match score and the
+  // cached MCES molecule.
+  d_smarts = "";
+  d_mcesMol.reset();
+  // NOTE(review): d_maxFragSep looks like a setting rather than a cached
+  // value; resetting it here is kept from the original - confirm intent.
+  d_maxFragSep = -1;
+  d_ringNonRingBondScore = -1;
+  d_atomMatchScore = -1;
+  d_maxDeltaAtomAtomDist = -1;
+
+  // for now, this is always called after fragmenting d_mol1, but just for
+  // safety, protect against the frags coming from d_mol2 in some future
+  // use.
+  boost::dynamic_bitset<> fragAtoms(
+      std::max(d_mol1->getNumAtoms(), d_mol2->getNumAtoms()));
+  boost::dynamic_bitset<> fragBonds(
+      std::max(d_mol1->getNumBonds(), d_mol2->getNumBonds()));
+  // The fragments are not necessarily sorted by size (trimSmallFrags()
+  // passes them in getMolFrags() order), so find the largest explicitly.
+  unsigned int largestFrag = 0;
+  for (const auto &f : frags) {
+    largestFrag = std::max(largestFrag, f->getNumAtoms());
+    for (auto atom : f->atoms()) {
+      if (atom->hasProp("ORIG_INDEX")) {
+        fragAtoms.set(atom->getProp<int>("ORIG_INDEX"));
+      }
+    }
+    for (auto bond : f->bonds()) {
+      if (bond->hasProp("ORIG_INDEX")) {
+        fragBonds.set(bond->getProp<int>("ORIG_INDEX"));
+      }
+    }
+  }
+  // Keep only the matches whose molecule 1 atom/bond is in a fragment,
+  // preserving their original order.
+  std::vector<std::pair<int, int>> newAtomMatches;
+  for (const auto &am : d_atomMatches) {
+    if (fragAtoms[am.first]) {
+      newAtomMatches.push_back(am);
+    }
+  }
+  d_atomMatches = newAtomMatches;
+  std::vector<std::pair<int, int>> newBondMatches;
+  for (const auto &bm : d_bondMatches) {
+    if (fragBonds[bm.first]) {
+      newBondMatches.push_back(bm);
+    }
+  }
+  d_bondMatches = newBondMatches;
+  d_numFrags = frags.size();
+  d_largestFragSize = largestFrag;
+}
+
+// Build a SMARTS string describing the MCES from the atom and bond matches.
+// Each matched atom pair becomes a query atom and each matched bond pair a
+// query bond; where the two molecules differ the query is widened with an
+// OR.  Returns an empty string if either molecule is missing.
+std::string RascalResult::createSmartsString() const {
+  if (!d_mol1 || !d_mol2) {
+    return "";
+  }
+  RWMol smartsMol;
+  // Maps atom index in d_mol1 to atom index in smartsMol.
+  std::map<int, unsigned int> atomMap;
+  auto mol1Rings = d_mol1->getRingInfo();
+  auto mol2Rings = d_mol2->getRingInfo();
+  for (const auto &am : d_atomMatches) {
+    RDKit::QueryAtom a;
+    auto mol1Atom = d_mol1->getAtomWithIdx(am.first);
+    a.setQuery(RDKit::makeAtomNumQuery(mol1Atom->getAtomicNum()));
+    auto mol2Atom = d_mol2->getAtomWithIdx(am.second);
+    // If the matched atoms are different elements, accept either.
+    if (mol1Atom->getAtomicNum() != mol2Atom->getAtomicNum()) {
+      a.expandQuery(RDKit::makeAtomNumQuery(mol2Atom->getAtomicNum()),
+                    Queries::COMPOSITE_OR);
+    }
+    // Only constrain aromaticity when both atoms agree on it.
+    if (mol1Atom->getIsAromatic() && mol2Atom->getIsAromatic()) {
+      a.expandQuery(RDKit::makeAtomAromaticQuery(), Queries::COMPOSITE_AND,
+                    true);
+    } else if (!mol1Atom->getIsAromatic() && !mol2Atom->getIsAromatic()) {
+      a.expandQuery(RDKit::makeAtomAliphaticQuery(), Queries::COMPOSITE_AND,
+                    true);
+    }
+    // With ringMatchesRingOnly, non-aromatic atoms that are in rings in
+    // both molecules also get an in-ring constraint.
+    if (d_ringMatchesRingOnly && !mol1Atom->getIsAromatic() &&
+        !mol2Atom->getIsAromatic() &&
+        mol1Rings->numAtomRings(mol1Atom->getIdx()) &&
+        mol2Rings->numAtomRings(mol2Atom->getIdx())) {
+      a.expandQuery(RDKit::makeAtomInRingQuery(), Queries::COMPOSITE_AND, true);
+    }
+    auto ai = smartsMol.addAtom(&a);
+    atomMap.insert(std::make_pair(am.first, ai));
+  }
+
+  for (const auto &bm : d_bondMatches) {
+    RDKit::QueryBond b;
+    auto mol1Bond = d_mol1->getBondWithIdx(bm.first);
+    // NOTE(review): operator[] default-inserts 0 if an end atom of the bond
+    // is not in atomMap; this relies on every matched bond's atoms being in
+    // d_atomMatches.
+    b.setBeginAtomIdx(atomMap[mol1Bond->getBeginAtomIdx()]);
+    b.setEndAtomIdx(atomMap[mol1Bond->getEndAtomIdx()]);
+    b.setQuery(makeBondOrderEqualsQuery(mol1Bond->getBondType()));
+    auto mol2Bond = d_mol2->getBondWithIdx(bm.second);
+    // If the matched bonds have different types, accept either.
+    if (mol1Bond->getBondType() != mol2Bond->getBondType()) {
+      b.expandQuery(makeBondOrderEqualsQuery(mol2Bond->getBondType()),
+                    Queries::COMPOSITE_OR);
+    }
+    // Same in-ring constraint as for atoms, applied to non-aromatic bonds.
+    if (d_ringMatchesRingOnly && !mol1Bond->getIsAromatic() &&
+        !mol2Bond->getIsAromatic() &&
+        mol1Rings->numBondRings(mol1Bond->getIdx()) &&
+        mol2Rings->numBondRings(mol2Bond->getIdx())) {
+      b.expandQuery(RDKit::makeBondIsInRingQuery(), Queries::COMPOSITE_AND,
+                    true);
+    }
+    smartsMol.addBond(&b, false);
+  }
+  std::string smt = RDKit::MolToSmarts(smartsMol, true);
+  details::cleanSmarts(smt);
+  return smt;
+}
+
+namespace {
+// Find the atom shared by the two bonds.  Returns its index, or -1 if the
+// bonds have no atom in common.  The end-point combinations are tested in a
+// fixed order and the first hit wins.
+int common_atom_in_bonds(const RDKit::Bond *bond1, const RDKit::Bond *bond2) {
+  const auto begin1 = bond1->getBeginAtomIdx();
+  const auto end1 = bond1->getEndAtomIdx();
+  const auto begin2 = bond2->getBeginAtomIdx();
+  const auto end2 = bond2->getEndAtomIdx();
+  if (begin1 == begin2) {
+    return begin1;
+  }
+  if (end1 == begin2) {
+    return end1;
+  }
+  if (begin1 == end2) {
+    return begin1;
+  }
+  if (end1 == end2) {
+    return end1;
+  }
+  return -1;
+}
+}  // namespace
+
+// Derive d_atomMatches (pairs of atom indices in d_mol1 and d_mol2) from
+// d_bondMatches.  mol1_adj_matrix is indexed by pairs of bond indices in
+// d_mol1; a non-zero entry is taken to mean the two bonds share an atom.
+// Does nothing if there are no bond matches.
+void RascalResult::matchCliqueAtoms(
+    const std::vector<std::vector<int>> &mol1_adj_matrix) {
+  if (d_bondMatches.empty()) {
+    return;
+  }
+  // mol1Matches[i] is the d_mol2 atom matched to d_mol1 atom i: -1 for
+  // atoms not in the clique, -2 for clique atoms not yet assigned.
+  std::vector<int> mol1Matches(d_mol1->getNumAtoms(), -1);
+  // set the clique atoms to -2 in mol1Matches, to mark them as yet undecided.
+  for (const auto &bm : d_bondMatches) {
+    auto bond1 = d_mol1->getBondWithIdx(bm.first);
+    mol1Matches[bond1->getBeginAtomIdx()] = -2;
+    mol1Matches[bond1->getEndAtomIdx()] = -2;
+  }
+
+  // First, use the line graphs to match atoms that have 2 matching bonds
+  // incident on them.
+  // (size() - 1 is safe here because the empty case returned above.)
+  for (size_t i = 0; i < d_bondMatches.size() - 1; ++i) {
+    const auto &pair1 = d_bondMatches[i];
+    auto bond1_1 = d_mol1->getBondWithIdx(pair1.first);
+    auto bond2_1 = d_mol2->getBondWithIdx(pair1.second);
+    for (size_t j = i + 1; j < d_bondMatches.size(); ++j) {
+      const auto &pair2 = d_bondMatches[j];
+      if (mol1_adj_matrix[pair1.first][pair2.first]) {
+        // the 2 bonds are incident on the same atom, so the 2 atoms must match
+        auto bond1_2 = d_mol1->getBondWithIdx(pair2.first);
+        auto bond2_2 = d_mol2->getBondWithIdx(pair2.second);
+        auto mol1Atom = common_atom_in_bonds(bond1_1, bond1_2);
+        auto mol2Atom = common_atom_in_bonds(bond2_1, bond2_2);
+        // NOTE(review): only mol1Atom is checked against -1; this assumes
+        // the matched bonds in d_mol2 also share an atom - confirm.
+        if (mol1Atom != -1) {
+          // The shared atoms match, and so do the atoms at the far end of
+          // each of the two bonds.
+          mol1Matches[mol1Atom] = mol2Atom;
+          auto omol1Atom = bond1_1->getOtherAtomIdx(mol1Atom);
+          auto omol2Atom = bond2_1->getOtherAtomIdx(mol2Atom);
+          mol1Matches[omol1Atom] = omol2Atom;
+          omol1Atom = bond1_2->getOtherAtomIdx(mol1Atom);
+          omol2Atom = bond2_2->getOtherAtomIdx(mol2Atom);
+          mol1Matches[omol1Atom] = omol2Atom;
+        }
+      }
+    }
+  }
+  // if there are -2 entries in mol1Matches there's more to do.
+  if (std::count(mol1Matches.begin(), mol1Matches.end(), -2)) {
+    // Any -2 entries in mol1Matches are down to isolated bonds, which are a bit
+    // tricky.
+    for (const auto &pair1 : d_bondMatches) {
+      auto bond1_1 = d_mol1->getBondWithIdx(pair1.first);
+      if (mol1Matches[bond1_1->getBeginAtomIdx()] == -2 &&
+          mol1Matches[bond1_1->getEndAtomIdx()] == -2) {
+        auto bond2_1 = d_mol2->getBondWithIdx(pair1.second);
+        if (bond1_1->getBeginAtom()->getAtomicNum() !=
+            bond1_1->getEndAtom()->getAtomicNum()) {
+          // it's fairly straightforward:
+          // orient the bonds so that the elements line up.
+          if (bond1_1->getBeginAtom()->getAtomicNum() ==
+              bond2_1->getBeginAtom()->getAtomicNum()) {
+            mol1Matches[bond1_1->getBeginAtomIdx()] =
+                bond2_1->getBeginAtomIdx();
+            mol1Matches[bond1_1->getEndAtomIdx()] = bond2_1->getEndAtomIdx();
+          } else {
+            mol1Matches[bond1_1->getBeginAtomIdx()] = bond2_1->getEndAtomIdx();
+            mol1Matches[bond1_1->getEndAtomIdx()] = bond2_1->getBeginAtomIdx();
+          }
+        } else if (bond1_1->getBeginAtom()->getTotalNumHs() !=
+                   bond1_1->getEndAtom()->getTotalNumHs()) {
+          // try it on number of hydrogens
+          // NOTE(review): this branch only compares the H counts of the two
+          // ends of the d_mol1 bond; the H counts of the d_mol2 bond are not
+          // consulted - confirm this is the intended orientation rule.
+          if (bond1_1->getBeginAtom()->getTotalNumHs() >
+              bond1_1->getEndAtom()->getTotalNumHs()) {
+            mol1Matches[bond1_1->getBeginAtomIdx()] =
+                bond2_1->getBeginAtomIdx();
+            mol1Matches[bond1_1->getEndAtomIdx()] = bond2_1->getEndAtomIdx();
+          } else {
+            mol1Matches[bond1_1->getBeginAtomIdx()] = bond2_1->getEndAtomIdx();
+            mol1Matches[bond1_1->getEndAtomIdx()] = bond2_1->getBeginAtomIdx();
+          }
+        } else {
+          // it probably doesn't matter
+          mol1Matches[bond1_1->getBeginAtomIdx()] = bond2_1->getBeginAtomIdx();
+          mol1Matches[bond1_1->getEndAtomIdx()] = bond2_1->getEndAtomIdx();
+        }
+      }
+    }
+  }
+  // Collect the assigned atoms, in order of d_mol1 atom index.
+  for (size_t i = 0u; i < d_mol1->getNumAtoms(); ++i) {
+    if (mol1Matches[i] >= 0) {
+      d_atomMatches.push_back(std::make_pair(i, mol1Matches[i]));
+    }
+  }
+}
+
+// Enforce d_maxFragSep: for each pair of MCES fragments, if the shortest
+// through-bond distance between them exceeds d_maxFragSep in either
+// molecule, the smaller of the two fragments (by atom count in molecule 1)
+// is discarded.  d_bondMatches and d_atomMatches are then rebuilt from the
+// surviving fragments.  Does nothing if there are fewer than 2 fragments.
+void RascalResult::applyMaxFragSep() {
+  std::unique_ptr<RDKit::ROMol> mol1_frags(makeMolFrags(1));
+  auto frags1 = RDKit::MolOps::getMolFrags(*mol1_frags, false);
+  if (frags1.size() < 2) {
+    return;
+  }
+  // Smallest topological distance between any atom of frag1 and any atom of
+  // frag2, looked up in the num_atoms x num_atoms pathMatrix of the parent
+  // molecule via the ORIG_INDEX atom property.
+  auto fragFragDist = [](const boost::shared_ptr<RDKit::ROMol> &frag1,
+                         const boost::shared_ptr<RDKit::ROMol> &frag2,
+                         const double *pathMatrix, int num_atoms) -> int {
+    int minDist = std::numeric_limits<int>::max();
+    for (auto at1 : frag1->atoms()) {
+      int at1Idx = at1->getProp<int>("ORIG_INDEX");
+      for (auto at2 : frag2->atoms()) {
+        int at2Idx = at2->getProp<int>("ORIG_INDEX");
+        int dist = std::nearbyint(pathMatrix[at1Idx * num_atoms + at2Idx]);
+        if (dist < minDist) {
+          minDist = dist;
+        }
+      }
+    }
+    return minDist;
+  };
+
+  // NOTE(review): the code below assumes frags2[i] is the molecule 2
+  // counterpart of frags1[i], i.e. that getMolFrags() returns corresponding
+  // fragments in the same order for both molecules - confirm.
+  std::unique_ptr<RDKit::ROMol> mol2Frags(makeMolFrags(2));
+  auto frags2 = RDKit::MolOps::getMolFrags(*mol2Frags, false);
+  // These arrays must not be deleted - they are cached in the molecule and
+  // deleted when it is. The distance matrix will be re-calculated in case
+  // something's been copied over somewhere.
+  auto mol1Dists = RDKit::MolOps::getDistanceMat(*d_mol1, false, false, true);
+  auto mol2Dists = RDKit::MolOps::getDistanceMat(*d_mol2, false, false, true);
+
+  bool deletedFrag = false;
+  for (size_t i = 0; i < frags1.size() - 1; ++i) {
+    if (!frags1[i]) {
+      continue;
+    }
+    for (size_t j = i + 1; j < frags1.size(); ++j) {
+      if (!frags1[j]) {
+        continue;
+      }
+      int mol1Dist =
+          fragFragDist(frags1[i], frags1[j], mol1Dists, d_mol1->getNumAtoms());
+      int mol2Dist =
+          fragFragDist(frags2[i], frags2[j], mol2Dists, d_mol2->getNumAtoms());
+      if (mol1Dist > d_maxFragSep || mol2Dist > d_maxFragSep) {
+        deletedFrag = true;
+        if (frags1[i]->getNumAtoms() < frags1[j]->getNumAtoms()) {
+          frags1[i].reset();
+          frags2[i].reset();
+          // Fragment i is gone, so it must not be compared (and
+          // dereferenced) against any further fragment j.
+          break;
+        } else {
+          frags1[j].reset();
+          frags2[j].reset();
+        }
+      }
+    }
+  }
+
+  if (deletedFrag) {
+    // rebuild the d_bondMatches, in fragment order
+    std::vector<std::pair<int, int>> new_bond_matches;
+    for (auto &frag : frags1) {
+      if (!frag) {
+        continue;
+      }
+      for (auto b : frag->bonds()) {
+        int b_idx = b->getProp<int>("ORIG_INDEX");
+        for (auto &bm : d_bondMatches) {
+          if (b_idx == bm.first) {
+            new_bond_matches.push_back(bm);
+            break;
+          }
+        }
+      }
+    }
+    d_bondMatches = new_bond_matches;
+    // and the d_atomMatches
+    std::vector<std::pair<int, int>> new_atom_matches;
+    for (auto &frag : frags1) {
+      if (!frag) {
+        continue;
+      }
+      for (auto a : frag->atoms()) {
+        int a_idx = a->getProp<int>("ORIG_INDEX");
+        for (auto &am : d_atomMatches) {
+          if (a_idx == am.first) {
+            new_atom_matches.push_back(am);
+            break;
+          }
+        }
+      }
+    }
+    d_atomMatches = new_atom_matches;
+  }
+}
+
+// Return a copy of molecule molNum (1 or 2) cut down to the clique: atoms
+// and bonds that are not in the atom/bond matches are removed.  Each
+// remaining atom and bond carries the property ORIG_INDEX giving its index
+// in the original molecule.  Returns nullptr if molNum is not 1 or 2, or if
+// the molecule is missing.  The caller owns the returned molecule.
+RDKit::ROMol *RascalResult::makeMolFrags(int molNum) const {
+  if (molNum != 1 && molNum != 2) {
+    return nullptr;
+  }
+  const bool useFirst = molNum == 1;
+  const auto &srcMol = useFirst ? d_mol1 : d_mol2;
+  if (!srcMol) {
+    return nullptr;
+  }
+  auto *cliqueMol = new RDKit::RWMol(*srcMol);
+
+  // Flag the atoms and bonds of the chosen molecule that are in the clique.
+  std::vector<char> atomInClique(srcMol->getNumAtoms(), 0);
+  for (const auto &atomMatch : d_atomMatches) {
+    atomInClique[useFirst ? atomMatch.first : atomMatch.second] = 1;
+  }
+  std::vector<char> bondInClique(srcMol->getNumBonds(), 0);
+  for (const auto &bondMatch : d_bondMatches) {
+    bondInClique[useFirst ? bondMatch.first : bondMatch.second] = 1;
+  }
+
+  // Removals are batched and only take effect at commitBatchEdit().
+  cliqueMol->beginBatchEdit();
+  for (auto atom : cliqueMol->atoms()) {
+    if (atomInClique[atom->getIdx()]) {
+      atom->setProp<int>("ORIG_INDEX", atom->getIdx());
+    } else {
+      cliqueMol->removeAtom(atom);
+    }
+  }
+  for (auto bond : cliqueMol->bonds()) {
+    if (bondInClique[bond->getIdx()]) {
+      bond->setProp<int>("ORIG_INDEX", bond->getIdx());
+    } else {
+      cliqueMol->removeBond(bond->getBeginAtomIdx(), bond->getEndAtomIdx());
+    }
+  }
+  cliqueMol->commitBatchEdit();
+  return cliqueMol;
+}
+
+// Count the matched bond pairs where one bond is in a ring and the other is
+// not.  Returns 0 if either molecule is missing.
+int RascalResult::calcRingNonRingScore() const {
+  if (!(d_mol1 && d_mol2)) {
+    return 0;
+  }
+
+  int numMismatches = 0;
+  for (const auto &bondMatch : d_bondMatches) {
+    const bool inRing1 =
+        d_mol1->getRingInfo()->numBondRings(bondMatch.first) != 0;
+    const bool inRing2 =
+        d_mol2->getRingInfo()->numBondRings(bondMatch.second) != 0;
+    if (inRing1 != inRing2) {
+      ++numMismatches;
+    }
+  }
+  return numMismatches;
+}
+
+// Calculate a score for how well the atoms in the clique from mol1 match
+// the atoms for the clique in mol2.  It is the sum over the matched atom
+// pairs of the absolute difference in total H count.  It's so that, for
+// example, an OH in mol1 that could match an OH or OMe matches the OH for
+// preference.  Returns 0 if either molecule is missing.
+int RascalResult::calcAtomMatchScore() const {
+  if (!(d_mol1 && d_mol2)) {
+    return 0;
+  }
+  int totalDiff = 0;
+  for (const auto &atomMatch : d_atomMatches) {
+    const int hCount1 =
+        d_mol1->getAtomWithIdx(atomMatch.first)->getTotalNumHs();
+    const int hCount2 =
+        d_mol2->getAtomWithIdx(atomMatch.second)->getTotalNumHs();
+    totalDiff += std::abs(hCount1 - hCount2);
+  }
+  return totalDiff;
+}
+
+// For every pair of matched atoms, compare the topological distance between
+// them in mol1 with the distance between their counterparts in mol2, and
+// return the largest absolute difference.  Returns 0 if there are no atom
+// matches.
+int RascalResult::calcMaxDeltaAtomAtomDistScore() const {
+  // Possibly this could be improved, to be the total of the minimum distances
+  // between each fragment.
+  if (d_atomMatches.empty()) {
+    return 0;
+  }
+  // The distance matrices are cached in the molecules, so must not be
+  // deleted here.  The final 'true' forces recalculation, just in case some
+  // other type of matrix was copied over from the input molecule.
+  const double *dists1 =
+      RDKit::MolOps::getDistanceMat(*d_mol1, false, false, true);
+  const double *dists2 =
+      RDKit::MolOps::getDistanceMat(*d_mol2, false, false, true);
+  const int numAtoms1 = d_mol1->getNumAtoms();
+  const int numAtoms2 = d_mol2->getNumAtoms();
+
+  int maxDelta = 0;
+  for (size_t i = 0; i + 1 < d_atomMatches.size(); ++i) {
+    const auto &matchI = d_atomMatches[i];
+    for (size_t j = i + 1; j < d_atomMatches.size(); ++j) {
+      const auto &matchJ = d_atomMatches[j];
+      const int dist1 = static_cast<int>(
+          std::nearbyint(dists1[matchI.first * numAtoms1 + matchJ.first]));
+      const int dist2 = static_cast<int>(
+          std::nearbyint(dists2[matchI.second * numAtoms2 + matchJ.second]));
+      maxDelta = std::max(maxDelta, std::abs(dist1 - dist2));
+    }
+  }
+  return maxDelta;
+}
+
+// Number of atoms in the largest fragment of the MCES in molecule 1.
+// Returns 0 if either molecule is missing.
+int RascalResult::calcLargestFragSize() const {
+  if (!(d_mol1 && d_mol2)) {
+    return 0;
+  }
+  std::unique_ptr<RDKit::ROMol> fragMol(makeMolFrags(1));
+  // atomToFrag[i] is the fragment number of atom i.
+  std::vector<int> atomToFrag;
+  const auto fragCount = RDKit::MolOps::getMolFrags(*fragMol, atomToFrag);
+  // Fragment 0 is always counted, even if there are no fragments at all.
+  auto largest = std::count(atomToFrag.begin(), atomToFrag.end(), 0);
+  unsigned int fragNum = 1;
+  while (fragNum < fragCount) {
+    largest = std::max(
+        largest, std::count(atomToFrag.begin(), atomToFrag.end(), fragNum));
+    ++fragNum;
+  }
+  return largest;
+}
+
+// Number of fragments in the MCES, computed from molecule 1 on first use
+// and cached.  Returns 0 if either molecule is missing.
+int RascalResult::getNumFrags() const {
+  if (!(d_mol1 && d_mol2)) {
+    return 0;
+  }
+  if (d_numFrags != -1) {
+    return d_numFrags;
+  }
+  std::unique_ptr<RDKit::ROMol> fragMol(makeMolFrags(1));
+  std::vector<int> atomToFrag;
+  d_numFrags = RDKit::MolOps::getMolFrags(*fragMol, atomToFrag);
+  return d_numFrags;
+}
+
+// Ring/non-ring bond mismatch score, computed on first use and cached.
+// Returns 0 if either molecule is missing.
+int RascalResult::getRingNonRingBondScore() const {
+  if (!(d_mol1 && d_mol2)) {
+    return 0;
+  }
+  if (d_ringNonRingBondScore != -1) {
+    return d_ringNonRingBondScore;
+  }
+  d_ringNonRingBondScore = calcRingNonRingScore();
+  return d_ringNonRingBondScore;
+}
+
+// Atom match (H count difference) score, computed on first use and cached.
+// Returns 0 if either molecule is missing.
+int RascalResult::getAtomMatchScore() const {
+  if (!(d_mol1 && d_mol2)) {
+    return 0;
+  }
+  if (d_atomMatchScore != -1) {
+    return d_atomMatchScore;
+  }
+  d_atomMatchScore = calcAtomMatchScore();
+  return d_atomMatchScore;
+}
+
+// Largest difference in atom-atom distance between the two molecules over
+// all pairs of matched atoms, computed on first use and cached.  Returns 0
+// if either molecule is missing.
+int RascalResult::getMaxDeltaAtomAtomDist() const {
+  if (!(d_mol1 && d_mol2)) {
+    return 0;
+  }
+  if (d_maxDeltaAtomAtomDist != -1) {
+    return d_maxDeltaAtomAtomDist;
+  }
+  d_maxDeltaAtomAtomDist = calcMaxDeltaAtomAtomDistScore();
+  return d_maxDeltaAtomAtomDist;
+}
+
+// Size (in atoms) of the largest MCES fragment, computed on first use and
+// cached.  Returns 0 if either molecule is missing.
+int RascalResult::getLargestFragSize() const {
+  if (!(d_mol1 && d_mol2)) {
+    return 0;
+  }
+  if (d_largestFragSize != -1) {
+    return d_largestFragSize;
+  }
+  d_largestFragSize = calcLargestFragSize();
+  return d_largestFragSize;
+}
+
+// SMARTS string for the MCES, created on first use and cached.  Returns an
+// empty string if either molecule is missing.
+std::string RascalResult::getSmarts() const {
+  if (!(d_mol1 && d_mol2)) {
+    return "";
+  }
+  const bool needsBuilding = d_smarts.empty();
+  if (needsBuilding) {
+    d_smarts = createSmartsString();
+  }
+  return d_smarts;
+}
+
+// Return the MCES as a molecule, built from d_mol1 on first use and cached.
+// Atoms and bonds of d_mol1 that are not in the matches are removed, then
+// the result has its Hs removed and is sanitized.  If there is no d_mol1
+// the (empty) cached pointer is returned.
+const std::shared_ptr<ROMol> RascalResult::getMcesMol() const {
+  if (d_mcesMol || !d_mol1) {
+    return d_mcesMol;
+  }
+
+  // Flag the d_mol1 bonds and atoms that are part of the MCES.
+  boost::dynamic_bitset<> mol1Bonds(d_mol1->getNumBonds());
+  for (const auto &bm : d_bondMatches) {
+    mol1Bonds.set(bm.first);
+  }
+  boost::dynamic_bitset<> mol1Atoms(d_mol1->getNumAtoms());
+  for (const auto &am : d_atomMatches) {
+    mol1Atoms.set(am.first);
+  }
+  std::shared_ptr<RWMol> tmpMol(new RWMol(*d_mol1));
+  MolOps::KekulizeIfPossible(*tmpMol);
+  // Removals are batched and only take effect at commitBatchEdit().
+  tmpMol->beginBatchEdit();
+  for (auto &bond : tmpMol->bonds()) {
+    if (!mol1Bonds[bond->getIdx()]) {
+      // When a bond is cut, end atoms that have noImplicit set, or that are
+      // aromatic non-carbon atoms, get their explicit H count increased by
+      // the numeric value of the bond type.
+      // NOTE(review): presumably this equals the bond order for kekulized
+      // single/double/triple bonds - confirm what happens for a bond that
+      // is still of aromatic type if kekulization was not possible.
+      auto bo = bond->getBondType();
+      if (bond->getBeginAtom()->getNoImplicit() ||
+          (bond->getBeginAtom()->getIsAromatic() &&
+           bond->getBeginAtom()->getAtomicNum() != 6)) {
+        bond->getBeginAtom()->setNumExplicitHs(
+            bond->getBeginAtom()->getNumExplicitHs() + bo);
+      }
+      if (bond->getEndAtom()->getNoImplicit() ||
+          (bond->getEndAtom()->getIsAromatic() &&
+           bond->getEndAtom()->getAtomicNum() != 6)) {
+        bond->getEndAtom()->setNumExplicitHs(
+            bond->getEndAtom()->getNumExplicitHs() + bo);
+      }
+      tmpMol->removeBond(bond->getBeginAtomIdx(), bond->getEndAtomIdx());
+    }
+  }
+  for (auto atom : tmpMol->atoms()) {
+    if (!mol1Atoms[atom->getIdx()]) {
+      tmpMol->removeAtom(atom);
+    }
+  }
+  tmpMol->commitBatchEdit();
+  MolOps::removeHs(*tmpMol);
+  MolOps::sanitizeMol(*tmpMol);
+  // Cache for subsequent calls.
+  d_mcesMol = tmpMol;
+  return d_mcesMol;
+}
+
+namespace details {
+// Ordering function for RascalResults: returns true if res1 should come
+// before res2.  The criteria are applied in turn, each one only being
+// used if all the previous ones tie:
+//   1. more bond matches first
+//   2. fewer fragments first
+//   3. bigger largest fragment first
+//   4. lower ring/non-ring bond score first
+//   5. lower atom match score first
+//   6. lower maximum atom-atom distance difference first
+//   7. lexically smaller SMARTS string first.
+bool resultCompare(const RascalResult &res1, const RascalResult &res2) {
+  // getBondMatches() returns the vector by value, so only call it once
+  // per result rather than copying it for each comparison.
+  const auto numBonds1 = res1.getBondMatches().size();
+  const auto numBonds2 = res2.getBondMatches().size();
+  if (numBonds1 != numBonds2) {
+    return numBonds1 > numBonds2;
+  }
+  if (const auto v1 = res1.getNumFrags(), v2 = res2.getNumFrags(); v1 != v2) {
+    return v1 < v2;
+  }
+  if (const auto v1 = res1.getLargestFragSize(),
+      v2 = res2.getLargestFragSize();
+      v1 != v2) {
+    return v1 > v2;
+  }
+  if (const auto v1 = res1.getRingNonRingBondScore(),
+      v2 = res2.getRingNonRingBondScore();
+      v1 != v2) {
+    return v1 < v2;
+  }
+  if (const auto v1 = res1.getAtomMatchScore(),
+      v2 = res2.getAtomMatchScore();
+      v1 != v2) {
+    return v1 < v2;
+  }
+  if (const auto v1 = res1.getMaxDeltaAtomAtomDist(),
+      v2 = res2.getMaxDeltaAtomAtomDist();
+      v1 != v2) {
+    return v1 < v2;
+  }
+  return res1.getSmarts() < res2.getSmarts();
+}
+
+// Fills bondMatches with the vertex pairs picked out by the clique.
+// clique holds indices into vtxPairs.  If swapped is true, the members of
+// each pair are exchanged on the way out.  Any previous contents of
+// bondMatches are discarded and the final list is sorted.
+void extractClique(const std::vector<unsigned int> &clique,
+                   const std::vector<std::pair<int, int>> &vtxPairs,
+                   bool swapped,
+                   std::vector<std::pair<int, int>> &bondMatches) {
+  bondMatches.clear();
+  for (const auto vtxIdx : clique) {
+    const auto &vtxPair = vtxPairs[vtxIdx];
+    if (!swapped) {
+      bondMatches.push_back(vtxPair);
+    } else {
+      bondMatches.emplace_back(vtxPair.second, vtxPair.first);
+    }
+  }
+  std::sort(bondMatches.begin(), bondMatches.end());
+}
+
+// Tidies up a SMARTS string in place, replacing verbose atom primitives
+// such as [#6&A] with the short form (C) and removing explicit single bond
+// symbols between atoms where the patterns below allow it.  The
+// replacements are applied repeatedly until a full pass leaves the string
+// unchanged.
+void cleanSmarts(std::string &smarts) {
+  // Built once, on first use.  The order matters: the atom replacements
+  // come first so the bond patterns see the short atom symbols.
+  static const std::vector<std::pair<std::regex, std::string>> repls{
+      {std::regex(R"(\[#6&A\])"), "C"},
+      {std::regex(R"(\[#6&A&R\])"), "[C&R]"},
+      {std::regex(R"(\[#6&a\])"), "c"},
+      {std::regex(R"(\[#7&A\])"), "N"},
+      {std::regex(R"(\[#7&A&R\])"), "[N&R]"},
+      {std::regex(R"(\[#7&a\])"), "n"},
+      {std::regex(R"(\[#8&A\])"), "O"},
+      {std::regex(R"(\[#8&A&R\])"), "[O&R]"},
+      {std::regex(R"(\[#8&a\])"), "o"},
+      {std::regex(R"(\[#9&A\])"), "F"},
+      {std::regex(R"(\[#16&A\])"), "S"},
+      {std::regex(R"(\[#16&a\])"), "s"},
+      {std::regex(R"(\[#17&A\])"), "Cl"},
+      {std::regex(R"(\[#35&A\])"), "Br"},
+      {std::regex(R"(\[#53&A\])"), "I"},
+      {std::regex(R"(([A-Z])-([cnops]))"), "$1$2"},
+      {std::regex(R"(([cnops][1-9]*)-([A-Z]))"), "$1$2"},
+      {std::regex(R"(([A-Z][1-9]*)-([A-Z]))"), "$1$2"},
+      {std::regex(R"(([A-Z])-([1-9]))"), "$1$2"}};
+  // Sometimes it needs more than 1 pass through
+  std::string start_smt;
+  while (start_smt != smarts) {
+    start_smt = smarts;
+    // Bind by reference: copying a std::regex for every pattern on every
+    // pass is needlessly expensive.
+    for (const auto &[patt, repl] : repls) {
+      smarts = std::regex_replace(smarts, patt, repl);
+    }
+  }
+}
+
+// Writes 2 lines to os: the number of bond matches followed by the matched
+// bond indices from the first molecule, then the same for the second
+// molecule.
+void printBondMatches(const RascalResult &res, std::ostream &os) {
+  const auto matches = res.getBondMatches();
+  const auto numMatches = matches.size();
+
+  os << "Bond 1 matches : " << numMatches << " : [";
+  for (auto it = matches.begin(); it != matches.end(); ++it) {
+    os << it->first << ",";
+  }
+  os << "]" << std::endl;
+
+  os << "Bond 2 matches : " << numMatches << " : [";
+  for (auto it = matches.begin(); it != matches.end(); ++it) {
+    os << it->second << ",";
+  }
+  os << "]" << std::endl;
+}
+
+// Writes 2 lines to os: the number of atom matches followed by the matched
+// atom indices from the first molecule, then the same for the second
+// molecule.
+void printAtomMatches(const RascalResult &res, std::ostream &os) {
+  const auto matches = res.getAtomMatches();
+  const auto numMatches = matches.size();
+
+  os << "Atom 1 matches : " << numMatches << " : [";
+  for (auto it = matches.begin(); it != matches.end(); ++it) {
+    os << it->first << ",";
+  }
+  os << "]" << std::endl;
+
+  os << "Atom 2 matches : " << numMatches << " : [";
+  for (auto it = matches.begin(); it != matches.end(); ++it) {
+    os << it->second << ",";
+  }
+  os << "]" << std::endl;
+}
+
+// Writes the values that resultCompare uses for ranking on a single line,
+// in the order they are applied: number of bond matches, number of
+// fragments, largest fragment size, ring/non-ring bond score, atom match
+// score, maximum atom-atom distance difference and the SMARTS string.
+void printScores(const RascalResult &res, std::ostream &os) {
+  os << res.getBondMatches().size() << " : ";
+  os << res.getNumFrags() << " : ";
+  os << res.getLargestFragSize() << " : ";
+  os << res.getRingNonRingBondScore() << " : ";
+  os << res.getAtomMatchScore() << " : ";
+  os << res.getMaxDeltaAtomAtomDist() << " : ";
+  os << res.getSmarts() << std::endl;
+}
+
+// Johnson similarity between 2 molecules for the given match:
+//   (numBondMatches + numAtomMatches)^2 /
+//       ((numAtoms1 + numBonds1) * (numAtoms2 + numBonds2))
+// There is no guard against a molecule with no atoms and no bonds, for
+// which the denominator is 0.
+double johnsonSimilarity(const std::vector<std::pair<int, int>> &bondMatches,
+                         const std::vector<std::pair<int, int>> &atomMatches,
+                         const RDKit::ROMol &mol1, const RDKit::ROMol &mol2) {
+  // Do the arithmetic in doubles, so that the products can't wrap round
+  // as they could if formed in unsigned integers.
+  const auto matchSize =
+      static_cast<double>(bondMatches.size() + atomMatches.size());
+  const auto mol1Size = static_cast<double>(mol1.getNumAtoms()) +
+                        static_cast<double>(mol1.getNumBonds());
+  const auto mol2Size = static_cast<double>(mol2.getNumAtoms()) +
+                        static_cast<double>(mol2.getNumBonds());
+  return (matchSize * matchSize) / (mol1Size * mol2Size);
+}
+}  // namespace details
+
+}  // namespace RascalMCES
+}  // namespace RDKit
--- a/Code/GraphMol/RascalMCES/RascalResult.h
+++ b/Code/GraphMol/RascalMCES/RascalResult.h
@@ -0,0 +1,153 @@
+//
+// Copyright (C) David Cosgrove 2023
+//
+//   @@ All Rights Reserved @@
+//  This file is part of the RDKit.
+//  The contents are covered by the terms of the BSD license
+//  which is included in the file license.txt, found at the root
+//  of the RDKit source tree.
+
+// A class to hold the results of a RASCAL MCES determination
+// between 2 molecules.  Contains the bonds and atoms that
+// correspond between the molecules, and also a SMARTS pattern
+// defining the MCES.
+//
+#include <RDGeneral/export.h>
+
+#ifndef RASCALRESULT_H
+#define RASCALRESULT_H
+
+#include <vector>
+
+#include <GraphMol/ROMol.h>
+
+namespace RDKit {
+
+namespace RascalMCES {
+
+// Holds the outcome of a RASCAL MCES determination between 2 molecules:
+// the pairs of matching bond and atom indices, the tier 1 and tier 2
+// similarity estimates and whether the search timed out.  The SMARTS
+// string, the MCES molecule and the scores used for ranking results are
+// worked out when first asked for and cached in the mutable members.
+class RDKIT_RASCALMCES_EXPORT RascalResult {
+ public:
+  // NOTE(review): the last parameter was declared as minFragSep, but the
+  // corresponding member is d_maxFragSep; presumably it is the maximum
+  // fragment separation - confirm against the definition.
+  RascalResult(const RDKit::ROMol &mol1, const RDKit::ROMol &mol2,
+               const std::vector<std::vector<int>> &adjMatrix1,
+               const std::vector<std::vector<int>> &adjMatrix2,
+               const std::vector<unsigned int> &clique,
+               const std::vector<std::pair<int, int>> &vtx_pairs, bool timedOut,
+               bool swapped, double tier1Sim, double tier2Sim,
+               bool ringMatchesRingOnly, bool singleLargestFrag,
+               int maxFragSep);
+  // For when the tier[12]Sim didn't hit the threshold, but it
+  // might be of interest what the estimates of similarity were.
+  RascalResult(double tier1Sim, double tier2Sim);
+
+  RascalResult(const RascalResult &other);
+
+  RascalResult(RascalResult &&other) = default;
+
+  ~RascalResult() = default;
+
+  RascalResult &operator=(const RascalResult &other);
+
+  RascalResult &operator=(RascalResult &&other) = default;
+
+  // Cut the result down to the single largest fragment.  This is
+  // irrecoverably destructive.
+  void largestFragOnly();
+  void largestFragsOnly(unsigned int numFrags = 2);
+  void trimSmallFrags(unsigned int minFragSize = 3);
+
+  // Returns a copy of the matching bonds, as pairs of bond indices.
+  std::vector<std::pair<int, int>> getBondMatches() const {
+    return d_bondMatches;
+  }
+
+  // Returns a copy of the matching atoms, as pairs of atom indices.
+  std::vector<std::pair<int, int>> getAtomMatches() const {
+    return d_atomMatches;
+  }
+
+  // The following 5 functions are used in resultCompare to rank
+  // 2 MCES of the same size for the same pair of molecules.
+  // returns the number of contiguous fragments in the MCES.
+  int getNumFrags() const;
+
+  // returns how many bonds in the clique don't match
+  // cyclic/non-cyclic i.e. count as a match in the MCES but
+  // are ring bonds in one of the molecules and not in the other.
+  int getRingNonRingBondScore() const;
+
+  // returns a score for how well the atoms in the clique from mol1 match the
+  // atoms for the clique in mol2.  Currently, the atom scores are the
+  // difference in H count for matching atoms, and summed for the molecule.
+  // It's so that, for example, an OH in mol1 that could match an OH or OMe
+  // matches the OH for preference.
+  int getAtomMatchScore() const;
+
+  // returns a score for the maximum difference in through-bond distance for
+  // pairs of matching atoms in the 2 molecules.  An MCES where 2 atoms
+  // are far apart in one molecule and the corresponding atoms are close
+  // together in the other will get a high score by this measure.
+  int getMaxDeltaAtomAtomDist() const;
+
+  // returns the number of atoms in the largest contiguous fragment
+  // in the MCES.
+  int getLargestFragSize() const;
+
+  // returns the SMARTS pattern defining the MCES.
+  std::string getSmarts() const;
+  // returns the MCES as a molecule.
+  const std::shared_ptr<ROMol> getMcesMol() const;
+  bool getTimedOut() const { return d_timedOut; }
+
+  double getTier1Sim() const { return d_tier1Sim; }
+  double getTier2Sim() const { return d_tier2Sim; }
+  double getSimilarity() const;
+
+ private:
+  std::shared_ptr<ROMol> d_mol1;
+  std::shared_ptr<ROMol> d_mol2;
+  // Made on demand by getMcesMol().
+  mutable std::shared_ptr<ROMol> d_mcesMol;
+  std::vector<std::pair<int, int>> d_bondMatches;
+  std::vector<std::pair<int, int>> d_atomMatches;
+
+  // Made on demand by getSmarts().
+  mutable std::string d_smarts;
+  bool d_timedOut{false};
+  double d_tier1Sim;
+  double d_tier2Sim;
+  bool d_ringMatchesRingOnly{false};
+  int d_maxFragSep{-1};
+
+  // These are used for sorting the results.  They are filled in by the
+  // corresponding get functions, with -1 meaning not yet calculated.
+  mutable int d_numFrags{-1};
+  mutable int d_ringNonRingBondScore{-1};
+  mutable int d_atomMatchScore{-1};
+  mutable int d_maxDeltaAtomAtomDist{-1};
+  mutable int d_largestFragSize{-1};
+
+  // Assuming the frags are all part of the original MCES, just cut it
+  // down to what's in the frags.
+  void rebuildFromFrags(const std::vector<boost::shared_ptr<ROMol>> &frags);
+
+  std::string createSmartsString() const;
+
+  void matchCliqueAtoms(const std::vector<std::vector<int>> &mol1_adj_matrix);
+
+  // If the clique involves a fragment that is more than d_maxFragSep from
+  // any other frag in either molecule, discard the smaller frag.
+  void applyMaxFragSep();
+
+  // Make the fragments for either mol1 or mol2.  If molNum is not 1 or 2,
+  // returns nullptr.
+  RDKit::ROMol *makeMolFrags(int molNum) const;
+
+  int calcRingNonRingScore() const;
+
+  int calcAtomMatchScore() const;
+
+  int calcLargestFragSize() const;
+
+  // If there are multiple fragments, can be helpful as a tie-breaker.  It's the
+  // maximum difference between through-bond distances between matching atoms in
+  // the 2 molecules.
+  int calcMaxDeltaAtomAtomDistScore() const;
+};
+
+}  // namespace RascalMCES
+}  // namespace RDKit
+
+#endif  // RASCALRESULT_H
--- a/Code/GraphMol/RascalMCES/Wrap/CMakeLists.txt
+++ b/Code/GraphMol/RascalMCES/Wrap/CMakeLists.txt
@@ -0,0 +1,8 @@
+# The wrapper uses the RascalMCES library rather than building it, so
+# take out the build definition.
+remove_definitions(-DRDKIT_RASCALMCES_BUILD)
+
+rdkit_python_extension(rdRascalMCES
+        rdRascalMCES.cpp
+        DEST Chem
+        LINK_LIBRARIES RascalMCES)
+
+# The test needs its own name: pyMolDraw2D belongs to the MolDraw2D wrappers.
+add_pytest(pyRascalMCES ${CMAKE_CURRENT_SOURCE_DIR}/testRascalMCES.py)
--- a/Code/GraphMol/RascalMCES/Wrap/rdRascalMCES.cpp
+++ b/Code/GraphMol/RascalMCES/Wrap/rdRascalMCES.cpp
@@ -0,0 +1,217 @@
+//
+// Copyright (C) David Cosgrove 2023
+//
+//   @@ All Rights Reserved @@
+//  This file is part of the RDKit.
+//  The contents are covered by the terms of the BSD license
+//  which is included in the file license.txt, found at the root
+//  of the RDKit source tree.
+//
+
+#include <RDBoost/python.h>
+#include <RDBoost/Wrap.h>
+
+#include <GraphMol/ROMol.h>
+#include <GraphMol/RascalMCES/RascalMCES.h>
+#include <GraphMol/RascalMCES/RascalClusterOptions.h>
+#include <GraphMol/RascalMCES/RascalOptions.h>
+#include <GraphMol/RascalMCES/RascalResult.h>
+
+namespace python = boost::python;
+
+namespace {
+
+// Converts a vector of pairs of ints into a Python list of 2-tuples,
+// keeping the order of the input.
+python::list convertVecPairInt(const std::vector<std::pair<int, int>> &vec) {
+  python::list tuples;
+  for (auto it = vec.begin(); it != vec.end(); ++it) {
+    tuples.append(python::make_tuple(it->first, it->second));
+  }
+  return tuples;
+}
+
+// Returns the bond matches of the result as a Python list of tuples.
+python::list bondMatches(const RDKit::RascalMCES::RascalResult &res) {
+  const auto matches = res.getBondMatches();
+  return convertVecPairInt(matches);
+}
+// Returns the atom matches of the result as a Python list of tuples.
+python::list atomMatches(const RDKit::RascalMCES::RascalResult &res) {
+  const auto matches = res.getAtomMatches();
+  return convertVecPairInt(matches);
+}
+
+// Free function form of RascalResult::largestFragOnly(), for exposing to
+// Python under the name largestFragmentOnly.  Alters res in place.
+void largestFragmentOnly(RDKit::RascalMCES::RascalResult &res) {
+  auto &result = res;
+  result.largestFragOnly();
+}
+
+// Registers the Python RascalResult class.  The class has no Python
+// constructor (no_init).  bondMatches, atomMatches and largestFragmentOnly
+// are exposed as methods, everything else as read-only attributes backed
+// by the corresponding RascalResult get functions.
+struct RascalResult_wrapper {
+  static void wrap() {
+    std::string docString = "Used to return RASCAL MCES results.";
+    python::class_<RDKit::RascalMCES::RascalResult>(
+        "RascalResult", docString.c_str(), python::no_init)
+        .def_readonly("smartsString",
+                      &RDKit::RascalMCES::RascalResult::getSmarts,
+                      "SMARTS string defining the MCES.")
+        // The docstring used to promise a list of lists of tuples, but
+        // what is returned is a single list of tuples.
+        .def("bondMatches", &bondMatches,
+             "A function returning a list of tuples, each tuple containing "
+             "the indices of a pair of matching bonds in the MCES, from mol1 "
+             "and mol2 respectively.")
+        .def("atomMatches", &atomMatches, "Likewise for atoms.")
+        .def(
+            "largestFragmentOnly", &largestFragmentOnly,
+            "Function that cuts the MCES down to the single largest frag.  This cannot be undone.")
+        .def_readonly("similarity",
+                      &RDKit::RascalMCES::RascalResult::getSimilarity,
+                      "Johnson similarity between 2 molecules.")
+        .def_readonly("numFragments",
+                      &RDKit::RascalMCES::RascalResult::getNumFrags,
+                      "Number of fragments in MCES.")
+        .def_readonly("largestFragmentSize",
+                      &RDKit::RascalMCES::RascalResult::getLargestFragSize,
+                      "Number of atoms in largest fragment.")
+        .def_readonly("timedOut", &RDKit::RascalMCES::RascalResult::getTimedOut,
+                      "Whether it timed out.");
+  }
+};
+}  // namespace
+
+namespace RDKit {
+
+// Python entry point for rascalMCES.  If py_opts is None the default
+// RascalOptions are used.  The search itself is run inside a NOGIL block,
+// and the results are returned as a Python list of RascalResult objects.
+python::list findMCESWrapper(const ROMol &mol1, const ROMol &mol2,
+                             python::object py_opts) {
+  RascalMCES::RascalOptions runOpts;
+  const bool optsGiven = !py_opts.is_none();
+  if (optsGiven) {
+    runOpts = python::extract<RascalMCES::RascalOptions>(py_opts);
+  }
+
+  std::vector<RDKit::RascalMCES::RascalResult> mcesResults;
+  {
+    // Scoped so that gil is destroyed before the Python list is built.
+    NOGIL gil;
+    mcesResults = RascalMCES::rascalMCES(mol1, mol2, runOpts);
+  }
+
+  python::list pyResults;
+  for (auto it = mcesResults.begin(); it != mcesResults.end(); ++it) {
+    pyResults.append(*it);
+  }
+  return pyResults;
+}
+
+// Pulls the molecules out of a Python sequence into a vector of shared
+// pointers.  The length is taken from the sequence's __len__.  Raises a
+// value error if an element tests false, as None does.
+std::vector<std::shared_ptr<ROMol>> extractMols(python::object mols) {
+  unsigned int numMols = python::extract<unsigned int>(mols.attr("__len__")());
+  std::vector<std::shared_ptr<ROMol>> molPtrs;
+  molPtrs.resize(numMols);
+  unsigned int molIdx = 0;
+  while (molIdx < numMols) {
+    if (!mols[molIdx]) {
+      throw_value_error("molecule is None");
+    }
+    molPtrs[molIdx] = python::extract<std::shared_ptr<ROMol>>(mols[molIdx]);
+    ++molIdx;
+  }
+  return molPtrs;
+}
+
+// Converts the clusters into a Python list of lists, one inner list per
+// cluster, each holding the unsigned ints of that cluster in order.
+python::list packOutputMols(
+    const std::vector<std::vector<unsigned int>> &clusters) {
+  python::list pyClusters;
+  for (auto clusIt = clusters.begin(); clusIt != clusters.end(); ++clusIt) {
+    python::list pyCluster;
+    for (auto memIt = clusIt->begin(); memIt != clusIt->end(); ++memIt) {
+      pyCluster.append(*memIt);
+    }
+    pyClusters.append(pyCluster);
+  }
+  return pyClusters;
+}
+
+// Python entry point for rascalCluster.  If py_opts is None the default
+// RascalClusterOptions are used.  The clustering is run inside a NOGIL
+// block and the clusters are returned as a Python list of lists.
+python::list rascalClusterWrapper(python::object mols, python::object py_opts) {
+  RascalMCES::RascalClusterOptions clusOpts;
+  const bool optsGiven = !py_opts.is_none();
+  if (optsGiven) {
+    clusOpts = python::extract<RascalMCES::RascalClusterOptions>(py_opts);
+  }
+
+  auto molPtrs = extractMols(mols);
+  std::vector<RDKit::UINT_VECT> molClusters;
+  {
+    // Scoped so that gil is destroyed before the Python lists are built.
+    NOGIL gil;
+    molClusters = RascalMCES::rascalCluster(molPtrs, clusOpts);
+  }
+  return packOutputMols(molClusters);
+}
+
+// Python entry point for rascalButinaCluster.  If py_opts is None the
+// default RascalClusterOptions are used.  The clustering is run inside a
+// NOGIL block and the clusters are returned as a Python list of lists.
+python::list rascalButinaClusterWrapper(python::object mols,
+                                        python::object py_opts) {
+  RascalMCES::RascalClusterOptions clusOpts;
+  const bool optsGiven = !py_opts.is_none();
+  if (optsGiven) {
+    clusOpts = python::extract<RascalMCES::RascalClusterOptions>(py_opts);
+  }
+
+  auto molPtrs = extractMols(mols);
+  std::vector<RDKit::UINT_VECT> molClusters;
+  {
+    // Scoped so that gil is destroyed before the Python lists are built.
+    NOGIL gil;
+    molClusters = RascalMCES::rascalButinaCluster(molPtrs, clusOpts);
+  }
+  return packOutputMols(molClusters);
+}
+
+// Definition of the rdRascalMCES Python module: the RascalResult and
+// RascalOptions classes and the FindMCES, RascalCluster and
+// RascalButinaCluster functions.
+// NOTE(review): the clustering wrappers extract a RascalClusterOptions from
+// their opts argument, but no Python class of that type is registered in
+// this module.
+BOOST_PYTHON_MODULE(rdRascalMCES) {
+  python::scope().attr("__doc__") =
+      "Module containing implementation of RASCAL Maximum Common Edge Substructure algorithm.";
+  RascalResult_wrapper::wrap();
+
+  std::string docString = "RASCAL Options";
+  python::class_<RDKit::RascalMCES::RascalOptions, boost::noncopyable>(
+      "RascalOptions", docString.c_str())
+      .def_readwrite(
+          "similarityThreshold",
+          &RDKit::RascalMCES::RascalOptions::similarityThreshold,
+          "Threshold below which MCES won't be run.  Between 0.0 and 1.0, default=0.7.")
+      .def_readwrite(
+          "completeAromaticRings",
+          &RDKit::RascalMCES::RascalOptions::completeAromaticRings,
+          "If True (default), partial aromatic rings won't be returned.")
+      // NOTE(review): the "(default)" is carried over from the original
+      // text - confirm it against the default in RascalOptions.h.
+      .def_readwrite(
+          "ringMatchesRingOnly",
+          &RDKit::RascalMCES::RascalOptions::ringMatchesRingOnly,
+          "If True (default), ring bonds won't match non-ring bonds.")
+      .def_readwrite(
+          "minFragSize", &RDKit::RascalMCES::RascalOptions::minFragSize,
+          "Imposes a minimum on the number of atoms in a fragment that may be part of the MCES.  Default -1 means no minimum.")
+      .def_readwrite(
+          "maxFragSeparation",
+          &RDKit::RascalMCES::RascalOptions::maxFragSeparation,
+          "Maximum number of bonds between fragments in the MCES for both to be reported.  Default -1 means no maximum.  If exceeded, the smaller fragment will be removed.")
+      .def_readwrite(
+          "allBestMCESs", &RDKit::RascalMCES::RascalOptions::allBestMCESs,
+          "If True, reports all MCESs found of the same maximum size.  Default False means just report the first found.")
+      .def_readwrite(
+          "timeout", &RDKit::RascalMCES::RascalOptions::timeout,
+          "Maximum time (in seconds) to spend on an individual MCESs determination.  Default 60, -1 means no limit.");
+
+  // Adjacent string literals are joined with nothing in between, so the
+  // argument lists need explicit newlines to be readable in help().
+  docString =
+      "Find one or more MCESs between the 2 molecules given.  Returns a list of "
+      "RascalResult objects.\n\n"
+      "- mol1\n"
+      "- mol2 The two molecules for which to find the MCES\n"
+      "- opts Optional RascalOptions object changing the default run mode.\n"
+      "";
+  python::def("FindMCES", &RDKit::findMCESWrapper,
+              (python::arg("mol1"), python::arg("mol2"),
+               python::arg("opts") = python::object()),
+              docString.c_str());
+  // The clusters come back as lists of unsigned integers rather than
+  // molecule objects, and the options are RascalClusterOptions.
+  docString =
+      "Use the RASCAL MCES similarity metric to do fuzzy clustering.  Returns a list of lists "
+      "of integers identifying the input molecules, each inner list being a cluster.  "
+      "The last cluster is all the "
+      "molecules that didn't fit into another cluster (the singletons).\n\n"
+      "- mols List of molecules to be clustered\n"
+      "- opts Optional RascalClusterOptions object changing the default run mode.\n"
+      "";
+  python::def("RascalCluster", &RDKit::rascalClusterWrapper,
+              (python::arg("mols"), python::arg("opts") = python::object()),
+              docString.c_str());
+  docString =
+      "Use the RASCAL MCES similarity metric to do Butina clustering"
+      " (Butina JCICS 39 747-750 (1999)).  Returns a list of lists of integers"
+      " identifying the input molecules, each inner list being a cluster."
+      "  The last cluster is all the"
+      " molecules that didn't fit into another cluster (the singletons).\n\n"
+      "- mols List of molecules to be clustered\n"
+      "- opts Optional RascalClusterOptions object changing the default run mode.\n"
+      "";
+  python::def("RascalButinaCluster", &RDKit::rascalButinaClusterWrapper,
+              (python::arg("mols"), python::arg("opts") = python::object()),
+              docString.c_str());
+}
+
+}  // namespace RDKit
--- a/Code/GraphMol/RascalMCES/Wrap/testRascalMCES.py
+++ b/Code/GraphMol/RascalMCES/Wrap/testRascalMCES.py
@@ -0,0 +1,119 @@
+#  Copyright (c) 2023 David Cosgrove and other RDKit contributors
+#  All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above
+#       copyright notice, this list of conditions and the following
+#       disclaimer in the documentation and/or other materials provided
+#       with the distribution.
+#     * Neither the name of Novartis Institutes for BioMedical Research Inc.
+#       nor the names of its contributors may be used to endorse or promote
+#       products derived from this software without specific prior written
+#       permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+# These tests are just to check that the Python wrappers are working
+# ok.  The bulk of the tests are in the C++ code.
+import os
+import unittest
+
+from pathlib import Path
+
+from rdkit import Chem
+from rdkit.Chem import rdRascalMCES
+
+
+class TestCase(unittest.TestCase):
+  """Checks that the rdRascalMCES Python wrappers can be called and return
+  the expected results."""
+
+  def setUp(self):
+    pass
+
+  def test1(self):
+    """FindMCES with an explicit, unmodified RascalOptions object."""
+    opts = rdRascalMCES.RascalOptions()
+    mol1 = Chem.MolFromSmiles("c1ccccc1Cl")
+    mol2 = Chem.MolFromSmiles("c1ccccc1F")
+
+    results = rdRascalMCES.FindMCES(mol1, mol2, opts)
+    self.assertEqual(len(results), 1)
+    res = results[0]
+    self.assertEqual(res.smartsString, 'c1:c:c:c:c:c:1')
+    self.assertEqual(len(res.bondMatches()), 6)
+    self.assertEqual(len(res.atomMatches()), 6)
+
+  def test2(self):
+    """Cutting a 2 fragment MCES down to its single largest fragment."""
+    ad1 = Chem.MolFromSmiles("CN(C)c1ccc(CC(=O)NCCCCCCCCCCNC23CC4CC(C2)CC(C3)C4)cc1 CHEMBL153934")
+    ad2 = Chem.MolFromSmiles("N(C)c1ccc(CC(=O)NCCCCCCCCCCCCNC23CC4CC(C2)CC(C3)C4)cc1 CHEMBL157336")
+
+    results = rdRascalMCES.FindMCES(ad1, ad2, rdRascalMCES.RascalOptions())
+    self.assertEqual(len(results), 1)
+    res = results[0]
+    # Before trimming, both fragments are in the SMARTS.
+    self.assertEqual(res.smartsString,
+                     'N(-C)-c1:c:c:c(-CC(=O)-NCCCCCCCCCC):c:c:1.NC12CC3CC(-C1)-CC(-C2)-C3')
+    res.largestFragmentOnly()
+    self.assertEqual(res.smartsString, 'N(-C)-c1:c:c:c(-CC(=O)-NCCCCCCCCCC):c:c:1')
+
+  def test3(self):
+    """FindMCES without passing an options object."""
+    mol1 = Chem.MolFromSmiles("c1ccccc1Cl")
+    mol2 = Chem.MolFromSmiles("c1ccccc1F")
+
+    results = rdRascalMCES.FindMCES(mol1, mol2)
+    self.assertEqual(len(results), 1)
+    res = results[0]
+    self.assertEqual(res.smartsString, 'c1:c:c:c:c:c:1')
+    self.assertEqual(len(res.bondMatches()), 6)
+    self.assertEqual(len(res.atomMatches()), 6)
+
+  def test4(self):
+    """Lowering similarityThreshold produces a result where the default
+    settings give none."""
+    mol1 = Chem.MolFromSmiles('Oc1cccc2C(=O)C=CC(=O)c12')
+    mol2 = Chem.MolFromSmiles('O1C(=O)C=Cc2cc(OC)c(O)cc12')
+    defaultResults = rdRascalMCES.FindMCES(mol1, mol2)
+    self.assertEqual(len(defaultResults), 0)
+
+    opts = rdRascalMCES.RascalOptions()
+    opts.similarityThreshold = 0.5
+    relaxedResults = rdRascalMCES.FindMCES(mol1, mol2, opts)
+    self.assertEqual(len(relaxedResults), 1)
+
+  def testRascalCluster(self):
+    """RascalCluster on the cdk2 set, checking the cluster sizes."""
+    cdk2_file = Path(os.environ['RDBASE']) / 'Contrib' / 'Fastcluster' / 'cdk2.smi'
+    suppl = Chem.SmilesMolSupplier(str(cdk2_file), '\t', 1, 0, False)
+    mols = list(suppl)
+    clusters = rdRascalMCES.RascalCluster(mols)
+    expClusters = [7, 7, 6, 2, 2, 2, 2, 20]
+    self.assertEqual(len(clusters), 8)
+    for expClusSize, clus in zip(expClusters, clusters):
+      self.assertEqual(expClusSize, len(clus))
+
+  def testRascalButinaCluster(self):
+    """RascalButinaCluster on the cdk2 set, checking the cluster sizes."""
+    cdk2_file = Path(os.environ['RDBASE']) / 'Contrib' / 'Fastcluster' / 'cdk2.smi'
+    suppl = Chem.SmilesMolSupplier(str(cdk2_file), '\t', 1, 0, False)
+    mols = list(suppl)
+    clusters = rdRascalMCES.RascalButinaCluster(mols)
+    expClusters = [
+      6, 6, 6, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
+    ]
+    self.assertEqual(len(clusters), 29)
+    for expClusSize, clus in zip(expClusters, clusters):
+      self.assertEqual(expClusSize, len(clus))
+
+
+# Run the tests when the file is executed as a script.
+if __name__ == "__main__":
+  unittest.main()
--- a/Code/GraphMol/RascalMCES/data/chembl_1907596.smi
+++ b/Code/GraphMol/RascalMCES/data/chembl_1907596.smi
@@ -0,0 +1,529 @@
+CHEMBL1907596_1	CN1CCC[C@H]1COc2cccnc2
+CHEMBL1907596_2	C(Oc1cncnc1)[C@@H]2CCN2
+CHEMBL1907596_3	Clc1ccc(cn1)C2CC3CCC2N3
+CHEMBL1907596_4	Fc1ncccc1OC[C@@H]2CCN2
+CHEMBL1907596_5	Fc1ncc(\C=C\c2cc(OC[C@@H]3CCN3)cnc2Cl)cc1Br
+CHEMBL1907596_6	Clc1ncc(OC[C@@H]2CCCN2)cc1\C=C\c3ccnc(Br)c3
+CHEMBL1907596_7	Fc1cc(\C=C\c2cc(OC[C@@H]3CCCN3)cnc2Cl)ccn1
+CHEMBL1907596_8	Clc1ncc(OC[C@@H]2CCN2)cc1\C=C\c3ccnc(Br)c3
+CHEMBL1907596_9	Clc1ccc(OC[C@@H]2CCN2)cn1
+CHEMBL1907596_10	Clc1ncc(OC[C@@H]2CCCN2)cc1c3cccnc3
+CHEMBL1907596_11	Fc1ncc(OC[C@@H]2CCN2)cc1c3cccnc3
+CHEMBL1907596_12	Brc1ncc(OC[C@@H]2CCN2)cc1c3cccnc3
+CHEMBL1907596_13	Ic1ncc(OC[C@@H]2CCN2)cc1c3cccnc3
+CHEMBL1907596_14	Clc1ccc(cn1)c2cc(OC[C@@H]3CCN3)cnc2Cl
+CHEMBL1907596_15	Fc1ccc(cn1)c2cc(OC[C@@H]3CCN3)cnc2Cl
+CHEMBL1907596_16	Clc1ncc(OC[C@@H]2CCN2)cc1c3cccnc3
+CHEMBL1907596_17	Clc1ncc(OC[C@@H]2CCN2)cc1\C=C\c3ccncc3
+CHEMBL1907596_18	Fc1cc(ccn1)c2cc(OC[C@@H]3CCN3)cnc2Cl
+CHEMBL1907596_19	Fc1cc(\C=C\c2cc(OC[C@@H]3CCN3)cnc2Cl)ccn1
+CHEMBL1907596_20	Fc1ccnc[n+]1c2cc(OC[C@@H]3CCN3)cnc2Cl
+CHEMBL1907596_21	Clc1ncc(OC[C@@H]2CCN2)cc1c3ccnc(Br)c3
+CHEMBL1907596_22	Clc1ncc(OC[C@@H]2CCCN2)cc1c3ccc(Br)nc3
+CHEMBL1907596_23	Fc1ccc(cn1)c2cc(OC[C@@H]3CCCN3)cnc2Cl
+CHEMBL1907596_24	Fc1ncc(cc1Br)c2cc(OC[C@@H]3CCN3)cnc2Cl
+CHEMBL1907596_25	Clc1ncc(OC[C@@H]2CCN2)cc1c3ccc(Br)nc3
+CHEMBL1907596_26	Clc1ccc(cn1)C2CC3CCC2N3
+CHEMBL1907596_27	Clc1ncc(OCC2CCCN2)cc1\C=C\c3ccccn3
+CHEMBL1907596_28	CN1CCC1COc2cnc(Cl)c(\C=C\c3ccncc3)c2
+CHEMBL1907596_29	CN1CCCC1COc2cnc(Cl)c(\C=C\c3ccccn3)c2
+CHEMBL1907596_30	CN1CCCC1COc2cnc(Cl)c(\C=C\c3cccnc3)c2
+CHEMBL1907596_31	CN1CCCC1COc2cnc(Cl)c(\C=C\c3ccncc3)c2
+CHEMBL1907596_32	CN1CCCC1COc2cnc(Cl)c(CCc3ccncc3)c2
+CHEMBL1907596_33	Clc1ncc(OCC2CCN2)cc1\C=C\c3ccncc3
+CHEMBL1907596_34	Clc1ncc(OCC2CCCN2)cc1CCc3ccncc3
+CHEMBL1907596_35	Clc1ncc(OCC2CCCN2)cc1\C=C\c3ccncc3
+CHEMBL1907596_36	Clc1ncc(OCC2CCCN2)cc1\C=C\c3cccnc3
+CHEMBL1907596_37	Clc1ccc(cn1)C2CC3CCC2N3
+CHEMBL1907596_38	CN1CCCC1c2ccc(Br)nc2
+CHEMBL1907596_39	CN1CCCC1c2ccc(Cl)nc2
+CHEMBL1907596_40	CN1CCC[C@H]1c2cccnc2
+CHEMBL1907596_41	Clc1ccc(cn1)C2CC3CCC2N3
+CHEMBL1907596_42	C1C[C@H]2CCC(N2)C(=C1)c3cccnc3
+CHEMBL1907596_43	Clc1ccc(cn1)C2=CCC[C@H]3CCC2N3
+CHEMBL1907596_44	Clc1ccc(cn1)C2CC3CCC2N3
+CHEMBL1907596_45	CO[C@@H]1CC=C2CCN3CCC4=C(CC(=O)OC4)[C@@]23C1
+CHEMBL1907596_46	CN1CCC[C@H]1c2cccnc2
+CHEMBL1907596_47	Clc1ccc(cn1)C2CC3CCC2N3
+CHEMBL1907596_48	CN1CCC[C@H]1c2cccnc2
+CHEMBL1907596_49	Clc1ccc(cn1)C2CC3CCC2N3
+CHEMBL1907596_50	Clc1ncc(cn1)C2CC3CCC2N3
+CHEMBL1907596_51	C1CC2CCC(N2)C(=C1)c3cccnc3
+CHEMBL1907596_52	Clc1ccc(cn1)C2=CCCC3CCC2N3
+CHEMBL1907596_53	CN1CCC[C@H]1COc2cccnc2
+CHEMBL1907596_54	CN1CCC[C@H]1COc2cncc(CCc3ccccc3)c2
+CHEMBL1907596_55	CN1CCC[C@H]1COc2cncc(\C=C\c3ccccc3)c2
+CHEMBL1907596_56	CN1CCC[C@H]1COc2cncc(c2)c3oc4ccccc4c3
+CHEMBL1907596_57	CN1CCC[C@H]1COc2cncc(c2)C#Cc3ccccc3
+CHEMBL1907596_58	CN1CCC[C@H]1COc2cncc(c2)c3cncnc3
+CHEMBL1907596_59	CN1CCC[C@H]1COc2cncc(c2)c3ccc(F)c(Cl)c3
+CHEMBL1907596_60	CN1CCC[C@H]1COc2cncc(c2)c3ccc(Cl)cc3Cl
+CHEMBL1907596_61	CN1CCC[C@H]1COc2cncc(c2)c3ccc(Cl)cc3
+CHEMBL1907596_62	CN1CCC[C@H]1COc2cncc(c2)c3ccc(C)cc3
+CHEMBL1907596_63	CN1CCC[C@H]1COc2cncc(c2)c3ccc(F)cc3
+CHEMBL1907596_64	CN1CCC[C@H]1COc2cncc(c2)c3cccc(N)c3
+CHEMBL1907596_65	CN1CCC[C@H]1COc2cncc(c2)c3cccc(c3)[N+](=O)[O-]
+CHEMBL1907596_66	CN1CCC[C@H]1COc2cncc(c2)c3ccc(cc3)C(F)(F)F
+CHEMBL1907596_67	COc1ccc(cc1)c2cncc(OC[C@@H]3CCCN3C)c2
+CHEMBL1907596_68	CN1CCC[C@H]1COc2cncc(c2)c3ccccc3C=O
+CHEMBL1907596_69	CN1CCC[C@H]1COc2cncc(CCc3ccncc3)c2
+CHEMBL1907596_70	CN1CCC[C@H]1COc2cncc(c2)c3ccccc3
+CHEMBL1907596_71	CN1CCC[C@H]1COc2cncc(c2)C#Cc3ccc(C)cc3
+CHEMBL1907596_72	CN1CCC[C@H]1COc2cncc(\C=C\c3ccncc3)c2
+CHEMBL1907596_73	CN1CCC[C@H]1COc2cncc(c2)c3ccc4ccccc4c3
+CHEMBL1907596_74	CN1CCC[C@H]1COc2cncc(c2)c3cccs3
+CHEMBL1907596_75	CN1CCC[C@H]1COc2cncc(c2)c3occc3
+CHEMBL1907596_76	CN1CCC[C@H]1COc2cncc(c2)c3cccnc3
+CHEMBL1907596_77	CN1CCC[C@H]1COc2cncc(c2)c3cc4ccccc4n3C
+CHEMBL1907596_78	CN1CCC[C@H]1COc2cncc(c2)c3cnc4ccccc4c3
+CHEMBL1907596_79	Clc1ccc(cn1)[C@H]2C[C@@H]3CC[C@H]2N3
+CHEMBL1907596_80	Brc1ccc(cn1)C2CC3CCC2N3
+CHEMBL1907596_81	Ic1ccc(cn1)C2CC3CCC2N3
+CHEMBL1907596_82	Fc1ccc(cn1)C2CC3CCC2N3
+CHEMBL1907596_83	C1CC2NC1CC2c3cccnc3
+CHEMBL1907596_84	Clc1ccc(cn1)[C@@H]2C[C@H]3CC[C@@H]2N3
+CHEMBL1907596_85	CN1CCCC1c2ccc(Br)nc2
+CHEMBL1907596_86	CN1CCCC1c2ccc(Cl)nc2
+CHEMBL1907596_87	Clc1ccc(cn1)[C@H]2C[C@@H]3CC[C@H]2N3
+CHEMBL1907596_88	Ic1ccc(cn1)C2CC3CCC2N3
+CHEMBL1907596_89	C1CC2NC1CC2c3cccnc3
+CHEMBL1907596_90	Clc1ccc(cn1)[C@@H]2C[C@H]3CC[C@@H]2N3
+CHEMBL1907596_91	CN1CCC[C@H]1c2cccnc2
+CHEMBL1907596_92	Clc1ccc(cn1)C2CC3CCC2N3
+CHEMBL1907596_93	C1C[C@H]2CCC(N2)C(=C1)c3cccnc3
+CHEMBL1907596_94	Clc1ccc(cn1)C2=CCC[C@H]3CCC2N3
+CHEMBL1907596_95	C1C[C@H]2CCC(N2)C(=C1)c3cncnc3
+CHEMBL1907596_96	Clc1ccc(cn1)C2CC3CCC2N3
+CHEMBL1907596_97	Clc1ccc(cn1)C2CC3CCC2N3
+CHEMBL1907596_98	Ic1cncc(O[C@H]2CCN2)c1
+CHEMBL1907596_99	C(Oc1cccnc1)[C@@H]2CCN2
+CHEMBL1907596_100	Clc1ccc(cn1)C2CC3CCC2N3
+CHEMBL1907596_101	Brc1ccc(cn1)C2CC3CCC2N3
+CHEMBL1907596_102	Fc1ccc(cn1)C2CC3CCC2N3
+CHEMBL1907596_103	C1CC2NC1CC2c3cccnc3
+CHEMBL1907596_104	Nc1ccc(cn1)C2CC3CCC2N3
+CHEMBL1907596_105	C(Oc1cccnc1)[C@@H]2CCN2
+CHEMBL1907596_106	Fc1ncccc1OC[C@@H]2CCN2
+CHEMBL1907596_107	Clc1ccc(OC[C@@H]2CCN2)cn1
+CHEMBL1907596_108	[O-][N+](=O)c1cncc(OC[C@@H]2CCN2)c1
+CHEMBL1907596_109	CCOc1cncc(OC[C@@H]2CCN2)c1
+CHEMBL1907596_110	CCCc1cncc(OC[C@@H]2CCN2)c1
+CHEMBL1907596_111	Fc1cncc(OC[C@@H]2CCN2)c1
+CHEMBL1907596_112	C(Oc1cncc(c1)c2ccccc2)[C@@H]3CCN3
+CHEMBL1907596_113	CC(=O)NCc1cncc(OC[C@@H]2CCN2)c1
+CHEMBL1907596_114	Cc1cncc(OC[C@H]2CCN2)c1
+CHEMBL1907596_115	Oc1cncc(OC[C@H]2CCN2)c1
+CHEMBL1907596_116	Clc1cncc(OC[C@H]2CCN2)c1
+CHEMBL1907596_117	COc1ccc(OC[C@H]2CCN2)cn1
+CHEMBL1907596_118	Clc1ccc(OC[C@H]2CCN2)cn1
+CHEMBL1907596_119	Cc1ccc(OC[C@H]2CCN2)cn1
+CHEMBL1907596_120	Cc1ccc(OC[C@@H]2CCN2)cn1
+CHEMBL1907596_121	Brc1ccc(OC[C@H]2CCN2)cn1
+CHEMBL1907596_122	Fc1ccc(OC[C@H]2CCN2)cn1
+CHEMBL1907596_123	Fc1ccc(OC[C@@H]2CCN2)cn1
+CHEMBL1907596_124	Brc1ccc(OC[C@@H]2CCN2)cn1
+CHEMBL1907596_125	CCc1cncc(OC[C@@H]2CCN2)c1
+CHEMBL1907596_126	Cc1cncc(OC[C@@H]2CCN2)c1
+CHEMBL1907596_127	Nc1cncc(OC[C@@H]2CCN2)c1
+CHEMBL1907596_128	FC(F)(F)c1cncc(OC[C@@H]2CCN2)c1
+CHEMBL1907596_129	Brc1cncc(OC[C@@H]2CCN2)c1
+CHEMBL1907596_130	Clc1cc(OC[C@@H]2CCN2)cnc1Cl
+CHEMBL1907596_131	Clc1cncc(OC[C@@H]2CCN2)c1
+CHEMBL1907596_132	Fc1ncccc1OC[C@H]2CCN2
+CHEMBL1907596_133	Clc1ncc(OC[C@@H]2CCN2)cc1c3ccccc3
+CHEMBL1907596_134	Clc1ncc(OC[C@@H]2CCN2)cc1Br
+CHEMBL1907596_135	N#Cc1cncc(OC[C@@H]2CCN2)c1
+CHEMBL1907596_136	Clc1ccc(cn1)C2CC3CCC2N3
+CHEMBL1907596_137	CN1CCC[C@H]1c2cccnc2
+CHEMBL1907596_138	Ic1ccc(cn1)C2CC3CCC2N3
+CHEMBL1907596_139	Ic1ccc(cn1)C2CC3CCC2N3
+CHEMBL1907596_140	Nc1cc(cnc1Cl)C2CC3CCC2N3
+CHEMBL1907596_141	Clc1ncc(cc1I)C2CC3CCC2N3
+CHEMBL1907596_142	Clc1ncc(cc1Br)C2CC3CCC2N3
+CHEMBL1907596_143	Clc1ncc(cc1N=[N+]=[N-])C2CC3CCC2N3
+CHEMBL1907596_144	Clc1ncc(cc1c2ccccc2)C3CC4CCC3N4
+CHEMBL1907596_145	Fc1cc(cnc1Cl)C2CC3CCC2N3
+CHEMBL1907596_146	Clc1cc(cnc1Cl)C2CC3CCC2N3
+CHEMBL1907596_147	Clc1ccc(cn1)C2CC3CCC2N3
+CHEMBL1907596_148	Fc1ncc(cc1c2ccccc2)[C@H]3C[C@@H]4CC[C@H]3N4
+CHEMBL1907596_149	Fc1ncc(cc1c2ccccc2)[C@@H]3C[C@H]4CC[C@@H]3N4
+CHEMBL1907596_150	Fc1ncc(cc1c2ccccc2)C3CC4CCC3N4
+CHEMBL1907596_151	Clc1ccc(cn1)[C@H]2C[C@@H]3CC[C@H]2N3
+CHEMBL1907596_152	Brc1ccc(cn1)C2CC3CCC2N3
+CHEMBL1907596_153	Fc1ccc(cn1)C2CC3CCC2N3
+CHEMBL1907596_154	C1CC2NC1CC2c3cccnc3
+CHEMBL1907596_155	CN1CCC[C@H]1c2cccnc2
+CHEMBL1907596_156	Clc1ccc(cn1)[C@H]2C[C@@H]3CC[C@H]2N3
+CHEMBL1907596_157	CN1[C@@H]2CC[C@H]1[C@@H](C2)c3ccc(Cl)nc3
+CHEMBL1907596_158	Cc1ccc(cn1)C2CC3CCC2N3
+CHEMBL1907596_159	CN1[C@H]2CC[C@@H]1[C@H](C2)c3ccc(Cl)nc3
+CHEMBL1907596_160	Clc1ccc(cn1)[C@H]2C[C@@H]3CC[C@H]2N3
+CHEMBL1907596_161	Clc1ccc(cn1)[C@@H]2C[C@H]3CC[C@@H]2N3
+CHEMBL1907596_162	Clc1ccc(cn1)[C@@H]2C[C@H]3CC[C@@H]2N3
+CHEMBL1907596_163	CN1C2CCC1C(C2)c3ccc(Cl)nc3
+CHEMBL1907596_164	Clc1ccc(cn1)C2CC3CCC2N3
+CHEMBL1907596_165	Clc1ccc(cn1)[C@@H]2C[C@H]3CC[C@@H]2N3
+CHEMBL1907596_166	Clc1ccc(cn1)[C@@H]2C[C@H]3CC[C@@H]2N3
+CHEMBL1907596_167	C(Oc1cccnc1)[C@@H]2CCN2
+CHEMBL1907596_168	Fc1ncccc1OC[C@@H]2CCN2
+CHEMBL1907596_169	Fc1cncc(OC[C@@H]2CCN2)c1
+CHEMBL1907596_170	Fc1ccc(OC[C@@H]2CCN2)cn1
+CHEMBL1907596_171	Brc1cncc(OC[C@@H]2CCN2)c1
+CHEMBL1907596_172	Clc1cncc(OC[C@@H]2CCN2)c1
+CHEMBL1907596_173	Clc1ncccc1OC[C@@H]2CCN2
+CHEMBL1907596_174	Ic1cncc(OC[C@@H]2CCN2)c1
+CHEMBL1907596_175	Ic1ccc(OC[C@@H]2CCN2)cn1
+CHEMBL1907596_176	Brc1ncccc1OC[C@@H]2CCN2
+CHEMBL1907596_177	CN1CCC[C@H]1c2cccnc2
+CHEMBL1907596_178	CCCCCCc1cncc(OC[C@@H]2CCCN2C)c1
+CHEMBL1907596_179	CCCCc1cncc(OC[C@@H]2CCCN2C)c1
+CHEMBL1907596_180	CC(C)Cc1cncc(OC[C@@H]2CCCN2C)c1
+CHEMBL1907596_181	CCCc1cncc(OC[C@@H]2CCCN2C)c1
+CHEMBL1907596_182	CN1CCC[C@H]1COc2cncc(N)c2
+CHEMBL1907596_183	CN1CCC[C@H]1COc2cncc(Br)c2
+CHEMBL1907596_184	CN1CCC[C@H]1COc2cncc(Cl)c2
+CHEMBL1907596_185	CCc1cncc(OC[C@@H]2CCCN2C)c1
+CHEMBL1907596_186	CN1CCC[C@H]1COc2cncc(C)c2
+CHEMBL1907596_187	CN1CCC[C@H]1COc2ccc(Cl)nc2
+CHEMBL1907596_188	CN1CCC[C@H]1COc2cccnc2
+CHEMBL1907596_189	CN1CCC[C@H]1COc2cncc(F)c2
+CHEMBL1907596_190	Clc1ccc(cn1)[C@H]2C[C@@H]3CC[C@H]2N3
+CHEMBL1907596_191	Clc1ccc(cn1)[C@@H]2C[C@H]3CC[C@@H]2N3
+CHEMBL1907596_192	Brc1ccc(cn1)C2CC3CCC2N3
+CHEMBL1907596_193	C1CC2NC1CC2c3cccnc3
+CHEMBL1907596_194	Fc1ccc(cn1)C2CC3CCC2N3
+CHEMBL1907596_195	O=C1C=CC=C2[C@H]3CNC[C@H](C3)CN12
+CHEMBL1907596_196	CN1CCC[C@H]1c2cccnc2
+CHEMBL1907596_197	CN1CCC[C@H]1c2cccnc2
+CHEMBL1907596_198	O=C1C=CC=C2[C@H]3CNC[C@H](C3)CN12
+CHEMBL1907596_199	Cl.Cl.C1NCC2CC1c3cc4nccnc4cc23
+CHEMBL1907596_200	Cl.[O-][N+](=O)c1ccc2C3CNCC(C3)c2c1
+CHEMBL1907596_201	Clc1ccc(cn1)[C@H]2C[C@@H]3CC[C@H]2N3
+CHEMBL1907596_202	Fc1ccc(cn1)C2CC3CCC2N3
+CHEMBL1907596_203	Fc1ncc(cc1c2ccccc2)C3CC4CCC3N4
+CHEMBL1907596_204	Clc1ccc(cn1)[C@@H]2C[C@H]3CC[C@@H]2N3
+CHEMBL1907596_205	COc1ccc(cc1)c2cc(cnc2F)C3CC4CCC3N4
+CHEMBL1907596_206	Fc1ccc(cc1)c2cc(cnc2F)C3CC4CCC3N4
+CHEMBL1907596_207	Fc1ncc(cc1c2cccc(Cl)c2)C3CC4CCC3N4
+CHEMBL1907596_208	Fc1cccc(c1)c2cc(cnc2F)C3CC4CCC3N4
+CHEMBL1907596_209	Fc1ncc(cc1c2ccc(Cl)cc2)C3CC4CCC3N4
+CHEMBL1907596_210	[O-][N+](=O)c1cccc(c1)c2cc(cnc2F)C3CC4CCC3N4
+CHEMBL1907596_211	COc1cccc(c1)c2cc(cnc2F)C3CC4CCC3N4
+CHEMBL1907596_212	[O-][N+](=O)c1ccc(cc1)c2cc(cnc2F)C3CC4CCC3N4
+CHEMBL1907596_213	Nc1cccc(c1)c2cc(cnc2F)C3CC4CCC3N4
+CHEMBL1907596_214	Nc1ccc(cc1)c2cc(cnc2F)C3CC4CCC3N4
+CHEMBL1907596_215	Clc1ccc(cn1)C2CC3CCC2N3
+CHEMBL1907596_216	CN1CCC[C@H]1COc2cncc(c2)C#Cc3ccccc3
+CHEMBL1907596_217	CN1CCC[C@H]1COc2cncc(c2)C#CCO
+CHEMBL1907596_218	CN1CCC[C@H]1COc2cncc(c2)C#CCCCCO
+CHEMBL1907596_219	CN1CCC[C@H]1COc2cncc(c2)C#CCCCCF
+CHEMBL1907596_220	CN1CCC[C@H]1c2cccnc2
+CHEMBL1907596_221	Clc1ccc(cn1)C2CC3CCC2N3
+CHEMBL1907596_222	Ic1cncc(c1)C2CC3CCC2N3
+CHEMBL1907596_223	Nc1cncc(c1)C2CC3CCC2N3
+CHEMBL1907596_224	C=Cc1cncc(c1)C2CC3CCC2N3
+CHEMBL1907596_225	Fc1cncc(c1)C2CC3CCC2N3
+CHEMBL1907596_226	Clc1cncc(c1)C2CC3CCC2N3
+CHEMBL1907596_227	Brc1cncc(c1)C2CC3CCC2N3
+CHEMBL1907596_228	C#Cc1cncc(c1)C2CC3CCC2N3
+CHEMBL1907596_229	CN1C2CCC1C(C2)c3cncc(I)c3
+CHEMBL1907596_230	Clc1ccc(cn1)C2CC3CCC2N3
+CHEMBL1907596_231	Clc1ccc(cn1)C2CC3CCCCC2N3
+CHEMBL1907596_232	Clc1ccc(cn1)[C@@H]2CC3CNC2C3
+CHEMBL1907596_233	Clc1ccc(cn1)[C@@H]2CC3CC2CN3
+CHEMBL1907596_234	Clc1ccc(cn1)C2CC3CCC2CN3
+CHEMBL1907596_235	Clc1ccc(cn1)C2CC3CCC2N3
+CHEMBL1907596_236	Clc1ccc(cn1)C2CC3CCC2NC3
+CHEMBL1907596_237	Clc1ccc(cn1)[C@@H]2CC3CNC2C3
+CHEMBL1907596_238	Clc1ccc(cn1)C2CC3CCC2N3
+CHEMBL1907596_239	C1CC2NC1CC2c3cccnc3
+CHEMBL1907596_240	Clc1ccc(cn1)[C@@H]2C[C@H]3CC[C@@H]2N3
+CHEMBL1907596_241	C(Oc1cccnc1)C2CCN2
+CHEMBL1907596_242	CN1CCCC1COc2cccnc2
+CHEMBL1907596_243	C1NCC2CC1c3cc4nccnc4cc23
+CHEMBL1907596_244	Clc1ccc2cc3C4CNCC(C4)c3cc2n1
+CHEMBL1907596_245	Cc1ccc2cc3C4CNCC(C4)c3cc2n1
+CHEMBL1907596_246	Cc1cnc2cc3C4CNCC(C4)c3cc2c1
+CHEMBL1907596_247	[O-][N+](=O)c1ccc2C3CNCC(C3)c2c1
+CHEMBL1907596_248	Fc1cc2C3CNCC(C3)c2cc1F
+CHEMBL1907596_249	Clc1ccc(cn1)C2CC3CCC2N3
+CHEMBL1907596_250	Fc1ncc(cc1c2ccccc2)C3CC4CCC3N4
+CHEMBL1907596_251	Clc1ncc(cc1c2ccccc2)C3CC4CCC3N4
+CHEMBL1907596_252	Fc1ccc(cc1)c2cc(cnc2F)C3CC4CCC3N4
+CHEMBL1907596_253	Fc1ncc(cc1c2ccc(Cl)cc2)C3CC4CCC3N4
+CHEMBL1907596_254	Fc1ncc(cc1c2ccc(cc2)C#N)C3CC4CCC3N4
+CHEMBL1907596_255	Fc1ncc(cc1c2ccc(Cl)c(Cl)c2)C3CC4CCC3N4
+CHEMBL1907596_256	CN1C2CCC1C(C2)c3cnc(Cl)c(c3)c4ccccc4
+CHEMBL1907596_257	Cc1ccc(cc1)c2cc(cnc2Cl)C3CC4CCC3N4
+CHEMBL1907596_258	COc1ccc(cc1)c2cc(cnc2Cl)C3CC4CCC3N4
+CHEMBL1907596_259	Cc1ccc(cc1)c2cc(cnc2F)C3CC4CCC3N4
+CHEMBL1907596_260	Cc1cccc(c1)c2cc(cnc2F)C3CC4CCC3N4
+CHEMBL1907596_261	CN1CCCC1c2ccc(Br)nc2
+CHEMBL1907596_262	CN1CCCC1c2ccc(Cl)nc2
+CHEMBL1907596_263	Clc1ccc(cn1)C2CC3CCC2N3
+CHEMBL1907596_264	C=CC1=CC=C2C3CNCC(C3)CN2C1=O
+CHEMBL1907596_265	Clc1ccc(cn1)C2CC3CCC2N3
+CHEMBL1907596_266	Clc1ccc(cn1)[C@H]2C[C@@H]3CC[C@H]2N3
+CHEMBL1907596_267	Clc1ccc(cn1)[C@@H]2C[C@H]3CC[C@@H]2N3
+CHEMBL1907596_268	C1CN(C[C@@H]2NC[C@H]12)c3cccnc3
+CHEMBL1907596_269	Clc1ccc(cn1)N2CC[C@H]3CN[C@H]3C2
+CHEMBL1907596_270	N#Cc1cncc(c1)N2CC[C@@H]3CN[C@@H]3C2
+CHEMBL1907596_271	COc1cncc(c1)N2CC[C@H]3CN[C@H]3C2
+CHEMBL1907596_272	Brc1ncc(cc1C#N)N2CC[C@H]3CN[C@H]3C2
+CHEMBL1907596_273	N\C(=N\O)\c1cncc(c1)N2CC[C@@H]3CN[C@@H]3C2
+CHEMBL1907596_274	C1C[C@H]2CN([C@H]2CN1)c3cccnc3
+CHEMBL1907596_275	Brc1ncc(cc1C#N)N2C[C@@H]3CCNC[C@H]23
+CHEMBL1907596_276	C1CN(C[C@@H]2NC[C@H]12)c3cncnc3
+CHEMBL1907596_277	Clc1ccc(nn1)N2CC[C@H]3CN[C@H]3C2
+CHEMBL1907596_278	Brc1cncc(c1)N2CC[C@H]3CN[C@H]3C2
+CHEMBL1907596_279	Clc1cc(cnc1Cl)N2CC[C@H]3CN[C@H]3C2
+CHEMBL1907596_280	COc1ccc(cn1)N2CC[C@H]3CN[C@H]3C2
+CHEMBL1907596_281	CCOc1cncc(c1)N2CC[C@H]3CN[C@H]3C2
+CHEMBL1907596_282	Clc1ccc(cn1)N2C[C@@H]3CCNC[C@H]23
+CHEMBL1907596_283	Clc1cc(cnc1Cl)N2C[C@@H]3CCNC[C@H]23
+CHEMBL1907596_284	Brc1cc(cnc1Br)N2CC[C@H]3CN[C@H]3C2
+CHEMBL1907596_285	Cc1cncc(c1)N2CC[C@@H]3CN[C@@H]3C2
+CHEMBL1907596_286	Cc1cc(cnc1Cl)N2C[C@@H]3CCNC[C@H]23
+CHEMBL1907596_287	Cc1cc(cnc1Cl)N2CC[C@H]3CN[C@H]3C2
+CHEMBL1907596_288	COc1cc(cnc1Br)N2CC[C@H]3CN[C@H]3C2
+CHEMBL1907596_289	Clc1ccc(cn1)N2C[C@H]3CCNC[C@@H]23
+CHEMBL1907596_290	Clc1cc(cnc1Cl)N2C[C@H]3CCNC[C@@H]23
+CHEMBL1907596_291	N#Cc1cncc(c1)N2C[C@@H]3CCNC[C@H]23
+CHEMBL1907596_292	Cc1cc(cnc1Cl)N2C[C@H]3CCNC[C@@H]23
+CHEMBL1907596_293	COc1cc(cnc1Br)N2C[C@H]3CCNC[C@@H]23
+CHEMBL1907596_294	Brc1ncc(cc1C#N)N2C[C@H]3CCNC[C@@H]23
+CHEMBL1907596_295	COc1cncc(c1)N2C[C@H]3CCNC[C@@H]23
+CHEMBL1907596_296	CCOc1cncc(c1)N2CC[C@@H]3CN[C@@H]3C2
+CHEMBL1907596_297	C1CN(C[C@H]2NC[C@@H]12)c3cccnc3
+CHEMBL1907596_298	Clc1ccc(cn1)N2CC[C@@H]3CN[C@@H]3C2
+CHEMBL1907596_299	Brc1cncc(c1)N2CC[C@@H]3CN[C@@H]3C2
+CHEMBL1907596_300	Clc1cc(cnc1Cl)N2CC[C@@H]3CN[C@@H]3C2
+CHEMBL1907596_301	Brc1cc(cnc1Br)N2CC[C@@H]3CN[C@@H]3C2
+CHEMBL1907596_302	COc1cncc(c1)N2CC[C@@H]3CN[C@@H]3C2
+CHEMBL1907596_303	Cc1cc(cnc1Cl)N2CC[C@@H]3CN[C@@H]3C2
+CHEMBL1907596_304	N#Cc1cncc(c1)N2CC[C@H]3CN[C@H]3C2
+CHEMBL1907596_305	COc1cc(cnc1Br)N2CC[C@@H]3CN[C@@H]3C2
+CHEMBL1907596_306	C1CN(C[C@H]2NC[C@@H]12)c3cncnc3
+CHEMBL1907596_307	Clc1ccc(cn1)[C@H]2C[C@@H]3CC[C@H]2N3
+CHEMBL1907596_308	Clc1ccc(cn1)[C@@H]2C[C@H]3CC[C@@H]2N3
+CHEMBL1907596_309	Cl.Cl.Clc1ccc(cn1)C2CC3CCCC2N3
+CHEMBL1907596_310	Cl.Cl.Cl.Clc1ccc(cn1)C2CC3CC2CN3
+CHEMBL1907596_311	Cl.Clc1ccc(cn1)C2CC3CCC2N3
+CHEMBL1907596_312	CN1CCC[C@H]1c2cccnc2
+CHEMBL1907596_313	Clc1ccc(cn1)C2CC3CCC2N3
+CHEMBL1907596_314	Clc1ccc(cn1)N2CC3CC2CN3
+CHEMBL1907596_315	C1CC2CNC1CN2c3cccnc3
+CHEMBL1907596_316	C1NC2CC1CN(C2)c3cccnc3
+CHEMBL1907596_317	C1NCC2CC1CN2c3cccnc3
+CHEMBL1907596_318	C1NC2CC1N(C2)c3cccnc3
+CHEMBL1907596_319	[I-].C[N+]1(C)CC2CC1CN2c3ccc(Cl)nc3
+CHEMBL1907596_320	Fc1ccc(cn1)N2CC3CC2CN3
+CHEMBL1907596_321	COc1ccc(cn1)N2CC3CC2CN3
+CHEMBL1907596_322	N#Cc1cncc(c1)N2CC3CC2CN3
+CHEMBL1907596_323	Oc1cc(cnc1Cl)N2CC3CC2CN3
+CHEMBL1907596_324	COc1cncc(c1)N2CC3CC2CN3
+CHEMBL1907596_325	Cc1cc(cnc1Cl)N2CC3CC2CN3
+CHEMBL1907596_326	Clc1cc(cnc1Cl)N2CC3CC2CN3
+CHEMBL1907596_327	COc1cc(cnc1Cl)N2CC3CC2CN3
+CHEMBL1907596_328	C1NC2CC1N(C2)c3cncnc3
+CHEMBL1907596_329	C1NC2CC1N(C2)c3cc4ncccc4s3
+CHEMBL1907596_330	Clc1ccc(cn1)C2CC3CCC2N3
+CHEMBL1907596_331	Fc1ccc(cn1)C2CC3CCC2N3
+CHEMBL1907596_332	C1CC2NC1CC2c3cccnc3
+CHEMBL1907596_333	C1NCC2CC1c3cc4nccnc4cc23
+CHEMBL1907596_334	Fc1ccc(cc1)c2cncc(c2)C3CC4CCC3N4
+CHEMBL1907596_335	Clc1ccc(cc1)c2cncc(c2)C3CC4CCC3N4
+CHEMBL1907596_336	Cl.Fc1ncc(cc1c2ccccc2)C3CC4CCC3N4
+CHEMBL1907596_337	Cl.Fc1ncc(cc1c2ccc(Cl)cc2)C3CC4CCC3N4
+CHEMBL1907596_338	Cl.Fc1cccc(c1)c2cc(cnc2F)C3CC4CCC3N4
+CHEMBL1907596_339	Cl.Nc1cccc(c1)c2cc(cnc2F)C3CC4CCC3N4
+CHEMBL1907596_340	Cl.Nc1ccc(cc1)c2cc(cnc2F)C3CC4CCC3N4
+CHEMBL1907596_341	Cl.[O-][N+](=O)c1ccc(cc1)c2cc(cnc2F)C3CC4CCC3N4
+CHEMBL1907596_342	Cl.Fc1ncc(cc1c2cccc(Cl)c2)C3CC4CCC3N4
+CHEMBL1907596_343	Cl.Fc1ccc(cc1)c2cc(cnc2F)C3CC4CCC3N4
+CHEMBL1907596_344	Cl.Nc1ccc(cc1)c2cncc(c2)C3CC4CCC3N4
+CHEMBL1907596_345	Cl.COc1cccc(c1)c2cc(cnc2F)C3CC4CCC3N4
+CHEMBL1907596_346	Cl.[O-][N+](=O)c1cccc(c1)c2cc(cnc2F)C3CC4CCC3N4
+CHEMBL1907596_347	Cl.Cl.C1CC2NC1CC2c3cncc(c3)c4ccccc4
+CHEMBL1907596_348	Cl.Cl.Fc1cccc(c1)c2cncc(c2)C3CC4CCC3N4
+CHEMBL1907596_349	Cl.Cl.[O-][N+](=O)c1cccc(c1)c2cncc(c2)C3CC4CCC3N4
+CHEMBL1907596_350	Cl.Cl.Cl.Clc1cccc(c1)c2cncc(c2)C3CC4CCC3N4
+CHEMBL1907596_351	Cl.Cl.[O-][N+](=O)c1ccc(cc1)c2cncc(c2)C3CC4CCC3N4
+CHEMBL1907596_352	Cl.Cl.COc1cccc(c1)c2cncc(c2)C3CC4CCC3N4
+CHEMBL1907596_353	O=C1C=CC=C2[C@H]3CNC[C@H](C3)CN12
+CHEMBL1907596_354	Clc1ccc(cn1)C2CC3CCC2N3
+CHEMBL1907596_355	CN1[C@@H]2CC[C@@H]1[C@H](C2)c3cncc(c3)c4ccnc(F)c4
+CHEMBL1907596_356	CN1[C@@H]2CC[C@@H]1[C@H](C2)c3cncc(c3)c4cccnc4F
+CHEMBL1907596_357	CN1[C@@H]2CC[C@@H]1[C@H](C2)c3cncc(c3)c4cccc(F)n4
+CHEMBL1907596_358	CN1C2CCC1C(C2)c3cncc(c3)c4ccc(F)nc4
+CHEMBL1907596_359	Clc1ccc(cn1)N2CC3CC(C2)N3
+CHEMBL1907596_360	C1C2CN(CC1N2)c3cccnc3
+CHEMBL1907596_361	Brc1ccc(cn1)N2CC3CC(C2)N3
+CHEMBL1907596_362	Clc1ccc(nn1)N2CC3CC(C2)N3
+CHEMBL1907596_363	CN1C2CC1CN(C2)c3ccc(Cl)nc3
+CHEMBL1907596_364	Clc1ccc(cn1)N2CC3CC2CN3
+CHEMBL1907596_365	C[C@H](CCOC(=O)N1CC(C)C1)N(C)C.OC(=O)C(=O)O
+CHEMBL1907596_366	Cc1cc(on1)[C@H]2C[C@H]3CC[C@H]2N3
+CHEMBL1907596_367	Clc1ccc(cn1)C2CC3CCC2N3
+CHEMBL1907596_368	Clc1ccc(cn1)C2=CCCC3CCC2N3
+CHEMBL1907596_369	Cc1cc(on1)[C@H]2C[C@H]3CC[C@H]2N3
+CHEMBL1907596_370	O=C1C=CC=C2[C@H]3CNC[C@H](C3)CN12
+CHEMBL1907596_371	O=C1C=CC=C2[C@H]3CNC[C@H](C3)CN12
+CHEMBL1907596_372	CN1CCC[C@H]1c2cccnc2
+CHEMBL1907596_373	Clc1ccc(cn1)C2CC3CCC2N3
+CHEMBL1907596_374	C(Oc1cncc(c1)N2C[C@H]3CNC[C@H]3C2)c4ccccc4
+CHEMBL1907596_375	CC(C)Oc1cncc(c1)N2C[C@H]3CNC[C@H]3C2.OC(=O)\C=C\C(=O)O
+CHEMBL1907596_376	OC(=O)C(F)(F)F.C1NC[C@H]2CN(C[C@@H]12)c3cncc(c3)c4ccccc4
+CHEMBL1907596_377	Cl.Clc1ccc(cn1)N2C[C@H]3CNC[C@H]3C2
+CHEMBL1907596_378	OC(=O)C(F)(F)F.Brc1ccc(cn1)N2C[C@H]3CNC[C@H]3C2
+CHEMBL1907596_379	OC(=O)C(F)(F)F.Brc1cncc(c1)N2C[C@H]3CNC[C@H]3C2
+CHEMBL1907596_380	CCCOc1cncc(c1)N2C[C@H]3CNC[C@H]3C2.OC(=O)\C=C\C(=O)O
+CHEMBL1907596_381	Cl.Cl.C1NC[C@H]2CN(C[C@@H]12)c3cccnc3
+CHEMBL1907596_382	Cl.Cl.Oc1cncc(c1)N2C[C@H]3CNC[C@H]3C2
+CHEMBL1907596_383	Cl.Cl.COc1cncc(c1)N2C[C@H]3CNC[C@H]3C2
+CHEMBL1907596_384	Cl.Cl.CCOc1cncc(c1)N2C[C@H]3CNC[C@H]3C2
+CHEMBL1907596_385	CN1CCC[C@H]1c2cccnc2
+CHEMBL1907596_386	Clc1ccc(cn1)C2CC3CCC2N3
+CHEMBL1907596_387	Clc1ccc(OC[C@H]2CCN2)cn1
+CHEMBL1907596_388	C1NC2CC1N(C2)c3cccnc3
+CHEMBL1907596_389	Clc1ccc(cn1)C2CC3CCC2N3
+CHEMBL1907596_390	CN1CCC[C@H]1COc2cnc(Cl)c(OCc3ccc(Cl)nc3)c2
+CHEMBL1907596_391	CN1CCC[C@H]1COc2cnc(Cl)c(OCc3ccnc(F)c3)c2
+CHEMBL1907596_392	Fc1cc(COc2cc(OC[C@@H]3CCCN3)cnc2Cl)ccn1
+CHEMBL1907596_393	CN1CCC[C@H]1COc2cnc(Cl)c(c2)c3ccnc(F)c3
+CHEMBL1907596_394	Clc1ccc(Oc2cc(OC[C@@H]3CCCN3)cnc2Cl)cn1
+CHEMBL1907596_395	[11CH3]N1CCC[C@H]1COc2cnc(Cl)c(\C=C\c3ccncc3)c2
+CHEMBL1907596_396	Clc1ccc(cn1)[C@@H]2C[C@H]3CC[C@@H]2N3
+CHEMBL1907596_397	Clc1ncc(cc1c2ccccc2)[C@H]3C[C@@H]4CC[C@H]3N4
+CHEMBL1907596_398	Fc1ccc(cc1)c2cc(cnc2Cl)[C@H]3C[C@@H]4CC[C@H]3N4
+CHEMBL1907596_399	Fc1cccc(c1)c2cc(cnc2Cl)[C@H]3C[C@@H]4CC[C@H]3N4
+CHEMBL1907596_400	Clc1ccc(cc1)c2cc(cnc2Cl)[C@H]3C[C@@H]4CC[C@H]3N4
+CHEMBL1907596_401	Clc1cccc(c1)c2cc(cnc2Cl)[C@H]3C[C@@H]4CC[C@H]3N4
+CHEMBL1907596_402	Clc1ncc(cc1c2cccc(Br)c2)[C@H]3C[C@@H]4CC[C@H]3N4
+CHEMBL1907596_403	[O-][N+](=O)c1ccc(cc1)c2cc(cnc2Cl)[C@H]3C[C@@H]4CC[C@H]3N4
+CHEMBL1907596_404	[O-][N+](=O)c1cccc(c1)c2cc(cnc2Cl)[C@H]3C[C@@H]4CC[C@H]3N4
+CHEMBL1907596_405	Nc1ccc(cc1)c2cc(cnc2Cl)[C@H]3C[C@@H]4CC[C@H]3N4
+CHEMBL1907596_406	Nc1cccc(c1)c2cc(cnc2Cl)[C@H]3C[C@@H]4CC[C@H]3N4
+CHEMBL1907596_407	COc1ccc(cc1)c2cc(cnc2Cl)[C@H]3C[C@@H]4CC[C@H]3N4
+CHEMBL1907596_408	COc1cccc(c1)c2cc(cnc2Cl)[C@H]3C[C@@H]4CC[C@H]3N4
+CHEMBL1907596_409	CN(C)c1cccc(c1)c2cc(cnc2Cl)[C@H]3C[C@@H]4CC[C@H]3N4
+CHEMBL1907596_410	Clc1ccc(nn1)N2C[C@H]3C[C@@H]2CN3
+CHEMBL1907596_411	Clc1ccc(cn1)C2CC3CCC2N3
+CHEMBL1907596_412	O=C1C=CC=C2[C@H]3CNC[C@H](C3)CN12
+CHEMBL1907596_413	BrC1=CC=C2[C@H]3CNC[C@H](C3)CN2C1=O
+CHEMBL1907596_414	BrN1C[C@H]2C[C@H](C1)C3=CC=CC(=O)N3C2
+CHEMBL1907596_415	FC(F)(F)C1=CC=C2[C@H]3CNC[C@H](C3)CN2C1=O
+CHEMBL1907596_416	CC1=CC=C2[C@H]3CNC[C@H](C3)CN2C1=O
+CHEMBL1907596_417	IN1C[C@H]2C[C@H](C1)C3=CC=CC(=O)N3C2
+CHEMBL1907596_418	Clc1ccc(cn1)C2CC3CCC2N3
+CHEMBL1907596_419	Cl.OCCCc1cc(on1)c2cncc(OC[C@@H]3CCN3)c2
+CHEMBL1907596_420	Cl.OCc1cc(on1)c2cncc(OC[C@@H]3CCN3)c2
+CHEMBL1907596_421	Cl.OCCc1cc(on1)c2cncc(OC[C@@H]3CCN3)c2
+CHEMBL1907596_422	Cl.OCCCc1cc(on1)c2cncc(OC[C@@H]3CCN3)c2
+CHEMBL1907596_423	Cl.OCc1cc(on1)c2cncc(OC[C@@H]3CCN3)c2
+CHEMBL1907596_424	Cl.OCCc1cc(on1)c2cncc(OC[C@@H]3CCN3)c2
+CHEMBL1907596_425	C1NC[C@H]2CN(C[C@@H]12)c3cccnc3
+CHEMBL1907596_426	C1C[C@@H]2CN(C[C@@H]2N1)c3cccnc3
+CHEMBL1907596_427	Clc1cccc(NC(=O)c2cncc(n2)N3C[C@H]4CNC[C@H]4C3)c1
+CHEMBL1907596_428	FC(F)(F)c1ccccc1CNC(=O)c2cncc(c2)N3C[C@H]4CNC[C@H]4C3
+CHEMBL1907596_429	O=C(N1CCc2ccccc2C1)c3cncc(c3)N4C[C@H]5CNC[C@H]5C4
+CHEMBL1907596_430	Fc1ccccc1CCNC(=O)c2cncc(c2)N3C[C@H]4CNC[C@H]4C3
+CHEMBL1907596_431	Ic1cccc(NC(=O)c2cncc(c2)N3C[C@H]4CNC[C@H]4C3)c1
+CHEMBL1907596_432	Clc1ccc(NC(=O)c2cncc(c2)N3C[C@H]4CNC[C@H]4C3)cc1
+CHEMBL1907596_433	Cc1cccc(NC(=O)c2cncc(c2)N3C[C@H]4CNC[C@H]4C3)c1
+CHEMBL1907596_434	Cc1cc(C)cc(NC(=O)c2cncc(c2)N3C[C@H]4CNC[C@H]4C3)c1
+CHEMBL1907596_435	COc1cc(NC(=O)c2cncc(c2)N3C[C@H]4CNC[C@H]4C3)cc(OC)c1
+CHEMBL1907596_436	Fc1cc(F)cc(NC(=O)c2cncc(c2)N3C[C@H]4CNC[C@H]4C3)c1
+CHEMBL1907596_437	Clc1ccc(cn1)C2CC3CCC2N3
+CHEMBL1907596_438	Cc1cc(on1)C2CC3CCC2N3
+CHEMBL1907596_439	Cl.Clc1ccc(cn1)C2=CC3CCC2N3
+CHEMBL1907596_440	Cl.Fc1ccc(cn1)C2=CC3CCC2N3
+CHEMBL1907596_441	Cl.C1CC2NC1C=C2c3cccnc3
+CHEMBL1907596_442	OCC[C@H]1C[C@@H]1c2cncc(OC[C@@H]3CCN3)c2
+CHEMBL1907596_443	OCC[C@@H]1C[C@H]1c2cncc(OC[C@@H]3CCN3)c2
+CHEMBL1907596_444	CNC(=O)OCC[C@H]1C[C@@H]1c2cncc(OC[C@@H]3CCN3)c2
+CHEMBL1907596_445	CN(C)C(=O)OCC[C@H]1C[C@@H]1c2cncc(OC[C@@H]3CCN3)c2
+CHEMBL1907596_446	O=C(NC1CC1)OCC[C@H]2C[C@@H]2c3cncc(OC[C@@H]4CCN4)c3
+CHEMBL1907596_447	O=C(OCC[C@H]1C[C@@H]1c2cncc(OC[C@@H]3CCN3)c2)N4CCCC4
+CHEMBL1907596_448	O=C(Nc1ccccc1)OCC[C@H]2C[C@@H]2c3cncc(OC[C@@H]4CCN4)c3
+CHEMBL1907596_449	COCC[C@H]1C[C@@H]1c2cncc(OC[C@@H]3CCN3)c2
+CHEMBL1907596_450	COCC[C@@H]1C[C@H]1c2cncc(OC[C@@H]3CCN3)c2
+CHEMBL1907596_451	OCC[C@H]1C[C@@H]1c2cncc(OC[C@@H]3CCN3)c2
+CHEMBL1907596_452	COCC[C@H]1C[C@@H]1c2cncc(OC[C@@H]3CCN3)c2
+CHEMBL1907596_453	OCCCCC#Cc1cncc(OC[C@@H]2CCN2)c1
+CHEMBL1907596_454	OCCCCC#Cc1cncc(OC[C@@H]2CCN2)c1
+CHEMBL1907596_455	C(Oc1cccnc1)[C@@H]2CCN2
+CHEMBL1907596_456	C1NCC2CC1c3cc4nccnc4cc23
+CHEMBL1907596_457	O=C(C1CC1)N2CC3CNC(C3)C2
+CHEMBL1907596_458	O=C1C=CC=C2[C@H]3CNC[C@H](C3)CN12
+CHEMBL1907596_459	OC(=O)C(F)(F)F.FC(F)C[C@H]1C[C@@H]1c2cncc(OC[C@@H]3CCN3)c2
+CHEMBL1907596_460	OC(=O)C(F)(F)F.FCC[C@H]1C[C@@H]1c2cncc(OC[C@@H]3CCN3)c2
+CHEMBL1907596_461	CC[C@H]1C[C@@H]1c2cncc(OC[C@@H]3CCN3)c2.OC(=O)C(F)(F)F
+CHEMBL1907596_462	OCCc1cc(on1)c2cncc(OC[C@@H]3CCN3)c2
+CHEMBL1907596_463	FC(F)CCc1cc(on1)c2cncc(OC[C@@H]3CCN3)c2
+CHEMBL1907596_464	FCCCc1cc(on1)c2cncc(OC[C@@H]3CCN3)c2
+CHEMBL1907596_465	CCNC(=O)OCCc1cc(on1)c2cncc(OC[C@@H]3CCN3)c2
+CHEMBL1907596_466	O=C(Nc1ccccc1)OCCc2cc(on2)c3cncc(OC[C@@H]4CCN4)c3
+CHEMBL1907596_467	CCc1cc(on1)c2cncc(OC[C@@H]3CCN3)c2
+CHEMBL1907596_468	FC(F)(F)CCc1cc(on1)c2cncc(OC[C@@H]3CCN3)c2
+CHEMBL1907596_469	OCCc1cc(on1)c2cncc(OC[C@@H]3CCN3)c2
+CHEMBL1907596_470	FC(F)CCc1cc(on1)c2cncc(OC[C@@H]3CCN3)c2
+CHEMBL1907596_471	FCCCc1cc(on1)c2cncc(OC[C@@H]3CCN3)c2
+CHEMBL1907596_472	O=C(Nc1ccccc1)OCCc2cc(on2)c3cncc(OC[C@@H]4CCN4)c3
+CHEMBL1907596_473	CCc1cc(on1)c2cncc(OC[C@@H]3CCN3)c2
+CHEMBL1907596_474	C(Oc1cccnc1)[C@@H]2CCN2
+CHEMBL1907596_475	Ic1cncc(OC[C@@H]2CCN2)c1
+CHEMBL1907596_476	Fc1ncccc1OC[C@H]2NCC=C2
+CHEMBL1907596_477	Ic1cncc(OC[C@H]2NCC=C2)c1
+CHEMBL1907596_478	Ic1cncc(OC[C@@H]2CCN2)c1
+CHEMBL1907596_479	C1NCC2CC1c3cc4nccnc4cc23
+CHEMBL1907596_480	OCCCCC#Cc1cncc(OC[C@@H]2CCN2)c1
+CHEMBL1907596_481	CCOC(=O)NCC[C@H]1C[C@@H]1c2cncc(OC[C@@H]3CCN3)c2.OC(=O)C(F)(F)F
+CHEMBL1907596_482	COCC[C@H]1C[C@@H]1c2cncc(OC[C@@H]3CCCN3C)c2.OC(=O)C(F)(F)F
+CHEMBL1907596_483	COCC[C@H]1C[C@@H]1c2cncc(OC[C@@H]3CCNC3)c2.OC(=O)C(F)(F)F
+CHEMBL1907596_484	OC[C@H]1C[C@@H]1c2cncc(OC[C@@H]3CCN3)c2.OC(=O)C(F)(F)F
+CHEMBL1907596_485	OCCC[C@H]1C[C@@H]1c2cncc(OC[C@@H]3CCN3)c2.OC(=O)C(F)(F)F
+CHEMBL1907596_486	CCOC(=O)NCC[C@H]1C[C@@H]1c2cncc(OC[C@@H]3CCN3)c2.OC(=O)C(F)(F)F
+CHEMBL1907596_487	CC(C)OC(=O)NCC[C@H]1C[C@@H]1c2cncc(OC[C@@H]3CCN3)c2.OC(=O)C(F)(F)F
+CHEMBL1907596_488	COCC[C@H]1C[C@@H]1c2cncc(OC[C@@H]3CCCN3)c2.OC(=O)C(F)(F)F
+CHEMBL1907596_489	COCC[C@H]1C[C@@H]1c2cncc(OC[C@@H]3CCCN3C)c2.OC(=O)C(F)(F)F
+CHEMBL1907596_490	COCC[C@H]1C[C@@H]1c2cncc(OC[C@@H]3CCNC3)c2.OC(=O)C(F)(F)F
+CHEMBL1907596_491	OC[C@H]1C[C@@H]1c2cncc(OC[C@@H]3CCN3)c2.OC(=O)C(F)(F)F
+CHEMBL1907596_492	OCCC[C@H]1C[C@@H]1c2cncc(OC[C@@H]3CCN3)c2.OC(=O)C(F)(F)F
+CHEMBL1907596_493	OC(=O)CC[C@H]1C[C@@H]1c2cncc(OC[C@@H]3CCN3)c2.OC(=O)C(F)(F)F
+CHEMBL1907596_494	OC(=O)C(F)(F)F.FC(F)(F)OCC[C@H]1C[C@@H]1c2cncc(OC[C@@H]3CCN3)c2
+CHEMBL1907596_495	O=C1C=CC=C2[C@H]3CNC[C@H](C3)CN12
+CHEMBL1907596_496	O=C1N2C[C@@H]3CNC[C@@H](C3)C2=CC=C1c4cccnc4
+CHEMBL1907596_497	COc1ccc(cc1Cl)C#CC(=O)N2C[C@@H]3CNC[C@@H](C3)C2
+CHEMBL1907596_498	O=C1C=CC=C2[C@H]3CNC[C@H](C3)CN12
+CHEMBL1907596_499	O=C1N2C[C@@H]3CNC[C@@H](C3)C2=CC=C1c4cccnc4
+CHEMBL1907596_500	CCCCC#Cc1cncc(OC[C@@H]2CCN2)c1
+CHEMBL1907596_501	CCCCC#Cc1cncc(OC[C@H]2CCN2)c1
+CHEMBL1907596_502	CCCCC#Cc1cncc(OC[C@H]2CCCN2)c1
+CHEMBL1907596_503	[N-]=[N+]=NCCCCC#Cc1cncc(OC[C@@H]2CCN2)c1
+CHEMBL1907596_504	C1NCC2CC1c3cc4nccnc4cc23
+CHEMBL1907596_505	C1NC[C@H]2CN(C[C@@H]12)c3cccnc3
+CHEMBL1907596_506	OCCCCC#Cc1cncc(OC[C@@H]2CCN2)c1
+CHEMBL1907596_507	CCOCC[C@H]1C[C@@H]1c2cncc(c2)N3C[C@H]4CNC[C@H]4C3.OC(=O)C(F)(F)F
+CHEMBL1907596_508	CN(C)CCOc1cncc(c1)N2CC3CNCC(C3)C2
+CHEMBL1907596_509	C1NCC2CC1CN(C2)c3cccnc3
+CHEMBL1907596_510	C1NCC2CC1c3cc4nccnc4cc23
+CHEMBL1907596_511	Fc1ccc(cn1)c2cncc(c2)[C@H]3C[C@H]4CC[C@H]3N4
+CHEMBL1907596_512	Clc1ccc(cn1)[C@H]2C[C@H]3CC[C@H]2N3
+CHEMBL1907596_513	C1C[C@H]2N[C@H]1C[C@@H]2c3cncc(c3)c4ccncc4
+CHEMBL1907596_514	Fc1cc(ccn1)c2cncc(c2)[C@H]3C[C@H]4CC[C@H]3N4
+CHEMBL1907596_515	Clc1cc(ccn1)c2cncc(c2)[C@H]3C[C@H]4CC[C@H]3N4
+CHEMBL1907596_516	Nc1cc(ccn1)c2cncc(c2)[C@H]3C[C@H]4CC[C@H]3N4
+CHEMBL1907596_517	COc1cc(ccn1)c2cncc(c2)[C@H]3C[C@H]4CC[C@H]3N4
+CHEMBL1907596_518	C1C[C@H]2N[C@H]1C[C@@H]2c3cncc(c3)c4cccnc4
+CHEMBL1907596_519	Clc1ccc(cn1)c2cncc(c2)[C@H]3C[C@H]4CC[C@H]3N4
+CHEMBL1907596_520	Nc1ccc(cn1)c2cncc(c2)[C@H]3C[C@H]4CC[C@H]3N4
+CHEMBL1907596_521	COc1ccc(cn1)c2cncc(c2)[C@H]3C[C@H]4CC[C@H]3N4
+CHEMBL1907596_522	C1CNCCN(C1)c2cccnc2
+CHEMBL1907596_523	CN1CCC[C@H]1COc2cncc(Br)c2
+CHEMBL1907596_524	CN1CCC[C@H]1COc2cccnc2
+CHEMBL1907596_525	CN1CCC[C@H]1COc2cncc(c2)c3ccccc3
+CHEMBL1907596_526	C1NC2CC1N(C2)c3cccnc3
+CHEMBL1907596_527	COCC[C@@H]1C[C@H]1c2cncc(c2)N3CCCNCC3
+CHEMBL1907596_528	COCC[C@H]1C[C@@H]1c2cncc(c2)N3CCCNCC3
+CHEMBL1907596_529	OCCc1cc(on1)c2cncc(c2)N3CCCNCC3
--- a/Code/GraphMol/RascalMCES/data/test_cluster1.smi
+++ b/Code/GraphMol/RascalMCES/data/test_cluster1.smi
@@ -0,0 +1,55 @@
+CHEMBL214_1	CCCN(CCC)[C@@H]1CCc2ccc3[nH]c(cc3c2C1)C#N
+CHEMBL214_2	Oc1cccc2CC[C@@H]3[C@@H](CN3CC=C)c12
+CHEMBL214_3	COc1ccccc1N2CCN(CCN3C(=O)CC4(CCCC4)CC3=O)CC2
+CHEMBL214_4	C[C@H]1C[C@@H](CCN1C[C@H](O)COc2cccc3[nH]c(C)cc23)c4cc5c(F)cccc5s4
+CHEMBL214_5	NCCc1c[nH]c2ccc(OCc3cccc(COc4ccc5[nH]cc(CCN)c5c4)c3)cc12
+CHEMBL214_6	COc1ccccc1N2CCN(CCCCNS(=O)(=O)c3ccc(C)cc3)CC2
+CHEMBL214_7	CCCN(CCCc1c[nH]c2ccc(F)cc12)C3COc4c(F)ccc(C(=O)NC)c4C3
+CHEMBL214_8	O=C1NCc2ccc(OCCCCN3CCN(CC3)c4cccc5ccccc45)cc12
+CHEMBL214_9	Fc1ccc2cccc(N3CCN(CCCOc4ccc5CNC(=O)c5c4)CC3)c2c1
+CHEMBL214_10	Fc1cccc2cccc(N3CCN(CCCOc4ccc5CNC(=O)c5c4)CC3)c12
+CHEMBL214_11	O=C1NCc2ccc(OCCCCN3CCN(CC3)c4cccc5CCCc45)cc12
+CHEMBL214_12	Fc1cc2CNC(=O)c2cc1OCCCN3CCN(CC3)c4cccc5ccccc45
+CHEMBL214_13	COc1ccccc1N2CCN(CCN(C(=O)C34CCC(I)(CC3)C4)c5ccccn5)CC2
+CHEMBL214_14	Cl.Cl.Cl.COc1ccccc1N2CCN(CCN(C(=O)C3CCCCC3)c4ccccn4)CC2
+CHEMBL214_15	COc1ccccc1N2CCN(CCN(C(=O)C34C5C6C3C7C4C5C67CF)c8ccccn8)CC2
+CHEMBL214_16	Oc1ccccc1N2CCN(CCN(C(=O)C34C5C6C3C7C4C5C67CF)c8ccccn8)CC2
+CHEMBL214_17	O=C(Nc1cccnc1)Nc2cccc(CCN3CCN(CC3)c4ccccc4)c2
+CHEMBL214_18	O=C(N1CCC(CCN2CCN(CC2)c3nsc4ccccc34)CC1)c5occc5
+CHEMBL339_2	CNc1cc(OC)c(cc1Cl)C(=O)N[C@H]2CCN(C2)C3C4CCCC3CCC4
+CHEMBL339_3	CCN1CCC[C@H]1CNC(=O)c2c(O)c(CCF)cc(OC)c2OC
+CHEMBL339_4	CCN1CCC[C@H]1CNC(=O)c2cc(I)cc(OC)c2OC
+CHEMBL339_5	CC(C)Oc1ccccc1N2CCN(Cc3ccc(CN4CCCCC4=O)n3C)CC2
+CHEMBL339_6	CCN(CC)C(=O)N[C@@H]1C[C@H]2[C@@H](CC3=CCc4cccc2c34)N(C)C1
+CHEMBL339_7	OC(=O)C(=O)O.Oc1ccc2CC[C@H](CN3CCc4ccccc4C3)Oc2c1
+CHEMBL339_8	COc1ccccc1N2CCN(Cc3ccc([nH]3)c4ccccc4)CC2
+CHEMBL339_9	Fc1ccc(CN2CN(c3ccccc3)C4(CCN(CCCC(=O)c5ccc(F)cc5)CC4)C2=O)cc1
+CHEMBL339_10	CCCN1CCc2cccc3c2[C@H]1Cc4ccc(O)c(O)c34
+CHEMBL339_11	O=C1CCc2ccc(OCCCN3CCN(CC3)c4cccc5sccc45)cc2N1
+CHEMBL339_12	O=C1Nc2cc(OCCCN3CCN(CC3)c4cccc5sccc45)ccc2C=C1
+CHEMBL1946_1	CCCC(=O)NCCC1CCc2c(OC)ccc3ccc(OC)c1c23
+CHEMBL1946_3	COc1cccc(Cc2oc3ccc(OC)cc3c2CCNC(=O)C)c1
+CHEMBL1946_4	COc1ccc2oc(Cc3ccccc3OC)c(CCNC(=O)C)c2c1
+CHEMBL1946_5	CCC(=O)NC[C@@H]1C[C@H]1c2cccc3nc(CCCCc4ccccc4)oc23
+CHEMBL1946_6	COc1ccc2[nH]cc(CCNC(=O)C)c2c1
+CHEMBL1946_7	COc1cc2c(CCNC(=O)C)c(I)[nH]c2cc1[N+](=O)[O-]
+CHEMBL1946_8	COc1ccc2[nH]cc(CCNC(=O)C)c2c1
+CHEMBL1946_9	COc1ccc2cc(cc(CCNC(=O)C)c2c1)c3cccc(CBr)c3
+CHEMBL1946_10	CCCC(=O)NCCCc1cc(OC)ccc1OCc2ccccc2
+CHEMBL1946_11	CC(C)C1=C(CCNC(=O)C)c2c(C1)ccc3OCCc23
+CHEMBL1946_12	CCNC(=O)NCCC1=C(Cc2ccc3OCCc3c12)C(C)C
+CHEMBL1946_13	COc1ccc2cccc(\C=C\NC(=O)C)c2c1
+CHEMBL1946_14	COc1ccc2cccc(CCC(=O)NS(=O)(=O)C)c2n1
+CHEMBL1946_15	CCC(=O)NC[C@@H]1CCc2ccccc2[C@@H]1c3ccccc3
+CHEMBL273_1	CCCN(CCC)[C@H]1CCc2cccc(C(=O)C)c2C1
+CHEMBL273_2	COc1ccc2CCC(CCN3CCN(CC3)c4ccccn4)Cc2c1
+CHEMBL273_3	CCCCN(CCCC)C(=O)c1cccc(CN2CCN(CC2)c3ccccc3OC(C)C)c1
+CHEMBL273_4	CCCN(CCC)C1CCc2cccc(O)c2C1
+CHEMBL273_5	COc1ccccc1N2CCN(CCCCN3C(=O)c4ccccc4C3=O)CC2
+CHEMBL273_6	Cl.Cl.COc1ccccc1N2CCN(CCNC(=O)C34C[C@H]5CC(C[C@@H]3C5)C4)CC2
+CHEMBL273_7	Cl.Cl.COc1ccccc1N2CCN(CCNC(=O)C34C[C@@H]5C[C@@H](C[C@@H](C5)C3)C4)CC2
+CHEMBL273_8	Cl.C(Cc1ccccc1)N2CCN(CC2)c3cccc4ccoc34
+CHEMBL273_9	CCCNC1CCc2ccc3[nH]cc(C=O)c3c2C1
+CHEMBL273_10	CCCN(CCC)C1CCc2ccc3[nH]cc(C=O)c3c2C1
+CHEMBL273_11	COc1ccccc1N2CCN(CCCCN3C(=O)c4ccccc4C3=O)CC2
+CHEMBL273_12	CCCN(CCC)C1CCc2cccc(O)c2C1
--- a/Code/GraphMol/RascalMCES/lap_a_la_scipy.cpp
+++ b/Code/GraphMol/RascalMCES/lap_a_la_scipy.cpp
@@ -0,0 +1,227 @@
+// This is a mildly modified version of the code in SciPy's
+// scipy.optimize.linear_sum_assignment, extracted from
+// rectangular_lsap.cpp.
+// https://github.com/scipy/scipy/blob/main/scipy/optimize/rectangular_lsap/rectangular_lsap.cpp
+// As such it is subject to the following notice:
+/*
+Copyright (c) 2001-2002 Enthought, Inc. 2003-2023, SciPy Developers.
+All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following
+   disclaimer in the documentation and/or other materials provided
+   with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+   contributors may be used to endorse or promote products derived
+   from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+This code implements the shortest augmenting path algorithm for the
+rectangular assignment problem.  This implementation is based on the
+pseudocode described in pages 1685-1686 of:
+
+    DF Crouse. On implementing 2D rectangular assignment algorithms.
+    IEEE Transactions on Aerospace and Electronic Systems
+    52(4):1679-1696, August 2016
+    doi: 10.1109/TAES.2016.140952
+
+Author: PM Larsen
+*/
+
+#include <algorithm>
+#include <iostream>
+#include <limits>
+#include <numeric>
+#include <vector>
+
+namespace RDKit {
+namespace RascalMCES {
+template <typename T>  // Argsort helper: returns indices ordered so that v[index[k]] ascends.
+std::vector<size_t> argsort_iter(const std::vector<T> &v) {
+  std::vector<size_t> index(v.size());
+  std::iota(index.begin(), index.end(), 0);  // identity permutation 0..size-1
+  std::sort(index.begin(), index.end(),  // order the indices by the values they refer to
+            [&v](size_t i, size_t j) { return v[i] < v[j]; });
+  return index;  // std::sort is not stable, so ties come back in unspecified order
+}
+
+static int augmenting_path(size_t nc, std::vector<int> &cost,  // Searches for the cheapest augmenting path starting at row i; cost is flattened row-major with nc columns.
+                           std::vector<double> &u, std::vector<double> &v,  // u / v: per-row / per-column offsets subtracted from each cost entry below
+                           std::vector<size_t> &path,  // out: path[j] = row from which column j was last reached most cheaply
+                           std::vector<size_t> &row4col,  // row4col[j] = row currently matched to column j, or size_t(-1) if none
+                           std::vector<double> &shortestPathCosts, size_t i,  // i is passed by value and reused as the row currently being scanned
+                           std::vector<bool> &SR, std::vector<bool> &SC,  // out: rows / columns visited during this search
+                           std::vector<size_t> &remaining, double *p_minVal) {  // remaining: scratch list of the not-yet-visited columns
+  double minVal = 0;
+
+  // Crouse's pseudocode uses set complements to keep track of remaining
+  // nodes.  Here we use a vector, as it is more efficient in C++.
+  size_t num_remaining = nc;
+  for (size_t it = 0; it < nc; it++) {
+    // Filling this up in reverse order ensures that the solution of a
+    // constant cost matrix is the identity matrix (c.f. #11602).
+    remaining[it] = nc - it - 1;
+  }
+
+  std::fill(SR.begin(), SR.end(), false);  // reset the per-search state left over from the previous call
+  std::fill(SC.begin(), SC.end(), false);
+  std::fill(shortestPathCosts.begin(), shortestPathCosts.end(),
+            std::numeric_limits<double>::max());  // max() stands for "not reached yet"
+
+  // find shortest augmenting path
+  int sink = -1;
+  while (sink == -1) {
+    // Converting -1 to size_t is well defined: it wraps to the largest size_t
+    // value.  The placeholder is never read as-is, because index is assigned
+    // whenever lowest drops below its initial maximum, and we return otherwise.
+    size_t index = -1;
+    double lowest = std::numeric_limits<double>::max();
+    SR[i] = true;
+
+    for (size_t it = 0; it < num_remaining; it++) {
+      size_t j = remaining[it];
+
+      double r = minVal + cost[i * nc + j] - u[i] - v[j];  // cost of reaching column j via row i
+      if (r < shortestPathCosts[j]) {
+        path[j] = i;
+        shortestPathCosts[j] = r;
+      }
+
+      // When multiple nodes have the minimum cost, we select one which
+      // gives us a new sink node. This is particularly important for
+      // integer cost matrices with small co-efficients.
+      if (shortestPathCosts[j] < lowest ||
+          (shortestPathCosts[j] == lowest &&
+           row4col[j] == static_cast<size_t>(-1))) {
+        lowest = shortestPathCosts[j];
+        index = it;  // position within remaining[], not the column number itself
+      }
+    }
+
+    minVal = lowest;
+    if (minVal ==
+        std::numeric_limits<double>::max()) {  // infeasible cost matrix
+      return -1;  // *p_minVal is not written on this path
+    }
+
+    size_t j = remaining[index];
+    if (row4col[j] == static_cast<size_t>(-1)) {
+      sink = j;  // unmatched column reached: the path ends here
+    } else {
+      i = row4col[j];  // column already taken: continue the search from the row that holds it
+    }
+
+    SC[j] = true;
+    remaining[index] = remaining[--num_remaining];  // drop column j by overwriting it with the last remaining entry
+  }
+
+  *p_minVal = minVal;  // cost of the path that was found
+  return sink;  // column index at which the path terminates
+}
+
+int lap_maximize(const std::vector<std::vector<int>> &costsMat,  // Finds the row/column assignment that maximises the summed costsMat entries; returns 0 on success, -1 if infeasible.
+                 std::vector<size_t> &a, std::vector<size_t> &b) {  // out: (a[k], b[k]) are matched (row, column) pairs. NOTE(review): written by index and never resized here, so callers must pre-size them - confirm at call sites
+  if (costsMat.empty() || costsMat.front().empty()) {
+    return 0;  // nothing to assign; a and b are left untouched
+  }
+  size_t nr = costsMat.size();
+  size_t nc = costsMat.front().size();  // NOTE(review): assumes every row is as long as the first one - confirm with callers
+  bool transpose = nc < nr;  // the solver below is always run with rows <= columns
+  std::vector<int> cost(nc * nr);  // flattened working copy, row-major in the (possibly transposed) layout
+  // Maximisation is done by minimising the negated costs.
+  for (size_t i = 0; i < nr; ++i) {
+    for (size_t j = 0; j < nc; ++j) {
+      if (transpose) {
+        cost[j * nr + i] = -costsMat[i][j];
+      } else {
+        cost[i * nc + j] = -costsMat[i][j];
+      }
+    }
+  }
+  if (transpose) {
+    std::swap(nc, nr);  // from here on nr / nc describe the transposed matrix
+  }
+  // initialize variables
+  std::vector<double> u(nr, 0);
+  std::vector<double> v(nc, 0);
+  std::vector<double> shortestPathCosts(nc);
+  std::vector<size_t> path(nc, -1);  // -1 converts to the largest size_t value, used as "unset"
+  std::vector<size_t> col4row(nr, -1);
+  std::vector<size_t> row4col(nc, -1);
+  std::vector<bool> SR(nr);
+  std::vector<bool> SC(nc);
+  std::vector<size_t> remaining(nc);
+
+  // iteratively build the solution
+  for (size_t curRow = 0; curRow < nr; curRow++) {
+    double minVal;  // filled in by augmenting_path whenever it returns a sink >= 0
+    int sink = augmenting_path(nc, cost, u, v, path, row4col, shortestPathCosts,
+                               curRow, SR, SC, remaining, &minVal);
+    if (sink < 0) {
+      return -1;  // no augmenting path: a and b are not written
+    }
+
+    // update dual variables
+    u[curRow] += minVal;
+    for (size_t i = 0; i < nr; i++) {
+      if (SR[i] && i != curRow) {
+        u[i] += minVal - shortestPathCosts[col4row[i]];
+      }
+    }
+
+    for (size_t j = 0; j < nc; j++) {
+      if (SC[j]) {
+        v[j] -= minVal - shortestPathCosts[j];
+      }
+    }
+
+    // augment previous solution
+    size_t j = sink;
+    while (1) {  // walk back from the sink along path[] until curRow is reached
+      size_t i = path[j];
+      row4col[j] = i;
+      std::swap(col4row[i], j);  // row i takes column j; j becomes the column row i held before
+      if (i == curRow) {
+        break;
+      }
+    }
+  }
+
+  if (transpose) {  // undo the transpose so that a holds original rows and b original columns
+    size_t i = 0;
+    for (auto v : argsort_iter(col4row)) {  // NOTE(review): this v shadows the vector v declared above; argsort order makes a ascend
+      a[i] = col4row[v];
+      b[i] = v;
+      i++;
+    }
+  } else {
+    for (size_t i = 0; i < nr; i++) {
+      a[i] = i;
+      b[i] = col4row[i];
+    }
+  }
+
+  return 0;
+}
+}  // namespace RascalMCES
+}  // namespace RDKit
--- a/Code/GraphMol/RascalMCES/mces_catch.cpp
+++ b/Code/GraphMol/RascalMCES/mces_catch.cpp
--- a/Code/GraphMol/RascalMCES/mces_cluster_catch.cpp
+++ b/Code/GraphMol/RascalMCES/mces_cluster_catch.cpp
@@ -0,0 +1,152 @@
+//
+//  Copyright (C) 2023 David Cosgrove
+//
+//   @@ All Rights Reserved @@
+//  This file is part of the RDKit.
+//  The contents are covered by the terms of the BSD license
+//  which is included in the file license.txt, found at the root
+//  of the RDKit source tree.
+//
+
+#include <chrono>
+#include <random>
+#include <vector>
+
+#include <GraphMol/FileParsers/MolSupplier.h>
+#include <GraphMol/SmilesParse/SmilesParse.h>
+#include <GraphMol/SmilesParse/SmilesWrite.h>
+#include <GraphMol/Substruct/SubstructMatch.h>
+
+#include "catch.hpp"
+
+#include <GraphMol/RascalMCES/RascalMCES.h>
+#include <GraphMol/RascalMCES/RascalClusterOptions.h>
+#include <GraphMol/RascalMCES/RascalResult.h>
+
+TEST_CASE("Small test", "[basics]") {  // rascalCluster with default options on cdk2.smi must give 8 clusters of the listed sizes
+  std::string fName = getenv("RDBASE");  // NOTE(review): getenv returns nullptr if RDBASE is unset, and std::string(nullptr) is undefined behaviour
+  fName += "/Contrib/Fastcluster/cdk2.smi";
+  RDKit::SmilesMolSupplier suppl(fName, "\t", 1, 0, false);  // presumably tab-delimited, SMILES in column 1, name in column 0, no title line - confirm against SmilesMolSupplier
+  std::vector<std::shared_ptr<RDKit::ROMol>> mols;
+  while (!suppl.atEnd()) {
+    std::shared_ptr<RDKit::ROMol> mol(suppl.next());  // the shared_ptr takes ownership of the pointer returned by next()
+    if (!mol) {
+      continue;  // skip records for which the supplier returned a null molecule
+    }
+    mols.push_back(mol);
+  }
+  RDKit::RascalMCES::RascalClusterOptions clusOpts;
+  auto clusters = RDKit::RascalMCES::rascalCluster(mols, clusOpts);
+  REQUIRE(clusters.size() == 8);
+  std::vector<size_t> expSizes{7, 7, 6, 2, 2, 2, 2, 20};  // expected size of each cluster, in the order returned
+  for (size_t i = 0; i < 8; ++i) {
+    REQUIRE(clusters[i].size() == expSizes[i]);
+  }
+}
+
+TEST_CASE("BLSets subset", "[basics]") {  // rascalCluster called without an options argument on test_cluster1.smi must give 12 clusters
+  std::string fName = getenv("RDBASE");  // NOTE(review): getenv returns nullptr if RDBASE is unset, and std::string(nullptr) is undefined behaviour
+  fName += "/Code/GraphMol/RascalMCES/data/test_cluster1.smi";
+  RDKit::SmilesMolSupplier suppl(fName, "\t", 1, 0, false);  // presumably tab-delimited, SMILES in column 1, name in column 0, no title line - confirm against SmilesMolSupplier
+  std::vector<std::shared_ptr<RDKit::ROMol>> mols;
+  while (!suppl.atEnd()) {
+    std::shared_ptr<RDKit::ROMol> mol(suppl.next());  // the shared_ptr takes ownership of the pointer returned by next()
+    if (!mol) {
+      continue;  // skip records for which the supplier returned a null molecule
+    }
+    mols.push_back(mol);
+  }
+  auto clusters = RDKit::RascalMCES::rascalCluster(mols);
+  REQUIRE(clusters.size() == 12);
+  std::vector<size_t> expSizes{8, 4, 4, 3, 3, 3, 2, 2, 2, 2, 2, 21};  // expected size of each cluster, in the order returned
+  for (size_t i = 0; i < 12; ++i) {
+    REQUIRE(clusters[i].size() == expSizes[i]);
+  }
+}
+
+TEST_CASE("ChEMBL 1907596") {  // rascalCluster on chembl_1907596.smi with similarityCutoff 0.7 must give 21 clusters
+  std::string fName = getenv("RDBASE");  // NOTE(review): getenv returns nullptr if RDBASE is unset, and std::string(nullptr) is undefined behaviour
+  fName += "/Code/GraphMol/RascalMCES/data/chembl_1907596.smi";
+  std::cout << fName << std::endl;  // NOTE(review): <iostream> is not included directly by this file; this relies on a transitive include
+  RDKit::SmilesMolSupplier suppl(fName, "\t", 1, 0, false);  // presumably tab-delimited, SMILES in column 1, name in column 0, no title line - confirm against SmilesMolSupplier
+  std::vector<std::shared_ptr<RDKit::ROMol>> mols;
+  while (!suppl.atEnd()) {
+    std::shared_ptr<RDKit::ROMol> mol(suppl.next());  // the shared_ptr takes ownership of the pointer returned by next()
+    if (!mol) {
+      continue;  // skip records for which the supplier returned a null molecule
+    }
+    mols.push_back(mol);
+  }
+  RDKit::RascalMCES::RascalClusterOptions clusOpts;
+  clusOpts.similarityCutoff = 0.7;
+  auto clusters = RDKit::RascalMCES::rascalCluster(mols, clusOpts);
+  REQUIRE(clusters.size() == 21);
+  std::vector<size_t> expSizes{342, 71, 64, 33, 23, 11, 10, 6, 6, 5, 5,
+                               4,   3,  3,  3,  3,  3,  2,  2, 2, 14};  // expected size of each cluster, in the order returned
+  for (size_t i = 0; i < 21; ++i) {
+    REQUIRE(clusters[i].size() == expSizes[i]);
+  }
+}
+
+TEST_CASE("Small Butina test", "[basics]") {  // rascalButinaCluster on cdk2.smi: the cluster sizes must sum to the input size and match the listed sizes
+  std::string fName = getenv("RDBASE");  // NOTE(review): getenv returns nullptr if RDBASE is unset, and std::string(nullptr) is undefined behaviour
+  fName += "/Contrib/Fastcluster/cdk2.smi";
+  RDKit::SmilesMolSupplier suppl(fName, "\t", 1, 0, false);  // presumably tab-delimited, SMILES in column 1, name in column 0, no title line - confirm against SmilesMolSupplier
+  std::vector<std::shared_ptr<RDKit::ROMol>> mols;
+  while (!suppl.atEnd()) {
+    std::shared_ptr<RDKit::ROMol> mol(suppl.next());  // the shared_ptr takes ownership of the pointer returned by next()
+    if (!mol) {
+      continue;  // skip records for which the supplier returned a null molecule
+    }
+    mols.push_back(mol);
+  }
+  RDKit::RascalMCES::RascalClusterOptions clusOpts;
+  auto clusters = RDKit::RascalMCES::rascalButinaCluster(mols, clusOpts);
+  int numMols = 0;
+  for (const auto &cl : clusters) {
+    numMols += cl.size();  // total number of cluster members over all clusters
+  }
+  REQUIRE(numMols == mols.size());  // NOTE(review): compares int with size_t (signed/unsigned comparison)
+  REQUIRE(clusters.size() == 29);
+  std::vector<size_t> expSizes{6, 6, 6, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                               1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};  // expected size of each cluster, in the order returned
+  for (size_t i = 0; i < 29; ++i) {
+    REQUIRE(clusters[i].size() == expSizes[i]);
+  }
+}
+
+TEST_CASE("Small test, smaller number of threads", "[basics]") {  // repeats the cdk2.smi clustering check with numThreads set to 2 and to -2
+  // I'm not sure how to test whether this has had the desired effect,
+  // but at least we'll know that it runs ok.
+  std::string fName = getenv("RDBASE");  // NOTE(review): getenv returns nullptr if RDBASE is unset, and std::string(nullptr) is undefined behaviour
+  fName += "/Contrib/Fastcluster/cdk2.smi";
+  RDKit::SmilesMolSupplier suppl(fName, "\t", 1, 0, false);  // presumably tab-delimited, SMILES in column 1, name in column 0, no title line - confirm against SmilesMolSupplier
+  std::vector<std::shared_ptr<RDKit::ROMol>> mols;
+  while (!suppl.atEnd()) {
+    std::shared_ptr<RDKit::ROMol> mol(suppl.next());  // the shared_ptr takes ownership of the pointer returned by next()
+    if (!mol) {
+      continue;  // skip records for which the supplier returned a null molecule
+    }
+    mols.push_back(mol);
+  }
+  {
+    RDKit::RascalMCES::RascalClusterOptions clusOpts;
+    clusOpts.numThreads = 2;  // explicit positive thread count
+    auto clusters = RDKit::RascalMCES::rascalCluster(mols, clusOpts);
+    REQUIRE(clusters.size() == 8);
+    std::vector<size_t> expSizes{7, 7, 6, 2, 2, 2, 2, 20};  // same expectation as the default-options "Small test"
+    for (size_t i = 0; i < 8; ++i) {
+      REQUIRE(clusters[i].size() == expSizes[i]);
+    }
+  }
+  {
+    RDKit::RascalMCES::RascalClusterOptions clusOpts;
+    clusOpts.numThreads = -2;  // negative value; presumably counted relative to the available hardware threads - confirm in RascalClusterOptions
+    auto clusters = RDKit::RascalMCES::rascalCluster(mols, clusOpts);
+    REQUIRE(clusters.size() == 8);
+    std::vector<size_t> expSizes{7, 7, 6, 2, 2, 2, 2, 20};  // result must not depend on the thread count
+    for (size_t i = 0; i < 8; ++i) {
+      REQUIRE(clusters[i].size() == expSizes[i]);
+    }
+  }
+}
--- a/Docs/Book/GettingStartedInPython.rst
+++ b/Docs/Book/GettingStartedInPython.rst
@@ -1417,8 +1417,38 @@ or into a generic framework:
 Maximum Common Substructure
 ***************************

-The FindMCS function find a maximum common substructure (MCS) of two
-or more molecules:
+There are 2 methods for finding maximum common substructures.  The first, FindMCS,
+finds a single fragment maximum common substructure (MCS) of two or more molecules.
+The second, RascalMCES, finds the maximum common edge substructure (MCES) between two
+molecules and can return a multi-fragment MCES.  The difference is demonstrated with the
+following pair of molecules:
+
+-------------------------------------+
+| .. image:: images/mcs_example_1.png |
+-------------------------------------+
+| .. image:: images/mcs_example_2.png |
+-------------------------------------+
+
+FMCS gives this maximum common substructure:
+
+-------------------------------------+
+| .. image:: images/mcs_example_3.png |
+-------------------------------------+
+| .. image:: images/mcs_example_4.png |
+-------------------------------------+
+
+Whereas RascalMCES gives:
+
+-------------------------------------+
+| .. image:: images/mcs_example_5.png |
+-------------------------------------+
+| .. image:: images/mcs_example_6.png |
+-------------------------------------+
+
+FindMCS
+=======
+
+FindMCS operates on 2 or more molecules:

 .. doctest::

@@ -1555,6 +1585,135 @@ return the best match found in that time. If timeout is reached then the

 (The MCS after 50 seconds contained 511 atoms.)

+RascalMCES
+==========
+
+RascalMCES can only work on 2 molecules at a time:
+
+.. doctest::
+
+  >>> from rdkit.Chem import rdRascalMCES
+  >>> mol1 = Chem.MolFromSmiles("CN(C)c1ccc(CC(=O)NCCCCCCCCCCNC23CC4CC(C2)CC(C3)C4)cc1 CHEMBL153934")
+  >>> mol2 = Chem.MolFromSmiles("CN(C)c1ccc(CC(=O)NCCCCCCCNC23CC4CC(C2)CC(C3)C4)cc1 CHEMBL152361")
+  >>> res = rdRascalMCES.FindMCES(mol1, mol2)
+  >>> res[0].smartsString
+  'CN(-C)-c1:c:c:c(-CC(=O)-NCCCCCCC):c:c:1.NC12CC3CC(-C1)-CC(-C2)-C3'
+  >>> len(res[0].bondMatches())
+  33
+
+It returns a list of RascalResult objects.  Each RascalResult contains the 2 molecules that
+the result pertains to, the SMARTS string of the MCES, the lists of atoms and bonds in the
+two molecules that match, the Johnson similarity between the 2 molecules, the number of
+fragments in the MCES, the number of atoms in the largest fragment and whether the run
+timed out or not.  There is also the method largestFragmentOnly(), which cuts the MCES
+down to the largest single fragment.  This is a non-reversible change, so if you want both
+results, take a copy first.
+
+By default, the MCES algorithm returns the first result it finds of maximum size.  Because of
+symmetry, there may be other equivalent solutions with the same number of atoms and bonds,
+but with different equivalent bonds matched to each other.  If you want to see all MCESs of
+maximum size, you can use the option allBestMCESs = True.  This will increase the run time,
+partly because more branches in the search tree must be examined, but mostly because sorting
+the multiple results is quite time-consuming.  The results are returned in a consistent order
+sorted by number of bond matches, then number of fragments (fewer first), then largest
+fragment size and so on.  Some of these aren't trivial to compute.  The adamantane example
+above is particularly extreme because not only is there extensive symmetry about the
+adamantane end and 2-fold symmetry at the phenyl end but also several points of breaking the
+matching alkyl chain all of which give rise to valid MCESs of the same size.  In this case,
+sorting into a consistent order takes significantly longer than determining the MCESs in the
+first place.
+
+The MCES differs from a conventional MCS in that it is the maximum common substructure based
+on bonds rather than atoms.  Often the result is the same, but not always.
+
+The Johnson similarity is akin to a Tanimoto similarity, but expressed in terms of the
+atoms and bonds in the MCES.  It is the square of the sum of the number of atoms and bonds
+in the MCES divided by the product of the sums of the numbers of atoms and bonds in the
+2 input molecules.  It has values between 0.0 (no MCES between the molecules) and 1.0 (the
+molecules are identical).  A key source of efficiency in the RASCAL algorithm is a fast and
+correct prediction of a maximum value for the Johnson similarity between 2 molecules and
+hence the maximum size of the MCES.  The first step in the algorithm is then a screening,
+whereby the full MCES determination is not performed if the predicted similarity is less
+than some desired threshold.  The final similarity between the 2 molecules may be less
+than the threshold, but it will never be higher than the predicted upper bound.  RASCAL
+stems from RApid Similarity CALculation.
+
+The default settings for RascalMCES are good for general use, but they may be altered
+by passing an optional RascalOptions object:
+
+.. doctest::
+
+  >>> mol1 = Chem.MolFromSmiles('Oc1cccc2C(=O)C=CC(=O)c12')
+  >>> mol2 = Chem.MolFromSmiles('O1C(=O)C=Cc2cc(OC)c(O)cc12')
+  >>> results = rdRascalMCES.FindMCES(mol1, mol2)
+  >>> len(results)
+  0
+  >>> opts = rdRascalMCES.RascalOptions()
+  >>> opts.similarityThreshold = 0.5
+  >>> results = rdRascalMCES.FindMCES(mol1, mol2, opts)
+  >>> len(results)
+  1
+  >>> f'{results[0].similarity:.2f}'
+  '0.37'
+  >>> results[0].smartsString
+  'Oc1:c:c:c:c:c:1.[#6]=O'
+  >>> opts.minFragSize = 3
+  >>> results = rdRascalMCES.FindMCES(mol1, mol2, opts)
+  >>> len(results)
+  1
+  >>> f'{results[0].similarity:.2f}'
+  '0.25'
+  >>> results[0].smartsString
+  'Oc1:c:c:c:c:c:1'
+
+In this case, the upper bound on the similarity score is below the default threshold
+of 0.7, so no results are returned.  Setting the threshold to 0.5 produces the second
+result although, as can be seen, the final similarity is substantially below the
+threshold.  This example also shows a disadvantage of the MCES method, which is that
+it can produce small fragments in the MCES which are rarely helpful.  The option
+minFragSize can be used to over-ride the default value of -1, which means no minimum
+size.
+
+Like FindMCS, there is a ringMatchesRingOnly option, and also there's
+completeAromaticRings, which is True by default, and means that MCESs won't be returned
+with partial aromatic rings matching:
+
+.. doctest::
+
+  >>> mol1 = Chem.MolFromSmiles('C1CCCC1c1ccncc1')
+  >>> mol2 = Chem.MolFromSmiles('C1CCCC1c1ccccc1')
+  >>> results = rdRascalMCES.FindMCES(mol1, mol2, opts)
+  >>> f'{results[0].similarity:.2f}'
+  '0.27'
+  >>> results[0].smartsString
+  'C1CCCC1-c'
+  >>> opts.completeAromaticRings = False
+  >>> results = rdRascalMCES.FindMCES(mol1, mol2, opts)
+  >>> f'{results[0].similarity:.2f}'
+  '0.76'
+  >>> results[0].smartsString
+  'C1CCCC1-c(:c:c):c:c'
+
+This result may look a bit odd, with a single aromatic carbon in the first SMARTS
+string.  This is a consequence of the fact that the MCES works on matching bonds.
+A better, atom-centric, representation might be C1CCC[$(C-c)]1.  When the
+completeAromaticRings option is set to False, a larger MCES is found, with just
+the pyridine nitrogen atom not matching the corresponding phenyl carbon atom.
+
+Clustering with Rascal
+======================
+
+There are 2 clustering methods available using the Johnson metric.  The first,
+RascalCluster, is a fuzzy method described in 'A Line Graph Algorithm for
+Clustering Chemical Structures Based on Common Substructural Cores', JW Raymond,
+PW Willett
+(https://match.pmf.kg.ac.rs/electronic_versions/Match48/match48_197-207.pdf also
+available at https://eprints.whiterose.ac.uk/77598/).
+The second, RascalButinaCluster, uses the Butina sphere-exclusion algorithm
+(Butina JCICS 39 747-750 (1999)).  Because of the time-consuming nature of the MCES
+determination, these clustering methods can be slow to run, so are best used
+on small sets (no more than a few hundred molecules) of small molecules.
+

 Fingerprinting and Molecular Similarity
 ***************************************
--- a/Docs/Book/images/mcs_example_1.png
+++ b/Docs/Book/images/mcs_example_1.png
--- a/Docs/Book/images/mcs_example_2.png
+++ b/Docs/Book/images/mcs_example_2.png
--- a/Docs/Book/images/mcs_example_3.png
+++ b/Docs/Book/images/mcs_example_3.png
--- a/Docs/Book/images/mcs_example_4.png
+++ b/Docs/Book/images/mcs_example_4.png
--- a/Docs/Book/images/mcs_example_5.png
+++ b/Docs/Book/images/mcs_example_5.png
--- a/Docs/Book/images/mcs_example_6.png
+++ b/Docs/Book/images/mcs_example_6.png