Fix github5222 (#5232)

* Fix for github5222
* Remove stderr output
* Fix failing tests
* Fix comment
* Revert to old label ordering
* Fix failing tests
* Revert unnecessary change. Hopefully fix notebook
* Modified RDG Jupyter notebook
* Fix kernelspec in rgd_testing.ipynb
2026-06-03 21:44:30 +08:00 · 2022-04-26 22:25:05 -06:00
parent 4093f871c9
commit b587a65a27
7 changed files with 120 additions and 64 deletions
--- a/Code/GraphMol/RGroupDecomposition/RGroupScore.cpp
+++ b/Code/GraphMol/RGroupDecomposition/RGroupScore.cpp
@@ -211,7 +211,7 @@ void RGroupScorer::breakTies(
  orderedLabels.reserve(labels.size());
  std::copy_if(labels.begin(), labels.end(), std::back_inserter(orderedLabels),
               [](const int &i) { return !(i < 0); });
-  std::copy_if(labels.begin(), labels.end(), std::back_inserter(orderedLabels),
+  std::copy_if(labels.rbegin(), labels.rend(), std::back_inserter(orderedLabels),
               [](const int &i) { return (i < 0); });
  // We only care about the sign of the ordered labels,
  // not about their value, so we convert the ordered map
--- a/Code/GraphMol/RGroupDecomposition/catch_rgd.cpp
+++ b/Code/GraphMol/RGroupDecomposition/catch_rgd.cpp
@@ -67,19 +67,19 @@ TEST_CASE("toJSONTests", "[unittests]") {
    CHECK(rows.size() == mols.size());
    std::string expected = R"JSON([
    {
-        "Core": "Cc1cccc([*:2])c1[*:1]",
-        "R1": "CO[*:1]",
-        "R2": "[H][*:2]"
-    },
-    {
-        "Core": "Cc1cccc([*:2])c1[*:1]",
-        "R1": "CO[*:1]",
-        "R2": "[H][*:2]"
-    },
-    {
-        "Core": "Cc1cccc([*:2])c1[*:1]",
+        "Core": "Cc1cccc([*:1])c1[*:2]",
        "R1": "[H][*:1]",
        "R2": "CO[*:2]"
+    },
+    {
+        "Core": "Cc1cccc([*:1])c1[*:2]",
+        "R1": "[H][*:1]",
+        "R2": "CO[*:2]"
+    },
+    {
+        "Core": "Cc1cccc([*:1])c1[*:2]",
+        "R1": "CO[*:1]",
+        "R2": "[H][*:2]"
    }
 ])JSON";
    CHECK(flatten_whitespace(toJSON(rows)) == flatten_whitespace(expected));
@@ -91,19 +91,19 @@ TEST_CASE("toJSONTests", "[unittests]") {
    CHECK(cols.size() == mols.size());
    std::string expected = R"JSON([
  "Core": [
-    "Cc1cccc([*:2])c1[*:1]",
-    "Cc1cccc([*:2])c1[*:1]",
-    "Cc1cccc([*:2])c1[*:1]"
+    "Cc1cccc([*:1])c1[*:2]",
+    "Cc1cccc([*:1])c1[*:2]",
+    "Cc1cccc([*:1])c1[*:2]"
  ],
  "R1": [
-    "CO[*:1]",
-    "CO[*:1]",
-    "[H][*:1]"
+    "[H][*:1]",
+    "[H][*:1]",
+    "CO[*:1]"
  ],
  "R2": [
-    "[H][*:2]",
-    "[H][*:2]",
-    "CO[*:2]"
+    "CO[*:2]",
+    "CO[*:2]",
+    "[H][*:2]"
  ]
 ]
 )JSON";
--- a/Code/GraphMol/RGroupDecomposition/testRGroupDecomp.cpp
+++ b/Code/GraphMol/RGroupDecomposition/testRGroupDecomp.cpp
@@ -536,23 +536,23 @@ void testGitHubIssue1705() {
    }
    delete core;
    std::string expected = R"RES(Rgroup===Core
-Oc1ccc([*:2])cc1[*:1]
-Oc1ccc([*:2])cc1[*:1]
-Oc1ccc([*:2])cc1[*:1]
-Oc1ccc([*:2])cc1[*:1]
-Oc1ccc([*:2])cc1[*:1]
+Oc1ccc([*:1])cc1[*:2]
+Oc1ccc([*:1])cc1[*:2]
+Oc1ccc([*:1])cc1[*:2]
+Oc1ccc([*:1])cc1[*:2]
+Oc1ccc([*:1])cc1[*:2]
 Rgroup===R1
 [H][*:1]
-F[*:1]
-F[*:1]
-F[*:1]
-Cl[*:1]
+[H][*:1]
+[H][*:1]
+N[*:1]
+[H][*:1]
 Rgroup===R2
 [H][*:2]
-[H][*:2]
-[H][*:2]
-N[*:2]
-[H][*:2]
+F[*:2]
+F[*:2]
+F[*:2]
+Cl[*:2]
 )RES";
 #ifdef DEBUG
    if (ss.str() != expected) {
@@ -596,13 +596,13 @@ Cc1c([*:1])cccc1[*:2]
 Cc1c([*:1])cccc1[*:2]
 Rgroup===R1
 [H][*:1]
-F[*:1]
-F[*:1]
+[H][*:1]
+[H][*:1]
 F[*:1]
 Rgroup===R2
 [H][*:2]
-[H][*:2]
-[H][*:2]
+F[*:2]
+F[*:2]
 F[*:2]
 )RES";
 #ifdef DEBUG
@@ -1370,13 +1370,13 @@ Cc1c([*:1])cccc1[*:2]
 Cc1c([*:1])cccc1[*:2]
 Rgroup===R1
 [H][*:1]
-F[*:1]
-F[*:1]
+[H][*:1]
+[H][*:1]
 F[*:1]
 Rgroup===R2
 [H][*:2]
-[H][*:2]
-[H][*:2]
+F[*:2]
+F[*:2]
 F[*:2]
 )RES";
 #ifdef DEBUG
@@ -2762,6 +2762,61 @@ void testDoNotChooseUnrelatedCores() {
  }
 }

+// Regression test for GitHub #5222: eleven inputs are decomposed against a core
+// with three C/N list atoms; every row must be a Core plus one R1, "CO[*:1]".
+void testGithub5222() {
+  BOOST_LOG(rdInfoLog)
+      << "********************************************************\n";
+  BOOST_LOG(rdInfoLog) << "Test that Github5222 is fixed" << std::endl;
+  auto core = R"CTAB(
+  ChemDraw04112214222D
+
+  6  6  3  0  0  0  0  0  0  0999 V2000
+   -0.7145    0.4125    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+   -0.7145   -0.4125    0.0000 L   0  0  0  0  0  0  0  0  0  0  0  0
+    0.0000   -0.8250    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    0.7145   -0.4125    0.0000 L   0  0  0  0  0  0  0  0  0  0  0  0
+    0.7145    0.4125    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    0.0000    0.8250    0.0000 L   0  0  0  0  0  0  0  0  0  0  0  0
+  1  2  2  0
+  2  3  1  0
+  3  4  2  0
+  4  5  1  0
+  5  6  2  0
+  6  1  1  0
+  2 F    2   6   7
+  4 F    2   6   7
+  6 F    2   6   7
+M  ALS   2  2 F C   N   
+M  ALS   4  2 F C   N   
+M  ALS   6  2 F C   N   
+M  END
+)CTAB"_ctab;
+  std::vector<std::string> smiArray(10, "COc1ccccc1");
+  smiArray.push_back("COc1ccncn1");
+  RGroupDecompositionParameters params;
+  params.matchingStrategy = GreedyChunks;
+  RGroupDecomposition decomp(*core, params);
+  for (const auto &smiles : smiArray) {
+    std::unique_ptr<ROMol> mol(SmilesToMol(smiles));  // freed on every path
+    TEST_ASSERT(mol);  // fail the test instead of dereferencing a null mol
+    int res = decomp.add(*mol);
+    TEST_ASSERT(res >= 0);
+  }
+
+  decomp.process();
+  RGroupRows rows = decomp.getRGroupsAsRows();
+  TEST_ASSERT(rows.size() == 11);
+  for (const auto &row : rows) {
+    TEST_ASSERT(row.size() == 2);
+    TEST_ASSERT(row.count("Core") == 1);
+    TEST_ASSERT(row.count("R1") == 1);
+    const auto &mol = row.at("R1");
+    const auto groupSmiles = MolToSmiles(*mol);
+    TEST_ASSERT(groupSmiles == "CO[*:1]");
+  }
+}
+
 int main() {
  RDLog::InitLogs();
  boost::logging::disable_logs("rdApp.debug");
@@ -2813,6 +2868,7 @@ int main() {
  testAlignOutputCoreToMolecule();
  testWildcardInInput();
  testDoNotChooseUnrelatedCores();
+  testGithub5222();
  BOOST_LOG(rdInfoLog)
      << "********************************************************\n";
  return 0;
--- a/Code/GraphMol/RGroupDecomposition/test_data/rgd_testing.ipynb
+++ b/Code/GraphMol/RGroupDecomposition/test_data/rgd_testing.ipynb
--- a/Code/GraphMol/RGroupDecomposition/test_data/simple1.out1.json
+++ b/Code/GraphMol/RGroupDecomposition/test_data/simple1.out1.json
@@ -1,17 +1,17 @@
 [
    {
-        "Core": "Cc1cccc([*:2])c1[*:1]",
-        "R1": "CO[*:1]",
-        "R2": "[H][*:2]"
-    },
-    {
-        "Core": "Cc1cccc([*:2])c1[*:1]",
-        "R1": "CO[*:1]",
-        "R2": "[H][*:2]"
-    },
-    {
-        "Core": "Cc1cccc([*:2])c1[*:1]",
+        "Core": "Cc1cccc([*:1])c1[*:2]",
        "R1": "[H][*:1]",
        "R2": "CO[*:2]"
+    },
+    {
+        "Core": "Cc1cccc([*:1])c1[*:2]",
+        "R1": "[H][*:1]",
+        "R2": "CO[*:2]"
+    },
+    {
+        "Core": "Cc1cccc([*:1])c1[*:2]",
+        "R1": "CO[*:1]",
+        "R2": "[H][*:2]"
    }
 ]
--- a/Code/GraphMol/RGroupDecomposition/test_data/simple2.out1.json
+++ b/Code/GraphMol/RGroupDecomposition/test_data/simple2.out1.json
@@ -1,16 +1,16 @@
 [
    {
-        "Core": "Cc1cc([*:1])ccc1[*:3]",
+        "Core": "Cc1cccc([*:1])c1[*:3]",
        "R1": "[H][*:1]",
        "R3": "CO[*:3]"
    },
    {
-        "Core": "Cc1cc([*:1])ccc1[*:3]",
+        "Core": "Cc1cccc([*:1])c1[*:3]",
        "R1": "[H][*:1]",
        "R3": "CO[*:3]"
    },
    {
-        "Core": "Cc1cc([*:1])ccc1[*:3]",
+        "Core": "Cc1cccc([*:1])c1[*:3]",
        "R1": "CO[*:1]",
        "R3": "[H][*:3]"
    }
--- a/rdkit/Chem/UnitTestPandasTools.py
+++ b/rdkit/Chem/UnitTestPandasTools.py
@@ -193,15 +193,15 @@ class TestPandasTools(unittest.TestCase):
    df = PandasTools.RGroupDecompositionToFrame(groups, mols, include_core=True)
    self.assertEqual(len(df), len(mols))
    self.assertEqual(list(df.columns), ['Mol', 'Core', 'R1', 'R2'])
-    self.assertEqual(list(df.R1), ['F[*:1]', 'Cl[*:1]', 'O[*:1]', 'F[*:1]', 'F[*:1]'])
+    self.assertEqual(list(df.R2), ['F[*:2]', 'Cl[*:2]', 'O[*:2]', 'F[*:2]', 'F[*:2]'])

    groups, _ = rdRGroupDecomposition.RGroupDecompose([scaffold], mols, asSmiles=False,
                                                      asRows=False)
    df = PandasTools.RGroupDecompositionToFrame(groups, mols, include_core=True)
    self.assertEqual(len(df), len(mols))
    self.assertEqual(list(df.columns), ['Mol', 'Core', 'R1', 'R2'])
-    self.assertEqual([Chem.MolToSmiles(x) for x in df.R1],
-                     ['F[*:1]', 'Cl[*:1]', 'O[*:1]', 'F[*:1]', 'F[*:1]'])
+    self.assertEqual([Chem.MolToSmiles(x) for x in df.R2],
+                     ['F[*:2]', 'Cl[*:2]', 'O[*:2]', 'F[*:2]', 'F[*:2]'])


@unittest.skipIf(PandasTools.pd is None, 'Pandas not installed, skipping')