Fix github5222 (#5232)

* Fix for github5222

* Remove stderr output

* Fix failing tests

* Fix comment

* Revert to old label ordering

* Fix failing tests

* Revert unnecessary change.  Hopefully fix notebook

* Modified RDG Jupyter notebook

* Fix kernelspec in rgd_testing.ipynb
This commit is contained in:
Gareth Jones
2022-04-26 22:25:05 -06:00
committed by GitHub
parent 4093f871c9
commit b587a65a27
7 changed files with 120 additions and 64 deletions

View File

@@ -211,7 +211,7 @@ void RGroupScorer::breakTies(
orderedLabels.reserve(labels.size());
std::copy_if(labels.begin(), labels.end(), std::back_inserter(orderedLabels),
[](const int &i) { return !(i < 0); });
std::copy_if(labels.begin(), labels.end(), std::back_inserter(orderedLabels),
std::copy_if(labels.rbegin(), labels.rend(), std::back_inserter(orderedLabels),
[](const int &i) { return (i < 0); });
// We only care about the sign of the ordered labels,
// not about their value, so we convert the ordered map

View File

@@ -67,19 +67,19 @@ TEST_CASE("toJSONTests", "[unittests]") {
CHECK(rows.size() == mols.size());
std::string expected = R"JSON([
{
"Core": "Cc1cccc([*:2])c1[*:1]",
"R1": "CO[*:1]",
"R2": "[H][*:2]"
},
{
"Core": "Cc1cccc([*:2])c1[*:1]",
"R1": "CO[*:1]",
"R2": "[H][*:2]"
},
{
"Core": "Cc1cccc([*:2])c1[*:1]",
"Core": "Cc1cccc([*:1])c1[*:2]",
"R1": "[H][*:1]",
"R2": "CO[*:2]"
},
{
"Core": "Cc1cccc([*:1])c1[*:2]",
"R1": "[H][*:1]",
"R2": "CO[*:2]"
},
{
"Core": "Cc1cccc([*:1])c1[*:2]",
"R1": "CO[*:1]",
"R2": "[H][*:2]"
}
])JSON";
CHECK(flatten_whitespace(toJSON(rows)) == flatten_whitespace(expected));
@@ -91,19 +91,19 @@ TEST_CASE("toJSONTests", "[unittests]") {
CHECK(cols.size() == mols.size());
std::string expected = R"JSON([
"Core": [
"Cc1cccc([*:2])c1[*:1]",
"Cc1cccc([*:2])c1[*:1]",
"Cc1cccc([*:2])c1[*:1]"
"Cc1cccc([*:1])c1[*:2]",
"Cc1cccc([*:1])c1[*:2]",
"Cc1cccc([*:1])c1[*:2]"
],
"R1": [
"CO[*:1]",
"CO[*:1]",
"[H][*:1]"
"[H][*:1]",
"[H][*:1]",
"CO[*:1]"
],
"R2": [
"[H][*:2]",
"[H][*:2]",
"CO[*:2]"
"CO[*:2]",
"CO[*:2]",
"[H][*:2]"
]
]
)JSON";

View File

@@ -536,23 +536,23 @@ void testGitHubIssue1705() {
}
delete core;
std::string expected = R"RES(Rgroup===Core
Oc1ccc([*:2])cc1[*:1]
Oc1ccc([*:2])cc1[*:1]
Oc1ccc([*:2])cc1[*:1]
Oc1ccc([*:2])cc1[*:1]
Oc1ccc([*:2])cc1[*:1]
Oc1ccc([*:1])cc1[*:2]
Oc1ccc([*:1])cc1[*:2]
Oc1ccc([*:1])cc1[*:2]
Oc1ccc([*:1])cc1[*:2]
Oc1ccc([*:1])cc1[*:2]
Rgroup===R1
[H][*:1]
F[*:1]
F[*:1]
F[*:1]
Cl[*:1]
[H][*:1]
[H][*:1]
N[*:1]
[H][*:1]
Rgroup===R2
[H][*:2]
[H][*:2]
[H][*:2]
N[*:2]
[H][*:2]
F[*:2]
F[*:2]
F[*:2]
Cl[*:2]
)RES";
#ifdef DEBUG
if (ss.str() != expected) {
@@ -596,13 +596,13 @@ Cc1c([*:1])cccc1[*:2]
Cc1c([*:1])cccc1[*:2]
Rgroup===R1
[H][*:1]
F[*:1]
F[*:1]
[H][*:1]
[H][*:1]
F[*:1]
Rgroup===R2
[H][*:2]
[H][*:2]
[H][*:2]
F[*:2]
F[*:2]
F[*:2]
)RES";
#ifdef DEBUG
@@ -1370,13 +1370,13 @@ Cc1c([*:1])cccc1[*:2]
Cc1c([*:1])cccc1[*:2]
Rgroup===R1
[H][*:1]
F[*:1]
F[*:1]
[H][*:1]
[H][*:1]
F[*:1]
Rgroup===R2
[H][*:2]
[H][*:2]
[H][*:2]
F[*:2]
F[*:2]
F[*:2]
)RES";
#ifdef DEBUG
@@ -2762,6 +2762,61 @@ void testDoNotChooseUnrelatedCores() {
}
}
void testGithub5222() {
BOOST_LOG(rdInfoLog)
<< "********************************************************\n";
BOOST_LOG(rdInfoLog) << "Test that Github5222 is fixed" << std::endl;
auto core = R"CTAB(
ChemDraw04112214222D
6 6 3 0 0 0 0 0 0 0999 V2000
-0.7145 0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-0.7145 -0.4125 0.0000 L 0 0 0 0 0 0 0 0 0 0 0 0
0.0000 -0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
0.7145 -0.4125 0.0000 L 0 0 0 0 0 0 0 0 0 0 0 0
0.7145 0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
0.0000 0.8250 0.0000 L 0 0 0 0 0 0 0 0 0 0 0 0
1 2 2 0
2 3 1 0
3 4 2 0
4 5 1 0
5 6 2 0
6 1 1 0
2 F 2 6 7
4 F 2 6 7
6 F 2 6 7
M ALS 2 2 F C N
M ALS 4 2 F C N
M ALS 6 2 F C N
M END
)CTAB"_ctab;
std::vector<std::string> smiArray(10, "COc1ccccc1");
smiArray.push_back("COc1ccncn1");
RGroupDecompositionParameters params;
params.matchingStrategy = GreedyChunks;
RGroupDecomposition decomp(*core, params);
for (const auto smiles : smiArray) {
ROMol *mol = SmilesToMol(smiles);
int res = decomp.add(*mol);
TEST_ASSERT(res >= 0);
delete mol;
}
decomp.process();
std::cerr << "Best mapping" << std::endl;
RGroupRows rows = decomp.getRGroupsAsRows();
TEST_ASSERT(rows.size() == 11);
for (const auto row : rows) {
TEST_ASSERT(row.size() == 2);
TEST_ASSERT(row.count("Core") == 1);
TEST_ASSERT(row.count("R1") == 1);
auto mol = row.at("R1");
auto groupSmiles = MolToSmiles(*mol);
TEST_ASSERT(groupSmiles == "CO[*:1]");
}
}
int main() {
RDLog::InitLogs();
boost::logging::disable_logs("rdApp.debug");
@@ -2813,6 +2868,7 @@ int main() {
testAlignOutputCoreToMolecule();
testWildcardInInput();
testDoNotChooseUnrelatedCores();
testGithub5222();
BOOST_LOG(rdInfoLog)
<< "********************************************************\n";
return 0;

File diff suppressed because one or more lines are too long

View File

@@ -1,17 +1,17 @@
[
{
"Core": "Cc1cccc([*:2])c1[*:1]",
"R1": "CO[*:1]",
"R2": "[H][*:2]"
},
{
"Core": "Cc1cccc([*:2])c1[*:1]",
"R1": "CO[*:1]",
"R2": "[H][*:2]"
},
{
"Core": "Cc1cccc([*:2])c1[*:1]",
"Core": "Cc1cccc([*:1])c1[*:2]",
"R1": "[H][*:1]",
"R2": "CO[*:2]"
},
{
"Core": "Cc1cccc([*:1])c1[*:2]",
"R1": "[H][*:1]",
"R2": "CO[*:2]"
},
{
"Core": "Cc1cccc([*:1])c1[*:2]",
"R1": "CO[*:1]",
"R2": "[H][*:2]"
}
]

View File

@@ -1,16 +1,16 @@
[
{
"Core": "Cc1cc([*:1])ccc1[*:3]",
"Core": "Cc1cccc([*:1])c1[*:3]",
"R1": "[H][*:1]",
"R3": "CO[*:3]"
},
{
"Core": "Cc1cc([*:1])ccc1[*:3]",
"Core": "Cc1cccc([*:1])c1[*:3]",
"R1": "[H][*:1]",
"R3": "CO[*:3]"
},
{
"Core": "Cc1cc([*:1])ccc1[*:3]",
"Core": "Cc1cccc([*:1])c1[*:3]",
"R1": "CO[*:1]",
"R3": "[H][*:3]"
}

View File

@@ -193,15 +193,15 @@ class TestPandasTools(unittest.TestCase):
df = PandasTools.RGroupDecompositionToFrame(groups, mols, include_core=True)
self.assertEqual(len(df), len(mols))
self.assertEqual(list(df.columns), ['Mol', 'Core', 'R1', 'R2'])
self.assertEqual(list(df.R1), ['F[*:1]', 'Cl[*:1]', 'O[*:1]', 'F[*:1]', 'F[*:1]'])
self.assertEqual(list(df.R2), ['F[*:2]', 'Cl[*:2]', 'O[*:2]', 'F[*:2]', 'F[*:2]'])
groups, _ = rdRGroupDecomposition.RGroupDecompose([scaffold], mols, asSmiles=False,
asRows=False)
df = PandasTools.RGroupDecompositionToFrame(groups, mols, include_core=True)
self.assertEqual(len(df), len(mols))
self.assertEqual(list(df.columns), ['Mol', 'Core', 'R1', 'R2'])
self.assertEqual([Chem.MolToSmiles(x) for x in df.R1],
['F[*:1]', 'Cl[*:1]', 'O[*:1]', 'F[*:1]', 'F[*:1]'])
self.assertEqual([Chem.MolToSmiles(x) for x in df.R2],
['F[*:2]', 'Cl[*:2]', 'O[*:2]', 'F[*:2]', 'F[*:2]'])
@unittest.skipIf(PandasTools.pd is None, 'Pandas not installed, skipping')