mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-03 21:44:30 +08:00
* c++20 builds working * get MolStandardize building with clang19 * get FMCS building with clang-19 * set cxx version to c++20 * remove a few more compiler warnings * bump min boost version, CI cleanup * boost 1.81 is not available from conda-forge * remove unused constants * bump linux version for CI * remove another unused variable * fix (hopefully) cartridge CI builds * simplify cartridge environment * try postgresql14 in CI * start the postgresql service * change the columns used in the pandastools nbtest * remove missed merge conflict artifact * get github4823 test to pass with numpy 2.2 * remove a compiler warning/error with g++13
1110 lines
35 KiB
C++
1110 lines
35 KiB
C++
//
//  Copyright (C) 2021 Greg Landrum and other RDKit contributors
//
//   @@ All Rights Reserved @@
//  This file is part of the RDKit.
//  The contents are covered by the terms of the BSD license
//  which is included in the file license.txt, found at the root
//  of the RDKit source tree.
//
|
|
|
|
#include <catch2/catch_all.hpp>

#include <GraphMol/RDKitBase.h>

#include <GraphMol/FileParsers/FileParsers.h>
#include <GraphMol/FileParsers/MolSupplier.h>
#include <GraphMol/SmilesParse/SmilesParse.h>
#include <GraphMol/SmilesParse/SmilesWrite.h>
#include <GraphMol/RGroupDecomposition/RGroupDecomp.h>
#include <GraphMol/RGroupDecomposition/RGroupUtils.h>
#include <GraphMol/RGroupDecomposition/RGroupData.h>

#include <boost/format.hpp>
#include <boost/algorithm/string.hpp>
#include <boost/algorithm/string/trim_all.hpp>

#include <fstream>
#include <iterator>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
|
|
|
|
using namespace RDKit;
|
|
|
|
template <typename T>
|
|
void initDataset(T &suppl, ROMOL_SPTR &core, std::vector<ROMOL_SPTR> &mols) {
|
|
core.reset(suppl[0]);
|
|
REQUIRE(core);
|
|
for (unsigned int i = 1; i < suppl.length(); ++i) {
|
|
mols.emplace_back(suppl[i]);
|
|
REQUIRE(mols.back());
|
|
}
|
|
}
|
|
|
|
// Returns a copy of `txt` with every space, tab, carriage return and newline
// removed (leading, trailing and interior alike). The tests use this to
// compare JSON documents independently of their layout.
std::string flatten_whitespace(const std::string &txt) {
  std::string res;
  res.reserve(txt.size());
  for (const char c : txt) {
    switch (c) {
      case ' ':
      case '\t':
      case '\r':
      case '\n':
        // whitespace: drop it
        break;
      default:
        res.push_back(c);
    }
  }
  return res;
}
|
|
|
|
// Reads the whole file `fname` and returns its contents as a string.
//
// Throws std::runtime_error naming the file if it cannot be opened. Without
// this check a missing file made tellg() return -1, which was then handed to
// reserve() as a huge size and surfaced as an unhelpful std::length_error.
std::string readReferenceData(const std::string &fname) {
  std::ifstream ins(fname);
  if (!ins) {
    throw std::runtime_error("readReferenceData: could not open file '" +
                             fname + "'");
  }

  // Determine the file size up front so the string is allocated only once.
  ins.seekg(0, std::ios::end);
  const std::streamoff size = ins.tellg();
  ins.seekg(0, std::ios::beg);

  std::string res;
  if (size > 0) {
    res.reserve(static_cast<std::string::size_type>(size));
  }
  res.assign((std::istreambuf_iterator<char>(ins)),
             std::istreambuf_iterator<char>());
  return res;
}
|
|
// Checks the toJSON() output for the decomposition of simple1.sdf, once with
// the results organised as rows and once organised as columns.
TEST_CASE("toJSONTests", "[unittests]") {
  // Constructing a std::string from a null pointer is undefined behavior, so
  // stop the test cleanly when RDBASE is not set.
  const char *rdbase = getenv("RDBASE");
  REQUIRE(rdbase);
  std::string testDataDir =
      std::string(rdbase) +
      std::string("/Code/GraphMol/RGroupDecomposition/test_data/");
  std::string fName = testDataDir + "simple1.sdf";
  SDMolSupplier suppl(fName);
  std::vector<ROMOL_SPTR> cores(1);
  std::vector<ROMOL_SPTR> mols;
  initDataset(suppl, cores.front(), mols);
  SECTION("rows") {
    RGroupRows rows;
    auto n = RGroupDecompose(cores, mols, rows);
    CHECK(n == mols.size());
    CHECK(rows.size() == mols.size());
    std::string expected = R"JSON([
  {
    "Core": "Cc1cccc([*:1])c1[*:2]",
    "R1": "[H][*:1]",
    "R2": "CO[*:2]"
  },
  {
    "Core": "Cc1cccc([*:1])c1[*:2]",
    "R1": "[H][*:1]",
    "R2": "CO[*:2]"
  },
  {
    "Core": "Cc1cccc([*:1])c1[*:2]",
    "R1": "CO[*:1]",
    "R2": "[H][*:2]"
  }
])JSON";
    // Layout differences are irrelevant, so compare with whitespace removed.
    CHECK(flatten_whitespace(toJSON(rows)) == flatten_whitespace(expected));
  }
  SECTION("columns") {
    RGroupColumns cols;
    auto n = RGroupDecompose(cores, mols, cols);
    CHECK(n == mols.size());
    // NOTE(review): cols.size() presumably counts the columns (Core, R1, R2)
    // rather than the molecules, so this check may only hold because both
    // numbers happen to be 3 for this data set - confirm.
    CHECK(cols.size() == mols.size());
    std::string expected = R"JSON([
  "Core": [
    "Cc1cccc([*:1])c1[*:2]",
    "Cc1cccc([*:1])c1[*:2]",
    "Cc1cccc([*:1])c1[*:2]"
  ],
  "R1": [
    "[H][*:1]",
    "[H][*:1]",
    "CO[*:1]"
  ],
  "R2": [
    "CO[*:2]",
    "CO[*:2]",
    "[H][*:2]"
  ]
]
)JSON";
    CHECK(flatten_whitespace(toJSON(cols)) == flatten_whitespace(expected));
  }
}
|
|
// Decomposes simple1.sdf (first record is the core, the rest are the
// molecules) and compares the JSON output against stored reference files.
TEST_CASE("simple1") {
  // Constructing a std::string from a null pointer is undefined behavior, so
  // stop the test cleanly when RDBASE is not set.
  const char *rdbase = getenv("RDBASE");
  REQUIRE(rdbase);
  std::string testDataDir =
      std::string(rdbase) +
      std::string("/Code/GraphMol/RGroupDecomposition/test_data/");
  std::string fName = testDataDir + "simple1.sdf";
  SDMolSupplier suppl(fName);
  std::vector<ROMOL_SPTR> cores(1);
  std::vector<ROMOL_SPTR> mols;
  initDataset(suppl, cores.front(), mols);
  SECTION("defaults") {
    RGroupRows rows;
    auto n = RGroupDecompose(cores, mols, rows);
    CHECK(n == mols.size());
    CHECK(rows.size() == mols.size());
    CHECK(flatten_whitespace(toJSON(rows)) ==
          flatten_whitespace(
              readReferenceData(testDataDir + "simple1.out1.json")));
  }
  SECTION("no symmetrization") {
    RGroupRows rows;
    RGroupDecompositionParameters ps;
    ps.matchingStrategy = RGroupMatching::NoSymmetrization;
    auto n = RGroupDecompose(cores, mols, rows, nullptr, ps);
    CHECK(n == mols.size());
    CHECK(rows.size() == mols.size());
    CHECK(flatten_whitespace(toJSON(rows)) ==
          flatten_whitespace(
              readReferenceData(testDataDir + "simple1.out2.json")));
  }
}
|
|
|
|
// Decomposes simple2.sdf with default parameters and with
// onlyMatchAtRGroups enabled, comparing against stored reference JSON.
TEST_CASE("simple2 with specified R groups") {
  // Constructing a std::string from a null pointer is undefined behavior, so
  // stop the test cleanly when RDBASE is not set.
  const char *rdbase = getenv("RDBASE");
  REQUIRE(rdbase);
  std::string testDataDir =
      std::string(rdbase) +
      std::string("/Code/GraphMol/RGroupDecomposition/test_data/");
  std::string fName = testDataDir + "simple2.sdf";
  SDMolSupplier suppl(fName);
  std::vector<ROMOL_SPTR> cores(1);
  std::vector<ROMOL_SPTR> mols;
  initDataset(suppl, cores.front(), mols);
  SECTION("defaults") {
    RGroupRows rows;
    auto n = RGroupDecompose(cores, mols, rows);
    CHECK(n == mols.size());
    CHECK(rows.size() == mols.size());
    CHECK(flatten_whitespace(toJSON(rows)) ==
          flatten_whitespace(
              readReferenceData(testDataDir + "simple2.out1.json")));
  }
  SECTION("only match at r groups") {
    RGroupRows rows;
    RGroupDecompositionParameters ps;
    ps.onlyMatchAtRGroups = true;
    std::vector<unsigned> unmatched;
    auto n = RGroupDecompose(cores, mols, rows, &unmatched, ps);
    CHECK(n == 2);
    CHECK(rows.size() == n);
    CHECK(unmatched.size() == mols.size() - n);
    // CHECK does not abort on failure; make sure element 0 exists before
    // reading it.
    REQUIRE(!unmatched.empty());
    CHECK(unmatched[0] == 2);
    CHECK(flatten_whitespace(toJSON(rows)) ==
          flatten_whitespace(
              readReferenceData(testDataDir + "simple2.out2.json")));
  }
}
|
|
|
|
// Decomposes simple3.sdf under the four combinations of the
// removeAllHydrogenRGroups / removeAllHydrogenRGroupsAndLabels parameters and
// compares each result against its stored reference JSON.
TEST_CASE("simple3 with user labels on aromatic N") {
  // Constructing a std::string from a null pointer is undefined behavior, so
  // stop the test cleanly when RDBASE is not set.
  const char *rdbase = getenv("RDBASE");
  REQUIRE(rdbase);
  std::string testDataDir =
      std::string(rdbase) +
      std::string("/Code/GraphMol/RGroupDecomposition/test_data/");
  std::string fName = testDataDir + "simple3.sdf";
  SDMolSupplier suppl(fName);
  std::vector<ROMOL_SPTR> cores(1);
  std::vector<ROMOL_SPTR> mols;
  initDataset(suppl, cores.front(), mols);
  SECTION("defaults (allH labels and R-groups are removed)") {
    RGroupRows rows;
    auto n = RGroupDecompose(cores, mols, rows);
    CHECK(n == mols.size());
    CHECK(rows.size() == mols.size());
    CHECK(flatten_whitespace(toJSON(rows)) ==
          flatten_whitespace(
              readReferenceData(testDataDir + "simple3.out1.json")));
  }
  SECTION("removeAllHydrogenRGroups = false (as defaults)") {
    RGroupRows rows;
    RGroupDecompositionParameters ps;
    ps.removeAllHydrogenRGroups = false;
    std::vector<unsigned> unmatched;
    auto n = RGroupDecompose(cores, mols, rows, &unmatched, ps);
    CHECK(n == mols.size());
    CHECK(rows.size() == mols.size());
    CHECK(unmatched.empty());
    CHECK(flatten_whitespace(toJSON(rows)) ==
          flatten_whitespace(
              readReferenceData(testDataDir + "simple3.out2.json")));
  }
  SECTION("removeAllHydrogenRGroupsAndLabels = false (allH labels retained)") {
    RGroupRows rows;
    RGroupDecompositionParameters ps;
    ps.removeAllHydrogenRGroupsAndLabels = false;
    std::vector<unsigned> unmatched;
    auto n = RGroupDecompose(cores, mols, rows, &unmatched, ps);
    CHECK(n == mols.size());
    CHECK(rows.size() == mols.size());
    CHECK(unmatched.empty());
    CHECK(flatten_whitespace(toJSON(rows)) ==
          flatten_whitespace(
              readReferenceData(testDataDir + "simple3.out3.json")));
  }
  SECTION(
      "removeAllHydrogenRGroupsAndLabels = false, removeAllHydrogenRGroups = "
      "false (allH labels and R-groups are retained)") {
    RGroupRows rows;
    RGroupDecompositionParameters ps;
    ps.removeAllHydrogenRGroups = false;
    ps.removeAllHydrogenRGroupsAndLabels = false;
    std::vector<unsigned> unmatched;
    auto n = RGroupDecompose(cores, mols, rows, &unmatched, ps);
    CHECK(n == mols.size());
    CHECK(rows.size() == mols.size());
    CHECK(unmatched.empty());
    CHECK(flatten_whitespace(toJSON(rows)) ==
          flatten_whitespace(
              readReferenceData(testDataDir + "simple3.out4.json")));
  }
}
|
|
|
|
// Decomposes jm7b00306.excerpt.sdf with default parameters; exactly one
// molecule (index 1) is expected not to match the core.
TEST_CASE("jm7b00306 Snippet") {
  // Constructing a std::string from a null pointer is undefined behavior, so
  // stop the test cleanly when RDBASE is not set.
  const char *rdbase = getenv("RDBASE");
  REQUIRE(rdbase);
  std::string testDataDir =
      std::string(rdbase) +
      std::string("/Code/GraphMol/RGroupDecomposition/test_data/");
  std::string fName = testDataDir + "jm7b00306.excerpt.sdf";
  SDMolSupplier suppl(fName);
  std::vector<ROMOL_SPTR> cores(1);
  std::vector<ROMOL_SPTR> mols;
  initDataset(suppl, cores.front(), mols);
  SECTION("defaults") {
    RGroupRows rows;
    std::vector<unsigned> unmatched;
    auto n = RGroupDecompose(cores, mols, rows, &unmatched);
    CHECK(n == mols.size() - 1);
    CHECK(rows.size() == n);
    // there is one structure in there that doesn't match the core
    CHECK(unmatched.size() == mols.size() - n);
    // CHECK does not abort on failure; make sure element 0 exists before
    // reading it.
    REQUIRE(!unmatched.empty());
    CHECK(unmatched[0] == 1);
    CHECK(flatten_whitespace(toJSON(rows)) ==
          flatten_whitespace(
              readReferenceData(testDataDir + "jm7b00306.excerpt.out1.json")));
  }
}
|
|
|
|
// Decomposes jm200186n.excerpt.sdf with default parameters; exactly one
// molecule (index 3) is expected not to match the core.
TEST_CASE("jm200186n Snippet") {
  // Constructing a std::string from a null pointer is undefined behavior, so
  // stop the test cleanly when RDBASE is not set.
  const char *rdbase = getenv("RDBASE");
  REQUIRE(rdbase);
  std::string testDataDir =
      std::string(rdbase) +
      std::string("/Code/GraphMol/RGroupDecomposition/test_data/");
  std::string fName = testDataDir + "jm200186n.excerpt.sdf";
  SDMolSupplier suppl(fName);
  std::vector<ROMOL_SPTR> cores(1);
  std::vector<ROMOL_SPTR> mols;
  initDataset(suppl, cores.front(), mols);
  SECTION("defaults") {
    RGroupRows rows;
    std::vector<unsigned> unmatched;
    auto n = RGroupDecompose(cores, mols, rows, &unmatched);
    CHECK(n == mols.size() - 1);
    CHECK(rows.size() == n);
    // there is one structure in there that doesn't match the core
    CHECK(unmatched.size() == mols.size() - n);
    // CHECK does not abort on failure; make sure element 0 exists before
    // reading it.
    REQUIRE(!unmatched.empty());
    CHECK(unmatched[0] == 3);
    CHECK(flatten_whitespace(toJSON(rows)) ==
          flatten_whitespace(
              readReferenceData(testDataDir + "jm200186n.excerpt.out1.json")));
  }
}
|
|
|
|
std::vector<ROMOL_SPTR> smisToMols(const std::vector<std::string> &smis) {
|
|
std::vector<ROMOL_SPTR> mols;
|
|
for (const auto &smi : smis) {
|
|
auto m = SmilesToMol(smi);
|
|
assert(m);
|
|
mols.emplace_back(m);
|
|
}
|
|
return mols;
|
|
}
|
|
|
|
// Runs the decomposition of four azetidine derivatives against an achiral and
// a chiral core, first with the default substructure-match parameters and
// then with useChirality switched off.
TEST_CASE("substructure parameters and RGD: chirality") {
  std::vector<std::string> molSmiles = {"C1CN[C@H]1F", "C1CN[C@]1(O)F",
                                        "C1CN[C@@H]1F", "C1CN[CH]1F"};
  auto mols = smisToMols(molSmiles);
  std::vector<std::string> achiralCoreSmiles = {"C1CNC1[*:1]"};
  auto cores = smisToMols(achiralCoreSmiles);
  std::vector<std::string> chiralCoreSmiles = {"C1CN[C@H]1[*:1]"};
  auto chiral_cores = smisToMols(chiralCoreSmiles);

  SECTION("defaults") {
    RGroupDecompositionParameters params;
    params.allowMultipleRGroupsOnUnlabelled = true;
    RGroupRows rows;
    std::vector<unsigned> unmatched;
    {
      // achiral core: every molecule is expected to match
      const std::string expected = R"JSON(
[
  {
    "Core":"C1C[C@@]([*:1])([*:2])N1",
    "R1":"F[*:1]",
    "R2":"[H][*:2]"
  },
  {
    "Core":"C1C[C@]([*:1])([*:2])N1",
    "R1":"F[*:1]",
    "R2":"O[*:2]"
  },
  {
    "Core":"C1C[C@]([*:1])([*:2])N1",
    "R1":"F[*:1]",
    "R2":"[H][*:2]"
  },
  {
    "Core":"C1CC([*:1])([*:2])N1",
    "R1":"F[*:1]",
    "R2":"[H][*:2]"
  }
]
)JSON";
      auto numMatched = RGroupDecompose(cores, mols, rows, &unmatched, params);
      CHECK(numMatched == mols.size());
      CHECK(rows.size() == numMatched);
      CHECK(unmatched.empty());
      CHECK(flatten_whitespace(toJSON(rows)) == flatten_whitespace(expected));
    }
    {
      // chiral core: one molecule is expected to be left unmatched
      const std::string expected = R"JSON(
[
  {
    "Core":"C1C[C@@]([*:1])([*:2])N1",
    "R1":"F[*:1]",
    "R2":"[H][*:2]"
  },
  {
    "Core":"C1C[C@@]([*:1])([*:2])N1",
    "R1":"O[*:1]",
    "R2":"F[*:2]"
  },
  {
    "Core":"C1C[C@@]([*:1])([*:2])N1",
    "R1":"[H][*:1]",
    "R2":"F[*:2]"
  }
]
)JSON";
      auto numMatched =
          RGroupDecompose(chiral_cores, mols, rows, &unmatched, params);
      CHECK(numMatched == mols.size() - 1);
      CHECK(rows.size() == numMatched);
      CHECK(unmatched.size() == 1);
      CHECK(flatten_whitespace(toJSON(rows)) == flatten_whitespace(expected));
    }
  }

  SECTION("not using chirality") {
    // With chirality ignored both cores are expected to give the same result,
    // and the stereo information is gone from the reported cores.
    RGroupDecompositionParameters params;
    params.allowMultipleRGroupsOnUnlabelled = true;
    params.substructmatchParams.useChirality = false;
    RGroupRows rows;
    std::vector<unsigned> unmatched;
    const std::string expected = R"JSON(
[
  {
    "Core":"C1CC([*:1])([*:2])N1",
    "R1":"F[*:1]",
    "R2":"[H][*:2]"
  },
  {
    "Core":"C1CC([*:1])([*:2])N1",
    "R1":"F[*:1]",
    "R2":"O[*:2]"
  },
  {
    "Core":"C1CC([*:1])([*:2])N1",
    "R1":"F[*:1]",
    "R2":"[H][*:2]"
  },
  {
    "Core":"C1CC([*:1])([*:2])N1",
    "R1":"F[*:1]",
    "R2":"[H][*:2]"
  }
]
)JSON";
    {
      auto numMatched = RGroupDecompose(cores, mols, rows, &unmatched, params);
      CHECK(numMatched == mols.size());
      CHECK(rows.size() == numMatched);
      CHECK(unmatched.empty());
      CHECK(flatten_whitespace(toJSON(rows)) == flatten_whitespace(expected));
    }
    {
      auto numMatched =
          RGroupDecompose(chiral_cores, mols, rows, &unmatched, params);
      CHECK(numMatched == mols.size());
      CHECK(rows.size() == numMatched);
      CHECK(unmatched.empty());
      CHECK(flatten_whitespace(toJSON(rows)) == flatten_whitespace(expected));
    }
  }
}
|
|
|
|
// Runs the decomposition of molecules carrying enhanced-stereo groups against
// an "and" (&1) core and an "or" (o1) core, with useEnhancedStereo left at
// its default and then switched on.
TEST_CASE("substructure parameters and RGD: enhanced stereo") {
  std::vector<std::string> molSmiles = {
      "F[C@H]1CCN1 |&1:1|", "C1CN[C@]1(O)F |&1:3|", "C1CN[C@@H]1F |&1:3|",
      "Cl[C@H]1CCN1 |o1:1|", "C1CN[CH]1F"};
  auto mols = smisToMols(molSmiles);
  std::vector<std::string> andCoreSmiles = {"C1CN[C@H]1[*:1] |&1:3|"};
  auto cores = smisToMols(andCoreSmiles);
  std::vector<std::string> orCoreSmiles = {"C1CN[C@H]1[*:1] |o1:3|"};
  auto cores2 = smisToMols(orCoreSmiles);

  SECTION("defaults: no enhanced stereo") {
    RGroupDecompositionParameters params;
    params.allowMultipleRGroupsOnUnlabelled = true;
    RGroupRows rows;
    std::vector<unsigned> unmatched;
    // The reported core is the part of the target that matched the core
    // query, so it is no longer SMARTS. Both cores share this expectation.
    const std::string expected = R"JSON(
[
  {
    "Core":"C1C[C@@]([*:1])([*:2])N1",
    "R1":"F[*:1]",
    "R2":"[H][*:2]"
  },
  {
    "Core":"C1C[C@@]([*:1])([*:2])N1",
    "R1":"O[*:1]",
    "R2":"F[*:2]"
  },
  {
    "Core":"C1C[C@@]([*:1])([*:2])N1",
    "R1":"[H][*:1]",
    "R2":"F[*:2]"
  },
  {
    "Core":"C1C[C@@]([*:1])([*:2])N1",
    "R1":"Cl[*:1]",
    "R2":"[H][*:2]"
  }
]
)JSON";
    {
      auto numMatched = RGroupDecompose(cores, mols, rows, &unmatched, params);
      CHECK(numMatched == mols.size() - 1);
      CHECK(rows.size() == numMatched);
      CHECK(unmatched.size() == mols.size() - numMatched);
      CHECK(flatten_whitespace(toJSON(rows)) == flatten_whitespace(expected));
    }
    {
      auto numMatched =
          RGroupDecompose(cores2, mols, rows, &unmatched, params);
      CHECK(numMatched == mols.size() - 1);
      CHECK(rows.size() == numMatched);
      CHECK(unmatched.size() == 1);
      CHECK(flatten_whitespace(toJSON(rows)) == flatten_whitespace(expected));
    }
  }

  SECTION("using enhanced stereo") {
    RGroupDecompositionParameters params;
    params.allowMultipleRGroupsOnUnlabelled = true;
    params.substructmatchParams.useEnhancedStereo = true;
    RGroupRows rows;
    std::vector<unsigned> unmatched;
    {
      // The reported core is the part of the target that matched the core
      // query, so it is no longer SMARTS.
      const std::string expected = R"JSON(
[
  {
    "Core":"C1C[C@@]([*:1])([*:2])N1",
    "R1":"F[*:1]",
    "R2":"[H][*:2]"
  },
  {
    "Core":"C1C[C@]([*:1])([*:2])N1",
    "R1":"F[*:1]",
    "R2":"O[*:2]"
  },
  {
    "Core":"C1C[C@]([*:1])([*:2])N1",
    "R1":"F[*:1]",
    "R2":"[H][*:2]"
  }
]
)JSON";
      auto numMatched = RGroupDecompose(cores, mols, rows, &unmatched, params);
      CHECK(numMatched == mols.size() - 2);
      CHECK(rows.size() == numMatched);
      CHECK(unmatched.size() == mols.size() - numMatched);
      CHECK(flatten_whitespace(toJSON(rows)) == flatten_whitespace(expected));
    }
    {
      // The reported core is the part of the target that matched the core
      // query, so it is no longer SMARTS.
      const std::string expected = R"JSON(
[
  {
    "Core":"C1C[C@@]([*:1])([*:2])N1",
    "R1":"F[*:1]",
    "R2":"[H][*:2]"
  },
  {
    "Core":"C1C[C@]([*:1])([*:2])N1",
    "R1":"F[*:1]",
    "R2":"O[*:2]"
  },
  {
    "Core":"C1C[C@]([*:1])([*:2])N1",
    "R1":"F[*:1]",
    "R2":"[H][*:2]"
  },
  {
    "Core":"C1C[C@@]([*:1])([*:2])N1",
    "R1":"Cl[*:1]",
    "R2":"[H][*:2]"
  }
]
)JSON";
      auto numMatched =
          RGroupDecompose(cores2, mols, rows, &unmatched, params);
      CHECK(numMatched == mols.size() - 1);
      CHECK(rows.size() == numMatched);
      CHECK(unmatched.size() == 1);
      CHECK(flatten_whitespace(toJSON(rows)) == flatten_whitespace(expected));
    }
  }
}
|
|
|
|
// Regression test for github issue 4809: the V3000 mol block written for the
// R1 group after decomposition must not contain a "CFG=2" marker.
TEST_CASE("github4809: ring double bonds written as crossed bonds after RGD") {
  std::vector<std::string> smis = {"C1C=CCC2=C1C=CC=N2"};
  auto mols = smisToMols(smis);
  std::vector<std::string> csmis = {"c1ccnc([*:1])c1[*:2]"};
  auto cores = smisToMols(csmis);
  SECTION("basics") {
    RGroupRows rows;
    {
      auto n = RGroupDecompose(cores, mols, rows);
      CHECK(n == mols.size());
      CHECK(rows.size() == n);
      // CHECK does not abort on failure, so guard the element access and the
      // dereference below explicitly.
      REQUIRE(!rows.empty());
      auto r1 = rows[0]["R1"];
      REQUIRE(r1);
      auto mb = MolToV3KMolBlock(*r1);
      CHECK(mb.find("CFG=2") == std::string::npos);
    }
  }
}
|
|
|
|
// Checks the JSON produced for the same decomposition under three settings
// of RGroupDecompositionParameters::rgroupLabelling: Isotope, MDLRGroup and
// Isotope combined with AtomMap.
TEST_CASE("rgroupLabelling") {
  std::vector<std::string> molSmiles = {"C1CN[C@H]1F", "C1CN[C@]1(O)F",
                                        "C1CN[C@@H]1F", "C1CN[CH]1F"};
  auto mols = smisToMols(molSmiles);
  std::vector<std::string> coreSmiles = {"C1CNC1[*:1]"};
  auto cores = smisToMols(coreSmiles);

  SECTION("Isotope") {
    RGroupDecompositionParameters params;
    params.rgroupLabelling = RGroupLabelling::Isotope;
    params.allowMultipleRGroupsOnUnlabelled = true;
    RGroupRows rows;
    std::vector<unsigned> unmatched;
    {
      const std::string expected = R"JSON(
[
  {
    "Core": "[1*][C@@]1([2*])CCN1",
    "R1":"[1*]F",
    "R2":"[2*][H]"
  },
  {
    "Core": "[1*][C@]1([2*])CCN1",
    "R1":"[1*]F",
    "R2":"[2*]O"
  },
  {
    "Core":"[1*][C@]1([2*])CCN1",
    "R1":"[1*]F",
    "R2":"[2*][H]"
  },
  {
    "Core":"[1*]C1([2*])CCN1",
    "R1":"[1*]F",
    "R2":"[2*][H]"
  }
]
)JSON";
      auto numMatched = RGroupDecompose(cores, mols, rows, &unmatched, params);
      CHECK(numMatched == mols.size());
      CHECK(rows.size() == numMatched);
      CHECK(unmatched.empty());
      CHECK(flatten_whitespace(toJSON(rows)) == flatten_whitespace(expected));
    }
  }

  SECTION("RGroup") {
    RGroupDecompositionParameters params;
    params.rgroupLabelling = RGroupLabelling::MDLRGroup;
    params.allowMultipleRGroupsOnUnlabelled = true;
    RGroupRows rows;
    std::vector<unsigned> unmatched;
    {
      // Here the labels do not appear in the output SMILES. Presumably the
      // dummy atoms can no longer be told apart without the isotope labels,
      // which is why the SMILES carries no chirality either. Chirality is
      // present in the core SMARTS.
      const std::string expected = R"JSON(
[
  {
    "Core":"*C1(*)CCN1",
    "R1":"*F",
    "R2":"*[H]"
  },
  {
    "Core":"*C1(*)CCN1",
    "R1":"*F",
    "R2":"*O"
  },
  {
    "Core":"*C1(*)CCN1",
    "R1":"*F",
    "R2":"*[H]"
  },
  {
    "Core":"*C1(*)CCN1",
    "R1":"*F",
    "R2":"*[H]"
  }
]
)JSON";
      auto numMatched = RGroupDecompose(cores, mols, rows, &unmatched, params);
      CHECK(numMatched == mols.size());
      CHECK(rows.size() == numMatched);
      CHECK(unmatched.empty());
      CHECK(flatten_whitespace(toJSON(rows)) == flatten_whitespace(expected));
    }
  }

  SECTION("Isotope|Map") {
    RGroupDecompositionParameters params;
    params.allowMultipleRGroupsOnUnlabelled = true;
    params.rgroupLabelling =
        RGroupLabelling::Isotope | RGroupLabelling::AtomMap;
    RGroupRows rows;
    std::vector<unsigned> unmatched;
    {
      const std::string expected = R"JSON(

[
  {
    "Core":"C1C[C@@]([1*:1])([2*:2])N1",
    "R1":"F[1*:1]",
    "R2":"[H][2*:2]"
  },
  {
    "Core":"C1C[C@]([1*:1])([2*:2])N1",
    "R1":"F[1*:1]",
    "R2":"O[2*:2]"
  },
  {
    "Core":"C1C[C@]([1*:1])([2*:2])N1",
    "R1":"F[1*:1]",
    "R2":"[H][2*:2]"
  },
  {
    "Core":"C1CC([1*:1])([2*:2])N1",
    "R1":"F[1*:1]",
    "R2":"[H][2*:2]"
  }
]
)JSON";
      auto numMatched = RGroupDecompose(cores, mols, rows, &unmatched, params);
      CHECK(numMatched == mols.size());
      CHECK(rows.size() == numMatched);
      CHECK(unmatched.empty());
      CHECK(flatten_whitespace(toJSON(rows)) == flatten_whitespace(expected));
    }
  }
}
|
|
|
|
// Checks whether the dummy atoms (indices 4 and 5) of the first result core
// carry the dummyLabel property: absent with AtomMap labelling alone, present
// when MDLRGroup labelling is also requested.
TEST_CASE("MDL R labels from original core") {
  std::vector<std::string> smis = {"C1CN[C@H]1F", "C1CN[C@]1(O)F",
                                   "C1CN[C@@H]1F", "C1CN[CH]1F"};
  auto mols = smisToMols(smis);
  std::vector<std::string> csmis = {"[*]C1CCN1 |$_R1;;;;$|"};
  auto cores = smisToMols(csmis);
  SECTION("Map") {
    RGroupRows rows;
    std::vector<unsigned> unmatched;
    RGroupDecompositionParameters params;
    params.allowMultipleRGroupsOnUnlabelled = true;
    params.rgroupLabelling = RGroupLabelling::AtomMap;
    {
      auto n = RGroupDecompose(cores, mols, rows, &unmatched, params);
      CHECK(n == mols.size());
      CHECK(rows.size() == n);
      CHECK(unmatched.empty());
      // CHECK does not abort on failure, so guard the element access and the
      // dereferences below explicitly.
      REQUIRE(!rows.empty());
      auto coreMol = rows[0]["Core"];
      REQUIRE(coreMol);
      CHECK(coreMol->getAtomWithIdx(4)->getAtomicNum() == 0);
      CHECK(!coreMol->getAtomWithIdx(4)->hasProp(
          common_properties::dummyLabel));
      CHECK(coreMol->getAtomWithIdx(5)->getAtomicNum() == 0);
      CHECK(!coreMol->getAtomWithIdx(5)->hasProp(
          common_properties::dummyLabel));
    }
  }
  SECTION("Map | MDL") {
    RGroupRows rows;
    std::vector<unsigned> unmatched;
    RGroupDecompositionParameters params;
    params.allowMultipleRGroupsOnUnlabelled = true;
    params.rgroupLabelling =
        RGroupLabelling::AtomMap | RGroupLabelling::MDLRGroup;
    {
      auto n = RGroupDecompose(cores, mols, rows, &unmatched, params);
      CHECK(n == mols.size());
      CHECK(rows.size() == n);
      CHECK(unmatched.empty());
      // CHECK does not abort on failure, so guard the element access and the
      // dereferences below explicitly.
      REQUIRE(!rows.empty());
      auto coreMol = rows[0]["Core"];
      REQUIRE(coreMol);
      CHECK(coreMol->getAtomWithIdx(4)->getAtomicNum() == 0);
      CHECK(
          coreMol->getAtomWithIdx(4)->hasProp(common_properties::dummyLabel));
      CHECK(coreMol->getAtomWithIdx(5)->getAtomicNum() == 0);
      CHECK(
          coreMol->getAtomWithIdx(5)->hasProp(common_properties::dummyLabel));
    }
  }
}
|
|
|
|
// Exercises RGroupDecomposition::getMatchingCoreIdx() with three molecules
// (two expected to match the core, one not), with and without retrieving the
// matches, and then cross-checks against SubstructMatch() after adding Hs.
TEST_CASE("Mol matches core") {
  auto core = "[*:1]c1[!#1]([*:2])cc([*:3])n([*:4])c(=O)1"_smarts;
  auto cmol = "Clc1c(C)cc(F)n(CC)c(=O)1"_smiles;
  auto nmol = "Clc1ncc(F)n(CC)c(=O)1"_smiles;
  auto smol = "Clc1ncc(F)n(CC)c(=S)1"_smiles;
  // All four are dereferenced below; stop here if any of them failed to parse.
  REQUIRE(core);
  REQUIRE(cmol);
  REQUIRE(nmol);
  REQUIRE(smol);
  RGroupDecompositionParameters params;
  params.onlyMatchAtRGroups = true;
  RGroupDecomposition decomp(*core, params);
  CHECK(decomp.getMatchingCoreIdx(*cmol) == 0);
  CHECK(decomp.getMatchingCoreIdx(*nmol) == 0);
  CHECK(decomp.getMatchingCoreIdx(*smol) == -1);
  std::vector<MatchVectType> matches;
  CHECK(decomp.getMatchingCoreIdx(*cmol, &matches) == 0);
  // REQUIRE rather than CHECK: front() on an empty vector is undefined.
  REQUIRE(matches.size() == 1);
  CHECK(matches.front().size() == core->getNumAtoms());
  CHECK(decomp.getMatchingCoreIdx(*nmol, &matches) == 0);
  REQUIRE(matches.size() == 1);
  CHECK(matches.front().size() == core->getNumAtoms() - 1);
  CHECK(decomp.getMatchingCoreIdx(*smol, &matches) == -1);
  CHECK(matches.empty());
  MolOps::addHs(*cmol);
  MolOps::addHs(*nmol);
  MatchVectType match;
  CHECK(SubstructMatch(*cmol, *core, match));
  CHECK(match.size() == core->getNumAtoms());
  match.clear();
  CHECK(!SubstructMatch(*nmol, *core, match));
}
|
|
|
|
// Exercises relabelMappedDummies() for the different combinations of input
// label sources (atom map number, isotope, MDL R-group label) and output
// label types, comparing the non-canonical CXSMILES before and after.
TEST_CASE("relabelMappedDummies") {
  SmilesWriteParams p;
  p.canonical = false;
  // V2000 mol blocks are column-sensitive; keep the spacing below intact.
  auto allDifferentCore = R"CTAB(
     RDKit          2D

  8  8  0  0  0  0  0  0  0  0999 V2000
    1.0808   -0.8772    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    1.0827    0.1228    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    0.2177    0.6246    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    0.2198    1.6246    0.0000 R#  0  0  0  0  0 15  0  0  0  4  0  0
   -0.6493    0.1262    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -1.5142    0.6280    0.0000 R#  0  0  0  0  0 15  0  0  0  3  0  0
   -0.6513   -0.8736    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    0.2137   -1.3754    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
  1  2  2  0
  2  3  1  0
  3  4  1  0
  3  5  2  0
  5  6  1  0
  5  7  1  0
  7  8  2  0
  8  1  1  0
M  RGP  2   4   2   6   1
M  END
)CTAB"_ctab;
  // The molecule is dereferenced right away; stop if parsing failed.
  REQUIRE(allDifferentCore);
  allDifferentCore->removeConformer(0);
  // Give the two dummies isotope values that differ from both their atom map
  // numbers and their R-group numbers, so each label source is
  // distinguishable in the sections below.
  allDifferentCore->getAtomWithIdx(3)->setIsotope(6);
  allDifferentCore->getAtomWithIdx(5)->setIsotope(5);
  CHECK(
      MolToCXSmiles(*allDifferentCore, p) ==
      "c1cc([6*:4])c([5*:3])cn1 |atomProp:3.dummyLabel.R2:3.molAtomMapNumber.4:5.dummyLabel.R1:5.molAtomMapNumber.3|");
  SECTION("AtomMap in, MDLRGroup out") {
    auto core = "c1cc([*:2])c([*:1])cn1"_smiles;
    REQUIRE(core);
    CHECK(
        MolToCXSmiles(*core, p) ==
        "c1cc([*:2])c([*:1])cn1 |atomProp:3.dummyLabel.*:3.molAtomMapNumber.2:5.dummyLabel.*:5.molAtomMapNumber.1|");
    relabelMappedDummies(*core);
    CHECK(MolToCXSmiles(*core, p) ==
          "c1cc(*)c(*)cn1 |atomProp:3.dummyLabel.R2:5.dummyLabel.R1|");
  }
  SECTION("Isotope in, MDLRGroup out") {
    auto core = "c1cc([2*])c([1*])cn1"_smiles;
    REQUIRE(core);
    CHECK(MolToCXSmiles(*core, p) ==
          "c1cc([2*])c([1*])cn1 |atomProp:3.dummyLabel.*:5.dummyLabel.*|");
    relabelMappedDummies(*core);
    CHECK(MolToCXSmiles(*core, p) ==
          "c1cc(*)c(*)cn1 |atomProp:3.dummyLabel.R2:5.dummyLabel.R1|");
  }
  SECTION("MDLRGroup in, MDLRGroup out") {
    // V2000 mol blocks are column-sensitive; keep the spacing below intact.
    auto core = R"CTAB(
     RDKit          2D

  8  8  0  0  0  0  0  0  0  0999 V2000
    1.0808   -0.8772    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    1.0827    0.1228    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    0.2177    0.6246    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    0.2198    1.6246    0.0000 R#  0  0  0  0  0  1  0  0  0  0  0  0
   -0.6493    0.1262    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -1.5142    0.6280    0.0000 R#  0  0  0  0  0  1  0  0  0  0  0  0
   -0.6513   -0.8736    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    0.2137   -1.3754    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
  1  2  2  0
  2  3  1  0
  3  4  1  0
  3  5  2  0
  5  6  1  0
  5  7  1  0
  7  8  2  0
  8  1  1  0
M  RGP  2   4   2   6   1
M  END
)CTAB"_ctab;
    REQUIRE(core);
    core->removeConformer(0);
    CHECK(MolToCXSmiles(*core, p) ==
          "c1cc([2*])c([1*])cn1 |atomProp:3.dummyLabel.R2:5.dummyLabel.R1|");
    relabelMappedDummies(*core);
    CHECK(MolToCXSmiles(*core, p) ==
          "c1cc(*)c(*)cn1 |atomProp:3.dummyLabel.R2:5.dummyLabel.R1|");
  }
  SECTION("AtomMap and Isotope in, MDLRGroup out - AtomMap has priority") {
    auto core = "c1cc([4*:2])c([3*:1])cn1"_smiles;
    REQUIRE(core);
    CHECK(
        MolToCXSmiles(*core, p) ==
        "c1cc([4*:2])c([3*:1])cn1 |atomProp:3.dummyLabel.*:3.molAtomMapNumber.2:5.dummyLabel.*:5.molAtomMapNumber.1|");
    relabelMappedDummies(*core);
    CHECK(MolToCXSmiles(*core, p) ==
          "c1cc(*)c(*)cn1 |atomProp:3.dummyLabel.R2:5.dummyLabel.R1|");
  }
  SECTION("AtomMap and Isotope in, MDLRGroup out - force Isotope priority") {
    auto core = "c1cc([4*:2])c([3*:1])cn1"_smiles;
    REQUIRE(core);
    CHECK(
        MolToCXSmiles(*core, p) ==
        "c1cc([4*:2])c([3*:1])cn1 |atomProp:3.dummyLabel.*:3.molAtomMapNumber.2:5.dummyLabel.*:5.molAtomMapNumber.1|");
    relabelMappedDummies(*core, Isotope);
    CHECK(MolToCXSmiles(*core, p) ==
          "c1cc(*)c(*)cn1 |atomProp:3.dummyLabel.R4:5.dummyLabel.R3|");
  }
  SECTION(
      "AtomMap, Isotope and MDLRGroup in, MDLRGroup out - AtomMap has priority") {
    ROMol core(*allDifferentCore);
    relabelMappedDummies(core);
    CHECK(MolToCXSmiles(core, p) ==
          "c1cc(*)c(*)cn1 |atomProp:3.dummyLabel.R4:5.dummyLabel.R3|");
  }
  SECTION(
      "AtomMap, Isotope and MDLRGroup in, MDLRGroup out - force Isotope priority") {
    ROMol core(*allDifferentCore);
    relabelMappedDummies(core, Isotope);
    CHECK(MolToCXSmiles(core, p) ==
          "c1cc(*)c(*)cn1 |atomProp:3.dummyLabel.R6:5.dummyLabel.R5|");
  }
  SECTION(
      "AtomMap, Isotope and MDLRGroup in, MDLRGroup out - force MDLRGroup priority") {
    ROMol core(*allDifferentCore);
    relabelMappedDummies(core, MDLRGroup);
    CHECK(MolToCXSmiles(core, p) ==
          "c1cc(*)c(*)cn1 |atomProp:3.dummyLabel.R2:5.dummyLabel.R1|");
  }
  SECTION(
      "AtomMap, Isotope and MDLRGroup in, AtomMap out - AtomMap has priority") {
    ROMol core(*allDifferentCore);
    relabelMappedDummies(core, AtomMap | Isotope | MDLRGroup, AtomMap);
    CHECK(
        MolToCXSmiles(core, p) ==
        "c1cc([*:4])c([*:3])cn1 |atomProp:3.molAtomMapNumber.4:5.molAtomMapNumber.3|");
  }
  SECTION(
      "AtomMap, Isotope and MDLRGroup in, Isotope out - AtomMap has priority") {
    ROMol core(*allDifferentCore);
    relabelMappedDummies(core, AtomMap | Isotope | MDLRGroup, Isotope);
    CHECK(MolToCXSmiles(core, p) == "c1cc([4*])c([3*])cn1");
  }
  SECTION(
      "AtomMap, Isotope and MDLRGroup in, AtomMap out - Isotope has priority") {
    ROMol core(*allDifferentCore);
    relabelMappedDummies(core, Isotope | MDLRGroup, AtomMap);
    CHECK(
        MolToCXSmiles(core, p) ==
        "c1cc([*:6])c([*:5])cn1 |atomProp:3.molAtomMapNumber.6:5.molAtomMapNumber.5|");
  }
  SECTION(
      "AtomMap, Isotope and MDLRGroup in, Isotope out - Isotope has priority") {
    ROMol core(*allDifferentCore);
    relabelMappedDummies(core, Isotope | MDLRGroup, Isotope);
    CHECK(MolToCXSmiles(core, p) == "c1cc([6*])c([5*])cn1");
  }
  SECTION(
      "AtomMap, Isotope and MDLRGroup in, AtomMap out - MDLRGroup has priority") {
    ROMol core(*allDifferentCore);
    relabelMappedDummies(core, MDLRGroup, AtomMap);
    CHECK(
        MolToCXSmiles(core, p) ==
        "c1cc([*:2])c([*:1])cn1 |atomProp:3.molAtomMapNumber.2:5.molAtomMapNumber.1|");
  }
  SECTION(
      "AtomMap, Isotope and MDLRGroup in, Isotope out - MDLRGroup has priority") {
    ROMol core(*allDifferentCore);
    relabelMappedDummies(core, MDLRGroup, Isotope);
    CHECK(MolToCXSmiles(core, p) == "c1cc([2*])c([1*])cn1");
  }
}
|
|
|
|
// Decomposes a set of molecules against a four-R-group core with
// RGroupDecompositionParameters::includeTargetMolInResults enabled, then
// checks for every result (retrieved both as rows and as columns) that:
//  - an entry labelled RGroupData::getMolLabel() is present;
//  - every other entry carries the _rgroupTargetAtoms / _rgroupTargetBonds
//    properties, sized consistently with the entry's own atoms and bonds;
//  - taken together, those index lists contain no duplicated index and hold
//    exactly as many indices as the target molecule has atoms / bonds.
TEST_CASE("includeTargetMolInResults") {
  auto core =
      "c1cc(-c2c([*:1])nn3nc([*:2])ccc23)nc(N(c2ccc([*:4])c([*:3])c2))n1"_smiles;
  REQUIRE(core);
  std::vector<ROMOL_SPTR> mols{
      "Cc1ccc2c(c3ccnc(Nc4cccc(c4)C(F)(F)F)n3)c(nn2n1)c5ccc(F)cc5"_smiles,
      "Cc1ccc2c(c3ccnc(Nc4ccc(F)c(F)c4)n3)c(nn2n1)c5ccc(F)cc5"_smiles,
      "Cc1ccc2c(c3ccnc(Nc4ccc5OCCOc5c4)n3)c(nn2n1)c6ccc(F)cc6"_smiles,
      "Cc1ccc2c(c3ccnc(Nc4ccc(Cl)c(c4)C(F)(F)F)n3)c(nn2n1)c5ccc(F)cc5"_smiles,
      "C1CC1c2nn3ncccc3c2c4ccnc(Nc5ccccc5)n4"_smiles,
      "Fc1ccc(Nc2nccc(n2)c3c(nn4ncccc34)C5CC5)cc1F"_smiles,
      "C1CCC(CC1)c2nn3ncccc3c2c4ccnc(Nc5ccccc5)n4"_smiles,
      "Fc1ccc(Nc2nccc(n2)c3c(nn4ncccc34)C5CCCCC5)cc1F"_smiles,
      "COCCOc1cnn2ncc(c3ccnc(Nc4cccc(OC)c4)n3)c2c1"_smiles,
      "Cc1ccc2c(c3ccnc(Nc4ccc(F)c(F)c4)n3)c(nn2n1)c5ccccc5"_smiles,
      "Cc1ccc2c(c3ccnc(Nc4ccc(Cl)c(c4)C(F)(F)F)n3)c(nn2n1)c5ccccc5"_smiles,
      "Cc1ccc2c(c3ccnc(Nc4ccc5OCCOc5c4)n3)c(nn2n1)c6ccccc6"_smiles,
      "Cc1ccc2c(c3ccnc(Nc4ccccc4)n3)c(nn2n1)c5cccc(c5)C(F)(F)F"_smiles,
      "Cc1ccc2c(c3ccnc(Nc4ccc(F)c(F)c4)n3)c(nn2n1)c5cccc(c5)C(F)(F)F"_smiles,
      "Cc1ccc2c(c3ccnc(Nc4ccc(Cl)c(c4)C(F)(F)F)n3)c(nn2n1)c5cccc(c5)C(F)(F)F"_smiles,
      "Cc1ccc2c(c3ccnc(Nc4ccc5OCCOc5c4)n3)c(nn2n1)c6cccc(c6)C(F)(F)F"_smiles,
  };
  // Return an explicit bool so the predicate does not copy the shared pointer
  // for every element.
  const bool areMolsNonNull =
      std::all_of(mols.begin(), mols.end(),
                  [](const auto &mol) { return static_cast<bool>(mol); });
  REQUIRE(areMolsNonNull);

  RGroupDecompositionParameters ps;
  ps.includeTargetMolInResults = true;
  RGroupDecomposition rgd(*core, ps);
  for (const auto &mol : mols) {
    CHECK(rgd.add(*mol) != -1);
  }
  REQUIRE(rgd.process());

  // Validates a single decomposition result (label -> molecule).
  auto checkRow = [](const RGroupRow &row) {
    // True for every atom except mapped dummies, i.e. atoms with atomic
    // number 0 that carry a non-zero atom map number.
    const auto isNonRAtom = [](const Atom *atom) {
      return atom->getAtomicNum() > 0 || !atom->getAtomMapNum();
    };
    // Checks that the index vectors in indexSets have no index in common and
    // hold expectedCount indices in total.
    const auto checkIndexCoverage =
        [](const std::set<std::vector<int>> &indexSets,
           unsigned int expectedCount) {
          std::vector<int> flattened;
          for (const auto &indices : indexSets) {
            flattened.insert(flattened.end(), indices.begin(), indices.end());
          }
          const std::set<int> unique(flattened.begin(), flattened.end());
          CHECK(flattened.size() == unique.size());
          CHECK(flattened.size() == expectedCount);
        };

    ROMOL_SPTR targetMol;
    // These are sets of int vectors rather than just plain int vectors
    // because there can be cyclic R groups with 2 attachment points;
    // in that case it is OK for 2 R groups to have exactly the same
    // target atom and bond indices
    std::set<std::vector<int>> allAtomIndices;
    std::set<std::vector<int>> allBondIndices;
    for (const auto &pair : row) {
      const auto &label = pair.first;
      const auto &rgroup = pair.second;
      if (label == RGroupData::getMolLabel()) {
        targetMol = rgroup;
        continue;
      }
      auto atoms = rgroup->atoms();
      const auto numNonRAtoms = static_cast<unsigned int>(
          std::count_if(atoms.begin(), atoms.end(), isNonRAtom));
      // every core / R group is expected to hold at least one mapped dummy
      CHECK(rgroup->getNumAtoms() > numNonRAtoms);
      unsigned int numBonds = 0;
      if (label == RGroupData::getCoreLabel()) {
        // for the core, only bonds with no mapped dummy at either end are
        // counted
        auto bonds = rgroup->bonds();
        numBonds = static_cast<unsigned int>(std::count_if(
            bonds.begin(), bonds.end(), [&isNonRAtom](const auto &bond) {
              return isNonRAtom(bond->getBeginAtom()) &&
                     isNonRAtom(bond->getEndAtom());
            }));
      } else {
        numBonds = rgroup->getNumBonds();
      }
      std::vector<int> atomIndices;
      std::vector<int> bondIndices;
      CHECK(rgroup->getPropIfPresent(common_properties::_rgroupTargetAtoms,
                                     atomIndices));
      CHECK(rgroup->getPropIfPresent(common_properties::_rgroupTargetBonds,
                                     bondIndices));
      CHECK(atomIndices.size() == numNonRAtoms);
      CHECK(bondIndices.size() == numBonds);
      allAtomIndices.insert(std::move(atomIndices));
      allBondIndices.insert(std::move(bondIndices));
    }
    REQUIRE(targetMol);
    checkIndexCoverage(allAtomIndices, targetMol->getNumAtoms());
    checkIndexCoverage(allBondIndices, targetMol->getNumBonds());
  };

  SECTION("rows") {
    const auto rows = rgd.getRGroupsAsRows();
    CHECK(rows.size() == mols.size());
    for (const auto &row : rows) {
      checkRow(row);
    }
  }
  SECTION("columns") {
    const auto cols = rgd.getRGroupsAsColumns();
    // Every column must hold one entry per input molecule; verify this once,
    // up front, so the transposition below cannot index past a short column.
    for (const auto &pair : cols) {
      REQUIRE(pair.second.size() == mols.size());
    }
    // Transpose the column-wise results into rows so that the same per-row
    // checks can be applied.
    RGroupRows rows;
    rows.reserve(mols.size());
    for (size_t i = 0; i < mols.size(); ++i) {
      RGroupRow row;
      for (const auto &pair : cols) {
        row.emplace(pair.first, pair.second.at(i));
      }
      rows.push_back(std::move(row));
    }
    CHECK(rows.size() == mols.size());
    for (const auto &row : rows) {
      checkRow(row);
    }
  }
}
|