mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-04 21:54:27 +08:00
* fix handling double bond stereo extraction * add tests * Update Code/GraphMol/Subset.cpp Co-authored-by: Greg Landrum <greg.landrum@gmail.com> --------- Co-authored-by: Greg Landrum <greg.landrum@gmail.com>
879 lines
28 KiB
C++
879 lines
28 KiB
C++
//
|
|
// Copyright (C) 2024 Greg Landrum and other RDKit contributors
|
|
//
|
|
// @@ All Rights Reserved @@
|
|
// This file is part of the RDKit.
|
|
// The contents are covered by the terms of the BSD license
|
|
// which is included in the file license.txt, found at the root
|
|
// of the RDKit source tree.
|
|
//
|
|
|
|
#include <catch2/catch_all.hpp>
|
|
|
|
#include <GraphMol/RDKitBase.h>
|
|
#include <GraphMol/MolOps.h>
|
|
#include <GraphMol/Subset.h>
|
|
#include <GraphMol/test_fixtures.h>
|
|
|
|
#include <GraphMol/FileParsers/FileParsers.h>
|
|
#include <GraphMol/SmilesParse/SmilesParse.h>
|
|
#include <GraphMol/SmilesParse/SmilesWrite.h>
|
|
|
|
using namespace RDKit;
|
|
|
|
// Regression test for github #7556: neither bond from the stereogenic sulfur
// (atom 9) to its two nitrogen neighbours (atoms 8 and 11) may be flagged as
// conjugated, and the sulfur must still carry a chiral tag after parsing.
TEST_CASE("github #7556: chiral sulfur in conjugated rings") {
  SECTION("as reported") {
    const auto mol = "CC1=CC(Cl)=CC2=C1N=[S@](C)N=C2N"_smiles;
    REQUIRE(mol);

    const auto *firstSNBond = mol->getBondBetweenAtoms(8, 9);
    CHECK(!firstSNBond->getIsConjugated());

    const auto *secondSNBond = mol->getBondBetweenAtoms(9, 11);
    CHECK(!secondSNBond->getIsConjugated());

    const auto sulfurTag = mol->getAtomWithIdx(9)->getChiralTag();
    REQUIRE(sulfurTag != Atom::CHI_UNSPECIFIED);
  }
}
|
|
|
|
// MolOps::getAvgMolWt(): average molecular weight for implicit, explicit and
// isotopically labelled hydrogens. When `true` is passed as the second
// argument the expected value is the weight of the carbon alone.
TEST_CASE("getAvgMolWt") {
  using Catch::Matchers::WithinAbs;

  SECTION("basics") {
    auto methane = "C"_smiles;
    REQUIRE(methane);
    CHECK_THAT(MolOps::getAvgMolWt(*methane), WithinAbs(16.043, 0.001));
    CHECK_THAT(MolOps::getAvgMolWt(*methane, true), WithinAbs(12.011, 0.001));

    // the same numbers are expected once the Hs are real atoms in the graph
    MolOps::addHs(*methane);
    CHECK_THAT(MolOps::getAvgMolWt(*methane), WithinAbs(16.043, 0.001));
    CHECK_THAT(MolOps::getAvgMolWt(*methane, true), WithinAbs(12.011, 0.001));
  }

  SECTION("Hs in SMILES") {
    auto methane = "[CH4]"_smiles;
    REQUIRE(methane);
    CHECK_THAT(MolOps::getAvgMolWt(*methane), WithinAbs(16.043, 0.001));
    CHECK_THAT(MolOps::getAvgMolWt(*methane, true), WithinAbs(12.011, 0.001));
  }

  SECTION("isotopes") {
    auto deuteroMethane = "C[2H]"_smiles;
    REQUIRE(deuteroMethane);
    CHECK_THAT(MolOps::getAvgMolWt(*deuteroMethane), WithinAbs(17.0, 0.1));
  }
}
|
|
|
|
// MolOps::getExactMolWt(): exact (monoisotopic) weights for implicit and
// explicit hydrogens and for isotope-labelled atoms. The "Cl" section also
// cross-checks getAvgMolWt() for unlabelled and isotope-labelled chlorine.
TEST_CASE("getExactMolWt") {
  using Catch::Matchers::WithinAbs;

  SECTION("basics") {
    auto methane = "C"_smiles;
    REQUIRE(methane);
    CHECK_THAT(MolOps::getExactMolWt(*methane), WithinAbs(16.031, 0.001));
    CHECK_THAT(MolOps::getExactMolWt(*methane, true), WithinAbs(12.000, 0.001));

    // the same numbers are expected once the Hs are real atoms in the graph
    MolOps::addHs(*methane);
    CHECK_THAT(MolOps::getExactMolWt(*methane), WithinAbs(16.031, 0.001));
    CHECK_THAT(MolOps::getExactMolWt(*methane, true), WithinAbs(12.000, 0.001));
  }

  SECTION("Hs in SMILES") {
    auto methane = "[CH4]"_smiles;
    REQUIRE(methane);
    CHECK_THAT(MolOps::getExactMolWt(*methane), WithinAbs(16.031, 0.001));
    CHECK_THAT(MolOps::getExactMolWt(*methane, true), WithinAbs(12.000, 0.001));
  }

  SECTION("isotopes") {
    auto deuteroMethane = "C[2H]"_smiles;
    REQUIRE(deuteroMethane);
    CHECK_THAT(MolOps::getExactMolWt(*deuteroMethane),
               WithinAbs(17.037, 0.001));
    CHECK_THAT(MolOps::getExactMolWt(*deuteroMethane, true),
               WithinAbs(12.000, 0.001));
  }

  SECTION("Cl") {
    // no isotope label: the exact weight uses the 35Cl mass, the average
    // weight uses the standard atomic weight
    auto hcl = "Cl"_smiles;
    REQUIRE(hcl);
    CHECK_THAT(MolOps::getExactMolWt(*hcl), WithinAbs(34.9688 + 1.0078, 0.001));
    CHECK_THAT(MolOps::getAvgMolWt(*hcl), WithinAbs(35.453 + 1.008, 0.001));

    // with an explicit isotope both weights are expected to use that
    // isotope's mass for the chlorine
    auto hcl35 = "[35ClH]"_smiles;
    REQUIRE(hcl35);
    CHECK_THAT(MolOps::getExactMolWt(*hcl35),
               WithinAbs(34.9688 + 1.0078, 0.001));
    CHECK_THAT(MolOps::getAvgMolWt(*hcl35), WithinAbs(34.9688 + 1.008, 0.001));

    auto hcl36 = "[36ClH]"_smiles;
    REQUIRE(hcl36);
    CHECK_THAT(MolOps::getExactMolWt(*hcl36),
               WithinAbs(35.9683 + 1.0078, 0.001));
    CHECK_THAT(MolOps::getAvgMolWt(*hcl36), WithinAbs(35.9683 + 1.008, 0.001));
  }
}
|
|
|
|
// MolOps::getMolFormula(): formula strings for neutral and charged species
// and for isotope-labelled molecules. Judging by the expected strings, a
// second argument of `true` lists isotopes separately, and an additional third
// argument of `false` spells hydrogen isotopes as [2H]/[3H] instead of D/T.
TEST_CASE("getMolFormula") {
  SECTION("basics") {
    auto mol = "C"_smiles;
    REQUIRE(mol);
    CHECK(MolOps::getMolFormula(*mol) == "CH4");
    MolOps::addHs(*mol);
    CHECK(MolOps::getMolFormula(*mol) == "CH4");

    mol = "[CH4]"_smiles;
    REQUIRE(mol);
    CHECK(MolOps::getMolFormula(*mol) == "CH4");

    mol = "CO"_smiles;
    REQUIRE(mol);
    CHECK(MolOps::getMolFormula(*mol) == "CH4O");
    MolOps::addHs(*mol);
    CHECK(MolOps::getMolFormula(*mol) == "CH4O");

    mol = "C(=O)N"_smiles;
    REQUIRE(mol);
    CHECK(MolOps::getMolFormula(*mol) == "CH3NO");

    mol = "C(=O)=O"_smiles;
    REQUIRE(mol);
    CHECK(MolOps::getMolFormula(*mol) == "CO2");

    // charged species: the net charge is appended to the formula
    mol = "C(=O)[O-]"_smiles;
    REQUIRE(mol);
    CHECK(MolOps::getMolFormula(*mol) == "CHO2-");

    mol = "C([O-])[O-]"_smiles;
    REQUIRE(mol);
    CHECK(MolOps::getMolFormula(*mol) == "CH2O2-2");

    // zwitterion: no charge suffix expected
    mol = "C([NH3+])[O-]"_smiles;
    REQUIRE(mol);
    CHECK(MolOps::getMolFormula(*mol) == "CH5NO");

    mol = "C([NH3+])O"_smiles;
    REQUIRE(mol);
    CHECK(MolOps::getMolFormula(*mol) == "CH6NO+");

    mol = "C([NH3+])[NH3+]"_smiles;
    REQUIRE(mol);
    CHECK(MolOps::getMolFormula(*mol) == "CH8N2+2");
  }

  SECTION("H isotopes") {
    auto mol = "[2H]C([3H])O"_smiles;
    REQUIRE(mol);
    CHECK(MolOps::getMolFormula(*mol) == "CH4O");
    CHECK(MolOps::getMolFormula(*mol, true) == "CH2DTO");
    CHECK(MolOps::getMolFormula(*mol, true, false) == "CH2[2H][3H]O");
  }

  SECTION("isotopes") {
    auto mol = "[13CH3]C[13CH2]C"_smiles;
    REQUIRE(mol);
    CHECK(MolOps::getMolFormula(*mol) == "C4H10");
    CHECK(MolOps::getMolFormula(*mol, true) == "C2[13C]2H10");
    CHECK(MolOps::getMolFormula(*mol, true, false) == "C2[13C]2H10");

    mol = "[13CH3]C([2H])O"_smiles;
    REQUIRE(mol);
    CHECK(MolOps::getMolFormula(*mol) == "C2H6O");
    CHECK(MolOps::getMolFormula(*mol, true) == "C[13C]H5DO");
    CHECK(MolOps::getMolFormula(*mol, true, false) == "C[13C]H5[2H]O");

    mol = "[13CH3]C[13CH2]CB(O)O[2H]"_smiles;
    REQUIRE(mol);
    CHECK(MolOps::getMolFormula(*mol) == "C4H11BO2");
    CHECK(MolOps::getMolFormula(*mol, true) == "C2[13C]2H10DBO2");
    CHECK(MolOps::getMolFormula(*mol, true, false) == "C2[13C]2H10[2H]BO2");
  }
}
|
|
|
|
// Regression test for github #8121: a molecule made of two disconnected
// cubane fragments is expected to report 12 rings in total.
TEST_CASE(
    "github #8121: symmetric ring finding not returning correct results for molecules with fragments") {
  const auto cubanePair = "C12C3C4C1C5C2C3C45.C12C3C4C1C5C2C3C45"_smiles;
  REQUIRE(cubanePair);

  auto *ringInfo = cubanePair->getRingInfo();
  CHECK(ringInfo->numRings() == 12);
}
|
|
|
|
// Checks that MolOps::setTerminalAtomCoords() does not throw for the two
// geometries below. Going by the test name, these are inputs that could
// presumably trigger a division by zero inside the function.
TEST_CASE("check division by zero in setTerminalAtomCoords") {
|
|
// Carbon at the origin bonded to three F and one Cl; F atoms 2 and 3 of the
// mol block sit at exactly opposite positions about the carbon.
SECTION("degree 4") {
|
|
auto m = R"CTAB(
|
|
Mrv2311 11162401483D
|
|
|
|
0 0 0 0 0 999 V3000
|
|
M V30 BEGIN CTAB
|
|
M V30 COUNTS 5 4 0 0 0
|
|
M V30 BEGIN ATOM
|
|
M V30 1 C 0 0 0 0
|
|
M V30 2 F -0.7971 -0.9945 1.275 0
|
|
M V30 3 F 0.7971 0.9945 -1.275 0
|
|
M V30 4 F -1.2069 -0.1568 -0.8768 0
|
|
M V30 5 Cl 1.1223 1.1513 0.8154 0
|
|
M V30 END ATOM
|
|
M V30 BEGIN BOND
|
|
M V30 1 1 1 2
|
|
M V30 2 1 1 3
|
|
M V30 3 1 1 4
|
|
M V30 4 1 1 5
|
|
M V30 END BOND
|
|
M V30 END CTAB
|
|
M END
|
|
)CTAB"_ctab;
|
|
REQUIRE(m);
|
|
|
|
// 0-based indices: 4 is the Cl, 0 is the central C. NOTE(review): presumably
// (atom to place, its bonded neighbour) -- confirm against the MolOps API.
CHECK_NOTHROW(MolOps::setTerminalAtomCoords(*m, 4, 0));
|
|
}
|
|
// C, N and N all share the same y coordinate (collinear); the H bonded to the
// C is stored at the origin.
SECTION("degree 2, aligned 2nd neighbors") {
|
|
// This looks like a weird mol, but it's an intermediate
|
|
// state in AddHs.
|
|
auto mb = R"CTAB(
|
|
Mrv1908 06032010402D
|
|
|
|
0 0 0 0 0 999 V3000
|
|
M V30 BEGIN CTAB
|
|
M V30 COUNTS 4 3 0 0 0
|
|
M V30 BEGIN ATOM
|
|
M V30 1 C -19.8317 16.5 0 1 CHG=-1
|
|
M V30 2 N -18.2917 16.5 0 2 CHG=1
|
|
M V30 3 N -16.7517 16.5 0 3
|
|
M V30 4 H 0 0 0 3
|
|
M V30 END ATOM
|
|
M V30 BEGIN BOND
|
|
M V30 1 1 1 2
|
|
M V30 2 3 2 3
|
|
M V30 3 1 1 4
|
|
M V30 END BOND
|
|
M V30 END CTAB
|
|
M END
|
|
)CTAB";
|
|
|
|
v2::FileParsers::MolFileParserParams p;
|
|
// keep the explicit H: the test requires all 4 atoms to be present
p.removeHs = false;
|
|
|
|
auto m = v2::FileParsers::MolFromMolBlock(mb, p);
|
|
REQUIRE(m);
|
|
REQUIRE(m->getNumAtoms() == 4);
|
|
|
|
// 0-based indices: 3 is the H, 0 is the C it is bonded to
CHECK_NOTHROW(MolOps::setTerminalAtomCoords(*m, 3, 0));
|
|
}
|
|
}
|
|
|
|
// Checks which carbon/nitrogen-to-metal bonds end up as DATIVE after parsing.
// NOTE(review): the conversion itself presumably happens in the organometallics
// cleanup step run during sanitization -- nothing in this test calls it
// explicitly.
TEST_CASE("cleanuporganometallics and carbon") {
|
|
// Each entry pairs a SMILES with the (begin, end) atom-index pairs of the
// bonds that are expected to be DATIVE, with the bond starting at `begin`.
SECTION("basics") {
|
|
std::vector<std::pair<std::string,
|
|
std::vector<std::pair<unsigned int, unsigned int>>>>
|
|
data = {
|
|
{"C=[CH2][Fe]", {{1, 2}}}, // this is a silly example, but it's the
|
|
// simplest one I could think of
|
|
{"[CH2]1=[CH2]2.[Fe]12", {{1, 2}, {0, 2}}},
|
|
{"[CH2]1=[CH]2-[CH2-]3.[Fe]123", {{1, 3}, {0, 3}, {2, 3}}},
|
|
{"[CH]12=[CH]3[CH]4=[CH]5[CH-]16.[Fe]23456",
|
|
{{0, 5}, {1, 5}, {2, 5}, {3, 5}, {4, 5}}},
|
|
{"[Fe]12.[CH2]1=[CH2]2",
|
|
{{1, 0}, {2, 0}}}, // reverse the original atom order
|
|
{"[CH]12=[CH]3[CH]4=[CH]5[CH]6=[CH]17.[Fe]234567",
|
|
{{0, 6}, {1, 6}, {2, 6}, {3, 6}, {4, 6}, {5, 6}}},
|
|
{"[cH]12[cH]3[cH]4[cH]5[cH]6[cH]17.[Fe]234567",
|
|
{{0, 6}, {1, 6}, {2, 6}, {3, 6}, {4, 6}, {5, 6}}},
|
|
{"[cH]12[cH]3[cH]4[cH]5[nH]16.[Fe]23456",
|
|
{{0, 5}, {1, 5}, {2, 5}, {3, 5}, {4, 5}}},
|
|
{"[CH]12=[CH]3[CH]4=[CH]5[NH]16.[Fe]23456",
|
|
{{0, 5}, {1, 5}, {2, 5}, {3, 5}, {4, 5}}},
|
|
};
|
|
for (const auto &pr : data) {
|
|
// report the SMILES being processed if an assertion below fails
INFO(pr.first);
|
|
auto mol = v2::SmilesParse::MolFromSmiles(pr.first);
|
|
REQUIRE(mol);
|
|
for (const auto &pair : pr.second) {
|
|
// the bond must exist, start at the first atom of the pair, and be dative
REQUIRE(mol->getBondBetweenAtoms(pair.first, pair.second) != nullptr);
|
|
CHECK(mol->getBondBetweenAtoms(pair.first, pair.second)
|
|
->getBeginAtomIdx() == pair.first);
|
|
CHECK(
|
|
mol->getBondBetweenAtoms(pair.first, pair.second)->getBondType() ==
|
|
Bond::BondType::DATIVE);
|
|
}
|
|
}
|
|
}
|
|
// Counterparts of the first four SMILES above with fewer hydrogens on the
// carbons: none of their bonds may come out as DATIVE.
SECTION("no dative bonds") {
|
|
std::vector<std::string> smileses = {
|
|
"C=[CH][Fe]",
|
|
"[CH]1=[CH]2.[Fe]12",
|
|
"[CH]1=[C]2-[CH-]3.[Fe]123",
|
|
"[C]12=[C]3[C]4=[C]5[C-]16.[Fe]23456",
|
|
};
|
|
for (const auto &smiles : smileses) {
|
|
INFO(smiles);
|
|
auto mol = v2::SmilesParse::MolFromSmiles(smiles);
|
|
REQUIRE(mol);
|
|
for (const auto bond : mol->bonds()) {
|
|
CHECK(bond->getBondType() != Bond::BondType::DATIVE);
|
|
}
|
|
}
|
|
}
|
|
// Mol block with a Pt (atom 9 of the block) bonded to four VAL=5 carbons of an
// eight-membered ring (atoms 1, 2, 5, 6) and to two methyl carbons (atoms 10, 11).
SECTION("github #8312") {
|
|
std::string mb = R"CTAB(
|
|
ChemDraw03012503262D
|
|
|
|
0 0 0 0 0 0 V3000
|
|
M V30 BEGIN CTAB
|
|
M V30 COUNTS 21 24 0 0 0
|
|
M V30 BEGIN ATOM
|
|
M V30 1 C -1.822529 0.405221 0.000000 0 VAL=5
|
|
M V30 2 C -1.822529 -0.419779 0.000000 0 VAL=5
|
|
M V30 3 C -1.239167 -1.003142 0.000000 0
|
|
M V30 4 C -0.414167 -1.003142 0.000000 0
|
|
M V30 5 C 0.169196 -0.419779 0.000000 0 VAL=5
|
|
M V30 6 C 0.169196 0.405221 0.000000 0 VAL=5
|
|
M V30 7 C -0.414167 0.988584 0.000000 0
|
|
M V30 8 C -1.239167 0.988584 0.000000 0
|
|
M V30 9 Pt 1.335541 -0.028481 0.000000 0 VAL=6
|
|
M V30 10 C 1.697726 0.712766 0.000000 0
|
|
M V30 11 C 1.796387 -0.712766 0.000000 0
|
|
M V30 12 H 2.520757 0.769729 0.000000 0
|
|
M V30 13 H 1.236879 1.397051 0.000000 0
|
|
M V30 14 H 2.059910 1.454013 0.000000 0
|
|
M V30 15 H 2.619419 -0.655803 0.000000 0
|
|
M V30 16 H 1.434203 -1.454013 0.000000 0
|
|
M V30 17 H 2.257234 -1.397051 0.000000 0
|
|
M V30 18 H -2.619419 0.618747 0.000000 0
|
|
M V30 19 H -2.619419 -0.633305 0.000000 0
|
|
M V30 20 H 0.382722 1.202110 0.000000 0
|
|
M V30 21 H 0.581696 -1.134250 0.000000 0
|
|
M V30 END ATOM
|
|
M V30 BEGIN BOND
|
|
M V30 1 2 1 2
|
|
M V30 2 1 2 3
|
|
M V30 3 1 3 4
|
|
M V30 4 1 4 5
|
|
M V30 5 2 5 6
|
|
M V30 6 1 6 7
|
|
M V30 7 1 7 8
|
|
M V30 8 1 8 1
|
|
M V30 9 1 9 10
|
|
M V30 10 1 9 11
|
|
M V30 11 1 10 12
|
|
M V30 12 1 10 13
|
|
M V30 13 1 10 14
|
|
M V30 14 1 11 15
|
|
M V30 15 1 11 16
|
|
M V30 16 1 11 17
|
|
M V30 17 1 9 6
|
|
M V30 18 1 9 5
|
|
M V30 19 1 9 1
|
|
M V30 20 1 9 2
|
|
M V30 21 1 1 18
|
|
M V30 22 1 2 19
|
|
M V30 23 1 6 20
|
|
M V30 24 1 5 21
|
|
M V30 END BOND
|
|
M V30 END CTAB
|
|
M END)CTAB";
|
|
auto mol = v2::FileParsers::MolFromMolBlock(mb);
|
|
REQUIRE(mol);
|
|
// Indices below are 0-based (mol block number minus one): atom 8 is the Pt.
// The four ring-carbon-to-Pt bonds are expected to be DATIVE ...
REQUIRE(mol->getBondBetweenAtoms(5, 8));
|
|
CHECK(mol->getBondBetweenAtoms(5, 8)->getBondType() ==
|
|
Bond::BondType::DATIVE);
|
|
REQUIRE(mol->getBondBetweenAtoms(4, 8));
|
|
CHECK(mol->getBondBetweenAtoms(4, 8)->getBondType() ==
|
|
Bond::BondType::DATIVE);
|
|
REQUIRE(mol->getBondBetweenAtoms(0, 8));
|
|
CHECK(mol->getBondBetweenAtoms(0, 8)->getBondType() ==
|
|
Bond::BondType::DATIVE);
|
|
REQUIRE(mol->getBondBetweenAtoms(1, 8));
|
|
CHECK(mol->getBondBetweenAtoms(1, 8)->getBondType() ==
|
|
Bond::BondType::DATIVE);
|
|
// ... while the two methyl-carbon-to-Pt bonds must stay SINGLE.
REQUIRE(mol->getBondBetweenAtoms(9, 8));
|
|
CHECK(mol->getBondBetweenAtoms(9, 8)->getBondType() ==
|
|
Bond::BondType::SINGLE);
|
|
REQUIRE(mol->getBondBetweenAtoms(10, 8));
|
|
CHECK(mol->getBondBetweenAtoms(10, 8)->getBondType() ==
|
|
Bond::BondType::SINGLE);
|
|
}
|
|
}
|
|
|
|
// Pair of per-index flags used by the copyMolSubset tests: one bool per atom
// and one per bond of the source molecule, true where it is part of the
// selection.
struct SelectedComponents {
  std::vector<bool> selected_atoms{};
  std::vector<bool> selected_bonds{};
};
|
|
|
|
// helper api to get test mol for copyMolSubset api.
|
|
static std::unique_ptr<RDKit::RWMol> getTestMol() {
|
|
std::unique_ptr<RDKit::RWMol> mol{RDKit::SmilesToMol("CCCCCCCCCCCCCCC")};
|
|
for (auto &atom : mol->atoms()) {
|
|
atom->setProp("orig_idx", atom->getIdx());
|
|
}
|
|
|
|
for (auto &bond : mol->bonds()) {
|
|
bond->setProp("orig_idx", bond->getIdx());
|
|
}
|
|
|
|
return mol;
|
|
}
|
|
|
|
// Helper api to get the included atoms and bonds from test atom indices.
|
|
[[nodiscard]] static SelectedComponents get_selected_components(
|
|
::RDKit::RWMol &mol, const std::vector<unsigned int> &atom_ids) {
|
|
const auto num_atoms = mol.getNumAtoms();
|
|
std::vector<bool> selected_atoms(num_atoms);
|
|
|
|
for (auto &atom_idx : atom_ids) {
|
|
if (atom_idx < num_atoms) {
|
|
selected_atoms[atom_idx] = true;
|
|
}
|
|
}
|
|
|
|
std::vector<bool> selected_bonds(mol.getNumBonds());
|
|
for (auto &bond : mol.bonds()) {
|
|
if (selected_atoms[bond->getBeginAtomIdx()] &&
|
|
selected_atoms[bond->getEndAtomIdx()]) {
|
|
selected_bonds[bond->getIdx()] = true;
|
|
}
|
|
}
|
|
|
|
return {std::move(selected_atoms), std::move(selected_bonds)};
|
|
}
|
|
|
|
// copyMolSubset() with atoms only: duplicated and out-of-range indices in the
// request must not change the outcome -- the copy holds exactly the atoms
// 0, 2, ..., 12 of the test chain, in that order.
TEST_CASE("test_extract_atoms", "[copyMolSubset]") {
  using IndexList = std::vector<unsigned int>;

  auto requested = GENERATE(
      // unique values
      IndexList{0, 2, 4, 6, 8, 10, 12},
      // duplicate values
      IndexList{0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12},
      // values outside of atom indices
      IndexList{0, 2, 4, 6, 8, 10, 12, 100, 200, 300});

  const IndexList expected{0, 2, 4, 6, 8, 10, 12};

  auto source = getTestMol();
  auto subset = copyMolSubset(*source, requested);
  REQUIRE(subset->getNumAtoms() == expected.size());

  // map each copied atom back to its index in the source molecule
  IndexList origins;
  for (const auto *atom : subset->atoms()) {
    origins.push_back(atom->getProp<unsigned int>("orig_idx"));
  }

  CHECK(origins == expected);
}
|
|
|
|
// copyMolSubset() on the two end atoms of each bond in turn: the copy must
// consist of exactly that bond (with its properties preserved) and its two
// atoms, in begin/end order.
TEST_CASE("test_extract_bonds", "[copyMolSubset]") {
  auto source = getTestMol();

  // tag every bond so that property propagation can be verified on the copy
  for (auto *bond : source->bonds()) {
    bond->setProp("test_prop", true);
  }

  for (const auto *bond : source->bonds()) {
    const auto first = bond->getBeginAtomIdx();
    const auto second = bond->getEndAtomIdx();
    auto pairMol = copyMolSubset(*source, {first, second});

    REQUIRE(pairMol->getNumBonds() == 1);
    const auto *copiedBond = pairMol->getBondWithIdx(0);
    CHECK(copiedBond->getProp<bool>("test_prop") == true);

    CHECK(pairMol->getNumAtoms() == 2);
    const auto *copiedFirst = pairMol->getAtomWithIdx(0);
    CHECK(copiedFirst->getProp<unsigned int>("orig_idx") == first);
    const auto *copiedSecond = pairMol->getAtomWithIdx(1);
    CHECK(copiedSecond->getProp<unsigned int>("orig_idx") == second);
  }
}
|
|
|
|
// copyMolSubset() and substance groups: one "COP" substance group with varying
// atoms, bonds and parent atoms is attached to the test chain, which is then
// subset with several atom selections. The group is required to be present in
// the copy exactly when every atom, parent atom and bond it references is part
// of the selection; if it is present, everything it references in the copy
// must trace back to a selected atom/bond of the source.
TEST_CASE("test_extract_substance_groups", "[copyMolSubset]") {
  using IndexList = std::vector<unsigned int>;

  auto mol = getTestMol();
  ::RDKit::SubstanceGroup sgroup{mol.get(), "COP"};

  auto test_sgroup_atoms =
      GENERATE(IndexList{}, IndexList{0, 1, 2, 3, 4}, IndexList{9, 10, 11});
  sgroup.setAtoms(test_sgroup_atoms);

  auto test_sgroup_bonds =
      GENERATE(IndexList{}, IndexList{0, 1, 2}, IndexList{3, 4, 5});
  sgroup.setBonds(test_sgroup_bonds);

  auto test_sgroup_patoms =
      GENERATE(IndexList{}, IndexList{3, 4}, IndexList{5, 6});
  sgroup.setParentAtoms(test_sgroup_patoms);

  ::RDKit::addSubstanceGroup(*mol, std::move(sgroup));

  // true when every index is in range and flagged in `mask`; an empty index
  // list trivially satisfies this
  const auto all_flagged = [](const auto &indices, const auto &mask) {
    return std::ranges::all_of(indices, [&mask](const auto idx) {
      return idx < mask.size() && mask[idx];
    });
  };

  auto test_selected_atoms =
      GENERATE(IndexList{0, 1, 2, 3, 4, 5}, IndexList{0, 2, 4, 6, 8, 10, 12},
               IndexList{0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12},
               IndexList{0, 2, 4, 6, 8, 10, 12, 100, 200, 300});

  auto extracted_mol = copyMolSubset(*mol, test_selected_atoms);

  auto [selected_atoms, selected_bonds] =
      get_selected_components(*mol, test_selected_atoms);

  const bool group_kept =
      ::RDKit::getSubstanceGroups(*extracted_mol).size() == 1;
  REQUIRE(group_kept == (all_flagged(test_sgroup_atoms, selected_atoms) &&
                         all_flagged(test_sgroup_patoms, selected_atoms) &&
                         all_flagged(test_sgroup_bonds, selected_bonds)));

  if (!group_kept) {
    return;  // nothing further to verify for this combination
  }

  const auto &kept_group = ::RDKit::getSubstanceGroups(*extracted_mol)[0];

  for (const auto atom_idx : kept_group.getAtoms()) {
    const auto *atom = extracted_mol->getAtomWithIdx(atom_idx);
    CHECK(selected_atoms[atom->getProp<unsigned int>("orig_idx")] == true);
  }

  for (const auto atom_idx : kept_group.getParentAtoms()) {
    const auto *atom = extracted_mol->getAtomWithIdx(atom_idx);
    CHECK(selected_atoms[atom->getProp<unsigned int>("orig_idx")] == true);
  }

  for (const auto bond_idx : kept_group.getBonds()) {
    const auto *bond = extracted_mol->getBondWithIdx(bond_idx);
    CHECK(selected_bonds[bond->getProp<unsigned int>("orig_idx")] == true);
  }
}
|
|
|
|
// copyMolSubset() and stereo groups: a single STEREO_ABSOLUTE group built from
// varying atoms and bonds of the test chain is attached to the molecule, which
// is then subset with several atom selections. Whenever the copy carries
// exactly one stereo group, every atom and bond in that group must trace back
// (via "orig_idx") to a selected atom/bond of the source molecule.
//
// "orig_idx" is stored as unsigned int by getTestMol(), so it is read back as
// unsigned int here as well, matching the other copyMolSubset tests and
// keeping the vector index unsigned.
TEST_CASE("test_extract_stereo_groups", "[copyMolSubset]") {
  auto mol = getTestMol();

  auto test_stereo_group_atoms = GENERATE(
      std::vector<unsigned int>{}, std::vector<unsigned int>{0, 1, 2, 3, 4},
      std::vector<unsigned int>{9, 10, 11});

  std::vector<::RDKit::Atom *> sg_atoms;
  for (auto &idx : test_stereo_group_atoms) {
    sg_atoms.push_back(mol->getAtomWithIdx(idx));
  }

  auto test_stereo_group_bonds =
      GENERATE(std::vector<unsigned int>{}, std::vector<unsigned int>{0, 1, 2},
               std::vector<unsigned int>{3, 4, 5});

  std::vector<::RDKit::Bond *> sg_bonds;
  for (auto &idx : test_stereo_group_bonds) {
    sg_bonds.push_back(mol->getBondWithIdx(idx));
  }

  ::RDKit::StereoGroup stereo_group{::RDKit::StereoGroupType::STEREO_ABSOLUTE,
                                    std::move(sg_atoms), std::move(sg_bonds)};
  mol->setStereoGroups({std::move(stereo_group)});

  auto test_selected_atoms = GENERATE(
      std::vector<unsigned int>{0, 1, 2, 3, 4, 5},
      std::vector<unsigned int>{0, 2, 4, 6, 8, 10, 12},
      std::vector<unsigned int>{0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12},
      std::vector<unsigned int>{0, 2, 4, 6, 8, 10, 12, 100, 200, 300});

  auto extracted_mol = copyMolSubset(*mol, test_selected_atoms);

  auto [selected_atoms, selected_bonds] =
      get_selected_components(*mol, test_selected_atoms);

  // Only the contents of a surviving group are verified; whether the group
  // survives at all is not asserted by this test.
  auto flag = extracted_mol->getStereoGroups().size() == 1;

  if (flag) {
    auto &extracted_stereo_group = extracted_mol->getStereoGroups()[0];
    for (auto &atom : extracted_stereo_group.getAtoms()) {
      CHECK(selected_atoms[atom->getProp<unsigned int>("orig_idx")] == true);
    }

    for (auto &bond : extracted_stereo_group.getBonds()) {
      CHECK(selected_bonds[bond->getProp<unsigned int>("orig_idx")] == true);
    }
  }
}
|
|
|
|
// copyMolSubset() with explicit atom AND bond lists on propane ("CCC"):
//  - a bond list without the atoms it needs -> ValueErrorException
//  - valid atoms with a subset of / all / none of the bonds -> the copy has
//    exactly the requested atoms and bonds
//  - an atom or bond index that does not exist in the molecule ->
//    IndexErrorException
TEST_CASE("test_manual_atom_bond_subset", "[copyMolSubset]") {
  auto m = "CCC"_smiles;
  // stop here instead of dereferencing a null pointer if parsing ever fails
  REQUIRE(m);
  {
    std::vector<unsigned int> atoms = {};
    std::vector<unsigned int> bonds = {0};
    // this should throw a ValueErrorException
    REQUIRE_THROWS_AS(copyMolSubset(*m, atoms, bonds), ValueErrorException);
  }
  {
    // all atoms, first bond only
    std::vector<unsigned int> atoms = {0, 1, 2};
    std::vector<unsigned int> bonds = {0};
    auto m2 = copyMolSubset(*m, atoms, bonds);
    CHECK(m2->getNumAtoms() == 3);
    CHECK(m2->getNumBonds() == 1);
  }
  {
    // all atoms, all bonds
    std::vector<unsigned int> atoms = {0, 1, 2};
    std::vector<unsigned int> bonds = {0, 1};
    auto m2 = copyMolSubset(*m, atoms, bonds);
    CHECK(m2->getNumAtoms() == 3);
    CHECK(m2->getNumBonds() == 2);
  }
  {
    // all atoms, no bonds
    std::vector<unsigned int> atoms = {0, 1, 2};
    std::vector<unsigned int> bonds = {};
    auto m2 = copyMolSubset(*m, atoms, bonds);
    CHECK(m2->getNumAtoms() == 3);
    CHECK(m2->getNumBonds() == 0);
  }
  {
    // atom index 3 does not exist in a three-atom molecule
    std::vector<unsigned int> atoms = {0, 1, 2, 3};
    std::vector<unsigned int> bonds = {0, 1};
    REQUIRE_THROWS_AS(copyMolSubset(*m, atoms, bonds), IndexErrorException);
  }
  {
    // bond index 2 does not exist in a two-bond molecule
    std::vector<unsigned int> atoms = {0, 1, 2};
    std::vector<unsigned int> bonds = {2};
    REQUIRE_THROWS_AS(copyMolSubset(*m, atoms, bonds), IndexErrorException);
  }
}
|
|
|
|
// Regression test for GitHub #8726: a hydride ([H-]) written in the SMILES
// must still be present as its own atom after parsing.
TEST_CASE("GitHub #8726: Do not remove hydrides by default") {
  const auto mol = "[OH+][H-]"_smiles;
  REQUIRE(mol);
  CHECK(mol->getNumAtoms() == 2);

  const auto *hydride = mol->getAtomWithIdx(1);
  CHECK(hydride->getAtomicNum() == 1);
  CHECK(hydride->getFormalCharge() == -1);
}
|
|
|
|
// Github #8945: the same molecule written in both atom orders must produce
// identical output SMILES.
TEST_CASE("Github #8945") {
  SECTION("as reported") {
    const auto forward = "N#N=O"_smiles;
    REQUIRE(forward);
    const auto reversed = "O=N#N"_smiles;
    REQUIRE(reversed);

    CHECK(MolToSmiles(*forward) == MolToSmiles(*reversed));
  }
}
|
|
|
|
#ifdef RDK_USE_URF
|
|
// MolOps::findRingFamilies(): checks the atom and bond ring families stored on
// the molecule's RingInfo for a fused polycycle, for a ring broken by a
// zero-order bond, and with dative / hydrogen bonds switched on and off.
TEST_CASE("Test findRingFamilies") {
  SECTION("basic check") {
    auto mol = R"SMI(c1ccc2c(c1)C3CC3C4CC5CC4CC25)SMI"_smiles;
    REQUIRE(mol);

    MolOps::findRingFamilies(*mol);

    auto *ringInfo = mol->getRingInfo();
    REQUIRE(ringInfo);
    REQUIRE(ringInfo->areRingFamiliesInitialized());

    const std::vector<std::vector<int>> expectedAtomFamilies{
        {6, 7, 8},
        {9, 10, 11, 12, 13},
        {11, 12, 13, 14, 15},
        {0, 1, 2, 3, 4, 5},
        {3, 4, 6, 8, 9, 10, 11, 13, 14, 15},
    };
    auto &atomFamilies = ringInfo->atomRingFamilies();
    REQUIRE(atomFamilies.size() == 5);
    for (auto i = 0u; i < expectedAtomFamilies.size(); ++i) {
      CHECK(atomFamilies[i] == expectedAtomFamilies[i]);
    }

    const std::vector<std::vector<int>> expectedBondFamilies{
        {6, 7, 17},
        {9, 10, 11, 12, 18},
        {11, 12, 13, 14, 19},
        {0, 1, 2, 3, 4, 15},
        {3, 5, 8, 9, 10, 13, 14, 16, 17, 18, 19},
    };
    auto &bondFamilies = ringInfo->bondRingFamilies();
    REQUIRE(bondFamilies.size() == 5);
    for (auto i = 0u; i < expectedBondFamilies.size(); ++i) {
      CHECK(bondFamilies[i] == expectedBondFamilies[i]);
    }
  }

  SECTION("With Zero Order Bonds") {
    // ZOBs should never be considered in rings
    auto mol = R"SMI(C1CCCCC1)SMI"_smiles;
    REQUIRE(mol);

    // Turning one ring bond into a ZOB opens the cyclohexane ring
    auto *firstBond = mol->getBondWithIdx(0);
    firstBond->setBondType(Bond::ZERO);

    MolOps::findRingFamilies(*mol);

    auto *ringInfo = mol->getRingInfo();
    REQUIRE(ringInfo);
    REQUIRE(ringInfo->areRingFamiliesInitialized());

    CHECK(ringInfo->atomRingFamilies().empty() == true);
    CHECK(ringInfo->bondRingFamilies().empty() == true);
  }

  SECTION("toggle includeDativeBonds") {
    auto includeDativeBonds = GENERATE(true, false);
    CAPTURE(includeDativeBonds);

    auto mol = R"SMI(N->1CCN->[Pt]1)SMI"_smiles;
    REQUIRE(mol);

    MolOps::findRingFamilies(*mol, includeDativeBonds);

    auto *ringInfo = mol->getRingInfo();
    REQUIRE(ringInfo);
    REQUIRE(ringInfo->areRingFamiliesInitialized());

    // the only ring closes through the dative bonds to the metal
    unsigned int expectedFamilies = 0;
    if (includeDativeBonds) {
      expectedFamilies = 1;
    }
    CHECK(ringInfo->atomRingFamilies().size() == expectedFamilies);
    CHECK(ringInfo->bondRingFamilies().size() == expectedFamilies);
  }

  SECTION("toggle includeHydrogenBonds") {
    auto includeHydrogenBonds = GENERATE(true, false);
    CAPTURE(includeHydrogenBonds);

    auto mol = "CC1O[H]O=C(C)C1 |H:4.3|"_smiles;
    REQUIRE(mol);

    constexpr bool includeDativeBonds = false;
    MolOps::findRingFamilies(*mol, includeDativeBonds, includeHydrogenBonds);

    auto *ringInfo = mol->getRingInfo();
    REQUIRE(ringInfo);
    REQUIRE(ringInfo->areRingFamiliesInitialized());

    // the only ring closes through the hydrogen bond
    unsigned int expectedFamilies = 0;
    if (includeHydrogenBonds) {
      expectedFamilies = 1;
    }
    CHECK(ringInfo->atomRingFamilies().size() == expectedFamilies);
    CHECK(ringInfo->bondRingFamilies().size() == expectedFamilies);
  }
}
|
|
#endif
|
|
|
|
// GitHub #9270: double bond stereo on molecules produced by copyMolSubset().
// Each section extracts a different part of a molecule with a TRANS double
// bond and checks the stereo label and stereo atoms of the copied bond,
// depending on which of the original stereo atoms made it into the copy.
TEST_CASE("GitHub #9270: Segfault when calling MolToSmiles on submol") {
  SECTION("both atoms mapped") {
    // Legacy stereo perception labels the double bond E/Z, the modern code
    // CIS/TRANS; the extracted bond is expected to look the same either way.
    auto useLegacy = GENERATE(true, false);
    CAPTURE(useLegacy);
    UseLegacyStereoPerceptionFixture useLegacyFixture(useLegacy);

    auto mol = "C/C=C/CC"_smiles;
    REQUIRE(mol);

    const auto *srcBond = mol->getBondWithIdx(1);
    REQUIRE(srcBond->getBondType() == Bond::BondType::DOUBLE);
    const auto expectedSrcStereo = useLegacy ? Bond::BondStereo::STEREOE
                                             : Bond::BondStereo::STEREOTRANS;
    REQUIRE(srcBond->getStereo() == expectedSrcStereo);
    REQUIRE(srcBond->getStereoAtoms() == std::vector<int>{0, 3});

    // Everything except the last atom is kept, so both stereo atoms survive
    // with unchanged indices.
    const std::vector<unsigned int> keptAtoms{0, 1, 2, 3};
    const std::vector<unsigned int> keptBonds{0, 1, 2};
    auto subset = copyMolSubset(*mol, keptAtoms, keptBonds);
    REQUIRE(subset);

    const auto *copiedBond = subset->getBondWithIdx(1);
    REQUIRE(copiedBond->getBondType() == Bond::BondType::DOUBLE);
    REQUIRE(copiedBond->getStereo() == Bond::BondStereo::STEREOTRANS);
    REQUIRE(copiedBond->getStereoAtoms() == std::vector<int>{0, 3});
  }

  SECTION("one stereo atom replaced with alternative") {
    UseLegacyStereoPerceptionFixture useLegacyFixture(false);

    auto mol = "OC(/C)=C/C"_smiles;
    REQUIRE(mol);

    const auto *srcBond = mol->getBondWithIdx(2);
    REQUIRE(srcBond->getBondType() == Bond::BondType::DOUBLE);
    REQUIRE(srcBond->getStereo() == Bond::BondStereo::STEREOTRANS);
    REQUIRE(srcBond->getStereoAtoms() == std::vector<int>{0, 4});

    // Stereo atom 0 is left out; the other substituent on that end of the
    // double bond has to take over, which flips TRANS to CIS.
    const std::vector<unsigned int> keptAtoms{1, 2, 3, 4};
    const std::vector<unsigned int> keptBonds{1, 2, 3};
    auto subset = copyMolSubset(*mol, keptAtoms, keptBonds);
    REQUIRE(subset);

    const auto *copiedBond = subset->getBondWithIdx(1);
    REQUIRE(copiedBond->getBondType() == Bond::BondType::DOUBLE);
    REQUIRE(copiedBond->getStereo() == Bond::BondStereo::STEREOCIS);
    REQUIRE(copiedBond->getStereoAtoms() == std::vector<int>{1, 3});
  }

  SECTION("both atoms on one side removed") {
    UseLegacyStereoPerceptionFixture useLegacyFixture(false);

    auto mol = "OC(/C)=C/C"_smiles;
    REQUIRE(mol);

    const auto *srcBond = mol->getBondWithIdx(2);
    REQUIRE(srcBond->getBondType() == Bond::BondType::DOUBLE);
    REQUIRE(srcBond->getStereo() == Bond::BondStereo::STEREOTRANS);
    REQUIRE(srcBond->getStereoAtoms() == std::vector<int>{0, 4});

    // Only the double bond and the second stereo atom are kept: one end of
    // the double bond has no substituent left, so no stereo can remain.
    const std::vector<unsigned int> keptAtoms{1, 3, 4};
    const std::vector<unsigned int> keptBonds{2, 3};
    auto subset = copyMolSubset(*mol, keptAtoms, keptBonds);
    REQUIRE(subset);

    const auto *copiedBond = subset->getBondWithIdx(0);
    REQUIRE(copiedBond->getBondType() == Bond::BondType::DOUBLE);
    REQUIRE(copiedBond->getStereo() == Bond::BondStereo::STEREONONE);
    REQUIRE(copiedBond->getStereoAtoms().empty());
  }

  SECTION("both atoms replaced - double swap") {
    UseLegacyStereoPerceptionFixture useLegacyFixture(false);

    auto mol = "OC(/C)=C(/C)N"_smiles;
    REQUIRE(mol);

    const auto *srcBond = mol->getBondWithIdx(2);
    REQUIRE(srcBond->getBondType() == Bond::BondType::DOUBLE);
    REQUIRE(srcBond->getStereo() == Bond::BondStereo::STEREOTRANS);
    REQUIRE(srcBond->getStereoAtoms() == std::vector<int>{0, 4});

    // Both original stereo atoms (0 and 4) are left out; each end of the
    // double bond falls back to its other substituent, and swapping the
    // reference atom on both ends keeps the label TRANS.
    const std::vector<unsigned int> keptAtoms{1, 2, 3, 5};
    const std::vector<unsigned int> keptBonds{1, 2, 4};
    auto subset = copyMolSubset(*mol, keptAtoms, keptBonds);
    REQUIRE(subset);

    const auto *copiedBond = subset->getBondWithIdx(1);
    REQUIRE(copiedBond->getBondType() == Bond::BondType::DOUBLE);
    REQUIRE(copiedBond->getStereo() == Bond::BondStereo::STEREOTRANS);
    REQUIRE(copiedBond->getStereoAtoms() == std::vector<int>{1, 3});
  }
}
|