Files
rdkit/Code/GraphMol/catch_graphmol.cpp
Dan N b5dcb21fef Improve performance of aromaticity detection for large molecules (#3253)
* remove trailing spaces

* 3256: Envelope aromaticity not detected in complex fused system

Removes stopping point in aromaticity detection when all atoms
are "done". This also markedly improves the performance of
aromaticity detection for very large molecules - for example,
aromitization of 3EOH from the PDB was dominated by done atom
checking before this commit.

Some aromatic bonds were missed before this commit in complex fused
systems. This happened if all atoms in the fused system were also
in some smaller aromatic ring and there was at least one fused edge
that was single in the kekule form.

Some example molecules for which envelope aromaticity failed
before this commit:

c1cc2n(c1)c1cccn1c1cccn21
-> became c1cc2n(c1)-c1cccn1-c1cccn1-2 before this commit
c1cc2c3cc[nH]c3n3cccc3n2c1
-> became c1cc2n(c1)-c1cccn1-c1[nH]ccc1-2 before this commit
c1cc2c3cc[nH]c3c3cc[nH]c3n2c1
-> became c1cc2n(c1)-c1[nH]ccc1-c1[nH]ccc1-2 before this commit

Here's a similar example that didn't fail even before this
commit. The central ring only shares double bonds with the
exterior rings.
* c1cc2c([nH]1)c1cc[nH]c1c1cc[nH]c21

Requires updates to some MQN descriptors tests because some
bonds become aromatic (MQN includes counts of single and
double bonds of kekule form).

FWIW, for the molecule that had a change in counts, the counts
were incorrect both before and after this commit, because
MQN uses an approximation (dividing aromatic bonds evenly
between single and double bonds) to avoid kekulization.
This approximation is invalid when there are oodles of
nitrogens lone pairs participating in the aromatic
bonds.

(the failing line was 2558 in aromat_regress.txt: Cc1cc2n(n1)c1cc(C)nn1c1c(C=O)c(C)nn21)

* Detect envelope aromaticity in fused systems

In #3253, we proposed removing doneAtoms for performance, and it was
noted that it also fixed detection of envelope aromaticity in some
fused systems. However, when I completely removed doneAtoms, I saw
hangs in sanitization of things like nanotubes. Using doneBonds
allows envelope aromaticity, while preserving a reasonable break
on runaway work for crazy molecules.

The performance issue was addressed by caching the ring bond
count.

Here are some sanitize timings on proteins from the RCSB PDB:
Before this commit:
* 3eoh 1.21s
* 2j3n 0.77s
* 1nks 0.053s

Afterwards:
* 3eoh 0.42s
* 2j3n 0.15s
* 1nks 0.046s

* Use boost::dynamic_bitset instead of unordered_set

To cound ring bonds.
2020-08-13 05:57:16 +02:00

1351 lines
43 KiB
C++

//
//
// Copyright (C) 2018-2020 Greg Landrum and T5 Informatics GmbH
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
#include "catch.hpp"
#include <GraphMol/RDKitBase.h>
#include <GraphMol/new_canon.h>
#include <GraphMol/RDKitQueries.h>
#include <GraphMol/Chirality.h>
#include <GraphMol/FileParsers/FileParsers.h>
#include <GraphMol/SmilesParse/SmilesParse.h>
#include <GraphMol/SmilesParse/SmilesWrite.h>
#include <GraphMol/SmilesParse/SmartsWrite.h>
#include <boost/format.hpp>
using namespace RDKit;
#if 1
TEST_CASE("SMILES Parsing works", "[molops]") {
std::unique_ptr<RWMol> mol(SmilesToMol("C1CC1"));
REQUIRE(mol);
REQUIRE(mol->getNumAtoms() == 3);
}
TEST_CASE("Sanitization tests", "[molops]") {
std::unique_ptr<RWMol> mol(SmilesToMol("C1=CC=CC=C1Cc2ccccc2", false, false));
REQUIRE(mol);
REQUIRE(mol->getNumAtoms() == 13);
SECTION("properties") {
mol->updatePropertyCache();
CHECK(mol->getAtomWithIdx(0)->getTotalNumHs() == 1);
CHECK(!mol->getAtomWithIdx(0)->getIsAromatic());
CHECK(mol->getAtomWithIdx(10)->getIsAromatic());
SECTION("aromaticity") {
unsigned int opThatFailed;
MolOps::sanitizeMol(*mol, opThatFailed, MolOps::SANITIZE_SETAROMATICITY);
// mol->debugMol(std::cerr);
CHECK(mol->getAtomWithIdx(10)->getIsAromatic());
// blocked by #1730
// CHECK(mol->getAtomWithIdx(0)->getIsAromatic());
}
SECTION("kekulize") {
unsigned int opThatFailed;
MolOps::sanitizeMol(*mol, opThatFailed, MolOps::SANITIZE_KEKULIZE);
CHECK(!mol->getAtomWithIdx(0)->getIsAromatic());
CHECK(!mol->getAtomWithIdx(10)->getIsAromatic());
}
}
}
TEST_CASE("Github #2062", "[bug][molops]") {
SmilesParserParams ps;
ps.removeHs = false;
ps.sanitize = true;
std::unique_ptr<RWMol> mol(SmilesToMol("[C:1][C:2]([H:3])([H])[O:4][H]", ps));
REQUIRE(mol);
CHECK(mol->getNumAtoms() == 6);
mol->getAtomWithIdx(1)->setProp("intProp", 42);
MolOps::mergeQueryHs(*mol);
CHECK(mol->getNumAtoms() == 3);
SECTION("basics") { CHECK(mol->getAtomWithIdx(1)->getAtomMapNum() == 2); }
SECTION("other props") {
REQUIRE(mol->getAtomWithIdx(1)->hasProp("intProp"));
CHECK(mol->getAtomWithIdx(1)->getProp<int>("intProp") == 42);
}
}
TEST_CASE("Github #2086", "[bug][molops]") {
SECTION("reported version") {
auto mol = "C1CCCC1"_smiles;
REQUIRE(mol);
MolOps::addHs(*mol);
REQUIRE(mol->getNumAtoms() == 15);
mol->removeBond(4, 13);
MolOps::removeHs(*mol);
REQUIRE(mol->getNumAtoms() == 6);
}
}
TEST_CASE("github #299", "[bug][molops][SSSR]") {
SECTION("simplified") {
auto mol =
"C13%13%14.C124%18.C25%13%15.C368%17.C4679.C75%10%17.C8%11%14%16.C9%11%12%18.C%10%12%15%16"_smiles;
REQUIRE(mol);
REQUIRE(mol->getNumAtoms() == 9);
}
SECTION("old example from molopstest") {
auto mol = "C123C45C11C44C55C22C33C14C523"_smiles;
REQUIRE(mol);
REQUIRE(mol->getNumAtoms() == 9);
}
SECTION("carborane") {
std::unique_ptr<RWMol> mol(
SmilesToMol("[B]1234[B]567[B]118[B]229[B]33%10[B]454[B]656[B]711[B]822["
"C]933[B]%1045[C]6123",
0, false));
REQUIRE(mol);
CHECK(mol->getNumAtoms() == 12);
mol->updatePropertyCache(false);
MolOps::findSSSR(*mol);
REQUIRE(mol->getRingInfo()->isInitialized());
}
SECTION("original report from ChEbI") {
std::string pathName = getenv("RDBASE");
pathName += "/Code/GraphMol/test_data/";
std::unique_ptr<RWMol> mol(
MolFileToMol(pathName + "ChEBI_50252.mol", false));
REQUIRE(mol);
CHECK(mol->getNumAtoms() == 80);
mol->updatePropertyCache(false);
MolOps::findSSSR(*mol);
REQUIRE(mol->getRingInfo()->isInitialized());
}
}
TEST_CASE("github #2224", "[bug][molops][removeHs][query]") {
SECTION("the original report") {
std::string pathName = getenv("RDBASE");
pathName += "/Code/GraphMol/test_data/";
std::unique_ptr<RWMol> mol(MolFileToMol(pathName + "github2224_1.mol"));
REQUIRE(mol);
REQUIRE(mol->getNumAtoms() == 7);
}
SECTION("basics") {
SmilesParserParams ps;
ps.removeHs = false;
ps.sanitize = true;
std::unique_ptr<ROMol> mol(SmilesToMol("C[H]", ps));
REQUIRE(mol);
REQUIRE(mol->getNumAtoms() == 2);
{ // The H without a query is removed
std::unique_ptr<ROMol> m2(MolOps::removeHs(*mol));
CHECK(m2->getNumAtoms() == 1);
}
{ // but if we add a query feature it's not removed
RWMol m2(*mol);
auto *qa = new QueryAtom(1);
m2.replaceAtom(1, qa);
m2.getAtomWithIdx(1)->setAtomicNum(1);
MolOps::removeHs(m2);
CHECK(m2.getNumAtoms() == 2);
delete qa;
}
}
}
TEST_CASE(
"github #2268: Recognize N in three-membered rings as potentially chiral",
"[bug][stereo]") {
SECTION("basics: N in a 3 ring") {
const auto mol = "C[N@]1CC1C"_smiles;
REQUIRE(mol);
CHECK(mol->getAtomWithIdx(1)->getChiralTag() != Atom::CHI_UNSPECIFIED);
}
SECTION("basics: N in a 4 ring") {
const auto mol = "C[N@]1CCC1C"_smiles;
REQUIRE(mol);
CHECK(mol->getAtomWithIdx(1)->getChiralTag() == Atom::CHI_UNSPECIFIED);
}
SECTION("the original molecule") {
std::string mb = R"CTAB(
Mrv1810 02131915062D
18 20 0 0 1 0 999 V2000
-0.7207 -1.3415 0.0000 N 0 0 1 0 0 0 0 0 0 0 0 0
-0.0583 -0.8416 0.0000 C 0 0 2 0 0 0 0 0 0 0 0 0
-0.0083 -1.7540 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
-1.3956 -0.8666 0.0000 C 0 0 2 0 0 0 0 0 0 0 0 0
-0.3250 -0.0667 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-2.1955 -0.6499 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-1.1499 -0.0792 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
0.6541 -0.4292 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-2.7830 -1.2291 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
-1.6081 -1.6623 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-2.4080 0.1500 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
1.3665 -0.8374 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
0.6416 0.3958 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-3.1996 0.3708 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-3.4121 1.1624 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
1.3498 0.8207 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
2.0790 -0.4167 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
2.0665 0.4083 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
2 1 1 0 0 0 0
1 3 1 1 0 0 0
4 1 1 0 0 0 0
5 2 1 0 0 0 0
4 6 1 0 0 0 0
7 4 1 0 0 0 0
2 8 1 6 0 0 0
9 6 2 0 0 0 0
4 10 1 1 0 0 0
11 6 1 0 0 0 0
12 8 2 0 0 0 0
13 8 1 0 0 0 0
14 11 1 0 0 0 0
15 14 1 0 0 0 0
16 13 2 0 0 0 0
17 12 1 0 0 0 0
18 16 1 0 0 0 0
2 3 1 0 0 0 0
5 7 1 0 0 0 0
17 18 2 0 0 0 0
M END
)CTAB";
std::unique_ptr<ROMol> mol(MolBlockToMol(mb));
REQUIRE(mol);
CHECK(mol->getAtomWithIdx(0)->getChiralTag() != Atom::CHI_UNSPECIFIED);
}
}
TEST_CASE("github #2244", "[bug][molops][stereo]") {
SECTION("the original report") {
auto mol = "CC=CC=CC"_smiles;
REQUIRE(mol);
MolOps::findPotentialStereoBonds(*mol, true);
CHECK(mol->getBondWithIdx(1)->getStereo() == Bond::STEREOANY);
CHECK(mol->getBondWithIdx(3)->getStereo() == Bond::STEREOANY);
mol->getBondWithIdx(3)->setStereo(Bond::STEREONONE);
MolOps::findPotentialStereoBonds(*mol, true);
CHECK(mol->getBondWithIdx(1)->getStereo() == Bond::STEREOANY);
CHECK(mol->getBondWithIdx(3)->getStereo() == Bond::STEREOANY);
}
}
TEST_CASE(
"github #2258: heterocycles with exocyclic bonds not failing valence check",
"[bug][molops]") {
SECTION("the original report") {
std::vector<std::string> smiles = {"C=n1ccnc1", "C#n1ccnc1"};
for (auto smi : smiles) {
CHECK_THROWS_AS(SmilesToMol(smi), MolSanitizeException);
}
}
}
TEST_CASE("github #908: AddHs() using 3D coordinates with 2D conformations",
"[bug][molops]") {
SECTION("basics: single atom mols") {
std::vector<std::string> smiles = {"Cl", "O", "N", "C"};
for (auto smi : smiles) {
// std::cerr << smi << std::endl;
std::unique_ptr<RWMol> mol(SmilesToMol(smi));
REQUIRE(mol);
auto conf = new Conformer(1);
conf->set3D(false);
conf->setAtomPos(0, RDGeom::Point3D(0, 0, 0));
mol->addConformer(conf, true);
bool explicitOnly = false;
bool addCoords = true;
MolOps::addHs(*mol, explicitOnly, addCoords);
for (size_t i = 0; i < mol->getNumAtoms(); ++i) {
// std::cerr << " " << i << " " << conf->getAtomPos(i) << std::endl;
CHECK(conf->getAtomPos(i).z == 0.0);
}
}
}
}
TEST_CASE(
"github #2437: Canon::rankMolAtoms results in crossed double bonds in "
"rings",
"[bug][molops]") {
SECTION("underlying problem") {
std::string molb = R"CTAB(testmol
Mrv1824 05081910082D
4 4 0 0 0 0 999 V2000
6.9312 -8.6277 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
6.9312 -9.4527 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
7.7562 -8.6277 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
7.7562 -9.4527 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
1 2 1 0 0 0 0
1 3 1 0 0 0 0
3 4 1 0 0 0 0
2 4 2 0 0 0 0
M END
)CTAB";
bool sanitize = false;
bool removeHs = false;
std::unique_ptr<RWMol> mol(MolBlockToMol(molb, sanitize, removeHs));
REQUIRE(mol);
mol->updatePropertyCache();
CHECK(mol->getBondWithIdx(3)->getBondType() == Bond::BondType::DOUBLE);
CHECK(mol->getBondWithIdx(3)->getBondDir() == Bond::BondDir::NONE);
std::vector<unsigned int> ranks;
CHECK(!mol->getRingInfo()->isInitialized());
Canon::rankMolAtoms(*mol, ranks);
CHECK(!mol->getRingInfo()->isInitialized());
}
SECTION("as discovered") {
std::string molb = R"CTAB(testmol
Mrv1824 05081910082D
4 4 0 0 0 0 999 V2000
6.9312 -9.4527 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
7.7562 -8.6277 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
7.7562 -9.4527 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
6.9312 -8.6277 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
1 2 1 0 0 0 0
1 3 1 0 0 0 0
3 4 1 0 0 0 0
2 4 2 0 0 0 0
M END
)CTAB";
bool sanitize = false;
bool removeHs = false;
std::unique_ptr<RWMol> mol(MolBlockToMol(molb, sanitize, removeHs));
REQUIRE(mol);
mol->updatePropertyCache();
CHECK(mol->getBondWithIdx(3)->getBondType() == Bond::BondType::DOUBLE);
CHECK(mol->getBondWithIdx(3)->getBondDir() == Bond::BondDir::NONE);
auto nmb = MolToMolBlock(*mol);
CHECK(nmb.find("2 4 2 3") == std::string::npos);
CHECK(nmb.find("2 4 2 0") != std::string::npos);
std::vector<unsigned int> ranks;
Canon::rankMolAtoms(*mol, ranks);
nmb = MolToMolBlock(*mol);
CHECK(nmb.find("2 4 2 3") == std::string::npos);
CHECK(nmb.find("2 4 2 0") != std::string::npos);
}
}
TEST_CASE(
"github #2423: Incorrect assignment of explicit Hs to Al+3 read from mol "
"block",
"[bug][molops]") {
SECTION("basics: single atom mols") {
std::string mb = R"CTAB(2300
-OEChem-01301907122D
1 0 0 0 0 0 0 0 0999 V2000
-66.7000 999.0000 0.0000 Al 0 1 0 0 0 0 0 0 0 0 0 0
M CHG 1 1 3
M END)CTAB";
std::unique_ptr<ROMol> mol(MolBlockToMol(mb));
REQUIRE(mol);
CHECK(mol->getAtomWithIdx(0)->getFormalCharge() == 3);
CHECK(mol->getAtomWithIdx(0)->getTotalNumHs() == 0);
}
}
TEST_CASE("Specialized exceptions for sanitization errors", "[molops]") {
SECTION("AtomValenceException") {
std::vector<std::pair<std::string, unsigned int>> smiles = {
{"C=n1ccnc1", 1}, {"CCO(C)C", 2}};
for (auto pr : smiles) {
CHECK_THROWS_AS(SmilesToMol(pr.first), AtomValenceException);
try {
auto m = SmilesToMol(pr.first);
RDUNUSED_PARAM(m);
} catch (const AtomValenceException &e) {
CHECK(e.getType() == "AtomValenceException");
CHECK(e.getAtomIdx() == pr.second);
}
}
}
SECTION("AtomKekulizeException") {
std::vector<std::pair<std::string, unsigned int>> smiles = {
{"CCcc", 2}, {"C1:c:CC1", 0}};
for (auto pr : smiles) {
CHECK_THROWS_AS(SmilesToMol(pr.first), AtomKekulizeException);
try {
auto m = SmilesToMol(pr.first);
RDUNUSED_PARAM(m);
} catch (const AtomKekulizeException &e) {
CHECK(e.getType() == "AtomKekulizeException");
CHECK(e.getAtomIdx() == pr.second);
}
}
}
SECTION("KekulizeException") {
std::vector<std::pair<std::string, std::vector<unsigned int>>> smiles = {
{"c1cccc1", {0, 1, 2, 3, 4}}, {"Cc1cc1", {1, 2, 3}}};
for (auto pr : smiles) {
CHECK_THROWS_AS(SmilesToMol(pr.first), KekulizeException);
try {
auto m = SmilesToMol(pr.first);
RDUNUSED_PARAM(m);
} catch (const KekulizeException &e) {
CHECK(e.getType() == "KekulizeException");
CHECK(e.getAtomIndices() == pr.second);
}
}
}
}
TEST_CASE("detectChemistryProblems", "[molops]") {
SECTION("Basics") {
SmilesParserParams ps;
ps.sanitize = false;
auto m = std::unique_ptr<ROMol>(SmilesToMol("CO(C)CFCc1cc1", ps));
REQUIRE(m);
auto res = MolOps::detectChemistryProblems(*m);
REQUIRE(res.size() == 3);
CHECK(res[0]->getType() == "AtomValenceException");
REQUIRE(dynamic_cast<AtomValenceException *>(res[0].get()));
CHECK(dynamic_cast<AtomSanitizeException *>(res[0].get())->getAtomIdx() ==
1);
CHECK(res[1]->getType() == "AtomValenceException");
REQUIRE(dynamic_cast<AtomSanitizeException *>(res[1].get()));
CHECK(dynamic_cast<AtomSanitizeException *>(res[1].get())->getAtomIdx() ==
4);
CHECK(res[2]->getType() == "KekulizeException");
REQUIRE(dynamic_cast<KekulizeException *>(res[2].get()));
CHECK(dynamic_cast<KekulizeException *>(res[2].get())->getAtomIndices() ==
std::vector<unsigned int>({6, 7, 8}));
}
SECTION("No problems") {
SmilesParserParams ps;
ps.sanitize = false;
auto m = std::unique_ptr<ROMol>(SmilesToMol("c1ccccc1", ps));
REQUIRE(m);
auto res = MolOps::detectChemistryProblems(*m);
REQUIRE(res.size() == 0);
}
}
TEST_CASE(
"github #2606: Bad valence corrections on Pb, Sn"
"[bug][molops]") {
SECTION("basics-Pb") {
std::string mb = R"CTAB(
Mrv1810 08141905562D
5 0 0 0 0 0 999 V2000
-3.6316 -0.4737 0.0000 Pb 0 0 0 0 0 0 0 0 0 0 0 0
-3.6541 0.3609 0.0000 O 0 5 0 0 0 0 0 0 0 0 0 0
-2.4586 -0.5188 0.0000 O 0 5 0 0 0 0 0 0 0 0 0 0
-3.6992 -1.5338 0.0000 O 0 5 0 0 0 0 0 0 0 0 0 0
-4.5789 -0.4286 0.0000 O 0 5 0 0 0 0 0 0 0 0 0 0
M CHG 5 1 4 2 -1 3 -1 4 -1 5 -1
M END
)CTAB";
std::unique_ptr<ROMol> mol(MolBlockToMol(mb));
REQUIRE(mol);
CHECK(mol->getAtomWithIdx(0)->getFormalCharge() == 4);
CHECK(mol->getAtomWithIdx(0)->getTotalNumHs() == 0);
}
SECTION("basics-Sn") {
std::string mb = R"CTAB(
Mrv1810 08141905562D
5 0 0 0 0 0 999 V2000
-3.6316 -0.4737 0.0000 Sn 0 0 0 0 0 0 0 0 0 0 0 0
-3.6541 0.3609 0.0000 O 0 5 0 0 0 0 0 0 0 0 0 0
-2.4586 -0.5188 0.0000 O 0 5 0 0 0 0 0 0 0 0 0 0
-3.6992 -1.5338 0.0000 O 0 5 0 0 0 0 0 0 0 0 0 0
-4.5789 -0.4286 0.0000 O 0 5 0 0 0 0 0 0 0 0 0 0
M CHG 5 1 4 2 -1 3 -1 4 -1 5 -1
M END
)CTAB";
std::unique_ptr<ROMol> mol(MolBlockToMol(mb));
REQUIRE(mol);
CHECK(mol->getAtomWithIdx(0)->getFormalCharge() == 4);
CHECK(mol->getAtomWithIdx(0)->getTotalNumHs() == 0);
}
SECTION("basics-Ge") {
std::string mb = R"CTAB(
Mrv1810 08141905562D
5 0 0 0 0 0 999 V2000
-3.6316 -0.4737 0.0000 Ge 0 0 0 0 0 0 0 0 0 0 0 0
-3.6541 0.3609 0.0000 O 0 5 0 0 0 0 0 0 0 0 0 0
-2.4586 -0.5188 0.0000 O 0 5 0 0 0 0 0 0 0 0 0 0
-3.6992 -1.5338 0.0000 O 0 5 0 0 0 0 0 0 0 0 0 0
-4.5789 -0.4286 0.0000 O 0 5 0 0 0 0 0 0 0 0 0 0
M CHG 5 1 4 2 -1 3 -1 4 -1 5 -1
M END
)CTAB";
std::unique_ptr<ROMol> mol(MolBlockToMol(mb));
REQUIRE(mol);
CHECK(mol->getAtomWithIdx(0)->getFormalCharge() == 4);
CHECK(mol->getAtomWithIdx(0)->getTotalNumHs() == 0);
}
}
TEST_CASE(
"github #2607: Pb, Sn should support valence 2"
"[bug][molops]") {
SECTION("basics-Pb") {
std::string mb = R"CTAB(
Mrv1810 08141905562D
3 0 0 0 0 0 999 V2000
-3.6316 -0.4737 0.0000 Pb 0 0 0 0 0 0 0 0 0 0 0 0
-3.6541 0.3609 0.0000 O 0 5 0 0 0 0 0 0 0 0 0 0
-2.4586 -0.5188 0.0000 O 0 5 0 0 0 0 0 0 0 0 0 0
M CHG 3 1 2 2 -1 3 -1
M END
)CTAB";
std::unique_ptr<ROMol> mol(MolBlockToMol(mb));
REQUIRE(mol);
CHECK(mol->getAtomWithIdx(0)->getFormalCharge() == 2);
CHECK(mol->getAtomWithIdx(0)->getTotalNumHs() == 0);
}
SECTION("basics-Sn") {
std::string mb = R"CTAB(
Mrv1810 08141905562D
3 0 0 0 0 0 999 V2000
-3.6316 -0.4737 0.0000 Sn 0 0 0 0 0 0 0 0 0 0 0 0
-3.6541 0.3609 0.0000 O 0 5 0 0 0 0 0 0 0 0 0 0
-2.4586 -0.5188 0.0000 O 0 5 0 0 0 0 0 0 0 0 0 0
M CHG 3 1 2 2 -1 3 -1
M END
)CTAB";
std::unique_ptr<ROMol> mol(MolBlockToMol(mb));
REQUIRE(mol);
CHECK(mol->getAtomWithIdx(0)->getFormalCharge() == 2);
CHECK(mol->getAtomWithIdx(0)->getTotalNumHs() == 0);
}
}
TEST_CASE(
"github #2649: Allenes read from mol blocks have crossed bonds assigned"
"[bug][stereochemistry]") {
SECTION("basics") {
std::string mb = R"CTAB(mol
Mrv1824 09191901002D
6 5 0 0 0 0 999 V2000
-1.6986 -7.4294 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-2.2522 -6.8245 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-1.1438 -8.0357 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-2.8095 -6.2156 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-0.3374 -7.8470 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-3.6162 -6.3886 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
1 3 2 0 0 0 0
2 1 2 0 0 0 0
3 5 1 0 0 0 0
4 2 2 0 0 0 0
6 4 1 0 0 0 0
M END)CTAB";
std::unique_ptr<ROMol> mol(MolBlockToMol(mb));
REQUIRE(mol);
CHECK(mol->getBondWithIdx(0)->getStereo() == Bond::STEREONONE);
CHECK(mol->getBondWithIdx(1)->getStereo() == Bond::STEREONONE);
CHECK(mol->getBondWithIdx(3)->getStereo() == Bond::STEREONONE);
auto outmolb = MolToMolBlock(*mol);
// std::cerr<<outmolb<<std::endl;
CHECK(outmolb.find("1 3 2 0") != std::string::npos);
CHECK(outmolb.find("2 1 2 0") != std::string::npos);
CHECK(outmolb.find("4 2 2 0") != std::string::npos);
}
}
TEST_CASE(
"GitHub 2712: setBondStereoFromDirections() returning incorrect results"
"[stereochemistry]") {
SECTION("basics 1a") {
std::string mb = R"CTAB(
Mrv1810 10141909562D
4 3 0 0 0 0 999 V2000
3.3412 -2.9968 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
2.5162 -2.9968 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
2.1037 -3.7112 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
3.7537 -2.2823 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
1 2 2 0 0 0 0
2 3 1 0 0 0 0
1 4 1 0 0 0 0
M END
)CTAB";
bool sanitize = false;
std::unique_ptr<ROMol> mol(MolBlockToMol(mb, sanitize));
REQUIRE(mol);
CHECK(mol->getBondWithIdx(0)->getBondType() == Bond::DOUBLE);
CHECK(mol->getBondWithIdx(0)->getStereo() == Bond::STEREONONE);
MolOps::setBondStereoFromDirections(*mol);
CHECK(mol->getBondWithIdx(0)->getStereo() == Bond::STEREOTRANS);
}
SECTION("basics 1b") {
std::string mb = R"CTAB(
Mrv1810 10141909562D
4 3 0 0 0 0 999 V2000
3.3412 -2.9968 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
2.5162 -2.9968 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
2.1037 -3.7112 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
3.7537 -2.2823 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
1 2 2 0 0 0 0
2 3 1 0 0 0 0
4 1 1 0 0 0 0
M END
)CTAB";
bool sanitize = false;
std::unique_ptr<ROMol> mol(MolBlockToMol(mb, sanitize));
REQUIRE(mol);
CHECK(mol->getBondWithIdx(0)->getBondType() == Bond::DOUBLE);
CHECK(mol->getBondWithIdx(0)->getStereo() == Bond::STEREONONE);
MolOps::setBondStereoFromDirections(*mol);
CHECK(mol->getBondWithIdx(0)->getStereo() == Bond::STEREOTRANS);
}
SECTION("basics 2a") {
std::string mb = R"CTAB(
Mrv1810 10141909582D
4 3 0 0 0 0 999 V2000
3.4745 -5.2424 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
2.6495 -5.2424 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
2.2370 -5.9569 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
3.8870 -5.9569 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
1 2 2 0 0 0 0
2 3 1 0 0 0 0
1 4 1 0 0 0 0
M END
)CTAB";
bool sanitize = false;
std::unique_ptr<ROMol> mol(MolBlockToMol(mb, sanitize));
REQUIRE(mol);
CHECK(mol->getBondWithIdx(0)->getBondType() == Bond::DOUBLE);
CHECK(mol->getBondWithIdx(0)->getStereo() == Bond::STEREONONE);
MolOps::setBondStereoFromDirections(*mol);
CHECK(mol->getBondWithIdx(0)->getStereo() == Bond::STEREOCIS);
}
SECTION("basics 2b") {
std::string mb = R"CTAB(
Mrv1810 10141909582D
4 3 0 0 0 0 999 V2000
3.4745 -5.2424 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
2.6495 -5.2424 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
2.2370 -5.9569 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
3.8870 -5.9569 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
1 2 2 0 0 0 0
2 3 1 0 0 0 0
4 1 1 0 0 0 0
M END
)CTAB";
bool sanitize = false;
std::unique_ptr<ROMol> mol(MolBlockToMol(mb, sanitize));
REQUIRE(mol);
CHECK(mol->getBondWithIdx(0)->getBondType() == Bond::DOUBLE);
CHECK(mol->getBondWithIdx(0)->getStereo() == Bond::STEREONONE);
MolOps::setBondStereoFromDirections(*mol);
CHECK(mol->getBondWithIdx(0)->getStereo() == Bond::STEREOCIS);
}
}
TEST_CASE("removeHs screwing up double bond stereo", "[bug][removeHs]") {
SECTION("example1") {
std::string molblock = R"CTAB(molblock = """
SciTegic12221702182D
47 51 0 0 0 0 999 V2000
0.2962 6.2611 0.0000 C 0 0
-3.9004 4.4820 0.0000 C 0 0
1.4195 5.2670 0.0000 C 0 0
-3.8201 -7.4431 0.0000 C 0 0
-4.9433 -6.4490 0.0000 C 0 0
-2.3975 -6.9674 0.0000 C 0 0
3.5921 -3.5947 0.0000 C 0 0
-3.1475 2.3700 0.0000 C 0 0
2.1695 -4.0705 0.0000 C 0 0
-2.0242 1.3759 0.0000 C 0 0
-4.6440 -4.9792 0.0000 C 0 0
2.7681 -1.1308 0.0000 C 0 0
-5.8626 1.1332 0.0000 C 0 0
3.0674 0.3391 0.0000 C 0 0
3.6660 3.2787 0.0000 C 0 0
8.1591 -0.6978 0.0000 C 0 0
7.3351 1.7662 0.0000 C 0 0
-6.3876 3.5028 0.0000 C 0 0
-0.6756 -5.0219 0.0000 C 0 0
7.0358 0.2964 0.0000 C 0 0
3.8914 -2.1249 0.0000 C 0 0
-2.0982 -5.4976 0.0000 C 0 0
-4.5701 1.8943 0.0000 C 0 0 1 0 0 0
-6.9859 2.1273 0.0000 C 0 0 1 0 0 0
4.4900 0.8148 0.0000 C 0 0
1.3455 -1.6065 0.0000 C 0 0
4.7893 2.2846 0.0000 C 0 0
1.9442 1.3332 0.0000 C 0 0
1.0462 -3.0763 0.0000 C 0 0
2.2435 2.8030 0.0000 C 0 0
-0.6017 1.8516 0.0000 C 0 0
5.6132 -0.1794 0.0000 C 0 0
0.2223 -0.6124 0.0000 Cl 0 0
9.2823 -1.6919 0.0000 N 0 0
-3.2215 -4.5035 0.0000 N 0 0
6.2119 2.7603 0.0000 N 0 0
5.3139 -1.6492 0.0000 N 0 0
0.5216 0.8575 0.0000 N 0 0
-4.8945 3.3588 0.0000 N 0 0
-8.2913 2.8662 0.0000 O 0 0
-0.3024 3.3214 0.0000 O 0 0
1.1202 3.7971 0.0000 O 0 0
-0.3763 -3.5520 0.0000 O 0 0
-2.8482 3.8398 0.0000 H 0 0
-2.3235 -0.0940 0.0000 H 0 0
-3.9483 0.5292 0.0000 H 0 0
-7.8572 0.9063 0.0000 H 0 0
1 3 1 0
2 39 1 0
3 42 1 0
4 5 2 0
4 6 1 0
5 11 1 0
6 22 2 0
7 9 2 0
7 21 1 0
8 44 1 0
8 10 2 0
8 23 1 0
9 29 1 0
10 45 1 0
10 31 1 0
11 35 2 0
12 21 2 0
12 26 1 0
13 23 1 0
13 24 1 0
14 25 2 0
14 28 1 0
15 27 2 0
15 30 1 0
16 20 1 0
16 34 3 0
17 20 2 0
17 36 1 0
18 24 1 0
18 39 1 0
19 22 1 0
19 43 1 0
20 32 1 0
21 37 1 0
22 35 1 0
23 46 1 6
23 39 1 0
24 47 1 1
24 40 1 0
25 27 1 0
25 32 1 0
26 29 2 0
26 33 1 0
27 36 1 0
28 30 2 0
28 38 1 0
29 43 1 0
30 42 1 0
31 38 2 0
31 41 1 0
32 37 2 3
M END
"""
)CTAB";
bool sanitize = false;
bool removeHs = false;
std::unique_ptr<RWMol> m(MolBlockToMol(molblock, sanitize, removeHs));
REQUIRE(m);
m->updatePropertyCache();
MolOps::setBondStereoFromDirections(*m);
CHECK(m->getBondWithIdx(10)->getBondType() == Bond::DOUBLE);
CHECK(m->getBondWithIdx(10)->getStereo() == Bond::STEREOTRANS);
REQUIRE(m->getBondWithIdx(10)->getStereoAtoms().size() == 2);
CHECK(m->getBondWithIdx(10)->getStereoAtoms()[0] == 43);
CHECK(m->getBondWithIdx(10)->getStereoAtoms()[1] == 44);
MolOps::removeHs(*m); // implicitOnly,updateExplicitCount,sanitize);
// m->debugMol(std::cerr);
CHECK(m->getBondWithIdx(9)->getBondType() == Bond::DOUBLE);
CHECK(m->getBondWithIdx(9)->getStereo() == Bond::STEREOTRANS);
REQUIRE(m->getBondWithIdx(9)->getStereoAtoms().size() == 2);
CHECK(m->getBondWithIdx(9)->getStereoAtoms()[0] == 22);
CHECK(m->getBondWithIdx(9)->getStereoAtoms()[1] == 30);
}
}
TEST_CASE("setDoubleBondNeighborDirections()", "[stereochemistry][bug]") {
SECTION("basics") {
auto m = "CC=CC"_smiles;
REQUIRE(m);
m->getBondWithIdx(1)->getStereoAtoms() = {0, 3};
m->getBondWithIdx(1)->setStereo(Bond::STEREOCIS);
MolOps::setDoubleBondNeighborDirections(*m);
CHECK(m->getBondWithIdx(0)->getBondDir() == Bond::ENDUPRIGHT);
CHECK(m->getBondWithIdx(2)->getBondDir() == Bond::ENDDOWNRIGHT);
CHECK(MolToSmiles(*m) == "C/C=C\\C");
}
}
TEST_CASE("github #2782: addHs() fails on atoms with 'bad' valences", "[bug]") {
SECTION("basics") {
SmilesParserParams ps;
ps.sanitize = false;
std::unique_ptr<RWMol> m(
static_cast<RWMol *>(SmilesToMol("C=C1=CC=CC=C1", ps)));
REQUIRE(m);
bool strict = false;
m->updatePropertyCache(strict);
CHECK(m->getNumAtoms() == 7);
MolOps::addHs(*m);
CHECK(m->getNumAtoms() == 14);
// this doesn't change the fact that there's still a bad valence present:
CHECK_THROWS_AS(m->updatePropertyCache(), AtomValenceException);
}
}
TEST_CASE(
"Github #2784: Element symbol lookup for some transuranics returns "
"incorrect results",
"[transuranics][bug]") {
auto pt = PeriodicTable::getTable();
SECTION("number to symbol") {
std::vector<std::pair<unsigned int, std::string>> data = {
{113, "Nh"}, {114, "Fl"}, {115, "Mc"},
{116, "Lv"}, {117, "Ts"}, {118, "Og"}};
for (const auto &pr : data) {
CHECK(pt->getElementSymbol(pr.first) == pr.second);
}
}
SECTION("symbol to number") {
std::vector<std::pair<int, std::string>> data = {
{113, "Nh"}, {114, "Fl"}, {115, "Mc"}, {116, "Lv"},
{117, "Ts"}, {118, "Og"}, {113, "Uut"}, {115, "Uup"}};
for (const auto &pr : data) {
CHECK(pt->getAtomicNumber(pr.second) == pr.first);
}
}
}
TEST_CASE("github #2775", "[valence][bug]") {
SECTION("basics") {
std::string molblock = R"CTAB(bismuth citrate
Mrv1810 11111908592D
14 12 0 0 0 0 999 V2000
7.4050 -0.5957 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
6.6906 -1.0082 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
5.9761 -0.5957 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
6.6906 -1.8332 0.0000 O 0 5 0 0 0 0 0 0 0 0 0 0
7.4050 0.2293 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
6.5800 0.2293 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
6.1675 0.9438 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
6.5800 1.6583 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
5.3425 0.9438 0.0000 O 0 5 0 0 0 0 0 0 0 0 0 0
8.2300 0.2293 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
8.6425 -0.4851 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
8.6425 0.9438 0.0000 O 0 5 0 0 0 0 0 0 0 0 0 0
7.4050 1.0543 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
4.5175 0.9438 0.0000 Bi 0 1 0 0 0 0 0 0 0 0 0 0
1 2 1 0 0 0 0
2 3 2 0 0 0 0
2 4 1 0 0 0 0
1 5 1 0 0 0 0
5 6 1 0 0 0 0
6 7 1 0 0 0 0
7 8 2 0 0 0 0
7 9 1 0 0 0 0
5 10 1 0 0 0 0
10 11 2 0 0 0 0
10 12 1 0 0 0 0
5 13 1 0 0 0 0
M CHG 4 4 -1 9 -1 12 -1 14 3
M END
)CTAB";
std::unique_ptr<RWMol> m(MolBlockToMol(molblock));
REQUIRE(m);
CHECK(m->getAtomWithIdx(13)->getSymbol() == "Bi");
CHECK(m->getAtomWithIdx(13)->getNumImplicitHs() == 0);
}
}
TEST_CASE("RemoveHsParameters", "[molops]") {
SmilesParserParams smilesPs;
smilesPs.removeHs = false;
SECTION("H-H") {
std::unique_ptr<RWMol> m{SmilesToMol("[H][H].[H]O[H]", smilesPs)};
REQUIRE(m);
CHECK(m->getNumAtoms() == 5);
{
RWMol cp(*m);
MolOps::removeHs(cp);
CHECK(cp.getNumAtoms() == 3);
}
{
MolOps::RemoveHsParameters ps;
RWMol cp(*m);
MolOps::removeHs(cp, ps);
CHECK(cp.getNumAtoms() == 3);
}
{
MolOps::RemoveHsParameters ps;
ps.removeOnlyHNeighbors = true;
RWMol cp(*m);
MolOps::removeHs(cp, ps);
CHECK(cp.getNumAtoms() == 1);
}
}
SECTION("dummies") {
std::unique_ptr<RWMol> m{SmilesToMol("[H][*]O[H]", smilesPs)};
REQUIRE(m);
CHECK(m->getNumAtoms() == 4);
{
RWMol cp(*m);
MolOps::removeHs(cp);
CHECK(cp.getNumAtoms() == 3);
}
{
MolOps::RemoveHsParameters ps;
RWMol cp(*m);
MolOps::removeHs(cp, ps);
CHECK(cp.getNumAtoms() == 3);
}
{
MolOps::RemoveHsParameters ps;
ps.removeDummyNeighbors = true;
RWMol cp(*m);
MolOps::removeHs(cp, ps);
CHECK(cp.getNumAtoms() == 2);
}
}
SECTION("chiralHs") {
std::unique_ptr<RWMol> m{SmilesToMol("[C@]12([H])CCC1CO2", smilesPs)};
REQUIRE(m);
CHECK(m->getNumAtoms() == 7);
// artificial wedging since we don't have a conformer
m->getBondBetweenAtoms(0, 1)->setBondDir(Bond::BEGINWEDGE);
{
RWMol cp(*m);
MolOps::removeHs(cp);
CHECK(cp.getNumAtoms() == 6);
}
{
MolOps::RemoveHsParameters ps;
RWMol cp(*m);
MolOps::removeHs(cp, ps);
CHECK(cp.getNumAtoms() == 6);
}
{
MolOps::RemoveHsParameters ps;
ps.removeWithWedgedBond = false;
RWMol cp(*m);
MolOps::removeHs(cp, ps);
CHECK(cp.getNumAtoms() == 7);
}
}
SECTION("degree zero") {
std::unique_ptr<RWMol> m{SmilesToMol("[F-].[H+]", smilesPs)};
REQUIRE(m);
CHECK(m->getNumAtoms() == 2);
{
RWMol cp(*m);
MolOps::removeHs(cp);
CHECK(cp.getNumAtoms() == 2);
}
{
MolOps::RemoveHsParameters ps;
RWMol cp(*m);
MolOps::removeHs(cp, ps);
CHECK(cp.getNumAtoms() == 2);
}
{
MolOps::RemoveHsParameters ps;
ps.removeDegreeZero = true;
RWMol cp(*m);
MolOps::removeHs(cp, ps);
CHECK(cp.getNumAtoms() == 1);
}
}
SECTION("isotopes") {
std::unique_ptr<RWMol> m{SmilesToMol("F[2H]", smilesPs)};
REQUIRE(m);
CHECK(m->getNumAtoms() == 2);
{
RWMol cp(*m);
MolOps::removeHs(cp);
CHECK(cp.getNumAtoms() == 2);
}
{
MolOps::RemoveHsParameters ps;
RWMol cp(*m);
MolOps::removeHs(cp, ps);
CHECK(cp.getNumAtoms() == 2);
}
{
MolOps::RemoveHsParameters ps;
ps.removeIsotopes = true;
RWMol cp(*m);
MolOps::removeHs(cp, ps);
CHECK(cp.getNumAtoms() == 1);
}
}
SECTION("defining bond stereo") {
std::unique_ptr<RWMol> m{SmilesToMol("F/C=N/[H]", smilesPs)};
REQUIRE(m);
CHECK(m->getNumAtoms() == 4);
{
RWMol cp(*m);
MolOps::removeHs(cp);
CHECK(cp.getNumAtoms() == 4);
}
{
MolOps::RemoveHsParameters ps;
RWMol cp(*m);
MolOps::removeHs(cp, ps);
CHECK(cp.getNumAtoms() == 4);
}
{
MolOps::RemoveHsParameters ps;
ps.removeDefiningBondStereo = true;
RWMol cp(*m);
MolOps::removeHs(cp, ps);
CHECK(cp.getNumAtoms() == 3);
}
}
SECTION("Query atoms") {
std::unique_ptr<RWMol> m{SmartsToMol("O[#1]")};
REQUIRE(m);
CHECK(m->getNumAtoms() == 2);
{
RWMol cp(*m);
MolOps::removeHs(cp);
CHECK(cp.getNumAtoms() == 2);
}
{
MolOps::RemoveHsParameters ps;
RWMol cp(*m);
MolOps::removeHs(cp, ps);
CHECK(cp.getNumAtoms() == 2);
}
{
MolOps::RemoveHsParameters ps;
ps.removeWithQuery = true;
RWMol cp(*m);
MolOps::removeHs(cp, ps);
CHECK(cp.getNumAtoms() == 1);
}
}
SECTION("higher degree") {
// this is a silly example
std::unique_ptr<RWMol> m{SmilesToMol("F[H-]F", smilesPs)};
REQUIRE(m);
CHECK(m->getNumAtoms() == 3);
{
RWMol cp(*m);
MolOps::removeHs(cp);
CHECK(cp.getNumAtoms() == 3);
}
{
MolOps::RemoveHsParameters ps;
RWMol cp(*m);
MolOps::removeHs(cp, ps);
CHECK(cp.getNumAtoms() == 3);
}
{
MolOps::RemoveHsParameters ps;
ps.removeHigherDegrees = true;
RWMol cp(*m);
MolOps::removeHs(cp, ps);
CHECK(cp.getNumAtoms() == 2);
}
}
SECTION("mapped Hs") {
std::unique_ptr<RWMol> m{SmilesToMol("[H:1]O[H]", smilesPs)};
REQUIRE(m);
CHECK(m->getNumAtoms() == 3);
{
RWMol cp(*m);
MolOps::removeHs(cp);
CHECK(cp.getNumAtoms() == 1);
}
{
MolOps::RemoveHsParameters ps;
ps.removeMapped = false;
RWMol cp(*m);
MolOps::removeHs(cp, ps);
CHECK(cp.getNumAtoms() == 2);
}
}
SECTION("allHs") {
std::unique_ptr<RWMol> m{SmilesToMol(
"[C@]12([H])CCC1CO2.[H+].F[H-]F.[H][H].[H]*.F/C=C/[H]", smilesPs)};
REQUIRE(m);
// artificial wedging since we don't have a conformer
m->getBondBetweenAtoms(0, 1)->setBondDir(Bond::BEGINWEDGE);
RWMol cp(*m);
MolOps::removeAllHs(cp);
for (auto atom : cp.atoms()) {
CHECK(atom->getAtomicNum() != 1);
}
}
SECTION("allHs2") {
std::unique_ptr<ROMol> m{SmilesToMol(
"[C@]12([H])CCC1CO2.[H+].F[H-]F.[H][H].[H]*.F/C=C/[H]", smilesPs)};
REQUIRE(m);
// artificial wedging since we don't have a conformer
m->getBondBetweenAtoms(0, 1)->setBondDir(Bond::BEGINWEDGE);
std::unique_ptr<ROMol> cp{MolOps::removeAllHs(*m)};
for (auto atom : cp->atoms()) {
CHECK(atom->getAtomicNum() != 1);
}
}
}
#endif
TEST_CASE("github #2895: acepentalene aromaticity perception ",
"[molops][bug][aromaticity]") {
SECTION("acepentalene") {
std::unique_ptr<RWMol> m{SmilesToMol("C1=CC2=CC=C3C2=C1C=C3")};
REQUIRE(m);
auto smi = MolToSmiles(*m);
CHECK(smi == "C1=CC2=C3C1=CC=C3C=C2");
}
}
TEST_CASE("github #3256: fused ring aromaticity perception",
"[molops][bug][aromaticity]") {
SECTION("nitrogen only central ring") {
auto mol = "C1=CN2C3=CC=CN3C3=CC=CN3C2=C1"_smiles;
REQUIRE(mol);
for (const auto b: mol->bonds()) {
CHECK(b->getBondType() == Bond::AROMATIC);
}
auto smi = MolToSmiles(*mol);
CHECK(smi == "c1cc2n(c1)c1cccn1c1cccn21");
}
}
TEST_CASE("phosphine and arsine chirality", "[Chirality]") {
SECTION("chiral center recognized") {
auto mol1 = "C[P@](C1CCCC1)C1=CC=CC=C1"_smiles;
auto mol2 = "C[As@](C1CCCC1)C1=CC=CC=C1"_smiles;
REQUIRE(mol1);
REQUIRE(mol2);
CHECK(mol1->getAtomWithIdx(1)->getChiralTag() != Atom::CHI_UNSPECIFIED);
CHECK(mol2->getAtomWithIdx(1)->getChiralTag() != Atom::CHI_UNSPECIFIED);
}
SECTION("chiral center selective") {
auto mol1 = "C[P@](C)C1CCCCC1"_smiles;
auto mol2 = "C[As@](C)C1CCCCC1"_smiles;
REQUIRE(mol1);
REQUIRE(mol2);
CHECK(mol1->getAtomWithIdx(1)->getChiralTag() == Atom::CHI_UNSPECIFIED);
CHECK(mol2->getAtomWithIdx(1)->getChiralTag() == Atom::CHI_UNSPECIFIED);
}
SECTION("chiral center specific: P") {
auto mol1 = "C[P@](C1CCCC1)C1=CC=CC=C1"_smiles;
auto mol2 = "C[P@@](C1CCCC1)C1=CC=CC=C1"_smiles;
REQUIRE(mol1);
REQUIRE(mol2);
CHECK(MolToSmiles(*mol1) != MolToSmiles(*mol2));
}
SECTION("chiral center specific: As") {
auto mol1 = "C[As@](C1CCCC1)C1=CC=CC=C1"_smiles;
auto mol2 = "C[As@@](C1CCCC1)C1=CC=CC=C1"_smiles;
REQUIRE(mol1);
REQUIRE(mol2);
CHECK(MolToSmiles(*mol1) != MolToSmiles(*mol2));
}
SECTION("chiral center, implicit H: P") {
auto mol1 = "C[P@H]C1CCCCC1"_smiles;
auto mol2 = "C[P@@H]C1CCCCC1"_smiles;
REQUIRE(mol1);
REQUIRE(mol2);
CHECK(mol1->getAtomWithIdx(1)->getChiralTag() != Atom::CHI_UNSPECIFIED);
CHECK(mol1->getAtomWithIdx(1)->getChiralTag() != Atom::CHI_UNSPECIFIED);
}
SECTION("chiral center, implicit H: As") {
auto mol1 = "C[As@H]C1CCCCC1"_smiles;
auto mol2 = "C[As@@H]C1CCCCC1"_smiles;
REQUIRE(mol1);
REQUIRE(mol2);
CHECK(mol1->getAtomWithIdx(1)->getChiralTag() != Atom::CHI_UNSPECIFIED);
CHECK(mol1->getAtomWithIdx(1)->getChiralTag() != Atom::CHI_UNSPECIFIED);
}
SECTION("chiral center specific, implicit H: P") {
auto mol1 = "C[P@H]C1CCCCC1"_smiles;
auto mol2 = "C[P@@H]C1CCCCC1"_smiles;
REQUIRE(mol1);
REQUIRE(mol2);
CHECK(MolToSmiles(*mol1) != MolToSmiles(*mol2));
}
SECTION("chiral center specific, implicit H: As") {
auto mol1 = "C[As@H]C1CCCCC1"_smiles;
auto mol2 = "C[As@@H]C1CCCCC1"_smiles;
REQUIRE(mol1);
REQUIRE(mol2);
CHECK(MolToSmiles(*mol1) != MolToSmiles(*mol2));
}
}
TEST_CASE("github #2890", "[bug][molops][stereo]") {
auto mol = "CC=CC"_smiles;
REQUIRE(mol);
auto bond = mol->getBondWithIdx(1);
bond->setStereo(Bond::STEREOANY);
REQUIRE(bond->getStereoAtoms().empty());
MolOps::findPotentialStereoBonds(*mol);
CHECK(bond->getStereo() == Bond::STEREOANY);
CHECK(bond->getStereoAtoms().size() == 2);
}
TEST_CASE("github #3150 MolOps::removeHs removes hydrides", "[bug][molops]") {
SmilesParserParams smilesPs;
smilesPs.removeHs = false;
SECTION("Hydride ion remove Hydrides false") {
std::unique_ptr<RWMol> m{SmilesToMol("[H-]", smilesPs)};
REQUIRE(m);
MolOps::RemoveHsParameters ps;
ps.removeHydrides = false;
RWMol cp(*m);
MolOps::removeHs(cp, ps);
// H atom not removed in this case because by default H atoms with degree 0
// are not removed
CHECK(cp.getNumAtoms() == 1);
CHECK(MolOps::getFormalCharge(cp) == -1);
}
SECTION("Hydride ion remove Hydrides true") {
std::unique_ptr<RWMol> m{SmilesToMol("[H-]", smilesPs)};
REQUIRE(m);
MolOps::RemoveHsParameters ps;
ps.removeHydrides = true;
RWMol cp(*m);
MolOps::removeHs(cp, ps);
// H atom not removed in this case because by default H atoms with degree 0
// are not removed
CHECK(cp.getNumAtoms() == 1);
CHECK(MolOps::getFormalCharge(cp) == -1);
}
SECTION("Water") {
std::unique_ptr<RWMol> m{SmilesToMol("[OH+][H-]", smilesPs)};
REQUIRE(m);
MolOps::RemoveHsParameters ps;
ps.removeHydrides = false;
RWMol cp(*m);
MolOps::removeHs(cp, ps);
CHECK(cp.getNumAtoms() == 2);
CHECK(MolOps::getFormalCharge(cp) == 0);
}
SECTION("Water remove Hydrides true") {
std::unique_ptr<RWMol> m{SmilesToMol("[OH+][H-]", smilesPs)};
REQUIRE(m);
MolOps::RemoveHsParameters ps;
ps.removeHydrides = true;
RWMol cp(*m);
MolOps::removeHs(cp, ps);
CHECK(cp.getNumAtoms() == 1);
CHECK(MolOps::getFormalCharge(cp) == 1);
}
SECTION("Iron Hydride") {
std::unique_ptr<RWMol> m{SmilesToMol("[Fe+2]<-[H-]", smilesPs)};
REQUIRE(m);
MolOps::RemoveHsParameters ps;
ps.removeHydrides = false;
RWMol cp(*m);
MolOps::removeHs(cp, ps);
CHECK(cp.getNumAtoms() == 2);
CHECK(MolOps::getFormalCharge(cp) == 1);
}
SECTION("Iron Hydride remove Hydrides") {
std::unique_ptr<RWMol> m{SmilesToMol("[Fe+2]<-[H-]", smilesPs)};
REQUIRE(m);
MolOps::RemoveHsParameters ps;
ps.removeHydrides = true;
RWMol cp(*m);
MolOps::removeHs(cp, ps);
CHECK(cp.getNumAtoms() == 1);
CHECK(MolOps::getFormalCharge(cp) == 2);
}
SECTION("Ferrous Hydroxide") {
std::unique_ptr<RWMol> m{SmilesToMol("[Fe+2]<-[OH-]", smilesPs)};
REQUIRE(m);
MolOps::RemoveHsParameters ps;
ps.removeHydrides = false;
RWMol cp(*m);
MolOps::removeHs(cp, ps);
CHECK(cp.getNumAtoms() == 2);
CHECK(MolOps::getFormalCharge(cp) == 1);
}
SECTION("Ferrous Hydroxide remove Hydrides") {
std::unique_ptr<RWMol> m{SmilesToMol("[Fe+2]<-[OH-]", smilesPs)};
REQUIRE(m);
MolOps::RemoveHsParameters ps;
ps.removeHydrides = true;
RWMol cp(*m);
MolOps::removeHs(cp, ps);
CHECK(cp.getNumAtoms() == 2);
CHECK(MolOps::getFormalCharge(cp) == 1);
}
SECTION("Remove All Hs in Hydrides Ferrous Hydride") {
std::unique_ptr<RWMol> m{SmilesToMol("[Fe+2]<-[H-]", smilesPs)};
REQUIRE(m);
RWMol cp(*m);
MolOps::removeAllHs(cp);
CHECK(cp.getNumAtoms() == 1);
CHECK(MolOps::getFormalCharge(cp) == 2);
}
SECTION("Remove All Hs in Hydrides Water") {
std::unique_ptr<RWMol> m{SmilesToMol("[OH+][H-]", smilesPs)};
REQUIRE(m);
RWMol cp(*m);
MolOps::removeAllHs(cp);
CHECK(cp.getNumAtoms() == 1);
CHECK(MolOps::getFormalCharge(cp) == 1);
}
}
TEST_CASE("hybridization of unknown atom types", "[bug][molops]") {
SECTION("Basics") {
auto m = "[U][U][U]"_smiles;
REQUIRE(m);
for (const auto atom : m->atoms()) {
CHECK(atom->getHybridization() == Atom::HybridizationType::S);
}
}
SECTION("comprehensive") {
std::string smiles = "";
for (unsigned int i = 89; i <= 118; ++i) {
smiles += (boost::format("[#%d]") % i).str();
}
std::unique_ptr<ROMol> m(SmilesToMol(smiles));
REQUIRE(m);
for (const auto atom : m->atoms()) {
CHECK(atom->getHybridization() == Atom::HybridizationType::S);
}
}
}