A collection of bug fixes (#2608)

* Fixes #2602

* Fixes #2605

* Remove vestigial isEarlyAtom() definition in Kekulize.cpp

* Fixes #2606

* Fixes #2607

adds allowed valence 2 for Sn and Pb

* Fixes #2610

* update in response to review
This commit is contained in:
Greg Landrum
2019-08-15 04:53:23 +02:00
parent a8e01de200
commit 11584a9067
7 changed files with 249 additions and 62 deletions

View File

@@ -25,7 +25,16 @@ namespace RDKit {
// Determine whether or not a molecule is to the left of Carbon
bool isEarlyAtom(int atomicNum) {
return (4 - PeriodicTable::getTable()->getNouterElecs(atomicNum)) > 0;
int eshift = 4 - PeriodicTable::getTable()->getNouterElecs(atomicNum);
if (eshift > 0) {
return true;
} else if (eshift < 0) {
return false;
} else {
// we make an arbitrary decision that Ge, Sn, and Pb
// are treated like early elements (part of github #2606)
return atomicNum > 14;
}
}
Atom::Atom() : RDProps() {

View File

@@ -20,12 +20,6 @@ namespace RDKit {
// Local utility namespace
namespace {
// Determine whether or not a molecule is to the left of Carbon
bool isEarlyAtom(int atomicNum) {
// FIX: this is duplicated from Atom.cpp. It should be defined once!
return (4 - PeriodicTable::getTable()->getNouterElecs(atomicNum)) > 0;
}
void backTrack(RWMol &mol, INT_INT_DEQ_MAP &options, int lastOpt,
INT_VECT &done, INT_DEQUE &aqueue,
boost::dynamic_bitset<> &dBndCands,

View File

@@ -252,14 +252,14 @@ std::pair<unsigned int, std::vector<unsigned int>> *Reionizer::weakestIonized(
}
Uncharger::Uncharger()
: pos_h(SmartsToMol("[+!H0!$(*~[-])]")),
pos_quat(SmartsToMol("[+H0!$(*~[-])]")),
neg(SmartsToMol("[-!$(*~[+H0])]")),
: pos_h(SmartsToMol("[+,+2,+3,+4;!H0!$(*~[-])]")),
pos_noh(SmartsToMol("[+,+2,+3,+4;H0;!$(*~[-])]")),
neg(SmartsToMol("[-!$(*~[+,+2,+3,+4])]")),
neg_acid(SmartsToMol("[$([O-][C,P,S]=O),$([n-]1nnnc1),$(n1[n-]nnc1)]")){};
Uncharger::Uncharger(const Uncharger &other) {
pos_h = other.pos_h;
pos_quat = other.pos_quat;
pos_noh = other.pos_noh;
neg = other.neg;
neg_acid = other.neg_acid;
};
@@ -277,7 +277,11 @@ ROMol *Uncharger::uncharge(const ROMol &mol) {
// Get atom ids for matches
SubstructMatch(*omol, *(this->pos_h), p_matches);
unsigned int q_matched = SubstructMatch(*omol, *(this->pos_quat), q_matches);
SubstructMatch(*omol, *(this->pos_noh), q_matches);
unsigned int q_matched = 0;
for (const auto &match : q_matches) {
q_matched += omol->getAtomWithIdx(match[0].second)->getFormalCharge();
}
unsigned int n_matched = SubstructMatch(*omol, *(this->neg), n_matches);
unsigned int a_matched = SubstructMatch(*omol, *(this->neg_acid), a_matches);
@@ -377,24 +381,36 @@ ROMol *Uncharger::uncharge(const ROMol &mol) {
}
}
}
// Neutralize positive charges
std::vector<unsigned int> p_idx_matches;
for (const auto &match : p_matches) {
for (const auto &pair : match) {
p_idx_matches.push_back(pair.second);
}
// Neutralize cations until there is no longer a net charge remaining:
int netCharge = 0;
for (const auto &at : omol->atoms()) {
netCharge += at->getFormalCharge();
}
for (const auto &idx : p_idx_matches) {
Atom *atom = omol->getAtomWithIdx(idx);
if (!atom->getNumExplicitHs()) {
// atoms from places like Mol blocks are normally missing explicit Hs:
atom->setNumExplicitHs(atom->getTotalNumHs());
if (netCharge > 0) {
// Neutralize positive charges where H counts can be adjusted
std::vector<unsigned int> p_idx_matches;
for (const auto &match : p_matches) {
for (const auto &pair : match) {
p_idx_matches.push_back(pair.second);
}
}
atom->setNoImplicit(true);
while (atom->getFormalCharge() > 0 && atom->getNumExplicitHs() > 0) {
atom->setNumExplicitHs(atom->getTotalNumHs() - 1);
atom->setFormalCharge(atom->getFormalCharge() - 1);
BOOST_LOG(rdInfoLog) << "Removed positive charge.\n";
for (const auto &idx : p_idx_matches) {
Atom *atom = omol->getAtomWithIdx(idx);
if (!atom->getNumExplicitHs()) {
// atoms from places like Mol blocks are normally missing explicit Hs:
atom->setNumExplicitHs(atom->getTotalNumHs());
}
atom->setNoImplicit(true);
while (atom->getFormalCharge() > 0 && atom->getNumExplicitHs() > 0 &&
netCharge > 0) {
atom->setNumExplicitHs(atom->getTotalNumHs() - 1);
atom->setFormalCharge(atom->getFormalCharge() - 1);
--netCharge;
BOOST_LOG(rdInfoLog) << "Removed positive charge.\n";
}
if (!netCharge) break;
}
}
return omol;

View File

@@ -13,8 +13,8 @@
*/
#include <RDGeneral/export.h>
#ifndef __RD_CHARGE_H__
#define __RD_CHARGE_H__
#ifndef RD_CHARGE_H
#define RD_CHARGE_H
#include "MolStandardize.h"
#include <Catalogs/Catalog.h>
@@ -116,7 +116,7 @@ class RDKIT_MOLSTANDARDIZE_EXPORT Uncharger {
private:
bool df_canonicalOrdering = true;
std::shared_ptr<ROMol> pos_h;
std::shared_ptr<ROMol> pos_quat;
std::shared_ptr<ROMol> pos_noh;
std::shared_ptr<ROMol> neg;
std::shared_ptr<ROMol> neg_acid;
}; // Uncharger class

View File

@@ -57,7 +57,7 @@ TEST_CASE("SKIP_IF_ALL_MATCH") {
}
}
TEST_CASE("symmetry in the uncharger") {
TEST_CASE("symmetry in the uncharger", "uncharger") {
SECTION("case 1") {
auto m = "C[N+](C)(C)CC(C(=O)[O-])CC(=O)[O-]"_smiles;
REQUIRE(m);
@@ -91,7 +91,7 @@ TEST_CASE("symmetry in the uncharger") {
}
}
TEST_CASE("uncharger bug with duplicates") {
TEST_CASE("uncharger bug with duplicates", "uncharger") {
SECTION("case 1") {
auto m = "[NH3+]CC([O-])C[O-]"_smiles;
REQUIRE(m);
@@ -130,7 +130,7 @@ TEST_CASE("uncharger bug with duplicates") {
TEST_CASE(
"github #2411: MolStandardize: FragmentRemover should not sanitize "
"fragments ") {
"fragments") {
SECTION("demo") {
std::string smi = "CN(C)(C)C.Cl";
bool debugParse = false;
@@ -147,7 +147,7 @@ TEST_CASE(
TEST_CASE(
"github #2452: incorrectly removing charge from boron anions"
"fragments ") {
"fragments,uncharger") {
SECTION("demo") {
auto m = "C[B-](C)(C)C"_smiles;
REQUIRE(m);
@@ -170,4 +170,74 @@ TEST_CASE(
CHECK(outm->getAtomWithIdx(1)->getFormalCharge() == 0);
CHECK(MolToSmiles(*outm) == "CB(C)C");
}
}
TEST_CASE("github #2602: Uncharger ignores dications", "uncharger") {
SECTION("demo") {
auto m = "[O-]CCC[O-].[Ca+2]"_smiles;
REQUIRE(m);
bool canonicalOrdering = true;
MolStandardize::Uncharger uncharger(canonicalOrdering);
std::unique_ptr<ROMol> outm(uncharger.uncharge(*m));
REQUIRE(outm);
CHECK(outm->getAtomWithIdx(5)->getFormalCharge() == 2);
CHECK(outm->getAtomWithIdx(0)->getFormalCharge() == -1);
CHECK(outm->getAtomWithIdx(4)->getFormalCharge() == -1);
CHECK(MolToSmiles(*outm) == "[Ca+2].[O-]CCC[O-]");
}
}
TEST_CASE(
"github #2605: Uncharger incorrectly neutralizes cations when "
"non-neutralizable anions are present.",
"uncharger") {
SECTION("demo") {
auto m = "F[B-](F)(F)F.[NH3+]CCC"_smiles;
REQUIRE(m);
bool canonicalOrdering = true;
MolStandardize::Uncharger uncharger(canonicalOrdering);
std::unique_ptr<ROMol> outm(uncharger.uncharge(*m));
REQUIRE(outm);
CHECK(outm->getAtomWithIdx(1)->getFormalCharge() == -1);
CHECK(outm->getAtomWithIdx(5)->getFormalCharge() == 1);
CHECK(MolToSmiles(*outm) == "CCC[NH3+].F[B-](F)(F)F");
}
SECTION("multiple positively charged sites") {
auto m = "F[B-](F)(F)F.[NH3+]CC=C[NH3+]"_smiles;
REQUIRE(m);
bool canonicalOrdering = true;
MolStandardize::Uncharger uncharger(canonicalOrdering);
std::unique_ptr<ROMol> outm(uncharger.uncharge(*m));
REQUIRE(outm);
CHECK(outm->getAtomWithIdx(1)->getFormalCharge() == -1);
CHECK(outm->getAtomWithIdx(5)->getFormalCharge() == 0);
CHECK(outm->getAtomWithIdx(9)->getFormalCharge() == 1);
CHECK(MolToSmiles(*outm) == "F[B-](F)(F)F.NCC=C[NH3+]");
}
SECTION("make sure we don't go too far") {
auto m = "F[B-](F)(F)F.[NH4+2]CCC"_smiles; // totally bogus structure
REQUIRE(m);
bool canonicalOrdering = true;
MolStandardize::Uncharger uncharger(canonicalOrdering);
std::unique_ptr<ROMol> outm(uncharger.uncharge(*m));
REQUIRE(outm);
CHECK(outm->getAtomWithIdx(1)->getFormalCharge() == -1);
CHECK(outm->getAtomWithIdx(5)->getFormalCharge() == 1);
CHECK(MolToSmiles(*outm) == "CCC[NH3+].F[B-](F)(F)F");
}
}
TEST_CASE("github #2610: Uncharger incorrectly modifying a zwitterion.",
"uncharger") {
SECTION("demo") {
auto m = "C1=CC=CC[NH+]1-[O-]"_smiles;
REQUIRE(m);
bool canonicalOrdering = true;
MolStandardize::Uncharger uncharger(canonicalOrdering);
std::unique_ptr<ROMol> outm(uncharger.uncharge(*m));
REQUIRE(outm);
CHECK(outm->getAtomWithIdx(5)->getFormalCharge() == 1);
CHECK(outm->getAtomWithIdx(6)->getFormalCharge() == -1);
CHECK(MolToSmiles(*outm) == "[O-][NH+]1C=CC=CC1");
}
}

View File

@@ -74,7 +74,7 @@ const std::string periodicTableAtomData =
47 Ag 1.59 1.34 1.72 107.868 11 107 106.905097 -1 \n \
48 Cd 1.69 1.48 1.58 112.412 2 114 113.9033585 -1 \n \
49 In 1.63 1.44 1.93 114.818 3 115 114.903878 3 \n \
50 Sn 1.46 1.385 2.17 118.711 4 120 119.9021947 4 \n \
50 Sn 1.46 1.385 2.17 118.711 4 120 119.9021947 2 4 \n \
51 Sb 1.46 1.4 2.2 121.76 5 121 120.9038157 3 5 7 \n \
52 Te 1.47 1.378 2.06 127.6 6 130 129.9062244 2 4 6\n \
53 I 1.4 1.387 2.15 126.904 7 127 126.904473 1 2 5 \n \
@@ -106,7 +106,7 @@ const std::string periodicTableAtomData =
79 Au 1.5 1.34 1.66 196.967 11 197 196.9665687 -1 \n \
80 Hg 1.7 1.49 1.55 200.59 2 202 201.970643 -1 \n"
"81 Tl 1.55 1.48 1.96 204.383 3 205 204.9744275 3 \n \
82 Pb 1.54 1.48 2.02 207.2 4 208 207.9766521 4 \n \
82 Pb 1.54 1.48 2.02 207.2 4 208 207.9766521 2 4 \n \
83 Bi 1.54 1.45 1.7 208.98 5 209 208.9803987 3 5 7 \n \
84 Po 1.68 1.46 1.7 209 6 209 208.9824304 2 \n \
85 At 1.7 1.45 1.7 210 7 210 209.987148 1 \n \
@@ -3345,3 +3345,4 @@ const std::string isotopesAtomData[] = {
118 Og 295 295.21624 0 \n",
"EOS"};
} // namespace RDKit

View File

@@ -257,10 +257,12 @@ TEST_CASE("github #908: AddHs() using 3D coordinates with 2D conformations",
}
#endif
TEST_CASE("github #2437: Canon::rankMolAtoms results in crossed double bonds in rings",
"[bug, molops]") {
TEST_CASE(
"github #2437: Canon::rankMolAtoms results in crossed double bonds in "
"rings",
"[bug, molops]") {
SECTION("underlying problem") {
std::string molb=R"CTAB(testmol
std::string molb = R"CTAB(testmol
Mrv1824 05081910082D
4 4 0 0 0 0 999 V2000
@@ -274,21 +276,21 @@ TEST_CASE("github #2437: Canon::rankMolAtoms results in crossed double bonds in
2 4 2 0 0 0 0
M END
)CTAB";
bool sanitize=false;
bool removeHs=false;
std::unique_ptr<RWMol> mol(MolBlockToMol(molb,sanitize,removeHs));
bool sanitize = false;
bool removeHs = false;
std::unique_ptr<RWMol> mol(MolBlockToMol(molb, sanitize, removeHs));
REQUIRE(mol);
mol->updatePropertyCache();
CHECK(mol->getBondWithIdx(3)->getBondType()==Bond::BondType::DOUBLE);
CHECK(mol->getBondWithIdx(3)->getBondDir()==Bond::BondDir::NONE);
CHECK(mol->getBondWithIdx(3)->getBondType() == Bond::BondType::DOUBLE);
CHECK(mol->getBondWithIdx(3)->getBondDir() == Bond::BondDir::NONE);
std::vector<unsigned int> ranks;
CHECK(!mol->getRingInfo()->isInitialized());
Canon::rankMolAtoms(*mol,ranks);
Canon::rankMolAtoms(*mol, ranks);
CHECK(!mol->getRingInfo()->isInitialized());
}
SECTION("as discovered") {
std::string molb = R"CTAB(testmol
std::string molb = R"CTAB(testmol
Mrv1824 05081910082D
4 4 0 0 0 0 999 V2000
@@ -302,21 +304,21 @@ M END
2 4 2 0 0 0 0
M END
)CTAB";
bool sanitize = false;
bool removeHs = false;
std::unique_ptr<RWMol> mol(MolBlockToMol(molb, sanitize, removeHs));
REQUIRE(mol);
mol->updatePropertyCache();
CHECK(mol->getBondWithIdx(3)->getBondType() == Bond::BondType::DOUBLE);
CHECK(mol->getBondWithIdx(3)->getBondDir() == Bond::BondDir::NONE);
auto nmb = MolToMolBlock(*mol);
CHECK(nmb.find("2 4 2 3") == std::string::npos);
CHECK(nmb.find("2 4 2 0") != std::string::npos);
std::vector<unsigned int> ranks;
Canon::rankMolAtoms(*mol, ranks);
nmb = MolToMolBlock(*mol);
CHECK(nmb.find("2 4 2 3") == std::string::npos);
CHECK(nmb.find("2 4 2 0") != std::string::npos);
bool sanitize = false;
bool removeHs = false;
std::unique_ptr<RWMol> mol(MolBlockToMol(molb, sanitize, removeHs));
REQUIRE(mol);
mol->updatePropertyCache();
CHECK(mol->getBondWithIdx(3)->getBondType() == Bond::BondType::DOUBLE);
CHECK(mol->getBondWithIdx(3)->getBondDir() == Bond::BondDir::NONE);
auto nmb = MolToMolBlock(*mol);
CHECK(nmb.find("2 4 2 3") == std::string::npos);
CHECK(nmb.find("2 4 2 0") != std::string::npos);
std::vector<unsigned int> ranks;
Canon::rankMolAtoms(*mol, ranks);
nmb = MolToMolBlock(*mol);
CHECK(nmb.find("2 4 2 3") == std::string::npos);
CHECK(nmb.find("2 4 2 0") != std::string::npos);
}
}
TEST_CASE(
@@ -337,3 +339,98 @@ M END)CTAB";
CHECK(mol->getAtomWithIdx(0)->getTotalNumHs() == 0);
}
}
TEST_CASE(
"github #2606: Bad valence corrections on Pb, Sn"
"[bug, molops]") {
SECTION("basics-Pb") {
std::string mb = R"CTAB(
Mrv1810 08141905562D
5 0 0 0 0 0 999 V2000
-3.6316 -0.4737 0.0000 Pb 0 0 0 0 0 0 0 0 0 0 0 0
-3.6541 0.3609 0.0000 O 0 5 0 0 0 0 0 0 0 0 0 0
-2.4586 -0.5188 0.0000 O 0 5 0 0 0 0 0 0 0 0 0 0
-3.6992 -1.5338 0.0000 O 0 5 0 0 0 0 0 0 0 0 0 0
-4.5789 -0.4286 0.0000 O 0 5 0 0 0 0 0 0 0 0 0 0
M CHG 5 1 4 2 -1 3 -1 4 -1 5 -1
M END
)CTAB";
std::unique_ptr<ROMol> mol(MolBlockToMol(mb));
REQUIRE(mol);
CHECK(mol->getAtomWithIdx(0)->getFormalCharge() == 4);
CHECK(mol->getAtomWithIdx(0)->getTotalNumHs() == 0);
}
SECTION("basics-Sn") {
std::string mb = R"CTAB(
Mrv1810 08141905562D
5 0 0 0 0 0 999 V2000
-3.6316 -0.4737 0.0000 Sn 0 0 0 0 0 0 0 0 0 0 0 0
-3.6541 0.3609 0.0000 O 0 5 0 0 0 0 0 0 0 0 0 0
-2.4586 -0.5188 0.0000 O 0 5 0 0 0 0 0 0 0 0 0 0
-3.6992 -1.5338 0.0000 O 0 5 0 0 0 0 0 0 0 0 0 0
-4.5789 -0.4286 0.0000 O 0 5 0 0 0 0 0 0 0 0 0 0
M CHG 5 1 4 2 -1 3 -1 4 -1 5 -1
M END
)CTAB";
std::unique_ptr<ROMol> mol(MolBlockToMol(mb));
REQUIRE(mol);
CHECK(mol->getAtomWithIdx(0)->getFormalCharge() == 4);
CHECK(mol->getAtomWithIdx(0)->getTotalNumHs() == 0);
}
SECTION("basics-Ge") {
std::string mb = R"CTAB(
Mrv1810 08141905562D
5 0 0 0 0 0 999 V2000
-3.6316 -0.4737 0.0000 Ge 0 0 0 0 0 0 0 0 0 0 0 0
-3.6541 0.3609 0.0000 O 0 5 0 0 0 0 0 0 0 0 0 0
-2.4586 -0.5188 0.0000 O 0 5 0 0 0 0 0 0 0 0 0 0
-3.6992 -1.5338 0.0000 O 0 5 0 0 0 0 0 0 0 0 0 0
-4.5789 -0.4286 0.0000 O 0 5 0 0 0 0 0 0 0 0 0 0
M CHG 5 1 4 2 -1 3 -1 4 -1 5 -1
M END
)CTAB";
std::unique_ptr<ROMol> mol(MolBlockToMol(mb));
REQUIRE(mol);
CHECK(mol->getAtomWithIdx(0)->getFormalCharge() == 4);
CHECK(mol->getAtomWithIdx(0)->getTotalNumHs() == 0);
}
}
TEST_CASE(
"github #2607: Pb, Sn should support valence 2"
"[bug, molops]") {
SECTION("basics-Pb") {
std::string mb = R"CTAB(
Mrv1810 08141905562D
3 0 0 0 0 0 999 V2000
-3.6316 -0.4737 0.0000 Pb 0 0 0 0 0 0 0 0 0 0 0 0
-3.6541 0.3609 0.0000 O 0 5 0 0 0 0 0 0 0 0 0 0
-2.4586 -0.5188 0.0000 O 0 5 0 0 0 0 0 0 0 0 0 0
M CHG 3 1 2 2 -1 3 -1
M END
)CTAB";
std::unique_ptr<ROMol> mol(MolBlockToMol(mb));
REQUIRE(mol);
CHECK(mol->getAtomWithIdx(0)->getFormalCharge() == 2);
CHECK(mol->getAtomWithIdx(0)->getTotalNumHs() == 0);
}
SECTION("basics-Sn") {
std::string mb = R"CTAB(
Mrv1810 08141905562D
3 0 0 0 0 0 999 V2000
-3.6316 -0.4737 0.0000 Sn 0 0 0 0 0 0 0 0 0 0 0 0
-3.6541 0.3609 0.0000 O 0 5 0 0 0 0 0 0 0 0 0 0
-2.4586 -0.5188 0.0000 O 0 5 0 0 0 0 0 0 0 0 0 0
M CHG 3 1 2 2 -1 3 -1
M END
)CTAB";
std::unique_ptr<ROMol> mol(MolBlockToMol(mb));
REQUIRE(mol);
CHECK(mol->getAtomWithIdx(0)->getFormalCharge() == 2);
CHECK(mol->getAtomWithIdx(0)->getTotalNumHs() == 0);
}
}