Fix oxidation numbers calculation (#6266)

* Skip dative bonds when calculating oxidation numbers.

* Add extra test file.

---------

Co-authored-by: David Cosgrove <david@cozchemix.co.uk>
This commit is contained in:
David Cosgrove
2023-04-03 08:55:59 +01:00
committed by GitHub
parent e6cf9e7768
commit aef426b247
5 changed files with 67 additions and 3 deletions

View File

@@ -60,6 +60,11 @@ int calcOxidationNumberByEN(const Atom *atom) {
float parEN = get_en(atom->getAtomicNum());
for (const auto &bond : atom->getOwningMol().atomBonds(atom)) {
if (bond->getBondType() == Bond::DATIVE || bond->getBondType() == Bond::DATIVEONE
|| bond->getBondType() == Bond::DATIVEL || bond->getBondType() == Bond::DATIVER
|| bond->getBondType() == Bond::NONE) {
continue;
}
auto otherAtom = bond->getOtherAtom(atom);
if (otherAtom->getAtomicNum() > 1) {
float en_diff = parEN - get_en(otherAtom->getAtomicNum());

View File

@@ -29,6 +29,7 @@ namespace Descriptors {
* Calculates the oxidation numbers (states) of the atoms in a molecule
* and stores them in the property _OxidationNumber on the atoms. Uses Pauling
* electronegativities.
* This is experimental code, still under development.
*
* @param mol the molecule of interest
*/

View File

@@ -1828,7 +1828,8 @@ BOOST_PYTHON_MODULE(rdMolDescriptors) {
docString =
"Adds the oxidation number/state to the atoms of a molecule as"
" property OxidationNumber on each atom. Use Pauling"
" electronegativities.";
" electronegativities."
" This is experimental code, still under development.";
python::def("CalcOxidationNumbers", RDKit::Descriptors::calcOxidationNumbers,
(python::arg("mol")), docString.c_str());
#endif

View File

@@ -274,7 +274,7 @@ TEST_CASE("Oxidation numbers") {
std::string file1 =
rdbase + "/Code/GraphMol/MolStandardize/test_data/ferrocene.mol";
std::vector<int> expected{-2, -1, -1, -1, -1, -2, -1,
-1, -1, -1, 2, 1, 1};
-1, -1, -1, 2, 0, 0};
bool takeOwnership = true;
SDMolSupplier mol_supplier(file1, takeOwnership);
std::unique_ptr<ROMol> m1(mol_supplier.next());
@@ -295,7 +295,7 @@ TEST_CASE("Oxidation numbers") {
RWMol m2(*m1);
RDKit::MolOps::Kekulize(m2);
std::vector<unsigned int> ats{0, 5, 10, 13, 14, 19, 20, 21, 42, 43, 44};
std::vector<int> expected{-2, -2, 2, 4, 4, 2, -2, -2, -1, 0, -1};
std::vector<int> expected{-2, -2, 2, 3, 3, 2, -1, -1, -1, 0, -1};
Descriptors::calcOxidationNumbers(m2);
for (unsigned int i = 0; i < ats.size(); ++i) {
auto a = m2.getAtomWithIdx(ats[i]);
@@ -303,6 +303,29 @@ TEST_CASE("Oxidation numbers") {
expected[i]);
}
}
{
  // MOL_00104.mol: oxidation numbers are checked twice, first on the molecule
  // as read (after kekulization), then again after hapticBondsToDative() has
  // rewritten the bonds.
  std::string file3 =
      rdbase + "/Code/GraphMol/MolStandardize/test_data/MOL_00104.mol";
  bool takeOwnership = true;
  SDMolSupplier mol_supplier(file3, takeOwnership);
  std::unique_ptr<ROMol> m1(mol_supplier.next());
  REQUIRE(m1);
  RWMol m2(*m1);
  RDKit::MolOps::Kekulize(m2);

  // Expected values are looked up by atom index, so the atom count must match
  // the table size; otherwise the loop below would index past the end.
  std::vector<int> expected{-3, -1, -2, 0, 2, -3, -1, -2, 0, -1, -1, 2};
  REQUIRE(m2.getNumAtoms() == expected.size());
  Descriptors::calcOxidationNumbers(m2);
  for (auto &a : m2.atoms()) {
    CHECK(a->getProp<int>(common_properties::OxidationNumber) ==
          expected[a->getIdx()]);
  }

  // The second table is two entries shorter than the first; presumably the
  // conversion removes the two dummy atoms (hence "NoDummies"). Guard the
  // size again so a change in that behaviour fails cleanly.
  RDKit::MolOps::hapticBondsToDative(m2);
  std::vector<int> expectedNoDummies{-3, -1, -2, 2, -3, -1, -2, -1, -1, 2};
  REQUIRE(m2.getNumAtoms() == expectedNoDummies.size());
  Descriptors::calcOxidationNumbers(m2);
  for (auto &a : m2.atoms()) {
    CHECK(a->getProp<int>(common_properties::OxidationNumber) ==
          expectedNoDummies[a->getIdx()]);
  }
}
}
SECTION("Syngenta tests") {
// These are from

View File

@@ -0,0 +1,34 @@
Mrv2219 03302314152D
0 0 0 0 0 999 V3000
M V30 BEGIN CTAB
M V30 COUNTS 12 10 0 0 0
M V30 BEGIN ATOM
M V30 1 C -20.042 -1.7514 0 0 CHG=-1
M V30 2 C -18.7083 -2.5214 0 0
M V30 3 C -17.3746 -1.7514 0 0
M V30 4 * -18.7083 -2.0081 0 0
M V30 5 Pd -18.7083 1.0719 0 0 CHG=2
M V30 6 C -20.042 6.5864 0 0 CHG=-1
M V30 7 C -18.7083 5.8164 0 0
M V30 8 C -17.3746 6.5864 0 0
M V30 9 * -18.7083 6.3298 0 0
M V30 10 Cl -17.6193 2.1608 0 0 CHG=-1
M V30 11 Cl -19.7972 2.1608 0 0 CHG=-1
M V30 12 Pd -18.7083 3.2498 0 0 CHG=2
M V30 END ATOM
M V30 BEGIN BOND
M V30 1 1 1 2
M V30 2 2 2 3
M V30 3 1 6 7
M V30 4 2 7 8
M V30 5 9 10 5
M V30 6 9 11 5
M V30 7 9 10 12
M V30 8 9 11 12
M V30 9 9 9 12 ENDPTS=(3 6 7 8) ATTACH=ALL
M V30 10 9 4 5 ENDPTS=(3 1 2 3) ATTACH=ALL
M V30 END BOND
M V30 END CTAB
M END