mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-03 21:44:30 +08:00
* Fixes #8379 * check in some working tests * test passes * test passes * test passes * test passes * test passes * ensure that the invariants flush the streams on failure * tests pass * test passes * tests pass * tests pass * tests pass * tests pass * tests pass * tests pass * tests pass * tests pass * tests pass * tests pass * tests pass * tests pass * tests pass * tests pass * Fixes #8391 * tests pass * fix a test with legacy not clear why this was not causing problems before * make a test work * Fixes #8396 * gcc builds work * fingerprint tests pass * mention backwards incompatible change * fix a problem with FindMolChiralCenters * more testing details * enable the test status output * Fixes #8432 fix a bug in double-bond stereo handling for template matching * all depictor tests pass * use the new-stereo chiral ranks in the depiction code * always assign new-stereo chiral ranks * make _ChiralAtomRank a computed property This is analogous to _CIPRank * tweak to the way the atom ordering is computed for 2D coordinate generation * update two expected results * backup * response to review * tests pass * tests pass --------- Co-authored-by: = <=>
2427 lines
72 KiB
C++
2427 lines
72 KiB
C++
//
|
|
// Copyright (C) 2023 Novartis Biomedical Research
|
|
//
|
|
// @@ All Rights Reserved @@
|
|
// This file is part of the RDKit.
|
|
// The contents are covered by the terms of the BSD license
|
|
// which is included in the file license.txt, found at the root
|
|
// of the RDKit source tree.
|
|
//
|
|
#include <catch2/catch_all.hpp>
|
|
|
|
#include <GraphMol/RDKitBase.h>
|
|
#include <GraphMol/ROMol.h>
|
|
#include <GraphMol/FileParsers/FileParsers.h>
|
|
#include <GraphMol/SmilesParse/SmilesParse.h>
|
|
#include <GraphMol/SmilesParse/SmilesWrite.h>
|
|
#include <GraphMol/MolStandardize/Pipeline.h>
|
|
#include <GraphMol/MolStandardize/MolStandardize.h>
|
|
#include <GraphMol/Chirality.h>
|
|
#include <GraphMol/test_fixtures.h>
|
|
#include <memory>
|
|
#include <string>
|
|
|
|
using namespace RDKit;
|
|
|
|
// Checks how a default-constructed MolStandardize::Pipeline reports invalid
// input. Each SECTION passes molblock text to pipeline.run(), dumps every
// entry of result.log to std::cerr, and then asserts on result.stage and on
// the bit flags in result.status.
// NOTE(review): std::cerr is used here but <iostream> is not among the
// includes visible at the top of this file - presumably it arrives
// transitively; confirm, or include it explicitly.
TEST_CASE("VALIDATION_ERRORS_WITH_DEFAULT_OPTIONS") {
|
|
MolStandardize::Pipeline pipeline;
|
|
|
|
// Input is not a well-formed molblock: the run is expected to stop at the
// PARSING_INPUT stage with INPUT_ERROR set.
SECTION("parse error") {
|
|
const char *molblock = R"(
|
|
sldfj;ldskfj sldkjfsd;lkf
|
|
M V30 BEGIN CTAB
|
|
)";
|
|
|
|
MolStandardize::PipelineResult result = pipeline.run(molblock);
|
|
|
|
for (auto &info : result.log) {
|
|
std::cerr << info.status << " " << info.detail << std::endl;
|
|
}
|
|
|
|
REQUIRE(static_cast<MolStandardize::PipelineStage>(result.stage) ==
|
|
MolStandardize::PipelineStage::PARSING_INPUT);
|
|
REQUIRE((result.status & MolStandardize::PipelineStatus::PIPELINE_ERROR) !=
|
|
MolStandardize::PipelineStatus::NO_EVENT);
|
|
REQUIRE(result.status & MolStandardize::PipelineStatus::INPUT_ERROR);
|
|
}
|
|
|
|
// CTAB whose COUNTS line declares zero atoms and zero bonds: the run is
// expected to complete, flagging BASIC_VALIDATION_ERROR.
SECTION("failing RDKit validation, no atoms") {
|
|
const char *molblock = R"(
|
|
10052313452D
|
|
|
|
0 0 0 0 0 999 V3000
|
|
M V30 BEGIN CTAB
|
|
M V30 COUNTS 0 0 0 0 0
|
|
M V30 END CTAB
|
|
M END
|
|
)";
|
|
|
|
MolStandardize::PipelineResult result = pipeline.run(molblock);
|
|
|
|
for (auto &info : result.log) {
|
|
std::cerr << info.status << " " << info.detail << std::endl;
|
|
}
|
|
|
|
REQUIRE(static_cast<MolStandardize::PipelineStage>(result.stage) ==
|
|
MolStandardize::PipelineStage::COMPLETED);
|
|
REQUIRE((result.status & MolStandardize::PipelineStatus::PIPELINE_ERROR) !=
|
|
MolStandardize::PipelineStatus::NO_EVENT);
|
|
REQUIRE(result.status & MolStandardize::PipelineStatus::VALIDATION_ERROR);
|
|
REQUIRE(result.status &
|
|
MolStandardize::PipelineStatus::BASIC_VALIDATION_ERROR);
|
|
}
|
|
|
|
// Atom 2 (N, no charge given) carries four single bonds. The final check is
// an exact equality: status must be BASIC_VALIDATION_ERROR combined with
// PREPARE_FOR_STANDARDIZATION_ERROR and nothing else.
SECTION("failing RDKit validation, bad valence status") {
|
|
const char *molblock = R"(
|
|
10242314442D
|
|
|
|
0 0 0 0 0 999 V3000
|
|
M V30 BEGIN CTAB
|
|
M V30 COUNTS 5 4 0 0 0
|
|
M V30 BEGIN ATOM
|
|
M V30 1 C -1.6247 7.5825 0 0
|
|
M V30 2 N -2.9583 6.8125 0 0
|
|
M V30 3 C -4.292 7.5825 0 0
|
|
M V30 4 C -2.9583 5.2725 0 0
|
|
M V30 5 C -1.6247 6.0425 0 0
|
|
M V30 END ATOM
|
|
M V30 BEGIN BOND
|
|
M V30 1 1 2 1
|
|
M V30 2 1 2 3
|
|
M V30 3 1 2 4
|
|
M V30 4 1 2 5
|
|
M V30 END BOND
|
|
M V30 END CTAB
|
|
M END
|
|
)";
|
|
|
|
MolStandardize::PipelineResult result = pipeline.run(molblock);
|
|
|
|
for (auto &info : result.log) {
|
|
std::cerr << info.status << " " << info.detail << std::endl;
|
|
}
|
|
|
|
REQUIRE(static_cast<MolStandardize::PipelineStage>(result.stage) ==
|
|
MolStandardize::PipelineStage::COMPLETED);
|
|
REQUIRE((result.status & MolStandardize::PipelineStatus::PIPELINE_ERROR) !=
|
|
MolStandardize::PipelineStatus::NO_EVENT);
|
|
REQUIRE(result.status & MolStandardize::PipelineStatus::VALIDATION_ERROR);
|
|
REQUIRE(
|
|
result.status ==
|
|
(MolStandardize::PipelineStatus::BASIC_VALIDATION_ERROR |
|
|
MolStandardize::PipelineStatus::PREPARE_FOR_STANDARDIZATION_ERROR));
|
|
}
|
|
|
|
// Atom 1 is a carbon tagged MASS=3. After masking with PIPELINE_ERROR the
// status must equal BASIC_VALIDATION_ERROR exactly.
SECTION("failing Isotopes validation") {
|
|
const char *molblock = R"(
|
|
Mrv2311 01312409582D
|
|
|
|
0 0 0 0 0 999 V3000
|
|
M V30 BEGIN CTAB
|
|
M V30 COUNTS 2 1 0 0 0
|
|
M V30 BEGIN ATOM
|
|
M V30 1 C -15.3955 7.6033 0 0 MASS=3
|
|
M V30 2 C -16.7292 6.8333 0 0
|
|
M V30 END ATOM
|
|
M V30 BEGIN BOND
|
|
M V30 1 1 2 1
|
|
M V30 END BOND
|
|
M V30 END CTAB
|
|
M END
|
|
)";
|
|
|
|
MolStandardize::PipelineResult result = pipeline.run(molblock);
|
|
|
|
for (auto &info : result.log) {
|
|
std::cerr << info.status << " " << info.detail << std::endl;
|
|
}
|
|
|
|
REQUIRE(static_cast<MolStandardize::PipelineStage>(result.stage) ==
|
|
MolStandardize::PipelineStage::COMPLETED);
|
|
REQUIRE((result.status & MolStandardize::PipelineStatus::PIPELINE_ERROR) !=
|
|
MolStandardize::PipelineStatus::NO_EVENT);
|
|
REQUIRE(result.status & MolStandardize::PipelineStatus::VALIDATION_ERROR);
|
|
REQUIRE((result.status & MolStandardize::PipelineStatus::PIPELINE_ERROR) ==
|
|
MolStandardize::PipelineStatus::BASIC_VALIDATION_ERROR);
|
|
}
|
|
|
|
// Atom 1 is an R# atom with an RGROUPS entry: FEATURES_VALIDATION_ERROR is
// expected.
SECTION("failing features validation, query atom") {
|
|
const char *molblock = R"(
|
|
Mrv2311 01162413552D
|
|
|
|
0 0 0 0 0 999 V3000
|
|
M V30 BEGIN CTAB
|
|
M V30 COUNTS 2 1 0 0 0
|
|
M V30 BEGIN ATOM
|
|
M V30 1 R# -17.3747 6.9367 0 0 RGROUPS=(1 0)
|
|
M V30 2 C -18.7083 6.1667 0 0
|
|
M V30 END ATOM
|
|
M V30 BEGIN BOND
|
|
M V30 1 1 2 1
|
|
M V30 END BOND
|
|
M V30 END CTAB
|
|
M END
|
|
)";
|
|
|
|
MolStandardize::PipelineResult result = pipeline.run(molblock);
|
|
|
|
for (auto &info : result.log) {
|
|
std::cerr << info.status << " " << info.detail << std::endl;
|
|
}
|
|
|
|
REQUIRE(static_cast<MolStandardize::PipelineStage>(result.stage) ==
|
|
MolStandardize::PipelineStage::COMPLETED);
|
|
REQUIRE((result.status & MolStandardize::PipelineStatus::PIPELINE_ERROR) !=
|
|
MolStandardize::PipelineStatus::NO_EVENT);
|
|
REQUIRE(result.status & MolStandardize::PipelineStatus::VALIDATION_ERROR);
|
|
REQUIRE(result.status &
|
|
MolStandardize::PipelineStatus::FEATURES_VALIDATION_ERROR);
|
|
}
|
|
|
|
// Six-membered ring whose ring bonds are written with bond type 4 (the
// aromatic bonds named in the section title): FEATURES_VALIDATION_ERROR is
// expected.
SECTION("failing features validation, aromatic bonds") {
|
|
const char *molblock = R"(
|
|
Mrv2311 02272411562D
|
|
|
|
0 0 0 0 0 999 V3000
|
|
M V30 BEGIN CTAB
|
|
M V30 COUNTS 7 7 0 0 0
|
|
M V30 BEGIN ATOM
|
|
M V30 1 C -10.3542 4.29 0 0
|
|
M V30 2 C -11.6879 3.52 0 0
|
|
M V30 3 C -11.6879 1.9798 0 0
|
|
M V30 4 N -10.3542 1.21 0 0
|
|
M V30 5 C -9.0204 1.9798 0 0
|
|
M V30 6 C -9.0204 3.52 0 0
|
|
M V30 7 C -10.3542 5.83 0 0
|
|
M V30 END ATOM
|
|
M V30 BEGIN BOND
|
|
M V30 1 4 1 2
|
|
M V30 2 4 1 6
|
|
M V30 3 4 2 3
|
|
M V30 4 4 5 6
|
|
M V30 5 1 1 7
|
|
M V30 6 4 3 4
|
|
M V30 7 4 4 5
|
|
M V30 END BOND
|
|
M V30 END CTAB
|
|
M END
|
|
)";
|
|
|
|
MolStandardize::PipelineResult result = pipeline.run(molblock);
|
|
|
|
for (auto &info : result.log) {
|
|
std::cerr << info.status << " " << info.detail << std::endl;
|
|
}
|
|
|
|
REQUIRE(static_cast<MolStandardize::PipelineStage>(result.stage) ==
|
|
MolStandardize::PipelineStage::COMPLETED);
|
|
REQUIRE((result.status & MolStandardize::PipelineStatus::PIPELINE_ERROR) !=
|
|
MolStandardize::PipelineStatus::NO_EVENT);
|
|
REQUIRE(result.status & MolStandardize::PipelineStatus::VALIDATION_ERROR);
|
|
REQUIRE(result.status &
|
|
MolStandardize::PipelineStatus::FEATURES_VALIDATION_ERROR);
|
|
}
|
|
|
|
// Pt centre with two N->Pt bonds written as bond type 9 (the dative bonds
// named in the section title): FEATURES_VALIDATION_ERROR is expected.
SECTION("failing features validation, dative bonds") {
|
|
const char *molblock = R"(
|
|
Mrv2311 07222412542D
|
|
|
|
0 0 0 0 0 999 V3000
|
|
M V30 BEGIN CTAB
|
|
M V30 COUNTS 5 4 0 0 0
|
|
M V30 BEGIN ATOM
|
|
M V30 1 Pt -17.4792 5.75 0 0
|
|
M V30 2 Cl -16.1042 6.8333 0 0
|
|
M V30 3 Cl -16.1875 4.7917 0 0
|
|
M V30 4 N -18.8958 6.8333 0 0
|
|
M V30 5 N -18.8125 4.5833 0 0
|
|
M V30 END ATOM
|
|
M V30 BEGIN BOND
|
|
M V30 1 1 2 1
|
|
M V30 2 1 3 1
|
|
M V30 3 9 4 1
|
|
M V30 4 9 5 1
|
|
M V30 END BOND
|
|
M V30 END CTAB
|
|
M END
|
|
)";
|
|
|
|
MolStandardize::PipelineResult result = pipeline.run(molblock);
|
|
|
|
for (auto &info : result.log) {
|
|
std::cerr << info.status << " " << info.detail << std::endl;
|
|
}
|
|
|
|
REQUIRE(static_cast<MolStandardize::PipelineStage>(result.stage) ==
|
|
MolStandardize::PipelineStage::COMPLETED);
|
|
REQUIRE((result.status & MolStandardize::PipelineStatus::PIPELINE_ERROR) !=
|
|
MolStandardize::PipelineStatus::NO_EVENT);
|
|
REQUIRE(result.status & MolStandardize::PipelineStatus::VALIDATION_ERROR);
|
|
REQUIRE(result.status &
|
|
MolStandardize::PipelineStatus::FEATURES_VALIDATION_ERROR);
|
|
}
|
|
|
|
// The CTAB carries a COLLECTION block with an MDLV30/STERAC1 entry for
// atom 1: FEATURES_VALIDATION_ERROR is expected.
SECTION("failing features validation, enhanced stereo") {
|
|
const char *molblock = R"(
|
|
Mrv2311 01162411552D
|
|
|
|
0 0 0 0 0 999 V3000
|
|
M V30 BEGIN CTAB
|
|
M V30 COUNTS 4 3 0 0 0
|
|
M V30 BEGIN ATOM
|
|
M V30 1 C -18.208 8.52 0 0 CFG=2
|
|
M V30 2 F -19.5417 7.75 0 0
|
|
M V30 3 C -16.8743 7.75 0 0
|
|
M V30 4 Cl -18.208 10.06 0 0
|
|
M V30 END ATOM
|
|
M V30 BEGIN BOND
|
|
M V30 1 1 1 3 CFG=1
|
|
M V30 2 1 2 1
|
|
M V30 3 1 1 4
|
|
M V30 END BOND
|
|
M V30 BEGIN COLLECTION
|
|
M V30 MDLV30/STERAC1 ATOMS=(1 1)
|
|
M V30 END COLLECTION
|
|
M V30 END CTAB
|
|
M END
|
|
)";
|
|
|
|
MolStandardize::PipelineResult result = pipeline.run(molblock);
|
|
|
|
for (auto &info : result.log) {
|
|
std::cerr << info.status << " " << info.detail << std::endl;
|
|
}
|
|
|
|
REQUIRE(static_cast<MolStandardize::PipelineStage>(result.stage) ==
|
|
MolStandardize::PipelineStage::COMPLETED);
|
|
REQUIRE((result.status & MolStandardize::PipelineStatus::PIPELINE_ERROR) !=
|
|
MolStandardize::PipelineStatus::NO_EVENT);
|
|
REQUIRE(result.status & MolStandardize::PipelineStatus::VALIDATION_ERROR);
|
|
REQUIRE(result.status &
|
|
MolStandardize::PipelineStatus::FEATURES_VALIDATION_ERROR);
|
|
}
|
|
|
|
// Atom 1 is a carbon tagged RAD=2: BASIC_VALIDATION_ERROR is expected.
SECTION("failing radical validation, disallowed radical") {
|
|
const char *molblock = R"(
|
|
Mrv2311 02082417212D
|
|
|
|
0 0 0 0 0 999 V3000
|
|
M V30 BEGIN CTAB
|
|
M V30 COUNTS 2 1 0 0 0
|
|
M V30 BEGIN ATOM
|
|
M V30 1 C -20.9372 7.145 0 0 RAD=2
|
|
M V30 2 C -22.2708 6.375 0 0
|
|
M V30 END ATOM
|
|
M V30 BEGIN BOND
|
|
M V30 1 1 2 1
|
|
M V30 END BOND
|
|
M V30 END CTAB
|
|
M END
|
|
)";
|
|
|
|
MolStandardize::PipelineResult result = pipeline.run(molblock);
|
|
|
|
for (auto &info : result.log) {
|
|
std::cerr << info.status << " " << info.detail << std::endl;
|
|
}
|
|
|
|
REQUIRE(static_cast<MolStandardize::PipelineStage>(result.stage) ==
|
|
MolStandardize::PipelineStage::COMPLETED);
|
|
REQUIRE((result.status & MolStandardize::PipelineStatus::PIPELINE_ERROR) !=
|
|
MolStandardize::PipelineStatus::NO_EVENT);
|
|
REQUIRE(result.status & MolStandardize::PipelineStatus::VALIDATION_ERROR);
|
|
REQUIRE(result.status &
|
|
MolStandardize::PipelineStatus::BASIC_VALIDATION_ERROR);
|
|
}
|
|
|
|
// Atom 2 has a Z coordinate of 0.2 in an otherwise flat structure:
// IS2D_VALIDATION_ERROR is expected.
SECTION("failing 2D validation, non-null Z coords") {
|
|
const char *molblock = R"(
|
|
2D
|
|
|
|
0 0 0 0 0 999 V3000
|
|
M V30 BEGIN CTAB
|
|
M V30 COUNTS 2 1 0 0 0
|
|
M V30 BEGIN ATOM
|
|
M V30 1 C 0.8753 4.9367 0 0
|
|
M V30 2 C -0.4583 4.1667 0.2 0
|
|
M V30 END ATOM
|
|
M V30 BEGIN BOND
|
|
M V30 1 1 2 1
|
|
M V30 END BOND
|
|
M V30 END CTAB
|
|
M END
|
|
)";
|
|
|
|
MolStandardize::PipelineResult result = pipeline.run(molblock);
|
|
|
|
for (auto &info : result.log) {
|
|
std::cerr << info.status << " " << info.detail << std::endl;
|
|
}
|
|
|
|
REQUIRE(static_cast<MolStandardize::PipelineStage>(result.stage) ==
|
|
MolStandardize::PipelineStage::COMPLETED);
|
|
REQUIRE((result.status & MolStandardize::PipelineStatus::PIPELINE_ERROR) !=
|
|
MolStandardize::PipelineStatus::NO_EVENT);
|
|
REQUIRE(result.status & MolStandardize::PipelineStatus::VALIDATION_ERROR);
|
|
REQUIRE(result.status &
|
|
MolStandardize::PipelineStatus::IS2D_VALIDATION_ERROR);
|
|
}
|
|
|
|
// Atoms 1 and 4 sit at (-3.05, 5.48) and (-3.0, 5.5), i.e. nearly on top of
// each other: LAYOUT2D_VALIDATION_ERROR is expected.
SECTION("failing validation, clashing atoms") {
|
|
const char *molblock = R"(
|
|
2D
|
|
|
|
0 0 0 0 0 999 V3000
|
|
M V30 BEGIN CTAB
|
|
M V30 COUNTS 4 3 0 0 0
|
|
M V30 BEGIN ATOM
|
|
M V30 1 C -3.05 5.48 0 0
|
|
M V30 2 C -4.4167 4.6875 0 0
|
|
M V30 3 C -4.3289 6.3627 0 0
|
|
M V30 4 C -3.0 5.5 0 0
|
|
M V30 END ATOM
|
|
M V30 BEGIN BOND
|
|
M V30 1 1 2 1
|
|
M V30 2 1 1 3
|
|
M V30 3 1 3 4
|
|
M V30 END BOND
|
|
M V30 END CTAB
|
|
M END
|
|
)";
|
|
|
|
MolStandardize::PipelineResult result = pipeline.run(molblock);
|
|
|
|
for (auto &info : result.log) {
|
|
std::cerr << info.status << " " << info.detail << std::endl;
|
|
}
|
|
|
|
REQUIRE(static_cast<MolStandardize::PipelineStage>(result.stage) ==
|
|
MolStandardize::PipelineStage::COMPLETED);
|
|
REQUIRE((result.status & MolStandardize::PipelineStatus::PIPELINE_ERROR) !=
|
|
MolStandardize::PipelineStatus::NO_EVENT);
|
|
REQUIRE(result.status & MolStandardize::PipelineStatus::VALIDATION_ERROR);
|
|
REQUIRE(result.status &
|
|
MolStandardize::PipelineStatus::LAYOUT2D_VALIDATION_ERROR);
|
|
}
|
|
|
|
// Two inputs are run in this section. First: a three-membered ring with an
// F placed at x = 150.0 and bonded to atom 1, expected to raise
// LAYOUT2D_VALIDATION_ERROR. Second: a 17-atom ring whose closing bond
// (17-2) spans the whole drawing, expected to raise no pipeline error.
SECTION("failing validation, abnormally long bonds") {
|
|
const char *molblock = R"(
|
|
01112413352D
|
|
|
|
0 0 0 0 0 999 V3000
|
|
M V30 BEGIN CTAB
|
|
M V30 COUNTS 4 4 0 0 0
|
|
M V30 BEGIN ATOM
|
|
M V30 1 C -28.1663 10.4367 0 0
|
|
M V30 2 C -29.5 9.6667 0 0
|
|
M V30 3 C -29.5 11.2067 0 0
|
|
M V30 4 F 150.0 10.4367 0 0
|
|
M V30 END ATOM
|
|
M V30 BEGIN BOND
|
|
M V30 1 1 2 1
|
|
M V30 2 1 2 3
|
|
M V30 3 1 3 1
|
|
M V30 4 1 1 4
|
|
M V30 END BOND
|
|
M V30 END CTAB
|
|
M END
|
|
)";
|
|
|
|
MolStandardize::PipelineResult result = pipeline.run(molblock);
|
|
|
|
for (auto &info : result.log) {
|
|
std::cerr << info.status << " " << info.detail << std::endl;
|
|
}
|
|
|
|
REQUIRE(static_cast<MolStandardize::PipelineStage>(result.stage) ==
|
|
MolStandardize::PipelineStage::COMPLETED);
|
|
REQUIRE((result.status & MolStandardize::PipelineStatus::PIPELINE_ERROR) !=
|
|
MolStandardize::PipelineStatus::NO_EVENT);
|
|
REQUIRE(result.status & MolStandardize::PipelineStatus::VALIDATION_ERROR);
|
|
REQUIRE(result.status &
|
|
MolStandardize::PipelineStatus::LAYOUT2D_VALIDATION_ERROR);
|
|
|
|
// Second input: the long bond is part of a ring this time.
molblock = R"(
|
|
Mrv2311 02222409302D
|
|
|
|
0 0 0 0 0 999 V3000
|
|
M V30 BEGIN CTAB
|
|
M V30 COUNTS 17 17 0 0 0
|
|
M V30 BEGIN ATOM
|
|
M V30 1 C -9.0205 2.1033 0 0
|
|
M V30 2 C -10.3542 1.3333 0 0
|
|
M V30 3 C -7.4805 2.1033 0 0
|
|
M V30 4 C -5.9405 2.1033 0 0
|
|
M V30 5 C -4.4005 2.1033 0 0
|
|
M V30 6 C -2.8605 2.1033 0 0
|
|
M V30 7 C -1.3205 2.1033 0 0
|
|
M V30 8 C 0.2195 2.1033 0 0
|
|
M V30 9 C 1.7595 2.1033 0 0
|
|
M V30 10 C 3.2995 2.1033 0 0
|
|
M V30 11 C 4.8395 2.1033 0 0
|
|
M V30 12 C 6.3795 2.1033 0 0
|
|
M V30 13 C 7.9195 2.1033 0 0
|
|
M V30 14 C 9.4595 2.1033 0 0
|
|
M V30 15 C 10.9995 2.1033 0 0
|
|
M V30 16 C 12.5395 2.1033 0 0
|
|
M V30 17 C 13.7854 1.1981 0 0
|
|
M V30 END ATOM
|
|
M V30 BEGIN BOND
|
|
M V30 1 1 2 1
|
|
M V30 2 1 1 3
|
|
M V30 3 1 3 4
|
|
M V30 4 1 4 5
|
|
M V30 5 1 5 6
|
|
M V30 6 1 6 7
|
|
M V30 7 1 7 8
|
|
M V30 8 1 8 9
|
|
M V30 9 1 9 10
|
|
M V30 10 1 10 11
|
|
M V30 11 1 11 12
|
|
M V30 12 1 12 13
|
|
M V30 13 1 13 14
|
|
M V30 14 1 14 15
|
|
M V30 15 1 15 16
|
|
M V30 16 1 16 17
|
|
M V30 17 1 17 2
|
|
M V30 END BOND
|
|
M V30 END CTAB
|
|
M END
|
|
)";
|
|
|
|
result = pipeline.run(molblock);
|
|
|
|
for (auto &info : result.log) {
|
|
std::cerr << info.status << " " << info.detail << std::endl;
|
|
}
|
|
|
|
REQUIRE(static_cast<MolStandardize::PipelineStage>(result.stage) ==
|
|
MolStandardize::PipelineStage::COMPLETED);
|
|
// long bonds in rings are allowed by default
|
|
REQUIRE((result.status & MolStandardize::PipelineStatus::PIPELINE_ERROR) ==
|
|
MolStandardize::PipelineStatus::NO_EVENT);
|
|
}
|
|
|
|
// Atom 1 has three neighbours and two of its bonds (1-2 and 1-3) are marked
// CFG=1: STEREO_VALIDATION_ERROR is expected.
SECTION("failing stereo validation, too many stereo bonds (3 subst. case)") {
|
|
const char *molblock = R"(
|
|
2D
|
|
|
|
0 0 0 0 0 999 V3000
|
|
M V30 BEGIN CTAB
|
|
M V30 COUNTS 4 3 0 0 0
|
|
M V30 BEGIN ATOM
|
|
M V30 1 C -1.583 5.7075 0 0 CFG=1
|
|
M V30 2 C -2.9167 4.9375 0 0
|
|
M V30 3 C -1.583 7.2475 0 0
|
|
M V30 4 C -0.2493 4.9375 0 0
|
|
M V30 END ATOM
|
|
M V30 BEGIN BOND
|
|
M V30 1 1 1 2 CFG=1
|
|
M V30 2 1 1 3 CFG=1
|
|
M V30 3 1 1 4
|
|
M V30 END BOND
|
|
M V30 END CTAB
|
|
M END
|
|
)";
|
|
|
|
MolStandardize::PipelineResult result = pipeline.run(molblock);
|
|
|
|
for (auto &info : result.log) {
|
|
std::cerr << info.status << " " << info.detail << std::endl;
|
|
}
|
|
|
|
REQUIRE(static_cast<MolStandardize::PipelineStage>(result.stage) ==
|
|
MolStandardize::PipelineStage::COMPLETED);
|
|
REQUIRE((result.status & MolStandardize::PipelineStatus::PIPELINE_ERROR) !=
|
|
MolStandardize::PipelineStatus::NO_EVENT);
|
|
REQUIRE(result.status & MolStandardize::PipelineStatus::VALIDATION_ERROR);
|
|
REQUIRE(result.status &
|
|
MolStandardize::PipelineStatus::STEREO_VALIDATION_ERROR);
|
|
}
|
|
|
|
// Atom 1 has four neighbours; bonds 1-2 and 1-3 both carry CFG=1:
// STEREO_VALIDATION_ERROR is expected.
SECTION(
|
|
"failing stereo validation, adjacent stereo bonds w/ like direction (4 subst. case)") {
|
|
const char *molblock = R"(
|
|
2D
|
|
|
|
0 0 0 0 0 999 V3000
|
|
M V30 BEGIN CTAB
|
|
M V30 COUNTS 5 4 0 0 0
|
|
M V30 BEGIN ATOM
|
|
M V30 1 C -1.583 5.7075 0 0
|
|
M V30 2 C -2.9167 4.9375 0 0
|
|
M V30 3 C -1.583 7.2475 0 0
|
|
M V30 4 C -0.2493 4.9375 0 0
|
|
M V30 5 C -1.583 4.1675 0 0
|
|
M V30 END ATOM
|
|
M V30 BEGIN BOND
|
|
M V30 1 1 1 2 CFG=1
|
|
M V30 2 1 1 3 CFG=1
|
|
M V30 3 1 1 4
|
|
M V30 4 1 1 5
|
|
M V30 END BOND
|
|
M V30 END CTAB
|
|
M END
|
|
)";
|
|
|
|
MolStandardize::PipelineResult result = pipeline.run(molblock);
|
|
|
|
for (auto &info : result.log) {
|
|
std::cerr << info.status << " " << info.detail << std::endl;
|
|
}
|
|
|
|
REQUIRE(static_cast<MolStandardize::PipelineStage>(result.stage) ==
|
|
MolStandardize::PipelineStage::COMPLETED);
|
|
REQUIRE((result.status & MolStandardize::PipelineStatus::PIPELINE_ERROR) !=
|
|
MolStandardize::PipelineStatus::NO_EVENT);
|
|
REQUIRE(result.status & MolStandardize::PipelineStatus::VALIDATION_ERROR);
|
|
REQUIRE(result.status &
|
|
MolStandardize::PipelineStatus::STEREO_VALIDATION_ERROR);
|
|
}
|
|
|
|
// Same bond table as the previous section, but atom 4 also has Z = 0.5, so
// both IS2D_VALIDATION_ERROR and STEREO_VALIDATION_ERROR must be reported.
SECTION(
|
|
"failing validation, not 2D + adjacent stereo bonds w/ like direction (4 subst. case)") {
|
|
const char *molblock = R"(
|
|
2D
|
|
|
|
0 0 0 0 0 999 V3000
|
|
M V30 BEGIN CTAB
|
|
M V30 COUNTS 5 4 0 0 0
|
|
M V30 BEGIN ATOM
|
|
M V30 1 C -1.583 5.7075 0 0
|
|
M V30 2 C -2.9167 4.9375 0 0
|
|
M V30 3 C -1.583 7.2475 0 0
|
|
M V30 4 C -0.2493 4.9375 0.5 0
|
|
M V30 5 C -1.583 4.1675 0 0
|
|
M V30 END ATOM
|
|
M V30 BEGIN BOND
|
|
M V30 1 1 1 2 CFG=1
|
|
M V30 2 1 1 3 CFG=1
|
|
M V30 3 1 1 4
|
|
M V30 4 1 1 5
|
|
M V30 END BOND
|
|
M V30 END CTAB
|
|
M END
|
|
)";
|
|
|
|
MolStandardize::PipelineResult result = pipeline.run(molblock);
|
|
|
|
for (auto &info : result.log) {
|
|
std::cerr << info.status << " " << info.detail << std::endl;
|
|
}
|
|
|
|
REQUIRE(static_cast<MolStandardize::PipelineStage>(result.stage) ==
|
|
MolStandardize::PipelineStage::COMPLETED);
|
|
REQUIRE((result.status & MolStandardize::PipelineStatus::PIPELINE_ERROR) !=
|
|
MolStandardize::PipelineStatus::NO_EVENT);
|
|
REQUIRE(result.status & MolStandardize::PipelineStatus::VALIDATION_ERROR);
|
|
REQUIRE(result.status &
|
|
MolStandardize::PipelineStatus::IS2D_VALIDATION_ERROR);
|
|
REQUIRE(result.status &
|
|
MolStandardize::PipelineStatus::STEREO_VALIDATION_ERROR);
|
|
}
|
|
|
|
}
|
|
|
|
// Runs the pipeline with PipelineOptions::allowEmptyMolecules switched on and
// checks that a CTAB declaring zero atoms then completes with a status of
// exactly NO_EVENT (no error or modification flags at all).
TEST_CASE("VALIDATION_WITH_ALLOW_EMPTY_MOLS_OPTION") {
|
|
MolStandardize::PipelineOptions options;
|
|
options.allowEmptyMolecules = true;
|
|
MolStandardize::Pipeline pipeline(options);
|
|
|
|
// Empty structure: COUNTS declares 0 atoms and 0 bonds.
SECTION("no atoms produces no error") {
|
|
const char *molblock = R"(
|
|
10052313452D
|
|
|
|
0 0 0 0 0 999 V3000
|
|
M V30 BEGIN CTAB
|
|
M V30 COUNTS 0 0 0 0 0
|
|
M V30 END CTAB
|
|
M END
|
|
)";
|
|
|
|
MolStandardize::PipelineResult result = pipeline.run(molblock);
|
|
|
|
// Echo the pipeline log so failures are easier to diagnose.
for (auto &info : result.log) {
|
|
std::cerr << info.status << " " << info.detail << std::endl;
|
|
}
|
|
|
|
REQUIRE(static_cast<MolStandardize::PipelineStage>(result.stage) ==
|
|
MolStandardize::PipelineStage::COMPLETED);
|
|
REQUIRE(result.status == MolStandardize::PipelineStatus::NO_EVENT);
|
|
}
|
|
}
|
|
|
|
// Runs the pipeline with allowLongBondsInRings switched off and a
// bondLengthLimit of 10, then checks that a 17-atom ring whose closing bond
// (17-2) stretches across the whole drawing is reported as a
// LAYOUT2D_VALIDATION_ERROR.
TEST_CASE("VALIDATION_WITH_DISALLOWED_LONG_BONDS_IN_RINGS") {
|
|
MolStandardize::PipelineOptions options;
|
|
options.bondLengthLimit = 10.; // adapted to test structure
|
|
options.allowLongBondsInRings = false;
|
|
MolStandardize::Pipeline pipeline(options);
|
|
|
|
// Atoms 3..16 lie on one horizontal line; bond 17 (atoms 17-2) closes the
// ring from one end of the drawing to the other.
SECTION("report long bonds in rings") {
|
|
const char *molblock = R"(
|
|
Mrv2311 02222409302D
|
|
|
|
0 0 0 0 0 999 V3000
|
|
M V30 BEGIN CTAB
|
|
M V30 COUNTS 17 17 0 0 0
|
|
M V30 BEGIN ATOM
|
|
M V30 1 C -9.0205 2.1033 0 0
|
|
M V30 2 C -10.3542 1.3333 0 0
|
|
M V30 3 C -7.4805 2.1033 0 0
|
|
M V30 4 C -5.9405 2.1033 0 0
|
|
M V30 5 C -4.4005 2.1033 0 0
|
|
M V30 6 C -2.8605 2.1033 0 0
|
|
M V30 7 C -1.3205 2.1033 0 0
|
|
M V30 8 C 0.2195 2.1033 0 0
|
|
M V30 9 C 1.7595 2.1033 0 0
|
|
M V30 10 C 3.2995 2.1033 0 0
|
|
M V30 11 C 4.8395 2.1033 0 0
|
|
M V30 12 C 6.3795 2.1033 0 0
|
|
M V30 13 C 7.9195 2.1033 0 0
|
|
M V30 14 C 9.4595 2.1033 0 0
|
|
M V30 15 C 10.9995 2.1033 0 0
|
|
M V30 16 C 12.5395 2.1033 0 0
|
|
M V30 17 C 13.7854 1.1981 0 0
|
|
M V30 END ATOM
|
|
M V30 BEGIN BOND
|
|
M V30 1 1 2 1
|
|
M V30 2 1 1 3
|
|
M V30 3 1 3 4
|
|
M V30 4 1 4 5
|
|
M V30 5 1 5 6
|
|
M V30 6 1 6 7
|
|
M V30 7 1 7 8
|
|
M V30 8 1 8 9
|
|
M V30 9 1 9 10
|
|
M V30 10 1 10 11
|
|
M V30 11 1 11 12
|
|
M V30 12 1 12 13
|
|
M V30 13 1 13 14
|
|
M V30 14 1 14 15
|
|
M V30 15 1 15 16
|
|
M V30 16 1 16 17
|
|
M V30 17 1 17 2
|
|
M V30 END BOND
|
|
M V30 END CTAB
|
|
M END
|
|
)";
|
|
|
|
MolStandardize::PipelineResult result = pipeline.run(molblock);
|
|
|
|
// Echo the pipeline log so failures are easier to diagnose.
for (auto &info : result.log) {
|
|
std::cerr << info.status << " " << info.detail << std::endl;
|
|
}
|
|
|
|
REQUIRE(static_cast<MolStandardize::PipelineStage>(result.stage) ==
|
|
MolStandardize::PipelineStage::COMPLETED);
|
|
REQUIRE((result.status & MolStandardize::PipelineStatus::PIPELINE_ERROR) !=
|
|
MolStandardize::PipelineStatus::NO_EVENT);
|
|
REQUIRE(result.status & MolStandardize::PipelineStatus::VALIDATION_ERROR);
|
|
REQUIRE(result.status &
|
|
MolStandardize::PipelineStatus::LAYOUT2D_VALIDATION_ERROR);
|
|
}
|
|
}
|
|
|
|
TEST_CASE("STANDARDIZATION") {
|
|
MolStandardize::Pipeline pipeline;
|
|
|
|
SECTION("disconnect metal") {
|
|
const char *molblock = R"(
|
|
10282320572D
|
|
|
|
0 0 0 0 0 999 V3000
|
|
M V30 BEGIN CTAB
|
|
M V30 COUNTS 5 4 0 0 0
|
|
M V30 BEGIN ATOM
|
|
M V30 1 C -1.0413 5.4992 0 0
|
|
M V30 2 C -2.375 4.7292 0 0
|
|
M V30 3 O -1.0413 7.0392 0 0
|
|
M V30 4 O 0.2924 4.7292 0 0
|
|
M V30 5 Na 0.2924 3.1892 0 0
|
|
M V30 END ATOM
|
|
M V30 BEGIN BOND
|
|
M V30 1 1 2 1
|
|
M V30 2 1 1 4
|
|
M V30 3 2 1 3
|
|
M V30 4 1 4 5
|
|
M V30 END BOND
|
|
M V30 END CTAB
|
|
M END
|
|
)";
|
|
|
|
MolStandardize::PipelineResult result = pipeline.run(molblock);
|
|
|
|
for (auto &info : result.log) {
|
|
std::cerr << info.status << " " << info.detail << std::endl;
|
|
}
|
|
|
|
REQUIRE(static_cast<MolStandardize::PipelineStage>(result.stage) ==
|
|
MolStandardize::PipelineStage::COMPLETED);
|
|
REQUIRE((result.status & MolStandardize::PipelineStatus::PIPELINE_ERROR) ==
|
|
MolStandardize::PipelineStatus::NO_EVENT);
|
|
REQUIRE((result.status &
|
|
MolStandardize::PipelineStatus::STRUCTURE_MODIFICATION) !=
|
|
MolStandardize::PipelineStatus::NO_EVENT);
|
|
REQUIRE(result.status &
|
|
MolStandardize::PipelineStatus::METALS_DISCONNECTED);
|
|
|
|
std::unique_ptr<RWMol> mol(
|
|
MolBlockToMol(result.outputMolData, false, false));
|
|
REQUIRE(mol);
|
|
|
|
std::unique_ptr<RWMol> parentMol(
|
|
MolBlockToMol(result.parentMolData, false, false));
|
|
REQUIRE(parentMol);
|
|
std::string parentSmiles{MolToSmiles(*parentMol)};
|
|
REQUIRE(parentSmiles == "CC(=O)O");
|
|
|
|
std::unique_ptr<RWMol> outputMol(
|
|
MolBlockToMol(result.outputMolData, false, false));
|
|
REQUIRE(outputMol);
|
|
std::string outputSmiles{MolToSmiles(*outputMol)};
|
|
REQUIRE(outputSmiles == "CC(=O)O");
|
|
}
|
|
|
|
SECTION("normalize nitro") {
|
|
const char *molblock = R"(
|
|
10282320572D
|
|
|
|
0 0 0 0 0 999 V3000
|
|
M V30 BEGIN CTAB
|
|
M V30 COUNTS 4 3 0 0 0
|
|
M V30 BEGIN ATOM
|
|
M V30 1 N -1.0413 5.4992 0 0
|
|
M V30 2 C -2.375 4.7292 0 0
|
|
M V30 3 O -1.0413 7.0392 0 0
|
|
M V30 4 O 0.2924 4.7292 0 0
|
|
M V30 END ATOM
|
|
M V30 BEGIN BOND
|
|
M V30 1 1 2 1
|
|
M V30 2 2 1 4
|
|
M V30 3 2 1 3
|
|
M V30 END BOND
|
|
M V30 END CTAB
|
|
M END
|
|
)";
|
|
|
|
MolStandardize::PipelineResult result = pipeline.run(molblock);
|
|
|
|
for (auto &info : result.log) {
|
|
std::cerr << info.status << " " << info.detail << std::endl;
|
|
}
|
|
|
|
// nitro groups are cleaned-up in a pre-standardization sanitization step.
|
|
REQUIRE(static_cast<MolStandardize::PipelineStage>(result.stage) ==
|
|
MolStandardize::PipelineStage::COMPLETED);
|
|
REQUIRE((result.status & MolStandardize::PipelineStatus::PIPELINE_ERROR) ==
|
|
MolStandardize::PipelineStatus::NO_EVENT);
|
|
REQUIRE((result.status &
|
|
MolStandardize::PipelineStatus::STRUCTURE_MODIFICATION) ==
|
|
MolStandardize::PipelineStatus::NO_EVENT);
|
|
|
|
std::unique_ptr<RWMol> mol(
|
|
MolBlockToMol(result.outputMolData, false, false));
|
|
REQUIRE(mol);
|
|
|
|
std::string smiles{MolToSmiles(*mol)};
|
|
REQUIRE(smiles == "C[N+](=O)[O-]");
|
|
}
|
|
|
|
SECTION("Phosphate normalization") {
|
|
const char *molblock_a = R"(
|
|
Mrv2311 04152413292D
|
|
|
|
0 0 0 0 0 999 V3000
|
|
M V30 BEGIN CTAB
|
|
M V30 COUNTS 5 4 0 0 0
|
|
M V30 BEGIN ATOM
|
|
M V30 1 P -15.6247 3.9575 0 0 CHG=1
|
|
M V30 2 C -16.9583 3.1875 0 0
|
|
M V30 3 O -15.6247 5.4975 0 0 CHG=-1
|
|
M V30 4 S -14.291 3.1875 0 0 CHG=-1
|
|
M V30 5 C -15.6247 2.4175 0 0
|
|
M V30 END ATOM
|
|
M V30 BEGIN BOND
|
|
M V30 1 1 2 1
|
|
M V30 2 1 1 5
|
|
M V30 3 1 1 3
|
|
M V30 4 1 1 4
|
|
M V30 END BOND
|
|
M V30 END CTAB
|
|
M END
|
|
)";
|
|
|
|
MolStandardize::PipelineResult result_a = pipeline.run(molblock_a);
|
|
|
|
for (auto &info : result_a.log) {
|
|
std::cerr << info.status << " " << info.detail << std::endl;
|
|
}
|
|
|
|
// this test case is not expected to register any errors.
|
|
REQUIRE(static_cast<MolStandardize::PipelineStage>(result_a.stage) ==
|
|
MolStandardize::PipelineStage::COMPLETED);
|
|
REQUIRE(
|
|
(result_a.status & MolStandardize::PipelineStatus::PIPELINE_ERROR) ==
|
|
MolStandardize::PipelineStatus::NO_EVENT);
|
|
REQUIRE((result_a.status &
|
|
MolStandardize::PipelineStatus::STRUCTURE_MODIFICATION) !=
|
|
MolStandardize::PipelineStatus::NO_EVENT);
|
|
REQUIRE(result_a.status &
|
|
MolStandardize::PipelineStatus::NORMALIZATION_APPLIED);
|
|
REQUIRE(result_a.status &
|
|
MolStandardize::PipelineStatus::PROTONATION_CHANGED);
|
|
|
|
std::unique_ptr<RWMol> mol_a(
|
|
MolBlockToMol(result_a.outputMolData, false, false));
|
|
REQUIRE(mol_a);
|
|
|
|
std::string smiles_a{MolToSmiles(*mol_a)};
|
|
REQUIRE(smiles_a == "CP(C)(=O)S");
|
|
|
|
const char *molblock_b = R"(
|
|
Mrv2311 04152413292D
|
|
|
|
0 0 0 0 0 999 V3000
|
|
M V30 BEGIN CTAB
|
|
M V30 COUNTS 5 4 0 0 0
|
|
M V30 BEGIN ATOM
|
|
M V30 1 P -15.6247 3.9575 0 0 CHG=1
|
|
M V30 2 C -16.9583 3.1875 0 0
|
|
M V30 3 S -15.6247 5.4975 0 0 CHG=-1
|
|
M V30 4 O -14.291 3.1875 0 0 CHG=-1
|
|
M V30 5 C -15.6247 2.4175 0 0
|
|
M V30 END ATOM
|
|
M V30 BEGIN BOND
|
|
M V30 1 1 2 1
|
|
M V30 2 1 1 5
|
|
M V30 3 1 1 3
|
|
M V30 4 1 1 4
|
|
M V30 END BOND
|
|
M V30 END CTAB
|
|
M END
|
|
)";
|
|
|
|
MolStandardize::PipelineResult result_b = pipeline.run(molblock_b);
|
|
|
|
for (auto &info : result_b.log) {
|
|
std::cerr << info.status << " " << info.detail << std::endl;
|
|
}
|
|
|
|
// this test case is not expected to register any errors.
|
|
REQUIRE(static_cast<MolStandardize::PipelineStage>(result_b.stage) ==
|
|
MolStandardize::PipelineStage::COMPLETED);
|
|
REQUIRE(
|
|
(result_b.status & MolStandardize::PipelineStatus::PIPELINE_ERROR) ==
|
|
MolStandardize::PipelineStatus::NO_EVENT);
|
|
REQUIRE((result_b.status &
|
|
MolStandardize::PipelineStatus::STRUCTURE_MODIFICATION) !=
|
|
MolStandardize::PipelineStatus::NO_EVENT);
|
|
REQUIRE(result_b.status &
|
|
MolStandardize::PipelineStatus::NORMALIZATION_APPLIED);
|
|
REQUIRE(result_b.status &
|
|
MolStandardize::PipelineStatus::PROTONATION_CHANGED);
|
|
|
|
std::unique_ptr<RWMol> mol_b(
|
|
MolBlockToMol(result_b.outputMolData, false, false));
|
|
REQUIRE(mol_b);
|
|
|
|
std::string smiles_b{MolToSmiles(*mol_b)};
|
|
REQUIRE(smiles_b == "CP(C)(=O)S");
|
|
}
|
|
|
|
SECTION("normalize w/ RDKit's default normalizer transformations") {
|
|
const char *molblock = R"(
|
|
Mrv2311 03112410152D
|
|
|
|
0 0 0 0 0 999 V3000
|
|
M V30 BEGIN CTAB
|
|
M V30 COUNTS 4 3 0 0 0
|
|
M V30 BEGIN ATOM
|
|
M V30 1 S -20.083 2.4575 0 0
|
|
M V30 2 C -21.4167 1.6875 0 0
|
|
M V30 3 O -20.083 3.9975 0 0
|
|
M V30 4 C -18.7493 1.6875 0 0
|
|
M V30 END ATOM
|
|
M V30 BEGIN BOND
|
|
M V30 1 1 2 1
|
|
M V30 2 1 1 4
|
|
M V30 3 2 1 3
|
|
M V30 END BOND
|
|
M V30 END CTAB
|
|
M END
|
|
)";
|
|
|
|
MolStandardize::PipelineOptions options;
|
|
options.normalizerData = "";
|
|
MolStandardize::Pipeline customizedPipeline(options);
|
|
|
|
MolStandardize::PipelineResult result = customizedPipeline.run(molblock);
|
|
|
|
for (auto &info : result.log) {
|
|
std::cerr << info.status << " " << info.detail << std::endl;
|
|
}
|
|
|
|
REQUIRE(static_cast<MolStandardize::PipelineStage>(result.stage) ==
|
|
MolStandardize::PipelineStage::COMPLETED);
|
|
REQUIRE((result.status & MolStandardize::PipelineStatus::PIPELINE_ERROR) ==
|
|
MolStandardize::PipelineStatus::NO_EVENT);
|
|
REQUIRE((result.status &
|
|
MolStandardize::PipelineStatus::STRUCTURE_MODIFICATION) !=
|
|
MolStandardize::PipelineStatus::NO_EVENT);
|
|
REQUIRE(result.status &
|
|
MolStandardize::PipelineStatus::NORMALIZATION_APPLIED);
|
|
|
|
std::unique_ptr<RWMol> mol(
|
|
MolBlockToMol(result.outputMolData, false, false));
|
|
REQUIRE(mol);
|
|
|
|
std::string smiles{MolToSmiles(*mol)};
|
|
REQUIRE(smiles == "C[S+](C)[O-]");
|
|
}
|
|
|
|
SECTION(
    "normalization of 1,3- 1,5- conjugated systems favors application within rings") {
  // Runs `molblock` through the pipeline and requires that
  //  - the pipeline reaches the COMPLETED stage without any error flag,
  //  - the reported structure modifications are exactly
  //    NORMALIZATION_APPLIED | PROTONATION_CHANGED,
  //  - the parent structure parses and its SMILES equals
  //    `expectedParentSmiles`.
  // `caseLabel` is attached to any failing assertion so the offending input
  // can be identified even though all cases share these checks.
  const auto requireNormalizedParent =
      [&](const char *caseLabel, const char *molblock,
          const std::string &expectedParentSmiles) {
        INFO(caseLabel);

        const MolStandardize::PipelineResult result = pipeline.run(molblock);

        // Echo the pipeline log so that failures are easier to diagnose.
        for (const auto &info : result.log) {
          std::cerr << info.status << " " << info.detail << std::endl;
        }

        REQUIRE(static_cast<MolStandardize::PipelineStage>(result.stage) ==
                MolStandardize::PipelineStage::COMPLETED);
        REQUIRE(
            (result.status & MolStandardize::PipelineStatus::PIPELINE_ERROR) ==
            MolStandardize::PipelineStatus::NO_EVENT);
        REQUIRE((result.status &
                 MolStandardize::PipelineStatus::STRUCTURE_MODIFICATION) ==
                (MolStandardize::PipelineStatus::NORMALIZATION_APPLIED |
                 MolStandardize::PipelineStatus::PROTONATION_CHANGED));

        const std::unique_ptr<RWMol> parentMol(
            MolBlockToMol(result.parentMolData, false, false));
        REQUIRE(parentMol);
        const std::string parentSmiles = MolToSmiles(*parentMol);
        REQUIRE(parentSmiles == expectedParentSmiles);
      };

  // The two permutations of each system differ only in the numbering of the
  // two uncharged nitrogen atoms (and the bonds that reference them); the
  // expected parent structure must not depend on that ordering.

  // 1,3- conjugated cation - test a first ctab permutation
  requireNormalizedParent("1,3- conjugated cation - first ctab permutation",
                          R"(
RDKit 2D

0 0 0 0 0 0 0 0 0 0999 V3000
M V30 BEGIN CTAB
M V30 COUNTS 8 8 0 0 0
M V30 BEGIN ATOM
M V30 1 N -11.833300 8.935800 0.000000 0 CHG=1
M V30 2 C -13.167100 8.165800 0.000000 0
M V30 3 C -11.833300 5.855800 0.000000 0
M V30 4 C -10.499600 6.625600 0.000000 0
M V30 5 C -10.499600 8.165800 0.000000 0
M V30 6 C -11.833300 10.475800 0.000000 0
M V30 7 N -13.167100 6.625600 0.000000 0
M V30 8 N -14.500700 8.935800 0.000000 0
M V30 END ATOM
M V30 BEGIN BOND
M V30 1 1 3 4
M V30 2 1 4 5
M V30 3 2 1 2
M V30 4 1 1 5
M V30 5 1 1 6
M V30 6 1 2 8
M V30 7 1 2 7
M V30 8 1 7 3
M V30 END BOND
M V30 END CTAB
M END
)",
                          "CN1CCCN=C1N");

  // 1,3- conjugated cation - test a second ctab permutation
  requireNormalizedParent("1,3- conjugated cation - second ctab permutation",
                          R"(
RDKit 2D

0 0 0 0 0 0 0 0 0 0999 V3000
M V30 BEGIN CTAB
M V30 COUNTS 8 8 0 0 0
M V30 BEGIN ATOM
M V30 1 N -11.833300 8.935800 0.000000 0 CHG=1
M V30 2 C -13.167100 8.165800 0.000000 0
M V30 3 C -11.833300 5.855800 0.000000 0
M V30 4 C -10.499600 6.625600 0.000000 0
M V30 5 C -10.499600 8.165800 0.000000 0
M V30 6 C -11.833300 10.475800 0.000000 0
M V30 7 N -14.500700 8.935800 0.000000 0
M V30 8 N -13.167100 6.625600 0.000000 0
M V30 END ATOM
M V30 BEGIN BOND
M V30 1 1 3 4
M V30 2 1 4 5
M V30 3 2 1 2
M V30 4 1 1 5
M V30 5 1 1 6
M V30 6 1 2 7
M V30 7 1 2 8
M V30 8 1 8 3
M V30 END BOND
M V30 END CTAB
M END
)",
                          "CN1CCCN=C1N");

  // 1,5- conjugated cation - test a first ctab permutation
  requireNormalizedParent("1,5- conjugated cation - first ctab permutation",
                          R"(
RDKit 2D

0 0 0 0 0 0 0 0 0 0999 V3000
M V30 BEGIN CTAB
M V30 COUNTS 8 8 0 0 0
M V30 BEGIN ATOM
M V30 1 N -3.958300 0.790000 0.000000 0 CHG=1
M V30 2 C -5.292100 0.020000 0.000000 0
M V30 3 C -5.292100 -1.520200 0.000000 0
M V30 4 C -3.958300 -2.290000 0.000000 0
M V30 5 C -2.624600 0.020000 0.000000 0
M V30 6 N -2.624600 -1.520200 0.000000 0
M V30 7 C -3.958300 2.330000 0.000000 0
M V30 8 N -3.958300 -3.830000 0.000000 0
M V30 END ATOM
M V30 BEGIN BOND
M V30 1 1 2 3
M V30 2 2 3 4
M V30 3 2 1 2
M V30 4 1 1 5
M V30 5 1 1 7
M V30 6 1 4 8
M V30 7 1 6 5
M V30 8 1 4 6
M V30 END BOND
M V30 END CTAB
M END
)",
                          "CN1C=CC(N)=NC1");

  // 1,5- conjugated cation - test a second ctab permutation
  requireNormalizedParent("1,5- conjugated cation - second ctab permutation",
                          R"(
RDKit 2D

0 0 0 0 0 0 0 0 0 0999 V3000
M V30 BEGIN CTAB
M V30 COUNTS 8 8 0 0 0
M V30 BEGIN ATOM
M V30 1 N -3.958300 0.790000 0.000000 0 CHG=1
M V30 2 C -5.292100 0.020000 0.000000 0
M V30 3 C -5.292100 -1.520200 0.000000 0
M V30 4 C -3.958300 -2.290000 0.000000 0
M V30 5 C -2.624600 0.020000 0.000000 0
M V30 6 N -3.958300 -3.830000 0.000000 0
M V30 7 N -2.624600 -1.520200 0.000000 0
M V30 8 C -3.958300 2.330000 0.000000 0
M V30 END ATOM
M V30 BEGIN BOND
M V30 1 1 2 3
M V30 2 2 3 4
M V30 3 2 1 2
M V30 4 1 1 5
M V30 5 1 1 8
M V30 6 1 4 6
M V30 7 1 7 5
M V30 8 1 4 7
M V30 END BOND
M V30 END CTAB
M END
)",
                          "CN1C=CC(N)=NC1");
}
|
|
|
|
SECTION("standardize zwitterion") {
  using Stage = MolStandardize::PipelineStage;
  using Status = MolStandardize::PipelineStatus;

  // Input CTAB: the nitrogen carries CHG=1 and the Na atom is drawn bonded
  // to one of the carboxyl oxygens.
  const char *const ctab = R"(
10282320572D

0 0 0 0 0 999 V3000
M V30 BEGIN CTAB
M V30 COUNTS 6 5 0 0 0
M V30 BEGIN ATOM
M V30 1 C -1.0413 5.4992 0 0
M V30 2 C -2.375 4.7292 0 0
M V30 3 O -1.0413 7.0392 0 0
M V30 4 O 0.2924 4.7292 0 0
M V30 5 N -3.7087 5.4992 0 0 CHG=1
M V30 6 Na 0.2924 3.1892 0 0
M V30 END ATOM
M V30 BEGIN BOND
M V30 1 1 2 1
M V30 2 1 1 4
M V30 3 2 1 3
M V30 4 1 2 5
M V30 5 1 4 6
M V30 END BOND
M V30 END CTAB
M END
)";

  const MolStandardize::PipelineResult outcome = pipeline.run(ctab);

  // Dump the pipeline log to stderr as a debugging aid.
  for (const auto &entry : outcome.log) {
    std::cerr << entry.status << " " << entry.detail << std::endl;
  }

  // The run must finish cleanly ...
  REQUIRE(static_cast<Stage>(outcome.stage) == Stage::COMPLETED);
  const auto errorFlags = outcome.status & Status::PIPELINE_ERROR;
  REQUIRE(errorFlags == Status::NO_EVENT);

  // ... and report exactly metal disconnection plus fragment removal.
  const auto changeFlags = outcome.status & Status::STRUCTURE_MODIFICATION;
  REQUIRE(changeFlags != Status::NO_EVENT);
  REQUIRE(changeFlags ==
          (Status::METALS_DISCONNECTED | Status::FRAGMENTS_REMOVED));

  // The parent is expected in its neutral form.
  const std::unique_ptr<RWMol> parent{
      MolBlockToMol(outcome.parentMolData, false, false)};
  REQUIRE(parent);
  const std::string parentSmi = MolToSmiles(*parent);
  REQUIRE(parentSmi == "NCC(=O)O");

  // The output structure is expected as the zwitterion.
  const std::unique_ptr<RWMol> output{
      MolBlockToMol(outcome.outputMolData, false, false)};
  REQUIRE(output);
  const std::string outputSmi = MolToSmiles(*output);
  REQUIRE(outputSmi == "[NH3+]CC(=O)[O-]");
}
|
|
|
|
SECTION("standardize zwitterion with quaternary nitrogen") {
  using Stage = MolStandardize::PipelineStage;
  using Status = MolStandardize::PipelineStatus;

  // Input CTAB: a nitrogen with CHG=1 bonded to four carbons, plus a
  // carboxylate oxygen with CHG=-1.
  const char *const ctab = R"(
Mrv2311 02052411472D

0 0 0 0 0 999 V3000
M V30 BEGIN CTAB
M V30 COUNTS 8 7 0 0 0
M V30 BEGIN ATOM
M V30 1 N -10.9997 4.77 0 0 CHG=1
M V30 2 C -12.3333 4 0 0
M V30 3 C -10.9997 6.31 0 0
M V30 4 C -9.666 4 0 0
M V30 5 C -10.9997 3.23 0 0
M V30 6 C -9.666 2.46 0 0
M V30 7 O -10.9997 1.69 0 0
M V30 8 O -8.3323 1.69 0 0 CHG=-1
M V30 END ATOM
M V30 BEGIN BOND
M V30 1 1 4 6
M V30 2 2 6 7
M V30 3 1 6 8
M V30 4 1 2 1
M V30 5 1 1 3
M V30 6 1 1 4
M V30 7 1 1 5
M V30 END BOND
M V30 END CTAB
M END
)";

  const MolStandardize::PipelineResult outcome = pipeline.run(ctab);

  // Dump the pipeline log to stderr as a debugging aid.
  for (const auto &entry : outcome.log) {
    std::cerr << entry.status << " " << entry.detail << std::endl;
  }

  // The run must finish cleanly and must not report any structure
  // modification for this input.
  REQUIRE(static_cast<Stage>(outcome.stage) == Stage::COMPLETED);
  const auto errorFlags = outcome.status & Status::PIPELINE_ERROR;
  REQUIRE(errorFlags == Status::NO_EVENT);
  const auto changeFlags = outcome.status & Status::STRUCTURE_MODIFICATION;
  REQUIRE(changeFlags == Status::NO_EVENT);

  // Parent: the carboxyl group is expected protonated, the nitrogen keeps
  // its charge.
  const std::unique_ptr<RWMol> parent{
      MolBlockToMol(outcome.parentMolData, false, false)};
  REQUIRE(parent);
  const std::string parentSmi = MolToSmiles(*parent);
  REQUIRE(parentSmi == "C[N+](C)(C)CC(=O)O");

  // Output: the zwitterionic form is expected.
  const std::unique_ptr<RWMol> output{
      MolBlockToMol(outcome.outputMolData, false, false)};
  REQUIRE(output);
  const std::string outputSmi = MolToSmiles(*output);
  REQUIRE(outputSmi == "C[N+](C)(C)CC(=O)[O-]");
}
|
|
|
|
SECTION("uncharge tertiary amine w/ explicit hydrogen") {
  using Stage = MolStandardize::PipelineStage;
  using Status = MolStandardize::PipelineStatus;

  // Input CTAB: a nitrogen with CHG=1 bonded to three carbons and to one
  // explicitly drawn hydrogen atom.
  const char *const ctab = R"(
Mrv2311 02012412352D

0 0 0 0 0 999 V3000
M V30 BEGIN CTAB
M V30 COUNTS 5 4 0 0 0
M V30 BEGIN ATOM
M V30 1 C -13.2705 5.77 0 0
M V30 2 N -14.6042 5 0 0 CHG=1
M V30 3 H -15.9378 5.77 0 0
M V30 4 C -14.6042 3.46 0 0
M V30 5 C -13.2705 4.23 0 0
M V30 END ATOM
M V30 BEGIN BOND
M V30 1 1 2 1
M V30 2 1 2 4
M V30 3 1 2 5
M V30 4 1 2 3
M V30 END BOND
M V30 END CTAB
M END
)";

  const MolStandardize::PipelineResult outcome = pipeline.run(ctab);

  // Dump the pipeline log to stderr as a debugging aid.
  for (const auto &entry : outcome.log) {
    std::cerr << entry.status << " " << entry.detail << std::endl;
  }

  // The run must finish cleanly and flag only a protonation change.
  REQUIRE(static_cast<Stage>(outcome.stage) == Stage::COMPLETED);
  const auto errorFlags = outcome.status & Status::PIPELINE_ERROR;
  REQUIRE(errorFlags == Status::NO_EVENT);
  const auto changeFlags = outcome.status & Status::STRUCTURE_MODIFICATION;
  REQUIRE(changeFlags == Status::PROTONATION_CHANGED);

  // Parent and output are expected to be the same mol block.
  REQUIRE(outcome.parentMolData == outcome.outputMolData);

  // The parent is expected to be the neutral amine.
  const std::unique_ptr<RWMol> parent{
      MolBlockToMol(outcome.parentMolData, false, false)};
  REQUIRE(parent);
  const std::string parentSmi = MolToSmiles(*parent);
  REQUIRE(parentSmi == "CN(C)C");
}
|
|
|
|
SECTION("standardize preserves explicit Hs on chiral centers") {
  using Stage = MolStandardize::PipelineStage;
  using Status = MolStandardize::PipelineStatus;

  // Input CTAB: atom 12 is marked CFG=1 and bears an explicit H (atom 15)
  // through a bond with CFG=1; the Na atom is drawn bonded to a carboxyl
  // oxygen.
  const char *const ctab = R"(
Mrv2311 03112410582D

0 0 0 0 0 999 V3000
M V30 BEGIN CTAB
M V30 COUNTS 19 19 0 0 0
M V30 BEGIN ATOM
M V30 1 C -22.6873 4.415 0 0
M V30 2 C -24.0208 3.6451 0 0
M V30 3 C -24.0208 2.1049 0 0
M V30 4 C -22.6873 1.3349 0 0
M V30 5 C -21.3536 2.1049 0 0
M V30 6 C -21.3536 3.6451 0 0
M V30 7 S -20.02 4.415 0 0 CHG=1
M V30 8 O -20.02 5.955 0 0 CHG=-1
M V30 9 C -18.6863 3.6451 0 0
M V30 10 C -17.3526 4.415 0 0
M V30 11 C -16.0189 3.6451 0 0
M V30 12 C -14.6853 4.4151 0 0 CFG=1
M V30 13 C -13.3516 3.6452 0 0
M V30 14 F -14.6853 5.9551 0 0
M V30 15 H -14.6853 2.8752 0 0
M V30 16 C -25.3545 1.3349 0 0
M V30 17 O -26.6882 2.105 0 0
M V30 18 O -25.3546 -0.2051 0 0
M V30 19 Na -28.0219 1.335 0 0
M V30 END ATOM
M V30 BEGIN BOND
M V30 1 2 1 2
M V30 2 1 2 3
M V30 3 2 3 4
M V30 4 1 4 5
M V30 5 2 5 6
M V30 6 1 6 1
M V30 7 1 6 7
M V30 8 1 7 9
M V30 9 1 7 8
M V30 10 1 9 10
M V30 11 1 10 11
M V30 12 1 11 12
M V30 13 1 12 13
M V30 14 1 12 15 CFG=1
M V30 15 1 12 14
M V30 16 1 3 16
M V30 17 1 16 17
M V30 18 2 16 18
M V30 19 1 17 19
M V30 END BOND
M V30 END CTAB
M END
)";

  const MolStandardize::PipelineResult outcome = pipeline.run(ctab);

  // Dump the pipeline log to stderr as a debugging aid.
  for (const auto &entry : outcome.log) {
    std::cerr << entry.status << " " << entry.detail << std::endl;
  }

  // The run must finish cleanly ...
  REQUIRE(static_cast<Stage>(outcome.stage) == Stage::COMPLETED);
  const auto errorFlags = outcome.status & Status::PIPELINE_ERROR;
  REQUIRE(errorFlags == Status::NO_EVENT);

  // ... and report exactly these three structure modifications.
  const auto changeFlags = outcome.status & Status::STRUCTURE_MODIFICATION;
  REQUIRE(changeFlags != Status::NO_EVENT);
  REQUIRE(changeFlags ==
          (Status::METALS_DISCONNECTED | Status::FRAGMENTS_REMOVED |
           Status::PROTONATION_CHANGED));
  REQUIRE(outcome.outputMolData == outcome.parentMolData);

  // The explicit H on the stereocenter must still be present in the parent.
  const std::unique_ptr<RWMol> parent{
      MolBlockToMol(outcome.parentMolData, false, false)};
  REQUIRE(parent);
  const std::string parentSmi = MolToSmiles(*parent);
  REQUIRE(parentSmi == "[H][C@](C)(F)CCC[S+]([O-])C1=CC=C(C(=O)O)C=C1");
}
|
|
|
|
SECTION("standardize preserves isotopically marked explicit Hs") {
  using Stage = MolStandardize::PipelineStage;
  using Status = MolStandardize::PipelineStatus;

  // Input CTAB: atom 13 is an explicit hydrogen with MASS=2; the Na atom is
  // drawn bonded to a carboxyl oxygen.
  const char *const ctab = R"(
Mrv2311 03112410572D

0 0 0 0 0 999 V3000
M V30 BEGIN CTAB
M V30 COUNTS 17 17 0 0 0
M V30 BEGIN ATOM
M V30 1 C -22.6873 4.415 0 0
M V30 2 C -24.0208 3.6451 0 0
M V30 3 C -24.0208 2.1049 0 0
M V30 4 C -22.6873 1.3349 0 0
M V30 5 C -21.3536 2.1049 0 0
M V30 6 C -21.3536 3.6451 0 0
M V30 7 S -20.02 4.415 0 0 CHG=1
M V30 8 O -20.02 5.955 0 0 CHG=-1
M V30 9 C -18.6863 3.6451 0 0
M V30 10 C -17.3526 4.415 0 0
M V30 11 C -16.0189 3.6451 0 0
M V30 12 C -14.6853 4.4151 0 0
M V30 13 H -13.3516 3.6452 0 0 MASS=2
M V30 14 C -25.3545 1.3349 0 0
M V30 15 O -26.6882 2.105 0 0
M V30 16 O -25.3546 -0.2051 0 0
M V30 17 Na -28.0219 1.335 0 0
M V30 END ATOM
M V30 BEGIN BOND
M V30 1 2 1 2
M V30 2 1 2 3
M V30 3 2 3 4
M V30 4 1 4 5
M V30 5 2 5 6
M V30 6 1 6 1
M V30 7 1 6 7
M V30 8 1 7 9
M V30 9 1 7 8
M V30 10 1 9 10
M V30 11 1 10 11
M V30 12 1 11 12
M V30 13 1 3 14
M V30 14 1 14 15
M V30 15 2 14 16
M V30 16 1 15 17
M V30 17 1 12 13
M V30 END BOND
M V30 END CTAB
M END
)";

  const MolStandardize::PipelineResult outcome = pipeline.run(ctab);

  // Dump the pipeline log to stderr as a debugging aid.
  for (const auto &entry : outcome.log) {
    std::cerr << entry.status << " " << entry.detail << std::endl;
  }

  // The run must finish cleanly ...
  REQUIRE(static_cast<Stage>(outcome.stage) == Stage::COMPLETED);
  const auto errorFlags = outcome.status & Status::PIPELINE_ERROR;
  REQUIRE(errorFlags == Status::NO_EVENT);

  // ... and report exactly these three structure modifications.
  // Note: NORMALIZATION_APPLIED is not part of the expected set.
  const auto changeFlags = outcome.status & Status::STRUCTURE_MODIFICATION;
  REQUIRE(changeFlags != Status::NO_EVENT);
  REQUIRE(changeFlags ==
          (Status::METALS_DISCONNECTED | Status::FRAGMENTS_REMOVED |
           Status::PROTONATION_CHANGED));
  REQUIRE(outcome.outputMolData == outcome.parentMolData);

  // The isotope-labelled explicit H must still be present in the parent.
  const std::unique_ptr<RWMol> parent{
      MolBlockToMol(outcome.parentMolData, false, false)};
  REQUIRE(parent);
  const std::string parentSmi = MolToSmiles(*parent);
  REQUIRE(parentSmi == "[2H]CCCC[S+]([O-])C1=CC=C(C(=O)O)C=C1");
}
|
|
|
|
SECTION("standardize preserves generic explicit Hs") {
  using Stage = MolStandardize::PipelineStage;
  using Status = MolStandardize::PipelineStatus;

  // Input CTAB: atom 13 is an explicit hydrogen without any isotope or
  // stereo annotation; the Na atom is drawn bonded to a carboxyl oxygen.
  const char *const ctab = R"(
Mrv2311 03112410542D

0 0 0 0 0 999 V3000
M V30 BEGIN CTAB
M V30 COUNTS 17 17 0 0 0
M V30 BEGIN ATOM
M V30 1 C -22.6873 4.415 0 0
M V30 2 C -24.0208 3.6451 0 0
M V30 3 C -24.0208 2.1049 0 0
M V30 4 C -22.6873 1.3349 0 0
M V30 5 C -21.3536 2.1049 0 0
M V30 6 C -21.3536 3.6451 0 0
M V30 7 S -20.02 4.415 0 0 CHG=1
M V30 8 O -20.02 5.955 0 0 CHG=-1
M V30 9 C -18.6863 3.6451 0 0
M V30 10 C -17.3526 4.415 0 0
M V30 11 C -16.0189 3.6451 0 0
M V30 12 C -14.6853 4.4151 0 0
M V30 13 H -13.3516 3.6452 0 0
M V30 14 C -25.3545 1.3349 0 0
M V30 15 O -26.6882 2.105 0 0
M V30 16 O -25.3546 -0.2051 0 0
M V30 17 Na -28.0219 1.335 0 0
M V30 END ATOM
M V30 BEGIN BOND
M V30 1 2 1 2
M V30 2 1 2 3
M V30 3 2 3 4
M V30 4 1 4 5
M V30 5 2 5 6
M V30 6 1 6 1
M V30 7 1 6 7
M V30 8 1 7 9
M V30 9 1 7 8
M V30 10 1 9 10
M V30 11 1 10 11
M V30 12 1 11 12
M V30 13 1 3 14
M V30 14 1 14 15
M V30 15 2 14 16
M V30 16 1 15 17
M V30 17 1 12 13
M V30 END BOND
M V30 END CTAB
M END
)";

  const MolStandardize::PipelineResult outcome = pipeline.run(ctab);

  // Dump the pipeline log to stderr as a debugging aid.
  for (const auto &entry : outcome.log) {
    std::cerr << entry.status << " " << entry.detail << std::endl;
  }

  // The run must finish cleanly ...
  REQUIRE(static_cast<Stage>(outcome.stage) == Stage::COMPLETED);
  const auto errorFlags = outcome.status & Status::PIPELINE_ERROR;
  REQUIRE(errorFlags == Status::NO_EVENT);

  // ... and report exactly these three structure modifications.
  const auto changeFlags = outcome.status & Status::STRUCTURE_MODIFICATION;
  REQUIRE(changeFlags != Status::NO_EVENT);
  REQUIRE(changeFlags ==
          (Status::METALS_DISCONNECTED | Status::FRAGMENTS_REMOVED |
           Status::PROTONATION_CHANGED));
  REQUIRE(outcome.outputMolData == outcome.parentMolData);

  // The plain explicit H must still be present in the parent.
  const std::unique_ptr<RWMol> parent{
      MolBlockToMol(outcome.parentMolData, false, false)};
  REQUIRE(parent);
  const std::string parentSmi = MolToSmiles(*parent);
  REQUIRE(parentSmi == "[H]CCCC[S+]([O-])C1=CC=C(C(=O)O)C=C1");
}
|
|
|
|
SECTION(
    "standardize doesn't remove wedged bonds from non-stereogenic centers") {
  using Stage = MolStandardize::PipelineStage;
  using Status = MolStandardize::PipelineStatus;

  // Input CTAB: atom 10 is marked CFG=2 and bond 11 (10 -> 11) carries
  // CFG=1; the Na atom is drawn bonded to a carboxyl oxygen.
  const char *const ctab = R"(
Mrv2311 03112410512D

0 0 0 0 0 999 V3000
M V30 BEGIN CTAB
M V30 COUNTS 16 16 0 0 0
M V30 BEGIN ATOM
M V30 1 C -22.6875 4.415 0 0
M V30 2 C -24.021 3.6452 0 0
M V30 3 C -24.021 2.1049 0 0
M V30 4 C -22.6875 1.3349 0 0
M V30 5 C -21.3537 2.1049 0 0
M V30 6 C -21.3537 3.6452 0 0
M V30 7 S -20.0202 4.415 0 0 CHG=1
M V30 8 O -20.0202 5.955 0 0 CHG=-1
M V30 9 C -18.6865 3.6452 0 0
M V30 10 C -17.3528 4.415 0 0 CFG=2
M V30 11 C -16.019 3.6452 0 0
M V30 12 C -25.3547 1.3349 0 0
M V30 13 O -26.6885 2.105 0 0
M V30 14 O -25.3548 -0.2051 0 0
M V30 15 Na -28.0222 1.3351 0 0
M V30 16 C -17.3528 5.955 0 0
M V30 END ATOM
M V30 BEGIN BOND
M V30 1 2 1 2
M V30 2 1 2 3
M V30 3 2 3 4
M V30 4 1 4 5
M V30 5 2 5 6
M V30 6 1 6 1
M V30 7 1 6 7
M V30 8 1 7 9
M V30 9 1 7 8
M V30 10 1 9 10
M V30 11 1 10 11 CFG=1
M V30 12 1 3 12
M V30 13 2 12 14
M V30 14 1 12 13
M V30 15 1 13 15
M V30 16 1 10 16
M V30 END BOND
M V30 END CTAB
M END
)";

  const MolStandardize::PipelineResult outcome = pipeline.run(ctab);

  // Dump the pipeline log to stderr as a debugging aid.
  for (const auto &entry : outcome.log) {
    std::cerr << entry.status << " " << entry.detail << std::endl;
  }

  // The run must finish cleanly ...
  REQUIRE(static_cast<Stage>(outcome.stage) == Stage::COMPLETED);
  const auto errorFlags = outcome.status & Status::PIPELINE_ERROR;
  REQUIRE(errorFlags == Status::NO_EVENT);

  // ... and report exactly these three structure modifications.
  const auto changeFlags = outcome.status & Status::STRUCTURE_MODIFICATION;
  REQUIRE(changeFlags != Status::NO_EVENT);
  REQUIRE(changeFlags ==
          (Status::METALS_DISCONNECTED | Status::FRAGMENTS_REMOVED |
           Status::PROTONATION_CHANGED));
  REQUIRE(outcome.outputMolData == outcome.parentMolData);

  const std::unique_ptr<RWMol> parent{
      MolBlockToMol(outcome.parentMolData, false, false)};
  REQUIRE(parent);
  const std::string parentSmi = MolToSmiles(*parent);
  REQUIRE(parentSmi == "CC(C)C[S+]([O-])C1=CC=C(C(=O)O)C=C1");

  // Restore the bond directions recorded from the mol block, then look for
  // the first bond flagged as a begin-wedge.
  Chirality::reapplyMolBlockWedging(*parent);

  const Bond *const wedgeBond = [&parent]() -> const Bond * {
    for (const auto candidate : parent->bonds()) {
      if (candidate->getBondDir() == Bond::BondDir::BEGINWEDGE) {
        return candidate;
      }
    }
    return nullptr;
  }();
  REQUIRE(wedgeBond != nullptr);

  // The wedge must start at a three-connected carbon and end at a terminal
  // carbon.
  const auto wedgeStart = wedgeBond->getBeginAtom();
  REQUIRE(wedgeStart->getAtomicNum() == 6);
  REQUIRE(wedgeStart->getDegree() == 3);
  const auto wedgeEnd = wedgeBond->getEndAtom();
  REQUIRE(wedgeEnd->getAtomicNum() == 6);
  REQUIRE(wedgeEnd->getDegree() == 1);
}
|
|
|
|
SECTION("standardize removes wavy bonds from tetrahedral centers") {
  using Stage = MolStandardize::PipelineStage;
  using Status = MolStandardize::PipelineStatus;

  // Input CTAB: atom 2 is marked CFG=3 and bond 5 (2 -> 1) carries CFG=2.
  const char *const ctab = R"(
Mrv2311 05172413472D

0 0 0 0 0 999 V3000
M V30 BEGIN CTAB
M V30 COUNTS 6 5 0 0 0
M V30 BEGIN ATOM
M V30 1 Cl -7.1663 10.5408 0 0
M V30 2 C -8.5 9.7708 0 0 CFG=3
M V30 3 F -9.8337 10.5408 0 0
M V30 4 C -8.5 8.2308 0 0
M V30 5 C -7.1663 9.0008 0 0
M V30 6 C -9.8337 7.4608 0 0
M V30 END ATOM
M V30 BEGIN BOND
M V30 1 1 2 4
M V30 2 1 2 5
M V30 3 1 2 3
M V30 4 1 4 6
M V30 5 1 2 1 CFG=2
M V30 END BOND
M V30 END CTAB
M END
)";

  const MolStandardize::PipelineResult outcome = pipeline.run(ctab);

  // Dump the pipeline log to stderr as a debugging aid.
  for (const auto &entry : outcome.log) {
    std::cerr << entry.status << " " << entry.detail << std::endl;
  }

  // The run must finish cleanly ...
  REQUIRE(static_cast<Stage>(outcome.stage) == Stage::COMPLETED);
  const auto errorFlags = outcome.status & Status::PIPELINE_ERROR;
  REQUIRE(errorFlags == Status::NO_EVENT);

  // ... and report normalization as the only structure modification.
  const auto changeFlags = outcome.status & Status::STRUCTURE_MODIFICATION;
  REQUIRE(changeFlags != Status::NO_EVENT);
  REQUIRE(changeFlags == Status::NORMALIZATION_APPLIED);
  REQUIRE(outcome.outputMolData == outcome.parentMolData);

  const std::unique_ptr<RWMol> parent{
      MolBlockToMol(outcome.parentMolData, false, false)};
  REQUIRE(parent);
  const std::string parentSmi = MolToSmiles(*parent);
  REQUIRE(parentSmi == "CCC(C)(F)Cl");

  // Restore the bond directions recorded from the mol block; none of the
  // bonds may come back with direction UNKNOWN (i.e. wavy).
  Chirality::reapplyMolBlockWedging(*parent);

  const Bond *const wavyBond = [&parent]() -> const Bond * {
    for (const auto candidate : parent->bonds()) {
      if (candidate->getBondDir() == Bond::BondDir::UNKNOWN) {
        return candidate;
      }
    }
    return nullptr;
  }();
  REQUIRE(wavyBond == nullptr);
}
|
|
|
|
SECTION(
|
|
"standardize replaces wavy bonds with double bonds w/ stereo type 'either'") {
|
|
auto useLegacy = GENERATE(true, false);
|
|
CAPTURE(useLegacy);
|
|
UseLegacyStereoPerceptionFixture fx(useLegacy);
|
|
const char *molblock;
|
|
MolStandardize::PipelineResult result;
|
|
std::unique_ptr<RWMol> parentMol;
|
|
std::string parentSmiles;
|
|
const Bond *wavy;
|
|
const Bond *doubleBond;
|
|
|
|
// simplest case: wavy bond adjacent a double bond
|
|
molblock = R"(
|
|
Mrv2311 04172413232D
|
|
|
|
0 0 0 0 0 999 V3000
|
|
M V30 BEGIN CTAB
|
|
M V30 COUNTS 4 3 0 0 0
|
|
M V30 BEGIN ATOM
|
|
M V30 1 C -13.9163 3.9158 0 0
|
|
M V30 2 C -15.25 3.1458 0 0
|
|
M V30 3 C -13.9163 5.4558 0 0
|
|
M V30 4 C -15.25 6.2258 0 0
|
|
M V30 END ATOM
|
|
M V30 BEGIN BOND
|
|
M V30 1 1 2 1
|
|
M V30 2 2 1 3
|
|
M V30 3 1 3 4 CFG=2
|
|
M V30 END BOND
|
|
M V30 END CTAB
|
|
M END
|
|
)";
|
|
|
|
result = pipeline.run(molblock);
|
|
|
|
for (auto &info : result.log) {
|
|
std::cerr << info.status << " " << info.detail << std::endl;
|
|
}
|
|
|
|
REQUIRE(static_cast<MolStandardize::PipelineStage>(result.stage) ==
|
|
MolStandardize::PipelineStage::COMPLETED);
|
|
REQUIRE((result.status & MolStandardize::PipelineStatus::PIPELINE_ERROR) ==
|
|
MolStandardize::PipelineStatus::NO_EVENT);
|
|
REQUIRE((result.status &
|
|
MolStandardize::PipelineStatus::STRUCTURE_MODIFICATION) !=
|
|
MolStandardize::PipelineStatus::NO_EVENT);
|
|
REQUIRE((result.status &
|
|
MolStandardize::PipelineStatus::STRUCTURE_MODIFICATION) ==
|
|
MolStandardize::PipelineStatus::NORMALIZATION_APPLIED);
|
|
REQUIRE(result.outputMolData == result.parentMolData);
|
|
|
|
parentMol.reset(MolBlockToMol(result.parentMolData, false, false));
|
|
REQUIRE(parentMol);
|
|
parentSmiles = MolToSmiles(*parentMol);
|
|
REQUIRE(parentSmiles == "CC=CC");
|
|
|
|
Chirality::reapplyMolBlockWedging(*parentMol);
|
|
|
|
// no wavy bond is expected to be found
|
|
wavy = nullptr;
|
|
for (auto bond : parentMol->bonds()) {
|
|
auto bondDir = bond->getBondDir();
|
|
if (bondDir == Bond::BondDir::UNKNOWN) {
|
|
wavy = bond;
|
|
break;
|
|
}
|
|
}
|
|
REQUIRE(wavy == nullptr);
|
|
|
|
// the double bond should have stereo type STEREOANY
|
|
doubleBond = nullptr;
|
|
for (auto bond : parentMol->bonds()) {
|
|
auto bondType = bond->getBondType();
|
|
if (bondType == Bond::DOUBLE) {
|
|
doubleBond = bond;
|
|
break;
|
|
}
|
|
}
|
|
REQUIRE(doubleBond != nullptr);
|
|
REQUIRE(doubleBond->getStereo() == Bond::STEREOANY);
|
|
|
|
// wavy bond between stereo center and double bond
|
|
molblock = R"(
|
|
Mrv2311 05172414272D
|
|
|
|
0 0 0 0 0 999 V3000
|
|
M V30 BEGIN CTAB
|
|
M V30 COUNTS 7 6 0 0 0
|
|
M V30 BEGIN ATOM
|
|
M V30 1 C -5.3747 8.9992 0 0
|
|
M V30 2 F -6.7083 8.2292 0 0
|
|
M V30 3 Cl -5.3747 10.5392 0 0
|
|
M V30 4 C -4.041 8.2292 0 0
|
|
M V30 5 C -5.3747 7.4592 0 0
|
|
M V30 6 C -6.7083 6.6892 0 0
|
|
M V30 7 C -6.7083 5.1492 0 0
|
|
M V30 END ATOM
|
|
M V30 BEGIN BOND
|
|
M V30 1 1 1 4
|
|
M V30 2 1 5 1 CFG=2
|
|
M V30 3 2 5 6
|
|
M V30 4 1 6 7
|
|
M V30 5 1 2 1
|
|
M V30 6 1 1 3
|
|
M V30 END BOND
|
|
M V30 END CTAB
|
|
M END
|
|
)";
|
|
|
|
result = pipeline.run(molblock);
|
|
|
|
for (auto &info : result.log) {
|
|
std::cerr << info.status << " " << info.detail << std::endl;
|
|
}
|
|
|
|
REQUIRE(static_cast<MolStandardize::PipelineStage>(result.stage) ==
|
|
MolStandardize::PipelineStage::COMPLETED);
|
|
REQUIRE((result.status & MolStandardize::PipelineStatus::PIPELINE_ERROR) ==
|
|
MolStandardize::PipelineStatus::NO_EVENT);
|
|
REQUIRE((result.status &
|
|
MolStandardize::PipelineStatus::STRUCTURE_MODIFICATION) !=
|
|
MolStandardize::PipelineStatus::NO_EVENT);
|
|
REQUIRE((result.status &
|
|
MolStandardize::PipelineStatus::STRUCTURE_MODIFICATION) ==
|
|
MolStandardize::PipelineStatus::NORMALIZATION_APPLIED);
|
|
REQUIRE(result.outputMolData == result.parentMolData);
|
|
|
|
parentMol.reset(MolBlockToMol(result.parentMolData, false, false));
|
|
REQUIRE(parentMol);
|
|
parentSmiles = MolToSmiles(*parentMol);
|
|
REQUIRE(parentSmiles == "CC=CC(C)(F)Cl");
|
|
|
|
Chirality::reapplyMolBlockWedging(*parentMol);
|
|
|
|
// no wavy bond is expected to be found
|
|
wavy = nullptr;
|
|
for (auto bond : parentMol->bonds()) {
|
|
auto bondDir = bond->getBondDir();
|
|
if (bondDir == Bond::BondDir::UNKNOWN) {
|
|
wavy = bond;
|
|
break;
|
|
}
|
|
}
|
|
REQUIRE(wavy == nullptr);
|
|
|
|
// the double bond should have stereo type STEREOANY
|
|
doubleBond = nullptr;
|
|
for (auto bond : parentMol->bonds()) {
|
|
auto bondType = bond->getBondType();
|
|
if (bondType == Bond::DOUBLE) {
|
|
doubleBond = bond;
|
|
break;
|
|
}
|
|
}
|
|
REQUIRE(doubleBond != nullptr);
|
|
REQUIRE(doubleBond->getStereo() == Bond::STEREOANY);
|
|
|
|
// wavy bond between stereo center and double bond again
|
|
// same as previous test case, but with the wavy bond going
|
|
// from the stereocenter to the double bond
|
|
// (make sure the direction of the wavy bond is not significant)
|
|
molblock = R"(
|
|
Mrv2311 05172414322D
|
|
|
|
0 0 0 0 0 999 V3000
|
|
M V30 BEGIN CTAB
|
|
M V30 COUNTS 7 6 0 0 0
|
|
M V30 BEGIN ATOM
|
|
M V30 1 C -5.3747 8.9992 0 0 CFG=3
|
|
M V30 2 F -6.7083 8.2292 0 0
|
|
M V30 3 Cl -5.3747 10.5392 0 0
|
|
M V30 4 C -4.041 8.2292 0 0
|
|
M V30 5 C -5.3747 7.4592 0 0
|
|
M V30 6 C -6.7083 6.6892 0 0
|
|
M V30 7 C -6.7083 5.1492 0 0
|
|
M V30 END ATOM
|
|
M V30 BEGIN BOND
|
|
M V30 1 1 1 4
|
|
M V30 2 1 1 5 CFG=2
|
|
M V30 3 2 5 6
|
|
M V30 4 1 6 7
|
|
M V30 5 1 2 1
|
|
M V30 6 1 1 3
|
|
M V30 END BOND
|
|
M V30 END CTAB
|
|
M END
|
|
)";
|
|
|
|
result = pipeline.run(molblock);
|
|
|
|
for (auto &info : result.log) {
|
|
std::cerr << info.status << " " << info.detail << std::endl;
|
|
}
|
|
|
|
REQUIRE(static_cast<MolStandardize::PipelineStage>(result.stage) ==
|
|
MolStandardize::PipelineStage::COMPLETED);
|
|
REQUIRE((result.status & MolStandardize::PipelineStatus::PIPELINE_ERROR) ==
|
|
MolStandardize::PipelineStatus::NO_EVENT);
|
|
REQUIRE((result.status &
|
|
MolStandardize::PipelineStatus::STRUCTURE_MODIFICATION) !=
|
|
MolStandardize::PipelineStatus::NO_EVENT);
|
|
REQUIRE((result.status &
|
|
MolStandardize::PipelineStatus::STRUCTURE_MODIFICATION) ==
|
|
MolStandardize::PipelineStatus::NORMALIZATION_APPLIED);
|
|
REQUIRE(result.outputMolData == result.parentMolData);
|
|
|
|
parentMol.reset(MolBlockToMol(result.parentMolData, false, false));
|
|
REQUIRE(parentMol);
|
|
parentSmiles = MolToSmiles(*parentMol);
|
|
REQUIRE(parentSmiles == "CC=CC(C)(F)Cl");
|
|
|
|
Chirality::reapplyMolBlockWedging(*parentMol);
|
|
|
|
// no wavy bond is expected to be found
|
|
wavy = nullptr;
|
|
for (auto bond : parentMol->bonds()) {
|
|
auto bondDir = bond->getBondDir();
|
|
if (bondDir == Bond::BondDir::UNKNOWN) {
|
|
wavy = bond;
|
|
break;
|
|
}
|
|
}
|
|
REQUIRE(wavy == nullptr);
|
|
|
|
// the double bond should have stereo type STEREOANY
|
|
doubleBond = nullptr;
|
|
for (auto bond : parentMol->bonds()) {
|
|
auto bondType = bond->getBondType();
|
|
if (bondType == Bond::DOUBLE) {
|
|
doubleBond = bond;
|
|
break;
|
|
}
|
|
}
|
|
REQUIRE(doubleBond != nullptr);
|
|
REQUIRE(doubleBond->getStereo() == Bond::STEREOANY);
|
|
|
|
// the wavy bond is removed, but no crossed/either double bond
|
|
// in small rings (7 is currently the largest size for a ring
|
|
// that is considered too small to allow a trans configuration)
|
|
molblock = R"(
|
|
Mrv2311 05172414462D
|
|
|
|
0 0 0 0 0 999 V3000
|
|
M V30 BEGIN CTAB
|
|
M V30 COUNTS 8 8 0 0 0
|
|
M V30 BEGIN ATOM
|
|
M V30 1 C -15.7363 3.017 0 0
|
|
M V30 2 C -14.3488 3.6852 0 0
|
|
M V30 3 C -12.9613 3.017 0 0
|
|
M V30 4 C -12.6186 1.5156 0 0
|
|
M V30 5 C -13.5788 0.3116 0 0
|
|
M V30 6 C -16.0789 1.5156 0 0
|
|
M V30 7 C -14.3488 5.2252 0 0
|
|
M V30 8 C -15.1188 0.3116 0 0
|
|
M V30 END ATOM
|
|
M V30 BEGIN BOND
|
|
M V30 1 1 1 2
|
|
M V30 2 1 1 6
|
|
M V30 3 2 2 3
|
|
M V30 4 1 3 4
|
|
M V30 5 1 2 7 CFG=2
|
|
M V30 6 1 6 8
|
|
M V30 7 1 8 5
|
|
M V30 8 1 4 5
|
|
M V30 END BOND
|
|
M V30 END CTAB
|
|
M END
|
|
)";
|
|
|
|
result = pipeline.run(molblock);
|
|
|
|
for (auto &info : result.log) {
|
|
std::cerr << info.status << " " << info.detail << std::endl;
|
|
}
|
|
|
|
REQUIRE(static_cast<MolStandardize::PipelineStage>(result.stage) ==
|
|
MolStandardize::PipelineStage::COMPLETED);
|
|
REQUIRE((result.status & MolStandardize::PipelineStatus::PIPELINE_ERROR) ==
|
|
MolStandardize::PipelineStatus::NO_EVENT);
|
|
REQUIRE((result.status &
|
|
MolStandardize::PipelineStatus::STRUCTURE_MODIFICATION) !=
|
|
MolStandardize::PipelineStatus::NO_EVENT);
|
|
REQUIRE((result.status &
|
|
MolStandardize::PipelineStatus::STRUCTURE_MODIFICATION) ==
|
|
MolStandardize::PipelineStatus::NORMALIZATION_APPLIED);
|
|
REQUIRE(result.outputMolData == result.parentMolData);
|
|
|
|
parentMol.reset(MolBlockToMol(result.parentMolData, false, false));
|
|
REQUIRE(parentMol);
|
|
parentSmiles = MolToSmiles(*parentMol);
|
|
REQUIRE(parentSmiles == "CC1=CCCCCC1");
|
|
|
|
Chirality::reapplyMolBlockWedging(*parentMol);
|
|
|
|
// no wavy bond is expected to be found
|
|
wavy = nullptr;
|
|
for (auto bond : parentMol->bonds()) {
|
|
auto bondDir = bond->getBondDir();
|
|
if (bondDir == Bond::BondDir::UNKNOWN) {
|
|
wavy = bond;
|
|
break;
|
|
}
|
|
}
|
|
REQUIRE(wavy == nullptr);
|
|
|
|
// the double bond should have stereo type STEREONONE
|
|
doubleBond = nullptr;
|
|
for (auto bond : parentMol->bonds()) {
|
|
auto bondType = bond->getBondType();
|
|
if (bondType == Bond::DOUBLE) {
|
|
doubleBond = bond;
|
|
break;
|
|
}
|
|
}
|
|
REQUIRE(doubleBond != nullptr);
|
|
REQUIRE(doubleBond->getStereo() == Bond::STEREONONE);
|
|
|
|
// do we get a crossed/either double bond if the double bond
|
|
// has identical substituents at one end? this is apparently
|
|
// the case.
|
|
molblock = R"(
|
|
Mrv2311 05172414492D
|
|
|
|
0 0 0 0 0 999 V3000
|
|
M V30 BEGIN CTAB
|
|
M V30 COUNTS 5 4 0 0 0
|
|
M V30 BEGIN ATOM
|
|
M V30 1 C -6.4997 8.0408 0 0
|
|
M V30 2 C -7.8333 7.2708 0 0
|
|
M V30 3 C -9.167 8.0408 0 0
|
|
M V30 4 C -7.8333 5.7308 0 0
|
|
M V30 5 C -9.167 4.9608 0 0
|
|
M V30 END ATOM
|
|
M V30 BEGIN BOND
|
|
M V30 1 1 2 1
|
|
M V30 2 1 2 3
|
|
M V30 3 2 2 4
|
|
M V30 4 1 4 5 CFG=2
|
|
M V30 END BOND
|
|
M V30 END CTAB
|
|
M END
|
|
)";
|
|
|
|
result = pipeline.run(molblock);
|
|
|
|
for (auto &info : result.log) {
|
|
std::cerr << info.status << " " << info.detail << std::endl;
|
|
}
|
|
|
|
REQUIRE(static_cast<MolStandardize::PipelineStage>(result.stage) ==
|
|
MolStandardize::PipelineStage::COMPLETED);
|
|
REQUIRE((result.status & MolStandardize::PipelineStatus::PIPELINE_ERROR) ==
|
|
MolStandardize::PipelineStatus::NO_EVENT);
|
|
REQUIRE((result.status &
|
|
MolStandardize::PipelineStatus::STRUCTURE_MODIFICATION) !=
|
|
MolStandardize::PipelineStatus::NO_EVENT);
|
|
REQUIRE((result.status &
|
|
MolStandardize::PipelineStatus::STRUCTURE_MODIFICATION) ==
|
|
MolStandardize::PipelineStatus::NORMALIZATION_APPLIED);
|
|
REQUIRE(result.outputMolData == result.parentMolData);
|
|
|
|
parentMol.reset(MolBlockToMol(result.parentMolData, false, false));
|
|
REQUIRE(parentMol);
|
|
parentSmiles = MolToSmiles(*parentMol);
|
|
REQUIRE(parentSmiles == "CC=C(C)C");
|
|
|
|
Chirality::reapplyMolBlockWedging(*parentMol);
|
|
|
|
// no wavy bond is expected to be found
|
|
wavy = nullptr;
|
|
for (auto bond : parentMol->bonds()) {
|
|
auto bondDir = bond->getBondDir();
|
|
if (bondDir == Bond::BondDir::UNKNOWN) {
|
|
wavy = bond;
|
|
break;
|
|
}
|
|
}
|
|
REQUIRE(wavy == nullptr);
|
|
|
|
// the double bond should have stereo type STEREOANY when useLegacy is set,
// and STEREONONE otherwise
|
|
doubleBond = nullptr;
|
|
for (auto bond : parentMol->bonds()) {
|
|
auto bondType = bond->getBondType();
|
|
if (bondType == Bond::DOUBLE) {
|
|
doubleBond = bond;
|
|
break;
|
|
}
|
|
}
|
|
REQUIRE(doubleBond != nullptr);
|
|
if (useLegacy) {
|
|
REQUIRE(doubleBond->getStereo() == Bond::STEREOANY);
|
|
} else {
|
|
REQUIRE(doubleBond->getStereo() == Bond::STEREONONE);
|
|
}
|
|
}
|
|
|
|
SECTION("pipeline doesn't remove stereo bonds from biaryls") {
|
|
const char *molblock = R"(
|
|
Mrv2311 02092409022D
|
|
|
|
0 0 0 0 0 999 V3000
|
|
M V30 BEGIN CTAB
|
|
M V30 COUNTS 12 13 0 0 0
|
|
M V30 BEGIN ATOM
|
|
M V30 1 C -17.8543 8.7068 0 0
|
|
M V30 2 C -19.1878 7.9368 0 0
|
|
M V30 3 C -19.1878 6.3966 0 0
|
|
M V30 4 C -17.8543 5.6266 0 0
|
|
M V30 5 C -16.5205 6.3966 0 0
|
|
M V30 6 C -16.5205 7.9368 0 0
|
|
M V30 7 C -17.8543 4.0866 0 0
|
|
M V30 8 C -19.1879 3.3166 0 0
|
|
M V30 9 C -19.1879 1.7764 0 0
|
|
M V30 10 C -17.8544 1.0064 0 0
|
|
M V30 11 C -16.5206 1.7763 0 0
|
|
M V30 12 C -16.5206 3.3165 0 0
|
|
M V30 END ATOM
|
|
M V30 BEGIN BOND
|
|
M V30 1 2 1 2
|
|
M V30 2 1 2 3
|
|
M V30 3 2 3 4
|
|
M V30 4 1 4 5 CFG=1
|
|
M V30 5 2 5 6
|
|
M V30 6 1 6 1
|
|
M V30 7 1 4 7
|
|
M V30 8 1 8 9
|
|
M V30 9 2 9 10
|
|
M V30 10 1 10 11
|
|
M V30 11 2 11 12
|
|
M V30 12 2 7 8
|
|
M V30 13 1 12 7
|
|
M V30 END BOND
|
|
M V30 END CTAB
|
|
M END
|
|
)";
|
|
|
|
MolStandardize::PipelineResult result = pipeline.run(molblock);
|
|
|
|
for (auto &info : result.log) {
|
|
std::cerr << info.status << " " << info.detail << std::endl;
|
|
}
|
|
|
|
REQUIRE(static_cast<MolStandardize::PipelineStage>(result.stage) ==
|
|
MolStandardize::PipelineStage::COMPLETED);
|
|
REQUIRE((result.status & MolStandardize::PipelineStatus::PIPELINE_ERROR) ==
|
|
MolStandardize::PipelineStatus::NO_EVENT);
|
|
REQUIRE((result.status &
|
|
MolStandardize::PipelineStatus::STRUCTURE_MODIFICATION) ==
|
|
MolStandardize::PipelineStatus::NO_EVENT);
|
|
REQUIRE(result.outputMolData == result.parentMolData);
|
|
std::unique_ptr<RWMol> parentMol(
|
|
MolBlockToMol(result.parentMolData, false, false));
|
|
REQUIRE(parentMol);
|
|
std::string parentSmiles{MolToSmiles(*parentMol)};
|
|
|
|
Chirality::reapplyMolBlockWedging(*parentMol);
|
|
|
|
const Bond *wedged = nullptr;
|
|
for (auto bond : parentMol->bonds()) {
|
|
auto bondDir = bond->getBondDir();
|
|
if (bondDir == Bond::BondDir::BEGINWEDGE) {
|
|
wedged = bond;
|
|
break;
|
|
}
|
|
}
|
|
REQUIRE(wedged != nullptr);
|
|
|
|
auto beginAtom = wedged->getBeginAtom();
|
|
// there are only two positions with degree 3
|
|
// and they are equivalent
|
|
REQUIRE(beginAtom->getDegree() == 3);
|
|
}
|
|
|
|
SECTION("SO2H normalization") {
|
|
const char *molblock = R"(
|
|
Mrv2311 03122408072D
|
|
|
|
0 0 0 0 0 999 V3000
|
|
M V30 BEGIN CTAB
|
|
M V30 COUNTS 4 3 0 0 0
|
|
M V30 BEGIN ATOM
|
|
M V30 1 S -10.708 4.2075 0 0
|
|
M V30 2 C -12.0417 3.4375 0 0
|
|
M V30 3 O -10.708 5.7475 0 0
|
|
M V30 4 O -9.3743 3.4375 0 0
|
|
M V30 END ATOM
|
|
M V30 BEGIN BOND
|
|
M V30 1 1 2 1
|
|
M V30 2 2 1 3
|
|
M V30 3 2 1 4
|
|
M V30 END BOND
|
|
M V30 END CTAB
|
|
M END
|
|
)";
|
|
|
|
MolStandardize::PipelineResult result = pipeline.run(molblock);
|
|
|
|
for (auto &info : result.log) {
|
|
std::cerr << info.status << " " << info.detail << std::endl;
|
|
}
|
|
|
|
REQUIRE(static_cast<MolStandardize::PipelineStage>(result.stage) ==
|
|
MolStandardize::PipelineStage::COMPLETED);
|
|
REQUIRE((result.status & MolStandardize::PipelineStatus::PIPELINE_ERROR) ==
|
|
MolStandardize::PipelineStatus::NO_EVENT);
|
|
REQUIRE((result.status &
|
|
MolStandardize::PipelineStatus::STRUCTURE_MODIFICATION) !=
|
|
MolStandardize::PipelineStatus::NO_EVENT);
|
|
REQUIRE((result.status &
|
|
MolStandardize::PipelineStatus::STRUCTURE_MODIFICATION) ==
|
|
MolStandardize::PipelineStatus::NORMALIZATION_APPLIED);
|
|
|
|
REQUIRE(result.outputMolData == result.parentMolData);
|
|
|
|
std::unique_ptr<RWMol> inputMol(
|
|
MolBlockToMol(result.inputMolData, false, false));
|
|
REQUIRE(inputMol);
|
|
std::string inputSmiles{MolToSmiles(*inputMol)};
|
|
REQUIRE(inputSmiles == "C[SH](=O)=O");
|
|
|
|
std::unique_ptr<RWMol> parentMol(
|
|
MolBlockToMol(result.parentMolData, false, false));
|
|
REQUIRE(parentMol);
|
|
std::string parentSmiles{MolToSmiles(*parentMol)};
|
|
REQUIRE(parentSmiles == "CS(=O)O");
|
|
}
|
|
|
|
SECTION("Neutralization of carbocations") {
|
|
// verify that carbocations are not neutralized
|
|
const char *molblock = R"(
|
|
Mrv2311 06132416082D
|
|
|
|
0 0 0 0 0 999 V3000
|
|
M V30 BEGIN CTAB
|
|
M V30 COUNTS 7 7 0 0 0
|
|
M V30 BEGIN ATOM
|
|
M V30 1 C -29.8095 9.8338 0 0 CHG=1
|
|
M V30 2 C -28.422 9.1656 0 0
|
|
M V30 3 C -28.0793 7.6642 0 0
|
|
M V30 4 C -29.0395 6.4602 0 0
|
|
M V30 5 C -30.5795 6.4602 0 0
|
|
M V30 6 C -31.5396 7.6642 0 0
|
|
M V30 7 C -31.197 9.1656 0 0
|
|
M V30 END ATOM
|
|
M V30 BEGIN BOND
|
|
M V30 1 1 1 2
|
|
M V30 2 2 2 3
|
|
M V30 3 1 3 4
|
|
M V30 4 2 4 5
|
|
M V30 5 1 5 6
|
|
M V30 6 1 1 7
|
|
M V30 7 2 7 6
|
|
M V30 END BOND
|
|
M V30 END CTAB
|
|
M END
|
|
)";
|
|
|
|
MolStandardize::PipelineResult result = pipeline.run(molblock);
|
|
|
|
for (auto &info : result.log) {
|
|
std::cerr << info.status << " " << info.detail << std::endl;
|
|
}
|
|
|
|
REQUIRE(static_cast<MolStandardize::PipelineStage>(result.stage) ==
|
|
MolStandardize::PipelineStage::COMPLETED);
|
|
REQUIRE((result.status & MolStandardize::PipelineStatus::PIPELINE_ERROR) ==
|
|
MolStandardize::PipelineStatus::NO_EVENT);
|
|
REQUIRE((result.status &
|
|
MolStandardize::PipelineStatus::STRUCTURE_MODIFICATION) ==
|
|
MolStandardize::PipelineStatus::NO_EVENT);
|
|
|
|
REQUIRE(result.outputMolData == result.parentMolData);
|
|
|
|
std::unique_ptr<RWMol> parentMol(
|
|
MolBlockToMol(result.parentMolData, false, false));
|
|
REQUIRE(parentMol);
|
|
std::string parentSmiles{MolToSmiles(*parentMol)};
|
|
REQUIRE(parentSmiles == "C1=CC=C[CH+]C=C1");
|
|
}
|
|
|
|
SECTION("Handling of failing normalization") {
|
|
// Test that the failing application of some normalization transform
|
|
// doesn't result in unexpected changes to the input
|
|
// (test case based on GitHub #7189)
|
|
const char *molblock = R"(
|
|
Mrv2311 05292413242D
|
|
|
|
0 0 0 0 0 999 V3000
|
|
M V30 BEGIN CTAB
|
|
M V30 COUNTS 6 6 0 0 0
|
|
M V30 BEGIN ATOM
|
|
M V30 1 O -12.375 9.3308 0 0
|
|
M V30 2 C -13.6208 8.4255 0 0
|
|
M V30 3 N -13.145 6.9609 0 0 CHG=1
|
|
M V30 4 C -11.605 6.9609 0 0
|
|
M V30 5 C -11.1292 8.4255 0 0
|
|
M V30 6 O -10.6998 5.715 0 0 CHG=-1
|
|
M V30 END ATOM
|
|
M V30 BEGIN BOND
|
|
M V30 1 2 4 5
|
|
M V30 2 2 2 3
|
|
M V30 3 1 3 4
|
|
M V30 4 1 4 6
|
|
M V30 5 1 1 2
|
|
M V30 6 1 1 5
|
|
M V30 END BOND
|
|
M V30 END CTAB
|
|
M END
|
|
)";
|
|
|
|
MolStandardize::PipelineResult result = pipeline.run(molblock);
|
|
|
|
for (auto &info : result.log) {
|
|
std::cerr << info.status << " " << info.detail << std::endl;
|
|
}
|
|
|
|
REQUIRE(static_cast<MolStandardize::PipelineStage>(result.stage) ==
|
|
MolStandardize::PipelineStage::COMPLETED);
|
|
REQUIRE((result.status & MolStandardize::PipelineStatus::PIPELINE_ERROR) ==
|
|
MolStandardize::PipelineStatus::NO_EVENT);
|
|
REQUIRE((result.status &
|
|
MolStandardize::PipelineStatus::STRUCTURE_MODIFICATION) ==
|
|
MolStandardize::PipelineStatus::NO_EVENT);
|
|
}
|
|
}
|
|
|
|
namespace {
|
|
RWMOL_SPTR chargeParentLocal(RWMOL_SPTR mol, MolStandardize::PipelineResult &,
|
|
const MolStandardize::PipelineOptions &) {
|
|
RWMOL_SPTR res{MolStandardize::chargeParent(*mol)};
|
|
return res;
|
|
};
|
|
MolStandardize::RWMOL_SPTR_PAIR parentNoOp(
|
|
RWMOL_SPTR mol, MolStandardize::PipelineResult &,
|
|
const MolStandardize::PipelineOptions &) {
|
|
return {mol, mol};
|
|
};
|
|
|
|
RWMOL_SPTR smilesParse(const std::string &smiles,
|
|
MolStandardize::PipelineResult &,
|
|
const MolStandardize::PipelineOptions &) {
|
|
RWMOL_SPTR mol{SmilesToMol(smiles)};
|
|
return mol;
|
|
}
|
|
|
|
void smilesSerialize(MolStandardize::RWMOL_SPTR_PAIR output,
|
|
MolStandardize::PipelineResult &result,
|
|
const MolStandardize::PipelineOptions &) {
|
|
const ROMol &outputMol = *output.first;
|
|
const ROMol &parentMol = *output.second;
|
|
result.outputMolData = MolToSmiles(outputMol);
|
|
result.parentMolData = MolToSmiles(parentMol);
|
|
}
|
|
|
|
} // namespace
|
|
|
|
// Runs a Pipeline whose standardization, make-parent, parse and serialize
// stages have been replaced by the callbacks from the anonymous namespace
// above.
TEST_CASE("custom pipeline stages") {
  SECTION("basics") {
    MolStandardize::PipelineOptions pipelineOptions;
    MolStandardize::Pipeline pipeline(pipelineOptions);

    // a single custom standardization step plus a no-op parent step
    MolStandardize::Operations::PipelineVector standardizationSteps{
        {1, &chargeParentLocal}};
    pipeline.setStandardizationSteps(standardizationSteps);
    pipeline.setMakeParent(&parentNoOp);

    auto chargedMol = "CCC[O-]"_smiles;
    REQUIRE(chargedMol);
    auto molBlock = MolToMolBlock(*chargedMol);

    auto molBlockResult = pipeline.run(molBlock);
    CHECK(molBlockResult.status == MolStandardize::PipelineStatus::NO_EVENT);
    CHECK(molBlockResult.outputMolData == molBlockResult.parentMolData);
    // the input mol block contains a CHG entry, the output must not
    CHECK(molBlockResult.inputMolData.find("CHG") != std::string::npos);
    CHECK(molBlockResult.outputMolData.find("CHG") == std::string::npos);

    // silly example, demonstrate that we can handle SMILES
    pipeline.setValidationSteps({});  // no validation
    pipeline.setParse(&smilesParse);
    pipeline.setSerialize(&smilesSerialize);

    auto smilesResult = pipeline.run("CCC[O-]");
    CHECK(smilesResult.status == MolStandardize::PipelineStatus::NO_EVENT);
    CHECK(smilesResult.outputMolData == smilesResult.parentMolData);
    CHECK(smilesResult.outputMolData == "CCCO");
    CHECK(smilesResult.inputMolData == "CCC[O-]");
  }
}