mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-04 21:54:27 +08:00
* do not throw in destructor * remove unused var; reserve * provide operator= for DiscreteValueVect * provide operator= for SparseIntVect * remove unknown 'omp' #pragmas; refactor loop * remove unused var * remove unused variables * give EmbeddedAtom a default constructor & early exit on self assign * handle unused vars/args * catch exception by ref * address unused args * fix signed type comparison; refactor extra checks * remove unused variable * suppress switch fallthrough warning * handle signed type comparison * handle signed type comparison * potentially uninitialized vars * fix abs() of bool * unused vars in catch statements * remove unused variables * python::list returns will be copied * give ValidationMethod constructor & virtual destructor * remove extra semicolon
272 lines
9.8 KiB
C++
272 lines
9.8 KiB
C++
//
|
|
// Copyright (C) 2019 Greg Landrum
|
|
//
|
|
// @@ All Rights Reserved @@
|
|
// This file is part of the RDKit.
|
|
// The contents are covered by the terms of the BSD license
|
|
// which is included in the file license.txt, found at the root
|
|
// of the RDKit source tree.
|
|
//
|
|
// Tests of substructure searching
|
|
//
|
|
|
|
#define CATCH_CONFIG_MAIN
|
|
#include "catch.hpp"

#include <memory>
#include <numeric>
#include <sstream>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

#include <GraphMol/RDKitBase.h>
#include <GraphMol/SmilesParse/SmilesParse.h>
#include <GraphMol/SmilesParse/SmilesWrite.h>
#include <GraphMol/Substruct/SubstructMatch.h>
|
|
|
|
using namespace RDKit;
|
|
// One bulk test case: (first SMILES, second SMILES, expected number of
// substructure matches).
using matchCase = std::tuple<std::string, std::string, size_t>;
|
|
|
|
// Catch matcher that succeeds when the molecule under test, used as the
// query, has at least one substructure match in the molecule the matcher was
// constructed with.
//
// Only a pointer to the target molecule is stored, so that molecule must
// outlive the matcher.
//
// NOTE(review): names that start with an underscore followed by an uppercase
// letter are reserved identifiers in C++; a rename would also have to touch
// the IsSubstructOf() factory functions.
class _IsSubstructOf : public Catch::MatcherBase<const ROMol &> {
  ROMol const *m_mol;             // target molecule (not owned)
  SubstructMatchParameters m_ps;  // parameters forwarded to SubstructMatch()

 public:
  // Matches against `m` using default-constructed match parameters.
  _IsSubstructOf(const ROMol &m) : m_mol(&m) {}

  // Matches against `m` using `ps`. `ps` is taken by value as a sink
  // parameter, so it is moved (not copied a second time) into the member.
  _IsSubstructOf(const ROMol &m, SubstructMatchParameters ps)
      : m_mol(&m), m_ps(std::move(ps)) {}

  // Returns true when SubstructMatch() finds at least one match of `query`
  // in the stored target molecule.
  bool match(const ROMol &query) const override {
    return !SubstructMatch(*m_mol, query, m_ps).empty();
  }

  // Text Catch prints after the stringified argument; it is phrased as the
  // failure of the (non-negated) matcher and includes the target as
  // CXSMILES.
  std::string describe() const override {
    return "is not a substructure of " + MolToCXSmiles(*m_mol);
  }
};
|
|
|
|
// Factory for the matcher: checks the tested molecule as a query against
// target `m`, running the substructure search with parameters `ps`.
static _IsSubstructOf IsSubstructOf(const ROMol &m,
                                    const SubstructMatchParameters &ps) {
  return _IsSubstructOf{m, ps};
}
|
|
|
|
// Factory for the matcher: checks the tested molecule as a query against
// target `m`, using default substructure-match parameters.
static _IsSubstructOf IsSubstructOf(const ROMol &m) {
  return _IsSubstructOf{m};
}
|
|
|
|
namespace Catch {
|
|
// ""_smiles returns an RWMol.
|
|
template <>
|
|
struct StringMaker<RDKit::RWMol> {
|
|
static std::string convert(RDKit::RWMol const &m) { return MolToCXSmiles(m); }
|
|
};
|
|
} // namespace Catch
|
|
|
|
// Exercises the SubstructMatchParameters switches used in this file
// (useChirality, aromaticMatchesConjugated) and iteration over the result of
// SubstructMatch().
TEST_CASE("substructure parameters", "[substruct]") {
  SECTION("chirality") {
    auto mol1 = "CCC[C@@H]1CN(CCC)CCN1"_smiles;
    auto mol2 = "CCC[C@H]1CN(CCC)CCN1"_smiles;
    REQUIRE(mol1);
    REQUIRE(mol2);

    SubstructMatchParameters ps;
    // default is to ignore chirality:
    CHECK(SubstructMatch(*mol1, *mol2, ps).size() == 1);
    CHECK(SubstructMatch(*mol1, *mol1, ps).size() == 1);

    ps.useChirality = true;
    CHECK_THAT(*mol2, !IsSubstructOf(*mol1, ps));
    CHECK(SubstructMatch(*mol1, *mol1, ps).size() == 1);
  }
  SECTION("conjugated matching aromaticity 1") {
    auto mol1 = "C1=COC=C1"_smiles;
    REQUIRE(mol1);
    RWMol mol2(*mol1);
    MolOps::Kekulize(mol2);
    SubstructMatchParameters ps;
    // with default parameters the kekulized copy and the original do not
    // match each other
    CHECK(SubstructMatch(*mol1, mol2, ps).size() == 0);
    CHECK(SubstructMatch(mol2, *mol1, ps).size() == 0);

    ps.aromaticMatchesConjugated = true;
    CHECK(SubstructMatch(*mol1, mol2, ps).size() == 1);
    CHECK(SubstructMatch(mol2, *mol1, ps).size() == 1);
  }
  SECTION("conjugated matching aromaticity 2") {
    auto mol1 = "c1ccccc1"_smiles;
    REQUIRE(mol1);
    RWMol mol2(*mol1);
    MolOps::Kekulize(mol2);
    SubstructMatchParameters ps;
    // the matcher without explicit parameters uses the defaults
    CHECK_THAT(mol2, !IsSubstructOf(*mol1));
    CHECK_THAT(*mol1, !IsSubstructOf(mol2));

    ps.aromaticMatchesConjugated = true;
    CHECK(SubstructMatch(*mol1, mol2, ps).size() == 1);
    CHECK(SubstructMatch(mol2, *mol1, ps).size() == 1);
  }

  SECTION("conjugated matching aromaticity bulk") {
    // (first SMILES, second SMILES, expected number of matches)
    const std::vector<matchCase> examples{
        matchCase{"c1ccccc1", "C1CCCCC1", 0},
        matchCase{"C1CCCCC1", "c1ccccc1", 0},
        matchCase{"O=C1C=CC(=O)C=C1", "c1ccccc1", 1},
    };
    SubstructMatchParameters ps;
    ps.aromaticMatchesConjugated = true;
    for (const auto &example : examples) {
      // reported by Catch only if an assertion in this iteration fails
      INFO(std::get<0>(example) << " - " << std::get<1>(example));
      std::unique_ptr<RWMol> m1(SmilesToMol(std::get<0>(example)));
      REQUIRE(m1);
      std::unique_ptr<RWMol> m2(SmilesToMol(std::get<1>(example)));
      // guard the dereference below against a failed parse
      REQUIRE(m2);
      CHECK(SubstructMatch(*m1, *m2, ps).size() == std::get<2>(example));
    }
  }
  SECTION("looping") {
    auto mol1 = "CC(=O)C(=O)C(=O)"_smiles;
    auto mol2 = "C=O"_smiles;
    REQUIRE(mol1);
    REQUIRE(mol2);
    const auto matches = SubstructMatch(*mol1, *mol2);
    // without this the loop below would pass vacuously on an empty result
    CHECK(!matches.empty());
    for (const auto &match : matches) {
      CHECK(match.size() == 2);
    }
  }
}
|
|
|
|
namespace {
|
|
bool no_match(const ROMol &mol, const std::vector<unsigned int> &ids) {
|
|
RDUNUSED_PARAM(mol);
|
|
RDUNUSED_PARAM(ids);
|
|
return false;
|
|
}
|
|
bool always_match(const ROMol &mol, const std::vector<unsigned int> &ids) {
|
|
RDUNUSED_PARAM(mol);
|
|
RDUNUSED_PARAM(ids);
|
|
return true;
|
|
}
|
|
bool bigger(const ROMol &mol, const std::vector<unsigned int> &ids) {
|
|
RDUNUSED_PARAM(mol);
|
|
return std::accumulate(ids.begin(), ids.end(), 0) > 5;
|
|
}
|
|
} // namespace
|
|
// Checks that a callback installed in SubstructMatchParameters::extraFinalCheck
// decides which of the otherwise valid matches are returned.
TEST_CASE("providing a final match function", "[substruct]") {
  SECTION("basics") {
    auto target = "CCOC"_smiles;
    auto query = "CCO"_smiles;
    REQUIRE(target);
    REQUIRE(query);

    SubstructMatchParameters params;
    // no callback installed
    CHECK(SubstructMatch(*target, *query, params).size() == 1);

    // a callback that always returns false removes the match
    params.extraFinalCheck = &no_match;
    CHECK(SubstructMatch(*target, *query, params).size() == 0);

    // a callback that always returns true leaves the result unchanged
    params.extraFinalCheck = &always_match;
    CHECK(SubstructMatch(*target, *query, params).size() == 1);
  }
  SECTION("test 2") {
    auto target = "CCOCC"_smiles;
    auto query = "CCO"_smiles;
    REQUIRE(target);
    REQUIRE(query);

    SubstructMatchParameters params;
    CHECK(SubstructMatch(*target, *query, params).size() == 2);

    // only one of the two matches passes the `bigger` callback
    params.extraFinalCheck = &bigger;
    CHECK(SubstructMatch(*target, *query, params).size() == 1);
  }
}
|
|
|
|
TEST_CASE("Enhanced stereochemistry", "[substruct][StereoGroup]") {
  // Chirality specifications.
  // 1. An achiral molecule: CC(O)C(CC)F means unknown/all stereoisomers
  // 2. A chiral molecule: C[C@H](O)[C@H](CC)F means 1 stereoisomer
  // 3. A chiral molecule with an AND specifier: C[C@H](O)[C@H](CC)F |&1:1,3|
  //    means both stereoisomers
  // 4. A chiral molecule with an OR specifier: C[C@H](O)[C@H](CC)F |o1:1,3|
  //    means one of the two stereoisomers
  // The molecule with an absolute group (|a:1,3|) is checked alongside the
  // plain chiral molecule below.
  auto mol_achiral = "CC(O)C(CC)F"_smiles;
  auto mol_chiral = "C[C@H](O)[C@H](CC)F"_smiles;
  auto mol_and = "C[C@H](O)[C@H](CC)F |&1:1,3|"_smiles;
  auto mol_or = "C[C@H](O)[C@H](CC)F |o1:1,3|"_smiles;
  auto mol_absolute = "C[C@H](O)[C@H](CC)F |a:1,3|"_smiles;
  auto diastereomer = "C[C@H](O)[C@@H](CC)F"_smiles;
  // every molecule is dereferenced in the sections below, so stop early if
  // any of them failed to parse
  REQUIRE(mol_achiral);
  REQUIRE(mol_chiral);
  REQUIRE(mol_and);
  REQUIRE(mol_or);
  REQUIRE(mol_absolute);
  REQUIRE(diastereomer);

  SubstructMatchParameters ps;
  ps.useChirality = true;
  ps.useEnhancedStereo = true;

  SECTION("achiral search matches anything") {
    CHECK_THAT(*mol_achiral, IsSubstructOf(*mol_chiral, ps));
    CHECK_THAT(*mol_achiral, IsSubstructOf(*mol_and, ps));
    CHECK_THAT(*mol_achiral, IsSubstructOf(*mol_or, ps));
    CHECK_THAT(*mol_achiral, IsSubstructOf(*mol_absolute, ps));
    CHECK_THAT(*mol_achiral, IsSubstructOf(*diastereomer, ps));
  }
  SECTION("chiral molecule is a substructure of AND or OR") {
    CHECK_THAT(*mol_chiral, !IsSubstructOf(*mol_achiral, ps));
    CHECK_THAT(*mol_chiral, IsSubstructOf(*mol_and, ps));
    CHECK_THAT(*mol_chiral, IsSubstructOf(*mol_or, ps));
    CHECK_THAT(*mol_chiral, !IsSubstructOf(*diastereomer, ps));
    CHECK_THAT(*mol_absolute, !IsSubstructOf(*mol_achiral, ps));
    CHECK_THAT(*mol_absolute, IsSubstructOf(*mol_and, ps));
    CHECK_THAT(*mol_absolute, IsSubstructOf(*mol_or, ps));
    CHECK_THAT(*mol_absolute, !IsSubstructOf(*diastereomer, ps));
  }
  SECTION("AND query only matches AND") {
    // because it means BOTH, and only AND includes both.
    CHECK_THAT(*mol_and, !IsSubstructOf(*mol_or, ps));
    CHECK_THAT(*mol_and, IsSubstructOf(*mol_and, ps));
    CHECK_THAT(*mol_and, !IsSubstructOf(*mol_absolute, ps));
    CHECK_THAT(*mol_and, !IsSubstructOf(*mol_chiral, ps));
    CHECK_THAT(*mol_and, !IsSubstructOf(*mol_achiral, ps));
  }
  SECTION("An OR query matches AND and OR") {
    // because AND is both, so it's a superset of the molecules described in
    // the OR
    CHECK_THAT(*mol_or, !IsSubstructOf(*mol_chiral, ps));
    CHECK_THAT(*mol_or, !IsSubstructOf(*mol_absolute, ps));
    CHECK_THAT(*mol_or, !IsSubstructOf(*diastereomer, ps));
    CHECK_THAT(*mol_or, IsSubstructOf(*mol_or, ps));
    CHECK_THAT(*mol_or, IsSubstructOf(*mol_and, ps));
  }
  SECTION("AND and OR match their enantiomer") {
    // This is, like, the point of And/Or
    auto enantiomer = "C[C@@H](O)[C@@H](CC)F"_smiles;
    REQUIRE(enantiomer);
    CHECK_THAT(*enantiomer, IsSubstructOf(*mol_and, ps));
    CHECK_THAT(*enantiomer, IsSubstructOf(*mol_or, ps));
  }
  SECTION("But not some arbitrary diastereomer") {
    CHECK_THAT(*diastereomer, !IsSubstructOf(*mol_and, ps));
    CHECK_THAT(*diastereomer, !IsSubstructOf(*mol_or, ps));
  }
  SECTION("Mixed stereo groups include single stereo groups") {
    auto mol_mixed_or = "C[C@H](O)[C@H](CC)F |o1:1,o2:3|"_smiles;
    REQUIRE(mol_mixed_or);
    CHECK_THAT(*mol_mixed_or, !IsSubstructOf(*mol_or, ps));
    // OR refers to two of the 4 molecules that mol_mixed_or covers
    CHECK_THAT(*mol_or, IsSubstructOf(*mol_mixed_or, ps));

    auto mol_mixed_or2 = "C[C@H](O)[C@@H](CC)F |o1:1,o2:3|"_smiles;
    REQUIRE(mol_mixed_or2);
    CHECK_THAT(*mol_mixed_or2, !IsSubstructOf(*mol_or, ps));
    CHECK_THAT(*mol_or, IsSubstructOf(*mol_mixed_or2, ps));

    // I'm not sure about these ones, but they should be symmetric:
    auto mol_mixed_or_and_abs = "C[C@H](O)[C@H](CC)F |o1:1|"_smiles;
    REQUIRE(mol_mixed_or_and_abs);
    CHECK_THAT(*mol_mixed_or_and_abs, !IsSubstructOf(*mol_or, ps));
    CHECK_THAT(*mol_or, !IsSubstructOf(*mol_mixed_or_and_abs, ps));

    auto mol_mixed_or_and_abs2 = "C[C@@H](O)[C@H](CC)F |o1:1|"_smiles;
    REQUIRE(mol_mixed_or_and_abs2);
    CHECK_THAT(*mol_mixed_or_and_abs2, !IsSubstructOf(*mol_or, ps));
    // the reverse direction must use the same molecule as the line above
    // (the original re-tested mol_mixed_or_and_abs here by copy-paste)
    CHECK_THAT(*mol_or, !IsSubstructOf(*mol_mixed_or_and_abs2, ps));
  }
  SECTION("It's OK to match part of a stereo group, though") {
    auto mol_and_long = "F[C@@H](O)C[C@@H](CC)F |&1:1,3|"_smiles;
    auto mol_and_partial = "F[C@@H](O)C |&1:1|"_smiles;
    auto mol_or_long = "F[C@@H](O)C[C@@H](CC)F |o1:1,3|"_smiles;
    auto mol_or_partial = "F[C@@H](O)C |o1:1|"_smiles;
    REQUIRE(mol_and_long);
    REQUIRE(mol_and_partial);
    REQUIRE(mol_or_long);
    REQUIRE(mol_or_partial);

    CHECK_THAT(*mol_and_partial, IsSubstructOf(*mol_and_long, ps));
    CHECK_THAT(*mol_or_partial, IsSubstructOf(*mol_or_long, ps));
    CHECK_THAT(*mol_or_partial, IsSubstructOf(*mol_and_long, ps));
    CHECK_THAT(*mol_and_partial, !IsSubstructOf(*mol_or_long, ps));
  }
}
|