mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-04 21:54:27 +08:00
Add a CXSMILES option to the MolHash (#5058)
* Nonchiral atoms should be removed from StereoGroups. This fixes a bug where atoms which have no chirality were left in StereoGroups. In order to make this work, ROMol::setStereoGroups() needed to be made public. That shouldn't be a problem since it doesn't change connectivity.
* support CXSmiles extensions in the hashes
* initial tests for that
* some cleanup
* copyright header cleanup
* minor refactoring
* call out the changes in the release notes
* extension and more testing
* add python wrappers
This commit is contained in:
@@ -7,7 +7,7 @@ target_compile_definitions(MolHash PRIVATE RDKIT_MOLHASH_BUILD)
|
||||
rdkit_headers(MolHash.h nmmolhash.h
|
||||
DEST GraphMol/MolHash)
|
||||
|
||||
rdkit_catch_test(molHashCatchTest catch_tests.cpp LINK_LIBRARIES MolHash)
|
||||
rdkit_catch_test(molHashCatchTest ../catch_main.cpp catch_tests.cpp LINK_LIBRARIES MolHash)
|
||||
|
||||
if(RDK_BUILD_PYTHON_WRAPPERS)
|
||||
add_subdirectory(Wrap)
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
//
|
||||
// Copyright (C) 2020 Greg Landrum
|
||||
// Copyright (C) 2020 Greg Landrum and other RDKit contributors
|
||||
//
|
||||
// @@ All Rights Reserved @@
|
||||
// This file is part of the RDKit.
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
//
|
||||
// Copyright (C) 2020 Greg Landrum
|
||||
// Copyright (C) 2020-2022 Greg Landrum and other RDKit contributors
|
||||
//
|
||||
// @@ All Rights Reserved @@
|
||||
// This file is part of the RDKit.
|
||||
@@ -17,9 +17,10 @@ using namespace RDKit;
|
||||
|
||||
namespace {
|
||||
|
||||
std::string MolHashHelper(const ROMol &mol, MolHash::HashFunction func) {
|
||||
std::string MolHashHelper(const ROMol &mol, MolHash::HashFunction func,
|
||||
bool useCXSmiles) {
|
||||
RWMol cpy(mol);
|
||||
return MolHash::MolHash(&cpy, func);
|
||||
return MolHash::MolHash(&cpy, func, useCXSmiles);
|
||||
}
|
||||
} // namespace
|
||||
|
||||
@@ -48,7 +49,8 @@ BOOST_PYTHON_MODULE(rdMolHash) {
|
||||
MolHash::HashFunction::ArthorSubstructureOrder);
|
||||
|
||||
python::def("MolHash", MolHashHelper,
|
||||
(python::arg("mol"), python::arg("func")),
|
||||
(python::arg("mol"), python::arg("func"),
|
||||
python::arg("useCxSmiles") = false),
|
||||
"Generate a hash for a molecule. The func argument determines "
|
||||
"which hash is generated.");
|
||||
}
|
||||
|
||||
@@ -35,6 +35,17 @@ class TestCase(unittest.TestCase):
|
||||
self.assertEqual(rdMolHash.MolHash(m, rdMolHash.HashFunction.ArthorSubstructureOrder),
|
||||
'000f001001000c000300005f000000')
|
||||
|
||||
def testCxSmiles(self):
|
||||
m = Chem.MolFromSmiles(
|
||||
'C[C@@H](O)[C@@H](C)[C@@H](C)C[C@H](C1=CN=CN1)C1=CNC=N1 |o1:8,5,&1:1,3,r,c:11,18,t:9,15|')
|
||||
|
||||
self.assertEqual(rdMolHash.MolHash(m, rdMolHash.HashFunction.HetAtomTautomer),
|
||||
'C[C@@H](CC([C]1[CH][N][CH][N]1)[C]1[CH][N][CH][N]1)[C@H](C)[C@@H](C)[O]_3_0')
|
||||
|
||||
self.assertEqual(
|
||||
rdMolHash.MolHash(m, rdMolHash.HashFunction.HetAtomTautomer, True),
|
||||
'C[C@@H](CC([C]1[CH][N][CH][N]1)[C]1[CH][N][CH][N]1)[C@H](C)[C@@H](C)[O]_3_0 |o1:1,&1:14,16|')
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
//
|
||||
// Copyright (C) 2019 Greg Landrum
|
||||
// Copyright (C) 2019-2022 Greg Landrum
|
||||
//
|
||||
// @@ All Rights Reserved @@
|
||||
// This file is part of the RDKit.
|
||||
@@ -7,12 +7,11 @@
|
||||
// which is included in the file license.txt, found at the root
|
||||
// of the RDKit source tree.
|
||||
//
|
||||
#define CATCH_CONFIG_MAIN // This tells Catch to provide a main() - only do
|
||||
// this in one cpp file
|
||||
#include "catch.hpp"
|
||||
|
||||
#include <GraphMol/RDKitBase.h>
|
||||
#include <GraphMol/SmilesParse/SmilesParse.h>
|
||||
#include <GraphMol/FileParsers/FileParsers.h>
|
||||
#include <GraphMol/SmilesParse/SmilesWrite.h>
|
||||
#include "MolHash.h"
|
||||
|
||||
@@ -230,3 +229,179 @@ TEST_CASE("Github issues", "[molhash]") {
|
||||
CHECK(hsh == "C2H6Cl");
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("MolHash with CX extensions", "[molhash]") {
|
||||
SECTION("Tautomer") {
|
||||
auto mol =
|
||||
"C[C@@H](O)[C@@H](C)[C@@H](C)C[C@H](C1=CN=CN1)C1=CNC=N1 "
|
||||
"|o1:8,5,&1:1,3,r,c:11,18,t:9,15|"_smiles;
|
||||
REQUIRE(mol);
|
||||
|
||||
{
|
||||
RWMol cp(*mol);
|
||||
auto hsh = MolHash::MolHash(&cp, MolHash::HashFunction::HetAtomTautomer);
|
||||
CHECK(
|
||||
hsh ==
|
||||
"C[C@@H](CC([C]1[CH][N][CH][N]1)[C]1[CH][N][CH][N]1)[C@H](C)[C@@H](C)"
|
||||
"[O]_3_0");
|
||||
}
|
||||
{
|
||||
RWMol cp(*mol);
|
||||
|
||||
auto hsh =
|
||||
MolHash::MolHash(&cp, MolHash::HashFunction::HetAtomTautomer, true);
|
||||
CHECK(
|
||||
hsh ==
|
||||
"C[C@@H](CC([C]1[CH][N][CH][N]1)[C]1[CH][N][CH][N]1)[C@H](C)[C@@H](C)"
|
||||
"[O]_3_0 |o1:1,&1:14,16|");
|
||||
}
|
||||
}
|
||||
SECTION("no coordinates please") {
|
||||
auto mol = R"CTAB(
|
||||
Mrv2108 03032205502D
|
||||
|
||||
0 0 0 0 0 999 V3000
|
||||
M V30 BEGIN CTAB
|
||||
M V30 COUNTS 15 16 0 0 1
|
||||
M V30 BEGIN ATOM
|
||||
M V30 1 C -0.4657 -3.589 0 0 CFG=1
|
||||
M V30 2 C -0.4657 -2.049 0 0
|
||||
M V30 3 C 0.8679 -4.359 0 0
|
||||
M V30 4 C 2.2016 -3.589 0 0 CFG=2
|
||||
M V30 5 C 3.5353 -4.359 0 0
|
||||
M V30 6 C 4.9422 -3.7327 0 0
|
||||
M V30 7 N 5.9726 -4.8771 0 0
|
||||
M V30 8 C 5.2026 -6.2108 0 0
|
||||
M V30 9 N 3.6963 -5.8906 0 0
|
||||
M V30 10 C 2.2016 -2.049 0 0
|
||||
M V30 11 C 0.9557 -1.1438 0 0
|
||||
M V30 12 N 1.4316 0.3208 0 0
|
||||
M V30 13 C 2.9716 0.3208 0 0
|
||||
M V30 14 N 3.4475 -1.1438 0 0
|
||||
M V30 15 F -1.7994 -4.359 0 0
|
||||
M V30 END ATOM
|
||||
M V30 BEGIN BOND
|
||||
M V30 1 1 1 2 CFG=1
|
||||
M V30 2 1 1 3
|
||||
M V30 3 1 4 3 CFG=1
|
||||
M V30 4 1 4 5
|
||||
M V30 5 2 5 6
|
||||
M V30 6 1 6 7
|
||||
M V30 7 2 7 8
|
||||
M V30 8 1 8 9
|
||||
M V30 9 1 5 9
|
||||
M V30 10 1 4 10
|
||||
M V30 11 2 10 11
|
||||
M V30 12 1 11 12
|
||||
M V30 13 1 12 13
|
||||
M V30 14 2 13 14
|
||||
M V30 15 1 10 14
|
||||
M V30 16 1 1 15
|
||||
M V30 END BOND
|
||||
M V30 BEGIN COLLECTION
|
||||
M V30 MDLV30/STEREL1 ATOMS=(1 1)
|
||||
M V30 MDLV30/STERAC1 ATOMS=(1 4)
|
||||
M V30 END COLLECTION
|
||||
M V30 END CTAB
|
||||
M END
|
||||
)CTAB"_ctab;
|
||||
REQUIRE(mol);
|
||||
|
||||
{
|
||||
RWMol cp(*mol);
|
||||
auto hsh =
|
||||
MolHash::MolHash(&cp, MolHash::HashFunction::HetAtomTautomer, true);
|
||||
CHECK(hsh ==
|
||||
"C[C@H](F)CC([C]1[CH][N][CH][N]1)[C]1[CH][N][CH][N]1_2_0 |o1:1|");
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("Mesomer") {
|
||||
auto mol = "C[C@H](F)C[C@@](C([NH-])=O)C([O-])=N |o1:1,&1:4|"_smiles;
|
||||
REQUIRE(mol);
|
||||
|
||||
{
|
||||
RWMol cp(*mol);
|
||||
auto hsh = MolHash::MolHash(&cp, MolHash::HashFunction::Mesomer);
|
||||
CHECK(hsh == "C[C@H](F)C[C]([C]([NH])[O])[C]([NH])[O]_-2");
|
||||
}
|
||||
{
|
||||
RWMol cp(*mol);
|
||||
|
||||
auto hsh = MolHash::MolHash(&cp, MolHash::HashFunction::Mesomer, true);
|
||||
CHECK(hsh == "C[C@H](F)C[C]([C]([NH])[O])[C]([NH])[O]_-2 |o1:1|");
|
||||
}
|
||||
}
|
||||
SECTION("Extended Murcko") {
|
||||
auto mol =
|
||||
"CC1=CC=CC=C1[C@@H](C[C@@H](C1CC1)C1CCC1)C1=CC=CC=C1O |o1:9,&1:7|"_smiles;
|
||||
REQUIRE(mol);
|
||||
|
||||
{
|
||||
RWMol cp(*mol);
|
||||
auto hsh = MolHash::MolHash(&cp, MolHash::HashFunction::ExtendedMurcko);
|
||||
CHECK(hsh == "*c1ccccc1C(C[C@H](C1CCC1)C1CC1)c1ccccc1*");
|
||||
}
|
||||
{
|
||||
RWMol cp(*mol);
|
||||
|
||||
auto hsh =
|
||||
MolHash::MolHash(&cp, MolHash::HashFunction::ExtendedMurcko, true);
|
||||
CHECK(hsh == "*c1ccccc1C(C[C@H](C1CCC1)C1CC1)c1ccccc1* |o1:9|");
|
||||
}
|
||||
}
|
||||
SECTION("Murcko") {
|
||||
auto mol =
|
||||
"CC1=CC=CC=C1[C@@H](C[C@@H](C1CC1)C1CCC1)C1=CC=CC=C1O |o1:9,&1:7|"_smiles;
|
||||
REQUIRE(mol);
|
||||
|
||||
{
|
||||
RWMol cp(*mol);
|
||||
auto hsh = MolHash::MolHash(&cp, MolHash::HashFunction::MurckoScaffold);
|
||||
CHECK(hsh == "c1ccc(C(C[C@H](C2CCC2)C2CC2)c2ccccc2)cc1");
|
||||
}
|
||||
{
|
||||
RWMol cp(*mol);
|
||||
|
||||
auto hsh =
|
||||
MolHash::MolHash(&cp, MolHash::HashFunction::MurckoScaffold, true);
|
||||
CHECK(hsh == "c1ccc(C(C[C@H](C2CCC2)C2CC2)c2ccccc2)cc1 |o1:6|");
|
||||
}
|
||||
}
|
||||
SECTION("Element") {
|
||||
auto mol =
|
||||
"C([C@@H](C1CC1)C1CCC1)[C@@H](C1CCCCC1)C1=CC=CC=C1 |o1:1,&1:9,c:21,23,t:19|"_smiles;
|
||||
REQUIRE(mol);
|
||||
|
||||
{
|
||||
RWMol cp(*mol);
|
||||
auto hsh = MolHash::MolHash(&cp, MolHash::HashFunction::ElementGraph);
|
||||
CHECK(hsh == "C1CCC(C(C[C@H](C2CCC2)C2CC2)C2CCCCC2)CC1");
|
||||
}
|
||||
{
|
||||
RWMol cp(*mol);
|
||||
|
||||
auto hsh =
|
||||
MolHash::MolHash(&cp, MolHash::HashFunction::ElementGraph, true);
|
||||
CHECK(hsh == "C1CCC(C(C[C@H](C2CCC2)C2CC2)C2CCCCC2)CC1 |o1:6|");
|
||||
}
|
||||
}
|
||||
SECTION("Anonymous") {
|
||||
auto mol =
|
||||
"C([C@@H](C1CC1)C1CCC1)[C@@H](C1CCCCC1)C1=CC=CC=N1 |o1:1,&1:9,c:21,23,t:19|"_smiles;
|
||||
REQUIRE(mol);
|
||||
|
||||
{
|
||||
RWMol cp(*mol);
|
||||
auto hsh = MolHash::MolHash(&cp, MolHash::HashFunction::AnonymousGraph);
|
||||
CHECK(hsh == "*1***(*(**(*2***2)*2**2)*2*****2)**1");
|
||||
}
|
||||
{
|
||||
RWMol cp(*mol);
|
||||
|
||||
auto hsh =
|
||||
MolHash::MolHash(&cp, MolHash::HashFunction::AnonymousGraph, true);
|
||||
CHECK(hsh == "*1***(*(**(*2***2)*2**2)*2*****2)**1");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,13 +1,11 @@
|
||||
/*==============================================*/
|
||||
/* Copyright (C) 2011-2019 NextMove Software */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* This file is part of molhash. */
|
||||
/* */
|
||||
/* The contents are covered by the terms of the */
|
||||
/* BSD license, which is included in the file */
|
||||
/* license.txt. */
|
||||
/*==============================================*/
|
||||
//
|
||||
// Copyright (C) 2011-2022 NextMove Software and other RDKit contributors
|
||||
//
|
||||
// @@ All Rights Reserved @@
|
||||
// This file is part of the RDKit.
|
||||
// The contents are covered by the terms of the BSD license
|
||||
// which is included in the file license.txt, found at the root
|
||||
// of the RDKit source tree.
|
||||
#define _CRT_SECURE_NO_WARNINGS
|
||||
|
||||
#include <cstring>
|
||||
@@ -23,6 +21,16 @@
|
||||
#include "mf.h"
|
||||
|
||||
namespace {
|
||||
|
||||
// Appends the CXSMILES extension block generated for `mol` to `result`,
// separated from the existing text by a single space. `result` is left
// untouched when no extension text is generated.
//
// \param mol              molecule to generate the extensions from (non-null)
// \param result           hash string that the extensions are appended to
// \param additionalSkips  bitmask of SmilesWrite::CX_* fields to leave out in
//                         addition to CX_COORDS, which is always left out
void addCXExtensions(RDKit::RWMol *mol, std::string &result,
                     unsigned additionalSkips = 0) {
  PRECONDITION(mol, "bad molecule");
  // Clear the skipped fields with AND-NOT rather than XOR: XOR toggles bits,
  // so passing CX_COORDS in additionalSkips would switch coordinates back on
  // instead of keeping them off.
  const unsigned skipped =
      static_cast<unsigned>(RDKit::SmilesWrite::CX_COORDS) | additionalSkips;
  const unsigned flags =
      static_cast<unsigned>(RDKit::SmilesWrite::CX_ALL) & ~skipped;
  const auto cxext = RDKit::SmilesWrite::getCXExtensions(*mol, flags);
  if (!cxext.empty()) {
    result += ' ';
    result += cxext;
  }
}
|
||||
unsigned int NMRDKitBondGetOrder(const RDKit::Bond *bnd) {
|
||||
PRECONDITION(bnd, "bad bond");
|
||||
switch (bnd->getBondType()) {
|
||||
@@ -116,8 +124,10 @@ void NMRDKitSanitizeHydrogens(RDKit::RWMol *mol) {
|
||||
|
||||
namespace RDKit {
|
||||
namespace MolHash {
|
||||
static unsigned int NMDetermineComponents(RWMol *mol, unsigned int *parts,
|
||||
unsigned int acount) {
|
||||
|
||||
namespace {
|
||||
unsigned int NMDetermineComponents(RWMol *mol, unsigned int *parts,
|
||||
unsigned int acount) {
|
||||
PRECONDITION(mol, "bad molecule");
|
||||
PRECONDITION(parts, "bad parts pointer");
|
||||
memset(parts, 0, acount * sizeof(unsigned int));
|
||||
@@ -148,8 +158,8 @@ static unsigned int NMDetermineComponents(RWMol *mol, unsigned int *parts,
|
||||
return result;
|
||||
}
|
||||
|
||||
static std::string NMMolecularFormula(RWMol *mol, const unsigned int *parts,
|
||||
unsigned int part) {
|
||||
std::string NMMolecularFormula(RWMol *mol, const unsigned int *parts,
|
||||
unsigned int part) {
|
||||
PRECONDITION(mol, "bad molecule");
|
||||
PRECONDITION((!part || parts), "bad parts pointer");
|
||||
unsigned int hist[256];
|
||||
@@ -202,7 +212,7 @@ static std::string NMMolecularFormula(RWMol *mol, const unsigned int *parts,
|
||||
return result;
|
||||
}
|
||||
|
||||
static std::string NMMolecularFormula(RWMol *mol, bool sep = false) {
|
||||
std::string NMMolecularFormula(RWMol *mol, bool sep = false) {
|
||||
PRECONDITION(mol, "bad molecule");
|
||||
if (!sep) {
|
||||
return NMMolecularFormula(mol, nullptr, 0);
|
||||
@@ -237,7 +247,7 @@ static std::string NMMolecularFormula(RWMol *mol, bool sep = false) {
|
||||
return result;
|
||||
}
|
||||
|
||||
static void NormalizeHCount(Atom *aptr) {
|
||||
void NormalizeHCount(Atom *aptr) {
|
||||
PRECONDITION(aptr, "bad atom pointer");
|
||||
unsigned int hcount;
|
||||
|
||||
@@ -280,7 +290,7 @@ static void NormalizeHCount(Atom *aptr) {
|
||||
aptr->setNumExplicitHs(hcount);
|
||||
}
|
||||
|
||||
static std::string AnonymousGraph(RWMol *mol, bool elem) {
|
||||
std::string AnonymousGraph(RWMol *mol, bool elem, bool useCXSmiles) {
|
||||
PRECONDITION(mol, "bad molecule");
|
||||
std::string result;
|
||||
int charge = 0;
|
||||
@@ -302,11 +312,23 @@ static std::string AnonymousGraph(RWMol *mol, bool elem) {
|
||||
bptr->setBondType(Bond::SINGLE);
|
||||
}
|
||||
MolOps::assignRadicals(*mol);
|
||||
|
||||
// we may have just destroyed some stereocenters/bonds
|
||||
// clean that up:
|
||||
bool cleanIt = true;
|
||||
bool force = true;
|
||||
MolOps::assignStereochemistry(*mol, cleanIt, force);
|
||||
|
||||
result = MolToSmiles(*mol);
|
||||
|
||||
if (useCXSmiles) {
|
||||
addCXExtensions(mol, result, SmilesWrite::CX_RADICALS);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static std::string MesomerHash(RWMol *mol, bool netq) {
|
||||
std::string MesomerHash(RWMol *mol, bool netq, bool useCXSmiles) {
|
||||
PRECONDITION(mol, "bad molecule");
|
||||
std::string result;
|
||||
char buffer[32];
|
||||
@@ -323,15 +345,25 @@ static std::string MesomerHash(RWMol *mol, bool netq) {
|
||||
}
|
||||
|
||||
MolOps::assignRadicals(*mol);
|
||||
|
||||
// we may have just destroyed some stereocenters/bonds
|
||||
// clean that up:
|
||||
bool cleanIt = true;
|
||||
bool force = true;
|
||||
MolOps::assignStereochemistry(*mol, cleanIt, force);
|
||||
|
||||
result = MolToSmiles(*mol);
|
||||
if (netq) {
|
||||
sprintf(buffer, "_%d", charge);
|
||||
result += buffer;
|
||||
}
|
||||
if (useCXSmiles) {
|
||||
addCXExtensions(mol, result, SmilesWrite::CX_RADICALS);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
static std::string TautomerHash(RWMol *mol, bool proto) {
|
||||
std::string TautomerHash(RWMol *mol, bool proto, bool useCXSmiles) {
|
||||
PRECONDITION(mol, "bad molecule");
|
||||
std::string result;
|
||||
char buffer[32];
|
||||
@@ -372,10 +404,14 @@ static std::string TautomerHash(RWMol *mol, bool proto) {
|
||||
sprintf(buffer, "_%d", hcount - charge);
|
||||
}
|
||||
result += buffer;
|
||||
if (useCXSmiles) {
|
||||
addCXExtensions(mol, result, SmilesWrite::CX_RADICALS);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static bool TraverseForRing(Atom *atom, unsigned char *visit) {
|
||||
bool TraverseForRing(Atom *atom, unsigned char *visit) {
|
||||
PRECONDITION(atom, "bad atom pointer");
|
||||
PRECONDITION(visit, "bad pointer");
|
||||
visit[atom->getIdx()] = 1;
|
||||
@@ -395,8 +431,7 @@ static bool TraverseForRing(Atom *atom, unsigned char *visit) {
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool DepthFirstSearchForRing(Atom *root, Atom *nbor,
|
||||
unsigned int maxatomidx) {
|
||||
bool DepthFirstSearchForRing(Atom *root, Atom *nbor, unsigned int maxatomidx) {
|
||||
PRECONDITION(root, "bad atom pointer");
|
||||
PRECONDITION(nbor, "bad atom pointer");
|
||||
|
||||
@@ -425,7 +460,7 @@ bool IsInScaffold(Atom *atom, unsigned int maxatomidx) {
|
||||
return count > 1;
|
||||
}
|
||||
|
||||
static bool HasNbrInScaffold(Atom *aptr, unsigned char *is_in_scaffold) {
|
||||
bool HasNbrInScaffold(Atom *aptr, unsigned char *is_in_scaffold) {
|
||||
PRECONDITION(aptr, "bad atom pointer");
|
||||
PRECONDITION(is_in_scaffold, "bad pointer");
|
||||
for (auto nbri : boost::make_iterator_range(
|
||||
@@ -438,7 +473,7 @@ static bool HasNbrInScaffold(Atom *aptr, unsigned char *is_in_scaffold) {
|
||||
return false;
|
||||
}
|
||||
|
||||
static std::string ExtendedMurckoScaffold(RWMol *mol) {
|
||||
std::string ExtendedMurckoScaffold(RWMol *mol, bool useCXSmiles) {
|
||||
PRECONDITION(mol, "bad molecule");
|
||||
RDKit::MolOps::fastFindRings(*mol);
|
||||
|
||||
@@ -469,12 +504,22 @@ static std::string ExtendedMurckoScaffold(RWMol *mol) {
|
||||
}
|
||||
mol->commitBatchEdit();
|
||||
MolOps::assignRadicals(*mol);
|
||||
|
||||
// we may have just destroyed some stereocenters/bonds
|
||||
// clean that up:
|
||||
bool cleanIt = true;
|
||||
bool force = true;
|
||||
MolOps::assignStereochemistry(*mol, cleanIt, force);
|
||||
|
||||
std::string result;
|
||||
result = MolToSmiles(*mol);
|
||||
if (useCXSmiles) {
|
||||
addCXExtensions(mol, result, SmilesWrite::CX_RADICALS);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
static std::string MurckoScaffoldHash(RWMol *mol) {
|
||||
std::string MurckoScaffoldHash(RWMol *mol, bool useCXSmiles) {
|
||||
PRECONDITION(mol, "bad molecule");
|
||||
std::vector<Atom *> for_deletion;
|
||||
do {
|
||||
@@ -502,12 +547,22 @@ static std::string MurckoScaffoldHash(RWMol *mol) {
|
||||
mol->commitBatchEdit();
|
||||
} while (!for_deletion.empty());
|
||||
MolOps::assignRadicals(*mol);
|
||||
|
||||
// we may have just destroyed some stereocenters/bonds
|
||||
// clean that up:
|
||||
bool cleanIt = true;
|
||||
bool force = true;
|
||||
MolOps::assignStereochemistry(*mol, cleanIt, force);
|
||||
|
||||
std::string result;
|
||||
result = MolToSmiles(*mol);
|
||||
if (useCXSmiles) {
|
||||
addCXExtensions(mol, result, SmilesWrite::CX_RADICALS);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
static std::string NetChargeHash(RWMol *mol) {
|
||||
std::string NetChargeHash(RWMol *mol) {
|
||||
PRECONDITION(mol, "bad molecule");
|
||||
int totalq = 0;
|
||||
|
||||
@@ -520,7 +575,7 @@ static std::string NetChargeHash(RWMol *mol) {
|
||||
return buffer;
|
||||
}
|
||||
|
||||
static std::string SmallWorldHash(RWMol *mol, bool brl) {
|
||||
std::string SmallWorldHash(RWMol *mol, bool brl) {
|
||||
PRECONDITION(mol, "bad molecule");
|
||||
char buffer[64];
|
||||
|
||||
@@ -542,7 +597,7 @@ static std::string SmallWorldHash(RWMol *mol, bool brl) {
|
||||
return buffer;
|
||||
}
|
||||
|
||||
static void DegreeVector(RWMol *mol, unsigned int *v) {
|
||||
void DegreeVector(RWMol *mol, unsigned int *v) {
|
||||
memset(v, 0, 4 * sizeof(unsigned int));
|
||||
for (auto aptr : mol->atoms()) {
|
||||
switch (aptr->getDegree()) {
|
||||
@@ -562,7 +617,7 @@ static void DegreeVector(RWMol *mol, unsigned int *v) {
|
||||
}
|
||||
}
|
||||
|
||||
static bool HasDoubleBond(Atom *atom) {
|
||||
bool HasDoubleBond(Atom *atom) {
|
||||
PRECONDITION(atom, "bad atom");
|
||||
for (const auto &nbri :
|
||||
boost::make_iterator_range(atom->getOwningMol().getAtomBonds(atom))) {
|
||||
@@ -581,7 +636,7 @@ static bool HasDoubleBond(Atom *atom) {
|
||||
// 2 means break, with hydrogen on beg and asterisk on end
|
||||
// 3 means break, with asterisks on both beg and end
|
||||
|
||||
static int RegioisomerBond(Bond *bnd) {
|
||||
int RegioisomerBond(Bond *bnd) {
|
||||
PRECONDITION(bnd, "bad bond");
|
||||
if (NMRDKitBondGetOrder(bnd) != 1) {
|
||||
return -1;
|
||||
@@ -619,7 +674,7 @@ static int RegioisomerBond(Bond *bnd) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
static void ClearEZStereo(Atom *atm) {
|
||||
void ClearEZStereo(Atom *atm) {
|
||||
PRECONDITION(atm, "bad atom");
|
||||
for (const auto &nbri :
|
||||
boost::make_iterator_range(atm->getOwningMol().getAtomBonds(atm))) {
|
||||
@@ -630,7 +685,7 @@ static void ClearEZStereo(Atom *atm) {
|
||||
}
|
||||
}
|
||||
|
||||
static std::string RegioisomerHash(RWMol *mol) {
|
||||
std::string RegioisomerHash(RWMol *mol, bool useCXSmiles) {
|
||||
PRECONDITION(mol, "bad molecule");
|
||||
|
||||
// we need a copy of the molecule so that we can loop over the bonds of
|
||||
@@ -671,12 +726,21 @@ static std::string RegioisomerHash(RWMol *mol) {
|
||||
}
|
||||
}
|
||||
|
||||
std::string result;
|
||||
result = MolToSmiles(*mol);
|
||||
// we may have just destroyed some stereocenters/bonds
|
||||
// clean that up:
|
||||
bool cleanIt = true;
|
||||
bool force = true;
|
||||
MolOps::assignStereochemistry(*mol, cleanIt, force);
|
||||
|
||||
std::string result = MolToSmiles(*mol);
|
||||
if (useCXSmiles) {
|
||||
addCXExtensions(mol, result);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static std::string ArthorSubOrderHash(RWMol *mol) {
|
||||
std::string ArthorSubOrderHash(RWMol *mol) {
|
||||
PRECONDITION(mol, "bad molecule");
|
||||
char buffer[256];
|
||||
|
||||
@@ -793,8 +857,9 @@ static std::string ArthorSubOrderHash(RWMol *mol) {
|
||||
pcount, ccount, ocount, zcount, rcount, qcount, icount);
|
||||
return buffer;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
std::string MolHash(RWMol *mol, HashFunction func) {
|
||||
std::string MolHash(RWMol *mol, HashFunction func, bool useCXSmiles) {
|
||||
PRECONDITION(mol, "bad molecule");
|
||||
std::string result;
|
||||
char buffer[32];
|
||||
@@ -803,31 +868,34 @@ std::string MolHash(RWMol *mol, HashFunction func) {
|
||||
switch (func) {
|
||||
default:
|
||||
case HashFunction::AnonymousGraph:
|
||||
result = AnonymousGraph(mol, false);
|
||||
result = AnonymousGraph(mol, false, useCXSmiles);
|
||||
break;
|
||||
case HashFunction::ElementGraph:
|
||||
result = AnonymousGraph(mol, true);
|
||||
result = AnonymousGraph(mol, true, useCXSmiles);
|
||||
break;
|
||||
case HashFunction::CanonicalSmiles:
|
||||
result = MolToSmiles(*mol);
|
||||
if (useCXSmiles) {
|
||||
addCXExtensions(mol, result);
|
||||
}
|
||||
break;
|
||||
case HashFunction::MurckoScaffold:
|
||||
result = MurckoScaffoldHash(mol);
|
||||
result = MurckoScaffoldHash(mol, useCXSmiles);
|
||||
break;
|
||||
case HashFunction::ExtendedMurcko:
|
||||
result = ExtendedMurckoScaffold(mol);
|
||||
result = ExtendedMurckoScaffold(mol, useCXSmiles);
|
||||
break;
|
||||
case HashFunction::Mesomer:
|
||||
result = MesomerHash(mol, true);
|
||||
result = MesomerHash(mol, true, useCXSmiles);
|
||||
break;
|
||||
case HashFunction::RedoxPair:
|
||||
result = MesomerHash(mol, false);
|
||||
result = MesomerHash(mol, false, useCXSmiles);
|
||||
break;
|
||||
case HashFunction::HetAtomTautomer:
|
||||
result = TautomerHash(mol, false);
|
||||
result = TautomerHash(mol, false, useCXSmiles);
|
||||
break;
|
||||
case HashFunction::HetAtomProtomer:
|
||||
result = TautomerHash(mol, true);
|
||||
result = TautomerHash(mol, true, useCXSmiles);
|
||||
break;
|
||||
case HashFunction::MolFormula:
|
||||
result = NMMolecularFormula(mol);
|
||||
@@ -855,7 +923,7 @@ std::string MolHash(RWMol *mol, HashFunction func) {
|
||||
result = ArthorSubOrderHash(mol);
|
||||
break;
|
||||
case HashFunction::Regioisomer:
|
||||
result = RegioisomerHash(mol);
|
||||
result = RegioisomerHash(mol, useCXSmiles);
|
||||
break;
|
||||
}
|
||||
return result;
|
||||
|
||||
@@ -1,13 +1,11 @@
|
||||
/*==============================================*/
|
||||
/* Copyright (C) 2016-2019 NextMove Software */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* This file is part of molhash. */
|
||||
/* */
|
||||
/* The contents are covered by the terms of the */
|
||||
/* BSD license, which is included in the file */
|
||||
/* license.txt. */
|
||||
/*==============================================*/
|
||||
//
|
||||
// Copyright (C) 2016-2022 NextMove Software and other RDKit contributors
|
||||
//
|
||||
// @@ All Rights Reserved @@
|
||||
// This file is part of the RDKit.
|
||||
// The contents are covered by the terms of the BSD license
|
||||
// which is included in the file license.txt, found at the root
|
||||
// of the RDKit source tree.
|
||||
#ifndef NMS_MOLFORMULA_H
|
||||
#define NMS_MOLFORMULA_H
|
||||
|
||||
|
||||
@@ -42,7 +42,8 @@ enum class HashFunction {
|
||||
ArthorSubstructureOrder = 17
|
||||
};
|
||||
|
||||
RDKIT_MOLHASH_EXPORT std::string MolHash(RWMol *mol, HashFunction func);
|
||||
RDKIT_MOLHASH_EXPORT std::string MolHash(RWMol *mol, HashFunction func,
|
||||
bool useCXSmiles = false);
|
||||
|
||||
enum class StripType {
|
||||
AtomStereo = 1,
|
||||
|
||||
@@ -1,13 +1,11 @@
|
||||
/*==============================================*/
|
||||
/* Copyright (C) 2019 NextMove Software */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* This file is part of molhash. */
|
||||
/* */
|
||||
/* The contents are covered by the terms of the */
|
||||
/* BSD license, which is included in the file */
|
||||
/* license.txt. */
|
||||
/*==============================================*/
|
||||
//
|
||||
// Copyright (C) 2019-2022 NextMove Software and other RDKit contributors
|
||||
//
|
||||
// @@ All Rights Reserved @@
|
||||
// This file is part of the RDKit.
|
||||
// The contents are covered by the terms of the BSD license
|
||||
// which is included in the file license.txt, found at the root
|
||||
// of the RDKit source tree.
|
||||
|
||||
#include <GraphMol/RDKitBase.h>
|
||||
#include <GraphMol/RDKitQueries.h>
|
||||
@@ -25,6 +23,10 @@ void Strip(RWMol *mol, unsigned int striptype) {
|
||||
for (auto aptr : mol->atoms()) {
|
||||
aptr->setChiralTag(RDKit::Atom::CHI_UNSPECIFIED);
|
||||
}
|
||||
if (!mol->getStereoGroups().empty()) {
|
||||
std::vector<StereoGroup> no_sgs;
|
||||
mol->setStereoGroups(std::move(no_sgs));
|
||||
}
|
||||
}
|
||||
if (striptype & static_cast<unsigned>(StripType::BondStereo)) {
|
||||
for (auto bptr : mol->bonds()) {
|
||||
@@ -49,11 +51,9 @@ void Strip(RWMol *mol, unsigned int striptype) {
|
||||
}
|
||||
|
||||
void SplitMolecule(RWMol *mol, std::vector<RWMol *> &molv) {
|
||||
RDKit::MOL_SPTR_VECT mfrags = RDKit::MolOps::getMolFrags(*mol);
|
||||
RDKit::MOL_SPTR_VECT::iterator vit;
|
||||
for (vit = mfrags.begin(); vit != mfrags.end(); ++vit) {
|
||||
RDKit::ROMol *wrappedmol =
|
||||
(*vit).get(); // reach inside the shared pointer...
|
||||
auto mfrags = RDKit::MolOps::getMolFrags(*mol);
|
||||
for (const auto &frag : mfrags) {
|
||||
const auto *wrappedmol = frag.get(); // reach inside the shared pointer...
|
||||
molv.push_back(new RWMol(*wrappedmol)); // ...and make a copy
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1739,22 +1739,6 @@ TEST_CASE("StereoGroup Testing") {
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("replaceAtom and StereoGroups") {
|
||||
SECTION("basics") {
|
||||
auto mol = "C[C@](O)(Cl)[C@H](F)Cl |o1:1,4|"_smiles;
|
||||
REQUIRE(mol);
|
||||
CHECK(mol->getStereoGroups().size() == 1);
|
||||
CHECK(mol->getStereoGroups()[0].getAtoms().size() == 2);
|
||||
CHECK(mol->getStereoGroups()[0].getAtoms()[0] == mol->getAtomWithIdx(1));
|
||||
|
||||
Atom acp(*mol->getAtomWithIdx(1));
|
||||
mol->replaceAtom(1, &acp);
|
||||
CHECK(mol->getStereoGroups().size() == 1);
|
||||
CHECK(mol->getStereoGroups()[0].getAtoms().size() == 2);
|
||||
CHECK(mol->getStereoGroups()[0].getAtoms()[0] == mol->getAtomWithIdx(1));
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("Removing stereogroups from unspecified atoms") {
|
||||
SECTION("basics") {
|
||||
auto mol = "C[C@](O)(Cl)F |o1:1|"_smiles;
|
||||
@@ -1777,3 +1761,19 @@ TEST_CASE("Removing stereogroups from unspecified atoms") {
|
||||
CHECK(mol->getStereoGroups()[0].getAtoms()[0]->getIdx() == 4);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("replaceAtom and StereoGroups") {
|
||||
SECTION("basics") {
|
||||
auto mol = "C[C@](O)(Cl)[C@H](F)Cl |o1:1,4|"_smiles;
|
||||
REQUIRE(mol);
|
||||
CHECK(mol->getStereoGroups().size() == 1);
|
||||
CHECK(mol->getStereoGroups()[0].getAtoms().size() == 2);
|
||||
CHECK(mol->getStereoGroups()[0].getAtoms()[0] == mol->getAtomWithIdx(1));
|
||||
|
||||
Atom acp(*mol->getAtomWithIdx(1));
|
||||
mol->replaceAtom(1, &acp);
|
||||
CHECK(mol->getStereoGroups().size() == 1);
|
||||
CHECK(mol->getStereoGroups()[0].getAtoms().size() == 2);
|
||||
CHECK(mol->getStereoGroups()[0].getAtoms()[0] == mol->getAtomWithIdx(1));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,6 +4,10 @@
|
||||
## Backwards incompatible changes
|
||||
- When running in Jupyter Notebook, logs are now sent only to Python's
|
||||
standard error stream, and no longer include the `RDKit LEVEL` prefix.
|
||||
- The MolHash functions now reassign stereochemistry after modifying the
|
||||
molecule and before calculating the hash. Previous versions would still
|
||||
include information about atom/bond stereochemistry in the output hash even if
|
||||
that no longer applies in the modified molecule.
|
||||
|
||||
## Code removed in this release:
|
||||
- The `useCountSimulation` keyword argument for
|
||||
|
||||
Reference in New Issue
Block a user