Add a CXSMILES option to the MolHash (#5058)

* Nonchiral atoms should be removed from StereoGroups

This fixes a bug where atoms which have no chirality were left in StereoGroups

In order to make this work, ROMol::setStereoGroups() needed to be made public.
That shouldn't be a problem since it doesn't change connectivity.

* support CXSmiles extensions in the hashes

* initial tests for that

* some cleanup

* copyright header cleanup

* minor refactoring

* call out the changes in the release notes

* extension and more testing

* add python wrappers
This commit is contained in:
Greg Landrum
2022-03-03 16:00:38 +01:00
committed by GitHub
parent 6418918710
commit 4bbbc6611d
11 changed files with 355 additions and 96 deletions

View File

@@ -7,7 +7,7 @@ target_compile_definitions(MolHash PRIVATE RDKIT_MOLHASH_BUILD)
rdkit_headers(MolHash.h nmmolhash.h
DEST GraphMol/MolHash)
rdkit_catch_test(molHashCatchTest catch_tests.cpp LINK_LIBRARIES MolHash)
rdkit_catch_test(molHashCatchTest ../catch_main.cpp catch_tests.cpp LINK_LIBRARIES MolHash)
if(RDK_BUILD_PYTHON_WRAPPERS)
add_subdirectory(Wrap)

View File

@@ -1,5 +1,5 @@
//
// Copyright (C) 2020 Greg Landrum
// Copyright (C) 2020 Greg Landrum and other RDKit contributors
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.

View File

@@ -1,5 +1,5 @@
//
// Copyright (C) 2020 Greg Landrum
// Copyright (C) 2020-2022 Greg Landrum and other RDKit contributors
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
@@ -17,9 +17,10 @@ using namespace RDKit;
namespace {
std::string MolHashHelper(const ROMol &mol, MolHash::HashFunction func) {
std::string MolHashHelper(const ROMol &mol, MolHash::HashFunction func,
bool useCXSmiles) {
RWMol cpy(mol);
return MolHash::MolHash(&cpy, func);
return MolHash::MolHash(&cpy, func, useCXSmiles);
}
} // namespace
@@ -48,7 +49,8 @@ BOOST_PYTHON_MODULE(rdMolHash) {
MolHash::HashFunction::ArthorSubstructureOrder);
python::def("MolHash", MolHashHelper,
(python::arg("mol"), python::arg("func")),
(python::arg("mol"), python::arg("func"),
python::arg("useCxSmiles") = false),
"Generate a hash for a molecule. The func argument determines "
"which hash is generated.");
}

View File

@@ -35,6 +35,17 @@ class TestCase(unittest.TestCase):
self.assertEqual(rdMolHash.MolHash(m, rdMolHash.HashFunction.ArthorSubstructureOrder),
'000f001001000c000300005f000000')
def testCxSmiles(self):
# Computes the HetAtomTautomer hash of a molecule parsed from CXSMILES twice:
# once with the default arguments and once with True passed as the third
# argument. In the second case the expected string is the same hash followed
# by a CX extension block.
m = Chem.MolFromSmiles(
'C[C@@H](O)[C@@H](C)[C@@H](C)C[C@H](C1=CN=CN1)C1=CNC=N1 |o1:8,5,&1:1,3,r,c:11,18,t:9,15|')
self.assertEqual(rdMolHash.MolHash(m, rdMolHash.HashFunction.HetAtomTautomer),
'C[C@@H](CC([C]1[CH][N][CH][N]1)[C]1[CH][N][CH][N]1)[C@H](C)[C@@H](C)[O]_3_0')
self.assertEqual(
rdMolHash.MolHash(m, rdMolHash.HashFunction.HetAtomTautomer, True),
'C[C@@H](CC([C]1[CH][N][CH][N]1)[C]1[CH][N][CH][N]1)[C@H](C)[C@@H](C)[O]_3_0 |o1:1,&1:14,16|')
if __name__ == "__main__":
unittest.main()

View File

@@ -1,5 +1,5 @@
//
// Copyright (C) 2019 Greg Landrum
// Copyright (C) 2019-2022 Greg Landrum
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
@@ -7,12 +7,11 @@
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
#define CATCH_CONFIG_MAIN // This tells Catch to provide a main() - only do
// this in one cpp file
#include "catch.hpp"
#include <GraphMol/RDKitBase.h>
#include <GraphMol/SmilesParse/SmilesParse.h>
#include <GraphMol/FileParsers/FileParsers.h>
#include <GraphMol/SmilesParse/SmilesWrite.h>
#include "MolHash.h"
@@ -230,3 +229,179 @@ TEST_CASE("Github issues", "[molhash]") {
CHECK(hsh == "C2H6Cl");
}
}
// Exercises the third (useCXSmiles) argument of MolHash::MolHash() for several
// hash functions. Most sections hash a fresh copy of the same input twice:
// once with the default arguments and once with `true` as the third argument,
// and compare each result against a fixed expected string.
TEST_CASE("MolHash with CX extensions", "[molhash]") {
SECTION("Tautomer") {
// the input CXSMILES carries o1: and &1: stereo-group entries
auto mol =
"C[C@@H](O)[C@@H](C)[C@@H](C)C[C@H](C1=CN=CN1)C1=CNC=N1 "
"|o1:8,5,&1:1,3,r,c:11,18,t:9,15|"_smiles;
REQUIRE(mol);
{
// default arguments: the expected hash has no CX extension block
RWMol cp(*mol);
auto hsh = MolHash::MolHash(&cp, MolHash::HashFunction::HetAtomTautomer);
CHECK(
hsh ==
"C[C@@H](CC([C]1[CH][N][CH][N]1)[C]1[CH][N][CH][N]1)[C@H](C)[C@@H](C)"
"[O]_3_0");
}
{
// third argument true: same hash followed by a CX extension block
RWMol cp(*mol);
auto hsh =
MolHash::MolHash(&cp, MolHash::HashFunction::HetAtomTautomer, true);
CHECK(
hsh ==
"C[C@@H](CC([C]1[CH][N][CH][N]1)[C]1[CH][N][CH][N]1)[C@H](C)[C@@H](C)"
"[O]_3_0 |o1:1,&1:14,16|");
}
}
SECTION("no coordinates please") {
// the molecule comes from a V3000 mol block whose atoms have coordinates;
// the expected extension block below contains only the o1: entry
auto mol = R"CTAB(
Mrv2108 03032205502D
0 0 0 0 0 999 V3000
M V30 BEGIN CTAB
M V30 COUNTS 15 16 0 0 1
M V30 BEGIN ATOM
M V30 1 C -0.4657 -3.589 0 0 CFG=1
M V30 2 C -0.4657 -2.049 0 0
M V30 3 C 0.8679 -4.359 0 0
M V30 4 C 2.2016 -3.589 0 0 CFG=2
M V30 5 C 3.5353 -4.359 0 0
M V30 6 C 4.9422 -3.7327 0 0
M V30 7 N 5.9726 -4.8771 0 0
M V30 8 C 5.2026 -6.2108 0 0
M V30 9 N 3.6963 -5.8906 0 0
M V30 10 C 2.2016 -2.049 0 0
M V30 11 C 0.9557 -1.1438 0 0
M V30 12 N 1.4316 0.3208 0 0
M V30 13 C 2.9716 0.3208 0 0
M V30 14 N 3.4475 -1.1438 0 0
M V30 15 F -1.7994 -4.359 0 0
M V30 END ATOM
M V30 BEGIN BOND
M V30 1 1 1 2 CFG=1
M V30 2 1 1 3
M V30 3 1 4 3 CFG=1
M V30 4 1 4 5
M V30 5 2 5 6
M V30 6 1 6 7
M V30 7 2 7 8
M V30 8 1 8 9
M V30 9 1 5 9
M V30 10 1 4 10
M V30 11 2 10 11
M V30 12 1 11 12
M V30 13 1 12 13
M V30 14 2 13 14
M V30 15 1 10 14
M V30 16 1 1 15
M V30 END BOND
M V30 BEGIN COLLECTION
M V30 MDLV30/STEREL1 ATOMS=(1 1)
M V30 MDLV30/STERAC1 ATOMS=(1 4)
M V30 END COLLECTION
M V30 END CTAB
M END
)CTAB"_ctab;
REQUIRE(mol);
{
RWMol cp(*mol);
auto hsh =
MolHash::MolHash(&cp, MolHash::HashFunction::HetAtomTautomer, true);
CHECK(hsh ==
"C[C@H](F)CC([C]1[CH][N][CH][N]1)[C]1[CH][N][CH][N]1_2_0 |o1:1|");
}
}
SECTION("Mesomer") {
// the input has o1:1 and &1:4 entries; only o1:1 is expected in the output
auto mol = "C[C@H](F)C[C@@](C([NH-])=O)C([O-])=N |o1:1,&1:4|"_smiles;
REQUIRE(mol);
{
RWMol cp(*mol);
auto hsh = MolHash::MolHash(&cp, MolHash::HashFunction::Mesomer);
CHECK(hsh == "C[C@H](F)C[C]([C]([NH])[O])[C]([NH])[O]_-2");
}
{
RWMol cp(*mol);
auto hsh = MolHash::MolHash(&cp, MolHash::HashFunction::Mesomer, true);
CHECK(hsh == "C[C@H](F)C[C]([C]([NH])[O])[C]([NH])[O]_-2 |o1:1|");
}
}
SECTION("Extended Murcko") {
auto mol =
"CC1=CC=CC=C1[C@@H](C[C@@H](C1CC1)C1CCC1)C1=CC=CC=C1O |o1:9,&1:7|"_smiles;
REQUIRE(mol);
{
RWMol cp(*mol);
auto hsh = MolHash::MolHash(&cp, MolHash::HashFunction::ExtendedMurcko);
CHECK(hsh == "*c1ccccc1C(C[C@H](C1CCC1)C1CC1)c1ccccc1*");
}
{
RWMol cp(*mol);
auto hsh =
MolHash::MolHash(&cp, MolHash::HashFunction::ExtendedMurcko, true);
CHECK(hsh == "*c1ccccc1C(C[C@H](C1CCC1)C1CC1)c1ccccc1* |o1:9|");
}
}
SECTION("Murcko") {
// same input as the "Extended Murcko" section, hashed with MurckoScaffold
auto mol =
"CC1=CC=CC=C1[C@@H](C[C@@H](C1CC1)C1CCC1)C1=CC=CC=C1O |o1:9,&1:7|"_smiles;
REQUIRE(mol);
{
RWMol cp(*mol);
auto hsh = MolHash::MolHash(&cp, MolHash::HashFunction::MurckoScaffold);
CHECK(hsh == "c1ccc(C(C[C@H](C2CCC2)C2CC2)c2ccccc2)cc1");
}
{
RWMol cp(*mol);
auto hsh =
MolHash::MolHash(&cp, MolHash::HashFunction::MurckoScaffold, true);
CHECK(hsh == "c1ccc(C(C[C@H](C2CCC2)C2CC2)c2ccccc2)cc1 |o1:6|");
}
}
SECTION("Element") {
auto mol =
"C([C@@H](C1CC1)C1CCC1)[C@@H](C1CCCCC1)C1=CC=CC=C1 |o1:1,&1:9,c:21,23,t:19|"_smiles;
REQUIRE(mol);
{
RWMol cp(*mol);
auto hsh = MolHash::MolHash(&cp, MolHash::HashFunction::ElementGraph);
CHECK(hsh == "C1CCC(C(C[C@H](C2CCC2)C2CC2)C2CCCCC2)CC1");
}
{
RWMol cp(*mol);
auto hsh =
MolHash::MolHash(&cp, MolHash::HashFunction::ElementGraph, true);
CHECK(hsh == "C1CCC(C(C[C@H](C2CCC2)C2CC2)C2CCCCC2)CC1 |o1:6|");
}
}
SECTION("Anonymous") {
// both calls are expected to return the same string here: the expected
// value with the third argument set to true has no CX extension block
auto mol =
"C([C@@H](C1CC1)C1CCC1)[C@@H](C1CCCCC1)C1=CC=CC=N1 |o1:1,&1:9,c:21,23,t:19|"_smiles;
REQUIRE(mol);
{
RWMol cp(*mol);
auto hsh = MolHash::MolHash(&cp, MolHash::HashFunction::AnonymousGraph);
CHECK(hsh == "*1***(*(**(*2***2)*2**2)*2*****2)**1");
}
{
RWMol cp(*mol);
auto hsh =
MolHash::MolHash(&cp, MolHash::HashFunction::AnonymousGraph, true);
CHECK(hsh == "*1***(*(**(*2***2)*2**2)*2*****2)**1");
}
}
}

View File

@@ -1,13 +1,11 @@
/*==============================================*/
/* Copyright (C) 2011-2019 NextMove Software */
/* All rights reserved. */
/* */
/* This file is part of molhash. */
/* */
/* The contents are covered by the terms of the */
/* BSD license, which is included in the file */
/* license.txt. */
/*==============================================*/
//
// Copyright (C) 2011-2022 NextMove Software and other RDKit contributors
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
#define _CRT_SECURE_NO_WARNINGS
#include <cstring>
@@ -23,6 +21,16 @@
#include "mf.h"
namespace {
// Appends the CX extension block for `mol` to `result`, separated from the
// existing text by a single space.
//
// The CX_COORDS field is always excluded; `additionalSkips` is a bitmask of
// further SmilesWrite::CX_* fields to exclude. `result` is left untouched when
// getCXExtensions() returns an empty string.
void addCXExtensions(RDKit::RWMol *mol, std::string &result,
                     unsigned additionalSkips = 0) {
  PRECONDITION(mol, "bad molecule");
  // Mask the skipped fields out rather than XOR-ing them: XOR toggles bits, so
  // a caller passing CX_COORDS in additionalSkips would have switched the
  // coordinates back on instead of keeping them off.
  const unsigned skipped = RDKit::SmilesWrite::CX_COORDS | additionalSkips;
  const auto cxext = RDKit::SmilesWrite::getCXExtensions(
      *mol, RDKit::SmilesWrite::CX_ALL & ~skipped);
  if (!cxext.empty()) {
    result += ' ';
    result += cxext;
  }
}
unsigned int NMRDKitBondGetOrder(const RDKit::Bond *bnd) {
PRECONDITION(bnd, "bad bond");
switch (bnd->getBondType()) {
@@ -116,8 +124,10 @@ void NMRDKitSanitizeHydrogens(RDKit::RWMol *mol) {
namespace RDKit {
namespace MolHash {
static unsigned int NMDetermineComponents(RWMol *mol, unsigned int *parts,
unsigned int acount) {
namespace {
unsigned int NMDetermineComponents(RWMol *mol, unsigned int *parts,
unsigned int acount) {
PRECONDITION(mol, "bad molecule");
PRECONDITION(parts, "bad parts pointer");
memset(parts, 0, acount * sizeof(unsigned int));
@@ -148,8 +158,8 @@ static unsigned int NMDetermineComponents(RWMol *mol, unsigned int *parts,
return result;
}
static std::string NMMolecularFormula(RWMol *mol, const unsigned int *parts,
unsigned int part) {
std::string NMMolecularFormula(RWMol *mol, const unsigned int *parts,
unsigned int part) {
PRECONDITION(mol, "bad molecule");
PRECONDITION((!part || parts), "bad parts pointer");
unsigned int hist[256];
@@ -202,7 +212,7 @@ static std::string NMMolecularFormula(RWMol *mol, const unsigned int *parts,
return result;
}
static std::string NMMolecularFormula(RWMol *mol, bool sep = false) {
std::string NMMolecularFormula(RWMol *mol, bool sep = false) {
PRECONDITION(mol, "bad molecule");
if (!sep) {
return NMMolecularFormula(mol, nullptr, 0);
@@ -237,7 +247,7 @@ static std::string NMMolecularFormula(RWMol *mol, bool sep = false) {
return result;
}
static void NormalizeHCount(Atom *aptr) {
void NormalizeHCount(Atom *aptr) {
PRECONDITION(aptr, "bad atom pointer");
unsigned int hcount;
@@ -280,7 +290,7 @@ static void NormalizeHCount(Atom *aptr) {
aptr->setNumExplicitHs(hcount);
}
static std::string AnonymousGraph(RWMol *mol, bool elem) {
std::string AnonymousGraph(RWMol *mol, bool elem, bool useCXSmiles) {
PRECONDITION(mol, "bad molecule");
std::string result;
int charge = 0;
@@ -302,11 +312,23 @@ static std::string AnonymousGraph(RWMol *mol, bool elem) {
bptr->setBondType(Bond::SINGLE);
}
MolOps::assignRadicals(*mol);
// we may have just destroyed some stereocenters/bonds
// clean that up:
bool cleanIt = true;
bool force = true;
MolOps::assignStereochemistry(*mol, cleanIt, force);
result = MolToSmiles(*mol);
if (useCXSmiles) {
addCXExtensions(mol, result, SmilesWrite::CX_RADICALS);
}
return result;
}
static std::string MesomerHash(RWMol *mol, bool netq) {
std::string MesomerHash(RWMol *mol, bool netq, bool useCXSmiles) {
PRECONDITION(mol, "bad molecule");
std::string result;
char buffer[32];
@@ -323,15 +345,25 @@ static std::string MesomerHash(RWMol *mol, bool netq) {
}
MolOps::assignRadicals(*mol);
// we may have just destroyed some stereocenters/bonds
// clean that up:
bool cleanIt = true;
bool force = true;
MolOps::assignStereochemistry(*mol, cleanIt, force);
result = MolToSmiles(*mol);
if (netq) {
sprintf(buffer, "_%d", charge);
result += buffer;
}
if (useCXSmiles) {
addCXExtensions(mol, result, SmilesWrite::CX_RADICALS);
}
return result;
}
static std::string TautomerHash(RWMol *mol, bool proto) {
std::string TautomerHash(RWMol *mol, bool proto, bool useCXSmiles) {
PRECONDITION(mol, "bad molecule");
std::string result;
char buffer[32];
@@ -372,10 +404,14 @@ static std::string TautomerHash(RWMol *mol, bool proto) {
sprintf(buffer, "_%d", hcount - charge);
}
result += buffer;
if (useCXSmiles) {
addCXExtensions(mol, result, SmilesWrite::CX_RADICALS);
}
return result;
}
static bool TraverseForRing(Atom *atom, unsigned char *visit) {
bool TraverseForRing(Atom *atom, unsigned char *visit) {
PRECONDITION(atom, "bad atom pointer");
PRECONDITION(visit, "bad pointer");
visit[atom->getIdx()] = 1;
@@ -395,8 +431,7 @@ static bool TraverseForRing(Atom *atom, unsigned char *visit) {
return false;
}
static bool DepthFirstSearchForRing(Atom *root, Atom *nbor,
unsigned int maxatomidx) {
bool DepthFirstSearchForRing(Atom *root, Atom *nbor, unsigned int maxatomidx) {
PRECONDITION(root, "bad atom pointer");
PRECONDITION(nbor, "bad atom pointer");
@@ -425,7 +460,7 @@ bool IsInScaffold(Atom *atom, unsigned int maxatomidx) {
return count > 1;
}
static bool HasNbrInScaffold(Atom *aptr, unsigned char *is_in_scaffold) {
bool HasNbrInScaffold(Atom *aptr, unsigned char *is_in_scaffold) {
PRECONDITION(aptr, "bad atom pointer");
PRECONDITION(is_in_scaffold, "bad pointer");
for (auto nbri : boost::make_iterator_range(
@@ -438,7 +473,7 @@ static bool HasNbrInScaffold(Atom *aptr, unsigned char *is_in_scaffold) {
return false;
}
static std::string ExtendedMurckoScaffold(RWMol *mol) {
std::string ExtendedMurckoScaffold(RWMol *mol, bool useCXSmiles) {
PRECONDITION(mol, "bad molecule");
RDKit::MolOps::fastFindRings(*mol);
@@ -469,12 +504,22 @@ static std::string ExtendedMurckoScaffold(RWMol *mol) {
}
mol->commitBatchEdit();
MolOps::assignRadicals(*mol);
// we may have just destroyed some stereocenters/bonds
// clean that up:
bool cleanIt = true;
bool force = true;
MolOps::assignStereochemistry(*mol, cleanIt, force);
std::string result;
result = MolToSmiles(*mol);
if (useCXSmiles) {
addCXExtensions(mol, result, SmilesWrite::CX_RADICALS);
}
return result;
}
static std::string MurckoScaffoldHash(RWMol *mol) {
std::string MurckoScaffoldHash(RWMol *mol, bool useCXSmiles) {
PRECONDITION(mol, "bad molecule");
std::vector<Atom *> for_deletion;
do {
@@ -502,12 +547,22 @@ static std::string MurckoScaffoldHash(RWMol *mol) {
mol->commitBatchEdit();
} while (!for_deletion.empty());
MolOps::assignRadicals(*mol);
// we may have just destroyed some stereocenters/bonds
// clean that up:
bool cleanIt = true;
bool force = true;
MolOps::assignStereochemistry(*mol, cleanIt, force);
std::string result;
result = MolToSmiles(*mol);
if (useCXSmiles) {
addCXExtensions(mol, result, SmilesWrite::CX_RADICALS);
}
return result;
}
static std::string NetChargeHash(RWMol *mol) {
std::string NetChargeHash(RWMol *mol) {
PRECONDITION(mol, "bad molecule");
int totalq = 0;
@@ -520,7 +575,7 @@ static std::string NetChargeHash(RWMol *mol) {
return buffer;
}
static std::string SmallWorldHash(RWMol *mol, bool brl) {
std::string SmallWorldHash(RWMol *mol, bool brl) {
PRECONDITION(mol, "bad molecule");
char buffer[64];
@@ -542,7 +597,7 @@ static std::string SmallWorldHash(RWMol *mol, bool brl) {
return buffer;
}
static void DegreeVector(RWMol *mol, unsigned int *v) {
void DegreeVector(RWMol *mol, unsigned int *v) {
memset(v, 0, 4 * sizeof(unsigned int));
for (auto aptr : mol->atoms()) {
switch (aptr->getDegree()) {
@@ -562,7 +617,7 @@ static void DegreeVector(RWMol *mol, unsigned int *v) {
}
}
static bool HasDoubleBond(Atom *atom) {
bool HasDoubleBond(Atom *atom) {
PRECONDITION(atom, "bad atom");
for (const auto &nbri :
boost::make_iterator_range(atom->getOwningMol().getAtomBonds(atom))) {
@@ -581,7 +636,7 @@ static bool HasDoubleBond(Atom *atom) {
// 2 means break, with hydrogen on beg and asterisk on end
// 3 means break, with asterisks on both beg and end
static int RegioisomerBond(Bond *bnd) {
int RegioisomerBond(Bond *bnd) {
PRECONDITION(bnd, "bad bond");
if (NMRDKitBondGetOrder(bnd) != 1) {
return -1;
@@ -619,7 +674,7 @@ static int RegioisomerBond(Bond *bnd) {
return -1;
}
static void ClearEZStereo(Atom *atm) {
void ClearEZStereo(Atom *atm) {
PRECONDITION(atm, "bad atom");
for (const auto &nbri :
boost::make_iterator_range(atm->getOwningMol().getAtomBonds(atm))) {
@@ -630,7 +685,7 @@ static void ClearEZStereo(Atom *atm) {
}
}
static std::string RegioisomerHash(RWMol *mol) {
std::string RegioisomerHash(RWMol *mol, bool useCXSmiles) {
PRECONDITION(mol, "bad molecule");
// we need a copy of the molecule so that we can loop over the bonds of
@@ -671,12 +726,21 @@ static std::string RegioisomerHash(RWMol *mol) {
}
}
std::string result;
result = MolToSmiles(*mol);
// we may have just destroyed some stereocenters/bonds
// clean that up:
bool cleanIt = true;
bool force = true;
MolOps::assignStereochemistry(*mol, cleanIt, force);
std::string result = MolToSmiles(*mol);
if (useCXSmiles) {
addCXExtensions(mol, result);
}
return result;
}
static std::string ArthorSubOrderHash(RWMol *mol) {
std::string ArthorSubOrderHash(RWMol *mol) {
PRECONDITION(mol, "bad molecule");
char buffer[256];
@@ -793,8 +857,9 @@ static std::string ArthorSubOrderHash(RWMol *mol) {
pcount, ccount, ocount, zcount, rcount, qcount, icount);
return buffer;
}
} // namespace
std::string MolHash(RWMol *mol, HashFunction func) {
std::string MolHash(RWMol *mol, HashFunction func, bool useCXSmiles) {
PRECONDITION(mol, "bad molecule");
std::string result;
char buffer[32];
@@ -803,31 +868,34 @@ std::string MolHash(RWMol *mol, HashFunction func) {
switch (func) {
default:
case HashFunction::AnonymousGraph:
result = AnonymousGraph(mol, false);
result = AnonymousGraph(mol, false, useCXSmiles);
break;
case HashFunction::ElementGraph:
result = AnonymousGraph(mol, true);
result = AnonymousGraph(mol, true, useCXSmiles);
break;
case HashFunction::CanonicalSmiles:
result = MolToSmiles(*mol);
if (useCXSmiles) {
addCXExtensions(mol, result);
}
break;
case HashFunction::MurckoScaffold:
result = MurckoScaffoldHash(mol);
result = MurckoScaffoldHash(mol, useCXSmiles);
break;
case HashFunction::ExtendedMurcko:
result = ExtendedMurckoScaffold(mol);
result = ExtendedMurckoScaffold(mol, useCXSmiles);
break;
case HashFunction::Mesomer:
result = MesomerHash(mol, true);
result = MesomerHash(mol, true, useCXSmiles);
break;
case HashFunction::RedoxPair:
result = MesomerHash(mol, false);
result = MesomerHash(mol, false, useCXSmiles);
break;
case HashFunction::HetAtomTautomer:
result = TautomerHash(mol, false);
result = TautomerHash(mol, false, useCXSmiles);
break;
case HashFunction::HetAtomProtomer:
result = TautomerHash(mol, true);
result = TautomerHash(mol, true, useCXSmiles);
break;
case HashFunction::MolFormula:
result = NMMolecularFormula(mol);
@@ -855,7 +923,7 @@ std::string MolHash(RWMol *mol, HashFunction func) {
result = ArthorSubOrderHash(mol);
break;
case HashFunction::Regioisomer:
result = RegioisomerHash(mol);
result = RegioisomerHash(mol, useCXSmiles);
break;
}
return result;

View File

@@ -1,13 +1,11 @@
/*==============================================*/
/* Copyright (C) 2016-2019 NextMove Software */
/* All rights reserved. */
/* */
/* This file is part of molhash. */
/* */
/* The contents are covered by the terms of the */
/* BSD license, which is included in the file */
/* license.txt. */
/*==============================================*/
//
// Copyright (C) 2016-2022 NextMove Software and other RDKit contributors
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
#ifndef NMS_MOLFORMULA_H
#define NMS_MOLFORMULA_H

View File

@@ -42,7 +42,8 @@ enum class HashFunction {
ArthorSubstructureOrder = 17
};
RDKIT_MOLHASH_EXPORT std::string MolHash(RWMol *mol, HashFunction func);
RDKIT_MOLHASH_EXPORT std::string MolHash(RWMol *mol, HashFunction func,
bool useCXSmiles = false);
enum class StripType {
AtomStereo = 1,

View File

@@ -1,13 +1,11 @@
/*==============================================*/
/* Copyright (C) 2019 NextMove Software */
/* All rights reserved. */
/* */
/* This file is part of molhash. */
/* */
/* The contents are covered by the terms of the */
/* BSD license, which is included in the file */
/* license.txt. */
/*==============================================*/
//
// Copyright (C) 2019-2022 NextMove Software and other RDKit contributors
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
#include <GraphMol/RDKitBase.h>
#include <GraphMol/RDKitQueries.h>
@@ -25,6 +23,10 @@ void Strip(RWMol *mol, unsigned int striptype) {
for (auto aptr : mol->atoms()) {
aptr->setChiralTag(RDKit::Atom::CHI_UNSPECIFIED);
}
if (!mol->getStereoGroups().empty()) {
std::vector<StereoGroup> no_sgs;
mol->setStereoGroups(std::move(no_sgs));
}
}
if (striptype & static_cast<unsigned>(StripType::BondStereo)) {
for (auto bptr : mol->bonds()) {
@@ -49,11 +51,9 @@ void Strip(RWMol *mol, unsigned int striptype) {
}
void SplitMolecule(RWMol *mol, std::vector<RWMol *> &molv) {
RDKit::MOL_SPTR_VECT mfrags = RDKit::MolOps::getMolFrags(*mol);
RDKit::MOL_SPTR_VECT::iterator vit;
for (vit = mfrags.begin(); vit != mfrags.end(); ++vit) {
RDKit::ROMol *wrappedmol =
(*vit).get(); // reach inside the shared pointer...
auto mfrags = RDKit::MolOps::getMolFrags(*mol);
for (const auto &frag : mfrags) {
const auto *wrappedmol = frag.get(); // reach inside the shared pointer...
molv.push_back(new RWMol(*wrappedmol)); // ...and make a copy
}
}

View File

@@ -1739,22 +1739,6 @@ TEST_CASE("StereoGroup Testing") {
}
}
// Checks that ROMol::replaceAtom() leaves the molecule's stereo group intact:
// the group count, the group size, and the identity of its first atom are
// compared before and after replacing atom 1 with a copy of itself.
TEST_CASE("replaceAtom and StereoGroups") {
SECTION("basics") {
auto mol = "C[C@](O)(Cl)[C@H](F)Cl |o1:1,4|"_smiles;
REQUIRE(mol);
CHECK(mol->getStereoGroups().size() == 1);
CHECK(mol->getStereoGroups()[0].getAtoms().size() == 2);
CHECK(mol->getStereoGroups()[0].getAtoms()[0] == mol->getAtomWithIdx(1));
// replace atom 1 with a copy of itself
Atom acp(*mol->getAtomWithIdx(1));
mol->replaceAtom(1, &acp);
// the group must now reference whatever atom object sits at index 1
CHECK(mol->getStereoGroups().size() == 1);
CHECK(mol->getStereoGroups()[0].getAtoms().size() == 2);
CHECK(mol->getStereoGroups()[0].getAtoms()[0] == mol->getAtomWithIdx(1));
}
}
TEST_CASE("Removing stereogroups from unspecified atoms") {
SECTION("basics") {
auto mol = "C[C@](O)(Cl)F |o1:1|"_smiles;
@@ -1777,3 +1761,19 @@ TEST_CASE("Removing stereogroups from unspecified atoms") {
CHECK(mol->getStereoGroups()[0].getAtoms()[0]->getIdx() == 4);
}
}
// Checks that ROMol::replaceAtom() leaves the molecule's stereo group intact:
// the group count, the group size, and the identity of its first atom are
// compared before and after replacing atom 1 with a copy of itself.
TEST_CASE("replaceAtom and StereoGroups") {
SECTION("basics") {
auto mol = "C[C@](O)(Cl)[C@H](F)Cl |o1:1,4|"_smiles;
REQUIRE(mol);
CHECK(mol->getStereoGroups().size() == 1);
CHECK(mol->getStereoGroups()[0].getAtoms().size() == 2);
CHECK(mol->getStereoGroups()[0].getAtoms()[0] == mol->getAtomWithIdx(1));
// replace atom 1 with a copy of itself
Atom acp(*mol->getAtomWithIdx(1));
mol->replaceAtom(1, &acp);
// the group must now reference whatever atom object sits at index 1
CHECK(mol->getStereoGroups().size() == 1);
CHECK(mol->getStereoGroups()[0].getAtoms().size() == 2);
CHECK(mol->getStereoGroups()[0].getAtoms()[0] == mol->getAtomWithIdx(1));
}
}

View File

@@ -4,6 +4,10 @@
## Backwards incompatible changes
- When running in Jupyter Notebook, logs are now sent only to Python's
standard error stream, and no longer include the `RDKit LEVEL` prefix.
- The MolHash functions now reassign stereochemistry after modifying the
molecule and before calculating the hash. Previous versions would still
include information about atom/bond stereochemistry in the output hash even if
that information no longer applied to the modified molecule.
## Code removed in this release:
- The `useCountSimulation` keyword argument for