From 9f103a9913742e65178f28f14f3c585f8e2e3363 Mon Sep 17 00:00:00 2001 From: Greg Landrum Date: Wed, 3 Apr 2019 04:48:05 +0200 Subject: [PATCH] Allow components of the MolStandardize code to be initialized from streams (#2385) * Fixes #2383 (tests coming in the next commit) Minor typo fix Fixes a "bug" in one of the default transforms * Adds support for directly providing normalization parameter data instead of requiring the use of a text file. * allow fragment removers to be initialized with string data * remove unicode * allow the reionizer to be initialized from a stream --- Code/GraphMol/MolOps.h | 2 +- .../AcidBaseCatalog/AcidBaseCatalogParams.cpp | 5 +++ .../AcidBaseCatalog/AcidBaseCatalogParams.h | 1 + Code/GraphMol/MolStandardize/Charge.cpp | 7 +++ Code/GraphMol/MolStandardize/Charge.h | 4 ++ Code/GraphMol/MolStandardize/Fragment.cpp | 12 +++++ Code/GraphMol/MolStandardize/Fragment.h | 2 + .../FragmentCatalog/FragmentCatalogParams.cpp | 5 +++ .../FragmentCatalog/FragmentCatalogParams.h | 1 + .../MolStandardize/MolStandardize.cpp | 2 +- Code/GraphMol/MolStandardize/Normalize.cpp | 21 +++++++-- Code/GraphMol/MolStandardize/Normalize.h | 3 ++ .../TransformCatalogParams.cpp | 5 +++ .../TransformCatalog/TransformCatalogParams.h | 1 + Code/GraphMol/MolStandardize/Wrap/Charge.cpp | 21 +++++++++ .../GraphMol/MolStandardize/Wrap/Fragment.cpp | 18 +++++++- .../MolStandardize/Wrap/Normalize.cpp | 10 +++++ .../MolStandardize/Wrap/testMolStandardize.py | 44 +++++++++++++++++++ .../GraphMol/MolStandardize/testNormalize.cpp | 20 +++++++++ Data/MolStandardize/acid_base_pairs.txt | 2 +- Data/MolStandardize/normalizations.txt | 2 +- 21 files changed, 178 insertions(+), 10 deletions(-) diff --git a/Code/GraphMol/MolOps.h b/Code/GraphMol/MolOps.h index 08ec7f50f..e28659802 100644 --- a/Code/GraphMol/MolOps.h +++ b/Code/GraphMol/MolOps.h @@ -216,7 +216,7 @@ RDKIT_GRAPHMOL_EXPORT void addHs(RWMol &mol, bool explicitOnly = false, - Hs that are part of the definition of double 
bond Stereochemistry will not be removed - Hs that are not connected to anything else will not be removed - - Hs that have a query defined (i.e. hasQuery() returns true) will not + - Hs that have a query defined (i.e. hasQuery() returns true) will not be removed - the caller is responsible for deleteing the pointer this diff --git a/Code/GraphMol/MolStandardize/AcidBaseCatalog/AcidBaseCatalogParams.cpp b/Code/GraphMol/MolStandardize/AcidBaseCatalog/AcidBaseCatalogParams.cpp index 40d2835a2..00cae7a08 100644 --- a/Code/GraphMol/MolStandardize/AcidBaseCatalog/AcidBaseCatalogParams.cpp +++ b/Code/GraphMol/MolStandardize/AcidBaseCatalog/AcidBaseCatalogParams.cpp @@ -20,6 +20,11 @@ AcidBaseCatalogParams::AcidBaseCatalogParams(const std::string &acidBaseFile) { d_pairs = readPairs(acidBaseFile); } +AcidBaseCatalogParams::AcidBaseCatalogParams(std::istream &acidBaseFile) { + d_pairs.clear(); + d_pairs = readPairs(acidBaseFile); +} + AcidBaseCatalogParams::AcidBaseCatalogParams( const AcidBaseCatalogParams &other) { d_typeStr = other.d_typeStr; diff --git a/Code/GraphMol/MolStandardize/AcidBaseCatalog/AcidBaseCatalogParams.h b/Code/GraphMol/MolStandardize/AcidBaseCatalog/AcidBaseCatalogParams.h index 7db1eab58..1d30813e0 100644 --- a/Code/GraphMol/MolStandardize/AcidBaseCatalog/AcidBaseCatalogParams.h +++ b/Code/GraphMol/MolStandardize/AcidBaseCatalog/AcidBaseCatalogParams.h @@ -31,6 +31,7 @@ class RDKIT_MOLSTANDARDIZE_EXPORT AcidBaseCatalogParams } AcidBaseCatalogParams(const std::string &acidBaseFile); + AcidBaseCatalogParams(std::istream &acidBaseFile); // copy constructor AcidBaseCatalogParams(const AcidBaseCatalogParams &other); diff --git a/Code/GraphMol/MolStandardize/Charge.cpp b/Code/GraphMol/MolStandardize/Charge.cpp index 9e4a8446e..5e0c703a2 100644 --- a/Code/GraphMol/MolStandardize/Charge.cpp +++ b/Code/GraphMol/MolStandardize/Charge.cpp @@ -45,6 +45,13 @@ Reionizer::Reionizer(const std::string acidbaseFile, this->d_ccs = ccs; } 
+Reionizer::Reionizer(std::istream &acidbaseStream, + const std::vector ccs) { + AcidBaseCatalogParams abparams(acidbaseStream); + this->d_abcat = new AcidBaseCatalog(&abparams); + this->d_ccs = ccs; +} + Reionizer::~Reionizer() { delete d_abcat; } // Reionizer::Reionizer(const AcidBaseCatalog *abcat, const diff --git a/Code/GraphMol/MolStandardize/Charge.h b/Code/GraphMol/MolStandardize/Charge.h index b3b25ae8b..2436bcc79 100644 --- a/Code/GraphMol/MolStandardize/Charge.h +++ b/Code/GraphMol/MolStandardize/Charge.h @@ -64,6 +64,10 @@ class RDKIT_MOLSTANDARDIZE_EXPORT Reionizer { // corrections Reionizer(const std::string acidbaseFile, const std::vector ccs); + //! construct a Reionizer from a stream of acid/base pair data and charge + // corrections + Reionizer(std::istream &acidbaseStream, + const std::vector ccs); //! making Reionizer objects non-copyable Reionizer(const Reionizer &other) = delete; Reionizer &operator=(Reionizer const &) = delete; diff --git a/Code/GraphMol/MolStandardize/Fragment.cpp b/Code/GraphMol/MolStandardize/Fragment.cpp index d2f37693e..2e2261a2d 100644 --- a/Code/GraphMol/MolStandardize/Fragment.cpp +++ b/Code/GraphMol/MolStandardize/Fragment.cpp @@ -47,6 +47,18 @@ FragmentRemover::FragmentRemover(const std::string fragmentFile, this->SKIP_IF_ALL_MATCH = skip_if_all_match; } +// overloaded constructor +FragmentRemover::FragmentRemover(std::istream &fragmentStream, bool leave_last, + bool skip_if_all_match) { + FragmentCatalogParams fparams(fragmentStream); + this->d_fcat = new FragmentCatalog(&fparams); + if (!this->d_fcat) { + throw ValueErrorException("could not constract fragment catalog"); + } + this->LEAVE_LAST = leave_last; + this->SKIP_IF_ALL_MATCH = skip_if_all_match; +} + // Destructor FragmentRemover::~FragmentRemover() { delete d_fcat; }; diff --git a/Code/GraphMol/MolStandardize/Fragment.h b/Code/GraphMol/MolStandardize/Fragment.h index e52bda33e..f56a85dc1 100644 --- a/Code/GraphMol/MolStandardize/Fragment.h +++ 
b/Code/GraphMol/MolStandardize/Fragment.h @@ -33,6 +33,8 @@ class RDKIT_MOLSTANDARDIZE_EXPORT FragmentRemover { FragmentRemover(); FragmentRemover(const std::string fragmentFile, bool leave_last, bool skip_if_all_match = false); + FragmentRemover(std::istream &fragmentStream, bool leave_last, + bool skip_if_all_match = false); ~FragmentRemover(); //! making FragmentRemover objects non-copyable diff --git a/Code/GraphMol/MolStandardize/FragmentCatalog/FragmentCatalogParams.cpp b/Code/GraphMol/MolStandardize/FragmentCatalog/FragmentCatalogParams.cpp index 823c0b55d..b9bf668f8 100644 --- a/Code/GraphMol/MolStandardize/FragmentCatalog/FragmentCatalogParams.cpp +++ b/Code/GraphMol/MolStandardize/FragmentCatalog/FragmentCatalogParams.cpp @@ -20,6 +20,11 @@ FragmentCatalogParams::FragmentCatalogParams(const std::string &fgroupFile) { d_funcGroups = readFuncGroups(fgroupFile); } +FragmentCatalogParams::FragmentCatalogParams(std::istream &fgroupStream) { + d_funcGroups.clear(); + d_funcGroups = readFuncGroups(fgroupStream); +} + FragmentCatalogParams::FragmentCatalogParams( const FragmentCatalogParams &other) { d_typeStr = other.d_typeStr; diff --git a/Code/GraphMol/MolStandardize/FragmentCatalog/FragmentCatalogParams.h b/Code/GraphMol/MolStandardize/FragmentCatalog/FragmentCatalogParams.h index 13b390f6a..3cedc9c70 100644 --- a/Code/GraphMol/MolStandardize/FragmentCatalog/FragmentCatalogParams.h +++ b/Code/GraphMol/MolStandardize/FragmentCatalog/FragmentCatalogParams.h @@ -31,6 +31,7 @@ class RDKIT_MOLSTANDARDIZE_EXPORT FragmentCatalogParams } FragmentCatalogParams(const std::string &fgroupFile); + FragmentCatalogParams(std::istream &fgroupStream); // copy constructor FragmentCatalogParams(const FragmentCatalogParams &other); diff --git a/Code/GraphMol/MolStandardize/MolStandardize.cpp b/Code/GraphMol/MolStandardize/MolStandardize.cpp index 2059ea47a..04253d90a 100644 --- a/Code/GraphMol/MolStandardize/MolStandardize.cpp +++ 
b/Code/GraphMol/MolStandardize/MolStandardize.cpp @@ -104,7 +104,7 @@ RWMol *chargeParent(const RWMol &mol, const CleanupParameters &params, void superParent(RWMol &mol, const CleanupParameters &params) { RDUNUSED_PARAM(mol); RDUNUSED_PARAM(params); - UNDER_CONSTRUCTION("Not yet implmented"); + UNDER_CONSTRUCTION("Not yet implemented"); } RWMol *normalize(const RWMol *mol, const CleanupParameters &params) { diff --git a/Code/GraphMol/MolStandardize/Normalize.cpp b/Code/GraphMol/MolStandardize/Normalize.cpp index 3e1e88817..dbb00dad7 100644 --- a/Code/GraphMol/MolStandardize/Normalize.cpp +++ b/Code/GraphMol/MolStandardize/Normalize.cpp @@ -46,6 +46,15 @@ Normalizer::Normalizer(const std::string normalizeFile, this->MAX_RESTARTS = maxRestarts; } +// overloaded constructor +Normalizer::Normalizer(std::istream &normalizeStream, + const unsigned int maxRestarts) { + BOOST_LOG(rdInfoLog) << "Initializing Normalizer\n"; + TransformCatalogParams tparams(normalizeStream); + this->d_tcat = new TransformCatalog(&tparams); + this->MAX_RESTARTS = maxRestarts; +} + // destructor Normalizer::~Normalizer() { delete d_tcat; } @@ -57,10 +66,12 @@ ROMol *Normalizer::normalize(const ROMol &mol) { PRECONDITION(tparams, ""); const std::vector> &transforms = tparams->getTransformations(); - - std::vector> frags = MolOps::getMolFrags(mol); + bool sanitizeFrags = false; + std::vector> frags = + MolOps::getMolFrags(mol, sanitizeFrags); std::vector nfrags; //( frags.size() ); for (const auto &frag : frags) { + frag->updatePropertyCache(false); ROMOL_SPTR nfrag(this->normalizeFragment(*frag, transforms)); nfrags.push_back(nfrag); } @@ -134,8 +145,10 @@ boost::shared_ptr Normalizer::applyTransform( // std::endl; unsigned int failed; try { - MolOps::sanitizeMol(*static_cast(pdt[0].get()), failed); - Normalizer::Product np(MolToSmiles(*pdt[0]), pdt[0]); + RWMol tmol(*static_cast(pdt[0].get())); + MolOps::sanitizeMol(tmol, failed); + pdt[0]->updatePropertyCache(false); + Normalizer::Product 
np(MolToSmiles(tmol), pdt[0]); pdts.push_back(np); } catch (MolSanitizeException &) { BOOST_LOG(rdInfoLog) << "FAILED sanitizeMol.\n"; diff --git a/Code/GraphMol/MolStandardize/Normalize.h b/Code/GraphMol/MolStandardize/Normalize.h index 9c1e95067..bedbbec38 100644 --- a/Code/GraphMol/MolStandardize/Normalize.h +++ b/Code/GraphMol/MolStandardize/Normalize.h @@ -48,6 +48,9 @@ class RDKIT_MOLSTANDARDIZE_EXPORT Normalizer { Normalizer(); //! Construct a Normalizer with a particular normalizeFile and maxRestarts Normalizer(const std::string normalizeFile, const unsigned int maxRestarts); + //! Construct a Normalizer with a particular stream (with parameters) and + //! maxRestarts + Normalizer(std::istream &normalizeStream, const unsigned int maxRestarts); //! making Normalizer objects non-copyable Normalizer(const Normalizer &other) = delete; Normalizer &operator=(Normalizer const &) = delete; diff --git a/Code/GraphMol/MolStandardize/TransformCatalog/TransformCatalogParams.cpp b/Code/GraphMol/MolStandardize/TransformCatalog/TransformCatalogParams.cpp index 9e9bbb60e..3d3deafca 100644 --- a/Code/GraphMol/MolStandardize/TransformCatalog/TransformCatalogParams.cpp +++ b/Code/GraphMol/MolStandardize/TransformCatalog/TransformCatalogParams.cpp @@ -21,6 +21,11 @@ TransformCatalogParams::TransformCatalogParams( d_transformations = readTransformations(transformFile); } +TransformCatalogParams::TransformCatalogParams(std::istream &transformStream) { + d_transformations.clear(); + d_transformations = readTransformations(transformStream); +} + TransformCatalogParams::TransformCatalogParams( const TransformCatalogParams &other) { d_typeStr = other.d_typeStr; diff --git a/Code/GraphMol/MolStandardize/TransformCatalog/TransformCatalogParams.h b/Code/GraphMol/MolStandardize/TransformCatalog/TransformCatalogParams.h index 24e50fc81..14c8bcb86 100644 --- a/Code/GraphMol/MolStandardize/TransformCatalog/TransformCatalogParams.h +++ 
b/Code/GraphMol/MolStandardize/TransformCatalog/TransformCatalogParams.h @@ -32,6 +32,7 @@ class RDKIT_MOLSTANDARDIZE_EXPORT TransformCatalogParams } TransformCatalogParams(const std::string &transformFile); + TransformCatalogParams(std::istream &transformStream); // copy constructor TransformCatalogParams(const TransformCatalogParams &other); diff --git a/Code/GraphMol/MolStandardize/Wrap/Charge.cpp b/Code/GraphMol/MolStandardize/Wrap/Charge.cpp index 19e829377..b47786c73 100644 --- a/Code/GraphMol/MolStandardize/Wrap/Charge.cpp +++ b/Code/GraphMol/MolStandardize/Wrap/Charge.cpp @@ -25,6 +25,21 @@ ROMol *reionizeHelper(MolStandardize::Reionizer &self, const ROMol &mol) { return self.reionize(mol); } +MolStandardize::Reionizer *reionizerFromData(const std::string &data, + python::object chargeCorrections) { + std::istringstream sstr(data); + auto corrections = + pythonObjectToVect(chargeCorrections); + MolStandardize::Reionizer *res; + if (corrections) { + res = new MolStandardize::Reionizer(sstr, *corrections); + } else { + res = new MolStandardize::Reionizer( + sstr, std::vector()); + } + return res; +} + } // namespace struct charge_wrapper { @@ -51,6 +66,12 @@ struct charge_wrapper { (python::arg("self"), python::arg("mol")), "", python::return_value_policy()); + python::def("ReionizerFromData", &reionizerFromData, + (python::arg("paramData"), + python::arg("chargeCorrections") = python::list()), + "creates a reionizer from a string containing parameter data " + "and a list of charge corrections", + python::return_value_policy()); python::class_( "Uncharger", python::init((python::arg("self"), python::arg("canonicalOrder") = true))) diff --git a/Code/GraphMol/MolStandardize/Wrap/Fragment.cpp b/Code/GraphMol/MolStandardize/Wrap/Fragment.cpp index b0f47f652..1f2e80092 100644 --- a/Code/GraphMol/MolStandardize/Wrap/Fragment.cpp +++ b/Code/GraphMol/MolStandardize/Wrap/Fragment.cpp @@ -25,6 +25,13 @@ ROMol *chooseHelper(MolStandardize::LargestFragmentChooser &self, 
const ROMol &mol) { return self.choose(mol); } +MolStandardize::FragmentRemover *removerFromParams(const std::string &data, + bool leave_last, + bool skip_if_all_match) { + std::istringstream sstr(data); + return new MolStandardize::FragmentRemover(sstr, leave_last, + skip_if_all_match); +} } // namespace @@ -40,11 +47,18 @@ struct fragment_wrapper { "FragmentRemover", python::init<>()) .def(python::init( (python::arg("fragmentFilename") = "", - python::arg("leave_last") = true, - python::arg("skip_if_all_match") = false))) + python::arg("leave_last") = true, + python::arg("skip_if_all_match") = false))) .def("remove", &removeHelper, (python::arg("self"), python::arg("mol")), "", python::return_value_policy()); + python::def( + "FragmentRemoverFromData", &removerFromParams, + (python::arg("fragmentData"), python::arg("leave_last") = true, + python::arg("skip_if_all_match") = false), + "creates a FragmentRemover from a string containing parameter data", + python::return_value_policy()); + python::class_( "LargestFragmentChooser", python::init((python::arg("preferOrganic") = false))) diff --git a/Code/GraphMol/MolStandardize/Wrap/Normalize.cpp b/Code/GraphMol/MolStandardize/Wrap/Normalize.cpp index 9865d2347..b78c0cfcd 100644 --- a/Code/GraphMol/MolStandardize/Wrap/Normalize.cpp +++ b/Code/GraphMol/MolStandardize/Wrap/Normalize.cpp @@ -11,6 +11,7 @@ #include #include +#include namespace python = boost::python; using namespace RDKit; @@ -21,6 +22,11 @@ ROMol *normalizeHelper(MolStandardize::Normalizer &self, const ROMol &mol) { return self.normalize(mol); } +MolStandardize::Normalizer *normalizerFromParams( + const std::string &data, const MolStandardize::CleanupParameters &params) { + std::istringstream sstr(data); + return new MolStandardize::Normalizer(sstr, params.maxRestarts); +} } // namespace struct normalize_wrapper { @@ -37,6 +43,10 @@ struct normalize_wrapper { .def("normalize", &normalizeHelper, (python::arg("self"), python::arg("mol")), "", 
python::return_value_policy()); + python::def("NormalizerFromData", &normalizerFromParams, + (python::arg("paramData")), + "creates a normalizer from a string containing parameter data", + python::return_value_policy()); } }; diff --git a/Code/GraphMol/MolStandardize/Wrap/testMolStandardize.py b/Code/GraphMol/MolStandardize/Wrap/testMolStandardize.py index ba793e6d9..9469b007e 100644 --- a/Code/GraphMol/MolStandardize/Wrap/testMolStandardize.py +++ b/Code/GraphMol/MolStandardize/Wrap/testMolStandardize.py @@ -173,6 +173,50 @@ class TestCase(unittest.TestCase): self.assertEqual ("""INFO: [FragmentValidation] 1,2-dichloroethane is present""", msg6[0]) + def test10NormalizeParams(self): + data = """// Name SMIRKS +Nitro to N+(O-)=O [N,P,As,Sb;X3:1](=[O,S,Se,Te:2])=[O,S,Se,Te:3]>>[*+1:1]([*-1:2])=[*:3] +Sulfone to S(=O)(=O) [S+2:1]([O-:2])([O-:3])>>[S+0:1](=[O-0:2])(=[O-0:3]) +Pyridine oxide to n+O- [n:1]=[O:2]>>[n+:1][O-:2] +// Azide to N=N+=N- [*,H:1][N:2]=[N:3]#[N:4]>>[*,H:1][N:2]=[N+:3]=[N-:4] +""" + normalizer1 = rdMolStandardize.Normalizer() + params = rdMolStandardize.CleanupParameters() + normalizer2 = rdMolStandardize.NormalizerFromData(data, params) + + imol = Chem.MolFromSmiles("O=N(=O)CCN=N#N", sanitize=False) + mol1 = normalizer1.normalize(imol) + mol2 = normalizer2.normalize(imol) + self.assertEqual(Chem.MolToSmiles(imol), "N#N=NCCN(=O)=O") + self.assertEqual(Chem.MolToSmiles(mol1), "[N-]=[N+]=NCC[N+](=O)[O-]") + self.assertEqual(Chem.MolToSmiles(mol2), "N#N=NCC[N+](=O)[O-]") + + def test11FragmentParams(self): + data = """// Name SMARTS +fluorine [F] +chlorine [Cl] + """ + fragremover = rdMolStandardize.FragmentRemoverFromData(data) + mol = Chem.MolFromSmiles("CN(C)C.Cl.Cl.Br") + nm = fragremover.remove(mol) + self.assertEqual(Chem.MolToSmiles(nm), "Br.CN(C)C") + + def test12ChargeParams(self): + params = """// The default list of AcidBasePairs, sorted from strongest to weakest. 
+// This list is derived from the Food and Drug: Administration Substance +// Registration System Standard Operating Procedure guide. +// +// Name Acid Base +-SO2H [!O][SD3](=O)[OH] [!O][SD3](=O)[O-] +-SO3H [!O]S(=O)(=O)[OH] [!O]S(=O)(=O)[O-] +""" + mol = Chem.MolFromSmiles("C1=C(C=CC(=C1)[S]([O-])=O)[S](O)(=O)=O") + # instantiate from the acid/base pair data defined above + reionizer = rdMolStandardize.ReionizerFromData(params, []) + print("done") + nm = reionizer.reionize(mol) + self.assertEqual(Chem.MolToSmiles(nm), "O=S([O-])c1ccc(S(=O)(=O)O)cc1") + if __name__ == "__main__": unittest.main() diff --git a/Code/GraphMol/MolStandardize/testNormalize.cpp b/Code/GraphMol/MolStandardize/testNormalize.cpp index 3c4078747..c95732dab 100644 --- a/Code/GraphMol/MolStandardize/testNormalize.cpp +++ b/Code/GraphMol/MolStandardize/testNormalize.cpp @@ -85,7 +85,27 @@ void test1() { BOOST_LOG(rdInfoLog) << "Finished" << std::endl; } +void test2() { + BOOST_LOG(rdInfoLog) << "-----------------------\n test2" << std::endl; + std::string tfdata = R"DATA(// Name SMIRKS +Nitro to N+(O-)=O [N,P,As,Sb;X3:1](=[O,S,Se,Te:2])=[O,S,Se,Te:3]>>[*+1:1]([*-1:2])=[*:3] +Sulfone to S(=O)(=O) [S+2:1]([O-:2])([O-:3])>>[S+0:1](=[O-0:2])(=[O-0:3]) +Pyridine oxide to n+O- [n:1]=[O:2]>>[n+:1][O-:2] +)DATA"; + std::stringstream sstr(tfdata); + Normalizer nn(sstr, 10); + bool debugParse = false; + bool sanitize = false; + std::unique_ptr imol( + SmilesToMol("O=N(=O)CCN=N#N", debugParse, sanitize)); + std::unique_ptr m2(nn.normalize(*imol)); + TEST_ASSERT(MolToSmiles(*m2) == "N#N=NCC[N+](=O)[O-]"); + BOOST_LOG(rdInfoLog) << "Finished" << std::endl; +} + int main() { + RDLog::InitLogs(); test1(); + test2(); return 0; } diff --git a/Data/MolStandardize/acid_base_pairs.txt b/Data/MolStandardize/acid_base_pairs.txt index 1f1e7f74f..2d1dc1445 100644 --- a/Data/MolStandardize/acid_base_pairs.txt +++ b/Data/MolStandardize/acid_base_pairs.txt @@ -4,7 +4,7 @@ // // Name Acid Base -OSO3H OS(=O)(=O)[OH] OS(=O)(=O)[O-] 
-–SO3H [!O]S(=O)(=O)[OH] [!O]S(=O)(=O)[O-] +-SO3H [!O]S(=O)(=O)[OH] [!O]S(=O)(=O)[O-] -OSO2H O[SD3](=O)[OH] O[SD3](=O)[O-] -SO2H [!O][SD3](=O)[OH] [!O][SD3](=O)[O-] -OPO3H2 OP(=O)([OH])[OH] OP(=O)([OH])[O-] diff --git a/Data/MolStandardize/normalizations.txt b/Data/MolStandardize/normalizations.txt index ba7781f88..b9112aa64 100644 --- a/Data/MolStandardize/normalizations.txt +++ b/Data/MolStandardize/normalizations.txt @@ -3,7 +3,7 @@ Nitro to N+(O-)=O [N,P,As,Sb;X3:1](=[O,S,Se,Te:2])=[O,S,Se,Te:3]>>[*+1:1]([*-1:2])=[*:3] Sulfone to S(=O)(=O) [S+2:1]([O-:2])([O-:3])>>[S+0:1](=[O-0:2])(=[O-0:3]) Pyridine oxide to n+O- [n:1]=[O:2]>>[n+:1][O-:2] -Azide to N=N+=N- [*,H:1][N:2]=[N:3]#[N:4]>>[*,H:1][N:2]=[N+:3]=[N-:4] +Azide to N=N+=N- [*:1][N:2]=[N:3]#[N:4]>>[*:1][N:2]=[N+:3]=[N-:4] Diazo/azo to =N+=N- [*:1]=[N:2]#[N:3]>>[*:1]=[N+:2]=[N-:3] Sulfoxide to -S+(O-)- [!O:1][S+0;X3:2](=[O:3])[!O:4]>>[*:1][S+1:2]([O-:3])[*:4] // Equivalent to #1.5 in InChI technical manual