mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-06 22:39:55 +08:00
* short test file for MolVS standardize_sm * short test file for MolVS fragment * short test file for MolVS metals * short test file for MolVS normalize * short test file for MolVS reionize * short test file for MolVS tautomer * short test file for MolVS validate * long test file for MolVS standardize smiles * long test file for MolVS fragment * long test file for MolVS metals * long test file for MolVS normalize * long test file for MolVS reionize * long test file for MolVS tautomer * long test file for MolVS validate * Unit tests for MolVS steps * dropping support for Python2 * molvs/__init__.py * molvs/charge.py * molvs/errors.py * molvs/fragment.py * molvs/metal.py * molvs/normalize.py * molvs/resonance.py * molvs/standardize.py * molvs/tautomer.py * molvs/utils.py * molvs/validate.py * molvs/validations.py * molvs/cli.py * adapted and renamed molvs/cli.py to work within $RDBASE/Contrib/MolVS/ * setup MolStandardize directories, source with empty cleanup function, header, CMake files * corrections to empty source, header and test1.cpp * adding empty functions and initializers to MolStandardize * empty Metal source, header and added test * added most of Metal.cpp functionality and made some more tests * empty functions and initializers to Normalize * empty functions and initializers to Validate * added most code for RDKitDefault mode, along with some tests * restructure for abstract base class ValidateMethod * written in isNoneValidation for MolVSValidation * took out isNoneValidation, put in noAtomValidation, neutralValidation, isotopeValidation for MolVSValidation * added in AllowedAtoms * added in disallowedAtoms * corrections to Validate * added code for FragmentRemover * extended fragment functionality to include choose largest fragment, added in tests for fragment catalog, fragment remover. 
Also added fragmentValidation method in MolStandardize * added another test to testValidate test_fragment * corrections to fragment * corrections to Metal * added code for Normalize * added normalize member function to MolStandardize and added tests * added multi fragment functionality to Normalize.cpp and additional tests * TransformCatalog * tests for Normalize.cpp * first bit of cleanup * added most of Charge functionality and some tests * some corrections to Charge.cpp and some more tests to testCharge.cpp * corrections to Charge.cpp * start of Tautomer Enumerate with some tests * added BondType option to Tautomer Enumeration * correcting for some memory leakage * a few alterations to formatting * sorting out some memory leaks * sorting out some memory leaks * some corrections for PCS test set * redo tests with updated RDKit * fixing memory leak * more fixes after 100kPCS set testing * using tab as delimiter in CSVs rather than comma * tutorial for MolStandardize * still working on Tautomer enumeration * deleted some empty tests * starting writing tautomer canonicalize * rename test_data -> data (the source still needs to be updated) * automatic source reformatting * adjust to directory rename * move the fragment catalog test into the MolStandardize directory do not create separate library for FragmentCatalog * stop building separate libraries for the catalogs * move the CleanupParameters into the MolStandardize namespace * first pass at python wrapper * move the py module to the correct dir; add some python tests; add standardizeSmiles to python wrapper * disabling the compareMolVSTest since that requires command line arguments to run * get this building on windows * put the python lib in the right place * further work on python wrapper for rdMolStandardize * added get and set functions to Metal and wrapped them * added get and set functions to Metal and wrapped them * changed construstor of Reionizer class and input args for reionize, wrapped this default * 
overload Reionizer constructor so user can input own AcidBaseFile from python * added Uncharger class to Charge and added test for Uncharger * wrapped Fragment, fixed some memory leakage, changed some args and return types, added some tests * wrapped Normalized and changed how Normalizer class is initiated * changing MolVSValidation structure so user can choose which MolVS submethod they want * starting to write Wrap for Validate * now it compiles with Wrap/Validate.cpp * a couple refactorings around validate * move the validate code into the rdMolStandardize module * make sure a valid pointer is returned for standardizeSmiles * rdMolStandardize.MolVSValidation done and tests added * half way through AllowedAtomsValidation * finished AllowedAtomsValidation and DisallowedAtomsValidation * moved charge, fragment, metal, normalize into the rdMolStandardize module * changed tutorial to use wrapped code * added copyrights * added copyrights * move the data files * modify source files to adjust to the move * added validateSmiles functionality * removed std::cout * redid some of the 100k PCS tests * working on the tutorial * adding some documentation * deleting some comment lines * some changes after pull review * More changes after pull review * start of trying to make java wrap * remove some warnings, add some questions * additional warning removals, a bit more reporting * some test cleanups * enable testing of the java code
165 lines
4.5 KiB
C++
165 lines
4.5 KiB
C++
//
|
|
// Copyright (C) 2018 Susan H. Leung
|
|
//
|
|
// @@ All Rights Reserved @@
|
|
// This file is part of the RDKit.
|
|
// The contents are covered by the terms of the BSD license
|
|
// which is included in the file license.txt, found at the root
|
|
// of the RDKit source tree.
|
|
//
|
|
#include "MolStandardize.h"
#include "Metal.h"
#include "Normalize.h"
#include "Tautomer.h"
#include "Fragment.h"
#include "Charge.h"
#include <GraphMol/RDKitBase.h>
#include <GraphMol/ROMol.h>
#include <GraphMol/MolOps.h>
#include <GraphMol/MolStandardize/TransformCatalog/TransformCatalogParams.h>
#include <GraphMol/SmilesParse/SmilesWrite.h>
#include <GraphMol/SmilesParse/SmilesParse.h>

#include <iostream>
#include <memory>
#include <string>
#include <vector>
|
|
using namespace std;
|
|
|
|
namespace RDKit {
|
|
namespace MolStandardize {
|
|
// A default-constructed, immutable CleanupParameters instance.
// NOTE(review): presumably the fallback `params` argument for the functions
// in this namespace -- confirm against the declarations in MolStandardize.h.
const CleanupParameters defaultCleanupParameters;
|
|
|
|
// Standardizes a copy of a molecule and returns the result.
//
// Steps, in order: sanitize, remove Hs, disconnect metals, normalize,
// reionize, then reassign stereochemistry.
//
// mol    - molecule to standardize; it is copied and never modified
// params - cleanup settings, forwarded to normalize() and reionize()
//
// Returns a newly allocated molecule that the caller owns and must delete.
RWMol *cleanup(const RWMol &mol, const CleanupParameters &params) {
  RWMol m(mol);
  MolOps::sanitizeMol(m);
  MolOps::removeHs(m);

  MolStandardize::MetalDisconnector md;
  md.disconnect(m);

  // Both intermediates are held by owning smart pointers so that nothing
  // leaks if a later step (e.g. assignStereochemistry) throws.
  RWMOL_SPTR normalized(MolStandardize::normalize(&m, params));
  std::unique_ptr<RWMol> reionized(
      MolStandardize::reionize(normalized.get(), params));
  MolOps::assignStereochemistry(*reionized);

  // Hand ownership over to the caller.
  return reionized.release();
}
|
|
|
|
// Placeholder for the tautomer-parent computation; not implemented yet.
// Both arguments are ignored and the call only invokes UNDER_CONSTRUCTION.
void tautomerParent(RWMol &mol, const CleanupParameters &params) {
  RDUNUSED_PARAM(mol);
  RDUNUSED_PARAM(params);
  UNDER_CONSTRUCTION("Not yet implmented");
}
|
|
|
|
// Return the fragment parent of a given molecule.
|
|
// The fragment parent is the largest organic covalent unit in the molecule.
|
|
//
|
|
RWMol *fragmentParent(const RWMol &mol, const CleanupParameters ¶ms,
|
|
bool skip_standardize) {
|
|
const RWMol *cleaned = nullptr;
|
|
|
|
if (!skip_standardize) {
|
|
cleaned = cleanup(mol, params);
|
|
} else {
|
|
cleaned = &mol;
|
|
}
|
|
|
|
LargestFragmentChooser lfragchooser(params.preferOrganic);
|
|
ROMol nm(*cleaned);
|
|
ROMOL_SPTR lfrag(lfragchooser.choose(nm));
|
|
delete cleaned;
|
|
return new RWMol(*lfrag);
|
|
}
|
|
|
|
// Placeholder for the stereo-parent computation; not implemented yet.
// Both arguments are ignored and the call only invokes UNDER_CONSTRUCTION.
void stereoParent(RWMol &mol, const CleanupParameters &params) {
  RDUNUSED_PARAM(mol);
  RDUNUSED_PARAM(params);
  UNDER_CONSTRUCTION("Not yet implmented");
}
|
|
|
|
// Placeholder for the isotope-parent computation; not implemented yet.
// Both arguments are ignored and the call only invokes UNDER_CONSTRUCTION.
void isotopeParent(RWMol &mol, const CleanupParameters &params) {
  RDUNUSED_PARAM(mol);
  RDUNUSED_PARAM(params);
  UNDER_CONSTRUCTION("Not yet implmented");
}
|
|
|
|
// Return the charge parent of a given molecule.
// The charge parent is the uncharged version of the fragment parent.
//
// mol              - input molecule
// params           - cleanup settings, forwarded to fragmentParent() and to
//                    the final cleanup()
// skip_standardize - when true, the initial cleanup() of mol is skipped; the
//                    uncharged result is still passed through cleanup()
//
// Returns a newly allocated molecule that the caller owns and must delete.
RWMol *chargeParent(const RWMol &mol, const CleanupParameters &params,
                    bool skip_standardize) {
  // fragmentParent() runs cleanup() itself unless told to skip it, so the
  // flag is simply forwarded. This avoids holding a separate cleaned copy
  // here (which was a null pointer whenever standardization was skipped).
  RWMOL_SPTR fragparent(fragmentParent(mol, params, skip_standardize));

  ROMol nm(*fragparent);

  Uncharger uncharger;
  ROMOL_SPTR uncharged(uncharger.uncharge(nm));

  // Re-standardize the neutralized fragment before handing it back.
  RWMol *omol = cleanup(static_cast<RWMol>(*uncharged), params);
  return omol;
}
|
|
|
|
// Placeholder for the super-parent computation; not implemented yet.
// Both arguments are ignored and the call only invokes UNDER_CONSTRUCTION.
void superParent(RWMol &mol, const CleanupParameters &params) {
  RDUNUSED_PARAM(mol);
  RDUNUSED_PARAM(params);
  UNDER_CONSTRUCTION("Not yet implmented");
}
|
|
|
|
// Runs a Normalizer, configured from params, over a copy of the molecule.
//
// mol    - molecule to normalize; dereferenced unconditionally, so it must
//          not be null. The pointee is copied and not modified.
// params - supplies normalizations and maxRestarts for the Normalizer
//
// Returns the pointer produced by Normalizer::normalize(), cast to RWMol*.
// NOTE(review): the result is presumably heap-allocated and owned by the
// caller (cleanup() wraps it in a smart pointer) -- confirm in Normalize.h.
RWMol *normalize(const RWMol *mol, const CleanupParameters &params) {
  Normalizer normalizer(params.normalizations, params.maxRestarts);

  ROMol m(*mol);
  ROMol *normalized = normalizer.normalize(m);

  // NOTE(review): unchecked downcast; assumes the object returned by the
  // Normalizer can safely be treated as an RWMol -- confirm.
  return static_cast<RWMol *>(normalized);
}
|
|
|
|
// Runs a default-constructed Reionizer over a copy of the molecule.
//
// mol    - molecule to reionize; dereferenced unconditionally, so it must
//          not be null. The pointee is copied and not modified.
// params - currently unused
//
// Returns the pointer produced by Reionizer::reionize(), cast to RWMol*.
// NOTE(review): the result is presumably heap-allocated and owned by the
// caller (cleanup() returns it to its own caller) -- confirm in Charge.h.
RWMol *reionize(const RWMol *mol, const CleanupParameters &params) {
  RDUNUSED_PARAM(params);
  Reionizer reionizer;
  ROMol m(*mol);
  ROMol *reionized = reionizer.reionize(m);

  // NOTE(review): unchecked downcast; assumes the object returned by the
  // Reionizer can safely be treated as an RWMol -- confirm.
  return static_cast<RWMol *>(reionized);
}
|
|
|
|
std::string standardizeSmiles(const std::string &smiles) {
|
|
RWMOL_SPTR mol(SmilesToMol(smiles, 0, false));
|
|
if (!mol) {
|
|
std::string message =
|
|
"SMILES Parse Error: syntax error for input: " + smiles;
|
|
throw ValueErrorException(message);
|
|
}
|
|
|
|
CleanupParameters params;
|
|
RWMOL_SPTR cleaned(cleanup(*mol, params));
|
|
return MolToSmiles(*cleaned);
|
|
}
|
|
|
|
std::vector<std::string> enumerateTautomerSmiles(
|
|
const std::string &smiles, const CleanupParameters ¶ms) {
|
|
std::shared_ptr<RWMol> mol(SmilesToMol(smiles, 0, false));
|
|
cleanup(*mol, params);
|
|
MolOps::sanitizeMol(*mol);
|
|
|
|
auto *tautparams = new TautomerCatalogParams(params.tautomerTransforms);
|
|
// unsigned int ntautomers = tautparams->getNumTautomers();
|
|
TautomerCatalog tautcat(tautparams);
|
|
TautomerEnumerator te;
|
|
|
|
std::vector<ROMOL_SPTR> res =
|
|
te.enumerate(static_cast<ROMol>(*mol), &tautcat);
|
|
|
|
std::vector<std::string> tsmiles;
|
|
for (const auto &r : res) {
|
|
tsmiles.push_back(MolToSmiles(*r));
|
|
}
|
|
|
|
return tsmiles;
|
|
}
|
|
|
|
} // end of namespace MolStandardize
|
|
} // end of namespace RDKit
|