mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-05 22:04:27 +08:00
* short test file for MolVS standardize_sm * short test file for MolVS fragment * short test file for MolVS metals * short test file for MolVS normalize * short test file for MolVS reionize * short test file for MolVS tautomer * short test file for MolVS validate * long test file for MolVS standardize smiles * long test file for MolVS fragment * long test file for MolVS metals * long test file for MolVS normalize * long test file for MolVS reionize * long test file for MolVS tautomer * long test file for MolVS validate * Unit tests for MolVS steps * dropping support for Python2 * molvs/__init__.py * molvs/charge.py * molvs/errors.py * molvs/fragment.py * molvs/metal.py * molvs/normalize.py * molvs/resonance.py * molvs/standardize.py * molvs/tautomer.py * molvs/utils.py * molvs/validate.py * molvs/validations.py * molvs/cli.py * adapted and renamed molvs/cli.py to work within $RDBASE/Contrib/MolVS/ * setup MolStandardize directories, source with empty cleanup function, header, CMake files * corrections to empty source, header and test1.cpp * adding empty functions and initializers to MolStandardize * empty Metal source, header and added test * added most of Metal.cpp functionality and made some more tests * empty functions and initializers to Normalize * empty functions and initializers to Validate * added most code for RDKitDefault mode, along with some tests * restructure for abstract base class ValidateMethod * written in isNoneValidation for MolVSValidation * took out isNoneValidation, put in noAtomValidation, neutralValidation, isotopeValidation for MolVSValidation * added in AllowedAtoms * added in disallowedAtoms * corrections to Validate * added code for FragmentRemover * extended fragment functionality to include choose largest fragment, added in tests for fragment catalog, fragment remover. 
Also added fragmentValidation method in MolStandardize * added another test to testValidate test_fragment * corrections to fragment * corrections to Metal * added code for Normalize * added normalize member function to MolStandardize and added tests * added multi fragment functionality to Normalize.cpp and additional tests * TransformCatalog * tests for Normalize.cpp * first bit of cleanup * added most of Charge functionality and some tests * some corrections to Charge.cpp and some more tests to testCharge.cpp * corrections to Charge.cpp * start of Tautomer Enumerate with some tests * added BondType option to Tautomer Enumeration * correcting for some memory leakage * a few alterations to formatting * sorting out some memory leaks * sorting out some memory leaks * some corrections for PCS test set * redo tests with updated RDKit * fixing memory leak * more fixes after 100kPCS set testing * using tab as delimiter in CSVs rather than comma * tutorial for MolStandardize * still working on Tautomer enumeration * deleted some empty tests * starting writing tautomer canonicalize * rename test_data -> data (the source still needs to be updated) * automatic source reformatting * adjust to directory rename * move the fragment catalog test into the MolStandardize directory do not create separate library for FragmentCatalog * stop building separate libraries for the catalogs * move the CleanupParameters into the MolStandardize namespace * first pass at python wrapper * move the py module to the correct dir; add some python tests; add standardizeSmiles to python wrapper * disabling the compareMolVSTest since that requires command line arguments to run * get this building on windows * put the python lib in the right place * further work on python wrapper for rdMolStandardize * added get and set functions to Metal and wrapped them * added get and set functions to Metal and wrapped them * changed construstor of Reionizer class and input args for reionize, wrapped this default * 
overload Reionizer constructor so user can input own AcidBaseFile from python * added Uncharger class to Charge and added test for Uncharger * wrapped Fragment, fixed some memory leakage, changed some args and return types, added some tests * wrapped Normalized and changed how Normalizer class is initiated * changing MolVSValidation structure so user can choose which MolVS submethod they want * starting to write Wrap for Validate * now it compiles with Wrap/Validate.cpp * a couple refactorings around validate * move the validate code into the rdMolStandardize module * make sure a valid pointer is returned for standardizeSmiles * rdMolStandardize.MolVSValidation done and tests added * half way through AllowedAtomsValidation * finished AllowedAtomsValidation and DisallowedAtomsValidation * moved charge, fragment, metal, normalize into the rdMolStandardize module * changed tutorial to use wrapped code * added copyrights * added copyrights * move the data files * modify source files to adjust to the move * added validateSmiles functionality * removed std::cout * redid some of the 100k PCS tests * working on the tutorial * adding some documentation * deleting some comment lines * some changes after pull review * More changes after pull review * start of trying to make java wrap * remove some warnings, add some questions * additional warning removals, a bit more reporting * some test cleanups * enable testing of the java code
289 lines
8.7 KiB
C++
289 lines
8.7 KiB
C++
//
|
|
// Copyright (C) 2018 Susan H. Leung
|
|
//
|
|
// @@ All Rights Reserved @@
|
|
// This file is part of the RDKit.
|
|
// The contents are covered by the terms of the BSD license
|
|
// which is included in the file license.txt, found at the root
|
|
// of the RDKit source tree.
|
|
//
|
|
#include "Validate.h"
|
|
#include "Fragment.h"
|
|
#include <GraphMol/RDKitBase.h>
|
|
#include <GraphMol/ROMol.h>
|
|
#include <GraphMol/MolStandardize/FragmentCatalog/FragmentCatalogParams.h>
|
|
#include <GraphMol/Substruct/SubstructMatch.h>
|
|
#include <iostream>
|
|
#include <vector>
|
|
#include <string>
|
|
|
|
#include <GraphMol/SmilesParse/SmilesParse.h>
|
|
#include <GraphMol/SmilesParse/SmilesWrite.h>
|
|
|
|
using namespace std;
|
|
using namespace RDKit;
|
|
|
|
namespace RDKit {
|
|
class RWMol;
|
|
class ROMol;
|
|
|
|
namespace MolStandardize {
|
|
|
|
std::vector<ValidationErrorInfo> RDKitValidation::validate(
|
|
const ROMol &mol, bool reportAllFailures) const {
|
|
ROMol molCopy = mol;
|
|
std::vector<ValidationErrorInfo> errors;
|
|
|
|
unsigned int na = mol.getNumAtoms();
|
|
|
|
if (!na) {
|
|
errors.push_back(
|
|
ValidationErrorInfo("ERROR: [NoAtomValidation] Molecule has no atoms"));
|
|
}
|
|
|
|
// loop over atoms
|
|
for (size_t i = 0; i < na; ++i) {
|
|
if (!reportAllFailures) {
|
|
if (errors.size() >= 1) {
|
|
break;
|
|
}
|
|
}
|
|
Atom *atom = molCopy.getAtomWithIdx(i);
|
|
try {
|
|
atom->calcExplicitValence();
|
|
} catch (const MolSanitizeException &e) {
|
|
errors.push_back(ValidationErrorInfo("INFO: [ValenceValidation] " +
|
|
std::string(e.message())));
|
|
}
|
|
}
|
|
return errors;
|
|
}
|
|
|
|
void NoAtomValidation::run(const ROMol &mol, bool reportAllFailures,
|
|
std::vector<ValidationErrorInfo> &errors) const {
|
|
RDUNUSED_PARAM(reportAllFailures);
|
|
unsigned int na = mol.getNumAtoms();
|
|
|
|
if (!na) {
|
|
errors.push_back(
|
|
ValidationErrorInfo("ERROR: [NoAtomValidation] Molecule has no atoms"));
|
|
}
|
|
}
|
|
|
|
void FragmentValidation::run(const ROMol &mol, bool reportAllFailures,
|
|
std::vector<ValidationErrorInfo> &errors) const {
|
|
// REVIEW: reportAllFailures is not being used here. is that correct?
|
|
std::string rdbase = getenv("RDBASE");
|
|
std::string fgrpFile = rdbase + "/Data/MolStandardize/fragmentPatterns.txt";
|
|
std::shared_ptr<FragmentCatalogParams> fparams(
|
|
new FragmentCatalogParams(fgrpFile));
|
|
FragmentCatalog fcat(fparams.get());
|
|
|
|
const std::vector<std::shared_ptr<ROMol>> &fgrps = fparams->getFuncGroups();
|
|
INT_VECT mapping;
|
|
VECT_INT_VECT atom_mapping;
|
|
std::vector<ROMOL_SPTR> frags =
|
|
MolOps::getMolFrags(mol, true, &mapping, &atom_mapping);
|
|
|
|
for (auto &fgrp : fgrps) {
|
|
std::string fname;
|
|
fgrp->getProp(common_properties::_Name, fname);
|
|
std::vector<RDKit::MatchVectType> res;
|
|
unsigned int matches = SubstructMatch(mol, *fgrp, res);
|
|
// std::cout << fname << " matches " << matches << std::endl;
|
|
if (matches != 0 && frags.size() != 0) {
|
|
VECT_INT_VECT substructmap; // store idxs of frag from substructmatch
|
|
for (const auto &match : res) {
|
|
std::vector<int> vec;
|
|
for (const auto &pair : match) {
|
|
vec.push_back(pair.second);
|
|
}
|
|
substructmap.push_back(vec);
|
|
}
|
|
|
|
// to stop the same fragment being reported many times if present
|
|
// multiple times in molecule
|
|
bool fpresent = false;
|
|
|
|
for (auto &molfragidx : atom_mapping) {
|
|
std::sort(molfragidx.begin(), molfragidx.end());
|
|
for (auto &substructidx : substructmap) {
|
|
std::sort(substructidx.begin(), substructidx.end());
|
|
// // help to debug...
|
|
// std::cout << "molfragidx: " <<
|
|
// std::endl; for (const auto
|
|
// &i : molfragidx)
|
|
// {
|
|
// std::cout << i; }; std::cout
|
|
// << std::endl; std::cout <<
|
|
//"substructidx: " << std::endl;
|
|
// for (const auto &i : substructidx) { std::cout << i; };
|
|
// std::cout <<
|
|
// std::endl;
|
|
// //
|
|
if ((molfragidx == substructidx) && !fpresent) {
|
|
std::string msg = fname + " is present";
|
|
errors.push_back(
|
|
ValidationErrorInfo("INFO: [FragmentValidation] " + msg));
|
|
fpresent = true;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
void NeutralValidation::run(const ROMol &mol, bool reportAllFailures,
|
|
std::vector<ValidationErrorInfo> &errors) const {
|
|
RDUNUSED_PARAM(reportAllFailures);
|
|
int charge = RDKit::MolOps::getFormalCharge(mol);
|
|
if (charge != 0) {
|
|
std::string charge_str;
|
|
if (charge > 0) {
|
|
charge_str = "+" + std::to_string(charge);
|
|
} else {
|
|
charge_str = std::to_string(charge);
|
|
}
|
|
std::string msg = "Not an overall neutral system (" + charge_str + ')';
|
|
errors.push_back(ValidationErrorInfo("INFO: [NeutralValidation] " + msg));
|
|
}
|
|
}
|
|
|
|
void IsotopeValidation::run(const ROMol &mol, bool reportAllFailures,
|
|
std::vector<ValidationErrorInfo> &errors) const {
|
|
unsigned int na = mol.getNumAtoms();
|
|
std::set<string> isotopes;
|
|
|
|
// loop over atoms
|
|
for (size_t i = 0; i < na; ++i) {
|
|
if (!reportAllFailures) {
|
|
if (errors.size() >= 1) {
|
|
break;
|
|
}
|
|
}
|
|
const Atom *atom = mol.getAtomWithIdx(i);
|
|
unsigned int isotope = atom->getIsotope();
|
|
if (isotope != 0) {
|
|
std::string symbol = atom->getSymbol();
|
|
isotopes.insert(std::to_string(isotope) + symbol);
|
|
}
|
|
}
|
|
|
|
for (auto &isotope : isotopes) {
|
|
errors.push_back(ValidationErrorInfo(
|
|
"INFO: [IsotopeValidation] Molecule contains isotope " + isotope));
|
|
}
|
|
}
|
|
|
|
// constructor
|
|
MolVSValidation::MolVSValidation() {
|
|
std::vector<boost::shared_ptr<MolVSValidations>> validations = {
|
|
boost::make_shared<NoAtomValidation>(),
|
|
boost::make_shared<FragmentValidation>(),
|
|
boost::make_shared<NeutralValidation>(),
|
|
boost::make_shared<IsotopeValidation>()};
|
|
this->d_validations = validations;
|
|
}
|
|
|
|
// overloaded constructor
|
|
MolVSValidation::MolVSValidation(
|
|
const std::vector<boost::shared_ptr<MolVSValidations>> validations) {
|
|
this->d_validations = validations;
|
|
}
|
|
|
|
// copy constructor
|
|
MolVSValidation::MolVSValidation(const MolVSValidation &other) {
|
|
d_validations = other.d_validations;
|
|
}
|
|
|
|
// Destructor: nothing to release explicitly.
MolVSValidation::~MolVSValidation() = default;
|
|
|
|
std::vector<ValidationErrorInfo> MolVSValidation::validate(
|
|
const ROMol &mol, bool reportAllFailures) const {
|
|
std::vector<ValidationErrorInfo> errors;
|
|
|
|
for (const auto method : this->d_validations) {
|
|
method->run(mol, reportAllFailures, errors);
|
|
}
|
|
|
|
return errors;
|
|
}
|
|
|
|
std::vector<ValidationErrorInfo> AllowedAtomsValidation::validate(
|
|
const ROMol &mol, bool reportAllFailures) const {
|
|
std::vector<ValidationErrorInfo> errors;
|
|
unsigned int na = mol.getNumAtoms();
|
|
|
|
for (size_t i = 0; i < na; ++i) {
|
|
if (!reportAllFailures) {
|
|
if (errors.size() >= 1) {
|
|
break;
|
|
}
|
|
}
|
|
const Atom *qatom = mol.getAtomWithIdx(i);
|
|
bool match = false;
|
|
// checks to see qatom matches one of list of allowedAtoms
|
|
for (const auto &allowedAtom : this->d_allowedList) {
|
|
if (allowedAtom->Match(qatom)) {
|
|
match = true;
|
|
break;
|
|
}
|
|
}
|
|
// if no match, append to list of errors.
|
|
if (!match) {
|
|
std::string symbol = qatom->getSymbol();
|
|
errors.push_back(
|
|
ValidationErrorInfo("INFO: [AllowedAtomsValidation] Atom " + symbol +
|
|
" is not in allowedAtoms list"));
|
|
}
|
|
}
|
|
return errors;
|
|
}
|
|
|
|
std::vector<ValidationErrorInfo> DisallowedAtomsValidation::validate(
|
|
const ROMol &mol, bool reportAllFailures) const {
|
|
std::vector<ValidationErrorInfo> errors;
|
|
unsigned int na = mol.getNumAtoms();
|
|
|
|
for (size_t i = 0; i < na; ++i) {
|
|
if (!reportAllFailures) {
|
|
if (errors.size() >= 1) {
|
|
break;
|
|
}
|
|
}
|
|
const Atom *qatom = mol.getAtomWithIdx(i);
|
|
bool match = false;
|
|
// checks to see qatom matches one of list of allowedAtoms
|
|
for (const auto &disallowedAtom : this->d_disallowedList) {
|
|
if (disallowedAtom->Match(qatom)) {
|
|
match = true;
|
|
}
|
|
}
|
|
// if no match, append to list of errors.
|
|
if (match) {
|
|
std::string symbol = qatom->getSymbol();
|
|
errors.push_back(
|
|
ValidationErrorInfo("INFO: [DisallowedAtomsValidation] Atom " +
|
|
symbol + " is in disallowedAtoms list"));
|
|
}
|
|
}
|
|
return errors;
|
|
}
|
|
|
|
std::vector<ValidationErrorInfo> validateSmiles(const std::string &smiles) {
|
|
RWMOL_SPTR mol(SmilesToMol(smiles));
|
|
if (!mol) {
|
|
std::string message =
|
|
"SMILES Parse Error: syntax error for input: " + smiles;
|
|
throw ValueErrorException(message);
|
|
}
|
|
|
|
MolVSValidation vm;
|
|
std::vector<ValidationErrorInfo> errors = vm.validate(*mol, true);
|
|
|
|
return errors;
|
|
}
|
|
|
|
} // namespace MolStandardize
|
|
} // namespace RDKit
|