Files
rdkit/Code/MinimalLib/common.h
Greg Landrum 158fe71ff2 Add some MolStandardize functionality to the CFFI library (#4062)
* support CleanupParameters from JSON

* add standardization to cffi

* remove a bunch of repeated code from the new stuff
2021-04-22 05:23:25 +02:00

397 lines
13 KiB
C++
Executable File

//
// Copyright (C) 2021 Greg Landrum
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
#pragma once
#include <string>
#include <RDGeneral/versions.h>
#include <GraphMol/RDKitBase.h>
#include <GraphMol/MolPickler.h>
#include <GraphMol/SmilesParse/SmilesParse.h>
#include <GraphMol/SmilesParse/SmilesWrite.h>
#include <GraphMol/FileParsers/FileParsers.h>
#include <GraphMol/MolDraw2D/MolDraw2D.h>
#include <GraphMol/MolDraw2D/MolDraw2DSVG.h>
#include <GraphMol/MolDraw2D/MolDraw2DUtils.h>
#include <GraphMol/Substruct/SubstructMatch.h>
#include <GraphMol/MolInterchange/MolInterchange.h>
#include <GraphMol/Descriptors/Property.h>
#include <GraphMol/Descriptors/MolDescriptors.h>
#include <GraphMol/Fingerprints/MorganFingerprints.h>
#include <GraphMol/Depictor/RDDepictor.h>
#include <GraphMol/CIPLabeler/CIPLabeler.h>
#include <GraphMol/Abbreviations/Abbreviations.h>
#include <DataStructs/BitOps.h>
#include <GraphMol/MolStandardize/MolStandardize.h>
#include <GraphMol/MolStandardize/Charge.h>
#include <GraphMol/MolStandardize/Tautomer.h>
#include <sstream>
#include <RDGeneral/BoostStartInclude.h>
#include <boost/property_tree/ptree.hpp>
#include <boost/property_tree/json_parser.hpp>
#include <RDGeneral/BoostEndInclude.h>
#ifndef _MSC_VER
// shutoff some warnings from rapidjson
#if !defined(__clang__) and defined(__GNUC__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wclass-memaccess"
#endif
#endif
#include <rapidjson/document.h>
#include <rapidjson/stringbuffer.h>
#include <rapidjson/writer.h>
#ifndef _MSC_VER
#if !defined(__clang__) and defined(__GNUC__)
#pragma GCC diagnostic pop
#endif
#endif
namespace rj = rapidjson;
namespace RDKit {
namespace MinimalLib {
static constexpr unsigned int d_defaultWidth = 250;
static constexpr unsigned int d_defaultHeight = 200;
#define LPT_OPT_GET(opt) opt = pt.get(#opt, opt);
#define LPT_OPT_GET2(holder, opt) holder.opt = pt.get(#opt, holder.opt);
RWMol *mol_from_input(const std::string &input,
const std::string &details_json = "") {
bool sanitize = true;
bool removeHs = true;
RWMol *res = nullptr;
boost::property_tree::ptree pt;
if (!details_json.empty()) {
std::istringstream ss;
ss.str(details_json);
boost::property_tree::read_json(ss, pt);
LPT_OPT_GET(sanitize);
LPT_OPT_GET(removeHs);
}
if (input.find("M END") != std::string::npos) {
bool strictParsing = false;
LPT_OPT_GET(strictParsing);
res = MolBlockToMol(input, false, removeHs, strictParsing);
} else if (input.find("commonchem") != std::string::npos) {
auto ps = MolInterchange::defaultJSONParseParameters;
LPT_OPT_GET2(ps, setAromaticBonds);
LPT_OPT_GET2(ps, strictValenceCheck);
LPT_OPT_GET2(ps, parseProperties);
LPT_OPT_GET2(ps, parseConformers);
auto molVect = MolInterchange::JSONDataToMols(input, ps);
if (!molVect.empty()) {
res = new RWMol(*molVect[0]);
}
} else {
SmilesParserParams ps;
ps.sanitize = false;
ps.removeHs = removeHs;
LPT_OPT_GET2(ps, strictCXSMILES);
LPT_OPT_GET2(ps, useLegacyStereo);
res = SmilesToMol(input, ps);
}
if (res) {
try {
if (sanitize) {
MolOps::sanitizeMol(*res);
}
MolOps::assignStereochemistry(*res, true, true, true);
} catch (...) {
delete res;
res = nullptr;
}
}
return res;
}
RWMol *mol_from_input(const std::string &input, const char *details_json) {
std::string json;
if (details_json) {
json = details_json;
}
return mol_from_input(input, json);
}
RWMol *qmol_from_input(const std::string &input,
const std::string &details_json = "") {
RWMol *res = nullptr;
bool sanitize = false;
bool removeHs = true;
boost::property_tree::ptree pt;
if (!details_json.empty()) {
// FIX: this should eventually be moved somewhere else
std::istringstream ss;
ss.str(details_json);
boost::property_tree::read_json(ss, pt);
LPT_OPT_GET(sanitize);
LPT_OPT_GET(removeHs);
}
if (input.find("M END") != std::string::npos) {
bool strictParsing = false;
LPT_OPT_GET(strictParsing);
res = MolBlockToMol(input, false, removeHs, strictParsing);
} else if (input.find("commonchem") != std::string::npos) {
auto ps = MolInterchange::defaultJSONParseParameters;
LPT_OPT_GET2(ps, setAromaticBonds);
LPT_OPT_GET2(ps, strictValenceCheck);
LPT_OPT_GET2(ps, parseProperties);
LPT_OPT_GET2(ps, parseConformers);
auto molVect = MolInterchange::JSONDataToMols(input, ps);
if (!molVect.empty()) {
res = new RWMol(*molVect[0]);
}
} else {
bool mergeHs = false;
LPT_OPT_GET(mergeHs);
res = SmartsToMol(input, 0, mergeHs);
}
return res;
}
RWMol *qmol_from_input(const std::string &input, const char *details_json) {
std::string json;
if (details_json) {
json = details_json;
}
return qmol_from_input(input, json);
}
std::string process_details(const std::string &details, unsigned int &width,
unsigned int &height, int &offsetx, int &offsety,
std::string &legend, std::vector<int> &atomIds,
std::vector<int> &bondIds) {
rj::Document doc;
doc.Parse(details.c_str());
if (!doc.IsObject()) return "Invalid JSON";
if (doc.HasMember("atoms")) {
if (!doc["atoms"].IsArray()) {
return "JSON doesn't contain 'atoms' field, or it is not an array";
}
for (const auto &molval : doc["atoms"].GetArray()) {
if (!molval.IsInt()) return ("Atom IDs should be integers");
atomIds.push_back(molval.GetInt());
}
}
if (doc.HasMember("bonds")) {
if (!doc["bonds"].IsArray()) {
return "JSON contain 'bonds' field, but it is not an array";
}
for (const auto &molval : doc["bonds"].GetArray()) {
if (!molval.IsInt()) return ("Bond IDs should be integers");
bondIds.push_back(molval.GetInt());
}
}
if (doc.HasMember("width")) {
if (!doc["width"].IsUint()) {
return "JSON contains 'width' field, but it is not an unsigned int";
}
width = doc["width"].GetUint();
}
if (doc.HasMember("height")) {
if (!doc["height"].IsUint()) {
return "JSON contains 'height' field, but it is not an unsigned int";
}
height = doc["height"].GetUint();
}
if (doc.HasMember("offsetx")) {
if (!doc["offsetx"].IsInt()) {
return "JSON contains 'offsetx' field, but it is not an int";
}
offsetx = doc["offsetx"].GetInt();
}
if (doc.HasMember("offsety")) {
if (!doc["offsety"].IsInt()) {
return "JSON contains 'offsety' field, but it is not an int";
}
offsety = doc["offsety"].GetInt();
}
if (doc.HasMember("legend")) {
if (!doc["legend"].IsString()) {
return "JSON contains 'legend' field, but it is not a string";
}
legend = doc["legend"].GetString();
}
return "";
}
void get_sss_json(const ROMol &d_mol, const ROMol &q_mol,
const MatchVectType &match, rj::Value &obj,
rj::Document &doc) {
rj::Value rjAtoms(rj::kArrayType);
for (const auto &pr : match) {
rjAtoms.PushBack(pr.second, doc.GetAllocator());
}
obj.AddMember("atoms", rjAtoms, doc.GetAllocator());
rj::Value rjBonds(rj::kArrayType);
for (const auto qbond : q_mol.bonds()) {
unsigned int beginIdx = qbond->getBeginAtomIdx();
unsigned int endIdx = qbond->getEndAtomIdx();
if (beginIdx >= match.size() || endIdx >= match.size()) {
continue;
}
unsigned int idx1 = match[beginIdx].second;
unsigned int idx2 = match[endIdx].second;
const auto bond = d_mol.getBondBetweenAtoms(idx1, idx2);
if (bond != nullptr) {
rjBonds.PushBack(bond->getIdx(), doc.GetAllocator());
}
}
obj.AddMember("bonds", rjBonds, doc.GetAllocator());
}
std::string mol_to_svg(const ROMol &m, unsigned int w, unsigned int h,
const std::string &details = "") {
std::vector<int> atomIds;
std::vector<int> bondIds;
std::string legend = "";
int offsetx = 0, offsety = 0;
if (!details.empty()) {
auto problems = process_details(details, w, h, offsetx, offsety, legend,
atomIds, bondIds);
if (!problems.empty()) {
return problems;
}
}
MolDraw2DSVG drawer(w, h);
if (!details.empty()) {
MolDraw2DUtils::updateDrawerParamsFromJSON(drawer, details);
}
drawer.setOffset(offsetx, offsety);
MolDraw2DUtils::prepareAndDrawMolecule(drawer, m, legend, &atomIds, &bondIds);
drawer.finishDrawing();
return drawer.getDrawingText();
}
std::string get_descriptors(const ROMol &m) {
rj::Document doc;
doc.SetObject();
Descriptors::Properties props;
std::vector<std::string> dns = props.getPropertyNames();
std::vector<double> dvs = props.computeProperties(m);
for (size_t i = 0; i < dns.size(); ++i) {
rj::Value v(dvs[i]);
const auto srt = rj::StringRef(dns[i].c_str());
doc.AddMember(srt, v, doc.GetAllocator());
}
rj::StringBuffer buffer;
rj::Writer<rj::StringBuffer> writer(buffer);
writer.SetMaxDecimalPlaces(5);
doc.Accept(writer);
return buffer.GetString();
}
namespace {
template <typename T, typename U>
std::unique_ptr<RWMol> standardize_func(T &mol, const std::string &details_json,
U func) {
MolStandardize::CleanupParameters ps =
MolStandardize::defaultCleanupParameters;
if (!details_json.empty()) {
MolStandardize::updateCleanupParamsFromJSON(ps, details_json);
}
return std::unique_ptr<RWMol>(static_cast<RWMol *>(func(mol, ps)));
}
} // namespace
std::unique_ptr<RWMol> do_cleanup(RWMol &mol, const std::string &details_json) {
return standardize_func(mol, details_json, MolStandardize::cleanup);
}
std::unique_ptr<RWMol> do_normalize(RWMol &mol,
const std::string &details_json) {
auto molp = &mol;
return standardize_func(molp, details_json, MolStandardize::normalize);
}
std::unique_ptr<RWMol> do_reionize(RWMol &mol,
const std::string &details_json) {
auto molp = &mol;
return standardize_func(molp, details_json, MolStandardize::reionize);
}
std::unique_ptr<RWMol> do_canonical_tautomer(RWMol &mol,
const std::string &details_json) {
MolStandardize::CleanupParameters ps =
MolStandardize::defaultCleanupParameters;
if (!details_json.empty()) {
MolStandardize::updateCleanupParamsFromJSON(ps, details_json);
}
MolStandardize::TautomerEnumerator te(ps);
std::unique_ptr<RWMol> res(static_cast<RWMol *>(te.canonicalize(mol)));
return res;
}
std::unique_ptr<RWMol> do_neutralize(RWMol &mol,
const std::string &details_json) {
MolStandardize::CleanupParameters ps =
MolStandardize::defaultCleanupParameters;
if (!details_json.empty()) {
MolStandardize::updateCleanupParamsFromJSON(ps, details_json);
}
MolStandardize::Uncharger uncharger(ps.doCanonical);
std::unique_ptr<RWMol> res(static_cast<RWMol *>(uncharger.uncharge(mol)));
return res;
}
std::unique_ptr<RWMol> do_charge_parent(RWMol &mol,
const std::string &details_json) {
MolStandardize::CleanupParameters ps =
MolStandardize::defaultCleanupParameters;
bool skipStandardize = false;
if (!details_json.empty()) {
MolStandardize::updateCleanupParamsFromJSON(ps, details_json);
boost::property_tree::ptree pt;
std::istringstream ss;
ss.str(details_json);
boost::property_tree::read_json(ss, pt);
LPT_OPT_GET(skipStandardize);
}
std::unique_ptr<RWMol> res(
MolStandardize::chargeParent(mol, ps, skipStandardize));
return res;
}
std::unique_ptr<RWMol> do_fragment_parent(RWMol &mol,
const std::string &details_json) {
MolStandardize::CleanupParameters ps =
MolStandardize::defaultCleanupParameters;
bool skipStandardize = false;
if (!details_json.empty()) {
MolStandardize::updateCleanupParamsFromJSON(ps, details_json);
boost::property_tree::ptree pt;
std::istringstream ss;
ss.str(details_json);
boost::property_tree::read_json(ss, pt);
LPT_OPT_GET(skipStandardize);
}
std::unique_ptr<RWMol> res(
MolStandardize::fragmentParent(mol, ps, skipStandardize));
return res;
}
} // namespace MinimalLib
} // namespace RDKit
#undef LPT_OPT_GET
#undef LPT_OPT_GET2