mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-03 21:44:30 +08:00
* support CleanupParameters from JSON * add standardization to cffi * remove a bunch of repeated code from the new stuff
397 lines
13 KiB
C++
Executable File
397 lines
13 KiB
C++
Executable File
//
|
|
// Copyright (C) 2021 Greg Landrum
|
|
//
|
|
// @@ All Rights Reserved @@
|
|
// This file is part of the RDKit.
|
|
// The contents are covered by the terms of the BSD license
|
|
// which is included in the file license.txt, found at the root
|
|
// of the RDKit source tree.
|
|
//
|
|
#pragma once
|
|
|
|
#include <string>
|
|
#include <RDGeneral/versions.h>
|
|
#include <GraphMol/RDKitBase.h>
|
|
#include <GraphMol/MolPickler.h>
|
|
#include <GraphMol/SmilesParse/SmilesParse.h>
|
|
#include <GraphMol/SmilesParse/SmilesWrite.h>
|
|
#include <GraphMol/FileParsers/FileParsers.h>
|
|
#include <GraphMol/MolDraw2D/MolDraw2D.h>
|
|
#include <GraphMol/MolDraw2D/MolDraw2DSVG.h>
|
|
#include <GraphMol/MolDraw2D/MolDraw2DUtils.h>
|
|
#include <GraphMol/Substruct/SubstructMatch.h>
|
|
#include <GraphMol/MolInterchange/MolInterchange.h>
|
|
#include <GraphMol/Descriptors/Property.h>
|
|
#include <GraphMol/Descriptors/MolDescriptors.h>
|
|
#include <GraphMol/Fingerprints/MorganFingerprints.h>
|
|
#include <GraphMol/Depictor/RDDepictor.h>
|
|
#include <GraphMol/CIPLabeler/CIPLabeler.h>
|
|
#include <GraphMol/Abbreviations/Abbreviations.h>
|
|
#include <DataStructs/BitOps.h>
|
|
#include <GraphMol/MolStandardize/MolStandardize.h>
|
|
#include <GraphMol/MolStandardize/Charge.h>
|
|
#include <GraphMol/MolStandardize/Tautomer.h>
|
|
|
|
#include <sstream>
|
|
#include <RDGeneral/BoostStartInclude.h>
|
|
#include <boost/property_tree/ptree.hpp>
|
|
#include <boost/property_tree/json_parser.hpp>
|
|
#include <RDGeneral/BoostEndInclude.h>
|
|
|
|
#ifndef _MSC_VER
|
|
// shutoff some warnings from rapidjson
|
|
#if !defined(__clang__) and defined(__GNUC__)
|
|
#pragma GCC diagnostic push
|
|
#pragma GCC diagnostic ignored "-Wclass-memaccess"
|
|
#endif
|
|
#endif
|
|
#include <rapidjson/document.h>
|
|
#include <rapidjson/stringbuffer.h>
|
|
#include <rapidjson/writer.h>
|
|
#ifndef _MSC_VER
|
|
#if !defined(__clang__) and defined(__GNUC__)
|
|
#pragma GCC diagnostic pop
|
|
#endif
|
|
#endif
|
|
namespace rj = rapidjson;
|
|
|
|
namespace RDKit {
|
|
namespace MinimalLib {
|
|
|
|
static constexpr unsigned int d_defaultWidth = 250;
|
|
static constexpr unsigned int d_defaultHeight = 200;
|
|
|
|
#define LPT_OPT_GET(opt) opt = pt.get(#opt, opt);
|
|
#define LPT_OPT_GET2(holder, opt) holder.opt = pt.get(#opt, holder.opt);
|
|
|
|
RWMol *mol_from_input(const std::string &input,
|
|
const std::string &details_json = "") {
|
|
bool sanitize = true;
|
|
bool removeHs = true;
|
|
RWMol *res = nullptr;
|
|
boost::property_tree::ptree pt;
|
|
if (!details_json.empty()) {
|
|
std::istringstream ss;
|
|
ss.str(details_json);
|
|
boost::property_tree::read_json(ss, pt);
|
|
LPT_OPT_GET(sanitize);
|
|
LPT_OPT_GET(removeHs);
|
|
}
|
|
if (input.find("M END") != std::string::npos) {
|
|
bool strictParsing = false;
|
|
LPT_OPT_GET(strictParsing);
|
|
res = MolBlockToMol(input, false, removeHs, strictParsing);
|
|
} else if (input.find("commonchem") != std::string::npos) {
|
|
auto ps = MolInterchange::defaultJSONParseParameters;
|
|
LPT_OPT_GET2(ps, setAromaticBonds);
|
|
LPT_OPT_GET2(ps, strictValenceCheck);
|
|
LPT_OPT_GET2(ps, parseProperties);
|
|
LPT_OPT_GET2(ps, parseConformers);
|
|
|
|
auto molVect = MolInterchange::JSONDataToMols(input, ps);
|
|
if (!molVect.empty()) {
|
|
res = new RWMol(*molVect[0]);
|
|
}
|
|
} else {
|
|
SmilesParserParams ps;
|
|
ps.sanitize = false;
|
|
ps.removeHs = removeHs;
|
|
LPT_OPT_GET2(ps, strictCXSMILES);
|
|
LPT_OPT_GET2(ps, useLegacyStereo);
|
|
res = SmilesToMol(input, ps);
|
|
}
|
|
if (res) {
|
|
try {
|
|
if (sanitize) {
|
|
MolOps::sanitizeMol(*res);
|
|
}
|
|
MolOps::assignStereochemistry(*res, true, true, true);
|
|
} catch (...) {
|
|
delete res;
|
|
res = nullptr;
|
|
}
|
|
}
|
|
return res;
|
|
}
|
|
RWMol *mol_from_input(const std::string &input, const char *details_json) {
|
|
std::string json;
|
|
if (details_json) {
|
|
json = details_json;
|
|
}
|
|
return mol_from_input(input, json);
|
|
}
|
|
|
|
RWMol *qmol_from_input(const std::string &input,
|
|
const std::string &details_json = "") {
|
|
RWMol *res = nullptr;
|
|
bool sanitize = false;
|
|
bool removeHs = true;
|
|
boost::property_tree::ptree pt;
|
|
if (!details_json.empty()) {
|
|
// FIX: this should eventually be moved somewhere else
|
|
std::istringstream ss;
|
|
ss.str(details_json);
|
|
boost::property_tree::read_json(ss, pt);
|
|
LPT_OPT_GET(sanitize);
|
|
LPT_OPT_GET(removeHs);
|
|
}
|
|
if (input.find("M END") != std::string::npos) {
|
|
bool strictParsing = false;
|
|
LPT_OPT_GET(strictParsing);
|
|
res = MolBlockToMol(input, false, removeHs, strictParsing);
|
|
} else if (input.find("commonchem") != std::string::npos) {
|
|
auto ps = MolInterchange::defaultJSONParseParameters;
|
|
LPT_OPT_GET2(ps, setAromaticBonds);
|
|
LPT_OPT_GET2(ps, strictValenceCheck);
|
|
LPT_OPT_GET2(ps, parseProperties);
|
|
LPT_OPT_GET2(ps, parseConformers);
|
|
|
|
auto molVect = MolInterchange::JSONDataToMols(input, ps);
|
|
if (!molVect.empty()) {
|
|
res = new RWMol(*molVect[0]);
|
|
}
|
|
} else {
|
|
bool mergeHs = false;
|
|
LPT_OPT_GET(mergeHs);
|
|
res = SmartsToMol(input, 0, mergeHs);
|
|
}
|
|
return res;
|
|
}
|
|
RWMol *qmol_from_input(const std::string &input, const char *details_json) {
|
|
std::string json;
|
|
if (details_json) {
|
|
json = details_json;
|
|
}
|
|
return qmol_from_input(input, json);
|
|
}
|
|
|
|
std::string process_details(const std::string &details, unsigned int &width,
|
|
unsigned int &height, int &offsetx, int &offsety,
|
|
std::string &legend, std::vector<int> &atomIds,
|
|
std::vector<int> &bondIds) {
|
|
rj::Document doc;
|
|
doc.Parse(details.c_str());
|
|
if (!doc.IsObject()) return "Invalid JSON";
|
|
|
|
if (doc.HasMember("atoms")) {
|
|
if (!doc["atoms"].IsArray()) {
|
|
return "JSON doesn't contain 'atoms' field, or it is not an array";
|
|
}
|
|
for (const auto &molval : doc["atoms"].GetArray()) {
|
|
if (!molval.IsInt()) return ("Atom IDs should be integers");
|
|
atomIds.push_back(molval.GetInt());
|
|
}
|
|
}
|
|
if (doc.HasMember("bonds")) {
|
|
if (!doc["bonds"].IsArray()) {
|
|
return "JSON contain 'bonds' field, but it is not an array";
|
|
}
|
|
for (const auto &molval : doc["bonds"].GetArray()) {
|
|
if (!molval.IsInt()) return ("Bond IDs should be integers");
|
|
bondIds.push_back(molval.GetInt());
|
|
}
|
|
}
|
|
|
|
if (doc.HasMember("width")) {
|
|
if (!doc["width"].IsUint()) {
|
|
return "JSON contains 'width' field, but it is not an unsigned int";
|
|
}
|
|
width = doc["width"].GetUint();
|
|
}
|
|
|
|
if (doc.HasMember("height")) {
|
|
if (!doc["height"].IsUint()) {
|
|
return "JSON contains 'height' field, but it is not an unsigned int";
|
|
}
|
|
height = doc["height"].GetUint();
|
|
}
|
|
|
|
if (doc.HasMember("offsetx")) {
|
|
if (!doc["offsetx"].IsInt()) {
|
|
return "JSON contains 'offsetx' field, but it is not an int";
|
|
}
|
|
offsetx = doc["offsetx"].GetInt();
|
|
}
|
|
|
|
if (doc.HasMember("offsety")) {
|
|
if (!doc["offsety"].IsInt()) {
|
|
return "JSON contains 'offsety' field, but it is not an int";
|
|
}
|
|
offsety = doc["offsety"].GetInt();
|
|
}
|
|
|
|
if (doc.HasMember("legend")) {
|
|
if (!doc["legend"].IsString()) {
|
|
return "JSON contains 'legend' field, but it is not a string";
|
|
}
|
|
legend = doc["legend"].GetString();
|
|
}
|
|
|
|
return "";
|
|
}
|
|
|
|
void get_sss_json(const ROMol &d_mol, const ROMol &q_mol,
|
|
const MatchVectType &match, rj::Value &obj,
|
|
rj::Document &doc) {
|
|
rj::Value rjAtoms(rj::kArrayType);
|
|
for (const auto &pr : match) {
|
|
rjAtoms.PushBack(pr.second, doc.GetAllocator());
|
|
}
|
|
obj.AddMember("atoms", rjAtoms, doc.GetAllocator());
|
|
|
|
rj::Value rjBonds(rj::kArrayType);
|
|
for (const auto qbond : q_mol.bonds()) {
|
|
unsigned int beginIdx = qbond->getBeginAtomIdx();
|
|
unsigned int endIdx = qbond->getEndAtomIdx();
|
|
if (beginIdx >= match.size() || endIdx >= match.size()) {
|
|
continue;
|
|
}
|
|
unsigned int idx1 = match[beginIdx].second;
|
|
unsigned int idx2 = match[endIdx].second;
|
|
const auto bond = d_mol.getBondBetweenAtoms(idx1, idx2);
|
|
if (bond != nullptr) {
|
|
rjBonds.PushBack(bond->getIdx(), doc.GetAllocator());
|
|
}
|
|
}
|
|
obj.AddMember("bonds", rjBonds, doc.GetAllocator());
|
|
}
|
|
|
|
std::string mol_to_svg(const ROMol &m, unsigned int w, unsigned int h,
|
|
const std::string &details = "") {
|
|
std::vector<int> atomIds;
|
|
std::vector<int> bondIds;
|
|
std::string legend = "";
|
|
int offsetx = 0, offsety = 0;
|
|
if (!details.empty()) {
|
|
auto problems = process_details(details, w, h, offsetx, offsety, legend,
|
|
atomIds, bondIds);
|
|
if (!problems.empty()) {
|
|
return problems;
|
|
}
|
|
}
|
|
|
|
MolDraw2DSVG drawer(w, h);
|
|
if (!details.empty()) {
|
|
MolDraw2DUtils::updateDrawerParamsFromJSON(drawer, details);
|
|
}
|
|
drawer.setOffset(offsetx, offsety);
|
|
|
|
MolDraw2DUtils::prepareAndDrawMolecule(drawer, m, legend, &atomIds, &bondIds);
|
|
drawer.finishDrawing();
|
|
|
|
return drawer.getDrawingText();
|
|
}
|
|
|
|
std::string get_descriptors(const ROMol &m) {
|
|
rj::Document doc;
|
|
doc.SetObject();
|
|
|
|
Descriptors::Properties props;
|
|
std::vector<std::string> dns = props.getPropertyNames();
|
|
std::vector<double> dvs = props.computeProperties(m);
|
|
for (size_t i = 0; i < dns.size(); ++i) {
|
|
rj::Value v(dvs[i]);
|
|
const auto srt = rj::StringRef(dns[i].c_str());
|
|
doc.AddMember(srt, v, doc.GetAllocator());
|
|
}
|
|
|
|
rj::StringBuffer buffer;
|
|
rj::Writer<rj::StringBuffer> writer(buffer);
|
|
writer.SetMaxDecimalPlaces(5);
|
|
doc.Accept(writer);
|
|
return buffer.GetString();
|
|
}
|
|
|
|
namespace {
|
|
template <typename T, typename U>
|
|
std::unique_ptr<RWMol> standardize_func(T &mol, const std::string &details_json,
|
|
U func) {
|
|
MolStandardize::CleanupParameters ps =
|
|
MolStandardize::defaultCleanupParameters;
|
|
if (!details_json.empty()) {
|
|
MolStandardize::updateCleanupParamsFromJSON(ps, details_json);
|
|
}
|
|
return std::unique_ptr<RWMol>(static_cast<RWMol *>(func(mol, ps)));
|
|
}
|
|
} // namespace
|
|
|
|
std::unique_ptr<RWMol> do_cleanup(RWMol &mol, const std::string &details_json) {
|
|
return standardize_func(mol, details_json, MolStandardize::cleanup);
|
|
}
|
|
|
|
std::unique_ptr<RWMol> do_normalize(RWMol &mol,
|
|
const std::string &details_json) {
|
|
auto molp = &mol;
|
|
return standardize_func(molp, details_json, MolStandardize::normalize);
|
|
}
|
|
|
|
std::unique_ptr<RWMol> do_reionize(RWMol &mol,
|
|
const std::string &details_json) {
|
|
auto molp = &mol;
|
|
return standardize_func(molp, details_json, MolStandardize::reionize);
|
|
}
|
|
|
|
std::unique_ptr<RWMol> do_canonical_tautomer(RWMol &mol,
|
|
const std::string &details_json) {
|
|
MolStandardize::CleanupParameters ps =
|
|
MolStandardize::defaultCleanupParameters;
|
|
if (!details_json.empty()) {
|
|
MolStandardize::updateCleanupParamsFromJSON(ps, details_json);
|
|
}
|
|
MolStandardize::TautomerEnumerator te(ps);
|
|
std::unique_ptr<RWMol> res(static_cast<RWMol *>(te.canonicalize(mol)));
|
|
return res;
|
|
}
|
|
|
|
std::unique_ptr<RWMol> do_neutralize(RWMol &mol,
|
|
const std::string &details_json) {
|
|
MolStandardize::CleanupParameters ps =
|
|
MolStandardize::defaultCleanupParameters;
|
|
if (!details_json.empty()) {
|
|
MolStandardize::updateCleanupParamsFromJSON(ps, details_json);
|
|
}
|
|
MolStandardize::Uncharger uncharger(ps.doCanonical);
|
|
std::unique_ptr<RWMol> res(static_cast<RWMol *>(uncharger.uncharge(mol)));
|
|
return res;
|
|
}
|
|
|
|
std::unique_ptr<RWMol> do_charge_parent(RWMol &mol,
|
|
const std::string &details_json) {
|
|
MolStandardize::CleanupParameters ps =
|
|
MolStandardize::defaultCleanupParameters;
|
|
bool skipStandardize = false;
|
|
if (!details_json.empty()) {
|
|
MolStandardize::updateCleanupParamsFromJSON(ps, details_json);
|
|
boost::property_tree::ptree pt;
|
|
std::istringstream ss;
|
|
ss.str(details_json);
|
|
boost::property_tree::read_json(ss, pt);
|
|
LPT_OPT_GET(skipStandardize);
|
|
}
|
|
std::unique_ptr<RWMol> res(
|
|
MolStandardize::chargeParent(mol, ps, skipStandardize));
|
|
return res;
|
|
}
|
|
|
|
std::unique_ptr<RWMol> do_fragment_parent(RWMol &mol,
|
|
const std::string &details_json) {
|
|
MolStandardize::CleanupParameters ps =
|
|
MolStandardize::defaultCleanupParameters;
|
|
bool skipStandardize = false;
|
|
if (!details_json.empty()) {
|
|
MolStandardize::updateCleanupParamsFromJSON(ps, details_json);
|
|
boost::property_tree::ptree pt;
|
|
std::istringstream ss;
|
|
ss.str(details_json);
|
|
boost::property_tree::read_json(ss, pt);
|
|
LPT_OPT_GET(skipStandardize);
|
|
}
|
|
std::unique_ptr<RWMol> res(
|
|
MolStandardize::fragmentParent(mol, ps, skipStandardize));
|
|
return res;
|
|
}
|
|
|
|
} // namespace MinimalLib
|
|
} // namespace RDKit
|
|
#undef LPT_OPT_GET
|
|
#undef LPT_OPT_GET2 |