Support for a JSON-based molecule interchange format (#1798)

* expose addRing to python

* backup commit

* basics of atom and bond loading

* basics of atom and bond loading

* do bond stereo too

* Loads of cleanups
Read conformers and molecule properties
Better tests
update one of the test values so that we aren't always testing ccw

* enable auto-download of rapidjson distrib

* reader now mostly done (still missing atomic/bond properties)

* a bit of optimization

* a bit of cleanup

* a bit more optimization

* backup

* test zero-order bonds

* prep work for writer (not done)

* add isotope support

* backup

* getting there with the writing

* progress on writing; still need to do the RDKit part though

* can now roundtrip, including chirality.
double bond stereo needs to wait for backend fixes

* add a timing test for benchmarking

* add a timing test for benchmarking

* add another template

* add parse params; optionally set bond types to aromatic; add python wrapper

* disable benchmark run by default

* write conformers

* refactoring

* docs

* port to commonchem

* switch representation

* start reading and writing properties

* fix a memory problem

* set a common_property for gasteiger charges

* parse partial charges

* add partial charge writing

* reformatting

* add support for disabling parts of the parsing

* remove the "name" property from files

* editing

* more post-review changes

* make the molinterchange build optional

* make them really optional
This commit is contained in:
Greg Landrum
2018-04-02 05:12:03 +02:00
committed by GitHub
parent 76fd65efa0
commit 5cf8a6a9b7
21 changed files with 1911 additions and 96 deletions

View File

@@ -43,6 +43,7 @@ option(RDK_USE_STRICT_ROTOR_DEFINITION "Use the most strict rotatable bond defin
# Optional components; each of these is a boolean cache option.
option(RDK_BUILD_DESCRIPTORS3D
       "Build the 3D descriptors calculators, requires Eigen3 to be installed"
       ON)
option(RDK_BUILD_FREESASA_SUPPORT "build the rdkit freesasa wrapper" OFF)
option(RDK_BUILD_COORDGEN_SUPPORT "build the rdkit coordgen wrapper" ON)
# JSON (CommonChem) molecule interchange; on by default.
option(RDK_BUILD_MOLINTERCHANGE_SUPPORT
       "build in support for CommonChem molecule interchange"
       ON)
if(NOT MSVC)
if(RDK_OPTIMIZE_NATIVE)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mpopcnt")

View File

@@ -81,6 +81,10 @@ add_subdirectory(Trajectory)
add_subdirectory(SubstructLibrary)
add_subdirectory(RGroupDecomposition)

# The CommonChem/JSON interchange code is only built when the option is ON.
if(RDK_BUILD_MOLINTERCHANGE_SUPPORT)
  add_subdirectory(MolInterchange)
endif()
if(RDK_BUILD_SLN_SUPPORT AND NOT CMAKE_COMPILER_IS_CLANG)
if(CMAKE_COMPILER_IS_GNUCC AND NOT CMAKE_CXX_COMPILER_VERSION GREATER 4.8)
message("Skipping SLN build with gcc version <= 4.8")

View File

@@ -0,0 +1,25 @@
# Build rules for the MolInterchange (CommonChem JSON) library.
#
# Only the RapidJSON headers are needed. If the unpacked sources are not
# already present under External/, fetch the release tarball with the project
# helper downloadAndCheckMD5() and unpack it there.
set(RDK_RAPIDJSON_VERSION "1.1.0")
set(RDK_RAPIDJSON_DIR
    "${CMAKE_SOURCE_DIR}/External/rapidjson-${RDK_RAPIDJSON_VERSION}")

if(NOT EXISTS "${RDK_RAPIDJSON_DIR}")
  downloadAndCheckMD5(
    "https://github.com/Tencent/rapidjson/archive/v${RDK_RAPIDJSON_VERSION}.tar.gz"
    "${RDK_RAPIDJSON_DIR}.tar.gz"
    "badd12c511e081fec6c89c43a7027bce")
  # Paths are quoted so that a source tree containing spaces still works, and
  # the exit status is checked so that a failed extraction stops the configure
  # step instead of surfacing later as a missing-header compile error.
  execute_process(
    COMMAND "${CMAKE_COMMAND}" -E tar zxf "${RDK_RAPIDJSON_DIR}.tar.gz"
    WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}/External"
    RESULT_VARIABLE rdk_rapidjson_untar_result)
  if(NOT rdk_rapidjson_untar_result EQUAL 0)
    message(FATAL_ERROR
            "Could not extract ${RDK_RAPIDJSON_DIR}.tar.gz "
            "(tar returned: ${rdk_rapidjson_untar_result})")
  endif()
else()
  message(STATUS "Found RapidJSON source in ${CMAKE_SOURCE_DIR}/External")
endif()

# Directory scope is kept here because the targets are created by the
# project's rdkit_library()/rdkit_test() helpers.
include_directories("${RDK_RAPIDJSON_DIR}/include")

rdkit_library(MolInterchange
              Parser.cpp Writer.cpp
              LINK_LIBRARIES GraphMol)

rdkit_headers(MolInterchange.h details.h
              DEST GraphMol/MolInterchange)

rdkit_test(molInterchangeTest1 test1.cpp
           LINK_LIBRARIES MolInterchange FileParsers SmilesParse GraphMol RDGeneral)

add_subdirectory(Wrap)

View File

@@ -0,0 +1,88 @@
//
// Copyright (C) 2018 Greg Landrum
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
#ifndef RD_MOLINTERCHANGE_H_JAN2018
#define RD_MOLINTERCHANGE_H_JAN2018
/*! \file MolInterchange.h
\brief Contains the public API for the converters to/from the CommonChem
interchange format
\b Note that this should be considered beta and that the format and API
will very likely change in future releases.
More information about CommonChem is available here:
https://github.com/mcs07/CommonChem
*/
#include <string>
#include <iostream>
#include <vector>
#include <boost/shared_ptr.hpp>
namespace RDKit {
// Forward declarations: the functions below only use these types behind
// boost::shared_ptr, so the full class definitions are not needed here.
// ROMol was previously used without being declared, which made this header
// depend on the caller having included an RDKit header first.
class ROMol;
class RWMol;

namespace MolInterchange {

//! \brief parameters controlling parsing of MolJSON
struct JSONParseParameters {
  bool setAromaticBonds =
      true; /*!< toggles setting the BondType of aromatic bonds to Aromatic */
  bool strictValenceCheck =
      false; /*!< toggles doing reasonable valence checks */
  bool parseProperties =
      true; /*!< toggles extracting molecular properties from the JSON block */
  bool parseConformers =
      true; /*!< toggles extracting conformers from the JSON block */
};

//! Default-constructed parameters, used as the default argument below.
//! Declared static, so every translation unit that includes this header gets
//! its own copy.
static JSONParseParameters defaultJSONParseParameters;

//! \brief construct molecules from MolJSON data in a stream
/*!
 *   \param inStream - stream containing the data
 *   \param params   - parsing options
 */
std::vector<boost::shared_ptr<ROMol>> JSONDataStreamToMols(
    std::istream *inStream,
    const JSONParseParameters &params = defaultJSONParseParameters);

//! \brief construct molecules from MolJSON data
/*!
 *   \param jsonBlock - string containing the JSON data
 *   \param params    - parsing options
 */
std::vector<boost::shared_ptr<ROMol>> JSONDataToMols(
    const std::string &jsonBlock,
    const JSONParseParameters &params = defaultJSONParseParameters);

//! \brief returns MolJSON for a set of molecules
/*!
 *   \param mols - the molecules to work with
 */
template <typename T>
std::string MolsToJSONData(const std::vector<T> &mols);

//! \brief returns MolJSON for a molecule
/*!
 *   \param mol - the molecule to work with
 */
template <typename T>
std::string MolToJSONData(const T &mol) {
  // wrap the single molecule in a one-element vector of pointers and reuse
  // the multi-molecule writer
  std::vector<const T *> ms{&mol};
  return MolsToJSONData(ms);
}

}  // end of namespace MolInterchange
}  // end of namespace RDKit
#endif

View File

@@ -0,0 +1,469 @@
//
// Copyright (C) 2018 Greg Landrum
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
#ifdef _MSC_VER
#pragma warning(disable : 4503)
#endif
#include <RDGeneral/Invariant.h>
#include <RDGeneral/versions.h>
#include <GraphMol/RDKitBase.h>
#include <GraphMol/MolInterchange/MolInterchange.h>
#include <GraphMol/MolInterchange/details.h>
#include <RDGeneral/FileParseException.h>
#include <sstream>
#include <exception>
#include <map>
#include <memory>
#include <rapidjson/document.h>
#include <rapidjson/istreamwrapper.h>
#include <rapidjson/stringbuffer.h>
#include <rapidjson/writer.h>
#include <rapidjson/pointer.h>
namespace rj = rapidjson;
namespace RDKit {
namespace MolInterchange {
namespace {
// Caches the values found in a JSON "defaults" object so that repeated
// lookups of the same key do not have to search the rapidjson value again.
//
// The maps are keyed on the key's *pointer*, not its contents. Every caller
// in this file passes a string literal, where that is safe; a caller that
// reuses one buffer for different key names would get stale results.
//
// The wrapped value is held by reference and must outlive the cache. It may
// be a non-object (e.g. Null when the document has no defaults); in that case
// every lookup returns the type's fallback value (0, false or "").
struct DefaultValueCache {
  DefaultValueCache(const rj::Value &defs) : rjDefaults(defs) {}
  const rj::Value &rjDefaults;
  mutable std::map<const char *, int> intMap;
  mutable std::map<const char *, bool> boolMap;
  mutable std::map<const char *, std::string> stringMap;

  // Returns the default int for \c key, or 0 if there is none.
  // Throws FileParseException if the default exists but is not an int.
  int getInt(const char *key) const {
    PRECONDITION(key, "no key");
    const auto lookup = intMap.find(key);
    if (lookup != intMap.end()) return lookup->second;
    // FindMember() may only be called on objects
    if (!rjDefaults.IsObject()) return 0;
    const auto miter = rjDefaults.FindMember(key);
    if (miter != rjDefaults.MemberEnd()) {
      if (!miter->value.IsInt())
        throw FileParseException(std::string("Bad format: value of ") +
                                 std::string(key) +
                                 std::string(" is not an int"));
      int res = miter->value.GetInt();
      intMap[key] = res;
      return res;
    }
    return 0;
  }

  // Returns the default bool for \c key, or false if there is none.
  // Throws FileParseException if the default exists but is not a bool.
  bool getBool(const char *key) const {
    PRECONDITION(key, "no key");
    const auto lookup = boolMap.find(key);
    if (lookup != boolMap.end()) return lookup->second;
    if (!rjDefaults.IsObject()) return false;
    const auto miter = rjDefaults.FindMember(key);
    if (miter != rjDefaults.MemberEnd()) {
      if (!miter->value.IsBool())
        throw FileParseException(std::string("Bad format: value of ") +
                                 std::string(key) +
                                 std::string(" is not a bool"));
      bool res = miter->value.GetBool();
      boolMap[key] = res;
      return res;
    }
    return false;
  }

  // Returns the default string for \c key, or "" if there is none.
  // Throws FileParseException if the default exists but is not a string.
  std::string getString(const char *key) const {
    PRECONDITION(key, "no key");
    const auto lookup = stringMap.find(key);
    if (lookup != stringMap.end()) return lookup->second;
    if (!rjDefaults.IsObject()) return "";
    const auto miter = rjDefaults.FindMember(key);
    if (miter != rjDefaults.MemberEnd()) {
      if (!miter->value.IsString())
        throw FileParseException(std::string("Bad format: value of ") +
                                 std::string(key) +
                                 std::string(" is not a string"));
      std::string res = miter->value.GetString();
      stringMap[key] = res;
      return res;
    }
    return "";
  }
};
// Looks up an int member called \c key on \c from; when the member is absent
// the value comes from \c defaults instead.
// Throws FileParseException if the member is present but is not an int.
int getIntDefaultValue(const char *key, const rj::Value &from,
                       const DefaultValueCache &defaults) {
  PRECONDITION(key, "no key");
  const auto member = from.FindMember(key);
  if (member == from.MemberEnd()) {
    return defaults.getInt(key);
  }
  if (!member->value.IsInt()) {
    std::string msg("Bad format: value of ");
    msg += key;
    msg += " is not an int";
    throw FileParseException(msg);
  }
  return member->value.GetInt();
}
// Looks up a bool member called \c key on \c from; when the member is absent
// the value comes from \c defaults instead.
// Throws FileParseException if the member is present but is not a bool.
bool getBoolDefaultValue(const char *key, const rj::Value &from,
                         const DefaultValueCache &defaults) {
  PRECONDITION(key, "no key");
  const auto member = from.FindMember(key);
  if (member == from.MemberEnd()) {
    return defaults.getBool(key);
  }
  if (!member->value.IsBool()) {
    std::string msg("Bad format: value of ");
    msg += key;
    msg += " is not a bool";
    throw FileParseException(msg);
  }
  return member->value.GetBool();
}
// Looks up a string member called \c key on \c from; when the member is
// absent the value comes from \c defaults instead.
// Throws FileParseException if the member is present but is not a string.
std::string getStringDefaultValue(const char *key, const rj::Value &from,
                                  const DefaultValueCache &defaults) {
  PRECONDITION(key, "no key");
  const auto member = from.FindMember(key);
  if (member == from.MemberEnd()) {
    return defaults.getString(key);
  }
  if (!member->value.IsString()) {
    std::string msg("Bad format: value of ");
    msg += key;
    msg += " is not a string";
    throw FileParseException(msg);
  }
  return member->value.GetString();
}
// Creates one atom from the JSON object \c atomVal and appends it to \c mol.
// Fields missing from \c atomVal are taken from \c atomDefaults.
// Throws FileParseException if a field has the wrong JSON type or the
// "stereo" value is not a key of chilookup.
void readAtom(RWMol *mol, const rj::Value &atomVal,
              const DefaultValueCache &atomDefaults) {
  PRECONDITION(mol, "no mol");
  // Parse and validate every field before allocating the Atom, so that a
  // FileParseException raised for a malformed entry cannot leak it.
  const int z = getIntDefaultValue("z", atomVal, atomDefaults);
  const int impHs = getIntDefaultValue("impHs", atomVal, atomDefaults);
  const int chg = getIntDefaultValue("chg", atomVal, atomDefaults);
  const int nRad = getIntDefaultValue("nRad", atomVal, atomDefaults);
  const int isotope = getIntDefaultValue("isotope", atomVal, atomDefaults);
  const std::string stereo =
      getStringDefaultValue("stereo", atomVal, atomDefaults);
  const auto chiIter = chilookup.find(stereo);
  if (chiIter == chilookup.end())
    throw FileParseException("Bad Format: bad stereo value for atom");

  Atom *at = new Atom(z);
  // the hydrogen count comes from the file and is stored as explicit Hs, with
  // the no-implicit flag set on the atom
  at->setNoImplicit(true);
  at->setNumExplicitHs(impHs);
  at->setFormalCharge(chg);
  at->setNumRadicalElectrons(nRad);
  at->setIsotope(isotope);
  at->setChiralTag(chiIter->second);
  bool updateLabel = false, takeOwnership = true;
  mol->addAtom(at, updateLabel, takeOwnership);
}
// Adds the bond described by the JSON object \c bondVal to \c mol.
// The bond order ("bo") falls back to \c bondDefaults when absent.
// \c needStereoLoop is set to true (never reset) if the bond carries a
// "stereoAtoms" member, telling the caller that a second pass is required.
// Throws FileParseException if "atoms" is missing or is not an array of two
// ints, or if the bond order is not a key of bolookup.
void readBond(RWMol *mol, const rj::Value &bondVal,
              const DefaultValueCache &bondDefaults, bool &needStereoLoop) {
  PRECONDITION(mol, "no mol");
  // validate "atoms" explicitly: rapidjson only asserts on a missing member
  // or a wrong type, which is not a usable error for malformed input
  const auto atomsIter = bondVal.FindMember("atoms");
  if (atomsIter == bondVal.MemberEnd() || !atomsIter->value.IsArray())
    throw FileParseException("Bad Format: bond has no atoms array");
  const auto aids = atomsIter->value.GetArray();
  if (aids.Size() != 2 || !aids[0].IsInt() || !aids[1].IsInt())
    throw FileParseException("Bad Format: bond atoms should be two ints");

  // addBond() returns the new number of bonds, so the new bond's index is
  // one less than that
  unsigned int bid = mol->addBond(aids[0].GetInt(), aids[1].GetInt()) - 1;
  Bond *bnd = mol->getBondWithIdx(bid);
  unsigned int bo = getIntDefaultValue("bo", bondVal, bondDefaults);
  const auto boIter = bolookup.find(bo);
  if (boIter == bolookup.end())
    throw FileParseException("Bad Format: bad bond order for bond");
  bnd->setBondType(boIter->second);
  if (bondVal.HasMember("stereoAtoms")) needStereoLoop = true;
}
// Sets the stereo atoms and the stereo label of \c bnd from \c bondVal.
// The "stereo" label falls back to \c bondDefaults when absent.
// Throws FileParseException if:
//   - "stereoAtoms" is present but is not an array of two ints,
//   - the stereo label is not a key of stereolookup, or
//   - "stereo" is given without "stereoAtoms".
void readBondStereo(Bond *bnd, const rj::Value &bondVal,
                    const DefaultValueCache &bondDefaults) {
  PRECONDITION(bnd, "no bond");
  const auto miter = bondVal.FindMember("stereoAtoms");
  if (miter == bondVal.MemberEnd()) {
    if (bondVal.HasMember("stereo")) {
      throw FileParseException(
          "Bad Format: bond stereo provided without stereoAtoms");
    }
    return;
  }
  if (!miter->value.IsArray())
    throw FileParseException("Bad Format: stereoAtoms is not an array");
  const auto aids = miter->value.GetArray();
  if (aids.Size() != 2 || !aids[0].IsInt() || !aids[1].IsInt())
    throw FileParseException("Bad Format: stereoAtoms should be two ints");
  bnd->setStereoAtoms(aids[0].GetInt(), aids[1].GetInt());

  std::string stereo = getStringDefaultValue("stereo", bondVal, bondDefaults);
  const auto stereoIter = stereolookup.find(stereo);
  if (stereoIter == stereolookup.end())
    throw FileParseException("Bad Format: bond stereo value for bond");
  bnd->setStereo(stereoIter->second);
}
// Fills \c conf from the JSON object \c confVal.
//   "dim"    : 2 or 3; selects whether the conformer is flagged as 3D
//   "coords" : array with one [x, y] or [x, y, z] entry per atom
// For 2D data z is set to 0.0.
// Throws FileParseException if a member is missing or malformed, or if the
// number of coordinates differs from conf->getNumAtoms().
void readConformer(Conformer *conf, const rj::Value &confVal) {
  PRECONDITION(conf, "no conformer");
  if (!confVal.HasMember("dim"))
    throw FileParseException("Bad Format: no conformer dimension");
  const rj::Value &dimVal = confVal["dim"];
  if (!dimVal.IsInt() || (dimVal.GetInt() != 2 && dimVal.GetInt() != 3))
    throw FileParseException("Bad Format: conformer dimension != 2 or 3");
  const unsigned int dim = static_cast<unsigned int>(dimVal.GetInt());
  conf->set3D(dim == 3);

  if (!confVal.HasMember("coords"))
    throw FileParseException("Bad Format: no conformer coords");
  const rj::Value &coordsVal = confVal["coords"];
  if (!coordsVal.IsArray())
    throw FileParseException("Bad Format: conformer coords is not an array");

  size_t idx = 0;
  for (const auto &ptVal : coordsVal.GetArray()) {
    if (!ptVal.IsArray())
      throw FileParseException("Bad Format: coordinate is not an array");
    const auto &arr = ptVal.GetArray();
    if (arr.Size() != dim)
      throw FileParseException("coordinate contains wrong number of values");
    for (const auto &v : arr) {
      if (!v.IsNumber())
        throw FileParseException("Bad Format: coordinate is not a number");
    }
    // read as double: GetFloat() would discard precision that the writer
    // emitted
    RDGeom::Point3D pt(arr[0].GetDouble(), arr[1].GetDouble(),
                       (dim == 3 ? arr[2].GetDouble() : 0.0));
    conf->setAtomPos(idx++, pt);
  }
  if (idx != conf->getNumAtoms())
    throw FileParseException(
        "Bad Format: conformer doesn't contain coordinates for all atoms");
}
// Reads a "partialCharges" extension block and stores each value on the
// corresponding atom as the _GasteigerCharge property.
// A block whose "formatVersion" is newer than
// currentChargeRepresentationVersion is skipped with a warning.
// Throws FileParseException if "formatVersion" is missing or not an int, or
// if "values" is present but is not an array of doubles with one entry per
// atom.
// \c params is currently unused.
void readPartialCharges(RWMol *mol, const rj::Value &repVal,
                        const JSONParseParameters &params) {
  PRECONDITION(mol, "no molecule");
  PRECONDITION(repVal["name"].GetString() == std::string("partialCharges"),
               "bad charges");
  // The version is stored under "formatVersion". The previous code tested for
  // that key but then read a non-existent "version" member.
  const auto verIter = repVal.FindMember("formatVersion");
  if (verIter == repVal.MemberEnd())
    throw FileParseException("Bad Format: missing version");
  if (!verIter->value.IsInt())
    throw FileParseException("Bad Format: formatVersion is not an int");
  if (verIter->value.GetInt() > currentChargeRepresentationVersion) {
    BOOST_LOG(rdWarningLog) << "partialCharges version "
                            << verIter->value.GetInt()
                            << " too recent. Ignoring it." << std::endl;
    return;
  }

  const auto miter = repVal.FindMember("values");
  if (miter != repVal.MemberEnd()) {
    if (!miter->value.IsArray())
      throw FileParseException("Bad Format: values is not an array");
    const auto values = miter->value.GetArray();
    if (values.Size() != mol->getNumAtoms())
      throw FileParseException(
          "Bad Format: size of values array != num atoms");
    for (unsigned int idx = 0; idx != mol->getNumAtoms(); ++idx) {
      const auto &val = values[idx];
      if (!val.IsDouble())
        throw FileParseException("Bad Format: partial charge not double");
      mol->getAtomWithIdx(idx)->setProp(common_properties::_GasteigerCharge,
                                        val.GetDouble());
    }
  }
}
// Reads an "rdkitRepresentation" extension block and applies it to \c mol.
// Every member is optional:
//   aromaticAtoms : atom indices to flag as aromatic
//   aromaticBonds : bond indices to flag as aromatic; the bond type is also
//                   set to AROMATIC when params.setAromaticBonds is true
//   cipRanks      : one rank per atom, in atom order (_CIPRank)
//   cipCodes      : [atomIdx, code] pairs (_CIPCode)
//   atomRings     : arrays of atom indices; the matching bond rings are
//                   derived from the bonds between consecutive atoms
// A block with a "formatVersion" above 1 is skipped with a warning.
// Throws FileParseException on malformed members.
void readRDKitRepresentation(RWMol *mol, const rj::Value &repVal,
                             const JSONParseParameters &params) {
  PRECONDITION(mol, "no molecule");
  PRECONDITION(
      repVal["name"].GetString() == std::string("rdkitRepresentation"),
      "bad representation");
  // The version is stored under "formatVersion". The previous code tested for
  // that key but then read a non-existent "format_version" member.
  const auto verIter = repVal.FindMember("formatVersion");
  if (verIter == repVal.MemberEnd())
    throw FileParseException("Bad Format: missing format_version");
  if (!verIter->value.IsInt())
    throw FileParseException("Bad Format: formatVersion is not an int");
  // NOTE(review): the literal 1 presumably mirrors the writer's
  // currentRDKitRepresentationVersion - confirm and use the constant.
  if (verIter->value.GetInt() > 1) {
    BOOST_LOG(rdWarningLog) << "RDKit representation format version "
                            << verIter->value.GetInt()
                            << " too recent. Ignoring it." << std::endl;
    return;
  }

  {
    const auto miter = repVal.FindMember("aromaticAtoms");
    if (miter != repVal.MemberEnd()) {
      if (!miter->value.IsArray())
        throw FileParseException("Bad Format: aromaticAtoms not array");
      for (const auto &val : miter->value.GetArray()) {
        if (!val.IsInt())
          throw FileParseException("Bad Format: aromaticAtom not int");
        mol->getAtomWithIdx(val.GetInt())->setIsAromatic(true);
      }
    }
  }
  {
    const auto miter = repVal.FindMember("aromaticBonds");
    if (miter != repVal.MemberEnd()) {
      if (!miter->value.IsArray())
        throw FileParseException("Bad Format: aromaticBonds not array");
      for (const auto &val : miter->value.GetArray()) {
        if (!val.IsInt())
          throw FileParseException("Bad Format: aromaticBond not int");
        Bond *bnd = mol->getBondWithIdx(val.GetInt());
        bnd->setIsAromatic(true);
        if (params.setAromaticBonds) {
          bnd->setBondType(Bond::AROMATIC);
        }
      }
    }
  }
  {
    const auto miter = repVal.FindMember("cipRanks");
    if (miter != repVal.MemberEnd()) {
      if (!miter->value.IsArray())
        throw FileParseException("Bad Format: cipRanks not array");
      size_t i = 0;
      for (const auto &val : miter->value.GetArray()) {
        if (!val.IsInt())
          throw FileParseException("Bad Format: ciprank not int");
        // stored as unsigned int because the writer in this module reads the
        // property back with getProp<unsigned int>()
        mol->getAtomWithIdx(i++)->setProp(
            common_properties::_CIPRank,
            static_cast<unsigned int>(val.GetInt()));
      }
    }
  }
  {
    const auto miter = repVal.FindMember("cipCodes");
    if (miter != repVal.MemberEnd()) {
      if (!miter->value.IsArray())
        throw FileParseException("Bad Format: cipCodes not array");
      for (const auto &val : miter->value.GetArray()) {
        // each entry is an [atom index, code string] pair
        if (!val.IsArray() || val.Size() != 2 || !val[0].IsInt() ||
            !val[1].IsString())
          throw FileParseException(
              "Bad Format: CIPCode entry is not an [int, string] pair");
        mol->getAtomWithIdx(val[0].GetInt())
            ->setProp(common_properties::_CIPCode, val[1].GetString());
      }
    }
  }
  {
    const auto miter = repVal.FindMember("atomRings");
    if (miter != repVal.MemberEnd()) {
      if (!miter->value.IsArray())
        throw FileParseException("Bad Format: atomRings not array");
      CHECK_INVARIANT(!mol->getRingInfo()->isInitialized(),
                      "rings already initialized");
      auto ri = mol->getRingInfo();
      ri->initialize();
      for (const auto &val : miter->value.GetArray()) {
        if (!val.IsArray())
          throw FileParseException("Bad Format: atomRing not array");
        const size_t sz = val.Size();
        // an empty ring would make "sz - 1" wrap around in the old loop
        if (!sz) throw FileParseException("Bad Format: empty atomRing");
        INT_VECT atomRing;
        INT_VECT bondRing;
        atomRing.reserve(sz);
        bondRing.reserve(sz);
        // visit consecutive atom pairs; the modulo closes the ring by pairing
        // the last atom with the first
        for (size_t i = 0; i < sz; ++i) {
          const auto &v1 = val[i];
          const auto &v2 = val[(i + 1) % sz];
          if (!v1.IsInt() || !v2.IsInt())
            throw FileParseException("Bad Format: atomRing member not int");
          int idx1 = v1.GetInt();
          int idx2 = v2.GetInt();
          atomRing.push_back(idx1);
          const auto bnd = mol->getBondBetweenAtoms(idx1, idx2);
          CHECK_INVARIANT(bnd, "no bond found for ring");
          bondRing.push_back(bnd->getIdx());
        }
        ri->addRing(atomRing, bondRing);
      }
    }
  }
}
// Populates \c mol from the JSON molecule object \c molval.
// Steps, in order:
//   1. optional "name" -> _Name property
//   2. atoms, then bonds (both members are required)
//   3. bond stereo, in a second pass once all bonds exist
//   4. conformers (if params.parseConformers)
//   5. molecule properties (if params.parseProperties); only int, double and
//      string values are copied, other JSON types are ignored
//   6. the "rdkitRepresentation" and "partialCharges" extensions
// Finally the property cache is updated non-strictly and _StereochemDone is
// set.
// Throws FileParseException on malformed input.
void processMol(RWMol *mol, const rj::Value &molval,
                const DefaultValueCache &atomDefaults,
                const DefaultValueCache &bondDefaults,
                const JSONParseParameters &params) {
  PRECONDITION(mol, "no mol");
  if (!molval.IsObject())
    throw FileParseException("Bad Format: molecule is not an object");
  if (molval.HasMember("name")) {
    if (!molval["name"].IsString())
      throw FileParseException("Bad Format: molecule name is not a string");
    mol->setProp(common_properties::_Name, molval["name"].GetString());
  }
  if (!molval.HasMember("atoms"))
    throw FileParseException("Bad Format: missing atoms in JSON");
  if (!molval.HasMember("bonds"))
    throw FileParseException("Bad Format: missing bonds in JSON");
  if (!molval["atoms"].IsArray())
    throw FileParseException("Bad Format: atoms is not an array");
  if (!molval["bonds"].IsArray())
    throw FileParseException("Bad Format: bonds is not an array");

  for (const auto &atomVal : molval["atoms"].GetArray()) {
    readAtom(mol, atomVal, atomDefaults);
  }
  bool needStereoLoop = false;
  for (const auto &bondVal : molval["bonds"].GetArray()) {
    readBond(mol, bondVal, bondDefaults, needStereoLoop);
  }
  if (needStereoLoop) {
    // need to set bond stereo after the bonds are there
    unsigned int bidx = 0;
    for (const auto &bondVal : molval["bonds"].GetArray()) {
      Bond *bnd = mol->getBondWithIdx(bidx++);
      readBondStereo(bnd, bondVal, bondDefaults);
    }
  }

  if (params.parseConformers && molval.HasMember("conformers")) {
    if (!molval["conformers"].IsArray())
      throw FileParseException("Bad Format: conformers is not an array");
    for (const auto &confVal : molval["conformers"].GetArray()) {
      // owned by the unique_ptr until handed to the molecule, so that a
      // parse error in readConformer() does not leak the Conformer
      std::unique_ptr<Conformer> conf(new Conformer(mol->getNumAtoms()));
      readConformer(conf.get(), confVal);
      mol->addConformer(conf.release(), true);
    }
  }

  if (params.parseProperties && molval.HasMember("properties")) {
    if (!molval["properties"].IsObject())
      throw FileParseException("Bad Format: properties is not an object");
    for (const auto &propVal : molval["properties"].GetObject()) {
      if (propVal.value.IsInt())
        mol->setProp(propVal.name.GetString(), propVal.value.GetInt());
      else if (propVal.value.IsDouble())
        mol->setProp(propVal.name.GetString(), propVal.value.GetDouble());
      else if (propVal.value.IsString())
        mol->setProp(propVal.name.GetString(), propVal.value.GetString());
    }
  }

  if (molval.HasMember("extensions")) {
    if (!molval["extensions"].IsArray())
      throw FileParseException("Bad Format: extensions is not an array");
    for (const auto &propVal : molval["extensions"].GetArray()) {
      if (!propVal.IsObject() || !propVal.HasMember("name") ||
          !propVal["name"].IsString())
        throw FileParseException(
            "Bad Format: representation has no name member");
      const std::string extName = propVal["name"].GetString();
      // extensions with an unrecognized name are skipped
      if (extName == "rdkitRepresentation") {
        readRDKitRepresentation(mol, propVal, params);
      } else if (extName == "partialCharges") {
        readPartialCharges(mol, propVal, params);
      }
    }
  }
  mol->updatePropertyCache(false);
  mol->setProp(common_properties::_StereochemDone, 1);
}
// Converts a parsed JSON document into molecules.
// The document must be an object whose "commonchem" member is an object with
// an int "version" equal to currentMolJSONVersion. "/defaults/atom" and
// "/defaults/bond" are optional; "molecules", if present, must be an array.
// Note that the defaults are *moved* out of \c doc (rapidjson assignment has
// move semantics), which is why \c doc is taken by non-const reference.
// Throws FileParseException on malformed input.
std::vector<boost::shared_ptr<ROMol>> DocToMols(
    rj::Document &doc, const JSONParseParameters &params) {
  std::vector<boost::shared_ptr<ROMol>> res;

  // some error checking
  if (!doc.IsObject())
    throw FileParseException("Bad Format: JSON should be an object");
  if (!doc.HasMember("commonchem") || !doc["commonchem"].IsObject())
    throw FileParseException("Bad Format: missing header in JSON");
  if (!doc["commonchem"].HasMember("version"))
    throw FileParseException("Bad Format: missing version in JSON");
  if (!doc["commonchem"]["version"].IsInt() ||
      doc["commonchem"]["version"].GetInt() != currentMolJSONVersion)
    throw FileParseException("Bad Format: bad version in JSON");

  rj::Value atomDefaults_;
  if (auto *ptr = rj::GetValueByPointer(doc, "/defaults/atom")) {
    atomDefaults_ = *ptr;
    if (!atomDefaults_.IsObject())
      throw FileParseException("Bad Format: atomDefaults is not an object");
  }
  const DefaultValueCache atomDefaults(atomDefaults_);

  rj::Value bondDefaults_;
  if (auto *ptr = rj::GetValueByPointer(doc, "/defaults/bond")) {
    bondDefaults_ = *ptr;
    if (!bondDefaults_.IsObject())
      throw FileParseException("Bad Format: bondDefaults is not an object");
  }
  const DefaultValueCache bondDefaults(bondDefaults_);

  if (doc.HasMember("molecules")) {
    if (!doc["molecules"].IsArray())
      throw FileParseException("Bad Format: molecules is not an array");
    for (const auto &molval : doc["molecules"].GetArray()) {
      // held by a smart pointer from the start so that an exception thrown
      // while processing the molecule does not leak it
      boost::shared_ptr<RWMol> mol(new RWMol());
      processMol(mol.get(), molval, atomDefaults, bondDefaults, params);
      mol->updatePropertyCache(params.strictValenceCheck);
      mol->setProp(common_properties::_StereochemDone, 1);
      res.push_back(mol);
    }
  }

  return res;
}
} // end of anonymous namespace
std::vector<boost::shared_ptr<ROMol>> JSONDataStreamToMols(
std::istream *inStream, const JSONParseParameters &params) {
PRECONDITION(inStream, "no stream");
rj::IStreamWrapper isw(*inStream);
rj::Document doc;
doc.ParseStream(isw);
return DocToMols(doc, params);
}
std::vector<boost::shared_ptr<ROMol>> JSONDataToMols(
const std::string &jsonBlock, const JSONParseParameters &params) {
rj::Document doc;
doc.Parse(jsonBlock.c_str());
return DocToMols(doc, params);
}
} // end of namespace MolInterchange
} // end of namespace RDKit

View File

@@ -0,0 +1,6 @@
# Python bindings for the MolInterchange library, plus their unit test.
rdkit_python_extension(
  rdMolInterchange
  rdMolInterchange.cpp
  DEST Chem
  LINK_LIBRARIES
    MolInterchange
    GraphMol
    RDBoost)

add_pytest(pyMolInterchange
           ${CMAKE_CURRENT_SOURCE_DIR}/testMolInterchange.py)

View File

@@ -0,0 +1,100 @@
//
// Copyright (C) 2018 Greg Landrum
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
#include <RDBoost/python.h>
#include <GraphMol/GraphMol.h>
#include <RDBoost/Wrap.h>
#include <GraphMol/MolInterchange/MolInterchange.h>
namespace python = boost::python;
namespace {
// Python-facing parser: converts a JSON string into a tuple of molecules.
// \c pyparams may be a JSONParseParameters instance; if it evaluates to false
// (e.g. the default, None) the library defaults are used.
python::tuple JSONToMols(const std::string &jsonBlock,
                         python::object pyparams) {
  namespace mi = RDKit::MolInterchange;
  mi::JSONParseParameters params = mi::defaultJSONParseParameters;
  if (pyparams) {
    params = python::extract<mi::JSONParseParameters>(pyparams);
  }
  const auto parsed = mi::JSONDataToMols(jsonBlock, params);
  // collect into a list first; the tuple is built from it at the end
  python::list collected;
  for (const auto &molPtr : parsed) {
    collected.append(molPtr);
  }
  return python::tuple(collected);
}
// Python-facing writer: converts a Python sequence of molecules to JSON.
std::string MolsToJSON(const python::object &mols) {
  auto pymols = pythonObjectToVect<const RDKit::ROMol *>(mols);
  // NOTE(review): pythonObjectToVect() presumably returns a null pointer for
  // an empty/None argument - confirm. Guard so that this case is written as
  // an empty molecule set instead of dereferencing null.
  if (!pymols) {
    std::vector<const RDKit::ROMol *> noMols;
    return RDKit::MolInterchange::MolsToJSONData(noMols);
  }
  return RDKit::MolInterchange::MolsToJSONData(*pymols);
}
}
// Python module definition: exposes JSONParseParameters and the
// MolToJSON / MolsToJSON / JSONToMols functions.
BOOST_PYTHON_MODULE(rdMolInterchange) {
  python::scope().attr("__doc__") =
      "Module containing functions for interchange of molecules.\n"
      "Note that this should be considered beta and that the format\n"
      "  and API will very likely change in future releases.";

  // docstring typo fixed: "Paramters" -> "Parameters"
  python::class_<RDKit::MolInterchange::JSONParseParameters,
                 boost::noncopyable>("JSONParseParameters",
                                     "Parameters controlling the JSON parser")
      .def_readwrite(
          "setAromaticBonds",
          &RDKit::MolInterchange::JSONParseParameters::setAromaticBonds,
          "set bond types to aromatic for bonds flagged aromatic")
      .def_readwrite(
          "strictValenceCheck",
          &RDKit::MolInterchange::JSONParseParameters::strictValenceCheck,
          "be strict when checking atom valences")
      .def_readwrite(
          "parseConformers",
          &RDKit::MolInterchange::JSONParseParameters::parseConformers,
          "parse conformers in the JSON")
      .def_readwrite(
          "parseProperties",
          &RDKit::MolInterchange::JSONParseParameters::parseProperties,
          "parse molecular properties in the JSON");

  std::string docString;
  docString =
      "Convert a single molecule to JSON\n"
      "\n"
      "ARGUMENTS:\n"
      "- mol: the molecule to work with\n"
      "RETURNS:\n"
      "a string\n";
  // the cast selects the ROMol instantiation of the MolToJSONData template
  python::def("MolToJSON", (std::string(*)(const RDKit::ROMol &))
                               RDKit::MolInterchange::MolToJSONData,
              (python::arg("mol")), docString.c_str());

  docString =
      "Convert a set of molecules to JSON\n"
      "\n"
      "ARGUMENTS:\n"
      "- mols: the molecules to work with\n"
      "RETURNS:\n"
      "a string\n";
  python::def("MolsToJSON", MolsToJSON, (python::arg("mols")),
              docString.c_str());

  // the jsonBlock argument was previously described as "the molecule to work
  // with", which is wrong for the parsing direction
  docString =
      "Convert JSON to a tuple of molecules\n"
      "\n"
      "ARGUMENTS:\n"
      "- jsonBlock: the JSON string to parse\n"
      "- params: (optional) JSONParseParameters controlling the JSON parsing\n"
      "RETURNS:\n"
      "a tuple of Mols\n";
  python::def(
      "JSONToMols", JSONToMols,
      (python::arg("jsonBlock"), python::arg("params") = python::object()),
      docString.c_str());
}

View File

@@ -0,0 +1,31 @@
import unittest
from rdkit import Chem
from rdkit.Chem import rdMolInterchange
from rdkit import RDConfig
class TestCase(unittest.TestCase):
  """Round-trip tests for the rdMolInterchange JSON writer and reader."""

  def setUp(self):
    pass

  def test1(self):
    """Mol -> JSON -> Mol must leave the canonical SMILES unchanged."""
    smis = ('c1ccccc1', 'C[C@H](F)Cl')

    # one molecule per JSON document
    for smi in smis:
      mol = Chem.MolFromSmiles(smi)
      expected = Chem.MolToSmiles(mol)
      jsonText = rdMolInterchange.MolToJSON(mol)
      parsed = rdMolInterchange.JSONToMols(jsonText)
      self.assertEqual(len(parsed), 1)
      self.assertEqual(expected, Chem.MolToSmiles(parsed[0]))

    # all molecules in a single JSON document
    originals = [Chem.MolFromSmiles(smi) for smi in smis]
    jsonText = rdMolInterchange.MolsToJSON(originals)
    parsed = rdMolInterchange.JSONToMols(jsonText)
    self.assertEqual(len(originals), len(parsed))
    smilesBefore = [Chem.MolToSmiles(x) for x in originals]
    smilesAfter = [Chem.MolToSmiles(x) for x in parsed]
    self.assertEqual(smilesBefore, smilesAfter)
if __name__ == '__main__':
unittest.main()

View File

@@ -0,0 +1,382 @@
//
// Copyright (C) 2018 Greg Landrum
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
#ifdef _MSC_VER
#pragma warning(disable : 4503)
#endif
#include <RDGeneral/Invariant.h>
#include <RDGeneral/versions.h>
#include <GraphMol/RDKitBase.h>
#include <GraphMol/MolInterchange/MolInterchange.h>
#include <GraphMol/MolInterchange/details.h>
#include <RDGeneral/FileParseException.h>
#include <sstream>
#include <exception>
#include <map>
#include <rapidjson/document.h>
#include <rapidjson/stringbuffer.h>
#include <rapidjson/writer.h>
#include "rapidjson/pointer.h"
namespace rj = rapidjson;
namespace RDKit {
namespace MolInterchange {
namespace {
// Fills \c rjDefaults with the atom defaults written to every document:
//   {"z": 6, "impHs": 0, "chg": 0, "nRad": 0, "isotope": 0,
//    "stereo": "unspecified"}
void initAtomDefaults(rj::Value &rjDefaults, rj::Document &document) {
  auto &alloc = document.GetAllocator();
  rjDefaults.AddMember("z", 6, alloc);
  rjDefaults.AddMember("impHs", 0, alloc);
  rjDefaults.AddMember("chg", 0, alloc);
  rjDefaults.AddMember("nRad", 0, alloc);
  rjDefaults.AddMember("isotope", 0, alloc);
  rjDefaults.AddMember("stereo", "unspecified", alloc);
}
// Fills \c rjDefaults with the bond defaults written to every document:
//   {"bo": 1, "stereo": "unspecified"}
void initBondDefaults(rj::Value &rjDefaults, rj::Document &document) {
  auto &alloc = document.GetAllocator();
  rjDefaults.AddMember("bo", 1, alloc);
  rjDefaults.AddMember("stereo", "unspecified", alloc);
}
// Adds the format version (currentMolJSONVersion) to the header object.
void initHeader(rj::Value &rjHeader, rj::Document &document) {
  auto &alloc = document.GetAllocator();
  rjHeader.AddMember("version", currentMolJSONVersion, alloc);
}
// Writes the int \c val to \c dest under \c tag, unless \c defaults has an
// entry for \c tag with the same value (defaults are not repeated).
// \c tag is referenced, not copied, so it must outlive the document.
void addIntVal(rj::Value &dest, const rj::Value &defaults, const char *tag,
               int val, rj::Document &doc) {
  const auto name = rj::StringRef(tag);
  const auto found = defaults.FindMember(name);
  const bool matchesDefault =
      found != defaults.MemberEnd() && found->value.GetInt() == val;
  if (matchesDefault) {
    return;
  }
  dest.AddMember(name, val, doc.GetAllocator());
}
// Writes the string \c val to \c dest under \c tag.
// If \c defaults has an entry for \c tag, the value is only written when it
// is non-empty and differs from that default; without a default it is always
// written (even when empty).
// \c tag is referenced, not copied, so it must outlive the document.
void addStringVal(rj::Value &dest, const rj::Value &defaults, const char *tag,
                  const std::string &val, rj::Document &doc) {
  rj::Value copied;
  copied.SetString(val.c_str(), val.size(), doc.GetAllocator());
  const auto name = rj::StringRef(tag);
  const auto found = defaults.FindMember(name);
  bool shouldWrite = true;
  if (found != defaults.MemberEnd()) {
    const std::string defaultVal = found->value.GetString();
    shouldWrite = !val.empty() && defaultVal != val;
  }
  if (shouldWrite) {
    dest.AddMember(name, copied, doc.GetAllocator());
  }
}
// Serializes \c atom into the JSON object \c rjAtom; values equal to the
// defaults in \c rjDefaults are omitted.
// "impHs" holds the atom's total hydrogen count. A chiral tag missing from
// inv_chilookup is logged and written as the default.
void addAtom(const Atom &atom, rj::Value &rjAtom, rj::Document &doc,
             const rj::Value &rjDefaults) {
  addIntVal(rjAtom, rjDefaults, "z", atom.getAtomicNum(), doc);
  addIntVal(rjAtom, rjDefaults, "impHs", atom.getTotalNumHs(), doc);
  addIntVal(rjAtom, rjDefaults, "chg", atom.getFormalCharge(), doc);
  addIntVal(rjAtom, rjDefaults, "isotope", atom.getIsotope(), doc);
  addIntVal(rjAtom, rjDefaults, "nRad", atom.getNumRadicalElectrons(), doc);

  // an empty label means "not recognized"; addStringVal() then leaves the
  // field to the default
  std::string stereoLabel;
  const auto hit = inv_chilookup.find(atom.getChiralTag());
  if (hit == inv_chilookup.end()) {
    BOOST_LOG(rdWarningLog)
        << " unrecognized atom chirality set to default while writing"
        << std::endl;
  } else {
    stereoLabel = hit->second;
  }
  addStringVal(rjAtom, rjDefaults, "stereo", stereoLabel, doc);
}
void addBond(const Bond &bond, rj::Value &rjBond, rj::Document &doc,
const rj::Value &rjDefaults) {
int bo = 0;
if (inv_bolookup.find(bond.getBondType()) != inv_bolookup.end()) {
bo = inv_bolookup.find(bond.getBondType())->second;
} else {
BOOST_LOG(rdWarningLog)
<< " unrecognized bond type set to zero while writing" << std::endl;
}
addIntVal(rjBond, rjDefaults, "bo", bo, doc);
rj::Value rjAtoms(rj::kArrayType);
rj::Value v1(static_cast<int>(bond.getBeginAtomIdx()));
rj::Value v2(static_cast<int>(bond.getEndAtomIdx()));
rjAtoms.PushBack(v1, doc.GetAllocator());
rjAtoms.PushBack(v2, doc.GetAllocator());
rjBond.AddMember("atoms", rjAtoms, doc.GetAllocator());
std::string chi = "";
if (inv_stereolookup.find(bond.getStereo()) != inv_stereolookup.end()) {
chi = inv_stereolookup.find(bond.getStereo())->second;
} else {
BOOST_LOG(rdWarningLog) << " unrecognized bond stereo " << bond.getStereo()
<< " set to default while writing" << std::endl;
}
addStringVal(rjBond, rjDefaults, "stereo", chi, doc);
if (chi != "unspecified" && bond.getStereoAtoms().size() == 2) {
rj::Value rjStereoAtoms(rj::kArrayType);
rj::Value v1(static_cast<int>(bond.getStereoAtoms()[0]));
rj::Value v2(static_cast<int>(bond.getStereoAtoms()[1]));
rjStereoAtoms.PushBack(v1, doc.GetAllocator());
rjStereoAtoms.PushBack(v2, doc.GetAllocator());
rjBond.AddMember("stereoAtoms", rjStereoAtoms, doc.GetAllocator());
}
}
void addConformer(const Conformer &conf, rj::Value &rjConf, rj::Document &doc) {
int dim = 2;
if (conf.is3D()) {
dim = 3;
}
rjConf.AddMember("dim", dim, doc.GetAllocator());
rj::Value rjCoords(rj::kArrayType);
for (const auto &pos : conf.getPositions()) {
rj::Value rjPos(rj::kArrayType);
rjPos.PushBack(pos.x, doc.GetAllocator());
rjPos.PushBack(pos.y, doc.GetAllocator());
if (dim == 3) {
rjPos.PushBack(pos.z, doc.GetAllocator());
}
rjCoords.PushBack(rjPos, doc.GetAllocator());
}
rjConf.AddMember("coords", rjCoords, doc.GetAllocator());
}
// Serializes a single molecule into the JSON object rjMol.
//
// The molecule is copied and kekulized, then these members are added to rjMol
// in order: "name" (only if the _Name property is set), "atoms", "bonds",
// "conformers" (only if there are any), "properties" (only if
// getPropList(false, false) returns any names) and "extensions".
//
//   imol          molecule to write; only a copy of it is modified
//   rjMol         JSON object that receives the members
//   doc           document whose allocator owns every value created here
//   atomDefaults  default atom values, forwarded to addAtom()
//   bondDefaults  default bond values, forwarded to addBond()
//
// A molecule without atoms is written with empty "atoms"/"bonds" arrays: the
// per-atom extension data (cipRanks, partialCharges) is only probed when
// atom 0 actually exists.
template <typename T>
void addMol(const T &imol, rj::Value &rjMol, rj::Document &doc,
            const rj::Value &atomDefaults, const rj::Value &bondDefaults) {
  auto &alloc = doc.GetAllocator();

  // Work on a kekulized copy. The aromatic flags are read again further down
  // to build the aromaticAtoms/aromaticBonds lists.
  // NOTE(review): passing false presumably keeps those flags set - confirm
  // against the MolOps::Kekulize documentation.
  RWMol mol(imol);
  MolOps::Kekulize(mol, false);
  const bool hasAtoms = mol.getNumAtoms() > 0;

  if (mol.hasProp(common_properties::_Name)) {
    const std::string nm = mol.getProp<std::string>(common_properties::_Name);
    rj::Value nmv;
    nmv.SetString(nm.c_str(), nm.size(), alloc);
    rjMol.AddMember("name", nmv, alloc);
  }

  rj::Value rjAtoms(rj::kArrayType);
  for (const auto &at : mol.atoms()) {
    rj::Value rjAtom(rj::kObjectType);
    addAtom(*at, rjAtom, doc, atomDefaults);
    rjAtoms.PushBack(rjAtom, alloc);
  }
  rjMol.AddMember("atoms", rjAtoms, alloc);

  rj::Value rjBonds(rj::kArrayType);
  for (const auto &bnd : mol.bonds()) {
    rj::Value rjBond(rj::kObjectType);
    addBond(*bnd, rjBond, doc, bondDefaults);
    rjBonds.PushBack(rjBond, alloc);
  }
  rjMol.AddMember("bonds", rjBonds, alloc);

  if (mol.getNumConformers()) {
    rj::Value rjConfs(rj::kArrayType);
    for (auto conf = mol.beginConformers(); conf != mol.endConformers();
         ++conf) {
      rj::Value rjConf(rj::kObjectType);
      addConformer(*(conf->get()), rjConf, doc);
      rjConfs.PushBack(rjConf, alloc);
    }
    rjMol.AddMember("conformers", rjConfs, alloc);
  }

  // Molecule properties: each value is tried as int, then as double, and
  // finally read as a string. An exception from the final string read is not
  // caught here and propagates to the caller.
  bool includePrivate = false, includeComputed = false;
  auto propNames = mol.getPropList(includePrivate, includeComputed);
  if (propNames.size()) {
    rj::Value properties(rj::kObjectType);
    for (const auto &pN : propNames) {
      rj::Value rjv;
      try {
        auto val = mol.getProp<int>(pN);
        rjv = val;
      } catch (const boost::bad_any_cast &) {
        try {
          auto val = mol.getProp<double>(pN);
          rjv = val;
        } catch (const boost::bad_any_cast &) {
          auto val = mol.getProp<std::string>(pN);
          rjv.SetString(val.c_str(), val.size(), alloc);
        }
      }
      rj::Value rjpN;
      rjpN.SetString(pN.c_str(), pN.size(), alloc);
      properties.AddMember(rjpN, rjv, alloc);
    }
    rjMol.AddMember("properties", properties, alloc);
  }

  // ---- "rdkitRepresentation" extension ----
  rj::Value representation(rj::kObjectType);
  representation.AddMember("name", "rdkitRepresentation", alloc);
  representation.AddMember("formatVersion", currentRDKitRepresentationVersion,
                           alloc);
  rj::Value toolkitVersion;
  toolkitVersion.SetString(rj::StringRef(rdkitVersion));
  representation.AddMember("toolkitVersion", toolkitVersion, alloc);

  // the aromatic atom/bond index lists are only written if at least one atom
  // is flagged aromatic
  bool hasArom = false;
  for (const auto &atom : mol.atoms()) {
    if (atom->getIsAromatic()) {
      hasArom = true;
      break;
    }
  }
  if (hasArom) {
    {
      rj::Value rjArr(rj::kArrayType);
      for (const auto &atom : mol.atoms()) {
        if (atom->getIsAromatic()) {
          rjArr.PushBack(atom->getIdx(), alloc);
        }
      }
      representation.AddMember("aromaticAtoms", rjArr, alloc);
    }
    {
      rj::Value rjArr(rj::kArrayType);
      for (const auto &bond : mol.bonds()) {
        if (bond->getIsAromatic()) {
          rjArr.PushBack(bond->getIdx(), alloc);
        }
      }
      representation.AddMember("aromaticBonds", rjArr, alloc);
    }
  }
  {
    // CIP ranks: only atom 0 is probed; when it carries a rank, every atom is
    // read with getProp. The probe is skipped for an atom-less molecule,
    // where getAtomWithIdx(0) would be out of range.
    rj::Value rjArr(rj::kArrayType);
    if (hasAtoms &&
        mol.getAtomWithIdx(0)->hasProp(common_properties::_CIPRank)) {
      for (const auto &atom : mol.atoms()) {
        rjArr.PushBack(atom->getProp<unsigned int>(common_properties::_CIPRank),
                       alloc);
      }
    }
    if (rjArr.Size()) {
      representation.AddMember("cipRanks", rjArr, alloc);
    }
  }
  {
    // CIP codes are written sparsely as [atomIdx, code] pairs
    rj::Value rjArr(rj::kArrayType);
    for (const auto &atom : mol.atoms()) {
      std::string cip;
      if (atom->getPropIfPresent(common_properties::_CIPCode, cip)) {
        rj::Value cipv;
        cipv.SetString(cip.c_str(), cip.size(), alloc);
        rj::Value rjElement(rj::kArrayType);
        rjElement.PushBack(rj::Value(atom->getIdx()), alloc);
        rjElement.PushBack(cipv, alloc);
        rjArr.PushBack(rjElement, alloc);
      }
    }
    if (rjArr.Size()) {
      representation.AddMember("cipCodes", rjArr, alloc);
    }
  }
  if (mol.getRingInfo()->numRings()) {
    rj::Value rjArr(rj::kArrayType);
    for (const auto &ring : mol.getRingInfo()->atomRings()) {
      rj::Value rjRing(rj::kArrayType);
      for (const auto &ai : ring) {
        rjRing.PushBack(ai, alloc);
      }
      rjArr.PushBack(rjRing, alloc);
    }
    representation.AddMember("atomRings", rjArr, alloc);
  }

  rj::Value rjReprs(rj::kArrayType);
  rjReprs.PushBack(representation, alloc);

  // ---- "partialCharges" extension ----
  // Written when atom 0 has a Gasteiger charge; atoms lacking the property
  // are written as 0.0. Skipped for an atom-less molecule.
  if (hasAtoms &&
      mol.getAtomWithIdx(0)->hasProp(common_properties::_GasteigerCharge)) {
    rj::Value chargeRepr(rj::kObjectType);
    chargeRepr.AddMember("name", "partialCharges", alloc);
    chargeRepr.AddMember("generator", "RDKit", alloc);
    chargeRepr.AddMember("formatVersion", currentChargeRepresentationVersion,
                         alloc);
    rj::Value generatorVersion;
    generatorVersion.SetString(rj::StringRef(rdkitVersion));
    chargeRepr.AddMember("generatorVersion", generatorVersion, alloc);
    rj::Value rjArr(rj::kArrayType);
    for (const auto &at : mol.atoms()) {
      rj::Value rjval;
      if (at->hasProp(common_properties::_GasteigerCharge)) {
        rjval = at->getProp<double>(common_properties::_GasteigerCharge);
      } else {
        rjval = 0.0;
      }
      rjArr.PushBack(rjval, alloc);
    }
    chargeRepr.AddMember("values", rjArr, alloc);
    rjReprs.PushBack(chargeRepr, alloc);
  }
  rjMol.AddMember("extensions", rjReprs, alloc);
}
} // end of anonymous namespace
template <typename T>
std::string MolsToJSONData(const std::vector<T> &mols) {
std::string res = "";
rj::Document doc;
doc.SetObject();
rj::Value header(rj::kObjectType);
initHeader(header, doc);
doc.AddMember("commonchem", header, doc.GetAllocator());
rj::Value defaults(rj::kObjectType);
rj::Value atomDefaults(rj::kObjectType);
initAtomDefaults(atomDefaults, doc);
defaults.AddMember("atom", atomDefaults, doc.GetAllocator());
rj::Value bondDefaults(rj::kObjectType);
initBondDefaults(bondDefaults, doc);
defaults.AddMember("bond", bondDefaults, doc.GetAllocator());
doc.AddMember("defaults", defaults, doc.GetAllocator());
rj::Value rjMols(rj::kArrayType);
for (const auto &mol : mols) {
rj::Value rjMol(rj::kObjectType);
// write mol;
addMol(*mol, rjMol, doc, *rj::GetValueByPointer(doc, "/defaults/atom"),
*rj::GetValueByPointer(doc, "/defaults/bond"));
rjMols.PushBack(rjMol, doc.GetAllocator());
}
doc.AddMember("molecules", rjMols, doc.GetAllocator());
rj::StringBuffer buffer;
rj::Writer<rj::StringBuffer> writer(buffer);
writer.SetMaxDecimalPlaces(4);
doc.Accept(writer);
return buffer.GetString();
};
// Explicit instantiations of MolsToJSONData for vectors of raw pointers to
// ROMol and RWMol, in both const and non-const flavours.
template std::string MolsToJSONData<ROMol *>(const std::vector<ROMol *> &);
template std::string MolsToJSONData<RWMol *>(const std::vector<RWMol *> &);
template std::string MolsToJSONData<const ROMol *>(
const std::vector<const ROMol *> &);
template std::string MolsToJSONData<const RWMol *>(
const std::vector<const RWMol *> &);
} // end of namespace MolInterchange
} // end of namespace RDKit

View File

@@ -0,0 +1,45 @@
//
// Copyright (C) 2018 Greg Landrum
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
#ifndef RD_MOLINTERCHANGEDETAILS_H_FEB2018
#define RD_MOLINTERCHANGEDETAILS_H_FEB2018
// std::map and std::string are used directly below
#include <map>
#include <string>

// NOTE(review): Atom and Bond are used here without being declared by this
// header, so the including file has to pull in their definitions first.
namespace RDKit {
namespace MolInterchange {
// Version numbers written into the generated JSON.
static const int currentMolJSONVersion = 10;
static const int currentRDKitRepresentationVersion = 1;
static const int currentChargeRepresentationVersion = 10;

// JSON atom "stereo" string <-> Atom::ChiralType
static const std::map<std::string, Atom::ChiralType> chilookup = {
    {"unspecified", Atom::CHI_UNSPECIFIED},
    {"cw", Atom::CHI_TETRAHEDRAL_CW},
    {"ccw", Atom::CHI_TETRAHEDRAL_CCW},
    {"other", Atom::CHI_OTHER}};
static const std::map<Atom::ChiralType, std::string> inv_chilookup = {
    {Atom::CHI_UNSPECIFIED, "unspecified"},
    {Atom::CHI_TETRAHEDRAL_CW, "cw"},
    {Atom::CHI_TETRAHEDRAL_CCW, "ccw"},
    {Atom::CHI_OTHER, "other"}};

// JSON bond order <-> Bond::BondType; only orders 0 through 3 are covered
static const std::map<unsigned int, Bond::BondType> bolookup = {
    {0, Bond::ZERO}, {1, Bond::SINGLE}, {2, Bond::DOUBLE}, {3, Bond::TRIPLE}};
static const std::map<Bond::BondType, unsigned int> inv_bolookup = {
    {Bond::ZERO, 0}, {Bond::SINGLE, 1}, {Bond::DOUBLE, 2}, {Bond::TRIPLE, 3}};

// JSON bond "stereo" string <-> Bond::BondStereo
static const std::map<std::string, Bond::BondStereo> stereolookup = {
    {"unspecified", Bond::STEREONONE},
    {"cis", Bond::STEREOCIS},
    {"trans", Bond::STEREOTRANS},
    {"either", Bond::STEREOANY}};
// STEREOZ and STEREOE are written as "cis" and "trans" respectively, so this
// table is not an exact inverse of stereolookup.
static const std::map<Bond::BondStereo, std::string> inv_stereolookup = {
    {Bond::STEREONONE, "unspecified"}, {Bond::STEREOCIS, "cis"},
    {Bond::STEREOTRANS, "trans"},      {Bond::STEREOZ, "cis"},
    {Bond::STEREOE, "trans"},          {Bond::STEREOANY, "either"}};
}  // end of namespace MolInterchange
}  // end of namespace RDKit
#endif

View File

@@ -0,0 +1,511 @@
//
// Copyright (C) 2018 Greg Landrum
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
#include <RDGeneral/RDLog.h>
#include <GraphMol/RDKitBase.h>
#include <GraphMol/MolPickler.h>
#include <GraphMol/MolInterchange/MolInterchange.h>
#include <GraphMol/SmilesParse/SmilesParse.h>
#include <GraphMol/SmilesParse/SmilesWrite.h>
#include <GraphMol/FileParsers/MolSupplier.h>
#include <RDGeneral/FileParseException.h>
#include <RDGeneral/BadFileException.h>
#include <RDGeneral/LocaleSwitcher.h>
#include <clocale>
#include <cstdlib>
#include <chrono>
#include <string>
#include <fstream>
using namespace RDKit;
// test1: basic parsing.
//
// Reads test1.json and test2.json from the test_data directory below $RDBASE
// and checks atoms, bonds, stereo, conformers, properties, ring info and
// partial charges; then parses two inline JSON documents (a zero-order bond
// and a molecule that relies entirely on the atom/bond defaults).
void test1() {
  BOOST_LOG(rdInfoLog) << "test1: basics" << std::endl;
  // getenv() returns a null pointer when RDBASE is not set; fail with a clear
  // assertion instead of constructing a std::string from null
  const char *rdbaseEnv = getenv("RDBASE");
  TEST_ASSERT(rdbaseEnv);
  std::string rdbase = rdbaseEnv;
  {
    std::string fName =
        rdbase + "/Code/GraphMol/MolInterchange/test_data/test1.json";
    std::ifstream inStream(fName);
    if (!inStream || (inStream.bad())) {
      std::ostringstream errout;
      errout << "Bad input file " << fName;
      throw BadFileException(errout.str());
    }
    auto mols = MolInterchange::JSONDataStreamToMols(&inStream);
    TEST_ASSERT(mols.size() == 1);
    auto m = mols[0].get();
    TEST_ASSERT(m);
    // m->debugMol(std::cerr);
    TEST_ASSERT(m->getNumAtoms() == 15);
    TEST_ASSERT(m->getNumBonds() == 15);
    TEST_ASSERT(m->getAtomWithIdx(0)->getIsAromatic());
    TEST_ASSERT(m->getAtomWithIdx(13)->getFormalCharge() == 1);
    TEST_ASSERT(m->getAtomWithIdx(12)->getChiralTag() ==
                Atom::CHI_TETRAHEDRAL_CCW);
    TEST_ASSERT(m->getBondBetweenAtoms(10, 11));
    TEST_ASSERT(m->getBondBetweenAtoms(10, 11)->getBondType() == Bond::DOUBLE);
    TEST_ASSERT(m->getBondBetweenAtoms(10, 11)->getStereo() == Bond::STEREOCIS);
    TEST_ASSERT(m->getBondBetweenAtoms(0, 1));
    TEST_ASSERT(m->getBondBetweenAtoms(0, 1)->getIsAromatic());
    TEST_ASSERT(m->getNumConformers() == 0);
    TEST_ASSERT(m->getProp<std::string>(common_properties::_Name) ==
                std::string("example 1"));
    TEST_ASSERT(m->hasProp("prop1"));
    TEST_ASSERT(m->getProp<int>("prop1") == 1);
    TEST_ASSERT(m->hasProp("prop2"));
    TEST_ASSERT(feq(m->getProp<double>("prop2"), 3.14));
    TEST_ASSERT(m->hasProp("prop3"));
    TEST_ASSERT(m->getProp<std::string>("prop3") == "foo");
    TEST_ASSERT(m->getRingInfo()->isInitialized());
    TEST_ASSERT(m->getRingInfo()->atomRings().size() == 1);
    TEST_ASSERT(m->getRingInfo()->atomRings()[0].size() == 6);
  }
  {
    std::string fName =
        rdbase + "/Code/GraphMol/MolInterchange/test_data/test2.json";
    std::ifstream inStream(fName);
    if (!inStream || (inStream.bad())) {
      std::ostringstream errout;
      errout << "Bad input file " << fName;
      throw BadFileException(errout.str());
    }
    auto mols = MolInterchange::JSONDataStreamToMols(&inStream);
    TEST_ASSERT(mols.size() == 1);
    auto m = mols[0].get();
    TEST_ASSERT(m);
    TEST_ASSERT(m->getNumAtoms() == 6);
    TEST_ASSERT(m->getNumBonds() == 5);
    TEST_ASSERT(m->getAtomWithIdx(1)->getChiralTag() ==
                Atom::CHI_TETRAHEDRAL_CW);
    TEST_ASSERT(m->getNumConformers() == 2);
    TEST_ASSERT(!m->getConformer(0).is3D());
    TEST_ASSERT(m->getConformer(1).is3D());
    TEST_ASSERT(m->getProp<std::string>(common_properties::_Name) ==
                std::string("example 2"));
    TEST_ASSERT(m->getAtomWithIdx(1)->getIsotope() == 0);
    TEST_ASSERT(m->getAtomWithIdx(2)->getIsotope() == 35);
    TEST_ASSERT(
        m->getAtomWithIdx(0)->hasProp(common_properties::_GasteigerCharge));
    TEST_ASSERT(feq(m->getAtomWithIdx(0)->getProp<double>(
                        common_properties::_GasteigerCharge),
                    -0.352));
  }
  {
    // zero-order bond between atoms 1 and 2
    std::string json =
        "{\"commonchem\": {\"version\": 10 },"
        " \"defaults\": {\"atom\": {\"chg\": 0, \"impHs\": 0, "
        "\"stereo\": \"unspecified\", \"nrad\": 0, \"z\": 6}, "
        "\"bond\": {\"bo\": 1, \"stereo\": \"unspecified\", "
        "\"stereoAtoms\": []}}, \"molecules\": [{\"name\": \"no name\", "
        "\"atoms\": [{\"z\": 6, \"impHs\": 2}, {\"z\": 8}, {\"z\": 26}], "
        "\"bonds\": [{\"atoms\": [0, 1], \"bo\": 2}, {\"atoms\": [1, 2], "
        "\"bo\": 0}], \"extensions\": [{\"formatVersion\": 1, "
        "\"name\": \"rdkitRepresentation\", \"formatVersion\": 1,"
        "\"toolkitVersion\": \"2018.03.1.dev1\", "
        "\"aromaticAtoms\": [], \"aromaticBonds\": [], \"cipRanks\": [0, 1, "
        "2], \"cipCodes\": [], \"atomRings\": []}]}]}";
    auto mols = MolInterchange::JSONDataToMols(json);
    TEST_ASSERT(mols.size() == 1);
    auto m = mols[0].get();
    TEST_ASSERT(m);
    TEST_ASSERT(m->getBondBetweenAtoms(1, 2));
    TEST_ASSERT(m->getBondBetweenAtoms(1, 2)->getBondType() == Bond::ZERO);
  }
  {
    // atoms given as empty objects: everything comes from the defaults
    std::string json =
        "{\"commonchem\":{\"version\":10 },"
        "\"defaults\":{\"atom\":{\"z\":6,\"impHs\":3,\"chg\":0,\"nRad\":0,"
        "\"isotope\":0,"
        "\"stereo\":\"unspecified\"},\"bond\":{\"bo\":1,\"stereo\":"
        "\"unspecified\"}},"
        "\"molecules\":[{\"name\":\"mol1 "
        "name\",\"atoms\":[{},{}],\"bonds\":[{\"bo\":1, \"atoms\":[0, 1]}]}]}";
    auto mols = MolInterchange::JSONDataToMols(json);
    TEST_ASSERT(mols.size() == 1);
    auto m = mols[0].get();
    TEST_ASSERT(m);
    TEST_ASSERT(m->getNumAtoms() == 2);
    TEST_ASSERT(m->getBondBetweenAtoms(0, 1));
  }
  BOOST_LOG(rdInfoLog) << "done" << std::endl;
}
// Round-trip helper: builds a molecule from the SMILES string smi, writes it
// to JSON, parses that JSON again and asserts that both molecules produce the
// same SMILES. The generated JSON is echoed to std::cerr; on a mismatch both
// molecules and both SMILES strings are dumped there too before the final
// assertion fails.
void roundtripSmi(const char *smi) {
std::unique_ptr<RWMol> mol(SmilesToMol(smi));
TEST_ASSERT(mol);
mol->setProp("_Name", "test mol");
auto json = MolInterchange::MolToJSONData(*mol);
std::cerr << json << std::endl;
// reference SMILES from the original molecule
std::string smi1 = MolToSmiles(*mol);
auto newMols = MolInterchange::JSONDataToMols(json);
TEST_ASSERT(newMols.size() == 1);
// SMILES from the molecule that came back out of the JSON
std::string smi2 = MolToSmiles(*newMols[0]);
if (smi1 != smi2) {
// print diagnostics before the assertion below fires
mol->debugMol(std::cerr);
newMols[0]->debugMol(std::cerr);
std::cerr << "smi1: " << smi1 << std::endl;
std::cerr << "smi2: " << smi2 << std::endl;
}
TEST_ASSERT(smi1 == smi2);
}
// test2: basic writing.
//
// Writes a small named molecule to JSON (output is only echoed to std::cerr)
// and then round-trips three SMILES through roundtripSmi(): a chiral center,
// an aromatic ring and a kekulized heterocycle.
void test2() {
BOOST_LOG(rdInfoLog) << "test2: basic writing" << std::endl;
#if 1
{
std::unique_ptr<RWMol> mol(SmilesToMol("CC"));
TEST_ASSERT(mol);
mol->setProp("_Name", "mol1 name");
auto json = MolInterchange::MolToJSONData(*mol);
std::cerr << json << std::endl;
}
#endif
roundtripSmi("F[C@@](Cl)(O)C");
roundtripSmi("c1ccccc1");
roundtripSmi("CCC1=C(N)C=C(C)N=C1");
// The block below is compiled out: a round trip of a molecule with double
// bond stereo.
// NOTE(review): it calls MolToJSONData with a second (name) argument -
// confirm that overload still exists before re-enabling.
#if 0
{
std::unique_ptr<RWMol> mol(SmilesToMol("F[C@](Cl)(O)/C=C/C"));
TEST_ASSERT(mol);
mol->setProp("_Name", "test mol");
mol->getBondBetweenAtoms(4, 5)->setStereo(Bond::STEREOTRANS);
auto json = MolInterchange::MolToJSONData(*mol, "test2 mol2");
std::cerr << json << std::endl;
std::string smi1 = MolToSmiles(*mol);
auto newMols = MolInterchange::JSONDataToMols(json);
TEST_ASSERT(newMols.size() == 1);
mol->debugMol(std::cerr);
newMols[0]->debugMol(std::cerr);
std::string smi2 = MolToSmiles(*newMols[0]);
std::cerr << "smi1: " << smi1 << std::endl;
std::cerr << "smi2: " << smi2 << std::endl;
TEST_ASSERT(smi1 == smi2);
}
#endif
BOOST_LOG(rdInfoLog) << "done" << std::endl;
}
void test3() {
BOOST_LOG(rdInfoLog) << "test3: writing conformers" << std::endl;
std::string rdbase = getenv("RDBASE");
{
std::string fName =
rdbase + "/Code/GraphMol/MolInterchange/test_data/test2.json";
std::ifstream inStream(fName);
auto mols = MolInterchange::JSONDataStreamToMols(&inStream);
TEST_ASSERT(mols.size() == 1);
TEST_ASSERT(mols[0]->getNumConformers() == 2);
TEST_ASSERT(!mols[0]->getConformer(0).is3D());
TEST_ASSERT(mols[0]->getConformer(1).is3D());
std::string json = MolInterchange::MolToJSONData(*mols[0]);
std::cerr << json << std::endl;
TEST_ASSERT(json.find("conformers") != std::string::npos);
auto newMols = MolInterchange::JSONDataToMols(json);
TEST_ASSERT(newMols.size() == 1);
TEST_ASSERT(newMols[0]->getNumConformers() == 2);
TEST_ASSERT(!newMols[0]->getConformer(0).is3D());
TEST_ASSERT(newMols[0]->getConformer(1).is3D());
}
BOOST_LOG(rdInfoLog) << "done" << std::endl;
}
void test4() {
BOOST_LOG(rdInfoLog) << "test4: writing properties" << std::endl;
#if 1
{
std::unique_ptr<RWMol> mol(SmilesToMol("CC"));
TEST_ASSERT(mol);
mol->setProp("foo_string", "bar");
mol->setProp("foo_int", 1);
mol->setProp("foo_double", 1.2);
auto json = MolInterchange::MolToJSONData(*mol);
std::cerr << json << std::endl;
TEST_ASSERT(json.find("foo_string") != std::string::npos);
TEST_ASSERT(json.find("foo_int") != std::string::npos);
TEST_ASSERT(json.find("foo_double") != std::string::npos);
auto newMols = MolInterchange::JSONDataToMols(json);
TEST_ASSERT(newMols.size() == 1);
TEST_ASSERT(newMols[0]->hasProp("foo_string"));
TEST_ASSERT(newMols[0]->getProp<std::string>("foo_string") == "bar");
TEST_ASSERT(newMols[0]->hasProp("foo_int"));
TEST_ASSERT(newMols[0]->getProp<int>("foo_int") == 1);
TEST_ASSERT(newMols[0]->hasProp("foo_double"));
TEST_ASSERT(newMols[0]->getProp<double>("foo_double") == 1.2);
}
#endif
BOOST_LOG(rdInfoLog) << "done" << std::endl;
}
// test5: writing partial charges.
//
// Sets Gasteiger charges on both atoms of a molecule, writes it to JSON,
// checks that a "partialCharges" entry is present and that the charges are
// restored on parsing.
void test5() {
  BOOST_LOG(rdInfoLog) << "test5: writing partial charges" << std::endl;
  {
    std::unique_ptr<RWMol> mol(SmilesToMol("CO"));
    TEST_ASSERT(mol);
    mol->getAtomWithIdx(0)->setProp(common_properties::_GasteigerCharge, 0.5);
    mol->getAtomWithIdx(1)->setProp(common_properties::_GasteigerCharge, -0.5);
    auto json = MolInterchange::MolToJSONData(*mol);
    std::cerr << json << std::endl;
    TEST_ASSERT(json.find("partialCharges") != std::string::npos);
    auto newMols = MolInterchange::JSONDataToMols(json);
    TEST_ASSERT(newMols.size() == 1);
    TEST_ASSERT(newMols[0]->getAtomWithIdx(0)->hasProp(
        common_properties::_GasteigerCharge));
    TEST_ASSERT(feq(newMols[0]->getAtomWithIdx(0)->getProp<double>(
                        common_properties::_GasteigerCharge),
                    0.5));
    TEST_ASSERT(newMols[0]->getAtomWithIdx(1)->hasProp(
        common_properties::_GasteigerCharge));
    TEST_ASSERT(feq(newMols[0]->getAtomWithIdx(1)->getProp<double>(
                        common_properties::_GasteigerCharge),
                    -0.5));
  }
  // closing log line, matching every other test in this file
  BOOST_LOG(rdInfoLog) << "done" << std::endl;
}
void benchmarking() {
BOOST_LOG(rdInfoLog) << "benchmarking performance" << std::endl;
std::string rdbase = getenv("RDBASE");
{
std::string fName =
rdbase + "/Code/GraphMol/MolInterchange/test_data/znp.50k.smi";
SmilesMolSupplier suppl(fName);
std::vector<RWMol *> mols;
auto smir_t1 = std::chrono::system_clock::now();
while (mols.size() < 20000) {
mols.push_back(static_cast<RWMol *>(suppl.next()));
}
auto smir_t2 = std::chrono::system_clock::now();
std::cerr << "construction of " << mols.size() << " took "
<< std::chrono::duration<double>(smir_t2 - smir_t1).count()
<< std::endl;
for (auto &m : mols) {
MolOps::Kekulize(*m);
}
auto jsonw_t1 = std::chrono::system_clock::now();
auto json = MolInterchange::MolsToJSONData(mols);
auto jsonw_t2 = std::chrono::system_clock::now();
std::cerr << "json generation took "
<< std::chrono::duration<double>(jsonw_t2 - jsonw_t1).count()
<< std::endl;
auto jsonr_t1 = std::chrono::system_clock::now();
auto newms = MolInterchange::JSONDataToMols(json);
auto jsonr_t2 = std::chrono::system_clock::now();
std::cerr << "json parsing took "
<< std::chrono::duration<double>(jsonr_t2 - jsonr_t1).count()
<< std::endl;
newms.clear();
auto pklw_t1 = std::chrono::system_clock::now();
std::vector<std::string> pkls;
pkls.reserve(mols.size());
for (const auto &mol : mols) {
std::string pkl;
MolPickler::pickleMol(*mol, pkl);
pkls.push_back(pkl);
}
auto pklw_t2 = std::chrono::system_clock::now();
std::cerr << "pickle generation took "
<< std::chrono::duration<double>(pklw_t2 - pklw_t1).count()
<< std::endl;
auto pklr_t1 = std::chrono::system_clock::now();
for (const auto &pkl : pkls) {
ROMol m;
MolPickler::molFromPickle(pkl, m);
}
auto pklr_t2 = std::chrono::system_clock::now();
std::cerr << "pickle parsing took "
<< std::chrono::duration<double>(pklr_t2 - pklr_t1).count()
<< std::endl;
for (auto &m : mols) {
delete m;
}
}
BOOST_LOG(rdInfoLog) << "done" << std::endl;
}
// test6: parse options.
//
// Parses test3.json four times with different JSONParseParameters and checks
// which parts of the molecule are present: default parameters, conformers
// disabled, conformers and properties disabled, and properties disabled. The
// name and the Gasteiger charges are expected in every case.
void test6() {
  BOOST_LOG(rdInfoLog) << "testing parse options" << std::endl;
  // getenv() returns a null pointer when RDBASE is not set; fail with a clear
  // assertion instead of constructing a std::string from null
  const char *rdbaseEnv = getenv("RDBASE");
  TEST_ASSERT(rdbaseEnv);
  std::string rdbase = rdbaseEnv;
  std::string fName =
      rdbase + "/Code/GraphMol/MolInterchange/test_data/test3.json";
  std::ifstream inStream(fName);
  if (!inStream || (inStream.bad())) {
    std::ostringstream errout;
    errout << "Bad input file " << fName;
    throw BadFileException(errout.str());
  }
  // slurp the whole file so that it can be parsed several times
  const std::string jsond(std::istreambuf_iterator<char>(inStream), {});
  {
    // defaults: everything is parsed
    MolInterchange::JSONParseParameters ps;
    auto mols = MolInterchange::JSONDataToMols(jsond, ps);
    TEST_ASSERT(mols.size() == 1);
    auto m = mols[0].get();
    TEST_ASSERT(m);
    TEST_ASSERT(m->getNumAtoms() == 6);
    TEST_ASSERT(m->getNumBonds() == 5);
    TEST_ASSERT(m->getNumConformers() == 2);
    TEST_ASSERT(m->getProp<std::string>(common_properties::_Name) ==
                std::string("example 2"));
    TEST_ASSERT(m->hasProp("prop3"));
    TEST_ASSERT(
        m->getAtomWithIdx(0)->hasProp(common_properties::_GasteigerCharge));
  }
  {
    // no conformers
    MolInterchange::JSONParseParameters ps;
    ps.parseConformers = false;
    auto mols = MolInterchange::JSONDataToMols(jsond, ps);
    TEST_ASSERT(mols.size() == 1);
    auto m = mols[0].get();
    TEST_ASSERT(m);
    TEST_ASSERT(m->getNumAtoms() == 6);
    TEST_ASSERT(m->getNumBonds() == 5);
    TEST_ASSERT(m->getNumConformers() == 0);
    TEST_ASSERT(m->getProp<std::string>(common_properties::_Name) ==
                std::string("example 2"));
    TEST_ASSERT(m->hasProp("prop3"));
    TEST_ASSERT(
        m->getAtomWithIdx(0)->hasProp(common_properties::_GasteigerCharge));
  }
  {
    // neither conformers nor properties
    MolInterchange::JSONParseParameters ps;
    ps.parseConformers = false;
    ps.parseProperties = false;
    auto mols = MolInterchange::JSONDataToMols(jsond, ps);
    TEST_ASSERT(mols.size() == 1);
    auto m = mols[0].get();
    TEST_ASSERT(m);
    TEST_ASSERT(m->getNumAtoms() == 6);
    TEST_ASSERT(m->getNumBonds() == 5);
    TEST_ASSERT(m->getNumConformers() == 0);
    TEST_ASSERT(m->getProp<std::string>(common_properties::_Name) ==
                std::string("example 2"));  // we always parse the name
    TEST_ASSERT(!m->hasProp("prop3"));
    TEST_ASSERT(
        m->getAtomWithIdx(0)->hasProp(common_properties::_GasteigerCharge));
  }
  {
    // no properties
    MolInterchange::JSONParseParameters ps;
    ps.parseProperties = false;
    auto mols = MolInterchange::JSONDataToMols(jsond, ps);
    TEST_ASSERT(mols.size() == 1);
    auto m = mols[0].get();
    TEST_ASSERT(m);
    TEST_ASSERT(m->getNumAtoms() == 6);
    TEST_ASSERT(m->getNumBonds() == 5);
    TEST_ASSERT(m->getNumConformers() == 2);
    TEST_ASSERT(m->getProp<std::string>(common_properties::_Name) ==
                std::string("example 2"));  // we always parse the name
    TEST_ASSERT(!m->hasProp("prop3"));
    TEST_ASSERT(
        m->getAtomWithIdx(0)->hasProp(common_properties::_GasteigerCharge));
  }
  BOOST_LOG(rdInfoLog) << "done" << std::endl;
}
// Runs test1 through test6 in order. The benchmark is intentionally left out
// of the default run; uncomment the call below to execute it.
void RunTests() {
  test1();
  test2();
  test3();
  test4();
  test5();
  test6();
  // benchmarking();
}
// Everything up to the matching #endif is compiled out: LocaleSwitcher tests
// that nothing in the active part of this file calls.
#if 0
// must be in German Locale for test...
// Formats -1.0 with "%0.2f" and expects "-1,00" outside of a LocaleSwitcher
// scope and "-1.00" inside of one (including a nested one). If the initial
// formatting does not produce "-1,00" the test logs a message and returns.
void testLocaleSwitcher() {
float d = -1.0;
char buffer[1024];
sprintf(buffer, "%0.2f", d);
if (std::string(buffer) != "-1,00") {
BOOST_LOG(rdInfoLog) << " ---- no German locale support (skipping) ---- "
<< std::endl;
return;
}
{
RDKit::Utils::LocaleSwitcher ls;
sprintf(buffer, "%0.2f", d);
CHECK_INVARIANT(std::string(buffer) == "-1.00", "Locale Switcher Fail");
// test locale switcher recursion
{
RDKit::Utils::LocaleSwitcher ls;
sprintf(buffer, "%0.2f", d);
CHECK_INVARIANT(std::string(buffer) == "-1.00", "Locale Switcher Fail");
}
// should still be in the "C" variant
sprintf(buffer, "%0.2f", d);
CHECK_INVARIANT(std::string(buffer) == "-1.00", "Locale Switcher Fail");
}
// Should be back in German Locale
sprintf(buffer, "%0.2f", d);
CHECK_INVARIANT(std::string(buffer) == "-1,00", "Locale Switcher Fail");
}
#ifdef RDK_TEST_MULTITHREADED
#include <RDGeneral/BoostStartInclude.h>
#include <boost/thread.hpp>
#include <RDGeneral/BoostEndInclude.h>
namespace {
// thread entry point: one run of the locale switcher test
void runblock() { testLocaleSwitcher(); }
}
// Runs testLocaleSwitcher() concurrently on 100 boost threads and waits for
// all of them to finish.
void testMultiThreadedSwitcher() {
BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
BOOST_LOG(rdErrorLog) << " Test multithreading Locale Switching"
<< std::endl;
boost::thread_group tg;
unsigned int count = 100;
for (unsigned int i = 0; i < count; ++i) {
tg.add_thread(new boost::thread(runblock));
}
tg.join_all();
BOOST_LOG(rdErrorLog) << " Test multithreading (Done)" << std::endl;
BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
}
#else
// Stand-in used when RDK_TEST_MULTITHREADED is not defined: only logs that
// the multithreaded tests are disabled.
void testMultiThreadedSwitcher() {
BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
BOOST_LOG(rdInfoLog) << " ---- Multithreaded tests disabled ---- "
<< std::endl;
}
#endif
#endif
// Test driver entry point: initializes the RDKit loggers, runs the test
// suite and returns 0. A failing test is expected to leave via an exception
// rather than through the return value.
int main(int argc, char *argv[]) {
  // the command line is not used
  static_cast<void>(argc);
  static_cast<void>(argv);

  RDLog::InitLogs();
  // no locale is set up here, so the tests run with whatever locale the
  // program starts in (described in the original comment as the C locale)
  RunTests();
  return 0;
}

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,47 @@
{"commonchem": {"version": 10 },
"defaults": {
"atom": {"z": 6, "impHs": 0, "chg": 0, "stereo": "unspecified", "nrad": 0},
"bond": {"bo": 1, "stereo": "unspecified", "stereoAtoms": []}
},
"molecules": [{"name": "example 1", "atoms": [{"z": 6, "impHs": 1},
{"z": 6},
{"z": 6, "impHs": 1},
{"z": 6, "impHs": 1},
{"z": 6, "impHs": 3},
{"z": 6, "impHs": 1},
{"z": 6, "impHs": 1},
{"z": 6, "impHs": 1},
{"z": 6},
{"z": 8},
{"z": 6, "impHs": 1},
{"z": 6, "impHs": 1},
{"z": 6, "impHs": 1, "stereo": "ccw"},
{"z": 7, "impHs": 3, "chg": 1},
{"z": 17}],
"bonds": [{"atoms": [0, 1],
"bo": 2},
{"atoms": [1, 2]},
{"atoms": [2, 3],
"bo": 2},
{"atoms": [3, 4]},
{"atoms": [1, 5]},
{"atoms": [5, 6],
"bo": 2},
{"atoms": [6, 7]},
{"atoms": [7, 8],
"bo": 2},
{"atoms": [8, 9]},
{"atoms": [9, 10]},
{"atoms": [10, 11],
"bo": 2, "stereoAtoms": [9, 12],
"stereo": "cis"},
{"atoms": [11, 12]},
{"atoms": [12, 13]},
{"atoms": [12, 14]},
{"atoms": [8, 0]}],
"properties": {"prop1": 1, "prop2": 3.14, "prop3": "foo"},
"extensions": [{"name": "rdkitRepresentation", "toolkitVersion": "2018.03.1.dev1", "formatVersion": 1, "aromaticAtoms": [0, 1, 5, 6, 7, 8],
"aromaticBonds": [0, 4, 5, 6, 7, 14],
"cipRanks": [6, 8, 3, 1, 0, 4, 2, 5, 10, 13, 9, 7, 11, 12, 14],
"cipCodes": [[12, "R"]],
"atomRings": [[0, 8, 7, 6, 5, 1]]}]}]}

View File

@@ -0,0 +1,24 @@
{"commonchem": {"version": 10},
"defaults": {
"atom": {"z": 6, "impHs": 0, "chg": 0, "stereo": "unspecified", "nrad": 0},
"bond": {"bo": 1, "stereo": "unspecified", "stereoAtoms": []}
},
"molecules": [{"name": "example 2", "atoms": [{"z": 8},
{"z": 6, "stereo": "cw"},
{"z": 17, "isotope": 35},
{"z": 9},
{"z": 1},
{"z": 1}],
"bonds": [{"atoms": [0, 1]},
{"atoms": [1, 2]},
{"atoms": [1, 3]},
{"atoms": [0, 4]},
{"atoms": [1, 5]}],
"conformers": [{"dim": 2, "coords": [[-1.1988, -0.0452], [-0.3332, 0.4556], [0.166, 1.322], [0.5332, -0.0438], [-2.0652, 0.4542], [-0.834, 1.3212]]},
{"dim": 3, "coords": [[0.9554, -0.3743, -0.4679], [-0.3616, -0.1311, -0.0795], [-0.5097, 1.6256, 0.1706], [-0.5779, -0.792, 1.1042], [1.5277, 0.1064, 0.1628], [-1.0339, -0.4346, -0.8902]]}],
"extensions": [{"name": "rdkitRepresentation", "toolkitVersion": "2018.03.1.dev1", "formatVersion": 1, "aromaticAtoms": [],
"aromaticBonds": [],
"cipCodes": [[1, "S"]],
"atomRings": []},
{"name": "partialCharges", "chargeType":"gasteiger", "formatVersion":10, "generator":"RDKit", "generatorVersion": "2018.03.1.dev1", "values": [-0.352, 0.273, -0.055, -0.198, 0.215, 0.117]}
]}]}

View File

@@ -0,0 +1,25 @@
{"commonchem": {"version": 10},
"defaults": {
"atom": {"z": 6, "impHs": 0, "chg": 0, "stereo": "unspecified", "nrad": 0},
"bond": {"bo": 1, "stereo": "unspecified", "stereoAtoms": []}
},
"molecules": [{"name": "example 2", "atoms": [{"z": 8},
{"z": 6, "stereo": "cw"},
{"z": 17, "isotope": 35},
{"z": 9},
{"z": 1},
{"z": 1}],
"bonds": [{"atoms": [0, 1]},
{"atoms": [1, 2]},
{"atoms": [1, 3]},
{"atoms": [0, 4]},
{"atoms": [1, 5]}],
"properties": {"prop1": 1, "prop2": 3.14, "prop3": "foo"},
"conformers": [{"dim": 2, "coords": [[-1.1988, -0.0452], [-0.3332, 0.4556], [0.166, 1.322], [0.5332, -0.0438], [-2.0652, 0.4542], [-0.834, 1.3212]]},
{"dim": 3, "coords": [[0.9554, -0.3743, -0.4679], [-0.3616, -0.1311, -0.0795], [-0.5097, 1.6256, 0.1706], [-0.5779, -0.792, 1.1042], [1.5277, 0.1064, 0.1628], [-1.0339, -0.4346, -0.8902]]}],
"extensions": [{"name": "rdkitRepresentation", "toolkitVersion": "2018.03.1.dev1", "formatVersion": 1, "aromaticAtoms": [],
"aromaticBonds": [],
"cipCodes": [[1, "S"]],
"atomRings": []},
{"name": "partialCharges", "chargeType":"gasteiger", "formatVersion":10, "generator":"RDKit", "generatorVersion": "2018.03.1.dev1", "values": [-0.352, 0.273, -0.055, -0.198, 0.215, 0.117]}
]}]}

View File

@@ -255,9 +255,11 @@ void computeGasteigerCharges(const ROMol &mol, std::vector<double> &charges,
}
for (aix = 0; aix < natms; aix++) {
mol.getAtomWithIdx(aix)->setProp("_GasteigerCharge", charges[aix], true);
mol.getAtomWithIdx(aix)->setProp(common_properties::_GasteigerCharge,
charges[aix], true);
// set the implicit hydrogen charges
mol.getAtomWithIdx(aix)->setProp("_GasteigerHCharge", hChrg[aix], true);
mol.getAtomWithIdx(aix)->setProp(common_properties::_GasteigerHCharge,
hChrg[aix], true);
}
}
}

View File

@@ -1,6 +1,5 @@
// $Id$
//
// Created by Greg Landrum: January 2007
// Copyright (C) Greg Landrum 2007-2017
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
@@ -11,6 +10,7 @@
#define NO_IMPORT_ARRAY
#include <RDBoost/python.h>
#include <RDBoost/Wrap.h>
#include <GraphMol/RDKitBase.h>
#include <RDGeneral/types.h>
@@ -35,6 +35,19 @@ python::object bondRings(const RingInfo *self) {
}
return python::tuple(res);
}
// Python-facing helper behind RingInfo.AddRing().
//
// atomRing and bondRing are Python sequences of integer ids and must have
// the same length, otherwise throw_value_error() is called. The RingInfo is
// initialized first if it has not been already, then the ids are converted
// to INT_VECTs and handed to RingInfo::addRing().
void addRing(RingInfo *self, python::object atomRing,
             python::object bondRing) {
  const unsigned int numAtomIds =
      python::extract<unsigned int>(atomRing.attr("__len__")());
  const unsigned int numBondIds =
      python::extract<unsigned int>(bondRing.attr("__len__")());
  if (numAtomIds != numBondIds) {
    throw_value_error("list sizes must match");
  }
  if (!self->isInitialized()) {
    self->initialize();
  }

  INT_VECT atomIds;
  INT_VECT bondIds;
  atomIds.reserve(numAtomIds);
  bondIds.reserve(numAtomIds);
  for (unsigned int pos = 0; pos < numAtomIds; ++pos) {
    atomIds.push_back(python::extract<int>(atomRing[pos])());
    bondIds.push_back(python::extract<int>(bondRing[pos])());
  }
  self->addRing(atomIds, bondIds);
}
}
namespace RDKit {
@@ -50,7 +63,9 @@ struct ringinfo_wrapper {
.def("NumBondRings", &RingInfo::numBondRings)
.def("NumRings", &RingInfo::numRings)
.def("AtomRings", atomRings)
.def("BondRings", bondRings);
.def("BondRings", bondRings)
.def("AddRing", addRing, (python::arg("self"),python::arg("atomIds"),python::arg("bondIds")),
"Adds a ring to the set. Be very careful with this operation.");
};
};
} // end of namespace

View File

@@ -12,9 +12,9 @@
#include "types.h"
namespace RDKit {
namespace detail {
const std::string computedPropName = "__computedProps";
}
namespace detail {
const std::string computedPropName = "__computedProps";
}
namespace common_properties {
const std::string TWOD = "2D";
@@ -61,6 +61,8 @@ const std::string _crippenLogP = "_crippenLogP";
const std::string _crippenLogPContribs = "_crippenLogPContribs";
const std::string _crippenMR = "_crippenMR";
const std::string _crippenMRContribs = "_crippenMRContribs";
const std::string _GasteigerCharge = "_GasteigerCharge";
const std::string _GasteigerHCharge = "_GasteigerHCharge";
const std::string _doIsoSmiles = "_doIsoSmiles";
const std::string _fragSMARTS = "_fragSMARTS";
const std::string _hasMassQuery = "_hasMassQuery";
@@ -99,6 +101,7 @@ const std::string ringMembership = "ringMembership";
const std::string smilesSymbol = "smilesSymbol";
const std::string atomLabel = "atomLabel";
const std::string internalRgroupSmiles = "internalRgroupSmiles";
} // end common_properties
const double MAX_DOUBLE = std::numeric_limits<double>::max();

View File

@@ -38,131 +38,135 @@
namespace RDKit {
namespace detail {
// used in various places for computed properties
extern const std::string computedPropName;
}
namespace detail {
// used in various places for computed properties
extern const std::string computedPropName;
}
// Names of the well-known properties, declared as `extern const std::string`
// constants. Only declarations appear here; the definitions are not in this
// header. The trailing comment on each declaration records the value type
// (and sometimes the owning subsystem) noted by the original authors.
//
// NOTE(review): many declarations below appear twice in succession with the
// same identifier and only comment spacing/wrapping changed — presumably the
// before/after lines of a reformatting diff. Repeated extern declarations are
// harmless to the compiler, but confirm only one copy of each is intended.
namespace common_properties {
///////////////////////////////////////////////////////////////
// Molecule Props
extern const std::string _Name; // string
extern const std::string MolFileInfo; // string
extern const std::string MolFileComments; // string
extern const std::string _2DConf; // int (combine into dimension?)
extern const std::string _3DConf; // int
extern const std::string _doIsoSmiles; // int (should probably be removed)
extern const std::string extraRings; // vec<vec<int> >
extern const std::string _smilesAtomOutputOrder; // vec<int> computed
extern const std::string _StereochemDone; // int
extern const std::string _NeedsQueryScan; // int (bool)
extern const std::string _fragSMARTS; // std::string
extern const std::string maxAttachIdx; // int TemplEnumTools.cpp
extern const std::string origNoImplicit; // int (bool)
extern const std::string ringMembership; //? unused (molopstest.cpp)
extern const std::string _Name; // string
extern const std::string MolFileInfo; // string
extern const std::string MolFileComments; // string
extern const std::string _2DConf; // int (combine into dimension?)
extern const std::string _3DConf; // int
extern const std::string _doIsoSmiles; // int (should probably be removed)
extern const std::string extraRings; // vec<vec<int> >
extern const std::string _smilesAtomOutputOrder; // vec<int> computed
extern const std::string _StereochemDone; // int
extern const std::string _NeedsQueryScan; // int (bool)
extern const std::string _fragSMARTS; // std::string
extern const std::string maxAttachIdx; // int TemplEnumTools.cpp
extern const std::string origNoImplicit; // int (bool)
extern const std::string ringMembership; //? unused (molopstest.cpp)
// Computed Values
// ConnectivityDescriptors
extern const std::string _connectivityHKDeltas;// std::vector<double> computed
extern const std::string _connectivityNVals; // std::vector<double> computed
extern const std::string _connectivityHKDeltas; // std::vector<double> computed
extern const std::string _connectivityNVals; // std::vector<double> computed
extern const std::string _crippenLogP; // double computed
extern const std::string _crippenLogPContribs; // std::vector<double> computed
extern const std::string _crippenLogP; // double computed
extern const std::string _crippenLogPContribs; // std::vector<double> computed
extern const std::string _crippenMR; // double computed
extern const std::string _crippenMRContribs; // std::vector<double> computed
extern const std::string _crippenMR; // double computed
extern const std::string _crippenMRContribs; // std::vector<double> computed
extern const std::string _labuteASA; // double computed
extern const std::string _labuteAtomContribs; // vec<double> computed
extern const std::string _labuteAtomHContrib; // double computed
extern const std::string _tpsa; // double computed
extern const std::string _tpsaAtomContribs; // vec<double> computed
extern const std::string _tpsa; // double computed
extern const std::string _tpsaAtomContribs; // vec<double> computed
extern const std::string numArom; // int computed (only uses in tests?)
extern const std::string _MMFFSanitized; // int (bool) computed
extern const std::string numArom; // int computed (only uses in tests?)
extern const std::string _MMFFSanitized; // int (bool) computed
extern const std::string _CrippenLogP; // Unused (in the basement)
extern const std::string _CrippenMR; // Unused (in the basement)
extern const std::string _CrippenLogP; // Unused (in the basement)
extern const std::string _CrippenMR; // Unused (in the basement)
// NOTE(review): the two Gasteiger names below have no earlier copy in this
// listing, so they look newly introduced; their value type is not annotated —
// TODO confirm and document.
extern const std::string _GasteigerCharge; // used to hold partial charges
extern const std::string
_GasteigerHCharge; // used to hold partial charges from implicit Hs
///////////////////////////////////////////////////////////////
// Atom Props
// Chirality stuff
extern const std::string _BondsPotentialStereo; // int (or bool) COMPUTED
extern const std::string _CIPCode; // std::string COMPUTED
extern const std::string _CIPRank; // int COMPUTED
extern const std::string _ChiralityPossible; // int
extern const std::string _UnknownStereo; // int (bool) AddHs/Chirality
extern const std::string _ringStereoAtoms; // int vect Canon/Chiral/MolHash/MolOps//Renumber//RWmol
extern const std::string _ringStereochemCand; // chirality bool COMPUTED
extern const std::string _ringStereoWarning; // obsolete ?
extern const std::string _BondsPotentialStereo; // int (or bool) COMPUTED
extern const std::string _CIPCode; // std::string COMPUTED
extern const std::string _CIPRank; // int COMPUTED
extern const std::string _ChiralityPossible; // int
extern const std::string _UnknownStereo; // int (bool) AddHs/Chirality
extern const std::string
_ringStereoAtoms; // int vect Canon/Chiral/MolHash/MolOps//Renumber//RWmol
extern const std::string _ringStereochemCand; // chirality bool COMPUTED
extern const std::string _ringStereoWarning; // obsolete ?
// Smiles parsing
extern const std::string _SmilesStart; // int
extern const std::string _TraversalBondIndexOrder; // ? unused
extern const std::string _TraversalRingClosureBond; // unsigned int
extern const std::string _TraversalStartPoint; // bool
extern const std::string _queryRootAtom; // int SLNParse/SubstructMatch
extern const std::string _hasMassQuery; // atom bool
extern const std::string _protected; // atom int (bool)
extern const std::string _supplementalSmilesLabel; // atom string (SmilesWrite)
extern const std::string _unspecifiedOrder;// atom int (bool) smarts/smiles
extern const std::string _RingClosures; // INT_VECT smarts/smiles/canon
extern const std::string atomLabel; // atom string from CXSMILES
extern const std::string _SmilesStart; // int
extern const std::string _TraversalBondIndexOrder; // ? unused
extern const std::string _TraversalRingClosureBond; // unsigned int
extern const std::string _TraversalStartPoint; // bool
extern const std::string _queryRootAtom; // int SLNParse/SubstructMatch
extern const std::string _hasMassQuery; // atom bool
extern const std::string _protected; // atom int (bool)
extern const std::string _supplementalSmilesLabel; // atom string (SmilesWrite)
extern const std::string _unspecifiedOrder; // atom int (bool) smarts/smiles
extern const std::string _RingClosures; // INT_VECT smarts/smiles/canon
extern const std::string atomLabel; // atom string from CXSMILES
// MDL Style Properties (MolFileParser)
extern const std::string molAtomMapNumber; // int
extern const std::string molFileAlias; // string
extern const std::string molFileValue; // string
extern const std::string molInversionFlag; // int
extern const std::string molParity; // int
extern const std::string molRxnComponent; // int
extern const std::string molRxnRole; // int
extern const std::string molTotValence; // int
extern const std::string _MolFileRLabel; // int
extern const std::string _MolFileChiralFlag; // int
extern const std::string MRV_SMA; // smarts string from Marvin
extern const std::string dummyLabel; // atom string
extern const std::string molAtomMapNumber; // int
extern const std::string molFileAlias; // string
extern const std::string molFileValue; // string
extern const std::string molInversionFlag; // int
extern const std::string molParity; // int
extern const std::string molRxnComponent; // int
extern const std::string molRxnRole; // int
extern const std::string molTotValence; // int
extern const std::string _MolFileRLabel; // int
extern const std::string _MolFileChiralFlag; // int
extern const std::string MRV_SMA; // smarts string from Marvin
extern const std::string dummyLabel; // atom string
// Reaction Information (Reactions.cpp)
extern const std::string _QueryFormalCharge; // int
extern const std::string _QueryHCount; // int
extern const std::string _QueryIsotope; // int
extern const std::string _QueryMass; // int = round(float * 1000)
extern const std::string _ReactionDegreeChanged; // int (bool)
extern const std::string NullBond; // int (bool)
extern const std::string _QueryFormalCharge; // int
extern const std::string _QueryHCount; // int
extern const std::string _QueryIsotope; // int
extern const std::string _QueryMass; // int = round(float * 1000)
extern const std::string _ReactionDegreeChanged; // int (bool)
extern const std::string NullBond; // int (bool)
// NOTE(review): no value type is recorded for the two rgroup properties
// below — TODO document once confirmed against their users.
extern const std::string _rgroupAtomMaps;
extern const std::string _rgroupBonds;
// SLN
extern const std::string _AtomID; // unsigned int SLNParser
extern const std::string _starred; // atom int COMPUTED (SLN)
extern const std::string _SLN_s; // string SLNAttribs (chiral info)
extern const std::string _Unfinished_SLN_; // int (bool)
extern const std::string _AtomID; // unsigned int SLNParser
extern const std::string _starred; // atom int COMPUTED (SLN)
extern const std::string _SLN_s; // string SLNAttribs (chiral info)
extern const std::string _Unfinished_SLN_; // int (bool)
// Smarts Smiles
extern const std::string _brokenChirality; // atom bool
extern const std::string isImplicit; // atom int (bool)
extern const std::string smilesSymbol; // atom string (only used in test?)
extern const std::string _brokenChirality; // atom bool
extern const std::string isImplicit; // atom int (bool)
extern const std::string smilesSymbol; // atom string (only used in test?)
// Tripos
extern const std::string _TriposAtomType; // string Mol2FileParser
extern const std::string _TriposAtomType; // string Mol2FileParser
// missing defs for _TriposAtomName//_TriposPartialCharge...
///////////////////////////////////////////////////////////////
// misc props
extern const std::string TWOD; // need THREED -> confusing using in TDTMol supplier
// converge with _2DConf?
extern const std::string BalabanJ; // mol double
extern const std::string BalanbanJ; // typo!! fix...
extern const std::string
TWOD; // need THREED -> confusing using in TDTMol supplier
// converge with _2DConf?
extern const std::string BalabanJ; // mol double
extern const std::string BalanbanJ; // typo!! fix...
extern const std::string Discrims; // FragCatalog Entry
// Subgraphs::DiscrimTuple (uint32,uint32,uint32)
extern const std::string DistanceMatrix_Paths; // boost::shared_array<double>
// - note, confusing creation of names in
// - getDistanceMat
extern const std::string Discrims; // FragCatalog Entry
// Subgraphs::DiscrimTuple (uint32,uint32,uint32)
extern const std::string DistanceMatrix_Paths; // boost::shared_array<double>
// - note, confusing creation of names in
// - getDistanceMat
// NOTE(review): value type not recorded for this property — TODO document.
extern const std::string internalRgroupSmiles;
} // end common_properties

View File

@@ -1,6 +1,5 @@
# $Id$
#
# Copyright (C) 2001-2006 greg Landrum
# Copyright (C) 2001-2018 greg Landrum
#
# @@ All Rights Reserved @@
# This file is part of the RDKit.
@@ -143,7 +142,35 @@ class TestCase(unittest.TestCase):
assert not at.IsInRingSize(4), 'atom %d improperly in ring' % (i)
else:
assert at.IsInRingSize(4), 'atom %d not in ring of size 4' % (i)
@unittest.skipIf(not hasattr(Chem, "MolToJSON"), "MolInterchange support not enabled")
def testJSON1(self):
  """ JSON test1

  Round-trips every SMILES in ``self.bigSmiList`` through the single-molecule
  JSON writer and the JSON reader, and requires the SMILES written from the
  recovered molecule to equal the SMILES written from the source molecule.
  Skipped when ``Chem`` has no ``MolToJSON`` attribute.
  """
  for smiles in self.bigSmiList:
    source = Chem.MolFromSmiles(smiles)
    # JSONToMols returns a sequence; a single-molecule document yields one entry.
    recovered = Chem.JSONToMols(Chem.MolToJSON(source))[0]
    self.assertEqual(Chem.MolToSmiles(source), Chem.MolToSmiles(recovered))
@unittest.skipIf(not hasattr(Chem, "MolToJSON"), "MolInterchange support not enabled")
def testJSON2(self):
  """ JSON test2

  Serialises all molecules built from ``self.bigSmiList`` into one JSON
  document, reads them back, and requires the same number of molecules and
  identical SMILES in the same order. Any mismatching entries are printed
  before the final assertion to make failures easier to diagnose.
  Skipped when ``Chem`` has no ``MolToJSON`` attribute.
  """
  sources = []
  for smiles in self.bigSmiList:
    sources.append(Chem.MolFromSmiles(smiles))
  recovered = Chem.JSONToMols(Chem.MolsToJSON(sources))
  # The recovered molecules are compared as returned by the reader; they are
  # deliberately not passed through Chem.SanitizeMol first.
  self.assertEqual(len(sources), len(recovered))
  expected = [Chem.MolToSmiles(mol) for mol in sources]
  observed = [Chem.MolToSmiles(mol) for mol in recovered]
  for idx in range(len(expected)):
    if expected[idx] == observed[idx]:
      continue
    # Dump the input SMILES and both outputs for every entry that differs.
    print(self.bigSmiList[idx])
    print(expected[idx])
    print(observed[idx])
    print("-------")
  self.assertEqual(expected, observed)
# Run the test suite when this file is executed directly as a script.
if '__main__' == __name__:
  unittest.main()

View File

@@ -24,6 +24,11 @@ from rdkit.Chem.rdchem import *
from rdkit.Chem.rdmolfiles import *
from rdkit.Chem.rdmolops import *
from rdkit.Chem.inchi import *
try:
# This is an optional component of the build
from rdkit.Chem.rdMolInterchange import *
except ImportError:
pass
# Coordgen needs to know where its template file is.
# The default install puts it in RDDataDir