Revert "ChemDraw Document and read/write support for the RDKit (#8539)" (#8618)

This reverts commit f8fde2f7d8.
This commit is contained in:
Greg Landrum
2025-07-03 07:57:20 +02:00
committed by GitHub
parent f8fde2f7d8
commit 969b1aadee
40 changed files with 6 additions and 8642 deletions

View File

@@ -7,8 +7,4 @@ add_subdirectory(RingFamilies)
add_subdirectory(GA)
if(RDK_BUILD_PUBCHEMSHAPE_SUPPORT)
add_subdirectory(pubchem_shape)
endif()
if(RDK_BUILD_CHEMDRAW_SUPPORT)
add_subdirectory(ChemDraw)
endif()
endif()

View File

@@ -1,193 +0,0 @@
# Umbrella target for ChemDraw support. It is part of the default build (ALL)
# and carries no commands of its own.
add_custom_target(chemdraw_support ALL)
# Provides cmake_print_variables()/cmake_print_properties() for debugging output.
include(CMakePrintHelpers)
# The ChemDraw library requires expat and expatpp to run.
# Both expat.h and expatpp.h need to be on the include path.
# Currently an OLD version of expatpp from SourceForge is used;
# it is included in this directory.
#
# For builds, we currently need a target_include_directories;
# this will need to be fixed in the future.
if(RDK_BUILD_CHEMDRAW_SUPPORT)
# Fetch and unpack the ChemDraw sources on first configure.
# CDXIO.h is the sentinel: when it already exists nothing is downloaded.
if(NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/chemdraw/chemdraw/CDXIO.h")
  set(RELEASE_NO "1.0.9")
  set(MD5 "a41bb1abb2df2082274b74dccee19fb4")
  downloadAndCheckMD5(
    "https://codeload.github.com/Glysade/chemdraw/tar.gz/refs/tags/v${RELEASE_NO}"
    "${CMAKE_CURRENT_SOURCE_DIR}/chemdraw-v${RELEASE_NO}.tar.gz"
    ${MD5})
  execute_process(
    COMMAND ${CMAKE_COMMAND} -E tar zxf
            "${CMAKE_CURRENT_SOURCE_DIR}/chemdraw-v${RELEASE_NO}.tar.gz"
    WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
    RESULT_VARIABLE chemdraw_untar_result)
  # Fail here with a clear message instead of letting file(RENAME) below
  # report a missing directory when the archive could not be extracted.
  if(NOT chemdraw_untar_result EQUAL 0)
    message(FATAL_ERROR
      "Failed to extract chemdraw-v${RELEASE_NO}.tar.gz (result: ${chemdraw_untar_result})")
  endif()
  # The archive unpacks to chemdraw-<release>; the build expects plain "chemdraw".
  file(RENAME
    "${CMAKE_CURRENT_SOURCE_DIR}/chemdraw-${RELEASE_NO}"
    "${CMAKE_CURRENT_SOURCE_DIR}/chemdraw")
endif()
# Detect the byte order and pass it to the compiler as PLATFORM_BIGENDIAN or
# PLATFORM_LITTLEENDIAN.
include(TestBigEndian)
message(STATUS "Looking for endianness")
test_big_endian(WORDS_BIGENDIAN)
# Byte-order convention: 1234 = LIL_ENDIAN, 4321 = BIGENDIAN
if(WORDS_BIGENDIAN)
  message(STATUS "CHEMDRAW BIGENDIAN PLATFORM")
  add_definitions("-DPLATFORM_BIGENDIAN")
else()
  message(STATUS "CHEMDRAW LITTLEENDIAN PLATFORM")
  add_definitions("-DPLATFORM_LITTLEENDIAN")
endif()
# expat is linked statically into the ChemDraw library and must not be
# installed on its own, so its subdirectory is added with EXCLUDE_FROM_ALL.
add_subdirectory(chemdraw/expatpp EXCLUDE_FROM_ALL)

# Directory-level include paths for the bundled expatpp and expat headers.
include_directories(
  "${CMAKE_CURRENT_SOURCE_DIR}/chemdraw/expatpp/expatpp-code-r6-trunk/src_pp"
  "${CMAKE_CURRENT_SOURCE_DIR}/chemdraw/expatpp/expatpp-code-r6-trunk/expat/lib"
)

# Build the third-party ChemDraw sources through the regular RDKit library
# machinery; every .cpp file found in chemdraw/chemdraw is compiled.
file(GLOB CHEMDRAW_SOURCE "chemdraw/chemdraw/*.cpp")
rdkit_library(ChemDraw
  ${CHEMDRAW_SOURCE}
  SHARED)
target_compile_definitions(ChemDraw
  PRIVATE
    CHEMDRAW_BUILD)
target_link_libraries(ChemDraw
  PRIVATE
    expat)
# Windows: export every symbol of the ChemDraw library.
# This is the union of the two conditions that were previously checked in
# separate, duplicated blocks (RDK_INSTALL_DLLS_MSVC and BUILD_SHARED_LIBS).
if((MSVC AND (RDK_INSTALL_DLLS_MSVC OR BUILD_SHARED_LIBS)) OR ((NOT MSVC) AND WIN32))
  message("== ChemDraw exporting all symbols")
  set_target_properties(ChemDraw PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS TRUE)
endif()

# CHEMDRAW_BUILD is already defined PRIVATE on ChemDraw for every platform
# where the library is created, so no extra WIN32-only definition is needed.

# Clang / GCC / Emscripten: hide all symbols by default and expose only the API.
# NOTE(review): "AppleClang" is a distinct compiler id and is not matched here -
# confirm whether that is intended.
if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR
   CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR
   CMAKE_CXX_COMPILER_ID STREQUAL "Emscripten")
  # Equivalent to -fvisibility=hidden plus hidden inline symbols.
  set_target_properties(ChemDraw PROPERTIES
    CXX_VISIBILITY_PRESET hidden
    VISIBILITY_INLINES_HIDDEN ON
  )
endif()
# The ChemDraw sources are third-party code that we do not control: silence
# their warnings and announce that the expat config header exists
# (HAVE_EXPAT_CONFIG_H).
if(NOT MSVC)
  add_definitions("-DTARGET_API_LIB -D__linux -DHAVE_EXPAT_CONFIG_H")
  target_compile_options(ChemDraw PRIVATE -w -Wno-unknown-pragmas -Wno-error)
  # The static variant only exists in some configurations, hence the TARGET test.
  if(RDK_INSTALL_STATIC_LIBS AND TARGET ChemDraw_static)
    target_compile_options(ChemDraw_static PRIVATE -w -Wno-unknown-pragmas -Wno-error)
  endif()
else()
  add_definitions("-DTARGET_API_LIB -D_WINDOWS -DTARGET_OS_WIN32 -DHAVE_EXPAT_CONFIG_H")
  target_compile_options(ChemDraw PRIVATE "/W0")
endif()

install(TARGETS ChemDraw DESTINATION ${RDKit_LibDir})

# Additional warning switches for C sources, selected by the C++ compiler id.
if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
  string(APPEND CMAKE_C_FLAGS " -Wno-comment -Wno-parentheses -Wno-logical-op-parentheses -Wno-pointer-bool-conversion -Wno-unused-value -Wno-unsequenced -Wno-constant-logical-operand")
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
  string(APPEND CMAKE_C_FLAGS " -Wformat-overflow=0 -Wformat=0 -Wno-format-security")
endif()
# RDKit-side libraries that sit on top of the third-party ChemDraw library.
include_directories(chemdraw)
set(RDChemDrawLib_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}
    CACHE STRING "RDChemDrawLib Include File" FORCE)

# Molecule reader/writer.
rdkit_library(RDChemDrawLib
  bond.cpp
  bracket.cpp
  chemdraw.cpp
  fragment.cpp
  node.cpp
  reaction.cpp
  utils.cpp
  writer.cpp
  SHARED LINK_LIBRARIES ChemDraw
  CIPLabeler ChemTransforms GraphMol RDGeneral Depictor SubstructMatch SmilesParse)

# Reaction reader, layered on RDChemDrawLib.
rdkit_library(RDChemDrawReactionLib
  chemdrawreaction.cpp
  SHARED LINK_LIBRARIES RDChemDrawLib ChemDraw
  CIPLabeler ChemTransforms ChemReactions GraphMol RDGeneral Depictor SubstructMatch SmilesParse)

# Export macros for both libraries; MSVC additionally gets XML_USE_MSC_EXTENSIONS.
target_compile_definitions(RDChemDrawLib PRIVATE RDKIT_RDCHEMDRAWLIB_BUILD)
target_compile_definitions(RDChemDrawReactionLib PRIVATE RDKIT_RDCHEMDRAWREACTIONLIB_BUILD)
if(MSVC)
  target_compile_definitions(RDChemDrawLib PRIVATE XML_USE_MSC_EXTENSIONS)
  target_compile_definitions(RDChemDrawReactionLib PRIVATE XML_USE_MSC_EXTENSIONS)
endif()

install(TARGETS RDChemDrawLib DESTINATION ${RDKit_LibDir})
install(TARGETS RDChemDrawReactionLib DESTINATION ${RDKit_LibDir})

set(RDK_CHEMDRAW_LIBS RDChemDrawLib CACHE STRING "the external libraries" FORCE)
# Fixed: this previously named the non-existent target "RDChemDrawReactinoLib".
set(RDK_CHEMDRAWREACTION_LIBS RDChemDrawReactionLib CACHE STRING "the external libraries" FORCE)

rdkit_headers(chemdraw.h DEST GraphMol)
rdkit_headers(chemdrawreaction.h DEST GraphMol)
# Catch-based tests. All of them link the same set of libraries; the reaction
# test additionally links RDChemDrawReactionLib.
set(chemdraw_test_libs
  RDChemDrawLib ChemDraw SubstructMatch ChemReactions
  FileParsers SmilesParse CIPLabeler ChemTransforms GraphMol)

rdkit_catch_test(chemdrawCatchTest test.cpp
  LINK_LIBRARIES ${chemdraw_test_libs})
rdkit_catch_test(chemdrawChiralCatchTest test-chiral.cpp
  LINK_LIBRARIES ${chemdraw_test_libs})
rdkit_catch_test(chemdrawReactionsCatchTest test-reactions.cpp
  LINK_LIBRARIES RDChemDrawReactionLib ${chemdraw_test_libs})
rdkit_catch_test(chemdraw3DCatchTest test_3d.cpp
  LINK_LIBRARIES ${chemdraw_test_libs})
rdkit_catch_test(chemdraw6KCatchTest test_6k.cpp
  LINK_LIBRARIES ${chemdraw_test_libs})

# The nanotubes in the 6K test overflow the SMILES writer's stack on MSVC, so
# that one executable is linked with a 4 MiB stack.
if(RDK_BUILD_CPP_TESTS AND MSVC)
  set_target_properties(chemdraw6KCatchTest PROPERTIES
    LINK_FLAGS "/STACK:4194304")
endif()

# Python bindings live in the Wrap subdirectory.
if(RDK_BUILD_PYTHON_WRAPPERS)
  add_subdirectory(Wrap)
endif()
endif(RDK_BUILD_CHEMDRAW_SUPPORT)

View File

@@ -1,29 +0,0 @@
/* Restores the compiler's diagnostic state: issues a "pop" on Clang, on GCC
 * newer than 4.5 and on MSVC. All other compiler branches are intentionally
 * empty.
 * NOTE(review): presumably paired with a header that issues the matching
 * "push" before third-party includes -- confirm. */
#if defined(__clang__)
/* Clang/LLVM. ---------------------------------------------- */
#pragma GCC diagnostic pop
#elif defined(__ICC) || defined(__INTEL_COMPILER)
/* Intel ICC/ICPC: nothing to restore. ---------------------- */
#elif (defined(__GNUC__) || defined(__GNUG__)) && \
(__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 5))
/* GNU GCC/G++. This branch is only taken for GCC newer than 4.5
 * (see the version test above).
 * --------------------------------------------- */
#pragma GCC diagnostic pop
#elif defined(__HP_cc) || defined(__HP_aCC)
/* Hewlett-Packard C/aC++: nothing to restore. -------------- */
#elif defined(__IBMC__) || defined(__IBMCPP__)
/* IBM XL C/C++: nothing to restore. ------------------------ */
#elif defined(_MSC_VER)
/* Microsoft Visual Studio. --------------------------------- */
#pragma warning(pop)
#elif defined(__PGI)
/* Portland Group PGCC/PGCPP: nothing to restore. ----------- */
#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
/* Oracle Solaris Studio: nothing to restore. --------------- */
#endif

View File

@@ -1,66 +0,0 @@
/* Saves the compiler's diagnostic state (where supported) and then disables a
 * list of warnings on Clang, GCC and MSVC. All other compiler branches are
 * intentionally empty.
 * NOTE(review): presumably included right before third-party headers and
 * paired with a header that pops the state again -- confirm. */
#if defined(__clang__)
/* Clang/LLVM. ---------------------------------------------- */
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#pragma GCC diagnostic ignored "-Wunused-value"
#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
#pragma GCC diagnostic ignored "-Wsign-compare"
#pragma GCC diagnostic ignored "-Wconversion"
#pragma GCC diagnostic ignored "-Wuninitialized"
#pragma GCC diagnostic ignored "-Wtype-limits"
#pragma GCC diagnostic ignored "-Wreorder"
#pragma GCC diagnostic ignored "-Wunused"
#pragma GCC diagnostic ignored "-Wmacro-redefined"
#pragma GCC diagnostic ignored "-Wunknown-pragmas"
#pragma GCC diagnostic ignored "-Wignored-qualifiers"
#pragma GCC diagnostic ignored "-Wall"
#pragma GCC diagnostic ignored "-Wextra"
/* Apple's clang: this warning is only silenced from build 7000072 on. */
#if defined(__apple_build_version__)
#if __apple_build_version__ >= 7000072
#pragma GCC diagnostic ignored "-Wunused-local-typedef"
#endif
#endif
#elif defined(__ICC) || defined(__INTEL_COMPILER)
/* Intel ICC/ICPC: nothing to do. --------------------------- */
#elif (defined(__GNUC__) || defined(__GNUG__)) && \
(__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 1))
/* GNU GCC/G++ newer than 4.1. ------------------------------ */
/* The push itself is guarded by a stricter version test than the
 * "ignored" pragmas that follow. */
#if (__GNUC__ > 4 || __GNUC_MINOR__ > 5)
#pragma GCC diagnostic push
#endif
#pragma GCC diagnostic ignored "-Wunused-parameter"
#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
#pragma GCC diagnostic ignored "-Wsign-compare"
#pragma GCC diagnostic ignored "-Wconversion"
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
//#pragma GCC diagnostic ignored "-Wmacro-redefined"
#pragma GCC diagnostic ignored "-Wunknown-pragmas"
#pragma GCC diagnostic ignored "-Wignored-qualifiers"
#pragma GCC diagnostic ignored "-Wextra"
#pragma GCC diagnostic ignored "-Wall"
/* Version-gated warnings. */
#if (__GNUC__ > 4 || __GNUC_MINOR__ > 7)
#pragma GCC diagnostic ignored "-Wunused-local-typedefs"
#endif
#if (__GNUC__ > 8)
#pragma GCC diagnostic ignored "-Wdeprecated-copy"
#pragma GCC diagnostic ignored "-Wpessimizing-move"
#endif
#elif defined(__HP_cc) || defined(__HP_aCC)
/* Hewlett-Packard C/aC++: nothing to do. ------------------- */
#elif defined(__IBMC__) || defined(__IBMCPP__)
/* IBM XL C/C++: nothing to do. ----------------------------- */
#elif defined(_MSC_VER)
/* Microsoft Visual Studio: save state, then disable C4996 and C4267. */
#pragma warning(push)
#pragma warning(disable : 4996 4267)
#elif defined(__PGI)
/* Portland Group PGCC/PGCPP: nothing to do. ---------------- */
#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
/* Oracle Solaris Studio: nothing to do. -------------------- */
#endif

View File

@@ -1,8 +0,0 @@
# Python extension module rdChemDraw, built from rdChemDraw.cpp with DEST Chem.
# RDKIT_CHEMDRAW_BUILD is removed from the directory-level definitions first.
# NOTE(review): presumably so the wrapper does not compile as if it were part
# of the library itself - confirm.
remove_definitions(-DRDKIT_CHEMDRAW_BUILD)

rdkit_python_extension(rdChemDraw rdChemDraw.cpp
  DEST Chem
  LINK_LIBRARIES RDChemDrawLib RDChemDrawReactionLib)

View File

@@ -1,261 +0,0 @@
//
// Copyright (c) 2025, Glysade Inc.
// and other RDKit contributors
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Novartis Institutes for BioMedical Research Inc.
// nor the names of its contributors may be used to endorse or promote
// products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
#include <ChemDraw/chemdraw.h>
#include <ChemDraw/chemdrawreaction.h>
#include <GraphMol/MolPickler.h>
#include <GraphMol/Wrap/props.hpp>
#include <RDBoost/python.h>
#include <GraphMol/ChemReactions/Reaction.h>
#include <GraphMol/ChemReactions/ReactionPickler.h>
#include <GraphMol/ChemReactions/ReactionParser.h>
#include <GraphMol/ChemReactions/ReactionRunner.h>
#include <GraphMol/ChemReactions/PreprocessRxn.h>
#include <GraphMol/ChemReactions/SanitizeRxn.h>
#include <GraphMol/MarvinParse/MarvinParser.h>
#include <GraphMol/Depictor/DepictUtils.h>
#include <GraphMol/FilterCatalog/FunctionalGroupHierarchy.h>
#include <RDBoost/Wrap.h>
#include <RDGeneral/Exceptions.h>
#include <GraphMol/SanitException.h>
#include <RDGeneral/FileParseException.h>
#include <GraphMol/ChemReactions/ReactionFingerprints.h>
#include <GraphMol/ChemReactions/ReactionUtils.h>
namespace python = boost::python;
using namespace RDKit;
namespace {

// Extracts a std::string from a Python object. A direct narrow-string
// extraction is tried first; otherwise the object is extracted as a wide
// string and narrowed character by character.
std::string pyObjectToString(python::object input) {
  python::extract<std::string> narrow(input);
  if (narrow.check()) {
    return narrow();
  }
  std::wstring wide = python::extract<std::wstring>(input);
  return std::string(wide.begin(), wide.end());
}

// Moves every molecule out of `mols` into a Python tuple. Each unique_ptr is
// released into an ROMOL_SPTR before it is appended.
template <typename MolContainer>
python::tuple releaseMolsToTuple(MolContainer &mols) {
  python::list collected;
  for (auto &mol : mols) {
    ROMOL_SPTR owned(static_cast<ROMol *>(mol.release()));
    collected.append(owned);
  }
  return python::tuple(collected);
}

// Moves every reaction out of `rxns` into a Python tuple, transferring
// ownership to std::shared_ptr.
python::tuple releaseReactionsToTuple(
    std::vector<std::unique_ptr<ChemicalReaction>> &rxns) {
  python::list collected;
  for (auto &rxn : rxns) {
    collected.append(std::shared_ptr<ChemicalReaction>(rxn.release()));
  }
  return python::tuple(collected);
}

// Parses all molecules from a ChemDraw text block.
// BadFileException is raised in Python as IOError, FileParseException is
// logged as a warning, and any other exception is swallowed; in the last two
// cases the returned tuple is empty.
python::object MolsFromChemDrawBlockHelper(const std::string &block,
                                           bool sanitize, bool removeHs) {
  std::vector<std::unique_ptr<RWMol>> parsed;
  try {
    parsed = RDKit::v2::MolsFromChemDrawBlock(block, {sanitize, removeHs});
  } catch (RDKit::BadFileException &e) {
    PyErr_SetString(PyExc_IOError, e.what());
    throw python::error_already_set();
  } catch (RDKit::FileParseException &e) {
    BOOST_LOG(rdWarningLog) << e.what() << std::endl;
  } catch (...) {
    // deliberately ignored: best-effort parsing
  }
  return releaseMolsToTuple(parsed);
}

// Parses all molecules from a ChemDraw file whose name is given as a Python
// string object. Exceptions from the parser are not intercepted here.
python::tuple MolsFromChemDrawFileHelper(python::object filename,
                                         bool sanitize, bool removeHs) {
  auto parsed = RDKit::v2::MolsFromChemDrawFile(pyObjectToString(filename),
                                                {sanitize, removeHs});
  return releaseMolsToTuple(parsed);
}

// Parses all reactions from a ChemDraw file. Error handling mirrors
// MolsFromChemDrawBlockHelper.
python::object ReactionsFromChemDrawFileHelper(const char *filename,
                                               bool sanitize, bool removeHs) {
  std::vector<std::unique_ptr<ChemicalReaction>> parsed;
  try {
    parsed = RDKit::v2::ChemDrawFileToChemicalReactions(filename, sanitize,
                                                        removeHs);
  } catch (RDKit::BadFileException &e) {
    PyErr_SetString(PyExc_IOError, e.what());
    throw python::error_already_set();
  } catch (RDKit::FileParseException &e) {
    BOOST_LOG(rdWarningLog) << e.what() << std::endl;
  } catch (...) {
    // deliberately ignored: best-effort parsing
  }
  return releaseReactionsToTuple(parsed);
}

// Parses all reactions from a ChemDraw text block supplied as a Python
// string object. FileParseException is logged as a warning; any other
// exception is swallowed.
python::object ReactionsFromChemDrawBlockHelper(python::object imolBlock,
                                                bool sanitize, bool removeHs) {
  std::istringstream blockStream(pyObjectToString(imolBlock));
  std::vector<std::unique_ptr<ChemicalReaction>> parsed;
  try {
    parsed = RDKit::v2::ChemDrawDataStreamToChemicalReactions(
        blockStream, sanitize, removeHs);
  } catch (RDKit::FileParseException &e) {
    BOOST_LOG(rdWarningLog) << e.what() << std::endl;
  } catch (...) {
    // deliberately ignored: best-effort parsing
  }
  return releaseReactionsToTuple(parsed);
}

}  // namespace
// Python module definition: registers the ChemDraw molecule/reaction parsers,
// the CDXFormat enum and the ChemDraw writer.
BOOST_PYTHON_MODULE(rdChemDraw) {
  python::scope().attr("__doc__") =
      "Module containing classes and functions for working with ChemDraw files.";

  // Molecule Interface
  std::string docString =
      R"DOC(Extract all molecules from a ChemDraw file.
Note that the ChemDraw format is large and complex, the RDKit doesn't support
full functionality, just the base ones required for molecule and
reaction parsing.
ARGUMENTS:
- filename: the chemdraw filename (.cdx/.cdxml)
- sanitize: if True, sanitize the molecules [default True]
- removeHs: if True, convert explicit Hs into implicit Hs. [default True]
RETURNS:
a tuple of parsed Mol objects.)DOC";
  python::def("MolsFromChemDrawFile", MolsFromChemDrawFileHelper,
              (python::arg("filename"), python::arg("sanitize") = true,
               python::arg("removeHs") = true),
              docString.c_str());

  // Fixed: this entry parses a text block, not a file.
  docString =
      R"DOC(Extract all molecules from a ChemDraw text block.
Note that the ChemDraw format is large and complex, the RDKit doesn't support
full functionality, just the base ones required for molecule and
reaction parsing.
ARGUMENTS:
- block: the CDX/CDXML block
- sanitize: if True, sanitize the molecules [default True]
- removeHs: if True, convert explicit Hs into implicit Hs. [default True]
RETURNS:
a tuple of parsed Mol objects.)DOC";
  python::def("MolsFromChemDrawBlock", MolsFromChemDrawBlockHelper,
              (python::arg("block"), python::arg("sanitize") = true,
               python::arg("removeHs") = true),
              docString.c_str());

  // Reaction Interface
  // Fixed: the documented defaults now match the registered ones (False).
  docString =
      R"DOC(Extract all reactions from a ChemDraw file.
Note that the ChemDraw format is large and complex, the RDKit doesn't support
full functionality, just the base ones required for molecule and
reaction parsing.
ARGUMENTS:
- filename: the chemdraw filename (.cdx/.cdxml)
- sanitize: if True, sanitize the molecules [default False]
- removeHs: if True, convert explicit Hs into implicit Hs. [default False]
RETURNS:
a tuple of parsed ChemicalReaction objects.)DOC";
  python::def("ReactionsFromChemDrawFile", ReactionsFromChemDrawFileHelper,
              (python::arg("filename"), python::arg("sanitize") = false,
               python::arg("removeHs") = false),
              docString.c_str());

  // Fixed: the argument is named rxnblock, and the defaults are False.
  docString =
      R"DOC(Extract all reactions from a ChemDraw text block.
Note that the ChemDraw format is large and complex, the RDKit doesn't support
full functionality, just the base ones required for molecule and
reaction parsing.
ARGUMENTS:
- rxnblock: the CDX/CDXML block
- sanitize: if True, sanitize the molecules [default False]
- removeHs: if True, convert explicit Hs into implicit Hs. [default False]
RETURNS:
a tuple of parsed ChemicalReaction objects.)DOC";
  python::def(
      "ReactionsFromChemDrawBlock", ReactionsFromChemDrawBlockHelper,
      (python::arg("rxnblock"), python::arg("sanitize") = false,
       python::arg("removeHs") = false),
      docString.c_str());

  // Output formats accepted by MolToChemDrawBlock.
  python::enum_<v2::CDXFormat>("CDXFormat")
      .value("CDX", v2::CDXFormat::CDX)
      .value("CDXML", v2::CDXFormat::CDXML);

  // Writer Interface
  // Fixed: the RETURNS text used to describe an iterator of reactions.
  docString =
      R"DOC(Convert a molecule into a chemdraw string using the specified format
ARGUMENTS:
- mol: the molecule to convert
- format: The ChemDraw format to use, CDXML/CDX [default CDXML]
RETURNS:
the chemdraw string for the molecule in the requested format.)DOC";
  python::def(
      "MolToChemDrawBlock", v2::MolToChemDrawBlock,
      (python::arg("mol"), python::arg("format") = v2::CDXFormat::CDXML),
      docString.c_str());
}

View File

@@ -1,277 +0,0 @@
# Copyright (c) 2025 Glysade Inc
# All rights reserved.
#
# This file is part of the RDKit.
# The contents are covered by the terms of the BSD license
# which is included in the file license.txt, found at the root
# of the RDKit source tree.
import copy
import os
import sys
import unittest
from rdkit import Chem
from rdkit.Chem import rdChemDraw, rdChemDrawReaction
class TestChemDraw(unittest.TestCase):
def test_cdxml(self):
cdxml = """<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE CDXML SYSTEM "http://www.cambridgesoft.com/xml/cdxml.dtd" >
<CDXML
CreationProgram="ChemDraw JS 2.0.0.9"
Name="ACS Document 1996"
BoundingBox="94.75 178.16 154.89 211.17"
WindowPosition="0 0"
WindowSize="0 0"
FractionalWidths="yes"
InterpretChemically="yes"
ShowAtomQuery="yes"
ShowAtomStereo="no"
ShowAtomEnhancedStereo="yes"
ShowAtomNumber="no"
ShowResidueID="no"
ShowBondQuery="yes"
ShowBondRxn="yes"
ShowBondStereo="no"
ShowTerminalCarbonLabels="no"
ShowNonTerminalCarbonLabels="no"
HideImplicitHydrogens="no"
Magnification="666"
LabelFont="24"
LabelSize="10"
LabelFace="96"
CaptionFont="24"
CaptionSize="10"
HashSpacing="2.50"
MarginWidth="1.60"
LineWidth="0.60"
BoldWidth="2"
BondLength="14.40"
BondSpacing="18"
ChainAngle="120"
LabelJustification="Auto"
CaptionJustification="Left"
AminoAcidTermini="HOH"
ShowSequenceTermini="yes"
ShowSequenceBonds="yes"
ShowSequenceUnlinkedBranches="no"
ResidueWrapCount="40"
ResidueBlockCount="10"
ResidueZigZag="yes"
NumberResidueBlocks="no"
PrintMargins="36 36 36 36"
MacPrintInfo="0003000001200120000000000B6608A0FF84FF880BE309180367052703FC0002000001200120000000000B6608A0000100000064000000010001010100000001270F000100010000000000000000000000000002001901900000000000400000000000000000000100000000000000000000000000000000"
ChemPropName=""
ChemPropFormula="Chemical Formula: "
ChemPropExactMass="Exact Mass: "
ChemPropMolWt="Molecular Weight: "
ChemPropMOverZ="m/z: "
ChemPropAnalysis="Elemental Analysis: "
ChemPropBoilingPt="Boiling Point: "
ChemPropMeltingPt="Melting Point: "
ChemPropCritTemp="Critical Temp: "
ChemPropCritPres="Critical Pres: "
ChemPropCritVol="Critical Vol: "
ChemPropGibbs="Gibbs Energy: "
ChemPropLogP="Log P: "
ChemPropMR="MR: "
ChemPropHenry="Henry&apos;s Law: "
ChemPropEForm="Heat of Form: "
ChemProptPSA="tPSA: "
ChemPropID=""
ChemPropFragmentLabel=""
color="0"
bgcolor="1"
RxnAutonumberStart="1"
RxnAutonumberConditions="no"
RxnAutonumberStyle="Roman"
><colortable>
<color r="1" g="1" b="1"/>
<color r="0" g="0" b="0"/>
<color r="1" g="0" b="0"/>
<color r="1" g="1" b="0"/>
<color r="0" g="1" b="0"/>
<color r="0" g="1" b="1"/>
<color r="0" g="0" b="1"/>
<color r="1" g="0" b="1"/>
</colortable><fonttable>
<font id="24" charset="utf-8" name="Arial"/>
</fonttable><page
id="32"
BoundingBox="0 0 542 354"
Width="542"
Height="354"
HeaderPosition="36"
FooterPosition="36"
PageOverlap="0"
PrintTrimMarks="yes"
HeightPages="1"
WidthPages="1"
DrawingSpace="poster"
><fragment
id="10"
BoundingBox="94.75 178.16 154.89 211.17"
Z="4"
><n
id="7"
p="95.05 187.47"
Z="1"
AS="N"
/><n
id="9"
p="95.05 201.87"
Z="3"
AS="N"
/><n
id="11"
p="106.31 210.84"
Z="5"
AS="N"
/><n
id="13"
p="120.35 207.64"
Z="7"
AS="N"
/><n
id="15"
p="126.59 194.67"
Z="9"
AS="N"
/><n
id="17"
p="120.35 181.69"
Z="11"
AS="N"
/><n
id="19"
p="106.31 178.49"
Z="13"
AS="N"
/><n
id="28"
p="140.99 194.67"
Z="22"
NodeType="Nickname"
NeedsClean="yes"
AS="N"
><fragment
id="33"
><n
id="34"
p="148.17 207.09"
Element="8"
NumHydrogens="0"
/><n
id="35"
p="162.52 207.09"
/><n
id="36"
p="176.87 207.09"
/><n
id="37"
p="169.69 194.67"
/><n
id="38"
p="169.69 219.52"
/><n
id="39"
p="140.99 194.67"
/><n
id="40"
p="148.17 182.24"
Element="8"
NumHydrogens="0"
/><n
id="41"
p="126.64 194.67"
NodeType="ExternalConnectionPoint"
/><b
id="42"
B="39"
E="40"
Order="2"
/><b
id="43"
B="35"
E="38"
/><b
id="44"
B="35"
E="36"
/><b
id="45"
B="35"
E="37"
/><b
id="46"
B="34"
E="35"
/><b
id="47"
B="34"
E="39"
/><b
id="48"
B="41"
E="39"
/></fragment><t
p="137.66 198.28"
BoundingBox="137.66 189.64 154.89 198.28"
LabelJustification="Left"
LabelAlignment="Left"
><s font="24" size="9.95" color="0" face="96">Boc</s></t></n><b
id="21"
Z="15"
B="7"
E="9"
BS="N"
/><b
id="22"
Z="16"
B="9"
E="11"
BS="N"
/><b
id="23"
Z="17"
B="11"
E="13"
BS="N"
/><b
id="24"
Z="18"
B="13"
E="15"
BS="N"
/><b
id="25"
Z="19"
B="15"
E="17"
BS="N"
/><b
id="26"
Z="20"
B="17"
E="19"
BS="N"
/><b
id="27"
Z="21"
B="19"
E="7"
BS="N"
/><b
id="29"
Z="23"
B="15"
E="28"
BS="N"
/></fragment></page></CDXML>"""
mols = rdChemDraw.MolsFromChemDraw(cdxml)
self.assertEqual(len(mols), 1)
self.assertEqual(Chem.MolToSmiles(mols[0]), "CC(C)(C)OC(=O)C1CCCCCC1")

View File

@@ -1,227 +0,0 @@
//
// Copyright (c) 2024, Glysade Inc
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Novartis Institutes for BioMedical Research Inc.
// nor the names of its contributors may be used to endorse or promote
// products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//#include "node.h"
#include "utils.h"
#include "fragment.h"
namespace RDKit {
namespace ChemDraw {

// Adds the ChemDraw bond `bond` to `mol`.
//
// The end atoms are looked up in `pagedata.atomIds` via the bond's begin/end
// node ids. The CDX bond order is mapped to an RDKit bond type, or to a query
// bond for the "single or double", "single or aromatic", "double or aromatic"
// and "any" orders. Wedge/hash/wavy display styles are turned into bond
// directions, and the CDX object id is stored on the new bond as CDX_BOND_ID.
//
// \param mol         molecule that receives the bond
// \param fragmentId  id of the enclosing fragment (only used in log output)
// \param bond        the ChemDraw bond to convert
// \param pagedata    page-level lookup data (provides atomIds)
// \return false if an end atom cannot be found or the bond order is not
//         supported (callers are expected to skip the fragment); true
//         otherwise.
bool parseBond(RWMol &mol, unsigned int fragmentId, CDXBond &bond,
               PageData &pagedata) {
  int bond_id = bond.GetObjectID();
  Atom *start_atom = pagedata.atomIds[bond.m_beginNodeID];
  Atom *end_atom = pagedata.atomIds[bond.m_endNodeID];
  if (!start_atom || !end_atom) {
    BOOST_LOG(rdErrorLog) << "Bad bond in CDXML skipping fragment "
                          << fragmentId << "..." << std::endl;
    return false;
  }

  Bond::BondType order = Bond::UNSPECIFIED;
  // Only set for the query-style bond orders.
  std::unique_ptr<QueryBond> qb;
  switch (bond.m_bondOrder) {
    case kCDXBondOrder_Single:
      order = Bond::BondType::SINGLE;
      break;
    case kCDXBondOrder_Double:
      order = Bond::BondType::DOUBLE;
      break;
    case kCDXBondOrder_Triple:
      order = Bond::BondType::TRIPLE;
      break;
    case kCDXBondOrder_Quadruple:
      order = Bond::BondType::QUADRUPLE;
      break;
    case kCDXBondOrder_Quintuple:
      order = Bond::BondType::QUINTUPLE;
      break;
    case kCDXBondOrder_Sextuple:
      order = Bond::BondType::HEXTUPLE;
      break;
    case kCDXBondOrder_OneHalf:
      order = Bond::BondType::AROMATIC;
      start_atom->setIsAromatic(true);
      end_atom->setIsAromatic(true);
      break;
    case kCDXBondOrder_TwoHalf:
      order = Bond::BondType::TWOANDAHALF;
      break;
    case kCDXBondOrder_ThreeHalf:
      order = Bond::BondType::THREEANDAHALF;
      break;
    case kCDXBondOrder_FourHalf:
      order = Bond::BondType::FOURANDAHALF;
      break;
    case kCDXBondOrder_FiveHalf:
      order = Bond::BondType::FIVEANDAHALF;
      break;
    case kCDXBondOrder_Dative:
      order = Bond::BondType::DATIVE;
      break;
    case kCDXBondOrder_Ionic:
      order = Bond::BondType::IONIC;
      break;
    case kCDXBondOrder_SingleOrDouble: {
      order = Bond::BondType::SINGLE;
      qb = std::make_unique<QueryBond>();
      qb->setQuery(makeSingleOrDoubleBondQuery());
      break;
    }
    case kCDXBondOrder_SingleOrAromatic: {
      order = Bond::BondType::SINGLE;
      qb = std::make_unique<QueryBond>();
      qb->setQuery(makeSingleOrAromaticBondQuery());
      break;
    }
    case kCDXBondOrder_DoubleOrAromatic: {
      order = Bond::BondType::DOUBLE;
      qb = std::make_unique<QueryBond>();
      qb->setQuery(makeDoubleOrAromaticBondQuery());
      break;
    }
    case kCDXBondOrder_Any: {
      qb = std::make_unique<QueryBond>();
      qb->setQuery(makeBondNullQuery());
      break;
    }
    case kCDXBondOrder_Hydrogen:
      BOOST_LOG(rdErrorLog)
          << "Unhandled bond order Hydrogen, skipping fragment" << std::endl;
      return false;
    case kCDXBondOrder_ThreeCenter:
      BOOST_LOG(rdErrorLog)
          << "Unhandled bond order ThreeCenter, skipping fragment" << std::endl;
      return false;
    case kCDXBondOrder_Half:
      BOOST_LOG(rdErrorLog)
          << "Unhandled bond order Half, skipping fragment" << std::endl;
      return false;
    default:
      BOOST_LOG(rdErrorLog) << "Bad bond, skipping fragment" << std::endl;
      return false;
  }

  // The RDKit only supports one direction for wedges, so bonds whose wedge
  // starts at the end atom are normalized by swapping their ends. All other
  // display styles keep the original atom order.
  const bool swap_bond_ends =
      bond.m_display == kCDXBondDisplay_WedgedHashEnd ||
      bond.m_display == kCDXBondDisplay_WedgeEnd;

  unsigned int bondIdx = 0;
  auto startIdx = start_atom->getIdx();
  auto endIdx = end_atom->getIdx();
  if (swap_bond_ends) {
    std::swap(startIdx, endIdx);
  }
  // addBond() returns the new number of bonds, hence the "- 1" to get the
  // index of the bond that was just added.
  if (qb) {
    qb->setBeginAtomIdx(startIdx);
    qb->setEndAtomIdx(endIdx);
    bondIdx = mol.addBond(qb.release(), true) - 1;
  } else {
    bondIdx = mol.addBond(startIdx, endIdx, order) - 1;
  }

  Bond *bnd = mol.getBondWithIdx(bondIdx);
  if (order == Bond::BondType::AROMATIC) {
    bnd->setIsAromatic(true);
    bnd->getBeginAtom()->setIsAromatic(true);
    bnd->getEndAtom()->setIsAromatic(true);
  }
  bnd->setProp(CDX_BOND_ID, bond.GetObjectID());

  // Translate the display style into a bond direction. The outer `default`
  // label now sits at switch level; it used to be nested inside the Wavy
  // case's braces.
  switch (bond.m_display) {
    case kCDXBondDisplay_WedgedHashBegin:
    case kCDXBondDisplay_WedgedHashEnd:
      bnd->setBondDir(Bond::BondDir::BEGINDASH);
      bnd->setProp(common_properties::_MolFileBondCfg, 3);
      break;
    case kCDXBondDisplay_WedgeBegin:
    case kCDXBondDisplay_WedgeEnd:
      bnd->setBondDir(Bond::BondDir::BEGINWEDGE);
      bnd->setProp(common_properties::_MolFileBondCfg, 1);
      break;
    case kCDXBondDisplay_Wavy:
      switch (order) {
        case Bond::BondType::SINGLE:
          bnd->setBondDir(Bond::BondDir::UNKNOWN);
          bnd->setProp(common_properties::_MolFileBondCfg, 2);
          break;
        case Bond::BondType::DOUBLE:
          bnd->setBondDir(Bond::BondDir::EITHERDOUBLE);
          bnd->setStereo(Bond::STEREOANY);
          break;
        default:
          // Reached for every order that is neither single nor double.
          BOOST_LOG(rdWarningLog)
              << "ignoring Wavy bond set on a non double bond id: " << bond_id
              << std::endl;
          break;
      }
      break;
    default:
      break;
  }
  return true;
}

}  // namespace ChemDraw
}  // namespace RDKit

View File

@@ -1,52 +0,0 @@
//
// Copyright (c) 2024, Glysade Inc
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Novartis Institutes for BioMedical Research Inc.
// nor the names of its contributors may be used to endorse or promote
// products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
#ifndef CHEMDRAW_BOND_H
#define CHEMDRAW_BOND_H
#include <GraphMol/RDKitBase.h>
#include <GraphMol/QueryAtom.h>
#include <GraphMol/QueryOps.h>
#include "ChemDrawStartInclude.h"
#include "chemdraw/CDXStdObjects.h"
#include "ChemDrawEndInclude.h"
#include "utils.h"
#include "fragment.h"
namespace RDKit {
namespace ChemDraw {
//! Converts one CDX bond object into a bond on \c mol.
/*!
  \param mol         molecule that receives the new bond
  \param fragmentId  id of the fragment being parsed
                     (NOTE(review): how it is used is not visible from this
                     header - confirm against bond.cpp)
  \param bond        the ChemDraw bond object to convert
  \param pagedata    page-level bookkeeping shared while parsing
  \return a success flag
          (NOTE(review): presumably false means the bond could not be
          converted - confirm against the definition)
*/
bool parseBond(RWMol &mol, unsigned int fragmentId, CDXBond &bond,
PageData &pagedata);
}
}
#endif

View File

@@ -1,106 +0,0 @@
//
// Copyright (c) 2024, Glysade Inc
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Novartis Institutes for BioMedical Research Inc.
// nor the names of its contributors may be used to endorse or promote
// products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//#include "node.h"
#include "utils.h"
#include "bracket.h"
namespace RDKit {
namespace ChemDraw {
// Placeholder for bracketed-group handling. Nothing is extracted yet: this is
// waiting on full bracket support in the rdkit or on support for expansion
// inside the RDChemDrawLib. The function only walks the bracket's children and
// always reports success.
bool parseBracket(CDXBracketedGroup &bracket, PageData & /*pagedata*/) {
  // Walk the atoms/bonds contained in the bracket.
  for (auto &child : bracket.ContainedObjects()) {
    const CDXDatumID childTag = (CDXDatumID)child.second->GetTag();
    if (childTag != kCDXObj_BracketAttachment) {
      continue;
    }
    CDXBracketAttachment &attachmentObj =
        (CDXBracketAttachment &)(*child.second);
    for (auto &entry : attachmentObj.ContainedObjects()) {
      const CDXDatumID entryTag = (CDXDatumID)entry.second->GetTag();
      if (entryTag == kCDXObj_CrossingBond) {
        // Unimplemented. A CDXCrossingBond carries:
        //   m_bondID      - bond that crosses brackets
        //   m_innerAtomID - atom within brackets
      }
    }
  }

  // No usage type is acted upon yet (no SubstanceGroup is built); every known
  // value is listed so each one stays explicitly handled.
  switch (bracket.m_usage) {
    case kCDXBracketUsage_Unspecified:
    case kCDXBracketUsage_Anypolymer:
    case kCDXBracketUsage_Component:
    case kCDXBracketUsage_Copolymer:
    case kCDXBracketUsage_CopolymerAlternating:
    case kCDXBracketUsage_CopolymerBlock:
    case kCDXBracketUsage_CopolymerRandom:
    case kCDXBracketUsage_Crosslink:
    case kCDXBracketUsage_Generic:
    case kCDXBracketUsage_Graft:
    case kCDXBracketUsage_Mer:
    case kCDXBracketUsage_MixtureOrdered:
    case kCDXBracketUsage_MixtureUnordered:
    case kCDXBracketUsage_Modification:
    case kCDXBracketUsage_Monomer:  // repeat head-to-tail, head-to-head
                                    // (check flip)
    case kCDXBracketUsage_MultipleGroup:
    case kCDXBracketUsage_MultipleGroupOverride:
    case kCDXBracketUsage_SRU:  // Structural repeating unit, repeat pattern
                                // head-to-tail (default) head-to-head (check
                                // flip?)
    case kCDXBracketUsage_Unused1:
    case kCDXBracketUsage_Unused2:
      break;
  }
  return true;
}
}
} // namespace RDKit

View File

@@ -1,52 +0,0 @@
//
// Copyright (c) 2024, Glysade Inc
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Novartis Institutes for BioMedical Research Inc.
// nor the names of its contributors may be used to endorse or promote
// products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
#ifndef CHEMDRAW_BRACKET_H
#define CHEMDRAW_BRACKET_H
#include <GraphMol/RDKitBase.h>
#include <GraphMol/QueryAtom.h>
#include <GraphMol/QueryOps.h>
#include "ChemDrawStartInclude.h"
#include "chemdraw/CDXStdObjects.h"
#include "ChemDrawEndInclude.h"
#include "utils.h"
#include "fragment.h"
namespace RDKit {
namespace ChemDraw {
//! Processes a ChemDraw bracketed group.
/*!
  \param bracket   the bracketed group to process
  \param pagedata  page-level bookkeeping shared while parsing
  \return a success flag
          (NOTE(review): bracket support looks unimplemented at the moment -
          confirm against bracket.cpp before relying on any side effects)
*/
bool parseBracket(CDXBracketedGroup &bracket, PageData &pagedata);
}
}
#endif

View File

@@ -1,359 +0,0 @@
//
// Copyright (c) 2024 Glysade Inc and other RDkit contributors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Novartis Institutes for BioMedical Research Inc.
// nor the names of its contributors may be used to endorse or promote
// products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
#include <string>
#include <iostream>
#include <fstream>
#include <sstream>
#include "ChemDrawStartInclude.h"
#include "chemdraw/CDXMLParser.h"
#include "chemdraw/CDXStdObjects.h"
#include "ChemDrawEndInclude.h"
#include "bracket.h"
#include "chemdraw.h"
#include "chemdraw_doc.h"
#include "fragment.h"
#include "reaction.h"
#include "utils.h"
#include <RDGeneral/BadFileException.h>
#include <GraphMol/MolOps.h>
#include <GraphMol/QueryAtom.h>
#include <GraphMol/QueryBond.h>
#include <GraphMol/QueryOps.h>
#include <GraphMol/ChemTransforms/MolFragmenter.h>
#include <GraphMol/FileParsers/MolFileStereochem.h>
#include <GraphMol/Atropisomers.h>
#include <boost/algorithm/string.hpp>
#include <filesystem>
// #define DEBUG 1
namespace {
using namespace RDKit;
using namespace RDKit::v2;
using namespace RDKit::ChemDraw;
// Recursively walks the children of `node` and converts every CDX fragment
// found into an RDKit molecule appended to pagedata.mols.
//
// The parsing of fragments is recursive because fragments may be embedded
// further down in the document, i.e. a group may hold multiple fragments.
//
// pagedata.groupedFragments groups fragments together for the purposes of
// reactions: a fragment found inside a group is filed under the group's id,
// an ungrouped fragment is filed under its own id (so it ends up as a vector
// of size 1). The reaction schemes in the CDXML docs appear to use the
// fragment id for ungrouped fragments and the group id for grouped fragments,
// so groupedFragments holds both for ease of bookkeeping.
//
// Reaction schemes are only collected here (pagedata.schemes) and bracketed
// groups are handed to parseBracket().
//
// A fragment that fails to parse, fuse or sanitize is skipped (with a warning
// in the latter two cases) instead of aborting the whole document.
void visit_children(
    CDXObject &node, PageData &pagedata,
    int &missing_frag_id,  // if we don't have a fragment id, start at -1 and
                           // decrement
    double bondLength,  // bond length of the document for assigning coordinates
    const ChemDrawParserParams &params,  // parser parameters
    int group_id = -1) {  // current group id for this set of subnodes
  MolzipParams molzip_params;
  molzip_params.label = MolzipLabel::AtomProperty;
  molzip_params.atomProperty = FUSE_LABEL;
  molzip_params.enforceValenceRules = false;

  for (const auto &frag : node.ContainedObjects()) {
    CDXDatumID id = (CDXDatumID)frag.second->GetTag();
    if (id == kCDXObj_Fragment) {
      std::unique_ptr<RWMol> mol = std::make_unique<RWMol>();
      if (!parseFragment(*mol, (CDXFragment &)(*frag.second), pagedata,
                         missing_frag_id)) {
        continue;
      }
      unsigned int frag_id = mol->getProp<int>(CDX_FRAG_ID);
      pagedata.fragmentLookup[frag_id] = pagedata.mols.size();
      if (group_id != -1) {
        pagedata.groupedFragments[group_id].push_back(frag_id);
      } else {
        pagedata.groupedFragments[frag_id].push_back(frag_id);
      }

      if (mol->hasProp(NEEDS_FUSE)) {
        // The fragment contains pieces that have to be zipped together on
        // their FUSE_LABEL atoms before it can be used.
        mol->clearProp(NEEDS_FUSE);
        std::unique_ptr<ROMol> fused;
        try {
          replaceFragments(*mol);
          fused = molzip(*mol, molzip_params);
        } catch (Invar::Invariant &) {
          BOOST_LOG(rdWarningLog) << "Failed fusion of fragment skipping... "
                                  << frag_id << std::endl;
          // perhaps have an option to extract all fragments?
          continue;
        }
        fused->setProp<int>(CDX_FRAG_ID, static_cast<int>(frag_id));
        pagedata.mols.emplace_back(dynamic_cast<RWMol *>(fused.release()));
      } else {
        pagedata.mols.push_back(std::move(mol));
      }

      // Build a conformer from the per-atom coordinates stashed on the atoms.
      RWMol *res = pagedata.mols.back().get();
      auto conf = std::make_unique<Conformer>(res->getNumAtoms());
      conf->set3D(false);
      bool hasConf = false;
      bool is3D = false;
      for (auto &atm : res->atoms()) {
        RDGeom::Point3D p{0.0, 0.0, 0.0};
        if (atm->hasProp(CDX_ATOM_POS)) {
          hasConf = true;
          const std::vector<double> coord =
              atm->getProp<std::vector<double>>(CDX_ATOM_POS);
          p.x = coord[0];
          p.y = -1 * coord[1];  // CDXML uses an inverted coordinate
                                // system, so we need to reverse that
          if (coord.size() == 2) {
            p.z = 0.0;
          } else {
            p.z = coord[2];
            is3D = true;
          }
        }
        conf->setAtomPos(atm->getIdx(), p);
        atm->clearProp(CDX_ATOM_POS);
      }

      if (hasConf) {
        if (!is3D) {
          scaleBonds(*res, *conf, RDKIT_DEPICT_BONDLENGTH, bondLength);
        }
        conf->set3D(is3D);
        auto confidx = res->addConformer(conf.release());
        if (is3D) {
          res->updatePropertyCache(false);
          MolOps::assignChiralTypesFrom3D(*res, confidx, true);
        } else {
          MolOps::assignChiralTypesFromBondDirs(*res, confidx, true);
        }
        Atropisomers::detectAtropisomerChirality(*res,
                                                 &res->getConformer(confidx));
      } else {  // no Conformer
        Atropisomers::detectAtropisomerChirality(*res, nullptr);
      }

      // now that atom stereochem has been perceived, the wedging
      // information is no longer needed, so we clear
      // single bond dir flags:
      MolOps::clearSingleBondDirFlags(*res);

      if (params.sanitize) {
        try {
          if (params.removeHs) {
            // Bond stereo detection must happen before H removal, or
            // else we might be removing stereogenic H atoms in double
            // bonds (e.g. imines). But before we run stereo detection,
            // we need to run mol cleanup so we don't have trouble with
            // e.g. nitro groups. Sadly, this also means we will run
            // both cleanup and ring finding twice (a fast find
            // rings in bond stereo detection, and another in
            // sanitization's SSSR symmetrization).
            unsigned int failedOp = 0;
            MolOps::sanitizeMol(*res, failedOp, MolOps::SANITIZE_CLEANUP);
            MolOps::detectBondStereochemistry(*res);
            MolOps::removeHs(*res);
          } else {
            MolOps::sanitizeMol(*res);
            MolOps::detectBondStereochemistry(*res);
          }
        } catch (...) {
          BOOST_LOG(rdWarningLog)
              << "CDXMLParser: failed sanitizing skipping fragment " << frag_id
              << std::endl;
          pagedata.mols.pop_back();
          continue;
        }
        MolOps::assignStereochemistry(*res, true, true, true);
        // Sometimes ChemDraw just marks with R and S, so let's assign
        // these as long as they were not already determined
        checkChemDrawTetrahedralGeometries(*res);
      } else {
        MolOps::detectBondStereochemistry(*res);
      }
    } else if (id == kCDXObj_ReactionScheme) {  // get the reaction info
      // Only remember the scheme here; nothing else is done with it in this
      // function.
      CDXReactionScheme &scheme = (CDXReactionScheme &)(*frag.second);
      pagedata.schemes.emplace_back(scheme);
    } else if (id == kCDXObj_Group) {
      CDXGroup &group = (CDXGroup &)(*frag.second);
      // Use a separate variable for the child's id: overwriting group_id here
      // would file every later sibling of this group under the group as well.
      const int child_group_id = frag.second->GetObjectID();
      visit_children(group, pagedata, missing_frag_id, bondLength, params,
                     child_group_id);
    } else if (id == kCDXObj_BracketedGroup) {
      CDXBracketedGroup &bracketgroup = (CDXBracketedGroup &)(*frag.second);
      parseBracket(bracketgroup, pagedata);
    }
  }
}
std::unique_ptr<CDXDocument> streamToCDXDocument(std::istream &inStream,
CDXFormat format) {
if (format == CDXFormat::CDXML) {
CDXMLParser parser;
// populate tree structure pt
std::string data = std::string(std::istreambuf_iterator<char>(inStream),
std::istreambuf_iterator<char>());
const bool HaveAllXml = true;
if (XML_STATUS_OK != parser.XML_Parse(data.c_str(),
static_cast<int>(data.size()),
HaveAllXml)) {
auto error = XML_GetErrorCode(parser);
BOOST_LOG(rdErrorLog) << "Failed parsing XML with error code " << error;
throw FileParseException("Bad Input File");
}
return parser.ReleaseDocument();
} else {
throw FileParseException("Can't handle cdx yet");
return std::unique_ptr<CDXDocument>();
}
}
// may raise FileParseException
std::vector<std::unique_ptr<RWMol>> molsFromCDXMLDataStream(
std::istream &inStream, const ChemDrawParserParams &params) {
std::unique_ptr<CDXDocument> document =
streamToCDXDocument(inStream, params.format);
if (!document) {
// error
return std::vector<std::unique_ptr<RWMol>>();
}
PageData pagedata;
auto bondLength = document->m_bondLength;
int missing_frag_id = -1;
for (auto node : document->ContainedObjects()) {
CDXDatumID id = (CDXDatumID)node.second->GetTag();
switch (id) {
case kCDXObj_Page:
visit_children(*node.second, pagedata, missing_frag_id, bondLength,
params);
break;
default:
break;
}
}
for (auto &scheme : pagedata.schemes) {
scheme.set_reaction_steps(pagedata.groupedFragments, pagedata.mols);
}
pagedata.clearCDXProps();
return std::move(pagedata.mols);
}
} // namespace
namespace RDKit {
namespace ChemDraw {
// Public entry point: parses the stream into a CDXDocument by delegating to
// streamToCDXDocument().
std::unique_ptr<CDXDocument> ChemDrawToDocument(std::istream &inStream,
                                                CDXFormat format) {
  auto document = streamToCDXDocument(inStream, format);
  return document;
}
std::unique_ptr<CDXDocument> ChemDrawToDocument(const std::string &filename) {
std::fstream chemdrawfile(filename);
std::string ext = std::filesystem::path(filename).extension().string();
boost::algorithm::to_lower(ext);
if (ext == ".cdxml")
return streamToCDXDocument(chemdrawfile, CDXFormat::CDXML);
else if (ext == ".cdx") {
return streamToCDXDocument(chemdrawfile, CDXFormat::CDX);
}
std::string msg =
std::string("Unknoen filetype ") +
(std::string)std::filesystem::path(filename).extension().string();
throw FileParseException(msg.c_str());
}
}
namespace v2 {
// Parses ChemDraw data from a stream and returns one molecule per fragment
// that could be converted. May raise FileParseException.
std::vector<std::unique_ptr<RWMol>> MolsFromChemDrawDataStream(
    std::istream &inStream, const ChemDrawParserParams &params) {
  // The helper already yields the exact return type, so no re-wrapping of the
  // pointers is needed.
  return molsFromCDXMLDataStream(inStream, params);
}
// Parses ChemDraw data held in a string by wrapping it in a stream and
// delegating to MolsFromChemDrawDataStream().
std::vector<std::unique_ptr<RWMol>> MolsFromChemDrawBlock(
    const std::string &block, const ChemDrawParserParams &params) {
  std::istringstream blockStream(block);
  return MolsFromChemDrawDataStream(blockStream, params);
}
// Parses a ChemDraw file and returns one molecule per fragment that could be
// converted.
//
// Throws BadFileException when the file cannot be opened; may raise
// FileParseException while parsing.
//
// FIX ME: the format is taken from params.format, the file extension is not
// checked (CDX versus CDXML).
std::vector<std::unique_ptr<RWMol>> MolsFromChemDrawFile(
    const std::string &filename, const ChemDrawParserParams &params) {
  // Open read-only: std::fstream defaults to in|out, which fails on files we
  // are only allowed to read.
  std::ifstream chemdrawfile(filename);
  if (!chemdrawfile) {
    throw BadFileException(filename + " does not exist");
  }
  return molsFromCDXMLDataStream(chemdrawfile, params);
}
}
} // namespace RDKit

View File

@@ -1,69 +0,0 @@
//
// Copyright (c) 2024, Glysade Inc
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Novartis Institutes for BioMedical Research Inc.
// nor the names of its contributors may be used to endorse or promote
// products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
#ifndef RDKIT_CHEMDRAW_H
#define RDKIT_CHEMDRAW_H
#include <RDGeneral/export.h>
#include <GraphMol/RDKitBase.h>
#include <GraphMol/ChemReactions/Reaction.h>
#include <string>
namespace RDKit {
namespace v2 {
//! The ChemDraw file formats the API distinguishes between.
enum class CDXFormat {
// NOTE(review): presumably the binary ChemDraw format - confirm
CDX = 1,
// the XML based ChemDraw format
CDXML = 2
};
//! Options controlling how ChemDraw data is converted into molecules.
struct RDKIT_RDCHEMDRAWLIB_EXPORT ChemDrawParserParams {
// sanitize each molecule after it has been built (default: on)
bool sanitize = true;
// remove hydrogens (default: on)
// NOTE(review): looks like this only takes effect together with sanitize -
// confirm against the parser
bool removeHs = true;
// format of the incoming data (default: CDXML)
CDXFormat format = CDXFormat::CDXML;
};
//! Parse ChemDraw data from a stream into molecules
/*!
  \param inStream  stream to read the ChemDraw data from
  \param params    parser options, defaults to a default-constructed
                   ChemDrawParserParams
  \return the molecules that were read
*/
std::vector<std::unique_ptr<RWMol>> RDKIT_RDCHEMDRAWLIB_EXPORT
MolsFromChemDrawDataStream(std::istream &inStream,
const ChemDrawParserParams &params = ChemDrawParserParams());
//! Parse a ChemDraw file into molecules
/*!
  \param filename  path of the file to read
  \param params    parser options, defaults to a default-constructed
                   ChemDrawParserParams
  \return the molecules that were read
*/
std::vector<std::unique_ptr<RWMol>> RDKIT_RDCHEMDRAWLIB_EXPORT
MolsFromChemDrawFile(const std::string &filename,
const ChemDrawParserParams &params = ChemDrawParserParams());
//! Parse ChemDraw data held in a string into molecules
/*!
  \param block   the ChemDraw data
  \param params  parser options, defaults to a default-constructed
                 ChemDrawParserParams
  \return the molecules that were read
*/
std::vector<std::unique_ptr<RWMol>> RDKIT_RDCHEMDRAWLIB_EXPORT
MolsFromChemDrawBlock(const std::string &block,
const ChemDrawParserParams &params = ChemDrawParserParams());
//! Write a molecule as a ChemDraw block
/*!
  \param mol     the molecule to write
  \param format  requested output format, CDXML by default
  \return the ChemDraw data as a string
          (NOTE(review): the definition is not visible from this header -
          confirm which formats are actually supported)
*/
std::string RDKIT_RDCHEMDRAWLIB_EXPORT
MolToChemDrawBlock(const ROMol &mol, CDXFormat format = CDXFormat::CDXML);
}
} // namespace RDKit
#endif

View File

@@ -1,51 +0,0 @@
//
// Copyright (c) 2025, Glysade Inc
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Novartis Institutes for BioMedical Research Inc.
// nor the names of its contributors may be used to endorse or promote
// products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
#ifndef RDKIT_CHEMDRAW_DOC_H
#define RDKIT_CHEMDRAW_DOC_H
#include "chemdraw.h"
#include "ChemDrawStartInclude.h"
#include "chemdraw/CDXStdObjects.h"
#include "ChemDrawEndInclude.h"
namespace RDKit {
namespace ChemDraw {
//! Parse ChemDraw data from a stream into a CDXDocument
/*!
  \param inStream  stream to read the ChemDraw data from
  \param format    format of the data in the stream
  \return the parsed document
*/
std::unique_ptr<CDXDocument> RDKIT_RDCHEMDRAWLIB_EXPORT
ChemDrawToDocument(std::istream &inStream, v2::CDXFormat format);
//! Parse a ChemDraw file into a CDXDocument
/*!
  \param filename  path of the file to read
                   (NOTE(review): presumably the format is derived from the
                   file extension since no format argument is taken - confirm)
  \return the parsed document
*/
std::unique_ptr<CDXDocument> RDKIT_RDCHEMDRAWLIB_EXPORT
ChemDrawToDocument(const std::string &filename);
}
} // namespace RDKit
#endif

View File

@@ -1,167 +0,0 @@
//
// Copyright (c) 2024, Glysade Inc
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Novartis Institutes for BioMedical Research Inc.
// nor the names of its contributors may be used to endorse or promote
// products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
#include "chemdraw.h"
#include "chemdrawreaction.h"
#include "reaction.h"
#include "utils.h"
#include <GraphMol/QueryOps.h>
#include <GraphMol/ChemReactions/SanitizeRxn.h>
#include <GraphMol/ChemReactions/ReactionUtils.h>
#include <GraphMol/ChemTransforms/ChemTransforms.h>
namespace RDKit {
using namespace RDKit::v2;
using namespace RDKit::ChemDraw;
// ChemDraw reaction API
// Convert reaction information to RDKIT reactions
namespace {
// Replaces every atom of mol with the query atom produced by
// QueryOps::replaceAtomWithQueryAtom().
void make_query_atoms(RWMol &mol) {
  RWMol *owner = &mol;
  for (auto &at : mol.atoms()) {
    QueryOps::replaceAtomWithQueryAtom(owner, at);
  }
}
// Files mol into templates under the integer stored in its property `prop`.
// The first molecule seen for a key is moved into the map (leaving `mol`
// empty); any later molecule with the same key is combined with the stored
// one and `mol` itself is left untouched.
void add_template(const std::string &prop, std::map<int, ROMOL_SPTR> &templates,
                  std::unique_ptr<RWMol> &mol) {
  const auto key = mol->getProp<int>(prop);
  auto existing = templates.find(key);
  if (existing == templates.end()) {
    templates[key] = ROMOL_SPTR(std::move(mol));
    return;
  }
  existing->second = ROMOL_SPTR(combineMols(*existing->second, *mol));
}
} // namespace
namespace v2 {
//! Parse a text stream with ChemDraw data into ChemicalReactions
/*!
  The molecules read from the stream are grouped by their (scheme id, step id)
  properties; one ChemicalReaction is produced per group. Within a group each
  molecule is filed as reactant, agent or product according to which CDX id
  property it carries, and molecules sharing the same id are combined into a
  single template.

  \param inStream  stream holding the ChemDraw data
  \param sanitize  forwarded to the molecule parser; when false the reactants
                   and products are still adjusted for smarts style matching
  \param removeHs  forwarded to the molecule parser
  \return one reaction per scheme step, empty if the data holds no reaction
*/
std::vector<std::unique_ptr<ChemicalReaction>>
ChemDrawDataStreamToChemicalReactions(std::istream &inStream, bool sanitize,
                                      bool removeHs) {
  ChemDrawParserParams params;
  params.sanitize = sanitize;
  params.removeHs = removeHs;
  auto mols = MolsFromChemDrawDataStream(inStream, params);

  std::vector<std::unique_ptr<ChemicalReaction>> result;

  // (scheme id, step id) -> indices into mols of the participating fragments
  std::map<std::pair<unsigned int, unsigned int>, std::vector<unsigned int>>
      schemes;
  for (size_t i = 0; i < mols.size(); ++i) {
    unsigned int step = 0;
    unsigned int scheme = 0;
    if (mols[i]->getPropIfPresent(CDX_SCHEME_ID, scheme) &&
        mols[i]->getPropIfPresent(CDX_STEP_ID, step)) {
      auto schemestep = std::pair<unsigned int, unsigned int>(scheme, step);
      schemes[schemestep].push_back(static_cast<unsigned int>(i));
    }
  }
  if (schemes.empty()) {
    return result;
  }

  // add_template() may move a molecule out of mols, so every index must be
  // consumed at most once across all reactions
  std::set<unsigned int> used;
  for (const auto &scheme : schemes) {
    // The templates belong to this reaction only. Keeping the maps outside
    // the loop would leak the templates of earlier reactions into later ones.
    std::map<int, ROMOL_SPTR> reactant_templates;
    std::map<int, ROMOL_SPTR> product_templates;
    std::map<int, ROMOL_SPTR> agent_templates;

    ChemicalReaction *res = new ChemicalReaction;
    result.push_back(std::unique_ptr<ChemicalReaction>(res));

    for (auto idx : scheme.second) {
      CHECK_INVARIANT(
          used.find(idx) == used.end(),
          "Fragment used in twice in one or more reactions, this shouldn't happen");
      // convert atoms to queries and file the fragment by its role
      if (mols[idx]->hasProp(CDX_REAGENT_ID)) {
        used.insert(idx);
        make_query_atoms(*mols[idx]);
        add_template(CDX_REAGENT_ID, reactant_templates, mols[idx]);
      } else if (mols[idx]->hasProp(CDX_AGENT_ID)) {
        used.insert(idx);
        make_query_atoms(*mols[idx]);
        add_template(CDX_AGENT_ID, agent_templates, mols[idx]);
      } else if (mols[idx]->hasProp(CDX_PRODUCT_ID)) {
        used.insert(idx);
        make_query_atoms(*mols[idx]);
        add_template(CDX_PRODUCT_ID, product_templates, mols[idx]);
      }
    }

    for (const auto &reactant : reactant_templates) {
      res->addReactantTemplate(reactant.second);
    }
    for (const auto &agent : agent_templates) {
      res->addAgentTemplate(agent.second);
    }
    for (const auto &product : product_templates) {
      res->addProductTemplate(product.second);
    }
    updateProductsStereochem(res);
    // ChemDraw-based reactions do not have implicit properties
    res->setImplicitPropertiesFlag(false);

    if (!sanitize) {  // we still need to fix the reaction for smarts style
                      // matching
      unsigned int failed = 0;
      RxnOps::sanitizeRxn(
          *res, failed,
          RxnOps::SANITIZE_ADJUST_REACTANTS | RxnOps::SANITIZE_ADJUST_PRODUCTS,
          RxnOps::MatchOnlyAtRgroupsAdjustParams());
    }
  }
  return result;
}
// Parses ChemDraw data held in a string by wrapping it in a stream and
// delegating to ChemDrawDataStreamToChemicalReactions().
std::vector<std::unique_ptr<ChemicalReaction>> ChemDrawToChemicalReactions(
    const std::string &rxnBlock, bool sanitize, bool removeHs) {
  std::istringstream blockStream{rxnBlock};
  auto reactions =
      ChemDrawDataStreamToChemicalReactions(blockStream, sanitize, removeHs);
  return reactions;
}
// Reads the reactions from a ChemDraw file. An empty vector is returned when
// the file cannot be opened or the stream is already at end-of-file;
// otherwise the work is delegated to ChemDrawDataStreamToChemicalReactions().
std::vector<std::unique_ptr<ChemicalReaction>> ChemDrawFileToChemicalReactions(
    const std::string &fName, bool sanitize, bool removeHs) {
  std::ifstream fileStream(fName.c_str());
  // fail() covers both failbit and badbit
  if (fileStream.fail() || fileStream.eof()) {
    return {};
  }
  return ChemDrawDataStreamToChemicalReactions(fileStream, sanitize, removeHs);
}
}
} // namespace RDKit

View File

@@ -1,63 +0,0 @@
//
// Copyright (c) 2025, Glysade Inc
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Novartis Institutes for BioMedical Research Inc.
// nor the names of its contributors may be used to endorse or promote
// products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
#ifndef RDKIT_CHEMDRAW_REACTION_H
#define RDKIT_CHEMDRAW_REACTION_H
#include <RDGeneral/export.h>
#include <GraphMol/RDKitBase.h>
#include <GraphMol/ChemReactions/Reaction.h>
#include <string>
namespace RDKit
{
namespace v2 {
//---------------------------------------------------------------------------
//! \name Chemdraw rxn Support
///@{
//! Parse text in ChemDraw rxn format into a vector of ChemicalReactions
/*!
  \param rxnBlock  the ChemDraw data
  \param sanitize  defaults to false
                   (NOTE(review): presumably toggles sanitization of the
                   parsed molecules - confirm against the definition)
  \param removeHs  defaults to false
                   (NOTE(review): presumably toggles hydrogen removal -
                   confirm against the definition)
  \return the reactions that were read
*/
RDKIT_RDCHEMDRAWREACTIONLIB_EXPORT std::vector<std::unique_ptr<ChemicalReaction>>
ChemDrawToChemicalReactions(const std::string &rxnBlock, bool sanitize = false,
bool removeHs = false);
//! Parse a file in ChemDraw rxn format into a vector of ChemicalReactions
/*!
  \param fileName  path of the file to read
  \param sanitize  defaults to false
                   (NOTE(review): presumably toggles sanitization of the
                   parsed molecules - confirm against the definition)
  \param removeHs  defaults to false
                   (NOTE(review): presumably toggles hydrogen removal -
                   confirm against the definition)
  \return the reactions that were read
*/
RDKIT_RDCHEMDRAWREACTIONLIB_EXPORT std::vector<std::unique_ptr<ChemicalReaction>>
ChemDrawFileToChemicalReactions(const std::string &fileName, bool sanitize = false,
bool removeHs = false);
//! Parse a text stream in ChemDraw rxn format into a vector of ChemicalReactions
/*!
  \param rxnStream  stream holding the ChemDraw data
  \param sanitize   defaults to false
                    (NOTE(review): presumably toggles sanitization of the
                    parsed molecules - confirm against the definition)
  \param removeHs   defaults to false
                    (NOTE(review): presumably toggles hydrogen removal -
                    confirm against the definition)
  \return the reactions that were read
*/
RDKIT_RDCHEMDRAWREACTIONLIB_EXPORT std::vector<std::unique_ptr<ChemicalReaction>>
ChemDrawDataStreamToChemicalReactions(std::istream &rxnStream,
bool sanitize = false,
bool removeHs = false);
}
} // namespace RDKit
#endif

View File

@@ -1,586 +0,0 @@
//
// Copyright (c) 2024, Glysade Inc
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Novartis Institutes for BioMedical Research Inc.
// nor the names of its contributors may be used to endorse or promote
// products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
#include "fragment.h"
#include "bond.h"
#include "node.h"
namespace RDKit {
namespace ChemDraw {
namespace {
// Translate a CDX sequence type into the human-readable label used in log
// messages. Sequence types without a dedicated label yield the empty string.
const char *sequenceTypeToName(CDXSeqType seqtype) {
  const char *label = "";
  switch (seqtype) {
    case kCDXSeqType_Unknown:
      label = "Unknown";
      break;
    case kCDXSeqType_Peptide:
      // HELM peptides
      label = "Peptide (Helm)";
      break;
    case kCDXSeqType_Peptide1:
      // Single letter amino acids (legacy biopolymer support)
      label = "Peptide1 (Single Letter Amino Acid)";
      break;
    case kCDXSeqType_Peptide3:
      // Three letter amino acids (legacy biopolymer support)
      label = "Peptide3 (Three letter amino acid)";
      break;
    case kCDXSeqType_DNA:
      label = "DNA";
      break;
    case kCDXSeqType_RNA:
      label = "RNA";
      break;
    case kCDXSeqType_Biopolymer:
      label = "Biopolymer";
      break;
    default:
      // keep the empty label
      break;
  }
  return label;
}
} // namespace
//! Parse a CDX fragment into \c mol.
//!
//! Every node and bond object contained in the fragment is handed to
//! parseNode() / parseBond(); all other contained objects are ignored.
//! Enhanced-stereo information collected while parsing the nodes is turned
//! into StereoGroups on \c mol at the end.
//!
//! \param mol                 molecule receiving the atoms, bonds and the
//!                            CDX_FRAG_ID property
//! \param fragment            the CDX fragment to read
//! \param pagedata            page-level lookup tables, forwarded to
//!                            parseNode() / parseBond()
//! \param missingFragId       id to use when the fragment's object id is -1;
//!                            it is decremented each time it is consumed
//! \param externalAttachment  forwarded unchanged to parseNode()
//!
//! \return false if the fragment has a sequence type other than
//!         kCDXSeqType_Unknown (nothing is parsed in that case) or if any
//!         node or bond failed to parse; true otherwise. A failing node or
//!         bond does not stop the remaining children from being processed.
bool parseFragment(RWMol &mol, CDXFragment &fragment, PageData &pagedata,
                   int &missingFragId, int externalAttachment) {
  int frag_id = fragment.GetObjectID();
  if (fragment.m_sequenceType != kCDXSeqType_Unknown) {
    BOOST_LOG(rdWarningLog)
        << "Unhandled chemdraw sequence type "
        << sequenceTypeToName(fragment.m_sequenceType) << std::endl;
    return false;
  }
  if (frag_id == -1) {
    // ChemDraw simply assigns a new one
    BOOST_LOG(rdWarningLog)
        << "Invalid or missing fragment id from CDXML fragment, assigning new one..."
        << std::endl;
    frag_id = missingFragId;
    missingFragId--;
  }
  mol.setProp(CDX_FRAG_ID, frag_id);

  // Enhanced stereo groups, keyed by (group number, group type); filled in by
  // parseNode() while the atoms are created.
  std::map<std::pair<int, StereoGroupType>, StereoGroupInfo> sgroups;

  // nodetypes =
  // https://www.cambridgesoft.com/services/documentation/sdk/chemdraw/cdx/properties/Node_Type.htm
  bool skip_fragment =
      false;  // is there an irrecoverable error for this fragment
  for (const auto &child : fragment.ContainedObjects()) {
    CDXDatumID id = (CDXDatumID)child.second->GetTag();
#ifdef DEBUG
    std::cerr << "Data Type: " << id << std::endl;
#endif
    switch (id) {
      case kCDXObj_Node: {
        CDXNode &node = (CDXNode &)(*child.second);
        if (!parseNode(mol, frag_id, node, pagedata, sgroups, missingFragId,
                       externalAttachment)) {
          skip_fragment = true;
        }
        break;
      }
      case kCDXObj_Bond: {
        CDXBond &bond = (CDXBond &)(*child.second);
        if (!parseBond(mol, frag_id, bond, pagedata)) {
          skip_fragment = true;
        }
        // The break belongs here, outside the if: previously a successfully
        // parsed bond fell through into the next case label.
        break;
      }
      default:
        // Every other property / object type is intentionally ignored when
        // reading a fragment.
        break;
    }
  }

  // Add the stereo groups
  if (!sgroups.empty()) {
    std::vector<StereoGroup> stereo_groups;
    for (auto &sgroup : sgroups) {
      // Absolute groups and non-positive group numbers keep id 0.
      unsigned gId = 0;
      if (sgroup.second.grouptype != StereoGroupType::STEREO_ABSOLUTE &&
          sgroup.second.sgroup > 0) {
        gId = sgroup.second.sgroup;
      }
      std::vector<Bond *> newBonds;  // stereo groups here carry atoms only
      stereo_groups.emplace_back(sgroup.second.grouptype, sgroup.second.atoms,
                                 newBonds, gId);
    }
    mol.setStereoGroups(std::move(stereo_groups));
  }
  return !skip_fragment;
}
}
} // namespace RDKit

View File

@@ -1,96 +0,0 @@
//
// Copyright (c) 2024, Glysade Inc
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Novartis Institutes for BioMedical Research Inc.
// nor the names of its contributors may be used to endorse or promote
// products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
#ifndef CHEMDRAW_FRAGMENT_H
#define CHEMDRAW_FRAGMENT_H
#include <GraphMol/RDKitBase.h>
#include <GraphMol/QueryAtom.h>
#include <GraphMol/QueryOps.h>
#include "ChemDrawStartInclude.h"
#include "chemdraw/CDXStdObjects.h"
#include "ChemDrawEndInclude.h"
#include "reaction.h"
#include "utils.h"
namespace RDKit {
namespace ChemDraw {
//! Bookkeeping shared by the parsers while one ChemDraw page is read.
//! Instances cannot be copy-constructed.
struct PageData {
  // All containers start out empty.
  PageData() {}
  PageData(const PageData &) = delete;

  // CDX object id -> atom created for it
  std::map<unsigned int, Atom *> atomIds;
  // presumably CDX object id -> bond created for it (filled outside this
  // header) -- TODO confirm
  std::map<unsigned int, Bond *> bondIds;
  // All molecules found in the doc
  std::vector<std::unique_ptr<RWMol>> mols;
  // fragment.id -> molecule index
  std::map<unsigned int, size_t> fragmentLookup;
  // grouped.id -> [fragment.id]
  std::map<unsigned int, std::vector<int>> groupedFragments;
  // reaction schemes found
  std::vector<ReactionInfo> schemes;

  //! Remove the CDX bookkeeping properties (CDX_ATOM_ID, CDX_BOND_ORDERING and
  //! CDX_CIP on atoms, CDX_BOND_ID on bonds) from every stored molecule.
  void clearCDXProps() {
    for (auto &molecule : mols) {
      for (auto at : molecule->atoms()) {
        at->clearProp(CDX_ATOM_ID);
        at->clearProp(CDX_BOND_ORDERING);
        at->clearProp(CDX_CIP);
      }
      for (auto bnd : molecule->bonds()) {
        bnd->clearProp(CDX_BOND_ID);
      }
    }
  }
};
//! Parse a CDX fragment record
//!
//! \param mol                 molecule to parse the fragment into
//! \param fragment            fragment to read
//! \param pagedata            page-level lookup tables (atom/bond ids,
//!                            molecules, fragment lookup) passed on to the
//!                            node and bond parsers
//! \param missingFragId       if the fragment id is missing (-1), this is the
//!                            id to use; it is decremented after being used.
//!                            n.b. may be obsolete, everything needs an id to
//!                            be valid
//! \param externalAttachment  if this fragment has an external node, this is
//!                            its id, otherwise -1. External nodes are
//!                            normally NickNames or new Fragments
//!
//! \return false if the fragment has an unhandled sequence type or if one of
//!         its nodes or bonds could not be parsed, true otherwise
bool parseFragment(RWMol &mol, CDXFragment &fragment, PageData &pagedata,
int &missingFragId, int externalAttachment = -1);
}
} // namespace RDKit
#endif

View File

@@ -1,323 +0,0 @@
//
// Copyright (c) 2024, Glysade Inc
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Novartis Institutes for BioMedical Research Inc.
// nor the names of its contributors may be used to endorse or promote
// products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//#include "node.h"
#include "fragment.h"
#include "utils.h"
namespace RDKit {
namespace ChemDraw {
bool parseNode(
RWMol &mol, unsigned int fragmentId, CDXNode &node, PageData &pagedata,
std::map<std::pair<int, StereoGroupType>, StereoGroupInfo> &sgroups,
int &missingFragId, int externalAttachment) {
int atom_id = node.GetObjectID();
int elemno = node.m_elementNum; // default to carbon
// UINT16 max is not addigned?
int num_hydrogens =
node.m_numHydrogens == kNumHydrogenUnspecified ? 0 : node.m_numHydrogens;
bool explicitHs = node.m_numHydrogens != kNumHydrogenUnspecified;
int charge = 0;
if ((node.m_charge & 0x00FFFFFF) == 0)
charge = node.m_charge >> 24;
else
charge = node.m_charge;
int atommap = 0;
int rgroup_num = -1;
int isotope = node.m_isotope;
bool checkForRGroup = false;
;
std::string query_label;
std::vector<int16_t> elementlist;
// position node.m_2dPosition;
#ifdef DEBUG
std::cerr << NodeType(node.m_nodeType) << std::endl;
#endif
switch (node.m_nodeType) {
case kCDXNodeType_Element: {
break;
}
case kCDXNodeType_ElementList: {
if (node.m_elementList) {
elementlist = *node.m_elementList;
query_label = "ElementList";
}
break;
}
case kCDXNodeType_Nickname: {
elemno = 0;
atommap = atom_id;
break;
}
case kCDXNodeType_Fragment: {
elemno = 0;
atommap = atom_id;
break;
}
case kCDXNodeType_ExternalConnectionPoint: {
if (externalAttachment <= 0) {
// sometimes this is a dummy atom, but I don't know when.
if (node.m_externalConnectionType == kCDXExternalConnection_Diamond) {
elemno = 0;
}
atommap = atom_id;
} else {
elemno = 0;
atommap = externalAttachment;
}
break;
}
case kCDXNodeType_GenericNickname: {
if (node.m_genericNickname.size()) {
switch (node.m_genericNickname[0]) {
case 'R': {
checkForRGroup = true;
elemno = 0;
query_label = node.m_genericNickname;
break;
}
case 'A':
case 'Q':
case 'X':
case 'M': {
elemno = 0;
query_label = node.m_genericNickname;
} break;
default:
std::cerr << "Unhandled generic nickname: "
<< node.m_genericNickname << std::endl;
}
}
break;
}
case kCDXNodeType_Unspecified:
break;
case kCDXNodeType_ElementListNickname:
break;
case kCDXNodeType_Formula:
break;
case kCDXNodeType_AnonymousAlternativeGroup:
break;
case kCDXNodeType_NamedAlternativeGroup:
break;
case kCDXNodeType_MultiAttachment:
break;
case kCDXNodeType_VariableAttachment:
break;
case kCDXNodeType_LinkNode:
break;
case kCDXNodeType_Monomer:
break;
}
for (auto &child : node.ContainedObjects()) {
if (child.second->GetTag() == kCDXObj_Text) {
const std::string &text = ((CDXText *)child.second)->GetText().str();
if (text.size() > 0 && text[0] == 'R') {
try {
if (checkForRGroup)
rgroup_num = text.size() > 1 ? stoi(text.substr(1)) : 0;
else
isotope = text.size() > 1 ? stoi(text.substr(1)) : 0;
} catch (const std::invalid_argument &e) {
if (rgroup_num)
BOOST_LOG(rdWarningLog)
<< "RGroupError: Invalid argument - Cannot convert '" << text
<< "' to an integer." << std::endl;
} catch (const std::out_of_range &e) {
if (rgroup_num)
BOOST_LOG(rdWarningLog)
<< "RGroupError: Out of range - The number '" << text
<< "' is too large or too small." << std::endl;
}
}
}
}
StereoGroupType grouptype = StereoGroupType::STEREO_ABSOLUTE;
switch (node.m_enhancedStereoType) {
case kCDXEnhancedStereo_Absolute:
grouptype = StereoGroupType::STEREO_ABSOLUTE;
break;
case kCDXEnhancedStereo_And:
grouptype = StereoGroupType::STEREO_AND;
break;
case kCDXEnhancedStereo_Or:
grouptype = StereoGroupType::STEREO_OR;
break;
default:
break;
}
CHECK_INVARIANT(atom_id != -1, "Uninitialized atom id in cdxml.");
Atom *rd_atom = new Atom(elemno);
rd_atom->setFormalCharge(charge);
rd_atom->setNumExplicitHs(num_hydrogens);
rd_atom->setNoImplicit(explicitHs);
rd_atom->setIsotope(isotope);
if (rgroup_num >= 0) {
rd_atom->setAtomMapNum(rgroup_num);
}
set_fuse_label(rd_atom, atommap);
switch (node.m_hStereo) {
case kCDXProp_Atom_HDot: // this atom has an implicit hydrogen with a
// wedged bond
rd_atom->setProp<char>(CDX_IMPLICIT_HYDROGEN_STEREO, 'w');
break;
case kCDXProp_Atom_HDash: // this atom has an implicit hydrogen with a
// hashed bond
rd_atom->setProp<char>(CDX_IMPLICIT_HYDROGEN_STEREO, 'h');
break;
}
if (node.m_bondOrdering) {
// This node may be completely replaced by the fragment
// i.e. [*:1]C[*:1].C[*:1]C => CCC
rd_atom->setProp<std::vector<int>>(CDX_BOND_ORDERING, *node.m_bondOrdering);
}
if (node.m_geometry == kCDXAtomGeometry_Tetrahedral) {
// std::cerr << "tetrahedral" << std::endl;
// if we have a cip type we can interpret, set it, otherwise don't
switch (node.m_CIP) {
case kCDXCIPAtom_R:
case kCDXCIPAtom_r:
case kCDXCIPAtom_S:
case kCDXCIPAtom_s:
rd_atom->setProp<CDXAtomCIPType>(CDX_CIP, node.m_CIP);
break;
default:
rd_atom->setProp<CDXAtomCIPType>(CDX_CIP, kCDXCIPAtom_Undetermined);
break;
}
}
std::vector<double> atom_coords;
if (node.KnownPosition3D()) {
atom_coords.reserve(3);
atom_coords.push_back(node.m_3dPosition.x);
atom_coords.push_back(node.m_3dPosition.y);
atom_coords.push_back(node.m_3dPosition.z);
} else {
atom_coords.reserve(2);
atom_coords.push_back(node.m_2dPosition.x);
atom_coords.push_back(node.m_2dPosition.y);
}
rd_atom->setProp<std::vector<double>>(CDX_ATOM_POS, atom_coords);
rd_atom->setProp<unsigned int>(CDX_ATOM_ID, atom_id);
const bool updateLabels = true;
const bool takeOwnership = true;
auto idx = mol.addAtom(rd_atom, updateLabels, takeOwnership);
if (query_label.size()) {
if (query_label[0] == 'R') {
rd_atom = addquery(makeAtomNullQuery(), query_label, mol, idx);
} else if (query_label == "A") {
rd_atom = addquery(makeAAtomQuery(), query_label, mol, idx);
} else if (query_label == "Q") {
rd_atom = addquery(makeQAtomQuery(), query_label, mol, idx);
} else if (query_label == "M") {
rd_atom = addquery(makeMAtomQuery(), query_label, mol, idx);
} else if (query_label == "MH") {
rd_atom = addquery(makeMHAtomQuery(), query_label, mol, idx);
} else if (query_label == "X") {
rd_atom = addquery(makeXAtomQuery(), query_label, mol, idx);
} else if (query_label == "ElementList") {
if (!elementlist.size()) {
BOOST_LOG(rdWarningLog)
<< "ElementList is empty, ignoring..." << std::endl;
} else {
auto *q = new ATOM_OR_QUERY;
q->setDescription("AtomOr");
for (auto atNum : elementlist) {
q->addChild(
QueryAtom::QUERYATOM_QUERY::CHILD_TYPE(makeAtomNumQuery(atNum)));
}
rd_atom = addquery(q, query_label, mol, idx);
rd_atom->setAtomicNum(elementlist.front());
}
} else if (query_label.size()) {
std::cerr << "Unhandled generic nickname: " << query_label << std::endl;
} else {
rd_atom->setProp(common_properties::atomLabel, query_label);
}
}
switch (node.m_radical) {
case kCDXRadical_None:
break;
case kCDXRadical_Singlet:
rd_atom->setNumRadicalElectrons(2);
break;
case kCDXRadical_Doublet: {
rd_atom->setNumRadicalElectrons(1);
break;
}
case kCDXRadical_Triplet: {
rd_atom->setNumRadicalElectrons(2);
break;
}
}
if (node.m_enhancedStereoGroupNum > 0) {
auto key = std::make_pair(node.m_enhancedStereoGroupNum, grouptype);
auto &stereo = sgroups[key];
stereo.sgroup = node.m_enhancedStereoGroupNum;
stereo.grouptype = grouptype;
stereo.atoms.push_back(rd_atom);
}
pagedata.atomIds[atom_id] =
rd_atom; // The mol has ownership so this can't leak
if (node.m_nodeType == kCDXNodeType_Nickname ||
node.m_nodeType == kCDXNodeType_Fragment) {
// This fragment needs to be expanded and joined to the current one
// the external_id is the node's atom_id
for (auto fragment : node.ContainedObjects()) {
if (fragment.second->GetTag() == kCDXObj_Fragment) {
if (!parseFragment(mol, (CDXFragment &)(*fragment.second), pagedata,
missingFragId, atom_id)) {
return false;
}
mol.setProp<bool>(NEEDS_FUSE, true);
// might need to reset to OUR frag_id since parse_fragment will
// set
// it to the fragments
mol.setProp(CDX_FRAG_ID, fragmentId);
}
}
}
return true;
}
}
} // namespace RDKit

View File

@@ -1,54 +0,0 @@
//
// Copyright (c) 2024, Glysade Inc
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Novartis Institutes for BioMedical Research Inc.
// nor the names of its contributors may be used to endorse or promote
// products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
#ifndef CHEMDRAW_NODE_H
#define CHEMDRAW_NODE_H
#include <GraphMol/RDKitBase.h>
#include <GraphMol/QueryAtom.h>
#include <GraphMol/QueryOps.h>
#include "ChemDrawStartInclude.h"
#include "chemdraw/CDXStdObjects.h"
#include "ChemDrawEndInclude.h"
#include "utils.h"
#include "fragment.h"
namespace RDKit {
namespace ChemDraw {
// Converts a single ChemDraw node into an RDKit atom that is added to `mol`.
//
// Based on the visible part of the definition: the new atom is recorded in
// `pagedata.atomIds` under its ChemDraw atom id, and when the node has a
// positive enhanced-stereo group number the atom is appended to the matching
// entry of `sgroups` (keyed by group number and stereo group type). For
// nickname and fragment nodes the contained fragments are parsed into `mol`
// as well, after which `fragmentId` is written back to the molecule as its
// fragment id.
//
// Returns false when parsing a contained fragment fails.
// NOTE(review): `missingFragId` is forwarded to the nested fragment parser;
// the exact role of `externalAttachment` is not visible from the declaration
// -- confirm against the definition before relying on it.
bool parseNode(
RWMol &mol, unsigned int fragmentId, CDXNode &node, PageData &pagedata,
std::map<std::pair<int, StereoGroupType>, StereoGroupInfo> &sgroups,
int &missingFragId, int externalAttachment);
}
}
#endif

View File

@@ -1,165 +0,0 @@
//
// Copyright (c) 2024, Glysade Inc
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Novartis Institutes for BioMedical Research Inc.
// nor the names of its contributors may be used to endorse or promote
// products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
#include "chemdraw.h"
#include "chemdrawreaction.h"
#include "reaction.h"
#include "utils.h"
#include <GraphMol/QueryOps.h>
#include <GraphMol/ChemReactions/SanitizeRxn.h>
#include <GraphMol/ChemReactions/ReactionUtils.h>
#include <GraphMol/ChemTransforms/ChemTransforms.h>
namespace RDKit {
namespace ChemDraw {
// Tags the molecules that play one role (reactant, product or agent) in this
// reaction step.
//
//  type              - role name, used only in warning messages
//  prop              - name of the molecule property that receives the
//                      position of the molecule's group within `frag_ids`
//  frag_ids          - ids referenced by the reaction step for this role
//  fragments         - maps a fragment id to the index of its molecule in
//                      `mols`
//  grouped_fragments - maps an id from `frag_ids` to the fragment ids that
//                      belong to it
//  mols              - the parsed molecules
//
// Every molecule that is found gets the scheme id, the step id and `prop`
// set. Ids that cannot be resolved are reported as warnings and skipped; an
// id without a group does not advance the role position counter.
void ReactionStepInfo::set_reaction_data(
    std::string type, std::string prop, const std::vector<int> &frag_ids,
    const std::map<unsigned int, size_t> &fragments,
    std::map<unsigned int, std::vector<int>> &grouped_fragments,
    const std::vector<std::unique_ptr<RWMol>> &mols) const {
  unsigned int reagent_idx = 0;
  for (auto idx : frag_ids) {
    auto group = grouped_fragments.find(idx);
    if (group == grouped_fragments.end()) {
      BOOST_LOG(rdWarningLog) << "CDXMLParser: Schema " << scheme_id << " step "
                              << step_id << " " << type << " reaction fragment "
                              << idx << " not found in document." << std::endl;
      continue;
    }
    for (auto reaction_fragment_id : group->second) {
      auto fragment = fragments.find(reaction_fragment_id);
      if (fragment == fragments.end()) {
        // Report the member fragment that is actually missing; printing the
        // group id here would make this warning indistinguishable from the
        // one above.
        BOOST_LOG(rdWarningLog)
            << "CDXMLParser: Schema " << scheme_id << " step " << step_id << " "
            << type << " fragment " << reaction_fragment_id
            << " not found in document." << std::endl;
        continue;
      }
      const auto &mol = mols[fragment->second];
      mol->setProp(CDX_SCHEME_ID, scheme_id);
      mol->setProp(CDX_STEP_ID, step_id);
      mol->setProp(prop, reagent_idx);
    }
    reagent_idx += 1;
  }
}
// Applies this reaction step to the parsed molecules.
//
// First the reactant, product and agent molecules are tagged through
// set_reaction_data() (agents are the objects above the arrow followed by the
// objects below it). Then every pair in ReactionStepAtomMap receives a shared
// atom-map number, counted from 1 in list order.
//
//  scheme_id         - scheme id used in the atom-map warnings (this
//                      parameter shadows the member of the same name)
//  atoms             - maps a ChemDraw atom id to the RDKit atom
//  fragments, grouped_fragments, mols - forwarded to set_reaction_data()
//
// A mapped id with no matching atom only produces a warning; processing of
// the remaining pairs continues.
void ReactionStepInfo::set_reaction_step(
    size_t scheme_id, std::map<unsigned int, Atom *> &atoms,
    const std::map<unsigned int, size_t> &fragments,
    std::map<unsigned int, std::vector<int>> &grouped_fragments,
    const std::vector<std::unique_ptr<RWMol>> &mols) const {
  // Set the molecule properties
  set_reaction_data("ReactionStepReactants", CDX_REAGENT_ID,
                    ReactionStepReactants, fragments, grouped_fragments, mols);
  set_reaction_data("ReactionStepProducts", CDX_PRODUCT_ID,
                    ReactionStepProducts, fragments, grouped_fragments, mols);
  auto agents = ReactionStepObjectsAboveArrow;
  agents.insert(agents.end(), ReactionStepObjectsBelowArrow.begin(),
                ReactionStepObjectsBelowArrow.end());
  set_reaction_data("ReactionStepAgents", CDX_AGENT_ID, agents, fragments,
                    grouped_fragments, mols);

  // Set the Atom Maps
  int atommap = 0;
  for (const auto &mapping : ReactionStepAtomMap) {
    ++atommap;
    const unsigned int node_ids[] = {
        static_cast<unsigned int>(mapping.first),
        static_cast<unsigned int>(mapping.second)};
    for (unsigned int node_id : node_ids) {
      // Single lookup instead of find() followed by operator[].
      auto found = atoms.find(node_id);
      if (found != atoms.end()) {
        found->second->setAtomMapNum(atommap);
      } else {
        BOOST_LOG(rdWarningLog)
            << "CDXMLParser: Schema " << scheme_id << " step " << step_id
            << " ReactionStepAtomMap cannot find atom with node id " << node_id
            << " skipping schema..." << std::endl;
      }
    }
  }
}
// Builds the step list for a ChemDraw reaction scheme.
//
// The scheme's object id becomes `scheme_id`. Every contained object tagged
// as a reaction step contributes exactly one ReactionStepInfo holding the
// step id, its reactants, products, objects below the arrow and the
// atom-to-atom map.
//
// NOTE(review): ReactionStepObjectsAboveArrow is never filled in here, so it
// is always empty when the agents are assembled -- confirm whether the step's
// above-arrow objects should be copied as well.
ReactionInfo::ReactionInfo(CDXReactionScheme &scheme)
    : scheme_id(static_cast<unsigned int>(scheme.GetObjectID())) {
  for (auto &rxnNode : scheme.ContainedObjects()) {
    CDXDatumID type_id = (CDXDatumID)rxnNode.second->GetTag();
    if (type_id != kCDXObj_ReactionStep) {
      continue;
    }
    CDXReactionStep &step = (CDXReactionStep &)(*rxnNode.second);
    // Construct the entry in place and fill it through a reference; the step
    // must be stored only once.
    steps.emplace_back();
    ReactionStepInfo &info = steps.back();
    info.scheme_id = scheme_id;
    info.step_id = step.GetObjectID();
    info.ReactionStepProducts = step.m_products;
    info.ReactionStepReactants = step.m_reactants;
    info.ReactionStepObjectsBelowArrow = step.m_objectsBelowArrow;
    info.ReactionStepAtomMap = step.m_aamap;
  }
}
void ReactionInfo::set_reaction_steps(
std::map<unsigned int, std::vector<int>> &grouped_fragments,
const std::vector<std::unique_ptr<RWMol>> &mols) const {
if (steps.size()) {
std::map<unsigned int, size_t> fragments;
std::map<unsigned int, size_t> agents;
std::map<unsigned int, size_t> products;
std::map<unsigned int, Atom *> atoms;
size_t mol_idx = 0;
for (auto &mol : mols) {
auto idx = mol->getProp<unsigned int>(CDX_FRAG_ID);
fragments[idx] = mol_idx++;
for (auto &atom : mol->atoms()) {
unsigned int idx = atom->getProp<unsigned int>(CDX_ATOM_ID);
atoms[idx] = atom;
}
}
for (auto &step : steps) {
step.set_reaction_step(scheme_id, atoms, fragments, grouped_fragments,
mols);
}
}
}
}
} // namespace RDKit

View File

@@ -1,87 +0,0 @@
//
// Copyright (c) 2024, Glysade Inc
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Novartis Institutes for BioMedical Research Inc.
// nor the names of its contributors may be used to endorse or promote
// products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
#ifndef CHEMDRAW_REACTION_H
#define CHEMDRAW_REACTION_H
#include <GraphMol/RDKitBase.h>
#include "ChemDrawStartInclude.h"
#include "chemdraw/CDXStdObjects.h"
#include "ChemDrawEndInclude.h"
#include <map>
#include <vector>
namespace RDKit {
namespace ChemDraw {
// Holds the information of one ChemDraw reaction step so that the molecules
// read from the document can be converted into RDKit reactions.
struct ReactionStepInfo {
  // Ids of the owning scheme and of this step. Zero-initialised so that a
  // default-constructed instance never carries indeterminate values.
  unsigned int scheme_id = 0;
  unsigned int step_id = 0;
  // Ids referenced by the step, grouped by role. The objects above and below
  // the arrow are combined and treated as agents by set_reaction_step().
  std::vector<int> ReactionStepProducts;
  std::vector<int> ReactionStepReactants;
  std::vector<int> ReactionStepObjectsAboveArrow;
  std::vector<int> ReactionStepObjectsBelowArrow;
  // Pairs of ChemDraw atom ids that receive the same atom-map number.
  std::vector<std::pair<int, int>> ReactionStepAtomMap;

  // Tags the molecules playing one role in this step: sets the scheme id,
  // the step id and the property named `prop` on every molecule resolved
  // from `frag_ids`. `type` is only used in warning messages.
  void set_reaction_data(
      std::string type, std::string prop, const std::vector<int> &frag_ids,
      const std::map<unsigned int, size_t> &fragments,
      std::map<unsigned int, std::vector<int>> &grouped_fragments,
      const std::vector<std::unique_ptr<RWMol>> &mols) const;

  // Tags the reactant, product and agent molecules of this step and assigns
  // atom-map numbers to the atoms listed in ReactionStepAtomMap.
  void set_reaction_step(
      size_t scheme_id, std::map<unsigned int, Atom *> &atoms,
      const std::map<unsigned int, size_t> &fragments,
      std::map<unsigned int, std::vector<int>> &grouped_fragments,
      const std::vector<std::unique_ptr<RWMol>> &mols) const;
};
class ReactionInfo {
// Holds the information from the CDX data so that we can convert
// the molecules in the file to RDKit Reactions
// `steps` has the reaction steps found in the scheme; `scheme_id` is the
// object id of the scheme itself.
std::vector<ReactionStepInfo> steps;
unsigned int scheme_id;
public:
// Collects the reaction steps contained in `scheme`.
ReactionInfo(CDXReactionScheme &scheme);
// Applies every stored step to `mols`: builds fragment-id and atom-id lookup
// tables from the molecules and passes them, together with
// `grouped_fragments`, to each step. Does nothing when there are no steps.
void set_reaction_steps(
std::map<unsigned int, std::vector<int>> &grouped_fragments,
const std::vector<std::unique_ptr<RWMol>> &mols) const;
};
}
} // namespace RDKit
#endif

View File

@@ -1,96 +0,0 @@
//
// Copyright (c) 2024 Glysade Inc and other RDkit contributors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Novartis Institutes for BioMedical Research Inc.
// nor the names of its contributors may be used to endorse or promote
// products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
#include "chemdraw.h"
#include <catch2/catch_all.hpp>
#include "RDGeneral/test.h"
#include <RDGeneral/Invariant.h>
#include <GraphMol/RDKitBase.h>
#include <GraphMol/FileParsers/FileParsers.h>
#include <GraphMol/SmilesParse/SmilesWrite.h>
#include <GraphMol/SmilesParse/SmilesParse.h>
#include <GraphMol/SmilesParse/SmartsWrite.h>
#include <RDGeneral/FileParseException.h>
#include <boost/algorithm/string.hpp>
#include <RDGeneral/BadFileException.h>
#include <GraphMol/SmilesParse/CanonicalizeStereoGroups.h>
#include <filesystem>
using namespace RDKit;
using namespace RDKit::v2;
// Checks that tetrahedral geometry read from ChemDraw files yields the
// expected stereo SMILES.
TEST_CASE("Geometry") {
  const std::string path =
      std::string(getenv("RDBASE")) + "/External/ChemDraw/test_data/";

  // Reads `cdxmlName` from the test-data directory and requires that the
  // first molecule found has the same SMILES as `expected`.
  auto requireFirstMolMatches = [&path](const std::string &cdxmlName,
                                        const ROMol &expected) {
    auto parsed = MolsFromChemDrawFile(path + cdxmlName);
    REQUIRE(parsed.size());
    const auto expectedSmiles = MolToSmiles(expected);
    REQUIRE(expectedSmiles == MolToSmiles(*parsed[0]));
  };

  SECTION("R/S Tetrahedral") {
    requireFirstMolMatches("geometry-tetrahedral.cdxml",
                           *"[C@H]1(C2)[C@@H]2C1"_smiles);
    requireFirstMolMatches("geometry-tetrahedral-2.cdxml",
                           *"[C@H]1(C2)[C@@H]2C1"_smiles);
    requireFirstMolMatches("geometry-tetrahedral-3.cdxml",
                           *"C1CC[C@H]2CCCC[C@@H]2C1"_smiles);
    // Known failure, intentionally not run: geometry-tetrahedral-4.cdxml is
    // expected to give
    //   CC(S[C@@H]1CC2=C([H])C(CC[C@]2(C)[C@@]3([H])CC([H])([H])[C@]4(C)[C@](OC5=O)(CC5([H])[H])CC[C@@]4([H])[C@]13[H])=O)=O
    // but the parser still gets this one wrong.
  }
}

View File

@@ -1,164 +0,0 @@
//
// Copyright (c) 2025 Glysade Inc and other RDkit contributors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Novartis Institutes for BioMedical Research Inc.
// nor the names of its contributors may be used to endorse or promote
// products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
#include "chemdraw.h"
#include "chemdrawreaction.h"
#include <catch2/catch_all.hpp>
#include "RDGeneral/test.h"
#include <RDGeneral/Invariant.h>
#include <GraphMol/RDKitBase.h>
#include <GraphMol/ChemReactions/Reaction.h>
#include <GraphMol/ChemReactions/ReactionParser.h>
#include <GraphMol/ChemReactions/SanitizeRxn.h>
#include <GraphMol/SmilesParse/SmilesWrite.h>
#include <filesystem>
using namespace RDKit;
using namespace RDKit::v2;
// Reaction parsing tests for ChemDraw (CDXML) files.
//
// Container sizes and pointers are verified with REQUIRE before they are
// indexed or dereferenced: CHECK records a failure but lets the test continue,
// which would turn an empty result into an out-of-bounds access.
TEST_CASE("CDXML Parser") {
  std::string cdxmlbase =
      std::string(getenv("RDBASE")) + "/Code/GraphMol/test_data/CDXML/";
  SECTION("CDXML REACTION") {
    auto fname = cdxmlbase + "rxn2.cdxml";
    std::vector<std::string> expected = {
        "Cl[c:1]1[cH:4][cH:3][cH:2][cH:6][cH:5]1",
        "OC(O)B[c:7]1[cH:8][cH:9][cH:10][cH:11][cH:12]1",
        "[cH:1]1[cH:4][cH:3][cH:2][c:6](-[c:7]2[cH:8][cH:9][cH:10][cH:11][cH:12]2)[cH:5]1"};
    auto rxns = ChemDrawFileToChemicalReactions(fname);
    REQUIRE(rxns.size() == 1);
    unsigned int i = 0;
    // `count` walks through `expected` across reactants and then products;
    // at() turns an unexpected extra template into a reported exception.
    int count = 0;
    for (auto &mol : rxns[0]->getReactants()) {
      CHECK(mol->getProp<unsigned int>("CDX_SCHEME_ID") == 397);
      CHECK(mol->getProp<unsigned int>("CDX_STEP_ID") == 398);
      CHECK(mol->getProp<unsigned int>("CDX_REAGENT_ID") == i++);
      CHECK(MolToSmiles(*mol) == expected.at(count++));
    }
    i = 0;
    for (auto &mol : rxns[0]->getProducts()) {
      CHECK(mol->getProp<unsigned int>("CDX_SCHEME_ID") == 397);
      CHECK(mol->getProp<unsigned int>("CDX_STEP_ID") == 398);
      CHECK(mol->getProp<unsigned int>("CDX_PRODUCT_ID") == i++);
      CHECK(MolToSmiles(*mol) == expected.at(count++));
    }
    auto smarts = ChemicalReactionToRxnSmarts(*rxns[0]);
    CHECK(
        smarts ==
        "[#6&D2:2]1:[#6&D2:3]:[#6&D2:4]:[#6&D3:1](:[#6&D2:5]:[#6&D2:6]:1)-[#17&D1].[#6&D3](-[#5&D2]-[#6&D3:7]1:[#6&D2:8]:[#6&D2:9]:[#6&D2:10]:[#6&D2:11]:[#6&D2:12]:1)(-[#8&D1])-[#8&D1]>>[#6&D2:1]1:[#6&D2:5]:[#6&D3:6](:[#6&D2:2]:[#6&D2:3]:[#6&D2:4]:1)-[#6&D3:7]1:[#6&D2:8]:[#6&D2:9]:[#6&D2:10]:[#6&D2:11]:[#6&D2:12]:1");
  }
  SECTION("Github #7528 CDXML Grouped Agents in Reactions") {
    // The failing case had fragments grouped with labels, ensure the grouped
    // version and the ungrouped versions have the same results
    auto fname = cdxmlbase + "github7467-grouped-fragments.cdxml";
    auto rxns = ChemDrawFileToChemicalReactions(fname);
    REQUIRE(rxns.size() == 1);
    fname = cdxmlbase + "github7467-ungrouped-fragments.cdxml";
    auto rxns2 = ChemDrawFileToChemicalReactions(fname);
    REQUIRE(!rxns2.empty());
    CHECK(ChemicalReactionToRxnSmarts(*rxns[0]) ==
          ChemicalReactionToRxnSmarts(*rxns2[0]));
    // Check to see if our understanding of grouped reagents in reactions is
    // correct
    fname = cdxmlbase + "reaction-with-grouped-templates.cdxml";
    auto rxns3 = ChemDrawFileToChemicalReactions(fname);
    REQUIRE(rxns3.size() == 1);
    std::string rxnb = R"RXN($RXN
Mrv2004 062120241319
2 0
$MOL
Mrv2004 06212413192D
5 5 0 0 0 0 999 V2000
2.6221 -4.6475 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
2.6221 -5.4725 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
3.4070 -5.7274 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
3.8918 -5.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
3.4070 -4.3926 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
1 2 2 0 0 0 0
2 3 1 0 0 0 0
3 4 2 0 0 0 0
4 5 1 0 0 0 0
5 1 1 0 0 0 0
M END
$MOL
Mrv2004 06212413192D
11 11 0 0 0 0 999 V2000
6.9305 -4.5100 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
6.9305 -5.3350 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
7.6450 -5.7475 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
8.3594 -5.3350 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
8.3594 -4.5100 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
7.6450 -4.0975 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
8.6171 -4.4825 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
8.6171 -5.3075 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
9.4020 -5.5624 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
9.8868 -4.8950 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
9.4020 -4.2276 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
1 2 1 0 0 0 0
6 1 1 0 0 0 0
2 3 1 0 0 0 0
3 4 1 0 0 0 0
4 5 1 0 0 0 0
5 6 1 0 0 0 0
7 8 2 0 0 0 0
11 7 1 0 0 0 0
8 9 1 0 0 0 0
9 10 2 0 0 0 0
10 11 1 0 0 0 0
M END
)RXN";
    std::unique_ptr<ChemicalReaction> rxn_mb{RxnBlockToChemicalReaction(rxnb)};
    REQUIRE(rxn_mb);
    // CDXMLToReaction is sanitized by default, this might be a mistake...
    unsigned int failed;
    RxnOps::sanitizeRxn(
        *rxn_mb, failed,
        RxnOps::SANITIZE_ADJUST_REACTANTS | RxnOps::SANITIZE_ADJUST_PRODUCTS,
        RxnOps::MatchOnlyAtRgroupsAdjustParams());
    CHECK(rxns3[0]->getNumReactantTemplates() ==
          rxn_mb->getNumReactantTemplates());
    CHECK(ChemicalReactionToRxnSmarts(*rxns3[0]) ==
          ChemicalReactionToRxnSmarts(*rxn_mb));
  }
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,98 +0,0 @@
//
// Copyright (c) 2025 Glysade Inc and other RDkit contributors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Novartis Institutes for BioMedical Research Inc.
// nor the names of its contributors may be used to endorse or promote
// products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
#include "chemdraw.h"
#include <catch2/catch_all.hpp>
#include "RDGeneral/test.h"
#include <RDGeneral/Invariant.h>
#include <GraphMol/RDKitBase.h>
#include <GraphMol/FileParsers/FileParsers.h>
#include <GraphMol/SmilesParse/SmilesWrite.h>
#include <GraphMol/SmilesParse/SmilesParse.h>
#include <GraphMol/SmilesParse/SmartsWrite.h>
#include <RDGeneral/FileParseException.h>
#include <boost/algorithm/string.hpp>
#include <RDGeneral/BadFileException.h>
#include <GraphMol/SmilesParse/CanonicalizeStereoGroups.h>
using namespace RDKit;
using namespace RDKit::v2;
// Prints bond lengths for a molecule read from a mol file, for the same
// structure read from a ChemDraw file, and for the mol-file molecule after a
// round trip through a ChemDraw block.
TEST_CASE("Round TRIP") {
  std::string code_path = std::string(getenv("RDBASE"));

  // Writes "<bond index> : <length>" to stderr for every bond of `m`, using
  // conformer 0.
  auto printBondLengths = [](ROMol &m) {
    auto &conf = m.getConformer(0);
    for (auto bond : m.bonds()) {
      auto p1 = conf.getAtomPos(bond->getBeginAtomIdx());
      auto p2 = conf.getAtomPos(bond->getEndAtomIdx());
      auto length = (p1 - p2).length();
      std::cerr << bond->getIdx() << " : " << length << std::endl;
    }
  };

  // Eventually this catch test is to see if round tripping mol 3d -> chemdraw
  // returns reasonable coords, however chemdraw seems to forget about the
  // original scale and converts to pixel drawing coords, so this test is kind
  // of meaningless
  SECTION("3D structs") {
    auto fname =
        code_path + "/Code/GraphMol/FileParsers/test_data/Issue3514824.mol";
    // Owned by a smart pointer so the molecule is released even when a later
    // step throws.
    std::unique_ptr<RWMol> mol(MolFileToMol(fname));
    REQUIRE(mol);
    printBondLengths(*mol);
    std::cerr << "----------" << std::endl;
    {
      auto fname2 =
          code_path + "/Code/GraphMol/FileParsers/test_data/Issue3514824.cdxml";
      auto mols = MolsFromChemDrawFile(fname2);
      REQUIRE(!mols.empty());
      printBondLengths(*mols[0]);
    }
    std::cerr << "----------" << std::endl;
    {
      auto cdx = MolToChemDrawBlock(*mol);
      auto mols = MolsFromChemDrawBlock(cdx);
      REQUIRE(!mols.empty());
      printBondLengths(*mols[0]);
    }
  }
}

View File

@@ -1,330 +0,0 @@
//
// Copyright (c) 2025 Glysade Inc and other RDkit contributors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Novartis Institutes for BioMedical Research Inc.
// nor the names of its contributors may be used to endorse or promote
// products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
#include "chemdraw.h"
#include "chemdraw_doc.h"
#include <catch2/catch_all.hpp>
#include "RDGeneral/test.h"
#include <RDGeneral/Invariant.h>
#include <GraphMol/RDKitBase.h>
#include <GraphMol/ChemTransforms/ChemTransforms.h>
#include <GraphMol/FileParsers/FileParsers.h>
#include <GraphMol/SmilesParse/SmilesWrite.h>
#include <GraphMol/SmilesParse/SmilesParse.h>
#include <GraphMol/SmilesParse/SmartsWrite.h>
#include <RDGeneral/FileParseException.h>
#include <boost/algorithm/string.hpp>
#include <RDGeneral/BadFileException.h>
#include <GraphMol/SmilesParse/CanonicalizeStereoGroups.h>
#include "ChemDrawStartInclude.h"
#include "chemdraw/CDXStdObjects.h"
#include "ChemDrawEndInclude.h"
#include <filesystem>
using namespace RDKit;
using namespace RDKit::v2;
namespace {
// Returns a copy of `istr` in which the first occurrence of `from` has been
// replaced by `to`. When `from` does not occur the copy is returned
// unchanged. `istr` itself is never modified.
std::string replace(std::string &istr, const std::string &from,
                    const std::string &to) {
  std::string result = istr;
  const auto where = result.find(from);
  if (where != std::string::npos) {
    result.replace(where, from.length(), to);
  }
  return result;
}
// Reports whether a ChemDraw document uses features the round-trip test does
// not handle.
//
// Returns true when the raw text of `fname` mentions monomer attachments, or
// when a page of `document` directly contains a bracket attachment, a
// bracketed group, or a fragment whose sequence type is
// kCDXSeqType_Unknown. Returns false otherwise.
bool hasNonSupportedFeatures(CDXDocument &document, const std::string &fname) {
  // Monomers are detected textually: we should be able to figure this out
  // from the node but...
  std::ifstream input(fname);
  std::stringstream buffer;
  buffer << input.rdbuf();
  const std::string contents = buffer.str();
  const bool mentionsMonomers =
      contents.find("monomerAttachmentStructure_") != std::string::npos ||
      contents.find("Name=\"monomerAttachments") != std::string::npos;
  if (mentionsMonomers) {
    return true;
  }

  for (auto topLevel : document.ContainedObjects()) {
    CDXDatumID tag = (CDXDatumID)topLevel.second->GetTag();
    if (tag == kCDXObj_Page) {
      for (auto child : topLevel.second->ContainedObjects()) {
        const CDXDatumID childTag = (CDXDatumID)child.second->GetTag();
        if (childTag == kCDXObj_Fragment) {
          CDXFragment &fragment = (CDXFragment &)(*child.second);
          if (fragment.m_sequenceType == kCDXSeqType_Unknown) {
            return true;
          }
        } else if (childTag == kCDXObj_BracketAttachment ||
                   childTag == kCDXObj_BracketedGroup) {
          return true;
        }
      }
    } else if (tag == kCDXObj_ObjectTag) {
      // Placeholder for a node-based monomer check; the tag is re-read but
      // nothing is decided from it yet.
      CDXObject &object = *((CDXObject *)topLevel.second);
      tag = (CDXDatumID)object.GetTag();
    }
  }
  return false;
}
// Convenience overload: loads the ChemDraw document stored in `fname` and
// runs the document-based check on it.
bool hasNonSupportedFeatures(const std::string &fname) {
  const auto document = ChemDraw::ChemDrawToDocument(fname);
  return hasNonSupportedFeatures(*document, fname);
}
// Round-trip regression test over the optional CDXML6K corpus.
//
// For every .cdxml file under $RDBASE/External/ChemDraw/test_data/CDXML6K/CDXML
// the molecules parsed by MolsFromChemDrawFile are combined and their SMILES
// is compared with the SMILES of the reference mol file (mol/) and, failing
// that, with the reference SMILES file (smiles/). Files that disagree are
// copied into FAILED/, NOMOL/, BADPARSE/ or SANI/ next to the corpus so they
// can be inspected. The test is silently skipped when the corpus directory is
// absent and only fails when an unexpected mismatch ("Failed") is found.
TEST_CASE("Round TRIP") {
  // getenv returns a null pointer when the variable is unset; building a
  // std::string from that is undefined behaviour, so fall back to "".
  const char *rdbase = getenv("RDBASE");
  std::string path = std::string(rdbase ? rdbase : "") +
                     "/External/ChemDraw/test_data/CDXML6K/";
  SECTION("round trip") {
    // if we can't find the CDXML6K path, then don't run the test
    if (!std::filesystem::exists(path)) {
      return;
    }
    int failed = 0;
    int saniFailed = 0;
    int total = 0;
    int parseable = 0;
    int nomol = 0;
    int badparse = 0;
    int success = 0;
    int smimatches = 0;
    int nonSupported = 0;
    int no_mol_in_doc = 0;
    int bad_chemdraw_mol = 0;
    RDLog::LogStateSetter blocker;
    std::string cdxpath = path + "CDXML/";
    std::string molpath = path + "mol/";
    std::string smipath = path + "smiles/";
    std::string failpath = path + "FAILED/";
    std::string nomolpath = path + "NOMOL/";
    std::string badparsepath = path + "BADPARSE/";
    std::string sanitizationpath = path + "SANI/";
    // Files whose mismatch is understood and tolerated for now
    std::set<std::string> known_failures{
        "INDMUMLL1117_2025-01-24-17-23-14_304.cdxml",  // Dative oxygen gets set to a radical
        "INDMUMLL1117_2025-01-24-17-26-06_1010.cdxml",  // The next batch has a type of stereochem I don't know how to parse yet
        "INDMUMLL1117_2025-01-24-17-26-06_1012.cdxml",
        "INDMUMLL1117_2025-01-24-17-26-06_1022.cdxml",
        "INDMUMLL1117_2025-01-24-17-26-06_1024.cdxml",
        "INDMUMLL1117_2025-01-24-17-26-06_1026.cdxml",
        "INDMUMLL1117_2025-01-24-17-26-06_1032.cdxml",
        "INDMUMLL1117_2025-01-24-17-26-06_1034.cdxml",
        "INDMUMLL1117_2025-01-24-17-26-06_1036.cdxml",
        "INDMUMLL1117_2025-01-24-17-26-06_1040.cdxml",
        "INDMUMLL1117_2025-01-24-17-26-06_1042.cdxml",
        "INDMUMLL1117_2025-01-24-17-26-06_1048.cdxml",  // Stereo chem batch ends here
        "INDMUMLL1117_2025-01-24-17-26-13_1690.cdxml",  // RDKit shows a radical for the dative ->[O]
        "INDMUMLL1117_2025-01-24-17-27-11_6877.cdxml",  // The next batch has a type of stereochem I don't know how to parse yet (same as before)
        "INDMUMLL1117_2025-01-24-17-27-11_6878.cdxml",
        "INDMUMLL1117_2025-01-24-17-27-11_6883.cdxml",
        "INDMUMLL1117_2025-01-24-17-27-11_6884.cdxml",
        "INDMUMLL1117_2025-01-24-17-27-11_6889.cdxml",
        "INDMUMLL1117_2025-01-24-17-27-11_6896.cdxml",
        "INDMUMLL1117_2025-01-24-17-27-30_8574.cdxml",  // Stereo chem batch ends here
        "INDMUMLL1117_2025-01-24-17-27-31_8633.cdxml",  // RDkit is missing a dummy atom molecule
        "INDMUMLL1117_2025-01-24-17-27-31_8651.cdxml",  // RDkit is missing a dummy atom molecule
        "INDMUMLL1117_2025-01-24-17-27-53_10330.cdxml",  // 2D projection of 3D stereo, we fail this one
        "INDMUMLL1117_2025-01-24-17-27-53_10332.cdxml",  // 2D projection of 3D stereo, we fail this one
        "INDMUMLL1117_2025-01-24-17-27-54_10336.cdxml",  // RDKit Smiles keeps any bonds ~, ChemDraw doesn't
        "INDMUMLL1117_2025-01-24-17-28-02_10942.cdxml",  // Chemdraw smiles doesn't support quadruple bond $
        "INDMUMLL1117_2025-01-24-17-28-15_11666.cdxml",  // RDKit Smiles keeps any bonds ~, ChemDraw doesn't
        "INDMUMLL1117_2025-01-24-17-28-20_12011.cdxml",  // RDKit gets stereo from the 3D data and the wedging
        "INDMUMLL1117_2025-01-24-17-28-20_12012.cdxml",  // RDKit gets stereo from the 3D data and the wedging
        "INDMUMLL1117_2025-01-24-17-28-21_12031.cdxml",  // 2D projection of 3D stereo, we fail this one
        "INDMUMLL1117_2025-01-24-17-28-30_12568.cdxml",  // 2D projection of 3D stereo, we fail this one
        "INDMUMLL1117_2025-01-24-17-29-06_14654.cdxml",  // Dative oxygen gets set to a radical
        "INDMUMLL1117_2025-01-24-17-29-08_14775.cdxml",  // RDKit Smiles keeps any bonds ~, ChemDraw doesn't
        "INDMUMLL1117_2025-01-24-17-29-09_14896.cdxml",  // We apparently do a bit of a better job than chemdraw here in parsing R/S
        "INDMUMLL1117_2025-01-24-17-29-09_14897.cdxml"  // RDKit just gets very different stereo chem, no idea why
    };
    // Start every run with empty triage directories
    for (const auto &p : {failpath, nomolpath, badparsepath, sanitizationpath}) {
      if (std::filesystem::exists(p)) {
        std::filesystem::remove_all(p);
      }
      std::filesystem::create_directory(p);
    }
    for (const auto &entry :
         std::filesystem::recursive_directory_iterator(cdxpath)) {
      if (entry.is_regular_file()) {
        std::string fname = entry.path().filename().string();
        // issue here - graphite nanotube
        if (fname == "INDMUMLL1117_2025-01-24-17-28-02_10946.cdxml")
          continue;  // nanotube takes forever
        auto molfname = molpath + replace(fname, ".cdxml", ".mol");
        auto smifname = smipath + replace(fname, ".cdxml", ".smi");
        total++;
        // if chemscript couldn't make an output, ignore it
        if (!std::filesystem::exists(molfname) ||
            !std::filesystem::exists(smifname)) {
          no_mol_in_doc++;
          continue;
        }
        // Get the ChemScript mol and smiles
        std::unique_ptr<RWMol> mol;
        try {
          mol.reset(MolFileToMol(molfname));
        } catch (...) {
          bad_chemdraw_mol++;
          continue;
        }
        std::ifstream ifs(smifname);
        std::string smiles_in;
        ifs >> smiles_in;
        // Canonicalize the reference SMILES when it parses; otherwise keep
        // the raw text so it can still be compared verbatim.
        std::string smiles;
        {
          try {
            auto smimol = SmilesToMol(smiles_in);
            if (!smimol)
              smiles = smiles_in;
            else {
              smiles = MolToSmiles(*smimol);
              delete smimol;
            }
          } catch (...) {
            smiles = smiles_in;
          }
        }
        parseable++;
        // Read the cdxml
        std::vector<std::unique_ptr<RWMol>> mols;
        bool santizationFailure = false;
        try {
          mols = MolsFromChemDrawFile(entry.path().string());
          if (mols.size() == 0) {
            // Nothing came back with default settings: retry without
            // sanitization and remember that we had to.
            ChemDrawParserParams params;
            params.sanitize = false;
            mols = MolsFromChemDrawFile(entry.path().string(), params);
            santizationFailure = true;
          }
          if (!mols.size()) {
            if (smiles.size() == 0) {
              // At least we match the chemscript non-mol
              success++;
            } else if (hasNonSupportedFeatures(entry.path().string())) {
              nonSupported++;
            } else {
              std::cerr << "[NOMOL]: " << entry.path().string() << std::endl;
              std::filesystem::copy(
                  entry.path().string(),
                  nomolpath + entry.path().filename().string());
              nomol++;
            }
            continue;
          }
        } catch (...) {
          std::cerr << "[BADPARSE]: " << entry.path().string() << std::endl;
          std::filesystem::copy(
              entry.path(), badparsepath + entry.path().filename().string());
          badparse++;
          continue;
        }
        // Merge all parsed molecules into one so a single SMILES is compared
        std::unique_ptr<ROMol> m = std::make_unique<ROMol>(*mols[0]);
        for (size_t i = 1; i < mols.size(); i++) {
          m.reset(combineMols(*m, *mols[i]));
        }
        auto rdkit_smi = MolToSmiles(*m);
        auto mol_smi = mol.get() ? MolToSmiles(*mol) : "";
        if (mol_smi != rdkit_smi) {
          // Do we match chemscripts smiles output at least?
          if (rdkit_smi == smiles) {
            smimatches++;
            continue;
          }
          if (hasNonSupportedFeatures(entry.path().string())) {
            nonSupported++;
            continue;  // has unsupported features
          }
          if (santizationFailure) {
            std::cerr << "[SANI]: " << entry.path() << std::endl;
            std::filesystem::copy(
                entry.path(),
                sanitizationpath + entry.path().filename().string());
            saniFailed++;
          } else {
            if (known_failures.find(entry.path().filename().string()) !=
                known_failures.end())
              continue;  // we know this failure and it's ok for now
            std::cerr << "[FAIL]: " << entry.path() << std::endl;
            std::filesystem::copy(entry.path(),
                                  failpath + entry.path().filename().string());
            failed++;
          }
          std::cerr << "rdkit: " << rdkit_smi << std::endl;
          std::cerr << "chemscript (mol): " << mol_smi << std::endl;
          std::cerr << "chemscript (smiles): " << smiles << std::endl;
          std::cerr << molfname << std::endl;
          std::cerr << smifname << std::endl;
        } else {
          success++;
        }
      }
    }
    std::cerr << "Total:" << total << std::endl;
    // Report the number of files that actually had chemscript output
    std::cerr << "Parseable (has chemscript output):" << parseable << std::endl;
    std::cerr << "Success:" << success + smimatches << std::endl;
    std::cerr << "skipped (non supported features):" << nonSupported
              << std::endl;
    std::cerr << "skipped (no mol in doc):" << no_mol_in_doc << std::endl;
    std::cerr << "Chemscript smiles matches not chemscript mol: " << smimatches
              << std::endl;
    std::cerr << "Failed:" << failed << std::endl;
    std::cerr << "Sanitization:" << saniFailed << std::endl;
    std::cerr << "Nomol:" << nomol << std::endl;
    std::cerr << "Badparse:" << badparse << std::endl;
    std::cerr << "Bad ChemDraw Mol:" << bad_chemdraw_mol << std::endl;
    REQUIRE(failed == 0);
  }
}
} // namespace

View File

@@ -1,219 +0,0 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE CDXML SYSTEM "https://static.chemistry.revvitycloud.com/cdxml/CDXML.dtd" >
<CDXML
CreationProgram="ChemDraw 23.1.2.7"
Name="missing.cdxml"
BoundingBox="244.09 262.11 295.91 291.09"
WindowPosition="0 0"
WindowSize="-429522944 -859045888"
WindowIsZoomed="yes"
FractionalWidths="yes"
InterpretChemically="yes"
ShowAtomQuery="yes"
ShowAtomStereo="no"
ShowAtomEnhancedStereo="yes"
ShowAtomNumber="no"
ShowResidueID="no"
ShowBondQuery="yes"
ShowBondRxn="yes"
ShowBondStereo="no"
ShowTerminalCarbonLabels="no"
ShowNonTerminalCarbonLabels="no"
HideImplicitHydrogens="no"
LabelFont="3"
LabelSize="10"
LabelFace="96"
CaptionFont="3"
CaptionSize="10"
HashSpacing="2.50"
MarginWidth="1.60"
LineWidth="0.60"
BoldWidth="2"
BondLength="14.40"
BondSpacing="18"
ChainAngle="120"
LabelJustification="Auto"
CaptionJustification="Left"
AminoAcidTermini="HOH"
ShowSequenceTermini="yes"
ShowSequenceBonds="yes"
ShowSequenceUnlinkedBranches="no"
ResidueWrapCount="40"
ResidueBlockCount="10"
PrintMargins="36 36 36 36"
MacPrintInfo="0003000001200120000000000B6608A0FF84FF880BE309180367052703FC0002000001200120000000000B6608A0000100000064000000010001010100000001270F000100010000000000000000000000000002001901900000000000600000000000000000000100000000000000000000000000000000"
ChemPropName=""
ChemPropFormula="Chemical Formula: "
ChemPropExactMass="Exact Mass: "
ChemPropMolWt="Molecular Weight: "
ChemPropMOverZ="m/z: "
ChemPropAnalysis="Elemental Analysis: "
ChemPropBoilingPt="Boiling Point: "
ChemPropMeltingPt="Melting Point: "
ChemPropCritTemp="Critical Temp: "
ChemPropCritPres="Critical Pres: "
ChemPropCritVol="Critical Vol: "
ChemPropGibbs="Gibbs Energy: "
ChemPropLogP="Log P: "
ChemPropMR="MR: "
ChemPropHenry="Henry&apos;s Law: "
ChemPropEForm="Heat of Form: "
ChemProptPSA="tPSA: "
ChemPropCLogP="CLogP: "
ChemPropCMR="CMR: "
ChemPropLogS="LogS: "
ChemPropPKa="pKa: "
ChemPropID=""
ChemPropFragmentLabel=""
color="0"
bgcolor="1"
RxnAutonumberStart="1"
RxnAutonumberConditions="no"
RxnAutonumberStyle="Roman"
RxnAutonumberFormat="(#)"
MonomerRenderingStyle="graphic"
><colortable>
<color r="1" g="1" b="1"/>
<color r="0" g="0" b="0"/>
<color r="1" g="0" b="0"/>
<color r="1" g="1" b="0"/>
<color r="0" g="1" b="0"/>
<color r="0" g="1" b="1"/>
<color r="0" g="0" b="1"/>
<color r="1" g="0" b="1"/>
</colortable><fonttable>
<font id="3" charset="iso-8859-1" name="Arial"/>
</fonttable><page
id="702"
BoundingBox="0 0 540 719.75"
HeaderPosition="36"
FooterPosition="36"
PrintTrimMarks="yes"
HeightPages="1"
WidthPages="1"
><group
id="411"
BoundingBox="244.09 262.11 295.91 291.09"
Z="25"
><fragment
id="412"
BoundingBox="244.09 262.11 295.91 291.09"
Z="26"
><n
id="43"
p="244.24 270.69"
Z="27"
AS="N"
AtomID="1"
/><n
id="45"
p="256.70 263.49"
Z="28"
AS="N"
AtomID="2"
/><n
id="49"
p="281.61 277.86"
Z="31"
AS="N"
AtomID="4"
/><n
id="47"
p="269.14 270.69"
Z="33"
NodeType="Fragment"
NeedsClean="yes"
AS="N"
BondOrdering="57 58"
AtomID="3"
><fragment
id="703"
ConnectionOrder="558 559"
><n
id="557"
p="269.14 270.69"
Z="20"
/><n
id="558"
p="256.68 263.48"
Z="21"
NodeType="ExternalConnectionPoint"
ExternalConnectionNum="1"
/><n
id="559"
p="281.61 277.90"
Z="22"
NodeType="ExternalConnectionPoint"
ExternalConnectionNum="2"
/><b
id="560"
Z="23"
B="558"
E="557"
Order="2"
DoublePosition="Center"
/><b
id="561"
Z="24"
B="559"
E="557"
Order="2"
DoublePosition="Center"
/></fragment><t
p="270.89 274.59"
BoundingBox="267.39 269.05 270.89 274.59"
LabelJustification="Right"
Justification="Right"
LabelAlignment="Right"
><s font="3" size="10" face="96">•</s></t></n><n
id="51"
p="288.84 290.33"
Z="34"
AS="N"
AtomID="5"
/><n
id="53"
p="295.53 281.58"
Z="36"
AS="N"
AtomID="6"
/><b
id="56"
Z="29"
B="43"
E="45"
BS="N"
/><b
id="57"
Z="30"
B="45"
E="47"
Order="2"
DoublePosition="Center"
BS="N"
/><b
id="58"
Z="32"
B="47"
E="49"
Order="2"
DoublePosition="Center"
BS="N"
/><b
id="59"
Z="35"
B="49"
E="51"
Display="WedgeBegin"
BS="N"
/><b
id="60"
Z="37"
B="49"
E="53"
Display="WedgedHashBegin"
BS="N"
/></fragment></group><annotation
Keyword="Name"
Content="acetonitrile"
/></page></CDXML>

View File

@@ -1,151 +0,0 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE CDXML SYSTEM "https://static.chemistry.revvitycloud.com/cdxml/CDXML.dtd" >
<CDXML
CreationProgram="ChemDraw 14.0.1.9"
Name="Bicyclics.ctp"
BoundingBox="192.60 112.23 243.35 144.77"
WindowPosition="0 0"
WindowSize="-2147483648 0"
WindowIsZoomed="yes"
FractionalWidths="yes"
InterpretChemically="yes"
ShowAtomQuery="yes"
ShowAtomStereo="no"
ShowAtomEnhancedStereo="yes"
ShowAtomNumber="no"
ShowResidueID="no"
ShowBondQuery="yes"
ShowBondRxn="yes"
ShowBondStereo="no"
ShowTerminalCarbonLabels="no"
ShowNonTerminalCarbonLabels="no"
HideImplicitHydrogens="no"
LabelFont="3"
LabelSize="10"
LabelFace="96"
CaptionFont="4"
CaptionSize="12"
HashSpacing="2.70"
MarginWidth="2"
LineWidth="0.85"
BoldWidth="4"
BondLength="28.35"
BondSpacing="12"
ChainAngle="120"
LabelJustification="Auto"
CaptionJustification="Left"
AminoAcidTermini="HOH"
ShowSequenceTermini="yes"
ShowSequenceBonds="yes"
ResidueWrapCount="40"
ResidueBlockCount="10"
ResidueZigZag="yes"
NumberResidueBlocks="no"
PrintMargins="36 36 36 36"
MacPrintInfo="000300000001000100000000000A000700000000000A0008036704B003C0000200000001000100000000000A0007000100640064000000010001010100FF0001270F000100010000000000000000000000000002001901900000000000400000000000000000000100000000000000000000000000000000"
ChemPropName=""
ChemPropFormula="Chemical Formula: "
ChemPropExactMass="Exact Mass: "
ChemPropMolWt="Molecular Weight: "
ChemPropMOverZ="m/z: "
ChemPropAnalysis="Elemental Analysis: "
ChemPropBoilingPt="Boiling Point: "
ChemPropMeltingPt="Melting Point: "
ChemPropCritTemp="Critical Temp: "
ChemPropCritPres="Critical Pres: "
ChemPropCritVol="Critical Vol: "
ChemPropGibbs="Gibbs Energy: "
ChemPropLogP="Log P: "
ChemPropMR="MR: "
ChemPropHenry="Henry&apos;s Law: "
ChemPropEForm="Heat of Form: "
ChemProptPSA="tPSA: "
ChemPropCLogP="CLogP: "
ChemPropCMR="CMR: "
ChemPropLogS="LogS: "
ChemPropPKa="pKa: "
color="0"
bgcolor="1"
><colortable>
<color r="1" g="1" b="1"/>
<color r="0" g="0" b="0"/>
<color r="1" g="0" b="0"/>
<color r="1" g="1" b="0"/>
<color r="0" g="1" b="0"/>
<color r="0" g="1" b="1"/>
<color r="0" g="0" b="1"/>
<color r="1" g="0" b="1"/>
</colortable><fonttable>
<font id="3" charset="iso-8859-1" name="Arial"/>
<font id="4" charset="iso-8859-1" name="Times New Roman"/>
</fonttable><page
id="913"
BoundingBox="0 0 504 648"
HeaderPosition="36"
FooterPosition="36"
PrintTrimMarks="yes"
HeightPages="1"
WidthPages="1"
><group
id="787"
BoundingBox="192.60 112.23 243.35 144.77"
Z="1"
><fragment
id="788"
BoundingBox="192.60 112.23 243.35 144.77"
Z="2"
><n
id="1"
p="218.25 114.35"
Z="12"
Geometry="Tetrahedral"
BondOrdering="10 12 13 0"
HDot="yes"
/><n
id="3"
p="217.70 142.65"
Z="13"
Geometry="Tetrahedral"
BondOrdering="10 11 0 14"
HDot="yes"
/><n
id="5"
p="242.50 128.90"
Z="14"
AS="N"
/><n
id="7"
p="193.45 128.05"
Z="18"
AS="N"
/><b
id="10"
Z="15"
B="1"
E="3"
BS="N"
/><b
id="11"
Z="16"
B="3"
E="5"
BS="N"
/><b
id="12"
Z="17"
B="1"
E="5"
BS="N"
/><b
id="13"
Z="19"
B="1"
E="7"
BS="N"
/><b
id="14"
Z="20"
B="3"
E="7"
BS="N"
/></fragment></group></page></CDXML>

View File

@@ -1,217 +0,0 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE CDXML SYSTEM "https://static.chemistry.revvitycloud.com/cdxml/CDXML.dtd" >
<CDXML
CreationProgram="ChemDraw 14.0.1.9"
Name="Bicyclics.ctp"
BoundingBox="122.12 30.21 221.07 87.84"
WindowPosition="0 0"
WindowSize="-2147483648 0"
WindowIsZoomed="yes"
FractionalWidths="yes"
InterpretChemically="yes"
ShowAtomQuery="yes"
ShowAtomStereo="no"
ShowAtomEnhancedStereo="yes"
ShowAtomNumber="no"
ShowResidueID="no"
ShowBondQuery="yes"
ShowBondRxn="yes"
ShowBondStereo="no"
ShowTerminalCarbonLabels="no"
ShowNonTerminalCarbonLabels="no"
HideImplicitHydrogens="no"
LabelFont="3"
LabelSize="10"
LabelFace="96"
CaptionFont="4"
CaptionSize="12"
HashSpacing="2.70"
MarginWidth="2"
LineWidth="0.85"
BoldWidth="4"
BondLength="28.35"
BondSpacing="12"
ChainAngle="120"
LabelJustification="Auto"
CaptionJustification="Left"
AminoAcidTermini="HOH"
ShowSequenceTermini="yes"
ShowSequenceBonds="yes"
ResidueWrapCount="40"
ResidueBlockCount="10"
ResidueZigZag="yes"
NumberResidueBlocks="no"
PrintMargins="36 36 36 36"
MacPrintInfo="000300000001000100000000000A000700000000000A0008036704B003C0000200000001000100000000000A0007000100640064000000010001010100FF0001270F000100010000000000000000000000000002001901900000000000400000000000000000000100000000000000000000000000000000"
ChemPropName=""
ChemPropFormula="Chemical Formula: "
ChemPropExactMass="Exact Mass: "
ChemPropMolWt="Molecular Weight: "
ChemPropMOverZ="m/z: "
ChemPropAnalysis="Elemental Analysis: "
ChemPropBoilingPt="Boiling Point: "
ChemPropMeltingPt="Melting Point: "
ChemPropCritTemp="Critical Temp: "
ChemPropCritPres="Critical Pres: "
ChemPropCritVol="Critical Vol: "
ChemPropGibbs="Gibbs Energy: "
ChemPropLogP="Log P: "
ChemPropMR="MR: "
ChemPropHenry="Henry&apos;s Law: "
ChemPropEForm="Heat of Form: "
ChemProptPSA="tPSA: "
ChemPropCLogP="CLogP: "
ChemPropCMR="CMR: "
ChemPropLogS="LogS: "
ChemPropPKa="pKa: "
color="0"
bgcolor="1"
><colortable>
<color r="1" g="1" b="1"/>
<color r="0" g="0" b="0"/>
<color r="1" g="0" b="0"/>
<color r="1" g="1" b="0"/>
<color r="0" g="1" b="0"/>
<color r="0" g="1" b="1"/>
<color r="0" g="0" b="1"/>
<color r="1" g="0" b="1"/>
</colortable><fonttable>
<font id="3" charset="iso-8859-1" name="Arial"/>
<font id="4" charset="iso-8859-1" name="Times New Roman"/>
</fonttable><page
id="913"
BoundingBox="0 0 504 648"
HeaderPosition="36"
FooterPosition="36"
PrintTrimMarks="yes"
HeightPages="1"
WidthPages="1"
><group
id="858"
BoundingBox="122.12 30.21 221.07 87.84"
Z="1"
><fragment
id="859"
BoundingBox="122.12 30.21 221.07 87.84"
Z="2"
><n
id="657"
p="122.55 44.90"
Z="24"
AS="N"
/><n
id="659"
p="122.55 73.20"
Z="25"
AS="N"
/><n
id="661"
p="147.05 87.35"
Z="26"
AS="N"
/><n
id="663"
p="171.60 73.20"
Z="27"
Geometry="Tetrahedral"
BondOrdering="680 681 684 0"
HDash="yes"
/><n
id="665"
p="171.60 44.90"
Z="28"
Geometry="Tetrahedral"
BondOrdering="681 682 0 688"
HDot="yes"
/><n
id="667"
p="147.05 30.70"
Z="29"
AS="N"
/><n
id="669"
p="196.10 87.35"
Z="36"
AS="N"
/><n
id="671"
p="220.65 73.20"
Z="37"
AS="N"
/><n
id="673"
p="220.65 44.90"
Z="38"
AS="N"
/><n
id="675"
p="196.10 30.70"
Z="39"
AS="N"
/><b
id="678"
Z="30"
B="657"
E="659"
BS="N"
/><b
id="679"
Z="31"
B="659"
E="661"
BS="N"
/><b
id="680"
Z="32"
B="661"
E="663"
BS="N"
/><b
id="681"
Z="33"
B="663"
E="665"
BS="N"
/><b
id="682"
Z="34"
B="665"
E="667"
BS="N"
/><b
id="683"
Z="35"
B="657"
E="667"
BS="N"
/><b
id="684"
Z="40"
B="663"
E="669"
BS="N"
/><b
id="685"
Z="41"
B="669"
E="671"
BS="N"
/><b
id="686"
Z="42"
B="671"
E="673"
BS="N"
/><b
id="687"
Z="43"
B="673"
E="675"
BS="N"
/><b
id="688"
Z="44"
B="665"
E="675"
BS="N"
/></fragment></group></page></CDXML>

File diff suppressed because it is too large Load Diff

View File

@@ -1,162 +0,0 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE CDXML SYSTEM "https://static.chemistry.revvitycloud.com/cdxml/CDXML.dtd" >
<CDXML
CreationProgram="ChemDraw 23.1.2.7"
Name="geometry-tetrahedral.cdxml"
BoundingBox="192.60 112.23 243.35 144.77"
WindowPosition="0 0"
WindowSize="-859045888 1288437760"
WindowIsZoomed="yes"
FractionalWidths="yes"
InterpretChemically="yes"
ShowAtomQuery="yes"
ShowAtomStereo="no"
ShowAtomEnhancedStereo="yes"
ShowAtomNumber="no"
ShowResidueID="no"
ShowBondQuery="yes"
ShowBondRxn="yes"
ShowBondStereo="no"
ShowTerminalCarbonLabels="no"
ShowNonTerminalCarbonLabels="no"
HideImplicitHydrogens="no"
LabelFont="3"
LabelSize="10"
LabelFace="96"
CaptionFont="20"
CaptionSize="12"
HashSpacing="2.70"
MarginWidth="2"
LineWidth="0.85"
BoldWidth="4"
BondLength="28.35"
BondSpacing="12"
ChainAngle="120"
LabelJustification="Auto"
CaptionJustification="Left"
AminoAcidTermini="HOH"
ShowSequenceTermini="yes"
ShowSequenceBonds="yes"
ShowSequenceUnlinkedBranches="no"
ResidueWrapCount="40"
ResidueBlockCount="10"
PrintMargins="36 36 36 36"
MacPrintInfo="000300000001000100000000000A000700000000000A0008036704B003C0000200000001000100000000000A0007000100000064000000010001010100FF0001270F000100010000000000000000000000000002001901900000000000400000000000000000000100000000000000000000000000000000"
ChemPropName=""
ChemPropFormula="Chemical Formula: "
ChemPropExactMass="Exact Mass: "
ChemPropMolWt="Molecular Weight: "
ChemPropMOverZ="m/z: "
ChemPropAnalysis="Elemental Analysis: "
ChemPropBoilingPt="Boiling Point: "
ChemPropMeltingPt="Melting Point: "
ChemPropCritTemp="Critical Temp: "
ChemPropCritPres="Critical Pres: "
ChemPropCritVol="Critical Vol: "
ChemPropGibbs="Gibbs Energy: "
ChemPropLogP="Log P: "
ChemPropMR="MR: "
ChemPropHenry="Henry&apos;s Law: "
ChemPropEForm="Heat of Form: "
ChemProptPSA="tPSA: "
ChemPropCLogP="CLogP: "
ChemPropCMR="CMR: "
ChemPropLogS="LogS: "
ChemPropPKa="pKa: "
ChemPropID=""
ChemPropFragmentLabel=""
color="0"
bgcolor="1"
RxnAutonumberStart="1"
RxnAutonumberConditions="no"
RxnAutonumberStyle="Roman"
RxnAutonumberFormat="(#)"
><colortable>
<color r="1" g="1" b="1"/>
<color r="0" g="0" b="0"/>
<color r="1" g="0" b="0"/>
<color r="1" g="1" b="0"/>
<color r="0" g="1" b="0"/>
<color r="0" g="1" b="1"/>
<color r="0" g="0" b="1"/>
<color r="1" g="0" b="1"/>
</colortable><fonttable>
<font id="3" charset="iso-8859-1" name="Arial"/>
<font id="20" charset="iso-8859-1" name="Times New Roman"/>
</fonttable><page
id="915"
BoundingBox="0 0 504 648"
HeaderPosition="36"
FooterPosition="36"
PrintTrimMarks="yes"
HeightPages="1"
WidthPages="1"
><group
id="787"
BoundingBox="192.60 112.23 243.35 144.77"
Z="12"
><fragment
id="788"
BoundingBox="192.60 112.23 243.35 144.77"
Z="13"
><n
id="1"
p="218.25 114.35"
Z="14"
Geometry="Tetrahedral"
AS="s"
BondOrdering="10 12 13 0"
HDot="yes"
AtomID="1"
/><n
id="3"
p="217.70 142.65"
Z="15"
Geometry="Tetrahedral"
AS="s"
BondOrdering="10 11 0 14"
HDot="yes"
AtomID="2"
/><n
id="5"
p="242.50 128.90"
Z="16"
AS="N"
AtomID="3"
/><n
id="7"
p="193.45 128.05"
Z="20"
AS="N"
AtomID="4"
/><b
id="10"
Z="17"
B="1"
E="3"
BS="N"
/><b
id="11"
Z="18"
B="3"
E="5"
BS="N"
/><b
id="12"
Z="19"
B="1"
E="5"
BS="N"
/><b
id="13"
Z="21"
B="1"
E="7"
BS="N"
/><b
id="14"
Z="22"
B="3"
E="7"
BS="N"
/></fragment></group></page></CDXML>

View File

@@ -1,325 +0,0 @@
#include "utils.h"
#include <GraphMol/MolOps.h>
#include <GraphMol/CIPLabeler/CIPLabeler.h>
#include <GraphMol/SmilesParse/SmilesParse.h>
#include <GraphMol/SmilesParse/SmilesWrite.h>
#include <GraphMol/FileParsers/FileWriters.h>
namespace RDKit {
namespace ChemDraw {
// Return a human-readable name for a ChemDraw node type.
// Values that are not listed below map to "?".
std::string NodeType(CDXNodeType nodetype) {
  switch (nodetype) {
    case kCDXNodeType_Unspecified:
      return "Unspecified";
    case kCDXNodeType_Element:
      return "Element";
    case kCDXNodeType_ElementList:
      return "ElementList";
    case kCDXNodeType_ElementListNickname:
      return "ElementListNickname";
    case kCDXNodeType_Nickname:
      return "Nickname";
    case kCDXNodeType_Fragment:
      return "Fragment";
    case kCDXNodeType_Formula:
      return "Formula";  // was misspelled "Forumla"
    case kCDXNodeType_GenericNickname:
      return "GenericNickname";
    case kCDXNodeType_AnonymousAlternativeGroup:
      return "Anonymous Alternative Group";
    case kCDXNodeType_NamedAlternativeGroup:
      return "Named Alternative Group";
    case kCDXNodeType_MultiAttachment:
      return "MultiAttachment";
    case kCDXNodeType_VariableAttachment:
      return "Variable Attachment";
    case kCDXNodeType_ExternalConnectionPoint:
      return "ExternalConnectionPoint";
    case kCDXNodeType_LinkNode:
      return "LinkNode";
    case kCDXNodeType_Monomer:
      return "Monomer";
    default:
      return "?";
  }
}
// Uniformly rescale every position in `conf` by
// targetBondLength / referenceLength.
//
//   mol              - supplies the bonds used to measure the average length
//   conf             - conformer whose positions are scaled in place
//   targetBondLength - desired bond length after scaling
//   bondLength       - reference length; when negative, the average bond
//                      length measured in `conf` is used instead
//
// Nothing is changed when the reference length is not strictly positive
// (for example bondLength == 0, or a negative bondLength on a molecule
// without bonds).
void scaleBonds(const ROMol &mol, Conformer &conf, double targetBondLength,
                double bondLength) {
  double referenceLength = bondLength;
  if (bondLength < 0) {
    // No usable bond length was supplied: fall back to the average bond
    // length of the molecule.
    const auto numBonds = mol.getNumBonds();
    if (numBonds == 0) {
      // Avoid 0/0: there is nothing to measure, so leave the coordinates alone
      return;
    }
    double totalLength = 0.0;
    for (const auto &bond : mol.bonds()) {
      totalLength += (conf.getAtomPos(bond->getBeginAtomIdx()) -
                      conf.getAtomPos(bond->getEndAtomIdx()))
                         .length();
    }
    referenceLength = totalLength / numBonds;
  }
  if (referenceLength > 0) {
    const double scale = targetBondLength / referenceLength;
    for (auto &pos : conf.getPositions()) {
      pos *= scale;
    }
  }
}
// Read the fuse label stored on `atm` under the FUSE_LABEL property.
// Returns 0 ("no label") when the property is not present.
unsigned int get_fuse_label(Atom *atm) {
  // return atm->getAtomMapNum(); easier debugging
  unsigned int fuseLabel = 0;
  if (!atm->getPropIfPresent<unsigned int>(FUSE_LABEL, fuseLabel)) {
    return 0;
  }
  return fuseLabel;
}
// Store `idx` as the fuse label of `atm`; an `idx` of 0 removes the label.
void set_fuse_label(Atom *atm, unsigned int idx) {
  // atm->setAtomMapNum(idx); //for debugging
  if (idx == 0) {
    atm->clearProp(FUSE_LABEL);
    return;
  }
  atm->setProp<unsigned int>(FUSE_LABEL, idx);
}
struct FragmentReplacement {
// R = Replacement
// F = Fragment
// C = Conneciton
// C R C F F
// N=*=C.*=CCC=*
// label 1 1 1
// has bond ordering
//
// goal replace the atom R with the connections
unsigned int label = 0;
Atom *replacement_atom = nullptr;
std::vector<Atom *> replacement_connection_atoms;
std::vector<Atom *> fragment_atoms;
bool replace(RWMol &mol) {
if (!replacement_atom) return true;
auto bond_ordering =
replacement_atom->getProp<std::vector<int>>(CDX_BOND_ORDERING);
// Find the connecting atoms and and do the replacement
for (auto bond : mol.atomBonds(replacement_atom)) {
// find the position of the attachement bonds in the bond ordering
auto bond_id = bond->getProp<unsigned int>(CDX_BOND_ID);
auto it = std::find(bond_ordering.begin(), bond_ordering.end(), bond_id);
if (it == bond_ordering.end()) return false;
auto pos = std::distance(bond_ordering.begin(), it);
auto &xatom = fragment_atoms[pos];
for (auto &xbond : mol.atomBonds(xatom)) {
// xatom is the fragment dummy atom
// xbond is the fragment bond
if (bond->getBeginAtom() == replacement_atom) {
mol.addBond(xbond->getOtherAtom(xatom), bond->getEndAtom(),
bond->getBondType());
} else {
mol.addBond(bond->getBeginAtom(), xbond->getOtherAtom(xatom),
bond->getBondType());
}
}
}
mol.removeAtom(replacement_atom);
for (auto &atom : fragment_atoms) {
mol.removeAtom(atom);
}
return true;
}
};
// Replace fragments that are not possible with molzip.
//
// Atoms carrying a fuse label are grouped by that label: the atom that also
// has a CDX_BOND_ORDERING property is the one to be replaced, every other
// atom with the same label is one of the fragment's attachment atoms. Each
// group is then applied inside a single batch edit. Always returns true; the
// result of the individual replacements is not inspected.
bool replaceFragments(RWMol &mol) {
  std::map<int, FragmentReplacement> pending;
  for (auto atom : mol.atoms()) {
    const auto fuseLabel = get_fuse_label(atom);
    if (!fuseLabel) {
      continue;  // unlabeled atoms take no part in a replacement
    }
    auto &group = pending[fuseLabel];
    if (atom->hasProp(CDX_BOND_ORDERING)) {
      // the single atom that is supposed to be replaced via a fragment
      group.label = fuseLabel;
      group.replacement_atom = atom;
    } else {
      // a fragment attachment atom that needs to be attached to the
      // neighbours of the atom being replaced
      group.fragment_atoms.push_back(atom);
    }
  }

  mol.beginBatchEdit();
  for (auto &item : pending) {
    item.second.replace(mol);
  }
  mol.commitBatchEdit();
  return true;
}
namespace {
// Derive a tetrahedral chiral tag for `center_atom` from its ChemDraw bond
// ordering (CDX_BOND_ORDERING) and the coordinates in `conf`.
//
// Returns CHI_UNSPECIFIED when the atom has no bond ordering, when the
// ordering has fewer than three entries, when a bond around the atom has no
// CDX_BOND_ID, or when fewer than three ordered bonds could be matched.
Atom::ChiralType getChirality(ROMol &mol, Atom *center_atom, Conformer &conf) {
  if (!center_atom->hasProp(CDX_BOND_ORDERING)) {
    return Atom::ChiralType::CHI_UNSPECIFIED;
  }
  const auto ordering =
      center_atom->getProp<std::vector<int>>(CDX_BOND_ORDERING);
  if (ordering.size() < 3) {
    return Atom::ChiralType::CHI_UNSPECIFIED;
  }

  // (angle, bond index) for every bond named in the ordering
  std::vector<std::pair<double, unsigned int>> byAngle;
  const auto origin = conf.getAtomPos(center_atom->getIdx());
  for (const int wantedId : ordering) {
    if (wantedId == 0) {
      continue;  // 0 entries do not name a bond
    }
    for (auto bond : mol.atomBonds(center_atom)) {
      int bondId = 0;
      if (!bond->getPropIfPresent<int>(CDX_BOND_ID, bondId)) {
        return Atom::ChiralType::CHI_UNSPECIFIED;
      }
      if (bondId != wantedId) {
        continue;
      }
      auto neighbor = bond->getOtherAtom(center_atom);
      if (!neighbor) {
        // something went really wrong
        return Atom::ChiralType::CHI_UNSPECIFIED;
      }
      const auto offset = conf.getAtomPos(neighbor->getIdx()) - origin;
      // note the (x, y) argument order: the angle is measured from the y axis
      byAngle.emplace_back(atan2(offset.x, offset.y), bond->getIdx());
    }
  }

  // ascending angle; per the original author this is a clockwise rotation
  std::sort(byAngle.begin(), byAngle.end());
  INT_LIST bonds;
  for (const auto &item : byAngle) {
    bonds.push_back(item.second);
  }
  if (bonds.size() < 3) {
    return Atom::ChiralType::CHI_UNSPECIFIED;
  }

  auto nswaps = center_atom->getPerturbationOrder(bonds);
  if (bonds.size() == 3 && center_atom->getTotalNumHs() == 1) {
    ++nswaps;
  }
  // This supports the HDot and HDash available in chemdraw:
  // one is an implicit wedged hydrogen and one is a dashed hydrogen
  const bool wedgedImplicitH =
      center_atom->hasProp(CDX_IMPLICIT_HYDROGEN_STEREO) &&
      center_atom->getProp<char>(CDX_IMPLICIT_HYDROGEN_STEREO) == 'w';
  if (wedgedImplicitH) {
    nswaps++;
  }
  return (nswaps % 2) ? Atom::ChiralType::CHI_TETRAHEDRAL_CCW
                      : Atom::ChiralType::CHI_TETRAHEDRAL_CW;
}
} // namespace
// Fill in tetrahedral chirality for atoms that do not have a chiral tag yet.
//
// Pass 1: for each untagged atom, derive a tag from the ChemDraw bond
//         ordering when a non-3D conformer is available, and record atoms
//         that carry a ChemDraw CIP code (R/r/S/s), giving them a
//         provisional tag. Atoms that already have a tag just lose their
//         CDX_CIP property.
// Pass 2: for each recorded atom, compute CIP labels and flip the tag when
//         the computed label disagrees with the ChemDraw one.
// Finally, stereochemistry is re-assigned if any tag was set or flipped.
void checkChemDrawTetrahedralGeometries(RWMol &mol) {
  using ChiralType = Atom::ChiralType;

  // (expected CIP letter, atom) for atoms that still need to be verified
  std::vector<std::pair<char, Atom *>> cipCandidates;
  Conformer *conf = mol.getNumConformers() ? &mol.getConformer() : nullptr;
  // NOTE(review): this flag is shared by all atoms, so once any atom gets a
  // tag from its geometry no later atom receives a provisional CIP-based
  // tag. Confirm whether a per-atom flag was intended.
  bool chiralityChanged = false;

  for (auto atom : mol.atoms()) {
    // only deal with unspecified chiralities
    if (atom->getChiralTag() != ChiralType::CHI_UNSPECIFIED) {
      atom->clearProp(CDX_CIP);
      continue;
    }
    if (conf && !conf->is3D()) {
      atom->setChiralTag(getChirality(mol, atom, *conf));
      if (atom->getChiralTag() != ChiralType::CHI_UNSPECIFIED) {
        chiralityChanged = true;
      }
    }

    // If we have a cip code, might as well check it too
    CDXAtomCIPType cip;
    if (!atom->getPropIfPresent<CDXAtomCIPType>(CDX_CIP, cip)) {
      continue;
    }
    // assign, possibly wrong, initial stereo.
    // note: we can probably deduce this through CDX_BOND_ORDERING, but
    // I currently don't understand that well enough.
    char cipLetter = '\0';
    auto provisionalTag = ChiralType::CHI_TETRAHEDRAL_CW;
    switch (cip) {
      case kCDXCIPAtom_R:
        cipLetter = 'R';
        break;
      case kCDXCIPAtom_r:
        cipLetter = 'r';
        break;
      case kCDXCIPAtom_S:
        cipLetter = 'S';
        break;
      case kCDXCIPAtom_s:
        cipLetter = 's';
        provisionalTag = ChiralType::CHI_TETRAHEDRAL_CCW;
        break;
      default:
        break;
    }
    if (cipLetter == '\0') {
      continue;  // not one of the four CIP codes handled here
    }
    if (!chiralityChanged) {
      atom->setChiralTag(provisionalTag);
    }
    cipCandidates.emplace_back(cipLetter, atom);
  }

  // Now that we have missing chiralities, let's check the CIP codes and reset
  // if necessary.
  // This is an expensive way of doing this, but we only have stereo->cip not
  // cip->stereo implemented currently
  for (const auto &candidate : cipCandidates) {
    const char expected = candidate.first;
    Atom *cipAtom = candidate.second;
    try {
      CIPLabeler::assignCIPLabels(mol);
    } catch (...) {
      // can throw std::runtime error?
      break;
    }
    std::string computed;
    if (!cipAtom->getPropIfPresent<std::string>(common_properties::_CIPCode,
                                                computed)) {
      continue;
    }
    if (computed.empty() || computed[0] == expected) {
      continue;  // nothing computed, or already in agreement
    }
    // need to swap
    const auto current = cipAtom->getChiralTag();
    ChiralType flipped;
    if (current == ChiralType::CHI_TETRAHEDRAL_CW) {
      flipped = ChiralType::CHI_TETRAHEDRAL_CCW;
    } else if (current == ChiralType::CHI_TETRAHEDRAL_CCW) {
      flipped = ChiralType::CHI_TETRAHEDRAL_CW;
    } else {
      continue;  // no tetrahedral tag to flip
    }
    cipAtom->setChiralTag(flipped);
    cipAtom->updatePropertyCache();
    chiralityChanged = true;
  }

  if (chiralityChanged) {
    const bool cleanIt = true;
    const bool force = true;
    MolOps::assignStereochemistry(mol, cleanIt, force);
  }
}
}
} // namespace RDKit

View File

@@ -1,110 +0,0 @@
//
// Copyright (c) 2024, Glysade Inc
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Novartis Institutes for BioMedical Research Inc.
// nor the names of its contributors may be used to endorse or promote
// products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
#ifndef CHEMDRAW_UTILS_H
#define CHEMDRAW_UTILS_H
#include <GraphMol/RDKitBase.h>
#include <GraphMol/QueryAtom.h>
#include <GraphMol/QueryBond.h>
#include <GraphMol/QueryOps.h>
#include "ChemDrawStartInclude.h"
#include "chemdraw/CDXStdObjects.h"
#include "ChemDrawEndInclude.h"
namespace RDKit {
namespace ChemDraw {
// Shared constants for the ChemDraw support code.
// NOTE(review): judging by the name this is the bond length of RDKit-generated
// depictions; its users are not visible in this header - confirm.
constexpr double RDKIT_DEPICT_BONDLENGTH = 1.5;
// String keys used by the ChemDraw code.
// NOTE(review): presumably these are names of properties stored on
// molecules/atoms/bonds while ChemDraw documents are processed; the code that
// reads and writes them is not visible in this header - confirm.
// Because these are namespace-scope `const` objects defined in a header, every
// translation unit that includes this file gets its own copy.
const std::string NEEDS_FUSE("CDX_NEEDS_FUSE");
const std::string CDX_FRAG_ID("CDX_FRAG_ID");
const std::string CDX_GROUP_ID("CDX_GROUP_ID");
// Careful: the identifier and the string value differ ("CDX_NODE_ID").
const std::string FUSE_LABEL("CDX_NODE_ID");
const std::string CDX_SCHEME_ID("CDX_SCHEME_ID");
const std::string CDX_STEP_ID("CDX_STEP_ID");
const std::string CDX_REAGENT_ID("CDX_REAGENT_ID");
const std::string CDX_PRODUCT_ID("CDX_PRODUCT_ID");
const std::string CDX_AGENT_ID("CDX_AGENT_ID");
const std::string CDX_ATOM_POS("CDX_ATOM_POS");
// Only the atom-id and bond-id keys carry a leading underscore in their value.
const std::string CDX_ATOM_ID("_CDX_ATOM_ID");
const std::string CDX_BOND_ID("_CDX_BOND_ID");
const std::string CDX_BOND_ORDERING("CDX_BOND_ORDERING");
const std::string CDX_CIP("CDX_CIP");
// Careful: the identifier and the string value differ ("CDX_ATOM_STEREO").
const std::string CDX_IMPLICIT_HYDROGEN_STEREO("CDX_ATOM_STEREO");
// Return a string for a ChemDraw node type (CDXNodeType).
std::string NodeType(CDXNodeType nodetype);
// Scale the bonds to targetBondLength. If bondLength is zero, the average
// bond length in the molecule is used as the current bond length.
// NOTE(review): conf is taken by non-const reference, so presumably its
// coordinates are rescaled in place - confirm against the implementation.
void scaleBonds(const ROMol &mol, Conformer &conf, double targetBondLength,
double bondLength);
// Fuse labels indicate which atoms should be fused together from the various
// fragments in the ChemDraw file. get_fuse_label reads the label of an atom,
// set_fuse_label assigns idx as the label of an atom.
// NOTE(review): what get_fuse_label returns for an atom without a label is
// not visible here - confirm before relying on a sentinel value.
unsigned int get_fuse_label(Atom *atm);
void set_fuse_label(Atom *atm, unsigned int idx);
// Replace fragments that are not possible with molzip.
// NOTE(review): the meaning of the bool result is not visible in this header.
bool replaceFragments(RWMol &mol);
// Turn the atom at index idx of mol into a query atom that carries qry.
//
//   qry    - query to attach; must be non-null (checked with PRECONDITION).
//            It is handed to QueryAtom::setQuery on a temporary query atom.
//   symbol - optional label; when non-empty it is stored on the resulting
//            atom as common_properties::atomLabel.
//   mol    - molecule whose atom is replaced.
//   idx    - index of the atom to replace.
//
// Returns the atom that mol reports at idx after the replacement.
template <typename Q>
Atom *addquery(Q *qry, std::string symbol, RWMol &mol, unsigned int idx) {
  PRECONDITION(qry, "bad query");

  // Start from a copy of the existing atom so its current state carries over.
  auto queryAtom = std::make_unique<QueryAtom>(*mol.getAtomWithIdx(idx));
  queryAtom->setQuery(qry);
  queryAtom->setNoImplicit(true);
  mol.replaceAtom(idx, queryAtom.get());

  // Fetch the atom again from the molecule: the temporary query atom above is
  // destroyed when this function returns, so it must not be handed out.
  Atom *replaced = mol.getAtomWithIdx(idx);
  if (!symbol.empty()) {
    replaced->setProp(common_properties::atomLabel, symbol);
  }
  return replaced;
}
// Simple structure for keeping track of stereo groups.
// Every member has a default so that a default-constructed instance never
// holds an indeterminate value.
struct StereoGroupInfo {
  // Stereo group number; -1 until one has been assigned.
  int sgroup = -1;
  // Flag for conflicting group types; starts out false.
  bool conflictingSgroupTypes = false;
  // Type of the group. Defaults to STEREO_ABSOLUTE (previously this member
  // was left uninitialized, so reading it before assignment was undefined).
  StereoGroupType grouptype = StereoGroupType::STEREO_ABSOLUTE;
  // Atoms collected for this group (non-owning pointers).
  std::vector<Atom *> atoms;
};
// Check whether atoms have a tetrahedral flag and a ChemDraw CIP label set
// but no stereo assigned; if so, check the bond ordering for CW and CCW.
// The molecule is taken by non-const reference.
// NOTE(review): the definition is not part of this header; confirm exactly
// which atom properties are consulted and what is modified on mol.
void checkChemDrawTetrahedralGeometries(RWMol &mol);
}
} // namespace RDKit
#endif

View File

@@ -1,296 +0,0 @@
//
// Copyright (c) 2024, Glysade Inc
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Novartis Institutes for BioMedical Research Inc.
// nor the names of its contributors may be used to endorse or promote
// products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
#include "chemdraw.h"
#include <GraphMol/Chirality.h>
#include <GraphMol/QueryBond.h>
#include <GraphMol/Depictor/RDDepictor.h>
#include "ChemDrawStartInclude.h"
#include "chemdraw/CDXStdObjects.h"
#include "ChemDrawEndInclude.h"
namespace RDKit {
namespace v2 {
// Bond length written into the CDX document header (CDXDocument::m_bondLength)
// by MolToChemDrawBlock; twice this value is the target length that 2D
// coordinates are scaled to there.
const double DEFAULT_CDX_BOND_LENGTH = 14.4;
namespace {
// Do we need to set explicit hs in chemdraw, this uses basically the same
// logic as SmilesWriter
bool needsExplicitHs(const Atom *atom) {
auto num = atom->getAtomicNum();
const INT_VECT &defaultVs = PeriodicTable::getTable()->getValenceList(num);
int totalValence = atom->getTotalValence();
bool nonStandard = false;
if (atom->getNumRadicalElectrons()) {
nonStandard = true;
} else if ((num == 7 || num == 15) && atom->getIsAromatic() &&
atom->getNumExplicitHs()) {
// another type of "nonstandard" valence is an aromatic N or P with
// explicit Hs indicated:
nonStandard = true;
} else {
nonStandard = (totalValence != defaultVs.front() && atom->getTotalNumHs());
}
return nonStandard;
}
} // namespace
// Serialize a molecule as a ChemDraw document and return the bytes/text.
//
//   mol    - molecule to write. It is copied; the copy is kekulized and, if
//            it has no conformer, given computed 2D coordinates. The first
//            conformer of the copy supplies the atom positions.
//   format - CDXFormat::CDXML writes XML (prefixed with kCDXML_HeaderString);
//            any other value writes through CDXWriteDocToStorage.
//
// 2D coordinates are rescaled so that the average bond length becomes
// 2 * DEFAULT_CDX_BOND_LENGTH; 3D coordinates are written unscaled. The y
// axis is negated in both cases. Molecules without bonds (or with a zero
// average bond length) are written unscaled instead of dividing by zero.
//
// Exceptions raised by the RDKit calls used here (kekulization, coordinate
// generation, ...) are not caught.
// NOTE(review): `page` is only attached to `document` at the very end;
// presumably AddChild transfers ownership, in which case an exception thrown
// before that point leaks the page and its children - confirm.
std::string MolToChemDrawBlock(const ROMol &mol, CDXFormat format) {
  RWMol trmol(mol);
  MolOps::Kekulize(trmol);
  if (!trmol.getNumConformers()) {
    RDDepict::compute2DCoords(trmol);
  }

  // Object ids: 1 = document, 2 = page, 3 = fragment; afterwards object_id is
  // the base for atom ids, and bond ids follow the atom ids.
  CDXObjectID object_id = 1;
  CDXDocument document(object_id++);
  CDXPage *page = new CDXPage(object_id++);
  document.m_bondLength = DEFAULT_CDX_BOND_LENGTH;
  document.m_flags |= CDXDocument::CDXDocumentProperty1::has_bondLength;
  CDXFragment *fragment = new CDXFragment(object_id++);
  page->AddChild(fragment);

  const unsigned int numAtoms = trmol.getNumAtoms();
  std::vector<CDXNode *> nodes;
  nodes.reserve(numAtoms);

  const Conformer *conf = &trmol.getConformer(0);
  const bool is3D = conf->is3D();

  // I REALLY don't know why this is 2*DEFAULT_CDX_BOND_LENGTH but it looks
  // right when loading the CDX into ChemDraw.
  // We convert the average bond length into the target bond length here.
  const double target_bond_length = 2 * DEFAULT_CDX_BOND_LENGTH;
  double scale = 1.0;
  if (!is3D && trmol.getNumBonds() > 0) {
    double totalLength = 0.0;
    for (auto bond : trmol.bonds()) {
      auto pos1 = conf->getAtomPos(bond->getBeginAtomIdx());
      auto pos2 = conf->getAtomPos(bond->getEndAtomIdx());
      totalLength += (pos1 - pos2).length();
    }
    const double meanLength = totalLength / trmol.getNumBonds();
    // Guard against a degenerate layout (all bonded atoms on top of each
    // other): dividing by a zero mean would produce an infinite scale.
    if (meanLength > 0.0) {
      scale = target_bond_length / meanLength;
    }
  }

  auto wedgeBonds = Chirality::pickBondsToWedge(trmol, nullptr, conf);

  for (auto &atom : trmol.atoms()) {
    CDXNode *node = new CDXNode(object_id + atom->getIdx());
    auto pos = conf->getAtomPos(atom->getIdx());
    if (is3D) {
      node->Position3D(CDXPoint3D(CDXCoordinatefromPoints(pos.x),
                                  -CDXCoordinatefromPoints(pos.y),
                                  CDXCoordinatefromPoints(pos.z)));
    } else {
      node->Position(CDXPoint2D(CDXCoordinatefromPoints(scale * pos.x),
                                CDXCoordinatefromPoints(-scale * pos.y)));
    }
    node->m_nodeType = kCDXNodeType_Element;
    node->m_isotope = atom->getIsotope();
    node->m_elementNum = atom->getAtomicNum();

    // Use the same logic the smiles writer uses to decide on brackets.
    node->m_numHydrogens =
        needsExplicitHs(atom) ? atom->getTotalNumHs() : kNumHydrogenUnspecified;
    node->m_charge = atom->getFormalCharge() * 0x1000000;
    if (atom->getFormalCharge() || atom->getNumRadicalElectrons() != 0) {
      // XXX is this right? We seem to need to set it with charges
      node->m_numHydrogens = atom->getTotalNumHs();
    }

    // Only a single radical electron is written; other counts are left at the
    // node's default.
    // NOTE(review): one unpaired electron is written as kCDXRadical_Singlet -
    // confirm this mapping against the CDX radical definitions.
    if (atom->getNumRadicalElectrons() == 1) {
      node->m_radical = kCDXRadical_Singlet;
    }

    // this might be a bit slow, perhaps make into a map...
    // Stereo group numbers are 1-based positions in the molecule's group list.
    unsigned int sgnum = 0;
    for (auto &sg : trmol.getStereoGroups()) {
      sgnum++;
      for (auto &sgatom : sg.getAtoms()) {
        if (atom->getIdx() == sgatom->getIdx()) {
          switch (sg.getGroupType()) {
            case StereoGroupType::STEREO_ABSOLUTE:
              node->m_enhancedStereoType = kCDXEnhancedStereo_Absolute;
              break;
            case StereoGroupType::STEREO_OR:
              node->m_enhancedStereoType = kCDXEnhancedStereo_Or;
              break;
            case StereoGroupType::STEREO_AND:
              node->m_enhancedStereoType = kCDXEnhancedStereo_And;
              break;
          }
          node->m_enhancedStereoGroupNum = sgnum;
        }
      }
    }
    nodes.push_back(node);
    fragment->AddChild(node);
  }

  for (auto &bond : trmol.bonds()) {
    CDXBond *cdxbond = new CDXBond(object_id + numAtoms + bond->getIdx());
    int dirCode = 0;
    bool reverse = false;
    Chirality::GetMolFileBondStereoInfo(bond, wedgeBonds, conf, dirCode,
                                        reverse);
    switch (bond->getBondType()) {
      case Bond::BondType::SINGLE:
        cdxbond->m_bondOrder = kCDXBondOrder_Single;
        break;
      case Bond::DOUBLE:
        cdxbond->m_bondOrder = kCDXBondOrder_Double;
        break;
      case Bond::TRIPLE:
        cdxbond->m_bondOrder = kCDXBondOrder_Triple;
        break;
      case Bond::QUADRUPLE:
        cdxbond->m_bondOrder = kCDXBondOrder_Quadruple;
        break;
      case Bond::QUINTUPLE:
        cdxbond->m_bondOrder = kCDXBondOrder_Quintuple;
        break;
      case Bond::HEXTUPLE:
        cdxbond->m_bondOrder = kCDXBondOrder_Sextuple;
        break;
      case Bond::ONEANDAHALF:
        cdxbond->m_bondOrder = kCDXBondOrder_OneHalf;
        break;
      case Bond::TWOANDAHALF:
        cdxbond->m_bondOrder = kCDXBondOrder_TwoHalf;
        break;
      case Bond::THREEANDAHALF:
        cdxbond->m_bondOrder = kCDXBondOrder_ThreeHalf;
        break;
      case Bond::FOURANDAHALF:
        cdxbond->m_bondOrder = kCDXBondOrder_FourHalf;
        break;
      case Bond::FIVEANDAHALF:
        cdxbond->m_bondOrder = kCDXBondOrder_FiveHalf;
        break;
      case Bond::AROMATIC:
        cdxbond->m_bondOrder = kCDXBondOrder_OneHalf;
        break;
      case Bond::IONIC:
        cdxbond->m_bondOrder = kCDXBondOrder_Ionic;
        break;
      case Bond::HYDROGEN:
        cdxbond->m_bondOrder = kCDXBondOrder_Hydrogen;
        break;
      case Bond::THREECENTER:
        cdxbond->m_bondOrder = kCDXBondOrder_ThreeCenter;
        break;
      case Bond::DATIVE:
        cdxbond->m_bondOrder = kCDXBondOrder_Dative;
        break;
      case Bond::UNSPECIFIED: {
        // Query bonds are recognised by the text of their query description.
        auto query = describeQuery(bond);
        if (query == "DoubleOrAromaticBond 1 = val\n") {
          cdxbond->m_bondOrder = kCDXBondOrder_DoubleOrAromatic;
        } else if (query == "SingleOrAromaticBond 1 = val\n") {
          cdxbond->m_bondOrder = kCDXBondOrder_SingleOrAromatic;
        } else if (query == "SingleOrDoubleBond 1 = val\n") {
          cdxbond->m_bondOrder = kCDXBondOrder_SingleOrDouble;
        } else {
          cdxbond->m_bondOrder = kCDXBondOrder_Any;
        }
      } break;
      case Bond::DATIVEONE:
      case Bond::DATIVEL:
      case Bond::DATIVER:
      case Bond::OTHER:
      case Bond::ZERO:
        // unhandled
        break;
    }
    cdxbond->Connects(nodes[bond->getBeginAtomIdx()],
                      nodes[bond->getEndAtomIdx()]);
    switch (dirCode) {
      case 6:  // swap 1 and 6 due to swapped y
        cdxbond->m_display = reverse ? kCDXBondDisplay_WedgedHashEnd
                                     : kCDXBondDisplay_WedgedHashBegin;
        break;
      case 1:
        cdxbond->m_display =
            reverse ? kCDXBondDisplay_WedgeEnd : kCDXBondDisplay_WedgeBegin;
        break;
      case 3:
        cdxbond->m_display = kCDXBondDisplay_Wavy;
        break;
      default:
        break;
    }
    // An explicit "either"/"unknown" direction overrides the wedge display.
    if (bond->getBondDir() == Bond::BondDir::EITHERDOUBLE ||
        bond->getBondDir() == Bond::BondDir::UNKNOWN) {
      cdxbond->m_display = kCDXBondDisplay_Wavy;
    }
    fragment->AddChild(cdxbond);
  }

  document.AddChild(page);
  // if this isn't empty something fails.
  document.m_colorTable.m_colors.clear();

  std::ostringstream os;
  if (format == CDXFormat::CDXML) {
    os << kCDXML_HeaderString;
    XMLDataSink ds(os);
    document.XMLWrite(ds);
  } else {
    CDXostream ds(os);
    CDXWriteDocToStorage(&document, ds);
  }
  return os.str();
}
}
} // namespace RDKit