Files
rdkit/External/ChemDraw/node.cpp
Brian Kelley 58c21baeba ChemDraw Document and read/write support for the RDKit (#8620)
* ChemDraw Document and read/write support for the RDKit

* Add missing test file

* Rev chemdraw version

* Rev chemdraw version

* Rev chemdraw version for g++11

* Update CMakeLists.txt

Co-authored-by: Greg Landrum <greg.landrum@gmail.com>

* Build chemdraw from an external CMake file

* Remove unused platform bigendian check

* Revert changes to constants, remove unused ones

* Keep the original constant names

* Remove __main__ section

* Use as much of the ChemDraw CMakeList as possible

* Skip installing expat

* Rev chemdraw version to fix windows issue

* Don't install expat, set the appropriate CHEMDRAW_BUILD definition

* resolve windows builds

* Fix minimal lib builds

* Move ChemDraw document to private header

* Move utility functions to ChemDraw namespace

* Move ChemDraw to v2 unique ptr api namespace

* Make class enum

* Switch to camelCase

* Response to review

* Move ChemDrawToDocument to ChemDraw namespace

* Update External/ChemDraw/Wrap/rdChemDraw.cpp

Co-authored-by: Greg Landrum <greg.landrum@gmail.com>

* Fix typo

* Update External/ChemDraw/Wrap/rdChemDraw.cpp

Co-authored-by: Greg Landrum <greg.landrum@gmail.com>

* Add better documentation

* Rev chemdraw version

---------

Co-authored-by: Brian Kelley <bkelley@glysade.com>
Co-authored-by: Greg Landrum <greg.landrum@gmail.com>
2025-07-04 06:36:10 +02:00

324 lines
11 KiB
C++

//
// Copyright (c) 2024, Glysade Inc
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Novartis Institutes for BioMedical Research Inc.
// nor the names of its contributors may be used to endorse or promote
// products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//#include "node.h"
#include "fragment.h"
#include "utils.h"
namespace RDKit {
namespace ChemDraw {
// Converts a single CDX node into an RDKit atom and appends it to `mol`.
//
// The atom's element, formal charge, explicit hydrogen count, isotope,
// radical count and a set of CDX_* bookkeeping properties are taken from the
// node. Generic nicknames and element lists are turned into query atoms via
// addquery(). Nickname and Fragment nodes additionally have their contained
// fragments parsed into `mol` through parseFragment().
//
// Parameters:
//   mol                - molecule that receives the new atom (and any nested
//                        fragment atoms); also gets NEEDS_FUSE / CDX_FRAG_ID
//                        set when a nested fragment is parsed.
//   fragmentId         - value written back to mol's CDX_FRAG_ID after a
//                        nested fragment has been parsed.
//   node               - the CDX node being converted.
//   pagedata           - pagedata.atomIds is updated with
//                        node object id -> created atom; also forwarded to
//                        parseFragment().
//   sgroups            - enhanced stereo groups keyed by (group number, group
//                        type); the atom is appended when the node's group
//                        number is > 0.
//   missingFragId      - forwarded unchanged to parseFragment().
//   externalAttachment - when > 0, an ExternalConnectionPoint node becomes a
//                        dummy atom that uses this value as its fuse label.
//
// Returns false as soon as parsing of a nested fragment fails, true otherwise.
bool parseNode(
    RWMol &mol, unsigned int fragmentId, CDXNode &node, PageData &pagedata,
    std::map<std::pair<int, StereoGroupType>, StereoGroupInfo> &sgroups,
    int &missingFragId, int externalAttachment) {
  int atom_id = node.GetObjectID();
  // Element as stored on the node; several node types below reset it to 0
  // (dummy atom).
  int elemno = node.m_elementNum;
  // An unspecified hydrogen count is stored as 0 explicit Hs with implicit Hs
  // still allowed; a specified count disables implicit Hs.
  // NOTE(review): the sentinel is presumably UINT16 max - confirm.
  int num_hydrogens =
      node.m_numHydrogens == kNumHydrogenUnspecified ? 0 : node.m_numHydrogens;
  bool explicitHs = node.m_numHydrogens != kNumHydrogenUnspecified;

  // When the low 24 bits are all zero the charge is taken from the top byte,
  // otherwise the raw value is used directly.
  int charge = 0;
  if ((node.m_charge & 0x00FFFFFF) == 0) {
    charge = node.m_charge >> 24;
  } else {
    charge = node.m_charge;
  }

  int atommap = 0;      // passed to set_fuse_label() below
  int rgroup_num = -1;  // < 0 means "no R-group number found"
  int isotope = node.m_isotope;
  bool checkForRGroup = false;
  std::string query_label;
  std::vector<int16_t> elementlist;

#ifdef DEBUG
  std::cerr << NodeType(node.m_nodeType) << std::endl;
#endif

  switch (node.m_nodeType) {
    case kCDXNodeType_Element: {
      break;
    }
    case kCDXNodeType_ElementList: {
      if (node.m_elementList) {
        elementlist = *node.m_elementList;
        query_label = "ElementList";
      }
      break;
    }
    case kCDXNodeType_Nickname:
    case kCDXNodeType_Fragment: {
      // Placeholder atom; the contained fragment is expanded at the end of
      // this function.
      elemno = 0;
      atommap = atom_id;
      break;
    }
    case kCDXNodeType_ExternalConnectionPoint: {
      if (externalAttachment <= 0) {
        // sometimes this is a dummy atom, but I don't know when.
        if (node.m_externalConnectionType == kCDXExternalConnection_Diamond) {
          elemno = 0;
        }
        atommap = atom_id;
      } else {
        elemno = 0;
        atommap = externalAttachment;
      }
      break;
    }
    case kCDXNodeType_GenericNickname: {
      if (!node.m_genericNickname.empty()) {
        switch (node.m_genericNickname[0]) {
          case 'R': {
            // The R-group number is read from the node's text child below.
            checkForRGroup = true;
            elemno = 0;
            query_label = node.m_genericNickname;
            break;
          }
          case 'A':
          case 'Q':
          case 'X':
          case 'M': {
            elemno = 0;
            query_label = node.m_genericNickname;
            break;
          }
          default:
            BOOST_LOG(rdWarningLog)
                << "Unhandled generic nickname: " << node.m_genericNickname
                << std::endl;
        }
      }
      break;
    }
    // Node types that currently get no special handling.
    case kCDXNodeType_Unspecified:
    case kCDXNodeType_ElementListNickname:
    case kCDXNodeType_Formula:
    case kCDXNodeType_AnonymousAlternativeGroup:
    case kCDXNodeType_NamedAlternativeGroup:
    case kCDXNodeType_MultiAttachment:
    case kCDXNodeType_VariableAttachment:
    case kCDXNodeType_LinkNode:
    case kCDXNodeType_Monomer:
      break;
  }

  // A text child of the form "R<number>" supplies the R-group number (for
  // generic 'R' nicknames) or, for every other node type, the isotope.
  // A bare "R" yields 0.
  for (auto &child : node.ContainedObjects()) {
    if (child.second->GetTag() != kCDXObj_Text) {
      continue;
    }
    const std::string &text = ((CDXText *)child.second)->GetText().str();
    if (text.empty() || text[0] != 'R') {
      continue;
    }
    try {
      const int parsed = text.size() > 1 ? std::stoi(text.substr(1)) : 0;
      if (checkForRGroup) {
        rgroup_num = parsed;
      } else {
        isotope = parsed;
      }
    } catch (const std::invalid_argument &) {
      // Only report for real R-group nodes: element labels such as "Ru" or
      // "Rh" also start with 'R' and must not trigger an R-group warning.
      if (checkForRGroup) {
        BOOST_LOG(rdWarningLog)
            << "RGroupError: Invalid argument - Cannot convert '" << text
            << "' to an integer." << std::endl;
      }
    } catch (const std::out_of_range &) {
      if (checkForRGroup) {
        BOOST_LOG(rdWarningLog)
            << "RGroupError: Out of range - The number '" << text
            << "' is too large or too small." << std::endl;
      }
    }
  }

  // Anything other than And/Or is treated as an absolute stereo group.
  StereoGroupType grouptype = StereoGroupType::STEREO_ABSOLUTE;
  switch (node.m_enhancedStereoType) {
    case kCDXEnhancedStereo_Absolute:
      grouptype = StereoGroupType::STEREO_ABSOLUTE;
      break;
    case kCDXEnhancedStereo_And:
      grouptype = StereoGroupType::STEREO_AND;
      break;
    case kCDXEnhancedStereo_Or:
      grouptype = StereoGroupType::STEREO_OR;
      break;
    default:
      break;
  }

  CHECK_INVARIANT(atom_id != -1, "Uninitialized atom id in cdxml.");

  // Ownership of the atom is handed to `mol` by addAtom() further down.
  Atom *rd_atom = new Atom(elemno);
  rd_atom->setFormalCharge(charge);
  rd_atom->setNumExplicitHs(num_hydrogens);
  rd_atom->setNoImplicit(explicitHs);
  rd_atom->setIsotope(isotope);
  if (rgroup_num >= 0) {
    rd_atom->setAtomMapNum(rgroup_num);
  }
  set_fuse_label(rd_atom, atommap);

  switch (node.m_hStereo) {
    case kCDXProp_Atom_HDot:  // this atom has an implicit hydrogen with a
                              // wedged bond
      rd_atom->setProp<char>(CDX_IMPLICIT_HYDROGEN_STEREO, 'w');
      break;
    case kCDXProp_Atom_HDash:  // this atom has an implicit hydrogen with a
                               // hashed bond
      rd_atom->setProp<char>(CDX_IMPLICIT_HYDROGEN_STEREO, 'h');
      break;
  }

  if (node.m_bondOrdering) {
    // This node may be completely replaced by the fragment
    // i.e. [*:1]C[*:1].C[*:1]C => CCC
    rd_atom->setProp<std::vector<int>>(CDX_BOND_ORDERING,
                                       *node.m_bondOrdering);
  }

  if (node.m_geometry == kCDXAtomGeometry_Tetrahedral) {
    // if we have a cip type we can interpret, set it, otherwise don't
    switch (node.m_CIP) {
      case kCDXCIPAtom_R:
      case kCDXCIPAtom_r:
      case kCDXCIPAtom_S:
      case kCDXCIPAtom_s:
        rd_atom->setProp<CDXAtomCIPType>(CDX_CIP, node.m_CIP);
        break;
      default:
        rd_atom->setProp<CDXAtomCIPType>(CDX_CIP, kCDXCIPAtom_Undetermined);
        break;
    }
  }

  // Store 3 coordinates when a 3D position is known, otherwise the 2D ones.
  std::vector<double> atom_coords;
  if (node.KnownPosition3D()) {
    atom_coords.reserve(3);
    atom_coords.push_back(node.m_3dPosition.x);
    atom_coords.push_back(node.m_3dPosition.y);
    atom_coords.push_back(node.m_3dPosition.z);
  } else {
    atom_coords.reserve(2);
    atom_coords.push_back(node.m_2dPosition.x);
    atom_coords.push_back(node.m_2dPosition.y);
  }
  rd_atom->setProp<std::vector<double>>(CDX_ATOM_POS, atom_coords);
  rd_atom->setProp<unsigned int>(CDX_ATOM_ID, atom_id);

  const bool updateLabels = true;
  const bool takeOwnership = true;
  auto idx = mol.addAtom(rd_atom, updateLabels, takeOwnership);

  // Generic nicknames / element lists: addquery() returns the atom to use
  // from here on, so rd_atom is re-pointed at its result.
  // NOTE(review): nicknames such as "AH", "QH" and "XH" pass the first-letter
  // check above but match none of the labels below, so they end up in the
  // "Unhandled generic nickname" branch.
  if (!query_label.empty()) {
    if (query_label[0] == 'R') {
      rd_atom = addquery(makeAtomNullQuery(), query_label, mol, idx);
    } else if (query_label == "A") {
      rd_atom = addquery(makeAAtomQuery(), query_label, mol, idx);
    } else if (query_label == "Q") {
      rd_atom = addquery(makeQAtomQuery(), query_label, mol, idx);
    } else if (query_label == "M") {
      rd_atom = addquery(makeMAtomQuery(), query_label, mol, idx);
    } else if (query_label == "MH") {
      rd_atom = addquery(makeMHAtomQuery(), query_label, mol, idx);
    } else if (query_label == "X") {
      rd_atom = addquery(makeXAtomQuery(), query_label, mol, idx);
    } else if (query_label == "ElementList") {
      if (elementlist.empty()) {
        BOOST_LOG(rdWarningLog)
            << "ElementList is empty, ignoring..." << std::endl;
      } else {
        // OR together one atomic-number query per listed element.
        auto *q = new ATOM_OR_QUERY;
        q->setDescription("AtomOr");
        for (auto atNum : elementlist) {
          q->addChild(
              QueryAtom::QUERYATOM_QUERY::CHILD_TYPE(makeAtomNumQuery(atNum)));
        }
        rd_atom = addquery(q, query_label, mol, idx);
        rd_atom->setAtomicNum(elementlist.front());
      }
    } else {
      BOOST_LOG(rdWarningLog)
          << "Unhandled generic nickname: " << query_label << std::endl;
    }
  }

  switch (node.m_radical) {
    case kCDXRadical_None:
      break;
    case kCDXRadical_Doublet:
      rd_atom->setNumRadicalElectrons(1);
      break;
    // Singlet and triplet both carry two radical electrons.
    case kCDXRadical_Singlet:
    case kCDXRadical_Triplet:
      rd_atom->setNumRadicalElectrons(2);
      break;
  }

  if (node.m_enhancedStereoGroupNum > 0) {
    auto key = std::make_pair(node.m_enhancedStereoGroupNum, grouptype);
    auto &stereo = sgroups[key];
    stereo.sgroup = node.m_enhancedStereoGroupNum;
    stereo.grouptype = grouptype;
    stereo.atoms.push_back(rd_atom);
  }

  // The mol has ownership so this can't leak
  pagedata.atomIds[atom_id] = rd_atom;

  if (node.m_nodeType == kCDXNodeType_Nickname ||
      node.m_nodeType == kCDXNodeType_Fragment) {
    // This fragment needs to be expanded and joined to the current one
    // the external_id is the node's atom_id
    for (const auto &fragment : node.ContainedObjects()) {
      if (fragment.second->GetTag() != kCDXObj_Fragment) {
        continue;
      }
      if (!parseFragment(mol, (CDXFragment &)(*fragment.second), pagedata,
                         missingFragId, atom_id)) {
        return false;
      }
      mol.setProp<bool>(NEEDS_FUSE, true);
      // parseFragment may have set the fragment id to the nested fragment's,
      // so restore ours.
      mol.setProp(CDX_FRAG_ID, fragmentId);
    }
  }
  return true;
}
}
} // namespace RDKit