run clang-tidy and clang-format on the chemdraw files (#8837)

* git tidy

* clang-format
This commit is contained in:
Greg Landrum
2025-10-03 17:21:56 +02:00
committed by GitHub
parent 158db5d1a7
commit 6b30df1e40
24 changed files with 1145 additions and 630 deletions

View File

@@ -34,7 +34,7 @@
#pragma GCC diagnostic ignored "-Wsign-compare"
#pragma GCC diagnostic ignored "-Wconversion"
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
//#pragma GCC diagnostic ignored "-Wmacro-redefined"
// #pragma GCC diagnostic ignored "-Wmacro-redefined"
#pragma GCC diagnostic ignored "-Wunknown-pragmas"
#pragma GCC diagnostic ignored "-Wignored-qualifiers"
#pragma GCC diagnostic ignored "-Wextra"

View File

@@ -28,7 +28,7 @@
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//#include "node.h"
// #include "node.h"
#include "utils.h"
#include "fragment.h"
@@ -171,7 +171,9 @@ bool parseBond(RWMol &mol, unsigned int fragmentId, CDXBond &bond,
unsigned int bondIdx = 0;
auto startIdx = start_atom->getIdx();
auto endIdx = end_atom->getIdx();
if (swap_bond_ends) std::swap(startIdx, endIdx);
if (swap_bond_ends) {
std::swap(startIdx, endIdx);
}
if (qb) {
qb->setBeginAtomIdx(startIdx);
@@ -223,5 +225,5 @@ bool parseBond(RWMol &mol, unsigned int fragmentId, CDXBond &bond,
}
return true;
}
}
} // namespace ChemDraw
} // namespace RDKit

View File

@@ -48,5 +48,5 @@ namespace ChemDraw {
bool parseBond(RWMol &mol, unsigned int fragmentId, CDXBond &bond,
PageData &pagedata);
}
}
} // namespace RDKit
#endif

View File

@@ -28,7 +28,7 @@
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//#include "node.h"
// #include "node.h"
#include "utils.h"
#include "bracket.h"
@@ -36,20 +36,20 @@ namespace RDKit {
namespace ChemDraw {
// This is currently unimplemented, pending either full bracket support in the
// RDKit or support for bracket expansion inside the RDChemDrawLib
bool parseBracket(CDXBracketedGroup &bracket, PageData &/*pagedata*/) {
bool parseBracket(CDXBracketedGroup &bracket, PageData & /*pagedata*/) {
// Get the contained atoms/bonds in the bracket
for (auto &attachment : bracket.ContainedObjects()) {
CDXDatumID childid = (CDXDatumID)attachment.second->GetTag();
auto childid = (CDXDatumID)attachment.second->GetTag();
if (childid == kCDXObj_BracketAttachment) {
CDXBracketAttachment &bracketattachment =
(CDXBracketAttachment &)(*attachment.second);
auto &bracketattachment = (CDXBracketAttachment &)(*attachment.second);
for (auto &bracketdata : bracketattachment.ContainedObjects()) {
CDXDatumID bracketid = (CDXDatumID)bracketdata.second->GetTag();
auto bracketid = (CDXDatumID)bracketdata.second->GetTag();
if (bracketid == kCDXObj_CrossingBond) {
//CDXCrossingBond &crossingbond =
// (CDXCrossingBond &)(*attachment.second);
// XX unimplemented crossingbond.m_bondID; // bond that crosses brackets
// XX unimplemented crossingbond.m_innerAtomID; // atom within brackets
// CDXCrossingBond &crossingbond =
//     (CDXCrossingBond &)(*attachment.second);
// XX unimplemented: crossingbond.m_bondID - bond that crosses brackets
// XX unimplemented: crossingbond.m_innerAtomID - atom within brackets
}
}
}
@@ -102,5 +102,5 @@ bool parseBracket(CDXBracketedGroup &bracket, PageData &/*pagedata*/) {
}
return true;
}
}
} // namespace ChemDraw
} // namespace RDKit

View File

@@ -47,6 +47,6 @@ namespace RDKit {
namespace ChemDraw {
bool parseBracket(CDXBracketedGroup &bracket, PageData &pagedata);
}
}
} // namespace RDKit
#endif

View File

@@ -87,7 +87,7 @@ void visit_children(
molzip_params.enforceValenceRules = false;
for (auto frag : node.ContainedObjects()) {
CDXDatumID id = (CDXDatumID)frag.second->GetTag();
auto id = (CDXDatumID)frag.second->GetTag();
if (id == kCDXObj_Fragment) {
std::unique_ptr<RWMol> mol = std::make_unique<RWMol>();
if (!parseFragment(*mol, (CDXFragment &)(*frag.second), pagedata,
@@ -131,8 +131,7 @@ void visit_children(
if (atm->hasProp(CDX_ATOM_POS)) {
hasConf = true;
const std::vector<double> coord =
atm->getProp<std::vector<double>>(CDX_ATOM_POS);
const auto coord = atm->getProp<std::vector<double>>(CDX_ATOM_POS);
p.x = coord[0];
p.y = -1 * coord[1]; // CDXML uses an inverted coordinate
@@ -208,7 +207,7 @@ void visit_children(
MolOps::detectBondStereochemistry(*res);
}
} else if (id == kCDXObj_ReactionScheme) { // get the reaction info
CDXReactionScheme &scheme = (CDXReactionScheme &)(*frag.second);
auto &scheme = (CDXReactionScheme &)(*frag.second);
pagedata.schemes.emplace_back(scheme);
/*
int scheme_id = scheme.GetObjectID(); //frag.second.template
@@ -226,12 +225,12 @@ void visit_children(
}
*/
} else if (id == kCDXObj_Group) {
CDXGroup &group = (CDXGroup &)(*frag.second);
auto &group = (CDXGroup &)(*frag.second);
group_id = frag.second->GetObjectID();
visit_children(group, pagedata, missing_frag_id, bondLength, params,
group_id);
} else if (id == kCDXObj_BracketedGroup) {
CDXBracketedGroup &bracketgroup = (CDXBracketedGroup &)(*frag.second);
auto &bracketgroup = (CDXBracketedGroup &)(*frag.second);
parseBracket(bracketgroup, pagedata);
}
}
@@ -320,7 +319,7 @@ std::vector<std::unique_ptr<RWMol>> molsFromCDXMLDataStream(
int missing_frag_id = -1;
for (auto node : document->ContainedObjects()) {
CDXDatumID id = (CDXDatumID)node.second->GetTag();
auto id = (CDXDatumID)node.second->GetTag();
switch (id) {
case kCDXObj_Page:
visit_children(*node.second, pagedata, missing_frag_id, bondLength,
@@ -350,9 +349,9 @@ std::unique_ptr<CDXDocument> ChemDrawToDocument(const std::string &filename) {
std::fstream chemdrawfile(filename);
std::string ext = std::filesystem::path(filename).extension().string();
boost::algorithm::to_lower(ext);
if (ext == ".cdxml")
if (ext == ".cdxml") {
return streamToCDXDocument(chemdrawfile, CDXFormat::CDXML);
else if (ext == ".cdx") {
} else if (ext == ".cdx") {
return streamToCDXDocument(chemdrawfile, CDXFormat::CDX);
}
std::string msg =

View File

@@ -49,26 +49,29 @@ struct RDKIT_RDCHEMDRAWLIB_EXPORT ChemDrawParserParams {
bool sanitize;
bool removeHs;
CDXFormat format;
ChemDrawParserParams() : sanitize(true), removeHs(true), format(CDXFormat::AUTO) {}
ChemDrawParserParams(bool sanitize, bool removeHs, CDXFormat format) :
sanitize(sanitize), removeHs(removeHs), format(format) {}
ChemDrawParserParams()
: sanitize(true), removeHs(true), format(CDXFormat::AUTO) {}
ChemDrawParserParams(bool sanitize, bool removeHs, CDXFormat format)
: sanitize(sanitize), removeHs(removeHs), format(format) {}
};
std::vector<std::unique_ptr<RWMol>> RDKIT_RDCHEMDRAWLIB_EXPORT
MolsFromChemDrawDataStream(std::istream &inStream,
const ChemDrawParserParams &params = ChemDrawParserParams());
MolsFromChemDrawDataStream(
std::istream &inStream,
const ChemDrawParserParams &params = ChemDrawParserParams());
std::vector<std::unique_ptr<RWMol>> RDKIT_RDCHEMDRAWLIB_EXPORT
MolsFromChemDrawFile(const std::string &filename,
const ChemDrawParserParams &params = ChemDrawParserParams());
MolsFromChemDrawFile(
const std::string &filename,
const ChemDrawParserParams &params = ChemDrawParserParams());
std::vector<std::unique_ptr<RWMol>> RDKIT_RDCHEMDRAWLIB_EXPORT
MolsFromChemDrawBlock(const std::string &block,
const ChemDrawParserParams &params = ChemDrawParserParams());
MolsFromChemDrawBlock(
const std::string &block,
const ChemDrawParserParams &params = ChemDrawParserParams());
std::string RDKIT_RDCHEMDRAWLIB_EXPORT
MolToChemDrawBlock(const ROMol &mol, CDXFormat format = CDXFormat::CDXML);
}
} // namespace v2
} // namespace RDKit
#endif

View File

@@ -46,6 +46,6 @@ ChemDrawToDocument(std::istream &inStream, v2::CDXFormat format);
std::unique_ptr<CDXDocument> RDKIT_RDCHEMDRAWLIB_EXPORT
ChemDrawToDocument(const std::string &filename);
}
} // namespace ChemDraw
} // namespace RDKit
#endif

View File

@@ -69,7 +69,7 @@ namespace v2 {
//! Parse a text stream with ChemDraw data into a ChemicalReaction
std::vector<std::unique_ptr<ChemicalReaction>>
ChemDrawDataStreamToChemicalReactions(std::istream &inStream, bool sanitize,
bool removeHs) {
bool removeHs) {
ChemDrawParserParams params;
params.sanitize = sanitize;
params.removeHs = removeHs;
@@ -97,7 +97,7 @@ ChemDrawDataStreamToChemicalReactions(std::istream &inStream, bool sanitize,
}
for (const auto &scheme : schemes) {
// convert atoms to queries:
ChemicalReaction *res = new ChemicalReaction;
auto *res = new ChemicalReaction;
result.push_back(std::unique_ptr<ChemicalReaction>(res));
for (auto idx : scheme.second) {
CHECK_INVARIANT(
@@ -162,6 +162,6 @@ std::vector<std::unique_ptr<ChemicalReaction>> ChemDrawFileToChemicalReactions(
}
return res;
}
}
} // namespace v2
} // namespace RDKit

View File

@@ -37,27 +37,31 @@
#include <GraphMol/ChemReactions/Reaction.h>
#include <string>
namespace RDKit
{
namespace RDKit {
namespace v2 {
//---------------------------------------------------------------------------
//! \name Chemdraw rxn Support
///@{
//! Parse text in ChemDraw rxn format into a vector of ChemicalReactions
RDKIT_RDCHEMDRAWREACTIONLIB_EXPORT std::vector<std::unique_ptr<ChemicalReaction>>
ChemDrawToChemicalReactions(const std::string &rxnBlock, bool sanitize = false,
bool removeHs = false);
RDKIT_RDCHEMDRAWREACTIONLIB_EXPORT
std::vector<std::unique_ptr<ChemicalReaction>>
ChemDrawToChemicalReactions(const std::string &rxnBlock,
bool sanitize = false, bool removeHs = false);
//! Parse a file in ChemDraw rxn format into a vector of ChemicalReactions
RDKIT_RDCHEMDRAWREACTIONLIB_EXPORT std::vector<std::unique_ptr<ChemicalReaction>>
ChemDrawFileToChemicalReactions(const std::string &fileName, bool sanitize = false,
bool removeHs = false);
//! Parse a text stream in ChemDraw rxn format into a vector of ChemicalReactions
RDKIT_RDCHEMDRAWREACTIONLIB_EXPORT std::vector<std::unique_ptr<ChemicalReaction>>
ChemDrawDataStreamToChemicalReactions(std::istream &rxnStream,
bool sanitize = false,
bool removeHs = false);
RDKIT_RDCHEMDRAWREACTIONLIB_EXPORT
std::vector<std::unique_ptr<ChemicalReaction>>
ChemDrawFileToChemicalReactions(const std::string &fileName,
bool sanitize = false,
bool removeHs = false);
//! Parse a text stream in ChemDraw rxn format into a vector of
//! ChemicalReactions
RDKIT_RDCHEMDRAWREACTIONLIB_EXPORT
std::vector<std::unique_ptr<ChemicalReaction>>
ChemDrawDataStreamToChemicalReactions(std::istream &rxnStream,
bool sanitize = false,
bool removeHs = false);
}
} // namespace v2
} // namespace RDKit
#endif

File diff suppressed because it is too large Load Diff

View File

@@ -59,10 +59,9 @@ struct PageData {
std::map<unsigned int, Atom *> atomIds;
std::map<unsigned int, Bond *> bondIds;
std::vector<std::unique_ptr<RWMol>> mols; // All molecules found in the doc
std::map<unsigned int, size_t>
fragmentLookup; // fragment.id->molecule index
std::map<unsigned int, size_t> fragmentLookup; // fragment.id->molecule index
std::map<unsigned int, std::vector<int>>
groupedFragments; // grouped.id -> [fragment.id]
groupedFragments; // grouped.id -> [fragment.id]
std::vector<ReactionInfo> schemes; // reaction schemes found
void clearCDXProps() {
@@ -90,7 +89,7 @@ struct PageData {
//! external nodes are normally NickNames or new Fragments
bool parseFragment(RWMol &mol, CDXFragment &fragment, PageData &pagedata,
int &missingFragId, int externalAttachment = -1);
}
} // namespace ChemDraw
} // namespace RDKit
#endif

View File

@@ -28,7 +28,7 @@
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//#include "node.h"
// #include "node.h"
#include "fragment.h"
#include "utils.h"
@@ -45,10 +45,11 @@ bool parseNode(
node.m_numHydrogens == kNumHydrogenUnspecified ? 0 : node.m_numHydrogens;
bool explicitHs = node.m_numHydrogens != kNumHydrogenUnspecified;
int charge = 0;
if ((node.m_charge & 0x00FFFFFF) == 0)
if ((node.m_charge & 0x00FFFFFF) == 0) {
charge = node.m_charge >> 24;
else
} else {
charge = node.m_charge;
}
int atommap = 0;
int rgroup_num = -1;
int isotope = node.m_isotope;
@@ -144,20 +145,23 @@ bool parseNode(
const std::string &text = ((CDXText *)child.second)->GetText().str();
if (text.size() > 0 && text[0] == 'R') {
try {
if (checkForRGroup)
if (checkForRGroup) {
rgroup_num = text.size() > 1 ? stoi(text.substr(1)) : 0;
else
} else {
isotope = text.size() > 1 ? stoi(text.substr(1)) : 0;
}
} catch (const std::invalid_argument &e) {
if (rgroup_num)
if (rgroup_num) {
BOOST_LOG(rdWarningLog)
<< "RGroupError: Invalid argument - Cannot convert '" << text
<< "' to an integer." << std::endl;
}
} catch (const std::out_of_range &e) {
if (rgroup_num)
if (rgroup_num) {
BOOST_LOG(rdWarningLog)
<< "RGroupError: Out of range - The number '" << text
<< "' is too large or too small." << std::endl;
}
}
}
}
@@ -199,7 +203,7 @@ bool parseNode(
rd_atom->setProp<char>(CDX_IMPLICIT_HYDROGEN_STEREO, 'h');
break;
}
if (node.m_bondOrdering) {
// This node may be completely replaced by the fragment
// i.e. [*:1]C[*:1].C[*:1]C => CCC
@@ -319,5 +323,5 @@ bool parseNode(
}
return true;
}
}
} // namespace ChemDraw
} // namespace RDKit

View File

@@ -50,5 +50,5 @@ bool parseNode(
std::map<std::pair<int, StereoGroupType>, StereoGroupInfo> &sgroups,
int &missingFragId, int externalAttachment);
}
}
} // namespace RDKit
#endif

View File

@@ -120,9 +120,9 @@ void ReactionStepInfo::set_reaction_step(
ReactionInfo::ReactionInfo(CDXReactionScheme &scheme)
: scheme_id(static_cast<unsigned int>(scheme.GetObjectID())) {
for (auto &rxnNode : scheme.ContainedObjects()) {
CDXDatumID type_id = (CDXDatumID)rxnNode.second->GetTag();
auto type_id = (CDXDatumID)rxnNode.second->GetTag();
if (type_id == kCDXObj_ReactionStep) {
CDXReactionStep &step = (CDXReactionStep &)(*rxnNode.second);
auto &step = (CDXReactionStep &)(*rxnNode.second);
auto step_id = step.GetObjectID();
steps.emplace_back(ReactionStepInfo());
ReactionStepInfo &scheme = steps.back();
@@ -150,7 +150,7 @@ void ReactionInfo::set_reaction_steps(
auto idx = mol->getProp<unsigned int>(CDX_FRAG_ID);
fragments[idx] = mol_idx++;
for (auto &atom : mol->atoms()) {
unsigned int idx = atom->getProp<unsigned int>(CDX_ATOM_ID);
auto idx = atom->getProp<unsigned int>(CDX_ATOM_ID);
atoms[idx] = atom;
}
}
@@ -161,5 +161,5 @@ void ReactionInfo::set_reaction_steps(
}
}
}
}
} // namespace ChemDraw
} // namespace RDKit

View File

@@ -81,7 +81,7 @@ class ReactionInfo {
std::map<unsigned int, std::vector<int>> &grouped_fragments,
const std::vector<std::unique_ptr<RWMol>> &mols) const;
};
}
} // namespace ChemDraw
} // namespace RDKit
#endif

View File

@@ -54,7 +54,7 @@ TEST_CASE("Geometry") {
std::string(getenv("RDBASE")) + "/External/ChemDraw/test_data/";
SECTION("R/S Tetrahedral") {
//_sleep(10 * 1000);
{
auto fname = path + "geometry-tetrahedral.cdxml";
auto mols = MolsFromChemDrawFile(fname);
@@ -71,11 +71,11 @@ TEST_CASE("Geometry") {
auto smi = MolToSmiles(*mol);
REQUIRE(smi == MolToSmiles(*mols[0]));
}
{
auto fname = path + "geometry-tetrahedral-3.cdxml";
auto mols = MolsFromChemDrawFile(fname);
REQUIRE(mols.size());
REQUIRE(mols.size());
auto mol = "C1CC[C@H]2CCCC[C@@H]2C1"_smiles;
auto smi = MolToSmiles(*mol);
REQUIRE(smi == MolToSmiles(*mols[0]));
@@ -86,7 +86,8 @@ TEST_CASE("Geometry") {
auto fname = path + "geometry-tetrahedral-4.cdxml";
auto mols = MolsFromChemDrawFile(fname);
REQUIRE(mols.size());
auto mol = "CC(S[C@@H]1CC2=C([H])C(CC[C@]2(C)[C@@]3([H])CC([H])([H])[C@]4(C)[C@](OC5=O)(CC5([H])[H])CC[C@@]4([H])[C@]13[H])=O)=O"_smiles;
auto mol =
"CC(S[C@@H]1CC2=C([H])C(CC[C@]2(C)[C@@]3([H])CC([H])([H])[C@]4(C)[C@](OC5=O)(CC5([H])[H])CC[C@@]4([H])[C@]13[H])=O)=O"_smiles;
auto smi = MolToSmiles(*mol);
std::cerr << "** " << smi << std::endl;
REQUIRE(smi == MolToSmiles(*mols[0]));

View File

@@ -41,7 +41,6 @@
#include <GraphMol/ChemReactions/SanitizeRxn.h>
#include <GraphMol/SmilesParse/SmilesWrite.h>
#include <filesystem>
using namespace RDKit;
using namespace RDKit::v2;
@@ -161,4 +160,3 @@ M END
ChemicalReactionToRxnSmarts(*rxn_mb));
}
}

View File

@@ -1300,9 +1300,12 @@ TEST_CASE("Round TRIP") {
RDLog::LogStateSetter blocker;
for (const auto &entry :
std::filesystem::recursive_directory_iterator(code_path)) {
if (entry.path().string().find("ChemDraw") != std::string::npos)
if (entry.path().string().find("ChemDraw") != std::string::npos) {
continue; // Skip ChemDraw directory
if (entry.path().string().find("build") != std::string::npos) continue;
}
if (entry.path().string().find("build") != std::string::npos) {
continue;
}
if (entry.is_regular_file() &&
entry.path().extension().string() == ".mol") {
if (exceptions.find(entry.path().filename().string()) !=

View File

@@ -52,8 +52,10 @@ TEST_CASE("Round TRIP") {
std::string(getenv("RDBASE")) + "/Code/GraphMol/test_data/";
std::string code_path = std::string(getenv("RDBASE"));
// Eventually this catch test is to see if round tripping mol 3d -> chemdraw returns
// reasonable coords, however chemdraw seems to forget about the original scale
// Eventually this catch test is to see if round tripping mol 3d -> chemdraw
// returns reasonable coords, however chemdraw seems to forget about the
// original scale and converts to pixel drawing coords, so this test is kind
// of meaningless
SECTION("3D structs") {
auto fname =

View File

@@ -59,7 +59,9 @@ std::string replace(std::string &istr, const std::string &from,
const std::string &to) {
std::string str(istr);
size_t start_pos = str.find(from);
if (start_pos == std::string::npos) return str;
if (start_pos == std::string::npos) {
return str;
}
str.replace(start_pos, from.length(), to);
return str;
}
@@ -70,21 +72,24 @@ bool hasNonSupportedFeatures(CDXDocument &document, const std::string &fname) {
std::stringstream xml;
xml << ifs.rdbuf();
// We should be able to figure this out from the node but...
if(xml.str().find("monomerAttachmentStructure_") != std::string::npos ||
xml.str().find("Name=\"monomerAttachments") != std::string::npos) {
if (xml.str().find("monomerAttachmentStructure_") != std::string::npos ||
xml.str().find("Name=\"monomerAttachments") != std::string::npos) {
return true;
}
for (auto node : document.ContainedObjects()) {
CDXDatumID id = (CDXDatumID)node.second->GetTag();
auto id = (CDXDatumID)node.second->GetTag();
switch (id) {
case kCDXObj_Page:
for (auto frag : node.second->ContainedObjects()) {
CDXDatumID id = (CDXDatumID)frag.second->GetTag();
auto id = (CDXDatumID)frag.second->GetTag();
if (id == kCDXObj_Fragment) {
CDXFragment &fragment = (CDXFragment &)(*frag.second);
if (fragment.m_sequenceType == kCDXSeqType_Unknown) return true;
} else if (id == kCDXObj_BracketAttachment || id == kCDXObj_BracketedGroup) {
auto &fragment = (CDXFragment &)(*frag.second);
if (fragment.m_sequenceType == kCDXSeqType_Unknown) {
return true;
}
} else if (id == kCDXObj_BracketAttachment ||
id == kCDXObj_BracketedGroup) {
return true;
}
}
@@ -92,7 +97,7 @@ bool hasNonSupportedFeatures(CDXDocument &document, const std::string &fname) {
case kCDXObj_ObjectTag: {
CDXObject &object = *((CDXObject *)node.second);
id = (CDXDatumID)object.GetTag();
// Check for monomers
// Check for monomers
break;
}
default:
@@ -113,7 +118,7 @@ TEST_CASE("Round TRIP") {
SECTION("round trip") {
// if we can't find the CDXML6K path, then don't run the test
if(!std::filesystem::exists(path)) {
if (!std::filesystem::exists(path)) {
return;
}
int failed = 0;
@@ -136,43 +141,82 @@ TEST_CASE("Round TRIP") {
std::string nomolpath = path + "NOMOL/";
std::string badparsepath = path + "BADPARSE/";
std::string sanitizationpath = path + "SANI/";
std::set<std::string> known_failures{
"INDMUMLL1117_2025-01-24-17-23-14_304.cdxml", // Dative oxygen gets set to a radical
"INDMUMLL1117_2025-01-24-17-26-06_1010.cdxml", // The next batch has a type of stereochem I don't know how to parse yet
"INDMUMLL1117_2025-01-24-17-26-06_1012.cdxml",
"INDMUMLL1117_2025-01-24-17-26-06_1022.cdxml",
"INDMUMLL1117_2025-01-24-17-26-06_1024.cdxml",
"INDMUMLL1117_2025-01-24-17-26-06_1026.cdxml",
"INDMUMLL1117_2025-01-24-17-26-06_1032.cdxml",
"INDMUMLL1117_2025-01-24-17-26-06_1034.cdxml",
"INDMUMLL1117_2025-01-24-17-26-06_1036.cdxml",
"INDMUMLL1117_2025-01-24-17-26-06_1040.cdxml",
"INDMUMLL1117_2025-01-24-17-26-06_1042.cdxml",
"INDMUMLL1117_2025-01-24-17-26-06_1048.cdxml", // Stereo chem batch ends here
"INDMUMLL1117_2025-01-24-17-26-13_1690.cdxml", // RDKit shows a radical for the dative ->[O]
"INDMUMLL1117_2025-01-24-17-27-11_6877.cdxml", // The next batch has a type of stereochem I don't know how to parse yet (same as before)
"INDMUMLL1117_2025-01-24-17-27-11_6878.cdxml",
"INDMUMLL1117_2025-01-24-17-27-11_6883.cdxml",
"INDMUMLL1117_2025-01-24-17-27-11_6884.cdxml",
"INDMUMLL1117_2025-01-24-17-27-11_6889.cdxml",
"INDMUMLL1117_2025-01-24-17-27-11_6896.cdxml",
"INDMUMLL1117_2025-01-24-17-27-30_8574.cdxml", // Stereo chem batch ends here
"INDMUMLL1117_2025-01-24-17-27-31_8633.cdxml", // RDkit is missing a dummy atom molecule
"INDMUMLL1117_2025-01-24-17-27-31_8651.cdxml", // RDkit is missing a dummy atom molecule
"INDMUMLL1117_2025-01-24-17-27-53_10330.cdxml",// 2D projection of 3D stereo, we fail this one
"INDMUMLL1117_2025-01-24-17-27-53_10332.cdxml",// 2D projection of 3D stereo, we fail this one
"INDMUMLL1117_2025-01-24-17-27-54_10336.cdxml",// RDKit Smiles keeps any bonds ~, ChemDraw doesn't
"INDMUMLL1117_2025-01-24-17-28-02_10942.cdxml",// Chemdraw smiles doesn't support quadruple bond $
"INDMUMLL1117_2025-01-24-17-28-15_11666.cdxml",// RDKit Smiles keeps any bonds ~, ChemDraw doesn't
"INDMUMLL1117_2025-01-24-17-28-20_12011.cdxml",// RDKit gets stereo from the 3D data and the wedging
"INDMUMLL1117_2025-01-24-17-28-20_12012.cdxml",// RDKit gets stereo from the 3D data and the wedging
"INDMUMLL1117_2025-01-24-17-28-21_12031.cdxml",// 2D projection of 3D stereo, we fail this one
"INDMUMLL1117_2025-01-24-17-28-30_12568.cdxml",// 2D projection of 3D stereo, we fail this one
"INDMUMLL1117_2025-01-24-17-29-06_14654.cdxml",// Dative oxygen gets set to a radical
"INDMUMLL1117_2025-01-24-17-29-08_14775.cdxml",// RDKit Smiles keeps any bonds ~, ChemDraw doesn't
"INDMUMLL1117_2025-01-24-17-29-09_14896.cdxml",// We apparently do a bit of a better job than chemdraw here in parsing R/S
"INDMUMLL1117_2025-01-24-17-29-09_14897.cdxml" // RDKit just gets very different stereo chem, no idea why
"INDMUMLL1117_2025-01-24-17-23-14_304.cdxml", // Dative oxygen gets set
// to a radical
"INDMUMLL1117_2025-01-24-17-26-06_1010.cdxml", // The next batch has a
// type of stereochem I
// don't know how to
// parse yet
"INDMUMLL1117_2025-01-24-17-26-06_1012.cdxml",
"INDMUMLL1117_2025-01-24-17-26-06_1022.cdxml",
"INDMUMLL1117_2025-01-24-17-26-06_1024.cdxml",
"INDMUMLL1117_2025-01-24-17-26-06_1026.cdxml",
"INDMUMLL1117_2025-01-24-17-26-06_1032.cdxml",
"INDMUMLL1117_2025-01-24-17-26-06_1034.cdxml",
"INDMUMLL1117_2025-01-24-17-26-06_1036.cdxml",
"INDMUMLL1117_2025-01-24-17-26-06_1040.cdxml",
"INDMUMLL1117_2025-01-24-17-26-06_1042.cdxml",
"INDMUMLL1117_2025-01-24-17-26-06_1048.cdxml", // Stereo chem batch
// ends here
"INDMUMLL1117_2025-01-24-17-26-13_1690.cdxml", // RDKit shows a radical
// for the dative ->[O]
"INDMUMLL1117_2025-01-24-17-27-11_6877.cdxml", // The next batch has a
// type of stereochem I
// don't know how to
// parse yet (same as
// before)
"INDMUMLL1117_2025-01-24-17-27-11_6878.cdxml",
"INDMUMLL1117_2025-01-24-17-27-11_6883.cdxml",
"INDMUMLL1117_2025-01-24-17-27-11_6884.cdxml",
"INDMUMLL1117_2025-01-24-17-27-11_6889.cdxml",
"INDMUMLL1117_2025-01-24-17-27-11_6896.cdxml",
"INDMUMLL1117_2025-01-24-17-27-30_8574.cdxml", // Stereo chem batch
// ends here
"INDMUMLL1117_2025-01-24-17-27-31_8633.cdxml", // RDkit is missing a
// dummy atom molecule
"INDMUMLL1117_2025-01-24-17-27-31_8651.cdxml", // RDkit is missing a
// dummy atom molecule
"INDMUMLL1117_2025-01-24-17-27-53_10330.cdxml", // 2D projection of 3D
// stereo, we fail this
// one
"INDMUMLL1117_2025-01-24-17-27-53_10332.cdxml", // 2D projection of 3D
// stereo, we fail this
// one
"INDMUMLL1117_2025-01-24-17-27-54_10336.cdxml", // RDKit Smiles keeps
// any bonds ~,
// ChemDraw doesn't
"INDMUMLL1117_2025-01-24-17-28-02_10942.cdxml", // Chemdraw smiles
// doesn't support
// quadruple bond $
"INDMUMLL1117_2025-01-24-17-28-15_11666.cdxml", // RDKit Smiles keeps
// any bonds ~,
// ChemDraw doesn't
"INDMUMLL1117_2025-01-24-17-28-20_12011.cdxml", // RDKit gets stereo
// from the 3D data and
// the wedging
"INDMUMLL1117_2025-01-24-17-28-20_12012.cdxml", // RDKit gets stereo
// from the 3D data and
// the wedging
"INDMUMLL1117_2025-01-24-17-28-21_12031.cdxml", // 2D projection of 3D
// stereo, we fail this
// one
"INDMUMLL1117_2025-01-24-17-28-30_12568.cdxml", // 2D projection of 3D
// stereo, we fail this
// one
"INDMUMLL1117_2025-01-24-17-29-06_14654.cdxml", // Dative oxygen gets
// set to a radical
"INDMUMLL1117_2025-01-24-17-29-08_14775.cdxml", // RDKit Smiles keeps
// any bonds ~,
// ChemDraw doesn't
"INDMUMLL1117_2025-01-24-17-29-09_14896.cdxml", // We apparently do a
// bit of a better job
// than chemdraw here
// in parsing R/S
"INDMUMLL1117_2025-01-24-17-29-09_14897.cdxml" // RDKit just gets very
// different stereo
// chem, no idea why
};
for (auto p : {failpath, nomolpath, badparsepath, sanitizationpath}) {
@@ -187,26 +231,27 @@ TEST_CASE("Round TRIP") {
if (entry.is_regular_file()) {
std::string fname = entry.path().filename().string();
// issue here - graphite nanotube
if (fname == "INDMUMLL1117_2025-01-24-17-28-02_10946.cdxml")
if (fname == "INDMUMLL1117_2025-01-24-17-28-02_10946.cdxml") {
continue; // nanotube takes forever
}
auto molfname = molpath + replace(fname, ".cdxml", ".mol");
auto smifname = smipath + replace(fname, ".cdxml", ".smi");
// if chemscript couldn't make an output, ignore it
total++;
total++;
if (!std::filesystem::exists(molfname) ||
!std::filesystem::exists(smifname)) {
no_mol_in_doc++;
no_mol_in_doc++;
continue;
}
// Get the ChemScript mol and smiles
// Get the ChemScript mol and smiles
std::unique_ptr<RWMol> mol;
//= nullptr;
try {
mol.reset(MolFileToMol(molfname));
} catch (...) {
bad_chemdraw_mol++;
bad_chemdraw_mol++;
continue;
}
// REQUIRE(mols.size());
@@ -217,9 +262,9 @@ TEST_CASE("Round TRIP") {
{
try {
auto smimol = SmilesToMol(smiles_in);
if (!smimol)
if (!smimol) {
smiles = smiles_in;
else {
} else {
smiles = MolToSmiles(*smimol);
delete smimol;
}
@@ -227,8 +272,8 @@ TEST_CASE("Round TRIP") {
smiles = smiles_in;
}
}
parseable++;
parseable++;
// Read the cdxml
std::vector<std::unique_ptr<RWMol>> mols;
bool santizationFailure = false;
@@ -241,22 +286,20 @@ TEST_CASE("Round TRIP") {
santizationFailure = true;
}
if (!mols.size()) {
if (smiles.size() == 0) {
// At least we match the chemscript non-mol
success++;
}
else if (hasNonSupportedFeatures(entry.path().string())) {
//std::cerr << "[NOMOL (Unsupported)]: " << entry.path().string()
// << std::endl;
if (smiles.size() == 0) {
// At least we match the chemscript non-mol
success++;
} else if (hasNonSupportedFeatures(entry.path().string())) {
// std::cerr << "[NOMOL (Unsupported)]: " << entry.path().string()
// << std::endl;
nonSupported++;
} else {
std::cerr << "[NOMOL]: " << entry.path().string()
<< std::endl;
std::cerr << "[NOMOL]: " << entry.path().string() << std::endl;
std::filesystem::copy(
entry.path().string(),
nomolpath + entry.path().filename().string());
nomol++;
}
nomol++;
}
continue;
}
} catch (...) {
@@ -292,9 +335,11 @@ TEST_CASE("Round TRIP") {
sanitizationpath + entry.path().filename().string());
saniFailed++;
} else {
if(known_failures.find(entry.path().filename().string()) != known_failures.end())
continue; // we know this failure and it's ok for now
if (known_failures.find(entry.path().filename().string()) !=
known_failures.end()) {
continue; // we know this failure and it's ok for now
}
std::cerr << "[FAIL]: " << entry.path() << std::endl;
std::filesystem::copy(entry.path(),
failpath + entry.path().filename().string());
@@ -315,8 +360,7 @@ TEST_CASE("Round TRIP") {
std::cerr << "Success:" << success + smimatches << std::endl;
std::cerr << "skipped (non supported features):" << nonSupported
<< std::endl;
std::cerr << "skipped (no mol in doc):" << no_mol_in_doc
<< std::endl;
std::cerr << "skipped (no mol in doc):" << no_mol_in_doc << std::endl;
std::cerr << "Chemscript smiles matches not chemscript mol: " << smimatches
<< std::endl;
std::cerr << "Failed:" << failed << std::endl;

View File

@@ -186,7 +186,7 @@ bool replaceFragments(RWMol &mol) {
namespace {
Atom::ChiralType getChirality(ROMol &mol, Atom *center_atom, Conformer &conf) {
if (center_atom->hasProp(CDX_BOND_ORDERING)) {
std::vector<int> bond_ordering =
auto bond_ordering =
center_atom->getProp<std::vector<int>>(CDX_BOND_ORDERING);
if (bond_ordering.size() < 3) {
return Atom::ChiralType::CHI_UNSPECIFIED;
@@ -239,8 +239,9 @@ Atom::ChiralType getChirality(ROMol &mol, Atom *center_atom, Conformer &conf) {
// This supports the HDot and HDash features available in ChemDraw:
// one is an implicit wedged hydrogen and the other is a dashed hydrogen
if (center_atom->hasProp(CDX_IMPLICIT_HYDROGEN_STEREO) &&
center_atom->getProp<char>(CDX_IMPLICIT_HYDROGEN_STEREO) == 'w')
center_atom->getProp<char>(CDX_IMPLICIT_HYDROGEN_STEREO) == 'w') {
nswaps++;
}
if (nswaps % 2) {
return Atom::ChiralType::CHI_TETRAHEDRAL_CCW;
@@ -279,23 +280,27 @@ void checkChemDrawTetrahedralGeometries(RWMol &mol) {
// I currently don't understand that well enough.
switch (cip) {
case kCDXCIPAtom_R:
if (!chiralityChanged)
if (!chiralityChanged) {
atom->setChiralTag(Atom::ChiralType::CHI_TETRAHEDRAL_CW);
}
unsetTetrahedralAtoms.push_back(std::make_pair('R', atom));
break;
case kCDXCIPAtom_r:
if (!chiralityChanged)
if (!chiralityChanged) {
atom->setChiralTag(Atom::ChiralType::CHI_TETRAHEDRAL_CW);
}
unsetTetrahedralAtoms.push_back(std::make_pair('r', atom));
break;
case kCDXCIPAtom_S:
if (!chiralityChanged)
if (!chiralityChanged) {
atom->setChiralTag(Atom::ChiralType::CHI_TETRAHEDRAL_CW);
}
unsetTetrahedralAtoms.push_back(std::make_pair('S', atom));
break;
case kCDXCIPAtom_s:
if (!chiralityChanged)
if (!chiralityChanged) {
atom->setChiralTag(Atom::ChiralType::CHI_TETRAHEDRAL_CCW);
}
unsetTetrahedralAtoms.push_back(std::make_pair('s', atom));
break;
default:

View File

@@ -104,7 +104,7 @@ struct StereoGroupInfo {
// check to see if we have a tetrahedral flag and ChemDraw CIP set but no
// stereo assigned, if so check the bond ordering for CW and CCW
void checkChemDrawTetrahedralGeometries(RWMol &mol);
}
} // namespace ChemDraw
} // namespace RDKit
#endif

View File

@@ -38,7 +38,6 @@
#include "chemdraw/CDXStdObjects.h"
#include "ChemDrawEndInclude.h"
namespace RDKit {
namespace v2 {
const double DEFAULT_CDX_BOND_LENGTH = 14.4;
@@ -76,10 +75,10 @@ std::string MolToChemDrawBlock(const ROMol &mol, CDXFormat format) {
CDXObjectID object_id = 1;
CDXDocument document(object_id++);
CDXPage *page = new CDXPage(object_id++);
auto *page = new CDXPage(object_id++);
document.m_bondLength = DEFAULT_CDX_BOND_LENGTH;
document.m_flags |= CDXDocument::CDXDocumentProperty1::has_bondLength;
CDXFragment *fragment = new CDXFragment(object_id++);
auto *fragment = new CDXFragment(object_id++);
page->AddChild(fragment);
std::vector<CDXNode *> nodes;
nodes.reserve(trmol.getNumAtoms());
@@ -108,7 +107,7 @@ std::string MolToChemDrawBlock(const ROMol &mol, CDXFormat format) {
auto wedgeBonds = Chirality::pickBondsToWedge(trmol, nullptr, conf);
for (auto &atom : trmol.atoms()) {
CDXNode *node = new CDXNode(object_id + atom->getIdx());
auto *node = new CDXNode(object_id + atom->getIdx());
auto pos = conf->getAtomPos(atom->getIdx());
if (is3D) {
node->Position3D(CDXPoint3D(CDXCoordinatefromPoints(pos.x),
@@ -173,8 +172,7 @@ std::string MolToChemDrawBlock(const ROMol &mol, CDXFormat format) {
}
for (auto &bond : trmol.bonds()) {
CDXBond *cdxbond =
new CDXBond(object_id + mol.getNumAtoms() + bond->getIdx());
auto *cdxbond = new CDXBond(object_id + mol.getNumAtoms() + bond->getIdx());
int dirCode = 0;
bool reverse = false;
@@ -271,8 +269,9 @@ std::string MolToChemDrawBlock(const ROMol &mol, CDXFormat format) {
}
if (bond->getBondDir() == Bond::BondDir::EITHERDOUBLE ||
bond->getBondDir() == Bond::BondDir::UNKNOWN)
bond->getBondDir() == Bond::BondDir::UNKNOWN) {
cdxbond->m_display = kCDXBondDisplay_Wavy;
}
fragment->AddChild(cdxbond);
}
@@ -280,9 +279,9 @@ std::string MolToChemDrawBlock(const ROMol &mol, CDXFormat format) {
document.AddChild(page);
document.m_colorTable.m_colors
.clear(); // if this isn't empty something fails.
std::ostringstream os;
if(format == CDXFormat::CDXML) {
if (format == CDXFormat::CDXML) {
os << kCDXML_HeaderString;
XMLDataSink ds(os);
document.XMLWrite(ds);
@@ -292,5 +291,5 @@ std::string MolToChemDrawBlock(const ROMol &mol, CDXFormat format) {
}
return os.str();
}
}
} // namespace v2
} // namespace RDKit