From 009e593b5c2de0ce319de45a9525dad34cbe21d2 Mon Sep 17 00:00:00 2001 From: Greg Landrum Date: Wed, 17 Feb 2021 15:25:53 +0100 Subject: [PATCH] MolDraw2D: first pass at rendering atom lists (#3804) * first pass at rendering atom lists * a bit of additional cleanup --- Code/GraphMol/FileParsers/MolFileWriter.cpp | 62 ++----------------- Code/GraphMol/MolDraw2D/MolDraw2D.cpp | 29 ++++++++- Code/GraphMol/MolDraw2D/catch_tests.cpp | 62 +++++++++++++++++++ Code/GraphMol/QueryOps.cpp | 67 +++++++++++++++++++++ Code/GraphMol/QueryOps.h | 3 + 5 files changed, 165 insertions(+), 58 deletions(-) diff --git a/Code/GraphMol/FileParsers/MolFileWriter.cpp b/Code/GraphMol/FileParsers/MolFileWriter.cpp index c27a1c47e..e5edee7f0 100644 --- a/Code/GraphMol/FileParsers/MolFileWriter.cpp +++ b/Code/GraphMol/FileParsers/MolFileWriter.cpp @@ -224,64 +224,12 @@ bool hasComplexQuery(const Atom *atom) { return res; } -bool isListQuery(const Atom::QUERYATOM_QUERY *q) { - // list queries are series of nested ors of AtomAtomicNum queries - PRECONDITION(q, "bad query"); - bool res = false; - std::string descr = q->getDescription(); - if (descr == "AtomOr") { - res = true; - for (auto cIt = q->beginChildren(); cIt != q->endChildren() && res; ++cIt) { - std::string descr = (*cIt)->getDescription(); - // we don't allow negation of any children of the query: - if ((*cIt)->getNegation()) { - res = false; - } else if (descr == "AtomOr") { - res = isListQuery((*cIt).get()); - } else if (descr != "AtomAtomicNum") { - res = false; - } - } - } - return res; -} - -void getListQueryVals(const Atom::QUERYATOM_QUERY *q, INT_VECT &vals) { - // list queries are series of nested ors of AtomAtomicNum queries - PRECONDITION(q, "bad query"); - std::string descr = q->getDescription(); - PRECONDITION(descr == "AtomOr", "bad query"); - if (descr == "AtomOr") { - for (auto cIt = q->beginChildren(); cIt != q->endChildren(); ++cIt) { - std::string descr = (*cIt)->getDescription(); - CHECK_INVARIANT((descr == "AtomOr" || descr == "AtomAtomicNum"), - "bad query"); - // we don't allow negation of any children of the query: - if (descr == "AtomOr") { - getListQueryVals((*cIt).get(), vals); - } else if (descr == "AtomAtomicNum") { - vals.push_back( - static_cast((*cIt).get())->getVal()); - } - } - } -} - -bool hasListQuery(const Atom *atom) { - PRECONDITION(atom, "bad atom"); - bool res = false; - if (atom->hasQuery()) { - res = isListQuery(atom->getQuery()); - } - return res; -} - const std::string GetMolFileQueryInfo( const RWMol &mol, const boost::dynamic_bitset<> &queryListAtoms) { std::stringstream ss; boost::dynamic_bitset<> listQs(mol.getNumAtoms()); for (const auto atom : mol.atoms()) { - if (hasListQuery(atom) && !queryListAtoms[atom->getIdx()]) { + if (isAtomListQuery(atom) && !queryListAtoms[atom->getIdx()]) { listQs.set(atom->getIdx()); } } @@ -305,7 +253,7 @@ const std::string GetMolFileQueryInfo( for (const auto atom : mol.atoms()) { if (listQs[atom->getIdx()]) { INT_VECT vals; - getListQueryVals(atom->getQuery(), vals); + getAtomListQueryVals(atom->getQuery(), vals); ss << "M ALS " << std::setw(3) << atom->getIdx() + 1 << " "; ss << std::setw(2) << vals.size(); if (atom->getQuery()->getNegation()) { @@ -488,7 +436,7 @@ const std::string AtomGetMolFileSymbol( res = "MH"; queryListAtoms.set(atom->getIdx()); } else if (hasComplexQuery(atom)) { - if (hasListQuery(atom)) { + if (isAtomListQuery(atom)) { res = "L"; } else { res = "*"; @@ -928,11 +876,11 @@ const std::string GetV3000MolFileAtomLine( ss << "M V30 " << atom->getIdx() + 1; std::string symbol = AtomGetMolFileSymbol(atom, false, queryListAtoms); - if (!hasListQuery(atom) || queryListAtoms[atom->getIdx()]) { + if (!isAtomListQuery(atom) || queryListAtoms[atom->getIdx()]) { ss << " " << symbol; } else { INT_VECT vals; - getListQueryVals(atom->getQuery(), vals); + getAtomListQueryVals(atom->getQuery(), vals); if (atom->getQuery()->getNegation()) { ss << " " << "\"NOT"; diff --git a/Code/GraphMol/MolDraw2D/MolDraw2D.cpp b/Code/GraphMol/MolDraw2D/MolDraw2D.cpp index fa70e7922..b5b60b8ea 100644 --- a/Code/GraphMol/MolDraw2D/MolDraw2D.cpp +++ b/Code/GraphMol/MolDraw2D/MolDraw2D.cpp @@ -2351,7 +2351,11 @@ void MolDraw2D::extractAtomSymbols(const ROMol &mol) { atomic_nums_[activeMolIdx_].clear(); for (auto at1 : mol.atoms()) { atom_syms_[activeMolIdx_].emplace_back(getAtomSymbolAndOrientation(*at1)); - atomic_nums_[activeMolIdx_].emplace_back(at1->getAtomicNum()); + if (!isComplexQuery(at1)) { + atomic_nums_[activeMolIdx_].emplace_back(at1->getAtomicNum()); + } else { + atomic_nums_[activeMolIdx_].push_back(0); + } } } @@ -3776,6 +3780,27 @@ pair MolDraw2D::getAtomSymbolAndOrientation( return std::make_pair(symbol, orient); } +std::string getAtomListText(const Atom &atom) { + PRECONDITION(atom.hasQuery(), "no query"); + PRECONDITION(atom.getQuery()->getDescription() == "AtomOr", "bad query type"); + + std::string res = ""; + if (atom.getQuery()->getNegation()) { + res += "!"; + } + res += "["; + std::vector vals; + getAtomListQueryVals(atom.getQuery(), vals); + for (unsigned int i = 0; i < vals.size(); ++i) { + if (i != 0) { + res += ","; + } + res += PeriodicTable::getTable()->getElementSymbol(vals[i]); + } + + return res + "]"; +} + // **************************************************************************** string MolDraw2D::getAtomSymbol(const RDKit::Atom &atom, OrientType orientation) const { @@ -3818,6 +3843,8 @@ string MolDraw2D::getAtomSymbol(const RDKit::Atom &atom, atom.getDegree() == 1) { symbol = ""; literal_symbol = false; + } else if (isAtomListQuery(&atom)) { + symbol = getAtomListText(atom); } else if (isComplexQuery(&atom)) { symbol = "?"; } else if (drawOptions().atomLabelDeuteriumTritium && diff --git a/Code/GraphMol/MolDraw2D/catch_tests.cpp b/Code/GraphMol/MolDraw2D/catch_tests.cpp index 56e736e43..ed74db79c 100644 --- a/Code/GraphMol/MolDraw2D/catch_tests.cpp +++ b/Code/GraphMol/MolDraw2D/catch_tests.cpp @@ -2389,3 +2389,65 @@ M END)CTAB")); CHECK(outerBondsDistance / innerBondsDistance > 1.3f); } } + +TEST_CASE("draw atom list queries", "[extras]") { + SECTION("atom list") { + auto m = R"CTAB( + Mrv2102 02112115002D + + 0 0 0 0 0 999 V3000 +M V30 BEGIN CTAB +M V30 COUNTS 3 3 0 0 0 +M V30 BEGIN ATOM +M V30 1 [N,O,S] 9.2083 12.8058 0 0 +M V30 2 C 8.4383 11.4721 0 0 +M V30 3 C 9.9783 11.4721 0 0 +M V30 END ATOM +M V30 BEGIN BOND +M V30 1 1 1 2 +M V30 2 1 3 1 +M V30 3 1 2 3 +M V30 END BOND +M V30 END CTAB +M END +)CTAB"_ctab; + REQUIRE(m); + MolDraw2DSVG drawer(350, 300); + drawer.drawMolecule(*m, "atom list"); + drawer.finishDrawing(); + auto text = drawer.getDrawingText(); + std::ofstream outs("testAtomLists-1.svg"); + outs << text; + outs.flush(); + } + + SECTION("NOT atom list") { + auto m = R"CTAB( + Mrv2102 02112115032D + + 0 0 0 0 0 999 V3000 +M V30 BEGIN CTAB +M V30 COUNTS 3 3 0 0 0 +M V30 BEGIN ATOM +M V30 1 "NOT [N,O,S]" 9.2083 12.8058 0 0 +M V30 2 C 8.4383 11.4721 0 0 +M V30 3 C 9.9783 11.4721 0 0 +M V30 END ATOM +M V30 BEGIN BOND +M V30 1 1 1 2 +M V30 2 1 3 1 +M V30 3 1 2 3 +M V30 END BOND +M V30 END CTAB +M END +)CTAB"_ctab; + REQUIRE(m); + MolDraw2DSVG drawer(350, 300); + drawer.drawMolecule(*m, "NOT atom list"); + drawer.finishDrawing(); + auto text = drawer.getDrawingText(); + std::ofstream outs("testAtomLists-2.svg"); + outs << text; + outs.flush(); + } +} diff --git a/Code/GraphMol/QueryOps.cpp b/Code/GraphMol/QueryOps.cpp index 7770af6b8..36b8756c8 100644 --- a/Code/GraphMol/QueryOps.cpp +++ b/Code/GraphMol/QueryOps.cpp @@ -11,6 +11,7 @@ #include #include #include +#include namespace RDKit { @@ -601,6 +602,7 @@ ATOM_NULL_QUERY *makeAtomNullQuery() { } bool isComplexQuery(const Bond *b) { + PRECONDITION(b, "bad bond"); if (!b->hasQuery()) { return false; } @@ -638,6 +640,7 @@ bool isComplexQuery(const Bond *b) { return true; } +namespace { bool _complexQueryHelper(Atom::QUERYATOM_QUERY const *query, bool &hasAtNum) { if (!query) { return false; @@ -665,7 +668,70 @@ bool _complexQueryHelper(Atom::QUERYATOM_QUERY const *query, bool &hasAtNum) { } return false; } + +template +bool _atomListQueryHelper(const T query) { + PRECONDITION(query, "no query"); + if (query->getNegation()) { + return false; + } + if (query->getDescription() == "AtomAtomicNum") { + return true; + } + if (query->getDescription() == "AtomOr") { + for (const auto child : boost::make_iterator_range(query->beginChildren(), + query->endChildren())) { + if (!_atomListQueryHelper(child)) { + return false; + } + } + } + return true; +} +} // namespace +bool isAtomListQuery(const Atom *a) { + PRECONDITION(a, "bad atom"); + if (!a->hasQuery()) { + return false; + } + if (a->getQuery()->getDescription() == "AtomOr") { + for (const auto child : boost::make_iterator_range( + a->getQuery()->beginChildren(), a->getQuery()->endChildren())) { + if (!_atomListQueryHelper(child)) { + return false; + } + } + return true; + } + return false; +} + +void getAtomListQueryVals(const Atom::QUERYATOM_QUERY *q, + std::vector &vals) { + // list queries are series of nested ors of AtomAtomicNum queries + PRECONDITION(q, "bad query"); + auto descr = q->getDescription(); + PRECONDITION(descr == "AtomOr", "bad query"); + if (descr == "AtomOr") { + for (const auto child : + boost::make_iterator_range(q->beginChildren(), q->endChildren())) { + auto descr = child->getDescription(); + if (child->getNegation() || + (descr != "AtomOr" && descr != "AtomAtomicNum")) { + throw ValueErrorException("bad query type"); + } + // we don't allow negation of any children of the query: + if (descr == "AtomOr") { + getAtomListQueryVals(child.get(), vals); + } else if (descr == "AtomAtomicNum") { + vals.push_back(static_cast(child.get())->getVal()); + } + } + } +} + bool isComplexQuery(const Atom *a) { + PRECONDITION(a, "bad atom"); if (!a->hasQuery()) { return false; } @@ -697,6 +763,7 @@ bool isComplexQuery(const Atom *a) { return true; } bool isAtomAromatic(const Atom *a) { + PRECONDITION(a, "bad atom"); bool res = false; if (!a->hasQuery()) { res = a->getIsAromatic(); diff --git a/Code/GraphMol/QueryOps.h b/Code/GraphMol/QueryOps.h index 87254ccd5..aafd6651f 100644 --- a/Code/GraphMol/QueryOps.h +++ b/Code/GraphMol/QueryOps.h @@ -1061,6 +1061,9 @@ Queries::EqualityQuery *makePropQuery( RDKIT_GRAPHMOL_EXPORT bool isComplexQuery(const Bond *b); RDKIT_GRAPHMOL_EXPORT bool isComplexQuery(const Atom *a); RDKIT_GRAPHMOL_EXPORT bool isAtomAromatic(const Atom *a); +RDKIT_GRAPHMOL_EXPORT bool isAtomListQuery(const Atom *a); +RDKIT_GRAPHMOL_EXPORT void getAtomListQueryVals(const Atom::QUERYATOM_QUERY *q, + std::vector &vals); namespace QueryOps { RDKIT_GRAPHMOL_EXPORT void completeMolQueries(