Files
rdkit/Code/GraphMol/SLNParse/SLNAttribs.cpp
2026-04-18 05:22:09 +02:00

523 lines
19 KiB
C++

// $Id$
//
// Copyright (c) 2008, Novartis Institutes for BioMedical Research Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Novartis Institutes for BioMedical Research Inc.
// nor the names of its contributors may be used to endorse or promote
// products derived from this software without specific prior
// written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Created by Greg Landrum, Sept. 2006
//
#include <GraphMol/SLNParse/SLNParse.h>
#include <GraphMol/SLNParse/SLNAttribs.h>
#include <GraphMol/RDKitBase.h>
#include <GraphMol/RDKitQueries.h>
#include <RDGeneral/RDLog.h>
#include <RDGeneral/Invariant.h>
#include <boost/algorithm/string.hpp>
#include <boost/lexical_cast.hpp>
namespace RDKit {
namespace SLNParse {
namespace {
// Converts the textual value of an SLN attribute into an int.
//
// attribName is only used when building the error message. attribVal is
// lower-cased before it is interpreted. If defaultFunc is supplied and the
// (lower-cased) value is "f", the result is defaultFunc(atom) rather than a
// parsed number.
//
// Throws SLNParseException when the value cannot be converted to an int.
int parseIntAttribVal(std::string attribName, std::string attribVal,
                      int (*defaultFunc)(Atom const *at) = nullptr,
                      Atom *atom = nullptr) {
  PRECONDITION((!defaultFunc) || atom,
               "If a default func is provided, an atom must be as well.");
  boost::to_lower(attribVal);

  // "f" means "take the value from the atom itself", but only when the caller
  // told us how to compute it.
  if (defaultFunc && attribVal == "f") {
    return defaultFunc(atom);
  }

  int parsed = 0;
  try {
    parsed = boost::lexical_cast<int>(attribVal);
  } catch (boost::bad_lexical_cast &) {
    std::stringstream msg;
    msg << "SLN Parser error: bad integer value (" << attribVal
        << ") provided for property: " << attribName;
    throw SLNParseException(msg.str());
  }
  return parsed;
}
} // end of anonymous namespace
QueryAtom::QUERYATOM_QUERY *makeQueryFromOp(const std::string &op, int val,
int (*func)(Atom const *at),
std::string description) {
PRECONDITION(func, "bad query function");
QueryAtom::QUERYATOM_QUERY *res = nullptr;
if (op == "=") {
auto *tmp = new ATOM_EQUALS_QUERY;
tmp->setVal(val);
tmp->setDataFunc(func);
tmp->setDescription(description);
res = tmp;
} else if (op == "!=") {
auto *tmp = new ATOM_EQUALS_QUERY;
tmp->setVal(val);
tmp->setDataFunc(func);
tmp->setDescription(description);
tmp->setNegation(true);
res = tmp;
} else if (op == ">") {
// don't be alarmed by this use of the LessEqual query for >, it's not a
// bug.
// The RD GreaterQuery(tgt) returns true if tgt is greater than the thing
// you
// compare to. In this case we need to reverse that because we're interested
// in
// seeing if the value is greater than the target; this is equiv to asking
// if
// the target is < the value.
auto *tmp = new ATOM_LESS_QUERY;
tmp->setVal(val);
tmp->setDataFunc(func);
tmp->setDescription(description);
res = tmp;
} else if (op == ">=") {
auto *tmp = new ATOM_LESSEQUAL_QUERY;
tmp->setVal(val);
tmp->setDataFunc(func);
tmp->setDescription(description);
res = tmp;
} else if (op == "<") {
auto *tmp = new ATOM_GREATER_QUERY;
tmp->setVal(val);
tmp->setDataFunc(func);
tmp->setDescription(description);
res = tmp;
} else if (op == "<=") {
auto *tmp = new ATOM_GREATEREQUAL_QUERY;
tmp->setVal(val);
tmp->setDataFunc(func);
tmp->setDescription(description);
res = tmp;
} else {
std::stringstream err;
err << "SLN Parser error: bad attribute operator (" << op << ") provided.";
throw SLNParseException(err.str());
}
POSTCONDITION(res, "bad query");
return res;
}
// Applies a list of parsed SLN attributes to an atom.
//
//   atom        the atom being decorated
//   attribs     (combine-operator, attribute) pairs in the order they were
//               parsed; each attribute carries a name (first), a value
//               (second), a comparison operator (op), a negation flag and,
//               for recursive "is" attributes, a structure query
//   doingQuery  true when the atom belongs to a query molecule
//
// Recognized attribute names (case-insensitive): charge, i, r, is, s, rbc,
// tbo, tac, hc, hac and f. Anything else is stored on the atom as a raw
// property. For non-query atoms "charge" and "i" are set directly on the atom,
// while query-only attributes are reported through the warning log and
// discarded. For query atoms the individual queries are combined according to
// the combine operators and finally AND-ed into the atom's query.
//
// Throws SLNParseException for bad integer values, unsupported operators,
// failed recursive queries and unknown combine operators.
void parseAtomAttribs(Atom *atom, AttribListType attribs, bool doingQuery) {
  QueryAtom::QUERYATOM_QUERY *atomQuery = nullptr;
  bool lastWasLowPriAnd = false;
  for (AttribListType::const_iterator it = attribs.begin(); it != attribs.end();
       ++it) {
    QueryAtom::QUERYATOM_QUERY *query = nullptr;
    AttribCombineOp how = it->first;

    boost::shared_ptr<AttribType> attribPtr = it->second;
    std::string attribName = attribPtr->first;
    boost::to_lower(attribName);
    std::string attribVal = attribPtr->second;
    if (attribName == "charge") {
      int chg = 0;
      if (attribVal == "-") {
        chg = -1;
      } else if (attribVal == "+") {
        chg = +1;
      } else {
        chg = parseIntAttribVal(attribName, attribVal);
      }
      if (!doingQuery) {
        atom->setFormalCharge(chg);
      } else {
        query = makeQueryFromOp(attribPtr->op, chg, queryAtomFormalCharge,
                                "AtomFormalCharge");
      }
    } else if (attribName == "i") {
      int val = parseIntAttribVal(attribName, attribVal);
      if (!doingQuery) {
        atom->setIsotope(static_cast<unsigned int>(val));
      } else {
        query = makeQueryFromOp(attribPtr->op, val, queryAtomIsotope,
                                "AtomIsotope");
      }
    } else if (attribName == "r") {
      if (attribVal != "") {
        BOOST_LOG(rdWarningLog)
            << "Query value '" << attribVal << "' ignored for r query\n";
      }
      if (!doingQuery) {
        BOOST_LOG(rdWarningLog) << "Query property '" << attribName
                                << "' ignored on non-query atom\n";
      } else {
        query = makeAtomInRingQuery();
      }
    } else if (attribName == "is") {
      // recursive queries:
      if (!attribPtr->structQuery) {
        throw SLNParseException("failed recursive query");
      }
      query = static_cast<QueryAtom::QUERYATOM_QUERY *>(attribPtr->structQuery);
    } else if (attribName == "s") {
      if (attribPtr->op != "=") {
        std::stringstream err;
        err << "SLN Parser error: comparison operator '" << attribPtr->op
            << "' not supported for chiral attributes.\n";
        throw SLNParseException(err.str());
      }
      boost::to_lower(attribVal);
      if (attribVal[0] == 'i' || attribVal[0] == 'n') {
        if (attribVal.size() > 1 && attribVal[1] == '*') {
          BOOST_LOG(rdWarningLog) << "Chiral modifier * ignored, chiral spec "
                                  << attribVal[0] << " will be used\n";
        }
        if (attribVal.size() > 1 && attribVal[1] == 'm') {
          BOOST_LOG(rdWarningLog) << "Chiral modifier m ignored, chiral spec "
                                  << attribVal[0] << " will be used\n";
        }
        // remember the (lower-cased) chiral spec on the atom;
        // adjustAtomChiralities() looks for this property once the molecule
        // is complete and translates it into an RDKit chiral tag. Without it
        // the stereo specification would be silently lost.
        atom->setProp(common_properties::_SLN_s, attribVal);
      } else {
        BOOST_LOG(rdWarningLog) << "Unsupported stereochemistry specifier '"
                                << attribVal << "' ignored.\n";
      }
    } else {
      // a block of properties that can have "f" values, and so need special
      // handling:
      std::string fTag = "";
      int val = 0;
      if (attribVal == "f" || attribName == "f") {
        // the real value is not known yet: tag the query description with
        // "_SLN_" and flag the atom so that parseFinalAtomAttribs() can fill
        // in the value later. -666 is just a placeholder until then.
        fTag = "_SLN_";
        atom->setProp(common_properties::_Unfinished_SLN_, 1);
        val = -666;
      }
      if (attribName == "rbc") {
        if (fTag == "") {
          val = parseIntAttribVal(attribName, attribVal);
        }
        query = makeQueryFromOp(attribPtr->op, val, queryAtomRingBondCount,
                                fTag + "AtomRingBondCount");
      } else if (attribName == "tbo") {
        if (fTag == "") {
          val = parseIntAttribVal(attribName, attribVal);
        }
        query = makeQueryFromOp(attribPtr->op, val, queryAtomTotalValence,
                                fTag + "AtomTotalValence");
      } else if (attribName == "tac") {
        if (fTag == "") {
          val = parseIntAttribVal(attribName, attribVal);
        }
        query = makeQueryFromOp(attribPtr->op, val, queryAtomTotalDegree,
                                fTag + "AtomTotalDegree");
      } else if (attribName == "hc") {
        if (fTag == "") {
          val = parseIntAttribVal(attribName, attribVal);
        }
        query = makeQueryFromOp(attribPtr->op, val, queryAtomHCount,
                                fTag + "AtomHCount");
      } else if (attribName == "hac") {
        if (fTag == "") {
          val = parseIntAttribVal(attribName, attribVal);
        }
        query = makeQueryFromOp(attribPtr->op, val, queryAtomNonHydrogenDegree,
                                fTag + "AtomHeavyAtomDegree");
      } else if (attribName == "f") {
        if (fTag == "") {
          val = parseIntAttribVal(attribName, attribVal);
        }
        query = makeQueryFromOp(
            "=", val, (int (*)(const RDKit::Atom *))(queryAtomAllBondProduct),
            fTag + "AtomBondEnvironment");
      } else {
        // anything we don't know how to deal with we'll just store in raw form:
        atom->setProp(attribName, attribVal);
      }
    }
    // if we've constructed a query from all that, then we need to add it to the
    // atomQuery:
    if (query) {
      if (!doingQuery) {
        BOOST_LOG(rdWarningLog) << "Query property '" << attribName
                                << "' ignored on non-query atom\n";
        delete query;
      } else {
        if (attribPtr->negated) {
          query->setNegation(!query->getNegation());
        }
        if (!atomQuery) {
          // first one is easy:
          atomQuery = query;
        } else {
          QueryAtom::QUERYATOM_QUERY *tQuery;
          switch (how) {
            case AttribAnd:
              // high-priority and:
              tQuery = new ATOM_AND_QUERY;
              tQuery->setDescription("AtomAnd");
              tQuery->addChild(
                  QueryAtom::QUERYATOM_QUERY::CHILD_TYPE(atomQuery));
              tQuery->addChild(QueryAtom::QUERYATOM_QUERY::CHILD_TYPE(query));
              atomQuery = tQuery;
              lastWasLowPriAnd = false;
              break;
            case AttribLowPriAnd:
              tQuery = new ATOM_AND_QUERY;
              tQuery->setDescription("AtomAnd");
              tQuery->addChild(
                  QueryAtom::QUERYATOM_QUERY::CHILD_TYPE(atomQuery));
              tQuery->addChild(QueryAtom::QUERYATOM_QUERY::CHILD_TYPE(query));
              atomQuery = tQuery;
              lastWasLowPriAnd = true;
              break;
            case AttribOr:
              if (lastWasLowPriAnd) {
                // if the last query was a low-priority AND, we need to
                // restructure the tree a bit: AND(a, b) followed by "| c"
                // has to become AND(a, OR(b, c)).
                QueryAtom::QUERYATOM_QUERY *newAndQuery;
                newAndQuery = new ATOM_AND_QUERY;
                newAndQuery->setDescription("AtomAnd");
                auto andChild = atomQuery->beginChildren();
                newAndQuery->addChild(*andChild);
                ++andChild;
                tQuery = new ATOM_OR_QUERY;
                tQuery->setDescription("AtomOr");
                tQuery->addChild(*andChild);
                // the new query is the second operand of the OR; leaving it
                // out would both leak it and drop it from the expression.
                tQuery->addChild(QueryAtom::QUERYATOM_QUERY::CHILD_TYPE(query));
                newAndQuery->addChild(
                    QueryAtom::QUERYATOM_QUERY::CHILD_TYPE(tQuery));
                // the children are shared pointers that are now also held by
                // the new nodes, so the old AND node itself can go.
                delete atomQuery;
                atomQuery = newAndQuery;
              } else {
                // otherwise we just do a normal expansion:
                tQuery = new ATOM_OR_QUERY;
                tQuery->setDescription("AtomOr");
                tQuery->addChild(
                    QueryAtom::QUERYATOM_QUERY::CHILD_TYPE(atomQuery));
                tQuery->addChild(QueryAtom::QUERYATOM_QUERY::CHILD_TYPE(query));
                atomQuery = tQuery;
              }
              lastWasLowPriAnd = false;
              break;
            default:
              throw SLNParseException(
                  "unrecognized query composition operator");
          }
        }
      }
    }  // end of query processing
  }    // end of loop over attribs
  if (atomQuery) {
    atom->expandQuery(atomQuery, Queries::COMPOSITE_AND);
  }
}
// Fills in the values of atom queries that were specified with "f" in the
// original SLN.
//
// Such queries are recognizable because their description starts with
// "_SLN_". For each of them the prefix is removed from the description and the
// query value is replaced with the result of calling the query's own data
// function on the atom.
//
// Nothing happens unless doingQuery is true, the atom has a query and the atom
// carries the _Unfinished_SLN_ marker property (which is cleared here).
void parseFinalAtomAttribs(Atom *atom, bool doingQuery) {
  PRECONDITION(atom, "no atom");

  const bool hasUnfinishedQueries =
      doingQuery && atom->hasQuery() &&
      atom->hasProp(common_properties::_Unfinished_SLN_);
  if (!hasUnfinishedQueries) {
    return;
  }
  atom->clearProp(common_properties::_Unfinished_SLN_);

  const std::string marker = "_SLN_";

  // breadth-first walk over the query tree:
  std::list<QueryAtom::QUERYATOM_QUERY *> pending;
  pending.push_back(atom->getQuery());
  while (!pending.empty()) {
    QueryAtom::QUERYATOM_QUERY *current = pending.front();
    pending.pop_front();

    std::string label = current->getDescription();
    const bool isPlaceholder = label.size() > marker.size() &&
                               label.compare(0, marker.size(), marker) == 0;
    if (isPlaceholder) {
      label.erase(0, marker.size());
      current->setDescription(label);
      static_cast<ATOM_EQUALS_QUERY *>(current)->setVal(
          static_cast<int>(current->getDataFunc()(atom)));
    }

    // queue up the children so that they get visited as well:
    auto childIt = current->beginChildren();
    while (childIt != current->endChildren()) {
      pending.push_back(
          const_cast<QueryAtom::QUERYATOM_QUERY *>(childIt->get()));
      ++childIt;
    }
  }
}
// Applies a list of parsed SLN attributes to a bond.
//
//   bond        the bond being decorated
//   attribs     (combine-operator, attribute) pairs in parse order
//   doingQuery  true when the bond belongs to a query molecule
//
// Recognized attribute names (case-insensitive) are "type" and "r"; anything
// else is stored on the bond as a raw property. An unknown combine operator
// results in an SLNParseException.
void parseBondAttribs(Bond *bond, AttribListType attribs, bool doingQuery) {
  // FIX: need to do the same query tree reordering here as we did above.
  bool haveTypeQuery = false;
  for (const auto &entry : attribs) {
    // both flavors of AND are treated the same way for bonds:
    Queries::CompositeQueryType combineHow;
    if (entry.first == AttribOr) {
      combineHow = Queries::COMPOSITE_OR;
    } else if (entry.first == AttribAnd || entry.first == AttribLowPriAnd) {
      combineHow = Queries::COMPOSITE_AND;
    } else {
      throw SLNParseException("unrecognized query composition operator");
    }

    boost::shared_ptr<AttribType> attribPtr = entry.second;
    std::string attribName = attribPtr->first;
    boost::to_lower(attribName);
    std::string attribVal = attribPtr->second;

    if (attribName == "type") {
      boost::to_lower(attribVal);
      Bond::BondType order = Bond::OTHER;
      if (attribVal == "-" || attribVal == "1") {
        order = Bond::SINGLE;
      } else if (attribVal == "=" || attribVal == "2") {
        order = Bond::DOUBLE;
      } else if (attribVal == "#" || attribVal == "3") {
        order = Bond::TRIPLE;
      } else if (attribVal == ":" || attribVal == "aromatic") {
        order = Bond::AROMATIC;
      } else {
        // not a type we can map: keep the original text around
        bond->setProp("SLN_Type", attribVal);
      }

      if (doingQuery) {
        QueryBond::QUERYBOND_QUERY *typeQuery = makeBondOrderEqualsQuery(order);
        if (attribPtr->negated) {
          typeQuery->setNegation(!typeQuery->getNegation());
        }
        if (haveTypeQuery) {
          static_cast<RDKit::QueryBond *>(bond)->expandQuery(typeQuery,
                                                             combineHow, true);
        } else {
          // if this is the first type query, we need to replace any existing
          // bond order queries:
          // FIX: this replaces tooo much, ring queries also get blown out
          bond->setQuery(typeQuery);
        }
        haveTypeQuery = true;
      } else {
        bond->setBondType(order);
      }
    } else if (attribName == "r") {
      if (!attribVal.empty()) {
        BOOST_LOG(rdWarningLog)
            << "Query value '" << attribVal << "' ignored for r query\n";
      }
      if (doingQuery) {
        QueryBond::QUERYBOND_QUERY *ringQuery = makeBondIsInRingQuery();
        if (attribPtr->negated) {
          ringQuery->setNegation(true);
        }
        static_cast<QueryBond *>(bond)->expandQuery(ringQuery, combineHow);
      } else {
        BOOST_LOG(rdWarningLog) << "Query property '" << attribName
                                << "' ignored on non-query bond\n";
      }
    } else {
      // anything we don't know how to deal with we'll just store in raw form:
      bond->setProp(attribName, attribVal);
    }
  }
}
// Stores a list of parsed SLN attributes as properties on a molecule.
//
//   mol      the molecule receiving the properties
//   attribs  (combine-operator, attribute) pairs; every combine operator is
//            required to be AttribAnd (checked with CHECK_INVARIANT)
//
// Attribute names are lower-cased. A value enclosed in a pair of double quotes
// has that one pair of quotes removed. The attribute "name" is stored under
// common_properties::_Name, every other attribute under its own (lower-cased)
// name.
void parseMolAttribs(ROMol *mol, AttribListType attribs) {
  for (AttribListType::const_iterator it = attribs.begin(); it != attribs.end();
       ++it) {
    CHECK_INVARIANT(it->first == AttribAnd, "bad attrib type");

    boost::shared_ptr<AttribType> attribPtr = it->second;
    std::string attribName = attribPtr->first;
    boost::to_lower(attribName);
    std::string attribVal = attribPtr->second;

    // strip a pair of enclosing double quotes. At least two characters are
    // required: a value consisting of a single '"' is both its own first and
    // last character, and erasing it twice would step before the start of an
    // empty string.
    if (attribVal.size() >= 2 && attribVal.front() == '"' &&
        attribVal.back() == '"') {
      attribVal.erase(attribVal.size() - 1, 1);
      attribVal.erase(0, 1);
    }

    if (attribName == "name") {
      mol->setProp(common_properties::_Name, attribVal);
    } else {
      mol->setProp(attribName, attribVal);
    }
  }
}
// Translates the SLN chirality stored on atoms (property _SLN_s) into RDKit
// chiral tags.
//
// Atoms without the _SLN_s property are left alone. For the others the tag is
// first set directly from the SLN value and then inverted if the atom's bonds,
// ordered by neighbor atom index, are an odd permutation of the atom's own
// bond ordering.
void adjustAtomChiralities(RWMol *mol) {
  for (auto atom : mol->atoms()) {
    std::string slnSpec;
    if (!atom->getPropIfPresent(common_properties::_SLN_s, slnSpec)) {
      continue;
    }

    // the atom is marked as chiral, translate the sln chirality into
    // RDKit chirality. Start with a straight map of the chirality value;
    // as a reminder, here are some SLN <-> SMILES pairs
    //   C[s=n]H(Cl)(F)Br <-> [C@@H](Cl)(F)Br (CHI_TETRAHEDRAL_CW)
    //   ClC[s=n]H(F)Br <-> Cl[C@H](F)Br (CHI_TETRAHEDRAL_CCW)
    //   FC[1:s=n](Cl)OCH2@1 <-> F[C@@]1(Cl)OC1 (CHI_TETRAHEDRAL_CW)
    switch (slnSpec[0]) {
      case 'n':
        atom->setChiralTag(Atom::CHI_TETRAHEDRAL_CW);
        break;
      case 'i':
        atom->setChiralTag(Atom::CHI_TETRAHEDRAL_CCW);
        break;
      default:
        break;
    }

    // collect (neighbor atom idx, bond idx) pairs and order them by neighbor
    // idx:
    std::list<std::pair<int, int>> nbrsAndBonds;
    for (auto bond : mol->atomBonds(atom)) {
      nbrsAndBonds.emplace_back(bond->getOtherAtomIdx(atom->getIdx()),
                                bond->getIdx());
    }
    nbrsAndBonds.sort();

    // the bond indices in that order give the bond ordering:
    std::list<int> bondOrdering;
    for (const auto &nbrAndBond : nbrsAndBonds) {
      bondOrdering.push_back(nbrAndBond.second);
    }

    // ok, we now have the ordering of the bonds (used for RDKit chirality),
    // figure out the permutation order relative to the atom numbering
    // (sln chirality). An odd number of swaps flips the tag:
    if (atom->getPerturbationOrder(bondOrdering) % 2) {
      const bool currentlyCW =
          atom->getChiralTag() == Atom::CHI_TETRAHEDRAL_CW;
      atom->setChiralTag(currentlyCW ? Atom::CHI_TETRAHEDRAL_CCW
                                     : Atom::CHI_TETRAHEDRAL_CW);
    }
  }
}
} // namespace SLNParse
} // namespace RDKit