Files
rdkit/Code/GraphMol/ChemReactions/ReactionWriter.cpp
Kollin Trujillo 2e5f7ce80c Support Writing CX Extensions for Reactions (#7838)
* Support writing CX extensions in reactions

* fixed merge conflicts

* wip

* Updated for getCXExtensions

* Refactored and deleted extraneous file.

* Updated function signatures

* Updated some tests

* Removed extraneous include from debugging

* Removed comment in reactionwriter.cpp

* Updated some tests with expected strings

* Updated to add logging for linknodes and substance group hierarchies

* Addressed some issues

* updated tests

* Addressed Greg's comments

* Updated for recommendations

---------

Co-authored-by: Rachel Walker <rachel.walker@schrodinger.com>
Co-authored-by: Greg Landrum <greg.landrum@gmail.com>
2024-10-24 19:31:04 +02:00

284 lines
9.8 KiB
C++

//
// Copyright (c) 2010-2024, Novartis Institutes for BioMedical Research Inc.
// and other RDKit contributors
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Novartis Institutes for BioMedical Research Inc.
// nor the names of its contributors may be used to endorse or promote
// products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
#include <GraphMol/ChemReactions/Reaction.h>
#include <GraphMol/ChemReactions/ReactionParser.h>
#include <GraphMol/ChemReactions/ReactionUtils.h>
#include <GraphMol/FileParsers/FileParsers.h>
#include <GraphMol/FileParsers/FileParserUtils.h>
#include <GraphMol/SmilesParse/SmartsWrite.h>
#include <GraphMol/MolOps.h>
#include <GraphMol/Chirality.h>
#include <GraphMol/FileParsers/MolFileStereochem.h>
#include <algorithm>
#include <iomanip>
#include <memory>
#include <sstream>
namespace {
// Stores `role` in the molRxnRole property of every atom of `mol`.
void setRXNRoleOfAllMoleculeAtoms(RDKit::ROMol &mol, int role) {
  const RDKit::ROMol::ATOM_ITER_PAIR vertexRange = mol.getVertices();
  for (auto vertexIt = vertexRange.first; vertexIt != vertexRange.second;
       ++vertexIt) {
    RDKit::Atom *atom = mol[*vertexIt];
    atom->setProp(RDKit::common_properties::molRxnRole, role);
  }
}
// Writes `mol` as SMILES (toSmiles == true) or SMARTS (toSmiles == false).
// If the molecule consists of more than one fragment the text is wrapped in
// parentheses so that it stays a single unit inside a reaction string.
std::string molToString(RDKit::ROMol &mol, bool toSmiles,
                        const RDKit::SmilesWriteParams &params) {
  std::string text =
      toSmiles ? MolToSmiles(mol, params) : MolToSmarts(mol, params);

  std::vector<int> fragMapping;
  const bool singleFragment = RDKit::MolOps::getMolFrags(mol, fragMapping) <= 1;
  if (singleFragment) {
    return text;
  }
  return "(" + text + ")";
}
// Builds the dot-separated text for all templates of one role (`type`) of
// `rxn`. Each template is written with molToString(); when params.canonical
// is set the individual strings are sorted before being joined.
std::string chemicalReactionTemplatesToString(
    const RDKit::ChemicalReaction &rxn, RDKit::ReactionMoleculeType type,
    bool toSmiles, const RDKit::SmilesWriteParams &params) {
  std::vector<std::string> pieces;
  auto molIt = getStartIterator(rxn, type);
  const auto molEnd = getEndIterator(rxn, type);
  while (molIt != molEnd) {
    pieces.push_back(molToString(**molIt, toSmiles, params));
    ++molIt;
  }

  if (params.canonical) {
    std::sort(pieces.begin(), pieces.end());
  }

  // join with "." between (not after) the entries
  std::string joined;
  bool needSeparator = false;
  for (const auto &piece : pieces) {
    if (needSeparator) {
      joined += ".";
    }
    joined += piece;
    needSeparator = true;
  }
  return joined;
}
// Assembles "reactants>agents>products" for `rxn`, as SMILES or SMARTS
// depending on `toSmiles`. When `includeCX` is set, the CX extension block
// for all templates (restricted by `flags`) is appended after a single space,
// provided it is not empty.
//
// NOTE(review): with params.canonical set, the per-role strings are sorted in
// chemicalReactionTemplatesToString(), while the molecules handed to
// getCXExtensions() below stay in template order - confirm that the extension
// indices still line up with the written string in that case.
std::string chemicalReactionToRxnToString(
    const RDKit::ChemicalReaction &rxn, bool toSmiles,
    const RDKit::SmilesWriteParams &params, bool includeCX,
    std::uint32_t flags = RDKit::SmilesWrite::CXSmilesFields::CX_ALL) {
  // The three parts are generated one after the other, in this order, before
  // any CX extension is computed.
  const std::string reactantPart =
      chemicalReactionTemplatesToString(rxn, RDKit::Reactant, toSmiles, params);
  const std::string agentPart =
      chemicalReactionTemplatesToString(rxn, RDKit::Agent, toSmiles, params);
  const std::string productPart =
      chemicalReactionTemplatesToString(rxn, RDKit::Product, toSmiles, params);

  std::string text = reactantPart;
  text += ">";
  text += agentPart;
  text += ">";
  text += productPart;

  if (!includeCX) {
    return text;
  }

  // gather reactants, agents and products (in that order) for the extension
  std::vector<RDKit::ROMol *> allMols;
  for (auto role : {RDKit::Reactant, RDKit::Agent, RDKit::Product}) {
    for (auto molIt = getStartIterator(rxn, role);
         molIt != getEndIterator(rxn, role); ++molIt) {
      allMols.push_back(molIt->get());
    }
  }

  const auto extension = RDKit::SmilesWrite::getCXExtensions(allMols, flags);
  if (!extension.empty()) {
    text += " ";
    text += extension;
  }
  return text;
}
void write_template(std::ostringstream &res, RDKit::ROMol &tpl) {
RDKit::RWMol trwmol(tpl);
if (trwmol.needsUpdatePropertyCache()) {
trwmol.updatePropertyCache(false);
}
RDKit::FileParserUtils::moveAdditionalPropertiesToSGroups(trwmol);
res << RDKit::FileParserUtils::getV3000CTAB(trwmol, -1);
}
} // namespace
namespace RDKit {
//! writes a reaction as reaction SMARTS (no CX extension)
std::string ChemicalReactionToRxnSmarts(const ChemicalReaction &rxn,
                                        const SmilesWriteParams &params) {
  const bool toSmiles = false;
  const bool includeCX = false;
  return chemicalReactionToRxnToString(rxn, toSmiles, params, includeCX);
}
//! writes a reaction as reaction SMILES (no CX extension)
std::string ChemicalReactionToRxnSmiles(const ChemicalReaction &rxn,
                                        const SmilesWriteParams &params) {
  const bool toSmiles = true;
  const bool includeCX = false;
  return chemicalReactionToRxnToString(rxn, toSmiles, params, includeCX);
}
//! writes a reaction as reaction SMARTS followed by its CX extension;
//! `flags` selects which CX fields are written
std::string ChemicalReactionToCXRxnSmarts(const ChemicalReaction &rxn,
                                          const SmilesWriteParams &params,
                                          std::uint32_t flags) {
  const bool toSmiles = false;
  const bool includeCX = true;
  return chemicalReactionToRxnToString(rxn, toSmiles, params, includeCX,
                                       flags);
}
//! writes a reaction as reaction SMILES followed by its CX extension;
//! `flags` selects which CX fields are written
std::string ChemicalReactionToCXRxnSmiles(const ChemicalReaction &rxn,
                                          const SmilesWriteParams &params,
                                          std::uint32_t flags) {
  const bool toSmiles = true;
  const bool includeCX = true;
  return chemicalReactionToRxnToString(rxn, toSmiles, params, includeCX,
                                       flags);
}
//! returns a V3000 RXN block for a reaction
/*!
  \param rxn             the reaction to write
  \param separateAgents  if true, the agent count is written as a third number
                         on the COUNTS line and the agents get their own
                         AGENT section after the products; if false, the
                         agents are counted and written together with the
                         reactants

  \return the text of the RXN block
*/
std::string ChemicalReactionToV3KRxnBlock(const ChemicalReaction &rxn,
                                          bool separateAgents) {
  std::ostringstream res;

  // writes the CTAB of every template in [first, last)
  auto writeTemplates = [&res](auto first, auto last) {
    for (; first != last; ++first) {
      write_template(res, **first);
    }
  };

  // Header: the program name follows the six-character user-initials field.
  res << "$RXN V3000\n\n      RDKit\n\n";

  // Every V3000 line has to start with the literal prefix "M  V30 "
  // (two spaces after the M); the block is terminated by "M  END".
  res << "M  V30 COUNTS ";
  if (separateAgents) {
    res << rxn.getNumReactantTemplates() << " " << rxn.getNumProductTemplates()
        << " " << rxn.getNumAgentTemplates();
  } else {
    res << rxn.getNumReactantTemplates() + rxn.getNumAgentTemplates() << " "
        << rxn.getNumProductTemplates();
  }
  res << "\n";

  res << "M  V30 BEGIN REACTANT\n";
  writeTemplates(rxn.beginReactantTemplates(), rxn.endReactantTemplates());
  if (!separateAgents) {
    // agents are folded into the reactant section
    writeTemplates(rxn.beginAgentTemplates(), rxn.endAgentTemplates());
  }
  res << "M  V30 END REACTANT\n";

  res << "M  V30 BEGIN PRODUCT\n";
  writeTemplates(rxn.beginProductTemplates(), rxn.endProductTemplates());
  res << "M  V30 END PRODUCT\n";

  if (separateAgents) {
    res << "M  V30 BEGIN AGENT\n";
    writeTemplates(rxn.beginAgentTemplates(), rxn.endAgentTemplates());
    res << "M  V30 END AGENT\n";
  }

  res << "M  END\n";
  return res.str();
}
//! returns an RXN block for a reaction
/*!
  \param rxn             the reaction to write
  \param separateAgents  if true, the agent count is written as a third
                         three-character field on the counts line and the
                         agent mol blocks are written after the products; if
                         false, the agents are counted and written together
                         with the reactants
  \param forceV3000      if true, the work is delegated to
                         ChemicalReactionToV3KRxnBlock()

  \return the text of the RXN block
*/
std::string ChemicalReactionToRxnBlock(const ChemicalReaction &rxn,
                                       bool separateAgents, bool forceV3000) {
  if (forceV3000) {
    return ChemicalReactionToV3KRxnBlock(rxn, separateAgents);
  }

  std::ostringstream res;

  // writes "$MOL" followed by the mol block of every template in
  // [first, last)
  auto writeMolBlocks = [&res](auto first, auto last) {
    for (; first != last; ++first) {
      res << "$MOL\n";
      res << MolToMolBlock(**first, true, -1, false);
    }
  };

  // Header: the program name follows the six-character user-initials field.
  res << "$RXN\n\n      RDKit\n\n";

  // counts line: each count occupies a three-character, right-aligned field
  if (separateAgents) {
    res << std::setw(3) << rxn.getNumReactantTemplates() << std::setw(3)
        << rxn.getNumProductTemplates() << std::setw(3)
        << rxn.getNumAgentTemplates() << "\n";
  } else {
    res << std::setw(3)
        << (rxn.getNumReactantTemplates() + rxn.getNumAgentTemplates())
        << std::setw(3) << rxn.getNumProductTemplates() << "\n";
  }

  writeMolBlocks(rxn.beginReactantTemplates(), rxn.endReactantTemplates());
  if (!separateAgents) {
    // agents are written as additional reactants
    writeMolBlocks(rxn.beginAgentTemplates(), rxn.endAgentTemplates());
  }
  writeMolBlocks(rxn.beginProductTemplates(), rxn.endProductTemplates());
  if (separateAgents) {
    writeMolBlocks(rxn.beginAgentTemplates(), rxn.endAgentTemplates());
  }
  return res.str();
}
//! returns a ROMol with RXNMolRole used for a reaction
/*!
  The templates are copied into a single molecule in the order reactants,
  products, agents. Before a template is copied, the molRxnRole property of
  all of its atoms is set to 1 (reactant), 2 (product) or 3 (agent).

  \param rxn  the reaction to convert. Note that the role property is written
              to the atoms of the reaction's own templates, not only to the
              copies in the result.

  \return a newly allocated molecule; the caller is responsible for deleting
          it
*/
ROMol *ChemicalReactionToRxnMol(const ChemicalReaction &rxn) {
  // Own the result while it is being built so that it is released if any of
  // the calls below throws.
  auto res = std::make_unique<RWMol>();

  // tags every template in [first, last) with `role` and appends it to res
  auto addTemplates = [&res](auto first, auto last, int role) {
    for (; first != last; ++first) {
      setRXNRoleOfAllMoleculeAtoms(**first, role);
      res->insertMol(**first);
    }
  };

  addTemplates(rxn.beginReactantTemplates(), rxn.endReactantTemplates(), 1);
  addTemplates(rxn.beginProductTemplates(), rxn.endProductTemplates(), 2);
  addTemplates(rxn.beginAgentTemplates(), rxn.endAgentTemplates(), 3);

  // hand ownership to the caller; RWMol* converts implicitly to ROMol*
  return res.release();
}
} // namespace RDKit