// // Copyright (c) 2007-2026, Novartis Institutes for BioMedical Research Inc. // and other RDKit contributors // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following // disclaimer in the documentation and/or other materials provided // with the distribution. // * Neither the name of Novartis Institutes for BioMedical Research Inc. // nor the names of its contributors may be used to endorse or promote // products derived from this software without specific prior written // permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ReactionUtils.h" namespace RDKit { using namespace v2::FileParsers; namespace { void ParseV2000RxnBlock(std::istream &inStream, unsigned int &line, const MolFileParserParams ¶ms, std::unique_ptr &rxn) { std::string tempStr; // FIX: parse name and comment fields line++; tempStr = getLine(inStream); line++; tempStr = getLine(inStream); line++; tempStr = getLine(inStream); line++; tempStr = getLine(inStream); if (inStream.eof()) { throw ChemicalReactionParserException("premature EOF hit."); } unsigned int nReacts = 0, nProds = 0, nAgents = 0; unsigned int spos = 0; if (tempStr.size() < 6) { throw ChemicalReactionParserException("rxn counts line is too short"); } rxn.reset(new ChemicalReaction()); try { nReacts = FileParserUtils::stripSpacesAndCast(tempStr.substr(0, 3)); spos = 3; nProds = FileParserUtils::stripSpacesAndCast( tempStr.substr(spos, 3)); spos = 6; ; if (tempStr.size() > 6) { std::string trimmed = boost::trim_copy(tempStr.substr(spos, 3)); if (trimmed.size() > 0) { nAgents = FileParserUtils::stripSpacesAndCast( tempStr.substr(spos, 3)); spos = 9; } } } catch (boost::bad_lexical_cast &) { std::ostringstream errout; errout << "Cannot convert " << tempStr.substr(spos, 3) << " to int"; throw ChemicalReactionParserException(errout.str()); } for (unsigned int i = 0; i < nReacts; ++i) { line++; tempStr = getLine(inStream); if (inStream.eof()) { throw ChemicalReactionParserException("premature EOF hit."); } if (tempStr.substr(0, 4) != "$MOL") { throw ChemicalReactionParserException("$MOL header not found"); } ROMol *react; try { react = MolFromMolDataStream(inStream, line, params).release(); } catch (FileParseException &e) { std::ostringstream errout; errout << "Cannot parse reactant " << i << ". The error was:\n\t" << e.what(); throw ChemicalReactionParserException(errout.str()); } if (!react) { throw ChemicalReactionParserException("Null reactant in reaction file."); } rxn->addReactantTemplate(ROMOL_SPTR(react)); } for (unsigned int i = 0; i < nProds; ++i) { line++; tempStr = getLine(inStream); if (inStream.eof()) { throw ChemicalReactionParserException("premature EOF hit."); } if (tempStr.substr(0, 4) != "$MOL") { throw ChemicalReactionParserException("$MOL header not found"); } ROMol *prod; try { prod = MolFromMolDataStream(inStream, line, params).release(); } catch (FileParseException &e) { std::ostringstream errout; errout << "Cannot parse product " << i << ". The error was:\n\t" << e.what(); throw ChemicalReactionParserException(errout.str()); } if (!prod) { throw ChemicalReactionParserException("Null product in reaction file."); } rxn->addProductTemplate(ROMOL_SPTR(prod)); } for (unsigned int i = 0; i < nAgents; ++i) { line++; tempStr = getLine(inStream); if (inStream.eof()) { throw ChemicalReactionParserException("premature EOF hit."); } if (tempStr.substr(0, 4) != "$MOL") { throw ChemicalReactionParserException("$MOL header not found"); } // we don't sanitize or remove Hs from agents MolFileParserParams agentParams; agentParams.sanitize = false; agentParams.removeHs = false; agentParams.strictParsing = params.strictParsing; ROMol *agent; try { agent = MolFromMolDataStream(inStream, line, agentParams).release(); } catch (FileParseException &e) { std::ostringstream errout; errout << "Cannot parse agent " << i << ". The error was:\n\t" << e.what(); throw ChemicalReactionParserException(errout.str()); } rxn->addAgentTemplate(ROMOL_SPTR(agent)); } } void ParseV3000RxnBlock(std::istream &inStream, unsigned int &line, const MolFileParserParams ¶ms, std::unique_ptr &rxn) { std::string tempStr; // skip the header block: line++; tempStr = getLine(inStream); line++; tempStr = getLine(inStream); line++; tempStr = getLine(inStream); line++; tempStr = FileParserUtils::getV3000Line(&inStream, line); boost::to_upper(tempStr); tempStr = boost::trim_copy(tempStr); std::vector tokens; boost::split(tokens, tempStr, boost::is_any_of(" \t"), boost::token_compress_on); if (tokens.size() < 3 || tokens[0] != "COUNTS") { throw ChemicalReactionParserException("bad counts line"); } auto nReacts = FileParserUtils::stripSpacesAndCast(tokens[1]); auto nProds = FileParserUtils::stripSpacesAndCast(tokens[2]); unsigned int nAgents = 0; if (tokens.size() > 3) { nAgents = FileParserUtils::stripSpacesAndCast(tokens[3]); } tempStr = FileParserUtils::getV3000Line(&inStream, line); boost::to_upper(tempStr); if (tempStr.length() < 14 || tempStr.substr(0, 14) != "BEGIN REACTANT") { throw FileParseException("BEGIN REACTANT line not found"); } rxn.reset(new ChemicalReaction()); const bool expectMEND = false; for (unsigned int i = 0; i < nReacts; ++i) { RWMol *react; unsigned int natoms, nbonds; bool chiralityPossible = false; Conformer *conf = nullptr; react = new RWMol(); try { FileParserUtils::ParseV3000CTAB(&inStream, line, react, conf, chiralityPossible, natoms, nbonds, params.strictParsing, expectMEND); FileParserUtils::finishMolProcessing(react, chiralityPossible, params); } catch (FileParseException &e) { std::ostringstream errout; errout << "Cannot parse reactant " << i << ". The error was:\n\t" << e.what(); delete react; throw ChemicalReactionParserException(errout.str()); } if (!react) { throw ChemicalReactionParserException("Null reactant in reaction file."); } rxn->addReactantTemplate(ROMOL_SPTR(dynamic_cast(react))); } tempStr = FileParserUtils::getV3000Line(&inStream, line); boost::to_upper(tempStr); if (tempStr.length() < 12 || tempStr.substr(0, 12) != "END REACTANT") { throw FileParseException("END REACTANT line not found"); } tempStr = FileParserUtils::getV3000Line(&inStream, line); boost::to_upper(tempStr); if (tempStr.length() < 13 || tempStr.substr(0, 13) != "BEGIN PRODUCT") { throw FileParseException("BEGIN PRODUCT line not found"); } for (unsigned int i = 0; i < nProds; ++i) { RWMol *prod; unsigned int natoms, nbonds; bool chiralityPossible = false; Conformer *conf = nullptr; prod = new RWMol(); try { FileParserUtils::ParseV3000CTAB(&inStream, line, prod, conf, chiralityPossible, natoms, nbonds, params.strictParsing, expectMEND); FileParserUtils::finishMolProcessing(prod, chiralityPossible, params); } catch (FileParseException &e) { std::ostringstream errout; errout << "Cannot parse product " << i << ". The error was:\n\t" << e.what(); delete prod; throw ChemicalReactionParserException(errout.str()); } if (!prod) { throw ChemicalReactionParserException("Null product in reaction file."); } rxn->addProductTemplate(ROMOL_SPTR(dynamic_cast(prod))); } tempStr = FileParserUtils::getV3000Line(&inStream, line); boost::to_upper(tempStr); if (tempStr.length() < 11 || tempStr.substr(0, 11) != "END PRODUCT") { throw FileParseException("END PRODUCT line not found"); } if (nAgents) { tempStr = FileParserUtils::getV3000Line(&inStream, line); boost::to_upper(tempStr); if (tempStr.length() < 11 || tempStr.substr(0, 11) != "BEGIN AGENT") { throw FileParseException("BEGIN AGENT line not found"); } } for (unsigned int i = 0; i < nAgents; ++i) { RWMol *agent; unsigned int natoms, nbonds; bool chiralityPossible = false; Conformer *conf = nullptr; agent = new RWMol(); try { FileParserUtils::ParseV3000CTAB(&inStream, line, agent, conf, chiralityPossible, natoms, nbonds, true, false); FileParserUtils::finishMolProcessing(agent, chiralityPossible, params); } catch (FileParseException &e) { std::ostringstream errout; errout << "Cannot parse agent " << i << ". The error was:\n\t" << e.what(); delete agent; throw ChemicalReactionParserException(errout.str()); } rxn->addAgentTemplate(ROMOL_SPTR(dynamic_cast(agent))); } if (nAgents) { tempStr = FileParserUtils::getV3000Line(&inStream, line); boost::to_upper(tempStr); if (tempStr.length() < 9 || tempStr.substr(0, 9) != "END AGENT") { throw FileParseException("END AGENT line not found"); } } } } // namespace namespace v2 { namespace ReactionParser { //! Parse a text stream in MDL rxn format into a ChemicalReaction std::unique_ptr ReactionFromRxnDataStream( std::istream &inStream, unsigned int &line, const MolFileParserParams ¶ms) { std::string tempStr; // header line line++; tempStr = getLine(inStream); if (inStream.eof()) { throw ChemicalReactionParserException("premature EOF hit."); } if (tempStr.substr(0, 4) != "$RXN") { throw ChemicalReactionParserException("$RXN header not found"); } int version = 2000; if (tempStr.size() >= 10 && tempStr.substr(5, 5) == "V3000") { version = 3000; } std::unique_ptr res; try { if (version == 2000) { ParseV2000RxnBlock(inStream, line, params, res); } else { ParseV3000RxnBlock(inStream, line, params, res); } } catch (ChemicalReactionParserException &e) { // catch our exceptions and throw them back after cleanup res.reset(); throw e; } // convert atoms to queries: for (MOL_SPTR_VECT::const_iterator iter = res->beginReactantTemplates(); iter != res->endReactantTemplates(); ++iter) { // to write the mol block, we need ring information: for (auto atom : (*iter)->atoms()) { QueryOps::replaceAtomWithQueryAtom((RWMol *)iter->get(), atom); } } for (MOL_SPTR_VECT::const_iterator iter = res->beginProductTemplates(); iter != res->endProductTemplates(); ++iter) { // to write the mol block, we need ring information: for (auto atom : (*iter)->atoms()) { QueryOps::replaceAtomWithQueryAtom((RWMol *)iter->get(), atom); } } updateProductsStereochem(res.get()); // RXN-based reactions do not have implicit properties res->setImplicitPropertiesFlag(false); return res; }; std::unique_ptr ReactionFromRxnBlock( const std::string &rxnBlock, const MolFileParserParams ¶ms) { std::istringstream inStream(rxnBlock); unsigned int line = 0; return ReactionFromRxnDataStream(inStream, line, params); }; std::unique_ptr ReactionFromRxnFile( const std::string &fName, const MolFileParserParams ¶ms) { std::ifstream inStream(fName.c_str()); if (!inStream || inStream.eof()) { return nullptr; } unsigned int line = 0; return ReactionFromRxnDataStream(inStream, line, params); }; } // namespace ReactionParser } // namespace v2 } // namespace RDKit