diff --git a/Code/GraphMol/ChemReactions/ReactionWriter.cpp b/Code/GraphMol/ChemReactions/ReactionWriter.cpp index 9e150487f..21317021c 100644 --- a/Code/GraphMol/ChemReactions/ReactionWriter.cpp +++ b/Code/GraphMol/ChemReactions/ReactionWriter.cpp @@ -103,14 +103,18 @@ std::string chemicalReactionToRxnToString(const RDKit::ChemicalReaction &rxn, } void write_template(std::ostringstream &res, RDKit::ROMol &tpl) { - if (tpl.needsUpdatePropertyCache()) { - tpl.updatePropertyCache(false); + RDKit::RWMol trwmol(tpl); + + if (trwmol.needsUpdatePropertyCache()) { + trwmol.updatePropertyCache(false); } // to write the mol block, we need ring information: - if (!tpl.getRingInfo()->isInitialized()) { - RDKit::MolOps::findSSSR(tpl); + if (!trwmol.getRingInfo()->isInitialized()) { + RDKit::MolOps::findSSSR(trwmol); } - res << RDKit::FileParserUtils::getV3000CTAB(tpl, -1); + RDKit::FileParserUtils::moveAdditionalPropertiesToSGroups(trwmol); + + res << RDKit::FileParserUtils::getV3000CTAB(trwmol, -1); } } // namespace diff --git a/Code/GraphMol/ChemReactions/catch_tests.cpp b/Code/GraphMol/ChemReactions/catch_tests.cpp index 38f22642d..c7809c5f2 100644 --- a/Code/GraphMol/ChemReactions/catch_tests.cpp +++ b/Code/GraphMol/ChemReactions/catch_tests.cpp @@ -1584,4 +1584,75 @@ TEST_CASE("problematic in-place example from MolStandardize") { rxn->runReactant(*m); CHECK(MolToSmiles(*m)=="CN(C)c1cc[nH+]cc1"); } +} + +TEST_CASE( + "github #5819: Support writing detailed SMARTS queries to CTABs using the SMARTSQ mechanism") { + SECTION("as reported") { + auto rxn = "[C;$(C([#6])=O):1][OH:2]>>[C:1][NH2:2]"_rxnsmarts; + REQUIRE(rxn); + auto ctab = ChemicalReactionToV3KRxnBlock(*rxn); + + // make sure the templates haven't been modified by that operation + CHECK(getSubstanceGroups(*rxn->getReactants()[0]).empty()); + + CHECK(ctab.find("SMARTSQ") != std::string::npos); + CHECK(ctab.find("FIELDDATA=\"[C&$(C([#6])=O):1]\"") != std::string::npos); + CHECK(ctab.find("FIELDDATA=\"[N&H2:2]\"") != std::string::npos); + + // make sure we can properly parse that + std::unique_ptr nrxn{RxnBlockToChemicalReaction(ctab)}; + REQUIRE(nrxn); + CHECK(MolToSmarts(*nrxn->getReactants()[0]) == "[C&$(C([#6])=O):1][O&H1:2]"); + } + + +SECTION("isotopes are weird"){ + auto rxnb = R"CTAB($RXN V3000 + + Mrv2211 111620231722 + +M V30 COUNTS 1 1 +M V30 BEGIN REACTANT +M V30 BEGIN CTAB +M V30 COUNTS 4 3 0 0 0 +M V30 BEGIN ATOM +M V30 1 C 6.9035 -10.0923 0 1 +M V30 2 O 6.9035 -8.5923 0 2 MASS=18 +M V30 3 Cl 5.6045 -10.8423 0 0 +M V30 4 C 8.2025 -10.8423 0 4 +M V30 END ATOM +M V30 BEGIN BOND +M V30 1 1 1 4 +M V30 2 2 1 2 +M V30 3 1 1 3 +M V30 END BOND +M V30 END CTAB +M V30 END REACTANT +M V30 BEGIN PRODUCT +M V30 BEGIN CTAB +M V30 COUNTS 4 3 0 0 0 +M V30 BEGIN ATOM +M V30 1 C 22.0416 -9.3829 0 0 +M V30 2 C 23.3406 -8.6329 0 1 +M V30 3 O 23.3406 -7.1329 0 2 MASS=18 +M V30 4 C 24.6396 -9.3829 0 4 +M V30 END ATOM +M V30 BEGIN BOND +M V30 1 1 1 2 +M V30 2 1 2 4 +M V30 3 2 2 3 +M V30 END BOND +M V30 END CTAB +M V30 END PRODUCT +M END +)CTAB"; + std::unique_ptr rxn{RxnBlockToChemicalReaction(rxnb)}; + REQUIRE(rxn); + CHECK(rxn->getReactants()[0]->getAtomWithIdx(0)->hasQuery()); + CHECK(rxn->getReactants()[0]->getAtomWithIdx(1)->hasQuery()); + auto ctab = ChemicalReactionToV3KRxnBlock(*rxn); + CHECK(ctab.find("SMARTSQ") == std::string::npos); + +} } \ No newline at end of file diff --git a/Code/GraphMol/FileParsers/FileParserUtils.h b/Code/GraphMol/FileParsers/FileParserUtils.h index dd19dcadd..529089741 100644 --- a/Code/GraphMol/FileParsers/FileParserUtils.h +++ b/Code/GraphMol/FileParsers/FileParserUtils.h @@ -250,6 +250,8 @@ inline void createAtomStringPropertyList( missingValueMarker, lineSize)); } +RDKIT_FILEPARSERS_EXPORT void moveAdditionalPropertiesToSGroups(RWMol &mol); + } // namespace FileParserUtils } // namespace RDKit diff --git a/Code/GraphMol/FileParsers/MolFileParser.cpp b/Code/GraphMol/FileParsers/MolFileParser.cpp index 296acc7ac..6f8eeace2 100644 --- a/Code/GraphMol/FileParsers/MolFileParser.cpp +++ b/Code/GraphMol/FileParsers/MolFileParser.cpp @@ -1541,7 +1541,6 @@ Atom *ParseMolFileAtomLine(const std::string_view text, RDGeom::Point3D &pos, << " has a negative isotope offset. line: " << line << std::endl; } res->setIsotope(dIso); - res->setProp(common_properties::_hasMassQuery, true); } if (text.size() >= 42 && text.substr(39, 3) != " 0") { diff --git a/Code/GraphMol/FileParsers/MolFileWriter.cpp b/Code/GraphMol/FileParsers/MolFileWriter.cpp index c0d29d80d..579e82363 100644 --- a/Code/GraphMol/FileParsers/MolFileWriter.cpp +++ b/Code/GraphMol/FileParsers/MolFileWriter.cpp @@ -1,5 +1,5 @@ // -// Copyright (C) 2003-2021 Greg Landrum and other RDKit contributors +// Copyright (C) 2003-2023 Greg Landrum and other RDKit contributors // // @@ All Rights Reserved @@ // This file is part of the RDKit. @@ -1037,28 +1037,55 @@ int BondStereoCodeV2000ToV3000(int dirCode) { namespace { void createSMARTSQSubstanceGroups(ROMol &mol) { + auto isRedundantQuery = [](const auto query) { + if (query->getDescription() == "AtomAnd" && + (query->endChildren() - query->beginChildren() == 2) && + (*query->beginChildren())->getDescription() == "AtomAtomicNum" && + !(*query->beginChildren())->getNegation() && + !(*(query->beginChildren() + 1))->getNegation() && + ((*(query->beginChildren() + 1))->getDescription() == "AtomIsotope" || + (*(query->beginChildren() + 1))->getDescription() == + "AtomFormalCharge")) { + return true; + } + return false; + }; for (const auto atom : mol.atoms()) { - std::string sma; - if (atom->hasQuery() && - atom->getPropIfPresent(common_properties::MRV_SMA, sma) && - !sma.empty()) { - SubstanceGroup sg(&mol, "DAT"); - sg.setProp("QUERYTYPE", "SMARTSQ"); - sg.setProp("QUERYOP", "="); - std::vector dataFields{sma}; - sg.setProp("DATAFIELDS", dataFields); - sg.addAtomWithIdx(atom->getIdx()); - addSubstanceGroup(mol, sg); + if (atom->hasQuery()) { + std::string sma; + + if (!atom->getPropIfPresent(common_properties::MRV_SMA, sma) && + !isAtomListQuery(atom) && + atom->getQuery()->getDescription() != "AtomNull" && + // we may want to re-think this next one. + // including AtomType queries will result in an entry + // for every atom that comes from SMARTS, and I don't think + // we want that. + !boost::starts_with(atom->getQuery()->getDescription(), "AtomType") && + !boost::starts_with(atom->getQuery()->getDescription(), + "AtomAtomicNum") && + !isRedundantQuery(atom->getQuery())) { + sma = SmartsWrite::GetAtomSmarts(static_cast(atom)); + } + if (!sma.empty()) { + SubstanceGroup sg(&mol, "DAT"); + sg.setProp("QUERYTYPE", "SMARTSQ"); + sg.setProp("QUERYOP", "="); + std::vector dataFields{sma}; + sg.setProp("DATAFIELDS", dataFields); + sg.addAtomWithIdx(atom->getIdx()); + addSubstanceGroup(mol, sg); + } } } } } // namespace - +namespace FileParserUtils { void moveAdditionalPropertiesToSGroups(RWMol &mol) { GenericGroups::convertGenericQueriesToSubstanceGroups(mol); createSMARTSQSubstanceGroups(mol); } - +} // namespace FileParserUtils const std::string GetV3000MolFileBondLine(const Bond *bond, const INT_MAP_INT &wedgeBonds, const Conformer *conf) { @@ -1374,7 +1401,7 @@ std::string MolToMolBlock(const ROMol &mol, bool includeStereo, int confId, MolOps::assignStereochemistry(trwmol); } #endif - moveAdditionalPropertiesToSGroups(trwmol); + FileParserUtils::moveAdditionalPropertiesToSGroups(trwmol); try { return outputMolToMolBlock(trwmol, confId, forceV3000); diff --git a/Code/GraphMol/FileParsers/file_parsers_catch.cpp b/Code/GraphMol/FileParsers/file_parsers_catch.cpp index 0f4e44140..b46fe9e46 100644 --- a/Code/GraphMol/FileParsers/file_parsers_catch.cpp +++ b/Code/GraphMol/FileParsers/file_parsers_catch.cpp @@ -6804,4 +6804,27 @@ TEST_CASE( REQUIRE(bond2->getBondType() == Bond::DOUBLE); CHECK(bond2->getStereo() == Bond::BondStereo::STEREONONE); CHECK(bond2->getBondDir() == Bond::BondDir::NONE); +} + +TEST_CASE( + "github #5819: Support writing detailed SMARTS queries to CTABs using the SMARTSQ mechanism") { + SECTION("as reported") { + auto m = "[C,N,O]C[#6]*[$(C(=O)O)]"_smarts; + REQUIRE(m); + auto ctab = MolToV3KMolBlock(*m); + // std::cerr << ctab << std::endl; + // make sure we still do list queries correctly + CHECK(ctab.find("V30 1 [C,N,O]") != std::string::npos); + CHECK(ctab.find("FIELDDATA=\"[C,N,O]") == std::string::npos); + CHECK(ctab.find("FIELDDATA=\"C\"") == std::string::npos); + CHECK(ctab.find("FIELDDATA=\"[#6]\"") == std::string::npos); + CHECK(ctab.find("FIELDDATA=\"*\"") == std::string::npos); + CHECK(ctab.find("SMARTSQ") != std::string::npos); + CHECK(ctab.find("[$(C(=O)O)]") != std::string::npos); + + // make sure we can properly parse that + std::unique_ptr nm{MolBlockToMol(ctab)}; + REQUIRE(nm); + CHECK(MolToSmarts(*nm) == "[#6,#7,#8][#6][#6]*[$(C(=O)O)]"); + } } \ No newline at end of file diff --git a/Code/GraphMol/MarvinParse/MarvinParser.cpp b/Code/GraphMol/MarvinParse/MarvinParser.cpp index f5eb1f2cc..ead8127f0 100644 --- a/Code/GraphMol/MarvinParse/MarvinParser.cpp +++ b/Code/GraphMol/MarvinParse/MarvinParser.cpp @@ -192,7 +192,6 @@ class MarvinCMLReader { if (marvinAtom->isotope != 0) { res->setIsotope(marvinAtom->isotope); - res->setProp(common_properties::_hasMassQuery, true); } // mrvValence and hydrogenCount both set the number of explicit hydrogens