* Fixes #5819
still some things to think about there

* test parsing

* make sure the fix for 5819 also shows up in reactions

* patch some other holes
This commit is contained in:
Greg Landrum
2023-11-22 17:35:58 +01:00
committed by GitHub
parent 16d2842f08
commit a2ecd6723c
7 changed files with 147 additions and 22 deletions

View File

@@ -103,14 +103,18 @@ std::string chemicalReactionToRxnToString(const RDKit::ChemicalReaction &rxn,
}
void write_template(std::ostringstream &res, RDKit::ROMol &tpl) {
if (tpl.needsUpdatePropertyCache()) {
tpl.updatePropertyCache(false);
RDKit::RWMol trwmol(tpl);
if (trwmol.needsUpdatePropertyCache()) {
trwmol.updatePropertyCache(false);
}
// to write the mol block, we need ring information:
if (!tpl.getRingInfo()->isInitialized()) {
RDKit::MolOps::findSSSR(tpl);
if (!trwmol.getRingInfo()->isInitialized()) {
RDKit::MolOps::findSSSR(trwmol);
}
res << RDKit::FileParserUtils::getV3000CTAB(tpl, -1);
RDKit::FileParserUtils::moveAdditionalPropertiesToSGroups(trwmol);
res << RDKit::FileParserUtils::getV3000CTAB(trwmol, -1);
}
} // namespace

View File

@@ -1584,4 +1584,75 @@ TEST_CASE("problematic in-place example from MolStandardize") {
rxn->runReactant(*m);
CHECK(MolToSmiles(*m)=="CN(C)c1cc[nH+]cc1");
}
}
TEST_CASE(
"github #5819: Support writing detailed SMARTS queries to CTABs using the SMARTSQ mechanism") {
SECTION("as reported") {
auto rxn = "[C;$(C([#6])=O):1][OH:2]>>[C:1][NH2:2]"_rxnsmarts;
REQUIRE(rxn);
auto ctab = ChemicalReactionToV3KRxnBlock(*rxn);
// make sure the templates haven't been modified by that operation
CHECK(getSubstanceGroups(*rxn->getReactants()[0]).empty());
CHECK(ctab.find("SMARTSQ") != std::string::npos);
CHECK(ctab.find("FIELDDATA=\"[C&$(C([#6])=O):1]\"") != std::string::npos);
CHECK(ctab.find("FIELDDATA=\"[N&H2:2]\"") != std::string::npos);
// make sure we can properly parse that
std::unique_ptr<ChemicalReaction> nrxn{RxnBlockToChemicalReaction(ctab)};
REQUIRE(nrxn);
CHECK(MolToSmarts(*nrxn->getReactants()[0]) == "[C&$(C([#6])=O):1][O&H1:2]");
}
SECTION("isotopes are weird"){
auto rxnb = R"CTAB($RXN V3000
Mrv2211 111620231722
M V30 COUNTS 1 1
M V30 BEGIN REACTANT
M V30 BEGIN CTAB
M V30 COUNTS 4 3 0 0 0
M V30 BEGIN ATOM
M V30 1 C 6.9035 -10.0923 0 1
M V30 2 O 6.9035 -8.5923 0 2 MASS=18
M V30 3 Cl 5.6045 -10.8423 0 0
M V30 4 C 8.2025 -10.8423 0 4
M V30 END ATOM
M V30 BEGIN BOND
M V30 1 1 1 4
M V30 2 2 1 2
M V30 3 1 1 3
M V30 END BOND
M V30 END CTAB
M V30 END REACTANT
M V30 BEGIN PRODUCT
M V30 BEGIN CTAB
M V30 COUNTS 4 3 0 0 0
M V30 BEGIN ATOM
M V30 1 C 22.0416 -9.3829 0 0
M V30 2 C 23.3406 -8.6329 0 1
M V30 3 O 23.3406 -7.1329 0 2 MASS=18
M V30 4 C 24.6396 -9.3829 0 4
M V30 END ATOM
M V30 BEGIN BOND
M V30 1 1 1 2
M V30 2 1 2 4
M V30 3 2 2 3
M V30 END BOND
M V30 END CTAB
M V30 END PRODUCT
M END
)CTAB";
std::unique_ptr<ChemicalReaction> rxn{RxnBlockToChemicalReaction(rxnb)};
REQUIRE(rxn);
CHECK(rxn->getReactants()[0]->getAtomWithIdx(0)->hasQuery());
CHECK(rxn->getReactants()[0]->getAtomWithIdx(1)->hasQuery());
auto ctab = ChemicalReactionToV3KRxnBlock(*rxn);
CHECK(ctab.find("SMARTSQ") == std::string::npos);
}
}

View File

@@ -250,6 +250,8 @@ inline void createAtomStringPropertyList(
missingValueMarker, lineSize));
}
RDKIT_FILEPARSERS_EXPORT void moveAdditionalPropertiesToSGroups(RWMol &mol);
} // namespace FileParserUtils
} // namespace RDKit

View File

@@ -1541,7 +1541,6 @@ Atom *ParseMolFileAtomLine(const std::string_view text, RDGeom::Point3D &pos,
<< " has a negative isotope offset. line: " << line << std::endl;
}
res->setIsotope(dIso);
res->setProp(common_properties::_hasMassQuery, true);
}
if (text.size() >= 42 && text.substr(39, 3) != " 0") {

View File

@@ -1,5 +1,5 @@
//
// Copyright (C) 2003-2021 Greg Landrum and other RDKit contributors
// Copyright (C) 2003-2023 Greg Landrum and other RDKit contributors
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
@@ -1037,28 +1037,55 @@ int BondStereoCodeV2000ToV3000(int dirCode) {
namespace {
void createSMARTSQSubstanceGroups(ROMol &mol) {
auto isRedundantQuery = [](const auto query) {
if (query->getDescription() == "AtomAnd" &&
(query->endChildren() - query->beginChildren() == 2) &&
(*query->beginChildren())->getDescription() == "AtomAtomicNum" &&
!(*query->beginChildren())->getNegation() &&
!(*(query->beginChildren() + 1))->getNegation() &&
((*(query->beginChildren() + 1))->getDescription() == "AtomIsotope" ||
(*(query->beginChildren() + 1))->getDescription() ==
"AtomFormalCharge")) {
return true;
}
return false;
};
for (const auto atom : mol.atoms()) {
std::string sma;
if (atom->hasQuery() &&
atom->getPropIfPresent(common_properties::MRV_SMA, sma) &&
!sma.empty()) {
SubstanceGroup sg(&mol, "DAT");
sg.setProp("QUERYTYPE", "SMARTSQ");
sg.setProp("QUERYOP", "=");
std::vector<std::string> dataFields{sma};
sg.setProp("DATAFIELDS", dataFields);
sg.addAtomWithIdx(atom->getIdx());
addSubstanceGroup(mol, sg);
if (atom->hasQuery()) {
std::string sma;
if (!atom->getPropIfPresent(common_properties::MRV_SMA, sma) &&
!isAtomListQuery(atom) &&
atom->getQuery()->getDescription() != "AtomNull" &&
// we may want to re-think this next one.
// including AtomType queries will result in an entry
// for every atom that comes from SMARTS, and I don't think
// we want that.
!boost::starts_with(atom->getQuery()->getDescription(), "AtomType") &&
!boost::starts_with(atom->getQuery()->getDescription(),
"AtomAtomicNum") &&
!isRedundantQuery(atom->getQuery())) {
sma = SmartsWrite::GetAtomSmarts(static_cast<const QueryAtom *>(atom));
}
if (!sma.empty()) {
SubstanceGroup sg(&mol, "DAT");
sg.setProp("QUERYTYPE", "SMARTSQ");
sg.setProp("QUERYOP", "=");
std::vector<std::string> dataFields{sma};
sg.setProp("DATAFIELDS", dataFields);
sg.addAtomWithIdx(atom->getIdx());
addSubstanceGroup(mol, sg);
}
}
}
}
} // namespace
namespace FileParserUtils {
void moveAdditionalPropertiesToSGroups(RWMol &mol) {
GenericGroups::convertGenericQueriesToSubstanceGroups(mol);
createSMARTSQSubstanceGroups(mol);
}
} // namespace FileParserUtils
const std::string GetV3000MolFileBondLine(const Bond *bond,
const INT_MAP_INT &wedgeBonds,
const Conformer *conf) {
@@ -1374,7 +1401,7 @@ std::string MolToMolBlock(const ROMol &mol, bool includeStereo, int confId,
MolOps::assignStereochemistry(trwmol);
}
#endif
moveAdditionalPropertiesToSGroups(trwmol);
FileParserUtils::moveAdditionalPropertiesToSGroups(trwmol);
try {
return outputMolToMolBlock(trwmol, confId, forceV3000);

View File

@@ -6804,4 +6804,27 @@ TEST_CASE(
REQUIRE(bond2->getBondType() == Bond::DOUBLE);
CHECK(bond2->getStereo() == Bond::BondStereo::STEREONONE);
CHECK(bond2->getBondDir() == Bond::BondDir::NONE);
}
TEST_CASE(
"github #5819: Support writing detailed SMARTS queries to CTABs using the SMARTSQ mechanism") {
SECTION("as reported") {
auto m = "[C,N,O]C[#6]*[$(C(=O)O)]"_smarts;
REQUIRE(m);
auto ctab = MolToV3KMolBlock(*m);
// std::cerr << ctab << std::endl;
// make sure we still do list queries correctly
CHECK(ctab.find("V30 1 [C,N,O]") != std::string::npos);
CHECK(ctab.find("FIELDDATA=\"[C,N,O]") == std::string::npos);
CHECK(ctab.find("FIELDDATA=\"C\"") == std::string::npos);
CHECK(ctab.find("FIELDDATA=\"[#6]\"") == std::string::npos);
CHECK(ctab.find("FIELDDATA=\"*\"") == std::string::npos);
CHECK(ctab.find("SMARTSQ") != std::string::npos);
CHECK(ctab.find("[$(C(=O)O)]") != std::string::npos);
// make sure we can properly parse that
std::unique_ptr<RWMol> nm{MolBlockToMol(ctab)};
REQUIRE(nm);
CHECK(MolToSmarts(*nm) == "[#6,#7,#8][#6][#6]*[$(C(=O)O)]");
}
}

View File

@@ -192,7 +192,6 @@ class MarvinCMLReader {
if (marvinAtom->isotope != 0) {
res->setIsotope(marvinAtom->isotope);
res->setProp(common_properties::_hasMassQuery, true);
}
// mrvValence and hydrogenCount both set the number of explicit hydrogens