mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-03 21:44:30 +08:00
* Fixes #5819 still some things to think about there * test parsing * make sure the fix for 5819 also shows up in reactions * patch some other holes
This commit is contained in:
@@ -103,14 +103,18 @@ std::string chemicalReactionToRxnToString(const RDKit::ChemicalReaction &rxn,
|
||||
}
|
||||
|
||||
void write_template(std::ostringstream &res, RDKit::ROMol &tpl) {
|
||||
if (tpl.needsUpdatePropertyCache()) {
|
||||
tpl.updatePropertyCache(false);
|
||||
RDKit::RWMol trwmol(tpl);
|
||||
|
||||
if (trwmol.needsUpdatePropertyCache()) {
|
||||
trwmol.updatePropertyCache(false);
|
||||
}
|
||||
// to write the mol block, we need ring information:
|
||||
if (!tpl.getRingInfo()->isInitialized()) {
|
||||
RDKit::MolOps::findSSSR(tpl);
|
||||
if (!trwmol.getRingInfo()->isInitialized()) {
|
||||
RDKit::MolOps::findSSSR(trwmol);
|
||||
}
|
||||
res << RDKit::FileParserUtils::getV3000CTAB(tpl, -1);
|
||||
RDKit::FileParserUtils::moveAdditionalPropertiesToSGroups(trwmol);
|
||||
|
||||
res << RDKit::FileParserUtils::getV3000CTAB(trwmol, -1);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
@@ -1584,4 +1584,75 @@ TEST_CASE("problematic in-place example from MolStandardize") {
|
||||
rxn->runReactant(*m);
|
||||
CHECK(MolToSmiles(*m)=="CN(C)c1cc[nH+]cc1");
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE(
|
||||
"github #5819: Support writing detailed SMARTS queries to CTABs using the SMARTSQ mechanism") {
|
||||
SECTION("as reported") {
|
||||
auto rxn = "[C;$(C([#6])=O):1][OH:2]>>[C:1][NH2:2]"_rxnsmarts;
|
||||
REQUIRE(rxn);
|
||||
auto ctab = ChemicalReactionToV3KRxnBlock(*rxn);
|
||||
|
||||
// make sure the templates haven't been modified by that operation
|
||||
CHECK(getSubstanceGroups(*rxn->getReactants()[0]).empty());
|
||||
|
||||
CHECK(ctab.find("SMARTSQ") != std::string::npos);
|
||||
CHECK(ctab.find("FIELDDATA=\"[C&$(C([#6])=O):1]\"") != std::string::npos);
|
||||
CHECK(ctab.find("FIELDDATA=\"[N&H2:2]\"") != std::string::npos);
|
||||
|
||||
// make sure we can properly parse that
|
||||
std::unique_ptr<ChemicalReaction> nrxn{RxnBlockToChemicalReaction(ctab)};
|
||||
REQUIRE(nrxn);
|
||||
CHECK(MolToSmarts(*nrxn->getReactants()[0]) == "[C&$(C([#6])=O):1][O&H1:2]");
|
||||
}
|
||||
|
||||
|
||||
SECTION("isotopes are weird"){
|
||||
auto rxnb = R"CTAB($RXN V3000
|
||||
|
||||
Mrv2211 111620231722
|
||||
|
||||
M V30 COUNTS 1 1
|
||||
M V30 BEGIN REACTANT
|
||||
M V30 BEGIN CTAB
|
||||
M V30 COUNTS 4 3 0 0 0
|
||||
M V30 BEGIN ATOM
|
||||
M V30 1 C 6.9035 -10.0923 0 1
|
||||
M V30 2 O 6.9035 -8.5923 0 2 MASS=18
|
||||
M V30 3 Cl 5.6045 -10.8423 0 0
|
||||
M V30 4 C 8.2025 -10.8423 0 4
|
||||
M V30 END ATOM
|
||||
M V30 BEGIN BOND
|
||||
M V30 1 1 1 4
|
||||
M V30 2 2 1 2
|
||||
M V30 3 1 1 3
|
||||
M V30 END BOND
|
||||
M V30 END CTAB
|
||||
M V30 END REACTANT
|
||||
M V30 BEGIN PRODUCT
|
||||
M V30 BEGIN CTAB
|
||||
M V30 COUNTS 4 3 0 0 0
|
||||
M V30 BEGIN ATOM
|
||||
M V30 1 C 22.0416 -9.3829 0 0
|
||||
M V30 2 C 23.3406 -8.6329 0 1
|
||||
M V30 3 O 23.3406 -7.1329 0 2 MASS=18
|
||||
M V30 4 C 24.6396 -9.3829 0 4
|
||||
M V30 END ATOM
|
||||
M V30 BEGIN BOND
|
||||
M V30 1 1 1 2
|
||||
M V30 2 1 2 4
|
||||
M V30 3 2 2 3
|
||||
M V30 END BOND
|
||||
M V30 END CTAB
|
||||
M V30 END PRODUCT
|
||||
M END
|
||||
)CTAB";
|
||||
std::unique_ptr<ChemicalReaction> rxn{RxnBlockToChemicalReaction(rxnb)};
|
||||
REQUIRE(rxn);
|
||||
CHECK(rxn->getReactants()[0]->getAtomWithIdx(0)->hasQuery());
|
||||
CHECK(rxn->getReactants()[0]->getAtomWithIdx(1)->hasQuery());
|
||||
auto ctab = ChemicalReactionToV3KRxnBlock(*rxn);
|
||||
CHECK(ctab.find("SMARTSQ") == std::string::npos);
|
||||
|
||||
}
|
||||
}
|
||||
@@ -250,6 +250,8 @@ inline void createAtomStringPropertyList(
|
||||
missingValueMarker, lineSize));
|
||||
}
|
||||
|
||||
RDKIT_FILEPARSERS_EXPORT void moveAdditionalPropertiesToSGroups(RWMol &mol);
|
||||
|
||||
} // namespace FileParserUtils
|
||||
} // namespace RDKit
|
||||
|
||||
|
||||
@@ -1541,7 +1541,6 @@ Atom *ParseMolFileAtomLine(const std::string_view text, RDGeom::Point3D &pos,
|
||||
<< " has a negative isotope offset. line: " << line << std::endl;
|
||||
}
|
||||
res->setIsotope(dIso);
|
||||
res->setProp(common_properties::_hasMassQuery, true);
|
||||
}
|
||||
|
||||
if (text.size() >= 42 && text.substr(39, 3) != " 0") {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
//
|
||||
// Copyright (C) 2003-2021 Greg Landrum and other RDKit contributors
|
||||
// Copyright (C) 2003-2023 Greg Landrum and other RDKit contributors
|
||||
//
|
||||
// @@ All Rights Reserved @@
|
||||
// This file is part of the RDKit.
|
||||
@@ -1037,28 +1037,55 @@ int BondStereoCodeV2000ToV3000(int dirCode) {
|
||||
|
||||
namespace {
|
||||
void createSMARTSQSubstanceGroups(ROMol &mol) {
|
||||
auto isRedundantQuery = [](const auto query) {
|
||||
if (query->getDescription() == "AtomAnd" &&
|
||||
(query->endChildren() - query->beginChildren() == 2) &&
|
||||
(*query->beginChildren())->getDescription() == "AtomAtomicNum" &&
|
||||
!(*query->beginChildren())->getNegation() &&
|
||||
!(*(query->beginChildren() + 1))->getNegation() &&
|
||||
((*(query->beginChildren() + 1))->getDescription() == "AtomIsotope" ||
|
||||
(*(query->beginChildren() + 1))->getDescription() ==
|
||||
"AtomFormalCharge")) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
};
|
||||
for (const auto atom : mol.atoms()) {
|
||||
std::string sma;
|
||||
if (atom->hasQuery() &&
|
||||
atom->getPropIfPresent(common_properties::MRV_SMA, sma) &&
|
||||
!sma.empty()) {
|
||||
SubstanceGroup sg(&mol, "DAT");
|
||||
sg.setProp("QUERYTYPE", "SMARTSQ");
|
||||
sg.setProp("QUERYOP", "=");
|
||||
std::vector<std::string> dataFields{sma};
|
||||
sg.setProp("DATAFIELDS", dataFields);
|
||||
sg.addAtomWithIdx(atom->getIdx());
|
||||
addSubstanceGroup(mol, sg);
|
||||
if (atom->hasQuery()) {
|
||||
std::string sma;
|
||||
|
||||
if (!atom->getPropIfPresent(common_properties::MRV_SMA, sma) &&
|
||||
!isAtomListQuery(atom) &&
|
||||
atom->getQuery()->getDescription() != "AtomNull" &&
|
||||
// we may want to re-think this next one.
|
||||
// including AtomType queries will result in an entry
|
||||
// for every atom that comes from SMARTS, and I don't think
|
||||
// we want that.
|
||||
!boost::starts_with(atom->getQuery()->getDescription(), "AtomType") &&
|
||||
!boost::starts_with(atom->getQuery()->getDescription(),
|
||||
"AtomAtomicNum") &&
|
||||
!isRedundantQuery(atom->getQuery())) {
|
||||
sma = SmartsWrite::GetAtomSmarts(static_cast<const QueryAtom *>(atom));
|
||||
}
|
||||
if (!sma.empty()) {
|
||||
SubstanceGroup sg(&mol, "DAT");
|
||||
sg.setProp("QUERYTYPE", "SMARTSQ");
|
||||
sg.setProp("QUERYOP", "=");
|
||||
std::vector<std::string> dataFields{sma};
|
||||
sg.setProp("DATAFIELDS", dataFields);
|
||||
sg.addAtomWithIdx(atom->getIdx());
|
||||
addSubstanceGroup(mol, sg);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
|
||||
namespace FileParserUtils {
|
||||
void moveAdditionalPropertiesToSGroups(RWMol &mol) {
|
||||
GenericGroups::convertGenericQueriesToSubstanceGroups(mol);
|
||||
createSMARTSQSubstanceGroups(mol);
|
||||
}
|
||||
|
||||
} // namespace FileParserUtils
|
||||
const std::string GetV3000MolFileBondLine(const Bond *bond,
|
||||
const INT_MAP_INT &wedgeBonds,
|
||||
const Conformer *conf) {
|
||||
@@ -1374,7 +1401,7 @@ std::string MolToMolBlock(const ROMol &mol, bool includeStereo, int confId,
|
||||
MolOps::assignStereochemistry(trwmol);
|
||||
}
|
||||
#endif
|
||||
moveAdditionalPropertiesToSGroups(trwmol);
|
||||
FileParserUtils::moveAdditionalPropertiesToSGroups(trwmol);
|
||||
|
||||
try {
|
||||
return outputMolToMolBlock(trwmol, confId, forceV3000);
|
||||
|
||||
@@ -6804,4 +6804,27 @@ TEST_CASE(
|
||||
REQUIRE(bond2->getBondType() == Bond::DOUBLE);
|
||||
CHECK(bond2->getStereo() == Bond::BondStereo::STEREONONE);
|
||||
CHECK(bond2->getBondDir() == Bond::BondDir::NONE);
|
||||
}
|
||||
|
||||
TEST_CASE(
|
||||
"github #5819: Support writing detailed SMARTS queries to CTABs using the SMARTSQ mechanism") {
|
||||
SECTION("as reported") {
|
||||
auto m = "[C,N,O]C[#6]*[$(C(=O)O)]"_smarts;
|
||||
REQUIRE(m);
|
||||
auto ctab = MolToV3KMolBlock(*m);
|
||||
// std::cerr << ctab << std::endl;
|
||||
// make sure we still do list queries correctly
|
||||
CHECK(ctab.find("V30 1 [C,N,O]") != std::string::npos);
|
||||
CHECK(ctab.find("FIELDDATA=\"[C,N,O]") == std::string::npos);
|
||||
CHECK(ctab.find("FIELDDATA=\"C\"") == std::string::npos);
|
||||
CHECK(ctab.find("FIELDDATA=\"[#6]\"") == std::string::npos);
|
||||
CHECK(ctab.find("FIELDDATA=\"*\"") == std::string::npos);
|
||||
CHECK(ctab.find("SMARTSQ") != std::string::npos);
|
||||
CHECK(ctab.find("[$(C(=O)O)]") != std::string::npos);
|
||||
|
||||
// make sure we can properly parse that
|
||||
std::unique_ptr<RWMol> nm{MolBlockToMol(ctab)};
|
||||
REQUIRE(nm);
|
||||
CHECK(MolToSmarts(*nm) == "[#6,#7,#8][#6][#6]*[$(C(=O)O)]");
|
||||
}
|
||||
}
|
||||
@@ -192,7 +192,6 @@ class MarvinCMLReader {
|
||||
|
||||
if (marvinAtom->isotope != 0) {
|
||||
res->setIsotope(marvinAtom->isotope);
|
||||
res->setProp(common_properties::_hasMassQuery, true);
|
||||
}
|
||||
|
||||
// mrvValence and hydrogenCount both set the number of explicit hydrogens
|
||||
|
||||
Reference in New Issue
Block a user