Allow fragments to be grouped in cdxml (#7529)

* Allow fragments to be grouped in CDXML

* Add support for grouped reactants

* run clang-format

* Change github issue to 7528

* Add documentation to the code

* response to review, check grouped reactants in cdxml against rxn file

* Remove unused code

* Add missing file

---------

Co-authored-by: Brian Kelley <bkelley@relaytx.com>
This commit is contained in:
Brian Kelley
2024-06-23 01:02:19 -04:00
committed by GitHub
parent 21df90304a
commit 2b99ee477c
7 changed files with 1907 additions and 189 deletions

View File

@@ -8,6 +8,7 @@
// of the RDKit source tree.
//
#include <GraphMol/QueryOps.h>
#include <GraphMol/ChemTransforms/ChemTransforms.h>
#include <GraphMol/ChemReactions/Reaction.h>
#include <GraphMol/ChemReactions/ReactionParser.h>
#include <GraphMol/FileParsers/FileParsers.h>
@@ -34,6 +35,17 @@ void make_query_atoms(RWMol &mol) {
QueryOps::replaceAtomWithQueryAtom(&mol, atom);
}
}
void add_template(const std::string &prop, std::map<int, ROMOL_SPTR> &templates,
std::unique_ptr<RWMol> &mol) {
auto reactant_idx = mol->getProp<int>(prop);
if (templates.find(reactant_idx) != templates.end()) {
templates[reactant_idx] =
ROMOL_SPTR(combineMols(*templates[reactant_idx], *mol));
} else {
templates[reactant_idx] = ROMOL_SPTR(std::move(mol));
}
}
} // namespace
//! Parse a text stream with CDXML data into a ChemicalReaction
@@ -46,6 +58,10 @@ CDXMLDataStreamToChemicalReactions(std::istream &inStream, bool sanitize,
std::map<std::pair<unsigned int, unsigned int>, std::vector<unsigned int>>
schemes;
std::set<unsigned int> used;
std::map<int, ROMOL_SPTR> reactant_templates;
std::map<int, ROMOL_SPTR> product_templates;
std::map<int, ROMOL_SPTR> agent_templates;
for (size_t i = 0; i < mols.size(); ++i) {
unsigned int step = 0;
unsigned int scheme = 0;
@@ -69,17 +85,26 @@ CDXMLDataStreamToChemicalReactions(std::istream &inStream, bool sanitize,
if (mols[idx]->hasProp("CDX_REAGENT_ID")) {
used.insert(idx);
make_query_atoms(*mols[idx]);
res->addReactantTemplate(ROMOL_SPTR(std::move(mols[idx])));
add_template("CDX_REAGENT_ID", reactant_templates, mols[idx]);
} else if (mols[idx]->hasProp("CDX_AGENT_ID")) {
used.insert(idx);
make_query_atoms(*mols[idx]);
res->addAgentTemplate(ROMOL_SPTR(std::move(mols[idx])));
add_template("CDX_AGENT_ID", agent_templates, mols[idx]);
} else if (mols[idx]->hasProp("CDX_PRODUCT_ID")) {
used.insert(idx);
make_query_atoms(*mols[idx]);
res->addProductTemplate(ROMOL_SPTR(std::move(mols[idx])));
add_template("CDX_PRODUCT_ID", product_templates, mols[idx]);
}
}
for (auto reactant : reactant_templates) {
res->addReactantTemplate(reactant.second);
}
for (auto reactant : agent_templates) {
res->addAgentTemplate(reactant.second);
}
for (auto reactant : product_templates) {
res->addProductTemplate(reactant.second);
}
updateProductsStereochem(res);
// CDXML-based reactions do not have implicit properties
res->setImplicitPropertiesFlag(false);

View File

@@ -1306,6 +1306,83 @@ TEST_CASE("CDXML Parser") {
smarts ==
"[#6&D2:2]1:[#6&D2:3]:[#6&D2:4]:[#6&D3:1](:[#6&D2:5]:[#6&D2:6]:1)-[#17&D1].[#6&D3](-[#5&D2]-[#6&D3:7]1:[#6&D2:8]:[#6&D2:9]:[#6&D2:10]:[#6&D2:11]:[#6&D2:12]:1)(-[#8&D1])-[#8&D1]>>[#6&D2:1]1:[#6&D2:5]:[#6&D3:6](:[#6&D2:2]:[#6&D2:3]:[#6&D2:4]:1)-[#6&D3:7]1:[#6&D2:8]:[#6&D2:9]:[#6&D2:10]:[#6&D2:11]:[#6&D2:12]:1");
}
SECTION("Github #7528 CDXML Grouped Agents in Reactions") {
// The failing case had fragments grouped with labels; ensure the grouped version and the ungrouped
// version give the same results
auto fname = cdxmlbase + "github7467-grouped-fragments.cdxml";
auto rxns = CDXMLFileToChemicalReactions(fname);
CHECK(rxns.size() == 1);
fname = cdxmlbase + "github7467-ungrouped-fragments.cdxml";
auto rxns2 = CDXMLFileToChemicalReactions(fname);
CHECK(ChemicalReactionToRxnSmarts(*rxns[0]) == ChemicalReactionToRxnSmarts(*rxns2[0]));
// Check to see if our understanding of grouped reagents in reactions is correct
fname = cdxmlbase + "reaction-with-grouped-templates.cdxml";
auto rxns3 = CDXMLFileToChemicalReactions(fname);
CHECK(rxns3.size() == 1);
std::string rxnb = R"RXN($RXN
Mrv2004 062120241319
2 0
$MOL
Mrv2004 06212413192D
5 5 0 0 0 0 999 V2000
2.6221 -4.6475 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
2.6221 -5.4725 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
3.4070 -5.7274 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
3.8918 -5.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
3.4070 -4.3926 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
1 2 2 0 0 0 0
2 3 1 0 0 0 0
3 4 2 0 0 0 0
4 5 1 0 0 0 0
5 1 1 0 0 0 0
M END
$MOL
Mrv2004 06212413192D
11 11 0 0 0 0 999 V2000
6.9305 -4.5100 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
6.9305 -5.3350 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
7.6450 -5.7475 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
8.3594 -5.3350 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
8.3594 -4.5100 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
7.6450 -4.0975 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
8.6171 -4.4825 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
8.6171 -5.3075 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
9.4020 -5.5624 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
9.8868 -4.8950 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
9.4020 -4.2276 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
1 2 1 0 0 0 0
6 1 1 0 0 0 0
2 3 1 0 0 0 0
3 4 1 0 0 0 0
4 5 1 0 0 0 0
5 6 1 0 0 0 0
7 8 2 0 0 0 0
11 7 1 0 0 0 0
8 9 1 0 0 0 0
9 10 2 0 0 0 0
10 11 1 0 0 0 0
M END
)RXN";
std::unique_ptr<ChemicalReaction> rxn_mb{RxnBlockToChemicalReaction(rxnb)};
// CDXMLToReaction is sanitized by default, this might be a mistake...
unsigned int failed;
RxnOps::sanitizeRxn(
*rxn_mb, failed,
RxnOps::SANITIZE_ADJUST_REACTANTS | RxnOps::SANITIZE_ADJUST_PRODUCTS,
RxnOps::MatchOnlyAtRgroupsAdjustParams());
CHECK(rxns3[0]->getNumReactantTemplates() == rxn_mb->getNumReactantTemplates());
CHECK(ChemicalReactionToRxnSmarts(*rxns3[0]) == ChemicalReactionToRxnSmarts(*rxn_mb));
}
}
TEST_CASE("Github #5785: separateAgents ignored for V3000 RXN files") {
@@ -1800,17 +1877,26 @@ TEST_CASE("sanitizeRxnAsMols") {
TEST_CASE("Github #7372: SMILES output option to disable dative bonds") {
SECTION("basics") {
auto rxn = "[C:1]-[C:2].[NH3:3]->[Fe:4]-[NH2:5]>>[C:1]=[C:2].[NH3:3]->[Fe:4]-[NH2:5]"_rxnsmarts;
auto rxn =
"[C:1]-[C:2].[NH3:3]->[Fe:4]-[NH2:5]>>[C:1]=[C:2].[NH3:3]->[Fe:4]-[NH2:5]"_rxnsmarts;
REQUIRE(rxn);
auto smi = ChemicalReactionToRxnSmiles(*rxn);
CHECK(smi == "[CH3:1][CH3:2].[NH3:3]->[Fe:4][NH2:5]>>[CH2:1]=[CH2:2].[NH3:3]->[Fe:4][NH2:5]");
CHECK(
smi ==
"[CH3:1][CH3:2].[NH3:3]->[Fe:4][NH2:5]>>[CH2:1]=[CH2:2].[NH3:3]->[Fe:4][NH2:5]");
smi = ChemicalReactionToRxnSmarts(*rxn);
CHECK(smi == "[C:1]-[C:2].[N&H3:3]->[#26:4]-[N&H2:5]>>[C:1]=[C:2].[N&H3:3]->[#26:4]-[N&H2:5]");
CHECK(
smi ==
"[C:1]-[C:2].[N&H3:3]->[#26:4]-[N&H2:5]>>[C:1]=[C:2].[N&H3:3]->[#26:4]-[N&H2:5]");
SmilesWriteParams ps;
ps.includeDativeBonds = false;
smi = ChemicalReactionToRxnSmiles(*rxn,ps);
CHECK(smi == "[CH3:1][CH3:2].[NH3:3][Fe:4][NH2:5]>>[CH2:1]=[CH2:2].[NH3:3][Fe:4][NH2:5]");
smi = ChemicalReactionToRxnSmarts(*rxn,ps);
CHECK(smi == "[C:1]-[C:2].[N&H3:3]-[#26:4]-[N&H2:5]>>[C:1]=[C:2].[N&H3:3]-[#26:4]-[N&H2:5]");
smi = ChemicalReactionToRxnSmiles(*rxn, ps);
CHECK(
smi ==
"[CH3:1][CH3:2].[NH3:3][Fe:4][NH2:5]>>[CH2:1]=[CH2:2].[NH3:3][Fe:4][NH2:5]");
smi = ChemicalReactionToRxnSmarts(*rxn, ps);
CHECK(
smi ==
"[C:1]-[C:2].[N&H3:3]-[#26:4]-[N&H2:5]>>[C:1]=[C:2].[N&H3:3]-[#26:4]-[N&H2:5]");
}
}
}

View File

@@ -29,6 +29,7 @@ namespace RDKit {
namespace {
const std::string NEEDS_FUSE("CDXML_NEEDS_FUSE");
const std::string CDXML_FRAG_ID("CDXML_FRAG_ID");
const std::string CDXML_GROUP_ID("CDXML_GROUP_ID");
const std::string FUSE_LABEL("CDXML_NODE_ID");
const std::string CDX_SCHEME_ID("CDX_SCHEME_ID");
const std::string CDX_STEP_ID("CDX_STEP_ID");
@@ -495,28 +496,200 @@ bool parse_fragment(RWMol &mol, ptree &frag,
return !skip_fragment;
}
void set_reaction_data(std::string type, std::string prop, SchemeInfo &scheme,
const std::vector<unsigned int> &frag_ids,
const std::map<unsigned int, size_t> &fragments,
const std::vector<std::unique_ptr<RWMol>> &mols) {
void set_reaction_data(
std::string type, std::string prop, SchemeInfo &scheme,
const std::vector<unsigned int> &frag_ids,
const std::map<unsigned int, size_t> &fragments,
std::map<unsigned int, std::vector<int>> &grouped_fragments,
const std::vector<std::unique_ptr<RWMol>> &mols) {
unsigned int reagent_idx = 0;
for (auto idx : frag_ids) {
auto iter = fragments.find(idx);
if (iter == fragments.end()) {
auto iter = grouped_fragments.find(idx);
if (iter == grouped_fragments.end()) {
BOOST_LOG(rdWarningLog)
<< "CDXMLParser: Schema " << scheme.scheme_id << " step "
<< scheme.step_id << " " << type << " fragment " << idx
<< scheme.step_id << " " << type << " reaction fragment " << idx
<< " not found in document." << std::endl;
continue;
}
if (iter->second >= mols.size()) {
// shouldn't get here
continue;
for (auto reaction_fragment_id : iter->second) {
auto fragment = fragments.find(reaction_fragment_id);
if (fragment == fragments.end()) {
BOOST_LOG(rdWarningLog)
<< "CDXMLParser: Schema " << scheme.scheme_id << " step "
<< scheme.step_id << " " << type << " fragment " << idx
<< " not found in document." << std::endl;
continue;
}
auto &mol = mols[fragment->second];
mol->setProp(CDX_SCHEME_ID, scheme.scheme_id);
mol->setProp(CDX_STEP_ID, scheme.step_id);
mol->setProp(prop, reagent_idx);
}
reagent_idx += 1;
}
}
// Recursively walks the children of `node`, collecting molecules and reaction
// schemes. The parsing of fragments is recursive because fragments may be
// embedded deeper in the document, i.e. a group may hold multiple fragments.
//
// Additionally, a grouped_fragments map is filled in to group fragments
// together for the purposes of reactions.
//
// Ungrouped fragments end up as vectors of size 1 in grouped_fragments.
// The reaction schemes in the CDXML docs appear to use the fragment id for
// ungrouped fragments and the group id for grouped fragments, so
// grouped_fragments holds both for ease of bookkeeping.
//
// NOTE(review): fragment_lookup and grouped_fragments are updated before
// fusion/sanitization can still reject the fragment, so they may hold entries
// for fragments that never made it into mols.
template <class T>
void visit_children(
    T &node, std::map<unsigned int, Atom *> &ids,
    std::vector<std::unique_ptr<RWMol>>
        &mols,  // All molecules found in the doc
    std::map<unsigned int, size_t>
        &fragment_lookup,  // fragment.id->molecule index
    std::map<unsigned int, std::vector<int>>
        &grouped_fragments,            // grouped.id -> [fragment.id]
    std::vector<SchemeInfo> &schemes,  // reaction schemes found
    int &missing_frag_id,  // if we don't have a fragment id, start at -1 and
                           // decrement
    double bondLength,  // bond length of the document for assigning coordinates
    const v2::CDXMLParser::CDXMLParserParams &params,  // parser parameters
    int group_id = -1) {  // current group id for this set of subnodes
  MolzipParams molzip_params;
  molzip_params.label = MolzipLabel::AtomProperty;
  molzip_params.atomProperty = FUSE_LABEL;
  molzip_params.enforceValenceRules = false;

  for (auto &frag : node.second) {
    if (frag.first == "fragment") {  // chemical matter
      std::unique_ptr<RWMol> mol = std::make_unique<RWMol>();
      if (!parse_fragment(*mol, frag.second, ids, missing_frag_id)) {
        continue;
      }
      unsigned int frag_id = mol->getProp<int>(CDXML_FRAG_ID);
      fragment_lookup[frag_id] = mols.size();
      if (group_id != -1) {
        grouped_fragments[group_id].push_back(frag_id);
      } else {
        grouped_fragments[frag_id].push_back(frag_id);
      }
      if (mol->hasProp(NEEDS_FUSE)) {
        mol->clearProp(NEEDS_FUSE);
        std::unique_ptr<ROMol> fused;
        try {
          fused = molzip(*mol, molzip_params);
        } catch (Invar::Invariant &) {
          BOOST_LOG(rdWarningLog) << "Failed fusion of fragment skipping... "
                                  << frag_id << std::endl;
          // perhaps have an option to extract all fragments?
          // mols.push_back(std::move(mol));
          continue;
        }
        fused->setProp<int>(CDXML_FRAG_ID, static_cast<int>(frag_id));
        mols.emplace_back(dynamic_cast<RWMol *>(fused.release()));
      } else {
        mols.push_back(std::move(mol));
      }
      RWMol *res = mols.back().get();
      auto conf = std::make_unique<Conformer>(res->getNumAtoms());
      conf->set3D(false);

      bool hasConf = false;
      for (auto &atm : res->atoms()) {
        RDGeom::Point3D p{0.0, 0.0, 0.0};

        if (atm->hasProp(CDX_ATOM_POS)) {
          hasConf = true;
          const std::vector<double> coord =
              atm->getProp<std::vector<double>>(CDX_ATOM_POS);

          if (coord.size() == 2) {
            p.x = coord[0];
            p.y = -1 * coord[1];  // CDXML uses an inverted coordinate
            // system, so we need to reverse that
            p.z = 0.0;
          }
        }
        conf->setAtomPos(atm->getIdx(), p);
        atm->clearProp(CDX_ATOM_POS);
      }

      if (hasConf) {
        scaleBonds(*res, *conf, RDKIT_DEPICT_BONDLENGTH, bondLength);
        auto confidx = res->addConformer(conf.release());
        DetectAtomStereoChemistry(*res, &res->getConformer(confidx));
        Atropisomers::detectAtropisomerChirality(*res,
                                                 &res->getConformer(confidx));
      } else {  // no Conformer
        Atropisomers::detectAtropisomerChirality(*res, nullptr);
      }

      // now that atom stereochem has been perceived, the wedging
      // information is no longer needed, so we clear
      // single bond dir flags:
      MolOps::clearSingleBondDirFlags(*res);

      if (params.sanitize) {
        try {
          if (params.removeHs) {
            // Bond stereo detection must happen before H removal, or
            // else we might be removing stereogenic H atoms in double
            // bonds (e.g. imines). But before we run stereo detection,
            // we need to run mol cleanup so don't have trouble with
            // e.g. nitro groups. Sadly, this all means we will
            // run both cleanup and ring finding twice (a fast find
            // rings in bond stereo detection, and another in
            // sanitization's SSSR symmetrization).
            unsigned int failedOp = 0;
            MolOps::sanitizeMol(*res, failedOp, MolOps::SANITIZE_CLEANUP);
            MolOps::detectBondStereochemistry(*res);
            MolOps::removeHs(*res, false, false);
          } else {
            MolOps::sanitizeMol(*res);
            MolOps::detectBondStereochemistry(*res);
          }
        } catch (...) {
          BOOST_LOG(rdWarningLog)
              << "CDXMLParser: failed sanitizing skipping fragment " << frag_id
              << std::endl;
          mols.pop_back();
          continue;
        }
        MolOps::assignStereochemistry(*res, true, true, true);
      } else {
        MolOps::detectBondStereochemistry(*res);
      }
    } else if (frag.first == "scheme") {  // get the reaction info
      int scheme_id = frag.second.template get<int>("<xmlattr>.id", -1);
      for (auto &step : frag.second) {
        if (step.first == "step") {
          auto step_id = step.second.template get<int>("<xmlattr>.id", -1);
          SchemeInfo scheme;
          scheme.scheme_id = scheme_id;
          scheme.step_id = step_id;
          for (auto &attrib : step.second.get_child("<xmlattr>")) {
            if (attrib.first == "ReactionStepProducts") {
              scheme.ReactionStepProducts =
                  to_vec<unsigned int>(attrib.second.data());
            } else if (attrib.first == "ReactionStepReactants") {
              scheme.ReactionStepReactants =
                  to_vec<unsigned int>(attrib.second.data());
            } else if (attrib.first == "ReactionStepObjectsAboveArrow") {
              scheme.ReactionStepObjectsAboveArrow =
                  to_vec<unsigned int>(attrib.second.data());
            } else if (attrib.first == "ReactionStepObjectsBelowArrow") {
              scheme.ReactionStepObjectsBelowArrow =
                  to_vec<unsigned int>(attrib.second.data());
            } else if (attrib.first == "ReactionStepAtomMap") {
              scheme.ReactionStepAtomMap =
                  to_vec<unsigned int>(attrib.second.data());
            }
          }
          schemes.push_back(std::move(scheme));
        }
      }
    } else {
      // The group id must only apply to the children of this <group>.
      // Overwriting group_id itself would leak the id to every sibling that
      // follows the group at this level, filing unrelated fragments under it.
      int child_group_id = group_id;
      if (frag.first == "group") {
        child_group_id = frag.second.template get<int>("<xmlattr>.id");
      }
      visit_children(frag, ids, mols, fragment_lookup, grouped_fragments,
                     schemes, missing_frag_id, bondLength, params,
                     child_group_id);
    }
  }
}
} // namespace
@@ -541,151 +714,20 @@ std::vector<std::unique_ptr<RWMol>> MolsFromCDXMLDataStream(
throw FileParseException(e.what());
}
std::map<unsigned int, Atom *> ids;
std::vector<std::unique_ptr<RWMol>> mols;
std::map<unsigned int, size_t> fragment_lookup;
std::vector<SchemeInfo> schemes;
std::map<unsigned int, Atom *> ids; // atom.id to atom in fragment (used for linkages)
std::vector<std::unique_ptr<RWMol>> mols; // All molecules found in the doc
std::map<unsigned int, size_t> fragment_lookup; // fragment.id->molecule index
std::map<unsigned int, std::vector<int>> grouped_fragments; //grouped.id -> [fragment.id]
std::vector<SchemeInfo> schemes; // reaction schemes found
MolzipParams molzip_params;
molzip_params.label = MolzipLabel::AtomProperty;
molzip_params.atomProperty = FUSE_LABEL;
molzip_params.enforceValenceRules = false;
int missing_frag_id = -1;
for (auto &cdxml : pt) {
if (cdxml.first == "CDXML") {
double bondLength = cdxml.second.get<double>("<xmlattr>.BondLength");
for (auto &node : cdxml.second) {
if (node.first == "page") {
for (auto &frag : node.second) {
if (frag.first == "fragment") { // chemical matter
std::unique_ptr<RWMol> mol = std::make_unique<RWMol>();
if (!parse_fragment(*mol, frag.second, ids, missing_frag_id)) {
continue;
}
unsigned int frag_id = mol->getProp<int>(CDXML_FRAG_ID);
fragment_lookup[frag_id] = mols.size();
if (mol->hasProp(NEEDS_FUSE)) {
mol->clearProp(NEEDS_FUSE);
std::unique_ptr<ROMol> fused;
try {
fused = molzip(*mol, molzip_params);
} catch (Invar::Invariant &) {
BOOST_LOG(rdWarningLog)
<< "Failed fusion of fragment skipping... " << frag_id
<< std::endl;
// perhaps have an option to extract all fragments?
// mols.push_back(std::move(mol));
continue;
}
fused->setProp<int>(CDXML_FRAG_ID, static_cast<int>(frag_id));
mols.emplace_back(dynamic_cast<RWMol *>(fused.release()));
} else {
mols.push_back(std::move(mol));
}
RWMol *res = mols.back().get();
auto conf = std::make_unique<Conformer>(res->getNumAtoms());
conf->set3D(false);
bool hasConf = false;
for (auto &atm : res->atoms()) {
RDGeom::Point3D p{0.0, 0.0, 0.0};
if (atm->hasProp(CDX_ATOM_POS)) {
hasConf = true;
const std::vector<double> coord =
atm->getProp<std::vector<double>>(CDX_ATOM_POS);
if (coord.size() == 2) {
p.x = coord[0];
p.y = -1 * coord[1]; // CDXML uses an inverted coordinate
// system, so we need to reverse that
p.z = 0.0;
}
}
conf->setAtomPos(atm->getIdx(), p);
atm->clearProp(CDX_ATOM_POS);
}
if (hasConf) {
scaleBonds(*res, *conf, RDKIT_DEPICT_BONDLENGTH, bondLength);
auto confidx = res->addConformer(conf.release());
DetectAtomStereoChemistry(*res, &res->getConformer(confidx));
Atropisomers::detectAtropisomerChirality(
*res, &res->getConformer(confidx));
} else { // no Conformer
Atropisomers::detectAtropisomerChirality(*res, nullptr);
}
// now that atom stereochem has been perceived, the wedging
// information is no longer needed, so we clear
// single bond dir flags:
MolOps::clearSingleBondDirFlags(*res);
if (params.sanitize) {
try {
if (params.removeHs) {
// Bond stereo detection must happen before H removal, or
// else we might be removing stereogenic H atoms in double
// bonds (e.g. imines). But before we run stereo detection,
// we need to run mol cleanup so don't have trouble with
// e.g. nitro groups. Sadly, this all means we will
// run both cleanup and ring finding twice (a fast find
// rings in bond stereo detection, and another in
// sanitization's SSSR symmetrization).
unsigned int failedOp = 0;
MolOps::sanitizeMol(*res, failedOp,
MolOps::SANITIZE_CLEANUP);
MolOps::detectBondStereochemistry(*res);
MolOps::removeHs(*res, false, false);
} else {
MolOps::sanitizeMol(*res);
MolOps::detectBondStereochemistry(*res);
}
} catch (...) {
BOOST_LOG(rdWarningLog)
<< "CDXMLParser: failed sanitizing skipping fragment "
<< frag_id << std::endl;
mols.pop_back();
continue;
}
MolOps::assignStereochemistry(*res, true, true, true);
} else {
MolOps::detectBondStereochemistry(*res);
}
} else if (frag.first == "scheme") { // get the reaction info
int scheme_id = frag.second.get<int>("<xmlattr>.id", -1);
for (auto &node : frag.second) {
if (node.first == "step") {
auto step_id = node.second.get<int>("<xmlattr>.id", -1);
SchemeInfo scheme;
scheme.scheme_id = scheme_id;
scheme.step_id = step_id;
for (auto &attrib : node.second.get_child("<xmlattr>")) {
if (attrib.first == "ReactionStepProducts") {
scheme.ReactionStepProducts =
to_vec<unsigned int>(attrib.second.data());
} else if (attrib.first == "ReactionStepReactants") {
scheme.ReactionStepReactants =
to_vec<unsigned int>(attrib.second.data());
} else if (attrib.first ==
"ReactionStepObjectsAboveArrow") {
scheme.ReactionStepObjectsAboveArrow =
to_vec<unsigned int>(attrib.second.data());
} else if (attrib.first ==
"ReactionStepObjectsBelowArrow") {
scheme.ReactionStepObjectsBelowArrow =
to_vec<unsigned int>(attrib.second.data());
} else if (attrib.first == "ReactionStepAtomMap") {
scheme.ReactionStepAtomMap =
to_vec<unsigned int>(attrib.second.data());
}
}
schemes.push_back(std::move(scheme));
}
}
}
}
visit_children(node, ids, mols, fragment_lookup, grouped_fragments,
schemes, missing_frag_id, bondLength, params);
}
}
}
@@ -708,15 +750,17 @@ std::vector<std::unique_ptr<RWMol>> MolsFromCDXMLDataStream(
for (auto &scheme : schemes) {
// Set the molecule properties
set_reaction_data("ReactionStepReactants", CDX_REAGENT_ID, scheme,
scheme.ReactionStepReactants, fragments, mols);
scheme.ReactionStepReactants, fragments,
grouped_fragments, mols);
set_reaction_data("ReactionStepProducts", CDX_PRODUCT_ID, scheme,
scheme.ReactionStepProducts, fragments, mols);
scheme.ReactionStepProducts, fragments,
grouped_fragments, mols);
auto agents = scheme.ReactionStepObjectsAboveArrow;
agents.insert(agents.end(),
scheme.ReactionStepObjectsBelowArrow.begin(),
scheme.ReactionStepObjectsBelowArrow.end());
set_reaction_data("ReactionStepAgents", CDX_AGENT_ID, scheme, agents,
fragments, mols);
fragments, grouped_fragments, mols);
// Set the Atom Maps
int sz = scheme.ReactionStepAtomMap.size();
if (sz % 2) {

View File

@@ -40,7 +40,6 @@ void check_smiles_and_roundtrip(const RWMol &m, const std::string &expected) {
TEST_CASE("CDXML") {
std::string cdxmlbase =
std::string(getenv("RDBASE")) + "/Code/GraphMol/test_data/CDXML/";
SECTION("SIMPLE") {
std::string cdxml1 = R"(<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE CDXML SYSTEM "http://www.cambridgesoft.com/xml/cdxml.dtd" >
@@ -423,29 +422,30 @@ TEST_CASE("CDXML") {
}
}
SECTION("Queries") {
{
auto fname = cdxmlbase + "query-atoms.cdxml";
std::vector<std::string> expected = {"*c1ccccc1", "*c1ccccc1", "*c1ccccc1"};
std::vector<std::string> expected_smarts = {
"[#6]1:[#6]:[#6]:[#6]:[#6]:[#6]:1-*",
"[#6]1:[#6]:[#6]:[#6]:[#6]:[#6]:1-[!#1]",
"[#6]1:[#6]:[#6]:[#6]:[#6]:[#6]:1-[!#6&!#1]"};
auto mols = MolsFromCDXMLFile(fname);
CHECK(mols.size() == expected.size());
int i = 0;
for (auto &mol : mols) {
CHECK(MolToSmarts(*mol) == expected_smarts[i]);
CHECK(MolToSmiles(*mol) == expected[i++]);
}
}
{
auto fname = cdxmlbase + "anybond.cdxml";
auto mols = MolsFromCDXMLFile(fname);
CHECK(mols.size() == 1);
CHECK(MolToSmiles(*mols[0]) == "C1CCC~CC1");
CHECK(MolToSmarts(*mols[0]) == "[#6]1~[#6]-[#6]-[#6]-[#6]-[#6]-1");
{
auto fname = cdxmlbase + "query-atoms.cdxml";
std::vector<std::string> expected = {"*c1ccccc1", "*c1ccccc1",
"*c1ccccc1"};
std::vector<std::string> expected_smarts = {
"[#6]1:[#6]:[#6]:[#6]:[#6]:[#6]:1-*",
"[#6]1:[#6]:[#6]:[#6]:[#6]:[#6]:1-[!#1]",
"[#6]1:[#6]:[#6]:[#6]:[#6]:[#6]:1-[!#6&!#1]"};
auto mols = MolsFromCDXMLFile(fname);
CHECK(mols.size() == expected.size());
int i = 0;
for (auto &mol : mols) {
CHECK(MolToSmarts(*mol) == expected_smarts[i]);
CHECK(MolToSmiles(*mol) == expected[i++]);
}
}
{
auto fname = cdxmlbase + "anybond.cdxml";
auto mols = MolsFromCDXMLFile(fname);
CHECK(mols.size() == 1);
CHECK(MolToSmiles(*mols[0]) == "C1CCC~CC1");
CHECK(MolToSmarts(*mols[0]) == "[#6]1~[#6]-[#6]-[#6]-[#6]-[#6]-1");
}
}
SECTION("ElementList") {
auto fname = cdxmlbase + "element-list.cdxml";
@@ -1175,3 +1175,15 @@ TEST_CASE("Github #6887: and1 or1 in same mol") {
"CO[C@H](C)C[C@H](Cl)C[C@H](C)Br |o1:5,o2:8,&1:2|");
}
}
// Regression test for Github #7528: reading the grouped-fragments fixture
// without sanitization must yield two molecules.
TEST_CASE("Github #7528 - read fragments in groups") {
  const std::string cdxmlbase =
      std::string(getenv("RDBASE")) + "/Code/GraphMol/test_data/CDXML/";
  SECTION("case 1") {
    CDXMLParserParams params;
    params.sanitize = false;
    const auto fname = cdxmlbase + "github7467-grouped-fragments.cdxml";
    const auto mols = MolsFromCDXMLFile(fname, params);
    REQUIRE(mols.size() == 2);
  }
}

View File

@@ -0,0 +1,608 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE CDXML SYSTEM "http://www.cambridgesoft.com/xml/cdxml.dtd" >
<CDXML
CreationProgram="ChemDraw 21.0.0.28"
Name="github7467-grouped-fragments.cdxml"
BoundingBox="0.88 69.39 453.90 168.20"
WindowPosition="0 0"
WindowSize="0 0"
FractionalWidths="yes"
InterpretChemically="yes"
ShowAtomQuery="yes"
ShowAtomStereo="no"
ShowAtomEnhancedStereo="yes"
ShowAtomNumber="no"
ShowResidueID="no"
ShowBondQuery="yes"
ShowBondRxn="yes"
ShowBondStereo="no"
ShowTerminalCarbonLabels="no"
ShowNonTerminalCarbonLabels="no"
HideImplicitHydrogens="no"
Magnification="666"
LabelFont="58"
LabelSize="10"
LabelFace="96"
CaptionFont="58"
CaptionSize="10"
HashSpacing="2.50"
MarginWidth="1.60"
LineWidth="0.60"
BoldWidth="2"
BondLength="14.40"
BondSpacing="18"
ChainAngle="120"
LabelJustification="Auto"
CaptionJustification="Left"
AminoAcidTermini="HOH"
ShowSequenceTermini="yes"
ShowSequenceBonds="yes"
ShowSequenceUnlinkedBranches="no"
ResidueWrapCount="40"
ResidueBlockCount="10"
ResidueZigZag="yes"
NumberResidueBlocks="no"
PrintMargins="36 36 36 36"
MacPrintInfo="0003000000480048000000000300024CFFF4FFF4030C02580367052803FC0002000000480048000000000300024C000100000064000000010001010100000001270F000100010000000000000000000000000002001901900000000000400000000000000000000100000000000000000000000000000000"
ChemPropName=""
ChemPropFormula="Chemical Formula: "
ChemPropExactMass="Exact Mass: "
ChemPropMolWt="Molecular Weight: "
ChemPropMOverZ="m/z: "
ChemPropAnalysis="Elemental Analysis: "
ChemPropBoilingPt="Boiling Point: "
ChemPropMeltingPt="Melting Point: "
ChemPropCritTemp="Critical Temp: "
ChemPropCritPres="Critical Pres: "
ChemPropCritVol="Critical Vol: "
ChemPropGibbs="Gibbs Energy: "
ChemPropLogP="Log P: "
ChemPropMR="MR: "
ChemPropHenry="Henry&apos;s Law: "
ChemPropEForm="Heat of Form: "
ChemProptPSA="tPSA: "
ChemPropID=""
ChemPropFragmentLabel=""
color="0"
bgcolor="1"
RxnAutonumberStart="1"
RxnAutonumberConditions="no"
RxnAutonumberStyle="Roman"
RxnAutonumberFormat="(#)"
><colortable>
<color r="1" g="1" b="1"/>
<color r="0" g="0" b="0"/>
<color r="1" g="0" b="0"/>
<color r="1" g="1" b="0"/>
<color r="0" g="1" b="0"/>
<color r="0" g="1" b="1"/>
<color r="0" g="0" b="1"/>
<color r="1" g="0" b="1"/>
</colortable><fonttable>
<font id="21" charset="x-mac-roman" name="Helvetica"/>
<font id="58" charset="x-mac-roman" name="Arial"/>
</fonttable><page
id="82"
BoundingBox="0 0 776 240"
Width="776"
Height="240"
HeaderPosition="36"
FooterPosition="36"
PageOverlap="0"
PrintTrimMarks="yes"
HeightPages="1"
WidthPages="2"
DrawingSpace="poster"
><fragment
id="5"
BoundingBox="280.82 71.59 350.61 133.18"
Z="65"
><n
id="2"
p="333.97 109.58"
Z="63"
Element="14"
NumHydrogens="2"
NeedsClean="yes"
AS="N"
><t
p="330.64 113.26"
BoundingBox="331.10 105.90 350.61 115.56"
LabelJustification="Left"
LabelAlignment="Left"
><s font="21" size="10" color="0" face="96">SiH2</s></t></n><n
id="6"
p="323.79 119.77"
Z="64"
Warning="An atom in this label has an invalid valence."
Element="8"
NumHydrogens="0"
NeedsClean="yes"
AS="N"
><t
p="319.90 123.45"
BoundingBox="320.29 116.09 327.28 123.66"
LabelJustification="Left"
><s font="21" size="10" color="0" face="96">O</s></t></n><n
id="7"
p="309.39 119.77"
Z="66"
Element="14"
NumHydrogens="2"
NeedsClean="yes"
AS="N"
><t
p="310.50 123.45"
BoundingBox="291 116.09 309.82 125.75"
LabelJustification="Right"
Justification="Right"
LabelAlignment="Right"
><s font="21" size="10" color="0" face="96">SiH2</s></t></n><n
id="8"
p="299.21 109.58"
Z="67"
Warning="An atom in this label has an invalid valence."
Element="8"
NumHydrogens="0"
NeedsClean="yes"
AS="N"
><t
p="295.32 113.26"
BoundingBox="295.71 105.90 302.70 113.47"
LabelJustification="Left"
><s font="21" size="10" color="0" face="96">O</s></t></n><n
id="10"
p="299.21 95.18"
Z="68"
Element="14"
NumHydrogens="2"
NeedsClean="yes"
AS="N"
><t
p="300.32 98.86"
BoundingBox="280.82 91.50 299.64 101.16"
LabelJustification="Right"
Justification="Right"
LabelAlignment="Right"
><s font="21" size="10" color="0" face="96">SiH2</s></t></n><n
id="12"
p="309.39 85"
Z="69"
Warning="An atom in this label has an invalid valence."
Element="8"
NumHydrogens="0"
NeedsClean="yes"
AS="N"
><t
p="305.50 88.68"
BoundingBox="305.89 81.32 312.88 88.89"
LabelJustification="Left"
><s font="21" size="10" color="0" face="96">O</s></t></n><n
id="14"
p="323.79 85"
Z="70"
Element="14"
NumHydrogens="2"
NeedsClean="yes"
AS="N"
><t
p="320.46 88.68"
BoundingBox="320.92 81.32 340.43 90.98"
LabelJustification="Left"
LabelAlignment="Left"
><s font="21" size="10" color="0" face="96">SiH2</s></t></n><n
id="16"
p="333.97 95.18"
Z="71"
Warning="An atom in this label has an invalid valence."
Element="8"
NumHydrogens="0"
NeedsClean="yes"
AS="N"
><t
p="330.08 98.86"
BoundingBox="330.47 91.50 337.46 99.07"
LabelJustification="Left"
><s font="21" size="10" color="0" face="96">O</s></t></n><n
id="18"
p="347.28 89.67"
Z="72"
AS="N"
/><n
id="20"
p="303.88 71.70"
Z="73"
AS="N"
/><n
id="22"
p="285.91 115.09"
Z="74"
AS="N"
/><n
id="24"
p="329.30 133.07"
Z="75"
AS="N"
/><b
id="26"
Z="76"
B="2"
E="6"
BS="N"
/><b
id="27"
Z="77"
B="6"
E="7"
BS="N"
/><b
id="28"
Z="78"
B="7"
E="8"
BS="N"
/><b
id="29"
Z="79"
B="8"
E="10"
BS="N"
/><b
id="30"
Z="80"
B="10"
E="12"
BS="N"
/><b
id="31"
Z="81"
B="12"
E="14"
BS="N"
/><b
id="32"
Z="82"
B="14"
E="16"
BS="N"
/><b
id="33"
Z="83"
B="2"
E="16"
BS="N"
/><b
id="34"
Z="84"
B="16"
E="18"
BS="N"
/><b
id="35"
Z="85"
B="12"
E="20"
BS="N"
/><b
id="36"
Z="86"
B="8"
E="22"
BS="N"
/><b
id="37"
Z="87"
B="6"
E="24"
BS="N"
/></fragment><fragment
id="46"
BoundingBox="58.53 69.39 128.32 130.98"
Z="94"
><n
id="47"
p="111.68 107.38"
Z="92"
Element="14"
NumHydrogens="2"
NeedsClean="yes"
AS="N"
><t
p="108.35 111.06"
BoundingBox="108.81 103.70 128.32 113.36"
LabelJustification="Left"
LabelAlignment="Left"
><s font="21" size="10" color="0" face="96">SiH2</s></t></n><n
id="48"
p="101.50 117.57"
Z="93"
Warning="An atom in this label has an invalid valence."
Element="8"
NumHydrogens="0"
NeedsClean="yes"
AS="N"
><t
p="97.61 121.25"
BoundingBox="98 113.89 104.99 121.46"
LabelJustification="Left"
><s font="21" size="10" color="0" face="96">O</s></t></n><n
id="49"
p="87.10 117.57"
Z="95"
Element="14"
NumHydrogens="2"
NeedsClean="yes"
AS="N"
><t
p="88.21 121.25"
BoundingBox="68.71 113.89 87.53 123.55"
LabelJustification="Right"
Justification="Right"
LabelAlignment="Right"
><s font="21" size="10" color="0" face="96">SiH2</s></t></n><n
id="50"
p="76.92 107.38"
Z="96"
Warning="An atom in this label has an invalid valence."
Element="8"
NumHydrogens="0"
NeedsClean="yes"
AS="N"
><t
p="73.03 111.06"
BoundingBox="73.42 103.70 80.41 111.27"
LabelJustification="Left"
><s font="21" size="10" color="0" face="96">O</s></t></n><n
id="51"
p="76.92 92.98"
Z="97"
Element="14"
NumHydrogens="2"
NeedsClean="yes"
AS="N"
><t
p="78.03 96.66"
BoundingBox="58.53 89.30 77.35 98.96"
LabelJustification="Right"
Justification="Right"
LabelAlignment="Right"
><s font="21" size="10" color="0" face="96">SiH2</s></t></n><n
id="52"
p="87.10 82.80"
Z="98"
Warning="An atom in this label has an invalid valence."
Element="8"
NumHydrogens="0"
NeedsClean="yes"
AS="N"
><t
p="83.21 86.48"
BoundingBox="83.60 79.12 90.59 86.69"
LabelJustification="Left"
><s font="21" size="10" color="0" face="96">O</s></t></n><n
id="53"
p="101.50 82.80"
Z="99"
Element="14"
NumHydrogens="2"
NeedsClean="yes"
AS="N"
><t
p="98.17 86.48"
BoundingBox="98.63 79.12 118.14 88.78"
LabelJustification="Left"
LabelAlignment="Left"
><s font="21" size="10" color="0" face="96">SiH2</s></t></n><n
id="54"
p="111.68 92.98"
Z="100"
Warning="An atom in this label has an invalid valence."
Element="8"
NumHydrogens="0"
NeedsClean="yes"
AS="N"
><t
p="107.79 96.66"
BoundingBox="108.18 89.30 115.17 96.87"
LabelJustification="Left"
><s font="21" size="10" color="0" face="96">O</s></t></n><n
id="55"
p="124.99 87.47"
Z="101"
AS="N"
/><n
id="56"
p="81.59 69.50"
Z="102"
AS="N"
/><n
id="57"
p="63.62 112.89"
Z="103"
AS="N"
/><n
id="58"
p="107.01 130.87"
Z="104"
AS="N"
/><b
id="59"
Z="105"
B="47"
E="48"
BS="N"
/><b
id="60"
Z="106"
B="48"
E="49"
BS="N"
/><b
id="61"
Z="107"
B="49"
E="50"
BS="N"
/><b
id="62"
Z="108"
B="50"
E="51"
BS="N"
/><b
id="63"
Z="109"
B="51"
E="52"
BS="N"
/><b
id="64"
Z="110"
B="52"
E="53"
BS="N"
/><b
id="65"
Z="111"
B="53"
E="54"
BS="N"
/><b
id="66"
Z="112"
B="47"
E="54"
BS="N"
/><b
id="67"
Z="113"
B="54"
E="55"
BS="N"
/><b
id="68"
Z="114"
B="52"
E="56"
BS="N"
/><b
id="69"
Z="115"
B="50"
E="57"
BS="N"
/><b
id="70"
Z="116"
B="48"
E="58"
BS="N"
/></fragment><t
id="38"
p="315.48 166.12"
BoundingBox="223.17 158.75 407.58 168.20"
Z="88"
Warning="Chemical Interpretation is not possible for this label"
CaptionJustification="Center"
Justification="Center"
LineHeight="auto"
><s font="21" size="10" color="0">Caution: Valence appears to be exceeded</s></t><t
id="39"
p="317.48 146.19"
BoundingBox="236.34 139 398.49 148.29"
Z="89"
InterpretChemically="no"
CaptionJustification="Center"
Justification="Center"
LineHeight="auto"
><s font="58" size="10" color="0">2,4,6,8-tetramethylcyclotetrasiloxane</s></t><t
id="45"
p="201.42 121.13"
BoundingBox="201.98 115.24 206.70 119.97"
Z="91"
Warning="Chemical Interpretation is not possible for this label"
LineHeight="auto"
><s font="58" size="10" color="0">+</s></t><t
id="71"
p="93.19 163.92"
BoundingBox="0.88 156.55 185.29 166"
Z="117"
Warning="Chemical Interpretation is not possible for this label"
CaptionJustification="Center"
Justification="Center"
LineHeight="auto"
><s font="21" size="10" color="0">Caution: Valence appears to be exceeded</s></t><t
id="72"
p="95.19 143.99"
BoundingBox="14.05 136.80 176.20 146.09"
Z="118"
InterpretChemically="no"
CaptionJustification="Center"
Justification="Center"
LineHeight="auto"
><s font="58" size="10" color="0">2,4,6,8-tetramethylcyclotetrasiloxane</s></t><t
id="74"
p="93.40 61.84"
BoundingBox="93.40 61.84 93.45 61.89"
Z="120"
Visible="no"
IgnoreWarnings="yes"
InterpretChemically="no"
CaptionJustification="Center"
Justification="Center"
LineHeight="auto"
><s font="58" size="10" color="0"></s></t><t
id="76"
p="315.70 62.10"
BoundingBox="310.82 54.94 320.61 64.09"
Z="121"
Visible="no"
InterpretChemically="no"
CaptionJustification="Center"
Justification="Center"
LineHeight="auto"
><s font="58" size="10" color="0">[II]</s></t><graphic
id="80"
SupersededBy="83"
BoundingBox="453.90 119.54 423.70 119.54"
Z="62"
GraphicType="Line"
ArrowType="FullHead"
HeadSize="1500"
/><scheme
id="84"
><step
id="85"
ReactionStepReactants="46 5"
ReactionStepArrows="80"
/></scheme><chemicalproperty
id="75"
ChemicalPropertyDisplayID="74"
ChemicalPropertyIsActive="yes"
ChemicalPropertyType="24"
BasisObjects="47 48 46 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70"
/><chemicalproperty
id="77"
ChemicalPropertyDisplayID="76"
ChemicalPropertyIsActive="yes"
ChemicalPropertyType="24"
BasisObjects="2 6 5 7 8 10 12 14 16 18 20 22 24 26 27 28 29 30 31 32 33 34 35 36 37"
/><arrow
id="83"
BoundingBox="423.70 116.62 453.90 121.79"
Z="62"
FillType="None"
ArrowheadHead="Full"
ArrowheadType="Solid"
HeadSize="1500"
ArrowheadCenterSize="1313"
ArrowheadWidth="375"
Head3D="453.90 119.54 0"
Tail3D="423.70 119.54 0"
Center3D="641.05 179.38 0"
MajorAxisEnd3D="671.25 179.38 0"
MinorAxisEnd3D="641.05 209.58 0"
/><annotation
Keyword="Fragment Label"
Content="[I]"
/></page></CDXML>

View File

@@ -0,0 +1,608 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE CDXML SYSTEM "http://www.cambridgesoft.com/xml/cdxml.dtd" >
<CDXML
CreationProgram="ChemDraw 21.0.0.28"
Name="github7467-ungrouped-fragments.cdxml"
BoundingBox="0.88 69.39 453.90 168.20"
WindowPosition="0 0"
WindowSize="0 0"
FractionalWidths="yes"
InterpretChemically="yes"
ShowAtomQuery="yes"
ShowAtomStereo="no"
ShowAtomEnhancedStereo="yes"
ShowAtomNumber="no"
ShowResidueID="no"
ShowBondQuery="yes"
ShowBondRxn="yes"
ShowBondStereo="no"
ShowTerminalCarbonLabels="no"
ShowNonTerminalCarbonLabels="no"
HideImplicitHydrogens="no"
Magnification="666"
LabelFont="58"
LabelSize="10"
LabelFace="96"
CaptionFont="58"
CaptionSize="10"
HashSpacing="2.50"
MarginWidth="1.60"
LineWidth="0.60"
BoldWidth="2"
BondLength="14.40"
BondSpacing="18"
ChainAngle="120"
LabelJustification="Auto"
CaptionJustification="Left"
AminoAcidTermini="HOH"
ShowSequenceTermini="yes"
ShowSequenceBonds="yes"
ShowSequenceUnlinkedBranches="no"
ResidueWrapCount="40"
ResidueBlockCount="10"
ResidueZigZag="yes"
NumberResidueBlocks="no"
PrintMargins="36 36 36 36"
MacPrintInfo="0003000000480048000000000300024CFFF4FFF4030C02580367052803FC0002000000480048000000000300024C000100000064000000010001010100000001270F000100010000000000000000000000000002001901900000000000400000000000000000000100000000000000000000000000000000"
ChemPropName=""
ChemPropFormula="Chemical Formula: "
ChemPropExactMass="Exact Mass: "
ChemPropMolWt="Molecular Weight: "
ChemPropMOverZ="m/z: "
ChemPropAnalysis="Elemental Analysis: "
ChemPropBoilingPt="Boiling Point: "
ChemPropMeltingPt="Melting Point: "
ChemPropCritTemp="Critical Temp: "
ChemPropCritPres="Critical Pres: "
ChemPropCritVol="Critical Vol: "
ChemPropGibbs="Gibbs Energy: "
ChemPropLogP="Log P: "
ChemPropMR="MR: "
ChemPropHenry="Henry&apos;s Law: "
ChemPropEForm="Heat of Form: "
ChemProptPSA="tPSA: "
ChemPropID=""
ChemPropFragmentLabel=""
color="0"
bgcolor="1"
RxnAutonumberStart="1"
RxnAutonumberConditions="no"
RxnAutonumberStyle="Roman"
RxnAutonumberFormat="(#)"
><colortable>
<color r="1" g="1" b="1"/>
<color r="0" g="0" b="0"/>
<color r="1" g="0" b="0"/>
<color r="1" g="1" b="0"/>
<color r="0" g="1" b="0"/>
<color r="0" g="1" b="1"/>
<color r="0" g="0" b="1"/>
<color r="1" g="0" b="1"/>
</colortable><fonttable>
<font id="21" charset="x-mac-roman" name="Helvetica"/>
<font id="58" charset="x-mac-roman" name="Arial"/>
</fonttable><page
id="82"
BoundingBox="0 0 776 240"
Width="776"
Height="240"
HeaderPosition="36"
FooterPosition="36"
PageOverlap="0"
PrintTrimMarks="yes"
HeightPages="1"
WidthPages="2"
DrawingSpace="poster"
><fragment
id="5"
BoundingBox="280.82 71.59 350.61 133.18"
Z="65"
><n
id="2"
p="333.97 109.58"
Z="63"
Element="14"
NumHydrogens="2"
NeedsClean="yes"
AS="N"
><t
p="330.64 113.26"
BoundingBox="331.10 105.90 350.61 115.56"
LabelJustification="Left"
LabelAlignment="Left"
><s font="21" size="10" color="0" face="96">SiH2</s></t></n><n
id="6"
p="323.79 119.77"
Z="64"
Warning="An atom in this label has an invalid valence."
Element="8"
NumHydrogens="0"
NeedsClean="yes"
AS="N"
><t
p="319.90 123.45"
BoundingBox="320.29 116.09 327.28 123.66"
LabelJustification="Left"
><s font="21" size="10" color="0" face="96">O</s></t></n><n
id="7"
p="309.39 119.77"
Z="66"
Element="14"
NumHydrogens="2"
NeedsClean="yes"
AS="N"
><t
p="310.50 123.45"
BoundingBox="291 116.09 309.82 125.75"
LabelJustification="Right"
Justification="Right"
LabelAlignment="Right"
><s font="21" size="10" color="0" face="96">SiH2</s></t></n><n
id="8"
p="299.21 109.58"
Z="67"
Warning="An atom in this label has an invalid valence."
Element="8"
NumHydrogens="0"
NeedsClean="yes"
AS="N"
><t
p="295.32 113.26"
BoundingBox="295.71 105.90 302.70 113.47"
LabelJustification="Left"
><s font="21" size="10" color="0" face="96">O</s></t></n><n
id="10"
p="299.21 95.18"
Z="68"
Element="14"
NumHydrogens="2"
NeedsClean="yes"
AS="N"
><t
p="300.32 98.86"
BoundingBox="280.82 91.50 299.64 101.16"
LabelJustification="Right"
Justification="Right"
LabelAlignment="Right"
><s font="21" size="10" color="0" face="96">SiH2</s></t></n><n
id="12"
p="309.39 85"
Z="69"
Warning="An atom in this label has an invalid valence."
Element="8"
NumHydrogens="0"
NeedsClean="yes"
AS="N"
><t
p="305.50 88.68"
BoundingBox="305.89 81.32 312.88 88.89"
LabelJustification="Left"
><s font="21" size="10" color="0" face="96">O</s></t></n><n
id="14"
p="323.79 85"
Z="70"
Element="14"
NumHydrogens="2"
NeedsClean="yes"
AS="N"
><t
p="320.46 88.68"
BoundingBox="320.92 81.32 340.43 90.98"
LabelJustification="Left"
LabelAlignment="Left"
><s font="21" size="10" color="0" face="96">SiH2</s></t></n><n
id="16"
p="333.97 95.18"
Z="71"
Warning="An atom in this label has an invalid valence."
Element="8"
NumHydrogens="0"
NeedsClean="yes"
AS="N"
><t
p="330.08 98.86"
BoundingBox="330.47 91.50 337.46 99.07"
LabelJustification="Left"
><s font="21" size="10" color="0" face="96">O</s></t></n><n
id="18"
p="347.28 89.67"
Z="72"
AS="N"
/><n
id="20"
p="303.88 71.70"
Z="73"
AS="N"
/><n
id="22"
p="285.91 115.09"
Z="74"
AS="N"
/><n
id="24"
p="329.30 133.07"
Z="75"
AS="N"
/><b
id="26"
Z="76"
B="2"
E="6"
BS="N"
/><b
id="27"
Z="77"
B="6"
E="7"
BS="N"
/><b
id="28"
Z="78"
B="7"
E="8"
BS="N"
/><b
id="29"
Z="79"
B="8"
E="10"
BS="N"
/><b
id="30"
Z="80"
B="10"
E="12"
BS="N"
/><b
id="31"
Z="81"
B="12"
E="14"
BS="N"
/><b
id="32"
Z="82"
B="14"
E="16"
BS="N"
/><b
id="33"
Z="83"
B="2"
E="16"
BS="N"
/><b
id="34"
Z="84"
B="16"
E="18"
BS="N"
/><b
id="35"
Z="85"
B="12"
E="20"
BS="N"
/><b
id="36"
Z="86"
B="8"
E="22"
BS="N"
/><b
id="37"
Z="87"
B="6"
E="24"
BS="N"
/></fragment><fragment
id="46"
BoundingBox="58.53 69.39 128.32 130.98"
Z="94"
><n
id="47"
p="111.68 107.38"
Z="92"
Element="14"
NumHydrogens="2"
NeedsClean="yes"
AS="N"
><t
p="108.35 111.06"
BoundingBox="108.81 103.70 128.32 113.36"
LabelJustification="Left"
LabelAlignment="Left"
><s font="21" size="10" color="0" face="96">SiH2</s></t></n><n
id="48"
p="101.50 117.57"
Z="93"
Warning="An atom in this label has an invalid valence."
Element="8"
NumHydrogens="0"
NeedsClean="yes"
AS="N"
><t
p="97.61 121.25"
BoundingBox="98 113.89 104.99 121.46"
LabelJustification="Left"
><s font="21" size="10" color="0" face="96">O</s></t></n><n
id="49"
p="87.10 117.57"
Z="95"
Element="14"
NumHydrogens="2"
NeedsClean="yes"
AS="N"
><t
p="88.21 121.25"
BoundingBox="68.71 113.89 87.53 123.55"
LabelJustification="Right"
Justification="Right"
LabelAlignment="Right"
><s font="21" size="10" color="0" face="96">SiH2</s></t></n><n
id="50"
p="76.92 107.38"
Z="96"
Warning="An atom in this label has an invalid valence."
Element="8"
NumHydrogens="0"
NeedsClean="yes"
AS="N"
><t
p="73.03 111.06"
BoundingBox="73.42 103.70 80.41 111.27"
LabelJustification="Left"
><s font="21" size="10" color="0" face="96">O</s></t></n><n
id="51"
p="76.92 92.98"
Z="97"
Element="14"
NumHydrogens="2"
NeedsClean="yes"
AS="N"
><t
p="78.03 96.66"
BoundingBox="58.53 89.30 77.35 98.96"
LabelJustification="Right"
Justification="Right"
LabelAlignment="Right"
><s font="21" size="10" color="0" face="96">SiH2</s></t></n><n
id="52"
p="87.10 82.80"
Z="98"
Warning="An atom in this label has an invalid valence."
Element="8"
NumHydrogens="0"
NeedsClean="yes"
AS="N"
><t
p="83.21 86.48"
BoundingBox="83.60 79.12 90.59 86.69"
LabelJustification="Left"
><s font="21" size="10" color="0" face="96">O</s></t></n><n
id="53"
p="101.50 82.80"
Z="99"
Element="14"
NumHydrogens="2"
NeedsClean="yes"
AS="N"
><t
p="98.17 86.48"
BoundingBox="98.63 79.12 118.14 88.78"
LabelJustification="Left"
LabelAlignment="Left"
><s font="21" size="10" color="0" face="96">SiH2</s></t></n><n
id="54"
p="111.68 92.98"
Z="100"
Warning="An atom in this label has an invalid valence."
Element="8"
NumHydrogens="0"
NeedsClean="yes"
AS="N"
><t
p="107.79 96.66"
BoundingBox="108.18 89.30 115.17 96.87"
LabelJustification="Left"
><s font="21" size="10" color="0" face="96">O</s></t></n><n
id="55"
p="124.99 87.47"
Z="101"
AS="N"
/><n
id="56"
p="81.59 69.50"
Z="102"
AS="N"
/><n
id="57"
p="63.62 112.89"
Z="103"
AS="N"
/><n
id="58"
p="107.01 130.87"
Z="104"
AS="N"
/><b
id="59"
Z="105"
B="47"
E="48"
BS="N"
/><b
id="60"
Z="106"
B="48"
E="49"
BS="N"
/><b
id="61"
Z="107"
B="49"
E="50"
BS="N"
/><b
id="62"
Z="108"
B="50"
E="51"
BS="N"
/><b
id="63"
Z="109"
B="51"
E="52"
BS="N"
/><b
id="64"
Z="110"
B="52"
E="53"
BS="N"
/><b
id="65"
Z="111"
B="53"
E="54"
BS="N"
/><b
id="66"
Z="112"
B="47"
E="54"
BS="N"
/><b
id="67"
Z="113"
B="54"
E="55"
BS="N"
/><b
id="68"
Z="114"
B="52"
E="56"
BS="N"
/><b
id="69"
Z="115"
B="50"
E="57"
BS="N"
/><b
id="70"
Z="116"
B="48"
E="58"
BS="N"
/></fragment><t
id="38"
p="315.48 166.12"
BoundingBox="223.17 158.75 407.58 168.20"
Z="88"
Warning="Chemical Interpretation is not possible for this label"
CaptionJustification="Center"
Justification="Center"
LineHeight="auto"
><s font="21" size="10" color="0">Caution: Valence appears to be exceeded</s></t><t
id="39"
p="317.48 146.19"
BoundingBox="236.34 139 398.49 148.29"
Z="89"
InterpretChemically="no"
CaptionJustification="Center"
Justification="Center"
LineHeight="auto"
><s font="58" size="10" color="0">2,4,6,8-tetramethylcyclotetrasiloxane</s></t><t
id="45"
p="201.42 121.13"
BoundingBox="201.98 115.24 206.70 119.97"
Z="91"
Warning="Chemical Interpretation is not possible for this label"
LineHeight="auto"
><s font="58" size="10" color="0">+</s></t><t
id="71"
p="93.19 163.92"
BoundingBox="0.88 156.55 185.29 166"
Z="117"
Warning="Chemical Interpretation is not possible for this label"
CaptionJustification="Center"
Justification="Center"
LineHeight="auto"
><s font="21" size="10" color="0">Caution: Valence appears to be exceeded</s></t><t
id="72"
p="95.19 143.99"
BoundingBox="14.05 136.80 176.20 146.09"
Z="118"
InterpretChemically="no"
CaptionJustification="Center"
Justification="Center"
LineHeight="auto"
><s font="58" size="10" color="0">2,4,6,8-tetramethylcyclotetrasiloxane</s></t><t
id="74"
p="93.40 61.84"
BoundingBox="93.40 61.84 93.45 61.89"
Z="120"
Visible="no"
IgnoreWarnings="yes"
InterpretChemically="no"
CaptionJustification="Center"
Justification="Center"
LineHeight="auto"
><s font="58" size="10" color="0"></s></t><t
id="76"
p="315.70 62.10"
BoundingBox="310.82 54.94 320.61 64.09"
Z="121"
Visible="no"
InterpretChemically="no"
CaptionJustification="Center"
Justification="Center"
LineHeight="auto"
><s font="58" size="10" color="0">[II]</s></t><graphic
id="80"
SupersededBy="83"
BoundingBox="453.90 119.54 423.70 119.54"
Z="62"
GraphicType="Line"
ArrowType="FullHead"
HeadSize="1500"
/><scheme
id="84"
><step
id="85"
ReactionStepReactants="46 5"
ReactionStepArrows="80"
/></scheme><chemicalproperty
id="75"
ChemicalPropertyDisplayID="74"
ChemicalPropertyIsActive="yes"
ChemicalPropertyType="24"
BasisObjects="47 48 46 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70"
/><chemicalproperty
id="77"
ChemicalPropertyDisplayID="76"
ChemicalPropertyIsActive="yes"
ChemicalPropertyType="24"
BasisObjects="2 6 5 7 8 10 12 14 16 18 20 22 24 26 27 28 29 30 31 32 33 34 35 36 37"
/><arrow
id="83"
BoundingBox="423.70 116.62 453.90 121.79"
Z="62"
FillType="None"
ArrowheadHead="Full"
ArrowheadType="Solid"
HeadSize="1500"
ArrowheadCenterSize="1313"
ArrowheadWidth="375"
Head3D="453.90 119.54 0"
Tail3D="423.70 119.54 0"
Center3D="641.05 179.38 0"
MajorAxisEnd3D="671.25 179.38 0"
MinorAxisEnd3D="641.05 209.58 0"
/><annotation
Keyword="Fragment Label"
Content="[I]"
/></page></CDXML>

View File

@@ -0,0 +1,335 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE CDXML SYSTEM "http://www.cambridgesoft.com/xml/cdxml.dtd" >
<CDXML
CreationProgram="ChemDraw 21.0.0.28"
Name="reaction-with-grouped-templates.cdxml"
BoundingBox="94.85 148.42 469.63 209.58"
WindowPosition="0 0"
WindowSize="0 0"
FractionalWidths="yes"
InterpretChemically="yes"
ShowAtomQuery="yes"
ShowAtomStereo="no"
ShowAtomEnhancedStereo="yes"
ShowAtomNumber="no"
ShowResidueID="no"
ShowBondQuery="yes"
ShowBondRxn="yes"
ShowBondStereo="no"
ShowTerminalCarbonLabels="no"
ShowNonTerminalCarbonLabels="no"
HideImplicitHydrogens="no"
LabelFont="21"
LabelSize="10"
LabelFace="96"
CaptionFont="536"
CaptionSize="12"
HashSpacing="2.70"
MarginWidth="2"
LineWidth="1"
BoldWidth="4"
BondLength="30"
BondSpacing="12"
ChainAngle="120"
LabelJustification="Auto"
CaptionJustification="Left"
AminoAcidTermini="HOH"
ShowSequenceTermini="yes"
ShowSequenceBonds="yes"
ShowSequenceUnlinkedBranches="no"
ResidueWrapCount="40"
ResidueBlockCount="10"
ResidueZigZag="yes"
NumberResidueBlocks="no"
PrintMargins="36 36 36 36"
MacPrintInfo="0003000000480048000000000332023BFFF4FFF4033E02470367057B03DF0002000000480048000000000332023B000100000064000000010001010100000001270F000100010000000000000000000000000002001901900000000000400000000000000000000100000000000000000000000000000000"
ChemPropName=""
ChemPropFormula="Chemical Formula: "
ChemPropExactMass="Exact Mass: "
ChemPropMolWt="Molecular Weight: "
ChemPropMOverZ="m/z: "
ChemPropAnalysis="Elemental Analysis: "
ChemPropBoilingPt="Boiling Point: "
ChemPropMeltingPt="Melting Point: "
ChemPropCritTemp="Critical Temp: "
ChemPropCritPres="Critical Pres: "
ChemPropCritVol="Critical Vol: "
ChemPropGibbs="Gibbs Energy: "
ChemPropLogP="Log P: "
ChemPropMR="MR: "
ChemPropHenry="Henry&apos;s Law: "
ChemPropEForm="Heat of Form: "
ChemProptPSA="tPSA: "
ChemPropID=""
ChemPropFragmentLabel=""
color="0"
bgcolor="1"
RxnAutonumberStart="1"
RxnAutonumberConditions="no"
RxnAutonumberStyle="Roman"
RxnAutonumberFormat="(#)"
><colortable>
<color r="1" g="1" b="1"/>
<color r="0" g="0" b="0"/>
<color r="1" g="0" b="0"/>
<color r="1" g="1" b="0"/>
<color r="0" g="1" b="0"/>
<color r="0" g="1" b="1"/>
<color r="0" g="0" b="1"/>
<color r="1" g="0" b="1"/>
</colortable><fonttable>
<font id="21" charset="x-mac-roman" name="Helvetica"/>
<font id="58" charset="x-mac-roman" name="Arial"/>
<font id="536" charset="x-mac-roman" name="Times New Roman"/>
</fonttable><page
id="114"
BoundingBox="0 0 523 770"
HeaderPosition="36"
FooterPosition="36"
PrintTrimMarks="yes"
HeightPages="1"
WidthPages="1"
><group
id="103"
BoundingBox="251.52 148.42 360.14 209.58"
Z="86"
><fragment
id="49"
BoundingBox="251.52 148.42 304.48 209.58"
Z="63"
><n
id="46"
p="252.02 164"
Z="61"
AS="N"
/><n
id="48"
p="252.02 194"
Z="62"
AS="N"
/><n
id="50"
p="278 209"
Z="64"
AS="N"
/><n
id="52"
p="303.98 194"
Z="65"
AS="N"
/><n
id="54"
p="303.98 164"
Z="66"
AS="N"
/><n
id="56"
p="278 149"
Z="67"
AS="N"
/><b
id="58"
Z="68"
B="46"
E="48"
BS="N"
/><b
id="59"
Z="69"
B="48"
E="50"
BS="N"
/><b
id="60"
Z="70"
B="50"
E="52"
BS="N"
/><b
id="61"
Z="71"
B="52"
E="54"
BS="N"
/><b
id="62"
Z="72"
B="54"
E="56"
BS="N"
/><b
id="63"
Z="73"
B="56"
E="46"
BS="N"
/></fragment><fragment
id="67"
BoundingBox="312.85 153.14 360.14 202.86"
Z="76"
><n
id="64"
p="313.35 163"
Z="74"
AS="N"
/><n
id="66"
p="313.35 193"
Z="75"
AS="N"
/><n
id="68"
p="341.89 202.27"
Z="77"
AS="N"
/><n
id="70"
p="359.52 178"
Z="78"
AS="N"
/><n
id="72"
p="341.89 153.73"
Z="79"
AS="N"
/><b
id="74"
Z="80"
B="64"
E="66"
Order="2"
BS="N"
BondCircularOrdering="78 0 0 75"
/><b
id="75"
Z="81"
B="66"
E="68"
BS="N"
/><b
id="76"
Z="82"
B="68"
E="70"
Order="2"
BS="N"
BondCircularOrdering="75 0 0 77"
/><b
id="77"
Z="83"
B="70"
E="72"
BS="N"
/><b
id="78"
Z="84"
B="72"
E="64"
BS="N"
/></fragment></group><group
id="104"
BoundingBox="94.85 159.14 142.14 208.86"
Z="87"
><fragment
id="5"
BoundingBox="94.85 159.14 142.14 208.86"
Z="49"
><n
id="2"
p="95.35 169"
Z="47"
AS="N"
/><n
id="4"
p="95.35 199"
Z="48"
AS="N"
/><n
id="6"
p="123.89 208.27"
Z="50"
AS="N"
/><n
id="8"
p="141.52 184"
Z="51"
AS="N"
/><n
id="10"
p="123.89 159.73"
Z="52"
AS="N"
/><b
id="12"
Z="53"
B="2"
E="4"
Order="2"
BS="N"
BondCircularOrdering="16 0 0 13"
/><b
id="13"
Z="54"
B="4"
E="6"
BS="N"
/><b
id="14"
Z="55"
B="6"
E="8"
Order="2"
BS="N"
BondCircularOrdering="13 0 0 15"
/><b
id="15"
Z="56"
B="8"
E="10"
BS="N"
/><b
id="16"
Z="57"
B="10"
E="2"
BS="N"
/></fragment></group><t
id="45"
p="226.63 188.35"
BoundingBox="227.79 176.10 237.62 185.94"
Z="60"
Warning="Chemical Interpretation is not possible for this label"
LineHeight="auto"
><s font="58" size="20.8" color="0">+</s></t><graphic
id="112"
SupersededBy="115"
BoundingBox="469.63 182.04 406.71 182.04"
Z="85"
GraphicType="Line"
ArrowType="FullHead"
HeadSize="1500"
/><scheme
id="116"
><step
id="117"
ReactionStepReactants="104 103"
ReactionStepArrows="112"
/></scheme><arrow
id="115"
BoundingBox="406.71 177.29 469.63 185.79"
Z="85"
FillType="None"
ArrowheadHead="Full"
ArrowheadType="Solid"
HeadSize="1500"
ArrowheadCenterSize="1313"
ArrowheadWidth="375"
Head3D="469.63 182.04 0"
Tail3D="406.71 182.04 0"
Center3D="859.52 306.71 0"
MajorAxisEnd3D="922.44 306.71 0"
MinorAxisEnd3D="859.52 369.63 0"
/><annotation
Keyword="Fragment Label"
Content="[I]"
/></page></CDXML>