// // Copyright (C) 2022-2023 Tad Hurst, Greg Landrum and other RDKit contributors // // @@ All Rights Reserved @@ // This file is part of the RDKit. // The contents are covered by the terms of the BSD license // which is included in the file license.txt, found at the root // of the RDKit source tree. // #include #include "MarvinDefs.h" #include #include #include namespace RDKit { std::string getDoubleAsText(double val, unsigned int precision = 6) { // this is left here for backwards compatibility if (precision == 6 && fabs(val) < 0.00001) { return "0.00000"; } std::ostringstream valstr; valstr << std::setprecision(precision) << val; return valstr.str(); } std::string MarvinArrow::toString() const { std::ostringstream out; out << ""; return out.str(); } ptree MarvinMolBase::toPtree() const { ptree out; out.put(".molID", molID); if (parent != nullptr) { out.put(".id", id); out.put(".role", role()); } if (this->hasAtomBondBlocks()) { for (auto atom : atoms) { out.add_child("atomArray.atom", atom->toPtree(this->coordinatePrecision)); } for (auto bond : bonds) { out.add_child("bondArray.bond", bond->toPtree()); } } return out; } void MarvinMolBase::addSgroupsToPtree(ptree &out) const { for (auto &sgroup : sgroups) { out.add_child("molecule", sgroup->toPtree()); } } template bool getCleanNumber(std::string strToParse, T &outVal) { if (boost::algorithm::trim_copy(strToParse) != strToParse) { // should be no white space return false; } try { outVal = boost::lexical_cast(strToParse); } catch (const std::exception &e) { return false; } return true; } void MarvinMolBase::parseAtomsAndBonds(ptree &molTree) { boost::property_tree::ptree atomArray = molTree.get_child("atomArray"); // there are two types of atom arrays: // // AND // // // // See which one we have std::string atomID = atomArray.get(".atomID", "UseLongForm"); if (atomID == "UseLongForm") { // long form - each atom on a line for (auto &v : molTree.get_child("atomArray")) { if (v.first != "atom") { continue; } auto *mrvAtom = new MarvinAtom(); this->pushOwnedAtom(mrvAtom); this->atoms.push_back(mrvAtom); mrvAtom->id = v.second.get(".id", ""); mrvAtom->elementType = v.second.get(".elementType", ""); if (mrvAtom->id == "" || mrvAtom->elementType == "") { throw FileParseException( "Expected id, elementType for an atom definition in MRV file"); } std::string x2 = v.second.get(".x2", ""); std::string y2 = v.second.get(".y2", ""); // x2 and y2 are doubles if (x2 != "" && y2 != "" && (!getCleanNumber(x2, mrvAtom->x2) || !getCleanNumber(y2, mrvAtom->y2))) { throw FileParseException( "The values x2 and y2 must be large floats in MRV file"); } std::string x3 = v.second.get(".x3", ""); std::string y3 = v.second.get(".y3", ""); std::string z3 = v.second.get(".z3", ""); // x3, y3, and z3 are doubles if (x3 != "" && y3 != "" && z3 != "" && (!getCleanNumber(x3, mrvAtom->x3) || !getCleanNumber(y3, mrvAtom->y3) || !getCleanNumber(z3, mrvAtom->z3))) { throw FileParseException( "The values x3, y3, and z2 must be large floats in MRV file"); } std::string formalCharge = v.second.get(".formalCharge", ""); if (formalCharge != "") { if (!getCleanNumber(formalCharge, mrvAtom->formalCharge)) { throw FileParseException( "The value for formalCharge must be an integer in MRV file"); } } else { mrvAtom->formalCharge = 0; } mrvAtom->radical = v.second.get(".radical", ""); if (mrvAtom->radical != "") { if (std::find(marvinRadicalVals.begin(), marvinRadicalVals.end(), mrvAtom->radical) == marvinRadicalVals.end()) { std::ostringstream err; err << "The value for radical must be one of " << boost::algorithm::join(marvinRadicalVals, ", ") << " in MRV file"; throw FileParseException(err.str()); } } else { mrvAtom->radical = ""; } std::string isotopeStr = v.second.get(".isotope", ""); if (isotopeStr != "") { if (!getCleanNumber(isotopeStr, mrvAtom->isotope) || mrvAtom->isotope <= 0) { throw FileParseException( "The value for isotope must be a positive number in MRV file"); } } else { mrvAtom->isotope = 0; } std::string valenceStr = v.second.get(".mrvValence", ""); if (valenceStr != "") { if (!getCleanNumber(valenceStr, mrvAtom->mrvValence) || mrvAtom->mrvValence < 0) { throw FileParseException( "The value for mrvValence must be a positive number in MRV file"); } } else { mrvAtom->mrvValence = -1; } std::string hCountStr = v.second.get(".hydrogenCount", ""); if (hCountStr != "") { if (!getCleanNumber(hCountStr, mrvAtom->hydrogenCount) || mrvAtom->hydrogenCount < 0) { throw FileParseException( "The value for hydrogenCount must be a non-negative number in MRV file"); } } else { mrvAtom->hydrogenCount = -1; } mrvAtom->mrvAlias = v.second.get(".mrvAlias", ""); mrvAtom->rgroupRef = v.second.get(".rgroupRef", -1); mrvAtom->mrvStereoGroup = v.second.get(".mrvStereoGroup", ""); if (mrvAtom->mrvStereoGroup == "0") { mrvAtom->mrvStereoGroup = ""; } std::string mrvMap = v.second.get(".mrvMap", ""); if (mrvMap != "") { if (!getCleanNumber(mrvMap, mrvAtom->mrvMap) || mrvAtom->mrvMap <= 0) { throw FileParseException( "The value for mrvMap must be an non-=negative integer in MRV file"); } } else { mrvAtom->mrvMap = 0; } mrvAtom->sgroupRef = v.second.get(".sgroupRef", ""); mrvAtom->sgroupAttachmentPoint = v.second.get(".sgroupAttachmentPoint", ""); } } else // single line form of atoms { // std::vector atomIds; size_t atomCount; if (atomID == "") { atomCount = 0; } else { boost::algorithm::split(atomIds, atomID, boost::algorithm::is_space()); atomCount = atomIds.size(); } std::vector elementTypes; std::string elementType = atomArray.get(".elementType", ""); boost::algorithm::split(elementTypes, elementType, boost::algorithm::is_space()); std::vector x2s; std::string x2 = atomArray.get(".x2", ""); boost::algorithm::split(x2s, x2, boost::algorithm::is_space()); std::vector y2s; std::string y2 = atomArray.get(".y2", ""); boost::algorithm::split(y2s, y2, boost::algorithm::is_space()); std::vector formalCharges; std::string formalCharge = atomArray.get(".formalCharge", ""); boost::algorithm::split(formalCharges, formalCharge, boost::algorithm::is_space()); std::vector isotopes; std::string isotope = atomArray.get(".isotope", ""); boost::algorithm::split(isotopes, isotope, boost::algorithm::is_space()); std::vector radicals; std::string radical = atomArray.get(".radical", ""); boost::algorithm::split(radicals, radical, boost::algorithm::is_space()); std::vector hydrogenCounts; std::string hydrogenCount = atomArray.get(".hydrogenCount", ""); boost::algorithm::split(hydrogenCounts, hydrogenCount, boost::algorithm::is_space()); std::vector mrvValences; std::string mrvValence = atomArray.get(".mrvValence", ""); boost::algorithm::split(mrvValences, mrvValence, boost::algorithm::is_space()); std::vector mrvAliases; std::string mrvAlias = atomArray.get(".mrvAlias", ""); boost::algorithm::split(mrvAliases, mrvAlias, boost::algorithm::is_space()); std::vector rgroupRefs; std::string rgroupRef = atomArray.get(".rgroupRef", ""); boost::algorithm::split(rgroupRefs, rgroupRef, boost::algorithm::is_space()); std::vector mrvStereoGroups; std::string mrvStereoGroup = atomArray.get(".mrvStereoGroup", ""); boost::algorithm::split(mrvStereoGroups, mrvStereoGroup, boost::algorithm::is_space()); std::vector mrvMaps; std::string mrvMap = atomArray.get(".mrvMap", ""); boost::algorithm::split(mrvMaps, mrvMap, boost::algorithm::is_space()); std::vector sgroupRefs; std::string sgroupRef = atomArray.get(".sgroupRef", ""); boost::algorithm::split(sgroupRefs, sgroupRef, boost::algorithm::is_space()); std::vector sgroupAttachmentPoints; std::string sgroupAttachmentPoint = atomArray.get(".sgroupAttachmentPoint", ""); boost::algorithm::split(sgroupAttachmentPoints, sgroupAttachmentPoint, boost::algorithm::is_space()); if (atomID != "") { if (elementType == "") { throw FileParseException( "Expected an elementType array for an atomArray definition in MRV file"); } if (elementTypes.size() < atomCount) { throw FileParseException( "There must be an element type for each atom id"); } } for (size_t i = 0; i < atomCount; ++i) { auto *mrvAtom = new MarvinAtom(); this->pushOwnedAtom(mrvAtom); this->atoms.push_back(mrvAtom); mrvAtom->id = atomIds[i]; mrvAtom->elementType = elementTypes[i]; if (x2 != "" && y2 != "" && x2s.size() > i && y2s.size() > i) { if (!getCleanNumber(x2s[i], mrvAtom->x2) || !getCleanNumber(y2s[i], mrvAtom->y2)) { throw FileParseException( "The values x2 and y2 must be large floats in MRV file"); } } if (formalCharge != "" && formalCharges.size() > i) { if (!getCleanNumber(formalCharges[i], mrvAtom->formalCharge)) { throw FileParseException( "The value for formalCharge must be an integer in MRV file"); } } else { mrvAtom->formalCharge = 0; } if (isotope != "" && isotopes.size() > i) { if (!getCleanNumber(isotopes[i], mrvAtom->isotope)) { throw FileParseException( "The value for formalCharge must be an integer in MRV file"); } } else { mrvAtom->isotope = 0; } if (mrvValence != "" && mrvValences.size() > i) { if (mrvValences[i] == "-") { mrvAtom->mrvValence = -1; } else if (!getCleanNumber(mrvValences[i], mrvAtom->mrvValence)) { throw FileParseException( "The value for mrvValences must be an integer in MRV file"); } } else { mrvAtom->mrvValence = -1; } if (hydrogenCount != "" && hydrogenCounts.size() > i) { if (hydrogenCounts[i] != "-" && !getCleanNumber(hydrogenCounts[i], mrvAtom->hydrogenCount)) { throw FileParseException( "The value for hydrogenCount must be an integer in MRV file"); } } else { mrvAtom->hydrogenCount = -1; } if (radical != "" && radicals.size() > i && radicals[i] != "0") { mrvAtom->radical = radicals[i]; if (std::find(marvinRadicalVals.begin(), marvinRadicalVals.end(), mrvAtom->radical) == marvinRadicalVals.end()) { std::ostringstream err; err << "The value for radical must be one of " << boost::algorithm::join(marvinRadicalVals, ", ") << " in MRV file"; throw FileParseException(err.str()); } } else { mrvAtom->radical = ""; } if (mrvAlias != "" && mrvAliases.size() > i) { mrvAtom->mrvAlias = mrvAliases[i]; } else { mrvAtom->mrvAlias = ""; } if (rgroupRef != "" && rgroupRefs.size() > i) { if (!getCleanNumber(rgroupRefs[i], mrvAtom->rgroupRef)) { throw FileParseException( "rgroupRef value must be an integer in MRV file"); } } else { mrvAtom->rgroupRef = -1; } if (mrvStereoGroup != "" && mrvStereoGroups.size() > i && mrvStereoGroups[i] != "0") // "0" is NOT a stereo group { mrvAtom->mrvStereoGroup = mrvStereoGroups[i]; } else { mrvAtom->mrvStereoGroup = ""; } if (mrvMap != "" && mrvMaps.size() > i) { if (!getCleanNumber(mrvMaps[i], mrvAtom->mrvMap) || mrvAtom->mrvMap < 0) { throw FileParseException( "The value for mrvMap must be an non-negative integer in MRV file"); } } else { mrvAtom->mrvMap = 0; } if (sgroupRef != "" && sgroupRefs.size() > i && sgroupRefs[i] != "0") { mrvAtom->sgroupRef = sgroupRefs[i]; } else { mrvAtom->sgroupRef = ""; } if (sgroupAttachmentPoint != "" && sgroupAttachmentPoints.size() > i && sgroupAttachmentPoints[i] != "0") { mrvAtom->sgroupAttachmentPoint = sgroupAttachmentPoints[i]; } else { mrvAtom->sgroupAttachmentPoint = ""; } } } auto bondArray = molTree.get_child_optional("bondArray"); if (bondArray) { for (auto &v : molTree.get_child("bondArray")) { if (v.first != "bond") { continue; } auto *mrvBond = new MarvinBond(); this->pushOwnedBond(mrvBond); this->bonds.push_back(mrvBond); mrvBond->id = v.second.get(".id", ""); if (mrvBond->id == "") { throw FileParseException( "Expected id for an bond definition in MRV file"); } std::string atomRefs2 = v.second.get(".atomRefs2", ""); std::vector atomRefs2s; boost::algorithm::split(atomRefs2s, atomRefs2, boost::algorithm::is_space()); mrvBond->atomRefs2[0] = atomRefs2s[0]; mrvBond->atomRefs2[1] = atomRefs2s[1]; if (atomRefs2s.size() != 2 || std::find_if(atoms.begin(), atoms.end(), [mrvBond](auto a) { return a->id == mrvBond->atomRefs2[0]; }) == atoms.end() || std::find_if(atoms.begin(), atoms.end(), [mrvBond](auto a) { return a->id == mrvBond->atomRefs2[1]; }) == atoms.end()) { throw FileParseException( "atomRefs2 must contain two atom refs that must appear in the atoms array in MRV file"); } mrvBond->order = v.second.get(".order", ""); if (mrvBond->order != "") { if (std::find(marvinBondOrders.begin(), marvinBondOrders.end(), mrvBond->order) == marvinBondOrders.end()) { std::ostringstream err; err << "Expected one of " << boost::algorithm::join(marvinBondOrders, ", ") << " for order for an bond definition in MRV file"; throw FileParseException(err.str()); } } mrvBond->queryType = v.second.get(".queryType", ""); if (mrvBond->queryType != "") { if (std::find(marvinQueryBondsTypes.begin(), marvinQueryBondsTypes.end(), mrvBond->queryType) == marvinQueryBondsTypes.end()) { std::ostringstream err; err << "Expected one of " << boost::algorithm::join(marvinQueryBondsTypes, ", ") << " for queryType for an bond definition in MRV file"; throw FileParseException(err.str()); } } mrvBond->convention = v.second.get(".convention", ""); if (mrvBond->convention != "") { if (std::find(marvinConventionTypes.begin(), marvinConventionTypes.end(), mrvBond->convention) == marvinConventionTypes.end()) { std::ostringstream err; err << "Expected one of " << boost::algorithm::join(marvinConventionTypes, ", ") << " for convention for an bond definition in MRV file"; throw FileParseException(err.str()); } } int bondStereoDeclCount = 0; mrvBond->bondStereo.value = v.second.get("bondStereo", ""); if (mrvBond->bondStereo.value != "") { bondStereoDeclCount++; if (boost::algorithm::to_lower_copy(mrvBond->bondStereo.value) == "w" || boost::algorithm::to_lower_copy(mrvBond->bondStereo.value) == "h") { // do nothing - this is OK } else if (boost::algorithm::to_lower_copy(mrvBond->bondStereo.value) == "c" || boost::algorithm::to_lower_copy(mrvBond->bondStereo.value) == "t") { mrvBond->bondStereo.value = ""; // cis and trans are ignored } else { throw FileParseException( "The value for bondStereo must be \"H\", \"W\", \"C\" or \"T\" in MRV file (\"C\" and \"T\" are ignored)"); } } // see if bondstereo has a dictRef or convention auto bondStereoItem = v.second.get_child_optional("bondStereo"); if (bondStereoItem) { for (auto &ww : v.second) { mrvBond->bondStereo.convention = ww.second.get(".convention", ""); if (mrvBond->bondStereo.convention != "") { bondStereoDeclCount++; if (mrvBond->bondStereo.convention != "MDL") { throw FileParseException( "Expected MDL as value for the bond convention attribute"); } mrvBond->bondStereo.conventionValue = ww.second.get(".conventionValue", ""); if (std::find(marvinStereoConventionTypes.begin(), marvinStereoConventionTypes.end(), mrvBond->bondStereo.conventionValue) == marvinStereoConventionTypes.end()) { std::ostringstream err; err << "Expected one of " << boost::algorithm::join(marvinStereoConventionTypes, ", ") << " for a bond convention for an bond stereo def"; throw FileParseException(err.str()); } } mrvBond->bondStereo.dictRef = ww.second.get(".dictRef", ""); if (mrvBond->bondStereo.dictRef != "") { bondStereoDeclCount++; if (std::find(marvinStereoDictRefTypes.begin(), marvinStereoDictRefTypes.end(), mrvBond->bondStereo.dictRef) == marvinStereoDictRefTypes.end()) { std::ostringstream err; err << "Expected one of " << boost::algorithm::join(marvinStereoDictRefTypes, ", ") << " for a discRef value for an bond stereo def"; throw FileParseException(err.str()); } } } } // check that there were not too many different declarations if (bondStereoDeclCount > 1) { throw FileParseException( "bondStereo can either have only one of: a value, dictRef, or convention value"); } } } } ptree MarvinArrow::toPtree() const { ptree out; out.put(".type", type); out.put(".x1", getDoubleAsText(x1)); out.put(".y1", getDoubleAsText(y1)); out.put(".x2", getDoubleAsText(x2)); out.put(".y2", getDoubleAsText(y2)); return out; } std::string MarvinPlus::toString() const { std::ostringstream out; out << "" "" "" "" "" "" ""; return out.str(); } ptree MarvinPlus::toPtree() const { ptree out; out.put(".id", id); out.put(".toptions", "NOROT"); out.put(".fontScale", "14.0"); out.put(".halign", "CENTER"); out.put(".valign", "CENTER"); out.put(".autoSize", "true"); ptree field; field.put(".name", "text"); field.put_value("{D font=SansSerif,size=18,bold}+"); out.put_child("Field", field); ptree p1, p2, p3, p4; p1.put(".x", getDoubleAsText(x1)); p1.put(".y", getDoubleAsText(y1)); out.add_child("MPoint", p1); p2.put(".x", getDoubleAsText(x2)); p2.put(".y", getDoubleAsText(y1)); out.add_child("MPoint", p2); p3.put(".x", getDoubleAsText(x2)); p3.put(".y", getDoubleAsText(y2)); out.add_child("MPoint", p3); p4.put(".x", getDoubleAsText(x1)); p4.put(".y", getDoubleAsText(y2)); out.add_child("MPoint", p4); return out; } std::string MarvinCondition::toString() const { std::ostringstream out; out << " 0) { out << " fontScale=\"" << fontScale << "\""; } out << ">" "" << text << "" "" "" "" "" ""; return out.str(); } ptree MarvinCondition::toPtree() const { ptree out; out.put(".id", id); if (fontScale > 0) { out.put(".fontScale", fontScale); } out.put(".toption", "NOROT"); out.put(".halign", halign); out.put(".valign", valign); out.put(".autoSize", "true"); ptree field; field.put(".name", "text"); field.put_value("![CDATA[ {D font=SansSerif,size=18,bold}+]]"); out.put_child("Field", field); // add four points std::ostringstream xstr, ystr; xstr << x; ystr << y; for (int i = 0; i < 4; ++i) { ptree p1; p1.put(".x", xstr.str()); p1.put(".y", ystr.str()); out.add_child("MPoint", p1); } return out; } std::string MarvinAttachmentPoint::toString() const { std::ostringstream out; out << ""; return out.str(); } ptree MarvinAttachmentPoint::toPtree() const { ptree out; out.put(".atom", atom); out.put(".order", order); out.put(".bond", bond); return out; } MarvinAtom::MarvinAtom() : x2(DBL_MAX), y2(DBL_MAX), x3(DBL_MAX), y3(DBL_MAX), z3(DBL_MAX), formalCharge(0), mrvValence(-1), hydrogenCount(-1), mrvMap(0), sGroupRefIsSuperatom(false), rgroupRef(-1) // indicates that it was not specified {} MarvinAtom::MarvinAtom(const MarvinAtom &atomToCopy, std::string newId) : id(newId), elementType(atomToCopy.elementType), x2(atomToCopy.x2), y2(atomToCopy.y2), x3(atomToCopy.x3), y3(atomToCopy.y3), z3(atomToCopy.z3), formalCharge(atomToCopy.formalCharge), radical(atomToCopy.radical), isotope(atomToCopy.isotope), mrvValence(atomToCopy.mrvValence), hydrogenCount(atomToCopy.hydrogenCount), mrvAlias(atomToCopy.mrvAlias), mrvStereoGroup(atomToCopy.mrvStereoGroup), mrvMap(atomToCopy.mrvMap), sgroupRef(atomToCopy.sgroupRef), sGroupRefIsSuperatom(atomToCopy.sGroupRefIsSuperatom), sgroupAttachmentPoint(atomToCopy.sgroupAttachmentPoint), rgroupRef(atomToCopy.rgroupRef) // indicates that it was not specified {} bool MarvinAtom::operator==(const MarvinAtom &rhs) const { return this->id == rhs.id; } bool MarvinAtom::operator==(const MarvinAtom *rhs) const { return this->id == rhs->id; } bool MarvinAtom::isElement() const { return this->elementType != "R" && this->elementType != "*"; } std::string MarvinAtom::toString() const { // std::ostringstream out; out << "= 0) { out << " mrvValence=\"" << mrvValence << "\""; } if (hydrogenCount > 0) { out << " hydrogenCount=\"" << hydrogenCount << "\""; } if (mrvStereoGroup != "") { out << " mrvStereoGroup=\"" << mrvStereoGroup << "\""; } if (mrvMap != 0) { out << " mrvMap=\"" << mrvMap << "\""; } if (sgroupRef != "") { out << " sgroupRef=\"" << sgroupRef << "\""; } if (rgroupRef >= 0) { out << " rgroupRef=\"" << rgroupRef << "\""; } if (sgroupAttachmentPoint != "") { out << " sgroupAttachmentPoint=\"" << sgroupAttachmentPoint << "\""; } out << "/>"; return out.str(); } ptree MarvinAtom::toPtree(unsigned int coordinatePrecision) const { ptree out; out.put(".id", id); out.put(".elementType", elementType); if (x2 != DBL_MAX && y2 != DBL_MAX) { out.put(".x2", getDoubleAsText(x2, coordinatePrecision)); out.put(".y2", getDoubleAsText(y2, coordinatePrecision)); } if (x3 != DBL_MAX && y3 != DBL_MAX && z3 != DBL_MAX) { out.put(".x3", getDoubleAsText(x3, coordinatePrecision)); out.put(".y3", getDoubleAsText(y3, coordinatePrecision)); out.put(".z3", getDoubleAsText(z3, coordinatePrecision)); } if (formalCharge != 0) { out.put(".formalCharge", formalCharge); } if (radical != "") { out.put(".radical", radical); } if (isElement() && isotope != 0) { out.put(".isotope", isotope); } if (mrvAlias != "") { out.put(".mrvAlias", mrvAlias); } if (mrvValence >= 0) { out.put(".mrvValence", mrvValence); } if (hydrogenCount > 0) { out.put(".hydrogenCount", hydrogenCount); } if (mrvStereoGroup != "") { out.put(".mrvStereoGroup", mrvStereoGroup); } if (mrvMap != 0) { out.put(".mrvMap", mrvMap); } if (sgroupRef != "") { out.put(".sgroupRef", sgroupRef); } if (rgroupRef >= 0) { out.put(".rgroupRef", rgroupRef); } if (sgroupAttachmentPoint != "") { out.put(".sgroupAttachmentPoint", sgroupAttachmentPoint); } return out; } MarvinBond::MarvinBond(const MarvinBond &bondToCopy, std::string newId, std::string atomRef1, std::string atomRef2) : id(newId), order(bondToCopy.order), bondStereo(bondToCopy.bondStereo), queryType(bondToCopy.queryType), convention(bondToCopy.convention) { atomRefs2[0] = atomRef1; atomRefs2[1] = atomRef2; } const std::string MarvinBond::getBondType() const { std::string tempQueryType = boost::algorithm::to_upper_copy(queryType); std::string tempOrder = boost::algorithm::to_upper_copy(order); std::string tempConvention = boost::algorithm::to_upper_copy(convention); if (tempQueryType != "") { if (tempQueryType == "SD" || tempQueryType == "SA" || tempQueryType == "DA" || tempQueryType == "ANY") { return tempQueryType; } else { std::ostringstream err; err << "unrecognized query bond type " << queryType << " in MRV File "; throw FileParseException(err.str()); } } else if (tempConvention != "") // if no query type, check for convention { if (tempConvention == "CXN:COORD") { return "DATIVE"; } else if (tempConvention == "CXN:HYDROGEN") { return "HYDROGEN"; } else { std::ostringstream err; err << "unrecognized convention " << convention << " in MRV File "; throw FileParseException(err.str()); } } else if (tempOrder != "") // if no query type not convention, so check for order { if (tempOrder == "1" || tempOrder == "2" || tempOrder == "3" || tempOrder == "A") { return tempOrder; } else { std::ostringstream err; err << "unrecognized bond type " << order << " in MRV File "; throw FileParseException(err.str()); } } else { throw FileParseException( "bond must have one of: order, queryType, or convention in MRV File "); } } bool MarvinBond::isEqual(const MarvinAtom &other) const { return this->id == other.id; } bool MarvinBond::operator==(const MarvinAtom &rhs) const { return this->isEqual(rhs); } std::string MarvinBond::toString() const { // std::ostringstream out; out << "" << bondStereo.value << ""; } else if (bondStereo.dictRef != "") { out << " dictRef=\"" << bondStereo.dictRef << "\"/>"; } else { out << " convention=\"" << bondStereo.convention << "\" conventionValue=\"" << bondStereo.conventionValue << "\"/>"; } out << ""; } else { out << "/>"; // just end the bond } return out.str(); } ptree MarvinBond::toPtree() const { ptree out; out.put(".id", id); out.put(".atomRefs2", atomRefs2[0] + " " + atomRefs2[1]); if (order != "") { out.put(".order", order); } if (queryType != "") { out.put(".queryType", queryType); } if (convention != "") { out.put(".convention", convention); } if (bondStereo.value != "" || bondStereo.dictRef != "" || (bondStereo.convention != "" && bondStereo.conventionValue != "")) { ptree bondStereoPt; if (bondStereo.value != "") { bondStereoPt.put_value(bondStereo.value); } else if (bondStereo.dictRef != "") { bondStereoPt.put(".dictRef", bondStereo.dictRef); } else { bondStereoPt.put(".convention", bondStereo.convention); bondStereoPt.put(".conventionValue", bondStereo.conventionValue); } out.put_child("bondStereo", bondStereoPt); } return out; } MarvinMolBase::~MarvinMolBase() {} int MarvinMolBase::getAtomIndex(std::string id) const { auto atomIter = find_if(atoms.begin(), atoms.end(), [id](const MarvinAtom *arg) { return arg->id == id; }); if (atomIter != atoms.end()) { return atomIter - atoms.begin(); } else { return -1; } } void MarvinMolBase::pushOwnedAtom(MarvinAtom *atom) { PRECONDITION(atom, "bad atom"); PRECONDITION(this->parent, "only sgroups should call the base class version"); this->parent->pushOwnedAtom(atom); } void MarvinMolBase::pushOwnedBond(MarvinBond *bond) { PRECONDITION(bond, "bad bond"); PRECONDITION(this->parent, "only sgroups should call the base class version"); this->parent->pushOwnedBond(std::move(bond)); } void MarvinMolBase::removeOwnedAtom(MarvinAtom *atom) { PRECONDITION(this->parent, "only sgroups should call the base class version"); this->parent->removeOwnedAtom(atom); } void MarvinMolBase::removeOwnedBond(MarvinBond *bond) { PRECONDITION(this->parent, "only sgroups should call the base class version"); this->parent->removeOwnedBond(bond); } int MarvinMolBase::getBondIndex(std::string id) const { auto bondIter = find_if(bonds.begin(), bonds.end(), [id](const MarvinBond *arg) { return arg->id == id; }); if (bondIter != bonds.end()) { return bondIter - bonds.begin(); } else { return -1; } } MarvinAtom *MarvinMolBase::findAtomByRef(std::string atomId) { auto atomIter = find_if(this->atoms.begin(), this->atoms.end(), [atomId](const MarvinAtom *arg) { return arg->id == atomId; }); if (atomIter != this->atoms.end()) { return *atomIter; } // try the parent if there is one if (this->parent != nullptr) { return this->parent->findAtomByRef(atomId); } return nullptr; } MarvinBond *MarvinMolBase::findBondByRef(std::string bondId) { auto bondIter = find_if(this->bonds.begin(), this->bonds.end(), [bondId](const MarvinBond *arg) { return arg->id == bondId; }); if (bondIter != this->bonds.end()) { return *bondIter; } // try the parent if there is one if (this->parent != nullptr) { return this->parent->findBondByRef(bondId); } return nullptr; } const std::vector MarvinMolBase::getBondList() const { std::vector bondList; for (auto bond : bonds) { bondList.push_back(bond->id); } return bondList; } const std::vector MarvinMolBase::getAtomList() const { std::vector atomList; for (auto atom : atoms) { atomList.push_back(atom->id); } return atomList; } bool MarvinMolBase::has2dCoords() const { for (auto atom : atoms) { if (atom->x2 == DBL_MAX || atom->y2 == DBL_MAX) { return false; } } return true; } bool MarvinMolBase::has3dCoords() const { for (auto atom : atoms) { if (atom->x3 == DBL_MAX || atom->y3 == DBL_MAX || atom->z3 == DBL_MAX) { return false; } } return true; } bool MarvinMolBase::hasAny2dCoords() const { for (auto atom : atoms) { if (atom->x2 != DBL_MAX && atom->y2 != DBL_MAX) { return true; } } return false; } bool MarvinMolBase::hasAny3dCoords() const { for (auto atom : atoms) { if (atom->x3 != DBL_MAX && atom->y3 != DBL_MAX && atom->z3 != DBL_MAX) { return true; } } return false; } bool MarvinMolBase::hasCoords() const { return has2dCoords() || has3dCoords(); } void MarvinMolBase::removeCoords() { for (auto atom : atoms) { atom->x2 = DBL_MAX; atom->y2 = DBL_MAX; } } void MarvinMolBase::setPrecision(unsigned int precision) { this->coordinatePrecision = precision; } int MarvinMolBase::getExplicitValence(const MarvinAtom &marvinAtom) const { unsigned int resTimes10 = 0; // calculated as 10 * the actual value so we // can use int match, and have 1.5 order bonds for (auto bondPtr : bonds) { if (bondPtr->atomRefs2[0] != marvinAtom.id && bondPtr->atomRefs2[1] != marvinAtom.id) { continue; // this bond is NOT to the atom } std::string marvinBondType = bondPtr->getBondType(); if (marvinBondType == "SD" || marvinBondType == "SA" || marvinBondType == "DA") { resTimes10 += 15; // really 1.5 order bond } else if (marvinBondType == "ANY") { resTimes10 += 10; // no good answer for Any bonds - treat as a single bond } else if (marvinBondType == "DATIVE") { // // the following code has been removed because we really need both // ends of the dative bond to be counted as zero // // if (bondPtr->atomRefs2[1] == marvinAtom.id) //second atom of dative // bond count as 1 (first atom is zero) // resTimes10 += 10; // really 1 order bond } else if (marvinBondType == "HYDROGEN") { // hydrogen bonds do not add to valence} } else if (marvinBondType == "1") { resTimes10 += 10; // really 1 order bond } else if (marvinBondType == "2") { resTimes10 += 20; // really 2 order bond } else if (marvinBondType == "3") { resTimes10 += 30; // really 3 order bond } else if (marvinBondType == "A") { resTimes10 += 15; // really 1.5 order bond } } return resTimes10 / 10; } MarvinSruCoModSgroup::MarvinSruCoModSgroup(std::string roleNameInit, MarvinMolBase *parentInit) { PRECONDITION(parentInit != nullptr, "parentInit cannot be null"); parent = parentInit; if (roleNameInit != "SruSgroup" && roleNameInit != "CopolymerSgroup" && roleNameInit != "ModificationSgroup") { throw FileParseException( "A MarvinSruCoModSgroup type must be one of \"SruSgroup\". \"CopolymerSgroup\", and \"ModificationSgroup\""); } this->roleName = roleNameInit; } MarvinSruCoModSgroup::MarvinSruCoModSgroup(MarvinMolBase *parentInit, std::string roleNameInit, ptree &molTree) { PRECONDITION(parentInit != nullptr, "parentInit cannot be null"); // // // parent = parentInit; this->id = molTree.get(".id", ""); if (this->id == "") { throw FileParseException("Expected a iD in MRV file"); } this->molID = molTree.get(".molID", ""); if (this->molID == "") { throw FileParseException("Expected a molID in MRV file"); } this->roleName = roleNameInit; std::string atomRefsStr = molTree.get(".atomRefs", ""); if (atomRefsStr == "") { throw FileParseException( "Expected atomRefs for a SruSgroup definition in MRV file"); } std::vector atomList; boost::algorithm::split(atomList, atomRefsStr, boost::algorithm::is_space()); for (auto &it : atomList) { auto atomPtr = this->parent->findAtomByRef(it); if (atomPtr == nullptr) { throw FileParseException( "AtomRef specification for an SRU, Copolymer, or Modification group definition was not found in any parent"); } this->atoms.push_back(atomPtr); } this->title = molTree.get(".title", ""); if (this->title == "") { throw FileParseException( "Expected title for a SruSgroup definition in MRV file"); } // the title for an SRU group must be a lower case letter, or a range // of two positive ints (4-6) this->connect = molTree.get(".connect", ""); if (this->connect == "") { this->connect = "ht"; } if (std::find(sruSgroupConnectChoices.begin(), sruSgroupConnectChoices.end(), this->connect) == sruSgroupConnectChoices.end()) { std::ostringstream err; err << "Expected a connect string of \"" << boost::algorithm::join(sruSgroupConnectChoices, ", ") << "\" for a SruSgroup definition in MRV file"; throw FileParseException(err.str()); } this->correspondence = molTree.get(".correspondence", ""); std::string bondListStr = molTree.get(".bondList", ""); if (bondListStr != "") { std::vector bondList; boost::algorithm::split(bondList, bondListStr, boost::algorithm::is_space()); for (auto &it : bondList) { auto bondPtr = this->parent->findBondByRef(it); if (bondPtr == nullptr) { throw FileParseException( "BondList specification for an SRU, Copolymer, or Modification group definition was not found in any parent"); } this->bonds.push_back(bondPtr); } } } std::string MarvinSruCoModSgroup::toString() const { std::ostringstream out; out << ""; return out.str(); } ptree MarvinSruCoModSgroup::toPtree() const { ptree out = MarvinMolBase::toPtree(); out.put(".atomRefs", boost::algorithm::join(getAtomList(), " ")); out.put(".title", title); out.put(".connect", connect); out.put(".correspondence", correspondence); out.put(".bondList", boost::algorithm::join(getBondList(), " ")); return out; } MarvinMolBase *getActualParent(const MarvinMolBase *child) { PRECONDITION(child, "child cannot be null"); for (auto actualParent = child->parent;; actualParent = actualParent->parent) { TEST_ASSERT(actualParent); if (actualParent->role() != "MultipleSgroup") { return actualParent; ; } } TEST_ASSERT(false); // should not be reachable } MarvinMolBase *MarvinSruCoModSgroup::copyMol( const std::string &idAppendage) const { auto outSgroup = new MarvinSruCoModSgroup(this->roleName, this->parent); this->parent->sgroups.emplace_back(outSgroup); outSgroup->molID = this->molID + idAppendage; outSgroup->title = this->title; outSgroup->connect = this->connect; outSgroup->correspondence = this->correspondence; auto actualParent = getActualParent(this); // the only time this is to be called is when a multiple group above it is // being expanded and the new atoms and bonds have already been made // here we just need to find them and add refs to the new multiple sgroup for (auto atomToCopy : atoms) { auto atomIndex = actualParent->getAtomIndex(atomToCopy->id + idAppendage); if (atomIndex < 0) { throw FileParseException( "unexpected error - new atom not found in parent"); } outSgroup->atoms.push_back(actualParent->atoms[atomIndex]); } for (auto bondToCopy : bonds) { auto bondIndex = actualParent->getBondIndex(bondToCopy->id + idAppendage); if (bondIndex < 0) { throw FileParseException( "unexpected error - new bond not found in parent"); } outSgroup->bonds.push_back(actualParent->bonds[bondIndex]); } return outSgroup; } std::string MarvinSruCoModSgroup::role() const { return roleName; } bool MarvinSruCoModSgroup::hasAtomBondBlocks() const { return false; } MarvinDataSgroup::MarvinDataSgroup(MarvinMolBase *parentInit) { parent = parentInit; } MarvinDataSgroup::MarvinDataSgroup(MarvinMolBase *parentInit, ptree &molTree) { PRECONDITION(parentInit != nullptr, "parentInit cannot be null"); parent = parentInit; this->id = molTree.get(".id", ""); if (this->id == "") { throw FileParseException("Expected a iD in MRV file"); } this->molID = molTree.get(".molID", ""); if (this->molID == "") { throw FileParseException("Expected a molID in MRV file"); } std::string atomRefsStr = molTree.get(".atomRefs", ""); if (atomRefsStr == "") { throw FileParseException( "Expected atomRefs for a DataSgroup definition in MRV file"); } std::vector atomList; boost::algorithm::split(atomList, atomRefsStr, boost::algorithm::is_space()); for (auto &it : atomList) { auto atomPtr = this->parent->findAtomByRef(it); if (atomPtr == nullptr) { throw FileParseException( "AtomRef specification for a DataSgroup definition was not found in any parent"); } this->atoms.push_back(atomPtr); } this->context = molTree.get(".context", ""); this->fieldName = molTree.get(".fieldName", ""); this->placement = molTree.get(".placement", ""); this->unitsDisplayed = molTree.get(".unitsDisplayed", "Unit not displayed"); if (this->unitsDisplayed == "") { this->unitsDisplayed = "Unit not displayed"; } std::string unitsDisplayed = boost::algorithm::to_lower_copy(this->unitsDisplayed); if (unitsDisplayed != "unit displayed" && unitsDisplayed != "unit not displayed") { throw FileParseException( "Expected unitsDisplayed to be either \"Unit displayed\" or \"Unit not displayed\" for a DataSgroup definition in MRV file"); } this->queryType = molTree.get(".queryType", ""); this->queryOp = molTree.get(".queryOp", ""); this->units = molTree.get(".units", ""); this->fieldData = molTree.get(".fieldData", ""); std::string x = molTree.get(".x", "0.0"); std::string y = molTree.get(".y", "0.0"); if (x == "") { throw FileParseException( "Expected x for a DataSgroup definition in MRV file"); } if (!getCleanNumber(x, this->x)) { throw FileParseException( "The value for x must be a floating point value in MRV file"); } if (y == "") { throw FileParseException( "Expected y for a DataSgroup definition in MRV file"); } if (!getCleanNumber(y, this->y)) { throw FileParseException( "The value for y must be a floating point value in MRV file"); } } MarvinMolBase *MarvinDataSgroup::copyMol(const std::string &idAppendage) const { auto outSgroup = new MarvinDataSgroup(this->parent); this->parent->sgroups.emplace_back(outSgroup); outSgroup->molID = this->molID + idAppendage; outSgroup->id = this->id + idAppendage; outSgroup->context = this->context; outSgroup->fieldName = this->fieldName; outSgroup->placement = this->placement; outSgroup->unitsDisplayed = this->unitsDisplayed; outSgroup->units = this->units; outSgroup->fieldData = this->fieldData; outSgroup->queryType = this->queryType; outSgroup->queryOp = this->queryOp; outSgroup->x = this->x; outSgroup->y = this->y; // the only time this is to be called is when a multiple group above it is // being expanded and the new atoms and bonds have already been made // here we just need to find them and add refs to the new multiple sgroup for (auto atomToCopy : atoms) { auto atomIndex = this->parent->getAtomIndex(atomToCopy->id + idAppendage); if (atomIndex < 0) { throw FileParseException( "unexpected error - new atom not found in parent"); } outSgroup->atoms.push_back(this->parent->atoms[atomIndex]); } return outSgroup; } std::string MarvinDataSgroup::toString() const { std::ostringstream out; out << ""; return out.str(); } ptree MarvinDataSgroup::toPtree() const { ptree out = MarvinMolBase::toPtree(); out.put(".atomRefs", boost::algorithm::join(getAtomList(), " ")); out.put(".context", context); out.put(".fieldName", fieldName); out.put(".placement", placement); out.put(".unitsDisplayed", unitsDisplayed); out.put(".fieldData", fieldData); if (units != "") { out.put(".units", units); } if (queryType != "" && queryOp != "") { out.put(".queryType", queryType); out.put(".queryOp", queryOp); } std::ostringstream xstr, ystr; xstr << x; ystr << y; out.put(".x", xstr.str()); out.put(".y", ystr.str()); return out; } std::string MarvinDataSgroup::role() const { return "DataSgroup"; } bool MarvinDataSgroup::hasAtomBondBlocks() const { return false; } MarvinMultipleSgroup::MarvinMultipleSgroup(MarvinMolBase *parentInit) { parent = parentInit; } MarvinMultipleSgroup::MarvinMultipleSgroup(MarvinMolBase *parentInit, ptree &molTree) { PRECONDITION(parentInit != nullptr, "parentInit cannot be null"); parent = parentInit; this->id = molTree.get(".id", ""); if (this->id == "") { throw FileParseException("Expected a iD in MRV file"); } this->molID = molTree.get(".molID", ""); if (this->molID == "") { throw FileParseException("Expected a molID in MRV file"); } // first make sure this can be made into a proper MultipleSgroup. // To be valid, the title must be a positive integer and there must be // exactly two bonds the connect to the repeating group std::string atomRefsStr = molTree.get(".atomRefs", ""); if (atomRefsStr == "") { throw FileParseException( "Expected atomRefs for a MultipleSgroup definition in MRV file"); } std::vector atomList; boost::algorithm::split(atomList, atomRefsStr, boost::algorithm::is_space()); for (auto &it : atomList) { auto atomPtr = this->parent->findAtomByRef(it); if (atomPtr == nullptr) { throw FileParseException( "AtomRef specification for a MultipleSgroup definition was not found in any parent"); } this->atoms.push_back(atomPtr); this->parentAtoms.push_back(atomPtr); } this->title = molTree.get(".title", ""); int testInt; if (this->title == "" || !getCleanNumber(this->title, testInt) || testInt <= 0) { throw FileParseException( "Expected a positive integer title for a MultipleSgroup definition in MRV file"); } } std::string MarvinMultipleSgroup::role() const { return "MultipleSgroup"; } bool MarvinMultipleSgroup::hasAtomBondBlocks() const { return false; } MarvinMolBase *MarvinMultipleSgroup::copyMol( const std::string &idAppendage) const { auto outSgroup = new MarvinMultipleSgroup(this->parent); this->parent->sgroups.emplace_back(outSgroup); outSgroup->molID = this->molID + idAppendage; outSgroup->id = this->id + idAppendage; outSgroup->title = this->title; // the only time this is to be called is when a multiple group above it is // being expanded and the new atoms and bonds have already been made // here we just need to find them and add refs to the new multiple sgroup for (auto atomToCopy : atoms) { auto atomIndex = this->parent->getAtomIndex(atomToCopy->id + idAppendage); if (atomIndex < 0) { throw FileParseException( "unexpected error - new atom not found in parent"); } outSgroup->atoms.push_back(this->parent->atoms[atomIndex]); } for (auto atomToCopy : parentAtoms) { auto atomIndex = this->parent->getAtomIndex(atomToCopy->id + idAppendage); if (atomIndex < 0) { throw FileParseException( "unexpected error - new atom not found in parent"); } outSgroup->parentAtoms.push_back(this->parent->atoms[atomIndex]); } for (auto bondToCopy : bonds) { auto bondIndex = this->parent->getBondIndex(bondToCopy->id + idAppendage); if (bondIndex < 0) { throw FileParseException( "unexpected error - new bond not found in parent"); } outSgroup->bonds.push_back(this->parent->bonds[bondIndex]); } outSgroup->isExpanded = isExpanded; return outSgroup; } std::string MarvinMultipleSgroup::toString() const { std::ostringstream out; out << ""; for (auto &sgroup : sgroups) { out << sgroup->toString(); } out << ""; return out.str(); } ptree MarvinMultipleSgroup::toPtree() const { ptree out = MarvinMolBase::toPtree(); out.put(".atomRefs", boost::algorithm::join(getAtomList(), " ")); out.put(".title", title); MarvinMolBase::addSgroupsToPtree(out); return out; } MarvinMulticenterSgroup::MarvinMulticenterSgroup(MarvinMolBase *parentInit) { PRECONDITION(parentInit != nullptr, "parentInit cannot be null"); parent = parentInit; } MarvinMulticenterSgroup::MarvinMulticenterSgroup(MarvinMolBase *parentInit, ptree &molTree) { PRECONDITION(parentInit != nullptr, "parentInit cannot be null"); parent = parentInit; this->id = molTree.get(".id", ""); if (this->id == "") { throw FileParseException("Expected a iD in MRV file"); } this->molID = molTree.get(".molID", ""); if (this->molID == "") { throw FileParseException("Expected a molID in MRV file"); } std::string atomRefsStr = molTree.get(".atomRefs", ""); if (atomRefsStr == "") { throw FileParseException( "Expected atomRefs for a MulticenterSgroup definition in MRV file"); } std::vector atomList; boost::algorithm::split(atomList, atomRefsStr, boost::algorithm::is_space()); for (auto &it : atomList) { auto atomPtr = this->parent->findAtomByRef(it); if (atomPtr == nullptr) { throw FileParseException( "AtomRef specification for a MulticenterSgroup definition was not found in any parent"); } this->atoms.push_back(atomPtr); } std::string centerId = molTree.get(".center", ""); auto atomPtr = this->parent->findAtomByRef(centerId); if (atomPtr == nullptr) { throw FileParseException( "Center specification for a MulticenterSgroup definition was not found in any parent"); } this->center = atomPtr; } std::string MarvinMulticenterSgroup::role() const { return "MulticenterSgroup"; } bool MarvinMulticenterSgroup::hasAtomBondBlocks() const { return false; } MarvinMolBase *MarvinMulticenterSgroup::copyMol( const std::string &idAppendage) const { auto outSgroup = new MarvinMulticenterSgroup(this->parent); this->parent->sgroups.emplace_back(outSgroup); outSgroup->molID = this->molID + idAppendage; outSgroup->id = this->id; // the only time this is to be called is when a multiple group above it is // being expanded and the new atoms and bonds have already been made // here we just need to find them and add refs to the new multiple sgroup for (auto atomToCopy : atoms) { auto atomIndex = this->parent->getAtomIndex(atomToCopy->id + idAppendage); if (atomIndex < 0) { throw FileParseException( "unexpected error - new atom not found in parent"); } outSgroup->atoms.push_back(this->parent->atoms[atomIndex]); } auto centerIndex = this->parent->getAtomIndex(this->center->id + idAppendage); if (centerIndex < 0) { throw FileParseException("unexpected error - new atom not found in parent"); } outSgroup->center = this->parent->atoms[centerIndex]; return outSgroup; } std::string MarvinMulticenterSgroup::toString() const { // std::ostringstream out; out << "id << "\"/>"; return out.str(); } ptree MarvinMulticenterSgroup::toPtree() const { ptree out = MarvinMolBase::toPtree(); out.put(".atomRefs", boost::algorithm::join(getAtomList(), " ")); out.put(".center", center->id); return out; } MarvinGenericSgroup::MarvinGenericSgroup(MarvinMolBase *parentInit) { PRECONDITION(parentInit != nullptr, "parentInit cannot be null"); parent = parentInit; } MarvinGenericSgroup::MarvinGenericSgroup(MarvinMolBase *parentInit, ptree &molTree) { PRECONDITION(parentInit != nullptr, "parentInit cannot be null"); parent = parentInit; this->id = molTree.get(".id", ""); if (this->id == "") { throw FileParseException("Expected a iD in MRV file"); } this->molID = molTree.get(".molID", ""); if (this->molID == "") { throw FileParseException("Expected a molID in MRV file"); } std::string atomRefsStr = molTree.get(".atomRefs", ""); if (atomRefsStr == "") { throw FileParseException( "Expected atomRefs for a GenericSgroup definition in MRV file"); } std::vector atomList; boost::algorithm::split(atomList, atomRefsStr, boost::algorithm::is_space()); for (auto &it : atomList) { auto atomPtr = this->parent->findAtomByRef(it); if (atomPtr == nullptr) { throw FileParseException( "AtomRef specification for a GenericSgroup definition was not found in any parent"); } this->atoms.push_back(atomPtr); } this->charge = molTree.get(".charge", ""); if (this->charge != "onAtoms" && this->charge != "onBracket") { throw FileParseException( "Expected omAtoms or onBracket for a charge attr of a GenericSgroup definition in MRV file"); } } std::string MarvinGenericSgroup::role() const { return "GenericSgroup"; } bool MarvinGenericSgroup::hasAtomBondBlocks() const { return false; } MarvinMolBase *MarvinGenericSgroup::copyMol( const std::string &idAppendage) const { auto outSgroup = new MarvinGenericSgroup(this->parent); this->parent->sgroups.emplace_back(outSgroup); outSgroup->molID = this->molID + idAppendage; outSgroup->id = this->id + idAppendage; // the only time this is to be called is when a multiple group above it is // being expanded and the new atoms and bonds have already been made // here we just need to find them and add refs to the new multiple sgroup for (auto atomToCopy : atoms) { auto atomIndex = this->parent->getAtomIndex(atomToCopy->id + idAppendage); if (atomIndex < 0) { throw FileParseException( "unexpected error - new atom not found in parent"); } outSgroup->atoms.push_back(this->parent->atoms[atomIndex]); } return outSgroup; } std::string MarvinGenericSgroup::toString() const { // std::ostringstream out; out << ""; return out.str(); } ptree MarvinGenericSgroup::toPtree() const { ptree out = MarvinMolBase::toPtree(); out.put(".atomRefs", boost::algorithm::join(getAtomList(), " ")); return out; } MarvinMonomerSgroup::MarvinMonomerSgroup(MarvinMolBase *parentInit) { PRECONDITION(parentInit != nullptr, "parentInit cannot be null"); parent = parentInit; } MarvinMonomerSgroup::MarvinMonomerSgroup(MarvinMolBase *parentInit, ptree &molTree) { PRECONDITION(parentInit != nullptr, "parentInit cannot be null"); parent = parentInit; this->id = molTree.get(".id", ""); if (this->id == "") { throw FileParseException("Expected a iD in MRV file"); } this->molID = molTree.get(".molID", ""); if (this->molID == "") { throw FileParseException("Expected a molID in MRV file"); } std::string atomRefsStr = molTree.get(".atomRefs", ""); if (atomRefsStr == "") { throw FileParseException( "Expected atomRefs for a MonomerSgroup definition in MRV file"); } std::vector atomList; boost::algorithm::split(atomList, atomRefsStr, boost::algorithm::is_space()); for (auto &it : atomList) { auto atomPtr = this->parent->findAtomByRef(it); if (atomPtr == nullptr) { throw FileParseException( "AtomRef specification for a MonomerSgroup definition was not found in any parent"); } this->atoms.push_back(atomPtr); } this->title = molTree.get(".title", ""); if (this->title == "") { throw FileParseException( "Expected title for a MonomerSgroup definition in MRV file"); } this->charge = molTree.get(".charge", ""); if (this->charge == "") { throw FileParseException( "Expected omAtoms or onBracket for a charge attr of a MonomerSgroup definition in MRV file"); } } std::string MarvinMonomerSgroup::role() const { return "MonomerSgroup"; } bool MarvinMonomerSgroup::hasAtomBondBlocks() const { return false; } MarvinMolBase *MarvinMonomerSgroup::copyMol( const std::string &idAppendage) const { auto outSgroup = new MarvinMonomerSgroup(this->parent); this->parent->sgroups.emplace_back(outSgroup); outSgroup->molID = this->molID + idAppendage; outSgroup->id = this->id + idAppendage; outSgroup->title = this->title; outSgroup->charge = this->charge; // the only time this is to be called is when a multiple group above it is // being expanded and the new atoms and bonds have already been made // here we just need to find them and add refs to the new multiple sgroup for (auto atomToCopy : atoms) { auto atomIndex = this->parent->getAtomIndex(atomToCopy->id + idAppendage); if (atomIndex < 0) { throw FileParseException( "unexpected error - new atom not found in parent"); } outSgroup->atoms.push_back(this->parent->atoms[atomIndex]); } return outSgroup; } std::string MarvinMonomerSgroup::toString() const { // // std::ostringstream out; out << "" << ""; return out.str(); } ptree MarvinMonomerSgroup::toPtree() const { ptree out = MarvinMolBase::toPtree(); out.put(".atomRefs", boost::algorithm::join(getAtomList(), " ")); out.put(".title", title); out.put(".charge", charge); return out; } MarvinSuperatomSgroupExpanded::MarvinSuperatomSgroupExpanded( MarvinMolBase *parentInit) { PRECONDITION(parentInit != nullptr, "parentInit cannot be null"); parent = parentInit; } MarvinSuperatomSgroupExpanded::MarvinSuperatomSgroupExpanded( MarvinMolBase *parentInit, ptree &molTree) { PRECONDITION(parentInit != nullptr, "parentInit cannot be null"); parent = parentInit; this->id = molTree.get(".id", ""); if (this->id == "") { throw FileParseException("Expected a iD in MRV file"); } this->molID = molTree.get(".molID", ""); if (this->molID == "") { throw FileParseException("Expected a molID in MRV file"); } std::string atomRefs = molTree.get(".atomRefs", ""); if (atomRefs != "") { std::vector atomList; boost::algorithm::split(atomList, atomRefs, boost::algorithm::is_space()); for (auto &it : atomList) { auto atomPtr = this->parent->findAtomByRef(it); if (atomPtr == nullptr) { throw FileParseException( "AtomRef specification for an SuperatomSgroupExpanded group definition was not found in any parent"); } this->atoms.push_back(atomPtr); } } else // must have no atomRefs nor AtomArray - get the atoms from // the parent block - the ones that reference this // superSgroup { for (MarvinAtom *atom : parent->atoms) { if (atom->sgroupRef == this->id) { this->atoms.push_back(atom); } } } this->title = molTree.get(".title", ""); if (this->title == "") { throw FileParseException( "Expected title for a SuperatomSgroupExpanded definition in MRV file"); } } MarvinSuperatomSgroupExpanded::~MarvinSuperatomSgroupExpanded() {} MarvinMolBase *MarvinSuperatomSgroupExpanded::copyMol( const std::string &idAppendage) const { auto outSgroup = new MarvinSuperatomSgroupExpanded(this->parent); this->parent->sgroups.emplace_back(outSgroup); outSgroup->molID = this->molID + idAppendage; outSgroup->id = this->id + idAppendage; outSgroup->title = this->title; auto actualParent = getActualParent(this); // the only time this is to be called is when a multiple group above it is // being expanded and the new atoms and bonds have already been made // here we just need to find them and add refs to the new multiple sgroup for (auto atomToCopy : atoms) { auto atomIndex = actualParent->getAtomIndex(atomToCopy->id + idAppendage); if (atomIndex < 0) { throw FileParseException( "unexpected error - new atom not found in parent"); } outSgroup->atoms.push_back(actualParent->atoms[atomIndex]); } for (auto bondToCopy : bonds) { auto bondIndex = actualParent->getBondIndex(bondToCopy->id + idAppendage); if (bondIndex < 0) { throw FileParseException( "unexpected error - new bond not found in parent"); } outSgroup->bonds.push_back(actualParent->bonds[bondIndex]); } return outSgroup; } std::string MarvinSuperatomSgroupExpanded::toString() const { std::ostringstream out; out << ""; return out.str(); } ptree MarvinSuperatomSgroupExpanded::toPtree() const { ptree out = MarvinMolBase::toPtree(); out.put(".atomRefs", boost::algorithm::join(getAtomList(), " ")); out.put(".title", title); return out; } std::string MarvinSuperatomSgroupExpanded::role() const { return "SuperatomSgroupExpanded"; } bool MarvinSuperatomSgroupExpanded::hasAtomBondBlocks() const { return false; } MarvinSuperatomSgroup::MarvinSuperatomSgroup(MarvinMolBase *parentInit) { PRECONDITION(parentInit != nullptr, "parentInit cannot be null"); parent = parentInit; } MarvinSuperatomSgroup::MarvinSuperatomSgroup(MarvinMolBase *parentInit, ptree &molTree) { PRECONDITION(parentInit != nullptr, "parentInit cannot be null"); parent = parentInit; this->id = molTree.get(".id", ""); if (this->id == "") { throw FileParseException("Expected a iD in MRV file"); } this->molID = molTree.get(".molID", ""); if (this->molID == "") { throw FileParseException("Expected a molID in MRV file"); } this->title = molTree.get(".title", ""); if (this->title == "") { throw FileParseException( "Expected title for a SuperatomSgroup definition in MRV file"); } this->parseAtomsAndBonds(molTree); bool found; try { boost::property_tree::ptree AttachmentPointArrayTree = molTree.get_child("AttachmentPointArray"); found = true; } catch (const std::exception &e) { found = false; } if (found) { for (auto &v : molTree.get_child("AttachmentPointArray")) { std::string bondId = v.second.get(".bond", ""); if (bondId == "") { // this can happen if the attachment point is // not actually used - as in Amino acids that // have non-used crosslink atoms continue; } auto &marvinAttachmentPoint = this->attachmentPoints.emplace_back(new MarvinAttachmentPoint); marvinAttachmentPoint->atom = v.second.get(".atom", ""); marvinAttachmentPoint->order = v.second.get(".order", ""); marvinAttachmentPoint->bond = bondId; if (marvinAttachmentPoint->atom == "" || marvinAttachmentPoint->order == "") { throw FileParseException( "Expected atom, order and bond, for an AttachmentPoint definition in MRV file"); } if (std::find_if(atoms.begin(), atoms.end(), [&marvinAttachmentPoint](auto a) { return a->id == marvinAttachmentPoint->atom; }) == atoms.end()) { throw FileParseException( "Atom specification for an AttachmentPoint definition must be in the parent's atom array in MRV file"); } // bond must be found in the bonds vector if (this->parent->findBondByRef(marvinAttachmentPoint->bond) == nullptr) { throw FileParseException( "Bond specification for an AttachmentPoint definition must be in the bond array in MRV file"); } // the order must be an integer int orderInt; if (!getCleanNumber(marvinAttachmentPoint->order, orderInt)) { throw FileParseException( "Order for an AttachmentPoint definition must be an integer in MRV file"); } } } } MarvinSuperatomSgroup::~MarvinSuperatomSgroup() { this->attachmentPoints.clear(); } std::string MarvinSuperatomSgroup::role() const { return "SuperatomSgroup"; } bool MarvinSuperatomSgroup::hasAtomBondBlocks() const { return true; } MarvinMolBase *MarvinSuperatomSgroup::copyMol(const std::string &) const { PRECONDITION(0, "Internal error: copying a SuperatomSgroup"); } std::string MarvinSuperatomSgroup::toString() const { std::ostringstream out; out << ""; out << ""; for (auto atom : atoms) { out << atom->toString(); } out << ""; out << ""; for (auto bond : bonds) { out << bond->toString(); } out << ""; if (attachmentPoints.size() > 0) { out << ""; for (auto &attachmentPoint : attachmentPoints) { out << attachmentPoint->toString(); } out << ""; } for (auto &sgroup : sgroups) { out << sgroup->toString(); } out << ""; return out.str(); } ptree MarvinSuperatomSgroup::toPtree() const { ptree out = MarvinMolBase::toPtree(); out.put(".title", title); if (attachmentPoints.size() > 0) { ptree attachmentpointArray; for (auto &attachmentPoint : attachmentPoints) { attachmentpointArray.add_child("attachmentPoint", attachmentPoint->toPtree()); } out.put_child("AttachmentPointArray", attachmentpointArray); } MarvinMolBase::addSgroupsToPtree(out); return out; } MarvinMol::MarvinMol() { parent = nullptr; } MarvinMol::MarvinMol(ptree &molTree) { parent = nullptr; // get atoms if this mol is supposed to have them this->parseAtomsAndBonds(molTree); } MarvinMol::~MarvinMol() {} std::string MarvinMol::role() const { return "MarvinMol"; } bool MarvinMol::hasAtomBondBlocks() const { return true; } void MarvinMol::pushOwnedAtom(MarvinAtom *atom) { PRECONDITION(atom, "bad atom"); ownedAtoms.emplace_back(atom); } void MarvinMol::pushOwnedBond(MarvinBond *bond) { PRECONDITION(bond, "bad bond"); ownedBonds.emplace_back(bond); } void MarvinMol::removeOwnedAtom(MarvinAtom *atom) { PRECONDITION(atom != nullptr, "atom cannot be null"); eraseUniquePtr(ownedAtoms, atom); } void MarvinMol::removeOwnedBond(MarvinBond *bond) { PRECONDITION(bond != nullptr, "bond cannot be null"); eraseUniquePtr(ownedBonds, bond); } void MarvinMolBase::cleanUpSgNumbering( int &sgCount, std::map &sgMap) { for (auto &sgroup : this->sgroups) { std::string newId = "sg" + std::to_string(++sgCount); sgMap[sgroup->id] = newId; sgroup->id = newId; sgroup->cleanUpSgNumbering(sgCount, sgMap); } } void MarvinSuperatomSgroup::cleanUpNumberingMolsAtomsBonds( int &molCount, // this is the starting mol count, and receives the // ending mol count - THis is used when // MarvinMol->convertToSuperAtoms is called multiple // times from a RXN int &atomCount, // starting and ending atom count int &bondCount, // starting and ending bond count std::map &sgMap, std::map &atomMap, std::map &bondMap) { // the common part for all classes MarvinMolBase::cleanUpNumberingMolsAtomsBonds(molCount, atomCount, bondCount, sgMap, atomMap, bondMap); // the specific part for this class auto marvinSuperatomSgroup = (MarvinSuperatomSgroup *)this; for (auto &attachmentPoint : marvinSuperatomSgroup->attachmentPoints) { attachmentPoint->atom = atomMap[attachmentPoint->atom]; attachmentPoint->bond = bondMap[attachmentPoint->bond]; // bond is actually in the parent } } void MarvinMolBase::cleanUpNumberingMolsAtomsBonds( int &molCount, // this is the starting mol count, and receives the // ending mol count - THis is used when // MarvinMol->convertToSuperAtoms is called multiple // times from a RXN int &atomCount, // starting and ending atom count int &bondCount, // starting and ending bond count) std::map &sgMap, std::map &atomMap, std::map &bondMap) { // clean up the mol ids, the atomIds and bondIds. make map of the old to // new atom ids and bond ids this->molID = "m" + std::to_string(++molCount); if (this->hasAtomBondBlocks()) { for (auto atomPtr : this->atoms) { std::string newId = "a" + std::to_string(++atomCount); atomMap[atomPtr->id] = newId; atomPtr->id = newId; // fix the sgroupRef if (atomPtr->sgroupRef != "") { atomPtr->sgroupRef = sgMap[atomPtr->sgroupRef]; } } for (auto bondPtr : this->bonds) { std::string newId = "b" + std::to_string(++bondCount); bondMap[bondPtr->id] = newId; bondPtr->id = newId; // fix the bond's references to atoms bondPtr->atomRefs2[0] = atomMap[bondPtr->atomRefs2[0]]; bondPtr->atomRefs2[1] = atomMap[bondPtr->atomRefs2[1]]; } } // Now the sgroups for (auto &sgroup : sgroups) { sgroup->cleanUpNumberingMolsAtomsBonds(molCount, atomCount, bondCount, sgMap, atomMap, bondMap); } } void MarvinMolBase::cleanUpNumbering( int &molCount // this is the starting mol count, and receives the // ending mol count - THis is used when // MarvinMol->convertToSuperAtoms is called multiple // times from a RXN , int &atomCount // starting and ending atom count , int &bondCount // starting and ending bond count , int &sgCount // starting and ending sg count) , std::map &sgMap // map from old sg number to new sg number , std::map &atomMap // map from old atom number to new atom number , std::map &bondMap // map from old bond number to new bond number ) { this->cleanUpSgNumbering(sgCount, sgMap); this->cleanUpNumberingMolsAtomsBonds(molCount, atomCount, bondCount, sgMap, atomMap, bondMap); } bool MarvinMolBase::AnyOverLappingAtoms(const MarvinMolBase *otherMol) const { PRECONDITION(otherMol != nullptr, "otherMol cannot be null"); std::vector firstList = getAtomList(); std::sort(firstList.begin(), firstList.end()); std::vector secondList = otherMol->getAtomList(); std::sort(secondList.begin(), secondList.end()); std::vector intersection; std::set_intersection(firstList.begin(), firstList.end(), secondList.begin(), secondList.end(), back_inserter(intersection)); return intersection.size() > 0; } // the following routine determines if a sgroup role is passive when // expanding in preparation for creating an RDKit mol. sgroups are NOT // passive if they create atoms or bonds in the parent when expanding bool MarvinMolBase::isPassiveRoleForExpansion() const { // some derived classes override this and return false return true; } bool MarvinSuperatomSgroup::isPassiveRoleForExpansion() const { return false; } bool MarvinMultipleSgroup::isPassiveRoleForExpansion() const { return false; } // this routine determines if the sgroup is passive for contraction, whern // coming from an RDKit mol to a Marvin Mol // It is NOT passive if atoms in the parent are removed by contracting it. bool MarvinMolBase::isPassiveRoleForContraction() const { // some derived classes override this and return false return true; } bool MarvinMol::isPassiveRoleForContraction() const { return false; } bool MarvinSuperatomSgroupExpanded::isPassiveRoleForContraction() const { return false; } bool MarvinMultipleSgroup::isPassiveRoleForContraction() const { return false; } void MarvinMolBase::parseMoleculeSpecific(RDKit::RWMol *mol, std::unique_ptr &, int) { PRECONDITION(mol != nullptr, "mol cannot be null"); // default is nothing - most derived classes override this }; void MarvinSuperatomSgroupExpanded::parseMoleculeSpecific( RDKit::RWMol *mol, std::unique_ptr &sgroup, int sequenceId) { PRECONDITION(mol != nullptr, "mol cannot be null"); std::string typ = "SUP"; sgroup.reset(new SubstanceGroup(mol, typ)); sgroup->setProp("index", sequenceId); for (auto atomPtr : this->atoms) { sgroup->addAtomWithIdx(this->parent->getAtomIndex(atomPtr->id)); } for (auto bondPtr : this->bonds) { sgroup->addBondWithIdx(this->parent->getBondIndex(bondPtr->id)); } sgroup->setProp("LABEL", this->title); } void MarvinMultipleSgroup::parseMoleculeSpecific( RDKit::RWMol *mol, std::unique_ptr &sgroup, int sequenceId) { PRECONDITION(mol != nullptr, "mol cannot be null"); // now the MultipleSgroups // note: sequence continues counting from the loop above std::string typ = "MUL"; sgroup.reset(new SubstanceGroup(mol, typ)); sgroup->setProp("index", sequenceId); for (auto atomPtr : this->atoms) { sgroup->addAtomWithIdx(this->parent->getAtomIndex(atomPtr->id)); } for (auto atomPtr : this->parentAtoms) { sgroup->addParentAtomWithIdx(this->parent->getAtomIndex(atomPtr->id)); } // the connection bonds for (auto bondptr : this->bondsToAtomsNotInExpandedGroup) { sgroup->addBondWithIdx(this->parent->getBondIndex(bondptr->id)); } sgroup->setProp("MULT", this->title); } void MarvinSruCoModSgroup::parseMoleculeSpecific( RDKit::RWMol *mol, std::unique_ptr &sgroup, int sequenceId) { PRECONDITION(mol != nullptr, "mol cannot be null"); std::string typ; if (this->role() == "SruSgroup") { typ = "SRU"; } else if (this->role() == "CopolymerSgroup") { typ = "COP"; } else if (this->role() == "ModificationSgroup") { typ = "MOD"; } else { throw FileParseException( "Internal error: unrecognized role in a MarvinSruCoPolSgroup"); } sgroup.reset(new SubstanceGroup(mol, typ)); sgroup->setProp("index", sequenceId); sgroup->setProp("CONNECT", this->connect); for (auto atomPtr : this->atoms) { sgroup->addAtomWithIdx(this->parent->getAtomIndex(atomPtr->id)); } for (auto bondPtr : this->bonds) { sgroup->addBondWithIdx(this->parent->getBondIndex(bondPtr->id)); } sgroup->setProp("LABEL", this->title); } void MarvinDataSgroup::parseMoleculeSpecific( RDKit::RWMol *mol, std::unique_ptr &sgroup, int sequenceId) { PRECONDITION(mol != nullptr, "mol cannot be null"); // Now the data groups std::string typ = "DAT"; sgroup.reset(new SubstanceGroup(mol, typ)); sgroup->setProp("index", sequenceId); for (auto atomPtr : this->atoms) { sgroup->addAtomWithIdx(this->parent->getAtomIndex(atomPtr->id)); } sgroup->setProp("FIELDNAME", this->fieldName); if (this->queryType != "") { sgroup->setProp("QUERYTYPE", this->queryType); } if (this->queryOp != "") { sgroup->setProp("QUERYOP", this->queryOp); } std::ostringstream out; out << std::fixed << std::setw(10) << std::setprecision(4) << this->x << std::fixed << std::setw(10) << std::setprecision(4) << this->y << " DRU ALL 0 0"; sgroup->setProp("FIELDDISP", out.str()); // really not used by RDKIT std::vector fieldDatas; fieldDatas.push_back(this->fieldData); sgroup->setProp("DATAFIELDS", fieldDatas); // The following props are not part of the RDKit structure for MOL // files, but we save them so that we can round-trip the MRV sgroup->setProp("UNITS", this->units); sgroup->setProp("UNITSDISPLAYED", this->unitsDisplayed); sgroup->setProp("CONTEXT", this->context); sgroup->setProp("PLACEMENT", this->placement); sgroup->setProp("X", this->x); sgroup->setProp("Y", this->y); } void MarvinMulticenterSgroup::parseMoleculeSpecific( RDKit::RWMol *, std::unique_ptr &, int) { // the MultiCenter Sgroups // There is really no place to put these in RDKit. We should have // removed these already PRECONDITION( 0, "Internal error: a MarvinMulticenterSgroup has not been removed"); } // the Generic groups void MarvinGenericSgroup::parseMoleculeSpecific( RDKit::RWMol *mol, std::unique_ptr &sgroup, int sequenceId) { PRECONDITION(mol != nullptr, "mol cannot be null"); std::string typ = "GEN"; sgroup.reset(new SubstanceGroup(mol, typ)); sgroup->setProp("index", sequenceId); for (auto atomPtr : this->atoms) { sgroup->addAtomWithIdx(this->parent->getAtomIndex(atomPtr->id)); } // note: there is no place to put the change="onAtoms" flag } // now the MonomerSgroups // note: sequence continues counting from the loop above void MarvinMonomerSgroup::parseMoleculeSpecific( RDKit::RWMol *mol, std::unique_ptr &sgroup, int sequenceId) { PRECONDITION(mol != nullptr, "mol cannot be null"); std::string typ = "MON"; sgroup.reset(new SubstanceGroup(mol, typ)); sgroup->setProp("index", sequenceId); for (auto atomPtr : this->parent->atoms) { int atomIndex = this->getAtomIndex(atomPtr->id); sgroup->addAtomWithIdx(atomIndex); } sgroup->setProp("LABEL", this->title); // Note: RDKit does not have a place for the Bracket information nor // the charge="onAtoms" attr } void moveSgroup(MarvinMolBase *sgroupToMove, MarvinMolBase *newParent, bool removeFromOldParent = true) { PRECONDITION(sgroupToMove, "bad sgroupToMove pointer"); MarvinMolBase *parent = sgroupToMove->parent; PRECONDITION(parent, "bad sgroupToMove parent pointer"); auto moveUniqIter = find_if(sgroupToMove->parent->sgroups.begin(), sgroupToMove->parent->sgroups.end(), [sgroupToMove](std::unique_ptr &sgptr) { return sgptr->id == sgroupToMove->id; }); if (moveUniqIter == sgroupToMove->parent->sgroups.end()) { throw FileParseException( "Unexpected error - sgroup not found in its parent"); } newParent->sgroups.push_back(std::move(*moveUniqIter)); if (removeFromOldParent) { parent->sgroups.erase(moveUniqIter); } sgroupToMove->parent = newParent; } void promoteChild(MarvinMolBase *molToMove) { PRECONDITION(molToMove, "bad molToMove pointer"); moveSgroup(molToMove, molToMove->parent->parent); } void MarvinMultipleSgroup::expandOneMultipleSgroup() { // Mulitplesgroups are handled differently in Marvin and RDKit/mol file // format // // In Marvin, the atoms of the group are indicated, and the "title" // contains the number of replicates In mol files, the atoms of the group // are actually replicated in the atomBlock, and the MUL constructs // indicates the list of ALL such atoms and the list of the original // parent atoms from which the others were replicated. It also indicate // the two bonds from the group atoms to atoms NOT in the group (although // these could be derived // // This routine takes a MarvinMol and prepares it for generation of an // RDKit mol. Each MultipleSgroup is expanded by: // 1) expanding the atom block - the new replicate sets of atoms are // places just after the last atom in the original (parent) set 2) // records the two bonds to atoms NOT in the expanded group - one is // from a "parent" atom and one from a replicated atom. 3) bonds the // parent and replicated sets to each other in a head to tail fashion. // make sure it is not already expanded - this should not happen if (this->isExpanded) { return; } // find the two bonds (or zero bonds) to atoms NOT in the group std::vector originalConnectorAtoms; // the bonds in the parent that were to // outside atoms BEFORE replication std::string connectorAtoms[2]; // working atoms to connect the replicates std::string outsideConnectorAtom; std::vector bondsInGroup; this->bondsToAtomsNotInExpandedGroup.clear(); // find the actual parent - multiple groups can be nested, so the atoms // and bonds of the lower one are really in the grandparent or higher) auto actualParent = getActualParent(this); for (MarvinBond *bondPtr : actualParent->bonds) { bool atom1InSet = (std::find_if(atoms.begin(), atoms.end(), [&bondPtr](auto a) { return a->id == bondPtr->atomRefs2[0]; }) != atoms.end()); bool atom2InSet = (std::find_if(atoms.begin(), atoms.end(), [&bondPtr](auto a) { return a->id == bondPtr->atomRefs2[1]; }) != atoms.end()); if (atom1InSet && atom2InSet) { bondsInGroup.push_back(bondPtr); } else if (atom1InSet) { originalConnectorAtoms.push_back(bondPtr->atomRefs2[0]); this->bondsToAtomsNotInExpandedGroup.push_back(bondPtr); outsideConnectorAtom = bondPtr->atomRefs2[1]; // last one set wins - when done should be // the outside atom for the 2nd bond to // the outside } else if (atom2InSet) { originalConnectorAtoms.push_back(bondPtr->atomRefs2[1]); this->bondsToAtomsNotInExpandedGroup.push_back(bondPtr); outsideConnectorAtom = bondPtr->atomRefs2[0]; // last one set wins - when done should be // the outside atom for the 2nd bond to // the outside } } // should be two bonds to the outside (or zero) if (this->bondsToAtomsNotInExpandedGroup.size() != 2 && this->bondsToAtomsNotInExpandedGroup.size() != 0) { throw FileParseException( "Error - there must be zero or two bonds from the group to atoms not in the group"); } // find the hightest id of an atom in the Group - we will insert // replicated atoms after that one int highestIndex = 0; for (MarvinAtom *atomPtr : this->atoms) { int thisIndex = actualParent->getAtomIndex(atomPtr->id); if (thisIndex > highestIndex) { highestIndex = thisIndex; } } MarvinAtom *lastAtomInGroupPtr = actualParent->atoms[highestIndex]; // ready to make copies of the group int copyCount = std::stoi(this->title); if (this->bondsToAtomsNotInExpandedGroup.size() == 2) { connectorAtoms[1] = originalConnectorAtoms[1]; // this one does NOT have an _# on it } for (int copyIndex = 2; copyIndex <= copyCount; ++copyIndex) // start at 2, we already have the first copy { std::string idAppendage = "_" + this->id + ":" + std::to_string(copyIndex); for (auto parentAtomPtr : this->parentAtoms) { auto copyAtom = new MarvinAtom(*parentAtomPtr, parentAtomPtr->id + idAppendage); actualParent->pushOwnedAtom(copyAtom); if (parentAtomPtr->sgroupRef != "" && !copyAtom->sGroupRefIsSuperatom) { copyAtom->sgroupRef = parentAtomPtr->sgroupRef + idAppendage; } // copy atoms to all parents up to the actual parent (multipleSgroups // in multipleSgroups) for (auto thisParent = this->parent;; thisParent = thisParent->parent) { TEST_ASSERT(thisParent != nullptr); auto insertItr = find(thisParent->atoms.begin(), thisParent->atoms.end(), lastAtomInGroupPtr); if (insertItr != thisParent->atoms.end()) { ++insertItr; // insert after the last one } thisParent->atoms.insert(insertItr, copyAtom); // for multiple groups, also insert in the parentAtoms array - that // multiple sgroup has NOT been done yet if (thisParent->role() == "MultipleSgroup") { auto thisMultipleParent = (MarvinMultipleSgroup *)thisParent; insertItr = find(thisMultipleParent->parentAtoms.begin(), thisMultipleParent->parentAtoms.end(), lastAtomInGroupPtr); if (insertItr != thisMultipleParent->parentAtoms.end()) { ++insertItr; // insert after the last one } thisMultipleParent->parentAtoms.insert(insertItr, copyAtom); } if (thisParent == actualParent) { break; } } lastAtomInGroupPtr = copyAtom; this->atoms.push_back(copyAtom); } // add the bonds for this copy for (auto parentBondPtr : bondsInGroup) { auto copyBond = new MarvinBond(*parentBondPtr, parentBondPtr->id + idAppendage, parentBondPtr->atomRefs2[0] + idAppendage, parentBondPtr->atomRefs2[1] + idAppendage); actualParent->pushOwnedBond(copyBond); actualParent->bonds.push_back(copyBond); } for (int i = this->sgroups.size() - 1; i >= 0; --i) { auto copiedMol = this->sgroups[i]->copyMol(idAppendage); moveSgroup(copiedMol, this->parent); } // add the bond from the last group to the new one if (this->bondsToAtomsNotInExpandedGroup.size() == 2) { connectorAtoms[0] = originalConnectorAtoms[0] + idAppendage; auto connectorBond = new MarvinBond( *this->bondsToAtomsNotInExpandedGroup[0], this->bondsToAtomsNotInExpandedGroup[0]->id + idAppendage, connectorAtoms[0], connectorAtoms[1]); actualParent->pushOwnedBond(connectorBond); actualParent->bonds.push_back(connectorBond); // update connectorAtom[1] for the next pass (or the final bond to the // outside atom) connectorAtoms[1] = originalConnectorAtoms[1] + idAppendage; } } // fix the bond to the second outside atom to reflect that it comes from // the last replicate if (this->bondsToAtomsNotInExpandedGroup.size() == 2) { this->bondsToAtomsNotInExpandedGroup[1]->atomRefs2[0] = outsideConnectorAtom; this->bondsToAtomsNotInExpandedGroup[1]->atomRefs2[1] = connectorAtoms[1]; } // move any sgroups below this one up to the parent - we have already made // copies of it for (int i = this->sgroups.size() - 1; i >= 0; --i) { promoteChild(sgroups[i].get()); } this->isExpanded = true; } void MarvinSuperatomSgroup::convertFromOneSuperAtom() { // the mol-style super atoms are significanTly different than the Marvin // super atoms. The mol style has all atoms and bonds in the main mol, // and parameter lines that indicate the name of the super atom and the // atom indices affected. // // The SuperatomSgroup form can appear in contracted or expanded form in // MRV blocks: // // In the contracted form, the MRV block can have one or more dummy atoms // with the super atom name in the main molecule, and a separate sub-mol // with the atoms and bonds of the superatom. It also can have one or more // separate records called attachmentPoints that specify which atom(s) in // the super atom sub-mol that replaces the dummy atom(s), and also a bond // pointer (in the parent mol). // // In the expanded form, all of the atoms are in the parent molecule, and // the sub-mol only refers to them. The attachment points refer to the // atoms in the parent mol. The MultipleSgroup and SruGroup seem very much // alike. In both, all atoms are in the parent mol, and the sub-mol // refers to a group of atoms in that parent. The SruGroup specifies the // name and also the connection information (head-to-tail, head-head, and // unspecified). The Multiple S-group specifies the report count for the // group // // This routine deals with only the contracted form of the // SuperatomSgroups, and copies the atoms and bonds from the sub=mol to // the parent mol, and deletes the dummy atom form the parent mol. // It also saves the info creates a replacement // MarvinSuperatomSgroupExpanded item. try { // save the name of the superatom auto superatomSgroupExpanded = new MarvinSuperatomSgroupExpanded(this->parent); this->parent->sgroups.push_back( std::unique_ptr(superatomSgroupExpanded)); superatomSgroupExpanded->title = this->title; superatomSgroupExpanded->id = this->id; // the expanded sgroup will replace the contracted one superatomSgroupExpanded->molID = this->molID; // the expanded sgroup will replace the contracted one // find the actual parent - multiple groups can be nested, so the atoms // and bonds of the lower one are really in the grandparent or higher) auto actualParent = getActualParent(this); // remove and delete the dummy atom from the parent. bool coordExist = this->has2dCoords(); auto dummyAtomIter = find_if( actualParent->atoms.begin(), actualParent->atoms.end(), [this](const MarvinAtom *arg) { return arg->sgroupRef == this->id; }); if (dummyAtomIter == actualParent->atoms.end()) { throw FileParseException( "No contracted atom found for a superatomSgroup"); } auto dummyAtomPtr = *dummyAtomIter; // before we move or delete any atoms from the superatomSgroup, Make // sure that the sibling sgroups are handled if a sibling contains the // dummy R atom, then the atoms to be promoted from this sgroup should // be member of the sibling too for (auto &sibling : parent->sgroups) { if (sibling.get() == this) { continue; // not a sibling } auto dummyInSibling = find_if(sibling->atoms.begin(), sibling->atoms.end(), [dummyAtomPtr](const MarvinAtom *arg) { return arg == dummyAtomPtr; }); if (dummyInSibling != sibling->atoms.end()) { sibling->atoms.erase(dummyInSibling); for (auto atom : this->atoms) { sibling->atoms.push_back(atom); } } } // get a list of the bonds that will need to be fixed after the copy of // atoms and bonds std::vector orphanedBonds; RDGeom::Point3D centerOfAttachmentPoints; RDGeom::Point3D centerOfGroup; for (auto orphanedBond : actualParent->bonds) { std::string attachedAtomId = ""; if (orphanedBond->atomRefs2[0] == dummyAtomPtr->id) { attachedAtomId = orphanedBond->atomRefs2[1]; } else if (orphanedBond->atomRefs2[1] == dummyAtomPtr->id) { attachedAtomId = orphanedBond->atomRefs2[0]; } if (attachedAtomId != "") { auto attachmentPoint = find_if( this->attachmentPoints.begin(), this->attachmentPoints.end(), [orphanedBond](const std::unique_ptr &arg) { return arg->bond == orphanedBond->id; }); if (attachmentPoint == this->attachmentPoints.end()) { throw FileParseException( "No attachment point found for bond to the condensed atom in a superatomSgroup"); } orphanedBonds.push_back(attachmentPoint->get()); auto subMolAtomIndex = this->getAtomIndex((*attachmentPoint)->atom); if (subMolAtomIndex < 0) { throw FileParseException( "Attachment atom not found in the superatomsGroup"); } MarvinAtom *attachedAtom = this->atoms[subMolAtomIndex]; if (coordExist) { centerOfAttachmentPoints.x += attachedAtom->x2; centerOfAttachmentPoints.y += attachedAtom->y2; } } } if (coordExist) { RDGeom::Point3D offset; if (orphanedBonds.size() > 0) { centerOfAttachmentPoints.x /= orphanedBonds.size(); centerOfAttachmentPoints.y /= orphanedBonds.size(); offset.x = dummyAtomPtr->x2 - centerOfAttachmentPoints.x; offset.y = dummyAtomPtr->y2 - centerOfAttachmentPoints.y; } else // use the center of all atoms in the group { RDGeom::Point3D centerOfGroup; for (auto atom : this->atoms) { centerOfGroup.x += atom->x2; centerOfGroup.y += atom->y2; } centerOfGroup.x /= this->atoms.size(); centerOfGroup.y /= this->atoms.size(); offset.x = dummyAtomPtr->x2 - centerOfGroup.x; offset.y = dummyAtomPtr->y2 - centerOfGroup.y; } for (auto atom : this->atoms) { atom->x2 += offset.x; atom->y2 += offset.y; } } for (auto thisParent = parent;; thisParent = thisParent->parent) { TEST_ASSERT(thisParent != nullptr); auto dummyInParent = find_if(thisParent->atoms.begin(), thisParent->atoms.end(), [dummyAtomPtr](const MarvinAtom *arg) { return arg == dummyAtomPtr; }); TEST_ASSERT(dummyInParent != thisParent->atoms.end()); thisParent->atoms.erase(dummyInParent); // get rid of the atoms pointer // to the old dummy atom // for multiple groups, also delete it from the parentAtoms array - // that multiple sgroup has NOT been done yet if (thisParent->role() == "MultipleSgroup") { auto thisMultipleParent = (MarvinMultipleSgroup *)thisParent; auto deleteIter = find(thisMultipleParent->parentAtoms.begin(), thisMultipleParent->parentAtoms.end(), dummyAtomPtr); TEST_ASSERT(deleteIter != thisMultipleParent->parentAtoms.end()); thisMultipleParent->parentAtoms.erase(deleteIter); } if (thisParent == actualParent) { break; } } removeOwnedAtom(dummyAtomPtr); // add the atoms and bonds from the super group to the parent for (auto subAtomPtr : this->atoms) { for (auto thisParent = parent;; thisParent = thisParent->parent) { TEST_ASSERT(thisParent != nullptr); thisParent->atoms.push_back(subAtomPtr); if (thisParent->role() == "MultipleSgroup") { auto thisMultipleParent = (MarvinMultipleSgroup *)thisParent; thisMultipleParent->parentAtoms.push_back(subAtomPtr); } if (thisParent == actualParent) { break; } } superatomSgroupExpanded->atoms.push_back(subAtomPtr); // remove the sgroupRef from the atom if it has one (subAtomPtr)->sgroupAttachmentPoint = ""; } for (auto &bond : this->bonds) { actualParent->bonds.push_back(bond); } // process the attachment points - fix the bond that was made wrong by // deleting the dummy atom(s) for (auto &attachmentPoint : this->attachmentPoints) { // find the bond in the parent auto bondIter = find_if(actualParent->bonds.begin(), actualParent->bonds.end(), [&attachmentPoint](const MarvinBond *arg) { return arg->id == attachmentPoint->bond; }); if (bondIter == actualParent->bonds.end()) { throw FileParseException( "Bond specification for an AttachmentPoint definition was not found in the bond array in MRV file"); } superatomSgroupExpanded->bonds.push_back(*bondIter); // one of the two atoms in the bond is NOT in the mol - we deleted the // dummy atom. int atomIndex; for (atomIndex = 0; atomIndex < 2; ++atomIndex) { if (std::find_if(actualParent->atoms.begin(), actualParent->atoms.end(), [&, bondIter, atomIndex](auto a) { return a->id == (*bondIter)->atomRefs2[atomIndex]; }) == actualParent->atoms.end()) { (*bondIter)->atomRefs2[atomIndex] = attachmentPoint->atom; // the attach atom break; } } if (atomIndex == 2) // not found? { std::ostringstream err; err << "Bond " << attachmentPoint->bond.c_str() << " from attachment point did not have a missing atom in MRV file"; throw FileParseException(err.str()); } } this->attachmentPoints.clear(); this->atoms.clear(); this->bonds.clear(); // promote any children sgroups for (auto &childSgroup : this->sgroups) { moveSgroup(childSgroup.get(), actualParent, false); } this->sgroups.clear(); auto sgroupItr = std::find_if(parent->sgroups.begin(), parent->sgroups.end(), [this](std::unique_ptr &sgptr) { return sgptr->id == this->id; }); TEST_ASSERT(sgroupItr != parent->sgroups.end()); parent->sgroups.erase(sgroupItr); } catch (const std::exception &e) { throw; } } void MarvinMulticenterSgroup::processOneMulticenterSgroup() { auto actualParent = getActualParent(this); // delete the bonds to the dummy atom std::vector orphanedBonds; // list of bonds to delete for (auto orphanedBond : actualParent->bonds) { std::string attachedAtomId = ""; if (orphanedBond->atomRefs2[0] == this->center->id || orphanedBond->atomRefs2[1] == this->center->id) { orphanedBonds.push_back(orphanedBond); } } for (auto orphanedBond : orphanedBonds) { auto orphanedBondIter = std::find(actualParent->bonds.begin(), actualParent->bonds.end(), orphanedBond); TEST_ASSERT(orphanedBondIter != actualParent->bonds.end()); actualParent->bonds.erase(orphanedBondIter); actualParent->removeOwnedBond(orphanedBond); } auto centerIter = std::find(actualParent->atoms.begin(), actualParent->atoms.end(), this->center); if (centerIter != actualParent->atoms.end()) // it might already have been deleted by // another multicenter group { auto centerPtr = *centerIter; for (auto thisParent = parent;; thisParent = thisParent->parent) { TEST_ASSERT(thisParent != nullptr); auto parentAtomIter = find( thisParent->atoms.begin(), thisParent->atoms.end(), centerPtr); // get rid of the atoms pointer to the old dummy atom TEST_ASSERT(parentAtomIter != thisParent->atoms.end()); thisParent->atoms.erase(parentAtomIter); // get rid of the atoms pointer // to the old dummy atom if (thisParent->role() == "MultipleSgroup") { auto thisMultipleParent = (MarvinMultipleSgroup *)thisParent; auto deleteIter = find(thisMultipleParent->parentAtoms.begin(), thisMultipleParent->parentAtoms.end(), centerPtr); TEST_ASSERT(deleteIter != thisMultipleParent->parentAtoms.end()); thisMultipleParent->parentAtoms.erase(deleteIter); } if (thisParent == actualParent) { break; } } removeOwnedAtom(centerPtr); } // erase the multicenter group from its parent for (auto thisParent = parent;; thisParent = thisParent->parent) { TEST_ASSERT(thisParent != nullptr); auto sgrupIter = std::find_if(thisParent->sgroups.begin(), thisParent->sgroups.end(), [this](std::unique_ptr &sgptr) { return sgptr->id == this->id; }); TEST_ASSERT(sgrupIter != thisParent->sgroups.end()); thisParent->sgroups.erase(sgrupIter); if (thisParent == actualParent) { break; } } } void MarvinMolBase::prepSgroupsForRDKit() { // this routine recursively fixes the hierarchy of sgroups - some may NOT // actually belong underneath their parent and can be promoted to the // grandparent // first fix all the children std::vector sgroupsMolIdsDone; for (bool allDone = false; !allDone;) // until all at this level are done - but fixing one // child can add sgroups to this level { allDone = true; // until it is not true in the loop below for (auto &childSgroup : this->sgroups) { if (std::find(sgroupsMolIdsDone.begin(), sgroupsMolIdsDone.end(), childSgroup->molID) == sgroupsMolIdsDone.end()) { sgroupsMolIdsDone.push_back(childSgroup->molID); childSgroup->prepSgroupsForRDKit(); allDone = false; break; // have to start the loop over - we might have changed the // vector } } } // if this is top level, quit if (this->parent == nullptr) { return; } // check to see if we can move this sgroup up a level. We can if the // parent is not the root of the tree, and its parent is passive (it does // not change the atoms of the parent when it is processed or expanded, or // if all of its atoms do not belong to the the non-passive parent // if this is already a child of the root, it cannot be moved up if (this->parent->parent != nullptr) { // check this one if (this->parent->isPassiveRoleForExpansion()) { // move the child up promoteChild(this); return; } // check to see if all of its atomRefs of the child are in the parent. // there are 3 possibilities: // 1) all atoms are in the parent - do NOT move the child - it belongs // under the parent 2) NO atoms are in the parent - move the child up // 3) SOME atoms are in the parent - throw an error auto result = this->isSgroupInSetOfAtoms(this->parent->atoms); switch (result) { case SgroupNotInAtomSet: promoteChild(this); return; case SgroupInAtomSet: // do noting here break; case SgroupBothInAndNotInAtomSet: throw FileParseException( "Child sGroup has atoms both in the parent and NOT in the parent"); default: throw FileParseException( "Unexpected error: unrecognized result from isSgroupInSetOfAtoms"); } } // made to here, so the child belongs in the parent. // do any sgroup specific processing processSpecialSgroups(); return; } void MarvinMolBase::processSpecialSgroups() { // by default, nothing is done here. Some subclasses will override this. } void MarvinSuperatomSgroup::processSpecialSgroups() { convertFromOneSuperAtom(); } void MarvinMulticenterSgroup::processSpecialSgroups() { processOneMulticenterSgroup(); } void MarvinMultipleSgroup::processSpecialSgroups() { expandOneMultipleSgroup(); } MarvinMolBase *MarvinSuperatomSgroupExpanded::convertToOneSuperAtom() { // the mol-style super atoms are significantly different than the Marvin // super atoms. The mol style has all atoms and bonds in the main mol, // and parameter lines that indicate the name of the super atom and the // atom indices affected. // // The Marvin has a dummy atom with the super atom name in the main // molecule, and a separate sub-mol with the atoms and bonds of the // superatom. It also has a separate record called attachmentPoint that // atom in the super atom sub=mol that is replaces the dummy atom, and // also a bond pointer and bond order in case the bond order changes. // // Takes information from a MarvinSuperatomSgroupExpanded and converts // the MarvinSuperatomSgroup structure PRECONDITION(this->parent, "invalid parent"); auto actualParent = getActualParent(this); // make a new sub mol auto marvinSuperatomSgroup = new MarvinSuperatomSgroup(this->parent); this->parent->sgroups.push_back( std::unique_ptr(marvinSuperatomSgroup)); std::string newAtomName = "NA_" + this->molID; marvinSuperatomSgroup->molID = this->molID; marvinSuperatomSgroup->title = this->title; marvinSuperatomSgroup->id = this->id; bool coordsExist = has2dCoords(); // we have to check before we add the dummy // atom - it will not have coords (yet) // add the dummy atom into the parent auto dummyParentAtom = new MarvinAtom(); actualParent->pushOwnedAtom(dummyParentAtom); dummyParentAtom->elementType = "R"; dummyParentAtom->id = newAtomName; dummyParentAtom->sgroupRef = marvinSuperatomSgroup->id; dummyParentAtom->sGroupRefIsSuperatom = true; for (auto thisParent = this->parent;; thisParent = thisParent->parent) // do all parents and grandparents // ... to to the actual parent { thisParent->atoms.push_back(dummyParentAtom); if (thisParent->role() == "MultipleSgroup") { ((MarvinMultipleSgroup *)thisParent) ->parentAtoms.push_back(dummyParentAtom); } if (thisParent == actualParent) { break; } } RDGeom::Point3D centerOfGroup; for (auto atom : this->atoms) { atom->sgroupRef = ""; marvinSuperatomSgroup->atoms.push_back(atom); for (auto thisParent = this->parent;; thisParent = thisParent->parent) // do all parents and grandparents // ... to to the actual parent { auto deleteIter = find(thisParent->atoms.begin(), thisParent->atoms.end(), atom); TEST_ASSERT(deleteIter != thisParent->atoms.end()); thisParent->atoms.erase(deleteIter); if (thisParent->role() == "MultipleSgroup") { auto marvinMultipleSgroup = (MarvinMultipleSgroup *)thisParent; auto sgroupIter = find(marvinMultipleSgroup->parentAtoms.begin(), marvinMultipleSgroup->parentAtoms.end(), atom); TEST_ASSERT(sgroupIter != marvinMultipleSgroup->parentAtoms.end()); marvinMultipleSgroup->parentAtoms.erase(sgroupIter); } if (thisParent == actualParent) { break; } } if (coordsExist) // get the center of all atoms in the group - we might // use this if there are no attachment points { centerOfGroup.x += atom->x2; centerOfGroup.y += atom->y2; } } // move the bonds of the group int attachmentPointsAdded = 0; MarvinAtom *atomPtr = nullptr; RDGeom::Point3D centerOfAttachmentPoints; for (auto bond : actualParent->bonds) { bool atom1InGroup = (std::find_if(marvinSuperatomSgroup->atoms.begin(), marvinSuperatomSgroup->atoms.end(), [&bond](auto a) { return a->id == (bond)->atomRefs2[0]; }) != marvinSuperatomSgroup->atoms.end()); bool atom2InGroup = (std::find_if(marvinSuperatomSgroup->atoms.begin(), marvinSuperatomSgroup->atoms.end(), [&bond](auto a) { return a->id == (bond)->atomRefs2[1]; }) != marvinSuperatomSgroup->atoms.end()); if (atom1InGroup && atom2InGroup) { // both are in, so copy the bond marvinSuperatomSgroup->bonds.push_back(bond); } else if (atom1InGroup || atom2InGroup) // one is in so this is a connection point { // fix the bonds to the dummy atom and add an attachment point if (atom1InGroup) { atomPtr = marvinSuperatomSgroup->atoms[marvinSuperatomSgroup->getAtomIndex( (bond)->atomRefs2[0])]; (bond)->atomRefs2[0] = newAtomName; } else { atomPtr = marvinSuperatomSgroup->atoms[marvinSuperatomSgroup->getAtomIndex( (bond)->atomRefs2[1])]; (bond)->atomRefs2[1] = newAtomName; } atomPtr->sgroupAttachmentPoint = std::to_string(++attachmentPointsAdded); // add an attachmentPoint structure auto &marvinAttachmentPoint = marvinSuperatomSgroup->attachmentPoints.emplace_back( new MarvinAttachmentPoint); marvinAttachmentPoint->atom = atomPtr->id; marvinAttachmentPoint->bond = bond->id; marvinAttachmentPoint->order = std::to_string(attachmentPointsAdded); if (coordsExist) { centerOfAttachmentPoints.x += atomPtr->x2; centerOfAttachmentPoints.y += atomPtr->y2; } } } // now remove the bonds that were moved to the superGroup from the parent for (auto bond : marvinSuperatomSgroup->bonds) { int index = actualParent->getBondIndex(bond->id); TEST_ASSERT(index >= 0 && static_cast(index) < actualParent->bonds.size()); actualParent->bonds.erase(actualParent->bonds.begin() + index); } if (coordsExist) { if (marvinSuperatomSgroup->attachmentPoints.size() > 0) // Any attachment points? if so we use the center of the // attached atoms in the supergroup { // put the new dummy atom at the center of the removed group dummyParentAtom->x2 = centerOfAttachmentPoints.x / marvinSuperatomSgroup->attachmentPoints.size(); dummyParentAtom->y2 = centerOfAttachmentPoints.y / marvinSuperatomSgroup->attachmentPoints.size(); } else { // No attachments to the supergroup - (probably all atoms in the mol // are in the supergroup) -use the center of all atoms in the group dummyParentAtom->x2 = centerOfGroup.x / marvinSuperatomSgroup->atoms.size(); dummyParentAtom->y2 = centerOfGroup.y / marvinSuperatomSgroup->atoms.size(); } } // move any children from the old expanded group to the new // superatomSgroup for (auto &sgroupPtr : this->sgroups) { moveSgroup(sgroupPtr.get(), marvinSuperatomSgroup, false); } this->sgroups.clear(); // remove the old expanded superatom sgroup auto sgroupIter = std::find_if(parent->sgroups.begin(), parent->sgroups.end(), [this](std::unique_ptr &sgptr) { return sgptr->id == this->id; }); TEST_ASSERT(sgroupIter != parent->sgroups.end()); parent->sgroups.erase(sgroupIter); // fix up any siblings that contain all the atoms of this group. for (auto &sibling : marvinSuperatomSgroup->parent->sgroups) { if (sibling.get() == marvinSuperatomSgroup) { continue; // not a sibling } // see if the new superatom group has all of its atoms in the sibling. // If to add the dummy atom to the sibling and remove the atoms (there // must be at least one atom in the sibling that is not in the new // superatom, or it would be a child of the superatom already auto isSgroupInSetOfAtomsResult = marvinSuperatomSgroup->isSgroupInSetOfAtoms(sibling->atoms); switch (isSgroupInSetOfAtomsResult) { case SgroupInAtomSet: // remove the group atoms from the sibling and add the dummy atom sibling->atoms.push_back(dummyParentAtom); for (auto atomToRemove : marvinSuperatomSgroup->atoms) { int index = sibling->getAtomIndex(atomToRemove->id); TEST_ASSERT(index >= 0 && static_cast(index) < sibling->atoms.size()); sibling->atoms.erase(sibling->atoms.begin() + index); } break; case SgroupNotInAtomSet: // do nothing break; case SgroupBothInAndNotInAtomSet: throw FileParseException( "a subgroup contains some of the atoms of a superatomSgroup - this should not happen"); } } return marvinSuperatomSgroup; } int MarvinMultipleSgroup::getMatchedOrphanBondIndex( std::string atomIdToCheck, std::vector &bondsToTry, std::vector &orphanedBonds) const { for (auto testBond = bondsToTry.begin(); testBond != bondsToTry.end(); ++testBond) { if (*testBond == nullptr) { continue; } std::string otherAtomId; if ((*testBond)->atomRefs2[0] == atomIdToCheck) { otherAtomId = (*testBond)->atomRefs2[1]; } else if ((*testBond)->atomRefs2[1] == atomIdToCheck) { otherAtomId = (*testBond)->atomRefs2[0]; } else { continue; // bond does not have the atomId } auto testBondIter = find(orphanedBonds.begin(), orphanedBonds.end(), *testBond); if (testBondIter != orphanedBonds.end()) { return testBondIter - orphanedBonds.begin(); } *testBond = nullptr; // try the children int childId = getMatchedOrphanBondIndex(otherAtomId, bondsToTry, orphanedBonds); if (childId >= 0) { return childId; } } return -1; } void MarvinMultipleSgroup::contractOneMultipleSgroup() { // this routine takes the expanded MultipleSgroup (which comes from the // RDKit version, or is ready to produce the RDKit version), and contracts // it // to the Marvin format version. // the replicates are deleted (atoms and bonds), and the two orphaned // bonds are repaired // make sure it is not already contracted - this should not happen if (!this->isExpanded) { return; } // find the actual parent - multiple groups can be nested, so the atoms // and bonds of the lower one are really in the grandparent or higher) auto actualParent = getActualParent(this); // find the atoms to be deleted std::vector atomsToDelete; std::vector bondsToDelete; std::vector orphanedBonds; std::vector sgroupsToDelete; for (MarvinAtom *atomPtr : this->atoms) { // Atoms in the group but NOT in the parent part are to be deleted if (std::find_if(parentAtoms.begin(), parentAtoms.end(), [&, atomPtr](auto a) { return a->id == atomPtr->id; }) == parentAtoms.end()) { atomsToDelete.push_back(atomPtr); } } for (MarvinBond *bondPtr : actualParent->bonds) { bool atom1ToBeDeleted = (std::find_if(atomsToDelete.begin(), atomsToDelete.end(), [&bondPtr](auto a) { return a->id == (bondPtr)->atomRefs2[0]; }) != atomsToDelete.end()); bool atom2ToBeDeleted = (std::find_if(atomsToDelete.begin(), atomsToDelete.end(), [&bondPtr](auto a) { return a->id == (bondPtr)->atomRefs2[1]; }) != atomsToDelete.end()); if (atom1ToBeDeleted && atom2ToBeDeleted) { bondsToDelete.push_back(bondPtr); } else if (atom1ToBeDeleted) { orphanedBonds.push_back(bondPtr); swap(bondPtr->atomRefs2[0], bondPtr->atomRefs2[1]); // make sure the first ref atom is the // orphaned one (still in the mol), and // the second is the atom to be deleted bondsToDelete.push_back( bondPtr); // one of each pair of orphaned bonds will be UNdeleted } else if (atom2ToBeDeleted) { orphanedBonds.push_back(bondPtr); bondsToDelete.push_back( bondPtr); // one of each pair of orphaned bonds will be UNdeleted } } // there must be zero two or four orphaned bonds // zero happens when the whole group is replicated and not chained // two is most common - the replicates being deleted are connected to each // other 4 happens when the parent replicate (not deleted) is in between // two replicates that are deleted. // there are two orphaned bonds on the parent replicate, and two on the // rest of the molecule if (orphanedBonds.size() != 0 && orphanedBonds.size() != 2 && orphanedBonds.size() != 4) { throw FileParseException( "Error: there should be zero, two or four orphaned bonds while contracting a MultipleSgroup"); } // delete any of this group's children that were using the atoms to be // deleted for (auto &childSgroup : sgroups) { if (childSgroup->isSgroupInSetOfAtoms(atomsToDelete) != SgroupNotInAtomSet) { sgroupsToDelete.push_back(childSgroup.get()); } } // now fix the orphaned bonds - The first gets the atoms from the matched // second orphan bond and was NOT removed. the matched second orphaned // bond is deleted while (orphanedBonds.size() > 0) { int matchedOrphanBondIndex = -1; auto orphanedBondToFix = orphanedBonds[0]; orphanedBonds.erase(orphanedBonds.begin()); if (orphanedBonds.size() == 3) // was 4 but the first one was erased { std::vector bondsToTry = bondsToDelete; // copy of bonds to delete auto deleteIter = find(bondsToTry.begin(), bondsToTry.end(), orphanedBondToFix); TEST_ASSERT(deleteIter != bondsToTry.end()); bondsToTry.erase(deleteIter); matchedOrphanBondIndex = this->getMatchedOrphanBondIndex( orphanedBondToFix->atomRefs2[1], bondsToTry, orphanedBonds); if (matchedOrphanBondIndex < 0) { throw FileParseException( "For a MultipleSgroup expansion, the matching orphaned bond was not found"); } } else if (orphanedBonds.size() == 1) { // was 2 but the first one was erased matchedOrphanBondIndex = 0; // only one left - it must be the match } else { throw FileParseException( "For a MultipleSgroup contraction, the orphaned bonds must be 0,2 or 4"); } orphanedBondToFix->atomRefs2[1] = orphanedBonds[matchedOrphanBondIndex] ->atomRefs2[0]; // [0] is the orphaned atom (still in the mol) // undelete the bond which has been fixed (really remove it from the // list of bonds to be delete) auto undeleteIter = find(bondsToDelete.begin(), bondsToDelete.end(), orphanedBondToFix); TEST_ASSERT(undeleteIter != bondsToDelete.end()); bondsToDelete.erase(undeleteIter); // any siblings or children that reference the matched Orphaned bond // must be fixed to reference the retained (fixed) orphan bond for (auto &siblingSgroup : this->parent->sgroups) // note: includes this sgroups, which is // technically NOT a sibling { for (auto &bond : siblingSgroup->bonds) { if (bond->id == orphanedBonds[matchedOrphanBondIndex]->id) { bond = orphanedBondToFix; } } } // it COULD happen that children referenced the matched orphan bond for (auto &childSgroup : this->sgroups) { for (auto childBond : childSgroup->bonds) { if (childBond->id == orphanedBonds[matchedOrphanBondIndex]->id) { childBond = orphanedBondToFix; } } } TEST_ASSERT(matchedOrphanBondIndex >= 0 && static_cast(matchedOrphanBondIndex) < orphanedBonds.size()); orphanedBonds.erase(orphanedBonds.begin() + matchedOrphanBondIndex); }; // if we get here, all the changes to be made have been determined. so // make them for (auto childSgroup : sgroupsToDelete) { auto sgroupIter = std::find_if(sgroups.begin(), sgroups.end(), [childSgroup](std::unique_ptr &sgptr) { return sgptr->id == childSgroup->id; }); TEST_ASSERT(sgroupIter != sgroups.end()); sgroups.erase(sgroupIter); } // remove the atoms for (auto atomPtr : atomsToDelete) { for (auto thisParent = this->parent;; thisParent = thisParent->parent) { // do all parents and grandparents // ... to to the actual parent auto deleteIter = std::find(thisParent->atoms.begin(), thisParent->atoms.end(), atomPtr); TEST_ASSERT(deleteIter != thisParent->atoms.end()); thisParent->atoms.erase(deleteIter); if (thisParent == actualParent) { break; } } // also delete the atoms from this multiple group AND any sibling // sgroups for (auto &siblingSgroup : this->parent->sgroups) { auto siblingAtomPtr = std::find(siblingSgroup->atoms.begin(), siblingSgroup->atoms.end(), atomPtr); if (siblingAtomPtr != siblingSgroup->atoms.end()) { siblingSgroup->atoms.erase(siblingAtomPtr); } } removeOwnedAtom(atomPtr); } atomsToDelete.clear(); // remove the bonds for (MarvinBond *bondPtr : bondsToDelete) { auto deleteIter = std::find(actualParent->bonds.begin(), actualParent->bonds.end(), bondPtr); TEST_ASSERT(deleteIter != actualParent->bonds.end()); actualParent->bonds.erase(deleteIter); actualParent->removeOwnedBond(bondPtr); } bondsToDelete.clear(); this->isExpanded = false; } IsSgroupInAtomSetResult MarvinMolBase::isSgroupInSetOfAtoms( const std::vector &setOfAtoms) const { // superatom sgroups are different - it has an override for this call // if not overridden, types are in the group based on their atoms bool isInGroup = false; bool isNotInGroup = false; for (auto oneAtom : this->atoms) { if (std::find_if(setOfAtoms.begin(), setOfAtoms.end(), [&, oneAtom](auto a) { return a->id == oneAtom->id; }) != setOfAtoms.end()) { isInGroup = true; } else { isNotInGroup = true; } } if (isInGroup && isNotInGroup) { return SgroupBothInAndNotInAtomSet; } if (isInGroup) { return SgroupInAtomSet; } return SgroupNotInAtomSet; } IsSgroupInAtomSetResult MarvinSuperatomSgroup::isSgroupInSetOfAtoms( const std::vector &setOfAtoms) const { // superatom sgroups are different - they are in the set if one of the // condensed atom in the parent is in group auto dummyAtomIter = find_if( parent->atoms.begin(), parent->atoms.end(), [this](const MarvinAtom *arg) { return arg->sgroupRef == this->id; }); if (dummyAtomIter == parent->atoms.end()) { throw FileParseException("No contracted atom found for a superatomSgroup"); } if (std::find_if(setOfAtoms.begin(), setOfAtoms.end(), [&, dummyAtomIter](auto a) { return a->id == (*dummyAtomIter)->id; }) != setOfAtoms.end()) { return SgroupInAtomSet; } return SgroupNotInAtomSet; } void MarvinMolBase::processSgroupsFromRDKit() { // this routine recursively fixes sgroups to be in the heirarchy expected // by Marvin format if (this->isPassiveRoleForContraction()) { return; // passive mols are only leaves of the tree - nothing to do } // see if any siblings should be children of this one MarvinMolBase *molToProcess = this; // the processing MIGHT change the mol from one kind to another // (e.g. MarvinSuperatomSgroupExpanded -> // MarvinSuperatomSgroup) if (this->parent != nullptr) // NOT the top level mol { std::vector sgroupsMolIdsDone; for (bool allDone = false; !allDone;) { // until all at this level are done - but fixing one // child can add sgroups to this level allDone = true; // until it is not true in the loop below for (auto &sibling : this->parent->sgroups) { if (std::find(sgroupsMolIdsDone.begin(), sgroupsMolIdsDone.end(), sibling->molID) != sgroupsMolIdsDone.end()) { continue; // look at the next one to see if it is done } allDone = false; sgroupsMolIdsDone.push_back(sibling->molID); if (sibling.get() == this) { continue; // cannot be a child of itself } // there are 2 possibilities: // 1) all atoms are in this one - make the sibling into a child of // this one 2) SOME atoms are NOT this one - it remains a sibling if (sibling->isSgroupInSetOfAtoms(this->atoms) != SgroupInAtomSet) { continue; } moveSgroup(sibling.get(), this); break; // have to re=start the loop over sgroups - it has just // changed } } std::string thisSgroupRole = this->role(); if (thisSgroupRole == "SuperatomSgroupExpanded") { molToProcess = ((MarvinSuperatomSgroupExpanded *)this)->convertToOneSuperAtom(); } else if (thisSgroupRole == "MultipleSgroup" && ((MarvinMultipleSgroup *)this)->isExpanded == true) { try { ((MarvinMultipleSgroup *)this)->contractOneMultipleSgroup(); } catch (FileParseException &e) { BOOST_LOG(rdErrorLog) << e.what() << std::endl << "The multiple sgroup will be ignored" << std::endl; auto sgroupIter = std::find_if( this->parent->sgroups.begin(), this->parent->sgroups.end(), [this](std::unique_ptr &sgptr) { return sgptr->id == this->id; }); TEST_ASSERT(sgroupIter != this->parent->sgroups.end()); this->parent->sgroups.erase(sgroupIter); return; } } } // now fix all this groups children std::vector childrenSgroupsMolIdsDone; for (bool allDone = false; !allDone;) // until all at this level are done - but fixing one // child can add sgroups to this level { allDone = true; // until it is not true in the loop below for (auto &childSgroup : molToProcess->sgroups) { if (std::find(childrenSgroupsMolIdsDone.begin(), childrenSgroupsMolIdsDone.end(), childSgroup->molID) != childrenSgroupsMolIdsDone.end()) { continue; // this one is done, look at the next one } childrenSgroupsMolIdsDone.push_back(childSgroup->molID); childSgroup->processSgroupsFromRDKit(); allDone = false; break; // have to start the loop over - we might have changed the // vector } } return; } MarvinMolBase *MarvinMol::copyMol(const std::string &) const { PRECONDITION(0, "Internal error: copying a MarvinMol"); } std::string MarvinMol::toString() const { std::ostringstream out; out << ""; out << ""; for (auto atom : atoms) { out << atom->toString(); } out << ""; out << ""; for (auto bond : bonds) { out << bond->toString(); } out << ""; for (auto &sgroup : sgroups) { out << sgroup->toString(); } out << ""; return out.str(); } ptree MarvinMol::toPtree() const { ptree out = MarvinMolBase::toPtree(); MarvinMolBase::addSgroupsToPtree(out); return out; } std::string MarvinMol::generateMolString() { std::ostringstream out; out << "" ""; out << toString(); out << ""; return out.str(); } ptree MarvinMol::toMolPtree() const { ptree out; out.put("cml..xmlns", "http://www.chemaxon.com"); out.put("cml..xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance"); out.put( "cml..xsi:schemaLocation", "http://www.chemaxon.com http://www.chemaxon.com/marvin/schema/mrvSchema_20_20_0.xsd"); out.put_child("cml.MDocument.MChemicalStruct.molecule", toPtree()); return out; } MarvinReaction::~MarvinReaction() {} void MarvinReaction::prepSgroupsForRDKit() { // This routine converts all the mols in the rxn to be ready for // conversion to RDKIT mols int molCount = 0, atomCount = 0, bondCount = 0, sgCount = 0; std::map sgMap; std::map atomMap; std::map bondMap; for (auto &reactant : reactants) { reactant->prepSgroupsForRDKit(); reactant->cleanUpNumbering(molCount, atomCount, bondCount, sgCount, sgMap, atomMap, bondMap); } for (auto &agent : agents) { agent->prepSgroupsForRDKit(); agent->cleanUpNumbering(molCount, atomCount, bondCount, sgCount, sgMap, atomMap, bondMap); } for (auto &product : products) { product->prepSgroupsForRDKit(); product->cleanUpNumbering(molCount, atomCount, bondCount, sgCount, sgMap, atomMap, bondMap); } } std::string MarvinReaction::toString() { std::ostringstream out; out << "" ""; out << ""; for (auto &reactant : reactants) { out << reactant->toString(); } out << ""; out << ""; for (auto &agent : agents) { out << agent->toString(); } out << ""; out << ""; for (auto &product : products) { out << product->toString(); } out << ""; out << arrow.toString(); out << ""; for (auto &pluse : pluses) { out << pluse->toString(); } for (auto &condition : conditions) { out << condition->toString(); } out << ""; return out.str(); } ptree MarvinReaction::toPtree() const { ptree out; out.put("cml..xmlns", "http://www.chemaxon.com"); out.put("cml..xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance"); out.put( "cml..xsi:schemaLocation", "http://www.chemaxon.com http://www.chemaxon.com/marvin/schema/mrvSchema_20_20_0.xsd"); for (auto &reactant : reactants) { out.add_child( "cml.MDocument.MChemicalStruct.reaction.reactantList.molecule", reactant->toPtree()); } for (auto &agent : agents) { out.add_child("cml.MDocument.MChemicalStruct.reaction.agentList.molecule", agent->toPtree()); } for (auto &product : products) { out.add_child("cml.MDocument.MChemicalStruct.reaction.productList.molecule", product->toPtree()); } out.put_child("cml.MDocument.MChemicalStruct.reaction.arrow", arrow.toPtree()); for (auto &plus : pluses) { out.add_child("cml.MDocument.MReactionSign", plus->toPtree()); } for (auto &condition : conditions) { out.add_child("cml.MDocument.MTextBox", condition->toPtree()); } return out; } MarvinRectangle::MarvinRectangle(double left, double right, double top, double bottom) { upperLeft.x = left; upperLeft.y = top; lowerRight.x = right; lowerRight.y = bottom; centerIsStale = true; } MarvinRectangle::MarvinRectangle(const RDGeom::Point3D &upperLeftInit, const RDGeom::Point3D &lowerRightInit) { upperLeft = upperLeftInit; lowerRight = lowerRightInit; centerIsStale = true; } MarvinRectangle::MarvinRectangle(const std::vector &atoms) { centerIsStale = true; if (atoms.size() == 0) { return; } upperLeft.x = DBL_MAX; upperLeft.y = -DBL_MAX; lowerRight.x = -DBL_MAX; lowerRight.y = DBL_MAX; for (auto atom : atoms) { upperLeft.x = std::min(upperLeft.x, atom->x2); lowerRight.x = std::max(lowerRight.x, atom->x2); upperLeft.y = std::max(upperLeft.y, atom->y2); lowerRight.y = std::min(lowerRight.y, atom->y2); } } MarvinRectangle::MarvinRectangle(const std::vector &rects) { centerIsStale = true; if (rects.size() == 0) { return; } upperLeft.x = DBL_MAX; upperLeft.y = -DBL_MAX; lowerRight.x = -DBL_MAX; lowerRight.y = DBL_MAX; for (auto rect : rects) { this->extend(rect); } } void MarvinRectangle::extend(const MarvinRectangle &otherRectangle) { if (otherRectangle.upperLeft.x < upperLeft.x) { upperLeft.x = otherRectangle.upperLeft.x; } if (otherRectangle.lowerRight.x > lowerRight.x) { lowerRight.x = otherRectangle.lowerRight.x; } if (otherRectangle.upperLeft.y > upperLeft.y) { upperLeft.y = otherRectangle.upperLeft.y; } if (otherRectangle.lowerRight.y < lowerRight.y) { lowerRight.y = otherRectangle.lowerRight.y; } centerIsStale = true; } RDGeom::Point3D &MarvinRectangle::getCenter() { if (centerIsStale) { center.x = (lowerRight.x + upperLeft.x) / 2.0; center.y = (lowerRight.y + upperLeft.y) / 2.0; centerIsStale = false; } return center; } bool MarvinRectangle::overlapsVertically( const MarvinRectangle &otherRectangle) const { if (otherRectangle.upperLeft.y < lowerRight.y || otherRectangle.lowerRight.y > upperLeft.y) { return false; } return true; } bool MarvinRectangle::overlapsVHorizontally( const MarvinRectangle &otherRectangle) const { if (otherRectangle.upperLeft.x > lowerRight.x || otherRectangle.lowerRight.x < upperLeft.x) { return false; } return true; } bool MarvinRectangle::compareRectanglesByX(MarvinRectangle &r1, MarvinRectangle &r2) { return (r1.getCenter().x < r2.getCenter().x); } bool MarvinRectangle::compareRectanglesByYReverse(MarvinRectangle &r1, MarvinRectangle &r2) { return (r1.getCenter().y > r2.getCenter().y); } MarvinStereoGroup::MarvinStereoGroup(StereoGroupType grouptypeInit, int groupNumberInit) { groupType = grouptypeInit; groupNumber = groupNumberInit; } } // namespace RDKit