// // Copyright (C) 2018-2021 Greg Landrum // // @@ All Rights Reserved @@ // This file is part of the RDKit. // The contents are covered by the terms of the BSD license // which is included in the file license.txt, found at the root // of the RDKit source tree. // #ifdef _MSC_VER #pragma warning(disable : 4503) #endif #include #include #include #include #include #include #include #include #include #include #include #include #include using namespace Queries; #include #include #include #include // make sure we're using boost::json header-only #define BOOST_JSON_NO_LIB #ifndef BOOST_CONTAINER_NO_LIB #define BOOST_CONTAINER_NO_LIB #endif #include #include // only include this once in the project! #include namespace bj = boost::json; namespace RDKit { namespace MolInterchange { namespace { struct DefaultValueCache { DefaultValueCache(const bj::value &defs) : bjDefaults(defs){}; const bj::value &bjDefaults; mutable std::map intMap; mutable std::map boolMap; mutable std::map stringMap; int getInt(const char *key) const { PRECONDITION(key, "no key"); const auto &lookup = intMap.find(key); if (lookup != intMap.end()) { return lookup->second; } if (const auto fobj = bjDefaults.if_object()) { if (const auto kit = fobj->find(key); kit != fobj->end()) { const auto val = kit->value().if_int64(); if (!val) { throw FileParseException(std::string("Bad format: value of ") + std::string(key) + std::string(" is not an int")); } auto res = static_cast(*val); intMap[key] = res; return res; } } return 0; } bool getBool(const char *key) const { PRECONDITION(key, "no key"); const auto &lookup = boolMap.find(key); if (lookup != boolMap.end()) { return lookup->second; } if (const auto fobj = bjDefaults.if_object()) { if (const auto kit = fobj->find(key); kit != fobj->end()) { const auto val = kit->value().if_bool(); if (!val) { throw FileParseException(std::string("Bad format: value of ") + std::string(key) + std::string(" is not a bool")); } bool res = *val; boolMap[key] = res; return res; } } return false; } std::string getString(const char *key) const { PRECONDITION(key, "no key"); const auto &lookup = stringMap.find(key); if (lookup != stringMap.end()) { return lookup->second; } if (const auto fobj = bjDefaults.if_object()) { if (const auto kit = fobj->find(key); kit != fobj->end()) { const auto val = kit->value().if_string(); if (!val) { throw FileParseException(std::string("Bad format: value of ") + std::string(key) + std::string(" is not a string")); } auto res = val->c_str(); stringMap[key] = res; return res; } } return ""; } }; int getIntDefaultValue(const char *key, const bj::value &from, const DefaultValueCache &defaults) { PRECONDITION(key, "no key"); if (const auto fobj = from.if_object()) { if (const auto kit = fobj->find(key); kit != fobj->end()) { const auto val = kit->value().if_int64(); if (!val) { throw FileParseException(std::string("Bad format: value of ") + std::string(key) + std::string(" is not an int")); } return static_cast(*val); } } return defaults.getInt(key); } bool getBoolDefaultValue(const char *key, const bj::value &from, const DefaultValueCache &defaults) { PRECONDITION(key, "no key"); if (const auto fobj = from.if_object()) { if (const auto kit = fobj->find(key); kit != fobj->end()) { const auto val = kit->value().if_bool(); if (!val) { throw FileParseException(std::string("Bad format: value of ") + std::string(key) + std::string(" is not a bool")); } return *val; } } return defaults.getBool(key); } std::string getStringDefaultValue(const char *key, const bj::value &from, const DefaultValueCache &defaults) { PRECONDITION(key, "no key"); if (const auto fobj = from.if_object()) { if (const auto kit = fobj->find(key); kit != fobj->end()) { const auto val = kit->value().if_string(); if (!val) { throw FileParseException(std::string("Bad format: value of ") + std::string(key) + std::string(" is not a string")); } return val->c_str(); } } return defaults.getString(key); } void readAtom(RWMol *mol, const bj::value &atomVal, const DefaultValueCache &atomDefaults, const JSONParseParameters ¶ms) { PRECONDITION(mol, "no mol"); std::string stereo = getStringDefaultValue("stereo", atomVal, atomDefaults); auto stereoVal = chilookup.find(stereo); if (stereoVal == chilookup.end()) { throw FileParseException("Bad Format: bad stereo value for atom"); } std::unique_ptr at( new Atom(getIntDefaultValue("z", atomVal, atomDefaults))); if (params.useHCounts) { at->setNoImplicit(true); at->setNumExplicitHs(getIntDefaultValue("impHs", atomVal, atomDefaults)); } at->setFormalCharge(getIntDefaultValue("chg", atomVal, atomDefaults)); at->setNumRadicalElectrons(getIntDefaultValue("nRad", atomVal, atomDefaults)); at->setIsotope(getIntDefaultValue("isotope", atomVal, atomDefaults)); at->setChiralTag(stereoVal->second); bool updateLabel = false, takeOwnership = true; mol->addAtom(at.release(), updateLabel, takeOwnership); } void readBond(RWMol *mol, const bj::value &bondVal, const DefaultValueCache &bondDefaults, bool &needStereoLoop) { PRECONDITION(mol, "no mol"); unsigned int bo = getIntDefaultValue("bo", bondVal, bondDefaults); auto bondOrder = bolookup.find(bo); if (bondOrder == bolookup.end()) { throw FileParseException("Bad Format: bad bond order for bond"); } const auto &aids = bondVal.at("atoms").as_array(); std::unique_ptr bnd(new Bond()); bnd->setBeginAtomIdx(static_cast(aids.at(0).as_int64())); bnd->setEndAtomIdx(static_cast(aids.at(1).as_int64())); bnd->setBondType(bondOrder->second); bool takeOwnership = true; mol->addBond(bnd.release(), takeOwnership); std::string stereo = getStringDefaultValue("stereo", bondVal, bondDefaults); if (stereo != "unspecified") { needStereoLoop = true; } } template void parseProperties(T &obj, const bj::value &propsVal) { for (const auto &propVal : propsVal.as_object()) { if (propVal.value().is_int64()) { obj.setProp(propVal.key(), static_cast(propVal.value().as_int64())); } else if (propVal.value().is_double()) { obj.setProp(propVal.key(), propVal.value().as_double()); } else if (propVal.value().is_string()) { obj.setProp(propVal.key(), propVal.value().as_string().data()); } } } void readStereoGroups(RWMol *mol, const bj::value &sgVals) { PRECONDITION(mol, "no mol"); std::vector molSGs(mol->getStereoGroups()); for (const auto &sgVal : sgVals.as_array()) { if (!sgVal.as_object().contains("type")) { throw FileParseException("Bad Format: stereogroup does not have a type"); } if (!sgVal.as_object().contains("atoms") && !sgVal.as_object().contains("bonds")) { throw FileParseException( "Bad Format: stereogroup does not have either atoms or bonds"); } if (MolInterchange::stereoGrouplookup.find( sgVal.at("type").as_string().c_str()) == MolInterchange::stereoGrouplookup.end()) { throw FileParseException("Bad Format: bad stereogroup type"); } const auto typ = MolInterchange::stereoGrouplookup.at( sgVal.at("type").as_string().c_str()); unsigned gId = 0; if (typ != StereoGroupType::STEREO_ABSOLUTE && sgVal.as_object().contains("id")) { gId = static_cast(sgVal.at("id").as_int64()); } std::vector atoms; std::vector bonds; if (sgVal.as_object().contains("atoms")) { const auto &aids = sgVal.at("atoms").as_array(); for (const auto &aid : aids) { atoms.push_back( mol->getAtomWithIdx(static_cast(aid.as_int64()))); } } if (sgVal.as_object().contains("bonds")) { const auto &bids = sgVal.at("bonds").as_array(); for (const auto &bid : bids) { bonds.push_back( mol->getBondWithIdx(static_cast(bid.as_int64()))); } } if (!atoms.empty() || !bonds.empty()) { molSGs.emplace_back(typ, std::move(atoms), std::move(bonds), gId); } } mol->setStereoGroups(std::move(molSGs)); } void readSubstanceGroups(RWMol *mol, const bj::value &sgVals) { PRECONDITION(mol, "no mol"); for (const auto &sgVal : sgVals.as_array()) { if (!sgVal.as_object().contains("properties") || !sgVal.at("properties").as_object().contains("TYPE")) { throw FileParseException( "Bad Format: substance group does not have TYPE property"); } auto sgType = sgVal.at("properties").at("TYPE").as_string().c_str(); if (!SubstanceGroupChecks::isValidType(sgType)) { throw FileParseException( (boost::format( "Bad Format: substance group TYPE '%s' not recognized") % sgType) .str()); } SubstanceGroup sg(mol, sgType); parseProperties(sg, sgVal.at("properties")); std::string pval; if (sg.getPropIfPresent("SUBTYPE", pval) && !SubstanceGroupChecks::isValidSubType(pval)) { throw FileParseException( (boost::format( "Bad Format: substance group SUBTYPE '%s' not recognized") % pval) .str()); } if (sg.getPropIfPresent("CONNECT", pval) && !SubstanceGroupChecks::isValidConnectType(pval)) { throw FileParseException( (boost::format( "Bad Format: substance group CONNECT type '%s' not recognized") % pval) .str()); } if (sgVal.as_object().contains("atoms")) { const auto &aids = sgVal.at("atoms").as_array(); std::vector atoms; for (const auto &aid : aids) { atoms.push_back(static_cast(aid.as_int64())); } sg.setAtoms(atoms); } if (sgVal.as_object().contains("bonds")) { const auto &aids = sgVal.at("bonds").as_array(); std::vector bonds; for (const auto &aid : aids) { bonds.push_back(static_cast(aid.as_int64())); } sg.setBonds(bonds); } if (sgVal.as_object().contains("parentAtoms")) { const auto &aids = sgVal.at("parentAtoms").as_array(); std::vector atoms; for (const auto &aid : aids) { atoms.push_back(static_cast(aid.as_int64())); } sg.setParentAtoms(atoms); } if (sgVal.as_object().contains("brackets")) { const auto &brks = sgVal.at("brackets").as_array(); for (const auto &brk : brks) { SubstanceGroup::Bracket bracket; unsigned int idx = 0; for (const auto &pt : brk.as_array()) { const auto &pta = pt.as_array(); if (pta.size() != 3) { throw FileParseException( "Bad Format: bracket point doesn't have three coordinates"); } RDGeom::Point3D loc(pta[0].as_double(), pta[1].as_double(), pta[2].as_double()); bracket[idx++] = std::move(loc); } sg.getBrackets().push_back(std::move(bracket)); } } if (sgVal.as_object().contains("cstates")) { const auto &cstats = sgVal.at("cstates").as_array(); for (const auto &cstat : cstats) { SubstanceGroup::CState cstate; cstate.bondIdx = static_cast(cstat.at("bond").as_int64()); if (cstat.as_object().contains("vector")) { const auto &pta = cstat.at("vector").as_array(); if (pta.size() != 3) { throw FileParseException( "Bad Format: cstate vector doesn't have three coordinates"); } RDGeom::Point3D loc(pta[0].as_double(), pta[1].as_double(), pta[2].as_double()); cstate.vector = std::move(loc); } sg.getCStates().push_back(std::move(cstate)); } } if (sgVal.as_object().contains("attachPoints")) { const auto &aps = sgVal.at("attachPoints").as_array(); for (const auto &ap : aps) { SubstanceGroup::AttachPoint attach; attach.aIdx = static_cast(ap.at("aIdx").as_int64()); if (ap.as_object().contains("lvIdx")) { attach.lvIdx = static_cast(ap.at("lvIdx").as_int64()); } if (ap.as_object().contains("id")) { attach.id = ap.at("id").as_string().c_str(); } sg.getAttachPoints().push_back(std::move(attach)); } } addSubstanceGroup(*mol, sg); } } void readBondStereo(Bond *bnd, const bj::value &bondVal, const DefaultValueCache &bondDefaults) { PRECONDITION(bnd, "no bond"); std::string stereo = getStringDefaultValue("stereo", bondVal, bondDefaults); if (stereo == "unspecified") { return; } if (stereoBondlookup.find(stereo) == stereoBondlookup.end()) { throw FileParseException("Bad Format: bond stereo value for bond"); } if (bondVal.as_object().contains("stereoAtoms")) { const auto &aids = bondVal.at("stereoAtoms").as_array(); bnd->setStereoAtoms(static_cast(aids[0].as_int64()), static_cast(aids[1].as_int64())); } else if (stereo == "cis" || stereo == "trans") { throw FileParseException( "Bad Format: bond stereo provided without stereoAtoms"); } bnd->setStereo(stereoBondlookup.find(stereo)->second); } // namespace void readConformer(Conformer *conf, const bj::value &confVal) { PRECONDITION(conf, "no conformer"); if (!confVal.as_object().contains("dim")) { throw FileParseException("Bad Format: no conformer dimension"); } size_t dim = static_cast(confVal.at("dim").as_int64()); if (dim == 2) { conf->set3D(false); } else if (dim == 3) { conf->set3D(true); } else { throw FileParseException("Bad Format: conformer dimension != 2 or 3"); } if (!confVal.as_object().contains("coords")) { throw FileParseException("Bad Format: no conformer coords"); } size_t idx = 0; for (const auto &ptVal : confVal.at("coords").as_array()) { const auto &arr = ptVal.as_array(); if (arr.size() != dim) { throw FileParseException("coordinate contains wrong number of values"); } RDGeom::Point3D pt(arr[0].as_double(), arr[1].as_double(), (dim == 3 ? arr[2].as_double() : 0.0)); conf->setAtomPos(idx++, pt); } if (idx != conf->getNumAtoms()) { throw FileParseException( "Bad Format: conformer doesn't contain coordinates for all atoms"); } } void readPartialCharges(RWMol *mol, const bj::value &repVal, const JSONParseParameters &) { PRECONDITION(mol, "no molecule"); PRECONDITION(repVal.at("name").as_string() == std::string("partialCharges"), "bad charges"); if (!repVal.as_object().contains("formatVersion")) { throw FileParseException("Bad Format: missing version"); } if (static_cast(repVal.at("formatVersion").as_int64()) > currentChargeRepresentationVersion) { BOOST_LOG(rdWarningLog) << "partialCharges version " << static_cast(repVal.at("formatVersion").as_int64()) << " too recent. Ignoring it." << std::endl; return; } { if (repVal.as_object().contains("values")) { const auto &values = repVal.at("values").as_array(); if (values.size() != mol->getNumAtoms()) { throw FileParseException( "Bad Format: size of values array != num atoms"); } for (unsigned int idx = 0; idx != mol->getNumAtoms(); ++idx) { const auto &val = values[idx]; if (!val.is_double()) { throw FileParseException("Bad Format: partial charge not double"); } mol->getAtomWithIdx(idx)->setProp(common_properties::_GasteigerCharge, val.as_double()); } } } } void processMol(RWMol *mol, const bj::value &molval, const DefaultValueCache &atomDefaults, const DefaultValueCache &bondDefaults, const JSONParseParameters ¶ms); Query *readQuery(Atom const *owner, const bj::value &repVal, const DefaultValueCache &atomDefaults, const DefaultValueCache &bondDefaults, const JSONParseParameters ¶ms); Query *readQuery(Bond const *owner, const bj::value &repVal, const DefaultValueCache &atomDefaults, const DefaultValueCache &bondDefaults, const JSONParseParameters ¶ms); template Query *readBaseQuery(T const *owner, const bj::value &repVal, const JSONParseParameters &) { PRECONDITION(owner, "no query"); PRECONDITION(repVal.as_object().contains("tag"), "no tag"); int tag = repVal.at("tag").as_int64(); if (!repVal.as_object().contains("descr")) { throw FileParseException("Bad Format: missing query description"); } Query *res = nullptr; switch (tag) { case MolPickler::QUERY_AND: res = new AndQuery(); break; case MolPickler::QUERY_OR: res = new OrQuery(); break; case MolPickler::QUERY_XOR: res = new XOrQuery(); break; case MolPickler::QUERY_EQUALS: res = new EqualityQuery(); static_cast *>(res)->setVal( repVal.at("val").as_int64()); if (repVal.as_object().contains("tol")) { static_cast *>(res)->setTol( repVal.at("tol").as_int64()); } break; case MolPickler::QUERY_GREATER: res = new GreaterQuery(); static_cast *>(res)->setVal( repVal.at("val").as_int64()); if (repVal.as_object().contains("tol")) { static_cast *>(res)->setTol( repVal.at("tol").as_int64()); } break; case MolPickler::QUERY_GREATEREQUAL: res = new GreaterEqualQuery(); static_cast *>(res)->setVal( repVal.at("val").as_int64()); if (repVal.as_object().contains("tol")) { static_cast *>(res)->setTol( repVal.at("tol").as_int64()); } break; case MolPickler::QUERY_LESS: res = new LessQuery(); static_cast *>(res)->setVal( repVal.at("val").as_int64()); if (repVal.as_object().contains("tol")) { static_cast *>(res)->setTol( repVal.at("tol").as_int64()); } break; case MolPickler::QUERY_LESSEQUAL: res = new LessEqualQuery(); static_cast *>(res)->setVal( repVal.at("val").as_int64()); if (repVal.as_object().contains("tol")) { static_cast *>(res)->setTol( repVal.at("tol").as_int64()); } break; case MolPickler::QUERY_NULL: res = new Query(); break; case MolPickler::QUERY_RANGE: res = new RangeQuery(); static_cast *>(res)->setLower( repVal.at("lower").as_int64()); static_cast *>(res)->setUpper( repVal.at("upper").as_int64()); if (repVal.as_object().contains("tol")) { static_cast *>(res)->setTol( repVal.at("tol").as_int64()); } if (repVal.as_object().contains("ends")) { short ends = repVal.at("ends").as_int64(); const unsigned int lowerOpen = 1 << 1; const unsigned int upperOpen = 1; static_cast *>(res)->setEndsOpen( ends & lowerOpen, ends & upperOpen); } break; case MolPickler::QUERY_SET: res = new SetQuery(); if (repVal.as_object().contains("set")) { for (const auto &member : repVal.at("set").as_array()) { static_cast *>(res)->insert( member.as_int64()); } } break; default: throw FileParseException( (boost::format("Bad Format: unknown query tag %s") % tag).str()); } return res; } template void finishQuery(T const *owner, U *res, const bj::value &repVal, const DefaultValueCache &atomDefaults, const DefaultValueCache &bondDefaults, const JSONParseParameters ¶ms) { PRECONDITION(owner, "no owner"); PRECONDITION(res, "no result"); auto descr = repVal.at("descr").as_string().c_str(); res->setDescription(descr); std::string typ; if (repVal.as_object().contains("type")) { typ = repVal.at("type").as_string().c_str(); } if (!typ.empty()) { res->setTypeLabel(typ); } bool negated = false; if (repVal.as_object().contains("negated")) { negated = repVal.at("negated").as_bool(); } res->setNegation(negated); QueryOps::finalizeQueryFromDescription(res, owner); if (repVal.as_object().contains("children")) { for (const auto &child : repVal.at("children").as_array()) { typename U::CHILD_TYPE childq{ readQuery(owner, child, atomDefaults, bondDefaults, params)}; res->addChild(childq); } } } Query *readQuery(Atom const *owner, const bj::value &repVal, const DefaultValueCache &atomDefaults, const DefaultValueCache &bondDefaults, const JSONParseParameters ¶ms) { PRECONDITION(owner, "no owner"); if (!repVal.as_object().contains("tag")) { throw FileParseException("Bad Format: missing atom query tag"); } Query *res = nullptr; int tag = static_cast(repVal.at("tag").as_int64()); if (tag == MolPickler::QUERY_RECURSIVE) { if (!repVal.as_object().contains("subquery")) { throw FileParseException("Bad Format: missing subquery"); } auto *mol = new RWMol(); processMol(mol, repVal.at("subquery"), atomDefaults, bondDefaults, params); res = new RecursiveStructureQuery(mol); } else if (tag == MolPickler::QUERY_ATOMRING) { res = new AtomRingQuery(); static_cast *>(res)->setVal( static_cast(repVal.at("val").as_int64())); if (repVal.as_object().contains("tol")) { static_cast *>(res)->setTol( static_cast(repVal.at("tol").as_int64())); } } else { res = readBaseQuery(owner, repVal, params); } if (res) { finishQuery(owner, res, repVal, atomDefaults, bondDefaults, params); } return res; } Query *readQuery(Bond const *bond, const bj::value &repVal, const DefaultValueCache &atomDefaults, const DefaultValueCache &bondDefaults, const JSONParseParameters ¶ms) { PRECONDITION(bond, "no owner"); if (!repVal.as_object().contains("tag")) { throw FileParseException("Bad Format: missing bond query tag"); } Query *res = nullptr; res = readBaseQuery(bond, repVal, params); if (res) { finishQuery(bond, res, repVal, atomDefaults, bondDefaults, params); } return res; } void readQueries(RWMol *mol, const bj::value &repVal, const DefaultValueCache &atomDefaults, const DefaultValueCache &bondDefaults, const JSONParseParameters ¶ms) { PRECONDITION(mol, "no molecule"); PRECONDITION(repVal.at("name").as_string() == std::string("rdkitQueries"), "bad queries"); if (!repVal.as_object().contains("formatVersion")) { throw FileParseException("Bad Format: missing format_version"); } if (repVal.at("formatVersion").as_int64() > currentQueryRepresentationVersion) { BOOST_LOG(rdWarningLog) << "RDKit query representation format version " << repVal.at("formatVersion").as_int64() << " too recent. Ignoring it." << std::endl; return; } { const auto &miter = repVal.as_object().find("atomQueries"); if (miter != repVal.as_object().end()) { size_t idx = 0; for (const auto &val : miter->value().as_array()) { if (!val.is_object()) { throw FileParseException("Bad Format: atomQuery not object"); } if (!val.as_object().contains("tag")) { // nothing here, continue continue; } if (idx >= mol->getNumAtoms()) { throw FileParseException("too much atom data found"); } auto atom = mol->getAtomWithIdx(idx); CHECK_INVARIANT(atom != nullptr, "no atom"); // we need to replace the current atom with a query atom: QueryAtom qatom(*atom); // that copy created a bunch of query info by default, // but we want to get the info from the JSON, so delete // that: qatom.setQuery(nullptr); mol->replaceAtom(idx, &qatom); atom = mol->getAtomWithIdx(idx); static_cast(atom)->setQuery( readQuery(atom, val, atomDefaults, bondDefaults, params)); ++idx; } } } { const auto &miter = repVal.as_object().find("bondQueries"); if (miter != repVal.as_object().end()) { size_t idx = 0; for (const auto &val : miter->value().as_array()) { if (!val.is_object()) { throw FileParseException("Bad Format: bondQuery not object"); } if (!val.as_object().contains("tag")) { // nothing here, continue continue; } if (idx >= mol->getNumBonds()) { throw FileParseException("too much bond data found"); } auto bond = mol->getBondWithIdx(idx); CHECK_INVARIANT(bond != nullptr, "no bond"); QueryBond qbond(*bond); qbond.setQuery(nullptr); mol->replaceBond(idx, &qbond); bond = mol->getBondWithIdx(idx); static_cast(bond)->setQuery( readQuery(bond, val, atomDefaults, bondDefaults, params)); ++idx; } } } } void readRDKitRepresentation(RWMol *mol, const bj::value &repVal, const JSONParseParameters ¶ms) { PRECONDITION(mol, "no molecule"); PRECONDITION( repVal.at("name").as_string() == std::string("rdkitRepresentation"), "bad representation"); if (!repVal.as_object().contains("formatVersion")) { throw FileParseException("Bad Format: missing format_version"); } if (repVal.at("formatVersion").as_int64() > currentRDKitRepresentationVersion) { BOOST_LOG(rdWarningLog) << "RDKit representation format version " << repVal.at("formatVersion").as_int64() << " too recent. Ignoring it." << std::endl; return; } { const auto &miter = repVal.as_object().find("aromaticAtoms"); if (miter != repVal.as_object().end()) { for (const auto &val : miter->value().as_array()) { if (!val.is_int64()) { throw FileParseException("Bad Format: aromaticAtom not int"); } mol->getAtomWithIdx(val.as_int64())->setIsAromatic(true); } } } { const auto &miter = repVal.as_object().find("aromaticBonds"); if (miter != repVal.as_object().end()) { for (const auto &val : miter->value().as_array()) { if (!val.is_int64()) { throw FileParseException("Bad Format: aromaticBond not int"); } mol->getBondWithIdx(val.as_int64())->setIsAromatic(true); if (params.setAromaticBonds) { mol->getBondWithIdx(val.as_int64())->setBondType(Bond::AROMATIC); } } } } { const auto &miter = repVal.as_object().find("cipRanks"); if (miter != repVal.as_object().end()) { size_t i = 0; for (const auto &val : miter->value().as_array()) { if (!val.is_int64()) { throw FileParseException("Bad Format: ciprank not int"); } mol->getAtomWithIdx(i++)->setProp( common_properties::_CIPRank, static_cast(val.as_int64())); } } } { const auto &miter = repVal.as_object().find("cipCodes"); if (miter != repVal.as_object().end()) { for (const auto &val : miter->value().as_array()) { if (!val.is_array()) { throw FileParseException("Bad Format: CIPCode not string"); } mol->getAtomWithIdx(val.at(0).as_int64()) ->setProp(common_properties::_CIPCode, val.at(1).as_string().c_str()); } } } { const auto &miter = repVal.as_object().find("atomRings"); if (miter != repVal.as_object().end()) { CHECK_INVARIANT(!mol->getRingInfo()->isInitialized(), "rings already initialized"); auto ri = mol->getRingInfo(); ri->initialize(); for (const auto &val : miter->value().as_array()) { if (!val.is_array()) { throw FileParseException("Bad Format: atomRing not array"); } INT_VECT atomRing; INT_VECT bondRing; size_t sz = val.as_array().size(); atomRing.reserve(sz); bondRing.reserve(sz); for (size_t i = 0; i < sz - 1; ++i) { int idx1 = static_cast(val.as_array()[i].as_int64()); int idx2 = static_cast(val.as_array()[i + 1].as_int64()); atomRing.push_back(idx1); const auto &bnd = mol->getBondBetweenAtoms(idx1, idx2); CHECK_INVARIANT(bnd, "no bond found for ring"); bondRing.push_back(bnd->getIdx()); } int idx1 = static_cast(val.as_array()[sz - 1].as_int64()); int idx2 = static_cast(val.as_array()[0].as_int64()); atomRing.push_back(idx1); const auto &bnd = mol->getBondBetweenAtoms(idx1, idx2); CHECK_INVARIANT(bnd, "no bond found for ring"); bondRing.push_back(bnd->getIdx()); ri->addRing(atomRing, bondRing); } } } } void processMol(RWMol *mol, const bj::value &molval, const DefaultValueCache &atomDefaults, const DefaultValueCache &bondDefaults, const JSONParseParameters ¶ms) { if (molval.as_object().contains("name")) { mol->setProp(common_properties::_Name, molval.at("name").as_string().c_str()); } if (!molval.as_object().contains("atoms")) { throw FileParseException("Bad Format: missing atoms in JSON"); } if (!molval.as_object().contains("bonds")) { throw FileParseException("Bad Format: missing bonds in JSON"); } for (const auto &atomVal : molval.at("atoms").as_array()) { readAtom(mol, atomVal, atomDefaults, params); } bool needStereoLoop = false; for (const auto &bondVal : molval.at("bonds").as_array()) { readBond(mol, bondVal, bondDefaults, needStereoLoop); } if (needStereoLoop) { // need to set bond stereo after the bonds are there unsigned int bidx = 0; for (const auto &bondVal : molval.at("bonds").as_array()) { Bond *bnd = mol->getBondWithIdx(bidx++); readBondStereo(bnd, bondVal, bondDefaults); } } if (molval.as_object().contains("stereoGroups")) { readStereoGroups(mol, molval.at("stereoGroups")); } if (molval.as_object().contains("substanceGroups")) { readSubstanceGroups(mol, molval.at("substanceGroups")); } if (params.parseConformers && molval.as_object().contains("conformers")) { for (const auto &confVal : molval.at("conformers").as_array()) { auto *conf = new Conformer(mol->getNumAtoms()); readConformer(conf, confVal); mol->addConformer(conf, true); } } if (params.parseProperties && molval.as_object().contains("properties")) { parseProperties(*mol, molval.at("properties")); } if (molval.as_object().contains("extensions")) { for (const auto &propVal : molval.at("extensions").as_array()) { if (!propVal.as_object().contains("name")) { throw FileParseException( "Bad Format: representation has no name member"); } if (propVal.at("name").as_string() == std::string("rdkitRepresentation")) { readRDKitRepresentation(mol, propVal, params); } else if (propVal.at("name").as_string() == std::string("partialCharges")) { readPartialCharges(mol, propVal, params); } else if (propVal.at("name").as_string() == std::string("rdkitQueries")) { readQueries(mol, propVal, atomDefaults, bondDefaults, params); } } } mol->setProp(common_properties::_StereochemDone, 1); } std::vector> DocToMols( bj::value &doc, const JSONParseParameters ¶ms) { std::vector> res; // some error checking if (!doc.is_object()) { throw FileParseException("Bad Format: JSON should be an object"); } if (doc.as_object().contains("commonchem")) { auto jobj = doc.at("commonchem").if_object(); if (!jobj || !jobj->contains("version")) { throw FileParseException("Bad Format: missing version in JSON"); } if (jobj->at("version").as_int64() != currentMolJSONVersion) { throw FileParseException("Bad Format: bad version in JSON"); } } else if (doc.as_object().contains("rdkitjson")) { if (!doc.at("rdkitjson").is_object() || !doc.at("rdkitjson").as_object().contains("version")) { throw FileParseException("Bad Format: missing version in JSON"); } // FIX: we want to be backwards compatible // Version 10 files can be read by 11, but not vice versa. if (int jsonVersion = static_cast(doc.at("rdkitjson").at("version").as_int64()); jsonVersion > currentRDKitJSONVersion || jsonVersion < 10) { throw FileParseException("Bad Format: bad version in JSON"); } } else { throw FileParseException("Bad Format: missing header in JSON"); } bj::value atomDefaults_; if (doc.as_object().contains("defaults") && doc.at("defaults").as_object().contains("atom")) { atomDefaults_ = doc.at("defaults").at("atom"); if (!atomDefaults_.is_object()) { throw FileParseException("Bad Format: atomDefaults is not an object"); } } const DefaultValueCache atomDefaults(atomDefaults_); bj::value bondDefaults_; if (doc.as_object().contains("defaults") && doc.at("defaults").as_object().contains("bond")) { bondDefaults_ = doc.at("defaults").at("bond"); if (!bondDefaults_.is_object()) { throw FileParseException("Bad Format: bondDefaults is not an object"); } } const DefaultValueCache bondDefaults(bondDefaults_); if (doc.as_object().contains("molecules")) { if (!doc.at("molecules").is_array()) { throw FileParseException("Bad Format: molecules is not an array"); } for (const auto &molval : doc.at("molecules").as_array()) { std::unique_ptr mol(new RWMol()); processMol(mol.get(), molval, atomDefaults, bondDefaults, params); mol->updatePropertyCache(params.strictValenceCheck); mol->setProp(common_properties::_StereochemDone, 1); res.emplace_back(static_cast(mol.release())); } } return res; } } // namespace std::vector> JSONDataStreamToMols( std::istream *inStream, const JSONParseParameters ¶ms) { PRECONDITION(inStream, "no stream"); std::string jsonString((std::istreambuf_iterator(*inStream)), std::istreambuf_iterator()); bj::monotonic_resource mr; bj::value doc = bj::parse(jsonString, &mr); return DocToMols(doc, params); } std::vector> JSONDataToMols( const std::string &jsonBlock, const JSONParseParameters ¶ms) { bj::monotonic_resource mr; bj::value doc = bj::parse(jsonBlock, &mr); return DocToMols(doc, params); } } // namespace MolInterchange } // end of namespace RDKit