// // Copyright (C) 2002-2025 Greg Landrum and other RDKit contributors // // @@ All Rights Reserved @@ // This file is part of the RDKit. // The contents are covered by the terms of the BSD license // which is included in the file license.txt, found at the root // of the RDKit source tree. // #include "SmilesWrite.h" #include "SmilesParseOps.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include // #define VERBOSE_CANON 1 namespace RDKit { namespace SmilesWrite { const int atomicSmiles[] = {0, 5, 6, 7, 8, 9, 15, 16, 17, 35, 53, -1}; bool inOrganicSubset(int atomicNumber) { unsigned int idx = 0; while (atomicSmiles[idx] < atomicNumber && atomicSmiles[idx] != -1) { ++idx; } return atomicSmiles[idx] == atomicNumber; } namespace { std::string getAtomChiralityInfo(const Atom *atom) { auto allowNontet = Chirality::getAllowNontetrahedralChirality(); std::string atString; switch (atom->getChiralTag()) { case Atom::CHI_TETRAHEDRAL_CW: atString = "@@"; break; case Atom::CHI_TETRAHEDRAL_CCW: atString = "@"; break; default: break; } if (atString.empty() && allowNontet) { switch (atom->getChiralTag()) { case Atom::CHI_SQUAREPLANAR: atString = "@SP"; break; case Atom::CHI_TRIGONALBIPYRAMIDAL: atString = "@TB"; break; case Atom::CHI_OCTAHEDRAL: atString = "@OH"; break; default: break; } if (!atString.empty()) { // we added info about non-tetrahedral stereo, so check whether or not // we need to also add permutation info int permutation = 0; if (atom->getChiralTag() > Atom::ChiralType::CHI_OTHER && atom->getPropIfPresent(common_properties::_chiralPermutation, permutation) && !SmilesParseOps::checkChiralPermutation(atom->getChiralTag(), permutation)) { throw ValueErrorException("bad chirality spec"); } else if (permutation) { atString += std::to_string(permutation); } } } return atString; } // ----- // figure out if we need to put a bracket around the atom, // the conditions for this are: // - not in the organic subset // - formal charge specified // - chirality present and writing isomeric smiles // - non-default isotope and writing isomeric smiles // - atom-map information present // - the atom has a nonstandard valence // - bonded to a metal bool atomNeedsBracket(const Atom *atom, const std::string &atString, const SmilesWriteParams ¶ms) { PRECONDITION(atom, "null atom"); auto num = atom->getAtomicNum(); if (!inOrganicSubset(num)) { return true; } if (atom->getFormalCharge()) { return true; } if (params.doIsomericSmiles && (atom->getIsotope() || !atString.empty())) { return true; } if (atom->hasProp(common_properties::molAtomMapNumber)) { return true; } const INT_VECT &defaultVs = PeriodicTable::getTable()->getValenceList(num); int totalValence = atom->getTotalValence(); bool nonStandard = false; if (atom->getNumRadicalElectrons()) { nonStandard = true; } else if ((num == 7 || num == 15) && atom->getIsAromatic() && atom->getNumExplicitHs()) { // another type of "nonstandard" valence is an aromatic N or P with // explicit Hs indicated: nonStandard = true; } else { nonStandard = (totalValence != defaultVs.front() && atom->getTotalNumHs()); } if (nonStandard) { return true; } // check for bonds to a metal if (atom->hasOwningMol()) { for (const auto bond : atom->getOwningMol().atomBonds(atom)) { auto oatom = bond->getOtherAtom(atom); if (QueryOps::isMetal(*oatom)) { return true; } } } return false; } } // namespace std::string GetAtomSmiles(const Atom *atom, const SmilesWriteParams ¶ms) { PRECONDITION(atom, "bad atom"); std::string res; int fc = atom->getFormalCharge(); int num = atom->getAtomicNum(); int isotope = atom->getIsotope(); std::string symb; bool hasCustomSymbol = atom->getPropIfPresent(common_properties::smilesSymbol, symb); if (!hasCustomSymbol) { symb = PeriodicTable::getTable()->getElementSymbol(num); } // check for atomic stereochemistry std::string atString; if (params.doIsomericSmiles) { if (atom->getChiralTag() != Atom::CHI_UNSPECIFIED && !atom->hasProp(common_properties::_brokenChirality)) { atString = getAtomChiralityInfo(atom); } } bool needsBracket = true; if (!hasCustomSymbol && !params.allHsExplicit) { needsBracket = atomNeedsBracket(atom, atString, params); } if (needsBracket) { res += "["; } if (isotope && params.doIsomericSmiles) { res += std::to_string(isotope); } // this was originally only done for the organic subset, // applying it to other atom-types is a fix for Issue 3152751: // Only accept for atom->getAtomicNum() in [5, 6, 7, 8, 14, 15, 16, 33, 34, // 52] if (!params.doKekule && atom->getIsAromatic() && symb[0] >= 'A' && symb[0] <= 'Z') { switch (atom->getAtomicNum()) { case 5: case 6: case 7: case 8: case 14: case 15: case 16: case 33: case 34: case 52: symb[0] -= ('A' - 'a'); } } res += symb; res += atString; if (needsBracket) { unsigned int totNumHs = atom->getTotalNumHs(); if (totNumHs > 0) { res += "H"; if (totNumHs > 1) { res += std::to_string(totNumHs); } } if (fc > 0) { res += "+"; if (fc > 1) { res += std::to_string(fc); } } else if (fc < 0) { if (fc < -1) { res += std::to_string(fc); } else { res += "-"; } } int mapNum; if (atom->getPropIfPresent(common_properties::molAtomMapNumber, mapNum)) { res += ":"; res += std::to_string(mapNum); } res += "]"; } // If the atom has this property, the contained string will // be inserted directly in the SMILES: std::string label; if (atom->getPropIfPresent(common_properties::_supplementalSmilesLabel, label)) { res += label; } return res; } std::string GetBondSmiles(const Bond *bond, const SmilesWriteParams ¶ms, int atomToLeftIdx) { PRECONDITION(bond, "bad bond"); if (atomToLeftIdx < 0) { atomToLeftIdx = bond->getBeginAtomIdx(); } std::string res = ""; bool aromatic = false; if (!params.doKekule && (bond->getBondType() == Bond::SINGLE || bond->getBondType() == Bond::DOUBLE || bond->getBondType() == Bond::AROMATIC)) { if (bond->hasOwningMol()) { auto a1 = bond->getOwningMol().getAtomWithIdx(atomToLeftIdx); auto a2 = bond->getOwningMol().getAtomWithIdx( bond->getOtherAtomIdx(atomToLeftIdx)); if ((a1->getIsAromatic() && a2->getIsAromatic()) && (a1->getAtomicNum() || a2->getAtomicNum())) { aromatic = true; } } else { aromatic = false; } } Bond::BondDir dir = bond->getBondDir(); bond->clearProp(common_properties::_TraversalRingClosureBond); switch (bond->getBondType()) { case Bond::SINGLE: if (dir != Bond::NONE && dir != Bond::UNKNOWN) { switch (dir) { case Bond::ENDDOWNRIGHT: if (params.allBondsExplicit || params.doIsomericSmiles) { res = "\\"; } break; case Bond::ENDUPRIGHT: if (params.allBondsExplicit || params.doIsomericSmiles) { res = "/"; } break; default: if (params.allBondsExplicit) { res = "-"; } break; } } else { // if the bond is marked as aromatic and the two atoms // are aromatic, we need no marker (this arises in kekulized // molecules). // FIX: we should be able to dump kekulized smiles // currently this is possible by removing all // isAromatic flags, but there should maybe be another way if (params.allBondsExplicit) { res = "-"; } else if (aromatic && !bond->getIsAromatic()) { res = "-"; } } break; case Bond::DOUBLE: // see note above if (!aromatic || !bond->getIsAromatic() || params.allBondsExplicit) { res = "="; } break; case Bond::TRIPLE: res = "#"; break; case Bond::QUADRUPLE: res = "$"; break; case Bond::AROMATIC: if (dir != Bond::NONE && dir != Bond::UNKNOWN) { switch (dir) { case Bond::ENDDOWNRIGHT: if (params.allBondsExplicit || params.doIsomericSmiles) { res = "\\"; } break; case Bond::ENDUPRIGHT: if (params.allBondsExplicit || params.doIsomericSmiles) { res = "/"; } break; default: if (params.allBondsExplicit || !aromatic) { res = ":"; } break; } } else if (params.allBondsExplicit || !aromatic) { res = ":"; } break; case Bond::DATIVE: if (atomToLeftIdx >= 0 && bond->getBeginAtomIdx() == static_cast(atomToLeftIdx)) { res = "->"; } else { res = "<-"; } break; default: res = "~"; } return res; } std::string FragmentSmilesConstruct( ROMol &mol, int atomIdx, std::vector &colors, const UINT_VECT &ranks, const SmilesWriteParams ¶ms, std::vector &atomOrdering, std::vector &bondOrdering, const boost::dynamic_bitset<> *atomsInPlay = nullptr, const boost::dynamic_bitset<> *bondsInPlay = nullptr, const std::vector *atomSymbols = nullptr, const std::vector *bondSymbols = nullptr) { PRECONDITION(!atomsInPlay || atomsInPlay->size() >= mol.getNumAtoms(), "bad atomsInPlay"); PRECONDITION(!bondsInPlay || bondsInPlay->size() >= mol.getNumBonds(), "bad bondsInPlay"); PRECONDITION(!atomSymbols || atomSymbols->size() >= mol.getNumAtoms(), "bad atomSymbols"); PRECONDITION(!bondSymbols || bondSymbols->size() >= mol.getNumBonds(), "bad bondSymbols"); if (params.doKekule) { if (atomsInPlay && bondsInPlay) { MolOps::details::KekulizeFragment(static_cast(mol), *atomsInPlay, *bondsInPlay); } else { MolOps::Kekulize(static_cast(mol)); } } Canon::MolStack molStack; // try to prevent excessive reallocation molStack.reserve(mol.getNumAtoms() + mol.getNumBonds()); std::stringstream res; std::map ringClosureMap; int ringIdx, closureVal; if (!params.canonical) { mol.setProp(common_properties::_StereochemDone, 1); } std::list ringClosuresToErase; if (params.canonical && params.doIsomericSmiles) { Canon::canonicalizeEnhancedStereo(mol, &ranks); } Canon::canonicalizeFragment(mol, atomIdx, colors, ranks, molStack, atomsInPlay, bondsInPlay, bondSymbols, params.doIsomericSmiles, params.doRandom); Bond *bond = nullptr; for (auto &mSE : molStack) { switch (mSE.type) { case Canon::MOL_STACK_ATOM: for (auto rclosure : ringClosuresToErase) { ringClosureMap.erase(rclosure); } ringClosuresToErase.clear(); // std::cout << "\t\tAtom: " << mSE.obj.atom->getIdx() << std::endl; if (!atomSymbols) { res << GetAtomSmiles(mSE.obj.atom, params); } else { res << (*atomSymbols)[mSE.obj.atom->getIdx()]; } atomOrdering.push_back(mSE.obj.atom->getIdx()); break; case Canon::MOL_STACK_BOND: bond = mSE.obj.bond; // std::cout << "\t\tBond: " << bond->getIdx() << std::endl; if (!bondSymbols) { res << GetBondSmiles(bond, params, mSE.number); } else { res << (*bondSymbols)[bond->getIdx()]; } bondOrdering.push_back(bond->getIdx()); break; case Canon::MOL_STACK_RING: ringIdx = mSE.number; // std::cout << "\t\tRing: " << ringIdx << std::endl; if (ringClosureMap.count(ringIdx)) { // the index is already in the map -> // we're closing a ring, so grab // the index and then delete the value: closureVal = ringClosureMap[ringIdx]; ringClosuresToErase.push_back(ringIdx); } else { // we're opening a new ring, find the index for it: closureVal = 1; bool done = false; // EFF: there's got to be a more efficient way to do this while (!done) { std::map::iterator mapIt; for (mapIt = ringClosureMap.begin(); mapIt != ringClosureMap.end(); ++mapIt) { if (mapIt->second == closureVal) { break; } } if (mapIt == ringClosureMap.end()) { done = true; } else { closureVal += 1; } } ringClosureMap[ringIdx] = closureVal; } if (closureVal < 10) { res << (char)(closureVal + '0'); } else if (closureVal < 100) { res << '%' << closureVal; } else { // use extension to OpenSMILES res << "%(" << closureVal << ')'; } break; case Canon::MOL_STACK_BRANCH_OPEN: res << "("; break; case Canon::MOL_STACK_BRANCH_CLOSE: res << ")"; break; default: break; } } return res.str(); } } // end of namespace SmilesWrite static bool SortBasedOnFirstElement( const std::pair> &a, const std::pair> &b) { return a.first < b.first; } namespace SmilesWrite { namespace detail { std::string MolToSmiles(const ROMol &mol, const SmilesWriteParams ¶ms, bool doingCXSmiles, bool includeStereoGroups) { if (!mol.getNumAtoms()) { return ""; } PRECONDITION( params.rootedAtAtom < 0 || static_cast(params.rootedAtAtom) < mol.getNumAtoms(), "rootedAtAtom must be less than the number of atoms"); int rootedAtAtom; std::vector fragsRootedAtAtom; std::vector> fragsMolAtomMapping; auto mols = MolOps::getMolFrags(mol, false, nullptr, &fragsMolAtomMapping, false); // we got the mapping between fragments and atoms; repeat that for bonds std::vector> fragsMolBondMapping; boost::dynamic_bitset<> atsPresent(mol.getNumAtoms()); std::vector bondsInFrag; bondsInFrag.reserve(mol.getNumBonds()); for (const auto &atsInFrag : fragsMolAtomMapping) { atsPresent.reset(); bondsInFrag.clear(); for (auto aidx : atsInFrag) { atsPresent.set(aidx); } rootedAtAtom = -1; if (params.rootedAtAtom >= 0 && atsPresent[params.rootedAtAtom]) { rootedAtAtom = params.rootedAtAtom - atsPresent.find_first(); } fragsRootedAtAtom.push_back(rootedAtAtom); for (const auto bnd : mol.bonds()) { if (atsPresent[bnd->getBeginAtomIdx()] && atsPresent[bnd->getEndAtomIdx()]) { bondsInFrag.push_back(bnd->getIdx()); } } fragsMolBondMapping.push_back(bondsInFrag); } std::vector vfragsmi(mols.size()); // for(unsigned i=0; i> allAtomOrdering; std::vector> allBondOrdering; for (unsigned fragIdx = 0; fragIdx < mols.size(); fragIdx++) { ROMol *tmol = mols[fragIdx].get(); // update property cache std::vector atomMapNums(tmol->getNumAtoms(), 0); for (auto atom : tmol->atoms()) { if (params.ignoreAtomMapNumbers) { atomMapNums[atom->getIdx()] = atom->getAtomMapNum(); atom->setAtomMapNum(0); } atom->updatePropertyCache(false); } // clean up the chirality on any atom that is marked as chiral, // but that should not be: if (params.doIsomericSmiles) { tmol->setProp(common_properties::_doIsoSmiles, 1); if (!tmol->hasProp(common_properties::_StereochemDone)) { MolOps::assignStereochemistry(*tmol, params.cleanStereo); } } if (!doingCXSmiles || !includeStereoGroups) { // remove any stereo groups that may be present. Otherwise they will be // used in the canonicalization std::vector noStereoGroups; tmol->setStereoGroups(noStereoGroups); } if (!doingCXSmiles) { // remove any wiggle bonds, unspecified double bond stereochemistry, or // dative bonds (if we aren't doing dative bonds in the standard SMILES) for (auto bond : tmol->bonds()) { if (bond->getBondDir() == Bond::BondDir::UNKNOWN || bond->getBondDir() == Bond::BondDir::EITHERDOUBLE) { bond->setBondDir(Bond::BondDir::NONE); } if (bond->getStereo() == Bond::BondStereo::STEREOANY) { bond->setStereo(Bond::BondStereo::STEREONONE); } } // if other CXSMILES features are added to the canonicalization code // in the future, they should be removed here. } if (doingCXSmiles || !params.includeDativeBonds) { // do not output dative bonds in the SMILES if we are doing CXSmiles (we // output coordinate bonds there) or if the flag is set to ignore them for (auto bond : tmol->bonds()) { if (bond->getBondType() == Bond::DATIVE) { // we are intentionally only handling DATIVE here. The other weird // RDKit dative alternatives really shouldn't ever show up. bond->setBondType(Bond::SINGLE); // update the explicit valence of the begin atom since the implicit // valence will no longer be properly perceived bond->getBeginAtom()->calcExplicitValence(false); } } } // if we are doing CXSMILES, Hydrogen bonds are shown as single bonds // in the smiles part, and are indicated with the H: block of the CX // extensions if (doingCXSmiles) { for (auto bond : tmol->bonds()) { if (bond->getBondType() == Bond::HYDROGEN) { bond->setBondType(Bond::SINGLE); } } } rootedAtAtom = fragsRootedAtAtom[fragIdx]; if (params.doRandom && rootedAtAtom == -1) { // need to find a random atom id between 0 and mol.getNumAtoms() // exclusively rootedAtAtom = getRandomGenerator()() % tmol->getNumAtoms(); } std::string res; unsigned int nAtoms = tmol->getNumAtoms(); std::vector ranks(nAtoms); std::vector atomOrdering; std::vector bondOrdering; if (params.canonical) { const bool breakTies = true; const bool includeChiralPresence = false; const bool includeIsotopes = params.doIsomericSmiles; ; const bool includeChirality = params.doIsomericSmiles; ; const bool includeStereoGroups = params.doIsomericSmiles; ; const bool useNonStereoRanks = false; const bool includeAtomMaps = true; Canon::rankMolAtoms(*tmol, ranks, breakTies, includeChirality, includeIsotopes, includeAtomMaps, includeChiralPresence, includeStereoGroups, useNonStereoRanks); if (params.ignoreAtomMapNumbers) { for (auto atom : tmol->atoms()) { atom->setAtomMapNum(atomMapNums[atom->getIdx()]); } } } else { std::iota(ranks.begin(), ranks.end(), 0); } #ifdef VERBOSE_CANON for (unsigned int tmpI = 0; tmpI < ranks.size(); tmpI++) { std::cout << tmpI << " " << ranks[tmpI] << " " << *(tmol->getAtomWithIdx(tmpI)) << std::endl; } #endif std::vector colors(nAtoms, Canon::WHITE_NODE); int nextAtomIdx = -1; std::string subSmi; // find the next atom for a traverse if (rootedAtAtom >= 0) { nextAtomIdx = rootedAtAtom; rootedAtAtom = -1; } else { unsigned int nextRank = nAtoms + 1; for (unsigned int i = 0; i < nAtoms; i++) { if (colors[i] == Canon::WHITE_NODE && ranks[i] < nextRank) { nextRank = ranks[i]; nextAtomIdx = i; } } } CHECK_INVARIANT(nextAtomIdx >= 0, "no start atom found"); subSmi = SmilesWrite::FragmentSmilesConstruct( *tmol, nextAtomIdx, colors, ranks, params, atomOrdering, bondOrdering); res += subSmi; vfragsmi[fragIdx] = res; for (unsigned int &vit : atomOrdering) { vit = fragsMolAtomMapping[fragIdx][vit]; // Lookup the Id in the // original molecule } allAtomOrdering.push_back(atomOrdering); for (unsigned int &vit : bondOrdering) { vit = fragsMolBondMapping[fragIdx][vit]; // Lookup the Id in the // original molecule } allBondOrdering.push_back(bondOrdering); } std::string result; std::vector flattenedAtomOrdering; flattenedAtomOrdering.reserve(mol.getNumAtoms()); std::vector flattenedBondOrdering; flattenedBondOrdering.reserve(mol.getNumBonds()); if (params.canonical) { // Sort the vfragsmi, but also sort the atom and bond order vectors into // the same order typedef std::tuple, std::vector> tplType; std::vector tmp(vfragsmi.size()); for (unsigned int ti = 0; ti < vfragsmi.size(); ++ti) { tmp[ti] = std::make_tuple(vfragsmi[ti], allAtomOrdering[ti], allBondOrdering[ti]); } std::sort(tmp.begin(), tmp.end()); for (unsigned int ti = 0; ti < vfragsmi.size(); ++ti) { result += std::get<0>(tmp[ti]); if (ti < vfragsmi.size() - 1) { result += "."; } flattenedAtomOrdering.insert(flattenedAtomOrdering.end(), std::get<1>(tmp[ti]).begin(), std::get<1>(tmp[ti]).end()); flattenedBondOrdering.insert(flattenedBondOrdering.end(), std::get<2>(tmp[ti]).begin(), std::get<2>(tmp[ti]).end()); } } else { // Not canonical for (auto &i : allAtomOrdering) { flattenedAtomOrdering.insert(flattenedAtomOrdering.end(), i.begin(), i.end()); } for (auto &i : allBondOrdering) { flattenedBondOrdering.insert(flattenedBondOrdering.end(), i.begin(), i.end()); } for (unsigned i = 0; i < vfragsmi.size(); ++i) { result += vfragsmi[i]; if (i < vfragsmi.size() - 1) { result += "."; } } } mol.setProp(common_properties::_smilesAtomOutputOrder, flattenedAtomOrdering, true); mol.setProp(common_properties::_smilesBondOutputOrder, flattenedBondOrdering, true); return result; } } // namespace detail } // namespace SmilesWrite std::string MolToSmiles(const ROMol &mol, const SmilesWriteParams ¶ms) { bool doingCXSmiles = false; return SmilesWrite::detail::MolToSmiles(mol, params, doingCXSmiles); } std::string MolToCXSmiles(const ROMol &romol, const SmilesWriteParams ¶msInput, std::uint32_t flags, RestoreBondDirOption restoreBondDirs) { RWMol trwmol(romol); bool doingCXSmiles = true; bool includeStereoGroups = flags & SmilesWrite::CXSmilesFields::CX_ENHANCEDSTEREO; SmilesWriteParams params = paramsInput; // if kekule is to be done, and the bond attrs (wedging) is to be done, we // have to do the kekuleization here. Otherwise, kekule happens in the // fragment construction, and the wedges are done on the origin, possiibly // aromatic mol. THis can put wedge bonds on double bonds, which is not valid. if (params.doKekule) { MolOps::Kekulize(trwmol); params.doKekule = false; } auto res = SmilesWrite::detail::MolToSmiles(trwmol, params, doingCXSmiles, includeStereoGroups); if (res.empty()) { return res; } if (restoreBondDirs == RestoreBondDirOptionTrue) { RDKit::Chirality::reapplyMolBlockWedging(trwmol); } else if (restoreBondDirs == RestoreBondDirOptionClear) { for (auto bond : trwmol.bonds()) { if (!canHaveDirection(*bond)) { continue; } // we want to preserve wiggly bond information for discovery by // CXSmilesOps::get_bond_config_block using RDKit::common_properties::_MolFileBondCfg; // this is a wiggly bond if (auto cfg = 0u; bond->getPropIfPresent(_MolFileBondCfg, cfg) && cfg == 2) { bond->setBondDir(Bond::BondDir::UNKNOWN); } else { if (bond->getBondDir() != Bond::BondDir::NONE) { bond->setBondDir(Bond::BondDir::NONE); } bond->clearProp(_MolFileBondCfg); } } } if (!params.doIsomericSmiles) { flags &= ~(SmilesWrite::CXSmilesFields::CX_ENHANCEDSTEREO | SmilesWrite::CXSmilesFields::CX_BOND_CFG); } if (params.cleanStereo) { if (trwmol.needsUpdatePropertyCache()) { trwmol.updatePropertyCache(false); } MolOps::assignStereochemistry(trwmol, true); Chirality::cleanupStereoGroups(trwmol); } auto cxext = SmilesWrite::getCXExtensions(trwmol, flags); if (!cxext.empty()) { res += " " + cxext; } return res; } std::vector MolToRandomSmilesVect( const ROMol &mol, unsigned int numSmiles, unsigned int randomSeed, bool doIsomericSmiles, bool doKekule, bool allBondsExplicit, bool allHsExplicit) { if (randomSeed > 0) { getRandomGenerator(rdcast(randomSeed)); } std::vector res; res.reserve(numSmiles); for (unsigned int i = 0; i < numSmiles; ++i) { bool canonical = false; int rootedAtAtom = -1; bool doRandom = true; res.push_back(MolToSmiles(mol, doIsomericSmiles, doKekule, rootedAtAtom, canonical, allBondsExplicit, allHsExplicit, doRandom)); } return res; }; std::string MolFragmentToSmiles(const ROMol &mol, const SmilesWriteParams ¶ms, const std::vector &atomsToUse, const std::vector *bondsToUse, const std::vector *atomSymbols, const std::vector *bondSymbols) { PRECONDITION(atomsToUse.size(), "no atoms provided"); PRECONDITION( params.rootedAtAtom < 0 || static_cast(params.rootedAtAtom) < mol.getNumAtoms(), "rootedAtomAtom must be less than the number of atoms"); PRECONDITION(params.rootedAtAtom < 0 || std::find(atomsToUse.begin(), atomsToUse.end(), params.rootedAtAtom) != atomsToUse.end(), "rootedAtAtom not found in atomsToUse"); PRECONDITION(!atomSymbols || atomSymbols->size() >= mol.getNumAtoms(), "bad atomSymbols vector"); PRECONDITION(!bondSymbols || bondSymbols->size() >= mol.getNumBonds(), "bad bondSymbols vector"); if (!mol.getNumAtoms()) { return ""; } int rootedAtAtom = params.rootedAtAtom; ROMol tmol(mol, true); if (params.doIsomericSmiles) { tmol.setProp(common_properties::_doIsoSmiles, 1); } std::string res; boost::dynamic_bitset<> atomsInPlay(mol.getNumAtoms(), 0); for (auto aidx : atomsToUse) { atomsInPlay.set(aidx); } // figure out which bonds are actually in play: boost::dynamic_bitset<> bondsInPlay(mol.getNumBonds(), 0); if (bondsToUse) { for (auto bidx : *bondsToUse) { bondsInPlay.set(bidx); } } else { PRECONDITION( params.rootedAtAtom < 0 || MolOps::getMolFrags(mol).size() == 1, "rootedAtAtom can only be used with molecules that have a single fragment"); for (auto aidx : atomsToUse) { for (const auto &bndi : boost::make_iterator_range( mol.getAtomBonds(mol.getAtomWithIdx(aidx)))) { const Bond *bond = mol[bndi]; if (atomsInPlay[bond->getOtherAtomIdx(aidx)]) { bondsInPlay.set(bond->getIdx()); } } } } // copy over the rings that only involve atoms/bonds in this fragment: if (mol.getRingInfo()->isInitialized()) { tmol.getRingInfo()->reset(); tmol.getRingInfo()->initialize(); for (unsigned int ridx = 0; ridx < mol.getRingInfo()->numRings(); ++ridx) { const INT_VECT å = mol.getRingInfo()->atomRings()[ridx]; bool keepIt = true; for (auto aidx : aring) { if (!atomsInPlay[aidx]) { keepIt = false; break; } } if (keepIt) { const INT_VECT &bring = mol.getRingInfo()->bondRings()[ridx]; for (auto bidx : bring) { if (!bondsInPlay[bidx]) { keepIt = false; break; } } if (keepIt) { tmol.getRingInfo()->addRing(aring, bring); } } } } if (tmol.needsUpdatePropertyCache()) { for (auto atom : tmol.atoms()) { atom->updatePropertyCache(false); } } UINT_VECT ranks(tmol.getNumAtoms()); std::vector atomOrdering; std::vector bondOrdering; // clean up the chirality on any atom that is marked as chiral, // but that should not be: if (params.doIsomericSmiles) { if (!mol.hasProp(common_properties::_StereochemDone)) { MolOps::assignStereochemistry(tmol, true); } else { tmol.setProp(common_properties::_StereochemDone, 1); // we need the CIP codes: for (auto aidx : atomsToUse) { const Atom *oAt = mol.getAtomWithIdx(aidx); std::string cipCode; if (oAt->getPropIfPresent(common_properties::_CIPCode, cipCode)) { tmol.getAtomWithIdx(aidx)->setProp(common_properties::_CIPCode, cipCode); } } } } if (params.canonical) { bool breakTies = true; bool includeChiralPresence = false; bool includeRingStereo = true; Canon::rankFragmentAtoms( tmol, ranks, atomsInPlay, bondsInPlay, atomSymbols, bondSymbols, breakTies, params.doIsomericSmiles, params.doIsomericSmiles, !params.ignoreAtomMapNumbers, includeChiralPresence, includeRingStereo); // std::cerr << "RANKS: "; // std::copy(ranks.begin(), ranks.end(), // std::ostream_iterator(std::cerr, " ")); // std::cerr << std::endl; // MolOps::rankAtomsInFragment(tmol,ranks,atomsInPlay,bondsInPlay,atomSymbols,bondSymbols); } else { for (unsigned int i = 0; i < tmol.getNumAtoms(); ++i) { ranks[i] = i; } } #ifdef VERBOSE_CANON for (unsigned int tmpI = 0; tmpI < ranks.size(); tmpI++) { std::cout << tmpI << " " << ranks[tmpI] << " " << *(tmol.getAtomWithIdx(tmpI)) << std::endl; } #endif std::vector colors(tmol.getNumAtoms(), Canon::BLACK_NODE); for (auto aidx : atomsToUse) { colors[aidx] = Canon::WHITE_NODE; } std::vector::iterator colorIt; colorIt = colors.begin(); // loop to deal with the possibility that there might be disconnected // fragments while (colorIt != colors.end()) { int nextAtomIdx = -1; // find the next atom for a traverse if (rootedAtAtom >= 0) { nextAtomIdx = rootedAtAtom; rootedAtAtom = -1; } else { unsigned int nextRank = rdcast(tmol.getNumAtoms()) + 1; for (auto i : atomsToUse) { if (colors[i] == Canon::WHITE_NODE && ranks[i] < nextRank) { nextRank = ranks[i]; nextAtomIdx = i; } } } CHECK_INVARIANT(nextAtomIdx >= 0, "no start atom found"); auto subSmi = SmilesWrite::FragmentSmilesConstruct( tmol, nextAtomIdx, colors, ranks, params, atomOrdering, bondOrdering, &atomsInPlay, &bondsInPlay, atomSymbols, bondSymbols); res += subSmi; colorIt = std::find(colors.begin(), colors.end(), Canon::WHITE_NODE); if (colorIt != colors.end()) { res += "."; } } mol.setProp(common_properties::_smilesAtomOutputOrder, atomOrdering, true); mol.setProp(common_properties::_smilesBondOutputOrder, bondOrdering, true); return res; } // end of MolFragmentToSmiles() std::string MolFragmentToCXSmiles(const ROMol &mol, const SmilesWriteParams ¶ms, const std::vector &atomsToUse, const std::vector *bondsToUse, const std::vector *atomSymbols, const std::vector *bondSymbols) { auto res = MolFragmentToSmiles(mol, params, atomsToUse, bondsToUse, atomSymbols, bondSymbols); auto cxext = SmilesWrite::getCXExtensions(mol); if (!cxext.empty()) { res += " " + cxext; } return res; } } // namespace RDKit