// $Id$ // // Copyright (C) 2002-2012 Greg Landrum and Rational Discovery LLC // // @@ All Rights Reserved @@ // This file is part of the RDKit. // The contents are covered by the terms of the BSD license // which is included in the file license.txt, found at the root // of the RDKit source tree. // #include "SmilesWrite.h" #include #include #include #include #include #include #include #include namespace RDKit{ namespace SmilesWrite{ const int atomicSmiles[] = {5,6,7,8,9,15,16,17,35,53,-1}; bool inOrganicSubset(int atomicNumber){ unsigned int idx=0; while( atomicSmiles[idx]getFormalCharge(); int num = atom->getAtomicNum(); int isotope = atom->getIsotope(); bool needsBracket=false; std::string symb; symb = atom->getSymbol(); if(inOrganicSubset(num)){ // it's a member of the organic subset //if(!doKekule && atom->getIsAromatic() && symb[0] < 'a') symb[0] -= ('A'-'a'); // ----- // figure out if we need to put a bracket around the atom, // the conditions for this are: // - formal charge specified // - the atom has a nonstandard valence // - chirality present and writing isomeric smiles // - non-default isotope and writing isomeric smiles // - atom-map information present const INT_VECT &defaultVs=PeriodicTable::getTable()->getValenceList(num); int totalValence= atom->getExplicitValence()+atom->getImplicitValence(); bool nonStandard; nonStandard = std::find(defaultVs.begin(),defaultVs.end(), totalValence)==defaultVs.end(); // another type of "nonstandard" valence is an aromatic N or P with // explicit Hs indicated: if((num==7||num==15) && atom->getIsAromatic() && atom->getNumExplicitHs()){ nonStandard=true; } if(atom->getNumRadicalElectrons()){ nonStandard=true; } if(fc || nonStandard){ needsBracket=true; } if(atom->getOwningMol().hasProp("_doIsoSmiles")){ if( atom->getChiralTag()!=Atom::CHI_UNSPECIFIED ){ needsBracket = true; } else if(isotope){ needsBracket=true; } } if(atom->hasProp("molAtomMapNumber")){ needsBracket=true; } } else { needsBracket = true; } if( needsBracket ) res << "["; if(isotope && atom->getOwningMol().hasProp("_doIsoSmiles")){ res <getIsAromatic() && symb[0]>='A' && symb[0] <= 'Z'){ symb[0] -= ('A'-'a'); } res << symb; bool chiralityIncluded=false; if(atom->getOwningMol().hasProp("_doIsoSmiles") && atom->getChiralTag()!=Atom::CHI_UNSPECIFIED ){ INT_LIST trueOrder; atom->getProp("_TraversalBondIndexOrder",trueOrder); #ifdef VERBOSE_CANON std::cout << "\tatom: " << atom->getIdx() << " | "; std::copy(trueOrder.begin(),trueOrder.end(), std::ostream_iterator(std::cout,", ")); std::cout << std::endl; std::cout << "\t ---- | " ; ROMol::OEDGE_ITER beg,end; boost::tie(beg,end) = atom->getOwningMol().getAtomBonds(atom); ROMol::GRAPH_MOL_BOND_PMAP::type pMap = atom->getOwningMol().getBondPMap(); while(beg!=end){ std::cout <getIdx()<<", "; ++beg; } std::cout << std::endl; #endif int nSwaps; #if 0 if( !atom->hasProp("_CIPCode") && atom->hasProp("_CIPRank") ) { // this is a special case where the atom has stereochem indicated // but isn't a chiral center. This can happen in ring stereochem // situations. Instead of using the bond indices to collect // perturbation order (as is normal), we use the priorities of the // atoms at the end of the bonds INT_LIST ref; ROMol::OEDGE_ITER beg,end; boost::tie(beg,end) = atom->getOwningMol().getAtomBonds(atom); ROMol::GRAPH_MOL_BOND_PMAP::type pMap = atom->getOwningMol().getBondPMap(); while(beg!=end){ const Atom *endAtom=pMap[*beg]->getOtherAtom(atom); int cipRank=0; if(endAtom->hasProp("_CIPRank")){ endAtom->getProp("_CIPRank",cipRank); } ref.push_back(cipRank); ++beg; } BOOST_FOREACH(int &oIdx,trueOrder){ const Atom *endAtom=atom->getOwningMol().getBondWithIdx(oIdx)->getOtherAtom(atom); int cipRank=0; if(endAtom->hasProp("_CIPRank")){ endAtom->getProp("_CIPRank",cipRank); } oIdx=cipRank; } #if 0 BOOST_LOG(rdErrorLog)<<" ****"<(std::cerr," ")); std::cerr<(std::cerr," ")); std::cerr<(countSwapsToInterconvert(ref,trueOrder)); } else { nSwaps = atom->getPerturbationOrder(trueOrder); } #else if( !atom->hasProp("_CIPCode") && atom->hasProp("_CIPRank") && !atom->getOwningMol().hasProp("_ringSteroWarning") ){ BOOST_LOG(rdWarningLog)<<"Warning: ring stereochemistry detected. The output SMILES is not canonical."<getOwningMol().setProp("_ringStereoWarning",true,true); } nSwaps = atom->getPerturbationOrder(trueOrder); #endif if(atom->getDegree()==3 && !bondIn){ // This is a special case. Here's an example: // Our internal representation of a chiral center is equivalent to: // [C@](F)(O)(C)[H] // we'll be dumping it without the H, which entails a reordering: // [C@@H](F)(O)C ++nSwaps; } //BOOST_LOG(rdErrorLog)<<">>>> "<getIdx()<<" "<getChiralTag()<getChiralTag()){ case Atom::CHI_TETRAHEDRAL_CW: if(!(nSwaps%2)) atStr = "@@"; else atStr = "@"; chiralityIncluded=true; break; case Atom::CHI_TETRAHEDRAL_CCW: if(!(nSwaps%2)) atStr = "@"; else atStr = "@@"; chiralityIncluded=true; break; default: break; } res << atStr; } if(needsBracket){ unsigned int totNumHs=atom->getTotalNumHs(); if(totNumHs > 0){ res << "H"; if(totNumHs > 1) res << totNumHs; } if(fc > 0){ res << "+"; if(fc > 1) res << fc; } else if(fc < 0) { res << "-"; if(fc < -1) res << -fc; } if(atom->hasProp("molAtomMapNumber")){ int mapNum; atom->getProp("molAtomMapNumber",mapNum); res<<":"<hasProp("_supplementalSmilesLabel")){ std::string label; atom->getProp("_supplementalSmilesLabel",label); res << label; } return res.str(); } std::string GetBondSmiles(const Bond *bond,int atomToLeftIdx,bool doKekule,bool allBondsExplicit){ PRECONDITION(bond,"bad bond"); if(atomToLeftIdx<0) atomToLeftIdx=bond->getBeginAtomIdx(); std::stringstream res; bool aromatic=false; if( !doKekule && (bond->getBondType() == Bond::SINGLE || bond->getBondType() == Bond::DOUBLE || bond->getBondType() == Bond::AROMATIC) ){ Atom *a1,*a2; a1 = bond->getOwningMol().getAtomWithIdx(atomToLeftIdx); a2 = bond->getOwningMol().getAtomWithIdx(bond->getOtherAtomIdx(atomToLeftIdx)); if((a1->getIsAromatic() && a2->getIsAromatic()) && (a1->getAtomicNum()||a2->getAtomicNum())) aromatic=true; } Bond::BondDir dir= bond->getBondDir(); if(bond->hasProp("_TraversalRingClosureBond")){ if(dir==Bond::ENDDOWNRIGHT) dir=Bond::ENDUPRIGHT; else if(dir==Bond::ENDUPRIGHT) dir=Bond::ENDDOWNRIGHT; bond->clearProp("_TraversalRingClosureBond"); } switch(bond->getBondType()){ case Bond::SINGLE: if( dir != Bond::NONE && dir != Bond::UNKNOWN ){ switch(dir){ case Bond::ENDDOWNRIGHT: if(bond->getOwningMol().hasProp("_doIsoSmiles")) res << "\\"; break; case Bond::ENDUPRIGHT: if(bond->getOwningMol().hasProp("_doIsoSmiles")) res << "/"; break; default: break; } } else { // if the bond is marked as aromatic and the two atoms // are aromatic, we need no marker (this arises in kekulized // molecules). // FIX: we should be able to dump kekulized smiles // currently this is possible by removing all // isAromatic flags, but there should maybe be another way if(allBondsExplicit) res<<"-"; else if( aromatic && !bond->getIsAromatic() ) res << "-"; } break; case Bond::DOUBLE: // see note above if( !aromatic || !bond->getIsAromatic() ) res << "="; break; case Bond::TRIPLE: res << "#"; break; case Bond::AROMATIC: if ( dir != Bond::NONE && dir != Bond::UNKNOWN ){ switch(dir){ case Bond::ENDDOWNRIGHT: if(bond->getOwningMol().hasProp("_doIsoSmiles")) res << "\\"; break; case Bond::ENDUPRIGHT: if(bond->getOwningMol().hasProp("_doIsoSmiles")) res << "/"; break; default: break; } } if(allBondsExplicit) res << ":"; else if(!aromatic) res << ":"; break; case Bond::DATIVE: if(atomToLeftIdx>=0 && bond->getBeginAtomIdx()==static_cast(atomToLeftIdx) ) res << ">"; else res << "<"; break; default: res << "?"; } return res.str(); } std::string FragmentSmilesConstruct(ROMol &mol,int atomIdx, std::vector &colors, INT_VECT &ranks,bool doKekule,bool canonical, bool allBondsExplicit){ Canon::MolStack molStack; // try to prevent excessive reallocation molStack.reserve(mol.getNumAtoms()+ mol.getNumBonds()); std::stringstream res; std::map ringClosureMap; int ringIdx,closureVal; if(!canonical) mol.setProp("_StereochemDone",1); Canon::canonicalizeFragment(mol,atomIdx,colors,ranks, molStack); Bond *bond=0; BOOST_FOREACH(Canon::MolStackElem mSE,molStack){ switch(mSE.type){ case Canon::MOL_STACK_ATOM: //std::cout<<"\t\tAtom: "<getIdx()<getIdx()< // we're closing a ring, so grab // the index and then delete the value: closureVal = ringClosureMap[ringIdx]; ringClosureMap.erase(ringIdx); } else { // we're opening a new ring, find the index for it: closureVal = 1; bool done=false; // EFF: there's got to be a more efficient way to do this while(!done){ std::map::iterator mapIt; for(mapIt=ringClosureMap.begin(); mapIt!=ringClosureMap.end(); mapIt++){ if(mapIt->second==closureVal) break; } if(mapIt==ringClosureMap.end()){ done=true; } else { closureVal+=1; } } ringClosureMap[ringIdx]=closureVal; } if(closureVal >= 10){ res << "%"; } //std::cout << " > " << closureVal <(rootedAtAtom)updatePropertyCache(false); } unsigned int nAtoms=tmol.getNumAtoms(); INT_VECT ranks(nAtoms,-1); // clean up the chirality on any atom that is marked as chiral, // but that should not be: if(doIsomericSmiles){ if(!mol.hasProp("_StereochemDone")){ MolOps::assignStereochemistry(tmol,true); } else { tmol.setProp("_StereochemDone",1); // we need the CIP codes: for(unsigned int aidx=0;aidxhasProp("_CIPCode")){ std::string cipCode; oAt->getProp("_CIPCode",cipCode); tmol.getAtomWithIdx(aidx)->setProp("_CIPCode",cipCode); } } } } if(canonical){ MolOps::rankAtoms(tmol,ranks); } else { for(unsigned int i=0;i colors(nAtoms,Canon::WHITE_NODE); std::vector::iterator colorIt; colorIt = colors.begin(); // loop to deal with the possibility that there might be disconnected fragments while(colorIt != colors.end()){ int nextAtomIdx=-1; std::string subSmi; // find the next atom for a traverse if(rootedAtAtom>=0){ nextAtomIdx=rootedAtAtom; rootedAtAtom=-1; } else { int nextRank = nAtoms+1; for(unsigned int i=0;i=0,"no start atom found"); subSmi = SmilesWrite::FragmentSmilesConstruct(tmol, nextAtomIdx, colors, ranks,doKekule,canonical,allBondsExplicit); res += subSmi; colorIt = std::find(colors.begin(),colors.end(),Canon::WHITE_NODE); if(colorIt != colors.end()){ res += "."; } } return res; } // end of MolToSmiles() }