// $Id$ // // Copyright (C) 2003-2011 Greg Landrum and Rational Discovery LLC // // @@ All Rights Reserved @@ // This file is part of the RDKit. // The contents are covered by the terms of the BSD license // which is included in the file license.txt, found at the root // of the RDKit source tree. // #include "FileParsers.h" #include "MolFileStereochem.h" #include #include #include #include #include #include #include #include #include #include namespace RDKit{ //************************************* // // Every effort has been made to adhere to MDL's standard // for mol files // //************************************* const std::string GetMolFileChargeInfo(const RWMol &mol){ std::stringstream res; std::stringstream chgss; std::stringstream radss; unsigned int nChgs=0; unsigned int nRads=0; for(ROMol::ConstAtomIterator atomIt=mol.beginAtoms(); atomIt!=mol.endAtoms();++atomIt){ const Atom *atom=*atomIt; if(atom->getFormalCharge()!=0){ ++nChgs; chgss << boost::format(" %3d %3d") % (atom->getIdx()+1) % atom->getFormalCharge(); if(nChgs==8){ res << boost::format("M CHG%3d")%nChgs << chgss.str()<getNumRadicalElectrons(); if(nRadEs!=0){ ++nRads; if(nRadEs%2){ nRadEs=2; } else { nRadEs=3; // we use triplets, not singlets: } radss << boost::format(" %3d %3d") % (atom->getIdx()+1) % nRadEs; if(nRads==8){ res << boost::format("M RAD%3d")%nRads << radss.str()<hasQuery()){ res=true; // counter examples: // 1) atomic number // 2) the smarts parser inserts AtomAnd queries // for "C" or "c": // std::string descr=atom->getQuery()->getDescription(); if(descr=="AtomAtomicNum"){ res=false; } else if(descr=="AtomAnd"){ if((*atom->getQuery()->beginChildren())->getDescription()=="AtomAtomicNum"){ res=false; } } } return res; } const std::string GetMolFileQueryInfo(const RWMol &mol){ std::stringstream ss; for(ROMol::ConstAtomIterator atomIt=mol.beginAtoms(); atomIt!=mol.endAtoms();++atomIt){ if(hasComplexQuery(*atomIt)){ std::string sma=SmartsWrite::GetAtomSmarts(static_cast(*atomIt)); 
// ---------------------------------------------------------------------------
// NOTE(review): this copy of the file is damaged. Its line breaks are gone,
// and text between a '<' and a later '>' appears to have been stripped
// (include targets, template arguments, and whole stretches of code). As
// stored, everything after the first "//" on each physical line is comment
// text to a compiler. The notes below describe only what is still readable.
// Restore the file from version control before changing any code.
//
// The line above holds, in order:
//  * the license header, two quoted includes, ten includes whose targets
//    are missing, and the opening of namespace RDKit;
//  * GetMolFileChargeInfo(mol): walks the atoms. Each non-zero formal charge
//    is appended to chgss as (atom index + 1, charge); once eight entries
//    have accumulated an "M CHG" record is written to res. A non-zero
//    radical-electron count is mapped to 2 when odd and 3 when even, then
//    appended to radss; an "M RAD" record is written at eight entries. The
//    end of the function is missing;
//  * a bool helper whose header is missing (presumably hasComplexQuery, the
//    name used by later calls - confirm). It yields true for an atom that
//    has a query, unless the query description is "AtomAtomicNum", or is
//    "AtomAnd" with a first child described as "AtomAtomicNum";
//  * the start of GetMolFileQueryInfo(mol): for each atom passing
//    hasComplexQuery it fetches the SMARTS via SmartsWrite::GetAtomSmarts.
//
// The line below holds, in order:
//  * the rest of GetMolFileQueryInfo (emits "V " entries; truncated) and
//    fragments that read the "_MolFileRLabel" property. The function headers
//    in this stretch are missing;
//  * the atom-symbol chooser (header missing; called as AtomGetMolFileSymbol
//    further down). It picks "R#" when "_MolFileRLabel" is set, "*" for a
//    complex query, the element symbol when the atomic number is non-zero,
//    and otherwise "R" or a mapping of the "dummyLabel" property:
//    "*" and "X" -> "R", "Xa".."Xd" -> "R1".."R4", "Xf".."Xj" -> "R5".."R9",
//    anything else is used verbatim. The result is right-padded with spaces
//    to at least three characters.
//    NOTE(review): there is no "Xe" entry; presumably skipped so it is not
//    confused with xenon - confirm;
//  * an anonymous namespace with compPair (orders pairs by .first) and
//    getAtomParityFlag(atom, conf). The latter returns 0 unless the
//    conformer is 3D, the atom's degree is >= 3 and its total degree is 4.
//    It collects neighbor position vectors relative to the atom, adds the
//    atom count to the index of hydrogen neighbors (so they sort after all
//    other neighbors), sorts by index, and takes a signed triple product:
//    vectors 0,1,3 with four neighbors, otherwise the negated product of
//    vectors 0,1,2. It returns 2 for a negative value, 1 for a positive
//    value and 0 for zero;
//  * the start of GetMolFileAtomLine(atom, conf = 0).
// ---------------------------------------------------------------------------
ss<< "V "<getIdx()+1<<" "<hasProp("_MolFileRLabel")){ unsigned int lbl; (*atomIt)->getProp("_MolFileRLabel",lbl); ss<<" "<getIdx()+1<<" "<hasProp("_MolFileRLabel")){ res="R#"; } else if(hasComplexQuery(atom)){ res="*"; } else if(atom->getAtomicNum()){ res=atom->getSymbol(); } else { if(!atom->hasProp("dummyLabel")){ res = "R"; } else { std::string symb; atom->getProp("dummyLabel",symb); if(symb=="*") res="R"; else if(symb=="X") res="R"; else if(symb=="Xa") res="R1"; else if(symb=="Xb") res="R2"; else if(symb=="Xc") res="R3"; else if(symb=="Xd") res="R4"; else if(symb=="Xf") res="R5"; else if(symb=="Xg") res="R6"; else if(symb=="Xh") res="R7"; else if(symb=="Xi") res="R8"; else if(symb=="Xj") res="R9"; else res=symb; } } // pad the end with spaces while(res.size()<3) res += " "; return res; } namespace { bool compPair(const std::pair &v1, const std::pair &v2) { return (v1.first < v2.first); } unsigned int getAtomParityFlag(const Atom *atom, const Conformer *conf){ PRECONDITION(atom,"bad atom"); PRECONDITION(conf,"bad conformer"); if(!conf->is3D() || !(atom->getDegree()>=3 && atom->getTotalDegree()==4)) return 0; const ROMol &mol=atom->getOwningMol(); RDGeom::Point3D pos=conf->getAtomPos(atom->getIdx()); std::vector< std::pair > vs; ROMol::ADJ_ITER nbrIdx,endNbrs; boost::tie(nbrIdx,endNbrs) = mol.getAtomNeighbors(atom); while(nbrIdx!=endNbrs){ const Atom *at=mol.getAtomWithIdx(*nbrIdx); unsigned int idx=at->getIdx(); RDGeom::Point3D v = conf->getAtomPos(idx); v -= pos; if(at->getAtomicNum()==1){ idx += mol.getNumAtoms(); } vs.push_back(std::make_pair(idx,v)); ++nbrIdx; } std::sort(vs.begin(),vs.end(),compPair); double vol; if(vs.size()==4) { vol = vs[0].second.crossProduct(vs[1].second).dotProduct(vs[3].second); } else { vol = -vs[0].second.crossProduct(vs[1].second).dotProduct(vs[2].second); } if(vol<0){ return 2; } else if(vol>0) { return 1; } return 0; } } const std::string GetMolFileAtomLine(const Atom *atom, const Conformer *conf=0){ PRECONDITION(atom,""); 
// ---------------------------------------------------------------------------
// The line below holds, in order:
//  * the rest of GetMolFileAtomLine. All numeric fields start at 0.
//    atomMapNumber is read from the "molAtomMapNumber" property when
//    present. For atoms without a query, massDiff is (atom mass - tabulated
//    atomic weight + 0.1) stored into an int.
//    NOTE(review): storing into an int truncates toward zero, so a
//    difference of -1.0 becomes -0.9 and is written as 0 - confirm whether
//    negative mass differences are meant to be supported.
//    Coordinates come from the conformer when one is given and are 0.0
//    otherwise. The parity flag is computed only for a 3D conformer and an
//    atom whose chiral tag is neither CHI_UNSPECIFIED nor CHI_OTHER, with
//    degree >= 3 and total degree 4. chg is never changed from 0 in the
//    visible code. The fields are laid out with one boost::format call;
//  * BondGetMolFileSymbol(bond): SINGLE and DOUBLE give " 4" when the bond
//    is flagged aromatic and " 1" / " 2" otherwise, TRIPLE gives " 3",
//    AROMATIC gives " 4", anything else " 0".
//    NOTE(review): whitespace inside string literals may have been collapsed
//    in this copy - check field widths against the restored file;
//  * the start of BondGetDirCode(dir): NONE -> 0, BEGINWEDGE -> 1,
//    BEGINDASH -> 6, UNKNOWN -> 4, any other direction leaves 0.
// ---------------------------------------------------------------------------
std::string res; int massDiff,chg,stereoCare,hCount,totValence,rxnComponentType; int rxnComponentNumber,atomMapNumber,inversionFlag,exactChangeFlag; massDiff=0; chg=0; stereoCare=0; hCount=0; totValence=0; rxnComponentType=0; rxnComponentNumber=0; atomMapNumber=0; inversionFlag=0; exactChangeFlag=0; if(atom->hasProp("molAtomMapNumber")){ atom->getProp("molAtomMapNumber",atomMapNumber); } if(!atom->hasQuery()){ double atomMassDiff=atom->getMass()-PeriodicTable::getTable()->getAtomicWeight(atom->getAtomicNum()); massDiff = static_cast(atomMassDiff+.1); } unsigned int parityFlag=0; double x, y, z; x = y = z = 0.0; if (conf) { const RDGeom::Point3D pos = conf->getAtomPos(atom->getIdx()); x = pos.x; y = pos.y; z = pos.z; if(conf->is3D() && atom->getChiralTag()!=Atom::CHI_UNSPECIFIED && atom->getChiralTag()!=Atom::CHI_OTHER && atom->getDegree()>=3 && atom->getTotalDegree()==4 ){ parityFlag=getAtomParityFlag(atom,conf); } } std::string symbol = AtomGetMolFileSymbol(atom); std::stringstream ss; ss << boost::format("%10.4f%10.4f%10.4f %3s%2d%3d%3d%3d%3d%3d 0%3d%3d%3d%3d%3d") % x % y % z % symbol.c_str() % massDiff%chg%parityFlag%hCount%stereoCare%totValence%rxnComponentType% rxnComponentNumber%atomMapNumber%inversionFlag%exactChangeFlag; res += ss.str(); return res; }; const std::string BondGetMolFileSymbol(const Bond *bond){ PRECONDITION(bond,""); // FIX: should eventually recognize queries std::string res; switch(bond->getBondType()){ case Bond::SINGLE: if(bond->getIsAromatic()){ res=" 4"; } else { res=" 1"; } break; case Bond::DOUBLE: if(bond->getIsAromatic()){ res=" 4"; } else { res=" 2"; } break; case Bond::TRIPLE: res=" 3";break; case Bond::AROMATIC: res=" 4";break; default: res=" 0";break; } return res; //return res.c_str(); } // only valid for single bonds int BondGetDirCode(const Bond::BondDir dir){ int res=0; switch(dir){ case Bond::NONE: res=0;break; case Bond::BEGINWEDGE: res=1;break; case Bond::BEGINDASH: res=6;break; case Bond::UNKNOWN: res=4;break; default: 
// ---------------------------------------------------------------------------
// The line below holds, in order:
//  * the end of BondGetDirCode;
//  * the start of GetMolFileBondLine(bond, wedgeBonds, conf). For a SINGLE
//    bond the direction comes from DetermineBondWedgeState and is converted
//    with BondGetDirCode. When the code is 1 or 6 the bond index is looked
//    up in wedgeBonds, and `reverse` is set if the mapped value differs from
//    the bond's begin atom index, so the atoms are written end-first.
//    NOTE(review): the iterator returned by wedgeBonds.find() is
//    dereferenced without being compared to wedgeBonds.end(); this relies on
//    DetermineBondWedgeState reporting a wedge only for bonds present in
//    the map - confirm.
//    The DOUBLE-bond branch begins with the rationale for writing
//    unspecified double bonds as "any".
// ---------------------------------------------------------------------------
break; } return res; } const std::string GetMolFileBondLine(const Bond *bond, const INT_MAP_INT &wedgeBonds, const Conformer *conf){ PRECONDITION(bond,""); std::string symbol = BondGetMolFileSymbol(bond); int dirCode=0; Bond::BondDir dir=Bond::NONE; bool reverse = false; if(bond->getBondType()==Bond::SINGLE){ // single bond stereo chemistry dir = DetermineBondWedgeState(bond, wedgeBonds, conf); dirCode = BondGetDirCode(dir); // if this bond needs to be wedged it is possible that this // wedging was determined by a chiral atom at the end of the // bond (instead of at the beginning). In this case we need to // reverse the begin and end atoms for the bond when we write // the mol file if ((dirCode == 1) || (dirCode == 6)) { INT_MAP_INT_CI wbi = wedgeBonds.find(bond->getIdx()); if (static_cast(wbi->second) != bond->getBeginAtomIdx()) { reverse = true; } } } else if (bond->getBondType()==Bond::DOUBLE) { // double bond stereochemistry - // if the bond isn't specified, then it should go in the mol block // as "any", this was sf.net issue 2963522. // two caveats to this: // 1) if it's a ring bond, we'll only put the "any" // in the mol block if the user specifically asked for it. // Constantly seeing crossed bonds in rings, though maybe // technically correct, is irritating. // 2) if it's a terminal bond (where there's no chance of // stereochemistry anyway), we also skip the any. 
// ---------------------------------------------------------------------------
// The line below holds, in order:
//  * the rest of GetMolFileBondLine. For a DOUBLE bond whose stereo value is
//    <= STEREOANY, dirCode becomes 3 when the stereo is exactly STEREOANY,
//    or when the bond is in no ring and both end atoms have degree > 1. The
//    line is then written as two 1-based atom indices (width 3 each,
//    swapped when `reverse` is set), the bond symbol, a space, and dirCode
//    (width 2);
//  * the start of MolToMolBlock(mol, includeStereo, confId, kekulize). It
//    copies the molecule and kekulizes the copy when asked. The stereo
//    assignment section is disabled with #if 0. No conformer is used when
//    confId < 0 and the molecule has none; otherwise getConformer(confId)
//    is used. The "_Name" property, when present, is written followed by a
//    newline.
//    NOTE(review): tromol is declared as ROMol but is bound to an RWMol
//    reference through a static_cast whose target type is missing here -
//    confirm this downcast is valid for the real class layout.
// ---------------------------------------------------------------------------
// this was sf.net issue 3009756 if (bond->getStereo() <= Bond::STEREOANY){ if(bond->getStereo()==Bond::STEREOANY){ dirCode = 3; } else if(!(bond->getOwningMol().getRingInfo()->numBondRings(bond->getIdx())) && bond->getBeginAtom()->getDegree()>1 && bond->getEndAtom()->getDegree()>1){ dirCode = 3; } } } std::stringstream ss; if (reverse) { // switch the begin and end atoms on the bond line ss << std::setw(3) << bond->getEndAtomIdx()+1; ss << std::setw(3) << bond->getBeginAtomIdx()+1; } else { ss << std::setw(3) << bond->getBeginAtomIdx()+1; ss << std::setw(3) << bond->getEndAtomIdx()+1; } ss << symbol; ss << " " << std::setw(2) << dirCode; return ss.str(); } //------------------------------------------------ // // gets a mol block as a string // //------------------------------------------------ std::string MolToMolBlock(const ROMol &mol,bool includeStereo, int confId, bool kekulize){ ROMol tromol(mol); RWMol &trwmol = static_cast(tromol); // NOTE: kekulize the molecule before writing it out // because of the way mol files handle aromaticity if(kekulize) MolOps::Kekulize(trwmol); #if 0 if(includeStereo){ // assign "any" status to any stereo bonds that are not // marked with "E" or "Z" code - these bonds need to be explictly written // out to the mol file MolOps::findPotentialStereoBonds(trwmol); // now assign stereo code if any have been specified by the directions on // single bonds MolOps::assignStereochemistry(trwmol); } #endif const RWMol &tmol = const_cast(trwmol); std::string res; int nAtoms,nBonds,nLists,chiralFlag,nsText,nRxnComponents; int nReactants,nProducts,nIntermediates; nAtoms = tmol.getNumAtoms(); nBonds = tmol.getNumBonds(); nLists = 0; chiralFlag = 0; nsText=0; nRxnComponents=0; nReactants=0; nProducts=0; nIntermediates=0; const Conformer *conf; if(confId<0 && tmol.getNumConformers()==0){ conf=0; } else { conf = &(tmol.getConformer(confId)); } if(tmol.hasProp("_Name")){ std::string name; tmol.getProp("_Name",name); res += name; } res += "\n"; // 
// ---------------------------------------------------------------------------
// The line below holds, in order:
//  * the header lines of MolToMolBlock. The info line is the "MolFileInfo"
//    property when present; otherwise it is generated and, as far as is
//    still visible, ends in "3D" or "2D" according to conf->is3D(). The
//    comment line is the "MolFileComments" property when present. Each is
//    followed by a newline;
//  * a gap: the counts line, atom and bond blocks, property block and the
//    header of the file-writing function are missing;
//  * the tail of the file-writing function (presumably MolToMolFile -
//    confirm). It throws BadFileException with "Bad output file " + fName
//    when the stream check fails, otherwise writes the MolToMolBlock result
//    to the stream and deletes it.
//    NOTE(review): `delete outStream` is reached only on the success path.
//    The throw above it, and any exception from MolToMolBlock, skip it, so
//    the stream would leak if it was heap-allocated - its creation is not
//    visible here; confirm and prefer a stack std::ofstream;
//  * the closing brace of namespace RDKit.
// ---------------------------------------------------------------------------
// info if(tmol.hasProp("MolFileInfo")){ std::string info; tmol.getProp("MolFileInfo",info); res += info; } else { std::stringstream ss; ss<<" "<is3D()){ ss<<"3D"; } else { ss<<"2D"; } } res += ss.str(); } res += "\n"; // comments if(tmol.hasProp("MolFileComments")){ std::string info; tmol.getProp("MolFileComments",info); res += info; } res += "\n"; std::stringstream ss; ss<bad() ) { std::ostringstream errout; errout << "Bad output file " << fName; throw BadFileException(errout.str()); } std::string outString = MolToMolBlock(mol,includeStereo,confId,kekulize); *outStream << outString; delete outStream; } }