// $Id$ // // Copyright (C) 2002-2010 Greg Landrum and Rational Discovery LLC // // @@ All Rights Reserved @@ // This file is part of the RDKit. // The contents are covered by the terms of the BSD license // which is included in the file license.txt, found at the root // of the RDKit source tree. // #include "FileParsers.h" #include "FileParserUtils.h" #include "MolFileStereochem.h" #include #include #include #include #include #include #include #include #include #include #include namespace RDKit{ namespace FileParserUtils { int toInt(const std::string &input,bool acceptSpaces){ // atoi returns zero on failure: int res=atoi(input.c_str()); if(!res && !acceptSpaces && input[0]==' '){ std::string trimmed=boost::trim_copy(input); if(trimmed.length()==0) throw boost::bad_lexical_cast(); } return res; } double toDouble(const std::string &input,bool acceptSpaces){ // atof returns zero on failure: double res=atof(input.c_str()); if(res==0.0 && !acceptSpaces && input[0]==' '){ std::string trimmed=boost::trim_copy(input); if(trimmed.length()==0) throw boost::bad_lexical_cast(); } return res; } std::string getV3000Line(std::istream *inStream,unsigned int &line){ PRECONDITION(inStream,"bad stream"); std::string res,tempStr; ++line; tempStr = getLine(inStream); if(tempStr.size()<7 || tempStr.substr(0,7) != "M V30 "){ std::ostringstream errout; errout << "Line "<hasQuery()) return atom; QueryAtom qa(*atom); unsigned int idx=atom->getIdx(); if(atom->getFormalCharge()!=0){ qa.expandQuery(makeAtomFormalChargeQuery(atom->getFormalCharge())); } if(atom->hasProp("_hasMassQuery")){ qa.expandQuery(makeAtomMassQuery(static_cast(atom->getMass()))); } mol->replaceAtom(idx,&qa); return mol->getAtomWithIdx(idx); } } using RDKit::FileParserUtils::getV3000Line; namespace { void completeQueryAndChildren(ATOM_EQUALS_QUERY *query,Atom *tgt,int magicVal){ PRECONDITION(query,"no query"); PRECONDITION(tgt,"no atom"); if(query->getVal()==magicVal){ int tgtVal=query->getDataFunc()(tgt); query->setVal(tgtVal); } QueryAtom::QUERYATOM_QUERY::CHILD_VECT_CI childIt; for(childIt=query->beginChildren();childIt!=query->endChildren();++childIt){ completeQueryAndChildren((ATOM_EQUALS_QUERY *)(childIt->get()),tgt,magicVal); } } void CompleteMolQueries(RWMol *mol,int magicVal=-0xDEADBEEF){ for (ROMol::AtomIterator ai=mol->beginAtoms(); ai != mol->endAtoms(); ++ai){ if((*ai)->hasQuery()){ ATOM_EQUALS_QUERY *query=static_cast((*ai)->getQuery()); completeQueryAndChildren(query,*ai,magicVal); } } } //************************************* // // Every effort has been made to adhere to MDL's standard // for mol files // //************************************* void ParseOldAtomList(RWMol *mol,const std::string &text){ PRECONDITION(mol,"bad mol"); unsigned int idx; try { idx = FileParserUtils::stripSpacesAndCast(text.substr(0,3))-1; } catch (boost::bad_lexical_cast &) { std::ostringstream errout; errout << "Cannot convert " << text.substr(0,3) << " to int"; throw FileParseException(errout.str()) ; } RANGE_CHECK(0,idx,mol->getNumAtoms()-1); QueryAtom a(*(mol->getAtomWithIdx(idx))); ATOM_OR_QUERY *q = new ATOM_OR_QUERY; q->setDescription("AtomOr"); switch(text[4]){ case 'T': q->setNegation(true); break; case 'F': q->setNegation(false); break; default: std::ostringstream errout; errout << "Unrecognized atom-list query modifier: " << text[14]; throw FileParseException(errout.str()) ; } int nQueries; try { nQueries = FileParserUtils::toInt(text.substr(9,1)); } catch (boost::bad_lexical_cast &) { std::ostringstream errout; errout << "Cannot convert " << text.substr(9,1) << " to int"; throw FileParseException(errout.str()) ; } RANGE_CHECK(0,nQueries,5); for(int i=0;iaddChild(QueryAtom::QUERYATOM_QUERY::CHILD_TYPE(makeAtomNumEqualsQuery(atNum))); if(!i) a.setAtomicNum(atNum); } a.setQuery(q); mol->replaceAtom(idx,&a); }; void ParseChargeLine(RWMol *mol, const std::string &text,bool firstCall) { PRECONDITION(mol,"bad mol"); PRECONDITION(text.substr(0,6)==std::string("M CHG"),"bad charge line"); // if this line is specified all the atom other than those specified // here should carry a charge of 0; but we should only do this once: if(firstCall){ for (ROMol::AtomIterator ai = mol->beginAtoms(); ai != mol->endAtoms(); ++ai) { (*ai)->setFormalCharge(0); } } int ie, nent; try { nent = FileParserUtils::toInt(text.substr(6,3)); } catch (boost::bad_lexical_cast &) { std::ostringstream errout; errout << "Cannot convert " << text.substr(6,3) << " to int"; throw FileParseException(errout.str()) ; } int spos = 9; for (ie = 0; ie < nent; ie++) { int aid, chg; try { aid = FileParserUtils::toInt(text.substr(spos,4)); spos += 4; chg = FileParserUtils::toInt(text.substr(spos,4)); spos += 4; mol->getAtomWithIdx(aid-1)->setFormalCharge(chg); } catch (boost::bad_lexical_cast &) { std::ostringstream errout; errout << "Cannot convert " << text.substr(spos,4) << " to int"; throw FileParseException(errout.str()) ; } } } void ParseRadicalLine(RWMol *mol, const std::string &text,bool firstCall) { PRECONDITION(mol,"bad mol"); PRECONDITION(text.substr(0,6)==std::string("M RAD"),"bad charge line"); // if this line is specified all the atom other than those specified // here should carry a charge of 0; but we should only do this once: if(firstCall){ for (ROMol::AtomIterator ai = mol->beginAtoms(); ai != mol->endAtoms(); ++ai) { (*ai)->setFormalCharge(0); } } int ie, nent; try { nent = FileParserUtils::toInt(text.substr(6,3)); } catch (boost::bad_lexical_cast &) { std::ostringstream errout; errout << "Cannot convert " << text.substr(6,3) << " to int"; throw FileParseException(errout.str()) ; } int spos = 9; for (ie = 0; ie < nent; ie++) { int aid, rad; std::ostringstream errout; try { aid = FileParserUtils::toInt(text.substr(spos,4)); spos += 4; rad = FileParserUtils::toInt(text.substr(spos,4)); spos += 4; switch(rad) { case 1: mol->getAtomWithIdx(aid-1)->setNumRadicalElectrons(2); break; case 2: mol->getAtomWithIdx(aid-1)->setNumRadicalElectrons(1); break; case 3: mol->getAtomWithIdx(aid-1)->setNumRadicalElectrons(2); break; default: errout << "Unrecognized radical value " << rad << " for atom "<< aid-1 << std::endl; throw FileParseException(errout.str()) ; } } catch (boost::bad_lexical_cast &) { std::ostringstream errout; errout << "Cannot convert " << text.substr(spos,4) << " to int"; throw FileParseException(errout.str()) ; } } } void ParseIsotopeLine(RWMol *mol, const std::string &text){ PRECONDITION(mol,"bad mol"); PRECONDITION(text.substr(0,6)==std::string("M ISO"),"bad isotope line"); unsigned int nent; try { nent = FileParserUtils::stripSpacesAndCast(text.substr(6,3)); } catch (boost::bad_lexical_cast &) { std::ostringstream errout; errout << "Cannot convert " << text.substr(6,3) << " to int"; throw FileParseException(errout.str()) ; } unsigned int spos = 9; for (unsigned int ie = 0; ie < nent; ie++) { unsigned int aid; int mass; try { aid = FileParserUtils::stripSpacesAndCast(text.substr(spos,4)); spos += 4; Atom *atom=mol->getAtomWithIdx(aid-1); if(text.size()>=spos+4 && text.substr(spos,4)!=" "){ mass = FileParserUtils::toInt(text.substr(spos,4)); atom->setMass(static_cast(mass)); spos += 4; } else { atom->setMass(PeriodicTable::getTable()->getAtomicWeight(atom->getAtomicNum())); } } catch (boost::bad_lexical_cast &) { std::ostringstream errout; errout << "Cannot convert " << text.substr(spos,4) << " to int"; throw FileParseException(errout.str()) ; } } } void ParseSubstitutionCountLine(RWMol *mol, const std::string &text){ PRECONDITION(mol,"bad mol"); PRECONDITION(text.substr(0,6)==std::string("M SUB"),"bad SUB line"); unsigned int nent; try { nent = FileParserUtils::stripSpacesAndCast(text.substr(6,3)); } catch (boost::bad_lexical_cast &) { std::ostringstream errout; errout << "Cannot convert " << text.substr(6,3) << " to int"; throw FileParseException(errout.str()) ; } unsigned int spos = 9; for (unsigned int ie = 0; ie < nent; ie++) { unsigned int aid; int count; try { aid = FileParserUtils::stripSpacesAndCast(text.substr(spos,4)); spos += 4; Atom *atom=mol->getAtomWithIdx(aid-1); if(text.size()>=spos+4 && text.substr(spos,4)!=" "){ count = FileParserUtils::toInt(text.substr(spos,4)); if(count==0) continue; ATOM_EQUALS_QUERY *q=makeAtomExplicitDegreeQuery(0); switch(count){ case -1: q->setVal(0);break; case -2: q->setVal(atom->getDegree());break; case 1: case 2: case 3: case 4: case 5: q->setVal(count);break; case 6: BOOST_LOG(rdWarningLog) << " atom degree query with value 6 found. This will not match degree >6. The MDL spec says it should."; q->setVal(6);break; default: std::ostringstream errout; errout << "Value " << count << " is not supported as a degree query."; throw FileParseException(errout.str()) ; } if(!atom->hasQuery()){ atom=FileParserUtils::replaceAtomWithQueryAtom(mol,atom); } atom->expandQuery(q,Queries::COMPOSITE_AND); spos += 4; } } catch (boost::bad_lexical_cast &) { std::ostringstream errout; errout << "Cannot convert " << text.substr(spos,4) << " to int"; throw FileParseException(errout.str()) ; } } } void ParseUnsaturationLine(RWMol *mol, const std::string &text){ PRECONDITION(mol,"bad mol"); PRECONDITION(text.substr(0,6)==std::string("M UNS"),"bad UNS line"); unsigned int nent; try { nent = FileParserUtils::stripSpacesAndCast(text.substr(6,3)); } catch (boost::bad_lexical_cast &) { std::ostringstream errout; errout << "Cannot convert " << text.substr(6,3) << " to int"; throw FileParseException(errout.str()) ; } unsigned int spos = 9; for (unsigned int ie = 0; ie < nent; ie++) { unsigned int aid; int count; try { aid = FileParserUtils::stripSpacesAndCast(text.substr(spos,4)); spos += 4; Atom *atom=mol->getAtomWithIdx(aid-1); if(text.size()>=spos+4 && text.substr(spos,4)!=" "){ count = FileParserUtils::toInt(text.substr(spos,4)); if(count==0){ continue; } else if(count==1){ ATOM_EQUALS_QUERY *q=makeAtomUnsaturatedQuery(); if(!atom->hasQuery()){ atom=FileParserUtils::replaceAtomWithQueryAtom(mol,atom); } atom->expandQuery(q,Queries::COMPOSITE_AND); } else { std::ostringstream errout; errout << "Value " << count << " is not supported as an unsaturation query (only 0 and 1 are allowed)."; throw FileParseException(errout.str()) ; } } }catch (boost::bad_lexical_cast &) { std::ostringstream errout; errout << "Cannot convert " << text.substr(spos,4) << " to int"; throw FileParseException(errout.str()) ; } } } void ParseRingBondCountLine(RWMol *mol, const std::string &text){ PRECONDITION(mol,"bad mol"); PRECONDITION(text.substr(0,6)==std::string("M RBC"),"bad RBC line"); unsigned int nent; try { nent = FileParserUtils::stripSpacesAndCast(text.substr(6,3)); } catch (boost::bad_lexical_cast &) { std::ostringstream errout; errout << "Cannot convert " << text.substr(6,3) << " to int"; throw FileParseException(errout.str()) ; } unsigned int spos = 9; for (unsigned int ie = 0; ie < nent; ie++) { unsigned int aid; int count; try { aid = FileParserUtils::stripSpacesAndCast(text.substr(spos,4)); spos += 4; Atom *atom=mol->getAtomWithIdx(aid-1); if(text.size()>=spos+4 && text.substr(spos,4)!=" "){ count = FileParserUtils::toInt(text.substr(spos,4)); if(count==0) continue; ATOM_EQUALS_QUERY *q=makeAtomRingBondCountQuery(0); switch(count){ case -1: q->setVal(0);break; case -2: q->setVal(-0xDEADBEEF); mol->setProp("_NeedsQueryScan",1); break; case 1: case 2: case 3: q->setVal(count);break; case 4: delete q; q = static_cast(new ATOM_LESSEQUAL_QUERY); q->setVal(4); q->setDescription("AtomRingBondCount"); q->setDataFunc(queryAtomRingBondCount); break; default: std::ostringstream errout; errout << "Value " << count << " is not supported as a ring-bond count query."; throw FileParseException(errout.str()) ; } if(!atom->hasQuery()){ atom=FileParserUtils::replaceAtomWithQueryAtom(mol,atom); } atom->expandQuery(q,Queries::COMPOSITE_AND); spos += 4; } } catch (boost::bad_lexical_cast &) { std::ostringstream errout; errout << "Cannot convert " << text.substr(spos,4) << " to int"; throw FileParseException(errout.str()) ; } } } void ParseNewAtomList(RWMol *mol,const std::string &text){ if(text.size()<15){ std::ostringstream errout; errout << "Atom list line too short: '"<(text.substr(7,3))-1; } catch (boost::bad_lexical_cast &) { std::ostringstream errout; errout << "Cannot convert " << text.substr(7,3) << " to int"; throw FileParseException(errout.str()) ; } RANGE_CHECK(0,idx,mol->getNumAtoms()-1); QueryAtom *a=0; int nQueries; try { nQueries = FileParserUtils::toInt(text.substr(10,3)); } catch (boost::bad_lexical_cast &) { std::ostringstream errout; errout << "Cannot convert " << text.substr(10,3) << " to int"; throw FileParseException(errout.str()) ; } ASSERT_INVARIANT(nQueries>0,"no queries provided"); for(unsigned int i=0;i(nQueries);i++){ unsigned int pos = 16+i*4; if(text.size()getAtomicNumber(atSymb); if(!i){ a = new QueryAtom(*(mol->getAtomWithIdx(idx))); a->setAtomicNum(atNum); } else { a->expandQuery(makeAtomNumEqualsQuery(atNum),Queries::COMPOSITE_OR,true); } } ASSERT_INVARIANT(a,"no atom built"); switch(text[14]){ case 'T': a->getQuery()->setNegation(true); break; case 'F': a->getQuery()->setNegation(false); break; default: std::ostringstream errout; errout << "Unrecognized atom-list query modifier: " << text[14]; throw FileParseException(errout.str()) ; } mol->replaceAtom(idx,a); }; void ParseRGroupLabels(RWMol *mol,const std::string &text){ PRECONDITION(mol,"bad mol"); PRECONDITION(text.substr(0,6)==std::string("M RGP"),"bad R group label line"); int nLabels; try { nLabels = FileParserUtils::toInt(text.substr(6,3)); } catch (boost::bad_lexical_cast &) { std::ostringstream errout; errout << "Cannot convert " << text.substr(6,3) << " to int"; throw FileParseException(errout.str()) ; } for(int i=0;i(text.substr(pos,3)); } catch (boost::bad_lexical_cast &) { std::ostringstream errout; errout << "Cannot convert " << text.substr(pos,3) << " to int"; throw FileParseException(errout.str()) ; } unsigned int rLabel; try { rLabel = FileParserUtils::stripSpacesAndCast(text.substr(pos+4,3)); } catch (boost::bad_lexical_cast &) { std::ostringstream errout; errout << "Cannot convert " << text.substr(pos+4,3) << " to int"; throw FileParseException(errout.str()) ; } atIdx-=1; if(atIdx>mol->getNumAtoms()){ std::ostringstream errout; errout << "Attempt to set R group label on nonexistent atom " << atIdx; throw FileParseException(errout.str()) ; } QueryAtom qatom(*(mol->getAtomWithIdx(atIdx))); qatom.setProp("_MolFileRLabel",rLabel); // set the dummy label so that this is shown correctly // in other pieces of the code : // (this was sf.net issue 3316600) std::string dLabel="R"+boost::lexical_cast(rLabel); qatom.setProp("dummyLabel",dLabel); // the CTFile spec (June 2005 version) technically only allows // R labels up to 32. Since there are three digits, we'll accept // anything: so long as it's positive and less than 1000: if(rLabel>0 && rLabel<999){ qatom.setMass(double(rLabel)); } qatom.setQuery(makeAtomNullQuery()); mol->replaceAtom(atIdx,&qatom); } }; void ParseAtomAlias(RWMol *mol,std::string text,const std::string &nextLine){ PRECONDITION(mol,"bad mol"); PRECONDITION(text.substr(0,2)==std::string("A "),"bad atom alias line"); unsigned int idx; try { idx = FileParserUtils::stripSpacesAndCast(text.substr(3,3))-1; } catch (boost::bad_lexical_cast &) { std::ostringstream errout; errout << "Cannot convert " << text.substr(3,3) << " to int"; throw FileParseException(errout.str()) ; } RANGE_CHECK(0,idx,mol->getNumAtoms()-1); Atom *at = mol->getAtomWithIdx(idx); at->setProp("molFileAlias",nextLine); }; void ParseAtomValue(RWMol *mol,std::string text){ PRECONDITION(mol,"bad mol"); PRECONDITION(text.substr(0,2)==std::string("V "),"bad atom value line"); unsigned int idx; try { idx = FileParserUtils::stripSpacesAndCast(text.substr(3,3))-1; } catch (boost::bad_lexical_cast &) { std::ostringstream errout; errout << "Cannot convert " << text.substr(3,3) << " to int"; throw FileParseException(errout.str()) ; } RANGE_CHECK(0,idx,mol->getNumAtoms()-1); Atom *at = mol->getAtomWithIdx(idx); at->setProp("molFileValue",text.substr(7,text.length()-7)); }; Atom *ParseMolFileAtomLine(const std::string text, RDGeom::Point3D &pos) { Atom *res = new Atom; std::string symb; int massDiff,chg,hCount; if(text.size()<34){ std::ostringstream errout; errout << "Atom line too short: '"<=36 && text.substr(34,2)!=" 0"){ try { massDiff = FileParserUtils::toInt(text.substr(34,2),true); } catch (boost::bad_lexical_cast &) { std::ostringstream errout; errout << "Cannot convert " << text.substr(34,2) << " to int"; throw FileParseException(errout.str()) ; } } chg=0; if(text.size()>=39 && text.substr(36,3)!=" 0"){ try { chg = FileParserUtils::toInt(text.substr(36,3),true); } catch (boost::bad_lexical_cast &) { std::ostringstream errout; errout << "Cannot convert " << text.substr(36,3) << " to int"; throw FileParseException(errout.str()) ; } } hCount = 0; if(text.size()>=45 && text.substr(42,3)!=" 0"){ try { hCount = FileParserUtils::toInt(text.substr(42,3),true); } catch (boost::bad_lexical_cast &) { std::ostringstream errout; errout << "Cannot convert " << text.substr(42,3) << " to int"; throw FileParseException(errout.str()) ; } } if(symb=="L" || symb=="A" || symb=="Q" || symb=="*" || symb=="LP" || symb=="R" || symb=="R#" || (symb>="R0" && symb<="R9") ){ if(symb=="A"||symb=="Q"||symb=="*"){ QueryAtom *query=new QueryAtom(0); if(symb=="*"){ // according to the MDL spec, these match anything query->setQuery(makeAtomNullQuery()); } else if(symb=="Q"){ ATOM_OR_QUERY *q = new ATOM_OR_QUERY; q->setDescription("AtomOr"); q->setNegation(true); q->addChild(QueryAtom::QUERYATOM_QUERY::CHILD_TYPE(makeAtomNumEqualsQuery(6))); q->addChild(QueryAtom::QUERYATOM_QUERY::CHILD_TYPE(makeAtomNumEqualsQuery(1))); query->setQuery(q); } else if(symb=="A"){ query->setQuery(makeAtomNumEqualsQuery(1)); query->getQuery()->setNegation(true); } delete res; res=query; // queries have no implicit Hs: res->setNoImplicit(true); } else { res->setAtomicNum(0); } if(massDiff==0&&symb[0]=='R'){ if(symb=="R1") res->setMass(1); else if(symb=="R2") res->setMass(2); else if(symb=="R3") res->setMass(3); else if(symb=="R4") res->setMass(4); else if(symb=="R5") res->setMass(5); else if(symb=="R6") res->setMass(6); else if(symb=="R7") res->setMass(7); else if(symb=="R8") res->setMass(8); else if(symb=="R9") res->setMass(9); } } else if( symb=="D" ){ // mol blocks support "D" and "T" as shorthand... handle that. res->setAtomicNum(1); res->setMass(2.014); } else if( symb=="T" ){ // mol blocks support "D" and "T" as shorthand... handle that. res->setAtomicNum(1); res->setMass(3.016); } else { res->setAtomicNum(PeriodicTable::getTable()->getAtomicNumber(symb)); res->setMass(PeriodicTable::getTable()->getAtomicWeight(res->getAtomicNum())); } //res->setPos(pX,pY,pZ); if(chg!=0) res->setFormalCharge(4-chg); // FIX: this does not appear to be correct if(hCount==1){ res->setNoImplicit(true); } if(massDiff!=0) { res->setMass(PeriodicTable::getTable()->getMostCommonIsotope(res->getAtomicNum())+massDiff); res->setProp("_hasMassQuery",true); } if(text.size()>=42 && text.substr(39,3)!=" 0"){ int parity=0; try { parity = FileParserUtils::toInt(text.substr(39,3),true); } catch (boost::bad_lexical_cast &) { std::ostringstream errout; errout << "Cannot convert " << text.substr(39,3) << " to int"; throw FileParseException(errout.str()) ; } res->setProp("molParity",parity); } if(text.size()>=48 && text.substr(45,3)!=" 0"){ int stereoCare=0; try { stereoCare = FileParserUtils::toInt(text.substr(45,3),true); } catch (boost::bad_lexical_cast &) { std::ostringstream errout; errout << "Cannot convert " << text.substr(45,3) << " to int"; throw FileParseException(errout.str()) ; } res->setProp("molStereoCare",stereoCare); } if(text.size()>=51 && text.substr(48,3)!=" 0"){ int totValence=0; try { totValence= FileParserUtils::toInt(text.substr(48,3),true); } catch (boost::bad_lexical_cast &) { std::ostringstream errout; errout << "Cannot convert " << text.substr(48,3) << " to int"; throw FileParseException(errout.str()) ; } res->setProp("molTotValence",totValence); } if(text.size()>=63 && text.substr(60,3)!=" 0"){ int atomMapNumber=0; try { atomMapNumber = FileParserUtils::toInt(text.substr(60,3),true); } catch (boost::bad_lexical_cast &) { std::ostringstream errout; errout << "Cannot convert " << text.substr(60,3) << " to int"; throw FileParseException(errout.str()) ; } res->setProp("molAtomMapNumber",atomMapNumber); } if(text.size()>=66 && text.substr(63,3)!=" 0"){ int inversionFlag=0; try { inversionFlag= FileParserUtils::toInt(text.substr(63,3),true); } catch (boost::bad_lexical_cast &) { std::ostringstream errout; errout << "Cannot convert " << text.substr(63,3) << " to int"; throw FileParseException(errout.str()) ; } res->setProp("molInversionFlag",inversionFlag); } if(text.size()>=69 && text.substr(66,3)!=" 0"){ int exactChangeFlag=0; try { exactChangeFlag = FileParserUtils::toInt(text.substr(66,3),true); } catch (boost::bad_lexical_cast &) { std::ostringstream errout; errout << "Cannot convert " << text.substr(66,3) << " to int"; throw FileParseException(errout.str()) ; } res->setProp("molExactChangeFlag",exactChangeFlag); } return res; }; Bond *ParseMolFileBondLine(const std::string &text){ int idx1,idx2,bType,stereo; int spos = 0; if(text.size()<9){ std::ostringstream errout; errout << "Bond line too short: '"<setQuery(q); } else if (bType==5 || bType==6 || bType==7 ){ BOND_OR_QUERY *q; q = new BOND_OR_QUERY; if(bType == 5){ // single or double q->addChild(QueryBond::QUERYBOND_QUERY::CHILD_TYPE(makeBondOrderEqualsQuery(Bond::SINGLE))); q->addChild(QueryBond::QUERYBOND_QUERY::CHILD_TYPE(makeBondOrderEqualsQuery(Bond::DOUBLE))); q->setDescription("BondOr"); } else if(bType == 6){ // single or aromatic q->addChild(QueryBond::QUERYBOND_QUERY::CHILD_TYPE(makeBondOrderEqualsQuery(Bond::SINGLE))); q->addChild(QueryBond::QUERYBOND_QUERY::CHILD_TYPE(makeBondOrderEqualsQuery(Bond::AROMATIC))); q->setDescription("BondOr"); } else if(bType == 7){ // double or aromatic q->addChild(QueryBond::QUERYBOND_QUERY::CHILD_TYPE(makeBondOrderEqualsQuery(Bond::DOUBLE))); q->addChild(QueryBond::QUERYBOND_QUERY::CHILD_TYPE(makeBondOrderEqualsQuery(Bond::AROMATIC))); q->setDescription("BondOr"); } res->setQuery(q); } else { BOND_NULL_QUERY *q; q = makeBondNullQuery(); res->setQuery(q); BOOST_LOG(rdWarningLog) << "unrecognized query bond type, " << bType <<", found. Using an \"any\" query."<setBeginAtomIdx(idx1); res->setEndAtomIdx(idx2); res->setBondType(type); if( text.size() >= 12 && text.substr(9,3)!=" 0"){ try { stereo = FileParserUtils::toInt(text.substr(9,3)); //res->setProp("stereo",stereo); switch(stereo){ case 0: res->setBondDir(Bond::NONE); break; case 1: res->setBondDir(Bond::BEGINWEDGE); break; case 6: res->setBondDir(Bond::BEGINDASH); break; case 3: // "either" double bond res->setBondDir(Bond::EITHERDOUBLE); res->setStereo(Bond::STEREOANY); break; case 4: // "either" single bond res->setBondDir(Bond::UNKNOWN); break; } } catch (boost::bad_lexical_cast) { ; } } if( text.size() >= 18 && text.substr(15,3)!=" 0"){ try { int topology = FileParserUtils::toInt(text.substr(15,3)); QueryBond *qBond=new QueryBond(*res); BOND_EQUALS_QUERY *q=makeBondIsInRingQuery(); switch(topology){ case 1: break; case 2: q->setNegation(true); break; default: std::ostringstream errout; errout << "Unrecognized bond topology specifier: " << topology; throw FileParseException(errout.str()) ; } qBond->expandQuery(q); delete res; res = qBond; } catch (boost::bad_lexical_cast) { ; } } if( text.size() >= 21 && text.substr(18,3)!=" 0"){ try { int reactStatus = FileParserUtils::toInt(text.substr(18,3)); res->setProp("molReactStatus",reactStatus); } catch (boost::bad_lexical_cast) { ; } } return res; }; void ParseMolBlockAtoms(std::istream *inStream,unsigned int &line, unsigned int nAtoms,RWMol *mol,Conformer *conf){ PRECONDITION(inStream,"bad stream"); PRECONDITION(mol,"bad molecule"); PRECONDITION(conf,"bad conformer"); for(unsigned int i=0;ieof()){ throw FileParseException("EOF hit while reading atoms"); } RDGeom::Point3D pos; Atom *atom = ParseMolFileAtomLine(tempStr, pos); unsigned int aid = mol->addAtom(atom,false,true); conf->setAtomPos(aid, pos); } } // returns whether or not any sign of chirality was detected void ParseMolBlockBonds(std::istream *inStream,unsigned int &line, unsigned int nBonds,RWMol *mol,bool &chiralityPossible){ PRECONDITION(inStream,"bad stream"); PRECONDITION(mol,"bad molecule"); for(unsigned int i=0;ieof()){ throw FileParseException("EOF hit while reading bonds"); } Bond *bond = ParseMolFileBondLine(tempStr); // if we got an aromatic bond set the flag on the bond and the connected atoms if (bond->getBondType() == Bond::AROMATIC) { bond->setIsAromatic(true); mol->getAtomWithIdx(bond->getBeginAtomIdx())->setIsAromatic(true); mol->getAtomWithIdx(bond->getEndAtomIdx())->setIsAromatic(true); } // if the bond might have chirality info associated with it, set a flag: if(bond->getBondDir() != Bond::NONE && bond->getBondDir() != Bond::UNKNOWN){ chiralityPossible=true; } mol->addBond(bond,true); } } bool ParseMolBlockProperties(std::istream *inStream,unsigned int &line, RWMol *mol){ PRECONDITION(inStream,"bad stream"); PRECONDITION(mol,"bad molecule"); // older mol files can have an atom list block here std::string tempStr = getLine(inStream); ++line; if( tempStr[0] != 'M' && tempStr[0] != 'A' && tempStr[0] != 'V' && tempStr[0] != 'G'){ ParseOldAtomList(mol,tempStr); } bool fileComplete=false; bool firstChargeLine=true; std::string lineBeg=tempStr.substr(0,6); while(!inStream->eof() && lineBeg!="M END" && tempStr.substr(0,4)!="$$$$"){ if(tempStr[0]=='A'){ line++; std::string nextLine = getLine(inStream); if(tempStr.substr(0,6)!="M END"){ ParseAtomAlias(mol,tempStr,nextLine); } } else if(tempStr[0]=='G'){ BOOST_LOG(rdWarningLog)<<" deprecated group abbreviation ignored"<3 && cpy.substr(0,3)=="NOT"){ negate=true; token = token.substr(3,token.size()-3); boost::trim(token); } Atom *res=0; if(token[0]=='['){ // atom list: if(token[token.length()-1]!=']'){ std::ostringstream errout; errout << "Bad atom token '"< splitToken; boost::split(splitToken,token,boost::is_any_of(",")); for(std::vector::const_iterator stIt=splitToken.begin(); stIt!=splitToken.end();++stIt){ std::string atSymb=boost::trim_copy(*stIt); if(atSymb=="") continue; int atNum = PeriodicTable::getTable()->getAtomicNumber(atSymb); if(!res){ res = new QueryAtom(atNum); } else { res->expandQuery(makeAtomNumEqualsQuery(atNum),Queries::COMPOSITE_OR,true); } } res->getQuery()->setNegation(negate); } else { if(negate) { throw FileParseException("NOT tokens only supported for atom lists") ; } // it's a normal CTAB atom symbol: if(token=="R#" || token=="A" || token=="Q" || token=="*"){ if(token=="A"||token=="Q"||token=="*"){ res=new QueryAtom(0); if(token=="*"){ // according to the MDL spec, these match anything res->setQuery(makeAtomNullQuery()); } else if(token=="Q"){ ATOM_OR_QUERY *q = new ATOM_OR_QUERY; q->setDescription("AtomOr"); q->setNegation(true); q->addChild(QueryAtom::QUERYATOM_QUERY::CHILD_TYPE(makeAtomNumEqualsQuery(6))); q->addChild(QueryAtom::QUERYATOM_QUERY::CHILD_TYPE(makeAtomNumEqualsQuery(1))); res->setQuery(q); } else if(token=="A"){ res->setQuery(makeAtomNumEqualsQuery(1)); res->getQuery()->setNegation(true); } // queries have no implicit Hs: res->setNoImplicit(true); } else { res->setAtomicNum(0); } } else if( token=="D" ){ // mol blocks support "D" and "T" as shorthand... handle that. res = new Atom(1); res->setMass(2.014); } else if( token=="T" ){ // mol blocks support "D" and "T" as shorthand... handle that. res = new Atom(1); res->setMass(3.016); } else { res = new Atom(PeriodicTable::getTable()->getAtomicNumber(token)); res->setMass(PeriodicTable::getTable()->getAtomicWeight(res->getAtomicNum())); } } POSTCONDITION(res,"no atom built"); return res; } bool splitAssignToken(const std::string &token,std::string &prop,std::string &val){ std::vector splitToken; boost::split(splitToken,token, boost::is_any_of("=")); if(splitToken.size()!=2){ return false; } prop = splitToken[0]; boost::to_upper(prop); val = splitToken[1]; return true; } template void ParseV3000AtomProps(RWMol *mol,Atom *& atom, typename T::iterator &token,const T &tokens, unsigned int &line){ PRECONDITION(mol,"bad molecule"); PRECONDITION(atom,"bad atom"); std::ostringstream errout; while(token!=tokens.end()){ std::string prop,val; if(!splitAssignToken(*token,prop,val)){ errout << "Invalid atom property: " << *token << " for atom "<< atom->getIdx()+1 << std::endl; throw FileParseException(errout.str()) ; } if(prop=="CHG"){ int charge=FileParserUtils::toInt(val); if(!atom->hasQuery()) { atom->setFormalCharge(charge); } else { atom->expandQuery(makeAtomFormalChargeQuery(charge)); } } else if(prop=="RAD"){ // FIX handle queries here switch( FileParserUtils::toInt(val) ){ case 0: break; case 1: atom->setNumRadicalElectrons(2);break; case 2: atom->setNumRadicalElectrons(1);break; case 3: atom->setNumRadicalElectrons(2);break; default: errout << "Unrecognized RAD value " << val << " for atom "<< atom->getIdx()+1 << std::endl; throw FileParseException(errout.str()) ; } } else if(prop=="MASS"){ double v=FileParserUtils::toDouble(val); if(v<=0){ errout << "Bad value for MASS :" << val << " for atom "<< atom->getIdx()+1 << std::endl; throw FileParseException(errout.str()) ; } else { if(!atom->hasQuery()) { atom->setMass(v); } else { atom->expandQuery(makeAtomMassQuery(static_cast(v))); } } } else if(prop=="CFG"){ int cfg=FileParserUtils::toInt(val); switch(cfg){ case 0: break; case 1: case 2: case 3: atom->setProp("molParity",cfg); break; default: errout << "Unrecognized CFG value : " << val << " for atom "<< atom->getIdx()+1 << std::endl; throw FileParseException(errout.str()) ; } } else if(prop=="HCOUNT"){ if(val!="0"){ int hcount=FileParserUtils::toInt(val); if(!atom->hasQuery()) { atom=FileParserUtils::replaceAtomWithQueryAtom(mol,atom); } if(hcount==-1) hcount=0; atom->expandQuery(makeAtomHCountQuery(hcount)); } } else if(prop=="UNSAT"){ if(val=="1"){ if(!atom->hasQuery()) { atom=FileParserUtils::replaceAtomWithQueryAtom(mol,atom); } atom->expandQuery(makeAtomUnsaturatedQuery()); } } else if(prop=="RBCNT"){ if(val!="0"){ int rbcount=FileParserUtils::toInt(val); if(!atom->hasQuery()) { atom=FileParserUtils::replaceAtomWithQueryAtom(mol,atom); } if(rbcount==-1) rbcount=0; atom->expandQuery(makeAtomRingBondCountQuery(rbcount)); } } ++token; } } void ParseV3000AtomBlock(std::istream *inStream,unsigned int &line, unsigned int nAtoms,RWMol *mol, Conformer *conf){ PRECONDITION(inStream,"bad stream"); PRECONDITION(nAtoms>0,"bad atom count"); PRECONDITION(mol,"bad molecule"); PRECONDITION(conf,"bad conformer"); std::string tempStr; std::vector splitLine; tempStr = getV3000Line(inStream,line); if(tempStr.length()<10 || tempStr.substr(0,10) != "BEGIN ATOM"){ throw FileParseException("BEGIN ATOM line not found") ; } for(unsigned int i=0;i els(""," \t","'\""); boost::tokenizer > tokens(trimmed,els); boost::tokenizer >::iterator token; token=tokens.begin(); if(token==tokens.end()) { std::ostringstream errout; errout << "Bad atom line : '"<c_str()); // start with the symbol: ++token; if(token==tokens.end()) { std::ostringstream errout; errout << "Bad atom line : '"<c_str()); ++token; if(token==tokens.end()) { std::ostringstream errout; errout << "Bad atom line : '"<c_str()); ++token; if(token==tokens.end()) { std::ostringstream errout; errout << "Bad atom line : '"<c_str()); // the map number: ++token; if(token==tokens.end()) { std::ostringstream errout; errout << "Bad atom line : '"<c_str()); if(mapNum>0){ atom->setProp("molAtomMapNumber",mapNum); } ++token; unsigned int aid=mol->addAtom(atom,false,true); // additional properties this may change the atom, // so be careful with it: ParseV3000AtomProps(mol,atom,token,tokens,line); mol->setAtomBookmark(atom,molIdx); conf->setAtomPos(aid,pos); } tempStr = getV3000Line(inStream,line); if(tempStr.length()<8 || tempStr.substr(0,8) != "END ATOM"){ throw FileParseException("END ATOM line not found") ; } if(mol->hasProp("_2DConf")){ conf->set3D(false); mol->clearProp("_2DConf"); } else if(mol->hasProp("_3DConf")){ conf->set3D(true); mol->clearProp("_3DConf"); } } void ParseV3000BondBlock(std::istream *inStream,unsigned int &line, unsigned int nBonds,RWMol *mol, bool &chiralityPossible){ PRECONDITION(inStream,"bad stream"); PRECONDITION(nBonds>0,"bad bond count"); PRECONDITION(mol,"bad molecule"); std::string tempStr; std::vector splitLine; tempStr = getV3000Line(inStream,line); if(tempStr.length()<10 || tempStr.substr(0,10) != "BEGIN BOND"){ throw FileParseException("BEGIN BOND line not found") ; } for(unsigned int i=0;isetIsAromatic(true);break; case 0: bond = new Bond(Bond::UNSPECIFIED); BOOST_LOG(rdWarningLog) << "bond with order 0 found. This is not part of the MDL specification."<setQuery(q); } else if (bType==5 || bType==6 || bType==7 ){ BOND_OR_QUERY *q; q = new BOND_OR_QUERY; if(bType == 5){ // single or double q->addChild(QueryBond::QUERYBOND_QUERY::CHILD_TYPE(makeBondOrderEqualsQuery(Bond::SINGLE))); q->addChild(QueryBond::QUERYBOND_QUERY::CHILD_TYPE(makeBondOrderEqualsQuery(Bond::DOUBLE))); q->setDescription("BondOr"); } else if(bType == 6){ // single or aromatic q->addChild(QueryBond::QUERYBOND_QUERY::CHILD_TYPE(makeBondOrderEqualsQuery(Bond::SINGLE))); q->addChild(QueryBond::QUERYBOND_QUERY::CHILD_TYPE(makeBondOrderEqualsQuery(Bond::AROMATIC))); q->setDescription("BondOr"); } else if(bType == 7){ // double or aromatic q->addChild(QueryBond::QUERYBOND_QUERY::CHILD_TYPE(makeBondOrderEqualsQuery(Bond::DOUBLE))); q->addChild(QueryBond::QUERYBOND_QUERY::CHILD_TYPE(makeBondOrderEqualsQuery(Bond::AROMATIC))); q->setDescription("BondOr"); } bond->setQuery(q); } else { BOND_NULL_QUERY *q; q = makeBondNullQuery(); bond->setQuery(q); BOOST_LOG(rdWarningLog) << "unrecognized query bond type, " << bType <<", found. Using an \"any\" query."<setBondDir(Bond::BEGINWEDGE); chiralityPossible=true; break; case 2: if(bType==1) bond->setBondDir(Bond::UNKNOWN); else if(bType==2){ bond->setBondDir(Bond::EITHERDOUBLE); bond->setStereo(Bond::STEREOANY); } break; case 3: bond->setBondDir(Bond::BEGINDASH); chiralityPossible=true; break; default: errout << "bad bond CFG "<hasQuery()){ QueryBond *qBond=new QueryBond(*bond); delete bond; bond=qBond; } BOND_EQUALS_QUERY *q=makeBondIsInRingQuery(); if(val=="1"){ // nothing } else if(val=="2"){ q->setNegation(true); } else { errout << "bad bond TOPO "<expandQuery(q); } } else if(prop=="RXCTR"){ int reactStatus = FileParserUtils::toInt(val); bond->setProp("molReactStatus",reactStatus); } else if(prop=="STBOX"){ } ++lPos; } bond->setBeginAtomIdx(mol->getAtomWithBookmark(a1Idx)->getIdx()); bond->setEndAtomIdx(mol->getAtomWithBookmark(a2Idx)->getIdx()); mol->addBond(bond,true); if(bond->getIsAromatic()){ mol->getAtomWithIdx(bond->getBeginAtomIdx())->setIsAromatic(true); mol->getAtomWithIdx(bond->getEndAtomIdx())->setIsAromatic(true); } mol->setBondBookmark(bond,bondIdx); } tempStr = getV3000Line(inStream,line); if(tempStr.length()<8 || tempStr.substr(0,8) != "END BOND"){ throw FileParseException("END BOND line not found") ; } } } // end of local namespace namespace FileParserUtils { bool ParseV3000CTAB(std::istream *inStream,unsigned int &line, RWMol *mol, Conformer *&conf, bool &chiralityPossible,unsigned int &nAtoms, unsigned int &nBonds){ PRECONDITION(inStream,"bad stream"); PRECONDITION(mol,"bad molecule"); std::string tempStr; std::vector splitLine; tempStr = getV3000Line(inStream,line); boost::to_upper(tempStr); if(tempStr.length()<10 || tempStr.substr(0,10) != "BEGIN CTAB"){ throw FileParseException("BEGIN CTAB line not found") ; } tempStr = getV3000Line(inStream,line); boost::to_upper(tempStr); if(tempStr.size()<8 || tempStr.substr(0,7)!="COUNTS "){ std::ostringstream errout; errout << "Bad counts line : '"<2) nSgroups = FileParserUtils::toInt(splitLine[2]); if(splitLine.size()>3) n3DConstraints = FileParserUtils::toInt(splitLine[3]); if(splitLine.size()>4) chiralFlag = FileParserUtils::toInt(splitLine[4]); ParseV3000AtomBlock(inStream,line,nAtoms,mol,conf); if(nBonds){ ParseV3000BondBlock(inStream,line,nBonds,mol,chiralityPossible); } if(nSgroups){ BOOST_LOG(rdWarningLog)<<"S group information in mol block igored"<=10 && tempStr.substr(0,10) != "END SGROUP"){ break; } } } if(n3DConstraints){ BOOST_LOG(rdWarningLog)<<"3d constraint information in mol block igored"<8 && tempStr.substr(0,8)=="LINKNODE"){ tempStr = getV3000Line(inStream,line); boost::to_upper(tempStr); } while(tempStr.length()>5 && tempStr.substr(0,5)=="BEGIN"){ // skip blocks we don't know how to read BOOST_LOG(rdWarningLog)<<"skipping block: "<"<"<addConformer(conf, true); conf=0; return true; } bool ParseV2000CTAB(std::istream *inStream,unsigned int &line, RWMol *mol, Conformer *&conf, bool &chiralityPossible,unsigned int &nAtoms, unsigned int &nBonds){ if(nAtoms<=0){ throw FileParseException("molecule has no atoms"); } conf = new Conformer(nAtoms); ParseMolBlockAtoms(inStream,line,nAtoms,mol,conf); if(mol->hasProp("_2DConf")){ conf->set3D(false); mol->clearProp("_2DConf"); } else if(mol->hasProp("_3DConf")){ conf->set3D(true); mol->clearProp("_3DConf"); } mol->addConformer(conf, true); conf=0; ParseMolBlockBonds(inStream,line,nBonds,mol,chiralityPossible); bool fileComplete=ParseMolBlockProperties(inStream,line,mol); return fileComplete; } } // end of FileParserUtils namespace //------------------------------------------------ // // Read a molecule from a stream // //------------------------------------------------ RWMol *MolDataStreamToMol(std::istream *inStream, unsigned int &line, bool sanitize, bool removeHs){ PRECONDITION(inStream,"no stream"); std::string tempStr; bool fileComplete=false; bool chiralityPossible = false; // mol name line++; tempStr = getLine(inStream); if(inStream->eof()){ return NULL; } RWMol *res = new RWMol(); res->setProp("_Name", tempStr); // info line++; tempStr = getLine(inStream); res->setProp("_MolFileInfo", tempStr); if(tempStr.length()>=22){ std::string dimLabel=tempStr.substr(20,2); if(dimLabel=="2d"||dimLabel=="2D"){ res->setProp("_2DConf",1); } else if(dimLabel=="3d"||dimLabel=="3D"){ res->setProp("_3DConf",1); } } // comments line++; tempStr = getLine(inStream); res->setProp("_MolFileComments", tempStr); unsigned int nAtoms=0,nBonds=0,nLists=0,chiralFlag=0,nsText=0,nRxnComponents=0; int nReactants=0,nProducts=0,nIntermediates=0; // counts line, this is where we really get started line++; tempStr = getLine(inStream); if(tempStr.size()<6){ if(res){ delete res; res = NULL; } std::ostringstream errout; errout << "Counts line too short: '"<=9) nLists = FileParserUtils::toInt(tempStr.substr(spos,3)); spos = 12; if(tempStr.size()>=spos+3) chiralFlag = FileParserUtils::toInt(tempStr.substr(spos,3)); spos = 15; if(tempStr.size()>=spos+3) nsText = FileParserUtils::toInt(tempStr.substr(spos,3)); spos = 18; if(tempStr.size()>=spos+3) nRxnComponents = FileParserUtils::toInt(tempStr.substr(spos,3)); spos = 21; if(tempStr.size()>=spos+3) nReactants = FileParserUtils::toInt(tempStr.substr(spos,3)); spos = 24; if(tempStr.size()>=spos+3) nProducts = FileParserUtils::toInt(tempStr.substr(spos,3)); spos = 27; if(tempStr.size()>=spos+3) nIntermediates = FileParserUtils::toInt(tempStr.substr(spos,3)); } catch (boost::bad_lexical_cast &) { // some SD files (such as some from NCI) lack all the extra information // on the header line, so ignore problems parsing there. } unsigned int ctabVersion=2000; if(tempStr.size()>35){ if(tempStr.size()<39 || tempStr[34]!='V'){ if(res) delete res; throw FileParseException("CTAB version string invalid"); } if(tempStr.substr(34,5)=="V3000"){ ctabVersion=3000; //if(res) delete res; //throw FileParseException("V3000 CTABs not supported"); } else if(tempStr.substr(34,5)!="V2000"){ if(res){ delete res; res = NULL; } std::ostringstream errout; errout << "Unsupported CTAB version: '"<< tempStr.substr(34,5) << "'"; throw FileParseException(errout.str()) ; } } Conformer *conf=0; try { if(ctabVersion==2000){ fileComplete=FileParserUtils::ParseV2000CTAB(inStream,line, res,conf,chiralityPossible, nAtoms,nBonds); } else { if(nAtoms!=0 || nBonds!=0){ if(res){ delete res; res = NULL; } throw FileParseException("V3000 mol blocks should have 0s in the initial counts line.") ; } fileComplete=FileParserUtils::ParseV3000CTAB(inStream,line, res,conf,chiralityPossible, nAtoms,nBonds); } } catch (FileParseException &e) { // catch our exceptions and throw them back after cleanup if(res) delete res; if(conf) delete conf; res=NULL; conf=NULL; throw e; } if(!fileComplete){ if(res) delete res; if(conf) delete conf; res=NULL; conf=NULL; throw FileParseException("Problems encountered parsing Mol data, M END "); } // calculate explicit valence on each atom: for(RWMol::AtomIterator atomIt=res->beginAtoms(); atomIt!=res->endAtoms(); ++atomIt) { (*atomIt)->calcExplicitValence(false); } if (res && sanitize ) { // update the chirality and stereo-chemistry and stuff: // // NOTE: we detect the stereochemistry before sanitizing/removing // hydrogens because the removal of H atoms may actually remove // the wedged bond from the molecule. This wipes out the only // sign that chirality ever existed and makes us sad... so first // perceive chirality, then remove the Hs and sanitize. // // One exception to this (of course, there's always an exception): // DetectAtomStereoChemistry() needs to check the number of // implicit hydrogens on atoms to detect if things can be // chiral. However, if we ask for the number of implicit Hs before // we've called MolOps::cleanUp() on the molecule, we'll get // exceptions for common "weird" cases like a nitro group // mis-represented as -N(=O)=O. *SO*... we need to call // cleanUp(), then detect the stereochemistry. // (this was Issue 148) // if(chiralityPossible){ MolOps::cleanUp(*res); const Conformer &conf = res->getConformer(); DetectAtomStereoChemistry(*res, &conf); } try { if(removeHs){ ROMol *tmp=MolOps::removeHs(*res,false,false); delete res; res = static_cast(tmp); } else { MolOps::sanitizeMol(*res); } // now that atom stereochem has been perceived, the wedging // information is no longer needed, so we clear // single bond dir flags: ClearSingleBondDirFlags(*res); // unlike DetectAtomStereoChemistry we call DetectBondStereoChemistry // here after sanitization because we need the ring information: const Conformer &conf = res->getConformer(); DetectBondStereoChemistry(*res, &conf); } catch (...){ if(res) delete res; res=NULL; throw; } MolOps::assignStereochemistry(*res,true); } if(res->hasProp("_NeedsQueryScan")){ res->clearProp("_NeedsQueryScan"); CompleteMolQueries(res); } return res; }; RWMol *MolDataStreamToMol(std::istream &inStream, unsigned int &line, bool sanitize, bool removeHs){ return MolDataStreamToMol(&inStream,line,sanitize,removeHs); }; //------------------------------------------------ // // Read a molecule from a string // //------------------------------------------------ RWMol *MolBlockToMol(const std::string &molBlock, bool sanitize, bool removeHs){ std::istringstream inStream(molBlock); unsigned int line = 0; return MolDataStreamToMol(inStream, line, sanitize, removeHs); } //------------------------------------------------ // // Read a molecule from a file // //------------------------------------------------ RWMol *MolFileToMol(std::string fName, bool sanitize, bool removeHs){ std::ifstream inStream(fName.c_str()); if (!inStream || (inStream.bad()) ) { std::ostringstream errout; errout << "Bad input file " << fName; throw BadFileException(errout.str()); } RWMol *res=NULL; if(!inStream.eof()){ unsigned int line = 0; res=MolDataStreamToMol(inStream, line, sanitize, removeHs); } return res; } }