// $Id$ // // Copyright (C) 2001-2006 Rational Discovery LLC // // @@ All Rights Reserved @@ // #include #include "ROMol.h" #include "Atom.h" #include "PeriodicTable.h" #include "SanitException.h" #include #include #include namespace RDKit { Atom::Atom(){ d_atomicNum=0; initAtom(); } Atom::Atom(unsigned int num) { d_atomicNum = num; initAtom(); }; Atom::Atom(std::string what) { d_atomicNum = PeriodicTable::getTable()->getAtomicNumber(what); initAtom(); }; Atom::Atom( const Atom & other){ // NOTE: we do *not* copy ownership! d_atomicNum = other.d_atomicNum; dp_mol = 0; d_index=0; d_formalCharge = other.d_formalCharge; df_noImplicit = other.df_noImplicit; df_isAromatic = other.df_isAromatic; d_dativeFlag = other.d_dativeFlag; d_numExplicitHs = other.d_numExplicitHs; d_mass = other.d_mass; d_pos = other.d_pos; d_chiralTag=other.d_chiralTag; d_hybrid = other.d_hybrid; d_implicitValence=other.d_implicitValence; d_explicitValence=other.d_explicitValence; if(other.dp_props){ dp_props = new Dict(*other.dp_props); } else { dp_props = new Dict(); STR_VECT computed; dp_props->setVal("computedProps", computed); } } void Atom::initAtom(){ df_isAromatic = false; df_noImplicit = false; d_dativeFlag=0; d_numExplicitHs = 0; d_formalCharge = 0; d_index = 0; if(d_atomicNum){ d_mass = PeriodicTable::getTable()->getAtomicWeight(d_atomicNum); } else{ d_mass = 0.0; } d_chiralTag=CHI_UNSPECIFIED; d_hybrid = UNSPECIFIED; dp_mol = 0; dp_props = new Dict(); d_implicitValence=-1; d_explicitValence=-1; // ok every Atom contains a property entry called "computedProps" // which provides list of property keys that correspond to value // that have been computed this can used to blow out all computed // properties while leaving the rest along initialize this list to // an empty vector of strings //STR_VECT computed; //dp_props->setVal("computedProps", computed); } Atom::~Atom() { if(dp_props){ delete dp_props; dp_props = 0; } } Atom *Atom::copy() const { Atom *res = new Atom(*this); return res; } void Atom::setOwningMol(ROMol *other) { // NOTE: this operation does not update the topology of the owning // molecule (i.e. this atom is not added to the graph). Only // molecules can add atoms to themselves. dp_mol = other; } std::string Atom::getSymbol() const { std::string res; // handle dummies differently: if(d_atomicNum != 0 || !hasProp("dummyLabel") ){ res = PeriodicTable::getTable()->getElementSymbol(d_atomicNum); } else { getProp("dummyLabel",res); } return res; } unsigned int Atom::getDegree() const { PRECONDITION(dp_mol,"valence not defined for atoms not associated with molecules") return getOwningMol().getAtomDegree(this); } // // If includeNeighbors is set, we'll loop over our neighbors // and include any of them that are Hs in the count here // unsigned int Atom::getTotalNumHs(bool includeNeighbors) const { PRECONDITION(dp_mol,"valence not defined for atoms not associated with molecules") int res = getNumExplicitHs() + getNumImplicitHs(); if(includeNeighbors){ ROMol::ADJ_ITER begin,end; const ROMol *parent = &getOwningMol(); boost::tie(begin,end) = parent->getAtomNeighbors(this); while(begin!=end){ const Atom *at = parent->getAtomWithIdx(*begin); if(at->getAtomicNum()==1) res++; begin++; } } return res; } unsigned int Atom::getNumImplicitHs() const { if(df_noImplicit) return 0; PRECONDITION(d_implicitValence>-1, "getNumImplicitHs() called without preceding call to calcImplicitValence()"); return getImplicitValence(); } int Atom::getExplicitValence(bool forceCalc) const { PRECONDITION(dp_mol,"valence not defined for atoms not associated with molecules"); PRECONDITION(d_explicitValence>-1, "getExplicitValence() called without call to calcExplicitValence()"); return d_explicitValence; } int Atom::calcExplicitValence(bool strict) { PRECONDITION(dp_mol,"valence not defined for atoms not associated with molecules"); unsigned int res; // FIX: contributions of bonds to valence are being done at best // approximately // EFF: this code is embarrassingly bad double accum=0; ROMol::OEDGE_ITER beg,end; boost::tie(beg,end) = getOwningMol().getAtomBonds(this); ROMol::GRAPH_MOL_BOND_PMAP::type pMap = getOwningMol().getBondPMap(); while(beg!=end){ accum += pMap[*beg]->getValenceContrib(this); beg++; } accum += getNumExplicitHs(); // check accum is greater than the default valence unsigned int dv = PeriodicTable::getTable()->getDefaultValence(d_atomicNum); int chr = getFormalCharge(); if (accum > (dv + chr)) { // check if this is because of aromaticity // set the accum back to the default valence if (this->getIsAromatic()) { // this needs some explanation if the atom is aromatic and // accum > (dv + chr) we assume that no hydrogen can be added // to this atom. We set x = (v + chr) such that x is the // clossest possible integer to "accum" but less than // "accum". "v" here is one of the allowed valences e.g. if // sulfur in the following smiles : O=c1ccs(=O)cc1 const UINT_VECT &valens = PeriodicTable::getTable()->getValenceList(d_atomicNum); UINT_VECT_CI vi; int val, pval = dv + chr; for (vi = valens.begin(); vi != valens.end(); vi++) { val = (*vi) + chr; if (val > accum) { break; } else { pval = val; } } accum = pval; } } // despite promising to not to blame it on him - this a trick Greg // came up with if we have a bond order sum of x.5 (i.e. 1.5, 2.5 // etc) we would like it to round to the higher integer value so // 2.5 to 3 instead of 2, so we will add 0.1 to accum to assure // this plays a role in the number of hydrogen that are implicitly // added. This will only happen when the accum is a non-integer // value and less than the default valence (otherwise the above if // statement should have caught it). An example of where this can // happen is the following smiles: // C1ccccC1 // Daylight accepts this smiles and we should be able to Kekulize // correctly. accum += 0.1; res = static_cast(round(accum)); d_explicitValence = res; return res; } int Atom::getImplicitValence(bool forceCalc) const { PRECONDITION(dp_mol,"valence not defined for atoms not associated with molecules"); if(df_noImplicit) return 0; return d_implicitValence; } // NOTE: this uses the explicitValence, so you need to call calcExplicitValence() // before calling this... updatePropertyCache() takes care of that. int Atom::calcImplicitValence(bool strict) { PRECONDITION(dp_mol,"valence not defined for atoms not associated with molecules"); if(df_noImplicit) return 0; if(d_explicitValence==-1) this->calcExplicitValence(); // this is basically the difference between the allowed valence of // the atom and the explicit valence already specified - tells how // many Hs to add // int res; // here is how we are going to deal with the possibility of // multiple valences // - check the explicit valence "ev" // - if it is already equal to one of the allowed valences for the // atom return 0 // - otherwise take return difference between next larger allowed // valence and "ev" // if "ev" is greater than all allowed valences for the atom raise an exception // finally aromatic cases are dealt with differently - these atoms are allowed // only default valences const UINT_VECT &valens = PeriodicTable::getTable()->getValenceList(d_atomicNum); UINT_VECT_CI vi; int dv = PeriodicTable::getTable()->getDefaultValence(d_atomicNum); int explicitV = getExplicitValence(); int chg = getFormalCharge(); // NOTE: this is here to take care of the difference in element on // the right side of the carbon vs left side of carbon // For elements on the right side of the periodic table // (electronegative elements): // NHYD = V - SBO + CHG // For elements on the left side of the periodic table // (electropositive elements): // NHYD = V - SBO - CHG // This reflects that hydrogen adds to, for example, O as H+ while // it adds to Na as H-. // V = valence // SBO = Sum of bond orders // CHG = Formal charge // It seems reasonable that the line is drawn at Carbon (in Group // IV), but we must assume on which side of the line C // falls... an assumption which will not always be correct. For // example: // - Electropositive Carbon: a C with three singly-bonded // neighbors (DV = 4, SBO = 3, CHG = 1) and a positive charge (a // 'stable' carbocation) should not have any hydrogens added. // - Electronegative Carbon: C in isonitrile, R[N+]#[C-] (DV = 4, SBO = 3, // CHG = -1), also should not have any hydrogens added. // Because isonitrile seems more relevant to pharma problems, we'll be // making the second assumption: *Carbon is electronegative*. // // So assuming you read all the above stuff - you know why we are // changing signs for "chg" here if ( (4 - PeriodicTable::getTable()->getNouterElecs(d_atomicNum)) > 0) { chg *= -1; } // if we have an aromatic case treat it differently if (getIsAromatic()) { if (explicitV <= (dv + chg)) { res = dv + chg - explicitV; } else { // As we assume when finding the explicitValence if we are // aromatic we should not be adding any hydrogen and already // be at an accepted valence state, // FIX: this is just ERROR checking and probably moot - the // explicitValence function called above should assure us that // we satisfy one of the accepted valence states for the // atom. The only diff I can think of is in the way we handle // formal charge here vs the explicit valence function. bool satis = false; for (vi = valens.begin(); vi != valens.end(); vi++) { if (explicitV == ((*vi) + chg)) { satis = true; break; } } if (strict && !satis) { std::ostringstream errout; errout << "Explicit valence for aromatic atom # " << getIdx() << " not equal to any accepted valence\n"; std::string msg = errout.str(); BOOST_LOG(rdErrorLog) << msg << std::endl; throw MolSanitizeException(msg); } res = 0; } } else { // non-aromatic case we are allowed to have non default valences // and be able to add hydrogens int tot; res = -1; for (vi = valens.begin(); vi != valens.end(); vi++) { tot = (*vi) + chg; if (explicitV <= tot) { res = tot - explicitV; break; } } if (res < 0) { if(strict){ // this means that the explicit valen is greater than any // allowed valence for the atoms - raise an error std::ostringstream errout; errout << "Explicit valence for atom # " << getIdx() << " " << PeriodicTable::getTable()->getElementSymbol(d_atomicNum) << " greater than permitted"; std::string msg = errout.str(); BOOST_LOG(rdErrorLog) << msg << std::endl; throw MolSanitizeException(msg); } else { res = 0; } } } d_implicitValence = res; return res; } void Atom::setQuery(Atom::QUERYATOM_QUERY *what) { // Atoms don't have complex queries so this has to fail PRECONDITION(0,"plain atoms have no Query"); } Atom::QUERYATOM_QUERY *Atom::getQuery() const { //PRECONDITION(0,"plain atoms have no Query"); return NULL; }; void Atom::expandQuery(Atom::QUERYATOM_QUERY *what, Queries::CompositeQueryType how, bool maintainOrder) { PRECONDITION(0,"plain atoms have no Query"); } bool Atom::Match(Atom const *what) const { bool res = getAtomicNum() == what->getAtomicNum(); // special dummy--dummy match case: // X matches X, Xa, Xb, etc // Xa only matches X and Xa if(res && !getAtomicNum()){ std::string l1; if(this->hasProp("dummyLabel")){ this->getProp("dummyLabel",l1); } else{ l1="X"; } if(l1!="X"){ std::string l2; if(what->hasProp("dummyLabel")) what->getProp("dummyLabel",l2); else l2="X"; if(l2!="X" && l1!=l2){ res = false; } } } return res; } bool Atom::Match(const Atom::ATOM_SPTR what) const { return Match(what.get()); } void Atom::updatePropertyCache(bool strict) { calcExplicitValence(strict); calcImplicitValence(strict); } // returns the number of swaps required to convert the ordering // of the probe list to match the order of our incoming bonds: // // e.g. if our incoming bond order is: [0,1,2,3]: // getPerturbationOrder([1,0,2,3]) = 1 // getPerturbationOrder([1,2,3,0]) = 3 // getPerturbationOrder([1,2,0,3]) = 2 int Atom::getPerturbationOrder(INT_LIST probe) const{ INT_LIST ref; ROMol::OEDGE_ITER beg,end; boost::tie(beg,end) = getOwningMol().getAtomBonds(this); ROMol::GRAPH_MOL_BOND_PMAP::type pMap = getOwningMol().getBondPMap(); while(beg!=end){ ref.push_back(pMap[*beg]->getIdx()); beg++; } PRECONDITION(ref.size()==probe.size(),"size mismatch"); INT_LIST::const_iterator refIt=ref.begin(); INT_LIST::iterator probeIt=probe.begin(),probeIt2; int nSwaps = 0; while(refIt!=ref.end()){ if((*probeIt)!=(*refIt)){ bool foundIt=false; probeIt2=probeIt; while((*probeIt2)!=(*refIt) && probeIt2!=probe.end()){ probeIt2++; } if(probeIt2 != probe.end()){ foundIt=true; } CHECK_INVARIANT(foundIt,"could not find probe element"); int tmp=*probeIt2; *probeIt2=*probeIt; *probeIt=tmp; nSwaps++; } probeIt++; refIt++; } return nSwaps; } } // end o' namespace std::ostream & operator<<(std::ostream& target, const RDKit::Atom &at){ target << at.getIdx() << " " << at.getAtomicNum() << " " << at.getSymbol(); target << " chg: " << at.getFormalCharge(); target << " deg: " << at.getDegree(); target << " exp: "; try { int explicitValence = at.getExplicitValence(); target << explicitValence; } catch (...){ target << "N/A"; } target << " imp: "; try { int implicitValence = at.getImplicitValence(); target << implicitValence; } catch (...){ target << "N/A"; } target << " hyb: " << at.getHybridization(); target << " arom?: " << at.getIsAromatic(); target << " chi: " << at.getChiralTag(); return target; };