// // Copyright (C) 2001-2019 Greg Landrum and Rational Discovery LLC // // @@ All Rights Reserved @@ // This file is part of the RDKit. // The contents are covered by the terms of the BSD license // which is included in the file license.txt, found at the root // of the RDKit source tree. // #include #include "ROMol.h" #include "Atom.h" #include "PeriodicTable.h" #include "SanitException.h" #include "QueryOps.h" #include "MonomerInfo.h" #include #include #include #include namespace RDKit { // Determine whether or not a molecule is to the left of Carbon bool isEarlyAtom(int atomicNum) { int eshift = 4 - PeriodicTable::getTable()->getNouterElecs(atomicNum); if (eshift > 0) { return true; } else if (eshift < 0) { return false; } else { // we make an arbitrary decision that Ge, Sn, and Pb // are treated like early elements (part of github #2606) return atomicNum > 14; } } Atom::Atom() : RDProps() { d_atomicNum = 0; initAtom(); } Atom::Atom(unsigned int num) : RDProps() { d_atomicNum = num; initAtom(); }; Atom::Atom(const std::string &what) : RDProps() { d_atomicNum = PeriodicTable::getTable()->getAtomicNumber(what); initAtom(); }; void Atom::initFromOther(const Atom &other) { RDProps::operator=(other); // NOTE: we do *not* copy ownership! dp_mol = nullptr; d_atomicNum = other.d_atomicNum; d_index = 0; d_formalCharge = other.d_formalCharge; df_noImplicit = other.df_noImplicit; df_isAromatic = other.df_isAromatic; d_numExplicitHs = other.d_numExplicitHs; d_numRadicalElectrons = other.d_numRadicalElectrons; d_isotope = other.d_isotope; // d_pos = other.d_pos; d_chiralTag = other.d_chiralTag; d_hybrid = other.d_hybrid; d_implicitValence = other.d_implicitValence; d_explicitValence = other.d_explicitValence; if (other.dp_monomerInfo) { dp_monomerInfo = other.dp_monomerInfo->copy(); } else { dp_monomerInfo = nullptr; } } Atom::Atom(const Atom &other) : RDProps() { initFromOther(other); } Atom &Atom::operator=(const Atom &other) { if (this == &other) return *this; initFromOther(other); return *this; } void Atom::initAtom() { df_isAromatic = false; df_noImplicit = false; d_numExplicitHs = 0; d_numRadicalElectrons = 0; d_formalCharge = 0; d_index = 0; d_isotope = 0; d_chiralTag = CHI_UNSPECIFIED; d_hybrid = UNSPECIFIED; dp_mol = nullptr; dp_monomerInfo = nullptr; d_implicitValence = -1; d_explicitValence = -1; } Atom::~Atom() { if (dp_monomerInfo) { delete dp_monomerInfo; } } Atom *Atom::copy() const { auto *res = new Atom(*this); return res; } void Atom::setOwningMol(ROMol *other) { // NOTE: this operation does not update the topology of the owning // molecule (i.e. this atom is not added to the graph). Only // molecules can add atoms to themselves. dp_mol = other; } std::string Atom::getSymbol() const { std::string res; // handle dummies differently: if (d_atomicNum != 0 || !getPropIfPresent(common_properties::dummyLabel, res)) { res = PeriodicTable::getTable()->getElementSymbol(d_atomicNum); } return res; } unsigned int Atom::getDegree() const { PRECONDITION(dp_mol, "degree not defined for atoms not associated with molecules"); return getOwningMol().getAtomDegree(this); } unsigned int Atom::getTotalDegree() const { PRECONDITION(dp_mol, "degree not defined for atoms not associated with molecules"); unsigned int res = this->getTotalNumHs(false) + this->getDegree(); return res; } // // If includeNeighbors is set, we'll loop over our neighbors // and include any of them that are Hs in the count here // unsigned int Atom::getTotalNumHs(bool includeNeighbors) const { PRECONDITION(dp_mol, "valence not defined for atoms not associated with molecules") int res = getNumExplicitHs() + getNumImplicitHs(); if (includeNeighbors) { ROMol::ADJ_ITER begin, end; const ROMol *parent = &getOwningMol(); boost::tie(begin, end) = parent->getAtomNeighbors(this); while (begin != end) { const Atom *at = parent->getAtomWithIdx(*begin); if (at->getAtomicNum() == 1) res++; ++begin; } } return res; } unsigned int Atom::getNumImplicitHs() const { if (df_noImplicit) return 0; PRECONDITION(d_implicitValence > -1, "getNumImplicitHs() called without preceding call to " "calcImplicitValence()"); return getImplicitValence(); } int Atom::getExplicitValence() const { PRECONDITION(dp_mol, "valence not defined for atoms not associated with molecules"); PRECONDITION( d_explicitValence > -1, "getExplicitValence() called without call to calcExplicitValence()"); return d_explicitValence; } unsigned int Atom::getTotalValence() const { PRECONDITION(dp_mol, "valence not defined for atoms not associated with molecules"); return getExplicitValence() + getImplicitValence(); } int Atom::calcExplicitValence(bool strict) { PRECONDITION(dp_mol, "valence not defined for atoms not associated with molecules"); unsigned int res; // FIX: contributions of bonds to valence are being done at best // approximately double accum = 0; ROMol::OEDGE_ITER beg, end; boost::tie(beg, end) = getOwningMol().getAtomBonds(this); while (beg != end) { accum += getOwningMol()[*beg]->getValenceContrib(this); ++beg; } accum += getNumExplicitHs(); // check accum is greater than the default valence unsigned int dv = PeriodicTable::getTable()->getDefaultValence(d_atomicNum); int chr = getFormalCharge(); if (isEarlyAtom(d_atomicNum)) chr *= -1; // <- the usual correction for early atoms // special case for carbon - see GitHub #539 if (d_atomicNum == 6 && chr > 0) chr = -chr; if (accum > (dv + chr) && this->getIsAromatic()) { // this needs some explanation : if the atom is aromatic and // accum > (dv + chr) we assume that no hydrogen can be added // to this atom. We set x = (v + chr) such that x is the // closest possible integer to "accum" but less than // "accum". // // "v" here is one of the allowed valences. For example: // sulfur here : O=c1ccs(=O)cc1 // nitrogen here : c1cccn1C int pval = dv + chr; const INT_VECT &valens = PeriodicTable::getTable()->getValenceList(d_atomicNum); for (auto vi = valens.begin(); vi != valens.end() && *vi != -1; ++vi) { int val = (*vi) + chr; if (val > accum) { break; } else { pval = val; } } // if we're within 1.5 of the allowed valence, go ahead and take it. // this reflects things like the N in c1cccn1C, which starts with // accum of 4, but which can be kekulized to C1=CC=CN1C, where // the valence is 3 or the bridging N in c1ccn2cncc2c1, which starts // with a valence of 4.5, but can be happily kekulized down to a valence // of 3 if (accum - pval <= 1.5) accum = pval; } // despite promising to not to blame it on him - this a trick Greg // came up with: if we have a bond order sum of x.5 (i.e. 1.5, 2.5 // etc) we would like it to round to the higher integer value -- // 2.5 to 3 instead of 2 -- so we will add 0.1 to accum. // this plays a role in the number of hydrogen that are implicitly // added. This will only happen when the accum is a non-integer // value and less than the default valence (otherwise the above if // statement should have caught it). An example of where this can // happen is the following smiles: // C1ccccC1 // Daylight accepts this smiles and we should be able to Kekulize // correctly. accum += 0.1; res = static_cast(round(accum)); if (strict) { int effectiveValence; if (PeriodicTable::getTable()->getNouterElecs(d_atomicNum) >= 4) { effectiveValence = res - getFormalCharge(); } else { // for boron and co, we move to the right in the PT, so adding // extra valences means adding negative charge effectiveValence = res + getFormalCharge(); } const INT_VECT &valens = PeriodicTable::getTable()->getValenceList(d_atomicNum); int maxValence = *(valens.rbegin()); // maxValence == -1 signifies that we'll take anything at the high end if (maxValence > 0 && effectiveValence > maxValence) { // the explicit valence is greater than any // allowed valence for the atoms - raise an error std::ostringstream errout; errout << "Explicit valence for atom # " << getIdx() << " " << PeriodicTable::getTable()->getElementSymbol(d_atomicNum) << ", " << effectiveValence << ", is greater than permitted"; std::string msg = errout.str(); BOOST_LOG(rdErrorLog) << msg << std::endl; throw AtomValenceException(msg, getIdx()); } } d_explicitValence = res; return res; } int Atom::getImplicitValence() const { PRECONDITION(dp_mol, "valence not defined for atoms not associated with molecules"); if (df_noImplicit) return 0; return d_implicitValence; } // NOTE: this uses the explicitValence, so it will call // calcExplictValence() if it hasn't already been called int Atom::calcImplicitValence(bool strict) { PRECONDITION(dp_mol, "valence not defined for atoms not associated with molecules"); if (df_noImplicit) return 0; if (d_explicitValence == -1) this->calcExplicitValence(strict); // this is basically the difference between the allowed valence of // the atom and the explicit valence already specified - tells how // many Hs to add // int res; // The d-block and f-block of the periodic table (i.e. transition metals, // lanthanoids and actinoids) have no default valence. int dv = PeriodicTable::getTable()->getDefaultValence(d_atomicNum); if (dv == -1) { d_implicitValence = 0; return 0; } // here is how we are going to deal with the possibility of // multiple valences // - check the explicit valence "ev" // - if it is already equal to one of the allowed valences for the // atom return 0 // - otherwise take return difference between next larger allowed // valence and "ev" // if "ev" is greater than all allowed valences for the atom raise an // exception // finally aromatic cases are dealt with differently - these atoms are allowed // only default valences const INT_VECT &valens = PeriodicTable::getTable()->getValenceList(d_atomicNum); int explicitPlusRadV = getExplicitValence() + getNumRadicalElectrons(); int chg = getFormalCharge(); // NOTE: this is here to take care of the difference in element on // the right side of the carbon vs left side of carbon // For elements on the right side of the periodic table // (electronegative elements): // NHYD = V - SBO + CHG // For elements on the left side of the periodic table // (electropositive elements): // NHYD = V - SBO - CHG // This reflects that hydrogen adds to, for example, O as H+ while // it adds to Na as H-. // V = valence // SBO = Sum of bond orders // CHG = Formal charge // It seems reasonable that the line is drawn at Carbon (in Group // IV), but we must assume on which side of the line C // falls... an assumption which will not always be correct. For // example: // - Electropositive Carbon: a C with three singly-bonded // neighbors (DV = 4, SBO = 3, CHG = 1) and a positive charge (a // 'stable' carbocation) should not have any hydrogens added. // - Electronegative Carbon: C in isonitrile, R[N+]#[C-] (DV = 4, SBO = 3, // CHG = -1), also should not have any hydrogens added. // Because isonitrile seems more relevant to pharma problems, we'll be // making the second assumption: *Carbon is electronegative*. // // So assuming you read all the above stuff - you know why we are // changing signs for "chg" here if (isEarlyAtom(d_atomicNum)) { chg *= -1; } // special case for carbon - see GitHub #539 if (d_atomicNum == 6 && chg > 0) chg = -chg; // if we have an aromatic case treat it differently if (getIsAromatic()) { if (explicitPlusRadV <= (static_cast(dv) + chg)) { res = dv + chg - explicitPlusRadV; } else { // As we assume when finding the explicitPlusRadValence if we are // aromatic we should not be adding any hydrogen and already // be at an accepted valence state, // FIX: this is just ERROR checking and probably moot - the // explicitPlusRadValence function called above should assure us that // we satisfy one of the accepted valence states for the // atom. The only diff I can think of is in the way we handle // formal charge here vs the explicit valence function. bool satis = false; for (auto vi = valens.begin(); vi != valens.end() && *vi > 0; ++vi) { if (explicitPlusRadV == ((*vi) + chg)) { satis = true; break; } } if (strict && !satis) { std::ostringstream errout; errout << "Explicit valence for aromatic atom # " << getIdx() << " not equal to any accepted valence\n"; std::string msg = errout.str(); BOOST_LOG(rdErrorLog) << msg << std::endl; throw AtomValenceException(msg, getIdx()); } res = 0; } } else { // non-aromatic case we are allowed to have non default valences // and be able to add hydrogens res = -1; for (auto vi = valens.begin(); vi != valens.end() && *vi >= 0; ++vi) { int tot = (*vi) + chg; if (explicitPlusRadV <= tot) { res = tot - explicitPlusRadV; break; } } if (res < 0) { if (strict) { // this means that the explicit valence is greater than any // allowed valence for the atoms - raise an error std::ostringstream errout; errout << "Explicit valence for atom # " << getIdx() << " " << PeriodicTable::getTable()->getElementSymbol(d_atomicNum) << " greater than permitted"; std::string msg = errout.str(); BOOST_LOG(rdErrorLog) << msg << std::endl; throw AtomValenceException(msg, getIdx()); } else { res = 0; } } } d_implicitValence = res; return res; } void Atom::setIsotope(unsigned int what) { d_isotope = what; } double Atom::getMass() const { if (d_isotope) { double res = PeriodicTable::getTable()->getMassForIsotope(d_atomicNum, d_isotope); if (d_atomicNum != 0 && res == 0.0) res = d_isotope; return res; } else { return PeriodicTable::getTable()->getAtomicWeight(d_atomicNum); } } void Atom::setQuery(Atom::QUERYATOM_QUERY *what) { RDUNUSED_PARAM(what); // Atoms don't have complex queries so this has to fail PRECONDITION(0, "plain atoms have no Query"); } Atom::QUERYATOM_QUERY *Atom::getQuery() const { return nullptr; }; void Atom::expandQuery(Atom::QUERYATOM_QUERY *what, Queries::CompositeQueryType how, bool maintainOrder) { RDUNUSED_PARAM(what); RDUNUSED_PARAM(how); RDUNUSED_PARAM(maintainOrder); PRECONDITION(0, "plain atoms have no Query"); } bool Atom::Match(Atom const *what) const { PRECONDITION(what, "bad query atom"); bool res = getAtomicNum() == what->getAtomicNum(); // special dummy--dummy match case: // [*] matches [*],[1*],[2*],etc. // [1*] only matches [*] and [1*] if (res) { if (this->dp_mol && what->dp_mol && this->getOwningMol().getRingInfo()->isInitialized() && what->getOwningMol().getRingInfo()->isInitialized() && this->getOwningMol().getRingInfo()->numAtomRings(d_index) > what->getOwningMol().getRingInfo()->numAtomRings(what->d_index)) { res = false; } else if (!this->getAtomicNum()) { // this is the new behavior, based on the isotopes: int tgt = this->getIsotope(); int test = what->getIsotope(); if (tgt && test && tgt != test) { res = false; } } else { // standard atom-atom match: The general rule here is that if this atom // has a property that // deviates from the default, then the other atom should match that value. if ((this->getFormalCharge() && this->getFormalCharge() != what->getFormalCharge()) || (this->getIsotope() && this->getIsotope() != what->getIsotope()) || (this->getNumRadicalElectrons() && this->getNumRadicalElectrons() != what->getNumRadicalElectrons())) { res = false; } } } return res; } void Atom::updatePropertyCache(bool strict) { calcExplicitValence(strict); calcImplicitValence(strict); } bool Atom::needsUpdatePropertyCache() const { if (this->d_explicitValence >= 0 && (this->df_noImplicit || this->d_implicitValence >= 0)) { return false; } return true; } // returns the number of swaps required to convert the ordering // of the probe list to match the order of our incoming bonds: // // e.g. if our incoming bond order is: [0,1,2,3]: // getPerturbationOrder([1,0,2,3]) = 1 // getPerturbationOrder([1,2,3,0]) = 3 // getPerturbationOrder([1,2,0,3]) = 2 int Atom::getPerturbationOrder(INT_LIST probe) const { PRECONDITION( dp_mol, "perturbation order not defined for atoms not associated with molecules") INT_LIST ref; ROMol::OEDGE_ITER beg, end; boost::tie(beg, end) = getOwningMol().getAtomBonds(this); while (beg != end) { ref.push_back(getOwningMol()[*beg]->getIdx()); ++beg; } int nSwaps = static_cast(countSwapsToInterconvert(ref, probe)); return nSwaps; } void Atom::invertChirality() { switch (getChiralTag()) { case CHI_TETRAHEDRAL_CW: setChiralTag(CHI_TETRAHEDRAL_CCW); break; case CHI_TETRAHEDRAL_CCW: setChiralTag(CHI_TETRAHEDRAL_CW); break; case CHI_OTHER: case CHI_UNSPECIFIED: break; } } void setAtomRLabel(Atom *atm, int rlabel) { PRECONDITION(atm, "bad atom"); // rlabel ==> n2 => 0..99 PRECONDITION(rlabel >= 0 && rlabel < 100, "rlabel out of range for MDL files"); if (rlabel) { atm->setProp(common_properties::_MolFileRLabel, static_cast(rlabel)); } else if (atm->hasProp(common_properties::_MolFileRLabel)) { atm->clearProp(common_properties::_MolFileRLabel); } } //! Gets the atom's RLabel int getAtomRLabel(const Atom *atom) { PRECONDITION(atom, "bad atom"); unsigned int rlabel = 0; atom->getPropIfPresent(common_properties::_MolFileRLabel, rlabel); return static_cast(rlabel); } void setAtomAlias(Atom *atom, const std::string &alias) { PRECONDITION(atom, "bad atom"); if (alias != "") { atom->setProp(common_properties::molFileAlias, alias); } else if (atom->hasProp(common_properties::molFileAlias)) { atom->clearProp(common_properties::molFileAlias); } } std::string getAtomAlias(const Atom *atom) { PRECONDITION(atom, "bad atom"); std::string alias; atom->getPropIfPresent(common_properties::molFileAlias, alias); return alias; } void setAtomValue(Atom *atom, const std::string &value) { PRECONDITION(atom, "bad atom"); if (value != "") { atom->setProp(common_properties::molFileValue, value); } else if (atom->hasProp(common_properties::molFileValue)) { atom->clearProp(common_properties::molFileValue); } } std::string getAtomValue(const Atom *atom) { PRECONDITION(atom, "bad atom"); std::string value; atom->getPropIfPresent(common_properties::molFileValue, value); return value; } void setSupplementalSmilesLabel(Atom *atom, const std::string &label) { PRECONDITION(atom, "bad atom"); if (label != "") { atom->setProp(common_properties::_supplementalSmilesLabel, label); } else if (atom->hasProp(common_properties::_supplementalSmilesLabel)) { atom->clearProp(common_properties::_supplementalSmilesLabel); } } std::string getSupplementalSmilesLabel(const Atom *atom) { PRECONDITION(atom, "bad atom"); std::string label; atom->getPropIfPresent(common_properties::_supplementalSmilesLabel, label); return label; } } // namespace RDKit std::ostream &operator<<(std::ostream &target, const RDKit::Atom &at) { target << at.getIdx() << " " << at.getAtomicNum() << " " << at.getSymbol(); target << " chg: " << at.getFormalCharge(); target << " deg: " << at.getDegree(); target << " exp: "; try { int explicitValence = at.getExplicitValence(); target << explicitValence; } catch (...) { target << "N/A"; } target << " imp: "; try { int implicitValence = at.getImplicitValence(); target << implicitValence; } catch (...) { target << "N/A"; } target << " hyb: " << at.getHybridization(); target << " arom?: " << at.getIsAromatic(); target << " chi: " << at.getChiralTag(); if (at.getNumRadicalElectrons()) { target << " rad: " << at.getNumRadicalElectrons(); } if (at.getIsotope()) { target << " iso: " << at.getIsotope(); } if (at.getAtomMapNum()) { target << " mapno: " << at.getAtomMapNum(); } return target; };