Files
rdkit/Code/GraphMol/Atom.cpp
Greg Landrum 7041ae233e handle the NoImplicit flag properly
update aromaticity to require unsaturation at single-atom donors
test the above two things
This is not necessarily completely stable, but all current tests pass
(once obvious changes have been made). We will just have to see where
the aromaticity changes cause trouble going forward.
2006-09-19 20:20:09 +00:00

472 lines
14 KiB
C++
Executable File

// $Id$
//
// Copyright (C) 2001-2006 Rational Discovery LLC
//
// @@ All Rights Reserved @@
//
#include <math.h>
#include "ROMol.h"
#include "Atom.h"
#include "PeriodicTable.h"
#include "SanitException.h"
#include <RDGeneral/Invariant.h>
#include <RDGeneral/RDLog.h>
#include <RDGeneral/types.h>
namespace RDKit {
Atom::Atom(){
d_atomicNum=0;
initAtom();
}
Atom::Atom(unsigned int num) {
d_atomicNum = num;
initAtom();
};
Atom::Atom(std::string what) {
d_atomicNum = PeriodicTable::getTable()->getAtomicNumber(what);
initAtom();
};
Atom::Atom( const Atom & other){
// NOTE: we do *not* copy ownership!
d_atomicNum = other.d_atomicNum;
dp_mol = 0;
d_index=0;
d_formalCharge = other.d_formalCharge;
df_noImplicit = other.df_noImplicit;
df_isAromatic = other.df_isAromatic;
d_dativeFlag = other.d_dativeFlag;
d_numExplicitHs = other.d_numExplicitHs;
d_mass = other.d_mass;
d_pos = other.d_pos;
d_chiralTag=other.d_chiralTag;
d_hybrid = other.d_hybrid;
d_implicitValence=other.d_implicitValence;
d_explicitValence=other.d_explicitValence;
if(other.dp_props){
dp_props = new Dict(*other.dp_props);
} else {
dp_props = new Dict();
STR_VECT computed;
dp_props->setVal("computedProps", computed);
}
}
void Atom::initAtom(){
df_isAromatic = false;
df_noImplicit = false;
d_dativeFlag=0;
d_numExplicitHs = 0;
d_formalCharge = 0;
d_index = 0;
if(d_atomicNum){
d_mass = PeriodicTable::getTable()->getAtomicWeight(d_atomicNum);
} else{
d_mass = 0.0;
}
d_chiralTag=CHI_UNSPECIFIED;
d_hybrid = UNSPECIFIED;
dp_mol = 0;
dp_props = new Dict();
d_implicitValence=-1;
d_explicitValence=-1;
// ok every Atom contains a property entry called "computedProps"
// which provides list of property keys that correspond to value
// that have been computed this can used to blow out all computed
// properties while leaving the rest along initialize this list to
// an empty vector of strings
//STR_VECT computed;
//dp_props->setVal("computedProps", computed);
}
Atom::~Atom()
{
if(dp_props){
delete dp_props;
dp_props = 0;
}
}
Atom *Atom::copy() const {
Atom *res = new Atom(*this);
return res;
}
void Atom::setOwningMol(ROMol *other)
{
// NOTE: this operation does not update the topology of the owning
// molecule (i.e. this atom is not added to the graph). Only
// molecules can add atoms to themselves.
dp_mol = other;
}
std::string Atom::getSymbol() const {
std::string res;
// handle dummies differently:
if(d_atomicNum != 0 || !hasProp("dummyLabel") ){
res = PeriodicTable::getTable()->getElementSymbol(d_atomicNum);
} else {
getProp("dummyLabel",res);
}
return res;
}
unsigned int Atom::getDegree() const {
PRECONDITION(dp_mol,"valence not defined for atoms not associated with molecules")
return getOwningMol().getAtomDegree(this);
}
//
// If includeNeighbors is set, we'll loop over our neighbors
// and include any of them that are Hs in the count here
//
unsigned int Atom::getTotalNumHs(bool includeNeighbors) const {
PRECONDITION(dp_mol,"valence not defined for atoms not associated with molecules")
int res = getNumExplicitHs() + getNumImplicitHs();
if(includeNeighbors){
ROMol::ADJ_ITER begin,end;
const ROMol *parent = &getOwningMol();
boost::tie(begin,end) = parent->getAtomNeighbors(this);
while(begin!=end){
const Atom *at = parent->getAtomWithIdx(*begin);
if(at->getAtomicNum()==1) res++;
begin++;
}
}
return res;
}
unsigned int Atom::getNumImplicitHs() const {
if(df_noImplicit) return 0;
PRECONDITION(d_implicitValence>-1,
"getNumImplicitHs() called without preceding call to calcImplicitValence()");
return getImplicitValence();
}
int Atom::getExplicitValence(bool forceCalc) const {
PRECONDITION(dp_mol,"valence not defined for atoms not associated with molecules");
PRECONDITION(d_explicitValence>-1,
"getExplicitValence() called without call to calcExplicitValence()");
return d_explicitValence;
}
int Atom::calcExplicitValence(bool strict) {
PRECONDITION(dp_mol,"valence not defined for atoms not associated with molecules");
unsigned int res;
// FIX: contributions of bonds to valence are being done at best
// approximately
// EFF: this code is embarrassingly bad
double accum=0;
ROMol::OEDGE_ITER beg,end;
boost::tie(beg,end) = getOwningMol().getAtomBonds(this);
ROMol::GRAPH_MOL_BOND_PMAP::type pMap = getOwningMol().getBondPMap();
while(beg!=end){
accum += pMap[*beg]->getValenceContrib(this);
beg++;
}
accum += getNumExplicitHs();
// check accum is greater than the default valence
unsigned int dv = PeriodicTable::getTable()->getDefaultValence(d_atomicNum);
int chr = getFormalCharge();
if (accum > (dv + chr)) {
// check if this is because of aromaticity
// set the accum back to the default valence
if (this->getIsAromatic()) {
// this needs some explanation if the atom is aromatic and
// accum > (dv + chr) we assume that no hydrogen can be added
// to this atom. We set x = (v + chr) such that x is the
// clossest possible integer to "accum" but less than
// "accum". "v" here is one of the allowed valences e.g. if
// sulfur in the following smiles : O=c1ccs(=O)cc1
const UINT_VECT &valens = PeriodicTable::getTable()->getValenceList(d_atomicNum);
UINT_VECT_CI vi;
int val, pval = dv + chr;
for (vi = valens.begin(); vi != valens.end(); vi++) {
val = (*vi) + chr;
if (val > accum) {
break;
}
else {
pval = val;
}
}
accum = pval;
}
}
// despite promising to not to blame it on him - this a trick Greg
// came up with if we have a bond order sum of x.5 (i.e. 1.5, 2.5
// etc) we would like it to round to the higher integer value so
// 2.5 to 3 instead of 2, so we will add 0.1 to accum to assure
// this plays a role in the number of hydrogen that are implicitly
// added. This will only happen when the accum is a non-integer
// value and less than the default valence (otherwise the above if
// statement should have caught it). An example of where this can
// happen is the following smiles:
// C1ccccC1
// Daylight accepts this smiles and we should be able to Kekulize
// correctly.
accum += 0.1;
res = static_cast<int>(round(accum));
d_explicitValence = res;
return res;
}
int Atom::getImplicitValence(bool forceCalc) const {
PRECONDITION(dp_mol,"valence not defined for atoms not associated with molecules");
if(df_noImplicit) return 0;
return d_implicitValence;
}
// NOTE: this uses the explicitValence, so you need to call calcExplicitValence()
// before calling this... updatePropertyCache() takes care of that.
int Atom::calcImplicitValence(bool strict) {
PRECONDITION(dp_mol,"valence not defined for atoms not associated with molecules");
if(df_noImplicit) return 0;
if(d_explicitValence==-1) this->calcExplicitValence();
// this is basically the difference between the allowed valence of
// the atom and the explicit valence already specified - tells how
// many Hs to add
//
int res;
// here is how we are going to deal with the possibility of
// multiple valences
// - check the explicit valence "ev"
// - if it is already equal to one of the allowed valences for the
// atom return 0
// - otherwise take return difference between next larger allowed
// valence and "ev"
// if "ev" is greater than all allowed valences for the atom raise an exception
// finally aromatic cases are dealt with differently - these atoms are allowed
// only default valences
const UINT_VECT &valens = PeriodicTable::getTable()->getValenceList(d_atomicNum);
UINT_VECT_CI vi;
int dv = PeriodicTable::getTable()->getDefaultValence(d_atomicNum);
int explicitV = getExplicitValence();
int chg = getFormalCharge();
// NOTE: this is here to take care of the difference in element on
// the right side of the carbon vs left side of carbon
// For elements on the right side of the periodic table
// (electronegative elements):
// NHYD = V - SBO + CHG
// For elements on the left side of the periodic table
// (electropositive elements):
// NHYD = V - SBO - CHG
// This reflects that hydrogen adds to, for example, O as H+ while
// it adds to Na as H-.
// V = valence
// SBO = Sum of bond orders
// CHG = Formal charge
// It seems reasonable that the line is drawn at Carbon (in Group
// IV), but we must assume on which side of the line C
// falls... an assumption which will not always be correct. For
// example:
// - Electropositive Carbon: a C with three singly-bonded
// neighbors (DV = 4, SBO = 3, CHG = 1) and a positive charge (a
// 'stable' carbocation) should not have any hydrogens added.
// - Electronegative Carbon: C in isonitrile, R[N+]#[C-] (DV = 4, SBO = 3,
// CHG = -1), also should not have any hydrogens added.
// Because isonitrile seems more relevant to pharma problems, we'll be
// making the second assumption: *Carbon is electronegative*.
//
// So assuming you read all the above stuff - you know why we are
// changing signs for "chg" here
if ( (4 - PeriodicTable::getTable()->getNouterElecs(d_atomicNum)) > 0) {
chg *= -1;
}
// if we have an aromatic case treat it differently
if (getIsAromatic()) {
if (explicitV <= (dv + chg)) {
res = dv + chg - explicitV;
}
else {
// As we assume when finding the explicitValence if we are
// aromatic we should not be adding any hydrogen and already
// be at an accepted valence state,
// FIX: this is just ERROR checking and probably moot - the
// explicitValence function called above should assure us that
// we satisfy one of the accepted valence states for the
// atom. The only diff I can think of is in the way we handle
// formal charge here vs the explicit valence function.
bool satis = false;
for (vi = valens.begin(); vi != valens.end(); vi++) {
if (explicitV == ((*vi) + chg)) {
satis = true;
break;
}
}
if (strict && !satis) {
std::ostringstream errout;
errout << "Explicit valence for aromatic atom # " << getIdx()
<< " not equal to any accepted valence\n";
std::string msg = errout.str();
BOOST_LOG(rdErrorLog) << msg << std::endl;
throw MolSanitizeException(msg);
}
res = 0;
}
}
else {
// non-aromatic case we are allowed to have non default valences
// and be able to add hydrogens
int tot;
res = -1;
for (vi = valens.begin(); vi != valens.end(); vi++) {
tot = (*vi) + chg;
if (explicitV <= tot) {
res = tot - explicitV;
break;
}
}
if (res < 0) {
if(strict){
// this means that the explicit valen is greater than any
// allowed valence for the atoms - raise an error
std::ostringstream errout;
errout << "Explicit valence for atom # " << getIdx()
<< " " << PeriodicTable::getTable()->getElementSymbol(d_atomicNum)
<< " greater than permitted";
std::string msg = errout.str();
BOOST_LOG(rdErrorLog) << msg << std::endl;
throw MolSanitizeException(msg);
} else {
res = 0;
}
}
}
d_implicitValence = res;
return res;
}
void Atom::setQuery(Atom::QUERYATOM_QUERY *what) {
// Atoms don't have complex queries so this has to fail
PRECONDITION(0,"plain atoms have no Query");
}
Atom::QUERYATOM_QUERY *Atom::getQuery() const {
//PRECONDITION(0,"plain atoms have no Query");
return NULL;
};
void Atom::expandQuery(Atom::QUERYATOM_QUERY *what,
Queries::CompositeQueryType how,
bool maintainOrder) {
PRECONDITION(0,"plain atoms have no Query");
}
bool Atom::Match(Atom const *what) const {
bool res = getAtomicNum() == what->getAtomicNum();
// special dummy--dummy match case:
// X matches X, Xa, Xb, etc
// Xa only matches X and Xa
if(res && !getAtomicNum()){
std::string l1;
if(this->hasProp("dummyLabel")){
this->getProp("dummyLabel",l1);
} else{
l1="X";
}
if(l1!="X"){
std::string l2;
if(what->hasProp("dummyLabel")) what->getProp("dummyLabel",l2);
else l2="X";
if(l2!="X" && l1!=l2){
res = false;
}
}
}
return res;
}
bool Atom::Match(const Atom::ATOM_SPTR what) const {
return Match(what.get());
}
void Atom::updatePropertyCache(bool strict) {
calcExplicitValence(strict);
calcImplicitValence(strict);
}
// returns the number of swaps required to convert the ordering
// of the probe list to match the order of our incoming bonds:
//
// e.g. if our incoming bond order is: [0,1,2,3]:
// getPerturbationOrder([1,0,2,3]) = 1
// getPerturbationOrder([1,2,3,0]) = 3
// getPerturbationOrder([1,2,0,3]) = 2
int Atom::getPerturbationOrder(INT_LIST probe) const{
INT_LIST ref;
ROMol::OEDGE_ITER beg,end;
boost::tie(beg,end) = getOwningMol().getAtomBonds(this);
ROMol::GRAPH_MOL_BOND_PMAP::type pMap = getOwningMol().getBondPMap();
while(beg!=end){
ref.push_back(pMap[*beg]->getIdx());
beg++;
}
PRECONDITION(ref.size()==probe.size(),"size mismatch");
INT_LIST::const_iterator refIt=ref.begin();
INT_LIST::iterator probeIt=probe.begin(),probeIt2;
int nSwaps = 0;
while(refIt!=ref.end()){
if((*probeIt)!=(*refIt)){
bool foundIt=false;
probeIt2=probeIt;
while((*probeIt2)!=(*refIt) && probeIt2!=probe.end()){
probeIt2++;
}
if(probeIt2 != probe.end()){
foundIt=true;
}
CHECK_INVARIANT(foundIt,"could not find probe element");
int tmp=*probeIt2;
*probeIt2=*probeIt;
*probeIt=tmp;
nSwaps++;
}
probeIt++;
refIt++;
}
return nSwaps;
}
} // end o' namespace
std::ostream & operator<<(std::ostream& target, const RDKit::Atom &at){
target << at.getIdx() << " " << at.getAtomicNum() << " " << at.getSymbol();
target << " chg: " << at.getFormalCharge();
target << " deg: " << at.getDegree();
target << " exp: ";
try {
int explicitValence = at.getExplicitValence();
target << explicitValence;
} catch (...){
target << "N/A";
}
target << " imp: ";
try {
int implicitValence = at.getImplicitValence();
target << implicitValence;
} catch (...){
target << "N/A";
}
target << " hyb: " << at.getHybridization();
target << " arom?: " << at.getIsAromatic();
target << " chi: " << at.getChiralTag();
return target;
};