// $Id$ // // Copyright (C) 2003-2006 Rational Discovery LLC // // @@ All Rights Reserved @@ // #include "RDKitBase.h" #include #include "QueryAtom.h" #include "QueryOps.h" #include #include namespace RDKit{ // Local utility functionality: namespace { Atom *getAtomNeighborNot(ROMol *mol,const Atom *atom,const Atom *other){ PRECONDITION(mol,"bad molecule"); PRECONDITION(atom,"bad atom"); PRECONDITION(atom->getDegree()>1,"bad degree"); PRECONDITION(other,"bad atom"); Atom *res=0; ROMol::ADJ_ITER nbrIdx,endNbrs; boost::tie(nbrIdx,endNbrs) = mol->getAtomNeighbors(atom); while(nbrIdx!=endNbrs){ if(*nbrIdx != other->getIdx()){ res = mol->getAtomWithIdx(*nbrIdx); break; } nbrIdx++; } POSTCONDITION(res,"no neighbor found"); return res; } void setHydrogenCoords(ROMol *mol,int hydIdx,int heavyIdx){ // we will loop over all the coordinates PRECONDITION(mol,"bad molecule"); PRECONDITION(heavyIdx!=hydIdx,"degenerate atoms"); Atom *hydAtom = mol->getAtomWithIdx(hydIdx); PRECONDITION(mol->getAtomDegree(hydAtom)==1,"bad atom degree"); const Bond *bond=mol->getBondBetweenAtoms(heavyIdx,hydIdx); PRECONDITION(bond,"no bond between atoms"); const Atom *heavyAtom = mol->getAtomWithIdx(heavyIdx); double bondLength = PeriodicTable::getTable()->getRb0(1) + PeriodicTable::getTable()->getRb0(heavyAtom->getAtomicNum()); RDGeom::Point3D dirVect(0,0,0); RDGeom::Point3D perpVect,rotnAxis,nbrPerp; RDGeom::Point3D nbr1Vect,nbr2Vect,nbr3Vect; RDGeom::Transform3D tform; RDGeom::Point3D heavyPos, hydPos; const Atom *nbr1=0,*nbr2=0,*nbr3=0; const Bond *nbrBond; ROMol::ADJ_ITER nbrIdx,endNbrs; switch(heavyAtom->getDegree()){ case 1: // -------------------------------------------------------------------------- // No other atoms present: // -------------------------------------------------------------------------- dirVect.z = 1; // loop over the conformations and set the coordinates for (ROMol::ConformerIterator cfi = mol->beginConformers(); cfi != mol->endConformers(); cfi++) { heavyPos = (*cfi)->getAtomPos(heavyIdx); hydPos = heavyPos + dirVect*bondLength; (*cfi)->setAtomPos(hydIdx, hydPos); } break; case 2: // -------------------------------------------------------------------------- // One other neighbor: // -------------------------------------------------------------------------- nbr1=getAtomNeighborNot(mol,heavyAtom,hydAtom); for (ROMol::ConformerIterator cfi = mol->beginConformers(); cfi != mol->endConformers(); cfi++) { heavyPos = (*cfi)->getAtomPos(heavyIdx); RDGeom::Point3D nbr1Pos = (*cfi)->getAtomPos(nbr1->getIdx()); // get a normalized vector pointing away from the neighbor: nbr1Vect = heavyPos.directionVector(nbr1Pos); nbr1Vect *= -1; // ok, nbr1Vect points away from the other atom, figure out where // this H goes: switch(heavyAtom->getHybridization()){ case Atom::SP3: // get a perpendicular to nbr1Vect: perpVect=nbr1Vect.getPerpendicular(); // and move off it: tform.SetRotation((180-109.471)*M_PI/180.,perpVect); dirVect = tform*nbr1Vect; hydPos = heavyPos + dirVect*bondLength; (*cfi)->setAtomPos(hydIdx, hydPos); break; case Atom::SP2: // default position is to just take an arbitrary perpendicular: perpVect = nbr1Vect.getPerpendicular(); if(nbr1->getDegree()>1){ // can we use the neighboring atom to establish a perpendicular? nbrBond=mol->getBondBetweenAtoms(heavyIdx,nbr1->getIdx()); if(nbrBond->getIsAromatic() || nbrBond->getBondType()==Bond::DOUBLE){ nbr2=getAtomNeighborNot(mol,nbr1,heavyAtom); nbr2Vect=nbr1Pos.directionVector((*cfi)->getAtomPos(nbr2->getIdx())); perpVect = nbr2Vect.crossProduct(nbr1Vect); } } perpVect.normalize(); // rotate the nbr1Vect 60 degrees about perpVect and we're done: tform.SetRotation(60.*M_PI/180.,perpVect); dirVect = tform*nbr1Vect; hydPos = heavyPos + dirVect*bondLength; (*cfi)->setAtomPos(hydIdx, hydPos); break; case Atom::SP: // just lay the H along the vector: dirVect=nbr1Vect; hydPos = heavyPos + dirVect*bondLength; (*cfi)->setAtomPos(hydIdx, hydPos); break; default: // FIX: handle other hybridizations // for now, just lay the H along the vector: dirVect=nbr1Vect; hydPos = heavyPos + dirVect*bondLength; (*cfi)->setAtomPos(hydIdx, hydPos); } } break; case 3: // -------------------------------------------------------------------------- // Two other neighbors: // -------------------------------------------------------------------------- boost::tie(nbrIdx,endNbrs) = mol->getAtomNeighbors(heavyAtom); while(nbrIdx!=endNbrs){ if(*nbrIdx != hydIdx){ if(!nbr1) nbr1 = mol->getAtomWithIdx(*nbrIdx); else nbr2 = mol->getAtomWithIdx(*nbrIdx); } nbrIdx++; } TEST_ASSERT(nbr1); TEST_ASSERT(nbr2); for (ROMol::ConformerIterator cfi = mol->beginConformers(); cfi != mol->endConformers(); cfi++) { // start along the average of the two vectors: heavyPos = (*cfi)->getAtomPos(heavyIdx); nbr1Vect = (*cfi)->getAtomPos(nbr1->getIdx()).directionVector(heavyPos); nbr2Vect = (*cfi)->getAtomPos(nbr2->getIdx()).directionVector(heavyPos); dirVect = nbr1Vect+nbr2Vect; dirVect.normalize(); switch(heavyAtom->getHybridization()){ case Atom::SP3: // get the perpendicular to the neighbors: nbrPerp = nbr1Vect.crossProduct(nbr2Vect); // and the perpendicular to that: rotnAxis = nbrPerp.crossProduct(dirVect); // and then rotate about that: rotnAxis.normalize(); tform.SetRotation((109.471/2)*M_PI/180.,rotnAxis); dirVect = tform*dirVect; hydPos = heavyPos + dirVect*bondLength; (*cfi)->setAtomPos(hydIdx, hydPos); break; case Atom::SP2: // don't need to do anything here, the H atom goes right on the // direction vector hydPos = heavyPos + dirVect*bondLength; (*cfi)->setAtomPos(hydIdx, hydPos); break; default: // FIX: handle other hybridizations // for now, just lay the H along the neighbor vector; hydPos = heavyPos + dirVect*bondLength; (*cfi)->setAtomPos(hydIdx, hydPos); break; } } break; case 4: // -------------------------------------------------------------------------- // Three other neighbors: // -------------------------------------------------------------------------- boost::tie(nbrIdx,endNbrs) = mol->getAtomNeighbors(heavyAtom); while(nbrIdx!=endNbrs){ if(*nbrIdx != hydIdx){ if(!nbr1) nbr1 = mol->getAtomWithIdx(*nbrIdx); else if(!nbr2) nbr2 = mol->getAtomWithIdx(*nbrIdx); else nbr3 = mol->getAtomWithIdx(*nbrIdx); } nbrIdx++; } TEST_ASSERT(nbr1); TEST_ASSERT(nbr2); TEST_ASSERT(nbr3); for (ROMol::ConformerIterator cfi = mol->beginConformers(); cfi != mol->endConformers(); cfi++) { // use the average of the three vectors: heavyPos = (*cfi)->getAtomPos(heavyIdx); nbr1Vect = (*cfi)->getAtomPos(nbr1->getIdx()).directionVector(heavyPos); nbr2Vect = (*cfi)->getAtomPos(nbr2->getIdx()).directionVector(heavyPos); nbr3Vect = (*cfi)->getAtomPos(nbr3->getIdx()).directionVector(heavyPos); dirVect = nbr1Vect+nbr2Vect+nbr3Vect; dirVect.normalize(); hydPos = heavyPos + dirVect*bondLength; (*cfi)->setAtomPos(hydIdx, hydPos); } break; default: // -------------------------------------------------------------------------- // FIX: figure out what to do here // -------------------------------------------------------------------------- hydPos = heavyPos + dirVect*bondLength; for (ROMol::ConformerIterator cfi = mol->beginConformers(); cfi != mol->endConformers(); cfi++) { (*cfi)->setAtomPos(hydIdx, hydPos); } break; } } } // end of unnamed namespace namespace MolOps { // NOTE that we do not need to check for chiral atoms when adding Hs // because the bond order goes from hydrogen implicitly first to // hydrogen explicitly last. This is a cyclic permutation, so it // doesn't affect the chirality. (Of course there can only be one H // on the atom... otherwise it wouldn't be chiral!) ROMol *addHs(const ROMol &mol,bool explicitOnly,bool addCoords){ RWMol *res = static_cast(new ROMol(mol,false)); // when we hit each atom, clear its computed properties // NOTE: it is essential that we not clear the ring info in the // molecule's computed properties. We don't want to have to // regenerate that. This caused Issue210 and Issue212: res->clearComputedProps(false); // precompute the number of hydrogens we are going to add so that we can // pre-allocate the necessary space on the conformations of the molecule // for their coordinates unsigned int numAddHyds = 0; for(ROMol::ConstAtomIterator at=mol.beginAtoms();at!=mol.endAtoms();++at){ numAddHyds += (*at)->getNumExplicitHs(); if (!explicitOnly) { numAddHyds += (*at)->getNumImplicitHs(); } } unsigned int nSize = mol.getNumAtoms() + numAddHyds; // now if we want to add coordinates to the hydrogens; // loop over the conformations of the molecule and allocate new space // for their locations for (ROMol::ConformerIterator cfi = res->beginConformers(); cfi != res->endConformers(); ++cfi) { (*cfi)->reserve(nSize); } for(ROMol::ConstAtomIterator at=mol.beginAtoms();at!=mol.endAtoms();++at){ unsigned int oldIdx,newIdx; oldIdx = (*at)->getIdx(); Atom *newAt = res->getAtomWithIdx(oldIdx); newAt->clearComputedProps(); // always convert explicit Hs for(unsigned int i=0;i<(*at)->getNumExplicitHs();i++){ newIdx=res->addAtom(new Atom(1),false,true); res->addBond(oldIdx,newIdx,Bond::SINGLE); res->getAtomWithIdx(newIdx)->updatePropertyCache(); if(addCoords) setHydrogenCoords(res,newIdx,newAt->getIdx()); } // clear the local property newAt->setNumExplicitHs(0); if(!explicitOnly){ // take care of implicits for(unsigned int i=0;i<(*at)->getNumImplicitHs();i++){ newIdx=res->addAtom(new Atom(1),false,true); res->addBond(oldIdx,newIdx,Bond::SINGLE); // set the isImplicit label so that we can strip these back // off later if need be. res->getAtomWithIdx(newIdx)->setProp("isImplicit",1, true); res->getAtomWithIdx(newIdx)->updatePropertyCache(); if(addCoords) setHydrogenCoords(res,newIdx,newAt->getIdx()); } // be very clear about implicits not being allowed in this representation newAt->setProp("origNoImplicit",newAt->getNoImplicit(), true); newAt->setNoImplicit(true); } // update the atom's derived properties (valence count, etc.) newAt->updatePropertyCache(); } return static_cast(res); }; // // This routine removes hydrogens (and bonds to them) from the molecular graph. // Other Atom and bond indices may be affected by the removal. // // NOTE: // - Hydrogens which aren't connected to a heavy atom will not be // removed. This prevents molecules like "[H][H]" from having // all atoms removed. // ROMol *removeHs(const ROMol &mol,bool implicitOnly,bool updateExplicitCount){ Atom *atom; unsigned int currIdx=0; RWMol *res = static_cast(new ROMol(mol,false)); while(currIdx < res->getNumAtoms()){ atom = res->getAtomWithIdx(currIdx); if(atom->getAtomicNum()==1){ bool removeIt=false; if(atom->hasProp("isImplicit")){ removeIt=true; } else if(!implicitOnly){ ROMol::ADJ_ITER begin,end; boost::tie(begin,end) = res->getAtomNeighbors(atom); while(begin!=end){ if(res->getAtomWithIdx(*begin)->getAtomicNum() > 1){ removeIt=true; break; } begin++; } } if(removeIt){ ROMol::OEDGE_ITER beg,end; boost::tie(beg,end) = res->getAtomBonds(atom); ROMol::GRAPH_MOL_BOND_PMAP::type pMap = res->getBondPMap(); // note the assumption that the H only has one neighbor... I // feel no need to handle the case of hypervalent hydrogen! // :-) Bond const *bond = pMap[*beg]; Atom *heavyAtom =bond->getOtherAtom(atom); // we'll update the atom's explicit H count if we were told to // *or* if the atom is chiral, in which case the H is needed // in order to complete the coordination // *or* if the atom has the noImplicit flag set: if( updateExplicitCount || heavyAtom->getNoImplicit() || heavyAtom->getChiralTag()!=Atom::CHI_UNSPECIFIED ){ heavyAtom->setNumExplicitHs(heavyAtom->getNumExplicitHs()+1); } else { // this is a special case related to Issue 228 and the // "disappearing Hydrogen" problem discussed in MolOps::adjustHs // // If we remove a hydrogen from an aromatic N, we need to // be *sure* to increment the explicit count, even if the // H itself isn't marked as explicit if(heavyAtom->getAtomicNum()==7 && heavyAtom->getIsAromatic() && heavyAtom->getFormalCharge()==0){ heavyAtom->setNumExplicitHs(heavyAtom->getNumExplicitHs()+1); } } // One other consequence of removing the H from the graph is // that we may change the ordering of the bonds about a // chiral center. This may change the chiral label at that // atom. We deal with that by explicitly checking here: if(heavyAtom->getChiralTag()!=Atom::CHI_UNSPECIFIED){ INT_LIST neighborIndices; neighborIndices.push_back(bond->getIdx()); boost::tie(beg,end) = res->getAtomBonds(heavyAtom); while(beg!=end){ if(pMap[*beg]->getIdx()!=bond->getIdx()){ neighborIndices.push_back(pMap[*beg]->getIdx()); } beg++; } int nSwaps = heavyAtom->getPerturbationOrder(neighborIndices); // we're adding 1 to this because the H was moved to the // beginning and it really belongs in the second // position... so swap away: nSwaps += 1; if(nSwaps%2){ if(heavyAtom->getChiralTag()==Atom::CHI_TETRAHEDRAL_CW){ heavyAtom->setChiralTag(Atom::CHI_TETRAHEDRAL_CCW); } else if(heavyAtom->getChiralTag()==Atom::CHI_TETRAHEDRAL_CCW){ heavyAtom->setChiralTag(Atom::CHI_TETRAHEDRAL_CW); } } } res->removeAtom(atom); } else { // only increment the atom idx if we don't remove the atom currIdx++; } } else { // only increment the atom idx if we don't remove the atom currIdx++; if(atom->hasProp("origNoImplicit")){ // we'll get in here if we haven't already processed the atom's implicit // hydrogens. (this is protection for the case that removeHs() is called // multiple times on a single molecule without intervening addHs() calls) bool tmpBool; atom->getProp("origNoImplicit",tmpBool); atom->setNoImplicit(tmpBool); atom->clearProp("origNoImplicit"); } } } // // If we didn't only remove implicit Hs, which are guaranteed to // be the highest numbered atoms, we may have altered atom indices. // This can screw up derived properties (such as ring members), so // we'll resanitize to ensure we stay in a consistent state. // if(!implicitOnly){ sanitizeMol(*res); } return static_cast(res); }; // // This routine removes explicit hydrogens (and bonds to them) from // the molecular graph and adds them as queries to the heavy atoms // to which they are bound. If the heavy atoms (or atom queries) // already have hydrogen-count queries, they will be updated. // // NOTE: // - Hydrogens which aren't connected to a heavy atom will not be // removed. This prevents molecules like "[H][H]" from having // all atoms removed. // ROMol *mergeQueryHs(const ROMol &mol){ unsigned int currIdx=0; RWMol *res = static_cast(new ROMol(mol,false)); while(currIdx < res->getNumAtoms()){ Atom *atom = res->getAtomWithIdx(currIdx); if(atom->getAtomicNum()==1){ bool removeIt=false; ROMol::ADJ_ITER begin,end; boost::tie(begin,end) = res->getAtomNeighbors(atom); while(begin!=end){ if(res->getAtomWithIdx(*begin)->getAtomicNum() > 1){ removeIt=true; break; } begin++; } if(removeIt){ ROMol::ADJ_ITER begin,end; boost::tie(begin,end) = res->getAtomNeighbors(atom); Atom *nbr = res->getAtomWithIdx(*begin); // // We've found the neighbor. // 1) If the neighbor has no H query already: // - add a generic H query // else: // - do nothing // 2) Remove the atom from the molecular graph // // Examples: // C[H] -> [C;!H0] // [C;H1][H] -> [C;H1] // [C;H2][H] -> [C;H2] // // FIX: this is going to behave oddly in the case of a contradictory // SMARTS like: [C;H0][H], where it will give the equivalent of: // [C;H0] I think this is actually correct, but I can be persuaded // otherwise. // // First we'll search for an H query: bool hasHQuery=false; std::list childStack; QueryAtom::QUERYATOM_QUERY::CHILD_VECT_CI child1; for(child1=nbr->getQuery()->beginChildren(); child1!=nbr->getQuery()->endChildren(); child1++){ childStack.push_back(*child1); } while( !hasHQuery && childStack.size() ){ QueryAtom::QUERYATOM_QUERY::CHILD_TYPE query = childStack.front(); childStack.pop_front(); if(query->getDescription()=="AtomHCount"){ hasHQuery=true; } else { for(child1=query->beginChildren(); child1!=query->endChildren(); child1++){ childStack.push_back(*child1); } } } if(!hasHQuery){ ATOM_EQUALS_QUERY *tmp=makeAtomHCountQuery(0); tmp->setNegation(true); nbr->expandQuery(tmp); } res->removeAtom(atom); } else { // only increment the atom idx if we don't remove the atom currIdx++; } } else { currIdx++; } } return static_cast(res); }; }; // end of namespace MolOps }; // end of namespace RDKit