diff --git a/Code/Geometry/Wrap/Point.cpp b/Code/Geometry/Wrap/Point.cpp index 162d15654..51fb82a8b 100644 --- a/Code/Geometry/Wrap/Point.cpp +++ b/Code/Geometry/Wrap/Point.cpp @@ -82,6 +82,8 @@ namespace RDGeom { .def(python::self -= python::self) .def(python::self + python::self) .def(python::self += python::self) + .def(python::self * double()) + .def(python::self / double()) .def("__imul__", &Point3D::operator*=, python::return_value_policy(), "Scalar multiplication") @@ -120,6 +122,8 @@ namespace RDGeom { .def(python::self -= python::self) .def(python::self + python::self) .def(python::self += python::self) + .def(python::self * double()) + .def(python::self / double()) .def("__imul__", &Point2D::operator*=, python::return_value_policy(), "Scalar multiplication") diff --git a/Code/GraphMol/Atom.cpp b/Code/GraphMol/Atom.cpp index c512ac240..617215578 100755 --- a/Code/GraphMol/Atom.cpp +++ b/Code/GraphMol/Atom.cpp @@ -366,6 +366,25 @@ void Atom::expandQuery(Atom::QUERYATOM_QUERY *what, bool Atom::Match(Atom const *what) const { bool res = getAtomicNum() == what->getAtomicNum(); + // special dummy--dummy match case: + // X matches X, Xa, Xb, etc + // Xa only matches X and Xa + if(res && !getAtomicNum()){ + std::string l1; + if(this->hasProp("dummyLabel")){ + this->getProp("dummyLabel",l1); + } else{ + l1="X"; + } + if(l1!="X"){ + std::string l2; + if(what->hasProp("dummyLabel")) what->getProp("dummyLabel",l2); + else l2="X"; + if(l2!="X" && l1!=l2){ + res = false; + } + } + } return res; } bool Atom::Match(const Atom::ATOM_SPTR what) const { diff --git a/Code/GraphMol/ChemTransforms/ChemTransforms.cpp b/Code/GraphMol/ChemTransforms/ChemTransforms.cpp new file mode 100644 index 000000000..62c3e0e51 --- /dev/null +++ b/Code/GraphMol/ChemTransforms/ChemTransforms.cpp @@ -0,0 +1,183 @@ +// $Id$ +// +// Copyright (C) 2006 Greg Landrum +// +// @@ All Rights Reserved @@ +// +#include +#include +#include +#include + +#include +#include + +namespace RDKit{ + + ROMol *deleteSubstructs(const ROMol &mol, const ROMol &query, + bool onlyFrags) { + RWMol *res = static_cast(new ROMol(mol,false)); + std::vector fgpMatches; + std::vector::const_iterator mati; + std::pair amat; + VECT_INT_VECT matches; // all matches onto the molecule - list of list of atom ids + MatchVectType::const_iterator mi; + // do the substructure matching and get the atoms that match the query + SubstructMatch(*res, query, fgpMatches); + + // if didn't find any matches nothing to be done here + // simply return a copy of the molecule + if (fgpMatches.size() == 0) { + return res; + } + + for (mati = fgpMatches.begin(); mati != fgpMatches.end(); mati++) { + INT_VECT match; // each match onto the molecule - list of atoms ids + for (mi = mati->begin(); mi != mati->end(); mi++) { + match.push_back(mi->second); + } + matches.push_back(match); + } + + // now loop over the list of matches and check if we can delete any of them + INT_VECT delList; + + VECT_INT_VECT_I mxi, fi; + if (onlyFrags) { + VECT_INT_VECT frags; + + unsigned int nfrags = MolOps::getMolFrags(*res, frags); + for (fi = frags.begin(); fi != frags.end(); fi++) { + std::sort(fi->begin(), fi->end()); + for (mxi = matches.begin(); mxi != matches.end(); mxi++) { + std::sort(mxi->begin(), mxi->end()); + if ((*fi) == (*mxi) ) { + INT_VECT tmp; + Union((*mxi), delList, tmp); + delList = tmp; + break; + } // end of if we found a matching fragment + } // endof loop over matches + } // end of loop over fragments + } // end of if onlyFrags + else { + // in this case we want to delete any matches we find + // simply loop over the matches and collect the atoms that need to + // be removed + for (mxi = matches.begin(); mxi != matches.end(); mxi++) { + INT_VECT tmp; + Union((*mxi), delList, tmp); + delList = tmp; + } + } + + // now loop over the union list and delete the atoms + // Will do this in the decreasing order of the atomIds + // this is so that the AtomIds ids in the "delList" are + // not invalidated by a previous removal (removing atom number i changes + // the atom indices only atoms with indices >i ) + std::sort(delList.begin(), delList.end()); + + INT_VECT_RI dri; + for (dri = delList.rbegin(); dri != delList.rend(); dri++) { + res->removeAtom(*dri); + } + // if we removed any atoms, clear the computed properties: + if(delList.size()){ + res->clearComputedProps(true); + // update our properties, but allow unhappiness: + res->updatePropertyCache(false); + } + return res; + } + + std::vector + replaceSubstructs(const ROMol &mol, const ROMol &query,const ROMol &replacement, + bool replaceAll) { + std::vector res; + std::vector fgpMatches; + + // do the substructure matching and get the atoms that match the query + SubstructMatch(mol, query, fgpMatches); + + // if we didn't find any matches, there's nothing to be done here + // simply return a list with a copy of the starting molecule + if (fgpMatches.size() == 0) { + res.push_back(ROMOL_SPTR(new ROMol(mol,false))); + return res; + } + + INT_VECT delList; + + // now loop over the list of matches and replace them: + for (std::vector::const_iterator mati = fgpMatches.begin(); + mati != fgpMatches.end(); mati++) { + INT_VECT match; // each match onto the molecule - list of atoms ids + for (MatchVectType::const_iterator mi = mati->begin(); + mi != mati->end(); mi++) { + match.push_back(mi->second); + } + INT_VECT sortMatch = match; + std::sort(sortMatch.begin(),sortMatch.end()); + + if( !replaceAll || !res.size() ) { + res.push_back(ROMOL_SPTR(new ROMol(mol,false))); + } + RWMol *newMol = static_cast(res.rbegin()->get()); + + // we need a tab to the orig number of atoms because the + // new molecule will start numbered above this: + int numOrigAtoms=newMol->getNumAtoms(); + + // Add the atoms and bonds from the replacement: + newMol->insertMol(replacement); + + // loop over the central atom's (the first atom in match) bonds + // and duplicate any that connect to the remainder of the molecule: + Atom *origAtom=newMol->getAtomWithIdx(match[0]); + ROMol::ADJ_ITER nbrIdx,endNbrs; + boost::tie(nbrIdx,endNbrs) = newMol->getAtomNeighbors(origAtom); + while(nbrIdx!=endNbrs){ + // we don't want to duplicate any "intra-match" bonds: + if(!std::binary_search(sortMatch.begin(),sortMatch.end(),int(*nbrIdx))){ + Bond *oBond=newMol->getBondBetweenAtoms(match[0],*nbrIdx); + CHECK_INVARIANT(oBond,"required bond not found"); + newMol->addBond(numOrigAtoms,*nbrIdx,oBond->getBondType()); + } + nbrIdx++; + } + + if(replaceAll){ + // we'll accumulate a list of atoms to be removed: + INT_VECT tmp; + Union(sortMatch, delList, tmp); + delList = tmp; + } else { + // just delete the atoms now: + for (INT_VECT_RI dri = sortMatch.rbegin(); dri != sortMatch.rend(); dri++) { + newMol->removeAtom(*dri); + } + } + } + + if(replaceAll){ + // remove the atoms from the delList: + std::sort(delList.begin(),delList.end()); + RWMol *newMol = static_cast(res[0].get()); + for (INT_VECT_RI dri = delList.rbegin(); dri != delList.rend(); dri++) { + newMol->removeAtom(*dri); + } + } + + // clear computed props and do basic updates on the + // the resulting molecules, but allow unhappiness: + for(std::vector::iterator resI=res.begin(); + resI!=res.end();resI++){ + (*resI)->clearComputedProps(true); + (*resI)->updatePropertyCache(false); + } + + return res; + } + +} diff --git a/Code/GraphMol/ChemTransforms/ChemTransforms.h b/Code/GraphMol/ChemTransforms/ChemTransforms.h new file mode 100644 index 000000000..185ed5bec --- /dev/null +++ b/Code/GraphMol/ChemTransforms/ChemTransforms.h @@ -0,0 +1,72 @@ +// +// Copyright (C) 2006 Greg Landrum +// +// @@ All Rights Reserved @@ +// +#ifndef _RD_CHEMTRANSFORMS_H__ +#define _RD_CHEMTRANSFORMS_H__ + +#include +#include + +namespace RDKit{ + class ROMol; + typedef boost::shared_ptr ROMOL_SPTR; + + //! \brief Returns a copy of an ROMol with the atoms and bonds that + //! match a pattern removed. + /*! + \param mol the ROMol of interest + \param query the query ROMol + \param onlyFrags if this is set, only matches that correspond to an + entire fragment will be removed. This is useful for + salt removal + + \return a copy of \c mol with the matching atoms and bonds (if any) + removed. + */ + ROMol *deleteSubstructs(const ROMol &mol, const ROMol &query, + bool replaceAll=false); + + //! \brief Returns a list of copies of an ROMol with the atoms and bonds that + //! match a pattern replaced with the atoms contained in another molecule. + /*! + Bonds are created between the joining atom in the existing molecule + and the atoms in the new molecule. So, using SMILES instead of molecules: + replaceSubstructs('OC(=O)NCCNC(=O)O','C(=O)O','[X]') -> + ['[X]NCCNC(=O)O','OC(=O)NCCN[X]'] + replaceSubstructs('OC(=O)NCCNC(=O)O','C(=O)O','[X]',true) -> + ['[X]NCCN[X]'] + Chains should be handled "correctly": + replaceSubstructs('CC(=O)C','C(=O)','[X]') -> + ['C[X]C'] + As should rings: + replaceSubstructs('C1C(=O)C1','C(=O)','[X]') -> + ['C1[X]C1'] + And higher order branches: + replaceSubstructs('CC(=O)(C)C','C(=O)','[X]') -> + ['C[X](C)C'] + Note that the client is responsible for making sure that the + resulting molecule actually makes sense - this function does not + perform sanitization. + + \param mol the ROMol of interest + \param query the query ROMol + \param replacement the ROMol to be inserted + \param replaceAll if this is true, only a single result, with all occurances + of the substructure replaced, will be returned. + + \return a vector of pointers to copies of \c mol with the matching atoms + and bonds (if any) replaced + + */ + std::vector replaceSubstructs(const ROMol &mol, const ROMol &query, + const ROMol &replacement, + bool replaceAll=false); +} + +#endif + + + + diff --git a/Code/GraphMol/ChemTransforms/ChemTransforms.vcproj b/Code/GraphMol/ChemTransforms/ChemTransforms.vcproj new file mode 100644 index 000000000..dc820b9dd --- /dev/null +++ b/Code/GraphMol/ChemTransforms/ChemTransforms.vcproj @@ -0,0 +1,143 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/Code/GraphMol/ChemTransforms/testChemTransforms.cpp b/Code/GraphMol/ChemTransforms/testChemTransforms.cpp new file mode 100644 index 000000000..051252f11 --- /dev/null +++ b/Code/GraphMol/ChemTransforms/testChemTransforms.cpp @@ -0,0 +1,194 @@ +// $Id$ +// +// Copyright (C) 2006 Greg Landrum +// +// @@ All Rights Reserved @@ +// +#include +#include +#include +#include +#include +#include + +using namespace RDKit; + +void testDeleteSubstruct() +{ + int i = 0; + ROMol *mol1=0,*mol2=0,*matcher1=0,*matcher2=0,*matcher3=0; + std::string smi,sma; + + BOOST_LOG(rdInfoLog) << "-------------------------------------" << std::endl; + BOOST_LOG(rdInfoLog) << "Testing deleteSubstruct" << std::endl; + + // a lot of the seemingly repetitive stuff is here for Issue96 + smi = "CCC(=O).C=O"; + mol1 = SmilesToMol(smi); + TEST_ASSERT(mol1); + TEST_ASSERT(mol1->getNumAtoms()==6) + sma = "C=O"; + matcher1 = SmartsToMol(sma); + TEST_ASSERT(matcher1); + + + mol2 = deleteSubstructs(*mol1,*matcher1,0); + TEST_ASSERT(mol2); + TEST_ASSERT(mol2->getNumAtoms()==2) + mol2 = deleteSubstructs(*mol2,*matcher1,0); + TEST_ASSERT(mol2); + TEST_ASSERT(mol2->getNumAtoms()==2) + + delete matcher1; + sma = "[Cl;H1&X1,-]"; + matcher1 = SmartsToMol(sma); + sma = "[Na+]"; + matcher2 = SmartsToMol(sma); + sma = "[O;H2,H1&-,X0&-2]"; + matcher3 = SmartsToMol(sma); + delete mol1; + mol1 = SmilesToMol("CCO.Cl"); + TEST_ASSERT(mol1); + TEST_ASSERT(mol1->getNumAtoms()==4); + + delete mol2; + mol2 = deleteSubstructs(*mol1,*matcher1,true); + TEST_ASSERT(mol2); + TEST_ASSERT(mol2->getNumAtoms()==3); + mol2 = deleteSubstructs(*mol2,*matcher2,true); + TEST_ASSERT(mol2); + TEST_ASSERT(mol2->getNumAtoms()==3); + mol2 = deleteSubstructs(*mol2,*matcher3,true); + TEST_ASSERT(mol2); + TEST_ASSERT(mol2->getNumAtoms()==3); + + delete mol1; + mol1 = SmilesToMol("CC(=O)[O-].[Na+]"); + TEST_ASSERT(mol1); + TEST_ASSERT(mol1->getNumAtoms()==5); + + delete matcher1; + matcher1 = SmartsToMol("[Cl;H1&X1,-]"); + delete matcher2; + matcher2 = SmartsToMol("[Na+]"); + + mol2 = deleteSubstructs(*mol1,*matcher1,true); + TEST_ASSERT(mol2); + TEST_ASSERT(mol2->getNumAtoms()==5); + + mol2 = deleteSubstructs(*mol2,*matcher2,true); + TEST_ASSERT(mol2); + TEST_ASSERT(mol2->getNumAtoms()==4); + + mol2 = deleteSubstructs(*mol2,*matcher1,true); + TEST_ASSERT(mol2); + TEST_ASSERT(mol2->getNumAtoms()==4); + + BOOST_LOG(rdInfoLog) << "\tdone" << std::endl; +} + +void testReplaceSubstructs() +{ + int i = 0; + ROMol *mol1=0,*mol2=0,*matcher1=0,*frag=0; + std::string smi,sma; + std::vector vect; + + + BOOST_LOG(rdInfoLog) << "-------------------------------------" << std::endl; + BOOST_LOG(rdInfoLog) << "Testing replaceSubstruct" << std::endl; + + smi = "CCCC"; + mol1 = SmilesToMol(smi); + TEST_ASSERT(mol1); + + sma = "C(=O)O"; + matcher1 = SmartsToMol(sma); + TEST_ASSERT(matcher1); + + smi = "N"; + frag = SmilesToMol(smi); + TEST_ASSERT(frag); + + vect = replaceSubstructs(*mol1,*matcher1,*frag); + TEST_ASSERT(vect.size()==1); + TEST_ASSERT(mol1->getNumAtoms()==4); + TEST_ASSERT(vect[0]->getNumAtoms()==4); + + + delete mol1; + smi = "CCCC(=O)O"; + mol1 = SmilesToMol(smi); + TEST_ASSERT(mol1); + vect = replaceSubstructs(*mol1,*matcher1,*frag); + TEST_ASSERT(vect.size()==1); + TEST_ASSERT(mol1->getNumAtoms()==6); + TEST_ASSERT(vect[0]->getNumAtoms()==4); + + vect = replaceSubstructs(*mol1,*matcher1,*frag,true); + TEST_ASSERT(vect.size()==1); + TEST_ASSERT(mol1->getNumAtoms()==6); + TEST_ASSERT(vect[0]->getNumAtoms()==4); + + delete mol1; + smi = "OC(=O)CCCC(=O)O"; + mol1 = SmilesToMol(smi); + TEST_ASSERT(mol1); + vect = replaceSubstructs(*mol1,*matcher1,*frag); + TEST_ASSERT(vect.size()==2); + TEST_ASSERT(mol1->getNumAtoms()==9); + TEST_ASSERT(vect[0]->getNumAtoms()==7); + TEST_ASSERT(vect[1]->getNumAtoms()==7); + + // use replaceAll when a single replacement is available: + vect = replaceSubstructs(*mol1,*matcher1,*frag,true); + TEST_ASSERT(vect.size()==1); + TEST_ASSERT(mol1->getNumAtoms()==9); + TEST_ASSERT(vect[0]->getNumAtoms()==5); + + delete matcher1; + sma = "C=O"; + matcher1 = SmartsToMol(sma); + TEST_ASSERT(matcher1); + + delete mol1; + smi = "CC(=O)C"; + mol1 = SmilesToMol(smi); + TEST_ASSERT(mol1); + vect = replaceSubstructs(*mol1,*matcher1,*frag); + TEST_ASSERT(vect.size()==1); + TEST_ASSERT(mol1->getNumAtoms()==4); + TEST_ASSERT(vect[0]->getNumAtoms()==3); + + delete mol1; + smi = "C1C(=O)C1"; + mol1 = SmilesToMol(smi); + TEST_ASSERT(mol1); + vect = replaceSubstructs(*mol1,*matcher1,*frag); + TEST_ASSERT(vect.size()==1); + TEST_ASSERT(mol1->getNumAtoms()==4); + TEST_ASSERT(vect[0]->getNumAtoms()==3); + TEST_ASSERT(vect[0]->getNumBonds()==3); + + + BOOST_LOG(rdInfoLog) << "\tdone" << std::endl; +} + + + + +int main() { + RDLog::InitLogs(); + + BOOST_LOG(rdInfoLog) << "********************************************************\n"; + BOOST_LOG(rdInfoLog) << "Testing Chemical Transforms \n"; + +#if 1 + testDeleteSubstruct(); + testReplaceSubstructs(); +#endif + + BOOST_LOG(rdInfoLog) << "*******************************************************\n"; + return(0); +} + diff --git a/Code/GraphMol/ChemTransforms/testChemTransforms.vcproj b/Code/GraphMol/ChemTransforms/testChemTransforms.vcproj new file mode 100644 index 000000000..824348609 --- /dev/null +++ b/Code/GraphMol/ChemTransforms/testChemTransforms.vcproj @@ -0,0 +1,156 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/Code/GraphMol/ChemTransforms/test_list.py b/Code/GraphMol/ChemTransforms/test_list.py new file mode 100644 index 000000000..fd93ffe07 --- /dev/null +++ b/Code/GraphMol/ChemTransforms/test_list.py @@ -0,0 +1,14 @@ +import sys + +tests=[ + ("testExecs/main.exe","",{}), + ] + +longTests=[ + ] + +if __name__=='__main__': + import sys + import TestRunner + failed,tests = TestRunner.RunScript('test_list.py',0,1) + sys.exit(len(failed)) diff --git a/Code/GraphMol/FileParsers/MolFileParser.cpp b/Code/GraphMol/FileParsers/MolFileParser.cpp index ee4990cb0..c9ffda0d4 100755 --- a/Code/GraphMol/FileParsers/MolFileParser.cpp +++ b/Code/GraphMol/FileParsers/MolFileParser.cpp @@ -197,7 +197,8 @@ namespace RDKit{ //double pX,pY,pZ; std::string symb; int massDiff,chg,hCount; - //int rxnComponentType,rxnComponentNumber,atomMapNumber,inversionFlag,exactChangeFlag; + int stereoCare,totValence; + int atomMapNumber,inversionFlag,exactChangeFlag; try { pos.x = stripSpacesAndCast(text.substr(0,10)); @@ -250,6 +251,18 @@ namespace RDKit{ if(symb=="L" || symb=="A" || symb=="Q" || symb=="*" || symb=="LP" || (symb>="R0" && symb<="R9") ){ res->setAtomicNum(0); + if(symb=="*") res->setProp("dummyLabel",std::string("X")); + else if(symb=="R0") res->setProp("dummyLabel",std::string("Xa")); + else if(symb=="R1") res->setProp("dummyLabel",std::string("Xb")); + else if(symb=="R2") res->setProp("dummyLabel",std::string("Xc")); + else if(symb=="R3") res->setProp("dummyLabel",std::string("Xd")); + else if(symb=="R4") res->setProp("dummyLabel",std::string("Xe")); + else if(symb=="R5") res->setProp("dummyLabel",std::string("Xf")); + else if(symb=="R6") res->setProp("dummyLabel",std::string("Xg")); + else if(symb=="R7") res->setProp("dummyLabel",std::string("Xh")); + else if(symb=="R8") res->setProp("dummyLabel",std::string("Xi")); + else if(symb=="R9") res->setProp("dummyLabel",std::string("Xj")); + else res->setProp("dummyLabel",symb); } else { res->setAtomicNum(PeriodicTable::getTable()->getAtomicNumber(symb)); res->setMass(PeriodicTable::getTable()->getAtomicWeight(res->getAtomicNum())); @@ -264,7 +277,7 @@ namespace RDKit{ res->setMass(res->getMass()+massDiff); } -#if 0 +#if 1 stereoCare=0; if(text.size()>=48){ try { @@ -287,28 +300,6 @@ namespace RDKit{ throw FileParseException(errout.str()) ; } } - rxnComponentType=0; - if(text.size()>=57){ - try { - rxnComponentType= stripSpacesAndCast(text.substr(54,3)); - } - catch (boost::bad_lexical_cast &) { - std::ostringstream errout; - errout << "Cannot convert " << text.substr(54,3) << " to int"; - throw FileParseException(errout.str()) ; - } - } - rxnComponentNumber=0; - if(text.size()>=60){ - try { - rxnComponentNumber= stripSpacesAndCast(text.substr(57,3)); - } - catch (boost::bad_lexical_cast &) { - std::ostringstream errout; - errout << "Cannot convert " << text.substr(57,3) << " to int"; - throw FileParseException(errout.str()) ; - } - } atomMapNumber=0; if(text.size()>=63){ try { @@ -345,13 +336,11 @@ namespace RDKit{ // save it for later - res->setProp("stereoCare",stereoCare); - res->setProp("totValence",totValence); - res->setProp("rxnComponentType",rxnComponentType); - res->setProp("rxnComponentNumber",rxnComponentNumber); - res->setProp("atomMapNumber",atomMapNumber); - res->setProp("inversionFlag",inversionFlag); - res->setProp("exactChangeFlag",exactChangeFlag); + res->setProp("molStereoCare",stereoCare); + res->setProp("molTotValence",totValence); + res->setProp("molAtomMapNumber",atomMapNumber); + res->setProp("molInversionFlag",inversionFlag); + res->setProp("molExactChangeFlag",exactChangeFlag); #endif return res; }; @@ -442,19 +431,19 @@ namespace RDKit{ } catch (boost::bad_lexical_cast) { ; } -#if 0 +#if 1 if( text.size() >= 18 ) try { - topology = stripSpacesAndCast(text.substr(15,3)); - res->setProp("topology",topology); + int topology = stripSpacesAndCast(text.substr(15,3)); + res->setProp("molTopology",topology); } catch (boost::bad_lexical_cast) { ; } if( text.size() >= 21 ) try { - reactStatus = stripSpacesAndCast(text.substr(18,3)); - res->setProp("reactStatus",reactStatus); + int reactStatus = stripSpacesAndCast(text.substr(18,3)); + res->setProp("molReactStatus",reactStatus); } catch (boost::bad_lexical_cast) { ; } diff --git a/Code/GraphMol/QueryOps.cpp b/Code/GraphMol/QueryOps.cpp index 39e2e4b12..4ffc0c171 100755 --- a/Code/GraphMol/QueryOps.cpp +++ b/Code/GraphMol/QueryOps.cpp @@ -509,7 +509,8 @@ ATOM_EQUALS_QUERY *makeAtomInRingQuery(){ } ATOM_EQUALS_QUERY *makeAtomInNRingsQuery(int what){ - ATOM_EQUALS_QUERY *res= makeAtomSimpleQuery(what,queryIsAtomInNRings); + ATOM_EQUALS_QUERY *res; + res = makeAtomSimpleQuery(what,queryIsAtomInNRings); res->setDescription("AtomInNRings"); return res; } diff --git a/Code/GraphMol/QueryOps.h b/Code/GraphMol/QueryOps.h index 4872f0238..11e67657b 100755 --- a/Code/GraphMol/QueryOps.h +++ b/Code/GraphMol/QueryOps.h @@ -79,7 +79,53 @@ namespace RDKit{ //! returns a Query for matching any atom ATOM_NULL_QUERY *makeAtomNullQuery(); + static int queryAtomRingMembership(Atom const *at) { + return static_cast(at->getOwningMol().getRingInfo()->numAtomRings(at->getIdx())); + } + // I'm pretty sure that this typedef shouldn't be necessary, + // but VC++ generates a warning about const Atom const * in + // the definition of Match, then complains about an override + // that differs only by const/volatile (c4301), then generates + // incorrect code if we don't do this... so let's do it. + typedef Atom const *ConstAtomPtr; + class InRingQuery : public Queries::EqualityQuery { + public: + InRingQuery() : Queries::EqualityQuery(-1) { + this->setDescription("AtomInNRings"); + this->setDataFunc(queryAtomRingMembership); + }; + explicit InRingQuery(int v) : Queries::EqualityQuery(v) { + this->setDescription("AtomInNRings"); + this->setDataFunc(queryAtomRingMembership); + }; + + virtual bool Match(const ConstAtomPtr what) const { + int v = this->TypeConvert(what,Queries::Int2Type()); + bool res; + if(this->d_val<0){ + res = v!=0; + } else { + res=!Queries::queryCmp(v,this->d_val,this->d_tol); + } + if(this->getNegation()){ + res=!res; + } + return res; + } + + //! returns a copy of this query + Queries::Query * + copy() const { + InRingQuery *res = + new InRingQuery(this->d_val); + res->setNegation(getNegation()); + res->setTol(this->getTol()); + res->d_description = this->d_description; + return res; + } + }; + //! allows use of recursive structure queries (e.g. recursive SMARTS) class RecursiveStructureQuery : public Queries::SetQuery { public: diff --git a/Code/GraphMol/ROMol.h b/Code/GraphMol/ROMol.h index 8b932dbc0..6c761586b 100755 --- a/Code/GraphMol/ROMol.h +++ b/Code/GraphMol/ROMol.h @@ -294,7 +294,7 @@ namespace RDKit{ ... atomPtr is a const Atom * ... ROMol::OEDGE_ITER beg,end; ROMol::GRAPH_MOL_BOND_PMAP::const_type pMap = molPtr->getBondPMap(); - boost::tie(beg,end) = molPtr->getAtomNeighbors(atomPtr); + boost::tie(beg,end) = molPtr->getAtomBonds(atomPtr); while(beg!=end){ const Bond *bond=pMap[*beg]; ... do something with the Bond ... @@ -307,7 +307,7 @@ namespace RDKit{ ... atomPtr is a const Atom * ... ROMol::OEDGE_ITER beg,end; ROMol::GRAPH_MOL_BOND_PMAP::type pMap = molPtr->getBondPMap(); - boost::tie(beg,end) = molPtr->getAtomNeighbors(atomPtr); + boost::tie(beg,end) = molPtr->getAtomBonds(atomPtr); while(beg!=end){ Bond *bond=pMap[*beg]; ... do something with the Bond ... diff --git a/Code/GraphMol/SmilesParse/SmartsWrite.cpp b/Code/GraphMol/SmilesParse/SmartsWrite.cpp index 4a73bbbde..8dcc6857e 100644 --- a/Code/GraphMol/SmilesParse/SmartsWrite.cpp +++ b/Code/GraphMol/SmilesParse/SmartsWrite.cpp @@ -150,7 +150,10 @@ namespace RDKit { needParen = true; } else if (descrip == "AtomInNRings") { - res << "R" << query->getVal(); + res << "R"; + if(query->getVal()>=0){ + res << query->getVal(); + } needParen = true; } else if (descrip == "AtomFormalCharge") { diff --git a/Code/GraphMol/SmilesParse/SmilesParseOps.cpp b/Code/GraphMol/SmilesParse/SmilesParseOps.cpp index 4197813a0..dd41bebee 100755 --- a/Code/GraphMol/SmilesParse/SmilesParseOps.cpp +++ b/Code/GraphMol/SmilesParse/SmilesParseOps.cpp @@ -5,6 +5,7 @@ // @@ All Rights Reserved @@ // #include +#include #include "SmilesParse.h" #include "SmilesParseOps.h" #include @@ -28,7 +29,7 @@ namespace SmilesParseOps{ // between the fragment and the molecule // void AddFragToMol(RWMol *mol,RWMol *frag,Bond::BondType bondOrder, - Bond::BondDir bondDir,bool closeRings ){ + Bond::BondDir bondDir,bool closeRings,bool doingQuery ){ RWMol::GRAPH_NODE_TYPE lastAt = mol->getActiveAtom(); int nOrigAtoms = mol->getNumAtoms(); int nOrigBonds = mol->getNumBonds(); @@ -84,28 +85,56 @@ namespace SmilesParseOps{ leadingBond->setEndAtomIdx(atomIdx1); mol->addBond(leadingBond,true); } else { - if(bondOrder == Bond::UNSPECIFIED){ - // no bond order provided, figure it out ourselves - if(lastAt->getIsAromatic() && firstAt->getIsAromatic() ){ - bo = Bond::AROMATIC; + if(!doingQuery){ + if(bondOrder == Bond::UNSPECIFIED){ + // no bond order provided, figure it out ourselves + if(lastAt->getIsAromatic() && firstAt->getIsAromatic() ){ + bo = Bond::AROMATIC; + } + else{ + bo = Bond::SINGLE; + } } else{ - bo = Bond::SINGLE; + bo = bondOrder; + } + if(bo==Bond::DATIVEL){ + int tmp=atomIdx2; + atomIdx2 = atomIdx1; + atomIdx1 = tmp; + bo = Bond::DATIVE; + } else if(bo == Bond::DATIVER){ + bo = Bond::DATIVE; + } + int idx = mol->addBond(atomIdx2,atomIdx1,bo)-1; + mol->getBondWithIdx(idx)->setBondDir(bondDir); + } else { + // semantics are different in SMARTS, unspecified bonds can be single or aromatic: + if(bondOrder == Bond::UNSPECIFIED){ + QueryBond *newB=new QueryBond(Bond::SINGLE); + newB->expandQuery(makeBondOrderEqualsQuery(Bond::AROMATIC), + Queries::COMPOSITE_OR, + true); + newB->setOwningMol(mol); + newB->setBeginAtomIdx(atomIdx1); + newB->setEndAtomIdx(atomIdx2); + mol->addBond(newB); + delete newB; + } + else{ + bo=bondOrder; + if(bo==Bond::DATIVEL){ + int tmp=atomIdx2; + atomIdx2 = atomIdx1; + atomIdx1 = tmp; + bo = Bond::DATIVE; + } else if(bo == Bond::DATIVER){ + bo = Bond::DATIVE; + } + int idx = mol->addBond(atomIdx2,atomIdx1,bo)-1; + mol->getBondWithIdx(idx)->setBondDir(bondDir); } } - else{ - bo = bondOrder; - } - if(bo==Bond::DATIVEL){ - int tmp=atomIdx2; - atomIdx2 = atomIdx1; - atomIdx1 = tmp; - bo = Bond::DATIVE; - } else if(bo == Bond::DATIVER){ - bo = Bond::DATIVE; - } - int idx = mol->addBond(atomIdx2,atomIdx1,bo)-1; - mol->getBondWithIdx(idx)->setBondDir(bondDir); } } diff --git a/Code/GraphMol/SmilesParse/SmilesParseOps.h b/Code/GraphMol/SmilesParse/SmilesParseOps.h index 242ccf440..35fba1c5b 100755 --- a/Code/GraphMol/SmilesParse/SmilesParseOps.h +++ b/Code/GraphMol/SmilesParse/SmilesParseOps.h @@ -16,7 +16,7 @@ namespace SmilesParseOps { void AddFragToMol(RDKit::RWMol *mol,RDKit::RWMol *frag, RDKit::Bond::BondType bondOrder=RDKit::Bond::UNSPECIFIED, RDKit::Bond::BondDir bondDir=RDKit::Bond::NONE, - bool closeRings=false); + bool closeRings=false,bool doingQuery=false); RDKit::Bond::BondType GetUnspecifiedBondType(const RDKit::RWMol *mol, const RDKit::Atom *atom1, const RDKit::Atom *atom2); diff --git a/Code/GraphMol/SmilesParse/SmilesWrite.cpp b/Code/GraphMol/SmilesParse/SmilesWrite.cpp index 51eecabb3..e9b68741c 100755 --- a/Code/GraphMol/SmilesParse/SmilesWrite.cpp +++ b/Code/GraphMol/SmilesParse/SmilesWrite.cpp @@ -300,7 +300,9 @@ namespace RDKit{ // decisions and I'm gonna want to smack myself for doing this, // but we'll try anyway. std::string MolToSmiles(ROMol &mol,bool doIsomericSmiles, - bool doKekule){ + bool doKekule,int rootedAtAtom){ + PRECONDITION(rootedAtAtom<0||static_cast(rootedAtAtom)updatePropertyCache(); + } - INT_VECT ranks; - ranks.resize(nAtoms); + unsigned int nAtoms=mol.getNumAtoms(); + INT_VECT ranks(nAtoms,-1); // clean up the chirality on any atom that is marked as chiral, // but that should not be: if(doIsomericSmiles){ MolOps::assignAtomChiralCodes(mol,true); - //MolOps::assignBondStereoCodes(mol,true); MolOps::rankAtoms(mol,ranks,ComprehensiveInvariants, true,0,true); } else { @@ -343,14 +344,20 @@ namespace RDKit{ colorIt = colors.begin(); // loop to deal with the possibility that there might be disconnected fragments while(colorIt != colors.end()){ - int i,nextAtomIdx,nextRank; + int nextAtomIdx; std::string subSmi; + // find the next atom for a traverse - nextRank = nAtoms+1; - for(i=0;i=0){ + nextAtomIdx=rootedAtAtom; + rootedAtAtom=-1; + } else { + int nextRank = nAtoms+1; + for(unsigned int i=0;iR { yysmarts_lval.atom = new QueryAtom(); - // FIX: this doesn't default properly - yysmarts_lval.atom->setQuery(makeAtomInNRingsQuery(1)); + yysmarts_lval.atom->setQuery(new InRingQuery(-1)); return COMPLEX_ATOM_QUERY_TOKEN; } r { yysmarts_lval.atom = new QueryAtom(); - // FIX: this doesn't default properly - yysmarts_lval.atom->setQuery(makeAtomInRingOfSizeQuery(3)); + yysmarts_lval.atom->setQuery(makeAtomInRingQuery()); return RINGSIZE_ATOM_QUERY_TOKEN; } diff --git a/Code/GraphMol/SmilesParse/smarts.y b/Code/GraphMol/SmilesParse/smarts.y index ba0fe368d..a7f85ab7e 100755 --- a/Code/GraphMol/SmilesParse/smarts.y +++ b/Code/GraphMol/SmilesParse/smarts.y @@ -61,7 +61,7 @@ static RWMol * curMol_gps = 0; %left SEMI_TOKEN %left OR_TOKEN %left AND_TOKEN -%nonassoc NOT_TOKEN +%right NOT_TOKEN %% @@ -190,7 +190,7 @@ mol: atomd { | mol branch { RWMol *m1_p = molList_g[$$],*m2_p=molList_g[$2]; // FIX: handle generic bonds here - SmilesParseOps::AddFragToMol(m1_p,m2_p,Bond::UNSPECIFIED,Bond::NONE,false); + SmilesParseOps::AddFragToMol(m1_p,m2_p,Bond::UNSPECIFIED,Bond::NONE,false,true); delete m2_p; int sz = molList_g.size(); if ( sz==$2+1) { diff --git a/Code/GraphMol/SmilesParse/smatest.cpp b/Code/GraphMol/SmilesParse/smatest.cpp index 730a75e62..1ef70c6b7 100755 --- a/Code/GraphMol/SmilesParse/smatest.cpp +++ b/Code/GraphMol/SmilesParse/smatest.cpp @@ -133,6 +133,8 @@ std::vector< MatchVectType > _checkMatches(std::string smarts, std::string smile matcher = SmartsToMol(smarts); CHECK_INVARIANT(matcher,smarts); + + //std::cerr << "\tSMA: " << smarts << " -> " << MolToSmarts(*matcher) << std::endl;; mol = SmilesToMol(smiles); CHECK_INVARIANT(mol,smiles); @@ -240,17 +242,45 @@ void testMatches3(){ _checkMatches("[CH3][CH]", "C(C)(C)CC(=O)[O-]", 2, 2); - _checkMatches("[!C;R]", "c1ccccc1", 6, 1); - - _checkMatches("[!C;R]", "C1COC1", 1, 1); - _checkMatches("[c;H]", "c1ccccc1", 6, 1); _checkNoMatches("[c;H]", "C1CCCCC1"); + _checkMatches("[#6]([#7])[#6]", "c1ncccc1", 2, 3); + _checkMatches("[c;H]", "c1c[c-]ccc1", 5, 1); _checkMatches("C~O", "C(=O)[O-]", 2, 2); + + // ----- + // This block is connected to SF-Issue 1538280 + // http://sourceforge.net/tracker/index.php?func=detail&aid=1538280&group_id=160139&atid=814650 + _checkMatches("[R]", "c1ccccc1", 6, 1); + + _checkMatches("[r]", "c1ccccc1", 6, 1); + + _checkMatches("[R;!O]", "c1ccccc1", 6, 1); + + _checkMatches("[c]", "c1ccccc1", 6, 1); + + _checkMatches("[R;c]", "c1ccccc1", 6, 1); + + _checkMatches("[c;#6]", "c1ccccc1", 6, 1); + + _checkMatches("[R;R]", "c1ccccc1", 6, 1); + + _checkMatches("[#6;R]", "c1ccccc1", 6, 1); + + _checkMatches("[c;R]", "c1ccccc1", 6, 1); + + _checkMatches("[!O;R]", "c1ccccc1", 6, 1); + + _checkMatches("[!C;R]", "c1ccccc1", 6, 1); + + _checkMatches("[!C;R]", "C1COC1", 1, 1); + + + BOOST_LOG(rdInfoLog) << "\tdone" << std::endl; } @@ -613,77 +643,6 @@ void testSmartsWrite() { } -void testDeleteSubstruct() -{ - int i = 0; - ROMol *mol1=0,*mol2=0,*matcher1=0,*matcher2=0,*matcher3=0; - std::string smi,sma; - - BOOST_LOG(rdInfoLog) << "-------------------------------------" << std::endl; - BOOST_LOG(rdInfoLog) << "Testing deleteSubstruct" << std::endl; - - // a lot of the seemingly repetitive stuff is here for Issue96 - smi = "CCC(=O).C=O"; - mol1 = SmilesToMol(smi); - TEST_ASSERT(mol1); - sma = "C=O"; - matcher1 = SmartsToMol(sma); - TEST_ASSERT(matcher1); - - mol2 = deleteSubstructs(*mol1,*matcher1,0); - TEST_ASSERT(mol2); - TEST_ASSERT(mol2->getNumAtoms(4)) - mol2 = deleteSubstructs(*mol2,*matcher1,0); - TEST_ASSERT(mol2); - TEST_ASSERT(mol2->getNumAtoms(4)) - - delete matcher1; - sma = "[Cl;H1&X1,-]"; - matcher1 = SmartsToMol(sma); - sma = "[Na+]"; - matcher2 = SmartsToMol(sma); - sma = "[O;H2,H1&-,X0&-2]"; - matcher3 = SmartsToMol(sma); - delete mol1; - mol1 = SmilesToMol("CCO.Cl"); - TEST_ASSERT(mol1); - TEST_ASSERT(mol1->getNumAtoms()==4); - - delete mol2; - mol2 = deleteSubstructs(*mol1,*matcher1,true); - TEST_ASSERT(mol2); - TEST_ASSERT(mol2->getNumAtoms()==3); - mol2 = deleteSubstructs(*mol2,*matcher2,true); - TEST_ASSERT(mol2); - TEST_ASSERT(mol2->getNumAtoms()==3); - mol2 = deleteSubstructs(*mol2,*matcher3,true); - TEST_ASSERT(mol2); - TEST_ASSERT(mol2->getNumAtoms()==3); - - delete mol1; - mol1 = SmilesToMol("CC(=O)[O-].[Na+]"); - TEST_ASSERT(mol1); - TEST_ASSERT(mol1->getNumAtoms()==5); - - delete matcher1; - matcher1 = SmartsToMol("[Cl;H1&X1,-]"); - delete matcher2; - matcher2 = SmartsToMol("[Na+]"); - - mol2 = deleteSubstructs(*mol1,*matcher1,true); - TEST_ASSERT(mol2); - TEST_ASSERT(mol2->getNumAtoms()==5); - - mol2 = deleteSubstructs(*mol2,*matcher2,true); - TEST_ASSERT(mol2); - TEST_ASSERT(mol2->getNumAtoms()==4); - - mol2 = deleteSubstructs(*mol2,*matcher1,true); - TEST_ASSERT(mol2); - TEST_ASSERT(mol2->getNumAtoms()==4); - - BOOST_LOG(rdInfoLog) << "\tdone" << std::endl; -} void testIssue196() { @@ -864,7 +823,7 @@ int main(int argc, char *argv[]) { RDLog::InitLogs(); -#if LOCAL_TEST_ALL +#if 1 testPass(); testFail(); testMatches(); @@ -876,12 +835,12 @@ main(int argc, char *argv[]) testSmartsWrite(); testFrags(); testProblems(); - testDeleteSubstruct(); testIssue196(); testIssue254(); testIssue255(); testIssue330(); -#endif testIssue351(); +#endif + return 0; } diff --git a/Code/GraphMol/SmilesParse/test.cpp b/Code/GraphMol/SmilesParse/test.cpp index 95e2454ff..41a1d5913 100755 --- a/Code/GraphMol/SmilesParse/test.cpp +++ b/Code/GraphMol/SmilesParse/test.cpp @@ -1231,6 +1231,28 @@ void testIssue266(){ BOOST_LOG(rdInfoLog) << "\tdone" << std::endl; } +void testRootedAt(){ + RWMol *mol; + std::string smi; + int numE=0; + + BOOST_LOG(rdInfoLog) << "-------------------------------------" << std::endl; + BOOST_LOG(rdInfoLog) << "Testing rootedAtAtom functionality" << std::endl; + + smi ="CN(C)C"; + mol = SmilesToMol(smi); + TEST_ASSERT(mol); + smi = MolToSmiles(*mol,false,false,-1); + TEST_ASSERT(smi=="CN(C)C"); + smi = MolToSmiles(*mol,false,false,1); + TEST_ASSERT(smi=="N(C)(C)C"); + smi = MolToSmiles(*mol,false,false,2); + TEST_ASSERT(smi=="CN(C)C"); + + + BOOST_LOG(rdInfoLog) << "\tdone" << std::endl; +} + int main(int argc, char *argv[]) @@ -1257,6 +1279,7 @@ main(int argc, char *argv[]) testIssue184(); testIssue191(); testIssue256(); -#endif testIssue266(); +#endif + testRootedAt(); } diff --git a/Code/RDCode.sln b/Code/RDCode.sln index f8a3834c5..92b1affbc 100755 --- a/Code/RDCode.sln +++ b/Code/RDCode.sln @@ -82,6 +82,7 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "testQuery", "Query\QueryTes EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "testSmartsParse", "GraphMol\SmilesParse\SmartsParseTest.vcproj", "{1F0176D6-8882-4399-B8AF-E75B8287A591}" ProjectSection(ProjectDependencies) = postProject + {676E091A-4E6E-429B-91EE-0399C46060C6} = {676E091A-4E6E-429B-91EE-0399C46060C6} {DDF70921-1F56-4EBF-BE90-0E3A0C8BFACB} = {DDF70921-1F56-4EBF-BE90-0E3A0C8BFACB} {08AF0B31-BA0F-4C46-A889-4411ADB81D68} = {08AF0B31-BA0F-4C46-A889-4411ADB81D68} {2E8DFE39-3377-45C0-81D6-D32A2C192F34} = {2E8DFE39-3377-45C0-81D6-D32A2C192F34} @@ -97,6 +98,7 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libSmilesParse", "GraphMol\ EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "testSmilesParse", "GraphMol\SmilesParse\SmilesParseTest.vcproj", "{20AFEDA7-3046-41F0-9BEF-425D0398362A}" ProjectSection(ProjectDependencies) = postProject + {676E091A-4E6E-429B-91EE-0399C46060C6} = {676E091A-4E6E-429B-91EE-0399C46060C6} {DDF70921-1F56-4EBF-BE90-0E3A0C8BFACB} = {DDF70921-1F56-4EBF-BE90-0E3A0C8BFACB} {08AF0B31-BA0F-4C46-A889-4411ADB81D68} = {08AF0B31-BA0F-4C46-A889-4411ADB81D68} {2E8DFE39-3377-45C0-81D6-D32A2C192F34} = {2E8DFE39-3377-45C0-81D6-D32A2C192F34} @@ -185,6 +187,7 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "wrapGraphMol", "GraphMol\Wr {33BB4E07-B13A-44EB-931F-E748D22A548D} = {33BB4E07-B13A-44EB-931F-E748D22A548D} {58DBEB0A-76E7-4029-83DC-0B2AC5AAFA5F} = {58DBEB0A-76E7-4029-83DC-0B2AC5AAFA5F} {0B51D40C-2E72-42AF-97EE-C7D188E717C0} = {0B51D40C-2E72-42AF-97EE-C7D188E717C0} + {676E091A-4E6E-429B-91EE-0399C46060C6} = {676E091A-4E6E-429B-91EE-0399C46060C6} {08AF0B31-BA0F-4C46-A889-4411ADB81D68} = {08AF0B31-BA0F-4C46-A889-4411ADB81D68} {2E8DFE39-3377-45C0-81D6-D32A2C192F34} = {2E8DFE39-3377-45C0-81D6-D32A2C192F34} {1D9A3F4A-DAFD-498E-9C56-59FAE69194C8} = {1D9A3F4A-DAFD-498E-9C56-59FAE69194C8} @@ -222,6 +225,7 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "wrapMolOps", "GraphMol\Wrap {33BB4E07-B13A-44EB-931F-E748D22A548D} = {33BB4E07-B13A-44EB-931F-E748D22A548D} {58DBEB0A-76E7-4029-83DC-0B2AC5AAFA5F} = {58DBEB0A-76E7-4029-83DC-0B2AC5AAFA5F} {0B51D40C-2E72-42AF-97EE-C7D188E717C0} = {0B51D40C-2E72-42AF-97EE-C7D188E717C0} + {676E091A-4E6E-429B-91EE-0399C46060C6} = {676E091A-4E6E-429B-91EE-0399C46060C6} {2E8DFE39-3377-45C0-81D6-D32A2C192F34} = {2E8DFE39-3377-45C0-81D6-D32A2C192F34} {1D9A3F4A-DAFD-498E-9C56-59FAE69194C8} = {1D9A3F4A-DAFD-498E-9C56-59FAE69194C8} {8DA7765B-EF8E-45D4-B192-2C950C5C79F5} = {8DA7765B-EF8E-45D4-B192-2C950C5C79F5} @@ -458,6 +462,7 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ZZZGraphMolRegrs", "GraphMo {C6555EDD-5666-4CF7-8AE2-1CF49A262636} = {C6555EDD-5666-4CF7-8AE2-1CF49A262636} {55973DDE-14F0-47A9-9F85-5B387884F136} = {55973DDE-14F0-47A9-9F85-5B387884F136} {B873A5E6-B25D-425C-8527-C52FB524F607} = {B873A5E6-B25D-425C-8527-C52FB524F607} + {5408B0E6-0B50-4E12-921A-8F1B3E483716} = {5408B0E6-0B50-4E12-921A-8F1B3E483716} {4D9A72F3-BF82-4B1D-BF0C-580DF04E7429} = {4D9A72F3-BF82-4B1D-BF0C-580DF04E7429} {BB2B6CF7-385A-4478-8D03-F7BA96F5ABD4} = {BB2B6CF7-385A-4478-8D03-F7BA96F5ABD4} EndProjectSection @@ -949,6 +954,22 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "testDistGeom", "DistGeom\te {DD564ACC-9799-4A4F-AA43-C42565165EFD} = {DD564ACC-9799-4A4F-AA43-C42565165EFD} EndProjectSection EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libChemTransforms", "GraphMol\ChemTransforms\ChemTransforms.vcproj", "{676E091A-4E6E-429B-91EE-0399C46060C6}" + ProjectSection(ProjectDependencies) = postProject + EndProjectSection +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "testChemTransforms", "GraphMol\ChemTransforms\testChemTransforms.vcproj", "{5408B0E6-0B50-4E12-921A-8F1B3E483716}" + ProjectSection(ProjectDependencies) = postProject + {676E091A-4E6E-429B-91EE-0399C46060C6} = {676E091A-4E6E-429B-91EE-0399C46060C6} + {DDF70921-1F56-4EBF-BE90-0E3A0C8BFACB} = {DDF70921-1F56-4EBF-BE90-0E3A0C8BFACB} + {08AF0B31-BA0F-4C46-A889-4411ADB81D68} = {08AF0B31-BA0F-4C46-A889-4411ADB81D68} + {2E8DFE39-3377-45C0-81D6-D32A2C192F34} = {2E8DFE39-3377-45C0-81D6-D32A2C192F34} + {8DA7765B-EF8E-45D4-B192-2C950C5C79F5} = {8DA7765B-EF8E-45D4-B192-2C950C5C79F5} + {41B2DB92-DBB1-4C73-916C-BD7372F46FD4} = {41B2DB92-DBB1-4C73-916C-BD7372F46FD4} + {8A443D9F-4C81-455C-82AB-AA26C9F9EE1C} = {8A443D9F-4C81-455C-82AB-AA26C9F9EE1C} + {A6FD01DB-2F79-4317-85F8-BA361B8EA095} = {A6FD01DB-2F79-4317-85F8-BA361B8EA095} + EndProjectSection +EndProject Global GlobalSection(SolutionConfiguration) = preSolution Debug = Debug @@ -1616,6 +1637,18 @@ Global {91158915-015A-434B-AE89-CE55709440DC}.DebugPython.Build.0 = Debug|Win32 {91158915-015A-434B-AE89-CE55709440DC}.Release.ActiveCfg = Release|Win32 {91158915-015A-434B-AE89-CE55709440DC}.Release.Build.0 = Release|Win32 + {676E091A-4E6E-429B-91EE-0399C46060C6}.Debug.ActiveCfg = Debug|Win32 + {676E091A-4E6E-429B-91EE-0399C46060C6}.Debug.Build.0 = Debug|Win32 + {676E091A-4E6E-429B-91EE-0399C46060C6}.DebugPython.ActiveCfg = Debug|Win32 + {676E091A-4E6E-429B-91EE-0399C46060C6}.DebugPython.Build.0 = Debug|Win32 + {676E091A-4E6E-429B-91EE-0399C46060C6}.Release.ActiveCfg = Release|Win32 + {676E091A-4E6E-429B-91EE-0399C46060C6}.Release.Build.0 = Release|Win32 + {5408B0E6-0B50-4E12-921A-8F1B3E483716}.Debug.ActiveCfg = Debug|Win32 + {5408B0E6-0B50-4E12-921A-8F1B3E483716}.Debug.Build.0 = Debug|Win32 + {5408B0E6-0B50-4E12-921A-8F1B3E483716}.DebugPython.ActiveCfg = Debug|Win32 + {5408B0E6-0B50-4E12-921A-8F1B3E483716}.DebugPython.Build.0 = Debug|Win32 + {5408B0E6-0B50-4E12-921A-8F1B3E483716}.Release.ActiveCfg = Release|Win32 + {5408B0E6-0B50-4E12-921A-8F1B3E483716}.Release.Build.0 = Release|Win32 EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution EndGlobalSection diff --git a/Python/Chem/ChemUtils/TemplateExpand.py b/Python/Chem/ChemUtils/TemplateExpand.py new file mode 100644 index 000000000..7ac6fd406 --- /dev/null +++ b/Python/Chem/ChemUtils/TemplateExpand.py @@ -0,0 +1,253 @@ +import RDLogger as logging +logger = logging.logger() +logger.setLevel(logging.INFO) +import Chem +from Chem import Crippen +from Chem import AllChem +from Chem.ChemUtils.AlignDepict import AlignDepict + +_version="0.2.1" +_greet="This is TemplateExpand version %s"%_version + +_usage=""" +Usage: TemplateExpand [options] template + + Unless otherwise indicated, the template and sidechains are assumed to be + Smiles + + Each sidechain entry should be: + [-r] SMARTS filename + The SMARTS pattern is used to recognize the attachment point, + if the -r argument is not provided, then atoms matching the pattern + will be removed from the sidechains. + or + -n filename + where the attachment atom is the first atom in each molecule + The filename provides the list of potential sidechains. + + options: + -o filename.sdf: provides the name of the output file, otherwise + stdout is used + --sdf : sidechains should be in SD files + --molTemplate: the template is a Mol file, a new depiction + will not be generated + +""" +def Usage(): + import sys + print _usage + sys.exit(-1) + +nDumped=0 +def _exploder(mol,depth,sidechains,outF,core,chainIndices): + global nDumped + ourChains = sidechains[depth] + patt = '[X%s]'%(chr(ord('a')+depth)) + patt = Chem.MolFromSmiles(patt) + for i,chain in enumerate(ourChains): + tchain = chainIndices[:] + tchain.append(i+1) + rs = Chem.ReplaceSubstructs(mol,patt,chain,replaceAll=True) + if rs: + r = rs[0] + if depth>sys.stderr,'>>> no match' + AllChem.Compute2DCoords(r) + Chem.Kekulize(r) + r.SetProp("_Name","TemplateEnum: Mol_%d"%(nDumped+1)) + mb = Chem.MolToMolBlock(r) + outF.write(mb) + print >>outF,"> \n%d\n"%(nDumped+1) + print >>outF,"> \n%s\n"%('_'.join([str(x) for x in tchain])) + print >>outF,'$$$$' + nDumped += 1 + if not nDumped%100: + logger.info('Done %d molecules'%nDumped) + +def Explode(template,sidechains,outF): + chainIndices=[] + core = Chem.DeleteSubstructs(template,Chem.MolFromSmiles('[X]')) + _exploder(template,0,sidechains,outF,core,chainIndices) + + +def ConstructSidechains(suppl,sma=None,replace=True): + if sma: + try: + patt = Chem.MolFromSmarts(sma) + except: + logger.error('could not construct pattern from smarts: %s'%sma, + exc_info=True) + return None + else: + patt = None + + if replace: + replacement = Chem.MolFromSmiles('[X]') + + res = [] + for mol in suppl: + if not mol: + continue + if patt: + if replace: + tmp = list(Chem.ReplaceSubstructs(mol,patt,replacement)) + for i,entry in enumerate(tmp): + match = entry.GetSubstructMatch(replacement) + if match: + idx = match[0] + smi = Chem.MolToSmiles(entry,rootedAtAtom=idx) + entry = Chem.MolFromSmiles(smi) + # entry now has [X] as atom 0 and the neighbor + # as atom 1. Cleave the [X]: + entry = Chem.DeleteSubstructs(entry,replacement) + # now we have a molecule with the atom to be joined + # in position zero; Keep that: + tmp[i] = entry + else: + # no replacement, use the pattern to reorder + # atoms only: + match = mol.GetSubstructMatch(patt) + if match: + smi = Chem.MolToSmiles(mol,rootedAtAtom=match[0]) + entry = Chem.MolFromSmiles(smi) + # now we have a molecule with the atom to be joined + # in position zero; Keep that: + tmp = [entry] + else: + tmp = None + else: + tmp = [mol] + if tmp: + res.extend(tmp) + return res + +if __name__=='__main__': + import getopt,sys + print >>sys.stderr,_greet + + try: + args,extras = getopt.getopt(sys.argv[1:],'o:h',[ + 'sdf', + 'molTemplate', + 'force' + ]) + except: + import traceback + traceback.print_exc() + Usage() + + if len(extras)<3: + Usage() + + tooLong=1000 + sdLigands=False + molTemplate=False + redrawTemplate=True + outF=None + forceIt=False + for arg,val in args: + if arg=='-o': + outF=val + elif arg=='--sdf': + sdLigands=True + elif arg=='--molTemplate': + molTemplate=True + elif arg=='--force': + forceIt=True + elif arg=='-h': + Usage() + sys.exit(0) + + if molTemplate: + try: + template = Chem.MolFromMolFile(extras[0]) + except: + logger.error('Could not construct template from mol file: %s'%extras[0], + exc_info=True) + sys.exit(1) + if redrawTemplate: + AllChem.Compute2DCoords(template) + else: + try: + template = Chem.MolFromSmiles(extras[0]) + AllChem.Compute2DCoords(template) + except: + logger.error('Could not construct template from smiles: %s'%extras[0], + exc_info=True) + sys.exit(1) + + sidechains = [] + pos = 1 + while pos>sys.stderr,"No molecules to be generated." + sys.exit(0) + + + + if not forceIt and count>tooLong: + print >>sys.stderr,"This will generate %d molecules."%count + ans = raw_input("Continue anyway? [no]") + if ans not in ('y','yes','Y','YES'): + sys.exit(0) + + if outF: + try: + outF = file(outF,'w+') + except IOError: + logger.error('could not open file %s for writing'%(outF), + exc_info=True) + else: + outF = sys.stdout + + + Explode(template,sidechains,outF) + + + + + diff --git a/Python/Chem/Features/FeatDirUtils.py b/Python/Chem/Features/FeatDirUtils.py index a42862801..dd966e1ca 100644 --- a/Python/Chem/Features/FeatDirUtils.py +++ b/Python/Chem/Features/FeatDirUtils.py @@ -1,365 +1,365 @@ -# $Id$ -# -# Copyright (C) 2004-2006 Rational Discovery LLC -# -# @@ All Rights Reserved @@ -# -from Numeric import * -import math - -# BIG NOTE: we are going assume atom IDs starting from 0 instead of 1 -# for all the functions in this file. This is so that they -# are reasonably indepedent of the combicode. However when using -# with combicode the caller needs to make sure the atom IDs from combicode -# are corrected before feeding them in here. - -def cross(v1,v2): - res = array([ v1[1]*v2[2] - v1[2]*v2[1], - -v1[0]*v2[2] + v1[2]*v2[0], - v1[0]*v2[1] - v1[1]*v2[0]],Float) - return res - -def findNeighbors(atomId, adjMat): - """ - Find the IDs of the neighboring atom IDs - - ARGUMENTS: - atomId - atom of interest - adjMat - adjacency matrix for the compound - """ - nbrs = [] - for i,nid in enumerate(adjMat[atomId]): - if nid >= 1 : - nbrs.append(i) - return nbrs - -def _findAvgVec(coords, center, nbrs) : - # find the average of the normalized vectors going from the center atoms to the - # neighbors - # the average vector is also normalized - cpt = array(coords[3*center:3*center+3]) - avgVec = 0 - for nid in nbrs: - pt = array(coords[3*nid:3*nid+3]) - pt -= cpt - pt /= (sqrt(innerproduct(pt, pt))) - if (avgVec == 0) : - avgVec = pt - else : - avgVec += pt - - avgVec /= (sqrt(innerproduct(avgVec, avgVec))) - return avgVec - -def GetAromaticFeatVects(coords, featAtoms, featLoc, scale=1.5): - """ - Compute the direction vector for an aromatic feature - - ARGUMENTS: - coords - an list of atom coordinates the format is [x1, y1, z1, x2, y2, z3 ....] - featAtoms - list of atom IDs that make up the feature - featLoc - location of the aromatic feature specified as a numeric array - scale - the size of the direction vector - """ - dirType = 'linear' - head = featLoc - ats = [coords[3*(x):3*(x)+3] for x in featAtoms] - - p0 = array(ats[0]) - p1 = array(ats[1]) - v1 = p0-head - v2 = p1-head - norm1 = cross(v1,v2) - norm1 = norm1/sqrt(innerproduct(norm1,norm1)) - norm1 *= scale - norm2 = -norm1 - norm1 += head - norm2 += head - return ( (head,norm1),(head,norm2) ), dirType - -def ArbAxisRotation(theta,ax,pt): - theta = math.pi*theta/180 - c = math.cos(theta) - s = math.sin(theta) - t = 1-c - X = ax[0] - Y = ax[1] - Z = ax[2] - mat = [ [t*X*X+c, t*X*Y+s*Z, t*X*Z-s*Y], - [t*X*Y-s*Z,t*Y*Y+c,t*Y*Z+s*X], - [t*X*Z+s*Y,t*Y*Z-s*X,t*Z*Z+c] ] - mat = array(mat) - return matrixmultiply(mat,pt) - -def GetAcceptor2FeatVects(coords, adjMat, atomNames, featAtoms, scale=1.5): - """ - Get the direction vectors for Acceptor of type 2 - - This is the acceptor with two adjacent heavy atoms. We will special case a few things here. - If the acceptor atom is an oxygen we will assume a sp3 hybridization - the acceptor directions (two of them) - reflect that configurations. Otherwise the direction vector in plane with the neighboring - heavy atoms - - ARGUMENTS: - coords - an list of atom coordinates the format is [x1, y1, z1, x2, y2, z3 ....] - adjMat - adjacency martix of the compound - featAtoms - list of atoms that are part of the feature - scale - length of the direction vector - """ - assert len(featAtoms) == 1 - aid = featAtoms[0] - cpt = array(coords[3*aid:3*aid+3]) - nbrs = findNeighbors(aid, adjMat) - assert len(nbrs) == 2 - - bvec = _findAvgVec(coords, aid, nbrs) - bvec *= (-1.0*scale) - #bvec += cpt - - if (atomNames[aid] == 'O') : - # assume sp3 - # we will create two vectors by rotating bvec by half the tetrahedral angle in either directions - v1 = array(coords[3*nbrs[0]:3*nbrs[0]+3]) - v1 -= cpt - v2 = array(coords[3*nbrs[1]:3*nbrs[1]+3]) - v2 -= cpt - rotAxis = v1 - v2 - rotAxis /= sqrt(innerproduct(rotAxis, rotAxis)) - bv1 = ArbAxisRotation(54.5, rotAxis, bvec) - bv1 += cpt - bv2 = ArbAxisRotation(-54.5, rotAxis, bvec) - bv2 += cpt - return ((cpt, bv1), (cpt, bv2),), 'linear' - else : - bvec += cpt - return ((cpt, bvec),), 'linear' - -def GetDonor3FeatVects(coords, adjMat, featAtoms, scale=1.5): - """ - Get the direction vectors for Donor of type 3 - - This is a donor with three heavy atoms as neighbors. We will assume - a tetrahedral arrangement of these neighbors. So the direction we are seeking - is the last fourth arm of the sp3 arrangment - - ARGUMENTS: - coords - an list of atom coordinates the format is [x1, y1, z1, x2, y2, z3 ....] - adjMat - adjacency martix of the compound - featAtoms - list of atoms that are part of the feature - scale - length of the direction vector - """ - assert len(featAtoms) == 1 - aid = featAtoms[0] - cpt = array(coords[3*aid:3*aid+3]) - nbrs = findNeighbors(aid, adjMat) - - bvec = _findAvgVec(coords, aid, nbrs) - bvec *= (-1.0*scale) - bvec += cpt - return ((cpt, bvec),), 'linear' - -def _findHydAtoms(nbrs, atomNames): - hAtoms = [] - for nid in nbrs: - if atomNames[nid] == 'H': - hAtoms.append(nid) - return hAtoms - -def _checkPlanarity(coords, center, nbrs, tol=1.0e-3): - assert len(nbrs) == 3 - cpt = array(coords[3*center:3*center+3]) - v1 = array(coords[3*nbrs[0]:3*nbrs[0]+3]) - v1 -= cpt - v2 = array(coords[3*nbrs[1]:3*nbrs[1]+3]) - v2 -= cpt - v3 = array(coords[3*nbrs[2]:3*nbrs[2]+3]) - v3 -= cpt - normal = cross(v1,v2) - dotP = abs(innerproduct(v3, normal)) - - if (dotP <= tol) : - return 1 - else : - return 0 - -def GetDonor2FeatVects(coords, adjMat, atomNames, featAtoms, scale=1.5) : - """ - Get the direction vectors for Donor of type 2 - - This is a donor with two heavy atoms as neighbors. The atom may are may not have - hydrogen on it. Here are the situations with the neighbors that will be considered here - 1. two heavy atoms and two hydrogens: we will assume a sp3 arrangement here - 2. two heavy atoms and one hydrogen: this can either be sp2 or sp3 - 3. two heavy atoms and no hydrogens - - ARGUMENTS: - coords - an list of atom coordinates the format is [x1, y1, z1, x2, y2, z3 ....] - adjMat - adjacency martix of the compound - atomNames - element names of the atoms in the compound - featAtoms - list of atoms that are part of the feature - scale - length of the direction vector - """ - - assert len(featAtoms) == 1 - aid = featAtoms[0] - cpt = array(coords[3*aid:3*aid+3]) - # find the two atoms that are neighbors of this atoms - nbrs = findNeighbors(aid, adjMat) - assert len(nbrs) >= 2 - - hydrogens = _findHydAtoms(nbrs, atomNames) - - if len(nbrs) == 2: - # there should be no hydrogens in this case - assert len(hydrogens) == 0 - # in this case the direction is the opposite of the average vector of the two neighbors - bvec = _findAvgVec(coords, aid, nbrs) - bvec *= (-1.0*scale) - bvec += cpt - return ((cpt, bvec),), 'linear' - - elif len(nbrs) == 3: - assert len(hydrogens) == 1 - # this is a little more tricky we have to check if the hydrogen is in the plane of the - # two heavy atoms (i.e. sp2 arrangement) or out of plane (sp3 arrangement) - - # one of the directions will be from hydrogen atom to the heavy atom - hid = hydrogens[0] - bvec = array(coords[3*hid:3*hid+3]) - bvec -= cpt - bvec /= (sqrt(innerproduct(bvec, bvec))) - bvec *= scale - bvec += cpt - if _checkPlanarity(coords, aid, nbrs): - # only the hydrogen atom direction needs to be used - return ((cpt, bvec),), 'linear' - else : - # we have a non-planar configuration - we will assume sp3 and compute a second direction vector - ovec = _findAvgVec(coords, aid, nbrs) - ovec *= (-1.0*scale) - ovec += cpt - return ((cpt, bvec), (cpt, ovec),), 'linear' - - elif len(nbrs) >= 4 : - # in this case we should have two or more hydrogens we will simple use there directions - res = [] - for hid in hydrogenss: - bvec = array(coords[3*(hid):3*hid+3]) - bvec -= cpt - bvec /= (sqrt(innerproduct(bvec, bvec))) - bvec *= scale - bvec += cpt - res.append((cpt, bvec)) - return tuple(res), 'linear' - -def GetDonor1FeatVects(coords, adjMat, atomNames, featAtoms, scale=1.5) : - """ - Get the direction vectors for Donor of type 1 - - This is a donor with one heavy atom. It is not clear where we should we should be putting the - direction vector for this. It should probably be a cone. In this case we will just use the - direction vector from the donor atom to the heavy atom - - ARGUMENTS: - coords - an list of atom coordinates the format is [x1, y1, z1, x2, y2, z3 ....] - adjMat - adjacency martix of the compound - atomNames - element names of the atoms in the compound - featAtoms - list of atoms that are part of the feature - scale - length of the direction vector - """ - assert len(featAtoms) == 1 - aid = featAtoms[0] - nbrs = findNeighbors(aid, adjMat) - - # find the neighboring heavy atom - hnbr = -1 - for nid in nbrs: - if atomNames[nid] != 'H': - hnbr = nid - break - - cpt = array(coords[3*aid:3*aid+3]) - v1 = array(coords[3*hnbr:3*hnbr+3]) - v1 -= cpt - v1 /= (sqrt(innerproduct(v1, v1))) - v1 *= (-1.0*scale) - v1 += cpt - return ((cpt, v1),), 'cone' - -def GetAcceptor1FeatVects(coords, adjMat, atomNames, featAtoms, scale=1.5) : - """ - Get the direction vectors for Acceptor of type 1 - - This is a acceptor with one heavy atom neighbor. There are two possibilities we will - consider here - 1. The bond to the heavy atom is a single bond e.g. CO - In this case we don't know the exact direction and we just use the inversion of this bond direction - and mark this direction as a 'cone' - 2. The bond to the heavy atom is a double bond e.g. C=O - In this case the we have two possible direction except in some special cases e.g. SO2 - where again we will use bond direction - - ARGUMENTS: - coords - an list of atom coordinates the format is [x1, y1, z1, x2, y2, z3 ....] - adjMat - adjacency martix of the compound - atomNames - element names of the atoms in the compound - featAtoms - list of atoms that are part of the feature - scale - length of the direction vector - """ - assert len(featAtoms) == 1 - aid = featAtoms[0] - nbrs = findNeighbors(aid, adjMat) - cpt = array(coords[3*aid:3*aid+3]) - # find the adjacent heavy atom - heavyAt = -1 - for nbr in nbrs: - if atomNames[nbr] != 'H': - heavyAt = nbr - break - - singleBnd = 1 - if adjMat[aid][heavyAt] >= 1.5: - singleBnd = 0 - - # special scale - if the heavy atom is a sulfer (we sould proabably check phosphorous as well - sulfer = 0 - if atomNames[heavyAt] == 'S': - sulfer = 1 - - if singleBnd or sulfer: - v1 = array(coords[3*heavyAt:3*heavyAt+3]) - v1 -= cpt - v1 /= (sqrt(innerproduct(v1, v1))) - v1 *= (-1.0*scale) - v1 += cpt - return ((cpt, v1),), 'cone' - - else : - # ok in this case we will assume that - # heavy atom is sp2 hybridized and the diretion vectors (two of them) - # are in the same plane, we will find this plane by looking for one - # of the neighbors of the heavy atom - hvNbrs = findNeighbors(heavyAt, adjMat) - hvNbr = -1 - for nbr in hvNbrs: - if (nbr != aid) : - hvNbr = nbr - break - - pt1 = array(coords[3*hvNbr:3*hvNbr+3]) - v1 = array(coords[3*heavyAt:3*heavyAt+3]) - pt1 -= v1 - v1 -= cpt - rotAxis = cross(v1, pt1) - rotAxis /= sqrt(innerproduct(rotAxis, rotAxis)) - bv1 = ArbAxisRotation(120, rotAxis, v1) - bv1 /= sqrt(innerproduct(bv1, bv1)) - bv1 *= scale - bv1 += cpt - bv2 = ArbAxisRotation(-120, rotAxis, v1) - bv2 /= sqrt(innerproduct(bv2, bv2)) - bv2 *= scale - bv2 += cpt - return ((cpt, bv1), (cpt, bv2),), 'linear' - +# $Id$ +# +# Copyright (C) 2004-2006 Rational Discovery LLC +# +# @@ All Rights Reserved @@ +# +from Numeric import * +import math + +# BIG NOTE: we are going assume atom IDs starting from 0 instead of 1 +# for all the functions in this file. This is so that they +# are reasonably indepedent of the combicode. However when using +# with combicode the caller needs to make sure the atom IDs from combicode +# are corrected before feeding them in here. + +def cross(v1,v2): + res = array([ v1[1]*v2[2] - v1[2]*v2[1], + -v1[0]*v2[2] + v1[2]*v2[0], + v1[0]*v2[1] - v1[1]*v2[0]],Float) + return res + +def findNeighbors(atomId, adjMat): + """ + Find the IDs of the neighboring atom IDs + + ARGUMENTS: + atomId - atom of interest + adjMat - adjacency matrix for the compound + """ + nbrs = [] + for i,nid in enumerate(adjMat[atomId]): + if nid >= 1 : + nbrs.append(i) + return nbrs + +def _findAvgVec(coords, center, nbrs) : + # find the average of the normalized vectors going from the center atoms to the + # neighbors + # the average vector is also normalized + cpt = array(coords[3*center:3*center+3]) + avgVec = 0 + for nid in nbrs: + pt = array(coords[3*nid:3*nid+3]) + pt -= cpt + pt /= (sqrt(innerproduct(pt, pt))) + if (avgVec == 0) : + avgVec = pt + else : + avgVec += pt + + avgVec /= (sqrt(innerproduct(avgVec, avgVec))) + return avgVec + +def GetAromaticFeatVects(coords, featAtoms, featLoc, scale=1.5): + """ + Compute the direction vector for an aromatic feature + + ARGUMENTS: + coords - an list of atom coordinates the format is [x1, y1, z1, x2, y2, z3 ....] + featAtoms - list of atom IDs that make up the feature + featLoc - location of the aromatic feature specified as a numeric array + scale - the size of the direction vector + """ + dirType = 'linear' + head = featLoc + ats = [coords[3*(x):3*(x)+3] for x in featAtoms] + + p0 = array(ats[0]) + p1 = array(ats[1]) + v1 = p0-head + v2 = p1-head + norm1 = cross(v1,v2) + norm1 = norm1/sqrt(innerproduct(norm1,norm1)) + norm1 *= scale + norm2 = -norm1 + norm1 += head + norm2 += head + return ( (head,norm1),(head,norm2) ), dirType + +def ArbAxisRotation(theta,ax,pt): + theta = math.pi*theta/180 + c = math.cos(theta) + s = math.sin(theta) + t = 1-c + X = ax[0] + Y = ax[1] + Z = ax[2] + mat = [ [t*X*X+c, t*X*Y+s*Z, t*X*Z-s*Y], + [t*X*Y-s*Z,t*Y*Y+c,t*Y*Z+s*X], + [t*X*Z+s*Y,t*Y*Z-s*X,t*Z*Z+c] ] + mat = array(mat) + return matrixmultiply(mat,pt) + +def GetAcceptor2FeatVects(coords, adjMat, atomNames, featAtoms, scale=1.5): + """ + Get the direction vectors for Acceptor of type 2 + + This is the acceptor with two adjacent heavy atoms. We will special case a few things here. + If the acceptor atom is an oxygen we will assume a sp3 hybridization + the acceptor directions (two of them) + reflect that configurations. Otherwise the direction vector in plane with the neighboring + heavy atoms + + ARGUMENTS: + coords - an list of atom coordinates the format is [x1, y1, z1, x2, y2, z3 ....] + adjMat - adjacency martix of the compound + featAtoms - list of atoms that are part of the feature + scale - length of the direction vector + """ + assert len(featAtoms) == 1 + aid = featAtoms[0] + cpt = array(coords[3*aid:3*aid+3]) + nbrs = findNeighbors(aid, adjMat) + assert len(nbrs) == 2 + + bvec = _findAvgVec(coords, aid, nbrs) + bvec *= (-1.0*scale) + #bvec += cpt + + if (atomNames[aid] == 'O') : + # assume sp3 + # we will create two vectors by rotating bvec by half the tetrahedral angle in either directions + v1 = array(coords[3*nbrs[0]:3*nbrs[0]+3]) + v1 -= cpt + v2 = array(coords[3*nbrs[1]:3*nbrs[1]+3]) + v2 -= cpt + rotAxis = v1 - v2 + rotAxis /= sqrt(innerproduct(rotAxis, rotAxis)) + bv1 = ArbAxisRotation(54.5, rotAxis, bvec) + bv1 += cpt + bv2 = ArbAxisRotation(-54.5, rotAxis, bvec) + bv2 += cpt + return ((cpt, bv1), (cpt, bv2),), 'linear' + else : + bvec += cpt + return ((cpt, bvec),), 'linear' + +def GetDonor3FeatVects(coords, adjMat, featAtoms, scale=1.5): + """ + Get the direction vectors for Donor of type 3 + + This is a donor with three heavy atoms as neighbors. We will assume + a tetrahedral arrangement of these neighbors. So the direction we are seeking + is the last fourth arm of the sp3 arrangment + + ARGUMENTS: + coords - an list of atom coordinates the format is [x1, y1, z1, x2, y2, z3 ....] + adjMat - adjacency martix of the compound + featAtoms - list of atoms that are part of the feature + scale - length of the direction vector + """ + assert len(featAtoms) == 1 + aid = featAtoms[0] + cpt = array(coords[3*aid:3*aid+3]) + nbrs = findNeighbors(aid, adjMat) + + bvec = _findAvgVec(coords, aid, nbrs) + bvec *= (-1.0*scale) + bvec += cpt + return ((cpt, bvec),), 'linear' + +def _findHydAtoms(nbrs, atomNames): + hAtoms = [] + for nid in nbrs: + if atomNames[nid] == 'H': + hAtoms.append(nid) + return hAtoms + +def _checkPlanarity(coords, center, nbrs, tol=1.0e-3): + assert len(nbrs) == 3 + cpt = array(coords[3*center:3*center+3]) + v1 = array(coords[3*nbrs[0]:3*nbrs[0]+3]) + v1 -= cpt + v2 = array(coords[3*nbrs[1]:3*nbrs[1]+3]) + v2 -= cpt + v3 = array(coords[3*nbrs[2]:3*nbrs[2]+3]) + v3 -= cpt + normal = cross(v1,v2) + dotP = abs(innerproduct(v3, normal)) + + if (dotP <= tol) : + return 1 + else : + return 0 + +def GetDonor2FeatVects(coords, adjMat, atomNames, featAtoms, scale=1.5) : + """ + Get the direction vectors for Donor of type 2 + + This is a donor with two heavy atoms as neighbors. The atom may are may not have + hydrogen on it. Here are the situations with the neighbors that will be considered here + 1. two heavy atoms and two hydrogens: we will assume a sp3 arrangement here + 2. two heavy atoms and one hydrogen: this can either be sp2 or sp3 + 3. two heavy atoms and no hydrogens + + ARGUMENTS: + coords - an list of atom coordinates the format is [x1, y1, z1, x2, y2, z3 ....] + adjMat - adjacency martix of the compound + atomNames - element names of the atoms in the compound + featAtoms - list of atoms that are part of the feature + scale - length of the direction vector + """ + + assert len(featAtoms) == 1 + aid = featAtoms[0] + cpt = array(coords[3*aid:3*aid+3]) + # find the two atoms that are neighbors of this atoms + nbrs = findNeighbors(aid, adjMat) + assert len(nbrs) >= 2 + + hydrogens = _findHydAtoms(nbrs, atomNames) + + if len(nbrs) == 2: + # there should be no hydrogens in this case + assert len(hydrogens) == 0 + # in this case the direction is the opposite of the average vector of the two neighbors + bvec = _findAvgVec(coords, aid, nbrs) + bvec *= (-1.0*scale) + bvec += cpt + return ((cpt, bvec),), 'linear' + + elif len(nbrs) == 3: + assert len(hydrogens) == 1 + # this is a little more tricky we have to check if the hydrogen is in the plane of the + # two heavy atoms (i.e. sp2 arrangement) or out of plane (sp3 arrangement) + + # one of the directions will be from hydrogen atom to the heavy atom + hid = hydrogens[0] + bvec = array(coords[3*hid:3*hid+3]) + bvec -= cpt + bvec /= (sqrt(innerproduct(bvec, bvec))) + bvec *= scale + bvec += cpt + if _checkPlanarity(coords, aid, nbrs): + # only the hydrogen atom direction needs to be used + return ((cpt, bvec),), 'linear' + else : + # we have a non-planar configuration - we will assume sp3 and compute a second direction vector + ovec = _findAvgVec(coords, aid, nbrs) + ovec *= (-1.0*scale) + ovec += cpt + return ((cpt, bvec), (cpt, ovec),), 'linear' + + elif len(nbrs) >= 4 : + # in this case we should have two or more hydrogens we will simple use there directions + res = [] + for hid in hydrogenss: + bvec = array(coords[3*(hid):3*hid+3]) + bvec -= cpt + bvec /= (sqrt(innerproduct(bvec, bvec))) + bvec *= scale + bvec += cpt + res.append((cpt, bvec)) + return tuple(res), 'linear' + +def GetDonor1FeatVects(coords, adjMat, atomNames, featAtoms, scale=1.5) : + """ + Get the direction vectors for Donor of type 1 + + This is a donor with one heavy atom. It is not clear where we should we should be putting the + direction vector for this. It should probably be a cone. In this case we will just use the + direction vector from the donor atom to the heavy atom + + ARGUMENTS: + coords - an list of atom coordinates the format is [x1, y1, z1, x2, y2, z3 ....] + adjMat - adjacency martix of the compound + atomNames - element names of the atoms in the compound + featAtoms - list of atoms that are part of the feature + scale - length of the direction vector + """ + assert len(featAtoms) == 1 + aid = featAtoms[0] + nbrs = findNeighbors(aid, adjMat) + + # find the neighboring heavy atom + hnbr = -1 + for nid in nbrs: + if atomNames[nid] != 'H': + hnbr = nid + break + + cpt = array(coords[3*aid:3*aid+3]) + v1 = array(coords[3*hnbr:3*hnbr+3]) + v1 -= cpt + v1 /= (sqrt(innerproduct(v1, v1))) + v1 *= (-1.0*scale) + v1 += cpt + return ((cpt, v1),), 'cone' + +def GetAcceptor1FeatVects(coords, adjMat, atomNames, featAtoms, scale=1.5) : + """ + Get the direction vectors for Acceptor of type 1 + + This is a acceptor with one heavy atom neighbor. There are two possibilities we will + consider here + 1. The bond to the heavy atom is a single bond e.g. CO + In this case we don't know the exact direction and we just use the inversion of this bond direction + and mark this direction as a 'cone' + 2. The bond to the heavy atom is a double bond e.g. C=O + In this case the we have two possible direction except in some special cases e.g. SO2 + where again we will use bond direction + + ARGUMENTS: + coords - an list of atom coordinates the format is [x1, y1, z1, x2, y2, z3 ....] + adjMat - adjacency martix of the compound + atomNames - element names of the atoms in the compound + featAtoms - list of atoms that are part of the feature + scale - length of the direction vector + """ + assert len(featAtoms) == 1 + aid = featAtoms[0] + nbrs = findNeighbors(aid, adjMat) + cpt = array(coords[3*aid:3*aid+3]) + # find the adjacent heavy atom + heavyAt = -1 + for nbr in nbrs: + if atomNames[nbr] != 'H': + heavyAt = nbr + break + + singleBnd = 1 + if adjMat[aid][heavyAt] >= 1.5: + singleBnd = 0 + + # special scale - if the heavy atom is a sulfer (we sould proabably check phosphorous as well + sulfer = 0 + if atomNames[heavyAt] == 'S': + sulfer = 1 + + if singleBnd or sulfer: + v1 = array(coords[3*heavyAt:3*heavyAt+3]) + v1 -= cpt + v1 /= (sqrt(innerproduct(v1, v1))) + v1 *= (-1.0*scale) + v1 += cpt + return ((cpt, v1),), 'cone' + + else : + # ok in this case we will assume that + # heavy atom is sp2 hybridized and the diretion vectors (two of them) + # are in the same plane, we will find this plane by looking for one + # of the neighbors of the heavy atom + hvNbrs = findNeighbors(heavyAt, adjMat) + hvNbr = -1 + for nbr in hvNbrs: + if (nbr != aid) : + hvNbr = nbr + break + + pt1 = array(coords[3*hvNbr:3*hvNbr+3]) + v1 = array(coords[3*heavyAt:3*heavyAt+3]) + pt1 -= v1 + v1 -= cpt + rotAxis = cross(v1, pt1) + rotAxis /= sqrt(innerproduct(rotAxis, rotAxis)) + bv1 = ArbAxisRotation(120, rotAxis, v1) + bv1 /= sqrt(innerproduct(bv1, bv1)) + bv1 *= scale + bv1 += cpt + bv2 = ArbAxisRotation(-120, rotAxis, v1) + bv2 /= sqrt(innerproduct(bv2, bv2)) + bv2 *= scale + bv2 += cpt + return ((cpt, bv1), (cpt, bv2),), 'linear' + diff --git a/Python/Chem/Features/FeatDirUtilsRD.py b/Python/Chem/Features/FeatDirUtilsRD.py new file mode 100644 index 000000000..4fa3929ff --- /dev/null +++ b/Python/Chem/Features/FeatDirUtilsRD.py @@ -0,0 +1,401 @@ +# $Id$ +# +# Copyright (C) 2004-2006 Rational Discovery LLC +# +# @@ All Rights Reserved @@ +# +import Geometry +import Chem +from Numeric import * +import math + +# BIG NOTE: we are going assume atom IDs starting from 0 instead of 1 +# for all the functions in this file. This is so that they +# are reasonably indepedent of the combicode. However when using +# with combicode the caller needs to make sure the atom IDs from combicode +# are corrected before feeding them in here. + +def cross(v1,v2): + res = array([ v1[1]*v2[2] - v1[2]*v2[1], + -v1[0]*v2[2] + v1[2]*v2[0], + v1[0]*v2[1] - v1[1]*v2[0]],Float) + return res + +def findNeighbors(atomId, adjMat): + """ + Find the IDs of the neighboring atom IDs + + ARGUMENTS: + atomId - atom of interest + adjMat - adjacency matrix for the compound + """ + nbrs = [] + for i,nid in enumerate(adjMat[atomId]): + if nid >= 1 : + nbrs.append(i) + return nbrs + +def _findAvgVec(conf, center, nbrs) : + # find the average of the normalized vectors going from the center atoms to the + # neighbors + # the average vector is also normalized + avgVec = 0 + for nbr in nbrs: + nid = nbr.GetIdx() + pt = conf.GetAtomPosition(nid) + pt -= center + pt.Normalize() + if (avgVec == 0) : + avgVec = pt + else : + avgVec += pt + + avgVec.Normalize() + return avgVec + +def GetAromaticFeatVects(conf, featAtoms, featLoc, scale=1.5): + """ + Compute the direction vector for an aromatic feature + + ARGUMENTS: + conf - a conformer + featAtoms - list of atom IDs that make up the feature + featLoc - location of the aromatic feature specified as point3d + scale - the size of the direction vector + """ + dirType = 'linear' + head = featLoc + ats = [conf.GetAtomPosition(x) for x in featAtoms] + + p0 = ats[0] + p1 = ats[1] + v1 = p0-head + v2 = p1-head + norm1 = v1.CrossProduct(v2) + norm1.Normalize() + norm1 *= scale + #norm2 = norm1 + norm2 = head-norm1 + norm1 += head + return ( (head,norm1),(head,norm2) ), dirType + +def ArbAxisRotation(theta,ax,pt): + theta = math.pi*theta/180 + c = math.cos(theta) + s = math.sin(theta) + t = 1-c + X = ax.x + Y = ax.y + Z = ax.z + mat = [ [t*X*X+c, t*X*Y+s*Z, t*X*Z-s*Y], + [t*X*Y-s*Z,t*Y*Y+c,t*Y*Z+s*X], + [t*X*Z+s*Y,t*Y*Z-s*X,t*Z*Z+c] ] + mat = array(mat) + if isinstance(pt,Geometry.Point3D): + pt = array((pt.x,pt.y,pt.z)) + tmp = matrixmultiply(mat,pt) + res=Geometry.Point3D(tmp[0],tmp[1],tmp[2]) + elif isinstance(pt,list) or isinstance(pt,tuple): + pts = pt + res = [] + for pt in pts: + pt = array((pt.x,pt.y,pt.z)) + tmp = matrixmultiply(mat,pt) + res.append(Geometry.Point3D(tmp[0],tmp[1],tmp[2])) + else: + res=None + return res + + + + + +def GetAcceptor2FeatVects(conf, featAtoms, scale=1.5): + """ + Get the direction vectors for Acceptor of type 2 + + This is the acceptor with two adjacent heavy atoms. We will special case a few things here. + If the acceptor atom is an oxygen we will assume a sp3 hybridization + the acceptor directions (two of them) + reflect that configurations. Otherwise the direction vector in plane with the neighboring + heavy atoms + + ARGUMENTS: + featAtoms - list of atoms that are part of the feature + scale - length of the direction vector + """ + assert len(featAtoms) == 1 + aid = featAtoms[0] + cpt = conf.GetAtomPosition(aid) + + mol = conf.GetOwningMol() + nbrs = mol.GetAtomWithIdx(aid).GetNeighbors() + assert len(nbrs) == 2 + + bvec = _findAvgVec(conf, cpt, nbrs) + bvec *= (-1.0*scale) + + if (mol.GetAtomWithIdx(aid).GetAtomicNum()==8): + # assume sp3 + # we will create two vectors by rotating bvec by half the tetrahedral angle in either directions + v1 = conf.GetAtomPosition(nbrs[0].GetIdx()) + v1 -= cpt + v2 = conf.GetAtomPosition(nbrs[1].GetIdx()) + v2 -= cpt + rotAxis = v1 - v2 + rotAxis.Normalize() + bv1 = ArbAxisRotation(54.5, rotAxis, bvec) + bv1 += cpt + bv2 = ArbAxisRotation(-54.5, rotAxis, bvec) + bv2 += cpt + return ((cpt, bv1), (cpt, bv2),), 'linear' + else : + bvec += cpt + return ((cpt, bvec),), 'linear' + +def _GetTetrahedralFeatVect(conf,aid,scale): + mol = conf.GetOwningMol() + + cpt = conf.GetAtomPosition(aid) + nbrs = mol.GetAtomWithIdx(aid).GetNeighbors() + if not _checkPlanarity(conf,cpt,nbrs,tol=0.1): + bvec = _findAvgVec(conf, cpt, nbrs) + bvec *= (-1.0*scale) + bvec += cpt + res = ((cpt,bvec),) + else: + res = () + return res + +def GetDonor3FeatVects(conf, featAtoms, scale=1.5): + """ + Get the direction vectors for Donor of type 3 + + This is a donor with three heavy atoms as neighbors. We will assume + a tetrahedral arrangement of these neighbors. So the direction we are seeking + is the last fourth arm of the sp3 arrangment + + ARGUMENTS: + featAtoms - list of atoms that are part of the feature + scale - length of the direction vector + """ + assert len(featAtoms) == 1 + aid = featAtoms[0] + + tfv = _GetTetrahedralFeatVect(conf,aid,scale) + return tfv, 'linear' + +def GetAcceptor3FeatVects(conf, featAtoms, scale=1.5): + """ + Get the direction vectors for Donor of type 3 + + This is a donor with three heavy atoms as neighbors. We will assume + a tetrahedral arrangement of these neighbors. So the direction we are seeking + is the last fourth arm of the sp3 arrangment + + ARGUMENTS: + featAtoms - list of atoms that are part of the feature + scale - length of the direction vector + """ + assert len(featAtoms) == 1 + aid = featAtoms[0] + tfv = _GetTetrahedralFeatVect(conf,aid,scale) + return tfv, 'linear' + +def _findHydAtoms(nbrs, atomNames): + hAtoms = [] + for nid in nbrs: + if atomNames[nid] == 'H': + hAtoms.append(nid) + return hAtoms + +def _checkPlanarity(conf, cpt, nbrs, tol=1.0e-3): + assert len(nbrs) == 3 + v1 = conf.GetAtomPosition(nbrs[0].GetIdx()) + v1 -= cpt + v2 = conf.GetAtomPosition(nbrs[1].GetIdx()) + v2 -= cpt + v3 = conf.GetAtomPosition(nbrs[2].GetIdx()) + v3 -= cpt + normal = v1.CrossProduct(v2) + dotP = abs(v3.DotProduct(normal)) + if (dotP <= tol) : + return 1 + else : + return 0 + +def GetDonor2FeatVects(conf, featAtoms, scale=1.5) : + """ + Get the direction vectors for Donor of type 2 + + This is a donor with two heavy atoms as neighbors. The atom may are may not have + hydrogen on it. Here are the situations with the neighbors that will be considered here + 1. two heavy atoms and two hydrogens: we will assume a sp3 arrangement here + 2. two heavy atoms and one hydrogen: this can either be sp2 or sp3 + 3. two heavy atoms and no hydrogens + + ARGUMENTS: + featAtoms - list of atoms that are part of the feature + scale - length of the direction vector + """ + assert len(featAtoms) == 1 + aid = featAtoms[0] + mol = conf.GetOwningMol() + + cpt = conf.GetAtomPosition(aid) + + # find the two atoms that are neighbors of this atoms + nbrs = mol.GetAtomWithIdx(aid).GetNeighbors() + assert len(nbrs) >= 2 + + hydrogens = [] + for nbr in nbrs: + if nbr.GetAtomicNum()<=1: + hydrogens.append(nbr) + + if len(nbrs) == 2: + # there should be no hydrogens in this case + assert len(hydrogens) == 0 + # in this case the direction is the opposite of the average vector of the two neighbors + bvec = _findAvgVec(conf, cpt, nbrs) + bvec *= (-1.0*scale) + bvec += cpt + return ((cpt, bvec),), 'linear' + elif len(nbrs) == 3: + assert len(hydrogens) == 1 + # this is a little more tricky we have to check if the hydrogen is in the plane of the + # two heavy atoms (i.e. sp2 arrangement) or out of plane (sp3 arrangement) + + # one of the directions will be from hydrogen atom to the heavy atom + hid = hydrogens[0].GetIdx() + bvec = conf.GetAtomPosition(hid) + bvec -= cpt + bvec.Normalize() + bvec *= scale + bvec += cpt + if _checkPlanarity(conf, cpt, nbrs): + # only the hydrogen atom direction needs to be used + return ((cpt, bvec),), 'linear' + else : + # we have a non-planar configuration - we will assume sp3 and compute a second direction vector + ovec = _findAvgVec(conf, cpt, nbrs) + ovec *= (-1.0*scale) + ovec += cpt + return ((cpt, bvec), (cpt, ovec),), 'linear' + + elif len(nbrs) >= 4 : + # in this case we should have two or more hydrogens we will simple use there directions + res = [] + for hid in hydrogenss: + bvec = array(coords[3*(hid):3*hid+3]) + bvec -= cpt + bvec /= (sqrt(innerproduct(bvec, bvec))) + bvec *= scale + bvec += cpt + res.append((cpt, bvec)) + return tuple(res), 'linear' + +def GetDonor1FeatVects(conf, featAtoms, scale=1.5) : + """ + Get the direction vectors for Donor of type 1 + + This is a donor with one heavy atom. It is not clear where we should we should be putting the + direction vector for this. It should probably be a cone. In this case we will just use the + direction vector from the donor atom to the heavy atom + + ARGUMENTS: + + featAtoms - list of atoms that are part of the feature + scale - length of the direction vector + """ + assert len(featAtoms) == 1 + aid = featAtoms[0] + mol = conf.GetOwningMol() + nbrs = mol.GetAtomWithIdx(aid).GetNeighbors() + + # find the neighboring heavy atom + hnbr = -1 + for nbr in nbrs: + if nbr.GetAtomicNum()>1: + hnbr = nbr.GetIdx() + break + + cpt = conf.GetAtomPosition(aid) + v1 = conf.GetAtomPosition(hnbr) + v1 -= cpt + v1.Normalize() + v1 *= (-1.0*scale) + v1 += cpt + return ((cpt, v1),), 'cone' + +def GetAcceptor1FeatVects(conf, featAtoms, scale=1.5) : + """ + Get the direction vectors for Acceptor of type 1 + + This is a acceptor with one heavy atom neighbor. There are two possibilities we will + consider here + 1. The bond to the heavy atom is a single bond e.g. CO + In this case we don't know the exact direction and we just use the inversion of this bond direction + and mark this direction as a 'cone' + 2. The bond to the heavy atom is a double bond e.g. C=O + In this case the we have two possible direction except in some special cases e.g. SO2 + where again we will use bond direction + + ARGUMENTS: + featAtoms - list of atoms that are part of the feature + scale - length of the direction vector + """ + assert len(featAtoms) == 1 + aid = featAtoms[0] + mol = conf.GetOwningMol() + nbrs = mol.GetAtomWithIdx(aid).GetNeighbors() + + cpt = conf.GetAtomPosition(aid) + + # find the adjacent heavy atom + heavyAt = -1 + for nbr in nbrs: + if nbr.GetAtomicNum()>1: + heavyAt = nbr + break + + singleBnd = mol.GetBondBetweenAtoms(aid,heavyAt.GetIdx()).GetBondType() > Chem.BondType.SINGLE + + # special scale - if the heavy atom is a sulfur (we should proabably check phosphorous as well) + sulfur = heavyAt.GetAtomicNum()==16 + + if singleBnd or sulfur: + v1 = conf.GetAtomPosition(heavyAt.GetIdx()) + v1 -= cpt + v1.Normalize() + v1 *= (-1.0*scale) + v1 += cpt + return ((cpt, v1),), 'cone' + else : + # ok in this case we will assume that + # heavy atom is sp2 hybridized and the direction vectors (two of them) + # are in the same plane, we will find this plane by looking for one + # of the neighbors of the heavy atom + hvNbrs = heavyAt.GetNeighbors() + hvNbr = -1 + for nbr in hvNbrs: + if nbr.GetIdx() != aid: + hvNbr = nbr + break + + pt1 = conf.GetAtomPosition(hvNbr.GetIdx()) + v1 = conf.GetAtomPosition(heavyAt.GetIdx()) + pt1 -= v1 + v1 -= cpt + rotAxis = v1.CrossProduct(pt1) + rotAxis.Normalize() + bv1 = ArbAxisRotation(120, rotAxis, v1) + bv1.Normalize() + bv1 *= scale + bv1 += cpt + bv2 = ArbAxisRotation(-120, rotAxis, v1) + bv2.Normalize() + bv2 *= scale + bv2 += cpt + return ((cpt, bv1), (cpt, bv2),), 'linear' + diff --git a/Python/Chem/Features/ShowFeats.py b/Python/Chem/Features/ShowFeats.py new file mode 100644 index 000000000..13cd9c714 --- /dev/null +++ b/Python/Chem/Features/ShowFeats.py @@ -0,0 +1,316 @@ +# $Id$ +# +# Created by Greg Landrum Aug 2006 +# +# +_version = "0.1.0" + +_usage=""" + ShowFeats [optional args] + +Optional arguments: + -x "list": specify a comma-separated list of feature types to not be excluded from + the visualization + + -f "fdef": + --fdef="fdef": specify the name of the fdef file. + + --sd: + --sdf: expect the input to be one or more SD files (otherwise mol files are expected) + + --dirs: do not calculate and display feature directions + --heads: do not put heads on the feature-direction arrows (only cylinders will be drawn) + +""" + +_welcomeMessage="This is ShowFeats version %s"%(_version) + + +import math + +import Geometry +from Chem.Features import FeatDirUtilsRD as FeatDirUtils +_featColors = { + 'Donor':(0,1,1), + 'Acceptor':(1,0,1), + 'NegIonizable':(1,0,0), + 'PosIonizable':(0,0,1), + 'ZnBinder':(1,.5,.5), + 'Aromatic':(1,.8,.2), + 'LumpedHydrophobe':(.5,.25,0), + 'Hydrophobe':(.5,.25,0), + } + +def _getVectNormal(v,tol=1e-4): + if math.fabs(v.x)>tol: + res = Geometry.Point3D(v.y,-v.x,0) + elif math.fabs(v.y)>tol: + res = Geometry.Point3D(-v.y,v.x,0) + elif math.fabs(v.z)>tol: + res = Geometry.Point3D(1,0,0) + else: + raise ValueError,'cannot find normal to the null vector' + res.Normalize() + return res + +_canonArrowhead=None +def _buildCanonArrowhead(headFrac,nSteps,aspect): + global _canonArrowhead + startP = RDGeometry.Point3D(0,0,headFrac) + _canonArrowhead=[startP] + + scale = headFrac*aspect + baseV = RDGeometry.Point3D(scale,0,0) + _canonArrowhead.append(baseV) + + twopi = 2*math.pi + for i in range(1,nSteps): + v = RDGeometry.Point3D(scale*math.cos(i*twopi),scale*math.sin(i*twopi),0) + _canonArrowhead.append(v) + + +_globalArrowCGO=[] +_globalSphereCGO=[] +# taken from pymol's cgo.py +BEGIN=2 +END=3 +TRIANGLE_FAN=6 +COLOR=6 +VERTEX=4 +NORMAL=5 +SPHERE=7 +CYLINDER=9 +ALPHA=25 + +def _cgoArrowhead(viewer,tail,head,radius,color,label,headFrac=0.3,nSteps=10,aspect=.5): + global _globalArrowCGO + delta = head-tail + normal = _getVectNormal(delta) + delta.Normalize() + + dv = head-tail + dv.Normalize() + dv *= headFrac + startP = head + + normal*=headFrac*aspect + + cgo = [BEGIN,TRIANGLE_FAN, + COLOR,color[0],color[1],color[2], + NORMAL,dv.x,dv.y,dv.z, + VERTEX,head.x+dv.x,head.y+dv.y,head.z+dv.z] + base = [BEGIN,TRIANGLE_FAN, + COLOR,color[0],color[1],color[2], + NORMAL,-dv.x,-dv.y,-dv.z, + VERTEX,head.x,head.y,head.z] + v = startP+normal + cgo.extend([NORMAL,normal.x,normal.y,normal.z]) + cgo.extend([VERTEX,v.x,v.y,v.z]) + base.extend([VERTEX,v.x,v.y,v.z]) + for i in range(1,nSteps): + v = FeatDirUtils.ArbAxisRotation(360./nSteps*i,delta,normal) + cgo.extend([NORMAL,v.x,v.y,v.z]) + v += startP + cgo.extend([VERTEX,v.x,v.y,v.z]) + base.extend([VERTEX,v.x,v.y,v.z]) + + cgo.extend([NORMAL,normal.x,normal.y,normal.z]) + cgo.extend([VERTEX,startP.x+normal.x,startP.y+normal.y,startP.z+normal.z]) + base.extend([VERTEX,startP.x+normal.x,startP.y+normal.y,startP.z+normal.z]) + cgo.append(END) + base.append(END) + cgo.extend(base) + + #viewer.server.renderCGO(cgo,label) + _globalArrowCGO.extend(cgo) + +def ShowArrow(viewer,tail,head,radius,color,label,transparency=0,includeArrowhead=True): + global _globalArrowCGO + if transparency: + _globalArrowCGO.extend([ALPHA,1-transparency]) + else: + _globalArrowCGO.extend([ALPHA,1]) + _globalArrowCGO.extend([CYLINDER,tail.x,tail.y,tail.z, + head.x,head.y,head.z, + radius*.10, + color[0],color[1],color[2], + color[0],color[1],color[2], + ]) + + if includeArrowhead: + _cgoArrowhead(viewer,tail,head,radius,color,label) + + +def ShowMolFeats(mol,factory,viewer,radius=0.5,confId=-1,showOnly=True, + name='',transparency=0.0,colors=None,excludeTypes=[], + useFeatDirs=True,featLabel=None,dirLabel=None,includeArrowheads=True): + global _globalSphereCGO + if not name: + if mol.HasProp('_Name'): + name = mol.GetProp('_Name') + else: + name = 'molecule' + if not colors: + colors = _featColors + + viewer.ShowMol(mol,name=name,showOnly=showOnly,confId=confId) + + molFeats=factory.GetFeaturesForMol(mol) + if not featLabel: + featLabel='%s-feats'%name + viewer.server.resetCGO(featLabel) + if not dirLabel: + dirLabel=featLabel+"-dirs" + viewer.server.resetCGO(dirLabel) + queueIt=hasattr(viewer,'AddSpheres') + for i,feat in enumerate(molFeats): + family=feat.GetFamily() + if family in excludeTypes: + continue + pos = feat.GetPos(confId) + color = colors.get(family,(.5,.5,.5)) + nm = '%s(%d)'%(family,i+1) + #if transparency: + # viewer.server.sphere((pos.x,pos.y,pos.z),radius,color,featLabel,1,1,transparency) + #else: + # viewer.server.sphere((pos.x,pos.y,pos.z),radius,color,featLabel,1,0) + + if transparency: + _globalSphereCGO.extend([ALPHA,1-transparency]) + else: + _globalSphereCGO.extend([ALPHA,1]) + _globalSphereCGO.extend([COLOR,color[0],color[1],color[2], + SPHERE,pos.x,pos.y,pos.z, + radius]) + + if useFeatDirs: + ps = [] + if family=='Aromatic': + ps,fType = FeatDirUtils.GetAromaticFeatVects(mol.GetConformer(confId), + feat.GetAtomIds(),pos, + scale=1.0) + elif family=='Donor': + aids = feat.GetAtomIds() + if len(aids)==1: + if len(mol.GetAtomWithIdx(aids[0]).GetNeighbors())==1: + ps,fType = FeatDirUtils.GetDonor1FeatVects(mol.GetConformer(confId), + aids,scale=1.0) + elif len(mol.GetAtomWithIdx(aids[0]).GetNeighbors())==2: + ps,fType = FeatDirUtils.GetDonor2FeatVects(mol.GetConformer(confId), + aids,scale=1.0) + elif len(mol.GetAtomWithIdx(aids[0]).GetNeighbors())==3: + ps,fType = FeatDirUtils.GetDonor3FeatVects(mol.GetConformer(confId), + aids,scale=1.0) + elif family=='Acceptor': + aids = feat.GetAtomIds() + if len(aids)==1: + if len(mol.GetAtomWithIdx(aids[0]).GetNeighbors())==1: + ps,fType = FeatDirUtils.GetAcceptor1FeatVects(mol.GetConformer(confId), + aids,scale=1.0) + elif len(mol.GetAtomWithIdx(aids[0]).GetNeighbors())==2: + ps,fType = FeatDirUtils.GetAcceptor2FeatVects(mol.GetConformer(confId), + aids,scale=1.0) + elif len(mol.GetAtomWithIdx(aids[0]).GetNeighbors())==3: + ps,fType = FeatDirUtils.GetAcceptor3FeatVects(mol.GetConformer(confId), + aids,scale=1.0) + + + + for tail,head in ps: + ShowArrow(viewer,tail,head,radius,color,dirLabel, + transparency=transparency,includeArrowhead=includeArrowheads) + + +if __name__=='__main__': + def Usage(): + print >>sys.stderr,_usage + + import sys,os,getopt + import RDConfig + import Chem + from Chem import AllChem + from Chem.PyMol import MolViewer + + try: + args,extras = getopt.getopt(sys.argv[1:],'x:f:', + ('sdf','sd','fdef=','heads','dirs')) + except: + Usage() + sys.exit(1) + + exclude=[] + fdef='BaseFeatures.fdef' + molFormat='mol' + includeArrowheads=True + useDirs=True + for arg,val in args: + if arg=='-x': + val = val.replace('[','').replace('(','').replace(']','').replace(')','') + for ex in val.split(','): + exclude.append(ex) + elif arg in ('-f','--fdef'): + fdef = val + elif arg in ('--sd','--sdf'): + molFormat='sdf' + elif arg in ('--heads',): + includeArrowheads=False + elif arg in ('--dirs',): + useDirs=False + + print >>sys.stderr,_welcomeMessage + + try: + v = MolViewer() + except: + print >>sys.stderr,'ERROR: Unable to connect to PyMol server.\nPlease run ~landrgr1/extern/PyMol/launch.sh to start it.' + sys.exit(1) + v.DeleteAll() + + + + try: + fdef = file(fdef,'r').read() + except IOError: + print >>sys.stderr,'ERROR: Could not open fdef file %s'%fdef + sys.exit(1) + + factory = AllChem.BuildFeatureFactoryFromString(fdef) + + i = 1 + for molN in extras: + featLabel='%s_Feats'%molN + v.server.resetCGO(featLabel) + # this is a big of kludgery to work around what seem to be pymol cgo bug: + v.server.sphere((0,0,0),.01,(1,0,1),featLabel) + dirLabel=featLabel+"-dirs" + if useDirs: + v.server.resetCGO(dirLabel) + # this is a big of kludgery to work around what seem to be pymol cgo bug: + v.server.cylinder((0,0,0),(.01,.01,.01),.01,(1,0,1),dirLabel) + + if molFormat=='mol': + m = Chem.MolFromMolFile(molN) + if not m: + print >>sys.stderr,'Could not parse molecule: %s'%molN + ms = [] + else: + ms = [m] + elif molFormat=='sdf': + ms = Chem.SDMolSupplier(molN) + else: + ms = [] + + for m in ms: + nm = 'Mol_%d'%(i) + if m.HasProp('_Name'): + nm += '_'+m.GetProp('_Name') + ShowMolFeats(m,factory,v,transparency=0.25,excludeTypes=exclude,name=nm,showOnly=False, + useFeatDirs=useDirs, + featLabel=featLabel,dirLabel=dirLabel, + includeArrowheads=includeArrowheads) + i += 1 + if ms: + v.server.renderCGO(_globalSphereCGO,featLabel,1) + if useDirs: + v.server.renderCGO(_globalArrowCGO,dirLabel,1) + sys.exit(0)