diff --git a/Code/ForceField/Contrib.h b/Code/ForceField/Contrib.h index 4d9193059..707912adc 100644 --- a/Code/ForceField/Contrib.h +++ b/Code/ForceField/Contrib.h @@ -3,8 +3,8 @@ // // @@ All Rights Reserved @@ // -#ifndef __RD_CONTRIB_H__ -#define __RD_CONTRIB_H__ +#ifndef __RD_FFCONTRIB_H__ +#define __RD_FFCONTRIB_H__ namespace ForceFields { class ForceField; @@ -18,7 +18,7 @@ namespace ForceFields { //! returns our contribution to the energy of a position virtual double getEnergy(double *pos) const = 0; - //! calculates our contribution to the forces of a position + //! calculates our contribution to the gradients of a position virtual void getGrad(double *pos,double *grad) const = 0; protected: diff --git a/Code/ForceField/ForceField.cpp b/Code/ForceField/ForceField.cpp index 2f06800af..5cf77d3c9 100644 --- a/Code/ForceField/ForceField.cpp +++ b/Code/ForceField/ForceField.cpp @@ -15,6 +15,7 @@ namespace ForceFieldsHelper { double calcEnergy(double *pos){ return _ffHolder->calcEnergy(pos); }; + void calcGrad(double *pos,double *grad){ // the contribs to the gradient function use +=, so we need // to zero the grad out before moving on: @@ -46,13 +47,11 @@ namespace ForceFieldsHelper { namespace ForceFields { ForceField::~ForceField(){ - //std::cerr << " *** destroy force field " << std::endl; d_numPoints=0; d_positions.clear(); d_contribs.clear(); if(dp_distMat) delete [] dp_distMat; dp_distMat=0; - //std::cerr << " *** destroy force field DONE" << std::endl; } double ForceField::distance(int i,int j,double *pos) { @@ -67,19 +66,15 @@ namespace ForceFields { unsigned int idx=i+j*(j+1)/2; CHECK_INVARIANT(idxinitDistanceMatrix(); int N = d_positions.size(); double *pos = new double[d_dimension*N]; - this->scatter(pos,0); + this->scatter(pos); // now loop over the contribs for(ContribPtrVect::const_iterator contrib=d_contribs.begin(); contrib != d_contribs.end();contrib++){ @@ -184,27 +169,24 @@ namespace ForceFields { PRECONDITION(pos,"bad position vector"); double res = 0.0; - this->initDistanceMatrix(); // now loop over the contribs for(ContribPtrVect::const_iterator contrib=d_contribs.begin(); contrib != d_contribs.end();contrib++){ double E=(*contrib)->getEnergy(pos); - //std::cout << "\t" << count++ << " " << E << std::endl; res += E; } - //std::cout << " E: " << res << std::endl; return res; } void ForceField::calcGrad(double *grad) const { PRECONDITION(df_init,"not initialized"); PRECONDITION(grad,"bad gradient vector"); - //this->initDistanceMatrix(); + int N = d_positions.size(); double *pos = new double[d_dimension*N]; - this->scatter(pos,0); + this->scatter(pos); for(ContribPtrVect::const_iterator contrib=d_contribs.begin(); contrib != d_contribs.end();contrib++){ (*contrib)->getGrad(pos,grad); @@ -215,7 +197,6 @@ namespace ForceFields { CHECK_INVARIANT(*itdimension(); ++di) { - //std::cerr << "&&&& set: " << *it << " -> " << idx << std::endl; grad[idx+di] = 0.0; } } @@ -231,24 +212,19 @@ namespace ForceFields { (*contrib)->getGrad(pos,grad); } - for(INT_VECT::const_iterator it=d_fixedPoints.begin(); it!=d_fixedPoints.end();it++){ CHECK_INVARIANT(*it " << idx << std::endl; for (unsigned int di = 0; di < this->dimension(); ++di) { - //std::cerr << "&&&& set: " << *it << " -> " << idx << std::endl; grad[idx+di] = 0.0; } } - } - void ForceField::scatter(double *pos,double *grad) const { + void ForceField::scatter(double *pos) const { PRECONDITION(df_init,"not initialized"); PRECONDITION(pos,"bad position vector"); - //PRECONDITION(grad,"bad gradient vector"); unsigned int tab=0; for(unsigned int i=0;inumPoints() long. Note: - This function is less efficient calcEnergy with postions passed in as double * ; + This function is less efficient than calcGrad with positions passed in the positions need to be converted to double * here */ void calcGrad(double *forces) const; - //! calculates the gradient of the energy at the current position + //! calculates the gradient of the energy at the provided position /*! - \param pos an array of doubles. Should be \c 3*this->numPoints() long. - \param forces an array of doubles. Should be \c 3*this->numPoints() long. + \param pos an array of doubles. Should be \c 3*this->numPoints() long. + \param forces an array of doubles. Should be \c 3*this->numPoints() long. Side effects: - The individual contributions may modify the distance matrix */ void calcGrad(double *pos,double *forces); - //! minimizes the energy of the system by adjusting positions + //! minimizes the energy of the system by following gradients /*! \param maxIts the maximum number of iterations to try \param forceTol the convergence criterion for forces @@ -184,16 +185,14 @@ namespace ForceFields { //! scatter our positions into an array /*! \param pos should be \c 3*this->numPoints() long; - \param forces OBSOLETE */ - void scatter(double *pos,double *forces) const; + void scatter(double *pos) const; //! update our positions from an array /*! \param pos should be \c 3*this->numPoints() long; - \param forces OBSOLETE */ - void gather(double *pos,double *forces); + void gather(double *pos); //! initializes our internal distance matrix void initDistanceMatrix(); diff --git a/Code/GraphMol/ChemTransforms/ChemTransforms.cpp b/Code/GraphMol/ChemTransforms/ChemTransforms.cpp index e969d13df..4aeba1e26 100644 --- a/Code/GraphMol/ChemTransforms/ChemTransforms.cpp +++ b/Code/GraphMol/ChemTransforms/ChemTransforms.cpp @@ -1,6 +1,6 @@ // $Id$ // -// Copyright (C) 2006 Greg Landrum +// Copyright (C) 2006-22007 Greg Landrum // // @@ All Rights Reserved @@ // diff --git a/Code/GraphMol/ChemTransforms/ChemTransforms.h b/Code/GraphMol/ChemTransforms/ChemTransforms.h index 93b9eb9cb..0625294c8 100644 --- a/Code/GraphMol/ChemTransforms/ChemTransforms.h +++ b/Code/GraphMol/ChemTransforms/ChemTransforms.h @@ -1,5 +1,5 @@ // -// Copyright (C) 2006 Greg Landrum +// Copyright (C) 2006-2007 Greg Landrum // // @@ All Rights Reserved @@ // diff --git a/Code/GraphMol/Depictor/EmbeddedFrag.h b/Code/GraphMol/Depictor/EmbeddedFrag.h index 0ec2b0e5d..75f9865cd 100755 --- a/Code/GraphMol/Depictor/EmbeddedFrag.h +++ b/Code/GraphMol/Depictor/EmbeddedFrag.h @@ -1,5 +1,5 @@ // -// Copyright (C) 2003-2006 Rational Discovery LLC +// Copyright (C) 2003-2007 Greg Landrum and Rational Discovery LLC // // @@ All Rights Reserved @@ // @@ -22,7 +22,6 @@ namespace RDDepict { typedef boost::shared_array DOUBLE_SMART_PTR; //! Class that contains the data for an atoms that has alredy been embedded - class EmbeddedAtom { public: typedef enum { @@ -31,7 +30,7 @@ namespace RDDepict { RING} EAtomType; EmbeddedAtom() : aid(0), angle(-1.0), nbr1(-1), nbr2(-1), - CisTransNbr(-1), ccw(true), rotDir(0), d_density(-1.0){ + CisTransNbr(-1), ccw(true), rotDir(0), d_density(-1.0){ neighs.clear(); } @@ -179,7 +178,7 @@ namespace RDDepict { */ void expandEfrag(RDKit::INT_LIST &nratms, std::list &efrags); - + //! Add a new non-ring atom to this object /* ARGUMENTS: @@ -280,7 +279,7 @@ namespace RDDepict { EmbeddedAtom GetEmbeddedAtom(unsigned int aid) const { INT_EATOM_MAP_CI posi = d_eatoms.find(aid); if (posi == d_eatoms.end()) { - PRECONDITION(0, "Embedded atom does not contain embedded atom specified"); + PRECONDITION(0, "Embedded atom does not contain embedded atom specified"); } return posi->second; } @@ -387,7 +386,7 @@ namespace RDDepict { RDGeom::Transform2D computeTwoAtomTrans(unsigned int aid1, unsigned int aid2, - const RDGeom::INT_POINT2D_MAP &nringCor); + const RDGeom::INT_POINT2D_MAP &nringCor); //! Merge a ring with already embedded atoms /*! @@ -422,7 +421,7 @@ namespace RDDepict { \param embFrag the fragment that will be reflected if necessary \param ctCase which fragment if the cis/trans dbl bond - 1 means embFrag is the cis/trans fragment - - 2 mean "this" is the cis/trans fragment + - 2 mean "this" is the cis/trans fragment \param aid1 first atom that forms the plane (line) of reflection \param aid2 seconf atom that forms the plane of reflection */ diff --git a/Code/GraphMol/Depictor/RDDepictor.h b/Code/GraphMol/Depictor/RDDepictor.h index 9a91130d4..3cf04de03 100755 --- a/Code/GraphMol/Depictor/RDDepictor.h +++ b/Code/GraphMol/Depictor/RDDepictor.h @@ -1,11 +1,11 @@ // -// Copyright (C) 2003-2006 Rational Discovery LLC +// Copyright (C) 2003-2007 Rational Discovery LLC // // @@ All Rights Reserved @@ // -#ifndef RDDEPICTOR_H -#define RDDEPICTOR_H +#ifndef _RDDEPICTOR_H_ +#define _RDDEPICTOR_H_ #include #include @@ -17,18 +17,17 @@ namespace RDKit { } namespace RDDepict { - //class EmbeddedFrag; //! \brief This the function the user gets to compute coodinates /*! \param mol the molecule were are interested in \param coordMap a map of int to Point2D, between atom IDs and their locations. - This is the container the user need to fill if he/she wants to - specify coordinates for a portion of the molecule, defualts to 0 + This is the container the user need to fill if he/she wants to + specify coordinates for a portion of the molecule, defualts to 0 \param canonOrient canonicalize the orientation so that the the long axes - align with the x-axis etc. + align with the x-axis etc. \param clearConfs clear all existing conformations on the molecule them adding the - 2D coordinates or simple add to the list + 2D coordinates or simple add to the list \return ID of the conformation added to the molecule cotaining the 2D coordinates diff --git a/Code/GraphMol/Descriptors/MolDescriptors.cpp b/Code/GraphMol/Descriptors/MolDescriptors.cpp index a7c3a854a..2209b45c8 100644 --- a/Code/GraphMol/Descriptors/MolDescriptors.cpp +++ b/Code/GraphMol/Descriptors/MolDescriptors.cpp @@ -26,7 +26,6 @@ namespace RDKit{ if(!onlyHeavy){ res += (*atomIt)->getImplicitValence()*table->getAtomicWeight(1); } - } return res; } diff --git a/Code/GraphMol/Descriptors/MolDescriptors.h b/Code/GraphMol/Descriptors/MolDescriptors.h index 3c6f3c412..854b0ed09 100644 --- a/Code/GraphMol/Descriptors/MolDescriptors.h +++ b/Code/GraphMol/Descriptors/MolDescriptors.h @@ -25,8 +25,6 @@ namespace RDKit{ */ double CalcAMW(const ROMol &mol,bool onlyHeavy=false); - - } // end of namespace Descriptors } //end of namespace RDKit diff --git a/Code/GraphMol/FragCatalog/FragCatalogEntry.cpp b/Code/GraphMol/FragCatalog/FragCatalogEntry.cpp index 377d66454..f4f67ba2a 100755 --- a/Code/GraphMol/FragCatalog/FragCatalogEntry.cpp +++ b/Code/GraphMol/FragCatalog/FragCatalogEntry.cpp @@ -28,7 +28,7 @@ namespace RDKit { d_aToFmap.clear(); setBitId(-1); INT_MAP_INT aIdxMap; // a map from atom id in omol to the new atoms id in mol - dp_mol = PathToSubmol(*omol, path, false, aIdxMap); // Using Subgraphs functionality + dp_mol = Subgraphs::PathToSubmol(*omol, path, false, aIdxMap); // Using Subgraphs functionality d_order = path.size(); // using aIdxMap initialize the location (and their IDs) of the @@ -119,7 +119,7 @@ namespace RDKit { // FIX: if might be better if we just do the balaban first and then // move onto eigen values - PathDiscrimTuple tdiscs, odiscs;// = MolOps::computeDiscriminators(*mol, true) + Subgraphs::PathDiscrimTuple tdiscs, odiscs;// = MolOps::computeDiscriminators(*mol, true) odiscs = other->getDiscrims(); diff --git a/Code/GraphMol/FragCatalog/FragCatalogUtils.cpp b/Code/GraphMol/FragCatalog/FragCatalogUtils.cpp index 2849d5229..3437e939d 100755 --- a/Code/GraphMol/FragCatalog/FragCatalogUtils.cpp +++ b/Code/GraphMol/FragCatalog/FragCatalogUtils.cpp @@ -139,7 +139,7 @@ namespace RDKit { // create a list of bond IDs from these atom ID // these are the bond in mol that are part of portion that matches the // functional group - bondIds = bondListFromAtomList(mol, maids); + bondIds = Subgraphs::bondListFromAtomList(mol, maids); // now check if all these bonds have been covered as part of larger // functional group that was dealt with earlier @@ -189,7 +189,7 @@ namespace RDKit { INT_MAP_INT aIdxMap; // a map from atom id in mol to the new atoms id in coreMol - ROMol *coreMol = PathToSubmol(mol, cBonds, false, aIdxMap); + ROMol *coreMol = Subgraphs::PathToSubmol(mol, cBonds, false, aIdxMap); // now map the functional groups on mol to coreMol using aIdxMap MatchVectType::iterator mati; diff --git a/Code/GraphMol/Subgraphs/SubgraphUtils.cpp b/Code/GraphMol/Subgraphs/SubgraphUtils.cpp index 4486abb74..5aa7fc362 100755 --- a/Code/GraphMol/Subgraphs/SubgraphUtils.cpp +++ b/Code/GraphMol/Subgraphs/SubgraphUtils.cpp @@ -31,16 +31,16 @@ #endif namespace RDKit { - + namespace Subgraphs { ROMol *PathToSubmol(const ROMol &mol, const PATH_TYPE &path, - bool useQuery) { + bool useQuery) { INT_MAP_INT aIdxMap; return PathToSubmol(mol, path, useQuery, aIdxMap); } ROMol *PathToSubmol(const ROMol &mol, const PATH_TYPE &path, - bool useQuery, - INT_MAP_INT &atomIdxMap) { + bool useQuery, + INT_MAP_INT &atomIdxMap) { RWMol *subMol=new RWMol(); PATH_TYPE::const_iterator pathIter; //std::map atomIdxMap; @@ -58,15 +58,15 @@ ROMol *PathToSubmol(const ROMol &mol, const PATH_TYPE &path, endIdx=bond->getEndAtomIdx(); if(atomIdxMap.find(begIdx)==atomIdxMap.end()){ - atom = new QueryAtom(*(mol.getAtomWithIdx(begIdx))); - newAtomIdx=subMol->addAtom(atom,false,true); - atomIdxMap[begIdx] = newAtomIdx; + atom = new QueryAtom(*(mol.getAtomWithIdx(begIdx))); + newAtomIdx=subMol->addAtom(atom,false,true); + atomIdxMap[begIdx] = newAtomIdx; } begIdx = atomIdxMap.find(begIdx)->second; if(atomIdxMap.find(endIdx)==atomIdxMap.end()){ - atom = new QueryAtom(*(mol.getAtomWithIdx(endIdx))); - newAtomIdx=subMol->addAtom(atom,false,true); - atomIdxMap[endIdx] = newAtomIdx; + atom = new QueryAtom(*(mol.getAtomWithIdx(endIdx))); + newAtomIdx=subMol->addAtom(atom,false,true); + atomIdxMap[endIdx] = newAtomIdx; } endIdx = atomIdxMap.find(endIdx)->second; @@ -87,15 +87,15 @@ ROMol *PathToSubmol(const ROMol &mol, const PATH_TYPE &path, endIdx=bond->getEndAtomIdx(); if(atomIdxMap.find(begIdx)==atomIdxMap.end()){ - atom = mol.getAtomWithIdx(begIdx)->copy(); - newAtomIdx=subMol->addAtom(atom,false,true); - atomIdxMap[begIdx] = newAtomIdx; + atom = mol.getAtomWithIdx(begIdx)->copy(); + newAtomIdx=subMol->addAtom(atom,false,true); + atomIdxMap[begIdx] = newAtomIdx; } begIdx = atomIdxMap.find(begIdx)->second; if(atomIdxMap.find(endIdx)==atomIdxMap.end()){ - atom = mol.getAtomWithIdx(endIdx)->copy(); - newAtomIdx=subMol->addAtom(atom,false,true); - atomIdxMap[endIdx] = newAtomIdx; + atom = mol.getAtomWithIdx(endIdx)->copy(); + newAtomIdx=subMol->addAtom(atom,false,true); + atomIdxMap[endIdx] = newAtomIdx; } endIdx = atomIdxMap.find(endIdx)->second; @@ -119,11 +119,11 @@ ROMol *PathToSubmol(const ROMol &mol, const PATH_TYPE &path, int i, j, bid; for (i = 0; i < natms; i++) { for (j = i+1; j < natms; j++) { - bnd = mol.getBondBetweenAtoms(atomIds[i], atomIds[j]); - if (bnd) { - bid = bnd->getIdx(); - bids.push_back(bid); - } + bnd = mol.getBondBetweenAtoms(atomIds[i], atomIds[j]); + if (bnd) { + bid = bnd->getIdx(); + bids.push_back(bid); + } } } return bids; @@ -176,7 +176,7 @@ CalcPathDiscriminators(const ROMol &mol, const PATH_TYPE &path, bool useBO) { // atoms) // PATH_LIST uniquifyPaths (const ROMol &mol, const PATH_LIST &allPaths, - bool useBO,double tol) { + bool useBO,double tol) { PATH_LIST res; PATH_LIST::const_iterator path; std::vector discrimsSeen; @@ -187,21 +187,21 @@ CalcPathDiscriminators(const ROMol &mol, const PATH_TYPE &path, bool useBO) { found=0; for(discrimIt=discrimsSeen.begin(); - discrimIt!=discrimsSeen.end(); - discrimIt++){ - if( feq(boost::tuples::get<0>(discrims),boost::tuples::get<0>(*discrimIt),tol) && - feq(boost::tuples::get<1>(discrims),boost::tuples::get<1>(*discrimIt),tol) && - feq(boost::tuples::get<2>(discrims),boost::tuples::get<2>(*discrimIt),tol)){ - found=1; - break; - } + discrimIt!=discrimsSeen.end(); + discrimIt++){ + if( feq(boost::tuples::get<0>(discrims),boost::tuples::get<0>(*discrimIt),tol) && + feq(boost::tuples::get<1>(discrims),boost::tuples::get<1>(*discrimIt),tol) && + feq(boost::tuples::get<2>(discrims),boost::tuples::get<2>(*discrimIt),tol)){ + found=1; + break; + } } if(!found){ - discrimsSeen.push_back(discrims); - res.push_back(*path); + discrimsSeen.push_back(discrims); + res.push_back(*path); } } return res; } - -} // end of namespace + } // end of namespace Subgraphs +} // end of namespace RDKit diff --git a/Code/GraphMol/Subgraphs/SubgraphUtils.h b/Code/GraphMol/Subgraphs/SubgraphUtils.h index 1cf1f7876..f54eb39b8 100755 --- a/Code/GraphMol/Subgraphs/SubgraphUtils.h +++ b/Code/GraphMol/Subgraphs/SubgraphUtils.h @@ -1,5 +1,5 @@ // -// Copyright (C) 2003-2006 Rational Discovery LLC +// Copyright (C) 2003-2007 Rational Discovery LLC // // @@ All Rights Reserved @@ // @@ -11,33 +11,29 @@ namespace RDKit{ class ROMol; - - //typedef DiscrimTuple PathDiscrimTuple; - typedef boost::tuples::tuple PathDiscrimTuple; - PathDiscrimTuple CalcPathDiscriminators(const ROMol &mol,const PATH_TYPE &path, - bool useBO=true); - PATH_LIST uniquifyPaths (const ROMol &mol, const PATH_LIST &allPathsb, - bool useBO=true,double tol=1e-8); + namespace Subgraphs { + typedef boost::tuples::tuple PathDiscrimTuple; + PathDiscrimTuple CalcPathDiscriminators(const ROMol &mol,const PATH_TYPE &path, + bool useBO=true); + PATH_LIST uniquifyPaths (const ROMol &mol, const PATH_LIST &allPathsb, + bool useBO=true,double tol=1e-8); - // Return the list of bond that connect a list of atoms - // ASSUMPTION: the atoms specified in the list are connected - PATH_TYPE bondListFromAtomList(const ROMol &mol, const PATH_TYPE atomIds); + // Return the list of bond that connect a list of atoms + // ASSUMPTION: the atoms specified in the list are connected + PATH_TYPE bondListFromAtomList(const ROMol &mol, const PATH_TYPE atomIds); - // create a new molecule object from a part of molecule "mol". The part of - // of the molecule is specified as a list of bonds in "path". - // the optional argument "useQuery" will set all the bond and atoms in the - // the new molecule to "QueryAtoms" and "QueryBonds" instead of regular Atoms and Bonds - // atomIdxMap provides a mapping between the atomsIds in mol to the atomIds in - // the newly created sub-molecule (the molecule that is returned) - ROMol *PathToSubmol(const ROMol &mol, const PATH_TYPE &path, - bool useQuery, - std::map &atomIdxMap); - - // same as PathToSubmol above except for a bunch of optional arguments - ROMol *PathToSubmol(const ROMol &mol, const PATH_TYPE &path, - bool useQuery=false); - - + // create a new molecule object from a part of molecule "mol". The part of + // of the molecule is specified as a list of bonds in "path". + // the optional argument "useQuery" will set all the bond and atoms in the + // the new molecule to "QueryAtoms" and "QueryBonds" instead of regular Atoms and Bonds + // atomIdxMap provides a mapping between the atomsIds in mol to the atomIds in + // the newly created sub-molecule (the molecule that is returned) + ROMol *PathToSubmol(const ROMol &mol, const PATH_TYPE &path, + bool useQuery, + std::map &atomIdxMap); + ROMol *PathToSubmol(const ROMol &mol, const PATH_TYPE &path, + bool useQuery=false); + } // end of namespace Subgraphs } diff --git a/Code/GraphMol/Subgraphs/Subgraphs.cpp b/Code/GraphMol/Subgraphs/Subgraphs.cpp index 437aa1445..b5e29cf47 100755 --- a/Code/GraphMol/Subgraphs/Subgraphs.cpp +++ b/Code/GraphMol/Subgraphs/Subgraphs.cpp @@ -14,8 +14,8 @@ #include #include +namespace RDKit { namespace Subgraphs { - using namespace RDKit; INT_INT_VECT_MAP getNbrsList(const ROMol &mol, bool useHs) { int nAtoms = mol.getNumAtoms(); @@ -32,53 +32,49 @@ namespace Subgraphs { // if are at a hydrogen and we are not interested in bonds connecting to them // move on if( atom->getAtomicNum()!=1 || useHs ){ - ROMol::OEDGE_ITER bIt1,end; - ROMol::GRAPH_MOL_BOND_PMAP::const_type pMap = mol.getBondPMap(); - boost::tie(bIt1,end) = mol.getAtomBonds(atom); - while(bIt1!=end){ - Bond *bond1 = pMap[*bIt1]; - // if this bond connect to a hydrogen and we are not interested - // in it ignore - if( bond1->getOtherAtom(atom)->getAtomicNum() != 1 || useHs ){ - int bid1 = bond1->getIdx(); - if (nbrs.find(bid1) == nbrs.end()) { - INT_VECT nlst; - nbrs[bid1] = nlst; - } - ROMol::OEDGE_ITER bIt2; - bIt2 = mol.getAtomBonds(atom).first; - while(bIt2 != end){ - Bond *bond2 = pMap[*bIt2]; - int bid2 = bond2->getIdx(); - if (bid1 != bid2 && - (bond2->getOtherAtom(atom)->getAtomicNum() != 1 || useHs ) ){ - //if (nbrs.find(bid1) == nbrs.end()) { - //INT_VECT nlst; - //nbrs[bid1] = nlst; - //} - nbrs[bid1].push_back(bid2); //FIX: pathListType should probably be container of pointers ?? - } - bIt2++; - } - } - bIt1++; - } + ROMol::OEDGE_ITER bIt1,end; + ROMol::GRAPH_MOL_BOND_PMAP::const_type pMap = mol.getBondPMap(); + boost::tie(bIt1,end) = mol.getAtomBonds(atom); + while(bIt1!=end){ + Bond *bond1 = pMap[*bIt1]; + // if this bond connect to a hydrogen and we are not interested + // in it ignore + if( bond1->getOtherAtom(atom)->getAtomicNum() != 1 || useHs ){ + int bid1 = bond1->getIdx(); + if (nbrs.find(bid1) == nbrs.end()) { + INT_VECT nlst; + nbrs[bid1] = nlst; + } + ROMol::OEDGE_ITER bIt2; + bIt2 = mol.getAtomBonds(atom).first; + while(bIt2 != end){ + Bond *bond2 = pMap[*bIt2]; + int bid2 = bond2->getIdx(); + if (bid1 != bid2 && + (bond2->getOtherAtom(atom)->getAtomicNum() != 1 || useHs ) ){ + nbrs[bid1].push_back(bid2); //FIX: pathListType should probably be container of pointers ?? + } + ++bIt2; + } + } + ++bIt1; + } } } return nbrs; } - // Everything passed here by reference + // Everything passed here by reference void recurseWalk(INT_INT_VECT_MAP &nbrs, // neighbors for each bond - PATH_TYPE &spath, // the current path to be build upon - INT_VECT &cands, // neighbors of current path - unsigned int targetLen, // the maximum subgraph len we are interested in - INT_VECT forbidden, // bonds that have been covered already - // we don't want reference passing for forbidden, - // it gets altered through the processand we want - // fresh start everytime we buble back up to "FindAllSubGraphs" - PATH_LIST &res // the final list of subgraphs - ) + PATH_TYPE &spath, // the current path to be build upon + INT_VECT &cands, // neighbors of current path + unsigned int targetLen, // the maximum subgraph len we are interested in + INT_VECT forbidden, // bonds that have been covered already + // we don't want reference passing for forbidden, + // it gets altered through the processand we want + // fresh start everytime we buble back up to "FindAllSubGraphs" + PATH_LIST &res // the final list of subgraphs + ) { // end case for recursion if (spath.size() == targetLen) { @@ -98,44 +94,44 @@ namespace Subgraphs { cands.pop_back(); //cands.erase(remove(cands.begin(), cands.end(), next), cands.end()); if (std::find(forbidden.begin(), forbidden.end(), next) == forbidden.end()) { - // this bond should not appear in the later subgraphs - forbidden.push_back(next); + // this bond should not appear in the later subgraphs + forbidden.push_back(next); - // update a local stack before the next recursive call - INT_VECT tstack = cands; - for (INT_VECT::iterator bid=nbrs[next].begin(); bid != nbrs[next].end(); bid++) { - if (std::find(forbidden.begin(), forbidden.end(), *bid) == forbidden.end()) { - tstack.push_back(*bid); - } - } + // update a local stack before the next recursive call + INT_VECT tstack = cands; + for (INT_VECT::iterator bid=nbrs[next].begin(); bid != nbrs[next].end(); bid++) { + if (std::find(forbidden.begin(), forbidden.end(), *bid) == forbidden.end()) { + tstack.push_back(*bid); + } + } - PATH_TYPE tpath = spath; - tpath.push_back(next); + PATH_TYPE tpath = spath; + tpath.push_back(next); - recurseWalk(nbrs,tpath, tstack, targetLen, forbidden, res); + recurseWalk(nbrs,tpath, tstack, targetLen, forbidden, res); } } } - // Everything passed here by reference + // Everything passed here by reference void recurseWalkRange(INT_INT_VECT_MAP &nbrs, // neighbors for each bond - PATH_TYPE &spath, // the current path to be build upon - INT_VECT &cands, // neighbors of current path - unsigned int lowerLen, // lower limit of the subgraph lengths we are interested in - unsigned int upperLen, // the maximum subgraph len we are interested in - INT_VECT forbidden, // bonds that have been covered already - // we don't want reference passing for forbidden, - // it gets altered through the processand we want - // fresh start everytime we buble back up to "FindAllSubGraphs" - INT_PATH_LIST_MAP &res // the final list of subgraphs - ) + PATH_TYPE &spath, // the current path to be build upon + INT_VECT &cands, // neighbors of current path + unsigned int lowerLen, // lower limit of the subgraph lengths we are interested in + unsigned int upperLen, // the maximum subgraph len we are interested in + INT_VECT forbidden, // bonds that have been covered already + // we don't want reference passing for forbidden, + // it gets altered through the processand we want + // fresh start everytime we buble back up to "FindAllSubGraphs" + INT_PATH_LIST_MAP &res // the final list of subgraphs + ) { unsigned int nsize = spath.size(); if ((nsize >= lowerLen) && (nsize <= upperLen)) { if (res.find(nsize) == res.end()) { - PATH_LIST ordern; - res[nsize] = ordern; + PATH_LIST ordern; + res[nsize] = ordern; } res[nsize].push_back(spath); } @@ -157,21 +153,21 @@ namespace Subgraphs { cands.pop_back(); //cands.erase(remove(cands.begin(), cands.end(), next), cands.end()); if (std::find(forbidden.begin(), forbidden.end(), next) == forbidden.end()) { - // this bond should not appear in the later subgraphs - forbidden.push_back(next); + // this bond should not appear in the later subgraphs + forbidden.push_back(next); - // update a local stack before the next recursive call - INT_VECT tstack = cands; - for (INT_VECT::iterator bid=nbrs[next].begin(); bid != nbrs[next].end(); bid++) { - if (std::find(forbidden.begin(), forbidden.end(), *bid) == forbidden.end()) { - tstack.push_back(*bid); - } - } + // update a local stack before the next recursive call + INT_VECT tstack = cands; + for (INT_VECT::iterator bid=nbrs[next].begin(); bid != nbrs[next].end(); bid++) { + if (std::find(forbidden.begin(), forbidden.end(), *bid) == forbidden.end()) { + tstack.push_back(*bid); + } + } - PATH_TYPE tpath = spath; - tpath.push_back(next); + PATH_TYPE tpath = spath; + tpath.push_back(next); - recurseWalkRange(nbrs,tpath, tstack, lowerLen, upperLen, forbidden, res); + recurseWalkRange(nbrs,tpath, tstack, lowerLen, upperLen, forbidden, res); } } } @@ -181,7 +177,7 @@ namespace Subgraphs { INT_VECT::iterator j; for(i=v.begin();i!=v.end();i++){ for(j=i->begin();j!=i->end();j++){ - std::cout << *j << " "; + std::cout << *j << " "; } std::cout << std::endl; } @@ -201,34 +197,34 @@ namespace Subgraphs { unsigned int endIdx = (*path)[path->size()-1]; unsigned int iTab = endIdx*dim; for(unsigned int otherIdx = 0; otherIdx < dim; otherIdx++){ - if( adjMat[iTab+otherIdx] == 1){ - // test 1: make sure the new atom is not already - // in the path - PATH_TYPE::const_iterator loc; - loc = std::find(path->begin(),path->end(),otherIdx); - // The two conditions for adding the atom are: - // 1) it's not there already - // 2) it's there, but ring closures are allowed and this - // will be the last addition to the path. - if ( loc == path->end() ){ - // the easy case - PATH_TYPE newPath=*path; - newPath.push_back(otherIdx); - res.push_back(newPath); - } else if (allowRingClosures>2 && - path->size()==allowRingClosures-1) { - // We *might* be adding the atom, but we need to make sure - // that we're not just duplicating the second to last - // element of the path: - PATH_TYPE::const_reverse_iterator rIt=path->rbegin(); - rIt++; - if( *rIt != otherIdx ){ - INT_VECT newPath=*path; - newPath.push_back(otherIdx); - res.push_back(newPath); - } - } - } + if( adjMat[iTab+otherIdx] == 1){ + // test 1: make sure the new atom is not already + // in the path + PATH_TYPE::const_iterator loc; + loc = std::find(path->begin(),path->end(),otherIdx); + // The two conditions for adding the atom are: + // 1) it's not there already + // 2) it's there, but ring closures are allowed and this + // will be the last addition to the path. + if ( loc == path->end() ){ + // the easy case + PATH_TYPE newPath=*path; + newPath.push_back(otherIdx); + res.push_back(newPath); + } else if (allowRingClosures>2 && + path->size()==allowRingClosures-1) { + // We *might* be adding the atom, but we need to make sure + // that we're not just duplicating the second to last + // element of the path: + PATH_TYPE::const_reverse_iterator rIt=path->rbegin(); + rIt++; + if( *rIt != otherIdx ){ + INT_VECT newPath=*path; + newPath.push_back(otherIdx); + res.push_back(newPath); + } + } + } } } return res; @@ -255,8 +251,6 @@ namespace Subgraphs { for(unsigned int length=startLength;length>>>>>> len: " << length+1 << std::endl;; - //dumpVIV(paths); } PATH_LIST newPaths; @@ -264,25 +258,21 @@ namespace Subgraphs { // have any that are reverse duplicates: for(PATH_LIST::iterator path=paths.begin(); path != paths.end(); path++ ){ if( std::find(newPaths.begin(),newPaths.end(),*path) == newPaths.end() ){ - PATH_TYPE revPath(*path); - std::reverse(revPath.begin(),revPath.end()); - if( std::find(newPaths.begin(),newPaths.end(),revPath) == newPaths.end() ){ - // this one is a keeper; - newPaths.push_back(*path); - } + PATH_TYPE revPath(*path); + std::reverse(revPath.begin(),revPath.end()); + if( std::find(newPaths.begin(),newPaths.end(),revPath) == newPaths.end() ){ + // this one is a keeper; + newPaths.push_back(*path); + } } } return newPaths; } - - } // end of Subgraphs namespace -namespace RDKit{ PATH_LIST findAllSubgraphsOfLengthN (const ROMol &mol, unsigned int targetLen, - bool useHs, bool verbose) - { + bool useHs){ /********************************************* FIX: Lots of issues here: - pathListType is defined as a container of "pathType", should it be a container @@ -310,7 +300,7 @@ namespace RDKit{ // don't come back to this bond in the later subgraphs int i = (*nbi).first; if (! (std::find(forbidden.begin(), forbidden.end(), i) == forbidden.end())) { - continue; + continue; } forbidden.push_back(i); @@ -328,13 +318,12 @@ namespace RDKit{ Subgraphs::recurseWalk(nbrs, spath, cands, targetLen, forbidden, res); } nbrs.clear(); - return res; //FIX : need some verbose testing code here + return res; } INT_PATH_LIST_MAP findAllSubgraphsOfLengthsMtoN(const ROMol &mol, unsigned int lowerLen, - unsigned int upperLen, bool useHs, - bool verbose) { + unsigned int upperLen, bool useHs){ CHECK_INVARIANT(lowerLen <= upperLen, ""); INT_VECT forbidden; @@ -356,7 +345,7 @@ namespace RDKit{ // don't come back to this bond in the later subgraphs int i = (*nbi).first; if (! (std::find(forbidden.begin(), forbidden.end(), i) == forbidden.end())) { - continue; + continue; } forbidden.push_back(i); @@ -378,7 +367,7 @@ namespace RDKit{ } PATH_LIST findUniqueSubgraphsOfLengthN (const ROMol &mol, unsigned int targetLen, - bool useHs,bool useBO) + bool useHs,bool useBO) { // start by finding all subgraphs, then uniquify PATH_LIST allSubgraphs=findAllSubgraphsOfLengthN(mol,targetLen,useHs); @@ -417,7 +406,7 @@ namespace RDKit{ // PATH_LIST findAllPathsOfLengthN(const ROMol &mol,unsigned int targetLen,bool useBonds, - bool useHs) { + bool useHs) { // // We can't be clever here and just use the bond adjacency matrix // to solve this problem when useBonds is true. This is because @@ -442,8 +431,8 @@ namespace RDKit{ Atom *end=(*bondIt)->getEndAtom(); // check for H, which we might be skipping if(useHs || (beg->getAtomicNum()!=1 && end->getAtomicNum()!=1)){ - adjMat[beg->getIdx()*dim+end->getIdx()] = 1; - adjMat[end->getIdx()*dim+beg->getIdx()] = 1; + adjMat[beg->getIdx()*dim+end->getIdx()] = 1; + adjMat[end->getIdx()*dim+beg->getIdx()] = 1; } } @@ -470,14 +459,14 @@ namespace RDKit{ if(useBonds || targetLen>1){ //bondPaths.reserve(atomPaths.size()); for(vivI=atomPaths.begin();vivI!=atomPaths.end();vivI++){ - const PATH_TYPE &resi=*vivI; - PATH_TYPE locV; - locV.reserve(targetLen); - for(unsigned int j=0;jgetIdx()); - } - bondPaths.push_back(locV); + const PATH_TYPE &resi=*vivI; + PATH_TYPE locV; + locV.reserve(targetLen); + for(unsigned int j=0;jgetIdx()); + } + bondPaths.push_back(locV); } } @@ -499,22 +488,22 @@ namespace RDKit{ if(useBonds || targetLen>1){ PATH_LIST::const_iterator bondPath,atomPath; for(bondPath=bondPaths.begin(),atomPath=atomPaths.begin(); - bondPath!=bondPaths.end(); - bondPath++,atomPath++){ - double invar=1.0; - for(ivI=bondPath->begin();ivI!=bondPath->end();ivI++){ - invar *= firstThousandPrimes[*ivI]; - } - if(std::find(invars.begin(),invars.end(),invar)==invars.end()){ - invars.push_back(invar); - // this is a new one: - if(useBonds){ - res.push_back(*bondPath); - } - else{ - res.push_back(*atomPath); - } - } + bondPath!=bondPaths.end(); + bondPath++,atomPath++){ + double invar=1.0; + for(ivI=bondPath->begin();ivI!=bondPath->end();ivI++){ + invar *= firstThousandPrimes[*ivI]; + } + if(std::find(invars.begin(),invars.end(),invar)==invars.end()){ + invars.push_back(invar); + // this is a new one: + if(useBonds){ + res.push_back(*bondPath); + } + else{ + res.push_back(*atomPath); + } + } } } else { res = atomPaths; @@ -522,4 +511,4 @@ namespace RDKit{ return res; } -} // end of namespace +} // end of RDKit namespace diff --git a/Code/GraphMol/Subgraphs/Subgraphs.h b/Code/GraphMol/Subgraphs/Subgraphs.h index 2f9cbe2ed..278d1b64f 100755 --- a/Code/GraphMol/Subgraphs/Subgraphs.h +++ b/Code/GraphMol/Subgraphs/Subgraphs.h @@ -3,6 +3,23 @@ // // @@ All Rights Reserved @@ // + +/*! \file Subgraphs.h + + \brief functionality for finding subgraphs and paths in molecules + + Difference between _subgraphs_ and _paths_ : + Subgraphs are potentially branched, whereas paths (in our + terminology at least) cannot be. So, the following graph: + + C--0--C--1--C--3--C + | + 2 + | + C + has 3 _subgraphs_ of length 3: (0,1,2),(0,1,3),(2,1,3) + but only 2 _paths_ of length 3: (0,1,3),(2,1,3) +*/ #ifndef _RD_SUBGRAPHS_H_ #define _RD_SUBGRAPHS_H_ @@ -24,16 +41,58 @@ namespace RDKit{ typedef INT_PATH_LIST_MAP::const_iterator INT_PATH_LIST_MAP_CI; typedef INT_PATH_LIST_MAP::iterator INT_PATH_LIST_MAP_I; - INT_PATH_LIST_MAP findAllSubgraphsOfLengthsMtoN(const ROMol &mol, unsigned int lowerLen, - unsigned int upperLen, bool useHs=false, - bool verbose=false); + // --- --- --- --- --- --- --- --- --- --- --- --- --- + // + // + // --- --- --- --- --- --- --- --- --- --- --- --- --- - PATH_LIST findAllSubgraphsOfLengthN(const ROMol &mol,unsigned int, - bool useHs=false,bool verbose=false); - PATH_LIST findUniqueSubgraphsOfLengthN(const ROMol&,unsigned int, - bool useHs=false,bool useBO=true); - PATH_LIST findAllPathsOfLengthN(const ROMol&,unsigned int, - bool useBonds=true,bool useHs=false); + //! \brief find all subgraphs in a range of sizes + /*! + * \param mol - the molecule to be considered + * \param lowerLen - the minimum subgraph size to find + * \param upperLen - the maximum subgraph size to find + * \param useHs - if set, hydrogens in the graph will be considered + * eligible to be in paths. NOTE: this will not add + * Hs to the graph. + */ + INT_PATH_LIST_MAP findAllSubgraphsOfLengthsMtoN(const ROMol &mol, unsigned int lowerLen, + unsigned int upperLen, bool useHs=false); + + //! \brief find all subgraphs of a particular size + /*! + * \param mol - the molecule to be considered + * \param targetLen - the length of the subgraphs to be returned + * \param useHs - if set, hydrogens in the graph will be considered + * eligible to be in paths. NOTE: this will not add + * Hs to the graph. + */ + PATH_LIST findAllSubgraphsOfLengthN(const ROMol &mol,unsigned int targetLen, + bool useHs=false); + + //! \brief find unique subgraphs of a particular size + /*! + * \param mol - the molecule to be considered + * \param targetLen - the length of the subgraphs to be returned + * \param useHs - if set, hydrogens in the graph will be considered + * eligible to be in paths. NOTE: this will not add + * Hs to the graph. + * \param useBO - if set, bond orders will be considered when uniquifying + * the paths + */ + PATH_LIST findUniqueSubgraphsOfLengthN(const ROMol &mol,unsigned int targetLen, + bool useHs=false,bool useBO=true); + //! \brief find all paths of a particular size + /*! + * \param mol - the molecule to be considered + * \param targetLen - the length of the paths to be returned + * \param useBonds - if set, the path indices will be bond indices, + * not atom indices + * \param useHs - if set, hydrogens in the graph will be considered + * eligible to be in paths. NOTE: this will not add + * Hs to the graph. + */ + PATH_LIST findAllPathsOfLengthN(const ROMol &mol,unsigned int targetLen, + bool useBonds=true,bool useHs=false); } diff --git a/Code/GraphMol/Wrap/MolOps.cpp b/Code/GraphMol/Wrap/MolOps.cpp index 038ab7e35..c265c17d7 100755 --- a/Code/GraphMol/Wrap/MolOps.cpp +++ b/Code/GraphMol/Wrap/MolOps.cpp @@ -417,7 +417,7 @@ namespace RDKit{ \n"; python::def("FindAllSubgraphsOfLengthN", &findAllSubgraphsOfLengthN, (python::arg("mol"),python::arg("length"), - python::arg("useHs")=false,python::arg("verbose")=false), + python::arg("useHs")=false), docString.c_str()); // ------------------------------------------------------------------------ docString="Finds unique subgraphs of a particular length in a molecule\n\ @@ -457,9 +457,6 @@ namespace RDKit{ Otherwise atom indices are used. *Note* this behavior is different\n\ from that for subgraphs.\n\ Defaults to 1.\n\ -\n\ - - useHs: (optional) toggles verbosity in the search algorithm.\n\ - Defaults to 0.\n\ \n\ RETURNS: a tuple of tuples with IDs for the bonds.\n\ \n\ @@ -592,7 +589,7 @@ namespace RDKit{ (python::arg("mol"),python::arg("minPath")=1, python::arg("maxPath")=7,python::arg("fpSize")=2048, python::arg("nBitsPerHash")=4,python::arg("useHs")=true, - python::arg("tgtDensity")=0.0,python::arg("minSize")=128), + python::arg("tgtDensity")=0.0,python::arg("minSize")=128), docString.c_str(),python::return_value_policy()); diff --git a/Code/Query/Query.h b/Code/Query/Query.h index 7ab0b3e77..a0d4bd33e 100755 --- a/Code/Query/Query.h +++ b/Code/Query/Query.h @@ -94,13 +94,11 @@ namespace Queries { copy( ) const { Query *res = new Query(); - typename Query::CHILD_VECT_CI i; - - // FIX: I'm not sure this is right - for(i=this->beginChildren(); - i!=this->endChildren(); - ++i){ - res->addChild(*i); + typename Query::CHILD_VECT_CI iter; + for(iter=this->beginChildren(); + iter!=this->endChildren(); + ++iter){ + res->addChild(*iter); } res->df_negate = this->df_negate; res->d_matchFunc = this->d_matchFunc; @@ -109,7 +107,6 @@ namespace Queries { return res; }; - protected : std::string d_description; CHILD_VECT d_children; diff --git a/Code/SimDivPickers/DistPicker.h b/Code/SimDivPickers/DistPicker.h index 3185f6668..3180cab11 100755 --- a/Code/SimDivPickers/DistPicker.h +++ b/Code/SimDivPickers/DistPicker.h @@ -53,9 +53,11 @@ namespace RDPickers { * distance matrix above contains the right number of elements; i.e. * poolSize*(poolSize-1) * \param pickSize - the number items to pick from pool (<= poolSize) + * + * \return a vector with indices of the picked items. */ virtual RDKit::INT_VECT pick(const double *distMat, unsigned int poolSize, - unsigned int pickSize) = 0; + unsigned int pickSize) const = 0; }; }; diff --git a/Code/SimDivPickers/HierarchicalClusterPicker.cpp b/Code/SimDivPickers/HierarchicalClusterPicker.cpp index cad1305f5..539f867ca 100755 --- a/Code/SimDivPickers/HierarchicalClusterPicker.cpp +++ b/Code/SimDivPickers/HierarchicalClusterPicker.cpp @@ -18,20 +18,18 @@ extern "C" void distdriver_(int *n,int *len, namespace RDPickers { RDKit::VECT_INT_VECT HierarchicalClusterPicker::cluster(const double *distMat, - unsigned int poolSize, - unsigned int pickSize) { + unsigned int poolSize, + unsigned int pickSize) const { PRECONDITION(distMat, "Invalid Distance Matrix"); PRECONDITION((poolSize >= pickSize), - "pickSize cannot be larger than the poolSize"); + "pickSize cannot be larger than the poolSize"); // Do the clustering - int *ia, *ib; - real *crit; int method = (int)d_method; int len = poolSize*(poolSize-1); - ia = (int *)calloc(poolSize, sizeof(int)); - ib = (int *)calloc(poolSize, sizeof(int)); - crit = (real *)calloc(poolSize,sizeof(real)); + int *ia = (int *)calloc(poolSize, sizeof(int)); + int *ib = (int *)calloc(poolSize, sizeof(int)); + real *crit = (real *)calloc(poolSize,sizeof(real)); CHECK_INVARIANT(ia,"failed to allocate memory"); CHECK_INVARIANT(ib,"failed to allocate memory"); CHECK_INVARIANT(crit,"failed to allocate memory"); @@ -45,7 +43,7 @@ namespace RDPickers { crit // I believe this is a vector the difference in heights of two clusters ); - // we have the clusters now merge then untill the number of clusters is same + // we have the clusters now merge then until the number of clusters is same // as the number of picks we need // before we do that a bit of explanation on the vectors "ia" and "ib" // - We with each item in the pool as an individual cluster @@ -55,37 +53,32 @@ namespace RDPickers { // ia[j] is replaced by the new cluster in the cluster list // RDKit::VECT_INT_VECT clusters; - unsigned int i; - for (i = 0; i < poolSize; i++) { + for (unsigned int i = 0; i < poolSize; i++) { RDKit::INT_VECT cls; cls.push_back(i); clusters.push_back(cls); } - - int cx1, cx2; - RDKit::INT_VECT removed; // do the merging, each round of of this loop eleminates one cluster - for (i = 0; i < (poolSize - pickSize); i++) { - cx1 = ia[i] - 1; - cx2 = ib[i] - 1; + RDKit::INT_VECT removed; + for (unsigned int i = 0; i < (poolSize - pickSize); i++) { + int cx1 = ia[i] - 1; + int cx2 = ib[i] - 1; // add the items from cluster cx2 to cx1 - RDKit::INT_VECT_CI cx2i; // REVIEW: merge function??? - for (cx2i = clusters[cx2].begin(); cx2i != clusters[cx2].end(); cx2i++) { + for (RDKit::INT_VECT_CI cx2i = clusters[cx2].begin(); cx2i != clusters[cx2].end(); cx2i++) { clusters[cx1].push_back(*cx2i); } // mark the second cluster as removed removed.push_back(cx2); } - free(ia); free(ib); free(crit); - // sort removed so that looping will ve easier later + // sort removed so that looping will be easier later std::sort(removed.begin(), removed.end()); //some error checking here, uniqueify removed and the vector should not changed @@ -95,7 +88,7 @@ namespace RDPickers { RDKit::VECT_INT_VECT res; unsigned int j = 0; - for (i = 0; i < poolSize; i++) { + for (unsigned int i = 0; i < poolSize; i++) { if (i == removed[j]) { j++; continue; @@ -107,26 +100,27 @@ namespace RDPickers { RDKit::INT_VECT HierarchicalClusterPicker::pick(const double *distMat, unsigned int poolSize, - unsigned int pickSize) { + unsigned int pickSize) const { PRECONDITION(distMat,"bad distance matrix"); RDKit::VECT_INT_VECT clusters = this->cluster(distMat, poolSize, pickSize); - CHECK_INVARIANT(clusters.size() == pickSize, ""); - // the last step find the representative element each of the remaining clusters + + // the last step: find a representative element from each of the + // remaining clusters RDKit::INT_VECT picks; - unsigned int i; - for (i = 0; i < pickSize; i++) { - int pick, curPick; + for (unsigned int i = 0; i < pickSize; i++) { + int pick; double minSumD2 = RDKit::MAX_DOUBLE; - RDKit::INT_VECT_CI cxi1, cxi2; - for (cxi1 = clusters[i].begin(); cxi1 != clusters[i].end(); cxi1++) { - curPick = (*cxi1); - double d, d2sum = 0.0; - for (cxi2 = clusters[i].begin(); cxi2 != clusters[i].end(); cxi2++) { + for (RDKit::INT_VECT_CI cxi1 = clusters[i].begin(); + cxi1 != clusters[i].end(); ++cxi1 ) { + int curPick = (*cxi1); + double d2sum = 0.0; + for (RDKit::INT_VECT_CI cxi2 = clusters[i].begin(); + cxi2 != clusters[i].end(); ++cxi2) { if (cxi1 == cxi2) { continue; } - d = getDistFromLTM(distMat, curPick, (*cxi2)); + double d = getDistFromLTM(distMat, curPick, (*cxi2)); d2sum += (d*d); } if (d2sum < minSumD2) { diff --git a/Code/SimDivPickers/HierarchicalClusterPicker.h b/Code/SimDivPickers/HierarchicalClusterPicker.h index ee1ddc35d..bd68d737b 100755 --- a/Code/SimDivPickers/HierarchicalClusterPicker.h +++ b/Code/SimDivPickers/HierarchicalClusterPicker.h @@ -62,7 +62,7 @@ namespace RDPickers { * poolSize*(poolSize-1) \n * \param pickSize - the number items to pick from pool (<= poolSize) */ - RDKit::INT_VECT pick(const double *distMat, unsigned int poolSize, unsigned int pickSize); + RDKit::INT_VECT pick(const double *distMat, unsigned int poolSize, unsigned int pickSize) const ; /*! \brief This is the function that does the clustering of the items - used by the picker * @@ -76,7 +76,7 @@ namespace RDPickers { * poolSize*(poolSize-1) \n * \param pickSize - the number clusters to divide the pool into (<= poolSize) */ - RDKit::VECT_INT_VECT cluster(const double *distMat, unsigned int poolSize, unsigned int pickSize); + RDKit::VECT_INT_VECT cluster(const double *distMat, unsigned int poolSize, unsigned int pickSize) const; private: ClusterMethod d_method; diff --git a/Code/SimDivPickers/MaxMinPicker.cpp b/Code/SimDivPickers/MaxMinPicker.cpp index eb78d760e..03a579046 100755 --- a/Code/SimDivPickers/MaxMinPicker.cpp +++ b/Code/SimDivPickers/MaxMinPicker.cpp @@ -1,6 +1,6 @@ // $Id$ // -// Copyright (C) 2003-2006 Rational Discovery LLC +// Copyright (C) 2003-2007 Rational Discovery LLC // // @@ All Rights Reserved @@ // @@ -8,46 +8,43 @@ #include "MaxMinPicker.h" #include #include +#include #include namespace RDPickers { RDKit::INT_VECT MaxMinPicker::pick(const double *distMat, - unsigned int poolSize, unsigned int pickSize) { + unsigned int poolSize, unsigned int pickSize) const { CHECK_INVARIANT(distMat, "Invalid Distance Matrix"); - CHECK_INVARIANT((poolSize >= pickSize), "pickSize cannot be larger than the poolSize"); - - RDKit::INT_LIST pool; - RDKit::INT_LIST_I plri, pli; - + if(poolSize #include "DistPicker.h" @@ -13,7 +13,7 @@ namespace RDPickers { /*! \brief Implements the MaxMin algorithm for picking a subset of item from a pool * - * This class inherits from the DistPicker and implement a specific picking strategy + * This class inherits from the DistPicker and implements a specific picking strategy * aimed at diversity. See documentation for "pick()" member function for the algorithm details */ class MaxMinPicker : public DistPicker { @@ -25,13 +25,13 @@ namespace RDPickers { /*! \brief Contains the implementation for the MaxMin diversity picker * - * Here is how the picking algorithm works, refer to \n - * Ashton, M. et. al., Quant. Struct.-Act. Relat., 21 (2002), 598-604 \n + * Here is how the picking algorithm works, refer to + * Ashton, M. et. al., Quant. Struct.-Act. Relat., 21 (2002), 598-604 * for more detail: * * A subset of k items is to be selected from a pool containing N molecules. - * Then the MaxMin method is as follows: \n - * 1. Initialise Subset with some appropriately chosen seed + * Then the MaxMin method is as follows: + * 1. Initialise Subset with a randomly chosen seed * compound and set x = 1. * 2. For each of the N-x remaining compounds in Dataset * calculate its dissimilarity with each of the x compounds in @@ -42,16 +42,16 @@ namespace RDPickers { * transfer it to Subset. * 4. Set x = x + 1 and return to Step 2 if x < k. * - * - * * \param distMat - distance matrix - a vector of double. It is assumed that only the * lower triangle element of the matrix are supplied in a 1D array\n * \param poolSize - the size of teh pool to pick the items from. It is assumed that the * distance matrix above contains the right number of elements; i.e. * poolSize*(poolSize-1) \n * \param pickSize - the number items to pick from pool (<= poolSize) - */ - RDKit::INT_VECT pick(const double *distMat, unsigned int poolSize, unsigned int pickSize); + * + * \return a vector with indices of the picked items. + */ + RDKit::INT_VECT pick(const double *distMat, unsigned int poolSize, unsigned int pickSize) const; }; }; diff --git a/Code/SimDivPickers/Wrap/HierarchicalClusterPicker.cpp b/Code/SimDivPickers/Wrap/HierarchicalClusterPicker.cpp index d5641a055..767ef63f3 100755 --- a/Code/SimDivPickers/Wrap/HierarchicalClusterPicker.cpp +++ b/Code/SimDivPickers/Wrap/HierarchicalClusterPicker.cpp @@ -1,7 +1,6 @@ // $Id$ // -// Copyright (C) 2003-2006 Rational Discovery LLC -// +// Copyright (C) 2003-2007 Greg Landrum and Rational Discovery LLC // @@ All Rights Reserved @@ // #define NO_IMPORT_ARRAY @@ -66,16 +65,16 @@ namespace RDPickers { python::init (python::args("clusterMethod"))) .def("Pick", HierarchicalPicks, - "Pick a diverse subset of items from a pool of items using a Hierarchical Clustering\n" + "Pick a diverse subset of items from a pool of items using hierarchical clustering\n" "\n" - "ARGUMENTS: \n\n" + "ARGUMENTS: \n" " - distMat: 1D distance matrix (only the lower triangle elements)\n" " - poolSize: number of items in the pool\n" " - pickSize: number of items to pick from the pool\n") .def("Cluster", HierarchicalClusters, "Return a list of clusters of item from the pool using hierachical clustering\n" "\n" - "ARGUMENTS: \n\n" + "ARGUMENTS: \n" " - distMat: 1D distance matrix (only the lower triangle elements)\n" " - poolSize: number of items in the pool\n" " - pickSize: number of items to pick from the pool\n") diff --git a/Code/SimDivPickers/Wrap/MaxMinPicker.cpp b/Code/SimDivPickers/Wrap/MaxMinPicker.cpp index 0e6984ba4..80eed668a 100755 --- a/Code/SimDivPickers/Wrap/MaxMinPicker.cpp +++ b/Code/SimDivPickers/Wrap/MaxMinPicker.cpp @@ -1,7 +1,6 @@ // $Id$ // -// Copyright (C) 2003-2006 Rational Discovery LLC -// +// Copyright (C) 2003-2007 Greg Landrum and Rational Discovery LLC // @@ All Rights Reserved @@ // #define NO_IMPORT_ARRAY @@ -11,12 +10,10 @@ #include #include "Numeric/arrayobject.h" - #include #include #include - namespace python = boost::python; namespace RDPickers { @@ -44,8 +41,8 @@ namespace RDPickers { "A class for diversity picking of items using the MaxMin Algorithm\n") .def("Pick", MaxMinPicks, "Pick a subset of items from a pool of items using the MaxMin Algorithm\n" - "Ashton, M. et. al., Quant. Struct.-Act. Relat., 21 (2002), 598-604 \n" - "ARGUMENTS:\n\n" + "Ashton, M. et. al., Quant. Struct.-Act. Relat., 21 (2002), 598-604 \n\n" + "ARGUMENTS:\n" " - distMat: 1D distance matrix (only the lower triangle elements)\n" " - poolSize: number of items in the pool\n" " - pickSize: number of items to pick from the pool\n") diff --git a/Code/SimDivPickers/Wrap/rdSimDivPickers.cpp b/Code/SimDivPickers/Wrap/rdSimDivPickers.cpp index d4375f36c..f742127e6 100755 --- a/Code/SimDivPickers/Wrap/rdSimDivPickers.cpp +++ b/Code/SimDivPickers/Wrap/rdSimDivPickers.cpp @@ -7,12 +7,9 @@ #define PY_ARRAY_UNIQUE_SYMBOL rdpicker_array_API #include #include "Numeric/arrayobject.h" -#include -#include -#include -#include + + namespace python = boost::python; -using namespace RDPickers; void wrap_maxminpick(); void wrap_HierarchCP(); @@ -27,7 +24,6 @@ BOOST_PYTHON_MODULE(rdSimDivPickers) python::register_exception_translator(&translate_index_error); python::register_exception_translator(&translate_value_error); - wrap_maxminpick(); wrap_HierarchCP(); }