at this point I think all descriptors and fingerprints work

This commit is contained in:
Greg Landrum
2012-02-25 07:14:29 +00:00
parent a9e2447b28
commit 6dcbdb246b
7 changed files with 65 additions and 33 deletions

View File

@@ -1,6 +1,7 @@
find_package (Threads)
rdkit_library(Fingerprints
Fingerprints.cpp MorganFingerprints.cpp AtomPairs.cpp
LINK_LIBRARIES Subgraphs SubstructMatch SmilesParse GraphMol )
LINK_LIBRARIES Subgraphs SubstructMatch SmilesParse GraphMol ${CMAKE_THREAD_LIBS_INIT} )
rdkit_headers(AtomPairs.h
Fingerprints.h

View File

@@ -46,6 +46,28 @@
#include <boost/foreach.hpp>
#include <algorithm>
#include <boost/flyweight.hpp>
#include <boost/flyweight/key_value.hpp>
#include <boost/flyweight/no_tracking.hpp>
namespace {
class ss_matcher {
public:
ss_matcher() {};
ss_matcher(const std::string &pattern){
RDKit::RWMol *p=RDKit::SmartsToMol(pattern);
TEST_ASSERT(p);
m_matcher.reset(p);
};
//const RDKit::ROMOL_SPTR &getMatcher() const { return m_matcher; };
const RDKit::ROMol *getMatcher() const { return m_matcher.get(); };
private:
RDKit::ROMOL_SPTR m_matcher;
};
}
namespace RDKit{
namespace MorganFingerprints {
using boost::uint32_t;
@@ -73,30 +95,29 @@ $([N;H0&+0]([C;!$(C(=O))])([C;!$(C(=O))])[C;!$(C(=O))])]", // Basic
"[$([C,S](=[O,S,P])-[O;H1,-1])]" //Acidic
};
std::vector<std::string> defaultFeatureSmarts(smartsPatterns,smartsPatterns+6);
std::vector<ROMOL_SPTR> defaultFeatureMatchers;
typedef boost::flyweight<boost::flyweights::key_value<std::string,ss_matcher>,boost::flyweights::no_tracking > pattern_flyweight;
void getFeatureInvariants(const ROMol &mol,
std::vector<uint32_t> &invars,
std::vector<ROMOL_SPTR> *patterns){
std::vector<const ROMol *> *patterns){
unsigned int nAtoms=mol.getNumAtoms();
PRECONDITION(invars.size()>=nAtoms,"vector too small");
std::vector<const ROMol *> featureMatchers;
if(!patterns){
if(defaultFeatureMatchers.size()==0){
defaultFeatureMatchers.reserve(defaultFeatureSmarts.size());
for(std::vector<std::string>::const_iterator smaIt=defaultFeatureSmarts.begin();
smaIt!=defaultFeatureSmarts.end();++smaIt){
ROMol *matcher=static_cast<ROMol *>(SmartsToMol(*smaIt));
CHECK_INVARIANT(matcher,"bad smarts");
defaultFeatureMatchers.push_back(ROMOL_SPTR(matcher));
}
featureMatchers.reserve(defaultFeatureSmarts.size());
for(std::vector<std::string>::const_iterator smaIt=defaultFeatureSmarts.begin();
smaIt!=defaultFeatureSmarts.end();++smaIt){
const ROMol *matcher=pattern_flyweight(*smaIt).get().getMatcher();
CHECK_INVARIANT(matcher,"bad smarts");
featureMatchers.push_back(matcher);
}
patterns=&defaultFeatureMatchers;
patterns=&featureMatchers;
}
std::fill(invars.begin(),invars.end(),0);
for(unsigned int i=0;i<patterns->size();++i){
unsigned int mask=1<<i;
std::vector<MatchVectType> matchVect;
SubstructMatch(mol,*(*patterns)[i],matchVect);
SubstructMatch(mol,ROMol(*(*patterns)[i],true),matchVect);
for(std::vector<MatchVectType>::const_iterator mvIt=matchVect.begin();
mvIt!=matchVect.end();++mvIt){
for(MatchVectType::const_iterator mIt=mvIt->begin();

View File

@@ -49,7 +49,6 @@ namespace RDKit {
class ROMol;
namespace MorganFingerprints {
extern std::vector<std::string> defaultFeatureSmarts;
extern std::vector<ROMOL_SPTR> defaultFeatureMatchers;
typedef std::map<boost::uint32_t,std::vector<std::pair<boost::uint32_t,boost::uint32_t> > > BitInfoMap;
@@ -168,7 +167,7 @@ namespace RDKit {
*/
void getFeatureInvariants(const ROMol &mol,
std::vector<boost::uint32_t> &invars,
std::vector<ROMOL_SPTR> *patterns=0);
std::vector<const ROMol *> *patterns=0);
const std::string morganFeatureInvariantVersion="0.1.0";
} // end of namespace MorganFingerprints

View File

@@ -1000,12 +1000,12 @@ void test4MorganFPs(){
TEST_ASSERT(mol);
std::vector<boost::uint32_t> invars(mol->getNumAtoms());
std::vector<ROMOL_SPTR> patterns(2);
std::vector<const ROMol *> patterns(2);
RWMol *p;
p=SmartsToMol("[A]");
patterns[0]=ROMOL_SPTR(static_cast<ROMol *>(p));
patterns[0]=static_cast<const ROMol *>(p);
p=SmartsToMol("[a]");
patterns[1]=ROMOL_SPTR(static_cast<ROMol *>(p));
patterns[1]=static_cast<const ROMol *>(p);
MorganFingerprints::getFeatureInvariants(*mol,invars,&patterns);
TEST_ASSERT(invars[0]!=0);

View File

@@ -97,7 +97,7 @@ namespace RDKit {
PRECONDITION(charges.size()>=mol.getNumAtoms(),"bad array size");
PeriodicTable *table = PeriodicTable::getTable();
GasteigerParams *params = GasteigerParams::getParams();
const GasteigerParams *params = GasteigerParams::getParams();
double damp = DAMP;
int natms = mol.getNumAtoms();

View File

@@ -11,12 +11,15 @@
#include <boost/tokenizer.hpp>
typedef boost::tokenizer<boost::char_separator<char> > tokenizer;
#include "GasteigerParams.h"
#include <boost/flyweight.hpp>
#include <boost/flyweight/key_value.hpp>
#include <boost/flyweight/no_tracking.hpp>
namespace RDKit {
/*! \brief Gasteiger partial charge parameters
*/
std::string paramData =
std::string defaultParamData =
"H * 7.17 6.24 -0.56 \n \
C sp3 7.98 9.18 1.88 \n \
C sp2 8.79 9.32 1.51 \n \
@@ -38,11 +41,13 @@ P sp3 8.90 8.24 0.96 \n \
X * 0.00 0.00 0.00 \n \
";
class GasteigerParams *GasteigerParams::ds_instance = 0;
typedef boost::flyweight<boost::flyweights::key_value<std::string,GasteigerParams>,
boost::flyweights::no_tracking > gparam_flyweight;
GasteigerParams::GasteigerParams() {
GasteigerParams::GasteigerParams(std::string paramData) {
boost::char_separator<char> eolSep("\n");
boost::char_separator<char> spaceSep(" \t");
if(paramData=="") paramData=defaultParamData;
tokenizer lines(paramData,eolSep);
d_paramMap.clear();
for(tokenizer::iterator lineIter=lines.begin();
@@ -73,11 +78,9 @@ X * 0.00 0.00 0.00 \n \
}
}
GasteigerParams *GasteigerParams::getParams() {
if ( ds_instance == 0 ) {
ds_instance = new GasteigerParams();
}
return ds_instance;
const GasteigerParams *GasteigerParams::getParams(const std::string &paramData) {
const GasteigerParams *res = &(gparam_flyweight(paramData).get());
return res;
}
}

View File

@@ -35,16 +35,18 @@ namespace RDKit {
public:
static GasteigerParams *getParams();
static const GasteigerParams *getParams(const std::string &paramData="");
~GasteigerParams() {
d_paramMap.clear();
}
DOUBLE_VECT getParams(std::string elem, std::string mode,bool throwOnFailure=false) {
DOUBLE_VECT getParams(std::string elem, std::string mode,bool throwOnFailure=false) const {
std::pair<std::string, std::string> query(elem, mode);
if (d_paramMap.find(query) != d_paramMap.end()) {
return d_paramMap[query];
std::map<std::pair<std::string, std::string>, DOUBLE_VECT>::const_iterator iter;
iter=d_paramMap.find(query);
if (iter != d_paramMap.end()) {
return iter->second;
}
else {
if(throwOnFailure){
@@ -54,13 +56,19 @@ namespace RDKit {
message += mode;
throw message.c_str();
} else {
return d_paramMap[std::make_pair<std::string,std::string>("X","*")];
iter=d_paramMap.find(std::make_pair<std::string,std::string>("X","*"));
if (iter != d_paramMap.end()) {
return iter->second;
} else {
std::string message = "ERROR: Default Gasteiger Partial Charge parameters are missing";
throw message.c_str();
}
}
}
}
GasteigerParams(std::string paramData="");
private:
GasteigerParams();
std::map<std::pair<std::string, std::string>, DOUBLE_VECT> d_paramMap;
static class GasteigerParams *ds_instance;