first pass, using google style

This commit is contained in:
Greg Landrum
2015-11-14 14:58:11 +01:00
parent 80bb809b31
commit e08e0d16d8
619 changed files with 138877 additions and 133381 deletions

65
.clang-format Normal file
View File

@@ -0,0 +1,65 @@
---
# clang-format settings for the C++ sources. The options are spelled out
# one by one; the BasedOnStyle line below is left commented out.
Language: Cpp
# BasedOnStyle: Google
AccessModifierOffset: -1
AlignAfterOpenBracket: true
AlignEscapedNewlinesLeft: true
AlignOperands: true
AlignTrailingComments: true
AllowAllParametersOfDeclarationOnNextLine: true
AllowShortBlocksOnASingleLine: false
AllowShortCaseLabelsOnASingleLine: false
AllowShortIfStatementsOnASingleLine: true
AllowShortLoopsOnASingleLine: true
AllowShortFunctionsOnASingleLine: All
AlwaysBreakAfterDefinitionReturnType: false
AlwaysBreakTemplateDeclarations: true
AlwaysBreakBeforeMultilineStrings: true
BreakBeforeBinaryOperators: None
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false
BinPackParameters: true
BinPackArguments: true
# Lines are wrapped at 80 columns.
ColumnLimit: 80
ConstructorInitializerAllOnOneLineOrOnePerLine: true
ConstructorInitializerIndentWidth: 4
# NOTE(review): with DerivePointerAlignment enabled, the PointerAlignment
# value further down is presumably only a fallback for files where no
# alignment can be derived - confirm against the clang-format documentation.
DerivePointerAlignment: true
ExperimentalAutoDetectBinPacking: false
IndentCaseLabels: true
IndentWrappedFunctionNames: false
IndentFunctionDeclarationAfterType: false
MaxEmptyLinesToKeep: 1
KeepEmptyLinesAtTheStartOfBlocks: false
NamespaceIndentation: None
ObjCBlockIndentWidth: 2
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: false
# Penalty weights used when choosing between candidate line breaks.
PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300
PenaltyBreakString: 1000
PenaltyBreakFirstLessLess: 120
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 200
PointerAlignment: Left
SpacesBeforeTrailingComments: 2
Cpp11BracedListStyle: true
Standard: Auto
# Indentation settings: two-column indent, tabs never used.
IndentWidth: 2
TabWidth: 8
UseTab: Never
BreakBeforeBraces: Attach
SpacesInParentheses: false
SpacesInSquareBrackets: false
SpacesInAngles: false
SpaceInEmptyParentheses: false
SpacesInCStyleCastParentheses: false
SpaceAfterCStyleCast: false
SpacesInContainerLiterals: true
SpaceBeforeAssignmentOperators: true
ContinuationIndentWidth: 4
CommentPragmas: '^ IWYU pragma:'
# Macros listed here are formatted like loop statements.
ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ]
SpaceBeforeParens: ControlStatements
DisableFormat: false
...

View File

@@ -12,8 +12,4 @@
#include "Catalog.h"
namespace RDCatalog {
}
namespace RDCatalog {}

View File

@@ -23,462 +23,455 @@
#endif
#include <RDGeneral/BoostEndInclude.h>
// for some typedefs
#include <RDGeneral/types.h>
#include <RDGeneral/StreamOps.h>
namespace RDCatalog {
const int versionMajor=1;
const int versionMinor=0;
const int versionPatch=0;
const int endianId=0xDEADBEEF;
//-----------------------------------------------------------------------------
//! abstract base class for a catalog object
template <class entryType, class paramType>
class Catalog {
public:
typedef entryType entryType_t;
typedef paramType paramType_t;
//------------------------------------
Catalog() : d_fpLength(0), dp_cParams(0) {};
const int versionMajor = 1;
const int versionMinor = 0;
const int versionPatch = 0;
const int endianId = 0xDEADBEEF;
//------------------------------------
virtual ~Catalog(){
delete dp_cParams;
}
//------------------------------------
//! return a serialized form of the Catalog as an std::string
virtual std::string Serialize() const = 0;
//------------------------------------
//! adds an entry to the catalog
/*!
//-----------------------------------------------------------------------------
//! abstract base class for a catalog object
template <class entryType, class paramType>
class Catalog {
public:
typedef entryType entryType_t;
typedef paramType paramType_t;
\param entry the entry to be added
\param updateFPLength (optional) if this is true, our internal
fingerprint length will also be updated.
*/
virtual unsigned int addEntry(entryType *entry, bool updateFPLength = true) = 0;
//------------------------------------
//! returns a particular entry in the Catalog
virtual const entryType* getEntryWithIdx(unsigned int idx) const = 0;
//------------------------------------
//! returns the number of entries
virtual unsigned int getNumEntries() const = 0;
//------------------------------------
//! returns the length of our fingerprint
unsigned int getFPLength() const {return d_fpLength;}
//------------------------------------
//! sets our fingerprint length
void setFPLength(unsigned int val) {d_fpLength = val;}
//------------------------------------
//! sets our parameters by copying the \c params argument
virtual void setCatalogParams(paramType *params) {
PRECONDITION(params,"bad parameter object");
// if we already have a parameter object throw an exception
PRECONDITION(!dp_cParams,"A parameter object already exists on the catalog" );
/*
if (dp_cParams) {
// we already have parameter object on the catalog
// can't overwrite it
PRECONDITION(0, "A parameter object already exist on the catalog");
}*/
dp_cParams = new paramType(*params);
}
//------------------------------------
//! returns a pointer to our parameters
const paramType *getCatalogParams() const { return dp_cParams;}
//------------------------------------
Catalog() : d_fpLength(0), dp_cParams(0){};
protected:
// this is the ID that will be assigned to the next entry
// added to the catalog - need not be same as the number of entries
// in the catalog and does not correspond with the
// id of the entry in the catalog.
// this is more along the lines of bitId
unsigned int d_fpLength; //!< the length of our fingerprint
paramType *dp_cParams; //!< our params object
//------------------------------------
virtual ~Catalog() { delete dp_cParams; }
};
//------------------------------------
//! return a serialized form of the Catalog as an std::string
virtual std::string Serialize() const = 0;
//-----------------------------------------------------------------------------
//! A Catalog with a hierarchical structure
//------------------------------------
//! adds an entry to the catalog
/*!
The entries of a HierarchCatalog are arranged in a directed graph
<b>The difference between <i>Indices</i> and <i>Bit Ids</i></b>
A HierarchCatalog may contain more entries than the user is actually
interested in. For example a HierarchCatalog constructed to contain
orders 5 through 8 may well contain information about orders 1-5,
in order to facilitate some search optimizations.
- <i>Bit Ids</i> refer to the "interesting" bits.
So, in the above example, Bit Id \c 0 will be the first entry
with order 5.
- <i>Indices</i> refer to the underlying structure of the catalog.
So, in the above example, the entry with index \c 0 will be
the first entry with order 1.
\param entry the entry to be added
\param updateFPLength (optional) if this is true, our internal
fingerprint length will also be updated.
*/
template <class entryType, class paramType, class orderType>
class HierarchCatalog : public Catalog <entryType, paramType> {
// the entries in the catalog can be traversed using the edges
// in a desired order
public:
//! used by the BGL to set up the node properties in our graph
struct vertex_entry_t {
enum { num=1003 };
typedef boost::vertex_property_tag kind;
};
typedef boost::property<vertex_entry_t, entryType *> EntryProperty;
//! the type of the graph itself:
typedef boost::adjacency_list<boost::vecS,
boost::vecS, // FIX: should be using setS for edges so that parallel edges are never added (page 225 BGL book)
virtual unsigned int addEntry(entryType *entry,
bool updateFPLength = true) = 0;
//------------------------------------
//! returns a particular entry in the Catalog
virtual const entryType *getEntryWithIdx(unsigned int idx) const = 0;
//------------------------------------
//! returns the number of entries
virtual unsigned int getNumEntries() const = 0;
//------------------------------------
//! returns the length of our fingerprint
unsigned int getFPLength() const { return d_fpLength; }
//------------------------------------
//! sets our fingerprint length
void setFPLength(unsigned int val) { d_fpLength = val; }
//------------------------------------
//! stores a private copy of the \c params argument as our parameters
/*!
  \param params  the parameter object to copy; checked to be non-NULL

  The parameters can only be set once: a second call fails the
  PRECONDITION below instead of replacing the existing object.
*/
virtual void setCatalogParams(paramType *params) {
  PRECONDITION(params, "bad parameter object");
  // an existing parameter object is never overwritten
  PRECONDITION(!dp_cParams,
               "A parameter object already exists on the catalog");
  // we keep our own copy; the caller's object is left untouched
  dp_cParams = new paramType(*params);
}
//------------------------------------
//! returns a pointer to our parameters
const paramType *getCatalogParams() const { return dp_cParams; }
protected:
// this is the ID that will be assigned to the next entry
// added to the catalog - need not be same as the number of entries
// in the catalog and does not correspond with the
// id of the entry in the catalog.
// this is more along the lines of bitId
unsigned int d_fpLength; //!< the length of our fingerprint
paramType *dp_cParams; //!< our params object
};
//-----------------------------------------------------------------------------
//! A Catalog with a hierarchical structure
/*!
The entries of a HierarchCatalog are arranged in a directed graph
<b>The difference between <i>Indices</i> and <i>Bit Ids</i></b>
A HierarchCatalog may contain more entries than the user is actually
interested in. For example a HierarchCatalog constructed to contain
orders 5 through 8 may well contain information about orders 1-5,
in order to facilitate some search optimizations.
- <i>Bit Ids</i> refer to the "interesting" bits.
So, in the above example, Bit Id \c 0 will be the first entry
with order 5.
- <i>Indices</i> refer to the underlying structure of the catalog.
So, in the above example, the entry with index \c 0 will be
the first entry with order 1.
*/
template <class entryType, class paramType, class orderType>
class HierarchCatalog : public Catalog<entryType, paramType> {
// the entries in the catalog can be traversed using the edges
// in a desired order
public:
//! used by the BGL to set up the node properties in our graph
struct vertex_entry_t {
enum { num = 1003 };
typedef boost::vertex_property_tag kind;
};
typedef boost::property<vertex_entry_t, entryType *> EntryProperty;
//! the type of the graph itself:
typedef boost::adjacency_list<
boost::vecS,
boost::vecS, // FIX: should be using setS for edges so that parallel
// edges are never added (page 225 BGL book)
// but that seems to result in compile errors
boost::bidirectionalS,
EntryProperty> CatalogGraph;
typedef boost::graph_traits<CatalogGraph> CAT_GRAPH_TRAITS;
typedef typename CAT_GRAPH_TRAITS::vertex_iterator VER_ITER;
typedef std::pair<VER_ITER, VER_ITER> ENT_ITER_PAIR;
typedef typename CAT_GRAPH_TRAITS::adjacency_iterator DOWN_ENT_ITER;
typedef std::pair<DOWN_ENT_ITER, DOWN_ENT_ITER> DOWN_ENT_ITER_PAIR;
//------------------------------------
HierarchCatalog<entryType, paramType, orderType>() {};
//------------------------------------
//! Construct by making a copy of the input \c params object
HierarchCatalog<entryType, paramType, orderType>(paramType *params) : Catalog<entryType,paramType>() {
this->setCatalogParams(params);
boost::bidirectionalS, EntryProperty> CatalogGraph;
typedef boost::graph_traits<CatalogGraph> CAT_GRAPH_TRAITS;
typedef typename CAT_GRAPH_TRAITS::vertex_iterator VER_ITER;
typedef std::pair<VER_ITER, VER_ITER> ENT_ITER_PAIR;
typedef typename CAT_GRAPH_TRAITS::adjacency_iterator DOWN_ENT_ITER;
typedef std::pair<DOWN_ENT_ITER, DOWN_ENT_ITER> DOWN_ENT_ITER_PAIR;
//------------------------------------
HierarchCatalog<entryType, paramType, orderType>(){};
//------------------------------------
//! Construct by making a copy of the input \c params object
HierarchCatalog<entryType, paramType, orderType>(paramType *params)
: Catalog<entryType, paramType>() {
this->setCatalogParams(params);
}
//------------------------------------
//! Construct from a \c pickle (a serialized form of the HierarchCatalog)
HierarchCatalog<entryType, paramType, orderType>(const std::string &pickle) {
this->initFromString(pickle);
}
//------------------------------------
~HierarchCatalog() { destroy(); }
//------------------------------------
//! serializes this object to a stream
void toStream(std::ostream &ss) const {
PRECONDITION(this->getCatalogParams(), "NULL parameter object");
// the i/o header:
RDKit::streamWrite(ss, endianId);
RDKit::streamWrite(ss, versionMajor);
RDKit::streamWrite(ss, versionMinor);
RDKit::streamWrite(ss, versionPatch);
// information about the catalog itself:
int tmpUInt;
tmpUInt = this->getFPLength();
RDKit::streamWrite(ss, tmpUInt);
tmpUInt = this->getNumEntries();
RDKit::streamWrite(ss, tmpUInt);
// std::cout << ">>>>-------------------------------" << std::endl;
// std::cout << "\tlength: " << getFPLength() << " " << getNumEntries() <<
// std::endl;
// add the params object:
this->getCatalogParams()->toStream(ss);
// std::cout << "\tparams: " << getCatalogParams()->getLowerFragLength();
// std::cout << " " << getCatalogParams()->getUpperFragLength();
// std::cout << " " << getCatalogParams()->getNumFuncGroups();
// std::cout << std::endl;
// write the entries in order:
for (unsigned int i = 0; i < getNumEntries(); i++) {
this->getEntryWithIdx(i)->toStream(ss);
}
//------------------------------------
//! Construct from a \c pickle (a serialized form of the HierarchCatalog)
HierarchCatalog<entryType, paramType, orderType>(const std::string &pickle) {
this->initFromString(pickle);
}
//------------------------------------
~HierarchCatalog() {
destroy();
}
//------------------------------------
//! serializes this object to a stream
void toStream(std::ostream &ss) const {
PRECONDITION(this->getCatalogParams(),"NULL parameter object");
// the i/o header:
RDKit::streamWrite(ss,endianId);
RDKit::streamWrite(ss,versionMajor);
RDKit::streamWrite(ss,versionMinor);
RDKit::streamWrite(ss,versionPatch);
// information about the catalog itself:
int tmpUInt;
tmpUInt = this->getFPLength();
RDKit::streamWrite(ss,tmpUInt);
tmpUInt = this->getNumEntries();
RDKit::streamWrite(ss,tmpUInt);
//std::cout << ">>>>-------------------------------" << std::endl;
//std::cout << "\tlength: " << getFPLength() << " " << getNumEntries() << std::endl;
// add the params object:
this->getCatalogParams()->toStream(ss);
//std::cout << "\tparams: " << getCatalogParams()->getLowerFragLength();
//std::cout << " " << getCatalogParams()->getUpperFragLength();
//std::cout << " " << getCatalogParams()->getNumFuncGroups();
//std::cout << std::endl;
// write the entries in order:
for(unsigned int i=0;i<getNumEntries();i++){
this->getEntryWithIdx(i)->toStream(ss);
}
// finally the adjacency list:
for(unsigned int i=0;i<getNumEntries();i++){
RDKit::INT_VECT children=this->getDownEntryList(i);
tmpUInt = children.size();
RDKit::streamWrite(ss,tmpUInt);
for(RDKit::INT_VECT::const_iterator ivci=children.begin();
ivci!=children.end();
ivci++){
RDKit::streamWrite(ss,*ivci);
}
// finally the adjacency list:
for (unsigned int i = 0; i < getNumEntries(); i++) {
RDKit::INT_VECT children = this->getDownEntryList(i);
tmpUInt = children.size();
RDKit::streamWrite(ss, tmpUInt);
for (RDKit::INT_VECT::const_iterator ivci = children.begin();
ivci != children.end(); ivci++) {
RDKit::streamWrite(ss, *ivci);
}
}
}
//------------------------------------
//! serializes this object and returns the resulting \c pickle
std::string Serialize() const {
std::stringstream ss(std::ios_base::binary|std::ios_base::out|std::ios_base::in);
this->toStream(ss);
return ss.str();
//------------------------------------
//! returns the \c pickle obtained by writing this object with toStream()
std::string Serialize() const {
  const std::ios_base::openmode mode =
      std::ios_base::binary | std::ios_base::out | std::ios_base::in;
  std::stringstream buffer(mode);
  this->toStream(buffer);
  return buffer.str();
}
//------------------------------------
//! fills the contents of this object from a stream containing a \c pickle
void initFromStream(std::istream &ss) {
int tmpInt;
// FIX: at the moment we ignore the header info:
RDKit::streamRead(ss, tmpInt);
RDKit::streamRead(ss, tmpInt);
RDKit::streamRead(ss, tmpInt);
RDKit::streamRead(ss, tmpInt);
unsigned int tmpUInt;
RDKit::streamRead(ss, tmpUInt); // fp length
this->setFPLength(tmpUInt);
unsigned int numEntries;
RDKit::streamRead(ss, numEntries);
// std::cout << "<<<-------------------------------" << std::endl;
// std::cout << "\tlength: " << getFPLength() << " " << numEntries <<
// std::endl;
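// grab the params:
// NOTE(review): setCatalogParams() stores a copy (new paramType(*params)) and
// no matching delete of this temporary is visible in this function - this
// looks like a leak; confirm and release the temporary after the call.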
paramType *params = new paramType();
params->initFromStream(ss);
this->setCatalogParams(params);
// std::cout << "\tparams: " << getCatalogParams()->getLowerFragLength();
// std::cout << " " << getCatalogParams()->getUpperFragLength();
// std::cout << " " << getCatalogParams()->getNumFuncGroups();
// std::cout << std::endl;
// now all of the entries:
for (unsigned int i = 0; i < numEntries; i++) {
entryType *entry = new entryType();
entry->initFromStream(ss);
this->addEntry(entry, false);
}
//------------------------------------
//! fills the contents of this object from a stream containing a \c pickle
void initFromStream(std::istream &ss) {
int tmpInt;
// FIX: at the moment we ignore the header info:
RDKit::streamRead(ss,tmpInt);
RDKit::streamRead(ss,tmpInt);
RDKit::streamRead(ss,tmpInt);
RDKit::streamRead(ss,tmpInt);
unsigned int tmpUInt;
RDKit::streamRead(ss,tmpUInt);// fp length
this->setFPLength(tmpUInt);
unsigned int numEntries;
RDKit::streamRead(ss,numEntries);
//std::cout << "<<<-------------------------------" << std::endl;
//std::cout << "\tlength: " << getFPLength() << " " << numEntries << std::endl;
// grab the params:
paramType *params = new paramType();
params->initFromStream(ss);
this->setCatalogParams(params);
//std::cout << "\tparams: " << getCatalogParams()->getLowerFragLength();
//std::cout << " " << getCatalogParams()->getUpperFragLength();
//std::cout << " " << getCatalogParams()->getNumFuncGroups();
//std::cout << std::endl;
// now all of the entries:
for(unsigned int i=0;i<numEntries;i++){
entryType *entry = new entryType();
entry->initFromStream(ss);
this->addEntry(entry,false);
}
// and, finally, the adjacency list:
for(unsigned int i=0;i<numEntries;i++){
unsigned int nNeighbors;
RDKit::streamRead(ss,nNeighbors);
for(unsigned int j=0;j<nNeighbors;j++){
RDKit::streamRead(ss,tmpInt);
this->addEdge(i,tmpInt);
}
// and, finally, the adjacency list:
for (unsigned int i = 0; i < numEntries; i++) {
unsigned int nNeighbors;
RDKit::streamRead(ss, nNeighbors);
for (unsigned int j = 0; j < nNeighbors; j++) {
RDKit::streamRead(ss, tmpInt);
this->addEdge(i, tmpInt);
}
}
//------------------------------------
unsigned int getNumEntries() const {
return boost::num_vertices(d_graph);
}
}
//------------------------------------
//! fills the contents of this object from a string containing a \c pickle
void initFromString(const std::string &text){
std::stringstream ss(std::ios_base::binary|std::ios_base::out|std::ios_base::in);
// initialize the stream:
ss.write(text.c_str(),text.length());
// now start reading out values:
this->initFromStream(ss);
}
//------------------------------------
unsigned int getNumEntries() const { return boost::num_vertices(d_graph); }
//------------------------------------
//! add a new entry to the catalog
/*!
//------------------------------------
//! fills the contents of this object from a string containing a \c pickle
/*!
  \param text  the pickle; its bytes are placed in a binary string stream
               that is then handed to initFromStream()
*/
void initFromString(const std::string &text) {
  // the stream starts out holding the pickle, with reading from the front
  std::stringstream pickleStream(
      text, std::ios_base::binary | std::ios_base::out | std::ios_base::in);
  this->initFromStream(pickleStream);
}
\param entry the entry to be added
\param updateFPLength (optional) if this is true, our internal
fingerprint length will also be updated.
*/
unsigned int addEntry(entryType *entry, bool updateFPLength = true){
PRECONDITION(entry,"bad arguments");
if (updateFPLength) {
unsigned int fpl = this->getFPLength();
entry->setBitId(fpl);
fpl++;
this->setFPLength(fpl);
}
unsigned int eid = boost::add_vertex(EntryProperty(entry), d_graph);
orderType etype = entry->getOrder();
// REVIEW: this initialization is not required: the STL map, in
// theory, will create a new object when operator[] is called
// for a new item
if (d_orderMap.find(etype) == d_orderMap.end()) {
RDKit::INT_VECT nets;
d_orderMap[etype] = nets;
}
d_orderMap[etype].push_back(eid);
return eid;
}
//------------------------------------
//! adds an edge between two entries in the catalog
/*!
Since we are using a bidirectional graph - the order in
which the ids are supplied here makes a difference
\param id1 index of the edge's beginning
\param id2 index of the edge's end
*/
void addEdge(unsigned int id1, unsigned int id2) {
unsigned int nents = getNumEntries();
URANGE_CHECK(id1, nents-1);
URANGE_CHECK(id2, nents-1);
// FIX: if we boost::setS for the edgeList BGL will
// do the checking for duplicity (parallel edges)
// But for reasons unknown setS results in compile
// errors while using adjacent_vertices.
typename CAT_GRAPH_TRAITS::edge_descriptor edge;
bool found;
boost::tie(edge,found) = boost::edge(boost::vertex(id1,d_graph),
boost::vertex(id2,d_graph),
d_graph);
if (!found) {
boost::add_edge(id1, id2, d_graph);
}
}
//------------------------------------
//! returns a pointer to our entry with a particular index
const entryType *getEntryWithIdx(unsigned int idx) const {
URANGE_CHECK(idx,getNumEntries()-1);
int vd = boost::vertex(idx, d_graph);
typename boost::property_map < CatalogGraph, vertex_entry_t>::const_type
pMap = boost::get(vertex_entry_t(), d_graph);
return pMap[vd];
}
//------------------------------------
//! returns a pointer to our entry with a particular bit ID
const entryType *getEntryWithBitId(unsigned int idx) const {
URANGE_CHECK(idx,this->getFPLength()-1);
typename boost::property_map < CatalogGraph, vertex_entry_t>::const_type
pMap = boost::get(vertex_entry_t(), d_graph);
const entryType *res=NULL;
for(unsigned int i=idx;i<this->getNumEntries();i++){
const entryType *e=pMap[i];
if(e->getBitId()==static_cast<int>(idx)){
res=e;
break;
}
}
return res;
}
//------------------------------------
//! returns the index of the entry with a particular bit ID
int getIdOfEntryWithBitId(unsigned int idx) const {
URANGE_CHECK(idx,this->getFPLength()-1);
typename boost::property_map < CatalogGraph, vertex_entry_t>::const_type
pMap = boost::get(vertex_entry_t(), d_graph);
int res=-1;
for(unsigned int i=idx;i<this->getNumEntries();i++){
const entryType *e=pMap[i];
if(static_cast<unsigned int>(e->getBitId())==idx){
res=i;
break;
}
}
return res;
}
//------------------------------------
//! returns a list of the indices of entries below the one passed in
RDKit::INT_VECT getDownEntryList(unsigned int idx) const {
RDKit::INT_VECT res;
DOWN_ENT_ITER nbrIdx, endIdx;
boost::tie(nbrIdx, endIdx) = boost::adjacent_vertices(idx, d_graph);
while (nbrIdx != endIdx) {
res.push_back(*nbrIdx);
nbrIdx++;
}
//std::cout << res.size() << "\n";
return res;
}
//------------------------------------
//! returns a list of the indices that have a particular order
const RDKit::INT_VECT &getEntriesOfOrder(orderType ord) {
return d_orderMap[ord];
}
//------------------------------------
//! returns a list of the indices that have a particular order
/*!
\overload
*/
const RDKit::INT_VECT &getEntriesOfOrder(orderType ord) const {
typename std::map<orderType, RDKit::INT_VECT>::const_iterator elem;
elem = d_orderMap.find(ord);
CHECK_INVARIANT(elem!=d_orderMap.end()," catalog does not contain any entries of the order specified");
return elem->second;
}
private:
// graph that stores the entries in the catalog in a hierarchical manner
CatalogGraph d_graph;
// a map that maps the order type of entries in the catalog to
// a vector of vertex indices in the graphs above
// e.g. for a catalog with molecular fragments, the order of a fragment can
// simply be the number of bond in it. The list this oder maps to is all the
// vertex ids of these fragment in the catalog that have this many bonds in them
std::map<orderType, RDKit::INT_VECT> d_orderMap;
//------------------------------------
//! clear any memory that we've used
void destroy() {
ENT_ITER_PAIR entItP = boost::vertices(d_graph);
typename boost::property_map < CatalogGraph, vertex_entry_t>::type
pMap = boost::get(vertex_entry_t(), d_graph);
while (entItP.first != entItP.second) {
delete pMap[*(entItP.first++)];
}
}
};
//-----------------------------------------------------------------------------
//! a linear Catalog (analogous to an std::vector)
//------------------------------------
//! add a new entry to the catalog
/*!
Here there is no particular hierarchy, simply a
collection of entries.
\param entry the entry to be added
\param updateFPLength (optional) if this is true, our internal
fingerprint length will also be updated.
*/
template <class entryType, class orderType>
class LinearCatalog : public Catalog <entryType, orderType> {
// here there is no particular hierarchy of entries
// we simply model it as a vector of entries
// FIX: for retrieval purposes a better model may be std::map
unsigned int addEntry(entryType *entry, bool updateFPLength = true) {
  // Adds `entry` as a new vertex of the graph and returns that vertex's index.
  // When updateFPLength is true the entry's bit id is set to the current
  // fingerprint length, which is then increased by one.
  PRECONDITION(entry, "bad arguments");
  if (updateFPLength) {
    const unsigned int bitId = this->getFPLength();
    entry->setBitId(bitId);
    this->setFPLength(bitId + 1);
  }
  const unsigned int eid = boost::add_vertex(EntryProperty(entry), d_graph);
  const orderType etype = entry->getOrder();
  // std::map::operator[] creates an empty index list the first time an order
  // is seen, so no separate find()/insert step is needed before appending
  d_orderMap[etype].push_back(eid);
  return eid;
}
public:
std::string Serialize();
unsigned int addEntry(entryType *entry, bool updateFPLength = true);
const entryType *getEntryWithIdx(unsigned int idx) const;
//------------------------------------
//! adds an edge between two entries in the catalog
/*!
Since we are using a bidirectional graph - the order in
which the ids are supplied here makes a difference
private:
std::vector<entryType*> d_vector;
};
\param id1 index of the edge's beginning
\param id2 index of the edge's end
*/
void addEdge(unsigned int id1, unsigned int id2) {
unsigned int nents = getNumEntries();
URANGE_CHECK(id1, nents - 1);
URANGE_CHECK(id2, nents - 1);
// FIX: if we boost::setS for the edgeList BGL will
// do the checking for duplicity (parallel edges)
// But for reasons unknown setS results in compile
// errors while using adjacent_vertices.
typename CAT_GRAPH_TRAITS::edge_descriptor edge;
bool found;
boost::tie(edge, found) = boost::edge(boost::vertex(id1, d_graph),
boost::vertex(id2, d_graph), d_graph);
if (!found) {
boost::add_edge(id1, id2, d_graph);
}
}
//------------------------------------
//! returns a pointer to our entry with a particular index
/*!
  \param idx  index of the entry in the underlying graph; range-checked
              against getNumEntries() - 1

  An empty catalog fails the PRECONDITION below: the unsigned expression
  getNumEntries() - 1 would otherwise wrap around and let any index through.
*/
const entryType *getEntryWithIdx(unsigned int idx) const {
  const unsigned int nents = getNumEntries();
  PRECONDITION(nents > 0, "the catalog has no entries");
  URANGE_CHECK(idx, nents - 1);
  // keep the descriptor in its own type rather than narrowing it to int
  const typename CAT_GRAPH_TRAITS::vertex_descriptor vd =
      boost::vertex(idx, d_graph);
  typename boost::property_map<CatalogGraph, vertex_entry_t>::const_type
      pMap = boost::get(vertex_entry_t(), d_graph);
  return pMap[vd];
}
//------------------------------------
//! returns a pointer to our entry with a particular bit ID
/*!
  \param idx  the bit ID to look for; range-checked against getFPLength() - 1

  The scan starts at vertex index \c idx and moves upwards. NULL is returned
  if no entry from there on carries the requested bit ID.
*/
const entryType *getEntryWithBitId(unsigned int idx) const {
  URANGE_CHECK(idx, this->getFPLength() - 1);
  typename boost::property_map<CatalogGraph, vertex_entry_t>::const_type
      entries = boost::get(vertex_entry_t(), d_graph);
  const int wantedBit = static_cast<int>(idx);
  const unsigned int nEntries = this->getNumEntries();
  for (unsigned int vtx = idx; vtx < nEntries; ++vtx) {
    const entryType *candidate = entries[vtx];
    if (candidate->getBitId() == wantedBit) {
      return candidate;
    }
  }
  return NULL;
}
//------------------------------------
//! returns the index of the entry with a particular bit ID
int getIdOfEntryWithBitId(unsigned int idx) const {
URANGE_CHECK(idx, this->getFPLength() - 1);
typename boost::property_map<CatalogGraph, vertex_entry_t>::const_type
pMap = boost::get(vertex_entry_t(), d_graph);
int res = -1;
for (unsigned int i = idx; i < this->getNumEntries(); i++) {
const entryType *e = pMap[i];
if (static_cast<unsigned int>(e->getBitId()) == idx) {
res = i;
break;
}
}
return res;
}
//------------------------------------
//! returns a list of the indices of entries below the one passed in
/*!
  \param idx  index of the entry whose adjacent vertices are collected
*/
RDKit::INT_VECT getDownEntryList(unsigned int idx) const {
  const DOWN_ENT_ITER_PAIR nbrs = boost::adjacent_vertices(idx, d_graph);
  RDKit::INT_VECT res;
  for (DOWN_ENT_ITER nbr = nbrs.first; nbr != nbrs.second; ++nbr) {
    res.push_back(*nbr);
  }
  return res;
}
//------------------------------------
//! returns a list of the indices that have a particular order
/*!
  \param ord  the order to look up

  The lookup goes through the order map's operator[], so asking for an order
  that is not present yet adds an empty list for it to the map.
*/
const RDKit::INT_VECT &getEntriesOfOrder(orderType ord) {
  const RDKit::INT_VECT &indices = d_orderMap[ord];
  return indices;
}
//------------------------------------
//! returns a list of the indices that have a particular order
/*!
  \overload

  This version never modifies the order map: an order that is not present
  fails the CHECK_INVARIANT below.
*/
const RDKit::INT_VECT &getEntriesOfOrder(orderType ord) const {
  const typename std::map<orderType, RDKit::INT_VECT>::const_iterator pos =
      d_orderMap.find(ord);
  CHECK_INVARIANT(
      pos != d_orderMap.end(),
      " catalog does not contain any entries of the order specified");
  return pos->second;
}
private:
// graph that stores the entries in the catalog in a hierarchical manner
CatalogGraph d_graph;
// a map from the order type of entries in the catalog to
// a vector of vertex indices in the graph above
// e.g. for a catalog of molecular fragments, the order of a fragment can
// simply be the number of bonds in it. The list this order maps to is then
// the vertex ids of all fragments in the catalog that have that many bonds
std::map<orderType, RDKit::INT_VECT> d_orderMap;
//------------------------------------
//! clear any memory that we've used
void destroy() {
ENT_ITER_PAIR entItP = boost::vertices(d_graph);
typename boost::property_map<CatalogGraph, vertex_entry_t>::type pMap =
boost::get(vertex_entry_t(), d_graph);
while (entItP.first != entItP.second) {
delete pMap[*(entItP.first++)];
}
}
};
//-----------------------------------------------------------------------------
//! a linear Catalog (analogous to an std::vector)
/*!
Here there is no particular hierarchy, simply a
collection of entries.

NOTE(review): the second template argument is named orderType here but is
passed to Catalog in the position that Catalog calls paramType - confirm
which of the two is intended.
*/
template <class entryType, class orderType>
class LinearCatalog : public Catalog<entryType, orderType> {
// here there is no particular hierarchy of entries
// we simply model it as a vector of entries
// FIX: for retrieval purposes a better model may be std::map
public:
// NOTE(review): Catalog declares Serialize() as a const pure virtual; this
// non-const declaration has a different signature - confirm whether it is
// meant to override the base class function.
std::string Serialize();
// adds an entry; declared here only, no definition is visible in this file
unsigned int addEntry(entryType *entry, bool updateFPLength = true);
// returns the entry at position idx; declared here only
const entryType *getEntryWithIdx(unsigned int idx) const;
// NOTE(review): Catalog::getNumEntries() is pure virtual and is not declared
// in this class - confirm whether LinearCatalog is meant to be instantiable.
private:
// the entries, kept in a flat vector
std::vector<entryType *> d_vector;
};
}
#endif

View File

@@ -12,6 +12,5 @@
#include "CatalogEntry.h"
namespace RDCatalog {
CatalogEntry::~CatalogEntry() {}
CatalogEntry::~CatalogEntry() {}
}

View File

@@ -14,36 +14,34 @@
#include <string>
namespace RDCatalog {
//! Abstract base class to be used to represent an entry in a Catalog
class CatalogEntry {
public:
virtual ~CatalogEntry() = 0;
//! sets our bit Id
void setBitId(int bid) {d_bitId = bid;};
//! Abstract base class to be used to represent an entry in a Catalog
class CatalogEntry {
public:
virtual ~CatalogEntry() = 0;
//! returns our bit Id
int getBitId() const {return d_bitId;};
//! sets our bit Id
void setBitId(int bid) { d_bitId = bid; };
//! returns a text description of this entry
virtual std::string getDescription() const = 0;
//! returns our bit Id
int getBitId() const { return d_bitId; };
//! serializes (pickles) to a stream
virtual void toStream(std::ostream &ss) const = 0;
//! returns a string with a serialized (pickled) representation
virtual std::string Serialize() const = 0;
//! initializes from a stream pickle
virtual void initFromStream(std::istream &ss) = 0;
//! initializes from a string pickle
virtual void initFromString(const std::string &text) = 0;
//! returns a text description of this entry
virtual std::string getDescription() const = 0;
//! serializes (pickles) to a stream
virtual void toStream(std::ostream &ss) const = 0;
//! returns a string with a serialized (pickled) representation
virtual std::string Serialize() const = 0;
//! initializes from a stream pickle
virtual void initFromStream(std::istream &ss) = 0;
//! initializes from a string pickle
virtual void initFromString(const std::string &text) = 0;
private:
int d_bitId; //!< our bit Id. This needs to be signed so that we can mark uninitialized entries.
};
private:
int d_bitId; //!< our bit Id. This needs to be signed so that we can mark
//!< uninitialized entries.
};
}
#endif

View File

@@ -12,5 +12,5 @@
#include "CatalogParams.h"
namespace RDCatalog {
CatalogParams::~CatalogParams() {};
// the destructor is declared pure virtual in the class, but it still needs
// this (empty) definition because derived-class destructors invoke it
CatalogParams::~CatalogParams(){};
}

View File

@@ -13,29 +13,29 @@
#include <string>
namespace RDCatalog {
//! abstract base class for the container used to create a catalog
class CatalogParams {
public:
virtual ~CatalogParams() = 0;
//! abstract base class for the container used to create a catalog
class CatalogParams {
public:
virtual ~CatalogParams() = 0;
//! returns our type string
std::string getTypeStr() const { return d_typeStr; };
//! returns our type string
std::string getTypeStr() const { return d_typeStr; };
//! sets our type string
void setTypeStr(const std::string &typeStr) { d_typeStr=typeStr; };
//! sets our type string
void setTypeStr(const std::string &typeStr) { d_typeStr = typeStr; };
//! serializes (pickles) to a stream
virtual void toStream(std::ostream &) const = 0;
//! returns a string with a serialized (pickled) representation
virtual std::string Serialize() const = 0;
//! initializes from a stream pickle
virtual void initFromStream(std::istream &ss) = 0;
//! initializes from a string pickle
virtual void initFromString(const std::string &text) = 0;
//! serializes (pickles) to a stream
virtual void toStream(std::ostream &) const = 0;
//! returns a string with a serialized (pickled) representation
virtual std::string Serialize() const = 0;
//! initializes from a stream pickle
virtual void initFromStream(std::istream &ss) = 0;
//! initializes from a string pickle
virtual void initFromString(const std::string &text) = 0;
protected:
std::string d_typeStr; //!< our type string
};
protected:
std::string d_typeStr; //!< our type string
};
}
#endif

View File

@@ -12,27 +12,26 @@
#include <Geometry/point.h>
namespace ChemicalFeatures {
//------------------------------------------------------------------
//! abstract base class for chemical feature
class ChemicalFeature {
public:
ChemicalFeature() {};
virtual ~ChemicalFeature() {};
// returns the feature id
virtual int getId() const = 0;
// returns the type of the feature
virtual const std::string& getType() const = 0;
// returns the family of the feature
virtual const std::string& getFamily() const = 0;
// returns the position of the feature
virtual RDGeom::Point3D getPos() const = 0;
};
//------------------------------------------------------------------
//! abstract base class for chemical feature
/*!
Declares the read-only interface (id, type, family and position) that
concrete feature classes have to implement.
*/
class ChemicalFeature {
public:
ChemicalFeature(){};
virtual ~ChemicalFeature(){};
//! returns the feature id
virtual int getId() const = 0;
//! returns the type of the feature
virtual const std::string& getType() const = 0;
//! returns the family of the feature
virtual const std::string& getFamily() const = 0;
//! returns the position of the feature
virtual RDGeom::Point3D getPos() const = 0;
};
}
#endif

View File

@@ -13,76 +13,73 @@
#include <sstream>
#include <boost/cstdint.hpp>
namespace ChemicalFeatures {
using namespace RDKit;
using boost::int32_t;
using boost::uint32_t;
const int ci_FEAT_VERSION=0x0020; //!< version number to use in pickles
using namespace RDKit;
using boost::int32_t;
using boost::uint32_t;
const int ci_FEAT_VERSION = 0x0020; //!< version number to use in pickles
std::string FreeChemicalFeature::toString() const {
std::stringstream ss(std::ios_base::binary|std::ios_base::out|std::ios_base::in);
uint32_t tInt = ci_FEAT_VERSION;
streamWrite(ss,tInt);
// write the id
streamWrite(ss,d_id);
tInt = d_family.size()+1;
streamWrite(ss,tInt);
ss.write(d_family.c_str(),tInt*sizeof(char));
tInt = d_type.size()+1;
streamWrite(ss,tInt);
ss.write(d_type.c_str(),tInt*sizeof(char));
streamWrite(ss,d_position.x);
streamWrite(ss,d_position.y);
streamWrite(ss,d_position.z);
std::string res(ss.str());
return res;
};
void FreeChemicalFeature::initFromString(const std::string &pickle){
std::stringstream ss(pickle,
std::ios_base::binary|std::ios_base::in|std::ios_base::out);
int version=0;
uint32_t tInt;
streamRead(ss,tInt);
switch(tInt){
//! returns a serialized (pickled) form of the feature
/*!
  The fields are written to a binary stream in this order:
    - the pickle version (ci_FEAT_VERSION) as a uint32_t
    - the feature id
    - the family: its length (size()+1, as a uint32_t) followed by that many
      bytes, i.e. the characters plus the terminating NUL
    - the type, stored the same way as the family
    - the x, y and z components of the position
*/
std::string FreeChemicalFeature::toString() const {
  std::stringstream out(std::ios_base::binary | std::ios_base::out |
                        std::ios_base::in);

  // header: the pickle format version
  const uint32_t versionTag = ci_FEAT_VERSION;
  streamWrite(out, versionTag);

  // write the id
  streamWrite(out, d_id);

  // each string goes out as <length><bytes>; the length includes the
  // terminating NUL that c_str() provides
  const uint32_t familyLen = d_family.size() + 1;
  streamWrite(out, familyLen);
  out.write(d_family.c_str(), familyLen);

  const uint32_t typeLen = d_type.size() + 1;
  streamWrite(out, typeLen);
  out.write(d_type.c_str(), typeLen);

  // finally the coordinates
  streamWrite(out, d_position.x);
  streamWrite(out, d_position.y);
  streamWrite(out, d_position.z);

  return out.str();
}
void FreeChemicalFeature::initFromString(const std::string &pickle) {
std::stringstream ss(
pickle, std::ios_base::binary | std::ios_base::in | std::ios_base::out);
int version = 0;
uint32_t tInt;
streamRead(ss, tInt);
switch (tInt) {
case 0x0010:
version=1;
version = 1;
break;
case 0x0020: // ok, I know this is not 2, in hex its 32 but I just want to
// keep consistent with the above
version=2;
case 0x0020: // ok, I know this is not 2, in hex its 32 but I just want to
// keep consistent with the above
version = 2;
break;
default:
throw("Unknown version type for FreeChemicalFeature");
}
}
// read the id, but only if a later/newish version
if (version == 2) {
streamRead(ss,d_id);
}
char *tmpChr;
// read the id, but only if a later/newish version
if (version == 2) {
streamRead(ss, d_id);
}
streamRead(ss,tInt);
tmpChr = new char[tInt];
ss.read(tmpChr,tInt*sizeof(char));
d_family = tmpChr;
delete [] tmpChr;
char *tmpChr;
streamRead(ss,tInt);
tmpChr = new char[tInt];
ss.read(tmpChr,tInt*sizeof(char));
d_type = tmpChr;
delete [] tmpChr;
streamRead(ss, tInt);
tmpChr = new char[tInt];
ss.read(tmpChr, tInt * sizeof(char));
d_family = tmpChr;
delete[] tmpChr;
streamRead(ss,d_position.x);
streamRead(ss,d_position.y);
streamRead(ss,d_position.z);
streamRead(ss, tInt);
tmpChr = new char[tInt];
ss.read(tmpChr, tInt * sizeof(char));
d_type = tmpChr;
delete[] tmpChr;
};
streamRead(ss, d_position.x);
streamRead(ss, d_position.y);
streamRead(ss, d_position.z);
};
}

View File

@@ -15,92 +15,79 @@
namespace ChemicalFeatures {
//------------------------------------------------------
//! Class for chemical features that do not orignate from molecules
// e.g. pharamcophores, site-maps etc.
class FreeChemicalFeature : public ChemicalFeature {
public:
//! start with everything specified
FreeChemicalFeature(const std::string &family, std::string type,
const RDGeom::Point3D &loc,int id=-1) :
d_id(id), d_family(family), d_type(type), d_position(loc) {
}
//------------------------------------------------------
//! Class for chemical features that do not originate from molecules
// e.g. pharmacophores, site-maps etc.
class FreeChemicalFeature : public ChemicalFeature {
public:
//! start with everything specified
FreeChemicalFeature(const std::string &family, std::string type,
const RDGeom::Point3D &loc, int id = -1)
: d_id(id), d_family(family), d_type(type), d_position(loc) {}
//! start with family and location specified, leave the type blank
FreeChemicalFeature(const std::string &family, const RDGeom::Point3D &loc) :
d_id(-1), d_family(family), d_type(""), d_position(loc) {
}
//! start with family and location specified, leave the type blank
FreeChemicalFeature(const std::string &family, const RDGeom::Point3D &loc)
: d_id(-1), d_family(family), d_type(""), d_position(loc) {}
//! start with everything blank
FreeChemicalFeature() :
d_id(-1), d_family(""), d_type(""), d_position(RDGeom::Point3D(0.0, 0.0, 0.0)) {
}
//! start with everything blank
FreeChemicalFeature()
: d_id(-1),
d_family(""),
d_type(""),
d_position(RDGeom::Point3D(0.0, 0.0, 0.0)) {}
explicit FreeChemicalFeature(const std::string &pickle) {
this->initFromString(pickle);
}
explicit FreeChemicalFeature(const std::string &pickle) {
this->initFromString(pickle);
}
FreeChemicalFeature(const FreeChemicalFeature &other) :
d_id(other.getId()), d_family(other.getFamily()), d_type(other.getType()), d_position(other.getPos()) {
}
FreeChemicalFeature(const FreeChemicalFeature &other)
: d_id(other.getId()),
d_family(other.getFamily()),
d_type(other.getType()),
d_position(other.getPos()) {}
~FreeChemicalFeature() {}
~FreeChemicalFeature() {}
//! return our id
int getId() const {
return d_id;
}
//! return our id
int getId() const { return d_id; }
//! return our family
const std::string& getFamily() const {
return d_family;
}
//! return our family
const std::string &getFamily() const { return d_family; }
//! return our type
const std::string& getType() const {
return d_type;
}
//! return our type
const std::string &getType() const { return d_type; }
//! return our position
RDGeom::Point3D getPos() const {
return d_position;
}
//! return our position
RDGeom::Point3D getPos() const { return d_position; }
//! set our id
void setId(const int id) {
d_id = id;
}
//! set our id
void setId(const int id) { d_id = id; }
//! set our family
void setFamily(const std::string &family) {
d_family = family;
}
//! set our type
void setType(const std::string &type) {
d_type = type;
}
//! set our family
void setFamily(const std::string &family) { d_family = family; }
//! set our position
void setPos(const RDGeom::Point3D &loc) {
//std::cout << loc.x << " " << loc.y << " " << loc.z << "\n";
d_position = loc;
//std::cout << d_position.x << " " << d_position.y << " " << d_position.z << "\n";
}
//! set our type
void setType(const std::string &type) { d_type = type; }
//! returns a serialized form of the feature (a pickle)
std::string toString() const;
//! initialize from a pickle string
void initFromString(const std::string &pickle);
private:
int d_id;
std::string d_family;
std::string d_type;
RDGeom::Point3D d_position;
};
//! set our position
void setPos(const RDGeom::Point3D &loc) {
// std::cout << loc.x << " " << loc.y << " " << loc.z << "\n";
d_position = loc;
// std::cout << d_position.x << " " << d_position.y << " " << d_position.z
// << "\n";
}
//! returns a serialized form of the feature (a pickle)
std::string toString() const;
//! initialize from a pickle string
void initFromString(const std::string &pickle);
private:
int d_id;
std::string d_family;
std::string d_type;
RDGeom::Point3D d_position;
};
}
#endif

View File

@@ -13,67 +13,59 @@
#include <GraphMol/RDKitBase.h>
#include <RDGeneral/types.h>
#include<RDGeneral/Invariant.h>
#include <RDGeneral/Invariant.h>
#include <RDBoost/PySequenceHolder.h>
#include <ChemicalFeatures/FreeChemicalFeature.h>
namespace ChemicalFeatures {
// support pickling:
struct chemfeat_pickle_suite : python::pickle_suite
{
static python::tuple
getinitargs(const FreeChemicalFeature& self)
{
std::string res=self.toString();
python::object retval = python::object(python::handle<>(PyBytes_FromStringAndSize(res.c_str(),res.length())));
return python::make_tuple(retval);
};
// pickling support: the constructor arguments handed back to Python are a
// single bytes object holding the result of FreeChemicalFeature::toString()
struct chemfeat_pickle_suite : python::pickle_suite {
  //! builds the (bytes,) tuple used as the init args for \c self
  static python::tuple getinitargs(const FreeChemicalFeature &self) {
    const std::string pkl = self.toString();
    // the handle takes charge of the new reference created by the C API call
    python::handle<> bytesHandle(
        PyBytes_FromStringAndSize(pkl.c_str(), pkl.length()));
    return python::make_tuple(python::object(bytesHandle));
  }
};
std::string featClassDoc="Class to represent a free chemical features.\n\
std::string featClassDoc =
"Class to represent a free chemical features.\n\
These chemical features are not associated with a molecule, though they can be matched \n\
to molecular featufres\n";
struct freefeat_wrapper {
static void wrap() {
python::class_<FreeChemicalFeature>("FreeChemicalFeature", featClassDoc.c_str(),
python::init<const std::string &>())
struct freefeat_wrapper {
static void wrap() {
python::class_<FreeChemicalFeature>("FreeChemicalFeature",
featClassDoc.c_str(),
python::init<const std::string &>())
.def(python::init<>("Default Constructor"))
.def(python::init<std::string, std::string, const RDGeom::Point3D &,int>
((python::arg("family"),
python::arg("type"),
python::arg("loc"),
python::arg("id")=-1),
"Constructor with family, type and location specified"))
.def(python::init<std::string, const RDGeom::Point3D &>
(python::args("family", "loc"),
"constructor with family and location specified, empty type and id"))
.def("SetId", &FreeChemicalFeature::setId,
"Set the id of the feature")
.def(python::init<std::string, std::string, const RDGeom::Point3D &,
int>(
(python::arg("family"), python::arg("type"), python::arg("loc"),
python::arg("id") = -1),
"Constructor with family, type and location specified"))
.def(python::init<std::string, const RDGeom::Point3D &>(
python::args("family", "loc"),
"constructor with family and location specified, empty type and "
"id"))
.def("SetId", &FreeChemicalFeature::setId, "Set the id of the feature")
.def("SetFamily", &FreeChemicalFeature::setFamily,
"Set the family of the feature")
.def("SetType", &FreeChemicalFeature::setType,
"Set the sepcific type for the feature")
.def("GetId", &FreeChemicalFeature::getId,
"Get the id of the feature")
.def("GetId", &FreeChemicalFeature::getId, "Get the id of the feature")
.def("GetFamily", &FreeChemicalFeature::getFamily,
"Get the family of the feature",
python::return_value_policy<python::copy_const_reference>())
.def("GetType", &FreeChemicalFeature::getType,
"Get the sepcific type for the feature",
python::return_value_policy<python::copy_const_reference>())
.def("SetPos", &FreeChemicalFeature::setPos,
"Set the feature position")
.def("SetPos", &FreeChemicalFeature::setPos, "Set the feature position")
.def("GetPos", &FreeChemicalFeature::getPos,
"Get the position of the feature")
.def_pickle(chemfeat_pickle_suite())
;
};
};
}
void wrap_freefeat() {
ChemicalFeatures::freefeat_wrapper::wrap();
.def_pickle(chemfeat_pickle_suite());
};
};
}
void wrap_freefeat() { ChemicalFeatures::freefeat_wrapper::wrap(); }

View File

@@ -15,12 +15,9 @@
void wrap_freefeat();
BOOST_PYTHON_MODULE(rdChemicalFeatures)
{
BOOST_PYTHON_MODULE(rdChemicalFeatures) {
python::scope().attr("__doc__") =
"Module containing free chemical feature functionality\n\
"Module containing free chemical feature functionality\n\
These are feature that are not associated with molecules. They are \n\
are typically derived from pharmacophores and site-maps.\n";

View File

@@ -17,39 +17,35 @@
using namespace ChemicalFeatures;
void test1() {
std::cout << "-----------------------------------------" << std::endl;
std::cout << "Test1" << std::endl;
FreeChemicalFeature f1("foo","bar",RDGeom::Point3D(0,0,0));
TEST_ASSERT(f1.getId()==-1);
TEST_ASSERT(f1.getFamily()=="foo");
TEST_ASSERT(f1.getType()=="bar");
FreeChemicalFeature f1("foo", "bar", RDGeom::Point3D(0, 0, 0));
TEST_ASSERT(f1.getId() == -1);
TEST_ASSERT(f1.getFamily() == "foo");
TEST_ASSERT(f1.getType() == "bar");
FreeChemicalFeature f2("foo","bar",RDGeom::Point3D(0,0,0),123);
TEST_ASSERT(f2.getId()==123);
TEST_ASSERT(f2.getFamily()=="foo");
TEST_ASSERT(f2.getType()=="bar");
FreeChemicalFeature f2("foo", "bar", RDGeom::Point3D(0, 0, 0), 123);
TEST_ASSERT(f2.getId() == 123);
TEST_ASSERT(f2.getFamily() == "foo");
TEST_ASSERT(f2.getType() == "bar");
FreeChemicalFeature f3;
f3.initFromString(f2.toString());
TEST_ASSERT(f3.getId()==123);
TEST_ASSERT(f3.getFamily()=="foo");
TEST_ASSERT(f3.getType()=="bar");
TEST_ASSERT(f3.getId() == 123);
TEST_ASSERT(f3.getFamily() == "foo");
TEST_ASSERT(f3.getType() == "bar");
FreeChemicalFeature f4(f2.toString());
TEST_ASSERT(f4.getId()==123);
TEST_ASSERT(f4.getFamily()=="foo");
TEST_ASSERT(f4.getType()=="bar");
TEST_ASSERT(f4.getId() == 123);
TEST_ASSERT(f4.getFamily() == "foo");
TEST_ASSERT(f4.getType() == "bar");
std::cout << "Done" << std::endl;
}
//! test driver: runs test1() and then exits with status 0
int main() {
test1();
return 0;
}

View File

@@ -14,37 +14,40 @@
#include <RDGeneral/Invariant.h>
namespace RDDataManip {
//! return the Euclidean distance between two vectors
template <typename T1, typename T2>
double EuclideanDistanceMetric(const T1 &v1, const T2 &v2, unsigned int dim) {
double dist = 0.0;
for (unsigned int i = 0; i < dim; i++) {
double diff = static_cast<double>(v1[i]) - static_cast<double>(v2[i]);
dist += (diff*diff);
}
return sqrt(dist);
};
//! return the Euclidean distance between two vectors
/*!
  \param v1   first vector; only needs to support operator[] on [0, dim)
  \param v2   second vector; same requirement as \c v1
  \param dim  the number of leading elements to compare

  \return the square root of the summed squared element-wise differences
          (0.0 when \c dim is 0). Each element is converted to double
          before the subtraction.
*/
template <typename T1, typename T2>
double EuclideanDistanceMetric(const T1 &v1, const T2 &v2, unsigned int dim) {
  double sumSq = 0.0;
  unsigned int idx = 0;
  while (idx < dim) {
    const double lhs = static_cast<double>(v1[idx]);
    const double rhs = static_cast<double>(v2[idx]);
    const double delta = lhs - rhs;
    sumSq += delta * delta;
    ++idx;
  }
  return sqrt(sumSq);
}
// FIX: there's no reason to have this tied to TanimotoSimilarity... could
// include a different sim function as a template param
//! return the Tanimoto distance (1-TanimotoSimilarity) between two bit vectors
/*!
  \param bv1  first bit vector
  \param bv2  second bit vector
  \param dim  ignored (see the comment in the body)

  \return 1.0 minus the value SimilarityWrapper produces for the two vectors
          using TanimotoSimilarity
*/
template <typename T1, typename T2>
double TanimotoDistanceMetric(const T1 &bv1, const T2 &bv2, unsigned int dim) {
  // dim plays no role here; it is only part of the signature so that this
  // function fits the templated setMetricFunc in MetricMatrixCalc
  RDUNUSED_PARAM(dim);
  typedef double (*SimFunc)(const T1 &, const T2 &);
  const double similarity =
      SimilarityWrapper(bv1, bv2, (SimFunc)TanimotoSimilarity);
  return (1.0 - similarity);
}
// FIX: there's no reason to have this tied to TanimotoSimilarity... could include
// a different sim function as a template param
//! return the Tanimoto distance (1-TanimotoSimilarity) between two bit vectors
template <typename T1, typename T2>
double TanimotoDistanceMetric(const T1 &bv1, const T2 &bv2, unsigned int dim) {
// the dim parameter is actually irrelevant here but we have to include it to deal with
// template version of setMetricFunc in MetricMatricCalc
RDUNUSED_PARAM(dim);
return (1.0 - SimilarityWrapper(bv1, bv2,(double (*)(const T1&,const T2&))TanimotoSimilarity));
};
//! return the Tanimoto similarity between two bit vectors
template <typename T1, typename T2>
double TanimotoSimilarityMetric(const T1 &bv1, const T2 &bv2, unsigned int dim) {
RDUNUSED_PARAM(dim);
return SimilarityWrapper(bv1,bv2,(double (*)(const T1&,const T2&))TanimotoSimilarity);
};
//! return the Tanimoto similarity between two bit vectors
/*!
  \param bv1  first bit vector
  \param bv2  second bit vector
  \param dim  ignored; it is marked unused and never read

  \return the value SimilarityWrapper produces for the two vectors using
          TanimotoSimilarity
*/
template <typename T1, typename T2>
double TanimotoSimilarityMetric(const T1 &bv1, const T2 &bv2,
                                unsigned int dim) {
  RDUNUSED_PARAM(dim);
  typedef double (*SimFunc)(const T1 &, const T2 &);
  SimFunc simFunc = (SimFunc)TanimotoSimilarity;
  return SimilarityWrapper(bv1, bv2, simFunc);
}
}
#endif

View File

@@ -15,84 +15,91 @@
#include <RDGeneral/Invariant.h>
namespace RDDataManip {
/*! \brief A generic metric matrix calculator (e.g similarity matrix or
* distance matrix)
*
* This templated class needs some explanation
* vectType is a container that can support [] operator
* entryType is the type of entry that is returned by the [] operator
* Examples of the container include PySequenceHolder which is wrapper around
* a python sequence objects like lists and tuples.
* Examples of the entryType include a sequence of double, floats, and ExplicitBitVects
/*! \brief A generic metric matrix calculator (e.g similarity matrix or
* distance matrix)
*
* This templated class needs some explanation
* vectType is a container that can support [] operator
* entryType is the type of entry that is returned by the [] operator
* Examples of the container include PySequenceHolder, which is a wrapper
* around python sequence objects like lists and tuples.
* Examples of the entryType include a sequence of doubles, floats, and
* ExplicitBitVects
*
*/
template <class vectType, class entryType>
class MetricMatrixCalc {
public:
/*! \brief Default Constructor
*
*/
template <class vectType, class entryType> class MetricMatrixCalc {
public:
/*! \brief Default Constructor
*
*/
MetricMatrixCalc() {};
/*! \brief Set the metric function
*
* Set the pointer to the mertic funvtion to be used by the metric calculator
*
* ARGUMENTS:
*
* mFunc - pointer to the metric funtion
*/
void setMetricFunc(double (*mFunc)(const entryType &, const entryType &, unsigned int)) {
dp_metricFunc = mFunc;
}
MetricMatrixCalc(){};
/*! \brief The calculator function
*
* ARGUMENTS:
*
* descrips - vectType container with a entryType for each item
* nItems - the number of item in the descripts.
* In several cases this argument is irrelvant since vectType probably supports
* a size() member function, But we would like this interface to take for example
* a double** and correctly parse the row and columns.
* dim - the dimension of the sequences
* distMat - pointer to an array to write the distance matrix to
* it is assumed that the right sized array has already be allocated.
*
* FIX: we can probably make this function create the correct sized distMat and return
* it to the caller, but when pushing he result out to a python array not sure how to
* avoid copy the entire distance matrix in that case
*
* RETURNS:
*
* pointer to a 1D array of doubles. Only the lower triangle elements are
* included in the array
*/
void calcMetricMatrix(const vectType &descripts, unsigned int nItems, unsigned int dim,
double *distMat) {
CHECK_INVARIANT(distMat, "invalid pointer to a distance matix");
for (unsigned int i = 1; i < nItems; i++) {
unsigned int itab = i*(i-1)/2;
for (unsigned int j = 0; j < i; j++) {
distMat[itab+j] = dp_metricFunc(descripts[i], descripts[j], dim);
}
/*! \brief Set the metric function
*
* Set the pointer to the metric function to be used by the metric calculator.
* The pointer is stored in dp_metricFunc, which calcMetricMatrix() calls for
* each pair of items.
*
* ARGUMENTS:
*
* mFunc - pointer to the metric function; it takes two entryType values
*         and an unsigned int (the dimension) and returns a double
*/
void setMetricFunc(double (*mFunc)(const entryType &, const entryType &,
unsigned int)) {
dp_metricFunc = mFunc;
}
/*! \brief The calculator function
*
* ARGUMENTS:
*
* descripts - vectType container with an entryType for each item
* nItems - the number of items in descripts.
*          In several cases this argument is irrelevant since vectType
*          probably supports a size() member function, but we would like
*          this interface to take, for example, a double** and correctly
*          parse the rows and columns.
* dim - the dimension of the sequences
* distMat - pointer to an array to write the distance matrix to;
*           it is assumed that the right sized array has already been
*           allocated.
*
* FIX: we can probably make this function create the correct sized distMat
* and return it to the caller, but when pushing the result out to a python
* array not sure how to avoid copying the entire distance matrix in that case
*
* RETURNS:
*
* nothing directly; the results are written to distMat, a 1D array of
* doubles. Only the lower triangle elements are included in the array
*/
void calcMetricMatrix(const vectType &descripts, unsigned int nItems,
unsigned int dim, double *distMat) {
CHECK_INVARIANT(distMat, "invalid pointer to a distance matix");
for (unsigned int i = 1; i < nItems; i++) {
unsigned int itab = i * (i - 1) / 2;
for (unsigned int j = 0; j < i; j++) {
distMat[itab + j] = dp_metricFunc(descripts[i], descripts[j], dim);
}
};
private:
// pointer to the metric function
/*! \brief pointer to the metric function
*
* In several cases the last argument 'dim' should be irrelevant,
* For example when entryType is a bit vector the size is of the vector
* or the dimension can be obtained by asking the bit vector itself. However
* we woul like this interface to support other containers lines double*
* in which case the 'dim' value is useful in cumputing the metric.
*/
double (*dp_metricFunc)(const entryType &, const entryType &, unsigned int);
}
};
private:
// pointer to the metric function
/*! \brief pointer to the metric function
*
* In several cases the last argument 'dim' should be irrelevant.
* For example, when entryType is a bit vector, the size of the vector
* (its dimension) can be obtained by asking the bit vector itself. However,
* we would like this interface to support other containers, like double*,
* in which case the 'dim' value is useful in computing the metric.
*/
double (*dp_metricFunc)(const entryType &, const entryType &, unsigned int);
};
};
#endif

View File

@@ -30,223 +30,245 @@ void wrap_MMcalc();
namespace python = boost::python;
namespace RDDataManip {
PyObject *getEuclideanDistMat(python::object descripMat) {
// Bit of a pain involved here, we accept three types of PyObjects here
// 1. A Numeric Array
// - first find what 'type' of entry we have (float, double and int is all we recognize for now)
// - then point to contiguous piece of memory from the array that contains the data with a type*
// - then make a new type** pointer so that double index into this contiguous memory will work
// and then pass it along to the distance calculator
// 2. A list of Numeric Vector (or 1D arrays)
// - in this case wrap descripMat with a PySequenceHolder<type*> where type is the
// type of entry in vector (accepted types are int, double and float
// - Then pass the PySequenceHolder to the metrci calculator
// 3. A list (or tuple) of lists (or tuple)
// - In this case other than wrapping descripMat with a PySequenceHolder
// each of the indivual list in there are also wrapped by a PySequenceHolder
// - so the distance calculator is passed in a "PySequenceHolder<PySequenceHolder<double>>"
// - FIX: not that we always convert entry values to double here, even if we passed
// in a list of list of ints (or floats). Given that lists can be heterogeneous, I do not
// know how to ask a list what type of entries if contains.
//
// OK my brain is going to explode now
// first deal with situation where we have an Numeric Array
PyObject *descMatObj = descripMat.ptr();
PyArrayObject *distRes;
if (PyArray_Check(descMatObj)) {
// get the dimensions of the array
int nrows = ((PyArrayObject *)descMatObj)->dimensions[0];
int ncols = ((PyArrayObject *)descMatObj)->dimensions[1];
int i;
CHECK_INVARIANT((nrows > 0) && (ncols > 0), "");
npy_intp dMatLen = nrows*(nrows-1)/2;
// now that we have the dimensions declare the distance matrix which is always a
// 1D double array
distRes = (PyArrayObject *)PyArray_SimpleNew(1, &dMatLen, NPY_DOUBLE);
// grab a pointer to the data in the array so that we can directly put values in there
// and avoid copying :
double *dMat = (double *)distRes->data;
PyObject *getEuclideanDistMat(python::object descripMat) {
// Bit of a pain involved here, we accept three types of PyObjects here
// 1. A Numeric Array
//   - first find what 'type' of entry we have (float, double and int is all
//     we recognize for now)
//   - then point to the contiguous piece of memory from the array that
//     contains the data with a type*
//   - then make a new type** pointer so that double indexing into this
//     contiguous memory will work, and then pass it along to the distance
//     calculator
// 2. A list of Numeric Vectors (or 1D arrays)
//   - in this case wrap descripMat with a PySequenceHolder<type*> where
//     type is the type of entry in the vector (accepted types are int,
//     double and float)
//   - then pass the PySequenceHolder to the metric calculator
// 3. A list (or tuple) of lists (or tuples)
//   - in this case, other than wrapping descripMat with a PySequenceHolder,
//     each of the individual lists in there is also wrapped by a
//     PySequenceHolder
//   - so the distance calculator is passed a
//     "PySequenceHolder<PySequenceHolder<double>>"
//   - FIX: note that we always convert entry values to double here, even if
//     we passed in a list of lists of ints (or floats). Given that lists can
//     be heterogeneous, I do not know how to ask a list what type of entries
//     it contains.
//
// OK my brain is going to explode now
PyArrayObject *copy;
copy = (PyArrayObject *)PyArray_ContiguousFromObject(descMatObj,
((PyArrayObject *)descMatObj)->descr->type_num,
2,2);
// if we have double array
if (((PyArrayObject *)descMatObj)->descr->type_num == NPY_DOUBLE) {
double *desc = (double *)copy->data;
// REVIEW: create an adaptor object to hold a double * and support
// operator[]() so that we don't have to do this stuff:
// first deal with situation where we have an Numeric Array
PyObject *descMatObj = descripMat.ptr();
PyArrayObject *distRes;
if (PyArray_Check(descMatObj)) {
// get the dimensions of the array
int nrows = ((PyArrayObject *)descMatObj)->dimensions[0];
int ncols = ((PyArrayObject *)descMatObj)->dimensions[1];
int i;
CHECK_INVARIANT((nrows > 0) && (ncols > 0), "");
// here is the 2D array trick this so that when the distance calaculator
// asks for desc2D[i] we basically get the ith row as double*
double **desc2D = new double*[nrows];
for (i = 0; i < nrows; i++) {
desc2D[i] = desc;
desc += ncols;
}
MetricMatrixCalc<double**, double*> mmCalc;
mmCalc.setMetricFunc(&EuclideanDistanceMetric<double *, double *>);
mmCalc.calcMetricMatrix(desc2D, nrows, ncols, dMat);
delete [] desc2D;
// we got the distance matrix we are happy so return
return PyArray_Return(distRes);
}
// if we have a float array
else if (((PyArrayObject *)descMatObj)->descr->type_num == NPY_FLOAT) {
float* desc = (float *)copy->data;
float **desc2D = new float*[nrows];
for (i = 0; i < nrows; i++) {
desc2D[i] = desc;
desc += ncols;
}
MetricMatrixCalc<float**, float*> mmCalc;
mmCalc.setMetricFunc(&EuclideanDistanceMetric<float *, float*>);
mmCalc.calcMetricMatrix(desc2D, nrows, ncols, dMat);
delete [] desc2D;
return PyArray_Return(distRes);
npy_intp dMatLen = nrows * (nrows - 1) / 2;
// now that we have the dimensions declare the distance matrix which is
// always a
// 1D double array
distRes = (PyArrayObject *)PyArray_SimpleNew(1, &dMatLen, NPY_DOUBLE);
// grab a pointer to the data in the array so that we can directly put
// values in there
// and avoid copying :
double *dMat = (double *)distRes->data;
PyArrayObject *copy;
copy = (PyArrayObject *)PyArray_ContiguousFromObject(
descMatObj, ((PyArrayObject *)descMatObj)->descr->type_num, 2, 2);
// if we have double array
if (((PyArrayObject *)descMatObj)->descr->type_num == NPY_DOUBLE) {
double *desc = (double *)copy->data;
// REVIEW: create an adaptor object to hold a double * and support
// operator[]() so that we don't have to do this stuff:
// here is the 2D array trick this so that when the distance calaculator
// asks for desc2D[i] we basically get the ith row as double*
double **desc2D = new double *[nrows];
for (i = 0; i < nrows; i++) {
desc2D[i] = desc;
desc += ncols;
}
MetricMatrixCalc<double **, double *> mmCalc;
mmCalc.setMetricFunc(&EuclideanDistanceMetric<double *, double *>);
mmCalc.calcMetricMatrix(desc2D, nrows, ncols, dMat);
// if we have an interger array
else if (((PyArrayObject *)descMatObj)->descr->type_num == NPY_INT) {
int *desc = (int *)copy->data;
int **desc2D = new int*[nrows];
for (i = 0; i < nrows; i++) {
desc2D[i] = desc;
desc += ncols;
}
MetricMatrixCalc<int**, int*> mmCalc;
mmCalc.setMetricFunc(&EuclideanDistanceMetric<int *, int*>);
mmCalc.calcMetricMatrix(desc2D, nrows, ncols, dMat);
delete [] desc2D;
return PyArray_Return(distRes);
}
else {
// unreconiged type for the matrix, throw up
throw_value_error("The array has to be of type int, float, or double for GetEuclideanDistMat");
}
} // done with an array input
else {
// REVIEW: removed a ton of code here
// we have probably have a list or a tuple
unsigned int ncols = 0;
unsigned int nrows = python::extract<unsigned int>(descripMat.attr("__len__")());
CHECK_INVARIANT(nrows > 0, "Empty list passed in");
npy_intp dMatLen = nrows*(nrows-1)/2;
distRes = (PyArrayObject *)PyArray_SimpleNew(1, &dMatLen, NPY_DOUBLE);
double *dMat = (double *)distRes->data;
// assume that we a have a list of list of values (that can be extracted to double)
std::vector<PySequenceHolder<double> > dData;
dData.reserve(nrows);
for (unsigned int i = 0; i < nrows; i++) {
//PySequenceHolder<double> row(seq[i]);
PySequenceHolder<double> row(descripMat[i]);
if(i==0){
ncols = row.size();
} else if( row.size() != ncols ){
throw_value_error("All subsequences must be the same length");
}
dData.push_back(row);
}
MetricMatrixCalc< std::vector<PySequenceHolder<double> >, PySequenceHolder<double> > mmCalc;
mmCalc.setMetricFunc(&EuclideanDistanceMetric< PySequenceHolder<double>, PySequenceHolder<double> >);
mmCalc.calcMetricMatrix(dData, nrows, ncols, dMat);
delete[] desc2D;
// we got the distance matrix we are happy so return
return PyArray_Return(distRes);
}
return PyArray_Return(distRes);
}
//! Computes the pairwise Tanimoto distance matrix for a sequence of bit vectors
/*!
  \param bitVectList  a Python sequence of either ExplicitBitVects or
                      SparseBitVects. Only the first element is inspected to
                      decide which of the two types the sequence holds.

  \return a new one-dimensional NumPy array of doubles with
          nrows*(nrows-1)/2 entries, filled in by
          MetricMatrixCalc::calcMetricMatrix().

  A value error is raised (through throw_value_error) when the first element
  is neither an ExplicitBitVect nor a SparseBitVect.
*/
PyObject *getTanimotoDistMat(python::object bitVectList) {
  int nrows = python::extract<int>(bitVectList.attr("__len__")());
  CHECK_INVARIANT(nrows > 1, "");

  // First check what type of vector we have
  python::object v1 = bitVectList[0];
  python::extract<ExplicitBitVect> ebvWorks(v1);
  python::extract<SparseBitVect> sbvWorks(v1);
  // evaluate each check once and reuse the answer below
  const bool isExplicit = ebvWorks.check();
  const bool isSparse = sbvWorks.check();
  if (!isExplicit && !isSparse) {
    throw_value_error("GetTanimotoDistMat can only take a sequence of ExplicitBitVects or SparseBitvects");
  }

  // Widen before multiplying: in plain int arithmetic nrows*(nrows-1)
  // overflows once nrows goes above roughly 46,000.
  npy_intp dMatLen = static_cast<npy_intp>(nrows) * (nrows - 1) / 2;
  PyArrayObject *simRes = (PyArrayObject *)PyArray_SimpleNew(1, &dMatLen, NPY_DOUBLE);
  if (!simRes) {
    // allocation failed; propagate the Python error that NumPy already set
    // instead of dereferencing a null pointer below
    python::throw_error_already_set();
  }
  double *sMat = (double *)simRes->data;

  if (isExplicit) {
    PySequenceHolder<ExplicitBitVect> dData(bitVectList);
    MetricMatrixCalc<PySequenceHolder<ExplicitBitVect>, ExplicitBitVect> mmCalc;
    mmCalc.setMetricFunc(&TanimotoDistanceMetric<ExplicitBitVect, ExplicitBitVect>);
    mmCalc.calcMetricMatrix(dData, nrows, 0, sMat);
  } else if (isSparse) {
    PySequenceHolder<SparseBitVect> dData(bitVectList);
    MetricMatrixCalc<PySequenceHolder<SparseBitVect>, SparseBitVect> mmCalc;
    mmCalc.setMetricFunc(&TanimotoDistanceMetric<SparseBitVect, SparseBitVect>);
    mmCalc.calcMetricMatrix(dData, nrows, 0, sMat);
  }
  return PyArray_Return(simRes);
}
PyObject *getTanimotoSimMat(python::object bitVectList) {
// we will assume here that we have a either a list of ExplicitBitVectors or
// SparseBitVects
int nrows = python::extract<int>(bitVectList.attr("__len__")());
CHECK_INVARIANT(nrows > 1, "");
// First check what type of vector we have
python::object v1 = bitVectList[0];
python::extract<ExplicitBitVect> ebvWorks(v1);
python::extract<SparseBitVect> sbvWorks(v1);
if(!ebvWorks.check() && !sbvWorks.check()){
throw_value_error("GetTanimotoDistMat can only take a sequence of ExplicitBitVects or SparseBitvects");
}
npy_intp dMatLen = nrows*(nrows-1)/2;
PyArrayObject *simRes = (PyArrayObject *)PyArray_SimpleNew(1, &dMatLen, NPY_DOUBLE);
double *sMat = (double *)simRes->data;
if (ebvWorks.check()) {
PySequenceHolder<ExplicitBitVect> dData(bitVectList);
MetricMatrixCalc<PySequenceHolder<ExplicitBitVect>, ExplicitBitVect> mmCalc;
mmCalc.setMetricFunc(&TanimotoSimilarityMetric<ExplicitBitVect, ExplicitBitVect>);
mmCalc.calcMetricMatrix(dData, nrows, 0, sMat);
}
else if (sbvWorks.check()) {
PySequenceHolder<SparseBitVect> dData(bitVectList);
MetricMatrixCalc<PySequenceHolder<SparseBitVect>, SparseBitVect> mmCalc;
mmCalc.setMetricFunc(&TanimotoSimilarityMetric<SparseBitVect, SparseBitVect>);
mmCalc.calcMetricMatrix(dData, nrows, 0, sMat);
}
return PyArray_Return(simRes);
// if we have a float array
else if (((PyArrayObject *)descMatObj)->descr->type_num == NPY_FLOAT) {
float *desc = (float *)copy->data;
float **desc2D = new float *[nrows];
for (i = 0; i < nrows; i++) {
desc2D[i] = desc;
desc += ncols;
}
MetricMatrixCalc<float **, float *> mmCalc;
mmCalc.setMetricFunc(&EuclideanDistanceMetric<float *, float *>);
mmCalc.calcMetricMatrix(desc2D, nrows, ncols, dMat);
delete[] desc2D;
return PyArray_Return(distRes);
}
// if we have an integer array
else if (((PyArrayObject *)descMatObj)->descr->type_num == NPY_INT) {
int *desc = (int *)copy->data;
int **desc2D = new int *[nrows];
for (i = 0; i < nrows; i++) {
desc2D[i] = desc;
desc += ncols;
}
MetricMatrixCalc<int **, int *> mmCalc;
mmCalc.setMetricFunc(&EuclideanDistanceMetric<int *, int *>);
mmCalc.calcMetricMatrix(desc2D, nrows, ncols, dMat);
delete[] desc2D;
return PyArray_Return(distRes);
} else {
// unrecognized type for the matrix, raise an error
throw_value_error(
"The array has to be of type int, float, or double for "
"GetEuclideanDistMat");
}
} // done with an array input
else {
// REVIEW: removed a ton of code here
// we probably have a list or a tuple
unsigned int ncols = 0;
unsigned int nrows =
python::extract<unsigned int>(descripMat.attr("__len__")());
CHECK_INVARIANT(nrows > 0, "Empty list passed in");
npy_intp dMatLen = nrows * (nrows - 1) / 2;
distRes = (PyArrayObject *)PyArray_SimpleNew(1, &dMatLen, NPY_DOUBLE);
double *dMat = (double *)distRes->data;
// assume that we have a list of lists of values (that can be extracted to
// double)
std::vector<PySequenceHolder<double> > dData;
dData.reserve(nrows);
for (unsigned int i = 0; i < nrows; i++) {
// PySequenceHolder<double> row(seq[i]);
PySequenceHolder<double> row(descripMat[i]);
if (i == 0) {
ncols = row.size();
} else if (row.size() != ncols) {
throw_value_error("All subsequences must be the same length");
}
dData.push_back(row);
}
MetricMatrixCalc<std::vector<PySequenceHolder<double> >,
PySequenceHolder<double> > mmCalc;
mmCalc.setMetricFunc(&EuclideanDistanceMetric<PySequenceHolder<double>,
PySequenceHolder<double> >);
mmCalc.calcMetricMatrix(dData, nrows, ncols, dMat);
}
return PyArray_Return(distRes);
}
BOOST_PYTHON_MODULE(rdMetricMatrixCalc)
{
//! Computes the pairwise Tanimoto distance matrix for a sequence of bit vectors
/*!
  \param bitVectList  a Python sequence of either ExplicitBitVects or
                      SparseBitVects. Only the first element is inspected to
                      decide which of the two types the sequence holds.

  \return a new one-dimensional NumPy array of doubles with
          nrows*(nrows-1)/2 entries, filled in by
          MetricMatrixCalc::calcMetricMatrix().

  A value error is raised (through throw_value_error) when the first element
  is neither an ExplicitBitVect nor a SparseBitVect.
*/
PyObject *getTanimotoDistMat(python::object bitVectList) {
  int nrows = python::extract<int>(bitVectList.attr("__len__")());
  CHECK_INVARIANT(nrows > 1, "");

  // First check what type of vector we have
  python::object v1 = bitVectList[0];
  python::extract<ExplicitBitVect> ebvWorks(v1);
  python::extract<SparseBitVect> sbvWorks(v1);
  // evaluate each check once and reuse the answer below
  const bool isExplicit = ebvWorks.check();
  const bool isSparse = sbvWorks.check();
  if (!isExplicit && !isSparse) {
    throw_value_error(
        "GetTanimotoDistMat can only take a sequence of ExplicitBitVects or "
        "SparseBitvects");
  }

  // Widen before multiplying: in plain int arithmetic nrows*(nrows-1)
  // overflows once nrows goes above roughly 46,000.
  npy_intp dMatLen = static_cast<npy_intp>(nrows) * (nrows - 1) / 2;
  PyArrayObject *simRes =
      (PyArrayObject *)PyArray_SimpleNew(1, &dMatLen, NPY_DOUBLE);
  if (!simRes) {
    // allocation failed; propagate the Python error that NumPy already set
    // instead of dereferencing a null pointer below
    python::throw_error_already_set();
  }
  double *sMat = (double *)simRes->data;

  if (isExplicit) {
    PySequenceHolder<ExplicitBitVect> dData(bitVectList);
    MetricMatrixCalc<PySequenceHolder<ExplicitBitVect>, ExplicitBitVect> mmCalc;
    mmCalc.setMetricFunc(
        &TanimotoDistanceMetric<ExplicitBitVect, ExplicitBitVect>);
    mmCalc.calcMetricMatrix(dData, nrows, 0, sMat);
  } else if (isSparse) {
    PySequenceHolder<SparseBitVect> dData(bitVectList);
    MetricMatrixCalc<PySequenceHolder<SparseBitVect>, SparseBitVect> mmCalc;
    mmCalc.setMetricFunc(&TanimotoDistanceMetric<SparseBitVect, SparseBitVect>);
    mmCalc.calcMetricMatrix(dData, nrows, 0, sMat);
  }
  return PyArray_Return(simRes);
}
//! Computes the pairwise Tanimoto similarity matrix for a sequence of bit
//! vectors
/*!
  \param bitVectList  a Python sequence of either ExplicitBitVects or
                      SparseBitVects. Only the first element is inspected to
                      decide which of the two types the sequence holds.

  \return a new one-dimensional NumPy array of doubles with
          nrows*(nrows-1)/2 entries, filled in by
          MetricMatrixCalc::calcMetricMatrix().

  A value error is raised (through throw_value_error) when the first element
  is neither an ExplicitBitVect nor a SparseBitVect.
*/
PyObject *getTanimotoSimMat(python::object bitVectList) {
  int nrows = python::extract<int>(bitVectList.attr("__len__")());
  CHECK_INVARIANT(nrows > 1, "");

  // First check what type of vector we have
  python::object v1 = bitVectList[0];
  python::extract<ExplicitBitVect> ebvWorks(v1);
  python::extract<SparseBitVect> sbvWorks(v1);
  // evaluate each check once and reuse the answer below
  const bool isExplicit = ebvWorks.check();
  const bool isSparse = sbvWorks.check();
  if (!isExplicit && !isSparse) {
    // the message previously named GetTanimotoDistMat (copy/paste slip)
    throw_value_error(
        "GetTanimotoSimMat can only take a sequence of ExplicitBitVects or "
        "SparseBitvects");
  }

  // Widen before multiplying: in plain int arithmetic nrows*(nrows-1)
  // overflows once nrows goes above roughly 46,000.
  npy_intp dMatLen = static_cast<npy_intp>(nrows) * (nrows - 1) / 2;
  PyArrayObject *simRes =
      (PyArrayObject *)PyArray_SimpleNew(1, &dMatLen, NPY_DOUBLE);
  if (!simRes) {
    // allocation failed; propagate the Python error that NumPy already set
    // instead of dereferencing a null pointer below
    python::throw_error_already_set();
  }
  double *sMat = (double *)simRes->data;

  if (isExplicit) {
    PySequenceHolder<ExplicitBitVect> dData(bitVectList);
    MetricMatrixCalc<PySequenceHolder<ExplicitBitVect>, ExplicitBitVect> mmCalc;
    mmCalc.setMetricFunc(
        &TanimotoSimilarityMetric<ExplicitBitVect, ExplicitBitVect>);
    mmCalc.calcMetricMatrix(dData, nrows, 0, sMat);
  } else if (isSparse) {
    PySequenceHolder<SparseBitVect> dData(bitVectList);
    MetricMatrixCalc<PySequenceHolder<SparseBitVect>, SparseBitVect> mmCalc;
    mmCalc.setMetricFunc(
        &TanimotoSimilarityMetric<SparseBitVect, SparseBitVect>);
    mmCalc.calcMetricMatrix(dData, nrows, 0, sMat);
  }
  return PyArray_Return(simRes);
}
}
BOOST_PYTHON_MODULE(rdMetricMatrixCalc) {
python::scope().attr("__doc__") =
"Module containing the calculator for metric matrix calculation, \n"
"e.g. similarity and distance matrices"
;
"Module containing the calculator for metric matrix calculation, \n"
"e.g. similarity and distance matrices";
rdkit_import_array();
python::register_exception_translator<IndexErrorException>(&translate_index_error);
python::register_exception_translator<ValueErrorException>(&translate_value_error);
python::register_exception_translator<IndexErrorException>(
&translate_index_error);
python::register_exception_translator<ValueErrorException>(
&translate_value_error);
std::string docString;
docString = "Compute the distance matrix from a descriptor matrix using the Euclidean distance metric\n\n\
docString =
"Compute the distance matrix from a descriptor matrix using the Euclidean distance metric\n\n\
ARGUMENTS: \n\
\n\
descripMat - A python object of any one of the following types \n\
@@ -258,10 +280,11 @@ BOOST_PYTHON_MODULE(rdMetricMatrixCalc)
double. \n\n\
RETURNS: \n\
A numeric one-dimensional array containing the lower triangle elements of the symmetric distance matrix\n\n";
python::def("GetEuclideanDistMat", RDDataManip::getEuclideanDistMat,
python::def("GetEuclideanDistMat", RDDataManip::getEuclideanDistMat,
docString.c_str());
docString = "Compute the distance matrix from a list of BitVects using the Tanimoto distance metric\n\n\
docString =
"Compute the distance matrix from a list of BitVects using the Tanimoto distance metric\n\n\
ARGUMENTS: \n\
\n\
bitVectList - a list of bit vectors. Currently this works only for a list of explicit bit vectors, \n\
@@ -271,8 +294,9 @@ BOOST_PYTHON_MODULE(rdMetricMatrixCalc)
symmetric distance matrix\n\n";
python::def("GetTanimotoDistMat", RDDataManip::getTanimotoDistMat,
docString.c_str());
docString = "Compute the similarity matrix from a list of BitVects \n\n\
docString =
"Compute the similarity matrix from a list of BitVects \n\n\
ARGUMENTS: \n\
\n\
bitVectList - a list of bit vectors. Currently this works only for a list of explicit bit vectors, \n\

View File

@@ -16,13 +16,12 @@
using namespace RDDataManip;
int main() {
int n = 10;
int m = 3;
int dlen = n*(n-1)/2;
int dlen = n * (n - 1) / 2;
int i, j;
double *desc = new double[n*m];
double **desc2D = new double*[n];
double *desc = new double[n * m];
double **desc2D = new double *[n];
for (i = 0; i < n; i++) {
desc2D[i] = desc;
@@ -31,14 +30,14 @@ int main() {
desc = desc2D[0];
for (i = 0; i < n; i++) {
for (j = 0; j < m ; j++) {
desc[i*m + j] = ((double)rand())/10;
for (j = 0; j < m; j++) {
desc[i * m + j] = ((double)rand()) / 10;
}
}
//double x = EuclideanDistanceMetric(desc2D[0], desc2D[1], m);
// double x = EuclideanDistanceMetric(desc2D[0], desc2D[1], m);
double *dmat = new double[dlen];
MetricMatrixCalc<double**, double*> mmCalc;
MetricMatrixCalc<double **, double *> mmCalc;
mmCalc.setMetricFunc(&EuclideanDistanceMetric<double *, double *>);
mmCalc.calcMetricMatrix(desc2D, n, m, dmat);
@@ -46,9 +45,9 @@ int main() {
std::cout << dmat[i] << "\n";
}
delete [] desc2D;
delete [] desc;
delete [] dmat;
delete[] desc2D;
delete[] desc;
delete[] dmat;
exit(0);
}

File diff suppressed because it is too large Load Diff

View File

@@ -16,165 +16,145 @@
The notation used to document the similarity metrics is:
- \c V1_n: number of bits in vector 1
- \c V1_o: number of on bits in vector 1
- <tt>(V1&V2)_o</tt>: number of on bits in the intersection of vectors 1 and 2
- <tt>(V1&V2)_o</tt>: number of on bits in the intersection of vectors 1 and
2
*/
#include "BitVects.h"
#include <string>
//! general purpose wrapper for calculating the similarity between two bvs
//! that may be of unequal size (will automatically fold as appropriate)
template <typename T>
double SimilarityWrapper(const T &bv1,const T &bv2,
double (*metric)(const T &,const T &),
bool returnDistance=false){
double res=0.0;
if(bv1.getNumBits()>bv2.getNumBits()){
T *bv1tmp = FoldFingerprint(bv1,bv1.getNumBits()/bv2.getNumBits());
res = metric(*bv1tmp,bv2);
double SimilarityWrapper(const T& bv1, const T& bv2,
double (*metric)(const T&, const T&),
bool returnDistance = false) {
double res = 0.0;
if (bv1.getNumBits() > bv2.getNumBits()) {
T* bv1tmp = FoldFingerprint(bv1, bv1.getNumBits() / bv2.getNumBits());
res = metric(*bv1tmp, bv2);
delete bv1tmp;
} else if(bv2.getNumBits()>bv1.getNumBits()){
T *bv2tmp = FoldFingerprint(bv2,bv2.getNumBits()/bv1.getNumBits());
res = metric(bv1,*bv2tmp);
} else if (bv2.getNumBits() > bv1.getNumBits()) {
T* bv2tmp = FoldFingerprint(bv2, bv2.getNumBits() / bv1.getNumBits());
res = metric(bv1, *bv2tmp);
delete bv2tmp;
} else {
res = metric(bv1,bv2);
res = metric(bv1, bv2);
}
if(returnDistance) res = 1.0-res;
if (returnDistance) res = 1.0 - res;
return res;
}
//! \overload
template <typename T>
double SimilarityWrapper(const T &bv1,const T &bv2,double a,double b,
double (*metric)(const T &,const T &,double,double),
bool returnDistance=false){
double res=0.0;
if(bv1.getNumBits()>bv2.getNumBits()){
T *bv1tmp = FoldFingerprint(bv1,bv1.getNumBits()/bv2.getNumBits());
res = metric(*bv1tmp,bv2,a,b);
double SimilarityWrapper(const T& bv1, const T& bv2, double a, double b,
double (*metric)(const T&, const T&, double, double),
bool returnDistance = false) {
double res = 0.0;
if (bv1.getNumBits() > bv2.getNumBits()) {
T* bv1tmp = FoldFingerprint(bv1, bv1.getNumBits() / bv2.getNumBits());
res = metric(*bv1tmp, bv2, a, b);
delete bv1tmp;
} else if(bv2.getNumBits()>bv1.getNumBits()){
T *bv2tmp = FoldFingerprint(bv2,bv2.getNumBits()/bv1.getNumBits());
res = metric(bv1,*bv2tmp,a,b);
} else if (bv2.getNumBits() > bv1.getNumBits()) {
T* bv2tmp = FoldFingerprint(bv2, bv2.getNumBits() / bv1.getNumBits());
res = metric(bv1, *bv2tmp, a, b);
delete bv2tmp;
} else {
res = metric(bv1,bv2,a,b);
res = metric(bv1, bv2, a, b);
}
if(returnDistance) res = 1.0-res;
if (returnDistance) res = 1.0 - res;
return res;
}
bool AllProbeBitsMatch(const char *probe,const char *ref);
bool AllProbeBitsMatch(const std::string &probe,const std::string &ref);
bool AllProbeBitsMatch(const ExplicitBitVect& probe,const ExplicitBitVect &ref);
template <typename T1>
bool AllProbeBitsMatch(const T1 &probe,const std::string &pkl);
bool AllProbeBitsMatch(const char* probe, const char* ref);
bool AllProbeBitsMatch(const std::string& probe, const std::string& ref);
bool AllProbeBitsMatch(const ExplicitBitVect& probe,
const ExplicitBitVect& ref);
template <typename T1>
bool AllProbeBitsMatch(const T1 &probe,const T1 &ref);
bool AllProbeBitsMatch(const T1& probe, const std::string& pkl);
template <typename T1>
bool AllProbeBitsMatch(const T1& probe, const T1& ref);
//! returns the number of on bits in common between two bit vectors
/*!
\return (bv1&bv2)_o
*/
template <typename T1, typename T2>
int
NumOnBitsInCommon(const T1& bv1,const T2& bv2);
int NumOnBitsInCommon(const T1& bv1, const T2& bv2);
int
NumOnBitsInCommon(const ExplicitBitVect & bv1,const ExplicitBitVect & bv2);
int NumOnBitsInCommon(const ExplicitBitVect& bv1, const ExplicitBitVect& bv2);
//! returns the Tanimoto similarity between two bit vects
/*!
\return <tt>(bv1&bv2)_o / [bv1_o + bv2_o - (bv1&bv2)_o]</tt>
*/
template <typename T1, typename T2>
double
TanimotoSimilarity(const T1& bv1,const T2& bv2);
double TanimotoSimilarity(const T1& bv1, const T2& bv2);
//! returns the Cosine similarity between two bit vects
/*!
\return <tt>(bv1&bv2)_o / sqrt(bv1_o + bv2_o)</tt>
*/
template <typename T1, typename T2>
double
CosineSimilarity(const T1& bv1,
const T2& bv2);
double CosineSimilarity(const T1& bv1, const T2& bv2);
//! returns the Kulczynski similarity between two bit vects
/*!
\return <tt>(bv1&bv2)_o * [bv1_o + bv2_o] / [2 * bv1_o * bv2_o]</tt>
*/
template <typename T1, typename T2>
double
KulczynskiSimilarity(const T1& bv1,
const T2& bv2);
double KulczynskiSimilarity(const T1& bv1, const T2& bv2);
//! returns the Dice similarity between two bit vects
/*!
\return <tt>2*(bv1&bv2)_o / [bv1_o + bv2_o]</tt>
*/
template <typename T1, typename T2>
double
DiceSimilarity(const T1& bv1,
const T2& bv2);
double DiceSimilarity(const T1& bv1, const T2& bv2);
//! returns the Tversky similarity between two bit vects
/*!
\return <tt>(bv1&bv2)_o / [a*bv1_o + b*bv2_o + (1 - a - b)*(bv1&bv2)_o]</tt>
Notes:
Notes:
# 0 <= a,b <= 1
# Tversky(a=1,b=1) = Tanimoto
# Tversky(a=1/2,b=1/2) = Dice
*/
template <typename T1, typename T2>
double
TverskySimilarity(const T1& bv1,
const T2& bv2,double a,double b);
double TverskySimilarity(const T1& bv1, const T2& bv2, double a, double b);
//! returns the Sokal similarity between two bit vects
/*!
\return <tt>(bv1&bv2)_o / [2*bv1_o + 2*bv2_o - 3*(bv1&bv2)_o]</tt>
*/
template <typename T1, typename T2>
double
SokalSimilarity(const T1& bv1,
const T2& bv2);
double SokalSimilarity(const T1& bv1, const T2& bv2);
//! returns the McConnaughey similarity between two bit vects
/*!
\return <tt>[(bv1&bv2)_o * (bv1_o + bv2_o) - (bv1_o * bv2_o)] / (bv1_o * bv2_o)</tt>
\return <tt>[(bv1&bv2)_o * (bv1_o + bv2_o) - (bv1_o * bv2_o)] / (bv1_o *
bv2_o)</tt>
*/
template <typename T1, typename T2>
double
McConnaugheySimilarity(const T1& bv1,
const T2& bv2);
double McConnaugheySimilarity(const T1& bv1, const T2& bv2);
//! returns the Asymmetric similarity between two bit vects
/*!
\return <tt>(bv1&bv2)_o / min(bv1_o,bv2_o)</tt>
*/
template <typename T1, typename T2>
double
AsymmetricSimilarity(const T1& bv1,
const T2& bv2);
double AsymmetricSimilarity(const T1& bv1, const T2& bv2);
//! returns the Braun-Blanquet similarity between two bit vects
/*!
\return <tt>(bv1&bv2)_o / max(bv1_o,bv2_o)</tt>
*/
template <typename T1, typename T2>
double
BraunBlanquetSimilarity(const T1& bv1,
const T2& bv2);
double BraunBlanquetSimilarity(const T1& bv1, const T2& bv2);
//! returns the Russel similarity between two bit vects
/*!
@@ -185,9 +165,7 @@ BraunBlanquetSimilarity(const T1& bv1,
*/
template <typename T1, typename T2>
double
RusselSimilarity(const T1& bv1,
const T2& bv2);
double RusselSimilarity(const T1& bv1, const T2& bv2);
//! returns the Rogot-Goldberg similarity between two bit vects
/*!
@@ -195,28 +173,23 @@ RusselSimilarity(const T1& bv1,
+ (bv1_n - bv1_o - bv2_o + (bv1&bv2)_o) / (2*bv1_n - bv1_o - bv2_o) </tt>
*/
template <typename T1, typename T2>
double
RogotGoldbergSimilarity(const T1& bv1,const T2& bv2);
double RogotGoldbergSimilarity(const T1& bv1, const T2& bv2);
//! returns the on bit similarity between two bit vects
/*!
\return <tt>(bv1&bv2)_o / (bv1|bv2)_o </tt>
*/
template <typename T1, typename T2>
double
OnBitSimilarity(const T1& bv1,const T2& bv2);
double OnBitSimilarity(const T1& bv1, const T2& bv2);
//! returns the number of common bits (on and off) between two bit vects
/*!
\return <tt>bv1_n - (bv1^bv2)_o</tt>
*/
template <typename T1, typename T2>
int
NumBitsInCommon(const T1& bv1,const T2& bv2);
int NumBitsInCommon(const T1& bv1, const T2& bv2);
int
NumBitsInCommon(const ExplicitBitVect & bv1,const ExplicitBitVect & bv2);
int NumBitsInCommon(const ExplicitBitVect& bv1, const ExplicitBitVect& bv2);
//! returns the common-bit similarity (on and off) between two bit vects
//! This is also called Manhattan similarity.
@@ -224,89 +197,82 @@ NumBitsInCommon(const ExplicitBitVect & bv1,const ExplicitBitVect & bv2);
\return <tt>[bv1_n - (bv1^bv2)_o] / bv1_n</tt>
*/
template <typename T1, typename T2>
double
AllBitSimilarity(const T1& bv1,const T2& bv2);
double AllBitSimilarity(const T1& bv1, const T2& bv2);
//! returns an IntVect with indices of all on bits in common between two bit vects
//! returns an IntVect with indices of all on bits in common between two bit
//! vects
template <typename T1, typename T2>
IntVect
OnBitsInCommon(const T1& bv1,const T2& bv2);
IntVect OnBitsInCommon(const T1& bv1, const T2& bv2);
//! returns an IntVect with indices of all off bits in common between two bit vects
//! returns an IntVect with indices of all off bits in common between two bit
//! vects
template <typename T1, typename T2>
IntVect
OffBitsInCommon(const T1& bv1,const T2& bv2);
IntVect OffBitsInCommon(const T1& bv1, const T2& bv2);
//! returns the on-bit projected similarities between two bit vects
/*!
\return two values, as a DoubleVect:
- <tt>(bv1&bv2)_o / bv1_o</tt>
- <tt>(bv1&bv2)_o / bv2_o</tt>
- <tt>(bv1&bv2)_o / bv1_o</tt>
- <tt>(bv1&bv2)_o / bv2_o</tt>
*/
template <typename T1, typename T2>
DoubleVect
OnBitProjSimilarity(const T1& bv1,const T2& bv2);
DoubleVect OnBitProjSimilarity(const T1& bv1, const T2& bv2);
//! returns the off-bit projected similarities between two bit vects
/*!
\return two values, as a DoubleVect:
- <tt>[bv1_n - (bv1|bv2)_o] / [bv1_n - bv1_o]</tt>
- <tt>[bv2_n - (bv1|bv2)_o] / [bv2_n - bv2_o]</tt>
- <tt>[bv1_n - (bv1|bv2)_o] / [bv1_n - bv1_o]</tt>
- <tt>[bv2_n - (bv1|bv2)_o] / [bv2_n - bv2_o]</tt>
<b>Note:</b> <tt>bv1_n = bv2_n</tt>
*/
template <typename T1, typename T2>
DoubleVect
OffBitProjSimilarity(const T1& bv1,const T2& bv2);
DoubleVect OffBitProjSimilarity(const T1& bv1, const T2& bv2);
//! folds a bit vector \c factor times and returns the result
/*!
\param bv1 the vector to be folded
\param factor (optional) the number of times to fold it
\return a pointer to the folded fingerprint, which is
<tt>bv1_n/factor</tt> long.
<b>Note:</b> The caller is responsible for <tt>delete</tt>ing the result.
*/
template <typename T1>
T1 *
FoldFingerprint(const T1& bv1,unsigned int factor=2);
T1* FoldFingerprint(const T1& bv1, unsigned int factor = 2);
//! returns a text representation of a bit vector (a string of 0s and 1s)
/*!
\param bv1 the vector to use
\return an std::string
*/
template <typename T1>
std::string
BitVectToText(const T1& bv1);
std::string BitVectToText(const T1& bv1);
//! returns a hex representation of a bit vector compatible with Andrew Dalke's FPS format
//! returns a hex representation of a bit vector compatible with Andrew Dalke's
//! FPS format
/*!
\param bv1 the vector to use
\return an std::string
*/
template <typename T1>
std::string
BitVectToFPSText(const T1& bv1);
std::string BitVectToFPSText(const T1& bv1);
//! returns a binary string representation of a bit vector (an array of bytes)
/*!
\param bv1 the vector to use
\return an std::string
*/
template <typename T1>
std::string
BitVectToBinaryText(const T1& bv1);
std::string BitVectToBinaryText(const T1& bv1);
//! updates a bit vector from Andrew Dalke's FPS format
/*!
@@ -316,10 +282,10 @@ BitVectToBinaryText(const T1& bv1);
*/
template <typename T1>
void
UpdateBitVectFromFPSText(T1& bv1,const std::string &fps);
void UpdateBitVectFromFPSText(T1& bv1, const std::string& fps);
//! updates a bit vector from a binary string representation of a bit vector (an array of bytes)
//! updates a bit vector from a binary string representation of a bit vector
//! (an array of bytes)
/*!
\param bv1 the vector to use
\param fps the binary string
@@ -327,9 +293,6 @@ UpdateBitVectFromFPSText(T1& bv1,const std::string &fps);
*/
template <typename T1>
void
UpdateBitVectFromBinaryText(T1& bv1,const std::string &fps);
void UpdateBitVectFromBinaryText(T1& bv1, const std::string& fps);
#endif

View File

@@ -18,69 +18,67 @@
#endif
#include <boost/cstdint.hpp>
BitVect::~BitVect() {}; // must always implement virtual destructors
BitVect::~BitVect(){}; // must always implement virtual destructors
void BitVect::initFromText(const char *data,const unsigned int dataLen,
bool isBase64,bool allowOldFormat){
std::stringstream ss(std::ios_base::binary|std::ios_base::in|std::ios_base::out);
if(isBase64){
void BitVect::initFromText(const char *data, const unsigned int dataLen,
bool isBase64, bool allowOldFormat) {
std::stringstream ss(std::ios_base::binary | std::ios_base::in |
std::ios_base::out);
if (isBase64) {
unsigned int actualLen;
char *decoded;
decoded = Base64Decode((const char *)data,&actualLen);
ss.write(decoded,actualLen);
delete [] decoded;
decoded = Base64Decode((const char *)data, &actualLen);
ss.write(decoded, actualLen);
delete[] decoded;
} else {
ss.write(data,dataLen);
ss.write(data, dataLen);
}
boost::int32_t format=0;
boost::uint32_t nOn=0;
boost::int32_t format = 0;
boost::uint32_t nOn = 0;
boost::int32_t size;
boost::int32_t version=0;
boost::int32_t version = 0;
// earlier versions of the code did not have the version number encoded, so
// we'll use that to distinguish version 0
RDKit::streamRead(ss,size);
if(size<0){
version = -1*size;
RDKit::streamRead(ss, size);
if (size < 0) {
version = -1 * size;
if (version == 16) {
format=1;
}
else if (version == 32) {
format=2;
}
else {
format = 1;
} else if (version == 32) {
format = 2;
} else {
throw ValueErrorException("bad version in BitVect pickle");
}
RDKit::streamRead(ss,size);
} else if( !allowOldFormat ) {
RDKit::streamRead(ss, size);
} else if (!allowOldFormat) {
throw ValueErrorException("invalid BitVect pickle");
}
RDKit::streamRead(ss,nOn);
RDKit::streamRead(ss, nOn);
_initForSize(static_cast<int>(size));
// if we either have the older format, or version 16 with ints for the on bits
if( (format==0) ||
( (format == 1) && (size >= std::numeric_limits<unsigned short>::max()) ) ) {
if ((format == 0) ||
((format == 1) && (size >= std::numeric_limits<unsigned short>::max()))) {
boost::uint32_t tmp;
for(unsigned int i=0; i<nOn; i++){
RDKit::streamRead(ss,tmp);
for (unsigned int i = 0; i < nOn; i++) {
RDKit::streamRead(ss, tmp);
setBit(tmp);
}
} else if (format == 1) { // version 16 and on bits stored as short ints
} else if (format == 1) { // version 16 and on bits stored as short ints
boost::uint16_t tmp;
for(unsigned int i=0; i<nOn; i++){
RDKit::streamRead(ss,tmp);
for (unsigned int i = 0; i < nOn; i++) {
RDKit::streamRead(ss, tmp);
setBit(tmp);
}
} else if (format == 2) { // run length encoded format
boost::uint32_t curr=0;
for (unsigned int i=0; i<nOn; i++) {
} else if (format == 2) { // run length encoded format
boost::uint32_t curr = 0;
for (unsigned int i = 0; i < nOn; i++) {
curr += RDKit::readPackedIntFromStream(ss);
setBit(curr);
curr++;
}
}
}

View File

@@ -17,11 +17,11 @@ typedef std::vector<int> IntVect;
typedef IntVect::iterator IntVectIter;
typedef std::vector<double> DoubleVect;
typedef DoubleVect::iterator DoubleVectIter;
const int ci_BITVECT_VERSION=0x0020; //!< version number to use in pickles
const int ci_BITVECT_VERSION = 0x0020; //!< version number to use in pickles
//! Abstract base class for storing BitVectors
class BitVect{
public:
class BitVect {
public:
virtual ~BitVect() = 0;
//! sets a particular bit and returns its original value
virtual bool setBit(const unsigned int which) = 0;
@@ -34,16 +34,16 @@ public:
//! returns the number of on bits
virtual unsigned int getNumOnBits() const = 0;
//! returns the number of off bits
virtual unsigned int getNumOffBits() const =0;
virtual unsigned int getNumOffBits() const = 0;
//! replaces the contents of \c v with indices of our on bits
virtual void getOnBits (IntVect& v) const = 0;
virtual void getOnBits(IntVect& v) const = 0;
//! clears (sets to off) all of our bits
virtual void clearBits() = 0;
//! initializes this BitVect from a pickle
/*!
\param data the raw pickle data
\param dataLen the length of \c data
\param dataLen the length of \c data
\param isBase64 (optional) if this is set, \c data is assumed to
be base64 encoded.
\param allowOldFormat (optional) allows a very old form of the BitVect
@@ -51,18 +51,17 @@ public:
amount of error checking and it is strongly suggested that it not
be used in client code.
*/
void initFromText(const char *data,const unsigned int dataLen,
bool isBase64=false,bool allowOldFormat=false);
void initFromText(const char* data, const unsigned int dataLen,
bool isBase64 = false, bool allowOldFormat = false);
//! returns a serialized (pickled) version of this BitVect
virtual std::string toString() const = 0;
virtual bool operator[] (const unsigned int which) const = 0;
virtual bool operator[](const unsigned int which) const = 0;
unsigned int size() const { return getNumBits(); }
private:
private:
virtual void _initForSize(const unsigned int size) = 0;
};
#endif

View File

@@ -16,13 +16,12 @@
//! \brief Construct a BitVect from the ASCII representation of a
//! Daylight fingerprint string
template <typename T>
void FromDaylightString(T &sbv,const std::string &s);
void FromDaylightString(T &sbv, const std::string &s);
//! \brief Construct a BitVect from the ASCII representation of a
//! bit string (i.e. a bunch of zeros and ones)
template <typename T>
void FromBitString(T &sbv,const std::string &s);
void FromBitString(T &sbv, const std::string &s);
//! Convert a SparseBitVector to an ExplicitBitVector
/*!

View File

@@ -14,13 +14,14 @@
class DatastructsException : public std::exception {
public:
//! construct with an error message
DatastructsException(const char *msg) : _msg(msg) {};
DatastructsException(const char *msg) : _msg(msg){};
//! construct with an error message
DatastructsException(const std::string &msg) : _msg(msg) {};
DatastructsException(const std::string &msg) : _msg(msg){};
//! get the error message
const char *message () const { return _msg.c_str(); };
~DatastructsException () throw () {};
private:
const char *message() const { return _msg.c_str(); };
~DatastructsException() throw(){};
private:
std::string _msg;
};

View File

@@ -14,62 +14,63 @@
#include "DatastructsException.h"
namespace RDKit {
void _fillDistMat(unsigned int dmat[], unsigned int nBits) {
unsigned int i,j, a, b, ta, tb, dist;
int temp;
unsigned int mask = ((1<<nBits) -1);
for (i = 0; i < 256; ++i) {
for (j = 0; j < 256; ++j) {
dist = 0;
a = i;
b = j;
while (a || b) {
ta = a&mask;
tb = b&mask;
temp = ta-tb;
if (temp > 0) {
dist += temp;
} else {
dist -= temp;
}
a >>= nBits;
b >>= nBits;
void _fillDistMat(unsigned int dmat[], unsigned int nBits) {
unsigned int i, j, a, b, ta, tb, dist;
int temp;
unsigned int mask = ((1 << nBits) - 1);
for (i = 0; i < 256; ++i) {
for (j = 0; j < 256; ++j) {
dist = 0;
a = i;
b = j;
while (a || b) {
ta = a & mask;
tb = b & mask;
temp = ta - tb;
if (temp > 0) {
dist += temp;
} else {
dist -= temp;
}
dmat[i*256 + j] = dist;
a >>= nBits;
b >>= nBits;
}
dmat[i * 256 + j] = dist;
}
}
}
DiscreteDistMat::DiscreteDistMat() {
// fill in the distance matrix table
DiscreteDistMat::DiscreteDistMat() {
// fill in the distance matrix table
// one bit per value table
_fillDistMat(d_oneBitTab, 1);
// one bit per value table
_fillDistMat(d_oneBitTab, 1);
// two bits per value table
_fillDistMat(d_twoBitTab, 2);
// two bits per value table
_fillDistMat(d_twoBitTab, 2);
// four bits per value table
_fillDistMat(d_fourBitTab, 4);
}
// four bits per value table
_fillDistMat(d_fourBitTab, 4);
}
unsigned int DiscreteDistMat::getDist(unsigned char v1,
unsigned char v2,
DiscreteValueVect::DiscreteValueType type) {
unsigned int res=0;
int temp;
unsigned int id = static_cast<unsigned int>(v1)*256 + static_cast<unsigned int>(v2);
switch(type) {
case DiscreteValueVect::ONEBITVALUE :
unsigned int DiscreteDistMat::getDist(
unsigned char v1, unsigned char v2,
DiscreteValueVect::DiscreteValueType type) {
unsigned int res = 0;
int temp;
unsigned int id =
static_cast<unsigned int>(v1) * 256 + static_cast<unsigned int>(v2);
switch (type) {
case DiscreteValueVect::ONEBITVALUE:
res = d_oneBitTab[id];
break;
case DiscreteValueVect::TWOBITVALUE :
case DiscreteValueVect::TWOBITVALUE:
res = d_twoBitTab[id];
break;
case DiscreteValueVect::FOURBITVALUE :
case DiscreteValueVect::FOURBITVALUE:
res = d_fourBitTab[id];
break;
case DiscreteValueVect::EIGHTBITVALUE :
case DiscreteValueVect::EIGHTBITVALUE:
temp = static_cast<unsigned int>(v1) - static_cast<unsigned int>(v2);
if (temp < 0) {
res -= temp;
@@ -80,13 +81,10 @@ namespace RDKit {
default:
// ummm.. we shouldn't have come here
throw DatastructsException("We shouldn't be here");
}
return res;
}
return res;
}
static DiscreteDistMat discreteDMat;
DiscreteDistMat *getDiscreteDistMat() {
return &discreteDMat;
}
}
static DiscreteDistMat discreteDMat;
DiscreteDistMat *getDiscreteDistMat() { return &discreteDMat; }
}

View File

@@ -11,21 +11,19 @@
#define __RD_DISCRETEDISTMAT_H__
#include "DiscreteValueVect.h"
namespace RDKit{
class DiscreteDistMat {
public:
DiscreteDistMat();
~DiscreteDistMat(){};
unsigned int getDist(unsigned char v1,
unsigned char v2,
DiscreteValueVect::DiscreteValueType type);
namespace RDKit {
class DiscreteDistMat {
public:
DiscreteDistMat();
~DiscreteDistMat(){};
unsigned int getDist(unsigned char v1, unsigned char v2,
DiscreteValueVect::DiscreteValueType type);
private:
unsigned int d_oneBitTab[256*256];
unsigned int d_twoBitTab[256*256];
unsigned int d_fourBitTab[256*256];
};
extern DiscreteDistMat *getDiscreteDistMat();
private:
unsigned int d_oneBitTab[256 * 256];
unsigned int d_twoBitTab[256 * 256];
unsigned int d_fourBitTab[256 * 256];
};
extern DiscreteDistMat *getDiscreteDistMat();
}
#endif

View File

@@ -17,251 +17,255 @@
#include <boost/cstdint.hpp>
namespace RDKit {
const int ci_DISCRETEVALUEVECTPICKLE_VERSION=0x1;
const int ci_DISCRETEVALUEVECTPICKLE_VERSION = 0x1;
DiscreteValueVect::DiscreteValueVect(const DiscreteValueVect &other) {
d_type = other.getValueType();
d_bitsPerVal = other.getNumBitsPerVal();
d_numInts = other.getNumInts();
d_length = other.getLength();
d_valsPerInt = other.d_valsPerInt;
d_mask = other.d_mask;
const boost::uint32_t *odata = other.getData();
boost::uint32_t *data = new boost::uint32_t[d_numInts];
memcpy(static_cast<void *>(data), static_cast<const void *>(odata),
d_numInts*sizeof(boost::uint32_t));
d_data.reset(data);
// Copy constructor: takes over the bookkeeping fields of `other` and fills a
// freshly allocated buffer with a byte-wise copy of its packed words.
DiscreteValueVect::DiscreteValueVect(const DiscreteValueVect &other) {
  // scalar state
  d_type = other.getValueType();
  d_bitsPerVal = other.getNumBitsPerVal();
  d_numInts = other.getNumInts();
  d_length = other.getLength();
  d_valsPerInt = other.d_valsPerInt;
  d_mask = other.d_mask;

  // packed storage: d_numInts 32-bit words
  const boost::uint32_t *source = other.getData();
  boost::uint32_t *copied = new boost::uint32_t[d_numInts];
  memcpy(static_cast<void *>(copied), static_cast<const void *>(source),
         d_numInts * sizeof(boost::uint32_t));
  d_data.reset(copied);
}
// Returns the value stored at index i.
// Throws IndexErrorException when i is not smaller than d_length.
unsigned int DiscreteValueVect::getVal(unsigned int i) const {
  if (i >= d_length) {
    throw IndexErrorException(i);
  }
  // which 32-bit word holds the value, and how far into that word it sits
  const unsigned int word = i / d_valsPerInt;
  const unsigned int slot = i % d_valsPerInt;
  const unsigned int offset = d_bitsPerVal * slot;
  return ((d_data[word] >> offset) & d_mask);
}
// Stores val at index i.
// Throws IndexErrorException when i is not smaller than d_length and
// ValueErrorException when val has bits set outside d_mask.
void DiscreteValueVect::setVal(unsigned int i, unsigned int val) {
  if (i >= d_length) {
    throw IndexErrorException(i);
  }
  const bool fits = ((val & d_mask) == val);
  if (!fits) {
    throw ValueErrorException("Value out of range");
  }
  const unsigned int word = i / d_valsPerInt;
  const unsigned int offset = d_bitsPerVal * (i % d_valsPerInt);
  // bits of the target slot; inverted below so that everything else is kept
  unsigned int slotBits = ((1 << d_bitsPerVal) - 1) << offset;
  const unsigned int keep = ~slotBits;
  d_data[word] = (d_data[word] & keep) | (val << offset);
}
unsigned int DiscreteValueVect::getTotalVal() const {
unsigned int i, j, res = 0;
for (i = 0; i < d_numInts; ++i) {
for (j = 0; j < d_valsPerInt; ++j) {
res += ((d_data[i] >> (j * d_bitsPerVal)) & d_mask);
}
}
return res;
}
// Returns d_length, the number of values in the vector.
unsigned int DiscreteValueVect::getLength() const {
  return d_length;
}
// Returns a read-only pointer to the packed 32-bit words held by d_data.
const boost::uint32_t *DiscreteValueVect::getData() const {
  const boost::uint32_t *raw = d_data.get();
  return raw;
}
unsigned int computeL1Norm(const DiscreteValueVect &v1,
const DiscreteValueVect &v2) {
if (v1.getLength() != v2.getLength()) {
throw ValueErrorException("Comparing vectors of different lengths");
}
unsigned int DiscreteValueVect::getVal(unsigned int i) const {
if(i >= d_length){
throw IndexErrorException(i);
}
unsigned int shift = d_bitsPerVal*(i%d_valsPerInt);
unsigned int intId = i/d_valsPerInt;
return ( (d_data[intId] >> shift) & d_mask);
DiscreteValueVect::DiscreteValueType valType = v1.getValueType();
if (valType != v2.getValueType()) {
throw ValueErrorException("Comparing vector of different value types");
}
void DiscreteValueVect::setVal(unsigned int i, unsigned int val) {
if(i >= d_length){
throw IndexErrorException(i);
}
if ((val & d_mask) != val) {
throw ValueErrorException("Value out of range");
}
unsigned int shift = d_bitsPerVal*(i%d_valsPerInt);
unsigned int intId = i/d_valsPerInt;
unsigned int mask = ((1<<d_bitsPerVal) -1) << shift;
mask = ~mask;
d_data[intId] = (d_data[intId]&mask)|(val << shift);
}
const boost::uint32_t *data1 = v1.getData();
const boost::uint32_t *data2 = v2.getData();
unsigned int DiscreteValueVect::getTotalVal() const {
unsigned int i, j, res = 0;
for (i = 0; i < d_numInts; ++i) {
for (j = 0; j < d_valsPerInt; ++j) {
res += ((d_data[i] >> (j*d_bitsPerVal)) & d_mask);
}
}
return res;
}
unsigned int res = 0;
if (valType <= DiscreteValueVect::EIGHTBITVALUE) {
DiscreteDistMat *dmat = getDiscreteDistMat();
unsigned int DiscreteValueVect::getLength() const {
return d_length;
}
const boost::uint32_t *DiscreteValueVect::getData() const {
return d_data.get();
}
unsigned int computeL1Norm(const DiscreteValueVect &v1, const DiscreteValueVect &v2) {
if (v1.getLength() != v2.getLength()) {
throw ValueErrorException("Comparing vectors of different lengths");
}
DiscreteValueVect::DiscreteValueType valType = v1.getValueType();
if (valType != v2.getValueType()) {
throw ValueErrorException("Comparing vector of different value types");
}
const boost::uint32_t* data1 = v1.getData();
const boost::uint32_t* data2 = v2.getData();
unsigned int res = 0;
if (valType <= DiscreteValueVect::EIGHTBITVALUE) {
DiscreteDistMat *dmat = getDiscreteDistMat();
unsigned char *cd1 = (unsigned char *)(data1);
unsigned char *cd2 = (unsigned char *)(data2);
const unsigned char *cend = cd1 + (v1.getNumInts()*4);
while (cd1 != cend) {
if (*cd1 == *cd2) {
cd1++;
cd2++;
continue;
}
res += dmat->getDist(*cd1, *cd2, valType);
unsigned char *cd1 = (unsigned char *)(data1);
unsigned char *cd2 = (unsigned char *)(data2);
const unsigned char *cend = cd1 + (v1.getNumInts() * 4);
while (cd1 != cend) {
if (*cd1 == *cd2) {
cd1++;
cd2++;
continue;
}
} else {
// we have a sixteen bits per value type
// REVIEW: we are making an assumption here that a short
// is 16 bit - may fail on a different compiler
const unsigned short int *sd1 = (unsigned short int *)(data1);
const unsigned short int *sd2 = (unsigned short int *)(data2);
const unsigned short int *send = sd1 + (v1.getNumInts()*2);
while (sd1 != send) {
if (*sd1 == *sd2) {
sd1++;
sd2++;
continue;
}
res += abs((*sd1) - (*sd2));
res += dmat->getDist(*cd1, *cd2, valType);
cd1++;
cd2++;
}
} else {
// we have a sixteen bits per value type
// REVIEW: we are making an assumption here that a short
// is 16 bit - may fail on a different compiler
const unsigned short int *sd1 = (unsigned short int *)(data1);
const unsigned short int *sd2 = (unsigned short int *)(data2);
const unsigned short int *send = sd1 + (v1.getNumInts() * 2);
while (sd1 != send) {
if (*sd1 == *sd2) {
sd1++;
sd2++;
continue;
}
res += abs((*sd1) - (*sd2));
sd1++;
sd2++;
}
return res;
}
return res;
}
std::string DiscreteValueVect::toString() const {
std::stringstream ss(std::ios_base::binary|std::ios_base::out|std::ios_base::in);
std::string DiscreteValueVect::toString() const {
std::stringstream ss(std::ios_base::binary | std::ios_base::out |
std::ios_base::in);
boost::int32_t tVers=ci_DISCRETEVALUEVECTPICKLE_VERSION*-1;
streamWrite(ss,tVers);
boost::uint32_t tInt;
tInt=d_type;
streamWrite(ss,tInt);
tInt=d_bitsPerVal;
streamWrite(ss,tInt);
tInt=d_mask;
streamWrite(ss,tInt);
tInt=d_length;
streamWrite(ss,tInt);
tInt=d_numInts;
streamWrite(ss,tInt);
boost::int32_t tVers = ci_DISCRETEVALUEVECTPICKLE_VERSION * -1;
streamWrite(ss, tVers);
boost::uint32_t tInt;
tInt = d_type;
streamWrite(ss, tInt);
tInt = d_bitsPerVal;
streamWrite(ss, tInt);
tInt = d_mask;
streamWrite(ss, tInt);
tInt = d_length;
streamWrite(ss, tInt);
tInt = d_numInts;
streamWrite(ss, tInt);
#if defined(BOOST_BIG_ENDIAN)
boost::uint32_t *td = new boost::uint32_t[d_numInts];
for(unsigned int i=0;i<d_numInts;++i) td[i]=EndianSwapBytes<HOST_ENDIAN_ORDER,LITTLE_ENDIAN_ORDER>(d_data.get()[i]);
ss.write((const char *)td,d_numInts*sizeof(tInt));
delete [] td;
#else
ss.write((const char *)d_data.get(),d_numInts*sizeof(tInt));
#endif
std::string res(ss.str());
return res;
};
boost::uint32_t *td = new boost::uint32_t[d_numInts];
for (unsigned int i = 0; i < d_numInts; ++i)
td[i] = EndianSwapBytes<HOST_ENDIAN_ORDER, LITTLE_ENDIAN_ORDER>(
d_data.get()[i]);
ss.write((const char *)td, d_numInts * sizeof(tInt));
delete[] td;
#else
ss.write((const char *)d_data.get(), d_numInts * sizeof(tInt));
#endif
std::string res(ss.str());
return res;
};
void DiscreteValueVect::initFromText(const char *pkl,const unsigned int len){
std::stringstream ss(std::ios_base::binary|std::ios_base::in|std::ios_base::out);
ss.write(pkl,len);
boost::int32_t tVers;
streamRead(ss,tVers);
tVers *= -1;
if(tVers==0x1){
void DiscreteValueVect::initFromText(const char *pkl, const unsigned int len) {
std::stringstream ss(std::ios_base::binary | std::ios_base::in |
std::ios_base::out);
ss.write(pkl, len);
boost::int32_t tVers;
streamRead(ss, tVers);
tVers *= -1;
if (tVers == 0x1) {
} else {
throw ValueErrorException("bad version in DiscreteValueVect pickle");
}
boost::uint32_t tInt;
streamRead(ss, tInt);
d_type = static_cast<DiscreteValueType>(tInt);
streamRead(ss, tInt);
d_bitsPerVal = tInt;
d_valsPerInt = BITS_PER_INT / d_bitsPerVal;
streamRead(ss, tInt);
d_mask = tInt;
streamRead(ss, tInt);
d_length = tInt;
streamRead(ss, tInt);
d_numInts = tInt;
boost::uint32_t *data = new boost::uint32_t[d_numInts];
ss.read((char *)data, d_numInts * sizeof(boost::uint32_t));
#if defined(BOOST_BIG_ENDIAN)
boost::uint32_t *td = new boost::uint32_t[d_numInts];
for (unsigned int i = 0; i < d_numInts; ++i)
td[i] = EndianSwapBytes<LITTLE_ENDIAN_ORDER, HOST_ENDIAN_ORDER>(data[i]);
d_data.reset(td);
delete[] data;
#else
d_data.reset(data);
#endif
};
DiscreteValueVect DiscreteValueVect::operator&(
const DiscreteValueVect &other) const {
PRECONDITION(other.d_length == d_length, "length mismatch");
DiscreteValueType typ = d_type;
if (other.d_type < typ) {
typ = other.d_type;
}
DiscreteValueVect ans(typ, d_length);
for (unsigned int i = 0; i < d_length; ++i) {
unsigned int v1 = getVal(i);
unsigned int v2 = other.getVal(i);
if (v1 < v2) {
ans.setVal(i, v1);
} else {
throw ValueErrorException("bad version in DiscreteValueVect pickle");
ans.setVal(i, v2);
}
boost::uint32_t tInt;
streamRead(ss,tInt);
d_type=static_cast<DiscreteValueType>(tInt);
streamRead(ss,tInt);
d_bitsPerVal=tInt;
d_valsPerInt = BITS_PER_INT/d_bitsPerVal;
streamRead(ss,tInt);
d_mask=tInt;
streamRead(ss,tInt);
d_length=tInt;
streamRead(ss,tInt);
d_numInts=tInt;
boost::uint32_t *data = new boost::uint32_t[d_numInts];
ss.read((char *)data,d_numInts*sizeof(boost::uint32_t));
#if defined(BOOST_BIG_ENDIAN)
boost::uint32_t *td = new boost::uint32_t[d_numInts];
for(unsigned int i=0;i<d_numInts;++i) td[i]=EndianSwapBytes<LITTLE_ENDIAN_ORDER,HOST_ENDIAN_ORDER>(data[i]);
d_data.reset(td);
delete [] data;
#else
d_data.reset(data);
#endif
};
DiscreteValueVect DiscreteValueVect::operator& (const DiscreteValueVect &other) const {
PRECONDITION(other.d_length==d_length,"length mismatch");
DiscreteValueType typ=d_type;
if(other.d_type<typ){
typ=other.d_type;
}
DiscreteValueVect ans(typ,d_length);
for(unsigned int i=0;i<d_length;++i){
unsigned int v1=getVal(i);
unsigned int v2=other.getVal(i);
if(v1<v2){
ans.setVal(i,v1);
}else{
ans.setVal(i,v2);
}
}
return(ans);
};
DiscreteValueVect DiscreteValueVect::operator|(const DiscreteValueVect &other) const {
PRECONDITION(other.d_length==d_length,"length mismatch");
DiscreteValueType typ=d_type;
if(other.d_type>typ){
typ=other.d_type;
}
DiscreteValueVect ans(typ,d_length);
for(unsigned int i=0;i<d_length;++i){
unsigned int v1=getVal(i);
unsigned int v2=other.getVal(i);
if(v1>v2){
ans.setVal(i,v1);
}else{
ans.setVal(i,v2);
}
}
return(ans);
};
DiscreteValueVect& DiscreteValueVect::operator+=(const DiscreteValueVect &other) {
PRECONDITION(other.d_length==d_length,"length mismatch");
unsigned int maxVal = (1<<d_bitsPerVal) - 1;
for(unsigned int i=0;i<d_length;i++){
unsigned int v=getVal(i)+other.getVal(i);
if(v>maxVal){
v=maxVal;
}
setVal(i,v);
}
return *this;
}
DiscreteValueVect& DiscreteValueVect::operator-=(const DiscreteValueVect &other) {
PRECONDITION(other.d_length==d_length,"length mismatch");
return (ans);
};
for(unsigned int i=0;i<d_length;i++){
unsigned int v1=getVal(i);
unsigned int v2=other.getVal(i);
if(v1>v2){
setVal(i,v1-v2);
}else{
setVal(i,0);
}
}
return *this;
// Element-wise maximum of the two vectors. The result is created with the
// larger of the two DiscreteValueType values.
// PRECONDITION: both vectors have the same length.
DiscreteValueVect DiscreteValueVect::operator|(
    const DiscreteValueVect &other) const {
  PRECONDITION(other.d_length == d_length, "length mismatch");
  const DiscreteValueType widest =
      (other.d_type > d_type) ? other.d_type : d_type;
  DiscreteValueVect result(widest, d_length);
  for (unsigned int idx = 0; idx < d_length; ++idx) {
    const unsigned int mine = getVal(idx);
    const unsigned int theirs = other.getVal(idx);
    result.setVal(idx, (mine > theirs) ? mine : theirs);
  }
  return result;
}
// Adds `other` to this vector element by element. Each sum is capped at
// (1 << d_bitsPerVal) - 1 before it is stored.
// PRECONDITION: both vectors have the same length.
DiscreteValueVect &DiscreteValueVect::operator+=(
    const DiscreteValueVect &other) {
  PRECONDITION(other.d_length == d_length, "length mismatch");
  const unsigned int ceiling = (1 << d_bitsPerVal) - 1;
  for (unsigned int idx = 0; idx < d_length; ++idx) {
    const unsigned int sum = getVal(idx) + other.getVal(idx);
    setVal(idx, (sum > ceiling) ? ceiling : sum);
  }
  return *this;
}
// Subtracts `other` from this vector element by element. Where the
// subtrahend is not smaller than the current value, 0 is stored.
// PRECONDITION: both vectors have the same length.
DiscreteValueVect &DiscreteValueVect::operator-=(
    const DiscreteValueVect &other) {
  PRECONDITION(other.d_length == d_length, "length mismatch");
  for (unsigned int idx = 0; idx < d_length; ++idx) {
    const unsigned int lhs = getVal(idx);
    const unsigned int rhs = other.getVal(idx);
    setVal(idx, (lhs > rhs) ? (lhs - rhs) : 0u);
  }
  return *this;
}
#if 0
DiscreteValueVect DiscreteValueVect::operator~() const {
@@ -275,19 +279,17 @@ namespace RDKit {
};
#endif
DiscreteValueVect operator+ (const DiscreteValueVect& p1,
const DiscreteValueVect& p2){
DiscreteValueVect res(p1);
res+=p2;
return res;
};
DiscreteValueVect operator- (const DiscreteValueVect& p1,
const DiscreteValueVect& p2){
DiscreteValueVect res(p1);
res-=p2;
return res;
};
} // end of namespace RDKit
// Returns a copy of p1 to which p2 has been added with operator+=.
DiscreteValueVect operator+(const DiscreteValueVect &p1,
                            const DiscreteValueVect &p2) {
  DiscreteValueVect sum(p1);
  sum += p2;
  return sum;
}
// Returns a copy of p1 from which p2 has been subtracted with operator-=.
DiscreteValueVect operator-(const DiscreteValueVect &p1,
                            const DiscreteValueVect &p2) {
  DiscreteValueVect difference(p1);
  difference -= p2;
  return difference;
}
} // end of namespace RDKit

View File

@@ -15,137 +15,129 @@
#include <cstring>
#include <boost/cstdint.hpp>
namespace RDKit{
// we require 32bit unsigneds using the boost::uint32_t type:
const unsigned int BITS_PER_INT=32;
namespace RDKit {
// we require 32bit unsigneds using the boost::uint32_t type:
const unsigned int BITS_PER_INT = 32;
//! a class for efficiently storing vectors of discrete values
class DiscreteValueVect {
public:
typedef boost::shared_array<boost::uint32_t> DATA_SPTR;
//! used to define the possible range of the values
typedef enum {
ONEBITVALUE=0,
TWOBITVALUE,
FOURBITVALUE,
EIGHTBITVALUE,
SIXTEENBITVALUE,
} DiscreteValueType;
//! a class for efficiently storing vectors of discrete values
class DiscreteValueVect {
public:
typedef boost::shared_array<boost::uint32_t> DATA_SPTR;
//! initialize with a particular type and size
DiscreteValueVect(DiscreteValueType valType, unsigned int length) : d_type(valType), d_length(length) {
d_bitsPerVal = (1 << static_cast<unsigned int>(valType));
d_valsPerInt = BITS_PER_INT/d_bitsPerVal;
d_numInts = (length + d_valsPerInt -1)/d_valsPerInt;
d_mask = ((1<<d_bitsPerVal) -1);
boost::uint32_t *data = new boost::uint32_t[d_numInts];
memset(static_cast<void *>(data),0,d_numInts*sizeof(boost::uint32_t));
d_data.reset(data);
}
//! used to define the possible range of the values
typedef enum {
ONEBITVALUE = 0,
TWOBITVALUE,
FOURBITVALUE,
EIGHTBITVALUE,
SIXTEENBITVALUE,
} DiscreteValueType;
//! Copy constructor
DiscreteValueVect(const DiscreteValueVect& other);
//! initialize with a particular type and size
DiscreteValueVect(DiscreteValueType valType, unsigned int length)
: d_type(valType), d_length(length) {
d_bitsPerVal = (1 << static_cast<unsigned int>(valType));
d_valsPerInt = BITS_PER_INT / d_bitsPerVal;
d_numInts = (length + d_valsPerInt - 1) / d_valsPerInt;
d_mask = ((1 << d_bitsPerVal) - 1);
boost::uint32_t *data = new boost::uint32_t[d_numInts];
memset(static_cast<void *>(data), 0, d_numInts * sizeof(boost::uint32_t));
d_data.reset(data);
}
//! constructor from a pickle
DiscreteValueVect(const std::string &pkl){
initFromText(pkl.c_str(),pkl.size());
};
//! constructor from a pickle
DiscreteValueVect(const char *pkl,const unsigned int len){
initFromText(pkl,len);
};
//! Copy constructor
DiscreteValueVect(const DiscreteValueVect &other);
~DiscreteValueVect() {}
//! return the value at an index
unsigned int getVal(unsigned int i) const;
//! support indexing using []
int operator[] (unsigned int idx) const { return getVal(idx); };
//! set the value at an index
/*!
NOTE: it is an error to have val > the max value this
DiscreteValueVect can accomodate
*/
void setVal(unsigned int i, unsigned int val);
//! returns the sum of all the elements in the vect
unsigned int getTotalVal() const;
//! returns the length
unsigned int getLength() const;
//! returns the length
unsigned int size() const { return getLength(); };
//! return a pointer to our raw data storage
const boost::uint32_t *getData() const;
//! return the number of bits used to store each value
unsigned int getNumBitsPerVal() const {
return d_bitsPerVal;
}
//! return the type of value being stored
DiscreteValueType getValueType() const {
return d_type;
}
//! returns the size of our storage
unsigned int getNumInts() const {
return d_numInts;
}
//! support dvv3 = dvv1&dvv2
/*!
operator& returns the minimum value for each element.
e.g.:
[0,1,2,0] & [0,1,1,1] -> [0,1,1,0]
*/
DiscreteValueVect operator& (const DiscreteValueVect &other) const;
//! support dvv3 = dvv1|dvv2
/*!
operator& returns the maximum value for each element.
e.g.:
[0,1,2,0] | [0,1,1,1] -> [0,1,2,1]
*/
DiscreteValueVect operator| (const DiscreteValueVect &other) const;
//DiscreteValueVect operator^ (const DiscreteValueVect &other) const;
//DiscreteValueVect operator~ () const;
DiscreteValueVect& operator+=(const DiscreteValueVect &other);
DiscreteValueVect& operator-=(const DiscreteValueVect &other);
//! returns a binary string representation (pickle)
std::string toString() const;
private:
DiscreteValueType d_type;
unsigned int d_bitsPerVal;
unsigned int d_valsPerInt;
unsigned int d_numInts;
unsigned int d_length;
unsigned int d_mask;
DATA_SPTR d_data;
void initFromText(const char *pkl,const unsigned int len);
//! constructor from a pickle
DiscreteValueVect(const std::string &pkl) {
initFromText(pkl.c_str(), pkl.size());
};
//! constructor from a pickle
DiscreteValueVect(const char *pkl, const unsigned int len) {
initFromText(pkl, len);
};
unsigned int computeL1Norm(const DiscreteValueVect &v1, const DiscreteValueVect &v2);
~DiscreteValueVect() {}
DiscreteValueVect operator+ (const DiscreteValueVect& p1,
const DiscreteValueVect& p2);
DiscreteValueVect operator- (const DiscreteValueVect& p1,
const DiscreteValueVect& p2);
//! return the value at an index
unsigned int getVal(unsigned int i) const;
}
//! support indexing using []
int operator[](unsigned int idx) const { return getVal(idx); };
//! set the value at an index
/*!
NOTE: it is an error to have val > the max value this
DiscreteValueVect can accommodate
*/
void setVal(unsigned int i, unsigned int val);
//! returns the sum of all the elements in the vect
unsigned int getTotalVal() const;
//! returns the length
unsigned int getLength() const;
//! returns the length
unsigned int size() const { return getLength(); };
//! return a pointer to our raw data storage
const boost::uint32_t *getData() const;
//! return the number of bits used to store each value
unsigned int getNumBitsPerVal() const { return d_bitsPerVal; }
//! return the type of value being stored
DiscreteValueType getValueType() const { return d_type; }
//! returns the size of our storage
unsigned int getNumInts() const { return d_numInts; }
//! support dvv3 = dvv1&dvv2
/*!
operator& returns the minimum value for each element.
e.g.:
[0,1,2,0] & [0,1,1,1] -> [0,1,1,0]
*/
DiscreteValueVect operator&(const DiscreteValueVect &other) const;
//! support dvv3 = dvv1|dvv2
/*!
operator| returns the maximum value for each element.
e.g.:
[0,1,2,0] | [0,1,1,1] -> [0,1,2,1]
*/
DiscreteValueVect operator|(const DiscreteValueVect &other) const;
// DiscreteValueVect operator^ (const DiscreteValueVect &other) const;
// DiscreteValueVect operator~ () const;
DiscreteValueVect &operator+=(const DiscreteValueVect &other);
DiscreteValueVect &operator-=(const DiscreteValueVect &other);
//! returns a binary string representation (pickle)
std::string toString() const;
private:
DiscreteValueType d_type;
unsigned int d_bitsPerVal;
unsigned int d_valsPerInt;
unsigned int d_numInts;
unsigned int d_length;
unsigned int d_mask;
DATA_SPTR d_data;
void initFromText(const char *pkl, const unsigned int len);
};
unsigned int computeL1Norm(const DiscreteValueVect &v1,
const DiscreteValueVect &v2);
DiscreteValueVect operator+(const DiscreteValueVect &p1,
const DiscreteValueVect &p2);
DiscreteValueVect operator-(const DiscreteValueVect &p1,
const DiscreteValueVect &p2);
}
#endif

View File

@@ -21,207 +21,209 @@
#endif
#include <boost/cstdint.hpp>
ExplicitBitVect::ExplicitBitVect(unsigned int size, bool bitsSet)
{
d_size=0;dp_bits = 0;d_numOnBits=0;
ExplicitBitVect::ExplicitBitVect(unsigned int size, bool bitsSet) {
d_size = 0;
dp_bits = 0;
d_numOnBits = 0;
_initForSize(size);
if (bitsSet) {
dp_bits->set(); // set all bits to 1
dp_bits->set(); // set all bits to 1
d_numOnBits = size;
}
}
ExplicitBitVect::ExplicitBitVect(const std::string &s)
{
d_size=0;dp_bits = 0;d_numOnBits=0;
initFromText(s.c_str(),s.length());
// Constructs from a string pickle: starts from the empty state and lets
// initFromText() fill the object from the bytes of s.
ExplicitBitVect::ExplicitBitVect(const std::string &s)
    : dp_bits(0), d_size(0), d_numOnBits(0) {
  initFromText(s.c_str(), s.length());
}
ExplicitBitVect::ExplicitBitVect(const char *data,const unsigned int dataLen)
{
d_size=0;dp_bits = 0;d_numOnBits=0;
initFromText(data,dataLen);
// Constructs from a raw pickle buffer of dataLen bytes: starts from the empty
// state and lets initFromText() fill the object.
ExplicitBitVect::ExplicitBitVect(const char *data, const unsigned int dataLen)
    : dp_bits(0), d_size(0), d_numOnBits(0) {
  initFromText(data, dataLen);
}
ExplicitBitVect::ExplicitBitVect(const ExplicitBitVect& other) : BitVect(other) {
d_size = other.d_size;
dp_bits = new boost::dynamic_bitset<>(*(other.dp_bits));
d_numOnBits=other.d_numOnBits;
};
// Copy constructor: deep-copies the bitset owned by `other`.
// A source without storage (dp_bits == 0, as left by the default
// constructor) yields a copy that also has no storage instead of
// dereferencing a null pointer.
ExplicitBitVect::ExplicitBitVect(const ExplicitBitVect &other)
    : BitVect(other) {
  d_size = other.d_size;
  dp_bits =
      other.dp_bits ? new boost::dynamic_bitset<>(*(other.dp_bits)) : 0;
  d_numOnBits = other.d_numOnBits;
}
ExplicitBitVect& ExplicitBitVect::operator=(const ExplicitBitVect& other){
d_size = other.d_size;
delete dp_bits;
dp_bits = new boost::dynamic_bitset<>(*(other.dp_bits));
d_numOnBits=other.d_numOnBits;
return *this;
};
bool ExplicitBitVect::operator[] (const unsigned int which) const {
if(which >= d_size){
throw IndexErrorException(which);
// Copy assignment: replaces this object's bitset with a deep copy of the one
// owned by `other` and returns *this.
ExplicitBitVect &ExplicitBitVect::operator=(const ExplicitBitVect &other) {
  // self-assignment: deleting our storage first would destroy the source
  if (this == &other) {
    return *this;
  }
  // build the copy before releasing the old storage, so a failed allocation
  // leaves this object untouched; a source without storage copies as "none"
  boost::dynamic_bitset<> *fresh =
      other.dp_bits ? new boost::dynamic_bitset<>(*(other.dp_bits)) : 0;
  delete dp_bits;
  dp_bits = fresh;
  d_size = other.d_size;
  d_numOnBits = other.d_numOnBits;
  return *this;
}
// Returns the state of bit `which`.
// Throws IndexErrorException when which is not smaller than d_size.
bool ExplicitBitVect::operator[](const unsigned int which) const {
  if (which >= d_size) {
    throw IndexErrorException(which);
  }
  const bool isOn = (*dp_bits)[which];
  return isOn;
}
// Turns bit `which` on and keeps d_numOnBits in step.
// Returns true when the bit was already on, false when this call set it.
// Throws IndexErrorException when which is not smaller than d_size.
bool ExplicitBitVect::setBit(const unsigned int which) {
  if (which >= d_size) {
    throw IndexErrorException(which);
  }
  const bool wasOn = (*dp_bits)[which];
  if (!wasOn) {
    (*dp_bits)[which] = 1;
    ++d_numOnBits;
  }
  return wasOn;
}
// Turns bit `which` off and keeps d_numOnBits in step.
// Returns true when the bit was on before the call, false otherwise.
// Throws IndexErrorException when which is not smaller than d_size.
bool ExplicitBitVect::unsetBit(const unsigned int which) {
  if (which >= d_size) {
    throw IndexErrorException(which);
  }
  const bool wasOn = (*dp_bits)[which];
  if (wasOn) {
    (*dp_bits)[which] = 0;
    --d_numOnBits;
  }
  return wasOn;
}
// Returns the state of bit `which`.
// Throws IndexErrorException when which is not smaller than d_size.
bool ExplicitBitVect::getBit(const unsigned int which) const {
  if (which >= d_size) {
    throw IndexErrorException(which);
  }
  const bool isOn = (*dp_bits)[which];
  return isOn;
}
// Returns the bitwise XOR of the two bitsets; the on-bit count of the result
// is recomputed from its bits.
ExplicitBitVect ExplicitBitVect::operator^(const ExplicitBitVect &other) const {
  ExplicitBitVect result(d_size);
  boost::dynamic_bitset<> &bits = *(result.dp_bits);
  bits = (*dp_bits) ^ *(other.dp_bits);
  result.d_numOnBits = bits.count();
  return result;
}
// Returns the bitwise AND of the two bitsets; the on-bit count of the result
// is recomputed from its bits.
ExplicitBitVect ExplicitBitVect::operator&(const ExplicitBitVect &other) const {
  ExplicitBitVect result(d_size);
  boost::dynamic_bitset<> &bits = *(result.dp_bits);
  bits = (*dp_bits) & *(other.dp_bits);
  result.d_numOnBits = bits.count();
  return result;
}
// Returns the bitwise OR of the two bitsets; the on-bit count of the result
// is recomputed from its bits.
ExplicitBitVect ExplicitBitVect::operator|(const ExplicitBitVect &other) const {
  ExplicitBitVect result(d_size);
  boost::dynamic_bitset<> &bits = *(result.dp_bits);
  bits = (*dp_bits) | *(other.dp_bits);
  result.d_numOnBits = bits.count();
  return result;
}
// XORs the bits of `other` into this bitset and refreshes d_numOnBits.
ExplicitBitVect &ExplicitBitVect::operator^=(const ExplicitBitVect &other) {
  boost::dynamic_bitset<> &mine = *dp_bits;
  mine ^= *(other.dp_bits);
  d_numOnBits = mine.count();
  return *this;
}
// ANDs the bits of `other` into this bitset and refreshes d_numOnBits.
ExplicitBitVect &ExplicitBitVect::operator&=(const ExplicitBitVect &other) {
  boost::dynamic_bitset<> &mine = *dp_bits;
  mine &= *(other.dp_bits);
  d_numOnBits = mine.count();
  return *this;
}
// ORs the bits of `other` into this bitset and refreshes d_numOnBits.
ExplicitBitVect &ExplicitBitVect::operator|=(const ExplicitBitVect &other) {
  boost::dynamic_bitset<> &mine = *dp_bits;
  mine |= *(other.dp_bits);
  d_numOnBits = mine.count();
  return *this;
}
// Returns a vector of the same size with every bit flipped; the on-bit count
// of the result is recomputed from its bits.
ExplicitBitVect ExplicitBitVect::operator~() const {
  ExplicitBitVect flipped(d_size);
  boost::dynamic_bitset<> &bits = *(flipped.dp_bits);
  bits = ~(*dp_bits);
  flipped.d_numOnBits = bits.count();
  return flipped;
}
ExplicitBitVect &ExplicitBitVect::operator+=(const ExplicitBitVect &other) {
dp_bits->resize(d_size + other.d_size);
unsigned int original_size = d_size;
d_size = dp_bits->size();
for (unsigned i = 0; i < other.d_size; i++) {
if (other[i]) {
setBit(i + original_size);
}
return (bool)(*dp_bits)[which];
};
bool ExplicitBitVect::setBit(const unsigned int which){
if(which >= d_size){
throw IndexErrorException(which);
}
if((bool)(*dp_bits)[which]){
return true;
} else {
(*dp_bits)[which] = 1;
++d_numOnBits;
return false;
}
};
bool ExplicitBitVect::unsetBit(const unsigned int which){
if(which >= d_size){
throw IndexErrorException(which);
}
if((bool)(*dp_bits)[which]){
(*dp_bits)[which] = 0;
--d_numOnBits;
return true;
} else {
return false;
}
};
bool ExplicitBitVect::getBit(const unsigned int which) const {
if(which >= d_size){
throw IndexErrorException(which);
}
return((bool)(*dp_bits)[which]);
};
}
d_numOnBits = dp_bits->count();
return *this;
};
ExplicitBitVect ExplicitBitVect::operator^ (const ExplicitBitVect &other) const {
ExplicitBitVect ans(d_size);
*(ans.dp_bits) = (*dp_bits) ^ *(other.dp_bits);
ans.d_numOnBits=ans.dp_bits->count();
return(ans);
};
// Returns a copy of this vector with `other` appended through operator+=.
ExplicitBitVect ExplicitBitVect::operator+(const ExplicitBitVect &other) const {
  ExplicitBitVect joined(*this);
  joined += other;
  return joined;
}
ExplicitBitVect ExplicitBitVect::operator& (const ExplicitBitVect &other) const {
ExplicitBitVect ans(d_size);
*(ans.dp_bits) = (*dp_bits) & *(other.dp_bits);
ans.d_numOnBits=ans.dp_bits->count();
return(ans);
};
// Returns d_size, the number of bits in the vector.
unsigned int ExplicitBitVect::getNumBits() const {
  return d_size;
}
// Returns the cached count of bits that are on.
unsigned int ExplicitBitVect::getNumOnBits() const {
  return d_numOnBits;
}
// Returns the number of bits that are off: total size minus the cached
// on-bit count.
unsigned int ExplicitBitVect::getNumOffBits() const {
  const unsigned int offCount = d_size - d_numOnBits;
  return offCount;
}
ExplicitBitVect ExplicitBitVect::operator| (const ExplicitBitVect &other) const {
ExplicitBitVect ans(d_size);
*(ans.dp_bits) = (*dp_bits) | *(other.dp_bits);
ans.d_numOnBits=ans.dp_bits->count();
return(ans);
};
ExplicitBitVect& ExplicitBitVect::operator^= (const ExplicitBitVect &other) {
*(dp_bits) ^= *(other.dp_bits);
d_numOnBits=dp_bits->count();
return *this;
};
// the contents of v are blown out
// Replaces the contents of v with the indices of all on bits, in increasing
// order.
void ExplicitBitVect::getOnBits(IntVect &v) const {
  const unsigned int expected = getNumOnBits();
  if (!v.empty()) {
    // swap with an empty vector to drop the old elements and their storage
    IntVect emptied;
    emptied.swap(v);
  }
  v.reserve(expected);
  for (unsigned int pos = 0; pos < d_size; ++pos) {
    if ((*dp_bits)[pos]) {
      v.push_back(pos);
    }
  }
}
ExplicitBitVect& ExplicitBitVect::operator&= (const ExplicitBitVect &other) {
*(dp_bits) &= *(other.dp_bits);
d_numOnBits=dp_bits->count();
return *this;
};
void ExplicitBitVect::_initForSize(unsigned int size) {
d_size = size;
delete dp_bits;
dp_bits = new boost::dynamic_bitset<>(size);
d_numOnBits = 0;
};
ExplicitBitVect& ExplicitBitVect::operator|= (const ExplicitBitVect &other) {
*(dp_bits) |= *(other.dp_bits);
d_numOnBits=dp_bits->count();
return *this;
};
// Releases the owned bitset and leaves dp_bits null.
ExplicitBitVect::~ExplicitBitVect() {
  boost::dynamic_bitset<> *doomed = dp_bits;
  dp_bits = NULL;
  delete doomed;
}
ExplicitBitVect ExplicitBitVect::operator~ () const {
ExplicitBitVect ans(d_size);
*(ans.dp_bits) = ~(*dp_bits);
ans.d_numOnBits=ans.dp_bits->count();
return(ans);
};
ExplicitBitVect& ExplicitBitVect::operator+= (const ExplicitBitVect &other) {
dp_bits->resize(d_size+other.d_size);
unsigned int original_size = d_size;
d_size = dp_bits->size();
for(unsigned i=0;i<other.d_size;i++){
if(other[i]){
setBit(i+original_size);
}
}
d_numOnBits=dp_bits->count();
return *this;
};
ExplicitBitVect ExplicitBitVect::operator+ (const ExplicitBitVect &other) const {
ExplicitBitVect ans(*this);
return ans+=other;
};
unsigned int ExplicitBitVect::getNumBits() const {
return d_size;
};
unsigned int ExplicitBitVect::getNumOnBits() const {
return d_numOnBits;
};
unsigned int ExplicitBitVect::getNumOffBits() const {
return d_size - d_numOnBits;
};
// the contents of v are blown out
void ExplicitBitVect::getOnBits (IntVect& v) const {
unsigned int nOn = getNumOnBits();
if(!v.empty()) IntVect().swap(v);
v.reserve(nOn);
for(unsigned int i=0;i<d_size;i++){
if((bool)(*dp_bits)[i]) v.push_back(i);
}
};
void ExplicitBitVect::_initForSize(unsigned int size) {
d_size = size;
delete dp_bits;
dp_bits = new boost::dynamic_bitset<>(size);
d_numOnBits=0;
};
ExplicitBitVect::~ExplicitBitVect() {
delete dp_bits;
dp_bits=NULL;
};
std::string
ExplicitBitVect::toString() const
{
// This Function replaces the older version (version 16) of writing the onbits to
std::string ExplicitBitVect::toString() const {
// This Function replaces the older version (version 16) of writing the onbits
// to
// a string
// the old version does not perform any run length encoding, it only checks to see if
// the length of the bitvect can be short ints and writes the on bits as shorts
// the old version does not perform any run length encoding, it only checks to
// see if
// the length of the bitvect can be short ints and writes the on bits as
// shorts
// other wise the onbits are all written as ints
// here we do run length encoding and the version number has been bumped to 32 as well.
// here we do run length encoding and the version number has been bumped to 32
// as well.
// only the reader needs to take care of reading all legacy versions
// also in this scheme each bit number written to the string is checked to see how many
// also in this scheme each bit number written to the string is checked to see
// how many
// bytes it needs
std::stringstream ss(std::ios_base::binary|std::ios_base::out|std::ios_base::in);
std::stringstream ss(std::ios_base::binary | std::ios_base::out |
std::ios_base::in);
boost::int32_t tInt = ci_BITVECT_VERSION*-1;
RDKit::streamWrite(ss,tInt);
tInt=d_size;
RDKit::streamWrite(ss,tInt);
tInt=getNumOnBits();
RDKit::streamWrite(ss,tInt);
boost::int32_t tInt = ci_BITVECT_VERSION * -1;
RDKit::streamWrite(ss, tInt);
tInt = d_size;
RDKit::streamWrite(ss, tInt);
tInt = getNumOnBits();
RDKit::streamWrite(ss, tInt);
int prev = -1;
unsigned int zeroes;
for(unsigned int i=0;i<d_size;i++){
if( (bool)(*dp_bits)[i] ){
zeroes = i - prev -1;
for (unsigned int i = 0; i < d_size; i++) {
if ((bool)(*dp_bits)[i]) {
zeroes = i - prev - 1;
RDKit::appendPackedIntToStream(ss, zeroes);
prev = i;
}
}
zeroes = d_size - prev -1;
zeroes = d_size - prev - 1;
RDKit::appendPackedIntToStream(ss, zeroes);
std::string res(ss.str());
return res;
}

View File

@@ -24,62 +24,64 @@
*/
class ExplicitBitVect : public BitVect {
public:
ExplicitBitVect() : dp_bits(0), d_size(0), d_numOnBits(0) {};
public:
ExplicitBitVect() : dp_bits(0), d_size(0), d_numOnBits(0){};
//! initialize with a particular size;
explicit ExplicitBitVect(unsigned int size) : dp_bits(0), d_size(0), d_numOnBits(0) {_initForSize(size);};
explicit ExplicitBitVect(unsigned int size)
: dp_bits(0), d_size(0), d_numOnBits(0) {
_initForSize(size);
};
//! initialize with a particular size and all bits set
ExplicitBitVect(unsigned int size, bool bitsSet);
ExplicitBitVect(const ExplicitBitVect& other);
ExplicitBitVect(const ExplicitBitVect &other);
//! construct from a string pickle
ExplicitBitVect(const std::string &);
//! construct from a text pickle
ExplicitBitVect(const char *,const unsigned int);
ExplicitBitVect(const char *, const unsigned int);
~ExplicitBitVect();
ExplicitBitVect& operator=(const ExplicitBitVect& other);
bool operator[] (const unsigned int which) const;
ExplicitBitVect &operator=(const ExplicitBitVect &other);
bool operator[](const unsigned int which) const;
bool setBit(const unsigned int which);
bool unsetBit(const unsigned int which);
bool getBit(const unsigned int which) const;
ExplicitBitVect operator^ (const ExplicitBitVect &other) const;
ExplicitBitVect operator& (const ExplicitBitVect &other) const;
ExplicitBitVect operator| (const ExplicitBitVect &other) const;
ExplicitBitVect operator~ () const;
ExplicitBitVect operator^(const ExplicitBitVect &other) const;
ExplicitBitVect operator&(const ExplicitBitVect &other) const;
ExplicitBitVect operator|(const ExplicitBitVect &other) const;
ExplicitBitVect operator~() const;
/* concatenate two ExplicitBitVects */
ExplicitBitVect operator+ (const ExplicitBitVect &other) const;
ExplicitBitVect operator+(const ExplicitBitVect &other) const;
ExplicitBitVect& operator^= (const ExplicitBitVect &other);
ExplicitBitVect& operator&= (const ExplicitBitVect &other);
ExplicitBitVect& operator|= (const ExplicitBitVect &other);
ExplicitBitVect &operator^=(const ExplicitBitVect &other);
ExplicitBitVect &operator&=(const ExplicitBitVect &other);
ExplicitBitVect &operator|=(const ExplicitBitVect &other);
/* concatenate two ExplicitBitVects */
ExplicitBitVect& operator+= (const ExplicitBitVect &other);
ExplicitBitVect &operator+=(const ExplicitBitVect &other);
unsigned int getNumBits() const;
unsigned int getNumOnBits() const;
unsigned int getNumOffBits() const;
void getOnBits (IntVect& v) const;
void getOnBits(IntVect &v) const;
void clearBits() { dp_bits->reset(); };
std::string toString() const;
boost::dynamic_bitset<> *dp_bits; //!< our raw storage
boost::dynamic_bitset<> *dp_bits; //!< our raw storage
bool operator==(const ExplicitBitVect &o) const {
return *dp_bits==*o.dp_bits;
return *dp_bits == *o.dp_bits;
}
bool operator!=(const ExplicitBitVect &o) const {
return *dp_bits!=*o.dp_bits;
return *dp_bits != *o.dp_bits;
}
private:
private:
unsigned int d_size;
unsigned int d_numOnBits;
void _initForSize(const unsigned int size);
};
#endif

View File

@@ -21,7 +21,6 @@
#endif
#include <boost/cstdint.hpp>
// """ -------------------------------------------------------
//
// Construct a SparseBitVect from a binary string.
@@ -29,13 +28,12 @@
// the format produced by SparseBitVect::toString
//
// """ -------------------------------------------------------
SparseBitVect::SparseBitVect(const std::string &s)
{
d_size=0;dp_bits = 0;
initFromText(s.c_str(),s.length());
SparseBitVect::SparseBitVect(const std::string &s) {
d_size = 0;
dp_bits = 0;
initFromText(s.c_str(), s.length());
}
// """ -------------------------------------------------------
//
// Construct a SparseBitVect from a binary string stored as a char *.
@@ -43,10 +41,10 @@ SparseBitVect::SparseBitVect(const std::string &s)
// the format produced by SparseBitVect::toString
//
// """ -------------------------------------------------------
SparseBitVect::SparseBitVect(const char *data,const unsigned int dataLen)
{
d_size=0;dp_bits = 0;
initFromText(data,dataLen);
SparseBitVect::SparseBitVect(const char *data, const unsigned int dataLen) {
d_size = 0;
dp_bits = 0;
initFromText(data, dataLen);
}
// """ -------------------------------------------------------
@@ -60,14 +58,14 @@ SparseBitVect::SparseBitVect(const char *data,const unsigned int dataLen)
// In Python this type of assignment **is** valid.
//
// """ -------------------------------------------------------
bool
SparseBitVect::operator[](const unsigned int which) const
{
if(which >= d_size){
bool SparseBitVect::operator[](const unsigned int which) const {
if (which >= d_size) {
throw IndexErrorException(which);
}
if(dp_bits->count(which)) return true;
else return false;
if (dp_bits->count(which))
return true;
else
return false;
}
// """ -------------------------------------------------------
@@ -76,32 +74,27 @@ SparseBitVect::operator[](const unsigned int which) const
// The bits of the other SBV are copied.
//
// """ -------------------------------------------------------
SparseBitVect&
SparseBitVect::operator=(const SparseBitVect& other)
{
IntSet *bv=other.dp_bits;
SparseBitVect &SparseBitVect::operator=(const SparseBitVect &other) {
IntSet *bv = other.dp_bits;
delete dp_bits;
d_size = other.getNumBits();
dp_bits = new IntSet;
std::copy(bv->begin(),bv->end(),std::inserter(*dp_bits,dp_bits->end()));
std::copy(bv->begin(), bv->end(), std::inserter(*dp_bits, dp_bits->end()));
return *this;
}
// -------------------------------------------------------
//
// Operator|
// allows SBV3 = SBV1|SBV2;
//
// -------------------------------------------------------
SparseBitVect
SparseBitVect::operator| (const SparseBitVect &other) const
{
SparseBitVect SparseBitVect::operator|(const SparseBitVect &other) const {
SparseBitVect ans(d_size);
std::set_union(dp_bits->begin(),dp_bits->end(),
other.dp_bits->begin(),other.dp_bits->end(),
std::inserter(*(ans.dp_bits),ans.dp_bits->end()));
std::set_union(dp_bits->begin(), dp_bits->end(), other.dp_bits->begin(),
other.dp_bits->end(),
std::inserter(*(ans.dp_bits), ans.dp_bits->end()));
return ans;
}
@@ -111,13 +104,11 @@ SparseBitVect::operator| (const SparseBitVect &other) const
// allows SBV3 = SBV1&SBV2;
//
// """ -------------------------------------------------------
SparseBitVect
SparseBitVect::operator& (const SparseBitVect &other) const
{
SparseBitVect SparseBitVect::operator&(const SparseBitVect &other) const {
SparseBitVect ans(d_size);
std::set_intersection(dp_bits->begin(),dp_bits->end(),
other.dp_bits->begin(),other.dp_bits->end(),
std::inserter(*(ans.dp_bits),ans.dp_bits->end()));
std::set_intersection(dp_bits->begin(), dp_bits->end(),
other.dp_bits->begin(), other.dp_bits->end(),
std::inserter(*(ans.dp_bits), ans.dp_bits->end()));
return ans;
}
@@ -127,14 +118,12 @@ SparseBitVect::operator& (const SparseBitVect &other) const
// allows SBV3 = SBV1^SBV2;
//
// """ -------------------------------------------------------
SparseBitVect
SparseBitVect::operator^ (const SparseBitVect &other) const
{
SparseBitVect SparseBitVect::operator^(const SparseBitVect &other) const {
SparseBitVect ans(d_size);
std::set_symmetric_difference(dp_bits->begin(),dp_bits->end(),
other.dp_bits->begin(),other.dp_bits->end(),
std::inserter(*(ans.dp_bits),ans.dp_bits->end()));
return(ans);
std::set_symmetric_difference(
dp_bits->begin(), dp_bits->end(), other.dp_bits->begin(),
other.dp_bits->end(), std::inserter(*(ans.dp_bits), ans.dp_bits->end()));
return (ans);
}
// """ -------------------------------------------------------
@@ -143,15 +132,13 @@ SparseBitVect::operator^ (const SparseBitVect &other) const
// allows SBV2 = ~SBV1;
//
// """ -------------------------------------------------------
SparseBitVect
SparseBitVect::operator~ () const
{
SparseBitVect SparseBitVect::operator~() const {
SparseBitVect ans(d_size);
for(unsigned int i=0;i<d_size;i++){
if(!getBit(i)) ans.setBit(i);
for (unsigned int i = 0; i < d_size; i++) {
if (!getBit(i)) ans.setBit(i);
}
return(ans);
return (ans);
}
// """ -------------------------------------------------------
@@ -160,32 +147,30 @@ SparseBitVect::operator~ () const
// Returns the state of bit which
//
// """ -------------------------------------------------------
bool
SparseBitVect::getBit(const unsigned int which) const
{
if(which >= d_size){
bool SparseBitVect::getBit(const unsigned int which) const {
if (which >= d_size) {
throw IndexErrorException(which);
}
if(dp_bits->count(which)) return true;
else return false;
if (dp_bits->count(which))
return true;
else
return false;
}
// """ -------------------------------------------------------
//
// getBit(const IntVectIter which) (C++ SPECIFIC)
// Returns the state of bit which
//
// """ -------------------------------------------------------
bool
SparseBitVect::getBit (const IntVectIter which) const
{
if(*which < 0 || static_cast<unsigned int>(*which) >= d_size){
bool SparseBitVect::getBit(const IntVectIter which) const {
if (*which < 0 || static_cast<unsigned int>(*which) >= d_size) {
throw IndexErrorException(*which);
}
if(dp_bits->count(*which)) return true;
else return false;
if (dp_bits->count(*which))
return true;
else
return false;
}
// """ -------------------------------------------------------
@@ -194,15 +179,14 @@ SparseBitVect::getBit (const IntVectIter which) const
// Returns the state of bit which
//
// """ -------------------------------------------------------
bool
SparseBitVect::getBit (const IntSetIter which) const
{
if(*which < 0 || static_cast<unsigned int>(*which) >= d_size){
bool SparseBitVect::getBit(const IntSetIter which) const {
if (*which < 0 || static_cast<unsigned int>(*which) >= d_size) {
throw IndexErrorException(*which);
}
if(dp_bits->count(*which)) return true;
else return false;
if (dp_bits->count(*which))
return true;
else
return false;
}
// """ -------------------------------------------------------
@@ -212,14 +196,12 @@ SparseBitVect::getBit (const IntSetIter which) const
// Returns the original state of the bit
//
// """ -------------------------------------------------------
bool
SparseBitVect::setBit(const unsigned int which)
{
if(!dp_bits){
bool SparseBitVect::setBit(const unsigned int which) {
if (!dp_bits) {
throw ValueErrorException("BitVect not properly initialized.");
}
std::pair<IntSetIter,bool> res;
if(which >= d_size){
std::pair<IntSetIter, bool> res;
if (which >= d_size) {
throw IndexErrorException(which);
}
res = dp_bits->insert(which);
@@ -234,14 +216,12 @@ SparseBitVect::setBit(const unsigned int which)
// Returns the original state of the bit
//
// """ -------------------------------------------------------
bool
SparseBitVect::setBit(const IntSetIter which)
{
if(!dp_bits){
bool SparseBitVect::setBit(const IntSetIter which) {
if (!dp_bits) {
throw ValueErrorException("BitVect not properly initialized.");
}
std::pair<IntSetIter,bool> res;
if(*which < 0 || static_cast<unsigned int>(*which) >= d_size){
std::pair<IntSetIter, bool> res;
if (*which < 0 || static_cast<unsigned int>(*which) >= d_size) {
throw IndexErrorException(*which);
}
res = dp_bits->insert(*which);
@@ -255,21 +235,18 @@ SparseBitVect::setBit(const IntSetIter which)
// Returns the original state of the bit
//
// """ -------------------------------------------------------
bool
SparseBitVect::unsetBit(const unsigned int which)
{
if(!dp_bits){
bool SparseBitVect::unsetBit(const unsigned int which) {
if (!dp_bits) {
throw ValueErrorException("BitVect not properly initialized.");
}
if(which >= d_size){
if (which >= d_size) {
throw IndexErrorException(which);
}
if(dp_bits->count(which)){
if (dp_bits->count(which)) {
dp_bits->erase(dp_bits->find(which));
return true;
}
else{
} else {
return false;
}
}
@@ -283,17 +260,15 @@ SparseBitVect::unsetBit(const unsigned int which)
// Python: Returns the tuple of on bits
//
// """ -------------------------------------------------------
void
SparseBitVect::getOnBits(IntVect& v) const
{
if(!dp_bits){
void SparseBitVect::getOnBits(IntVect &v) const {
if (!dp_bits) {
throw ValueErrorException("BitVect not properly initialized.");
}
unsigned int nOn = getNumOnBits();
if(!v.empty()) IntVect().swap(v);
if (!v.empty()) IntVect().swap(v);
v.reserve(nOn);
v.resize(nOn);
std::copy(dp_bits->begin(),dp_bits->end(),v.begin());
std::copy(dp_bits->begin(), dp_bits->end(), v.begin());
};
// """ -------------------------------------------------------
@@ -303,41 +278,47 @@ SparseBitVect::getOnBits(IntVect& v) const
//
// """ -------------------------------------------------------
std::string SparseBitVect::toString() const {
// This Function replaces the older version (version 16) of writing the onbits to
// This Function replaces the older version (version 16) of writing the onbits
// to
// a string
// the old version does not perform any run length encoding, it only checks to see if
// the length of the bitvect can be short ints and writes the on bits as shorts
// the old version does not perform any run length encoding, it only checks to
// see if
// the length of the bitvect can be short ints and writes the on bits as
// shorts
// otherwise the onbits are all written as ints
// here we do run length encoding and the version number has been bumped to 32 as well.
// here we do run length encoding and the version number has been bumped to 32
// as well.
// only the reader needs to take care of reading all legacy versions
// also in this scheme each bit number written to the string is checked to see how many
// also in this scheme each bit number written to the string is checked to see
// how many
// bytes it needs
std::stringstream ss(std::ios_base::binary|std::ios_base::out|std::ios_base::in);
std::stringstream ss(std::ios_base::binary | std::ios_base::out |
std::ios_base::in);
boost::int32_t tInt = ci_BITVECT_VERSION*-1;
RDKit::streamWrite(ss,tInt);
tInt=d_size;
RDKit::streamWrite(ss,tInt);
tInt=getNumOnBits();
RDKit::streamWrite(ss,tInt);
boost::int32_t tInt = ci_BITVECT_VERSION * -1;
RDKit::streamWrite(ss, tInt);
tInt = d_size;
RDKit::streamWrite(ss, tInt);
tInt = getNumOnBits();
RDKit::streamWrite(ss, tInt);
int prev = -1;
unsigned int zeroes;
for (IntSetIter i=dp_bits->begin(); i!=dp_bits->end(); i++) {
zeroes = *i - prev -1;
for (IntSetIter i = dp_bits->begin(); i != dp_bits->end(); i++) {
zeroes = *i - prev - 1;
RDKit::appendPackedIntToStream(ss, zeroes);
prev = *i;
}
zeroes = d_size - prev -1;
zeroes = d_size - prev - 1;
RDKit::appendPackedIntToStream(ss, zeroes);
std::string res(ss.str());
return res;
}
}
void SparseBitVect::_initForSize(unsigned int size){
d_size=size;
void SparseBitVect::_initForSize(unsigned int size) {
d_size = size;
delete dp_bits;
dp_bits=new IntSet;
dp_bits = new IntSet;
};

View File

@@ -17,8 +17,6 @@ using std::set;
#include <iterator>
#include <algorithm>
typedef set<int> IntSet;
typedef IntSet::iterator IntSetIter;
typedef IntSet::const_iterator IntSetConstIter;
@@ -32,41 +30,44 @@ typedef IntSet::const_iterator IntSetConstIter;
vectors but become rather a nightmare if they need to be negated.
*/
class SparseBitVect : public BitVect{
public:
SparseBitVect() : dp_bits(0), d_size(0) {};
class SparseBitVect : public BitVect {
public:
SparseBitVect() : dp_bits(0), d_size(0){};
//! initialize with a particular size;
explicit SparseBitVect(unsigned int size): dp_bits(0), d_size(0) {_initForSize(size); };
explicit SparseBitVect(unsigned int size) : dp_bits(0), d_size(0) {
_initForSize(size);
};
//! copy constructor
SparseBitVect(const SparseBitVect& other) : BitVect(other) {
d_size=0;dp_bits = 0;
SparseBitVect(const SparseBitVect &other) : BitVect(other) {
d_size = 0;
dp_bits = 0;
_initForSize(other.getNumBits());
IntSet *bv=other.dp_bits;
std::copy(bv->begin(),bv->end(),std::inserter(*dp_bits,dp_bits->end()));
IntSet *bv = other.dp_bits;
std::copy(bv->begin(), bv->end(), std::inserter(*dp_bits, dp_bits->end()));
}
//! construct from a string pickle
SparseBitVect(const std::string &);
//! construct from a text pickle
SparseBitVect(const char *data,const unsigned int dataLen);
SparseBitVect(const char *data, const unsigned int dataLen);
SparseBitVect& operator=(const SparseBitVect&);
~SparseBitVect(){ delete dp_bits; };
SparseBitVect &operator=(const SparseBitVect &);
~SparseBitVect() { delete dp_bits; };
bool operator[](const unsigned int which) const;
SparseBitVect operator| (const SparseBitVect&) const;
SparseBitVect operator& (const SparseBitVect&) const;
SparseBitVect operator^ (const SparseBitVect&) const;
SparseBitVect operator~ () const;
SparseBitVect operator|(const SparseBitVect &) const;
SparseBitVect operator&(const SparseBitVect &) const;
SparseBitVect operator^(const SparseBitVect &) const;
SparseBitVect operator~() const;
//! returns a (const) pointer to our raw storage
const IntSet *getBitSet() const { return dp_bits;}
const IntSet *getBitSet() const { return dp_bits; }
unsigned int getNumBits() const { return d_size; };
bool setBit(const unsigned int which);
bool setBit(const IntSetIter which);
bool unsetBit(const unsigned int which);
bool getBit (const unsigned int which) const;
bool getBit(const unsigned int which) const;
bool getBit(const IntVectIter which) const;
bool getBit(const IntSetIter which) const;
@@ -75,19 +76,18 @@ public:
std::string toString() const;
void getOnBits (IntVect& v) const;
void getOnBits(IntVect &v) const;
void clearBits() { dp_bits->clear(); };
IntSet *dp_bits; //!< our raw data, exposed for the sake of efficiency
IntSet *dp_bits; //!< our raw data, exposed for the sake of efficiency
bool operator==(const SparseBitVect &o) const {
return *dp_bits==*o.dp_bits;
return *dp_bits == *o.dp_bits;
}
bool operator!=(const SparseBitVect &o) const {
return *dp_bits!=*o.dp_bits;
return *dp_bits != *o.dp_bits;
}
private:
private:
unsigned int d_size;
void _initForSize(const unsigned int size);
};

File diff suppressed because it is too large Load Diff

View File

@@ -24,38 +24,43 @@ ExplicitBitVect *convertToExplicit(const SparseBitVect *sbv) {
return ebv;
}
void a2b(const char *,char *);
void a2b(const char *, char *);
//! \brief Construct a BitVect from the ASCII representation of a
//! Daylight fingerprint string
template <typename T>
void FromDaylightString(T &sbv,const std::string &s)
{
void FromDaylightString(T &sbv, const std::string &s) {
sbv.clearBits();
int length = s.length();
int nBits;
if(s[length-1] == '\n') length -= 1;
if (s[length - 1] == '\n') length -= 1;
// 4 bytes in the ascii correspond to 3 bytes in the binary
// plus there's one extra ascii byte for the pad marker
length -= 1;
nBits = (3*length/4)*8;
switch(s[length]){
case '1': nBits -= 16;break;
case '2': nBits -= 8;break;
case '3': break;
default: throw "ValueError bad daylight fingerprint string";
nBits = (3 * length / 4) * 8;
switch (s[length]) {
case '1':
nBits -= 16;
break;
case '2':
nBits -= 8;
break;
case '3':
break;
default:
throw "ValueError bad daylight fingerprint string";
}
int i=0,nBitsDone=0;
while(i < length){
int i = 0, nBitsDone = 0;
while (i < length) {
char bytes[3];
a2b(s.c_str()+i,bytes);
for(int j=0;j<3 && nBitsDone < nBits;j++){
unsigned char query=0x80;
for(int k=0;k<8;k++) {
if(bytes[j]&query){
a2b(s.c_str() + i, bytes);
for (int j = 0; j < 3 && nBitsDone < nBits; j++) {
unsigned char query = 0x80;
for (int k = 0; k < 8; k++) {
if (bytes[j] & query) {
sbv.setBit(nBitsDone);
}
query >>= 1;
@@ -66,24 +71,22 @@ void FromDaylightString(T &sbv,const std::string &s)
}
}
template void FromDaylightString(SparseBitVect &sbv,const std::string &s);
template void FromDaylightString(ExplicitBitVect &sbv,const std::string &s);
template void FromDaylightString(SparseBitVect &sbv, const std::string &s);
template void FromDaylightString(ExplicitBitVect &sbv, const std::string &s);
//! \brief Construct a BitVect from the ASCII representation of a
//! BitString
template <typename T>
void FromBitString(T &sbv,const std::string &s)
{
PRECONDITION(s.length()<=sbv.getNumBits(),"bad bitvect length");
void FromBitString(T &sbv, const std::string &s) {
PRECONDITION(s.length() <= sbv.getNumBits(), "bad bitvect length");
sbv.clearBits();
for(unsigned int i=0;i<sbv.getNumBits();++i){
if(s[i]=='1') sbv.setBit(i);
for (unsigned int i = 0; i < sbv.getNumBits(); ++i) {
if (s[i] == '1') sbv.setBit(i);
}
}
template void FromBitString(SparseBitVect &sbv,const std::string &s);
template void FromBitString(ExplicitBitVect &sbv,const std::string &s);
template void FromBitString(SparseBitVect &sbv, const std::string &s);
template void FromBitString(ExplicitBitVect &sbv, const std::string &s);
//! converts 4 ascii bytes at a4 to 3 binary bytes
/*!
@@ -96,14 +99,13 @@ template void FromBitString(ExplicitBitVect &sbv,const std::string &s);
*** ASCII: |=======+=======+=======+=======| etc.
*** ^
*** becomes... 3 <-> 4
*** v
*** v
*** BINARY: |=====+=====+=====+=====| etc.
********************************************************************
*/
void a2b(const char *a4, char *b3)
{
void a2b(const char *a4, char *b3) {
int i;
char byte=0x00, b=0x00;
char byte = 0x00, b = 0x00;
/*********************************************
*** Use the Daylight mapping to convert each
@@ -117,74 +119,202 @@ void a2b(const char *a4, char *b3)
*********************************************/
for (i = 0; i < 4; ++i) {
switch (a4[i]) {
case '.': byte = 0x00; break; /* 00 = __000000 */
case '+': byte = 0x01; break; /* 01 = __000001 */
case '0': byte = 0x02; break; /* 02 = __000010 */
case '1': byte = 0x03; break; /* 03 = __000011 */
case '2': byte = 0x04; break; /* 04 = __000100 */
case '3': byte = 0x05; break; /* 05 = __000101 */
case '4': byte = 0x06; break; /* 06 = __000110 */
case '5': byte = 0x07; break; /* 07 = __000111 */
case '6': byte = 0x08; break; /* 08 = __001000 */
case '7': byte = 0x09; break; /* 09 = __001001 */
case '8': byte = 0x0a; break; /* 10 = __001010 */
case '9': byte = 0x0b; break; /* 11 = __001011 */
case 'A': byte = 0x0c; break; /* 12 = __001100 */
case 'B': byte = 0x0d; break; /* 13 = __001101 */
case 'C': byte = 0x0e; break; /* 14 = __001110 */
case 'D': byte = 0x0f; break; /* 15 = __001111 */
case 'E': byte = 0x10; break; /* 16 = __010000 */
case 'F': byte = 0x11; break; /* 17 = __010001 */
case 'G': byte = 0x12; break; /* 18 = __010010 */
case 'H': byte = 0x13; break; /* 19 = __010011 */
case 'I': byte = 0x14; break; /* 20 = __010100 */
case 'J': byte = 0x15; break; /* 21 = __010101 */
case 'K': byte = 0x16; break; /* 22 = __010110 */
case 'L': byte = 0x17; break; /* 23 = __010111 */
case 'M': byte = 0x18; break; /* 24 = __011000 */
case 'N': byte = 0x19; break; /* 25 = __011001 */
case 'O': byte = 0x1a; break; /* 26 = __011010 */
case 'P': byte = 0x1b; break; /* 27 = __011011 */
case 'Q': byte = 0x1c; break; /* 28 = __011100 */
case 'R': byte = 0x1d; break; /* 29 = __011101 */
case 'S': byte = 0x1e; break; /* 30 = __011110 */
case 'T': byte = 0x1f; break; /* 31 = __011111 */
case 'U': byte = 0x20; break; /* 32 = __100000 */
case 'V': byte = 0x21; break; /* 33 = __100001 */
case 'W': byte = 0x22; break; /* 34 = __100010 */
case 'X': byte = 0x23; break; /* 35 = __100011 */
case 'Y': byte = 0x24; break; /* 36 = __100100 */
case 'Z': byte = 0x25; break; /* 37 = __100101 */
case 'a': byte = 0x26; break; /* 38 = __100110 */
case 'b': byte = 0x27; break; /* 39 = __100111 */
case 'c': byte = 0x28; break; /* 40 = __101000 */
case 'd': byte = 0x29; break; /* 41 = __101001 */
case 'e': byte = 0x2a; break; /* 42 = __101010 */
case 'f': byte = 0x2b; break; /* 43 = __101011 */
case 'g': byte = 0x2c; break; /* 44 = __101100 */
case 'h': byte = 0x2d; break; /* 45 = __101101 */
case 'i': byte = 0x2e; break; /* 46 = __101110 */
case 'j': byte = 0x2f; break; /* 47 = __101111 */
case 'k': byte = 0x30; break; /* 48 = __110000 */
case 'l': byte = 0x31; break; /* 49 = __110001 */
case 'm': byte = 0x32; break; /* 50 = __110010 */
case 'n': byte = 0x33; break; /* 51 = __110011 */
case 'o': byte = 0x34; break; /* 52 = __110100 */
case 'p': byte = 0x35; break; /* 53 = __110101 */
case 'q': byte = 0x36; break; /* 54 = __110110 */
case 'r': byte = 0x37; break; /* 55 = __110111 */
case 's': byte = 0x38; break; /* 56 = __111000 */
case 't': byte = 0x39; break; /* 57 = __111001 */
case 'u': byte = 0x3a; break; /* 58 = __111010 */
case 'v': byte = 0x3b; break; /* 59 = __111011 */
case 'w': byte = 0x3c; break; /* 60 = __111100 */
case 'x': byte = 0x3d; break; /* 61 = __111101 */
case 'y': byte = 0x3e; break; /* 62 = __111110 */
case 'z': byte = 0x3f; break; /* 63 = __111111 */
case '.':
byte = 0x00;
break; /* 00 = __000000 */
case '+':
byte = 0x01;
break; /* 01 = __000001 */
case '0':
byte = 0x02;
break; /* 02 = __000010 */
case '1':
byte = 0x03;
break; /* 03 = __000011 */
case '2':
byte = 0x04;
break; /* 04 = __000100 */
case '3':
byte = 0x05;
break; /* 05 = __000101 */
case '4':
byte = 0x06;
break; /* 06 = __000110 */
case '5':
byte = 0x07;
break; /* 07 = __000111 */
case '6':
byte = 0x08;
break; /* 08 = __001000 */
case '7':
byte = 0x09;
break; /* 09 = __001001 */
case '8':
byte = 0x0a;
break; /* 10 = __001010 */
case '9':
byte = 0x0b;
break; /* 11 = __001011 */
case 'A':
byte = 0x0c;
break; /* 12 = __001100 */
case 'B':
byte = 0x0d;
break; /* 13 = __001101 */
case 'C':
byte = 0x0e;
break; /* 14 = __001110 */
case 'D':
byte = 0x0f;
break; /* 15 = __001111 */
case 'E':
byte = 0x10;
break; /* 16 = __010000 */
case 'F':
byte = 0x11;
break; /* 17 = __010001 */
case 'G':
byte = 0x12;
break; /* 18 = __010010 */
case 'H':
byte = 0x13;
break; /* 19 = __010011 */
case 'I':
byte = 0x14;
break; /* 20 = __010100 */
case 'J':
byte = 0x15;
break; /* 21 = __010101 */
case 'K':
byte = 0x16;
break; /* 22 = __010110 */
case 'L':
byte = 0x17;
break; /* 23 = __010111 */
case 'M':
byte = 0x18;
break; /* 24 = __011000 */
case 'N':
byte = 0x19;
break; /* 25 = __011001 */
case 'O':
byte = 0x1a;
break; /* 26 = __011010 */
case 'P':
byte = 0x1b;
break; /* 27 = __011011 */
case 'Q':
byte = 0x1c;
break; /* 28 = __011100 */
case 'R':
byte = 0x1d;
break; /* 29 = __011101 */
case 'S':
byte = 0x1e;
break; /* 30 = __011110 */
case 'T':
byte = 0x1f;
break; /* 31 = __011111 */
case 'U':
byte = 0x20;
break; /* 32 = __100000 */
case 'V':
byte = 0x21;
break; /* 33 = __100001 */
case 'W':
byte = 0x22;
break; /* 34 = __100010 */
case 'X':
byte = 0x23;
break; /* 35 = __100011 */
case 'Y':
byte = 0x24;
break; /* 36 = __100100 */
case 'Z':
byte = 0x25;
break; /* 37 = __100101 */
case 'a':
byte = 0x26;
break; /* 38 = __100110 */
case 'b':
byte = 0x27;
break; /* 39 = __100111 */
case 'c':
byte = 0x28;
break; /* 40 = __101000 */
case 'd':
byte = 0x29;
break; /* 41 = __101001 */
case 'e':
byte = 0x2a;
break; /* 42 = __101010 */
case 'f':
byte = 0x2b;
break; /* 43 = __101011 */
case 'g':
byte = 0x2c;
break; /* 44 = __101100 */
case 'h':
byte = 0x2d;
break; /* 45 = __101101 */
case 'i':
byte = 0x2e;
break; /* 46 = __101110 */
case 'j':
byte = 0x2f;
break; /* 47 = __101111 */
case 'k':
byte = 0x30;
break; /* 48 = __110000 */
case 'l':
byte = 0x31;
break; /* 49 = __110001 */
case 'm':
byte = 0x32;
break; /* 50 = __110010 */
case 'n':
byte = 0x33;
break; /* 51 = __110011 */
case 'o':
byte = 0x34;
break; /* 52 = __110100 */
case 'p':
byte = 0x35;
break; /* 53 = __110101 */
case 'q':
byte = 0x36;
break; /* 54 = __110110 */
case 'r':
byte = 0x37;
break; /* 55 = __110111 */
case 's':
byte = 0x38;
break; /* 56 = __111000 */
case 't':
byte = 0x39;
break; /* 57 = __111001 */
case 'u':
byte = 0x3a;
break; /* 58 = __111010 */
case 'v':
byte = 0x3b;
break; /* 59 = __111011 */
case 'w':
byte = 0x3c;
break; /* 60 = __111100 */
case 'x':
byte = 0x3d;
break; /* 61 = __111101 */
case 'y':
byte = 0x3e;
break; /* 62 = __111110 */
case 'z':
byte = 0x3f;
break; /* 63 = __111111 */
}
/*********************************************
*** Now copy the 4x6=24 bits from a4 to b3.
*** Now copy the 4x6=24 bits from a4 to b3.
***
*** a4: |--000000--111111--222222--333333
*** |=======+=======+=======+=======|
@@ -193,20 +323,19 @@ void a2b(const char *a4, char *b3)
*** |=====+=====+=====+=====|
*********************************************/
if (i == 0)
b3[0] = (byte << 2); /*** 6 bits into 1st byte ***/
b3[0] = (byte << 2); /*** 6 bits into 1st byte ***/
else if (i == 1) {
b3[0] |= ((b = byte) >> 4); /*** 2 bits into 1st byte ***/
b3[1] = ((b = byte) << 4); /*** 4 bits into 2nd byte ***/
b3[0] |= ((b = byte) >> 4); /*** 2 bits into 1st byte ***/
b3[1] = ((b = byte) << 4); /*** 4 bits into 2nd byte ***/
} else if (i == 2) {
b3[1] |= ((b = byte) >> 2); /*** 4 bits into 2nd byte ***/
b3[2] = ((b = byte) << 6); /*** 2 bits into 3rd byte ***/
b3[1] |= ((b = byte) >> 2); /*** 4 bits into 2nd byte ***/
b3[2] = ((b = byte) << 6); /*** 2 bits into 3rd byte ***/
} else if (i == 3)
b3[2] |= byte; /*** 6 bits into 3rd byte ***/
b3[2] |= byte; /*** 6 bits into 3rd byte ***/
}
return;
}
// Demo Data:
// 256 bits:
//.b7HEa..ccc+gWEIr89.8lV8gOF3aXFFR.+Ps.mZ6lg.2
@@ -217,5 +346,4 @@ void a2b(const char *a4, char *b3)
// 00101011 00011000 01001010 10110001 10100100
// 01000101 10011010 00110100 01010001 01110100
// 00000000 01011011 11100000 00001100 10100101
// 00100011 00011011
// 00100011 00011011

View File

@@ -31,43 +31,44 @@ void wrap_discreteValVect();
void wrap_sparseIntVect();
template <typename T>
void convertToNumpyArray(const T &v,python::object destArray){
void convertToNumpyArray(const T &v, python::object destArray) {
if (!PyArray_Check(destArray.ptr())) {
throw_value_error("Expecting a Numeric array object");
}
PyArrayObject *destP=(PyArrayObject *)destArray.ptr();
PyArrayObject *destP = (PyArrayObject *)destArray.ptr();
npy_intp ndims[1];
ndims[0]=v.size();
ndims[0] = v.size();
PyArray_Dims dims;
dims.ptr=ndims;
dims.len=1;
PyArray_Resize(destP,&dims,0,NPY_ANYORDER);
for(unsigned int i=0;i<v.size();++i){
dims.ptr = ndims;
dims.len = 1;
PyArray_Resize(destP, &dims, 0, NPY_ANYORDER);
for (unsigned int i = 0; i < v.size(); ++i) {
PyObject *iItem = PyInt_FromLong(v[i]);
PyArray_SETITEM(destP,PyArray_GETPTR1(destP,i),iItem);
PyArray_SETITEM(destP, PyArray_GETPTR1(destP, i), iItem);
Py_DECREF(iItem);
}
}
BOOST_PYTHON_MODULE(cDataStructs)
{
BOOST_PYTHON_MODULE(cDataStructs) {
rdkit_import_array();
python::scope().attr("__doc__") =
"Module containing an assortment of functionality for basic data structures.\n"
"\n"
"At the moment the data structures defined are:\n"
" Bit Vector classes (for storing signatures, fingerprints and the like:\n"
" - ExplicitBitVect: class for relatively small (10s of thousands of bits) or\n"
" dense bit vectors.\n"
" - SparseBitVect: class for large, sparse bit vectors\n"
" DiscreteValueVect: class for storing vectors of integers\n"
" SparseIntVect: class for storing sparse vectors of integers\n"
;
python::register_exception_translator<IndexErrorException>(&translate_index_error);
python::register_exception_translator<ValueErrorException>(&translate_value_error);
"Module containing an assortment of functionality for basic data "
"structures.\n"
"\n"
"At the moment the data structures defined are:\n"
" Bit Vector classes (for storing signatures, fingerprints and the "
"like:\n"
" - ExplicitBitVect: class for relatively small (10s of thousands of "
"bits) or\n"
" dense bit vectors.\n"
" - SparseBitVect: class for large, sparse bit vectors\n"
" DiscreteValueVect: class for storing vectors of integers\n"
" SparseIntVect: class for storing sparse vectors of integers\n";
python::register_exception_translator<IndexErrorException>(
&translate_index_error);
python::register_exception_translator<ValueErrorException>(
&translate_value_error);
wrap_Utils();
wrap_SBV();
@@ -76,9 +77,12 @@ BOOST_PYTHON_MODULE(cDataStructs)
wrap_discreteValVect();
wrap_sparseIntVect();
python::def("ConvertToNumpyArray", (void (*)(const ExplicitBitVect &,python::object))convertToNumpyArray,
(python::arg("bv"),python::arg("destArray")));
python::def("ConvertToNumpyArray", (void (*)(const RDKit::DiscreteValueVect &,python::object))convertToNumpyArray,
(python::arg("bv"),python::arg("destArray")));
python::def(
"ConvertToNumpyArray",
(void (*)(const ExplicitBitVect &, python::object))convertToNumpyArray,
(python::arg("bv"), python::arg("destArray")));
python::def("ConvertToNumpyArray",
(void (*)(const RDKit::DiscreteValueVect &,
python::object))convertToNumpyArray,
(python::arg("bv"), python::arg("destArray")));
}

View File

@@ -7,4 +7,3 @@
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//

View File

@@ -11,24 +11,23 @@
#include <RDBoost/python.h>
#include <RDGeneral/types.h>
#include<RDGeneral/Invariant.h>
#include <RDGeneral/Invariant.h>
#include <RDBoost/PySequenceHolder.h>
#include <DataStructs/DiscreteValueVect.h>
using namespace RDKit;
// Pickle support for DiscreteValueVect.
// getinitargs() serializes the vector with toString(), wraps the resulting
// bytes in a Python bytes object (built from an explicit length), and returns
// it as a one-element tuple.
struct dvv_pickle_suite : python::pickle_suite
{
static python::tuple
getinitargs(const DiscreteValueVect& self)
{
std::string res=self.toString();
python::object retval = python::object(python::handle<>(PyBytes_FromStringAndSize(res.c_str(),res.length())));
struct dvv_pickle_suite : python::pickle_suite {
static python::tuple getinitargs(const DiscreteValueVect& self) {
std::string res = self.toString();
python::object retval = python::object(
python::handle<>(PyBytes_FromStringAndSize(res.c_str(), res.length())));
return python::make_tuple(retval);
};
};
std::string disValVectDoc="A container class for storing unsigned integer\n\
std::string disValVectDoc =
"A container class for storing unsigned integer\n\
values within a particular range.\n\
\n\
The length of the vector and type of its elements (determines the maxium value\n\
@@ -49,46 +48,41 @@ Elements can be set and read using indexing (i.e. bv[i] = 4 or val=bv[i])\n\
// Registers the Python bindings for DiscreteValueVect:
//  - the DiscreteValueType enum (its values are also exported into the
//    enclosing scope through export_values()),
//  - the DiscreteValueVect class with its two constructors, __len__ /
//    __setitem__ / __getitem__, the &, |, -, -=, +, += operators,
//    GetValueType, GetTotalVal and pickle support (dvv_pickle_suite),
//  - the module-level function ComputeL1Norm.
struct discreteValVec_wrapper {
static void wrap() {
python::enum_<DiscreteValueVect::DiscreteValueType>("DiscreteValueType")
.value("ONEBITVALUE", DiscreteValueVect::ONEBITVALUE)
.value("TWOBITVALUE", DiscreteValueVect::TWOBITVALUE)
.value("FOURBITVALUE", DiscreteValueVect::FOURBITVALUE)
.value("EIGHTBITVALUE", DiscreteValueVect::EIGHTBITVALUE)
.value("SIXTEENBITVALUE", DiscreteValueVect::SIXTEENBITVALUE)
.export_values()
;
.value("ONEBITVALUE", DiscreteValueVect::ONEBITVALUE)
.value("TWOBITVALUE", DiscreteValueVect::TWOBITVALUE)
.value("FOURBITVALUE", DiscreteValueVect::FOURBITVALUE)
.value("EIGHTBITVALUE", DiscreteValueVect::EIGHTBITVALUE)
.value("SIXTEENBITVALUE", DiscreteValueVect::SIXTEENBITVALUE)
.export_values();
python::class_<DiscreteValueVect>("DiscreteValueVect",
disValVectDoc.c_str(),
python::init<DiscreteValueVect::DiscreteValueType, unsigned int>("Constructor"))
.def(python::init<std::string>())
.def("__len__", &DiscreteValueVect::getLength,
"Get the number of entries in the vector")
.def("__setitem__", &DiscreteValueVect::setVal,
"Set the value at a specified location")
.def("__getitem__", &DiscreteValueVect::getVal,
"Get the value at a specified location")
.def(python::self & python::self)
.def(python::self | python::self)
.def(python::self - python::self)
.def(python::self -= python::self)
.def(python::self + python::self)
.def(python::self += python::self)
python::class_<DiscreteValueVect>(
"DiscreteValueVect", disValVectDoc.c_str(),
python::init<DiscreteValueVect::DiscreteValueType, unsigned int>(
"Constructor"))
.def(python::init<std::string>())
.def("__len__", &DiscreteValueVect::getLength,
"Get the number of entries in the vector")
.def("__setitem__", &DiscreteValueVect::setVal,
"Set the value at a specified location")
.def("__getitem__", &DiscreteValueVect::getVal,
"Get the value at a specified location")
.def(python::self & python::self)
.def(python::self | python::self)
.def(python::self - python::self)
.def(python::self -= python::self)
.def(python::self + python::self)
.def(python::self += python::self)
.def("GetValueType", &DiscreteValueVect::getValueType,
"Get the type of value stored in the vector")
.def("GetTotalVal", &DiscreteValueVect::getTotalVal,
"Get the sum of the values in the vector, basically L1 norm")
.def("GetValueType", &DiscreteValueVect::getValueType,
"Get the type of value stored in the vector")
.def("GetTotalVal", &DiscreteValueVect::getTotalVal,
"Get the sum of the values in the vector, basically L1 norm")
.def_pickle(dvv_pickle_suite())
;
.def_pickle(dvv_pickle_suite());
python::def("ComputeL1Norm", computeL1Norm,
"Compute the distance between two discrete vector values\n");
}
};
// Free-function entry point that registers the DiscreteValueVect bindings by
// delegating to discreteValVec_wrapper::wrap().
void wrap_discreteValVect() {
discreteValVec_wrapper::wrap();
}
void wrap_discreteValVect() { discreteValVec_wrapper::wrap(); }

View File

@@ -20,89 +20,87 @@
using namespace RDKit;
namespace {
// Returns siv.toString() wrapped in a Python bytes object (constructed from
// the string's data pointer and explicit length).
template <typename IndexType>
python::object SIVToBinaryText(const SparseIntVect<IndexType> &siv){
std::string res=siv.toString();
python::object retval = python::object(python::handle<>(PyBytes_FromStringAndSize(res.c_str(),res.length())));
return retval;
}
// Serializes siv via toString() and hands the result back to Python as a
// bytes object. The bytes object is built from the buffer and its explicit
// length rather than by scanning for a terminator.
template <typename IndexType>
python::object SIVToBinaryText(const SparseIntVect<IndexType> &siv) {
  const std::string serialized = siv.toString();
  // handle<> takes ownership of the new reference returned by the C API
  python::handle<> owned(
      PyBytes_FromStringAndSize(serialized.c_str(), serialized.length()));
  return python::object(owned);
}
}
// Pickle support for SparseIntVect<IndexType>: getinitargs() returns a
// one-element tuple holding the binary text produced by SIVToBinaryText().
template <typename IndexType>
struct siv_pickle_suite : python::pickle_suite
{
static python::tuple
getinitargs(const SparseIntVect<IndexType>& self)
{
struct siv_pickle_suite : python::pickle_suite {
static python::tuple getinitargs(const SparseIntVect<IndexType> &self) {
return python::make_tuple(SIVToBinaryText(self));
};
};
namespace {
// Treats every element of the Python sequence seq as an index into vect and
// increments the value stored at that index by one (an index that occurs k
// times in seq is incremented k times).
template <typename IndexType>
void pyUpdateFromSequence(SparseIntVect<IndexType> &vect,
python::object &seq){
PySequenceHolder<IndexType> seqL(seq);
for(unsigned int i=0;i<seqL.size();++i){
IndexType idx=seqL[i];
vect.setVal(idx,vect[idx]+1);
}
}
// Copies the entries returned by vect.getNonzeroElements() into a Python
// dict, using each entry's `first` as the key and `second` as the value.
template <typename IndexType>
python::dict pyGetNonzeroElements(SparseIntVect<IndexType> &vect){
python::dict res;
typename SparseIntVect<IndexType>::StorageType::const_iterator iter=vect.getNonzeroElements().begin();
while(iter!=vect.getNonzeroElements().end()){
res[iter->first]=iter->second;
++iter;
}
return res;
}
// Calls DiceSimilarity(siv1, x, returnDistance) for every element x of the
// Python list sivs (each extracted as a T) and returns the results, in order,
// as a Python list of doubles.
template <typename T>
python::list BulkDice(const T &siv1,python::list sivs,bool returnDistance){
python::list res;
unsigned int nsivs=python::extract<unsigned int>(sivs.attr("__len__")());
for(unsigned int i=0;i<nsivs;++i){
double simVal;
const T &siv2=python::extract<T>(sivs[i])();
simVal = DiceSimilarity(siv1,siv2,returnDistance);
res.append(simVal);
}
return res;
}
// Calls TanimotoSimilarity(siv1, x, returnDistance) for every element x of
// the Python list sivs (each extracted as a T) and returns the results, in
// order, as a Python list of doubles.
template <typename T>
python::list BulkTanimoto(const T &siv1,python::list sivs,bool returnDistance){
python::list res;
unsigned int nsivs=python::extract<unsigned int>(sivs.attr("__len__")());
for(unsigned int i=0;i<nsivs;++i){
double simVal;
const T &siv2=python::extract<T>(sivs[i])();
simVal = TanimotoSimilarity(siv1,siv2,returnDistance);
res.append(simVal);
}
return res;
}
template <typename T>
python::list BulkTversky(const T &siv1,python::list sivs,double a,double b,bool returnDistance){
python::list res;
unsigned int nsivs=python::extract<unsigned int>(sivs.attr("__len__")());
for(unsigned int i=0;i<nsivs;++i){
double simVal;
const T &siv2=python::extract<T>(sivs[i])();
simVal = TverskySimilarity(siv1,siv2,a,b,returnDistance);
res.append(simVal);
}
return res;
// Uses each element of the Python sequence seq as an index into vect and bumps
// the stored count at that index by one; repeated indices are counted once per
// occurrence.
template <typename IndexType>
void pyUpdateFromSequence(SparseIntVect<IndexType> &vect, python::object &seq) {
  PySequenceHolder<IndexType> indices(seq);
  unsigned int pos = 0;
  while (pos < indices.size()) {
    IndexType which = indices[pos];
    vect.setVal(which, vect[which] + 1);
    ++pos;
  }
}
// Builds a Python dict from the entries exposed by vect.getNonzeroElements():
// every entry's `first` becomes a key mapped to its `second`.
template <typename IndexType>
python::dict pyGetNonzeroElements(SparseIntVect<IndexType> &vect) {
  typedef typename SparseIntVect<IndexType>::StorageType::const_iterator
      ElemIter;
  python::dict nonzero;
  for (ElemIter pos = vect.getNonzeroElements().begin();
       pos != vect.getNonzeroElements().end(); ++pos) {
    nonzero[pos->first] = pos->second;
  }
  return nonzero;
}
std::string sparseIntVectDoc="A container class for storing integer\n\
// Dice similarity of siv1 against each member of the Python list sivs.
// Every element is extracted as a T; the per-element results (doubles) are
// returned in input order. returnDistance is passed through unchanged.
template <typename T>
python::list BulkDice(const T &siv1, python::list sivs, bool returnDistance) {
  python::list scores;
  const unsigned int total =
      python::extract<unsigned int>(sivs.attr("__len__")());
  unsigned int pos = 0;
  while (pos < total) {
    const T &candidate = python::extract<T>(sivs[pos])();
    const double score = DiceSimilarity(siv1, candidate, returnDistance);
    scores.append(score);
    ++pos;
  }
  return scores;
}
// Tanimoto similarity of siv1 against each member of the Python list sivs.
// Every element is extracted as a T; the per-element results (doubles) are
// returned in input order. returnDistance is passed through unchanged.
template <typename T>
python::list BulkTanimoto(const T &siv1, python::list sivs,
                          bool returnDistance) {
  python::list scores;
  const unsigned int total =
      python::extract<unsigned int>(sivs.attr("__len__")());
  unsigned int pos = 0;
  while (pos < total) {
    const T &candidate = python::extract<T>(sivs[pos])();
    const double score = TanimotoSimilarity(siv1, candidate, returnDistance);
    scores.append(score);
    ++pos;
  }
  return scores;
}
// Tversky similarity (weights a and b) of siv1 against each member of the
// Python list sivs. Every element is extracted as a T; the per-element
// results (doubles) are returned in input order.
template <typename T>
python::list BulkTversky(const T &siv1, python::list sivs, double a, double b,
                         bool returnDistance) {
  python::list scores;
  const unsigned int total =
      python::extract<unsigned int>(sivs.attr("__len__")());
  unsigned int pos = 0;
  while (pos < total) {
    const T &candidate = python::extract<T>(sivs[pos])();
    const double score =
        TverskySimilarity(siv1, candidate, a, b, returnDistance);
    scores.append(score);
    ++pos;
  }
  return scores;
}
}
std::string sparseIntVectDoc =
"A container class for storing integer\n\
values within a particular range.\n\
\n\
The length of the vector is set at construction time.\n\
@@ -123,79 +121,79 @@ Elements can be set and read using indexing (i.e. siv[i] = 4 or val=siv[i])\n\
struct sparseIntVec_wrapper {
// Registers the Python bindings for one SparseIntVect<IndexType>
// instantiation under the Python class name className (instances are held by
// boost::shared_ptr). Exposes the IndexType and std::string constructors,
// item get/set, vector-vector and vector-int operators, GetTotalVal,
// GetLength, ToBinary, UpdateFromSequence, GetNonzeroElements and pickling.
// Afterwards registers the module-level Dice/Tanimoto/Tversky similarity
// functions and their Bulk* variants for this index type.
template <typename IndexType>
static void wrapOne(const char *className){
static void wrapOne(const char *className) {
python::class_<SparseIntVect<IndexType>,
boost::shared_ptr<SparseIntVect<IndexType> > >(className,
sparseIntVectDoc.c_str(),
python::init<IndexType>("Constructor"))
.def(python::init<std::string>())
// Note: we cannot support __len__ because, at least at the moment
// (BPL v1.34.1), it must return an int.
.def("__setitem__", &SparseIntVect<IndexType>::setVal,
"Set the value at a specified location")
.def("__getitem__", &SparseIntVect<IndexType>::getVal,
"Get the value at a specified location")
.def(python::self & python::self)
.def(python::self | python::self)
.def(python::self - python::self)
.def(python::self -= python::self)
.def(python::self + python::self)
.def(python::self += python::self)
.def(python::self == python::self)
.def(python::self != python::self)
//.def(python::self - int())
.def(python::self -= int())
//.def(python::self + int())
.def(python::self += int())
//.def(python::self / int())
.def(python::self /= int())
//.def(python::self * int())
.def(python::self *= int())
.def("GetTotalVal", &SparseIntVect<IndexType>::getTotalVal,
(python::args("useAbs")=false),
"Get the sum of the values in the vector, basically L1 norm")
.def("GetLength", &SparseIntVect<IndexType>::getLength,
"Returns the length of the vector")
.def("ToBinary", &SIVToBinaryText<IndexType>,
"returns a binary (pickle) representation of the vector")
.def("UpdateFromSequence",
&pyUpdateFromSequence<IndexType>,
"update the vector based on the values in the list or tuple")
.def("GetNonzeroElements",
&pyGetNonzeroElements<IndexType>,
"returns a dictionary of the nonzero elements")
.def_pickle(siv_pickle_suite<IndexType>())
;
boost::shared_ptr<SparseIntVect<IndexType> > >(
className, sparseIntVectDoc.c_str(),
python::init<IndexType>("Constructor"))
.def(python::init<std::string>())
// Note: we cannot support __len__ because, at least at the moment
// (BPL v1.34.1), it must return an int.
.def("__setitem__", &SparseIntVect<IndexType>::setVal,
"Set the value at a specified location")
.def("__getitem__", &SparseIntVect<IndexType>::getVal,
"Get the value at a specified location")
.def(python::self & python::self)
.def(python::self | python::self)
.def(python::self - python::self)
.def(python::self -= python::self)
.def(python::self + python::self)
.def(python::self += python::self)
.def(python::self == python::self)
.def(python::self != python::self)
//.def(python::self - int())
.def(python::self -= int())
//.def(python::self + int())
.def(python::self += int())
//.def(python::self / int())
.def(python::self /= int())
//.def(python::self * int())
.def(python::self *= int())
.def("GetTotalVal", &SparseIntVect<IndexType>::getTotalVal,
(python::args("useAbs") = false),
"Get the sum of the values in the vector, basically L1 norm")
.def("GetLength", &SparseIntVect<IndexType>::getLength,
"Returns the length of the vector")
.def("ToBinary", &SIVToBinaryText<IndexType>,
"returns a binary (pickle) representation of the vector")
.def("UpdateFromSequence", &pyUpdateFromSequence<IndexType>,
"update the vector based on the values in the list or tuple")
.def("GetNonzeroElements", &pyGetNonzeroElements<IndexType>,
"returns a dictionary of the nonzero elements")
.def_pickle(siv_pickle_suite<IndexType>());
python::def("DiceSimilarity",&DiceSimilarity<IndexType>,
(python::args("siv1"),python::args("siv2"),
python::args("returnDistance")=false,
python::args("bounds")=0.0),
"return the Dice similarity between two vectors");
python::def("BulkDiceSimilarity",&BulkDice<SparseIntVect<IndexType> >,
(python::args("v1"),python::args("v2"),
python::args("returnDistance")=false),
"return the Dice similarities between one vector and a sequence of others");
python::def("TanimotoSimilarity",&TanimotoSimilarity<IndexType>,
(python::args("siv1"),python::args("siv2"),
python::args("returnDistance")=false,
python::args("bounds")=0.0),
"return the Tanimoto similarity between two vectors");
python::def("BulkTanimotoSimilarity",&BulkTanimoto<SparseIntVect<IndexType> >,
(python::args("v1"),python::args("v2"),
python::args("returnDistance")=false),
"return the Tanimoto similarities between one vector and a sequence of others");
python::def("TverskySimilarity",&TverskySimilarity<IndexType>,
(python::args("siv1"),python::args("siv2"),
python::args("a"),python::args("b"),
python::args("returnDistance")=false,
python::args("bounds")=0.0),
python::def(
"DiceSimilarity", &DiceSimilarity<IndexType>,
(python::args("siv1"), python::args("siv2"),
python::args("returnDistance") = false, python::args("bounds") = 0.0),
"return the Dice similarity between two vectors");
python::def("BulkDiceSimilarity", &BulkDice<SparseIntVect<IndexType> >,
(python::args("v1"), python::args("v2"),
python::args("returnDistance") = false),
"return the Dice similarities between one vector and a "
"sequence of others");
python::def(
"TanimotoSimilarity", &TanimotoSimilarity<IndexType>,
(python::args("siv1"), python::args("siv2"),
python::args("returnDistance") = false, python::args("bounds") = 0.0),
"return the Tanimoto similarity between two vectors");
python::def("BulkTanimotoSimilarity",
&BulkTanimoto<SparseIntVect<IndexType> >,
(python::args("v1"), python::args("v2"),
python::args("returnDistance") = false),
"return the Tanimoto similarities between one vector and a "
"sequence of others");
python::def("TverskySimilarity", &TverskySimilarity<IndexType>,
(python::args("siv1"), python::args("siv2"), python::args("a"),
python::args("b"), python::args("returnDistance") = false,
python::args("bounds") = 0.0),
"return the Tversky similarity between two vectors");
python::def("BulkTverskySimilarity",&BulkTversky<SparseIntVect<IndexType> >,
(python::args("v1"),python::args("v2"),
python::args("a"),python::args("b"),
python::args("returnDistance")=false),
"return the Tversky similarities between one vector and a sequence of others");
python::def("BulkTverskySimilarity",
&BulkTversky<SparseIntVect<IndexType> >,
(python::args("v1"), python::args("v2"), python::args("a"),
python::args("b"), python::args("returnDistance") = false),
"return the Tversky similarities between one vector and a "
"sequence of others");
}
static void wrap() {
@@ -205,8 +203,5 @@ struct sparseIntVec_wrapper {
wrapOne<boost::uint64_t>("ULongSparseIntVect");
}
};
// Free-function entry point that registers the SparseIntVect bindings by
// delegating to sparseIntVec_wrapper::wrap().
void wrap_sparseIntVect() {
sparseIntVec_wrapper::wrap();
}
void wrap_sparseIntVect() { sparseIntVec_wrapper::wrap(); }

View File

@@ -13,380 +13,428 @@
#include <DataStructs/BitVects.h>
#include <DataStructs/BitOps.h>
namespace python = boost::python;
// Forwards to FoldFingerprint for an SBV; factor defaults to 2. The returned
// pointer is whatever FoldFingerprint produces.
// NOTE(review): ownership of the returned pointer is not visible here --
// confirm the return-value policy used where this is registered.
SBV *ff1(const SBV &bv1, int factor=2) {
return FoldFingerprint(bv1,factor);
SBV *ff1(const SBV &bv1, int factor = 2) {
return FoldFingerprint(bv1, factor);
}
// Forwards to FoldFingerprint for an EBV; factor defaults to 2. The returned
// pointer is whatever FoldFingerprint produces.
// NOTE(review): ownership of the returned pointer is not visible here --
// confirm the return-value policy used where this is registered.
EBV *ff2(const EBV &ev1, int factor=2) {
return FoldFingerprint(ev1,factor);
EBV *ff2(const EBV &ev1, int factor = 2) {
return FoldFingerprint(ev1, factor);
}
namespace {
// Returns BitVectToBinaryText(bv) wrapped in a Python bytes object
// (constructed from the string's data pointer and explicit length).
template <typename T>
python::object BVToBinaryText(const T &bv){
std::string res=BitVectToBinaryText(bv);
python::object retval = python::object(python::handle<>(PyBytes_FromStringAndSize(res.c_str(),res.length())));
return retval;
}
// Converts bv to its binary text form with BitVectToBinaryText() and returns
// it to Python as a bytes object built from the buffer and its explicit
// length.
template <typename T>
python::object BVToBinaryText(const T &bv) {
  const std::string serialized = BitVectToBinaryText(bv);
  // handle<> takes ownership of the new reference returned by the C API
  python::handle<> owned(
      PyBytes_FromStringAndSize(serialized.c_str(), serialized.length()));
  return python::object(owned);
}
}
// Overload that receives the second bit vector as a string: constructs a T
// from pkl and forwards to the SimilarityWrapper overload that takes two bit
// vectors, passing metric and returnDistance through unchanged.
// NOTE(review): pkl is presumably the serialized (pickled) form accepted by
// T's string constructor -- confirm.
template <typename T>
double SimilarityWrapper(const T &bv1,const std::string &pkl,
double (*metric)(const T &,const T &),bool returnDistance){
double SimilarityWrapper(const T &bv1, const std::string &pkl,
double (*metric)(const T &, const T &),
bool returnDistance) {
T bv2(pkl);
return SimilarityWrapper(bv1,bv2,metric,returnDistance);
return SimilarityWrapper(bv1, bv2, metric, returnDistance);
}
// Overload for metrics that take two extra double parameters (a, b) and
// receive the second bit vector as a string: constructs a T from pkl and
// forwards to the SimilarityWrapper overload that takes two bit vectors.
// NOTE(review): pkl is presumably the serialized (pickled) form accepted by
// T's string constructor -- confirm.
template <typename T>
double SimilarityWrapper(const T &bv1,const std::string &pkl,double a,double b,
double (*metric)(const T &,const T &,double,double),bool returnDistance){
double SimilarityWrapper(const T &bv1, const std::string &pkl, double a,
double b,
double (*metric)(const T &, const T &, double, double),
bool returnDistance) {
T bv2(pkl);
return SimilarityWrapper(bv1,bv2,a,b,metric,returnDistance);
return SimilarityWrapper(bv1, bv2, a, b, metric, returnDistance);
}
// Applies SimilarityWrapper(bv1, x, metric, returnDistance) to every element
// x of the Python object bvs and returns the results, in order, as a Python
// list. The element count comes from calling bvs.__len__(); each element is
// extracted as a T.
template <typename T>
python::list BulkWrapper(const T &bv1,python::object bvs,
double (*metric)(const T &,const T &),
bool returnDistance){
python::list BulkWrapper(const T &bv1, python::object bvs,
double (*metric)(const T &, const T &),
bool returnDistance) {
python::list res;
unsigned int nbvs=python::extract<unsigned int>(bvs.attr("__len__")());
for(unsigned int i=0;i<nbvs;++i){
const T &bv2=python::extract<T>(bvs[i])();
res.append(SimilarityWrapper(bv1,bv2,metric,returnDistance));
unsigned int nbvs = python::extract<unsigned int>(bvs.attr("__len__")());
for (unsigned int i = 0; i < nbvs; ++i) {
const T &bv2 = python::extract<T>(bvs[i])();
res.append(SimilarityWrapper(bv1, bv2, metric, returnDistance));
}
return res;
}
// Variant of BulkWrapper for metrics taking two extra double parameters:
// applies SimilarityWrapper(bv1, x, a, b, metric, returnDistance) to every
// element x of the Python object bvs and returns the results, in order, as a
// Python list. The element count comes from calling bvs.__len__(); each
// element is extracted as a T.
template <typename T>
python::list BulkWrapper(const T &bv1,python::object bvs,double a,double b,
double (*metric)(const T &,const T &,double,double),
bool returnDistance){
python::list BulkWrapper(const T &bv1, python::object bvs, double a, double b,
double (*metric)(const T &, const T &, double, double),
bool returnDistance) {
python::list res;
unsigned int nbvs=python::extract<unsigned int>(bvs.attr("__len__")());
for(unsigned int i=0;i<nbvs;++i){
const T &bv2=python::extract<T>(bvs[i])();
res.append(SimilarityWrapper(bv1,bv2,a,b,metric,returnDistance));
unsigned int nbvs = python::extract<unsigned int>(bvs.attr("__len__")());
for (unsigned int i = 0; i < nbvs; ++i) {
const T &bv2 = python::extract<T>(bvs[i])();
res.append(SimilarityWrapper(bv1, bv2, a, b, metric, returnDistance));
}
return res;
}
// Passes bv1, bv2 and returnDistance to SimilarityWrapper together with the
// TanimotoSimilarity overload selected by the cast. Note the cast picks the
// (const T1 &, const T1 &) overload even though bv2 has type T2.
template <typename T1, typename T2>
double TanimotoSimilarity_w(const T1 &bv1,const T2 &bv2,bool returnDistance){
return SimilarityWrapper(bv1,bv2,
(double (*)(const T1&,const T1&))TanimotoSimilarity,
returnDistance);
double TanimotoSimilarity_w(const T1 &bv1, const T2 &bv2, bool returnDistance) {
return SimilarityWrapper(
bv1, bv2, (double (*)(const T1 &, const T1 &))TanimotoSimilarity,
returnDistance);
}
// Bulk form: hands bv1, the Python sequence bvs and returnDistance to
// BulkWrapper with the (const T &, const T &) overload of TanimotoSimilarity
// as the metric; returns BulkWrapper's Python list.
template <typename T>
python::list BulkTanimotoSimilarity(const T &bv1,python::object bvs,bool returnDistance){
return BulkWrapper(bv1,bvs,
(double (*)(const T&,const T&))TanimotoSimilarity,
python::list BulkTanimotoSimilarity(const T &bv1, python::object bvs,
bool returnDistance) {
return BulkWrapper(bv1, bvs,
(double (*)(const T &, const T &))TanimotoSimilarity,
returnDistance);
}
// Passes bv1, bv2, the weights a and b, and returnDistance to
// SimilarityWrapper together with the TverskySimilarity overload selected by
// the cast. Note the cast picks the (const T1 &, const T1 &, double, double)
// overload even though bv2 has type T2.
template <typename T1, typename T2>
double TverskySimilarity_w(const T1 &bv1,const T2 &bv2,double a,double b,bool returnDistance){
return SimilarityWrapper(bv1,bv2,a,b,
(double (*)(const T1&,const T1&,double,double))TverskySimilarity,
returnDistance);
double TverskySimilarity_w(const T1 &bv1, const T2 &bv2, double a, double b,
bool returnDistance) {
return SimilarityWrapper(
bv1, bv2, a, b,
(double (*)(const T1 &, const T1 &, double, double))TverskySimilarity,
returnDistance);
}
// Bulk form: hands bv1, the Python sequence bvs, the weights a and b, and
// returnDistance to BulkWrapper with the (const T &, const T &, double,
// double) overload of TverskySimilarity as the metric; returns BulkWrapper's
// Python list.
template <typename T>
python::list BulkTverskySimilarity(const T &bv1,python::object bvs,double a,double b,
bool returnDistance){
return BulkWrapper(bv1,bvs,a,b,
(double (*)(const T&,const T&,double,double))TverskySimilarity,
python::list BulkTverskySimilarity(const T &bv1, python::object bvs, double a,
double b, bool returnDistance) {
return BulkWrapper(bv1, bvs, a, b, (double (*)(const T &, const T &, double,
double))TverskySimilarity,
returnDistance);
}
// Passes bv1, bv2 and returnDistance to SimilarityWrapper together with the
// CosineSimilarity overload selected by the cast. Note the cast picks the
// (const T1 &, const T1 &) overload even though bv2 has type T2.
template <typename T1, typename T2>
double CosineSimilarity_w(const T1 &bv1,const T2 &bv2,bool returnDistance){
return SimilarityWrapper(bv1,bv2,
(double (*)(const T1&,const T1&))CosineSimilarity,
double CosineSimilarity_w(const T1 &bv1, const T2 &bv2, bool returnDistance) {
return SimilarityWrapper(bv1, bv2,
(double (*)(const T1 &, const T1 &))CosineSimilarity,
returnDistance);
}
// Bulk form: hands bv1, the Python sequence bvs and returnDistance to
// BulkWrapper with the (const T &, const T &) overload of CosineSimilarity as
// the metric; returns BulkWrapper's Python list.
template <typename T>
python::list BulkCosineSimilarity(const T &bv1,python::object bvs,bool returnDistance){
return BulkWrapper(bv1,bvs,
(double (*)(const T&,const T&))CosineSimilarity,
python::list BulkCosineSimilarity(const T &bv1, python::object bvs,
bool returnDistance) {
return BulkWrapper(bv1, bvs,
(double (*)(const T &, const T &))CosineSimilarity,
returnDistance);
}
// Passes bv1, bv2 and returnDistance to SimilarityWrapper together with the
// KulczynskiSimilarity overload selected by the cast. Note the cast picks the
// (const T1 &, const T1 &) overload even though bv2 has type T2.
template <typename T1, typename T2>
double KulczynskiSimilarity_w(const T1 &bv1,const T2 &bv2,bool returnDistance){
return SimilarityWrapper(bv1,bv2,
(double (*)(const T1&,const T1&))KulczynskiSimilarity,
returnDistance);
double KulczynskiSimilarity_w(const T1 &bv1, const T2 &bv2,
bool returnDistance) {
return SimilarityWrapper(
bv1, bv2, (double (*)(const T1 &, const T1 &))KulczynskiSimilarity,
returnDistance);
}
// Bulk form: hands bv1, the Python sequence bvs and returnDistance to
// BulkWrapper with the (const T &, const T &) overload of
// KulczynskiSimilarity as the metric; returns BulkWrapper's Python list.
template <typename T>
python::list BulkKulczynskiSimilarity(const T &bv1,python::object bvs,bool returnDistance){
return BulkWrapper(bv1,bvs,
(double (*)(const T&,const T&))KulczynskiSimilarity,
returnDistance);
}
// Passes bv1, bv2 and returnDistance to SimilarityWrapper together with the
// DiceSimilarity overload selected by the cast. Note the cast picks the
// (const T1 &, const T1 &) overload even though bv2 has type T2.
template <typename T1, typename T2>
double DiceSimilarity_w(const T1 &bv1,const T2 &bv2,bool returnDistance){
return SimilarityWrapper(bv1,bv2,
(double (*)(const T1&,const T1&))DiceSimilarity,
returnDistance);
}
template <typename T>
python::list BulkDiceSimilarity(const T &bv1,python::object bvs,bool returnDistance){
return BulkWrapper(bv1,bvs,
(double (*)(const T&,const T&))DiceSimilarity,
python::list BulkKulczynskiSimilarity(const T &bv1, python::object bvs,
bool returnDistance) {
return BulkWrapper(bv1, bvs,
(double (*)(const T &, const T &))KulczynskiSimilarity,
returnDistance);
}
template <typename T1, typename T2>
double SokalSimilarity_w(const T1 &bv1,const T2 &bv2,bool returnDistance){
return SimilarityWrapper(bv1,bv2,
(double (*)(const T1&,const T1&))SokalSimilarity,
double DiceSimilarity_w(const T1 &bv1, const T2 &bv2, bool returnDistance) {
return SimilarityWrapper(bv1, bv2,
(double (*)(const T1 &, const T1 &))DiceSimilarity,
returnDistance);
}
template <typename T>
python::list BulkSokalSimilarity(const T &bv1,python::object bvs,bool returnDistance){
return BulkWrapper(bv1,bvs,
(double (*)(const T&,const T&))SokalSimilarity,
python::list BulkDiceSimilarity(const T &bv1, python::object bvs,
bool returnDistance) {
return BulkWrapper(bv1, bvs, (double (*)(const T &, const T &))DiceSimilarity,
returnDistance);
}
template <typename T1, typename T2>
double McConnaugheySimilarity_w(const T1 &bv1,const T2 &bv2,bool returnDistance){
return SimilarityWrapper(bv1,bv2,
(double (*)(const T1&,const T1&))McConnaugheySimilarity,
double SokalSimilarity_w(const T1 &bv1, const T2 &bv2, bool returnDistance) {
return SimilarityWrapper(bv1, bv2,
(double (*)(const T1 &, const T1 &))SokalSimilarity,
returnDistance);
}
template <typename T>
python::list BulkMcConnaugheySimilarity(const T &bv1,python::object bvs,bool returnDistance){
return BulkWrapper(bv1,bvs,
(double (*)(const T&,const T&))McConnaugheySimilarity,
python::list BulkSokalSimilarity(const T &bv1, python::object bvs,
bool returnDistance) {
return BulkWrapper(bv1, bvs,
(double (*)(const T &, const T &))SokalSimilarity,
returnDistance);
}
template <typename T1, typename T2>
double AsymmetricSimilarity_w(const T1 &bv1,const T2 &bv2,bool returnDistance){
return SimilarityWrapper(bv1,bv2,
(double (*)(const T1&,const T1&))AsymmetricSimilarity,
returnDistance);
double McConnaugheySimilarity_w(const T1 &bv1, const T2 &bv2,
bool returnDistance) {
return SimilarityWrapper(
bv1, bv2, (double (*)(const T1 &, const T1 &))McConnaugheySimilarity,
returnDistance);
}
template <typename T>
python::list BulkAsymmetricSimilarity(const T &bv1,python::object bvs,bool returnDistance){
return BulkWrapper(bv1,bvs,
(double (*)(const T&,const T&))AsymmetricSimilarity,
python::list BulkMcConnaugheySimilarity(const T &bv1, python::object bvs,
bool returnDistance) {
return BulkWrapper(bv1, bvs,
(double (*)(const T &, const T &))McConnaugheySimilarity,
returnDistance);
}
template <typename T1, typename T2>
double BraunBlanquetSimilarity_w(const T1 &bv1,const T2 &bv2,bool returnDistance){
return SimilarityWrapper(bv1,bv2,
(double (*)(const T1&,const T1&))BraunBlanquetSimilarity,
returnDistance);
double AsymmetricSimilarity_w(const T1 &bv1, const T2 &bv2,
bool returnDistance) {
return SimilarityWrapper(
bv1, bv2, (double (*)(const T1 &, const T1 &))AsymmetricSimilarity,
returnDistance);
}
template <typename T>
python::list BulkBraunBlanquetSimilarity(const T &bv1,python::object bvs,bool returnDistance){
return BulkWrapper(bv1,bvs,
(double (*)(const T&,const T&))BraunBlanquetSimilarity,
python::list BulkAsymmetricSimilarity(const T &bv1, python::object bvs,
bool returnDistance) {
return BulkWrapper(bv1, bvs,
(double (*)(const T &, const T &))AsymmetricSimilarity,
returnDistance);
}
template <typename T1, typename T2>
double RusselSimilarity_w(const T1 &bv1,const T2 &bv2,bool returnDistance){
return SimilarityWrapper(bv1,bv2,
(double (*)(const T1&,const T1&))RusselSimilarity,
returnDistance);
double BraunBlanquetSimilarity_w(const T1 &bv1, const T2 &bv2,
bool returnDistance) {
return SimilarityWrapper(
bv1, bv2, (double (*)(const T1 &, const T1 &))BraunBlanquetSimilarity,
returnDistance);
}
template <typename T>
python::list BulkRusselSimilarity(const T &bv1,python::object bvs,bool returnDistance){
return BulkWrapper(bv1,bvs,
(double (*)(const T&,const T&))RusselSimilarity,
python::list BulkBraunBlanquetSimilarity(const T &bv1, python::object bvs,
bool returnDistance) {
return BulkWrapper(bv1, bvs,
(double (*)(const T &, const T &))BraunBlanquetSimilarity,
returnDistance);
}
template <typename T1, typename T2>
double RogotGoldbergSimilarity_w(const T1 &bv1,const T2 &bv2,bool returnDistance){
return SimilarityWrapper(bv1,bv2,
(double (*)(const T1&,const T1&))RogotGoldbergSimilarity,
double RusselSimilarity_w(const T1 &bv1, const T2 &bv2, bool returnDistance) {
return SimilarityWrapper(bv1, bv2,
(double (*)(const T1 &, const T1 &))RusselSimilarity,
returnDistance);
}
template <typename T>
python::list BulkRogotGoldbergSimilarity(const T &bv1,python::object bvs,bool returnDistance){
return BulkWrapper(bv1,bvs,
(double (*)(const T&,const T&))RogotGoldbergSimilarity,
python::list BulkRusselSimilarity(const T &bv1, python::object bvs,
bool returnDistance) {
return BulkWrapper(bv1, bvs,
(double (*)(const T &, const T &))RusselSimilarity,
returnDistance);
}
// Evaluates the Rogot-Goldberg metric through SimilarityWrapper. The metric
// is bound to the (const T1 &, const T1 &) overload of
// RogotGoldbergSimilarity, even though bv2 is declared as T2.
template <typename T1, typename T2>
double RogotGoldbergSimilarity_w(const T1 &bv1, const T2 &bv2,
                                 bool returnDistance) {
  typedef double (*MetricFn)(const T1 &, const T1 &);
  MetricFn metric = (MetricFn)RogotGoldbergSimilarity;
  return SimilarityWrapper(bv1, bv2, metric, returnDistance);
}
// Bulk form of the Rogot-Goldberg metric: delegates to BulkWrapper with the
// (const T &, const T &) overload of RogotGoldbergSimilarity and returns the
// Python list BulkWrapper builds.
template <typename T>
python::list BulkRogotGoldbergSimilarity(const T &bv1, python::object bvs,
                                         bool returnDistance) {
  typedef double (*MetricFn)(const T &, const T &);
  MetricFn metric = (MetricFn)RogotGoldbergSimilarity;
  return BulkWrapper(bv1, bvs, metric, returnDistance);
}
// Bulk form: hands bv1, the Python sequence bvs and returnDistance to
// BulkWrapper with the (const T &, const T &) overload of OnBitSimilarity as
// the metric; returns BulkWrapper's Python list.
template <typename T>
python::list BulkOnBitSimilarity(const T &bv1,python::object bvs,bool returnDistance){
return BulkWrapper(bv1,bvs,
(double (*)(const T&,const T&))OnBitSimilarity,
python::list BulkOnBitSimilarity(const T &bv1, python::object bvs,
bool returnDistance) {
return BulkWrapper(bv1, bvs,
(double (*)(const T &, const T &))OnBitSimilarity,
returnDistance);
}
// Bulk form: hands bv1, the Python sequence bvs and returnDistance to
// BulkWrapper with the (const T &, const T &) overload of AllBitSimilarity as
// the metric; returns BulkWrapper's Python list.
template <typename T>
python::list BulkAllBitSimilarity(const T &bv1,python::object bvs,bool returnDistance){
return BulkWrapper(bv1,bvs,
(double (*)(const T&,const T&))AllBitSimilarity,
python::list BulkAllBitSimilarity(const T &bv1, python::object bvs,
bool returnDistance) {
return BulkWrapper(bv1, bvs,
(double (*)(const T &, const T &))AllBitSimilarity,
returnDistance);
}
// DBL_DEF(_funcname_, _bulkname_, _help_)
//
// Registers a two-argument similarity function and its bulk counterpart under
// their own names for both bit-vector types:
//   - _funcname_ for (SBV, SBV) and for (EBV, EBV)
//   - _bulkname_ for (SBV, sequence) and for (EBV, sequence); the optional
//     third argument "returnDistance" defaults to 0.
// _help_ is attached to the EBV registrations only. The SBV overload is
// registered before the EBV one, matching the order used by BIG_DEF.
//
// The first bulk registration must cast to the `const SBV &` signature:
// registering the `const EBV &` signature twice leaves the bulk function
// unavailable for SBV arguments.
#define DBL_DEF(_funcname_, _bulkname_, _help_)                                \
  {                                                                            \
    python::def(#_funcname_, (double (*)(const SBV &, const SBV &))_funcname_, \
                (python::args("v1"), python::args("v2")));                     \
    python::def(#_funcname_, (double (*)(const EBV &, const EBV &))_funcname_, \
                (python::args("v1"), python::args("v2")), _help_);             \
    python::def(#_bulkname_, (python::list (*)(const SBV &, python::object,    \
                                               bool))_bulkname_,               \
                (python::args("v1"), python::args("v2"),                       \
                 python::args("returnDistance") = 0));                         \
    python::def(#_bulkname_, (python::list (*)(const EBV &, python::object,    \
                                               bool))_bulkname_,               \
                (python::args("v1"), python::args("v2"),                       \
                 python::args("returnDistance") = 0),                          \
                _help_);                                                       \
  }
// DBL_DEF(_funcname_, _bulkname_, _help_): registers _funcname_ for
// (SBV, SBV) and (EBV, EBV), then registers _bulkname_ twice. _help_ is
// attached to the second registration of each name.
// NOTE(review): both _bulkname_ registrations cast to the `const EBV &`
// signature, so no SBV overload of the bulk function is registered. The
// parallel BIG_DEF macro registers SBV first and EBV second -- confirm
// whether the first cast here was meant to be `const SBV &`.
#define DBL_DEF(_funcname_,_bulkname_,_help_) { \
python::def( # _funcname_,(double (*)(const SBV &,const SBV &))_funcname_,\
(python::args("v1"),python::args("v2"))); \
python::def( # _funcname_,(double (*)(const EBV &,const EBV &))_funcname_,\
(python::args("v1"),python::args("v2")),_help_);\
python::def( # _bulkname_,(python::list (*)(const EBV &,python::object,bool))_bulkname_,\
(python::args("v1"),python::args("v2"),python::args("returnDistance")=0));\
python::def( # _bulkname_,(python::list (*)(const EBV &,python::object,bool))_bulkname_,\
(python::args("v1"),python::args("v2"),python::args("returnDistance")=0),_help_);}
// BIG_DEF(_funcname_, _name_w_, _bulkname_, _help_): registers, under the
// Python name _funcname_, four overloads of the wrapper _name_w_
// ((SBV, SBV), (EBV, EBV), (SBV, std::string), (EBV, std::string)), and under
// the Python name _bulkname_ the SBV and EBV overloads of the bulk function.
// Every registration takes an optional "returnDistance" argument defaulting
// to 0; _help_ is attached to the EBV registrations only.
#define BIG_DEF(_funcname_,_name_w_,_bulkname_,_help_) { \
python::def( # _funcname_,(double (*)(const SBV &,const SBV &,bool))_name_w_,\
(python::args("bv1"),python::args("bv2"),python::args("returnDistance")=0)); \
python::def( # _funcname_,(double (*)(const EBV &,const EBV &,bool))_name_w_,\
(python::args("bv1"),python::args("bv2"),python::args("returnDistance")=0),_help_);\
python::def( # _funcname_,(double (*)(const SBV &,const std::string &,bool))_name_w_,\
(python::args("bv1"),python::args("pkl"),python::args("returnDistance")=0));\
python::def( # _funcname_,(double (*)(const EBV &,const std::string &,bool))_name_w_,\
(python::args("bv1"),python::args("pkl"),python::args("returnDistance")=0),_help_);\
python::def( # _bulkname_,(python::list (*)(const SBV &,python::object,bool))_bulkname_,\
(python::args("bv1"),python::args("bvList"),python::args("returnDistance")=0));\
python::def( # _bulkname_,(python::list (*)(const EBV &,python::object,bool))_bulkname_,\
(python::args("bv1"),python::args("bvList"),python::args("returnDistance")=0),_help_);}
// BIG_DEF(_funcname_, _name_w_, _bulkname_, _help_): registers, under the
// Python name _funcname_, four overloads of the wrapper _name_w_
// ((SBV, SBV), (EBV, EBV), (SBV, std::string), (EBV, std::string)), and under
// the Python name _bulkname_ the SBV and EBV overloads of the bulk function.
// Every registration takes an optional "returnDistance" argument defaulting
// to 0; _help_ is attached to the EBV registrations only.
#define BIG_DEF(_funcname_, _name_w_, _bulkname_, _help_) \
{ \
python::def(#_funcname_, \
(double (*)(const SBV &, const SBV &, bool))_name_w_, \
(python::args("bv1"), python::args("bv2"), \
python::args("returnDistance") = 0)); \
python::def(#_funcname_, \
(double (*)(const EBV &, const EBV &, bool))_name_w_, \
(python::args("bv1"), python::args("bv2"), \
python::args("returnDistance") = 0), \
_help_); \
python::def(#_funcname_, \
(double (*)(const SBV &, const std::string &, bool))_name_w_, \
(python::args("bv1"), python::args("pkl"), \
python::args("returnDistance") = 0)); \
python::def(#_funcname_, \
(double (*)(const EBV &, const std::string &, bool))_name_w_, \
(python::args("bv1"), python::args("pkl"), \
python::args("returnDistance") = 0), \
_help_); \
python::def(#_bulkname_, (python::list (*)(const SBV &, python::object, \
bool))_bulkname_, \
(python::args("bv1"), python::args("bvList"), \
python::args("returnDistance") = 0)); \
python::def(#_bulkname_, (python::list (*)(const EBV &, python::object, \
bool))_bulkname_, \
(python::args("bv1"), python::args("bvList"), \
python::args("returnDistance") = 0), \
_help_); \
}
struct BitOps_wrapper {
static void wrap(){
BIG_DEF(TanimotoSimilarity,TanimotoSimilarity_w,BulkTanimotoSimilarity,
static void wrap() {
BIG_DEF(TanimotoSimilarity, TanimotoSimilarity_w, BulkTanimotoSimilarity,
"B(bv1&bv2) / (B(bv1) + B(bv2) - B(bv1&bv2))");
BIG_DEF(CosineSimilarity,CosineSimilarity_w,BulkCosineSimilarity,
BIG_DEF(CosineSimilarity, CosineSimilarity_w, BulkCosineSimilarity,
"B(bv1&bv2) / sqrt(B(bv1) * B(bv2))");
BIG_DEF(KulczynskiSimilarity,KulczynskiSimilarity_w,BulkKulczynskiSimilarity,
BIG_DEF(KulczynskiSimilarity, KulczynskiSimilarity_w,
BulkKulczynskiSimilarity,
"B(bv1&bv2)*(B(bv1) + B(bv2)) / (2 * B(bv1) * B(bv2))");
BIG_DEF(DiceSimilarity,DiceSimilarity_w,BulkDiceSimilarity,
BIG_DEF(DiceSimilarity, DiceSimilarity_w, BulkDiceSimilarity,
"2*B(bv1&bv2) / (B(bv1) + B(bv2))");
BIG_DEF(SokalSimilarity,SokalSimilarity_w,BulkSokalSimilarity,
BIG_DEF(SokalSimilarity, SokalSimilarity_w, BulkSokalSimilarity,
"B(bv1&bv2) / (2*B(bv1) + 2*B(bv2) - 3*B(bv1&bv2))");
BIG_DEF(McConnaugheySimilarity,McConnaugheySimilarity_w,BulkMcConnaugheySimilarity,
"(B(bv1&bv2) * (B(bv1)+B(bv2)) - B(bv1)*B(bv2)) / (B(bv1) * B(bv2))");
BIG_DEF(AsymmetricSimilarity,AsymmetricSimilarity_w,BulkAsymmetricSimilarity,
"B(bv1&bv2) / min(B(bv1),B(bv2))");
BIG_DEF(BraunBlanquetSimilarity,BraunBlanquetSimilarity_w,BulkBraunBlanquetSimilarity,
"B(bv1&bv2) / max(B(bv1),B(bv2))");
BIG_DEF(RusselSimilarity,RusselSimilarity_w,BulkRusselSimilarity,
BIG_DEF(
McConnaugheySimilarity, McConnaugheySimilarity_w,
BulkMcConnaugheySimilarity,
"(B(bv1&bv2) * (B(bv1)+B(bv2)) - B(bv1)*B(bv2)) / (B(bv1) * B(bv2))");
BIG_DEF(AsymmetricSimilarity, AsymmetricSimilarity_w,
BulkAsymmetricSimilarity, "B(bv1&bv2) / min(B(bv1),B(bv2))");
BIG_DEF(BraunBlanquetSimilarity, BraunBlanquetSimilarity_w,
BulkBraunBlanquetSimilarity, "B(bv1&bv2) / max(B(bv1),B(bv2))");
BIG_DEF(RusselSimilarity, RusselSimilarity_w, BulkRusselSimilarity,
"B(bv1&bv2) / B(bv1)");
BIG_DEF(RogotGoldbergSimilarity,RogotGoldbergSimilarity_w,BulkRogotGoldbergSimilarity,
"B(bv1&bv2) / B(bv1)");
BIG_DEF(RogotGoldbergSimilarity, RogotGoldbergSimilarity_w,
BulkRogotGoldbergSimilarity, "B(bv1&bv2) / B(bv1)");
{
std::string help="B(bv1&bv2) / (a*B(bv1)+b*B(bv2)+(1-a-b)*B(bv1&bv2)";
python::def( "TverskySimilarity",
(double (*)(const SBV &,const SBV &,double,double,bool))TverskySimilarity_w,
(python::args("bv1"),python::args("bv2"),python::args("a"),
python::args("b"),python::args("returnDistance")=0));
python::def( "TverskySimilarity",
(double (*)(const EBV &,const EBV &,double,double,bool))TverskySimilarity_w,
(python::args("bv1"),python::args("bv2"),python::args("a"),
python::args("b"),python::args("returnDistance")=0),help.c_str());
python::def( "TverskySimilarity",
(double (*)(const SBV &,const std::string &,double,double,bool))TverskySimilarity_w,
(python::args("bv1"),python::args("pkl"),python::args("a"),
python::args("b"),python::args("returnDistance")=0));
python::def( "TverskySimilarity",
(double (*)(const EBV &,const std::string &,double,double,bool))TverskySimilarity_w,
(python::args("bv1"),python::args("pkl"),python::args("a"),
python::args("b"),python::args("returnDistance")=0),help.c_str());
python::def( "BulkTverskySimilarity",
(python::list (*)(const SBV &,python::object,double,double,bool))BulkTverskySimilarity,
(python::args("bv1"),python::args("bvList"),python::args("a"),
python::args("b"),python::args("returnDistance")=0));
python::def( "BulkTverskySimilarity",
(python::list (*)(const EBV &,python::object,double,double,bool))BulkTverskySimilarity,
(python::args("bv1"),python::args("bvList"),python::args("a"),
python::args("b"),python::args("returnDistance")=0),help.c_str());
std::string help = "B(bv1&bv2) / (a*B(bv1)+b*B(bv2)+(1-a-b)*B(bv1&bv2)";
python::def("TverskySimilarity",
(double (*)(const SBV &, const SBV &, double, double,
bool))TverskySimilarity_w,
(python::args("bv1"), python::args("bv2"), python::args("a"),
python::args("b"), python::args("returnDistance") = 0));
python::def("TverskySimilarity",
(double (*)(const EBV &, const EBV &, double, double,
bool))TverskySimilarity_w,
(python::args("bv1"), python::args("bv2"), python::args("a"),
python::args("b"), python::args("returnDistance") = 0),
help.c_str());
python::def("TverskySimilarity",
(double (*)(const SBV &, const std::string &, double, double,
bool))TverskySimilarity_w,
(python::args("bv1"), python::args("pkl"), python::args("a"),
python::args("b"), python::args("returnDistance") = 0));
python::def("TverskySimilarity",
(double (*)(const EBV &, const std::string &, double, double,
bool))TverskySimilarity_w,
(python::args("bv1"), python::args("pkl"), python::args("a"),
python::args("b"), python::args("returnDistance") = 0),
help.c_str());
python::def(
"BulkTverskySimilarity",
(python::list (*)(const SBV &, python::object, double, double,
bool))BulkTverskySimilarity,
(python::args("bv1"), python::args("bvList"), python::args("a"),
python::args("b"), python::args("returnDistance") = 0));
python::def(
"BulkTverskySimilarity",
(python::list (*)(const EBV &, python::object, double, double,
bool))BulkTverskySimilarity,
(python::args("bv1"), python::args("bvList"), python::args("a"),
python::args("b"), python::args("returnDistance") = 0),
help.c_str());
}
DBL_DEF(OnBitSimilarity,BulkOnBitSimilarity,
"B(bv1&bv2) / B(bv1|bv2)");
DBL_DEF(AllBitSimilarity,BulkAllBitSimilarity,
DBL_DEF(OnBitSimilarity, BulkOnBitSimilarity, "B(bv1&bv2) / B(bv1|bv2)");
DBL_DEF(AllBitSimilarity, BulkAllBitSimilarity,
"(B(bv1) - B(bv1^bv2)) / B(bv1)");
python::def("OnBitProjSimilarity",
(DoubleVect (*)(const SBV&,const SBV&))OnBitProjSimilarity);
python::def("OnBitProjSimilarity",
(DoubleVect (*)(const EBV&,const EBV&))OnBitProjSimilarity,
"Returns a 2-tuple: (B(bv1&bv2) / B(bv1), B(bv1&bv2) / B(bv2))");
(DoubleVect (*)(const SBV &, const SBV &))OnBitProjSimilarity);
python::def(
"OnBitProjSimilarity",
(DoubleVect (*)(const EBV &, const EBV &))OnBitProjSimilarity,
"Returns a 2-tuple: (B(bv1&bv2) / B(bv1), B(bv1&bv2) / B(bv2))");
python::def("OffBitProjSimilarity",
(DoubleVect (*)(const SBV&,const SBV&))OffBitProjSimilarity);
(DoubleVect (*)(const SBV &, const SBV &))OffBitProjSimilarity);
python::def("OffBitProjSimilarity",
(DoubleVect (*)(const EBV&,const EBV&))OffBitProjSimilarity);
(DoubleVect (*)(const EBV &, const EBV &))OffBitProjSimilarity);
python::def("NumBitsInCommon",
(int (*)(const SBV&,const SBV&))NumBitsInCommon);
(int (*)(const SBV &, const SBV &))NumBitsInCommon);
python::def("NumBitsInCommon",
(int (*)(const EBV&,const EBV&))NumBitsInCommon,
"Returns the total number of bits in common between the two bit vectors"
);
(int (*)(const EBV &, const EBV &))NumBitsInCommon,
"Returns the total number of bits in common between the two "
"bit vectors");
python::def("OnBitsInCommon",
(IntVect (*)(const SBV&,const SBV&))OnBitsInCommon);
python::def("OnBitsInCommon",
(IntVect (*)(const EBV&,const EBV&))OnBitsInCommon,
"Returns the number of on bits in common between the two bit vectors"
);
(IntVect (*)(const SBV &, const SBV &))OnBitsInCommon);
python::def(
"OnBitsInCommon", (IntVect (*)(const EBV &, const EBV &))OnBitsInCommon,
"Returns the number of on bits in common between the two bit vectors");
python::def("OffBitsInCommon",
(IntVect (*)(const SBV&,const SBV&))OffBitsInCommon);
python::def("OffBitsInCommon",
(IntVect (*)(const EBV&,const EBV&))OffBitsInCommon,
"Returns the number of off bits in common between the two bit vectors"
);
(IntVect (*)(const SBV &, const SBV &))OffBitsInCommon);
python::def(
"OffBitsInCommon",
(IntVect (*)(const EBV &, const EBV &))OffBitsInCommon,
"Returns the number of off bits in common between the two bit vectors");
python::def("FoldFingerprint",
(SBV *(*)(const SBV &,unsigned int))FoldFingerprint,
(python::arg("bv"),python::arg("foldFactor")=2),
(SBV * (*)(const SBV &, unsigned int))FoldFingerprint,
(python::arg("bv"), python::arg("foldFactor") = 2),
python::return_value_policy<python::manage_new_object>());
python::def("FoldFingerprint",
(EBV *(*)(const EBV &,unsigned int))FoldFingerprint,
(python::arg("bv"),python::arg("foldFactor")=2),
(EBV * (*)(const EBV &, unsigned int))FoldFingerprint,
(python::arg("bv"), python::arg("foldFactor") = 2),
python::return_value_policy<python::manage_new_object>(),
"Folds the fingerprint by the provided amount. The default, foldFactor=2, returns a fingerprint that is half the size of the original.");
"Folds the fingerprint by the provided amount. The default, "
"foldFactor=2, returns a fingerprint that is half the size of "
"the original.");
python::def("AllProbeBitsMatch",
(bool (*)(const SBV &,const SBV &))AllProbeBitsMatch);
(bool (*)(const SBV &, const SBV &))AllProbeBitsMatch);
python::def("AllProbeBitsMatch",
(bool (*)(const EBV &,const EBV &))AllProbeBitsMatch);
(bool (*)(const EBV &, const EBV &))AllProbeBitsMatch);
python::def("AllProbeBitsMatch",
(bool (*)(const SBV &,const std::string &))AllProbeBitsMatch);
python::def("AllProbeBitsMatch",(bool (*)(const EBV &,const std::string &))AllProbeBitsMatch,
"Returns True if all bits in the first argument match all bits in the \n\
(bool (*)(const SBV &, const std::string &))AllProbeBitsMatch);
python::def(
"AllProbeBitsMatch",
(bool (*)(const EBV &, const std::string &))AllProbeBitsMatch,
"Returns True if all bits in the first argument match all bits in the \n\
vector defined by the pickle in the second argument.\n");
python::def("BitVectToText",
(std::string (*)(const SBV&))BitVectToText);
python::def("BitVectToText",
(std::string (*)(const EBV&))BitVectToText,
"Returns a string of zeros and ones representing the bit vector."
);
python::def("BitVectToText", (std::string (*)(const SBV &))BitVectToText);
python::def(
"BitVectToText", (std::string (*)(const EBV &))BitVectToText,
"Returns a string of zeros and ones representing the bit vector.");
python::def("BitVectToFPSText",
(std::string (*)(const SBV&))BitVectToFPSText);
(std::string (*)(const SBV &))BitVectToFPSText);
python::def("BitVectToFPSText",
(std::string (*)(const EBV&))BitVectToFPSText,
"Returns an FPS string representing the bit vector."
);
(std::string (*)(const EBV &))BitVectToFPSText,
"Returns an FPS string representing the bit vector.");
python::def("BitVectToBinaryText",
(python::object (*)(const SBV&))BVToBinaryText);
python::def("BitVectToBinaryText",
(python::object (*)(const EBV&))BVToBinaryText,
"Returns a binary string (byte array) representing the bit vector."
);
(python::object (*)(const SBV &))BVToBinaryText);
python::def(
"BitVectToBinaryText", (python::object (*)(const EBV &))BVToBinaryText,
"Returns a binary string (byte array) representing the bit vector.");
}
};
void wrap_BitOps() {
BitOps_wrapper::wrap();
}
// Entry point for the bit-operation bindings: forwards to
// BitOps_wrapper::wrap(), which performs the python::def registrations.
void wrap_BitOps() {
  BitOps_wrapper::wrap();
}

View File

@@ -16,19 +16,17 @@
namespace python = boost::python;
// allows BitVects to be pickled
struct ebv_pickle_suite : python::pickle_suite
{
static python::tuple
getinitargs(const ExplicitBitVect& self)
{
std::string res=self.toString();
python::object retval = python::object(python::handle<>(PyBytes_FromStringAndSize(res.c_str(),res.length())));
// Pickle support for ExplicitBitVect: the init-args tuple holds a single
// Python bytes object built from the vector's toString() output.
struct ebv_pickle_suite : python::pickle_suite {
  static python::tuple getinitargs(const ExplicitBitVect &self) {
    const std::string serialized = self.toString();
    // Go through PyBytes_FromStringAndSize so the explicit length is used
    // rather than relying on NUL termination of the serialized data.
    return python::make_tuple(python::object(python::handle<>(
        PyBytes_FromStringAndSize(serialized.c_str(), serialized.length()))));
  }
};
std::string ebvClassDoc="A class to store explicit bit vectors.\n\
std::string ebvClassDoc =
"A class to store explicit bit vectors.\n\
\n\
This class is most useful for situations where the size of the vector\n\
is relatively small (tens of thousands or smaller).\n\
@@ -47,63 +45,56 @@ or by indexing (i.e. bv[i] = 1 or if bv[i]).\n\
\n";
struct EBV_wrapper {
static void wrap(){
python::class_<ExplicitBitVect,
boost::shared_ptr<ExplicitBitVect> >("ExplicitBitVect",ebvClassDoc.c_str(),
python::init<unsigned int>())
.def(python::init<std::string>())
.def(python::init<unsigned int, bool>())
.def("SetBit",(bool (EBV::*)(unsigned int))&EBV::setBit,
"Turns on a particular bit. Returns the original state of the bit.\n")
.def("SetBitsFromList",(void (*)(EBV *,python::object))SetBitsFromList,
"Turns on a set of bits. The argument should be a tuple or list of bit ids.\n")
.def("UnSetBit",(bool (EBV::*)(unsigned int))&EBV::unsetBit,
"Turns off a particular bit. Returns the original state of the bit.\n")
.def("UnSetBitsFromList",(void (*)(EBV *,python::object))UnSetBitsFromList,
"Turns off a set of bits. The argument should be a tuple or list of bit ids.\n")
.def("GetBit",(bool (EBV::*)(unsigned int) const)&EBV::getBit,
"Returns the value of a bit.\n")
.def("GetNumBits",&EBV::getNumBits,
"Returns the number of bits in the vector (the vector's size).\n")
.def("__len__",&EBV::getNumBits)
.def("GetNumOnBits",&EBV::getNumOnBits,
"Returns the number of on bits.\n")
.def("GetNumOffBits",&EBV::getNumOffBits,
"Returns the number of off bits.\n")
.def("__getitem__",
(int (*)(const EBV&,int))get_VectItem)
.def("__setitem__",
(int (*)(EBV&,int,int))set_VectItem)
.def("GetOnBits",
(IntVect (*)(const EBV&))GetOnBits,
"Returns a tuple containing IDs of the on bits.\n")
.def("ToBinary",(python::object (*)(const EBV&))BVToBinary,
"Returns an internal binary representation of the vector.\n")
.def("FromBase64",
(void (*)(EBV &,const std::string &))InitFromBase64,
"Initializes the vector from a base64 encoded binary string.\n")
.def("ToBase64",
(std::string (*)(EBV &))ToBase64,
"Converts the vector to a base64 string (the base64 encoded version of the results of ToString()).\n")
.def(python::self & python::self)
.def(python::self | python::self)
.def(python::self ^ python::self)
.def(python::self + python::self)
.def(~python::self)
.def(python::self == python::self)
.def(python::self != python::self)
.def(python::self += python::self)
.def_pickle(ebv_pickle_suite())
;
// Registers ExplicitBitVect with Boost.Python under the name
// "ExplicitBitVect", held by boost::shared_ptr and documented by ebvClassDoc.
// All constructors, methods, operators and pickle support are attached in a
// single chained expression.
static void wrap() {
python::class_<ExplicitBitVect, boost::shared_ptr<ExplicitBitVect> >(
"ExplicitBitVect", ebvClassDoc.c_str(), python::init<unsigned int>())
// additional constructors: from a std::string, and from (unsigned int, bool)
.def(python::init<std::string>())
.def(python::init<unsigned int, bool>())
// single-bit and bulk setters/unsetters
.def("SetBit", (bool (EBV::*)(unsigned int)) & EBV::setBit,
"Turns on a particular bit.  Returns the original state of the "
"bit.\n")
.def("SetBitsFromList",
(void (*)(EBV *, python::object))SetBitsFromList,
"Turns on a set of bits.  The argument should be a tuple or list "
"of bit ids.\n")
.def("UnSetBit", (bool (EBV::*)(unsigned int)) & EBV::unsetBit,
"Turns off a particular bit.  Returns the original state of the "
"bit.\n")
.def("UnSetBitsFromList",
(void (*)(EBV *, python::object))UnSetBitsFromList,
"Turns off a set of bits.  The argument should be a tuple or list "
"of bit ids.\n")
// queries; __len__ is bound to the same member as GetNumBits
.def("GetBit", (bool (EBV::*)(unsigned int) const) & EBV::getBit,
"Returns the value of a bit.\n")
.def("GetNumBits", &EBV::getNumBits,
"Returns the number of bits in the vector (the vector's size).\n")
.def("__len__", &EBV::getNumBits)
.def("GetNumOnBits", &EBV::getNumOnBits,
"Returns the number of on bits.\n")
.def("GetNumOffBits", &EBV::getNumOffBits,
"Returns the number of off bits.\n")
// indexing support (bv[i] and bv[i] = x) via the free helper templates
.def("__getitem__", (int (*)(const EBV &, int))get_VectItem)
.def("__setitem__", (int (*)(EBV &, int, int))set_VectItem)
.def("GetOnBits", (IntVect (*)(const EBV &))GetOnBits,
"Returns a tuple containing IDs of the on bits.\n")
// serialization helpers
.def("ToBinary", (python::object (*)(const EBV &))BVToBinary,
"Returns an internal binary representation of the vector.\n")
.def("FromBase64", (void (*)(EBV &, const std::string &))InitFromBase64,
"Initializes the vector from a base64 encoded binary string.\n")
.def("ToBase64", (std::string (*)(EBV &))ToBase64,
"Converts the vector to a base64 string (the base64 encoded "
"version of the results of ToString()).\n")
// operators exposed to Python: &, |, ^, +, ~, ==, != and +=
.def(python::self & python::self)
.def(python::self | python::self)
.def(python::self ^ python::self)
.def(python::self + python::self)
.def(~python::self)
.def(python::self == python::self)
.def(python::self != python::self)
.def(python::self += python::self)
// pickling goes through ebv_pickle_suite
.def_pickle(ebv_pickle_suite());
}
};
void wrap_EBV() {
EBV_wrapper::wrap();
}
// Entry point for the ExplicitBitVect bindings: forwards to
// EBV_wrapper::wrap().
void wrap_EBV() {
  EBV_wrapper::wrap();
}

View File

@@ -17,19 +17,17 @@
namespace python = boost::python;
// allows BitVects to be pickled
struct sbv_pickle_suite : python::pickle_suite
{
static python::tuple
getinitargs(const SparseBitVect& self)
{
std::string res=self.toString();
python::object retval = python::object(python::handle<>(PyBytes_FromStringAndSize(res.c_str(),res.length())));
// Pickle support for SparseBitVect: the init-args tuple holds a single
// Python bytes object built from the vector's toString() output.
struct sbv_pickle_suite : python::pickle_suite {
  static python::tuple getinitargs(const SparseBitVect &self) {
    const std::string serialized = self.toString();
    // Go through PyBytes_FromStringAndSize so the explicit length is used
    // rather than relying on NUL termination of the serialized data.
    return python::make_tuple(python::object(python::handle<>(
        PyBytes_FromStringAndSize(serialized.c_str(), serialized.length()))));
  }
};
std::string sbvClassDoc="A class to store sparse bit vectors.\n\
std::string sbvClassDoc =
"A class to store sparse bit vectors.\n\
\n\
This class is most useful for situations where the size of the vector\n\
is large and relatively few bits are set\n\
@@ -48,56 +46,53 @@ Bits can be set and read using either the Set/UnsetBit() and GetBit() methods\n\
or by indexing (i.e. bv[i] = 1 or if bv[i]).\n\
\n";
struct SBV_wrapper {
static void wrap(){
python::class_<SparseBitVect>("SparseBitVect",
sbvClassDoc.c_str(),
static void wrap() {
python::class_<SparseBitVect>("SparseBitVect", sbvClassDoc.c_str(),
python::init<unsigned int>())
.def(python::init<std::string>())
.def("SetBit",(bool (SBV::*)(unsigned int))&SBV::setBit,
"Turns on a particular bit. Returns the original state of the bit.\n")
.def("SetBitsFromList",(void (*)(SBV *,python::object))SetBitsFromList,
"Turns on a set of bits. The argument should be a tuple or list of bit ids.\n")
.def("UnSetBit",(bool (SBV::*)(unsigned int))&SBV::unsetBit,
"Turns off a particular bit. Returns the original state of the bit.\n")
.def("UnSetBitsFromList",(void (*)(SBV *,python::object))UnSetBitsFromList,
"Turns off a set of bits. The argument should be a tuple or list of bit ids.\n")
.def("GetBit",(bool (SBV::*)(unsigned int) const)&SBV::getBit,
"Returns the value of a bit.\n")
.def("GetNumBits",&SBV::getNumBits,
"Returns the number of bits in the vector (the vector's size).\n")
.def("__len__",&SBV::getNumBits)
.def("GetNumOnBits",&SBV::getNumOnBits,
"Returns the number of on bits.\n")
.def("GetNumOffBits",&SBV::getNumOffBits,
"Returns the number of off bits.\n")
.def("__getitem__",
(int (*)(const SBV&,int))get_VectItem)
.def("__setitem__",
(int (*)(SBV&,int,int))set_VectItem)
.def("GetOnBits",
(IntVect (*)(const SBV&))GetOnBits,
"Returns a tuple containing IDs of the on bits.\n")
.def("ToBinary",(python::object (*)(const SBV&))BVToBinary,
"Returns an internal binary representation of the vector.\n")
.def("FromBase64",
(void (*)(SBV &,const std::string &))InitFromBase64,
"Initializes the vector from a base64 encoded binary string.\n")
.def("ToBase64",
(std::string (*)(SBV &))ToBase64,
"Converts the vector to a base64 string (the base64 encoded version of the results of ToString()).\n")
.def(python::self & python::self)
.def(python::self | python::self)
.def(python::self ^ python::self)
.def(~python::self)
.def(python::self == python::self)
.def(python::self != python::self)
.def(python::init<std::string>())
.def("SetBit", (bool (SBV::*)(unsigned int)) & SBV::setBit,
"Turns on a particular bit. Returns the original state of the "
"bit.\n")
.def("SetBitsFromList",
(void (*)(SBV *, python::object))SetBitsFromList,
"Turns on a set of bits. The argument should be a tuple or list "
"of bit ids.\n")
.def("UnSetBit", (bool (SBV::*)(unsigned int)) & SBV::unsetBit,
"Turns off a particular bit. Returns the original state of the "
"bit.\n")
.def("UnSetBitsFromList",
(void (*)(SBV *, python::object))UnSetBitsFromList,
"Turns off a set of bits. The argument should be a tuple or list "
"of bit ids.\n")
.def("GetBit", (bool (SBV::*)(unsigned int) const) & SBV::getBit,
"Returns the value of a bit.\n")
.def("GetNumBits", &SBV::getNumBits,
"Returns the number of bits in the vector (the vector's size).\n")
.def("__len__", &SBV::getNumBits)
.def("GetNumOnBits", &SBV::getNumOnBits,
"Returns the number of on bits.\n")
.def("GetNumOffBits", &SBV::getNumOffBits,
"Returns the number of off bits.\n")
.def("__getitem__", (int (*)(const SBV &, int))get_VectItem)
.def("__setitem__", (int (*)(SBV &, int, int))set_VectItem)
.def("GetOnBits", (IntVect (*)(const SBV &))GetOnBits,
"Returns a tuple containing IDs of the on bits.\n")
.def("ToBinary", (python::object (*)(const SBV &))BVToBinary,
"Returns an internal binary representation of the vector.\n")
.def("FromBase64", (void (*)(SBV &, const std::string &))InitFromBase64,
"Initializes the vector from a base64 encoded binary string.\n")
.def("ToBase64", (std::string (*)(SBV &))ToBase64,
"Converts the vector to a base64 string (the base64 encoded "
"version of the results of ToString()).\n")
.def(python::self & python::self)
.def(python::self | python::self)
.def(python::self ^ python::self)
.def(~python::self)
.def(python::self == python::self)
.def(python::self != python::self)
.def_pickle(sbv_pickle_suite())
;
.def_pickle(sbv_pickle_suite());
}
};
void wrap_SBV() {
SBV_wrapper::wrap();
}
// Entry point for the SparseBitVect bindings: forwards to
// SBV_wrapper::wrap().
void wrap_SBV() {
  SBV_wrapper::wrap();
}

View File

@@ -14,48 +14,53 @@
#include <DataStructs/BitOps.h>
namespace python = boost::python;
ExplicitBitVect *createFromBitString(const std::string &bits){
ExplicitBitVect *res=new ExplicitBitVect(bits.length());
FromBitString(*res,bits);
// Builds a new ExplicitBitVect with one bit per character of `bits` and fills
// it via FromBitString(). The result is allocated with `new`; ownership passes
// to the caller.
ExplicitBitVect *createFromBitString(const std::string &bits) {
  ExplicitBitVect *bv = new ExplicitBitVect(bits.size());
  FromBitString(*bv, bits);
  return bv;
}
ExplicitBitVect *createFromFPSText(const std::string &fps){
if(fps.length()%2){
throw ValueErrorException("input string must have an even number of characters");
ExplicitBitVect *createFromFPSText(const std::string &fps) {
if (fps.length() % 2) {
throw ValueErrorException(
"input string must have an even number of characters");
}
ExplicitBitVect *res=new ExplicitBitVect(fps.length()*4);
UpdateBitVectFromFPSText(*res,fps);
ExplicitBitVect *res = new ExplicitBitVect(fps.length() * 4);
UpdateBitVectFromFPSText(*res, fps);
return res;
}
ExplicitBitVect *createFromBinaryText(const std::string &fps){
ExplicitBitVect *res=new ExplicitBitVect(fps.length()*8);
UpdateBitVectFromBinaryText(*res,fps);
// Builds a new ExplicitBitVect sized at eight bits per input byte and fills it
// via UpdateBitVectFromBinaryText(). The result is allocated with `new`;
// ownership passes to the caller.
ExplicitBitVect *createFromBinaryText(const std::string &fps) {
  ExplicitBitVect *bv = new ExplicitBitVect(fps.size() * 8);
  UpdateBitVectFromBinaryText(*bv, fps);
  return bv;
}
struct Utils_wrapper {
static void wrap(){
python::def("ConvertToExplicit", convertToExplicit,
static void wrap() {
python::def("ConvertToExplicit", convertToExplicit,
python::return_value_policy<python::manage_new_object>(),
"Converts a SparseBitVector to an ExplicitBitVector and returns the ExplicitBitVector");
python::def("CreateFromBitString",createFromBitString,
"Converts a SparseBitVector to an ExplicitBitVector and "
"returns the ExplicitBitVector");
python::def(
"CreateFromBitString", createFromBitString,
python::return_value_policy<python::manage_new_object>(),
"Creates an ExplicitBitVect from a bit string (string of 0s and 1s).");
python::def("CreateFromFPSText", createFromFPSText,
python::return_value_policy<python::manage_new_object>(),
"Creates an ExplicitBitVect from a bit string (string of 0s and 1s).");
python::def("CreateFromFPSText",createFromFPSText,
python::return_value_policy<python::manage_new_object>(),
"Creates an ExplicitBitVect from an FPS string.");
python::def("CreateFromBinaryText",createFromBinaryText,
python::return_value_policy<python::manage_new_object>(),
"Creates an ExplicitBitVect from a binary string (byte array).");
"Creates an ExplicitBitVect from an FPS string.");
python::def(
"CreateFromBinaryText", createFromBinaryText,
python::return_value_policy<python::manage_new_object>(),
"Creates an ExplicitBitVect from a binary string (byte array).");
python::def("InitFromDaylightString",
(void (*)(SparseBitVect &,const std::string&))FromDaylightString);
python::def("InitFromDaylightString",
(void (*)(ExplicitBitVect &,const std::string&))FromDaylightString,
"Fill a BitVect using an ASCII (Daylight) encoding of a fingerprint.\n\
python::def(
"InitFromDaylightString",
(void (*)(SparseBitVect &, const std::string &))FromDaylightString);
python::def(
"InitFromDaylightString",
(void (*)(ExplicitBitVect &, const std::string &))FromDaylightString,
"Fill a BitVect using an ASCII (Daylight) encoding of a fingerprint.\n\
\n\
**Arguments**\n\
- bv: either a _SparseBitVect_ or an _ExplicitBitVect_\n\
@@ -65,7 +70,4 @@ struct Utils_wrapper {
}
};
void wrap_Utils() {
Utils_wrapper::wrap();
}
// Entry point for the bit-vector utility bindings: forwards to
// Utils_wrapper::wrap().
void wrap_Utils() {
  Utils_wrapper::wrap();
}

View File

@@ -15,89 +15,83 @@
#include <RDBoost/Wrap.h>
#include <DataStructs/base64.h>
namespace python = boost::python;
template <typename T>
void InitFromBase64(T& self,const std::string &inD)
{
self.initFromText(inD.c_str(),inD.length(),true);
};
// Re-initialises `self` from the text in `inD` by calling
// self.initFromText(data, length, true).
template <typename T>
void InitFromBase64(T& self, const std::string& inD) {
  const char* encoded = inD.c_str();
  self.initFromText(encoded, inD.size(), true);
}
template <typename T>
std::string ToBase64(T& self)
{
std::string tmp;
tmp = self.toString();
const char *txt=Base64Encode(tmp.c_str(),tmp.length());
std::string res(txt);
delete[] txt;
return res ;
};
// Returns the Base64 encoding of self.toString() as a std::string.
template <typename T>
std::string ToBase64(T& self) {
  const std::string raw = self.toString();
  // Copy the encoded characters into a std::string before releasing the
  // heap buffer with delete[].
  const char* encoded = Base64Encode(raw.c_str(), raw.length());
  std::string result(encoded);
  delete[] encoded;
  return result;
}
template <typename T>
void SetBitsFromList(T *bv, python::object onBitList) {
PySequenceHolder<int> bitL(onBitList);
for (unsigned int i = 0; i < bitL.size(); i++) {
bv->setBit(bitL[i]);
}
template <typename T>
void SetBitsFromList(T* bv, python::object onBitList) {
PySequenceHolder<int> bitL(onBitList);
for (unsigned int i = 0; i < bitL.size(); i++) {
bv->setBit(bitL[i]);
}
}
template <typename T>
void UnSetBitsFromList(T *bv, python::object offBitList) {
PySequenceHolder<int> bitL(offBitList);
for (unsigned int i = 0; i < bitL.size(); i++) {
bv->unsetBit(bitL[i]);
}
template <typename T>
void UnSetBitsFromList(T* bv, python::object offBitList) {
PySequenceHolder<int> bitL(offBitList);
for (unsigned int i = 0; i < bitL.size(); i++) {
bv->unsetBit(bitL[i]);
}
}
// used to support __getitem__
template <typename T>
int get_VectItem(const T& self,int which)
{
if(which<0){
if(which+static_cast<int>(self.getNumBits())<0){
throw IndexErrorException(which);
} else {
which += self.getNumBits();
}
}
return self.getBit(static_cast<unsigned int>(which));
}
// used to support __setitem__
template <typename T>
int set_VectItem(T& self, int which, const int val)
{
if(which<0){
if(which+static_cast<int>(self.getNumBits())<0){
throw IndexErrorException(which);
} else {
which += self.getNumBits();
}
}
if(val){
return self.setBit(static_cast<unsigned int>(which));
// used to support __getitem__
template <typename T>
int get_VectItem(const T& self, int which) {
if (which < 0) {
if (which + static_cast<int>(self.getNumBits()) < 0) {
throw IndexErrorException(which);
} else {
return self.unsetBit(static_cast<unsigned int>(which));
which += self.getNumBits();
}
}
return self.getBit(static_cast<unsigned int>(which));
}
// used to support getOnBits()
template <typename T>
IntVect GetOnBits(const T& self)
{
IntVect res;
self.getOnBits(res);
return res;
// used to support __setitem__
template <typename T>
int set_VectItem(T& self, int which, const int val) {
if (which < 0) {
if (which + static_cast<int>(self.getNumBits()) < 0) {
throw IndexErrorException(which);
} else {
which += self.getNumBits();
}
}
template <typename T>
python::object BVToBinary(const T &bv){
std::string res=bv.toString();
python::object retval = python::object(python::handle<>(PyBytes_FromStringAndSize(res.c_str(),res.length())));
return retval;
if (val) {
return self.setBit(static_cast<unsigned int>(which));
} else {
return self.unsetBit(static_cast<unsigned int>(which));
}
}
// Backs the Python-side GetOnBits(): collects the result of
// self.getOnBits() into an IntVect and returns it by value.
template <typename T>
IntVect GetOnBits(const T& self) {
  IntVect onBits;
  self.getOnBits(onBits);
  return onBits;
}
// Returns bv.toString() wrapped in a Python bytes object.
template <typename T>
python::object BVToBinary(const T& bv) {
  const std::string serialized = bv.toString();
  // The explicit length keeps any embedded NUL bytes in the output.
  return python::object(python::handle<>(
      PyBytes_FromStringAndSize(serialized.c_str(), serialized.length())));
}
#endif

View File

@@ -32,46 +32,44 @@
// 15 P 32 g 49 x
// 16 Q 33 h 50 y
char *Base64Encode(const char *inText,const unsigned int inLen){
return Base64Encode((const unsigned char *)inText,inLen);
// Convenience overload for plain-char input: reinterprets the buffer as
// unsigned bytes and forwards to the unsigned char overload.
char *Base64Encode(const char *inText, const unsigned int inLen) {
  const unsigned char *asBytes =
      reinterpret_cast<const unsigned char *>(inText);
  return Base64Encode(asBytes, inLen);
}
char *Base64Encode(const unsigned char *inText,const unsigned int inLen){
char *Base64Encode(const unsigned char *inText, const unsigned int inLen) {
// Notes:
// - whoever calls us is responsible for free'ing the result we return
// - we cheat and don't worry about breaking lines
static unsigned char transTable[64]={'A','B','C','D','E','F','G','H',
'I','J','K','L','M','N','O','P',
'Q','R','S','T','U','V','W','X',
'Y','Z','a','b','c','d','e','f',
'g','h','i','j','k','l','m','n',
'o','p','q','r','s','t','u','v',
'w','x','y','z','0','1','2','3',
'4','5','6','7','8','9','+','/'};
static unsigned char transTable[64] = {
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'};
char *res;
int resSize;
resSize = (4*inLen)/3;
while(resSize % 4) resSize++;
res = new char[resSize+1];
resSize = (4 * inLen) / 3;
while (resSize % 4) resSize++;
res = new char[resSize + 1];
unsigned int i = 0;
int pos = 0;
while(i < inLen){
res[pos++]= transTable[inText[i]>>2];
if( i+1 < inLen){
res[pos++]= transTable[((inText[i]&3)<<4)|(inText[i+1]>>4)];
if(i+2 < inLen){
res[pos++] = transTable[((inText[i+1]&0xF)<<2)|(inText[i+2]>>6)];
res[pos++] = transTable[inText[i+2]&0x3F];
while (i < inLen) {
res[pos++] = transTable[inText[i] >> 2];
if (i + 1 < inLen) {
res[pos++] = transTable[((inText[i] & 3) << 4) | (inText[i + 1] >> 4)];
if (i + 2 < inLen) {
res[pos++] =
transTable[((inText[i + 1] & 0xF) << 2) | (inText[i + 2] >> 6)];
res[pos++] = transTable[inText[i + 2] & 0x3F];
} else {
// single padding
res[pos++] = transTable[((inText[i+1]&0xF)<<2)];
res[pos++] = transTable[((inText[i + 1] & 0xF) << 2)];
res[pos++] = '=';
}
} else {
// double padding
res[pos++] = transTable[((inText[i]&3)<<4)];
res[pos++] = transTable[((inText[i] & 3) << 4)];
res[pos++] = '=';
res[pos++] = '=';
}
@@ -81,7 +79,7 @@ char *Base64Encode(const unsigned char *inText,const unsigned int inLen){
return res;
}
char *Base64Decode(const char *inText,unsigned int *size){
char *Base64Decode(const char *inText, unsigned int *size) {
// Notes:
// - whoever calls us is responsible for free'ing the result we return
@@ -90,34 +88,34 @@ char *Base64Decode(const char *inText,unsigned int *size){
int i;
// FIX: we don't really need to build this table here
for(i= 0;i<255;i++){
transTable[i]= 0x80;
for (i = 0; i < 255; i++) {
transTable[i] = 0x80;
}
for(i='A';i<='Z';i++) transTable[i] = (unsigned char)i-'A';
for(i='a';i<='z';i++) transTable[i] = (unsigned char)i-'a'+26;
for(i='0';i<='9';i++) transTable[i] = (unsigned char)i-'0'+52;
transTable[static_cast<int>('+')]=62;
transTable[static_cast<int>('/')]=63;
for (i = 'A'; i <= 'Z'; i++) transTable[i] = (unsigned char)i - 'A';
for (i = 'a'; i <= 'z'; i++) transTable[i] = (unsigned char)i - 'a' + 26;
for (i = '0'; i <= '9'; i++) transTable[i] = (unsigned char)i - '0' + 52;
transTable[static_cast<int>('+')] = 62;
transTable[static_cast<int>('/')] = 63;
int outLen = 3*inLen/4;
int outLen = 3 * inLen / 4;
char *res = new char[outLen];
res[outLen-1]=0;
res[outLen - 1] = 0;
int pos = 0;
i = 0;
// decode 4 bytes at a time
unsigned char block[4];
int nInBlock=0;
while(i < inLen){
int nInBlock = 0;
while (i < inLen) {
unsigned char c = inText[i];
// above we set 0x80 as the junk marker in the translation table
if ( !(transTable[c]&0x80) ){
if (!(transTable[c] & 0x80)) {
block[nInBlock++] = transTable[c];
if( nInBlock == 4 ) {
if (nInBlock == 4) {
// finished a block
res[pos++] = (block[0]<<2)|(block[1]>>4);
res[pos++] = (block[1]<<4)|(block[2]>>2);
res[pos++] = (block[2]<<6)|block[3];
res[pos++] = (block[0] << 2) | (block[1] >> 4);
res[pos++] = (block[1] << 4) | (block[2] >> 2);
res[pos++] = (block[2] << 6) | block[3];
nInBlock = 0;
}
}
@@ -126,11 +124,11 @@ char *Base64Decode(const char *inText,unsigned int *size){
// okay, now there can be 2 or 3 chars remaining to be processed
// (before the padding)
if(nInBlock>1){
res[pos++] = (block[0]<<2)|(block[1]>>4);
if(nInBlock > 2){
res[pos++] = (block[1]<<4)|(block[2]>>2);
res[pos] = (block[2]<<6);
if (nInBlock > 1) {
res[pos++] = (block[0] << 2) | (block[1] >> 4);
if (nInBlock > 2) {
res[pos++] = (block[1] << 4) | (block[2] >> 2);
res[pos] = (block[2] << 6);
}
}
*size = pos;

View File

@@ -20,20 +20,20 @@
<b>Note:</b> The caller is responsible for calling \c delete[] on the
char array returned by this function.
*/
char *Base64Encode(const unsigned char *,const unsigned int);
char *Base64Encode(const unsigned char *, const unsigned int);
//! return the base64 encoding of an array of chars
/*!
<b>Note:</b> The caller is responsible for calling \c delete[] on the
char array returned by this function.
*/
char *Base64Encode(const char *,const unsigned int);
char *Base64Encode(const char *, const unsigned int);
//! return the decoded version of a base64 encoded char array
/*!
<b>Note:</b> The caller is responsible for calling \c delete[] on the
char array returned by this function.
*/
char *Base64Decode(const char *,unsigned int *);
char *Base64Decode(const char *, unsigned int *);
#endif

File diff suppressed because it is too large Load Diff

View File

@@ -30,22 +30,24 @@
#include <string>
using namespace RDKit;
namespace io=boost::iostreams;
namespace io = boost::iostreams;
void test1(){
void test1() {
#ifdef SUPPORT_COMPRESSED_IO
BOOST_LOG(rdInfoLog) << "testing basic reading from compressed streams" << std::endl;
BOOST_LOG(rdInfoLog) << "testing basic reading from compressed streams"
<< std::endl;
std::string rdbase = getenv("RDBASE");
std::string fname2 = rdbase + "/Code/Demos/RDKit/BinaryIO/test_data/triazine.mol.gz";
std::string fname2 =
rdbase + "/Code/Demos/RDKit/BinaryIO/test_data/triazine.mol.gz";
RWMol *m;
io::filtering_istream inStrm;
inStrm.push(io::gzip_decompressor());
inStrm.push(io::file_source(fname2));
TEST_ASSERT(inStrm.is_complete());
unsigned int lineNo=0;
m = MolDataStreamToMol(inStrm,lineNo);
unsigned int lineNo = 0;
m = MolDataStreamToMol(inStrm, lineNo);
TEST_ASSERT(m);
TEST_ASSERT(m->getNumAtoms(6));
@@ -55,17 +57,19 @@ void test1(){
#endif
}
void test2(){
void test2() {
#ifdef SUPPORT_COMPRESSED_IO
BOOST_LOG(rdInfoLog) << "testing writing to, then reading from compressed streams" << std::endl;
BOOST_LOG(rdInfoLog)
<< "testing writing to, then reading from compressed streams"
<< std::endl;
std::string smiles="C1CCCC1";
std::string buff,molBlock;
std::string smiles = "C1CCCC1";
std::string buff, molBlock;
RWMol *m;
m = SmilesToMol(smiles);
TEST_ASSERT(m);
TEST_ASSERT(m->getNumAtoms(5));
m->setProp(common_properties::_Name,"monkey");
m->setProp(common_properties::_Name, "monkey");
io::filtering_ostream outStrm;
outStrm.push(io::gzip_compressor());
outStrm.push(io::back_inserter(buff));
@@ -74,15 +78,15 @@ void test2(){
molBlock = MolToMolBlock(*m);
outStrm << molBlock;
outStrm.reset();
delete m;
io::filtering_istream inStrm;
inStrm.push(io::gzip_decompressor());
inStrm.push(boost::make_iterator_range(buff));
TEST_ASSERT(inStrm.is_complete());
unsigned int lineNo=0;
m = MolDataStreamToMol(inStrm,lineNo);
unsigned int lineNo = 0;
m = MolDataStreamToMol(inStrm, lineNo);
TEST_ASSERT(m);
TEST_ASSERT(m->getNumAtoms(5));
@@ -92,112 +96,115 @@ void test2(){
#endif
}
void test3(){
void test3() {
#ifdef SUPPORT_COMPRESSED_IO
BOOST_LOG(rdInfoLog) << "testing writing pickles to a file then reading them back" << std::endl;
BOOST_LOG(rdInfoLog)
<< "testing writing pickles to a file then reading them back"
<< std::endl;
std::string rdbase = getenv("RDBASE");
std::string fname2 = rdbase + "/Code/Demos/RDKit/BinaryIO/test_data/mols.rdb";
std::string smiles,buff;
std::string smiles, buff;
RWMol *m;
std::vector<unsigned int> filePs;
io::filtering_ostream outStrm;
outStrm.push(io::file_sink(fname2,std::ios_base::out|std::ios_base::binary));
outStrm.push(
io::file_sink(fname2, std::ios_base::out | std::ios_base::binary));
TEST_ASSERT(outStrm.is_complete());
smiles="C1CCC1";
smiles = "C1CCC1";
m = SmilesToMol(smiles);
TEST_ASSERT(m);
TEST_ASSERT(m->getNumAtoms(4));
MolPickler::pickleMol(*m,outStrm);
MolPickler::pickleMol(*m, outStrm);
delete m;
smiles="C1CCC1";
smiles = "C1CCC1";
m = SmilesToMol(smiles);
TEST_ASSERT(m);
TEST_ASSERT(m->getNumAtoms(4));
RDDepict::compute2DCoords(*m);
filePs.push_back(0);
MolPickler::pickleMol(*m,outStrm);
MolPickler::pickleMol(*m, outStrm);
delete m;
smiles="C1CCCC1";
smiles = "C1CCCC1";
m = SmilesToMol(smiles);
TEST_ASSERT(m);
TEST_ASSERT(m->getNumAtoms(5));
RDDepict::compute2DCoords(*m);
filePs.push_back(outStrm.tellp());
MolPickler::pickleMol(*m,outStrm);
MolPickler::pickleMol(*m, outStrm);
delete m;
smiles="c1ccccc1";
smiles = "c1ccccc1";
m = SmilesToMol(smiles);
TEST_ASSERT(m);
TEST_ASSERT(m->getNumAtoms(6));
RDDepict::compute2DCoords(*m);
filePs.push_back(outStrm.tellp());
MolPickler::pickleMol(*m,outStrm);
MolPickler::pickleMol(*m, outStrm);
delete m;
smiles="c1ccccc1CC(=O)O";
smiles = "c1ccccc1CC(=O)O";
m = SmilesToMol(smiles);
TEST_ASSERT(m);
TEST_ASSERT(m->getNumAtoms(9));
RDDepict::compute2DCoords(*m);
filePs.push_back(outStrm.tellp());
MolPickler::pickleMol(*m,outStrm);
MolPickler::pickleMol(*m, outStrm);
delete m;
outStrm.flush();
outStrm.reset();
io::filtering_istream inStrm;
inStrm.push(io::file_source(fname2,std::ios_base::in|std::ios_base::binary));
inStrm.push(
io::file_source(fname2, std::ios_base::in | std::ios_base::binary));
TEST_ASSERT(inStrm.is_complete());
m = new RWMol();
TEST_ASSERT(m);
MolPickler::molFromPickle(inStrm,*m);
MolPickler::molFromPickle(inStrm, *m);
TEST_ASSERT(m->getNumAtoms(4));
delete m;
m = new RWMol();
TEST_ASSERT(m);
MolPickler::molFromPickle(inStrm,*m);
MolPickler::molFromPickle(inStrm, *m);
TEST_ASSERT(m->getNumAtoms(4));
delete m;
m = new RWMol();
TEST_ASSERT(m);
MolPickler::molFromPickle(inStrm,*m);
MolPickler::molFromPickle(inStrm, *m);
TEST_ASSERT(m->getNumAtoms(5));
delete m;
m = new RWMol();
TEST_ASSERT(m);
MolPickler::molFromPickle(inStrm,*m);
MolPickler::molFromPickle(inStrm, *m);
TEST_ASSERT(m->getNumAtoms(6));
delete m;
m = new RWMol();
TEST_ASSERT(m);
MolPickler::molFromPickle(inStrm,*m);
MolPickler::molFromPickle(inStrm, *m);
TEST_ASSERT(m->getNumAtoms(9));
delete m;
BOOST_LOG(rdInfoLog) << "done" << std::endl;
#else
BOOST_LOG(rdInfoLog) << "Compressed IO disabled; test skipped" << std::endl;
#endif
}
void test4(){
void test4() {
#ifdef SUPPORT_COMPRESSED_IO
BOOST_LOG(rdInfoLog) << "testing writing pickles to a single compressed file then reading them back" << std::endl;
BOOST_LOG(rdInfoLog) << "testing writing pickles to a single compressed file "
"then reading them back" << std::endl;
std::string rdbase = getenv("RDBASE");
std::string fname2 = rdbase + "/Code/Demos/RDKit/BinaryIO/test_data/mols.rdz";
@@ -206,197 +213,196 @@ void test4(){
io::filtering_ostream outStrm;
outStrm.push(io::gzip_compressor());
outStrm.push(io::file_sink(fname2,std::ios_base::out|std::ios_base::binary));
outStrm.push(
io::file_sink(fname2, std::ios_base::out | std::ios_base::binary));
TEST_ASSERT(outStrm.is_complete());
smiles="C1CCC1";
smiles = "C1CCC1";
m = SmilesToMol(smiles);
TEST_ASSERT(m);
TEST_ASSERT(m->getNumAtoms(4));
MolPickler::pickleMol(*m,outStrm);
MolPickler::pickleMol(*m, outStrm);
delete m;
smiles="C1CCC1";
smiles = "C1CCC1";
m = SmilesToMol(smiles);
TEST_ASSERT(m);
TEST_ASSERT(m->getNumAtoms(4));
RDDepict::compute2DCoords(*m);
MolPickler::pickleMol(*m,outStrm);
MolPickler::pickleMol(*m, outStrm);
delete m;
smiles="C1CCCC1";
smiles = "C1CCCC1";
m = SmilesToMol(smiles);
TEST_ASSERT(m);
TEST_ASSERT(m->getNumAtoms(5));
RDDepict::compute2DCoords(*m);
MolPickler::pickleMol(*m,outStrm);
MolPickler::pickleMol(*m, outStrm);
delete m;
smiles="c1ccccc1";
smiles = "c1ccccc1";
m = SmilesToMol(smiles);
TEST_ASSERT(m);
TEST_ASSERT(m->getNumAtoms(6));
RDDepict::compute2DCoords(*m);
MolPickler::pickleMol(*m,outStrm);
MolPickler::pickleMol(*m, outStrm);
delete m;
smiles="c1ccccc1CC(=O)O";
smiles = "c1ccccc1CC(=O)O";
m = SmilesToMol(smiles);
TEST_ASSERT(m);
TEST_ASSERT(m->getNumAtoms(9));
RDDepict::compute2DCoords(*m);
MolPickler::pickleMol(*m,outStrm);
MolPickler::pickleMol(*m, outStrm);
delete m;
io::flush(outStrm);
outStrm.pop();
io::filtering_istream inStrm;
inStrm.push(io::gzip_decompressor());
inStrm.push(io::file_source(fname2,std::ios_base::in|std::ios_base::binary));
inStrm.push(
io::file_source(fname2, std::ios_base::in | std::ios_base::binary));
TEST_ASSERT(inStrm.is_complete());
m = new RWMol();
TEST_ASSERT(m);
MolPickler::molFromPickle(inStrm,*m);
MolPickler::molFromPickle(inStrm, *m);
TEST_ASSERT(m->getNumAtoms(4));
delete m;
m = new RWMol();
TEST_ASSERT(m);
MolPickler::molFromPickle(inStrm,*m);
MolPickler::molFromPickle(inStrm, *m);
TEST_ASSERT(m->getNumAtoms(4));
delete m;
m = new RWMol();
TEST_ASSERT(m);
MolPickler::molFromPickle(inStrm,*m);
MolPickler::molFromPickle(inStrm, *m);
TEST_ASSERT(m->getNumAtoms(5));
delete m;
m = new RWMol();
TEST_ASSERT(m);
MolPickler::molFromPickle(inStrm,*m);
MolPickler::molFromPickle(inStrm, *m);
TEST_ASSERT(m->getNumAtoms(6));
delete m;
m = new RWMol();
TEST_ASSERT(m);
MolPickler::molFromPickle(inStrm,*m);
MolPickler::molFromPickle(inStrm, *m);
TEST_ASSERT(m->getNumAtoms(9));
delete m;
BOOST_LOG(rdInfoLog) << "done" << std::endl;
#else
BOOST_LOG(rdInfoLog) << "Compressed IO disabled; test skipped" << std::endl;
#endif
}
void test5(){
void test5() {
#ifdef SUPPORT_COMPRESSED_IO
BOOST_LOG(rdInfoLog) << "testing writing compressed pickles to a single file then reading them back" << std::endl;
BOOST_LOG(rdInfoLog) << "testing writing compressed pickles to a single file "
"then reading them back" << std::endl;
std::string rdbase = getenv("RDBASE");
std::string fname2 = rdbase + "/Code/Demos/RDKit/BinaryIO/test_data/tmp.rdz";
std::string smiles,buff;
std::string smiles, buff;
RWMol *m;
std::vector<unsigned int> filePs;
io::filtering_ostream outStrm;
outStrm.push(io::file_sink(fname2,std::ios_base::out|std::ios_base::binary));
outStrm.push(
io::file_sink(fname2, std::ios_base::out | std::ios_base::binary));
TEST_ASSERT(outStrm.is_complete());
smiles="C1CCC1";
smiles = "C1CCC1";
m = SmilesToMol(smiles);
TEST_ASSERT(m);
TEST_ASSERT(m->getNumAtoms(4));
io::filtering_ostream *tmpStrm;
tmpStrm=new io::filtering_ostream();
tmpStrm = new io::filtering_ostream();
tmpStrm->push(io::gzip_compressor());
tmpStrm->push(io::back_inserter(buff));
filePs.push_back(0);
MolPickler::pickleMol(*m,*tmpStrm);
MolPickler::pickleMol(*m, *tmpStrm);
delete m;
tmpStrm->reset();
outStrm<<buff.size();
outStrm<<buff;
std::cerr<<"sz: " <<buff.size()<<" "<<outStrm.tellp() <<std::endl;
buff="";
outStrm << buff.size();
outStrm << buff;
std::cerr << "sz: " << buff.size() << " " << outStrm.tellp() << std::endl;
buff = "";
tmpStrm->push(io::gzip_compressor());
tmpStrm->push(io::back_inserter(buff));
smiles="C1CCC1";
smiles = "C1CCC1";
m = SmilesToMol(smiles);
TEST_ASSERT(m);
TEST_ASSERT(m->getNumAtoms(4));
RDDepict::compute2DCoords(*m);
filePs.push_back(outStrm.tellp());
MolPickler::pickleMol(*m,*tmpStrm);
MolPickler::pickleMol(*m, *tmpStrm);
delete m;
tmpStrm->reset();
outStrm<<buff.size();
outStrm<<buff;
std::cerr<<"sz: " <<buff.size()<<" "<<outStrm.tellp() <<std::endl;
buff="";
outStrm << buff.size();
outStrm << buff;
std::cerr << "sz: " << buff.size() << " " << outStrm.tellp() << std::endl;
buff = "";
tmpStrm->push(io::gzip_compressor());
tmpStrm->push(io::back_inserter(buff));
smiles="C1CCCC1";
smiles = "C1CCCC1";
m = SmilesToMol(smiles);
TEST_ASSERT(m);
TEST_ASSERT(m->getNumAtoms(5));
RDDepict::compute2DCoords(*m);
filePs.push_back(outStrm.tellp());
MolPickler::pickleMol(*m,*tmpStrm);
MolPickler::pickleMol(*m, *tmpStrm);
delete m;
tmpStrm->reset();
outStrm<<buff.size();
outStrm<<buff;
std::cerr<<"sz: " <<buff.size()<<" "<<outStrm.tellp() <<std::endl;
buff="";
outStrm << buff.size();
outStrm << buff;
std::cerr << "sz: " << buff.size() << " " << outStrm.tellp() << std::endl;
buff = "";
tmpStrm->push(io::gzip_compressor());
tmpStrm->push(io::back_inserter(buff));
smiles="c1ccccc1";
smiles = "c1ccccc1";
m = SmilesToMol(smiles);
TEST_ASSERT(m);
TEST_ASSERT(m->getNumAtoms(6));
RDDepict::compute2DCoords(*m);
filePs.push_back(outStrm.tellp());
MolPickler::pickleMol(*m,*tmpStrm);
MolPickler::pickleMol(*m, *tmpStrm);
delete m;
tmpStrm->reset();
outStrm<<buff.size();
outStrm<<buff;
std::cerr<<"sz: " <<buff.size()<<" "<<outStrm.tellp() <<std::endl;
buff="";
outStrm << buff.size();
outStrm << buff;
std::cerr << "sz: " << buff.size() << " " << outStrm.tellp() << std::endl;
buff = "";
tmpStrm->push(io::gzip_compressor());
tmpStrm->push(io::back_inserter(buff));
smiles="c1ccccc1CC(=O)O";
smiles = "c1ccccc1CC(=O)O";
m = SmilesToMol(smiles);
TEST_ASSERT(m);
TEST_ASSERT(m->getNumAtoms(9));
RDDepict::compute2DCoords(*m);
filePs.push_back(outStrm.tellp());
MolPickler::pickleMol(*m,*tmpStrm);
MolPickler::pickleMol(*m, *tmpStrm);
delete m;
tmpStrm->reset();
outStrm<<buff.size();
outStrm<<buff;
std::cerr<<"sz: " <<buff.size()<<" "<<outStrm.tellp() <<std::endl;
outStrm << buff.size();
outStrm << buff;
std::cerr << "sz: " << buff.size() << " " << outStrm.tellp() << std::endl;
delete tmpStrm;
io::flush(outStrm);
@@ -406,80 +412,81 @@ void test5(){
io::filtering_istream inStrm;
unsigned int sz;
char *charArr;
inStrm.push(io::file_source(fname2,std::ios_base::in|std::ios_base::binary));
inStrm.push(
io::file_source(fname2, std::ios_base::in | std::ios_base::binary));
TEST_ASSERT(inStrm.is_complete());
inStrm>>sz;
inStrm >> sz;
charArr = new char[sz];
inStrm.read(charArr,sz);
inStrm.read(charArr, sz);
buff = "";
buff.append(charArr,sz);
buff.append(charArr, sz);
tmpIStrm.push(io::gzip_decompressor());
tmpIStrm.push(boost::make_iterator_range(buff));
m = new RWMol();
TEST_ASSERT(m);
MolPickler::molFromPickle(tmpIStrm,*m);
MolPickler::molFromPickle(tmpIStrm, *m);
TEST_ASSERT(m->getNumAtoms(4));
delete m;
inStrm>>sz;
delete [] charArr;
inStrm >> sz;
delete[] charArr;
charArr = new char[sz];
inStrm.read(charArr,sz);
inStrm.read(charArr, sz);
buff = "";
buff.append(charArr,sz);
buff.append(charArr, sz);
tmpIStrm.reset();
tmpIStrm.push(io::gzip_decompressor());
tmpIStrm.push(boost::make_iterator_range(buff));
m = new RWMol();
TEST_ASSERT(m);
MolPickler::molFromPickle(tmpIStrm,*m);
MolPickler::molFromPickle(tmpIStrm, *m);
TEST_ASSERT(m->getNumAtoms(4));
delete m;
inStrm>>sz;
delete [] charArr;
inStrm >> sz;
delete[] charArr;
charArr = new char[sz];
inStrm.read(charArr,sz);
inStrm.read(charArr, sz);
buff = "";
buff.append(charArr,sz);
buff.append(charArr, sz);
tmpIStrm.reset();
tmpIStrm.push(io::gzip_decompressor());
tmpIStrm.push(boost::make_iterator_range(buff));
m = new RWMol();
TEST_ASSERT(m);
MolPickler::molFromPickle(tmpIStrm,*m);
MolPickler::molFromPickle(tmpIStrm, *m);
TEST_ASSERT(m->getNumAtoms(5));
delete m;
inStrm>>sz;
delete [] charArr;
inStrm >> sz;
delete[] charArr;
charArr = new char[sz];
inStrm.read(charArr,sz);
inStrm.read(charArr, sz);
buff = "";
buff.append(charArr,sz);
buff.append(charArr, sz);
tmpIStrm.reset();
tmpIStrm.push(io::gzip_decompressor());
tmpIStrm.push(boost::make_iterator_range(buff));
m = new RWMol();
TEST_ASSERT(m);
MolPickler::molFromPickle(tmpIStrm,*m);
MolPickler::molFromPickle(tmpIStrm, *m);
TEST_ASSERT(m->getNumAtoms(6));
delete m;
inStrm>>sz;
delete [] charArr;
inStrm >> sz;
delete[] charArr;
charArr = new char[sz];
inStrm.read(charArr,sz);
inStrm.read(charArr, sz);
buff = "";
buff.append(charArr,sz);
buff.append(charArr, sz);
tmpIStrm.reset();
tmpIStrm.push(io::gzip_decompressor());
tmpIStrm.push(boost::make_iterator_range(buff));
m = new RWMol();
TEST_ASSERT(m);
MolPickler::molFromPickle(tmpIStrm,*m);
MolPickler::molFromPickle(tmpIStrm, *m);
TEST_ASSERT(m->getNumAtoms(9));
delete m;
@@ -489,8 +496,7 @@ void test5(){
#endif
}
int main(int argc,char *argv[]){
int main(int argc, char *argv[]) {
RDLog::InitLogs();
#if 1
test1();

View File

@@ -17,25 +17,24 @@
using namespace RDKit;
void runMol(ROMol *mol,int checkEvery=10,bool verbose=true){
void runMol(ROMol *mol, int checkEvery = 10, bool verbose = true) {
ForceFields::ForceField *field;
std::cout << MolToMolBlock(*mol) << "$$$$" << std::endl;
try{
field=UFF::constructForceField(*mol,2.5);
try {
field = UFF::constructForceField(*mol, 2.5);
} catch (...) {
field=0;
field = 0;
}
if(field){
if (field) {
field->initialize();
int needMore=1;
int nPasses=0;
while(needMore){
int needMore = 1;
int nPasses = 0;
while (needMore) {
#if 1
needMore = field->minimize(checkEvery);
if(verbose) std::cerr << "\t" << ++nPasses << std::endl;
if (verbose) std::cerr << "\t" << ++nPasses << std::endl;
#else
needMore = field->minimize(1);
std::cout << MolToMolBlock(mol) << "$$$$" << std::endl;
@@ -46,63 +45,60 @@ void runMol(ROMol *mol,int checkEvery=10,bool verbose=true){
} else {
std::cerr << "failed";
}
}
void runMolFile(std::string fileName,int checkEvery=10){
RWMol *mol=MolFileToMol(fileName,false);
void runMolFile(std::string fileName, int checkEvery = 10) {
RWMol *mol = MolFileToMol(fileName, false);
TEST_ASSERT(mol);
MolOps::sanitizeMol(*mol);
ROMol *mol2=MolOps::addHs(*mol,false,true);
ROMol *mol2 = MolOps::addHs(*mol, false, true);
runMol(mol2,checkEvery);
runMol(mol2, checkEvery);
delete mol;
delete mol2;
}
void runSDFile(std::string fileName,int checkEvery=10){
SDMolSupplier suppl(fileName,false);
void runSDFile(std::string fileName, int checkEvery = 10) {
SDMolSupplier suppl(fileName, false);
RWMol *mol;
mol = (RWMol *)suppl.next();
while(mol){
while (mol) {
std::string name;
mol->getProp(common_properties::_Name,name);
mol->getProp(common_properties::_Name, name);
std::cerr << "Mol: " << name << std::endl;
try{
try {
MolOps::sanitizeMol(*mol);
} catch (...) {
std::cerr << " sanitization failed" << std::endl;
delete mol;
mol = 0;
}
if(mol){
ROMol *mol2=MolOps::addHs(*mol,false,true);
if (mol) {
ROMol *mol2 = MolOps::addHs(*mol, false, true);
delete mol;
runMol(mol2,checkEvery,false);
runMol(mol2, checkEvery, false);
delete mol2;
}
mol = (RWMol *)suppl.next();
}
}
//-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
//
//-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
int main(int argc,char *argv[]){
PRECONDITION(argc>1,"bad arguments");
std::string fileName=argv[1];
int checkEvery=10;
std::cerr << ">" << fileName<< " " << fileName.find(".sdf") << std::endl;
if(fileName.find(".sdf")==std::string::npos){
runMolFile(fileName,checkEvery);
int main(int argc, char *argv[]) {
PRECONDITION(argc > 1, "bad arguments");
std::string fileName = argv[1];
int checkEvery = 10;
std::cerr << ">" << fileName << " " << fileName.find(".sdf") << std::endl;
if (fileName.find(".sdf") == std::string::npos) {
runMolFile(fileName, checkEvery);
} else {
runSDFile(fileName,checkEvery);
runSDFile(fileName, checkEvery);
}
std::cerr << "done" << std::endl;

View File

@@ -10,30 +10,30 @@
#include <GraphMol/RDKitBase.h>
namespace TemplateEnum {
using namespace RDKit;
using namespace RDKit;
class EnumException : public std::exception {
public :
EnumException(const char *msg) : _msg(msg) {};
EnumException(const std::string msg) : _msg(msg) {};
const char *message () const { return _msg.c_str(); };
~EnumException () throw () {};
private :
std::string _msg;
};
class EnumException : public std::exception {
public:
EnumException(const char *msg) : _msg(msg){};
EnumException(const std::string msg) : _msg(msg){};
const char *message() const { return _msg.c_str(); };
~EnumException() throw(){};
void orientSidechain(RWMol *mol,RWMol *sidechain,
int molAttachIdx,int sidechainAttachIdx);
private:
std::string _msg;
};
typedef std::vector< RWMOL_SPTR_VECT > VECT_RWMOL_SPTR_VECT;
void markAttachmentPoints(RWMOL_SPTR *mol,char frontMarker='X');
void markAttachmentPoints(RWMol *mol,char frontMarker='X');
void prepareSidechains(RWMOL_SPTR_VECT *sidechains,char frontMarker='Y');
RWMOL_SPTR_VECT enumerateLibrary(RWMol *mol,VECT_RWMOL_SPTR_VECT &sidechains,
bool orientSidechains=true);
RWMOL_SPTR_VECT enumFromFiles(const char *templateName,
std::vector<const char *> &sidechainName);
void orientSidechain(RWMol *mol, RWMol *sidechain, int molAttachIdx,
int sidechainAttachIdx);
} // end of TemplateEnum namespace
typedef std::vector<RWMOL_SPTR_VECT> VECT_RWMOL_SPTR_VECT;
void markAttachmentPoints(RWMOL_SPTR *mol, char frontMarker = 'X');
void markAttachmentPoints(RWMol *mol, char frontMarker = 'X');
void prepareSidechains(RWMOL_SPTR_VECT *sidechains, char frontMarker = 'Y');
RWMOL_SPTR_VECT enumerateLibrary(RWMol *mol, VECT_RWMOL_SPTR_VECT &sidechains,
bool orientSidechains = true);
RWMOL_SPTR_VECT enumFromFiles(const char *templateName,
std::vector<const char *> &sidechainName);
} // end of TemplateEnum namespace
#endif

View File

@@ -7,401 +7,404 @@
#include <Geometry/Transform3D.h>
#include <GraphMol/MolTransforms/MolTransforms.h>
#include <GraphMol/FileParsers/FileParsers.h>
#define FEQ(_a_,_b_) (fabs((_a_)-(_b_))<1e-4)
#define FEQ(_a_, _b_) (fabs((_a_) - (_b_)) < 1e-4)
namespace TemplateEnum {
using namespace RDKit;
using namespace RDKit;
// ------------------------------------------------------------------
// ------------------------------------------------------------------
//
// transforms a sidechain so that it is oriented better for attachment
// to a molecule
//
// Arguments:
// mol: core molecule
// sidechain: sidechain to attach.
// molConnectorIdx: the index of the attachment point atom in the
// molecule.
// sidechainConnectorIdx: the index of the attachment point atom
// in the sidechain.
//
// ------------------------------------------------------------------
void orientSidechain(RWMol *mol, RWMol *sidechain, int molAttachIdx,
int sidechainAttachIdx) {
PRECONDITION(mol, "bad molecule");
PRECONDITION(sidechain, "bad molecule");
// ---------
// start by getting our 4 atoms
// ---------
Conformer &molConf = mol->getConformer();
Conformer &sidechainConf = sidechain->getConformer();
Atom *molConnAtom, *molAttachAtom;
Atom *chainConnAtom, *chainAttachAtom;
molAttachAtom = mol->getAtomWithIdx(molAttachIdx);
chainAttachAtom = sidechain->getAtomWithIdx(sidechainAttachIdx);
PRECONDITION(molAttachAtom->getDegree() == 1,
"attachment points must be degree 1");
PRECONDITION(chainAttachAtom->getDegree() == 1,
"attachment points must be degree 1");
RWMol::ADJ_ITER nbrIdx, endNbrs;
boost::tie(nbrIdx, endNbrs) = mol->getAtomNeighbors(molAttachAtom);
molConnAtom = mol->getAtomWithIdx(*nbrIdx);
boost::tie(nbrIdx, endNbrs) = sidechain->getAtomNeighbors(chainAttachAtom);
chainConnAtom = sidechain->getAtomWithIdx(*nbrIdx);
//-----------------------------------------
// Notation:
// Pmc: molecule connection point (the atom that will be
// removed from the molecule).
// Pma: molecule attachment point (the atom to which we'll form
// the bond).
// Psc: sidechain connection point
// Psa: sidechain attachment point
// Vm: Pmc-Pma (molecular attachment vector)
// Vs: Psc-Psa (sidechain attachment vector)
//
// transforms a sidechain so that it is oriented better for attachment
// to a molecule
//
// Arguments:
// mol: core molecule
// sidechain: sidechain to attach.
// molConnectorIdx: the index of the attachment point atom in the
// molecule.
// sidechainConnectorIdx: the index of the attachment point atom
// in the sidechain.
//
// ------------------------------------------------------------------
void orientSidechain(RWMol *mol,RWMol *sidechain,
int molAttachIdx,int sidechainAttachIdx){
PRECONDITION(mol,"bad molecule");
PRECONDITION(sidechain,"bad molecule");
//-----------------------------------------
RDGeom::Transform3D sidechainTform, templateTform, tmpTform;
// ---------
// start by getting our 4 atoms
// ---------
Conformer &molConf=mol->getConformer();
Conformer &sidechainConf=sidechain->getConformer();
Atom *molConnAtom,*molAttachAtom;
Atom *chainConnAtom,*chainAttachAtom;
molAttachAtom = mol->getAtomWithIdx(molAttachIdx);
chainAttachAtom = sidechain->getAtomWithIdx(sidechainAttachIdx);
PRECONDITION(molAttachAtom->getDegree()==1,"attachment points must be degree 1");
PRECONDITION(chainAttachAtom->getDegree()==1,"attachment points must be degree 1");
RWMol::ADJ_ITER nbrIdx,endNbrs;
boost::tie(nbrIdx,endNbrs) = mol->getAtomNeighbors(molAttachAtom);
molConnAtom=mol->getAtomWithIdx(*nbrIdx);
boost::tie(nbrIdx,endNbrs) = sidechain->getAtomNeighbors(chainAttachAtom);
chainConnAtom=sidechain->getAtomWithIdx(*nbrIdx);
RDGeom::Point3D Vm, Um, Pmc, Pma;
RDGeom::Point3D Vs, Us, Psc, Psa;
Pmc = molConf.getAtomPos(molConnAtom->getIdx());
Pma = molConf.getAtomPos(molAttachAtom->getIdx());
std::cerr << "p=array([" << Pma.x << "," << Pma.y << "," << Pma.z << "])"
<< std::endl;
Psc = sidechainConf.getAtomPos(chainConnAtom->getIdx());
Psa = sidechainConf.getAtomPos(chainAttachAtom->getIdx());
templateTform.setToIdentity();
Vm = Pmc - Pma;
// note the opposite direction here:
Vs = Psa - Psc;
Um = Vm;
Um.normalize();
std::cerr << "Um=array([" << Um.x << "," << Um.y << "," << Um.z << "])"
<< std::endl;
Us = Vs;
Us.normalize();
std::cerr << "Us=array([" << Us.x << "," << Us.y << "," << Us.z << "])"
<< std::endl;
// translate Psc -> Pma
// RDGeom::Point3D headTrans = Pma-Psc;
templateTform.setToIdentity();
tmpTform.setToIdentity();
tmpTform.SetTranslation(Pma);
templateTform *= tmpTform;
double sinT, cosT;
cosT = Us.dotProduct(Um);
if (cosT > 1.0) cosT = 1.0;
if (fabs(cosT) < 1.0) {
tmpTform.setToIdentity();
sinT = sqrt(1.0 - cosT * cosT);
RDGeom::Point3D rotnAxis = Us.crossProduct(Um);
rotnAxis.normalize();
std::cerr << "ax=array([" << rotnAxis.x << "," << rotnAxis.y << ","
<< rotnAxis.z << "])" << std::endl;
tmpTform.SetRotation(cosT, sinT, rotnAxis);
templateTform *= tmpTform;
} else if (cosT == 1.0) {
RDGeom::Point3D normal(1, 0, 0);
if (fabs(Us.dotProduct(normal)) == 1.0) {
normal = RDGeom::Point3D(0, 1, 0);
}
RDGeom::Point3D rotnAxis = Us.crossProduct(normal);
templateTform.SetRotation(-1, 0, rotnAxis);
}
tmpTform.setToIdentity();
tmpTform.SetTranslation(Psc * -1.0);
templateTform *= tmpTform;
// ---------
// transform the atomic positions in the sidechain:
// ---------
MolTransforms::transformMolsAtoms(sidechain, templateTform);
// that's it!
}
// ------------------------------------------------------------------
//
//  Attaches a sidechain fragment to a molecule.
//
//  Arguments:
//    mol: molecule to be modified
//    sidechain: sidechain to attach.  The sidechain is copied in.
//    molConnectorIdx: the index of the attachment point atom in the
//       molecule.
//    sidechainConnectorIdx: the index of the attachment point atom
//       in the sidechain.
//    bondType: type of the bond to form between the atoms
//
//  The connector atoms are *NOT* part of the final molecule, they
//  merely serve to establish where things connect.
//
//  Each connector atom must have at least one neighbor.  This is
//  checked with a PRECONDITION before the neighbor iterator is
//  dereferenced; note that the check on the sidechain connector runs
//  after the sidechain has already been inserted into mol.
//
// ------------------------------------------------------------------
void molAddSidechain(RWMol *mol, RWMol *sidechain, int molConnectorIdx,
                     int sidechainConnectorIdx, Bond::BondType bondType) {
  PRECONDITION(mol, "bad molecule provided");
  PRECONDITION(sidechain, "bad sidechain provided");

  // remember the atom count before the merge: the sidechain connector is
  // looked up at (sidechainConnectorIdx + origNumAtoms) afterwards
  int origNumAtoms = mol->getNumAtoms();
  mol->insertMol(*sidechain);

  // get pointers to the two connectors (these are the atoms
  // we'll end up removing)
  Atom *molConnAtom = mol->getAtomWithIdx(molConnectorIdx);
  Atom *sidechainConnAtom =
      mol->getAtomWithIdx(sidechainConnectorIdx + origNumAtoms);

  // now use those pointers to get the atoms which will remain
  // (we're going to connect these) and remove the original
  // connection points from the molecule
  RWMol::ADJ_ITER nbrIdx, endNbrs;
  boost::tie(nbrIdx, endNbrs) = mol->getAtomNeighbors(molConnAtom);
  // an empty neighbor range would make the dereference below invalid
  PRECONDITION(nbrIdx != endNbrs, "molecule connector atom has no neighbors");
  // we are assuming that the first neighbor is the correct one.
  // Really we should be able to assume that there is only a single
  // attachment point.
  Atom *tmpAtom = molConnAtom;
  molConnAtom = mol->getAtomWithIdx(*nbrIdx);
  mol->removeAtom(tmpAtom);

  // repeat that process for the sidechain:
  boost::tie(nbrIdx, endNbrs) = mol->getAtomNeighbors(sidechainConnAtom);
  PRECONDITION(nbrIdx != endNbrs, "sidechain connector atom has no neighbors");
  tmpAtom = sidechainConnAtom;
  sidechainConnAtom = mol->getAtomWithIdx(*nbrIdx);
  mol->removeAtom(tmpAtom);

  // finally connect the remaining atoms:
  mol->addBond(molConnAtom, sidechainConnAtom, bondType);
}
// used as a starting point for connection bookmarks:
//  - markAttachmentPoints() bookmarks an attachment atom at this value,
//    plus (letter - 'a') when the label's second character is a lowercase
//    letter.
//  - enumerateLibrary() looks for the attachment points of sidechain set i
//    at this value plus i.
const int CONNECT_BOOKMARK_START = 0x23424223;
// ------------------------------------------------------------------
//
// Loop through all the atoms and bookmark the attachment points
//
// attachment points are assumed to have one of these properties:
// - common_properties::molFileAlias (set by the mol file parser)
// - common_properties::dummyLabel (set by the SMILES parser)
// frontMarker is the "recognition character" to be used to pick
// valid labels. e.g. if the frontMarker is 'X', a label beginning
// with 'Y' will not be marked.
//
// In addition to bookmarking the attachment points, we also set the
// common_properties::maxAttachIdx property, which holds an integer with
// the maximum attachment bookmark index. This is used in the
// enumeration to prevent us from having to scan through all the
// molecule's bookmarks.
//
//
// ------------------------------------------------------------------
// Convenience overload: unwraps the molecule held by the RWMOL_SPTR and
// forwards it, together with frontMarker, to the RWMol * overload.
void markAttachmentPoints(RWMOL_SPTR mol, char frontMarker) {
  RWMol *rawMol = mol.get();
  markAttachmentPoints(rawMol, frontMarker);
}
// Bookmarks every atom of mol whose label starts with frontMarker.
//
// The label is read from common_properties::molFileAlias when the atom has
// it, otherwise from common_properties::dummyLabel.  The bookmark used is
// CONNECT_BOOKMARK_START, plus (letter - 'a') when the label's second
// character is a lowercase letter.  If at least one atom was bookmarked, the
// largest bookmark value is stored on the molecule as
// common_properties::maxAttachIdx.
//
// Throws EnumException if a matching atom has more than one bond.
void markAttachmentPoints(RWMol *mol, char frontMarker) {
  PRECONDITION(mol, "bad molecule");

  int highestMark = 0;
  RWMol::AtomIterator cursor;
  for (cursor = mol->beginAtoms(); cursor != mol->endAtoms(); ++cursor) {
    Atom *atom = *cursor;

    // look for a candidate attachment-point label on this atom
    std::string label;
    if (atom->hasProp(common_properties::molFileAlias)) {
      atom->getProp(common_properties::molFileAlias, label);
    } else if (atom->hasProp(common_properties::dummyLabel)) {
      atom->getProp(common_properties::dummyLabel, label);
    }

    // unlabeled atoms and labels with a different marker are left alone
    if (label.empty() || label[0] != frontMarker) continue;

    // an attachment point may not carry more than one bond
    if (atom->getDegree() > 1) {
      throw EnumException("More than one bond to an attachment point.");
    }

    // an optional lowercase letter after the marker selects the slot
    int mark = CONNECT_BOOKMARK_START;
    if (label.length() > 1 && label[1] >= 'a' && label[1] <= 'z') {
      mark += label[1] - 'a';
    }
    mol->setAtomBookmark(atom, mark);
    if (mark > highestMark) highestMark = mark;
  }

  // only record the property when something was actually bookmarked
  if (highestMark != 0) {
    mol->setProp(common_properties::maxAttachIdx, highestMark);
  }
}
// ------------------------------------------------------------------
//
//  Walks the list of sidechain molecules and calls
//  _markAttachmentPoints()_ on each one, passing frontMarker through.
//
// ------------------------------------------------------------------
void prepareSidechains(RWMOL_SPTR_VECT *sidechains, char frontMarker) {
  PRECONDITION(sidechains, "bad sidechain list");
  RWMOL_SPTR_VECT::iterator current = sidechains->begin();
  while (current != sidechains->end()) {
    markAttachmentPoints(*current, frontMarker);
    ++current;
  }
}
// ------------------------------------------------------------------
//
// Enumerates the library around a template and returns the result.
//
//
// ------------------------------------------------------------------
RWMOL_SPTR_VECT enumerateLibrary(RWMol *templateMol,
VECT_RWMOL_SPTR_VECT &sidechains,
bool orientSidechains) {
PRECONDITION(templateMol, "bad molecule");
RWMOL_SPTR_VECT res, tmp;
res.push_back(RWMOL_SPTR(new RWMol(*templateMol)));
// if there's no attachment point on the molecule or no
// sidechains, return now:
if (!templateMol->hasProp(common_properties::maxAttachIdx) ||
sidechains.size() == 0)
return res;
int maxIdx;
templateMol->getProp(common_properties::maxAttachIdx, maxIdx);
tmp.clear();
// loop over the sidechains and attach them
for (unsigned int i = 0; i < sidechains.size(); i++) {
int tgtMark = CONNECT_BOOKMARK_START + i;
// here's another boundary condition
if (tgtMark > maxIdx) break;
/// loop over all atoms with the appropriate mark
// This means that if a mol has two attachment points with the
// same name (e.g. two Xa's) they'll always have the same
// sidechain attached to them. This is a feature.
RWMOL_SPTR_VECT::iterator sidechainIt;
for (sidechainIt = sidechains[i].begin();
sidechainIt != sidechains[i].end(); sidechainIt++) {
// we've got our sidechain, find the atom it attaches from
if ((*sidechainIt)->hasAtomBookmark(CONNECT_BOOKMARK_START)) {
//
// NOTE: If there's more than one marked atom in the sidechain,
/// we'll only use the first for the moment.
//
int sidechainAtomIdx = (*sidechainIt)
->getAtomWithBookmark(CONNECT_BOOKMARK_START)
->getIdx();
// now add the sidechain to each molecule
RWMOL_SPTR_VECT::iterator templMolIt;
// loop over all the mols we've generated to this point
for (templMolIt = res.begin(); templMolIt != res.end(); templMolIt++) {
RWMol *templ = new RWMol(**templMolIt);
std::string name, tmpStr;
if (templ->hasProp(common_properties::_Name)) {
templ->getProp(common_properties::_Name, tmpStr);
name = name + " " + tmpStr;
}
while (templ->hasAtomBookmark(tgtMark)) {
// this is the atom we'll be replacing in the template
Atom *at = templ->getAtomWithBookmark(tgtMark);
// copy and transform the sidechain:
RWMol *sidechain;
if (orientSidechains) {
sidechain = new RWMol(*(sidechainIt->get()));
orientSidechain(templ, sidechain, at->getIdx(), sidechainAtomIdx);
} else {
sidechain = sidechainIt->get();
}
// FIX: need to use the actual bond order here:
molAddSidechain(templ, sidechain, at->getIdx(), sidechainAtomIdx,
Bond::SINGLE);
if (sidechain->hasProp(common_properties::_Name)) {
sidechain->getProp(common_properties::_Name, tmpStr);
name = name + " " + tmpStr;
}
templ->clearAtomBookmark(tgtMark, at);
if (orientSidechains) {
delete sidechain;
}
}
// std::cout << templ << "> " << MolToSmiles(*templ) << std::endl;
if (name != "") templ->setProp(common_properties::_Name, name);
tmp.push_back(RWMOL_SPTR(templ));
}
}
}
//-----------------------------------------
// Notation:
// Pmc: molecule connection point (the atom that will be
// removed from the molecule).
// Pma: molecule attachment point (the atom to which we'll form
// the bond).
// Psc: sidechain connection point
// Psa: sidechain attachment point
// Vm: Pmc-Pma (molecular attachment vector)
// Vs: Psc-Psa (sidechain attachment vector)
//
//-----------------------------------------
RDGeom::Transform3D sidechainTform,templateTform,tmpTform;
RDGeom::Point3D Vm,Um,Pmc,Pma;
RDGeom::Point3D Vs,Us,Psc,Psa;
Pmc = molConf.getAtomPos(molConnAtom->getIdx());
Pma = molConf.getAtomPos(molAttachAtom->getIdx());
std::cerr << "p=array(["<<Pma.x<<","<<Pma.y<<","<<Pma.z<<"])" << std::endl;
Psc = sidechainConf.getAtomPos(chainConnAtom->getIdx());
Psa = sidechainConf.getAtomPos(chainAttachAtom->getIdx());
templateTform.setToIdentity();
Vm = Pmc - Pma;
// note the opposite direction here:
Vs = Psa - Psc;
Um = Vm;
Um.normalize();
std::cerr << "Um=array(["<<Um.x<<","<<Um.y<<","<<Um.z<<"])" << std::endl;
Us = Vs;
Us.normalize();
std::cerr << "Us=array(["<<Us.x<<","<<Us.y<<","<<Us.z<<"])" << std::endl;
// translate Psc -> Pma
//RDGeom::Point3D headTrans = Pma-Psc;
templateTform.setToIdentity();
tmpTform.setToIdentity();
tmpTform.SetTranslation(Pma);
templateTform *= tmpTform;
double sinT,cosT;
cosT = Us.dotProduct(Um);
if(cosT>1.0) cosT = 1.0;
if(fabs(cosT)<1.0){
tmpTform.setToIdentity();
sinT = sqrt(1.0-cosT*cosT);
RDGeom::Point3D rotnAxis=Us.crossProduct(Um);
rotnAxis.normalize();
std::cerr << "ax=array(["<<rotnAxis.x<<","<<rotnAxis.y<<","<<rotnAxis.z<<"])" << std::endl;
tmpTform.SetRotation(cosT,sinT,rotnAxis);
templateTform *= tmpTform;
} else if(cosT==1.0){
RDGeom::Point3D normal(1,0,0);
if(fabs(Us.dotProduct(normal))==1.0){
normal = RDGeom::Point3D(0,1,0);
}
RDGeom::Point3D rotnAxis=Us.crossProduct(normal);
templateTform.SetRotation(-1,0,rotnAxis);
}
tmpTform.setToIdentity();
tmpTform.SetTranslation(Psc*-1.0);
templateTform *= tmpTform;
// ---------
// transform the atomic positions in the sidechain:
// ---------
MolTransforms::transformMolsAtoms(sidechain,templateTform);
// that's it!
}
// ------------------------------------------------------------------
//
//  attaches a sidechain fragment to a molecule.
//
//  Arguments:
//    mol: molecule to be modified
//    sidechain: sidechain to attach.  The sidechain is copied in.
//    molConnectorIdx: the index of the attachment point atom in the
//      molecule.
//    sidechainConnectorIdx: the index of the attachment point atom
//      in the sidechain.
//    bondType: type of the bond to form between the atoms
//
//  The connector atoms are *NOT* part of the final molecule, they
//  merely serve to establish where things connect.
//
//  Throws:
//    EnumException if either connector atom has no neighbor, since
//    there is then nothing to form the new bond to.  Both connectors
//    are checked before any atom is removed.
//
// ------------------------------------------------------------------
void molAddSidechain(RWMol *mol,RWMol *sidechain,
                     int molConnectorIdx,int sidechainConnectorIdx,
                     Bond::BondType bondType){
  PRECONDITION(mol,"bad molecule provided");
  PRECONDITION(sidechain,"bad sidechain provided");
  // the sidechain index is offset by the molecule's original atom count
  // to locate the sidechain connector after the insertion
  int origNumAtoms=mol->getNumAtoms();
  mol->insertMol(*sidechain);

  // get pointers to the two connectors (these are the atoms
  // we'll end up removing)
  Atom *molConnAtom = mol->getAtomWithIdx(molConnectorIdx);
  Atom *sidechainConnAtom = mol->getAtomWithIdx(sidechainConnectorIdx+origNumAtoms);

  // now use those pointers to get the atoms which will remain
  // (we're going to connect these).
  // we are assuming that the first neighbor is the correct one.
  // Really we should be able to assume that there is only a single
  // attachment point.
  RWMol::ADJ_ITER nbrIdx,endNbrs;
  boost::tie(nbrIdx,endNbrs) = mol->getAtomNeighbors(molConnAtom);
  if(nbrIdx==endNbrs){
    // an unbonded connector would otherwise dereference the end iterator
    throw EnumException("No bond to the attachment point on the molecule.");
  }
  Atom *molKeepAtom = mol->getAtomWithIdx(*nbrIdx);

  // repeat that process for the sidechain:
  boost::tie(nbrIdx,endNbrs) = mol->getAtomNeighbors(sidechainConnAtom);
  if(nbrIdx==endNbrs){
    throw EnumException("No bond to the attachment point on the sidechain.");
  }
  Atom *sidechainKeepAtom = mol->getAtomWithIdx(*nbrIdx);

  // remove the original connection points from the molecule
  mol->removeAtom(molConnAtom);
  mol->removeAtom(sidechainConnAtom);

  // finally connect the remaining atoms:
  mol->addBond(molKeepAtom,sidechainKeepAtom,bondType);
}
// used as a starting point for connection bookmarks:
// markAttachmentPoints() bookmarks an attachment atom with this value
// plus the distance of the label's second character from 'a' (when that
// character is a lowercase letter), and enumerateLibrary() pairs
// sidechain set i with the bookmark CONNECT_BOOKMARK_START+i.
const int CONNECT_BOOKMARK_START=0x23424223;
// ------------------------------------------------------------------
//
// Loop through all the atoms and bookmark the attachment points
//
// attachment points are assumed to have one of these properties:
// - common_properties::molFileAlias (set by the mol file parser)
// - common_properties::dummyLabel (set by the SMILES parser)
// frontMarker is the "recognition character" to be used to pick
// valid labels. e.g. if the frontMarker is 'X', a label beginning
// with 'Y' will not be marked.
//
// In addition to bookmarking the attachment points, we also set
// the common_properties::maxAttachIdx property, which holds an integer with the
// maximum attachment bookmark index. This is used in the
// enumeration to prevent us from having to scan through all the
// molecule's bookmarks
//
//
// ------------------------------------------------------------------
// Shared-pointer convenience overload: extracts the raw molecule
// pointer and hands it to the RWMol* version.
void markAttachmentPoints(RWMOL_SPTR mol,char frontMarker){
  RWMol *rawMol=mol.get();
  markAttachmentPoints(rawMol,frontMarker);
}
// Bookmarks every atom of mol whose label starts with frontMarker and,
// if at least one atom was bookmarked, stores the largest bookmark used
// in the common_properties::maxAttachIdx property of the molecule.
//
// The label is taken from common_properties::molFileAlias when the atom
// has that property, otherwise from common_properties::dummyLabel.
//
// Throws EnumException if a labelled atom has more than one bond.
void markAttachmentPoints(RWMol *mol,char frontMarker){
  PRECONDITION(mol,"bad molecule");
  int highestMark=0;
  RWMol::AtomIterator it;
  for(it=mol->beginAtoms();it!=mol->endAtoms();it++){
    // fetch the candidate label; the mol file alias takes precedence
    std::string label;
    if((*it)->hasProp(common_properties::molFileAlias)){
      (*it)->getProp(common_properties::molFileAlias,label);
    } else if((*it)->hasProp(common_properties::dummyLabel)){
      (*it)->getProp(common_properties::dummyLabel,label);
    }

    // atoms without a label, or with a label that does not begin with
    // the front marker, are not attachment points
    if(label.empty() || label[0]!=frontMarker){
      continue;
    }

    // to avoid trouble later, an attachment point may not have more
    // than one bond to it
    if((*it)->getDegree()>1){
      throw EnumException("More than one bond to an attachment point.");
    }

    // a lowercase second character selects the bookmark: 'a' -> +0,
    // 'b' -> +1, ...; anything else keeps the base bookmark
    int mark=CONNECT_BOOKMARK_START;
    if(label.length()>1 && label[1]>='a' && label[1]<='z'){
      mark += label[1]-'a';
    }
    mol->setAtomBookmark(*it,mark);
    if(highestMark<mark){
      highestMark=mark;
    }
  }
  if(highestMark!=0){
    mol->setProp(common_properties::maxAttachIdx,highestMark);
  }
}
// ------------------------------------------------------------------
//
//  walks the list of sidechain molecules and runs
//  _markAttachmentPoints()_ on every one of them.
//
//  Arguments:
//    sidechains: the sidechain molecules to mark; must not be null.
//    frontMarker: the character that identifies attachment labels.
//
// ------------------------------------------------------------------
void prepareSidechains(RWMOL_SPTR_VECT *sidechains,char frontMarker){
  PRECONDITION(sidechains,"bad sidechain list");
  RWMOL_SPTR_VECT::iterator cur=sidechains->begin();
  const RWMOL_SPTR_VECT::iterator last=sidechains->end();
  while(cur!=last){
    markAttachmentPoints(*cur,frontMarker);
    ++cur;
  }
}
// ------------------------------------------------------------------
//
// Enumerates the library around a template and returns the result.
//
//
// ------------------------------------------------------------------
RWMOL_SPTR_VECT enumerateLibrary(RWMol *templateMol,
VECT_RWMOL_SPTR_VECT &sidechains,
bool orientSidechains){
PRECONDITION(templateMol,"bad molecule");
RWMOL_SPTR_VECT res,tmp;
res.push_back(RWMOL_SPTR(new RWMol(*templateMol)));
// if there's no attachment point on the molecule or no
// sidechains, return now:
if(!templateMol->hasProp(common_properties::maxAttachIdx) || sidechains.size()==0 )
return res;
int maxIdx;
templateMol->getProp(common_properties::maxAttachIdx,maxIdx);
tmp.clear();
// loop over the sidechains and attach them
for(unsigned int i=0;i<sidechains.size();i++){
int tgtMark=CONNECT_BOOKMARK_START+i;
// here's another boundary condition
if(tgtMark>maxIdx) break;
/// loop over all atoms with the appropriate mark
// This means that if a mol has two attachment points with the
// same name (e.g. two Xa's) they'll always have the same
// sidechain attached to them. This is a feature.
RWMOL_SPTR_VECT::iterator sidechainIt;
for(sidechainIt=sidechains[i].begin();
sidechainIt!=sidechains[i].end();
sidechainIt++){
// we've got our sidechain, find the atom it attaches from
if( (*sidechainIt)->hasAtomBookmark(CONNECT_BOOKMARK_START) ){
//
// NOTE: If there's more than one marked atom in the sidechain,
/// we'll only use the first for the moment.
//
int sidechainAtomIdx = (*sidechainIt)->getAtomWithBookmark(CONNECT_BOOKMARK_START)->getIdx();
// now add the sidechain to each molecule
RWMOL_SPTR_VECT::iterator templMolIt;
// loop over all the mols we've generated to this point
for(templMolIt=res.begin();templMolIt!=res.end();templMolIt++){
RWMol *templ = new RWMol(**templMolIt);
std::string name,tmpStr;
if(templ->hasProp(common_properties::_Name)){
templ->getProp(common_properties::_Name,tmpStr);
name = name + " " + tmpStr;
}
while(templ->hasAtomBookmark(tgtMark)){
// this is the atom we'll be replacing in the template
Atom *at = templ->getAtomWithBookmark(tgtMark);
// copy and transform the sidechain:
RWMol *sidechain;
if(orientSidechains){
sidechain = new RWMol(*(sidechainIt->get()));
orientSidechain(templ,sidechain,
at->getIdx(),sidechainAtomIdx);
} else {
sidechain = sidechainIt->get();
}
// FIX: need to use the actual bond order here:
molAddSidechain(templ,sidechain,
at->getIdx(),sidechainAtomIdx,
Bond::SINGLE);
if(sidechain->hasProp(common_properties::_Name)){
sidechain->getProp(common_properties::_Name,tmpStr);
name = name + " " + tmpStr;
}
templ->clearAtomBookmark(tgtMark,at);
if(orientSidechains){
delete sidechain;
}
}
//std::cout << templ << "> " << MolToSmiles(*templ) << std::endl;
if(name != "") templ->setProp(common_properties::_Name,name);
tmp.push_back(RWMOL_SPTR(templ));
}
}
}
//
// if we just made any molecules, free up the memory used by the
// existing result set and move the molecules we just generated
// over
if(tmp.size()){
// if we just made any molecules, free up the memory used by the
// existing result set and move the molecules we just generated
// over
if (tmp.size()) {
#if 0
RWMOL_SPTR_VECT::iterator tmpMolIt;
for(tmpMolIt=res.begin();tmpMolIt!=res.end();tmpMolIt++){
delete *tmpMolIt;
}
#endif
res = tmp;
tmp.clear();
}
res = tmp;
tmp.clear();
}
return res;
}
return res;
}
// ------------------------------------------------------------------
//
// Reads a template and library of sidechains from input files.
// the template file should be a mol file and the sidechain files
// SD files
//
// ------------------------------------------------------------------
RWMOL_SPTR_VECT enumFromFiles(const char *templateName,
std::vector<const char *> &sidechainNames) {
PRECONDITION(templateName, "bad template file name passed in");
// build and mark the template molecule
RWMol *templ = MolFileToMol(templateName, false);
if (!templ) throw EnumException("could not construct template molecule");
markAttachmentPoints(templ, 'X');
// now build and mark each set of sidechains:
RWMOL_SPTR_VECT sidechains;
VECT_RWMOL_SPTR_VECT allSidechains;
for (std::vector<const char *>::const_iterator i = sidechainNames.begin();
i != sidechainNames.end(); i++) {
sidechains = SDFileToMols(*i, false);
if (!sidechains.size()) {
std::string err = "no sidechains read from file: ";
err += *i;
throw EnumException(err.c_str());
}
prepareSidechains(&sidechains, 'X');
allSidechains.push_back(sidechains);
}
// ------------------------------------------------------------------
// enumerate the library:
RWMOL_SPTR_VECT library = enumerateLibrary(templ, allSidechains);
//--------------------------
//
// Reads a template and library of sidechains from input files.
// the template file should be a mol file and the sidechain files
// SD files
// Clean up the molecules and sidechains we constructed along the
// way.
//
// ------------------------------------------------------------------
RWMOL_SPTR_VECT enumFromFiles(const char *templateName,
std::vector<const char *> &sidechainNames){
PRECONDITION(templateName,"bad template file name passed in");
// build and mark the template molecule
RWMol *templ = MolFileToMol(templateName,false);
if(!templ) throw EnumException("could not construct template molecule");
markAttachmentPoints(templ,'X');
// now build and mark each set of sidechains:
RWMOL_SPTR_VECT sidechains;
VECT_RWMOL_SPTR_VECT allSidechains;
for(std::vector<const char*>::const_iterator i=sidechainNames.begin();
i!=sidechainNames.end();i++){
sidechains = SDFileToMols(*i,false);
if(!sidechains.size()){
std::string err="no sidechains read from file: ";
err += *i;
throw EnumException(err.c_str());
}
prepareSidechains(&sidechains,'X');
allSidechains.push_back(sidechains);
}
// enumerate the library:
RWMOL_SPTR_VECT library=enumerateLibrary(templ,allSidechains);
//--------------------------
//
// Clean up the molecules and sidechains we constructed along the
// way.
//
//--------------------------
delete templ;
//--------------------------
delete templ;
#if 0
VECT_RWMOL_SPTR_VECT::iterator vmpvI;
for(vmpvI=allSidechains.begin();vmpvI!=allSidechains.end();vmpvI++){
@@ -412,10 +415,9 @@ namespace TemplateEnum {
vmpvI->clear();
}
#endif
allSidechains.clear();
allSidechains.clear();
return library;
}
} // end of TemplateEnum namespace
return library;
}
} // end of TemplateEnum namespace

View File

@@ -13,273 +13,326 @@ using namespace TemplateEnum;
#include <math.h>
bool feq(RDGeom::Point3D p1,RDGeom::Point3D p2,double tol=1e-4){
return feq(p1.x,p2.x,tol)&&feq(p1.y,p2.y,tol)&&feq(p1.z,p2.z,tol);
bool feq(RDGeom::Point3D p1, RDGeom::Point3D p2, double tol = 1e-4) {
return feq(p1.x, p2.x, tol) && feq(p1.y, p2.y, tol) && feq(p1.z, p2.z, tol);
}
void test1(){
void test1() {
// single attachment point, small list
std::cout << " ----------> Test1 " << std::endl;
RWMOL_SPTR_VECT library;
std::vector<const char *>fileNames;
std::vector<const char *> fileNames;
fileNames.push_back("esters.2.sdf");
fileNames.push_back("esters.2.sdf");
library = enumFromFiles("template.1.mol",fileNames);
library = enumFromFiles("template.1.mol", fileNames);
CHECK_INVARIANT(library.size()==2,"");
CHECK_INVARIANT(library[0]->getNumAtoms()==10,"");
CHECK_INVARIANT(library[1]->getNumAtoms()==11,"");
CHECK_INVARIANT(library.size() == 2, "");
CHECK_INVARIANT(library[0]->getNumAtoms() == 10, "");
CHECK_INVARIANT(library[1]->getNumAtoms() == 11, "");
library.clear();
std::cout << " <---------- Done " << std::endl;
}
void test2(){
void test2() {
// single attachment point, larger list
std::cout << " ----------> Test2 " << std::endl;
RWMOL_SPTR_VECT library;
std::vector<const char *>fileNames;
std::vector<const char *> fileNames;
fileNames.push_back("esters.sdf");
fileNames.push_back("esters.sdf");
library = enumFromFiles("template.1.mol",fileNames);
library = enumFromFiles("template.1.mol", fileNames);
CHECK_INVARIANT(library.size()==6,"");
CHECK_INVARIANT(library[0]->getNumAtoms()==10,"");
CHECK_INVARIANT(library[1]->getNumAtoms()==11,"");
CHECK_INVARIANT(library[2]->getNumAtoms()==12,"");
CHECK_INVARIANT(library[3]->getNumAtoms()==12,"");
CHECK_INVARIANT(library[4]->getNumAtoms()==13,"");
CHECK_INVARIANT(library[5]->getNumAtoms()==13,"");
CHECK_INVARIANT(library.size() == 6, "");
CHECK_INVARIANT(library[0]->getNumAtoms() == 10, "");
CHECK_INVARIANT(library[1]->getNumAtoms() == 11, "");
CHECK_INVARIANT(library[2]->getNumAtoms() == 12, "");
CHECK_INVARIANT(library[3]->getNumAtoms() == 12, "");
CHECK_INVARIANT(library[4]->getNumAtoms() == 13, "");
CHECK_INVARIANT(library[5]->getNumAtoms() == 13, "");
library.clear();
std::cout << " <---------- Done " << std::endl;
}
void test3(){
void test3() {
// two attachment points, small list
std::cout << " ----------> Test3 " << std::endl;
RWMOL_SPTR_VECT library;
std::vector<const char *>fileNames;
std::vector<const char *> fileNames;
fileNames.push_back("esters.2.sdf");
fileNames.push_back("esters.2.sdf");
library = enumFromFiles("template.mol",fileNames);
library = enumFromFiles("template.mol", fileNames);
CHECK_INVARIANT(library.size()==4,"");
CHECK_INVARIANT(library[0]->getNumAtoms()==14,"");
CHECK_INVARIANT(library[1]->getNumAtoms()==15,"");
CHECK_INVARIANT(library[2]->getNumAtoms()==15,"");
CHECK_INVARIANT(library[3]->getNumAtoms()==16,"");
CHECK_INVARIANT(library.size() == 4, "");
CHECK_INVARIANT(library[0]->getNumAtoms() == 14, "");
CHECK_INVARIANT(library[1]->getNumAtoms() == 15, "");
CHECK_INVARIANT(library[2]->getNumAtoms() == 15, "");
CHECK_INVARIANT(library[3]->getNumAtoms() == 16, "");
library.clear();
std::cout << " <---------- Done " << std::endl;
}
void test4(){
void test4() {
// test templates that have repeated attachment points
std::cout << " ----------> Test4 " << std::endl;
RWMOL_SPTR_VECT library;
std::vector<const char *>fileNames;
std::vector<const char *> fileNames;
fileNames.push_back("esters.2.sdf");
fileNames.push_back("esters.2.sdf");
library = enumFromFiles("template.2.mol",fileNames);
library = enumFromFiles("template.2.mol", fileNames);
CHECK_INVARIANT(library.size()==2,"");
CHECK_INVARIANT(library.size() == 2, "");
library[0]->debugMol(std::cout);
std::cout << "smi0: " << MolToSmiles(*library[0],0) << std::endl;
std::cout << "smi1: " << MolToSmiles(*library[1],0) << std::endl;
CHECK_INVARIANT(library[0]->getNumAtoms()==14,"");
CHECK_INVARIANT(library[1]->getNumAtoms()==16,"");
std::cout << "smi0: " << MolToSmiles(*library[0], 0) << std::endl;
std::cout << "smi1: " << MolToSmiles(*library[1], 0) << std::endl;
CHECK_INVARIANT(library[0]->getNumAtoms() == 14, "");
CHECK_INVARIANT(library[1]->getNumAtoms() == 16, "");
library.clear();
std::cout << " <---------- Done " << std::endl;
}
void test5(){
void test5() {
// test working from SMILES
std::cout << " ----------> Test5 " << std::endl;
RWMol *m1 = SmilesToMol("[Xa]CC([Xb])CC",0,0);
CHECK_INVARIANT(m1,"");
markAttachmentPoints(m1,'X');
RWMol *m1 = SmilesToMol("[Xa]CC([Xb])CC", 0, 0);
CHECK_INVARIANT(m1, "");
markAttachmentPoints(m1, 'X');
RWMOL_SPTR_VECT sidechains;
sidechains.push_back(RWMOL_SPTR(SmilesToMol("[X]OC(=O)",0,0)));
sidechains.push_back(RWMOL_SPTR(SmilesToMol("[X]OC(=O)C",0,0)));
sidechains.push_back(RWMOL_SPTR(SmilesToMol("[X]OC(=O)CCC",0,0)));
prepareSidechains(&sidechains,'X');
sidechains.push_back(RWMOL_SPTR(SmilesToMol("[X]OC(=O)", 0, 0)));
sidechains.push_back(RWMOL_SPTR(SmilesToMol("[X]OC(=O)C", 0, 0)));
sidechains.push_back(RWMOL_SPTR(SmilesToMol("[X]OC(=O)CCC", 0, 0)));
prepareSidechains(&sidechains, 'X');
VECT_RWMOL_SPTR_VECT allSideChains;
allSideChains.push_back(sidechains);
allSideChains.push_back(sidechains);
RWMOL_SPTR_VECT library;
library = enumerateLibrary(m1,allSideChains,false);
library = enumerateLibrary(m1, allSideChains, false);
CHECK_INVARIANT(library.size()==9,"");
CHECK_INVARIANT(library[0]->getNumAtoms()==10,"");
CHECK_INVARIANT(library[1]->getNumAtoms()==11,"");
CHECK_INVARIANT(library[2]->getNumAtoms()==13,"");
CHECK_INVARIANT(library[3]->getNumAtoms()==11,"");
CHECK_INVARIANT(library[4]->getNumAtoms()==12,"");
CHECK_INVARIANT(library[5]->getNumAtoms()==14,"");
CHECK_INVARIANT(library[6]->getNumAtoms()==13,"");
CHECK_INVARIANT(library[7]->getNumAtoms()==14,"");
CHECK_INVARIANT(library[8]->getNumAtoms()==16,"");
CHECK_INVARIANT(library.size() == 9, "");
CHECK_INVARIANT(library[0]->getNumAtoms() == 10, "");
CHECK_INVARIANT(library[1]->getNumAtoms() == 11, "");
CHECK_INVARIANT(library[2]->getNumAtoms() == 13, "");
CHECK_INVARIANT(library[3]->getNumAtoms() == 11, "");
CHECK_INVARIANT(library[4]->getNumAtoms() == 12, "");
CHECK_INVARIANT(library[5]->getNumAtoms() == 14, "");
CHECK_INVARIANT(library[6]->getNumAtoms() == 13, "");
CHECK_INVARIANT(library[7]->getNumAtoms() == 14, "");
CHECK_INVARIANT(library[8]->getNumAtoms() == 16, "");
library.clear();
std::cout << " <---------- Done " << std::endl;
}
void test6(){
void test6() {
// test working from SMILES with no matches
std::cout << " ----------> Test6 " << std::endl;
RWMol *m1 = SmilesToMol("[Xa]CC([Xb])CC",0,0);
CHECK_INVARIANT(m1,"");
markAttachmentPoints(m1,'X');
RWMol *m1 = SmilesToMol("[Xa]CC([Xb])CC", 0, 0);
CHECK_INVARIANT(m1, "");
markAttachmentPoints(m1, 'X');
RWMOL_SPTR_VECT sidechains;
sidechains.push_back(RWMOL_SPTR(SmilesToMol("OC(=O)",0,0)));
sidechains.push_back(RWMOL_SPTR(SmilesToMol("OC(=O)C",0,0)));
sidechains.push_back(RWMOL_SPTR(SmilesToMol("OC(=O)CCC",0,0)));
prepareSidechains(&sidechains,'X');
sidechains.push_back(RWMOL_SPTR(SmilesToMol("OC(=O)", 0, 0)));
sidechains.push_back(RWMOL_SPTR(SmilesToMol("OC(=O)C", 0, 0)));
sidechains.push_back(RWMOL_SPTR(SmilesToMol("OC(=O)CCC", 0, 0)));
prepareSidechains(&sidechains, 'X');
VECT_RWMOL_SPTR_VECT allSideChains;
allSideChains.push_back(sidechains);
allSideChains.push_back(sidechains);
RWMOL_SPTR_VECT library;
library = enumerateLibrary(m1,allSideChains,false);
library = enumerateLibrary(m1, allSideChains, false);
CHECK_INVARIANT(library.size()==1,"");
CHECK_INVARIANT(library[0]->getNumAtoms()==6,"");
CHECK_INVARIANT(library.size() == 1, "");
CHECK_INVARIANT(library[0]->getNumAtoms() == 6, "");
library.clear();
std::cout << " <---------- Done " << std::endl;
}
void test7(){
void test7() {
// test transforms
std::cout << " ----------> Test7 " << std::endl;
RWMOL_SPTR_VECT library;
RWMOL_SPTR mol;
Atom *at1,*at2;
Atom *at1, *at2;
int i;
std::vector<const char *>fileNames;
std::vector<const char *> fileNames;
fileNames.push_back("Ts.1.sdf");
library = enumFromFiles("box.1.mol",fileNames);
CHECK_INVARIANT(library.size()==1,"");
CHECK_INVARIANT(library[0]->getNumAtoms()==8,"");
library = enumFromFiles("box.1.mol", fileNames);
CHECK_INVARIANT(library.size() == 1, "");
CHECK_INVARIANT(library[0]->getNumAtoms() == 8, "");
mol = library[0];
at1=mol->getAtomWithIdx(0);
at2=mol->getAtomWithIdx(4);
CHECK_INVARIANT(feq(mol->getConformer().getAtomPos(at1->getIdx()).x,mol->getConformer().getAtomPos(at2->getIdx()).x),"");
CHECK_INVARIANT(mol->getConformer().getAtomPos(at1->getIdx()).y-mol->getConformer().getAtomPos(at2->getIdx()).y==-1.0,"");
at1 = mol->getAtomWithIdx(0);
at2 = mol->getAtomWithIdx(4);
CHECK_INVARIANT(feq(mol->getConformer().getAtomPos(at1->getIdx()).x,
mol->getConformer().getAtomPos(at2->getIdx()).x),
"");
CHECK_INVARIANT(mol->getConformer().getAtomPos(at1->getIdx()).y -
mol->getConformer().getAtomPos(at2->getIdx()).y ==
-1.0,
"");
library.clear();
// try another orientation of the sidechain molecule:
fileNames.clear();
fileNames.push_back("Ts.4.sdf");
library = enumFromFiles("box.1.mol",fileNames);
CHECK_INVARIANT(library.size()==1,"");
CHECK_INVARIANT(library[0]->getNumAtoms()==8,"");
library = enumFromFiles("box.1.mol", fileNames);
CHECK_INVARIANT(library.size() == 1, "");
CHECK_INVARIANT(library[0]->getNumAtoms() == 8, "");
mol = library[0];
std::cout << MolToMolBlock(*mol);
std::cout << std::endl;
at1=mol->getAtomWithIdx(0);
at2=mol->getAtomWithIdx(7);
TEST_ASSERT(feq(mol->getConformer().getAtomPos(7),RDGeom::Point3D(-.5,2.5,0.0)));
at1 = mol->getAtomWithIdx(0);
at2 = mol->getAtomWithIdx(7);
TEST_ASSERT(
feq(mol->getConformer().getAtomPos(7), RDGeom::Point3D(-.5, 2.5, 0.0)));
library.clear();
// now use an SD file that has the same molecule in different
// orientations as sidechains:
fileNames.clear();
fileNames.push_back("Ts.sdf");
library = enumFromFiles("box.1.mol",fileNames);
CHECK_INVARIANT(library.size()==4,"");
library = enumFromFiles("box.1.mol", fileNames);
CHECK_INVARIANT(library.size() == 4, "");
CHECK_INVARIANT(library[0]->getNumAtoms()==8,"");
CHECK_INVARIANT(library[1]->getNumAtoms()==8,"");
CHECK_INVARIANT(library[2]->getNumAtoms()==8,"");
CHECK_INVARIANT(library[3]->getNumAtoms()==8,"");
CHECK_INVARIANT(library[0]->getNumAtoms() == 8, "");
CHECK_INVARIANT(library[1]->getNumAtoms() == 8, "");
CHECK_INVARIANT(library[2]->getNumAtoms() == 8, "");
CHECK_INVARIANT(library[3]->getNumAtoms() == 8, "");
for(i=0;i<library.size();i++){
for (i = 0; i < library.size(); i++) {
std::cout << "------ Mol: " << i << "------" << std::endl;
mol = library[i];
std::cout << MolToMolBlock(*mol);
std::cout << std::endl;
}
for(i=0;i<library[0]->getNumAtoms();i++){
CHECK_INVARIANT(feq(library[0]->getConformer().getAtomPos(i).x,library[1]->getConformer().getAtomPos(i).x),"");
CHECK_INVARIANT(feq(library[0]->getConformer().getAtomPos(i).y,library[1]->getConformer().getAtomPos(i).y),"");
CHECK_INVARIANT(feq(library[0]->getConformer().getAtomPos(i).z,library[1]->getConformer().getAtomPos(i).z),"");
for (i = 0; i < library[0]->getNumAtoms(); i++) {
CHECK_INVARIANT(feq(library[0]->getConformer().getAtomPos(i).x,
library[1]->getConformer().getAtomPos(i).x),
"");
CHECK_INVARIANT(feq(library[0]->getConformer().getAtomPos(i).y,
library[1]->getConformer().getAtomPos(i).y),
"");
CHECK_INVARIANT(feq(library[0]->getConformer().getAtomPos(i).z,
library[1]->getConformer().getAtomPos(i).z),
"");
CHECK_INVARIANT(feq(library[0]->getConformer().getAtomPos(i).x,library[2]->getConformer().getAtomPos(i).x),"");
CHECK_INVARIANT(feq(library[0]->getConformer().getAtomPos(i).y,library[2]->getConformer().getAtomPos(i).y),"");
CHECK_INVARIANT(feq(library[0]->getConformer().getAtomPos(i).z,library[2]->getConformer().getAtomPos(i).z),"");
CHECK_INVARIANT(feq(library[0]->getConformer().getAtomPos(i).x,
library[2]->getConformer().getAtomPos(i).x),
"");
CHECK_INVARIANT(feq(library[0]->getConformer().getAtomPos(i).y,
library[2]->getConformer().getAtomPos(i).y),
"");
CHECK_INVARIANT(feq(library[0]->getConformer().getAtomPos(i).z,
library[2]->getConformer().getAtomPos(i).z),
"");
CHECK_INVARIANT(feq(library[0]->getConformer().getAtomPos(i),library[3]->getConformer().getAtomPos(i)),"");
CHECK_INVARIANT(feq(library[0]->getConformer().getAtomPos(i),
library[3]->getConformer().getAtomPos(i)),
"");
}
library.clear();
// move the attachment point on the template. This should
// make no difference.
library = enumFromFiles("box.1a.mol",fileNames);
CHECK_INVARIANT(library.size()==4,"");
library = enumFromFiles("box.1a.mol", fileNames);
CHECK_INVARIANT(library.size() == 4, "");
CHECK_INVARIANT(library[0]->getNumAtoms()==8,"");
CHECK_INVARIANT(library[1]->getNumAtoms()==8,"");
CHECK_INVARIANT(library[2]->getNumAtoms()==8,"");
CHECK_INVARIANT(library[3]->getNumAtoms()==8,"");
CHECK_INVARIANT(library[0]->getNumAtoms() == 8, "");
CHECK_INVARIANT(library[1]->getNumAtoms() == 8, "");
CHECK_INVARIANT(library[2]->getNumAtoms() == 8, "");
CHECK_INVARIANT(library[3]->getNumAtoms() == 8, "");
for(i=0;i<library[0]->getNumAtoms();i++){
CHECK_INVARIANT(feq(library[0]->getConformer().getAtomPos(i).x,library[1]->getConformer().getAtomPos(i).x),"");
CHECK_INVARIANT(feq(library[0]->getConformer().getAtomPos(i).y,library[1]->getConformer().getAtomPos(i).y),"");
CHECK_INVARIANT(feq(library[0]->getConformer().getAtomPos(i).z,library[1]->getConformer().getAtomPos(i).z),"");
for (i = 0; i < library[0]->getNumAtoms(); i++) {
CHECK_INVARIANT(feq(library[0]->getConformer().getAtomPos(i).x,
library[1]->getConformer().getAtomPos(i).x),
"");
CHECK_INVARIANT(feq(library[0]->getConformer().getAtomPos(i).y,
library[1]->getConformer().getAtomPos(i).y),
"");
CHECK_INVARIANT(feq(library[0]->getConformer().getAtomPos(i).z,
library[1]->getConformer().getAtomPos(i).z),
"");
CHECK_INVARIANT(feq(library[0]->getConformer().getAtomPos(i).x,library[2]->getConformer().getAtomPos(i).x),"");
CHECK_INVARIANT(feq(library[0]->getConformer().getAtomPos(i).y,library[2]->getConformer().getAtomPos(i).y),"");
CHECK_INVARIANT(feq(library[0]->getConformer().getAtomPos(i).z,library[2]->getConformer().getAtomPos(i).z),"");
CHECK_INVARIANT(feq(library[0]->getConformer().getAtomPos(i).x,
library[2]->getConformer().getAtomPos(i).x),
"");
CHECK_INVARIANT(feq(library[0]->getConformer().getAtomPos(i).y,
library[2]->getConformer().getAtomPos(i).y),
"");
CHECK_INVARIANT(feq(library[0]->getConformer().getAtomPos(i).z,
library[2]->getConformer().getAtomPos(i).z),
"");
//std::cout << i << "\t" << library[0]->getConformer().getAtomPos(i) << std::endl;
//std::cout << "\t" << library[3]->getConformer().getAtomPos(i) << std::endl;
CHECK_INVARIANT(feq(library[0]->getConformer().getAtomPos(i).x,library[3]->getConformer().getAtomPos(i).x),"");
CHECK_INVARIANT(feq(library[0]->getConformer().getAtomPos(i).y,library[3]->getConformer().getAtomPos(i).y),"");
CHECK_INVARIANT(feq(library[0]->getConformer().getAtomPos(i).z,library[3]->getConformer().getAtomPos(i).z),"");
// std::cout << i << "\t" << library[0]->getConformer().getAtomPos(i) <<
// std::endl;
// std::cout << "\t" << library[3]->getConformer().getAtomPos(i) <<
// std::endl;
CHECK_INVARIANT(feq(library[0]->getConformer().getAtomPos(i).x,
library[3]->getConformer().getAtomPos(i).x),
"");
CHECK_INVARIANT(feq(library[0]->getConformer().getAtomPos(i).y,
library[3]->getConformer().getAtomPos(i).y),
"");
CHECK_INVARIANT(feq(library[0]->getConformer().getAtomPos(i).z,
library[3]->getConformer().getAtomPos(i).z),
"");
}
library.clear();
// move the attachment point on the template. This should
// make no difference.
library = enumFromFiles("box.mol",fileNames);
CHECK_INVARIANT(library.size()==4,"");
library = enumFromFiles("box.mol", fileNames);
CHECK_INVARIANT(library.size() == 4, "");
CHECK_INVARIANT(library[0]->getNumAtoms()==20,"");
CHECK_INVARIANT(library[1]->getNumAtoms()==20,"");
CHECK_INVARIANT(library[2]->getNumAtoms()==20,"");
CHECK_INVARIANT(library[3]->getNumAtoms()==20,"");
CHECK_INVARIANT(library[0]->getNumAtoms() == 20, "");
CHECK_INVARIANT(library[1]->getNumAtoms() == 20, "");
CHECK_INVARIANT(library[2]->getNumAtoms() == 20, "");
CHECK_INVARIANT(library[3]->getNumAtoms() == 20, "");
for(i=0;i<library[0]->getNumAtoms();i++){
for (i = 0; i < library[0]->getNumAtoms(); i++) {
at1 = library[0]->getAtomWithIdx(i);
CHECK_INVARIANT(feq(library[0]->getConformer().getAtomPos(i).x,library[1]->getConformer().getAtomPos(i).x),"");
CHECK_INVARIANT(feq(library[0]->getConformer().getAtomPos(i).y,library[1]->getConformer().getAtomPos(i).y),"");
CHECK_INVARIANT(feq(library[0]->getConformer().getAtomPos(i).z,library[1]->getConformer().getAtomPos(i).z),"");
CHECK_INVARIANT(feq(library[0]->getConformer().getAtomPos(i).x,
library[1]->getConformer().getAtomPos(i).x),
"");
CHECK_INVARIANT(feq(library[0]->getConformer().getAtomPos(i).y,
library[1]->getConformer().getAtomPos(i).y),
"");
CHECK_INVARIANT(feq(library[0]->getConformer().getAtomPos(i).z,
library[1]->getConformer().getAtomPos(i).z),
"");
CHECK_INVARIANT(feq(library[0]->getConformer().getAtomPos(i).x,library[2]->getConformer().getAtomPos(i).x),"");
CHECK_INVARIANT(feq(library[0]->getConformer().getAtomPos(i).y,library[2]->getConformer().getAtomPos(i).y),"");
CHECK_INVARIANT(feq(library[0]->getConformer().getAtomPos(i).z,library[2]->getConformer().getAtomPos(i).z),"");
CHECK_INVARIANT(feq(library[0]->getConformer().getAtomPos(i).x,
library[2]->getConformer().getAtomPos(i).x),
"");
CHECK_INVARIANT(feq(library[0]->getConformer().getAtomPos(i).y,
library[2]->getConformer().getAtomPos(i).y),
"");
CHECK_INVARIANT(feq(library[0]->getConformer().getAtomPos(i).z,
library[2]->getConformer().getAtomPos(i).z),
"");
CHECK_INVARIANT(feq(library[0]->getConformer().getAtomPos(i).x,library[3]->getConformer().getAtomPos(i).x),"");
CHECK_INVARIANT(feq(library[0]->getConformer().getAtomPos(i).y,library[3]->getConformer().getAtomPos(i).y),"");
CHECK_INVARIANT(feq(library[0]->getConformer().getAtomPos(i).z,library[3]->getConformer().getAtomPos(i).z),"");
CHECK_INVARIANT(feq(library[0]->getConformer().getAtomPos(i).x,
library[3]->getConformer().getAtomPos(i).x),
"");
CHECK_INVARIANT(feq(library[0]->getConformer().getAtomPos(i).y,
library[3]->getConformer().getAtomPos(i).y),
"");
CHECK_INVARIANT(feq(library[0]->getConformer().getAtomPos(i).z,
library[3]->getConformer().getAtomPos(i).z),
"");
}
library.clear();
std::cout << " <---------- Done " << std::endl;
}
@@ -414,15 +467,15 @@ void testCoords(){
}
#endif
int main(){
int main() {
#if 1
test1();
test2();
test3();
//test4();
// test4();
test5();
test6();
#endif
test7();
//testCoords();
// testCoords();
}

View File

@@ -11,32 +11,32 @@ using namespace TemplateEnum;
#include <math.h>
// Fuzzy equality for doubles: true when v1 and v2 differ by at most tol
// (absolute tolerance, default 1e-4).
bool feq(double v1, double v2, double tol = 1e-4) {
  return fabs(v1 - v2) <= tol;
}
// Fuzzy equality for 3D points: true when the x, y and z components each
// agree within tol (see the scalar feq overload).
bool feq(RDGeom::Point3D p1, RDGeom::Point3D p2, double tol = 1e-4) {
  return feq(p1.x, p2.x, tol) && feq(p1.y, p2.y, tol) && feq(p1.z, p2.z, tol);
}
int main(int argc,const char *argv[]){
if(argc<3){
std::cerr << " Usage: simple.exe <template.mol> [attach1.sdf, attach2.sdf, ...]" << std::endl;
int main(int argc, const char *argv[]) {
if (argc < 3) {
std::cerr
<< " Usage: simple.exe <template.mol> [attach1.sdf, attach2.sdf, ...]"
<< std::endl;
exit(-1);
}
std::vector<const char *>fileNames;
for(int i=2;i<argc;i++){
fileNames.push_back( argv[i] );
std::vector<const char *> fileNames;
for (int i = 2; i < argc; i++) {
fileNames.push_back(argv[i]);
}
RWMOL_SPTR_VECT library=enumFromFiles(argv[1],fileNames);
RWMOL_SPTR_VECT library = enumFromFiles(argv[1], fileNames);
std::cerr << "Created: " << library.size() << " compounds." << std::endl;
for(int i=0;i<library.size();i++){
for (int i = 0; i < library.size(); i++) {
RWMOL_SPTR mol = library[i];
std::cout << MolToMolBlock(mol.get(),false);
std::cout << MolToMolBlock(mol.get(), false);
std::cout << "$$$$" << std::endl;
}
exit(0);
exit(0);
}

View File

@@ -5,7 +5,6 @@
#ifndef __gen_IRDKit_h__
#define __gen_IRDKit_h__
#ifndef __gen_nsISupports_h__
#include "nsISupports.h"
#endif
@@ -18,13 +17,15 @@
/* starting interface: IRDMolecule */
#define IRDMOLECULE_IID_STR "2fd55049-0125-48be-88e6-270b1b83a8a8"
/* Brace-initializer form of the interface id; the fields spell out the same
 * hex digits as IRDMOLECULE_IID_STR. */
#define IRDMOLECULE_IID                              \
  {                                                  \
    0x2fd55049, 0x0125, 0x48be, {                    \
      0x88, 0xe6, 0x27, 0x0b, 0x1b, 0x83, 0xa8, 0xa8 \
    }                                                \
  }
class NS_NO_VTABLE IRDMolecule : public nsISupports {
public:
public:
NS_DEFINE_STATIC_IID_ACCESSOR(IRDMOLECULE_IID)
/* double GetMW (); */
@@ -47,38 +48,56 @@ class NS_NO_VTABLE IRDMolecule : public nsISupports {
/* void Generate3DCoords (); */
NS_IMETHOD Generate3DCoords(void) = 0;
};
/* Use this macro when declaring classes that implement this interface.
 * It expands to the (non-pure) declarations of every IRDMolecule method. */
#define NS_DECL_IRDMOLECULE                                              \
  NS_IMETHOD GetMW(double *_retval);                                     \
  NS_IMETHOD GetSmiles(char **_retval);                                  \
  NS_IMETHOD GetMolBlock(char **_retval);                                \
  NS_IMETHOD GetSmartsMatchCount(const char *smarts, PRUint32 *_retval); \
  NS_IMETHOD LogP(double *_retval);                                      \
  NS_IMETHOD MR(double *_retval);                                        \
  NS_IMETHOD Generate3DCoords(void);
/* Use this macro to declare functions that forward the behavior of this
 * interface to another object. _to is pasted textually in front of each
 * method name, so it must end with the member-access operator. */
#define NS_FORWARD_IRDMOLECULE(_to)                                           \
  NS_IMETHOD GetMW(double *_retval) { return _to GetMW(_retval); }            \
  NS_IMETHOD GetSmiles(char **_retval) { return _to GetSmiles(_retval); }     \
  NS_IMETHOD GetMolBlock(char **_retval) { return _to GetMolBlock(_retval); } \
  NS_IMETHOD GetSmartsMatchCount(const char *smarts, PRUint32 *_retval) {     \
    return _to GetSmartsMatchCount(smarts, _retval);                          \
  }                                                                           \
  NS_IMETHOD LogP(double *_retval) { return _to LogP(_retval); }              \
  NS_IMETHOD MR(double *_retval) { return _to MR(_retval); }                  \
  NS_IMETHOD Generate3DCoords(void) { return _to Generate3DCoords(); }
/* Use this macro to declare functions that forward the behavior of this
 * interface to another object in a safe way: each method returns
 * NS_ERROR_NULL_POINTER when _to tests false, otherwise calls through
 * _to->. */
#define NS_FORWARD_SAFE_IRDMOLECULE(_to)                                  \
  NS_IMETHOD GetMW(double *_retval) {                                     \
    return !_to ? NS_ERROR_NULL_POINTER : _to->GetMW(_retval);            \
  }                                                                       \
  NS_IMETHOD GetSmiles(char **_retval) {                                  \
    return !_to ? NS_ERROR_NULL_POINTER : _to->GetSmiles(_retval);        \
  }                                                                       \
  NS_IMETHOD GetMolBlock(char **_retval) {                                \
    return !_to ? NS_ERROR_NULL_POINTER : _to->GetMolBlock(_retval);      \
  }                                                                       \
  NS_IMETHOD GetSmartsMatchCount(const char *smarts, PRUint32 *_retval) { \
    return !_to ? NS_ERROR_NULL_POINTER                                   \
                : _to->GetSmartsMatchCount(smarts, _retval);              \
  }                                                                       \
  NS_IMETHOD LogP(double *_retval) {                                      \
    return !_to ? NS_ERROR_NULL_POINTER : _to->LogP(_retval);             \
  }                                                                       \
  NS_IMETHOD MR(double *_retval) {                                        \
    return !_to ? NS_ERROR_NULL_POINTER : _to->MR(_retval);               \
  }                                                                       \
  NS_IMETHOD Generate3DCoords(void) {                                     \
    return !_to ? NS_ERROR_NULL_POINTER : _to->Generate3DCoords();        \
  }
#if 0
/* Use the code below as a template for the implementation class for this interface. */
@@ -157,17 +176,18 @@ NS_IMETHODIMP _MYCLASS_::Generate3DCoords()
/* End of implementation class template. */
#endif
/* starting interface: IRDMolSupplier */
#define IRDMOLSUPPLIER_IID_STR "056a8da1-7820-41d7-b254-5ef7dd1693ce"
/* Brace-initializer form of the interface id; the fields spell out the same
 * hex digits as IRDMOLSUPPLIER_IID_STR. */
#define IRDMOLSUPPLIER_IID                           \
  {                                                  \
    0x056a8da1, 0x7820, 0x41d7, {                    \
      0xb2, 0x54, 0x5e, 0xf7, 0xdd, 0x16, 0x93, 0xce \
    }                                                \
  }
class NS_NO_VTABLE IRDMolSupplier : public nsISupports {
public:
public:
NS_DEFINE_STATIC_IID_ACCESSOR(IRDMOLSUPPLIER_IID)
/* boolean atEnd (); */
@@ -175,23 +195,28 @@ class NS_NO_VTABLE IRDMolSupplier : public nsISupports {
/* IRDMolecule next (); */
NS_IMETHOD Next(IRDMolecule **_retval) = 0;
};
/* Use this macro when declaring classes that implement this interface.
 * It expands to the (non-pure) declarations of every IRDMolSupplier method. */
#define NS_DECL_IRDMOLSUPPLIER       \
  NS_IMETHOD AtEnd(PRBool *_retval); \
  NS_IMETHOD Next(IRDMolecule **_retval);
/* Use this macro to declare functions that forward the behavior of this
 * interface to another object. _to is pasted textually in front of each
 * method name, so it must end with the member-access operator. */
#define NS_FORWARD_IRDMOLSUPPLIER(_to)                             \
  NS_IMETHOD AtEnd(PRBool *_retval) { return _to AtEnd(_retval); } \
  NS_IMETHOD Next(IRDMolecule **_retval) { return _to Next(_retval); }
/* Use this macro to declare functions that forward the behavior of this
 * interface to another object in a safe way: each method returns
 * NS_ERROR_NULL_POINTER when _to tests false, otherwise calls through
 * _to->. */
#define NS_FORWARD_SAFE_IRDMOLSUPPLIER(_to)                    \
  NS_IMETHOD AtEnd(PRBool *_retval) {                          \
    return !_to ? NS_ERROR_NULL_POINTER : _to->AtEnd(_retval); \
  }                                                            \
  NS_IMETHOD Next(IRDMolecule **_retval) {                     \
    return !_to ? NS_ERROR_NULL_POINTER : _to->Next(_retval);  \
  }
#if 0
/* Use the code below as a template for the implementation class for this interface. */
@@ -240,17 +265,18 @@ NS_IMETHODIMP _MYCLASS_::Next(IRDMolecule **_retval)
/* End of implementation class template. */
#endif
/* starting interface: IRDKit */
#define IRDKIT_IID_STR "bfb9acf3-9349-47ec-8984-f6f8e2f02f65"
/* Brace-initializer form of the interface id; the fields spell out the same
 * hex digits as IRDKIT_IID_STR. */
#define IRDKIT_IID                                   \
  {                                                  \
    0xbfb9acf3, 0x9349, 0x47ec, {                    \
      0x89, 0x84, 0xf6, 0xf8, 0xe2, 0xf0, 0x2f, 0x65 \
    }                                                \
  }
class NS_NO_VTABLE IRDKit : public nsISupports {
public:
public:
NS_DEFINE_STATIC_IID_ACCESSOR(IRDKIT_IID)
/* unsigned long strlen (in string arg); */
@@ -263,30 +289,52 @@ class NS_NO_VTABLE IRDKit : public nsISupports {
NS_IMETHOD MolFromMolBlock(const char *molBlock, IRDMolecule **_retval) = 0;
/* IRDMolSupplier SupplierFromSDFile (in string fileName); */
NS_IMETHOD SupplierFromSDFile(const char *fileName, IRDMolSupplier **_retval) = 0;
NS_IMETHOD SupplierFromSDFile(const char *fileName,
IRDMolSupplier **_retval) = 0;
};
/* Use this macro when declaring classes that implement this interface.
 * It expands to the (non-pure) declarations of every IRDKit method. */
#define NS_DECL_IRDKIT                                                     \
  NS_IMETHOD Strlen(const char *arg, PRUint32 *_retval);                   \
  NS_IMETHOD MolFromSmiles(const char *smiles, IRDMolecule **_retval);     \
  NS_IMETHOD MolFromMolBlock(const char *molBlock, IRDMolecule **_retval); \
  NS_IMETHOD SupplierFromSDFile(const char *fileName, IRDMolSupplier **_retval);
/* Use this macro to declare functions that forward the behavior of this
 * interface to another object. _to is pasted textually in front of each
 * method name, so it must end with the member-access operator. */
#define NS_FORWARD_IRDKIT(_to)                                              \
  NS_IMETHOD Strlen(const char *arg, PRUint32 *_retval) {                   \
    return _to Strlen(arg, _retval);                                        \
  }                                                                         \
  NS_IMETHOD MolFromSmiles(const char *smiles, IRDMolecule **_retval) {     \
    return _to MolFromSmiles(smiles, _retval);                              \
  }                                                                         \
  NS_IMETHOD MolFromMolBlock(const char *molBlock, IRDMolecule **_retval) { \
    return _to MolFromMolBlock(molBlock, _retval);                          \
  }                                                                         \
  NS_IMETHOD SupplierFromSDFile(const char *fileName,                       \
                                IRDMolSupplier **_retval) {                 \
    return _to SupplierFromSDFile(fileName, _retval);                       \
  }
/* Use this macro to declare functions that forward the behavior of this
 * interface to another object in a safe way: each method returns
 * NS_ERROR_NULL_POINTER when _to tests false, otherwise calls through
 * _to->. */
#define NS_FORWARD_SAFE_IRDKIT(_to)                                            \
  NS_IMETHOD Strlen(const char *arg, PRUint32 *_retval) {                      \
    return !_to ? NS_ERROR_NULL_POINTER : _to->Strlen(arg, _retval);           \
  }                                                                            \
  NS_IMETHOD MolFromSmiles(const char *smiles, IRDMolecule **_retval) {        \
    return !_to ? NS_ERROR_NULL_POINTER : _to->MolFromSmiles(smiles, _retval); \
  }                                                                            \
  NS_IMETHOD MolFromMolBlock(const char *molBlock, IRDMolecule **_retval) {    \
    return !_to ? NS_ERROR_NULL_POINTER                                        \
                : _to->MolFromMolBlock(molBlock, _retval);                     \
  }                                                                            \
  NS_IMETHOD SupplierFromSDFile(const char *fileName,                          \
                                IRDMolSupplier **_retval) {                    \
    return !_to ? NS_ERROR_NULL_POINTER                                        \
                : _to->SupplierFromSDFile(fileName, _retval);                  \
  }
#if 0
/* Use the code below as a template for the implementation class for this interface. */
@@ -347,5 +395,4 @@ NS_IMETHODIMP _MYCLASS_::SupplierFromSDFile(const char *fileName, IRDMolSupplier
/* End of implementation class template. */
#endif
#endif /* __gen_IRDKit_h__ */

View File

@@ -8,27 +8,14 @@ NS_GENERIC_FACTORY_CONSTRUCTOR(RDKitImpl);
NS_GENERIC_FACTORY_CONSTRUCTOR(RDMolecule);
NS_GENERIC_FACTORY_CONSTRUCTOR(RDMolSupplier);
// Component table handed to NS_IMPL_NSGETMODULE: one entry per class, giving
// its description, interface id, contract id and factory constructor.
static const nsModuleComponentInfo components[] = {
    {"RDKit Interface", IRDKIT_IID, "@rationaldiscovery.com/RDKit/base",
     RDKitImplConstructor},
    {"RDKit Molecule Interface", IRDMOLECULE_IID,
     "@rationaldiscovery.com/RDKit/molecule", RDMoleculeConstructor},
    {"RDKit Molecule Supplier Interface", IRDMOLSUPPLIER_IID,
     "@rationaldiscovery.com/RDKit/molsupplier", RDMolSupplierConstructor},
};
NS_IMPL_NSGETMODULE(nsRDKitModule, components);

View File

@@ -10,34 +10,26 @@
/* Implementation file */
NS_IMPL_ISUPPORTS1(RDKitImpl, IRDKit)
// Default constructor; the class currently has no members to initialize.
RDKitImpl::RDKitImpl() { /* member initializers and constructor code */ }
// Destructor; nothing to release.
RDKitImpl::~RDKitImpl() { /* destructor code */ }
/* unsigned long strlen (in string arg); */
// Stores the length of the NUL-terminated string arg in *_retval.
// Returns NS_ERROR_NULL_POINTER if either pointer is null, NS_OK otherwise.
NS_IMETHODIMP RDKitImpl::Strlen(const char *arg, PRUint32 *_retval) {
  // constructing a std::string from a null pointer is undefined behavior
  if (!arg || !_retval) return NS_ERROR_NULL_POINTER;
  std::string text(arg);
  *_retval = text.size();
  return NS_OK;
}
/* IRDMolecule MolFromSmiles (in string smiles); */
NS_IMETHODIMP RDKitImpl::MolFromSmiles(const char *smiles, IRDMolecule **_retval)
{
NS_IMETHODIMP RDKitImpl::MolFromSmiles(const char *smiles,
IRDMolecule **_retval) {
std::string smi(smiles);
RDKit::ROMol *roMol = RDKit::SmilesToMol(smiles);
if(!roMol) return NS_ERROR_FAILURE;
if (!roMol) return NS_ERROR_FAILURE;
RDMolecule *mol = new RDMolecule(roMol);
if(!mol) return NS_ERROR_OUT_OF_MEMORY;
if (!mol) return NS_ERROR_OUT_OF_MEMORY;
*_retval = static_cast<IRDMolecule *>(mol);
// FIX: does this leak?
@@ -47,13 +39,13 @@ NS_IMETHODIMP RDKitImpl::MolFromSmiles(const char *smiles, IRDMolecule **_retval
}
/* IRDMolecule MolFromMolBlock (in string molBlock); */
NS_IMETHODIMP RDKitImpl::MolFromMolBlock(const char *molBlock, IRDMolecule **_retval)
{
NS_IMETHODIMP RDKitImpl::MolFromMolBlock(const char *molBlock,
IRDMolecule **_retval) {
RDKit::ROMol *roMol = RDKit::MolBlockToMol(std::string(molBlock));
if(!roMol) return NS_ERROR_FAILURE;
if (!roMol) return NS_ERROR_FAILURE;
RDMolecule *mol = new RDMolecule(roMol);
if(!mol) return NS_ERROR_OUT_OF_MEMORY;
if (!mol) return NS_ERROR_OUT_OF_MEMORY;
*_retval = static_cast<IRDMolecule *>(mol);
// FIX: does this leak?
@@ -62,14 +54,14 @@ NS_IMETHODIMP RDKitImpl::MolFromMolBlock(const char *molBlock, IRDMolecule **_re
return NS_OK;
}
/* IRDMolSupplier SupplierFromSDFile (in string fileName); */
NS_IMETHODIMP RDKitImpl::SupplierFromSDFile(const char *fileName, IRDMolSupplier **_retval)
{
/* IRDMolSupplier SupplierFromSDFile (in string fileName); */
NS_IMETHODIMP RDKitImpl::SupplierFromSDFile(const char *fileName,
IRDMolSupplier **_retval) {
RDKit::MolSupplier *sdSuppl = new RDKit::SDMolSupplier(std::string(fileName));
if(!sdSuppl) return NS_ERROR_FAILURE;
if (!sdSuppl) return NS_ERROR_FAILURE;
RDMolSupplier *suppl = new RDMolSupplier(sdSuppl);
if(!suppl) return NS_ERROR_OUT_OF_MEMORY;
if (!suppl) return NS_ERROR_OUT_OF_MEMORY;
*_retval = static_cast<IRDMolSupplier *>(suppl);
// FIX: does this leak?
@@ -77,4 +69,3 @@ NS_IMETHODIMP RDKitImpl::SupplierFromSDFile(const char *fileName, IRDMolSupplier
return NS_OK;
}

View File

@@ -3,18 +3,17 @@
#include "IRDKit.h"
/* Header file */
class RDKitImpl : public IRDKit
{
public:
class RDKitImpl : public IRDKit {
public:
NS_DECL_ISUPPORTS
NS_DECL_IRDKIT
RDKitImpl();
private:
private:
~RDKitImpl();
protected:
protected:
/* additional members */
};
#endif

View File

@@ -14,107 +14,100 @@
NS_IMPL_ISUPPORTS1(RDMolecule, IRDMolecule)
// Releases the wrapped molecule; this object owns dp_mol.
RDMolecule::~RDMolecule() {
  // deleting a null pointer is a no-op, so no explicit check is needed
  delete dp_mol;
  dp_mol = 0;
}
/* double GetMW (); */
// Sums, over all atoms, the atom's mass plus the weight of its implicit
// hydrogens, and stores the total in *_retval.
// Returns NS_ERROR_NOT_INITIALIZED when no molecule is attached.
NS_IMETHODIMP RDMolecule::GetMW(double *_retval) {
  if (!dp_mol) return NS_ERROR_NOT_INITIALIZED;
  RDKit::PeriodicTable *pt = RDKit::PeriodicTable::getTable();
  *_retval = 0.0;
  for (RDKit::ROMol::AtomIterator atIt = dp_mol->beginAtoms();
       atIt != dp_mol->endAtoms(); ++atIt) {
    // implicit Hs are not atoms in the graph, so add their weight explicitly
    *_retval += (*atIt)->getMass() +
                (*atIt)->getNumImplicitHs() * pt->getAtomicWeight(1);
  }
  return NS_OK;
}
/* string GetSmiles (); */
// Stores a copy of the molecule's SMILES (including the trailing NUL),
// allocated with nsMemory::Clone, in *_retval.
// Returns NS_ERROR_NOT_INITIALIZED without a molecule and
// NS_ERROR_OUT_OF_MEMORY if the copy could not be allocated.
NS_IMETHODIMP RDMolecule::GetSmiles(char **_retval) {
  if (!dp_mol) return NS_ERROR_NOT_INITIALIZED;
  std::string smi = RDKit::MolToSmiles(*dp_mol);
  *_retval = static_cast<char *>(
      nsMemory::Clone(smi.c_str(), sizeof(char) * (smi.size() + 1)));
  return *_retval ? NS_OK : NS_ERROR_OUT_OF_MEMORY;
}
/* unsigned long GetSmartsMatchCount (in string smarts); */
// Parses smarts into a query, matches it against the molecule and stores the
// value returned by SubstructMatch in *_retval.
// Returns NS_ERROR_NOT_INITIALIZED without a molecule and NS_ERROR_FAILURE
// when the SMARTS cannot be parsed.
NS_IMETHODIMP RDMolecule::GetSmartsMatchCount(const char *smarts,
                                              PRUint32 *_retval) {
  if (!dp_mol) return NS_ERROR_NOT_INITIALIZED;
  RDKit::ROMol *patt = RDKit::SmartsToMol(std::string(smarts));
  if (!patt) return NS_ERROR_FAILURE;
  std::vector<RDKit::MatchVectType> matches;
  int res = RDKit::SubstructMatch(dp_mol, patt, matches);
  // the query molecule is heap-allocated and only needed for this call;
  // it was previously never freed
  delete patt;
  *_retval = res;
  return NS_OK;
}
/* double LogP (); */
// Stores the Crippen logP in *_retval. The value is read from the molecule's
// _CrippenLogP property when present; otherwise both Crippen descriptors are
// computed and cached on the molecule as _CrippenLogP and _CrippenMR.
// Returns NS_ERROR_NOT_INITIALIZED when no molecule is attached.
NS_IMETHODIMP RDMolecule::LogP(double *_retval) {
  if (!dp_mol) return NS_ERROR_NOT_INITIALIZED;
  double logp;
  if (dp_mol->hasProp(common_properties::_CrippenLogP)) {
    dp_mol->getProp(common_properties::_CrippenLogP, logp);
  } else {
    double mr;
    RDKit::Descriptors::CalcCrippenDescriptors(dp_mol, logp, mr);
    dp_mol->setProp(common_properties::_CrippenLogP, logp, true);
    dp_mol->setProp(common_properties::_CrippenMR, mr, true);
  }
  *_retval = logp;
  return NS_OK;
}
/* double MR (); */
// Stores the Crippen MR in *_retval. The value is read from the molecule's
// _CrippenMR property when present; otherwise both Crippen descriptors are
// computed and cached on the molecule as _CrippenLogP and _CrippenMR.
// Returns NS_ERROR_NOT_INITIALIZED when no molecule is attached.
NS_IMETHODIMP RDMolecule::MR(double *_retval) {
  if (!dp_mol) return NS_ERROR_NOT_INITIALIZED;
  double mr;
  if (dp_mol->hasProp(common_properties::_CrippenMR)) {
    dp_mol->getProp(common_properties::_CrippenMR, mr);
  } else {
    double logp;
    RDKit::Descriptors::CalcCrippenDescriptors(dp_mol, logp, mr);
    dp_mol->setProp(common_properties::_CrippenLogP, logp, true);
    dp_mol->setProp(common_properties::_CrippenMR, mr, true);
  }
  *_retval = mr;
  return NS_OK;
}
/* string GetMolBlock (); */
// Stores a copy of the molecule's mol block (including the trailing NUL),
// allocated with nsMemory::Clone, in *_retval.
// Returns NS_ERROR_NOT_INITIALIZED without a molecule and
// NS_ERROR_OUT_OF_MEMORY if the copy could not be allocated.
NS_IMETHODIMP RDMolecule::GetMolBlock(char **_retval) {
  if (!dp_mol) return NS_ERROR_NOT_INITIALIZED;
  std::string molB = RDKit::MolToMolBlock(dp_mol);
  *_retval = static_cast<char *>(
      nsMemory::Clone(molB.c_str(), sizeof(char) * (molB.size() + 1)));
  return *_retval ? NS_OK : NS_ERROR_OUT_OF_MEMORY;
}
/* void Generate3DCoords (); */
// Embeds the molecule, then builds a UFF force field for it and minimizes.
// Returns NS_ERROR_NOT_INITIALIZED without a molecule, NS_ERROR_FAILURE when
// embedding fails, no force field can be built, or minimize() returns
// non-zero; NS_OK otherwise.
NS_IMETHODIMP RDMolecule::Generate3DCoords() {
  if (!dp_mol) return NS_ERROR_NOT_INITIALIZED;
  // NOTE(review): later RDKit versions return a conformer id from
  // EmbedMolecule (-1 on failure); confirm the conversion to bool is right
  // for the RDKit version this is built against.
  bool embedded = RDKit::DGeomHelpers::EmbedMolecule(*dp_mol);
  if (!embedded) return NS_ERROR_FAILURE;
  ForceFields::ForceField *ff = RDKit::UFF::constructForceField(dp_mol);
  if (!ff) return NS_ERROR_FAILURE;
  ff->initialize();
  int needsMore = ff->minimize();
  // the force field is only needed for the minimization above
  delete ff;
  if (needsMore) return NS_ERROR_FAILURE;
  return NS_OK;
}

View File

@@ -3,22 +3,22 @@
#include "IRDKit.h"
// Forward declaration: this header only stores a pointer to ROMol.
namespace RDKit {
class ROMol;
}
class RDMolecule : public IRDMolecule
{
public:
class RDMolecule : public IRDMolecule {
public:
NS_DECL_ISUPPORTS
NS_DECL_IRDMOLECULE
RDMolecule() : dp_mol(0) {};
RDMolecule(RDKit::ROMol *mol) : dp_mol(mol) {};
RDMolecule() : dp_mol(0){};
RDMolecule(RDKit::ROMol *mol) : dp_mol(mol){};
RDKit::ROMol *dp_mol;
private:
private:
~RDMolecule();
protected:
protected:
/* additional members */
};

View File

@@ -7,33 +7,29 @@
NS_IMPL_ISUPPORTS1(RDMolSupplier, IRDMolSupplier)
RDMolSupplier::~RDMolSupplier()
{
if(this->dp_suppl){
RDMolSupplier::~RDMolSupplier() {
if (this->dp_suppl) {
delete dp_suppl;
dp_suppl=0;
}
dp_suppl = 0;
}
}
/* boolean atEnd (); */
// Stores the wrapped supplier's atEnd() result in *_retval.
// Returns NS_ERROR_NOT_INITIALIZED when no supplier is attached.
NS_IMETHODIMP RDMolSupplier::AtEnd(PRBool *_retval) {
  if (!dp_suppl) return NS_ERROR_NOT_INITIALIZED;
  *_retval = dp_suppl->atEnd();
  return NS_OK;
}
/* IRDMolecule next (); */
NS_IMETHODIMP RDMolSupplier::Next(IRDMolecule **_retval)
{
if(!dp_suppl) return NS_ERROR_NOT_INITIALIZED;
if(dp_suppl->atEnd()) return NS_BASE_STREAM_CLOSED;
RDKit::ROMol *roMol=dp_suppl->next();
if(!roMol) return NS_ERROR_UNEXPECTED;
NS_IMETHODIMP RDMolSupplier::Next(IRDMolecule **_retval) {
if (!dp_suppl) return NS_ERROR_NOT_INITIALIZED;
if (dp_suppl->atEnd()) return NS_BASE_STREAM_CLOSED;
RDKit::ROMol *roMol = dp_suppl->next();
if (!roMol) return NS_ERROR_UNEXPECTED;
RDMolecule *mol = new RDMolecule(roMol);
if(!mol) return NS_ERROR_OUT_OF_MEMORY;
if (!mol) return NS_ERROR_OUT_OF_MEMORY;
*_retval = static_cast<IRDMolecule *>(mol);
// FIX: does this leak?

View File

@@ -3,23 +3,23 @@
#include "IRDKit.h"
// Forward declarations: this header only needs these RDKit types by name.
namespace RDKit {
class ROMol;
class MolSupplier;
}
class RDMolSupplier : public IRDMolSupplier
{
public:
class RDMolSupplier : public IRDMolSupplier {
public:
NS_DECL_ISUPPORTS
NS_DECL_IRDMOLSUPPLIER
RDMolSupplier() : dp_suppl(0) {};
RDMolSupplier(RDKit::MolSupplier *suppl) : dp_suppl(suppl) {};
RDMolSupplier() : dp_suppl(0){};
RDMolSupplier(RDKit::MolSupplier *suppl) : dp_suppl(suppl){};
RDKit::MolSupplier *dp_suppl;
private:
private:
~RDMolSupplier();
protected:
protected:
/* additional members */
};

View File

@@ -22,180 +22,180 @@
#include <sstream>
#include <fstream>
using namespace RDKit;
// Renders mol to an SVG string by converting it to a drawing-code vector and
// passing that to DrawingToSVG.
std::string MolToSVG(const ROMol &mol) {
  std::vector<int> drawing = RDKit::Drawing::MolToDrawing(mol);
  std::string svg = RDKit::Drawing::DrawingToSVG(drawing);
  return svg;
}
#ifdef USE_CAIRO
// Draws mol onto the cairo context cr at the given pixel dimensions.
// Preconditions: cr is non-null and width and height are both positive.
void MolToCairo(const ROMol &mol, cairo_t *cr, int width, int height) {
  PRECONDITION(cr, "no context");
  PRECONDITION(width > 0 && height > 0, "bad dimensions");
  std::vector<int> drawing = RDKit::Drawing::MolToDrawing(mol);
  RDKit::Drawing::DrawingToCairo(drawing, cr, width, height);
}
#endif
void DrawDemo(){
void DrawDemo() {
#if 1
{
RWMol *mol=SmilesToMol("c1c(C#N)cccc1C~C2CC2");
std::string svg=MolToSVG(*mol);
RWMol *mol = SmilesToMol("c1c(C#N)cccc1C~C2CC2");
std::string svg = MolToSVG(*mol);
std::ofstream ostr("blah.svg");
ostr<<svg<<std::endl;
ostr << svg << std::endl;
delete mol;
}
{
RWMol *mol=SmilesToMol("[Mg]c1c(C#N)cc(C(=O)NCc2sc([NH3+])c([NH3+])c2)cc1");
std::string svg=MolToSVG(*mol);
RWMol *mol =
SmilesToMol("[Mg]c1c(C#N)cc(C(=O)NCc2sc([NH3+])c([NH3+])c2)cc1");
std::string svg = MolToSVG(*mol);
std::ofstream ostr("blah3.svg");
ostr<<svg<<std::endl;
ostr << svg << std::endl;
delete mol;
}
{
RWMol *mol=SmilesToMol("[Mg]c1c(C#N)cc(C(=O)NCCCCCC(CCCCCCCCCCC(CCCCCCCCC)(CCCCCCCCCC)CCCCCC)CCCCCCCCCCCCCCCCCCCCCc2sc([NH3+])c([NH3+])c2)cc1");
std::string svg=MolToSVG(*mol);
RWMol *mol = SmilesToMol(
"[Mg]c1c(C#N)cc(C(=O)NCCCCCC(CCCCCCCCCCC(CCCCCCCCC)(CCCCCCCCCC)CCCCCC)"
"CCCCCCCCCCCCCCCCCCCCCc2sc([NH3+])c([NH3+])c2)cc1");
std::string svg = MolToSVG(*mol);
std::ofstream ostr("blah4.svg");
ostr<<svg<<std::endl;
ostr << svg << std::endl;
delete mol;
}
{
RWMol *mol=SmilesToMol("BrO");
std::string svg=MolToSVG(*mol);
RWMol *mol = SmilesToMol("BrO");
std::string svg = MolToSVG(*mol);
std::ofstream ostr("blah2.svg");
ostr<<svg<<std::endl;
ostr << svg << std::endl;
delete mol;
}
{
RWMol *mol=SmilesToMol("BrC(O)(Cl)N");
std::string svg=MolToSVG(*mol);
RWMol *mol = SmilesToMol("BrC(O)(Cl)N");
std::string svg = MolToSVG(*mol);
std::ofstream ostr("blah5.svg");
ostr<<svg<<std::endl;
ostr << svg << std::endl;
delete mol;
}
{
RWMol *mol=SmilesToMol("[14NH2+]=[14NH2+]");
std::string svg=MolToSVG(*mol);
RWMol *mol = SmilesToMol("[14NH2+]=[14NH2+]");
std::string svg = MolToSVG(*mol);
std::ofstream ostr("blah6.svg");
ostr<<svg<<std::endl;
ostr << svg << std::endl;
delete mol;
}
#endif
#ifdef USE_CAIRO
{
RWMol *mol=SmilesToMol("[Mg]c1c(C#N)cc(C(=O)NCc2sc([NH3+])c([NH3+])c2)cc1");
RWMol *mol =
SmilesToMol("[Mg]c1c(C#N)cc(C(=O)NCc2sc([NH3+])c([NH3+])c2)cc1");
cairo_surface_t *surface =
cairo_image_surface_create (CAIRO_FORMAT_ARGB32, 200, 200);
cairo_t *cr = cairo_create (surface);
MolToCairo(*mol,cr,200,200);
cairo_image_surface_create(CAIRO_FORMAT_ARGB32, 200, 200);
cairo_t *cr = cairo_create(surface);
MolToCairo(*mol, cr, 200, 200);
cairo_destroy (cr);
cairo_surface_write_to_png (surface, "mol1.png");
cairo_surface_destroy (surface);
cairo_destroy(cr);
cairo_surface_write_to_png(surface, "mol1.png");
cairo_surface_destroy(surface);
delete mol;
}
{
RWMol *mol=SmilesToMol("c1c[12cH]ccn1");
RWMol *mol = SmilesToMol("c1c[12cH]ccn1");
cairo_surface_t *surface =
cairo_image_surface_create (CAIRO_FORMAT_ARGB32, 300, 300);
cairo_t *cr = cairo_create (surface);
MolToCairo(*mol,cr,300,300);
cairo_image_surface_create(CAIRO_FORMAT_ARGB32, 300, 300);
cairo_t *cr = cairo_create(surface);
MolToCairo(*mol, cr, 300, 300);
cairo_destroy (cr);
cairo_surface_write_to_png (surface, "mol2.png");
cairo_surface_destroy (surface);
cairo_destroy(cr);
cairo_surface_write_to_png(surface, "mol2.png");
cairo_surface_destroy(surface);
delete mol;
}
{
RWMol *mol=SmilesToMol("Nc1ccc(cc1)S(=O)(=O)c1ccc(N)cc1");
RWMol *mol = SmilesToMol("Nc1ccc(cc1)S(=O)(=O)c1ccc(N)cc1");
cairo_surface_t *surface =
cairo_image_surface_create (CAIRO_FORMAT_ARGB32, 300, 300);
cairo_t *cr = cairo_create (surface);
MolToCairo(*mol,cr,300,300);
cairo_image_surface_create(CAIRO_FORMAT_ARGB32, 300, 300);
cairo_t *cr = cairo_create(surface);
MolToCairo(*mol, cr, 300, 300);
cairo_destroy (cr);
cairo_surface_write_to_png (surface, "mol3.png");
cairo_surface_destroy (surface);
cairo_destroy(cr);
cairo_surface_write_to_png(surface, "mol3.png");
cairo_surface_destroy(surface);
delete mol;
}
{
RWMol *mol=SmilesToMol("Nccc(CCO)n",0,false);
RWMol *mol = SmilesToMol("Nccc(CCO)n", 0, false);
mol->updatePropertyCache();
cairo_surface_t *surface =
cairo_image_surface_create (CAIRO_FORMAT_ARGB32, 300, 300);
cairo_t *cr = cairo_create (surface);
MolToCairo(*mol,cr,300,300);
cairo_image_surface_create(CAIRO_FORMAT_ARGB32, 300, 300);
cairo_t *cr = cairo_create(surface);
MolToCairo(*mol, cr, 300, 300);
cairo_destroy (cr);
cairo_surface_write_to_png (surface, "mol4.png");
cairo_surface_destroy (surface);
cairo_destroy(cr);
cairo_surface_write_to_png(surface, "mol4.png");
cairo_surface_destroy(surface);
delete mol;
}
{
RWMol *mol=SmilesToMol("N~ccc(CCO)n",0,false);
RWMol *mol = SmilesToMol("N~ccc(CCO)n", 0, false);
mol->updatePropertyCache();
cairo_surface_t *surface =
cairo_image_surface_create (CAIRO_FORMAT_ARGB32, 300, 300);
cairo_t *cr = cairo_create (surface);
MolToCairo(*mol,cr,300,300);
cairo_image_surface_create(CAIRO_FORMAT_ARGB32, 300, 300);
cairo_t *cr = cairo_create(surface);
MolToCairo(*mol, cr, 300, 300);
cairo_destroy (cr);
cairo_surface_write_to_png (surface, "mol5.png");
cairo_surface_destroy (surface);
cairo_destroy(cr);
cairo_surface_write_to_png(surface, "mol5.png");
cairo_surface_destroy(surface);
delete mol;
}
{
RWMol *mol=SmilesToMol("BrO",0,false);
RWMol *mol = SmilesToMol("BrO", 0, false);
mol->updatePropertyCache();
cairo_surface_t *surface =
cairo_image_surface_create (CAIRO_FORMAT_ARGB32, 300, 300);
cairo_t *cr = cairo_create (surface);
MolToCairo(*mol,cr,300,300);
cairo_image_surface_create(CAIRO_FORMAT_ARGB32, 300, 300);
cairo_t *cr = cairo_create(surface);
MolToCairo(*mol, cr, 300, 300);
cairo_destroy (cr);
cairo_surface_write_to_png (surface, "mol6.png");
cairo_surface_destroy (surface);
cairo_destroy(cr);
cairo_surface_write_to_png(surface, "mol6.png");
cairo_surface_destroy(surface);
delete mol;
}
{
RWMol *mol=SmilesToMol("[Mg]c1c(C#N)cc(C(=O)NCCCCCC(CCCCCCCCCCC(CCCCCCCCC)(CCCCCCCCCC)CCCCCC)CCCCCCCCCCCCCCCCCCCCCc2sc([NH3+])c([NH3+])c2)cc1");
RWMol *mol = SmilesToMol(
"[Mg]c1c(C#N)cc(C(=O)NCCCCCC(CCCCCCCCCCC(CCCCCCCCC)(CCCCCCCCCC)CCCCCC)"
"CCCCCCCCCCCCCCCCCCCCCc2sc([NH3+])c([NH3+])c2)cc1");
cairo_surface_t *surface =
cairo_image_surface_create (CAIRO_FORMAT_ARGB32, 300, 300);
cairo_t *cr = cairo_create (surface);
MolToCairo(*mol,cr,300,300);
cairo_image_surface_create(CAIRO_FORMAT_ARGB32, 300, 300);
cairo_t *cr = cairo_create(surface);
MolToCairo(*mol, cr, 300, 300);
cairo_destroy (cr);
cairo_surface_write_to_png (surface, "mol7.png");
cairo_surface_destroy (surface);
cairo_destroy(cr);
cairo_surface_write_to_png(surface, "mol7.png");
cairo_surface_destroy(surface);
delete mol;
}
{
RWMol *mol=SmilesToMol("[NH3+][NH3+]",0,false);
RWMol *mol = SmilesToMol("[NH3+][NH3+]", 0, false);
mol->updatePropertyCache();
cairo_surface_t *surface =
cairo_image_surface_create (CAIRO_FORMAT_ARGB32, 300, 300);
cairo_t *cr = cairo_create (surface);
MolToCairo(*mol,cr,300,300);
cairo_image_surface_create(CAIRO_FORMAT_ARGB32, 300, 300);
cairo_t *cr = cairo_create(surface);
MolToCairo(*mol, cr, 300, 300);
cairo_destroy (cr);
cairo_surface_write_to_png (surface, "mol8.png");
cairo_surface_destroy (surface);
cairo_destroy(cr);
cairo_surface_write_to_png(surface, "mol8.png");
cairo_surface_destroy(surface);
delete mol;
}
#endif
}
int
main(int argc, char *argv[])
{
int main(int argc, char *argv[]) {
RDLog::InitLogs();
DrawDemo();
}

View File

@@ -30,218 +30,217 @@
using namespace RDKit;
void BuildSimpleMolecule(){
void BuildSimpleMolecule() {
// build the molecule: C/C=C\C
RWMol *mol=new RWMol();
RWMol *mol = new RWMol();
// add atoms and bonds:
mol->addAtom(new Atom(6)); // atom 0
mol->addAtom(new Atom(6)); // atom 1
mol->addAtom(new Atom(6)); // atom 2
mol->addAtom(new Atom(6)); // atom 3
mol->addBond(0,1,Bond::SINGLE); // bond 0
mol->addBond(1,2,Bond::DOUBLE); // bond 1
mol->addBond(2,3,Bond::SINGLE); // bond 2
mol->addAtom(new Atom(6)); // atom 0
mol->addAtom(new Atom(6)); // atom 1
mol->addAtom(new Atom(6)); // atom 2
mol->addAtom(new Atom(6)); // atom 3
mol->addBond(0, 1, Bond::SINGLE); // bond 0
mol->addBond(1, 2, Bond::DOUBLE); // bond 1
mol->addBond(2, 3, Bond::SINGLE); // bond 2
// setup the stereochem:
mol->getBondWithIdx(0)->setBondDir(Bond::ENDUPRIGHT);
mol->getBondWithIdx(2)->setBondDir(Bond::ENDDOWNRIGHT);
// do the chemistry perception:
RDKit::MolOps::sanitizeMol(*mol);
// Get the canonical SMILES, include stereochemistry:
std::string smiles;
smiles = MolToSmiles(*(static_cast<ROMol *>(mol)),true);
BOOST_LOG(rdInfoLog)<<" sample 1 SMILES: " <<smiles<<std::endl;
smiles = MolToSmiles(*(static_cast<ROMol *>(mol)), true);
BOOST_LOG(rdInfoLog) << " sample 1 SMILES: " << smiles << std::endl;
}
void WorkWithRingInfo(){
void WorkWithRingInfo() {
// use a more complicated molecule to demonstrate querying about
// ring information
ROMol *mol=SmilesToMol("OC1CCC2C1CCCC2");
ROMol *mol = SmilesToMol("OC1CCC2C1CCCC2");
// the molecule from SmilesToMol is already sanitized, so we don't
// need to worry about that.
// work with ring information
RingInfo *ringInfo = mol->getRingInfo();
TEST_ASSERT(ringInfo->numRings()==2);
TEST_ASSERT(ringInfo->numRings() == 2);
// can ask how many rings an atom is in:
TEST_ASSERT(ringInfo->numAtomRings(0)==0);
TEST_ASSERT(ringInfo->numAtomRings(1)==1);
TEST_ASSERT(ringInfo->numAtomRings(4)==2);
TEST_ASSERT(ringInfo->numAtomRings(0) == 0);
TEST_ASSERT(ringInfo->numAtomRings(1) == 1);
TEST_ASSERT(ringInfo->numAtomRings(4) == 2);
// same with bonds:
TEST_ASSERT(ringInfo->numBondRings(0)==0);
TEST_ASSERT(ringInfo->numBondRings(1)==1);
TEST_ASSERT(ringInfo->numBondRings(0) == 0);
TEST_ASSERT(ringInfo->numBondRings(1) == 1);
// can check if an atom is in a ring of a particular size:
TEST_ASSERT(!ringInfo->isAtomInRingOfSize(0,5));
TEST_ASSERT(ringInfo->isAtomInRingOfSize(1,5));
TEST_ASSERT(ringInfo->isAtomInRingOfSize(4,5));
TEST_ASSERT(ringInfo->isAtomInRingOfSize(4,6));
TEST_ASSERT(!ringInfo->isAtomInRingOfSize(0, 5));
TEST_ASSERT(ringInfo->isAtomInRingOfSize(1, 5));
TEST_ASSERT(ringInfo->isAtomInRingOfSize(4, 5));
TEST_ASSERT(ringInfo->isAtomInRingOfSize(4, 6));
// same with bonds:
TEST_ASSERT(!ringInfo->isBondInRingOfSize(0,5));
TEST_ASSERT(ringInfo->isBondInRingOfSize(1,5));
TEST_ASSERT(!ringInfo->isBondInRingOfSize(0, 5));
TEST_ASSERT(ringInfo->isBondInRingOfSize(1, 5));
// can also get the full list of rings as atom indices:
VECT_INT_VECT atomRings; // VECT_INT_VECT is vector< vector<int> >
atomRings=ringInfo->atomRings();
TEST_ASSERT(atomRings.size()==2);
TEST_ASSERT(atomRings[0].size()==5);
TEST_ASSERT(atomRings[1].size()==6);
VECT_INT_VECT atomRings; // VECT_INT_VECT is vector< vector<int> >
atomRings = ringInfo->atomRings();
TEST_ASSERT(atomRings.size() == 2);
TEST_ASSERT(atomRings[0].size() == 5);
TEST_ASSERT(atomRings[1].size() == 6);
// this sort is just here for test/demo purposes:
std::sort(atomRings[0].begin(),atomRings[0].end());
TEST_ASSERT(atomRings[0][0]==1);
TEST_ASSERT(atomRings[0][1]==2);
TEST_ASSERT(atomRings[0][2]==3);
TEST_ASSERT(atomRings[0][3]==4);
TEST_ASSERT(atomRings[0][4]==5);
std::sort(atomRings[0].begin(), atomRings[0].end());
TEST_ASSERT(atomRings[0][0] == 1);
TEST_ASSERT(atomRings[0][1] == 2);
TEST_ASSERT(atomRings[0][2] == 3);
TEST_ASSERT(atomRings[0][3] == 4);
TEST_ASSERT(atomRings[0][4] == 5);
// same with bonds:
VECT_INT_VECT bondRings; // VECT_INT_VECT is vector< vector<int> >
bondRings=ringInfo->bondRings();
TEST_ASSERT(bondRings.size()==2);
TEST_ASSERT(bondRings[0].size()==5);
TEST_ASSERT(bondRings[1].size()==6);
VECT_INT_VECT bondRings; // VECT_INT_VECT is vector< vector<int> >
bondRings = ringInfo->bondRings();
TEST_ASSERT(bondRings.size() == 2);
TEST_ASSERT(bondRings[0].size() == 5);
TEST_ASSERT(bondRings[1].size() == 6);
// the same trick played above with the contents of each ring
// can be played, but we won't
// count the number of rings of size 5:
unsigned int nRingsSize5=0;
for(VECT_INT_VECT_CI ringIt=atomRings.begin();
ringIt!=atomRings.end();++ringIt){
if(ringIt->size()==5) nRingsSize5++;
unsigned int nRingsSize5 = 0;
for (VECT_INT_VECT_CI ringIt = atomRings.begin(); ringIt != atomRings.end();
++ringIt) {
if (ringIt->size() == 5) nRingsSize5++;
}
TEST_ASSERT(nRingsSize5==1);
TEST_ASSERT(nRingsSize5 == 1);
delete mol;
// count the number of atoms in 5-rings where all the atoms
// are aromatic:
mol=SmilesToMol("C1CC2=C(C1)C1=C(NC3=C1C=CC=C3)C=C2");
mol = SmilesToMol("C1CC2=C(C1)C1=C(NC3=C1C=CC=C3)C=C2");
ringInfo = mol->getRingInfo();
atomRings=ringInfo->atomRings();
atomRings = ringInfo->atomRings();
unsigned int nMatchingAtoms=0;
for(VECT_INT_VECT_CI ringIt=atomRings.begin();
ringIt!=atomRings.end();++ringIt){
if(ringIt->size()!=5){
unsigned int nMatchingAtoms = 0;
for (VECT_INT_VECT_CI ringIt = atomRings.begin(); ringIt != atomRings.end();
++ringIt) {
if (ringIt->size() != 5) {
continue;
}
bool isAromatic=true;
for(INT_VECT_CI atomIt=ringIt->begin();
atomIt!=ringIt->end();++atomIt){
if(!mol->getAtomWithIdx(*atomIt)->getIsAromatic()){
isAromatic=false;
bool isAromatic = true;
for (INT_VECT_CI atomIt = ringIt->begin(); atomIt != ringIt->end();
++atomIt) {
if (!mol->getAtomWithIdx(*atomIt)->getIsAromatic()) {
isAromatic = false;
break;
}
}
if(isAromatic){
nMatchingAtoms+=5;
if (isAromatic) {
nMatchingAtoms += 5;
}
}
TEST_ASSERT(nMatchingAtoms==5);
TEST_ASSERT(nMatchingAtoms == 5);
delete mol;
// count the number of rings where all the bonds
// are aromatic.
mol=SmilesToMol("c1cccc2c1CCCC2");
mol = SmilesToMol("c1cccc2c1CCCC2");
ringInfo = mol->getRingInfo();
bondRings=ringInfo->bondRings();
unsigned int nAromaticRings=0;
for(VECT_INT_VECT_CI ringIt=bondRings.begin();
ringIt!=bondRings.end();++ringIt){
bool isAromatic=true;
for(INT_VECT_CI bondIt=ringIt->begin();
bondIt!=ringIt->end();++bondIt){
if(!mol->getBondWithIdx(*bondIt)->getIsAromatic()){
isAromatic=false;
bondRings = ringInfo->bondRings();
unsigned int nAromaticRings = 0;
for (VECT_INT_VECT_CI ringIt = bondRings.begin(); ringIt != bondRings.end();
++ringIt) {
bool isAromatic = true;
for (INT_VECT_CI bondIt = ringIt->begin(); bondIt != ringIt->end();
++bondIt) {
if (!mol->getBondWithIdx(*bondIt)->getIsAromatic()) {
isAromatic = false;
break;
}
}
if(isAromatic) nAromaticRings++;
if (isAromatic) nAromaticRings++;
}
TEST_ASSERT(nAromaticRings==1);
TEST_ASSERT(nAromaticRings == 1);
delete mol;
}
void WorkWithSmarts(){
void WorkWithSmarts() {
// demonstrate the use of substructure searching
ROMol *mol=SmilesToMol("ClCC=CCC");
ROMol *mol = SmilesToMol("ClCC=CCC");
// a simple SMARTS pattern for rotatable bonds:
ROMol *pattern=SmartsToMol("[!$(*#*)&!D1]-&!@[!$(*#*)&!D1]");
ROMol *pattern = SmartsToMol("[!$(*#*)&!D1]-&!@[!$(*#*)&!D1]");
std::vector<MatchVectType> matches;
unsigned int nMatches;
nMatches=SubstructMatch(*mol,*pattern,matches);
TEST_ASSERT(nMatches==2);
TEST_ASSERT(matches.size()==2); // <- there are two rotatable bonds
nMatches = SubstructMatch(*mol, *pattern, matches);
TEST_ASSERT(nMatches == 2);
TEST_ASSERT(matches.size() == 2); // <- there are two rotatable bonds
// a MatchVect is a vector of std::pairs with (patternIdx, molIdx):
TEST_ASSERT(matches[0].size()==2);
TEST_ASSERT(matches[0][0].first==0);
TEST_ASSERT(matches[0][0].second==1);
TEST_ASSERT(matches[0][1].first==1);
TEST_ASSERT(matches[0][1].second==2);
TEST_ASSERT(matches[0].size() == 2);
TEST_ASSERT(matches[0][0].first == 0);
TEST_ASSERT(matches[0][0].second == 1);
TEST_ASSERT(matches[0][1].first == 1);
TEST_ASSERT(matches[0][1].second == 2);
delete pattern;
delete mol;
}
void DepictDemo(){
void DepictDemo() {
// demonstrate the use of the depiction-generation code2D coordinates:
ROMol *mol=SmilesToMol("ClCC=CCC");
ROMol *mol = SmilesToMol("ClCC=CCC");
// generate the 2D coordinates:
RDDepict::compute2DCoords(*mol);
// generate a mol block (could also go to a file):
std::string molBlock=MolToMolBlock(*mol);
BOOST_LOG(rdInfoLog)<<molBlock;
std::string molBlock = MolToMolBlock(*mol);
BOOST_LOG(rdInfoLog) << molBlock;
delete mol;
}
void CleanupMolecule(){
void CleanupMolecule() {
// an example of doing some cleaning up of a molecule before
// calling the sanitizeMol function()
// build: C1CC1C(:O):O
RWMol *mol=new RWMol();
RWMol *mol = new RWMol();
// add atoms and bonds:
mol->addAtom(new Atom(6)); // atom 0
mol->addAtom(new Atom(6)); // atom 1
mol->addAtom(new Atom(6)); // atom 2
mol->addAtom(new Atom(6)); // atom 3
mol->addAtom(new Atom(8)); // atom 4
mol->addAtom(new Atom(8)); // atom 5
mol->addBond(3,4,Bond::AROMATIC); // bond 0
mol->addBond(3,5,Bond::AROMATIC); // bond 1
mol->addBond(3,2,Bond::SINGLE); // bond 2
mol->addBond(2,1,Bond::SINGLE); // bond 3
mol->addBond(1,0,Bond::SINGLE); // bond 4
mol->addBond(0,2,Bond::SINGLE); // bond 5
mol->addAtom(new Atom(6)); // atom 0
mol->addAtom(new Atom(6)); // atom 1
mol->addAtom(new Atom(6)); // atom 2
mol->addAtom(new Atom(6)); // atom 3
mol->addAtom(new Atom(8)); // atom 4
mol->addAtom(new Atom(8)); // atom 5
mol->addBond(3, 4, Bond::AROMATIC); // bond 0
mol->addBond(3, 5, Bond::AROMATIC); // bond 1
mol->addBond(3, 2, Bond::SINGLE); // bond 2
mol->addBond(2, 1, Bond::SINGLE); // bond 3
mol->addBond(1, 0, Bond::SINGLE); // bond 4
mol->addBond(0, 2, Bond::SINGLE); // bond 5
// instead of calling sanitize mol, which would generate an error,
// we'll perceive the rings, then take care of aromatic bonds
// that aren't in a ring, then sanitize:
MolOps::findSSSR(*mol);
for(ROMol::BondIterator bondIt=mol->beginBonds();
bondIt!=mol->endBonds();++bondIt){
if( ((*bondIt)->getIsAromatic() ||
(*bondIt)->getBondType()==Bond::AROMATIC)
&& !mol->getRingInfo()->numBondRings((*bondIt)->getIdx()) ){
for (ROMol::BondIterator bondIt = mol->beginBonds();
bondIt != mol->endBonds(); ++bondIt) {
if (((*bondIt)->getIsAromatic() ||
(*bondIt)->getBondType() == Bond::AROMATIC) &&
!mol->getRingInfo()->numBondRings((*bondIt)->getIdx())) {
// remove the aromatic flag on the bond:
(*bondIt)->setIsAromatic(false);
// and cleanup its attached atoms as well (they were
// also marked aromatic when the bond was added)
(*bondIt)->getBeginAtom()->setIsAromatic(false);
(*bondIt)->getEndAtom()->setIsAromatic(false);
// NOTE: this isn't really reasonable:
(*bondIt)->setBondType(Bond::SINGLE);
(*bondIt)->setBondType(Bond::SINGLE);
}
}
@@ -250,20 +249,20 @@ void CleanupMolecule(){
// Get the canonical SMILES, include stereochemistry:
std::string smiles;
smiles = MolToSmiles(*(static_cast<ROMol *>(mol)),true);
BOOST_LOG(rdInfoLog)<<" fixed SMILES: " <<smiles<<std::endl;
smiles = MolToSmiles(*(static_cast<ROMol *>(mol)), true);
BOOST_LOG(rdInfoLog) << " fixed SMILES: " << smiles << std::endl;
}
void ReactionDemo(){
void ReactionDemo() {
// reaction smarts for a crude amide-bond formation definition:
std::string sma="[C:1](=[O:2])[OH].[N:3]>>[O:2]=[C:1][N:3]";
std::string sma = "[C:1](=[O:2])[OH].[N:3]>>[O:2]=[C:1][N:3]";
// construct the reaction:
ChemicalReaction *rxn = RxnSmartsToChemicalReaction(sma);
ChemicalReaction *rxn = RxnSmartsToChemicalReaction(sma);
// now initialize it and check for errors:
rxn->initReactantMatchers();
unsigned int nWarn,nError;
rxn->validate(nWarn,nError);
unsigned int nWarn, nError;
rxn->validate(nWarn, nError);
ROMol *mol;
MOL_SPTR_VECT reacts;
@@ -278,20 +277,18 @@ void ReactionDemo(){
prods = rxn->runReactants(reacts);
// for each of the possible applications of the reaction to the reactants:
for(unsigned int i=0;i<prods.size();++i){
BOOST_LOG(rdInfoLog)<<" product set: " <<i<<std::endl;
for (unsigned int i = 0; i < prods.size(); ++i) {
BOOST_LOG(rdInfoLog) << " product set: " << i << std::endl;
// for each product of that application:
for(unsigned int j=0;j<prods[i].size();++j){
std::string psmiles=MolToSmiles(*prods[i][j],true);
BOOST_LOG(rdInfoLog)<<" product : " <<j<<" "<<psmiles<<std::endl;
}
for (unsigned int j = 0; j < prods[i].size(); ++j) {
std::string psmiles = MolToSmiles(*prods[i][j], true);
BOOST_LOG(rdInfoLog) << " product : " << j << " " << psmiles
<< std::endl;
}
}
}
int
main(int argc, char *argv[])
{
int main(int argc, char *argv[]) {
RDLog::InitLogs();
BuildSimpleMolecule();
WorkWithRingInfo();

View File

@@ -24,61 +24,58 @@
namespace mpi = boost::mpi;
int main(int argc, char* argv[])
{
int main(int argc, char *argv[]) {
mpi::environment env(argc, argv);
mpi::communicator world;
// construct the data:
std::vector<std::string> data;
if (world.rank() == 0) {
for(unsigned int i=0;i<100;++i){
std::string txt(i+1,'C');
for (unsigned int i = 0; i < 100; ++i) {
std::string txt(i + 1, 'C');
data.push_back(txt);
}
}
// broadcast it:
broadcast(world,data,0);
broadcast(world, data, 0);
// process it:
std::vector<unsigned int> res;
std::vector<std::vector<unsigned int> > allRes;
// start by finding our chunk:
unsigned int nProcs=world.size();
unsigned int chunkSize=data.size() / nProcs;
unsigned int extraBits=data.size() % nProcs;
unsigned int nProcs = world.size();
unsigned int chunkSize = data.size() / nProcs;
unsigned int extraBits = data.size() % nProcs;
// handle extra bits on the root node:
if( world.rank() == 0 ){
for(unsigned int i=0;i<extraBits;++i){
const std::string &elem=data[i];
if (world.rank() == 0) {
for (unsigned int i = 0; i < extraBits; ++i) {
const std::string &elem = data[i];
res.push_back(elem.length());
}
}
unsigned int pos=extraBits+world.rank()*chunkSize;
for(unsigned int i=0;i<chunkSize;++i){
const std::string &elem=data[pos++];
unsigned int pos = extraBits + world.rank() * chunkSize;
for (unsigned int i = 0; i < chunkSize; ++i) {
const std::string &elem = data[pos++];
res.push_back(elem.length());
}
if( world.rank() == 0 ){
gather(world,res,allRes,0);
if (world.rank() == 0) {
gather(world, res, allRes, 0);
} else {
gather(world,res,0);
gather(world, res, 0);
}
// reporting:
if(world.rank()==0){
for(unsigned int i=0;i<static_cast<unsigned int>(world.size());++i){
std::cout<<"results from process "<<i<<": ";
std::copy(allRes[i].begin(),allRes[i].end(),std::ostream_iterator<int,char>(std::cout, " "));
std::cout<<std::endl;
if (world.rank() == 0) {
for (unsigned int i = 0; i < static_cast<unsigned int>(world.size()); ++i) {
std::cout << "results from process " << i << ": ";
std::copy(allRes[i].begin(), allRes[i].end(),
std::ostream_iterator<int, char>(std::cout, " "));
std::cout << std::endl;
}
}
return 0;
}
}

View File

@@ -24,80 +24,79 @@
namespace mpi = boost::mpi;
void broadcastMols(mpi::communicator &world,std::vector<RDKit::ROMOL_SPTR> &mols){
void broadcastMols(mpi::communicator &world,
std::vector<RDKit::ROMOL_SPTR> &mols) {
std::vector<std::string> data;
if (world.rank() == 0) {
data.reserve(mols.size());
BOOST_FOREACH( const RDKit::ROMOL_SPTR &ptr, mols ) {
BOOST_FOREACH (const RDKit::ROMOL_SPTR &ptr, mols) {
std::string pickle;
RDKit::MolPickler::pickleMol(*ptr,pickle);
RDKit::MolPickler::pickleMol(*ptr, pickle);
data.push_back(pickle);
}
}
broadcast(world,data,0);
broadcast(world, data, 0);
if (world.rank() != 0) {
mols.reserve(data.size());
BOOST_FOREACH( const std::string &pickle, data ) {
RDKit::ROMol *mol=new RDKit::ROMol;
RDKit::MolPickler::molFromPickle(pickle,mol);
BOOST_FOREACH (const std::string &pickle, data) {
RDKit::ROMol *mol = new RDKit::ROMol;
RDKit::MolPickler::molFromPickle(pickle, mol);
mols.push_back(RDKit::ROMOL_SPTR(mol));
}
}
}
int main(int argc, char* argv[])
{
int main(int argc, char *argv[]) {
mpi::environment env(argc, argv);
mpi::communicator world;
// construct the data:
std::vector<RDKit::ROMOL_SPTR> data;
if (world.rank() == 0) {
for(unsigned int i=0;i<100;++i){
std::string txt(i+1,'C');
RDKit::ROMol *m=RDKit::SmilesToMol(txt);
for (unsigned int i = 0; i < 100; ++i) {
std::string txt(i + 1, 'C');
RDKit::ROMol *m = RDKit::SmilesToMol(txt);
data.push_back(RDKit::ROMOL_SPTR(m));
}
}
// broadcast it:
broadcastMols(world,data);
broadcastMols(world, data);
// process it:
std::vector<unsigned int> res;
std::vector<std::vector<unsigned int> > allRes;
// start by finding our chunk:
unsigned int nProcs=world.size();
unsigned int chunkSize=data.size() / nProcs;
unsigned int extraBits=data.size() % nProcs;
unsigned int nProcs = world.size();
unsigned int chunkSize = data.size() / nProcs;
unsigned int extraBits = data.size() % nProcs;
// handle extra bits on the root node:
if( world.rank() == 0 ){
for(unsigned int i=0;i<extraBits;++i){
if (world.rank() == 0) {
for (unsigned int i = 0; i < extraBits; ++i) {
res.push_back(data[i]->getNumAtoms());
}
}
unsigned int pos=extraBits+world.rank()*chunkSize;
for(unsigned int i=0;i<chunkSize;++i){
unsigned int pos = extraBits + world.rank() * chunkSize;
for (unsigned int i = 0; i < chunkSize; ++i) {
res.push_back(data[pos++]->getNumAtoms());
}
if( world.rank() == 0 ){
gather(world,res,allRes,0);
if (world.rank() == 0) {
gather(world, res, allRes, 0);
} else {
gather(world,res,0);
gather(world, res, 0);
}
// reporting:
if(world.rank()==0){
for(unsigned int i=0;i<static_cast<unsigned int>(world.size());++i){
std::cout<<"results from process "<<i<<": ";
std::copy(allRes[i].begin(),allRes[i].end(),std::ostream_iterator<int,char>(std::cout, " "));
std::cout<<std::endl;
if (world.rank() == 0) {
for (unsigned int i = 0; i < static_cast<unsigned int>(world.size()); ++i) {
std::cout << "results from process " << i << ": ";
std::copy(allRes[i].begin(), allRes[i].end(),
std::ostream_iterator<int, char>(std::cout, " "));
std::cout << std::endl;
}
}
return 0;
}
}

View File

@@ -2,25 +2,23 @@
#include <iostream>
#include <string>
#ifndef CLASSA_H
#define CLASSA_H
#ifdef WIN32
#pragma warning (disable: 4786) // warning: long & complicated stl warning
#pragma warning (disable: 4788) // warning: long & complicated stl warning
#pragma warning (disable: 4660)
#pragma warning (disable: 4275) // warning: non dll-interface class used as...
#pragma warning (disable: 4305) // warning: truncation from 'const double' to 'const float'
#pragma warning(disable : 4786) // warning: long & complicated stl warning
#pragma warning(disable : 4788) // warning: long & complicated stl warning
#pragma warning(disable : 4660)
#pragma warning(disable : 4275) // warning: non dll-interface class used as...
#pragma warning(disable : 4305) // warning: truncation from 'const double' to
// 'const float'
#endif
class classA {
public:
classA() {
setProp("useless", 10);
};
classA() { setProp("useless", 10); };
~classA() {}
void printA() const {
if (hasProp("useless")) {
std::cout << "has useless\n";
@@ -31,9 +29,7 @@ class classA {
return (dp_props.find(key) != dp_props.end());
}
void setProp(const std::string &key, int val) {
dp_props[key] = val;
}
void setProp(const std::string &key, int val) { dp_props[key] = val; }
private:
std::map<std::string, int> dp_props;

View File

@@ -7,24 +7,22 @@
#define CLASSC_H
#ifdef WIN32
#pragma warning (disable: 4786) // warning: long & complicated stl warning
#pragma warning (disable: 4788) // warning: long & complicated stl warning
#pragma warning (disable: 4660)
#pragma warning (disable: 4275) // warning: non dll-interface class used as...
#pragma warning (disable: 4305) // warning: truncation from 'const double' to 'const float'
#pragma warning(disable : 4786) // warning: long & complicated stl warning
#pragma warning(disable : 4788) // warning: long & complicated stl warning
#pragma warning(disable : 4660)
#pragma warning(disable : 4275) // warning: non dll-interface class used as...
#pragma warning(disable : 4305) // warning: truncation from 'const double' to
// 'const float'
#endif
typedef std::pair<std::string, int> STR_INT;
typedef std::vector<STR_INT> PAIR_VECT;
class classC {
public:
classC() {
setProp("useless", 10);
};
classC() { setProp("useless", 10); };
~classC() {}
void printC() const {
if (hasProp("useless")) {
std::cout << "has useless\n";
@@ -38,7 +36,7 @@ class classC {
return true;
}
}
return false;
return false;
}
void setProp(const std::string &key, int val) {
@@ -48,7 +46,6 @@ class classC {
private:
PAIR_VECT dp_props;
};
#endif

View File

@@ -4,13 +4,9 @@
#include <DataStructs/ExplicitBitVect.h>
namespace python = boost::python;
void testCrossA(classA *A) {
A->printA();
};
void testCrossA(classA *A) { A->printA(); };
void testCrossC(classC *C) {
C->printC();
};
void testCrossC(classC *C) { C->printC(); };
classC *getClassC() {
classC *nc = new classC();
@@ -18,12 +14,11 @@ classC *getClassC() {
}
ExplicitBitVect *getEBV() {
ExplicitBitVect *ebv = new ExplicitBitVect(20);
ExplicitBitVect *ebv = new ExplicitBitVect(20);
return ebv;
}
BOOST_PYTHON_MODULE(moduleB)
{
BOOST_PYTHON_MODULE(moduleB) {
python::def("testCrossA", testCrossA);
python::def("testCrossC", testCrossC);
python::def("GetClassC", getClassC,

View File

@@ -2,11 +2,12 @@
#include <boost/python.hpp>
#ifdef WIN32
#pragma warning (disable: 4786) // warning: long & complicated stl warning
#pragma warning (disable: 4788) // warning: long & complicated stl warning
#pragma warning (disable: 4660)
#pragma warning (disable: 4275) // warning: non dll-interface class used as...
#pragma warning (disable: 4305) // warning: truncation from 'const double' to 'const float'
#pragma warning(disable : 4786) // warning: long & complicated stl warning
#pragma warning(disable : 4788) // warning: long & complicated stl warning
#pragma warning(disable : 4660)
#pragma warning(disable : 4275) // warning: non dll-interface class used as...
#pragma warning(disable : 4305) // warning: truncation from 'const double' to
// 'const float'
#endif
namespace python = boost::python;
@@ -14,17 +15,10 @@ namespace python = boost::python;
struct A_wrapper {
static void wrap() {
python::class_<classA>("classA", python::init<>())
.def("printA", &classA::printA)
;
.def("printA", &classA::printA);
};
};
void wrap_classA() {
A_wrapper::wrap();
}
BOOST_PYTHON_MODULE(moduleA)
{
wrap_classA();
}
void wrap_classA() { A_wrapper::wrap(); }
BOOST_PYTHON_MODULE(moduleA) { wrap_classA(); }

View File

@@ -1,31 +1,24 @@
#ifdef WIN32
#pragma warning (disable: 4786) // warning: long & complicated stl warning
#pragma warning (disable: 4788) // warning: long & complicated stl warning
#pragma warning (disable: 4660)
#pragma warning (disable: 4275) // warning: non dll-interface class used as...
#pragma warning (disable: 4305) // warning: truncation from 'const double' to 'const float'
#pragma warning(disable : 4786) // warning: long & complicated stl warning
#pragma warning(disable : 4788) // warning: long & complicated stl warning
#pragma warning(disable : 4660)
#pragma warning(disable : 4275) // warning: non dll-interface class used as...
#pragma warning(disable : 4305) // warning: truncation from 'const double' to
// 'const float'
#endif
#include "classC.h"
#include <boost/python.hpp>
namespace python = boost::python;
struct C_wrapper {
static void wrap() {
python::class_<classC>("classC", python::init<>())
.def("printC", &classC::printC)
;
.def("printC", &classC::printC);
};
};
void wrap_classC() {
C_wrapper::wrap();
}
BOOST_PYTHON_MODULE(moduleC)
{
wrap_classC();
}
void wrap_classC() { C_wrapper::wrap(); }
BOOST_PYTHON_MODULE(moduleC) { wrap_classC(); }

View File

@@ -7,21 +7,19 @@
#include <iostream>
#include <Invariant/Invariant.h>
int idx=0;
int idx = 0;
class Contained {
public:
public:
Contained() : _idx(idx) {
std::cout << "Contained CTOR:" << _idx << std::endl;
idx++;
}
~Contained() {
std::cout << "Contained DTOR:" << _idx << std::endl;
}
Contained( const Contained &other) : _idx(other._idx) {
~Contained() { std::cout << "Contained DTOR:" << _idx << std::endl; }
Contained(const Contained &other) : _idx(other._idx) {
std::cout << "Contained Copy CTOR:" << _idx << std::endl;
}
Contained &operator=( const Contained &other) {
Contained &operator=(const Contained &other) {
_idx = other._idx;
std::cout << "Contained Assign:" << _idx << std::endl;
return *this;
@@ -29,171 +27,168 @@ public:
int _idx;
};
template <typename T>
struct larger_of {
T operator()(T arg1, T arg2) { return arg1 > arg2 ? arg1 : arg2; };
};
template <typename T>
struct larger_of {
T operator()(T arg1,T arg2) { return arg1>arg2 ? arg1 : arg2; };
struct charptr_functor {
bool operator()(const char *s1, const char *s2) const {
return strcmp(s1, s2) < 0;
};
struct charptr_functor {
bool operator()(const char* s1, const char* s2) const
{
return strcmp(s1, s2) < 0;
};
};
};
class Dict {
public:
typedef std::map<const char *,boost::any,charptr_functor> DataType;
Dict() {_data.clear();};
Dict(const Dict &other) { _data = other._data; };
Dict &operator=(const Dict &other) {
_data = other._data;
return *this;
};
bool hasVal(const char *what) const {
//DataType::const_iterator i;
//std::cerr << "\t\tpre: ";
//for(i=_data.begin();i!=_data.end();i++){
//std::cerr << (*i).first << " ";
//}
//std::cerr << std::endl;
//std::cerr << "\thasVal: " << what << ": " << _data.count(what) << std::endl;
//std::cerr << "\t\tpost: ";
//for(i=_data.begin();i!=_data.end();i++){
//std::cerr << (*i).first << " ";
//}
//std::cerr << std::endl;
return _data.count(what);
};
bool hasVal(const std::string &what) const {
return hasVal(what.c_str());
};
//
// We're going to try and be somewhat crafty about this getVal stuff to make these
// containers a little bit more generic. The normal behavior here is that the
// value being queried must be directly castable to type T. We'll robustify a
// little bit by trying that and, if the cast fails, attempting a couple of
// other casts, which will then be lexically cast to type T.
//
template <typename T>
void getVal(const char *what,T &res) const {
PRECONDITION(hasVal(what),"getVal called on non-existent key");
const boost::any &val = _data.find(what)->second;
res = boost::any_cast<T>(val);
};
void getVal(const char *what,std::string &res) const {
PRECONDITION(hasVal(what),"getVal called on non-existent key");
const boost::any &val = _data.find(what)->second;
try{
res = boost::any_cast<std::string>(val);
} catch (const boost::bad_any_cast &) {
if(val.type()==typeid(int)){
res = boost::lexical_cast<std::string>(boost::any_cast<int>(val));
} else if(val.type()==typeid(long)){
res = boost::lexical_cast<std::string>(boost::any_cast<long>(val));
} else if(val.type()==typeid(float)){
res = boost::lexical_cast<std::string>(boost::any_cast<float>(val));
} else if(val.type()==typeid(double)){
res = boost::lexical_cast<std::string>(boost::any_cast<double>(val));
} else if(val.type()==typeid(const char *)){
res = std::string(boost::any_cast<const char *>(val));
} else {
throw;
}
}
};
template <typename T>
void getVal(const std::string &what,T &res) const { getVal(what.c_str(),res); };
template <typename T>
void setVal(const char *what,T &val){
_data[what] = boost::any(val);
};
template <typename T>
void setVal(const std::string &what,T &val) { setVal(what.c_str(),val); };
void clearVal(const char *what) { _data.erase(what); };
void reset() { _data.clear(); };
private:
DataType _data;
public:
typedef std::map<const char *, boost::any, charptr_functor> DataType;
Dict() { _data.clear(); };
Dict(const Dict &other) { _data = other._data; };
Dict &operator=(const Dict &other) {
_data = other._data;
return *this;
};
bool hasVal(const char *what) const {
// DataType::const_iterator i;
// std::cerr << "\t\tpre: ";
// for(i=_data.begin();i!=_data.end();i++){
// std::cerr << (*i).first << " ";
//}
// std::cerr << std::endl;
// std::cerr << "\thasVal: " << what << ": " << _data.count(what) <<
// std::endl;
// std::cerr << "\t\tpost: ";
// for(i=_data.begin();i!=_data.end();i++){
// std::cerr << (*i).first << " ";
//}
// std::cerr << std::endl;
return _data.count(what);
};
bool hasVal(const std::string &what) const { return hasVal(what.c_str()); };
void test1(){
//
// We're going to try and be somewhat crafty about this getVal stuff to make
// these containers a little bit more generic. The normal behavior here is
// that the value being queried must be directly castable to type T. We'll
// robustify a little bit by trying that and, if the cast fails, attempting a
// couple of other casts, which will then be lexically cast to type T.
//
template <typename T>
void getVal(const char *what, T &res) const {
PRECONDITION(hasVal(what), "getVal called on non-existent key");
const boost::any &val = _data.find(what)->second;
res = boost::any_cast<T>(val);
};
void getVal(const char *what, std::string &res) const {
PRECONDITION(hasVal(what), "getVal called on non-existent key");
const boost::any &val = _data.find(what)->second;
try {
res = boost::any_cast<std::string>(val);
} catch (const boost::bad_any_cast &) {
if (val.type() == typeid(int)) {
res = boost::lexical_cast<std::string>(boost::any_cast<int>(val));
} else if (val.type() == typeid(long)) {
res = boost::lexical_cast<std::string>(boost::any_cast<long>(val));
} else if (val.type() == typeid(float)) {
res = boost::lexical_cast<std::string>(boost::any_cast<float>(val));
} else if (val.type() == typeid(double)) {
res = boost::lexical_cast<std::string>(boost::any_cast<double>(val));
} else if (val.type() == typeid(const char *)) {
res = std::string(boost::any_cast<const char *>(val));
} else {
throw;
}
}
};
template <typename T>
void getVal(const std::string &what, T &res) const {
getVal(what.c_str(), res);
};
template <typename T>
void setVal(const char *what, T &val) {
_data[what] = boost::any(val);
};
template <typename T>
void setVal(const std::string &what, T &val) {
setVal(what.c_str(), val);
};
void clearVal(const char *what) { _data.erase(what); };
void reset() { _data.clear(); };
private:
DataType _data;
};
// Checks the _idx value of Contained objects after default construction, copy
// construction and assignment. Each statement appears once; the pre- and
// post-format copies of every line have been merged.
void test1() {
  // basic containers
  std::cout << "----------- TEST1 -----------" << std::endl;
  Contained c;
  TEST_ASSERT(c._idx == 0);
  Contained d(c);
  TEST_ASSERT(d._idx == 0);
  Contained e;
  TEST_ASSERT(e._idx == 1);

  std::cout << "assign" << std::endl;
  e = c;
  TEST_ASSERT(e._idx == 0);
  std::cout << " Done" << std::endl;
}
void test2(){
void test2() {
// shared pointers with containers
std::cout << "----------- TEST2 -----------" << std::endl;
typedef boost::shared_ptr<Contained> CONT_SPTR;
CONT_SPTR s_d(new Contained());
TEST_ASSERT(s_d.use_count()==1);
CONT_SPTR s_e=s_d;
TEST_ASSERT(s_e.use_count()==2);
TEST_ASSERT(s_d.use_count() == 1);
CONT_SPTR s_e = s_d;
TEST_ASSERT(s_e.use_count() == 2);
s_d.reset();
TEST_ASSERT(s_e.use_count()==1);
TEST_ASSERT(s_e.use_count() == 1);
s_e.reset();
std::cout <<" Done" << std::endl;
std::cout << " Done" << std::endl;
}
void test3(){
void test3() {
// basic dict stuff
std::cout << "----------- TEST3 -----------" << std::endl;
Dict dict;
TEST_ASSERT(!dict.hasVal("foo"));
int tmp=4;
dict.setVal("foo",tmp);
int tmp = 4;
dict.setVal("foo", tmp);
TEST_ASSERT(dict.hasVal("foo"));
tmp=0;
dict.getVal("foo",tmp);
TEST_ASSERT(tmp==4);
tmp = 0;
dict.getVal("foo", tmp);
TEST_ASSERT(tmp == 4);
Dict d2(dict);
TEST_ASSERT(d2.hasVal("foo"));
d2.getVal("foo",tmp);
TEST_ASSERT(tmp==4);
d2.getVal("foo", tmp);
TEST_ASSERT(tmp == 4);
Dict d3;
d3=d2;
d3 = d2;
TEST_ASSERT(d3.hasVal("foo"));
d3.getVal("foo",tmp);
TEST_ASSERT(tmp==4);
std::cout <<" Done" << std::endl;
d3.getVal("foo", tmp);
TEST_ASSERT(tmp == 4);
std::cout << " Done" << std::endl;
}
void test4(){
void test4() {
// dict containing Contained objects
std::cout << "----------- TEST4 -----------" << std::endl;
@@ -201,33 +196,31 @@ void test4(){
TEST_ASSERT(!dict.hasVal("foo"));
Contained tmp;
int hold=tmp._idx;
int hold = tmp._idx;
std::cout << " set" << std::endl;
dict.setVal("foo",tmp);
dict.setVal("foo", tmp);
std::cout << " query" << std::endl;
TEST_ASSERT(dict.hasVal("foo"));
std::cout << " get" << std::endl;
dict.getVal("foo",tmp);
TEST_ASSERT(tmp._idx==hold);
dict.getVal("foo", tmp);
TEST_ASSERT(tmp._idx == hold);
std::cout << " copy" << std::endl;
Dict d2(dict);
TEST_ASSERT(d2.hasVal("foo"));
d2.getVal("foo",tmp);
TEST_ASSERT(tmp._idx==hold);
d2.getVal("foo", tmp);
TEST_ASSERT(tmp._idx == hold);
Dict d3;
d3=d2;
d3 = d2;
TEST_ASSERT(d3.hasVal("foo"));
d3.getVal("foo",tmp);
TEST_ASSERT(tmp._idx==hold);
d3.getVal("foo", tmp);
TEST_ASSERT(tmp._idx == hold);
std::cout <<" Done" << std::endl;
std::cout << " Done" << std::endl;
}
void test5(){
void test5() {
// dict containing sptrs to Contained objects
std::cout << "----------- TEST5 -----------" << std::endl;
typedef boost::shared_ptr<Contained> CONT_SPTR;
@@ -236,38 +229,34 @@ void test5(){
TEST_ASSERT(!dict.hasVal("foo"));
CONT_SPTR tmp(new Contained());
int hold=tmp->_idx;
int hold = tmp->_idx;
std::cout << " set" << std::endl;
dict.setVal("foo",tmp);
dict.setVal("foo", tmp);
std::cout << " query" << std::endl;
TEST_ASSERT(dict.hasVal("foo"));
std::cout << " get" << std::endl;
dict.getVal("foo",tmp);
TEST_ASSERT(tmp->_idx==hold);
dict.getVal("foo", tmp);
TEST_ASSERT(tmp->_idx == hold);
std::cout << " copy" << std::endl;
Dict d2(dict);
TEST_ASSERT(d2.hasVal("foo"));
d2.getVal("foo",tmp);
TEST_ASSERT(tmp->_idx==hold);
d2.getVal("foo", tmp);
TEST_ASSERT(tmp->_idx == hold);
Dict d3;
d3=d2;
d3 = d2;
TEST_ASSERT(d3.hasVal("foo"));
d3.getVal("foo",tmp);
TEST_ASSERT(tmp->_idx==hold);
d3.getVal("foo", tmp);
TEST_ASSERT(tmp->_idx == hold);
std::cout <<" Done" << std::endl;
std::cout << " Done" << std::endl;
}
int main(){
int main() {
test1();
test2();
test3();
test4();
test5();
}

View File

@@ -2,25 +2,23 @@
#include <iostream>
#include <string>
#ifndef CLASSA_H
#define CLASSA_H
#ifdef WIN32
#pragma warning (disable: 4786) // warning: long & complicated stl warning
#pragma warning (disable: 4788) // warning: long & complicated stl warning
#pragma warning (disable: 4660)
#pragma warning (disable: 4275) // warning: non dll-interface class used as...
#pragma warning (disable: 4305) // warning: truncation from 'const double' to 'const float'
#pragma warning(disable : 4786) // warning: long & complicated stl warning
#pragma warning(disable : 4788) // warning: long & complicated stl warning
#pragma warning(disable : 4660)
#pragma warning(disable : 4275) // warning: non dll-interface class used as...
#pragma warning(disable : 4305) // warning: truncation from 'const double' to
// 'const float'
#endif
class classA {
public:
classA() {
setProp("useless", 10);
};
classA() { setProp("useless", 10); };
~classA() {}
void printA() const {
if (hasProp("useless")) {
std::cout << "has useless\n";
@@ -31,9 +29,7 @@ class classA {
return (dp_props.find(key) != dp_props.end());
}
void setProp(const std::string &key, int val) {
dp_props[key] = val;
}
void setProp(const std::string &key, int val) { dp_props[key] = val; }
private:
std::map<std::string, int> dp_props;

View File

@@ -7,24 +7,22 @@
#define CLASSC_H
#ifdef WIN32
#pragma warning (disable: 4786) // warning: long & complicated stl warning
#pragma warning (disable: 4788) // warning: long & complicated stl warning
#pragma warning (disable: 4660)
#pragma warning (disable: 4275) // warning: non dll-interface class used as...
#pragma warning (disable: 4305) // warning: truncation from 'const double' to 'const float'
#pragma warning(disable : 4786) // warning: long & complicated stl warning
#pragma warning(disable : 4788) // warning: long & complicated stl warning
#pragma warning(disable : 4660)
#pragma warning(disable : 4275) // warning: non dll-interface class used as...
#pragma warning(disable : 4305) // warning: truncation from 'const double' to
// 'const float'
#endif
typedef std::pair<std::string, int> STR_INT;
typedef std::vector<STR_INT> PAIR_VECT;
class classC {
public:
classC() {
setProp("useless", 10);
};
classC() { setProp("useless", 10); };
~classC() {}
void printC() const {
if (hasProp("useless")) {
std::cout << "has useless\n";
@@ -38,7 +36,7 @@ class classC {
return true;
}
}
return false;
return false;
}
void setProp(const std::string &key, int val) {
@@ -48,7 +46,6 @@ class classC {
private:
PAIR_VECT dp_props;
};
#endif

View File

@@ -4,18 +4,11 @@
namespace python = boost::python;
// Calls printA() on a classA instance handed in from another module.
// The pointer is dereferenced without a null check.
void testCrossA(classA *A) { A->printA(); }
// Calls printC() on a classC instance handed in from another module.
// The pointer is dereferenced without a null check.
void testCrossC(classC *C) { C->printC(); }
// Python module "moduleB": exposes the two cross-module test functions.
BOOST_PYTHON_MODULE(moduleB) {
  python::def("testCrossA", testCrossA);
  python::def("testCrossC", testCrossC);
}

View File

@@ -2,11 +2,12 @@
#include <boost/python.hpp>
#ifdef WIN32
#pragma warning (disable: 4786) // warning: long & complicated stl warning
#pragma warning (disable: 4788) // warning: long & complicated stl warning
#pragma warning (disable: 4660)
#pragma warning (disable: 4275) // warning: non dll-interface class used as...
#pragma warning (disable: 4305) // warning: truncation from 'const double' to 'const float'
#pragma warning(disable : 4786) // warning: long & complicated stl warning
#pragma warning(disable : 4788) // warning: long & complicated stl warning
#pragma warning(disable : 4660)
#pragma warning(disable : 4275) // warning: non dll-interface class used as...
#pragma warning(disable : 4305) // warning: truncation from 'const double' to
// 'const float'
#endif
namespace python = boost::python;
@@ -14,17 +15,10 @@ namespace python = boost::python;
// Groups the Boost.Python registration code for classA.
struct A_wrapper {
  // Registers classA with a no-argument constructor and its printA method.
  static void wrap() {
    python::class_<classA>("classA", python::init<>())
        .def("printA", &classA::printA);
  }
};
// Free-function entry point for registering classA.
void wrap_classA() { A_wrapper::wrap(); }

// Python module "moduleA": registers classA on import.
BOOST_PYTHON_MODULE(moduleA) { wrap_classA(); }

View File

@@ -2,11 +2,12 @@
#include <boost/python.hpp>
#ifdef WIN32
#pragma warning (disable: 4786) // warning: long & complicated stl warning
#pragma warning (disable: 4788) // warning: long & complicated stl warning
#pragma warning (disable: 4660)
#pragma warning (disable: 4275) // warning: non dll-interface class used as...
#pragma warning (disable: 4305) // warning: truncation from 'const double' to 'const float'
#pragma warning(disable : 4786) // warning: long & complicated stl warning
#pragma warning(disable : 4788) // warning: long & complicated stl warning
#pragma warning(disable : 4660)
#pragma warning(disable : 4275) // warning: non dll-interface class used as...
#pragma warning(disable : 4305) // warning: truncation from 'const double' to
// 'const float'
#endif
namespace python = boost::python;
@@ -14,17 +15,10 @@ namespace python = boost::python;
// Groups the Boost.Python registration code for classC.
struct C_wrapper {
  // Registers classC with a no-argument constructor and its printC method.
  static void wrap() {
    python::class_<classC>("classC", python::init<>())
        .def("printC", &classC::printC);
  }
};
// Free-function entry point for registering classC.
void wrap_classC() { C_wrapper::wrap(); }

// Python module "moduleC": registers classC on import.
BOOST_PYTHON_MODULE(moduleC) { wrap_classC(); }

View File

@@ -6,10 +6,6 @@
namespace python = boost::python;
// Python module "moda": exposes ClassA with its Get4 method.
BOOST_PYTHON_MODULE(moda) {
  python::class_<ClassA>("ClassA").def("Get4", &ClassA::get4);
}

View File

@@ -6,13 +6,10 @@
namespace python = boost::python;
// Python module "modb": exposes ClassB.
// ReturnOther is registered with manage_new_object, so Python takes ownership
// of the ClassA pointer that returnOther() hands back.
BOOST_PYTHON_MODULE(modb) {
  python::class_<ClassB>("ClassB")
      .def("Get3", &ClassB::get3)
      .def("ReturnOther", &ClassB::returnOther,
           python::return_value_policy<python::manage_new_object>())
      .def("AcceptOther", &ClassB::acceptOther);
}

View File

@@ -4,15 +4,14 @@
#include <boost/python.hpp>
// Minimal demo class: a single method that returns the constant 4.
class ClassA {
 public:
  // Always returns 4.
  int get4() { return 4; }
};
class ClassB{
public:
class ClassB {
public:
ClassA *returnOther() { return new ClassA; };
int get3() { return 3; };
int acceptOther(ClassA *other) { return other->get4();};
int acceptOther(ClassA *other) { return other->get4(); };
};

View File

@@ -7,16 +7,14 @@
#define PY_ARRAY_UNIQUE_SYMBOL RD_array_API
#include "numpy/oldnumeric.h"
namespace python = boost::python;
double GetFirstElement(python::numeric::array &x){
double GetFirstElement(python::numeric::array &x) {
PyArrayObject *ptr = (PyArrayObject *)x.ptr();
void* data = ptr->data;
double res=0.0;
void *data = ptr->data;
double res = 0.0;
switch(ptr->descr->type_num)
{
switch (ptr->descr->type_num) {
case PyArray_DOUBLE:
res = ((double *)data)[0];
break;
@@ -29,14 +27,8 @@ double GetFirstElement(python::numeric::array &x){
case PyArray_INT:
res = (double)((int *)data)[0];
break;
}
}
return res;
}
// Python module "linalg": exposes GetFirstElement.
BOOST_PYTHON_MODULE(linalg) {
  python::def("GetFirstElement", GetFirstElement);
}

View File

@@ -5,18 +5,16 @@
#include <boost/python.hpp>
namespace python = boost::python;
// ------
//
// This one is relatively easy:
// expose a function with a default argument
//
// ------
// Declaration carrying the default argument (plus = 3). The default may be
// given only once, so the definition below repeats the signature without it.
int func2(int v1, int plus = 3);
// Returns v1 + plus.
int func2(int v1, int plus) { return v1 + plus; }
BOOST_PYTHON_FUNCTION_OVERLOADS(f2_overloads, func2, 1, 2)
// ------
//
// More complex:
@@ -24,25 +22,21 @@ BOOST_PYTHON_FUNCTION_OVERLOADS(f2_overloads, func2, 1, 2)
//
// ------
// Declaration carrying the default argument (plus = 3); it is stated here
// once and not repeated on the definition.
template <typename T>
T func(T v1, int plus = 3);

// Returns v1 + plus, converted to T.
template <typename T>
T func(T v1, int plus) {
  return v1 + plus;
}
int (*f1_int)(int,int=3)=func; // gotta love that syntax!
int (*f1_int)(int, int = 3) = func; // gotta love that syntax!
BOOST_PYTHON_FUNCTION_OVERLOADS(f1_int_overloads, f1_int, 1, 2)
float (*f1_float)(float,int=3)=func;
float (*f1_float)(float, int = 3) = func;
BOOST_PYTHON_FUNCTION_OVERLOADS(f1_float_overloads, f1_float, 1, 2)
// Python module "overloads": exposes func2 as "f2" and the int and float
// versions of func as two overloads of "f1", each with keyword names
// ("v1", "plus") and the generated default-argument stubs.
BOOST_PYTHON_MODULE(overloads) {
  python::def("f2", func2, f2_overloads(python::args("v1", "plus")));

  python::def("f1", f1_int, f1_int_overloads(python::args("v1", "plus")));
  python::def("f1", f1_float, f1_float_overloads(python::args("v1", "plus")));
}

View File

@@ -6,29 +6,22 @@
#include <boost/python/detail/api_placeholder.hpp>
namespace python = boost::python;
// ----------
//
// In both cases here, the restriction on the object passed in is
// solely that it support the functions used.
//
// ----------
// Returns python::len(seq) for any Python object that supports len().
int seq_len(python::object seq) { return python::len(seq); }
// Returns seq[0] + seq[1], extracting both elements as int. The object only
// needs to support indexing with 0 and 1.
int sum_first2(python::object seq) {
  const int first = python::extract<int>(seq[0]);
  const int second = python::extract<int>(seq[1]);
  return first + second;
}
// Python module "python_objs": exposes seq_len and sum_first2.
BOOST_PYTHON_MODULE(python_objs) {
  python::def("seq_len", seq_len);
  python::def("sum_first2", sum_first2);
}

View File

@@ -3,7 +3,6 @@
// accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
#include <boost/python.hpp>
#include <boost/python/suite/indexing/vector_indexing_suite.hpp>
#include <boost/python/detail/api_placeholder.hpp>
@@ -14,90 +13,68 @@
namespace python = boost::python;
// Small value holder used to demonstrate wrapping of raw and shared pointers.
class DemoKlass {
 public:
  // Stores `v`; explicit so an int is never silently converted to a DemoKlass.
  explicit DemoKlass(int v) : val_(v) {}
  // Returns the value given at construction.
  int getVal() const { return val_; }

 private:
  int val_;
};
typedef boost::shared_ptr<DemoKlass> DemoKlassSPtr;
typedef std::vector<DemoKlass*> DemoKlassPtrVect;
typedef std::vector<DemoKlass *> DemoKlassPtrVect;
typedef std::vector<DemoKlassSPtr> DemoKlassSPtrVect;
// Returns a newly heap-allocated DemoKlass holding `v` as a raw pointer.
DemoKlass *buildPtr(int v) { return new DemoKlass(v); }

// Returns a newly heap-allocated DemoKlass holding `v`, owned by a shared
// pointer.
DemoKlassSPtr buildSPtr(int v) { return DemoKlassSPtr(new DemoKlass(v)); }
// Builds a vector of `sz` raw pointers to new DemoKlass objects holding the
// values 0 .. sz-1. The vector does not delete the objects it points to.
DemoKlassPtrVect buildPtrVector(unsigned int sz) {
  DemoKlassPtrVect res;
  res.reserve(sz);  // final size is known up front
  for (unsigned int i = 0; i < sz; ++i) {
    res.push_back(new DemoKlass(i));
  }
  return res;
}
// Builds a vector of `sz` shared pointers to new DemoKlass objects holding the
// values 0 .. sz-1.
DemoKlassSPtrVect buildSPtrVector(unsigned int sz) {
  DemoKlassSPtrVect res;
  res.reserve(sz);  // final size is known up front
  for (unsigned int i = 0; i < sz; ++i) {
    res.push_back(DemoKlassSPtr(new DemoKlass(i)));
  }
  return res;
}
// Iterable wrapper around a vector of shared DemoKlass pointers; begin()/end()
// simply expose the underlying vector's iterators.
class DemoContainer {
 public:
  typedef DemoKlassSPtrVect::iterator iterator;
  typedef DemoKlassSPtrVect::const_iterator const_iterator;

  // Fills the container with the result of buildSPtrVector(sz).
  explicit DemoContainer(unsigned int sz) { vect_ = buildSPtrVector(sz); }

  iterator begin() { return vect_.begin(); }
  iterator end() { return vect_.end(); }
  const_iterator begin() const { return vect_.begin(); }
  const_iterator end() const { return vect_.end(); }

 private:
  DemoKlassSPtrVect vect_;
};
// Python module "SPtrTestModule": exposes DemoKlass (held by shared pointer),
// the four builder functions, both vector types and the iterable
// DemoContainer.
BOOST_PYTHON_MODULE(SPtrTestModule) {
  python::class_<DemoKlass, DemoKlassSPtr>("DemoKlass", "demo class",
                                           python::init<int>())
      .def("GetVal", &DemoKlass::getVal);

  // buildPtr returns a raw new'd pointer, so Python is told to manage it.
  python::def("buildPtr", buildPtr,
              python::return_value_policy<python::manage_new_object>());
  python::def("buildSPtr", buildSPtr);

  python::class_<DemoKlassPtrVect>("DemoKlassPtrVec")
      .def(python::vector_indexing_suite<DemoKlassPtrVect>());
  python::def("buildPtrVector", buildPtrVector);

  python::class_<DemoKlassSPtrVect>("DemoKlassSPtrVec")
      .def(python::vector_indexing_suite<DemoKlassSPtrVect, true>());
  python::def("buildSPtrVector", buildSPtrVector);

  python::class_<DemoContainer>("DemoContainer", "demo container",
                                python::init<unsigned int>())
      .def("__iter__", python::iterator<DemoContainer>());
}

View File

@@ -1,5 +1,5 @@
// $Id$
//
//
// Copyright (C) 2007,2008 Greg Landrum
//
// @@ All Rights Reserved @@
@@ -24,56 +24,54 @@ SQLITE_EXTENSION_INIT1
#include <string>
#include <map>
// Copies the text payload of an SQLite value into a std::string.
// sqlite3_value_text() can return NULL (for example for an SQL NULL); that
// case yields an empty string instead of passing a null pointer to the
// std::string constructor.
std::string stringFromTextArg(sqlite3_value *arg) {
  // Fetch the text before asking for its size, so the byte count refers to
  // the text representation.
  const unsigned char *text = sqlite3_value_text(arg);
  const int nBytes = sqlite3_value_bytes(arg);
  if (!text || nBytes <= 0) {
    return std::string();
  }
  return std::string(reinterpret_cast<const char *>(text), nBytes);
}
// Copies the raw bytes of an SQLite BLOB value into a std::string.
// sqlite3_value_blob() can return NULL (for example for a zero-length BLOB);
// that case yields an empty string instead of passing a null pointer to the
// std::string constructor.
std::string stringFromBlobArg(sqlite3_value *arg) {
  // Fetch the blob before asking for its size, so the byte count refers to
  // the blob representation.
  const void *blob = sqlite3_value_blob(arg);
  const int nBytes = sqlite3_value_bytes(arg);
  if (!blob || nBytes <= 0) {
    return std::string();
  }
  return std::string(static_cast<const char *>(blob), nBytes);
}
// Rebuilds a molecule from the pickle stored in a BLOB argument.
// Returns a heap-allocated ROMol that the caller must delete, or 0 when the
// ROMol constructor throws MolPicklerException.
RDKit::ROMol *molFromBlobArg(sqlite3_value *arg) {
  const std::string pkl = stringFromBlobArg(arg);
  try {
    return new RDKit::ROMol(pkl);
  } catch (const RDKit::MolPicklerException &) {
    return 0;
  }
}
// Rebuilds a bit vector from the pickle stored in a BLOB argument.
// Returns a heap-allocated ExplicitBitVect that the caller must delete, or 0
// when the constructor throws ValueErrorException.
ExplicitBitVect *ebvFromBlobArg(sqlite3_value *arg) {
  const std::string pkl = stringFromBlobArg(arg);
  try {
    return new ExplicitBitVect(pkl);
  } catch (const ValueErrorException &) {
    return 0;
  }
}
// Rebuilds a sparse integer vector from the pickle stored in a BLOB argument.
// Returns a heap-allocated SparseIntVect<T> that the caller must delete, or 0
// when the constructor throws ValueErrorException.
template <typename T>
RDKit::SparseIntVect<T> *sivFromBlobArg(sqlite3_value *arg) {
  const std::string pkl = stringFromBlobArg(arg);
  try {
    return new RDKit::SparseIntVect<T>(pkl);
  } catch (const ValueErrorException &) {
    return 0;
  }
}
/* ---------------------------------
Benchmarking results.
@@ -82,263 +80,249 @@ RDKit::SparseIntVect<T> *sivFromBlobArg(sqlite3_value *arg){
Simple access: select count(*) from molecules where length(molpkl)>40;
0.3s
depickle : select count(*) from molecules where rdk_molNumAtoms(molpkl)>40;
depickle : select count(*) from molecules where
rdk_molNumAtoms(molpkl)>40;
11.3s
substruct1 : select count(*) from molecules where
substruct1 : select count(*) from molecules where
rdk_molHasSubstruct(molpkl,'c1ncncn1');
18.0s
substruct2 : select count(*) from molecules where
substruct2 : select count(*) from molecules where
rdk_molHasSubstruct(molpkl,'[#6;r10]');
15.8
3 Oct 2007:
depickle : select count(*) from molecules where rdk_molNumAtoms(molpkl)>40;
depickle : select count(*) from molecules where
rdk_molNumAtoms(molpkl)>40;
9.4s
mw : select count(*) from molecules where rdk_molAMW(molpkl)<200;
9.7s
--------------------------------- */
// SQL function body: returns the atom count of the molecule pickled in
// argument 1, or raises an SQL error if the BLOB cannot be depickled.
static void numAtomsFunc(sqlite3_context *context, int argc,
                         sqlite3_value **argv) {
  RDKit::ROMol *m = molFromBlobArg(argv[0]);
  if (!m) {
    std::string errorMsg = "BLOB could not be converted into a molecule";
    sqlite3_result_error(context, errorMsg.c_str(), errorMsg.length());
    return;
  }
  const int res = m->getNumAtoms();
  delete m;
  sqlite3_result_int(context, res);
}
// SQL function body: returns RDKit::Descriptors::CalcAMW() for the molecule
// pickled in argument 1, or raises an SQL error if the BLOB cannot be
// depickled.
static void molWtFunc(sqlite3_context *context, int argc,
                      sqlite3_value **argv) {
  RDKit::ROMol *m = molFromBlobArg(argv[0]);
  if (!m) {
    std::string errorMsg = "BLOB could not be converted into a molecule";
    sqlite3_result_error(context, errorMsg.c_str(), errorMsg.length());
    return;
  }
  const double res = RDKit::Descriptors::CalcAMW(*m);
  delete m;
  sqlite3_result_double(context, res);
}
// SQL function body: returns the first output of CalcCrippenDescriptors() for
// the molecule pickled in argument 1 (the second output is discarded), or
// raises an SQL error if the BLOB cannot be depickled.
static void molLogPFunc(sqlite3_context *context, int argc,
                        sqlite3_value **argv) {
  RDKit::ROMol *m = molFromBlobArg(argv[0]);
  if (!m) {
    std::string errorMsg = "BLOB could not be converted into a molecule";
    sqlite3_result_error(context, errorMsg.c_str(), errorMsg.length());
    return;
  }
  double res, tmp;
  RDKit::Descriptors::CalcCrippenDescriptors(*m, res, tmp);
  delete m;
  sqlite3_result_double(context, res);
}
// SQL function body: parses the SMILES text in argument 1 and returns the
// pickled molecule as a BLOB. Raises an SQL error when SmilesToMol() returns
// null or throws MolSanitizeException.
static void smilesToBlob(sqlite3_context *context, int argc,
                         sqlite3_value **argv) {
  const std::string smiles = stringFromTextArg(argv[0]);
  RDKit::ROMol *m = 0;
  try {
    m = RDKit::SmilesToMol(smiles);
  } catch (RDKit::MolSanitizeException &) {
    m = 0;
  }
  if (!m) {
    std::string errorMsg = "SMILES could not be converted into a molecule";
    sqlite3_result_error(context, errorMsg.c_str(), errorMsg.length());
    return;
  }
  std::string text;
  RDKit::MolPickler::pickleMol(*m, text);
  delete m;
  // SQLITE_TRANSIENT makes SQLite take its own copy of the bytes before
  // `text` is destroyed.
  sqlite3_result_blob(context, text.c_str(), text.length(), SQLITE_TRANSIENT);
}
// SQL function body: reports whether the molecule pickled in argument 1
// matches the SMARTS pattern in argument 2.
// Parsed patterns are cached, keyed on the SMARTS text, in the
// std::map<std::string, boost::any> reached through sqlite3_user_data(); a
// pattern that fails to parse is cached as a null pointer.
static void molHasSubstruct(sqlite3_context *context, int argc,
                            sqlite3_value **argv) {
  RDKit::ROMol *m = molFromBlobArg(argv[0]);
  if (!m) {
    std::string errorMsg =
        "BLOB (argument 1) could not be converted into a molecule";
    sqlite3_result_error(context, errorMsg.c_str(), errorMsg.length());
    return;
  }
  std::string smarts = stringFromTextArg(argv[1]);

  std::map<std::string, boost::any> &molMap =
      *static_cast<std::map<std::string, boost::any> *>(
          sqlite3_user_data(context));
  RDKit::ROMol *patt = 0;
  std::map<std::string, boost::any>::iterator cached = molMap.find(smarts);
  if (cached != molMap.end()) {
    // The map keeps the shared pointer alive, so the raw pointer stays valid.
    patt = boost::any_cast<RDKit::ROMOL_SPTR>(cached->second).get();
  } else {
    patt = static_cast<RDKit::ROMol *>(RDKit::SmartsToMol(smarts));
    molMap[smarts] = boost::any(RDKit::ROMOL_SPTR(patt));
  }
  if (!patt) {
    delete m;  // was leaked on this error path
    std::string errorMsg =
        "SMARTS (argument 2) could not be converted into a molecule";
    sqlite3_result_error(context, errorMsg.c_str(), errorMsg.length());
    return;
  }
  RDKit::MatchVectType match;
  int res = RDKit::SubstructMatch(*m, *patt, match, true, false, true);
  delete m;
  sqlite3_result_int(context, res);
}
// SQL function body: returns the value of the all-matches SubstructMatch()
// call for the molecule pickled in argument 1 and the SMARTS pattern in
// argument 2.
// Parsed patterns are cached, keyed on the SMARTS text, in the
// std::map<std::string, boost::any> reached through sqlite3_user_data(); a
// pattern that fails to parse is cached as a null pointer.
static void molSubstructCount(sqlite3_context *context, int argc,
                              sqlite3_value **argv) {
  RDKit::ROMol *m = molFromBlobArg(argv[0]);
  if (!m) {
    std::string errorMsg =
        "BLOB (argument 1) could not be converted into a molecule";
    sqlite3_result_error(context, errorMsg.c_str(), errorMsg.length());
    return;
  }
  std::string smarts = stringFromTextArg(argv[1]);

  std::map<std::string, boost::any> &molMap =
      *static_cast<std::map<std::string, boost::any> *>(
          sqlite3_user_data(context));
  RDKit::ROMol *patt = 0;
  std::map<std::string, boost::any>::iterator cached = molMap.find(smarts);
  if (cached != molMap.end()) {
    // The map keeps the shared pointer alive, so the raw pointer stays valid.
    patt = boost::any_cast<RDKit::ROMOL_SPTR>(cached->second).get();
  } else {
    patt = static_cast<RDKit::ROMol *>(RDKit::SmartsToMol(smarts));
    molMap[smarts] = boost::any(RDKit::ROMOL_SPTR(patt));
  }
  if (!patt) {
    delete m;  // was leaked on this error path
    std::string errorMsg =
        "SMARTS (argument 2) could not be converted into a molecule";
    sqlite3_result_error(context, errorMsg.c_str(), errorMsg.length());
    return;
  }
  std::vector<RDKit::MatchVectType> matches;
  int res = RDKit::SubstructMatch(*m, *patt, matches, true, true, false);
  delete m;
  sqlite3_result_int(context, res);
}
// SQL function body: computes DaylightFingerprintMol() for the molecule
// pickled in argument 1 and returns the fingerprint's toString() output as
// the SQL result. Raises an SQL error if the BLOB cannot be depickled.
static void blobToRDKitFingerprint(sqlite3_context *context, int argc,
                                   sqlite3_value **argv) {
  RDKit::ROMol *m = molFromBlobArg(argv[0]);
  if (!m) {
    std::string errorMsg =
        "BLOB (argument 1) could not be converted into a molecule";
    sqlite3_result_error(context, errorMsg.c_str(), errorMsg.length());
    return;
  }
  ExplicitBitVect *fp =
      RDKit::DaylightFingerprintMol(*m, 1, 7, 2048, 4, true, 0.3, 128);
  std::string text = fp->toString();
  delete fp;
  delete m;
  // SQLITE_TRANSIENT makes SQLite take its own copy before `text` goes away.
  sqlite3_result_text(context, text.c_str(), text.length(), SQLITE_TRANSIENT);
}
// SQL function body: computes getAtomPairFingerprint() for the molecule
// pickled in argument 1 and returns the fingerprint's toString() output as
// the SQL result. Raises an SQL error if the BLOB cannot be depickled.
static void blobToAtomPairFingerprint(sqlite3_context *context, int argc,
                                      sqlite3_value **argv) {
  RDKit::ROMol *m = molFromBlobArg(argv[0]);
  if (!m) {
    std::string errorMsg =
        "BLOB (argument 1) could not be converted into a molecule";
    sqlite3_result_error(context, errorMsg.c_str(), errorMsg.length());
    return;
  }
  RDKit::SparseIntVect<int> *fp =
      RDKit::Descriptors::AtomPairs::getAtomPairFingerprint(*m);
  std::string text = fp->toString();
  delete fp;
  delete m;
  // SQLITE_TRANSIENT makes SQLite take its own copy before `text` goes away.
  sqlite3_result_text(context, text.c_str(), text.length(), SQLITE_TRANSIENT);
}
static void bvTanimotoSim(
sqlite3_context *context,
int argc,
sqlite3_value **argv
){
ExplicitBitVect *bv1=ebvFromBlobArg(argv[0]);
if(!bv1){
std::string errorMsg="BLOB (argument 1) could not be converted into a bit vector";
sqlite3_result_error(context,errorMsg.c_str(),errorMsg.length());
static void bvTanimotoSim(sqlite3_context *context, int argc,
sqlite3_value **argv) {
ExplicitBitVect *bv1 = ebvFromBlobArg(argv[0]);
if (!bv1) {
std::string errorMsg =
"BLOB (argument 1) could not be converted into a bit vector";
sqlite3_result_error(context, errorMsg.c_str(), errorMsg.length());
return;
}
ExplicitBitVect *bv2=ebvFromBlobArg(argv[1]);
if(!bv2){
ExplicitBitVect *bv2 = ebvFromBlobArg(argv[1]);
if (!bv2) {
delete bv1;
std::string errorMsg="BLOB (argument 2) could not be converted into a bit vector";
sqlite3_result_error(context,errorMsg.c_str(),errorMsg.length());
std::string errorMsg =
"BLOB (argument 2) could not be converted into a bit vector";
sqlite3_result_error(context, errorMsg.c_str(), errorMsg.length());
return;
}
double res=SimilarityWrapper(*bv1,*bv2,TanimotoSimilarity);
double res = SimilarityWrapper(*bv1, *bv2, TanimotoSimilarity);
delete bv1;
delete bv2;
sqlite3_result_double(context, res);
}
static void ucvTanimotoSim(
sqlite3_context *context,
int argc,
sqlite3_value **argv
){
static void ucvTanimotoSim(sqlite3_context *context, int argc,
sqlite3_value **argv) {
// table from Andrew Dalke:
static const unsigned int popCounts[] = {
0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,
1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,4,5,5,6,5,6,6,7,5,6,6,7,6,7,7,8,
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4,
2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4,
2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6,
4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5,
3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6,
4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
};
const unsigned char *t1=(const unsigned char *)sqlite3_value_blob(argv[0]);
int nB1=sqlite3_value_bytes(argv[0]);
const unsigned char *t2=(const unsigned char *)sqlite3_value_blob(argv[1]);
int nB2=sqlite3_value_bytes(argv[1]);
if(nB1!=nB2){
std::string errorMsg="bit vectors not ths same length";
sqlite3_result_error(context,errorMsg.c_str(),errorMsg.length());
const unsigned char *t1 = (const unsigned char *)sqlite3_value_blob(argv[0]);
int nB1 = sqlite3_value_bytes(argv[0]);
const unsigned char *t2 = (const unsigned char *)sqlite3_value_blob(argv[1]);
int nB2 = sqlite3_value_bytes(argv[1]);
if (nB1 != nB2) {
std::string errorMsg = "bit vectors not ths same length";
sqlite3_result_error(context, errorMsg.c_str(), errorMsg.length());
return;
}
unsigned int x=0,y=0,z=0;
for(unsigned int i=0;i<(unsigned int)nB1;++i){
y+= popCounts[*t1];
z+= popCounts[*t2];
x+= popCounts[(*t1)&(*t2)];
unsigned int x = 0, y = 0, z = 0;
for (unsigned int i = 0; i < (unsigned int)nB1; ++i) {
y += popCounts[*t1];
z += popCounts[*t2];
x += popCounts[(*t1) & (*t2)];
++t1;
++t2;
}
double res=0;
if(y+z-x>0){
res=double(x) / (y+z-x);
double res = 0;
if (y + z - x > 0) {
res = double(x) / (y + z - x);
}
sqlite3_result_double(context, res);
}
@@ -370,131 +354,133 @@ static void sivDiceSim(
}
#else
// faster, just parse the format directly
static void sivDiceSim(
sqlite3_context *context,
int argc,
sqlite3_value **argv
){
const unsigned char *t1=(const unsigned char *)sqlite3_value_blob(argv[0]);
int nB1=sqlite3_value_bytes(argv[0]);
const unsigned char *t2=(const unsigned char *)sqlite3_value_blob(argv[1]);
int nB2=sqlite3_value_bytes(argv[1]);
static void sivDiceSim(sqlite3_context *context, int argc,
sqlite3_value **argv) {
const unsigned char *t1 = (const unsigned char *)sqlite3_value_blob(argv[0]);
int nB1 = sqlite3_value_bytes(argv[0]);
const unsigned char *t2 = (const unsigned char *)sqlite3_value_blob(argv[1]);
int nB2 = sqlite3_value_bytes(argv[1]);
// check the version flags:
boost::uint32_t tmp;
tmp = *(reinterpret_cast<const boost::uint32_t *>(t1));
t1+=sizeof(boost::uint32_t);
if(tmp!=ci_SPARSEINTVECT_VERSION){
std::string errorMsg="BLOB (argument 1) could not be converted into an int vector";
sqlite3_result_error(context,errorMsg.c_str(),errorMsg.length());
t1 += sizeof(boost::uint32_t);
if (tmp != ci_SPARSEINTVECT_VERSION) {
std::string errorMsg =
"BLOB (argument 1) could not be converted into an int vector";
sqlite3_result_error(context, errorMsg.c_str(), errorMsg.length());
return;
}
tmp = *(reinterpret_cast<const boost::uint32_t *>(t2));
t2+=sizeof(boost::uint32_t);
if(tmp!=ci_SPARSEINTVECT_VERSION){
std::string errorMsg="BLOB (argument 2) could not be converted into an int vector";
sqlite3_result_error(context,errorMsg.c_str(),errorMsg.length());
t2 += sizeof(boost::uint32_t);
if (tmp != ci_SPARSEINTVECT_VERSION) {
std::string errorMsg =
"BLOB (argument 2) could not be converted into an int vector";
sqlite3_result_error(context, errorMsg.c_str(), errorMsg.length());
return;
}
// check the element size:
tmp = *(reinterpret_cast<const boost::uint32_t *>(t1));
t1+=sizeof(boost::uint32_t);
if(tmp!=sizeof(boost::uint32_t)){
std::string errorMsg="BLOB (argument 1) could not be converted into an uint32_t vector";
sqlite3_result_error(context,errorMsg.c_str(),errorMsg.length());
t1 += sizeof(boost::uint32_t);
if (tmp != sizeof(boost::uint32_t)) {
std::string errorMsg =
"BLOB (argument 1) could not be converted into an uint32_t vector";
sqlite3_result_error(context, errorMsg.c_str(), errorMsg.length());
return;
}
tmp = *(reinterpret_cast<const boost::uint32_t *>(t2));
t2+=sizeof(boost::uint32_t);
if(tmp!=sizeof(boost::uint32_t)){
std::string errorMsg="BLOB (argument 2) could not be converted into an uint32_t vector";
sqlite3_result_error(context,errorMsg.c_str(),errorMsg.length());
t2 += sizeof(boost::uint32_t);
if (tmp != sizeof(boost::uint32_t)) {
std::string errorMsg =
"BLOB (argument 2) could not be converted into an uint32_t vector";
sqlite3_result_error(context, errorMsg.c_str(), errorMsg.length());
return;
}
double res=0.;
double res = 0.;
// start reading:
boost::uint32_t len1,len2;
boost::uint32_t len1, len2;
len1 = *(reinterpret_cast<const boost::uint32_t *>(t1));
t1+=sizeof(boost::uint32_t);
t1 += sizeof(boost::uint32_t);
len2 = *(reinterpret_cast<const boost::uint32_t *>(t2));
t2+=sizeof(boost::uint32_t);
if(len1!=len2){
std::string errorMsg="attempt to compare fingerprints of different length";
sqlite3_result_error(context,errorMsg.c_str(),errorMsg.length());
t2 += sizeof(boost::uint32_t);
if (len1 != len2) {
std::string errorMsg =
"attempt to compare fingerprints of different length";
sqlite3_result_error(context, errorMsg.c_str(), errorMsg.length());
return;
}
boost::uint32_t nElem1,nElem2;
boost::uint32_t nElem1, nElem2;
nElem1 = *(reinterpret_cast<const boost::uint32_t *>(t1));
t1+=sizeof(boost::uint32_t);
t1 += sizeof(boost::uint32_t);
nElem2 = *(reinterpret_cast<const boost::uint32_t *>(t2));
t2+=sizeof(boost::uint32_t);
t2 += sizeof(boost::uint32_t);
if(!nElem1 || !nElem2){
res=0.0;
if (!nElem1 || !nElem2) {
res = 0.0;
sqlite3_result_double(context, res);
}
double v1Sum=0,v2Sum=0,numer=0;
boost::uint32_t idx1=0;
double v1Sum = 0, v2Sum = 0, numer = 0;
boost::uint32_t idx1 = 0;
boost::int32_t v1;
boost::uint32_t idx2=0;
boost::uint32_t idx2 = 0;
boost::int32_t v2;
idx1 = *(reinterpret_cast<const boost::uint32_t *>(t1));
t1+=sizeof(boost::uint32_t);
t1 += sizeof(boost::uint32_t);
v1 = *(reinterpret_cast<const boost::int32_t *>(t1));
t1+=sizeof(boost::int32_t);
t1 += sizeof(boost::int32_t);
nElem1--;
v1Sum += v1;
idx2 = *(reinterpret_cast<const boost::uint32_t *>(t2));
t2+=sizeof(boost::uint32_t);
t2 += sizeof(boost::uint32_t);
v2 = *(reinterpret_cast<const boost::int32_t *>(t2));
t2+=sizeof(boost::int32_t);
t2 += sizeof(boost::int32_t);
nElem2--;
v2Sum += v2;
while(1){
while(nElem2 && idx2<idx1){
while (1) {
while (nElem2 && idx2 < idx1) {
idx2 = *(reinterpret_cast<const boost::uint32_t *>(t2));
t2+=sizeof(boost::uint32_t);
t2 += sizeof(boost::uint32_t);
v2 = *(reinterpret_cast<const boost::int32_t *>(t2));
t2+=sizeof(boost::int32_t);
t2 += sizeof(boost::int32_t);
nElem2--;
v2Sum += v2;
}
if(idx2==idx1 ){
//std::cerr<<" --- "<<idx1<<" "<<v1<<" - "<<idx2<<" "<<v2<<std::endl;
numer += std::min(v1,v2);
if (idx2 == idx1) {
// std::cerr<<" --- "<<idx1<<" "<<v1<<" - "<<idx2<<" "<<v2<<std::endl;
numer += std::min(v1, v2);
}
if(nElem1){
if (nElem1) {
idx1 = *(reinterpret_cast<const boost::uint32_t *>(t1));
t1+=sizeof(boost::uint32_t);
t1 += sizeof(boost::uint32_t);
v1 = *(reinterpret_cast<const boost::int32_t *>(t1));
t1+=sizeof(boost::int32_t);
t1 += sizeof(boost::int32_t);
nElem1--;
v1Sum += v1;
} else {
break;
}
}
while(nElem2){
while (nElem2) {
idx2 = *(reinterpret_cast<const boost::uint32_t *>(t2));
t2+=sizeof(boost::uint32_t);
t2 += sizeof(boost::uint32_t);
v2 = *(reinterpret_cast<const boost::int32_t *>(t2));
t2+=sizeof(boost::int32_t);
t2 += sizeof(boost::int32_t);
nElem2--;
v2Sum += v2;
}
double denom=v1Sum+v2Sum;
if(fabs(denom)<1e-6){
res=0.0;
double denom = v1Sum + v2Sum;
if (fabs(denom) < 1e-6) {
res = 0.0;
} else {
res = 2.*numer/denom;
res = 2. * numer / denom;
}
//std::cerr<<" "<<v1Sum<<" "<<v2Sum<<" "<<numer<<" "<<res<<std::endl;
// std::cerr<<" "<<v1Sum<<" "<<v2Sum<<" "<<numer<<" "<<res<<std::endl;
sqlite3_result_double(context, res);
}
#endif
@@ -504,34 +490,33 @@ static void sivDiceSim(
** modules here. This is usually the only exported symbol in
** the shared library.
*/
extern "C" int sqlite3_extension_init(
sqlite3 *db,
char **pzErrMsg,
const sqlite3_api_routines *pApi
){
extern "C" int sqlite3_extension_init(sqlite3 *db, char **pzErrMsg,
const sqlite3_api_routines *pApi) {
SQLITE_EXTENSION_INIT2(pApi);
std::map<std::string,boost::any> *molMap=new std::map<std::string,boost::any>();
sqlite3_create_function(db, "rdk_molNumAtoms", 1, SQLITE_ANY, 0, numAtomsFunc, 0, 0);
std::map<std::string, boost::any> *molMap =
new std::map<std::string, boost::any>();
sqlite3_create_function(db, "rdk_molNumAtoms", 1, SQLITE_ANY, 0, numAtomsFunc,
0, 0);
sqlite3_create_function(db, "rdk_molAMW", 1, SQLITE_ANY, 0, molWtFunc, 0, 0);
sqlite3_create_function(db, "rdk_smilesToBlob", 1, SQLITE_ANY, 0, smilesToBlob, 0, 0);
sqlite3_create_function(db, "rdk_smilesToBlob", 1, SQLITE_ANY, 0,
smilesToBlob, 0, 0);
sqlite3_create_function(db, "rdk_molToRDKitFP", 1, SQLITE_ANY, 0,
blobToRDKitFingerprint, 0, 0);
blobToRDKitFingerprint, 0, 0);
sqlite3_create_function(db, "rdk_bvTanimotoSim", 2, SQLITE_ANY, 0,
bvTanimotoSim, 0, 0);
bvTanimotoSim, 0, 0);
sqlite3_create_function(db, "rdk_ucvTanimotoSim", 2, SQLITE_ANY, 0,
ucvTanimotoSim, 0, 0);
ucvTanimotoSim, 0, 0);
sqlite3_create_function(db, "rdk_molToAtomPairFP", 1, SQLITE_ANY, 0,
blobToAtomPairFingerprint, 0, 0);
sqlite3_create_function(db, "rdk_sivDiceSim", 2, SQLITE_ANY, 0,
sivDiceSim, 0, 0);
sqlite3_create_function(db, "rdk_sivDiceSim2", 2, SQLITE_ANY, 0,
sivDiceSim2, 0, 0);
blobToAtomPairFingerprint, 0, 0);
sqlite3_create_function(db, "rdk_sivDiceSim", 2, SQLITE_ANY, 0, sivDiceSim, 0,
0);
sqlite3_create_function(db, "rdk_sivDiceSim2", 2, SQLITE_ANY, 0, sivDiceSim2,
0, 0);
sqlite3_create_function(db, "rdk_molHasSubstruct", 2, SQLITE_ANY,
static_cast<void *>(molMap),
molHasSubstruct, 0, 0);
static_cast<void *>(molMap), molHasSubstruct, 0, 0);
sqlite3_create_function(db, "rdk_molSubstructCount", 2, SQLITE_ANY,
static_cast<void *>(molMap),
molSubstructCount, 0, 0);
sqlite3_create_function(db, "rdk_molLogP", 1, SQLITE_ANY, 0, molLogPFunc, 0, 0);
static_cast<void *>(molMap), molSubstructCount, 0, 0);
sqlite3_create_function(db, "rdk_molLogP", 1, SQLITE_ANY, 0, molLogPFunc, 0,
0);
return 0;
}

View File

@@ -17,101 +17,103 @@
#include <Numerics/SquareMatrix.h>
namespace DistGeom {
//! Class to store the distance bound
/*!
Basically a N by N matrix
with lower distance bounds on the lower triangle and upper bounds in the upper
triangle
*/
class BoundsMatrix : public RDNumeric::SquareMatrix<double> {
public:
typedef boost::shared_array<double> DATA_SPTR;
//! Class to store the distance bound
/*!
Basically a N by N matrix
with lower distance bounds on the lower triangle and upper bounds in the upper
triangle
*/
class BoundsMatrix : public RDNumeric::SquareMatrix<double> {
public:
typedef boost::shared_array<double> DATA_SPTR;
explicit BoundsMatrix(unsigned int N) : RDNumeric::SquareMatrix<double>(N,0.0) {};
BoundsMatrix(unsigned int N, DATA_SPTR data) :
RDNumeric::SquareMatrix<double>(N,data) {};
explicit BoundsMatrix(unsigned int N)
: RDNumeric::SquareMatrix<double>(N, 0.0){};
BoundsMatrix(unsigned int N, DATA_SPTR data)
: RDNumeric::SquareMatrix<double>(N, data){};
//! Get the upper bound between points i and j
inline double getUpperBound(unsigned int i, unsigned int j) const {
URANGE_CHECK(i, d_nRows-1);
URANGE_CHECK(j, d_nCols-1);
//! Get the upper bound between points i and j
inline double getUpperBound(unsigned int i, unsigned int j) const {
URANGE_CHECK(i, d_nRows - 1);
URANGE_CHECK(j, d_nCols - 1);
if (i < j) {
return getVal(i,j);
} else {
return getVal(j,i);
}
}
//! Set the upper bound between points i and j
/*!
  The value is stored with the smaller of the two indices first, so the order
  in which i and j are passed does not matter. Negative values are rejected.
*/
inline void setUpperBound(unsigned int i, unsigned int j, double val) {
  URANGE_CHECK(i, d_nRows-1);
  URANGE_CHECK(j, d_nCols-1);
  CHECK_INVARIANT(val >= 0.0, "Negative upper bound");
  const unsigned int first = (i < j) ? i : j;
  const unsigned int second = (i < j) ? j : i;
  setVal(first, second, val);
}
//! Set the upper bound between points i and j only if it is better than
//! previously existing value (i.e. the new value is smaller)
inline void setUpperBoundIfBetter(unsigned int i, unsigned int j, double val) {
if ((val < getUpperBound(i, j)) && (val > getLowerBound(i, j)) ) {
setUpperBound(i, j, val);
}
if (i < j) {
return getVal(i, j);
} else {
return getVal(j, i);
}
}
//! Set the lower bound between points i and j
inline void setLowerBound(unsigned int i, unsigned int j, double val) {
URANGE_CHECK(i, d_nRows-1);
URANGE_CHECK(j, d_nCols-1);
CHECK_INVARIANT(val >= 0.0, "Negative lower bound");
if (i < j) {
setVal(j,i,val);
} else {
setVal(i,j,val);
}
//! Set the upper bound between points i and j
inline void setUpperBound(unsigned int i, unsigned int j, double val) {
URANGE_CHECK(i, d_nRows - 1);
URANGE_CHECK(j, d_nCols - 1);
CHECK_INVARIANT(val >= 0.0, "Negative upper bound");
if (i < j) {
setVal(i, j, val);
} else {
setVal(j, i, val);
}
//! Set the lower bound between points i and j only if it is better than
//! previously existing value (i.e. the new value is larger)
inline void setLowerBoundIfBetter(unsigned int i, unsigned int j, double val) {
if ((val > getLowerBound(i,j)) && (val < getUpperBound(i,j))) {
setLowerBound(i,j, val);
}
}
//! Get the lower bound between points i and j
inline double getLowerBound(unsigned int i, unsigned int j) const {
URANGE_CHECK(i, d_nRows-1);
URANGE_CHECK(j, d_nCols-1);
}
if (i < j) {
return getVal(j,i);
} else {
return getVal(i,j);
}
//! Tighten the upper bound between points i and j
/*!
  The stored upper bound is replaced only when \c val is strictly smaller than
  it and strictly larger than the current lower bound; otherwise nothing
  changes.
*/
inline void setUpperBoundIfBetter(unsigned int i, unsigned int j,
                                  double val) {
  const bool isTighter = val < getUpperBound(i, j);
  if (!isTighter) {
    return;
  }
  const bool staysAboveLower = val > getLowerBound(i, j);
  if (staysAboveLower) {
    setUpperBound(i, j, val);
  }
}
//! Do a simple check of the current bounds - i.e. all lower bounds are
//! smaller than the existing upper bounds
inline bool checkValid() const {
unsigned int i, j;
for (i = 1; i < d_nRows; i++) {
for (j = 0; j < i; j++) {
if (getUpperBound(i,j) < getLowerBound(i,j)) {
return false;
}
//! Set the lower bound between points i and j
/*!
  The value is stored with the larger of the two indices first, so the order
  in which i and j are passed does not matter. Negative values are rejected.
*/
inline void setLowerBound(unsigned int i, unsigned int j, double val) {
  URANGE_CHECK(i, d_nRows - 1);
  URANGE_CHECK(j, d_nCols - 1);
  CHECK_INVARIANT(val >= 0.0, "Negative lower bound");
  const unsigned int first = (i < j) ? j : i;
  const unsigned int second = (i < j) ? i : j;
  setVal(first, second, val);
}
//! Tighten the lower bound between points i and j
/*!
  The stored lower bound is replaced only when \c val is strictly larger than
  it and strictly smaller than the current upper bound; otherwise nothing
  changes.
*/
inline void setLowerBoundIfBetter(unsigned int i, unsigned int j,
                                  double val) {
  const bool isTighter = val > getLowerBound(i, j);
  if (!isTighter) {
    return;
  }
  const bool staysBelowUpper = val < getUpperBound(i, j);
  if (staysBelowUpper) {
    setLowerBound(i, j, val);
  }
}
//! Get the lower bound between points i and j
/*!
  Reads the element addressed with the larger of the two indices first, i.e.
  the same element that setLowerBound() writes.
*/
inline double getLowerBound(unsigned int i, unsigned int j) const {
  URANGE_CHECK(i, d_nRows - 1);
  URANGE_CHECK(j, d_nCols - 1);
  return (i < j) ? getVal(j, i) : getVal(i, j);
}
//! Do a simple check of the current bounds - i.e. all lower bounds are
//! smaller than the existing upper bounds
inline bool checkValid() const {
unsigned int i, j;
for (i = 1; i < d_nRows; i++) {
for (j = 0; j < i; j++) {
if (getUpperBound(i, j) < getLowerBound(i, j)) {
return false;
}
}
return true;
}
};
return true;
}
};
typedef boost::shared_ptr<BoundsMatrix> BoundsMatPtr;
typedef boost::shared_ptr<BoundsMatrix> BoundsMatPtr;
}
#endif

View File

@@ -16,43 +16,42 @@
namespace DistGeom {
/*! \brief Class used to store a quartet of points and chiral volume bounds on them
*
*/
class ChiralSet {
public:
/*! \brief Class used to store a quartet of points and chiral volume bounds on
 *  them
 *
 */
class ChiralSet {
public:
unsigned int d_idx0; // the centroid
unsigned int d_idx1;
unsigned int d_idx2;
unsigned int d_idx3;
unsigned int d_idx4;
double d_volumeLowerBound;
double d_volumeUpperBound;
unsigned int d_idx0; // the centroid
unsigned int d_idx1;
unsigned int d_idx2;
unsigned int d_idx3;
unsigned int d_idx4;
double d_volumeLowerBound;
double d_volumeUpperBound;
//! Constructor
/*!
  \param pid0          index stored in d_idx0 (the centroid)
  \param pid1          index stored in d_idx1
  \param pid2          index stored in d_idx2
  \param pid3          index stored in d_idx3
  \param pid4          index stored in d_idx4
  \param lowerVolBound lower bound on the chiral volume
  \param upperVolBound upper bound on the chiral volume; must not be smaller
                       than \c lowerVolBound
*/
ChiralSet(unsigned int pid0, unsigned int pid1, unsigned int pid2,
          unsigned int pid3, unsigned int pid4, double lowerVolBound,
          double upperVolBound)
    : d_idx0(pid0),
      d_idx1(pid1),
      d_idx2(pid2),
      d_idx3(pid3),
      d_idx4(pid4),
      d_volumeLowerBound(lowerVolBound),
      d_volumeUpperBound(upperVolBound) {
  // both bounds are already set by the initializer list, so the body only has
  // to validate them
  CHECK_INVARIANT(lowerVolBound <= upperVolBound, "Inconsistent bounds\n");
}
//! Constructor
/*!
  \param pid0          index stored in d_idx0 (the centroid)
  \param pid1          index stored in d_idx1
  \param pid2          index stored in d_idx2
  \param pid3          index stored in d_idx3
  \param pid4          index stored in d_idx4
  \param lowerVolBound lower bound on the chiral volume
  \param upperVolBound upper bound on the chiral volume; must not be smaller
                       than \c lowerVolBound
*/
ChiralSet(unsigned int pid0,
          unsigned int pid1, unsigned int pid2,
          unsigned int pid3, unsigned int pid4,
          double lowerVolBound, double upperVolBound) :
  d_idx0(pid0), d_idx1(pid1), d_idx2(pid2), d_idx3(pid3), d_idx4(pid4),
  d_volumeLowerBound(lowerVolBound),d_volumeUpperBound(upperVolBound) {
  // both bounds are already set by the initializer list, so the body only has
  // to validate them
  CHECK_INVARIANT(lowerVolBound <= upperVolBound, "Inconsistent bounds\n");
}
inline double getUpperVolumeBound() const {
return d_volumeUpperBound;
}
inline double getLowerVolumeBound() const {
return d_volumeLowerBound;
}
};
typedef boost::shared_ptr<ChiralSet> ChiralSetPtr;
typedef std::vector<ChiralSetPtr> VECT_CHIRALSET;
inline double getUpperVolumeBound() const { return d_volumeUpperBound; }
}
inline double getLowerVolumeBound() const { return d_volumeLowerBound; }
};
typedef boost::shared_ptr<ChiralSet> ChiralSetPtr;
typedef std::vector<ChiralSetPtr> VECT_CHIRALSET;
}
#endif

View File

@@ -8,108 +8,111 @@
#include <ForceField/ForceField.h>
namespace DistGeom {
ChiralViolationContrib::ChiralViolationContrib(ForceFields::ForceField *owner, const ChiralSet* cset,
double weight) {
PRECONDITION(owner,"bad owner");
PRECONDITION(cset, "bad chiral set")
ChiralViolationContrib::ChiralViolationContrib(ForceFields::ForceField *owner,
const ChiralSet *cset,
double weight) {
PRECONDITION(owner, "bad owner");
PRECONDITION(cset, "bad chiral set")
URANGE_CHECK(cset->d_idx1,owner->positions().size()-1);
URANGE_CHECK(cset->d_idx2,owner->positions().size()-1);
URANGE_CHECK(cset->d_idx3,owner->positions().size()-1);
URANGE_CHECK(cset->d_idx4,owner->positions().size()-1);
URANGE_CHECK(cset->d_idx1, owner->positions().size() - 1);
URANGE_CHECK(cset->d_idx2, owner->positions().size() - 1);
URANGE_CHECK(cset->d_idx3, owner->positions().size() - 1);
URANGE_CHECK(cset->d_idx4, owner->positions().size() - 1);
dp_forceField = owner;
d_idx1 = cset->d_idx1;
d_idx2 = cset->d_idx2;
d_idx3 = cset->d_idx3;
d_idx4 = cset->d_idx4;
dp_forceField = owner;
d_volLower = cset->getLowerVolumeBound();
d_volUpper = cset->getUpperVolumeBound();
d_idx1 = cset->d_idx1;
d_idx2 = cset->d_idx2;
d_idx3 = cset->d_idx3;
d_idx4 = cset->d_idx4;
d_weight = weight;
}
d_volLower = cset->getLowerVolumeBound();
d_volUpper = cset->getUpperVolumeBound();
// Penalty for the chiral volume lying outside [d_volLower, d_volUpper]:
// weight * (distance to the violated bound)^2, and exactly 0.0 when the
// volume is within the bounds.
double ChiralViolationContrib::getEnergy(double *pos) const {
  PRECONDITION(dp_forceField,"no owner");
  PRECONDITION(pos,"bad vector");

  const unsigned int dim = dp_forceField->dimension();
  const double vol = calcChiralVolume(d_idx1,d_idx2,d_idx3,d_idx4,pos,dim);

  if (vol < d_volLower) {
    return d_weight*(vol - d_volLower)*(vol - d_volLower);
  }
  if (vol > d_volUpper) {
    return d_weight*(vol - d_volUpper)*(vol - d_volUpper);
  }
  return 0.0;
}
// Adds this contribution's gradient to grad (entries are accumulated with +=,
// never overwritten). Point k's coordinates are read from pos[k*dim ...] and
// its gradient entries are written at grad[k*dim ...]. When the chiral volume
// is within [d_volLower, d_volUpper] the function returns without touching
// grad.
void ChiralViolationContrib::getGrad(double *pos, double *grad) const {
PRECONDITION(dp_forceField,"no owner");
PRECONDITION(pos,"bad vector");
unsigned int dim = dp_forceField->dimension();
// even if we are minimizing in higher dimension the chiral volume is
// calculated using only the first 3 dimensions
// v1, v2, v3: points 1-3 expressed relative to point 4
RDGeom::Point3D v1(pos[d_idx1*dim] - pos[d_idx4*dim],
pos[d_idx1*dim+1] - pos[d_idx4*dim+1],
pos[d_idx1*dim+2] - pos[d_idx4*dim+2]);
RDGeom::Point3D v2(pos[d_idx2*dim] - pos[d_idx4*dim],
pos[d_idx2*dim+1] - pos[d_idx4*dim+1],
pos[d_idx2*dim+2] - pos[d_idx4*dim+2]);
RDGeom::Point3D v3(pos[d_idx3*dim] - pos[d_idx4*dim],
pos[d_idx3*dim+1] - pos[d_idx4*dim+1],
pos[d_idx3*dim+2] - pos[d_idx4*dim+2]);
RDGeom::Point3D v2xv3 = v2.crossProduct(v3);
// signed volume: v1 . (v2 x v3)
double vol = v1.dotProduct(v2xv3);
// NOTE(review): getEnergy() uses weight*(vol - bound)^2, whose derivative
// would carry a factor of 2 that preFactor does not include - confirm that
// this scaling is intentional.
double preFactor;
if (vol < d_volLower) {
preFactor = d_weight*(vol - d_volLower);
} else if (vol > d_volUpper) {
preFactor = d_weight*(vol - d_volUpper);
} else {
return;
}
// now comes the hard part - there are a total of 12 variables involved
// 4 x 3 - four points and 3 dimensions
//
// point 1: components of v2 x v3
grad[dim*d_idx1] += preFactor*((v2.y)*(v3.z) - (v3.y)*(v2.z));
grad[dim*d_idx1+1] += preFactor*((v3.x)*(v2.z) - (v2.x)*(v3.z));
grad[dim*d_idx1+2] += preFactor*((v2.x)*(v3.y) - (v3.x)*(v2.y));
// point 2: components of v3 x v1
grad[dim*d_idx2] += preFactor*((v3.y)*(v1.z) - (v3.z)*(v1.y));
grad[dim*d_idx2 + 1] += preFactor*((v3.z)*(v1.x) - (v3.x)*(v1.z));
grad[dim*d_idx2 + 2] += preFactor*((v3.x)*(v1.y) - (v3.y)*(v1.x));
// point 3: components of v1 x v2
grad[dim*d_idx3] += preFactor*((v2.z)*(v1.y) - (v2.y)*(v1.z));
grad[dim*d_idx3 + 1] += preFactor*((v2.x)*(v1.z) - (v2.z)*(v1.x));
grad[dim*d_idx3 + 2] += preFactor*((v2.y)*(v1.x) - (v2.x)*(v1.y));
// point 4: minus the sum of the three cross products above, written out
// directly in terms of the absolute coordinates of points 1-3
grad[dim*d_idx4] += preFactor*(pos[d_idx1*dim+2]*(pos[d_idx2*dim+1] - pos[d_idx3*dim+1])
+ pos[d_idx2*dim+2]*(pos[d_idx3*dim+1] - pos[d_idx1*dim+1])
+ pos[d_idx3*dim+2]*(pos[d_idx1*dim+1] - pos[d_idx2*dim+1]));
grad[dim*d_idx4+1] += preFactor*(pos[d_idx1*dim]*(pos[d_idx2*dim+2] - pos[d_idx3*dim+2])
+ pos[d_idx2*dim]*(pos[d_idx3*dim+2] - pos[d_idx1*dim+2])
+ pos[d_idx3*dim]*(pos[d_idx1*dim+2] - pos[d_idx2*dim+2]));
grad[dim*d_idx4+2] += preFactor*(pos[d_idx1*dim+1]*(pos[d_idx2*dim] - pos[d_idx3*dim])
+ pos[d_idx2*dim+1]*(pos[d_idx3*dim] - pos[d_idx1*dim])
+ pos[d_idx3*dim+1]*(pos[d_idx1*dim] - pos[d_idx2*dim]));
//std::cerr<<"Chiral Violation grad: "<<preFactor<<std::endl;
}
d_weight = weight;
}
// Penalty for the chiral volume lying outside [d_volLower, d_volUpper]:
// weight * (distance to the violated bound)^2, and exactly 0.0 when the
// volume is within the bounds.
double ChiralViolationContrib::getEnergy(double *pos) const {
  PRECONDITION(dp_forceField, "no owner");
  PRECONDITION(pos, "bad vector");

  const unsigned int dim = dp_forceField->dimension();
  const double vol =
      calcChiralVolume(d_idx1, d_idx2, d_idx3, d_idx4, pos, dim);

  if (vol < d_volLower) {
    return d_weight * (vol - d_volLower) * (vol - d_volLower);
  }
  if (vol > d_volUpper) {
    return d_weight * (vol - d_volUpper) * (vol - d_volUpper);
  }
  return 0.0;
}
// Adds this contribution's gradient to grad (entries are accumulated with +=,
// never overwritten). Point k's coordinates are read from pos[k*dim ...] and
// its gradient entries are written at grad[k*dim ...]. When the chiral volume
// is within [d_volLower, d_volUpper] the function returns without touching
// grad.
void ChiralViolationContrib::getGrad(double *pos, double *grad) const {
PRECONDITION(dp_forceField, "no owner");
PRECONDITION(pos, "bad vector");
unsigned int dim = dp_forceField->dimension();
// even if we are minimizing in higher dimension the chiral volume is
// calculated using only the first 3 dimensions
// v1, v2, v3: points 1-3 expressed relative to point 4
RDGeom::Point3D v1(pos[d_idx1 * dim] - pos[d_idx4 * dim],
pos[d_idx1 * dim + 1] - pos[d_idx4 * dim + 1],
pos[d_idx1 * dim + 2] - pos[d_idx4 * dim + 2]);
RDGeom::Point3D v2(pos[d_idx2 * dim] - pos[d_idx4 * dim],
pos[d_idx2 * dim + 1] - pos[d_idx4 * dim + 1],
pos[d_idx2 * dim + 2] - pos[d_idx4 * dim + 2]);
RDGeom::Point3D v3(pos[d_idx3 * dim] - pos[d_idx4 * dim],
pos[d_idx3 * dim + 1] - pos[d_idx4 * dim + 1],
pos[d_idx3 * dim + 2] - pos[d_idx4 * dim + 2]);
RDGeom::Point3D v2xv3 = v2.crossProduct(v3);
// signed volume: v1 . (v2 x v3)
double vol = v1.dotProduct(v2xv3);
// NOTE(review): getEnergy() uses weight*(vol - bound)^2, whose derivative
// would carry a factor of 2 that preFactor does not include - confirm that
// this scaling is intentional.
double preFactor;
if (vol < d_volLower) {
preFactor = d_weight * (vol - d_volLower);
} else if (vol > d_volUpper) {
preFactor = d_weight * (vol - d_volUpper);
} else {
return;
}
// now comes the hard part - there are a total of 12 variables involved
// 4 x 3 - four points and 3 dimensions
//
// point 1: components of v2 x v3
grad[dim * d_idx1] += preFactor * ((v2.y) * (v3.z) - (v3.y) * (v2.z));
grad[dim * d_idx1 + 1] += preFactor * ((v3.x) * (v2.z) - (v2.x) * (v3.z));
grad[dim * d_idx1 + 2] += preFactor * ((v2.x) * (v3.y) - (v3.x) * (v2.y));
// point 2: components of v3 x v1
grad[dim * d_idx2] += preFactor * ((v3.y) * (v1.z) - (v3.z) * (v1.y));
grad[dim * d_idx2 + 1] += preFactor * ((v3.z) * (v1.x) - (v3.x) * (v1.z));
grad[dim * d_idx2 + 2] += preFactor * ((v3.x) * (v1.y) - (v3.y) * (v1.x));
// point 3: components of v1 x v2
grad[dim * d_idx3] += preFactor * ((v2.z) * (v1.y) - (v2.y) * (v1.z));
grad[dim * d_idx3 + 1] += preFactor * ((v2.x) * (v1.z) - (v2.z) * (v1.x));
grad[dim * d_idx3 + 2] += preFactor * ((v2.y) * (v1.x) - (v2.x) * (v1.y));
// point 4: minus the sum of the three cross products above, written out
// directly in terms of the absolute coordinates of points 1-3
grad[dim * d_idx4] +=
preFactor *
(pos[d_idx1 * dim + 2] * (pos[d_idx2 * dim + 1] - pos[d_idx3 * dim + 1]) +
pos[d_idx2 * dim + 2] * (pos[d_idx3 * dim + 1] - pos[d_idx1 * dim + 1]) +
pos[d_idx3 * dim + 2] * (pos[d_idx1 * dim + 1] - pos[d_idx2 * dim + 1]));
grad[dim * d_idx4 + 1] +=
preFactor *
(pos[d_idx1 * dim] * (pos[d_idx2 * dim + 2] - pos[d_idx3 * dim + 2]) +
pos[d_idx2 * dim] * (pos[d_idx3 * dim + 2] - pos[d_idx1 * dim + 2]) +
pos[d_idx3 * dim] * (pos[d_idx1 * dim + 2] - pos[d_idx2 * dim + 2]));
grad[dim * d_idx4 + 2] +=
preFactor *
(pos[d_idx1 * dim + 1] * (pos[d_idx2 * dim] - pos[d_idx3 * dim]) +
pos[d_idx2 * dim + 1] * (pos[d_idx3 * dim] - pos[d_idx1 * dim]) +
pos[d_idx3 * dim + 1] * (pos[d_idx1 * dim] - pos[d_idx2 * dim]));
// std::cerr<<"Chiral Violation grad: "<<preFactor<<std::endl;
}
}

View File

@@ -8,83 +8,94 @@
#include <Geometry/point.h>
namespace DistGeom {
class ChiralSet;
//! A term to capture the violation of chirality at an atom center
//!
class ChiralViolationContrib : public ForceFields::ForceFieldContrib {
public:
ChiralViolationContrib() : d_idx1(0), d_idx2(0), d_idx3(0), d_idx4(0),
d_volLower(0.0), d_volUpper(0.0), d_weight(0.0){};
//! Constructor
/*!
\param owner pointer to the owning forcefield
\param cset a chiral set containing the four chiral atom ids (in sequence)
and the upper and lower limits on the signed chiral volume
\param weight (optional) the weight to be used for this contrib
*/
ChiralViolationContrib(ForceFields::ForceField *owner, const ChiralSet *cset, double weight=1.0);
//! return the contribution of this contrib to the energy of a given state
double getEnergy(double *pos) const;
//! calculate the contribution of this contrib to the gradient at a given state
void getGrad(double *pos, double *grad) const;
virtual ChiralViolationContrib *copy() const { return new ChiralViolationContrib(*this); };
class ChiralSet;
static double calcChiralVolume(unsigned int idx1,unsigned int idx2,unsigned int idx3,unsigned int idx4,
const double *pos,unsigned int dim){
// even if we are minimizing in higher dimension the chiral volume is
// calculated using only the first 3 dimensions
RDGeom::Point3D v1(pos[idx1*dim] - pos[idx4*dim],
pos[idx1*dim+1] - pos[idx4*dim+1],
pos[idx1*dim+2] - pos[idx4*dim+2]);
//! A term to capture the violation of chirality at an atom center
//!
class ChiralViolationContrib : public ForceFields::ForceFieldContrib {
public:
ChiralViolationContrib()
: d_idx1(0),
d_idx2(0),
d_idx3(0),
d_idx4(0),
d_volLower(0.0),
d_volUpper(0.0),
d_weight(0.0){};
RDGeom::Point3D v2(pos[idx2*dim] - pos[idx4*dim],
pos[idx2*dim+1] - pos[idx4*dim+1],
pos[idx2*dim+2] - pos[idx4*dim+2]);
RDGeom::Point3D v3(pos[idx3*dim] - pos[idx4*dim],
pos[idx3*dim+1] - pos[idx4*dim+1],
pos[idx3*dim+2] - pos[idx4*dim+2]);
//! Constructor
/*!
\param owner pointer to the owning forcefield
\param cset a chiral set containing the four chiral atom ids (in
sequence)
and the upper and lower limits on the signed chiral volume
\param weight (optional) the weight to be used for this contrib
RDGeom::Point3D v2xv3 = v2.crossProduct(v3);
double vol = v1.dotProduct(v2xv3);
return vol;
}
static double calcChiralVolume(unsigned int idx1,unsigned int idx2,unsigned int idx3,unsigned int idx4,
const RDGeom::PointPtrVect &pts){
// even if we are minimizing in higher dimension the chiral volume is
// calculated using only the first 3 dimensions
RDGeom::Point3D v1((*pts[idx1])[0] - (*pts[idx4])[0],
(*pts[idx1])[1] - (*pts[idx4])[1],
(*pts[idx1])[2] - (*pts[idx4])[2]);
*/
ChiralViolationContrib(ForceFields::ForceField *owner, const ChiralSet *cset,
double weight = 1.0);
RDGeom::Point3D v2((*pts[idx2])[0] - (*pts[idx4])[0],
(*pts[idx2])[1] - (*pts[idx4])[1],
(*pts[idx2])[2] - (*pts[idx4])[2]);
RDGeom::Point3D v3((*pts[idx3])[0] - (*pts[idx4])[0],
(*pts[idx3])[1] - (*pts[idx4])[1],
(*pts[idx3])[2] - (*pts[idx4])[2]);
//! return the contribution of this contrib to the energy of a given state
double getEnergy(double *pos) const;
RDGeom::Point3D v2xv3 = v2.crossProduct(v3);
double vol = v1.dotProduct(v2xv3);
return vol;
}
private:
unsigned int d_idx1, d_idx2, d_idx3, d_idx4;
double d_volLower;
double d_volUpper;
double d_weight;
//! calculate the contribution of this contrib to the gradient at a given
//! state
void getGrad(double *pos, double *grad) const;
virtual ChiralViolationContrib *copy() const {
return new ChiralViolationContrib(*this);
};
static double calcChiralVolume(unsigned int idx1, unsigned int idx2,
unsigned int idx3, unsigned int idx4,
const double *pos, unsigned int dim) {
// even if we are minimizing in higher dimension the chiral volume is
// calculated using only the first 3 dimensions
RDGeom::Point3D v1(pos[idx1 * dim] - pos[idx4 * dim],
pos[idx1 * dim + 1] - pos[idx4 * dim + 1],
pos[idx1 * dim + 2] - pos[idx4 * dim + 2]);
RDGeom::Point3D v2(pos[idx2 * dim] - pos[idx4 * dim],
pos[idx2 * dim + 1] - pos[idx4 * dim + 1],
pos[idx2 * dim + 2] - pos[idx4 * dim + 2]);
RDGeom::Point3D v3(pos[idx3 * dim] - pos[idx4 * dim],
pos[idx3 * dim + 1] - pos[idx4 * dim + 1],
pos[idx3 * dim + 2] - pos[idx4 * dim + 2]);
RDGeom::Point3D v2xv3 = v2.crossProduct(v3);
double vol = v1.dotProduct(v2xv3);
return vol;
}
//! Signed chiral volume of four points taken from a vector of point pointers
/*!
  Only coordinates 0, 1 and 2 of each point are used, even when the points
  have more dimensions.

  \return (p1-p4) . ((p2-p4) x (p3-p4))
*/
static double calcChiralVolume(unsigned int idx1, unsigned int idx2,
                               unsigned int idx3, unsigned int idx4,
                               const RDGeom::PointPtrVect &pts) {
  // offsets of points 1-3 from point 4, one coordinate at a time
  double d1[3], d2[3], d3[3];
  for (unsigned int k = 0; k < 3; ++k) {
    d1[k] = (*pts[idx1])[k] - (*pts[idx4])[k];
    d2[k] = (*pts[idx2])[k] - (*pts[idx4])[k];
    d3[k] = (*pts[idx3])[k] - (*pts[idx4])[k];
  }
  RDGeom::Point3D v1(d1[0], d1[1], d1[2]);
  RDGeom::Point3D v2(d2[0], d2[1], d2[2]);
  RDGeom::Point3D v3(d3[0], d3[1], d3[2]);

  RDGeom::Point3D v2xv3 = v2.crossProduct(v3);
  return v1.dotProduct(v2xv3);
}
private:
unsigned int d_idx1, d_idx2, d_idx3, d_idx4;
double d_volLower;
double d_volUpper;
double d_weight;
};
}
#endif

View File

@@ -27,434 +27,443 @@
#include <boost/dynamic_bitset.hpp>
namespace DistGeom {
const double EIGVAL_TOL=0.001;
const double EIGVAL_TOL = 0.001;
double pickRandomDistMat(const BoundsMatrix &mmat,
RDNumeric::SymmMatrix<double> &distMat,
int seed) {
if(seed>0){
RDKit::getRandomGenerator(seed);
}
return pickRandomDistMat(mmat,distMat,RDKit::getDoubleRandomSource());
double pickRandomDistMat(const BoundsMatrix &mmat,
RDNumeric::SymmMatrix<double> &distMat, int seed) {
if (seed > 0) {
RDKit::getRandomGenerator(seed);
}
double pickRandomDistMat(const BoundsMatrix &mmat,
RDNumeric::SymmMatrix<double> &distMat,
RDKit::double_source_type &rng) {
// make sure the sizes match up
unsigned int npt = mmat.numRows();
CHECK_INVARIANT(npt == distMat.numRows(), "Size mismatch");
double largestVal=-1.0;
double *ddata = distMat.getData();
for (unsigned int i = 1; i < npt; i++) {
unsigned int id = i*(i+1)/2;
for (unsigned int j = 0; j < i; j++) {
double ub = mmat.getUpperBound(i,j);
double lb = mmat.getLowerBound(i,j);
CHECK_INVARIANT(ub >= lb, "");
double rval = rng();
//std::cerr<<i<<"-"<<j<<": "<<rval<<std::endl;
double d = lb + (rval)*(ub - lb);
ddata[id+j] = d;
if(d>largestVal){
largestVal=d;
}
}
}
return largestVal;
}
bool computeInitialCoords(const RDNumeric::SymmMatrix<double> &distMat,
RDGeom::PointPtrVect &positions, bool randNegEig,
unsigned int numZeroFail,
int seed) {
if(seed>0){
RDKit::getRandomGenerator(seed);
}
return computeInitialCoords(distMat,positions,RDKit::getDoubleRandomSource(),
randNegEig,numZeroFail);
}
bool computeInitialCoords(const RDNumeric::SymmMatrix<double> &distMat,
RDGeom::PointPtrVect &positions,
RDKit::double_source_type &rng,
bool randNegEig,
unsigned int numZeroFail){
unsigned int N = distMat.numRows();
unsigned int nPt = positions.size();
CHECK_INVARIANT(nPt == N, "Size mismatch");
unsigned int dim = positions.front()->dimension();
const double *data = distMat.getData();
RDNumeric::SymmMatrix<double> sqMat(N), T(N, 0.0);
RDNumeric::DoubleMatrix eigVecs(dim,N);
RDNumeric::DoubleVector eigVals(dim);
double *sqDat = sqMat.getData();
unsigned int dSize = distMat.getDataSize();
double sumSqD2 = 0.0;
for (unsigned int i = 0; i < dSize; i++) {
sqDat[i] = data[i]*data[i];
sumSqD2 += sqDat[i];
}
sumSqD2 /= (N*N);
RDNumeric::DoubleVector sqD0i(N, 0.0);
double *sqD0iData = sqD0i.getData();
for (unsigned int i = 0; i < N; i++) {
for (unsigned int j = 0; j < N; j++) {
sqD0iData[i] += sqMat.getVal(i,j);
}
sqD0iData[i] /= N;
sqD0iData[i] -= sumSqD2;
if ((sqD0iData[i] < EIGVAL_TOL) && (N > 3)){
return false;
}
}
for (unsigned int i = 0; i < N; i++) {
for (unsigned int j = 0; j <= i; j++) {
double val = 0.5*(sqD0iData[i] + sqD0iData[j] - sqMat.getVal(i,j));
T.setVal(i,j, val);
}
}
unsigned int nEigs = (dim < N) ? dim : N;
RDNumeric::EigenSolvers::powerEigenSolver(nEigs, T, eigVals, eigVecs,
(int)(sumSqD2*N));
double *eigData = eigVals.getData();
bool foundNeg = false;
unsigned int zeroEigs = 0;
for (unsigned int i = 0; i < dim; i++) {
if (eigData[i] > EIGVAL_TOL) {
eigData[i] = sqrt(eigData[i]);
} else if (fabs(eigData[i]) < EIGVAL_TOL) {
eigData[i] = 0.0;
zeroEigs++;
} else {
foundNeg = true;
}
}
if ((foundNeg) && (!randNegEig) ) {
return false;
}
if ((zeroEigs >= numZeroFail) && (N > 3)) {
return false;
}
for (unsigned int i = 0; i < N; i++) {
RDGeom::Point *pt = positions[i];
for (unsigned int j = 0; j < dim; ++j) {
if (eigData[j] >= 0.0) {
(*pt)[j] = eigData[j]*eigVecs.getVal(j,i);
} else {
//std::cerr<<"!!! "<<i<<"-"<<j<<": "<<eigData[j]<<std::endl;
(*pt)[j] = 1.0 - 2.0*rng();
}
}
}
return true;
}
bool computeRandomCoords(RDGeom::PointPtrVect &positions, double boxSize,
int seed){
if(seed>0){
RDKit::getRandomGenerator(seed);
}
return computeRandomCoords(positions,boxSize,
RDKit::getDoubleRandomSource());
}
bool computeRandomCoords(RDGeom::PointPtrVect &positions, double boxSize,
RDKit::double_source_type &rng){
CHECK_INVARIANT(boxSize>0.0, "bad boxSize");
for(RDGeom::PointPtrVect::iterator ptIt=positions.begin();
ptIt!=positions.end();++ptIt){
RDGeom::Point *pt = *ptIt;
for (unsigned int i = 0; i<pt->dimension(); ++i) {
(*pt)[i]=boxSize*(rng()-0.5);
}
}
return true;
}
ForceFields::ForceField *constructForceField(const BoundsMatrix &mmat,
RDGeom::PointPtrVect &positions,
const VECT_CHIRALSET & csets,
double weightChiral,
double weightFourthDim,
std::map< std::pair<int,int>,double> *extraWeights,
double basinSizeTol) {
unsigned int N = mmat.numRows();
CHECK_INVARIANT(N == positions.size(), "");
ForceFields::ForceField *field=new ForceFields::ForceField(positions[0]->dimension());
for(unsigned int i=0; i < N; i++){
field->positions().push_back(positions[i]);
}
for (unsigned int i = 1; i < N; i++) {
for (unsigned int j = 0; j < i; j++) {
double w = 1.0;
double l = mmat.getLowerBound(i,j);
double u = mmat.getUpperBound(i,j);
bool includeIt=false;
if(extraWeights){
std::map< std::pair<int,int>,double>::const_iterator mapIt;
mapIt = extraWeights->find(std::make_pair(i,j));
if(mapIt != extraWeights->end()){
w = mapIt->second;
includeIt=true;
}
}
if(u-l <= basinSizeTol) {
includeIt=true;
}
if(includeIt){
DistViolationContrib *contrib = new DistViolationContrib(field, i, j, u, l, w);
field->contribs().push_back(ForceFields::ContribPtr(contrib));
}
}
}
// now add chiral constraints
if (weightChiral > 1.e-8) {
for (VECT_CHIRALSET::const_iterator csi = csets.begin();
csi != csets.end(); csi++) {
ChiralViolationContrib *contrib = new ChiralViolationContrib(field, csi->get(),
weightChiral);
field->contribs().push_back(ForceFields::ContribPtr(contrib));
}
}
// finally the contribution from the fourth dimension if we need to
if ((field->dimension() == 4) && (weightFourthDim > 1.e-8)) {
for (unsigned int i = 1; i < N; i++) {
FourthDimContrib *contrib = new FourthDimContrib(field,i,weightFourthDim);
field->contribs().push_back(ForceFields::ContribPtr(contrib));
}
}
return field;
} // constructForceField
ForceFields::ForceField *construct3DForceField(const BoundsMatrix &mmat,
RDGeom::Point3DPtrVect &positions,
const std::vector<std::pair<int, int> > &bonds,
const std::vector<std::vector<int> > &angles,
const std::vector<std::vector<int> > &expTorsionAtoms,
const std::vector<std::pair<std::vector<int>, std::vector<double> > > &expTorsionAngles,
const std::vector<std::vector<int> > &improperAtoms,
const std::vector<int> &atomNums) {
(void)atomNums;
unsigned int N = mmat.numRows();
CHECK_INVARIANT(N == positions.size(), "");
CHECK_INVARIANT(expTorsionAtoms.size() == expTorsionAngles.size(), "");
ForceFields::ForceField *field = new ForceFields::ForceField(positions[0]->dimension());
for (unsigned int i = 0; i < N; ++i){
field->positions().push_back(positions[i]);
}
// keep track which atoms are 1,2- or 1,3-restrained
boost::dynamic_bitset<> atomPairs(N*N);
// torsion constraints
for (unsigned int t = 0; t < expTorsionAtoms.size(); ++t) {
int i = expTorsionAtoms[t][0];
int j = expTorsionAtoms[t][1];
int k = expTorsionAtoms[t][2];
int l = expTorsionAtoms[t][3];
if (i < j)
atomPairs[i*N+j] = 1;
else
atomPairs[j*N+i] = 1;
// expTorsionAngles[t][0] = (signs, V's)
ForceFields::CrystalFF::TorsionAngleContribM6 *contrib =
new ForceFields::CrystalFF::TorsionAngleContribM6(field, i, j, k, l, expTorsionAngles[t].second, expTorsionAngles[t].first);
field->contribs().push_back(ForceFields::ContribPtr(contrib));
} // torsion constraints
// improper torsions / out-of-plane bend / inversion
for (unsigned int t = 0; t < improperAtoms.size(); ++t) {
std::vector<int> n(4);
for (unsigned int i = 0; i < 3; ++i) {
n[1] = 1;
switch (i) {
case 0:
n[0] = 0;
n[2] = 2;
n[3] = 3;
break;
case 1:
n[0] = 0;
n[2] = 3;
n[3] = 2;
break;
case 2:
n[0] = 2;
n[2] = 3;
n[3] = 0;
break;
}
ForceFields::UFF::InversionContrib *contrib =
new ForceFields::UFF::InversionContrib(field, improperAtoms[t][n[0]], improperAtoms[t][n[1]],
improperAtoms[t][n[2]], improperAtoms[t][n[3]], improperAtoms[t][4], improperAtoms[t][5]);
field->contribs().push_back(ForceFields::ContribPtr(contrib));
}
}
double fdist = 100.0; // force constant
// 1,2 distance constraints
std::vector<std::pair<int, int> >::const_iterator bi;
for (bi = bonds.begin(); bi != bonds.end(); ++bi) {
unsigned int i = bi->first;
unsigned int j = bi->second;
if (i < j)
atomPairs[i*N+j] = 1;
else
atomPairs[j*N+i] = 1;
double d = ((*positions[i]) - (*positions[j])).length();
double l = d-0.01;
double u = d+0.01;
ForceFields::UFF::DistanceConstraintContrib *contrib =
new ForceFields::UFF::DistanceConstraintContrib(field, i, j, l, u, fdist);
field->contribs().push_back(ForceFields::ContribPtr(contrib));
}
// 1,3 distance constraints
for (unsigned int a = 0; a < angles.size(); ++a) {
unsigned int i = angles[a][0];
unsigned int j = angles[a][1];
unsigned int k = angles[a][2];
if (i < j)
atomPairs[i*N+j] = 1;
else
atomPairs[j*N+i] = 1;
// check for triple bonds
if (angles[a][3]) {
ForceFields::UFF::AngleConstraintContrib *contrib =
new ForceFields::UFF::AngleConstraintContrib(field, i, j, k, 179.0, 180.0, fdist);
field->contribs().push_back(ForceFields::ContribPtr(contrib));
} else {
double d = ((*positions[i]) - (*positions[k])).length();
double l = d-0.01;
double u = d+0.01;
ForceFields::UFF::DistanceConstraintContrib *contrib =
new ForceFields::UFF::DistanceConstraintContrib(field, i, k, l, u, fdist);
field->contribs().push_back(ForceFields::ContribPtr(contrib));
}
}
// minimum distance for all other atom pairs
fdist = 10.0;
for (unsigned int i = 1; i < N; ++i) {
for (unsigned int j = 0; j < i; ++j) {
if (!atomPairs[j*N+i]) {
double l = mmat.getLowerBound(i,j);
double u = mmat.getUpperBound(i,j);
ForceFields::UFF::DistanceConstraintContrib *contrib =
new ForceFields::UFF::DistanceConstraintContrib(field, i, j, l, u, fdist);
field->contribs().push_back(ForceFields::ContribPtr(contrib));
}
}
}
return field;
} // construct3DForceField
ForceFields::ForceField *constructPlain3DForceField(const BoundsMatrix &mmat,
RDGeom::Point3DPtrVect &positions,
const std::vector<std::pair<int, int> > &bonds,
const std::vector<std::vector<int> > &angles,
const std::vector<std::vector<int> > &expTorsionAtoms,
const std::vector<std::pair<std::vector<int>, std::vector<double> > > &expTorsionAngles,
const std::vector<int> &atomNums) {
(void)atomNums;
unsigned int N = mmat.numRows();
CHECK_INVARIANT(N == positions.size(), "");
CHECK_INVARIANT(expTorsionAtoms.size() == expTorsionAngles.size(), "");
ForceFields::ForceField *field = new ForceFields::ForceField(positions[0]->dimension());
for (unsigned int i = 0; i < N; ++i){
field->positions().push_back(positions[i]);
}
// keep track which atoms are 1,2- or 1,3-restrained
boost::dynamic_bitset<> atomPairs(N*N);
// torsion constraints
for (unsigned int t = 0; t < expTorsionAtoms.size(); ++t) {
int i = expTorsionAtoms[t][0];
int j = expTorsionAtoms[t][1];
int k = expTorsionAtoms[t][2];
int l = expTorsionAtoms[t][3];
if (i < j)
atomPairs[i*N+j] = 1;
else
atomPairs[j*N+i] = 1;
// expTorsionAngles[t][0] = (signs, V's)
ForceFields::CrystalFF::TorsionAngleContribM6 *contrib =
new ForceFields::CrystalFF::TorsionAngleContribM6(field, i, j, k, l, expTorsionAngles[t].second, expTorsionAngles[t].first);
field->contribs().push_back(ForceFields::ContribPtr(contrib));
} // torsion constraints
double fdist = 100.0; // force constant
// 1,2 distance constraints
std::vector<std::pair<int, int> >::const_iterator bi;
for (bi = bonds.begin(); bi != bonds.end(); ++bi) {
unsigned int i = bi->first;
unsigned int j = bi->second;
if (i < j)
atomPairs[i*N+j] = 1;
else
atomPairs[j*N+i] = 1;
double d = ((*positions[i]) - (*positions[j])).length();
double l = d-0.01;
double u = d+0.01;
ForceFields::UFF::DistanceConstraintContrib *contrib =
new ForceFields::UFF::DistanceConstraintContrib(field, i, j, l, u, fdist);
field->contribs().push_back(ForceFields::ContribPtr(contrib));
}
// 1,3 distance constraints
for (unsigned int a = 1; a < angles.size(); ++a) {
unsigned int i = angles[a][0];
unsigned int j = angles[a][2];
if (i < j)
atomPairs[i*N+j] = 1;
else
atomPairs[j*N+i] = 1;
double d = ((*positions[i]) - (*positions[j])).length();
double l = d-0.01;
double u = d+0.01;
ForceFields::UFF::DistanceConstraintContrib *contrib =
new ForceFields::UFF::DistanceConstraintContrib(field, i, j, l, u, fdist);
field->contribs().push_back(ForceFields::ContribPtr(contrib));
}
// minimum distance for all other atom pairs
fdist = 10.0;
for (unsigned int i = 1; i < N; ++i) {
for (unsigned int j = 0; j < i; ++j) {
if (!atomPairs[j*N+i]) {
double l = mmat.getLowerBound(i,j);
double u = mmat.getUpperBound(i,j);
ForceFields::UFF::DistanceConstraintContrib *contrib =
new ForceFields::UFF::DistanceConstraintContrib(field, i, j, l, u, fdist);
field->contribs().push_back(ForceFields::ContribPtr(contrib));
}
}
}
return field;
} // constructPlain3DForceField
return pickRandomDistMat(mmat, distMat, RDKit::getDoubleRandomSource());
}
//! Fills \c distMat with distances picked between the bounds in \c mmat
/*!
  For every pair (i, j) with j < i one value is drawn from \c rng and the
  distance lb + value * (ub - lb) is stored.

  \return the largest distance that was stored (-1.0 if none was)
*/
double pickRandomDistMat(const BoundsMatrix &mmat,
                         RDNumeric::SymmMatrix<double> &distMat,
                         RDKit::double_source_type &rng) {
  // both matrices have to describe the same number of points
  unsigned int npt = mmat.numRows();
  CHECK_INVARIANT(npt == distMat.numRows(), "Size mismatch");

  double *storage = distMat.getData();
  double maxDist = -1.0;
  for (unsigned int row = 1; row < npt; ++row) {
    // row `row` starts at this offset in the matrix's data array
    const unsigned int rowStart = row * (row + 1) / 2;
    for (unsigned int col = 0; col < row; ++col) {
      double ub = mmat.getUpperBound(row, col);
      double lb = mmat.getLowerBound(row, col);
      CHECK_INVARIANT(ub >= lb, "");
      const double fraction = rng();
      const double dist = lb + (fraction) * (ub - lb);
      storage[rowStart + col] = dist;
      if (dist > maxDist) {
        maxDist = dist;
      }
    }
  }
  return maxDist;
}
//! \overload
/*!
  A positive \c seed is handed to RDKit::getRandomGenerator() first; the
  work is then delegated to the overload that takes a random source.
*/
bool computeInitialCoords(const RDNumeric::SymmMatrix<double> &distMat,
                          RDGeom::PointPtrVect &positions, bool randNegEig,
                          unsigned int numZeroFail, int seed) {
  if (seed > 0) {
    RDKit::getRandomGenerator(seed);
  }
  RDKit::double_source_type &rng = RDKit::getDoubleRandomSource();
  return computeInitialCoords(distMat, positions, rng, randNegEig,
                              numZeroFail);
}
//! Computes initial coordinates for \c positions from a distance matrix
/*!
  Steps, in order:
    - square the distances and form T(i,j) = 0.5 * (d0i + d0j - d(i,j)^2),
      where d0i is the mean squared distance of point i minus the overall
      scaled sum of squared distances
    - ask the power eigen solver for the leading eigenpairs of T
    - write eigenvalue-scaled eigenvector components into the points;
      components whose eigenvalue is negative are filled from \c rng

  \return false if a d0i falls below EIGVAL_TOL (more than 3 points), if a
          negative eigenvalue shows up while \c randNegEig is false, or if
          at least \c numZeroFail eigenvalues are (near) zero (more than 3
          points); true otherwise
*/
bool computeInitialCoords(const RDNumeric::SymmMatrix<double> &distMat,
                          RDGeom::PointPtrVect &positions,
                          RDKit::double_source_type &rng, bool randNegEig,
                          unsigned int numZeroFail) {
  unsigned int N = distMat.numRows();
  unsigned int nPt = positions.size();
  CHECK_INVARIANT(nPt == N, "Size mismatch");
  const unsigned int dim = positions.front()->dimension();

  // squared distances and their sum, scaled by 1/N^2
  RDNumeric::SymmMatrix<double> sqMat(N);
  const double *distData = distMat.getData();
  double *sqData = sqMat.getData();
  const unsigned int nEntries = distMat.getDataSize();
  double sumSqD2 = 0.0;
  for (unsigned int idx = 0; idx < nEntries; ++idx) {
    sqData[idx] = distData[idx] * distData[idx];
    sumSqD2 += sqData[idx];
  }
  sumSqD2 /= (N * N);

  // per-point term: mean squared distance to all points minus sumSqD2
  RDNumeric::DoubleVector sqD0i(N, 0.0);
  double *sqD0iData = sqD0i.getData();
  for (unsigned int row = 0; row < N; ++row) {
    double &d0 = sqD0iData[row];
    for (unsigned int col = 0; col < N; ++col) {
      d0 += sqMat.getVal(row, col);
    }
    d0 /= N;
    d0 -= sumSqD2;
    if ((d0 < EIGVAL_TOL) && (N > 3)) {
      return false;
    }
  }

  // the matrix handed to the eigen solver
  RDNumeric::SymmMatrix<double> T(N, 0.0);
  for (unsigned int row = 0; row < N; ++row) {
    for (unsigned int col = 0; col <= row; ++col) {
      T.setVal(row, col, 0.5 * (sqD0iData[row] + sqD0iData[col] -
                                sqMat.getVal(row, col)));
    }
  }

  RDNumeric::DoubleMatrix eigVecs(dim, N);
  RDNumeric::DoubleVector eigVals(dim);
  const unsigned int nEigs = (dim < N) ? dim : N;
  RDNumeric::EigenSolvers::powerEigenSolver(nEigs, T, eigVals, eigVecs,
                                            static_cast<int>(sumSqD2 * N));

  // classify the eigenvalues: positive ones are replaced by their square
  // root, near-zero ones are zeroed and counted, the rest count as negative
  double *eigData = eigVals.getData();
  bool foundNeg = false;
  unsigned int zeroEigs = 0;
  for (unsigned int e = 0; e < dim; ++e) {
    double &ev = eigData[e];
    if (ev > EIGVAL_TOL) {
      ev = sqrt(ev);
    } else if (fabs(ev) < EIGVAL_TOL) {
      ev = 0.0;
      ++zeroEigs;
    } else {
      foundNeg = true;
    }
  }
  if (foundNeg && !randNegEig) {
    return false;
  }
  if ((zeroEigs >= numZeroFail) && (N > 3)) {
    return false;
  }

  for (unsigned int p = 0; p < N; ++p) {
    RDGeom::Point *pt = positions[p];
    for (unsigned int c = 0; c < dim; ++c) {
      if (eigData[c] >= 0.0) {
        (*pt)[c] = eigData[c] * eigVecs.getVal(c, p);
      } else {
        // negative eigenvalue: take this component from the random source
        (*pt)[c] = 1.0 - 2.0 * rng();
      }
    }
  }
  return true;
}
//! \overload
/*!
  A positive \c seed is handed to RDKit::getRandomGenerator() first; the
  work is then delegated to the overload that takes a random source.
*/
bool computeRandomCoords(RDGeom::PointPtrVect &positions, double boxSize,
                         int seed) {
  if (seed > 0) {
    RDKit::getRandomGenerator(seed);
  }
  RDKit::double_source_type &rng = RDKit::getDoubleRandomSource();
  return computeRandomCoords(positions, boxSize, rng);
}
//! Assigns every coordinate of every point the value boxSize * (rng() - 0.5)
/*!
  \param positions  the points to overwrite
  \param boxSize    scale factor; must be positive
  \param rng        source of the random values

  \return always true
*/
bool computeRandomCoords(RDGeom::PointPtrVect &positions, double boxSize,
                         RDKit::double_source_type &rng) {
  CHECK_INVARIANT(boxSize > 0.0, "bad boxSize");
  const RDGeom::PointPtrVect::size_type nPoints = positions.size();
  for (RDGeom::PointPtrVect::size_type p = 0; p < nPoints; ++p) {
    RDGeom::Point *pt = positions[p];
    for (unsigned int c = 0; c < pt->dimension(); ++c) {
      (*pt)[c] = boxSize * (rng() - 0.5);
    }
  }
  return true;
}
//! Builds the force field that penalizes distance-bound, chirality and
//! fourth-dimension violations
/*!
  \param mmat             distance bounds matrix
  \param positions        the points; they are registered with the field
  \param csets            chiral sets; one term each if weightChiral > 1e-8
  \param weightChiral     weight of the chiral terms
  \param weightFourthDim  weight of the fourth-dimension terms; only used
                          when the field is 4-dimensional and the weight
                          exceeds 1e-8
  \param extraWeights     optional per-pair weights; a pair listed here is
                          always included
  \param basinSizeTol     pairs whose (upper - lower) exceeds this are left
                          out unless they appear in extraWeights

  \return a newly allocated force field
*/
ForceFields::ForceField *constructForceField(
    const BoundsMatrix &mmat, RDGeom::PointPtrVect &positions,
    const VECT_CHIRALSET &csets, double weightChiral, double weightFourthDim,
    std::map<std::pair<int, int>, double> *extraWeights, double basinSizeTol) {
  typedef std::map<std::pair<int, int>, double> PairWeightMap;

  unsigned int N = mmat.numRows();
  CHECK_INVARIANT(N == positions.size(), "");
  ForceFields::ForceField *field =
      new ForceFields::ForceField(positions[0]->dimension());
  for (unsigned int p = 0; p < N; ++p) {
    field->positions().push_back(positions[p]);
  }

  // distance-violation terms
  for (unsigned int i = 1; i < N; ++i) {
    for (unsigned int j = 0; j < i; ++j) {
      const double lower = mmat.getLowerBound(i, j);
      const double upper = mmat.getUpperBound(i, j);

      double weight = 1.0;
      bool hasExtraWeight = false;
      if (extraWeights) {
        PairWeightMap::const_iterator hit =
            extraWeights->find(std::make_pair(i, j));
        if (hit != extraWeights->end()) {
          weight = hit->second;
          hasExtraWeight = true;
        }
      }

      if (hasExtraWeight || (upper - lower <= basinSizeTol)) {
        field->contribs().push_back(ForceFields::ContribPtr(
            new DistViolationContrib(field, i, j, upper, lower, weight)));
      }
    }
  }

  // chiral terms
  if (weightChiral > 1.e-8) {
    for (VECT_CHIRALSET::const_iterator cs = csets.begin(); cs != csets.end();
         ++cs) {
      field->contribs().push_back(ForceFields::ContribPtr(
          new ChiralViolationContrib(field, cs->get(), weightChiral)));
    }
  }

  // fourth-dimension terms, only when they can matter
  // NOTE(review): the loop starts at index 1, so point 0 never gets a
  // fourth-dimension term -- confirm that this is intended
  if ((field->dimension() == 4) && (weightFourthDim > 1.e-8)) {
    for (unsigned int p = 1; p < N; ++p) {
      field->contribs().push_back(ForceFields::ContribPtr(
          new FourthDimContrib(field, p, weightFourthDim)));
    }
  }
  return field;
}  // constructForceField
//! Builds the force field with experimental torsion terms, inversion terms
//! and 1-2/1-3 distance constraints
/*!
  \param mmat              distance bounds matrix; supplies the bounds for
                           every pair that is not restrained by another term
  \param positions         the 3D points; they are registered with the field
                           and the current 1-2/1-3 distances are read from
                           them
  \param bonds             pairs of 1-2 (bonded) atom indices
  \param angles            per angle: three atom indices followed by a flag;
                           a non-zero flag selects an angle constraint
                           (179.0 .. 180.0) instead of a 1-3 distance
                           constraint
  \param expTorsionAtoms   groups of four atom indices, one per torsion
  \param expTorsionAngles  per torsion: (signs, V's), forwarded to
                           TorsionAngleContribM6
  \param improperAtoms     per improper: four atom indices followed by two
                           values forwarded to InversionContrib
  \param atomNums          currently unused

  \return a newly allocated force field
*/
ForceFields::ForceField *construct3DForceField(
    const BoundsMatrix &mmat, RDGeom::Point3DPtrVect &positions,
    const std::vector<std::pair<int, int> > &bonds,
    const std::vector<std::vector<int> > &angles,
    const std::vector<std::vector<int> > &expTorsionAtoms,
    const std::vector<std::pair<std::vector<int>, std::vector<double> > > &
        expTorsionAngles,
    const std::vector<std::vector<int> > &improperAtoms,
    const std::vector<int> &atomNums) {
  (void)atomNums;
  unsigned int N = mmat.numRows();
  CHECK_INVARIANT(N == positions.size(), "");
  CHECK_INVARIANT(expTorsionAtoms.size() == expTorsionAngles.size(), "");
  ForceFields::ForceField *field =
      new ForceFields::ForceField(positions[0]->dimension());
  for (unsigned int i = 0; i < N; ++i) {
    field->positions().push_back(positions[i]);
  }

  // Pairs that already carry a dedicated term. Bit (lo * N + hi), lo < hi,
  // is set for such a pair; the final loop skips every pair whose bit is set.
  boost::dynamic_bitset<> atomPairs(N * N);

  // torsion constraints
  for (unsigned int t = 0; t < expTorsionAtoms.size(); ++t) {
    int i = expTorsionAtoms[t][0];
    int j = expTorsionAtoms[t][1];
    int k = expTorsionAtoms[t][2];
    int l = expTorsionAtoms[t][3];
    // the torsion term restrains the outer (1-4) pair, so that is the pair
    // to flag; the inner pairs are flagged by the bond/angle loops below
    if (i < l) {
      atomPairs[i * N + l] = 1;
    } else {
      atomPairs[l * N + i] = 1;
    }
    // expTorsionAngles[t] = (signs, V's)
    ForceFields::CrystalFF::TorsionAngleContribM6 *contrib =
        new ForceFields::CrystalFF::TorsionAngleContribM6(
            field, i, j, k, l, expTorsionAngles[t].second,
            expTorsionAngles[t].first);
    field->contribs().push_back(ForceFields::ContribPtr(contrib));
  }  // torsion constraints

  // improper torsions / out-of-plane bend / inversion:
  // three terms per improper, slot 1 stays fixed while the other three
  // slots are permuted
  static const unsigned int improperPerms[3][4] = {
      {0, 1, 2, 3}, {0, 1, 3, 2}, {2, 1, 3, 0}};
  for (unsigned int t = 0; t < improperAtoms.size(); ++t) {
    const std::vector<int> &imp = improperAtoms[t];
    for (unsigned int p = 0; p < 3; ++p) {
      const unsigned int *n = improperPerms[p];
      ForceFields::UFF::InversionContrib *contrib =
          new ForceFields::UFF::InversionContrib(field, imp[n[0]], imp[n[1]],
                                                 imp[n[2]], imp[n[3]], imp[4],
                                                 imp[5]);
      field->contribs().push_back(ForceFields::ContribPtr(contrib));
    }
  }

  double fdist = 100.0;  // force constant
  // 1,2 distance constraints: keep each bonded pair within 0.01 of its
  // current distance
  std::vector<std::pair<int, int> >::const_iterator bi;
  for (bi = bonds.begin(); bi != bonds.end(); ++bi) {
    unsigned int i = bi->first;
    unsigned int j = bi->second;
    if (i < j) {
      atomPairs[i * N + j] = 1;
    } else {
      atomPairs[j * N + i] = 1;
    }
    double d = ((*positions[i]) - (*positions[j])).length();
    double l = d - 0.01;
    double u = d + 0.01;
    ForceFields::UFF::DistanceConstraintContrib *contrib =
        new ForceFields::UFF::DistanceConstraintContrib(field, i, j, l, u,
                                                        fdist);
    field->contribs().push_back(ForceFields::ContribPtr(contrib));
  }

  // 1,3 distance constraints
  for (unsigned int a = 0; a < angles.size(); ++a) {
    unsigned int i = angles[a][0];
    unsigned int j = angles[a][1];
    unsigned int k = angles[a][2];
    // flag the 1-3 pair (i, k): that is the pair restrained here, so it must
    // not receive a second, weaker constraint in the final loop
    if (i < k) {
      atomPairs[i * N + k] = 1;
    } else {
      atomPairs[k * N + i] = 1;
    }
    // check for triple bonds
    if (angles[a][3]) {
      ForceFields::UFF::AngleConstraintContrib *contrib =
          new ForceFields::UFF::AngleConstraintContrib(field, i, j, k, 179.0,
                                                       180.0, fdist);
      field->contribs().push_back(ForceFields::ContribPtr(contrib));
    } else {
      double d = ((*positions[i]) - (*positions[k])).length();
      double l = d - 0.01;
      double u = d + 0.01;
      ForceFields::UFF::DistanceConstraintContrib *contrib =
          new ForceFields::UFF::DistanceConstraintContrib(field, i, k, l, u,
                                                          fdist);
      field->contribs().push_back(ForceFields::ContribPtr(contrib));
    }
  }

  // bounds from the bounds matrix for all other atom pairs
  fdist = 10.0;
  for (unsigned int i = 1; i < N; ++i) {
    for (unsigned int j = 0; j < i; ++j) {
      // j < i here, so (j, i) is the (lo, hi) order used when flagging
      if (!atomPairs[j * N + i]) {
        double l = mmat.getLowerBound(i, j);
        double u = mmat.getUpperBound(i, j);
        ForceFields::UFF::DistanceConstraintContrib *contrib =
            new ForceFields::UFF::DistanceConstraintContrib(field, i, j, l, u,
                                                            fdist);
        field->contribs().push_back(ForceFields::ContribPtr(contrib));
      }
    }
  }
  return field;
}  // construct3DForceField
//! Builds the force field with experimental torsion terms and 1-2/1-3
//! distance constraints (no inversion terms)
/*!
  \param mmat              distance bounds matrix; supplies the bounds for
                           every pair that is not restrained by another term
  \param positions         the 3D points; they are registered with the field
                           and the current 1-2/1-3 distances are read from
                           them
  \param bonds             pairs of 1-2 (bonded) atom indices
  \param angles            per angle: atom indices; entries 0 and 2 form the
                           1-3 pair that gets a distance constraint
  \param expTorsionAtoms   groups of four atom indices, one per torsion
  \param expTorsionAngles  per torsion: (signs, V's), forwarded to
                           TorsionAngleContribM6
  \param atomNums          currently unused

  \return a newly allocated force field
*/
ForceFields::ForceField *constructPlain3DForceField(
    const BoundsMatrix &mmat, RDGeom::Point3DPtrVect &positions,
    const std::vector<std::pair<int, int> > &bonds,
    const std::vector<std::vector<int> > &angles,
    const std::vector<std::vector<int> > &expTorsionAtoms,
    const std::vector<std::pair<std::vector<int>, std::vector<double> > > &
        expTorsionAngles,
    const std::vector<int> &atomNums) {
  (void)atomNums;
  unsigned int N = mmat.numRows();
  CHECK_INVARIANT(N == positions.size(), "");
  CHECK_INVARIANT(expTorsionAtoms.size() == expTorsionAngles.size(), "");
  ForceFields::ForceField *field =
      new ForceFields::ForceField(positions[0]->dimension());
  for (unsigned int i = 0; i < N; ++i) {
    field->positions().push_back(positions[i]);
  }

  // Pairs that already carry a dedicated term. Bit (lo * N + hi), lo < hi,
  // is set for such a pair; the final loop skips every pair whose bit is set.
  boost::dynamic_bitset<> atomPairs(N * N);

  // torsion constraints
  for (unsigned int t = 0; t < expTorsionAtoms.size(); ++t) {
    int i = expTorsionAtoms[t][0];
    int j = expTorsionAtoms[t][1];
    int k = expTorsionAtoms[t][2];
    int l = expTorsionAtoms[t][3];
    // the torsion term restrains the outer (1-4) pair, so that is the pair
    // to flag; the inner pairs are flagged by the bond/angle loops below
    if (i < l) {
      atomPairs[i * N + l] = 1;
    } else {
      atomPairs[l * N + i] = 1;
    }
    // expTorsionAngles[t] = (signs, V's)
    ForceFields::CrystalFF::TorsionAngleContribM6 *contrib =
        new ForceFields::CrystalFF::TorsionAngleContribM6(
            field, i, j, k, l, expTorsionAngles[t].second,
            expTorsionAngles[t].first);
    field->contribs().push_back(ForceFields::ContribPtr(contrib));
  }  // torsion constraints

  double fdist = 100.0;  // force constant
  // 1,2 distance constraints: keep each bonded pair within 0.01 of its
  // current distance
  std::vector<std::pair<int, int> >::const_iterator bi;
  for (bi = bonds.begin(); bi != bonds.end(); ++bi) {
    unsigned int i = bi->first;
    unsigned int j = bi->second;
    if (i < j) {
      atomPairs[i * N + j] = 1;
    } else {
      atomPairs[j * N + i] = 1;
    }
    double d = ((*positions[i]) - (*positions[j])).length();
    double l = d - 0.01;
    double u = d + 0.01;
    ForceFields::UFF::DistanceConstraintContrib *contrib =
        new ForceFields::UFF::DistanceConstraintContrib(field, i, j, l, u,
                                                        fdist);
    field->contribs().push_back(ForceFields::ContribPtr(contrib));
  }

  // 1,3 distance constraints; every angle is processed, including angles[0]
  for (unsigned int a = 0; a < angles.size(); ++a) {
    unsigned int i = angles[a][0];
    unsigned int j = angles[a][2];
    if (i < j) {
      atomPairs[i * N + j] = 1;
    } else {
      atomPairs[j * N + i] = 1;
    }
    double d = ((*positions[i]) - (*positions[j])).length();
    double l = d - 0.01;
    double u = d + 0.01;
    ForceFields::UFF::DistanceConstraintContrib *contrib =
        new ForceFields::UFF::DistanceConstraintContrib(field, i, j, l, u,
                                                        fdist);
    field->contribs().push_back(ForceFields::ContribPtr(contrib));
  }

  // bounds from the bounds matrix for all other atom pairs
  fdist = 10.0;
  for (unsigned int i = 1; i < N; ++i) {
    for (unsigned int j = 0; j < i; ++j) {
      // j < i here, so (j, i) is the (lo, hi) order used when flagging
      if (!atomPairs[j * N + i]) {
        double l = mmat.getLowerBound(i, j);
        double u = mmat.getUpperBound(i, j);
        ForceFields::UFF::DistanceConstraintContrib *contrib =
            new ForceFields::UFF::DistanceConstraintContrib(field, i, j, l, u,
                                                            fdist);
        field->contribs().push_back(ForceFields::ContribPtr(contrib));
      }
    }
  }
  return field;
}  // constructPlain3DForceField
}

View File

@@ -18,145 +18,163 @@
#include <RDGeneral/utils.h>
namespace ForceFields {
class ForceField;
class ForceField;
}
namespace DistGeom {
//! Pick a distance matrix at random such that the
//! distance satisfy the bounds in the BoundsMatrix
/*!
\param mmat Bounds matrix
\param distmat Storage for randomly chosen distances
\param seed the random number seed to use
//! Pick a distance matrix at random such that the
//! distances satisfy the bounds in the BoundsMatrix
/*!
\param mmat Bounds matrix
\param distmat Storage for randomly chosen distances
\param seed the random number seed to use
\return the largest element of the distance matrix
*/
double pickRandomDistMat(const BoundsMatrix &mmat,
RDNumeric::SymmMatrix<double> &distmat,
int seed=-1);
//! \overload
double pickRandomDistMat(const BoundsMatrix &mmat,
RDNumeric::SymmMatrix<double> &distmat,
RDKit::double_source_type &rng);
\return the largest element of the distance matrix
*/
double pickRandomDistMat(const BoundsMatrix &mmat,
RDNumeric::SymmMatrix<double> &distmat, int seed = -1);
//! \overload
double pickRandomDistMat(const BoundsMatrix &mmat,
RDNumeric::SymmMatrix<double> &distmat,
RDKit::double_source_type &rng);
//! Compute an initial embedding in 3D based on a distance matrix
/*!
This function follows the embed algorithm mentioned in
"Distance Geometry and Molecular Conformation" by G.M.Crippen and T.F.Havel
(pages 312-313)
//! Compute an initial embedding in 3D based on a distance matrix
/*!
This function follows the embed algorithm mentioned in
"Distance Geometry and Molecular Conformation" by G.M.Crippen and T.F.Havel
(pages 312-313)
\param distmat Distance matrix
\param positions A vector of pointers to Points to write out the resulting coordinates
\param randNegEig If set to true and if any of the eigen values are negative, we will
pick the corresponding components of the coordinates at random
\param numZeroFail Fail embedding if this many (or more) eigenvalues are zero
\param seed the random number seed to use
\param distmat     Distance matrix
\param positions   A vector of pointers to Points to write out the resulting
                   coordinates
\param randNegEig  If set to true and if any of the eigenvalues are negative,
                   we will pick the corresponding components of the
                   coordinates at random
\param numZeroFail Fail embedding if this many (or more) eigenvalues are zero
\param seed        the random number seed to use
\return true if the embedding was successful
*/
bool computeInitialCoords(const RDNumeric::SymmMatrix<double> &distmat,
RDGeom::PointPtrVect &positions, bool randNegEig=false,
unsigned int numZeroFail=2,
int seed=-1);
//! \overload
bool computeInitialCoords(const RDNumeric::SymmMatrix<double> &distmat,
RDGeom::PointPtrVect &positions,
RDKit::double_source_type &rng,
bool randNegEig=false,
unsigned int numZeroFail=2
);
\return true if the embedding was successful
*/
bool computeInitialCoords(const RDNumeric::SymmMatrix<double> &distmat,
RDGeom::PointPtrVect &positions,
bool randNegEig = false, unsigned int numZeroFail = 2,
int seed = -1);
//! \overload
bool computeInitialCoords(const RDNumeric::SymmMatrix<double> &distmat,
RDGeom::PointPtrVect &positions,
RDKit::double_source_type &rng,
bool randNegEig = false,
unsigned int numZeroFail = 2);
//! places atoms randomly in a box
/*!
\param positions A vector of pointers to Points to write out the resulting coordinates
\param boxSize the side-length of the cubic box
\param seed the random number seed to use
//! places atoms randomly in a box
/*!
\param positions A vector of pointers to Points to write out the resulting
coordinates
\param boxSize the side-length of the cubic box
\param seed the random number seed to use
\return true if the coordinate generation was successful
*/
bool computeRandomCoords(RDGeom::PointPtrVect &positions, double boxSize,
int seed=-1);
//! \overload
bool computeRandomCoords(RDGeom::PointPtrVect &positions, double boxSize,
RDKit::double_source_type &rng);
\return true if the coordinate generation was successful
*/
bool computeRandomCoords(RDGeom::PointPtrVect &positions, double boxSize,
int seed = -1);
//! \overload
bool computeRandomCoords(RDGeom::PointPtrVect &positions, double boxSize,
RDKit::double_source_type &rng);
//! Setup the error function for violation of distance bounds as a forcefield
/*!
This is based on function E3 on page 311 of "Distance Geometry in Molecular
Modeling" Jeffrey M.Blaney and J.Scott Dixon, Review in Computational Chemistry,
Volume V
//! Setup the error function for violation of distance bounds as a forcefield
/*!
This is based on function E3 on page 311 of "Distance Geometry in Molecular
Modeling", Jeffrey M. Blaney and J. Scott Dixon, Reviews in Computational
Chemistry, Volume V
\param mmat Distance bounds matrix
\param positions A vector of pointers to Points to write out the resulting coordinates
\param csets The vector of chiral points (type: ChiralSet)
\param weightChiral weight to be used to enforce chirality
\param weightFourthDim another chiral weight
\param extraWeights an optional set of weights for distance bounds violations
\param basinSizeTol Optional: any distance bound with a basin (distance between max and
min bounds) larger than this value will not be included in the force
field used to cleanup the structure.
\param mmat            Distance bounds matrix
\param positions       A vector of pointers to Points to write out the
                       resulting coordinates
\param csets           The vector of chiral points (type: ChiralSet)
\param weightChiral    weight to be used to enforce chirality
\param weightFourthDim weight of the fourth-dimension contribution (only
                       used when the points are 4-dimensional)
\param extraWeights    an optional set of weights for distance bounds
                       violations
\param basinSizeTol    Optional: any distance bound with a basin (distance
                       between max and min bounds) larger than this value
                       will not be included in the force field used to
                       cleanup the structure, unless it has an entry in
                       extraWeights.
\return a pointer to a ForceField suitable for cleaning up the violations.
<b>NOTE:</b> the caller is responsible for deleting this force field.
\return a pointer to a ForceField suitable for cleaning up the violations.
<b>NOTE:</b> the caller is responsible for deleting this force field.
*/
ForceFields::ForceField *constructForceField(const BoundsMatrix &mmat,
RDGeom::PointPtrVect &positions, const VECT_CHIRALSET &csets,
double weightChiral=1.0,
double weightFourthDim=0.1,
std::map< std::pair<int,int>,double> *extraWeights=0,
double basinSizeTol=5.0);
*/
ForceFields::ForceField *constructForceField(
const BoundsMatrix &mmat, RDGeom::PointPtrVect &positions,
const VECT_CHIRALSET &csets, double weightChiral = 1.0,
double weightFourthDim = 0.1,
std::map<std::pair<int, int>, double> *extraWeights = 0,
double basinSizeTol = 5.0);
//! Force field with experimental torsion angle preferences and 1-2/1-3 distance constraints
/*!
//! Force field with experimental torsion angle preferences and 1-2/1-3 distance
//! constraints
/*!
\param mmat Distance bounds matrix
\param positions A vector of pointers to 3D Points to write out the resulting coordinates
\param bonds A list of 1-2 partners (bonds)
\param angles A list of 1-3 partners (angles)
\param expTorsionAtoms A list of groups of 4 atom indices for experimental torsions
\param expTorsionAngles A list of corresponding torsion angle-potential parameters
\param improperAtoms A list of groups of 4 atom indices for inversion terms
\param atomNums A list of atomic numbers for all atoms in the molecule
\param mmat Distance bounds matrix
\param positions A vector of pointers to 3D Points to write out the
resulting coordinates
\param bonds A list of 1-2 partners (bonds)
\param angles A list of 1-3 partners (angles)
\param expTorsionAtoms A list of groups of 4 atom indices for experimental
torsions
\param expTorsionAngles A list of corresponding torsion angle-potential
parameters
\param improperAtoms A list of groups of 4 atom indices for inversion terms
\param atomNums A list of atomic numbers for all atoms in the molecule
\return a pointer to a ForceField suitable for imposing experimental torsion angle preferences
<b>NOTE:</b> the caller is responsible for deleting this force field.
\return a pointer to a ForceField suitable for imposing experimental torsion
angle preferences
<b>NOTE:</b> the caller is responsible for deleting this force field.
*/
ForceFields::ForceField *construct3DForceField(const BoundsMatrix &mmat,
RDGeom::Point3DPtrVect &positions,
const std::vector<std::pair<int, int> > &bonds,
const std::vector<std::vector<int> > &angles,
const std::vector<std::vector<int> > &expTorsionAtoms,
const std::vector<std::pair<std::vector<int>, std::vector<double> > > &expTorsionAngles,
const std::vector<std::vector<int> > &improperAtoms,
const std::vector<int> &atomNums);
*/
ForceFields::ForceField *construct3DForceField(
const BoundsMatrix &mmat, RDGeom::Point3DPtrVect &positions,
const std::vector<std::pair<int, int> > &bonds,
const std::vector<std::vector<int> > &angles,
const std::vector<std::vector<int> > &expTorsionAtoms,
const std::vector<std::pair<std::vector<int>, std::vector<double> > > &
expTorsionAngles,
const std::vector<std::vector<int> > &improperAtoms,
const std::vector<int> &atomNums);
//! Force field with experimental torsion angle preferences and 1-2/1-3 distance constraints
/*!
//! Force field with experimental torsion angle preferences and 1-2/1-3 distance
//! constraints
/*!
\param mmat Distance bounds matrix
\param positions A vector of pointers to 3D Points to write out the resulting coordinates
\param bonds A list of 1-2 partners (bonds)
\param angles A list of 1-3 partners (angles)
\param expTorsionAtoms A list of groups of 4 atom indices for experimental torsions
\param expTorsionAngles A list of corresponding torsion angle-potential parameters
\param atomNums A list of atomic numbers for all atoms in the molecule
\param mmat Distance bounds matrix
\param positions A vector of pointers to 3D Points to write out the
resulting coordinates
\param bonds A list of 1-2 partners (bonds)
\param angles A list of 1-3 partners (angles)
\param expTorsionAtoms A list of groups of 4 atom indices for experimental
torsions
\param expTorsionAngles A list of corresponding torsion angle-potential
parameters
\param atomNums A list of atomic numbers for all atoms in the molecule
\return a pointer to a ForceField suitable for imposing experimental torsion angle preferences
<b>NOTE:</b> the caller is responsible for deleting this force field.
*/
ForceFields::ForceField *constructPlain3DForceField(const BoundsMatrix &mmat,
RDGeom::Point3DPtrVect &positions,
const std::vector<std::pair<int, int> > &bonds,
const std::vector<std::vector<int> > &angles,
const std::vector<std::vector<int> > &expTorsionAtoms,
const std::vector<std::pair<std::vector<int>, std::vector<double> > > &expTorsionAngles,
const std::vector<int> &atomNums);
\return a pointer to a ForceField suitable for imposing experimental torsion
angle preferences
<b>NOTE:</b> the caller is responsible for deleting this force field.
*/
ForceFields::ForceField *constructPlain3DForceField(
const BoundsMatrix &mmat, RDGeom::Point3DPtrVect &positions,
const std::vector<std::pair<int, int> > &bonds,
const std::vector<std::vector<int> > &angles,
const std::vector<std::vector<int> > &expTorsionAtoms,
const std::vector<std::pair<std::vector<int>, std::vector<double> > > &
expTorsionAngles,
const std::vector<int> &atomNums);
}
#endif

View File

@@ -14,75 +14,78 @@
#include <RDGeneral/Invariant.h>
namespace DistGeom {
DistViolationContrib::DistViolationContrib(ForceFields::ForceField *owner,unsigned int idx1,
unsigned int idx2,
double ub, double lb, double weight) {
PRECONDITION(owner,"bad owner");
URANGE_CHECK(idx1,owner->positions().size()-1);
URANGE_CHECK(idx2,owner->positions().size()-1);
dp_forceField = owner;
d_end1Idx = idx1;
d_end2Idx = idx2;
d_ub = ub;
d_lb = lb;
d_weight = weight;
DistViolationContrib::DistViolationContrib(ForceFields::ForceField *owner,
                                           unsigned int idx1, unsigned int idx2,
                                           double ub, double lb, double weight)
    : d_end1Idx(idx1), d_end2Idx(idx2), d_ub(ub), d_lb(lb), d_weight(weight) {
  // The owner must be non-null and both end-point indices must pass the
  // range check against the owner's positions before the owner is stored.
  PRECONDITION(owner, "bad owner");
  URANGE_CHECK(idx1, owner->positions().size() - 1);
  URANGE_CHECK(idx2, owner->positions().size() - 1);
  dp_forceField = owner;
}
double DistViolationContrib::getEnergy(double *pos) const {
PRECONDITION(dp_forceField, "no owner");
PRECONDITION(pos, "bad vector");
double d =
this->dp_forceField->distance(this->d_end1Idx, this->d_end2Idx, pos);
double val = 0.0;
if (d > d_ub) {
val = ((d * d) / (d_ub * d_ub)) - 1.0;
} else if (d < d_lb) {
val = ((2 * d_lb * d_lb) / (d_lb * d_lb + d * d)) - 1.0;
}
double res;
if (val > 0.0) {
res = d_weight * val * val;
} else {
res = 0;
}
return res;
}
void DistViolationContrib::getGrad(double *pos, double *grad) const {
PRECONDITION(dp_forceField, "no owner");
PRECONDITION(pos, "bad vector");
PRECONDITION(grad, "bad vector");
unsigned int dim = this->dp_forceField->dimension();
double d =
this->dp_forceField->distance(this->d_end1Idx, this->d_end2Idx, pos);
double preFactor = 0.0;
if (d > d_ub) {
double u2 = d_ub * d_ub;
preFactor = 4. * (((d * d) / u2) - 1.0) * (d / u2);
} else if (d < d_lb) {
double d2 = d * d;
double l2 = d_lb * d_lb;
double l2d2 = d2 + l2;
preFactor = 8. * l2 * d * (1. - 2 * l2 / l2d2) / (l2d2 * l2d2);
// preFactor = -8.*((l2-d2)/pow(l2+d2,3))*d*l2;
} else {
return;
}
double DistViolationContrib::getEnergy(double *pos) const {
PRECONDITION(dp_forceField,"no owner");
PRECONDITION(pos,"bad vector");
double d = this->dp_forceField->distance(this->d_end1Idx,this->d_end2Idx,pos);
double val=0.0;
if (d > d_ub) {
val = ((d*d)/(d_ub*d_ub)) - 1.0;
} else if (d < d_lb) {
val = ((2*d_lb*d_lb)/(d_lb*d_lb + d*d)) - 1.0;
}
double res;
if(val>0.0){
res=d_weight*val*val;
}else{
res=0;
}
return res;
}
double *end1Coords = &(pos[dim * this->d_end1Idx]);
double *end2Coords = &(pos[dim * this->d_end2Idx]);
void DistViolationContrib::getGrad(double *pos, double *grad) const {
PRECONDITION(dp_forceField,"no owner");
PRECONDITION(pos,"bad vector");
PRECONDITION(grad,"bad vector");
unsigned int dim=this->dp_forceField->dimension();
double d = this->dp_forceField->distance(this->d_end1Idx,this->d_end2Idx,pos);
double preFactor = 0.0;
if (d > d_ub) {
double u2 = d_ub*d_ub;
preFactor = 4.*(((d*d)/u2) - 1.0)*(d/u2);
} else if (d < d_lb) {
double d2 = d*d;
double l2 = d_lb*d_lb;
double l2d2 = d2+l2;
preFactor = 8.*l2*d * (1.-2*l2/l2d2) / (l2d2*l2d2);
//preFactor = -8.*((l2-d2)/pow(l2+d2,3))*d*l2;
for (unsigned int i = 0; i < dim; i++) {
double dGrad;
if (d > 0.0) {
dGrad = d_weight * preFactor * (end1Coords[i] - end2Coords[i]) / d;
} else {
return;
}
double *end1Coords = &(pos[dim*this->d_end1Idx]);
double *end2Coords = &(pos[dim*this->d_end2Idx]);
for(unsigned int i=0;i<dim;i++){
double dGrad;
if(d>0.0){
dGrad= d_weight*preFactor * (end1Coords[i]-end2Coords[i])/d;
} else {
// FIX: this likely isn't right
dGrad= d_weight*preFactor * (end1Coords[i]-end2Coords[i]);
}
grad[dim*this->d_end1Idx+i] += dGrad;
grad[dim*this->d_end2Idx+i] -= dGrad;
// FIX: this likely isn't right
dGrad = d_weight * preFactor * (end1Coords[i] - end2Coords[i]);
}
grad[dim * this->d_end1Idx + i] += dGrad;
grad[dim * this->d_end2Idx + i] -= dGrad;
}
}
}

View File

@@ -13,35 +13,39 @@
#include <ForceField/Contrib.h>
namespace DistGeom {
//! A term to capture the violation of the upper and lower bounds by
//! distance between two points
class DistViolationContrib : public ForceFields::ForceFieldContrib {
public:
DistViolationContrib() : d_end1Idx(0), d_end2Idx(0), d_ub(1000.0), d_lb(0.0), d_weight(1.0) {};
//! A term to capture the violation of the upper and lower bounds by
//! distance between two points
class DistViolationContrib : public ForceFields::ForceFieldContrib {
public:
DistViolationContrib()
: d_end1Idx(0), d_end2Idx(0), d_ub(1000.0), d_lb(0.0), d_weight(1.0){};
//! Constructor
/*!
\param owner pointer to the owning ForceField
\param idx1 index of end1 in the ForceField's positions
\param idx2 index of end2 in the ForceField's positions
\param ub Upper bound on the distance
\param lb Lower bound on the distance
\param weight optional weight for this contribution
*/
DistViolationContrib(ForceFields::ForceField *owner,unsigned int idx1,unsigned int idx2,
double ub, double lb, double weight=1.0);
//! Constructor
/*!
\param owner pointer to the owning ForceField
\param idx1 index of end1 in the ForceField's positions
\param idx2 index of end2 in the ForceField's positions
\param ub Upper bound on the distance
\param lb Lower bound on the distance
\param weight optional weight for this contribution
*/
DistViolationContrib(ForceFields::ForceField *owner, unsigned int idx1,
unsigned int idx2, double ub, double lb,
double weight = 1.0);
double getEnergy(double *pos) const;
void getGrad(double *pos, double *grad) const;
virtual DistViolationContrib *copy() const { return new DistViolationContrib(*this); };
double getEnergy(double *pos) const;
private:
unsigned int d_end1Idx,d_end2Idx; //!< indices of end points
double d_ub; //!< upper bound on the distance between d_end1Idx,d_end2Idx
double d_lb; //!< lower bound on the distance between d_end1Idx,d_end2Idx
double d_weight; //!< used to adjust relative contribution weights
void getGrad(double *pos, double *grad) const;
virtual DistViolationContrib *copy() const {
return new DistViolationContrib(*this);
};
private:
unsigned int d_end1Idx, d_end2Idx; //!< indices of end points
double d_ub; //!< upper bound on the distance between d_end1Idx,d_end2Idx
double d_lb; //!< lower bound on the distance between d_end1Idx,d_end2Idx
double d_weight; //!< used to adjust relative contribution weights
};
}
#endif

View File

@@ -9,49 +9,55 @@
#include <ForceField/ForceField.h>
namespace DistGeom {
//! A term used in penalizing chirality violations
//!
class FourthDimContrib : public ForceFields::ForceFieldContrib {
public:
FourthDimContrib() : d_idx(0), d_weight(0.0) {};
//! A term used in penalizing chirality violations
//!
class FourthDimContrib : public ForceFields::ForceFieldContrib {
public:
FourthDimContrib() : d_idx(0), d_weight(0.0){};
//! Constructor
/*!
\param owner pointer to the owning ForceField
\param idx the index of the atom to be considered
\param weight (optional) the weight to be used for this contrib
*/
FourthDimContrib(ForceFields::ForceField *owner,unsigned int idx,
double weight) : d_idx(idx), d_weight(weight) {
PRECONDITION(owner,"bad force field");
PRECONDITION(owner->dimension()==4,"force field has wrong dimension");
dp_forceField=owner;
};
//! Constructor
/*!
\param owner pointer to the owning ForceField
\param idx the index of the atom to be considered
\param weight (optional) the weight to be used for this contrib
//! return the contribution of this contrib to the energy of a given state
double getEnergy(double *pos) const {
PRECONDITION(dp_forceField,"no owner");
PRECONDITION(dp_forceField->dimension()==4,"force field has wrong dimension");
PRECONDITION(pos,"bad vector");
unsigned int pid = d_idx*dp_forceField->dimension() + 3;
return d_weight*pos[pid]*pos[pid];
}
//! calculate the contribution of this contrib to the gradient at a given state
void getGrad(double *pos, double *grad) const {
PRECONDITION(dp_forceField,"no owner");
PRECONDITION(dp_forceField->dimension()==4,"force field has wrong dimension");
PRECONDITION(pos,"bad vector");
unsigned int pid = d_idx*dp_forceField->dimension() + 3;
grad[pid] += d_weight*pos[pid];
}
virtual FourthDimContrib *copy() const { return new FourthDimContrib(*this); };
private:
unsigned int d_idx;
double d_weight;
*/
FourthDimContrib(ForceFields::ForceField *owner, unsigned int idx,
                 double weight)
    : d_idx(idx), d_weight(weight) {
  // a non-null, 4-dimensional force field is required before it is stored
  PRECONDITION(owner, "bad force field");
  PRECONDITION(owner->dimension() == 4, "force field has wrong dimension");
  this->dp_forceField = owner;
}
//! return the contribution of this contrib to the energy of a given state
/*!
  The value is d_weight times the square of the coordinate stored at offset 3
  of point d_idx in \c pos.
*/
double getEnergy(double *pos) const {
  PRECONDITION(dp_forceField, "no owner");
  PRECONDITION(dp_forceField->dimension() == 4,
               "force field has wrong dimension");
  PRECONDITION(pos, "bad vector");
  const unsigned int fourthIdx = d_idx * dp_forceField->dimension() + 3;
  const double w = pos[fourthIdx];
  return d_weight * w * w;
}
//! calculate the contribution of this contrib to the gradient at a given
//! state
/*!
  \param pos   coordinate array; the entry at offset 3 of point d_idx is read
  \param grad  gradient array; the matching entry is incremented in place
*/
void getGrad(double *pos, double *grad) const {
  PRECONDITION(dp_forceField, "no owner");
  PRECONDITION(dp_forceField->dimension() == 4,
               "force field has wrong dimension");
  PRECONDITION(pos, "bad vector");
  // grad is written below, so reject a null pointer up front
  PRECONDITION(grad, "bad vector");
  unsigned int pid = d_idx * dp_forceField->dimension() + 3;
  // NOTE(review): getEnergy() returns d_weight * pos[pid] * pos[pid], whose
  // derivative is 2 * d_weight * pos[pid]; the factor of 2 is absent here.
  // Left unchanged to preserve numerical behaviour - confirm whether this is
  // intentional.
  grad[pid] += d_weight * pos[pid];
}
//! returns a pointer to a new, copy-constructed FourthDimContrib
virtual FourthDimContrib *copy() const {
  FourthDimContrib *duplicate = new FourthDimContrib(*this);
  return duplicate;
}
private:
unsigned int d_idx;
double d_weight;
};
}
#endif

View File

@@ -12,62 +12,62 @@
#include "TriangleSmooth.h"
namespace DistGeom {
bool triangleSmoothBounds(BoundsMatPtr boundsMat,double tol) {
return triangleSmoothBounds(boundsMat.get(),tol);
}
bool triangleSmoothBounds(BoundsMatrix *boundsMat,double tol) {
int npt = boundsMat->numRows();
int i, j, k;
double Uik, Lik, Ukj, sumUikUkj, diffLikUjk, diffLjkUik;
for (k = 0; k < npt; k++) {
for (i = 0; i < npt-1; i++) {
if (i == k) {
// Smart-pointer overload: forwards the raw matrix pointer held by boundsMat
// to the BoundsMatrix* overload and returns its result.
bool triangleSmoothBounds(BoundsMatPtr boundsMat, double tol) {
  BoundsMatrix *rawMat = boundsMat.get();
  return triangleSmoothBounds(rawMat, tol);
}
bool triangleSmoothBounds(BoundsMatrix *boundsMat, double tol) {
int npt = boundsMat->numRows();
int i, j, k;
double Uik, Lik, Ukj, sumUikUkj, diffLikUjk, diffLjkUik;
for (k = 0; k < npt; k++) {
for (i = 0; i < npt - 1; i++) {
if (i == k) {
continue;
}
Uik = boundsMat->getUpperBound(i, k);
Lik = boundsMat->getLowerBound(i, k);
for (j = i + 1; j < npt; j++) {
if (j == k) {
continue;
}
Uik = boundsMat->getUpperBound(i,k);
Lik = boundsMat->getLowerBound(i,k);
for (j = i+1; j < npt; j++) {
if (j == k) {
continue;
}
Ukj = boundsMat->getUpperBound(k,j);
sumUikUkj = Uik + Ukj;
if (boundsMat->getUpperBound(i,j) > sumUikUkj) {
boundsMat->setUpperBound(i,j, sumUikUkj);
}
diffLikUjk = Lik - Ukj;
diffLjkUik = boundsMat->getLowerBound(j,k) - Uik;
if (boundsMat->getLowerBound(i,j) < diffLikUjk) {
boundsMat->setLowerBound(i,j, diffLikUjk);
} else if (boundsMat->getLowerBound(i,j) < diffLjkUik) {
boundsMat->setLowerBound(i,j, diffLjkUik);
}
double lBound=boundsMat->getLowerBound(i,j);
double uBound=boundsMat->getUpperBound(i,j);
if( tol>0. &&
(lBound-uBound)/lBound>0. &&
(lBound-uBound)/lBound<tol ){
boundsMat->setUpperBound(i,j,lBound);
uBound=lBound;
}
if (lBound - uBound>0.) {
// std::cerr<<std::endl;
// for(unsigned int ii=0;ii<npt;++ii){
// for(unsigned int jj=0;jj<npt;++jj){
// std::cerr<<" "<<std::setprecision(3)<<boundsMat->getVal(ii,jj);
// }
// std::cerr<<std::endl;
// }
// std::cerr<<std::endl;
// std::cerr<<" Fail: "<<i<<"-"<<j<<": " << boundsMat->getLowerBound(i,j) << " " << boundsMat->getUpperBound(i,j) << "\n";
return false;
}
Ukj = boundsMat->getUpperBound(k, j);
sumUikUkj = Uik + Ukj;
if (boundsMat->getUpperBound(i, j) > sumUikUkj) {
boundsMat->setUpperBound(i, j, sumUikUkj);
}
diffLikUjk = Lik - Ukj;
diffLjkUik = boundsMat->getLowerBound(j, k) - Uik;
if (boundsMat->getLowerBound(i, j) < diffLikUjk) {
boundsMat->setLowerBound(i, j, diffLikUjk);
} else if (boundsMat->getLowerBound(i, j) < diffLjkUik) {
boundsMat->setLowerBound(i, j, diffLjkUik);
}
double lBound = boundsMat->getLowerBound(i, j);
double uBound = boundsMat->getUpperBound(i, j);
if (tol > 0. && (lBound - uBound) / lBound > 0. &&
(lBound - uBound) / lBound < tol) {
boundsMat->setUpperBound(i, j, lBound);
uBound = lBound;
}
if (lBound - uBound > 0.) {
// std::cerr<<std::endl;
// for(unsigned int ii=0;ii<npt;++ii){
// for(unsigned int jj=0;jj<npt;++jj){
// std::cerr<<" "<<std::setprecision(3)<<boundsMat->getVal(ii,jj);
// }
// std::cerr<<std::endl;
// }
// std::cerr<<std::endl;
// std::cerr<<" Fail: "<<i<<"-"<<j<<": " <<
// boundsMat->getLowerBound(i,j) << " " <<
// boundsMat->getUpperBound(i,j) << "\n";
return false;
}
}
}
return true;
}
return true;
}
}

View File

@@ -12,24 +12,22 @@
#include "BoundsMatrix.h"
namespace DistGeom {
//! Smooth the upper and lower bound in a metric matrix so that triangle
//! inequality is not violated
/*!
This an implementation of the O(N^3) algorithm given on pages 252-253 of
"Distance Geometry and Molecular Conformation" by G.M.Crippen and T.F.Havel
Research Studies Press, 1988. There are other (slightly) more implementations
(see pages 301-302 in the above book), but that is for later
//! Smooth the upper and lower bound in a metric matrix so that triangle
//! inequality is not violated
/*!
This is an implementation of the O(N^3) algorithm given on pages 252-253 of
"Distance Geometry and Molecular Conformation" by G.M.Crippen and T.F.Havel,
Research Studies Press, 1988. There are other (slightly) more efficient
implementations (see pages 301-302 in the above book), but that is for later
\param boundsMat A pointer to the distance bounds matrix
\param tol a tolerance (percent) for errors in the smoothing process
\param boundsMat A pointer to the distance bounds matrix
\param tol a tolerance (percent) for errors in the smoothing process
*/
bool triangleSmoothBounds(BoundsMatrix *boundsMat,double tol=0.);
//! \overload
bool triangleSmoothBounds(BoundsMatPtr boundsMat,double tol=0.);
*/
bool triangleSmoothBounds(BoundsMatrix *boundsMat, double tol = 0.);
//! \overload
bool triangleSmoothBounds(BoundsMatPtr boundsMat, double tol = 0.);
}
#endif

View File

@@ -34,149 +34,144 @@
namespace python = boost::python;
namespace RDKit {
bool doTriangleSmoothing(python::object boundsMatArg,double tol){
PyObject *boundsMatObj = boundsMatArg.ptr();
if(!PyArray_Check(boundsMatObj))
throw_value_error("Argument isn't an array");
PyArrayObject *boundsMat=reinterpret_cast<PyArrayObject *>(boundsMatObj);
// get the dimensions of the array
int nrows = boundsMat->dimensions[0];
int ncols = boundsMat->dimensions[1];
if(nrows!=ncols)
throw_value_error("The array has to be square");
if(nrows<=0)
throw_value_error("The array has to have a nonzero size");
if (boundsMat->descr->type_num != PyArray_DOUBLE)
throw_value_error("Only double arrays are currently supported");
bool doTriangleSmoothing(python::object boundsMatArg, double tol) {
PyObject *boundsMatObj = boundsMatArg.ptr();
if (!PyArray_Check(boundsMatObj))
throw_value_error("Argument isn't an array");
unsigned int dSize = nrows*nrows;
double *cData = new double[dSize];
double *inData = reinterpret_cast<double *>(boundsMat->data);
memcpy(static_cast<void *>(cData),
static_cast<const void *>(inData),
dSize*sizeof(double));
DistGeom::BoundsMatrix::DATA_SPTR sdata(cData);
DistGeom::BoundsMatrix bm(nrows,sdata);
PyArrayObject *boundsMat = reinterpret_cast<PyArrayObject *>(boundsMatObj);
// get the dimensions of the array
int nrows = boundsMat->dimensions[0];
int ncols = boundsMat->dimensions[1];
if (nrows != ncols) throw_value_error("The array has to be square");
if (nrows <= 0) throw_value_error("The array has to have a nonzero size");
if (boundsMat->descr->type_num != PyArray_DOUBLE)
throw_value_error("Only double arrays are currently supported");
bool res=DistGeom::triangleSmoothBounds(&bm,tol);
memcpy(static_cast<void *>(inData),
static_cast<const void *>(cData),
dSize*sizeof(double));
return res;
unsigned int dSize = nrows * nrows;
double *cData = new double[dSize];
double *inData = reinterpret_cast<double *>(boundsMat->data);
memcpy(static_cast<void *>(cData), static_cast<const void *>(inData),
dSize * sizeof(double));
DistGeom::BoundsMatrix::DATA_SPTR sdata(cData);
DistGeom::BoundsMatrix bm(nrows, sdata);
bool res = DistGeom::triangleSmoothBounds(&bm, tol);
memcpy(static_cast<void *>(inData), static_cast<const void *>(cData),
dSize * sizeof(double));
return res;
}
PyObject *embedBoundsMatrix(python::object boundsMatArg, int maxIters = 10,
bool randomizeOnFailure = false,
int numZeroFail = 2,
python::list weights = python::list(),
int randomSeed = -1) {
PyObject *boundsMatObj = boundsMatArg.ptr();
if (!PyArray_Check(boundsMatObj))
throw_value_error("Argument isn't an array");
PyArrayObject *boundsMat = reinterpret_cast<PyArrayObject *>(boundsMatObj);
// get the dimensions of the array
unsigned int nrows = boundsMat->dimensions[0];
unsigned int ncols = boundsMat->dimensions[1];
if (nrows != ncols) throw_value_error("The array has to be square");
if (nrows <= 0) throw_value_error("The array has to have a nonzero size");
if (boundsMat->descr->type_num != PyArray_DOUBLE)
throw_value_error("Only double arrays are currently supported");
unsigned int dSize = nrows * nrows;
double *cData = new double[dSize];
double *inData = reinterpret_cast<double *>(boundsMat->data);
memcpy(static_cast<void *>(cData), static_cast<const void *>(inData),
dSize * sizeof(double));
DistGeom::BoundsMatrix::DATA_SPTR sdata(cData);
DistGeom::BoundsMatrix bm(nrows, sdata);
RDGeom::Point3D *positions = new RDGeom::Point3D[nrows];
std::vector<RDGeom::Point *> posPtrs;
for (unsigned int i = 0; i < nrows; i++) {
posPtrs.push_back(&positions[i]);
}
PyObject *embedBoundsMatrix(python::object boundsMatArg,int maxIters=10,
bool randomizeOnFailure=false,int numZeroFail=2,
python::list weights=python::list(),
int randomSeed=-1){
PyObject *boundsMatObj = boundsMatArg.ptr();
if(!PyArray_Check(boundsMatObj))
throw_value_error("Argument isn't an array");
PyArrayObject *boundsMat=reinterpret_cast<PyArrayObject *>(boundsMatObj);
// get the dimensions of the array
unsigned int nrows = boundsMat->dimensions[0];
unsigned int ncols = boundsMat->dimensions[1];
if(nrows!=ncols)
throw_value_error("The array has to be square");
if(nrows<=0)
throw_value_error("The array has to have a nonzero size");
if (boundsMat->descr->type_num != PyArray_DOUBLE)
throw_value_error("Only double arrays are currently supported");
RDNumeric::DoubleSymmMatrix distMat(nrows, 0.0);
unsigned int dSize = nrows*nrows;
double *cData = new double[dSize];
double *inData = reinterpret_cast<double *>(boundsMat->data);
memcpy(static_cast<void *>(cData),
static_cast<const void *>(inData),
dSize*sizeof(double));
// ---- ---- ---- ---- ---- ---- ---- ---- ----
// start the embedding:
bool gotCoords = false;
for (int iter = 0; iter < maxIters && !gotCoords; iter++) {
// pick a random distance matrix
DistGeom::pickRandomDistMat(bm, distMat, randomSeed);
DistGeom::BoundsMatrix::DATA_SPTR sdata(cData);
DistGeom::BoundsMatrix bm(nrows,sdata);
// and embed it:
gotCoords = DistGeom::computeInitialCoords(
distMat, posPtrs, randomizeOnFailure, numZeroFail, randomSeed);
RDGeom::Point3D *positions=new RDGeom::Point3D[nrows];
std::vector<RDGeom::Point *> posPtrs;
for (unsigned int i = 0; i < nrows; i++) {
posPtrs.push_back(&positions[i]);
}
RDNumeric::DoubleSymmMatrix distMat(nrows, 0.0);
// ---- ---- ---- ---- ---- ---- ---- ---- ----
// start the embedding:
bool gotCoords=false;
for(int iter=0;iter<maxIters && !gotCoords;iter++){
// pick a random distance matrix
DistGeom::pickRandomDistMat(bm,distMat,randomSeed);
// and embed it:
gotCoords=DistGeom::computeInitialCoords(distMat,posPtrs,randomizeOnFailure,
numZeroFail,randomSeed);
// update the seed:
if(randomSeed>=0) randomSeed+=iter*999;
}
if(gotCoords){
std::map<std::pair<int,int>,double> weightMap;
unsigned int nElems=PySequence_Size(weights.ptr());
for(unsigned int entryIdx=0;entryIdx<nElems;entryIdx++){
PyObject *entry=PySequence_GetItem(weights.ptr(),entryIdx);
if(!PySequence_Check(entry) || PySequence_Size(entry)!=3){
throw_value_error("weights argument must be a sequence of 3-sequences");
}
int idx1=PyInt_AsLong(PySequence_GetItem(entry,0));
int idx2=PyInt_AsLong(PySequence_GetItem(entry,1));
double w=PyFloat_AsDouble(PySequence_GetItem(entry,2));
weightMap[std::make_pair(idx1,idx2)]=w;
}
DistGeom::VECT_CHIRALSET csets;
ForceFields::ForceField *field = DistGeom::constructForceField(bm,posPtrs,csets,0.0, 0.0,
&weightMap);
CHECK_INVARIANT(field,"could not build dgeom force field");
field->initialize();
if(field->calcEnergy()>1e-5){
int needMore=1;
while(needMore){
needMore=field->minimize();
}
}
delete field;
} else {
throw_value_error("could not embed matrix");
}
// ---- ---- ---- ---- ---- ---- ---- ---- ----
// construct the results matrix:
npy_intp dims[2];
dims[0] = nrows;
dims[1] = 3;
PyArrayObject *res = (PyArrayObject *)PyArray_SimpleNew(2,dims,NPY_DOUBLE);
double *resData=reinterpret_cast<double *>(res->data);
for(unsigned int i=0;i<nrows;i++){
unsigned int iTab=i*3;
for (unsigned int j = 0; j < 3; ++j) {
resData[iTab + j]=positions[i][j]; //.x;
}
}
delete [] positions;
return PyArray_Return(res);
// update the seed:
if (randomSeed >= 0) randomSeed += iter * 999;
}
if (gotCoords) {
std::map<std::pair<int, int>, double> weightMap;
unsigned int nElems = PySequence_Size(weights.ptr());
for (unsigned int entryIdx = 0; entryIdx < nElems; entryIdx++) {
PyObject *entry = PySequence_GetItem(weights.ptr(), entryIdx);
if (!PySequence_Check(entry) || PySequence_Size(entry) != 3) {
throw_value_error("weights argument must be a sequence of 3-sequences");
}
int idx1 = PyInt_AsLong(PySequence_GetItem(entry, 0));
int idx2 = PyInt_AsLong(PySequence_GetItem(entry, 1));
double w = PyFloat_AsDouble(PySequence_GetItem(entry, 2));
weightMap[std::make_pair(idx1, idx2)] = w;
}
DistGeom::VECT_CHIRALSET csets;
ForceFields::ForceField *field =
DistGeom::constructForceField(bm, posPtrs, csets, 0.0, 0.0, &weightMap);
CHECK_INVARIANT(field, "could not build dgeom force field");
field->initialize();
if (field->calcEnergy() > 1e-5) {
int needMore = 1;
while (needMore) {
needMore = field->minimize();
}
}
delete field;
} else {
throw_value_error("could not embed matrix");
}
// ---- ---- ---- ---- ---- ---- ---- ---- ----
// construct the results matrix:
npy_intp dims[2];
dims[0] = nrows;
dims[1] = 3;
PyArrayObject *res = (PyArrayObject *)PyArray_SimpleNew(2, dims, NPY_DOUBLE);
double *resData = reinterpret_cast<double *>(res->data);
for (unsigned int i = 0; i < nrows; i++) {
unsigned int iTab = i * 3;
for (unsigned int j = 0; j < 3; ++j) {
resData[iTab + j] = positions[i][j]; //.x;
}
}
delete[] positions;
return PyArray_Return(res);
}
}
BOOST_PYTHON_MODULE(DistGeom) {
python::scope().attr("__doc__") =
"Module containing functions for basic distance geometry operations"
;
"Module containing functions for basic distance geometry operations";
rdkit_import_array();
python::register_exception_translator<ValueErrorException>(&translate_value_error);
python::register_exception_translator<ValueErrorException>(
&translate_value_error);
std::string docString;
docString = "Do triangle smoothing on a bounds matrix\n\n\
docString =
"Do triangle smoothing on a bounds matrix\n\n\
\n\
ARGUMENTS:\n\n\
- mat: a square Numeric array of doubles containing the bounds matrix, this matrix\n\
@@ -186,11 +181,11 @@ BOOST_PYTHON_MODULE(DistGeom) {
a boolean indicating whether or not the smoothing worked.\n\
\n";
python::def("DoTriangleSmoothing", RDKit::doTriangleSmoothing,
(python::arg("boundsMatrix"),python::arg("tol")=0.),
(python::arg("boundsMatrix"), python::arg("tol") = 0.),
docString.c_str());
docString = "Embed a bounds matrix and return the coordinates\n\n\
docString =
"Embed a bounds matrix and return the coordinates\n\n\
\n\
ARGUMENTS:\n\n\
- boundsMatrix: a square Numeric array of doubles containing the bounds matrix, this matrix\n\
@@ -205,13 +200,11 @@ BOOST_PYTHON_MODULE(DistGeom) {
RETURNS:\n\n\
a Numeric array of doubles with the coordinates\n\
\n";
python::def("EmbedBoundsMatrix", RDKit::embedBoundsMatrix,
(python::arg("boundsMatrix"),python::arg("maxIters")=10,
python::arg("randomizeOnFailure")=false,
python::arg("numZeroFail")=2,
python::arg("weights")=python::list(),
python::arg("randomSeed")=-1),
docString.c_str());
python::def(
"EmbedBoundsMatrix", RDKit::embedBoundsMatrix,
(python::arg("boundsMatrix"), python::arg("maxIters") = 10,
python::arg("randomizeOnFailure") = false,
python::arg("numZeroFail") = 2, python::arg("weights") = python::list(),
python::arg("randomSeed") = -1),
docString.c_str());
}

View File

@@ -9,7 +9,6 @@
// of the RDKit source tree.
//
#include "BoundsMatrix.h"
#include "TriangleSmooth.h"
#include <iostream>
@@ -25,20 +24,30 @@ using namespace RDNumeric;
void test1() {
// test triangle smoothing
unsigned int npt = 5;
double x= sqrt(3.0);
BoundsMatrix * mmat = new BoundsMatrix(npt);
double x = sqrt(3.0);
BoundsMatrix *mmat = new BoundsMatrix(npt);
mmat->setUpperBound(0, 1, 1.0);
mmat->setLowerBound(0, 1, 1.0);
mmat->setUpperBound(0, 2, x);
mmat->setLowerBound(0, 2, x);
mmat->setUpperBound(0, 3, 10.0);
mmat->setLowerBound(0, 3, 0.0);
mmat->setUpperBound(0, 4, 10.0);
mmat->setLowerBound(0, 4, 0.0);
mmat->setUpperBound(1, 2, 1.0);
mmat->setLowerBound(1, 2, 1.0);
mmat->setUpperBound(1, 3, x);
mmat->setLowerBound(1, 3, x);
mmat->setUpperBound(1, 4, 10.0);
mmat->setLowerBound(1, 4, 0.0);
mmat->setUpperBound(2, 3, 1.0);
mmat->setLowerBound(2, 3, 1.0);
mmat->setUpperBound(2, 4, x);
mmat->setLowerBound(2, 4, x);
mmat->setUpperBound(3, 4, 1.0);
mmat->setLowerBound(3, 4, 1.0);
mmat->setUpperBound(0, 1, 1.0); mmat->setLowerBound(0, 1, 1.0);
mmat->setUpperBound(0, 2, x); mmat->setLowerBound(0, 2, x);
mmat->setUpperBound(0, 3, 10.0); mmat->setLowerBound(0, 3, 0.0);
mmat->setUpperBound(0, 4, 10.0); mmat->setLowerBound(0, 4, 0.0);
mmat->setUpperBound(1, 2, 1.0); mmat->setLowerBound(1, 2, 1.0);
mmat->setUpperBound(1, 3, x); mmat->setLowerBound(1, 3, x);
mmat->setUpperBound(1, 4, 10.0); mmat->setLowerBound(1, 4, 0.0);
mmat->setUpperBound(2, 3, 1.0); mmat->setLowerBound(2, 3, 1.0);
mmat->setUpperBound(2, 4, x); mmat->setLowerBound(2, 4, x);
mmat->setUpperBound(3, 4, 1.0); mmat->setLowerBound(3, 4, 1.0);
BoundsMatPtr mptr(mmat);
triangleSmoothBounds(mptr);
@@ -66,8 +75,8 @@ void test1() {
DoubleSymmMatrix dmat(npt, 0.0);
RDKit::rng_type generator(42u);
generator.seed(100);
RDKit::uniform_double distrib(0,1.0);
RDKit::double_source_type rng(generator,distrib);
RDKit::uniform_double distrib(0, 1.0);
RDKit::double_source_type rng(generator, distrib);
pickRandomDistMat(*mmat, dmat, rng);
double sumElem = 0.0;
@@ -79,18 +88,24 @@ void test1() {
void testIssue216() {
RDNumeric::DoubleSymmMatrix dmat(4);
dmat.setVal(0,0, 0.0); dmat.setVal(0,1, 1.0); dmat.setVal(0,2, 1.0); dmat.setVal(0,3, 1.0);
dmat.setVal(1,1, 0.0); dmat.setVal(1,2, 1.0); dmat.setVal(1,3, 1.0);
dmat.setVal(2,2, 0.0); dmat.setVal(2,3, 1.0);
dmat.setVal(3,3, 0.0);
dmat.setVal(0, 0, 0.0);
dmat.setVal(0, 1, 1.0);
dmat.setVal(0, 2, 1.0);
dmat.setVal(0, 3, 1.0);
dmat.setVal(1, 1, 0.0);
dmat.setVal(1, 2, 1.0);
dmat.setVal(1, 3, 1.0);
dmat.setVal(2, 2, 0.0);
dmat.setVal(2, 3, 1.0);
dmat.setVal(3, 3, 0.0);
std::cout << dmat;
RDGeom::PointPtrVect pos;
for (int i = 0; i < 4; i++) {
RDGeom::Point3D *pt = new RDGeom::Point3D();
pos.push_back(pt);
}
bool gotCoords = DistGeom::computeInitialCoords(dmat, pos);
CHECK_INVARIANT(gotCoords, "");
@@ -111,6 +126,7 @@ int main() {
std::cout << "***********************************************************\n";
std::cout << " testIssue216 \n";
testIssue216();
std::cout << "***********************************************************\n\n";
std::cout
<< "***********************************************************\n\n";
return 0;
}

View File

@@ -9,6 +9,4 @@
// of the RDKit source tree.
//
#include <Features/Feature.h>
namespace RDFeatures {
}
namespace RDFeatures {}

View File

@@ -14,81 +14,82 @@
#include <Geometry/point.h>
namespace RDFeatures {
template <typename FAMILYMARKER, typename TYPEMARKER=FAMILYMARKER, typename LOCTYPE=RDGeom::Point3D>
class ExplicitFeature {
public:
ExplicitFeature() {};
explicit ExplicitFeature(const FAMILYMARKER &f,const TYPEMARKER &t) :
d_family(f), d_type(t) {};
ExplicitFeature(const FAMILYMARKER &f,const TYPEMARKER &t,const LOCTYPE &loc) :
d_family(f), d_type(t), d_loc(loc){};
template <typename FAMILYMARKER, typename TYPEMARKER = FAMILYMARKER,
typename LOCTYPE = RDGeom::Point3D>
class ExplicitFeature {
public:
ExplicitFeature(){};
explicit ExplicitFeature(const FAMILYMARKER &f, const TYPEMARKER &t)
: d_family(f), d_type(t){};
ExplicitFeature(const FAMILYMARKER &f, const TYPEMARKER &t,
const LOCTYPE &loc)
: d_family(f), d_type(t), d_loc(loc){};
const FAMILYMARKER &getFamily() const { return d_family; };
void setFamily(const FAMILYMARKER &f) { d_family=f; };
const FAMILYMARKER &getFamily() const { return d_family; };
void setFamily(const FAMILYMARKER &f) { d_family = f; };
const TYPEMARKER &getType() const { return d_type; };
void setType(const TYPEMARKER &t) { d_type=t; };
const TYPEMARKER &getType() const { return d_type; };
void setType(const TYPEMARKER &t) { d_type = t; };
const LOCTYPE &getLoc() const { return d_loc; };
void setLoc(const LOCTYPE &loc) { d_loc=loc; };
const LOCTYPE &getLoc() const { return d_loc; };
void setLoc(const LOCTYPE &loc) { d_loc = loc; };
const std::vector<LOCTYPE> &getDirs() const { return d_dirs; };
std::vector<LOCTYPE> &getDirs() { return d_dirs; };
private:
FAMILYMARKER d_family;
TYPEMARKER d_type;
LOCTYPE d_loc;
std::vector<LOCTYPE> d_dirs;
};
const std::vector<LOCTYPE> &getDirs() const { return d_dirs; };
std::vector<LOCTYPE> &getDirs() { return d_dirs; };
private:
FAMILYMARKER d_family;
TYPEMARKER d_type;
LOCTYPE d_loc;
std::vector<LOCTYPE> d_dirs;
};
template <typename FAMILYMARKER, typename TYPEMARKER=FAMILYMARKER, typename LOCTYPE=RDGeom::Point3D>
class ImplicitFeature {
public:
ImplicitFeature() : d_weightSum(0.0) {};
explicit ImplicitFeature(const FAMILYMARKER &f,const TYPEMARKER &t) :
d_weightSum(0.0), d_family(f), d_type(t) {};
template <typename FAMILYMARKER, typename TYPEMARKER = FAMILYMARKER,
typename LOCTYPE = RDGeom::Point3D>
class ImplicitFeature {
public:
ImplicitFeature() : d_weightSum(0.0){};
explicit ImplicitFeature(const FAMILYMARKER &f, const TYPEMARKER &t)
: d_weightSum(0.0), d_family(f), d_type(t){};
const FAMILYMARKER &getFamily() const { return d_family; };
void setFamily(const FAMILYMARKER &f) { d_family=f; };
const FAMILYMARKER &getFamily() const { return d_family; };
void setFamily(const FAMILYMARKER &f) { d_family = f; };
const TYPEMARKER &getType() const { return d_type; };
void setType(const TYPEMARKER &t) { d_type=t; };
const TYPEMARKER &getType() const { return d_type; };
void setType(const TYPEMARKER &t) { d_type = t; };
LOCTYPE getLoc() const {
PRECONDITION(d_weights.size()==d_locs.size(),"weight/locs mismatch");
LOCTYPE accum;
for(unsigned int i=0;i<d_weights.size();i++){
LOCTYPE tmp=*d_locs[i];
tmp *= d_weights[i]/d_weightSum;
accum += tmp;
}
return accum;
};
void addPoint(const LOCTYPE *p,double weight=1.0){
d_locs.push_back(p);
d_weights.push_back(weight);
d_weightSum += weight;
LOCTYPE getLoc() const {
PRECONDITION(d_weights.size() == d_locs.size(), "weight/locs mismatch");
LOCTYPE accum;
for (unsigned int i = 0; i < d_weights.size(); i++) {
LOCTYPE tmp = *d_locs[i];
tmp *= d_weights[i] / d_weightSum;
accum += tmp;
}
void reset() {
d_locs.clear();
d_weights.clear();
d_weightSum=0.0;
}
const std::vector<LOCTYPE> &getDirs() const { return d_dirs; };
std::vector<LOCTYPE> &getDirs() { return d_dirs; };
private:
double d_weightSum;
FAMILYMARKER d_family;
TYPEMARKER d_type;
std::vector<double> d_weights;
std::vector<const LOCTYPE *> d_locs;
// FIX: add something correct for directions
std::vector<LOCTYPE> d_dirs;
return accum;
};
void addPoint(const LOCTYPE *p, double weight = 1.0) {
d_locs.push_back(p);
d_weights.push_back(weight);
d_weightSum += weight;
}
void reset() {
d_locs.clear();
d_weights.clear();
d_weightSum = 0.0;
}
const std::vector<LOCTYPE> &getDirs() const { return d_dirs; };
std::vector<LOCTYPE> &getDirs() { return d_dirs; };
private:
double d_weightSum;
FAMILYMARKER d_family;
TYPEMARKER d_type;
std::vector<double> d_weights;
std::vector<const LOCTYPE *> d_locs;
// FIX: add something correct for directions
std::vector<LOCTYPE> d_dirs;
};
}
#endif

View File

@@ -30,127 +30,124 @@ typedef enum {
grnType,
} TypeMarker;
void test1(){
void test1() {
std::cerr << "-------------------------------------" << std::endl;
std::cerr << "Basics for ExplicitFeatures." << std::endl;
ExplicitFeature<TypeMarker> f1;
f1.setFamily(bazType);
TEST_ASSERT(f1.getFamily()==bazType);
TEST_ASSERT(f1.getFamily() == bazType);
f1.setType(grnType);
TEST_ASSERT(f1.getType()==grnType);
TEST_ASSERT(feq(f1.getLoc().x,0.0));
TEST_ASSERT(feq(f1.getLoc().y,0.0));
TEST_ASSERT(feq(f1.getLoc().z,0.0));
TEST_ASSERT(f1.getDirs().size()==0);
TEST_ASSERT(f1.getType() == grnType);
TEST_ASSERT(feq(f1.getLoc().x, 0.0));
TEST_ASSERT(feq(f1.getLoc().y, 0.0));
TEST_ASSERT(feq(f1.getLoc().z, 0.0));
TEST_ASSERT(f1.getDirs().size() == 0);
f1 = ExplicitFeature<TypeMarker>(barType,fooType);
TEST_ASSERT(f1.getFamily()==barType);
TEST_ASSERT(f1.getType()==fooType);
TEST_ASSERT(feq(f1.getLoc().x,0.0));
TEST_ASSERT(feq(f1.getLoc().y,0.0));
TEST_ASSERT(feq(f1.getLoc().z,0.0));
TEST_ASSERT(f1.getDirs().size()==0);
f1 = ExplicitFeature<TypeMarker>(barType, fooType);
TEST_ASSERT(f1.getFamily() == barType);
TEST_ASSERT(f1.getType() == fooType);
TEST_ASSERT(feq(f1.getLoc().x, 0.0));
TEST_ASSERT(feq(f1.getLoc().y, 0.0));
TEST_ASSERT(feq(f1.getLoc().z, 0.0));
TEST_ASSERT(f1.getDirs().size() == 0);
f1 = ExplicitFeature<TypeMarker>(barType,fooType,Point3D(1.0,2.0,3.0));
TEST_ASSERT(f1.getFamily()==barType);
TEST_ASSERT(f1.getType()==fooType);
TEST_ASSERT(feq(f1.getLoc().x,1.0));
TEST_ASSERT(feq(f1.getLoc().y,2.0));
TEST_ASSERT(feq(f1.getLoc().z,3.0));
TEST_ASSERT(f1.getDirs().size()==0);
f1 = ExplicitFeature<TypeMarker>(barType, fooType, Point3D(1.0, 2.0, 3.0));
TEST_ASSERT(f1.getFamily() == barType);
TEST_ASSERT(f1.getType() == fooType);
TEST_ASSERT(feq(f1.getLoc().x, 1.0));
TEST_ASSERT(feq(f1.getLoc().y, 2.0));
TEST_ASSERT(feq(f1.getLoc().z, 3.0));
TEST_ASSERT(f1.getDirs().size() == 0);
f1.setLoc(Point3D(-1.0,-2.0,-3.0));
TEST_ASSERT(feq(f1.getLoc().x,-1.0));
TEST_ASSERT(feq(f1.getLoc().y,-2.0));
TEST_ASSERT(feq(f1.getLoc().z,-3.0));
TEST_ASSERT(f1.getDirs().size()==0);
f1.setLoc(Point3D(-1.0, -2.0, -3.0));
TEST_ASSERT(feq(f1.getLoc().x, -1.0));
TEST_ASSERT(feq(f1.getLoc().y, -2.0));
TEST_ASSERT(feq(f1.getLoc().z, -3.0));
TEST_ASSERT(f1.getDirs().size() == 0);
std::cerr << " done" << std::endl;
}
void test2(){
void test2() {
std::cerr << "-------------------------------------" << std::endl;
std::cerr << "Basics for ImplicitFeatures." << std::endl;
ImplicitFeature<TypeMarker> f1;
f1.setFamily(fooType);
TEST_ASSERT(f1.getType()==fooType);
TEST_ASSERT(f1.getType() == fooType);
f1.setType(grnType);
TEST_ASSERT(f1.getType()==grnType);
TEST_ASSERT(feq(f1.getLoc().x,0.0));
TEST_ASSERT(feq(f1.getLoc().y,0.0));
TEST_ASSERT(feq(f1.getLoc().z,0.0));
TEST_ASSERT(f1.getDirs().size()==0);
TEST_ASSERT(f1.getType() == grnType);
TEST_ASSERT(feq(f1.getLoc().x, 0.0));
TEST_ASSERT(feq(f1.getLoc().y, 0.0));
TEST_ASSERT(feq(f1.getLoc().z, 0.0));
TEST_ASSERT(f1.getDirs().size() == 0);
f1 = ImplicitFeature<TypeMarker>(barType,fooType);
TEST_ASSERT(f1.getFamily()==barType);
TEST_ASSERT(f1.getType()==barType);
TEST_ASSERT(feq(f1.getLoc().x,0.0));
TEST_ASSERT(feq(f1.getLoc().y,0.0));
TEST_ASSERT(feq(f1.getLoc().z,0.0));
TEST_ASSERT(f1.getDirs().size()==0);
f1 = ImplicitFeature<TypeMarker>(barType, fooType);
TEST_ASSERT(f1.getFamily() == barType);
TEST_ASSERT(f1.getType() == barType);
TEST_ASSERT(feq(f1.getLoc().x, 0.0));
TEST_ASSERT(feq(f1.getLoc().y, 0.0));
TEST_ASSERT(feq(f1.getLoc().z, 0.0));
TEST_ASSERT(f1.getDirs().size() == 0);
Point3D p1(0,0,0),p2(1,0,0),p3(0,1,0);
Point3D p1(0, 0, 0), p2(1, 0, 0), p3(0, 1, 0);
f1.addPoint(&p1);
f1.addPoint(&p2);
TEST_ASSERT(feq(f1.getLoc().x,0.50));
TEST_ASSERT(feq(f1.getLoc().y,0.0));
TEST_ASSERT(feq(f1.getLoc().z,0.0));
TEST_ASSERT(feq(f1.getLoc().x, 0.50));
TEST_ASSERT(feq(f1.getLoc().y, 0.0));
TEST_ASSERT(feq(f1.getLoc().z, 0.0));
f1.addPoint(&p3);
TEST_ASSERT(feq(f1.getLoc().x,0.3333));
TEST_ASSERT(feq(f1.getLoc().y,0.3333));
TEST_ASSERT(feq(f1.getLoc().z,0.0));
TEST_ASSERT(feq(f1.getLoc().x, 0.3333));
TEST_ASSERT(feq(f1.getLoc().y, 0.3333));
TEST_ASSERT(feq(f1.getLoc().z, 0.0));
f1.reset();
TEST_ASSERT(feq(f1.getLoc().x,0.0));
TEST_ASSERT(feq(f1.getLoc().y,0.0));
TEST_ASSERT(feq(f1.getLoc().z,0.0));
TEST_ASSERT(feq(f1.getLoc().x, 0.0));
TEST_ASSERT(feq(f1.getLoc().y, 0.0));
TEST_ASSERT(feq(f1.getLoc().z, 0.0));
std::cerr << " done" << std::endl;
}
void test3(){
void test3() {
std::cerr << "-------------------------------------" << std::endl;
std::cerr << "ExplicitFeatures 2D, string type." << std::endl;
typedef ExplicitFeature<std::string,std::string,Point2D> LocalFeature;
typedef ExplicitFeature<std::string, std::string, Point2D> LocalFeature;
LocalFeature f1;
f1.setType("foo");
TEST_ASSERT(f1.getType()=="foo");
TEST_ASSERT(f1.getType() == "foo");
f1.setFamily("foob");
TEST_ASSERT(f1.getFamily()=="foob");
TEST_ASSERT(feq(f1.getLoc().x,0.0));
TEST_ASSERT(feq(f1.getLoc().y,0.0));
TEST_ASSERT(f1.getDirs().size()==0);
TEST_ASSERT(f1.getFamily() == "foob");
TEST_ASSERT(feq(f1.getLoc().x, 0.0));
TEST_ASSERT(feq(f1.getLoc().y, 0.0));
TEST_ASSERT(f1.getDirs().size() == 0);
f1 = LocalFeature("foo","bar");
TEST_ASSERT(f1.getFamily()=="bar");
TEST_ASSERT(f1.getType()=="bar");
TEST_ASSERT(feq(f1.getLoc().x,0.0));
TEST_ASSERT(feq(f1.getLoc().y,0.0));
TEST_ASSERT(f1.getDirs().size()==0);
f1 = LocalFeature("foo", "bar");
TEST_ASSERT(f1.getFamily() == "bar");
TEST_ASSERT(f1.getType() == "bar");
TEST_ASSERT(feq(f1.getLoc().x, 0.0));
TEST_ASSERT(feq(f1.getLoc().y, 0.0));
TEST_ASSERT(f1.getDirs().size() == 0);
f1 = LocalFeature("grm","grn",Point2D(1.0,2.0));
TEST_ASSERT(f1.getFamily()=="grm");
TEST_ASSERT(f1.getType()=="grn");
TEST_ASSERT(feq(f1.getLoc().x,1.0));
TEST_ASSERT(feq(f1.getLoc().y,2.0));
TEST_ASSERT(f1.getDirs().size()==0);
f1 = LocalFeature("grm", "grn", Point2D(1.0, 2.0));
TEST_ASSERT(f1.getFamily() == "grm");
TEST_ASSERT(f1.getType() == "grn");
TEST_ASSERT(feq(f1.getLoc().x, 1.0));
TEST_ASSERT(feq(f1.getLoc().y, 2.0));
TEST_ASSERT(f1.getDirs().size() == 0);
f1.setLoc(Point2D(-1.0, -2.0));
TEST_ASSERT(feq(f1.getLoc().x, -1.0));
TEST_ASSERT(feq(f1.getLoc().y, -2.0));
TEST_ASSERT(f1.getDirs().size() == 0);
f1.setLoc(Point2D(-1.0,-2.0));
TEST_ASSERT(feq(f1.getLoc().x,-1.0));
TEST_ASSERT(feq(f1.getLoc().y,-2.0));
TEST_ASSERT(f1.getDirs().size()==0);
std::cerr << " done" << std::endl;
}
void testParser(){
void testParser() {
std::cerr << "-------------------------------------" << std::endl;
std::cerr << "parser testing." << std::endl;
@@ -158,29 +155,23 @@ void testParser(){
int n;
bool ok;
text="p1";
ok=parse(text.c_str(),
( ch_p('p') >> int_p[assign_a(n)] ),space_p).full;
text = "p1";
ok = parse(text.c_str(), (ch_p('p') >> int_p[assign_a(n)]), space_p).full;
TEST_ASSERT(ok);
TEST_ASSERT(n==1);
TEST_ASSERT(n == 1);
text="p12";
ok=parse(text.c_str(),
( ch_p('p') >> int_p[assign_a(n)] ),space_p).full;
text = "p12";
ok = parse(text.c_str(), (ch_p('p') >> int_p[assign_a(n)]), space_p).full;
TEST_ASSERT(ok);
TEST_ASSERT(n==12);
text="p2 + p3";
ok=parse(text.c_str(),
( ch_p('p') >> int_p[assign_a(n)] ),space_p).full;
TEST_ASSERT(n == 12);
text = "p2 + p3";
ok = parse(text.c_str(), (ch_p('p') >> int_p[assign_a(n)]), space_p).full;
TEST_ASSERT(!ok);
std::cerr << " done" << std::endl;
}
int main(){
int main() {
#if 0
test1();
test2();

View File

@@ -11,30 +11,29 @@
#define __RD_FFCONTRIB_H__
namespace ForceFields {
class ForceField;
class ForceField;
//! abstract base class for contributions to ForceFields
class ForceFieldContrib {
public:
friend class ForceField;
//! abstract base class for contributions to ForceFields
class ForceFieldContrib {
public:
friend class ForceField;
ForceFieldContrib() : dp_forceField(0) {};
ForceFieldContrib(ForceFields::ForceField *owner) : dp_forceField(owner) {};
virtual ~ForceFieldContrib() {};
ForceFieldContrib() : dp_forceField(0){};
ForceFieldContrib(ForceFields::ForceField *owner) : dp_forceField(owner){};
virtual ~ForceFieldContrib(){};
//! returns our contribution to the energy of a position
virtual double getEnergy(double *pos) const = 0;
//! returns our contribution to the energy of a position
virtual double getEnergy(double *pos) const = 0;
//! calculates our contribution to the gradients of a position
virtual void getGrad(double *pos,double *grad) const = 0;
//! calculates our contribution to the gradients of a position
virtual void getGrad(double *pos, double *grad) const = 0;
//! return a copy
virtual ForceFieldContrib *copy() const = 0;
protected:
ForceField *dp_forceField; //!< our owning ForceField
};
//! return a copy
virtual ForceFieldContrib *copy() const = 0;
protected:
ForceField *dp_forceField; //!< our owning ForceField
};
}
#endif

View File

@@ -15,291 +15,295 @@
#include <Numerics/Optimizer/BFGSOpt.h>
#include <vector>
namespace ForceFieldsHelper {
class calcEnergy {
public:
calcEnergy(ForceFields::ForceField *ffHolder) :
mp_ffHolder(ffHolder) {} ;
double operator() (double *pos) const {
return mp_ffHolder->calcEnergy(pos);
}
private:
ForceFields::ForceField *mp_ffHolder;
};
class calcEnergy {
public:
calcEnergy(ForceFields::ForceField *ffHolder) : mp_ffHolder(ffHolder){};
double operator()(double *pos) const { return mp_ffHolder->calcEnergy(pos); }
class calcGradient {
public:
calcGradient(ForceFields::ForceField *ffHolder) :
mp_ffHolder(ffHolder) {} ;
double operator() (double *pos,double *grad) const {
double res = 1.0;
// the contribs to the gradient function use +=, so we need
// to zero the grad out before moving on:
for(unsigned int i=0;i<mp_ffHolder->numPoints()*mp_ffHolder->dimension();i++){
grad[i] = 0.0;
}
mp_ffHolder->calcGrad(pos,grad);
private:
ForceFields::ForceField *mp_ffHolder;
};
class calcGradient {
public:
calcGradient(ForceFields::ForceField *ffHolder) : mp_ffHolder(ffHolder){};
double operator()(double *pos, double *grad) const {
double res = 1.0;
// the contribs to the gradient function use +=, so we need
// to zero the grad out before moving on:
for (unsigned int i = 0;
i < mp_ffHolder->numPoints() * mp_ffHolder->dimension(); i++) {
grad[i] = 0.0;
}
mp_ffHolder->calcGrad(pos, grad);
#if 1
// FIX: this hack reduces the gradients so that the
// minimizer is more efficient.
double maxGrad=-1e8;
double gradScale=0.1;
for(unsigned int i=0;i<mp_ffHolder->numPoints()*mp_ffHolder->dimension();i++){
grad[i] *= gradScale;
if(grad[i]>maxGrad) maxGrad=grad[i];
}
// this is a continuation of the same hack to avoid
// some potential numeric instabilities:
if(maxGrad>10.0){
while(maxGrad*gradScale>10.0){
gradScale*=.5;
}
for(unsigned int i=0;i<mp_ffHolder->numPoints()*mp_ffHolder->dimension();i++){
grad[i] *= gradScale;
}
}
res=gradScale;
#endif
return res;
// FIX: this hack reduces the gradients so that the
// minimizer is more efficient.
double maxGrad = -1e8;
double gradScale = 0.1;
for (unsigned int i = 0;
i < mp_ffHolder->numPoints() * mp_ffHolder->dimension(); i++) {
grad[i] *= gradScale;
if (grad[i] > maxGrad) maxGrad = grad[i];
}
private:
ForceFields::ForceField *mp_ffHolder;
};
// this is a continuation of the same hack to avoid
// some potential numeric instabilities:
if (maxGrad > 10.0) {
while (maxGrad * gradScale > 10.0) {
gradScale *= .5;
}
for (unsigned int i = 0;
i < mp_ffHolder->numPoints() * mp_ffHolder->dimension(); i++) {
grad[i] *= gradScale;
}
}
res = gradScale;
#endif
return res;
}
private:
ForceFields::ForceField *mp_ffHolder;
};
}
namespace ForceFields {
ForceField::~ForceField(){
d_numPoints=0;
d_positions.clear();
d_contribs.clear();
delete [] dp_distMat;
dp_distMat=0;
//! Destructor: resets the point count, empties the position and contribution
//! containers, and frees the cached distance matrix.
ForceField::~ForceField() {
d_numPoints = 0;
d_positions.clear();
d_contribs.clear();
// dp_distMat is either 0 or an array obtained from new[]; delete[] on a
// null pointer is a no-op, so no guard is needed here.
delete[] dp_distMat;
dp_distMat = 0;
}
//! Copy constructor.
/*!
  Takes over the dimension and point count of \c other and clones each of its
  contributions through ForceFieldContrib::copy(), re-parenting every clone to
  this object. The copy starts out uninitialized (df_init is false) and
  without a distance matrix.

  NOTE: only the members named in the initializer list and d_contribs are set
  here; nothing else is taken from \c other.
*/
ForceField::ForceField(const ForceField &other)
    : d_dimension(other.d_dimension),
      df_init(false),
      d_numPoints(other.d_numPoints),
      dp_distMat(0) {
  d_contribs.clear();
  for (ContribPtrVect::const_iterator src = other.d_contribs.begin();
       src != other.d_contribs.end(); ++src) {
    ForceFieldContrib *clone = (*src)->copy();
    // the clone must point back at the new owner, not at `other`
    clone->dp_forceField = this;
    d_contribs.push_back(ContribPtr(clone));
  }
}
ForceField::ForceField(const ForceField &other) : d_dimension(other.d_dimension),
df_init(false),
d_numPoints(other.d_numPoints),
dp_distMat(0) {
d_contribs.clear();
BOOST_FOREACH(const ContribPtr &contrib,other.d_contribs){
ForceFieldContrib *ncontrib=contrib->copy();
ncontrib->dp_forceField=this;
d_contribs.push_back(ContribPtr(ncontrib));
}
};
double ForceField::distance(unsigned int i,unsigned int j,double *pos) {
PRECONDITION(df_init,"not initialized");
URANGE_CHECK(i,d_numPoints-1);
URANGE_CHECK(j,d_numPoints-1);
if(j<i){
int tmp=j;
j = i;
i = tmp;
}
unsigned int idx=i+j*(j+1)/2;
CHECK_INVARIANT(idx<d_matSize,"Bad index");
double &res=dp_distMat[idx];
if(res<0.0){
// we need to calculate this distance:
if(!pos){
res = 0.0;
for (unsigned int idx = 0; idx < d_dimension; ++idx) {
double tmp = (*(this->positions()[i]))[idx] - (*(this->positions()[j]))[idx];
res += tmp*tmp;
}
} else {
res = 0.0;
for(unsigned int idx=0;idx<d_dimension;idx++){
double tmp=pos[d_dimension*i+idx]-pos[d_dimension*j+idx];
res += tmp*tmp;
}
}
res = sqrt(res);
dp_distMat[idx]=res;
}
return res;
double ForceField::distance(unsigned int i, unsigned int j, double *pos) {
PRECONDITION(df_init, "not initialized");
URANGE_CHECK(i, d_numPoints - 1);
URANGE_CHECK(j, d_numPoints - 1);
if (j < i) {
int tmp = j;
j = i;
i = tmp;
}
double ForceField::distance(unsigned int i,unsigned int j,double *pos) const {
PRECONDITION(df_init,"not initialized");
URANGE_CHECK(i,d_numPoints-1);
URANGE_CHECK(j,d_numPoints-1);
if(j<i){
int tmp=j;
j = i;
i = tmp;
}
double res;
if(!pos){
unsigned int idx = i + j * (j + 1) / 2;
CHECK_INVARIANT(idx < d_matSize, "Bad index");
double &res = dp_distMat[idx];
if (res < 0.0) {
// we need to calculate this distance:
if (!pos) {
res = 0.0;
for (unsigned int idx = 0; idx < d_dimension; ++idx) {
double tmp = (*(this->positions()[i]))[idx] - (*(this->positions()[j]))[idx];
res += tmp*tmp;
double tmp =
(*(this->positions()[i]))[idx] - (*(this->positions()[j]))[idx];
res += tmp * tmp;
}
} else {
res = 0.0;
for(unsigned int idx=0;idx<d_dimension;idx++){
double tmp=pos[d_dimension*i+idx]-pos[d_dimension*j+idx];
res += tmp*tmp;
for (unsigned int idx = 0; idx < d_dimension; idx++) {
double tmp = pos[d_dimension * i + idx] - pos[d_dimension * j + idx];
res += tmp * tmp;
}
}
res = sqrt(res);
return res;
dp_distMat[idx] = res;
}
void ForceField::initialize(){
// clean up if we have used this already:
df_init=false;
delete [] dp_distMat;
dp_distMat=0;
d_numPoints = d_positions.size();
d_matSize=d_numPoints*(d_numPoints+1)/2;
dp_distMat = new double[d_matSize];
this->initDistanceMatrix();
df_init=true;
}
int ForceField::minimize(unsigned int maxIts,double forceTol,double energyTol){
PRECONDITION(df_init,"not initialized");
PRECONDITION(static_cast<unsigned int>(d_numPoints)==d_positions.size(),"size mismatch");
if(d_contribs.empty()) return 0;
unsigned int numIters=0;
unsigned int dim=this->d_numPoints*d_dimension;
double finalForce;
double *points=new double[dim];
this->scatter(points);
ForceFieldsHelper::calcEnergy eCalc(this);
ForceFieldsHelper::calcGradient gCalc(this);
int res = BFGSOpt::minimize(dim,points,forceTol,numIters,finalForce,
eCalc,gCalc,
energyTol,maxIts);
this->gather(points);
delete [] points;
return res;
}
double ForceField::calcEnergy() const{
PRECONDITION(df_init,"not initialized");
double res = 0.0;
if(d_contribs.empty()) return res;
unsigned int N = d_positions.size();
double *pos = new double[d_dimension*N];
this->scatter(pos);
// now loop over the contribs
for(ContribPtrVect::const_iterator contrib=d_contribs.begin();
contrib != d_contribs.end();contrib++){
res += (*contrib)->getEnergy(pos);
}
delete [] pos;
return res;
}
double ForceField::calcEnergy(double *pos){
PRECONDITION(df_init,"not initialized");
PRECONDITION(pos,"bad position vector");
double res = 0.0;
this->initDistanceMatrix();
if(d_contribs.empty()) return res;
// now loop over the contribs
for(ContribPtrVect::const_iterator contrib=d_contribs.begin();
contrib != d_contribs.end();contrib++){
double E=(*contrib)->getEnergy(pos);
res += E;
}
return res;
}
void ForceField::calcGrad(double *grad) const {
PRECONDITION(df_init,"not initialized");
PRECONDITION(grad,"bad gradient vector");
if(d_contribs.empty()) return;
unsigned int N = d_positions.size();
double *pos = new double[d_dimension*N];
this->scatter(pos);
for(ContribPtrVect::const_iterator contrib=d_contribs.begin();
contrib != d_contribs.end();contrib++){
(*contrib)->getGrad(pos,grad);
}
// zero out gradient values for any fixed points:
for(INT_VECT::const_iterator it=d_fixedPoints.begin();
it!=d_fixedPoints.end();it++){
CHECK_INVARIANT(static_cast<unsigned int>(*it)<d_numPoints,"bad fixed point index");
unsigned int idx=d_dimension*(*it);
for (unsigned int di = 0; di < this->dimension(); ++di) {
grad[idx+di] = 0.0;
}
}
delete [] pos;
}
void ForceField::calcGrad(double *pos,double *grad) {
PRECONDITION(df_init,"not initialized");
PRECONDITION(pos,"bad position vector");
PRECONDITION(grad,"bad gradient vector");
if(d_contribs.empty()) return;
for(ContribPtrVect::const_iterator contrib=d_contribs.begin();
contrib != d_contribs.end();contrib++){
(*contrib)->getGrad(pos,grad);
}
for(INT_VECT::const_iterator it=d_fixedPoints.begin();
it!=d_fixedPoints.end();it++){
CHECK_INVARIANT(static_cast<unsigned int>(*it)<d_numPoints,"bad fixed point index");
unsigned int idx=d_dimension*(*it);
for (unsigned int di = 0; di < this->dimension(); ++di) {
grad[idx+di] = 0.0;
}
}
}
void ForceField::scatter(double *pos) const {
PRECONDITION(df_init,"not initialized");
PRECONDITION(pos,"bad position vector");
unsigned int tab=0;
for(unsigned int i=0;i<d_positions.size();i++){
for (unsigned int di=0; di < this->dimension(); ++di){
pos[tab+di] = (*d_positions[i])[di]; //->x;
}
tab+=this->dimension();
}
POSTCONDITION(tab==this->dimension()*d_positions.size(),"bad index");
}
void ForceField::gather(double *pos) {
PRECONDITION(df_init,"not initialized");
PRECONDITION(pos,"bad position vector");
unsigned int tab=0;
for(unsigned int i=0;i<d_positions.size();i++){
for (unsigned int di=0; di < this->dimension(); ++di){
(*d_positions[i])[di] = pos[tab+di];
}
tab+=this->dimension();
}
}
void ForceField::initDistanceMatrix(){
PRECONDITION(d_numPoints,"no points");
PRECONDITION(dp_distMat,"no distance matrix");
PRECONDITION(static_cast<unsigned int>(d_numPoints*(d_numPoints+1)/2)<=d_matSize,"matrix size mismatch");
for(unsigned int i=0;i<d_numPoints*(d_numPoints+1)/2;i++){
dp_distMat[i]=-1.0;
}
}
return res;
}
//! Returns the Euclidean distance between points \c i and \c j (const form).
/*!
  Unlike the non-const overload, this version never reads or writes the
  cached distance matrix; the distance is recomputed on every call.

  \param i    index of the first point (must be < d_numPoints)
  \param j    index of the second point (must be < d_numPoints)
  \param pos  optional flat coordinate array indexed as
              pos[d_dimension * pointIdx + axis]; when null the coordinates
              returned by positions() are used instead

  \return the distance between the two points
*/
double ForceField::distance(unsigned int i, unsigned int j, double *pos) const {
  PRECONDITION(df_init, "not initialized");
  URANGE_CHECK(i, d_numPoints - 1);
  URANGE_CHECK(j, d_numPoints - 1);
  if (j < i) {
    // order the pair so that i <= j; the temporary is unsigned so that the
    // swap never narrows an index through a signed int
    unsigned int tmp = j;
    j = i;
    i = tmp;
  }
  double sumSq = 0.0;
  if (!pos) {
    // no explicit coordinates: read them from the stored points
    for (unsigned int idx = 0; idx < d_dimension; ++idx) {
      double delta =
          (*(this->positions()[i]))[idx] - (*(this->positions()[j]))[idx];
      sumSq += delta * delta;
    }
  } else {
    for (unsigned int idx = 0; idx < d_dimension; idx++) {
      double delta = pos[d_dimension * i + idx] - pos[d_dimension * j + idx];
      sumSq += delta * delta;
    }
  }
  return sqrt(sumSq);
}
//! (Re)builds the internal state derived from the current set of positions.
/*!
  Sets d_numPoints from d_positions, allocates a packed triangular distance
  matrix with d_numPoints * (d_numPoints + 1) / 2 entries, and marks every
  entry as not yet computed. df_init is only set to true once all of that has
  succeeded; the other methods in this file check it as a precondition.
*/
void ForceField::initialize() {
// clean up if we have used this already:
df_init = false;
delete[] dp_distMat;
// null the pointer before re-allocating so that it never dangles if the
// allocation below throws
dp_distMat = 0;
d_numPoints = d_positions.size();
d_matSize = d_numPoints * (d_numPoints + 1) / 2;
dp_distMat = new double[d_matSize];
// fills the matrix with -1.0 (see initDistanceMatrix())
this->initDistanceMatrix();
df_init = true;
}
//! Minimizes the energy of the system with the BFGS optimizer.
/*!
  The current positions are flattened into a scratch array (scatter()),
  handed to BFGSOpt::minimize() together with energy and gradient functors
  bound to this force field, and written back afterwards (gather()).

  \param maxIts     forwarded to BFGSOpt::minimize() as its last argument
  \param forceTol   forwarded to BFGSOpt::minimize()
  \param energyTol  forwarded to BFGSOpt::minimize()

  \return the value returned by BFGSOpt::minimize(), or 0 immediately when
          there are no contributions
*/
int ForceField::minimize(unsigned int maxIts, double forceTol,
                         double energyTol) {
  PRECONDITION(df_init, "not initialized");
  PRECONDITION(static_cast<unsigned int>(d_numPoints) == d_positions.size(),
               "size mismatch");
  if (d_contribs.empty()) return 0;

  unsigned int numIters = 0;
  unsigned int dim = this->d_numPoints * d_dimension;
  double finalForce;
  // The scratch coordinates live in a std::vector so that they are released
  // even if scatter(), the optimizer or gather() throws. At least one element
  // is always allocated so that &buffer[0] is a valid, non-null pointer.
  std::vector<double> buffer(dim ? dim : 1);
  double *points = &buffer[0];

  this->scatter(points);
  ForceFieldsHelper::calcEnergy eCalc(this);
  ForceFieldsHelper::calcGradient gCalc(this);

  int res = BFGSOpt::minimize(dim, points, forceTol, numIters, finalForce,
                              eCalc, gCalc, energyTol, maxIts);
  this->gather(points);
  return res;
}
//! Returns the total energy at the currently stored positions.
/*!
  The positions are flattened into a temporary coordinate array and every
  contribution's getEnergy() is summed. The cached distance matrix is not
  touched by this overload.

  \return the summed energy, or 0.0 when there are no contributions
*/
double ForceField::calcEnergy() const {
  PRECONDITION(df_init, "not initialized");
  double res = 0.0;
  if (d_contribs.empty()) return res;

  unsigned int N = d_positions.size();
  // The temporary coordinates live in a std::vector so that they are released
  // even if scatter() or one of the contributions throws. At least one
  // element is always allocated so that &buffer[0] is valid and non-null.
  std::vector<double>::size_type len = d_dimension * N;
  std::vector<double> buffer(len ? len : 1);
  double *pos = &buffer[0];
  this->scatter(pos);

  // now loop over the contribs
  for (ContribPtrVect::const_iterator contrib = d_contribs.begin();
       contrib != d_contribs.end(); ++contrib) {
    res += (*contrib)->getEnergy(pos);
  }
  return res;
}
double ForceField::calcEnergy(double *pos) {
PRECONDITION(df_init, "not initialized");
PRECONDITION(pos, "bad position vector");
double res = 0.0;
this->initDistanceMatrix();
if (d_contribs.empty()) return res;
// now loop over the contribs
for (ContribPtrVect::const_iterator contrib = d_contribs.begin();
contrib != d_contribs.end(); contrib++) {
double E = (*contrib)->getEnergy(pos);
res += E;
}
return res;
}
//! Accumulates the gradient at the currently stored positions into \c grad.
/*!
  Every contribution's getGrad() is applied to \c grad, after which the
  entries belonging to fixed points are set to zero. Nothing is done when
  there are no contributions.

  \param grad  flat gradient array, indexed as
               grad[d_dimension * pointIdx + axis]; must not be null.
               It is not zeroed here before the contributions are applied.
*/
void ForceField::calcGrad(double *grad) const {
  PRECONDITION(df_init, "not initialized");
  PRECONDITION(grad, "bad gradient vector");
  if (d_contribs.empty()) return;

  unsigned int N = d_positions.size();
  // The temporary coordinates live in a std::vector so that they are released
  // even if a contribution throws or the fixed-point invariant below fails.
  // At least one element is always allocated so that &buffer[0] is valid.
  std::vector<double>::size_type len = d_dimension * N;
  std::vector<double> buffer(len ? len : 1);
  double *pos = &buffer[0];
  this->scatter(pos);

  for (ContribPtrVect::const_iterator contrib = d_contribs.begin();
       contrib != d_contribs.end(); ++contrib) {
    (*contrib)->getGrad(pos, grad);
  }

  // zero out gradient values for any fixed points:
  for (INT_VECT::const_iterator it = d_fixedPoints.begin();
       it != d_fixedPoints.end(); ++it) {
    CHECK_INVARIANT(static_cast<unsigned int>(*it) < d_numPoints,
                    "bad fixed point index");
    unsigned int idx = d_dimension * (*it);
    for (unsigned int di = 0; di < this->dimension(); ++di) {
      grad[idx + di] = 0.0;
    }
  }
}
void ForceField::calcGrad(double *pos, double *grad) {
PRECONDITION(df_init, "not initialized");
PRECONDITION(pos, "bad position vector");
PRECONDITION(grad, "bad gradient vector");
if (d_contribs.empty()) return;
for (ContribPtrVect::const_iterator contrib = d_contribs.begin();
contrib != d_contribs.end(); contrib++) {
(*contrib)->getGrad(pos, grad);
}
for (INT_VECT::const_iterator it = d_fixedPoints.begin();
it != d_fixedPoints.end(); it++) {
CHECK_INVARIANT(static_cast<unsigned int>(*it) < d_numPoints,
"bad fixed point index");
unsigned int idx = d_dimension * (*it);
for (unsigned int di = 0; di < this->dimension(); ++di) {
grad[idx + di] = 0.0;
}
}
}
//! Flattens the stored positions into \c pos.
/*!
  Point \c p, axis \c a ends up in pos[dimension() * p + a].

  \param pos  destination array; must not be null and must have room for
              dimension() * d_positions.size() values
*/
void ForceField::scatter(double *pos) const {
  PRECONDITION(df_init, "not initialized");
  PRECONDITION(pos, "bad position vector");

  // running write index into the flat array
  unsigned int tab = 0;
  for (unsigned int ptIdx = 0; ptIdx < d_positions.size(); ++ptIdx) {
    for (unsigned int axis = 0; axis < this->dimension(); ++axis) {
      pos[tab] = (*d_positions[ptIdx])[axis];
      ++tab;
    }
  }
  POSTCONDITION(tab == this->dimension() * d_positions.size(), "bad index");
}
//! Copies the flat coordinates in \c pos back into the stored positions.
/*!
  This is the inverse of scatter(): pos[dimension() * p + a] is written to
  axis \c a of point \c p.

  \param pos  source array; must not be null and must hold
              dimension() * d_positions.size() values
*/
void ForceField::gather(double *pos) {
  PRECONDITION(df_init, "not initialized");
  PRECONDITION(pos, "bad position vector");

  // running read index into the flat array
  unsigned int tab = 0;
  for (unsigned int ptIdx = 0; ptIdx < d_positions.size(); ++ptIdx) {
    for (unsigned int axis = 0; axis < this->dimension(); ++axis) {
      (*d_positions[ptIdx])[axis] = pos[tab];
      ++tab;
    }
  }
}
//! Marks every entry of the cached distance matrix as "not yet computed".
/*!
  The first d_numPoints * (d_numPoints + 1) / 2 entries of dp_distMat are set
  to -1.0.
*/
void ForceField::initDistanceMatrix() {
  PRECONDITION(d_numPoints, "no points");
  PRECONDITION(dp_distMat, "no distance matrix");
  PRECONDITION(static_cast<unsigned int>(d_numPoints * (d_numPoints + 1) / 2) <=
                   d_matSize,
               "matrix size mismatch");

  unsigned int slot = 0;
  while (slot < d_numPoints * (d_numPoints + 1) / 2) {
    dp_distMat[slot] = -1.0;
    ++slot;
  }
}
}

Some files were not shown because too many files have changed in this diff Show More