mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-03 21:44:30 +08:00
c++ implementation of USR descriptor added
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
rdkit_library(Descriptors
|
||||
Crippen.cpp MolDescriptors.cpp MolSurf.cpp Lipinski.cpp ConnectivityDescriptors.cpp
|
||||
MQN.cpp
|
||||
MQN.cpp USRDescriptor.cpp
|
||||
LINK_LIBRARIES PartialCharges SmilesParse FileParsers Subgraphs SubstructMatch
|
||||
${RDKit_THREAD_LIBS})
|
||||
|
||||
@@ -8,6 +8,7 @@ rdkit_headers(Crippen.h Lipinski.h
|
||||
MolDescriptors.h
|
||||
MolSurf.h
|
||||
ConnectivityDescriptors.h MQN.h
|
||||
USRDescriptor.h
|
||||
DEST GraphMol/Descriptors)
|
||||
|
||||
rdkit_test(testDescriptors test.cpp
|
||||
|
||||
149
Code/GraphMol/Descriptors/USRDescriptor.cpp
Normal file
149
Code/GraphMol/Descriptors/USRDescriptor.cpp
Normal file
@@ -0,0 +1,149 @@
|
||||
//
|
||||
// Copyright (C) 2011-2013 Greg Landrum
|
||||
//
|
||||
// @@ All Rights Reserved @@
|
||||
// This file is part of the RDKit.
|
||||
// The contents are covered by the terms of the BSD license
|
||||
// which is included in the file license.txt, found at the root
|
||||
// of the RDKit source tree.
|
||||
//
|
||||
|
||||
|
||||
#include <Geometry/point.h>
|
||||
#include <Numerics/Vector.h>
|
||||
#include <GraphMol/Conformer.h>
|
||||
#include <GraphMol/ROMol.h>
|
||||
#include <boost/foreach.hpp>
|
||||
#include "USRDescriptor.h"
|
||||
|
||||
|
||||
namespace RDKit{
|
||||
|
||||
namespace {
|
||||
void calcDistances(const RDGeom::Point3DConstPtrVect &coords,
|
||||
const RDGeom::Point3D &point,
|
||||
RDNumeric::Vector<double> &distances) {
|
||||
PRECONDITION(distances.size() == coords.size(), "distances and coords must be the same size");
|
||||
RDGeom::Point3D tmpPt;
|
||||
unsigned int i = 0;
|
||||
// loop over coordinates
|
||||
BOOST_FOREACH(const RDGeom::Point3D *tpp, coords) {
|
||||
distances[i++] = (*tpp-point).length();
|
||||
}
|
||||
}
|
||||
|
||||
void calcCentroid(const RDGeom::Point3DConstPtrVect &coords,
|
||||
RDGeom::Point3D &pt) {
|
||||
PRECONDITION(coords.size() != 0,"no coordinates");
|
||||
// set pt to zero
|
||||
pt *= 0.0;
|
||||
// loop over coordinates
|
||||
BOOST_FOREACH(const RDGeom::Point3D *opt, coords) {
|
||||
pt += *opt;
|
||||
}
|
||||
pt /= coords.size();
|
||||
}
|
||||
|
||||
void calcMoments(const RDNumeric::Vector<double> &dist,
|
||||
RDNumeric::Vector<double> &moments) {
|
||||
PRECONDITION(moments.size() == 3, "moments must have 3 elements");
|
||||
PRECONDITION(dist.size() != 0,"no distances");
|
||||
// set all elements to zero
|
||||
moments *= 0.0;
|
||||
unsigned int numPts = dist.size();
|
||||
// 1. moment: mean
|
||||
for (unsigned int i = 0; i < numPts; ++i) {
|
||||
moments[0] += dist[i];
|
||||
}
|
||||
moments[0] /= numPts;
|
||||
// 2. moment: standard deviation
|
||||
// 3. moment: cubic root of skewness
|
||||
for (unsigned int i = 0; i < numPts; ++i) {
|
||||
double diff = dist[i] - moments[0];
|
||||
moments[1] += diff * diff;
|
||||
moments[2] += diff * diff * diff;
|
||||
}
|
||||
moments[1] = sqrt(moments[1] / numPts);
|
||||
moments[2] /= numPts;
|
||||
moments[2] = cbrt(moments[2] / (moments[1] * moments[1] * moments[1]));
|
||||
}
|
||||
|
||||
void addMoments(std::vector<double> &d,
|
||||
const RDNumeric::Vector<double> &m,
|
||||
int idx) {
|
||||
for (unsigned int i = 0; i < m.size(); ++i) {
|
||||
d[i + idx] = m[i];
|
||||
}
|
||||
}
|
||||
|
||||
} // end namespace
|
||||
|
||||
namespace Descriptors {
|
||||
|
||||
void USR(const ROMol &mol, std::vector<double> &descriptor, int confId) {
|
||||
PRECONDITION(descriptor.size() == 12, "descriptor must have 12 elements");
|
||||
unsigned int na = mol.getNumAtoms();
|
||||
// check that number of atoms > 3
|
||||
if (na < 3) {
|
||||
throw ValueErrorException("Number of atoms must be greater than 3");
|
||||
}
|
||||
// check that minimum a conformer exists
|
||||
if (mol.getNumConformers() == 0) {
|
||||
throw ConformerException("No conformations available on this molecule");
|
||||
}
|
||||
|
||||
const Conformer &conf = mol.getConformer(confId);
|
||||
RDGeom::Point3DConstPtrVect coords(na);
|
||||
// loop over atoms
|
||||
for (unsigned int ai = 0; ai < na; ++ai) {
|
||||
coords[ai] = &conf.getAtomPos(ai);
|
||||
}
|
||||
calcUSRForPoints(coords, descriptor);
|
||||
}
|
||||
|
||||
void calcUSRForPoints(const RDGeom::Point3DConstPtrVect &coords,
|
||||
std::vector<double> &descriptor) {
|
||||
PRECONDITION(descriptor.size() == 12, "descriptor must have 12 elements");
|
||||
RDGeom::Point3D pt;
|
||||
RDGeom::Point3D pt2;
|
||||
RDNumeric::Vector<double> dist(coords.size());
|
||||
RDNumeric::Vector<double> moments(3, 0.0);
|
||||
|
||||
// ctd = centroid
|
||||
calcCentroid(coords, pt);
|
||||
calcDistances(coords, pt, dist);
|
||||
calcMoments(dist, moments);
|
||||
addMoments(descriptor, moments, 0);
|
||||
|
||||
// catc = closest atom to centroid
|
||||
pt = (*coords[dist.smallestValId()]); // catc
|
||||
pt2 = (*coords[dist.largestValId()]); // fatc
|
||||
calcDistances(coords, pt, dist);
|
||||
calcMoments(dist, moments);
|
||||
addMoments(descriptor, moments, 3);
|
||||
|
||||
// fatc = farthest atom to centroid
|
||||
calcDistances(coords, pt2, dist);
|
||||
calcMoments(dist, moments);
|
||||
addMoments(descriptor, moments, 6);
|
||||
|
||||
// fatf = farthest atom to fatc
|
||||
pt = (*coords[dist.largestValId()]);
|
||||
calcDistances(coords, pt, dist);
|
||||
calcMoments(dist, moments);
|
||||
addMoments(descriptor, moments, 9);
|
||||
}
|
||||
|
||||
double calcUSRScore(const std::vector<double> &d1, const std::vector<double> &d2) {
|
||||
PRECONDITION(d1.size() == d2.size(), "descriptors must have the same size");
|
||||
double score = 0.0;
|
||||
unsigned int num = d1.size();
|
||||
for (unsigned int i = 0; i < num; ++i) {
|
||||
score += fabs(d1[i] - d2[i]);
|
||||
}
|
||||
score /= num;
|
||||
return 1.0 / (1.0 + score);
|
||||
}
|
||||
|
||||
} // end of namespace Descriptors
|
||||
} //end of namespace RDKit
|
||||
64
Code/GraphMol/Descriptors/USRDescriptor.h
Normal file
64
Code/GraphMol/Descriptors/USRDescriptor.h
Normal file
@@ -0,0 +1,64 @@
|
||||
//
|
||||
// Copyright (C) 2011-2013 Greg Landrum
|
||||
//
|
||||
// @@ All Rights Reserved @@
|
||||
// This file is part of the RDKit.
|
||||
// The contents are covered by the terms of the BSD license
|
||||
// which is included in the file license.txt, found at the root
|
||||
// of the RDKit source tree.
|
||||
//
|
||||
|
||||
/*! \file USRDescriptor.h
|
||||
|
||||
\brief Contains the USR descriptor. Use MolDescriptors.h in client code.
|
||||
|
||||
*/
|
||||
#ifndef __RD_USR_H__
|
||||
#define __RD_USR_H__
|
||||
|
||||
#include <Geometry/point.h>
|
||||
#include <Numerics/Vector.h>
|
||||
|
||||
namespace RDKit{
|
||||
class ROMol;
|
||||
class Conformer;
|
||||
namespace Descriptors {
|
||||
/*!
|
||||
Calculates the ultra-fast shape recognition (USR)
|
||||
|
||||
Reference: P. J. Ballester, W. G. Richards, JCC (2007), 28, 1711 - 1723.
|
||||
|
||||
Derived from RDKit Python implementation of Jan Domanski
|
||||
who derived his code from Adrian Schreyer's code:
|
||||
http://hg.adrianschreyer.eu/usrcat/src/70e075d93cd25370e7ef93301d0e28d49a0851c2/usrcat/geometry.py?at=default
|
||||
|
||||
\param mol the molecule of interest
|
||||
\param descriptor storage for the computed USR descriptor
|
||||
\param confId the conformer Id
|
||||
|
||||
*/
|
||||
void USR(const ROMol &mol, std::vector<double> &descriptor, int confId = -1);
|
||||
|
||||
/*!
|
||||
Calculates the USR descriptor for a single conformer
|
||||
|
||||
\param coords the 3D coordinates of the atoms of a conformer
|
||||
\param descriptor storage for the computed USR descriptor
|
||||
|
||||
*/
|
||||
void calcUSRForPoints(const RDGeom::Point3DConstPtrVect &coords, std::vector<double> &descriptor);
|
||||
|
||||
/*!
|
||||
Calculates the score between two USR descriptors
|
||||
|
||||
\param d1 descriptor 1
|
||||
\param d2 descriptor 2
|
||||
|
||||
\return the score
|
||||
*/
|
||||
double calcUSRScore(const std::vector<double> &d1, const std::vector<double> &d2);
|
||||
|
||||
} // end of namespace Descriptors
|
||||
} //end of namespace RDKit
|
||||
|
||||
#endif
|
||||
@@ -29,7 +29,7 @@
|
||||
|
||||
#include <GraphMol/Descriptors/MolDescriptors.h>
|
||||
#include <GraphMol/Descriptors/Crippen.h>
|
||||
|
||||
#include <GraphMol/Descriptors/USRDescriptor.h>
|
||||
|
||||
#include <DataStructs/BitVects.h>
|
||||
#include <DataStructs/BitOps.h>
|
||||
@@ -1606,6 +1606,57 @@ void testMQNs(){
|
||||
BOOST_LOG(rdErrorLog) << " done" << std::endl;
|
||||
}
|
||||
|
||||
void testUSRDescriptor(){
|
||||
BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
|
||||
BOOST_LOG(rdErrorLog) << " Test USR Descriptor" << std::endl;
|
||||
std::vector<double> descriptor(12);
|
||||
|
||||
// no conformers
|
||||
ROMol *mol = SmilesToMol("C1CCCCC1");
|
||||
bool ok = false;
|
||||
try {
|
||||
USR(*mol, descriptor);
|
||||
} catch (ConformerException &e) {
|
||||
ok = true;
|
||||
}
|
||||
TEST_ASSERT(ok);
|
||||
|
||||
// number of atoms < 3
|
||||
mol = SmilesToMol("CC");
|
||||
ok = false;
|
||||
try {
|
||||
USR(*mol, descriptor);
|
||||
} catch (ValueErrorException &e) {
|
||||
ok = true;
|
||||
}
|
||||
TEST_ASSERT(ok);
|
||||
|
||||
// DESCRIPTOR
|
||||
// comparing to results produced by Adrian Schreyer's code
|
||||
// http://hg.adrianschreyer.eu/usrcat/src/70e075d93cd25370e7ef93301d0e28d49a0851c2/usrcat/geometry.py?at=default
|
||||
double refValues[12] = {2.37938524, 0.62181927, -0.89089872, 2.63773456, 1.1577952, -0.6937349,
|
||||
3.38248245, 1.59816952, -0.72933115, 3.38248245, 1.59816952,-0.72933115};
|
||||
std::vector<double> refUSR (refValues, refValues + sizeof(refValues) / sizeof(double) );
|
||||
std::string rdbase = getenv("RDBASE");
|
||||
std::string fname1 = rdbase + "/Code/GraphMol/Descriptors/test_data/benzene.mol";
|
||||
mol = MolFileToMol(fname1, true, false, true);
|
||||
std::vector<double> myUSR(12);
|
||||
USR(*mol, myUSR);
|
||||
for (unsigned int i = 0; i < myUSR.size(); ++i) {
|
||||
TEST_ASSERT(feq(myUSR[i], refUSR[i]));
|
||||
}
|
||||
|
||||
// SCORE
|
||||
// descriptors and reference score from JCC (2007), 28, 1711-1723.
|
||||
double m1[12]={4.44, 2.98, 1.04, 4.55, 4.70, 0.23, 8.30, 16.69, -22.97, 7.37, 15.64, 0.51};
|
||||
double m2[12]={4.39, 3.11, 1.36, 4.50, 4.44, 0.09, 8.34, 16.78, -23.20, 7.15, 16.52, 0.13};
|
||||
std::vector<double> d1 (m1, m1 + sizeof(m1) / sizeof(double) );
|
||||
std::vector<double> d2 (m2, m2 + sizeof(m2) / sizeof(double) );
|
||||
TEST_ASSERT(feq(calcUSRScore(d1, d2), 0.812, 0.001));
|
||||
|
||||
BOOST_LOG(rdErrorLog) << " done" << std::endl;
|
||||
}
|
||||
|
||||
void testGitHubIssue56(){
|
||||
BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
|
||||
BOOST_LOG(rdErrorLog) << " Test GitHub Issue 56." << std::endl;
|
||||
@@ -1685,6 +1736,7 @@ int main(){
|
||||
testRingDescriptors();
|
||||
testMiscCountDescriptors();
|
||||
testMQNs();
|
||||
testUSRDescriptor();
|
||||
#endif
|
||||
testGitHubIssue56();
|
||||
|
||||
|
||||
@@ -235,6 +235,20 @@ namespace RDNumeric {
|
||||
return id;
|
||||
}
|
||||
|
||||
//! \brief Gets the ID of the entry that has the smallest value
|
||||
inline unsigned int smallestValId() const {
|
||||
TYPE res = (TYPE)(1.e8);
|
||||
unsigned int i, id=d_size;
|
||||
TYPE *data = d_data.get();
|
||||
for (i = 0; i < d_size; i++) {
|
||||
if (data[i] < res) {
|
||||
res = data[i];
|
||||
id = i;
|
||||
}
|
||||
}
|
||||
return id;
|
||||
}
|
||||
|
||||
//! returns the dot product between two Vectors
|
||||
inline TYPE dotProduct(const Vector<TYPE> other) {
|
||||
PRECONDITION(d_size == other.size(), "Size mismatch in vector doct product");
|
||||
|
||||
Reference in New Issue
Block a user