mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-07 22:44:25 +08:00
* 3D Descriptors Dragons * stripped down, not yet working * get this building on a non C++-11 compiler * move the python test to the python directory * move the python test to the python directory * add the python test * now at least those tests runn * warning comment * some basic refactoring and cleanup * get python wrapper "working" (completely untested) * fix Name * fixing AutoCorr & RDF * AutoCorr test added * RDF reviewed based on AutoCorr comments * fix Morse code * Morse reviewed * Correct Morse & start Getaway * correct MORSE test * start Getaway clean up * simplification of Whim * better * fix Getaway * fix RCON * merge repaired * adding Dragon 2D autocorrelations descriptors * fix the 3D autocorrelation descriptors based on the modification in Dragon. * Adding the 2D autocorrelation descriptors (no need of Eigen dependency for this one) * Adding 2D test case * IState … no idea for the moment * there is an error in 2D computation (memory error ???) * fix the IState for molecules with Hs * need to use getTotalNumHs(true) not getTotalNumHs() * also need to remove Hs in both dv and d! * fixing Rcov values I fix the Rcov values * fix Getaway * remove push_back * remove call to sum * improve tests * fix getaway * adding precision parameter to GETAWAY * adding rouding (1e-3) * fix WHIM * use void in declarations of function * update MolDescriptors link * remove print option in WHIM * fix python wrapper to 3D descriptors. - all modifications reduce computation time by a factor of 3! * final fix for Getaway * all output are fixed except the 2 first values due to clustering approach. * cluster cannot be fixed du tu float precision issue between Java & c++ * best fix of ITH and ISH * use the same algorithm as in Dragon 6 but there is still a deviation * remove std::move * std:move only works on c++ 11 * fixing issue based on Greg Comments * auto2D still not working on my env * update 3d test.py * auto 2D not working after the first loop test * tighten up the tests * change name * update, but still does not pass * make this run (though it does not work) * re-enable test3D * some cleanup * add GETAWAY test data. Note that the tests fail * fix the ATS and ATSC autocorrelation 2D Broto Moreau and Geary autocorrelation are not correct again a specificity of Dragon to compute them. The result are not consistant with Padel because we use the relative weigth not in Padel. * one minor change to get things to compile * fix the M & G matrix computation fix inversion in the computation of the equations for both M & G matrixes * update autocorr2d tests * 192 examples * fix issue in cluster 0.01 0.009 case this is not correct all the cases * update GETAWAY expected values to reflect the fact that we cannot reproduce the literature values exactly fix a leak in GETAWAY * fix the negative values in gamma this is strickly the implementation that we find in the book molecular descriptors for chemoinformatics (except the case where an atom is already in the axis in this case it should be added in the symetric list which is not the case in this implementation) * Update WHIM.cpp adding the axis atoms to the symetrical list * update WHIM tests * add AUTOCORR2D to MolDescriptors and the python wrappers * start adding tests * test the python versions of the new descriptors * update list of descriptors
339 lines
11 KiB
C++
339 lines
11 KiB
C++
//
|
||
// Copyright (c) 2016, Guillaume GODIN
|
||
// All rights reserved.
|
||
//
|
||
// Redistribution and use in source and binary forms, with or without
|
||
// modification, are permitted provided that the following conditions are
|
||
// met:
|
||
//
|
||
// * Redistributions of source code must retain the above copyright
|
||
// notice, this list of conditions and the following disclaimer.
|
||
// * Redistributions in binary form must reproduce the above
|
||
// copyright notice, this list of conditions and the following
|
||
// disclaimer in the documentation and/or other materials provided
|
||
// with the distribution.
|
||
// * Neither the name of Institue of Cancer Research.
|
||
// nor the names of its contributors may be used to endorse or promote
|
||
// products derived from this software without specific prior written
|
||
// permission.
|
||
//
|
||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||
//
|
||
// for build & set RDBASE! => export RDBASE=/Users/GVALMTGG/Github/rdkit_mine/
|
||
|
||
#include <GraphMol/RDKitBase.h>
|
||
|
||
#include "WHIM.h"
|
||
#include "MolData3Ddescriptors.h"
|
||
|
||
#include <math.h>
|
||
#include <Eigen/Dense>
|
||
#include <Eigen/SVD>
|
||
|
||
using namespace Eigen;
|
||
|
||
namespace RDKit {
|
||
namespace Descriptors {
|
||
namespace {
|
||
|
||
MolData3Ddescriptors moldata3D;
|
||
|
||
double roundn(double in, int factor) {
|
||
return round(in * pow(10, factor)) / pow(10, factor);
|
||
}
|
||
|
||
MatrixXd GetCenterMatrix(MatrixXd &Mat) {
|
||
VectorXd v = Mat.colwise().mean();
|
||
MatrixXd X = Mat.rowwise() - v.transpose();
|
||
return X;
|
||
}
|
||
|
||
MatrixXd GetCovMatrix(MatrixXd &X, MatrixXd &Weigth, double weigth) {
|
||
return X.transpose() * Weigth * X / weigth;
|
||
}
|
||
|
||
JacobiSVD<MatrixXd> *getSVD(MatrixXd &Mat) {
|
||
JacobiSVD<MatrixXd> *svd =
|
||
new JacobiSVD<MatrixXd>(Mat, ComputeThinU | ComputeThinV);
|
||
return svd;
|
||
}
|
||
|
||
std::vector<double> getWhimD(std::vector<double> weigthvector,
|
||
MatrixXd MatOrigin, int numAtoms, double th) {
|
||
double *weigtharray = &weigthvector[0];
|
||
|
||
Map<VectorXd> Weigth(weigtharray, numAtoms);
|
||
|
||
MatrixXd WeigthMat = Weigth.asDiagonal();
|
||
|
||
double weigth = WeigthMat.diagonal().sum();
|
||
|
||
MatrixXd Xmean = GetCenterMatrix(MatOrigin);
|
||
|
||
MatrixXd covmat = GetCovMatrix(Xmean, WeigthMat, weigth);
|
||
|
||
JacobiSVD<MatrixXd> *svd = getSVD(covmat);
|
||
|
||
std::vector<double> w(18);
|
||
// prepare data for Whim parameter computation
|
||
|
||
const double *SingVal = svd->singularValues().data();
|
||
MatrixXd Scores = Xmean * svd->matrixV(); // V is similar
|
||
|
||
// compute parameters
|
||
w[0] = SingVal[0];
|
||
w[1] = SingVal[1];
|
||
w[2] = SingVal[2];
|
||
w[3] = SingVal[0] + SingVal[1] + SingVal[2]; // T
|
||
w[4] = SingVal[0] * SingVal[1] + SingVal[0] * SingVal[2] +
|
||
SingVal[1] * SingVal[2]; // A
|
||
w[5] = w[3] + w[4] + SingVal[0] * SingVal[1] * SingVal[2]; // V
|
||
w[6] = SingVal[0] / (SingVal[0] + SingVal[1] + SingVal[2]); // P1
|
||
w[7] = SingVal[1] / (SingVal[0] + SingVal[1] + SingVal[2]); // p2
|
||
w[8] = SingVal[2] / (SingVal[0] + SingVal[1] + SingVal[2]); // P3
|
||
|
||
double res = 0.0;
|
||
for (int i = 0; i < 3; i++) {
|
||
res += std::abs(w[i] / w[3] - 1.0 / 3.0);
|
||
}
|
||
|
||
w[9] = 3.0 / 4.0 * res; // K
|
||
|
||
// center original matrix
|
||
VectorXd v1 = Scores.col(0);
|
||
VectorXd v2 = Scores.col(1);
|
||
VectorXd v3 = Scores.col(2);
|
||
|
||
// inverse of the kurtosis
|
||
if (v1.array().pow(4).sum() > 0) {
|
||
w[10] = numAtoms * pow(w[0], 2) / v1.array().pow(4).sum(); // E1
|
||
} else {
|
||
w[10] = 0.0;
|
||
}
|
||
|
||
if (v2.array().pow(4).sum() > 0) {
|
||
w[11] = numAtoms * pow(w[1], 2) / v2.array().pow(4).sum(); // E2
|
||
} else {
|
||
w[11] = 0.0;
|
||
}
|
||
|
||
if (v3.array().pow(4).sum() > 0) {
|
||
w[12] = numAtoms * pow(w[2], 2) / v3.array().pow(4).sum(); // E3
|
||
} else {
|
||
w[12] = 0.0;
|
||
}
|
||
|
||
w[13] = (w[10] + w[11] + w[12]) / 3.0; // mean total density of the atoms
|
||
// called D is used on Dragon 6 not
|
||
// just the sum!
|
||
|
||
// check if the molecule is fully symmetrical "like CH4" using Canonical Rank
|
||
// Index and/or Sphericity !
|
||
|
||
double gamma[3]; // Gamma values
|
||
double nAT = (double)numAtoms;
|
||
|
||
// check if two atoms are symetric versus the new axis ie newx,newy,newz a
|
||
for (int i = 0; i < 3; i++) {
|
||
for (int j = 0; j < numAtoms; j++) {
|
||
Scores(j, i) = roundn(Scores(j, i),
|
||
3); // round the matrix! same as eigen tolerance !
|
||
}
|
||
}
|
||
|
||
|
||
// we should take into account atoms that are in the axis too!!! which is not trivial
|
||
for (int i = 0; i < 3; i++) {
|
||
double Symetric[2*numAtoms];
|
||
double ns = 0.0;
|
||
double na = 0.0;
|
||
for (int j = 0; j < numAtoms; j++) {
|
||
bool amatch = false;
|
||
for (int k = 0; k < numAtoms; k++) {
|
||
if (j==k) {
|
||
continue;
|
||
}
|
||
if (std::abs(Scores(j, i) + Scores(k, i)) <= th) {
|
||
// those that are close opposite & not close to the axis!
|
||
ns += 1; // check only once the symetric none null we need to add +2!
|
||
// (reduce the loop duration)
|
||
amatch = true;
|
||
Symetric[j]=1.0;
|
||
Symetric[j+numAtoms]=2.0;
|
||
Symetric[k]=1.0;
|
||
Symetric[k+numAtoms]=2.0;
|
||
break;
|
||
}
|
||
}
|
||
if (!amatch) {
|
||
na +=1;
|
||
Symetric[j]=0.0;
|
||
Symetric[j+numAtoms]=std::abs(Scores(j, i));
|
||
}
|
||
}
|
||
// take into account the atoms close to the axis
|
||
for (int aj = 0; aj < numAtoms; aj++) {
|
||
if (Symetric[aj+numAtoms]<th and Symetric[aj]<1.0) {
|
||
ns +=1;
|
||
na -=1;
|
||
}
|
||
}
|
||
|
||
|
||
gamma[i] = 0.0;
|
||
double gammainv=1.0;
|
||
if (ns == 0) {
|
||
gammainv = 1.0 - (na / nAT) * log(1.0 / nAT) / log(2);
|
||
}
|
||
if (ns > 0) {
|
||
gammainv = 1.0 - ((ns / nAT) * log(ns / nAT) / log(2) +
|
||
(na / nAT) * log(1.0 / nAT) / log(2));
|
||
}
|
||
gamma[i]=1.0/gammainv;
|
||
}
|
||
|
||
w[14] = gamma[0]; // G1
|
||
w[15] = gamma[1]; // G2
|
||
w[16] = gamma[2]; // G3
|
||
w[17] = pow(gamma[0] * gamma[1] * gamma[2], 1.0 / 3.0);
|
||
delete svd;
|
||
|
||
return w;
|
||
}
|
||
|
||
void GetWHIMs(const Conformer &conf, std::vector<double> &result,
|
||
double Vpoints[], double th) {
|
||
std::vector<double> wu(18);
|
||
std::vector<double> wm(18);
|
||
std::vector<double> wv(18);
|
||
std::vector<double> we(18);
|
||
std::vector<double> wp(18);
|
||
std::vector<double> wi(18);
|
||
std::vector<double> ws(18);
|
||
|
||
int numAtoms = conf.getNumAtoms();
|
||
Map<MatrixXd> matorigin(Vpoints, 3, numAtoms);
|
||
MatrixXd MatOrigin = matorigin.transpose();
|
||
std::vector<double> weigthvector;
|
||
|
||
// intermediate 18 values stored in this order per weighted vector :
|
||
// "L1","L2","L3","T","A","V","P1","P2","P3","K","E1","E2","E3","D","G1","G2","G3","G"
|
||
weigthvector = moldata3D.GetUn(numAtoms);
|
||
wu = getWhimD(weigthvector, MatOrigin, numAtoms, th);
|
||
|
||
weigthvector = moldata3D.GetRelativeMW(conf.getOwningMol());
|
||
wm = getWhimD(weigthvector, MatOrigin, numAtoms, th);
|
||
|
||
weigthvector = moldata3D.GetRelativeVdW(conf.getOwningMol());
|
||
wv = getWhimD(weigthvector, MatOrigin, numAtoms, th);
|
||
|
||
weigthvector = moldata3D.GetRelativeENeg(conf.getOwningMol());
|
||
we = getWhimD(weigthvector, MatOrigin, numAtoms, th);
|
||
|
||
weigthvector = moldata3D.GetRelativePol(conf.getOwningMol());
|
||
wp = getWhimD(weigthvector, MatOrigin, numAtoms, th);
|
||
|
||
weigthvector = moldata3D.GetRelativeIonPol(conf.getOwningMol());
|
||
wi = getWhimD(weigthvector, MatOrigin, numAtoms, th);
|
||
|
||
weigthvector = moldata3D.GetIState(conf.getOwningMol());
|
||
ws = getWhimD(weigthvector, MatOrigin, numAtoms, th);
|
||
|
||
result.clear();
|
||
result.resize(126);
|
||
|
||
for (int i = 0; i < 18; i++) {
|
||
result[i + 18 * 0] = wu[i];
|
||
result[i + 18 * 1] = wm[i];
|
||
result[i + 18 * 2] = wv[i];
|
||
result[i + 18 * 3] = we[i];
|
||
result[i + 18 * 4] = wp[i];
|
||
result[i + 18 * 5] = wi[i];
|
||
result[i + 18 * 6] = ws[i];
|
||
}
|
||
wu.clear();
|
||
wm.clear();
|
||
wv.clear();
|
||
we.clear();
|
||
wp.clear();
|
||
wi.clear();
|
||
ws.clear();
|
||
}
|
||
|
||
void getWHIM(const ROMol &mol, std::vector<double> &res, int confId,
|
||
double th) {
|
||
int numAtoms = mol.getNumAtoms();
|
||
const Conformer &conf = mol.getConformer(confId);
|
||
double Vpoints[3 * numAtoms];
|
||
|
||
for (int i = 0; i < numAtoms; ++i) {
|
||
Vpoints[3 * i] = conf.getAtomPos(i).x;
|
||
Vpoints[3 * i + 1] = conf.getAtomPos(i).y;
|
||
Vpoints[3 * i + 2] = conf.getAtomPos(i).z;
|
||
}
|
||
|
||
std::vector<double> w(126);
|
||
GetWHIMs(conf, w, Vpoints, th);
|
||
|
||
// Dragon extract only this list in this order : L1 L2 L3 P1 P2 G1 G2 G3 E1 E2
|
||
// E3
|
||
int map1[11] = {0, 1, 2, 6, 7, 14, 15, 16, 10, 11, 12};
|
||
|
||
for (int k = 0; k < 7; k++) {
|
||
for (int i = 0; i < 11; i++) {
|
||
res[i + 11 * k] = roundn(w[map1[i] + 18 * k], 3);
|
||
}
|
||
}
|
||
|
||
for (int i = 0; i < 2; i++) {
|
||
res[i + 13 * 7] = roundn(w[17 + 18 * i], 3); // 92 93 for Gu Gm
|
||
}
|
||
|
||
for (int i = 0; i < 7; i++) {
|
||
res[i + 11 * 7] =
|
||
roundn(w[3 + 18 * i],
|
||
3); // 78 79 80 81 82 83 84 for Tu Tm Tv Te Tp Ti Ts
|
||
res[i + 12 * 7] =
|
||
roundn(w[4 + 18 * i],
|
||
3); // 85 86 87 88 89 90 91 for Tu Am Av Ae Ap Ai As
|
||
res[i + 13 * 7 + 2] =
|
||
roundn(w[9 + 18 * i],
|
||
3); // 94 95 96 97 98 99 100 for Ku Km Kv Ke Kp Ki Ks
|
||
res[i + 14 * 7 + 2] =
|
||
roundn(w[13 + 18 * i],
|
||
3); // 101 102 103 104 105 106 107 for Du Dm Dv De Dp Di Ds
|
||
res[i + 15 * 7 + 2] =
|
||
roundn(w[5 + 18 * i],
|
||
3); // 108 109 110 111 112 113 114 for Vu Vm Vv Ve Vp Vi Vs
|
||
}
|
||
}
|
||
|
||
} // end of anonymous namespace
|
||
|
||
void WHIM(const ROMol &mol, std::vector<double> &res, int confId, double th) {
|
||
PRECONDITION(mol.getNumConformers() >= 1, "molecule has no conformers")
|
||
// Dragon final list is: L1u L2u L3u P1u P2u G1u G2u G3u E1u E2u E3u L1m L2m
|
||
// L3m P1m P2m G1m G2m G3m E1m E2m E3m L1v L2v L3v P1v P2v G1v G2v G3v E1v E2v
|
||
// E3v L1e L2e L3e P1e P2e G1e G2e G3e E1e E2e E3e L1p L2p L3p P1p P2p G1p G2p
|
||
// G3p E1p E2p E3p L1i L2i L3i P1i P2i G1i G2i G3i E1i E2i E3i L1s L2s L3s P1s
|
||
// P2s G1s G2s G3s E1s E2s E3s Tu Tm Tv Te Tp Ti Ts Au Am Av Ae Ap
|
||
// Ai As Gu Gm Ku Km Kv Ke Kp Ki Ks Du Dm Dv De Dp Di Ds Vu
|
||
// Vm Vv Ve Vp Vi Vs
|
||
|
||
res.clear();
|
||
res.resize(114);
|
||
getWHIM(mol, res, confId, th);
|
||
}
|
||
} // end of Descriptors namespace
|
||
} // end of RDKit namespace
|