Files
rdkit/Code/GraphMol/Descriptors/MolData3Ddescriptors.cpp
Greg Landrum 71932ee4f9 Add a collection of new 3D descriptors (#1467)
* 3D Descriptors Dragons

* stripped down, not yet working

* get this building on a non C++-11 compiler

* move the python test to the python directory

* move the python test to the python directory

* add the python test

* now at least those tests run

* warning comment

* some basic refactoring and cleanup

* get python wrapper "working" (completely untested)

* fix Name

* fixing AutoCorr & RDF

* AutoCorr test added
* RDF reviewed based on AutoCorr comments

* fix Morse code

* Morse reviewed

* Correct Morse & start Getaway

* correct MORSE test
* start Getaway clean up

* simplification of Whim

* better

* fix Getaway

* fix RCON

* merge repaired

* adding Dragon 2D autocorrelations descriptors

* fix the 3D autocorrelation descriptors based on the modification in
Dragon.
* Adding the 2D autocorrelation descriptors (no need of Eigen
dependency for this one)
* Adding 2D test case
* IState … no idea for the moment
* there is an error in 2D computation (memory error ???)

* fix the IState for molecules with Hs

* need to use getTotalNumHs(true) not getTotalNumHs()
* also need to remove Hs in both dv and d!

* fixing Rcov values

I fix the Rcov values

* fix Getaway

* remove push_back
* remove call to sum
* improve tests

* fix getaway

* adding precision parameter to GETAWAY
* adding rounding (1e-3)

* fix WHIM

* use void in declarations of function
* update MolDescriptors link

* remove print option in WHIM

* fix python wrapper to 3D descriptors.

- all modifications reduce computation time by a factor of 3!

* final fix for Getaway

* all outputs are fixed except the first 2 values, due to the clustering
approach.
* the clustering cannot be fixed due to a float precision issue between Java &
C++

* best fix of ITH and ISH

* use the same algorithm as in Dragon 6 but there is still a deviation

* remove std::move

* std::move only works with C++11

* fixing issue based on Greg Comments

* auto2D still not working on my env

* update 3d test.py

* auto 2D not working after the first loop test

* tighten up the tests

* change name

* update, but still does not pass

* make this run (though it does not work)

* re-enable test3D

* some cleanup

* add GETAWAY test data. Note that the tests fail

* fix the ATS and ATSC autocorrelation 2D

The Broto-Moreau and Geary autocorrelations are still not correct; again, Dragon computes them in its own specific way. The results are not consistent with PaDEL because we use relative weights, which PaDEL does not.

* one minor change to get things to compile

* fix the M & G matrix computation

fix inversion in the computation of the equations for both the M & G matrices

* update autocorr2d tests

* 192 examples

* fix issue in cluster 0.01 0.009 case

this is not correct in all cases

* update GETAWAY expected values to reflect the fact that we cannot reproduce the literature values exactly
fix a leak in GETAWAY

* fix the negative values in gamma

this is strictly the implementation found in the book "Molecular Descriptors for Chemoinformatics" (except for the case where an atom already lies on the axis: it should then be added to the symmetric list, which this implementation does not do)

* Update WHIM.cpp

adding the axis atoms to the symmetrical list

* update WHIM tests

* add AUTOCORR2D to MolDescriptors and the python wrappers

* start adding tests

* test the python versions of the new descriptors

* update list of descriptors
2017-06-27 18:57:10 +02:00

239 lines
6.7 KiB
C++

#include <cstdlib>
#include <iostream>
#include "MolData3Ddescriptors.h"
#include <GraphMol/RDKitBase.h>
#include "GraphMol/PartialCharges/GasteigerCharges.h"
#include "GraphMol/PartialCharges/GasteigerParams.h"
using namespace std;
// Default constructor. The body is empty: no work is done here beyond the
// default construction of the members.
MolData3Ddescriptors::MolData3Ddescriptors() {}
// Unit weighting scheme.
// numAtoms: number of entries to produce.
// Returns a vector of numAtoms values, every one equal to 1.0.
std::vector<double> MolData3Ddescriptors::GetUn(int numAtoms) {
  return std::vector<double>(numAtoms, 1.0);
}
// Per-atom weights taken from the data3D MW table.
// mol: molecule whose atoms are looked up.
// Returns one value per atom; entry i is the MW table value stored at
// position (atomic number of atom i) - 1.
// NOTE(review): an atom with atomic number 0 would read the table at
// index -1; presumably such atoms never reach this code - confirm.
std::vector<double> MolData3Ddescriptors::GetRelativeMW(
    const RDKit::ROMol& mol) {
  const double* mwTable = data3D.getMW();
  const int atomCount = mol.getNumAtoms();
  std::vector<double> weights(atomCount, 0.0);
  for (int idx = 0; idx < atomCount; ++idx) {
    const int z = mol.getAtomWithIdx(idx)->getAtomicNum();
    weights[idx] = mwTable[z - 1];  // the table is indexed by Z - 1
  }
  return weights;
}
// Per-atom values taken from the data3D POL table.
// mol: molecule whose atoms are looked up.
// Returns one value per atom; entry i is the POL table value stored at
// position (atomic number of atom i) - 1.
// NOTE(review): an atom with atomic number 0 would read the table at
// index -1; presumably such atoms never reach this code - confirm.
std::vector<double> MolData3Ddescriptors::GetRelativePol(
    const RDKit::ROMol& mol) {
  const int atomCount = mol.getNumAtoms();
  const double* polTable = data3D.getPOL();
  std::vector<double> polarizabilities(atomCount, 0.0);
  for (int idx = 0; idx < atomCount; ++idx) {
    const int z = mol.getAtomWithIdx(idx)->getAtomicNum();
    polarizabilities[idx] = polTable[z - 1];  // the table is indexed by Z - 1
  }
  return polarizabilities;
}
// Per-atom values taken from the data3D VDW table.
// mol: molecule whose atoms are looked up.
// Returns one value per atom; entry i is the VDW table value stored at
// position (atomic number of atom i) - 1.
// NOTE(review): an atom with atomic number 0 would read the table at
// index -1; presumably such atoms never reach this code - confirm.
std::vector<double> MolData3Ddescriptors::GetRelativeVdW(
    const RDKit::ROMol& mol) {
  const int atomCount = mol.getNumAtoms();
  const double* vdwTable = data3D.getVDW();
  std::vector<double> volumes(atomCount, 0.0);
  for (int idx = 0; idx < atomCount; ++idx) {
    const int z = mol.getAtomWithIdx(idx)->getAtomicNum();
    volumes[idx] = vdwTable[z - 1];  // the table is indexed by Z - 1
  }
  return volumes;
}
// Per-atom values from the data3D RCOV table, normalised by the carbon entry.
// mol: molecule whose atoms are looked up.
// Returns one value per atom; entry i is table[Z_i - 1] / table[5], where
// index 5 is the slot for atomic number 6 under the Z - 1 indexing used here.
// NOTE(review): an atom with atomic number 0 would read the table at
// index -1; presumably such atoms never reach this code - confirm.
std::vector<double> MolData3Ddescriptors::GetRelativeRcov(
    const RDKit::ROMol& mol) {
  const int kCarbonSlot = 5;  // atomic number 6, minus 1
  const int atomCount = mol.getNumAtoms();
  const double* rcovTable = data3D.getRCOV();
  std::vector<double> radii(atomCount, 0.0);
  for (int idx = 0; idx < atomCount; ++idx) {
    const int z = mol.getAtomWithIdx(idx)->getAtomicNum();
    radii[idx] = rcovTable[z - 1] / rcovTable[kCarbonSlot];
  }
  return radii;
}
// Per-atom values taken from the data3D NEG table.
// mol: molecule whose atoms are looked up.
// Returns one value per atom; entry i is the NEG table value stored at
// position (atomic number of atom i) - 1.
// NOTE(review): an atom with atomic number 0 would read the table at
// index -1; presumably such atoms never reach this code - confirm.
std::vector<double> MolData3Ddescriptors::GetRelativeENeg(
    const RDKit::ROMol& mol) {
  const int atomCount = mol.getNumAtoms();
  const double* negTable = data3D.getNEG();
  std::vector<double> electronegativities(atomCount, 0.0);
  for (int idx = 0; idx < atomCount; ++idx) {
    const int z = mol.getAtomWithIdx(idx)->getAtomicNum();
    electronegativities[idx] = negTable[z - 1];  // the table is indexed by Z - 1
  }
  return electronegativities;
}
// Per-atom values taken from the data3D IonPOL table.
// mol: molecule whose atoms are looked up.
// Returns one value per atom; entry i is the IonPOL table value stored at
// position (atomic number of atom i) - 1. The values are copied as-is, with
// no normalisation applied in this function.
// NOTE(review): an atom with atomic number 0 would read the table at
// index -1; presumably such atoms never reach this code - confirm.
std::vector<double> MolData3Ddescriptors::GetRelativeIonPol(
    const RDKit::ROMol& mol) {
  const int atomCount = mol.getNumAtoms();
  const double* ionPolTable = data3D.getIonPOL();
  std::vector<double> potentials(atomCount, 0.0);
  for (int idx = 0; idx < atomCount; ++idx) {
    const int z = mol.getAtomWithIdx(idx)->getAtomicNum();
    potentials[idx] = ionPolTable[z - 1];  // the table is indexed by Z - 1
  }
  return potentials;
}
// Gasteiger partial charges for mol.
// mol: molecule to compute charges for.
// Returns a vector with one entry per atom, filled in by
// RDKit::computeGasteigerCharges.
std::vector<double> MolData3Ddescriptors::GetCharges(const RDKit::ROMol& mol) {
  // 12 iterations are requested here; more could be used.
  const int numIterations = 12;
  // NOTE(review): presumably the "throw on parameter failure" switch of
  // computeGasteigerCharges - confirm against GasteigerCharges.h.
  const bool lastFlag = true;
  std::vector<double> partialCharges(mol.getNumAtoms(), 0.0);
  RDKit::computeGasteigerCharges(mol, partialCharges, numIterations, lastFlag);
  return partialCharges;
}
// Maps an atomic number onto a principal quantum number in the range 1..7.
// AtomicNum: atomic number of the element.
// Returns k + 1 for the first upper bound in {2, 10, 18, 36, 54, 86} that
// AtomicNum does not exceed (k being the bound's position), or 7 when
// AtomicNum is above 86.
int MolData3Ddescriptors::GetPrincipalQuantumNumber(int AtomicNum) {
  // Largest atomic number that still maps to 1, 2, ..., 6 respectively.
  static const int upperBounds[] = {2, 10, 18, 36, 54, 86};
  const int numBounds = sizeof(upperBounds) / sizeof(upperBounds[0]);
  for (int k = 0; k < numBounds; ++k) {
    if (AtomicNum <= upperBounds[k]) {
      return k + 1;
    }
  }
  return 7;
}
// Per-atom intrinsic state (I-state) values.
// mol: molecule to process.
// Returns one value per atom. An atom keeps the default 1.0 when it is a
// hydrogen (atomic number <= 1), has degree 0, or has degree - h <= 0.
// Otherwise the value is ((4 / N^2) * dv + 1) / d rounded to 3 decimals, with
//   h  = getTotalNumHs(true),
//   dv = outer-shell electron count - h,
//   N  = GetPrincipalQuantumNumber(atomic number),
//   d  = degree - h.
std::vector<double> MolData3Ddescriptors::GetIState(const RDKit::ROMol& mol) {
  const int atomCount = mol.getNumAtoms();
  std::vector<double> iStates(atomCount, 1.0);  // 1.0 is what Hs end up with
  for (int idx = 0; idx < atomCount; ++idx) {
    const RDKit::Atom* at = mol.getAtomWithIdx(idx);
    const int z = at->getAtomicNum();
    const int nbrCount = at->getDegree();  // number of substituents
    if (nbrCount <= 0 || z <= 1) {
      continue;
    }
    // getTotalNumHs(true) is required so that the Hs get counted
    const int hCount = at->getTotalNumHs(true);
    // valence electrons with the Hs removed
    const int valenceNoH =
        RDKit::PeriodicTable::getTable()->getNouterElecs(z) - hCount;
    const int n = GetPrincipalQuantumNumber(z);
    // degree with the Hs removed
    const double degreeNoH = (double)nbrCount - hCount;
    if (degreeNoH > 0) {
      const double scaled = 1000 * (4.0 / (n * n) * valenceNoH + 1.0);
      iStates[idx] = round(scaled / degreeNoH) / 1000;
    }
  }
  return iStates;
}
// Per-atom intrinsic state values ("Drag" variant).
// mol: molecule to process.
// Returns one value per atom. Atoms with atomic number <= 1, degree 0, or
// degree - h <= 0 keep the default 1.0. Every other atom gets
// ((4 / N^2) * dv + 1) / d rounded to 3 decimals, where h is
// getTotalNumHs(true), dv is the outer-shell electron count minus h, N comes
// from GetPrincipalQuantumNumber, and d is degree - h.
std::vector<double> MolData3Ddescriptors::GetIStateDrag(
    const RDKit::ROMol& mol) {
  const int atomCount = mol.getNumAtoms();
  std::vector<double> result(atomCount, 1.0);
  for (int idx = 0; idx < atomCount; ++idx) {
    const RDKit::Atom* current = mol.getAtomWithIdx(idx);
    const int z = current->getAtomicNum();
    const int nbrCount = current->getDegree();  // number of substituents
    const bool eligible = (nbrCount > 0) && (z > 1);
    if (!eligible) {
      continue;
    }
    const int hCount = current->getTotalNumHs(true);
    // outer-shell (valence) electron count of the element
    const int outerElecs = RDKit::PeriodicTable::getTable()->getNouterElecs(z);
    // valence electrons once the Hs are taken out
    const double valenceNoH = (double)outerElecs - hCount;
    const int n = GetPrincipalQuantumNumber(z);
    // degree once the Hs are taken out
    const double degreeNoH = (double)nbrCount - hCount;
    if (degreeNoH > 0) {
      const double scaled = 1000 * (4.0 / (n * n) * valenceNoH + 1.0);
      result[idx] = round(scaled / degreeNoH) / 1000;
    }
  }
  return result;
}
// Per-atom E-state values, adapted from EState.py.
// mol: molecule to process.
// Returns one value per atom: the atom's I-state (from GetIState) plus the
// sum over all other atoms j of (I_i - I_j) / (dist_ij + 1)^2. Pairs whose
// (distance + 1) is not below 1e6 are skipped.
std::vector<double> MolData3Ddescriptors::GetEState(const RDKit::ROMol& mol) {
  int numAtoms = mol.getNumAtoms();
  std::vector<double> Is = GetIState(mol);
  // Flat numAtoms x numAtoms matrix, read as dist[i * numAtoms + j].
  // NOTE(review): the pointer is not freed here; presumably RDKit owns the
  // matrix - confirm against the getDistanceMat documentation.
  double* dist = RDKit::MolOps::getDistanceMat(mol, false, false);
  // Heap-backed accumulator. A variable-length array is not standard C++ and
  // would put an unbounded amount of data on the stack.
  std::vector<double> accum(numAtoms, 0.0);
  for (int i = 0; i < numAtoms; i++) {
    for (int j = i + 1; j < numAtoms; j++) {
      const double p = dist[i * numAtoms + j] + 1;
      if (p < 1e6) {
        // The perturbation is antisymmetric: what i gains, j loses.
        const double tmp = (Is[i] - Is[j]) / (p * p);
        accum[i] += tmp;
        accum[j] -= tmp;
      }
    }
  }
  for (int i = 0; i < numAtoms; i++) {
    Is[i] += accum[i];
  }
  return Is;
}
// Variant of GetEState modified to follow the documentation of the Padel
// code, with the rescaling used for WHIM.
// mol: molecule to process.
// Returns one value per atom: the atom's I-state (from GetIState), plus the
// sum of (S_i - S_j) / (dist_ij + 1)^2 over the atoms j at distance exactly 1
// from it, plus 7.0.
std::vector<double> MolData3Ddescriptors::GetEState2(const RDKit::ROMol& mol) {
  int numAtoms = mol.getNumAtoms();
  std::vector<double> Si = GetIState(mol);
  // Flat numAtoms x numAtoms matrix, read as dist[i * numAtoms + j].
  // NOTE(review): the pointer is not freed here; presumably RDKit owns the
  // matrix - confirm against the getDistanceMat documentation.
  double* dist = RDKit::MolOps::getDistanceMat(mol, false, false);
  // Heap-backed accumulator. A variable-length array is not standard C++ and
  // would put an unbounded amount of data on the stack.
  std::vector<double> accum(numAtoms, 0.0);
  for (int i = 0; i < numAtoms; i++) {
    for (int j = i + 1; j < numAtoms; j++) {
      const double d = dist[i * numAtoms + j];
      // Only pairs at distance 1 contribute (adjacency-matrix entries only,
      // not every term of the sum).
      if (d == 1) {
        const double p = d + 1;
        const double tmp = (Si[i] - Si[j]) / (p * p);
        accum[i] += tmp;
        accum[j] -= tmp;
      }
    }
  }
  // Add the accumulated perturbation to Si.
  // WHIM Si values: electrotopological indices are scaled thus:
  // Si' = Si + 7 => Si' > 0.
  // In this case only the nonhydrogen atoms are considered, and the atomic
  // electrotopological charge of each atom depends on its neighbours.
  // The rescaling constant is added for WHIM only.
  for (int i = 0; i < numAtoms; i++) {
    Si[i] += accum[i] + 7.0;
  }
  return Si;
}