// // Copyright (c) 2016, Guillaume GODIN. // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following // disclaimer in the documentation and/or other materials provided // with the distribution. // * Neither the name of Institue of Cancer Research. // nor the names of its contributors may be used to endorse or promote // products derived from this software without specific prior written // permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Guillaume GODIN access the AutoCorrelation 2D descriptors names in Dragon TDB #include #include "AUTOCORR2D.h" #include "MolData3Ddescriptors.h" #include #include namespace RDKit { namespace Descriptors { namespace { MolData3Ddescriptors moldata3D; // this is the Broto-Moreau 2D descriptors (centered or not) void get2DautocorrelationDesc(const double* dist, unsigned int numAtoms, const ROMol& mol, std::vector& res) { std::vector wp = moldata3D.GetRelativePol(mol); std::vector wm = moldata3D.GetRelativeMW(mol); std::vector wv = moldata3D.GetRelativeVdW(mol); std::vector wi = moldata3D.GetRelativeIonPol(mol); std::vector we = moldata3D.GetRelativeENeg(mol); std::vector ws = moldata3D.GetIState(mol); std::vector w(6 * numAtoms, 0.0); std::vector wmean(6, 0.0); for (unsigned int i = 0; i < numAtoms; i++) { w[0 * numAtoms + i] = wm[i]; w[1 * numAtoms + i] = wv[i]; w[2 * numAtoms + i] = we[i]; w[3 * numAtoms + i] = wp[i]; w[4 * numAtoms + i] = wi[i]; w[5 * numAtoms + i] = ws[i]; } for (unsigned int i = 0; i < numAtoms; i++) { for (unsigned int t = 0; t < 6; ++t) { wmean[t] += w[t * numAtoms + i] / (double)numAtoms; } } std::vector squaresumdiff(6,0.0); for (unsigned int i = 0; i < numAtoms; i++) { for (unsigned int t = 0; t < 6; ++t) { squaresumdiff[t] += ( w[t * numAtoms + i]-wmean[t]) * ( w[t * numAtoms + i]-wmean[t]); } } std::vector TDBmat(48, 0.0); std::vector TDBmatC(48, 0.0); std::vector TDBmatM(48, 0.0); std::vector TDBmatG(48, 0.0); for (unsigned int k = 0; k < 8; k++) { int maxkVertexPairs = 0; for (unsigned int i = 0; i < numAtoms; ++i) { for (unsigned int j = i + 1; j < numAtoms; ++j) { if (dist[j * numAtoms + i] == k + 1) { for (unsigned int t = 0; t < 6; ++t) { TDBmatM[t * 8 + k] += (w[t * numAtoms + i]-wmean[t]) * (w[t * numAtoms + j]-wmean[t]); // ATSC TDBmatG[t * 8 + k] += (w[t * numAtoms + i] - w[t * numAtoms + j]) * (w[t * numAtoms + i] - w[t * numAtoms + j]); TDBmat[t * 8 + k] += w[t * numAtoms + i] * w[t * numAtoms + j]; TDBmatC[t * 8 + k] += fabs(w[t * numAtoms + i] - wmean[t]) * fabs(w[t * numAtoms + j] - wmean[t]); } maxkVertexPairs += 1; } } } for (unsigned int t = 0; t < 6; ++t) { if (maxkVertexPairs > 0) { TDBmat[t * 8 + k] = log1p(TDBmat[t * 8 + k]); TDBmatG[t * 8 + k] = TDBmatG[t * 8 + k]/ squaresumdiff[t] / maxkVertexPairs * (numAtoms-1) / 2.0; TDBmatM[t * 8 + k] = TDBmatM[t * 8 + k]/ squaresumdiff[t] / maxkVertexPairs * numAtoms ; } else { TDBmat[t * 8 + k] = 0.0; TDBmatC[t * 8 + k] = 0.0; TDBmatM[t * 8 + k] = 0.0; TDBmatG[t * 8 + k] = 0.0; } } } // update the Output vector! for (unsigned int t = 0; t < 6; ++t) { for (unsigned int k = 0; k < 8; ++k) { res[t * 8 + k] = std::round(1000 * TDBmat[k + t * 8]) / 1000; res[t * 8 + k + 48] = std::round(1000 * TDBmatC[k + t * 8]) / 1000; res[t * 8 + k + 96] = std::round(1000 * TDBmatM[k + t * 8]) / 1000; res[t * 8 + k + 144] = std::round(1000 * TDBmatG[k + t * 8]) / 1000; } } TDBmat.clear(); TDBmatC.clear(); TDBmatM.clear(); TDBmatG.clear(); wp.clear(); wm.clear(); wv.clear(); wi.clear(); we.clear(); ws.clear(); w.clear(); wmean.clear(); } // this is the Broto-Moreau 2D descriptors (centered or not) void get2DautocorrelationDescCustom(const double* dist, unsigned int numAtoms, const ROMol& mol, std::vector& res, const std::string &customAtomPropName) { std::vector wc = moldata3D.GetCustomAtomProp(mol,customAtomPropName); std::vector w(numAtoms, 0.0); double wmean=0.0; for (unsigned int i = 0; i < numAtoms; i++) { wmean += wc[ + i] / (double)numAtoms; } double squaresumdiff= 0.0; for (unsigned int i = 0; i < numAtoms; i++) { squaresumdiff += (wc[i]-wmean) * (wc[i]-wmean); } std::vector TDBmat(8, 0.0); std::vector TDBmatC(8, 0.0); std::vector TDBmatM(8, 0.0); std::vector TDBmatG(8, 0.0); for (unsigned int k = 0; k < 8; k++) { int maxkVertexPairs = 0; for (unsigned int i = 0; i < numAtoms; ++i) { for (unsigned int j = i + 1; j < numAtoms; ++j) { if (dist[j * numAtoms + i] == k + 1) { TDBmatM[k] += (wc[ i]-wmean) * (wc[ j]-wmean); TDBmatG[k] += (wc[ i] - wc[ j]) * (wc[ i] - w[ j]); TDBmat[k] += wc[ i] * wc[ j]; TDBmatC[k] += fabs(wc[ i] - wmean) * fabs(wc[ j] - wmean); maxkVertexPairs += 1; } } } for (unsigned int t = 0; t < 1; ++t) { if (maxkVertexPairs > 0) { TDBmat[k] = log1p(TDBmat[k]); TDBmatG[k] = TDBmatG[k]/ squaresumdiff / maxkVertexPairs * (numAtoms-1) / 2.0; TDBmatM[k] = TDBmatM[k]/ squaresumdiff / maxkVertexPairs * numAtoms ; } else { TDBmat[k] = 0.0; TDBmatC[k] = 0.0; TDBmatM[k] = 0.0; TDBmatG[k] = 0.0; } } } // update the Output vector! for (unsigned int k = 0; k < 8; ++k) { res[k] = std::round(1000 * TDBmat[k]) / 1000; res[k + 8] = std::round(1000 * TDBmatC[k]) / 1000; res[k + 16] = std::round(1000 * TDBmatM[k]) / 1000; res[k + 24] = std::round(1000 * TDBmatG[k]) / 1000; } TDBmat.clear(); TDBmatC.clear(); TDBmatM.clear(); TDBmatG.clear(); wc.clear(); } void Get2Dauto(const double* dist, unsigned int numAtoms, const ROMol& mol, std::vector& res) { get2DautocorrelationDesc(dist, numAtoms, mol, res); } void Get2Dautoone(const double* dist, unsigned int numAtoms, const ROMol& mol, std::vector& res, const std::string &customAtomPropName) { get2DautocorrelationDescCustom(dist, numAtoms, mol, res, customAtomPropName); } } // end of anonymous namespace void AUTOCORR2D(const ROMol& mol, std::vector& result, const std::string &customAtomPropName) { unsigned int numAtoms = mol.getNumAtoms(); double* dist = MolOps::getDistanceMat(mol, false); // topological matrix if (!customAtomPropName.empty()) { result.clear(); result.resize(32); Get2Dautoone(dist, numAtoms, mol, result, customAtomPropName); } else { result.clear(); result.resize(192); Get2Dauto(dist, numAtoms, mol, result); } } } // end of Descriptors namespace } // end of RDKit namespace