mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-04 21:54:27 +08:00
* First import of GaussianShape. * Tidying. * Custom features. * Optimise. * Optimise. * Return 3 scores rather than 2 including combo score. * Rename useFeatures to useColors. * Python wrappers. * Python tests. * Take out big test. * Add new start mode, as PubChem does it. * Doh! * Fix MolTransforms eigenvalue return. * Two cycle optimisation, mostly working. * Take out bestSoFar score from SCA. * Take out DTYPE. * Tidy out redundant variables. * Optimisation in 2 parts. * More fiddling in pursuit of speed. * Update Python wrapper. * Tweak. * Atom subsets and different radii. * Fix test. * Revert pubchem_shape's test.cpp. * Serialize ShapeInput. * Trigger build * Remove pointers to std::arrays in ShapeInput. * ShapeInput virtual d'tor. * Precondition - ShapeInput needs a molecule with at least 1 conformer. * Rename ShapeInput::d_centroid to ShapeInput::d_canonTrans. * Fix normalization bugs. * Select start mode using moments of inertia rather than eigenvalues of canonical transformation. * Include color features in moments of inertia. * Smidge faster. * Tversky similarity. * Tidy tests. * Tests working on Linux. * Revert force of right handed axes in MolTransforms::computePrincipalAxesAndMomentsFromGyrationMatrix replacing with a comment in the code. * Response to review. * Sneaky allCarbon bug. * add multithreaded test * Response to review. * Doh! Don't recalculate normalization after every transformation. * Re-instate d_normalizationOK. * Re-name functions for fetching canonical transformations. * Separate alpha from coords. * MultiConf works with single conf extraction. * Extract all conformations. Max and best similarities. * Renames d_currConformer to d_activeShape. * Update shapeToMol. * Update shapeToMol. * Changes from synthon shape searching. * Fix normalization of multiple confs. * Update Python wrappers. * Fix shape merge. * Improve bestSimilarity. * Fix python wrapper. * Pull in changes from SynthonShapeSearch: make pruneShapes public. function to negate Alpha values. * clang-tidy suggestions. * clang-tidy suggestions. * Bug in quaternion gradients - we now have only 3 coordinates. * Tidy tests. * Mac result slightly different. * Multi conformer molecule alignment. * Optionally return raw overlap volumes in score functions. * Python wrappers for raw overlap volumes. * Update Python wrapper ShapeInputOptions. * Tidy for PR. * Extra include file. * Extra library * Tidy forward declarations. * Don't prune if threshold < 0.0. * Windows exporty thing. * Check SMILES on merge of ShapeInputs. * PRECONDITION of SMILES on merge of ShapeInputs. * Response to review - rename some functions. * change how overlapVols is passed add a test for it * API suggestions * Response to review. * Remove debugging writes. * Fix Python wrappers. --------- Co-authored-by: David Cosgrove <david@cozchemix.co.uk> Co-authored-by: greg landrum <greg.landrum@gmail.com>
119 lines
4.9 KiB
C++
119 lines
4.9 KiB
C++
//
|
|
// Copyright (C) 2026 David Cosgrove and other RDKit contributors
|
|
//
|
|
// @@ All Rights Reserved @@
|
|
// This file is part of the RDKit.
|
|
// The contents are covered by the terms of the BSD license
|
|
// which is included in the file license.txt, found at the root
|
|
// of the RDKit source tree.
|
|
//
|
|
// Original author: David Cosgrove (CozChemIx Limited)
|
|
//
|
|
// Options for the Roshambo2-based shape overlay.
|
|
|
|
#ifndef RDKIT_SHAPEOVERLAYOPTIONS_GUARD
|
|
#define RDKIT_SHAPEOVERLAYOPTIONS_GUARD
|
|
|
|
#include <RDGeneral/export.h>
|
|
|
|
namespace RDKit {
|
|
namespace GaussianShape {
|
|
|
|
enum class RDKIT_GAUSSIANSHAPE_EXPORT StartMode {
|
|
ROTATE_0, //! No rotation, just normalization if requested
|
|
ROTATE_180, //! ROTATE_0 plus rotate by 180 degrees about each of x, y, z
|
|
ROTATE_180_WIGGLE, //! ROTATE_180 plus, as the PubChem code does, rotate by a
|
|
//! small amount (~25 degrees) about each axis and use the
|
|
//! highest scoring of those as the start point for that
|
|
//! rotation angle
|
|
ROTATE_45, //! ROTATE_180 plus rotate by 45 degrees about pairs of each of x,
|
|
//! y, z
|
|
ROTATE_0_FRAGMENT, //! No rotation, translate probe to each end of ref
|
|
ROTATE_180_FRAGMENT, //! Translate probe to each end of ref and then
|
|
//! ROTATE_180
|
|
ROTATE_45_FRAGMENT, //! Translate probe to each end of ref and then ROTATE_90
|
|
A_LA_PUBCHEM, //! Uses the eigenvalues of the principal vectors to decide
|
|
//! whether to do ROTATE_180_WIGGLE or ROTATE_45
|
|
};
|
|
|
|
enum class RDKIT_GAUSSIANSHAPE_EXPORT OptimMode {
|
|
SHAPE_ONLY, //! Drive the optimisation by shape overlap only.
|
|
SHAPE_PLUS_COLOR_SCORE, //! Drive the optimisation by shape, but include
|
|
//! color in the score to determine the best
|
|
//! solution. Color never used in the optimisation
|
|
//! stage.
|
|
SHAPE_PLUS_COLOR, //! Drive the optimisation by overlap of shape and color
|
|
//! features.
|
|
};
|
|
|
|
struct RDKIT_GAUSSIANSHAPE_EXPORT ShapeOverlayOptions {
|
|
//! Different modes for starting the optimisation. Default is as used by the
|
|
//! PubChem code. The molecules are normalized so the principal axes are
|
|
//! along the cartesian axes rather than the shape quadrupole axes as Grant et
|
|
//! al. did.
|
|
StartMode startMode{StartMode::A_LA_PUBCHEM};
|
|
OptimMode optimMode{
|
|
OptimMode::SHAPE_PLUS_COLOR_SCORE}; //! Optimisation mode.
|
|
double simAlpha{
|
|
1.0}; //! When doing a Tversky similarity, the alpha value. If alpha and
|
|
//! beta are both the default 1.0, it's a Tanimoto similarity. A
|
|
//! high alpha and low beta emphasize the fit volume in the
|
|
//! similarity and vice versa. Tversky is O / (A * (R - O) + B * (F
|
|
//! - O) + O) where O is the overlap volume, R is the reference's
|
|
//! volume and F is the fit's volume. This is different from that
|
|
//! used by OpenEye (O / (A * R + B * F)).
|
|
double simBeta{1.0}; //! When doing a Tversky similarity, the beta value.
|
|
double optParam{0.5}; //! If using colors, the relative weights of shape and
|
|
//! color scores.
|
|
int nSteps{100}; //! Maximum number of steps for optimiser to take.
|
|
|
|
bool normalize{
|
|
true}; //! Whether to normalise the shapes by putting them into their
|
|
//! canonical conformations (centred at the origin, aligned along
|
|
//! its principal axes) before starting.
|
|
bool useDistCutoff{
|
|
true}; //! Whether to use a distance cutoff for the volume calculation.
|
|
double distCutoff{4.5}; //! The distance cutoff. If 2 atoms are more than
|
|
//! this distance apart, they are not included in the
|
|
//! volume calculation. A smaller value is faster but
|
|
//! less precise.
|
|
double shapeConvergenceCriterion{
|
|
0.001}; //! Optimisation stops when the shape score changes by less
|
|
//! than this amount. A larger number is faster but less
|
|
//! precise.
|
|
};
|
|
|
|
inline std::ostream &operator<<(std::ostream &os, const StartMode &sm) {
|
|
switch (sm) {
|
|
case StartMode::ROTATE_0:
|
|
os << "ROTATE_0";
|
|
break;
|
|
case StartMode::ROTATE_0_FRAGMENT:
|
|
os << "ROTATE_0_FRAGMENT";
|
|
break;
|
|
case StartMode::ROTATE_180:
|
|
os << "ROTATE_180";
|
|
break;
|
|
case StartMode::ROTATE_180_FRAGMENT:
|
|
os << "ROTATE_180_FRAGMENT";
|
|
break;
|
|
case StartMode::ROTATE_180_WIGGLE:
|
|
os << "ROTATE_180_WIGGLE";
|
|
break;
|
|
case StartMode::ROTATE_45:
|
|
os << "ROTATE_45";
|
|
break;
|
|
case StartMode::ROTATE_45_FRAGMENT:
|
|
os << "ROTATE_45_FRAGMENT";
|
|
break;
|
|
case StartMode::A_LA_PUBCHEM:
|
|
os << "A_LA_PUBCHEM";
|
|
break;
|
|
}
|
|
return os;
|
|
}
|
|
} // namespace GaussianShape
|
|
} // namespace RDKit
|
|
|
|
#endif // RDKIT_SHAPEOVERLAYOPTIONS_GUARD
|