mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-06 22:39:55 +08:00
266 lines
9.8 KiB
C++
266 lines
9.8 KiB
C++
//
|
|
// Copyright (C) 2026 David Cosgrove and other RDKit contributors
|
|
//
|
|
// @@ All Rights Reserved @@
|
|
// This file is part of the RDKit.
|
|
// The contents are covered by the terms of the BSD license
|
|
// which is included in the file license.txt, found at the root
|
|
// of the RDKit source tree.
|
|
//
|
|
// Original author: David Cosgrove (CozChemIx Limited)
|
|
//
|
|
|
|
#ifndef RDKIT_SHAPEINPUT_GUARD
|
|
#define RDKIT_SHAPEINPUT_GUARD
|
|
|
|
#include <array>
#include <cstddef>
#include <iterator>
#include <memory>
#include <sstream>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

#include <RDGeneral/export.h>
#include <Geometry/Transform3D.h>

#include <RDGeneral/BoostStartInclude.h>
#include <boost/dynamic_bitset.hpp>
#ifdef RDK_USE_BOOST_SERIALIZATION
#include <boost/archive/text_oarchive.hpp>
#include <boost/archive/text_iarchive.hpp>
#include <boost/serialization/vector.hpp>
#include <boost/serialization/array.hpp>
#include <boost/serialization/unique_ptr.hpp>
#endif
#include <RDGeneral/BoostEndInclude.h>

#include <GraphMol/GaussianShape/ShapeOverlayOptions.h>
|
|
|
|
// The code below was provided by Claude (Sonnet 4.6).
|
|
// It first tried to get me to use boost/serialization/dynamic_bitset.hpp
|
|
// and then admitted that it had made that up.
|
|
namespace boost {
|
|
namespace serialization {
|
|
|
|
template <class Archive, typename Block, typename Allocator>
|
|
void serialize(Archive &ar, boost::dynamic_bitset<Block, Allocator> &bs,
|
|
const unsigned int /*version*/) {
|
|
size_t num_bits = bs.size();
|
|
ar & num_bits;
|
|
|
|
std::vector<Block> blocks;
|
|
|
|
if (Archive::is_saving::value) {
|
|
to_block_range(bs, std::back_inserter(blocks));
|
|
}
|
|
|
|
ar & blocks;
|
|
|
|
if (Archive::is_loading::value) {
|
|
bs.resize(num_bits);
|
|
from_block_range(blocks.begin(), blocks.end(), bs);
|
|
bs.resize(num_bits); // trim any excess bits
|
|
}
|
|
}
|
|
|
|
} // namespace serialization
|
|
} // namespace boost
|
|
|
|
namespace RDKit {
|
|
class ROMol;
|
|
class RWMol;
|
|
namespace GaussianShape {
|
|
|
|
// Constants taken from Grant et al.
// P: presumably the Gaussian amplitude used for the atomic Gaussians -
// TODO confirm against the implementation.
constexpr double P = 2.7;
// KAPPA: numerator of the Gaussian width parameter; ShapeInput documents
// alpha as KAPPA / (r * r), where r is the radius of the atom.
constexpr double KAPPA = 2.41798793102;
|
|
using CustomFeatures =
|
|
std::vector<std::tuple<unsigned int, RDGeom::Point3D, double>>;
|
|
|
|
struct ShapeInputOptions {
|
|
ShapeInputOptions() = default;
|
|
ShapeInputOptions(const ShapeInputOptions &) = default;
|
|
ShapeInputOptions(ShapeInputOptions &&) = default;
|
|
ShapeInputOptions &operator=(const ShapeInputOptions &) = default;
|
|
ShapeInputOptions &operator=(ShapeInputOptions &&) = default;
|
|
|
|
~ShapeInputOptions() = default;
|
|
|
|
// By default, it will create features using the RDKit pharmacophore
|
|
// definitions.
|
|
bool useColors{
|
|
true}; //! Whether to build the color features. By default, it will
|
|
//! create features using the RDKit pharmacophore definitions.
|
|
|
|
CustomFeatures customFeatures; //! Custom color features used verbatim. A
|
|
//! vector of tuples of integer type, Point3D
|
|
//! coords, double radius.
|
|
std::vector<unsigned int>
|
|
atomSubset; //! If not empty, use just these atoms in the molecule to
|
|
//! form the ShapeInput object.
|
|
std::vector<std::pair<unsigned int, double>>
|
|
atomRadii; //! Use these non-standard radii for these atoms. The int is
|
|
//! for the atom index in the molecule, not the atomic number.
|
|
//! Not all atoms need be specified, just some radii can be
|
|
//! over-ridden, with the rest left as standard.
|
|
bool allCarbonRadii{
|
|
true}; //! Whether to use carbon radii for all atoms (which is quicker
|
|
//! but less accurate) or vdw radii appropriate for the elements.
|
|
};
|
|
|
|
// Data for shape alignment code
|
|
class RDKIT_GAUSSIANSHAPE_EXPORT ShapeInput {
|
|
public:
|
|
//! Create the ShapeInput object.
|
|
//! @param mol: The molecule of interest
|
|
//! @param confId: The conformer to use
|
|
//! @param opts: Options for setting up the shape
|
|
ShapeInput(const ROMol &mol, int confId = -1,
|
|
const ShapeInputOptions &opts = ShapeInputOptions(),
|
|
const ShapeOverlayOptions &overlayOpts = ShapeOverlayOptions());
|
|
ShapeInput(const std::string &str) {
|
|
#ifndef RDK_USE_BOOST_SERIALIZATION
|
|
PRECONDITION(0, "Boost SERIALIZATION is not enabled")
|
|
#else
|
|
std::stringstream ss(str);
|
|
boost::archive::text_iarchive ia(ss);
|
|
ia &*this;
|
|
#endif
|
|
}
|
|
ShapeInput(const ShapeInput &other);
|
|
ShapeInput(ShapeInput &&other) = default;
|
|
ShapeInput &operator=(const ShapeInput &other);
|
|
ShapeInput &operator=(ShapeInput &&other) = default;
|
|
virtual ~ShapeInput() = default;
|
|
|
|
std::string toString() const {
|
|
#ifndef RDK_USE_BOOST_SERIALIZATION
|
|
PRECONDITION(0, "Boost SERIALIZATION is not enabled")
|
|
#else
|
|
std::stringstream ss;
|
|
boost::archive::text_oarchive oa(ss);
|
|
oa &*this;
|
|
return ss.str();
|
|
#endif
|
|
}
|
|
|
|
// Note that the coords returned is a vector size 4*getNumAtoms()
|
|
// with the 4th value per atom being the alpha paramter.
|
|
const std::vector<double> &getCoords() const { return d_coords; }
|
|
//! Fetch the coordinates of the atoms and optionally features.
|
|
std::vector<RDGeom::Point3D> getAtomPoints(bool includeColors = false) const;
|
|
bool getNormalized() const { return d_normalized; }
|
|
const std::vector<int> &getTypes() const { return d_types; }
|
|
unsigned int getNumAtoms() const { return d_numAtoms; }
|
|
unsigned int getNumFeatures() const { return d_numFeats; }
|
|
double getShapeVolume() const { return d_selfOverlapVol; }
|
|
double getColorVolume() const { return d_selfOverlapColor; }
|
|
const boost::dynamic_bitset<> *getCarbonRadii() const {
|
|
return d_carbonRadii.get();
|
|
}
|
|
// These functions use cached values if available.
|
|
const std::array<double, 9> &calcCanonicalRotation();
|
|
const std::array<double, 3> &calcCanonicalTranslation();
|
|
const std::array<double, 3> &calcEigenValues();
|
|
const std::array<size_t, 6> &calcExtremes();
|
|
// Return the principal moments of inertia, if Eigen3 is available, and the
|
|
// eigenvalues of the canonical transformation if not.
|
|
std::array<double, 3> calcMomentsOfInertia(bool includeColors = false) const;
|
|
|
|
// Align the principal axes to the cartesian axes and centre on the origin.
|
|
// Doesn't require that the shape was created from a molecule. Creates
|
|
// the necessary transformation if not already done.
|
|
void normalizeCoords();
|
|
|
|
void transformCoords(RDGeom::Transform3D &xform);
|
|
|
|
// Mock a molecule up from the shape for visual inspection and sometimes
|
|
// calculation of the normalization matrices. No bonds.
|
|
// Atoms are C, features are N.
|
|
virtual std::unique_ptr<RWMol> shapeToMol(bool includeColors = true) const;
|
|
|
|
#ifdef RDK_USE_BOOST_SERIALIZATION
|
|
template <class Archive>
|
|
void serialize(Archive &ar, const unsigned int) {
|
|
ar & d_coords;
|
|
ar & d_types;
|
|
ar & d_numAtoms;
|
|
ar & d_numFeats;
|
|
ar & d_selfOverlapVol;
|
|
ar & d_selfOverlapColor;
|
|
ar & d_extremePoints;
|
|
ar & d_carbonRadii;
|
|
ar & d_normalized;
|
|
ar & d_normalizationOK;
|
|
ar & d_canonRot;
|
|
ar & d_canonTrans;
|
|
ar & d_eigenValues;
|
|
}
|
|
#endif
|
|
|
|
private:
|
|
void extractAtoms(const ROMol &mol, int confId,
|
|
const ShapeInputOptions &opts);
|
|
// Extract the features for the color scores, using RDKit pphore features
|
|
// for now. Other options to be added later.
|
|
void extractFeatures(const ROMol &mol, int confId,
|
|
const ShapeInputOptions &shapeOpts);
|
|
// Calculate the rotation and translation that will align the principal axes
|
|
// to the cartesian axes and centre on the origin.
|
|
void calcNormalization();
|
|
|
|
void calculateExtremes();
|
|
|
|
std::vector<double> d_coords; // The coordinates and alpha parameter for the
|
|
// atoms and features, packed as 4 floats per
|
|
// item - x, y, z and alpha. alpha is KAPPA / (r * r) where r is the radius
|
|
// of the atom. This is not used if using all_atoms_carbon mode.
|
|
std::vector<int> d_types; // The feature types. The size is the same
|
|
// as the number of coordinates, padded with 0
|
|
// for the atoms.
|
|
unsigned int d_numAtoms; // The number of atoms
|
|
unsigned int d_numFeats; // The number of features
|
|
double d_selfOverlapVol{0.0}; // Shape volume
|
|
double d_selfOverlapColor{0.0}; // Color volume
|
|
// These are the points at the extremes of the x, y and z axes.
|
|
// they are min_x, min_y, min_z and max_x, max_y, max_z.
|
|
std::array<size_t, 6> d_extremePoints;
|
|
std::unique_ptr<boost::dynamic_bitset<>>
|
|
d_carbonRadii; // Flags those atoms with a carbon radius, for faster
|
|
// calculation later.
|
|
|
|
// This is the rotation and translation to align the principal axes of the
|
|
// shape with cartesian axes. If d_normalized is true, it has been applied
|
|
// to the coordinates.
|
|
bool d_normalized{false};
|
|
// If the shape is moved, the normalization matrices are no longer valid.
|
|
// This flags that so it is re-computed as required.
|
|
bool d_normalizationOK{false};
|
|
|
|
std::array<double, 9> d_canonRot;
|
|
std::array<double, 3> d_canonTrans;
|
|
// The sorted eigenvalues of the principal axes.
|
|
std::array<double, 3> d_eigenValues;
|
|
};
|
|
|
|
// Calculate the mean position of the given atoms.
|
|
RDKIT_GAUSSIANSHAPE_EXPORT RDGeom::Point3D computeFeaturePos(
|
|
const ROMol &mol, int confId, const std::vector<unsigned int> &ats);
|
|
|
|
//! Build a Transform3D from a quaternion and a translation.
// NOTE(review): presumably quat points to 4 doubles and trans to 3 doubles;
// the element order is defined by the implementation - confirm there.
RDKIT_GAUSSIANSHAPE_EXPORT RDGeom::Transform3D quatTransToTransform(
    const double *quat, const double *trans);
|
|
|
|
// Apply the transformation to the coordinates assumed to be in
|
|
// ShapeInput.d_coords form.
|
|
RDKIT_GAUSSIANSHAPE_EXPORT void applyTransformToShape(
|
|
std::vector<double> &shape, RDGeom::Transform3D &xform);
|
|
RDKIT_GAUSSIANSHAPE_EXPORT void applyTransformToShape(
|
|
const double *inShape, double *outShape, size_t numPoints,
|
|
RDGeom::Transform3D &xform);
|
|
//! Translate the shape by the given vector; this overload works in place.
// NOTE(review): presumably the coordinates are in ShapeInput.d_coords form,
// as for applyTransformToShape - confirm against the implementation.
RDKIT_GAUSSIANSHAPE_EXPORT void translateShape(
    std::vector<double> &shape, const RDGeom::Point3D &translation);
//! As above, reading the coordinates from inShape and writing the result to
//! outShape.
RDKIT_GAUSSIANSHAPE_EXPORT void translateShape(
    const double *inShape, double *outShape, size_t numPoints,
    const RDGeom::Point3D &translation);
|
|
|
|
} // namespace GaussianShape
|
|
} // namespace RDKit
|
|
|
|
#endif // RDKIT_SHAPEINPUT_GUARD
|