// // Copyright (C) 2026 David Cosgrove and other RDKit contributors // // @@ All Rights Reserved @@ // This file is part of the RDKit. // The contents are covered by the terms of the BSD license // which is included in the file license.txt, found at the root // of the RDKit source tree. // // Original author: David Cosgrove (CozChemIx Limited) // #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef RDK_HAS_EIGEN3 #include #endif namespace RDKit { namespace GaussianShape { // Bondi radii // You can find more of these in Table 12 of this publication: // https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3658832/ const std::map vdw_radii = { {0, DUMMY_RAD}, // Dummy, same as Xe. {1, 1.10}, // H {2, 1.40}, // He {3, 1.81}, // Li {4, 1.53}, // Be {5, 1.92}, // B {6, CARBON_RAD}, // C {7, 1.55}, // N {8, 1.52}, // O {9, 1.47}, // F {10, 1.54}, // Ne {11, 2.27}, // Na {12, 1.73}, // Mg {13, 1.84}, // Al {14, 2.10}, // Si {15, 1.80}, // P {16, 1.80}, // S {17, 1.75}, // Cl {18, 1.88}, // Ar {19, 2.75}, // K {20, 2.31}, // Ca {31, 1.87}, // Ga {32, 2.11}, // Ge {33, 1.85}, // As {34, 1.90}, // Se {35, 1.83}, // Br {36, 2.02}, // Kr {37, 3.03}, // Rb {38, 2.49}, // Sr {49, 1.93}, // In {50, 2.17}, // Sn {51, 2.06}, // Sb {52, 2.06}, // Te {53, 1.98}, // I {54, 2.16}, // Xe {55, 3.43}, // Cs {56, 2.68}, // Ba {81, 1.96}, // Tl {82, 2.02}, // Pb {83, 2.07}, // Bi {84, 1.97}, // Po {85, 2.02}, // At {86, 2.20}, // Rn {87, 3.48}, // Fr {88, 2.83}, // Ra }; constexpr double radius_color = 1.08265; // same radius for all feature/color "atoms", as used by the // PubChem code. namespace { // Throw out any atoms we don't want. copySubsetMol does more work than // is necessary for this and seemed to leave the molecule in an odd state // that didn't play well with extractAtoms. std::unique_ptr extractSubset( const RWMol &mol, const std::vector &atomsToKeep) { auto retMol = std::make_unique(mol); boost::dynamic_bitset<> keepAtoms(mol.getNumAtoms()); for (const auto atk : atomsToKeep) { keepAtoms[atk] = true; } retMol->beginBatchEdit(); for (const auto atom : retMol->atoms()) { if (!keepAtoms[atom->getIdx()]) { retMol->removeAtom(atom->getIdx()); } } retMol->commitBatchEdit(); // Fix the aromatic flags on anything that is no longer in a ring. MolOps::findSSSR(*retMol); const auto rings = retMol->getRingInfo(); for (const auto a : retMol->atoms()) { if (a->getIsAromatic() && !rings->numAtomRings(a->getIdx())) { a->setIsAromatic(false); } } return retMol; } } // namespace ShapeInput::ShapeInput(const ROMol &mol, const int confId, const ShapeInputOptions &opts, const ShapeOverlayOptions &overlayOpts) { PRECONDITION(mol.getNumConformers() > 0, "ShapeInput object needs the molecule to have conformers. " + mol.getProp("_Name") + " " + MolToSmiles(mol)); // std::cout << "making shape from " << MolToSmiles(mol) << " with " // << mol.getNumConformers() << " confs" << std::endl; std::unique_ptr tmpMol; // Subsetting the molecule makes any bespoke atom radii, identified // by atom index, incorrect so stash them as atom properties. for (const auto &[idx, radius] : opts.atomRadii) { const auto atom = mol.getAtomWithIdx(idx); atom->setProp("BespokeRadius", radius); } if (!opts.atomSubset.empty()) { tmpMol = extractSubset(mol, opts.atomSubset); } else { tmpMol.reset(new RWMol(mol)); } d_smiles = MolToSmiles(*tmpMol); // std::cout << "shape smiles : " << d_smiles << std::endl; std::vector atOrder; tmpMol->getProp(common_properties::_smilesAtomOutputOrder, atOrder); tmpMol.reset(dynamic_cast(MolOps::renumberAtoms(*tmpMol, atOrder))); if (!tmpMol->getRingInfo()->isInitialized()) { // Query molecules don't seem to have the ring info generated on creation. MolOps::findSSSR(*tmpMol); } d_normalizeds = boost::dynamic_bitset<>(tmpMol->getNumConformers()); d_normalizationOKs = boost::dynamic_bitset<>(tmpMol->getNumConformers()); // Molecules don't necessarily have consecutive conformer IDs from 0 // upwards. confNum is just the sequential number of the iterated // conformers. auto processConf = [&](const Conformer &conf, const unsigned int confNum, const bool fa) { extractAtoms(conf, opts, fa); if (opts.useColors) { extractFeatures(conf, confNum, opts, fa); } }; if (confId >= 0) { processConf(tmpMol->getConformer(confId), 0, true); } else { unsigned int i = 0; for (auto cfi = tmpMol->beginConformers(); cfi != tmpMol->endConformers(); ++cfi, ++i) { processConf(*(*cfi), i, i == 0); } } d_activeShape = 0; calcNormalization(); calcExtremes(); calculateSelfOverlaps(overlayOpts); if (opts.shapePruneThreshold > 0.0 && tmpMol->getNumConformers() > 1) { pruneShapes(opts.shapePruneThreshold); } if (opts.sortShapes) { sortShapesByVolumes(); } d_activeShape = 0; } ShapeInput::ShapeInput(const ShapeInput &other, const unsigned int shapeNum) { PRECONDITION( shapeNum < other.getNumShapes(), "Invalid shape number in makeSingleShape : " + std::to_string(shapeNum) + " vs " + std::to_string(other.getNumShapes()) + "."); d_activeShape = 0; d_coords.emplace_back(other.d_coords[shapeNum]); d_alphas = other.d_alphas; d_types = other.d_types; d_numAtoms = other.d_numAtoms; d_numFeats = other.d_numFeats; d_selfOverlapShapeVols.emplace_back(other.d_selfOverlapShapeVols[shapeNum]); d_selfOverlapColorVols.emplace_back(other.d_selfOverlapColorVols[shapeNum]); d_extremePointss.emplace_back(other.d_extremePointss[shapeNum]); if (other.d_carbonRadii) { d_carbonRadii.reset(new boost::dynamic_bitset<>(*other.d_carbonRadii)); } d_smiles = other.d_smiles; d_normalizeds = boost::dynamic_bitset<>(1); d_normalizeds[0] = other.d_normalizeds[shapeNum]; d_normalizationOKs = boost::dynamic_bitset<>(1); d_normalizationOKs[0] = other.d_normalizationOKs[shapeNum]; d_canonRots.emplace_back(other.d_canonRots[shapeNum]); d_canonTranss.emplace_back(other.d_canonTranss[shapeNum]); d_eigenValuess.emplace_back(other.d_eigenValuess[shapeNum]); } ShapeInput::ShapeInput(const ShapeInput &other) : d_activeShape(other.d_activeShape), d_coords(other.d_coords), d_alphas(other.d_alphas), d_types(other.d_types), d_numAtoms(other.d_numAtoms), d_numFeats(other.d_numFeats), d_selfOverlapShapeVols(other.d_selfOverlapShapeVols), d_selfOverlapColorVols(other.d_selfOverlapColorVols), d_extremePointss(other.d_extremePointss), d_smiles(other.d_smiles), d_normalizeds(other.d_normalizeds), d_normalizationOKs(other.d_normalizationOKs), d_canonRots(other.d_canonRots), d_canonTranss(other.d_canonTranss), d_eigenValuess(other.d_eigenValuess) { if (other.d_carbonRadii) { d_carbonRadii.reset(new boost::dynamic_bitset<>(*other.d_carbonRadii)); } } ShapeInput &ShapeInput::operator=(const ShapeInput &other) { if (this == &other) { return *this; } d_activeShape = other.d_activeShape; d_coords = other.d_coords; d_alphas = other.d_alphas; d_types = other.d_types; d_numAtoms = other.d_numAtoms; d_numFeats = other.d_numFeats; d_selfOverlapShapeVols = other.d_selfOverlapShapeVols; d_selfOverlapColorVols = other.d_selfOverlapColorVols; d_extremePointss = other.d_extremePointss; d_smiles = other.d_smiles; d_normalizeds = other.d_normalizeds; d_normalizationOKs = other.d_normalizationOKs; d_canonRots = other.d_canonRots; d_canonTranss = other.d_canonTranss; d_eigenValuess = other.d_eigenValuess; if (other.d_carbonRadii) { d_carbonRadii.reset(new boost::dynamic_bitset<>(*other.d_carbonRadii)); } else { d_carbonRadii.reset(); } return *this; } void ShapeInput::merge(ShapeInput &other) { PRECONDITION(d_smiles == other.d_smiles, "Shapes have different SMILES strings."); if (!d_coords.empty() && d_coords.front().size() != other.d_coords.front().size()) { BOOST_LOG(rdWarningLog) << "Can't merge shapes as different sizes.\n"; return; } if (other.d_coords.empty()) { return; } d_coords.reserve(d_coords.size() + other.d_coords.size()); d_coords.insert(d_coords.end(), std::make_move_iterator(other.d_coords.begin()), std::make_move_iterator(other.d_coords.end())); d_selfOverlapShapeVols.reserve(d_selfOverlapShapeVols.size() + other.d_selfOverlapShapeVols.size()); d_selfOverlapShapeVols.insert( d_selfOverlapShapeVols.end(), std::make_move_iterator(other.d_selfOverlapShapeVols.begin()), std::make_move_iterator(other.d_selfOverlapShapeVols.end())); d_selfOverlapColorVols.reserve(d_selfOverlapColorVols.size() + other.d_selfOverlapColorVols.size()); d_selfOverlapColorVols.insert( d_selfOverlapColorVols.end(), std::make_move_iterator(other.d_selfOverlapColorVols.begin()), std::make_move_iterator(other.d_selfOverlapColorVols.end())); d_extremePointss.reserve(d_extremePointss.size() + other.d_extremePointss.size()); d_extremePointss.insert( d_extremePointss.end(), std::make_move_iterator(other.d_extremePointss.begin()), std::make_move_iterator(other.d_extremePointss.end())); d_canonRots.reserve(d_canonRots.size() + other.d_canonRots.size()); d_canonRots.insert(d_canonRots.end(), std::make_move_iterator(other.d_canonRots.begin()), std::make_move_iterator(other.d_canonRots.end())); d_canonTranss.reserve(d_canonTranss.size() + other.d_canonTranss.size()); d_canonTranss.insert(d_canonTranss.end(), std::make_move_iterator(other.d_canonTranss.begin()), std::make_move_iterator(other.d_canonTranss.end())); d_eigenValuess.reserve(d_eigenValuess.size() + other.d_eigenValuess.size()); d_eigenValuess.insert(d_eigenValuess.end(), std::make_move_iterator(other.d_eigenValuess.begin()), std::make_move_iterator(other.d_eigenValuess.end())); auto joinBS = [](const boost::dynamic_bitset<> &bs1, const boost::dynamic_bitset<> &bs2) -> boost::dynamic_bitset<> { boost::dynamic_bitset<> bs1Copy(bs1); boost::dynamic_bitset<> bs2Copy(bs2); const size_t totalSize = bs1.size() + bs2.size(); bs1Copy.resize(totalSize); bs2Copy.resize(totalSize); bs1Copy <<= bs2.size(); bs1Copy |= bs2Copy; return bs1Copy; }; d_normalizeds = joinBS(other.d_normalizeds, d_normalizeds); other.d_normalizeds.clear(); d_normalizationOKs = joinBS(other.d_normalizationOKs, d_normalizationOKs); other.d_normalizationOKs.clear(); other.d_coords.clear(); other.d_selfOverlapShapeVols.clear(); other.d_selfOverlapColorVols.clear(); other.d_extremePointss.clear(); other.d_canonRots.clear(); other.d_canonTranss.clear(); other.d_eigenValuess.clear(); } void ShapeInput::setActiveShape(const unsigned int newShape) { PRECONDITION(newShape < d_coords.size(), "Invalid shape number (" + std::to_string(newShape) + " vs " + std::to_string(d_coords.size()) + ")."); if (d_activeShape != newShape) { d_activeShape = newShape; } } void ShapeInput::negateAlpha(const unsigned int alphaNum) { PRECONDITION(alphaNum < d_alphas.size(), "Bad number for alpha negation"); d_alphas[alphaNum] *= -1.0; } std::vector ShapeInput::getAtomPoints( const bool includeColors) const { std::vector atomPoints; unsigned int numPoints = getNumAtoms(); if (includeColors) { numPoints += getNumFeatures(); } atomPoints.reserve(numPoints); for (unsigned int i = 0; i < 3 * numPoints; i += 3) { atomPoints.emplace_back(RDGeom::Point3D(d_coords[d_activeShape][i], d_coords[d_activeShape][i + 1], d_coords[d_activeShape][i + 2])); } return atomPoints; } double ShapeInput::getShapeVolume(const unsigned int shapeNum) const { PRECONDITION(shapeNum < d_coords.size(), "Invalid shape number (" + std::to_string(shapeNum) + " vs " + std::to_string(d_coords.size()) + ")."); return d_selfOverlapShapeVols[shapeNum]; } double ShapeInput::getColorVolume(const unsigned int shapeNum) const { PRECONDITION(shapeNum < d_coords.size(), "Invalid shape number (" + std::to_string(shapeNum) + " vs " + std::to_string(d_coords.size()) + ")."); return d_selfOverlapColorVols[shapeNum]; } const std::array &ShapeInput::calcCanonicalRotation() { if (!d_normalizationOKs[d_activeShape]) { calcNormalization(); } return d_canonRots[d_activeShape]; } const std::array &ShapeInput::calcCanonicalTranslation() { if (!d_normalizationOKs[d_activeShape]) { calcNormalization(); } return d_canonTranss[d_activeShape]; } const std::array &ShapeInput::calcEigenValues() { if (!d_normalizationOKs[d_activeShape]) { calcNormalization(); } return d_eigenValuess[d_activeShape]; } const std::array &ShapeInput::calcExtremes() { if (!d_normalizationOKs[d_activeShape]) { calculateExtremes(); } return d_extremePointss[d_activeShape]; } std::array ShapeInput::calcMomentsOfInertia( const bool includeColors) const { const auto tmpMol = shapeToMol(includeColors); std::array eVals; #if RDK_HAS_EIGEN3 Eigen::Matrix3d axes; Eigen::Vector3d moments; MolTransforms::computePrincipalAxesAndMoments(tmpMol->getConformer(), axes, moments); eVals[0] = moments[0]; eVals[1] = moments[1]; eVals[2] = moments[2]; #else std::unique_ptr canonXform( MolTransforms::computeCanonicalTransform(tmpMol->getConformer(), nullptr, false, true, eVals.data())); #endif return eVals; } void ShapeInput::normalizeCoords() { if (d_normalizeds[d_activeShape]) { return; } if (!d_normalizationOKs[d_activeShape]) { calcNormalization(); } RDGeom::Transform3D canonRot; for (unsigned int i = 0, k = 0; i < 3; ++i) { for (unsigned int j = 0; j < 3; ++j, ++k) { canonRot.setValUnchecked(i, j, d_canonRots[d_activeShape][k]); } } RDGeom::Point3D trans{d_canonTranss[d_activeShape][0], d_canonTranss[d_activeShape][1], d_canonTranss[d_activeShape][2]}; canonRot.TransformPoint(trans); canonRot.SetTranslation(trans); transformCoords(canonRot); d_normalizeds[d_activeShape] = true; // Recalculate the extremes now we've changed the coordinates. calcExtremes(); } void ShapeInput::transformCoords(RDGeom::Transform3D &xform) { applyTransformToShape(d_coords[d_activeShape], xform); d_normalizeds[d_activeShape] = false; d_normalizationOKs[d_activeShape] = false; } std::unique_ptr ShapeInput::shapeToMol(const bool includeColors, const bool withBonds) const { std::unique_ptr mol; if (withBonds) { // The SMILES string and the atom coordinates should be in the same // order. v2::SmilesParse::SmilesParserParams params; params.sanitize = false; mol = v2::SmilesParse::MolFromSmiles(d_smiles, params); } else { mol.reset(new RWMol()); for (unsigned int i = 0; i < getNumAtoms(); i++) { const auto atom = new Atom(6); mol->addAtom(atom, true, true); } } if (includeColors) { for (unsigned int i = 0; i < getNumFeatures(); i++) { const auto atom = new Atom(54); mol->addAtom(atom, true, true); } } unsigned int num = getNumAtoms(); if (includeColors) { num += getNumFeatures(); } const auto conf = new Conformer(num); const auto &shapeCds = getCoords(); for (unsigned int i = 0; i < num; i++) { auto &pos = conf->getAtomPos(i); pos.x = shapeCds[3 * i]; pos.y = shapeCds[3 * i + 1]; pos.z = shapeCds[3 * i + 2]; } mol->addConformer(conf, true); return mol; } std::array ShapeInput::bestSimilarity( const ShapeInput &fitShape, unsigned int &bestThisShape, unsigned int &bestFitShape, RDGeom::Transform3D &bestXform, const double threshold, const ShapeOverlayOptions &overlayOpts) { bestThisShape = -1; bestFitShape = -1; std::array bestSim{-1.0, -1.0, -1.0}; if (maxPossibleSimilarity(fitShape) < threshold) { return bestSim; } RDGeom::Transform3D xform; const auto currActiveShape = getActiveShape(); for (size_t i = 0; i < getNumShapes(); i++) { setActiveShape(i); for (size_t j = 0; j < fitShape.getNumShapes(); j++) { const auto maxSim = maxScore(getShapeVolume(), fitShape.getShapeVolume(j), getColorVolume(), fitShape.getColorVolume(j), overlayOpts); if (maxSim > threshold) { ShapeInput singleFitShape(fitShape, j); auto scores = AlignShape(*this, singleFitShape, &xform, overlayOpts); if (scores[0] > bestSim[0]) { bestSim = scores; bestThisShape = i; bestFitShape = j; bestXform = xform; } } // Floating point cruft means we sometimes get a similarity slightly // above 1.0. 1.0 is the maximum possible, so stop if we hit it. if (bestSim[0] > 1.0 || fabs(bestSim[0] - 1.0) < 1.0e-6) { setActiveShape(currActiveShape); return bestSim; } } } setActiveShape(currActiveShape); return bestSim; } double ShapeInput::maxPossibleSimilarity( const ShapeInput &fitShape, const ShapeOverlayOptions &overlayOpts) const { // The best score achievable is when the smaller volume is entirely inside // the larger volume. double maxSim = 0.0; for (unsigned int i = 0; i < getNumShapes(); i++) { for (unsigned int j = 0; j < fitShape.getNumShapes(); j++) { const auto sim = maxScore(fitShape.d_selfOverlapShapeVols[j], d_selfOverlapShapeVols[i], fitShape.d_selfOverlapColorVols[j], d_selfOverlapColorVols[i], overlayOpts); if (sim > maxSim) { maxSim = sim; } } } return maxSim; } void ShapeInput::pruneShapes(const double simThreshold) { if (d_coords.size() < 2 || simThreshold < 0.0) { return; } class DistFunctor { public: explicit DistFunctor(const ShapeInput &shapes) : d_shapes(shapes), d_shapesCp(shapes) {} ~DistFunctor() = default; double operator()(const unsigned int i, const unsigned int j) { d_shapes.setActiveShape(i); d_shapesCp.setActiveShape(j); const auto scores = AlignShape(d_shapes, d_shapesCp); // The picker works on distances. return 1.0 - scores[0]; } ShapeInput d_shapes; ShapeInput d_shapesCp; }; const RDPickers::LeaderPicker leaderPicker; DistFunctor distFunctor(*this); // The picker works on distances. auto picks = leaderPicker.lazyPick(distFunctor, d_coords.size(), 0, 1.0 - simThreshold); // Allow for mysterious LeaderPicker behaviour where it returns a // vector of 0s when it should only pick 1 shape. std::ranges::sort(picks); auto [first, last] = std::ranges::unique(picks); picks.erase(first, last); selectConformations(picks); d_activeShape = 0; } namespace { double getStandardAtomRadius(const unsigned int atomicNum) { // Mostly they will be carbons, so just return that without lookup. if (atomicNum == 6) { return CARBON_RAD; } if (const auto rad = vdw_radii.find(static_cast(atomicNum)); rad != vdw_radii.end()) { return rad->second; } throw ValueErrorException("No VdW radius for atom with Z=" + std::to_string(atomicNum)); } } // namespace void ShapeInput::extractAtoms(const Conformer &conf, const ShapeInputOptions &shapeOpts, const bool fillAlphas) { if (!shapeOpts.allCarbonRadii) { d_carbonRadii.reset(new boost::dynamic_bitset<>( !shapeOpts.atomSubset.empty() ? shapeOpts.atomSubset.size() : conf.getNumAtoms())); } std::vector theseCoords; theseCoords.reserve(conf.getNumAtoms() * 3); for (const auto atom : conf.getOwningMol().atoms()) { // Ignore H atoms except deuterium and tritium which are treated elsewhere // as explicit atoms, and do use dummies if requested. The latter two // are set to be ignored in the volume calculation, however. This is // just to keep the atom numberings correct. Dummy atoms can also have // isotope number 1. if (atom->getAtomicNum() != 1 || (atom->getAtomicNum() == 1 && atom->getIsotope() > 1)) { if (!shapeOpts.includeDummies && !atom->getAtomicNum()) { continue; } const auto atIdx = atom->getIdx(); auto &pos = conf.getAtomPos(atIdx); theseCoords.push_back(pos.x); theseCoords.push_back(pos.y); theseCoords.push_back(pos.z); if (fillAlphas) { if (shapeOpts.allCarbonRadii && !d_carbonRadii) { d_alphas.push_back(KAPPA / (CARBON_RAD * CARBON_RAD)); } else { double rad = 0.0; if (shapeOpts.atomRadii.empty()) { if ((shapeOpts.allCarbonRadii && atom->getAtomicNum()) || atom->getAtomicNum() == 6) { (*d_carbonRadii)[atIdx] = true; } rad = getStandardAtomRadius(atom->getAtomicNum()); } else { if (atom->hasProp("BespokeRadius")) { rad = atom->getProp("BespokeRadius"); } else { rad = getStandardAtomRadius(atom->getAtomicNum()); if ((shapeOpts.allCarbonRadii && atom->getAtomicNum()) || atom->getAtomicNum() == 6) { (*d_carbonRadii)[atIdx] = true; } } } // TODO: For now, check there hasn't been a logic error leaving a rad // of 0.0. Take it out later when it's been tried enough we can be // confident it's ok. if (rad == 0.0) { throw ValueErrorException("Atom has radius 0.0."); } d_alphas.push_back(KAPPA / (rad * rad)); } if (atom->getAtomicNum() == 1) { // So it's not included in the volume calcs. d_alphas.back() = -1.0; } } } } d_activeShape = d_coords.size(); d_coords.push_back(theseCoords); if (fillAlphas) { d_numAtoms = d_coords.front().size() / 3; d_types.resize(d_numAtoms); d_numFeats = 0; } } namespace { class ss_matcher { public: explicit ss_matcher(const std::string &pattern) : m_pattern(pattern) { m_needCopies = pattern.find_first_of("$") != std::string::npos; m_matcher = SmartsToMol(pattern); POSTCONDITION(m_matcher, "no matcher"); }; const ROMol *getMatcher() const { return m_matcher; }; ~ss_matcher() { delete m_matcher; }; private: ss_matcher() = default; std::string m_pattern; bool m_needCopies{false}; const ROMol *m_matcher{nullptr}; }; } // namespace // This came from the original PubChemShape.cpp typedef boost::flyweight, boost::flyweights::no_tracking> pattern_flyweight; // Definitions for feature points adapted from: // Gobbi and Poppinger, Biotech. Bioeng. _61_ 47-54 (1998) const std::vector> smartsPatterns = { {"[$([N;!H0;v3,v4&+1]),\ $([O,S;H1;+0]),\ n&H1&+0]"}, // Donor {"[$([O,S;H1;v2;!$(*-*=[O,N,P,S])]),\ $([O,S;H0;v2]),\ $([O,S;-]),\ $([N;v3;!$(N-*=[O,N,P,S])]),\ n&H0&+0,\ $([o,s;+0;!$([o,s]:n);!$([o,s]:c:n)])]"}, // Acceptor { "[r]1[r][r]1", "[r]1[r][r][r]1", "[r]1[r][r][r][r]1", "[r]1[r][r][r][r][r]1", "[r]1[r][r][r][r][r][r]1", }, // rings // "[a]", // // Aromatic // "[F,Cl,Br,I]", // Halogen {"[#7;+,\ $([N;H2&+0][$([C,a]);!$([C,a](=O))]),\ $([N;H1&+0]([$([C,a]);!$([C,a](=O))])[$([C,a]);!$([C,a](=O))]),\ $([N;H0&+0]([C;!$(C(=O))])([C;!$(C(=O))])[C;!$(C(=O))])]"}, // Basic {"[$([C,S](=[O,S,P])-[O;H1,-1])]"} // Acidic }; std::vector> *getPh4Patterns() { static std::unique_ptr>> patterns; if (!patterns) { patterns.reset(new std::vector>()); for (const auto &smartsV : smartsPatterns) { std::vector v; for (const auto &smarts : smartsV) { const ROMol *matcher = pattern_flyweight(smarts).get().getMatcher(); CHECK_INVARIANT(matcher, "bad smarts"); v.push_back(matcher); } patterns->push_back(std::move(v)); } } return patterns.get(); } // Extract the features for the color scores, using RDKit pphore features // for now. Other options to be added later. This should be working on // the parent molecule, but only extracting features that are due to // atoms entirely within any atom subset. void ShapeInput::extractFeatures(const Conformer &conf, unsigned int confNum, const ShapeInputOptions &shapeOpts, const bool fillAlphas) { std::vector feats; if (shapeOpts.customFeatures.empty()) { findFeatures(conf, feats); } else { PRECONDITION(confNum < shapeOpts.customFeatures.size(), "Conformer number " + std::to_string(confNum) + " too large for custom features size " + std::to_string(shapeOpts.customFeatures.size())); feats = shapeOpts.customFeatures[confNum]; } // Just copy them directly except that the last is a radius, so convert // to alpha. for (const auto &f : feats) { const auto &pos = f.pos; d_coords[d_activeShape].push_back(pos.x); d_coords[d_activeShape].push_back(pos.y); d_coords[d_activeShape].push_back(pos.z); if (fillAlphas) { const auto rad = f.rad; d_alphas.push_back(KAPPA / (rad * rad)); d_types.push_back(f.type); d_numFeats++; } } } void ShapeInput::calcNormalization() { if (d_eigenValuess.size() < d_coords.size()) { d_eigenValuess.resize(d_coords.size()); d_canonRots.resize(d_coords.size()); d_canonTranss.resize(d_coords.size()); d_eigenValuess.resize(d_coords.size()); d_extremePointss.resize(d_coords.size()); } d_eigenValuess[d_activeShape] = std::array{0.0, 0.0, 0.0}; // Build a "molecule" just of the shape points, not the color features // with which to calculate the canonical transformation. Doesn't ever // use the input molecule in case the shape was built from a subset of // atoms in that molecule. const auto tmpMol = shapeToMol(false, false); std::unique_ptr canonXform( MolTransforms::computeCanonicalTransform( tmpMol->getConformer(), nullptr, false, true, d_eigenValuess[d_activeShape].data())); d_canonRots[d_activeShape] = std::array{1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0}; for (unsigned int i = 0, k = 0; i < 3; ++i) { for (unsigned int j = 0; j < 3; ++j, ++k) { d_canonRots[d_activeShape][k] = canonXform->getValUnchecked(i, j); } } d_canonTranss[d_activeShape] = std::array{0.0, 0.0, 0.0}; for (unsigned int i = 0; i < 3 * d_numAtoms; i += 3) { d_canonTranss[d_activeShape][0] -= d_coords[d_activeShape][i]; d_canonTranss[d_activeShape][1] -= d_coords[d_activeShape][i + 1]; d_canonTranss[d_activeShape][2] -= d_coords[d_activeShape][i + 2]; } d_canonTranss[d_activeShape][0] /= d_numAtoms; d_canonTranss[d_activeShape][1] /= d_numAtoms; d_canonTranss[d_activeShape][2] /= d_numAtoms; d_normalizationOKs[d_activeShape] = true; } void ShapeInput::calculateExtremes() { if (d_extremePointss.size() < d_coords.size()) { d_extremePointss.resize(d_coords.size()); } d_extremePointss[d_activeShape] = std::array{0, 0, 0, 0, 0, 0}; const std::vector &theseCoords = d_coords[d_activeShape]; for (size_t i = 0, j = 0; i < theseCoords.size(); i += 3, ++j) { if (theseCoords[i] < theseCoords[3 * d_extremePointss[d_activeShape][0]]) { d_extremePointss[d_activeShape][0] = j; } if (theseCoords[i] > theseCoords[3 * d_extremePointss[d_activeShape][3]]) { d_extremePointss[d_activeShape][3] = j; } if (theseCoords[i + 1] < theseCoords[3 * d_extremePointss[d_activeShape][1] + 1]) { d_extremePointss[d_activeShape][1] = j; } if (theseCoords[i + 1] > theseCoords[3 * d_extremePointss[d_activeShape][4] + 1]) { d_extremePointss[d_activeShape][4] = j; } if (theseCoords[i + 2] < theseCoords[3 * d_extremePointss[d_activeShape][2] + 2]) { d_extremePointss[d_activeShape][2] = j; } if (theseCoords[i + 2] > theseCoords[3 * d_extremePointss[d_activeShape][5] + 2]) { d_extremePointss[d_activeShape][5] = j; } } } void ShapeInput::selectConformations(const std::vector &picks) { std::vector> newCoords; newCoords.reserve(picks.size()); std::vector newMolConfs; newMolConfs.reserve(picks.size()); std::vector newDummyVols; newDummyVols.reserve(picks.size()); std::vector newShapeVolumes; newShapeVolumes.reserve(picks.size()); std::vector newColorVolumes; newColorVolumes.reserve(picks.size()); std::vector> newExtremePointss; newExtremePointss.reserve(picks.size()); boost::dynamic_bitset<> newNormalizeds(picks.size()); boost::dynamic_bitset<> newNormalizationOKs(picks.size()); std::vector> newCanRots; newCanRots.reserve(picks.size()); std::vector> newCentroids; newCentroids.reserve(picks.size()); std::vector> newEigenValuess; newEigenValuess.reserve(picks.size()); unsigned int i = 0; for (const auto p : picks) { newCoords.push_back(std::move(d_coords[p])); newShapeVolumes.push_back(d_selfOverlapShapeVols[p]); newColorVolumes.push_back(d_selfOverlapColorVols[p]); newExtremePointss.push_back(std::move(d_extremePointss[p])); newNormalizeds[i] = d_normalizeds[p]; newNormalizationOKs[i] = d_normalizationOKs[p]; newCanRots.push_back(std::move(d_canonRots[p])); newCentroids.push_back(std::move(d_canonTranss[p])); newEigenValuess.push_back(std::move(d_eigenValuess[p])); ++i; } d_coords = std::move(newCoords); d_selfOverlapShapeVols = std::move(newShapeVolumes); d_selfOverlapColorVols = std::move(newColorVolumes); d_extremePointss = std::move(newExtremePointss); d_normalizeds = newNormalizeds; d_normalizationOKs = newNormalizationOKs; d_canonRots = std::move(newCanRots); d_canonTranss = std::move(newCentroids); d_eigenValuess = std::move(newEigenValuess); } void ShapeInput::calculateSelfOverlaps(const ShapeOverlayOptions &overlayOpts) { d_selfOverlapShapeVols.reserve(getNumShapes()); d_selfOverlapColorVols.reserve(getNumShapes()); std::vector gradConverters(12 * (d_numAtoms + d_numFeats)); for (unsigned int i = 0; i < getNumShapes(); i++) { d_selfOverlapShapeVols.push_back(calcVolAndGrads( d_coords[i].data(), d_alphas.data(), d_numAtoms, d_carbonRadii.get(), d_coords[i].data(), d_alphas.data(), d_numAtoms, d_carbonRadii.get(), gradConverters, overlayOpts.useDistCutoff, overlayOpts.distCutoff * overlayOpts.distCutoff)); d_selfOverlapColorVols.push_back(calcVolAndGrads( d_coords[i].data() + 3 * d_numAtoms, d_alphas.data() + d_numAtoms, d_numFeats, d_types.data() + d_numAtoms, d_coords[i].data() + 3 * d_numAtoms, d_alphas.data() + d_numAtoms, d_numFeats, d_types.data() + d_numAtoms, d_numAtoms, gradConverters, overlayOpts.useDistCutoff, overlayOpts.distCutoff * overlayOpts.distCutoff, nullptr, nullptr)); } } void ShapeInput::sortShapesByVolumes() { std::vector> vals; vals.reserve(d_coords.size()); for (size_t i = 0; i < d_coords.size(); i++) { vals.push_back(std::make_pair( d_selfOverlapShapeVols[i] + d_selfOverlapColorVols[i], i)); } std::ranges::sort(vals, [](const std::pair &a, const std::pair &b) -> bool { return a.first > b.first; }); std::vector picks(vals.size()); std::ranges::transform(vals, begin(picks), [](const std::pair &a) -> int { return static_cast(a.second); }); selectConformations(picks); } void findFeatures(const Conformer &conf, std::vector &features, const std::optional> &atomSubset) { unsigned pattIdx = 1; for (const auto &patts : *getPh4Patterns()) { for (const auto &patt : patts) { std::vector matches; // Once, recursive queries weren't thread safe but Greg assures // me that they now are. matches = SubstructMatch(conf.getOwningMol(), *patt); for (const auto &match : matches) { std::vector ats; bool featOk = true; for (const auto &pr : match) { // make sure all the atoms are in the subset, if there is one if (atomSubset.has_value() && !atomSubset->empty()) { if (std::ranges::find_if(*atomSubset, [pr](const auto &p) -> bool { return p == static_cast(pr.second); }) == atomSubset->end()) { featOk = false; break; } } ats.push_back(pr.second); } if (!featOk) { continue; } auto featPos = computeFeaturePos(conf, ats); features.emplace_back( CustomFeature{pattIdx, featPos, radius_color, ats}); } } ++pattIdx; } } RDGeom::Point3D computeFeaturePos(const Conformer &conf, const std::vector &ats) { RDGeom::Point3D featPos; for (const auto at : ats) { featPos += conf.getAtomPos(at); } featPos /= ats.size(); return featPos; } void applyTransformToShape(std::vector &shape, const RDGeom::Transform3D &xform) { for (size_t i = 0; i < shape.size(); i += 3) { RDGeom::Point3D pos{shape[i], shape[i + 1], shape[i + 2]}; xform.TransformPoint(pos); shape[i] = pos.x; shape[i + 1] = pos.y; shape[i + 2] = pos.z; } } void applyTransformToShape(const double *inShape, double *outShape, const size_t numPoints, const RDGeom::Transform3D &xform) { for (size_t i = 0; i < 3 * numPoints; i += 3) { RDGeom::Point3D pos{inShape[i], inShape[i + 1], inShape[i + 2]}; xform.TransformPoint(pos); outShape[i] = pos.x; outShape[i + 1] = pos.y; outShape[i + 2] = pos.z; } } void translateShape(std::vector &shape, const RDGeom::Point3D &translation) { for (size_t i = 0; i < shape.size(); i += 3) { shape[i] += translation.x; shape[i + 1] += translation.y; shape[i + 2] += translation.z; } } void translateShape(const double *inShape, double *outShape, const size_t numPoints, const RDGeom::Point3D &translation) { for (size_t i = 0; i < 3 * numPoints; i += 3) { outShape[i] = inShape[i] + translation.x; outShape[i + 1] = inShape[i + 1] + translation.y; outShape[i + 2] = inShape[i + 2] + translation.z; } } // Maximum possible score of the 2 shape (v[12]) and color (c[12]) volumes double maxScore(const double v1, const double v2, const double c1, const double c2, const ShapeOverlayOptions &overlayOpts) { // We're dealing with a Tversky score // s = O / (A * (R - O) + B * (F - O) + O) // There are 2 cases to handle, where v1 < v2 in which case the max overlap // is v1, and the opposite. auto maxPart = [](const double p1, const double p2, const ShapeOverlayOptions &ovlyOpts) -> double { if (p1 < p2) { return p1 / (ovlyOpts.simBeta * (p2 - p1) + p1); } return p2 / (ovlyOpts.simAlpha * (p1 - p2) + p2); }; const auto maxSt = maxPart(v1, v2, overlayOpts); const auto maxCt = maxPart(c1, c2, overlayOpts); return maxSt * (1.0 - overlayOpts.optParam) + maxCt * overlayOpts.optParam; } } // namespace GaussianShape } // namespace RDKit