mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-03 21:44:30 +08:00
Provide an RDKit wrapper around pubchem-align3d (#7798)
* initial support for pubchem-align3d * works, passes the tests * bump commit we are using * update readme * disable pubchem shape for limited depedencies build * comment from review * get windows dll builds working * stupid oversight * changes in response to review get rid of the "using namespace std" test that the alignment is re-entrant
This commit is contained in:
@@ -37,6 +37,7 @@ steps:
|
||||
-DRDK_BUILD_CAIRO_SUPPORT=OFF \
|
||||
-DRDK_BUILD_QT_SUPPORT=OFF \
|
||||
-DRDK_BUILD_XYZ2MOL_SUPPORT=ON \
|
||||
-DRDK_BUILD_PUBCHEMSHAPE_SUPPORT=OFF \
|
||||
-DRDK_BUILD_SWIG_WRAPPERS=OFF \
|
||||
-DRDK_SWIG_STATIC=OFF \
|
||||
-DRDK_BUILD_THREADSAFE_SSS=OFF \
|
||||
|
||||
@@ -59,6 +59,7 @@ option(RDK_BUILD_MOLINTERCHANGE_SUPPORT "build in support for CommonChem molecul
|
||||
option(RDK_BUILD_YAEHMOP_SUPPORT "build support for the YAeHMOP wrapper" OFF)
|
||||
option(RDK_BUILD_XYZ2MOL_SUPPORT "build in support for the RDKit's implementation of xyz2mol (in the DetermineBonds library)" OFF )
|
||||
option(RDK_BUILD_STRUCTCHECKER_SUPPORT "build in support for the StructChecker alpha (not recommended, use the MolVS integration instead)" OFF )
|
||||
option(RDK_BUILD_PUBCHEMSHAPE_SUPPORT "build the rdkit wrapper around pubchem-align3d" ON )
|
||||
option(RDK_USE_URF "Build support for Florian Flachsenberg's URF library" ON)
|
||||
option(RDK_INSTALL_DEV_COMPONENT "install libraries and headers" ON)
|
||||
option(RDK_USE_BOOST_IOSTREAMS "use boost::iostreams" ON)
|
||||
|
||||
3
External/CMakeLists.txt
vendored
3
External/CMakeLists.txt
vendored
@@ -5,3 +5,6 @@ add_subdirectory(CoordGen)
|
||||
add_subdirectory(YAeHMOP)
|
||||
add_subdirectory(RingFamilies)
|
||||
add_subdirectory(GA)
|
||||
if(RDK_BUILD_PUBCHEMSHAPE_SUPPORT)
|
||||
add_subdirectory(pubchem_shape)
|
||||
endif()
|
||||
56
External/pubchem_shape/CMakeLists.txt
vendored
Normal file
56
External/pubchem_shape/CMakeLists.txt
vendored
Normal file
@@ -0,0 +1,56 @@
|
||||
if(NOT RDK_BUILD_PUBCHEMSHAPE_SUPPORT)
|
||||
return()
|
||||
endif()
|
||||
|
||||
|
||||
|
||||
if(NOT DEFINED PUBCHEMSHAPE_DIR)
|
||||
set(PUBCHEMSHAPE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/pubchem-align3d")
|
||||
set(fileToCheck "${PUBCHEMSHAPE_DIR}/shape_functions1.cpp")
|
||||
set(needDownload "TRUE")
|
||||
if(EXISTS "${fileToCheck}")
|
||||
set(needDownload "FALSE")
|
||||
endif()
|
||||
else()
|
||||
set(needDownload "FALSE")
|
||||
endif()
|
||||
|
||||
if(needDownload)
|
||||
set(PUBCHEM_COMMIT_SHA 27f063f)
|
||||
if(NOT DEFINED PUBCHEMSHAPE_URL)
|
||||
set(PUBCHEMSHAPE_URL "https://github.com/ncbi/pubchem-align3d/archive/${PUBCHEM_COMMIT_SHA}.tar.gz")
|
||||
endif()
|
||||
if(NOT DEFINED PUBCHEMSHAPE_MD5SUM)
|
||||
set(PUBCHEMSHAPE_MD5SUM "4f763a1cdff13a1dd2bd4c4612e06462")
|
||||
endif()
|
||||
if(NOT DEFINED PUBCHEMSHAPE_BASE)
|
||||
string(REGEX REPLACE "^.*/" "" PUBCHEMSHAPE_BASE "${PUBCHEMSHAPE_URL}")
|
||||
endif()
|
||||
downloadAndCheckMD5(${PUBCHEMSHAPE_URL} "${CMAKE_CURRENT_SOURCE_DIR}/${PUBCHEMSHAPE_BASE}" ${PUBCHEMSHAPE_MD5SUM})
|
||||
execute_process(COMMAND ${CMAKE_COMMAND} -E tar xzf
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/${PUBCHEM_COMMIT_SHA}.tar.gz
|
||||
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
|
||||
file(GLOB tar_dirname ${CMAKE_CURRENT_SOURCE_DIR}/pubchem-align3d-${PUBCHEM_COMMIT_SHA}*)
|
||||
execute_process(COMMAND ${CMAKE_COMMAND} -E rename ${tar_dirname}
|
||||
${PUBCHEMSHAPE_DIR}
|
||||
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
endif()
|
||||
|
||||
rdkit_library(pubchem_align3d ./pubchem-align3d/shape_functions1.cpp
|
||||
./pubchem-align3d/shape_functions2.cpp ./pubchem-align3d/shape_neighbor.cpp SHARED)
|
||||
if((MSVC AND RDK_INSTALL_DLLS_MSVC) OR ((NOT MSVC) AND WIN32))
|
||||
set_target_properties(pubchem_align3d PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS TRUE)
|
||||
endif()
|
||||
|
||||
rdkit_library(PubChemShape PubChemShape.cpp SHARED
|
||||
LINK_LIBRARIES pubchem_align3d SmilesParse SubstructMatch)
|
||||
target_compile_definitions(PubChemShape PRIVATE RDKIT_PUBCHEMSHAPE_BUILD)
|
||||
|
||||
rdkit_headers(PubChemShape.hpp DEST GraphMol)
|
||||
|
||||
rdkit_catch_test(shape_test test.cpp LINK_LIBRARIES PubChemShape FileParsers)
|
||||
|
||||
if(RDK_BUILD_PYTHON_WRAPPERS)
|
||||
add_subdirectory(Wrap)
|
||||
endif()
|
||||
426
External/pubchem_shape/PubChemShape.cpp
vendored
Normal file
426
External/pubchem_shape/PubChemShape.cpp
vendored
Normal file
@@ -0,0 +1,426 @@
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
|
||||
#include <GraphMol/RWMol.h>
|
||||
#include <GraphMol/SmilesParse/SmilesParse.h>
|
||||
#include <GraphMol/Substruct/SubstructMatch.h>
|
||||
|
||||
#include <RDGeneral/BoostStartInclude.h>
|
||||
#include <boost/flyweight.hpp>
|
||||
#include <boost/flyweight/key_value.hpp>
|
||||
#include <boost/flyweight/no_tracking.hpp>
|
||||
#include <RDGeneral/BoostEndInclude.h>
|
||||
|
||||
#include "pubchem-align3d/shape_functions.hpp"
|
||||
#include "PubChemShape.hpp"
|
||||
|
||||
constexpr auto pubchemFeatureName = "PUBCHEM_PHARMACOPHORE_FEATURES";
|
||||
|
||||
// #define DEBUG_MSG(msg_stream) cout << msg_stream << '\n'
|
||||
#define DEBUG_MSG(msg_stream)
|
||||
|
||||
using namespace RDKit;
|
||||
|
||||
// Bondi radii
|
||||
// can find more of these in Table 12 of this publication:
|
||||
// https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3658832/
|
||||
const std::map<unsigned int, double> vdw_radii = {
|
||||
{0, 1.10}, // dummy atom (value copied from H)
|
||||
{1, 1.10}, // H
|
||||
{2, 1.40}, // He
|
||||
{3, 1.81}, // Li
|
||||
{4, 1.53}, // Be
|
||||
{5, 1.92}, // B
|
||||
{6, 1.70}, // C
|
||||
{7, 1.55}, // N
|
||||
{8, 1.52}, // O
|
||||
{9, 1.47}, // F
|
||||
{10, 1.54}, // Ne
|
||||
{11, 2.27}, // Na
|
||||
{12, 1.73}, // Mg
|
||||
{13, 1.84}, // Al
|
||||
{14, 2.10}, // Si
|
||||
{15, 1.80}, // P
|
||||
{16, 1.80}, // S
|
||||
{17, 1.75}, // Cl
|
||||
{18, 1.88}, // Ar
|
||||
{19, 2.75}, // K
|
||||
{20, 2.31}, // Ca
|
||||
{31, 1.87}, // Ga
|
||||
{32, 2.11}, // Ge
|
||||
{33, 1.85}, // As
|
||||
{34, 1.90}, // Se
|
||||
{35, 1.83}, // Br
|
||||
{36, 2.02}, // Kr
|
||||
{37, 3.03}, // Rb
|
||||
{38, 2.49}, // Sr
|
||||
{49, 1.93}, // In
|
||||
{50, 2.17}, // Sn
|
||||
{51, 2.06}, // Sb
|
||||
{52, 2.06}, // Te
|
||||
{53, 1.98}, // I
|
||||
{54, 2.16}, // Xe
|
||||
{55, 3.43}, // Cs
|
||||
{56, 2.68}, // Ba
|
||||
{81, 1.96}, // Tl
|
||||
{82, 2.02}, // Pb
|
||||
{83, 2.07}, // Bi
|
||||
{84, 1.97}, // Po
|
||||
{85, 2.02}, // At
|
||||
{86, 2.20}, // Rn
|
||||
{87, 3.48}, // Fr
|
||||
{88, 2.83}, // Ra
|
||||
};
|
||||
constexpr double radius_color =
|
||||
1.08265; // same radius for all feature/color "atoms"
|
||||
|
||||
namespace {
|
||||
class ss_matcher {
|
||||
public:
|
||||
ss_matcher(const std::string &pattern) : m_pattern(pattern) {
|
||||
m_needCopies = (pattern.find_first_of("$") != std::string::npos);
|
||||
RDKit::RWMol *p = RDKit::SmartsToMol(pattern);
|
||||
m_matcher = p;
|
||||
POSTCONDITION(m_matcher, "no matcher");
|
||||
};
|
||||
const RDKit::ROMol *getMatcher() const { return m_matcher; };
|
||||
unsigned int countMatches(const RDKit::ROMol &mol) const {
|
||||
PRECONDITION(m_matcher, "no matcher");
|
||||
std::vector<RDKit::MatchVectType> matches;
|
||||
// This is an ugly one. Recursive queries aren't thread safe.
|
||||
// Unfortunately we have to take a performance hit here in order
|
||||
// to guarantee thread safety
|
||||
if (m_needCopies) {
|
||||
const RDKit::ROMol nm(*(m_matcher), true);
|
||||
RDKit::SubstructMatch(mol, nm, matches);
|
||||
} else {
|
||||
const RDKit::ROMol &nm = *m_matcher;
|
||||
RDKit::SubstructMatch(mol, nm, matches);
|
||||
}
|
||||
return matches.size();
|
||||
}
|
||||
~ss_matcher() { delete m_matcher; };
|
||||
|
||||
private:
|
||||
ss_matcher() : m_pattern("") {};
|
||||
std::string m_pattern;
|
||||
bool m_needCopies{false};
|
||||
const RDKit::ROMol *m_matcher{nullptr};
|
||||
};
|
||||
} // namespace
|
||||
|
||||
typedef boost::flyweight<boost::flyweights::key_value<std::string, ss_matcher>,
|
||||
boost::flyweights::no_tracking>
|
||||
pattern_flyweight;
|
||||
// Definitions for feature points adapted from:
|
||||
// Gobbi and Poppinger, Biotech. Bioeng. _61_ 47-54 (1998)
|
||||
const std::vector<std::vector<std::string>> smartsPatterns = {
|
||||
{"[$([N;!H0;v3,v4&+1]),\
|
||||
$([O,S;H1;+0]),\
|
||||
n&H1&+0]"}, // Donor
|
||||
{"[$([O,S;H1;v2;!$(*-*=[O,N,P,S])]),\
|
||||
$([O,S;H0;v2]),\
|
||||
$([O,S;-]),\
|
||||
$([N;v3;!$(N-*=[O,N,P,S])]),\
|
||||
n&H0&+0,\
|
||||
$([o,s;+0;!$([o,s]:n);!$([o,s]:c:n)])]"}, // Acceptor
|
||||
{
|
||||
"[r]1[r][r]1",
|
||||
"[r]1[r][r][r]1",
|
||||
"[r]1[r][r][r][r]1",
|
||||
"[r]1[r][r][r][r][r]1",
|
||||
"[r]1[r][r][r][r][r][r]1",
|
||||
}, // rings
|
||||
// "[a]", //
|
||||
// Aromatic
|
||||
// "[F,Cl,Br,I]", // Halogen
|
||||
{"[#7;+,\
|
||||
$([N;H2&+0][$([C,a]);!$([C,a](=O))]),\
|
||||
$([N;H1&+0]([$([C,a]);!$([C,a](=O))])[$([C,a]);!$([C,a](=O))]),\
|
||||
$([N;H0&+0]([C;!$(C(=O))])([C;!$(C(=O))])[C;!$(C(=O))])]"}, // Basic
|
||||
{"[$([C,S](=[O,S,P])-[O;H1,-1])]"} // Acidic
|
||||
};
|
||||
std::vector<std::vector<const ROMol *>> *getPh4Patterns() {
|
||||
static std::unique_ptr<std::vector<std::vector<const ROMol *>>> patterns;
|
||||
if (!patterns) {
|
||||
patterns.reset(new std::vector<std::vector<const ROMol *>>());
|
||||
for (const auto &smartsV : smartsPatterns) {
|
||||
std::vector<const ROMol *> v;
|
||||
for (const auto &smarts : smartsV) {
|
||||
const ROMol *matcher = pattern_flyweight(smarts).get().getMatcher();
|
||||
CHECK_INVARIANT(matcher, "bad smarts");
|
||||
v.push_back(matcher);
|
||||
}
|
||||
patterns->push_back(std::move(v));
|
||||
}
|
||||
}
|
||||
|
||||
return patterns.get();
|
||||
}
|
||||
|
||||
// the conformer is translated to the origin
|
||||
ShapeInput PrepareConformer(const ROMol &mol, int confId, bool useColors) {
|
||||
ShapeInput res;
|
||||
|
||||
// unpack features (PubChem-specific property from SDF)
|
||||
// NOTE: this unpacking assumes that RWMol-atom-index = SDF-atom-number - 1
|
||||
// e.g. RWMol uses [0..N-1] and SDF uses [1..N], with atoms in the same
|
||||
// order
|
||||
|
||||
std::vector<std::pair<std::vector<unsigned int>, unsigned int>>
|
||||
feature_idx_type;
|
||||
|
||||
if (useColors) {
|
||||
std::string features;
|
||||
if (mol.getPropIfPresent(pubchemFeatureName, features)) {
|
||||
// regular atoms have type 0; feature "atoms" (features represented by a
|
||||
// single point+radius) must have type > 0
|
||||
static const std::map<std::string, unsigned int> atomTypes = {
|
||||
{"acceptor", 1}, {"anion", 2}, {"cation", 3},
|
||||
{"donor", 4}, {"hydrophobe", 5}, {"rings", 6},
|
||||
};
|
||||
|
||||
std::istringstream iss(features);
|
||||
std::string line;
|
||||
unsigned int n = 0;
|
||||
while (std::getline(iss, line)) {
|
||||
if (n == 0) {
|
||||
feature_idx_type.resize(stoul(line));
|
||||
}
|
||||
|
||||
else {
|
||||
unsigned int f = n - 1;
|
||||
if (f >= feature_idx_type.size()) {
|
||||
throw ValueErrorException("Too many features");
|
||||
}
|
||||
|
||||
std::istringstream iss2(line);
|
||||
std::string token;
|
||||
unsigned int t = 0;
|
||||
while (std::getline(iss2, token, ' ')) {
|
||||
if (t == 0) {
|
||||
feature_idx_type[f].first.resize(stoul(token));
|
||||
} else if (t <= feature_idx_type[f].first.size()) {
|
||||
feature_idx_type[f].first[t - 1] = stoul(token) - 1;
|
||||
} else {
|
||||
auto type = atomTypes.find(token);
|
||||
if (type == atomTypes.end()) {
|
||||
throw ValueErrorException("Invalid feature type: " + token);
|
||||
}
|
||||
feature_idx_type[f].second = type->second;
|
||||
}
|
||||
++t;
|
||||
}
|
||||
if (t != (feature_idx_type[f].first.size() + 2)) {
|
||||
throw ValueErrorException("Wrong number of tokens in feature");
|
||||
}
|
||||
}
|
||||
|
||||
++n;
|
||||
}
|
||||
if (n != (feature_idx_type.size() + 1)) {
|
||||
throw ValueErrorException("Wrong number of features");
|
||||
}
|
||||
|
||||
DEBUG_MSG("# features: " << feature_idx_type.size());
|
||||
} else {
|
||||
const auto pattVects = getPh4Patterns();
|
||||
feature_idx_type.clear();
|
||||
|
||||
unsigned pattIdx = 1;
|
||||
for (const auto &patts : *pattVects) {
|
||||
for (const auto patt : patts) {
|
||||
auto matches = SubstructMatch(mol, *patt);
|
||||
for (auto match : matches) {
|
||||
std::vector<unsigned int> ats;
|
||||
for (const auto &pr : match) {
|
||||
ats.push_back(pr.second);
|
||||
}
|
||||
feature_idx_type.emplace_back(ats, pattIdx);
|
||||
}
|
||||
}
|
||||
++pattIdx;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// unpack atoms
|
||||
|
||||
auto &conformer = mol.getConformer(confId);
|
||||
if (!conformer.is3D()) {
|
||||
throw ValueErrorException("Conformer must be 3D");
|
||||
}
|
||||
unsigned int nAtoms = mol.getNumAtoms();
|
||||
// DEBUG_MSG("num atoms: " << nAtoms);
|
||||
|
||||
unsigned int nHeavyAtoms = mol.getNumHeavyAtoms();
|
||||
DEBUG_MSG("num heavy atoms: " << nHeavyAtoms);
|
||||
|
||||
unsigned int nAlignmentAtoms = nHeavyAtoms + feature_idx_type.size();
|
||||
|
||||
std::vector<double> rad_vector(nAlignmentAtoms);
|
||||
res.atom_type_vector.resize(nAlignmentAtoms, 0);
|
||||
|
||||
RDGeom::Point3D ave;
|
||||
for (unsigned i = 0; i < nAtoms; ++i) {
|
||||
unsigned int Z = mol.getAtomWithIdx(i)->getAtomicNum();
|
||||
if (Z > 1) {
|
||||
ave += conformer.getAtomPos(i);
|
||||
|
||||
if (vdw_radii.find(Z) == vdw_radii.end()) {
|
||||
throw ValueErrorException("No VdW radius for atom with Z=" +
|
||||
std::to_string(Z));
|
||||
}
|
||||
rad_vector[i] = vdw_radii.at(Z);
|
||||
}
|
||||
}
|
||||
|
||||
// translate steric center to origin
|
||||
ave /= nHeavyAtoms;
|
||||
DEBUG_MSG("steric center: (" << ave << ")");
|
||||
res.shift = {-ave.x, -ave.y, -ave.z};
|
||||
res.coord.resize(nAlignmentAtoms * 3);
|
||||
|
||||
for (unsigned i = 0; i < nAtoms; ++i) {
|
||||
// translate all atoms
|
||||
RDGeom::Point3D pos = conformer.getAtomPos(i);
|
||||
pos -= ave;
|
||||
|
||||
// but use only non-H for alignment
|
||||
if (mol.getAtomWithIdx(i)->getAtomicNum() > 1) {
|
||||
res.coord[i * 3] = pos.x;
|
||||
res.coord[(i * 3) + 1] = pos.y;
|
||||
res.coord[(i * 3) + 2] = pos.z;
|
||||
}
|
||||
}
|
||||
|
||||
// get feature coordinates - simply the average of coords of all atoms in the
|
||||
// feature
|
||||
for (unsigned i = 0; i < feature_idx_type.size(); ++i) {
|
||||
RDGeom::Point3D floc;
|
||||
for (unsigned int j = 0; j < feature_idx_type[i].first.size(); ++j) {
|
||||
unsigned int idx = feature_idx_type[i].first[j];
|
||||
if (idx >= nAtoms || mol.getAtomWithIdx(idx)->getAtomicNum() <= 1) {
|
||||
throw ValueErrorException("Invalid feature atom index");
|
||||
}
|
||||
floc += conformer.getAtomPos(idx);
|
||||
}
|
||||
floc /= feature_idx_type[i].first.size();
|
||||
floc -= ave;
|
||||
DEBUG_MSG("feature type " << feature_idx_type[i].second << " (" << floc
|
||||
<< ")");
|
||||
|
||||
auto array_idx = nHeavyAtoms + i;
|
||||
res.coord[array_idx * 3] = floc.x;
|
||||
res.coord[(array_idx * 3) + 1] = floc.y;
|
||||
res.coord[(array_idx * 3) + 2] = floc.z;
|
||||
rad_vector[array_idx] = radius_color;
|
||||
res.atom_type_vector[array_idx] = feature_idx_type[i].second;
|
||||
}
|
||||
|
||||
Align3D::setAlpha(rad_vector.data(), rad_vector.size(), res.alpha_vector);
|
||||
|
||||
// regular atom self overlap
|
||||
Align3D::getVolumeAtomIndexVector(res.atom_type_vector.data(),
|
||||
res.atom_type_vector.size(),
|
||||
res.volumeAtomIndexVector);
|
||||
res.sov = Align3D::ComputeShapeOverlap(
|
||||
res.coord.data(), res.alpha_vector, res.volumeAtomIndexVector,
|
||||
res.coord.data(), res.alpha_vector, res.volumeAtomIndexVector);
|
||||
DEBUG_MSG("sov: " << res.sov);
|
||||
|
||||
// feature self overlap
|
||||
if (feature_idx_type.size() > 0) {
|
||||
Align3D::getColorAtomType2IndexVectorMap(res.atom_type_vector.data(),
|
||||
res.atom_type_vector.size(),
|
||||
res.colorAtomType2IndexVectorMap);
|
||||
res.sof = Align3D::ComputeFeatureOverlap(
|
||||
res.coord.data(), res.alpha_vector, res.colorAtomType2IndexVectorMap,
|
||||
res.coord.data(), res.alpha_vector, res.colorAtomType2IndexVectorMap);
|
||||
DEBUG_MSG("sof: " << res.sof);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
std::pair<double, double> AlignMolecule(const ShapeInput &refShape, ROMol &fit,
|
||||
std::vector<float> &matrix,
|
||||
int fitConfId, bool useColors,
|
||||
double opt_param,
|
||||
unsigned int max_preiters,
|
||||
unsigned int max_postiters) {
|
||||
PRECONDITION(matrix.size() == 12, "bad matrix size");
|
||||
Align3D::setUseCutOff(true);
|
||||
|
||||
DEBUG_MSG("Fit details:");
|
||||
auto fitShape = PrepareConformer(fit, fitConfId, useColors);
|
||||
|
||||
std::set<unsigned int> jointColorAtomTypeSet;
|
||||
Align3D::getJointColorTypeSet(
|
||||
refShape.atom_type_vector.data(), refShape.atom_type_vector.size(),
|
||||
fitShape.atom_type_vector.data(), fitShape.atom_type_vector.size(),
|
||||
jointColorAtomTypeSet);
|
||||
auto mapCp = refShape.colorAtomType2IndexVectorMap;
|
||||
Align3D::restrictColorAtomType2IndexVectorMap(mapCp, jointColorAtomTypeSet);
|
||||
Align3D::restrictColorAtomType2IndexVectorMap(
|
||||
fitShape.colorAtomType2IndexVectorMap, jointColorAtomTypeSet);
|
||||
|
||||
DEBUG_MSG("Running alignment...");
|
||||
double nbr_st = 0.0;
|
||||
double nbr_ct = 0.0;
|
||||
Align3D::Neighbor_Conformers(
|
||||
refShape.coord.data(), refShape.alpha_vector,
|
||||
refShape.volumeAtomIndexVector, mapCp, refShape.sov, refShape.sof,
|
||||
fitShape.coord.data(), fitShape.alpha_vector,
|
||||
fitShape.volumeAtomIndexVector, fitShape.colorAtomType2IndexVectorMap,
|
||||
fitShape.sov, fitShape.sof, !jointColorAtomTypeSet.empty(), max_preiters,
|
||||
max_postiters, opt_param, matrix.data(), nbr_st, nbr_ct);
|
||||
|
||||
DEBUG_MSG("Done!");
|
||||
DEBUG_MSG("nbr_st: " << nbr_st);
|
||||
DEBUG_MSG("nbr_ct: " << nbr_ct);
|
||||
|
||||
// transform fit coords
|
||||
Conformer &fit_conformer = fit.getConformer(fitConfId);
|
||||
if (3 * fit.getNumAtoms() != fitShape.coord.size()) {
|
||||
// Hs were removed
|
||||
fitShape.coord.resize(3 * fit.getNumAtoms());
|
||||
for (unsigned int i = 0; i < fit.getNumAtoms(); ++i) {
|
||||
const auto &pos = fit_conformer.getAtomPos(i);
|
||||
fitShape.coord[i * 3] = pos.x;
|
||||
fitShape.coord[i * 3 + 1] = pos.y;
|
||||
fitShape.coord[i * 3 + 2] = pos.z;
|
||||
}
|
||||
}
|
||||
std::vector<float> transformed(fit.getNumAtoms() * 3);
|
||||
Align3D::VApplyRotTransMatrix(transformed.data(), fitShape.coord.data(),
|
||||
fit.getNumAtoms(), matrix.data());
|
||||
|
||||
for (unsigned i = 0; i < fit.getNumAtoms(); ++i) {
|
||||
RDGeom::Point3D &pos = fit_conformer.getAtomPos(i);
|
||||
// both conformers have been translated to the origin, translate the fit
|
||||
// conformer back to the steric center of the reference.
|
||||
pos.x = transformed[i * 3] - refShape.shift[0] + fitShape.shift[0];
|
||||
pos.y = transformed[(i * 3) + 1] - refShape.shift[1] + fitShape.shift[1];
|
||||
pos.z = transformed[(i * 3) + 2] - refShape.shift[2] + fitShape.shift[2];
|
||||
}
|
||||
fit.setProp("shape_align_shape_tanimoto", nbr_st);
|
||||
fit.setProp("shape_align_color_tanimoto", nbr_ct);
|
||||
|
||||
return std::make_pair(nbr_st, nbr_ct);
|
||||
}
|
||||
|
||||
std::pair<double, double> AlignMolecule(const ROMol &ref, ROMol &fit,
|
||||
std::vector<float> &matrix,
|
||||
int refConfId, int fitConfId,
|
||||
bool useColors, double opt_param,
|
||||
unsigned int max_preiters,
|
||||
unsigned int max_postiters) {
|
||||
Align3D::setUseCutOff(true);
|
||||
|
||||
DEBUG_MSG("Reference details:");
|
||||
auto refShape = PrepareConformer(ref, refConfId, useColors);
|
||||
|
||||
return AlignMolecule(refShape, fit, matrix, fitConfId, useColors, opt_param,
|
||||
max_preiters, max_postiters);
|
||||
}
|
||||
30
External/pubchem_shape/PubChemShape.hpp
vendored
Normal file
30
External/pubchem_shape/PubChemShape.hpp
vendored
Normal file
@@ -0,0 +1,30 @@
|
||||
#include <GraphMol/ROMol.h>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
struct RDKIT_PUBCHEMSHAPE_EXPORT ShapeInput {
|
||||
std::vector<float> coord;
|
||||
std::vector<double> alpha_vector;
|
||||
std::vector<unsigned int> atom_type_vector;
|
||||
std::vector<unsigned int> volumeAtomIndexVector;
|
||||
std::map<unsigned int, std::vector<unsigned int>>
|
||||
colorAtomType2IndexVectorMap;
|
||||
std::vector<double> shift;
|
||||
double sov{0.0};
|
||||
double sof{0.0};
|
||||
};
|
||||
|
||||
RDKIT_PUBCHEMSHAPE_EXPORT ShapeInput PrepareConformer(const RDKit::ROMol &mol,
|
||||
int confId = -1,
|
||||
bool useColors = true);
|
||||
|
||||
RDKIT_PUBCHEMSHAPE_EXPORT std::pair<double, double> AlignMolecule(
|
||||
const ShapeInput &refShape, RDKit::ROMol &fit, std::vector<float> &matrix,
|
||||
int fitConfId = -1, bool useColors = true, double opt_param = 0.5,
|
||||
unsigned int max_preiters = 3u, unsigned int max_postiters = 16u);
|
||||
|
||||
RDKIT_PUBCHEMSHAPE_EXPORT std::pair<double, double> AlignMolecule(
|
||||
const RDKit::ROMol &ref, RDKit::ROMol &fit, std::vector<float> &matrix,
|
||||
int refConfId = -1, int fitConfId = -1, bool useColors = true,
|
||||
double opt_param = 0.5, unsigned int max_preiters = 3u,
|
||||
unsigned int max_postiters = 16u);
|
||||
3
External/pubchem_shape/README.md
vendored
Normal file
3
External/pubchem_shape/README.md
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
This is a thin RDKit wrapper around pubchem-align3d, a shape-based alignment code written by the PubChem team.
|
||||
|
||||
The repo for pubchem-align3d is here: https://github.com/ncbi/pubchem-align3d/
|
||||
13
External/pubchem_shape/Wrap/CMakeLists.txt
vendored
Normal file
13
External/pubchem_shape/Wrap/CMakeLists.txt
vendored
Normal file
@@ -0,0 +1,13 @@
|
||||
find_package(Python3 COMPONENTS Interpreter Development NumPy REQUIRED)
|
||||
find_package(Python3 COMPONENTS Interpreter REQUIRED)
|
||||
set(py3lib "python${Python3_VERSION_MAJOR}${Python3_VERSION_MINOR}")
|
||||
find_package(Boost COMPONENTS ${py3lib} REQUIRED CONFIG)
|
||||
|
||||
|
||||
rdkit_python_extension(rdShapeAlign
|
||||
cshapealign.cpp
|
||||
DEST Chem
|
||||
LINK_LIBRARIES
|
||||
PubChemShape )
|
||||
add_pytest(pyShapeAlign
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/test_rdshapealign.py)
|
||||
94
External/pubchem_shape/Wrap/cshapealign.cpp
vendored
Normal file
94
External/pubchem_shape/Wrap/cshapealign.cpp
vendored
Normal file
@@ -0,0 +1,94 @@
|
||||
/*******************************************************************************
|
||||
|
||||
Copyright 2024 by Greg Landrum and the pubchem_shape contributors
|
||||
|
||||
This file is part of pubchem_shape
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
***********************************************************************/
|
||||
|
||||
#include <boost/python.hpp>
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "../PubChemShape.hpp"
|
||||
#include <GraphMol/RDKitBase.h>
|
||||
#include <RDBoost/Wrap.h>
|
||||
namespace python = boost::python;
|
||||
|
||||
namespace helpers {
|
||||
python::tuple alignMol(const RDKit::ROMol &ref, RDKit::ROMol &probe,
|
||||
int refConfId, int probeConfId, bool useColors,
|
||||
double opt_param, unsigned int max_preiters,
|
||||
unsigned int max_postiters) {
|
||||
std::vector<float> matrix(12, 0.0);
|
||||
auto [nbr_st, nbr_ct] =
|
||||
AlignMolecule(ref, probe, matrix, refConfId, probeConfId, useColors,
|
||||
opt_param, max_preiters, max_postiters);
|
||||
return python::make_tuple(nbr_st, nbr_ct);
|
||||
}
|
||||
python::tuple alignMol2(const ShapeInput &ref, RDKit::ROMol &probe,
|
||||
int probeConfId, bool useColors, double opt_param,
|
||||
unsigned int max_preiters, unsigned int max_postiters) {
|
||||
std::vector<float> matrix(12, 0.0);
|
||||
auto [nbr_st, nbr_ct] =
|
||||
AlignMolecule(ref, probe, matrix, probeConfId, useColors, opt_param,
|
||||
max_preiters, max_postiters);
|
||||
return python::make_tuple(nbr_st, nbr_ct);
|
||||
}
|
||||
ShapeInput *prepConf(const RDKit::ROMol &mol, int confId, bool useColors) {
|
||||
return new ShapeInput(PrepareConformer(mol, confId, useColors));
|
||||
}
|
||||
} // namespace helpers
|
||||
|
||||
void wrap_pubchemshape() {
|
||||
RegisterVectorConverter<float>("FloatVector");
|
||||
RegisterVectorConverter<double>("DoubleVector");
|
||||
RegisterVectorConverter<unsigned int>("UnsignedIntVector");
|
||||
|
||||
python::def(
|
||||
"AlignMol", &helpers::alignMol,
|
||||
(python::arg("ref"), python::arg("probe"), python::arg("refConfId") = -1,
|
||||
python::arg("probeConfId") = -1, python::arg("useColors") = true,
|
||||
python::arg("opt_param") = 0.5, python::arg("max_preiters") = 3,
|
||||
python::arg("max_postiters") = 16),
|
||||
"aligns probe to ref, probe is modified");
|
||||
python::def("AlignMol", &helpers::alignMol2,
|
||||
(python::arg("refShape"), python::arg("probe"),
|
||||
python::arg("probeConfId") = -1, python::arg("useColors") = true,
|
||||
python::arg("opt_param") = 0.5, python::arg("max_preiters") = 3,
|
||||
python::arg("max_postiters") = 16),
|
||||
"aligns probe to reference shape, probe is modified");
|
||||
python::def("PrepareConformer", &helpers::prepConf,
|
||||
(python::arg("mol"), python::arg("confId") = -1,
|
||||
python::arg("useColors") = true),
|
||||
"returns a shape object for a molecule",
|
||||
python::return_value_policy<python::manage_new_object>());
|
||||
python::class_<ShapeInput, boost::noncopyable>("ShapeInput", python::no_init)
|
||||
.def_readwrite("coord", &ShapeInput::coord)
|
||||
.def_readwrite("alpha_vector", &ShapeInput::alpha_vector)
|
||||
.def_readwrite("atom_type_vector", &ShapeInput::atom_type_vector)
|
||||
.def_readwrite("volumeAtomIndexVector",
|
||||
&ShapeInput::volumeAtomIndexVector)
|
||||
.def_readwrite("shift", &ShapeInput::shift)
|
||||
.def_readwrite("sov", &ShapeInput::sov)
|
||||
.def_readwrite("sof", &ShapeInput::sof);
|
||||
}
|
||||
|
||||
BOOST_PYTHON_MODULE(rdShapeAlign) { wrap_pubchemshape(); }
|
||||
35
External/pubchem_shape/Wrap/test_rdshapealign.py
vendored
Normal file
35
External/pubchem_shape/Wrap/test_rdshapealign.py
vendored
Normal file
@@ -0,0 +1,35 @@
|
||||
import unittest
|
||||
from rdkit import Chem
|
||||
from rdkit.Chem import rdShapeAlign
|
||||
from rdkit import RDConfig
|
||||
|
||||
datadir = RDConfig.RDBaseDir + '/External/pubchem_shape/test_data';
|
||||
|
||||
|
||||
class TestCase(unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
suppl = Chem.SDMolSupplier(datadir + '/test1.sdf')
|
||||
self.ref = suppl[0]
|
||||
self.probe = suppl[1]
|
||||
|
||||
def test1_Defaults(self):
|
||||
tpl = rdShapeAlign.AlignMol(self.ref, self.probe)
|
||||
self.assertAlmostEqual(tpl[0], 0.773, places=3)
|
||||
self.assertAlmostEqual(tpl[1], 0.303, places=3)
|
||||
|
||||
def test2_NoColor(self):
|
||||
tpl = rdShapeAlign.AlignMol(self.ref, self.probe, useColors=False)
|
||||
self.assertAlmostEqual(tpl[0], 0.773, places=3)
|
||||
self.assertAlmostEqual(tpl[1], 0.0, places=3)
|
||||
|
||||
def test3_FromShape(self):
|
||||
shp = rdShapeAlign.PrepareConformer(self.ref)
|
||||
self.assertTrue(type(shp) == rdShapeAlign.ShapeInput)
|
||||
tpl = rdShapeAlign.AlignMol(shp, self.probe)
|
||||
self.assertAlmostEqual(tpl[0], 0.773, places=3)
|
||||
self.assertAlmostEqual(tpl[1], 0.303, places=3)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
80
External/pubchem_shape/sdf_align.cpp
vendored
Normal file
80
External/pubchem_shape/sdf_align.cpp
vendored
Normal file
@@ -0,0 +1,80 @@
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
|
||||
#include <GraphMol/FileParsers/MolSupplier.h>
|
||||
#include <GraphMol/FileParsers/MolWriters.h>
|
||||
#include <GraphMol/Fingerprints/FingerprintUtil.h>
|
||||
#include <GraphMol/RWMol.h>
|
||||
#include <GraphMol/SmilesParse/SmilesParse.h>
|
||||
#include <GraphMol/Substruct/SubstructMatch.h>
|
||||
|
||||
#include <RDGeneral/BoostEndInclude.h>
|
||||
#include <RDGeneral/BoostStartInclude.h>
|
||||
#include <boost/flyweight.hpp>
|
||||
#include <boost/flyweight/key_value.hpp>
|
||||
#include <boost/flyweight/no_tracking.hpp>
|
||||
|
||||
#include "PubChemShape.hpp"
|
||||
|
||||
using namespace std;
|
||||
|
||||
#define ERRORTHROW(msg_stream) \
|
||||
do { \
|
||||
ostringstream os; \
|
||||
os << msg_stream; \
|
||||
throw runtime_error(os.str()); \
|
||||
} while (0)
|
||||
|
||||
// #define DEBUG_MSG(msg_stream) cout << msg_stream << '\n'
|
||||
#define DEBUG_MSG(msg_stream)
|
||||
|
||||
using namespace RDKit;
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
int status = -1;
|
||||
|
||||
try {
|
||||
if (argc != 2)
|
||||
ERRORTHROW(
|
||||
"Usage: sdf_align <input.sdf> The first molecule in the SDF "
|
||||
"is the reference"); //"opt_param
|
||||
// max_preiters
|
||||
// max_postiters");
|
||||
bool useColors = true;
|
||||
|
||||
bool sanitize = true;
|
||||
bool removeHs = false;
|
||||
SDMolSupplier suppl(argv[1], sanitize, removeHs);
|
||||
SDWriter writer("sdf_align.out.sdf");
|
||||
unique_ptr<ROMol> ref{suppl[0]};
|
||||
if (!ref.get()) {
|
||||
ERRORTHROW("Failed to read ref conformer");
|
||||
}
|
||||
|
||||
for (auto i = 1u; i < suppl.length(); ++i) {
|
||||
std::unique_ptr<ROMol> fit{suppl[i]};
|
||||
if (!fit) {
|
||||
continue;
|
||||
}
|
||||
vector<float> matrix(12, 0.0);
|
||||
int refConfId = -1;
|
||||
int fitConfId = -1;
|
||||
auto [nbr_st, nbr_ct] =
|
||||
AlignMolecule(*ref, *fit, matrix, refConfId, fitConfId, useColors);
|
||||
if (i == 1) {
|
||||
writer.write(*ref, refConfId);
|
||||
}
|
||||
writer.write(*fit, fitConfId);
|
||||
}
|
||||
|
||||
status = 0;
|
||||
|
||||
} catch (std::exception &e) {
|
||||
cerr << "Caught std::exception: " << e.what() << '\n';
|
||||
} catch (...) {
|
||||
cerr << "Caught unknown exception\n";
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
121
External/pubchem_shape/test.cpp
vendored
Normal file
121
External/pubchem_shape/test.cpp
vendored
Normal file
@@ -0,0 +1,121 @@
|
||||
#include <catch2/catch_test_macros.hpp>
|
||||
#include <catch2/matchers/catch_matchers_floating_point.hpp>
|
||||
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
|
||||
#include <GraphMol/FileParsers/MolSupplier.h>
|
||||
#include <GraphMol/RWMol.h>
|
||||
|
||||
#include "PubChemShape.hpp"
|
||||
|
||||
using namespace RDKit;
|
||||
|
||||
TEST_CASE("basic alignment") {
|
||||
std::string dirName = getenv("RDBASE");
|
||||
dirName += "/External/pubchem_shape/test_data";
|
||||
|
||||
auto suppl = v2::FileParsers::SDMolSupplier(dirName + "/test1.sdf");
|
||||
auto ref = suppl[0];
|
||||
REQUIRE(ref);
|
||||
auto probe = suppl[1];
|
||||
REQUIRE(probe);
|
||||
SECTION("basics") {
|
||||
RWMol cp(*probe);
|
||||
std::vector<float> matrix(12, 0.0);
|
||||
auto [nbr_st, nbr_ct] = AlignMolecule(*ref, cp, matrix);
|
||||
CHECK_THAT(nbr_st, Catch::Matchers::WithinAbs(0.773, 0.005));
|
||||
CHECK_THAT(nbr_ct, Catch::Matchers::WithinAbs(0.303, 0.005));
|
||||
for (auto i = 0u; i < probe->getNumAtoms(); ++i) {
|
||||
CHECK(probe->getConformer().getAtomPos(i).x !=
|
||||
cp.getConformer().getAtomPos(i).x);
|
||||
}
|
||||
}
|
||||
SECTION("from shape") {
|
||||
auto ref_shape = PrepareConformer(*ref);
|
||||
std::vector<float> matrix(12, 0.0);
|
||||
auto [nbr_st, nbr_ct] = AlignMolecule(ref_shape, *probe, matrix);
|
||||
CHECK_THAT(nbr_st, Catch::Matchers::WithinAbs(0.773, 0.005));
|
||||
CHECK_THAT(nbr_ct, Catch::Matchers::WithinAbs(0.303, 0.005));
|
||||
}
|
||||
SECTION("RDKit features") {
|
||||
ref->clearProp("PUBCHEM_PHARMACOPHORE_FEATURES");
|
||||
probe->clearProp("PUBCHEM_PHARMACOPHORE_FEATURES");
|
||||
std::vector<float> matrix(12, 0.0);
|
||||
auto [nbr_st, nbr_ct] = AlignMolecule(*ref, *probe, matrix);
|
||||
CHECK_THAT(nbr_st, Catch::Matchers::WithinAbs(0.773, 0.005));
|
||||
CHECK_THAT(nbr_ct, Catch::Matchers::WithinAbs(0.231, 0.005));
|
||||
}
|
||||
SECTION("no colors") {
|
||||
std::vector<float> matrix(12, 0.0);
|
||||
int refConfId = -1;
|
||||
int prbConfId = -1;
|
||||
bool useColors = false;
|
||||
auto [nbr_st, nbr_ct] =
|
||||
AlignMolecule(*ref, *probe, matrix, refConfId, prbConfId, useColors);
|
||||
CHECK_THAT(nbr_st, Catch::Matchers::WithinAbs(0.773, 0.005));
|
||||
CHECK_THAT(nbr_ct, Catch::Matchers::WithinAbs(0.000, 0.005));
|
||||
}
|
||||
SECTION("we are re-entrant") {
|
||||
RWMol cp(*probe);
|
||||
std::vector<float> matrix(12, 0.0);
|
||||
auto [nbr_st, nbr_ct] = AlignMolecule(*ref, cp, matrix);
|
||||
RWMol cp2(cp);
|
||||
auto [nbr_st2, nbr_ct2] = AlignMolecule(*ref, cp2, matrix);
|
||||
CHECK_THAT(nbr_st, Catch::Matchers::WithinAbs(nbr_st2, 0.005));
|
||||
CHECK_THAT(nbr_ct, Catch::Matchers::WithinAbs(nbr_ct2, 0.005));
|
||||
|
||||
for (auto i = 0u; i < probe->getNumAtoms(); ++i) {
|
||||
CHECK_THAT(cp.getConformer().getAtomPos(i).x,
|
||||
Catch::Matchers::WithinAbs(cp2.getConformer().getAtomPos(i).x,
|
||||
0.005));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("bulk") {
|
||||
std::string dirName = getenv("RDBASE");
|
||||
dirName += "/External/pubchem_shape/test_data";
|
||||
auto suppl = v2::FileParsers::SDMolSupplier(dirName + "/bulk.pubchem.sdf");
|
||||
auto ref = suppl[0];
|
||||
REQUIRE(ref);
|
||||
for (auto i = 1u; i < suppl.length(); ++i) {
|
||||
auto probe = suppl[1];
|
||||
REQUIRE(probe);
|
||||
std::vector<float> matrix(12, 0.0);
|
||||
auto [nbr_st, nbr_ct] = AlignMolecule(*ref, *probe, matrix);
|
||||
CHECK_THAT(nbr_st,
|
||||
Catch::Matchers::WithinAbs(
|
||||
probe->getProp<float>("shape_align_shape_tanimoto"), 0.005));
|
||||
CHECK_THAT(nbr_ct,
|
||||
Catch::Matchers::WithinAbs(
|
||||
probe->getProp<float>("shape_align_color_tanimoto"), 0.005));
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("handling molecules with Hs") {
|
||||
std::string dirName = getenv("RDBASE");
|
||||
dirName += "/External/pubchem_shape/test_data";
|
||||
|
||||
v2::FileParsers::MolFileParserParams params;
|
||||
params.removeHs = false;
|
||||
auto suppl =
|
||||
v2::FileParsers::SDMolSupplier(dirName + "/align_with_hs.sdf", params);
|
||||
auto ref = suppl[0];
|
||||
REQUIRE(ref);
|
||||
auto probe = suppl[1];
|
||||
REQUIRE(probe);
|
||||
SECTION("basics") {
|
||||
RWMol cp(*probe);
|
||||
std::vector<float> matrix(12, 0.0);
|
||||
auto [nbr_st, nbr_ct] = AlignMolecule(*ref, cp, matrix);
|
||||
CHECK_THAT(nbr_st, Catch::Matchers::WithinAbs(0.837, 0.005));
|
||||
CHECK_THAT(nbr_ct, Catch::Matchers::WithinAbs(0.694, 0.005));
|
||||
for (auto i = 0u; i < cp.getNumAtoms(); ++i) {
|
||||
// the failure mode here was that Hs had HUGE coordinates
|
||||
auto pos = cp.getConformer().getAtomPos(i);
|
||||
CHECK((pos.x > -10 && pos.x < 10));
|
||||
}
|
||||
}
|
||||
}
|
||||
106
External/pubchem_shape/test_data/align_with_hs.sdf
vendored
Normal file
106
External/pubchem_shape/test_data/align_with_hs.sdf
vendored
Normal file
@@ -0,0 +1,106 @@
|
||||
ref
|
||||
RDKit 3D
|
||||
|
||||
0 0 0 0 0 0 0 0 0 0999 V3000
|
||||
M V30 BEGIN CTAB
|
||||
M V30 COUNTS 20 20 0 0 0
|
||||
M V30 BEGIN ATOM
|
||||
M V30 1 C -1.243631 -1.056161 0.012000 0
|
||||
M V30 2 C -2.484202 -0.539894 -0.306757 0
|
||||
M V30 3 C -2.794438 0.793168 -0.108146 0
|
||||
M V30 4 C -1.787925 1.596812 0.433091 0
|
||||
M V30 5 N -0.571003 1.079472 0.743255 0
|
||||
M V30 6 C -0.270575 -0.217935 0.549419 0
|
||||
M V30 7 C 1.052234 -0.806392 0.880565 0
|
||||
M V30 8 C 2.005463 -0.723537 -0.301576 0
|
||||
M V30 9 C 2.215991 0.705977 -0.696919 0
|
||||
M V30 10 H -1.040984 -2.099314 -0.157577 0
|
||||
M V30 11 H -3.278069 -1.167168 -0.732104 0
|
||||
M V30 12 H -3.767439 1.211397 -0.355050 0
|
||||
M V30 13 H -2.013491 2.654386 0.597951 0
|
||||
M V30 14 H 0.946668 -1.847615 1.199110 0
|
||||
M V30 15 H 1.527663 -0.268265 1.740520 0
|
||||
M V30 16 H 2.965394 -1.162120 0.010096 0
|
||||
M V30 17 H 1.579697 -1.299175 -1.152527 0
|
||||
M V30 18 H 1.276250 1.227295 -0.932445 0
|
||||
M V30 19 H 2.773214 1.193331 0.150198 0
|
||||
M V30 20 H 2.909182 0.725741 -1.573103 0
|
||||
M V30 END ATOM
|
||||
M V30 BEGIN BOND
|
||||
M V30 1 2 1 2
|
||||
M V30 2 1 2 3
|
||||
M V30 3 2 3 4
|
||||
M V30 4 1 4 5
|
||||
M V30 5 2 5 6
|
||||
M V30 6 1 6 7
|
||||
M V30 7 1 7 8
|
||||
M V30 8 1 8 9
|
||||
M V30 9 1 6 1
|
||||
M V30 10 1 1 10
|
||||
M V30 11 1 2 11
|
||||
M V30 12 1 3 12
|
||||
M V30 13 1 4 13
|
||||
M V30 14 1 7 14
|
||||
M V30 15 1 7 15
|
||||
M V30 16 1 8 16
|
||||
M V30 17 1 8 17
|
||||
M V30 18 1 9 18
|
||||
M V30 19 1 9 19
|
||||
M V30 20 1 9 20
|
||||
M V30 END BOND
|
||||
M V30 END CTAB
|
||||
M END
|
||||
$$$$
|
||||
prb
|
||||
RDKit 3D
|
||||
|
||||
0 0 0 0 0 0 0 0 0 0999 V3000
|
||||
M V30 BEGIN CTAB
|
||||
M V30 COUNTS 20 20 0 0 0
|
||||
M V30 BEGIN ATOM
|
||||
M V30 1 C -0.309632 -0.572843 -0.290631 0
|
||||
M V30 2 C -1.554814 -0.141033 0.148348 0
|
||||
M V30 3 C -2.633169 -1.122608 0.369224 0
|
||||
M V30 4 C -1.738823 1.207246 0.364168 0
|
||||
M V30 5 C -0.717149 2.120087 0.152894 0
|
||||
M V30 6 N 0.488305 1.679942 -0.274679 0
|
||||
M V30 7 C 0.699665 0.360415 -0.495647 0
|
||||
M V30 8 C 2.027105 -0.141403 -0.968505 0
|
||||
M V30 9 C 2.806988 -0.435730 0.319076 0
|
||||
M V30 10 H -0.121517 -1.623149 -0.472715 0
|
||||
M V30 11 H -2.575226 -1.932547 -0.406057 0
|
||||
M V30 12 H -3.623083 -0.655015 0.416635 0
|
||||
M V30 13 H -2.433528 -1.639935 1.352035 0
|
||||
M V30 14 H -2.708923 1.581110 0.710578 0
|
||||
M V30 15 H -0.886527 3.191467 0.331607 0
|
||||
M V30 16 H 1.899404 -1.033381 -1.581815 0
|
||||
M V30 17 H 2.568267 0.699915 -1.463488 0
|
||||
M V30 18 H 2.092539 -1.007350 0.945729 0
|
||||
M V30 19 H 3.060811 0.528814 0.798600 0
|
||||
M V30 20 H 3.659309 -1.064002 0.044643 0
|
||||
M V30 END ATOM
|
||||
M V30 BEGIN BOND
|
||||
M V30 1 2 1 2
|
||||
M V30 2 1 2 3
|
||||
M V30 3 1 2 4
|
||||
M V30 4 2 4 5
|
||||
M V30 5 1 5 6
|
||||
M V30 6 2 6 7
|
||||
M V30 7 1 7 8
|
||||
M V30 8 1 8 9
|
||||
M V30 9 1 7 1
|
||||
M V30 10 1 1 10
|
||||
M V30 11 1 3 11
|
||||
M V30 12 1 3 12
|
||||
M V30 13 1 3 13
|
||||
M V30 14 1 4 14
|
||||
M V30 15 1 5 15
|
||||
M V30 16 1 8 16
|
||||
M V30 17 1 8 17
|
||||
M V30 18 1 9 18
|
||||
M V30 19 1 9 19
|
||||
M V30 20 1 9 20
|
||||
M V30 END BOND
|
||||
M V30 END CTAB
|
||||
M END
|
||||
$$$$
|
||||
22190
External/pubchem_shape/test_data/bulk.pubchem.sdf
vendored
Normal file
22190
External/pubchem_shape/test_data/bulk.pubchem.sdf
vendored
Normal file
File diff suppressed because it is too large
Load Diff
91
External/pubchem_shape/test_data/test1.sdf
vendored
Normal file
91
External/pubchem_shape/test_data/test1.sdf
vendored
Normal file
@@ -0,0 +1,91 @@
|
||||
2244
|
||||
RDKit 3D
|
||||
|
||||
13 13 0 0 0 0 0 0 0 0999 V2000
|
||||
-17.2334 -5.4951 2.3053 O 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-14.0938 -3.8235 0.2824 O 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-15.9621 -3.2015 1.3992 O 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-17.3818 -6.7019 0.3049 O 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-15.9632 -5.9632 2.4646 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-14.8724 -5.2643 1.9474 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-15.7617 -7.1573 3.1570 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-13.5802 -5.7595 2.1226 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-14.4695 -7.6525 3.3323 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-13.3789 -6.9536 2.8151 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-15.0605 -4.0152 1.2226 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-17.8696 -5.9525 1.1405 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-19.2553 -5.3841 1.0571 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
1 5 1 0
|
||||
1 12 1 0
|
||||
2 11 1 0
|
||||
3 11 2 0
|
||||
4 12 2 0
|
||||
5 6 2 0
|
||||
5 7 1 0
|
||||
6 8 1 0
|
||||
6 11 1 0
|
||||
7 9 2 0
|
||||
8 10 2 0
|
||||
9 10 1 0
|
||||
12 13 1 0
|
||||
M END
|
||||
|
||||
> <PUBCHEM_PHARMACOPHORE_FEATURES>
|
||||
5
|
||||
1 2 acceptor
|
||||
1 3 acceptor
|
||||
1 4 acceptor
|
||||
3 2 3 11 anion
|
||||
6 5 6 7 8 9 10 rings
|
||||
|
||||
|
||||
$$$$
|
||||
166295140
|
||||
RDKit 3D
|
||||
|
||||
16 17 0 0 1 0 0 0 0 0999 V2000
|
||||
-2.8293 0.2150 1.4768 F 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-4.0626 0.7806 -0.2297 F 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
0.1990 -2.7308 -0.6547 O 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
2.1655 0.9098 0.9439 O 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
0.2386 -1.4812 1.2511 O 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-0.5537 0.6911 -0.1687 N 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
3.9396 -0.0288 -0.0987 N 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-0.8484 -0.6331 -0.7303 C 0 0 2 0 0 0 0 0 0 0 0 0
|
||||
-2.3589 -0.8260 -0.5838 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-2.8195 0.4211 0.1395 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-1.7941 1.4679 -0.2225 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
0.5493 1.3657 -0.8388 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-0.0875 -1.6304 0.0829 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
1.8856 0.8393 -0.3872 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
2.9644 0.2690 -1.0161 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
3.4121 0.3708 1.0364 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
1 10 1 0
|
||||
2 10 1 0
|
||||
3 13 1 0
|
||||
4 14 1 0
|
||||
4 16 1 0
|
||||
5 13 2 0
|
||||
6 8 1 0
|
||||
6 11 1 0
|
||||
6 12 1 0
|
||||
7 15 1 0
|
||||
7 16 2 0
|
||||
8 9 1 0
|
||||
8 13 1 1
|
||||
9 10 1 0
|
||||
10 11 1 0
|
||||
12 14 1 0
|
||||
14 15 2 0
|
||||
M END
|
||||
|
||||
> <PUBCHEM_PHARMACOPHORE_FEATURES>
|
||||
6
|
||||
1 3 acceptor
|
||||
1 5 acceptor
|
||||
1 6 cation
|
||||
3 3 5 13 anion
|
||||
5 4 7 14 15 16 rings
|
||||
5 6 8 9 10 11 rings
|
||||
|
||||
$$$$
|
||||
Reference in New Issue
Block a user