mirror of
https://github.com/PDB-REDO/libcifpp.git
synced 2026-06-07 15:54:22 +08:00
Compare commits
24 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9db12761f7 | ||
|
|
0f8a7c4817 | ||
|
|
47e59a55c5 | ||
|
|
b3496f4e5d | ||
|
|
e866228afd | ||
|
|
4aeaa5251e | ||
|
|
b36988e64a | ||
|
|
393aefce8f | ||
|
|
227ff1b8be | ||
|
|
82086a93b0 | ||
|
|
abd97cc1c9 | ||
|
|
3315fae83e | ||
|
|
d8c3c3f7f0 | ||
|
|
23459879f8 | ||
|
|
f1ca916d58 | ||
|
|
6aae012ae5 | ||
|
|
516983427a | ||
|
|
05d78c92f9 | ||
|
|
dc57144472 | ||
|
|
dd260ca45e | ||
|
|
3bc2fc4151 | ||
|
|
6c58eaa7e8 | ||
|
|
e1a1c11a01 | ||
|
|
95a6b4264d |
331
CMakeLists.txt
331
CMakeLists.txt
@@ -11,21 +11,24 @@
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
cmake_minimum_required(VERSION 3.16)
|
||||
|
||||
# set the project name
|
||||
project(libcifpp VERSION 6.0.0 LANGUAGES CXX)
|
||||
project(
|
||||
libcifpp
|
||||
VERSION 6.1.0
|
||||
LANGUAGES CXX)
|
||||
|
||||
list(PREPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
|
||||
|
||||
@@ -50,7 +53,9 @@ if(CMAKE_GENERATOR STREQUAL "Ninja Multi-Config")
|
||||
endif()
|
||||
|
||||
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers")
|
||||
set(CMAKE_CXX_FLAGS
|
||||
"${CMAKE_CXX_FLAGS} -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers"
|
||||
)
|
||||
elseif(MSVC)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4")
|
||||
endif()
|
||||
@@ -71,17 +76,21 @@ if(BUILD_FOR_CCP4)
|
||||
unset(CIFPP_INSTALL_UPDATE_SCRIPT)
|
||||
else()
|
||||
# Lots of code depends on the availability of the components.cif file
|
||||
option(CIFPP_DOWNLOAD_CCD "Download the CCD file components.cif during installation" ON)
|
||||
option(CIFPP_DOWNLOAD_CCD
|
||||
"Download the CCD file components.cif during installation" ON)
|
||||
|
||||
# An optional cron script can be installed to keep the data files up-to-date
|
||||
if(UNIX AND NOT APPLE)
|
||||
option(CIFPP_INSTALL_UPDATE_SCRIPT "Install the script to update CCD and dictionary files" ON)
|
||||
option(CIFPP_INSTALL_UPDATE_SCRIPT
|
||||
"Install the script to update CCD and dictionary files" ON)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# When CCP4 is sourced in the environment, we can recreate the symmetry operations table
|
||||
# When CCP4 is sourced in the environment, we can recreate the symmetry
|
||||
# operations table
|
||||
if(EXISTS "$ENV{CCP4}/lib/data/syminfo.lib")
|
||||
option(CIFPP_RECREATE_SYMOP_DATA "Recreate SymOp data table in case it is out of date" ON)
|
||||
option(CIFPP_RECREATE_SYMOP_DATA
|
||||
"Recreate SymOp data table in case it is out of date" ON)
|
||||
endif()
|
||||
|
||||
# CCP4 build
|
||||
@@ -138,23 +147,30 @@ endif()
|
||||
# Libraries
|
||||
|
||||
# Start by finding out if std::regex is usable. Note that the current
|
||||
# implementation in GCC is not acceptable, it crashes on long lines.
|
||||
# The implementation in libc++ (clang) and MSVC seem to be OK.
|
||||
check_cxx_source_compiles("
|
||||
# implementation in GCC is not acceptable, it crashes on long lines. The
|
||||
# implementation in libc++ (clang) and MSVC seem to be OK.
|
||||
check_cxx_source_compiles(
|
||||
"
|
||||
#include <iostream>
|
||||
#ifndef __GLIBCXX__
|
||||
#error
|
||||
#endif
|
||||
int main(int argc, char *argv[]) { return 0; }" GXX_LIBSTDCPP)
|
||||
int main(int argc, char *argv[]) { return 0; }"
|
||||
GXX_LIBSTDCPP)
|
||||
|
||||
if(GXX_LIBSTDCPP)
|
||||
message(STATUS "Testing for known regex bug, since you're using GNU libstdc++")
|
||||
message(
|
||||
STATUS "Testing for known regex bug, since you're using GNU libstdc++")
|
||||
|
||||
try_run(STD_REGEX_RUNNING STD_REGEX_COMPILING
|
||||
${CMAKE_CURRENT_BINARY_DIR}/test ${PROJECT_SOURCE_DIR}/cmake/test-rx.cpp)
|
||||
${CMAKE_CURRENT_BINARY_DIR}/test
|
||||
${PROJECT_SOURCE_DIR}/cmake/test-rx.cpp)
|
||||
|
||||
if(STD_REGEX_RUNNING STREQUAL FAILED_TO_RUN)
|
||||
message(STATUS "You are probably trying to compile using the g++ standard library which contains a crashing std::regex implementation. Will use boost::regex instead")
|
||||
message(
|
||||
STATUS
|
||||
"You are probably trying to compile using the g++ standard library which contains a crashing std::regex implementation. Will use boost::regex instead"
|
||||
)
|
||||
|
||||
find_package(Boost 1.80 QUIET COMPONENTS regex)
|
||||
|
||||
@@ -164,8 +180,7 @@ if(GXX_LIBSTDCPP)
|
||||
FetchContent_Declare(
|
||||
boost-rx
|
||||
GIT_REPOSITORY https://github.com/boostorg/regex
|
||||
GIT_TAG boost-1.83.0
|
||||
)
|
||||
GIT_TAG boost-1.83.0)
|
||||
|
||||
FetchContent_MakeAvailable(boost-rx)
|
||||
endif()
|
||||
@@ -179,8 +194,8 @@ set(THREADS_PREFER_PTHREAD_FLAG)
|
||||
find_package(Threads)
|
||||
|
||||
if(MSVC)
|
||||
# Avoid linking the shared library of zlib
|
||||
# Search ZLIB_ROOT first if it is set.
|
||||
# Avoid linking the shared library of zlib. Search ZLIB_ROOT first if it is
|
||||
# set.
|
||||
if(ZLIB_ROOT)
|
||||
set(_ZLIB_SEARCH_ROOT PATHS ${ZLIB_ROOT} NO_DEFAULT_PATH)
|
||||
list(APPEND _ZLIB_SEARCHES _ZLIB_SEARCH_ROOT)
|
||||
@@ -190,8 +205,7 @@ if(MSVC)
|
||||
set(_ZLIB_x86 "(x86)")
|
||||
set(_ZLIB_SEARCH_NORMAL
|
||||
PATHS "[HKEY_LOCAL_MACHINE\\SOFTWARE\\GnuWin32\\Zlib;InstallPath]"
|
||||
"$ENV{ProgramFiles}/zlib"
|
||||
"$ENV{ProgramFiles${_ZLIB_x86}}/zlib")
|
||||
"$ENV{ProgramFiles}/zlib" "$ENV{ProgramFiles${_ZLIB_x86}}/zlib")
|
||||
unset(_ZLIB_x86)
|
||||
list(APPEND _ZLIB_SEARCHES _ZLIB_SEARCH_NORMAL)
|
||||
|
||||
@@ -200,26 +214,29 @@ if(MSVC)
|
||||
endif()
|
||||
|
||||
foreach(search ${_ZLIB_SEARCHES})
|
||||
find_library(ZLIB_LIBRARY NAMES zlibstatic NAMES_PER_DIR ${${search}} PATH_SUFFIXES lib)
|
||||
find_library(
|
||||
ZLIB_LIBRARY
|
||||
NAMES zlibstatic NAMES_PER_DIR ${${search}}
|
||||
PATH_SUFFIXES lib)
|
||||
endforeach()
|
||||
endif()
|
||||
|
||||
find_package(ZLIB REQUIRED)
|
||||
|
||||
# Using Eigen3 is a bit of a thing. We don't want to build it completely since we
|
||||
# only need a couple of header files. Nothing special. But often, eigen3 is already
|
||||
# installed and then we prefer that.
|
||||
# Using Eigen3 is a bit of a thing. We don't want to build it completely since
|
||||
# we only need a couple of header files. Nothing special. But often, eigen3 is
|
||||
# already installed and then we prefer that.
|
||||
find_package(Eigen3 3.4 QUIET)
|
||||
|
||||
if(Eigen3_FOUND AND TARGET Eigen3::Eigen)
|
||||
get_target_property(EIGEN_INCLUDE_DIR Eigen3::Eigen INTERFACE_INCLUDE_DIRECTORIES)
|
||||
get_target_property(EIGEN_INCLUDE_DIR Eigen3::Eigen
|
||||
INTERFACE_INCLUDE_DIRECTORIES)
|
||||
else()
|
||||
# Create a private copy of eigen3 and populate it only, no need to build
|
||||
FetchContent_Declare(
|
||||
my-eigen3
|
||||
GIT_REPOSITORY https://gitlab.com/libeigen/eigen.git
|
||||
GIT_TAG 3.4.0
|
||||
)
|
||||
GIT_TAG 3.4.0)
|
||||
|
||||
FetchContent_GetProperties(my-eigen3)
|
||||
|
||||
@@ -243,17 +260,20 @@ write_version_header(${PROJECT_SOURCE_DIR}/src/ LIB_NAME "LibCIFPP")
|
||||
# SymOp data table
|
||||
if(CIFPP_RECREATE_SYMOP_DATA)
|
||||
# The tool to create the table
|
||||
add_executable(symop-map-generator "${PROJECT_SOURCE_DIR}/src/symop-map-generator.cpp")
|
||||
add_executable(symop-map-generator
|
||||
"${PROJECT_SOURCE_DIR}/src/symop-map-generator.cpp")
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT ${PROJECT_SOURCE_DIR}/src/symop_table_data.hpp
|
||||
COMMAND $<TARGET_FILE:symop-map-generator> $ENV{CLIBD}/syminfo.lib $ENV{CLIBD}/symop.lib ${PROJECT_SOURCE_DIR}/src/symop_table_data.hpp
|
||||
)
|
||||
COMMAND
|
||||
$<TARGET_FILE:symop-map-generator> $ENV{CLIBD}/syminfo.lib
|
||||
$ENV{CLIBD}/symop.lib ${PROJECT_SOURCE_DIR}/src/symop_table_data.hpp)
|
||||
|
||||
add_custom_target(
|
||||
OUTPUT ${PROJECT_SOURCE_DIR}/src/symop_table_data.hpp
|
||||
DEPENDS symop-map-generator "$ENV{CLIBD}/syminfo.lib" "$ENV{CLIBD}/symop.lib"
|
||||
)
|
||||
OUTPUT
|
||||
${PROJECT_SOURCE_DIR}/src/symop_table_data.hpp
|
||||
DEPENDS symop-map-generator "$ENV{CLIBD}/syminfo.lib"
|
||||
"$ENV{CLIBD}/symop.lib")
|
||||
endif()
|
||||
|
||||
# Sources
|
||||
@@ -269,19 +289,18 @@ set(project_sources
|
||||
${PROJECT_SOURCE_DIR}/src/validate.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/text.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/utilities.cpp
|
||||
|
||||
${PROJECT_SOURCE_DIR}/src/atom_type.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/compound.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/point.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/symmetry.cpp
|
||||
|
||||
${PROJECT_SOURCE_DIR}/src/model.cpp
|
||||
|
||||
${PROJECT_SOURCE_DIR}/src/pdb/cif2pdb.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/pdb/pdb2cif.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/pdb/pdb_record.hpp
|
||||
${PROJECT_SOURCE_DIR}/src/pdb/pdb2cif_remark_3.hpp
|
||||
${PROJECT_SOURCE_DIR}/src/pdb/pdb2cif_remark_3.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/pdb/reconstruct.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/pdb/validate-pdbx.cpp
|
||||
)
|
||||
|
||||
set(project_headers
|
||||
@@ -298,33 +317,32 @@ set(project_headers
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/condition.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/category.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/row.hpp
|
||||
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/atom_type.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/compound.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/point.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/symmetry.hpp
|
||||
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/model.hpp
|
||||
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/pdb.hpp
|
||||
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/pdb/cif2pdb.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/pdb/io.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/pdb/pdb2cif.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/pdb/tls.hpp
|
||||
)
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/pdb/tls.hpp)
|
||||
|
||||
add_library(cifpp ${project_sources} ${project_headers} ${PROJECT_SOURCE_DIR}/src/symop_table_data.hpp)
|
||||
add_library(cifpp ${project_sources} ${project_headers}
|
||||
${PROJECT_SOURCE_DIR}/src/symop_table_data.hpp)
|
||||
add_library(cifpp::cifpp ALIAS cifpp)
|
||||
|
||||
set(CMAKE_DEBUG_POSTFIX d)
|
||||
set_target_properties(cifpp PROPERTIES DEBUG_POSTFIX "d")
|
||||
|
||||
generate_export_header(cifpp EXPORT_FILE_NAME ${PROJECT_SOURCE_DIR}/include/cif++/exports.hpp)
|
||||
generate_export_header(cifpp EXPORT_FILE_NAME
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/exports.hpp)
|
||||
|
||||
if(BOOST_REGEX)
|
||||
target_compile_definitions(cifpp PRIVATE USE_BOOST_REGEX=1 BOOST_REGEX_STANDALONE=1)
|
||||
get_target_property(BOOST_REGEX_INCLUDE_DIR Boost::regex INTERFACE_INCLUDE_DIRECTORIES)
|
||||
target_compile_definitions(cifpp PRIVATE USE_BOOST_REGEX=1
|
||||
BOOST_REGEX_STANDALONE=1)
|
||||
get_target_property(BOOST_REGEX_INCLUDE_DIR Boost::regex
|
||||
INTERFACE_INCLUDE_DIRECTORIES)
|
||||
endif()
|
||||
|
||||
if(MSVC)
|
||||
@@ -333,17 +351,14 @@ endif()
|
||||
|
||||
set_target_properties(cifpp PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
target_include_directories(cifpp
|
||||
PUBLIC
|
||||
"$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>"
|
||||
target_include_directories(
|
||||
cifpp
|
||||
PUBLIC "$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>"
|
||||
"$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>"
|
||||
PRIVATE
|
||||
"${BOOST_REGEX_INCLUDE_DIR}"
|
||||
"${EIGEN_INCLUDE_DIR}"
|
||||
)
|
||||
PRIVATE "${BOOST_REGEX_INCLUDE_DIR}" "${EIGEN_INCLUDE_DIR}")
|
||||
|
||||
target_link_libraries(cifpp
|
||||
PUBLIC Threads::Threads ZLIB::ZLIB ${CIFPP_REQUIRED_LIBRARIES})
|
||||
target_link_libraries(cifpp PUBLIC Threads::Threads ZLIB::ZLIB
|
||||
${CIFPP_REQUIRED_LIBRARIES})
|
||||
|
||||
if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
|
||||
target_link_options(cifpp PRIVATE -undefined dynamic_lookup)
|
||||
@@ -363,21 +378,28 @@ if(CIFPP_DOWNLOAD_CCD)
|
||||
endif()
|
||||
|
||||
if(NOT EXISTS ${COMPONENTS_CIF})
|
||||
# Since the file(DOWNLOAD) command in cmake does not use
|
||||
# compression, we try to download the gzipped version and
|
||||
# decompress it ourselves.
|
||||
# Since the file(DOWNLOAD) command in cmake does not use compression, we try
|
||||
# to download the gzipped version and decompress it ourselves.
|
||||
find_program(GUNZIP gunzip)
|
||||
|
||||
if(WIN32 OR GUNZIP STREQUAL "GUNZIP-NOTFOUND")
|
||||
file(DOWNLOAD https://files.wwpdb.org/pub/pdb/data/monomers/components.cif ${COMPONENTS_CIF}
|
||||
SHOW_PROGRESS STATUS CCD_FETCH_STATUS)
|
||||
file(
|
||||
DOWNLOAD https://files.wwpdb.org/pub/pdb/data/monomers/components.cif
|
||||
${COMPONENTS_CIF}
|
||||
SHOW_PROGRESS
|
||||
STATUS CCD_FETCH_STATUS)
|
||||
else()
|
||||
if(NOT EXISTS "${COMPONENTS_CIF}.gz")
|
||||
file(DOWNLOAD https://files.wwpdb.org/pub/pdb/data/monomers/components.cif.gz ${COMPONENTS_CIF}.gz
|
||||
SHOW_PROGRESS STATUS CCD_FETCH_STATUS)
|
||||
file(
|
||||
DOWNLOAD
|
||||
https://files.wwpdb.org/pub/pdb/data/monomers/components.cif.gz
|
||||
${COMPONENTS_CIF}.gz
|
||||
SHOW_PROGRESS
|
||||
STATUS CCD_FETCH_STATUS)
|
||||
endif()
|
||||
|
||||
add_custom_command(OUTPUT ${COMPONENTS_CIF}
|
||||
add_custom_command(
|
||||
OUTPUT ${COMPONENTS_CIF}
|
||||
COMMAND "${GUNZIP}" ${COMPONENTS_CIF}.gz
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/rsrc/)
|
||||
|
||||
@@ -388,41 +410,56 @@ if(CIFPP_DOWNLOAD_CCD)
|
||||
list(POP_FRONT CCD_FETCH_STATUS CCD_FETCH_STATUS_CODE)
|
||||
|
||||
if(CCD_FETCH_STATUS_CODE)
|
||||
message(FATAL_ERROR "Error trying to download CCD file: ${CCD_FETCH_STATUS}")
|
||||
message(
|
||||
FATAL_ERROR "Error trying to download CCD file: ${CCD_FETCH_STATUS}")
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Installation directories
|
||||
if(BUILD_FOR_CCP4)
|
||||
set(CIFPP_DATA_DIR "$ENV{CCP4}/share/libcifpp" CACHE PATH "Directory where dictionary and other static data is stored")
|
||||
set(CIFPP_DATA_DIR
|
||||
"$ENV{CCP4}/share/libcifpp"
|
||||
CACHE PATH "Directory where dictionary and other static data is stored")
|
||||
else()
|
||||
set(CIFPP_DATA_DIR "${CMAKE_INSTALL_FULL_DATADIR}/libcifpp" CACHE PATH "Directory where dictionary and other static data is stored")
|
||||
set(CIFPP_DATA_DIR
|
||||
"${CMAKE_INSTALL_FULL_DATADIR}/libcifpp"
|
||||
CACHE PATH "Directory where dictionary and other static data is stored")
|
||||
endif()
|
||||
|
||||
target_compile_definitions(cifpp PUBLIC DATA_DIR="${CIFPP_DATA_DIR}")
|
||||
if(CIFPP_DATA_DIR)
|
||||
target_compile_definitions(cifpp PUBLIC DATA_DIR="${CIFPP_DATA_DIR}")
|
||||
endif()
|
||||
|
||||
if(UNIX AND NOT BUILD_FOR_CCP4)
|
||||
if("${CMAKE_INSTALL_PREFIX}" STREQUAL "/usr/local")
|
||||
set(CIFPP_CACHE_DIR "/var/cache/libcifpp" CACHE PATH "The directory where downloaded data files are stored")
|
||||
set(CIFPP_CACHE_DIR
|
||||
"/var/cache/libcifpp"
|
||||
CACHE PATH "The directory where downloaded data files are stored")
|
||||
else()
|
||||
set(CIFPP_CACHE_DIR "${CMAKE_INSTALL_FULL_LOCALSTATEDIR}/cache/libcifpp" CACHE PATH "The directory where downloaded data files are stored")
|
||||
set(CIFPP_CACHE_DIR
|
||||
"${CMAKE_INSTALL_FULL_LOCALSTATEDIR}/cache/libcifpp"
|
||||
CACHE PATH "The directory where downloaded data files are stored")
|
||||
endif()
|
||||
|
||||
target_compile_definitions(cifpp PUBLIC CACHE_DIR="${CIFPP_CACHE_DIR}")
|
||||
|
||||
set(CIFPP_ETC_DIR "${CMAKE_INSTALL_FULL_SYSCONFDIR}" CACHE PATH "The directory where the update configuration file is stored")
|
||||
set(CIFPP_ETC_DIR
|
||||
"${CMAKE_INSTALL_FULL_SYSCONFDIR}"
|
||||
CACHE PATH "The directory where the update configuration file is stored")
|
||||
else()
|
||||
unset(CIFPP_CACHE_DIR)
|
||||
endif()
|
||||
|
||||
# Install rules
|
||||
install(TARGETS cifpp
|
||||
install(
|
||||
TARGETS cifpp
|
||||
EXPORT cifpp-targets
|
||||
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
|
||||
INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
|
||||
INCLUDES
|
||||
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
|
||||
|
||||
if(MSVC AND BUILD_SHARED_LIBS)
|
||||
install(
|
||||
@@ -437,88 +474,89 @@ file(GLOB OLD_CONFIG_FILES
|
||||
${CMAKE_INSTALL_FULL_LIBDIR}/cmake/cifpp/cifppTargets*.cmake)
|
||||
|
||||
if(OLD_CONFIG_FILES)
|
||||
message(STATUS "Installation will remove old config files: ${OLD_CONFIG_FILES}")
|
||||
message(
|
||||
STATUS "Installation will remove old config files: ${OLD_CONFIG_FILES}")
|
||||
install(CODE "file(REMOVE ${OLD_CONFIG_FILES})")
|
||||
endif()
|
||||
|
||||
install(EXPORT cifpp-targets
|
||||
install(
|
||||
EXPORT cifpp-targets
|
||||
FILE "cifpp-targets.cmake"
|
||||
NAMESPACE cifpp::
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cifpp
|
||||
)
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cifpp)
|
||||
|
||||
install(
|
||||
DIRECTORY include/cif++
|
||||
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
|
||||
COMPONENT Devel
|
||||
)
|
||||
COMPONENT Devel)
|
||||
|
||||
install(
|
||||
FILES include/cif++.hpp
|
||||
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
|
||||
COMPONENT Devel
|
||||
)
|
||||
COMPONENT Devel)
|
||||
|
||||
install(FILES
|
||||
${PROJECT_SOURCE_DIR}/rsrc/mmcif_ddl.dic
|
||||
${PROJECT_SOURCE_DIR}/rsrc/mmcif_pdbx.dic
|
||||
${PROJECT_SOURCE_DIR}/rsrc/mmcif_ma.dic
|
||||
${COMPONENTS_CIF}
|
||||
DESTINATION ${CIFPP_DATA_DIR}
|
||||
)
|
||||
if(CIFPP_DATA_DIR)
|
||||
install(
|
||||
FILES ${PROJECT_SOURCE_DIR}/rsrc/mmcif_ddl.dic
|
||||
${PROJECT_SOURCE_DIR}/rsrc/mmcif_pdbx.dic
|
||||
${PROJECT_SOURCE_DIR}/rsrc/mmcif_ma.dic ${COMPONENTS_CIF}
|
||||
DESTINATION ${CIFPP_DATA_DIR})
|
||||
endif()
|
||||
|
||||
if(CIFPP_CACHE_DIR)
|
||||
install(FILES
|
||||
${PROJECT_SOURCE_DIR}/rsrc/mmcif_ddl.dic
|
||||
install(
|
||||
FILES ${PROJECT_SOURCE_DIR}/rsrc/mmcif_ddl.dic
|
||||
${PROJECT_SOURCE_DIR}/rsrc/mmcif_pdbx.dic
|
||||
${PROJECT_SOURCE_DIR}/rsrc/mmcif_ma.dic
|
||||
${COMPONENTS_CIF}
|
||||
DESTINATION ${CIFPP_CACHE_DIR}
|
||||
)
|
||||
${PROJECT_SOURCE_DIR}/rsrc/mmcif_ma.dic ${COMPONENTS_CIF}
|
||||
DESTINATION ${CIFPP_CACHE_DIR})
|
||||
endif()
|
||||
|
||||
set(CONFIG_TEMPLATE_FILE ${PROJECT_SOURCE_DIR}/cmake/cifpp-config.cmake.in)
|
||||
|
||||
configure_package_config_file(
|
||||
${CONFIG_TEMPLATE_FILE}
|
||||
${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifpp-config.cmake
|
||||
${CONFIG_TEMPLATE_FILE} ${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifpp-config.cmake
|
||||
INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cifpp
|
||||
PATH_VARS CIFPP_DATA_DIR
|
||||
)
|
||||
PATH_VARS CIFPP_DATA_DIR)
|
||||
|
||||
install(FILES
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifpp-config.cmake"
|
||||
install(
|
||||
FILES "${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifpp-config.cmake"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifpp-config-version.cmake"
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cifpp
|
||||
COMPONENT Devel
|
||||
)
|
||||
COMPONENT Devel)
|
||||
|
||||
set_target_properties(cifpp PROPERTIES
|
||||
VERSION ${PROJECT_VERSION}
|
||||
set_target_properties(
|
||||
cifpp
|
||||
PROPERTIES VERSION ${PROJECT_VERSION}
|
||||
SOVERSION "${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}"
|
||||
INTERFACE_cifpp_MAJOR_VERSION ${PROJECT_VERSION_MAJOR})
|
||||
|
||||
set_property(TARGET cifpp APPEND PROPERTY
|
||||
COMPATIBLE_INTERFACE_STRING cifpp_MAJOR_VERSION
|
||||
)
|
||||
set_property(
|
||||
TARGET cifpp
|
||||
APPEND
|
||||
PROPERTY COMPATIBLE_INTERFACE_STRING cifpp_MAJOR_VERSION)
|
||||
|
||||
write_basic_package_version_file(
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifpp-config-version.cmake"
|
||||
VERSION ${PROJECT_VERSION}
|
||||
COMPATIBILITY AnyNewerVersion
|
||||
)
|
||||
COMPATIBILITY AnyNewerVersion)
|
||||
|
||||
# In case we're included as sub_directory:
|
||||
if(NOT PROJECT_IS_TOP_LEVEL)
|
||||
set(CIFPP_SHARE_DIR ${PROJECT_SOURCE_DIR}/rsrc PARENT_SCOPE)
|
||||
endif()
|
||||
|
||||
if(BUILD_TESTING)
|
||||
# We're using the older version 2 of Catch2
|
||||
FetchContent_Declare(
|
||||
Catch2
|
||||
GIT_REPOSITORY https://github.com/catchorg/Catch2.git
|
||||
GIT_TAG v2.13.9
|
||||
)
|
||||
GIT_TAG v2.13.9)
|
||||
|
||||
FetchContent_MakeAvailable(Catch2)
|
||||
|
||||
list(APPEND CIFPP_tests
|
||||
list(
|
||||
APPEND
|
||||
CIFPP_tests
|
||||
unit-v2
|
||||
unit-3d
|
||||
format
|
||||
@@ -526,15 +564,18 @@ if(BUILD_TESTING)
|
||||
rename-compound
|
||||
sugar
|
||||
spinner
|
||||
)
|
||||
validate-pdbx)
|
||||
|
||||
foreach(CIFPP_TEST IN LISTS CIFPP_tests)
|
||||
set(CIFPP_TEST "${CIFPP_TEST}-test")
|
||||
set(CIFPP_TEST_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/test/${CIFPP_TEST}.cpp")
|
||||
|
||||
add_executable(${CIFPP_TEST} ${CIFPP_TEST_SOURCE} "${CMAKE_CURRENT_SOURCE_DIR}/test/test-main.cpp")
|
||||
add_executable(
|
||||
${CIFPP_TEST} ${CIFPP_TEST_SOURCE}
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/test/test-main.cpp")
|
||||
|
||||
target_link_libraries(${CIFPP_TEST} PRIVATE Threads::Threads cifpp::cifpp Catch2::Catch2)
|
||||
target_link_libraries(${CIFPP_TEST} PRIVATE Threads::Threads cifpp::cifpp
|
||||
Catch2::Catch2)
|
||||
target_include_directories(${CIFPP_TEST} PRIVATE "${EIGEN_INCLUDE_DIR}")
|
||||
|
||||
if(MSVC)
|
||||
@@ -542,47 +583,63 @@ if(BUILD_TESTING)
|
||||
target_compile_options(${CIFPP_TEST} PRIVATE /EHsc)
|
||||
endif()
|
||||
|
||||
add_custom_target("run-${CIFPP_TEST}" DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/Run${CIFPP_TEST}.touch ${CIFPP_TEST})
|
||||
add_custom_target(
|
||||
"run-${CIFPP_TEST}"
|
||||
DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/Run${CIFPP_TEST}.touch ${CIFPP_TEST})
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/Run${CIFPP_TEST}.touch
|
||||
COMMAND $<TARGET_FILE:${CIFPP_TEST}> --data-dir ${CMAKE_CURRENT_SOURCE_DIR}/test)
|
||||
COMMAND $<TARGET_FILE:${CIFPP_TEST}> --data-dir
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/test)
|
||||
|
||||
add_test(NAME ${CIFPP_TEST}
|
||||
COMMAND $<TARGET_FILE:${CIFPP_TEST}> --data-dir ${CMAKE_CURRENT_SOURCE_DIR}/test)
|
||||
add_test(NAME ${CIFPP_TEST} COMMAND $<TARGET_FILE:${CIFPP_TEST}> --data-dir
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/test)
|
||||
endforeach()
|
||||
endif()
|
||||
|
||||
# Optionally install the update scripts for CCD and dictionary files
|
||||
if(CIFPP_INSTALL_UPDATE_SCRIPT)
|
||||
if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux" OR ${CMAKE_SYSTEM_NAME} STREQUAL "GNU")
|
||||
if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux" OR ${CMAKE_SYSTEM_NAME} STREQUAL
|
||||
"GNU")
|
||||
if("${CMAKE_INSTALL_PREFIX}" STREQUAL "/usr/local")
|
||||
set(CIFPP_CRON_DIR "/etc/cron.weekly" CACHE PATH "The cron directory, for the update script")
|
||||
set(CIFPP_CRON_DIR
|
||||
"/etc/cron.weekly"
|
||||
CACHE PATH "The cron directory, for the update script")
|
||||
else()
|
||||
set(CIFPP_CRON_DIR "${CIFPP_ETC_DIR}/cron.weekly" CACHE PATH "The cron directory, for the update script")
|
||||
set(CIFPP_CRON_DIR
|
||||
"${CIFPP_ETC_DIR}/cron.weekly"
|
||||
CACHE PATH "The cron directory, for the update script")
|
||||
endif()
|
||||
elseif(${CMAKE_SYSTEM_NAME} STREQUAL "FreeBSD")
|
||||
set(CIFPP_CRON_DIR "${CIFPP_ETC_DIR}/periodic/weekly" CACHE PATH "The cron directory, for the update script")
|
||||
set(CIFPP_CRON_DIR
|
||||
"${CIFPP_ETC_DIR}/periodic/weekly"
|
||||
CACHE PATH "The cron directory, for the update script")
|
||||
else()
|
||||
message(FATAL_ERROR "Don't know where to install the update script")
|
||||
endif()
|
||||
|
||||
configure_file(${PROJECT_SOURCE_DIR}/tools/update-libcifpp-data.in update-libcifpp-data @ONLY)
|
||||
configure_file(${PROJECT_SOURCE_DIR}/tools/update-libcifpp-data.in
|
||||
update-libcifpp-data @ONLY)
|
||||
install(
|
||||
FILES ${CMAKE_CURRENT_BINARY_DIR}/update-libcifpp-data
|
||||
DESTINATION ${CIFPP_CRON_DIR}
|
||||
PERMISSIONS OWNER_EXECUTE OWNER_READ GROUP_EXECUTE GROUP_READ WORLD_EXECUTE WORLD_READ
|
||||
)
|
||||
PERMISSIONS OWNER_EXECUTE OWNER_READ GROUP_EXECUTE GROUP_READ WORLD_EXECUTE
|
||||
WORLD_READ)
|
||||
|
||||
install(DIRECTORY DESTINATION ${CIFPP_CACHE_DIR})
|
||||
|
||||
# a config file, to make it complete
|
||||
if(NOT EXISTS "${CIFPP_ETC_DIR}/libcifpp.conf")
|
||||
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/libcifpp.conf [[# Uncomment the next line to enable automatic updates
|
||||
file(
|
||||
WRITE ${CMAKE_CURRENT_BINARY_DIR}/libcifpp.conf
|
||||
[[# Uncomment the next line to enable automatic updates
|
||||
# update=true
|
||||
]])
|
||||
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libcifpp.conf DESTINATION "${CIFPP_ETC_DIR}")
|
||||
install(CODE "message(\"A configuration file has been written to ${CIFPP_ETC_DIR}/libcifpp.conf, please edit this file to enable automatic updates\")")
|
||||
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libcifpp.conf
|
||||
DESTINATION "${CIFPP_ETC_DIR}")
|
||||
install(
|
||||
CODE "message(\"A configuration file has been written to ${CIFPP_ETC_DIR}/libcifpp.conf, please edit this file to enable automatic updates\")"
|
||||
)
|
||||
|
||||
install(DIRECTORY DESTINATION "${CIFPP_ETC_DIR}/libcifpp/cache-update.d")
|
||||
endif()
|
||||
|
||||
@@ -1,3 +1,11 @@
|
||||
Version 6.1.0
|
||||
- Add formula weight to entity in pdb2cif
|
||||
- Change order of categories inside a datablock to match order in file
|
||||
- Change default order to write out categories in a file based on
|
||||
parent/child relationship
|
||||
- Added validate_pdbx and recover_pdbx
|
||||
- Fixed a serious bug in category_index when moving categories
|
||||
|
||||
Version 6.0.0
|
||||
- Drop the use of CCP4's monomer library for compound information
|
||||
|
||||
|
||||
@@ -18,7 +18,7 @@ Loading Resources
|
||||
|
||||
No matter where the resource is located, you should always use the single libcifpp API call :cpp:func:`cif::load_resource` to load them. This function returns a *std::istream* wrapped inside a *std::unique_ptr*.
|
||||
|
||||
The order in which resources are search for is:
|
||||
The order in which resources are searched for is:
|
||||
|
||||
* Use the resource that was defined by calling :cpp:func:`cif::add_file_resource`
|
||||
for this name.
|
||||
|
||||
@@ -166,17 +166,22 @@ class compound
|
||||
return m_id == "HOH" or m_id == "H2O" or m_id == "WAT";
|
||||
}
|
||||
|
||||
char one_letter_code() const { return m_one_letter_code; }; ///< Return the one letter code to use in a canonical sequence. If unknown the value '\0' is returned
|
||||
std::string parent_id() const { return m_parent_id; }; ///< Return the parent id code in case a parent is specified (e.g. MET for MSE)
|
||||
|
||||
private:
|
||||
friend class compound_factory_impl;
|
||||
friend class local_compound_factory_impl;
|
||||
|
||||
compound(cif::datablock &db);
|
||||
compound(cif::datablock &db, const std::string &id, const std::string &name, const std::string &type, const std::string &group);
|
||||
compound(cif::datablock &db, int);
|
||||
|
||||
std::string m_id;
|
||||
std::string m_name;
|
||||
std::string m_type;
|
||||
std::string m_group;
|
||||
std::string m_formula;
|
||||
char m_one_letter_code = 0;
|
||||
std::string m_parent_id;
|
||||
float m_formula_weight = 0;
|
||||
int m_formal_charge = 0;
|
||||
std::vector<compound_atom> m_atoms;
|
||||
@@ -214,6 +219,20 @@ class compound_factory
|
||||
/// Override any previously loaded dictionary with @a inDictFile
|
||||
void push_dictionary(const std::filesystem::path &inDictFile);
|
||||
|
||||
/** @brief Override any previously loaded dictionary with the data in @a file
|
||||
*
|
||||
* @note experimental feature
|
||||
*
|
||||
* Load the file @a file as a source for compound information. This may
|
||||
* be e.g. a regular mmCIF file with extra files containing compound
|
||||
* information.
|
||||
*
|
||||
* Be careful to remove the block again; best use @ref cif::compound_source
|
||||
* as a stack based object.
|
||||
*/
|
||||
|
||||
void push_dictionary(const file &file);
|
||||
|
||||
/// Remove the last pushed dictionary
|
||||
void pop_dictionary();
|
||||
|
||||
@@ -251,4 +270,35 @@ class compound_factory
|
||||
std::shared_ptr<compound_factory_impl> m_impl;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* @brief Stack based source for compound info.
|
||||
*
|
||||
* Use this class to temporarily add a compound source to the
|
||||
* compound_factory.
|
||||
*
|
||||
* @code{.cpp}
|
||||
* cif::file f("1cbs-with-custom-rea.cif");
|
||||
* cif::compound_source cs(f);
|
||||
*
|
||||
* auto &cf = cif::compound_factory::instance();
|
||||
* auto rea_compound = cf.create("REA");
|
||||
* @endcode
|
||||
*/
|
||||
|
||||
class compound_source
|
||||
{
|
||||
public:
|
||||
compound_source(const cif::file &file)
|
||||
{
|
||||
cif::compound_factory::instance().push_dictionary(file);
|
||||
}
|
||||
|
||||
~compound_source()
|
||||
{
|
||||
cif::compound_factory::instance().pop_dictionary();
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace cif
|
||||
|
||||
@@ -1115,11 +1115,4 @@ class structure
|
||||
std::vector<residue> m_non_polymers;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
/// \brief Reconstruct all missing categories for an assumed PDBx file.
|
||||
/// Some people believe that simply dumping some atom records is enough.
|
||||
/// \param db The cif::datablock that hopefully contains some valid data
|
||||
void reconstruct_pdbx(datablock &db);
|
||||
|
||||
} // namespace cif::mm
|
||||
|
||||
@@ -1,17 +1,17 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
*
|
||||
* Copyright (c) 2023 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
@@ -30,13 +30,13 @@
|
||||
|
||||
/**
|
||||
* @file pdb.hpp
|
||||
*
|
||||
*
|
||||
* This file presents the API to read and write files in the
|
||||
* legacy and ancient PDB format.
|
||||
*
|
||||
*
|
||||
* The code works on the basis of best effort since it is
|
||||
* impossible to have correct round trip fidelity.
|
||||
*
|
||||
*
|
||||
*/
|
||||
|
||||
namespace cif::pdb
|
||||
@@ -81,7 +81,7 @@ inline void write(std::ostream &os, const file &f)
|
||||
/** @brief Write out the data in @a db to file @a file
|
||||
* in legacy PDB format or mmCIF format, depending on the
|
||||
* filename extension.
|
||||
*
|
||||
*
|
||||
* If extension of @a file is *.gz* the resulting file will
|
||||
* be written in gzip compressed format.
|
||||
*/
|
||||
@@ -90,7 +90,7 @@ void write(const std::filesystem::path &file, const datablock &db);
|
||||
/** @brief Write out the data in @a f to file @a file
|
||||
* in legacy PDB format or mmCIF format, depending on the
|
||||
* filename extension.
|
||||
*
|
||||
*
|
||||
* If extension of @a file is *.gz* the resulting file will
|
||||
* be written in gzip compressed format.
|
||||
*/
|
||||
@@ -99,6 +99,34 @@ inline void write(const std::filesystem::path &p, const file &f)
|
||||
write(p, f.front());
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
/** \brief Reconstruct all missing categories for an assumed PDBx file.
|
||||
*
|
||||
* Some people believe that simply dumping some atom records is enough.
|
||||
*
|
||||
* \param pdbx_file The cif::file that hopefully contains some valid data
|
||||
* \param dictionary The mmcif dictionary to use
|
||||
*/
|
||||
|
||||
void reconstruct_pdbx(file &pdbx_file, std::string_view dictionary = "mmcif_pdbx");
|
||||
|
||||
/** \brief This is an extension to cif::validator, use the logic in common
|
||||
* PDBx files to see if the file is internally consistent.
|
||||
*
|
||||
* This function for now checks if the following categories are consistent:
|
||||
*
|
||||
* atom_site -> pdbx_poly_seq_scheme -> entity_poly_seq -> entity_poly -> entity
|
||||
*
|
||||
* Use the common \ref cif::VERBOSE flag to turn on diagnostic messages.
|
||||
*
|
||||
* \param pdbx_file The input file
|
||||
* \param dictionary The mmcif dictionary to use
|
||||
* \result Returns true if the file was valid and consistent
|
||||
*/
|
||||
|
||||
bool is_valid_pdbx_file(const file &pdbx_file, std::string_view dictionary = "mmcif_pdbx");
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// Other I/O related routines
|
||||
|
||||
@@ -106,7 +134,7 @@ inline void write(const std::filesystem::path &p, const file &f)
|
||||
*
|
||||
* The line returned should be compatible with the legacy PDB
|
||||
* format and is e.g. used in the DSSP program.
|
||||
*
|
||||
*
|
||||
* @param data The datablock to use as source for the requested data
|
||||
* @param truncate_at The maximum length of the line returned
|
||||
*/
|
||||
@@ -116,7 +144,7 @@ std::string get_HEADER_line(const datablock &data, std::string::size_type trunca
|
||||
*
|
||||
* The line returned should be compatible with the legacy PDB
|
||||
* format and is e.g. used in the DSSP program.
|
||||
*
|
||||
*
|
||||
* @param data The datablock to use as source for the requested data
|
||||
* @param truncate_at The maximum length of the line returned
|
||||
*/
|
||||
@@ -126,7 +154,7 @@ std::string get_COMPND_line(const datablock &data, std::string::size_type trunca
|
||||
*
|
||||
* The line returned should be compatible with the legacy PDB
|
||||
* format and is e.g. used in the DSSP program.
|
||||
*
|
||||
*
|
||||
* @param data The datablock to use as source for the requested data
|
||||
* @param truncate_at The maximum length of the line returned
|
||||
*/
|
||||
@@ -136,12 +164,11 @@ std::string get_SOURCE_line(const datablock &data, std::string::size_type trunca
|
||||
*
|
||||
* The line returned should be compatible with the legacy PDB
|
||||
* format and is e.g. used in the DSSP program.
|
||||
*
|
||||
*
|
||||
* @param data The datablock to use as source for the requested data
|
||||
* @param truncate_at The maximum length of the line returned
|
||||
*/
|
||||
|
||||
std::string get_AUTHOR_line(const datablock &data, std::string::size_type truncate_at = 127);
|
||||
|
||||
} // namespace pdbx
|
||||
|
||||
} // namespace cif::pdb
|
||||
|
||||
@@ -290,6 +290,13 @@ class row_handle
|
||||
return operator[](get_column_ix(column)).template as<T>();
|
||||
}
|
||||
|
||||
/// \brief Get the value of column @a column cast to type @a T
|
||||
template <typename T>
|
||||
T get(std::string_view column) const
|
||||
{
|
||||
return operator[](get_column_ix(column)).template as<T>();
|
||||
}
|
||||
|
||||
/// \brief assign each of the columns named in @a values to their respective value
|
||||
void assign(const std::vector<item> &values)
|
||||
{
|
||||
|
||||
114
src/category.cpp
114
src/category.cpp
@@ -47,7 +47,6 @@ class row_comparator
|
||||
{
|
||||
public:
|
||||
row_comparator(category &cat)
|
||||
: m_category(cat)
|
||||
{
|
||||
auto cv = cat.get_cat_validator();
|
||||
|
||||
@@ -69,13 +68,13 @@ class row_comparator
|
||||
}
|
||||
}
|
||||
|
||||
int operator()(const row *a, const row *b) const
|
||||
int operator()(const category &cat, const row *a, const row *b) const
|
||||
{
|
||||
assert(a);
|
||||
assert(b);
|
||||
|
||||
row_handle rha(m_category, *a);
|
||||
row_handle rhb(m_category, *b);
|
||||
row_handle rha(cat, *a);
|
||||
row_handle rhb(cat, *b);
|
||||
|
||||
int d = 0;
|
||||
for (const auto &[k, f] : m_comparator)
|
||||
@@ -92,11 +91,11 @@ class row_comparator
|
||||
return d;
|
||||
}
|
||||
|
||||
int operator()(const row_initializer &a, const row *b) const
|
||||
int operator()(const category &cat, const row_initializer &a, const row *b) const
|
||||
{
|
||||
assert(b);
|
||||
|
||||
row_handle rhb(m_category, *b);
|
||||
row_handle rhb(cat, *b);
|
||||
|
||||
int d = 0;
|
||||
auto ai = a.begin();
|
||||
@@ -124,7 +123,6 @@ class row_comparator
|
||||
using key_comparator = std::tuple<uint16_t, compareFunc>;
|
||||
|
||||
std::vector<key_comparator> m_comparator;
|
||||
category &m_category;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
@@ -135,18 +133,18 @@ class row_comparator
|
||||
class category_index
|
||||
{
|
||||
public:
|
||||
category_index(category *cat);
|
||||
category_index(category &cat);
|
||||
|
||||
~category_index()
|
||||
{
|
||||
delete m_root;
|
||||
}
|
||||
|
||||
row *find(row *k) const;
|
||||
row *find_by_value(row_initializer k) const;
|
||||
row *find(const category &cat, row *k) const;
|
||||
row *find_by_value(const category &cat, row_initializer k) const;
|
||||
|
||||
void insert(row *r);
|
||||
void erase(row *r);
|
||||
void insert(category &cat, row *r);
|
||||
void erase(category &cat, row *r);
|
||||
|
||||
// reorder the row's and returns new head and tail
|
||||
std::tuple<row *, row *> reorder()
|
||||
@@ -192,8 +190,8 @@ class category_index
|
||||
bool m_red;
|
||||
};
|
||||
|
||||
entry *insert(entry *h, row *v);
|
||||
entry *erase(entry *h, row *k);
|
||||
entry *insert(category &cat, entry *h, row *v);
|
||||
entry *erase(category &cat, entry *h, row *k);
|
||||
|
||||
// void validate(entry* h, bool isParentRed, uint32_t blackDepth, uint32_t& minBlack, uint32_t& maxBlack) const;
|
||||
|
||||
@@ -324,26 +322,24 @@ class category_index
|
||||
return result;
|
||||
}
|
||||
|
||||
category &m_category;
|
||||
row_comparator m_row_comparator;
|
||||
entry *m_root;
|
||||
};
|
||||
|
||||
category_index::category_index(category *cat)
|
||||
: m_category(*cat)
|
||||
, m_row_comparator(m_category)
|
||||
category_index::category_index(category &cat)
|
||||
: m_row_comparator(cat)
|
||||
, m_root(nullptr)
|
||||
{
|
||||
for (auto r : m_category)
|
||||
insert(r.get_row());
|
||||
for (auto r : cat)
|
||||
insert(cat, r.get_row());
|
||||
}
|
||||
|
||||
row *category_index::find(row *k) const
|
||||
row *category_index::find(const category &cat, row *k) const
|
||||
{
|
||||
const entry *r = m_root;
|
||||
while (r != nullptr)
|
||||
{
|
||||
int d = m_row_comparator(k, r->m_row);
|
||||
int d = m_row_comparator(cat, k, r->m_row);
|
||||
if (d < 0)
|
||||
r = r->m_left;
|
||||
else if (d > 0)
|
||||
@@ -355,14 +351,14 @@ row *category_index::find(row *k) const
|
||||
return r ? r->m_row : nullptr;
|
||||
}
|
||||
|
||||
row *category_index::find_by_value(row_initializer k) const
|
||||
row *category_index::find_by_value(const category &cat, row_initializer k) const
|
||||
{
|
||||
// sort the values in k first
|
||||
|
||||
row_initializer k2;
|
||||
for (auto &f : m_category.key_field_indices())
|
||||
for (auto &f : cat.key_field_indices())
|
||||
{
|
||||
auto fld = m_category.get_column_name(f);
|
||||
auto fld = cat.get_column_name(f);
|
||||
|
||||
auto ki = find_if(k.begin(), k.end(), [&fld](auto &i) { return i.name() == fld; });
|
||||
if (ki == k.end())
|
||||
@@ -374,7 +370,7 @@ row *category_index::find_by_value(row_initializer k) const
|
||||
const entry *r = m_root;
|
||||
while (r != nullptr)
|
||||
{
|
||||
int d = m_row_comparator(k2, r->m_row);
|
||||
int d = m_row_comparator(cat, k2, r->m_row);
|
||||
if (d < 0)
|
||||
r = r->m_left;
|
||||
else if (d > 0)
|
||||
@@ -386,34 +382,34 @@ row *category_index::find_by_value(row_initializer k) const
|
||||
return r ? r->m_row : nullptr;
|
||||
}
|
||||
|
||||
void category_index::insert(row *k)
|
||||
void category_index::insert(category &cat, row *k)
|
||||
{
|
||||
m_root = insert(m_root, k);
|
||||
m_root = insert(cat, m_root, k);
|
||||
m_root->m_red = false;
|
||||
}
|
||||
|
||||
category_index::entry *category_index::insert(entry *h, row *v)
|
||||
category_index::entry *category_index::insert(category &cat, entry *h, row *v)
|
||||
{
|
||||
if (h == nullptr)
|
||||
return new entry(v);
|
||||
|
||||
int d = m_row_comparator(v, h->m_row);
|
||||
int d = m_row_comparator(cat, v, h->m_row);
|
||||
if (d < 0)
|
||||
h->m_left = insert(h->m_left, v);
|
||||
h->m_left = insert(cat, h->m_left, v);
|
||||
else if (d > 0)
|
||||
h->m_right = insert(h->m_right, v);
|
||||
h->m_right = insert(cat, h->m_right, v);
|
||||
else
|
||||
{
|
||||
row_handle rh(m_category, *v);
|
||||
row_handle rh(cat, *v);
|
||||
|
||||
std::ostringstream os;
|
||||
for (auto col : m_category.key_fields())
|
||||
for (auto col : cat.key_fields())
|
||||
{
|
||||
if (rh[col])
|
||||
os << col << ": " << std::quoted(rh[col].text()) << "; ";
|
||||
}
|
||||
|
||||
throw duplicate_key_error("Duplicate Key violation, cat: " + m_category.name() + " values: " + os.str());
|
||||
throw duplicate_key_error("Duplicate Key violation, cat: " + cat.name() + " values: " + os.str());
|
||||
}
|
||||
|
||||
if (is_red(h->m_right) and not is_red(h->m_left))
|
||||
@@ -428,25 +424,25 @@ category_index::entry *category_index::insert(entry *h, row *v)
|
||||
return h;
|
||||
}
|
||||
|
||||
void category_index::erase(row *k)
|
||||
void category_index::erase(category &cat, row *k)
|
||||
{
|
||||
assert(find(k) == k);
|
||||
assert(find(cat, k) == k);
|
||||
|
||||
m_root = erase(m_root, k);
|
||||
m_root = erase(cat, m_root, k);
|
||||
if (m_root != nullptr)
|
||||
m_root->m_red = false;
|
||||
}
|
||||
|
||||
category_index::entry *category_index::erase(entry *h, row *k)
|
||||
category_index::entry *category_index::erase(category &cat, entry *h, row *k)
|
||||
{
|
||||
if (m_row_comparator(k, h->m_row) < 0)
|
||||
if (m_row_comparator(cat, k, h->m_row) < 0)
|
||||
{
|
||||
if (h->m_left != nullptr)
|
||||
{
|
||||
if (not is_red(h->m_left) and not is_red(h->m_left->m_left))
|
||||
h = move_red_left(h);
|
||||
|
||||
h->m_left = erase(h->m_left, k);
|
||||
h->m_left = erase(cat, h->m_left, k);
|
||||
}
|
||||
}
|
||||
else
|
||||
@@ -454,7 +450,7 @@ category_index::entry *category_index::erase(entry *h, row *k)
|
||||
if (is_red(h->m_left))
|
||||
h = rotateRight(h);
|
||||
|
||||
if (m_row_comparator(k, h->m_row) == 0 and h->m_right == nullptr)
|
||||
if (m_row_comparator(cat, k, h->m_row) == 0 and h->m_right == nullptr)
|
||||
{
|
||||
delete h;
|
||||
return nullptr;
|
||||
@@ -465,13 +461,13 @@ category_index::entry *category_index::erase(entry *h, row *k)
|
||||
if (not is_red(h->m_right) and not is_red(h->m_right->m_left))
|
||||
h = move_red_right(h);
|
||||
|
||||
if (m_row_comparator(k, h->m_row) == 0)
|
||||
if (m_row_comparator(cat, k, h->m_row) == 0)
|
||||
{
|
||||
h->m_row = find_min(h->m_right)->m_row;
|
||||
h->m_right = erase_min(h->m_right);
|
||||
}
|
||||
else
|
||||
h->m_right = erase(h->m_right, k);
|
||||
h->m_right = erase(cat, h->m_right, k);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -520,7 +516,7 @@ category::category(const category &rhs)
|
||||
insert_impl(end(), clone_row(*r));
|
||||
|
||||
if (m_cat_validator != nullptr and m_index == nullptr)
|
||||
m_index = new category_index(this);
|
||||
m_index = new category_index(*this);
|
||||
}
|
||||
|
||||
category::category(category &&rhs)
|
||||
@@ -564,7 +560,7 @@ category &category::operator=(const category &rhs)
|
||||
m_cat_validator = rhs.m_cat_validator;
|
||||
|
||||
if (m_cat_validator != nullptr and m_index == nullptr)
|
||||
m_index = new category_index(this);
|
||||
m_index = new category_index(*this);
|
||||
}
|
||||
|
||||
return *this;
|
||||
@@ -669,7 +665,7 @@ void category::set_validator(const validator *v, datablock &db)
|
||||
}
|
||||
|
||||
if (missing.empty())
|
||||
m_index = new category_index(this);
|
||||
m_index = new category_index(*this);
|
||||
else
|
||||
{
|
||||
std::ostringstream msg;
|
||||
@@ -782,7 +778,7 @@ bool category::is_valid() const
|
||||
for (auto r : *this)
|
||||
{
|
||||
auto p = r.get_row();
|
||||
if (m_index->find(p) != p)
|
||||
if (m_index->find(*this, p) != p)
|
||||
m_validator->report_error("Key not found in index for category " + m_name, true);
|
||||
}
|
||||
}
|
||||
@@ -904,7 +900,7 @@ row_handle category::operator[](const key_type &key)
|
||||
if (m_index == nullptr)
|
||||
throw std::logic_error("Category " + m_name + " does not have an index");
|
||||
|
||||
auto row = m_index->find_by_value(key);
|
||||
auto row = m_index->find_by_value(*this, key);
|
||||
if (row != nullptr)
|
||||
result = { *this, *row };
|
||||
}
|
||||
@@ -1078,7 +1074,7 @@ category::iterator category::erase(iterator pos)
|
||||
throw std::runtime_error("erase");
|
||||
|
||||
if (m_index != nullptr)
|
||||
m_index->erase(r);
|
||||
m_index->erase(*this, r);
|
||||
|
||||
if (r == m_head)
|
||||
{
|
||||
@@ -1250,12 +1246,14 @@ std::string category::get_unique_id(std::function<std::string(int)> generator)
|
||||
std::string id_tag = "id";
|
||||
if (m_cat_validator != nullptr and m_cat_validator->m_keys.size() == 1)
|
||||
{
|
||||
id_tag = m_cat_validator->m_keys.front();
|
||||
|
||||
if (m_index == nullptr and m_cat_validator != nullptr)
|
||||
m_index = new category_index(this);
|
||||
m_index = new category_index(*this);
|
||||
|
||||
for (;;)
|
||||
{
|
||||
if (m_index->find_by_value({{ id_tag, result }}) == nullptr)
|
||||
if (m_index->find_by_value(*this, {{ id_tag, result }}) == nullptr)
|
||||
break;
|
||||
result = generator(static_cast<int>(m_last_unique_num++));
|
||||
}
|
||||
@@ -1407,7 +1405,7 @@ void category::update_value(row *row, uint16_t column, std::string_view value, b
|
||||
{
|
||||
// make sure we have an index, if possible
|
||||
if (m_index == nullptr and m_cat_validator != nullptr)
|
||||
m_index = new category_index(this);
|
||||
m_index = new category_index(*this);
|
||||
|
||||
auto &col = m_columns[column];
|
||||
|
||||
@@ -1433,9 +1431,9 @@ void category::update_value(row *row, uint16_t column, std::string_view value, b
|
||||
if (updateLinked and // an update of an Item's value
|
||||
m_index != nullptr and key_field_indices().count(column))
|
||||
{
|
||||
reinsert = m_index->find(row);
|
||||
reinsert = m_index->find(*this, row);
|
||||
if (reinsert)
|
||||
m_index->erase(row);
|
||||
m_index->erase(*this, row);
|
||||
}
|
||||
|
||||
// first remove old value with cix
|
||||
@@ -1446,7 +1444,7 @@ void category::update_value(row *row, uint16_t column, std::string_view value, b
|
||||
row->append(column, { value });
|
||||
|
||||
if (reinsert)
|
||||
m_index->insert(row);
|
||||
m_index->insert(*this, row);
|
||||
|
||||
// see if we need to update any child categories that depend on this value
|
||||
auto iv = col.m_validator;
|
||||
@@ -1602,7 +1600,7 @@ row_handle category::create_copy(row_handle r)
|
||||
category::iterator category::insert_impl(const_iterator pos, row *n)
|
||||
{
|
||||
if (m_index == nullptr and m_cat_validator != nullptr)
|
||||
m_index = new category_index(this);
|
||||
m_index = new category_index(*this);
|
||||
|
||||
assert(n != nullptr);
|
||||
assert(n->m_next == nullptr);
|
||||
@@ -1642,7 +1640,7 @@ category::iterator category::insert_impl(const_iterator pos, row *n)
|
||||
}
|
||||
|
||||
if (m_index != nullptr)
|
||||
m_index->insert(n);
|
||||
m_index->insert(*this, n);
|
||||
|
||||
// insert at end, most often this is the case
|
||||
if (pos.m_current == nullptr)
|
||||
|
||||
156
src/compound.cpp
156
src/compound.cpp
@@ -136,14 +136,17 @@ compound::compound(cif::datablock &db)
|
||||
if (chemComp.size() != 1)
|
||||
throw std::runtime_error("Invalid compound file, chem_comp should contain a single row");
|
||||
|
||||
cif::tie(m_id, m_name, m_type, m_formula, m_formula_weight, m_formal_charge) =
|
||||
chemComp.front().get("id", "name", "type", "formula", "formula_weight", "pdbx_formal_charge");
|
||||
std::string one_letter_code;
|
||||
|
||||
cif::tie(m_id, m_name, m_type, m_formula, m_formula_weight, m_formal_charge, one_letter_code, m_parent_id) =
|
||||
chemComp.front().get("id", "name", "type", "formula", "formula_weight", "pdbx_formal_charge", "one_letter_code", "mon_nstd_parent_comp_id");
|
||||
|
||||
if (one_letter_code.length() == 1)
|
||||
m_one_letter_code = one_letter_code.front();
|
||||
|
||||
// The name should not contain newline characters since that triggers validation errors later on
|
||||
cif::replace_all(m_name, "\n", "");
|
||||
|
||||
m_group = "non-polymer";
|
||||
|
||||
auto &chemCompAtom = db["chem_comp_atom"];
|
||||
for (auto row : chemCompAtom)
|
||||
{
|
||||
@@ -153,6 +156,9 @@ compound::compound(cif::datablock &db)
|
||||
row.get("atom_id", "type_symbol", "charge", "pdbx_aromatic_flag", "pdbx_leaving_atom_flag", "pdbx_stereo_config",
|
||||
"model_Cartn_x", "model_Cartn_y", "model_Cartn_z");
|
||||
atom.type_symbol = atom_type_traits(type_symbol).type();
|
||||
if (stereo_config.empty())
|
||||
atom.stereo_config = stereo_config_type::N;
|
||||
else
|
||||
atom.stereo_config = parse_stereo_config_from_string(stereo_config);
|
||||
m_atoms.push_back(std::move(atom));
|
||||
}
|
||||
@@ -163,17 +169,28 @@ compound::compound(cif::datablock &db)
|
||||
compound_bond bond;
|
||||
std::string valueOrder;
|
||||
cif::tie(bond.atom_id[0], bond.atom_id[1], valueOrder, bond.aromatic, bond.stereo_config) = row.get("atom_id_1", "atom_id_2", "value_order", "pdbx_aromatic_flag", "pdbx_stereo_config");
|
||||
if (valueOrder.empty())
|
||||
bond.type = bond_type::sing;
|
||||
else
|
||||
bond.type = parse_bond_type_from_string(valueOrder);
|
||||
m_bonds.push_back(std::move(bond));
|
||||
}
|
||||
}
|
||||
|
||||
compound::compound(cif::datablock &db, const std::string &id, const std::string &name, const std::string &type, const std::string &group)
|
||||
: m_id(id)
|
||||
, m_name(name)
|
||||
, m_type(type)
|
||||
, m_group(group)
|
||||
compound::compound(cif::datablock &db, int)
|
||||
{
|
||||
auto &chemComp = db["chem_comp"];
|
||||
|
||||
if (chemComp.size() != 1)
|
||||
throw std::runtime_error("Invalid compound file, chem_comp should contain a single row");
|
||||
|
||||
cif::tie(m_id, m_name) =
|
||||
chemComp.front().get("id", "name");
|
||||
|
||||
cif::trim(m_name);
|
||||
|
||||
m_type = "NON-POLYMER";
|
||||
|
||||
auto &chemCompAtom = db["chem_comp_atom"];
|
||||
for (auto row : chemCompAtom)
|
||||
{
|
||||
@@ -184,7 +201,6 @@ compound::compound(cif::datablock &db, const std::string &id, const std::string
|
||||
atom.type_symbol = atom_type_traits(type_symbol).type();
|
||||
|
||||
m_formal_charge += atom.charge;
|
||||
m_formula_weight += atom_type_traits(atom.type_symbol).weight();
|
||||
|
||||
m_atoms.push_back(std::move(atom));
|
||||
}
|
||||
@@ -209,11 +225,39 @@ compound::compound(cif::datablock &db, const std::string &id, const std::string
|
||||
else
|
||||
{
|
||||
if (cif::VERBOSE > 0)
|
||||
std::cerr << "Unimplemented chem_comp_bond.type " << btype << " in " << id << '\n';
|
||||
std::cerr << "Unimplemented chem_comp_bond.type " << btype << " in " << db.name() << '\n';
|
||||
bond.type = bond_type::sing;
|
||||
}
|
||||
m_bonds.push_back(std::move(bond));
|
||||
}
|
||||
|
||||
// reconstruct a formula and weight
|
||||
|
||||
m_formula_weight = 0;
|
||||
|
||||
std::map<atom_type, int> f;
|
||||
for (auto &atom : m_atoms)
|
||||
f[atom.type_symbol] += 1;
|
||||
|
||||
if (f.count(atom_type::C))
|
||||
{
|
||||
atom_type_traits att(atom_type::C);
|
||||
m_formula += att.symbol() + std::to_string(f[atom_type::C]) + ' ';
|
||||
m_formula_weight += att.weight() * f[atom_type::C];
|
||||
}
|
||||
|
||||
for (const auto &[type, count] : f)
|
||||
{
|
||||
if (type == atom_type::C)
|
||||
continue;
|
||||
|
||||
atom_type_traits att(type);
|
||||
m_formula += att.symbol() + std::to_string(count) + ' ';
|
||||
m_formula_weight += att.weight() * count;
|
||||
}
|
||||
|
||||
if (not m_formula.empty())
|
||||
m_formula.pop_back();
|
||||
}
|
||||
|
||||
compound_atom compound::get_atom_by_atom_id(const std::string &atom_id) const
|
||||
@@ -260,13 +304,12 @@ float compound::bond_length(const std::string &atomId_1, const std::string &atom
|
||||
auto a = get_atom_by_atom_id(atomId_1);
|
||||
auto b = get_atom_by_atom_id(atomId_2);
|
||||
|
||||
result = distance(point{a.x, a.y, a.z}, point{b.x, b.y, b.z});
|
||||
result = distance(point{ a.x, a.y, a.z }, point{ b.x, b.y, b.z });
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// known amino acids and bases
|
||||
|
||||
@@ -316,7 +359,7 @@ class compound_factory_impl : public std::enable_shared_from_this<compound_facto
|
||||
compound_factory_impl();
|
||||
compound_factory_impl(const fs::path &file, std::shared_ptr<compound_factory_impl> next);
|
||||
|
||||
~compound_factory_impl()
|
||||
virtual ~compound_factory_impl()
|
||||
{
|
||||
for (auto c : m_compounds)
|
||||
delete c;
|
||||
@@ -373,13 +416,15 @@ class compound_factory_impl : public std::enable_shared_from_this<compound_facto
|
||||
os << "CCD components.cif resource\n";
|
||||
else
|
||||
os << "CCD components file: " << std::quoted(m_file.string()) << '\n';
|
||||
|
||||
|
||||
if (m_next)
|
||||
m_next->describe(os);
|
||||
}
|
||||
|
||||
private:
|
||||
compound *create(const std::string &id);
|
||||
protected:
|
||||
compound_factory_impl(std::shared_ptr<compound_factory_impl> next);
|
||||
|
||||
virtual compound *create(const std::string &id);
|
||||
|
||||
std::shared_timed_mutex mMutex;
|
||||
|
||||
@@ -395,12 +440,17 @@ compound_factory_impl::compound_factory_impl()
|
||||
{
|
||||
}
|
||||
|
||||
compound_factory_impl::compound_factory_impl(const fs::path &file, std::shared_ptr<compound_factory_impl> next)
|
||||
: m_file(file)
|
||||
, m_next(next)
|
||||
compound_factory_impl::compound_factory_impl(std::shared_ptr<compound_factory_impl> next)
|
||||
: m_next(next)
|
||||
{
|
||||
}
|
||||
|
||||
compound_factory_impl::compound_factory_impl(const fs::path &file, std::shared_ptr<compound_factory_impl> next)
|
||||
: compound_factory_impl(next)
|
||||
{
|
||||
m_file = file;
|
||||
}
|
||||
|
||||
compound *compound_factory_impl::create(const std::string &id)
|
||||
{
|
||||
compound *result = nullptr;
|
||||
@@ -476,6 +526,45 @@ compound *compound_factory_impl::create(const std::string &id)
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
class local_compound_factory_impl : public compound_factory_impl
|
||||
{
|
||||
public:
|
||||
local_compound_factory_impl(const cif::file &file, std::shared_ptr<compound_factory_impl> next)
|
||||
: compound_factory_impl(next)
|
||||
, m_local_file(file)
|
||||
{
|
||||
}
|
||||
|
||||
compound *create(const std::string &id) override;
|
||||
|
||||
private:
|
||||
const cif::file &m_local_file;
|
||||
};
|
||||
|
||||
/// Search the local file for a datablock named "comp_" followed by \a id
/// and construct a compound from the first one found.
///
/// The new compound is appended to m_compounds before it is returned.
///
/// \param id The compound ID to look for
/// \return The newly created compound, or nullptr when the local file has
///         no datablock with the expected name
compound *local_compound_factory_impl::create(const std::string &id)
{
	// The name to match is the same for every datablock, so build it once
	const std::string wanted = "comp_" + id;

	for (auto &block : m_local_file)
	{
		if (block.name() != wanted)
			continue;

		// The compound constructor wants a non-const datablock whereas the
		// local file is const, so work on a copy.
		cif::datablock scratch(block);

		auto *created = new compound(scratch, 1);

		// NOTE(review): m_compounds is modified here while only a shared
		// (reader) lock is held. Confirm how callers lock mMutex before
		// replacing this with an exclusive lock.
		std::shared_lock lock(mMutex);
		m_compounds.push_back(created);

		return created;
	}

	return nullptr;
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
std::unique_ptr<compound_factory> compound_factory::s_instance;
|
||||
thread_local std::unique_ptr<compound_factory> compound_factory::tl_instance;
|
||||
bool compound_factory::s_use_thread_local_instance;
|
||||
@@ -553,6 +642,18 @@ void compound_factory::push_dictionary(const fs::path &inDictFile)
|
||||
}
|
||||
}
|
||||
|
||||
/// Put a new factory implementation, backed by the in-memory file
/// \a inDictFile, in front of the current one. The current implementation
/// is passed along as the next one in line.
///
/// Any std::exception thrown while doing so is rethrown nested inside a
/// std::runtime_error.
void compound_factory::push_dictionary(const cif::file &inDictFile)
{
	try
	{
		// The old m_impl is copied into the constructor argument before
		// m_impl itself is replaced.
		m_impl = std::make_shared<local_compound_factory_impl>(inDictFile, m_impl);
	}
	catch (const std::exception &)
	{
		std::throw_with_nested(std::runtime_error("Error loading dictionary from local mmCIF file"));
	}
}
|
||||
|
||||
void compound_factory::pop_dictionary()
|
||||
{
|
||||
if (m_impl)
|
||||
@@ -584,25 +685,26 @@ void compound_factory::report_missing_compound(const std::string &compound_id)
|
||||
{
|
||||
using namespace cif::colour;
|
||||
|
||||
std::clog << "\n" << cif::coloured("Configuration error:", white, red) << "\n\n"
|
||||
std::clog << "\n"
|
||||
<< cif::coloured("Configuration error:", white, red) << "\n\n"
|
||||
<< "The attempt to retrieve compound information for " << std::quoted(compound_id) << " failed.\n\n"
|
||||
<< "This information is searched for in a CCD file called components.cif or\n"
|
||||
<< "components.cif.gz which should be located in one of the following directories:\n\n";
|
||||
|
||||
|
||||
cif::list_data_directories(std::clog);
|
||||
|
||||
std::clog << "\n(Note that you can add a directory to the search paths by setting the \n"
|
||||
<< "LIBCIFPP_DATA_DIR environmental variable)\n\n";
|
||||
|
||||
#if defined(CACHE_DIR)
|
||||
#if defined(CACHE_DIR)
|
||||
std::clog << "On Linux an optional cron script might have been installed that automatically updates\n"
|
||||
<< "components.cif and mmCIF dictionary files. This script only works when the file\n"
|
||||
<< "libcifpp.conf contains an uncommented line with the text:\n\n"
|
||||
<< "update=true\n\n"
|
||||
<< "If you do not have a working cron script, you can manually update the files\n"
|
||||
<< "in /var/cache/libcifpp using the following commands:\n\n"
|
||||
<< "curl -o " << CACHE_DIR << "/components.cif https://ftp.wwpdb.org/pub/pdb/data/monomers/components.cif.gz\n"
|
||||
<< "curl -o " << CACHE_DIR << "/mmcif_pdbx.dic https://mmcif.wwpdb.org/dictionaries/ascii/mmcif_pdbx_v50.dic.gz\n"
|
||||
<< "curl -o " << CACHE_DIR << "/components.cif https://files.wwpdb.org/pub/pdb/data/monomers/components.cif.gz\n"
|
||||
<< "curl -o " << CACHE_DIR << "/mmcif_pdbx.dic https://mmcif.wwpdb.org/dictionaries/ascii/mmcif_pdbx_v50.dic.gz\n"
|
||||
<< "curl -o " << CACHE_DIR << "/mmcif_ma.dic https://github.com/ihmwg/ModelCIF/raw/master/dist/mmcif_ma.dic\n\n";
|
||||
#endif
|
||||
|
||||
@@ -613,9 +715,9 @@ void compound_factory::report_missing_compound(const std::string &compound_id)
|
||||
}
|
||||
else
|
||||
std::clog << "No compound factory objects are created since none of the data sources is found.\n";
|
||||
|
||||
|
||||
cif::list_file_resources(std::clog);
|
||||
|
||||
|
||||
std::clog.flush();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -91,7 +91,7 @@ bool datablock::validate_links() const
|
||||
|
||||
for (auto &cat : *this)
|
||||
result = cat.validate_links() and result;
|
||||
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -158,11 +158,12 @@ std::tuple<datablock::iterator, bool> datablock::emplace(std::string_view name)
|
||||
|
||||
if (is_new)
|
||||
{
|
||||
auto &c = emplace_front(name);
|
||||
auto &c = emplace_back(name);
|
||||
c.set_validator(m_validator, *this);
|
||||
}
|
||||
|
||||
return std::make_tuple(begin(), is_new);
|
||||
assert(end() != begin());
|
||||
return std::make_tuple(std::prev(end()), is_new);
|
||||
}
|
||||
|
||||
std::vector<std::string> datablock::get_tag_order() const
|
||||
@@ -171,14 +172,16 @@ std::vector<std::string> datablock::get_tag_order() const
|
||||
|
||||
// for entry and audit_conform on top
|
||||
|
||||
auto ci = find_if(begin(), end(), [](const category &cat) { return cat.name() == "entry"; });
|
||||
auto ci = find_if(begin(), end(), [](const category &cat)
|
||||
{ return cat.name() == "entry"; });
|
||||
if (ci != end())
|
||||
{
|
||||
auto cto = ci->get_tag_order();
|
||||
result.insert(result.end(), cto.begin(), cto.end());
|
||||
}
|
||||
|
||||
ci = find_if(begin(), end(), [](const category &cat) { return cat.name() == "audit_conform"; });
|
||||
ci = find_if(begin(), end(), [](const category &cat)
|
||||
{ return cat.name() == "audit_conform"; });
|
||||
if (ci != end())
|
||||
{
|
||||
auto cto = ci->get_tag_order();
|
||||
@@ -196,42 +199,131 @@ std::vector<std::string> datablock::get_tag_order() const
|
||||
return result;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
using elem_t = std::tuple<std::string, int, bool>;
|
||||
using cat_order_t = std::vector<elem_t>;
|
||||
using iter_t = cat_order_t::iterator;
|
||||
|
||||
inline int get_count(iter_t i)
|
||||
{
|
||||
return std::get<1>(*i);
|
||||
}
|
||||
|
||||
inline bool is_on_stack(iter_t i)
|
||||
{
|
||||
return std::get<2>(*i);
|
||||
}
|
||||
|
||||
void calculate_cat_order(cat_order_t &cat_order, iter_t i, const validator &validator)
|
||||
{
|
||||
if (i == cat_order.end() or get_count(i) >= 0)
|
||||
return;
|
||||
|
||||
auto &&[cat, count, on_stack] = *i;
|
||||
|
||||
on_stack = true;
|
||||
|
||||
int parent_count = 0;
|
||||
|
||||
for (auto link : validator.get_links_for_child(cat))
|
||||
{
|
||||
auto ei = std::find_if(cat_order.begin(), cat_order.end(), [parent = link->m_parent_category](elem_t &a)
|
||||
{ return std::get<0>(a) == parent; });
|
||||
|
||||
if (ei == cat_order.end())
|
||||
continue;
|
||||
|
||||
if (not is_on_stack(ei))
|
||||
calculate_cat_order(cat_order, ei, validator);
|
||||
|
||||
parent_count += get_count(ei);
|
||||
}
|
||||
|
||||
count = parent_count + 1;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
void datablock::write(std::ostream &os) const
|
||||
{
|
||||
os << "data_" << m_name << '\n'
|
||||
<< "# \n";
|
||||
|
||||
// mmcif support, sort of. First write the 'entry' Category
|
||||
// and if it exists, _AND_ we have a Validator, write out the
|
||||
// audit_conform record.
|
||||
|
||||
for (auto &cat : *this)
|
||||
if (m_validator and size() > 0)
|
||||
{
|
||||
if (cat.name() != "entry")
|
||||
continue;
|
||||
// If the dictionary declares an audit_conform category, put it in,
|
||||
// but only if it does not exist already!
|
||||
if (get("audit_conform") == nullptr and m_validator->get_validator_for_category("audit_conform") != nullptr)
|
||||
{
|
||||
category auditConform("audit_conform");
|
||||
auditConform.emplace({ { "dict_name", m_validator->name() },
|
||||
{ "dict_version", m_validator->version() } });
|
||||
auditConform.write(os);
|
||||
}
|
||||
|
||||
cat.write(os);
|
||||
// base order on parent child relationships, parents first
|
||||
|
||||
break;
|
||||
cat_order_t cat_order;
|
||||
|
||||
for (auto &cat : *this)
|
||||
cat_order.emplace_back(cat.name(), -1, false);
|
||||
|
||||
for (auto i = cat_order.begin(); i != cat_order.end(); ++i)
|
||||
calculate_cat_order(cat_order, i, *m_validator);
|
||||
|
||||
std::sort(cat_order.begin(), cat_order.end(), [](const elem_t &a, const elem_t &b)
|
||||
{
|
||||
const auto &[cat_a, count_a, on_stack_a] = a;
|
||||
const auto &[cat_b, count_b, on_stack_b] = b;
|
||||
|
||||
int d = 0;
|
||||
|
||||
if (cat_a == "audit_conform")
|
||||
d = -1;
|
||||
else if (cat_b == "audit_conform")
|
||||
d = 1;
|
||||
else if (cat_a == "entry")
|
||||
d = -1;
|
||||
else if (cat_b == "entry")
|
||||
d = 1;
|
||||
else
|
||||
{
|
||||
d = std::get<1>(a) - std::get<1>(b);
|
||||
if (d == 0)
|
||||
d = cat_b.compare(cat_a);
|
||||
}
|
||||
|
||||
return d < 0; });
|
||||
|
||||
for (auto &&[cat, count, on_stack] : cat_order)
|
||||
get(cat)->write(os);
|
||||
}
|
||||
|
||||
// If the dictionary declares an audit_conform category, put it in,
|
||||
// but only if it does not exist already!
|
||||
if (get("audit_conform"))
|
||||
get("audit_conform")->write(os);
|
||||
else if (m_validator != nullptr and m_validator->get_validator_for_category("audit_conform") != nullptr)
|
||||
else
|
||||
{
|
||||
category auditConform("audit_conform");
|
||||
auditConform.emplace({
|
||||
{"dict_name", m_validator->name()},
|
||||
{"dict_version", m_validator->version()}});
|
||||
auditConform.write(os);
|
||||
}
|
||||
// mmcif support, sort of. First write the 'entry' Category
|
||||
// and if it exists, _AND_ we have a Validator, write out the
|
||||
// audit_conform record.
|
||||
|
||||
for (auto &cat : *this)
|
||||
{
|
||||
if (cat.name() != "entry")
|
||||
continue;
|
||||
|
||||
for (auto &cat : *this)
|
||||
{
|
||||
if (cat.name() != "entry" and cat.name() != "audit_conform")
|
||||
cat.write(os);
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
// If the dictionary declares an audit_conform category, put it in,
|
||||
// but only if it does not exist already!
|
||||
if (get("audit_conform"))
|
||||
get("audit_conform")->write(os);
|
||||
|
||||
for (auto &cat : *this)
|
||||
{
|
||||
if (cat.name() != "entry" and cat.name() != "audit_conform")
|
||||
cat.write(os);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -337,7 +429,7 @@ bool datablock::operator==(const datablock &rhs) const
|
||||
++catA_i;
|
||||
else
|
||||
{
|
||||
if (not (*dbA.get(*catA_i) == *dbB.get(*catB_i)))
|
||||
if (not(*dbA.get(*catA_i) == *dbB.get(*catB_i)))
|
||||
return false;
|
||||
++catA_i;
|
||||
++catB_i;
|
||||
@@ -347,4 +439,4 @@ bool datablock::operator==(const datablock &rhs) const
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace cif::cif
|
||||
} // namespace cif
|
||||
@@ -173,11 +173,12 @@ std::tuple<file::iterator, bool> file::emplace(std::string_view name)
|
||||
|
||||
if (is_new)
|
||||
{
|
||||
auto &db = emplace_front(name);
|
||||
auto &db = emplace_back(name);
|
||||
db.set_validator(m_validator);
|
||||
}
|
||||
|
||||
return std::make_tuple(begin(), is_new);
|
||||
assert(begin() != end());
|
||||
return std::make_tuple(std::prev(end()), is_new);
|
||||
}
|
||||
|
||||
void file::load(const std::filesystem::path &p)
|
||||
|
||||
@@ -2836,15 +2836,4 @@ void structure::validate_atoms() const
|
||||
assert(atoms.empty());
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
void reconstruct_pdbx(datablock &db)
|
||||
{
|
||||
if (db.get("atom_site") == nullptr)
|
||||
throw std::runtime_error("Cannot reconstruct PDBx file, atom data missing");
|
||||
|
||||
assert(false);
|
||||
throw std::runtime_error("not implemented yet");
|
||||
}
|
||||
|
||||
} // namespace pdbx
|
||||
|
||||
1083
src/pdb/pdb2cif.cpp
1083
src/pdb/pdb2cif.cpp
File diff suppressed because it is too large
Load Diff
561
src/pdb/reconstruct.cpp
Normal file
561
src/pdb/reconstruct.cpp
Normal file
@@ -0,0 +1,561 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2024 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "cif++.hpp"
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
namespace cif::pdb
|
||||
{
|
||||
|
||||
|
||||
/// Check the atom_site records in @a db and fill in data derived from them.
///
/// For every atom_site row this function:
///  - adds a missing atom_type record for the row's type_symbol,
///  - adds or completes the chem_comp record for the row's compound,
///  - fills the auth_* items, using the label_* value when the auth_* value is absent.
///
/// @param db  The datablock to check and update
/// @throws std::runtime_error when a type symbol is missing, when neither the
///         label nor the auth variant of asym_id, comp_id or atom_id is present,
///         when no compound information is available for the comp_id, or when
///         a known peptide has no sequence number
void checkAtomRecords(datablock &db)
{
	using namespace literals;

	auto &cf = compound_factory::instance();

	auto &atom_site = db["atom_site"];
	auto &atom_type = db["atom_type"];
	auto &chem_comp = db["chem_comp"];

	for (auto row : atom_site)
	{
		const auto &[symbol, label_asym_id, auth_asym_id, label_comp_id, auth_comp_id, label_seq_id, auth_seq_id, label_atom_id, auth_atom_id] =
			row.get<std::string, std::optional<std::string>, std::optional<std::string>, std::optional<std::string>, std::optional<std::string>,
				std::optional<int>, std::optional<std::string>, std::optional<std::string>, std::optional<std::string>>(
				"type_symbol", "label_asym_id", "auth_asym_id", "label_comp_id", "auth_comp_id", "label_seq_id", "auth_seq_id", "label_atom_id", "auth_atom_id");

		if (symbol.empty())
			throw std::runtime_error("Missing type symbol in atom_site record");

		if (atom_type.count("symbol"_key == symbol) == 0)
			atom_type.emplace({ { "symbol", symbol } });

		if (not(label_asym_id.has_value() or auth_asym_id.has_value()))
			throw std::runtime_error("atom_site records does not have a label_asym_id nor an auth_asym_id, cannot continue");

		if (not(label_comp_id.has_value() or auth_comp_id.has_value()))
			throw std::runtime_error("atom_site records does not have a label_comp_id nor an auth_comp_id, cannot continue");

		if (not(label_atom_id.has_value() or auth_atom_id.has_value()))
			throw std::runtime_error("atom_site records does not have a label_atom_id nor an auth_atom_id, cannot continue");

		// Note: optional::value_or evaluates its argument eagerly, so writing
		// a.value_or(*b) dereferences b even when it is empty. Therefore the
		// alternatives are selected explicitly. The checks above guarantee
		// that at least one of each pair has a value.
		const std::string comp_id = label_comp_id.has_value() ? *label_comp_id : *auth_comp_id;

		bool is_peptide = cf.is_known_peptide(comp_id);
		auto compound = cf.create(comp_id);

		if (not compound)
			throw std::runtime_error("Missing compound information for " + comp_id);

		std::string mon_nstd_flag(".");
		if (is_peptide)
		{
			if (compound_factory::kAAMap.find(comp_id) != compound_factory::kAAMap.end())
				mon_nstd_flag = "y";
			else
				mon_nstd_flag = "n";
		}

		auto chem_comp_entry = chem_comp.find_first("id"_key == comp_id);

		if (not chem_comp_entry)
		{
			chem_comp.emplace({ //
				{ "id", comp_id },
				{ "type", compound->type() },
				{ "mon_nstd_flag", mon_nstd_flag },
				{ "name", compound->name() },
				{ "formula", compound->formula() },
				{ "formula_weight", compound->formula_weight() } });
		}
		else
		{
			// Only fill in the items that do not have a value yet
			std::vector<item> items;

			if (not chem_comp_entry["type"])
				items.emplace_back(item{ "type", compound->type() });
			if (not chem_comp_entry["mon_nstd_flag"])
				items.emplace_back(item{ "mon_nstd_flag", mon_nstd_flag });
			if (not chem_comp_entry["name"])
				items.emplace_back(item{ "name", compound->name() });
			if (not chem_comp_entry["formula"])
				items.emplace_back(item{ "formula", compound->formula() });
			if (not chem_comp_entry["formula_weight"])
				items.emplace_back(item{ "formula_weight", compound->formula_weight() });

			if (not items.empty())
				chem_comp_entry.assign(std::move(items));
		}

		if (is_peptide and not(label_seq_id.has_value() or auth_seq_id.has_value()))
			throw std::runtime_error("atom_site record has peptide comp_id but no sequence number, cannot continue");

		std::vector<item> auth_items;

		auth_items.emplace_back(item{ "auth_asym_id", auth_asym_id.has_value() ? *auth_asym_id : *label_asym_id });

		// A sequence number is only required for peptides, so both variants
		// may be absent here. In that case auth_seq_id is left untouched.
		if (auth_seq_id.has_value())
			auth_items.emplace_back(item{ "auth_seq_id", *auth_seq_id });
		else if (label_seq_id.has_value())
			auth_items.emplace_back(item{ "auth_seq_id", std::to_string(*label_seq_id) });

		auth_items.emplace_back(item{ "auth_comp_id", auth_comp_id.has_value() ? *auth_comp_id : *label_comp_id });
		auth_items.emplace_back(item{ "auth_atom_id", auth_atom_id.has_value() ? *auth_atom_id : *label_atom_id });

		row.assign(std::move(auth_items));
	}
}
|
||||
|
||||
/// Add a struct_asym record for each label_asym_id found in atom_site that
/// does not have a struct_asym record yet.
///
/// @param db  The datablock to update
/// @throws std::runtime_error when an atom_site row has an empty label_asym_id
void createStructAsym(datablock &db)
{
	auto &atoms = db["atom_site"];
	auto &asyms = db["struct_asym"];

	for (auto asym_id : atoms.rows<std::string>("label_asym_id"))
	{
		if (asym_id.empty())
			throw std::runtime_error("File contains atom_site records without a label_asym_id");

		// Skip ids that already have a record
		const bool already_listed = asyms.count(key("id") == asym_id) != 0;
		if (already_listed)
			continue;

		asyms.emplace({ { "id", asym_id } });
	}
}
|
||||
|
||||
/// Create the entity category based on the content of atom_site and struct_asym.
///
/// For each struct_asym the list of (comp_id, seq_id) pairs is collected from
/// atom_site, skipping consecutive rows having the same seq_id. Asyms with an
/// identical list share one entity. The entity id is written to
/// atom_site.label_entity_id and struct_asym.entity_id and an entity record
/// with type, description, formula weight and number of molecules is added.
///
/// @param db  The datablock to update
/// @throws std::runtime_error when compound information for a comp_id, or for
///         HOH, is not available
void createEntity(datablock &db)
{
	using namespace literals;

	using residue_list = std::vector<std::tuple<std::string, int>>;

	auto &cf = compound_factory::instance();

	// The compound factory returns a null result for unknown compounds,
	// turn that into an exception instead of dereferencing it.
	auto get_compound = [&cf](const std::string &comp_id)
	{
		auto c = cf.create(comp_id);
		if (not c)
			throw std::runtime_error("Missing compound information for " + comp_id);
		return c;
	};

	auto &atom_site = db["atom_site"];
	atom_site.add_column("label_entity_id");

	auto &struct_asym = db["struct_asym"];
	struct_asym.add_column("entity_id");

	std::map<std::string, residue_list> asyms;

	for (auto asym_id : db["struct_asym"].rows<std::string>("id"))
	{
		int last_seq_id = -1;

		for (const auto &[comp_id, seq_id] : atom_site.find<std::string, int>("label_asym_id"_key == asym_id, "label_comp_id", "label_seq_id"))
		{
			if (seq_id == last_seq_id)
				continue;

			last_seq_id = seq_id;

			asyms[asym_id].emplace_back(comp_id, last_seq_id);
		}
	}

	// Order the entities: longest residue list first, for lists of equal
	// length the greater one comes first
	auto less = [](const residue_list &a, const residue_list &b)
	{
		int d = static_cast<int>(a.size()) - static_cast<int>(b.size());
		return d == 0 ? a > b : d > 0;
	};

	std::set<residue_list, decltype(less)> entities(less);

	for (const auto &[asym_id, content] : asyms)
		entities.emplace(content);

	auto water_weight = get_compound("HOH")->formula_weight();

	int poly_count = 0;

	auto &entity = db["entity"];
	for (auto &content : entities)
	{
		auto entity_id = entity.get_unique_id("");

		std::string type, desc;
		float weight = 0;
		int count = 0;

		auto first_comp_id = std::get<0>(content.front());

		if (first_comp_id == "HOH")
		{
			type = "water";
			desc = "water";
			weight = water_weight;
		}
		else if (content.size() == 1)
		{
			auto c = get_compound(first_comp_id);

			type = "non-polymer";
			desc = c->name();
			weight = c->formula_weight();
		}
		else
		{
			type = "polymer";
			desc = "polymer-" + std::to_string(++poly_count);

			// Each residue contributes its weight minus one water
			weight = water_weight;
			for (const auto &[comp_id, seq_id] : content)
				weight += get_compound(comp_id)->formula_weight() - water_weight;
		}

		for (const auto &[asym_id, ac] : asyms)
		{
			if (ac != content)
				continue;

			atom_site.update_value("label_asym_id"_key == asym_id, "label_entity_id", entity_id);
			struct_asym.update_value("id"_key == asym_id, "entity_id", entity_id);

			// Waters are counted by their O atoms. Accumulate, since more
			// than one asym can share the water entity.
			if (type != "water")
				++count;
			else
				count += static_cast<int>(atom_site.count("label_asym_id"_key == asym_id and "label_atom_id"_key == "O"));
		}

		entity.emplace({ //
			{ "id", entity_id },
			{ "type", type },
			{ "pdbx_description", desc },
			{ "formula_weight", weight },
			{ "pdbx_number_of_molecules", count } });
	}
}
|
||||
|
||||
/// Create an entity_poly record for each entity of type 'polymer'.
///
/// The polymer type, the one letter code sequences and the non-standard
/// monomer/linkage flags are derived from the atom_site rows of the entity.
/// Consecutive rows having the same label_seq_id are treated as one residue.
///
/// NOTE(review): atom_site rows are selected on label_entity_id only, so for
/// an entity with more than one asym the residues of every asym are appended
/// to the sequence, and pdbx_strand_id holds the auth_asym_id of the last row
/// only. Confirm whether this is intended.
///
/// @param db  The datablock to update
void createEntityPoly(datablock &db)
{
	using namespace literals;

	auto &cf = compound_factory::instance();

	auto &atom_site = db["atom_site"];
	auto &entity_poly = db["entity_poly"];

	for (auto entity_id : db["entity"].find<std::string>("type"_key == "polymer", "id"))
	{
		std::string type;
		int last_seq_id = -1;
		std::string seq, seq_can;
		bool non_std_monomer = false;
		bool non_std_linkage = false;
		std::string pdb_strand_id;

		for (const auto &[comp_id, seq_id, auth_asym_id] : atom_site.find<std::string, int, std::string>("label_entity_id"_key == entity_id, "label_comp_id", "label_seq_id", "auth_asym_id"))
		{
			if (seq_id == last_seq_id)
				continue;

			last_seq_id = seq_id;

			auto c = cf.create(comp_id);

			// Defaults for monomers that match none of the cases below, this
			// avoids appending an uninitialised character to the sequence.
			std::string c_type;
			std::string letter = '(' + comp_id + ')';
			char letter_can = 'X';

			// TODO: Perhaps we should improve this...
			if (cf.is_known_base(comp_id))
			{
				c_type = "polydeoxyribonucleotide";
				letter = letter_can = compound_factory::kBaseMap.at(comp_id);
			}
			else if (cf.is_known_peptide(comp_id))
			{
				c_type = "polypeptide(L)";
				letter = letter_can = compound_factory::kAAMap.at(comp_id);
			}
			else if (c and iequals(c->type(), "D-PEPTIDE LINKING"))
			{
				c_type = "polypeptide(D)";

				letter_can = c->one_letter_code();
				if (letter_can == 0)
					letter_can = 'X';

				non_std_linkage = true;
				non_std_monomer = true;
			}
			else if (c and (iequals(c->type(), "L-PEPTIDE LINKING") or iequals(c->type(), "PEPTIDE LINKING")))
			{
				c_type = "polypeptide(L)";

				letter_can = c->one_letter_code();
				if (letter_can == 0)
					letter_can = 'X';

				non_std_monomer = true;
			}

			// Once the type is 'other' it stays that way
			if (type != "other")
			{
				if (type.empty())
					type = c_type;
				else if (type != c_type)
					type = "other";
			}

			seq += letter;
			seq_can += letter_can;

			pdb_strand_id = auth_asym_id;
		}

		// Break the sequence into lines of 80 characters. Indices are used
		// instead of iterators since forming an iterator beyond end() is
		// undefined, which happened for sequences shorter than a line.
		for (std::size_t pos = 80; pos < seq.size(); pos += 81)
			seq.insert(pos, 1, '\n');

		// For the canonical sequence, look from column 76 up to four
		// characters ahead for a '(' and break the line in front of it, or
		// after these four characters when there is none.
		for (std::size_t pos = 76; pos < seq_can.size(); pos += 76)
		{
			std::size_t brk = pos;
			while (brk < pos + 4 and brk < seq_can.size())
			{
				if (seq_can[brk] == '(')
					break;
				++brk;
			}

			if (brk < seq_can.size())
			{
				seq_can.insert(brk, 1, '\n');
				pos = brk + 1;
			}
			else
				pos = brk;
		}

		entity_poly.emplace({ //
			{ "entity_id", entity_id },
			{ "type", type },
			{ "nstd_linkage", non_std_linkage },
			{ "nstd_monomer", non_std_monomer },
			{ "pdbx_seq_one_letter_code", seq },
			{ "pdbx_seq_one_letter_code_can", seq_can },
			{ "pdbx_strand_id", pdb_strand_id } });
	}
}
|
||||
|
||||
/// Create the entity_poly_seq records for each entity in entity_poly.
///
/// The entity_poly category is created first when it does not exist. For each
/// entity the residues are taken from the atom_site rows of the first
/// struct_asym record found for that entity. When a row has the same seq_id
/// as the previous residue but another comp_id, both records get the hetero
/// flag set.
///
/// @param db  The datablock to update
void createEntityPolySeq(datablock &db)
{
	if (db.get("entity_poly") == nullptr)
		createEntityPoly(db);

	using namespace literals;

	auto &atom_site = db["atom_site"];
	auto &entity_poly = db["entity_poly"];
	auto &entity_poly_seq = db["entity_poly_seq"];
	auto &struct_asym = db["struct_asym"];

	for (auto entity_id : entity_poly.rows<std::string>("entity_id"))
	{
		int prev_seq_id = -1;
		std::string prev_comp_id;
		std::string asym_id = struct_asym.find_first<std::string>("entity_id"_key == entity_id, "id");

		for (const auto &[comp_id, seq_id] : atom_site.find<std::string, int>("label_entity_id"_key == entity_id and "label_asym_id"_key == asym_id, "label_comp_id", "label_seq_id"))
		{
			const bool same_position = seq_id == prev_seq_id;

			// Another atom of the residue that was just added
			if (same_position and prev_comp_id == comp_id)
				continue;

			// Same position, other compound: heterogeneous
			const bool hetero = same_position;

			// The previously added record is part of the same heterogeneity
			if (hetero)
				entity_poly_seq.back().assign({ { "hetero", true } });

			entity_poly_seq.emplace({ //
				{ "entity_id", entity_id },
				{ "num", seq_id },
				{ "mon_id", comp_id },
				{ "hetero", hetero } });

			prev_seq_id = seq_id;
			prev_comp_id = comp_id;
		}

		// you cannot assume this is correct...
		entity_poly_seq.sort([](row_handle a, row_handle b)
			{
				const int num_a = a.get<int>("num");
				const int num_b = b.get<int>("num");
				return num_a < num_b; });
	}
}
|
||||
|
||||
/// Create the pdbx_poly_seq_scheme records.
///
/// The entity_poly_seq category is created first when it does not exist.
/// For each entity in entity_poly, each struct_asym of that entity and each
/// entity_poly_seq record of that entity, one record is added. The author
/// sequence number, author compound id and insertion code are taken from the
/// first atom_site row found for the asym and sequence number.
///
/// @param db  The datablock to update
void createPdbxPolySeqScheme(datablock &db)
{
	if (db.get("entity_poly_seq") == nullptr)
		createEntityPolySeq(db);

	using namespace literals;

	auto &atom_site = db["atom_site"];
	auto &entity_poly = db["entity_poly"];
	auto &entity_poly_seq = db["entity_poly_seq"];
	auto &struct_asym = db["struct_asym"];
	auto &scheme = db["pdbx_poly_seq_scheme"];

	for (const auto &[entity_id, strand_id] : entity_poly.rows<std::string, std::string>("entity_id", "pdbx_strand_id"))
	{
		for (auto asym_id : struct_asym.find<std::string>("entity_id"_key == entity_id, "id"))
		{
			for (const auto &[mon_id, seq_nr, hetero] : entity_poly_seq.find<std::string, int, bool>("entity_id"_key == entity_id, "mon_id", "num", "hetero"))
			{
				// Select the atoms of this residue in this asym
				auto residue_atoms = "label_asym_id"_key == asym_id and "label_seq_id"_key == seq_nr;

				const auto &[auth_seq_num, auth_mon_id, ins_code] =
					atom_site.find_first<std::string, std::string, std::optional<std::string>>(
						std::move(residue_atoms), "auth_seq_id", "auth_comp_id", "pdbx_PDB_ins_code");

				scheme.emplace({ //
					{ "asym_id", asym_id },
					{ "entity_id", entity_id },
					{ "seq_id", seq_nr },
					{ "mon_id", mon_id },
					{ "ndb_seq_num", seq_nr },
					{ "pdb_seq_num", auth_seq_num },
					{ "auth_seq_num", auth_seq_num },
					{ "pdb_mon_id", auth_mon_id },
					{ "auth_mon_id", auth_mon_id },
					{ "pdb_strand_id", strand_id },
					{ "pdb_ins_code", ins_code },
					{ "hetero", hetero } });
			}
		}
	}
}
|
||||
|
||||
/// Try to turn the first datablock of @a file into a more complete PDBx
/// datablock, using the dictionary named @a dictionary.
///
/// An entry category is added when missing, items linking to entry.id are set
/// to the entry id, empty values of single-item keys are filled with a unique
/// id and finally the categories atom_type, chem_comp, struct_asym, entity and
/// pdbx_poly_seq_scheme are checked or created.
///
/// @param file        The file to update
/// @param dictionary  Name of the dictionary to use
/// @throws std::runtime_error when the file is empty, when atom_site is
///         missing or when the entry category does not contain exactly one row
void reconstruct_pdbx(file &file, std::string_view dictionary)
{
	if (file.empty())
		throw std::runtime_error("Cannot reconstruct PDBx, file seems to be empty");

	// assuming the first datablock contains the entry ...
	auto &db = file.front();

	// ... and any additional datablock will contain compound information
	cif::compound_source cs(file);

	if (db.get("atom_site") == nullptr)
		throw std::runtime_error("Cannot reconstruct PDBx file, atom data missing");

	auto &validator = validator_factory::instance()[dictionary];

	std::string entry_id;

	if (db.get("entry") != nullptr)
	{
		auto &entry = db["entry"];
		if (entry.size() != 1)
			throw std::runtime_error("Unexpected size of entry category");

		entry_id = entry.front().get<std::string>("id");
	}
	else
	{
		// Phenix files do not have an entry record,
		// create one named after the datablock
		entry_id = db.name();

		category entry("entry");
		entry.emplace({ { "id", entry_id } });
		db.emplace_back(std::move(entry));
	}

	for (auto &cat : db)
	{
		auto cat_validator = validator.get_validator_for_category(cat.name());
		if (not cat_validator)
			continue;

		for (auto link : validator.get_links_for_child(cat.name()))
		{
			if (link->m_parent_category != "entry")
				continue;

			// So, this cat should have a link to the entry

			const auto first_parent_key = link->m_parent_keys.begin();
			const auto last_parent_key = link->m_parent_keys.end();

			auto id_pos = std::find(first_parent_key, last_parent_key, "id");
			if (id_pos == last_parent_key)
				continue;

			// The child key at the same index as the parent key 'id'
			auto child_key = link->m_child_keys[id_pos - first_parent_key];

			for (auto row : cat)
				row.assign({ { child_key, entry_id } });
		}

		// See if all categories that need a key do have a value
		if (cat_validator->m_keys.size() != 1)
			continue;

		auto key_name = cat_validator->m_keys.front();

		for (auto row : cat)
		{
			auto current = row.get<std::string>(key_name.c_str());
			if (not current.empty())
				continue;

			row.assign({ //
				{ key_name, cat.get_unique_id([](int nr)
								{ return std::to_string(nr); }) } });
		}
	}

	file.load_dictionary(dictionary);

	// Now create any missing categories

	// First, see if atom records make sense at all
	// Will take care of atom_type and chem_comp as well.
	checkAtomRecords(db);

	// Next make sure we have struct_asym records
	if (db.get("struct_asym") == nullptr)
		createStructAsym(db);

	if (db.get("entity") == nullptr)
		createEntity(db);

	if (db.get("pdbx_poly_seq_scheme") == nullptr)
		createPdbxPolySeqScheme(db);
}
|
||||
|
||||
} // namespace cif::pdb
|
||||
284
src/pdb/validate-pdbx.cpp
Normal file
284
src/pdb/validate-pdbx.cpp
Normal file
@@ -0,0 +1,284 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2024 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "cif++.hpp"
|
||||
|
||||
namespace cif::pdb
|
||||
{
|
||||
|
||||
/// Construct a condition that selects the rows in @a parentCat that are a
/// parent of the row @a rh, based on the links defined in @a validator.
///
/// Each link from the category of @a rh to @a parentCat contributes one
/// sub-condition requiring the parent keys to be equal to the non-empty child
/// values; the sub-conditions are combined using 'or'. When no link exists an
/// empty condition is returned and, if cif::VERBOSE > 0, a warning is written
/// to std::cerr.
condition get_parents_condition(const validator &validator, row_handle rh, const category &parentCat)
{
	condition result;

	auto childName = rh.get_category().name();
	auto parentName = parentCat.name();

	// Keep only the links pointing to the requested parent category
	auto links = validator.get_links_for_child(childName);
	auto obsolete = std::remove_if(links.begin(), links.end(), [n = parentName](auto &l)
		{ return l->m_parent_category != n; });
	links.erase(obsolete, links.end());

	if (links.empty())
	{
		if (cif::VERBOSE > 0)
			std::cerr << "warning: no child to parent links were found for child " << childName << " and parent " << parentName << '\n';

		return result;
	}

	for (auto &link : links)
	{
		condition link_cond;

		const size_t key_count = link->m_child_keys.size();
		for (size_t ix = 0; ix < key_count; ++ix)
		{
			auto childValue = rh[link->m_child_keys[ix]];

			// Empty child values do not take part in the condition
			if (not childValue.empty())
				link_cond = std::move(link_cond) and key(link->m_parent_keys[ix]) == childValue.text();
		}

		result = std::move(result) or std::move(link_cond);
	}

	return result;
}
|
||||
|
||||
bool is_valid_pdbx_file(const file &file, std::string_view dictionary)
|
||||
{
|
||||
using namespace cif::literals;
|
||||
|
||||
auto &cf = cif::compound_factory::instance();
|
||||
auto &validator = cif::validator_factory::instance().operator[](dictionary);
|
||||
|
||||
bool result = true;
|
||||
|
||||
try
|
||||
{
|
||||
if (file.empty())
|
||||
throw validation_error("Empty file");
|
||||
|
||||
auto &db = file.front();
|
||||
|
||||
if (db.empty())
|
||||
throw validation_error("Empty datablock");
|
||||
|
||||
auto &atom_site = db["atom_site"];
|
||||
if (atom_site.empty())
|
||||
throw validation_error("Empty or missing atom_site category");
|
||||
|
||||
auto &pdbx_poly_seq_scheme = db["pdbx_poly_seq_scheme"];
|
||||
|
||||
std::string last_asym_id;
|
||||
int last_seq_id = -1;
|
||||
for (auto r : atom_site)
|
||||
{
|
||||
auto seq_id = r.get<std::optional<int>>("label_seq_id");
|
||||
if (not seq_id.has_value()) // not a residue in a polymer
|
||||
continue;
|
||||
|
||||
if (*seq_id == last_seq_id)
|
||||
continue;
|
||||
|
||||
last_seq_id = *seq_id;
|
||||
|
||||
auto comp_id = r.get<std::string>("label_comp_id");
|
||||
if (not cf.is_known_peptide(comp_id))
|
||||
continue;
|
||||
|
||||
auto p = pdbx_poly_seq_scheme.find(get_parents_condition(validator, r, pdbx_poly_seq_scheme));
|
||||
if (p.size() != 1)
|
||||
throw validation_error("For each residue in atom_site that is a residue in a polymer there should be exactly one pdbx_poly_seq_scheme record");
|
||||
}
|
||||
|
||||
auto &entity = db["entity"];
|
||||
if (entity.empty())
|
||||
throw validation_error("Entity category is missing or empty");
|
||||
|
||||
auto &entity_poly = db["entity_poly"];
|
||||
if (entity_poly.empty())
|
||||
throw validation_error("Entity_poly category is missing or empty");
|
||||
|
||||
auto &entity_poly_seq = db["entity_poly_seq"];
|
||||
if (entity_poly_seq.empty())
|
||||
throw validation_error("Entity_poly_seq category is missing or empty");
|
||||
|
||||
auto &struct_asym = db["struct_asym"];
|
||||
if (struct_asym.empty())
|
||||
throw validation_error("struct_asym category is missing or empty");
|
||||
|
||||
for (auto entity_id : entity.find<std::string>("type"_key == "polymer", "id"))
|
||||
{
|
||||
if (entity_poly.count("entity_id"_key == entity_id) != 1)
|
||||
throw validation_error("There should be exactly one entity_poly record per polymer entity");
|
||||
|
||||
const auto entity_poly_type = entity_poly.find1<std::string>("entity_id"_key == entity_id, "type");
|
||||
|
||||
std::map<int,std::set<std::string>> mon_per_seq_id;
|
||||
|
||||
for (const auto &[num, mon_id, hetero] : entity_poly_seq.find<int, std::string, bool>("entity_id"_key == entity_id, "num", "mon_id", "hetero"))
|
||||
{
|
||||
mon_per_seq_id[num].emplace(mon_id);
|
||||
|
||||
for (auto asym_id : struct_asym.find<std::string>("entity_id"_key == entity_id, "id"))
|
||||
{
|
||||
if (pdbx_poly_seq_scheme.count(
|
||||
"asym_id"_key == asym_id and
|
||||
"mon_id"_key == mon_id and
|
||||
"seq_id"_key == num and
|
||||
"hetero"_key == hetero) != 1)
|
||||
{
|
||||
throw validation_error("For each entity_poly_seq record there should be exactly one pdbx_poly_seq record");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (const auto &[seq_id, mon_id, hetero] : pdbx_poly_seq_scheme.find<int, std::string, bool>("entity_id"_key == entity_id, "seq_id", "mon_id", "hetero"))
|
||||
{
|
||||
if (entity_poly_seq.count(
|
||||
"mon_id"_key == mon_id and
|
||||
"num"_key == seq_id and
|
||||
"hetero"_key == hetero) != 1)
|
||||
{
|
||||
throw validation_error("For each pdbx_poly_seq/struct_asym record there should be exactly one entity_poly_seq record");
|
||||
}
|
||||
|
||||
if ((mon_per_seq_id[seq_id].size() > 1) != hetero)
|
||||
throw validation_error("Mismatch between the hetero flag in the poly seq schemes and the number residues per seq_id");
|
||||
}
|
||||
|
||||
for (const auto &[seq_id, mon_ids] : mon_per_seq_id)
|
||||
{
|
||||
for (auto asym_id : struct_asym.find<std::string>("entity_id"_key == entity_id, "id"))
|
||||
{
|
||||
condition cond;
|
||||
|
||||
for (auto mon_id : mon_ids)
|
||||
cond = std::move(cond) or "label_comp_id"_key == mon_id;
|
||||
|
||||
cond = "label_entity_id"_key == entity_id and
|
||||
"label_asym_id"_key == asym_id and
|
||||
"label_seq_id"_key == seq_id and not std::move(cond);
|
||||
|
||||
if (atom_site.exists(std::move(cond)))
|
||||
throw validation_error("An atom_site record exists that has no parent in the poly seq scheme categories");
|
||||
}
|
||||
}
|
||||
|
||||
auto &&[seq, seq_can] = entity_poly.find1<std::optional<std::string>, std::optional<std::string>>("entity_id"_key == entity_id,
|
||||
"pdbx_seq_one_letter_code", "pdbx_seq_one_letter_code_can");
|
||||
|
||||
std::string::const_iterator si, sci, se, sce;
|
||||
|
||||
auto seq_match = [&](bool can, std::string::const_iterator si, std::string::const_iterator se)
|
||||
{
|
||||
for (const auto &[seq_id, comp_ids] : mon_per_seq_id)
|
||||
{
|
||||
if (si == se)
|
||||
return false;
|
||||
|
||||
bool match = false;
|
||||
|
||||
for (auto comp_id : comp_ids)
|
||||
{
|
||||
std::string letter;
|
||||
if (cf.is_known_base(comp_id))
|
||||
letter = compound_factory::kBaseMap.at(comp_id);
|
||||
else if (cf.is_known_peptide(comp_id))
|
||||
letter = compound_factory::kAAMap.at(comp_id);
|
||||
else
|
||||
{
|
||||
if (can)
|
||||
{
|
||||
auto c = cf.create(comp_id);
|
||||
if (c and c->one_letter_code())
|
||||
letter = c->one_letter_code();
|
||||
else
|
||||
letter = "X";
|
||||
}
|
||||
else
|
||||
letter = '(' + comp_id + ')';
|
||||
}
|
||||
|
||||
if (iequals(std::string{si, si + letter.length()}, letter))
|
||||
{
|
||||
match = true;
|
||||
si += letter.length();
|
||||
break;
|
||||
}
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
if (not match)
|
||||
break;
|
||||
}
|
||||
|
||||
return si == se;
|
||||
};
|
||||
|
||||
if (not seq.has_value())
|
||||
{
|
||||
if (cif::VERBOSE > 0)
|
||||
std::clog << "Warning: entity_poly has no sequence for entity_id " << entity_id << '\n';
|
||||
}
|
||||
else
|
||||
{
|
||||
seq->erase(std::remove_if(seq->begin(), seq->end(), [](char ch) { return std::isspace(ch); }), seq->end());
|
||||
|
||||
if (not seq_match(false, seq->begin(), seq->end()))
|
||||
throw validation_error("Sequences do not match for entity " + entity_id);
|
||||
}
|
||||
|
||||
if (not seq_can.has_value())
|
||||
{
|
||||
if (cif::VERBOSE > 0)
|
||||
std::clog << "Warning: entity_poly has no sequence for entity_id " << entity_id << '\n';
|
||||
}
|
||||
else
|
||||
{
|
||||
seq_can->erase(std::remove_if(seq_can->begin(), seq_can->end(), [](char ch) { return std::isspace(ch); }), seq_can->end());
|
||||
|
||||
if (not seq_match(true, seq_can->begin(), seq_can->end()))
|
||||
throw validation_error("Canonical sequences do not match for entity " + entity_id);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
result = true;
|
||||
}
|
||||
catch (const std::exception &ex)
|
||||
{
|
||||
result = false;
|
||||
if (cif::VERBOSE > 0)
|
||||
std::clog << ex.what() << '\n';
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
} // namespace cif::pdb
|
||||
|
||||
BIN
test/pdb1cbs.ent.gz
Normal file
BIN
test/pdb1cbs.ent.gz
Normal file
Binary file not shown.
@@ -13,10 +13,11 @@ int main(int argc, char *argv[])
|
||||
|
||||
// Build a new parser on top of Catch2's
|
||||
using namespace Catch::clara;
|
||||
auto cli = session.cli() // Get Catch2's command line parser
|
||||
| Opt(gTestDir, "data-dir") // bind variable to a new option, with a hint string
|
||||
["-D"]["--data-dir"] // the option names it will respond to
|
||||
("The directory containing the data files"); // description string for the help output
|
||||
auto cli = session.cli() // Get Catch2's command line parser
|
||||
| Opt(gTestDir, "data-dir") // bind variable to a new option, with a hint string
|
||||
["-D"]["--data-dir"] // the option names it will respond to
|
||||
("The directory containing the data files") // description string for the help output
|
||||
| Opt(cif::VERBOSE, "verbose")["-v"]["--cif-verbose"]("Flag for cif::VERBOSE");
|
||||
|
||||
// Now pass the new composite back to Catch2 so it uses that
|
||||
session.cli(cli);
|
||||
@@ -34,6 +35,5 @@ int main(int argc, char *argv[])
|
||||
|
||||
cif::compound_factory::instance().push_dictionary(gTestDir / "HEM.cif");
|
||||
|
||||
|
||||
return session.run();
|
||||
}
|
||||
@@ -3468,3 +3468,22 @@ TEST_CASE("compound_not_found_test_1")
|
||||
auto cmp = cif::compound_factory::instance().create("&&&");
|
||||
REQUIRE(cmp == nullptr);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// PDB2CIF tests
|
||||
|
||||
// Reads the legacy PDB file pdb1cbs.ent.gz through cif::pdb::read and checks
// the formula_weight stored in the resulting entity category for the
// entities with id 1, 2 and 3.
TEST_CASE("pdb2cif_formula_weight")
{
	// REA.cif is pushed as an additional compound dictionary before reading.
	cif::compound_factory::instance().push_dictionary(gTestDir / "REA.cif");

	cif::file a = cif::pdb::read(gTestDir / "pdb1cbs.ent.gz");

	// All weights are compared with a tolerance: an exact == on floats makes
	// the test depend on the last bit of the text-to-float conversion.
	auto fw = a.front()["entity"].find1<float>(cif::key("id") == 1, "formula_weight");
	CHECK(std::abs(fw - 15581.802f) < 0.1f);

	fw = a.front()["entity"].find1<float>(cif::key("id") == 2, "formula_weight");
	CHECK(std::abs(fw - 300.435f) < 0.001f);

	fw = a.front()["entity"].find1<float>(cif::key("id") == 3, "formula_weight");
	CHECK(std::abs(fw - 18.015f) < 0.001f);
}
|
||||
286
test/validate-pdbx-test.cpp
Normal file
286
test/validate-pdbx-test.cpp
Normal file
@@ -0,0 +1,286 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "test-main.hpp"
|
||||
|
||||
#include <catch2/catch.hpp>
|
||||
|
||||
#include <cif++.hpp>
|
||||
|
||||
#include <stdexcept>
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// User-defined literal: builds a cif::file by reading the characters of the
// literal through a std::istream.
cif::file operator""_cf(const char *text, size_t length)
{
	// Stream buffer whose get area is set to span the literal's characters.
	struct literal_buf : public std::streambuf
	{
		literal_buf(char *first, size_t count)
		{
			setg(first, first, first + count);
		}
	};

	// setg takes non-const char pointers, hence the const_cast.
	literal_buf source(const_cast<char *>(text), length);
	std::istream input(&source);

	return cif::file(input);
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
TEST_CASE("test-1")
|
||||
{
|
||||
auto f = R"(data_1CBS
|
||||
#
|
||||
_entry.id 1CBS
|
||||
#
|
||||
_entity.id 1
|
||||
_entity.type polymer
|
||||
#
|
||||
_entity_poly.entity_id 1
|
||||
_entity_poly.type 'polypeptide(L)'
|
||||
_entity_poly.nstd_linkage no
|
||||
_entity_poly.nstd_monomer no
|
||||
_entity_poly.pdbx_seq_one_letter_code
|
||||
;PNFSG
|
||||
;
|
||||
_entity_poly.pdbx_seq_one_letter_code_can
|
||||
;PNFSG
|
||||
;
|
||||
_entity_poly.pdbx_strand_id A
|
||||
_entity_poly.pdbx_target_identifier ?
|
||||
#
|
||||
loop_
|
||||
_entity_poly_seq.entity_id
|
||||
_entity_poly_seq.num
|
||||
_entity_poly_seq.mon_id
|
||||
_entity_poly_seq.hetero
|
||||
1 1 PRO n
|
||||
1 2 ASN n
|
||||
1 3 PHE n
|
||||
1 4 SER n
|
||||
1 5 GLY n
|
||||
#
|
||||
loop_
|
||||
_struct_asym.id
|
||||
_struct_asym.pdbx_blank_PDB_chainid_flag
|
||||
_struct_asym.pdbx_modified
|
||||
_struct_asym.entity_id
|
||||
_struct_asym.details
|
||||
A N N 1 ?
|
||||
#
|
||||
loop_
|
||||
_atom_type.symbol
|
||||
C
|
||||
N
|
||||
O
|
||||
S
|
||||
#
|
||||
loop_
|
||||
_atom_site.group_PDB
|
||||
_atom_site.id
|
||||
_atom_site.type_symbol
|
||||
_atom_site.label_atom_id
|
||||
_atom_site.label_alt_id
|
||||
_atom_site.label_comp_id
|
||||
_atom_site.label_asym_id
|
||||
_atom_site.label_entity_id
|
||||
_atom_site.label_seq_id
|
||||
_atom_site.pdbx_PDB_ins_code
|
||||
_atom_site.Cartn_x
|
||||
_atom_site.Cartn_y
|
||||
_atom_site.Cartn_z
|
||||
_atom_site.occupancy
|
||||
_atom_site.B_iso_or_equiv
|
||||
_atom_site.pdbx_formal_charge
|
||||
_atom_site.auth_seq_id
|
||||
_atom_site.auth_comp_id
|
||||
_atom_site.auth_asym_id
|
||||
_atom_site.auth_atom_id
|
||||
_atom_site.pdbx_PDB_model_num
|
||||
ATOM 2 C CA . PRO A 1 1 ? 18.150 13.525 43.680 1.00 28.82 ? 1 PRO A CA 1
|
||||
ATOM 9 C CA . ASN A 1 2 ? 20.576 16.457 43.578 1.00 20.79 ? 2 ASN A CA 1
|
||||
ATOM 17 C CA . PHE A 1 3 ? 21.144 17.838 40.087 1.00 12.62 ? 3 PHE A CA 1
|
||||
ATOM 28 C CA . SER A 1 4 ? 23.170 20.780 41.464 1.00 11.30 ? 4 SER A CA 1
|
||||
ATOM 34 C CA . GLY A 1 5 ? 26.628 21.486 40.103 1.00 10.86 ? 5 GLY A CA 1
|
||||
#
|
||||
loop_
|
||||
_pdbx_poly_seq_scheme.asym_id
|
||||
_pdbx_poly_seq_scheme.entity_id
|
||||
_pdbx_poly_seq_scheme.seq_id
|
||||
_pdbx_poly_seq_scheme.mon_id
|
||||
_pdbx_poly_seq_scheme.ndb_seq_num
|
||||
_pdbx_poly_seq_scheme.pdb_seq_num
|
||||
_pdbx_poly_seq_scheme.auth_seq_num
|
||||
_pdbx_poly_seq_scheme.pdb_mon_id
|
||||
_pdbx_poly_seq_scheme.auth_mon_id
|
||||
_pdbx_poly_seq_scheme.pdb_strand_id
|
||||
_pdbx_poly_seq_scheme.pdb_ins_code
|
||||
_pdbx_poly_seq_scheme.hetero
|
||||
A 1 1 PRO 1 1 1 PRO PRO A . n
|
||||
A 1 2 ASN 2 2 2 ASN ASN A . n
|
||||
A 1 3 PHE 3 3 3 PHE PHE A . n
|
||||
A 1 4 SER 4 4 4 SER SER A . n
|
||||
A 1 5 GLY 5 5 5 GLY GLY A . n
|
||||
#
|
||||
)"_cf;
|
||||
|
||||
SECTION("Plain file")
{
	// The unmodified fixture is required to validate.
	REQUIRE(cif::pdb::is_valid_pdbx_file(f));
}
|
||||
|
||||
SECTION("Delete one atom_site")
{
	// Erase the atom_site row with id 2; exactly one row must go and the
	// file is still required to validate afterwards.
	auto &atom_site = f.front()["atom_site"];
	auto erased = atom_site.erase(cif::key("id") == 2);

	REQUIRE(erased == 1);

	REQUIRE(cif::pdb::is_valid_pdbx_file(f));
}
|
||||
|
||||
SECTION("Delete a pdbx_poly_seq_scheme record")
{
	// Erase the pdbx_poly_seq_scheme row with seq_id 2; validation is
	// required to fail once that row is gone.
	auto &scheme = f.front()["pdbx_poly_seq_scheme"];
	auto erased = scheme.erase(cif::key("seq_id") == 2);

	REQUIRE(erased == 1);

	REQUIRE_FALSE(cif::pdb::is_valid_pdbx_file(f));
}
|
||||
|
||||
SECTION("Delete an entity_poly_seq record")
{
	// Erase the entity_poly_seq row with num 2; validation is required to
	// fail once that row is gone.
	auto &poly_seq = f.front()["entity_poly_seq"];
	auto erased = poly_seq.erase(cif::key("num") == 2);

	REQUIRE(erased == 1);

	REQUIRE_FALSE(cif::pdb::is_valid_pdbx_file(f));
}
|
||||
|
||||
SECTION("Delete an entity_poly record")
{
	// Erase the entity_poly row for entity 1; validation is required to
	// fail once that row is gone.
	auto &poly = f.front()["entity_poly"];
	auto erased = poly.erase(cif::key("entity_id") == 1);

	REQUIRE(erased == 1);

	REQUIRE_FALSE(cif::pdb::is_valid_pdbx_file(f));
}
|
||||
|
||||
SECTION("Mutate an atom_site record")
{
	// Overwrite both compound ids of the atom with id 9 (an ASN atom in the
	// fixture) with ALA; validation is required to fail.
	auto &atom_site = f.front()["atom_site"];
	auto target = atom_site.find1(cif::key("id") == 9);

	target.assign({
		{ "label_comp_id", "ALA" },
		{ "auth_comp_id", "ALA" }
	});

	REQUIRE_FALSE(cif::pdb::is_valid_pdbx_file(f));
}
|
||||
|
||||
SECTION("Hetero consistency")
{
	// Add a second monomer (ALA) at sequence position 1 to both
	// entity_poly_seq and pdbx_poly_seq_scheme, with hetero still 'n'.
	// Validation is required to fail.
	auto &datablock = f.front();
	auto &poly_seq = datablock["entity_poly_seq"];
	auto &scheme = datablock["pdbx_poly_seq_scheme"];

	poly_seq.emplace({ //
		{ "entity_id", 1 },
		{ "num", 1 },
		{ "mon_id", "ALA" },
		{ "hetero", "n" }
	});

	scheme.emplace({ //
		{ "asym_id", "A" },
		{ "entity_id", "1" },
		{ "seq_id", "1" },
		{ "mon_id", "ALA" },
		{ "ndb_seq_num", "1" },
		{ "pdb_seq_num", "1" },
		{ "auth_seq_num", "1" },
		{ "pdb_mon_id", "ALA" },
		{ "auth_mon_id", "ALA" },
		{ "pdb_strand_id", "A" },
		{ "pdb_ins_code", "." },
		{ "hetero", "n" }
	});

	REQUIRE_FALSE(cif::pdb::is_valid_pdbx_file(f));
}
|
||||
|
||||
SECTION("Missing hetero for record in atom_site")
{
	// Clone the first atom_site row, give the copy id 3 and compound ALA,
	// and insert it. Validation is required to fail.
	auto &atom_site = f.front()["atom_site"];

	auto first_atom = atom_site.front();
	cif::row_initializer extra_atom(first_atom);
	extra_atom.set_value("id", "3");
	extra_atom.set_value("label_comp_id", "ALA");

	atom_site.emplace(std::move(extra_atom));

	REQUIRE_FALSE(cif::pdb::is_valid_pdbx_file(f));
}
|
||||
|
||||
SECTION("Missing letter in entity_poly.pdbx_seq_one_letter_code")
{
	// Replace the one-letter sequence (PNFSG in the fixture) with one that
	// lacks the F; validation is required to fail.
	auto &poly = f.front()["entity_poly"];

	poly.front().assign({
		{ "pdbx_seq_one_letter_code", "PNSG" }
	});

	REQUIRE_FALSE(cif::pdb::is_valid_pdbx_file(f));
}
|
||||
|
||||
SECTION("Too many letters in entity_poly.pdbx_seq_one_letter_code")
{
	// Replace the one-letter sequence (PNFSG in the fixture) with one that
	// has an extra trailing X; validation is required to fail.
	auto &poly = f.front()["entity_poly"];

	poly.front().assign({
		{ "pdbx_seq_one_letter_code", "PNFSGX" }
	});

	REQUIRE_FALSE(cif::pdb::is_valid_pdbx_file(f));
}
|
||||
|
||||
SECTION("Mismatch in entity_poly.pdbx_seq_one_letter_code")
{
	// Replace the one-letter sequence (PNFSG in the fixture) with one that
	// has A in place of F; validation is required to fail.
	auto &poly = f.front()["entity_poly"];

	poly.front().assign({
		{ "pdbx_seq_one_letter_code", "PNASG" }
	});

	REQUIRE_FALSE(cif::pdb::is_valid_pdbx_file(f));
}
|
||||
|
||||
|
||||
}
|
||||
Reference in New Issue
Block a user