Compare commits

..

2 Commits

Author SHA1 Message Date
Maarten L. Hekkelman
92a9a067c6 for write_data_files=off 2024-01-31 10:55:08 +01:00
Maarten L. Hekkelman
9acc343145 added option to not write data files 2024-01-31 10:37:02 +01:00
28 changed files with 790 additions and 2700 deletions

View File

@@ -1,65 +0,0 @@
# This starter workflow is for a CMake project running on multiple platforms. There is a different starter workflow if you just want a single platform.
# See: https://github.com/actions/starter-workflows/blob/main/ci/cmake-single-platform.yml
# Builds the Sphinx documentation with CMake and publishes it to GitHub Pages
# on every push to trunk.
name: publish docs

on:
  push:
    branches: [ "trunk" ]

# Token permissions needed by the Pages upload/deploy actions used below
permissions:
  contents: read
  pages: write
  id-token: write

# Only one Pages deployment at a time; a running deployment is not cancelled
concurrency:
  group: "pages"
  cancel-in-progress: false

jobs:
  docs:
    runs-on: ubuntu-latest

    steps:
      # checkout@v1 runs on a runtime GitHub no longer supports; use a maintained release.
      # NOTE(review): fetch-depth 0 is kept on the assumption that v1 fetched the
      # full history and that the build derives version info from git - confirm.
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set reusable strings
        # Turn repeated input strings (such as the build output directory) into step outputs. These step outputs can be used throughout the workflow file.
        id: strings
        shell: bash
        run: |
          echo "build-output-dir=${{ github.workspace }}/build" >> "$GITHUB_OUTPUT"

      - name: Install dependencies Ubuntu
        # -y: never wait for a confirmation prompt in a non-interactive runner
        run: sudo apt-get update && sudo apt-get install -y cmake doxygen

      - uses: actions/setup-python@v4
        with:
          python-version: '3.9'
          cache: 'pip' # caching pip dependencies

      - run: pip install -r docs/requirements.txt

      - name: Configure CMake
        run: cmake -S . -B ${{ steps.strings.outputs.build-output-dir }} -DBUILD_DOCUMENTATION=ON -DBUILD_TESTING=OFF

      - name: Run Sphinx
        run: |
          cmake --build ${{ steps.strings.outputs.build-output-dir }} --target Sphinx-libcifpp
          ls -l ${{ steps.strings.outputs.build-output-dir }}
          ls -l ${{ steps.strings.outputs.build-output-dir }}/docs/sphinx

      - name: Upload artifact
        uses: actions/upload-pages-artifact@v2
        with:
          path: ${{ steps.strings.outputs.build-output-dir }}/docs/sphinx

  deploy:
    environment:
      name: github-pages
      url: ${{ steps.deployment.outputs.page_url }}

    runs-on: ubuntu-latest
    # Deploy only after the docs job has uploaded the Pages artifact
    needs: docs

    steps:
      - name: Deploy to GitHub Pages
        id: deployment
        uses: actions/deploy-pages@v2

View File

@@ -45,15 +45,13 @@ jobs:
cmake -B ${{ steps.strings.outputs.build-output-dir }}
-DCMAKE_CXX_COMPILER=${{ matrix.cpp_compiler }}
-DCMAKE_BUILD_TYPE=Release
-DCIFPP_DOWNLOAD_CCD=OFF
-S ${{ github.workspace }}
- name: Build
run: cmake --build ${{ steps.strings.outputs.build-output-dir }} --config Release
- name: Test
working-directory: ${{ steps.strings.outputs.build-output-dir }}
run: ctest --build-config Release --output-on-failure
- name: Install
if: matrix.os != 'windows-latest'
run: sudo cmake --install ${{ steps.strings.outputs.build-output-dir }} --config Release

2
.gitignore vendored
View File

@@ -2,7 +2,7 @@ build/
.vscode/
.vs/
tools/update-libcifpp-data
rsrc/components.cif*
data/components.cif*
CMakeSettings.json
msvc/
src/revision.hpp

View File

@@ -11,24 +11,21 @@
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
cmake_minimum_required(VERSION 3.16)
# set the project name
project(
libcifpp
VERSION 6.1.0
LANGUAGES CXX)
project(libcifpp VERSION 6.0.0 LANGUAGES CXX)
list(PREPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
@@ -53,9 +50,7 @@ if(CMAKE_GENERATOR STREQUAL "Ninja Multi-Config")
endif()
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
set(CMAKE_CXX_FLAGS
"${CMAKE_CXX_FLAGS} -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers"
)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers")
elseif(MSVC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4")
endif()
@@ -71,26 +66,28 @@ if(NOT(BUILD_FOR_CCP4 AND WIN32))
option(BUILD_SHARED_LIBS "Build a shared library instead of a static one" OFF)
endif()
if(BUILD_FOR_CCP4)
unset(CIFPP_DOWNLOAD_CCD)
unset(CIFPP_INSTALL_UPDATE_SCRIPT)
else()
# Lots of code depend on the availability of the components.cif file
option(CIFPP_DOWNLOAD_CCD
"Download the CCD file components.cif during installation" ON)
# Lots of code depend on the availability of the components.cif file
option(CIFPP_DOWNLOAD_CCD "Download the CCD file components.cif during installation" ON)
# An optional cron script can be installed to keep the data files up-to-date
if(UNIX AND NOT APPLE)
option(CIFPP_INSTALL_UPDATE_SCRIPT
"Install the script to update CCD and dictionary files" ON)
endif()
# An optional cron script can be installed to keep the data files up-to-date
if("${CMAKE_SYSTEM_NAME}" STREQUAL "Linux" AND NOT BUILD_FOR_CCP4)
option(CIFPP_INSTALL_UPDATE_SCRIPT "Install the script to update CCD and dictionary files" ON)
endif()
# When CCP4 is sourced in the environment, we can recreate the symmetry
# operations table
if(EXISTS "$ENV{CCP4}/lib/data/syminfo.lib")
option(CIFPP_RECREATE_SYMOP_DATA
"Recreate SymOp data table in case it is out of date" ON)
# Optionally avoid installing data files (requires privileges at inconvenient moments)
option(WRITE_DATA_FILES "Write data files during installation" ON)
# When CCP4 is sourced in the environment, we can recreate the symmetry operations table.
# CIFPP_RECREATE_SYMOP_DATA is only offered as an option when syminfo.lib is
# present below $CCP4/lib/data; in every other case it is forced OFF.
if(EXISTS "$ENV{CCP4}")
	if(EXISTS "$ENV{CCP4}/lib/data/syminfo.lib")
		option(CIFPP_RECREATE_SYMOP_DATA "Recreate SymOp data table in case it is out of date" ON)
	else()
		set(CIFPP_RECREATE_SYMOP_DATA OFF)
		# Report the directory that was actually searched above; CLIBD is not
		# consulted by this check and may be unset.
		message(WARNING "Symop data table recreation requested, but file syminfo.lib was not found in $ENV{CCP4}/lib/data")
	endif()
else()
	set(CIFPP_RECREATE_SYMOP_DATA OFF)
	message("Not trying to recreate symop_table_data.hpp since CCP4 is not defined")
endif()
# CCP4 build
@@ -147,30 +144,23 @@ endif()
# Libraries
# Start by finding out if std::regex is usable. Note that the current
# implementation in GCC is not acceptable, it crashes on long lines. The
# implementation in libc++ (clang) and MSVC seem to be OK.
check_cxx_source_compiles(
"
# implementation in GCC is not acceptable, it crashes on long lines.
# The implementation in libc++ (clang) and MSVC seem to be OK.
check_cxx_source_compiles("
#include <iostream>
#ifndef __GLIBCXX__
#error
#endif
int main(int argc, char *argv[]) { return 0; }"
GXX_LIBSTDCPP)
int main(int argc, char *argv[]) { return 0; }" GXX_LIBSTDCPP)
if(GXX_LIBSTDCPP)
message(
STATUS "Testing for known regex bug, since you're using GNU libstdc++")
message(STATUS "Testing for known regex bug, since you're using GNU libstdc++")
try_run(STD_REGEX_RUNNING STD_REGEX_COMPILING
${CMAKE_CURRENT_BINARY_DIR}/test
${PROJECT_SOURCE_DIR}/cmake/test-rx.cpp)
${CMAKE_CURRENT_BINARY_DIR}/test ${PROJECT_SOURCE_DIR}/cmake/test-rx.cpp)
if(STD_REGEX_RUNNING STREQUAL FAILED_TO_RUN)
message(
STATUS
"You are probably trying to compile using the g++ standard library which contains a crashing std::regex implementation. Will use boost::regex instead"
)
message(STATUS "You are probably trying to compile using the g++ standard library which contains a crashing std::regex implementation. Will use boost::regex instead")
find_package(Boost 1.80 QUIET COMPONENTS regex)
@@ -180,7 +170,8 @@ if(GXX_LIBSTDCPP)
FetchContent_Declare(
boost-rx
GIT_REPOSITORY https://github.com/boostorg/regex
GIT_TAG boost-1.83.0)
GIT_TAG boost-1.83.0
)
FetchContent_MakeAvailable(boost-rx)
endif()
@@ -194,8 +185,8 @@ set(THREADS_PREFER_PTHREAD_FLAG)
find_package(Threads)
if(MSVC)
# Avoid linking the shared library of zlib Search ZLIB_ROOT first if it is
# set.
# Avoid linking the shared library of zlib
# Search ZLIB_ROOT first if it is set.
if(ZLIB_ROOT)
set(_ZLIB_SEARCH_ROOT PATHS ${ZLIB_ROOT} NO_DEFAULT_PATH)
list(APPEND _ZLIB_SEARCHES _ZLIB_SEARCH_ROOT)
@@ -205,7 +196,8 @@ if(MSVC)
set(_ZLIB_x86 "(x86)")
set(_ZLIB_SEARCH_NORMAL
PATHS "[HKEY_LOCAL_MACHINE\\SOFTWARE\\GnuWin32\\Zlib;InstallPath]"
"$ENV{ProgramFiles}/zlib" "$ENV{ProgramFiles${_ZLIB_x86}}/zlib")
"$ENV{ProgramFiles}/zlib"
"$ENV{ProgramFiles${_ZLIB_x86}}/zlib")
unset(_ZLIB_x86)
list(APPEND _ZLIB_SEARCHES _ZLIB_SEARCH_NORMAL)
@@ -214,37 +206,26 @@ if(MSVC)
endif()
foreach(search ${_ZLIB_SEARCHES})
find_library(
ZLIB_LIBRARY
NAMES zlibstatic NAMES_PER_DIR ${${search}}
PATH_SUFFIXES lib)
find_library(ZLIB_LIBRARY NAMES zlibstatic NAMES_PER_DIR ${${search}} PATH_SUFFIXES lib)
endforeach()
endif()
find_package(ZLIB REQUIRED)
# Using Eigen3 is a bit of a thing. We don't want to build it completely since
# we only need a couple of header files. Nothing special. But often, eigen3 is
# already installed and then we prefer that.
find_package(Eigen3 3.4 QUIET)
find_package(Eigen3 QUIET)
if(Eigen3_FOUND AND TARGET Eigen3::Eigen)
get_target_property(EIGEN_INCLUDE_DIR Eigen3::Eigen
INTERFACE_INCLUDE_DIRECTORIES)
if(Eigen3_FOUND)
get_target_property(EIGEN_INCLUDE_DIR Eigen3::Eigen INTERFACE_INCLUDE_DIRECTORIES)
else()
# Create a private copy of eigen3 and populate it only, no need to build
FetchContent_Declare(
my-eigen3
ExternalProject_Add(
local_Eigen3
GIT_REPOSITORY https://gitlab.com/libeigen/eigen.git
GIT_TAG 3.4.0)
GIT_TAG 3.4.0
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_BINARY_DIR}/external
)
FetchContent_GetProperties(my-eigen3)
if(NOT my-eigen3_POPULATED)
FetchContent_Populate(my-eigen3)
endif()
set(EIGEN_INCLUDE_DIR ${my-eigen3_SOURCE_DIR})
set(EIGEN3_D local_Eigen3)
set(EIGEN_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/external/include/eigen3)
endif()
include(FindFilesystem)
@@ -260,20 +241,17 @@ write_version_header(${PROJECT_SOURCE_DIR}/src/ LIB_NAME "LibCIFPP")
# SymOp data table
if(CIFPP_RECREATE_SYMOP_DATA)
	# The tool to create the table
	add_executable(symop-map-generator "${PROJECT_SOURCE_DIR}/src/symop-map-generator.cpp")

	# Rule that (re)generates src/symop_table_data.hpp from the CCP4 library
	# files. The inputs are listed in DEPENDS of this command so the header is
	# rebuilt when the generator or one of the .lib files changes. The previous
	# add_custom_target(OUTPUT ...) call did not attach these dependencies to
	# the rule; it only declared an unrelated target literally named "OUTPUT".
	add_custom_command(
		OUTPUT ${PROJECT_SOURCE_DIR}/src/symop_table_data.hpp
		COMMAND $<TARGET_FILE:symop-map-generator> $ENV{CLIBD}/syminfo.lib $ENV{CLIBD}/symop.lib ${PROJECT_SOURCE_DIR}/src/symop_table_data.hpp
		DEPENDS symop-map-generator "$ENV{CLIBD}/syminfo.lib" "$ENV{CLIBD}/symop.lib"
		VERBATIM
	)
endif()
# Sources
@@ -289,18 +267,19 @@ set(project_sources
${PROJECT_SOURCE_DIR}/src/validate.cpp
${PROJECT_SOURCE_DIR}/src/text.cpp
${PROJECT_SOURCE_DIR}/src/utilities.cpp
${PROJECT_SOURCE_DIR}/src/atom_type.cpp
${PROJECT_SOURCE_DIR}/src/compound.cpp
${PROJECT_SOURCE_DIR}/src/point.cpp
${PROJECT_SOURCE_DIR}/src/symmetry.cpp
${PROJECT_SOURCE_DIR}/src/model.cpp
${PROJECT_SOURCE_DIR}/src/pdb/cif2pdb.cpp
${PROJECT_SOURCE_DIR}/src/pdb/pdb2cif.cpp
${PROJECT_SOURCE_DIR}/src/pdb/pdb_record.hpp
${PROJECT_SOURCE_DIR}/src/pdb/pdb2cif_remark_3.hpp
${PROJECT_SOURCE_DIR}/src/pdb/pdb2cif_remark_3.cpp
${PROJECT_SOURCE_DIR}/src/pdb/reconstruct.cpp
${PROJECT_SOURCE_DIR}/src/pdb/validate-pdbx.cpp
)
set(project_headers
@@ -317,32 +296,33 @@ set(project_headers
${PROJECT_SOURCE_DIR}/include/cif++/condition.hpp
${PROJECT_SOURCE_DIR}/include/cif++/category.hpp
${PROJECT_SOURCE_DIR}/include/cif++/row.hpp
${PROJECT_SOURCE_DIR}/include/cif++/atom_type.hpp
${PROJECT_SOURCE_DIR}/include/cif++/compound.hpp
${PROJECT_SOURCE_DIR}/include/cif++/point.hpp
${PROJECT_SOURCE_DIR}/include/cif++/symmetry.hpp
${PROJECT_SOURCE_DIR}/include/cif++/model.hpp
${PROJECT_SOURCE_DIR}/include/cif++/pdb.hpp
${PROJECT_SOURCE_DIR}/include/cif++/pdb/cif2pdb.hpp
${PROJECT_SOURCE_DIR}/include/cif++/pdb/io.hpp
${PROJECT_SOURCE_DIR}/include/cif++/pdb/pdb2cif.hpp
${PROJECT_SOURCE_DIR}/include/cif++/pdb/tls.hpp)
${PROJECT_SOURCE_DIR}/include/cif++/pdb/tls.hpp
)
add_library(cifpp ${project_sources} ${project_headers}
${PROJECT_SOURCE_DIR}/src/symop_table_data.hpp)
add_library(cifpp ${project_sources} ${project_headers} ${PROJECT_SOURCE_DIR}/src/symop_table_data.hpp)
add_library(cifpp::cifpp ALIAS cifpp)
set(CMAKE_DEBUG_POSTFIX d)
set_target_properties(cifpp PROPERTIES DEBUG_POSTFIX "d")
generate_export_header(cifpp EXPORT_FILE_NAME
${PROJECT_SOURCE_DIR}/include/cif++/exports.hpp)
generate_export_header(cifpp EXPORT_FILE_NAME ${PROJECT_SOURCE_DIR}/include/cif++/exports.hpp)
if(BOOST_REGEX)
target_compile_definitions(cifpp PRIVATE USE_BOOST_REGEX=1
BOOST_REGEX_STANDALONE=1)
get_target_property(BOOST_REGEX_INCLUDE_DIR Boost::regex
INTERFACE_INCLUDE_DIRECTORIES)
target_compile_definitions(cifpp PRIVATE USE_BOOST_REGEX=1 BOOST_REGEX_STANDALONE=1)
get_target_property(BOOST_REGEX_INCLUDE_DIR Boost::regex INTERFACE_INCLUDE_DIRECTORIES)
endif()
if(MSVC)
@@ -351,14 +331,20 @@ endif()
set_target_properties(cifpp PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_include_directories(
cifpp
PUBLIC "$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>"
target_include_directories(cifpp
PUBLIC
"$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>"
"$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>"
PRIVATE "${BOOST_REGEX_INCLUDE_DIR}" "${EIGEN_INCLUDE_DIR}")
PRIVATE
"${EIGEN_INCLUDE_DIR}"
"${BOOST_REGEX_INCLUDE_DIR}"
)
target_link_libraries(cifpp PUBLIC Threads::Threads ZLIB::ZLIB
${CIFPP_REQUIRED_LIBRARIES})
target_link_libraries(cifpp PUBLIC Threads::Threads ZLIB::ZLIB ${CIFPP_REQUIRED_LIBRARIES})
# EIGEN3_D holds the name of the locally built Eigen3 external project and is
# only set when no installed Eigen3 was found. Test the variable itself:
# if(${EIGEN3_D}) expands to if(local_Eigen3), which names no variable and is
# therefore always false, so the dependency was never added.
if(EIGEN3_D)
	# Make sure the Eigen3 headers are installed before cifpp is compiled
	add_dependencies(cifpp ${EIGEN3_D})
endif()
if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
target_link_options(cifpp PRIVATE -undefined dynamic_lookup)
@@ -366,7 +352,7 @@ endif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
if(CIFPP_DOWNLOAD_CCD)
# download the components.cif file from CCD
set(COMPONENTS_CIF ${PROJECT_SOURCE_DIR}/rsrc/components.cif)
set(COMPONENTS_CIF ${PROJECT_SOURCE_DIR}/data/components.cif)
if(EXISTS ${COMPONENTS_CIF})
file(SIZE ${COMPONENTS_CIF} CCD_FILE_SIZE)
@@ -378,88 +364,70 @@ if(CIFPP_DOWNLOAD_CCD)
endif()
if(NOT EXISTS ${COMPONENTS_CIF})
# Since the file(DOWNLOAD) command in cmake does not use compression, we try
# to download the gzipped version and decompress it ourselves.
if(NOT EXISTS ${PROJECT_SOURCE_DIR}/data)
file(MAKE_DIRECTORY ${PROJECT_SOURCE_DIR}/data/)
endif()
# Since the file(DOWNLOAD) command in cmake does not use
# compression, we try to download the gzipped version and
# decompress it ourselves.
find_program(GUNZIP gunzip)
if(WIN32 OR GUNZIP STREQUAL "GUNZIP-NOTFOUND")
file(
DOWNLOAD https://files.wwpdb.org/pub/pdb/data/monomers/components.cif
${COMPONENTS_CIF}
SHOW_PROGRESS
STATUS CCD_FETCH_STATUS)
else()
if(GUNZIP)
if(NOT EXISTS "${COMPONENTS_CIF}.gz")
file(
DOWNLOAD
https://files.wwpdb.org/pub/pdb/data/monomers/components.cif.gz
${COMPONENTS_CIF}.gz
SHOW_PROGRESS
STATUS CCD_FETCH_STATUS)
file(DOWNLOAD https://files.wwpdb.org/pub/pdb/data/monomers/components.cif.gz ${COMPONENTS_CIF}.gz
SHOW_PROGRESS STATUS CCD_FETCH_STATUS)
endif()
add_custom_command(
OUTPUT ${COMPONENTS_CIF}
COMMAND "${GUNZIP}" ${COMPONENTS_CIF}.gz
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/rsrc/)
add_custom_target(COMPONENTS ALL DEPENDS ${COMPONENTS_CIF})
add_custom_command(OUTPUT ${COMPONENTS_CIF}
COMMAND ${GUNZIP} ${COMPONENTS_CIF}.gz
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/data/)
else()
file(DOWNLOAD https://files.wwpdb.org/pub/pdb/data/monomers/components.cif ${COMPONENTS_CIF}
SHOW_PROGRESS STATUS CCD_FETCH_STATUS)
endif()
# Do not continue if downloading went wrong
list(POP_FRONT CCD_FETCH_STATUS CCD_FETCH_STATUS_CODE)
if(CCD_FETCH_STATUS_CODE)
message(
FATAL_ERROR "Error trying to download CCD file: ${CCD_FETCH_STATUS}")
message(FATAL_ERROR "Error trying to download CCD file: ${CCD_FETCH_STATUS}")
endif()
endif()
add_custom_target(COMPONENTS ALL DEPENDS ${COMPONENTS_CIF})
endif()
# Installation directories
if(BUILD_FOR_CCP4)
set(CIFPP_DATA_DIR
"$ENV{CCP4}/share/libcifpp"
CACHE PATH "Directory where dictionary and other static data is stored")
set(CIFPP_DATA_DIR "$ENV{CCP4}/share/libcifpp" CACHE PATH "Directory where dictionary and other static data is stored")
else()
set(CIFPP_DATA_DIR
"${CMAKE_INSTALL_FULL_DATADIR}/libcifpp"
CACHE PATH "Directory where dictionary and other static data is stored")
set(CIFPP_DATA_DIR "${CMAKE_INSTALL_FULL_DATADIR}/libcifpp" CACHE PATH "Directory where dictionary and other static data is stored")
endif()
if(CIFPP_DATA_DIR)
target_compile_definitions(cifpp PUBLIC DATA_DIR="${CIFPP_DATA_DIR}")
endif()
target_compile_definitions(cifpp PUBLIC DATA_DIR="${CIFPP_DATA_DIR}")
if(UNIX AND NOT BUILD_FOR_CCP4)
if("${CMAKE_INSTALL_PREFIX}" STREQUAL "/usr/local")
set(CIFPP_CACHE_DIR
"/var/cache/libcifpp"
CACHE PATH "The directory where downloaded data files are stored")
set(CIFPP_CACHE_DIR "/var/cache/libcifpp" CACHE PATH "The directory where downloaded data files are stored")
else()
set(CIFPP_CACHE_DIR
"${CMAKE_INSTALL_FULL_LOCALSTATEDIR}/cache/libcifpp"
CACHE PATH "The directory where downloaded data files are stored")
set(CIFPP_CACHE_DIR "${CMAKE_INSTALL_FULL_LOCALSTATEDIR}/cache/libcifpp" CACHE PATH "The directory where downloaded data files are stored")
endif()
target_compile_definitions(cifpp PUBLIC CACHE_DIR="${CIFPP_CACHE_DIR}")
set(CIFPP_ETC_DIR
"${CMAKE_INSTALL_FULL_SYSCONFDIR}"
CACHE PATH "The directory where the update configuration file is stored")
set(CIFPP_ETC_DIR "${CMAKE_INSTALL_FULL_SYSCONFDIR}" CACHE PATH "The directory where the update configuration file is stored")
else()
unset(CIFPP_CACHE_DIR)
endif()
# Install rules
install(
TARGETS cifpp
install(TARGETS cifpp
EXPORT cifpp-targets
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
INCLUDES
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
if(MSVC AND BUILD_SHARED_LIBS)
install(
@@ -474,89 +442,91 @@ file(GLOB OLD_CONFIG_FILES
${CMAKE_INSTALL_FULL_LIBDIR}/cmake/cifpp/cifppTargets*.cmake)
if(OLD_CONFIG_FILES)
message(
STATUS "Installation will remove old config files: ${OLD_CONFIG_FILES}")
message(STATUS "Installation will remove old config files: ${OLD_CONFIG_FILES}")
install(CODE "file(REMOVE ${OLD_CONFIG_FILES})")
endif()
install(
EXPORT cifpp-targets
install(EXPORT cifpp-targets
FILE "cifpp-targets.cmake"
NAMESPACE cifpp::
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cifpp)
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cifpp
)
install(
DIRECTORY include/cif++
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
COMPONENT Devel)
COMPONENT Devel
)
install(
FILES include/cif++.hpp
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
COMPONENT Devel)
COMPONENT Devel
)
if(CIFPP_DATA_DIR)
install(
FILES ${PROJECT_SOURCE_DIR}/rsrc/mmcif_ddl.dic
if(WRITE_DATA_FILES)
install(FILES
${PROJECT_SOURCE_DIR}/rsrc/mmcif_ddl.dic
${PROJECT_SOURCE_DIR}/rsrc/mmcif_pdbx.dic
${PROJECT_SOURCE_DIR}/rsrc/mmcif_ma.dic ${COMPONENTS_CIF}
DESTINATION ${CIFPP_DATA_DIR})
endif()
${PROJECT_SOURCE_DIR}/rsrc/mmcif_ma.dic
${COMPONENTS_CIF}
DESTINATION ${CIFPP_DATA_DIR}
)
if(CIFPP_CACHE_DIR)
install(
FILES ${PROJECT_SOURCE_DIR}/rsrc/mmcif_ddl.dic
${PROJECT_SOURCE_DIR}/rsrc/mmcif_pdbx.dic
${PROJECT_SOURCE_DIR}/rsrc/mmcif_ma.dic ${COMPONENTS_CIF}
DESTINATION ${CIFPP_CACHE_DIR})
if(CIFPP_CACHE_DIR)
install(FILES
${PROJECT_SOURCE_DIR}/rsrc/mmcif_ddl.dic
${PROJECT_SOURCE_DIR}/rsrc/mmcif_pdbx.dic
${PROJECT_SOURCE_DIR}/rsrc/mmcif_ma.dic
${COMPONENTS_CIF}
DESTINATION ${CIFPP_CACHE_DIR}
)
endif()
endif()
set(CONFIG_TEMPLATE_FILE ${PROJECT_SOURCE_DIR}/cmake/cifpp-config.cmake.in)
configure_package_config_file(
${CONFIG_TEMPLATE_FILE} ${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifpp-config.cmake
${CONFIG_TEMPLATE_FILE}
${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifpp-config.cmake
INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cifpp
PATH_VARS CIFPP_DATA_DIR)
PATH_VARS CIFPP_DATA_DIR
)
install(
FILES "${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifpp-config.cmake"
install(FILES
"${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifpp-config.cmake"
"${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifpp-config-version.cmake"
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cifpp
COMPONENT Devel)
COMPONENT Devel
)
set_target_properties(
cifpp
PROPERTIES VERSION ${PROJECT_VERSION}
set_target_properties(cifpp PROPERTIES
VERSION ${PROJECT_VERSION}
SOVERSION "${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}"
INTERFACE_cifpp_MAJOR_VERSION ${PROJECT_VERSION_MAJOR})
set_property(
TARGET cifpp
APPEND
PROPERTY COMPATIBLE_INTERFACE_STRING cifpp_MAJOR_VERSION)
set_property(TARGET cifpp APPEND PROPERTY
COMPATIBLE_INTERFACE_STRING cifpp_MAJOR_VERSION
)
write_basic_package_version_file(
"${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifpp-config-version.cmake"
VERSION ${PROJECT_VERSION}
COMPATIBILITY AnyNewerVersion)
# In case we're included as sub_directory:
if(NOT PROJECT_IS_TOP_LEVEL)
set(CIFPP_SHARE_DIR ${PROJECT_SOURCE_DIR}/rsrc PARENT_SCOPE)
endif()
COMPATIBILITY AnyNewerVersion
)
if(BUILD_TESTING)
# We're using the older version 2 of Catch2
FetchContent_Declare(
Catch2
GIT_REPOSITORY https://github.com/catchorg/Catch2.git
GIT_TAG v2.13.9)
GIT_TAG v2.13.9
)
FetchContent_MakeAvailable(Catch2)
list(
APPEND
CIFPP_tests
list(APPEND CIFPP_tests
unit-v2
unit-3d
format
@@ -564,82 +534,63 @@ if(BUILD_TESTING)
rename-compound
sugar
spinner
validate-pdbx)
)
foreach(CIFPP_TEST IN LISTS CIFPP_tests)
set(CIFPP_TEST "${CIFPP_TEST}-test")
set(CIFPP_TEST_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/test/${CIFPP_TEST}.cpp")
add_executable(
${CIFPP_TEST} ${CIFPP_TEST_SOURCE}
"${CMAKE_CURRENT_SOURCE_DIR}/test/test-main.cpp")
add_executable(${CIFPP_TEST} ${CIFPP_TEST_SOURCE} "${CMAKE_CURRENT_SOURCE_DIR}/test/test-main.cpp")
target_link_libraries(${CIFPP_TEST} PRIVATE Threads::Threads cifpp::cifpp
Catch2::Catch2)
target_include_directories(${CIFPP_TEST} PRIVATE "${EIGEN_INCLUDE_DIR}")
target_link_libraries(${CIFPP_TEST} PRIVATE Threads::Threads cifpp::cifpp Catch2::Catch2)
target_include_directories(${CIFPP_TEST} PRIVATE ${EIGEN_INCLUDE_DIR})
if(MSVC)
# Specify unwind semantics so that MSVC knows how to handle exceptions
target_compile_options(${CIFPP_TEST} PRIVATE /EHsc)
endif()
add_custom_target(
"run-${CIFPP_TEST}"
DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/Run${CIFPP_TEST}.touch ${CIFPP_TEST})
add_custom_target("run-${CIFPP_TEST}" DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/Run${CIFPP_TEST}.touch ${CIFPP_TEST})
add_custom_command(
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/Run${CIFPP_TEST}.touch
COMMAND $<TARGET_FILE:${CIFPP_TEST}> --data-dir
${CMAKE_CURRENT_SOURCE_DIR}/test)
COMMAND $<TARGET_FILE:${CIFPP_TEST}> --data-dir ${CMAKE_CURRENT_SOURCE_DIR}/test)
add_test(NAME ${CIFPP_TEST} COMMAND $<TARGET_FILE:${CIFPP_TEST}> --data-dir
${CMAKE_CURRENT_SOURCE_DIR}/test)
add_test(NAME ${CIFPP_TEST}
COMMAND $<TARGET_FILE:${CIFPP_TEST}> --data-dir ${CMAKE_CURRENT_SOURCE_DIR}/test)
endforeach()
endif()
# Optionally install the update scripts for CCD and dictionary files
if(CIFPP_INSTALL_UPDATE_SCRIPT)
if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux" OR ${CMAKE_SYSTEM_NAME} STREQUAL
"GNU")
if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux" OR ${CMAKE_SYSTEM_NAME} STREQUAL "GNU")
if("${CMAKE_INSTALL_PREFIX}" STREQUAL "/usr/local")
set(CIFPP_CRON_DIR
"/etc/cron.weekly"
CACHE PATH "The cron directory, for the update script")
set(CIFPP_CRON_DIR "/etc/cron.weekly" CACHE PATH "The cron directory, for the update script")
else()
set(CIFPP_CRON_DIR
"${CIFPP_ETC_DIR}/cron.weekly"
CACHE PATH "The cron directory, for the update script")
set(CIFPP_CRON_DIR "${CIFPP_ETC_DIR}/cron.weekly" CACHE PATH "The cron directory, for the update script")
endif()
elseif(${CMAKE_SYSTEM_NAME} STREQUAL "FreeBSD")
set(CIFPP_CRON_DIR
"${CIFPP_ETC_DIR}/periodic/weekly"
CACHE PATH "The cron directory, for the update script")
elseif(UNIX) # assume all others are like FreeBSD...
set(CIFPP_CRON_DIR "${CIFPP_ETC_DIR}/periodic/weekly" CACHE PATH "The cron directory, for the update script")
else()
message(FATAL_ERROR "Don't know where to install the update script")
endif()
configure_file(${PROJECT_SOURCE_DIR}/tools/update-libcifpp-data.in
update-libcifpp-data @ONLY)
configure_file(${PROJECT_SOURCE_DIR}/tools/update-libcifpp-data.in update-libcifpp-data @ONLY)
install(
FILES ${CMAKE_CURRENT_BINARY_DIR}/update-libcifpp-data
DESTINATION ${CIFPP_CRON_DIR}
PERMISSIONS OWNER_EXECUTE OWNER_READ GROUP_EXECUTE GROUP_READ WORLD_EXECUTE
WORLD_READ)
PERMISSIONS OWNER_EXECUTE OWNER_READ GROUP_EXECUTE GROUP_READ WORLD_EXECUTE WORLD_READ
)
install(DIRECTORY DESTINATION ${CIFPP_CACHE_DIR})
# a config file, to make it complete
if(NOT EXISTS "${CIFPP_ETC_DIR}/libcifpp.conf")
file(
WRITE ${CMAKE_CURRENT_BINARY_DIR}/libcifpp.conf
[[# Uncomment the next line to enable automatic updates
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/libcifpp.conf [[# Uncomment the next line to enable automatic updates
# update=true
]])
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libcifpp.conf
DESTINATION "${CIFPP_ETC_DIR}")
install(
CODE "message(\"A configuration file has been written to ${CIFPP_ETC_DIR}/libcifpp.conf, please edit this file to enable automatic updates\")"
)
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libcifpp.conf DESTINATION "${CIFPP_ETC_DIR}")
install(CODE "message(\"A configuration file has been written to ${CIFPP_ETC_DIR}/libcifpp.conf, please edit this file to enable automatic updates\")")
install(DIRECTORY DESTINATION "${CIFPP_ETC_DIR}/libcifpp/cache-update.d")
endif()
@@ -656,7 +607,7 @@ set(CPACK_SOURCE_TGZ ON)
set(CPACK_SOURCE_TBZ2 OFF)
set(CPACK_SOURCE_TXZ OFF)
set(CPACK_SOURCE_TZ OFF)
set(CPACK_SOURCE_IGNORE_FILES "/rsrc/components.cif;/build;/.vscode;/.git")
set(CPACK_SOURCE_IGNORE_FILES "/data/components.cif;/build;/.vscode;/.git")
set(CPACK_PACKAGE_FILE_NAME "${PROJECT_NAME}-${PROJECT_VERSION}")
set(CPACK_SOURCE_PACKAGE_FILE_NAME ${CPACK_PACKAGE_FILE_NAME})
include(CPack)

162
README.md
View File

@@ -1,34 +1,10 @@
[![github CI](https://github.com/pdb-redo/libcifpp/actions/workflows/cmake-multi-platform.yml/badge.svg)](https://github.com/pdb-redo/libcifpp/actions)
[![GitHub License](https://img.shields.io/github/license/pdb-redo/libcifpp)](https://github.com/pdb-redo/libcifpp/LICENSE)
# libcifpp
As the name implies, this library was originally written to work with mmCIF files
using C++ as the programming language. The design of this library leans heavily on
the structure of CIF files. These files can be thought of as a text dump of a
relational databank with, often but not always, a very strict schema describing
the data. These schemas are called dictionaries.
Using information from the content of an mmCIF file and an optional schema,
libcifpp allows you to access the data in the file as a collection of datablocks,
each containing a collection of categories with rows of data. The categories can
be searched for data using queries written in regular C++ syntax. When a dictionary
is specified, inserted data is checked for validity. Likewise, removal of data
may result in cascaded removal of linked data in other categories using
parent/child relationship information.
Since there were still many programs using the legacy PDB format at the time
development started, a layer was added that converts data to and from PDB format
into mmCIF format. This means you can manipulate PDB files as if they were
normal mmCIF files.
Apart from this basic functionality, libcifpp also offers code to help with
symmetry calculations, 3d manipulations and obtaining information from the CCD
[Chemical Component Dictionary](https://www.wwpdb.org/data/ccd).
This library contains code to work with mmCIF and legacy PDB files.
## Documentation
The documentation can be found at [github.io](https://pdb-redo.github.io/libcifpp/)
The documentation can be found at https://www.hekkelman.com/libcifpp-doc/
## Synopsis
@@ -88,138 +64,54 @@ int main(int argc, char *argv[])
You might be able to use libcifpp from a package manager used by your
OS distribution. But most likely this package will be out-of-date.
Therefore it is recommended to build *libcifpp* from code. It is not
hard to do. But it is recommended to read the following instructions
carefully.
hard to do.
### Requirements
The code for this library was written in C++17. You therefore need a
recent compiler to build it. For the development gcc >= 9.4 and clang >= 9.0
recent compiler to build it. For the development gcc 9.4 and clang 9.0
have been used as well as MSVC version 2019.
The other requirement you really need to have installed on your computer
is a version of [CMake](https://cmake.org). For now the minimum version
is 3.16 but that may soon change into a higher version. You should also
install the gui version of CMake to set build options easily, on Debian
I prefer to use the curses version installed with `cmake-curses-gui`.
It is very useful to have [mrc](https://github.com/mhekkel/mrc) available.
However, this is only an option if you use Windows or an operating system
using the ELF executable format (i.e. Linux or FreeBSD). MRC is a resource
compiler that allows including data files into the executable making them
easier to install.
Other libraries you might want to install beforehand are:
Other requirements are:
- [cmake](https://cmake.org) A build tool.
- [mrc](https://github.com/mhekkel/mrc), a resource compiler that
allows including data files into the executable making them easier to
install. Strictly speaking this is optional, but at the expense of
functionality.
- [libeigen](https://eigen.tuxfamily.org/index.php?title=Main_Page), a
library to do amongst others matrix calculations. This usually can be
installed using your package manager, in Debian/Ubuntu it is called
`libeigen3-dev`
- [zlib](https://github.com/madler/zlib), the development version of this
library. On Debian/Ubuntu this is the package `zlib1g-dev`.
- [boost](https://www.boost.org), in Debian/Ubuntu this is `libboost-dev`.
The Boost libraries are only needed in case you are using GCC due to a long
standing bug in GNU's implementation of std::regex. It simply crashes
on the regular expressions used in the mmcif_pdbx dictionary and so
we use the boost regex implementation instead.
- [boost](https://www.boost.org).
The Boost libraries are only needed in case you want to build the test
code or if you are using GCC. That last condition is due to a long
standing bug in the implementation of std::regex. It simply crashes
on the regular expressions used in the mmcif_pdbx dictionary and so
we use the boost regex implementation instead.
### Building
First you need to download the code:
Building the code is as simple as typing:
```console
git clone https://github.com/PDB-REDO/libcifpp.git
git clone https://github.com/PDB-REDO/libcifpp.git --recurse-submodules
cd libcifpp
cmake -S . -B build -DCMAKE_INSTALL_PREFIX=$HOME/.local -DCMAKE_BUILD_TYPE=Release
cmake --build build
cmake --install build
```
You should start by considering where to install libcifpp. If you have
sufficient permissions on your computer you perhaps should use the
default but libcifpp can be configured to be installed anywhere
including e.g. *$HOME/.local*.
Next step is to configure, for this use the CMake gui application. If you
installed the curses version of cmake you can type `ccmake`. On Windows
you can use `cmake-gui.exe`.
To install in the default location:
```console
ccmake -S . -B build
```
To install elsewhere, e.g. *$HOME/.local*:
```console
ccmake -S . -B build -DCMAKE_INSTALL_PREFIX=$HOME/.local
```
In the cmake window, start the configure command (use button or press 'c').
After the first configure step you will see a list of settable options.
Alter these to match your preferences. Most options are self-explanatory
and contain a description. Some may need a bit more explanation:
- CIFPP_DATA_DIR, this directory will be used to store initial versions
of the mmcif_pdbx dictionary as well as the optional CCD file.
- CIFPP_DOWNLOAD_CCD
The CCD file is huge and perhaps you think you don't
need it. In that case you can leave this OFF. But that will limit the
use cases.
- CIFPP_INSTALL_UPDATE_SCRIPT
The files in CIFPP_DATA_DIR are quickly becoming out of date. On
FreeBSD and Linux you can install a script that updates these files
on a weekly basis.
- CIFPP_CRON_DIR
The directory where the update script is to be installed.
- CIFPP_ETC_DIR
The update script will only work if the file called *libcifpp.conf*
in this *etc* directory contains an uncommented line with
```console
update=true
```
- CIFPP_CACHE_DIR
Once you have installed and enabled the update script, new files are
written to this directory.
- CIFPP_RECREATE_SYMOP_DATA
If you have CCP4 sourced into your environment, this option allows
you to recreate the symop data file.
- BUILD_FOR_CCP4
Build a special version of libcifpp to be installed in the CCP4
environment.
After setting these options you can run the configure step again and
then use generate to create the makefiles.
Building and installing is then as simple as:
```console
cmake --build build
cmake --install build
```
If this fails due to lack of permissions, you can try:
```console
sudo cmake --install build
```
This checks out the source code from GitHub, creates a new directory
where cmake stores its files, runs a configure, builds the code and
then installs the library and auxiliary files.
Tests are created by default, and to test the code you can run:
```console
ctest --test-dir build
cmake --build build
ctest --test-dir build
```

View File

@@ -1,11 +1,3 @@
Version 6.1.0
- Add formula weight to entity in pdb2cif
- Change order of categories inside a datablock to match order in file
- Change default order to write out categories in a file based on
parent/child relationship
- Added validate_pdbx and recover_pdbx
- Fixed a serious bug in category_index when moving categories
Version 6.0.0
- Drop the use of CCP4's monomer library for compound information

View File

@@ -2,7 +2,7 @@
include("${CMAKE_CURRENT_LIST_DIR}/cifpp-targets.cmake")
set_and_check(CIFPP_SHARE_DIR "@PACKAGE_CIFPP_DATA_DIR@")
# set_and_check(CIFPP_SHARE_DIR "@PACKAGE_CIFPP_DATA_DIR@")
include(CMakeFindDependencyMacro)

View File

@@ -18,7 +18,7 @@ Loading Resources
No matter where the resource is located, you should always use the single libcifpp API call :cpp:func:`cif::load_resource` to load them. This function returns a *std::istream* wrapped inside a *std::unique_ptr*.
The order in which resources are searched for is:
The order in which resources are searched for is:
* Use the resource that was defined by calling :cpp:func:`cif::add_file_resource`
for this name.

View File

@@ -166,22 +166,17 @@ class compound
return m_id == "HOH" or m_id == "H2O" or m_id == "WAT";
}
char one_letter_code() const { return m_one_letter_code; }; ///< Return the one letter code to use in a canonical sequence. If unknown the value '\0' is returned
std::string parent_id() const { return m_parent_id; }; ///< Return the parent id code in case a parent is specified (e.g. MET for MSE)
private:
friend class compound_factory_impl;
friend class local_compound_factory_impl;
compound(cif::datablock &db);
compound(cif::datablock &db, int);
compound(cif::datablock &db, const std::string &id, const std::string &name, const std::string &type, const std::string &group);
std::string m_id;
std::string m_name;
std::string m_type;
std::string m_group;
std::string m_formula;
char m_one_letter_code = 0;
std::string m_parent_id;
float m_formula_weight = 0;
int m_formal_charge = 0;
std::vector<compound_atom> m_atoms;
@@ -219,20 +214,6 @@ class compound_factory
/// Override any previously loaded dictionary with @a inDictFile
void push_dictionary(const std::filesystem::path &inDictFile);
/** @brief Override any previously loaded dictionary with the data in @a file
*
* @note experimental feature
*
* Use the file @a file as a source for compound information. This may
* be e.g. a regular mmCIF file with extra datablocks containing compound
* information.
*
* Be careful to remove the pushed dictionary again with pop_dictionary();
* best use @ref cif::compound_source as a stack based object for this.
*
* NOTE(review): the implementation appears to keep only a reference to
* @a file, so @a file presumably has to outlive the pushed dictionary;
* confirm against the implementation.
*
* @param file The file to use as source for compound information
*/
void push_dictionary(const file &file);
/// Remove the last pushed dictionary
void pop_dictionary();
@@ -270,35 +251,4 @@ class compound_factory
std::shared_ptr<compound_factory_impl> m_impl;
};
// --------------------------------------------------------------------
/**
 * @brief Stack based source for compound info.
 *
 * Use this class to temporarily add a compound source to the
 * compound_factory. The constructor pushes the file as a dictionary
 * on the compound_factory instance and the destructor pops the last
 * pushed dictionary again.
 *
 * Objects of this class cannot be copied: every copy would pop a
 * dictionary in its destructor while only one dictionary was pushed,
 * leaving the stack of dictionaries unbalanced.
 *
 * @code{.cpp}
 * cif::file f("1cbs-with-custom-rea.cif");
 * cif::compound_source cs(f);
 *
 * auto &cf = cif::compound_factory::instance();
 * auto rea_compound = cf.create("REA");
 * @endcode
 */

class compound_source
{
  public:
	/// \brief Push @a file as a dictionary on the compound_factory instance
	/// \param file The file containing the compound information
	compound_source(const cif::file &file)
	{
		cif::compound_factory::instance().push_dictionary(file);
	}

	/// \brief Pop the last pushed dictionary from the compound_factory instance
	~compound_source()
	{
		cif::compound_factory::instance().pop_dictionary();
	}

	// A copy would result in a second, unmatched pop_dictionary call
	compound_source(const compound_source &) = delete;
	compound_source &operator=(const compound_source &) = delete;
};
} // namespace cif

View File

@@ -1115,4 +1115,11 @@ class structure
std::vector<residue> m_non_polymers;
};
// --------------------------------------------------------------------
/// \brief Reconstruct all missing categories for an assumed PDBx file.
/// Some people believe that simply dumping some atom records is enough.
/// \param db The cif::datablock that hopefully contains some valid data
void reconstruct_pdbx(datablock &db);
} // namespace cif::mm

View File

@@ -1,17 +1,17 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
*
* Copyright (c) 2023 NKI/AVL, Netherlands Cancer Institute
*
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
@@ -30,13 +30,13 @@
/**
* @file pdb.hpp
*
*
* This file presents the API to read and write files in the
* legacy and ancient PDB format.
*
*
* The code works on the basis of best effort since it is
* impossible to have correct round trip fidelity.
*
*
*/
namespace cif::pdb
@@ -81,7 +81,7 @@ inline void write(std::ostream &os, const file &f)
/** @brief Write out the data in @a db to file @a file
* in legacy PDB format or mmCIF format, depending on the
* filename extension.
*
*
* If extension of @a file is *.gz* the resulting file will
* be written in gzip compressed format.
*/
@@ -90,7 +90,7 @@ void write(const std::filesystem::path &file, const datablock &db);
/** @brief Write out the data in @a f to file @a file
* in legacy PDB format or mmCIF format, depending on the
* filename extension.
*
*
* If extension of @a file is *.gz* the resulting file will
* be written in gzip compressed format.
*/
@@ -99,34 +99,6 @@ inline void write(const std::filesystem::path &p, const file &f)
write(p, f.front());
}
// --------------------------------------------------------------------
/** \brief Reconstruct all missing categories for an assumed PDBx file.
*
* Some people believe that simply dumping some atom records is enough.
*
* \param pdbx_file The cif::file that hopefully contains some valid data
* \param dictionary The mmcif dictionary to use, defaults to "mmcif_pdbx"
*/
void reconstruct_pdbx(file &pdbx_file, std::string_view dictionary = "mmcif_pdbx");
/** \brief This is an extension to cif::validator, use the logic in common
* PDBx files to see if the file is internally consistent.
*
* This function for now checks if the following categories are consistent:
*
* atom_site -> pdbx_poly_seq_scheme -> entity_poly_seq -> entity_poly -> entity
*
* Use the common \ref cif::VERBOSE flag to turn on diagnostic messages.
*
* \param pdbx_file The input file
* \param dictionary The mmcif dictionary to use, defaults to "mmcif_pdbx"
* \result Returns true if the file was valid and consistent
*/
bool is_valid_pdbx_file(const file &pdbx_file, std::string_view dictionary = "mmcif_pdbx");
// --------------------------------------------------------------------
// Other I/O related routines
@@ -134,7 +106,7 @@ bool is_valid_pdbx_file(const file &pdbx_file, std::string_view dictionary = "mm
*
* The line returned should be compatible with the legacy PDB
* format and is e.g. used in the DSSP program.
*
*
* @param data The datablock to use as source for the requested data
* @param truncate_at The maximum length of the line returned
*/
@@ -144,7 +116,7 @@ std::string get_HEADER_line(const datablock &data, std::string::size_type trunca
*
* The line returned should be compatible with the legacy PDB
* format and is e.g. used in the DSSP program.
*
*
* @param data The datablock to use as source for the requested data
* @param truncate_at The maximum length of the line returned
*/
@@ -154,7 +126,7 @@ std::string get_COMPND_line(const datablock &data, std::string::size_type trunca
*
* The line returned should be compatible with the legacy PDB
* format and is e.g. used in the DSSP program.
*
*
* @param data The datablock to use as source for the requested data
* @param truncate_at The maximum length of the line returned
*/
@@ -164,11 +136,12 @@ std::string get_SOURCE_line(const datablock &data, std::string::size_type trunca
*
* The line returned should be compatible with the legacy PDB
* format and is e.g. used in the DSSP program.
*
*
* @param data The datablock to use as source for the requested data
* @param truncate_at The maximum length of the line returned
*/
std::string get_AUTHOR_line(const datablock &data, std::string::size_type truncate_at = 127);
} // namespace cif::pdb
} // namespace pdbx

View File

@@ -290,13 +290,6 @@ class row_handle
return operator[](get_column_ix(column)).template as<T>();
}
/// \brief Look up the column named @a column and return its value converted to type @a T
template <typename T>
T get(std::string_view column) const
{
	// resolve the name to a column index first, then fetch and convert
	const auto column_ix = get_column_ix(column);
	return operator[](column_ix).template as<T>();
}
/// \brief assign each of the columns named in @a values to their respective value
void assign(const std::vector<item> &values)
{

View File

@@ -47,6 +47,7 @@ class row_comparator
{
public:
row_comparator(category &cat)
: m_category(cat)
{
auto cv = cat.get_cat_validator();
@@ -68,13 +69,13 @@ class row_comparator
}
}
int operator()(const category &cat, const row *a, const row *b) const
int operator()(const row *a, const row *b) const
{
assert(a);
assert(b);
row_handle rha(cat, *a);
row_handle rhb(cat, *b);
row_handle rha(m_category, *a);
row_handle rhb(m_category, *b);
int d = 0;
for (const auto &[k, f] : m_comparator)
@@ -91,11 +92,11 @@ class row_comparator
return d;
}
int operator()(const category &cat, const row_initializer &a, const row *b) const
int operator()(const row_initializer &a, const row *b) const
{
assert(b);
row_handle rhb(cat, *b);
row_handle rhb(m_category, *b);
int d = 0;
auto ai = a.begin();
@@ -123,6 +124,7 @@ class row_comparator
using key_comparator = std::tuple<uint16_t, compareFunc>;
std::vector<key_comparator> m_comparator;
category &m_category;
};
// --------------------------------------------------------------------
@@ -133,18 +135,18 @@ class row_comparator
class category_index
{
public:
category_index(category &cat);
category_index(category *cat);
~category_index()
{
delete m_root;
}
row *find(const category &cat, row *k) const;
row *find_by_value(const category &cat, row_initializer k) const;
row *find(row *k) const;
row *find_by_value(row_initializer k) const;
void insert(category &cat, row *r);
void erase(category &cat, row *r);
void insert(row *r);
void erase(row *r);
// reorder the row's and returns new head and tail
std::tuple<row *, row *> reorder()
@@ -190,8 +192,8 @@ class category_index
bool m_red;
};
entry *insert(category &cat, entry *h, row *v);
entry *erase(category &cat, entry *h, row *k);
entry *insert(entry *h, row *v);
entry *erase(entry *h, row *k);
// void validate(entry* h, bool isParentRed, uint32_t blackDepth, uint32_t& minBlack, uint32_t& maxBlack) const;
@@ -322,24 +324,26 @@ class category_index
return result;
}
category &m_category;
row_comparator m_row_comparator;
entry *m_root;
};
category_index::category_index(category &cat)
: m_row_comparator(cat)
category_index::category_index(category *cat)
: m_category(*cat)
, m_row_comparator(m_category)
, m_root(nullptr)
{
for (auto r : cat)
insert(cat, r.get_row());
for (auto r : m_category)
insert(r.get_row());
}
row *category_index::find(const category &cat, row *k) const
row *category_index::find(row *k) const
{
const entry *r = m_root;
while (r != nullptr)
{
int d = m_row_comparator(cat, k, r->m_row);
int d = m_row_comparator(k, r->m_row);
if (d < 0)
r = r->m_left;
else if (d > 0)
@@ -351,14 +355,14 @@ row *category_index::find(const category &cat, row *k) const
return r ? r->m_row : nullptr;
}
row *category_index::find_by_value(const category &cat, row_initializer k) const
row *category_index::find_by_value(row_initializer k) const
{
// sort the values in k first
row_initializer k2;
for (auto &f : cat.key_field_indices())
for (auto &f : m_category.key_field_indices())
{
auto fld = cat.get_column_name(f);
auto fld = m_category.get_column_name(f);
auto ki = find_if(k.begin(), k.end(), [&fld](auto &i) { return i.name() == fld; });
if (ki == k.end())
@@ -370,7 +374,7 @@ row *category_index::find_by_value(const category &cat, row_initializer k) const
const entry *r = m_root;
while (r != nullptr)
{
int d = m_row_comparator(cat, k2, r->m_row);
int d = m_row_comparator(k2, r->m_row);
if (d < 0)
r = r->m_left;
else if (d > 0)
@@ -382,34 +386,34 @@ row *category_index::find_by_value(const category &cat, row_initializer k) const
return r ? r->m_row : nullptr;
}
void category_index::insert(category &cat, row *k)
void category_index::insert(row *k)
{
m_root = insert(cat, m_root, k);
m_root = insert(m_root, k);
m_root->m_red = false;
}
category_index::entry *category_index::insert(category &cat, entry *h, row *v)
category_index::entry *category_index::insert(entry *h, row *v)
{
if (h == nullptr)
return new entry(v);
int d = m_row_comparator(cat, v, h->m_row);
int d = m_row_comparator(v, h->m_row);
if (d < 0)
h->m_left = insert(cat, h->m_left, v);
h->m_left = insert(h->m_left, v);
else if (d > 0)
h->m_right = insert(cat, h->m_right, v);
h->m_right = insert(h->m_right, v);
else
{
row_handle rh(cat, *v);
row_handle rh(m_category, *v);
std::ostringstream os;
for (auto col : cat.key_fields())
for (auto col : m_category.key_fields())
{
if (rh[col])
os << col << ": " << std::quoted(rh[col].text()) << "; ";
}
throw duplicate_key_error("Duplicate Key violation, cat: " + cat.name() + " values: " + os.str());
throw duplicate_key_error("Duplicate Key violation, cat: " + m_category.name() + " values: " + os.str());
}
if (is_red(h->m_right) and not is_red(h->m_left))
@@ -424,25 +428,25 @@ category_index::entry *category_index::insert(category &cat, entry *h, row *v)
return h;
}
void category_index::erase(category &cat, row *k)
void category_index::erase(row *k)
{
assert(find(cat, k) == k);
assert(find(k) == k);
m_root = erase(cat, m_root, k);
m_root = erase(m_root, k);
if (m_root != nullptr)
m_root->m_red = false;
}
category_index::entry *category_index::erase(category &cat, entry *h, row *k)
category_index::entry *category_index::erase(entry *h, row *k)
{
if (m_row_comparator(cat, k, h->m_row) < 0)
if (m_row_comparator(k, h->m_row) < 0)
{
if (h->m_left != nullptr)
{
if (not is_red(h->m_left) and not is_red(h->m_left->m_left))
h = move_red_left(h);
h->m_left = erase(cat, h->m_left, k);
h->m_left = erase(h->m_left, k);
}
}
else
@@ -450,7 +454,7 @@ category_index::entry *category_index::erase(category &cat, entry *h, row *k)
if (is_red(h->m_left))
h = rotateRight(h);
if (m_row_comparator(cat, k, h->m_row) == 0 and h->m_right == nullptr)
if (m_row_comparator(k, h->m_row) == 0 and h->m_right == nullptr)
{
delete h;
return nullptr;
@@ -461,13 +465,13 @@ category_index::entry *category_index::erase(category &cat, entry *h, row *k)
if (not is_red(h->m_right) and not is_red(h->m_right->m_left))
h = move_red_right(h);
if (m_row_comparator(cat, k, h->m_row) == 0)
if (m_row_comparator(k, h->m_row) == 0)
{
h->m_row = find_min(h->m_right)->m_row;
h->m_right = erase_min(h->m_right);
}
else
h->m_right = erase(cat, h->m_right, k);
h->m_right = erase(h->m_right, k);
}
}
@@ -516,7 +520,7 @@ category::category(const category &rhs)
insert_impl(end(), clone_row(*r));
if (m_cat_validator != nullptr and m_index == nullptr)
m_index = new category_index(*this);
m_index = new category_index(this);
}
category::category(category &&rhs)
@@ -560,7 +564,7 @@ category &category::operator=(const category &rhs)
m_cat_validator = rhs.m_cat_validator;
if (m_cat_validator != nullptr and m_index == nullptr)
m_index = new category_index(*this);
m_index = new category_index(this);
}
return *this;
@@ -665,7 +669,7 @@ void category::set_validator(const validator *v, datablock &db)
}
if (missing.empty())
m_index = new category_index(*this);
m_index = new category_index(this);
else
{
std::ostringstream msg;
@@ -778,7 +782,7 @@ bool category::is_valid() const
for (auto r : *this)
{
auto p = r.get_row();
if (m_index->find(*this, p) != p)
if (m_index->find(p) != p)
m_validator->report_error("Key not found in index for category " + m_name, true);
}
}
@@ -900,7 +904,7 @@ row_handle category::operator[](const key_type &key)
if (m_index == nullptr)
throw std::logic_error("Category " + m_name + " does not have an index");
auto row = m_index->find_by_value(*this, key);
auto row = m_index->find_by_value(key);
if (row != nullptr)
result = { *this, *row };
}
@@ -1074,7 +1078,7 @@ category::iterator category::erase(iterator pos)
throw std::runtime_error("erase");
if (m_index != nullptr)
m_index->erase(*this, r);
m_index->erase(r);
if (r == m_head)
{
@@ -1246,14 +1250,12 @@ std::string category::get_unique_id(std::function<std::string(int)> generator)
std::string id_tag = "id";
if (m_cat_validator != nullptr and m_cat_validator->m_keys.size() == 1)
{
id_tag = m_cat_validator->m_keys.front();
if (m_index == nullptr and m_cat_validator != nullptr)
m_index = new category_index(*this);
m_index = new category_index(this);
for (;;)
{
if (m_index->find_by_value(*this, {{ id_tag, result }}) == nullptr)
if (m_index->find_by_value({{ id_tag, result }}) == nullptr)
break;
result = generator(static_cast<int>(m_last_unique_num++));
}
@@ -1405,7 +1407,7 @@ void category::update_value(row *row, uint16_t column, std::string_view value, b
{
// make sure we have an index, if possible
if (m_index == nullptr and m_cat_validator != nullptr)
m_index = new category_index(*this);
m_index = new category_index(this);
auto &col = m_columns[column];
@@ -1431,9 +1433,9 @@ void category::update_value(row *row, uint16_t column, std::string_view value, b
if (updateLinked and // an update of an Item's value
m_index != nullptr and key_field_indices().count(column))
{
reinsert = m_index->find(*this, row);
reinsert = m_index->find(row);
if (reinsert)
m_index->erase(*this, row);
m_index->erase(row);
}
// first remove old value with cix
@@ -1444,7 +1446,7 @@ void category::update_value(row *row, uint16_t column, std::string_view value, b
row->append(column, { value });
if (reinsert)
m_index->insert(*this, row);
m_index->insert(row);
// see if we need to update any child categories that depend on this value
auto iv = col.m_validator;
@@ -1600,7 +1602,7 @@ row_handle category::create_copy(row_handle r)
category::iterator category::insert_impl(const_iterator pos, row *n)
{
if (m_index == nullptr and m_cat_validator != nullptr)
m_index = new category_index(*this);
m_index = new category_index(this);
assert(n != nullptr);
assert(n->m_next == nullptr);
@@ -1640,7 +1642,7 @@ category::iterator category::insert_impl(const_iterator pos, row *n)
}
if (m_index != nullptr)
m_index->insert(*this, n);
m_index->insert(n);
// insert at end, most often this is the case
if (pos.m_current == nullptr)

View File

@@ -136,17 +136,14 @@ compound::compound(cif::datablock &db)
if (chemComp.size() != 1)
throw std::runtime_error("Invalid compound file, chem_comp should contain a single row");
std::string one_letter_code;
cif::tie(m_id, m_name, m_type, m_formula, m_formula_weight, m_formal_charge, one_letter_code, m_parent_id) =
chemComp.front().get("id", "name", "type", "formula", "formula_weight", "pdbx_formal_charge", "one_letter_code", "mon_nstd_parent_comp_id");
if (one_letter_code.length() == 1)
m_one_letter_code = one_letter_code.front();
cif::tie(m_id, m_name, m_type, m_formula, m_formula_weight, m_formal_charge) =
chemComp.front().get("id", "name", "type", "formula", "formula_weight", "pdbx_formal_charge");
// The name should not contain newline characters since that triggers validation errors later on
cif::replace_all(m_name, "\n", "");
m_group = "non-polymer";
auto &chemCompAtom = db["chem_comp_atom"];
for (auto row : chemCompAtom)
{
@@ -156,9 +153,6 @@ compound::compound(cif::datablock &db)
row.get("atom_id", "type_symbol", "charge", "pdbx_aromatic_flag", "pdbx_leaving_atom_flag", "pdbx_stereo_config",
"model_Cartn_x", "model_Cartn_y", "model_Cartn_z");
atom.type_symbol = atom_type_traits(type_symbol).type();
if (stereo_config.empty())
atom.stereo_config = stereo_config_type::N;
else
atom.stereo_config = parse_stereo_config_from_string(stereo_config);
m_atoms.push_back(std::move(atom));
}
@@ -169,28 +163,17 @@ compound::compound(cif::datablock &db)
compound_bond bond;
std::string valueOrder;
cif::tie(bond.atom_id[0], bond.atom_id[1], valueOrder, bond.aromatic, bond.stereo_config) = row.get("atom_id_1", "atom_id_2", "value_order", "pdbx_aromatic_flag", "pdbx_stereo_config");
if (valueOrder.empty())
bond.type = bond_type::sing;
else
bond.type = parse_bond_type_from_string(valueOrder);
m_bonds.push_back(std::move(bond));
}
}
compound::compound(cif::datablock &db, int)
compound::compound(cif::datablock &db, const std::string &id, const std::string &name, const std::string &type, const std::string &group)
: m_id(id)
, m_name(name)
, m_type(type)
, m_group(group)
{
auto &chemComp = db["chem_comp"];
if (chemComp.size() != 1)
throw std::runtime_error("Invalid compound file, chem_comp should contain a single row");
cif::tie(m_id, m_name) =
chemComp.front().get("id", "name");
cif::trim(m_name);
m_type = "NON-POLYMER";
auto &chemCompAtom = db["chem_comp_atom"];
for (auto row : chemCompAtom)
{
@@ -201,6 +184,7 @@ compound::compound(cif::datablock &db, int)
atom.type_symbol = atom_type_traits(type_symbol).type();
m_formal_charge += atom.charge;
m_formula_weight += atom_type_traits(atom.type_symbol).weight();
m_atoms.push_back(std::move(atom));
}
@@ -225,39 +209,11 @@ compound::compound(cif::datablock &db, int)
else
{
if (cif::VERBOSE > 0)
std::cerr << "Unimplemented chem_comp_bond.type " << btype << " in " << db.name() << '\n';
std::cerr << "Unimplemented chem_comp_bond.type " << btype << " in " << id << '\n';
bond.type = bond_type::sing;
}
m_bonds.push_back(std::move(bond));
}
// reconstruct a formula and weight
m_formula_weight = 0;
std::map<atom_type, int> f;
for (auto &atom : m_atoms)
f[atom.type_symbol] += 1;
if (f.count(atom_type::C))
{
atom_type_traits att(atom_type::C);
m_formula += att.symbol() + std::to_string(f[atom_type::C]) + ' ';
m_formula_weight += att.weight() * f[atom_type::C];
}
for (const auto &[type, count] : f)
{
if (type == atom_type::C)
continue;
atom_type_traits att(type);
m_formula += att.symbol() + std::to_string(count) + ' ';
m_formula_weight += att.weight() * count;
}
if (not m_formula.empty())
m_formula.pop_back();
}
compound_atom compound::get_atom_by_atom_id(const std::string &atom_id) const
@@ -304,12 +260,13 @@ float compound::bond_length(const std::string &atomId_1, const std::string &atom
auto a = get_atom_by_atom_id(atomId_1);
auto b = get_atom_by_atom_id(atomId_2);
result = distance(point{ a.x, a.y, a.z }, point{ b.x, b.y, b.z });
result = distance(point{a.x, a.y, a.z}, point{b.x, b.y, b.z});
}
return result;
}
// --------------------------------------------------------------------
// known amino acids and bases
@@ -359,7 +316,7 @@ class compound_factory_impl : public std::enable_shared_from_this<compound_facto
compound_factory_impl();
compound_factory_impl(const fs::path &file, std::shared_ptr<compound_factory_impl> next);
virtual ~compound_factory_impl()
~compound_factory_impl()
{
for (auto c : m_compounds)
delete c;
@@ -416,15 +373,13 @@ class compound_factory_impl : public std::enable_shared_from_this<compound_facto
os << "CCD components.cif resource\n";
else
os << "CCD components file: " << std::quoted(m_file.string()) << '\n';
if (m_next)
m_next->describe(os);
}
protected:
compound_factory_impl(std::shared_ptr<compound_factory_impl> next);
virtual compound *create(const std::string &id);
private:
compound *create(const std::string &id);
std::shared_timed_mutex mMutex;
@@ -440,15 +395,10 @@ compound_factory_impl::compound_factory_impl()
{
}
compound_factory_impl::compound_factory_impl(std::shared_ptr<compound_factory_impl> next)
: m_next(next)
{
}
compound_factory_impl::compound_factory_impl(const fs::path &file, std::shared_ptr<compound_factory_impl> next)
: compound_factory_impl(next)
: m_file(file)
, m_next(next)
{
m_file = file;
}
compound *compound_factory_impl::create(const std::string &id)
@@ -526,45 +476,6 @@ compound *compound_factory_impl::create(const std::string &id)
// --------------------------------------------------------------------
class local_compound_factory_impl : public compound_factory_impl
{
public:
local_compound_factory_impl(const cif::file &file, std::shared_ptr<compound_factory_impl> next)
: compound_factory_impl(next)
, m_local_file(file)
{
}
compound *create(const std::string &id) override;
private:
const cif::file &m_local_file;
};
/// \brief Create a compound for @a id from the local file.
///
/// The local file is searched for a datablock named "comp_" followed by
/// @a id. The first match is copied, turned into a compound and stored
/// in m_compounds.
///
/// \param id The compound ID to look for
/// \return The newly created compound or nullptr if no datablock matched
compound *local_compound_factory_impl::create(const std::string &id)
{
	const std::string block_name = "comp_" + id;

	for (auto &block : m_local_file)
	{
		if (not(block.name() == block_name))
			continue;

		// the compound constructor takes a non-const datablock, so work on a copy
		cif::datablock scratch(block);
		compound *created = new compound(scratch, 1);

		// NOTE(review): m_compounds is modified while holding only a shared
		// lock. Whether that is safe depends on the locking done by the
		// caller, which is not visible here; confirm.
		std::shared_lock lock(mMutex);
		m_compounds.push_back(created);

		return created;
	}

	return nullptr;
}
// --------------------------------------------------------------------
std::unique_ptr<compound_factory> compound_factory::s_instance;
thread_local std::unique_ptr<compound_factory> compound_factory::tl_instance;
bool compound_factory::s_use_thread_local_instance;
@@ -642,18 +553,6 @@ void compound_factory::push_dictionary(const fs::path &inDictFile)
}
}
/// \brief Put a new implementation, based on the data in @a inDictFile, in
/// front of the current chain of implementations.
///
/// Any std::exception thrown while doing so is rethrown nested inside a
/// std::runtime_error.
void compound_factory::push_dictionary(const cif::file &inDictFile)
{
	try
	{
		// the current implementation becomes the 'next' of the new one
		m_impl = std::make_shared<local_compound_factory_impl>(inDictFile, m_impl);
	}
	catch (const std::exception &)
	{
		std::throw_with_nested(std::runtime_error("Error loading dictionary from local mmCIF file"));
	}
}
void compound_factory::pop_dictionary()
{
if (m_impl)
@@ -685,26 +584,25 @@ void compound_factory::report_missing_compound(const std::string &compound_id)
{
using namespace cif::colour;
std::clog << "\n"
<< cif::coloured("Configuration error:", white, red) << "\n\n"
std::clog << "\n" << cif::coloured("Configuration error:", white, red) << "\n\n"
<< "The attempt to retrieve compound information for " << std::quoted(compound_id) << " failed.\n\n"
<< "This information is searched for in a CCD file called components.cif or\n"
<< "components.cif.gz which should be located in one of the following directories:\n\n";
cif::list_data_directories(std::clog);
std::clog << "\n(Note that you can add a directory to the search paths by setting the \n"
<< "LIBCIFPP_DATA_DIR environmental variable)\n\n";
#if defined(CACHE_DIR)
#if defined(CACHE_DIR)
std::clog << "On Linux an optional cron script might have been installed that automatically updates\n"
<< "components.cif and mmCIF dictionary files. This script only works when the file\n"
<< "libcifpp.conf contains an uncommented line with the text:\n\n"
<< "update=true\n\n"
<< "If you do not have a working cron script, you can manually update the files\n"
<< "in /var/cache/libcifpp using the following commands:\n\n"
<< "curl -o " << CACHE_DIR << "/components.cif https://files.wwpdb.org/pub/pdb/data/monomers/components.cif.gz\n"
<< "curl -o " << CACHE_DIR << "/mmcif_pdbx.dic https://mmcif.wwpdb.org/dictionaries/ascii/mmcif_pdbx_v50.dic.gz\n"
<< "curl -o " << CACHE_DIR << "/components.cif https://ftp.wwpdb.org/pub/pdb/data/monomers/components.cif.gz\n"
<< "curl -o " << CACHE_DIR << "/mmcif_pdbx.dic https://mmcif.wwpdb.org/dictionaries/ascii/mmcif_pdbx_v50.dic.gz\n"
<< "curl -o " << CACHE_DIR << "/mmcif_ma.dic https://github.com/ihmwg/ModelCIF/raw/master/dist/mmcif_ma.dic\n\n";
#endif
@@ -715,9 +613,9 @@ void compound_factory::report_missing_compound(const std::string &compound_id)
}
else
std::clog << "No compound factory objects are created since none of the data sources is found.\n";
cif::list_file_resources(std::clog);
std::clog.flush();
}
}

View File

@@ -91,7 +91,7 @@ bool datablock::validate_links() const
for (auto &cat : *this)
result = cat.validate_links() and result;
return result;
}
@@ -158,12 +158,11 @@ std::tuple<datablock::iterator, bool> datablock::emplace(std::string_view name)
if (is_new)
{
auto &c = emplace_back(name);
auto &c = emplace_front(name);
c.set_validator(m_validator, *this);
}
assert(end() != begin());
return std::make_tuple(std::prev(end()), is_new);
return std::make_tuple(begin(), is_new);
}
std::vector<std::string> datablock::get_tag_order() const
@@ -172,16 +171,14 @@ std::vector<std::string> datablock::get_tag_order() const
// for entry and audit_conform on top
auto ci = find_if(begin(), end(), [](const category &cat)
{ return cat.name() == "entry"; });
auto ci = find_if(begin(), end(), [](const category &cat) { return cat.name() == "entry"; });
if (ci != end())
{
auto cto = ci->get_tag_order();
result.insert(result.end(), cto.begin(), cto.end());
}
ci = find_if(begin(), end(), [](const category &cat)
{ return cat.name() == "audit_conform"; });
ci = find_if(begin(), end(), [](const category &cat) { return cat.name() == "audit_conform"; });
if (ci != end())
{
auto cto = ci->get_tag_order();
@@ -199,131 +196,42 @@ std::vector<std::string> datablock::get_tag_order() const
return result;
}
// Helpers to calculate an order for categories based on the links between
// child and parent categories as reported by the validator.
namespace
{
// One element per category: the category name, its calculated count and
// a flag that is set once calculation for this category has started.
using elem_t = std::tuple<std::string, int, bool>;
using cat_order_t = std::vector<elem_t>;
using iter_t = cat_order_t::iterator;
// Return the count stored in the element @a i points to
inline int get_count(iter_t i)
{
return std::get<1>(*i);
}
// Return the flag stored in the element @a i points to
inline bool is_on_stack(iter_t i)
{
return std::get<2>(*i);
}
// Calculate the count for the element at @a i as one more than the sum of
// the counts of its parent categories, as far as these are present in
// @a cat_order. Parents are calculated first by recursion.
// Nothing is done when @a i is the end of @a cat_order or when the element
// already has a non-negative count.
void calculate_cat_order(cat_order_t &cat_order, iter_t i, const validator &validator)
{
if (i == cat_order.end() or get_count(i) >= 0)
return;
auto &&[cat, count, on_stack] = *i;
// The flag is set here and not cleared again in this function, it
// keeps the recursion below from entering the same element twice.
on_stack = true;
int parent_count = 0;
for (auto link : validator.get_links_for_child(cat))
{
auto ei = std::find_if(cat_order.begin(), cat_order.end(), [parent = link->m_parent_category](elem_t &a)
{ return std::get<0>(a) == parent; });
// parent category is not in the list, ignore this link
if (ei == cat_order.end())
continue;
if (not is_on_stack(ei))
calculate_cat_order(cat_order, ei, validator);
// NOTE(review): for a flagged parent that is not finished yet (e.g. a
// link to the category itself) this adds the parent's current count,
// presumably the initial negative value set by the caller; confirm.
parent_count += get_count(ei);
}
count = parent_count + 1;
}
} // namespace
void datablock::write(std::ostream &os) const
{
os << "data_" << m_name << '\n'
<< "# \n";
if (m_validator and size() > 0)
// mmcif support, sort of. First write the 'entry' Category
// and if it exists, _AND_ we have a Validator, write out the
// audit_conform record.
for (auto &cat : *this)
{
// If the dictionary declares an audit_conform category, put it in,
// but only if it does not exist already!
if (get("audit_conform") == nullptr and m_validator->get_validator_for_category("audit_conform") != nullptr)
{
category auditConform("audit_conform");
auditConform.emplace({ { "dict_name", m_validator->name() },
{ "dict_version", m_validator->version() } });
auditConform.write(os);
}
if (cat.name() != "entry")
continue;
// base order on parent child relationships, parents first
cat.write(os);
cat_order_t cat_order;
for (auto &cat : *this)
cat_order.emplace_back(cat.name(), -1, false);
for (auto i = cat_order.begin(); i != cat_order.end(); ++i)
calculate_cat_order(cat_order, i, *m_validator);
std::sort(cat_order.begin(), cat_order.end(), [](const elem_t &a, const elem_t &b)
{
const auto &[cat_a, count_a, on_stack_a] = a;
const auto &[cat_b, count_b, on_stack_b] = b;
int d = 0;
if (cat_a == "audit_conform")
d = -1;
else if (cat_b == "audit_conform")
d = 1;
else if (cat_a == "entry")
d = -1;
else if (cat_b == "entry")
d = 1;
else
{
d = std::get<1>(a) - std::get<1>(b);
if (d == 0)
d = cat_b.compare(cat_a);
}
return d < 0; });
for (auto &&[cat, count, on_stack] : cat_order)
get(cat)->write(os);
break;
}
else
// If the dictionary declares an audit_conform category, put it in,
// but only if it does not exist already!
if (get("audit_conform"))
get("audit_conform")->write(os);
else if (m_validator != nullptr and m_validator->get_validator_for_category("audit_conform") != nullptr)
{
// mmcif support, sort of. First write the 'entry' Category
// and if it exists, _AND_ we have a Validator, write out the
// audit_conform record.
for (auto &cat : *this)
{
if (cat.name() != "entry")
continue;
category auditConform("audit_conform");
auditConform.emplace({
{"dict_name", m_validator->name()},
{"dict_version", m_validator->version()}});
auditConform.write(os);
}
for (auto &cat : *this)
{
if (cat.name() != "entry" and cat.name() != "audit_conform")
cat.write(os);
break;
}
// If the dictionary declares an audit_conform category, put it in,
// but only if it does not exist already!
if (get("audit_conform"))
get("audit_conform")->write(os);
for (auto &cat : *this)
{
if (cat.name() != "entry" and cat.name() != "audit_conform")
cat.write(os);
}
}
}
@@ -429,7 +337,7 @@ bool datablock::operator==(const datablock &rhs) const
++catA_i;
else
{
if (not(*dbA.get(*catA_i) == *dbB.get(*catB_i)))
if (not (*dbA.get(*catA_i) == *dbB.get(*catB_i)))
return false;
++catA_i;
++catB_i;
@@ -439,4 +347,4 @@ bool datablock::operator==(const datablock &rhs) const
return true;
}
} // namespace cif
} // namespace cif::cif

View File

@@ -173,12 +173,11 @@ std::tuple<file::iterator, bool> file::emplace(std::string_view name)
if (is_new)
{
auto &db = emplace_back(name);
auto &db = emplace_front(name);
db.set_validator(m_validator);
}
assert(begin() != end());
return std::make_tuple(std::prev(end()), is_new);
return std::make_tuple(begin(), is_new);
}
void file::load(const std::filesystem::path &p)

View File

@@ -2006,10 +2006,7 @@ void structure::change_residue(residue &res, const std::string &newCompound,
continue;
if (a2.empty() or a2 == ".")
{
i->set_property("label_comp_id", newCompound);
remove_atom(*i);
}
else if (a1 != a2)
{
auto ra = r.front();
@@ -2836,4 +2833,15 @@ void structure::validate_atoms() const
assert(atoms.empty());
}
// --------------------------------------------------------------------
// Placeholder for reconstructing a PDBx file from a single datablock.
// Throws std::runtime_error when the datablock has no atom_site category,
// and otherwise always fails since the reconstruction itself is not implemented.
void reconstruct_pdbx(datablock &db)
{
	const bool has_atom_data = db.get("atom_site") != nullptr;

	if (not has_atom_data)
		throw std::runtime_error("Cannot reconstruct PDBx file, atom data missing");

	// Trips when assertions are enabled; with assertions disabled the throw below is reached.
	assert(false);

	throw std::runtime_error("not implemented yet");
}
} // namespace pdbx

File diff suppressed because it is too large Load Diff

View File

@@ -1,561 +0,0 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2024 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "cif++.hpp"
// --------------------------------------------------------------------
namespace cif::pdb
{
// Check all atom_site records in db and complete the information derived from them.
//
// For every atom_site row this
//  - requires a type_symbol and registers it in atom_type if it is not there yet,
//  - requires at least one of the label_/auth_ variants of asym_id, comp_id and atom_id,
//  - requires the compound to be known to the compound_factory and creates or
//    completes the corresponding chem_comp record,
//  - requires a sequence number for known peptides,
//  - fills in the auth_ fields, falling back to the label_ value when the auth_
//    value is missing.
//
// Throws std::runtime_error when any of these requirements is not met.
void checkAtomRecords(datablock &db)
{
	using namespace literals;

	auto &cf = compound_factory::instance();

	auto &atom_site = db["atom_site"];
	auto &atom_type = db["atom_type"];
	auto &chem_comp = db["chem_comp"];

	// Return the value of the first optional if it has one, otherwise the value of
	// the second. At least one of the two must hold a value. Do not replace this
	// with preferred.value_or(*fallback): the argument of value_or is evaluated
	// unconditionally and would dereference an empty optional.
	auto first_of = [](const std::optional<std::string> &preferred, const std::optional<std::string> &fallback) -> const std::string &
	{
		return preferred.has_value() ? *preferred : *fallback;
	};

	for (auto row : atom_site)
	{
		const auto &[symbol, label_asym_id, auth_asym_id, label_comp_id, auth_comp_id, label_seq_id, auth_seq_id, label_atom_id, auth_atom_id] =
			row.get<std::string, std::optional<std::string>, std::optional<std::string>, std::optional<std::string>, std::optional<std::string>,
				std::optional<int>, std::optional<std::string>, std::optional<std::string>, std::optional<std::string>>(
				"type_symbol", "label_asym_id", "auth_asym_id", "label_comp_id", "auth_comp_id", "label_seq_id", "auth_seq_id", "label_atom_id", "auth_atom_id");

		if (symbol.empty())
			throw std::runtime_error("Missing type symbol in atom_site record");

		if (atom_type.count("symbol"_key == symbol) == 0)
			atom_type.emplace({ { "symbol", symbol } });

		if (not(label_asym_id.has_value() or auth_asym_id.has_value()))
			throw std::runtime_error("atom_site records does not have a label_asym_id nor an auth_asym_id, cannot continue");

		if (not(label_comp_id.has_value() or auth_comp_id.has_value()))
			throw std::runtime_error("atom_site records does not have a label_comp_id nor an auth_comp_id, cannot continue");

		if (not(label_atom_id.has_value() or auth_atom_id.has_value()))
			throw std::runtime_error("atom_site records does not have a label_atom_id nor an auth_atom_id, cannot continue");

		// From here on each of the asym/comp/atom pairs is known to have at least one value
		const std::string comp_id = first_of(label_comp_id, auth_comp_id);

		bool is_peptide = cf.is_known_peptide(comp_id);
		auto compound = cf.create(comp_id);

		if (not compound)
			throw std::runtime_error("Missing compound information for " + comp_id);

		std::string mon_nstd_flag(".");
		if (is_peptide)
		{
			if (compound_factory::kAAMap.find(comp_id) != compound_factory::kAAMap.end())
				mon_nstd_flag = "y";
			else
				mon_nstd_flag = "n";
		}

		auto chem_comp_entry = chem_comp.find_first("id"_key == comp_id);

		if (not chem_comp_entry)
		{
			chem_comp.emplace({ //
				{ "id", comp_id },
				{ "type", compound->type() },
				{ "mon_nstd_flag", mon_nstd_flag },
				{ "name", compound->name() },
				{ "formula", compound->formula() },
				{ "formula_weight", compound->formula_weight() } });
		}
		else
		{
			// Only fill in the fields that are missing in the existing record
			std::vector<item> items;

			if (not chem_comp_entry["type"])
				items.emplace_back(item{ "type", compound->type() });
			if (not chem_comp_entry["mon_nstd_flag"])
				items.emplace_back(item{ "mon_nstd_flag", mon_nstd_flag });
			if (not chem_comp_entry["name"])
				items.emplace_back(item{ "name", compound->name() });
			if (not chem_comp_entry["formula"])
				items.emplace_back(item{ "formula", compound->formula() });
			if (not chem_comp_entry["formula_weight"])
				items.emplace_back(item{ "formula_weight", compound->formula_weight() });

			if (not items.empty())
				chem_comp_entry.assign(std::move(items));
		}

		if (is_peptide and not(label_seq_id.has_value() or auth_seq_id.has_value()))
			throw std::runtime_error("atom_site record has peptide comp_id but no sequence number, cannot continue");

		std::vector<item> auth_items;

		auth_items.emplace_back(item{ "auth_asym_id", first_of(auth_asym_id, label_asym_id) });

		// A sequence number is only mandatory for peptides, so both variants may be
		// absent here. In that case auth_seq_id is simply left untouched.
		if (auth_seq_id.has_value())
			auth_items.emplace_back(item{ "auth_seq_id", *auth_seq_id });
		else if (label_seq_id.has_value())
			auth_items.emplace_back(item{ "auth_seq_id", std::to_string(*label_seq_id) });

		auth_items.emplace_back(item{ "auth_comp_id", first_of(auth_comp_id, label_comp_id) });
		auth_items.emplace_back(item{ "auth_atom_id", first_of(auth_atom_id, label_atom_id) });

		row.assign(std::move(auth_items));
	}
}
// Make sure struct_asym has a record for every label_asym_id used in atom_site.
// Throws std::runtime_error when an atom_site record has an empty label_asym_id.
void createStructAsym(datablock &db)
{
	auto &atoms = db["atom_site"];
	auto &asyms = db["struct_asym"];

	for (auto asym : atoms.rows<std::string>("label_asym_id"))
	{
		if (asym.empty())
			throw std::runtime_error("File contains atom_site records without a label_asym_id");

		// nothing to do when this asym was registered already
		const bool registered = asyms.count(key("id") == asym) != 0;
		if (registered)
			continue;

		asyms.emplace({ //
			{ "id", asym } });
	}
}
// Create the entity category based on the content of the asyms in atom_site.
//
// For each struct_asym the list of (comp_id, seq_id) pairs is collected from
// atom_site; asyms with an identical list are considered to be the same entity.
// Each entity gets a type (water, non-polymer or polymer), a description, a
// formula weight and the number of molecules. The new entity id is written to
// atom_site.label_entity_id and struct_asym.entity_id.
//
// Throws std::runtime_error when a compound is not known to the compound_factory.
void createEntity(datablock &db)
{
	using namespace literals;

	auto &cf = compound_factory::instance();

	// Look up a compound, failing with a clear error instead of dereferencing a null result
	auto require_compound = [&cf](const std::string &comp_id)
	{
		auto c = cf.create(comp_id);
		if (not c)
			throw std::runtime_error("Missing compound information for " + comp_id);
		return c;
	};

	auto &atom_site = db["atom_site"];
	atom_site.add_column("label_entity_id");

	auto &struct_asym = db["struct_asym"];
	struct_asym.add_column("entity_id");

	std::map<std::string, std::vector<std::tuple<std::string, int>>> asyms;

	for (auto asym_id : db["struct_asym"].rows<std::string>("id"))
	{
		int last_seq_id = -1;

		for (const auto &[comp_id, seq_id] : atom_site.find<std::string, int>("label_asym_id"_key == asym_id, "label_comp_id", "label_seq_id"))
		{
			// consecutive atoms with the same seq_id are taken to be the same residue
			if (seq_id == last_seq_id)
				continue;

			last_seq_id = seq_id;

			asyms[asym_id].emplace_back(comp_id, last_seq_id);
		}
	}

	// Order the entities by size, largest first; equally sized ones by content
	auto less = [](const std::vector<std::tuple<std::string, int>> &a, const std::vector<std::tuple<std::string, int>> &b)
	{
		int d = static_cast<int>(a.size()) - static_cast<int>(b.size());
		return d == 0 ? a > b : d > 0;
	};

	std::set<std::vector<std::tuple<std::string, int>>, decltype(less)> entities(less);
	for (const auto &[asym_id, content] : asyms)
		entities.emplace(content);

	auto water_weight = require_compound("HOH")->formula_weight();

	int poly_count = 0;

	auto &entity = db["entity"];
	for (auto &content : entities)
	{
		auto entity_id = entity.get_unique_id("");

		std::string type, desc;
		float weight = 0;
		int count = 0;

		auto first_comp_id = std::get<0>(content.front());

		if (first_comp_id == "HOH")
		{
			type = "water";
			desc = "water";
			weight = water_weight;
		}
		else if (content.size() == 1)
		{
			auto c = require_compound(first_comp_id);

			type = "non-polymer";
			desc = c->name();
			weight = c->formula_weight();
		}
		else
		{
			type = "polymer";
			desc = "polymer-" + std::to_string(++poly_count);

			// each residue contributes its weight minus the water lost in forming the link
			weight = water_weight;
			for (const auto &[comp_id, seq_id] : content)
				weight += require_compound(comp_id)->formula_weight() - water_weight;
		}

		for (const auto &[asym_id, ac] : asyms)
		{
			if (ac != content)
				continue;

			atom_site.update_value("label_asym_id"_key == asym_id, "label_entity_id", entity_id);
			struct_asym.update_value("id"_key == asym_id, "entity_id", entity_id);

			// Every asym is one molecule, except for water where an asym may hold
			// many molecules; those are counted by their oxygen atom. The count is
			// accumulated since several water asyms can share this entity.
			if (type != "water")
				++count;
			else
				count += static_cast<int>(atom_site.count("label_asym_id"_key == asym_id and "label_atom_id"_key == "O"));
		}

		entity.emplace({ //
			{ "id", entity_id },
			{ "type", type },
			{ "pdbx_description", desc },
			{ "formula_weight", weight },
			{ "pdbx_number_of_molecules", count } });
	}
}
// Create an entity_poly record for each entity of type polymer.
//
// The polymer type and the one letter code sequences are derived from the
// residues found in atom_site for the entity. Standard bases and amino acids
// are written as their one letter code, any other residue as '(comp_id)' in
// pdbx_seq_one_letter_code and as its one letter code, or 'X' when it has
// none, in pdbx_seq_one_letter_code_can.
//
// NOTE(review): the atom_site rows are selected on label_entity_id only, so for
// an entity with more than one asym the residues of every chain appear to be
// appended to the same sequence, and pdbx_strand_id only keeps the last
// auth_asym_id seen -- confirm whether that is intended.
void createEntityPoly(datablock &db)
{
	using namespace literals;

	auto &cf = compound_factory::instance();

	auto &atom_site = db["atom_site"];
	auto &entity_poly = db["entity_poly"];

	for (auto entity_id : db["entity"].find<std::string>("type"_key == "polymer", "id"))
	{
		std::string type;
		int last_seq_id = -1;
		std::string seq, seq_can;

		bool non_std_monomer = false;
		bool non_std_linkage = false;
		std::string pdb_strand_id;

		for (const auto &[comp_id, seq_id, auth_asym_id] : atom_site.find<std::string, int, std::string>("label_entity_id"_key == entity_id, "label_comp_id", "label_seq_id", "auth_asym_id"))
		{
			if (seq_id == last_seq_id)
				continue;

			last_seq_id = seq_id;

			auto c = cf.create(comp_id);

			// Defaults for residues that are not recognised below. The letters are
			// always given a value, the sequences must contain an entry for every
			// residue regardless of what the polymer type turns out to be.
			std::string letter = '(' + comp_id + ')';
			char letter_can = 'X';
			std::string c_type;

			// TODO: Perhaps we should improve this...
			if (cf.is_known_base(comp_id))
			{
				c_type = "polydeoxyribonucleotide";
				letter_can = compound_factory::kBaseMap.at(comp_id);
				letter.assign(1, letter_can);
			}
			else if (cf.is_known_peptide(comp_id))
			{
				c_type = "polypeptide(L)";
				letter_can = compound_factory::kAAMap.at(comp_id);
				letter.assign(1, letter_can);
			}
			else if (c and iequals(c->type(), "D-PEPTIDE LINKING"))
			{
				c_type = "polypeptide(D)";

				letter_can = c->one_letter_code();
				if (letter_can == 0)
					letter_can = 'X';

				non_std_linkage = true;
				non_std_monomer = true;
			}
			else if (c and (iequals(c->type(), "L-PEPTIDE LINKING") or iequals(c->type(), "PEPTIDE LINKING")))
			{
				c_type = "polypeptide(L)";

				letter_can = c->one_letter_code();
				if (letter_can == 0)
					letter_can = 'X';

				non_std_monomer = true;
			}

			// Once residues of different types were seen the type stays 'other'
			if (type != "other")
			{
				if (type.empty())
					type = c_type;
				else if (type != c_type)
					type = "other";
			}

			seq += letter;
			seq_can += letter_can;

			pdb_strand_id = auth_asym_id;
		}

		// Break the sequence into lines of 80 characters. Positions are used instead
		// of iterators, stepping an iterator beyond end() is undefined behaviour and
		// that is what happens for sequences shorter than one line.
		for (std::size_t i = 80; i < seq.size(); i += 80)
		{
			seq.insert(i, 1, '\n');
			++i; // skip the newline just inserted
		}

		// Same for the canonical sequence, using lines of 76 characters and moving
		// the break forward by at most four positions, up to an opening parenthesis.
		for (std::size_t i = 76; i < seq_can.size(); i += 76)
		{
			std::size_t j = i;
			while (j < i + 4 and j < seq_can.size())
			{
				if (seq_can[j] == '(')
					break;
				++j;
			}

			if (j < seq_can.size())
			{
				seq_can.insert(j, 1, '\n');
				i = j + 1;
			}
			else
				i = j;
		}

		entity_poly.emplace({ //
			{ "entity_id", entity_id },
			{ "type", type },
			{ "nstd_linkage", non_std_linkage },
			{ "nstd_monomer", non_std_monomer },
			{ "pdbx_seq_one_letter_code", seq },
			{ "pdbx_seq_one_letter_code_can", seq_can },
			{ "pdbx_strand_id", pdb_strand_id } });
	}
}
// Create the entity_poly_seq records, one for each residue of each polymer
// entity, using the atoms of the first asym found for that entity. The
// entity_poly category is created first when it is missing.
void createEntityPolySeq(datablock &db)
{
	if (db.get("entity_poly") == nullptr)
		createEntityPoly(db);

	using namespace literals;

	auto &atom_site = db["atom_site"];
	auto &entity_poly = db["entity_poly"];
	auto &entity_poly_seq = db["entity_poly_seq"];
	auto &struct_asym = db["struct_asym"];

	for (auto entity_id : entity_poly.rows<std::string>("entity_id"))
	{
		int prev_seq_id = -1;
		std::string prev_comp_id;

		std::string asym_id = struct_asym.find_first<std::string>("entity_id"_key == entity_id, "id");

		for (const auto &[comp_id, seq_id] : atom_site.find<std::string, int>("label_entity_id"_key == entity_id and "label_asym_id"_key == asym_id, "label_comp_id", "label_seq_id"))
		{
			const bool same_position = seq_id == prev_seq_id;

			// another atom of the residue we already have
			if (same_position and prev_comp_id == comp_id)
				continue;

			// A different compound at the same position: the position is
			// heterogeneous, and so is the record added for it before.
			const bool hetero = same_position;

			if (hetero)
			{
				entity_poly_seq.back().assign({
					{ "hetero", true } });
			}

			entity_poly_seq.emplace({ //
				{ "entity_id", entity_id },
				{ "num", seq_id },
				{ "mon_id", comp_id },
				{ "hetero", hetero } });

			prev_seq_id = seq_id;
			prev_comp_id = comp_id;
		}

		// you cannot assume this is correct...
		entity_poly_seq.sort([](row_handle lhs, row_handle rhs)
			{ return lhs.get<int>("num") < rhs.get<int>("num"); });
	}
}
// Create the pdbx_poly_seq_scheme records: one for each entity_poly_seq record
// of each asym of each polymer entity. The author fields are taken from the
// first atom_site record found for the asym and sequence number. The
// entity_poly_seq category is created first when it is missing.
void createPdbxPolySeqScheme(datablock &db)
{
	if (db.get("entity_poly_seq") == nullptr)
		createEntityPolySeq(db);

	using namespace literals;

	auto &atom_site = db["atom_site"];
	auto &entity_poly = db["entity_poly"];
	auto &entity_poly_seq = db["entity_poly_seq"];
	auto &struct_asym = db["struct_asym"];
	auto &pdbx_poly_seq_scheme = db["pdbx_poly_seq_scheme"];

	for (const auto &[entity_id, pdb_strand_id] : entity_poly.rows<std::string, std::string>("entity_id", "pdbx_strand_id"))
	{
		for (auto asym_id : struct_asym.find<std::string>("entity_id"_key == entity_id, "id"))
		{
			for (const auto &[comp_id, num, hetero] : entity_poly_seq.find<std::string, int, bool>("entity_id"_key == entity_id, "mon_id", "num", "hetero"))
			{
				// author numbering and naming as found in the atom records for this position
				const auto observed =
					atom_site.find_first<std::string, std::string, std::optional<std::string>>(
						"label_asym_id"_key == asym_id and "label_seq_id"_key == num,
						"auth_seq_id", "auth_comp_id", "pdbx_PDB_ins_code");

				const auto &[auth_seq_num, auth_mon_id, ins_code] = observed;

				pdbx_poly_seq_scheme.emplace({ //
					{ "asym_id", asym_id },
					{ "entity_id", entity_id },
					{ "seq_id", num },
					{ "mon_id", comp_id },
					{ "ndb_seq_num", num },
					{ "pdb_seq_num", auth_seq_num },
					{ "auth_seq_num", auth_seq_num },
					{ "pdb_mon_id", auth_mon_id },
					{ "auth_mon_id", auth_mon_id },
					{ "pdb_strand_id", pdb_strand_id },
					{ "pdb_ins_code", ins_code },
					{ "hetero", hetero } });
			}
		}
	}
}
// Try to turn the first datablock of file into something that conforms to
// the given dictionary.
//
// An entry record is added when missing, fields linking to entry.id are set
// to the entry id, empty single-field keys get a unique value, the dictionary
// is loaded into the file and the atom_type, chem_comp, struct_asym, entity
// and pdbx_poly_seq_scheme categories are created or completed.
//
// Throws std::runtime_error when the file is empty, when there is no atom_site
// category or when the entry category does not contain exactly one record.
void reconstruct_pdbx(file &file, std::string_view dictionary)
{
	if (file.empty())
		throw std::runtime_error("Cannot reconstruct PDBx, file seems to be empty");

	// assuming the first datablock contains the entry ...
	auto &db = file.front();

	// ... and any additional datablock will contain compound information
	cif::compound_source cs(file);

	if (db.get("atom_site") == nullptr)
		throw std::runtime_error("Cannot reconstruct PDBx file, atom data missing");

	auto &validator = validator_factory::instance()[dictionary];

	std::string entry_id;

	if (db.get("entry") != nullptr)
	{
		auto &entry = db["entry"];
		if (entry.size() != 1)
			throw std::runtime_error("Unexpected size of entry category");

		entry_id = entry.front().get<std::string>("id");
	}
	else
	{
		// Phenix files do not have an entry record
		entry_id = db.name();

		category entry("entry");
		entry.emplace({ { "id", entry_id } });
		db.emplace_back(std::move(entry));
	}

	for (auto &cat : db)
	{
		auto cv = validator.get_validator_for_category(cat.name());
		if (not cv)
			continue;

		for (auto link : validator.get_links_for_child(cat.name()))
		{
			if (link->m_parent_category != "entry")
				continue;

			// So, this cat should have a link to the entry
			const auto &parent_keys = link->m_parent_keys;

			auto pk = std::find(parent_keys.begin(), parent_keys.end(), "id");
			if (pk == parent_keys.end())
				continue;

			auto ix = pk - parent_keys.begin();
			auto child_key = link->m_child_keys[ix];

			for (auto row : cat)
				row.assign({ { child_key, entry_id } });
		}

		// See if all categories that need a key do have a value
		if (cv->m_keys.size() != 1)
			continue;

		auto key_name = cv->m_keys.front();

		for (auto row : cat)
		{
			auto ord = row.get<std::string>(key_name.c_str());
			if (not ord.empty())
				continue;

			row.assign({ //
				{ key_name, cat.get_unique_id([](int nr)
								{ return std::to_string(nr); }) } });
		}
	}

	file.load_dictionary(dictionary);

	// Now create any missing categories

	// First, see if atom records make sense at all
	// Will take care of atom_type and chem_comp as well.
	checkAtomRecords(db);

	// Next make sure we have struct_asym records
	if (db.get("struct_asym") == nullptr)
		createStructAsym(db);

	if (db.get("entity") == nullptr)
		createEntity(db);

	if (db.get("pdbx_poly_seq_scheme") == nullptr)
		createPdbxPolySeqScheme(db);
}
} // namespace cif::pdb

View File

@@ -1,284 +0,0 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2024 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "cif++.hpp"
namespace cif::pdb
{
// Construct the condition that selects, in parentCat, the parent records of
// the row rh. Each link from the category of rh to parentCat contributes a
// conjunction over its non-empty child key values; the results for the
// separate links are combined with or. A warning is written to std::cerr when
// no such link exists and cif::VERBOSE is larger than zero.
condition get_parents_condition(const validator &validator, row_handle rh, const category &parentCat)
{
	const auto child_name = rh.get_category().name();
	const auto parent_name = parentCat.name();

	condition result;
	bool link_found = false;

	for (auto &link : validator.get_links_for_child(child_name))
	{
		// only links pointing to the requested parent are of interest
		if (link->m_parent_category != parent_name)
			continue;

		link_found = true;

		condition link_cond;

		for (size_t ix = 0; ix < link->m_child_keys.size(); ++ix)
		{
			auto child_value = rh[link->m_child_keys[ix]];

			if (child_value.empty())
				continue;

			link_cond = std::move(link_cond) and key(link->m_parent_keys[ix]) == child_value.text();
		}

		result = std::move(result) or std::move(link_cond);
	}

	if (not link_found and cif::VERBOSE > 0)
		std::cerr << "warning: no child to parent links were found for child " << child_name << " and parent " << parent_name << '\n';

	return result;
}
// Check whether the first datablock of file is a consistent PDBx file.
//
// The checks performed are:
//  - the file, the datablock and the atom_site, entity, entity_poly,
//    entity_poly_seq and struct_asym categories are not empty,
//  - each polymer residue in atom_site that is a known peptide has exactly one
//    parent record in pdbx_poly_seq_scheme,
//  - each polymer entity has exactly one entity_poly record,
//  - entity_poly_seq and pdbx_poly_seq_scheme agree with each other and with
//    the hetero flags,
//  - no atom_site record of a polymer has a compound that is not listed for
//    its sequence position,
//  - the one letter code sequences in entity_poly, when present, match the
//    residues in entity_poly_seq.
//
// Returns true when all checks pass. Any exception thrown while checking is
// caught and results in false, its message is written to std::clog when
// cif::VERBOSE is larger than zero.
bool is_valid_pdbx_file(const file &file, std::string_view dictionary)
{
	using namespace cif::literals;

	auto &cf = cif::compound_factory::instance();
	auto &validator = cif::validator_factory::instance().operator[](dictionary);

	bool result = true;

	try
	{
		if (file.empty())
			throw validation_error("Empty file");

		auto &db = file.front();

		if (db.empty())
			throw validation_error("Empty datablock");

		auto &atom_site = db["atom_site"];
		if (atom_site.empty())
			throw validation_error("Empty or missing atom_site category");

		auto &pdbx_poly_seq_scheme = db["pdbx_poly_seq_scheme"];

		std::string last_asym_id;
		int last_seq_id = -1;

		for (auto r : atom_site)
		{
			auto seq_id = r.get<std::optional<int>>("label_seq_id");
			if (not seq_id.has_value()) // not a residue in a polymer
				continue;

			// Check each residue only once. The asym is part of the test, otherwise
			// the first residue of a chain would be skipped when it happens to have
			// the same seq_id as the last residue of the previous chain.
			auto asym_id = r.get<std::string>("label_asym_id");
			if (asym_id == last_asym_id and *seq_id == last_seq_id)
				continue;

			last_asym_id = asym_id;
			last_seq_id = *seq_id;

			auto comp_id = r.get<std::string>("label_comp_id");
			if (not cf.is_known_peptide(comp_id))
				continue;

			auto p = pdbx_poly_seq_scheme.find(get_parents_condition(validator, r, pdbx_poly_seq_scheme));
			if (p.size() != 1)
				throw validation_error("For each residue in atom_site that is a residue in a polymer there should be exactly one pdbx_poly_seq_scheme record");
		}

		auto &entity = db["entity"];
		if (entity.empty())
			throw validation_error("Entity category is missing or empty");

		auto &entity_poly = db["entity_poly"];
		if (entity_poly.empty())
			throw validation_error("Entity_poly category is missing or empty");

		auto &entity_poly_seq = db["entity_poly_seq"];
		if (entity_poly_seq.empty())
			throw validation_error("Entity_poly_seq category is missing or empty");

		auto &struct_asym = db["struct_asym"];
		if (struct_asym.empty())
			throw validation_error("struct_asym category is missing or empty");

		for (auto entity_id : entity.find<std::string>("type"_key == "polymer", "id"))
		{
			if (entity_poly.count("entity_id"_key == entity_id) != 1)
				throw validation_error("There should be exactly one entity_poly record per polymer entity");

			const auto entity_poly_type = entity_poly.find1<std::string>("entity_id"_key == entity_id, "type");

			// The compounds listed for each sequence position, more than one means hetero
			std::map<int, std::set<std::string>> mon_per_seq_id;

			for (const auto &[num, mon_id, hetero] : entity_poly_seq.find<int, std::string, bool>("entity_id"_key == entity_id, "num", "mon_id", "hetero"))
			{
				mon_per_seq_id[num].emplace(mon_id);

				for (auto asym_id : struct_asym.find<std::string>("entity_id"_key == entity_id, "id"))
				{
					if (pdbx_poly_seq_scheme.count(
							"asym_id"_key == asym_id and
							"mon_id"_key == mon_id and
							"seq_id"_key == num and
							"hetero"_key == hetero) != 1)
					{
						throw validation_error("For each entity_poly_seq record there should be exactly one pdbx_poly_seq record");
					}
				}
			}

			for (const auto &[seq_id, mon_id, hetero] : pdbx_poly_seq_scheme.find<int, std::string, bool>("entity_id"_key == entity_id, "seq_id", "mon_id", "hetero"))
			{
				if (entity_poly_seq.count(
						"mon_id"_key == mon_id and
						"num"_key == seq_id and
						"hetero"_key == hetero) != 1)
				{
					throw validation_error("For each pdbx_poly_seq/struct_asym record there should be exactly one entity_poly_seq record");
				}

				if ((mon_per_seq_id[seq_id].size() > 1) != hetero)
					throw validation_error("Mismatch between the hetero flag in the poly seq schemes and the number residues per seq_id");
			}

			for (const auto &[seq_id, mon_ids] : mon_per_seq_id)
			{
				for (auto asym_id : struct_asym.find<std::string>("entity_id"_key == entity_id, "id"))
				{
					// Look for atoms at this position having a compound other than the ones listed
					condition cond;
					for (auto mon_id : mon_ids)
						cond = std::move(cond) or "label_comp_id"_key == mon_id;

					cond = "label_entity_id"_key == entity_id and
					       "label_asym_id"_key == asym_id and
					       "label_seq_id"_key == seq_id and not std::move(cond);

					if (atom_site.exists(std::move(cond)))
						throw validation_error("An atom_site record exists that has no parent in the poly seq scheme categories");
				}
			}

			auto &&[seq, seq_can] = entity_poly.find1<std::optional<std::string>, std::optional<std::string>>("entity_id"_key == entity_id,
				"pdbx_seq_one_letter_code", "pdbx_seq_one_letter_code_can");

			// Compare the sequence in [si, se) with the residues in mon_per_seq_id.
			// When can is true the canonical form is expected for non standard
			// residues (one letter code or X), otherwise the '(comp_id)' form.
			auto seq_match = [&](bool can, std::string::const_iterator si, std::string::const_iterator se)
			{
				for (const auto &[seq_id, comp_ids] : mon_per_seq_id)
				{
					if (si == se)
						return false;

					bool match = false;

					// For a heterogeneous position any of the alternatives may be the
					// one that is in the sequence, so all of them should be tried.
					for (const auto &comp_id : comp_ids)
					{
						std::string letter;

						if (cf.is_known_base(comp_id))
							letter = compound_factory::kBaseMap.at(comp_id);
						else if (cf.is_known_peptide(comp_id))
							letter = compound_factory::kAAMap.at(comp_id);
						else
						{
							if (can)
							{
								auto c = cf.create(comp_id);
								if (c and c->one_letter_code())
									letter = c->one_letter_code();
								else
									letter = "X";
							}
							else
								letter = '(' + comp_id + ')';
						}

						// Do not read beyond the end of the sequence
						const auto remaining = static_cast<std::size_t>(se - si);
						if (letter.length() > remaining)
							continue;

						if (iequals(std::string{ si, si + letter.length() }, letter))
						{
							match = true;
							si += letter.length();
							break;
						}
					}

					if (not match)
						return false;
				}

				return si == se;
			};

			// std::isspace requires a value representable as unsigned char
			auto is_space = [](char ch)
			{ return std::isspace(static_cast<unsigned char>(ch)) != 0; };

			if (not seq.has_value())
			{
				if (cif::VERBOSE > 0)
					std::clog << "Warning: entity_poly has no sequence for entity_id " << entity_id << '\n';
			}
			else
			{
				seq->erase(std::remove_if(seq->begin(), seq->end(), is_space), seq->end());

				if (not seq_match(false, seq->begin(), seq->end()))
					throw validation_error("Sequences do not match for entity " + entity_id);
			}

			if (not seq_can.has_value())
			{
				if (cif::VERBOSE > 0)
					std::clog << "Warning: entity_poly has no canonical sequence for entity_id " << entity_id << '\n';
			}
			else
			{
				seq_can->erase(std::remove_if(seq_can->begin(), seq_can->end(), is_space), seq_can->end());

				if (not seq_match(true, seq_can->begin(), seq_can->end()))
					throw validation_error("Canonical sequences do not match for entity " + entity_id);
			}
		}

		result = true;
	}
	catch (const std::exception &ex)
	{
		result = false;
		if (cif::VERBOSE > 0)
			std::clog << ex.what() << '\n';
	}

	return result;
}
} // namespace cif::pdb

BIN
test/.1juh.cif.gz.swp Normal file

Binary file not shown.

Binary file not shown.

View File

@@ -37,8 +37,8 @@ TEST_CASE("rename")
{
cif::VERBOSE = 3;
if (std::filesystem::exists(gTestDir / ".." / "rsrc" / "ccd-subset.cif"))
cif::add_file_resource("components.cif", gTestDir / ".." / "rsrc" / "ccd-subset.cif");
if (std::filesystem::exists(gTestDir / ".." / "data" / "ccd-subset.cif"))
cif::add_file_resource("components.cif", gTestDir / ".." / "data" / "ccd-subset.cif");
if (std::filesystem::exists(gTestDir / ".." / "rsrc" / "mmcif_pdbx.dic"))
cif::add_file_resource("mmcif_pdbx.dic", gTestDir / ".." / "rsrc" / "mmcif_pdbx.dic");

View File

@@ -13,11 +13,10 @@ int main(int argc, char *argv[])
// Build a new parser on top of Catch2's
using namespace Catch::clara;
auto cli = session.cli() // Get Catch2's command line parser
| Opt(gTestDir, "data-dir") // bind variable to a new option, with a hint string
["-D"]["--data-dir"] // the option names it will respond to
("The directory containing the data files") // description string for the help output
| Opt(cif::VERBOSE, "verbose")["-v"]["--cif-verbose"]("Flag for cif::VERBOSE");
auto cli = session.cli() // Get Catch2's command line parser
| Opt(gTestDir, "data-dir") // bind variable to a new option, with a hint string
["-D"]["--data-dir"] // the option names it will respond to
("The directory containing the data files"); // description string for the help output
// Now pass the new composite back to Catch2 so it uses that
session.cli(cli);
@@ -31,9 +30,10 @@ int main(int argc, char *argv[])
cif::add_file_resource("mmcif_pdbx.dic", gTestDir / ".." / "rsrc" / "mmcif_pdbx.dic");
// initialize CCD location
cif::add_file_resource("components.cif", gTestDir / ".." / "rsrc" / "ccd-subset.cif");
cif::add_file_resource("components.cif", gTestDir / ".." / "data" / "ccd-subset.cif");
cif::compound_factory::instance().push_dictionary(gTestDir / "HEM.cif");
return session.run();
}

View File

@@ -3468,22 +3468,3 @@ TEST_CASE("compound_not_found_test_1")
auto cmp = cif::compound_factory::instance().create("&&&");
REQUIRE(cmp == nullptr);
}
// --------------------------------------------------------------------
// PDB2CIF tests
TEST_CASE("pdb2cif_formula_weight")
{
cif::compound_factory::instance().push_dictionary(gTestDir / "REA.cif");
cif::file a = cif::pdb::read(gTestDir / "pdb1cbs.ent.gz");
auto fw = a.front()["entity"].find1<float>(cif::key("id") == 1, "formula_weight");
CHECK(std::abs(fw - 15581.802f) < 0.1f);
fw = a.front()["entity"].find1<float>(cif::key("id") == 2, "formula_weight");
CHECK(fw == 300.435f);
fw = a.front()["entity"].find1<float>(cif::key("id") == 3, "formula_weight");
CHECK(fw == 18.015f);
}

View File

@@ -1,286 +0,0 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "test-main.hpp"
#include <catch2/catch.hpp>
#include <cif++.hpp>
#include <stdexcept>
// --------------------------------------------------------------------
// User defined literal that parses the text of a string literal as a CIF file.
cif::file operator""_cf(const char *text, size_t length)
{
	// A stream buffer with the literal as its get area. The const_cast is
	// needed since setg takes pointers to non-const char.
	struct literal_buffer : public std::streambuf
	{
		literal_buffer(const char *first, size_t count)
		{
			char *base = const_cast<char *>(first);
			setg(base, base, base + count);
		}
	};

	literal_buffer buffer(text, length);
	std::istream in(&buffer);

	return cif::file(in);
}
// --------------------------------------------------------------------
TEST_CASE("test-1")
{
auto f = R"(data_1CBS
#
_entry.id 1CBS
#
_entity.id 1
_entity.type polymer
#
_entity_poly.entity_id 1
_entity_poly.type 'polypeptide(L)'
_entity_poly.nstd_linkage no
_entity_poly.nstd_monomer no
_entity_poly.pdbx_seq_one_letter_code
;PNFSG
;
_entity_poly.pdbx_seq_one_letter_code_can
;PNFSG
;
_entity_poly.pdbx_strand_id A
_entity_poly.pdbx_target_identifier ?
#
loop_
_entity_poly_seq.entity_id
_entity_poly_seq.num
_entity_poly_seq.mon_id
_entity_poly_seq.hetero
1 1 PRO n
1 2 ASN n
1 3 PHE n
1 4 SER n
1 5 GLY n
#
loop_
_struct_asym.id
_struct_asym.pdbx_blank_PDB_chainid_flag
_struct_asym.pdbx_modified
_struct_asym.entity_id
_struct_asym.details
A N N 1 ?
#
loop_
_atom_type.symbol
C
N
O
S
#
loop_
_atom_site.group_PDB
_atom_site.id
_atom_site.type_symbol
_atom_site.label_atom_id
_atom_site.label_alt_id
_atom_site.label_comp_id
_atom_site.label_asym_id
_atom_site.label_entity_id
_atom_site.label_seq_id
_atom_site.pdbx_PDB_ins_code
_atom_site.Cartn_x
_atom_site.Cartn_y
_atom_site.Cartn_z
_atom_site.occupancy
_atom_site.B_iso_or_equiv
_atom_site.pdbx_formal_charge
_atom_site.auth_seq_id
_atom_site.auth_comp_id
_atom_site.auth_asym_id
_atom_site.auth_atom_id
_atom_site.pdbx_PDB_model_num
ATOM 2 C CA . PRO A 1 1 ? 18.150 13.525 43.680 1.00 28.82 ? 1 PRO A CA 1
ATOM 9 C CA . ASN A 1 2 ? 20.576 16.457 43.578 1.00 20.79 ? 2 ASN A CA 1
ATOM 17 C CA . PHE A 1 3 ? 21.144 17.838 40.087 1.00 12.62 ? 3 PHE A CA 1
ATOM 28 C CA . SER A 1 4 ? 23.170 20.780 41.464 1.00 11.30 ? 4 SER A CA 1
ATOM 34 C CA . GLY A 1 5 ? 26.628 21.486 40.103 1.00 10.86 ? 5 GLY A CA 1
#
loop_
_pdbx_poly_seq_scheme.asym_id
_pdbx_poly_seq_scheme.entity_id
_pdbx_poly_seq_scheme.seq_id
_pdbx_poly_seq_scheme.mon_id
_pdbx_poly_seq_scheme.ndb_seq_num
_pdbx_poly_seq_scheme.pdb_seq_num
_pdbx_poly_seq_scheme.auth_seq_num
_pdbx_poly_seq_scheme.pdb_mon_id
_pdbx_poly_seq_scheme.auth_mon_id
_pdbx_poly_seq_scheme.pdb_strand_id
_pdbx_poly_seq_scheme.pdb_ins_code
_pdbx_poly_seq_scheme.hetero
A 1 1 PRO 1 1 1 PRO PRO A . n
A 1 2 ASN 2 2 2 ASN ASN A . n
A 1 3 PHE 3 3 3 PHE PHE A . n
A 1 4 SER 4 4 4 SER SER A . n
A 1 5 GLY 5 5 5 GLY GLY A . n
#
)"_cf;
SECTION("Plain file")
{
REQUIRE(cif::pdb::is_valid_pdbx_file(f));
}
// Erasing a single atom_site row (id 2, the PRO CA atom in the fixture)
// must remove exactly one row and the file is still expected to validate.
SECTION("Delete one atom_site")
{
    auto &atom_site = f.front()["atom_site"];

    const auto erased = atom_site.erase(cif::key("id") == 2);
    REQUIRE(erased == 1);

    REQUIRE(cif::pdb::is_valid_pdbx_file(f));
}
// Erasing the pdbx_poly_seq_scheme row for seq_id 2 must remove exactly
// one row, after which the file is expected to be rejected.
SECTION("Delete a pdbx_poly_seq_scheme record")
{
    auto &scheme = f.front()["pdbx_poly_seq_scheme"];

    const auto erased = scheme.erase(cif::key("seq_id") == 2);
    REQUIRE(erased == 1);

    REQUIRE_FALSE(cif::pdb::is_valid_pdbx_file(f));
}
// Erasing the entity_poly_seq row with num 2 must remove exactly one row,
// after which the file is expected to be rejected.
SECTION("Delete an entity_poly_seq record")
{
    auto &poly_seq = f.front()["entity_poly_seq"];

    const auto erased = poly_seq.erase(cif::key("num") == 2);
    REQUIRE(erased == 1);

    REQUIRE_FALSE(cif::pdb::is_valid_pdbx_file(f));
}
// Erasing the entity_poly row for entity 1 must remove exactly one row,
// after which the file is expected to be rejected.
SECTION("Delete an entity_poly record")
{
    auto &entity_poly = f.front()["entity_poly"];

    const auto erased = entity_poly.erase(cif::key("entity_id") == 1);
    REQUIRE(erased == 1);

    REQUIRE_FALSE(cif::pdb::is_valid_pdbx_file(f));
}
// Rename the compound of atom 9 (ASN, residue 2 in the fixture) to ALA in
// both the label and auth columns; the file is then expected to be rejected.
SECTION("Mutate an atom_site record")
{
    auto &atom_site = f.front()["atom_site"];
    auto atom = atom_site.find1(cif::key("id") == 9);

    atom.assign({
        { "label_comp_id", "ALA" },
        { "auth_comp_id", "ALA" }
    });

    REQUIRE_FALSE(cif::pdb::is_valid_pdbx_file(f));
}
// Add a second monomer (ALA) at sequence position 1 of entity 1, in both
// entity_poly_seq and pdbx_poly_seq_scheme, with hetero set to "n" on the new
// rows. The resulting file is expected to be rejected.
SECTION("Hetero consistency")
{
    auto &datablock = f.front();

    // Extra entity_poly_seq row for position 1.
    auto &poly_seq = datablock["entity_poly_seq"];
    poly_seq.emplace({ //
        { "entity_id", 1 },
        { "num", 1 },
        { "mon_id", "ALA" },
        { "hetero", "n" }
    });

    // Matching extra pdbx_poly_seq_scheme row for the same position.
    auto &scheme = datablock["pdbx_poly_seq_scheme"];
    scheme.emplace({ //
        { "asym_id", "A" },
        { "entity_id", "1" },
        { "seq_id", "1" },
        { "mon_id", "ALA" },
        { "ndb_seq_num", "1" },
        { "pdb_seq_num", "1" },
        { "auth_seq_num", "1" },
        { "pdb_mon_id", "ALA" },
        { "auth_mon_id", "ALA" },
        { "pdb_strand_id", "A" },
        { "pdb_ins_code", "." },
        { "hetero", "n" }
    });

    REQUIRE_FALSE(cif::pdb::is_valid_pdbx_file(f));
}
// Clone the first atom_site row, give the copy id 3 and compound ALA, and
// insert it. The sequence categories are left untouched; the file is then
// expected to be rejected.
SECTION("Missing hetero for record in atom_site")
{
    auto &atom_site = f.front()["atom_site"];

    auto first_atom = atom_site.front();
    cif::row_initializer clone(first_atom);
    clone.set_value("id", "3");
    clone.set_value("label_comp_id", "ALA");

    atom_site.emplace(std::move(clone));

    REQUIRE_FALSE(cif::pdb::is_valid_pdbx_file(f));
}
// Replace the one-letter sequence "PNFSG" with "PNSG" (the F is dropped);
// the file is then expected to be rejected.
SECTION("Missing letter in entity_poly.pdbx_seq_one_letter_code")
{
    auto poly = f.front()["entity_poly"].front();

    poly.assign({
        { "pdbx_seq_one_letter_code", "PNSG" }
    });

    REQUIRE_FALSE(cif::pdb::is_valid_pdbx_file(f));
}
// Replace the one-letter sequence "PNFSG" with "PNFSGX" (one extra letter);
// the file is then expected to be rejected.
SECTION("Too many letters in entity_poly.pdbx_seq_one_letter_code")
{
    auto poly = f.front()["entity_poly"].front();

    poly.assign({
        { "pdbx_seq_one_letter_code", "PNFSGX" }
    });

    REQUIRE_FALSE(cif::pdb::is_valid_pdbx_file(f));
}
// Replace the one-letter sequence "PNFSG" with "PNASG" (same length, F
// swapped for A); the file is then expected to be rejected.
SECTION("Mismatch in entity_poly.pdbx_seq_one_letter_code")
{
    auto poly = f.front()["entity_poly"].front();

    poly.assign({
        { "pdbx_seq_one_letter_code", "PNASG" }
    });

    REQUIRE_FALSE(cif::pdb::is_valid_pdbx_file(f));
}
}

View File

@@ -10,7 +10,7 @@ euid=${EUID:-$(id -u)}
if [ "${euid}" -ne 0 ]; then
echo "Please run as root"
exit 1
exit
fi
if [ -f "@CIFPP_ETC_DIR@/libcifpp.conf" ]; then
@@ -19,13 +19,12 @@ fi
# check to see if we're supposed to run at all
if [ "$update" != "true" ]; then
exit 0
exit
fi
# if cache directory doesn't exist, exit.
if ! [ -d "@CIFPP_CACHE_DIR@" ]; then
echo "Cache directory '@CIFPP_CACHE_DIR@' does not exist"
exit 1
exit
fi
# Create a temp file in the right directory and
@@ -61,16 +60,12 @@ update_dictionary() {
# Update the dictionaries
update_dictionary "@CIFPP_CACHE_DIR@/components.cif" "https://files.wwpdb.org/pub/pdb/data/monomers/components.cif.gz"
update_dictionary "@CIFPP_CACHE_DIR@/components.cif" "https://ftp.wwpdb.org/pub/pdb/data/monomers/components.cif.gz"
update_dictionary "@CIFPP_CACHE_DIR@/mmcif_pdbx.dic" "https://mmcif.wwpdb.org/dictionaries/ascii/mmcif_pdbx_v50.dic.gz"
update_dictionary "@CIFPP_CACHE_DIR@/mmcif_ma.dic" "https://github.com/ihmwg/ModelCIF/raw/master/dist/mmcif_ma.dic"
# notify subscribers, using find instead of run-parts to make it work on FreeBSD as well
# notify subscribers, will fail on FreeBSD
if [ -d "@CIFPP_ETC_DIR@/libcifpp/cache-update.d" ]; then
find "@CIFPP_ETC_DIR@/libcifpp/cache-update.d" \
-exec test -x {} \; -and -not -exec test -d {} \; \
-exec {} "@CIFPP_CACHE_DIR@" \;
if [ -d "@CIFPP_ETC_DIR@/libcifpp/cache-update.d" ] && [ -x /bin/run-parts ]; then
run-parts --arg "@CIFPP_CACHE_DIR@" -- "@CIFPP_ETC_DIR@/libcifpp/cache-update.d"
fi
exit 0